/* SPDX-License-Identifier: GPL-2.0-only */ /* * Media device node * * Copyright (C) 2010 Nokia Corporation * * Contacts: Laurent Pinchart * Sakari Ailus * * -- * * Common functions for media-related drivers to register and unregister media * device nodes. */ #ifndef _MEDIA_DEVNODE_H #define _MEDIA_DEVNODE_H #include #include #include #include struct media_device; /* * Flag to mark the media_devnode struct as registered. Drivers must not touch * this flag directly, it will be set and cleared by media_devnode_register and * media_devnode_unregister. */ #define MEDIA_FLAG_REGISTERED 0 /** * struct media_file_operations - Media device file operations * * @owner: should be filled with %THIS_MODULE * @read: pointer to the function that implements read() syscall * @write: pointer to the function that implements write() syscall * @poll: pointer to the function that implements poll() syscall * @ioctl: pointer to the function that implements ioctl() syscall * @compat_ioctl: pointer to the function that will handle 32 bits userspace * calls to the ioctl() syscall on a Kernel compiled with 64 bits. * @open: pointer to the function that implements open() syscall * @release: pointer to the function that will release the resources allocated * by the @open function. 
*/ struct media_file_operations { struct module *owner; ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); __poll_t (*poll) (struct file *, struct poll_table_struct *); long (*ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*open) (struct file *); int (*release) (struct file *); }; /** * struct media_devnode - Media device node * @media_dev: pointer to struct &media_device * @fops: pointer to struct &media_file_operations with media device ops * @dev: pointer to struct &device containing the media controller device * @cdev: struct cdev pointer character device * @parent: parent device * @minor: device node minor number * @flags: flags, combination of the ``MEDIA_FLAG_*`` constants * @release: release callback called at the end of ``media_devnode_release()`` * routine at media-device.c. * * This structure represents a media-related device node. * * The @parent is a physical device. It must be set by core or device drivers * before registering the node. */ struct media_devnode { struct media_device *media_dev; /* device ops */ const struct media_file_operations *fops; /* sysfs */ struct device dev; /* media device */ struct cdev cdev; /* character device */ struct device *parent; /* device parent */ /* device info */ int minor; unsigned long flags; /* Use bitops to access flags */ /* callbacks */ void (*release)(struct media_devnode *devnode); }; /* dev to media_devnode */ #define to_media_devnode(cd) container_of(cd, struct media_devnode, dev) /** * media_devnode_register - register a media device node * * @mdev: struct media_device we want to register a device node * @devnode: media device node structure we want to register * @owner: should be filled with %THIS_MODULE * * The registration code assigns minor numbers and registers the new device node * with the kernel. 
An error is returned if no free minor number can be found, * or if the registration of the device node fails. * * Zero is returned on success. * * Note that if the media_devnode_register call fails, the release() callback of * the media_devnode structure is *not* called, so the caller is responsible for * freeing any data. */ int __must_check media_devnode_register(struct media_device *mdev, struct media_devnode *devnode, struct module *owner); /** * media_devnode_unregister_prepare - clear the media device node register bit * @devnode: the device node to prepare for unregister * * This clears the passed device register bit. Future open calls will be met * with errors. Should be called before media_devnode_unregister() to avoid * races with unregister and device file open calls. * * This function can safely be called if the device node has never been * registered or has already been unregistered. */ void media_devnode_unregister_prepare(struct media_devnode *devnode); /** * media_devnode_unregister - unregister a media device node * @devnode: the device node to unregister * * This unregisters the passed device. Future open calls will be met with * errors. * * Should be called after media_devnode_unregister_prepare() */ void media_devnode_unregister(struct media_devnode *devnode); /** * media_devnode_data - returns a pointer to the &media_devnode * * @filp: pointer to struct &file */ static inline struct media_devnode *media_devnode_data(struct file *filp) { return filp->private_data; } /** * media_devnode_is_registered - returns true if &media_devnode is registered; * false otherwise. * * @devnode: pointer to struct &media_devnode. * * Note: If mdev is NULL, it also returns false. */ static inline int media_devnode_is_registered(struct media_devnode *devnode) { if (!devnode) return false; return test_bit(MEDIA_FLAG_REGISTERED, &devnode->flags); } #endif /* _MEDIA_DEVNODE_H */ mmitter'>committer
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--drivers/gpu/drm/amd/acp/include/acp_gfx_if.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile101
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.c135
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h475
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c984
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h232
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c75
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c298
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h159
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c384
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c301
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c707
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c117
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c307
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c167
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c116
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c214
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c591
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c206
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c230
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c372
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c3589
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_df.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c819
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c216
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c164
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c754
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c241
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c75
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c193
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c205
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c476
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c1378
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h148
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c885
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h127
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c96
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c90
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c343
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c210
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c319
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c476
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c486
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h113
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c1480
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h181
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c554
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c1196
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h97
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c2814
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h251
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c346
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c195
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c455
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h222
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h98
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c349
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c262
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h78
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c472
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c217
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h80
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c329
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c550
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c1090
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h142
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c1009
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c877
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h204
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c625
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c95
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c1184
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c287
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c992
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c272
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c728
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c738
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h186
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c885
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v3_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c122
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_crtc.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_encoders.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_i2c.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c108
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cikd.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_si.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c109
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c3799
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c477
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c148
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_15.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_15.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h (renamed from drivers/gpu/drm/amd/amdgpu/dce_v11_0.h)11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c1721
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm124
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c2227
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm118
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c5793
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c150
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c210
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c320
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c1173
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm153
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c1566
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm153
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c516
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c521
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c431
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c394
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c1043
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c159
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c138
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c169
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c885
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c132
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.c126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_1.c95
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v7_0.c787
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v7_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v12_0.c406
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v12_0.h (renamed from drivers/gpu/drm/amd/amdgpu/mes_v10_1.h)9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c191
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c377
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c143
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c249
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c139
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c153
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c698
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c872
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c733
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c1100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.c485
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.c1187
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c746
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c1931
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c327
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c746
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c647
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c157
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c226
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c554
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c400
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c141
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nvd.h208
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c128
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c286
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v14_0.c705
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v14_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c101
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c104
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c273
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c667
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c792
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c829
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c823
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c1858
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c592
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c183
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_enums.h246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c76
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sid.h1924
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c234
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h154
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c271
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.c601
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_ras_if.h53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c75
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.c728
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.h105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_10.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_14.c160
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_14.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c434
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c71
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c78
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c98
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c329
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c321
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c425
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c414
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c1337
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c873
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c1059
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c1118
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c1723
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c1442
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c1727
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c141
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vid.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h218
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c398
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h4740
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm469
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm1136
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm64
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c300
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c267
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c74
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c76
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c525
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c988
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h45
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c75
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c43
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c43
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c90
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c45
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c77
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c102
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c213
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c119
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c230
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c62
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c54
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c330
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.h4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c54
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h29
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c459
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c139
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c187
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c116
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h174
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c411
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c240
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_queue.c385
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c104
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c759
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h33
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c320
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h28
-rw-r--r--drivers/gpu/drm/amd/amdkfd/soc15_int.h1
-rw-r--r--drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c64
-rw-r--r--drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h1
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig19
-rw-r--r--drivers/gpu/drm/amd/display/Makefile16
-rw-r--r--drivers/gpu/drm/amd/display/TODO110
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/Makefile15
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c4737
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h298
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c917
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c568
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h56
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c404
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c610
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h1
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c136
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h1
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c402
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c94
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h7
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h7
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c583
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h7
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c1037
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h12
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c97
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h7
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c178
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c136
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h7
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c9
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h31
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c215
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h36
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c76
-rw-r--r--drivers/gpu/drm/amd/display/dc/Makefile56
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/Makefile9
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/bw_fixed.c (renamed from drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c)13
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/calcs_logger.h (renamed from drivers/gpu/drm/amd/display/dc/dml/calcs/calcs_logger.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/conversion.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/custom_float.c (renamed from drivers/gpu/drm/amd/display/dc/dml/calcs/custom_float.c)90
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/dc_common.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c (renamed from drivers/gpu/drm/amd/display/dc/dml/calcs/dce_calcs.c)14
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/basics/vector.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c292
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table2.c88
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table2.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c106
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile20
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c73
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c22
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c44
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c79
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c29
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c118
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c31
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c14
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c246
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c44
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c35
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dalsmc.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c252
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c166
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/smu13_driver_if.h108
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c141
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c1556
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c508
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h220
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h55
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c1631
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h117
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c472
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h66
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c2751
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_debug.c208
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c651
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c53
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c2871
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stat.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_state.c1077
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c779
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_surface.c126
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h1010
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_bios_types.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c1519
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h247
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dp_types.h267
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dsc.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_fused_io.c148
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_fused_io.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_helper.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_hw_types.h241
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_plane.h51
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_plane_priv.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_spl_translate.c229
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_spl_translate.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_state.h77
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_state_priv.h128
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream.h196
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream_priv.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h365
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/Makefile103
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h)206
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.h)21
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn302/dcn302_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_dccg.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn303/dcn303_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h)18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h)8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c)15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c)14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c2470
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h247
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c914
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h249
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.h88
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_audio.c306
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_audio.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_aux.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c149
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c87
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_opp.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_opp.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_transform.c23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_transform.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c63
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c280
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/Makefile6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c220
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c126
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/Makefile5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/Makefile7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/Makefile10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/Makefile10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/Makefile6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/Makefile18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c282
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/Makefile15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn302/Makefile12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/Makefile13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.c45
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.h18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/Makefile15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c48
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/Makefile18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/Makefile20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn321/Makefile17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/Makefile117
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c)11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h)37
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h)43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c)13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c)34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h)40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c)84
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.h)15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c)42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn321/dcn321_dio_link_encoder.c)9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn321/dcn321_dio_link_encoder.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c391
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h188
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c520
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.h332
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.c322
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.h134
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c856
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.h240
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_cp_psp.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_helpers.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_pp_smu.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services_types.h28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile62
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c149
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dc_features.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c235
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c30
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c92
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c26
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c1194
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c97
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c620
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c639
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/Makefile141
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/cmntypes.h94
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c10335
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.h204
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h2033
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_lib_defines.h79
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c798
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.h76
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c929
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c516
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c470
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h135
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h373
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h190
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h509
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h210
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h737
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c660
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c13315
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h2326
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c786
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c785
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c198
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c706
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c2371
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c147
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c49
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c1170
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h189
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h988
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c1174
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h157
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c910
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c311
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c1525
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c560
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h149
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c704
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h309
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml_assert.h (renamed from drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.h)14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml_depedencies.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c573
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml_logging.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/Makefile83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c)24
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h)8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c)80
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c)89
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h)14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c)67
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c)16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c)86
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h)11
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c)63
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.c)76
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.h)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c149
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c428
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h740
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c229
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c1186
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/Makefile35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c343
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c)58
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h)31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c112
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c393
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h346
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dsc.h (renamed from drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/Makefile (renamed from drivers/gpu/drm/amd/display/dc/dce100/Makefile)32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c)38
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h)15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c57
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.h61
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/Makefile10
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c264
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.c335
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/hdcp/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c23
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/Makefile50
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c)18
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h)16
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hpo_dp_link_encoder.c)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hpo_dp_link_encoder.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/Makefile104
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c)30
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h)85
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c)32
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.c)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c)14
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h)8
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c)39
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h)8
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c)1
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c)71
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c)65
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h)16
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c615
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h171
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c1272
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h206
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/Makefile97
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c)32
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h)37
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c)51
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h)55
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c)9
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c)8
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c)57
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h)17
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c)25
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.h)9
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c)9
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c246
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h76
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c1097
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h373
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/Makefile202
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h)82
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c)34
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c)481
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h)18
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c)707
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h)18
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c)1024
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h)34
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c)12
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c)22
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.h)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c)8
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c)94
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c)13
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c)458
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h)21
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c)18
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hwseq.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hwseq.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c)16
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_hwseq.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_hwseq.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.c (renamed from drivers/gpu/drm/amd/amdkfd/kfd_pasid.c)62
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c)155
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h)5
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c)15
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c)220
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c)18
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c)798
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h)23
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c)29
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c1594
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h104
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c178
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile (renamed from drivers/gpu/drm/amd/display/dc/dcn315/Makefile)12
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.c182
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c172
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c2671
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h113
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c153
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h (renamed from drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h)201
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h (renamed from drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h)34
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/clock_source.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_status.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h157
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/abm.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/audio.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h57
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h144
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h76
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h109
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h27
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h911
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/opp.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/optc.h197
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h55
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/transform.h20
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/vpg.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_service.h (renamed from drivers/gpu/drm/amd/display/dc/inc/link.h)43
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/reg_helper.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h357
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h24
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/Makefile36
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c45
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c49
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c47
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c46
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c35
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c52
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c45
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c45
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c45
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c102
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c400
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c382
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c381
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c414
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.c66
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq_types.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/Makefile4
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c248
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.c95
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c118
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c112
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c56
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.c62
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.c490
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.c140
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_resource.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.c286
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c504
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c100
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c602
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h56
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c122
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c78
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c186
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c135
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c104
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c376
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c381
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile54
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.c59
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/Makefile72
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c)16
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c)34
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c)171
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h)12
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c)395
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.h)10
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c634
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h257
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/Makefile51
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c)17
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c)39
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h)6
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c53
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/Makefile114
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c)190
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h)453
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c)19
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h)12
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c)10
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h)3
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c)59
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h)15
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c)7
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c)101
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h)14
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c)13
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h)11
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c)71
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h)13
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c537
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h83
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c551
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h196
-rw-r--r--drivers/gpu/drm/amd/display/dc/os_types.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/Makefile35
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c573
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.h195
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/Makefile225
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c)37
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h)9
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c)11
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c)29
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c)35
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c)102
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c)66
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c)75
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h)1
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c)189
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h)9
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c)44
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c)81
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h)2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c)81
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h)8
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c)50
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c)18
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c)48
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c)95
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h)15
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c)99
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h)4
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c)89
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c)22
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c)493
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h)231
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c (renamed from drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c)272
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c (renamed from drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c)85
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h (renamed from drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h)0
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c2219
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h315
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c2192
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c2192
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c2278
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h655
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c304
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c27
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/Makefile33
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c1910
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h27
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.c553
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.c2586
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.c1233
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h559
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.c151
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.h29
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_debug.h30
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c493
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.h522
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_os_types.h56
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c7
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv.h317
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h2610
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/Makefile4
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c87
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h5
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c3
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.h3
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.c19
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.h19
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c115
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h5
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c210
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h30
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c571
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h288
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.c34
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.h13
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.c34
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.h13
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c667
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h290
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h1
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c636
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c35
-rw-r--r--drivers/gpu/drm/amd/display/include/audio_types.h17
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_asic_id.h22
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_types.h5
-rw-r--r--drivers/gpu/drm/amd/display/include/ddc_service_types.h1
-rw-r--r--drivers/gpu/drm/amd/display/include/dpcd_defs.h24
-rw-r--r--drivers/gpu/drm/amd/display/include/fixed31_32.h6
-rw-r--r--drivers/gpu/drm/amd/display/include/gpio_service_interface.h3
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h4
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_id.h4
-rw-r--r--drivers/gpu/drm/amd/display/include/hdcp_msg_types.h5
-rw-r--r--drivers/gpu/drm/amd/display/include/link_service_types.h12
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_interface.h9
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_types.h148
-rw-r--r--drivers/gpu/drm/amd/display/include/signal_types.h14
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.c424
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.h11
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/freesync.c209
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c5
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h1
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c28
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c92
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c48
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c99
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h10
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c10
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h10
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h54
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h39
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_stats.h6
-rw-r--r--drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c6
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.c74
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.h14
-rw-r--r--drivers/gpu/drm/amd/include/amd_acpi.h4
-rw-r--r--drivers/gpu/drm/amd/include/amd_cper.h269
-rw-r--r--drivers/gpu/drm/amd/include/amd_pcie.h18
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h228
-rw-r--r--drivers/gpu/drm/amd/include/amdgpu_reg_state.h153
-rw-r--r--drivers/gpu/drm/amd/include/arct_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_offset.h287
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_sh_mask.h1348
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h7
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h (renamed from drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h)4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h (renamed from drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h)4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h136
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h70
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_offset.h110
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h52
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_offset.h44
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_sh_mask.h34
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h19
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_offset.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_sh_mask.h10
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_offset.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_sh_mask.h10
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_0_offset.h60
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_0_sh_mask.h55
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_1_offset.h37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_1_sh_mask.h16
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_offset.h15279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h53485
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_1_offset.h15259
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_1_sh_mask.h53464
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_6_0_offset.h15485
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_6_0_sh_mask.h61940
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_offset.h16662
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h145870
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h28
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h28
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_0_offset.h24
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_0_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_3_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_0_offset.h10
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h12
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h12
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h10002
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h36579
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_12_0_0_offset.h11061
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_12_0_0_sh_mask.h40550
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_7_0_0_offset.h219
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_7_0_0_sh_mask.h735
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_7_0_0_offset.h388
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_7_0_0_sh_mask.h1411
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_offset.h23
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_3_0_offset.h1395
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_3_0_sh_mask.h6722
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_4_1_0_offset.h1341
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_4_1_0_sh_mask.h6943
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_offset.h32
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h48
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h28
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_6_sh_mask.h28
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_0_offset.h359
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_0_sh_mask.h534
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_2_offset.h468
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_2_sh_mask.h692
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_3_1_offset.h11287
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_3_1_sh_mask.h32806
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h9406
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h57899
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_9_0_sh_mask.h8
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h23
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h41
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_0_0_offset.h4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_0_0_sh_mask.h10
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_7_0_0_offset.h279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_7_0_0_sh_mask.h1029
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pcie/pcie_6_1_0_offset.h630
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pcie/pcie_6_1_0_sh_mask.h4250
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h44
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h188
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h102
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h184
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_14_0_2_offset.h511
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_14_0_2_sh_mask.h1106
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_14_0_2_offset.h228
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_14_0_2_sh_mask.h940
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_12_0_0_offset.h33
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_12_0_0_sh_mask.h95
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h29
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_0_offset.h422
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_0_sh_mask.h882
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_offset.h37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_5_offset.h1797
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_5_sh_mask.h8614
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_offset.h1694
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_sh_mask.h7666
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vpe/vpe_6_1_0_offset.h1553
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vpe/vpe_6_1_0_sh_mask.h4393
-rw-r--r--drivers/gpu/drm/amd/include/atom-bits.h2
-rw-r--r--drivers/gpu/drm/amd/include/atombios.h6
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h133
-rw-r--r--drivers/gpu/drm/amd/include/beige_goby_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/cgs_common.h23
-rw-r--r--drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/dimgrey_cavefish_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/discovery.h70
-rw-r--r--drivers/gpu/drm/amd/include/dm_pp_interface.h10
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h1
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_0_0.h74
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/isp/irqsrcs_isp_4_1.h62
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h47
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vpe/irqsrcs_vpe_6_1.h40
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h39
-rw-r--r--drivers/gpu/drm/amd/include/kgd_pp_interface.h757
-rw-r--r--drivers/gpu/drm/amd/include/mes_api_def.h570
-rw-r--r--drivers/gpu/drm/amd/include/mes_v11_api_def.h90
-rw-r--r--drivers/gpu/drm/amd/include/mes_v12_api_def.h906
-rw-r--r--drivers/gpu/drm/amd/include/navi12_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/navi14_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/pptable.h95
-rw-r--r--drivers/gpu/drm/amd/include/renoir_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/sienna_cichlid_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/soc15_hw_ip.h1
-rw-r--r--drivers/gpu/drm/amd/include/soc15_ih_clientid.h2
-rw-r--r--drivers/gpu/drm/amd/include/soc21_enum.h2
-rw-r--r--drivers/gpu/drm/amd/include/soc24_enum.h21073
-rw-r--r--drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h446
-rw-r--r--drivers/gpu/drm/amd/include/v10_structs.h3
-rw-r--r--drivers/gpu/drm/amd/include/v11_structs.h8
-rw-r--r--drivers/gpu/drm/amd/include/v12_structs.h1189
-rw-r--r--drivers/gpu/drm/amd/include/vangogh_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/vega10_ip_offset.h6
-rw-r--r--drivers/gpu/drm/amd/include/vega20_ip_offset.h78
-rw-r--r--drivers/gpu/drm/amd/pm/Makefile1
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm.c453
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c86
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c2702
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h72
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h6
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h40
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c165
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c196
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.h7
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c627
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c66
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c103
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c10
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c21
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c475
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomfwctrl.c82
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomfwctrl.h3
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h555
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h36
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c40
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.c2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.h1
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c102
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c8
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c32
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c121
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h24
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c8
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c22
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c33
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_processpptables.c574
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/hardwaremanager.h1
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h25
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c1
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu10_smumgr.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c38
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c5
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c24
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/Makefile2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c1275
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h363
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h91
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h1889
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h263
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h373
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h148
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h258
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h15
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h194
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h141
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h150
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h66
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h9
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h51
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h (renamed from drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h)0
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h249
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h204
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c972
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c374
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c1711
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c152
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c222
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c169
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c468
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c512
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c1090
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c936
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c52
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c66
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c2389
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h70
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c953
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c96
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/Makefile (renamed from drivers/gpu/drm/amd/display/dc/dcn316/Makefile)12
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c1977
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c1722
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c2931
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c260
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h71
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_internal.h7
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/Makefile0
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_core_status.h37
1581 files changed, 929031 insertions, 54775 deletions
diff --git a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
index feab8eb7f2a8..b26710cae801 100644
--- a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
+++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
@@ -19,7 +19,7 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
-*/
+ */
#ifndef _ACP_GFX_IF_H
#define _ACP_GFX_IF_H
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 22d88f8ef527..1acfed2f92ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -2,10 +2,13 @@
config DRM_AMDGPU
tristate "AMD GPU"
- depends on DRM && PCI && MMU
+ depends on DRM && PCI
depends on !UML
select FW_LOADER
+ select DRM_CLIENT
+ select DRM_CLIENT_SELECTION
select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_DSC_HELPER
select DRM_DISPLAY_HDMI_HELPER
select DRM_DISPLAY_HDCP_HELPER
select DRM_DISPLAY_HELPER
@@ -17,11 +20,13 @@ config DRM_AMDGPU
select HWMON
select I2C
select I2C_ALGOBIT
+ select CRC16
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
select DRM_BUDDY
select DRM_SUBALLOC_HELPER
select DRM_EXEC
+ select DRM_PANEL_BACKLIGHT_QUIRKS
# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
# ACPI_VIDEO's dependencies must also be selected.
select INPUT if ACPI
@@ -63,13 +68,23 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- depends on MMU
select HMM_MIRROR
select MMU_NOTIFIER
help
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
isn't already selected to enabled full userptr support.
+config DRM_AMD_ISP
+ bool "Enable AMD Image Signal Processor IP support"
+ depends on DRM_AMDGPU && ACPI
+ select MFD_CORE
+ select PM_GENERIC_DOMAINS if PM
+ help
+ Choose this option to enable ISP IP support for AMD SOCs.
+ This adds the ISP (Image Signal Processor) IP driver and wires
+ it up into the amdgpu driver. It is required for cameras
+ on APUs which utilize mipi cameras.
+
config DRM_AMDGPU_WERROR
bool "Force the compiler to throw an error instead of a warning when compiling"
depends on DRM_AMDGPU
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 384b798a9bad..64e7acff8f18 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright 2017 Advanced Micro Devices, Inc.
+# Copyright 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -23,7 +23,7 @@
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
-FULL_AMD_PATH=$(srctree)/$(src)/..
+FULL_AMD_PATH=$(src)/..
DISPLAY_FOLDER_NAME=display
FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
@@ -39,23 +39,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd
-subdir-ccflags-y := -Wextra
-subdir-ccflags-y += -Wunused
-subdir-ccflags-y += -Wmissing-prototypes
-subdir-ccflags-y += -Wmissing-declarations
-subdir-ccflags-y += -Wmissing-include-dirs
-subdir-ccflags-y += -Wold-style-definition
-subdir-ccflags-y += -Wmissing-format-attribute
-# Need this to avoid recursive variable evaluation issues
-cond-flags := $(call cc-option, -Wunused-but-set-variable) \
- $(call cc-option, -Wunused-const-variable) \
- $(call cc-option, -Wstringop-truncation) \
- $(call cc-option, -Wpacked-not-aligned)
-subdir-ccflags-y += $(cond-flags)
-subdir-ccflags-y += -Wno-unused-parameter
-subdir-ccflags-y += -Wno-type-limits
-subdir-ccflags-y += -Wno-sign-compare
-subdir-ccflags-y += -Wno-missing-field-initializers
+# Locally disable W=1 warnings enabled in drm subsystem Makefile
subdir-ccflags-y += -Wno-override-init
subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror
@@ -70,7 +54,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
- amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+ amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
@@ -80,7 +65,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
- amdgpu_ring_mux.o amdgpu_xcp.o
+ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
+ amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@@ -96,15 +82,18 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
- nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
+ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
- nbio_v7_9.o aqua_vanjaram.o
+ nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o \
+ cyan_skillfish_reg_init.o
# add DF block
amdgpu-y += \
df_v1_7.o \
df_v3_6.o \
- df_v4_3.o
+ df_v4_3.o \
+ df_v4_6_2.o \
+ df_v4_15.o
# add GMC block
amdgpu-y += \
@@ -113,11 +102,12 @@ amdgpu-y += \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
- mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o
+ mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \
+ gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o
# add UMC block
amdgpu-y += \
- umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o
+ umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o
# add IH block
amdgpu-y += \
@@ -130,7 +120,8 @@ amdgpu-y += \
vega20_ih.o \
navi10_ih.o \
ih_v6_0.o \
- ih_v6_1.o
+ ih_v6_1.o \
+ ih_v7_0.o
# add PSP block
amdgpu-y += \
@@ -141,12 +132,12 @@ amdgpu-y += \
psp_v11_0_8.o \
psp_v12_0.o \
psp_v13_0.o \
- psp_v13_0_4.o
+ psp_v13_0_4.o \
+ psp_v14_0.o
# add DCE block
amdgpu-y += \
dce_v10_0.o \
- dce_v11_0.o \
amdgpu_vkms.o
# add GFX block
@@ -162,7 +153,9 @@ amdgpu-y += \
imu_v11_0.o \
gfx_v11_0.o \
gfx_v11_0_3.o \
- imu_v11_0_3.o
+ imu_v11_0_3.o \
+ gfx_v12_0.o \
+ imu_v12_0.o
# add async DMA block
amdgpu-y += \
@@ -174,13 +167,17 @@ amdgpu-y += \
sdma_v4_4_2.o \
sdma_v5_0.o \
sdma_v5_2.o \
- sdma_v6_0.o
+ sdma_v6_0.o \
+ sdma_v7_0.o
# add MES block
amdgpu-y += \
amdgpu_mes.o \
- mes_v10_1.o \
- mes_v11_0.o
+ mes_v11_0.o \
+ mes_v12_0.o \
+
+# add GFX userqueue support
+amdgpu-y += mes_userqueue.o
# add UVD block
amdgpu-y += \
@@ -205,20 +202,38 @@ amdgpu-y += \
vcn_v3_0.o \
vcn_v4_0.o \
vcn_v4_0_3.o \
+ vcn_v4_0_5.o \
+ vcn_v5_0_0.o \
+ vcn_v5_0_1.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
jpeg_v2_5.o \
jpeg_v3_0.o \
jpeg_v4_0.o \
- jpeg_v4_0_3.o
+ jpeg_v4_0_3.o \
+ jpeg_v4_0_5.o \
+ jpeg_v5_0_0.o \
+ jpeg_v5_0_1.o
+
+# add VPE block
+amdgpu-y += \
+ amdgpu_vpe.o \
+ vpe_v6_1.o
+
+# add UMSCH block
+amdgpu-y += \
+ amdgpu_umsch_mm.o \
+ umsch_mm_v4_0.o
+
# add ATHUB block
amdgpu-y += \
athub_v1_0.o \
athub_v2_0.o \
athub_v2_1.o \
- athub_v3_0.o
+ athub_v3_0.o \
+ athub_v4_1_0.o
# add SMUIO block
amdgpu-y += \
@@ -227,7 +242,8 @@ amdgpu-y += \
smuio_v11_0_6.o \
smuio_v13_0.o \
smuio_v13_0_3.o \
- smuio_v13_0_6.o
+ smuio_v13_0_6.o \
+ smuio_v14_0_2.o
# add reset block
amdgpu-y += \
@@ -240,6 +256,8 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o
+# add gfx usermode queue
+amdgpu-y += amdgpu_userq.o
ifneq ($(CONFIG_HSA_AMD),)
AMDKFD_PATH := ../amdkfd
@@ -255,7 +273,8 @@ amdgpu-y += \
amdgpu_amdkfd_gc_9_4_3.o \
amdgpu_amdkfd_gfx_v10.o \
amdgpu_amdkfd_gfx_v10_3.o \
- amdgpu_amdkfd_gfx_v11.o
+ amdgpu_amdkfd_gfx_v11.o \
+ amdgpu_amdkfd_gfx_v12.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o
@@ -297,4 +316,12 @@ amdgpu-y += $(AMD_DISPLAY_FILES)
endif
+# add isp block
+ifneq ($(CONFIG_DRM_AMD_ISP),)
+amdgpu-y += \
+ amdgpu_isp.o \
+ isp_v4_1_0.o \
+ isp_v4_1_1.o
+endif
+
obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 2b97b8a96fb4..9569dc16dd3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -35,7 +35,7 @@ static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
{
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
- if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
adev->gmc.xgmi.connected_to_cpu))
return true;
@@ -48,59 +48,60 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
{
struct amdgpu_reset_handler *handler;
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
+
+ if (reset_context->method == AMD_RESET_METHOD_NONE) {
+ if (aldebaran_is_mode2_default(reset_ctl))
+ reset_context->method = AMD_RESET_METHOD_MODE2;
+ else
+ reset_context->method = amdgpu_asic_reset_method(adev);
+ }
if (reset_context->method != AMD_RESET_METHOD_NONE) {
dev_dbg(adev->dev, "Getting reset handler for method %d\n",
reset_context->method);
- list_for_each_entry(handler, &reset_ctl->reset_handlers,
- handler_list) {
+ for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_context->method)
return handler;
}
}
- if (aldebaran_is_mode2_default(reset_ctl)) {
- list_for_each_entry(handler, &reset_ctl->reset_handlers,
- handler_list) {
- if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
- reset_context->method = AMD_RESET_METHOD_MODE2;
- return handler;
- }
- }
- }
-
dev_dbg(adev->dev, "Reset handler not found!\n");
return NULL;
}
+static inline uint32_t aldebaran_get_ip_block_mask(struct amdgpu_device *adev)
+{
+ uint32_t ip_block_mask = BIT(AMD_IP_BLOCK_TYPE_GFX) |
+ BIT(AMD_IP_BLOCK_TYPE_SDMA);
+
+ if (adev->aid_mask)
+ ip_block_mask |= BIT(AMD_IP_BLOCK_TYPE_IH);
+
+ return ip_block_mask;
+}
+
static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)
{
+ uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev);
+ uint32_t ip_block;
int r, i;
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
- if (!(adev->ip_blocks[i].version->type ==
- AMD_IP_BLOCK_TYPE_GFX ||
- adev->ip_blocks[i].version->type ==
- AMD_IP_BLOCK_TYPE_SDMA))
+ ip_block = BIT(adev->ip_blocks[i].version->type);
+ if (!(ip_block_mask & ip_block))
continue;
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
-
- if (r) {
- dev_err(adev->dev,
- "suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = false;
}
- return r;
+ return 0;
}
static int
@@ -124,9 +125,9 @@ static void aldebaran_async_reset(struct work_struct *work)
struct amdgpu_reset_control *reset_ctl =
container_of(work, struct amdgpu_reset_control, reset_work);
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
- list_for_each_entry(handler, &reset_ctl->reset_handlers,
- handler_list) {
+ for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_ctl->active_reset) {
dev_dbg(adev->dev, "Resetting device\n");
handler->do_reset(adev);
@@ -157,7 +158,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
if (reset_device_list == NULL)
return -EINVAL;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
reset_context->hive == NULL) {
/* Wrong context, return error */
return -EINVAL;
@@ -210,8 +211,10 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
{
struct amdgpu_firmware_info *ucode_list[AMDGPU_UCODE_ID_MAXIMUM];
+ uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev);
struct amdgpu_firmware_info *ucode;
struct amdgpu_ip_block *cmn_block;
+ struct amdgpu_ip_block *ih_block;
int ucode_count = 0;
int i, r;
@@ -249,10 +252,22 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
dev_err(adev->dev, "Failed to get BIF handle\n");
return -EINVAL;
}
- r = cmn_block->version->funcs->resume(adev);
+ r = amdgpu_ip_block_resume(cmn_block);
if (r)
return r;
+ if (ip_block_mask & BIT(AMD_IP_BLOCK_TYPE_IH)) {
+ ih_block = amdgpu_device_ip_get_ip_block(adev,
+ AMD_IP_BLOCK_TYPE_IH);
+ if (unlikely(!ih_block)) {
+ dev_err(adev->dev, "Failed to get IH handle\n");
+ return -EINVAL;
+ }
+ r = amdgpu_ip_block_resume(ih_block);
+ if (r)
+ return r;
+ }
+
/* Reinit GFXHUB */
adev->gfxhub.funcs->init(adev);
r = adev->gfxhub.funcs->gart_enable(adev);
@@ -285,15 +300,10 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type ==
AMD_IP_BLOCK_TYPE_SDMA))
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- dev_err(adev->dev,
- "resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
- adev->ip_blocks[i].status.hw = true;
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -307,7 +317,7 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->funcs->late_init) {
r = adev->ip_blocks[i].version->funcs->late_init(
- (void *)adev);
+ &adev->ip_blocks[i]);
if (r) {
dev_err(adev->dev,
"late_init of IP block <%s> failed %d after reset\n",
@@ -319,8 +329,6 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].status.late_initialized = true;
}
- amdgpu_ras_set_error_query_ready(adev, true);
-
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
@@ -333,12 +341,13 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
{
struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
+ struct amdgpu_ras *con;
int r;
if (reset_device_list == NULL)
return -EINVAL;
- if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] ==
+ if (amdgpu_ip_version(reset_context->reset_req_dev, MP1_HWIP, 0) ==
IP_VERSION(13, 0, 2) &&
reset_context->hive == NULL) {
/* Wrong context, return error */
@@ -346,8 +355,12 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
}
list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_RESET_RECOVERY);
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
+ /* TBD: Ideally should clear only GFX, SDMA blocks */
+ amdgpu_ras_clear_err_state(tmp_adev);
r = aldebaran_mode2_restore_ip(tmp_adev);
if (r)
goto end;
@@ -358,7 +371,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
*/
amdgpu_register_gpu_instance(tmp_adev);
- /* Resume RAS */
+ /* Resume RAS, ecc_irq */
+ con = amdgpu_ras_get_context(tmp_adev);
+ if (!amdgpu_sriov_vf(tmp_adev) && con) {
+ if (tmp_adev->sdma.ras &&
+ tmp_adev->sdma.ras->ras_block.ras_late_init) {
+ r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
+ &tmp_adev->sdma.ras->ras_block.ras_comm);
+ if (r) {
+ dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r);
+ goto end;
+ }
+ }
+
+ if (tmp_adev->gfx.ras &&
+ tmp_adev->gfx.ras->ras_block.ras_late_init) {
+ r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
+ &tmp_adev->gfx.ras->ras_block.ras_comm);
+ if (r) {
+ dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r);
+ goto end;
+ }
+ }
+ }
+
amdgpu_ras_resume(tmp_adev);
/* Update PSP FW topology after reset */
@@ -368,6 +404,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
tmp_adev);
if (!r) {
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_DEFAULT);
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
r = amdgpu_ib_ring_tests(tmp_adev);
@@ -395,6 +433,12 @@ static struct amdgpu_reset_handler aldebaran_mode2_handler = {
.do_reset = aldebaran_mode2_reset,
};
+static struct amdgpu_reset_handler
+ *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+ &aldebaran_mode2_handler,
+ &xgmi_reset_on_init_handler,
+ };
+
int aldebaran_reset_init(struct amdgpu_device *adev)
{
struct amdgpu_reset_control *reset_ctl;
@@ -408,10 +452,9 @@ int aldebaran_reset_init(struct amdgpu_device *adev)
reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
reset_ctl->get_reset_handler = aldebaran_get_reset_handler;
- INIT_LIST_HEAD(&reset_ctl->reset_handlers);
INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
/* Only mode2 is handled through reset control now */
- amdgpu_reset_add_handler(reset_ctl, &aldebaran_mode2_handler);
+ reset_ctl->reset_handlers = &aldebaran_rst_handlers;
adev->reset_cntl = reset_ctl;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a79d53bdbe13..2a0df4cabb99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -63,6 +63,7 @@
#include "kgd_pp_interface.h"
#include "amd_shared.h"
+#include "amdgpu_utils.h"
#include "amdgpu_mode.h"
#include "amdgpu_ih.h"
#include "amdgpu_irq.h"
@@ -79,6 +80,8 @@
#include "amdgpu_vce.h"
#include "amdgpu_vcn.h"
#include "amdgpu_jpeg.h"
+#include "amdgpu_vpe.h"
+#include "amdgpu_umsch_mm.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gfx.h"
#include "amdgpu_sdma.h"
@@ -105,28 +108,33 @@
#include "amdgpu_smuio.h"
#include "amdgpu_fdinfo.h"
#include "amdgpu_mca.h"
+#include "amdgpu_aca.h"
#include "amdgpu_ras.h"
+#include "amdgpu_cper.h"
#include "amdgpu_xcp.h"
+#include "amdgpu_seq64.h"
+#include "amdgpu_reg_state.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_eviction_fence.h"
+#if defined(CONFIG_DRM_AMD_ISP)
+#include "amdgpu_isp.h"
+#endif
#define MAX_GPU_INSTANCE 64
-struct amdgpu_gpu_instance
-{
+#define GFX_SLICE_PERIOD_MS 250
+
+struct amdgpu_gpu_instance {
struct amdgpu_device *adev;
int mgpu_fan_enabled;
};
-struct amdgpu_mgpu_info
-{
+struct amdgpu_mgpu_info {
struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE];
struct mutex mutex;
uint32_t num_gpu;
uint32_t num_dgpu;
uint32_t num_apu;
-
- /* delayed reset_func for XGMI configuration if necessary */
- struct delayed_work delayed_reset_work;
- bool pending_reset;
};
enum amdgpu_ss {
@@ -136,8 +144,15 @@ enum amdgpu_ss {
AMDGPU_SS_DRV_UNLOAD
};
-struct amdgpu_watchdog_timer
-{
+struct amdgpu_hwip_reg_entry {
+ u32 hwip;
+ u32 inst;
+ u32 seg;
+ u32 reg_offset;
+ const char *reg_name;
+};
+
+struct amdgpu_watchdog_timer {
bool timeout_fatal_disable;
uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */
};
@@ -192,10 +207,12 @@ extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
extern int amdgpu_smu_pptable_id;
extern uint amdgpu_dc_feature_mask;
+extern uint amdgpu_freesync_vid_mode;
extern uint amdgpu_dc_debug_mask;
extern uint amdgpu_dc_visual_confirm;
-extern uint amdgpu_dm_abm_level;
+extern int amdgpu_dm_abm_level;
extern int amdgpu_backlight;
+extern int amdgpu_damage_clips;
extern struct amdgpu_mgpu_info mgpu_info;
extern int amdgpu_ras_enable;
extern uint amdgpu_ras_mask;
@@ -206,18 +223,21 @@ extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
extern int amdgpu_mes_kiq;
+extern int amdgpu_uni_mes;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
extern int amdgpu_use_xgmi_p2p;
extern int amdgpu_mtype_local;
-extern bool enforce_isolation;
+extern int amdgpu_enforce_isolation;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
extern bool no_system_mem_limit;
extern int halt_if_hws_hang;
+extern uint amdgpu_svm_default_granularity;
#else
static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
static const bool __maybe_unused debug_evictions; /* = false */
@@ -240,10 +260,19 @@ extern int amdgpu_cik_support;
extern int amdgpu_num_kcq;
#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
+#define AMDGPU_UMSCHFW_LOG_SIZE (32 * 1024)
extern int amdgpu_vcnfw_log;
extern int amdgpu_sg_display;
+extern int amdgpu_umsch_mm;
+extern int amdgpu_seamless;
+extern int amdgpu_umsch_mm_fwlog;
extern int amdgpu_user_partt_mode;
+extern int amdgpu_agp;
+extern int amdgpu_rebar;
+
+extern int amdgpu_wbrf;
+extern int amdgpu_user_queue;
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
@@ -276,6 +305,12 @@ extern int amdgpu_user_partt_mode;
#define AMDGPU_RESET_VCE (1 << 13)
#define AMDGPU_RESET_VCE1 (1 << 14)
+/* reset mask */
+#define AMDGPU_RESET_TYPE_FULL (1 << 0) /* full adapter reset, mode1/mode2/BACO/etc. */
+#define AMDGPU_RESET_TYPE_SOFT_RESET (1 << 1) /* IP level soft reset */
+#define AMDGPU_RESET_TYPE_PER_QUEUE (1 << 2) /* per queue */
+#define AMDGPU_RESET_TYPE_PER_PIPE (1 << 3) /* per pipe */
+
/* max cursor sizes (in pixels) */
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
@@ -323,7 +358,6 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
-#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000
@@ -338,8 +372,11 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
u64 *flags);
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
-bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block);
+
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
#define AMDGPU_MAX_IP_NUM 16
@@ -359,12 +396,10 @@ struct amdgpu_ip_block_version {
const struct amd_ip_funcs *funcs;
};
-#define HW_REV(_Major, _Minor, _Rev) \
- ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev)))
-
struct amdgpu_ip_block {
struct amdgpu_ip_block_status status;
const struct amdgpu_ip_block_version *version;
+ struct amdgpu_device *adev;
};
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
@@ -385,6 +420,7 @@ bool amdgpu_get_bios(struct amdgpu_device *adev);
bool amdgpu_read_bios(struct amdgpu_device *adev);
bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
u8 *bios, u32 length_bytes);
+void amdgpu_bios_release(struct amdgpu_device *adev);
/*
* Clocks
*/
@@ -399,7 +435,6 @@ struct amdgpu_clock {
uint32_t default_mclk;
uint32_t default_sclk;
uint32_t default_dispclk;
- uint32_t current_dispclk;
uint32_t dp_extclk;
uint32_t max_pixel_clock;
};
@@ -435,9 +470,6 @@ struct amdgpu_sa_manager {
void *cpu_ptr;
};
-int amdgpu_fence_slab_init(void);
-void amdgpu_fence_slab_fini(void);
-
/*
* IRQS.
*/
@@ -457,7 +489,6 @@ struct amdgpu_flip_work {
bool async;
};
-
/*
* file private structure
*/
@@ -466,9 +497,15 @@ struct amdgpu_fpriv {
struct amdgpu_vm vm;
struct amdgpu_bo_va *prt_va;
struct amdgpu_bo_va *csa_va;
+ struct amdgpu_bo_va *seq64_va;
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
+ struct amdgpu_userq_mgr userq_mgr;
+
+ /* Eviction fence infra */
+ struct amdgpu_eviction_fence_mgr evf_mgr;
+
/** GPU partition selection */
uint32_t xcp_id;
};
@@ -480,12 +517,63 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
*/
#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */
+/**
+ * struct amdgpu_wb - This struct is used for small GPU memory allocation.
+ *
+ * This struct is used to allocate a small amount of GPU memory that can be
+ * used to shadow certain states into the memory. This is especially useful for
+ * providing easy CPU access to some states without requiring register access
+ * (e.g., if some block is power gated, reading register may be problematic).
+ *
+ * Note: the term writeback was initially used because many of the amdgpu
+ * components had some level of writeback memory, and this struct initially
+ * described those components.
+ */
struct amdgpu_wb {
+
+ /**
+ * @wb_obj:
+ *
+ * Buffer Object used for the writeback memory.
+ */
struct amdgpu_bo *wb_obj;
- volatile uint32_t *wb;
+
+ /**
+ * @wb:
+ *
+ * Pointer to the first writeback slot. In terms of CPU address
+ * this value can be accessed directly by using the offset as an index.
+ * For the GPU address, it is necessary to use gpu_addr and the offset.
+ */
+ uint32_t *wb;
+
+ /**
+ * @gpu_addr:
+ *
+ * Writeback base address in the GPU.
+ */
uint64_t gpu_addr;
- u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */
+
+ /**
+ * @num_wb:
+ *
+ * Number of writeback slots reserved for amdgpu.
+ */
+ u32 num_wb;
+
+ /**
+ * @used:
+ *
+ * Track the writeback slot already used.
+ */
unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+
+ /**
+ * @lock:
+ *
+ * Protects read and write of the used field array.
+ */
+ spinlock_t lock;
};
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
@@ -504,14 +592,42 @@ struct amdgpu_allowed_register_entry {
bool grbm_indexed;
};
+/**
+ * enum amd_reset_method - Methods for resetting AMD GPU devices
+ *
+ * @AMD_RESET_METHOD_NONE: The device will not be reset.
+ * @AMD_RESET_METHOD_LEGACY: Method reserved for SI, CIK and VI ASICs.
+ * @AMD_RESET_METHOD_MODE0: Reset the entire ASIC. Not currently available for
+ * any device.
+ * @AMD_RESET_METHOD_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN,
+ * etc.) individually. Suitable only for some discrete GPUs, not
+ * available for all ASICs.
+ * @AMD_RESET_METHOD_MODE2: Resets a lesser level of IPs compared to MODE1.
+ * Which IPs are reset depends on the ASIC. Notably doesn't reset IPs
+ * shared with the CPU on APUs or the memory controllers (so
+ * VRAM is not lost). Not available on all ASICs.
+ * @AMD_RESET_METHOD_LINK: Triggers SW-UP link reset on other GPUs
+ * @AMD_RESET_METHOD_BACO: BACO (Bus Alive, Chip Off) method powers off and on
+ * the card but without powering off the PCI bus. Suitable only for
+ * discrete GPUs.
+ * @AMD_RESET_METHOD_PCI: Does a full bus reset using core Linux subsystem PCI
+ * reset and does a secondary bus reset or FLR, depending on what
+ * the underlying hardware supports.
+ *
+ * Methods available for AMD GPU driver for resetting the device. Not all
+ * methods are suitable for every device. User can override the method using
+ * module parameter `reset_method`.
+ */
enum amd_reset_method {
AMD_RESET_METHOD_NONE = -1,
AMD_RESET_METHOD_LEGACY = 0,
AMD_RESET_METHOD_MODE0,
AMD_RESET_METHOD_MODE1,
AMD_RESET_METHOD_MODE2,
+ AMD_RESET_METHOD_LINK,
AMD_RESET_METHOD_BACO,
AMD_RESET_METHOD_PCI,
+ AMD_RESET_METHOD_ON_INIT,
};
struct amdgpu_video_codec_info {
@@ -573,7 +689,7 @@ struct amdgpu_asic_funcs {
/* PCIe replay counter */
uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
/* device supports BACO */
- bool (*supports_baco)(struct amdgpu_device *adev);
+ int (*supports_baco)(struct amdgpu_device *adev);
/* pre asic_init quirks */
void (*pre_asic_init)(struct amdgpu_device *adev);
/* enter/exit umd stable pstate */
@@ -583,6 +699,10 @@ struct amdgpu_asic_funcs {
const struct amdgpu_video_codecs **codecs);
/* encode "> 32bits" smn addressing */
u64 (*encode_ext_smn_addressing)(int ext_id);
+
+ ssize_t (*get_reg_state)(struct amdgpu_device *adev,
+ enum amdgpu_reg_state reg_state, void *buf,
+ size_t max_size);
};
/*
@@ -601,7 +721,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
/* VRAM scratch page for HDP bug, default vram page */
struct amdgpu_mem_scratch {
struct amdgpu_bo *robj;
- volatile uint32_t *ptr;
+ uint32_t *ptr;
u64 gpu_addr;
};
@@ -623,12 +743,16 @@ typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t);
typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
+typedef uint64_t (*amdgpu_rreg64_ext_t)(struct amdgpu_device*, uint64_t);
+typedef void (*amdgpu_wreg64_ext_t)(struct amdgpu_device*, uint64_t, uint64_t);
+
typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
struct amdgpu_mmio_remap {
u32 reg_offset;
resource_size_t bus_addr;
+ struct amdgpu_bo *bo;
};
/* Define the HW IP blocks will be used in driver , add more if necessary */
@@ -654,6 +778,7 @@ enum amd_hw_ip_block_type {
JPEG_HWIP = VCN_HWIP,
VCN1_HWIP,
VCE_HWIP,
+ VPE_HWIP,
DF_HWIP,
DCE_HWIP,
OSSSYS_HWIP,
@@ -667,16 +792,22 @@ enum amd_hw_ip_block_type {
XGMI_HWIP,
DCI_HWIP,
PCIE_HWIP,
+ ISP_HWIP,
MAX_HWIP
};
#define HWIP_MAX_INSTANCE 44
#define HW_ID_MAX 300
-#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
-#define IP_VERSION_MAJ(ver) ((ver) >> 16)
-#define IP_VERSION_MIN(ver) (((ver) >> 8) & 0xFF)
-#define IP_VERSION_REV(ver) ((ver) & 0xFF)
+#define IP_VERSION_FULL(mj, mn, rv, var, srev) \
+ (((mj) << 24) | ((mn) << 16) | ((rv) << 8) | ((var) << 4) | (srev))
+#define IP_VERSION(mj, mn, rv) IP_VERSION_FULL(mj, mn, rv, 0, 0)
+#define IP_VERSION_MAJ(ver) ((ver) >> 24)
+#define IP_VERSION_MIN(ver) (((ver) >> 16) & 0xFF)
+#define IP_VERSION_REV(ver) (((ver) >> 8) & 0xFF)
+#define IP_VERSION_VARIANT(ver) (((ver) >> 4) & 0xF)
+#define IP_VERSION_SUBREV(ver) ((ver) & 0xF)
+#define IP_VERSION_MAJ_MIN_REV(ver) ((ver) >> 8)
struct amdgpu_ip_map_info {
/* Map of logical to actual dev instances/mask */
@@ -689,6 +820,20 @@ struct amdgpu_ip_map_info {
uint32_t mask);
};
+enum amdgpu_uid_type {
+ AMDGPU_UID_TYPE_XCD,
+ AMDGPU_UID_TYPE_AID,
+ AMDGPU_UID_TYPE_SOC,
+ AMDGPU_UID_TYPE_MAX
+};
+
+#define AMDGPU_UID_INST_MAX 8 /* max number of instances for each UID type */
+
+struct amdgpu_uid {
+ uint64_t uid[AMDGPU_UID_TYPE_MAX][AMDGPU_UID_INST_MAX];
+ struct amdgpu_device *adev;
+};
+
struct amd_powerplay {
void *pp_handle;
const struct amd_pm_funcs *pp_funcs;
@@ -746,7 +891,14 @@ struct amdgpu_mqd_prop {
uint64_t eop_gpu_addr;
uint32_t hqd_pipe_priority;
uint32_t hqd_queue_priority;
+ bool allow_tunneling;
bool hqd_active;
+ uint64_t shadow_addr;
+ uint64_t gds_bkup_addr;
+ uint64_t csa_addr;
+ uint64_t fence_address;
+ bool tmz_queue;
+ bool kernel_queue;
};
struct amdgpu_mqd {
@@ -755,15 +907,45 @@ struct amdgpu_mqd {
struct amdgpu_mqd_prop *p);
};
+struct amdgpu_pcie_reset_ctx {
+ bool in_link_reset;
+ bool occurs_dpc;
+ bool audio_suspended;
+ struct pci_dev *swus;
+ struct pci_saved_state *swus_pcistate;
+ struct pci_saved_state *swds_pcistate;
+};
+
+/*
+ * Custom Init levels could be defined for different situations where a full
+ * initialization of all hardware blocks are not expected. Sample cases are
+ * custom init sequences after resume after S0i3/S3, reset on initialization,
+ * partial reset of blocks etc. The levels are enumerated in
+ * enum amdgpu_init_lvl_id and described in corresponding struct definitions,
+ * e.g. amdgpu_init_default and amdgpu_init_minimal_xgmi.
+ */
+enum amdgpu_init_lvl_id {
+ AMDGPU_INIT_LEVEL_DEFAULT,
+ AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+ AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+};
+
+struct amdgpu_init_level {
+ enum amdgpu_init_lvl_id level;
+ uint32_t hwini_ip_block_mask;
+};
+
#define AMDGPU_RESET_MAGIC_NUM 64
#define AMDGPU_MAX_DF_PERFMONS 4
-#define AMDGPU_PRODUCT_NAME_LEN 64
struct amdgpu_reset_domain;
+struct amdgpu_fru_info;
-/*
- * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
- */
-#define AMDGPU_HAS_VRAM(_adev) ((_adev)->gmc.real_vram_size)
+enum amdgpu_enforce_isolation_mode {
+ AMDGPU_ENFORCE_ISOLATION_DISABLE = 0,
+ AMDGPU_ENFORCE_ISOLATION_ENABLE = 1,
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2,
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3,
+};
struct amdgpu_device {
struct device *dev;
@@ -788,6 +970,7 @@ struct amdgpu_device {
bool need_swiotlb;
bool accel_working;
struct notifier_block acpi_nb;
+ struct notifier_block pm_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
struct debugfs_blob_wrapper debugfs_vbios_blob;
struct debugfs_blob_wrapper debugfs_discovery_blob;
@@ -826,6 +1009,8 @@ struct amdgpu_device {
amdgpu_wreg_ext_t pcie_wreg_ext;
amdgpu_rreg64_t pcie_rreg64;
amdgpu_wreg64_t pcie_wreg64;
+ amdgpu_rreg64_ext_t pcie_rreg64_ext;
+ amdgpu_wreg64_ext_t pcie_wreg64_ext;
/* protects concurrent UVD register access */
spinlock_t uvd_ctx_idx_lock;
amdgpu_rreg_t uvd_ctx_rreg;
@@ -946,6 +1131,13 @@ struct amdgpu_device {
/* jpeg */
struct amdgpu_jpeg jpeg;
+ /* vpe */
+ struct amdgpu_vpe vpe;
+
+ /* umsch */
+ struct amdgpu_umsch_mm umsch_mm;
+ bool enable_umsch_mm;
+
/* firmwares */
struct amdgpu_firmware firmware;
@@ -955,8 +1147,8 @@ struct amdgpu_device {
/* GDS */
struct amdgpu_gds gds;
- /* KFD */
- struct amdgpu_kfd_dev kfd;
+ /* for userq and VM fences */
+ struct amdgpu_seq64 seq64;
/* UMC */
struct amdgpu_umc umc;
@@ -964,11 +1156,24 @@ struct amdgpu_device {
/* display related functionality */
struct amdgpu_display_manager dm;
+#if defined(CONFIG_DRM_AMD_ISP)
+ /* isp */
+ struct amdgpu_isp isp;
+#endif
+
/* mes */
bool enable_mes;
bool enable_mes_kiq;
+ bool enable_uni_mes;
struct amdgpu_mes mes;
struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
+ const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM];
+
+ /* xarray used to retrieve the user queue fence driver reference
+ * in the EOP interrupt handler to signal the particular user
+ * queue fence.
+ */
+ struct xarray userq_xa;
/* df */
struct amdgpu_df df;
@@ -976,6 +1181,12 @@ struct amdgpu_device {
/* MCA */
struct amdgpu_mca mca;
+ /* ACA */
+ struct amdgpu_aca aca;
+
+ /* CPER */
+ struct amdgpu_cper cper;
+
struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
uint32_t harvest_ip_mask;
int num_ip_blocks;
@@ -996,10 +1207,6 @@ struct amdgpu_device {
struct amdgpu_virt virt;
- /* link all shadow bo */
- struct list_head shadow_list;
- struct mutex shadow_list_lock;
-
/* record hw reset is performed */
bool has_hw_reset;
u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
@@ -1009,6 +1216,7 @@ struct amdgpu_device {
bool in_s3;
bool in_s4;
bool in_s0ix;
+ suspend_state_t last_suspend_state;
enum pp_mp1_state mp1_state;
struct amdgpu_doorbell_index doorbell_index;
@@ -1023,6 +1231,7 @@ struct amdgpu_device {
long sdma_timeout;
long video_timeout;
long compute_timeout;
+ long psp_timeout;
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
@@ -1033,20 +1242,19 @@ struct amdgpu_device {
bool ucode_sysfs_en;
- /* Chip product information */
- char product_number[20];
- char product_name[AMDGPU_PRODUCT_NAME_LEN];
- char serial[20];
-
+ struct amdgpu_fru_info *fru_info;
atomic_t throttling_logging_enabled;
struct ratelimit_state throttling_logging_rs;
uint32_t ras_hw_enabled;
uint32_t ras_enabled;
+ bool ras_default_ecc_enabled;
bool no_hw_access;
struct pci_saved_state *pci_state;
pci_channel_state_t pci_channel_state;
+ struct amdgpu_pcie_reset_ctx pcie_reset_ctx;
+
/* Track auto wait count on s_barrier settings */
bool barrier_has_auto_waitcnt;
@@ -1063,27 +1271,70 @@ struct amdgpu_device {
struct mutex benchmark_mutex;
- /* reset dump register */
- uint32_t *reset_dump_reg_list;
- uint32_t *reset_dump_reg_value;
- int num_regs;
-#ifdef CONFIG_DEV_COREDUMP
- struct amdgpu_task_info reset_task_info;
- bool reset_vram_lost;
- struct timespec64 reset_time;
-#endif
-
bool scpm_enabled;
uint32_t scpm_status;
struct work_struct reset_work;
- bool job_hang;
bool dc_enabled;
/* Mask of active clusters */
uint32_t aid_mask;
+
+ /* Debug */
+ bool debug_vm;
+ bool debug_largebar;
+ bool debug_disable_soft_recovery;
+ bool debug_use_vram_fw_buf;
+ bool debug_enable_ras_aca;
+ bool debug_exp_resets;
+ bool debug_disable_gpu_ring_reset;
+ bool debug_vm_userptr;
+ bool debug_disable_ce_logs;
+
+ /* Protection for the following isolation structure */
+ struct mutex enforce_isolation_mutex;
+ enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP];
+ struct amdgpu_isolation {
+ void *owner;
+ struct dma_fence *spearhead;
+ struct amdgpu_sync active;
+ struct amdgpu_sync prev;
+ } isolation[MAX_XCP];
+
+ struct amdgpu_init_level *init_lvl;
+
+ /* This flag is used to determine how VRAM allocations are handled for APUs
+ * in KFD: VRAM or GTT.
+ */
+ bool apu_prefer_gtt;
+
+ struct list_head userq_mgr_list;
+ struct mutex userq_mutex;
+ bool userq_halt_for_enforce_isolation;
+ struct amdgpu_uid *uid_info;
+
+ /* KFD
+ * Must be last --ends in a flexible-array member.
+ */
+ struct amdgpu_kfd_dev kfd;
};
+/* Return the version of instance @inst of IP @ip with its low byte cleared. */
+static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
+					  uint8_t ip, uint8_t inst)
+{
+	/* Only major/minor/rev are kept; the subrevision/variant fields
+	 * held in the low byte are deliberately ignored.
+	 */
+	const uint32_t full_version = adev->ip_versions[ip][inst];
+
+	return full_version & ~0xFFU;
+}
+
+/* Return the unmasked version word stored for instance @inst of IP @ip. */
+static inline uint32_t amdgpu_ip_version_full(const struct amdgpu_device *adev,
+					       uint8_t ip, uint8_t inst)
+{
+	/* Full version: major/minor/rev/variant/subrevision are all kept. */
+	const uint32_t version = adev->ip_versions[ip][inst];
+
+	return version;
+}
+
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
{
return container_of(ddev, struct amdgpu_device, ddev);
@@ -1099,6 +1350,11 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_device *bdev)
return container_of(bdev, struct amdgpu_device, mman.bdev);
}
+/* Report whether any bit is set in @adev's active-cluster mask (aid_mask). */
+static inline bool amdgpu_is_multi_aid(struct amdgpu_device *adev)
+{
+	return adev->aid_mask != 0;
+}
+
int amdgpu_device_init(struct amdgpu_device *adev,
uint32_t flags);
void amdgpu_device_fini_hw(struct amdgpu_device *adev);
@@ -1120,11 +1376,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t acc_flags);
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags,
+ uint32_t xcc_id);
void amdgpu_device_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t v,
uint32_t acc_flags);
void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags,
+ uint32_t xcc_id);
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
uint32_t reg, uint32_t v, uint32_t xcc_id);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
@@ -1134,12 +1397,17 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
u32 reg_addr);
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
u32 reg_addr);
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr);
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
u32 reg_addr, u32 reg_data);
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
u32 reg_addr, u64 reg_data);
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u64 reg_data);
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev);
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
+ enum amd_asic_type asic_type);
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev);
@@ -1150,6 +1418,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_reset_context *reset_context);
+int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context);
+
int emu_soc_asic_init(struct amdgpu_device *adev);
/*
@@ -1161,8 +1431,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
-#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
-#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1172,6 +1442,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
+#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst)
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
@@ -1180,6 +1452,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v))
#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
+#define RREG64_PCIE_EXT(reg) adev->pcie_rreg64_ext(adev, (reg))
+#define WREG64_PCIE_EXT(reg, v) adev->pcie_wreg64_ext(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
@@ -1235,6 +1509,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_FIELD_OFFSET(reg, offset, field, val) \
WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+#define AMDGPU_GET_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> (l))
/*
* BIOS helpers.
*/
@@ -1275,15 +1550,13 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0)
#define amdgpu_asic_query_video_codecs(adev, e, c) (adev)->asic_funcs->query_video_codecs((adev), (e), (c))
-#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter))
#define BIT_MASK_UPPER(i) ((i) >= BITS_PER_LONG ? 0 : ~0UL << (i))
#define for_each_inst(i, inst_mask) \
for (i = ffs(inst_mask); i-- != 0; \
i = ffs(inst_mask & BIT_MASK_UPPER(i + 1)))
-#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
-
/* Common functions */
bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@@ -1293,9 +1566,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
-bool amdgpu_device_pcie_dynamic_switching_supported(void);
+bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
-bool amdgpu_device_aspm_support_quirk(void);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
u64 num_vis_bytes);
@@ -1305,15 +1577,17 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 array_size);
int amdgpu_device_mode1_reset(struct amdgpu_device *adev);
-bool amdgpu_device_supports_atpx(struct drm_device *dev);
-bool amdgpu_device_supports_px(struct drm_device *dev);
-bool amdgpu_device_supports_boco(struct drm_device *dev);
-bool amdgpu_device_supports_smart_shift(struct drm_device *dev);
-bool amdgpu_device_supports_baco(struct drm_device *dev);
+int amdgpu_device_link_reset(struct amdgpu_device *adev);
+bool amdgpu_device_supports_atpx(struct amdgpu_device *adev);
+bool amdgpu_device_supports_px(struct amdgpu_device *adev);
+bool amdgpu_device_supports_boco(struct amdgpu_device *adev);
+bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev);
+int amdgpu_device_supports_baco(struct amdgpu_device *adev);
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev);
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
-int amdgpu_device_baco_enter(struct drm_device *dev);
-int amdgpu_device_baco_exit(struct drm_device *dev);
+int amdgpu_device_baco_enter(struct amdgpu_device *adev);
+int amdgpu_device_baco_exit(struct amdgpu_device *adev);
void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
@@ -1325,9 +1599,15 @@ u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
u32 reg);
void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
u32 reg, u32 v);
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
struct dma_fence *gang);
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job);
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
@@ -1335,23 +1615,15 @@ void amdgpu_register_atpx_handler(void);
void amdgpu_unregister_atpx_handler(void);
bool amdgpu_has_atpx_dgpu_power_cntl(void);
bool amdgpu_is_atpx_hybrid(void);
-bool amdgpu_atpx_dgpu_req_power_for_displays(void);
bool amdgpu_has_atpx(void);
#else
static inline void amdgpu_register_atpx_handler(void) {}
static inline void amdgpu_unregister_atpx_handler(void) {}
static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
-static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; }
static inline bool amdgpu_has_atpx(void) { return false; }
#endif
-#if defined(CONFIG_VGA_SWITCHEROO) && defined(CONFIG_ACPI)
-void *amdgpu_atpx_get_dhandle(void);
-#else
-static inline void *amdgpu_atpx_get_dhandle(void) { return NULL; }
-#endif
-
/*
* KMS
*/
@@ -1360,13 +1632,14 @@ extern const int amdgpu_max_kms_ioctl;
int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags);
void amdgpu_driver_unload_kms(struct drm_device *dev);
-void amdgpu_driver_lastclose_kms(struct drm_device *dev);
int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
void amdgpu_driver_release_kms(struct drm_device *dev);
int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
+int amdgpu_device_prepare(struct drm_device *dev);
+void amdgpu_device_complete(struct drm_device *dev);
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
@@ -1417,7 +1690,8 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
u8 perf_req, bool advertise);
int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state);
-int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state);
+int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state);
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
u64 *tmr_size);
@@ -1448,8 +1722,12 @@ static inline void amdgpu_acpi_release(void) { }
static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
u8 dev_state, bool drv_state) { return 0; }
-static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev,
- enum amdgpu_ss ss_state) { return 0; }
+/*
+ * Stub variant of amdgpu_acpi_smart_shift_update(): does nothing and
+ * always reports success.
+ * NOTE(review): sits just before an #endif, presumably the branch built
+ * without ACPI support -- confirm against the enclosing #if.
+ */
+static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state)
+{
+ return 0;
+}
+/* Stub variant: leaves @caps untouched. */
+static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { }
#endif
#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
@@ -1460,13 +1738,10 @@ static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { retu
static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
#endif
-#if defined(CONFIG_DRM_AMD_DC)
-int amdgpu_dm_display_resume(struct amdgpu_device *adev );
-#else
-static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
+#if defined(CONFIG_DRM_AMD_ISP)
+int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev);
#endif
-
void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
@@ -1508,4 +1783,26 @@ extern const struct attribute_group amdgpu_vram_mgr_attr_group;
extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
extern const struct attribute_group amdgpu_flash_attr_group;
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+ enum amdgpu_init_lvl_id lvl);
+
+/*
+ * Probe whether the device still answers PCI config-space reads.
+ *
+ * Reads the PCI_COMMAND register of @adev->pdev. Returns 0 when the read
+ * succeeds and the value does not look like an error response, otherwise
+ * logs an error and returns -ENODEV.
+ */
+static inline int amdgpu_device_bus_status_check(struct amdgpu_device *adev)
+{
+	u32 status;
+	int r;
+
+	r = pci_read_config_dword(adev->pdev, PCI_COMMAND, &status);
+	if (r || PCI_POSSIBLE_ERROR(status)) {
+		/* Terminate the message so it is emitted as a complete log line. */
+		dev_err(adev->dev, "device lost from bus!\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst,
+ uint64_t uid);
+uint64_t amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
new file mode 100644
index 000000000000..9b3180449150
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -0,0 +1,984 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/list.h>
+#include "amdgpu.h"
+#include "amdgpu_aca.h"
+#include "amdgpu_ras.h"
+
+/* Designated initializer for the aca_hwid_mcatypes[] slot of ACA_HWIP_TYPE_<type>. */
+#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype}
+
+/* Signature of the per-bank callback handed to the bank dispatch helpers. */
+typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+
+/*
+ * Per HW IP type: the hardware id / mca type pair that a bank's IPID
+ * register is compared against in aca_bank_hwip_is_matched().
+ */
+static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = {
+ ACA_BANK_HWID(SMU, 0x01, 0x01),
+ ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00),
+ ACA_BANK_HWID(UMC, 0x96, 0x00),
+};
+
+/* Reset @banks to an empty collection; a NULL @banks is ignored. */
+static void aca_banks_init(struct aca_banks *banks)
+{
+	if (banks) {
+		memset(banks, 0, sizeof(*banks));
+		INIT_LIST_HEAD(&banks->list);
+	}
+}
+
+/*
+ * Append a private copy of @bank to the tail of @banks.
+ *
+ * Returns 0 on success, -EINVAL when @bank is NULL and -ENOMEM when the
+ * list node cannot be allocated.
+ */
+static int aca_banks_add_bank(struct aca_banks *banks, struct aca_bank *bank)
+{
+	struct aca_bank_node *entry;
+
+	if (!bank)
+		return -EINVAL;
+
+	entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	/* The node holds its own copy of the bank contents. */
+	memcpy(&entry->bank, bank, sizeof(*bank));
+	INIT_LIST_HEAD(&entry->node);
+
+	list_add_tail(&entry->node, &banks->list);
+	banks->nr_banks++;
+
+	return 0;
+}
+
+/* Unlink and free every node of @banks, decrementing nr_banks for each one. */
+static void aca_banks_release(struct aca_banks *banks)
+{
+	struct aca_bank_node *entry;
+
+	while (!list_empty(&banks->list)) {
+		entry = list_first_entry(&banks->list, struct aca_bank_node, node);
+		list_del(&entry->node);
+		kvfree(entry);
+		banks->nr_banks--;
+	}
+}
+
+/*
+ * Query the number of valid ACA banks of @type through the registered
+ * SMU callbacks and store it in @count.
+ *
+ * Returns -EINVAL when @count is NULL, -EOPNOTSUPP when no
+ * get_valid_aca_count callback is available, otherwise the callback's
+ * return value.
+ */
+static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count)
+{
+	const struct aca_smu_funcs *funcs = adev->aca.smu_funcs;
+
+	if (!count)
+		return -EINVAL;
+
+	if (!funcs || !funcs->get_valid_aca_count)
+		return -EOPNOTSUPP;
+
+	return funcs->get_valid_aca_count(adev, type, count);
+}
+
+/*
+ * Label / register-index pairs walked by aca_smu_bank_dump(): @name is the
+ * text printed for the register, @reg_idx its index into aca_bank::regs[].
+ */
+static struct aca_regs_dump {
+ const char *name;
+ int reg_idx;
+} aca_regs[] = {
+ {"CONTROL", ACA_REG_IDX_CTL},
+ {"STATUS", ACA_REG_IDX_STATUS},
+ {"ADDR", ACA_REG_IDX_ADDR},
+ {"MISC", ACA_REG_IDX_MISC0},
+ {"CONFIG", ACA_REG_IDX_CONFIG},
+ {"IPID", ACA_REG_IDX_IPID},
+ {"SYND", ACA_REG_IDX_SYND},
+ {"DESTAT", ACA_REG_IDX_DESTAT},
+ {"DEADDR", ACA_REG_IDX_DEADDR},
+ {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK},
+};
+
+/*
+ * Log every register listed in aca_regs[] for one ACA bank via
+ * RAS_EVENT_LOG().
+ *
+ * @idx and @total only label the output (printed as idx + 1 out of total).
+ * @qctx may be NULL, in which case RAS_EVENT_INVALID_ID is used as the
+ * event id.
+ */
+static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank,
+ struct ras_query_context *qctx)
+{
+ u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
+ int i;
+
+ /* With CE logging disabled, stay silent for CE banks that are not deferred errors. */
+ if (adev->debug_disable_ce_logs &&
+ bank->smu_err_type == ACA_SMU_TYPE_CE &&
+ !ACA_BANK_ERR_IS_DEFFERED(bank))
+ return;
+
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
+ /* plus 1 for output format, e.g: ACA[08/08]: xxxx */
+ for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
+ idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
+
+ /* Extra line when the SCRUB field of the STATUS register is set. */
+ if (ACA_REG__STATUS__SCRUB(bank->regs[ACA_REG_IDX_STATUS]))
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n");
+}
+
+/*
+ * Check whether @bank was logged by the HW IP identified by @type, by
+ * comparing the hardware id and mca type fields of the bank's IPID
+ * register with the aca_hwid_mcatypes[] entry for @type.
+ *
+ * Returns false for a NULL @bank, for ACA_HWIP_TYPE_UNKNOW and for table
+ * entries whose hwid is zero.
+ */
+static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
+{
+	const struct aca_hwip *expected;
+	int bank_hwid, bank_mcatype;
+	u64 ipid;
+
+	if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
+		return false;
+
+	expected = &aca_hwid_mcatypes[type];
+	if (!expected->hwid)
+		return false;
+
+	ipid = bank->regs[ACA_REG_IDX_IPID];
+	bank_hwid = ACA_REG__IPID__HARDWAREID(ipid);
+	bank_mcatype = ACA_REG__IPID__MCATYPE(ipid);
+
+	if (expected->hwid != bank_hwid)
+		return false;
+
+	return expected->mcatype == bank_mcatype;
+}
+
+/*
+ * Read @count banks of @type, starting at index @start, through the SMU
+ * callbacks and append them to @banks. Each accepted bank is also dumped
+ * to the log.
+ *
+ * Returns 0 on success (including @count == 0), -EOPNOTSUPP when no
+ * get_valid_aca_bank callback is available, -EINVAL for an unknown @type
+ * or a range exceeding the per-type maximum, or the first error returned
+ * by the callback or by aca_banks_add_bank(). Banks appended before an
+ * error are left in @banks.
+ */
+static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
+ int start, int count,
+ struct aca_banks *banks, struct ras_query_context *qctx)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+ struct aca_bank bank;
+ int i, max_count, ret;
+
+ if (!count)
+ return 0;
+
+ if (!smu_funcs || !smu_funcs->get_valid_aca_bank)
+ return -EOPNOTSUPP;
+
+ /* The upper bound on bank indices depends on the SMU error type. */
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ max_count = smu_funcs->max_ue_bank_count;
+ break;
+ case ACA_SMU_TYPE_CE:
+ max_count = smu_funcs->max_ce_bank_count;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (start + count > max_count)
+ return -EINVAL;
+
+ count = min_t(int, count, max_count);
+ for (i = 0; i < count; i++) {
+ memset(&bank, 0, sizeof(bank));
+ ret = smu_funcs->get_valid_aca_bank(adev, type, start + i, &bank);
+ if (ret)
+ return ret;
+
+ bank.smu_err_type = type;
+
+ /*
+ * Injecting a UE while background workloads are running can cause
+ * poison to be consumed; such banks are unexpected, so UE banks
+ * with the poison status bit set are skipped unless they come
+ * from UMC.
+ */
+ if (type == ACA_SMU_TYPE_UE &&
+ ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) &&
+ !aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC))
+ continue;
+
+ aca_smu_bank_dump(adev, i, count, &bank, qctx);
+
+ ret = aca_banks_add_bank(banks, &bank);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Decide whether @bank should be handed to @handle.
+ *
+ * Deferred-error banks are accepted only by the UMC handle. Any other bank
+ * must match the handle's HW IP and, when the handle provides an
+ * aca_bank_is_valid callback, pass that callback as well.
+ */
+static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
+{
+	const struct aca_bank_ops *ops = handle->bank_ops;
+
+	/* All deferred errors are parsed with the UMC aca handle. */
+	if (ACA_BANK_ERR_IS_DEFFERED(bank))
+		return handle->hwip == ACA_HWIP_TYPE_UMC;
+
+	if (!aca_bank_hwip_is_matched(bank, handle->hwip))
+		return false;
+
+	/* Without a handle-specific filter the HW IP match is sufficient. */
+	if (ops->aca_bank_is_valid)
+		return ops->aca_bank_is_valid(handle, bank, type, handle->data);
+
+	return true;
+}
+
+/*
+ * Allocate a zeroed bank error carrying a copy of @info and append it to
+ * @aerr under aerr->lock.
+ *
+ * Returns the new entry, or NULL when the allocation fails.
+ */
+static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+	struct aca_bank_error *entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (!entry)
+		return NULL;
+
+	memcpy(&entry->info, info, sizeof(*info));
+	INIT_LIST_HEAD(&entry->node);
+
+	mutex_lock(&aerr->lock);
+	list_add_tail(&entry->node, &aerr->list);
+	aerr->nr_errors++;
+	mutex_unlock(&aerr->lock);
+
+	return entry;
+}
+
+/*
+ * Look up the cached bank error of @aerr whose socket_id and die_id both
+ * equal those of @info. The list is walked under aerr->lock; the lock is
+ * released again before returning.
+ *
+ * Returns the first matching entry, or NULL when there is none.
+ */
+static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+	struct aca_bank_error *cur, *match = NULL;
+
+	mutex_lock(&aerr->lock);
+	list_for_each_entry(cur, &aerr->list, node) {
+		if (cur->info.socket_id == info->socket_id &&
+		    cur->info.die_id == info->die_id) {
+			match = cur;
+			break;
+		}
+	}
+	mutex_unlock(&aerr->lock);
+
+	return match;
+}
+
+/*
+ * Unlink @bank_error from @aerr, decrement nr_errors and free the entry.
+ * Nothing happens when either argument is NULL. This helper does not take
+ * aerr->lock itself.
+ */
+static void aca_bank_error_remove(struct aca_error *aerr, struct aca_bank_error *bank_error)
+{
+	if (aerr && bank_error) {
+		list_del(&bank_error->node);
+		aerr->nr_errors--;
+		kvfree(bank_error);
+	}
+}
+
+/*
+ * Return the bank error of @aerr that matches @info, creating one when no
+ * match exists yet.
+ *
+ * Returns NULL when an argument is NULL or a new entry cannot be allocated.
+ */
+static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+	struct aca_bank_error *existing;
+
+	if (!aerr || !info)
+		return NULL;
+
+	existing = find_bank_error(aerr, info);
+
+	return existing ? existing : new_bank_error(aerr, info);
+}
+
+/*
+ * Add @count errors of @type to the cache entry of @handle that matches
+ * the socket/die in @info, creating the entry on first use.
+ *
+ * Returns 0 on success (a zero @count is a no-op), -EINVAL when @handle or
+ * @info is NULL or @type is out of range, and -ENOMEM when a new cache
+ * entry cannot be allocated.
+ */
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+				   enum aca_error_type type, u64 count)
+{
+	struct aca_bank_error *bank_error;
+	struct aca_error *aerr;
+
+	if (!handle || !info || type >= ACA_ERROR_TYPE_COUNT)
+		return -EINVAL;
+
+	if (!count)
+		return 0;
+
+	/* Only reach through @handle once it is known to be non-NULL. */
+	aerr = &handle->error_cache.errors[type];
+	bank_error = get_bank_error(aerr, info);
+	if (!bank_error)
+		return -ENOMEM;
+
+	bank_error->count += count;
+
+	return 0;
+}
+
+/*
+ * Forward @bank to the aca_bank_parser callback of @handle.
+ *
+ * Returns -EINVAL for a NULL @bank, -EOPNOTSUPP when the handle has no
+ * parser callback, otherwise the callback's return value.
+ */
+static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
+{
+	const struct aca_bank_ops *ops = handle->bank_ops;
+
+	if (!bank)
+		return -EINVAL;
+
+	if (ops->aca_bank_parser)
+		return ops->aca_bank_parser(handle, bank, type, handle->data);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * bank_handler_t implementation that simply runs the handle's bank parser.
+ * @data is not used.
+ */
+static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank,
+				      enum aca_smu_type type, void *data)
+{
+	return aca_bank_parser(handle, bank, type);
+}
+
+/*
+ * Offer @bank to every handle registered with @mgr and run @handler on
+ * each handle that accepts it according to aca_bank_is_valid().
+ *
+ * Returns 0 when all invoked handlers succeed (or none is invoked),
+ * otherwise the first non-zero handler result; later handles are then
+ * not visited.
+ */
+static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank,
+			     enum aca_smu_type type, bank_handler_t handler, void *data)
+{
+	struct aca_handle *cur;
+	int err;
+
+	/* Iterating an empty list is a no-op, so no separate emptiness test is needed. */
+	list_for_each_entry(cur, &mgr->list, node) {
+		if (aca_bank_is_valid(cur, bank, type)) {
+			err = handler(cur, bank, type, data);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Dispatch every bank in @banks to the handles of @mgr.
+ *
+ * Returns -EINVAL when @mgr or @banks is NULL, 0 when either list is
+ * empty or every bank was dispatched successfully, otherwise the first
+ * error reported by aca_dispatch_bank().
+ */
+static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks,
+			      enum aca_smu_type type, bank_handler_t handler, void *data)
+{
+	struct aca_bank_node *entry;
+	int err;
+
+	if (!mgr || !banks)
+		return -EINVAL;
+
+	/* pre check to avoid unnecessary operations */
+	if (list_empty(&mgr->list) || list_empty(&banks->list))
+		return 0;
+
+	list_for_each_entry(entry, &banks->list, node) {
+		err = aca_dispatch_bank(mgr, &entry->bank, type, handler, data);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Tell whether the banks of @type should be fetched again now.
+ *
+ * Because the UE Valid MCA count is only cleared after reset, UE banks
+ * are updated just once during the gpu recovery stage to avoid counting
+ * the same errors repeatedly: while a RAS interrupt is flagged, only the
+ * caller that flips ue_update_flag from 0 to 1 gets true; once no
+ * interrupt is flagged the flag is cleared again. Every other type always
+ * yields true.
+ */
+static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type type)
+{
+	struct amdgpu_aca *aca = &adev->aca;
+
+	if (type != ACA_SMU_TYPE_UE)
+		return true;
+
+	if (!amdgpu_ras_intr_triggered()) {
+		atomic_set(&aca->ue_update_flag, 0);
+		return true;
+	}
+
+	return atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0;
+}
+
+/*
+ * Generate CPER records for the banks in @banks.
+ *
+ * Does nothing when CPER is not enabled. Failures are only reported with
+ * dev_warn(); nothing is returned to the caller.
+ */
+static void aca_banks_generate_cper(struct amdgpu_device *adev,
+ enum aca_smu_type type,
+ struct aca_banks *banks,
+ int count)
+{
+ struct aca_bank_node *node;
+ struct aca_bank *bank;
+ int r;
+
+ if (!adev->cper.enabled)
+ return;
+
+ if (!banks || !count) {
+ dev_warn(adev->dev, "fail to generate cper records\n");
+ return;
+ }
+
+ /* UEs must be encoded into separate CPER entries */
+ if (type == ACA_SMU_TYPE_UE) {
+ struct aca_banks de_banks;
+
+ /*
+ * Deferred-error banks are collected into a temporary list and
+ * emitted together below; every other bank gets its own UE record.
+ */
+ aca_banks_init(&de_banks);
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+ r = aca_banks_add_bank(&de_banks, bank);
+ if (r)
+ dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r);
+ } else {
+ if (amdgpu_cper_generate_ue_record(adev, bank))
+ dev_warn(adev->dev, "fail to generate ue cper records\n");
+ }
+ }
+
+ if (!list_empty(&de_banks.list)) {
+ if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks))
+ dev_warn(adev->dev, "fail to generate de cper records\n");
+ }
+
+ /* The temporary list holds copies, so it is freed here. */
+ aca_banks_release(&de_banks);
+ } else {
+ /*
+ * SMU_TYPE_CE banks are combined into a single CPER entry;
+ * they could be CEs or DEs or both
+ */
+ if (amdgpu_cper_generate_ce_records(adev, banks, count))
+ dev_warn(adev->dev, "fail to generate ce cper records\n");
+ }
+}
+
+/*
+ * Fetch the currently valid banks of @type, dispatch them to the
+ * registered handles through @handler and, when dispatching succeeds,
+ * generate CPER records for them.
+ *
+ * Returns 0 when there is nothing to do (no handles registered, update
+ * suppressed by aca_bank_should_update(), zero valid banks, or every bank
+ * filtered out), otherwise the first error from the count query, the bank
+ * fetch or the dispatch.
+ */
+static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
+ bank_handler_t handler, struct ras_query_context *qctx, void *data)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ struct aca_banks banks;
+ u32 count = 0;
+ int ret;
+
+ if (list_empty(&aca->mgr.list))
+ return 0;
+
+ if (!aca_bank_should_update(adev, type))
+ return 0;
+
+ ret = aca_smu_get_valid_aca_count(adev, type, &count);
+ if (ret)
+ return ret;
+
+ if (!count)
+ return 0;
+
+ aca_banks_init(&banks);
+
+ ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx);
+ if (ret)
+ goto err_release_banks;
+
+ if (list_empty(&banks.list)) {
+ ret = 0;
+ goto err_release_banks;
+ }
+
+ ret = aca_dispatch_banks(&aca->mgr, &banks, type,
+ handler, data);
+ if (ret)
+ goto err_release_banks;
+
+ aca_banks_generate_cper(adev, type, &banks, count);
+
+ /* Reached on success as well: the temporary bank list is always freed. */
+err_release_banks:
+ aca_banks_release(&banks);
+
+ return ret;
+}
+
+/*
+ * Report the count accumulated in @bank_error to @err_data through the
+ * RAS statistic helper that corresponds to @type.
+ *
+ * Returns -EINVAL when @type is out of range, otherwise 0 (a zero count is
+ * skipped without calling any helper).
+ */
+static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_error_type type, struct ras_err_data *err_data)
+{
+	struct amdgpu_smuio_mcm_config_info mcm_info;
+	u64 nr_errors;
+
+	if (type >= ACA_ERROR_TYPE_COUNT)
+		return -EINVAL;
+
+	nr_errors = bank_error->count;
+	if (!nr_errors)
+		return 0;
+
+	/* The statistic helpers identify the location by die and socket id. */
+	mcm_info.die_id = bank_error->info.die_id;
+	mcm_info.socket_id = bank_error->info.socket_id;
+
+	switch (type) {
+	case ACA_ERROR_TYPE_UE:
+		amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, nr_errors);
+		break;
+	case ACA_ERROR_TYPE_CE:
+		amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, nr_errors);
+		break;
+	case ACA_ERROR_TYPE_DEFERRED:
+		amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, nr_errors);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Flush the cached errors of @type held by @handle into @err_data.
+ *
+ * Under the per-type lock, every cached entry is reported and then removed
+ * from the cache, so a second call reports nothing new. Always returns 0.
+ */
+static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data)
+{
+	struct aca_error *aerr = &handle->error_cache.errors[type];
+	struct aca_bank_error *cur, *next;
+
+	mutex_lock(&aerr->lock);
+	/* An empty list simply makes this loop a no-op. */
+	list_for_each_entry_safe(cur, next, &aerr->list, node) {
+		aca_log_aca_error_data(cur, type, err_data);
+		aca_bank_error_remove(aerr, cur);
+	}
+	mutex_unlock(&aerr->lock);
+
+	return 0;
+}
+
+/*
+ * Refresh the error caches from the SMU and flush the cached errors of
+ * @type for @handle into @err_data.
+ *
+ * UE maps to the UE SMU bank type; CE and DEFERRED both map to the CE SMU
+ * bank type. Returns -EINVAL for any other @type, the error from
+ * aca_banks_update() when the refresh fails, otherwise the result of
+ * aca_log_aca_error().
+ */
+static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
+				struct ras_err_data *err_data, struct ras_query_context *qctx)
+{
+	enum aca_smu_type smu_type;
+	int err;
+
+	if (type == ACA_ERROR_TYPE_UE)
+		smu_type = ACA_SMU_TYPE_UE;
+	else if (type == ACA_ERROR_TYPE_CE || type == ACA_ERROR_TYPE_DEFERRED)
+		smu_type = ACA_SMU_TYPE_CE;
+	else
+		return -EINVAL;
+
+	/* update aca bank to aca source error_cache first */
+	err = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
+	if (err)
+		return err;
+
+	/* DEs may be contained in CEs or UEs, so flush them along with either. */
+	if (type != ACA_ERROR_TYPE_DEFERRED)
+		aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data);
+
+	return aca_log_aca_error(handle, type, err_data);
+}
+
+/*
+ * Returns true when @handle has a non-zero mask AND its list node is
+ * empty (i.e. it is not linked into any list).
+ * NOTE(review): the only caller in this file treats a true result as the
+ * "not supported" case, so the name reads inverted -- confirm the intent.
+ */
+static bool aca_handle_is_valid(struct aca_handle *handle)
+{
+	return handle->mask && list_empty(&handle->node);
+}
+
+/*
+ * Collect the errors of @type recorded for @handle into @err_data.
+ *
+ * Returns -EINVAL when @handle or @err_data is NULL, -EOPNOTSUPP when
+ * aca_handle_is_valid() reports true for @handle, 0 when @type is negative
+ * or not selected in the handle's mask, otherwise the result of
+ * __aca_get_error_data().
+ */
+int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
+			      enum aca_error_type type, struct ras_err_data *err_data,
+			      struct ras_query_context *qctx)
+{
+	if (!handle || !err_data)
+		return -EINVAL;
+
+	/* True here means: mask set, but the handle's list node is empty. */
+	if (aca_handle_is_valid(handle))
+		return -EOPNOTSUPP;
+
+	/* Error types the handle did not select are silently skipped. */
+	if (type < 0 || !(handle->mask & BIT(type)))
+		return 0;
+
+	return __aca_get_error_data(adev, handle, type, err_data, qctx);
+}
+
+/* Prepare @aerr as an empty, unlocked error list tagged with @type. */
+static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
+{
+	aerr->type = type;
+	aerr->nr_errors = 0;
+	INIT_LIST_HEAD(&aerr->list);
+	mutex_init(&aerr->lock);
+}
+
+/*
+ * Initialise the per-type error lists of @handle, from ACA_ERROR_TYPE_UE
+ * up to (excluding) ACA_ERROR_TYPE_COUNT.
+ */
+static void aca_init_error_cache(struct aca_handle *handle)
+{
+	struct aca_error *errors = handle->error_cache.errors;
+	int type = ACA_ERROR_TYPE_UE;
+
+	while (type < ACA_ERROR_TYPE_COUNT) {
+		aca_error_init(&errors[type], type);
+		type++;
+	}
+}
+
+/*
+ * Free every bank error still cached in @aerr and destroy its lock.
+ * @aerr must be re-initialised with aca_error_init() before further use.
+ */
+static void aca_error_fini(struct aca_error *aerr)
+{
+	struct aca_bank_error *bank_error, *tmp;
+
+	mutex_lock(&aerr->lock);
+	/* Iterating an empty list is a no-op, so no emptiness test is needed. */
+	list_for_each_entry_safe(bank_error, tmp, &aerr->list, node)
+		aca_bank_error_remove(aerr, bank_error);
+	mutex_unlock(&aerr->lock);
+
+	/* A mutex has to be released before it may be destroyed. */
+	mutex_destroy(&aerr->lock);
+}
+
+/*
+ * Tear down the per-type error lists of @handle, from ACA_ERROR_TYPE_UE
+ * up to (excluding) ACA_ERROR_TYPE_COUNT.
+ */
+static void aca_fini_error_cache(struct aca_handle *handle)
+{
+	struct aca_error *errors = handle->error_cache.errors;
+	int type = ACA_ERROR_TYPE_UE;
+
+	while (type < ACA_ERROR_TYPE_COUNT) {
+		aca_error_fini(&errors[type]);
+		type++;
+	}
+}
+
+/*
+ * Initialise @handle from @ras_info and link it into @mgr.
+ *
+ * @handle is zeroed first, so any previous content is lost. @name and
+ * @data are stored by pointer, not copied. Always returns 0.
+ */
+static int add_aca_handle(struct amdgpu_device *adev, struct aca_handle_manager *mgr, struct aca_handle *handle,
+			  const char *name, const struct aca_info *ras_info, void *data)
+{
+	memset(handle, 0, sizeof(*handle));
+
+	/* Identity and ownership. */
+	handle->adev = adev;
+	handle->mgr = mgr;
+	handle->name = name;
+	handle->data = data;
+
+	/* Behaviour taken from the caller's description. */
+	handle->hwip = ras_info->hwip;
+	handle->mask = ras_info->mask;
+	handle->bank_ops = ras_info->bank_ops;
+
+	aca_init_error_cache(handle);
+
+	INIT_LIST_HEAD(&handle->node);
+	list_add_tail(&handle->node, &mgr->list);
+	mgr->nr_handles++;
+
+	return 0;
+}
+
+/* sysfs show() callback of the per-handle "aca_<name>" attribute. */
+static ssize_t aca_sysfs_read(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct aca_handle *owner = container_of(attr, struct aca_handle, aca_attr);
+
+	/*
+	 * NOTE: the aca cache is cleared automatically once it has been read,
+	 * so the driver keeps a single query entry point and forwards the
+	 * request directly to the ras query interface.
+	 */
+	return amdgpu_ras_aca_sysfs_read(dev, attr, owner, buf, owner->data);
+}
+
+/*
+ * Publish a read-only "aca_<name>" attribute for @handle in the "ras"
+ * sysfs group of the device.
+ *
+ * Returns the result of sysfs_add_file_to_group().
+ */
+static int add_aca_sysfs(struct amdgpu_device *adev, struct aca_handle *handle)
+{
+	struct device_attribute *aca_attr = &handle->aca_attr;
+
+	/* snprintf() already reserves the last byte for the terminating NUL. */
+	snprintf(handle->attr_name, sizeof(handle->attr_name), "aca_%s", handle->name);
+	aca_attr->show = aca_sysfs_read;
+	aca_attr->attr.name = handle->attr_name;
+	aca_attr->attr.mode = 0444;
+	sysfs_attr_init(&aca_attr->attr);
+
+	return sysfs_add_file_to_group(&adev->dev->kobj,
+				       &aca_attr->attr,
+				       "ras");
+}
+
+int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
+ const char *name, const struct aca_info *ras_info, void *data)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ int ret;
+
+ if (!amdgpu_aca_is_enabled(adev))
+ return 0;
+
+ ret = add_aca_handle(adev, &aca->mgr, handle, name, ras_info, data);
+ if (ret)
+ return ret;
+
+ return add_aca_sysfs(adev, handle);
+}
+
+static void remove_aca_handle(struct aca_handle *handle)
+{
+	struct aca_handle_manager *mgr = handle->mgr;
+
+	aca_fini_error_cache(handle);
+	list_del_init(&handle->node); /* node stays list_empty() so a repeated removal is caught by the caller's guard */
+	mgr->nr_handles--;
+}
+
+static void remove_aca_sysfs(struct aca_handle *handle)
+{
+ struct amdgpu_device *adev = handle->adev;
+ struct device_attribute *aca_attr = &handle->aca_attr;
+
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
+ &aca_attr->attr,
+ "ras");
+}
+
+void amdgpu_aca_remove_handle(struct aca_handle *handle)
+{
+ if (!handle || list_empty(&handle->node))
+ return;
+
+ remove_aca_sysfs(handle);
+ remove_aca_handle(handle);
+}
+
+static int aca_manager_init(struct aca_handle_manager *mgr)
+{
+ INIT_LIST_HEAD(&mgr->list);
+ mgr->nr_handles = 0;
+
+ return 0;
+}
+
+static void aca_manager_fini(struct aca_handle_manager *mgr)
+{
+ struct aca_handle *handle, *tmp;
+
+ if (list_empty(&mgr->list))
+ return;
+
+ list_for_each_entry_safe(handle, tmp, &mgr->list, node)
+ amdgpu_aca_remove_handle(handle);
+}
+
+bool amdgpu_aca_is_enabled(struct amdgpu_device *adev)
+{
+ return (adev->aca.is_enabled ||
+ adev->debug_enable_ras_aca);
+}
+
+int amdgpu_aca_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ int ret;
+
+ atomic_set(&aca->ue_update_flag, 0);
+
+ ret = aca_manager_init(&aca->mgr);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+void amdgpu_aca_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+
+ aca_manager_fini(&aca->mgr);
+
+ atomic_set(&aca->ue_update_flag, 0);
+}
+
+int amdgpu_aca_reset(struct amdgpu_device *adev)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+
+ atomic_set(&aca->ue_update_flag, 0);
+
+ return 0;
+}
+
+void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+
+ WARN_ON(aca->smu_funcs);
+ aca->smu_funcs = smu_funcs;
+}
+
+int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info)
+{
+ u64 ipid;
+ u32 instidhi, instidlo;
+
+ if (!bank || !info)
+ return -EINVAL;
+
+ ipid = bank->regs[ACA_REG_IDX_IPID];
+ info->hwid = ACA_REG__IPID__HARDWAREID(ipid);
+ info->mcatype = ACA_REG__IPID__MCATYPE(ipid);
+ /*
+ * Unified DieID Format: SAASS. A:AID, S:Socket.
+ * Unified DieID[4:4] = InstanceId[0:0]
+ * Unified DieID[0:3] = InstanceIdHi[0:3]
+ */
+ instidhi = ACA_REG__IPID__INSTANCEIDHI(ipid);
+ instidlo = ACA_REG__IPID__INSTANCEIDLO(ipid);
+ info->die_id = ((instidhi >> 2) & 0x03);
+ info->socket_id = ((instidlo & 0x1) << 2) | (instidhi & 0x03);
+
+ return 0;
+}
+
+static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+
+ if (!smu_funcs || !smu_funcs->parse_error_code)
+ return -EOPNOTSUPP;
+
+ return smu_funcs->parse_error_code(adev, bank);
+}
+
+int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size)
+{
+ int i, error_code;
+
+ if (!bank || !err_codes)
+ return -EINVAL;
+
+ error_code = aca_bank_get_error_code(adev, bank);
+ if (error_code < 0)
+ return error_code;
+
+ for (i = 0; i < size; i++) {
+ if (err_codes[i] == error_code)
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+
+ if (!smu_funcs || !smu_funcs->set_debug_mode)
+ return -EOPNOTSUPP;
+
+ return smu_funcs->set_debug_mode(adev, en);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ int ret;
+
+ ret = amdgpu_ras_set_aca_debug_mode(adev, val ? true : false);
+ if (ret)
+ return ret;
+
+ dev_info(adev->dev, "amdgpu set smu aca debug mode %s success\n", val ? "on" : "off");
+
+ return 0;
+}
+
+static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx)
+{
+ struct aca_bank_info info;
+ int i, ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return;
+
+ seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_SMU_TYPE_UE ? "UE" : "CE");
+ seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
+ idx, info.socket_id, info.die_id, info.hwid, info.mcatype);
+
+ for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
+ seq_printf(m, "aca entry[%d].regs[%d]: 0x%016llx\n", idx, aca_regs[i].reg_idx, bank->regs[aca_regs[i].reg_idx]);
+}
+
+struct aca_dump_context {
+ struct seq_file *m;
+ int idx;
+};
+
+static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_dump_context *ctx = (struct aca_dump_context *)data;
+
+ aca_dump_entry(ctx->m, bank, type, ctx->idx++);
+
+ return handler_aca_log_bank_error(handle, bank, type, NULL);
+}
+
+static int aca_dump_show(struct seq_file *m, enum aca_smu_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct aca_dump_context context = {
+ .m = m,
+ .idx = 0,
+ };
+
+ return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context);
+}
+
+static int aca_dump_ce_show(struct seq_file *m, void *unused)
+{
+ return aca_dump_show(m, ACA_SMU_TYPE_CE);
+}
+
+static int aca_dump_ce_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, aca_dump_ce_show, inode->i_private);
+}
+
+static const struct file_operations aca_ce_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = aca_dump_ce_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int aca_dump_ue_show(struct seq_file *m, void *unused)
+{
+ return aca_dump_show(m, ACA_SMU_TYPE_UE);
+}
+
+static int aca_dump_ue_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, aca_dump_ue_show, inode->i_private);
+}
+
+static const struct file_operations aca_ue_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = aca_dump_ue_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n");
+#endif
+
+void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
+{
+#if defined(CONFIG_DEBUG_FS)
+ if (!root)
+ return;
+
+ debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops);
+ debugfs_create_file("aca_ue_dump", 0400, root, adev, &aca_ue_dump_debug_fops);
+ debugfs_create_file("aca_ce_dump", 0400, root, adev, &aca_ce_dump_debug_fops);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
new file mode 100644
index 000000000000..38c88897e1ec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_ACA_H__
+#define __AMDGPU_ACA_H__
+
+#include <linux/list.h>
+
+struct ras_err_data;
+struct ras_query_context;
+
+#define ACA_MAX_REGS_COUNT (16)
+
+#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> (l))
+#define ACA_REG__STATUS__VAL(x) ACA_REG_FIELD(x, 63, 63)
+#define ACA_REG__STATUS__OVERFLOW(x) ACA_REG_FIELD(x, 62, 62)
+#define ACA_REG__STATUS__UC(x) ACA_REG_FIELD(x, 61, 61)
+#define ACA_REG__STATUS__EN(x) ACA_REG_FIELD(x, 60, 60)
+#define ACA_REG__STATUS__MISCV(x) ACA_REG_FIELD(x, 59, 59)
+#define ACA_REG__STATUS__ADDRV(x) ACA_REG_FIELD(x, 58, 58)
+#define ACA_REG__STATUS__PCC(x) ACA_REG_FIELD(x, 57, 57)
+#define ACA_REG__STATUS__ERRCOREIDVAL(x) ACA_REG_FIELD(x, 56, 56)
+#define ACA_REG__STATUS__TCC(x) ACA_REG_FIELD(x, 55, 55)
+#define ACA_REG__STATUS__SYNDV(x) ACA_REG_FIELD(x, 53, 53)
+#define ACA_REG__STATUS__CECC(x) ACA_REG_FIELD(x, 46, 46)
+#define ACA_REG__STATUS__UECC(x) ACA_REG_FIELD(x, 45, 45)
+#define ACA_REG__STATUS__DEFERRED(x) ACA_REG_FIELD(x, 44, 44)
+#define ACA_REG__STATUS__POISON(x) ACA_REG_FIELD(x, 43, 43)
+#define ACA_REG__STATUS__SCRUB(x) ACA_REG_FIELD(x, 40, 40)
+#define ACA_REG__STATUS__ERRCOREID(x) ACA_REG_FIELD(x, 37, 32)
+#define ACA_REG__STATUS__ADDRLSB(x) ACA_REG_FIELD(x, 29, 24)
+#define ACA_REG__STATUS__ERRORCODEEXT(x) ACA_REG_FIELD(x, 21, 16)
+#define ACA_REG__STATUS__ERRORCODE(x) ACA_REG_FIELD(x, 15, 0)
+
+#define ACA_REG__IPID__MCATYPE(x) ACA_REG_FIELD(x, 63, 48)
+#define ACA_REG__IPID__INSTANCEIDHI(x) ACA_REG_FIELD(x, 47, 44)
+#define ACA_REG__IPID__HARDWAREID(x) ACA_REG_FIELD(x, 43, 32)
+#define ACA_REG__IPID__INSTANCEIDLO(x) ACA_REG_FIELD(x, 31, 0)
+
+#define ACA_REG__MISC0__VALID(x) ACA_REG_FIELD(x, 63, 63)
+#define ACA_REG__MISC0__OVRFLW(x) ACA_REG_FIELD(x, 48, 48)
+#define ACA_REG__MISC0__ERRCNT(x) ACA_REG_FIELD(x, 43, 32)
+
+#define ACA_REG__SYND__ERRORINFORMATION(x) ACA_REG_FIELD(x, 17, 0)
+
+/* NOTE: The following codes refer to the SMU header file */
+#define ACA_EXTERROR_CODE_CE 0x3a
+#define ACA_EXTERROR_CODE_FAULT 0x3b
+
+#define ACA_ERROR_UE_MASK BIT_MASK(ACA_ERROR_TYPE_UE)
+#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE)
+#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED)
+
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */
+#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
+#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
+#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
+
+#define ACA_BANK_ERR_IS_DEFFERED(bank) \
+ (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
+ ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS]))
+
+enum aca_reg_idx {
+ ACA_REG_IDX_CTL = 0,
+ ACA_REG_IDX_STATUS = 1,
+ ACA_REG_IDX_ADDR = 2,
+ ACA_REG_IDX_MISC0 = 3,
+ ACA_REG_IDX_CONFIG = 4,
+ ACA_REG_IDX_IPID = 5,
+ ACA_REG_IDX_SYND = 6,
+ ACA_REG_IDX_DESTAT = 8,
+ ACA_REG_IDX_DEADDR = 9,
+ ACA_REG_IDX_CTL_MASK = 10,
+ ACA_REG_IDX_COUNT = 16,
+};
+
+enum aca_hwip_type {
+ ACA_HWIP_TYPE_UNKNOW = -1,
+ ACA_HWIP_TYPE_PSP = 0,
+ ACA_HWIP_TYPE_UMC,
+ ACA_HWIP_TYPE_SMU,
+ ACA_HWIP_TYPE_PCS_XGMI,
+ ACA_HWIP_TYPE_COUNT,
+};
+
+enum aca_error_type {
+ ACA_ERROR_TYPE_INVALID = -1,
+ ACA_ERROR_TYPE_UE = 0,
+ ACA_ERROR_TYPE_CE,
+ ACA_ERROR_TYPE_DEFERRED,
+ ACA_ERROR_TYPE_COUNT
+};
+
+enum aca_smu_type {
+ ACA_SMU_TYPE_INVALID = -1,
+ ACA_SMU_TYPE_UE = 0,
+ ACA_SMU_TYPE_CE,
+ ACA_SMU_TYPE_COUNT,
+};
+
+struct aca_hwip {
+ int hwid;
+ int mcatype;
+};
+
+struct aca_bank {
+ enum aca_error_type aca_err_type;
+ enum aca_smu_type smu_err_type;
+ u64 regs[ACA_MAX_REGS_COUNT];
+};
+
+struct aca_bank_node {
+ struct aca_bank bank;
+ struct list_head node;
+};
+
+struct aca_banks {
+ int nr_banks;
+ struct list_head list;
+};
+
+struct aca_bank_info {
+ int die_id;
+ int socket_id;
+ int hwid;
+ int mcatype;
+};
+
+struct aca_bank_error {
+ struct list_head node;
+ struct aca_bank_info info;
+ u64 count;
+};
+
+struct aca_error {
+ struct list_head list;
+ struct mutex lock;
+ enum aca_error_type type;
+ int nr_errors;
+};
+
+struct aca_handle_manager {
+ struct list_head list;
+ int nr_handles;
+};
+
+struct aca_error_cache {
+ struct aca_error errors[ACA_ERROR_TYPE_COUNT];
+};
+
+struct aca_handle {
+ struct list_head node;
+ enum aca_hwip_type hwip;
+ struct amdgpu_device *adev;
+ struct aca_handle_manager *mgr;
+ struct aca_error_cache error_cache;
+ const struct aca_bank_ops *bank_ops;
+ struct device_attribute aca_attr;
+ char attr_name[64];
+ const char *name;
+ u32 mask;
+ void *data;
+};
+
+struct aca_bank_ops {
+ int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+ bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
+ void *data);
+};
+
+struct aca_smu_funcs {
+ int max_ue_bank_count;
+ int max_ce_bank_count;
+ int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
+ int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count);
+ int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank);
+ int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank);
+};
+
+struct amdgpu_aca {
+ struct aca_handle_manager mgr;
+ const struct aca_smu_funcs *smu_funcs;
+ atomic_t ue_update_flag;
+ bool is_enabled;
+};
+
+struct aca_info {
+ enum aca_hwip_type hwip;
+ const struct aca_bank_ops *bank_ops;
+ u32 mask;
+};
+
+int amdgpu_aca_init(struct amdgpu_device *adev);
+void amdgpu_aca_fini(struct amdgpu_device *adev);
+int amdgpu_aca_reset(struct amdgpu_device *adev);
+void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
+bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);
+
+int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info);
+int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size);
+
+int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
+ const char *name, const struct aca_info *aca_info, void *data);
+void amdgpu_aca_remove_handle(struct aca_handle *handle);
+int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx);
+int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
+void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+ enum aca_error_type type, u64 count);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 6d72355ac492..4926996f94da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -98,9 +98,9 @@ enum {
ACP_TILE_DSP2,
};
-static int acp_sw_init(void *handle)
+static int acp_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->acp.parent = adev->dev;
@@ -112,9 +112,9 @@ static int acp_sw_init(void *handle)
return 0;
}
-static int acp_sw_fini(void *handle)
+static int acp_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->acp.cgs_device)
amdgpu_cgs_destroy_device(adev->acp.cgs_device);
@@ -140,7 +140,7 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
* 2. power off the acp tiles
* 3. check and enter ulv state
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
}
@@ -157,7 +157,7 @@ static int acp_poweron(struct generic_pm_domain *genpd)
* 2. turn on acp clock
* 3. power on acp tiles
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
@@ -219,10 +219,10 @@ static const struct dmi_system_id acp_quirk_table[] = {
/**
* acp_hw_init - start and test ACP block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int acp_hw_init(void *handle)
+static int acp_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
u64 acp_base;
@@ -230,19 +230,13 @@ static int acp_hw_init(void *handle)
u32 count = 0;
struct i2s_platform_data *i2s_pdata = NULL;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- const struct amdgpu_ip_block *ip_block =
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
-
- if (!ip_block)
- return -EINVAL;
+ struct amdgpu_device *adev = ip_block->adev;
r = amd_acp_hw_init(adev->acp.cgs_device,
ip_block->version->major, ip_block->version->minor);
/* -ENODEV means board uses AZ rather than ACP */
if (r == -ENODEV) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
} else if (r) {
return r;
@@ -503,18 +497,18 @@ failure:
/**
* acp_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int acp_hw_fini(void *handle)
+static int acp_hw_fini(struct amdgpu_ip_block *ip_block)
{
u32 val = 0;
u32 count = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* return early if no ACP */
if (!adev->acp.acp_genpd) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
@@ -565,67 +559,50 @@ static int acp_hw_fini(void *handle)
return 0;
}
-static int acp_suspend(void *handle)
+static int acp_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* power up on suspend */
if (!adev->acp.acp_cell)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
-static int acp_resume(void *handle)
+static int acp_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* power down again on resume */
if (!adev->acp.acp_cell)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
- return 0;
-}
-
-static int acp_early_init(void *handle)
-{
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
}
-static bool acp_is_idle(void *handle)
+static bool acp_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int acp_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int acp_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int acp_set_clockgating_state(void *handle,
+static int acp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int acp_set_powergating_state(void *handle,
+static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0);
return 0;
}
static const struct amd_ip_funcs acp_ip_funcs = {
.name = "acp_ip",
- .early_init = acp_early_init,
- .late_init = NULL,
.sw_init = acp_sw_init,
.sw_fini = acp_sw_fini,
.hw_init = acp_hw_init,
@@ -633,8 +610,6 @@ static const struct amd_ip_funcs acp_ip_funcs = {
.suspend = acp_suspend,
.resume = acp_resume,
.is_idle = acp_is_idle,
- .wait_for_idle = acp_wait_for_idle,
- .soft_reset = acp_soft_reset,
.set_clockgating_state = acp_set_clockgating_state,
.set_powergating_state = acp_set_powergating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 2bca37044ad0..6c62e27b9800 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -68,7 +68,7 @@ struct amdgpu_acpi_xcc_info {
struct amdgpu_acpi_dev_info {
struct list_head list;
struct list_head xcc_list;
- uint16_t bdf;
+ uint32_t sbdf;
uint16_t supp_xcp_mode;
uint16_t xcp_mode;
uint16_t mem_mode;
@@ -147,6 +147,7 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
struct acpi_buffer *params)
{
acpi_status status;
+ union acpi_object *obj;
union acpi_object atif_arg_elements[2];
struct acpi_object_list atif_arg;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -169,16 +170,24 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
status = acpi_evaluate_object(atif->handle, NULL, &atif_arg,
&buffer);
+ obj = (union acpi_object *)buffer.pointer;
- /* Fail only if calling the method fails and ATIF is supported */
- if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ /* Fail if calling the method fails */
+ if (ACPI_FAILURE(status)) {
DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n",
acpi_format_exception(status));
- kfree(buffer.pointer);
+ kfree(obj);
return NULL;
}
- return buffer.pointer;
+ if (obj->type != ACPI_TYPE_BUFFER) {
+ DRM_DEBUG_DRIVER("bad object returned from ATIF: %d\n",
+ obj->type);
+ kfree(obj);
+ return NULL;
+ }
+
+ return obj;
}
/**
@@ -383,6 +392,12 @@ static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
characteristics.min_input_signal;
atif->backlight_caps.max_input_signal =
characteristics.max_input_signal;
+ atif->backlight_caps.ac_level = characteristics.ac_level;
+ atif->backlight_caps.dc_level = characteristics.dc_level;
+ atif->backlight_caps.data_points = characteristics.number_of_points;
+ memcpy(atif->backlight_caps.luminance_data,
+ characteristics.data_points,
+ sizeof(atif->backlight_caps.luminance_data));
out:
kfree(info);
return err;
@@ -789,24 +804,25 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
return -EIO;
}
+ kfree(info);
return 0;
}
/**
* amdgpu_acpi_smart_shift_update - update dGPU device state to SBIOS
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
* @ss_state: current smart shift event
*
* returns 0 on success,
* otherwise return error number.
*/
-int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state)
+int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
int r;
- if (!amdgpu_device_supports_smart_shift(dev))
+ if (!amdgpu_device_supports_smart_shift(adev))
return 0;
switch (ss_state) {
@@ -927,7 +943,7 @@ static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle,
#endif
}
-static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf)
+static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u32 sbdf)
{
struct amdgpu_acpi_dev_info *acpi_dev;
@@ -935,14 +951,14 @@ static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf)
return NULL;
list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
- if (acpi_dev->bdf == bdf)
+ if (acpi_dev->sbdf == sbdf)
return acpi_dev;
return NULL;
}
static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
- struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf)
+ struct amdgpu_acpi_xcc_info *xcc_info, u32 sbdf)
{
struct amdgpu_acpi_dev_info *tmp;
union acpi_object *obj;
@@ -955,7 +971,7 @@ static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
INIT_LIST_HEAD(&tmp->xcc_list);
INIT_LIST_HEAD(&tmp->list);
- tmp->bdf = bdf;
+ tmp->sbdf = sbdf;
obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
AMD_XCC_DSM_GET_SUPP_MODE, NULL,
@@ -1007,7 +1023,7 @@ static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
DRM_DEBUG_DRIVER(
"New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ",
- tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
+ tmp->sbdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
tmp->tmr_base, tmp->tmr_size);
list_add_tail(&tmp->list, &amdgpu_acpi_dev_list);
*dev_info = tmp;
@@ -1023,7 +1039,7 @@ out:
}
static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
- u16 *bdf)
+ u32 *sbdf)
{
union acpi_object *obj;
acpi_status status;
@@ -1054,8 +1070,10 @@ static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF;
/* xcp node of this xcc [47:40] */
xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF;
+ /* PF domain of this xcc [31:16] */
+ *sbdf = (obj->integer.value) & 0xFFFF0000;
/* PF bus/dev/fn of this xcc [63:48] */
- *bdf = (obj->integer.value >> 48) & 0xFFFF;
+ *sbdf |= (obj->integer.value >> 48) & 0xFFFF;
ACPI_FREE(obj);
obj = NULL;
@@ -1079,7 +1097,7 @@ static int amdgpu_acpi_enumerate_xcc(void)
struct acpi_device *acpi_dev;
char hid[ACPI_ID_LEN];
int ret, id;
- u16 bdf;
+ u32 sbdf;
INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
xa_init(&numa_info_xa);
@@ -1107,16 +1125,16 @@ static int amdgpu_acpi_enumerate_xcc(void)
xcc_info->handle = acpi_device_handle(acpi_dev);
acpi_dev_put(acpi_dev);
- ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf);
+ ret = amdgpu_acpi_get_xcc_info(xcc_info, &sbdf);
if (ret) {
kfree(xcc_info);
continue;
}
- dev_info = amdgpu_acpi_get_dev(bdf);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
if (!dev_info)
- ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, bdf);
+ ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, sbdf);
if (ret == -ENOMEM)
return ret;
@@ -1136,13 +1154,14 @@ int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
u64 *tmr_size)
{
struct amdgpu_acpi_dev_info *dev_info;
- u16 bdf;
+ u32 sbdf;
if (!tmr_offset || !tmr_size)
return -EINVAL;
- bdf = pci_dev_id(adev->pdev);
- dev_info = amdgpu_acpi_get_dev(bdf);
+ sbdf = (pci_domain_nr(adev->pdev->bus) << 16);
+ sbdf |= pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
if (!dev_info)
return -ENOENT;
@@ -1157,13 +1176,14 @@ int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
{
struct amdgpu_acpi_dev_info *dev_info;
struct amdgpu_acpi_xcc_info *xcc_info;
- u16 bdf;
+ u32 sbdf;
if (!numa_info)
return -EINVAL;
- bdf = pci_dev_id(adev->pdev);
- dev_info = amdgpu_acpi_get_dev(bdf);
+ sbdf = (pci_domain_nr(adev->pdev->bus) << 16);
+ sbdf |= pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
if (!dev_info)
return -ENOENT;
@@ -1261,9 +1281,7 @@ void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps)
{
struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
- caps->caps_valid = atif->backlight_caps.caps_valid;
- caps->min_input_signal = atif->backlight_caps.min_input_signal;
- caps->max_input_signal = atif->backlight_caps.max_input_signal;
+ memcpy(caps, &atif->backlight_caps, sizeof(*caps));
}
/**
@@ -1389,14 +1407,11 @@ void amdgpu_acpi_detect(void)
struct pci_dev *pdev = NULL;
int ret;
- while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
- if (!atif->handle)
- amdgpu_atif_pci_probe_handle(pdev);
- if (!atcs->handle)
- amdgpu_atcs_pci_probe_handle(pdev);
- }
+ while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) {
+ if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) &&
+ (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8))
+ continue;
- while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
if (!atif->handle)
amdgpu_atif_pci_probe_handle(pdev);
if (!atcs->handle)
@@ -1493,6 +1508,9 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
if (adev->asic_type < CHIP_RAVEN)
return false;
+ if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
+ return false;
+
/*
* If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally
* risky to do any special firmware-related preparations for entering
@@ -1514,5 +1532,35 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
return true;
#endif /* CONFIG_AMD_PMC */
}
-
#endif /* CONFIG_SUSPEND */
+
+#if IS_ENABLED(CONFIG_DRM_AMD_ISP)
+static const struct acpi_device_id isp_sensor_ids[] = {
+ { "OMNI5C10" },
+ { }
+};
+
+static int isp_match_acpi_device_ids(struct device *dev, const void *data)
+{
+ return acpi_match_device(data, dev) ? 1 : 0;
+}
+
+int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev)
+{
+ struct device *pdev __free(put_device) = NULL;
+ struct acpi_device *acpi_pdev;
+
+ pdev = bus_find_device(&platform_bus_type, NULL, isp_sensor_ids,
+ isp_match_acpi_device_ids);
+ if (!pdev)
+ return -EINVAL;
+
+ acpi_pdev = ACPI_COMPANION(pdev);
+ if (!acpi_pdev)
+ return -ENODEV;
+
+ *dev = acpi_pdev;
+
+ return 0;
+}
+#endif /* CONFIG_DRM_AMD_ISP */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
index a4d65973bf7c..80771b1480ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
@@ -100,6 +100,7 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
amdgpu_afmt_calc_cts(clock, &res.cts_32khz, &res.n_32khz, 32000);
amdgpu_afmt_calc_cts(clock, &res.cts_44_1khz, &res.n_44_1khz, 44100);
amdgpu_afmt_calc_cts(clock, &res.cts_48khz, &res.n_48khz, 48000);
+ res.clock = clock;
return res;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 25d5fda5b243..a2879d2b7c8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -28,6 +28,7 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
+#include <drm/ttm/ttm_tt.h>
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
@@ -132,11 +133,38 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
+ reset_context.src = adev->enable_mes ?
+ AMDGPU_RESET_SRC_MES :
+ AMDGPU_RESET_SRC_HWS;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
+static const struct drm_client_funcs kfd_client_funcs = {
+ .unregister = drm_client_release,
+};
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (!adev->kfd.init_complete || adev->kfd.client.dev)
+ return 0;
+
+ ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+ &kfd_client_funcs);
+ if (ret) {
+ dev_err(adev->dev, "Failed to init DRM client: %d\n",
+ ret);
+ return ret;
+ }
+
+ drm_client_register(&adev->kfd.client);
+
+ return 0;
+}
+
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
@@ -164,7 +192,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
*/
bitmap_complement(gpu_resources.cp_queue_bitmap,
adev->gfx.mec_bitmap[0].queue_bitmap,
- KGD_MAX_QUEUES);
+ AMDGPU_MAX_QUEUES);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
* nbits is not compile time constant
@@ -172,7 +200,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
last_valid_bit = 1 /* only first MEC can have compute queues */
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
- for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+ for (i = last_valid_bit; i < AMDGPU_MAX_QUEUES; ++i)
clear_bit(i, gpu_resources.cp_queue_bitmap);
amdgpu_doorbell_get_kfd_info(adev,
@@ -220,28 +248,53 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}
-void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
+{
+ if (adev->kfd.dev) {
+ if (adev->in_s0ix)
+ kgd2kfd_stop_sched_all_nodes(adev->kfd.dev);
+ else
+ kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
+ }
+}
+
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc)
+{
+ int r = 0;
+
+ if (adev->kfd.dev) {
+ if (adev->in_s0ix)
+ r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev);
+ else
+ r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
+ }
+
+ return r;
+}
+
+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev)
{
if (adev->kfd.dev)
- kgd2kfd_suspend(adev->kfd.dev, run_pm);
+ kgd2kfd_suspend_process(adev->kfd.dev);
}
-int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev)
{
int r = 0;
if (adev->kfd.dev)
- r = kgd2kfd_resume(adev->kfd.dev, run_pm);
+ r = kgd2kfd_resume_process(adev->kfd.dev);
return r;
}
-int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
{
int r = 0;
if (adev->kfd.dev)
- r = kgd2kfd_pre_reset(adev->kfd.dev);
+ r = kgd2kfd_pre_reset(adev->kfd.dev, reset_context);
return r;
}
@@ -335,15 +388,18 @@ allocate_mem_reserve_bo_failed:
return r;
}
-void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj)
+void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
{
- struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
+ struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;
- amdgpu_bo_reserve(bo, true);
- amdgpu_bo_kunmap(bo);
- amdgpu_bo_unpin(bo);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&(bo));
+ if (!bo || !*bo)
+ return;
+
+ (void)amdgpu_bo_reserve(*bo, true);
+ amdgpu_bo_kunmap(*bo);
+ amdgpu_bo_unpin(*bo);
+ amdgpu_bo_unreserve(*bo);
+ amdgpu_bo_unref(bo);
}
int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
@@ -430,6 +486,9 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
else
mem_info->local_mem_size_private =
KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ } else if (adev->apu_prefer_gtt) {
+ mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
+ mem_info->local_mem_size_private = 0;
} else {
mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
mem_info->local_mem_size_private = adev->gmc.real_vram_size -
@@ -467,28 +526,6 @@ uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
return 100;
}
-void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info)
-{
- struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
-
- memset(cu_info, 0, sizeof(*cu_info));
- if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
- return;
-
- cu_info->cu_active_number = acu_info.number;
- cu_info->cu_ao_mask = acu_info.ao_cu_mask;
- memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
- sizeof(cu_info->cu_bitmap));
- cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
- cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
- cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
- cu_info->simd_per_cu = acu_info.simd_per_cu;
- cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
- cu_info->wave_front_size = acu_info.wave_front_size;
- cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
- cu_info->lds_size = acu_info.lds_size;
-}
-
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
@@ -545,48 +582,6 @@ out_put:
return r;
}
-uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
- struct amdgpu_device *src)
-{
- struct amdgpu_device *peer_adev = src;
- struct amdgpu_device *adev = dst;
- int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
-
- if (ret < 0) {
- DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
- adev->gmc.xgmi.physical_node_id,
- peer_adev->gmc.xgmi.physical_node_id, ret);
- ret = 0;
- }
- return (uint8_t)ret;
-}
-
-int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
- struct amdgpu_device *src,
- bool is_min)
-{
- struct amdgpu_device *adev = dst, *peer_adev;
- int num_links;
-
- if (adev->asic_type != CHIP_ALDEBARAN)
- return 0;
-
- if (src)
- peer_adev = src;
-
- /* num links returns 0 for indirect peers since indirect route is unknown. */
- num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
- if (num_links < 0) {
- DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
- adev->gmc.xgmi.physical_node_id,
- peer_adev->gmc.xgmi.physical_node_id, num_links);
- num_links = 0;
- }
-
- /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
- return (num_links * 16 * 25000)/BITS_PER_BYTE;
-}
-
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
{
int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
@@ -671,7 +666,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
goto err;
}
- ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job);
+ ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0);
if (ret)
goto err;
@@ -704,12 +699,20 @@ err:
void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
{
- /* Temporary workaround to fix issues observed in some
- * compute applications when GFXOFF is enabled on GFX11.
- */
- if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) {
+ enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
+ if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+ ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) ||
+ (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) {
pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
amdgpu_gfx_off_ctrl(adev, idle);
+ } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
+ (adev->flags & AMD_IS_APU)) {
+ /* Disable GFXOFF and PG. Temporary workaround
+ * for issues seen in some compute applications on GFX9.
+ */
+ struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (gfx_block != NULL)
+ gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
}
amdgpu_dpm_switch_power_profile(adev,
PP_SMC_POWER_PROFILE_COMPUTE,
@@ -724,48 +727,32 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
- uint16_t vmid)
+bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
{
- if (adev->family == AMDGPU_FAMILY_AI) {
- int i;
-
- for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
- } else {
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
- }
-
- return 0;
+ return adev->have_atomics_support;
}
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid,
- enum TLB_FLUSH_TYPE flush_type,
- uint32_t inst)
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
{
- bool all_hub = false;
-
- if (adev->family == AMDGPU_FAMILY_AI ||
- adev->family == AMDGPU_FAMILY_RV)
- all_hub = true;
-
- return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
+ amdgpu_device_flush_hdp(adev, NULL);
}
-bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
+bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
{
- return adev->have_atomics_support;
+ return amdgpu_ras_get_fed_status(adev);
}
-void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
{
- amdgpu_device_flush_hdp(adev, NULL);
+ amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
}
-void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
+void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset)
{
- amdgpu_umc_poison_handler(adev, reset);
+ amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
}
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
@@ -784,34 +771,40 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
return 0;
}
-bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
-{
- if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
- return adev->gfx.ras->query_utcl2_poison_status(adev);
- else
- return false;
-}
-
int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
{
- return kgd2kfd_check_and_lock_kfd();
+ return kgd2kfd_check_and_lock_kfd(adev->kfd.dev);
}
void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
{
- kgd2kfd_unlock_kfd();
+ kgd2kfd_unlock_kfd(adev->kfd.dev);
}
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
{
- u64 tmp;
s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
+ u64 tmp;
if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
- tmp = adev->gmc.mem_partitions[mem_id].size;
+ if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) {
+ /* In NPS1 mode, the reported VRAM size is restricted to
+ * ttm_tt_pages_limit(), which is 1/2 of the system memory.
+ * For other partition modes, the HBM is already divided
+ * uniformly across the reported NUMA nodes. Users who want to
+ * go beyond the default TTM limit and maximize ROCm allocations
+ * can go up to the maximum TTM and sysmem limits.
+ */
+
+ tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_online_nodes();
+ } else {
+ tmp = adev->gmc.mem_partitions[mem_id].size;
+ }
do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
return ALIGN_DOWN(tmp, PAGE_SIZE);
+ } else if (adev->apu_prefer_gtt) {
+ return (ttm_tt_pages_limit() << PAGE_SHIFT);
} else {
return adev->gmc.real_vram_size;
}
@@ -829,6 +822,9 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
if (!ring_funcs)
return -ENOMEM;
@@ -853,8 +849,14 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);
- if (kiq_ring->sched.ready && !adev->job_hang)
- r = amdgpu_ring_test_helper(kiq_ring);
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that the unmap queues packet submitted above was
+ * processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
@@ -866,3 +868,45 @@ free_ring_funcs:
return r;
}
+
+/* Stop scheduling on KFD */
+int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id)
+{
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ return kgd2kfd_stop_sched(adev->kfd.dev, node_id);
+}
+
+/* Start scheduling on KFD */
+int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id)
+{
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ return kgd2kfd_start_sched(adev->kfd.dev, node_id);
+}
+
+/* Check whether any KFD queues are active */
+bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id)
+{
+ if (!adev->kfd.init_complete)
+ return false;
+
+ return kgd2kfd_compute_active(adev->kfd.dev, node_id);
+}
+
+/* Configure CGTT_SQ_CLK_CTRL through the PSP */
+int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable)
+{
+ int r;
+
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ r = psp_config_sq_perfmon(&adev->psp, xcp_id, core_override_enable,
+ reg_override_enable, perfmon_override_enable);
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2fe9860725bd..9e120c934cc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -33,6 +33,7 @@
#include <linux/mmu_notifier.h>
#include <linux/memremap.h>
#include <kgd_kfd_interface.h>
+#include <drm/drm_client.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
#include "amdgpu_xcp.h"
@@ -46,6 +47,8 @@ enum TLB_FLUSH_TYPE {
};
struct amdgpu_device;
+struct kfd_process_device;
+struct amdgpu_reset_context;
enum kfd_mem_attachment_type {
KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
@@ -83,6 +86,7 @@ struct kgd_mem {
struct amdgpu_sync sync;
+ uint32_t gem_handle;
bool aql_queue;
bool is_imported;
};
@@ -103,7 +107,12 @@ struct amdgpu_kfd_dev {
bool init_complete;
struct work_struct reset_work;
- /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
+ /* Client for KFD BO GEM handle allocations */
+ struct drm_client_dev client;
+
+ /* HMM page migration MEMORY_DEVICE_PRIVATE mapping
+ * Must be last -- ends in a flexible-array member.
+ */
struct dev_pagemap pgmap;
};
@@ -147,8 +156,10 @@ struct amdkfd_process_info {
int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);
-void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
-int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc);
+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev);
+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev);
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry);
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
@@ -162,15 +173,11 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
- uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
- uint32_t inst);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
-int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
@@ -182,15 +189,20 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm,
struct svm_range_bo *svm_bo);
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
#endif
#if IS_ENABLED(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
unsigned long cur_seq, struct kgd_mem *mem);
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence);
#else
static inline
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
@@ -205,9 +217,8 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
}
static inline
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
- return 0;
}
static inline
@@ -216,12 +227,19 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
{
return 0;
}
+static inline
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
+{
+ return 0;
+}
#endif
/* Shared API */
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
-void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj);
+void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj);
int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
void **mem_obj);
void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj);
@@ -235,23 +253,22 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
-void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev,
- struct kfd_cu_info *cu_info);
int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
struct amdgpu_device **dmabuf_adev,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
uint32_t *flags, int8_t *xcp_id);
-uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
- struct amdgpu_device *src);
-int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
- struct amdgpu_device *src,
- bool is_min);
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
uint32_t *payload);
int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
u32 inst);
+int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id);
+int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id);
+int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
+bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id);
+
/* Read user wptr from a specified user address space with page fault
* disabled. The memory must be pinned and mapped to the hardware when
@@ -281,14 +298,10 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
(&((struct amdgpu_fpriv *) \
((struct drm_file *)(drm_priv))->driver_priv)->vm)
-int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
- struct amdgpu_vm *avm, u32 pasid);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
struct amdgpu_vm *avm,
void **process_info,
struct dma_fence **ef);
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
- void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
uint8_t xcp_id);
@@ -303,34 +316,39 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
void **kptr, uint64_t *size);
void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
-int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
- struct dma_fence **ef);
+ struct dma_fence __rcu **ef);
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
struct kfd_vm_fault_info *info);
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
- struct dma_buf *dmabuf,
- uint64_t va, void *drm_priv,
- struct kgd_mem **mem, uint64_t *size,
- uint64_t *mmap_offset);
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset);
int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
struct dma_buf **dmabuf);
void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config);
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
- bool reset);
-bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
+ enum amdgpu_ras_block block, uint32_t reset);
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+
+bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
+bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem);
void amdgpu_amdkfd_block_mmu_notifications(void *p);
int amdgpu_amdkfd_criu_resume(void *p);
-bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag, int8_t xcp_id);
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
@@ -397,15 +415,26 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf);
bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);
-void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
-int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
-int kgd2kfd_pre_reset(struct kfd_dev *kfd);
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc);
+int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc);
+void kgd2kfd_suspend_process(struct kfd_dev *kfd);
+int kgd2kfd_resume_process(struct kfd_dev *kfd);
+int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
-int kgd2kfd_check_and_lock_kfd(void);
-void kgd2kfd_unlock_kfd(void);
+int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd);
+void kgd2kfd_unlock_kfd(struct kfd_dev *kfd);
+int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd);
+int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd);
+bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault);
+
#else
static inline int kgd2kfd_init(void)
{
@@ -433,16 +462,26 @@ static inline void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
}
-static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
+static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc)
{
}
-static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
+static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc)
{
return 0;
}
-static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd)
+{
+}
+
+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context)
{
return 0;
}
@@ -467,13 +506,45 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
}
-static inline int kgd2kfd_check_and_lock_kfd(void)
+static inline int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
{
return 0;
}
-static inline void kgd2kfd_unlock_kfd(void)
+static inline void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
{
}
+
+static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return false;
+}
+
+static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ return false;
+}
+
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index aff08321e976..7e9f7a280c1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -189,6 +189,9 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 625db444df1c..1105a09e55dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -20,7 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/module.h>
-#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include "amdgpu.h"
@@ -200,7 +199,7 @@ int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -290,7 +289,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
- if (!(ring && ring->sched.thread))
+ if (!amdgpu_ring_sched_ready(ring))
continue;
/* stop secheduler and drain ring. */
@@ -300,7 +299,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus
if (r)
goto out;
} else {
- drm_sched_start(&ring->sched, false);
+ drm_sched_start(&ring->sched, 0);
}
}
@@ -321,7 +320,7 @@ static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_wai
if (!down_read_trylock(&adev->reset_domain->sem))
return;
- amdgpu_amdkfd_suspend(adev, false);
+ amdgpu_amdkfd_suspend(adev, true);
if (suspend_resume_compute_scheduler(adev, true))
goto out;
@@ -334,7 +333,7 @@ static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_wai
out:
suspend_resume_compute_scheduler(adev, false);
- amdgpu_amdkfd_resume(adev, false);
+ amdgpu_amdkfd_resume(adev, true);
up_read(&adev->reset_domain->sem);
}
@@ -416,7 +415,10 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
- .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 469785d33791..1ef758ac5076 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
return NULL;
fence = container_of(f, struct amdgpu_amdkfd_fence, base);
- if (fence && f->ops == &amdkfd_fence_ops)
+ if (f->ops == &amdkfd_fence_ops)
return fence;
return NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index 490c8f5ddb60..89a45a9218f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -141,7 +141,7 @@ static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -300,14 +300,13 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);
for (reg = hqd_base; reg <= hqd_end; reg++)
- WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+ WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL),
- data);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, data);
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO),
- lower_32_bits(guessed_wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI),
- upper_32_bits(guessed_wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR),
- lower_32_bits((uintptr_t)wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
- regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
upper_32_bits((uintptr_t)wptr));
- WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1),
- (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
- queue_id));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR),
- REG_SET_FIELD(m->cp_hqd_eop_rptr,
- CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR,
+ REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data);
kgd_gfx_v9_release_queue(adev, inst);
@@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch(
VALID,
1);
- WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
- watch_address_high);
+ watch_address_high, inst);
- WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
regTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
- watch_address_low);
+ watch_address_low, inst);
return watch_address_cntl;
}
@@ -513,6 +509,17 @@ static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev,
return 0;
}
+/*
+ * kgd_gfx_v9_4_3_hqd_sdma_get_doorbell - report the doorbell of an SDMA RLC queue
+ * @adev: device whose SDMA registers are read
+ * @engine: SDMA engine index, forwarded to get_sdma_rlc_reg_offset()
+ * @queue: RLC queue index, forwarded to get_sdma_rlc_reg_offset()
+ *
+ * Reads SDMA_RLC0_CONTEXT_STATUS and SDMA_RLC0_DOORBELL_OFFSET at the
+ * per-queue register offset. Both registers are read unconditionally.
+ *
+ * Return: the DOORBELL_OFFSET register value shifted right by two when the
+ * SELECTED field of CONTEXT_STATUS is non-zero, otherwise 0.
+ *
+ * NOTE(review): the ">> 2" presumably converts a byte offset into a dword
+ * doorbell index - confirm against the register specification.
+ */
+static uint32_t kgd_gfx_v9_4_3_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ uint32_t reg_offset = get_sdma_rlc_reg_offset(adev, engine, queue);
+ uint32_t status = RREG32(regSDMA_RLC0_CONTEXT_STATUS + reg_offset);
+ uint32_t doorbell_off = RREG32(regSDMA_RLC0_DOORBELL_OFFSET + reg_offset);
+ bool is_active = !!REG_GET_FIELD(status, SDMA_RLC0_CONTEXT_STATUS, SELECTED);
+
+ /* A queue whose SELECTED field is clear is reported as doorbell 0. */
+ return is_active ? doorbell_off >> 2 : 0;
+}
+
const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
@@ -534,8 +541,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings =
kgd_gfx_v9_program_trap_handler_settings,
- .build_grace_period_packet_info =
- kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info =
+ kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
@@ -545,5 +552,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
kgd_gfx_v9_4_3_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v9_4_3_set_address_watch,
- .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch
+ .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_4_3_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 69810b3f1c63..0239114fb6c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -352,7 +352,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -449,7 +449,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -881,6 +881,7 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
}
#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H)
uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
uint64_t watch_address,
uint32_t watch_address_mask,
@@ -889,55 +890,93 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
uint32_t debug_vmid,
uint32_t inst)
{
+ /* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the
+ * same values.
+ */
uint32_t watch_address_high;
uint32_t watch_address_low;
- uint32_t watch_address_cntl;
-
- watch_address_cntl = 0;
+ uint32_t tcp_watch_address_cntl;
+ uint32_t sq_watch_address_cntl;
watch_address_low = lower_32_bits(watch_address);
watch_address_high = upper_32_bits(watch_address) & 0xffff;
- watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ tcp_watch_address_cntl = 0;
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
TCP_WATCH0_CNTL,
VMID,
debug_vmid);
- watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
TCP_WATCH0_CNTL,
MODE,
watch_mode);
- watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
TCP_WATCH0_CNTL,
MASK,
watch_address_mask >> 7);
+ sq_watch_address_cntl = 0;
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
/* Turning off this watch point until we set all the registers */
- watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
0);
-
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
(watch_id * TCP_WATCH_STRIDE)),
- watch_address_cntl);
+ tcp_watch_address_cntl);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 0);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
+
+ /* Program {TCP,SQ}_WATCH?_ADDR* */
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_high);
-
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_low);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_high);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_low);
+
/* Enable the watch point */
- watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
1);
-
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
(watch_id * TCP_WATCH_STRIDE)),
- watch_address_cntl);
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 1);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
return 0;
}
@@ -953,8 +992,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
(watch_id * TCP_WATCH_STRIDE)),
watch_address_cntl);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_cntl);
+
return 0;
}
+#undef TCP_WATCH_STRIDE
+#undef SQ_WATCH_STRIDE
/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
@@ -976,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}
-void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;
- /*
- * The CP cannont handle a 0 grace period input and will result in
- * an infinite grace period being set so set to 1 to prevent this.
- */
- if (grace_period == 0)
- grace_period = 1;
-
- *reg_data = REG_SET_FIELD(*reg_data,
- CP_IQ_WAIT_TIME2,
- SCH_WAVE,
- grace_period);
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
@@ -1025,6 +1070,26 @@ static void program_trap_handler_settings(struct amdgpu_device *adev,
unlock_srbm(adev);
}
+/*
+ * kgd_gfx_v10_hqd_get_pq_addr - placeholder for the hqd_get_pq_addr callback
+ *
+ * Ignores every argument, touches no hardware and always returns 0.
+ */
+uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ return 0;
+}
+
+/*
+ * kgd_gfx_v10_hqd_reset - placeholder for the hqd_reset callback
+ *
+ * Ignores every argument (including @utimeout), touches no hardware and
+ * always returns 0.
+ */
+uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ return 0;
+}
+
+/*
+ * kgd_gfx_v10_hqd_sdma_get_doorbell - placeholder for the
+ * hqd_sdma_get_doorbell callback
+ *
+ * Ignores @adev, @engine and @queue and always returns 0.
+ */
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -1050,6 +1115,9 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.set_address_watch = kgd_gfx_v10_set_address_watch,
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
index 67bcaa3d4226..a4c607c88178 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -51,8 +51,20 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
-void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
+uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst);
+uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst,
+ unsigned int utimeout);
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index 8c8437a4383f..f2278a0937ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -338,7 +338,7 @@ static int hqd_dump_v10_3(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -435,7 +435,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+12)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -673,12 +673,15 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
.set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v10_set_address_watch,
- .clear_address_watch = kgd_gfx_v10_clear_address_watch
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
index d67d003bada2..aaccf0b9947d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -323,7 +323,7 @@ static int hqd_dump_v11(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -420,7 +420,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (7+11+1+12+12)
- *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -658,7 +658,7 @@ static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev
KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 4))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 4))
*trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
@@ -786,6 +786,26 @@ static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
return 0;
}
+/*
+ * kgd_gfx_v11_hqd_get_pq_addr - placeholder for the hqd_get_pq_addr callback
+ *
+ * Ignores every argument, touches no hardware and always returns 0.
+ */
+static uint64_t kgd_gfx_v11_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ return 0;
+}
+
+/*
+ * kgd_gfx_v11_hqd_reset - placeholder for the hqd_reset callback
+ *
+ * Ignores every argument (including @utimeout), touches no hardware and
+ * always returns 0.
+ */
+static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ return 0;
+}
+
+/*
+ * kgd_gfx_v11_hqd_sdma_get_doorbell - placeholder for the
+ * hqd_sdma_get_doorbell callback
+ *
+ * Ignores @adev, @engine and @queue and always returns 0.
+ */
+static uint32_t kgd_gfx_v11_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v11,
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
@@ -808,5 +828,8 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v11_set_address_watch,
- .clear_address_watch = kgd_gfx_v11_clear_address_watch
+ .clear_address_watch = kgd_gfx_v11_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v11_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v11_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
new file mode 100644
index 000000000000..e0ceab400b2d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * lock_srbm - take the SRBM mutex and select a mec/pipe/queue/vmid
+ *
+ * Acquires adev->srbm_mutex and then calls soc24_grbm_select() with the
+ * given selectors. Returns with the mutex still held; the caller must pair
+ * this with unlock_srbm().
+ *
+ * NOTE(review): soc24_grbm_select() presumably routes subsequent banked
+ * register accesses to the selected instance - confirm in soc24.c.
+ */
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+/*
+ * unlock_srbm - undo lock_srbm()
+ *
+ * Resets the selection to (0, 0, 0, 0) via soc24_grbm_select() and then
+ * releases adev->srbm_mutex.
+ */
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * acquire_queue - lock the SRBM and select one compute queue
+ * @adev: device
+ * @pipe_id: flat pipe index spanning all MECs
+ * @queue_id: queue index within the pipe
+ *
+ * Splits @pipe_id into a 1-based MEC number and a pipe index within that
+ * MEC using adev->gfx.mec.num_pipe_per_mec, then calls lock_srbm() with
+ * VMID 0. Must be paired with release_queue().
+ */
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+/* release_queue - counterpart of acquire_queue(); simply calls unlock_srbm(). */
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+/*
+ * init_interrupts_v12 - enable CP interrupts for one compute pipe
+ * @adev: device
+ * @pipe_id: flat pipe index spanning all MECs
+ * @inst: not read by this implementation
+ *
+ * Selects the pipe (queue 0, VMID 0) under the SRBM lock and writes
+ * regCPC_INT_CNTL with the time-stamp and opcode-error interrupt enable
+ * masks. The whole register is overwritten; no other bits are preserved.
+ *
+ * Return: always 0.
+ */
+static int init_interrupts_v12(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ /* Same flat-index to (1-based MEC, pipe) split as acquire_queue(). */
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+/*
+ * get_sdma_rlc_reg_offset - offset to add to an SDMA0 QUEUE0 register name
+ * @adev: device (used by SOC15_REG_OFFSET)
+ * @engine_id: SDMA engine, only 0 and 1 are handled
+ * @queue_id: queue index within the engine
+ *
+ * The result is meant to be added to a regSDMA0_QUEUE0_* constant to obtain
+ * the address of the same register for the requested engine and queue:
+ * the engine base is the SOC15 offset of that engine's QUEUE0_RB_CNTL minus
+ * regSDMA0_QUEUE0_RB_CNTL, and each queue adds the QUEUE1 - QUEUE0 stride.
+ *
+ * Any other @engine_id hits BUG().
+ *
+ * Return: the combined engine + queue register offset.
+ */
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ case 1:
+ /* Still subtract the SDMA0 name: callers add SDMA0 constants. */
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ default:
+ BUG();
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+/*
+ * hqd_dump_v12 - snapshot the HQD registers of one compute queue
+ * @adev: device
+ * @pipe_id: flat pipe index, see acquire_queue()
+ * @queue_id: queue index within the pipe
+ * @dump: out; receives a kmalloc_array()'d table of {addr << 2, value} pairs
+ * @n_regs: out; number of pairs actually stored
+ * @inst: not read by this implementation
+ *
+ * Reads every register from regCP_MQD_BASE_ADDR through
+ * regCP_HQD_PQ_WPTR_HI (inclusive) while the queue is selected. The table
+ * holds HQD_N_REGS (56) entries; DUMP_REG refuses to store past that and
+ * warns once, and a second warning fires if the final count differs from 56.
+ * The "break" in DUMP_REG only leaves the macro's own do/while, so the
+ * register loop keeps iterating without storing.
+ *
+ * HQD_N_REGS and DUMP_REG stay defined after this function and are reused
+ * by hqd_sdma_dump_v12().
+ *
+ * NOTE(review): the buffer is not freed here; presumably the caller owns
+ * and kfree()s *dump - confirm at the call site.
+ *
+ * Return: 0 on success, -ENOMEM if the table cannot be allocated.
+ */
+static int hqd_dump_v12(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+/*
+ * hqd_sdma_dump_v12 - snapshot the registers of one SDMA queue
+ * @adev: device
+ * @engine_id: SDMA engine index, see get_sdma_rlc_reg_offset()
+ * @queue_id: queue index within the engine
+ * @dump: out; receives a kmalloc_array()'d table of {addr << 2, value} pairs
+ * @n_regs: out; number of pairs actually stored
+ *
+ * Dumps the contiguous range regSDMA0_QUEUE0_RB_CNTL ..
+ * regSDMA0_QUEUE0_CONTEXT_STATUS (inclusive), relocated by the per-queue
+ * offset. HQD_N_REGS is redefined to the size of that range, and the
+ * DUMP_REG macro defined in hqd_dump_v12() is reused. Unlike hqd_dump_v12(),
+ * no SRBM lock or queue selection is taken.
+ *
+ * NOTE(review): the buffer is not freed here; presumably the caller owns
+ * and kfree()s *dump - confirm at the call site.
+ *
+ * Return: 0 on success, -ENOMEM if the table cannot be allocated.
+ */
+static int hqd_sdma_dump_v12(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+
+ const uint32_t first_reg = regSDMA0_QUEUE0_RB_CNTL;
+ const uint32_t last_reg = regSDMA0_QUEUE0_CONTEXT_STATUS;
+#undef HQD_N_REGS
+#define HQD_N_REGS (last_reg - first_reg + 1)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = first_reg;
+ reg <= last_reg; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+/*
+ * wave_control_execute_v12 - issue an SQ command to a targeted GFX index
+ * @adev: device
+ * @gfx_index_val: raw value written to GRBM_GFX_INDEX before the command
+ * @sq_cmd: raw value written to SQ_CMD
+ * @inst: not read by this implementation
+ *
+ * Under adev->grbm_idx_mutex: programs GRBM_GFX_INDEX with @gfx_index_val,
+ * writes @sq_cmd to SQ_CMD, then rewrites GRBM_GFX_INDEX with the
+ * instance, SA and SE broadcast-write bits set (all other bits zero).
+ *
+ * Return: always 0.
+ */
+static int wave_control_execute_v12(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
+
+ /* Put GRBM_GFX_INDEX back into full broadcast mode before unlocking. */
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * kgd_gfx_v12_enable_debug_trap - build the SPI_GDBG_PER_VMID_CNTL value
+ * used when enabling the debug trap
+ *
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE: TRAP_EN = 1, EXCP_EN = 0,
+ * EXCP_REPLACE = 0. No register is written here; the value is only
+ * returned. @adev, @restore_dbg_registers and @vmid are not read.
+ */
+static uint32_t kgd_gfx_v12_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/*
+ * kgd_gfx_v12_disable_debug_trap - build the SPI_GDBG_PER_VMID_CNTL value
+ * used when disabling the debug trap
+ *
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE: TRAP_EN = 1, EXCP_EN = 0,
+ * EXCP_REPLACE = 0, i.e. the same value as the enable path. No register is
+ * written here. @adev and @vmid are not read.
+ *
+ * NOTE(review): @keep_trap_enabled is never read and TRAP_EN is always set
+ * to 1 - confirm that leaving the trap enabled on disable is intended for
+ * this hardware generation.
+ */
+static uint32_t kgd_gfx_v12_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/*
+ * kgd_gfx_v12_validate_trap_override_request - check a trap override request
+ * @adev: not read by this implementation
+ * @trap_override: requested override mode
+ * @trap_mask_supported: in/out; narrowed in place to the trap bits listed
+ * below
+ *
+ * The supported mask is narrowed before the mode is checked, so it is
+ * updated even when the function fails.
+ *
+ * Return: 0 if @trap_override is KFD_DBG_TRAP_OVERRIDE_OR or
+ * KFD_DBG_TRAP_OVERRIDE_REPLACE, -EPERM otherwise.
+ */
+static int kgd_gfx_v12_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+/*
+ * trap_mask_map_sw_to_hw - convert a KFD_DBG_TRAP_MASK_* bit set into
+ * SPI_GDBG_PER_VMID_CNTL field values
+ * @mask: software trap mask
+ *
+ * The exception bits of @mask are placed into EXCP_EN unchanged, while the
+ * wave-start and wave-end bits are reduced to 0/1 and stored in
+ * TRAP_ON_START and TRAP_ON_END.
+ *
+ * NOTE(review): this relies on the KFD exception bit positions matching the
+ * EXCP_EN field layout - confirm against kfd_ioctl.h and the register spec.
+ *
+ * Return: a register image containing only those three fields.
+ */
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+/*
+ * trap_mask_map_hw_to_sw - inverse of trap_mask_map_sw_to_hw()
+ * @mask: SPI_GDBG_PER_VMID_CNTL register image
+ *
+ * Starts from the raw EXCP_EN field value and ORs in
+ * KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START / _END when the TRAP_ON_START /
+ * TRAP_ON_END fields are non-zero.
+ *
+ * Return: the equivalent KFD_DBG_TRAP_MASK_* bit set.
+ */
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/*
+ * kgd_gfx_v12_set_wave_launch_trap_override - build a new
+ * SPI_GDBG_PER_VMID_CNTL value for a trap override request
+ * @adev: not read by this implementation
+ * @vmid: not read by this implementation
+ * @trap_override: stored as-is in the EXCP_REPLACE field
+ * @trap_mask_bits: new values for the trap bits being changed
+ * @trap_mask_request: selects which trap bits are being changed
+ * @trap_mask_prev: out; previous software trap mask decoded from
+ * @kfd_dbg_trap_cntl_prev
+ * @kfd_dbg_trap_cntl_prev: previous SPI_GDBG_PER_VMID_CNTL register image
+ *
+ * Bits selected by @trap_mask_request take their value from
+ * @trap_mask_bits; all other bits keep their previous value. The merged
+ * mask is converted to hardware fields and TRAP_EN is forced to 1.
+ * No register is written here.
+ *
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE (plus TRAP_ON_START and
+ * TRAP_ON_END from the mask conversion).
+ */
+static uint32_t kgd_gfx_v12_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+/*
+ * kgd_gfx_v12_set_wave_launch_mode - build the SPI_GDBG_PER_VMID_CNTL
+ * value for a wave launch mode
+ * @adev: not read by this implementation
+ * @wave_launch_mode: requested mode; the value 4 is treated as stall mode
+ * @vmid: not read by this implementation
+ *
+ * Returns STALL_VMID or LAUNCH_MODE: for mode 4 only STALL_VMID = 1 is set,
+ * otherwise only LAUNCH_MODE = @wave_launch_mode is set. No register is
+ * written here.
+ */
+static uint32_t kgd_gfx_v12_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_stall_mode = wave_launch_mode == 4;
+
+ if (is_stall_mode)
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, STALL_VMID,
+ 1);
+ else
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE,
+ wave_launch_mode);
+
+ return data;
+}
+
+/* Register distance between consecutive TCP_WATCHn register groups. */
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+/*
+ * kgd_gfx_v12_set_address_watch - program the address of one TCP watch point
+ * @adev: device
+ * @watch_address: address to watch; only the low 48 bits are programmed
+ * @watch_address_mask: address mask; stored in the MASK field shifted
+ * right by 7
+ * @watch_id: watch point index, scaled by TCP_WATCH_STRIDE
+ * @watch_mode: stored in the MODE field
+ * @debug_vmid: not read by this implementation
+ * @inst: not read by this implementation
+ *
+ * Writes TCP_WATCHn_ADDR_H and TCP_WATCHn_ADDR_L through WREG32_RLC. The
+ * control word (MODE, MASK, VALID = 1) is only built and returned; this
+ * function does not write TCP_WATCHn_CNTL itself.
+ *
+ * Return: the TCP_WATCH0_CNTL-format control value.
+ */
+static uint32_t kgd_gfx_v12_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ /* Keep only address bits 47:32 for the high register. */
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+/*
+ * kgd_gfx_v12_clear_address_watch - clear_address_watch callback
+ *
+ * Ignores @adev and @watch_id, writes no register and always returns 0.
+ */
+static uint32_t kgd_gfx_v12_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+/*
+ * kgd_gfx_v12_hqd_sdma_get_doorbell - placeholder for the
+ * hqd_sdma_get_doorbell callback
+ *
+ * Ignores @adev, @engine and @queue and always returns 0.
+ */
+static uint32_t kgd_gfx_v12_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
+/*
+ * KFD-to-KGD callback table for GFX v12.
+ *
+ * Only the members listed here are populated; every other member of
+ * struct kfd2kgd_calls is left NULL by the designated initializer
+ * (get_atc_vmid_pasid_mapping_info is set to NULL explicitly).
+ *
+ * NOTE(review): hqd_get_pq_addr and hqd_reset are not set here, unlike the
+ * v10/v11 tables touched by the same change - confirm that callers check
+ * these pointers before use.
+ */
+const struct kfd2kgd_calls gfx_v12_kfd2kgd = {
+ .init_interrupts = init_interrupts_v12,
+ .hqd_dump = hqd_dump_v12,
+ .hqd_sdma_dump = hqd_sdma_dump_v12,
+ .wave_control_execute = wave_control_execute_v12,
+ .get_atc_vmid_pasid_mapping_info = NULL,
+ .enable_debug_trap = kgd_gfx_v12_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v12_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v12_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v12_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v12_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v12_set_address_watch,
+ .clear_address_watch = kgd_gfx_v12_clear_address_watch,
+ .hqd_sdma_get_doorbell = kgd_gfx_v12_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 6bf448ab3dff..df77558e03ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -214,7 +214,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -301,7 +301,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (19+4)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -561,6 +561,13 @@ static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev)
return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}
+/*
+ * kgd_hqd_sdma_get_doorbell - placeholder for the hqd_sdma_get_doorbell
+ * callback on GFX v7
+ *
+ * Ignores @adev, @engine and @queue and always returns 0.
+ */
+static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -578,4 +585,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
.set_scratch_backing_va = set_scratch_backing_va,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+ .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index cd06e4a6d1da..e68c0fa8d751 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -238,7 +238,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -324,7 +324,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -582,6 +582,13 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev,
lower_32_bits(page_table_base));
}
+/*
+ * kgd_hqd_sdma_get_doorbell - placeholder for the hqd_sdma_get_doorbell
+ * callback on GFX v8
+ *
+ * Ignores @adev, @engine and @queue and always returns 0.
+ */
+static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -599,4 +606,5 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
get_atc_vmid_pasid_mapping_info,
.set_scratch_backing_va = set_scratch_backing_va,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 3c45a188b701..088d09cc7a72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
{
kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config);
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_BASES, sh_mem_bases);
/* APE1 no longer exists on GFX9 */
kgd_gfx_v9_unlock_srbm(adev, inst);
@@ -239,14 +239,13 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
for (reg = hqd_base;
reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
- WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
+ WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL),
- data);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL, data);
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -275,25 +274,24 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO),
- lower_32_bits(guessed_wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI),
- upper_32_bits(guessed_wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR),
- lower_32_bits((uintptr_t)wptr));
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
- upper_32_bits((uintptr_t)wptr));
- WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1,
- (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR),
- REG_SET_FIELD(m->cp_hqd_eop_rptr,
- CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR,
+ REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE, data);
kgd_gfx_v9_release_queue(adev, inst);
@@ -365,7 +363,7 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
(*dump)[i++][1] = RREG32(addr); \
} while (0)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -462,7 +460,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
@@ -556,7 +554,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
break;
}
- WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST, type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
@@ -677,7 +675,7 @@ void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
int i;
uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
stall ? 1 << vmid : 0);
else
@@ -908,8 +906,8 @@ void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t inst)
{
- *wait_times = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
- mmCP_IQ_WAIT_TIME2));
+ *wait_times = RREG32_SOC15_RLC(GC, GET_INST(GC, inst),
+ mmCP_IQ_WAIT_TIME2);
}
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
@@ -946,34 +944,34 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
*
* @adev: Handle of device whose registers are to be read
* @queue_idx: Index of queue in the queue-map bit-field
- * @wave_cnt: Output parameter updated with number of waves in flight
- * @vmid: Output parameter updated with VMID of queue whose wave count
- * is being collected
+ * @queue_cnt: Stores the wave count and doorbell offset for an active queue
* @inst: xcc's instance number on a multi-XCC setup
*/
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
- int *wave_cnt, int *vmid, uint32_t inst)
+ struct kfd_cu_occupancy *queue_cnt, uint32_t inst)
{
int pipe_idx;
int queue_slot;
unsigned int reg_val;
-
+ unsigned int wave_cnt;
/*
* Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
* parameters to read out waves in flight. Get VMID if there are
* non-zero waves in flight.
*/
- *vmid = 0xFF;
- *wave_cnt = 0;
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
- soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
- reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
- queue_slot);
- *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
- if (*wave_cnt != 0)
- *vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) &
- CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
+ soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
+ reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
+ wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
+ if (wave_cnt != 0) {
+ queue_cnt->wave_cnt += wave_cnt;
+ queue_cnt->doorbell_off =
+ (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ }
}
/**
@@ -983,9 +981,8 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* or more queues running and submitting waves to compute units.
*
* @adev: Handle of device from which to get number of waves in flight
- * @pasid: Identifies the process for which this query call is invoked
- * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
- * belong to process with given pasid
+ * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset
+ * for comparison later.
* @max_waves_per_cu: Output parameter updated with maximum number of waves
* possible per Compute Unit
* @inst: xcc's instance number on a multi-XCC setup
@@ -1013,111 +1010,92 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* number of waves that are in flight for the queue at specified index. The
* index ranges from 0 to 7.
*
- * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
- * of the wave(s).
+ * If non-zero waves are in flight, store the corresponding doorbell offset
+ * of the queue, along with the wave count.
*
- * Determine if VMID from above step maps to pasid provided as parameter. If
- * it matches agrregate the wave count. That the VMID will not match pasid is
- * a normal condition i.e. a device is expected to support multiple queues
- * from multiple proceses.
+ * Determine if the queue belongs to the process by comparing the doorbell
+ * offset against the process's queues. If it matches, aggregate the wave
+ * count for the process.
*
* Reading registers referenced above involves programming GRBM appropriately
*/
-void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
- int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst)
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst)
{
int qidx;
- int vmid;
int se_idx;
- int sh_idx;
int se_cnt;
- int sh_cnt;
- int wave_cnt;
int queue_map;
- int pasid_tmp;
int max_queue_cnt;
- int vmid_wave_cnt = 0;
- DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
+ DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);
lock_spi_csq_mutexes(adev);
- soc15_grbm_select(adev, 1, 0, 0, 0, inst);
+ soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));
/*
* Iterate through the shader engines and arrays of the device
* to get number of waves in flight
*/
bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap,
- KGD_MAX_QUEUES);
+ AMDGPU_MAX_QUEUES);
max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
- sh_cnt = adev->gfx.config.max_sh_per_se;
se_cnt = adev->gfx.config.max_shader_engines;
for (se_idx = 0; se_idx < se_cnt; se_idx++) {
- for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
+ amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
+ queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);
+
+ /*
+ * Assumption: queue map encodes following schema: four
+ * pipes per each micro-engine, with each pipe mapping
+ * eight queues. This schema is true for GFX9 devices
+ * and must be verified for newer device families
+ */
+ for (qidx = 0; qidx < max_queue_cnt; qidx++) {
+ /* Skip queues that are not associated with
+ * compute functions
+ */
+ if (!test_bit(qidx, cp_queue_bitmap))
+ continue;
- amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst);
- queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
+ if (!(queue_map & (1 << qidx)))
+ continue;
- /*
- * Assumption: queue map encodes following schema: four
- * pipes per each micro-engine, with each pipe mapping
- * eight queues. This schema is true for GFX9 devices
- * and must be verified for newer device families
- */
- for (qidx = 0; qidx < max_queue_cnt; qidx++) {
-
- /* Skip qeueus that are not associated with
- * compute functions
- */
- if (!test_bit(qidx, cp_queue_bitmap))
- continue;
-
- if (!(queue_map & (1 << qidx)))
- continue;
-
- /* Get number of waves in flight and aggregate them */
- get_wave_count(adev, qidx, &wave_cnt, &vmid,
- inst);
- if (wave_cnt != 0) {
- pasid_tmp =
- RREG32(SOC15_REG_OFFSET(OSSSYS, inst,
- mmIH_VMID_0_LUT) + vmid);
- if (pasid_tmp == pasid)
- vmid_wave_cnt += wave_cnt;
- }
- }
+ /* Get number of waves in flight and aggregate them */
+ get_wave_count(adev, qidx, &cu_occupancy[qidx],
+ inst);
}
}
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
- soc15_grbm_select(adev, 0, 0, 0, 0, inst);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
unlock_spi_csq_mutexes(adev);
/* Update the output parameters and return */
- *pasid_wave_cnt = vmid_wave_cnt;
*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
adev->gfx.cu_info.max_waves_per_simd;
}
-void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;
- /*
- * The CP cannot handle a 0 grace period input and will result in
- * an infinite grace period being set so set to 1 to prevent this.
- */
- if (grace_period == 0)
- grace_period = 1;
-
- *reg_data = REG_SET_FIELD(*reg_data,
- CP_IQ_WAIT_TIME2,
- SCH_WAVE,
- grace_period);
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
@@ -1146,6 +1124,112 @@ void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
kgd_gfx_v9_unlock_srbm(adev, inst);
}
+uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ uint32_t low, high;
+ uint64_t queue_addr = 0; /* value returned; stays 0 on both early-exit paths below */
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+
+ if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE))
+ goto unlock_out; /* CP_HQD_ACTIVE reads 0: nothing to report */
+
+ low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE);
+ high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI);
+
+ /* Only concerned with user queues: a zero PQ_BASE_HI is skipped and 0 is returned. */
+ if (!high)
+ goto unlock_out;
+
+ queue_addr = (((queue_addr | high) << 32) | low) << 8; /* join HI:LO, then << 8 (presumably regs hold addr >> 8 - confirm) */
+
+unlock_out:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, inst); /* undo in reverse order of setup */
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return queue_addr;
+}
+
+/* Poll CP_HQD_ACTIVE until the ACTIVE bit clears (0) or the timeout passes (-ETIME); assume queue acquired */
+static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev, uint32_t inst,
+ unsigned int utimeout)
+{
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; /* utimeout scaled by HZ / 1000, i.e. presumably milliseconds */
+
+ while (true) {
+ uint32_t temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
+
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ return 0; /* ACTIVE bit is clear: done waiting */
+
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME; /* deadline passed with ACTIVE still set */
+
+ usleep_range(500, 1000); /* sleep between register polls */
+ }
+}
+
+uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ uint32_t low, high, pipe_reset_data = 0;
+ uint64_t queue_addr = 0; /* value returned; 0 is what the final log line reports as "failed!" */
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+
+ if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE))
+ goto unlock_out; /* CP_HQD_ACTIVE reads 0: no reset attempted, 0 returned */
+
+ low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE);
+ high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI);
+
+ /* Only concerned with user queues: a zero PQ_BASE_HI is skipped and 0 is returned. */
+ if (!high)
+ goto unlock_out;
+
+ queue_addr = (((queue_addr | high) << 32) | low) << 8; /* join HI:LO, then << 8 (presumably regs hold addr >> 8 - confirm) */
+
+ pr_debug("Attempting queue reset on XCC %i pipe id %i queue id %i\n",
+ inst, pipe_id, queue_id);
+
+ /* assume the previously issued dequeue request will take effect after the reset */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
+ goto unlock_out; /* wait returned 0 (ACTIVE cleared): queue reset sufficed, skip pipe reset */
+
+ pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst, pipe_id);
+
+ pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL, MEC_ME1_PIPE0_RESET, 1);
+ pipe_reset_data = pipe_reset_data << pipe_id; /* presumably moves the PIPE0 reset bit to this pipe's bit - confirm */
+
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, pipe_reset_data);
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0); /* then write 0 back to the same register */
+
+ if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
+ queue_addr = 0; /* still active after the pipe reset: report failure */
+
+unlock_out:
+ pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n",
+ inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!");
+ amdgpu_gfx_rlc_exit_safe_mode(adev, inst); /* undo in reverse order of setup */
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return queue_addr;
+}
+
+uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+
+{
+ return 0; /* stub: adev, engine and queue are not used; always returns 0 */
+}
+
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -1171,7 +1255,10 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index ce424615f59b..704452ca62f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -52,8 +52,9 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid);
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base);
-void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
- int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst);
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst);
@@ -96,8 +97,20 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
-void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
+uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst);
+uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst,
+ unsigned int utimeout);
+uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7d6daf8d2bfa..83020963dfde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -44,6 +44,7 @@
* changes to accumulate
*/
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+#define AMDGPU_RESERVE_MEM_LIMIT (3UL << 29)
/*
* Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
@@ -117,11 +118,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
return;
si_meminfo(&si);
- mem = si.freeram - si.freehigh;
+ mem = si.totalram - si.totalhigh;
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
- kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6);
+ if (kfd_mem_limit.max_system_mem_limit < 2 * AMDGPU_RESERVE_MEM_LIMIT)
+ kfd_mem_limit.max_system_mem_limit >>= 1;
+ else
+ kfd_mem_limit.max_system_mem_limit -= AMDGPU_RESERVE_MEM_LIMIT;
+
kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
@@ -165,6 +171,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
size_t system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
uint64_t vram_size = 0;
@@ -189,7 +197,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
return -EINVAL;
vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
- if (adev->gmc.is_app_apu) {
+ if (adev->apu_prefer_gtt) {
system_mem_needed = size;
ttm_mem_needed = size;
}
@@ -205,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit)
+ kfd_mem_limit.max_system_mem_limit) {
pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
+ if (!no_system_mem_limit) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
- if ((kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
- (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
- kfd_mem_limit.max_ttm_mem_limit) ||
- (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
- vram_size - reserved_for_pt)) {
+ if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+ kfd_mem_limit.max_ttm_mem_limit) {
ret = -ENOMEM;
goto release;
}
+ /* If is_app_apu is false and apu_prefer_gtt is true, this is an APU with
+ * carve-out < GTT. In that case VRAM allocations go to the GTT domain, so skip
+ * the VRAM check since the ttm_mem_limit check already covers this allocation.
+ */
+
+ if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
+ uint64_t vram_available =
+ vram_size - reserved_for_pt - reserved_for_ras -
+ atomic64_read(&adev->vram_pin_size);
+ if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
+
/* Update memory accounting by decreasing available system
* memory, TTM memory and GPU memory as computed above
*/
@@ -225,7 +249,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
"adev reference can't be null when vram is used");
if (adev && xcp_id >= 0) {
adev->kfd.vram_used[xcp_id] += vram_needed;
- adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
+ adev->kfd.vram_used_aligned[xcp_id] +=
+ adev->apu_prefer_gtt ?
vram_needed :
ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
}
@@ -253,7 +278,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
if (adev) {
adev->kfd.vram_used[xcp_id] -= size;
- if (adev->gmc.is_app_apu) {
+ if (adev->apu_prefer_gtt) {
adev->kfd.vram_used_aligned[xcp_id] -= size;
kfd_mem_limit.system_mem_used -= size;
kfd_mem_limit.ttm_mem_used -= size;
@@ -361,40 +386,32 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
return 0;
}
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+/**
+ * amdgpu_amdkfd_remove_all_eviction_fences - Remove all eviction fences
+ * @bo: the BO to remove the eviction fences from.
+ *
+ * This function should only be used on release when all references to the BO
+ * are already dropped. We remove the eviction fence from the private copy of
+ * the dma_resv object here since that is what is used during release to
+ * determine if the BO is idle or not.
+ */
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
- struct amdgpu_bo *root = bo;
- struct amdgpu_vm_bo_base *vm_bo;
- struct amdgpu_vm *vm;
- struct amdkfd_process_info *info;
- struct amdgpu_amdkfd_fence *ef;
- int ret;
-
- /* we can always get vm_bo from root PD bo.*/
- while (root->parent)
- root = root->parent;
-
- vm_bo = root->vm_bo;
- if (!vm_bo)
- return 0;
-
- vm = vm_bo->vm;
- if (!vm)
- return 0;
+ struct dma_resv *resv = &bo->tbo.base._resv;
+ struct dma_fence *fence, *stub;
+ struct dma_resv_iter cursor;
- info = vm->process_info;
- if (!info || !info->eviction_fence)
- return 0;
+ dma_resv_assert_held(resv);
- ef = container_of(dma_fence_get(&info->eviction_fence->base),
- struct amdgpu_amdkfd_fence, base);
-
- BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
- ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
- dma_resv_unlock(bo->tbo.base.resv);
+ stub = dma_fence_get_stub();
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
+ if (!to_amdgpu_amdkfd_fence(fence))
+ continue;
- dma_fence_put(&ef->base);
- return ret;
+ dma_resv_replace_fences(resv, fence->context, stub,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ dma_fence_put(stub);
}
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
@@ -407,6 +424,10 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
"Called with userptr BO"))
return -EINVAL;
+ /* BO is already pinned, no need to validate it */
+ if (bo->tbo.pin_count)
+ return 0;
+
amdgpu_bo_placement_from_domain(bo, domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -419,6 +440,32 @@ validate_fail:
return ret;
}
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
+{
+ int ret = amdgpu_bo_reserve(bo, false); /* BO stays reserved until unreserve_out */
+
+ if (ret)
+ return ret; /* reservation failed: nothing to unreserve */
+
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, true); /* validate the BO for the requested domain */
+ if (ret)
+ goto unreserve_out;
+
+ ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1); /* make room for the one fence added below */
+ if (ret)
+ goto unreserve_out;
+
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+
+unreserve_out:
+ amdgpu_bo_unreserve(bo); /* reached on success and on failure alike */
+
+ return ret;
+}
+
static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
{
return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
@@ -431,13 +478,15 @@ static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
* again. Page directories are only updated after updating page
* tables.
*/
-static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket)
{
struct amdgpu_bo *pd = vm->root.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
int ret;
- ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL);
+ ret = amdgpu_vm_validate(adev, vm, ticket,
+ amdgpu_amdkfd_validate_vm_bo, NULL);
if (ret) {
pr_err("failed to validate PT BOs\n");
return ret;
@@ -458,10 +507,11 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;
- return amdgpu_sync_fence(sync, vm->last_update);
+ return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
}
-static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct kgd_mem *mem)
{
uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
AMDGPU_VM_MTYPE_DEFAULT;
@@ -471,7 +521,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
- return amdgpu_gem_va_map_flags(adev, mapping_flags);
+ return mapping_flags;
}
/**
@@ -562,12 +612,6 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
{
struct ttm_operation_ctx ctx = {.interruptible = true};
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
- int ret;
-
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (ret)
- return ret;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -689,7 +733,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
return;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
sg_free_table(ttm->sg);
@@ -733,12 +777,12 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
enum dma_data_direction dir;
if (unlikely(!ttm->sg)) {
- pr_err("SG Table of BO is UNEXPECTEDLY NULL");
+ pr_debug("SG Table of BO is NULL");
return;
}
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -774,13 +818,17 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
{
if (!mem->dmabuf) {
- struct dma_buf *ret = amdgpu_gem_prime_export(
- &mem->bo->tbo.base,
+ struct amdgpu_device *bo_adev;
+ struct dma_buf *dmabuf;
+
+ bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ dmabuf = drm_gem_prime_handle_to_dmabuf(&bo_adev->ddev, bo_adev->kfd.client.file,
+ mem->gem_handle,
mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
- DRM_RDWR : 0);
- if (IS_ERR(ret))
- return PTR_ERR(ret);
- mem->dmabuf = ret;
+ DRM_RDWR : 0);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+ mem->dmabuf = dmabuf;
}
return 0;
@@ -828,6 +876,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
uint64_t va = mem->va;
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
struct amdgpu_bo *bo[2] = {NULL, NULL};
+ struct amdgpu_bo_va *bo_va;
bool same_hive = false;
int i, ret;
@@ -844,7 +893,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
* if peer device has large BAR. In contrast, access over xGMI is
* allowed for both small and large BAR configurations of peer device
*/
- if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
+ if ((adev != bo_adev && !adev->apu_prefer_gtt) &&
((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -866,9 +915,10 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
(amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
- same_hive) {
+ (mem->domain == AMDGPU_GEM_DOMAIN_GTT && reuse_dmamap(adev, bo_adev)) ||
+ same_hive) {
/* Mappings on the local GPU, or VRAM mappings in the
- * local hive, or userptr mapping can reuse dma map
+ * local hive, or userptr, or GTT mapping can reuse dma map
* address space share the original BO
*/
attachment[i]->type = KFD_MEM_ATT_SHARED;
@@ -914,7 +964,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
pr_debug("Unable to reserve BO during memory attach");
goto unwind;
}
- attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ bo_va = amdgpu_vm_bo_find(vm, bo[i]);
+ if (!bo_va)
+ bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ else
+ ++bo_va->ref_count;
+ attachment[i]->bo_va = bo_va;
amdgpu_bo_unreserve(bo[i]);
if (unlikely(!attachment[i]->bo_va)) {
ret = -ENOMEM;
@@ -923,7 +978,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
goto unwind;
}
attachment[i]->va = va;
- attachment[i]->pte_flags = get_pte_flags(adev, mem);
+ attachment[i]->pte_flags = get_pte_flags(adev, vm, mem);
attachment[i]->adev = adev;
list_add(&attachment[i]->list, &mem->attachments);
@@ -937,8 +992,9 @@ unwind:
if (!attachment[i])
continue;
if (attachment[i]->bo_va) {
- amdgpu_bo_reserve(bo[i], true);
- amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
+ (void)amdgpu_bo_reserve(bo[i], true);
+ if (--attachment[i]->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
amdgpu_bo_unreserve(bo[i]);
list_del(&attachment[i]->list);
}
@@ -955,7 +1011,8 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
pr_debug("\t remove VA 0x%llx in entry %p\n",
attachment->va, attachment);
- amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
+ if (--attachment->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
drm_gem_object_put(&bo->tbo.base);
list_del(&attachment->list);
kfree(attachment);
@@ -1032,9 +1089,12 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
return 0;
}
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, &range);
if (ret) {
- pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ if (ret == -EAGAIN)
+ pr_debug("Failed to get user pages, try again\n");
+ else
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto unregister_out;
}
@@ -1043,6 +1103,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
pr_err("%s: Failed to reserve BO\n", __func__);
goto release_out;
}
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
@@ -1096,14 +1159,14 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->n_vms = 1;
ctx->sync = &mem->sync;
- drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&ctx->exec) {
ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
drm_exec_retry_on_contention(&ctx->exec);
if (unlikely(ret))
goto error;
- ret = drm_exec_lock_obj(&ctx->exec, &bo->tbo.base);
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
drm_exec_retry_on_contention(&ctx->exec);
if (unlikely(ret))
goto error;
@@ -1135,7 +1198,8 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
int ret;
ctx->sync = &mem->sync;
- drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&ctx->exec) {
ctx->n_vms = 0;
list_for_each_entry(entry, &mem->attachments, list) {
@@ -1188,7 +1252,7 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
return ret;
}
-static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
+static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
struct kfd_mem_attachment *entry,
struct amdgpu_sync *sync)
{
@@ -1196,13 +1260,18 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
struct amdgpu_device *adev = entry->adev;
struct amdgpu_vm *vm = bo_va->base.vm;
- amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+ if (bo_va->queue_refcount) {
+ pr_debug("bo_va->queue_refcount %d\n", bo_va->queue_refcount);
+ return -EBUSY;
+ }
+
+ (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
- amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+ (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
- amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
- kfd_mem_dmaunmap_attachment(mem, entry);
+ return 0;
}
static int update_gpuvm_pte(struct kgd_mem *mem,
@@ -1224,7 +1293,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}
- return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
}
static int map_bo_to_gpuvm(struct kgd_mem *mem,
@@ -1257,17 +1326,19 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
update_gpuvm_pte_failed:
unmap_bo_from_gpuvm(mem, entry, sync);
+ kfd_mem_dmaunmap_attachment(mem, entry);
return ret;
}
-static int process_validate_vms(struct amdkfd_process_info *process_info)
+static int process_validate_vms(struct amdkfd_process_info *process_info,
+ struct ww_acquire_ctx *ticket)
{
struct amdgpu_vm *peer_vm;
int ret;
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
- ret = vm_validate_pt_pd_bos(peer_vm);
+ ret = vm_validate_pt_pd_bos(peer_vm, ticket);
if (ret)
return ret;
}
@@ -1344,7 +1415,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
amdgpu_amdkfd_restore_userptr_worker);
*process_info = info;
- *ef = dma_fence_get(&info->eviction_fence->base);
}
vm->process_info = *process_info;
@@ -1353,7 +1423,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
ret = amdgpu_bo_reserve(vm->root.bo, true);
if (ret)
goto reserve_pd_fail;
- ret = vm_validate_pt_pd_bos(vm);
+ ret = vm_validate_pt_pd_bos(vm, NULL);
if (ret) {
pr_err("validate_pt_pd_bos() failed\n");
goto validate_pd_fail;
@@ -1375,6 +1445,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
list_add_tail(&vm->vm_list_node,
&(vm->process_info->vm_list_head));
vm->process_info->n_vms++;
+ if (ef)
+ *ef = dma_fence_get(&vm->process_info->eviction_fence->base);
mutex_unlock(&vm->process_info->lock);
return 0;
@@ -1386,10 +1458,7 @@ validate_pd_fail:
reserve_pd_fail:
vm->process_info = NULL;
if (info) {
- /* Two fence references: one in info and one in *ef */
dma_fence_put(&info->eviction_fence->base);
- dma_fence_put(*ef);
- *ef = NULL;
*process_info = NULL;
put_pid(info->pid);
create_evict_fence_fail:
@@ -1419,13 +1488,30 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
if (unlikely(ret))
return ret;
- ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+ /*
+ * If bo is not contiguous on VRAM, move to system memory first to ensure
+ * we can get contiguous VRAM space after evicting other BOs.
+ */
+ if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+ struct ttm_operation_ctx ctx = { true, false };
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret)) {
+ pr_debug("validate bo 0x%p to GTT failed %d\n", &bo->tbo, ret);
+ goto out;
+ }
+ }
+ }
+
+ ret = amdgpu_bo_pin(bo, domain);
if (ret)
pr_err("Error in Pinning BO to domain: %d\n", domain);
amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+out:
amdgpu_bo_unreserve(bo);
-
return ret;
}
@@ -1449,27 +1535,6 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
amdgpu_bo_unreserve(bo);
}
-int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
- struct amdgpu_vm *avm, u32 pasid)
-
-{
- int ret;
-
- /* Free the original amdgpu allocated pasid,
- * will be replaced with kfd allocated pasid.
- */
- if (avm->pasid) {
- amdgpu_pasid_free(avm->pasid);
- amdgpu_vm_set_pasid(adev, avm, 0);
- }
-
- ret = amdgpu_vm_set_pasid(adev, avm, pasid);
- if (ret)
- return ret;
-
- return 0;
-}
-
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
struct amdgpu_vm *avm,
void **process_info,
@@ -1527,27 +1592,6 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
}
}
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
- void *drm_priv)
-{
- struct amdgpu_vm *avm;
-
- if (WARN_ON(!adev || !drm_priv))
- return;
-
- avm = drm_priv_to_vm(drm_priv);
-
- pr_debug("Releasing process vm %p\n", avm);
-
- /* The original pasid of amdgpu vm has already been
- * released during making a amdgpu vm to a compute vm
- * The current pasid is managed by kfd and will be
- * released on kfd process destroy. Set amdgpu pasid
- * to 0 to avoid duplicate release.
- */
- amdgpu_vm_release_compute(adev, avm);
-}
-
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1583,7 +1627,8 @@ int amdgpu_amdkfd_criu_resume(void *p)
goto out_unlock;
}
WRITE_ONCE(pinfo->block_mmu_notifications, false);
- schedule_delayed_work(&pinfo->restore_userptr_work, 0);
+ queue_delayed_work(system_freezable_wq,
+ &pinfo->restore_userptr_work, 0);
out_unlock:
mutex_unlock(&pinfo->lock);
@@ -1595,16 +1640,23 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
ssize_t available;
uint64_t vram_available, system_mem_available, ttm_mem_available;
spin_lock(&kfd_mem_limit.mem_limit_lock);
- vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
- - adev->kfd.vram_used_aligned[xcp_id]
- - atomic64_read(&adev->vram_pin_size)
- - reserved_for_pt;
+ if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id];
+ else
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id]
+ - atomic64_read(&adev->vram_pin_size)
+ - reserved_for_pt
+ - reserved_for_ras;
- if (adev->gmc.is_app_apu) {
+ if (adev->apu_prefer_gtt) {
system_mem_available = no_system_mem_limit ?
kfd_mem_limit.max_system_mem_limit :
kfd_mem_limit.max_system_mem_limit -
@@ -1652,7 +1704,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
- if (adev->gmc.is_app_apu) {
+ if (adev->apu_prefer_gtt) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
@@ -1660,6 +1712,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+
+ /* For contiguous VRAM allocation */
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS)
+ alloc_flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
}
xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
0 : fpriv->xcp_id;
@@ -1690,6 +1746,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
+ alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
@@ -1737,6 +1795,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
pr_debug("Failed to allow vma node access. ret %d\n", ret);
goto err_node_allow;
}
+ ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle);
+ if (ret)
+ goto err_gem_handle_create;
bo = gem_to_amdgpu_bo(gobj);
if (bo_type == ttm_bo_type_sg) {
bo->tbo.sg = sg;
@@ -1768,6 +1829,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ } else {
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_validate_bo;
}
if (offset)
@@ -1777,7 +1847,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
allocate_init_user_pages_failed:
err_pin_bo:
+err_validate_bo:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle);
+err_gem_handle_create:
drm_vma_node_revoke(&gobj->vma_node, drm_priv);
err_node_allow:
/* Don't unreserve system mem limit twice */
@@ -1785,6 +1858,7 @@ err_node_allow:
err_bo_create:
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
err_reserve_limit:
+ amdgpu_sync_free(&(*mem)->sync);
mutex_destroy(&(*mem)->lock);
if (gobj)
drm_gem_object_put(gobj);
@@ -1850,18 +1924,16 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
if (unlikely(ret))
return ret;
- /* The eviction fence should be removed by the last unmap.
- * TODO: Log an error condition if the bo still has the eviction fence
- * attached
- */
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
mem->va + bo_size * (1 + mem->aql_queue));
/* Remove from VM internal data structures */
- list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
+ list_for_each_entry_safe(entry, tmp, &mem->attachments, list) {
+ kfd_mem_dmaunmap_attachment(mem, entry);
kfd_mem_detach(entry);
+ }
ret = unreserve_bo_and_vms(&ctx, false, false);
@@ -1883,7 +1955,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
if (size) {
if (!is_imported &&
(mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
- (adev->gmc.is_app_apu &&
+ (adev->apu_prefer_gtt &&
mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
*size = bo_size;
else
@@ -1892,8 +1964,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the BO*/
drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
- if (mem->dmabuf)
+ drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
+ if (mem->dmabuf) {
dma_buf_put(mem->dmabuf);
+ mem->dmabuf = NULL;
+ }
mutex_destroy(&mem->lock);
/* If this releases the last reference, it will end up calling
@@ -1976,23 +2051,10 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
is_invalid_userptr = true;
- ret = vm_validate_pt_pd_bos(avm);
+ ret = vm_validate_pt_pd_bos(avm, NULL);
if (unlikely(ret))
goto out_unreserve;
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
- /* Validate BO only once. The eviction fence gets added to BO
- * the first time it is mapped. Validate will wait for all
- * background evictions to complete.
- */
- ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
- if (ret) {
- pr_debug("Validate failed\n");
- goto out_unreserve;
- }
- }
-
list_for_each_entry(entry, &mem->attachments, list) {
if (entry->bo_va->base.vm != avm || entry->is_mapped)
continue;
@@ -2019,10 +2081,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
mem->mapped_to_gpu_memory);
}
- if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
- dma_resv_add_fence(bo->tbo.base.resv,
- &avm->process_info->eviction_fence->base,
- DMA_RESV_USAGE_BOOKKEEP);
ret = unreserve_bo_and_vms(&ctx, false, false);
goto out;
@@ -2035,11 +2093,41 @@ out:
return ret;
}
+/**
+ * amdgpu_amdkfd_gpuvm_dmaunmap_mem - DMA-unmap a BO's attachments for one VM
+ * @mem: memory object whose attachment list is walked
+ * @drm_priv: opaque handle converted to the target VM with drm_priv_to_vm()
+ *
+ * Takes @mem->lock, reserves @mem->bo and then calls
+ * kfd_mem_dmaunmap_attachment() on every attachment that belongs to the VM
+ * and is not filtered out by the ttm/sg check in the loop.
+ *
+ * Return: 0 on success, or the error returned by amdgpu_bo_reserve().
+ */
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+{
+ struct kfd_mem_attachment *entry;
+ struct amdgpu_vm *vm;
+ int ret;
+
+ vm = drm_priv_to_vm(drm_priv);
+
+ mutex_lock(&mem->lock);
+
+ /* On failure the BO was not reserved: only the mutex is dropped. */
+ ret = amdgpu_bo_reserve(mem->bo, true);
+ if (ret)
+ goto out;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ /* Attachments of other VMs are left untouched. */
+ if (entry->bo_va->base.vm != vm)
+ continue;
+ /*
+ * Skip attachments whose BO has a ttm object without an sg table.
+ * NOTE(review): presumably only sg-backed attachments hold a DMA
+ * mapping that must be released here - confirm.
+ */
+ if (entry->bo_va->base.bo->tbo.ttm &&
+ !entry->bo_va->base.bo->tbo.ttm->sg)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, entry);
+ }
+
+ amdgpu_bo_unreserve(mem->bo);
+out:
+ mutex_unlock(&mem->lock);
+
+ return ret;
+}
+
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
- struct amdkfd_process_info *process_info = avm->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
struct kfd_mem_attachment *entry;
struct bo_vm_reservation_context ctx;
@@ -2056,7 +2144,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
goto unreserve_out;
}
- ret = vm_validate_pt_pd_bos(avm);
+ ret = vm_validate_pt_pd_bos(avm, NULL);
if (unlikely(ret))
goto unreserve_out;
@@ -2072,7 +2160,10 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
entry->va, entry->va + bo_size, entry);
- unmap_bo_from_gpuvm(mem, entry, ctx.sync);
+ ret = unmap_bo_from_gpuvm(mem, entry, ctx.sync);
+ if (ret)
+ goto unreserve_out;
+
entry->is_mapped = false;
mem->mapped_to_gpu_memory--;
@@ -2080,15 +2171,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
mem->mapped_to_gpu_memory);
}
- /* If BO is unmapped from all VMs, unfence it. It can be evicted if
- * required.
- */
- if (mem->mapped_to_gpu_memory == 0 &&
- !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
- !mem->bo->tbo.pin_count)
- amdgpu_amdkfd_remove_eviction_fence(mem->bo,
- process_info->eviction_fence);
-
unreserve_out:
unreserve_bo_and_vms(&ctx, false, false);
out:
@@ -2115,13 +2197,13 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
/**
* amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
- * @adev: Device to which allocated BO belongs
* @bo: Buffer object to be mapped
+ * @bo_gart: Return bo reference
*
* Before return, bo reference count is incremented. To release the reference and unpin/
* unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
*/
-int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart)
{
int ret;
@@ -2148,7 +2230,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_b
amdgpu_bo_unreserve(bo);
- bo = amdgpu_bo_ref(bo);
+ *bo_gart = amdgpu_bo_ref(bo);
return 0;
@@ -2238,7 +2320,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
{
struct amdgpu_bo *bo = mem->bo;
- amdgpu_bo_reserve(bo, true);
+ (void)amdgpu_bo_reserve(bo, true);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
@@ -2255,34 +2337,26 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
return 0;
}
-int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
- struct dma_buf *dma_buf,
- uint64_t va, void *drm_priv,
- struct kgd_mem **mem, uint64_t *size,
- uint64_t *mmap_offset)
+static int import_obj_create(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf,
+ struct drm_gem_object *obj,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
- struct drm_gem_object *obj;
struct amdgpu_bo *bo;
int ret;
- obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
bo = gem_to_amdgpu_bo(obj);
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT))) {
+ AMDGPU_GEM_DOMAIN_GTT)))
/* Only VRAM and GTT BOs are supported */
- ret = -EINVAL;
- goto err_put_obj;
- }
+ return -EINVAL;
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
- if (!*mem) {
- ret = -ENOMEM;
- goto err_put_obj;
- }
+ if (!*mem)
+ return -ENOMEM;
ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
if (ret)
@@ -2307,8 +2381,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
(*mem)->dmabuf = dma_buf;
(*mem)->bo = bo;
(*mem)->va = va;
- (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
- AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
+ !adev->apu_prefer_gtt ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
@@ -2316,12 +2391,57 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
amdgpu_sync_create(&(*mem)->sync);
(*mem)->is_imported = true;
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_remove_mem;
+
return 0;
+err_remove_mem:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_vma_node_revoke(&obj->vma_node, drm_priv);
err_free_mem:
kfree(*mem);
+ return ret;
+}
+
+/**
+ * amdgpu_amdkfd_gpuvm_import_dmabuf_fd - import a DMA-buf file descriptor
+ * @adev: device whose DRM device and KFD client file perform the import
+ * @fd: DMA-buf file descriptor to import
+ * @va: passed through to import_obj_create()
+ * @drm_priv: passed through to import_obj_create()
+ * @mem: passed through to import_obj_create(); on success (*@mem)->gem_handle
+ *       is additionally set to the GEM handle created here
+ * @size: passed through to import_obj_create()
+ * @mmap_offset: passed through to import_obj_create()
+ *
+ * Turns @fd into a GEM handle on adev->kfd.client.file, looks up the GEM
+ * object behind that handle and hands it to import_obj_create().
+ *
+ * Return: 0 on success; the error from drm_gem_prime_fd_to_handle() or
+ * import_obj_create(); or -EINVAL if the handle cannot be looked up.
+ */
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
+{
+ struct drm_gem_object *obj;
+ uint32_t handle;
+ int ret;
+
+ ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd,
+ &handle);
+ if (ret)
+ return ret;
+ /* From here on every failure path must delete the handle again. */
+ obj = drm_gem_object_lookup(adev->kfd.client.file, handle);
+ if (!obj) {
+ ret = -EINVAL;
+ goto err_release_handle;
+ }
+
+ ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size,
+ mmap_offset);
+ if (ret)
+ goto err_put_obj;
+
+ /*
+ * Success: neither the object reference taken by the lookup nor the
+ * handle is released here; the handle is recorded in the kgd_mem.
+ * NOTE(review): presumably both are now owned by *mem - confirm.
+ */
+ (*mem)->gem_handle = handle;
+
+ return 0;
+
+/* Error unwinding, in reverse order of acquisition. */
err_put_obj:
drm_gem_object_put(obj);
+err_release_handle:
+ drm_gem_handle_delete(adev->kfd.client.file, handle);
return ret;
}
@@ -2372,10 +2492,14 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
/* First eviction, stop the queues */
r = kgd2kfd_quiesce_mm(mni->mm,
KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
- if (r)
+
+ if (r && r != -ESRCH)
pr_err("Failed to quiesce KFD\n");
- schedule_delayed_work(&process_info->restore_userptr_work,
- msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+ if (r != -ESRCH)
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}
mutex_unlock(&process_info->notifier_lock);
@@ -2444,8 +2568,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
}
/* Get updated user pages */
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
- &mem->range);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range);
if (ret) {
pr_debug("Failed %d to get user pages\n", ret);
@@ -2459,9 +2582,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
if (ret != -EFAULT)
return ret;
+ /* If applications unmap memory before destroying the userptr
+ * from the KFD, trigger a segmentation fault in VM debug mode.
+ */
+ if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
+ struct kfd_process *p;
+
+ pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
+ pid_nr(process_info->pid), mem->va);
+
+ // Send GPU VM fault to user space
+ p = kfd_lookup_process_by_pid(process_info->pid);
+ if (p) {
+ kfd_signal_vm_fault_event_with_userptr(p, mem->va);
+ kfd_unref_process(p);
+ }
+ }
+
ret = 0;
}
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
+
mutex_lock(&process_info->notifier_lock);
/* Mark the BO as valid unless it was invalidated
@@ -2500,7 +2642,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
amdgpu_sync_create(&sync);
- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, 0, 0);
/* Reserve all BOs and page tables for validation */
drm_exec_until_all_locked(&exec) {
/* Reserve all the page directories */
@@ -2525,7 +2667,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
- ret = process_validate_vms(process_info);
+ ret = process_validate_vms(process_info, NULL);
if (ret)
goto unreserve_out;
@@ -2597,7 +2739,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
/* keep mem without hmm range at userptr_inval_list */
if (!mem->range)
- continue;
+ continue;
/* Only check mem with hmm range associated */
valid = amdgpu_ttm_tt_get_user_pages_done(
@@ -2697,7 +2839,8 @@ unlock_out:
/* If validation failed, reschedule another attempt */
if (evicted_bos) {
- schedule_delayed_work(&process_info->restore_userptr_work,
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
kfd_smi_event_queue_restore_rescheduled(mm);
@@ -2706,6 +2849,23 @@ unlock_out:
put_task_struct(usertask);
}
+/**
+ * replace_eviction_fence - install a new eviction fence and drop the old one
+ * @ef: RCU-annotated fence pointer to update
+ * @new_ef: fence stored into *@ef; no extra reference is taken on it here
+ *
+ * The previous value of *@ef is released with dma_fence_put(). A one-time
+ * warning is emitted if that previous fence had not signaled yet.
+ */
+static void replace_eviction_fence(struct dma_fence __rcu **ef,
+ struct dma_fence *new_ef)
+{
+ struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
+ /* protected by process_info->lock */);
+
+ /* If we're replacing an unsignaled eviction fence, that fence will
+ * never be signaled, and if anyone is still waiting on that fence,
+ * they will hang forever. This should never happen. We should only
+ * replace the fence in restore_work that only gets scheduled after
+ * eviction work signaled the fence.
+ */
+ WARN_ONCE(!dma_fence_is_signaled(old_ef),
+ "Replacing unsignaled eviction fence");
+ /* Drop the reference that was held through *ef. */
+ dma_fence_put(old_ef);
+}
+
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
* KFD process identified by process_info
*
@@ -2724,12 +2884,11 @@ unlock_out:
* 7. Add fence to all PD and PT BOs.
* 8. Unreserve all BOs
*/
-int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef)
{
struct amdkfd_process_info *process_info = info;
struct amdgpu_vm *peer_vm;
struct kgd_mem *mem;
- struct amdgpu_amdkfd_fence *new_fence;
struct list_head duplicate_save;
struct amdgpu_sync sync_obj;
unsigned long failed_size = 0;
@@ -2741,14 +2900,16 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
mutex_lock(&process_info->lock);
- drm_exec_init(&exec, 0);
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
drm_exec_retry_on_contention(&exec);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ pr_err("Locking VM PD failed, ret: %d\n", ret);
goto ttm_reserve_fail;
+ }
}
/* Reserve all BOs and page tables/directory. Add all BOs from
@@ -2761,31 +2922,21 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
gobj = &mem->bo->tbo.base;
ret = drm_exec_prepare_obj(&exec, gobj, 1);
drm_exec_retry_on_contention(&exec);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ pr_err("drm_exec_prepare_obj failed, ret: %d\n", ret);
goto ttm_reserve_fail;
+ }
}
}
amdgpu_sync_create(&sync_obj);
- /* Validate PDs and PTs */
- ret = process_validate_vms(process_info);
- if (ret)
- goto validate_map_fail;
-
- ret = process_sync_pds_resv(process_info, &sync_obj);
- if (ret) {
- pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
- goto validate_map_fail;
- }
-
- /* Validate BOs and map them to GPUVM (update VM page tables). */
+ /* Validate BOs managed by KFD */
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list) {
struct amdgpu_bo *bo = mem->bo;
uint32_t domain = mem->domain;
- struct kfd_mem_attachment *attachment;
struct dma_resv_iter cursor;
struct dma_fence *fence;
@@ -2804,19 +2955,35 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
DMA_RESV_USAGE_KERNEL, fence) {
- ret = amdgpu_sync_fence(&sync_obj, fence);
+ ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
}
}
+ }
+
+ if (failed_size)
+ pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+
+ /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
+ * validations above would invalidate DMABuf imports again.
+ */
+ ret = process_validate_vms(process_info, &exec.ticket);
+ if (ret) {
+ pr_debug("Validating VMs failed, ret: %d\n", ret);
+ goto validate_map_fail;
+ }
+
+ /* Update mappings managed by KFD. */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct kfd_mem_attachment *attachment;
+
list_for_each_entry(attachment, &mem->attachments, list) {
if (!attachment->is_mapped)
continue;
- if (attachment->bo_va->base.bo->tbo.pin_count)
- continue;
-
kfd_mem_dmaunmap_attachment(mem, attachment);
ret = update_gpuvm_pte(mem, attachment, &sync_obj);
if (ret) {
@@ -2826,8 +2993,31 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
}
- if (failed_size)
- pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+ /* Update mappings not managed by KFD */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_device *adev = amdgpu_ttm_adev(
+ peer_vm->root.bo->tbo.bdev);
+
+ struct amdgpu_fpriv *fpriv =
+ container_of(peer_vm, struct amdgpu_fpriv, vm);
+
+ ret = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+ if (ret) {
+ dev_dbg(adev->dev,
+ "Memory eviction: handle PRT moved failed, pid %8d. Try again.\n",
+ pid_nr(process_info->pid));
+ goto validate_map_fail;
+ }
+
+ ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket);
+ if (ret) {
+ dev_dbg(adev->dev,
+ "Memory eviction: handle moved failed, pid %8d. Try again.\n",
+ pid_nr(process_info->pid));
+ goto validate_map_fail;
+ }
+ }
/* Update page directories */
ret = process_update_pds(process_info, &sync_obj);
@@ -2836,25 +3026,47 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
goto validate_map_fail;
}
+ /* Sync with fences on all the page tables. They implicitly depend on any
+ * move fences from amdgpu_vm_handle_moved above.
+ */
+ ret = process_sync_pds_resv(process_info, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+ goto validate_map_fail;
+ }
+
/* Wait for validate and PT updates to finish */
amdgpu_sync_wait(&sync_obj, false);
- /* Release old eviction fence and create new one, because fence only
- * goes from unsignaled to signaled, fence cannot be reused.
- * Use context and mm from the old fence.
+ /* The old eviction fence may be unsignaled if restore happens
+ * after a GPU reset or suspend/resume. Keep the old fence in that
+ * case. Otherwise release the old eviction fence and create new
+ * one, because fence only goes from unsignaled to signaled once
+ * and cannot be reused. Use context and mm from the old fence.
+ *
+ * If an old eviction fence signals after this check, that's OK.
+ * Anyone signaling an eviction fence must stop the queues first
+ * and schedule another restore worker.
*/
- new_fence = amdgpu_amdkfd_fence_create(
+ if (dma_fence_is_signaled(&process_info->eviction_fence->base)) {
+ struct amdgpu_amdkfd_fence *new_fence =
+ amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
process_info->eviction_fence->mm,
NULL);
- if (!new_fence) {
- pr_err("Failed to create eviction fence\n");
- ret = -ENOMEM;
- goto validate_map_fail;
+
+ if (!new_fence) {
+ pr_err("Failed to create eviction fence\n");
+ ret = -ENOMEM;
+ goto validate_map_fail;
+ }
+ dma_fence_put(&process_info->eviction_fence->base);
+ process_info->eviction_fence = new_fence;
+ replace_eviction_fence(ef, dma_fence_get(&new_fence->base));
+ } else {
+ WARN_ONCE(*ef != &process_info->eviction_fence->base,
+ "KFD eviction fence doesn't match KGD process_info");
}
- dma_fence_put(&process_info->eviction_fence->base);
- process_info->eviction_fence = new_fence;
- *ef = dma_fence_get(&new_fence->base);
/* Attach new eviction fence to all BOs except pinned ones */
list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
@@ -2865,7 +3077,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
&process_info->eviction_fence->base,
DMA_RESV_USAGE_BOOKKEEP);
}
- /* Attach eviction fence to PD / PT BOs */
+ /* Attach eviction fence to PD / PT BOs and DMABuf imports */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
struct amdgpu_bo *bo = peer_vm->root.bo;
@@ -2995,12 +3207,13 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
return 0;
}
-bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem)
+bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem)
{
+ struct amdgpu_vm *vm = drm_priv_to_vm(drm_priv);
struct kfd_mem_attachment *entry;
list_for_each_entry(entry, &mem->attachments, list) {
- if (entry->is_mapped && entry->adev == adev)
+ if (entry->is_mapped && entry->bo_va->base.vm == vm)
return true;
}
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 73ee14f7a9a4..763f2b8dcf13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -36,13 +36,6 @@
#include "atombios_encoders.h"
#include "bif/bif_4_1_d.h"
-static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev,
- ATOM_GPIO_I2C_ASSIGMENT *gpio,
- u8 index)
-{
-
-}
-
static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio)
{
struct amdgpu_i2c_bus_rec i2c;
@@ -108,9 +101,6 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *
gpio = &i2c_info->asGPIO_Info[0];
for (i = 0; i < num_indices; i++) {
-
- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i);
-
if (gpio->sucI2cId.ucAccess == id) {
i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
break;
@@ -142,8 +132,6 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev)
gpio = &i2c_info->asGPIO_Info[0];
for (i = 0; i < num_indices; i++) {
- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i);
-
i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
if (i2c.valid) {
@@ -156,6 +144,38 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev)
}
}
+/**
+ * amdgpu_atombios_oem_i2c_init - create the I2C bus for one OEM I2C id
+ * @adev: amdgpu device whose ATOM BIOS GPIO_I2C_Info table is scanned
+ * @i2c_id: I2C id to look for among the table's GPIO I2C assignments
+ *
+ * Walks the GPIO_I2C_Info table and, for the first valid entry whose i2c_id
+ * equals @i2c_id, creates a bus named "OEM 0x<id>" and stores it in
+ * adev->i2c_bus[] at that entry's table index. Nothing happens if the table
+ * header cannot be parsed or no entry matches.
+ */
+void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id)
+{
+	struct atom_context *ctx = adev->mode_info.atom_context;
+	ATOM_GPIO_I2C_ASSIGMENT *gpio;
+	struct amdgpu_i2c_bus_rec i2c;
+	int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info);
+	struct _ATOM_GPIO_I2C_INFO *i2c_info;
+	uint16_t data_offset, size;
+	int i, num_indices;
+	char stmp[32];
+
+	if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+		i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset);
+
+		/* Entry count = table payload size / size of one assignment. */
+		num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+			sizeof(ATOM_GPIO_I2C_ASSIGMENT);
+
+		gpio = &i2c_info->asGPIO_Info[0];
+		for (i = 0; i < num_indices; i++) {
+			i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
+
+			if (i2c.valid && i2c.i2c_id == i2c_id) {
+				/* Bounded write: never exceeds stmp. */
+				snprintf(stmp, sizeof(stmp), "OEM 0x%x", i2c.i2c_id);
+				adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp);
+				break;
+			}
+			/* Step to the next assignment record in the table. */
+			gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+				((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
+		}
+	}
+}
+
struct amdgpu_gpio_rec
amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
u8 id)
@@ -686,7 +706,6 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
}
adev->clock.dp_extclk =
le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
- adev->clock.current_dispclk = adev->clock.default_dispclk;
adev->clock.max_pixel_clock = le16_to_cpu(firmware_info->info.usMaxPixelClock);
if (adev->clock.max_pixel_clock == 0)
@@ -1018,7 +1037,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->post_div = args.v3.ucPostDiv;
dividers->enable_post_div = (args.v3.ucCntlFlag &
@@ -1038,7 +1059,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
if (strobe_mode)
args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->post_div = args.v5.ucPostDiv;
dividers->enable_post_div = (args.v5.ucCntlFlag &
@@ -1056,7 +1079,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
/* fusion */
args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->post_divider = dividers->post_div = args.v4.ucPostDiv;
dividers->real_clock = le32_to_cpu(args.v4.ulClock);
@@ -1067,7 +1092,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
args.v6_in.ulClock.ulComputeClockFlag = clock_type;
args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv);
dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac);
@@ -1109,7 +1136,9 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
if (strobe_mode)
args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac);
mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv);
@@ -1135,8 +1164,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
return 0;
}
-void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
- u32 eng_clock, u32 mem_clock)
+int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock)
{
SET_ENGINE_CLOCK_PS_ALLOCATION args;
int index = GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings);
@@ -1151,7 +1180,8 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
if (mem_clock)
args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ return amdgpu_atom_execute_table(adev->mode_info.atom_context, index,
+ (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
@@ -1205,7 +1235,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
args.v2.ucVoltageMode = 0;
args.v2.usVoltageLevel = 0;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
*voltage = le16_to_cpu(args.v2.usVoltageLevel);
break;
@@ -1214,7 +1246,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
*voltage = le16_to_cpu(args.v3.usVoltageLevel);
break;
@@ -1476,6 +1510,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
(u32)le32_to_cpu(*((u32 *)reg_data + j));
j++;
} else if ((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == DATA_EQU_PREV) {
+ if (i == 0)
+ continue;
reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =
reg_table->mc_reg_table_entry[num_ranges].mc_data[i - 1];
}
@@ -1776,19 +1812,46 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
struct atom_context *ctx = adev->mode_info.atom_context;
- return sysfs_emit(buf, "%s\n", ctx->vbios_ver_str);
+ return sysfs_emit(buf, "%s\n", ctx->vbios_pn);
+}
+
+static ssize_t amdgpu_atombios_get_vbios_build(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ return sysfs_emit(buf, "%s\n", ctx->build_num);
}
static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
NULL);
+static DEVICE_ATTR(vbios_build, 0444, amdgpu_atombios_get_vbios_build, NULL);
static struct attribute *amdgpu_vbios_version_attrs[] = {
- &dev_attr_vbios_version.attr,
- NULL
+ &dev_attr_vbios_version.attr, &dev_attr_vbios_build.attr, NULL
};
+static umode_t amdgpu_vbios_version_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr,
+ int index)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ if (attr == &dev_attr_vbios_build.attr && !strlen(ctx->build_num))
+ return 0;
+
+ return attr->mode;
+}
+
const struct attribute_group amdgpu_vbios_version_attr_group = {
- .attrs = amdgpu_vbios_version_attrs
+ .attrs = amdgpu_vbios_version_attrs,
+ .is_visible = amdgpu_vbios_version_attrs_is_visible,
};
int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 0811474e8fd3..867bc5c5ce67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -136,6 +136,7 @@ amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev,
uint8_t id);
void amdgpu_atombios_i2c_init(struct amdgpu_device *adev);
+void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id);
bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev);
@@ -163,8 +164,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
bool strobe_mode,
struct atom_mpll_param *mpll_param);
-void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
- u32 eng_clock, u32 mem_clock);
+int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock);
bool
amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index fb2681dd6b33..c7d32fb216e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -34,6 +34,7 @@ union firmware_info {
struct atom_firmware_info_v3_2 v32;
struct atom_firmware_info_v3_3 v33;
struct atom_firmware_info_v3_4 v34;
+ struct atom_firmware_info_v3_5 v35;
};
/*
@@ -211,6 +212,7 @@ union igp_info {
struct atom_integrated_system_info_v1_11 v11;
struct atom_integrated_system_info_v1_12 v12;
struct atom_integrated_system_info_v2_1 v21;
+ struct atom_integrated_system_info_v2_3 v23;
};
union umc_info {
@@ -279,6 +281,9 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
case ATOM_DGPU_VRAM_TYPE_GDDR6:
vram_type = AMDGPU_VRAM_TYPE_GDDR6;
break;
+ case ATOM_DGPU_VRAM_TYPE_HBM3E:
+ vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+ break;
default:
vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
break;
@@ -288,7 +293,6 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
return vram_type;
}
-
int
amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
int *vram_width, int *vram_type,
@@ -299,6 +303,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
u16 data_offset, size;
union igp_info *igp_info;
union vram_info *vram_info;
+ union umc_info *umc_info;
union vram_module *vram_module;
u8 frev, crev;
u8 mem_type;
@@ -310,10 +315,16 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
if (adev->flags & AMD_IS_APU)
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
integratedsysteminfo);
- else
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- vram_info);
-
+ else {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, umc_info);
+ break;
+ default:
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info);
+ }
+ }
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size,
&frev, &crev, &data_offset)) {
@@ -359,6 +370,20 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
break;
+ case 3:
+ mem_channel_number = igp_info->v23.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v23.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
default:
return -EINVAL;
}
@@ -367,123 +392,150 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
return -EINVAL;
}
} else {
- vram_info = (union vram_info *)
- (mode_info->atom_context->bios + data_offset);
- module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
- if (frev == 3) {
- switch (crev) {
- /* v30 */
- case 0:
- vram_module = (union vram_module *)vram_info->v30.vram_module;
- mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- mem_type = vram_info->v30.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_info->v30.channel_num;
- mem_channel_width = vram_info->v30.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- break;
- default:
- return -EINVAL;
- }
- } else if (frev == 2) {
- switch (crev) {
- /* v23 */
- case 3:
- if (module_id > vram_info->v23.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v23.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v9.vram_module_size);
- i++;
- }
- mem_type = vram_module->v9.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v9.channel_num;
- mem_channel_width = vram_module->v9.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- /* v24 */
- case 4:
- if (module_id > vram_info->v24.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v24.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v10.vram_module_size);
- i++;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
+
+ if (frev == 4) {
+ switch (crev) {
+ case 0:
+ mem_channel_number = le32_to_cpu(umc_info->v40.channel_num);
+ mem_type = le32_to_cpu(umc_info->v40.vram_type);
+ mem_channel_width = le32_to_cpu(umc_info->v40.channel_width);
+ mem_vendor = RREG32(adev->bios_scratch_reg_offset + 4) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ break;
+ default:
+ return -EINVAL;
}
- mem_type = vram_module->v10.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v10.channel_num;
- mem_channel_width = vram_module->v10.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- /* v25 */
- case 5:
- if (module_id > vram_info->v25.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v25.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v11.vram_module_size);
- i++;
+ } else
+ return -EINVAL;
+ break;
+ default:
+ vram_info = (union vram_info *)
+ (mode_info->atom_context->bios + data_offset);
+
+ module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
+ if (frev == 3) {
+ switch (crev) {
+ /* v30 */
+ case 0:
+ vram_module = (union vram_module *)vram_info->v30.vram_module;
+ mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ mem_type = vram_info->v30.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_info->v30.channel_num;
+ mem_channel_width = vram_info->v30.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * 16;
+ break;
+ default:
+ return -EINVAL;
}
- mem_type = vram_module->v11.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v11.channel_num;
- mem_channel_width = vram_module->v11.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- /* v26 */
- case 6:
- if (module_id > vram_info->v26.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v26.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v9.vram_module_size);
- i++;
+ } else if (frev == 2) {
+ switch (crev) {
+ /* v23 */
+ case 3:
+ if (module_id > vram_info->v23.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v23.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v24 */
+ case 4:
+ if (module_id > vram_info->v24.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v24.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v10.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v10.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v10.channel_num;
+ mem_channel_width = vram_module->v10.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v25 */
+ case 5:
+ if (module_id > vram_info->v25.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v25.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v11.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v11.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v11.channel_num;
+ mem_channel_width = vram_module->v11.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v26 */
+ case 6:
+ if (module_id > vram_info->v26.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v26.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ default:
+ return -EINVAL;
}
- mem_type = vram_module->v9.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v9.channel_num;
- mem_channel_width = vram_module->v9.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- default:
+ } else {
+ /* invalid frev */
return -EINVAL;
}
- } else {
- /* invalid frev */
- return -EINVAL;
}
}
-
}
return 0;
@@ -500,9 +552,10 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
u16 data_offset, size;
union umc_info *umc_info;
u8 frev, crev;
- bool ecc_default_enabled = false;
+ bool mem_ecc_enabled = false;
u8 umc_config;
u32 umc_config1;
+ adev->ras_default_ecc_enabled = false;
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
umc_info);
@@ -514,20 +567,22 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
switch (crev) {
case 1:
umc_config = le32_to_cpu(umc_info->v31.umc_config);
- ecc_default_enabled =
+ mem_ecc_enabled =
(umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
case 2:
umc_config = le32_to_cpu(umc_info->v32.umc_config);
- ecc_default_enabled =
+ mem_ecc_enabled =
(umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
case 3:
umc_config = le32_to_cpu(umc_info->v33.umc_config);
umc_config1 = le32_to_cpu(umc_info->v33.umc_config1);
- ecc_default_enabled =
+ mem_ecc_enabled =
((umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ||
(umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE)) ? true : false;
+ adev->ras_default_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
default:
/* unsupported crev */
@@ -536,9 +591,12 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
} else if (frev == 4) {
switch (crev) {
case 0:
+ umc_config = le32_to_cpu(umc_info->v40.umc_config);
umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
- ecc_default_enabled =
+ mem_ecc_enabled =
(umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
+ adev->ras_default_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
default:
/* unsupported crev */
@@ -550,7 +608,7 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
}
}
- return ecc_default_enabled;
+ return mem_ecc_enabled;
}
/*
@@ -872,6 +930,10 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
fw_reserved_fb_size =
(firmware_info->v34.fw_reserved_size_in_kb << 10);
break;
+ case 5:
+ fw_reserved_fb_size =
+ (firmware_info->v35.fw_reserved_size_in_kb << 10);
+ break;
default:
fw_reserved_fb_size = 0;
break;
@@ -941,5 +1003,6 @@ int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset)
return -EINVAL;
}
- return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1);
+ return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1,
+ sizeof(asic_init_ps_v2_1));
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index c7eb2caec65a..649b5530d8ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -36,7 +36,7 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev);
-bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address);
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t *i2c_address);
bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 375f02002579..3893e6fc2f03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -89,18 +89,6 @@ bool amdgpu_is_atpx_hybrid(void)
return amdgpu_atpx_priv.atpx.is_hybrid;
}
-bool amdgpu_atpx_dgpu_req_power_for_displays(void)
-{
- return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
-}
-
-#if defined(CONFIG_ACPI)
-void *amdgpu_atpx_get_dhandle(void)
-{
- return amdgpu_atpx_priv.dhandle;
-}
-#endif
-
/**
* amdgpu_atpx_call - call an ATPX method
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index edc6377ec5ff..199693369c7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -39,7 +39,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
for (i = 0; i < n; i++) {
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
- false, false, false);
+ false, false, 0);
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 38ccec913f00..00e96419fcda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -29,6 +29,7 @@
#include "amdgpu.h"
#include "atom.h"
+#include <linux/device.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/acpi.h>
@@ -46,48 +47,58 @@
/* Check if current bios is an ATOM BIOS.
* Return true if it is ATOM BIOS. Otherwise, return false.
*/
-static bool check_atom_bios(uint8_t *bios, size_t size)
+static bool check_atom_bios(struct amdgpu_device *adev, size_t size)
{
uint16_t tmp, bios_header_start;
+ uint8_t *bios = adev->bios;
if (!bios || size < 0x49) {
- DRM_INFO("vbios mem is null or mem size is wrong\n");
+ dev_dbg(adev->dev, "VBIOS mem is null or mem size is wrong\n");
return false;
}
if (!AMD_IS_VALID_VBIOS(bios)) {
- DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]);
+ dev_dbg(adev->dev, "VBIOS signature incorrect %x %x\n", bios[0],
+ bios[1]);
return false;
}
bios_header_start = bios[0x48] | (bios[0x49] << 8);
if (!bios_header_start) {
- DRM_INFO("Can't locate bios header\n");
+ dev_dbg(adev->dev, "Can't locate VBIOS header\n");
return false;
}
tmp = bios_header_start + 4;
if (size < tmp) {
- DRM_INFO("BIOS header is broken\n");
+ dev_dbg(adev->dev, "VBIOS header is broken\n");
return false;
}
if (!memcmp(bios + tmp, "ATOM", 4) ||
!memcmp(bios + tmp, "MOTA", 4)) {
- DRM_DEBUG("ATOMBIOS detected\n");
+ dev_dbg(adev->dev, "ATOMBIOS detected\n");
return true;
}
return false;
}
+void amdgpu_bios_release(struct amdgpu_device *adev)
+{
+ kfree(adev->bios);
+ adev->bios = NULL;
+ adev->bios_size = 0;
+}
+
/* If you boot an IGP board with a discrete card as the primary,
* the IGP rom is not accessible via the rom bar as the IGP rom is
* part of the system bios. On boot, the system bios puts a
* copy of the igp rom at the start of vram if a discrete card is
* present.
+ * For SR-IOV, the vbios image is also put in VRAM in the VF.
*/
-static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
+static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
{
uint8_t __iomem *bios;
resource_size_t vram_base;
@@ -116,8 +127,8 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, size);
iounmap(bios);
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -144,8 +155,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, size);
pci_unmap_rom(adev->pdev, bios);
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -184,8 +195,8 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
/* read complete BIOS */
amdgpu_asic_read_bios_from_rom(adev, adev->bios, len);
- if (!check_atom_bios(adev->bios, len)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, len)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -214,14 +225,15 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, romlen);
iounmap(bios);
- if (!check_atom_bios(adev->bios, romlen))
+ if (!check_atom_bios(adev, romlen))
goto free_bios;
adev->bios_size = romlen;
return true;
free_bios:
- kfree(adev->bios);
+ amdgpu_bios_release(adev);
+
return false;
}
@@ -283,11 +295,15 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
acpi_status status;
bool found = false;
- /* ATRM is for the discrete card only */
- if (adev->flags & AMD_IS_APU)
+ /* ATRM is for on-platform devices only */
+ if (dev_is_removable(&adev->pdev->dev))
return false;
- while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+ while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) {
+ if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) &&
+ (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8))
+ continue;
+
dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
continue;
@@ -299,20 +315,6 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
}
}
- if (!found) {
- while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
- dhandle = ACPI_HANDLE(&pdev->dev);
- if (!dhandle)
- continue;
-
- status = acpi_get_handle(dhandle, "ATRM", &atrm_handle);
- if (ACPI_SUCCESS(status)) {
- found = true;
- break;
- }
- }
- }
-
if (!found)
return false;
pci_dev_put(pdev);
@@ -332,8 +334,8 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
break;
}
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
adev->bios_size = size;
@@ -348,11 +350,8 @@ static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev)
{
- if (adev->flags & AMD_IS_APU)
- return igp_read_bios_from_vram(adev);
- else
- return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
- false : amdgpu_asic_read_disabled_bios(adev);
+ return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
+ false : amdgpu_asic_read_disabled_bios(adev);
}
#ifdef CONFIG_ACPI
@@ -400,8 +399,8 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
vhdr->ImageLength,
GFP_KERNEL);
- if (!check_atom_bios(adev->bios, vhdr->ImageLength)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, vhdr->ImageLength)) {
+ amdgpu_bios_release(adev);
return false;
}
adev->bios_size = vhdr->ImageLength;
@@ -419,7 +418,43 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
}
#endif
-bool amdgpu_get_bios(struct amdgpu_device *adev)
+static bool amdgpu_get_bios_apu(struct amdgpu_device *adev)
+{
+ if (amdgpu_acpi_vfct_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VFCT\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios_from_vram(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
+ return false;
+
+success:
+ return true;
+}
+
+static bool amdgpu_prefer_rom_resource(struct amdgpu_device *adev)
+{
+ struct resource *res = &adev->pdev->resource[PCI_ROM_RESOURCE];
+
+ return (res->flags & IORESOURCE_ROM_SHADOW);
+}
+
+static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev)
{
if (amdgpu_atrm_get_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from ATRM\n");
@@ -431,14 +466,33 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
goto success;
}
- if (igp_read_bios_from_vram(adev)) {
+ /* this is required for SR-IOV */
+ if (amdgpu_read_bios_from_vram(adev)) {
dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
goto success;
}
- if (amdgpu_read_bios(adev)) {
- dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
- goto success;
+ if (amdgpu_prefer_rom_resource(adev)) {
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ } else {
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
}
if (amdgpu_read_bios_from_rom(adev)) {
@@ -451,19 +505,28 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
goto success;
}
- if (amdgpu_read_platform_bios(adev)) {
- dev_info(adev->dev, "Fetched VBIOS from platform\n");
- goto success;
- }
-
dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
return false;
success:
- adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
return true;
}
+bool amdgpu_get_bios(struct amdgpu_device *adev)
+{
+ bool found;
+
+ if (adev->flags & AMD_IS_APU)
+ found = amdgpu_get_bios_apu(adev);
+ else
+ found = amdgpu_get_bios_dgpu(adev);
+
+ if (found)
+ adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
+
+ return found;
+}
+
/* helper function for soc15 and onwards to read bios from rom */
bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
u8 *bios, u32 length_bytes)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index b6298e901cbd..66fb37b64388 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -75,27 +75,17 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
struct amdgpu_bo_list_entry *array;
struct amdgpu_bo_list *list;
uint64_t total_size = 0;
- size_t size;
unsigned i;
int r;
- if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list))
- / sizeof(struct amdgpu_bo_list_entry))
- return -EINVAL;
-
- size = sizeof(struct amdgpu_bo_list);
- size += num_entries * sizeof(struct amdgpu_bo_list_entry);
- list = kvmalloc(size, GFP_KERNEL);
+ list = kvzalloc(struct_size(list, entries, num_entries), GFP_KERNEL);
if (!list)
return -ENOMEM;
kref_init(&list->refcount);
- list->gds_obj = NULL;
- list->gws_obj = NULL;
- list->oa_obj = NULL;
- array = amdgpu_bo_list_array_entry(list, 0);
- memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
+ list->num_entries = num_entries;
+ array = list->entries;
for (i = 0; i < num_entries; ++i) {
struct amdgpu_bo_list_entry *entry;
@@ -140,7 +130,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
}
list->first_userptr = first_userptr;
- list->num_entries = num_entries;
sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
amdgpu_bo_list_entry_cmp, NULL);
@@ -183,6 +172,7 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
}
rcu_read_unlock();
+ *result = NULL;
return -ENOENT;
}
@@ -194,43 +184,36 @@ void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
struct drm_amdgpu_bo_list_entry **info_param)
{
- const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
+ const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
+ const uint32_t bo_info_size = in->bo_info_size;
+ const uint32_t bo_number = in->bo_number;
struct drm_amdgpu_bo_list_entry *info;
- int r;
-
- info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL);
- if (!info)
- return -ENOMEM;
/* copy the handle array from userspace to a kernel buffer */
- r = -EFAULT;
- if (likely(info_size == in->bo_info_size)) {
- unsigned long bytes = in->bo_number *
- in->bo_info_size;
-
- if (copy_from_user(info, uptr, bytes))
- goto error_free;
-
+ if (likely(info_size == bo_info_size)) {
+ info = vmemdup_array_user(uptr, bo_number, info_size);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
} else {
- unsigned long bytes = min(in->bo_info_size, info_size);
+ const uint32_t bytes = min(bo_info_size, info_size);
unsigned i;
- memset(info, 0, in->bo_number * info_size);
- for (i = 0; i < in->bo_number; ++i) {
- if (copy_from_user(&info[i], uptr, bytes))
- goto error_free;
+ info = kvmalloc_array(bo_number, info_size, GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
- uptr += in->bo_info_size;
+ memset(info, 0, bo_number * info_size);
+ for (i = 0; i < bo_number; ++i, uptr += bo_info_size) {
+ if (copy_from_user(&info[i], uptr, bytes)) {
+ kvfree(info);
+ return -EFAULT;
+ }
}
}
*info_param = info;
return 0;
-
-error_free:
- kvfree(info);
- return r;
}
int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 26c01cb131f2..a716c9886c74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -38,7 +38,6 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo *bo;
struct amdgpu_bo_va *bo_va;
uint32_t priority;
- struct page **user_pages;
struct hmm_range *range;
bool user_invalidated;
};
@@ -55,6 +54,8 @@ struct amdgpu_bo_list {
/* Protect access during command submission.
*/
struct mutex bo_list_mutex;
+
+ struct amdgpu_bo_list_entry entries[] __counted_by(num_entries);
};
int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
@@ -69,22 +70,14 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev,
size_t num_entries,
struct amdgpu_bo_list **list);
-static inline struct amdgpu_bo_list_entry *
-amdgpu_bo_list_array_entry(struct amdgpu_bo_list *list, unsigned index)
-{
- struct amdgpu_bo_list_entry *array = (void *)&list[1];
-
- return &array[index];
-}
-
#define amdgpu_bo_list_for_each_entry(e, list) \
- for (e = amdgpu_bo_list_array_entry(list, 0); \
- e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
+ for (e = list->entries; \
+ e != &list->entries[list->num_entries]; \
++e)
#define amdgpu_bo_list_for_each_userptr_entry(e, list) \
- for (e = amdgpu_bo_list_array_entry(list, (list)->first_userptr); \
- e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
+ for (e = &list->entries[list->first_userptr]; \
+ e != &list->entries[list->num_entries]; \
++e)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index b8280be6225d..004a6a9d6b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -213,6 +213,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
struct amdgpu_firmware_info *ucode;
id = fw_type_convert(cgs_device, type);
+ if (id >= AMDGPU_UCODE_ID_MAXIMUM)
+ return -EINVAL;
+
ucode = &adev->firmware.ucode[id];
if (ucode->fw == NULL)
return -EINVAL;
@@ -249,83 +252,22 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
if (!adev->pm.fw) {
switch (adev->asic_type) {
- case CHIP_TAHITI:
- strcpy(fw_name, "radeon/tahiti_smc.bin");
- break;
- case CHIP_PITCAIRN:
- if ((adev->pdev->revision == 0x81) &&
- ((adev->pdev->device == 0x6810) ||
- (adev->pdev->device == 0x6811))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/pitcairn_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/pitcairn_smc.bin");
- }
- break;
- case CHIP_VERDE:
- if (((adev->pdev->device == 0x6820) &&
- ((adev->pdev->revision == 0x81) ||
- (adev->pdev->revision == 0x83))) ||
- ((adev->pdev->device == 0x6821) &&
- ((adev->pdev->revision == 0x83) ||
- (adev->pdev->revision == 0x87))) ||
- ((adev->pdev->revision == 0x87) &&
- ((adev->pdev->device == 0x6823) ||
- (adev->pdev->device == 0x682b)))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/verde_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/verde_smc.bin");
- }
- break;
- case CHIP_OLAND:
- if (((adev->pdev->revision == 0x81) &&
- ((adev->pdev->device == 0x6600) ||
- (adev->pdev->device == 0x6604) ||
- (adev->pdev->device == 0x6605) ||
- (adev->pdev->device == 0x6610))) ||
- ((adev->pdev->revision == 0x83) &&
- (adev->pdev->device == 0x6610))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/oland_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/oland_smc.bin");
- }
- break;
- case CHIP_HAINAN:
- if (((adev->pdev->revision == 0x81) &&
- (adev->pdev->device == 0x6660)) ||
- ((adev->pdev->revision == 0x83) &&
- ((adev->pdev->device == 0x6660) ||
- (adev->pdev->device == 0x6663) ||
- (adev->pdev->device == 0x6665) ||
- (adev->pdev->device == 0x6667)))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/hainan_k_smc.bin");
- } else if ((adev->pdev->revision == 0xc3) &&
- (adev->pdev->device == 0x6665)) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/banks_k_2_smc.bin");
- } else {
- strcpy(fw_name, "radeon/hainan_smc.bin");
- }
- break;
case CHIP_BONAIRE:
if ((adev->pdev->revision == 0x80) ||
(adev->pdev->revision == 0x81) ||
(adev->pdev->device == 0x665f)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/bonaire_k_smc.bin");
+ strscpy(fw_name, "amdgpu/bonaire_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/bonaire_smc.bin");
+ strscpy(fw_name, "amdgpu/bonaire_smc.bin");
}
break;
case CHIP_HAWAII:
if (adev->pdev->revision == 0x80) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/hawaii_k_smc.bin");
+ strscpy(fw_name, "amdgpu/hawaii_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/hawaii_smc.bin");
+ strscpy(fw_name, "amdgpu/hawaii_smc.bin");
}
break;
case CHIP_TOPAZ:
@@ -335,83 +277,85 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) ||
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
+ strscpy(fw_name, "amdgpu/topaz_k_smc.bin");
} else
- strcpy(fw_name, "amdgpu/topaz_smc.bin");
+ strscpy(fw_name, "amdgpu/topaz_smc.bin");
break;
case CHIP_TONGA:
if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) ||
((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/tonga_k_smc.bin");
+ strscpy(fw_name, "amdgpu/tonga_k_smc.bin");
} else
- strcpy(fw_name, "amdgpu/tonga_smc.bin");
+ strscpy(fw_name, "amdgpu/tonga_smc.bin");
break;
case CHIP_FIJI:
- strcpy(fw_name, "amdgpu/fiji_smc.bin");
+ strscpy(fw_name, "amdgpu/fiji_smc.bin");
break;
case CHIP_POLARIS11:
if (type == CGS_UCODE_ID_SMU) {
if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_k_smc.bin");
} else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris11_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_smc.bin");
}
} else if (type == CGS_UCODE_ID_SMU_SK) {
- strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
+ strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
}
break;
case CHIP_POLARIS10:
if (type == CGS_UCODE_ID_SMU) {
if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_k_smc.bin");
} else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris10_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_smc.bin");
}
} else if (type == CGS_UCODE_ID_SMU_SK) {
- strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
+ strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
}
break;
case CHIP_POLARIS12:
if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris12_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris12_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris12_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris12_smc.bin");
}
break;
case CHIP_VEGAM:
- strcpy(fw_name, "amdgpu/vegam_smc.bin");
+ strscpy(fw_name, "amdgpu/vegam_smc.bin");
break;
case CHIP_VEGA10:
if ((adev->pdev->device == 0x687f) &&
((adev->pdev->revision == 0xc0) ||
(adev->pdev->revision == 0xc1) ||
(adev->pdev->revision == 0xc3)))
- strcpy(fw_name, "amdgpu/vega10_acg_smc.bin");
+ strscpy(fw_name, "amdgpu/vega10_acg_smc.bin");
else
- strcpy(fw_name, "amdgpu/vega10_smc.bin");
+ strscpy(fw_name, "amdgpu/vega10_smc.bin");
break;
case CHIP_VEGA12:
- strcpy(fw_name, "amdgpu/vega12_smc.bin");
+ strscpy(fw_name, "amdgpu/vega12_smc.bin");
break;
case CHIP_VEGA20:
- strcpy(fw_name, "amdgpu/vega20_smc.bin");
+ strscpy(fw_name, "amdgpu/vega20_smc.bin");
break;
default:
DRM_ERROR("SMC firmware not supported\n");
return -EINVAL;
}
- err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->pm.fw,
+ AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
if (err) {
DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
amdgpu_ucode_release(&adev->pm.fw);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index d34037b85cf8..47e9bfba0642 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -103,7 +103,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *dig_connector;
int bpc = 8;
- unsigned mode_clock, max_tmds_clock;
+ unsigned int mode_clock, max_tmds_clock;
switch (connector->connector_type) {
case DRM_MODE_CONNECTOR_DVII:
@@ -246,36 +246,10 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
return NULL;
}
-struct edid *amdgpu_connector_edid(struct drm_connector *connector)
-{
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- struct drm_property_blob *edid_blob = connector->edid_blob_ptr;
-
- if (amdgpu_connector->edid) {
- return amdgpu_connector->edid;
- } else if (edid_blob) {
- struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL);
- if (edid)
- amdgpu_connector->edid = edid;
- }
- return amdgpu_connector->edid;
-}
-
static struct edid *
amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev)
{
- struct edid *edid;
-
- if (adev->mode_info.bios_hardcoded_edid) {
- edid = kmalloc(adev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL);
- if (edid) {
- memcpy((unsigned char *)edid,
- (unsigned char *)adev->mode_info.bios_hardcoded_edid,
- adev->mode_info.bios_hardcoded_edid_size);
- return edid;
- }
- }
- return NULL;
+ return drm_edid_duplicate(drm_edid_raw(adev->mode_info.bios_hardcoded_edid));
}
static void amdgpu_connector_get_edid(struct drm_connector *connector)
@@ -424,30 +398,28 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder,
struct drm_display_mode *mode = NULL;
struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
int i;
- static const struct mode_size {
+ int n;
+ struct mode_size {
+ char name[DRM_DISPLAY_MODE_LEN];
int w;
int h;
- } common_modes[17] = {
- { 640, 480},
- { 720, 480},
- { 800, 600},
- { 848, 480},
- {1024, 768},
- {1152, 768},
- {1280, 720},
- {1280, 800},
- {1280, 854},
- {1280, 960},
- {1280, 1024},
- {1440, 900},
- {1400, 1050},
- {1680, 1050},
- {1600, 1200},
- {1920, 1080},
- {1920, 1200}
+ } common_modes[] = {
+ { "640x480", 640, 480},
+ { "800x600", 800, 600},
+ { "1024x768", 1024, 768},
+ { "1280x720", 1280, 720},
+ { "1280x800", 1280, 800},
+ {"1280x1024", 1280, 1024},
+ { "1440x900", 1440, 900},
+ {"1680x1050", 1680, 1050},
+ {"1600x1200", 1600, 1200},
+ {"1920x1080", 1920, 1080},
+ {"1920x1200", 1920, 1200}
};
- for (i = 0; i < 17; i++) {
+ n = ARRAY_SIZE(common_modes);
+
+ for (i = 0; i < n; i++) {
if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) {
if (common_modes[i].w > 1024 ||
common_modes[i].h > 768)
@@ -460,10 +432,12 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder,
common_modes[i].h == native_mode->vdisplay))
continue;
}
- if (common_modes[i].w < 320 || common_modes[i].h < 200)
- continue;
mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+ if (!mode)
+ return;
+ strscpy(mode->name, common_modes[i].name, DRM_DISPLAY_MODE_LEN);
+
drm_mode_probed_add(connector, mode);
}
}
@@ -588,6 +562,7 @@ static int amdgpu_connector_set_property(struct drm_connector *connector,
amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
} else {
const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
}
@@ -696,7 +671,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
}
static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
@@ -804,6 +779,7 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
else {
const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
}
@@ -860,7 +836,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
}
static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -986,6 +962,41 @@ amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector)
return false;
}
+static void amdgpu_connector_shared_ddc(enum drm_connector_status *status,
+ struct drm_connector *connector,
+ struct amdgpu_connector *amdgpu_connector)
+{
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *list_amdgpu_connector;
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (amdgpu_connector->shared_ddc && *status == connector_status_connected) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(list_connector,
+ &iter) {
+ if (connector == list_connector)
+ continue;
+ list_amdgpu_connector = to_amdgpu_connector(list_connector);
+ if (list_amdgpu_connector->shared_ddc &&
+ list_amdgpu_connector->ddc_bus->rec.i2c_id ==
+ amdgpu_connector->ddc_bus->rec.i2c_id) {
+ /* cases where both connectors are digital */
+ if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
+ /* hpd is our only option in this case */
+ if (!amdgpu_display_hpd_sense(adev,
+ amdgpu_connector->hpd.hpd)) {
+ amdgpu_connector_free_edid(connector);
+ *status = connector_status_disconnected;
+ }
+ }
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ }
+}
+
/*
* DVI is complicated
* Do a DDC probe, if DDC probe passes, get the full EDID so
@@ -1072,32 +1083,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
* DDC line. The latter is more complex because with DVI<->HDMI adapters
* you don't really know what's connected to which port as both are digital.
*/
- if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) {
- struct drm_connector *list_connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *list_amdgpu_connector;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(list_connector,
- &iter) {
- if (connector == list_connector)
- continue;
- list_amdgpu_connector = to_amdgpu_connector(list_connector);
- if (list_amdgpu_connector->shared_ddc &&
- (list_amdgpu_connector->ddc_bus->rec.i2c_id ==
- amdgpu_connector->ddc_bus->rec.i2c_id)) {
- /* cases where both connectors are digital */
- if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
- /* hpd is our only option in this case */
- if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
- amdgpu_connector_free_edid(connector);
- ret = connector_status_disconnected;
- }
- }
- }
- }
- drm_connector_list_iter_end(&iter);
- }
+ amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
}
}
@@ -1199,35 +1185,76 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
static void amdgpu_connector_dvi_force(struct drm_connector *connector)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
if (connector->force == DRM_FORCE_ON)
amdgpu_connector->use_digital = false;
if (connector->force == DRM_FORCE_ON_DIGITAL)
amdgpu_connector->use_digital = true;
}
+/**
+ * amdgpu_max_hdmi_pixel_clock - Return max supported HDMI (TMDS) pixel clock
+ * @adev: pointer to amdgpu_device
+ *
+ * Return: maximum supported HDMI (TMDS) pixel clock in KHz.
+ */
+static int amdgpu_max_hdmi_pixel_clock(const struct amdgpu_device *adev)
+{
+ if (adev->asic_type >= CHIP_POLARIS10)
+ return 600000;
+ else if (adev->asic_type >= CHIP_TONGA)
+ return 300000;
+ else
+ return 297000;
+}
+
+/**
+ * amdgpu_connector_dvi_mode_valid - Validate a mode on DVI/HDMI connectors
+ * @connector: DRM connector to validate the mode on
+ * @mode: display mode to validate
+ *
+ * Validate the given display mode on DVI and HDMI connectors, including
+ * analog signals on DVI-I.
+ *
+ * Return: drm_mode_status indicating whether the mode is valid.
+ */
static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ const int max_hdmi_pixel_clock = amdgpu_max_hdmi_pixel_clock(adev);
+ const int max_dvi_single_link_pixel_clock = 165000;
+ int max_digital_pixel_clock_khz;
/* XXX check mode bandwidth */
- if (amdgpu_connector->use_digital && (mode->clock > 165000)) {
- if ((amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I) ||
- (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D) ||
- (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B)) {
- return MODE_OK;
- } else if (connector->display_info.is_hdmi) {
- /* HDMI 1.3+ supports max clock of 340 Mhz */
- if (mode->clock > 340000)
- return MODE_CLOCK_HIGH;
- else
- return MODE_OK;
- } else {
- return MODE_CLOCK_HIGH;
+ if (amdgpu_connector->use_digital) {
+ switch (amdgpu_connector->connector_object_id) {
+ case CONNECTOR_OBJECT_ID_HDMI_TYPE_A:
+ max_digital_pixel_clock_khz = max_hdmi_pixel_clock;
+ break;
+ case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I:
+ case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D:
+ max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock;
+ break;
+ case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I:
+ case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D:
+ case CONNECTOR_OBJECT_ID_HDMI_TYPE_B:
+ max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock * 2;
+ break;
}
+
+ /* When the display EDID claims that it's an HDMI display,
+ * we use the HDMI encoder mode of the display HW,
+ * so we should verify against the max HDMI clock here.
+ */
+ if (connector->display_info.is_hdmi)
+ max_digital_pixel_clock_khz = max_hdmi_pixel_clock;
+
+ if (mode->clock > max_digital_pixel_clock_khz)
+ return MODE_CLOCK_HIGH;
}
/* check against the max pixel clock */
@@ -1433,6 +1460,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
ret = connector_status_connected;
else if (amdgpu_connector->dac_load_detect) { /* try load detection */
const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+
ret = encoder_funcs->detect(encoder, connector);
}
}
@@ -1473,7 +1501,7 @@ out:
}
static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
index 61fcef15ad72..eff833b6ed31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
@@ -24,7 +24,6 @@
#ifndef __AMDGPU_CONNECTORS_H__
#define __AMDGPU_CONNECTORS_H__
-struct edid *amdgpu_connector_edid(struct drm_connector *connector);
void amdgpu_connector_hotplug(struct drm_connector *connector);
int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector);
u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *connector);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
new file mode 100644
index 000000000000..ef996493115f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/list.h>
+#include "amdgpu.h"
+
+static const guid_t MCE = CPER_NOTIFY_MCE;
+static const guid_t CMC = CPER_NOTIFY_CMC;
+static const guid_t BOOT = BOOT_TYPE;
+
+static const guid_t CRASHDUMP = AMD_CRASHDUMP;
+static const guid_t RUNTIME = AMD_GPU_NONSTANDARD_ERROR;
+
+static void __inc_entry_length(struct cper_hdr *hdr, uint32_t size)
+{
+ hdr->record_length += size;
+}
+
+static void amdgpu_cper_get_timestamp(struct cper_timestamp *timestamp)
+{
+ struct tm tm;
+ time64_t now = ktime_get_real_seconds();
+
+ time64_to_tm(now, 0, &tm);
+ timestamp->seconds = tm.tm_sec;
+ timestamp->minutes = tm.tm_min;
+ timestamp->hours = tm.tm_hour;
+ timestamp->flag = 0;
+ timestamp->day = tm.tm_mday;
+ timestamp->month = 1 + tm.tm_mon;
+ timestamp->year = (1900 + tm.tm_year) % 100;
+ timestamp->century = (1900 + tm.tm_year) / 100;
+}
+
+void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ enum amdgpu_cper_type type,
+ enum cper_error_severity sev)
+{
+ char record_id[16];
+
+ hdr->signature[0] = 'C';
+ hdr->signature[1] = 'P';
+ hdr->signature[2] = 'E';
+ hdr->signature[3] = 'R';
+ hdr->revision = CPER_HDR_REV_1;
+ hdr->signature_end = 0xFFFFFFFF;
+ hdr->error_severity = sev;
+
+ hdr->valid_bits.platform_id = 1;
+ hdr->valid_bits.timestamp = 1;
+
+ amdgpu_cper_get_timestamp(&hdr->timestamp);
+
+ snprintf(record_id, 9, "%d:%X",
+ (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0,
+ atomic_inc_return(&adev->cper.unique_id));
+ memcpy(hdr->record_id, record_id, 8);
+
+ snprintf(hdr->platform_id, 16, "0x%04X:0x%04X",
+ adev->pdev->vendor, adev->pdev->device);
+ /* pmfw version should be part of creator_id according to CPER spec */
+ snprintf(hdr->creator_id, 16, "%s", CPER_CREATOR_ID_AMDGPU);
+
+ switch (type) {
+ case AMDGPU_CPER_TYPE_BOOT:
+ hdr->notify_type = BOOT;
+ break;
+ case AMDGPU_CPER_TYPE_FATAL:
+ case AMDGPU_CPER_TYPE_BP_THRESHOLD:
+ hdr->notify_type = MCE;
+ break;
+ case AMDGPU_CPER_TYPE_RUNTIME:
+ if (sev == CPER_SEV_NON_FATAL_CORRECTED)
+ hdr->notify_type = CMC;
+ else
+ hdr->notify_type = MCE;
+ break;
+ default:
+ dev_err(adev->dev, "Unknown CPER Type\n");
+ break;
+ }
+
+ __inc_entry_length(hdr, HDR_LEN);
+}
+
+static int amdgpu_cper_entry_fill_section_desc(struct amdgpu_device *adev,
+ struct cper_sec_desc *section_desc,
+ bool bp_threshold,
+ bool poison,
+ enum cper_error_severity sev,
+ guid_t sec_type,
+ uint32_t section_length,
+ uint32_t section_offset)
+{
+ section_desc->revision_minor = CPER_SEC_MINOR_REV_1;
+ section_desc->revision_major = CPER_SEC_MAJOR_REV_22;
+ section_desc->sec_offset = section_offset;
+ section_desc->sec_length = section_length;
+ section_desc->valid_bits.fru_text = 1;
+ section_desc->flag_bits.primary = 1;
+ section_desc->severity = sev;
+ section_desc->sec_type = sec_type;
+
+ snprintf(section_desc->fru_text, 20, "OAM%d",
+ (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0);
+
+ if (bp_threshold)
+ section_desc->flag_bits.exceed_err_threshold = 1;
+ if (poison)
+ section_desc->flag_bits.latent_err = 1;
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ struct cper_sec_crashdump_reg_data reg_data)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_crashdump_fatal *section;
+
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_crashdump_fatal *)((uint8_t *)hdr +
+ FATAL_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, false,
+ CPER_SEV_FATAL, CRASHDUMP, FATAL_SEC_LEN,
+ FATAL_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ section->body.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->body.reg_arr_size = sizeof(reg_data);
+ section->body.data = reg_data;
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + FATAL_SEC_LEN);
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ enum cper_error_severity sev,
+ uint32_t *reg_dump,
+ uint32_t reg_count)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_nonstd_err *section;
+ bool poison;
+
+ poison = sev != CPER_SEV_NON_FATAL_CORRECTED;
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, poison,
+ sev, RUNTIME, NONSTD_SEC_LEN,
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ reg_count = umin(reg_count, CPER_ACA_REG_COUNT);
+
+ section->hdr.valid_bits.err_info_cnt = 1;
+ section->hdr.valid_bits.err_context_cnt = 1;
+
+ section->info.error_type = RUNTIME;
+ section->info.ms_chk_bits.err_type_valid = 1;
+ section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);
+
+ memcpy(section->ctx.reg_dump, reg_dump, reg_count * sizeof(uint32_t));
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_nonstd_err *section;
+ uint32_t socket_id;
+
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
+ CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN,
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ section->hdr.valid_bits.err_info_cnt = 1;
+ section->hdr.valid_bits.err_context_cnt = 1;
+
+ section->info.error_type = RUNTIME;
+ section->info.valid_bits.ms_chk = 1;
+ section->info.ms_chk_bits.err_type_valid = 1;
+ section->info.ms_chk_bits.err_type = 1;
+ section->info.ms_chk_bits.pcc = 1;
+ section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);
+
+ /* Hardcoded Reg dump for bad page threshold CPER */
+ socket_id = (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0;
+ section->ctx.reg_dump[CPER_ACA_REG_CTL_LO] = 0x1;
+ section->ctx.reg_dump[CPER_ACA_REG_CTL_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_STATUS_LO] = 0x137;
+ section->ctx.reg_dump[CPER_ACA_REG_STATUS_HI] = 0xB0000000;
+ section->ctx.reg_dump[CPER_ACA_REG_ADDR_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_ADDR_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_MISC0_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_MISC0_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_CONFIG_LO] = 0x2;
+ section->ctx.reg_dump[CPER_ACA_REG_CONFIG_HI] = 0x1ff;
+ section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = (socket_id / 4) & 0x01;
+ section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x096 | (((socket_id % 4) & 0x3) << 12);
+ section->ctx.reg_dump[CPER_ACA_REG_SYND_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_SYND_HI] = 0x0;
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);
+
+ return 0;
+}
+
+struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
+ enum amdgpu_cper_type type,
+ uint16_t section_count)
+{
+ struct cper_hdr *hdr;
+ uint32_t size = 0;
+
+ size += HDR_LEN;
+ size += (SEC_DESC_LEN * section_count);
+
+ switch (type) {
+ case AMDGPU_CPER_TYPE_RUNTIME:
+ case AMDGPU_CPER_TYPE_BP_THRESHOLD:
+ size += (NONSTD_SEC_LEN * section_count);
+ break;
+ case AMDGPU_CPER_TYPE_FATAL:
+ size += (FATAL_SEC_LEN * section_count);
+ break;
+ case AMDGPU_CPER_TYPE_BOOT:
+ size += (BOOT_SEC_LEN * section_count);
+ break;
+ default:
+ dev_err(adev->dev, "Unknown CPER Type!\n");
+ return NULL;
+ }
+
+ hdr = kzalloc(size, GFP_KERNEL);
+ if (!hdr)
+ return NULL;
+
+ /* Save this early */
+ hdr->sec_cnt = section_count;
+
+ return hdr;
+}
+
+/* Build one fatal (UE) CPER record from @bank and write it to the CPER ring. */
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+				   struct aca_bank *bank)
+{
+	struct cper_sec_crashdump_reg_data reg_data = { 0 };
+	struct amdgpu_ring *ring = &adev->cper.ring_buf;
+	struct cper_hdr *fatal;
+	int ret;
+
+	fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
+	if (!fatal) {
+		dev_err(adev->dev, "fail to alloc cper entry for ue record\n");
+		return -ENOMEM;
+	}
+
+	reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+	reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+	reg_data.addr_lo   = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+	reg_data.addr_hi   = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+	reg_data.ipid_lo   = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+	reg_data.ipid_hi   = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+	reg_data.synd_lo   = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+	reg_data.synd_hi   = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+	amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL);
+	ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data);
+	/* Only a fully built record is written; the entry is freed either way. */
+	if (!ret)
+		amdgpu_cper_ring_write(ring, fatal, fatal->record_length);
+
+	kfree(fatal);
+	return ret;
+}
+
+/* Build a bad-page-threshold CPER record and write it to the CPER ring. */
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = &adev->cper.ring_buf;
+	struct cper_hdr *bp_threshold;
+	int ret;
+
+	bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1);
+	if (!bp_threshold) {
+		dev_err(adev->dev, "fail to alloc cper entry for bad page threshold record\n");
+		return -ENOMEM;
+	}
+
+	amdgpu_cper_entry_fill_hdr(adev, bp_threshold,
+				   AMDGPU_CPER_TYPE_BP_THRESHOLD,
+				   CPER_SEV_FATAL);
+	ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0);
+	/* Only a fully built record is written; the entry is freed either way. */
+	if (!ret)
+		amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
+
+	kfree(bp_threshold);
+	return ret;
+}
+
+/* Translate an ACA error type to a CPER severity; unknown types are logged as fatal. */
+static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
+								enum aca_error_type aca_err_type)
+{
+	if (aca_err_type == ACA_ERROR_TYPE_UE)
+		return CPER_SEV_FATAL;
+	if (aca_err_type == ACA_ERROR_TYPE_CE)
+		return CPER_SEV_NON_FATAL_CORRECTED;
+	if (aca_err_type == ACA_ERROR_TYPE_DEFERRED)
+		return CPER_SEV_NON_FATAL_UNCORRECTED;
+
+	/* Any other value is unexpected: report it and fall back to fatal. */
+	dev_err(adev->dev, "Unknown ACA error type!\n");
+	return CPER_SEV_FATAL;
+}
+
+/*
+ * Combine every CE/DE bank on @banks into one runtime CPER record, one section each.
+ */
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev, struct aca_banks *banks,
+				    uint16_t bank_count)
+{
+	struct cper_hdr *corrected = NULL;
+	enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED;
+	struct amdgpu_ring *ring = &adev->cper.ring_buf;
+	uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 };
+	struct aca_bank_node *node;
+	struct aca_bank *bank;
+	uint32_t i = 0;
+	int ret = 0;
+
+	corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count);
+	if (!corrected) {
+		dev_err(adev->dev, "fail to allocate cper entry for ce records\n");
+		return -ENOMEM;
+	}
+
+	/* Raise severity if any DE is detected in the ACA bank list */
+	list_for_each_entry(node, &banks->list, node) {
+		bank = &node->bank;
+		if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+			sev = CPER_SEV_NON_FATAL_UNCORRECTED;
+			break;
+		}
+	}
+
+	amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev);
+	/* Combine CE and DE in cper record; stop at the first section that fails */
+	list_for_each_entry(node, &banks->list, node) {
+		bank = &node->bank;
+		reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+		reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+		reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+		reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+		reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+		reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+		reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+		reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+		reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+		reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+		reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+		reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+		reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+		reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+		ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++,
+				amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type),
+				reg_data, CPER_ACA_REG_COUNT);
+		if (ret)
+			break;
+	}
+
+	if (!ret)
+		amdgpu_cper_ring_write(ring, corrected, corrected->record_length);
+	kfree(corrected);	/* staging copy: released on success and on fill failure */
+	return ret;
+}
+
+/* True when strcmp() matches the signature found at ring index @pos against "CPER". */
+static bool amdgpu_cper_is_hdr(struct amdgpu_ring *ring, u64 pos)
+{
+	struct cper_hdr *hdr = (struct cper_hdr *)&ring->ring[pos];
+
+	return !strcmp(hdr->signature, "CPER");
+}
+
+/*
+ * Size in bytes of the entry at ring index @pos, capped at the bytes left before
+ * the end of the ring buffer. If @pos holds a header its record_length is used;
+ * otherwise, when count_dw is zero, the distance to the next header is used.
+ * With no length found, the bytes up to the end of the buffer are returned.
+ */
+static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos)
+{
+	struct cper_hdr *hdr = (struct cper_hdr *)&ring->ring[pos];
+	u32 to_end = ring->ring_size - (pos << 2);
+	u32 len = 0;
+	u64 idx;
+
+	if (!strcmp(hdr->signature, "CPER")) {
+		len = hdr->record_length;
+	} else if (!ring->count_dw) {
+		/* non-zero count_dw means the ring is not full and no cper data follows */
+		for (idx = pos + 1; idx <= ring->buf_mask; idx++) {
+			hdr = (struct cper_hdr *)&ring->ring[idx];
+			if (!strcmp(hdr->signature, "CPER")) {
+				len = (idx - pos) << 2;
+				break;
+			}
+		}
+	}
+
+	/* No record length known: the entry runs to the end of the buffer */
+	if (!len)
+		return to_end;
+
+	return umin(len, to_end);
+}
+
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count)
+{
+ u64 pos, wptr_old, rptr;
+ int rec_cnt_dw = count >> 2;
+ u32 chunk, ent_sz;
+ u8 *s = (u8 *)src;
+ /* Copy @count bytes of CPER data from @src into the ring, starting at ring->wptr. */
+ if (count >= ring->ring_size - 4) {
+ dev_err(ring->adev->dev,
+ "CPER data size(%d) is larger than ring size(%d)\n",
+ count, ring->ring_size - 4);
+ /* the record is dropped; nothing has been written to the ring */
+ return;
+ }
+ /* ring_lock is held for every pointer and count_dw update below */
+ mutex_lock(&ring->adev->cper.ring_lock);
+
+ wptr_old = ring->wptr;
+ rptr = *ring->rptr_cpu_addr & ring->ptr_mask;
+ /* copy in pieces, each no larger than the entry size reported at wptr */
+ while (count) {
+ ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr);
+ chunk = umin(ent_sz, count);
+
+ memcpy(&ring->ring[ring->wptr], s, chunk);
+
+ ring->wptr += (chunk >> 2);
+ ring->wptr &= ring->ptr_mask;
+ count -= chunk;
+ s += chunk;
+ }
+ /* count_dw holds fewer dwords than this record used: clamp it to 0 */
+ if (ring->count_dw < rec_cnt_dw)
+ ring->count_dw = 0;
+
+ /* the write ran past the read pointer (overflow): advance rptr until it is on a header */
+ if (((wptr_old < rptr) && (rptr <= ring->wptr)) ||
+ ((ring->wptr < wptr_old) && (wptr_old < rptr)) ||
+ ((rptr <= ring->wptr) && (ring->wptr < wptr_old))) {
+ pos = (ring->wptr + 1) & ring->ptr_mask;
+
+ do {
+ ent_sz = amdgpu_cper_ring_get_ent_sz(ring, pos);
+
+ rptr += (ent_sz >> 2);
+ rptr &= ring->ptr_mask;
+ *ring->rptr_cpu_addr = rptr;
+
+ pos = rptr;
+ } while (!amdgpu_cper_is_hdr(ring, rptr));
+ }
+ /* when count_dw still covers the record, subtract the record's dwords from it */
+ if (ring->count_dw >= rec_cnt_dw)
+ ring->count_dw -= rec_cnt_dw;
+ mutex_unlock(&ring->adev->cper.ring_lock);
+}
+
+static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *(ring->rptr_cpu_addr); /* read pointer is fetched through rptr_cpu_addr */
+}
+
+static u64 amdgpu_cper_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ return ring->wptr; /* write pointer is kept in the ring structure itself */
+}
+
+static const struct amdgpu_ring_funcs cper_ring_funcs = { /* installed by amdgpu_cper_ring_init() */
+ .type = AMDGPU_RING_TYPE_CPER,
+ .align_mask = 0xff,
+ .support_64bit_ptrs = false,
+ .get_rptr = amdgpu_cper_ring_get_rptr,
+ .get_wptr = amdgpu_cper_ring_get_wptr,
+};
+
+/* Prepare the CPER ring (no doorbell, no scheduler) and hand it to amdgpu_ring_init(). */
+static int amdgpu_cper_ring_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *cper_ring = &adev->cper.ring_buf;
+
+	cper_ring->funcs = &cper_ring_funcs;
+	cper_ring->no_scheduler = true;
+	cper_ring->use_doorbell = false;
+	cper_ring->ring_obj = NULL;
+	cper_ring->adev = NULL;
+	sprintf(cper_ring->name, "cper");
+
+	mutex_init(&adev->cper.ring_lock);
+	return amdgpu_ring_init(adev, cper_ring, CPER_MAX_RING_SIZE, NULL, 0,
+				AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/* Set up the CPER ring, lock and limits; does nothing unless ACA or SR-IOV RAS CPER is on. */
+int amdgpu_cper_init(struct amdgpu_device *adev)
+{
+	int err;
+
+	if (!(amdgpu_aca_is_enabled(adev) || amdgpu_sriov_ras_cper_en(adev)))
+		return 0;
+
+	err = amdgpu_cper_ring_init(adev);
+	if (err) {
+		dev_err(adev->dev, "failed to initialize cper ring, r = %d\n", err);
+		return err;
+	}
+
+	mutex_init(&adev->cper.cper_lock);
+	adev->cper.max_count = CPER_MAX_ALLOWED_COUNT;
+	adev->cper.enabled = true;
+
+	return 0;
+}
+
+/* Tear down CPER state; a no-op when neither ACA nor SR-IOV RAS CPER is enabled. */
+int amdgpu_cper_fini(struct amdgpu_device *adev)
+{
+	if (amdgpu_aca_is_enabled(adev) || amdgpu_sriov_ras_cper_en(adev)) {
+		adev->cper.enabled = false;
+
+		amdgpu_ring_fini(&adev->cper.ring_buf);
+		adev->cper.count = 0;
+		adev->cper.wptr = 0;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
new file mode 100644
index 000000000000..bcb97d245673
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_CPER_H__
+#define __AMDGPU_CPER_H__
+
+#include "amd_cper.h"
+#include "amdgpu_aca.h"
+
+#define CPER_MAX_ALLOWED_COUNT 0x1000
+#define CPER_MAX_RING_SIZE 0x100000
+#define HDR_LEN (sizeof(struct cper_hdr))
+#define SEC_DESC_LEN (sizeof(struct cper_sec_desc))
+/* Sizes of the individual section payload types */
+#define BOOT_SEC_LEN (sizeof(struct cper_sec_crashdump_boot))
+#define FATAL_SEC_LEN (sizeof(struct cper_sec_crashdump_fatal))
+#define NONSTD_SEC_LEN (sizeof(struct cper_sec_nonstd_err))
+/* Byte offset of section descriptor @idx: descriptors directly follow the header */
+#define SEC_DESC_OFFSET(idx) (HDR_LEN + (SEC_DESC_LEN * (idx)))
+/* Byte offset of section @idx: sections follow the @count section descriptors */
+#define BOOT_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * (count)) + (BOOT_SEC_LEN * (idx)))
+#define FATAL_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * (count)) + (FATAL_SEC_LEN * (idx)))
+#define NONSTD_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * (count)) + (NONSTD_SEC_LEN * (idx)))
+
+enum amdgpu_cper_type {
+ AMDGPU_CPER_TYPE_RUNTIME, /* combined CE/DE records, see amdgpu_cper_generate_ce_records() */
+ AMDGPU_CPER_TYPE_FATAL, /* per-UE records, see amdgpu_cper_generate_ue_record() */
+ AMDGPU_CPER_TYPE_BOOT,
+ AMDGPU_CPER_TYPE_BP_THRESHOLD, /* see amdgpu_cper_generate_bp_threshold_record() */
+};
+
+struct amdgpu_cper {
+ bool enabled; /* set at the end of amdgpu_cper_init(), cleared by amdgpu_cper_fini() */
+
+ atomic_t unique_id;
+ struct mutex cper_lock; /* initialised in amdgpu_cper_init() */
+
+ /* Lifetime CPERs generated */
+ uint32_t count;
+ uint32_t max_count; /* amdgpu_cper_init() sets this to CPER_MAX_ALLOWED_COUNT */
+
+ uint32_t wptr; /* reset to 0, together with count, in amdgpu_cper_fini() */
+
+ void *ring[CPER_MAX_ALLOWED_COUNT];
+ struct amdgpu_ring ring_buf; /* ring that amdgpu_cper_ring_write() fills */
+ struct mutex ring_lock; /* held by amdgpu_cper_ring_write() while it updates ring_buf */
+};
+
+void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ enum amdgpu_cper_type type,
+ enum cper_error_severity sev);
+int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ struct cper_sec_crashdump_reg_data reg_data);
+int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ enum cper_error_severity sev,
+ uint32_t *reg_dump,
+ uint32_t reg_count);
+int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t section_idx);
+
+struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
+ enum amdgpu_cper_type type,
+ uint16_t section_count);
+/* Each UE must be encoded into its own cper entry: 1 UE, 1 cper */
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+ struct aca_bank *bank);
+/* CEs and DEs are combined into 1 cper entry */
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+ struct aca_banks *banks,
+ uint16_t bank_count);
+/* The bad page threshold is encoded into a separate cper entry */
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev);
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
+ void *src, int count);
+int amdgpu_cper_init(struct amdgpu_device *adev);
+int amdgpu_cper_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index efdb1c48f431..9cd7741d2254 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -29,6 +29,7 @@
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>
+#include <linux/hmm.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
@@ -65,7 +66,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
}
amdgpu_sync_create(&p->sync);
- drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
return 0;
}
@@ -177,25 +179,17 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
struct amdgpu_vm *vm = &fpriv->vm;
- uint64_t *chunk_array_user;
uint64_t *chunk_array;
uint32_t uf_offset = 0;
size_t size;
int ret;
int i;
- chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
- GFP_KERNEL);
- if (!chunk_array)
- return -ENOMEM;
-
- /* get chunks */
- chunk_array_user = u64_to_user_ptr(cs->in.chunks);
- if (copy_from_user(chunk_array, chunk_array_user,
- sizeof(uint64_t)*cs->in.num_chunks)) {
- ret = -EFAULT;
- goto free_chunk;
- }
+ chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks),
+ cs->in.num_chunks,
+ sizeof(uint64_t));
+ if (IS_ERR(chunk_array))
+ return PTR_ERR(chunk_array);
p->nchunks = cs->in.num_chunks;
p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
@@ -206,9 +200,8 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
}
for (i = 0; i < p->nchunks; i++) {
- struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
+ struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
struct drm_amdgpu_cs_chunk user_chunk;
- uint32_t __user *cdata;
chunk_ptr = u64_to_user_ptr(chunk_array[i]);
if (copy_from_user(&user_chunk, chunk_ptr,
@@ -221,20 +214,16 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
p->chunks[i].length_dw = user_chunk.length_dw;
size = p->chunks[i].length_dw;
- cdata = u64_to_user_ptr(user_chunk.chunk_data);
- p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
- GFP_KERNEL);
- if (p->chunks[i].kdata == NULL) {
- ret = -ENOMEM;
+ p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data),
+ size,
+ sizeof(uint32_t));
+ if (IS_ERR(p->chunks[i].kdata)) {
+ ret = PTR_ERR(p->chunks[i].kdata);
i--;
goto free_partial_kdata;
}
size *= sizeof(uint32_t);
- if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
- ret = -EFAULT;
- goto free_partial_kdata;
- }
/* Assume the worst on the following checks */
ret = -EINVAL;
@@ -262,6 +251,10 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
if (size < sizeof(struct drm_amdgpu_bo_list_in))
goto free_partial_kdata;
+ /* Only a single BO list is allowed to simplify handling. */
+ if (p->bo_list)
+ goto free_partial_kdata;
+
ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
if (ret)
goto free_partial_kdata;
@@ -281,16 +274,36 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
}
}
- if (!p->gang_size) {
+ if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) {
ret = -EINVAL;
goto free_all_kdata;
}
for (i = 0; i < p->gang_size; ++i) {
ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
- num_ibs[i], &p->jobs[i]);
+ num_ibs[i], &p->jobs[i],
+ p->filp->client_id);
if (ret)
goto free_all_kdata;
+ switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
+ case AMDGPU_ENFORCE_ISOLATION_DISABLE:
+ default:
+ p->jobs[i]->enforce_isolation = false;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_ENABLE:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = true;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ }
}
p->gang_leader = p->jobs[p->gang_leader_idx];
@@ -343,6 +356,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
ring = amdgpu_job_ring(job);
ib = &job->ibs[job->num_ibs++];
+ /* submissions to kernel queues are disabled */
+ if (ring->no_user_submission)
+ return -EINVAL;
+
/* MM engine doesn't support user fences */
if (p->uf_bo && ring->funcs->no_user_fence)
return -EINVAL;
@@ -367,7 +384,7 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
chunk_ib->ib_bytes : 0,
AMDGPU_IB_POOL_DELAYED, ib);
if (r) {
- DRM_ERROR("Failed to get ib !\n");
+ drm_err(adev_to_drm(p->adev), "Failed to get ib !\n");
return r;
}
@@ -422,7 +439,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
- r = amdgpu_sync_fence(&p->sync, fence);
+ r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
dma_fence_put(fence);
if (r)
return r;
@@ -439,12 +456,12 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
if (r) {
- DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+ drm_err(adev_to_drm(p->adev), "syncobj %u failed to find fence @ %llu (%d)!\n",
handle, point, r);
return r;
}
- r = amdgpu_sync_fence(&p->sync, fence);
+ r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
dma_fence_put(fence);
return r;
}
@@ -818,7 +835,7 @@ retry:
p->bytes_moved += ctx.bytes_moved;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -867,26 +884,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
bool userpage_invalidated = false;
struct amdgpu_bo *bo = e->bo;
- int i;
-
- e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!e->user_pages) {
- DRM_ERROR("kvmalloc_array failure\n");
- r = -ENOMEM;
- goto out_free_user_pages;
- }
- r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
- if (r) {
- kvfree(e->user_pages);
- e->user_pages = NULL;
+ r = amdgpu_ttm_tt_get_user_pages(bo, &e->range);
+ if (r)
goto out_free_user_pages;
- }
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
- if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+ if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) {
userpage_invalidated = true;
break;
}
@@ -930,7 +934,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
- e->user_invalidated && e->user_pages) {
+ e->user_invalidated) {
amdgpu_bo_placement_from_domain(e->bo,
AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
@@ -939,11 +943,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto out_free_user_pages;
amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
- e->user_pages);
+ e->range);
}
-
- kvfree(e->user_pages);
- e->user_pages = NULL;
}
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -951,10 +952,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
p->bytes_moved = 0;
p->bytes_moved_vis = 0;
- r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
- amdgpu_cs_bo_validate, p);
+ r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
+ amdgpu_cs_bo_validate, p);
if (r) {
- DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
+ drm_err(adev_to_drm(p->adev), "amdgpu_vm_validate() failed.\n");
goto out_free_user_pages;
}
@@ -985,11 +986,7 @@ out_free_user_pages:
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->bo;
- if (!e->user_pages)
- continue;
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
- kvfree(e->user_pages);
- e->user_pages = NULL;
e->range = NULL;
}
mutex_unlock(&p->bo_list->bo_list_mutex);
@@ -1032,13 +1029,13 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
if (r) {
- DRM_ERROR("IB va_start is invalid\n");
+ drm_err(adev_to_drm(p->adev), "IB va_start is invalid\n");
return r;
}
if ((va_start + ib->length_dw * 4) >
(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+ drm_err(adev_to_drm(p->adev), "IB va_start+ib_bytes is invalid\n");
return -EINVAL;
}
@@ -1056,6 +1053,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
r = amdgpu_ring_parse_cs(ring, p, job, ib);
if (r)
return r;
+
+ if (ib->sa_bo)
+ ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
} else {
ib->ptr = (uint32_t *)kptr;
r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
@@ -1092,6 +1092,24 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
unsigned int i;
int r;
+ /*
+ * We can't use gang submit with reserved VMIDs when the VM changes
+ * can't be invalidated by more than one engine at the same time.
+ */
+ if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
+ for (i = 0; i < p->gang_size; ++i) {
+ struct drm_sched_entity *entity = p->entities[i];
+ struct drm_gpu_scheduler *sched = entity->rq->sched;
+ struct amdgpu_ring *ring = to_amdgpu_ring(sched);
+
+ if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
+ return -EINVAL;
+ }
+ }
+
+ if (!amdgpu_vm_ready(vm))
+ return -EINVAL;
+
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
return r;
@@ -1100,7 +1118,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
@@ -1111,11 +1130,17 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
}
+ /* FIXME: In theory this loop shouldn't be needed any more when
+ * amdgpu_vm_handle_moved handles all moved BOs that are reserved
+ * with p->ticket. But removing it caused test regressions, so I'm
+ * leaving it here for now.
+ */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
bo_va = e->bo_va;
if (bo_va == NULL)
@@ -1125,12 +1150,13 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
}
- r = amdgpu_vm_handle_moved(adev, vm);
+ r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
if (r)
return r;
@@ -1138,7 +1164,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, vm->last_update);
+ r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
if (r)
return r;
@@ -1151,7 +1177,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
}
- if (amdgpu_vm_debug) {
+ if (adev->debug_vm) {
/* Invalidate all BOs to test for userspace bugs */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->bo;
@@ -1160,7 +1186,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (!bo)
continue;
- amdgpu_vm_bo_invalidate(adev, bo, false);
+ amdgpu_vm_bo_invalidate(bo, false);
}
}
@@ -1180,7 +1206,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
if (r) {
if (r != -ERESTARTSYS)
- DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
+ drm_err(adev_to_drm(p->adev), "amdgpu_ctx_wait_prev_fence failed.\n");
return r;
}
@@ -1219,7 +1245,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
continue;
}
- r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+ r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
+ GFP_KERNEL);
dma_fence_put(fence);
if (r)
return r;
@@ -1392,8 +1419,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_parser_init(&parser, adev, filp, data);
if (r) {
- if (printk_ratelimit())
- DRM_ERROR("Failed to initialize parser %d!\n", r);
+ drm_err_ratelimited(dev, "Failed to initialize parser %d!\n", r);
return r;
}
@@ -1408,9 +1434,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_parser_bos(&parser, data);
if (r) {
if (r == -ENOMEM)
- DRM_ERROR("Not enough memory for command submission!\n");
+ drm_err(dev, "Not enough memory for command submission!\n");
else if (r != -ERESTARTSYS && r != -EAGAIN)
- DRM_ERROR("Failed to process the buffer list %d!\n", r);
+ drm_dbg(dev, "Failed to process the buffer list %d!\n", r);
goto error_fini;
}
@@ -1709,30 +1735,21 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
{
struct amdgpu_device *adev = drm_to_adev(dev);
union drm_amdgpu_wait_fences *wait = data;
- uint32_t fence_count = wait->in.fence_count;
- struct drm_amdgpu_fence *fences_user;
struct drm_amdgpu_fence *fences;
int r;
/* Get the fences from userspace */
- fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
- GFP_KERNEL);
- if (fences == NULL)
- return -ENOMEM;
-
- fences_user = u64_to_user_ptr(wait->in.fences);
- if (copy_from_user(fences, fences_user,
- sizeof(struct drm_amdgpu_fence) * fence_count)) {
- r = -EFAULT;
- goto err_free_fences;
- }
+ fences = memdup_array_user(u64_to_user_ptr(wait->in.fences),
+ wait->in.fence_count,
+ sizeof(struct drm_amdgpu_fence));
+ if (IS_ERR(fences))
+ return PTR_ERR(fences);
if (wait->in.wait_all)
r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
else
r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
-err_free_fences:
kfree(fences);
return r;
@@ -1758,7 +1775,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va_mapping *mapping;
- int r;
+ int i, r;
addr /= AMDGPU_GPU_PAGE_SIZE;
@@ -1773,9 +1790,14 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
return -EINVAL;
- if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
- (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ /* Make sure VRAM is allocated contiguously */
+ (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
+ !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+
amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
+ for (i = 0; i < (*bo)->placement.num_placement; i++)
+ (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 720011019741..02138aa55793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -28,9 +28,8 @@
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
- uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
+ uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(adev);
- addr -= AMDGPU_VA_RESERVED_SIZE;
addr = amdgpu_gmc_sign_extend(addr);
return addr;
@@ -70,7 +69,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_exec exec;
int r;
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(vm, &exec, 0);
if (likely(!r))
@@ -110,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_exec exec;
int r;
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(vm, &exec, 0);
if (likely(!r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0dc9c655c4fb..f5d5c45ddc0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,12 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_DEC] = 1,
[AMDGPU_HW_IP_VCN_ENC] = 1,
[AMDGPU_HW_IP_VCN_JPEG] = 1,
+ [AMDGPU_HW_IP_VPE] = 1,
};
bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
switch (ctx_prio) {
- case AMDGPU_CTX_PRIORITY_UNSET:
case AMDGPU_CTX_PRIORITY_VERY_LOW:
case AMDGPU_CTX_PRIORITY_LOW:
case AMDGPU_CTX_PRIORITY_NORMAL:
@@ -55,6 +55,11 @@ bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
case AMDGPU_CTX_PRIORITY_VERY_HIGH:
return true;
default:
+ case AMDGPU_CTX_PRIORITY_UNSET:
+ /* UNSET priority is not valid and we don't carry that
+ * around, but set it to NORMAL in the only place this
+ * function is called, amdgpu_ctx_ioctl().
+ */
return false;
}
}
@@ -64,13 +69,14 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
switch (ctx_prio) {
case AMDGPU_CTX_PRIORITY_UNSET:
- return DRM_SCHED_PRIORITY_UNSET;
+ pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
+ return DRM_SCHED_PRIORITY_NORMAL;
case AMDGPU_CTX_PRIORITY_VERY_LOW:
- return DRM_SCHED_PRIORITY_MIN;
+ return DRM_SCHED_PRIORITY_LOW;
case AMDGPU_CTX_PRIORITY_LOW:
- return DRM_SCHED_PRIORITY_MIN;
+ return DRM_SCHED_PRIORITY_LOW;
case AMDGPU_CTX_PRIORITY_NORMAL:
return DRM_SCHED_PRIORITY_NORMAL;
@@ -94,9 +100,6 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
int32_t priority)
{
- if (!amdgpu_ctx_priority_is_valid(priority))
- return -EINVAL;
-
/* NORMAL and below are accessible by everyone */
if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
return 0;
@@ -631,8 +634,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
return 0;
}
-
-
static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv, uint32_t id,
bool set, u32 *stable_pstate)
@@ -675,23 +676,33 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
id = args->in.ctx_id;
priority = args->in.priority;
- /* For backwards compatibility reasons, we need to accept
- * ioctls with garbage in the priority field */
+ /* For backwards compatibility, we need to accept ioctls with garbage
+ * in the priority field. Garbage values in the priority field result
+ * in the priority being set to NORMAL.
+ */
if (!amdgpu_ctx_priority_is_valid(priority))
priority = AMDGPU_CTX_PRIORITY_NORMAL;
switch (args->in.op) {
case AMDGPU_CTX_OP_ALLOC_CTX:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
args->out.alloc.ctx_id = id;
break;
case AMDGPU_CTX_OP_FREE_CTX:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_free(fpriv, id);
break;
case AMDGPU_CTX_OP_QUERY_STATE:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
break;
case AMDGPU_CTX_OP_QUERY_STATE2:
+ if (args->in.flags)
+ return -EINVAL;
r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
break;
case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
@@ -908,7 +919,7 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
return timeout;
}
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
@@ -933,24 +944,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
drm_sched_entity_fini(entity);
}
}
+ kref_put(&ctx->refcount, amdgpu_ctx_fini);
}
}
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
- struct amdgpu_ctx *ctx;
- struct idr *idp;
- uint32_t id;
-
amdgpu_ctx_mgr_entity_fini(mgr);
-
- idp = &mgr->ctx_handles;
-
- idr_for_each_entry(idp, ctx, id) {
- if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
- DRM_ERROR("ctx %p is still alive\n", ctx);
- }
-
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->lock);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 85376baaa92f..090dfe86f75b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -92,7 +92,6 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
struct amdgpu_device *adev);
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a4faea4fa0b5..a70651050acf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -402,7 +402,7 @@ static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, siz
int r;
uint32_t *data, x;
- if (size & 0x3 || *pos & 0x3)
+ if (size > 4096 || size & 0x3 || *pos & 0x3)
return -EINVAL;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -540,7 +540,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
while (size) {
uint32_t value;
- value = RREG32_PCIE(*pos);
+ if (upper_32_bits(*pos))
+ value = RREG32_PCIE_EXT(*pos);
+ else
+ value = RREG32_PCIE(*pos);
+
r = put_user(value, (uint32_t *)buf);
if (r)
goto out;
@@ -600,7 +604,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
if (r)
goto out;
- WREG32_PCIE(*pos, value);
+ if (upper_32_bits(*pos))
+ WREG32_PCIE_EXT(*pos, value);
+ else
+ WREG32_PCIE(*pos, value);
result += 4;
buf += 4;
@@ -638,6 +645,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ if (!adev->didt_rreg)
+ return -EOPNOTSUPP;
+
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -694,6 +704,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ if (!adev->didt_wreg)
+ return -EOPNOTSUPP;
+
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -748,6 +761,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
ssize_t result = 0;
int r;
+ if (!adev->smc_rreg)
+ return -EOPNOTSUPP;
+
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
@@ -804,6 +820,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
ssize_t result = 0;
int r;
+ if (!adev->smc_wreg)
+ return -EOPNOTSUPP;
+
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
@@ -899,7 +918,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
/* rev==1 */
config[no_regs++] = adev->rev_id;
- config[no_regs++] = lower_32_bits(adev->pg_flags);
+ config[no_regs++] = adev->pg_flags;
config[no_regs++] = lower_32_bits(adev->cg_flags);
/* rev==2 */
@@ -916,7 +935,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0;
/* rev==5 PG/CG flag upper 32bit */
- config[no_regs++] = upper_32_bits(adev->pg_flags);
+ config[no_regs++] = 0;
config[no_regs++] = upper_32_bits(adev->cg_flags);
while (size && (*pos < no_regs * 4)) {
@@ -1629,7 +1648,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
ent = debugfs_create_file(debugfs_regs_names[i],
- S_IFREG | 0444, root,
+ S_IFREG | 0400, root,
adev, debugfs_regs[i]);
if (!i && !IS_ERR_OR_NULL(ent))
i_size_write(ent->d_inode, adev->rmmio_size);
@@ -1659,9 +1678,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- kthread_park(ring->sched.thread);
+ drm_sched_wqueue_stop(&ring->sched);
}
seq_puts(m, "run ib test:\n");
@@ -1675,9 +1694,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- kthread_unpark(ring->sched.thread);
+ drm_sched_wqueue_start(&ring->sched);
}
up_write(&adev->reset_domain->sem);
@@ -1763,9 +1782,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
list_for_each_entry(file, &dev->filelist, lhead) {
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_task_info *ti;
+
+ ti = amdgpu_vm_get_task_info_vm(vm);
+ if (ti) {
+ seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->task.pid, ti->process_name);
+ amdgpu_vm_put_task_info(ti);
+ }
- seq_printf(m, "pid:%d\tProcess:%s ----------\n",
- vm->task_info.pid, vm->task_info.process_name);
r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
break;
@@ -1878,7 +1902,7 @@ no_preempt:
continue;
}
job = to_amdgpu_job(s_job);
- if (preempted && (&job->hw_fence) == fence)
+ if (preempted && (&job->hw_fence.base) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
@@ -1897,7 +1921,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
ring = adev->rings[val];
- if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring) ||
+ !ring->funcs->preempt_ib)
return -EINVAL;
/* the last preemption failed */
@@ -1915,7 +1940,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
goto pro_end;
/* stop the scheduler */
- kthread_park(ring->sched.thread);
+ drm_sched_wqueue_stop(&ring->sched);
/* preempt the IB */
r = amdgpu_ring_preempt_ib(ring);
@@ -1949,7 +1974,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
failure:
/* restart the scheduler */
- kthread_unpark(ring->sched.thread);
+ drm_sched_wqueue_start(&ring->sched);
up_read(&adev->reset_domain->sem);
@@ -1965,7 +1990,7 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
uint32_t max_freq, min_freq;
struct amdgpu_device *adev = (struct amdgpu_device *)data;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ if (amdgpu_sriov_multi_vf_mode(adev))
return -EINVAL;
ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2001,99 +2026,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL,
DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL,
amdgpu_debugfs_sclk_set, "%llu\n");
-static ssize_t amdgpu_reset_dump_register_list_read(struct file *f,
- char __user *buf, size_t size, loff_t *pos)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
- char reg_offset[12];
- int i, ret, len = 0;
-
- if (*pos)
- return 0;
-
- memset(reg_offset, 0, 12);
- ret = down_read_killable(&adev->reset_domain->sem);
- if (ret)
- return ret;
-
- for (i = 0; i < adev->num_regs; i++) {
- sprintf(reg_offset, "0x%x\n", adev->reset_dump_reg_list[i]);
- up_read(&adev->reset_domain->sem);
- if (copy_to_user(buf + len, reg_offset, strlen(reg_offset)))
- return -EFAULT;
-
- len += strlen(reg_offset);
- ret = down_read_killable(&adev->reset_domain->sem);
- if (ret)
- return ret;
- }
-
- up_read(&adev->reset_domain->sem);
- *pos += len;
-
- return len;
-}
-
-static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
- const char __user *buf, size_t size, loff_t *pos)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
- char reg_offset[11];
- uint32_t *new = NULL, *tmp = NULL;
- int ret, i = 0, len = 0;
-
- do {
- memset(reg_offset, 0, 11);
- if (copy_from_user(reg_offset, buf + len,
- min(10, ((int)size-len)))) {
- ret = -EFAULT;
- goto error_free;
- }
-
- new = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL);
- if (!new) {
- ret = -ENOMEM;
- goto error_free;
- }
- tmp = new;
- if (sscanf(reg_offset, "%X %n", &tmp[i], &ret) != 1) {
- ret = -EINVAL;
- goto error_free;
- }
-
- len += ret;
- i++;
- } while (len < size);
-
- new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL);
- if (!new) {
- ret = -ENOMEM;
- goto error_free;
- }
- ret = down_write_killable(&adev->reset_domain->sem);
- if (ret)
- goto error_free;
-
- swap(adev->reset_dump_reg_list, tmp);
- swap(adev->reset_dump_reg_value, new);
- adev->num_regs = i;
- up_write(&adev->reset_domain->sem);
- ret = size;
-
-error_free:
- if (tmp != new)
- kfree(tmp);
- kfree(new);
- return ret;
-}
-
-static const struct file_operations amdgpu_reset_dump_register_list = {
- .owner = THIS_MODULE,
- .read = amdgpu_reset_dump_register_list_read,
- .write = amdgpu_reset_dump_register_list_write,
- .llseek = default_llseek
-};
-
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
@@ -2134,6 +2066,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_firmware_init(adev);
amdgpu_ta_if_debugfs_init(adev);
+ amdgpu_debugfs_mes_event_log_init(adev);
+
#if defined(CONFIG_DRM_AMD_DC)
if (adev->dc_enabled)
dtn_debugfs_init(adev);
@@ -2158,23 +2092,31 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_vcn_fwlog_init(adev, i, &adev->vcn.inst[i]);
}
+ if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog)
+ amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm);
+
+ amdgpu_debugfs_vcn_sched_mask_init(adev);
+ amdgpu_debugfs_jpeg_sched_mask_init(adev);
+ amdgpu_debugfs_gfx_sched_mask_init(adev);
+ amdgpu_debugfs_compute_sched_mask_init(adev);
+ amdgpu_debugfs_sdma_sched_mask_init(adev);
+
amdgpu_ras_debugfs_create_all(adev);
amdgpu_rap_debugfs_init(adev);
amdgpu_securedisplay_debugfs_init(adev);
amdgpu_fw_attestation_debugfs_init(adev);
+ amdgpu_psp_debugfs_init(adev);
- debugfs_create_file("amdgpu_evict_vram", 0444, root, adev,
+ debugfs_create_file("amdgpu_evict_vram", 0400, root, adev,
&amdgpu_evict_vram_fops);
- debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev,
+ debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev,
&amdgpu_evict_gtt_fops);
- debugfs_create_file("amdgpu_test_ib", 0444, root, adev,
+ debugfs_create_file("amdgpu_test_ib", 0400, root, adev,
&amdgpu_debugfs_test_ib_fops);
debugfs_create_file("amdgpu_vm_info", 0444, root, adev,
&amdgpu_debugfs_vm_info_fops);
debugfs_create_file("amdgpu_benchmark", 0200, root, adev,
&amdgpu_benchmark_fops);
- debugfs_create_file("amdgpu_reset_dump_register_list", 0644, root, adev,
- &amdgpu_reset_dump_register_list);
adev->debugfs_vbios_blob.data = adev->bios;
adev->debugfs_vbios_blob.size = adev->bios_size;
@@ -2189,6 +2131,61 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
return 0;
}
+static int amdgpu_pt_info_read(struct seq_file *m, void *unused)
+{
+ struct drm_file *file;
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_bo *root_bo;
+ struct amdgpu_device *adev;
+ int r;
+
+ file = m->private;
+ if (!file)
+ return -EINVAL;
+
+ adev = drm_to_adev(file->minor->dev);
+ fpriv = file->driver_priv;
+ if (!fpriv || !fpriv->vm.root.bo)
+ return -ENODEV;
+
+ root_bo = amdgpu_bo_ref(fpriv->vm.root.bo);
+ r = amdgpu_bo_reserve(root_bo, true);
+ if (r) {
+ amdgpu_bo_unref(&root_bo);
+ return -EINVAL;
+ }
+
+ seq_printf(m, "pd_address: 0x%llx\n", amdgpu_gmc_pd_addr(fpriv->vm.root.bo));
+ seq_printf(m, "max_pfn: 0x%llx\n", adev->vm_manager.max_pfn);
+ seq_printf(m, "num_level: 0x%x\n", adev->vm_manager.num_level);
+ seq_printf(m, "block_size: 0x%x\n", adev->vm_manager.block_size);
+ seq_printf(m, "fragment_size: 0x%x\n", adev->vm_manager.fragment_size);
+
+ amdgpu_bo_unreserve(root_bo);
+ amdgpu_bo_unref(&root_bo);
+
+ return 0;
+}
+
+static int amdgpu_pt_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, amdgpu_pt_info_read, inode->i_private);
+}
+
+static const struct file_operations amdgpu_pt_info_fops = {
+ .owner = THIS_MODULE,
+ .open = amdgpu_pt_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void amdgpu_debugfs_vm_init(struct drm_file *file)
+{
+ debugfs_create_file("vm_pagetable_info", 0444, file->debugfs_client, file,
+ &amdgpu_pt_info_fops);
+}
+
#else
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
@@ -2198,4 +2195,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
return 0;
}
+void amdgpu_debugfs_vm_init(struct drm_file *file)
+{
+}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 371a6f0deb29..e7b3c38e5186 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -32,3 +32,6 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_vm_init(struct drm_file *file);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
new file mode 100644
index 000000000000..8a026bc9ea44
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <generated/utsrelease.h>
+#include <linux/devcoredump.h>
+#include "amdgpu_dev_coredump.h"
+#include "atom.h"
+
+#ifndef CONFIG_DEV_COREDUMP
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job)
+{
+}
+#else
+
+const char *hw_ip_names[MAX_HWIP] = {
+ [GC_HWIP] = "GC",
+ [HDP_HWIP] = "HDP",
+ [SDMA0_HWIP] = "SDMA0",
+ [SDMA1_HWIP] = "SDMA1",
+ [SDMA2_HWIP] = "SDMA2",
+ [SDMA3_HWIP] = "SDMA3",
+ [SDMA4_HWIP] = "SDMA4",
+ [SDMA5_HWIP] = "SDMA5",
+ [SDMA6_HWIP] = "SDMA6",
+ [SDMA7_HWIP] = "SDMA7",
+ [LSDMA_HWIP] = "LSDMA",
+ [MMHUB_HWIP] = "MMHUB",
+ [ATHUB_HWIP] = "ATHUB",
+ [NBIO_HWIP] = "NBIO",
+ [MP0_HWIP] = "MP0",
+ [MP1_HWIP] = "MP1",
+ [UVD_HWIP] = "UVD/JPEG/VCN",
+ [VCN1_HWIP] = "VCN1",
+ [VCE_HWIP] = "VCE",
+ [VPE_HWIP] = "VPE",
+ [DF_HWIP] = "DF",
+ [DCE_HWIP] = "DCE",
+ [OSSSYS_HWIP] = "OSSSYS",
+ [SMUIO_HWIP] = "SMUIO",
+ [PWR_HWIP] = "PWR",
+ [NBIF_HWIP] = "NBIF",
+ [THM_HWIP] = "THM",
+ [CLK_HWIP] = "CLK",
+ [UMC_HWIP] = "UMC",
+ [RSMU_HWIP] = "RSMU",
+ [XGMI_HWIP] = "XGMI",
+ [DCI_HWIP] = "DCI",
+ [PCIE_HWIP] = "PCIE",
+};
+
+static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
+ struct drm_printer *p)
+{
+ uint32_t version;
+ uint32_t feature;
+ uint8_t smu_program, smu_major, smu_minor, smu_debug;
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
+ adev->vce.fb_version, adev->vce.fw_version);
+ drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n", 0,
+ adev->uvd.fw_version);
+ drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n", 0,
+ adev->gmc.fw_version);
+ drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.me_feature_version, adev->gfx.me_fw_version);
+ drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version);
+ drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.ce_feature_version, adev->gfx.ce_fw_version);
+ drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version);
+
+ drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srlc_feature_version,
+ adev->gfx.rlc_srlc_fw_version);
+ drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srlg_feature_version,
+ adev->gfx.rlc_srlg_fw_version);
+ drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srls_feature_version,
+ adev->gfx.rlc_srls_fw_version);
+ drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlcp_ucode_feature_version,
+ adev->gfx.rlcp_ucode_version);
+ drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlcv_ucode_feature_version,
+ adev->gfx.rlcv_ucode_version);
+ drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.mec_feature_version, adev->gfx.mec_fw_version);
+
+ if (adev->gfx.mec2_fw)
+ drm_printf(p, "MEC2 feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.mec2_feature_version,
+ adev->gfx.mec2_fw_version);
+
+ drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n", 0,
+ adev->gfx.imu_fw_version);
+ drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n",
+ adev->psp.sos.feature_version, adev->psp.sos.fw_version);
+ drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n",
+ adev->psp.asd_context.bin_desc.feature_version,
+ adev->psp.asd_context.bin_desc.fw_version);
+
+ drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.xgmi_context.context.bin_desc.feature_version,
+ adev->psp.xgmi_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.ras_context.context.bin_desc.feature_version,
+ adev->psp.ras_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.hdcp_context.context.bin_desc.feature_version,
+ adev->psp.hdcp_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.dtm_context.context.bin_desc.feature_version,
+ adev->psp.dtm_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.rap_context.context.bin_desc.feature_version,
+ adev->psp.rap_context.context.bin_desc.fw_version);
+ drm_printf(p,
+ "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.securedisplay_context.context.bin_desc.feature_version,
+ adev->psp.securedisplay_context.context.bin_desc.fw_version);
+
+ /* SMC firmware */
+ version = adev->pm.fw_version;
+
+ smu_program = (version >> 24) & 0xff;
+ smu_major = (version >> 16) & 0xff;
+ smu_minor = (version >> 8) & 0xff;
+ smu_debug = (version >> 0) & 0xff;
+ drm_printf(p,
+ "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n",
+ 0, smu_program, version, smu_major, smu_minor, smu_debug);
+
+ /* SDMA firmware */
+ for (int i = 0; i < adev->sdma.num_instances; i++) {
+ drm_printf(p,
+ "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+ i, adev->sdma.instance[i].feature_version,
+ adev->sdma.instance[i].fw_version);
+ }
+
+ drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n", 0,
+ adev->vcn.fw_version);
+ drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n", 0,
+ adev->dm.dmcu_fw_version);
+ drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n", 0,
+ adev->dm.dmcub_fw_version);
+ drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n",
+ adev->psp.toc.feature_version, adev->psp.toc.fw_version);
+
+ version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+ feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
+ AMDGPU_MES_FEAT_VERSION_SHIFT;
+ drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n",
+ feature, version);
+
+ version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+ feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
+ AMDGPU_MES_FEAT_VERSION_SHIFT;
+ drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n", feature,
+ version);
+
+ drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
+ adev->vpe.feature_version, adev->vpe.fw_version);
+
+ drm_printf(p, "\nVBIOS Information\n");
+ drm_printf(p, "vbios name : %s\n", ctx->name);
+ drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn);
+ drm_printf(p, "vbios version : %d\n", ctx->version);
+ drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str);
+ drm_printf(p, "vbios date : %s\n", ctx->date);
+}
+
+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+ void *data, size_t datalen)
+{
+ struct drm_printer p;
+ struct amdgpu_coredump_info *coredump = data;
+ struct drm_print_iterator iter;
+ struct amdgpu_vm_fault_info *fault_info;
+ struct amdgpu_ip_block *ip_block;
+ int ver;
+
+ iter.data = buffer;
+ iter.offset = 0;
+ iter.start = offset;
+ iter.remain = count;
+
+ p = drm_coredump_printer(&iter);
+
+ drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
+ drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
+ drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+ drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+ drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec,
+ coredump->reset_time.tv_nsec);
+
+ if (coredump->reset_task_info.task.pid)
+ drm_printf(&p, "process_name: %s PID: %d\n",
+ coredump->reset_task_info.process_name,
+ coredump->reset_task_info.task.pid);
+
+ /* SOC Information */
+ drm_printf(&p, "\nSOC Information\n");
+ drm_printf(&p, "SOC Device id: %d\n", coredump->adev->pdev->device);
+ drm_printf(&p, "SOC PCI Revision id: %d\n", coredump->adev->pdev->revision);
+ drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
+ drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
+ drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id);
+
+ /* Memory Information */
+ drm_printf(&p, "\nSOC Memory Information\n");
+ drm_printf(&p, "real vram size: %llu\n", coredump->adev->gmc.real_vram_size);
+ drm_printf(&p, "visible vram size: %llu\n", coredump->adev->gmc.visible_vram_size);
+ drm_printf(&p, "gtt size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size);
+
+ /* GDS Config */
+ drm_printf(&p, "\nGDS Config\n");
+ drm_printf(&p, "gds: total size: %d\n", coredump->adev->gds.gds_size);
+ drm_printf(&p, "gds: compute partition size: %d\n", coredump->adev->gds.gds_size);
+ drm_printf(&p, "gds: gws per compute partition: %d\n", coredump->adev->gds.gws_size);
+ drm_printf(&p, "gds: os per compute partition: %d\n", coredump->adev->gds.oa_size);
+
+ /* HWIP Version Information */
+ drm_printf(&p, "\nHW IP Version Information\n");
+ for (int i = 1; i < MAX_HWIP; i++) {
+ for (int j = 0; j < HWIP_MAX_INSTANCE; j++) {
+ ver = coredump->adev->ip_versions[i][j];
+ if (ver)
+ drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n",
+ hw_ip_names[i], i, j,
+ IP_VERSION_MAJ(ver),
+ IP_VERSION_MIN(ver),
+ IP_VERSION_REV(ver),
+ IP_VERSION_VARIANT(ver),
+ IP_VERSION_SUBREV(ver));
+ }
+ }
+
+ /* IP firmware information */
+ drm_printf(&p, "\nIP Firmwares\n");
+ amdgpu_devcoredump_fw_info(coredump->adev, &p);
+
+ if (coredump->ring) {
+ drm_printf(&p, "\nRing timed out details\n");
+ drm_printf(&p, "IP Type: %d Ring Name: %s\n",
+ coredump->ring->funcs->type,
+ coredump->ring->name);
+ }
+
+ /* Add page fault information */
+ fault_info = &coredump->adev->vm_manager.fault_info;
+ drm_printf(&p, "\n[%s] Page fault observed\n",
+ fault_info->vmhub ? "mmhub" : "gfxhub");
+ drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr);
+ drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status);
+
+ /* dump the ip state for each ip */
+ drm_printf(&p, "IP Dump\n");
+ for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
+ ip_block = &coredump->adev->ip_blocks[i];
+ if (ip_block->version->funcs->print_ip_state) {
+ drm_printf(&p, "IP: %s\n", ip_block->version->funcs->name);
+ ip_block->version->funcs->print_ip_state(ip_block, &p);
+ drm_printf(&p, "\n");
+ }
+ }
+
+ /* Add ring buffer information */
+ drm_printf(&p, "Ring buffer information\n");
+ for (int i = 0; i < coredump->adev->num_rings; i++) {
+ int j = 0;
+ struct amdgpu_ring *ring = coredump->adev->rings[i];
+
+ drm_printf(&p, "ring name: %s\n", ring->name);
+ drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
+ amdgpu_ring_get_rptr(ring),
+ amdgpu_ring_get_wptr(ring),
+ ring->buf_mask);
+ drm_printf(&p, "Ring size in dwords: %d\n",
+ ring->ring_size / 4);
+ drm_printf(&p, "Ring contents\n");
+ drm_printf(&p, "Offset \t Value\n");
+
+ while (j < ring->ring_size) {
+ drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]);
+ j += 4;
+ }
+ }
+
+ if (coredump->skip_vram_check)
+ drm_printf(&p, "VRAM lost check is skipped!\n");
+ else if (coredump->reset_vram_lost)
+ drm_printf(&p, "VRAM is lost due to GPU reset!\n");
+
+ return count - iter.remain;
+}
+
+static void amdgpu_devcoredump_free(void *data)
+{
+ kfree(data);
+}
+
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct amdgpu_coredump_info *coredump;
+ struct drm_sched_job *s_job;
+
+ coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
+
+ if (!coredump) {
+ DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
+ return;
+ }
+
+ coredump->skip_vram_check = skip_vram_check;
+ coredump->reset_vram_lost = vram_lost;
+
+ if (job && job->pasid) {
+ struct amdgpu_task_info *ti;
+
+ ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
+ if (ti) {
+ coredump->reset_task_info = *ti;
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+
+ if (job) {
+ s_job = &job->base;
+ coredump->ring = to_amdgpu_ring(s_job->sched);
+ }
+
+ coredump->adev = adev;
+
+ ktime_get_ts64(&coredump->reset_time);
+
+ dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
+ amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+
+ drm_info(dev, "AMDGPU device coredump file has been created\n");
+ drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
+ dev->primary->index);
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
new file mode 100644
index 000000000000..ef9772c6bcc9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DEV_COREDUMP_H__
+#define __AMDGPU_DEV_COREDUMP_H__
+
+#include "amdgpu.h"
+
+#ifdef CONFIG_DEV_COREDUMP
+
+#define AMDGPU_COREDUMP_VERSION "1"
+
+struct amdgpu_coredump_info {
+ struct amdgpu_device *adev;
+ struct amdgpu_task_info reset_task_info;
+ struct timespec64 reset_time;
+ bool skip_vram_check;
+ bool reset_vram_lost;
+ struct amdgpu_ring *ring;
+};
+#endif
+
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 30c4f5cca02c..7a899fb4de29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -25,6 +25,8 @@
* Alex Deucher
* Jerome Glisse
*/
+
+#include <linux/aperture.h>
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
@@ -32,17 +34,15 @@
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
-#include <linux/devcoredump.h>
-#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>
-#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client_event.h>
#include <drm/drm_crtc_helper.h>
-#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
+#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
@@ -74,6 +74,8 @@
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
+#include "amdgpu_virt.h"
+#include "amdgpu_dev_coredump.h"
#include <linux/suspend.h>
#include <drm/task_barrier.h>
@@ -83,6 +85,7 @@
#if IS_ENABLED(CONFIG_X86)
#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#endif
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
@@ -92,10 +95,17 @@ MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
#define AMDGPU_MAX_RETRY_LIMIT 2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
+#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
+#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
+#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
+
+#define AMDGPU_VBIOS_SKIP (1U << 0)
+#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
static const struct drm_driver amdgpu_kms_driver;
@@ -140,13 +150,72 @@ const char *amdgpu_asic_name[] = {
"LAST",
};
+#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
+/*
+ * Default init level where all blocks are expected to be initialized. This is
+ * the level of initialization expected by default and also after a full reset
+ * of the device.
+ */
+struct amdgpu_init_level amdgpu_init_default = {
+ .level = AMDGPU_INIT_LEVEL_DEFAULT,
+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+struct amdgpu_init_level amdgpu_init_recovery = {
+ .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+/*
+ * Minimal blocks needed to be initialized before an XGMI hive can be reset.
+ * This is used for cases like reset on initialization where the entire hive
+ * needs to be reset before first use.
+ */
+struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
+ .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+ .hwini_ip_block_mask =
+ BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
+ BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
+ BIT(AMD_IP_BLOCK_TYPE_PSP)
+};
+
+static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
+
+static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
+ enum amd_ip_block_type block)
+{
+ return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
+}
+
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+ enum amdgpu_init_lvl_id lvl)
+{
+ switch (lvl) {
+ case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
+ adev->init_lvl = &amdgpu_init_minimal_xgmi;
+ break;
+ case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
+ adev->init_lvl = &amdgpu_init_recovery;
+ break;
+ case AMDGPU_INIT_LEVEL_DEFAULT:
+ fallthrough;
+ default:
+ adev->init_lvl = &amdgpu_init_default;
+ break;
+ }
+}
+
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data);
+
/**
* DOC: pcie_replay_count
*
* The amdgpu driver provides a sysfs API for reporting the total number
- * of PCIe replays (NAKs)
+ * of PCIe replays (NAKs).
* The file pcie_replay_count is used for this and returns the total
- * number of replays as a sum of the NAKs generated and NAKs received
+ * number of replays as a sum of the NAKs generated and NAKs received.
*/
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
@@ -162,21 +231,199 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
static DEVICE_ATTR(pcie_replay_count, 0444,
amdgpu_device_get_pcie_replay_count, NULL);
-static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
+/*
+ * Create the pcie_replay_count sysfs file on the device kobject when
+ * amdgpu_nbio_is_replay_cnt_supported() reports support.
+ *
+ * Returns 0 when the file is not created because the counter is unsupported,
+ * otherwise the result of sysfs_create_file().
+ */
+static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ if (amdgpu_nbio_is_replay_cnt_supported(adev))
+ ret = sysfs_create_file(&adev->dev->kobj,
+ &dev_attr_pcie_replay_count.attr);
+
+ return ret;
+}
+
+/*
+ * Remove the pcie_replay_count sysfs file. Guarded by the same support check
+ * as amdgpu_device_attr_sysfs_init(), so removal is only attempted when the
+ * file would have been created.
+ */
+static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_nbio_is_replay_cnt_supported(adev))
+ sysfs_remove_file(&adev->dev->kobj,
+ &dev_attr_pcie_replay_count.attr);
+}
+
+/*
+ * Read callback for the reg_state binary attribute.
+ *
+ * The file offset @ppos is not used as a byte position here: it selects which
+ * register state type is fetched through amdgpu_asic_get_reg_state(). Any
+ * offset other than the AMDGPU_SYS_REG_STATE_* values handled below yields
+ * -EINVAL. Otherwise the value returned by amdgpu_asic_get_reg_state() is
+ * passed back unchanged.
+ */
+static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
+ const struct bin_attribute *attr, char *buf,
+ loff_t ppos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ ssize_t bytes_read;
+ switch (ppos) {
+ case AMDGPU_SYS_REG_STATE_XGMI:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_WAFL:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_PCIE:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_USR:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_USR_1:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return bytes_read;
+}
+
+/*
+ * Declares bin_attr_reg_state: a read-only (0444) binary attribute with no
+ * write handler, read through amdgpu_sysfs_reg_state_get(), and with
+ * AMDGPU_SYS_REG_STATE_END passed as its size argument.
+ */
+static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
+ AMDGPU_SYS_REG_STATE_END);
+
+/*
+ * amdgpu_reg_state_sysfs_init - create the reg_state binary sysfs file
+ *
+ * Returns 0 without creating anything when
+ * amdgpu_asic_get_reg_state_supported() is false, otherwise the result of
+ * sysfs_create_bin_file().
+ */
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (!amdgpu_asic_get_reg_state_supported(adev))
+ return 0;
+
+ ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+
+ return ret;
+}
+
+/*
+ * amdgpu_reg_state_sysfs_fini - remove the reg_state binary sysfs file
+ *
+ * Does nothing when amdgpu_asic_get_reg_state_supported() is false, which is
+ * the same condition under which amdgpu_reg_state_sysfs_init() creates no file.
+ */
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_asic_get_reg_state_supported(adev))
+ return;
+ sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+}
+
+/*
+ * amdgpu_ip_block_suspend - run the optional suspend callback of an IP block
+ *
+ * @ip_block: IP block to suspend
+ *
+ * If the callback fails, the error is logged and returned and status.hw is
+ * left untouched. Otherwise (including when the block has no suspend
+ * callback) status.hw is cleared and 0 is returned.
+ */
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (ip_block->version->funcs->suspend) {
+ r = ip_block->version->funcs->suspend(ip_block);
+ if (r) {
+ dev_err(ip_block->adev->dev,
+ "suspend of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ return r;
+ }
+ }
+
+ ip_block->status.hw = false;
+ return 0;
+}
+
+/*
+ * amdgpu_ip_block_resume - run the optional resume callback of an IP block
+ *
+ * @ip_block: IP block to resume
+ *
+ * If the callback fails, the error is logged and returned and status.hw is
+ * left untouched. Otherwise (including when the block has no resume
+ * callback) status.hw is set and 0 is returned.
+ */
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (ip_block->version->funcs->resume) {
+ r = ip_block->version->funcs->resume(ip_block);
+ if (r) {
+ dev_err(ip_block->adev->dev,
+ "resume of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ return r;
+ }
+ }
+
+ ip_block->status.hw = true;
+ return 0;
+}
+
+/**
+ * DOC: board_info
+ *
+ * The amdgpu driver provides a sysfs API for giving board related information.
+ * It provides the form factor information in the format
+ *
+ * type : form factor
+ *
+ * Possible form factor values
+ *
+ * - "cem" - PCIE CEM card
+ * - "oam" - Open Compute Accelerator Module
+ * - "unknown" - Not known
+ *
+ */
+
+/*
+ * Show callback for the board_info attribute: emits "type : <form factor>".
+ * The package type defaults to CEM and is only overridden when the smuio
+ * function table provides a get_pkg_type callback.
+ */
+static ssize_t amdgpu_device_get_board_info(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
+ const char *pkg;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
+ pkg_type = adev->smuio.funcs->get_pkg_type(adev);
+
+ switch (pkg_type) {
+ case AMDGPU_PKG_TYPE_CEM:
+ pkg = "cem";
+ break;
+ case AMDGPU_PKG_TYPE_OAM:
+ pkg = "oam";
+ break;
+ default:
+ pkg = "unknown";
+ break;
+ }
+
+ return sysfs_emit(buf, "%s : %s\n", "type", pkg);
+}
+
+/* Read-only (0444) board_info attribute, shown by amdgpu_device_get_board_info(). */
+static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
+
+/* NULL-terminated attribute list referenced by amdgpu_board_attrs_group. */
+static struct attribute *amdgpu_board_attrs[] = {
+ &dev_attr_board_info.attr,
+ NULL,
+};
+
+/*
+ * Visibility callback for the board attribute group: returns 0 (hidden) when
+ * the device has AMD_IS_APU set, otherwise the attribute's declared mode.
+ */
+static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ return attr->mode;
+}
+
+/*
+ * Attribute group tying the board attributes to their visibility callback,
+ * amdgpu_board_attrs_is_visible().
+ */
+static const struct attribute_group amdgpu_board_attrs_group = {
+ .attrs = amdgpu_board_attrs,
+ .is_visible = amdgpu_board_attrs_is_visible
+};
+
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
* amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
* Returns true if the device is a dGPU with ATPX power control,
* otherwise return false.
*/
-bool amdgpu_device_supports_px(struct drm_device *dev)
+bool amdgpu_device_supports_px(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
-
if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
return true;
return false;
@@ -185,14 +432,15 @@ bool amdgpu_device_supports_px(struct drm_device *dev)
/**
* amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
* Returns true if the device is a dGPU with ACPI power control,
* otherwise return false.
*/
-bool amdgpu_device_supports_boco(struct drm_device *dev)
+bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
+ if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+ return false;
if (adev->has_pr3 ||
((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
@@ -203,30 +451,105 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
/**
* amdgpu_device_supports_baco - Does the device support BACO
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
- * Returns true if the device supporte BACO,
- * otherwise return false.
+ * Return:
+ * 1 if the device supports BACO;
+ * 3 if the device supports MACO (only works if BACO is supported);
+ * otherwise return 0.
*/
-bool amdgpu_device_supports_baco(struct drm_device *dev)
+int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
-
return amdgpu_asic_supports_baco(adev);
}
+/*
+ * amdgpu_device_detect_runtime_pm_mode - choose adev->pm.rpm_mode
+ *
+ * @adev: amdgpu device pointer
+ *
+ * The mode starts as AMDGPU_RUNPM_NONE and is then selected from the
+ * amdgpu_runtime_pm setting:
+ *   2      - BAMACO if MACO is supported, else BACO if the support value is
+ *            exactly BACO_SUPPORT
+ *   1      - BACO if supported
+ *   -1, -2 - PX, then BOCO, then BACO/BAMACO subject to the per-ASIC checks
+ *   0 or any other value - the mode is left as AMDGPU_RUNPM_NONE
+ *
+ * A message is logged when the mode ends up as AMDGPU_RUNPM_NONE.
+ */
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
+{
+ int bamaco_support;
+
+ adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
+ bamaco_support = amdgpu_device_supports_baco(adev);
+
+ switch (amdgpu_runtime_pm) {
+ case 2:
+ if (bamaco_support & MACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
+ dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
+ } else if (bamaco_support == BACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
+ }
+ break;
+ case 1:
+ if (bamaco_support & BACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ dev_info(adev->dev, "Forcing BACO for runtime pm\n");
+ }
+ break;
+ case -1:
+ case -2:
+ if (amdgpu_device_supports_px(adev)) {
+ /* enable PX as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
+ dev_info(adev->dev, "Using ATPX for runtime pm\n");
+ } else if (amdgpu_device_supports_boco(adev)) {
+ /* enable boco as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
+ dev_info(adev->dev, "Using BOCO for runtime pm\n");
+ } else {
+ if (!bamaco_support)
+ goto no_runtime_pm;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ /* BACO is not supported on vega20 and arcturus */
+ break;
+ case CHIP_VEGA10:
+ /* enable BACO as runpm mode if noretry=0 */
+ if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ default:
+ /* enable BACO as runpm mode on CI+ */
+ if (!amdgpu_passthrough(adev))
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ }
+
+ /* upgrade the BACO choice to BAMACO when MACO is also supported */
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
+ if (bamaco_support & MACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
+ dev_info(adev->dev, "Using BAMACO for runtime pm\n");
+ } else {
+ dev_info(adev->dev, "Using BACO for runtime pm\n");
+ }
+ }
+ }
+ break;
+ case 0:
+ dev_info(adev->dev, "runtime pm is manually disabled\n");
+ break;
+ default:
+ break;
+ }
+
+no_runtime_pm:
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
+ dev_info(adev->dev, "Runtime PM not available\n");
+}
/**
* amdgpu_device_supports_smart_shift - Is the device dGPU with
* smart shift support
*
- * @dev: drm_device pointer
+ * @adev: amdgpu device pointer
*
* Returns true if the device is a dGPU with Smart Shift support,
* otherwise returns false.
*/
-bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
+bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
{
- return (amdgpu_device_supports_boco(dev) &&
+ return (amdgpu_device_supports_boco(adev) &&
amdgpu_acpi_is_power_shift_control_supported());
}
@@ -277,7 +600,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
}
/**
- * amdgpu_device_aper_access - access vram by vram aperature
+ * amdgpu_device_aper_access - access vram by vram aperture
*
* @adev: amdgpu_device pointer
* @pos: offset of the buffer in vram
@@ -368,7 +691,7 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
* here is that the GPU reset is not running on another thread in parallel.
*
* For this we trylock the read side of the reset semaphore, if that succeeds
- * we know that the reset is not running in paralell.
+ * we know that the reset is not running in parallel.
*
* If the trylock fails we assert that we are either already holding the read
* side of the lock or are the reset thread itself and hold the write side of
@@ -405,7 +728,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(&adev->reset_domain->sem)) {
- ret = amdgpu_kiq_rreg(adev, reg);
+ ret = amdgpu_kiq_rreg(adev, reg, 0);
up_read(&adev->reset_domain->sem);
} else {
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
@@ -442,6 +765,49 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
BUG();
}
+
+/**
+ * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Returns the 32 bit value from the offset specified, or 0 when
+ * amdgpu_device_skip_hw_access() says hardware access must be skipped.
+ */
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags,
+ uint32_t xcc_id)
+{
+ uint32_t ret, rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ /* SR-IOV VF outside runtime with RLCG access available: read via RLCG */
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
+ GC_HWIP, false,
+ &rlcg_flag)) {
+ ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ /* KIQ read, done while holding the reset semaphore for reading */
+ ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ /* byte offset is not below rmmio_size: use the pcie_rreg callback */
+ ret = adev->pcie_rreg(adev, reg * 4);
+ }
+
+ return ret;
+}
+
/*
* MMIO register write with bytes helper functions
* @offset:bytes offset from MMIO start
@@ -489,7 +855,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(&adev->reset_domain->sem)) {
- amdgpu_kiq_wreg(adev, reg, v);
+ amdgpu_kiq_wreg(adev, reg, v, 0);
up_read(&adev->reset_domain->sem);
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
@@ -507,6 +873,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
* @adev: amdgpu_device pointer
* @reg: mmio/rlc register
* @v: value to write
+ * @xcc_id: xcc accelerated compute core id
*
* this function is invoked only for the debugfs register access
*/
@@ -530,6 +897,47 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
}
/**
+ * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags, uint32_t xcc_id)
+{
+ uint32_t rlcg_flag;
+
+ /* silently drop the write when hardware access must be skipped */
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ /* SR-IOV VF outside runtime with RLCG access available: write via RLCG */
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
+ GC_HWIP, true,
+ &rlcg_flag)) {
+ amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ /* KIQ write, done while holding the reset semaphore for reading */
+ amdgpu_kiq_wreg(adev, reg, v, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ /* byte offset is not below rmmio_size: use the pcie_wreg callback */
+ adev->pcie_wreg(adev, reg * 4, v);
+ }
+}
+
+/**
* amdgpu_device_indirect_rreg - read an indirect register
*
* @adev: amdgpu_device pointer
@@ -569,12 +977,22 @@ u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
void __iomem *pcie_index_hi_offset;
void __iomem *pcie_data_offset;
- pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
- pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
- if (adev->nbio.funcs->get_pcie_index_hi_offset)
- pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
- else
+ if (unlikely(!adev->nbio.funcs)) {
+ pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
+ pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
+ } else {
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ }
+
+ if (reg_addr >> 32) {
+ if (unlikely(!adev->nbio.funcs))
+ pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
+ else
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ } else {
pcie_index_hi = 0;
+ }
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -638,6 +1056,56 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
return r;
}
+/*
+ * amdgpu_device_indirect_rreg64_ext - read a 64 bit indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register address, may be wider than 32 bits
+ *
+ * The value is read through the PCIE index/data register pair as two 32 bit
+ * reads, at @reg_addr and @reg_addr + 4, under pcie_idx_lock. The index-hi
+ * register is only used when @reg_addr has bits set above bit 31 and the nbio
+ * function table provides get_pcie_index_hi_offset; in that case bits 32-39
+ * of @reg_addr are written to it for each access and it is cleared again
+ * before the lock is released.
+ *
+ * NOTE(review): adev->nbio.funcs is dereferenced without a NULL check here.
+ */
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+ u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ /* pcie_index_hi_offset stays unset, and unused, when pcie_index_hi is 0 */
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ /* read low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r = readl(pcie_data_offset);
+ /* read high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r |= ((u64)readl(pcie_data_offset) << 32);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
+
/**
* amdgpu_device_indirect_wreg - write an indirect register address
*
@@ -677,7 +1145,7 @@ void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
- if (adev->nbio.funcs->get_pcie_index_hi_offset)
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
else
pcie_index_hi = 0;
@@ -742,6 +1210,55 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
+/*
+ * amdgpu_device_indirect_wreg64_ext - write a 64 bit indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register address, may be wider than 32 bits
+ * @reg_data: 64 bit value to write
+ *
+ * The value is written through the PCIE index/data register pair as two
+ * 32 bit writes, low half at @reg_addr and high half at @reg_addr + 4, under
+ * pcie_idx_lock. The index-hi register is only used when @reg_addr has bits
+ * set above bit 31 and the nbio function table provides
+ * get_pcie_index_hi_offset; in that case bits 32-39 of @reg_addr are written
+ * to it for each access and it is cleared again before the lock is released.
+ *
+ * NOTE(review): adev->nbio.funcs is dereferenced without a NULL check here.
+ */
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u64 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ /* pcie_index_hi_offset stays unset, and unused, when pcie_index_hi is 0 */
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ /* write low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
+ readl(pcie_data_offset);
+ /* write high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data >> 32), pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
/**
* amdgpu_device_get_rev_id - query device rev_id
*
@@ -766,14 +1283,14 @@ u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
*/
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
- DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
+ dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg);
BUG();
return 0;
}
static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
- DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
+ dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
BUG();
return 0;
}
@@ -790,15 +1307,17 @@ static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg
*/
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
- DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
- reg, v);
+ dev_err(adev->dev,
+ "Invalid callback to write register 0x%04X with 0x%08X\n", reg,
+ v);
BUG();
}
static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
- DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
- reg, v);
+ dev_err(adev->dev,
+ "Invalid callback to write register 0x%llX with 0x%08X\n", reg,
+ v);
BUG();
}
@@ -814,7 +1333,15 @@ static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, ui
*/
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
- DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
+ dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n",
+ reg);
+ BUG();
+ return 0;
+}
+
+/*
+ * Reports an invalid 64 bit extended-address register read and triggers
+ * BUG(). The message names the access as 64 bit so it can be told apart from
+ * the one logged by amdgpu_invalid_rreg_ext().
+ */
+static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
+{
+ dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%llX\n", reg);
+ BUG();
+ return 0;
+}
@@ -831,8 +1358,17 @@ static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
*/
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
- DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
- reg, v);
+ dev_err(adev->dev,
+ "Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
+ reg, v);
+ BUG();
+}
+
+/*
+ * Reports an invalid 64 bit extended-address register write, logging the
+ * offset and value, and triggers BUG().
+ */
+static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
+{
+ dev_err(adev->dev,
+ "Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
+ reg, v);
 BUG();
 }
@@ -850,8 +1386,9 @@ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
uint32_t block, uint32_t reg)
{
- DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
- reg, block);
+ dev_err(adev->dev,
+ "Invalid callback to read register 0x%04X in block 0x%04X\n",
+ reg, block);
BUG();
return 0;
}
@@ -871,11 +1408,23 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
uint32_t block,
uint32_t reg, uint32_t v)
{
- DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
- reg, block, v);
+ dev_err(adev->dev,
+ "Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
+ reg, block, v);
BUG();
}
+/*
+ * Work out how the VBIOS should be treated for this device.
+ *
+ * Returns AMDGPU_VBIOS_SKIP for an APU with at least one bit set in
+ * aid_mask, AMDGPU_VBIOS_OPTIONAL for a passthrough device with at least one
+ * bit set in aid_mask, and 0 otherwise. The APU check takes precedence.
+ */
+static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
+{
+ if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
+ return AMDGPU_VBIOS_SKIP;
+
+ if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
+ return AMDGPU_VBIOS_OPTIONAL;
+
+ return 0;
+}
+
/**
* amdgpu_device_asic_init - Wrapper for atom asic_init
*
@@ -885,16 +1434,28 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
*/
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
+ uint32_t flags;
+ bool optional;
int ret;
amdgpu_asic_pre_asic_init(adev);
+ flags = amdgpu_device_get_vbios_flags(adev);
+ optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
- adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
amdgpu_psp_wait_for_bootloader(adev);
+ if (optional && !adev->bios)
+ return 0;
+
ret = amdgpu_atomfirmware_asic_init(adev, true);
return ret;
} else {
+ if (optional && !adev->bios)
+ return 0;
+
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}
@@ -1064,13 +1625,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
*/
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
- unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
+ unsigned long flags, offset;
+ spin_lock_irqsave(&adev->wb.lock, flags);
+ offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
if (offset < adev->wb.num_wb) {
__set_bit(offset, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
*wb = offset << 3; /* convert to dw offset */
return 0;
} else {
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
return -EINVAL;
}
}
@@ -1085,9 +1650,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
*/
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
+ unsigned long flags;
+
wb >>= 3;
+ spin_lock_irqsave(&adev->wb.lock, flags);
if (wb < adev->wb.num_wb)
__clear_bit(wb, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
}
/**
@@ -1115,6 +1684,22 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return 0;
+ if (!amdgpu_rebar)
+ return 0;
+
+ /* resizing on Dell G5 SE platforms causes problems with runtime pm */
+ if ((amdgpu_runtime_pm != 0) &&
+ adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
+ adev->pdev->device == 0x731f &&
+ adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
+ return 0;
+
+ /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
+ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
+ dev_warn(
+ adev->dev,
+ "System can't access extended configuration space, please check!!\n");
+
/* skip if the bios has already enabled large BAR */
if (adev->gmc.real_vram_size &&
(pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
@@ -1153,9 +1738,10 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
r = pci_resize_resource(adev->pdev, 0, rbar_size);
if (r == -ENOSPC)
- DRM_INFO("Not enough PCI address space for a large BAR.");
+ dev_info(adev->dev,
+ "Not enough PCI address space for a large BAR.");
else if (r && r != -ENOTSUPP)
- DRM_ERROR("Problem resizing BAR0 (%d).", r);
+ dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
pci_assign_unassigned_bus_resources(adev->pdev->bus);
@@ -1171,14 +1757,6 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
return 0;
}
-static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
-{
- if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
- return false;
-
- return true;
-}
-
/*
* GPU helpers function.
*/
@@ -1193,12 +1771,15 @@ static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
*/
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
- uint32_t reg;
+ uint32_t reg, flags;
if (amdgpu_sriov_vf(adev))
return false;
- if (!amdgpu_device_read_bios(adev))
+ flags = amdgpu_device_get_vbios_flags(adev);
+ if (flags & AMDGPU_VBIOS_SKIP)
+ return false;
+ if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
return false;
if (amdgpu_passthrough(adev)) {
@@ -1212,18 +1793,19 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
uint32_t fw_ver;
err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
- /* force vPost if error occured */
+ /* force vPost if error occurred */
if (err)
return true;
fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
+ release_firmware(adev->pm.fw);
if (fw_ver < 0x00160e00)
return true;
}
}
/* Don't post if we need to reset whole hive on init */
- if (adev->gmc.xgmi.pending_reset)
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
return false;
if (adev->has_hw_reset) {
@@ -1245,24 +1827,88 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
}
/*
- * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
- * speed switching. Until we have confirmation from Intel that a specific host
- * supports it, it's safer that we keep it disabled for all.
+ * Check whether seamless boot is supported.
+ *
+ * So far we only support seamless boot on DCE 3.0 or later.
+ * If users report that it works on older ASICs as well, we may
+ * loosen this.
+ */
+bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
+{
+ /*
+ * amdgpu_seamless: 1 forces seamless boot on, 0 forces it off, -1 falls
+ * through to the automatic checks below; anything else is rejected.
+ */
+ switch (amdgpu_seamless) {
+ case -1:
+ break;
+ case 1:
+ return true;
+ case 0:
+ return false;
+ default:
+ dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
+ amdgpu_seamless);
+ return false;
+ }
+
+ /* automatic mode: only APUs qualify */
+ if (!(adev->flags & AMD_IS_APU))
+ return false;
+
+ /* not supported when the stolen VGA memory is being kept */
+ if (adev->mman.keep_stolen_vga_memory)
+ return false;
+
+ return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
+}
+
+/*
+ * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
+ * don't support dynamic speed switching. Until we have confirmation from Intel
+ * that a specific host supports it, it's safer that we keep it disabled for all.
*
* https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
* https://gitlab.freedesktop.org/drm/amd/-/issues/2663
*/
-bool amdgpu_device_pcie_dynamic_switching_supported(void)
+static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
{
#if IS_ENABLED(CONFIG_X86)
struct cpuinfo_x86 *c = &cpu_data(0);
+ /* eGPUs change speed based on USB4 fabric conditions */
+ if (dev_is_removable(adev->dev))
+ return true;
+
if (c->x86_vendor == X86_VENDOR_INTEL)
return false;
#endif
return true;
}
+/*
+ * Returns true when the ASPM quirk applies: the GPU is GC 12.0.0 or 12.0.1,
+ * its PCIe gen mask includes Gen5, and the host is an x86 family 6 CPU whose
+ * model is one of the Alder Lake / Raptor Lake variants listed below. Always
+ * false on non-x86 builds. amdgpu_device_should_use_aspm() refuses ASPM when
+ * this returns true.
+ */
+static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
+{
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
+ return false;
+
+ if (c->x86 == 6 &&
+ adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
+ switch (c->x86_model) {
+ case VFM_MODEL(INTEL_ALDERLAKE):
+ case VFM_MODEL(INTEL_ALDERLAKE_L):
+ case VFM_MODEL(INTEL_RAPTORLAKE):
+ case VFM_MODEL(INTEL_RAPTORLAKE_P):
+ case VFM_MODEL(INTEL_RAPTORLAKE_S):
+ return true;
+ default:
+ return false;
+ }
+ } else {
+ return false;
+ }
+#else
+ return false;
+#endif
+}
+
/**
* amdgpu_device_should_use_aspm - check if the device should program ASPM
*
@@ -1285,20 +1931,13 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
default:
return false;
}
+ if (adev->flags & AMD_IS_APU)
+ return false;
+ if (amdgpu_device_aspm_support_quirk(adev))
+ return false;
return pcie_aspm_enabled(adev->pdev);
}
-bool amdgpu_device_aspm_support_quirk(void)
-{
-#if IS_ENABLED(CONFIG_X86)
- struct cpuinfo_x86 *c = &cpu_data(0);
-
- return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
-#else
- return true;
-#endif
-}
-
/* if we get transitioned to only one device, take VGA back */
/**
* amdgpu_device_vga_set_decode - enable/disable vga decode
@@ -1381,7 +2020,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
return;
if (!is_os_64) {
- DRM_WARN("Not 64-bit OS, feature not supported\n");
+ dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
goto def_value;
}
si_meminfo(&si);
@@ -1396,7 +2035,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
if (total_memory < dram_size_seven_GB)
goto def_value1;
} else {
- DRM_WARN("Smu memory pool size not supported\n");
+ dev_warn(adev->dev, "Smu memory pool size not supported\n");
goto def_value;
}
adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
@@ -1404,7 +2043,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
return;
def_value1:
- DRM_WARN("No enough system memory\n");
+ dev_warn(adev->dev, "No enough system memory\n");
def_value:
adev->pm.smu_prv_buffer_size = 0;
}
@@ -1456,6 +2095,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
*/
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
+ int i;
+
if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
@@ -1510,6 +2151,32 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
+ for (i = 0; i < MAX_XCP; i++) {
+ switch (amdgpu_enforce_isolation) {
+ case -1:
+ case 0:
+ default:
+ /* disable */
+ adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
+ break;
+ case 1:
+ /* enable */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE;
+ break;
+ case 2:
+ /* enable legacy mode */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+ break;
+ case 3:
+ /* enable only process isolation without submitting cleaner shader */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
+ break;
+ }
+ }
+
return 0;
}
@@ -1528,7 +2195,8 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
struct drm_device *dev = pci_get_drvdata(pdev);
int r;
- if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
+ if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
+ state == VGA_SWITCHEROO_OFF)
return;
if (state == VGA_SWITCHEROO_ON) {
@@ -1540,13 +2208,15 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
amdgpu_device_load_pci_state(pdev);
r = pci_enable_device(pdev);
if (r)
- DRM_WARN("pci_enable_device failed (%d)\n", r);
+ dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
+ r);
amdgpu_device_resume(dev, true);
dev->switch_power_state = DRM_SWITCH_POWER_ON;
} else {
- pr_info("switched off\n");
+ dev_info(&pdev->dev, "switched off\n");
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ amdgpu_device_prepare(dev);
amdgpu_device_suspend(dev, true);
amdgpu_device_cache_pci_state(pdev);
/* Shut down the device */
@@ -1609,10 +2279,11 @@ int amdgpu_device_ip_set_clockgating_state(void *dev,
if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
continue;
r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
- (void *)adev, state);
+ &adev->ip_blocks[i], state);
if (r)
- DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_clockgating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
return r;
}
@@ -1643,10 +2314,11 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
continue;
r = adev->ip_blocks[i].version->funcs->set_powergating_state(
- (void *)adev, state);
+ &adev->ip_blocks[i], state);
if (r)
- DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_powergating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
return r;
}
@@ -1671,7 +2343,8 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
- adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
+ adev->ip_blocks[i].version->funcs->get_clockgating_state(
+ &adev->ip_blocks[i], flags);
}
}
@@ -1693,9 +2366,12 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->type == block_type) {
- r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
- if (r)
- return r;
+ if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
+ r = adev->ip_blocks[i].version->funcs->wait_for_idle(
+ &adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
break;
}
}
@@ -1704,26 +2380,24 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
}
/**
- * amdgpu_device_ip_is_idle - is the hardware IP idle
+ * amdgpu_device_ip_is_valid - is the hardware IP enabled
*
* @adev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
*
- * Check if the hardware IP is idle or not.
- * Returns true if it the IP is idle, false if not.
+ * Check if the hardware IP is enabled or not.
+ * Returns true if the IP is enabled, false if not.
*/
-bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
- enum amd_ip_block_type block_type)
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
{
int i;
for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
if (adev->ip_blocks[i].version->type == block_type)
- return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
+ return adev->ip_blocks[i].status.valid;
}
- return true;
+ return false;
}
@@ -1774,6 +2448,33 @@ int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
return 1;
}
+static const char *ip_block_names[] = { /* short printable names, indexed by enum amd_ip_block_type */
+ [AMD_IP_BLOCK_TYPE_COMMON] = "common",
+ [AMD_IP_BLOCK_TYPE_GMC] = "gmc",
+ [AMD_IP_BLOCK_TYPE_IH] = "ih",
+ [AMD_IP_BLOCK_TYPE_SMC] = "smu", /* the SMC block type is reported as "smu" */
+ [AMD_IP_BLOCK_TYPE_PSP] = "psp",
+ [AMD_IP_BLOCK_TYPE_DCE] = "dce",
+ [AMD_IP_BLOCK_TYPE_GFX] = "gfx",
+ [AMD_IP_BLOCK_TYPE_SDMA] = "sdma",
+ [AMD_IP_BLOCK_TYPE_UVD] = "uvd",
+ [AMD_IP_BLOCK_TYPE_VCE] = "vce",
+ [AMD_IP_BLOCK_TYPE_ACP] = "acp",
+ [AMD_IP_BLOCK_TYPE_VCN] = "vcn",
+ [AMD_IP_BLOCK_TYPE_MES] = "mes",
+ [AMD_IP_BLOCK_TYPE_JPEG] = "jpeg",
+ [AMD_IP_BLOCK_TYPE_VPE] = "vpe",
+ [AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm",
+ [AMD_IP_BLOCK_TYPE_ISP] = "isp",
+}; /* NOTE(review): an in-range type with no initializer above reads back as NULL, not "unknown" - confirm the table stays complete */
+/* Return the short name for @type, or "unknown" when @type indexes past the end of ip_block_names; @adev is not used. */
+static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type)
+{
+ int idx = (int)type;
+
+ return idx < ARRAY_SIZE(ip_block_names) ? ip_block_names[idx] : "unknown";
+}
+
/**
* amdgpu_device_ip_block_add
*
@@ -1802,8 +2503,15 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
break;
}
- DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
- ip_block_version->funcs->name);
+ dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n",
+ adev->num_ip_blocks,
+ ip_block_name(adev, ip_block_version->type),
+ ip_block_version->major,
+ ip_block_version->minor,
+ ip_block_version->rev,
+ ip_block_version->funcs->name);
+
+ adev->ip_blocks[adev->num_ip_blocks].adev = adev;
adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
@@ -1819,7 +2527,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
* the module parameter virtual_display. This feature provides a virtual
* display hardware on headless boards or in virtualized environments.
* This function parses and validates the configuration string specified by
- * the user and configues the virtual display configuration (number of
+ * the user and configures the virtual display configuration (number of
* virtual connectors, crtcs, etc.) specified.
*/
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
@@ -1858,9 +2566,11 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
}
}
- DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
- amdgpu_virtual_display, pci_address_name,
- adev->enable_virtual_display, adev->mode_info.num_crtc);
+ dev_info(
+ adev->dev,
+ "virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
+ amdgpu_virtual_display, pci_address_name,
+ adev->enable_virtual_display, adev->mode_info.num_crtc);
kfree(pciaddstr);
}
@@ -1871,8 +2581,9 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
adev->mode_info.num_crtc = 1;
adev->enable_virtual_display = true;
- DRM_INFO("virtual_display:%d, num_crtc:%d\n",
- adev->enable_virtual_display, adev->mode_info.num_crtc);
+ dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
+ adev->enable_virtual_display,
+ adev->mode_info.num_crtc);
}
}
@@ -1882,29 +2593,18 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
*
* Parses the asic configuration parameters specified in the gpu info
- * firmware and makes them availale to the driver for use in configuring
+ * firmware and makes them available to the driver for use in configuring
* the asic.
* Returns 0 on success, -EINVAL on failure.
*/
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[40];
int err;
const struct gpu_info_firmware_header_v1_0 *hdr;
adev->firmware.gpu_info_fw = NULL;
- if (adev->mman.discovery_bin) {
- /*
- * FIXME: The bounding box is still needed by Navi12, so
- * temporarily read it from gpu_info firmware. Should be dropped
- * when DAL no longer needs it.
- */
- if (adev->asic_type != CHIP_NAVI12)
- return 0;
- }
-
switch (adev->asic_type) {
default:
return 0;
@@ -1926,16 +2626,22 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
chip_name = "arcturus";
break;
case CHIP_NAVI12:
+ if (adev->mman.discovery_bin)
+ return 0;
chip_name = "navi12";
break;
+ case CHIP_CYAN_SKILLFISH:
+ chip_name = "cyan_skillfish";
+ break;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_gpu_info.bin", chip_name);
if (err) {
dev_err(adev->dev,
- "Failed to get gpu_info firmware \"%s\"\n",
- fw_name);
+ "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
+ chip_name);
goto out;
}
@@ -1950,7 +2656,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
/*
- * Should be droped when DAL no longer needs it.
+ * Should be dropped when DAL no longer needs it.
*/
if (adev->asic_type == CHIP_NAVI12)
goto parse_soc_bounding_box;
@@ -2006,6 +2712,24 @@ out:
return err;
}
+static void amdgpu_uid_init(struct amdgpu_device *adev)
+{
+ /* Allocate the UID bookkeeping for @adev; a failed allocation is only warned about and leaves adev->uid_info NULL (no error is returned) */
+ adev->uid_info = kzalloc(sizeof(struct amdgpu_uid), GFP_KERNEL);
+ if (!adev->uid_info) {
+ dev_warn(adev->dev, "Failed to allocate memory for UID\n");
+ return;
+ }
+ adev->uid_info->adev = adev; /* back-pointer to the owning device */
+}
+
+static void amdgpu_uid_fini(struct amdgpu_device *adev)
+{
+ /* Free the UID memory and reset the pointer; kfree() accepts NULL, so this also covers the case where nothing was allocated */
+ kfree(adev->uid_info);
+ adev->uid_info = NULL;
+}
+
/**
* amdgpu_device_ip_early_init - run early init for hardware IPs
*
@@ -2018,10 +2742,11 @@ out:
*/
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
- struct drm_device *dev = adev_to_drm(adev);
+ struct amdgpu_ip_block *ip_block;
struct pci_dev *parent;
+ bool total, skip_bios;
+ uint32_t bios_flags;
int i, r;
- bool total;
amdgpu_device_enable_virtual_display(adev);
@@ -2085,46 +2810,62 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
break;
}
+ /* Check for IP version 9.4.3 with A0 hardware */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
+ !amdgpu_device_get_rev_id(adev)) {
+ dev_err(adev->dev, "Unsupported A0 hardware\n");
+ return -ENODEV; /* device unsupported - no device error */
+ }
+
if (amdgpu_has_atpx() &&
(amdgpu_is_atpx_hybrid() ||
amdgpu_has_atpx_dgpu_power_cntl()) &&
((adev->flags & AMD_IS_APU) == 0) &&
- !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
+ !dev_is_removable(&adev->pdev->dev))
adev->flags |= AMD_IS_PX;
if (!(adev->flags & AMD_IS_APU)) {
- parent = pci_upstream_bridge(adev->pdev);
+ parent = pcie_find_root_port(adev->pdev);
adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
}
-
adev->pm.pp_feature = amdgpu_pp_feature_mask;
if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
+ if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
+ adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
+
+ adev->virt.is_xgmi_node_migrate_enabled = false;
+ if (amdgpu_sriov_vf(adev)) {
+ adev->virt.is_xgmi_node_migrate_enabled =
+ amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
+ }
total = true;
for (i = 0; i < adev->num_ip_blocks; i++) {
+ ip_block = &adev->ip_blocks[i];
+
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
- DRM_WARN("disabled ip block: %d <%s>\n",
- i, adev->ip_blocks[i].version->funcs->name);
+ dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
+ adev->ip_blocks[i].version->funcs->name);
adev->ip_blocks[i].status.valid = false;
- } else {
- if (adev->ip_blocks[i].version->funcs->early_init) {
- r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
- if (r == -ENOENT) {
- adev->ip_blocks[i].status.valid = false;
- } else if (r) {
- DRM_ERROR("early_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- total = false;
- } else {
- adev->ip_blocks[i].status.valid = true;
- }
+ } else if (ip_block->version->funcs->early_init) {
+ r = ip_block->version->funcs->early_init(ip_block);
+ if (r == -ENOENT) {
+ adev->ip_blocks[i].status.valid = false;
+ } else if (r) {
+ dev_err(adev->dev,
+ "early_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ total = false;
} else {
adev->ip_blocks[i].status.valid = true;
}
+ } else {
+ adev->ip_blocks[i].status.valid = true;
}
/* get the vbios after the asic_funcs are set up */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
@@ -2132,16 +2873,31 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
+ bios_flags = amdgpu_device_get_vbios_flags(adev);
+ skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
/* Read BIOS */
- if (amdgpu_device_read_bios(adev)) {
- if (!amdgpu_get_bios(adev))
+ if (!skip_bios) {
+ bool optional =
+ !!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
+ if (!amdgpu_get_bios(adev) && !optional)
return -EINVAL;
- r = amdgpu_atombios_init(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_init failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
- return r;
+ if (optional && !adev->bios)
+ dev_info(
+ adev->dev,
+ "VBIOS image optional, proceeding without VBIOS image");
+
+ if (adev->bios) {
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu_atombios_init failed\n");
+ amdgpu_vf_error_put(
+ adev,
+ AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
+ 0, 0);
+ return r;
+ }
}
}
@@ -2154,7 +2910,15 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (!total)
return -ENODEV;
- amdgpu_amdkfd_device_probe(adev);
+ if (adev->gmc.xgmi.supported)
+ amdgpu_xgmi_early_init(adev);
+
+ if (amdgpu_is_multi_aid(adev))
+ amdgpu_uid_init(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (ip_block->status.valid != false)
+ amdgpu_amdkfd_device_probe(adev);
+
adev->cg_flags &= amdgpu_cg_mask;
adev->pg_flags &= amdgpu_pg_mask;
@@ -2170,13 +2934,18 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
adev->ip_blocks[i].status.hw = true;
@@ -2195,10 +2964,14 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
return r;
}
adev->ip_blocks[i].status.hw = true;
@@ -2218,6 +2991,10 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
continue;
+ if (!amdgpu_ip_member_of_hwini(adev,
+ AMD_IP_BLOCK_TYPE_PSP))
+ break;
+
if (!adev->ip_blocks[i].status.sw)
continue;
@@ -2226,22 +3003,21 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
break;
if (amdgpu_in_reset(adev) || adev->in_suspend) {
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
} else {
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i]
+ .version->funcs->name,
+ r);
return r;
}
+ adev->ip_blocks[i].status.hw = true;
}
-
- adev->ip_blocks[i].status.hw = true;
break;
}
}
@@ -2254,6 +3030,12 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
{
+ struct drm_sched_init_args args = {
+ .ops = &amdgpu_sched_ops,
+ .num_rqs = DRM_SCHED_PRIORITY_COUNT,
+ .timeout_wq = adev->reset_domain->wq,
+ .dev = adev->dev,
+ };
long timeout;
int r, i;
@@ -2279,19 +3061,36 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
break;
}
- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
- ring->num_hw_submission, 0,
- timeout, adev->reset_domain->wq,
- ring->sched_score, ring->name,
- adev->dev);
+ args.timeout = timeout;
+ args.credit_limit = ring->num_hw_submission;
+ args.score = ring->sched_score;
+ args.name = ring->name;
+
+ r = drm_sched_init(&ring->sched, &args);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create scheduler on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ r = amdgpu_uvd_entity_init(adev, ring);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create UVD scheduling entity on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ r = amdgpu_vce_entity_init(adev, ring);
if (r) {
- DRM_ERROR("Failed to create scheduler on ring %s.\n",
- ring->name);
+ dev_err(adev->dev,
+ "Failed to create VCE scheduling entity on ring %s.\n",
+ ring->name);
return r;
}
}
- amdgpu_xcp_update_partition_sched_list(adev);
+ if (adev->xcp_mgr)
+ amdgpu_xcp_update_partition_sched_list(adev);
return 0;
}
@@ -2310,6 +3109,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
*/
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
+ bool init_badpage;
int i, r;
r = amdgpu_ras_init(adev);
@@ -2319,19 +3119,28 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
- r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
- if (r) {
- DRM_ERROR("sw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- goto init_failed;
+ if (adev->ip_blocks[i].version->funcs->sw_init) {
+ r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "sw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ goto init_failed;
+ }
}
adev->ip_blocks[i].status.sw = true;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
/* need to do common hw init early so everything is set up for gmc */
- r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init %d failed %d\n", i, r);
+ dev_err(adev->dev, "hw_init %d failed %d\n", i,
+ r);
goto init_failed;
}
adev->ip_blocks[i].status.hw = true;
@@ -2343,17 +3152,21 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
r = amdgpu_device_mem_scratch_init(adev);
if (r) {
- DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
+ dev_err(adev->dev,
+ "amdgpu_mem_scratch_init failed %d\n",
+ r);
goto init_failed;
}
- r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("hw_init %d failed %d\n", i, r);
+ dev_err(adev->dev, "hw_init %d failed %d\n", i,
+ r);
goto init_failed;
}
r = amdgpu_device_wb_init(adev);
if (r) {
- DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
+ dev_err(adev->dev,
+ "amdgpu_device_wb_init failed %d\n", r);
goto init_failed;
}
adev->ip_blocks[i].status.hw = true;
@@ -2365,10 +3178,18 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_CSA_SIZE);
if (r) {
- DRM_ERROR("allocate CSA failed %d\n", r);
+ dev_err(adev->dev,
+ "allocate CSA failed %d\n", r);
goto init_failed;
}
}
+
+ r = amdgpu_seq64_init(adev);
+ if (r) {
+ dev_err(adev->dev, "allocate seq64 failed %d\n",
+ r);
+ goto init_failed;
+ }
}
}
@@ -2413,7 +3234,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
* Note: theoretically, this should be called before all vram allocations
* to protect retired page from abusing
*/
- r = amdgpu_ras_recovery_init(adev);
+ init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
+ r = amdgpu_ras_recovery_init(adev, init_badpage);
if (r)
goto init_failed;
@@ -2449,14 +3271,20 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
goto init_failed;
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+
/* Don't init kfd if whole hive need to be reset during init */
- if (!adev->gmc.xgmi.pending_reset) {
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
kgd2kfd_init_zone_device(adev);
amdgpu_amdkfd_device_init(adev);
}
amdgpu_fru_get_product_info(adev);
+ if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
+ r = amdgpu_cper_init(adev);
+
init_failed:
return r;
@@ -2469,7 +3297,7 @@ init_failed:
*
* Writes a reset magic value to the gart pointer in VRAM. The driver calls
* this function before a GPU reset. If the value is retained after a
- * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
+ * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
*/
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
@@ -2500,6 +3328,8 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
* always assumed to be lost.
*/
switch (amdgpu_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_LEGACY:
+ case AMD_RESET_METHOD_LINK:
case AMD_RESET_METHOD_BACO:
case AMD_RESET_METHOD_MODE1:
return true;
@@ -2545,11 +3375,13 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
state);
if (r) {
- DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_clockgating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
}
@@ -2582,11 +3414,13 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */
- r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
state);
if (r) {
- DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "set_powergating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
}
@@ -2613,7 +3447,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void)
for (i = 0; i < mgpu_info.num_dgpu; i++) {
gpu_ins = &(mgpu_info.gpu_ins[i]);
adev = gpu_ins->adev;
- if (!(adev->flags & AMD_IS_APU) &&
+ if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
!gpu_ins->mgpu_fan_enabled) {
ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
if (ret)
@@ -2650,10 +3484,12 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->funcs->late_init) {
- r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
if (r) {
- DRM_ERROR("late_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
return r;
}
}
@@ -2662,11 +3498,12 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
r = amdgpu_ras_late_init(adev);
if (r) {
- DRM_ERROR("amdgpu_ras_late_init failed %d", r);
+ dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
return r;
}
- amdgpu_ras_set_error_query_ready(adev, true);
+ if (!amdgpu_reset_in_recovery(adev))
+ amdgpu_ras_set_error_query_ready(adev, true);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
@@ -2675,7 +3512,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
- DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+ dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
if (amdgpu_passthrough(adev) &&
@@ -2708,7 +3545,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
AMDGPU_XGMI_PSTATE_MIN);
if (r) {
- DRM_ERROR("pstate setting failed (%d).\n", r);
+ dev_err(adev->dev,
+ "pstate setting failed (%d).\n",
+ r);
break;
}
}
@@ -2720,6 +3559,27 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
return 0;
}
+static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block) /* run @ip_block's hw_fini hook, then mark its hardware as down */
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ if (!ip_block->version->funcs->hw_fini) {
+ dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
+ ip_block->version->funcs->name);
+ } else {
+ r = ip_block->version->funcs->hw_fini(ip_block);
+ /* XXX handle errors: a non-zero result is only logged at debug level and otherwise ignored */
+ if (r) {
+ dev_dbg(adev->dev,
+ "hw_fini of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ }
+ }
+
+ ip_block->status.hw = false; /* cleared unconditionally, even when the hook is missing or returned an error */
+}
+
/**
* amdgpu_device_smu_fini_early - smu hw_fini wrapper
*
@@ -2729,22 +3589,16 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
*/
static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
{
- int i, r;
+ int i;
- if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
return;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
- r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
- /* XXX handle errors */
- if (r) {
- DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
- adev->ip_blocks[i].status.hw = false;
+ amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
break;
}
}
@@ -2758,38 +3612,34 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].version->funcs->early_fini)
continue;
- r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
if (r) {
- DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ dev_dbg(adev->dev,
+ "early_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
}
}
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
- amdgpu_amdkfd_suspend(adev, false);
+ amdgpu_amdkfd_suspend(adev, true);
+ amdgpu_userq_suspend(adev);
- /* Workaroud for ASICs need to disable SMC first */
+ /* Workaround for ASICs need to disable SMC first */
amdgpu_device_smu_fini_early(adev);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.hw)
continue;
- r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
- /* XXX handle errors */
- if (r) {
- DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
-
- adev->ip_blocks[i].status.hw = false;
+ amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
}
if (amdgpu_sriov_vf(adev)) {
if (amdgpu_virt_release_full_gpu(adev, false))
- DRM_ERROR("failed to release exclusive mode on fini\n");
+ dev_err(adev->dev,
+ "failed to release exclusive mode on fini\n");
}
return 0;
@@ -2810,6 +3660,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
int i, r;
+ amdgpu_cper_fini(adev);
+
if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
amdgpu_virt_release_ras_err_handler_data(adev);
@@ -2828,13 +3680,18 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
amdgpu_device_wb_fini(adev);
amdgpu_device_mem_scratch_fini(adev);
amdgpu_ib_pool_fini(adev);
+ amdgpu_seq64_fini(adev);
+ amdgpu_doorbell_fini(adev);
}
-
- r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
- /* XXX handle errors */
- if (r) {
- DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ if (adev->ip_blocks[i].version->funcs->sw_fini) {
+ r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
+ /* XXX handle errors */
+ if (r) {
+ dev_dbg(adev->dev,
+ "sw_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ }
}
adev->ip_blocks[i].status.sw = false;
adev->ip_blocks[i].status.valid = false;
@@ -2844,11 +3701,12 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.late_initialized)
continue;
if (adev->ip_blocks[i].version->funcs->late_fini)
- adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
+ adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
adev->ip_blocks[i].status.late_initialized = false;
}
amdgpu_ras_fini(adev);
+ amdgpu_uid_fini(adev);
return 0;
}
@@ -2866,7 +3724,7 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
r = amdgpu_ib_ring_tests(adev);
if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
+ dev_err(adev->dev, "ib ring test failed (%d).\n", r);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
@@ -2877,7 +3735,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
WARN_ON_ONCE(adev->gfx.gfx_off_state);
WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
- if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+ if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
adev->gfx.gfx_off_state = true;
}
@@ -2916,15 +3774,9 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
continue;
/* XXX handle errors */
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
- /* XXX handle errors */
- if (r) {
- DRM_ERROR("suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = false;
}
return 0;
@@ -2962,15 +3814,17 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
}
/* skip unnecessary suspend if we do not initialize them yet */
- if (adev->gmc.xgmi.pending_reset &&
- !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
- adev->ip_blocks[i].status.hw = false;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
continue;
- }
+ /* Since we skip suspend for S0i3, we need to cancel the delayed
+ * idle work here as the suspend callback never gets called.
+ */
+ if (adev->in_s0ix &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
/* skip suspend of gfx/mes and psp for S0ix
* gfx is in gfxoff state, so on resume it will exit gfxoff just
* like at runtime. PSP is also part of the always on hardware
@@ -2984,8 +3838,10 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
if (adev->in_s0ix &&
- (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
- (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
+ (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 0, 0)) &&
+ (adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
continue;
/* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
@@ -3001,20 +3857,17 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
continue;
/* XXX handle errors */
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
- /* XXX handle errors */
- if (r) {
- DRM_ERROR("suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
adev->ip_blocks[i].status.hw = false;
+
/* handle putting the SMC in the appropriate state */
if (!amdgpu_sriov_vf(adev)) {
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
if (r) {
- DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
- adev->mp1_state, r);
+ dev_err(adev->dev,
+ "SMC failed to set mp1 state %d, %d\n",
+ adev->mp1_state, r);
return r;
}
}
@@ -3044,6 +3897,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
amdgpu_virt_request_full_gpu(adev, false);
}
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
r = amdgpu_device_ip_suspend_phase1(adev);
if (r)
return r;
@@ -3079,10 +3934,12 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
!block->status.valid)
continue;
- r = block->version->funcs->hw_init(adev);
- DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
- if (r)
+ r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-early: %s failed\n",
+ block->version->funcs->name);
return r;
+ }
block->status.hw = true;
}
}
@@ -3092,7 +3949,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
{
- int i, r;
+ struct amdgpu_ip_block *block;
+ int i, r = 0;
static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_SMC,
@@ -3107,30 +3965,28 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
};
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
- int j;
- struct amdgpu_ip_block *block;
-
- for (j = 0; j < adev->num_ip_blocks; j++) {
- block = &adev->ip_blocks[j];
+ block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
- if (block->version->type != ip_order[i] ||
- !block->status.valid ||
- block->status.hw)
- continue;
+ if (!block)
+ continue;
- if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
- r = block->version->funcs->resume(adev);
- else
- r = block->version->funcs->hw_init(adev);
+ if (block->status.valid && !block->status.hw) {
+ if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ r = amdgpu_ip_block_resume(block);
+ } else {
+ r = block->version->funcs->hw_init(block);
+ }
- DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
- if (r)
- return r;
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-late: %s failed\n",
+ block->version->funcs->name);
+ break;
+ }
block->status.hw = true;
}
}
- return 0;
+ return r;
}
/**
@@ -3157,13 +4013,9 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
- adev->ip_blocks[i].status.hw = true;
}
}
@@ -3175,7 +4027,7 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
- * First resume function for hardware IPs. The list of all the hardware
+ * Second resume function for hardware IPs. The list of all the hardware
* IPs that make up the asic is walked and the resume callbacks are run for
* all blocks except COMMON, GMC, and IH. resume puts the hardware into a
* functional state after a suspend and updates the software state as
@@ -3193,15 +4045,42 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Third resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all DCE. resume puts the hardware into a functional state after a suspend
+ * and updates the software state as necessary. This function is also used
+ * for restoring the GPU after a GPU reset.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
}
- adev->ip_blocks[i].status.hw = true;
}
return 0;
@@ -3233,6 +4112,16 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
r = amdgpu_device_ip_resume_phase2(adev);
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+
+ if (r)
+ return r;
+
+ amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_device_ip_resume_phase3(adev);
+
return r;
}
@@ -3262,12 +4151,14 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
/**
* amdgpu_device_asic_has_dc_support - determine if DC supports the asic
*
+ * @pdev : pci device context
* @asic_type: AMD asic type
*
* Check if there is DC (new modesetting infrastructre) support for an asic.
* returns true if DC has support, false if not.
*/
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
+ enum amd_asic_type asic_type)
{
switch (asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
@@ -3310,7 +4201,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
#else
default:
if (amdgpu_dc > 0)
- DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
+ dev_info_once(
+ &pdev->dev,
+ "Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
return false;
#endif
}
@@ -3329,7 +4222,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
(adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
return false;
- return amdgpu_device_asic_has_dc_support(adev->asic_type);
+ return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
}
static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
@@ -3351,20 +4244,18 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
task_barrier_enter(&hive->tb);
- adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
+ adev->asic_reset_res = amdgpu_device_baco_enter(adev);
if (adev->asic_reset_res)
goto fail;
task_barrier_exit(&hive->tb);
- adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
+ adev->asic_reset_res = amdgpu_device_baco_exit(adev);
if (adev->asic_reset_res)
goto fail;
- if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
- adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
- adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
} else {
task_barrier_full(&hive->tb);
@@ -3373,7 +4264,8 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
fail:
if (adev->asic_reset_res)
- DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
+ dev_warn(adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
adev->asic_reset_res, adev_to_drm(adev)->unique);
amdgpu_put_xgmi_hive(hive);
}
@@ -3387,18 +4279,10 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
int ret = 0;
/*
- * By default timeout for non compute jobs is 10000
- * and 60000 for compute jobs.
- * In SR-IOV or passthrough mode, timeout for compute
- * jobs are 60000 by default.
+ * By default timeout for jobs is 10 sec
*/
- adev->gfx_timeout = msecs_to_jiffies(10000);
+ adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000);
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- if (amdgpu_sriov_vf(adev))
- adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
- msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
- else
- adev->compute_timeout = msecs_to_jiffies(60000);
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
while ((timeout_setting = strsep(&input, ",")) &&
@@ -3465,10 +4349,26 @@ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
adev->ram_is_direct_mapped = true;
}
-static const struct attribute *amdgpu_dev_attributes[] = {
- &dev_attr_pcie_replay_count.attr,
- NULL
-};
+#if defined(CONFIG_HSA_AMD_P2P)
+/**
+ * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns true if the device's IOMMU domain type is DMA or DMA_FQ, else false.
+ */
+static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
+{
+ struct iommu_domain *domain;
+
+ domain = iommu_get_domain_for_dev(adev->dev);
+ if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
+ domain->type == IOMMU_DOMAIN_DMA_FQ))
+ return true;
+
+ return false;
+}
+#endif
static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
{
@@ -3476,16 +4376,12 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
adev->gfx.mcbp = true;
else if (amdgpu_mcbp == 0)
adev->gfx.mcbp = false;
- else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
- adev->gfx.num_gfx_rings)
- adev->gfx.mcbp = true;
if (amdgpu_sriov_vf(adev))
adev->gfx.mcbp = true;
if (adev->gfx.mcbp)
- DRM_INFO("MCBP is enabled\n");
+ dev_info(adev->dev, "MCBP is enabled\n");
}
/**
@@ -3501,7 +4397,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
int amdgpu_device_init(struct amdgpu_device *adev,
uint32_t flags)
{
- struct drm_device *ddev = adev_to_drm(adev);
struct pci_dev *pdev = adev->pdev;
int r, i;
bool px = false;
@@ -3542,6 +4437,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->pciep_wreg = &amdgpu_invalid_wreg;
adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
+ adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
+ adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
adev->didt_rreg = &amdgpu_invalid_rreg;
@@ -3551,9 +4448,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
- DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
- amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
- pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
+ dev_info(
+ adev->dev,
+ "initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
+ amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
+ pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
/* mutex initialization are all done here so we
* can recall function without having locking issues
@@ -3573,6 +4472,18 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->notifier_lock);
mutex_init(&adev->pm.stable_pstate_ctx_lock);
mutex_init(&adev->benchmark_mutex);
+ mutex_init(&adev->gfx.reset_sem_mutex);
+ /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
+ mutex_init(&adev->enforce_isolation_mutex);
+ for (i = 0; i < MAX_XCP; ++i) {
+ adev->isolation[i].spearhead = dma_fence_get_stub();
+ amdgpu_sync_create(&adev->isolation[i].active);
+ amdgpu_sync_create(&adev->isolation[i].prev);
+ }
+ mutex_init(&adev->gfx.userq_sch_mutex);
+ mutex_init(&adev->gfx.workload_profile_mutex);
+ mutex_init(&adev->vcn.workload_profile_mutex);
+ mutex_init(&adev->userq_mutex);
amdgpu_device_init_apu_flags(adev);
@@ -3589,18 +4500,38 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->se_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+ spin_lock_init(&adev->virt.rlcg_reg_lock);
+ spin_lock_init(&adev->wb.lock);
- INIT_LIST_HEAD(&adev->shadow_list);
- mutex_init(&adev->shadow_list_lock);
+ xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
INIT_LIST_HEAD(&adev->reset_list);
INIT_LIST_HEAD(&adev->ras_list);
+ INIT_LIST_HEAD(&adev->pm.od_kobj_list);
+
+ INIT_LIST_HEAD(&adev->userq_mgr_list);
+
INIT_DELAYED_WORK(&adev->delayed_init_work,
amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
amdgpu_device_delay_enable_gfx_off);
+ /*
+ * Initialize the enforce_isolation work structures for each XCP
+ * partition. This work handler is responsible for enforcing shader
+ * isolation on AMD GPUs. It counts the number of emitted fences for
+ * each GFX and compute ring. If there are any fences, it schedules
+ * the `enforce_isolation_work` to be run after a delay. If there are
+ * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
+ * runqueue.
+ */
+ for (i = 0; i < MAX_XCP; i++) {
+ INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
+ amdgpu_gfx_enforce_isolation_handler);
+ adev->gfx.enforce_isolation[i].adev = adev;
+ adev->gfx.enforce_isolation[i].xcp_id = i;
+ }
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
@@ -3618,6 +4549,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* for throttling interrupt) = 60 seconds.
*/
ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
+
ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
/* Registers mapping */
@@ -3637,12 +4569,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (!adev->rmmio)
return -ENOMEM;
- DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
- DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
+ dev_info(adev->dev, "register mmio base: 0x%08X\n",
+ (uint32_t)adev->rmmio_base);
+ dev_info(adev->dev, "register mmio size: %u\n",
+ (unsigned int)adev->rmmio_size);
/*
* Reset domain needs to be present early, before XGMI hive discovered
- * (if any) and intitialized to use reset sem and in_gpu reset flag
+ * (if any) and initialized to use reset sem and in_gpu reset flag
* early on during init and before calling to RREG32.
*/
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
@@ -3650,7 +4584,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return -ENOMEM;
/* detect hw virtualization here */
- amdgpu_detect_virtualization(adev);
+ amdgpu_virt_init(adev);
amdgpu_device_get_pcie_info(adev);
@@ -3660,21 +4594,41 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
+ amdgpu_device_set_mcbp(adev);
+
+ /*
+ * By default, use default mode where all blocks are expected to be
+ * initialized. At present a 'swinit' of blocks is required to be
+ * completed before the need for a different level is detected.
+ */
+ amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
return r;
- amdgpu_device_set_mcbp(adev);
-
- /* Get rid of things like offb */
- r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
- if (r)
- return r;
+ /*
+ * No need to remove conflicting FBs for non-display class devices.
+ * This prevents the sysfb from being freed accidentally.
+ */
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+ (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+ /* Get rid of things like offb */
+ r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
+ if (r)
+ return r;
+ }
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
+ if (amdgpu_sriov_vf(adev) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
+ /* VF MMIO access (except mailbox range) from CPU
+ * will be blocked during sriov runtime
+ */
+ adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
+
amdgpu_gmc_noretry_set(adev);
/* Need to get xgmi info early to decide the reset behavior*/
if (adev->gmc.xgmi.supported) {
@@ -3693,7 +4647,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* internal path natively support atomics, set have_atomics_support to true.
*/
} else if ((adev->flags & AMD_IS_APU) &&
- (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >
+ IP_VERSION(9, 0, 0))) {
adev->have_atomics_support = true;
} else {
adev->have_atomics_support =
@@ -3726,32 +4681,24 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
if (adev->gmc.xgmi.num_physical_nodes) {
dev_info(adev->dev, "Pending hive reset.\n");
- adev->gmc.xgmi.pending_reset = true;
- /* Only need to init necessary block for SMU to handle the reset */
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
- if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
- DRM_DEBUG("IP %s disabled for hw_init.\n",
- adev->ip_blocks[i].version->funcs->name);
- adev->ip_blocks[i].status.hw = true;
- }
- }
+ amdgpu_set_init_level(adev,
+ AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
+ } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev)) {
+ r = psp_gpu_reset(adev);
} else {
- tmp = amdgpu_reset_method;
- /* It should do a default reset when loading or reloading the driver,
- * regardless of the module parameter reset_method.
- */
- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
- r = amdgpu_asic_reset(adev);
- amdgpu_reset_method = tmp;
- if (r) {
- dev_err(adev->dev, "asic reset on init failed\n");
- goto failed;
- }
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ }
+
+ if (r) {
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
}
}
@@ -3762,7 +4709,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
r = -EINVAL;
goto failed;
}
- DRM_INFO("GPU posting now...\n");
+ dev_info(adev->dev, "GPU posting now...\n");
r = amdgpu_device_asic_init(adev);
if (r) {
dev_err(adev->dev, "gpu post error!\n");
@@ -3788,8 +4735,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
/* init i2c buses */
- if (!amdgpu_device_has_dc_support(adev))
- amdgpu_atombios_i2c_init(adev);
+ amdgpu_i2c_init(adev);
}
}
@@ -3833,22 +4779,6 @@ fence_driver_init:
/* Get a log2 for easy divisions. */
adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
- r = amdgpu_atombios_sysfs_init(adev);
- if (r)
- drm_err(&adev->ddev,
- "registering atombios sysfs failed (%d).\n", r);
-
- r = amdgpu_pm_sysfs_init(adev);
- if (r)
- DRM_ERROR("registering pm sysfs failed (%d).\n", r);
-
- r = amdgpu_ucode_sysfs_init(adev);
- if (r) {
- adev->ucode_sysfs_en = false;
- DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
- } else
- adev->ucode_sysfs_en = true;
-
/*
* Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
* Otherwise the mgpu fan boost feature will be skipped due to the
@@ -3859,7 +4789,7 @@ fence_driver_init:
/* enable clockgating, etc. after ib tests, etc. since some blocks require
* explicit gating rather than handling it automatically.
*/
- if (!adev->gmc.xgmi.pending_reset) {
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
r = amdgpu_device_ip_late_init(adev);
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
@@ -3877,11 +4807,39 @@ fence_driver_init:
flush_delayed_work(&adev->delayed_init_work);
}
- r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
+ /*
+ * Register these sysfs interfaces after `late_init`, because some of
+ * the operations performed in `late_init` might affect how the sysfs
+ * interfaces are created.
+ */
+ r = amdgpu_atombios_sysfs_init(adev);
+ if (r)
+ drm_err(&adev->ddev,
+ "registering atombios sysfs failed (%d).\n", r);
+
+ r = amdgpu_pm_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
+
+ r = amdgpu_ucode_sysfs_init(adev);
+ if (r) {
+ adev->ucode_sysfs_en = false;
+ dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
+ } else
+ adev->ucode_sysfs_en = true;
+
+ r = amdgpu_device_attr_sysfs_init(adev);
if (r)
dev_err(adev->dev, "Could not create amdgpu device attr\n");
+ r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
+ if (r)
+ dev_err(adev->dev,
+ "Could not create amdgpu board attributes\n");
+
amdgpu_fru_sysfs_init(adev);
+ amdgpu_reg_state_sysfs_init(adev);
+ amdgpu_xcp_sysfs_init(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
r = amdgpu_pmu_init(adev);
@@ -3899,9 +4857,9 @@ fence_driver_init:
if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
- px = amdgpu_device_supports_px(ddev);
+ px = amdgpu_device_supports_px(adev);
- if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
apple_gmux_detect(NULL, NULL)))
vga_switcheroo_register_client(adev->pdev,
&amdgpu_switcheroo_ops, px);
@@ -3909,12 +4867,16 @@ fence_driver_init:
if (px)
vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
- if (adev->gmc.xgmi.pending_reset)
- queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
- msecs_to_jiffies(AMDGPU_RESUME_MS));
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
+ amdgpu_xgmi_reset_on_init(adev);
amdgpu_device_check_iommu_direct_map(adev);
+ adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
+ r = register_pm_notifier(&adev->pm_nb);
+ if (r)
+ goto failed;
+
return 0;
release_ras_con:
@@ -3974,8 +4936,13 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
{
dev_info(adev->dev, "amdgpu: finishing device.\n");
flush_delayed_work(&adev->delayed_init_work);
+
+ if (adev->mman.initialized)
+ drain_workqueue(adev->mman.bdev.wq);
adev->shutdown = true;
+ unregister_pm_notifier(&adev->pm_nb);
+
/* make sure IB test finished before entering exclusive mode
* to avoid preemption on IB test
*/
@@ -3994,19 +4961,21 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
}
amdgpu_fence_driver_hw_fini(adev);
- if (adev->mman.initialized)
- drain_workqueue(adev->mman.bdev.wq);
-
if (adev->pm.sysfs_initialized)
amdgpu_pm_sysfs_fini(adev);
if (adev->ucode_sysfs_en)
amdgpu_ucode_sysfs_fini(adev);
- sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
+ amdgpu_device_attr_sysfs_fini(adev);
amdgpu_fru_sysfs_fini(adev);
+ amdgpu_reg_state_sysfs_fini(adev);
+ amdgpu_xcp_sysfs_fini(adev);
+
/* disable ras feature must before hw fini */
amdgpu_ras_pre_fini(adev);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
amdgpu_device_ip_fini_early(adev);
amdgpu_irq_fini_hw(adev);
@@ -4023,30 +4992,40 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
- int idx;
+ int i, idx;
bool px;
- amdgpu_fence_driver_sw_fini(adev);
amdgpu_device_ip_fini(adev);
+ amdgpu_fence_driver_sw_fini(adev);
amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
adev->accel_working = false;
dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
+ for (i = 0; i < MAX_XCP; ++i) {
+ dma_fence_put(adev->isolation[i].spearhead);
+ amdgpu_sync_free(&adev->isolation[i].active);
+ amdgpu_sync_free(&adev->isolation[i].prev);
+ }
amdgpu_reset_fini(adev);
/* free i2c buses */
- if (!amdgpu_device_has_dc_support(adev))
- amdgpu_i2c_fini(adev);
+ amdgpu_i2c_fini(adev);
+
+ if (adev->bios) {
+ if (amdgpu_emu_mode != 1)
+ amdgpu_atombios_fini(adev);
+ amdgpu_bios_release(adev);
+ }
- if (amdgpu_emu_mode != 1)
- amdgpu_atombios_fini(adev);
+ kfree(adev->fru_info);
+ adev->fru_info = NULL;
- kfree(adev->bios);
- adev->bios = NULL;
+ kfree(adev->xcp_mgr);
+ adev->xcp_mgr = NULL;
- px = amdgpu_device_supports_px(adev_to_drm(adev));
+ px = amdgpu_device_supports_px(adev);
- if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
apple_gmux_detect(NULL, NULL)))
vga_switcheroo_unregister_client(adev->pdev);
@@ -4060,7 +5039,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
iounmap(adev->rmmio);
adev->rmmio = NULL;
- amdgpu_doorbell_fini(adev);
drm_dev_exit(idx);
}
@@ -4073,7 +5051,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
adev->reset_domain = NULL;
kfree(adev->pci_state);
-
+ kfree(adev->pcie_reset_ctx.swds_pcistate);
+ kfree(adev->pcie_reset_ctx.swus_pcistate);
}
/**
@@ -4089,13 +5068,25 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
{
int ret;
- /* No need to evict vram on APUs for suspend to ram or s2idle */
- if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
+ /* No need to evict vram on APUs unless going to S4 */
+ if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
+ return 0;
+
+ /* No need to evict when going to S5 through S4 callbacks */
+ if (system_state == SYSTEM_POWER_OFF)
return 0;
ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
- if (ret)
- DRM_WARN("evicting device resources failed\n");
+ if (ret) {
+ dev_warn(adev->dev, "evicting device resources failed\n");
+ return ret;
+ }
+
+ if (adev->in_s4) {
+ ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
+ if (ret)
+ dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
+ }
return ret;
}
@@ -4103,16 +5094,102 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
* Suspend & resume.
*/
/**
+ * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
+ * @nb: notifier block embedded in struct amdgpu_device as pm_nb
+ * @mode: PM notification event; only the hibernation events are acted on
+ * @data: unused
+ *
+ * Sets adev->in_s4 on PM_HIBERNATION_PREPARE and clears it again on
+ * PM_POST_HIBERNATION so that eviction can be optimized in the pm prepare
+ * callback. Other events are ignored; NOTIFY_DONE is always returned.
+ */
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data)
+{
+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
+
+ switch (mode) {
+ case PM_HIBERNATION_PREPARE:
+ adev->in_s4 = true;
+ break;
+ case PM_POST_HIBERNATION:
+ adev->in_s4 = false;
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * amdgpu_device_prepare - prepare for device suspend
+ *
+ * @dev: drm dev pointer
+ *
+ * Called at driver suspend, before the hw is put in the suspend state:
+ * evicts resources, flushes the GFXOFF delay work and runs each valid IP
+ * block's prepare_suspend callback. Returns 0 on success, error on failure.
+ */
+int amdgpu_device_prepare(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i, r;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ /* Evict the majority of BOs before starting suspend sequence */
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ return r;
+
+ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_complete - complete power state transition
+ *
+ * @dev: drm dev pointer
+ *
+ * Undo the changes from amdgpu_device_prepare by running each valid IP block's
+ * optional complete callback. Called on all resume transitions, even failed ones.
+ */
+void amdgpu_device_complete(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->complete)
+ continue;
+ adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
+ }
+}
+
+/**
* amdgpu_device_suspend - initiate device suspend
*
* @dev: drm dev pointer
- * @fbcon : notify the fbdev of suspend
+ * @notify_clients: notify in-kernel DRM clients
*
* Puts the hw in the suspend state (all asics).
* Returns 0 for success or an error on failure.
* Called at driver suspend.
*/
-int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
+int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
{
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;
@@ -4122,38 +5199,36 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
adev->in_suspend = true;
- /* Evict the majority of BOs before grabbing the full access */
- r = amdgpu_device_evict_resources(adev);
- if (r)
- return r;
-
if (amdgpu_sriov_vf(adev)) {
+ if (!adev->in_runpm)
+ amdgpu_amdkfd_suspend_process(adev);
amdgpu_virt_fini_data_exchange(adev);
r = amdgpu_virt_request_full_gpu(adev, false);
if (r)
return r;
}
- if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
- DRM_WARN("smart shift update failed\n");
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3))
+ dev_warn(adev->dev, "smart shift update failed\n");
- if (fbcon)
- drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
+ if (notify_clients)
+ drm_client_dev_suspend(adev_to_drm(adev), false);
cancel_delayed_work_sync(&adev->delayed_init_work);
- flush_delayed_work(&adev->gfx.gfx_off_delay_work);
amdgpu_ras_suspend(adev);
amdgpu_device_ip_suspend_phase1(adev);
- if (!adev->in_s0ix)
- amdgpu_amdkfd_suspend(adev, adev->in_runpm);
+ amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ amdgpu_userq_suspend(adev);
r = amdgpu_device_evict_resources(adev);
if (r)
return r;
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
amdgpu_fence_driver_hw_fini(adev);
amdgpu_device_ip_suspend_phase2(adev);
@@ -4161,6 +5236,36 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
+ r = amdgpu_dpm_notify_rlc_state(adev, false);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
+{
+ int r;
+ unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
+
+ /* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
+ * may not work. The access could be blocked by nBIF protection as VF isn't in
+ * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
+ * so that QEMU reprograms MSIX table.
+ */
+ amdgpu_restore_msix(adev);
+
+ r = adev->gfxhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+
+ dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
+ prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
+
+ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+ adev->vm_manager.vram_base_offset +=
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+
return 0;
}
@@ -4168,13 +5273,13 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
* amdgpu_device_resume - initiate device resume
*
* @dev: drm dev pointer
- * @fbcon : notify the fbdev of resume
+ * @notify_clients: notify in-kernel DRM clients
*
* Bring the hw back to operating state (all asics).
* Returns 0 for success or an error on failure.
* Called at driver resume.
*/
-int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
+int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
{
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;
@@ -4185,6 +5290,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
return r;
}
+ if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
+ r = amdgpu_virt_resume(adev);
+ if (r)
+ goto exit;
+ }
+
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
@@ -4204,7 +5315,14 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
goto exit;
}
- amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ if (r)
+ goto exit;
+
+ r = amdgpu_userq_resume(adev);
+ if (r)
+ goto exit;
r = amdgpu_device_ip_late_init(adev);
if (r)
@@ -4212,17 +5330,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
-
- if (!adev->in_s0ix) {
- r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
- if (r)
- goto exit;
- }
-
exit:
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_init_data_exchange(adev);
amdgpu_virt_release_full_gpu(adev, true);
+
+ if (!r && !adev->in_runpm)
+ r = amdgpu_amdkfd_resume_process(adev);
}
if (r)
@@ -4231,8 +5345,8 @@ exit:
/* Make sure IB tests flushed */
flush_delayed_work(&adev->delayed_init_work);
- if (fbcon)
- drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
+ if (notify_clients)
+ drm_client_dev_resume(adev_to_drm(adev), false);
amdgpu_ras_resume(adev);
@@ -4257,13 +5371,12 @@ exit:
dev->dev->power.disable_depth--;
#endif
}
- adev->in_suspend = false;
- if (adev->enable_mes)
- amdgpu_mes_self_test(adev);
+ amdgpu_vram_mgr_clear_reset_blocks(adev);
+ adev->in_suspend = false;
- if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
- DRM_WARN("smart shift update failed\n");
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
+ dev_warn(adev->dev, "smart shift update failed\n");
return 0;
}
@@ -4294,7 +5407,8 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->funcs->check_soft_reset)
adev->ip_blocks[i].status.hang =
- adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
+ adev->ip_blocks[i].version->funcs->check_soft_reset(
+ &adev->ip_blocks[i]);
if (adev->ip_blocks[i].status.hang) {
dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
asic_hang = true;
@@ -4323,7 +5437,7 @@ static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->pre_soft_reset) {
- r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -4385,7 +5499,7 @@ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->soft_reset) {
- r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -4414,7 +5528,7 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->post_soft_reset)
- r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -4423,103 +5537,33 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
}
/**
- * amdgpu_device_recover_vram - Recover some VRAM contents
- *
- * @adev: amdgpu_device pointer
- *
- * Restores the contents of VRAM buffers from the shadows in GTT. Used to
- * restore things like GPUVM page tables after a GPU reset where
- * the contents of VRAM might be lost.
- *
- * Returns:
- * 0 on success, negative error code on failure.
- */
-static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
-{
- struct dma_fence *fence = NULL, *next = NULL;
- struct amdgpu_bo *shadow;
- struct amdgpu_bo_vm *vmbo;
- long r = 1, tmo;
-
- if (amdgpu_sriov_runtime(adev))
- tmo = msecs_to_jiffies(8000);
- else
- tmo = msecs_to_jiffies(100);
-
- dev_info(adev->dev, "recover vram bo from shadow start\n");
- mutex_lock(&adev->shadow_list_lock);
- list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
- /* If vm is compute context or adev is APU, shadow will be NULL */
- if (!vmbo->shadow)
- continue;
- shadow = vmbo->shadow;
-
- /* No need to recover an evicted BO */
- if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
- shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
- shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
- continue;
-
- r = amdgpu_bo_restore_shadow(shadow, &next);
- if (r)
- break;
-
- if (fence) {
- tmo = dma_fence_wait_timeout(fence, false, tmo);
- dma_fence_put(fence);
- fence = next;
- if (tmo == 0) {
- r = -ETIMEDOUT;
- break;
- } else if (tmo < 0) {
- r = tmo;
- break;
- }
- } else {
- fence = next;
- }
- }
- mutex_unlock(&adev->shadow_list_lock);
-
- if (fence)
- tmo = dma_fence_wait_timeout(fence, false, tmo);
- dma_fence_put(fence);
-
- if (r < 0 || tmo <= 0) {
- dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
- return -EIO;
- }
-
- dev_info(adev->dev, "recover vram bo from shadow done\n");
- return 0;
-}
-
-
-/**
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
* @adev: amdgpu_device pointer
- * @from_hypervisor: request from hypervisor
+ * @reset_context: amdgpu reset context pointer
*
* do VF FLR and reinitialize Asic
* return 0 means succeeded otherwise failed
*/
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
- bool from_hypervisor)
+ struct amdgpu_reset_context *reset_context)
{
int r;
struct amdgpu_hive_info *hive = NULL;
- int retry_limit = 0;
-
-retry:
- amdgpu_amdkfd_pre_reset(adev);
- if (from_hypervisor)
+ if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
+ if (!amdgpu_ras_get_fed_status(adev))
+ amdgpu_virt_ready_to_reset(adev);
+ amdgpu_virt_wait_reset(adev);
+ clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
r = amdgpu_virt_request_full_gpu(adev, true);
- else
+ } else {
r = amdgpu_virt_reset_gpu(adev);
+ }
if (r)
return r;
+
+ amdgpu_ras_clear_err_state(adev);
amdgpu_irq_gpu_reset_resume_helper(adev);
/* some sw clean up VF needs to do before recover */
@@ -4528,7 +5572,7 @@ retry:
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
- goto error;
+ return r;
amdgpu_virt_init_data_exchange(adev);
@@ -4539,63 +5583,64 @@ retry:
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
if (r)
- goto error;
+ return r;
hive = amdgpu_get_xgmi_hive(adev);
/* Update PSP FW topology after reset */
if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
r = amdgpu_xgmi_update_topology(hive, adev);
-
if (hive)
amdgpu_put_xgmi_hive(hive);
+ if (r)
+ return r;
- if (!r) {
- r = amdgpu_ib_ring_tests(adev);
-
- amdgpu_amdkfd_post_reset(adev);
- }
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ return r;
-error:
- if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
+ if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
amdgpu_inc_vram_lost(adev);
- r = amdgpu_device_recover_vram(adev);
- }
+
+ /* need to be called during full access so we can't do it later like
+ * bare-metal does.
+ */
+ amdgpu_amdkfd_post_reset(adev);
amdgpu_virt_release_full_gpu(adev, true);
- if (AMDGPU_RETRY_SRIOV_RESET(r)) {
- if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
- retry_limit++;
- goto retry;
- } else
- DRM_ERROR("GPU reset retry is beyond the retry limit\n");
- }
+ /* The GC versions below (Aldebaran, gfx_11_0_3, ...) support RAS in SR-IOV, so resume RAS during reset */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
+ amdgpu_ras_resume(adev);
- return r;
+ amdgpu_virt_ras_telemetry_post_reset(adev);
+
+ return 0;
}
/**
- * amdgpu_device_has_job_running - check if there is any job in mirror list
+ * amdgpu_device_has_job_running - check if there is any unfinished job
*
* @adev: amdgpu_device pointer
*
- * check if there is any job in mirror list
+ * Check if there is any job running on the device when the guest driver
+ * receives an FLR notification from the host driver. If there are still jobs
+ * running, the guest driver will not respond to the FLR reset. Instead, the
+ * job is left to hit its timeout and the guest driver then issues the reset.
*/
bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
{
int i;
- struct drm_sched_job *job;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- spin_lock(&ring->sched.job_list_lock);
- job = list_first_entry_or_null(&ring->sched.pending_list,
- struct drm_sched_job, list);
- spin_unlock(&ring->sched.job_list_lock);
- if (job)
+ if (amdgpu_fence_count_emitted(ring))
return true;
}
return false;
@@ -4657,15 +5702,19 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
u32 i;
int ret = 0;
- amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+ if (adev->bios)
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
dev_info(adev->dev, "GPU mode1 reset\n");
+ /* Cache the state before bus master disable. The saved config space
+ * values are used in other cases like restore after mode-2 reset.
+ */
+ amdgpu_device_cache_pci_state(adev->pdev);
+
/* disable BM */
pci_clear_master(adev->pdev);
- amdgpu_device_cache_pci_state(adev->pdev);
-
if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
dev_info(adev->dev, "GPU smu mode1 reset\n");
ret = amdgpu_dpm_mode1_reset(adev);
@@ -4696,7 +5745,8 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
goto mode1_reset_failed;
}
- amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+ if (adev->bios)
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
return 0;
@@ -4705,21 +5755,43 @@ mode1_reset_failed:
return ret;
}
+int amdgpu_device_link_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ dev_info(adev->dev, "GPU link reset\n");
+
+ if (!amdgpu_reset_in_dpc(adev))
+ ret = amdgpu_dpm_link_reset(adev);
+
+ if (ret)
+ goto link_reset_failed;
+
+ ret = amdgpu_psp_wait_for_bootloader(adev);
+ if (ret)
+ goto link_reset_failed;
+
+ return 0;
+
+link_reset_failed:
+ dev_err(adev->dev, "GPU link reset failed\n");
+ return ret;
+}
+
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context)
{
int i, r = 0;
struct amdgpu_job *job = NULL;
+ struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
if (reset_context->reset_req_dev == adev)
job = reset_context->job;
- if (amdgpu_sriov_vf(adev)) {
- /* stop the data exchange thread */
- amdgpu_virt_fini_data_exchange(adev);
- }
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_pre_reset(adev);
amdgpu_fence_driver_isr_toggle(adev, true);
@@ -4727,7 +5799,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
/* Clear job fence from fence drv to avoid force_completion
@@ -4768,6 +5840,16 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
}
}
+ if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
+ dev_info(tmp_adev->dev, "Dumping IP State\n");
+ /* Trigger ip dump before we reset the asic */
+ for (i = 0; i < tmp_adev->num_ip_blocks; i++)
+ if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
+ tmp_adev->ip_blocks[i].version->funcs
+ ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
+ dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
+ }
+
if (need_full_reset)
r = amdgpu_device_ip_suspend(adev);
if (need_full_reset)
@@ -4780,163 +5862,36 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
return r;
}
-static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
-{
- int i;
-
- lockdep_assert_held(&adev->reset_domain->sem);
-
- for (i = 0; i < adev->num_regs; i++) {
- adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
- trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
- adev->reset_dump_reg_value[i]);
- }
-
- return 0;
-}
-
-#ifdef CONFIG_DEV_COREDUMP
-static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
- size_t count, void *data, size_t datalen)
-{
- struct drm_printer p;
- struct amdgpu_device *adev = data;
- struct drm_print_iterator iter;
- int i;
-
- iter.data = buffer;
- iter.offset = 0;
- iter.start = offset;
- iter.remain = count;
-
- p = drm_coredump_printer(&iter);
-
- drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
- drm_printf(&p, "kernel: " UTS_RELEASE "\n");
- drm_printf(&p, "module: " KBUILD_MODNAME "\n");
- drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
- if (adev->reset_task_info.pid)
- drm_printf(&p, "process_name: %s PID: %d\n",
- adev->reset_task_info.process_name,
- adev->reset_task_info.pid);
-
- if (adev->reset_vram_lost)
- drm_printf(&p, "VRAM is lost due to GPU reset!\n");
- if (adev->num_regs) {
- drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
-
- for (i = 0; i < adev->num_regs; i++)
- drm_printf(&p, "0x%08x: 0x%08x\n",
- adev->reset_dump_reg_list[i],
- adev->reset_dump_reg_value[i]);
- }
-
- return count - iter.remain;
-}
-
-static void amdgpu_devcoredump_free(void *data)
-{
-}
-
-static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
-
- ktime_get_ts64(&adev->reset_time);
- dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
- amdgpu_devcoredump_read, amdgpu_devcoredump_free);
-}
-#endif
-
-int amdgpu_do_asic_reset(struct list_head *device_list_handle,
- struct amdgpu_reset_context *reset_context)
+int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
{
- struct amdgpu_device *tmp_adev = NULL;
- bool need_full_reset, skip_hw_reset, vram_lost = false;
- int r = 0;
- bool gpu_reset_for_dev_remove = 0;
+ struct list_head *device_list_handle;
+ bool full_reset, vram_lost = false;
+ struct amdgpu_device *tmp_adev;
+ int r, init_level;
- /* Try reset handler method first */
- tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
- reset_list);
- amdgpu_reset_reg_dumps(tmp_adev);
-
- reset_context->reset_device_list = device_list_handle;
- r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
- /* If reset handler not implemented, continue; otherwise return */
- if (r == -EOPNOTSUPP)
- r = 0;
- else
- return r;
-
- /* Reset handler not implemented, use the default method */
- need_full_reset =
- test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
- skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
-
- gpu_reset_for_dev_remove =
- test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
- test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
-
- /*
- * ASIC reset has to be done on all XGMI hive nodes ASAP
- * to allow proper links negotiation in FW (within 1 sec)
- */
- if (!skip_hw_reset && need_full_reset) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- /* For XGMI run all resets in parallel to speed up the process */
- if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- tmp_adev->gmc.xgmi.pending_reset = false;
- if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
- r = -EALREADY;
- } else
- r = amdgpu_asic_reset(tmp_adev);
+ device_list_handle = reset_context->reset_device_list;
- if (r) {
- dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
- r, adev_to_drm(tmp_adev)->unique);
- break;
- }
- }
-
- /* For XGMI wait for all resets to complete before proceed */
- if (!r) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- flush_work(&tmp_adev->xgmi_reset_work);
- r = tmp_adev->asic_reset_res;
- if (r)
- break;
- }
- }
- }
- }
-
- if (!r && amdgpu_ras_intr_triggered()) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
- tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
- tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
- }
+ if (!device_list_handle)
+ return -EINVAL;
- amdgpu_ras_intr_cleared();
- }
+ full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
- /* Since the mode1 reset affects base ip blocks, the
- * phase1 ip blocks need to be resumed. Otherwise there
- * will be a BIOS signature error and the psp bootloader
- * can't load kdb on the next amdgpu install.
+ /*
+ * A reset on init runs at the default init level; any other reset
+ * method keeps the reset-recovery init level.
*/
- if (gpu_reset_for_dev_remove) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list)
- amdgpu_device_ip_resume_phase1(tmp_adev);
-
- goto end;
- }
+ if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
+ init_level = AMDGPU_INIT_LEVEL_DEFAULT;
+ else
+ init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
+ r = 0;
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (need_full_reset) {
+ amdgpu_set_init_level(tmp_adev, init_level);
+ if (full_reset) {
/* post card */
+ amdgpu_reset_set_dpc_status(tmp_adev, false);
+ amdgpu_ras_clear_err_state(tmp_adev);
r = amdgpu_device_asic_init(tmp_adev);
if (r) {
dev_warn(tmp_adev->dev, "asic atom init failed!");
@@ -4948,17 +5903,14 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
goto out;
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
-#ifdef CONFIG_DEV_COREDUMP
- tmp_adev->reset_vram_lost = vram_lost;
- memset(&tmp_adev->reset_task_info, 0,
- sizeof(tmp_adev->reset_task_info));
- if (reset_context->job && reset_context->job->vm)
- tmp_adev->reset_task_info =
- reset_context->job->vm->task_info;
- amdgpu_reset_capture_coredumpm(tmp_adev);
-#endif
+
+ if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
+ amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
+
if (vram_lost) {
- DRM_INFO("VRAM is lost due to GPU reset!\n");
+ dev_info(
+ tmp_adev->dev,
+ "VRAM is lost due to GPU reset!\n");
amdgpu_inc_vram_lost(tmp_adev);
}
@@ -4966,10 +5918,22 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (r)
return r;
+ r = amdgpu_xcp_restore_partition_mode(
+ tmp_adev->xcp_mgr);
+ if (r)
+ goto out;
+
r = amdgpu_device_ip_resume_phase2(tmp_adev);
if (r)
goto out;
+ if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
+
+ r = amdgpu_device_ip_resume_phase3(tmp_adev);
+ if (r)
+ goto out;
+
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
@@ -4987,7 +5951,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (r)
goto out;
- drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
+ drm_client_dev_resume(adev_to_drm(tmp_adev), false);
/*
* The GPU enters bad state once faulty pages
@@ -4999,7 +5963,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
* bad_page_threshold value to fix this once
* probing driver again.
*/
- if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
+ if (!amdgpu_ras_is_rma(tmp_adev)) {
/* must succeed. */
amdgpu_ras_resume(tmp_adev);
} else {
@@ -5017,27 +5981,102 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
out:
if (!r) {
+ /* IP init is complete now, set level as default */
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_DEFAULT);
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
r = amdgpu_ib_ring_tests(tmp_adev);
if (r) {
dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
- need_full_reset = true;
r = -EAGAIN;
goto end;
}
}
- if (!r)
- r = amdgpu_device_recover_vram(tmp_adev);
- else
+ if (r)
tmp_adev->asic_reset_res = r;
}
end:
- if (need_full_reset)
+ return r;
+}
+
+int amdgpu_do_asic_reset(struct list_head *device_list_handle,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ bool need_full_reset, skip_hw_reset;
+ int r = 0;
+
+ /* Try reset handler method first */
+ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+ reset_list);
+
+ reset_context->reset_device_list = device_list_handle;
+ r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
+ /* If reset handler not implemented, continue; otherwise return */
+ if (r == -EOPNOTSUPP)
+ r = 0;
+ else
+ return r;
+
+ /* Reset handler not implemented, use the default method */
+ need_full_reset =
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
+
+ /*
+ * ASIC reset has to be done on all XGMI hive nodes ASAP
+ * to allow proper links negotiation in FW (within 1 sec)
+ */
+ if (!skip_hw_reset && need_full_reset) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (!queue_work(system_unbound_wq,
+ &tmp_adev->xgmi_reset_work))
+ r = -EALREADY;
+ } else
+ r = amdgpu_asic_reset(tmp_adev);
+
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
+ r, adev_to_drm(tmp_adev)->unique);
+ goto out;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, device_list_handle,
+ reset_list) {
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ flush_work(&tmp_adev->xgmi_reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+ }
+ }
+
+ if (!r && amdgpu_ras_intr_triggered()) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ amdgpu_ras_reset_error_count(tmp_adev,
+ AMDGPU_RAS_BLOCK__MMHUB);
+ }
+
+ amdgpu_ras_intr_cleared();
+ }
+
+ r = amdgpu_device_reinit_after_reset(reset_context);
+ if (r == -EAGAIN)
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
else
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
+out:
return r;
}
@@ -5046,6 +6085,7 @@ static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
+ case AMD_RESET_METHOD_LINK:
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
break;
case AMD_RESET_METHOD_MODE2:
@@ -5145,89 +6185,80 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
}
-/**
- * amdgpu_device_gpu_recover - reset the asic and recover scheduler
- *
- * @adev: amdgpu_device pointer
- * @job: which job trigger hang
- * @reset_context: amdgpu reset context pointer
- *
- * Attempt to reset the GPU if it has hung (all asics).
- * Attempt to do soft-reset or full-reset and reinitialize Asic
- * Returns 0 for success or an error on failure.
- */
-
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job *job,
- struct amdgpu_reset_context *reset_context)
+static int amdgpu_device_health_check(struct list_head *device_list_handle)
{
- struct list_head device_list, *device_list_handle = NULL;
- bool job_signaled = false;
- struct amdgpu_hive_info *hive = NULL;
- struct amdgpu_device *tmp_adev = NULL;
- int i, r = 0;
- bool need_emergency_restart = false;
- bool audio_suspended = false;
- bool gpu_reset_for_dev_remove = false;
-
- gpu_reset_for_dev_remove =
- test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
- test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
-
- /*
- * Special case: RAS triggered and full reset isn't supported
- */
- need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
-
- /*
- * Flush RAM to disk so that after reboot
- * the user can read log and see why the system rebooted.
- */
- if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
- DRM_WARN("Emergency reboot.");
+ struct amdgpu_device *tmp_adev;
+ int ret = 0;
- ksys_sync_helper();
- emergency_restart();
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ ret |= amdgpu_device_bus_status_check(tmp_adev);
}
- dev_info(adev->dev, "GPU %s begin!\n",
- need_emergency_restart ? "jobs stop":"reset");
+ return ret;
+}
- if (!amdgpu_sriov_vf(adev))
- hive = amdgpu_get_xgmi_hive(adev);
- if (hive)
- mutex_lock(&hive->hive_lock);
+static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_hive_info *hive)
+{
+ struct amdgpu_device *tmp_adev = NULL;
- reset_context->job = job;
- reset_context->hive = hive;
/*
* Build list of devices to reset.
* In case we are in XGMI hive mode, resort the device list
* to put adev in the 1st position.
*/
- INIT_LIST_HEAD(&device_list);
- if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
+ if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- list_add_tail(&tmp_adev->reset_list, &device_list);
- if (gpu_reset_for_dev_remove && adev->shutdown)
+ list_add_tail(&tmp_adev->reset_list, device_list);
+ if (adev->shutdown)
tmp_adev->shutdown = true;
+ if (amdgpu_reset_in_dpc(adev))
+ tmp_adev->pcie_reset_ctx.in_link_reset = true;
}
- if (!list_is_first(&adev->reset_list, &device_list))
- list_rotate_to_front(&adev->reset_list, &device_list);
- device_list_handle = &device_list;
+ if (!list_is_first(&adev->reset_list, device_list))
+ list_rotate_to_front(&adev->reset_list, device_list);
} else {
- list_add_tail(&adev->reset_list, &device_list);
- device_list_handle = &device_list;
+ list_add_tail(&adev->reset_list, device_list);
}
+}
- /* We need to lock reset domain only once both for XGMI and single device */
- tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
- reset_list);
+static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+}
- /* block all schedulers and reset given job's ring */
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+}
+
+static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context,
+ struct list_head *device_list,
+ struct amdgpu_hive_info *hive,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i;
+
+ /* block all schedulers and reset given job's ring */
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
amdgpu_device_set_mp1_state(tmp_adev);
/*
@@ -5241,32 +6272,31 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* some audio codec errors.
*/
if (!amdgpu_device_suspend_display_audio(tmp_adev))
- audio_suspended = true;
+ tmp_adev->pcie_reset_ctx.audio_suspended = true;
amdgpu_ras_set_error_query_ready(tmp_adev, false);
cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
- if (!amdgpu_sriov_vf(tmp_adev))
- amdgpu_amdkfd_pre_reset(tmp_adev);
+ amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
/*
- * Mark these ASICs to be reseted as untracked first
+ * Mark these ASICs to be reset as untracked first
* And add them back after reset completed
*/
amdgpu_unregister_gpu_instance(tmp_adev);
- drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
+ drm_client_dev_suspend(adev_to_drm(tmp_adev), false);
/* disable ras on ALL IPs */
- if (!need_emergency_restart &&
- amdgpu_device_ip_need_full_reset(tmp_adev))
+ if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
+ amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5276,28 +6306,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
}
atomic_inc(&tmp_adev->gpu_reset_counter);
}
+}
- if (need_emergency_restart)
- goto skip_sched_resume;
-
- /*
- * Must check guilty signal here since after this point all old
- * HW fences are force signaled.
- *
- * job->base holds a reference to parent fence
- */
- if (job && dma_fence_is_signaled(&job->hw_fence)) {
- job_signaled = true;
- dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
- goto skip_hw_reset;
- }
+static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
+ int r = 0;
retry: /* Rest of adevs pre asic reset from XGMI hive. */
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (gpu_reset_for_dev_remove) {
- /* Workaroud for ASICs need to disable SMC first */
- amdgpu_device_smu_fini_early(tmp_adev);
- }
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
/*TODO Should we stop ?*/
if (r) {
@@ -5305,72 +6325,109 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
r, adev_to_drm(tmp_adev)->unique);
tmp_adev->asic_reset_res = r;
}
-
- /*
- * Drop all pending non scheduler resets. Scheduler resets
- * were already dropped during drm_sched_stop
- */
- amdgpu_device_stop_pending_resets(tmp_adev);
}
/* Actual ASIC resets if needed.*/
/* Host driver will handle XGMI hive reset for SRIOV */
if (amdgpu_sriov_vf(adev)) {
- r = amdgpu_device_reset_sriov(adev, job ? false : true);
+
+ /* Bail out of reset early */
+ if (amdgpu_ras_is_rma(adev))
+ return -ENODEV;
+
+ if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
+ dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
+ amdgpu_ras_set_fed(adev, true);
+ set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
+ }
+
+ r = amdgpu_device_reset_sriov(adev, reset_context);
+ if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
+ amdgpu_virt_release_full_gpu(adev, true);
+ goto retry;
+ }
if (r)
adev->asic_reset_res = r;
-
- /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
- amdgpu_ras_resume(adev);
} else {
- r = amdgpu_do_asic_reset(device_list_handle, reset_context);
+ r = amdgpu_do_asic_reset(device_list, reset_context);
if (r && r == -EAGAIN)
goto retry;
+ }
- if (!r && gpu_reset_for_dev_remove)
- goto recover_end;
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ /*
+ * Drop any pending non scheduler resets queued before reset is done.
+ * Any reset scheduled after this point would be valid. Scheduler resets
+ * were already dropped during drm_sched_stop and no new ones can come
+ * in before drm_sched_start.
+ */
+ amdgpu_device_stop_pending_resets(tmp_adev);
}
-skip_hw_reset:
+ return r;
+}
+
+static int amdgpu_device_sched_resume(struct list_head *device_list,
+ struct amdgpu_reset_context *reset_context,
+ bool job_signaled)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i, r = 0;
/* Post ASIC reset for all devs .*/
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
- if (!ring || !ring->sched.thread)
+ if (!amdgpu_ring_sched_ready(ring))
continue;
- drm_sched_start(&ring->sched, true);
+ drm_sched_start(&ring->sched, 0);
}
- if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
- amdgpu_mes_self_test(tmp_adev);
-
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
- if (tmp_adev->asic_reset_res)
- r = tmp_adev->asic_reset_res;
-
- tmp_adev->asic_reset_res = 0;
-
- if (r) {
- /* bad news, how to tell it to userspace ? */
- dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
- amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
+ if (tmp_adev->asic_reset_res) {
+ /* bad news, how to tell it to userspace ?
+ * For a RAS error report GPU bad status instead of reset failure.
+ * reset_context is NULL when called from amdgpu_pci_resume().
+ */
+ if (!reset_context || reset_context->src != AMDGPU_RESET_SRC_RAS ||
+ !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
+ dev_info(
+ tmp_adev->dev,
+ "GPU reset(%d) failed with error %d \n",
+ atomic_read(
+ &tmp_adev->gpu_reset_counter),
+ tmp_adev->asic_reset_res);
+ amdgpu_vf_error_put(tmp_adev,
+ AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
+ tmp_adev->asic_reset_res);
+ if (!r) /* keep the first failure as the return value */
+ r = tmp_adev->asic_reset_res;
+ tmp_adev->asic_reset_res = 0;
} else {
- dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
- if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
- DRM_WARN("smart shift update failed\n");
+ dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
+ atomic_read(&tmp_adev->gpu_reset_counter));
+ if (amdgpu_acpi_smart_shift_update(tmp_adev,
+ AMDGPU_SS_DEV_D0))
+ dev_warn(tmp_adev->dev,
+ "smart shift update failed\n");
}
}
-skip_sched_resume:
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ return r;
+}
+
+static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
/* unlock kfd: SRIOV would do it separately */
if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
@@ -5381,19 +6438,121 @@ skip_sched_resume:
if (!adev->kfd.init_complete)
amdgpu_amdkfd_device_init(adev);
- if (audio_suspended)
+ if (tmp_adev->pcie_reset_ctx.audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);
amdgpu_device_unset_mp1_state(tmp_adev);
amdgpu_ras_set_error_query_ready(tmp_adev, true);
+
}
+}
-recover_end:
- tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
- reset_list);
- amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+/**
+ * amdgpu_device_gpu_recover - reset the asic and recover scheduler
+ *
+ * @adev: amdgpu_device pointer
+ * @job: which job trigger hang
+ * @reset_context: amdgpu reset context pointer
+ *
+ * Attempt to reset the GPU if it has hung (all asics).
+ * Attempt to do soft-reset or full-reset and reinitialize Asic
+ * Returns 0 for success or an error on failure.
+ */
+
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head device_list;
+ bool job_signaled = false;
+ struct amdgpu_hive_info *hive = NULL;
+ int r = 0;
+ bool need_emergency_restart = false;
+
+ /*
+ * If it reaches here because of hang/timeout and a RAS error is
+ * detected at the same time, let RAS recovery take care of it.
+ */
+ if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
+ !amdgpu_sriov_vf(adev) &&
+ reset_context->src != AMDGPU_RESET_SRC_RAS) {
+ dev_dbg(adev->dev,
+ "Gpu recovery from source: %d yielding to RAS error recovery handling",
+ reset_context->src);
+ return 0;
+ }
+
+ /*
+ * Special case: RAS triggered and full reset isn't supported
+ */
+ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
+
+ /*
+ * Flush RAM to disk so that after reboot
+ * the user can read log and see why the system rebooted.
+ */
+ if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
+ amdgpu_ras_get_context(adev)->reboot) {
+ dev_warn(adev->dev, "Emergency reboot.");
+
+ ksys_sync_helper();
+ emergency_restart();
+ }
+
+ dev_info(adev->dev, "GPU %s begin!. Source: %d\n",
+ need_emergency_restart ? "jobs stop" : "reset",
+ reset_context->src);
+
+ if (!amdgpu_sriov_vf(adev))
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+
+ reset_context->job = job;
+ reset_context->hive = hive;
+ INIT_LIST_HEAD(&device_list);
+
+ amdgpu_device_recovery_prepare(adev, &device_list, hive);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ r = amdgpu_device_health_check(&device_list);
+ if (r)
+ goto end_reset;
+ }
+
+ /* We need to lock reset domain only once both for XGMI and single device */
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+
+ amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
+ hive, need_emergency_restart);
+ if (need_emergency_restart)
+ goto skip_sched_resume;
+ /*
+ * Must check guilty signal here since after this point all old
+ * HW fences are force signaled.
+ *
+ * job->base holds a reference to parent fence
+ */
+ if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
+ job_signaled = true;
+ dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
+ goto skip_hw_reset;
+ }
+
+ r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
+ if (r)
+ goto reset_unlock;
+skip_hw_reset:
+ r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
+ if (r)
+ goto reset_unlock;
+skip_sched_resume:
+ amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
+reset_unlock:
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+end_reset:
if (hive) {
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
@@ -5403,23 +6562,111 @@ recover_end:
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
atomic_set(&adev->reset_domain->reset_res, r);
+
+ if (!r) {
+ struct amdgpu_task_info *ti = NULL;
+
+ if (job)
+ ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
+
+ drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
+ ti ? &ti->task : NULL);
+
+ amdgpu_vm_put_task_info(ti);
+ }
+
return r;
}
/**
+ * amdgpu_device_partner_bandwidth - link capabilities of the dGPU's partner
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: out, link speed of the partner
+ * @width: out, link width of the partner
+ *
+ * Both outputs start as "unknown". With dynamic switching the first upstream
+ * bridge whose vendor is not ATI supplies its speed/width caps; otherwise
+ * pcie_bandwidth_available() fills them in. No-op if either pointer is NULL.
+ */
+static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ struct pci_dev *bridge;
+
+ if (!speed || !width)
+ return;
+
+ *speed = PCI_SPEED_UNKNOWN;
+ *width = PCIE_LNK_WIDTH_UNKNOWN;
+ if (!amdgpu_device_pcie_dynamic_switching_supported(adev)) {
+ /* use the current speeds rather than max if switching is not supported */
+ pcie_bandwidth_available(adev->pdev, NULL, speed, width);
+ return;
+ }
+ /* walk upstream, skipping upstream/downstream switches internal to dGPU */
+ for (bridge = pci_upstream_bridge(adev->pdev); bridge;
+ bridge = pci_upstream_bridge(bridge)) {
+ if (bridge->vendor == PCI_VENDOR_ID_ATI)
+ continue;
+ *speed = pcie_get_speed_cap(bridge);
+ *width = pcie_get_width_cap(bridge);
+ break;
+ }
+}
+
+/**
+ * amdgpu_device_gpu_bandwidth - link capabilities of the GPU itself
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: out, link speed capability
+ * @width: out, link width capability
+ *
+ * When the immediate upstream bridge is an ATI-vendor device, report the
+ * caps of the last ATI-vendor bridge found while walking further upstream;
+ * otherwise report the caps of adev->pdev.
+ * No-op if either pointer is NULL.
+ */
+static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ struct pci_dev *bridge;
+
+ if (!speed || !width)
+ return;
+
+ bridge = pci_upstream_bridge(adev->pdev);
+ if (!bridge || bridge->vendor != PCI_VENDOR_ID_ATI) {
+ /* use the device itself */
+ *speed = pcie_get_speed_cap(adev->pdev);
+ *width = pcie_get_width_cap(adev->pdev);
+ return;
+ }
+
+ /* use the upstream/downstream switches internal to dGPU */
+ do {
+ if (bridge->vendor != PCI_VENDOR_ID_ATI)
+ continue;
+ *speed = pcie_get_speed_cap(bridge);
+ *width = pcie_get_width_cap(bridge);
+ } while ((bridge = pci_upstream_bridge(bridge)));
+}
+
+/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
* @adev: amdgpu_device pointer
*
- * Fetchs and stores in the driver the PCIE capabilities (gen speed
+ * Fetches and stores in the driver the PCIE capabilities (gen speed
* and lanes) of the slot the device is in. Handles APUs and
* virtualized environments where PCIE config space may not be available.
*/
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
- struct pci_dev *pdev;
enum pci_bus_speed speed_cap, platform_speed_cap;
- enum pcie_link_width platform_link_width;
+ enum pcie_link_width platform_link_width, link_width;
if (amdgpu_pcie_gen_cap)
adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@@ -5439,13 +6686,12 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
return;
- pcie_bandwidth_available(adev->pdev, NULL,
- &platform_speed_cap, &platform_link_width);
+ amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
+ &platform_link_width);
+ amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */
- pdev = adev->pdev;
- speed_cap = pcie_get_speed_cap(pdev);
if (speed_cap == PCI_SPEED_UNKNOWN) {
adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
@@ -5501,51 +6747,103 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
if (adev->pm.pcie_mlw_mask == 0) {
+ /* asic caps */
+ if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+ adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
+ } else {
+ switch (link_width) {
+ case PCIE_LNK_X32:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X16:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X12:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X8:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X4:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X2:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X1:
+ adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
+ break;
+ default:
+ break;
+ }
+ }
+ /* platform caps */
if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
} else {
switch (platform_link_width) {
case PCIE_LNK_X32:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X16:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X12:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X8:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X4:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X2:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X1:
- adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
+ adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
break;
default:
break;
@@ -5568,29 +6866,37 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev)
{
#ifdef CONFIG_HSA_AMD_P2P
- uint64_t address_mask = peer_adev->dev->dma_mask ?
- ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
- resource_size_t aper_limit =
- adev->gmc.aper_base + adev->gmc.aper_size - 1;
bool p2p_access =
!adev->gmc.xgmi.connected_to_cpu &&
!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
+ if (!p2p_access)
+ dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
+ pci_name(peer_adev->pdev));
+
+ bool is_large_bar = adev->gmc.visible_vram_size &&
+ adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
+ bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
+
+ if (!p2p_addressable) {
+ uint64_t address_mask = peer_adev->dev->dma_mask ?
+ ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
+ resource_size_t aper_limit =
+ adev->gmc.aper_base + adev->gmc.aper_size - 1;
- return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
- adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
- !(adev->gmc.aper_base & address_mask ||
- aper_limit & address_mask));
+ p2p_addressable = !(adev->gmc.aper_base & address_mask ||
+ aper_limit & address_mask);
+ }
+ return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
#else
return false;
#endif
}
-int amdgpu_device_baco_enter(struct drm_device *dev)
+int amdgpu_device_baco_enter(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
- if (!amdgpu_device_supports_baco(dev))
+ if (!amdgpu_device_supports_baco(adev))
return -ENOTSUPP;
if (ras && adev->ras_enabled &&
@@ -5600,13 +6906,12 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
return amdgpu_dpm_baco_enter(adev);
}
-int amdgpu_device_baco_exit(struct drm_device *dev)
+int amdgpu_device_baco_exit(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
int ret = 0;
- if (!amdgpu_device_supports_baco(dev))
+ if (!amdgpu_device_supports_baco(adev))
return -ENOTSUPP;
ret = amdgpu_dpm_baco_exit(adev);
@@ -5617,7 +6922,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
- if (amdgpu_passthrough(adev) &&
+ if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
adev->nbio.funcs->clear_doorbell_interrupt)
adev->nbio.funcs->clear_doorbell_interrupt(adev);
@@ -5637,45 +6942,52 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int i;
-
- DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
+ struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
+ amdgpu_get_xgmi_hive(adev);
+ struct amdgpu_reset_context reset_context;
+ struct list_head device_list;
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- DRM_WARN("No support for XGMI hive yet...");
- return PCI_ERS_RESULT_DISCONNECT;
- }
+ dev_info(adev->dev, "PCI error: detected callback!!\n");
adev->pci_channel_state = state;
switch (state) {
case pci_channel_io_normal:
+ dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
return PCI_ERS_RESULT_CAN_RECOVER;
- /* Fatal error, prepare for slot reset */
case pci_channel_io_frozen:
- /*
- * Locking adev->reset_domain->sem will prevent any external access
- * to GPU during PCI error recovery
- */
- amdgpu_device_lock_reset_domain(adev->reset_domain);
- amdgpu_device_set_mp1_state(adev);
-
- /*
- * Block any work scheduling as we do for regular GPU reset
- * for the duration of the recovery
- */
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (!ring || !ring->sched.thread)
- continue;
+ /* Fatal error, prepare for slot reset */
+ dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
+ if (hive) {
+ /* Hive devices should be able to support FW based
+ * link reset on other devices, if not return.
+ */
+ if (!amdgpu_dpm_is_link_reset_supported(adev)) {
+ dev_warn(adev->dev,
+ "No support for XGMI hive yet...\n");
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ /* Set dpc status only if device is part of hive
+ * Non-hive devices should be able to recover after
+ * link reset.
+ */
+ amdgpu_reset_set_dpc_status(adev, true);
- drm_sched_stop(&ring->sched, NULL);
+ mutex_lock(&hive->hive_lock);
}
- atomic_inc(&adev->gpu_reset_counter);
+ memset(&reset_context, 0, sizeof(reset_context));
+ INIT_LIST_HEAD(&device_list);
+
+ amdgpu_device_recovery_prepare(adev, &device_list, hive);
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+ amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
+ hive, false);
+ if (hive)
+ mutex_unlock(&hive->hive_lock);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
/* Permanent error, prepare for device removal */
+ dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
return PCI_ERS_RESULT_DISCONNECT;
}
@@ -5688,8 +7000,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
*/
pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
- DRM_INFO("PCI error: mmio enabled callback!!\n");
+ dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
/* TODO - dump whatever for debugging purposes */
@@ -5713,21 +7027,40 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int r, i;
struct amdgpu_reset_context reset_context;
- u32 memsize;
+ struct amdgpu_device *tmp_adev;
+ struct amdgpu_hive_info *hive = NULL; /* tested after out: on every path */
struct list_head device_list;
+ struct pci_dev *link_dev;
+ int r = 0, i, timeout;
+ u32 memsize;
+ u16 status;
- DRM_INFO("PCI error: slot reset callback!!\n");
+ dev_info(adev->dev, "PCI error: slot reset callback!!\n");
memset(&reset_context, 0, sizeof(reset_context));
- INIT_LIST_HEAD(&device_list);
- list_add_tail(&adev->reset_list, &device_list);
+ /* the out: path hands device_list on, so init it before any goto */
+ INIT_LIST_HEAD(&device_list);
+ if (adev->pcie_reset_ctx.swus)
+ link_dev = adev->pcie_reset_ctx.swus;
+ else
+ link_dev = adev->pdev;
+ /* wait for asic to come out of reset, timeout = 10s */
+ timeout = 10000;
+ do {
+ usleep_range(10000, 10500);
+ r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
+ timeout -= 10;
+ } while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
+ (status != PCI_VENDOR_ID_AMD));
- /* wait for asic to come out of reset */
- msleep(500);
+ if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
+ r = -ETIME;
+ goto out;
+ }
+ amdgpu_device_load_switch_state(adev);
/* Restore PCI confspace */
amdgpu_device_load_pci_state(pdev);
@@ -5747,26 +7078,40 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
-
- adev->no_hw_access = true;
- r = amdgpu_device_pre_asic_reset(adev, &reset_context);
- adev->no_hw_access = false;
- if (r)
- goto out;
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+ INIT_LIST_HEAD(&device_list);
- r = amdgpu_do_asic_reset(&device_list, &reset_context);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ reset_context.hive = hive;
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ tmp_adev->pcie_reset_ctx.in_link_reset = true;
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ }
+ } else {
+ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+ list_add_tail(&adev->reset_list, &device_list);
+ }
+ r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
out:
if (!r) {
if (amdgpu_device_cache_pci_state(adev->pdev))
pci_restore_state(adev->pdev);
-
- DRM_INFO("PCIe error recovery succeeded\n");
+ dev_info(adev->dev, "PCIe error recovery succeeded\n");
} else {
- DRM_ERROR("PCIe error recovery failed, err:%d", r);
- amdgpu_device_unset_mp1_state(adev);
- amdgpu_device_unlock_reset_domain(adev->reset_domain);
+ dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
+ if (hive) {
+ list_for_each_entry(tmp_adev, &device_list, reset_list)
+ amdgpu_device_unset_mp1_state(tmp_adev);
+ }
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+ }
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
}
return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
@@ -5783,26 +7128,95 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int i;
-
+ struct list_head device_list;
+ struct amdgpu_hive_info *hive = NULL;
+ struct amdgpu_device *tmp_adev = NULL;
- DRM_INFO("PCI error: resume callback!!\n");
+ dev_info(adev->dev, "PCI error: resume callback!!\n");
/* Only continue execution for the case of pci_channel_io_frozen */
if (adev->pci_channel_state != pci_channel_io_frozen)
return;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
+ INIT_LIST_HEAD(&device_list);
- if (!ring || !ring->sched.thread)
- continue;
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ tmp_adev->pcie_reset_ctx.in_link_reset = false;
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ }
+ } else
+ list_add_tail(&adev->reset_list, &device_list);
+
+ amdgpu_device_sched_resume(&device_list, NULL, NULL);
+ amdgpu_device_gpu_resume(adev, &device_list, false);
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
+}
+
+static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
+{
+ struct pci_dev *swus, *swds;
+
+ /* If already saved, return */
+ if (adev->pcie_reset_ctx.swus)
+ return;
+
+ /* SWDS: ATI downstream port directly above the GPU function */
+ swds = pci_upstream_bridge(adev->pdev);
+ if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
+ pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
+ return;
+ /* SWUS: ATI/AMD upstream port above SWDS */
+ swus = pci_upstream_bridge(swds);
+ if (!swus ||
+ (swus->vendor != PCI_VENDOR_ID_ATI &&
+ swus->vendor != PCI_VENDOR_ID_AMD) ||
+ pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
+ return;
+
+ /*
+ * Save both ports before storing either copy: a failed SWUS save
+ * then leaves no stale SWDS copy to be overwritten on a later call.
+ */
+ if (pci_save_state(swds) || pci_save_state(swus))
+ return;
+
+ adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
+ adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
+ adev->pcie_reset_ctx.swus = swus;
+}
+
+static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
+{
+ struct pci_dev *pdev;
+ int r;
+
+ if (!adev->pcie_reset_ctx.swds_pcistate ||
+ !adev->pcie_reset_ctx.swus_pcistate)
+ return; /* need a cached copy for both switch ports */
- drm_sched_start(&ring->sched, true);
+ pdev = adev->pcie_reset_ctx.swus; /* restore the cached upstream port first */
+ r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
+ if (!r) {
+ pci_restore_state(pdev);
+ } else {
+ dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
+ return; /* SWDS is not attempted when SWUS could not be loaded */
}
- amdgpu_device_unset_mp1_state(adev);
- amdgpu_device_unlock_reset_domain(adev->reset_domain);
+ pdev = pci_upstream_bridge(adev->pdev); /* then the bridge directly above the GPU */
+ r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
+ if (!r)
+ pci_restore_state(pdev);
+ else
+ dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
}
bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
@@ -5811,6 +7225,9 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
struct amdgpu_device *adev = drm_to_adev(dev);
int r;
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
r = pci_save_state(pdev);
if (!r) {
kfree(adev->pci_state);
@@ -5818,14 +7235,16 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
adev->pci_state = pci_store_saved_state(pdev);
if (!adev->pci_state) {
- DRM_ERROR("Failed to store PCI saved state");
+ dev_err(adev->dev, "Failed to store PCI saved state");
return false;
}
} else {
- DRM_WARN("Failed to save PCI state, err:%d\n", r);
+ dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
return false;
}
+ amdgpu_device_cache_switch_state(adev);
+
return true;
}
@@ -5843,7 +7262,7 @@ bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
if (!r) {
pci_restore_state(pdev);
} else {
- DRM_WARN("Failed to load PCI state, err:%d\n", r);
+ dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
return false;
}
@@ -5958,6 +7377,22 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
}
/**
+ * amdgpu_device_get_gang - grab the fence currently published as the gang
+ * @adev: amdgpu_device pointer
+ *
+ * Returns: the result of dma_fence_get_rcu_safe() on adev->gang_submit.
+ */
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
+{
+ struct dma_fence *gang;
+
+ rcu_read_lock(); /* the lookup below runs inside an RCU read section */
+ gang = dma_fence_get_rcu_safe(&adev->gang_submit);
+ rcu_read_unlock();
+ return gang;
+}
+
+/**
* amdgpu_device_switch_gang - switch to a new gang
* @adev: amdgpu_device pointer
* @gang: the gang to switch to
@@ -5971,25 +7406,117 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
{
struct dma_fence *old = NULL;
+ dma_fence_get(gang);
do {
dma_fence_put(old);
- rcu_read_lock();
- old = dma_fence_get_rcu_safe(&adev->gang_submit);
- rcu_read_unlock();
-
+ old = amdgpu_device_get_gang(adev);
if (old == gang)
break;
- if (!dma_fence_is_signaled(old))
+ if (!dma_fence_is_signaled(old)) {
+ dma_fence_put(gang);
return old;
+ }
} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
old, gang) != old);
+ /*
+ * Drop it once for the exchanged reference in adev and once for the
+ * thread local reference acquired in amdgpu_device_get_gang().
+ */
+ dma_fence_put(old);
dma_fence_put(old);
return NULL;
}
+/**
+ * amdgpu_device_enforce_isolation - enforce HW isolation
+ * @adev: the amdgpu device pointer
+ * @ring: the HW ring the job is supposed to run on
+ * @job: the job which is about to be pushed to the HW ring
+ *
+ * Makes sure that only one client at a time can use the GFX block.
+ * Returns: The dependency to wait on before the job can be pushed to the HW.
+ * The function is called multiple times until NULL is returned.
+ */
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+ struct drm_sched_fence *f = job->base.s_fence;
+ struct dma_fence *dep;
+ void *owner;
+ int r;
+
+ /*
+ * For now enforce isolation only for the GFX block since we only need
+ * the cleaner shader on those rings.
+ */
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+ ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+ return NULL;
+
+ /*
+ * All submissions where enforce isolation is false are handled as if
+ * they come from a single client. Use ~0l as the owner to distinguish it
+ * from kernel submissions where the owner is NULL.
+ */
+ owner = job->enforce_isolation ? f->owner : (void *)~0l;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+
+ /*
+ * The "spearhead" submission is the first one which changes the
+ * ownership to its client. We always need to wait for it to be
+ * pushed to the HW before proceeding with anything.
+ */
+ if (&f->scheduled != isolation->spearhead &&
+ !dma_fence_is_signaled(isolation->spearhead)) {
+ dep = isolation->spearhead;
+ goto out_grab_ref;
+ }
+
+ if (isolation->owner != owner) {
+
+ /*
+ * Wait for any gang to be assembled before switching to a
+ * different owner or otherwise we could deadlock the
+ * submissions.
+ */
+ if (!job->gang_submit) {
+ dep = amdgpu_device_get_gang(adev);
+ if (!dma_fence_is_signaled(dep))
+ goto out_return_dep; /* skips dma_fence_get(): get_gang already returned a reference */
+ dma_fence_put(dep); /* gang already signaled, drop that reference */
+ }
+
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(&f->scheduled);
+ amdgpu_sync_move(&isolation->active, &isolation->prev);
+ trace_amdgpu_isolation(isolation->owner, owner);
+ isolation->owner = owner;
+ }
+
+ /*
+ * Specifying the ring here helps to pipeline submissions even when
+ * isolation is enabled. If that is not desired for testing NULL can be
+ * used instead of the ring to enforce a CPU round trip while switching
+ * between clients.
+ */
+ dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
+ r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
+ if (r)
+ dev_warn(adev->dev, "OOM tracking isolation\n");
+
+out_grab_ref:
+ dma_fence_get(dep);
+out_return_dep:
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ return dep;
+}
+
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
@@ -6024,7 +7551,7 @@ bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
return true;
default:
/* IP discovery */
- if (!adev->ip_versions[DCE_HWIP][0] ||
+ if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
(adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
return false;
return true;
@@ -6049,12 +7576,108 @@ uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
tmp_ = RREG32(reg_addr);
loop--;
if (!loop) {
- DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
- inst, reg_name, (uint32_t)expected_value,
- (uint32_t)(tmp_ & (mask)));
+			/* Polling budget exhausted: report expected vs. last masked value. */
+			dev_warn(
+				adev->dev,
+				"Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
+				inst, reg_name, (uint32_t)expected_value,
+				(uint32_t)(tmp_ & (mask)));
ret = -ETIMEDOUT;
break;
}
}
return ret;
}
+
+/**
+ * amdgpu_get_soft_full_reset_mask - collect soft/full reset flags for a ring
+ * @ring: ring to query; may be NULL
+ *
+ * Sets AMDGPU_RESET_TYPE_FULL when amdgpu_device_should_recover_gpu() reports
+ * true for the ring's device. Sets AMDGPU_RESET_TYPE_SOFT_RESET when
+ * debug_disable_soft_recovery is clear, the device is not an SR-IOV VF and the
+ * ring provides a soft_recovery callback.
+ *
+ * Return: bitmask of AMDGPU_RESET_TYPE_* flags, 0 if @ring or its device
+ * pointer is NULL.
+ */
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev;
+	ssize_t mask = 0;
+
+	if (!ring)
+		return 0;
+
+	adev = ring->adev;
+	if (!adev)
+		return 0;
+
+	if (amdgpu_device_should_recover_gpu(adev))
+		mask |= AMDGPU_RESET_TYPE_FULL;
+
+	/* NOTE(review): unlikely() wraps the negated debug flag - confirm the hint is intended. */
+	if (unlikely(!adev->debug_disable_soft_recovery) &&
+	    !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery)
+		mask |= AMDGPU_RESET_TYPE_SOFT_RESET;
+
+	return mask;
+}
+
+/**
+ * amdgpu_show_reset_mask - print a reset-type mask into a sysfs buffer
+ * @buf: output buffer handed to sysfs_emit_at()
+ * @supported_reset: bitmask of AMDGPU_RESET_TYPE_* flags
+ *
+ * Emits "unsupported" when no bit is set. Otherwise emits one
+ * space-terminated keyword per recognised flag, in the order soft, queue,
+ * pipe, full. The output is always terminated by a newline.
+ *
+ * Return: accumulated return values of the sysfs_emit_at() calls.
+ */
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
+{
+	ssize_t len = 0;
+
+	if (!supported_reset) {
+		len += sysfs_emit_at(buf, len, "unsupported");
+	} else {
+		if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
+			len += sysfs_emit_at(buf, len, "soft ");
+
+		if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
+			len += sysfs_emit_at(buf, len, "queue ");
+
+		if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
+			len += sysfs_emit_at(buf, len, "pipe ");
+
+		if (supported_reset & AMDGPU_RESET_TYPE_FULL)
+			len += sysfs_emit_at(buf, len, "full ");
+	}
+
+	/* Both branches finish the line the same way. */
+	len += sysfs_emit_at(buf, len, "\n");
+
+	return len;
+}
+
+/**
+ * amdgpu_device_set_uid - store a UID in the per-type/per-instance table
+ * @uid_info: UID table to update; a NULL pointer is ignored
+ * @type: UID type, must be below AMDGPU_UID_TYPE_MAX
+ * @inst: instance index, must be below AMDGPU_UID_INST_MAX
+ * @uid: value to store
+ *
+ * Out-of-range @type or @inst is reported once via dev_err_once() and the
+ * table is left untouched. Replacing a non-zero entry is allowed but logged
+ * once via dev_warn_once().
+ */
+void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
+			   enum amdgpu_uid_type type, uint8_t inst,
+			   uint64_t uid)
+{
+	if (uid_info == NULL)
+		return;
+
+	if (type >= AMDGPU_UID_TYPE_MAX) {
+		dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
+			     type);
+		return;
+	}
+
+	if (inst >= AMDGPU_UID_INST_MAX) {
+		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
+			     inst);
+		return;
+	}
+
+	/* A zero entry counts as "not set yet"; anything else is an overwrite. */
+	if (uid_info->uid[type][inst]) {
+		dev_warn_once(
+			uid_info->adev->dev,
+			"Overwriting existing UID %llu for type %d instance %d\n",
+			uid_info->uid[type][inst], type, inst);
+	}
+
+	uid_info->uid[type][inst] = uid;
+}
+
+/**
+ * amdgpu_device_get_uid - look up a UID in the per-type/per-instance table
+ * @uid_info: UID table to read; may be NULL
+ * @type: UID type, must be below AMDGPU_UID_TYPE_MAX
+ * @inst: instance index, must be below AMDGPU_UID_INST_MAX
+ *
+ * Return: the stored UID, or 0 when @uid_info is NULL or when @type or @inst
+ * is out of range (the latter two are reported once via dev_err_once()).
+ */
+u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
+			  enum amdgpu_uid_type type, uint8_t inst)
+{
+	if (!uid_info)
+		return 0;
+
+	if (type < AMDGPU_UID_TYPE_MAX) {
+		if (inst < AMDGPU_UID_INST_MAX)
+			return uid_info->uid[type][inst];
+
+		dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
+			     inst);
+		return 0;
+	}
+
+	dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
+		     type);
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
index 1538b2dbfff1..eb605e79ae0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -33,6 +33,7 @@ struct amdgpu_df_hash_status {
struct amdgpu_df_funcs {
void (*sw_init)(struct amdgpu_device *adev);
void (*sw_fini)(struct amdgpu_device *adev);
+ void (*hw_init)(struct amdgpu_device *adev);
void (*enable_broadcast_mode)(struct amdgpu_device *adev,
bool enable);
u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 7d5e7ad28ba8..73401f0aeb34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,6 +27,7 @@
#include "amdgpu_discovery.h"
#include "soc15_hw_ip.h"
#include "discovery.h"
+#include "amdgpu_ras.h"
#include "soc15.h"
#include "gfx_v9_0.h"
@@ -35,10 +36,13 @@
#include "df_v1_7.h"
#include "df_v3_6.h"
#include "df_v4_3.h"
+#include "df_v4_6_2.h"
+#include "df_v4_15.h"
#include "nbio_v6_1.h"
#include "nbio_v7_0.h"
#include "nbio_v7_4.h"
#include "nbio_v7_9.h"
+#include "nbio_v7_11.h"
#include "hdp_v4_0.h"
#include "vega10_ih.h"
#include "vega20_ih.h"
@@ -52,26 +56,34 @@
#include "smuio_v9_0.h"
#include "gmc_v10_0.h"
#include "gmc_v11_0.h"
+#include "gmc_v12_0.h"
#include "gfxhub_v2_0.h"
#include "mmhub_v2_0.h"
#include "nbio_v2_3.h"
#include "nbio_v4_3.h"
#include "nbio_v7_2.h"
#include "nbio_v7_7.h"
+#include "nbif_v6_3_1.h"
#include "hdp_v5_0.h"
#include "hdp_v5_2.h"
#include "hdp_v6_0.h"
+#include "hdp_v7_0.h"
#include "nv.h"
#include "soc21.h"
+#include "soc24.h"
#include "navi10_ih.h"
#include "ih_v6_0.h"
#include "ih_v6_1.h"
+#include "ih_v7_0.h"
#include "gfx_v10_0.h"
#include "gfx_v11_0.h"
+#include "gfx_v12_0.h"
#include "sdma_v5_0.h"
#include "sdma_v5_2.h"
#include "sdma_v6_0.h"
+#include "sdma_v7_0.h"
#include "lsdma_v6_0.h"
+#include "lsdma_v7_0.h"
#include "vcn_v2_0.h"
#include "jpeg_v2_0.h"
#include "vcn_v3_0.h"
@@ -80,19 +92,40 @@
#include "jpeg_v4_0.h"
#include "vcn_v4_0_3.h"
#include "jpeg_v4_0_3.h"
+#include "vcn_v4_0_5.h"
+#include "jpeg_v4_0_5.h"
#include "amdgpu_vkms.h"
-#include "mes_v10_1.h"
#include "mes_v11_0.h"
+#include "mes_v12_0.h"
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
#include "smuio_v13_0.h"
#include "smuio_v13_0_3.h"
#include "smuio_v13_0_6.h"
+#include "smuio_v14_0_2.h"
+#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
+#include "jpeg_v5_0_0.h"
+#include "jpeg_v5_0_1.h"
+
+#include "amdgpu_vpe.h"
+#if defined(CONFIG_DRM_AMD_ISP)
+#include "amdgpu_isp.h"
+#endif
-#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin"
-MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);
-
+MODULE_FIRMWARE("amdgpu/ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin");
+
+#define mmIP_DISCOVERY_VERSION 0x16A00
#define mmRCC_CONFIG_MEMSIZE 0xde3
+#define mmMP0_SMN_C2PMSG_33 0x16061
#define mmMM_INDEX 0x0
#define mmMM_INDEX_HI 0x6
#define mmMM_DATA 0x1
@@ -174,6 +207,7 @@ static const char *hw_id_names[HW_ID_MAX] = {
[XGMI_HWID] = "XGMI",
[XGBE_HWID] = "XGBE",
[MP0_HWID] = "MP0",
+ [VPE_HWID] = "VPE",
};
static int hw_id_map[MAX_HWIP] = {
@@ -203,6 +237,8 @@ static int hw_id_map[MAX_HWIP] = {
[XGMI_HWIP] = XGMI_HWID,
[DCI_HWIP] = DCI_HWID,
[PCIE_HWIP] = PCIE_HWID,
+ [VPE_HWIP] = VPE_HWID,
+ [ISP_HWIP] = ISP_HWID,
};
static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
@@ -228,13 +264,41 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev,
return -ENOENT;
}
+#define IP_DISCOVERY_V2 2
+#define IP_DISCOVERY_V4 4
+
static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
uint8_t *binary)
{
- uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
- int ret = 0;
+ bool sz_valid = true;
+ uint64_t vram_size;
+ int i, ret = 0;
+ u32 msg;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* It can take up to two seconds for IFWI init to complete on some dGPUs,
+ * but generally it should be in the 60-100ms range. Normally this starts
+ * as soon as the device gets power so by the time the OS loads this has long
+ * completed. However, when a card is hotplugged via e.g., USB4, we need to
+ * wait for this to complete. Once the C2PMSG is updated, we can
+ * continue.
+ */
- if (vram_size) {
+ for (i = 0; i < 2000; i++) {
+ msg = RREG32(mmMP0_SMN_C2PMSG_33);
+ if (msg & 0x80000000)
+ break;
+ msleep(1);
+ }
+ }
+
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ sz_valid = false;
+ else
+ vram_size <<= 20;
+
+ if (sz_valid) {
uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
adev->mman.discovery_tmr_size, false);
@@ -242,28 +306,27 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
}
+ if (ret)
+ dev_err(adev->dev,
+ "failed to read discovery info from memory, vram size read: %llx",
+ vram_size);
+
return ret;
}
-static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary)
+static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev,
+ uint8_t *binary,
+ const char *fw_name)
{
const struct firmware *fw;
- const char *fw_name;
int r;
- switch (amdgpu_discovery) {
- case 2:
- fw_name = FIRMWARE_IP_DISCOVERY;
- break;
- default:
- dev_warn(adev->dev, "amdgpu_discovery is not set properly\n");
- return -EINVAL;
- }
-
- r = request_firmware(&fw, fw_name, adev->dev);
+ r = firmware_request_nowarn(&fw, fw_name, adev->dev);
if (r) {
- dev_err(adev->dev, "can't load firmware \"%s\"\n",
- fw_name);
+ if (amdgpu_discovery == 2)
+ dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name);
+ else
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name);
return r;
}
@@ -304,8 +367,8 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
* So far, apply this quirk only on those Navy Flounder boards which
* have a bad harvest table of VCN config.
*/
- if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) &&
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2))) {
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 1) == IP_VERSION(3, 0, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 2))) {
switch (adev->pdev->revision) {
case 0xC1:
case 0xC2:
@@ -323,10 +386,68 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
}
}
+/*
+ * Validate the NPS info table referenced by the discovery binary header.
+ *
+ * Locates the table through bhdr->table_list[NPS_INFO], then checks its
+ * table id against NPS_INFO_TABLE_ID and its checksum over the size_bytes
+ * recorded in the table header.
+ *
+ * Returns 0 when both checks pass, -EINVAL otherwise.
+ */
+static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
+					   struct binary_header *bhdr)
+{
+	struct table_info *entry = &bhdr->table_list[NPS_INFO];
+	uint16_t tbl_offset = le16_to_cpu(entry->offset);
+	uint16_t expected = le16_to_cpu(entry->checksum);
+	struct nps_info_header *nhdr;
+
+	nhdr = (struct nps_info_header *)(adev->mman.discovery_bin +
+					  tbl_offset);
+
+	if (le32_to_cpu(nhdr->table_id) != NPS_INFO_TABLE_ID) {
+		dev_dbg(adev->dev, "invalid ip discovery nps info table id\n");
+		return -EINVAL;
+	}
+
+	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin +
+						      tbl_offset,
+					      le32_to_cpu(nhdr->size_bytes),
+					      expected)) {
+		dev_dbg(adev->dev, "invalid nps info data table checksum\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Pick the IP discovery firmware file for this device.
+ *
+ * amdgpu_discovery == 2 always selects the generic "amdgpu/ip_discovery.bin".
+ * Otherwise a per-ASIC file is chosen from adev->asic_type (and apu_flags
+ * for CHIP_RAVEN). Returns NULL when the ASIC has no dedicated file.
+ */
+static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
+{
+	const char *name = NULL;
+
+	if (amdgpu_discovery == 2)
+		return "amdgpu/ip_discovery.bin";
+
+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+		name = "amdgpu/vega10_ip_discovery.bin";
+		break;
+	case CHIP_VEGA12:
+		name = "amdgpu/vega12_ip_discovery.bin";
+		break;
+	case CHIP_RAVEN:
+		/* Raven2 is tested first, then Picasso, then plain Raven. */
+		name = "amdgpu/raven_ip_discovery.bin";
+		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+			name = "amdgpu/raven2_ip_discovery.bin";
+		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+			name = "amdgpu/picasso_ip_discovery.bin";
+		break;
+	case CHIP_VEGA20:
+		name = "amdgpu/vega20_ip_discovery.bin";
+		break;
+	case CHIP_ARCTURUS:
+		name = "amdgpu/arcturus_ip_discovery.bin";
+		break;
+	case CHIP_ALDEBARAN:
+		name = "amdgpu/aldebaran_ip_discovery.bin";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
struct binary_header *bhdr;
+ const char *fw_name;
uint16_t offset;
uint16_t size;
uint16_t checksum;
@@ -338,17 +459,14 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
return -ENOMEM;
/* Read from file if it is the preferred option */
- if (amdgpu_discovery == 2) {
- dev_info(adev->dev, "use ip discovery information from file");
- r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);
-
- if (r) {
- dev_err(adev->dev, "failed to read ip discovery binary from file\n");
- r = -EINVAL;
+ fw_name = amdgpu_discovery_get_fw_name(adev);
+ if (fw_name != NULL) {
+ drm_dbg(&adev->ddev, "use ip discovery information from file");
+ r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name);
+ if (r)
goto out;
- }
-
} else {
+ drm_dbg(&adev->ddev, "use ip discovery information from memory");
r = amdgpu_discovery_read_binary_from_mem(
adev, adev->mman.discovery_bin);
if (r)
@@ -491,7 +609,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
out:
kfree(adev->mman.discovery_bin);
adev->mman.discovery_bin = NULL;
-
+ if ((amdgpu_discovery != 2) &&
+ (RREG32(mmIP_DISCOVERY_VERSION) == 4))
+ amdgpu_ras_query_boot_status(adev, 4);
return r;
}
@@ -504,16 +624,19 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev)
adev->mman.discovery_bin = NULL;
}
-static int amdgpu_discovery_validate_ip(const struct ip_v4 *ip)
+static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev,
+ uint8_t instance, uint16_t hw_id)
{
- if (ip->instance_number >= HWIP_MAX_INSTANCE) {
- DRM_ERROR("Unexpected instance_number (%d) from ip discovery blob\n",
- ip->instance_number);
+ if (instance >= HWIP_MAX_INSTANCE) {
+ dev_err(adev->dev,
+ "Unexpected instance_number (%d) from ip discovery blob\n",
+ instance);
return -EINVAL;
}
- if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) {
- DRM_ERROR("Unexpected hw_id (%d) from ip discovery blob\n",
- le16_to_cpu(ip->hw_id));
+ if (hw_id >= HW_ID_MAX) {
+ dev_err(adev->dev,
+ "Unexpected hw_id (%d) from ip discovery blob\n",
+ hw_id);
return -EINVAL;
}
@@ -526,8 +649,10 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
- struct ip_v4 *ip;
+ struct ip *ip;
uint16_t die_offset, ip_offset, num_dies, num_ips;
+ uint16_t hw_id;
+ uint8_t inst;
int i, j;
bhdr = (struct binary_header *)adev->mman.discovery_bin;
@@ -543,16 +668,18 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
ip_offset = die_offset + sizeof(*dhdr);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
-
- if (amdgpu_discovery_validate_ip(ip))
+ ip = (struct ip *)(adev->mman.discovery_bin +
+ ip_offset);
+ inst = ip->number_instance;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
goto next_ip;
- if (le16_to_cpu(ip->variant) == 1) {
- switch (le16_to_cpu(ip->hw_id)) {
+ if (ip->harvest == 1) {
+ switch (hw_id) {
case VCN_HWID:
(*vcn_harvest_count)++;
- if (ip->instance_number == 0) {
+ if (inst == 0) {
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
adev->vcn.inst_mask &=
~AMDGPU_VCN_HARVEST_VCN0;
@@ -574,10 +701,8 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
}
}
next_ip:
- if (ihdr->base_addr_64_bit)
- ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
- else
- ip_offset += struct_size(ip, base_address, ip->num_base_address);
+ ip_offset += struct_size(ip, base_address,
+ ip->num_base_address);
}
}
}
@@ -635,6 +760,12 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
adev->sdma.sdma_mask &=
~(1U << harvest_info->list[i].number_instance);
break;
+#if defined(CONFIG_DRM_AMD_ISP)
+ case ISP_HWID:
+ adev->isp.harvest_config |=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+#endif
default:
break;
}
@@ -655,7 +786,7 @@ struct ip_hw_instance {
u8 harvest;
int num_base_addresses;
- u32 base_addr[];
+ u32 base_addr[] __counted_by(num_base_addresses);
};
struct ip_hw_id {
@@ -930,6 +1061,8 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
bool reg_base_64)
{
int ii, jj, kk, res;
+ uint16_t hw_id;
+ uint8_t inst;
DRM_DEBUG("num_ips:%d", num_ips);
@@ -945,8 +1078,10 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
struct ip_hw_instance *ip_hw_instance;
ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
- if (amdgpu_discovery_validate_ip(ip) ||
- le16_to_cpu(ip->hw_id) != ii)
+ inst = ip->instance_number;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id) ||
+ hw_id != ii)
goto next_ip;
DRM_DEBUG("match:%d @ ip_offset:%zu", ii, ip_offset);
@@ -1184,6 +1319,7 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
{
+ uint8_t num_base_address, subrev, variant;
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
@@ -1191,18 +1327,19 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
uint16_t die_offset;
uint16_t ip_offset;
uint16_t num_dies;
+ uint32_t wafl_ver;
uint16_t num_ips;
- uint8_t num_base_address;
+ uint16_t hw_id;
+ uint8_t inst;
int hw_ip;
int i, j, k;
int r;
r = amdgpu_discovery_init(adev);
- if (r) {
- DRM_ERROR("amdgpu_discovery_init failed\n");
+ if (r)
return r;
- }
+ wafl_ver = 0;
adev->gfx.xcc_mask = 0;
adev->sdma.sdma_mask = 0;
adev->vcn.inst_mask = 0;
@@ -1232,7 +1369,9 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
for (j = 0; j < num_ips; j++) {
ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
- if (amdgpu_discovery_validate_ip(ip))
+ inst = ip->instance_number;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
goto next_ip;
num_base_address = ip->num_base_address;
@@ -1251,11 +1390,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
* 0b10 : encode is disabled
* 0b01 : decode is disabled
*/
- adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
- ip->revision & 0xc0;
- ip->revision &= ~0xc0;
if (adev->vcn.num_vcn_inst <
AMDGPU_MAX_VCN_INSTANCES) {
+ adev->vcn.inst[adev->vcn.num_vcn_inst].vcn_config =
+ ip->revision & 0xc0;
adev->vcn.num_vcn_inst++;
adev->vcn.inst_mask |=
(1U << ip->instance_number);
@@ -1266,6 +1404,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
adev->vcn.num_vcn_inst + 1,
AMDGPU_MAX_VCN_INSTANCES);
}
+ ip->revision &= ~0xc0;
}
if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
@@ -1283,6 +1422,15 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
}
}
+ if (le16_to_cpu(ip->hw_id) == VPE_HWID) {
+ if (adev->vpe.num_instances < AMDGPU_MAX_VPE_INSTANCES)
+ adev->vpe.num_instances++;
+ else
+ dev_err(adev->dev, "Too many VPE instances: %d vs %d\n",
+ adev->vpe.num_instances + 1,
+ AMDGPU_MAX_VPE_INSTANCES);
+ }
+
if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
adev->gmc.num_umc++;
adev->umc.node_inst_num++;
@@ -1292,6 +1440,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
adev->gfx.xcc_mask |=
(1U << ip->instance_number);
+ if (!wafl_ver && le16_to_cpu(ip->hw_id) == WAFLC_HWID)
+ wafl_ver = IP_VERSION_FULL(ip->major, ip->minor,
+ ip->revision, 0, 0);
+
for (k = 0; k < num_base_address; k++) {
/*
* convert the endianness of base addresses in place,
@@ -1330,8 +1482,22 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
* example. On most chips there are multiple instances
* with the same HWID.
*/
- adev->ip_versions[hw_ip][ip->instance_number] =
- IP_VERSION(ip->major, ip->minor, ip->revision);
+
+				/*
+				 * Header fields are little-endian in the blob;
+				 * convert before comparing the version.
+				 */
+				if (le16_to_cpu(ihdr->version) < 3) {
+					subrev = 0;
+					variant = 0;
+				} else {
+					subrev = ip->sub_revision;
+					variant = ip->variant;
+				}
+
+ adev->ip_versions[hw_ip]
+ [ip->instance_number] =
+ IP_VERSION_FULL(ip->major,
+ ip->minor,
+ ip->revision,
+ variant,
+ subrev);
}
}
@@ -1343,21 +1509,32 @@ next_ip:
}
}
+ if (wafl_ver && !adev->ip_versions[XGMI_HWIP][0])
+ adev->ip_versions[XGMI_HWIP][0] = wafl_ver;
+
return 0;
}
static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
+ struct ip_discovery_header *ihdr;
+ struct binary_header *bhdr;
int vcn_harvest_count = 0;
int umc_harvest_count = 0;
+ uint16_t offset, ihdr_ver;
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset);
+ ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
+ offset);
+ ihdr_ver = le16_to_cpu(ihdr->version);
/*
* Harvest table does not fit Navi1x and legacy GPUs,
* so read harvest bit per IP data structure to set
* harvest configuration.
*/
- if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0) &&
- adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 2, 0) &&
+ ihdr_ver <= 2) {
if ((adev->pdev->device == 0x731E &&
(adev->pdev->revision == 0xC6 ||
adev->pdev->revision == 0xC7)) ||
@@ -1389,6 +1566,7 @@ union gc_info {
struct gc_info_v1_0 v1;
struct gc_info_v1_1 v1_1;
struct gc_info_v1_2 v1_2;
+ struct gc_info_v1_3 v1_3;
struct gc_info_v2_0 v2;
struct gc_info_v2_1 v2_1;
};
@@ -1432,12 +1610,12 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
- if (gc_info->v1.header.version_minor >= 1) {
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 1) {
adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v1_1.gc_num_tcp_per_sa);
adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v1_1.gc_num_sdp_interface);
adev->gfx.config.gc_num_tcps = le32_to_cpu(gc_info->v1_1.gc_num_tcps);
}
- if (gc_info->v1.header.version_minor >= 2) {
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 2) {
adev->gfx.config.gc_num_tcp_per_wpg = le32_to_cpu(gc_info->v1_2.gc_num_tcp_per_wpg);
adev->gfx.config.gc_tcp_l1_size = le32_to_cpu(gc_info->v1_2.gc_tcp_l1_size);
adev->gfx.config.gc_num_sqc_per_wgp = le32_to_cpu(gc_info->v1_2.gc_num_sqc_per_wgp);
@@ -1447,6 +1625,16 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance);
adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu);
}
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) {
+ adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu);
+ adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size);
+ adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size);
+ adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc);
+ adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size);
+ adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size);
+ adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size);
+ }
break;
case 2:
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
@@ -1466,7 +1654,7 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
- if (gc_info->v2.header.version_minor == 1) {
+ if (le16_to_cpu(gc_info->v2.header.version_minor) == 1) {
adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
@@ -1531,7 +1719,7 @@ static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
break;
case 2:
mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc);
- adev->gmc.mall_size = mall_size_per_umc * adev->gmc.num_umc;
+ adev->gmc.mall_size = (uint64_t)mall_size_per_umc * adev->gmc.num_umc;
break;
default:
dev_err(adev->dev,
@@ -1583,7 +1771,7 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
* so this won't overflow.
*/
for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
- adev->vcn.vcn_codec_disable_mask[v] =
+ adev->vcn.inst[v].vcn_codec_disable_mask =
le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
}
break;
@@ -1597,10 +1785,113 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
return 0;
}
+/*
+ * Versioned views of the NPS info table. Readers select the member from
+ * header.version_major; only version 1 is defined here.
+ */
+union nps_info {
+ struct nps_info_v1_0 v1;
+};
+
+/*
+ * Re-read the NPS info table straight from VRAM into @nps_data.
+ *
+ * The discovery binary header is fetched from the TMR region at the top of
+ * VRAM, the NPS_INFO table entry gives the table offset and checksum, and
+ * sizeof(*nps_data) bytes are then copied from that offset and verified.
+ *
+ * Returns 0 on success. Returns -EINVAL when the VRAM size register reads
+ * back as 0 or all ones, when the table claims to be larger than the
+ * caller's buffer, or when the checksum does not match.
+ */
+static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev,
+					     union nps_info *nps_data)
+{
+	uint64_t vram_size, pos, offset;
+	struct nps_info_header *nhdr;
+	struct binary_header bhdr;
+	uint32_t size_bytes;
+	uint16_t checksum;
+
+	/* Same sanity check as the initial discovery read from memory. */
+	vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+	if (!vram_size || vram_size == U32_MAX) {
+		dev_err(adev->dev,
+			"nps data refresh, invalid vram size read: %llx\n",
+			vram_size);
+		return -EINVAL;
+	}
+	vram_size <<= 20;
+
+	pos = vram_size - DISCOVERY_TMR_OFFSET;
+	amdgpu_device_vram_access(adev, pos, &bhdr, sizeof(bhdr), false);
+
+	offset = le16_to_cpu(bhdr.table_list[NPS_INFO].offset);
+	checksum = le16_to_cpu(bhdr.table_list[NPS_INFO].checksum);
+
+	amdgpu_device_vram_access(adev, (pos + offset), nps_data,
+				  sizeof(*nps_data), false);
+
+	nhdr = (struct nps_info_header *)(nps_data);
+
+	/*
+	 * size_bytes comes from VRAM; never let the checksum walk past the
+	 * buffer that was actually filled above.
+	 */
+	size_bytes = le32_to_cpu(nhdr->size_bytes);
+	if (size_bytes > sizeof(*nps_data)) {
+		dev_err(adev->dev,
+			"nps data refresh, table size %u exceeds buffer\n",
+			size_bytes);
+		return -EINVAL;
+	}
+
+	if (!amdgpu_discovery_verify_checksum((uint8_t *)nps_data,
+					      size_bytes, checksum)) {
+		dev_err(adev->dev, "nps data refresh, checksum mismatch\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_discovery_get_nps_info - fetch NPS type and memory ranges
+ * @adev: amdgpu device
+ * @nps_type: output, NPS type taken from the table
+ * @ranges: output, newly allocated array of memory ranges
+ * @range_cnt: output, number of entries in @ranges
+ * @refresh: when true re-read the table from VRAM, otherwise use the cached
+ *           discovery binary
+ *
+ * The array stored in @ranges is allocated with kvcalloc() inside this
+ * function and handed to the caller.
+ *
+ * Return: 0 on success; -EINVAL for NULL output pointers, a missing cached
+ * discovery binary or an unhandled table version; -ENOENT when the cached
+ * binary has no (valid) NPS table; -ENOMEM on allocation failure; or the
+ * error returned by the VRAM refresh.
+ */
+int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
+				  uint32_t *nps_type,
+				  struct amdgpu_gmc_memrange **ranges,
+				  int *range_cnt, bool refresh)
+{
+	struct amdgpu_gmc_memrange *table;
+	struct binary_header *bhdr;
+	union nps_info *nps_info;
+	union nps_info nps_data;
+	u16 offset, major;
+	int idx, r;
+
+	if (!nps_type || !range_cnt || !ranges)
+		return -EINVAL;
+
+	if (!refresh) {
+		if (!adev->mman.discovery_bin) {
+			dev_err(adev->dev,
+				"fetch mem range failed, ip discovery uninitialized\n");
+			return -EINVAL;
+		}
+
+		bhdr = (struct binary_header *)adev->mman.discovery_bin;
+		offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
+		if (!offset)
+			return -ENOENT;
+
+		/* A table that fails verification is treated as absent. */
+		if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
+			return -ENOENT;
+
+		nps_info =
+			(union nps_info *)(adev->mman.discovery_bin + offset);
+	} else {
+		r = amdgpu_discovery_refresh_nps_info(adev, &nps_data);
+		if (r)
+			return r;
+		nps_info = &nps_data;
+	}
+
+	major = le16_to_cpu(nps_info->v1.header.version_major);
+	if (major != 1) {
+		dev_err(adev->dev, "Unhandled NPS info table %d.%d\n",
+			major,
+			le16_to_cpu(nps_info->v1.header.version_minor));
+		return -EINVAL;
+	}
+
+	/* NOTE(review): count/nps_type/addresses are read without le*_to_cpu() - confirm. */
+	table = kvcalloc(nps_info->v1.count, sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	*nps_type = nps_info->v1.nps_type;
+	*range_cnt = nps_info->v1.count;
+
+	for (idx = 0; idx < *range_cnt; idx++) {
+		table[idx].base_address =
+			nps_info->v1.instance_info[idx].base_address;
+		table[idx].limit_address =
+			nps_info->v1.instance_info[idx].limit_address;
+		table[idx].nid_mask = -1;
+		table[idx].flags = 0;
+	}
+
+	*ranges = table;
+
+	return 0;
+}
+
static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
{
/* what IP to use for this? */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 1):
@@ -1610,6 +1901,8 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
break;
case IP_VERSION(10, 1, 10):
@@ -1632,12 +1925,20 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &soc24_common_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add common ip block(GC_HWIP:0x%x)\n",
- adev->ip_versions[GC_HWIP][0]);
+ amdgpu_ip_version(adev, GC_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -1646,7 +1947,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
{
/* use GC or MMHUB IP version */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 1):
@@ -1656,6 +1957,8 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
break;
case IP_VERSION(10, 1, 10):
@@ -1678,12 +1981,19 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &gmc_v12_0_ip_block);
+ break;
default:
- dev_err(adev->dev,
- "Failed to add gmc ip block(GC_HWIP:0x%x)\n",
- adev->ip_versions[GC_HWIP][0]);
+ dev_err(adev->dev, "Failed to add gmc ip block(GC_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -1691,7 +2001,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[OSSSYS_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, OSSSYS_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 1):
case IP_VERSION(4, 1, 0):
@@ -1703,6 +2013,7 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(4, 2, 1):
case IP_VERSION(4, 4, 0):
case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block);
break;
case IP_VERSION(5, 0, 0):
@@ -1721,10 +2032,13 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(6, 1, 0):
amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block);
break;
+ case IP_VERSION(7, 0, 0):
+ amdgpu_device_ip_block_add(adev, &ih_v7_0_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n",
- adev->ip_versions[OSSSYS_HWIP][0]);
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -1732,7 +2046,7 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
break;
@@ -1750,6 +2064,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
break;
case IP_VERSION(11, 0, 8):
@@ -1769,16 +2084,25 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
break;
case IP_VERSION(13, 0, 4):
amdgpu_device_ip_block_add(adev, &psp_v13_0_4_ip_block);
break;
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ case IP_VERSION(14, 0, 5):
+ amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add psp ip block(MP0_HWIP:0x%x)\n",
- adev->ip_versions[MP0_HWIP][0]);
+ amdgpu_ip_version(adev, MP0_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -1786,7 +2110,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
case IP_VERSION(10, 0, 0):
case IP_VERSION(10, 0, 1):
@@ -1800,13 +2124,17 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 5):
case IP_VERSION(11, 0, 9):
case IP_VERSION(11, 0, 7):
- case IP_VERSION(11, 0, 8):
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
break;
+ case IP_VERSION(11, 0, 8):
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
@@ -1822,12 +2150,22 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ case IP_VERSION(14, 0, 4):
+ case IP_VERSION(14, 0, 5):
+ amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add smu ip block(MP1_HWIP:0x%x)\n",
- adev->ip_versions[MP1_HWIP][0]);
+ amdgpu_ip_version(adev, MP1_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -1852,8 +2190,8 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
return 0;
#if defined(CONFIG_DRM_AMD_DC)
- if (adev->ip_versions[DCE_HWIP][0]) {
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
case IP_VERSION(2, 0, 2):
@@ -1871,6 +2209,14 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 1, 0):
+ /* TODO: Fix IP version. DC code expects version 4.0.1 */
+ if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 0))
+ adev->ip_versions[DCE_HWIP][0] = IP_VERSION(4, 0, 1);
+
if (amdgpu_sriov_vf(adev))
amdgpu_discovery_set_sriov_display(adev);
else
@@ -1879,11 +2225,11 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
default:
dev_err(adev->dev,
"Failed to add dm ip block(DCE_HWIP:0x%x)\n",
- adev->ip_versions[DCE_HWIP][0]);
+ amdgpu_ip_version(adev, DCE_HWIP, 0));
return -EINVAL;
}
- } else if (adev->ip_versions[DCI_HWIP][0]) {
- switch (adev->ip_versions[DCI_HWIP][0]) {
+ } else if (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
case IP_VERSION(12, 1, 0):
@@ -1895,7 +2241,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
default:
dev_err(adev->dev,
"Failed to add dm ip block(DCI_HWIP:0x%x)\n",
- adev->ip_versions[DCI_HWIP][0]);
+ amdgpu_ip_version(adev, DCI_HWIP, 0));
return -EINVAL;
}
}
@@ -1905,7 +2251,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 1):
@@ -1917,8 +2263,8 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
break;
case IP_VERSION(9, 4, 3):
- if (!amdgpu_exp_hw_support)
- return -EINVAL;
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
break;
case IP_VERSION(10, 1, 10):
@@ -1941,12 +2287,19 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &gfx_v12_0_ip_block);
+ break;
default:
- dev_err(adev->dev,
- "Failed to add gfx ip block(GC_HWIP:0x%x)\n",
- adev->ip_versions[GC_HWIP][0]);
+ dev_err(adev->dev, "Failed to add gfx ip block(GC_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -1954,7 +2307,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 1):
case IP_VERSION(4, 1, 0):
@@ -1966,6 +2319,8 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
break;
case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block);
break;
case IP_VERSION(5, 0, 0):
@@ -1989,12 +2344,19 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(6, 0, 2):
case IP_VERSION(6, 0, 3):
case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ amdgpu_device_ip_block_add(adev, &sdma_v7_0_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n",
- adev->ip_versions[SDMA0_HWIP][0]);
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0));
return -EINVAL;
}
return 0;
@@ -2002,8 +2364,8 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
{
- if (adev->ip_versions[VCE_HWIP][0]) {
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ if (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(7, 0, 0):
case IP_VERSION(7, 2, 0):
/* UVD is not supported on vega20 SR-IOV */
@@ -2013,10 +2375,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
default:
dev_err(adev->dev,
"Failed to add uvd v7 ip block(UVD_HWIP:0x%x)\n",
- adev->ip_versions[UVD_HWIP][0]);
+ amdgpu_ip_version(adev, UVD_HWIP, 0));
return -EINVAL;
}
- switch (adev->ip_versions[VCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 1, 0):
/* VCE is not supported on vega20 SR-IOV */
@@ -2026,11 +2388,11 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
default:
dev_err(adev->dev,
"Failed to add VCE v4 ip block(VCE_HWIP:0x%x)\n",
- adev->ip_versions[VCE_HWIP][0]);
+ amdgpu_ip_version(adev, VCE_HWIP, 0));
return -EINVAL;
}
} else {
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
@@ -2074,10 +2436,23 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block);
break;
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_5_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_5_ip_block);
+ break;
+ case IP_VERSION(5, 0, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block);
+ break;
+ case IP_VERSION(5, 0, 1):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
- adev->ip_versions[UVD_HWIP][0]);
+ amdgpu_ip_version(adev, UVD_HWIP, 0));
return -EINVAL;
}
}
@@ -2086,35 +2461,28 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(10, 1, 10):
- case IP_VERSION(10, 1, 1):
- case IP_VERSION(10, 1, 2):
- case IP_VERSION(10, 1, 3):
- case IP_VERSION(10, 1, 4):
- case IP_VERSION(10, 3, 0):
- case IP_VERSION(10, 3, 1):
- case IP_VERSION(10, 3, 2):
- case IP_VERSION(10, 3, 3):
- case IP_VERSION(10, 3, 4):
- case IP_VERSION(10, 3, 5):
- case IP_VERSION(10, 3, 6):
- if (amdgpu_mes) {
- amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
- adev->enable_mes = true;
- if (amdgpu_mes_kiq)
- adev->enable_mes_kiq = true;
- }
- break;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
adev->enable_mes = true;
adev->enable_mes_kiq = true;
break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &mes_v12_0_ip_block);
+ adev->enable_mes = true;
+ adev->enable_mes_kiq = true;
+ if (amdgpu_uni_mes)
+ adev->enable_uni_mes = true;
+ break;
default:
break;
}
@@ -2123,8 +2491,10 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
aqua_vanjaram_init_soc_config(adev);
break;
default:
@@ -2132,12 +2502,67 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
}
}
+static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev)
+{ /* Adds the VPE IP block when the VPE_HWIP version is one of the cases below. */
+ switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 3):
+ amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block); /* result is not checked */
+ break;
+ default: /* any other version: nothing is added, and that is not an error */
+ break;
+ }
+
+ return 0; /* always 0, on every path */
+}
+
+static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev)
+{ /* Adds the UMSCH MM IP block, keyed on the VCN_HWIP version; always returns 0. */
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ if (amdgpu_umsch_mm & 0x1) { /* bit 0 of amdgpu_umsch_mm gates the block */
+ amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block);
+ adev->enable_umsch_mm = true;
+ }
+ break;
+ default: /* other versions: no block is added and enable_umsch_mm is left untouched */
+ break;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_isp_ip_blocks(struct amdgpu_device *adev)
+{ /* Adds the ISP IP block matching the ISP_HWIP version; always returns 0. */
+#if defined(CONFIG_DRM_AMD_ISP) /* without this option the body is only "return 0" */
+ switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ amdgpu_device_ip_block_add(adev, &isp_v4_1_0_ip_block);
+ break;
+ case IP_VERSION(4, 1, 1):
+ amdgpu_device_ip_block_add(adev, &isp_v4_1_1_ip_block);
+ break;
+ default: /* other ISP versions: no block is added, and that is not an error */
+ break;
+ }
+#endif /* CONFIG_DRM_AMD_ISP */
+
+ return 0;
+}
+
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
{
int r;
switch (adev->asic_type) {
case CHIP_VEGA10:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
vega10_reg_base_init(adev);
adev->sdma.num_instances = 2;
adev->gmc.num_umc = 4;
@@ -2160,6 +2585,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
break;
case CHIP_VEGA12:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
vega10_reg_base_init(adev);
adev->sdma.num_instances = 2;
adev->gmc.num_umc = 4;
@@ -2182,6 +2612,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
break;
case CHIP_RAVEN:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
vega10_reg_base_init(adev);
adev->sdma.num_instances = 1;
adev->vcn.num_vcn_inst = 1;
@@ -2202,6 +2637,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 2, 2);
adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 1);
adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 1);
+ adev->ip_versions[ISP_HWIP][0] = IP_VERSION(2, 0, 0);
} else {
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 1, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 1, 0);
@@ -2218,9 +2654,15 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 1, 0);
adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 0);
adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 0);
+ adev->ip_versions[ISP_HWIP][0] = IP_VERSION(2, 0, 0);
}
break;
case CHIP_VEGA20:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
vega20_reg_base_init(adev);
adev->sdma.num_instances = 2;
adev->gmc.num_umc = 8;
@@ -2244,6 +2686,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
break;
case CHIP_ARCTURUS:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
arct_reg_base_init(adev);
adev->sdma.num_instances = 8;
adev->vcn.num_vcn_inst = 2;
@@ -2272,6 +2719,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
break;
case CHIP_ALDEBARAN:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
aldebaran_reg_base_init(adev);
adev->sdma.num_instances = 5;
adev->vcn.num_vcn_inst = 2;
@@ -2297,10 +2749,42 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 6, 0);
adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
break;
+ case CHIP_CYAN_SKILLFISH:
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) {
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r)
+ return -EINVAL;
+
+ amdgpu_discovery_harvest_ip(adev);
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
+ } else {
+ cyan_skillfish_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[SDMA1_HWIP][1] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 5, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(2, 1, 1);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(8, 1, 1);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 1);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(10, 1, 3);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 0, 3);
+ }
+ break;
default:
r = amdgpu_discovery_reg_base_init(adev);
- if (r)
- return -EINVAL;
+ if (r) {
+ drm_err(&adev->ddev, "discovery failed: %d\n", r);
+ return r;
+ }
amdgpu_discovery_harvest_ip(adev);
amdgpu_discovery_get_gfx_info(adev);
@@ -2312,13 +2796,15 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_discovery_init_soc_config(adev);
amdgpu_discovery_sysfs_init(adev);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
adev->family = AMDGPU_FAMILY_AI;
break;
case IP_VERSION(9, 1, 0):
@@ -2359,11 +2845,21 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 4):
adev->family = AMDGPU_FAMILY_GC_11_0_1;
break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->family = AMDGPU_FAMILY_GC_11_5_0;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->family = AMDGPU_FAMILY_GC_12_0_0;
+ break;
default:
return -EINVAL;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
case IP_VERSION(9, 3, 0):
@@ -2375,17 +2871,18 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 7):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
adev->flags |= AMD_IS_APU;
break;
default:
break;
}
- if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(4, 8, 0))
- adev->gmc.xgmi.supported = true;
-
/* set NBIO version */
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 2, 0):
adev->nbio.funcs = &nbio_v6_1_funcs;
@@ -2404,9 +2901,17 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
break;
case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
adev->nbio.funcs = &nbio_v7_9_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg;
break;
+ case IP_VERSION(7, 11, 0):
+ case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
+ case IP_VERSION(7, 11, 3):
+ adev->nbio.funcs = &nbio_v7_11_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg;
+ break;
case IP_VERSION(7, 2, 0):
case IP_VERSION(7, 2, 1):
case IP_VERSION(7, 3, 0):
@@ -2439,11 +2944,15 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->nbio.funcs = &nbio_v7_7_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
break;
+ case IP_VERSION(6, 3, 1):
+ adev->nbio.funcs = &nbif_v6_3_1_funcs;
+ adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg;
+ break;
default:
break;
}
- switch (adev->ip_versions[HDP_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, HDP_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 1):
case IP_VERSION(4, 1, 0):
@@ -2453,6 +2962,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(4, 2, 1):
case IP_VERSION(4, 4, 0):
case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
adev->hdp.funcs = &hdp_v4_0_funcs;
break;
case IP_VERSION(5, 0, 0):
@@ -2471,11 +2981,14 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(6, 1, 0):
adev->hdp.funcs = &hdp_v6_0_funcs;
break;
+ case IP_VERSION(7, 0, 0):
+ adev->hdp.funcs = &hdp_v7_0_funcs;
+ break;
default:
break;
}
- switch (adev->ip_versions[DF_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DF_HWIP, 0)) {
case IP_VERSION(3, 6, 0):
case IP_VERSION(3, 6, 1):
case IP_VERSION(3, 6, 2):
@@ -2491,11 +3004,18 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(4, 3, 0):
adev->df.funcs = &df_v4_3_funcs;
break;
+ case IP_VERSION(4, 6, 2):
+ adev->df.funcs = &df_v4_6_2_funcs;
+ break;
+ case IP_VERSION(4, 15, 0):
+ case IP_VERSION(4, 15, 1):
+ adev->df.funcs = &df_v4_15_funcs;
+ break;
default:
break;
}
- switch (adev->ip_versions[SMUIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SMUIO_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
case IP_VERSION(9, 0, 1):
case IP_VERSION(10, 0, 0):
@@ -2515,6 +3035,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 10):
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 9):
case IP_VERSION(13, 0, 10):
@@ -2524,6 +3045,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->smuio.funcs = &smuio_v13_0_funcs;
break;
case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 11):
adev->smuio.funcs = &smuio_v13_0_3_funcs;
if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) {
adev->flags |= AMD_IS_APU;
@@ -2532,19 +3054,27 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 8):
case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
adev->smuio.funcs = &smuio_v13_0_6_funcs;
break;
+ case IP_VERSION(14, 0, 2):
+ adev->smuio.funcs = &smuio_v14_0_2_funcs;
+ break;
default:
break;
}
- switch (adev->ip_versions[LSDMA_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
case IP_VERSION(6, 0, 0):
case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
case IP_VERSION(6, 0, 3):
adev->lsdma.funcs = &lsdma_v6_0_funcs;
break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ adev->lsdma.funcs = &lsdma_v7_0_funcs;
+ break;
default:
break;
}
@@ -2611,6 +3141,17 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
if (r)
return r;
+ r = amdgpu_discovery_set_vpe_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_umsch_mm_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_isp_ip_blocks(adev);
+ if (r)
+ return r;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 3a2f347bd50d..b44d56465c5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -24,10 +24,15 @@
#ifndef __AMDGPU_DISCOVERY__
#define __AMDGPU_DISCOVERY__
-#define DISCOVERY_TMR_SIZE (8 << 10)
+#define DISCOVERY_TMR_SIZE (10 << 10)
#define DISCOVERY_TMR_OFFSET (64 << 10)
void amdgpu_discovery_fini(struct amdgpu_device *adev);
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
+int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
+ uint32_t *nps_type,
+ struct amdgpu_gmc_memrange **ranges,
+ int *range_cnt, bool refresh);
+
#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 363e6a2cad8c..51bab32fd8c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -33,6 +33,7 @@
#include "soc15_common.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
+#include "bif/bif_4_1_d.h"
#include <asm/div64.h>
#include <linux/pci.h>
@@ -233,6 +234,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
}
if (!adev->enable_virtual_display) {
+ new_abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(new_abo,
amdgpu_display_supported_domains(adev, new_abo->flags));
if (unlikely(r != 0)) {
@@ -340,14 +342,11 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
adev->have_disp_power_ref = true;
return ret;
}
- /* if we have no active crtcs, then drop the power ref
- * we got before
+ /* if we have no active crtcs, then go to
+ * drop the power ref we got before
*/
- if (!active && adev->have_disp_power_ref) {
- pm_runtime_put_autosuspend(dev->dev);
+ if (!active && adev->have_disp_power_ref)
adev->have_disp_power_ref = false;
- }
-
out:
/* drop the power reference we got coming in here */
pm_runtime_put_autosuspend(dev->dev);
@@ -657,6 +656,10 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier)
if (!IS_AMD_FMT_MOD(modifier))
return NULL;
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) < AMD_FMT_MOD_TILE_VER_GFX9 ||
+ AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12)
+ return NULL;
+
if (AMD_FMT_MOD_GET(DCC_RETILE, modifier))
return lookup_format_info(dcc_retile_formats,
ARRAY_SIZE(dcc_retile_formats),
@@ -721,6 +724,30 @@ extract_render_dcc_offset(struct amdgpu_device *adev,
return 0;
}
+static int convert_tiling_flags_to_modifier_gfx12(struct amdgpu_framebuffer *afb)
+{ /* Derives afb->base.modifier from afb->tiling_flags (GFX12 fields); always returns 0. */
+ u64 modifier = 0; /* overwritten on both branches below */
+ int swizzle_mode = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE);
+
+ if (!swizzle_mode) { /* swizzle mode 0 is reported as the linear modifier */
+ modifier = DRM_FORMAT_MOD_LINEAR;
+ } else {
+ int max_comp_block =
+ AMDGPU_TILING_GET(afb->tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+
+ modifier =
+ AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12) |
+ AMD_FMT_MOD_SET(TILE, swizzle_mode) |
+ AMD_FMT_MOD_SET(DCC, afb->gfx12_dcc) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_comp_block);
+ }
+
+ afb->base.modifier = modifier;
+ afb->base.flags |= DRM_MODE_FB_MODIFIERS;
+ return 0; /* no failure path in this function */
+}
+
static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
{
struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
@@ -766,11 +793,13 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
return -EINVAL;
}
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0))
version = AMD_FMT_MOD_TILE_VER_GFX11;
- else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(10, 3, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
- else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(10, 0, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10;
else
version = AMD_FMT_MOD_TILE_VER_GFX9;
@@ -779,13 +808,15 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
case 0: /* Z microtiling */
return -EINVAL;
case 1: /* S microtiling */
- if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0)) {
if (!has_xor)
version = AMD_FMT_MOD_TILE_VER_GFX9;
}
break;
case 2:
- if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0)) {
if (!has_xor && afb->base.format->cpp[0] != 4)
version = AMD_FMT_MOD_TILE_VER_GFX9;
}
@@ -838,10 +869,12 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
u64 render_dcc_offset;
/* Enable constant encode on RAVEN2 and later. */
- bool dcc_constant_encode = (adev->asic_type > CHIP_RAVEN ||
- (adev->asic_type == CHIP_RAVEN &&
- adev->external_rev_id >= 0x81)) &&
- adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0);
+ bool dcc_constant_encode =
+ (adev->asic_type > CHIP_RAVEN ||
+ (adev->asic_type == CHIP_RAVEN &&
+ adev->external_rev_id >= 0x81)) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0);
int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B :
dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B :
@@ -878,7 +911,9 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
if (adev->family >= AMDGPU_FAMILY_NV) {
int extra_pipe = 0;
- if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) &&
+ if ((amdgpu_ip_version(adev, GC_HWIP,
+ 0) >=
+ IP_VERSION(10, 3, 0)) &&
pipes == packers && pipes > 1)
extra_pipe = 1;
@@ -912,8 +947,7 @@ static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb)
{
u64 micro_tile_mode;
- /* Zero swizzle mode means linear */
- if (AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0)
+ if (AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) == 1) /* LINEAR_ALIGNED */
return 0;
micro_tile_mode = AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE);
@@ -1037,6 +1071,30 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
block_width = 256 / format_info->cpp[i];
block_height = 1;
block_size_log2 = 8;
+ } else if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12) {
+ int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
+
+ switch (swizzle) {
+ case AMD_FMT_MOD_TILE_GFX12_256B_2D:
+ block_size_log2 = 8;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_4K_2D:
+ block_size_log2 = 12;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_64K_2D:
+ block_size_log2 = 16;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_256K_2D:
+ block_size_log2 = 18;
+ break;
+ default:
+ drm_dbg_kms(rfb->base.dev,
+ "Gfx12 swizzle mode with unknown block size: %d\n", swizzle);
+ return -EINVAL;
+ }
+
+ get_block_dimensions(block_size_log2, format_info->cpp[i],
+ &block_width, &block_height);
} else {
int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
@@ -1072,7 +1130,8 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
return ret;
}
- if (AMD_FMT_MOD_GET(DCC, modifier)) {
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11 &&
+ AMD_FMT_MOD_GET(DCC, modifier)) {
if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
block_size_log2 = get_dcc_block_size(modifier, false, false);
get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
@@ -1102,7 +1161,8 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
}
static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
- uint64_t *tiling_flags, bool *tmz_surface)
+ uint64_t *tiling_flags, bool *tmz_surface,
+ bool *gfx12_dcc)
{
struct amdgpu_bo *rbo;
int r;
@@ -1110,6 +1170,7 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb
if (!amdgpu_fb) {
*tiling_flags = 0;
*tmz_surface = false;
+ *gfx12_dcc = false;
return 0;
}
@@ -1123,11 +1184,9 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb
return r;
}
- if (tiling_flags)
- amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
-
- if (tmz_surface)
- *tmz_surface = amdgpu_bo_encrypted(rbo);
+ amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
+ *tmz_surface = amdgpu_bo_encrypted(rbo);
+ *gfx12_dcc = rbo->flags & AMDGPU_GEM_CREATE_GFX12_DCC;
amdgpu_bo_unreserve(rbo);
@@ -1137,13 +1196,14 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb
static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
struct amdgpu_framebuffer *rfb,
struct drm_file *file_priv,
+ const struct drm_format_info *info,
const struct drm_mode_fb_cmd2 *mode_cmd,
struct drm_gem_object *obj)
{
int ret;
rfb->base.obj[0] = obj;
- drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
+ drm_helper_mode_fill_fb_struct(dev, &rfb->base, info, mode_cmd);
/* Verify that the modifier is supported. */
if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format,
mode_cmd->modifier[0])) {
@@ -1196,7 +1256,8 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev,
}
}
- ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface);
+ ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface,
+ &rfb->gfx12_dcc);
if (ret)
return ret;
@@ -1210,7 +1271,11 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev,
if (!dev->mode_config.fb_modifiers_not_supported &&
!(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) {
- ret = convert_tiling_flags_to_modifier(rfb);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0))
+ ret = convert_tiling_flags_to_modifier_gfx12(rfb);
+ else
+ ret = convert_tiling_flags_to_modifier(rfb);
+
if (ret) {
drm_dbg_kms(dev, "Failed to convert tiling flags 0x%llX to a modifier",
rfb->tiling_flags);
@@ -1233,6 +1298,7 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev,
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,
+ const struct drm_format_info *info,
const struct drm_mode_fb_cmd2 *mode_cmd)
{
struct amdgpu_framebuffer *amdgpu_fb;
@@ -1253,7 +1319,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
/* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */
bo = gem_to_amdgpu_bo(obj);
domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags);
- if (obj->import_attach && !(domains & AMDGPU_GEM_DOMAIN_GTT)) {
+ if (drm_gem_is_imported(obj) && !(domains & AMDGPU_GEM_DOMAIN_GTT)) {
drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n");
drm_gem_object_put(obj);
return ERR_PTR(-EINVAL);
@@ -1266,7 +1332,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
}
ret = amdgpu_display_gem_fb_verify_and_init(dev, amdgpu_fb, file_priv,
- mode_cmd, obj);
+ info, mode_cmd, obj);
if (ret) {
kfree(amdgpu_fb);
drm_gem_object_put(obj);
@@ -1345,14 +1411,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
"dither",
amdgpu_dither_enum_list, sz);
- if (adev->dc_enabled) {
- adev->mode_info.abm_level_property =
- drm_property_create_range(adev_to_drm(adev), 0,
- "abm level", 0, 4);
- if (!adev->mode_info.abm_level_property)
- return -ENOMEM;
- }
-
return 0;
}
@@ -1420,7 +1478,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) &&
((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
- connector->display_info.is_hdmi &&
+ connector && connector->display_info.is_hdmi &&
amdgpu_display_is_hdtv_mode(mode)))) {
if (amdgpu_encoder->underscan_hborder != 0)
amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
@@ -1705,6 +1763,7 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
if (r != 0)
dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
@@ -1732,3 +1791,82 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
return 0;
}
+/* panic_abo is set in amdgpu_display_get_scanout_buffer() and only used in amdgpu_display_set_pixel();
+ * both are called from the panic handler, and protected by the drm_panic spinlock.
+ */
+static struct amdgpu_bo *panic_abo;
+
+/* Use the indirect MMIO to write each pixel to the GPU VRAM,
+ * This is a simplified version of amdgpu_device_mm_access()
+ *
+ * @sb: scanout buffer; only sb->pitch[0] is read here
+ * @x: pixel column
+ * @y: pixel row
+ * @color: 32-bit value written to mmMM_DATA for this pixel
+ *
+ * The byte offset of the pixel is x * 4 + y * pitch, i.e. 4 bytes per
+ * pixel, and is looked up in the resource of the file-scope panic_abo.
+ * The register writes are issued in the order MM_INDEX (low 32 bits of
+ * cursor.start with bit 31 set), MM_INDEX_HI (cursor.start >> 31), MM_DATA.
+ *
+ * NOTE(review): presumably amdgpu_res_first() leaves the address to use for
+ * the requested offset in cursor.start - confirm against its definition.
+ * NOTE(review): the MM_INDEX_HI write is skipped only when tmp equals
+ * 0xffffffff - confirm that this guard is intended.
+ * NOTE(review): adev is not referenced by name below; presumably the
+ * WREG32_NO_KIQ() macro uses it implicitly - confirm.
+ */
+static void amdgpu_display_set_pixel(struct drm_scanout_buffer *sb,
+ unsigned int x,
+ unsigned int y,
+ u32 color)
+{
+ struct amdgpu_res_cursor cursor;
+ unsigned long offset;
+ struct amdgpu_bo *abo = panic_abo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ uint32_t tmp;
+
+ offset = x * 4 + y * sb->pitch[0];
+ amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor);
+
+ tmp = cursor.start >> 31;
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000);
+ if (tmp != 0xffffffff)
+ WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+ WREG32_NO_KIQ(mmMM_DATA, color);
+}
+
+/**
+ * amdgpu_display_get_scanout_buffer - fill in a scanout buffer for the panic handler
+ *
+ * @plane: plane whose current framebuffer (plane->state->fb) is described
+ * @sb: scanout buffer description to fill in
+ *
+ * Copies width, height, format and pitch of the framebuffer into @sb. If the
+ * backing BO has AMDGPU_GEM_CREATE_NO_CPU_ACCESS set, it must be placed in
+ * VRAM and use a 4 bytes-per-pixel format; pixels are then written one by one
+ * through amdgpu_display_set_pixel() and the BO is remembered in panic_abo.
+ * Otherwise the BO is kmapped (unless it already is) and @sb->map[0] is
+ * pointed at that mapping.
+ *
+ * Returns:
+ * 0 on success, -EINVAL if there is no usable framebuffer, backing object,
+ * format or placement, -ENOMEM if the BO cannot be mapped.
+ */
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+				      struct drm_scanout_buffer *sb)
+{
+	struct amdgpu_bo *abo;
+	struct drm_framebuffer *fb = plane->state->fb;
+
+	if (!fb)
+		return -EINVAL;
+
+	DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format);
+
+	/* Test the GEM object itself: gem_to_amdgpu_bo() is expected to be a
+	 * container_of()-style conversion, which does not turn a NULL object
+	 * into a NULL BO pointer.
+	 */
+	if (!fb->obj[0])
+		return -EINVAL;
+
+	abo = gem_to_amdgpu_bo(fb->obj[0]);
+
+	sb->width = fb->width;
+	sb->height = fb->height;
+	/* Use the generic linear format, because tiling will be disabled in panic_flush() */
+	sb->format = drm_format_info(fb->format->format);
+	if (!sb->format)
+		return -EINVAL;
+
+	sb->pitch[0] = fb->pitches[0];
+
+	if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
+		if (abo->tbo.resource->mem_type != TTM_PL_VRAM) {
+			drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n");
+			return -EINVAL;
+		}
+		/* Only handle 32bits format, to simplify mmio access */
+		if (fb->format->cpp[0] != 4) {
+			drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n");
+			return -EINVAL;
+		}
+		sb->set_pixel = amdgpu_display_set_pixel;
+		panic_abo = abo;
+		return 0;
+	}
+	/* CPU-accessible BO: reuse an existing kernel mapping or create one */
+	if (!abo->kmap.virtual &&
+	    ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) {
+		drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n");
+		return -ENOMEM;
+	}
+	if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
+		iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual);
+	else
+		iosys_map_set_vaddr(&sb->map[0], abo->kmap.virtual);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 9d19940f73c8..930c171473b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,6 +23,8 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
+#include <drm/drm_panic.h>
+
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
@@ -42,6 +44,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
struct drm_file *file_priv,
+ const struct drm_format_info *info,
const struct drm_mode_fb_cmd2 *mode_cmd);
const struct drm_format_info *
amdgpu_lookup_format_info(u32 format, uint64_t modifier);
@@ -49,4 +52,7 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier);
int amdgpu_display_suspend_helper(struct amdgpu_device *adev);
int amdgpu_display_resume_helper(struct amdgpu_device *adev);
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 12210598e5b8..8561ad7f6180 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -36,12 +36,35 @@
#include "amdgpu_gem.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
#include <drm/amdgpu_drm.h>
#include <drm/ttm/ttm_tt.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
#include <linux/pci-p2pdma.h>
-#include <linux/pm_runtime.h>
+
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops;
+
+/**
+ * dma_buf_attach_adev - look up the amdgpu device behind an attachment
+ *
+ * @attach: attachment to inspect
+ *
+ * An attachment is recognised as coming from amdgpu when its importer ops
+ * are amdgpu_dma_buf_attach_ops; its importer_priv is then treated as the
+ * importing GEM object.
+ *
+ * Returns:
+ * The struct amdgpu_device * owning the importing BO if the attaching device
+ * is an amdgpu device or partition, NULL otherwise.
+ */
+static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *gobj;
+
+	/* Foreign importer: no amdgpu device to report */
+	if (attach->importer_ops != &amdgpu_dma_buf_attach_ops)
+		return NULL;
+
+	gobj = attach->importer_priv;
+
+	return amdgpu_ttm_adev(gem_to_amdgpu_bo(gobj)->tbo.bdev);
+}
/**
* amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
@@ -54,42 +77,18 @@
static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct dma_buf_attachment *attach)
{
+ struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach);
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- int r;
- if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
+ if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) &&
+ pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
attach->peer2peer = false;
- r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- if (r < 0)
- goto out;
+ amdgpu_vm_bo_update_shared(bo);
return 0;
-
-out:
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
-}
-
-/**
- * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
- *
- * @dmabuf: DMA-buf where we remove the attachment from
- * @attach: the attachment to remove
- *
- * Called when an attachment is removed from the DMA-buf.
- */
-static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
- struct dma_buf_attachment *attach)
-{
- struct drm_gem_object *obj = dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
}
/**
@@ -101,11 +100,35 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
*/
static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
{
- struct drm_gem_object *obj = attach->dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct dma_buf *dmabuf = attach->dmabuf;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv);
+ u32 domains = bo->allowed_domains;
+
+ dma_resv_assert_held(dmabuf->resv);
+
+ /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP
+ * support if all attachments can do P2P. If any attachment can't do
+ * P2P just pin into GTT instead.
+ *
+ * To avoid conflicting pinnings between GPUs and RDMA when move
+ * notifiers are disabled, only allow pinning in VRAM when move
+ * notifiers are enabled.
+ */
+ if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ } else {
+ list_for_each_entry(attach, &dmabuf->attachments, node)
+ if (!attach->peer2peer)
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ }
- /* pin buffer into GTT */
- return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (domains & AMDGPU_GEM_DOMAIN_VRAM)
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ if (WARN_ON(!domains))
+ return -EINVAL;
+
+ return amdgpu_bo_pin(bo, domains);
}
/**
@@ -160,10 +183,6 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return ERR_PTR(r);
-
- } else if (!(amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type) &
- AMDGPU_GEM_DOMAIN_GTT)) {
- return ERR_PTR(-EBUSY);
}
switch (bo->tbo.resource->mem_type) {
@@ -180,6 +199,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
break;
case TTM_PL_VRAM:
+ /* XGMI-accessible memory should never be DMA-mapped */
+ if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible(
+ dma_buf_attach_adev(attach), bo)))
+ return ERR_PTR(-EINVAL);
+
r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
bo->tbo.base.size, attach->dev,
dir, &sgt);
@@ -211,7 +235,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- if (sgt->sgl->page_link) {
+ if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);
@@ -261,9 +285,38 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
}
+/* vmap callback: pin the BO, then hand over to the generic GEM vmap helper.
+ * Returns 0 on success or the error from pinning/mapping; the pin is dropped
+ * again if the mapping step fails.
+ */
+static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
+	int r;
+
+	/*
+	 * Pin so the buffer stays in place for as long as it is vmap'ed.
+	 * Which domain it ends up in matters little as long as it is
+	 * mappable; allowing both GTT and VRAM should suit most use cases.
+	 */
+	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM);
+	if (r)
+		return r;
+
+	r = drm_gem_dmabuf_vmap(dma_buf, map);
+	if (!r)
+		return 0;
+
+	/* Mapping failed: undo the pin taken above */
+	amdgpu_bo_unpin(bo);
+	return r;
+}
+
+/* vunmap callback: tear down the mapping through the generic GEM helper
+ * first, then drop the pin on the BO.
+ */
+static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
+
+	drm_gem_dmabuf_vunmap(dma_buf, map);
+	amdgpu_bo_unpin(bo);
+}
+
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_attach,
- .detach = amdgpu_dma_buf_detach,
.pin = amdgpu_dma_buf_pin,
.unpin = amdgpu_dma_buf_unpin,
.map_dma_buf = amdgpu_dma_buf_map,
@@ -271,8 +324,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
- .vmap = drm_gem_dmabuf_vmap,
- .vunmap = drm_gem_dmabuf_vunmap,
+ .vmap = amdgpu_dma_buf_vmap,
+ .vunmap = amdgpu_dma_buf_vunmap,
};
/**
@@ -290,11 +343,23 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct dma_buf *buf;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = true,
+ /* We opt to avoid OOM on system pages allocations */
+ .gfp_retry_mayfail = true,
+ .allow_res_evict = false,
+ };
+ int ret;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
return ERR_PTR(-EPERM);
+ ret = ttm_bo_setup_export(&bo->tbo, &ctx);
+ if (ret)
+ return ERR_PTR(ret);
+
buf = drm_gem_prime_export(gobj, flags);
if (!IS_ERR(buf))
buf->ops = &amdgpu_dmabuf_ops;
@@ -331,6 +396,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
AMDGPU_GEM_CREATE_UNCACHED);
}
@@ -372,6 +438,10 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
struct amdgpu_vm_bo_base *bo_base;
int r;
+ /* FIXME: This should be after the "if", but needs a fix to make sure
+ * DMABuf imports are initialized in the right VM list.
+ */
+ amdgpu_vm_bo_invalidate(bo, false);
if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
return;
@@ -403,9 +473,12 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
continue;
}
- r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ /* Reserve fences for two SDMA page table updates */
+ r = dma_resv_reserve_fences(resv, 2);
+ if (!r)
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (!r)
- r = amdgpu_vm_handle_moved(adev, vm);
+ r = amdgpu_vm_handle_moved(adev, vm, ticket);
if (r && r != -EBUSY)
DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
@@ -479,7 +552,10 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
struct drm_gem_object *obj = &bo->tbo.base;
struct drm_gem_object *gobj;
- if (obj->import_attach) {
+ if (!adev)
+ return false;
+
+ if (drm_gem_is_imported(obj)) {
struct dma_buf *dma_buf = obj->import_attach->dmabuf;
if (dma_buf->ops != &amdgpu_dmabuf_ops)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 09f6727e7c73..2675689ef70f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -86,6 +86,7 @@ struct amdgpu_doorbell_index {
uint32_t vce_ring6_7;
} uvd_vce;
};
+ uint32_t vpe_ring;
uint32_t first_non_cp;
uint32_t last_non_cp;
uint32_t max_assignment;
@@ -226,10 +227,12 @@ enum AMDGPU_NAVI10_DOORBELL_ASSIGNMENT {
AMDGPU_NAVI10_DOORBELL64_VCNc_d = 0x18E,
AMDGPU_NAVI10_DOORBELL64_VCNe_f = 0x18F,
+ AMDGPU_NAVI10_DOORBELL64_VPE = 0x190,
+
AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0,
- AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCNe_f,
+ AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VPE,
- AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F,
+ AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = AMDGPU_NAVI10_DOORBELL64_VPE,
AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF
};
@@ -357,8 +360,9 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev);
void amdgpu_doorbell_fini(struct amdgpu_device *adev);
int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev);
uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
- struct amdgpu_bo *db_bo,
- uint32_t doorbell_index);
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size);
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
index da4be0bbb446..3040437d99c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
@@ -41,7 +41,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
if (index < adev->doorbell.num_kernel_doorbells)
return readl(adev->doorbell.cpu_addr + index);
- DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
+ dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n",
+ index);
return 0;
}
@@ -63,7 +64,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
if (index < adev->doorbell.num_kernel_doorbells)
writel(v, adev->doorbell.cpu_addr + index);
else
- DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
+ dev_err(adev->dev,
+ "writing beyond doorbell aperture: 0x%08x!\n", index);
}
/**
@@ -83,7 +85,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
if (index < adev->doorbell.num_kernel_doorbells)
return atomic64_read((atomic64_t *)(adev->doorbell.cpu_addr + index));
- DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
+ dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n",
+ index);
return 0;
}
@@ -105,7 +108,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
if (index < adev->doorbell.num_kernel_doorbells)
atomic64_set((atomic64_t *)(adev->doorbell.cpu_addr + index), v);
else
- DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
+ dev_err(adev->dev,
+ "writing beyond doorbell aperture: 0x%08x!\n", index);
}
/**
@@ -113,20 +117,25 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
*
* @adev: amdgpu_device pointer
* @db_bo: doorbell object's bo
- * @db_index: doorbell relative index in this doorbell object
+ * @doorbell_index: doorbell relative index in this doorbell object
+ * @db_size: doorbell size in bytes
*
* returns doorbell's absolute index in BAR
*/
uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
- struct amdgpu_bo *db_bo,
- uint32_t doorbell_index)
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size)
{
int db_bo_offset;
db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);
- /* doorbell index is 32 bit but doorbell's size is 64-bit, so *2 */
- return db_bo_offset / sizeof(u32) + doorbell_index * 2;
+ /* The BAR index counts 32-bit slots, but a doorbell can be 32 or
+ * 64 bits wide, so scale doorbell_index by db_size (in bytes) / 4, rounded up.
+ */
+ return db_bo_offset / sizeof(u32) + doorbell_index *
+ DIV_ROUND_UP(db_size, 4);
}
/**
@@ -142,6 +151,10 @@ int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev)
int r;
int size;
+ /* SI HW does not have doorbells, skip allocation */
+ if (adev->doorbell.num_kernel_doorbells == 0)
+ return 0;
+
/* Reserve first num_kernel_doorbells (page-aligned) for kernel ops */
size = ALIGN(adev->doorbell.num_kernel_doorbells * sizeof(u32), PAGE_SIZE);
@@ -157,7 +170,8 @@ int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev)
NULL,
(void **)&adev->doorbell.cpu_addr);
if (r) {
- DRM_ERROR("Failed to allocate kernel doorbells, err=%d\n", r);
+ dev_err(adev->dev,
+ "Failed to allocate kernel doorbells, err=%d\n", r);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 81edf66dbea8..bff25ef3e2d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,8 +23,9 @@
*/
#include <drm/amdgpu_drm.h>
+#include <drm/clients/drm_client_setup.h>
#include <drm/drm_drv.h>
-#include <drm/drm_fbdev_generic.h>
+#include <drm/drm_fbdev_ttm.h>
#include <drm/drm_gem.h>
#include <drm/drm_managed.h>
#include <drm/drm_pciids.h>
@@ -50,6 +51,8 @@
#include "amdgpu_reset.h"
#include "amdgpu_sched.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_userq_fence.h"
#include "../amdxcp/amdgpu_xcp_drv.h"
/*
@@ -113,11 +116,37 @@
* gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi
* 3.53.0 - Support for GFX11 CP GFX shadowing
* 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
+ * 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
+ * 3.56.0 - Update IB start address and size alignment for decode and encode
+ * 3.57.0 - Compute tunneling on GFX10+
+ * 3.58.0 - Add GFX12 DCC support
+ * 3.59.0 - Cleared VRAM
+ * 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement)
+ * 3.61.0 - Contains fix for RV/PCO compute queues
+ * 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT
+ * 3.63.0 - GFX12 display DCC supports 256B max compressed block size
+ * 3.64.0 - Userq IP support query
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 54
+#define KMS_DRIVER_MINOR 64
#define KMS_DRIVER_PATCHLEVEL 0
+/*
+ * Bit flags for the amdgpu.debug module option. All are disabled by default.
+ */
+enum AMDGPU_DEBUG_MASK {
+ AMDGPU_DEBUG_VM = BIT(0),
+ AMDGPU_DEBUG_LARGEBAR = BIT(1),
+ AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
+ AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
+ AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
+ AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
+ AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
+ AMDGPU_DEBUG_SMU_POOL = BIT(7),
+ AMDGPU_DEBUG_VM_USERPTR = BIT(8),
+ AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
+};
+
unsigned int amdgpu_vram_limit = UINT_MAX;
int amdgpu_vis_vram_limit;
int amdgpu_gart_size = -1; /* auto */
@@ -140,7 +169,6 @@ int amdgpu_vm_size = -1;
int amdgpu_vm_fragment_size = -1;
int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop;
-int amdgpu_vm_debug;
int amdgpu_vm_update_mode = -1;
int amdgpu_exp_hw_support;
int amdgpu_dc = -1;
@@ -153,7 +181,18 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu;
char *amdgpu_virtual_display;
-bool enforce_isolation;
+int amdgpu_enforce_isolation = -1;
+int amdgpu_modeset = -1;
+
+/* Specifies the default granularity for SVM, used in buffer
+ * migration and restoration of backing memory when handling
+ * recoverable page faults.
+ *
+ * The value is given as log2 of the number of pages in the
+ * buffer; for a 2 MiB buffer it is 9.
+ */
+uint amdgpu_svm_default_granularity = 9;
+
/*
* OverDrive(bit 14) disabled by default
* GFX DCS(bit 19) disabled by default
@@ -183,10 +222,13 @@ int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp = -1;
int amdgpu_discovery = -1;
int amdgpu_mes;
+int amdgpu_mes_log_enable = 0;
int amdgpu_mes_kiq;
+int amdgpu_uni_mes = 1;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
int amdgpu_tmz = -1; /* auto */
+uint amdgpu_freesync_vid_mode;
int amdgpu_reset_method = -1; /* auto */
int amdgpu_num_kcq = -1;
int amdgpu_smartshift_bias;
@@ -194,8 +236,15 @@ int amdgpu_use_xgmi_p2p = 1;
int amdgpu_vcnfw_log;
int amdgpu_sg_display = -1; /* auto */
int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
-
-static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
+int amdgpu_umsch_mm;
+int amdgpu_seamless = -1; /* auto */
+uint amdgpu_debug_mask;
+int amdgpu_agp = -1; /* auto */
+int amdgpu_wbrf = -1;
+int amdgpu_damage_clips = -1; /* auto */
+int amdgpu_umsch_mm_fwlog;
+int amdgpu_rebar = -1; /* auto */
+int amdgpu_user_queue = -1;
DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
"DRM_UT_CORE",
@@ -211,9 +260,6 @@ DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
- .delayed_reset_work = __DELAYED_WORK_INITIALIZER(
- mgpu_info.delayed_reset_work,
- amdgpu_drv_delayed_reset_work_handler, 0),
};
int amdgpu_ras_enable = -1;
uint amdgpu_ras_mask = 0xffffffff;
@@ -248,7 +294,8 @@ module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
/**
* DOC: gttsize (int)
* Restrict the size of GTT domain (for userspace use) in MiB for testing.
- * The default is -1 (Use 1/2 RAM, minimum value is 3GB).
+ * The default is -1 (Use value specified by TTM).
+ * This parameter is deprecated and will be removed in the future.
*/
MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
@@ -296,6 +343,13 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
/**
+ * DOC: svm_default_granularity (uint)
+ * Used in buffer migration and handling of recoverable page faults
+ */
+MODULE_PARM_DESC(svm_default_granularity, "SVM's default granularity in log(2^Pages), default 9 = 2^9 = 2 MiB");
+module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint, 0644);
+
+/**
* DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms.
*
@@ -308,12 +362,12 @@ module_param_named(msi, amdgpu_msi, int, 0444);
* The second one is for Compute. The third and fourth ones are
* for SDMA and Video.
*
- * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
- * jobs is 10000. The timeout for compute is 60000.
+ * By default(with no lockup_timeout settings), the timeout for all jobs is 10000.
*/
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; "
- "for passthrough or sriov, 10000 for all jobs. 0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
- "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
+MODULE_PARM_DESC(lockup_timeout,
+ "GPU lockup timeout in ms (default: 10000 for all jobs. "
+ "0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
+ "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
/**
@@ -349,7 +403,7 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
* Setting the value to 0 disables this functionality.
* Setting the value to -2 is auto enabled with power down when displays are attached.
*/
-MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = autowith displays)");
+MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = auto with displays)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
/**
@@ -360,7 +414,7 @@ module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
* the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device).
*/
MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
-module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
+module_param_named_unsafe(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
/**
* DOC: bapm (int)
@@ -406,13 +460,6 @@ MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = prin
module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
/**
- * DOC: vm_debug (int)
- * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled).
- */
-MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
-module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
-
-/**
* DOC: vm_update_mode (int)
* Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default
* is -1 (Only in large BAR(LB) systems Compute VM tables will be updated by CPU, otherwise 0, never).
@@ -425,7 +472,7 @@ module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
* Enable experimental hw support (1 = enable). The default is 0 (disabled).
*/
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
-module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
+module_param_named_unsafe(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
/**
* DOC: dc (int)
@@ -536,14 +583,14 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
* Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
*/
MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
-module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+module_param_named_unsafe(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
/**
* DOC: emu_mode (int)
* Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled).
*/
MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
-module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+module_param_named_unsafe(emu_mode, amdgpu_emu_mode, int, 0444);
/**
* DOC: ras_enable (int)
@@ -583,7 +630,7 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644);
#ifdef CONFIG_DRM_AMDGPU_SI
#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
-int amdgpu_si_support = 0;
+int amdgpu_si_support;
MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
#else
int amdgpu_si_support = 1;
@@ -602,7 +649,7 @@ module_param_named(si_support, amdgpu_si_support, int, 0444);
#ifdef CONFIG_DRM_AMDGPU_CIK
#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE)
-int amdgpu_cik_support = 0;
+int amdgpu_cik_support;
MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
#else
int amdgpu_cik_support = 1;
@@ -656,6 +703,15 @@ MODULE_PARM_DESC(mes,
module_param_named(mes, amdgpu_mes, int, 0444);
/**
+ * DOC: mes_log_enable (int)
+ * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_log_enable,
+ "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
+
+/**
* DOC: mes_kiq (int)
* Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
* (0 = disabled (default), 1 = enabled)
@@ -665,6 +721,15 @@ MODULE_PARM_DESC(mes_kiq,
module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444);
/**
+ * DOC: uni_mes (int)
+ * Enable Unified Micro Engine Scheduler. This is a new engine pipe for unified scheduler.
+ * (0 = disabled, 1 = enabled (default))
+ */
+MODULE_PARM_DESC(uni_mes,
+ "Enable Unified Micro Engine Scheduler (0 = disabled, 1 = enabled (default))");
+module_param_named(uni_mes, amdgpu_uni_mes, int, 0444);
+
+/**
* DOC: noretry (int)
* Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
* do not support per-process XNACK this also disables retry page faults.
@@ -680,7 +745,7 @@ module_param_named(noretry, amdgpu_noretry, int, 0644);
*/
MODULE_PARM_DESC(force_asic_type,
"A non negative value used to specify the asic type for all supported GPUs");
-module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
+module_param_named_unsafe(force_asic_type, amdgpu_force_asic_type, int, 0444);
/**
* DOC: use_xgmi_p2p (int)
@@ -699,7 +764,7 @@ module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444);
* assigns queues to HQDs.
*/
int sched_policy = KFD_SCHED_POLICY_HWS;
-module_param(sched_policy, int, 0444);
+module_param_unsafe(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
@@ -744,24 +809,12 @@ MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
/**
- * DOC: debug_largebar (int)
- * Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
- * system. This limits the VRAM size reported to ROCm applications to the visible
- * size, usually 256MB.
- * Default value is 0, diabled.
- */
-int debug_largebar;
-module_param(debug_largebar, int, 0444);
-MODULE_PARM_DESC(debug_largebar,
- "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
-
-/**
* DOC: halt_if_hws_hang (int)
* Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
* Setting 1 enables halt on hang.
*/
int halt_if_hws_hang;
-module_param(halt_if_hws_hang, int, 0644);
+module_param_unsafe(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
/**
@@ -770,7 +823,7 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau
* check says. Default value: false (rely on MEC2 firmware version check).
*/
bool hws_gws_support;
-module_param(hws_gws_support, bool, 0444);
+module_param_unsafe(hws_gws_support, bool, 0444);
MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");
/**
@@ -803,7 +856,7 @@ MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = defa
*/
int amdgpu_no_queue_eviction_on_vm_fault;
MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)");
-module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
+module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
#endif
/**
@@ -811,7 +864,7 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm
*/
int amdgpu_mtype_local;
MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
-module_param_named(mtype_local, amdgpu_mtype_local, int, 0444);
+module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444);
/**
* DOC: pcie_p2p (bool)
@@ -833,7 +886,7 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
/**
* DOC: dcdebugmask (uint)
- * Override display features enabled. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * Display debug options. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
*/
MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))");
module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
@@ -850,18 +903,31 @@ module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444);
* the ABM algorithm, with 1 being the least reduction and 4 being the most
* reduction.
*
- * Defaults to 0, or disabled. Userspace can still override this level later
- * after boot.
+ * Defaults to -1, or auto. Userspace can only override this level after
+ * boot if it's set to auto.
*/
-uint amdgpu_dm_abm_level;
-MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) ");
-module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
+int amdgpu_dm_abm_level = -1;
+MODULE_PARM_DESC(abmlevel,
+ "ABM level (0 = off, 1-4 = backlight reduction level, -1 auto (default))");
+module_param_named(abmlevel, amdgpu_dm_abm_level, int, 0444);
int amdgpu_backlight = -1;
MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))");
module_param_named(backlight, amdgpu_backlight, bint, 0444);
/**
+ * DOC: damageclips (int)
+ * Enable or disable damage clips support. If damage clips support is disabled,
+ * we will force full frame updates, irrespective of what user space sends to
+ * us.
+ *
+ * Defaults to -1 (where it is enabled unless a PSR-SU display is detected).
+ */
+MODULE_PARM_DESC(damageclips,
+ "Damage clips support (0 = disable, 1 = enable, -1 auto (default))");
+module_param_named(damageclips, amdgpu_damage_clips, int, 0444);
+
+/**
* DOC: tmz (int)
* Trusted Memory Zone (TMZ) is a method to protect data being written
* to or read from memory.
@@ -872,11 +938,37 @@ MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)
module_param_named(tmz, amdgpu_tmz, int, 0444);
/**
+ * DOC: freesync_video (uint)
+ * Enable the optimization to adjust front porch timing to achieve seamless
+ * mode change experience when setting a freesync supported mode for which full
+ * modeset is not needed.
+ *
+ * The Display Core will add a set of modes derived from the base FreeSync
+ * video mode into the corresponding connector's mode list based on commonly
+ * used refresh rates and VRR range of the connected display, when users enable
+ * this feature. From the userspace perspective, they can see a seamless mode
+ * change experience when changing between different refresh rates under the
+ * same resolution. Additionally, userspace applications such as Video playback
+ * can read this modeset list and change the refresh rate based on the video
+ * frame rate. Finally, the userspace can also derive an appropriate mode for a
+ * particular refresh rate based on the FreeSync Mode and add it to the
+ * connector's mode list.
+ *
+ * Note: This is an experimental feature.
+ *
+ * The default value: 0 (off).
+ */
+MODULE_PARM_DESC(
+ freesync_video,
+ "Adds additional modes via VRR for refresh changes without a full modeset (0 = off (default), 1 = on)");
+module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
+
+/**
* DOC: reset_method (int)
* GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
*/
MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");
-module_param_named(reset_method, amdgpu_reset_method, int, 0444);
+module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644);
/**
* DOC: bad_page_threshold (int) Bad page threshold is specifies the
@@ -884,7 +976,7 @@ module_param_named(reset_method, amdgpu_reset_method, int, 0444);
* result in the GPU entering bad status when the number of total
* faulty pages by ECC exceeds the threshold value.
*/
-MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = driver sets threshold)");
+MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = threshold determined by a formula, 0 < threshold < max records, user-defined threshold)");
module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
@@ -908,6 +1000,22 @@ MODULE_PARM_DESC(sg_display, "S/G Display (-1 = auto (default), 0 = disable)");
module_param_named(sg_display, amdgpu_sg_display, int, 0444);
/**
+ * DOC: umsch_mm (int)
+ * Enable Multi Media User Mode Scheduler. This is a HW scheduling engine for VCN and VPE.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(umsch_mm,
+ "Enable Multi Media User Mode Scheduler (0 = disabled (default), 1 = enabled)");
+module_param_named(umsch_mm, amdgpu_umsch_mm, int, 0444);
+
+/**
+ * DOC: umsch_mm_fwlog (int)
+ * Enable umschfw log output for debugging, the default is disabled.
+ */
+MODULE_PARM_DESC(umsch_mm_fwlog, "Enable umschfw log(0 = disable (default value), 1 = enable)");
+module_param_named(umsch_mm_fwlog, amdgpu_umsch_mm_fwlog, int, 0444);
+
+/**
* DOC: smu_pptable_id (int)
* Used to override pptable id. id = 0 use VBIOS pptable.
* id > 0 use the soft pptable with specicfied id.
@@ -932,11 +1040,92 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
/**
- * DOC: enforce_isolation (bool)
- * enforce process isolation between graphics and compute via using the same reserved vmid.
+ * DOC: enforce_isolation (int)
+ * enforce process isolation between graphics and compute.
+ * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)
*/
-module_param(enforce_isolation, bool, 0444);
-MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
+module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444);
+MODULE_PARM_DESC(enforce_isolation,
+"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)");
+
+/**
+ * DOC: modeset (int)
+ * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto).
+ */
+MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)");
+module_param_named(modeset, amdgpu_modeset, int, 0444);
+
+/**
+ * DOC: seamless (int)
+ * Seamless boot will keep the image on the screen during the boot process.
+ */
+MODULE_PARM_DESC(seamless, "Seamless boot (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(seamless, amdgpu_seamless, int, 0444);
+
+/**
+ * DOC: debug_mask (uint)
+ * Debug options for amdgpu, work as a binary mask with the following options:
+ *
+ * - 0x1: Debug VM handling
+ * - 0x2: Enable simulating large-bar capability on non-large bar system. This
+ * limits the VRAM size reported to ROCm applications to the visible
+ * size, usually 256MB.
+ * - 0x4: Disable GPU soft recovery, always do a full reset
+ * - 0x8: Use VRAM for firmware loading
+ * - 0x10: Enable ACA based RAS logging
+ * - 0x20: Enable experimental resets
+ * - 0x40: Disable ring resets
+ * - 0x80: Use VRAM for SMU pool
+ */
+MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
+module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444);
+
+/**
+ * DOC: agp (int)
+ * Enable the AGP aperture. This provides an aperture in the GPU's internal
+ * address space for direct access to system memory. Note that these accesses
+ * are non-snooped, so they are only used for access to uncached memory.
+ */
+MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(agp, amdgpu_agp, int, 0444);
+
+/**
+ * DOC: wbrf (int)
+ * Enable Wifi RFI interference mitigation feature.
+ * Due to electrical and mechanical constraints, there is likely to be interference from
+ * relatively high-powered harmonics of the (G-)DDR memory clocks with local radio
+ * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI interference,
+ * with this feature enabled, PMFW will use either “shadowed P-State” or “P-State” based
+ * on active list of frequencies in-use (to be avoided) as part of initial setting or
+ * P-state transition. However, there may be potential performance impact with this
+ * feature enabled.
+ * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported))
+ */
+MODULE_PARM_DESC(wbrf,
+ "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto (default))");
+module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+
+/**
+ * DOC: rebar (int)
+ * Allow BAR resizing. Disable this to prevent the driver from attempting
+ * to resize the BAR if the GPU supports it and there is available MMIO space.
+ * Note that this just prevents the driver from resizing the BAR. The BIOS
+ * may have already resized the BAR at boot time.
+ */
+MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(rebar, amdgpu_rebar, int, 0444);
+
+/**
+ * DOC: user_queue (int)
+ * Enable user queues on systems that support user queues. Possible values:
+ *
+ * - -1 = auto (ASIC specific default)
+ * - 0 = user queues disabled
+ * - 1 = user queues enabled and kernel queues enabled (if supported)
+ * - 2 = user queues enabled and kernel queues disabled
+ */
+MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)");
+module_param_named(user_queue, amdgpu_user_queue, int, 0444);
/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
@@ -1651,7 +1840,6 @@ static const u16 amdgpu_unsupported_pciidlist[] = {
};
static const struct pci_device_id pciidlist[] = {
-#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -1724,8 +1912,6 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
{0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
@@ -1808,7 +1994,6 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
-#endif
/* topaz */
{0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
{0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
@@ -1987,6 +2172,11 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
/* CYAN_SKILLFISH */
+ {0x1002, 0x13DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13F9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
{0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
{0x1002, 0x143F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
@@ -2018,6 +2208,14 @@ static const struct pci_device_id pciidlist[] = {
MODULE_DEVICE_TABLE(pci, pciidlist);
+/*
+ * ASIC type overrides consulted by amdgpu_fix_asic_type(). An entry is
+ * selected when its .device and .revision members equal those of the PCI
+ * device; its .type member then replaces the ASIC type bits of the flags.
+ * NOTE(review): the initializers are positional, presumably in the order
+ * { device, revision, type } - confirm against struct amdgpu_asic_type_quirk.
+ */
+static const struct amdgpu_asic_type_quirk asic_type_quirks[] = {
+ /* differentiate between P10 and P11 asics with the same DID */
+ {0x67FF, 0xE3, CHIP_POLARIS10},
+ {0x67FF, 0xE7, CHIP_POLARIS10},
+ {0x67FF, 0xF3, CHIP_POLARIS10},
+ {0x67FF, 0xF7, CHIP_POLARIS10},
+};
+
static const struct drm_driver amdgpu_kms_driver;
static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
@@ -2042,6 +2240,73 @@ static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
}
}
+/**
+ * amdgpu_init_debug_options - apply the debug_mask module parameter to a device
+ *
+ * @adev: amdgpu device whose debug state is filled in
+ *
+ * Tests the AMDGPU_DEBUG_* bits of amdgpu_debug_mask one by one. For every bit
+ * that is set, a "debug: ..." line is logged with pr_info() and the matching
+ * debug field of @adev is set (a boolean flag, or the SMU_DEBUG_POOL_USE_VRAM
+ * bit of adev->pm.smu_debug_mask for the SMU pool option). Nothing is written
+ * for bits that are clear.
+ */
+static void amdgpu_init_debug_options(struct amdgpu_device *adev)
+{
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_VM) {
+ pr_info("debug: VM handling debug enabled\n");
+ adev->debug_vm = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_LARGEBAR) {
+ pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
+ adev->debug_largebar = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY) {
+ pr_info("debug: soft reset for GPU recovery disabled\n");
+ adev->debug_disable_soft_recovery = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) {
+ pr_info("debug: place fw in vram for frontdoor loading\n");
+ adev->debug_use_vram_fw_buf = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_RAS_ACA) {
+ pr_info("debug: enable RAS ACA\n");
+ adev->debug_enable_ras_aca = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_EXP_RESETS) {
+ pr_info("debug: enable experimental reset features\n");
+ adev->debug_exp_resets = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) {
+ pr_info("debug: ring reset disabled\n");
+ adev->debug_disable_gpu_ring_reset = true;
+ }
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) {
+ pr_info("debug: use vram for smu pool\n");
+ /* unlike the other options this ORs a bit into a mask instead of setting a bool */
+ adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM;
+ }
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_VM_USERPTR) {
+ pr_info("debug: VM mode debug for userptr is enabled\n");
+ adev->debug_vm_userptr = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_RAS_CE_LOG) {
+ pr_info("debug: disable kernel logs of correctable errors\n");
+ adev->debug_disable_ce_logs = true;
+ }
+}
+
+/**
+ * amdgpu_fix_asic_type - apply ASIC type quirks to the probe flags
+ *
+ * @pdev: PCI device being probed
+ * @flags: driver flags carrying the ASIC type in the AMD_ASIC_MASK bits
+ *
+ * Returns @flags with the AMD_ASIC_MASK bits replaced by the type of the
+ * first asic_type_quirks entry whose device ID and revision both match
+ * @pdev, or @flags unchanged when no entry matches.
+ */
+static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
+{
+	const struct amdgpu_asic_type_quirk *quirk;
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(asic_type_quirks); idx++) {
+		quirk = &asic_type_quirks[idx];
+		if (pdev->device != quirk->device ||
+		    pdev->revision != quirk->revision)
+			continue;
+
+		/* only the first matching quirk is applied */
+		return (flags & ~AMD_ASIC_MASK) | quirk->type;
+	}
+
+	return flags;
+}
+
static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -2051,6 +2316,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
int ret, retry = 0, i;
bool supports_atomic = false;
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+ (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+ if (drm_firmware_drivers_only() && amdgpu_modeset == -1)
+ return -EINVAL;
+ }
+
/* skip devices which are owned by radeon */
for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
if (amdgpu_unsupported_pciidlist[i] == pdev->device)
@@ -2061,7 +2332,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
amdgpu_aspm = 0;
if (amdgpu_virtual_display ||
- amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+ amdgpu_device_asic_has_dc_support(pdev, flags & AMD_ASIC_MASK))
supports_atomic = true;
if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
@@ -2069,15 +2340,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
"See modparam exp_hw_support\n");
return -ENODEV;
}
- /* differentiate between P10 and P11 asics with the same DID */
- if (pdev->device == 0x67FF &&
- (pdev->revision == 0xE3 ||
- pdev->revision == 0xE7 ||
- pdev->revision == 0xF3 ||
- pdev->revision == 0xF7)) {
- flags &= ~AMD_ASIC_MASK;
- flags |= CHIP_POLARIS10;
- }
+
+ flags = amdgpu_fix_asic_type(pdev, flags);
/* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,
* however, SME requires an indirect IOMMU mapping because the encryption
@@ -2090,14 +2354,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENOTSUPP;
}
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
#ifdef CONFIG_DRM_AMDGPU_SI
- if (!amdgpu_si_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
+ if (!amdgpu_si_support) {
dev_info(&pdev->dev,
"SI support provided by radeon.\n");
dev_info(&pdev->dev,
@@ -2105,16 +2369,18 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
);
return -ENODEV;
}
- }
+ break;
+#else
+ dev_info(&pdev->dev, "amdgpu is built without SI support.\n");
+ return -ENODEV;
#endif
+ case CHIP_KAVERI:
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
#ifdef CONFIG_DRM_AMDGPU_CIK
- if (!amdgpu_cik_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_KAVERI:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KABINI:
- case CHIP_MULLINS:
+ if (!amdgpu_cik_support) {
dev_info(&pdev->dev,
"CIK support provided by radeon.\n");
dev_info(&pdev->dev,
@@ -2122,8 +2388,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
);
return -ENODEV;
}
- }
+ break;
+#else
+ dev_info(&pdev->dev, "amdgpu is built without CIK support.\n");
+ return -ENODEV;
#endif
+ default:
+ break;
+ }
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
if (IS_ERR(adev))
@@ -2142,6 +2414,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, ddev);
+ amdgpu_init_debug_options(adev);
+
ret = amdgpu_driver_load_kms(adev, flags);
if (ret)
goto err_pci;
@@ -2161,17 +2435,25 @@ retry_init:
if (ret)
goto err_pci;
+ ret = amdgpu_amdkfd_drm_client_create(adev);
+ if (ret)
+ goto err_pci;
+
/*
* 1. don't init fbdev on hw without DCE
* 2. don't init fbdev if there are no connectors
*/
if (adev->mode_info.mode_config_initialized &&
!list_empty(&adev_to_drm(adev)->mode_config.connector_list)) {
+ const struct drm_format_info *format;
+
/* select 8 bpp console on low vram cards */
if (adev->gmc.real_vram_size <= (32*1024*1024))
- drm_fbdev_generic_setup(adev_to_drm(adev), 8);
+ format = drm_format_info(DRM_FORMAT_C8);
else
- drm_fbdev_generic_setup(adev_to_drm(adev), 32);
+ format = NULL;
+
+ drm_client_setup(adev_to_drm(adev), format);
}
ret = amdgpu_debugfs_init(adev);
@@ -2180,10 +2462,10 @@ retry_init:
if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
/* only need to skip on ATPX */
- if (amdgpu_device_supports_px(ddev))
+ if (amdgpu_device_supports_px(adev))
dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
/* we want direct complete for BOCO */
- if (amdgpu_device_supports_boco(ddev))
+ if (amdgpu_device_supports_boco(adev))
dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_SMART_PREPARE |
DPM_FLAG_SMART_SUSPEND |
DPM_FLAG_MAY_SKIP_RESUME);
@@ -2195,6 +2477,8 @@ retry_init:
pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
+ pci_wake_from_d3(pdev, TRUE);
+
/*
* For runpm implemented via BACO, PMFW will handle the
* timing for BACO in and out:
@@ -2214,9 +2498,9 @@ retry_init:
* into D0 state. Then there will be a PMFW-aware D-state
* transition(D0->D3) on runpm suspend.
*/
- if (amdgpu_device_supports_baco(ddev) &&
+ if (amdgpu_device_supports_baco(adev) &&
!(adev->flags & AMD_IS_APU) &&
- (adev->asic_type >= CHIP_NAVI10))
+ adev->asic_type >= CHIP_NAVI10)
amdgpu_get_secondary_funcs(adev);
}
@@ -2233,7 +2517,9 @@ amdgpu_pci_remove(struct pci_dev *pdev)
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
+ amdgpu_ras_eeprom_check_and_recover(adev);
amdgpu_xcp_dev_unplug(adev);
+ amdgpu_gmc_prepare_nps_mode_change(adev);
drm_dev_unplug(dev);
if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
@@ -2241,38 +2527,6 @@ amdgpu_pci_remove(struct pci_dev *pdev)
pm_runtime_forbid(dev->dev);
}
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
- !amdgpu_sriov_vf(adev)) {
- bool need_to_reset_gpu = false;
-
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- struct amdgpu_hive_info *hive;
-
- hive = amdgpu_get_xgmi_hive(adev);
- if (hive->device_remove_count == 0)
- need_to_reset_gpu = true;
- hive->device_remove_count++;
- amdgpu_put_xgmi_hive(hive);
- } else {
- need_to_reset_gpu = true;
- }
-
- /* Workaround for ASICs need to reset SMU.
- * Called only when the first device is removed.
- */
- if (need_to_reset_gpu) {
- struct amdgpu_reset_context reset_context;
-
- adev->shutdown = true;
- memset(&reset_context, 0, sizeof(reset_context));
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags);
- amdgpu_device_gpu_recover(adev, NULL, &reset_context);
- }
- }
-
amdgpu_driver_unload_kms(dev);
/*
@@ -2293,6 +2547,10 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
if (amdgpu_ras_intr_triggered())
return;
+ /* device may not be resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return;
+
/* if we are running in a VM, make sure the device
* torn down properly on reboot/shutdown.
* unfortunately we can't detect certain
@@ -2304,88 +2562,20 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
adev->mp1_state = PP_MP1_STATE_NONE;
}
-/**
- * amdgpu_drv_delayed_reset_work_handler - work handler for reset
- *
- * @work: work_struct.
- */
-static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
-{
- struct list_head device_list;
- struct amdgpu_device *adev;
- int i, r;
- struct amdgpu_reset_context reset_context;
-
- memset(&reset_context, 0, sizeof(reset_context));
-
- mutex_lock(&mgpu_info.mutex);
- if (mgpu_info.pending_reset == true) {
- mutex_unlock(&mgpu_info.mutex);
- return;
- }
- mgpu_info.pending_reset = true;
- mutex_unlock(&mgpu_info.mutex);
-
- /* Use a common context, just need to make sure full reset is done */
- reset_context.method = AMD_RESET_METHOD_NONE;
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
- for (i = 0; i < mgpu_info.num_dgpu; i++) {
- adev = mgpu_info.gpu_ins[i].adev;
- reset_context.reset_req_dev = adev;
- r = amdgpu_device_pre_asic_reset(adev, &reset_context);
- if (r) {
- dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
- r, adev_to_drm(adev)->unique);
- }
- if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work))
- r = -EALREADY;
- }
- for (i = 0; i < mgpu_info.num_dgpu; i++) {
- adev = mgpu_info.gpu_ins[i].adev;
- flush_work(&adev->xgmi_reset_work);
- adev->gmc.xgmi.pending_reset = false;
- }
-
- /* reset function will rebuild the xgmi hive info , clear it now */
- for (i = 0; i < mgpu_info.num_dgpu; i++)
- amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev);
-
- INIT_LIST_HEAD(&device_list);
-
- for (i = 0; i < mgpu_info.num_dgpu; i++)
- list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list);
-
- /* unregister the GPU first, reset function will add them back */
- list_for_each_entry(adev, &device_list, reset_list)
- amdgpu_unregister_gpu_instance(adev);
-
- /* Use a common context, just need to make sure full reset is done */
- set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
- r = amdgpu_do_asic_reset(&device_list, &reset_context);
-
- if (r) {
- DRM_ERROR("reinit gpus failure");
- return;
- }
- for (i = 0; i < mgpu_info.num_dgpu; i++) {
- adev = mgpu_info.gpu_ins[i].adev;
- if (!adev->kfd.init_complete)
- amdgpu_amdkfd_device_init(adev);
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
- }
-}
-
static int amdgpu_pmops_prepare(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ /* device may not be resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return 0;
+
/* Return a positive number here so
* DPM_FLAG_SMART_SUSPEND works properly
*/
- if (amdgpu_device_supports_boco(drm_dev))
- return pm_runtime_suspended(dev);
+ if (amdgpu_device_supports_boco(adev) && pm_runtime_suspended(dev))
+ return 1;
/* if we will not support s3 or s2i for the device
* then skip suspend
@@ -2394,12 +2584,12 @@ static int amdgpu_pmops_prepare(struct device *dev)
!amdgpu_acpi_is_s3_active(adev))
return 1;
- return 0;
+ return amdgpu_device_prepare(drm_dev);
}
static void amdgpu_pmops_complete(struct device *dev)
{
- /* nothing to do */
+ amdgpu_device_complete(dev_get_drvdata(dev));
}
static int amdgpu_pmops_suspend(struct device *dev)
@@ -2411,8 +2601,24 @@ static int amdgpu_pmops_suspend(struct device *dev)
adev->in_s0ix = true;
else if (amdgpu_acpi_is_s3_active(adev))
adev->in_s3 = true;
- if (!adev->in_s0ix && !adev->in_s3)
+ if (!adev->in_s0ix && !adev->in_s3) {
+#if IS_ENABLED(CONFIG_SUSPEND)
+ /* don't allow going deep first time followed by s2idle the next time */
+ if (adev->last_suspend_state != PM_SUSPEND_ON &&
+ adev->last_suspend_state != pm_suspend_target_state) {
+ drm_err_once(drm_dev, "Unsupported suspend state %d\n",
+ pm_suspend_target_state);
+ return -EINVAL;
+ }
+#endif
return 0;
+ }
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+ /* cache the state last used for suspend */
+ adev->last_suspend_state = pm_suspend_target_state;
+#endif
+
return amdgpu_device_suspend(drm_dev, true);
}
@@ -2454,9 +2660,7 @@ static int amdgpu_pmops_freeze(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int r;
- adev->in_s4 = true;
r = amdgpu_device_suspend(drm_dev, true);
- adev->in_s4 = false;
if (r)
return r;
@@ -2469,12 +2673,21 @@ static int amdgpu_pmops_thaw(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ /* do not resume device if it's normal hibernation */
+ if (!pm_hibernate_is_recovering() && !pm_hibernation_mode_is_suspend())
+ return 0;
+
return amdgpu_device_resume(drm_dev, true);
}
static int amdgpu_pmops_poweroff(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ /* device may not be resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return 0;
return amdgpu_device_suspend(drm_dev, true);
}
@@ -2553,6 +2766,29 @@ static int amdgpu_runtime_idle_check_display(struct device *dev)
return 0;
}
+/**
+ * amdgpu_runtime_idle_check_userq - check whether any user queue exists
+ *
+ * @dev: generic device embedded in the PCI device
+ *
+ * Walks adev->userq_mgr_list with adev->userq_mutex held and looks into the
+ * userq_idr of every manager on it.
+ *
+ * Returns -EBUSY as soon as one manager is found to hold at least one queue,
+ * 0 when no manager holds any queue.
+ */
+static int amdgpu_runtime_idle_check_userq(struct device *dev)
+{
+	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(to_pci_dev(dev)));
+	struct amdgpu_userq_mgr *mgr, *next;
+	struct amdgpu_usermode_queue *q;
+	int busy = 0;
+	int id;
+
+	mutex_lock(&adev->userq_mutex);
+	list_for_each_entry_safe(mgr, next, &adev->userq_mgr_list, list) {
+		/* a single queue is enough, so stop at the first one found */
+		idr_for_each_entry(&mgr->userq_idr, q, id) {
+			busy = -EBUSY;
+			break;
+		}
+		if (busy)
+			break;
+	}
+	mutex_unlock(&adev->userq_mutex);
+
+	return busy;
+}
+
static int amdgpu_pmops_runtime_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -2568,6 +2804,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
ret = amdgpu_runtime_idle_check_display(dev);
if (ret)
return ret;
+ ret = amdgpu_runtime_idle_check_userq(dev);
+ if (ret)
+ return ret;
/* wait for all rings to drain before suspending */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -2581,7 +2820,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
}
adev->in_runpm = true;
- if (amdgpu_device_supports_px(drm_dev))
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
/*
@@ -2591,21 +2830,24 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
* platforms.
* TODO: this may be also needed for PX capable platform.
*/
- if (amdgpu_device_supports_boco(drm_dev))
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
adev->mp1_state = PP_MP1_STATE_UNLOAD;
+ ret = amdgpu_device_prepare(drm_dev);
+ if (ret)
+ return ret;
ret = amdgpu_device_suspend(drm_dev, false);
if (ret) {
adev->in_runpm = false;
- if (amdgpu_device_supports_boco(drm_dev))
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
adev->mp1_state = PP_MP1_STATE_NONE;
return ret;
}
- if (amdgpu_device_supports_boco(drm_dev))
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
adev->mp1_state = PP_MP1_STATE_NONE;
- if (amdgpu_device_supports_px(drm_dev)) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
@@ -2614,10 +2856,11 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
pci_ignore_hotplug(pdev);
pci_set_power_state(pdev, PCI_D3cold);
drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
- } else if (amdgpu_device_supports_boco(drm_dev)) {
+ } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
/* nothing to do */
- } else if (amdgpu_device_supports_baco(drm_dev)) {
- amdgpu_device_baco_enter(drm_dev);
+ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
+ amdgpu_device_baco_enter(adev);
}
dev_dbg(&pdev->dev, "asic/device is runtime suspended\n");
@@ -2639,7 +2882,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
if (!pci_device_is_present(adev->pdev))
adev->no_hw_access = true;
- if (amdgpu_device_supports_px(drm_dev)) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
/* Only need to handle PCI state in the driver for ATPX
@@ -2651,22 +2894,23 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
if (ret)
return ret;
pci_set_master(pdev);
- } else if (amdgpu_device_supports_boco(drm_dev)) {
+ } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
pci_set_master(pdev);
- } else if (amdgpu_device_supports_baco(drm_dev)) {
- amdgpu_device_baco_exit(drm_dev);
+ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
+ amdgpu_device_baco_exit(adev);
}
ret = amdgpu_device_resume(drm_dev, false);
if (ret) {
- if (amdgpu_device_supports_px(drm_dev))
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
pci_disable_device(pdev);
return ret;
}
- if (amdgpu_device_supports_px(drm_dev))
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
adev->in_runpm = false;
return 0;
@@ -2676,8 +2920,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- /* we don't want the main rpm_idle to call suspend - we want to autosuspend */
- int ret = 1;
+ int ret;
if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
pm_runtime_forbid(dev);
@@ -2685,12 +2928,33 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
}
ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ goto done;
+ ret = amdgpu_runtime_idle_check_userq(dev);
+done:
pm_runtime_mark_last_busy(dev);
pm_runtime_autosuspend(dev);
return ret;
}
+/*
+ * amdgpu_drm_release - file_operations.release hook for amdgpu DRM files
+ *
+ * If the file has driver-private data and drm_dev_enter() succeeds, mark the
+ * eviction fence manager as closing, destroy its eviction fence and finalize
+ * the user queue manager. drm_release() is always called last and its return
+ * value is passed back.
+ */
+static int amdgpu_drm_release(struct inode *inode, struct file *filp)
+{
+	struct drm_file *drm_priv = filp->private_data;
+	struct amdgpu_fpriv *fpriv = drm_priv->driver_priv;
+	struct drm_device *ddev = drm_priv->minor->dev;
+	int idx;
+
+	if (!fpriv || !drm_dev_enter(ddev, &idx))
+		goto out;
+
+	fpriv->evf_mgr.fd_closing = true;
+	amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
+	amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
+	drm_dev_exit(idx);
+
+out:
+	return drm_release(inode, filp);
+}
+
long amdgpu_drm_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg)
{
@@ -2712,15 +2976,15 @@ out:
}
static const struct dev_pm_ops amdgpu_pm_ops = {
- .prepare = amdgpu_pmops_prepare,
- .complete = amdgpu_pmops_complete,
- .suspend = amdgpu_pmops_suspend,
- .suspend_noirq = amdgpu_pmops_suspend_noirq,
- .resume = amdgpu_pmops_resume,
- .freeze = amdgpu_pmops_freeze,
- .thaw = amdgpu_pmops_thaw,
- .poweroff = amdgpu_pmops_poweroff,
- .restore = amdgpu_pmops_restore,
+ .prepare = pm_sleep_ptr(amdgpu_pmops_prepare),
+ .complete = pm_sleep_ptr(amdgpu_pmops_complete),
+ .suspend = pm_sleep_ptr(amdgpu_pmops_suspend),
+ .suspend_noirq = pm_sleep_ptr(amdgpu_pmops_suspend_noirq),
+ .resume = pm_sleep_ptr(amdgpu_pmops_resume),
+ .freeze = pm_sleep_ptr(amdgpu_pmops_freeze),
+ .thaw = pm_sleep_ptr(amdgpu_pmops_thaw),
+ .poweroff = pm_sleep_ptr(amdgpu_pmops_poweroff),
+ .restore = pm_sleep_ptr(amdgpu_pmops_restore),
.runtime_suspend = amdgpu_pmops_runtime_suspend,
.runtime_resume = amdgpu_pmops_runtime_resume,
.runtime_idle = amdgpu_pmops_runtime_idle,
@@ -2742,7 +3006,7 @@ static const struct file_operations amdgpu_driver_kms_fops = {
.owner = THIS_MODULE,
.open = drm_open,
.flush = amdgpu_flush,
- .release = drm_release,
+ .release = amdgpu_drm_release,
.unlocked_ioctl = amdgpu_drm_ioctl,
.mmap = drm_gem_mmap,
.poll = drm_poll,
@@ -2753,6 +3017,7 @@ static const struct file_operations amdgpu_driver_kms_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = drm_show_fdinfo,
#endif
+ .fop_flags = FOP_UNSIGNED_OFFSET,
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
@@ -2788,6 +3053,10 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
};
static const struct drm_driver amdgpu_kms_driver = {
@@ -2798,11 +3067,11 @@ static const struct drm_driver amdgpu_kms_driver = {
DRIVER_SYNCOBJ_TIMELINE,
.open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms,
- .lastclose = amdgpu_driver_lastclose_kms,
.ioctls = amdgpu_ioctls_kms,
.num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
.dumb_create = amdgpu_mode_dumb_create,
.dumb_map_offset = amdgpu_mode_dumb_mmap,
+ DRM_FBDEV_TTM_DRIVER_OPS,
.fops = &amdgpu_driver_kms_fops,
.release = &amdgpu_driver_release_kms,
#ifdef CONFIG_PROC_FS
@@ -2813,7 +3082,6 @@ static const struct drm_driver amdgpu_kms_driver = {
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
- .date = DRIVER_DATE,
.major = KMS_DRIVER_MAJOR,
.minor = KMS_DRIVER_MINOR,
.patchlevel = KMS_DRIVER_PATCHLEVEL,
@@ -2825,11 +3093,11 @@ const struct drm_driver amdgpu_partition_driver = {
DRIVER_SYNCOBJ_TIMELINE,
.open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms,
- .lastclose = amdgpu_driver_lastclose_kms,
.ioctls = amdgpu_ioctls_kms,
.num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
.dumb_create = amdgpu_mode_dumb_create,
.dumb_map_offset = amdgpu_mode_dumb_mmap,
+ DRM_FBDEV_TTM_DRIVER_OPS,
.fops = &amdgpu_driver_kms_fops,
.release = &amdgpu_driver_release_kms,
@@ -2837,7 +3105,6 @@ const struct drm_driver amdgpu_partition_driver = {
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
- .date = DRIVER_DATE,
.major = KMS_DRIVER_MAJOR,
.minor = KMS_DRIVER_MINOR,
.patchlevel = KMS_DRIVER_PATCHLEVEL,
@@ -2863,7 +3130,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
.probe = amdgpu_pci_probe,
.remove = amdgpu_pci_remove,
.shutdown = amdgpu_pci_shutdown,
- .driver.pm = &amdgpu_pm_ops,
+ .driver.pm = pm_ptr(&amdgpu_pm_ops),
.err_handler = &amdgpu_pci_err_handler,
.dev_groups = amdgpu_sysfs_groups,
};
@@ -2872,14 +3139,11 @@ static int __init amdgpu_init(void)
{
int r;
- if (drm_firmware_drivers_only())
- return -EINVAL;
-
r = amdgpu_sync_init();
if (r)
goto error_sync;
- r = amdgpu_fence_slab_init();
+ r = amdgpu_userq_fence_slab_init();
if (r)
goto error_fence;
@@ -2890,6 +3154,12 @@ static int __init amdgpu_init(void)
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
amdgpu_amdkfd_init();
+ if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) {
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ pr_crit("Overdrive is enabled, please disable it before "
+ "reporting any bugs unrelated to overdrive.\n");
+ }
+
/* let modprobe override vga console setting */
return pci_register_driver(&amdgpu_kms_pci_driver);
@@ -2907,7 +3177,7 @@ static void __exit amdgpu_exit(void)
amdgpu_unregister_atpx_handler();
amdgpu_acpi_release();
amdgpu_sync_fini();
- amdgpu_fence_slab_fini();
+ amdgpu_userq_fence_slab_fini();
mmu_notifier_synchronize();
amdgpu_xcp_drv_release();
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
index 5bc2cb661af7..2d86cc6f7f4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
@@ -40,7 +40,6 @@
#define DRIVER_NAME "amdgpu"
#define DRIVER_DESC "AMD GPU"
-#define DRIVER_DATE "20150101"
extern const struct drm_driver amdgpu_partition_driver;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
index e71768661ca8..8cd69836dd99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
@@ -90,7 +90,7 @@
#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF))
static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
- u8 *eeprom_buf, u16 buf_size, bool read)
+ u8 *eeprom_buf, u32 buf_size, bool read)
{
u8 eeprom_offset_buf[EEPROM_OFFSET_SIZE];
struct i2c_msg msgs[] = {
@@ -133,15 +133,15 @@ static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
* cycle begins. This is implied for the
* "i2c_transfer()" abstraction.
*/
- len = min(EEPROM_PAGE_SIZE - (eeprom_addr &
- EEPROM_PAGE_MASK),
- (u32)buf_size);
+ len = min(EEPROM_PAGE_SIZE - (eeprom_addr & EEPROM_PAGE_MASK),
+ buf_size);
} else {
/* Reading from the EEPROM has no limitation
* on the number of bytes read from the EEPROM
* device--they are simply sequenced out.
+ * Keep in mind that i2c_msg.len is u16 type.
*/
- len = buf_size;
+ len = min(U16_MAX, buf_size);
}
msgs[1].len = len;
msgs[1].buf = eeprom_buf;
@@ -179,7 +179,7 @@ static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
* Returns the number of bytes read/written; -errno on error.
*/
static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
- u8 *eeprom_buf, u16 buf_size, bool read)
+ u8 *eeprom_buf, u32 buf_size, bool read)
{
const struct i2c_adapter_quirks *quirks = i2c_adap->quirks;
u16 limit;
@@ -200,7 +200,7 @@ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
dev_err_ratelimited(&i2c_adap->dev,
"maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d",
eeprom_addr, buf_size,
- read ? "read" : "write", EEPROM_OFFSET_SIZE);
+ str_read_write(read), EEPROM_OFFSET_SIZE);
return -EINVAL;
}
@@ -225,7 +225,7 @@ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
- u16 bytes)
+ u32 bytes)
{
return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
true);
@@ -233,7 +233,7 @@ int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
- u16 bytes)
+ u32 bytes)
{
return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
index 6935adb2be1f..8083b8253ef4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
@@ -28,10 +28,10 @@
int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
- u16 bytes);
+ u32 bytes);
int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
- u16 bytes);
+ u32 bytes);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
new file mode 100644
index 000000000000..23d7d0b0d625
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/sched.h>
+#include <drm/drm_exec.h>
+#include "amdgpu.h"
+
+#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name)
+#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr)
+
+/* dma_fence_ops.get_driver_name: every eviction fence reports the same name. */
+static const char *
+amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
+{
+	static const char driver_name[] = "amdgpu_eviction_fence";
+
+	return driver_name;
+}
+
+/* dma_fence_ops.get_timeline_name: return the name stored in the fence. */
+static const char *
+amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
+{
+	return container_of(f, struct amdgpu_eviction_fence, base)->timeline_name;
+}
+
+/**
+ * amdgpu_eviction_fence_replace_fence - install a fresh eviction fence
+ *
+ * @evf_mgr: eviction fence manager whose current fence is replaced
+ * @exec: drm_exec context; its locked objects get the new fence attached
+ *
+ * Nothing is done while the manager still holds an unsignaled fence.
+ * Otherwise a new fence is created, published in @evf_mgr and attached to
+ * every locked object of @exec. Locking and unlocking the objects is left to
+ * the caller.
+ *
+ * Returns 0 on success, -ENOMEM if no fence could be created, or the error
+ * returned by amdgpu_eviction_fence_attach().
+ */
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+				    struct drm_exec *exec)
+{
+	struct amdgpu_eviction_fence *old_ef, *new_ef;
+	struct drm_gem_object *obj;
+	unsigned long index;
+	int ret = 0;
+
+	if (evf_mgr->ev_fence &&
+	    !dma_fence_is_signaled(&evf_mgr->ev_fence->base))
+		return 0;
+	/*
+	 * Steps to replace eviction fence:
+	 * * lock all objects in exec (caller)
+	 * * create a new eviction fence
+	 * * update new eviction fence in evf_mgr
+	 * * attach the new eviction fence to BOs
+	 * * release the old fence
+	 * * unlock the objects (caller)
+	 */
+	new_ef = amdgpu_eviction_fence_create(evf_mgr);
+	if (!new_ef) {
+		DRM_ERROR("Failed to create new eviction fence\n");
+		return -ENOMEM;
+	}
+
+	/* Update the eviction fence now */
+	spin_lock(&evf_mgr->ev_fence_lock);
+	old_ef = evf_mgr->ev_fence;
+	evf_mgr->ev_fence = new_ef;
+	spin_unlock(&evf_mgr->ev_fence_lock);
+
+	/* Attach the new fence */
+	drm_exec_for_each_locked_object(exec, index, obj) {
+		struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+		if (!bo)
+			continue;
+		ret = amdgpu_eviction_fence_attach(evf_mgr, bo);
+		if (ret) {
+			DRM_ERROR("Failed to attach new eviction fence\n");
+			goto put_old;
+		}
+	}
+
+put_old:
+	/*
+	 * The manager no longer points at the old fence, so drop its
+	 * reference on success and on failure alike.
+	 *
+	 * On failure the new fence must not be kfree()d: it is a refcounted
+	 * dma_fence that is already published in evf_mgr->ev_fence and may
+	 * have been added to reservation objects by the loop above. It stays
+	 * owned by the manager, whose reference is dropped in
+	 * amdgpu_eviction_fence_destroy().
+	 */
+	if (old_ef)
+		dma_fence_put(&old_ef->base);
+	return ret;
+}
+
+/*
+ * Delayed-work handler queued from the eviction fence's enable_signaling
+ * callback. With the user queue mutex held, it takes a reference on the
+ * manager's current eviction fence (if any) and passes it to
+ * amdgpu_userq_evict().
+ */
+static void
+amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
+{
+	struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
+	struct amdgpu_userq_mgr *uq_mgr = &evf_mgr_to_fpriv(evf_mgr)->userq_mgr;
+	struct amdgpu_eviction_fence *ef;
+
+	mutex_lock(&uq_mgr->userq_mutex);
+
+	/* Snapshot the current fence and pin it while the spinlock is held */
+	spin_lock(&evf_mgr->ev_fence_lock);
+	ef = evf_mgr->ev_fence;
+	if (ef)
+		dma_fence_get(&ef->base);
+	spin_unlock(&evf_mgr->ev_fence_lock);
+
+	if (ef)
+		amdgpu_userq_evict(uq_mgr, ef);
+
+	mutex_unlock(&uq_mgr->userq_mutex);
+
+	/* Release the reference taken above, after dropping the mutex */
+	if (ef)
+		dma_fence_put(&ef->base);
+}
+
+/*
+ * dma_fence_ops.enable_signaling: queue the owning manager's suspend work
+ * with no delay. A NULL fence is tolerated. Always returns true.
+ */
+static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
+{
+	if (f) {
+		struct amdgpu_eviction_fence *ef = to_ev_fence(f);
+
+		schedule_delayed_work(&ef->evf_mgr->suspend_work, 0);
+	}
+
+	return true;
+}
+
+/*
+ * dma_fence callbacks used for eviction fences. Only the naming and
+ * enable_signaling hooks are provided; no other callback is set here.
+ */
+static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
+ .get_driver_name = amdgpu_eviction_fence_get_driver_name,
+ .get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
+ .enable_signaling = amdgpu_eviction_fence_enable_signaling,
+};
+
+/**
+ * amdgpu_eviction_fence_signal - signal an eviction fence
+ *
+ * @evf_mgr: manager whose ev_fence_lock is held around the signal
+ * @ev_fence: eviction fence to signal
+ */
+void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+				  struct amdgpu_eviction_fence *ev_fence)
+{
+	struct dma_fence *f = &ev_fence->base;
+
+	spin_lock(&evf_mgr->ev_fence_lock);
+	dma_fence_signal(f);
+	spin_unlock(&evf_mgr->ev_fence_lock);
+}
+
+/**
+ * amdgpu_eviction_fence_create - allocate and initialize an eviction fence
+ *
+ * @evf_mgr: manager supplying the fence context and sequence counter
+ *
+ * The timeline name is taken from the current task's comm and the sequence
+ * number is the incremented value of the manager's counter.
+ *
+ * Returns the new fence, or NULL if the allocation failed.
+ */
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+	struct amdgpu_eviction_fence *ef;
+
+	ef = kzalloc(sizeof(*ef), GFP_KERNEL);
+	if (ef) {
+		ef->evf_mgr = evf_mgr;
+		get_task_comm(ef->timeline_name, current);
+		spin_lock_init(&ef->lock);
+		dma_fence_init64(&ef->base, &amdgpu_eviction_fence_ops,
+				 &ef->lock, evf_mgr->ev_fence_ctx,
+				 atomic_inc_return(&evf_mgr->ev_fence_seq));
+	}
+
+	return ef;
+}
+
+/**
+ * amdgpu_eviction_fence_destroy - wait for and release the manager's fence
+ *
+ * @evf_mgr: eviction fence manager being torn down
+ *
+ * Flushes the suspend work, then, if the manager holds a fence, waits
+ * (non-interruptibly) for it and drops a reference on it.
+ */
+void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+	struct amdgpu_eviction_fence *ef;
+
+	/* Let any queued suspend work run to completion first */
+	flush_delayed_work(&evf_mgr->suspend_work);
+
+	spin_lock(&evf_mgr->ev_fence_lock);
+	ef = evf_mgr->ev_fence;
+	spin_unlock(&evf_mgr->ev_fence_lock);
+
+	if (ef) {
+		dma_fence_wait(&ef->base, false);
+
+		/* Last unref of the eviction fence */
+		dma_fence_put(&ef->base);
+	}
+}
+
+/**
+ * amdgpu_eviction_fence_attach - add the current eviction fence to a BO
+ *
+ * @evf_mgr: manager holding the current eviction fence
+ * @bo: buffer object whose reservation object receives the fence
+ *
+ * Reserves one fence slot on the BO's reservation object and, if the manager
+ * currently has a fence, adds it with DMA_RESV_USAGE_BOOKKEEP usage.
+ *
+ * Returns 0 on success or when the BO has no reservation object, otherwise
+ * the error from dma_resv_reserve_fences().
+ */
+int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+				 struct amdgpu_bo *bo)
+{
+	struct dma_resv *resv = bo->tbo.base.resv;
+	struct amdgpu_eviction_fence *ef;
+	int err;
+
+	if (!resv)
+		return 0;
+
+	err = dma_resv_reserve_fences(resv, 1);
+	if (err) {
+		DRM_DEBUG_DRIVER("Failed to resv fence space\n");
+		return err;
+	}
+
+	/* Read the manager's fence and add it under the fence lock */
+	spin_lock(&evf_mgr->ev_fence_lock);
+	ef = evf_mgr->ev_fence;
+	if (ef)
+		dma_resv_add_fence(resv, &ef->base, DMA_RESV_USAGE_BOOKKEEP);
+	spin_unlock(&evf_mgr->ev_fence_lock);
+
+	return 0;
+}
+
+/**
+ * amdgpu_eviction_fence_detach - remove this manager's fences from a BO
+ *
+ * @evf_mgr: manager whose fence context identifies the fences to replace
+ * @bo: buffer object whose reservation object is updated
+ *
+ * Fences of the manager's context on the BO's reservation object are replaced
+ * by the dma_fence stub; the temporary stub reference is dropped afterwards.
+ */
+void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+				  struct amdgpu_bo *bo)
+{
+	struct dma_resv *resv = bo->tbo.base.resv;
+	struct dma_fence *replacement = dma_fence_get_stub();
+
+	dma_resv_replace_fences(resv, evf_mgr->ev_fence_ctx,
+				replacement, DMA_RESV_USAGE_BOOKKEEP);
+	dma_fence_put(replacement);
+}
+
+/**
+ * amdgpu_eviction_fence_init - set up an eviction fence manager
+ *
+ * @evf_mgr: manager to initialize; done once per open
+ *
+ * Resets the sequence counter, allocates one fence context, and initializes
+ * the fence lock and the suspend work. Always returns 0.
+ */
+int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+	spin_lock_init(&evf_mgr->ev_fence_lock);
+	INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
+
+	/* Fresh fence timeline: new context, sequence numbers start from 0 */
+	evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
+	atomic_set(&evf_mgr->ev_fence_seq, 0);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
new file mode 100644
index 000000000000..fcd867b7147d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_EV_FENCE_H_
+#define AMDGPU_EV_FENCE_H_
+
+/* One eviction fence; created by amdgpu_eviction_fence_create(). */
+struct amdgpu_eviction_fence {
+ struct dma_fence base; /* embedded fence; container_of() target */
+ spinlock_t lock; /* lock handed to dma_fence_init64() for @base */
+ char timeline_name[TASK_COMM_LEN]; /* comm of the task that created the fence */
+ struct amdgpu_eviction_fence_mgr *evf_mgr; /* manager the fence was created for */
+};
+
+/* Eviction fence state embedded in struct amdgpu_fpriv as evf_mgr. */
+struct amdgpu_eviction_fence_mgr {
+ u64 ev_fence_ctx; /* fence context from dma_fence_context_alloc() */
+ atomic_t ev_fence_seq; /* incremented to produce each new fence's seqno */
+ spinlock_t ev_fence_lock; /* taken around accesses to @ev_fence */
+ struct amdgpu_eviction_fence *ev_fence; /* current eviction fence, may be NULL */
+ struct delayed_work suspend_work; /* runs amdgpu_eviction_fence_suspend_worker() */
+ uint8_t fd_closing; /* set to true by the file release hook */
+};
+
+/* Eviction fence helper functions */
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+int
+amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+
+void
+amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+
+int
+amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_eviction_fence *ev_fence);
+
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct drm_exec *exec);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index 6038b5021b27..b349bb3676d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
@@ -51,33 +52,29 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_DEC] = "dec",
[AMDGPU_HW_IP_VCN_ENC] = "enc",
[AMDGPU_HW_IP_VCN_JPEG] = "jpeg",
+ [AMDGPU_HW_IP_VPE] = "vpe",
};
void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
{
- struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_mem_stats stats;
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
ktime_t usage[AMDGPU_HW_IP_NUM];
- uint32_t bus, dev, fn, domain;
- unsigned int hw_ip;
- int ret;
-
- memset(&stats, 0, sizeof(stats));
- bus = adev->pdev->bus->number;
- domain = pci_domain_nr(adev->pdev->bus);
- dev = PCI_SLOT(adev->pdev->devfn);
- fn = PCI_FUNC(adev->pdev->devfn);
-
- ret = amdgpu_bo_reserve(vm->root.bo, false);
- if (ret)
- return;
-
- amdgpu_vm_get_memory(vm, &stats);
- amdgpu_bo_unreserve(vm->root.bo);
+ const char *pl_name[] = {
+ [TTM_PL_VRAM] = "vram",
+ [TTM_PL_TT] = "gtt",
+ [TTM_PL_SYSTEM] = "cpu",
+ [AMDGPU_PL_GDS] = "gds",
+ [AMDGPU_PL_GWS] = "gws",
+ [AMDGPU_PL_OA] = "oa",
+ [AMDGPU_PL_DOORBELL] = "doorbell",
+ [AMDGPU_PL_MMIO_REMAP] = "mmioremap",
+ };
+ unsigned int hw_ip, i;
+ amdgpu_vm_get_memory(vm, stats);
amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
/*
@@ -87,24 +84,36 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
*/
drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid);
- drm_printf(p, "drm-driver:\t%s\n", file->minor->dev->driver->name);
- drm_printf(p, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn);
- drm_printf(p, "drm-client-id:\t%llu\n", vm->immediate.fence_context);
- drm_printf(p, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL);
- drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL);
- drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL);
- drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n",
- stats.visible_vram/1024UL);
+
+ for (i = 0; i < ARRAY_SIZE(pl_name); i++) {
+ if (!pl_name[i])
+ continue;
+
+ drm_print_memory_stats(p,
+ &stats[i].drm,
+ DRM_GEM_OBJECT_RESIDENT |
+ DRM_GEM_OBJECT_PURGEABLE,
+ pl_name[i]);
+ }
+
+ /* Legacy amdgpu keys, alias to drm-resident-memory-: */
+ drm_printf(p, "drm-memory-vram:\t%llu KiB\n",
+ stats[TTM_PL_VRAM].drm.resident/1024UL);
+ drm_printf(p, "drm-memory-gtt: \t%llu KiB\n",
+ stats[TTM_PL_TT].drm.resident/1024UL);
+ drm_printf(p, "drm-memory-cpu: \t%llu KiB\n",
+ stats[TTM_PL_SYSTEM].drm.resident/1024UL);
+
+ /* Amdgpu specific memory accounting keys: */
drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
- stats.evicted_vram/1024UL);
- drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n",
- stats.evicted_visible_vram/1024UL);
+ stats[TTM_PL_VRAM].evicted/1024UL);
drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
- stats.requested_vram/1024UL);
- drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n",
- stats.requested_visible_vram/1024UL);
+ (stats[TTM_PL_VRAM].drm.shared +
+ stats[TTM_PL_VRAM].drm.private) / 1024UL);
drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
- stats.requested_gtt/1024UL);
+ (stats[TTM_PL_TT].drm.shared +
+ stats[TTM_PL_TT].drm.private) / 1024UL);
+
for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
if (!usage[hw_ip])
continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7537f5aa76f0..fd8cca241da6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -42,39 +42,6 @@
#include "amdgpu_reset.h"
/*
- * Fences mark an event in the GPUs pipeline and are used
- * for GPU/CPU synchronization. When the fence is written,
- * it is expected that all buffers associated with that fence
- * are no longer in use by the associated ring on the GPU and
- * that the relevant GPU caches have been flushed.
- */
-
-struct amdgpu_fence {
- struct dma_fence base;
-
- /* RB, DMA, etc. */
- struct amdgpu_ring *ring;
- ktime_t start_timestamp;
-};
-
-static struct kmem_cache *amdgpu_fence_slab;
-
-int amdgpu_fence_slab_init(void)
-{
- amdgpu_fence_slab = kmem_cache_create(
- "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!amdgpu_fence_slab)
- return -ENOMEM;
- return 0;
-}
-
-void amdgpu_fence_slab_fini(void)
-{
- rcu_barrier();
- kmem_cache_destroy(amdgpu_fence_slab);
-}
-/*
* Cast helper
*/
static const struct dma_fence_ops amdgpu_fence_ops;
@@ -132,14 +99,14 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
*
* @ring: ring the fence is associated with
* @f: resulting fence object
- * @job: job the fence is embedded in
+ * @af: amdgpu fence input
* @flags: flags to pass into the subordinate .emit_fence() call
*
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job,
- unsigned int flags)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+ struct amdgpu_fence *af, unsigned int flags)
{
struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence;
@@ -148,40 +115,34 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
uint32_t seq;
int r;
- if (job == NULL) {
- /* create a sperate hw fence */
- am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
- if (am_fence == NULL)
+ if (!af) {
+ /* create a separate hw fence */
+ am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL);
+ if (!am_fence)
return -ENOMEM;
- fence = &am_fence->base;
- am_fence->ring = ring;
} else {
- /* take use of job-embedded fence */
- fence = &job->hw_fence;
+ am_fence = af;
}
+ fence = &am_fence->base;
+ am_fence->ring = ring;
seq = ++ring->fence_drv.sync_seq;
- if (job && job->job_run_counter) {
- /* reinit seq for resubmitted jobs */
- fence->seqno = seq;
- /* TO be inline with external fence creation and other drivers */
+ am_fence->seq = seq;
+ if (af) {
+ dma_fence_init(fence, &amdgpu_job_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx, seq);
+ /* Against remove in amdgpu_job_{free, free_cb} */
dma_fence_get(fence);
} else {
- if (job) {
- dma_fence_init(fence, &amdgpu_job_fence_ops,
- &ring->fence_drv.lock,
- adev->fence_context + ring->idx, seq);
- /* Against remove in amdgpu_job_{free, free_cb} */
- dma_fence_get(fence);
- } else {
- dma_fence_init(fence, &amdgpu_fence_ops,
- &ring->fence_drv.lock,
- adev->fence_context + ring->idx, seq);
- }
+ dma_fence_init(fence, &amdgpu_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx, seq);
}
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
+ amdgpu_fence_save_wptr(fence);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -282,7 +243,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
- if (del_timer(&ring->fence_drv.fallback_timer) &&
+ if (timer_delete(&ring->fence_drv.fallback_timer) &&
seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
@@ -294,6 +255,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
do {
struct dma_fence *fence, **ptr;
+ struct amdgpu_fence *am_fence;
++last_seq;
last_seq &= drv->num_fences_mask;
@@ -306,6 +268,12 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
if (!fence)
continue;
+ /* Save the wptr in the fence driver so we know what the last processed
+ * wptr was. This is required for re-emitting the ring state for
+ * queues that are reset but are not guilty and thus have no guilty fence.
+ */
+ am_fence = container_of(fence, struct amdgpu_fence, base);
+ drv->signalled_wptr = am_fence->wptr;
dma_fence_signal(fence);
dma_fence_put(fence);
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@@ -324,11 +292,13 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
*/
static void amdgpu_fence_fallback(struct timer_list *t)
{
- struct amdgpu_ring *ring = from_timer(ring, t,
- fence_drv.fallback_timer);
+ struct amdgpu_ring *ring = timer_container_of(ring, t,
+ fence_drv.fallback_timer);
if (amdgpu_fence_process(ring))
- DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
+ dev_warn(ring->adev->dev,
+ "Fence fallback timer expired on ring %s\n",
+ ring->name);
}
/**
@@ -570,7 +540,8 @@ static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_SDMA:
/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
- if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0))
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 0, 0))
is_gfx_power_domain = true;
break;
case AMDGPU_RING_TYPE_GFX:
@@ -619,7 +590,7 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
- del_timer_sync(&ring->fence_drv.fallback_timer);
+ timer_delete_sync(&ring->fence_drv.fallback_timer);
}
}
@@ -719,7 +690,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
* it right here or we won't be able to track them in fence_drv
* and they will remain unsignaled during sa_bo free.
*/
- job = container_of(old, struct amdgpu_job, hw_fence);
+ job = container_of(old, struct amdgpu_job, hw_fence.base);
if (!job->base.s_fence && !dma_fence_is_signaled(old))
dma_fence_signal(old);
RCU_INIT_POINTER(*ptr, NULL);
@@ -765,6 +736,86 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
amdgpu_fence_process(ring);
}
+
+/*
+ * Kernel queue reset handling
+ *
+ * The driver can reset individual queues for most engines, but those queues
+ * may contain work from multiple contexts. Resetting the queue will lose
+ * all of that state. In order to minimize the collateral damage, the
+ * driver will save the ring contents which are not associated with the guilty
+ * context prior to resetting the queue. After resetting the queue, the queue
+ * contents from the other contexts are re-emitted to the ring so that they can
+ * be processed by the engine. To handle this, we save the queue's write
+ * pointer (wptr) in the fences associated with each context. If we get a
+ * queue timeout, we can then use the wptrs from the fences to determine
+ * which data needs to be saved out of the queue's ring buffer.
+ */
+
+/**
+ * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
+ *
+ * @fence: fence of the ring to signal
+ *
+ * Marks @fence with the error -ETIME, passes the fence's sequence number to
+ * amdgpu_fence_write() for its ring and then runs amdgpu_fence_process() on
+ * that ring. The error is set first, before the fence can be processed.
+ */
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence)
+{
+ dma_fence_set_error(&fence->base, -ETIME);
+ amdgpu_fence_write(fence->ring, fence->seq);
+ amdgpu_fence_process(fence->ring);
+}
+
+/**
+ * amdgpu_fence_save_wptr - record the ring's write pointer in a fence
+ *
+ * @fence: dma_fence embedded in a struct amdgpu_fence
+ *
+ * Copies the current wptr of the fence's ring into the fence. The ring
+ * pointer of the containing amdgpu_fence is dereferenced, so it has to be set
+ * before this is called.
+ */
+void amdgpu_fence_save_wptr(struct dma_fence *fence)
+{
+	struct amdgpu_fence *af = container_of(fence, struct amdgpu_fence, base);
+	struct amdgpu_ring *ring = af->ring;
+
+	af->wptr = ring->wptr;
+}
+
+/*
+ * Append the ring entries between two write pointers to ring->ring_backup.
+ *
+ * Both write pointers are reduced with ring->buf_mask; entries from the
+ * start index up to, but not including, the end index are copied, with the
+ * index wrapped by the same mask. ring_backup_entries_to_copy is advanced by
+ * one per copied entry. end_wptr is u64 like start_wptr so that a caller's
+ * 64-bit wptr is not implicitly narrowed at the call.
+ */
+static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
+						   u64 start_wptr, u64 end_wptr)
+{
+	unsigned int first_idx = start_wptr & ring->buf_mask;
+	unsigned int last_idx = end_wptr & ring->buf_mask;
+	unsigned int i;
+
+	/* Backup the contents of the ring buffer. */
+	for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+		ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
+}
+
+/**
+ * amdgpu_ring_backup_unprocessed_commands - save ring contents not yet processed
+ *
+ * @ring: ring whose pending commands are backed up
+ * @guilty_fence: fence of the guilty context, or NULL to save everything
+ *
+ * Starting after the sequence number returned by amdgpu_fence_read(), walk
+ * the fence slots up to the last emitted sequence. For every fence that is
+ * still unsignaled, the ring range between the previous write pointer and
+ * that fence's saved wptr is backed up unless the fence belongs to the guilty
+ * context. The backup entry count is reset to 0 before the walk.
+ */
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ struct dma_fence *unprocessed;
+ struct dma_fence __rcu **ptr;
+ struct amdgpu_fence *fence;
+ u64 wptr, i, seqno;
+
+ seqno = amdgpu_fence_read(ring);
+ /* Start from the wptr recorded for the last fence that was signalled */
+ wptr = ring->fence_drv.signalled_wptr;
+ ring->ring_backup_entries_to_copy = 0;
+
+ for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) {
+ ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask];
+ rcu_read_lock();
+ unprocessed = rcu_dereference(*ptr);
+
+ if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
+ fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+ /* save everything if the ring is not guilty, otherwise
+ * just save the content from other contexts.
+ */
+ if (!guilty_fence || (fence->context != guilty_fence->context))
+ amdgpu_ring_backup_unprocessed_command(ring, wptr,
+ fence->wptr);
+ /* The next range starts where this fence's commands end, skipped or not */
+ wptr = fence->wptr;
+ }
+ rcu_read_unlock();
+ }
+}
+
/*
* Common fence implementation
*/
@@ -781,7 +832,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
+ struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
return (const char *)to_amdgpu_ring(job->base.sched)->name;
}
@@ -811,7 +862,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
*/
static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
+ struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
@@ -831,7 +882,7 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
/* free fence_slab if it's separated fence*/
- kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f));
+ kfree(to_amdgpu_fence(f));
}
/**
@@ -846,7 +897,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu)
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
/* free job if fence has a parent job */
- kfree(container_of(f, struct amdgpu_job, hw_fence));
+ kfree(container_of(f, struct amdgpu_job, hw_fence.base));
}
/**
@@ -979,7 +1030,9 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USER;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index 9c66d98af6d8..b0082aa7f3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -31,6 +31,7 @@
#define FRU_EEPROM_MADDR_6 0x60000
#define FRU_EEPROM_MADDR_8 0x80000
+#define FRU_EEPROM_MADDR_INV 0xFFFFF
static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
{
@@ -42,8 +43,9 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
/* The i2c access is blocked on VF
* TODO: Need other way to get the info
+ * Also, FRU not valid for APU devices.
*/
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
return false;
/* The default I2C EEPROM address of the FRU.
@@ -57,29 +59,28 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
* for ease/speed/readability. For now, 2 string comparisons are
* reasonable and not too expensive
*/
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- /* D161 and D163 are the VG20 server SKUs */
- if (strnstr(atom_ctx->vbios_pn, "D161",
- sizeof(atom_ctx->vbios_pn)) ||
- strnstr(atom_ctx->vbios_pn, "D163",
- sizeof(atom_ctx->vbios_pn))) {
- if (fru_addr)
- *fru_addr = FRU_EEPROM_MADDR_6;
- return true;
- } else {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2):
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ /* D161 and D163 are the VG20 server SKUs */
+ if (atom_ctx && (strnstr(atom_ctx->vbios_pn, "D161",
+ sizeof(atom_ctx->vbios_pn)) ||
+ strnstr(atom_ctx->vbios_pn, "D163",
+ sizeof(atom_ctx->vbios_pn)))) {
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+ } else {
+ return false;
+ }
+ case CHIP_ARCTURUS:
+ default:
return false;
}
- case CHIP_ALDEBARAN:
- /* All Aldebaran SKUs have an FRU */
- if (!strnstr(atom_ctx->vbios_pn, "D673",
- sizeof(atom_ctx->vbios_pn)))
- if (fru_addr)
- *fru_addr = FRU_EEPROM_MADDR_6;
- return true;
- case CHIP_SIENNA_CICHLID:
- if (strnstr(atom_ctx->vbios_pn, "D603",
- sizeof(atom_ctx->vbios_pn))) {
+ case IP_VERSION(11, 0, 7):
+ if (atom_ctx && strnstr(atom_ctx->vbios_pn, "D603",
+ sizeof(atom_ctx->vbios_pn))) {
if (strnstr(atom_ctx->vbios_pn, "D603GLXE",
sizeof(atom_ctx->vbios_pn))) {
return false;
@@ -92,6 +93,22 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
} else {
return false;
}
+ case IP_VERSION(13, 0, 2):
+ /* All Aldebaran SKUs have an FRU */
+ if (atom_ctx && !strnstr(atom_ctx->vbios_pn, "D673",
+ sizeof(atom_ctx->vbios_pn)))
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_8;
+ return true;
+ case IP_VERSION(13, 0, 12):
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_INV;
+ return true;
default:
return false;
}
@@ -99,6 +116,7 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
{
+ struct amdgpu_fru_info *fru_info;
unsigned char buf[8], *pia;
u32 addr, fru_addr;
int size, len;
@@ -107,9 +125,27 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
if (!is_fru_eeprom_supported(adev, &fru_addr))
return 0;
+ /* FRU data available, but no direct EEPROM access */
+ if (fru_addr == FRU_EEPROM_MADDR_INV)
+ return 0;
+
+ if (!adev->fru_info) {
+ adev->fru_info = kzalloc(sizeof(*adev->fru_info), GFP_KERNEL);
+ if (!adev->fru_info)
+ return -ENOMEM;
+ }
+
+ fru_info = adev->fru_info;
+ /* For Arcturus-and-later, default value of serial_number is unique_id
+ * so convert it to a 16-digit HEX string for convenience and
+ * backwards-compatibility.
+ */
+ sprintf(fru_info->serial, "%llx", adev->unique_id);
+
/* If algo exists, it means that the i2c_adapter's initialized */
if (!adev->pm.fru_eeprom_i2c_bus || !adev->pm.fru_eeprom_i2c_bus->algo) {
- DRM_WARN("Cannot access FRU, EEPROM accessor not initialized");
+ dev_warn(adev->dev,
+ "Cannot access FRU, EEPROM accessor not initialized");
return -ENODEV;
}
@@ -117,19 +153,22 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf,
sizeof(buf));
if (len != 8) {
- DRM_ERROR("Couldn't read the IPMI Common Header: %d", len);
+ dev_err(adev->dev, "Couldn't read the IPMI Common Header: %d",
+ len);
return len < 0 ? len : -EIO;
}
if (buf[0] != 1) {
- DRM_ERROR("Bad IPMI Common Header version: 0x%02x", buf[0]);
+ dev_err(adev->dev, "Bad IPMI Common Header version: 0x%02x",
+ buf[0]);
return -EIO;
}
for (csum = 0; len > 0; len--)
csum += buf[len - 1];
if (csum) {
- DRM_ERROR("Bad IPMI Common Header checksum: 0x%02x", csum);
+ dev_err(adev->dev, "Bad IPMI Common Header checksum: 0x%02x",
+ csum);
return -EIO;
}
@@ -144,12 +183,14 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
/* Read the header of the PIA. */
len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3);
if (len != 3) {
- DRM_ERROR("Couldn't read the Product Info Area header: %d", len);
+ dev_err(adev->dev,
+ "Couldn't read the Product Info Area header: %d", len);
return len < 0 ? len : -EIO;
}
if (buf[0] != 1) {
- DRM_ERROR("Bad IPMI Product Info Area version: 0x%02x", buf[0]);
+ dev_err(adev->dev, "Bad IPMI Product Info Area version: 0x%02x",
+ buf[0]);
return -EIO;
}
@@ -162,40 +203,49 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size);
if (len != size) {
kfree(pia);
- DRM_ERROR("Couldn't read the Product Info Area: %d", len);
+ dev_err(adev->dev, "Couldn't read the Product Info Area: %d",
+ len);
return len < 0 ? len : -EIO;
}
for (csum = 0; size > 0; size--)
csum += pia[size - 1];
if (csum) {
- DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum);
+ dev_err(adev->dev, "Bad Product Info Area checksum: 0x%02x",
+ csum);
+ kfree(pia);
return -EIO;
}
/* Now extract useful information from the PIA.
*
- * Skip the Manufacturer Name at [3] and go directly to
- * the Product Name field.
+ * Read Manufacturer Name field whose length is [3].
*/
- addr = 3 + 1 + (pia[3] & 0x3F);
+ addr = 3;
if (addr + 1 >= len)
goto Out;
- memcpy(adev->product_name, pia + addr + 1,
- min_t(size_t,
- sizeof(adev->product_name),
+ memcpy(fru_info->manufacturer_name, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->manufacturer_name),
pia[addr] & 0x3F));
- adev->product_name[sizeof(adev->product_name) - 1] = '\0';
+ fru_info->manufacturer_name[sizeof(fru_info->manufacturer_name) - 1] =
+ '\0';
+
+ /* Read Product Name field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->product_name, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->product_name), pia[addr] & 0x3F));
+ fru_info->product_name[sizeof(fru_info->product_name) - 1] = '\0';
/* Go to the Product Part/Model Number field. */
addr += 1 + (pia[addr] & 0x3F);
if (addr + 1 >= len)
goto Out;
- memcpy(adev->product_number, pia + addr + 1,
- min_t(size_t,
- sizeof(adev->product_number),
+ memcpy(fru_info->product_number, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->product_number),
pia[addr] & 0x3F));
- adev->product_number[sizeof(adev->product_number) - 1] = '\0';
+ fru_info->product_number[sizeof(fru_info->product_number) - 1] = '\0';
/* Go to the Product Version field. */
addr += 1 + (pia[addr] & 0x3F);
@@ -204,10 +254,21 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
addr += 1 + (pia[addr] & 0x3F);
if (addr + 1 >= len)
goto Out;
- memcpy(adev->serial, pia + addr + 1, min_t(size_t,
- sizeof(adev->serial),
- pia[addr] & 0x3F));
- adev->serial[sizeof(adev->serial) - 1] = '\0';
+ memcpy(fru_info->serial, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->serial), pia[addr] & 0x3F));
+ fru_info->serial[sizeof(fru_info->serial) - 1] = '\0';
+
+ /* Asset Tag field */
+ addr += 1 + (pia[addr] & 0x3F);
+
+ /* FRU File Id field. This could be 'null'. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if ((addr + 1 >= len) || !(pia[addr] & 0x3F))
+ goto Out;
+ memcpy(fru_info->fru_id, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->fru_id), pia[addr] & 0x3F));
+ fru_info->fru_id[sizeof(fru_info->fru_id) - 1] = '\0';
+
Out:
kfree(pia);
return 0;
@@ -230,7 +291,7 @@ static ssize_t amdgpu_fru_product_name_show(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- return sysfs_emit(buf, "%s\n", adev->product_name);
+ return sysfs_emit(buf, "%s\n", adev->fru_info->product_name);
}
static DEVICE_ATTR(product_name, 0444, amdgpu_fru_product_name_show, NULL);
@@ -252,7 +313,7 @@ static ssize_t amdgpu_fru_product_number_show(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- return sysfs_emit(buf, "%s\n", adev->product_number);
+ return sysfs_emit(buf, "%s\n", adev->fru_info->product_number);
}
static DEVICE_ATTR(product_number, 0444, amdgpu_fru_product_number_show, NULL);
@@ -274,21 +335,65 @@ static ssize_t amdgpu_fru_serial_number_show(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- return sysfs_emit(buf, "%s\n", adev->serial);
+ return sysfs_emit(buf, "%s\n", adev->fru_info->serial);
}
static DEVICE_ATTR(serial_number, 0444, amdgpu_fru_serial_number_show, NULL);
+/**
+ * DOC: fru_id
+ *
+ * The amdgpu driver provides a sysfs API for reporting FRU File Id
+ * for the device.
+ * The file fru_id is used for this and returns the File Id value
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->fru_id);
+}
+
+static DEVICE_ATTR(fru_id, 0444, amdgpu_fru_id_show, NULL);
+
+/**
+ * DOC: manufacturer
+ *
+ * The amdgpu driver provides a sysfs API for reporting manufacturer name from
+ * FRU information.
+ * The file manufacturer returns the value as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_manufacturer_name_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->manufacturer_name);
+}
+
+static DEVICE_ATTR(manufacturer, 0444, amdgpu_fru_manufacturer_name_show, NULL);
+
static const struct attribute *amdgpu_fru_attributes[] = {
&dev_attr_product_name.attr,
&dev_attr_product_number.attr,
&dev_attr_serial_number.attr,
+ &dev_attr_fru_id.attr,
+ &dev_attr_manufacturer.attr,
NULL
};
int amdgpu_fru_sysfs_init(struct amdgpu_device *adev)
{
- if (!is_fru_eeprom_supported(adev, NULL))
+ if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info)
return 0;
return sysfs_create_files(&adev->dev->kobj, amdgpu_fru_attributes);
@@ -296,7 +401,7 @@ int amdgpu_fru_sysfs_init(struct amdgpu_device *adev)
void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev)
{
- if (!is_fru_eeprom_supported(adev, NULL))
+ if (!adev->fru_info)
return;
sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
index c817db17cfa7..98f3196599ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
@@ -24,6 +24,17 @@
#ifndef __AMDGPU_FRU_EEPROM_H__
#define __AMDGPU_FRU_EEPROM_H__
+#define AMDGPU_PRODUCT_NAME_LEN 64
+
+/* FRU product information */
+struct amdgpu_fru_info {
+ char product_number[20];
+ char product_name[AMDGPU_PRODUCT_NAME_LEN];
+ char serial[20];
+ char manufacturer_name[32];
+ char fru_id[50];
+};
+
int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
int amdgpu_fru_sysfs_init(struct amdgpu_device *adev);
void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
index 2d4b67175b55..328a1b963548 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
@@ -122,6 +122,10 @@ static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return 0;
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 2) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 3))
+ return 0;
+
if (adev->asic_type >= CHIP_SIENNA_CICHLID)
return 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 73b8cca35bab..b2033f8352f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -34,6 +34,7 @@
#include <asm/set_memory.h>
#endif
#include "amdgpu.h"
+#include "amdgpu_reset.h"
#include <drm/drm_drv.h>
#include <drm/ttm/ttm_tt.h>
@@ -77,8 +78,9 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
if (adev->dummy_page_addr)
return 0;
- adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0,
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ adev->dummy_page_addr = dma_map_page_attrs(&adev->pdev->dev, dummy_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) {
dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
adev->dummy_page_addr = 0;
@@ -98,8 +100,9 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
{
if (!adev->dummy_page_addr)
return;
- dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
+ dma_unmap_page_attrs(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
adev->dummy_page_addr = 0;
}
@@ -121,6 +124,7 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
struct amdgpu_bo_param bp;
dma_addr_t dma_addr;
struct page *p;
+ unsigned long x;
int ret;
if (adev->gart.bo != NULL)
@@ -130,6 +134,10 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
if (!p)
return -ENOMEM;
+ /* assign pages to this device */
+ for (x = 0; x < (1UL << order); x++)
+ p[x].mapping = adev->mman.bdev.dev_mapping;
+
/* If the hardware does not support UTCL2 snooping of the CPU caches
* then set_memory_wc() could be used as a workaround to mark the pages
* as write combine memory.
@@ -223,6 +231,7 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
unsigned int order = get_order(adev->gart.table_size);
struct sg_table *sg = adev->gart.bo->tbo.sg;
struct page *p;
+ unsigned long x;
int ret;
ret = amdgpu_bo_reserve(adev->gart.bo, false);
@@ -234,6 +243,8 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
sg_free_table(sg);
kfree(sg);
p = virt_to_page(adev->gart.ptr);
+ for (x = 0; x < (1UL << order); x++)
+ p[x].mapping = NULL;
__free_pages(p, order);
adev->gart.ptr = NULL;
@@ -317,10 +328,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
page_base += AMDGPU_GPU_PAGE_SIZE;
}
}
- mb();
- amdgpu_device_flush_hdp(adev, NULL);
- for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
- amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+ amdgpu_gart_invalidate_tlb(adev);
drm_dev_exit(idx);
}
@@ -400,7 +408,10 @@ void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
return;
mb();
- amdgpu_device_flush_hdp(adev, NULL);
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_device_flush_hdp(adev, NULL);
+ up_read(&adev->reset_domain->sem);
+ }
for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index 8283d682f543..7cc980bf4725 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -55,8 +55,6 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
-int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
-void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
int amdgpu_gart_init(struct amdgpu_device *adev);
void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index ca4d2d430e28..b7ebae289bea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -36,14 +36,122 @@
#include <drm/drm_exec.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_hmm.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
-static const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
+static int
+amdgpu_gem_add_input_fence(struct drm_file *filp,
+ uint64_t syncobj_handles_array,
+ uint32_t num_syncobj_handles)
+{
+ struct dma_fence *fence;
+ uint32_t *syncobj_handles;
+ int ret, i;
+
+ if (!num_syncobj_handles)
+ return 0;
+
+ syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array),
+ size_mul(sizeof(uint32_t), num_syncobj_handles));
+ if (IS_ERR(syncobj_handles))
+ return PTR_ERR(syncobj_handles);
+
+ for (i = 0; i < num_syncobj_handles; i++) {
+
+ if (!syncobj_handles[i]) {
+ ret = -EINVAL;
+ goto free_memdup;
+ }
+
+ ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence);
+ if (ret)
+ goto free_memdup;
+
+ dma_fence_wait(fence, false);
+
+ /* TODO: optimize async handling */
+ dma_fence_put(fence);
+ }
+
+free_memdup:
+ kfree(syncobj_handles);
+ return ret;
+}
+
+static int
+amdgpu_gem_update_timeline_node(struct drm_file *filp,
+ uint32_t syncobj_handle,
+ uint64_t point,
+ struct drm_syncobj **syncobj,
+ struct dma_fence_chain **chain)
+{
+ if (!syncobj_handle)
+ return 0;
+
+ /* Find the sync object */
+ *syncobj = drm_syncobj_find(filp, syncobj_handle);
+ if (!*syncobj)
+ return -ENOENT;
+
+ if (!point)
+ return 0;
+
+ /* Allocate the chain node */
+ *chain = dma_fence_chain_alloc();
+ if (!*chain) {
+ drm_syncobj_put(*syncobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void
+amdgpu_gem_update_bo_mapping(struct drm_file *filp,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation,
+ uint64_t point,
+ struct dma_fence *fence,
+ struct drm_syncobj *syncobj,
+ struct dma_fence_chain *chain)
+{
+ struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct dma_fence *last_update;
+
+ if (!syncobj)
+ return;
+
+ /* Find the last update fence */
+ switch (operation) {
+ case AMDGPU_VA_OP_MAP:
+ case AMDGPU_VA_OP_REPLACE:
+ if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
+ last_update = vm->last_update;
+ else
+ last_update = bo_va->last_pt_update;
+ break;
+ case AMDGPU_VA_OP_UNMAP:
+ case AMDGPU_VA_OP_CLEAR:
+ last_update = fence;
+ break;
+ default:
+ return;
+ }
+
+ /* Add fence to timeline */
+ if (!point)
+ drm_syncobj_replace_fence(syncobj, last_update);
+ else
+ drm_syncobj_add_point(syncobj, chain, last_update, point);
+}
static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
{
@@ -87,12 +195,10 @@ static const struct vm_operations_struct amdgpu_gem_vm_ops = {
static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
{
- struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
- if (robj) {
- amdgpu_hmm_unregister(robj);
- amdgpu_bo_unref(&robj);
- }
+ amdgpu_hmm_unregister(aobj);
+ ttm_bo_put(&aobj->tbo);
}
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
@@ -108,6 +214,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
memset(&bp, 0, sizeof(bp));
*obj = NULL;
+ flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
bp.size = size;
bp.byte_align = alignment;
@@ -125,7 +232,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
bo = &ubo->bo;
*obj = &bo->tbo.base;
- (*obj)->funcs = &amdgpu_gem_object_funcs;
return 0;
}
@@ -174,20 +280,63 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
return -EPERM;
if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
- abo->tbo.base.resv != vm->root.bo->tbo.base.resv)
+ !amdgpu_vm_is_bo_always_valid(vm, abo))
return -EPERM;
r = amdgpu_bo_reserve(abo, false);
if (r)
return r;
+ amdgpu_vm_bo_update_shared(abo);
bo_va = amdgpu_vm_bo_find(vm, abo);
if (!bo_va)
bo_va = amdgpu_vm_bo_add(adev, vm, abo);
else
++bo_va->ref_count;
+
+ /* attach gfx eviction fence */
+ r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
+ if (r) {
+ DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
+ amdgpu_bo_unreserve(abo);
+ return r;
+ }
+
amdgpu_bo_unreserve(abo);
- return 0;
+
+ /* Validate and add eviction fence to DMABuf imports with dynamic
+ * attachment in compute VMs. Re-validation will be done by
+ * amdgpu_vm_validate. Fences are on the reservation shared with the
+ * export, which is currently required to be validated and fenced
+ * already by amdgpu_amdkfd_gpuvm_restore_process_bos.
+ *
+ * Nested locking below for the case that a GEM object is opened in
+ * kfd_mem_export_dmabuf. Since the lock below is only taken for imports,
+ * but not for export, this is a different lock class that cannot lead to
+ * circular lock dependencies.
+ */
+ if (!vm->is_compute_context || !vm->process_info)
+ return 0;
+ if (!drm_gem_is_imported(obj) ||
+ !dma_buf_is_dynamic(obj->import_attach->dmabuf))
+ return 0;
+ mutex_lock_nested(&vm->process_info->lock, 1);
+ if (!WARN_ON(!vm->process_info->eviction_fence)) {
+ r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT,
+ &vm->process_info->eviction_fence->base);
+ if (r) {
+ struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm);
+
+ dev_warn(adev->dev, "validate_and_fence failed: %d\n", r);
+ if (ti) {
+ dev_warn(adev->dev, "pid %d\n", ti->task.pid);
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+ }
+ mutex_unlock(&vm->process_info->lock);
+
+ return r;
}
static void amdgpu_gem_object_close(struct drm_gem_object *obj,
@@ -203,7 +352,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_exec exec;
long r;
- drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES);
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
drm_exec_retry_on_contention(&exec);
@@ -216,11 +365,15 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
goto out_unlock;
}
+ if (!amdgpu_vm_is_bo_always_valid(vm, bo))
+ amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
+
bo_va = amdgpu_vm_bo_find(vm, bo);
if (!bo_va || --bo_va->ref_count)
goto out_unlock;
amdgpu_vm_bo_del(adev, bo_va);
+ amdgpu_vm_bo_update_shared(bo);
if (!amdgpu_vm_ready(vm))
goto out_unlock;
@@ -261,7 +414,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
return drm_gem_ttm_mmap(obj, vma);
}
-static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
+const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
.free = amdgpu_gem_object_free,
.open = amdgpu_gem_object_open,
.close = amdgpu_gem_object_close,
@@ -289,19 +442,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
uint32_t handle, initial_domain;
int r;
- /* reject DOORBELLs until userspace code to use it is available */
- if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL)
- return -EINVAL;
-
/* reject invalid gem flags */
- if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
- AMDGPU_GEM_CREATE_CPU_GTT_USWC |
- AMDGPU_GEM_CREATE_VRAM_CLEARED |
- AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
- AMDGPU_GEM_CREATE_EXPLICIT_SYNC |
- AMDGPU_GEM_CREATE_ENCRYPTED |
- AMDGPU_GEM_CREATE_DISCARDABLE))
+ if (flags & ~AMDGPU_GEM_CREATE_SETTABLE_MASK)
return -EINVAL;
/* reject invalid gem domains */
@@ -313,6 +455,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
+ /* always clear VRAM */
+ flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+
+ if (args->in.domains & AMDGPU_GEM_DOMAIN_MMIO_REMAP)
+ return -EINVAL;
+
/* create a gem object to contain this object in */
if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
@@ -424,8 +572,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
goto release_object;
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
- r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
- &range);
+ r = amdgpu_ttm_tt_get_user_pages(bo, &range);
if (r)
goto release_object;
@@ -433,6 +580,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
if (r)
goto user_pages_done;
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
@@ -602,18 +751,23 @@ out:
*
* Update the bo_va directly after setting its address. Errors are not
* vital here, so they are not reported back to userspace.
+ *
+ * Returns the resulting fence if freed BO(s) got cleared from the PT,
+ * otherwise a stub fence in case of error.
*/
-static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_bo_va *bo_va,
- uint32_t operation)
+static struct dma_fence *
+amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation)
{
+ struct dma_fence *fence = dma_fence_get_stub();
int r;
if (!amdgpu_vm_ready(vm))
- return;
+ return fence;
- r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
if (r)
goto error;
@@ -629,36 +783,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
-}
-/**
- * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
- *
- * @adev: amdgpu_device pointer
- * @flags: GEM UAPI flags
- *
- * Returns the GEM UAPI flags mapped into hardware for the ASIC.
- */
-uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
-{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
- pte_flag |= AMDGPU_PTE_EXECUTABLE;
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
- if (flags & AMDGPU_VM_PAGE_NOALLOC)
- pte_flag |= AMDGPU_PTE_NOALLOC;
-
- if (adev->gmc.gmc_funcs->map_mtype)
- pte_flag |= amdgpu_gmc_map_mtype(adev,
- flags & AMDGPU_VM_MTYPE_MASK);
-
- return pte_flag;
+ return fence;
}
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
@@ -677,15 +803,17 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo *abo;
struct amdgpu_bo_va *bo_va;
+ struct drm_syncobj *timeline_syncobj = NULL;
+ struct dma_fence_chain *timeline_chain = NULL;
+ struct dma_fence *fence;
struct drm_exec exec;
- uint64_t va_flags;
uint64_t vm_size;
int r = 0;
- if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
+ if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {
dev_dbg(dev->dev,
"va_address 0x%llx is in reserved area 0x%llx\n",
- args->va_address, AMDGPU_VA_RESERVED_SIZE);
+ args->va_address, AMDGPU_VA_RESERVED_BOTTOM);
return -EINVAL;
}
@@ -701,7 +829,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->va_address &= AMDGPU_GMC_HOLE_MASK;
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
- vm_size -= AMDGPU_VA_RESERVED_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_TOP;
if (args->va_address + args->map_size > vm_size) {
dev_dbg(dev->dev,
"va_address 0x%llx is in top reserved area 0x%llx\n",
@@ -738,8 +866,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
abo = NULL;
}
+ r = amdgpu_gem_add_input_fence(filp,
+ args->input_fence_syncobj_handles,
+ args->num_syncobj_handles);
+ if (r)
+ goto error_put_gobj;
+
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
if (gobj) {
r = drm_exec_lock_obj(&exec, gobj);
@@ -766,12 +900,19 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
bo_va = NULL;
}
+ r = amdgpu_gem_update_timeline_node(filp,
+ args->vm_timeline_syncobj_out,
+ args->vm_timeline_point,
+ &timeline_syncobj,
+ &timeline_chain);
+ if (r)
+ goto error;
+
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
- va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
- va_flags);
+ args->flags);
break;
case AMDGPU_VA_OP_UNMAP:
r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
@@ -783,20 +924,31 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->map_size);
break;
case AMDGPU_VA_OP_REPLACE:
- va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
- va_flags);
+ args->flags);
break;
default:
break;
}
- if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
- amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
- args->operation);
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
+ fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
+ args->operation);
+
+ if (timeline_syncobj)
+ amdgpu_gem_update_bo_mapping(filp, bo_va,
+ args->operation,
+ args->vm_timeline_point,
+ fence, timeline_syncobj,
+ timeline_chain);
+ else
+ dma_fence_put(fence);
+
+ }
error:
drm_exec_fini(&exec);
+error_put_gobj:
drm_gem_object_put(gobj);
return r;
}
@@ -804,22 +956,38 @@ error:
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_amdgpu_gem_op *args = data;
struct drm_gem_object *gobj;
struct amdgpu_vm_bo_base *base;
struct amdgpu_bo *robj;
+ struct drm_exec exec;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
int r;
+ if (args->padding)
+ return -EINVAL;
+
gobj = drm_gem_object_lookup(filp, args->handle);
if (!gobj)
return -ENOENT;
robj = gem_to_amdgpu_bo(gobj);
- r = amdgpu_bo_reserve(robj, false);
- if (unlikely(r))
- goto out;
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_lock_obj(&exec, gobj);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto out_exec;
+
+ if (args->op == AMDGPU_GEM_OP_GET_MAPPING_INFO) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto out_exec;
+ }
+ }
switch (args->op) {
case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: {
@@ -830,29 +998,26 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
info.alignment = robj->tbo.page_alignment << PAGE_SHIFT;
info.domains = robj->preferred_domains;
info.domain_flags = robj->flags;
- amdgpu_bo_unreserve(robj);
+ drm_exec_fini(&exec);
if (copy_to_user(out, &info, sizeof(info)))
r = -EFAULT;
break;
}
case AMDGPU_GEM_OP_SET_PLACEMENT:
- if (robj->tbo.base.import_attach &&
+ if (drm_gem_is_imported(&robj->tbo.base) &&
args->value & AMDGPU_GEM_DOMAIN_VRAM) {
r = -EINVAL;
- amdgpu_bo_unreserve(robj);
- break;
+ goto out_exec;
}
if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
r = -EPERM;
- amdgpu_bo_unreserve(robj);
- break;
+ goto out_exec;
}
for (base = robj->vm_bo; base; base = base->next)
if (amdgpu_xgmi_same_hive(amdgpu_ttm_adev(robj->tbo.bdev),
amdgpu_ttm_adev(base->vm->root.bo->tbo.bdev))) {
r = -EINVAL;
- amdgpu_bo_unreserve(robj);
- goto out;
+ goto out_exec;
}
@@ -864,18 +1029,147 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
- amdgpu_vm_bo_invalidate(adev, robj, true);
+ amdgpu_vm_bo_invalidate(robj, true);
+ drm_exec_fini(&exec);
+ break;
+ case AMDGPU_GEM_OP_GET_MAPPING_INFO: {
+ struct amdgpu_bo_va *bo_va = amdgpu_vm_bo_find(&fpriv->vm, robj);
+ struct drm_amdgpu_gem_vm_entry *vm_entries;
+ struct amdgpu_bo_va_mapping *mapping;
+ int num_mappings = 0;
+ /*
+ * num_entries is set as an input to the size of the user-allocated array of
+ * drm_amdgpu_gem_vm_entry stored at args->value.
+ * num_entries is sent back as output as the number of mappings the bo has.
+ * If that number is larger than the size of the array, the ioctl must
+ * be retried.
+ *
+ * NOTE(review): bo_va is used unchecked by the iterators below; confirm
+ * that amdgpu_vm_bo_find() cannot return NULL for this BO/VM pair.
+ */
+ vm_entries = kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL);
+ if (!vm_entries) {
+ /*
+ * The BO and the VM page directory are still locked in exec and
+ * the gobj reference is still held: unwind through out_exec
+ * instead of returning directly.
+ */
+ r = -ENOMEM;
+ goto out_exec;
+ }
+
+ /* Entries beyond the user array size are counted but not stored. */
+ amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) {
+ if (num_mappings < args->num_entries) {
+ vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].offset = mapping->offset;
+ vm_entries[num_mappings].flags = mapping->flags;
+ }
+ num_mappings += 1;
+ }
+
+ amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) {
+ if (num_mappings < args->num_entries) {
+ vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].offset = mapping->offset;
+ vm_entries[num_mappings].flags = mapping->flags;
+ }
+ num_mappings += 1;
+ }
+
+ drm_exec_fini(&exec);
+
+ /* Copy out only when every mapping fit into the user array. */
+ if (num_mappings > 0 && num_mappings <= args->num_entries)
+ if (copy_to_user(u64_to_user_ptr(args->value), vm_entries, num_mappings * sizeof(*vm_entries)))
+ r = -EFAULT;
+
+ args->num_entries = num_mappings;
- amdgpu_bo_unreserve(robj);
+ kvfree(vm_entries);
break;
+ }
default:
- amdgpu_bo_unreserve(robj);
+ drm_exec_fini(&exec);
r = -EINVAL;
}
-out:
drm_gem_object_put(gobj);
return r;
+out_exec:
+ drm_exec_fini(&exec);
+ drm_gem_object_put(gobj);
+ return r;
+}
+
+/**
+ * amdgpu_gem_list_handles_ioctl - get information about a process' buffer objects
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_gem_list_handles
+ * @filp: drm file pointer
+ *
+ * num_entries is set as an input to the size of the entries array.
+ * num_entries is sent back as output as the number of bos in the process.
+ * If that number is larger than the size of the array, the ioctl must
+ * be retried.
+ *
+ * The handle table of @filp is walked twice under filp->table_lock: once to
+ * count the handles and once to fill a kernel array allocated with kvcalloc(),
+ * which is then copied to the user pointer in args->entries. When the user
+ * array is too small, only the required count is written back to num_entries
+ * and 0 is returned without copying any entry.
+ *
+ * Returns:
+ * 0 for success, -errno for errors: -ENOMEM if the temporary array cannot be
+ * allocated, -EAGAIN if the second walk finds more handles than were counted,
+ * -EFAULT if copying to user space fails.
+ */
+int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct drm_amdgpu_gem_list_handles *args = data;
+ struct drm_amdgpu_gem_list_handles_entry *bo_entries;
+ struct drm_gem_object *gobj;
+ int id, ret = 0;
+ int bo_index = 0;
+ int num_bos = 0;
+
+ spin_lock(&filp->table_lock); /* first walk: count handles only */
+ idr_for_each_entry(&filp->object_idr, gobj, id)
+ num_bos += 1;
+ spin_unlock(&filp->table_lock);
+
+ if (args->num_entries < num_bos) { /* user array too small: report needed size */
+ args->num_entries = num_bos;
+ return 0;
+ }
+
+ if (num_bos == 0) {
+ args->num_entries = 0;
+ return 0;
+ }
+
+ bo_entries = kvcalloc(num_bos, sizeof(*bo_entries), GFP_KERNEL);
+ if (!bo_entries)
+ return -ENOMEM;
+
+ spin_lock(&filp->table_lock); /* second walk: the table may have changed since the count */
+ idr_for_each_entry(&filp->object_idr, gobj, id) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+ struct drm_amdgpu_gem_list_handles_entry *bo_entry;
+
+ if (bo_index >= num_bos) { /* more handles than were counted above */
+ ret = -EAGAIN;
+ break;
+ }
+
+ bo_entry = &bo_entries[bo_index];
+
+ bo_entry->size = amdgpu_bo_size(bo);
+ bo_entry->alloc_flags = bo->flags & AMDGPU_GEM_CREATE_SETTABLE_MASK;
+ bo_entry->preferred_domains = bo->preferred_domains;
+ bo_entry->gem_handle = id; /* idr id of the object in this file's handle table */
+ bo_entry->alignment = bo->tbo.page_alignment;
+
+ if (bo->tbo.base.import_attach)
+ bo_entry->flags |= AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT;
+
+ bo_index += 1;
+ }
+ spin_unlock(&filp->table_lock);
+
+ args->num_entries = bo_index; /* number of entries actually filled */
+
+ if (!ret) /* copies all num_bos slots, including any left unfilled by the second walk */
+ if (copy_to_user(u64_to_user_ptr(args->entries), bo_entries, num_bos * sizeof(*bo_entries)))
+ ret = -EFAULT;
+
+ kvfree(bo_entries);
+
+ return ret;
}
static int amdgpu_gem_align_pitch(struct amdgpu_device *adev,
@@ -962,6 +1256,7 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
list_for_each_entry(file, &dev->filelist, lhead) {
struct task_struct *task;
struct drm_gem_object *gobj;
+ struct pid *pid;
int id;
/*
@@ -971,8 +1266,9 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
* Therefore, we need to protect this ->comm access using RCU.
*/
rcu_read_lock();
- task = pid_task(file->pid, PIDTYPE_TGID);
- seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ seq_printf(m, "pid %8d command %s:\n", pid_nr(pid),
task ? task->comm : "<unknown>");
rcu_read_unlock();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index f30264782ba2..b558336bc4c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -33,6 +33,8 @@
#define AMDGPU_GEM_DOMAIN_MAX 0x3
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base)
+extern const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
+
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
/*
@@ -61,13 +63,28 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
-uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags);
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
+int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
+#define AMDGPU_GEM_CREATE_SETTABLE_MASK (AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | \
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS | \
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC | \
+ AMDGPU_GEM_CREATE_VRAM_CLEARED | \
+ AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | \
+ AMDGPU_GEM_CREATE_EXPLICIT_SYNC | \
+ AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE | \
+ AMDGPU_GEM_CREATE_ENCRYPTED | \
+ AMDGPU_GEM_CREATE_GFX12_DCC | \
+ AMDGPU_GEM_CREATE_DISCARDABLE | \
+ AMDGPU_GEM_CREATE_COHERENT | \
+ AMDGPU_GEM_CREATE_UNCACHED | \
+ AMDGPU_GEM_CREATE_EXT_COHERENT)
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 2382921710ec..ebe2b4c68b0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -24,11 +24,16 @@
*/
#include <linux/firmware.h>
+#include <linux/pm_runtime.h>
+
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
#include "amdgpu_xcp.h"
+#include "amdgpu_xgmi.h"
+#include "nvd.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
@@ -70,29 +75,20 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
}
-int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
- int me, int pipe, int queue)
+static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
+ int me, int pipe, int queue)
{
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
int bit = 0;
bit += me * adev->gfx.me.num_pipe_per_me
- * adev->gfx.me.num_queue_per_pipe;
- bit += pipe * adev->gfx.me.num_queue_per_pipe;
+ * num_queue_per_pipe;
+ bit += pipe * num_queue_per_pipe;
bit += queue;
return bit;
}
-void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
- int *me, int *pipe, int *queue)
-{
- *queue = bit % adev->gfx.me.num_queue_per_pipe;
- *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
- % adev->gfx.me.num_pipe_per_me;
- *me = (bit / adev->gfx.me.num_queue_per_pipe)
- / adev->gfx.me.num_pipe_per_me;
-}
-
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
int me, int pipe, int queue)
{
@@ -153,12 +149,12 @@ static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
if (amdgpu_compute_multipipe != -1) {
- DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
+ dev_info(adev->dev, "amdgpu: forcing compute pipe policy %d\n",
amdgpu_compute_multipipe);
return amdgpu_compute_multipipe == 1;
}
- if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
return true;
/* FIXME: spreading the queues across pipes causes perf regressions
@@ -244,8 +240,8 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
int i, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
- int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
- adev->gfx.me.num_queue_per_pipe;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+ int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe;
if (multipipe_policy) {
/* policy: amdgpu owns the first queue per pipe at this stage
@@ -253,9 +249,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
for (i = 0; i < max_queues_per_me; i++) {
pipe = i % adev->gfx.me.num_pipe_per_me;
queue = (i / adev->gfx.me.num_pipe_per_me) %
- adev->gfx.me.num_queue_per_pipe;
+ num_queue_per_pipe;
- set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
+ set_bit(pipe * num_queue_per_pipe + queue,
adev->gfx.me.queue_bitmap);
}
} else {
@@ -264,8 +260,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
}
/* update the number of active graphics rings */
- adev->gfx.num_gfx_rings =
- bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+ if (adev->gfx.num_gfx_rings)
+ adev->gfx.num_gfx_rings =
+ bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
@@ -303,11 +300,11 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
return -EINVAL;
}
-int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq, int xcc_id)
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_irq_src *irq = &kiq->irq;
+ struct amdgpu_ring *ring = &kiq->ring;
int r = 0;
spin_lock_init(&kiq->ring_lock);
@@ -328,7 +325,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
ring->eop_gpu_addr = kiq->eop_gpu_addr;
ring->no_scheduler = true;
- sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue);
+ snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
+ (unsigned char)xcc_id, (unsigned char)ring->me,
+ (unsigned char)ring->pipe, (unsigned char)ring->queue);
r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
@@ -384,9 +383,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring = &kiq->ring;
u32 domain = AMDGPU_GEM_DOMAIN_GTT;
+#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
domain |= AMDGPU_GEM_DOMAIN_VRAM;
+#endif
/* create MQD for KIQ */
if (!adev->enable_mes_kiq && !ring->mqd_obj) {
@@ -407,7 +408,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
}
/* prepare MQD backup */
- kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
+ kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
if (!kiq->mqd_backup) {
dev_warn(adev->dev,
"no memory to create MQD backup for ring %s\n", ring->name);
@@ -430,7 +431,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
ring->mqd_size = mqd_size;
/* prepare MQD backup */
- adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
+ adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.me.mqd_backup[i]) {
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
return -ENOMEM;
@@ -454,7 +455,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
ring->mqd_size = mqd_size;
/* prepare MQD backup */
- adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
+ adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.mec.mqd_backup[j]) {
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
return -ENOMEM;
@@ -504,9 +505,22 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
int i, r = 0;
int j;
+ if (adev->enable_mes) {
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->gfx.compute_ring[j],
+ RESET_QUEUES, 0, 0);
+ }
+ return 0;
+ }
+
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
spin_lock(&kiq->ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_compute_rings)) {
@@ -520,9 +534,15 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
&adev->gfx.compute_ring[j],
RESET_QUEUES, 0, 0);
}
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that the unmap queues packet submitted above has
+ * been processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
- if (kiq_ring->sched.ready && !adev->job_hang)
- r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
return r;
@@ -535,11 +555,26 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
int i, r = 0;
int j;
+ if (adev->enable_mes) {
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->gfx.gfx_ring[j],
+ PREEMPT_QUEUES, 0, 0);
+ }
+ }
+ return 0;
+ }
+
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
- spin_lock(&kiq->ring_lock);
+ if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ spin_lock(&kiq->ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_gfx_rings)) {
spin_unlock(&kiq->ring_lock);
@@ -552,11 +587,17 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
&adev->gfx.gfx_ring[j],
PREEMPT_QUEUES, 0, 0);
}
- }
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
- if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
+ /*
+ * Ring test will do a basic scratch register change check.
+ * Just run this to ensure that the unmap queues packet submitted
+ * above has been processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
- spin_unlock(&kiq->ring_lock);
+ spin_unlock(&kiq->ring_lock);
+ }
return r;
}
@@ -574,6 +615,44 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
return set_resource_bit;
}
+static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
+{ /*
+ * Map the compute rings of XCC @xcc_id with amdgpu_mes_map_legacy_queue().
+ * When adev->enable_uni_mes is not set, a set_resources packet with an
+ * all-ones queue mask is first emitted on the KIQ ring of that XCC.
+ * Returns 0 on success, or the error from the KIQ ring allocation or
+ * from the first failing queue mapping.
+ *
+ * NOTE(review): kiq->pmf is dereferenced here without a NULL check;
+ * amdgpu_gfx_enable_kcq() only validates kiq->pmf after dispatching to
+ * this function - confirm pmf is always set on this path.
+ */
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ uint64_t queue_mask = ~0ULL;
+ int r, i, j;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ if (!adev->enable_uni_mes) {
+ spin_lock(&kiq->ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
+ if (r) {
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ if (r) /* NOTE(review): failure is only logged, never returned - confirm intent */
+ dev_err(adev->dev, "KIQ failed to set resources\n");
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings; /* index into the per-device compute_ring array */
+ r = amdgpu_mes_map_legacy_queue(adev,
+ &adev->gfx.compute_ring[j]);
+ if (r) {
+ dev_err(adev->dev, "failed to map compute queue\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
@@ -581,6 +660,9 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
uint64_t queue_mask = 0;
int r, i, j;
+ if (adev->mes.enable_legacy_queue_map)
+ return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
+
if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
return -EINVAL;
@@ -592,41 +674,45 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
* generation exposes more than 64 queues. If so, the
* definition of queue_mask needs updating */
if (WARN_ON(i > (sizeof(queue_mask)*8))) {
- DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+ dev_err(adev->dev, "Invalid KCQ enabled: %d\n", i);
break;
}
queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
}
- DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
- kiq_ring->queue);
amdgpu_device_flush_hdp(adev, NULL);
+ dev_info(adev->dev, "kiq ring mec %d pipe %d q %d\n", kiq_ring->me,
+ kiq_ring->pipe, kiq_ring->queue);
+
spin_lock(&kiq->ring_lock);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
adev->gfx.num_compute_rings +
kiq->pmf->set_resources_size);
if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
spin_unlock(&kiq->ring_lock);
return r;
}
- if (adev->enable_mes)
- queue_mask = ~0ULL;
-
kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
j = i + xcc_id * adev->gfx.num_compute_rings;
- kiq->pmf->kiq_map_queues(kiq_ring,
- &adev->gfx.compute_ring[j]);
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.compute_ring[j]);
}
-
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that the map queues packet submitted above has
+ * been processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
if (r)
- DRM_ERROR("KCQ enable failed\n");
+ dev_err(adev->dev, "KCQ enable failed\n");
return r;
}
@@ -642,13 +728,27 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
amdgpu_device_flush_hdp(adev, NULL);
+ if (adev->mes.enable_legacy_queue_map) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ r = amdgpu_mes_map_legacy_queue(adev,
+ &adev->gfx.gfx_ring[j]);
+ if (r) {
+ dev_err(adev->dev, "failed to map gfx queue\n");
+ return r;
+ }
+ }
+
+ return 0;
+ }
+
spin_lock(&kiq->ring_lock);
/* No need to map kcq on the slave */
if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
adev->gfx.num_gfx_rings);
if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
spin_unlock(&kiq->ring_lock);
return r;
}
@@ -659,27 +759,23 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
&adev->gfx.gfx_ring[j]);
}
}
-
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that the map queues packet submitted above has
+ * been processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
if (r)
- DRM_ERROR("KCQ enable failed\n");
+ dev_err(adev->dev, "KGQ enable failed\n");
return r;
}
-/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
- *
- * @adev: amdgpu_device pointer
- * @bool enable true: enable gfx off feature, false: disable gfx off feature
- *
- * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
- * 2. other client can send request to disable gfx off feature, the request should be honored.
- * 3. other client can cancel their request of disable gfx off feature
- * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
- */
-
-void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable,
+ bool no_delay)
{
unsigned long delay = GFX_OFF_DELAY_ENABLE;
@@ -700,15 +796,22 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
if (adev->gfx.gfx_off_req_count == 0 &&
!adev->gfx.gfx_off_state) {
- schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+ /* If going to s2idle, no need to wait */
+ if (no_delay) {
+ if (!amdgpu_dpm_set_powergating_by_smu(adev,
+ AMD_IP_BLOCK_TYPE_GFX, true, 0))
+ adev->gfx.gfx_off_state = true;
+ } else {
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
delay);
+ }
}
} else {
if (adev->gfx.gfx_off_req_count == 0) {
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
if (adev->gfx.gfx_off_state &&
- !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
+ !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) {
adev->gfx.gfx_off_state = false;
if (adev->gfx.funcs->init_spm_golden) {
@@ -726,6 +829,43 @@ unlock:
mutex_unlock(&adev->gfx.gfx_off_mutex);
}
+/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true to enable the gfx off feature, false to disable it
+ *
+ * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled.
+ * 2. other clients can send a request to disable the gfx off feature; the request should be honored.
+ * 3. other clients can cancel their request to disable the gfx off feature.
+ * 4. other clients should not send a request to enable the gfx off feature before having disabled it.
+ *
+ * gfx off allow is requested with a delay, except when adev->in_s0ix is set,
+ * in which case no delay is requested.
+ */
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+{
+ /* If going to s2idle, no need to wait */
+ bool no_delay = adev->in_s0ix ? true : false;
+
+ amdgpu_gfx_do_off_ctrl(adev, enable, no_delay);
+}
+
+/* amdgpu_gfx_off_ctrl_immediate - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true to enable the gfx off feature, false to disable it
+ *
+ * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled.
+ * 2. other clients can send a request to disable the gfx off feature; the request should be honored.
+ * 3. other clients can cancel their request to disable the gfx off feature.
+ * 4. other clients should not send a request to enable the gfx off feature before having disabled it.
+ *
+ * gfx off allow will be issued immediately (no_delay is always passed as true).
+ */
+void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_do_off_ctrl(adev, enable, true);
+}
+
int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
{
int r = 0;
@@ -784,13 +924,19 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r
int r;
if (amdgpu_ras_is_supported(adev, ras_block->block)) {
- if (!amdgpu_persistent_edc_harvesting_supported(adev))
- amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+ if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
+ r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+ if (r)
+ return r;
+ }
r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
return r;
+ if (amdgpu_sriov_vf(adev))
+ return r;
+
if (adev->gfx.cp_ecc_error_irq.funcs) {
r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
if (r)
@@ -884,7 +1030,7 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
ih_data.head = *ras_if;
- DRM_ERROR("CP ECC ERROR IRQ\n");
+ dev_err(adev->dev, "CP ECC ERROR IRQ\n");
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
@@ -908,18 +1054,18 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
func(adev, ras_error_status, i);
}
-uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq, reg_val_offs = 0, value = 0;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *ring = &kiq->ring;
if (amdgpu_device_skip_hw_access(adev))
return 0;
- if (adev->mes.ring.sched.ready)
+ if (adev->mes.ring[0].sched.ready)
return amdgpu_mes_rreg(adev, reg);
BUG_ON(!ring->funcs->emit_rreg);
@@ -929,7 +1075,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
pr_err("critical bug! too many kiq readers\n");
goto failed_unlock;
}
- amdgpu_ring_alloc(ring, 32);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r)
@@ -953,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_read;
+
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
@@ -976,12 +1128,12 @@ failed_kiq_read:
return ~0;
}
-void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
{
signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *ring = &kiq->ring;
BUG_ON(!ring->funcs->emit_wreg);
@@ -989,13 +1141,16 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
if (amdgpu_device_skip_hw_access(adev))
return;
- if (adev->mes.ring.sched.ready) {
+ if (adev->mes.ring[0].sched.ready) {
amdgpu_mes_wreg(adev, reg, v);
return;
}
spin_lock_irqsave(&kiq->ring_lock, flags);
- amdgpu_ring_alloc(ring, 32);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
amdgpu_ring_emit_wreg(ring, reg, v);
r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
if (r)
@@ -1019,6 +1174,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
might_sleep();
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_write;
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
@@ -1031,6 +1188,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
failed_undo:
amdgpu_ring_undo(ring);
+failed_unlock:
spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
dev_err(adev->dev, "failed to write reg:%x\n", reg);
@@ -1175,7 +1333,8 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
break;
default:
- break;
+ dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
+ return;
}
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
@@ -1200,6 +1359,10 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
int mode;
+ /* Only minimal precaution taken to reject requests while in reset.*/
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
AMDGPU_XCP_FL_NONE);
@@ -1243,67 +1406,1082 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
return -EINVAL;
}
+ /* Don't allow a switch while under reset */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EPERM;
+
ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
+ up_read(&adev->reset_domain->sem);
+
if (ret)
return ret;
return count;
}
+/*
+ * Display names of the compute partition modes, indexed by partition mode.
+ * The table is only read, so both the strings and the pointers are const.
+ */
+static const char * const xcp_desc[] = {
+ [AMDGPU_SPX_PARTITION_MODE] = "SPX",
+ [AMDGPU_DPX_PARTITION_MODE] = "DPX",
+ [AMDGPU_TPX_PARTITION_MODE] = "TPX",
+ [AMDGPU_QPX_PARTITION_MODE] = "QPX",
+ [AMDGPU_CPX_PARTITION_MODE] = "CPX",
+};
+
static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
struct device_attribute *addr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- char *supported_partition;
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ int size = 0, mode;
+ char *sep = "";
- /* TBD */
- switch (NUM_XCC(adev->gfx.xcc_mask)) {
- case 8:
- supported_partition = "SPX, DPX, QPX, CPX";
- break;
- case 6:
- supported_partition = "SPX, TPX, CPX";
- break;
- case 4:
- supported_partition = "SPX, DPX, CPX";
- break;
- /* this seems only existing in emulation phase */
- case 2:
- supported_partition = "SPX, CPX";
- break;
- default:
- supported_partition = "Not supported";
- break;
+ if (!xcp_mgr || !xcp_mgr->avail_xcp_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_mgr->avail_xcp_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+/*
+ * Submit a NOP-filled IB on @ring with run_cleaner_shader set and wait for its
+ * fence. Returns 0 on success or a negative error code.
+ */
+static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct drm_gpu_scheduler *sched = &ring->sched;
+	struct drm_sched_entity entity;
+	static atomic_t counter;
+	struct dma_fence *f;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
+	void *owner;
+	int i, r;
+
+	/* Initialize the scheduler entity */
+	r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
+				  &sched, 1, NULL);
+	if (r) {
+		dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
+		return r;
+	}
+
+	/*
+	 * Use some unique dummy value as the owner to make sure we execute
+	 * the cleaner shader on each submission. The value just need to change
+	 * for each submission and is otherwise meaningless.
+	 */
+	owner = (void *)(unsigned long)atomic_inc_return(&counter);
+
+	r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
+				     64, 0, &job,
+				     AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER);
+	if (r)
+		goto out_entity;
+
+	job->enforce_isolation = true;
+	/* always run the cleaner shader */
+	job->run_cleaner_shader = true;
+
+	ib = &job->ibs[0];
+	for (i = 0; i <= ring->funcs->align_mask; ++i)
+		ib->ptr[i] = ring->funcs->nop;
+	ib->length_dw = ring->funcs->align_mask + 1;
+
+	f = amdgpu_job_submit(job);
+
+	/* Drop our fence reference whether or not the wait succeeded */
+	r = dma_fence_wait(f, false);
+	dma_fence_put(f);
+
+out_entity:
+	/* The entity was initialized above; tear it down on every exit path */
+	drm_sched_entity_destroy(&entity);
+	return r;
+}
+
+static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
+{
+ int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ struct amdgpu_ring *ring;
+ int num_xcc_to_clear;
+ int i, r, xcc_id;
+
+ if (adev->gfx.num_xcc_per_xcp)
+ num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
+ else
+ num_xcc_to_clear = 1;
+
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
+ if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
+ r = amdgpu_gfx_run_cleaner_shader_job(ring);
+ if (r)
+ return r;
+ num_xcc_to_clear--;
+ break;
+ }
+ }
+ }
+
+ if (num_xcc_to_clear)
+ return -ENOENT;
+
+ return 0;
+}
+
+/**
+ * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * Provides the sysfs interface to manually run a cleaner shader, which is
+ * used to clear the GPU state between different tasks. Writing a value to the
+ * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution.
+ * The value written corresponds to the partition index on multi-partition
+ * devices. On single-partition devices, the value should be '0'.
+ *
+ * The cleaner shader clears the Local Data Store (LDS) and General Purpose
+ * Registers (GPRs) to ensure data isolation between GPU workloads.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
+static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int ret;
+ long value;
+
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
+
+ if (adev->gfx.disable_kq)
+ return -EPERM;
+
+ ret = kstrtol(buf, 0, &value);
+
+ if (ret)
+ return -EINVAL;
+
+ if (value < 0)
+ return -EINVAL;
+
+ if (adev->xcp_mgr) {
+ if (value >= adev->xcp_mgr->num_xcps)
+ return -EINVAL;
+ } else {
+ if (value > 1)
+ return -EINVAL;
+ }
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
+ return ret;
+ }
+
+ ret = amdgpu_gfx_run_cleaner_shader(adev, value);
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer to store the output data
+ *
+ * Provides the sysfs read interface for the 'enforce_isolation' setting. With a
+ * partition manager, one value per partition is emitted, space separated;
+ * otherwise only entry 0. '0' indicates disabled, '1' enabled, '2' enabled in
+ * legacy mode and '3' enabled without cleaner shader.
+ *
+ * Return: The number of bytes written to @buf.
+ */
+static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
+						struct device_attribute *attr,
+						char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	int i;
+	ssize_t size = 0;
+
+	if (adev->xcp_mgr) {
+		for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
+			size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
+			if (i < (adev->xcp_mgr->num_xcps - 1))
+				size += sysfs_emit_at(buf, size, " ");
+		}
+		/* terminate via sysfs_emit_at() so the write stays bounded */
+		size += sysfs_emit_at(buf, size, "\n");
+	} else {
+		size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
+	}
+
+	return size;
+}
+
+/**
+ * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * This function allows control over the 'enforce_isolation' feature, which
+ * serializes access to the graphics engine. Writing '0' to disable, '1' to
+ * enable isolation with cleaner shader, '2' to enable legacy isolation without
+ * cleaner shader, or '3' to enable process isolation without submitting the
+ * cleaner shader to the 'enforce_isolation' sysfs file sets the isolation mode
+ * for each partition. The input should specify the setting for all
+ * partitions.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
+static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ long partition_values[MAX_XCP] = {0};
+ int ret, i, num_partitions;
+ const char *input_buf = buf;
+
+ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+ ret = sscanf(input_buf, "%ld", &partition_values[i]);
+ if (ret <= 0)
+ break;
+
+ /* Move the pointer to the next value in the string */
+ input_buf = strchr(input_buf, ' ');
+ if (input_buf) {
+ input_buf++;
+ } else {
+ i++;
+ break;
+ }
+ }
+ num_partitions = i;
+
+ if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
+ return -EINVAL;
+
+ if (!adev->xcp_mgr && num_partitions != 1)
+ return -EINVAL;
+
+ for (i = 0; i < num_partitions; i++) {
+ if (partition_values[i] != 0 &&
+ partition_values[i] != 1 &&
+ partition_values[i] != 2 &&
+ partition_values[i] != 3)
+ return -EINVAL;
}
- return sysfs_emit(buf, "%s\n", supported_partition);
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < num_partitions; i++) {
+ switch (partition_values[i]) {
+ case 0:
+ default:
+ adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
+ break;
+ case 1:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE;
+ break;
+ case 2:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+ break;
+ case 3:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
+ break;
+ }
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ amdgpu_mes_update_enforce_isolation(adev);
+
+ return count;
+}
+
+static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
}
+static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);
+}
+
+static DEVICE_ATTR(run_cleaner_shader, 0200,
+ NULL, amdgpu_gfx_set_run_cleaner_shader);
+
+static DEVICE_ATTR(enforce_isolation, 0644,
+ amdgpu_gfx_get_enforce_isolation,
+ amdgpu_gfx_set_enforce_isolation);
+
static DEVICE_ATTR(current_compute_partition, 0644,
amdgpu_gfx_get_current_compute_partition,
amdgpu_gfx_set_compute_partition);
static DEVICE_ATTR(available_compute_partition, 0444,
amdgpu_gfx_get_available_compute_partition, NULL);
+static DEVICE_ATTR(gfx_reset_mask, 0444,
+ amdgpu_gfx_get_gfx_reset_mask, NULL);
-int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+static DEVICE_ATTR(compute_reset_mask, 0444,
+ amdgpu_gfx_get_compute_reset_mask, NULL);
+
+static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
{
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ bool xcp_switch_supported;
int r;
+ if (!xcp_mgr)
+ return 0;
+
+ xcp_switch_supported =
+ (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
+
+ if (!xcp_switch_supported)
+ dev_attr_current_compute_partition.attr.mode &=
+ ~(S_IWUSR | S_IWGRP | S_IWOTH);
+
r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
if (r)
return r;
- r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
+ if (xcp_switch_supported)
+ r = device_create_file(adev->dev,
+ &dev_attr_available_compute_partition);
return r;
}
-void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev)
{
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ bool xcp_switch_supported;
+
+ if (!xcp_mgr)
+ return;
+
+ xcp_switch_supported =
+ (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
device_remove_file(adev->dev, &dev_attr_current_compute_partition);
- device_remove_file(adev->dev, &dev_attr_available_compute_partition);
+
+ if (xcp_switch_supported)
+ device_remove_file(adev->dev,
+ &dev_attr_available_compute_partition);
+}
+
+static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
+ if (r)
+ return r;
+ if (adev->gfx.enable_cleaner_shader)
+ r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
+
+ return r;
+}
+
+static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
+{
+ device_remove_file(adev->dev, &dev_attr_enforce_isolation);
+ if (adev->gfx.enable_cleaner_shader)
+ device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
+}
+
+static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (!amdgpu_gpu_recovery)
+ return r;
+
+ if (adev->gfx.num_gfx_rings) {
+ r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
+ if (r)
+ return r;
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_gpu_recovery)
+ return;
+
+ if (adev->gfx.num_gfx_rings)
+ device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
+
+ if (adev->gfx.num_compute_rings)
+ device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
+}
+
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_gfx_sysfs_xcp_init(adev);
+ if (r) {
+ dev_err(adev->dev, "failed to create xcp sysfs files");
+ return r;
+ }
+
+ r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
+ if (r)
+ dev_err(adev->dev, "failed to create isolation sysfs files");
+
+ r = amdgpu_gfx_sysfs_reset_mask_init(adev);
+ if (r)
+ dev_err(adev->dev, "failed to create reset mask sysfs files");
+
+ return r;
+}
+
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ amdgpu_gfx_sysfs_xcp_fini(adev);
+ amdgpu_gfx_sysfs_isolation_shader_fini(adev);
+ amdgpu_gfx_sysfs_reset_mask_fini(adev);
+ }
+}
+
+int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return -EOPNOTSUPP;
+
+ return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.cleaner_shader_obj,
+ &adev->gfx.cleaner_shader_gpu_addr,
+ (void **)&adev->gfx.cleaner_shader_cpu_ptr);
+}
+
+void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
+ &adev->gfx.cleaner_shader_gpu_addr,
+ (void **)&adev->gfx.cleaner_shader_cpu_ptr);
+}
+
+void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size,
+ const void *cleaner_shader_ptr)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
+ memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
+ cleaner_shader_size);
+}
+
+/**
+ * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
+ * @adev: amdgpu_device pointer
+ * @idx: Index of the scheduler to control
+ * @enable: Whether to enable or disable the KFD scheduler
+ *
+ * This function is used to control the KFD (Kernel Fusion Driver) scheduler
+ * from the KGD. It is part of the cleaner shader feature. This function plays
+ * a key role in enforcing process isolation on the GPU.
+ *
+ * The function uses a reference count (userq_sch_req_count) to keep track of
+ * the outstanding requests to disable the KFD scheduler. When a request to
+ * enable the KFD scheduler is made, the reference count is decremented. When
+ * it reaches zero while the scheduler is inactive, a delayed work is scheduled
+ * to run after enforce_isolation_time[idx] milliseconds.
+ *
+ * When a request to disable the KFD scheduler is made, the function first
+ * checks if the reference count is zero. If it is, it cancels the delayed work
+ * for enforcing isolation and checks if the KFD scheduler is active. If the
+ * KFD scheduler is active, it sends a request to stop the KFD scheduler and
+ * sets the KFD scheduler state to inactive. Then, it increments the reference
+ * count.
+ *
+ * The function is synchronized using the userq_sch_mutex to ensure that the
+ * KFD scheduler state and reference count are updated atomically.
+ *
+ * Note: If the reference count is already zero when a request to enable the
+ * KFD scheduler is made, it means there's an imbalance bug somewhere. The
+ * function triggers a warning in this case.
+ */
+static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
+ bool enable)
+{
+ mutex_lock(&adev->gfx.userq_sch_mutex);
+
+ if (enable) {
+ /* If the count is already 0, it means there's an imbalance bug somewhere.
+ * Note that the bug may be in a different caller than the one which triggers the
+ * WARN_ON_ONCE.
+ */
+ if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) {
+ dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
+ goto unlock;
+ }
+
+ adev->gfx.userq_sch_req_count[idx]--;
+
+ if (adev->gfx.userq_sch_req_count[idx] == 0 &&
+ adev->gfx.userq_sch_inactive[idx]) {
+ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
+ msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
+ }
+ } else {
+ if (adev->gfx.userq_sch_req_count[idx] == 0) {
+ cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
+ if (!adev->gfx.userq_sch_inactive[idx]) {
+ amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx);
+ if (adev->kfd.init_complete)
+ amdgpu_amdkfd_stop_sched(adev, idx);
+ adev->gfx.userq_sch_inactive[idx] = true;
+ }
+ }
+
+ adev->gfx.userq_sch_req_count[idx]++;
+ }
+
+unlock:
+ mutex_unlock(&adev->gfx.userq_sch_mutex);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
+ *
+ * @work: work_struct.
+ *
+ * This function is the work handler for enforcing shader isolation on AMD GPUs.
+ * It counts the emitted fences on the GFX and compute rings that belong to the
+ * work item's partition. If there are any fences, it reschedules itself to run
+ * again after 1 ms. If there are no fences, it restarts the user queue
+ * scheduler and, once KFD initialization is complete, tells KFD to resume the
+ * runqueue. The function is synchronized using the `enforce_isolation_mutex`.
+ */
+void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
+{
+ struct amdgpu_isolation_work *isolation_work =
+ container_of(work, struct amdgpu_isolation_work, work.work);
+ struct amdgpu_device *adev = isolation_work->adev;
+ u32 i, idx, fences = 0;
+
+ if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = isolation_work->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
+ if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
+ }
+ for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
+ if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
+ }
+ if (fences) {
+ /* we've already had our timeslice, so let's wrap this up */
+ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
+ msecs_to_jiffies(1));
+ } else {
+ /* Tell KFD to resume the runqueue */
+ WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]);
+ WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]);
+
+ amdgpu_userq_start_sched_for_enforce_isolation(adev, idx);
+ if (adev->kfd.init_complete)
+ amdgpu_amdkfd_start_sched(adev, idx);
+ adev->gfx.userq_sch_inactive[idx] = false;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation
+ * @adev: amdgpu_device pointer
+ * @idx: Index of the GPU partition
+ *
+ * When kernel submissions come in, the jobs are given a time slice and once
+ * that time slice is up, if there are KFD user queues active, kernel
+ * submissions are blocked until KFD has had its time slice. Once the KFD time
+ * slice is up, KFD user queues are preempted and kernel submissions are
+ * unblocked and allowed to run again.
+ */
+static void
+amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
+ u32 idx)
+{
+ unsigned long cjiffies;
+ bool wait = false;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ /* set the initial values if nothing is set */
+ if (!adev->gfx.enforce_isolation_jiffies[idx]) {
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ }
+ /* Make sure KFD gets a chance to run */
+ if (amdgpu_amdkfd_compute_active(adev, idx)) {
+ cjiffies = jiffies;
+ if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
+ cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
+ if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) {
+ /* if our time is up, let KGD work drain before scheduling more */
+ wait = true;
+ /* reset the timer period */
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ } else {
+ /* set the timer period to what's left in our time slice */
+ adev->gfx.enforce_isolation_time[idx] =
+ GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
+ }
+ } else {
+ /* if jiffies wrap around we will just wait a little longer */
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ }
+ } else {
+ /* if there is no KFD work, then set the full slice period */
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ }
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (wait)
+ msleep(GFX_SLICE_PERIOD_MS);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring begin_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
+void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 idx;
+ bool sched_work = false;
+
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = ring->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ /* Don't submit more work until KFD has had some time */
+ amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ if (adev->kfd.init_complete)
+ sched_work = true;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring end_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
+void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 idx;
+ bool sched_work = false;
+
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = ring->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ if (adev->kfd.init_complete)
+ sched_work = true;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
+}
+
+void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, gfx.idle_work.work);
+ enum PP_SMC_POWER_PROFILE profile;
+ u32 i, fences = 0;
+ int r;
+
+ if (adev->gfx.num_gfx_rings)
+ profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+ else
+ profile = PP_SMC_POWER_PROFILE_COMPUTE;
+
+ for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
+ if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
+ mutex_lock(&adev->gfx.workload_profile_mutex);
+ if (adev->gfx.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, profile, false);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
+ profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+ "fullscreen 3D" : "compute");
+ adev->gfx.workload_profile_active = false;
+ }
+ mutex_unlock(&adev->gfx.workload_profile_mutex);
+ } else {
+ schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
+ }
+}
+
+/* Ring begin_use helper: count the submission, cancel the pending idle work
+ * and enable the workload power profile if it is not active yet.
+ */
+void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	enum PP_SMC_POWER_PROFILE profile;
+	int r;
+
+	if (amdgpu_dpm_is_overdrive_enabled(adev))
+		return;
+
+	profile = adev->gfx.num_gfx_rings ? PP_SMC_POWER_PROFILE_FULLSCREEN3D :
+					     PP_SMC_POWER_PROFILE_COMPUTE;
+
+	atomic_inc(&adev->gfx.total_submission_cnt);
+	cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+	/* We can safely return early here because we've cancelled the
+	 * delayed work so there is no one else to set it to false
+	 * and we don't care if someone else sets it to true.
+	 */
+	if (adev->gfx.workload_profile_active)
+		return;
+
+	mutex_lock(&adev->gfx.workload_profile_mutex);
+	if (!adev->gfx.workload_profile_active) {
+		r = amdgpu_dpm_switch_power_profile(adev, profile, true);
+		if (r)
+			dev_warn(adev->dev, "(%d) failed to enable %s power profile mode\n", r,
+				 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+				 "fullscreen 3D" : "compute");
+		adev->gfx.workload_profile_active = true;
+	}
+	mutex_unlock(&adev->gfx.workload_profile_mutex);
+}
+
+void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (amdgpu_dpm_is_overdrive_enabled(adev))
+ return;
+
+ atomic_dec(&ring->adev->gfx.total_submission_cnt);
+
+ schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_gfx_csb_preamble_start - Set CSB preamble start
+ *
+ * @buffer: This is an output variable that gets the PACKET3 preamble setup.
+ *
+ * Return:
+ * return the latest index.
+ */
+u32 amdgpu_gfx_csb_preamble_start(u32 *buffer)
+{
+ u32 count = 0;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_csb_data_parser - Parse CS data
+ *
+ * @adev: amdgpu_device pointer used to get the CS data and other gfx info.
+ * @buffer: This is an output variable that gets the SET_CONTEXT_REG packets.
+ * @count: Index in @buffer at which to start writing.
+ *
+ * Return:
+ * return the latest index.
+ */
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count)
+{
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ u32 i;
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ }
+ }
+ }
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_csb_preamble_end - Set CSB preamble end
+ *
+ * @buffer: This is an output variable that gets the PACKET3 preamble end.
+ * @count: Index in @buffer at which to start writing the preamble end.
+ */
+void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count)
+{
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+/*
+ * debugfs to enable/disable gfx job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+/* debugfs write handler: bit i of @val enables or disables gfx ring i */
+static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u32 i;
+	u64 mask = 0;
+	struct amdgpu_ring *ring;
+
+	if (!adev)
+		return -ENODEV;
+
+	/* refuse a mask that would leave no gfx ring enabled */
+	mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
+	if ((val & mask) == 0)
+		return -EINVAL;
+
+	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+		ring = &adev->gfx.gfx_ring[i];
+		/* 64-bit shift, matching the u64 mask built by the getter */
+		ring->sched.ready = !!(val & (1ULL << i));
+	}
+	/* publish sched.ready flag update effective immediately across smp */
+	smp_rmb();
+	return 0;
+}
+
+static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
+ amdgpu_debugfs_gfx_sched_mask_get,
+ amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->gfx.num_gfx_rings > 1))
+ return;
+ sprintf(name, "amdgpu_gfx_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_gfx_sched_mask_fops);
+#endif
+}
+
+/*
+ * debugfs to enable/disable compute job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+/* debugfs write handler: bit i of @val enables or disables compute ring i */
+static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u32 i;
+	u64 mask = 0;
+	struct amdgpu_ring *ring;
+
+	if (!adev)
+		return -ENODEV;
+
+	/* refuse a mask that would leave no compute ring enabled */
+	mask = (1ULL << adev->gfx.num_compute_rings) - 1;
+	if ((val & mask) == 0)
+		return -EINVAL;
+
+	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+		ring = &adev->gfx.compute_ring[i];
+		/* 64-bit shift, matching the u64 mask built by the getter */
+		ring->sched.ready = !!(val & (1ULL << i));
+	}
+
+	/* publish sched.ready flag update effective immediately across smp */
+	smp_rmb();
+	return 0;
+}
+
+static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
+ amdgpu_debugfs_compute_sched_mask_get,
+ amdgpu_debugfs_compute_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->gfx.num_compute_rings > 1))
+ return;
+ sprintf(name, "amdgpu_compute_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_compute_sched_mask_fops);
+#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 0ca95c4d4bfb..fb5f7a0ee029 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -34,6 +34,7 @@
#include "soc15.h"
#include "amdgpu_ras.h"
#include "amdgpu_ring_mux.h"
+#include "amdgpu_xcp.h"
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -43,10 +44,10 @@
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
#define AMDGPU_MAX_GC_INSTANCES 8
-#define KGD_MAX_QUEUES 128
+#define AMDGPU_MAX_QUEUES 128
-#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
-#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+#define AMDGPU_MAX_GFX_QUEUES AMDGPU_MAX_QUEUES
+#define AMDGPU_MAX_COMPUTE_QUEUES AMDGPU_MAX_QUEUES
enum amdgpu_gfx_pipe_priority {
AMDGPU_GFX_PIPE_PRIO_NORMAL = AMDGPU_RING_PRIO_1,
@@ -56,6 +57,9 @@ enum amdgpu_gfx_pipe_priority {
#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15
+/* 1 second timeout */
+#define GFX_PROFILE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
enum amdgpu_gfx_partition {
AMDGPU_SPX_PARTITION_MODE = 0,
AMDGPU_DPX_PARTITION_MODE = 1,
@@ -69,11 +73,6 @@ enum amdgpu_gfx_partition {
#define NUM_XCC(x) hweight16(x)
-enum amdgpu_pkg_type {
- AMDGPU_PKG_TYPE_APU = 2,
- AMDGPU_PKG_TYPE_UNKNOWN,
-};
-
enum amdgpu_gfx_ras_mem_id_type {
AMDGPU_GFX_CP_MEM = 0,
AMDGPU_GFX_GCEA_MEM,
@@ -143,6 +142,10 @@ struct kiq_pm4_funcs {
void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub);
+ void (*kiq_reset_hw_queue)(struct amdgpu_ring *kiq_ring,
+ uint32_t queue_type, uint32_t me_id,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid);
/* Packet sizes */
int set_resources_size;
int map_queues_size;
@@ -167,10 +170,46 @@ struct amdgpu_kiq {
#define AMDGPU_GFX_MAX_SE 4
#define AMDGPU_GFX_MAX_SH_PER_SE 2
+/**
+ * amdgpu_rb_config - Configure a single Render Backend (RB)
+ *
+ * Bad RBs are fused off and there is a harvest register the driver reads to
+ * determine which RB(s) are fused off so that the driver can configure the
+ * hardware state so that nothing gets sent to them. There are also user
+ * harvest registers that the driver can program to disable additional RBs,
+ * etc., for testing purposes.
+ */
struct amdgpu_rb_config {
+ /**
+ * @rb_backend_disable:
+ *
+ * The value captured from register RB_BACKEND_DISABLE indicates if the
+ * RB backend is disabled or not.
+ */
uint32_t rb_backend_disable;
+
+ /**
+ * @user_rb_backend_disable:
+ *
+ * The value captured from register USER_RB_BACKEND_DISABLE indicates
+ * if the User RB backend is disabled or not.
+ */
uint32_t user_rb_backend_disable;
+
+ /**
+ * @raster_config:
+ *
+ * To set up all of the states, it is necessary to have two registers
+ * to keep all of the states. This field holds the first register.
+ */
uint32_t raster_config;
+
+ /**
+ * @raster_config_1:
+ *
+ * To set up all of the states, it is necessary to have two registers
+ * to keep all of the states. This field holds the second register.
+ */
uint32_t raster_config_1;
};
@@ -218,6 +257,13 @@ struct amdgpu_gfx_config {
uint32_t macrotile_mode_array[16];
struct gb_addr_config gb_addr_config_fields;
+
+ /**
+ * @rb_config:
+ *
+ * Matrix that keeps all the Render Backend (color and depth buffer
+ * handling) configuration on the 3D engine.
+ */
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
/* gfx configure feature */
@@ -245,6 +291,12 @@ struct amdgpu_gfx_config {
uint32_t gc_tcp_size_per_cu;
uint32_t gc_num_cu_per_sqc;
uint32_t gc_tcc_size;
+ uint32_t gc_tcp_cache_line_size;
+ uint32_t gc_instruction_cache_size_per_sqc;
+ uint32_t gc_instruction_cache_line_size;
+ uint32_t gc_scalar_data_cache_size_per_sqc;
+ uint32_t gc_scalar_data_cache_line_size;
+ uint32_t gc_tcc_cache_line_size;
};
struct amdgpu_cu_info {
@@ -264,7 +316,6 @@ struct amdgpu_cu_info {
struct amdgpu_gfx_ras {
struct amdgpu_ras_block_object ras_block;
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
- bool (*query_utcl2_poison_status)(struct amdgpu_device *adev);
int (*rlc_gc_fed_irq)(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
@@ -297,12 +348,14 @@ struct amdgpu_gfx_funcs {
void (*init_spm_golden)(struct amdgpu_device *adev);
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
- struct amdgpu_gfx_shadow_info *shadow_info);
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check);
enum amdgpu_gfx_partition
(*query_partition_mode)(struct amdgpu_device *adev);
int (*switch_partition_mode)(struct amdgpu_device *adev,
int num_xccs_per_xcp);
int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
+ int (*get_xccs_per_xcp)(struct amdgpu_device *adev);
};
struct sq_work {
@@ -344,6 +397,12 @@ struct amdgpu_me {
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
};
+/*
+ * Deferred work item that pairs a delayed_work with a device pointer and an
+ * XCP id. struct amdgpu_gfx keeps one instance per XCP in its
+ * enforce_isolation[] array.
+ */
+struct amdgpu_isolation_work {
+ struct amdgpu_device *adev;
+ u32 xcp_id;
+ struct delayed_work work;
+};
+
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
@@ -396,6 +455,7 @@ struct amdgpu_gfx {
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
+ struct amdgpu_irq_src bad_op_irq;
struct amdgpu_irq_src cp_ecc_error_irq;
struct amdgpu_irq_src sq_irq;
struct amdgpu_irq_src rlc_gc_fed_irq;
@@ -411,6 +471,8 @@ struct amdgpu_gfx {
/* reset mask */
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
+ uint32_t gfx_supported_reset;
+ uint32_t compute_supported_reset;
/* gfx off */
bool gfx_off_state; /* true: enabled, false: disabled */
@@ -439,6 +501,36 @@ struct amdgpu_gfx {
uint32_t num_xcc_per_xcp;
struct mutex partition_mutex;
bool mcbp; /* mid command buffer preemption */
+
+ /* IP reg dump */
+ uint32_t *ip_dump_core;
+ uint32_t *ip_dump_compute_queues;
+ uint32_t *ip_dump_gfx_queues;
+
+ struct mutex reset_sem_mutex;
+
+ /* cleaner shader */
+ struct amdgpu_bo *cleaner_shader_obj;
+ unsigned int cleaner_shader_size;
+ u64 cleaner_shader_gpu_addr;
+ void *cleaner_shader_cpu_ptr;
+ const void *cleaner_shader_ptr;
+ bool enable_cleaner_shader;
+ struct amdgpu_isolation_work enforce_isolation[MAX_XCP];
+ /* Mutex for synchronizing KFD scheduler operations */
+ struct mutex userq_sch_mutex;
+ u64 userq_sch_req_count[MAX_XCP];
+ bool userq_sch_inactive[MAX_XCP];
+ unsigned long enforce_isolation_jiffies[MAX_XCP];
+ unsigned long enforce_isolation_time[MAX_XCP];
+
+ atomic_t total_submission_cnt;
+ struct delayed_work idle_work;
+ bool workload_profile_active;
+ struct mutex workload_profile_mutex;
+
+ bool disable_kq;
+ bool disable_uq;
};
struct amdgpu_gfx_ras_reg_entry {
@@ -458,7 +550,7 @@ struct amdgpu_gfx_ras_mem_id_entry {
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id)))
#define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
-#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si)))
+#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false))
/**
* amdgpu_gfx_create_bitmask - create a bitmask
@@ -476,9 +568,7 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
unsigned max_sh);
-int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq, int xcc_id);
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id);
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
@@ -507,13 +597,10 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
-int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
- int pipe, int queue);
-void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
- int *me, int *pipe, int *queue);
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
int pipe, int queue);
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
+void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable);
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
@@ -526,8 +613,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
-uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
-void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
@@ -542,6 +629,25 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
void *ras_error_status,
void (*func)(struct amdgpu_device *adev, void *ras_error_status,
int xcc_id));
+int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size);
+void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size,
+ const void *cleaner_shader_ptr);
+void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work);
+void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring);
+
+void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work);
+void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring);
+u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
+void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
+
+void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
{
@@ -559,8 +665,6 @@ static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
default:
return "UNKNOWN";
}
-
- return "UNKNOWN";
}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index d78bd9732543..9dcf51991b5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -32,11 +32,19 @@
#include "amdgpu.h"
#include "amdgpu_gmc.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
#include "amdgpu_xgmi.h"
#include <drm/drm_drv.h>
#include <drm/ttm/ttm_tt.h>
+static const u64 four_gb = 0x100000000ULL;
+
+/**
+ * amdgpu_gmc_is_pdb0_enabled - report whether PDB0 is in use
+ * @adev: amdgpu device
+ *
+ * Returns true when the device is XGMI-connected to the CPU or when
+ * amdgpu_virt_xgmi_migrate_enabled() reports true for it.
+ */
+bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev)
+{
+	if (adev->gmc.xgmi.connected_to_cpu)
+		return true;
+
+	return amdgpu_virt_xgmi_migrate_enabled(adev);
+}
+
/**
* amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
*
@@ -51,7 +59,7 @@ int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
struct amdgpu_bo_param bp;
u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
- uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) -1) >> pde0_page_shift;
+ uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift;
memset(&bp, 0, sizeof(bp));
bp.size = PAGE_ALIGN((npdes + 1) * 8);
@@ -180,6 +188,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ if (!bo->ttm)
+ return AMDGPU_BO_INVALID_OFFSET;
+
if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
return AMDGPU_BO_INVALID_OFFSET;
@@ -247,10 +258,20 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
- mc->gart_start = hive_vram_end + 1;
+ /* node_segment_size may not be 4GB aligned on SRIOV, so aligning up is needed. */
+ mc->gart_start = ALIGN(hive_vram_end + 1, four_gb);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
- mc->fb_start = hive_vram_start;
- mc->fb_end = hive_vram_end;
+ if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
+ /* set mc->vram_start to 0 to switch the returned GPU address of
+ * amdgpu_bo_create_reserved() from FB aperture to GART aperture.
+ */
+ mc->vram_start = 0;
+ mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+ mc->visible_vram_size = min(mc->visible_vram_size, mc->real_vram_size);
+ } else {
+ mc->fb_start = hive_vram_start;
+ mc->fb_end = hive_vram_end;
+ }
dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
mc->mc_vram_size >> 20, mc->vram_start,
mc->vram_end, mc->real_vram_size >> 20);
@@ -263,14 +284,15 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
*
* @adev: amdgpu device structure holding all necessary information
* @mc: memory controller structure holding memory information
+ * @gart_placement: GART placement policy with respect to VRAM
*
- * Function will place try to place GART before or after VRAM.
+ * Function will try to place GART before or after VRAM.
* If GART size is bigger than space left then we ajust GART size.
* Thus function will never fails.
*/
-void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+ enum amdgpu_gart_placement gart_placement)
{
- const uint64_t four_gb = 0x100000000ULL;
u64 size_af, size_bf;
/*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
@@ -286,11 +308,22 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
mc->gart_size = max(size_bf, size_af);
}
- if ((size_bf >= mc->gart_size && size_bf < size_af) ||
- (size_af < mc->gart_size))
- mc->gart_start = 0;
- else
+ switch (gart_placement) {
+ case AMDGPU_GART_PLACEMENT_HIGH:
mc->gart_start = max_mc_address - mc->gart_size + 1;
+ break;
+ case AMDGPU_GART_PLACEMENT_LOW:
+ mc->gart_start = 0;
+ break;
+ case AMDGPU_GART_PLACEMENT_BEST_FIT:
+ default:
+ if ((size_bf >= mc->gart_size && size_bf < size_af) ||
+ (size_af < mc->gart_size))
+ mc->gart_start = 0;
+ else
+ mc->gart_start = max_mc_address - mc->gart_size + 1;
+ break;
+ }
mc->gart_start &= ~(four_gb - 1);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
@@ -315,14 +348,6 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
u64 size_af, size_bf;
- if (amdgpu_sriov_vf(adev)) {
- mc->agp_start = 0xffffffffffff;
- mc->agp_end = 0x0;
- mc->agp_size = 0;
-
- return;
- }
-
if (mc->fb_start > mc->gart_start) {
size_bf = (mc->fb_start & sixteen_gb_mask) -
ALIGN(mc->gart_end + 1, sixteen_gb);
@@ -347,6 +372,25 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
}
/**
+ * amdgpu_gmc_set_agp_default - Set the default AGP aperture value.
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * To disable the AGP aperture, you need to set the start to a larger
+ * value than the end. This function sets the default value which
+ * can then be overridden using amdgpu_gmc_agp_location() if you want
+ * to enable the AGP aperture on a specific chip.
+ *
+ */
+void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
+				struct amdgpu_gmc *mc)
+{
+	/* Zero-sized aperture whose start lies above its end, i.e. disabled. */
+	mc->agp_size = 0;
+	mc->agp_end = 0;
+	mc->agp_start = 0xffffffffffff;
+}
+
+/**
* amdgpu_gmc_fault_key - get hask key from vm fault address and pasid
*
* @addr: 48 bit physical address, page aligned (36 significant bits)
@@ -452,7 +496,10 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
uint32_t hash;
uint64_t tmp;
- ih = adev->irq.retry_cam_enabled ? &adev->irq.ih_soft : &adev->irq.ih1;
+ if (adev->irq.retry_cam_enabled)
+ return;
+
+ ih = &adev->irq.ih1;
/* Get the WPTR of the last entry in IH ring */
last_wptr = amdgpu_ih_get_wptr(adev, ih);
/* Order wptr with ring data. */
@@ -542,6 +589,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
unsigned i;
unsigned vmhub, inv_eng;
+ struct amdgpu_ring *shared_ring;
/* init the vm inv eng for all vmhubs */
for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
@@ -549,13 +597,23 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
/* reserve engine 5 for firmware */
if (adev->enable_mes)
vm_inv_engs[i] &= ~(1 << 5);
+ /* reserve mmhub engine 3 for firmware */
+ if (adev->enable_umsch_mm)
+ vm_inv_engs[i] &= ~(1 << 3);
}
for (i = 0; i < adev->num_rings; ++i) {
ring = adev->rings[i];
vmhub = ring->vm_hub;
- if (ring == &adev->mes.ring)
+ if (ring == &adev->mes.ring[0] ||
+ ring == &adev->mes.ring[1] ||
+ ring == &adev->umsch_mm.ring ||
+ ring == &adev->cper.ring_buf)
+ continue;
+
+ /* Skip if the ring is a shared ring */
+ if (amdgpu_sdma_is_shared_inv_eng(adev, ring))
continue;
inv_eng = ffs(vm_inv_engs[vmhub]);
@@ -570,11 +628,229 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
ring->name, ring->vm_inv_eng, ring->vm_hub);
+ /* SDMA has a special packet which allows it to use the same
+ * invalidation engine for all the rings in one instance.
+ * Therefore, we do not allocate a separate VM invalidation engine
+ * for SDMA page rings. Instead, they share the VM invalidation
+ * engine with the SDMA gfx ring. This change ensures efficient
+ * resource management and avoids the issue of insufficient VM
+ * invalidation engines.
+ */
+ shared_ring = amdgpu_sdma_get_shared_ring(adev, ring);
+ if (shared_ring) {
+ shared_ring->vm_inv_eng = ring->vm_inv_eng;
+ dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n",
+ ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub);
+ continue;
+ }
}
return 0;
}
+/**
+ * amdgpu_gmc_flush_gpu_tlb - flush the TLB of one VMID on one VM hub
+ * @adev: amdgpu device
+ * @vmid: VMID to flush
+ * @vmhub: index into adev->vmhub[] selecting the hub
+ * @flush_type: flush type handed to the gmc flush_gpu_tlb callback
+ *
+ * Flushes through the gmc_funcs->flush_gpu_tlb callback, unless the hub asks
+ * for the SDMA invalidation workaround, the request is for VMID 0 and the
+ * buffer funcs ring is usable. In that case a small job with vm_needs_flush
+ * set is submitted on the buffer funcs ring and waited for instead.
+ */
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ struct dma_fence *fence;
+ struct amdgpu_job *job;
+ int r;
+
+ /* Direct path: workaround not requested, non-zero VMID, or SDMA not usable. */
+ if (!hub->sdma_invalidation_workaround || vmid ||
+ !adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
+ !ring->sched.ready) {
+ /*
+ * A GPU reset should flush all TLBs anyway, so no need to do
+ * this while one is ongoing.
+ */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return;
+
+ /* Optional extra flushes requested by the gmc flags come first. */
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+ vmhub, 2);
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+ vmhub, 0);
+
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
+ flush_type);
+ up_read(&adev->reset_domain->sem);
+ return;
+ }
+
+ /* The SDMA on Navi 1x has a bug which can theoretically result in memory
+ * corruption if an invalidation happens at the same time as a VA
+ * translation. Avoid this by doing the invalidation from the SDMA
+ * itself at least for GART.
+ */
+ mutex_lock(&adev->mman.gtt_window_lock);
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+ &job, AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB);
+ if (r)
+ goto error_alloc;
+
+ /* The IB only carries a NOP; the flush comes from vm_needs_flush. */
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+ job->vm_needs_flush = true;
+ job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ fence = amdgpu_job_submit(job);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ /* Wait for the job outside of gtt_window_lock. */
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ return;
+
+error_alloc:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
+}
+
+/**
+ * amdgpu_gmc_flush_gpu_tlb_pasid - flush the TLB entries of one PASID
+ * @adev: amdgpu device
+ * @pasid: PASID to flush
+ * @flush_type: flush type handed to the flush callback / KIQ packet
+ * @all_hub: handed unchanged to the flush callback / KIQ packet
+ * @inst: index into adev->gfx.kiq[]; also handed to the flush callback
+ *
+ * Uses the gmc_funcs->flush_gpu_tlb_pasid callback unless
+ * gmc.flush_pasid_uses_kiq is set and the KIQ ring is ready, in which case
+ * invalidate packets are emitted on the KIQ ring and a polling fence is
+ * waited for.
+ *
+ * Returns 0 on success, and also when the flush is skipped because the reset
+ * semaphore could not be taken or the wait was cut short by a pending reset.
+ * Returns -ETIME when the KIQ fence did not signal within the retry budget,
+ * or the error from amdgpu_ring_alloc() / amdgpu_fence_emit_polling().
+ */
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst)
+{
+ struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+ unsigned int ndw;
+ int r, cnt = 0;
+ uint32_t seq;
+
+ /*
+ * A GPU reset should flush all TLBs anyway, so no need to do
+ * this while one is ongoing.
+ */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return 0;
+
+ if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ 2, all_hub,
+ inst);
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ 0, all_hub,
+ inst);
+
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ flush_type, all_hub,
+ inst);
+ r = 0;
+ } else {
+ /* 2 dwords flush + 8 dwords fence */
+ ndw = kiq->pmf->invalidate_tlbs_size + 8;
+
+ /* Reserve room for each optional extra invalidate packet. */
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ ndw += kiq->pmf->invalidate_tlbs_size;
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0)
+ ndw += kiq->pmf->invalidate_tlbs_size;
+
+ /*
+ * NOTE(review): ring_lock is taken with plain spin_lock() here but
+ * with spin_lock_irqsave() in amdgpu_gmc_fw_reg_write_reg_wait();
+ * confirm this path never races with an interrupt-context user.
+ */
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
+ r = amdgpu_ring_alloc(ring, ndw);
+ if (r) {
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ goto error_unlock_reset;
+ }
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
+
+ if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
+
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ /* Drop the packets written so far before bailing out. */
+ amdgpu_ring_undo(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ goto error_unlock_reset;
+ }
+
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* Retry with sleeps until signaled, out of tries, or a reset is pending. */
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
+ !amdgpu_reset_pending(adev->reset_domain)) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ /* cnt only exceeds the limit when every retry was used up. */
+ if (cnt > MAX_KIQ_REG_TRY) {
+ dev_err(adev->dev, "timeout waiting for kiq fence\n");
+ r = -ETIME;
+ } else
+ r = 0;
+ }
+
+error_unlock_reset:
+ up_read(&adev->reset_domain->sem);
+ return r;
+}
+
+/**
+ * amdgpu_gmc_fw_reg_write_reg_wait - write one register and wait on another
+ * @adev: amdgpu device
+ * @reg0: register that is written
+ * @reg1: register that is waited on
+ * @ref: forwarded unchanged to the MES / ring helper
+ * @mask: forwarded unchanged to the MES / ring helper
+ * @xcc_inst: index into adev->gfx.kiq[] selecting the KIQ to use
+ *
+ * Uses amdgpu_mes_reg_write_reg_wait() when MES ring 0 is ready. Otherwise
+ * the request is emitted on the selected KIQ ring and a polling fence is
+ * waited for. Failures are only reported through dev_err().
+ */
+void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
+				      uint32_t reg0, uint32_t reg1,
+				      uint32_t ref, uint32_t mask,
+				      uint32_t xcc_inst)
+{
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst];
+	struct amdgpu_ring *ring = &kiq->ring;
+	signed long r, cnt = 0;
+	unsigned long flags;
+	uint32_t seq;
+
+	if (adev->mes.ring[0].sched.ready) {
+		amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
+					      ref, mask);
+		return;
+	}
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+	/* Do not emit packets into a ring that failed to reserve space. */
+	r = amdgpu_ring_alloc(ring, 32);
+	if (r)
+		goto failed_unlock;
+
+	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+					    ref, mask);
+	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+	if (r)
+		goto failed_undo;
+
+	amdgpu_ring_commit(ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+	/* don't wait anymore for IRQ context */
+	if (r < 1 && in_interrupt())
+		goto failed_kiq;
+
+	/* Retry with sleeps until signaled, out of tries, or a reset is pending. */
+	might_sleep();
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
+	       !amdgpu_reset_pending(adev->reset_domain)) {
+
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+	}
+
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq;
+
+	return;
+
+failed_undo:
+	/* Only undo when the allocation itself succeeded. */
+	amdgpu_ring_undo(ring);
+failed_unlock:
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq:
+	dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
+}
+
/**
* amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
* @adev: amdgpu_device pointer
@@ -584,7 +860,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
*/
void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
/* RAVEN */
case IP_VERSION(9, 2, 2):
case IP_VERSION(9, 1, 0):
@@ -618,6 +894,10 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
/* YELLOW_CARP*/
case IP_VERSION(10, 3, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {
@@ -648,16 +928,20 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
{
struct amdgpu_gmc *gmc = &adev->gmc;
- uint32_t gc_ver = adev->ip_versions[GC_HWIP][0];
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
- gc_ver == IP_VERSION(9, 3, 0) ||
gc_ver == IP_VERSION(9, 4, 0) ||
gc_ver == IP_VERSION(9, 4, 1) ||
gc_ver == IP_VERSION(9, 4, 2) ||
gc_ver == IP_VERSION(9, 4, 3) ||
+ gc_ver == IP_VERSION(9, 4, 4) ||
+ gc_ver == IP_VERSION(9, 5, 0) ||
gc_ver >= IP_VERSION(10, 3, 0));
- gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry;
+ if (!amdgpu_sriov_xnack_support(adev))
+ gmc->noretry = 1;
+ else
+ gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry;
}
void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
@@ -721,12 +1005,6 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
case CHIP_RENOIR:
adev->mman.keep_stolen_vga_memory = true;
break;
- case CHIP_YELLOW_CARP:
- if (amdgpu_discovery == 0) {
- adev->mman.stolen_reserved_offset = 0x1ffb0000;
- adev->mman.stolen_reserved_size = 64 * PAGE_SIZE;
- }
- break;
default:
adev->mman.keep_stolen_vga_memory = false;
break;
@@ -779,9 +1057,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
*/
u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
- u64 vram_addr = adev->vm_manager.vram_base_offset -
- adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
- u64 vram_end = vram_addr + vram_size;
+ u64 vram_addr, vram_end;
u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
int idx;
@@ -792,7 +1068,12 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
flags |= AMDGPU_PTE_WRITEABLE;
flags |= AMDGPU_PTE_SNOOPED;
flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
- flags |= AMDGPU_PDE_PTE;
+ flags |= AMDGPU_PDE_PTE_FLAG(adev);
+
+ vram_addr = adev->vm_manager.vram_base_offset;
+ if (!amdgpu_virt_xgmi_migrate_enabled(adev))
+ vram_addr -= adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ vram_end = vram_addr + vram_size;
/* The first n PDE0 entries are used as PTE,
* pointing to vram
@@ -805,7 +1086,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
* pointing to a 4K system page
*/
flags = AMDGPU_PTE_VALID;
- flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
+ flags |= AMDGPU_PTE_SNOOPED | AMDGPU_PDE_BFS_FLAG(adev, 0);
/* Requires gart_ptb_gpu_pa to be 4K aligned */
amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
drm_dev_exit(idx);
@@ -835,18 +1116,6 @@ uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
}
-/**
- * amdgpu_gmc_vram_cpu_pa - calculate vram buffer object's physical address
- * from CPU's view
- *
- * @adev: amdgpu_device pointer
- * @bo: amdgpu buffer object
- */
-uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
-{
- return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base;
-}
-
int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
{
struct amdgpu_bo *vram_bo = NULL;
@@ -876,21 +1145,101 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
* seconds, so here, we just pick up three parts for emulation.
*/
ret = memcmp(vram_ptr, cptr, 10);
- if (ret)
- return ret;
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
ret = memcmp(vram_ptr + (size / 2), cptr, 10);
- if (ret)
- return ret;
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
ret = memcmp(vram_ptr + size - 10, cptr, 10);
- if (ret)
- return ret;
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+release_buffer:
amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
&vram_ptr);
- return 0;
+ return ret;
+}
+
+/*
+ * Display names of the memory partition (NPS) modes, indexed by the
+ * AMDGPU_NPSx_PARTITION_MODE value. Used by the memory partition sysfs
+ * attributes below, both for printing and for matching user input.
+ */
+static const char *nps_desc[] = {
+ [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+ [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+ [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+ [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+ [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+ [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+/*
+ * sysfs show: print the names of all modes set in gmc.supported_nps_modes
+ * as a ", "-separated list followed by a newline.
+ */
+static ssize_t available_memory_partition_show(struct device *dev,
+					       struct device_attribute *addr,
+					       char *buf)
+{
+	struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(dev));
+	char *separator = "";
+	int len = 0;
+	int mode;
+
+	for_each_inst(mode, adev->gmc.supported_nps_modes) {
+		len += sysfs_emit_at(buf, len, "%s%s", separator,
+				     nps_desc[mode]);
+		/* Every entry after the first is preceded by a separator. */
+		separator = ", ";
+	}
+
+	len += sysfs_emit_at(buf, len, "\n");
+	return len;
+}
+
+/*
+ * sysfs store: record a request to switch the memory partition (NPS) mode.
+ *
+ * The input is matched case-insensitively against the names of the supported
+ * modes. Returns -EINVAL when nothing matches, otherwise @count. The request
+ * is only recorded here (in the XGMI hive if the device has one, else in
+ * gmc.requested_nps_mode); the message asks for a driver reload.
+ */
+static ssize_t current_memory_partition_store(struct device *dev,
+					      struct device_attribute *attr,
+					      const char *buf, size_t count)
+{
+	struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(dev));
+	enum amdgpu_memory_partition requested = UNKNOWN_MEMORY_PARTITION_MODE;
+	struct amdgpu_hive_info *hive;
+	int idx;
+
+	for_each_inst(idx, adev->gmc.supported_nps_modes) {
+		if (!strncasecmp(nps_desc[idx], buf, strlen(nps_desc[idx]))) {
+			requested = idx;
+			break;
+		}
+	}
+
+	if (requested == UNKNOWN_MEMORY_PARTITION_MODE)
+		return -EINVAL;
+
+	if (requested == adev->gmc.gmc_funcs->query_mem_partition_mode(adev)) {
+		dev_info(adev->dev,
+			 "requested NPS mode is same as current NPS mode, skipping\n");
+		return count;
+	}
+
+	/*
+	 * All devices of a hive should request the same mode, so a device
+	 * that belongs to a hive records the request there.
+	 */
+	hive = amdgpu_get_xgmi_hive(adev);
+	if (!hive) {
+		adev->gmc.requested_nps_mode = requested;
+	} else {
+		atomic_set(&hive->requested_nps_mode, requested);
+		amdgpu_put_xgmi_hive(hive);
+	}
+
+	dev_info(adev->dev,
+		 "NPS mode change requested, please remove and reload the driver\n");
+
+	return count;
+}
static ssize_t current_memory_partition_show(
@@ -900,39 +1249,431 @@ static ssize_t current_memory_partition_show(
struct amdgpu_device *adev = drm_to_adev(ddev);
enum amdgpu_memory_partition mode;
+ /* Only minimal precaution taken to reject requests while in reset */
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
- switch (mode) {
- case AMDGPU_NPS1_PARTITION_MODE:
- return sysfs_emit(buf, "NPS1\n");
- case AMDGPU_NPS2_PARTITION_MODE:
- return sysfs_emit(buf, "NPS2\n");
- case AMDGPU_NPS3_PARTITION_MODE:
- return sysfs_emit(buf, "NPS3\n");
- case AMDGPU_NPS4_PARTITION_MODE:
- return sysfs_emit(buf, "NPS4\n");
- case AMDGPU_NPS6_PARTITION_MODE:
- return sysfs_emit(buf, "NPS6\n");
- case AMDGPU_NPS8_PARTITION_MODE:
- return sysfs_emit(buf, "NPS8\n");
- default:
+ if ((mode >= ARRAY_SIZE(nps_desc)) ||
+ (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode))
return sysfs_emit(buf, "UNKNOWN\n");
- }
- return sysfs_emit(buf, "UNKNOWN\n");
+ return sysfs_emit(buf, "%s\n", nps_desc[mode]);
}
-static DEVICE_ATTR_RO(current_memory_partition);
+static DEVICE_ATTR_RW(current_memory_partition);
+static DEVICE_ATTR_RO(available_memory_partition);
int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
{
+ bool nps_switch_support;
+ int r = 0;
+
if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
return 0;
+ nps_switch_support = (hweight32(adev->gmc.supported_nps_modes &
+ AMDGPU_ALL_NPS_MASK) > 1);
+ if (!nps_switch_support)
+ dev_attr_current_memory_partition.attr.mode &=
+ ~(S_IWUSR | S_IWGRP | S_IWOTH);
+ else
+ r = device_create_file(adev->dev,
+ &dev_attr_available_memory_partition);
+
+ if (r)
+ return r;
+
return device_create_file(adev->dev,
&dev_attr_current_memory_partition);
}
+/**
+ * amdgpu_gmc_sysfs_fini - remove the memory partition sysfs attributes
+ * @adev: amdgpu device pointer
+ *
+ * Does nothing when the GMC backend has no query_mem_partition_mode
+ * callback. Otherwise both attributes are removed, without checking
+ * whether available_memory_partition was created at init time.
+ */
void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
{
+ if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
+ return;
+
device_remove_file(adev->dev, &dev_attr_current_memory_partition);
+ device_remove_file(adev->dev, &dev_attr_available_memory_partition);
+}
+
+/**
+ * amdgpu_gmc_get_nps_memranges - read and validate the NPS memory ranges
+ * @adev: amdgpu device pointer
+ * @mem_ranges: caller-provided array, filled with one entry per range
+ * @exp_ranges: in: expected number of ranges, or 0 to accept whatever
+ *              discovery reports; out: set to the reported count when it
+ *              was 0 on entry
+ *
+ * Fetches the range list through amdgpu_discovery_get_nps_info(), rejects
+ * any range whose base is not below its limit and any pair of overlapping
+ * ranges, and converts each range into first/last page frame numbers
+ * relative to adev->vm_manager.vram_base_offset.
+ *
+ * @mem_ranges may have been partially written when an error is returned.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments or inconsistent range
+ * data, or the error returned by amdgpu_discovery_get_nps_info().
+ */
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+				 struct amdgpu_mem_partition_info *mem_ranges,
+				 uint8_t *exp_ranges)
+{
+	struct amdgpu_gmc_memrange *ranges;
+	int range_cnt, ret, i, j;
+	uint32_t nps_type;
+	bool refresh;
+
+	if (!mem_ranges || !exp_ranges)
+		return -EINVAL;
+
+	/*
+	 * Request refreshed data only when an NPS reset is flagged and the
+	 * init level is not the minimal XGMI one.
+	 */
+	refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
+		  (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS);
+	ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
+					    &range_cnt, refresh);
+
+	if (ret)
+		return ret;
+
+	/* TODO: For now, expect ranges and partition count to be the same.
+	 * Adjust if there are holes expected in any NPS domain.
+	 */
+	if (*exp_ranges && (range_cnt != *exp_ranges)) {
+		dev_warn(
+			adev->dev,
+			"NPS config mismatch - expected ranges: %d discovery - nps mode: %u, nps ranges: %d\n",
+			*exp_ranges, nps_type, range_cnt);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	for (i = 0; i < range_cnt; ++i) {
+		if (ranges[i].base_address >= ranges[i].limit_address) {
+			dev_warn(
+				adev->dev,
+				"Invalid NPS range - nps mode: %u, range[%d]: base: %llx limit: %llx\n",
+				nps_type, i, ranges[i].base_address,
+				ranges[i].limit_address);
+			ret = -EINVAL;
+			goto err;
+		}
+
+		/* Check for overlaps, not expecting any now */
+		for (j = i - 1; j >= 0; j--) {
+			/*
+			 * Two inclusive ranges intersect when the larger of
+			 * the bases does not exceed the smaller of the limits.
+			 */
+			if (max(ranges[j].base_address,
+				ranges[i].base_address) <=
+			    min(ranges[j].limit_address,
+				ranges[i].limit_address)) {
+				dev_warn(
+					adev->dev,
+					"overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]\n",
+					ranges[j].base_address,
+					ranges[j].limit_address,
+					ranges[i].base_address,
+					ranges[i].limit_address);
+				ret = -EINVAL;
+				goto err;
+			}
+		}
+
+		mem_ranges[i].range.fpfn =
+			(ranges[i].base_address -
+			 adev->vm_manager.vram_base_offset) >>
+			AMDGPU_GPU_PAGE_SHIFT;
+		mem_ranges[i].range.lpfn =
+			(ranges[i].limit_address -
+			 adev->vm_manager.vram_base_offset) >>
+			AMDGPU_GPU_PAGE_SHIFT;
+		/* limit_address is inclusive, hence the + 1 */
+		mem_ranges[i].size =
+			ranges[i].limit_address - ranges[i].base_address + 1;
+	}
+
+	/* The caller had no expectation: report what discovery provided */
+	if (!*exp_ranges)
+		*exp_ranges = range_cnt;
+err:
+	kfree(ranges);
+
+	return ret;
+}
+
+/**
+ * amdgpu_gmc_request_memory_partition - ask PSP to change the NPS mode
+ * @adev: amdgpu device pointer
+ * @nps_mode: memory partition mode to request
+ *
+ * Returns -EOPNOTSUPP on SR-IOV virtual functions and on APUs, -EINVAL
+ * when no PSP function table is set, otherwise the result of
+ * psp_memory_partition().
+ */
+int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
+					int nps_mode)
+{
+	/* Virtual functions cannot request a mode change */
+	if (amdgpu_sriov_vf(adev))
+		return -EOPNOTSUPP;
+
+	/* Neither can APUs */
+	if (adev->flags & AMD_IS_APU)
+		return -EOPNOTSUPP;
+
+	if (adev->psp.funcs)
+		return psp_memory_partition(&adev->psp, nps_mode);
+
+	dev_err(adev->dev,
+		"PSP interface not available for nps mode change request");
+	return -EINVAL;
+}
+
+/*
+ * Tell whether an NPS switch request must be issued: the requested mode has
+ * to be present in adev->gmc.supported_nps_modes and differ from the mode
+ * currently in effect.
+ */
+static inline bool amdgpu_gmc_need_nps_switch_req(struct amdgpu_device *adev,
+						  int req_nps_mode,
+						  int cur_nps_mode)
+{
+	unsigned long req_bit = BIT(req_nps_mode);
+
+	/* Requested mode is not advertised as supported */
+	if ((req_bit & adev->gmc.supported_nps_modes) != req_bit)
+		return false;
+
+	return req_nps_mode != cur_nps_mode;
+}
+
+/**
+ * amdgpu_gmc_prepare_nps_mode_change - issue a pending NPS mode change request
+ * @adev: amdgpu device pointer
+ *
+ * Does nothing on SR-IOV virtual functions, when no NPS modes are supported,
+ * or when the GMC backend has no request_mem_partition_mode callback.
+ * For a device in an XGMI hive the requested mode is taken from the hive and
+ * sent through amdgpu_xgmi_request_nps_change(); otherwise the per-device
+ * requested mode is sent through the GMC callback. The outcome is only
+ * logged; nothing is returned to the caller.
+ */
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev)
+{
+ int req_nps_mode, cur_nps_mode, r;
+ struct amdgpu_hive_info *hive;
+
+ if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes ||
+ !adev->gmc.gmc_funcs->request_mem_partition_mode)
+ return;
+
+ cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ /* Hive members share one requested mode, stored on the hive */
+ req_nps_mode = atomic_read(&hive->requested_nps_mode);
+ if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode,
+ cur_nps_mode)) {
+ /* Drop the hive reference on every exit path */
+ amdgpu_put_xgmi_hive(hive);
+ return;
+ }
+ r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode);
+ amdgpu_put_xgmi_hive(hive);
+ goto out;
+ }
+
+ /* Not part of a hive: use the mode requested for this device */
+ req_nps_mode = adev->gmc.requested_nps_mode;
+ if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode))
+ return;
+
+ /* even if this fails, we should let driver unload w/o blocking */
+ r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode);
+out:
+ if (r)
+ dev_err(adev->dev, "NPS mode change request failed\n");
+ else
+ dev_info(
+ adev->dev,
+ "NPS mode change request done, reload driver to complete the change\n");
+}
+
+/**
+ * amdgpu_gmc_need_reset_on_init - query the GMC backend for a reset on init
+ * @adev: amdgpu device pointer
+ *
+ * Returns the result of the backend's need_reset_on_init callback, or false
+ * when the backend does not provide one.
+ */
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev)
+{
+	const struct amdgpu_gmc_funcs *funcs = adev->gmc.gmc_funcs;
+
+	if (!funcs->need_reset_on_init)
+		return false;
+
+	return funcs->need_reset_on_init(adev);
+}
+
+/**
+ * amdgpu_gmc_get_vf_memory_partition - derive the NPS mode from the partition count
+ * @adev: amdgpu device pointer
+ *
+ * Maps adev->gmc.num_mem_partitions to a partition mode: 0 is unknown,
+ * 2/4/8 map to NPS2/NPS4/NPS8, and every other count is reported as NPS1.
+ */
+enum amdgpu_memory_partition
+amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev)
+{
+	enum amdgpu_memory_partition mode;
+
+	switch (adev->gmc.num_mem_partitions) {
+	case 0:
+		mode = UNKNOWN_MEMORY_PARTITION_MODE;
+		break;
+	case 2:
+		mode = AMDGPU_NPS2_PARTITION_MODE;
+		break;
+	case 4:
+		mode = AMDGPU_NPS4_PARTITION_MODE;
+		break;
+	case 8:
+		mode = AMDGPU_NPS8_PARTITION_MODE;
+		break;
+	case 1:
+	default:
+		/* A single partition and any unexpected count both mean NPS1 */
+		mode = AMDGPU_NPS1_PARTITION_MODE;
+		break;
+	}
+
+	return mode;
+}
+
+/**
+ * amdgpu_gmc_get_memory_partition - query the partition mode through NBIO
+ * @adev: amdgpu device pointer
+ * @supp_modes: passed through unchanged to the NBIO callback
+ *
+ * Returns the mode reported by the NBIO get_memory_partition_mode callback.
+ * When NBIO functions or that callback are missing, a warning is logged,
+ * @supp_modes is left untouched and UNKNOWN_MEMORY_PARTITION_MODE is
+ * returned.
+ */
+enum amdgpu_memory_partition
+amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
+{
+	if (!adev->nbio.funcs ||
+	    !adev->nbio.funcs->get_memory_partition_mode) {
+		dev_warn(adev->dev, "memory partition mode query is not supported\n");
+		return UNKNOWN_MEMORY_PARTITION_MODE;
+	}
+
+	return adev->nbio.funcs->get_memory_partition_mode(adev, supp_modes);
+}
+
+/**
+ * amdgpu_gmc_query_memory_partition - get the current memory partition mode
+ * @adev: amdgpu device pointer
+ *
+ * SR-IOV virtual functions derive the mode from the partition count; all
+ * other devices query it through amdgpu_gmc_get_memory_partition() without
+ * asking for the supported-mode mask.
+ */
+enum amdgpu_memory_partition
+amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev)
+{
+	if (!amdgpu_sriov_vf(adev))
+		return amdgpu_gmc_get_memory_partition(adev, NULL);
+
+	return amdgpu_gmc_get_vf_memory_partition(adev);
+}
+
+/*
+ * Check that adev->gmc.num_mem_partitions agrees with the partition mode
+ * reported by amdgpu_gmc_get_memory_partition(). Returns true when the
+ * count matches the mode, false otherwise or when the reported mode is not
+ * in the reported supported-mode mask.
+ */
+static bool amdgpu_gmc_validate_partition_info(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition mode;
+ u32 supp_modes;
+ bool valid;
+
+ mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware not present in supported modes */
+ /*
+ * supp_modes is only read when the mode is known; on the unsupported-query
+ * path it is never written, and the mode check short-circuits first.
+ * NOTE(review): this tests bit (mode - 1), whereas
+ * amdgpu_gmc_need_nps_switch_req() tests BIT(mode) against
+ * gmc.supported_nps_modes - presumably the NBIO mask uses a different
+ * bit base; confirm.
+ */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) &&
+ !(BIT(mode - 1) & supp_modes))
+ return false;
+
+ switch (mode) {
+ /* An unknown mode is validated like NPS1 */
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ case AMDGPU_NPS1_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 1);
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 2);
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ /* 3 is accepted too: amdgpu_gmc_init_sw_mem_ranges() uses 3 for APUs */
+ valid = (adev->gmc.num_mem_partitions == 3 ||
+ adev->gmc.num_mem_partitions == 4);
+ break;
+ case AMDGPU_NPS8_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 8);
+ break;
+ default:
+ valid = false;
+ }
+
+ return valid;
+}
+
+/*
+ * Linear search: report whether 'nid' occurs among the first 'num_ids'
+ * entries of 'node_ids'.
+ */
+static bool amdgpu_gmc_is_node_present(int *node_ids, int num_ids, int nid)
+{
+	bool found = false;
+	int idx;
+
+	/* Stop at the first match */
+	for (idx = 0; idx < num_ids && !found; idx++)
+		found = (node_ids[idx] == nid);
+
+	return found;
+}
+
+/*
+ * Build the memory partition table from ACPI data: query
+ * amdgpu_acpi_get_mem_info() once per XCC instance and record one range
+ * (size and NUMA node) for each distinct NUMA node id seen. The number of
+ * ranges found is stored in adev->gmc.num_mem_partitions.
+ */
+static void
+amdgpu_gmc_init_acpi_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ struct amdgpu_numa_info numa_info;
+ int node_ids[AMDGPU_MAX_MEM_RANGES];
+ int num_ranges = 0, ret;
+ int num_xcc, xcc_id;
+ uint32_t xcc_mask;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ /* Dense mask with the low num_xcc bits set */
+ xcc_mask = (1U << num_xcc) - 1;
+
+ for_each_inst(xcc_id, xcc_mask) {
+ ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+ /* Instances without ACPI memory info are skipped */
+ if (ret)
+ continue;
+
+ /* No NUMA node assigned: collapse to a single range and stop */
+ if (numa_info.nid == NUMA_NO_NODE) {
+ mem_ranges[0].size = numa_info.size;
+ mem_ranges[0].numa.node = numa_info.nid;
+ num_ranges = 1;
+ break;
+ }
+
+ /* This node already has a range */
+ if (amdgpu_gmc_is_node_present(node_ids, num_ranges,
+ numa_info.nid))
+ continue;
+
+ /*
+ * NOTE(review): num_ranges is not checked against
+ * AMDGPU_MAX_MEM_RANGES before indexing node_ids - confirm the
+ * number of distinct nodes cannot exceed it.
+ */
+ node_ids[num_ranges] = numa_info.nid;
+ mem_ranges[num_ranges].numa.node = numa_info.nid;
+ mem_ranges[num_ranges].size = numa_info.size;
+ ++num_ranges;
+ }
+
+ adev->gmc.num_mem_partitions = num_ranges;
+}
+
+/**
+ * amdgpu_gmc_init_sw_mem_ranges - build the VRAM partition table in software
+ * @adev: amdgpu device pointer
+ * @mem_ranges: array to fill, one entry per memory partition
+ *
+ * Derives the partition count from the queried NPS mode, then fills
+ * @mem_ranges from the range info returned by
+ * amdgpu_gmc_get_nps_memranges(). If that fails, VRAM is split evenly
+ * across the partitions instead. In both cases the range that ends highest
+ * is finally adjusted so that it ends at the last page of
+ * adev->gmc.real_vram_size.
+ */
+void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev,
+				   struct amdgpu_mem_partition_info *mem_ranges)
+{
+	enum amdgpu_memory_partition mode;
+	u32 start_addr = 0, size;
+	int i, r, l;
+
+	mode = amdgpu_gmc_query_memory_partition(adev);
+
+	switch (mode) {
+	case UNKNOWN_MEMORY_PARTITION_MODE:
+		/* 0 lets amdgpu_gmc_get_nps_memranges() report the count */
+		adev->gmc.num_mem_partitions = 0;
+		break;
+	case AMDGPU_NPS1_PARTITION_MODE:
+		adev->gmc.num_mem_partitions = 1;
+		break;
+	case AMDGPU_NPS2_PARTITION_MODE:
+		adev->gmc.num_mem_partitions = 2;
+		break;
+	case AMDGPU_NPS4_PARTITION_MODE:
+		if (adev->flags & AMD_IS_APU)
+			adev->gmc.num_mem_partitions = 3;
+		else
+			adev->gmc.num_mem_partitions = 4;
+		break;
+	case AMDGPU_NPS8_PARTITION_MODE:
+		adev->gmc.num_mem_partitions = 8;
+		break;
+	default:
+		adev->gmc.num_mem_partitions = 1;
+		break;
+	}
+
+	/* Use NPS range info, if populated */
+	r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges,
+					 &adev->gmc.num_mem_partitions);
+	if (!r) {
+		/*
+		 * Find the range with the highest last page. Compare against
+		 * the best candidate so far rather than the previous entry;
+		 * otherwise unsorted ranges can select an entry that is not
+		 * the topmost one.
+		 */
+		l = 0;
+		for (i = 1; i < adev->gmc.num_mem_partitions; ++i) {
+			if (mem_ranges[i].range.lpfn >
+			    mem_ranges[l].range.lpfn)
+				l = i;
+		}
+
+	} else {
+		if (!adev->gmc.num_mem_partitions) {
+			dev_warn(adev->dev,
+				 "Not able to detect NPS mode, fall back to NPS1\n");
+			adev->gmc.num_mem_partitions = 1;
+		}
+		/* Fallback to sw based calculation */
+		/* NOTE(review): purpose of the SZ_16M padding is not visible here */
+		size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
+		size /= adev->gmc.num_mem_partitions;
+
+		/* Equal-sized, back-to-back ranges starting at page 0 */
+		for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+			mem_ranges[i].range.fpfn = start_addr;
+			mem_ranges[i].size =
+				((u64)size << AMDGPU_GPU_PAGE_SHIFT);
+			mem_ranges[i].range.lpfn = start_addr + size - 1;
+			start_addr += size;
+		}
+
+		l = adev->gmc.num_mem_partitions - 1;
+	}
+
+	/* Adjust the last one */
+	mem_ranges[l].range.lpfn =
+		(adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
+	mem_ranges[l].size =
+		adev->gmc.real_vram_size -
+		((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT);
+}
+
+/**
+ * amdgpu_gmc_init_mem_ranges - allocate and populate the memory partition table
+ * @adev: amdgpu device pointer
+ *
+ * Allocates AMDGPU_MAX_MEM_RANGES entries for adev->gmc.mem_partitions and
+ * fills them from ACPI data when adev->gmc.is_app_apu is set, otherwise from
+ * the software calculation. A mismatch with the hardware configuration is
+ * only logged.
+ *
+ * Returns 0 on success or -ENOMEM when the allocation fails.
+ */
+int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev)
+{
+ bool valid;
+
+ adev->gmc.mem_partitions = kcalloc(AMDGPU_MAX_MEM_RANGES,
+ sizeof(struct amdgpu_mem_partition_info),
+ GFP_KERNEL);
+ if (!adev->gmc.mem_partitions)
+ return -ENOMEM;
+
+ if (adev->gmc.is_app_apu)
+ amdgpu_gmc_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions);
+ else
+ amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+
+ /* Validation is skipped on SR-IOV virtual functions */
+ if (amdgpu_sriov_vf(adev))
+ valid = true;
+ else
+ valid = amdgpu_gmc_validate_partition_info(adev);
+ if (!valid) {
+ /* TODO: handle invalid case */
+ dev_warn(adev->dev,
+ "Mem ranges not matching with hardware config\n");
+ }
+
+ return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index fdc25cd559b6..55097ca10738 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -29,6 +29,7 @@
#include <linux/types.h>
#include "amdgpu_irq.h"
+#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
/* VA hole for 48bit addresses on Vega10 */
@@ -61,6 +62,9 @@
*/
#define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL
+/* XNACK flags */
+#define AMDGPU_GMC_XNACK_FLAG_CHAIN BIT(0)
+
struct firmware;
enum amdgpu_memory_partition {
@@ -73,6 +77,15 @@ enum amdgpu_memory_partition {
AMDGPU_NPS8_PARTITION_MODE = 8,
};
+/*
+ * Mask covering the NPS1/2/3/4/6/8 modes; bit N corresponds to the
+ * enum amdgpu_memory_partition value N.
+ */
+#define AMDGPU_ALL_NPS_MASK \
+ (BIT(AMDGPU_NPS1_PARTITION_MODE) | BIT(AMDGPU_NPS2_PARTITION_MODE) | \
+ BIT(AMDGPU_NPS3_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE) | \
+ BIT(AMDGPU_NPS6_PARTITION_MODE) | BIT(AMDGPU_NPS8_PARTITION_MODE))
+
+/* Bit in amdgpu_gmc.reset_flags, tested when deciding to refresh NPS info */
+#define AMDGPU_GMC_INIT_RESET_NPS BIT(0)
+
+/* Number of entries allocated for amdgpu_gmc.mem_partitions */
+#define AMDGPU_MAX_MEM_RANGES 8
+
/*
* GMC page fault information
*/
@@ -117,6 +130,8 @@ struct amdgpu_vmhub {
uint32_t vm_contexts_disable;
+ bool sdma_invalidation_workaround;
+
const struct amdgpu_vmhub_funcs *vmhub_funcs;
};
@@ -128,9 +143,9 @@ struct amdgpu_gmc_funcs {
void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type);
/* flush the vm tlb via pasid */
- int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
- uint32_t flush_type, bool all_hub,
- uint32_t inst);
+ void (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
@@ -139,47 +154,30 @@ struct amdgpu_gmc_funcs {
unsigned pasid);
/* enable/disable PRT support */
void (*set_prt)(struct amdgpu_device *adev, bool enable);
- /* map mtype to hardware flags */
- uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags);
/* get the pde for a given mc addr */
void (*get_vm_pde)(struct amdgpu_device *adev, int level,
u64 *dst, u64 *flags);
- /* get the pte flags to use for a BO VA mapping */
+ /* get the pte flags to use for PTEs */
void (*get_vm_pte)(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
- uint64_t *flags);
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *pte_flags);
/* override per-page pte flags */
void (*override_vm_pte_flags)(struct amdgpu_device *dev,
struct amdgpu_vm *vm,
uint64_t addr, uint64_t *flags);
/* get the amount of memory used by the vbios for pre-OS console */
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
+ /* get the DCC buffer alignment */
+ unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev);
enum amdgpu_memory_partition (*query_mem_partition_mode)(
struct amdgpu_device *adev);
-};
-
-struct amdgpu_xgmi_ras {
- struct amdgpu_ras_block_object ras_block;
-};
-
-struct amdgpu_xgmi {
- /* from psp */
- u64 node_id;
- u64 hive_id;
- /* fixed per family */
- u64 node_segment_size;
- /* physical node (0-3) */
- unsigned physical_node_id;
- /* number of nodes (0-4) */
- unsigned num_physical_nodes;
- /* gpu list in the same hive */
- struct list_head head;
- bool supported;
- struct ras_common_if *ras_if;
- bool connected_to_cpu;
- bool pending_reset;
- struct amdgpu_xgmi_ras *ras;
+ /* Request NPS mode */
+ int (*request_mem_partition_mode)(struct amdgpu_device *adev,
+ int nps_mode);
+ bool (*need_reset_on_init)(struct amdgpu_device *adev);
};
struct amdgpu_mem_partition_info {
@@ -197,6 +195,19 @@ struct amdgpu_mem_partition_info {
#define INVALID_PFN -1
+/*
+ * One memory range as consumed by amdgpu_gmc_get_nps_memranges().
+ * limit_address is inclusive: the range size is computed there as
+ * limit_address - base_address + 1.
+ * NOTE(review): the meaning of flags and nid_mask is not visible in this
+ * part of the patch - confirm against the discovery code that fills them.
+ */
+struct amdgpu_gmc_memrange {
+ uint64_t base_address;
+ uint64_t limit_address;
+ uint32_t flags;
+ int nid_mask;
+};
+
+/*
+ * GART placement policy, passed as the gart_placement argument of
+ * amdgpu_gmc_gart_location().
+ * NOTE(review): presumably selects a best-fit, high or low position for
+ * the GART aperture - confirm against amdgpu_gmc_gart_location().
+ */
+enum amdgpu_gart_placement {
+ AMDGPU_GART_PLACEMENT_BEST_FIT = 0,
+ AMDGPU_GART_PLACEMENT_HIGH,
+ AMDGPU_GART_PLACEMENT_LOW,
+};
+
struct amdgpu_gmc {
/* FB's physical address in MMIO space (for CPU to
* map FB). This is different compared to the agp/
@@ -288,10 +299,14 @@ struct amdgpu_gmc {
struct amdgpu_mem_partition_info *mem_partitions;
uint8_t num_mem_partitions;
const struct amdgpu_gmc_funcs *gmc_funcs;
+ enum amdgpu_memory_partition requested_nps_mode;
+ uint32_t supported_nps_modes;
+ uint32_t reset_flags;
struct amdgpu_xgmi xgmi;
struct amdgpu_irq_src ecc_irq;
int noretry;
+ uint32_t xnack_flags;
uint32_t vmid0_page_table_block_size;
uint32_t vmid0_page_table_depth;
@@ -333,21 +348,26 @@ struct amdgpu_gmc {
u64 MC_VM_MX_L1_TLB_CNTL;
u64 noretry_flags;
+
+ bool flush_tlb_needs_extra_type_0;
+ bool flush_tlb_needs_extra_type_2;
+ bool flush_pasid_uses_kiq;
};
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
-#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
- ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
- ((adev), (pasid), (type), (allhub), (inst)))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
-#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
-#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
+#define amdgpu_gmc_get_vm_pte(adev, vm, bo, vm_flags, pte_flags) \
+ ((adev)->gmc.gmc_funcs->get_vm_pte((adev), (vm), (bo), (vm_flags), \
+ (pte_flags)))
#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
(adev)->gmc.gmc_funcs->override_vm_pte_flags \
((adev), (vm), (addr), (pte_flags))
#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
+/*
+ * Call the get_dcc_alignment GMC callback. The argument is copied into a
+ * local first so that it is evaluated only once. The callback pointer is
+ * not checked for NULL here.
+ */
+#define amdgpu_gmc_get_dcc_alignment(adev) ({ \
+ typeof(adev) _adev = (adev); \
+ _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \
+})
/**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
@@ -377,6 +397,7 @@ static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
return addr;
}
+bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev);
int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev);
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
uint64_t *addr, uint64_t *flags);
@@ -389,9 +410,12 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
u64 base);
void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
- struct amdgpu_gmc *mc);
+ struct amdgpu_gmc *mc,
+ enum amdgpu_gart_placement gart_placement);
void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
+void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc);
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih, uint64_t addr,
uint16_t pasid, uint64_t timestamp);
@@ -401,6 +425,15 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type);
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst);
+void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask,
+ uint32_t xcc_inst);
extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
@@ -414,9 +447,25 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev);
void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev);
uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
-uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges,
+ uint8_t *exp_ranges);
+
+int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
+ int nps_mode);
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev);
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev);
+enum amdgpu_memory_partition
+amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev);
+enum amdgpu_memory_partition
+amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes);
+enum amdgpu_memory_partition
+amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev);
+int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev);
+void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 44367f03316f..0760e70402ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -200,8 +200,6 @@ void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
amdgpu_ttm_recover_gart(node->base.bo);
}
spin_unlock(&mgr->lock);
-
- amdgpu_gart_invalidate_tlb(adev);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index b6cf801939aa..6e02fb9ac2f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -22,6 +22,7 @@
*/
#include "amdgpu.h"
#include "amdgpu_ras.h"
+#include <uapi/linux/kfd_ioctl.h>
int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev)
{
@@ -46,3 +47,22 @@ int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev)
/* hdp ras follows amdgpu_ras_block_late_init_default for late init */
return 0;
}
+
+/**
+ * amdgpu_hdp_generic_flush - write 0 to the remapped HDP memory flush register
+ * @adev: amdgpu device pointer
+ * @ring: ring to emit the register write on, may be NULL
+ *
+ * Without a ring, or when the ring has no emit_wreg callback, the register
+ * is written directly through MMIO. Otherwise the write is emitted on @ring.
+ */
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ /* Byte offset converted to a dword register index by the >> 2 */
+ WREG32((adev->rmmio_remap.reg_offset +
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
+ 2,
+ 0);
+ /*
+ * The result is discarded.
+ * NOTE(review): presumably a read-back so the write above is posted
+ * before returning - confirm.
+ */
+ if (adev->nbio.funcs->get_memsize)
+ adev->nbio.funcs->get_memsize(adev);
+ } else {
+ amdgpu_ring_emit_wreg(ring,
+ (adev->rmmio_remap.reg_offset +
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
+ 2,
+ 0);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 7b8a6152dc8d..4cfd932b7e91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -44,4 +44,6 @@ struct amdgpu_hdp {
};
int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev);
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
#endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 081267161d40..2c6a6b858112 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -129,13 +129,25 @@ static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = {
*/
int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
{
+ int r;
+
if (bo->kfd_bo)
- return mmu_interval_notifier_insert(&bo->notifier, current->mm,
+ r = mmu_interval_notifier_insert(&bo->notifier, current->mm,
addr, amdgpu_bo_size(bo),
&amdgpu_hmm_hsa_ops);
- return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
- amdgpu_bo_size(bo),
- &amdgpu_hmm_gfx_ops);
+ else
+ r = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ amdgpu_bo_size(bo),
+ &amdgpu_hmm_gfx_ops);
+ if (r)
+ /*
+ * Make sure amdgpu_hmm_unregister() doesn't call
+ * mmu_interval_notifier_remove() when the notifier isn't properly
+ * initialized.
+ */
+ bo->notifier.mm = NULL;
+
+ return r;
}
/**
@@ -155,13 +167,12 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
uint64_t start, uint64_t npages, bool readonly,
- void *owner, struct page **pages,
+ void *owner,
struct hmm_range **phmm_range)
{
struct hmm_range *hmm_range;
unsigned long end;
unsigned long timeout;
- unsigned long i;
unsigned long *pfns;
int r = 0;
@@ -190,19 +201,12 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
hmm_range->start, hmm_range->end);
- /* Assuming 128MB takes maximum 1 second to fault page address */
- timeout = max((hmm_range->end - hmm_range->start) >> 27, 1UL);
- timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
- timeout = jiffies + msecs_to_jiffies(timeout);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
retry:
hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
r = hmm_range_fault(hmm_range);
if (unlikely(r)) {
- /*
- * FIXME: This timeout should encompass the retry from
- * mmu_interval_read_retry() as well.
- */
if (r == -EBUSY && !time_after(jiffies, timeout))
goto retry;
goto out_free_pfns;
@@ -212,20 +216,11 @@ retry:
break;
hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
hmm_range->start = hmm_range->end;
- schedule();
} while (hmm_range->end < end);
hmm_range->start = start;
hmm_range->hmm_pfns = pfns;
- /*
- * Due to default_flags, all pages are HMM_PFN_VALID or
- * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
- * the notifier_lock, and mmu_interval_read_retry() must be done first.
- */
- for (i = 0; pages && i < npages; i++)
- pages[i] = hmm_pfn_to_page(pfns[i]);
-
*phmm_range = hmm_range;
return 0;
@@ -235,6 +230,8 @@ out_free_pfns:
out_free_range:
kfree(hmm_range);
+ if (r == -EBUSY)
+ r = -EAGAIN;
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
index e2edcd010ccc..953e1d06de20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -33,7 +33,7 @@
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
uint64_t start, uint64_t npages, bool readonly,
- void *owner, struct page **pages,
+ void *owner,
struct hmm_range **phmm_range);
bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index 82608df43396..9cb72f0c5277 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -24,7 +24,6 @@
* Alex Deucher
*/
-#include <linux/export.h>
#include <linux/pci.h>
#include <drm/drm_edid.h>
@@ -175,7 +174,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
i2c->rec = *rec;
i2c->adapter.owner = THIS_MODULE;
- i2c->adapter.class = I2C_CLASS_DDC;
i2c->adapter.dev.parent = dev->dev;
i2c->dev = dev;
i2c_set_adapdata(&i2c->adapter, i2c);
@@ -186,7 +184,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
"AMDGPU i2c hw bus %s", name);
i2c->adapter.algo = &amdgpu_atombios_i2c_algo;
- ret = i2c_add_adapter(&i2c->adapter);
+ ret = devm_i2c_add_adapter(dev->dev, &i2c->adapter);
if (ret)
goto out_free;
} else {
@@ -217,22 +215,23 @@ out_free:
}
-void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c)
-{
- if (!i2c)
- return;
- WARN_ON(i2c->has_aux);
- i2c_del_adapter(&i2c->adapter);
- kfree(i2c);
-}
-
-/* Add the default buses */
void amdgpu_i2c_init(struct amdgpu_device *adev)
{
- if (amdgpu_hw_i2c)
- DRM_INFO("hw_i2c forced on, you may experience display detection problems!\n");
-
- amdgpu_atombios_i2c_init(adev);
+ if (!adev->is_atom_fw) {
+ if (!amdgpu_device_has_dc_support(adev)) {
+ amdgpu_atombios_i2c_init(adev);
+ } else {
+ switch (adev->asic_type) {
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ amdgpu_atombios_oem_i2c_init(adev, 0x97);
+ break;
+ default:
+ break;
+ }
+ }
+ }
}
/* remove all the buses */
@@ -240,28 +239,9 @@ void amdgpu_i2c_fini(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
- if (adev->i2c_bus[i]) {
- amdgpu_i2c_destroy(adev->i2c_bus[i]);
+ for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++)
+ if (adev->i2c_bus[i])
adev->i2c_bus[i] = NULL;
- }
- }
-}
-
-/* Add additional buses */
-void amdgpu_i2c_add(struct amdgpu_device *adev,
- const struct amdgpu_i2c_bus_rec *rec,
- const char *name)
-{
- struct drm_device *dev = adev_to_drm(adev);
- int i;
-
- for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
- if (!adev->i2c_bus[i]) {
- adev->i2c_bus[i] = amdgpu_i2c_create(dev, rec, name);
- return;
- }
- }
}
/* looks up bus based on id */
@@ -280,7 +260,7 @@ amdgpu_i2c_lookup(struct amdgpu_device *adev,
return NULL;
}
-static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
+static int amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
u8 slave_addr,
u8 addr,
u8 *val)
@@ -305,16 +285,18 @@ static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
out_buf[0] = addr;
out_buf[1] = 0;
- if (i2c_transfer(&i2c_bus->adapter, msgs, 2) == 2) {
- *val = in_buf[0];
- DRM_DEBUG("val = 0x%02x\n", *val);
- } else {
- DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n",
- addr, *val);
+ if (i2c_transfer(&i2c_bus->adapter, msgs, 2) != 2) {
+ DRM_DEBUG("i2c 0x%02x read failed\n", addr);
+ return -EIO;
}
+
+ *val = in_buf[0];
+ DRM_DEBUG("val = 0x%02x\n", *val);
+
+ return 0;
}
-static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
+static int amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
u8 slave_addr,
u8 addr,
u8 val)
@@ -330,9 +312,12 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
out_buf[0] = addr;
out_buf[1] = val;
- if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1)
- DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n",
- addr, val);
+ if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) {
+ DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", addr, val);
+ return -EIO;
+ }
+
+ return 0;
}
/* ddc router switching */
@@ -347,16 +332,18 @@ amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connecto
if (!amdgpu_connector->router_bus)
return;
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x3, &val);
+ 0x3, &val))
+ return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x1, &val);
+ 0x1, &val))
+ return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
val |= amdgpu_connector->router.ddc_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
@@ -376,16 +363,18 @@ amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *amdgpu_connector
if (!amdgpu_connector->router_bus)
return;
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x3, &val);
+ 0x3, &val))
+ return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x1, &val);
+ 0x1, &val))
+ return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
val |= amdgpu_connector->router.cd_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
index 63c2ff7499e1..1d3d3806e0dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
@@ -30,9 +30,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c);
void amdgpu_i2c_init(struct amdgpu_device *adev);
void amdgpu_i2c_fini(struct amdgpu_device *adev);
-void amdgpu_i2c_add(struct amdgpu_device *adev,
- const struct amdgpu_i2c_bus_rec *rec,
- const char *name);
struct amdgpu_i2c_chan *
amdgpu_i2c_lookup(struct amdgpu_device *adev,
const struct amdgpu_i2c_bus_rec *i2c_bus);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 6aa3b1d845ab..7d9bcb72e8dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -89,16 +89,14 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
/**
* amdgpu_ib_free - free an IB (Indirect Buffer)
*
- * @adev: amdgpu_device pointer
* @ib: IB object to free
* @f: the fence SA bo need wait on for the ib alloation
*
* Free an IB (all asics).
*/
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
- struct dma_fence *f)
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
{
- amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
+ amdgpu_sa_bo_free(&ib->sa_bo, f);
}
/**
@@ -130,8 +128,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = &ibs[0];
struct dma_fence *tmp = NULL;
+ struct amdgpu_fence *af;
bool need_ctx_switch;
- unsigned int patch_offset = ~0;
struct amdgpu_vm *vm;
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
@@ -139,10 +137,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
bool secure, init_shadow;
u64 shadow_va, csa_va, gds_va;
int vmid = AMDGPU_JOB_GET_VMID(job);
-
+ bool need_pipe_sync = false;
+ unsigned int cond_exec;
unsigned int i;
int r = 0;
- bool need_pipe_sync = false;
if (num_ibs == 0)
return -EINVAL;
@@ -156,6 +154,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
csa_va = job->csa_va;
gds_va = job->gds_va;
init_shadow = job->init_shadow;
+ af = &job->hw_fence;
+ /* Save the context of the job for reset handling.
+ * The driver needs this so it can skip the ring
+ * contents for guilty contexts.
+ */
+ af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
} else {
vm = NULL;
fence_ctx = 0;
@@ -163,14 +167,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
csa_va = 0;
gds_va = 0;
init_shadow = false;
+ af = NULL;
}
- if (!ring->sched.ready && !ring->is_mes_queue) {
+ if (!ring->sched.ready) {
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
return -EINVAL;
}
- if (vm && !job->vmid && !ring->is_mes_queue) {
+ if (vm && !job->vmid) {
dev_err(adev->dev, "VM IB without ID\n");
return -EINVAL;
}
@@ -193,8 +198,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
need_ctx_switch = ring->current_ctx != fence_ctx;
if (ring->funcs->emit_pipeline_sync && job &&
((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
- (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
- amdgpu_vm_need_pipeline_sync(ring, job))) {
+ need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
+
need_pipe_sync = true;
if (tmp)
@@ -228,7 +233,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
init_shadow, vmid);
if (ring->funcs->init_cond_exec)
- patch_offset = amdgpu_ring_init_cond_exec(ring);
+ cond_exec = amdgpu_ring_init_cond_exec(ring,
+ ring->cond_exe_gpu_addr);
amdgpu_device_flush_hdp(adev, ring);
@@ -278,19 +284,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
fence_flags | AMDGPU_FENCE_FLAG_64BIT);
}
- if (ring->funcs->emit_gfx_shadow) {
+ if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
-
- if (ring->funcs->init_cond_exec) {
- unsigned int ce_offset = ~0;
-
- ce_offset = amdgpu_ring_init_cond_exec(ring);
- if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
- amdgpu_ring_patch_cond_exec(ring, ce_offset);
- }
+ amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
}
- r = amdgpu_fence_emit(ring, f, job, fence_flags);
+ r = amdgpu_fence_emit(ring, f, af, fence_flags);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
if (job && job->vmid)
@@ -302,19 +301,27 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
if (ring->funcs->insert_end)
ring->funcs->insert_end(ring);
- if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
- amdgpu_ring_patch_cond_exec(ring, patch_offset);
+ amdgpu_ring_patch_cond_exec(ring, cond_exec);
ring->current_ctx = fence_ctx;
- if (vm && ring->funcs->emit_switch_buffer)
+ if (job && ring->funcs->emit_switch_buffer)
amdgpu_ring_emit_switch_buffer(ring);
if (ring->funcs->emit_wave_limit &&
ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
ring->funcs->emit_wave_limit(ring, false);
+ /* Save the wptr associated with this fence.
+ * This must be last for resets to work properly
+ * as we need to save the wptr associated with this
+ * fence so we know which ring contents to back up
+ * after we reset the queue.
+ */
+ amdgpu_fence_save_wptr(*f);
+
amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index ff1ea99292fb..3ef5bc95642c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -62,9 +62,8 @@ int amdgpu_pasid_alloc(unsigned int bits)
int pasid = -EINVAL;
for (bits = min(bits, 31U); bits > 0; bits--) {
- pasid = ida_simple_get(&amdgpu_pasid_ida,
- 1U << (bits - 1), 1U << bits,
- GFP_KERNEL);
+ pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
+ (1U << bits) - 1, GFP_KERNEL);
if (pasid != -ENOSPC)
break;
}
@@ -82,7 +81,7 @@ int amdgpu_pasid_alloc(unsigned int bits)
void amdgpu_pasid_free(u32 pasid)
{
trace_amdgpu_pasid_freed(pasid);
- ida_simple_remove(&amdgpu_pasid_ida, pasid);
+ ida_free(&amdgpu_pasid_ida, pasid);
}
static void amdgpu_pasid_free_cb(struct dma_fence *fence,
@@ -188,7 +187,6 @@ static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
/**
* amdgpu_vmid_grab_idle - grab idle VMID
*
- * @vm: vm to allocate id for
* @ring: ring we want to submit job to
* @idle: resulting idle VMID
* @fence: fence to wait for if no id could be grabbed
@@ -196,8 +194,7 @@ static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
* Try to find an idle VMID, if none is idle add a fence to wait to the sync
* object. Returns -ENOMEM when we are out of memory.
*/
-static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
- struct amdgpu_ring *ring,
+static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring,
struct amdgpu_vmid **idle,
struct dma_fence **fence)
{
@@ -212,7 +209,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
return 0;
}
- fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
+ fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_NOWAIT);
if (!fences)
return -ENOMEM;
@@ -278,22 +275,27 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->vm_hub;
- struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
uint64_t fence_context = adev->fence_context + ring->idx;
bool needs_flush = vm->use_cpu_for_update;
uint64_t updates = amdgpu_vm_tlb_seq(vm);
int r;
- *id = id_mgr->reserved;
+ *id = vm->reserved_vmid[vmhub];
if ((*id)->owner != vm->immediate.fence_context ||
!amdgpu_vmid_compatible(*id, job) ||
(*id)->flushed_updates < updates ||
!(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
- !dma_fence_is_signaled((*id)->last_flush))) {
+ !dma_fence_is_signaled((*id)->last_flush)))
+ needs_flush = true;
+
+ if ((*id)->owner != vm->immediate.fence_context ||
+ (!adev->vm_manager.concurrent_flush && needs_flush)) {
struct dma_fence *tmp;
- /* Don't use per engine and per process VMID at the same time */
+ /* Don't use per engine and per process VMID at the
+ * same time
+ */
if (adev->vm_manager.concurrent_flush)
ring = NULL;
@@ -305,13 +307,13 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
*fence = dma_fence_get(tmp);
return 0;
}
- needs_flush = true;
}
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished);
+ r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished,
+ GFP_NOWAIT);
if (r)
return r;
@@ -327,15 +329,13 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
* @ring: ring we want to submit job to
* @job: job who wants to use the VMID
* @id: resulting VMID
- * @fence: fence to wait for if no id could be grabbed
*
* Try to reuse a VMID for this submission.
*/
static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
struct amdgpu_ring *ring,
struct amdgpu_job *job,
- struct amdgpu_vmid **id,
- struct dma_fence **fence)
+ struct amdgpu_vmid **id)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->vm_hub;
@@ -372,7 +372,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
* user of the VMID.
*/
r = amdgpu_sync_fence(&(*id)->active,
- &job->base.s_fence->finished);
+ &job->base.s_fence->finished,
+ GFP_NOWAIT);
if (r)
return r;
@@ -405,16 +406,16 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
int r = 0;
mutex_lock(&id_mgr->lock);
- r = amdgpu_vmid_grab_idle(vm, ring, &idle, fence);
+ r = amdgpu_vmid_grab_idle(ring, &idle, fence);
if (r || !idle)
goto error;
- if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) {
+ if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id)
goto error;
} else {
- r = amdgpu_vmid_grab_used(vm, ring, job, &id, fence);
+ r = amdgpu_vmid_grab_used(vm, ring, job, &id);
if (r)
goto error;
@@ -424,7 +425,8 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
/* Remember this submission as user of the VMID */
r = amdgpu_sync_fence(&id->active,
- &job->base.s_fence->finished);
+ &job->base.s_fence->finished,
+ GFP_NOWAIT);
if (r)
goto error;
@@ -459,40 +461,73 @@ error:
return r;
}
-int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
+/*
+ * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
+ * @vm: the VM to check
+ * @vmhub: the VMHUB which will be used
+ *
+ * Returns: True if the VM will use a reserved VMID.
+ */
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
+{
+ return vm->reserved_vmid[vmhub];
+}
+
+/*
+ * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm
+ * @adev: amdgpu device structure
+ * @vm: the VM to reserve an ID for
+ * @vmhub: the VMHUB which should be used
+ *
+ * Mostly used to have a reserved VMID for debugging and SPM.
+ *
+ * Returns: 0 on success or if @vm already holds a reserved ID, -ENOENT if the hub's ID is taken.
+ */
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *id;
+ int r = 0;
mutex_lock(&id_mgr->lock);
-
- ++id_mgr->reserved_use_count;
- if (!id_mgr->reserved) {
- struct amdgpu_vmid *id;
-
- id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid,
- list);
- /* Remove from normal round robin handling */
- list_del_init(&id->list);
- id_mgr->reserved = id;
+ if (vm->reserved_vmid[vmhub])
+ goto unlock;
+ if (id_mgr->reserved_vmid) {
+ r = -ENOENT;
+ goto unlock;
}
-
+ /* Remove from normal round robin handling */
+ id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
+ list_del_init(&id->list);
+ vm->reserved_vmid[vmhub] = id;
+ id_mgr->reserved_vmid = true;
mutex_unlock(&id_mgr->lock);
+
return 0;
+unlock:
+ mutex_unlock(&id_mgr->lock);
+ return r;
}
-void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
+/*
+ * amdgpu_vmid_free_reserved - free up a reserved VMID again
+ * @adev: amdgpu device structure
+ * @vm: the VM with the reserved ID
+ * @vmhub: the VMHUB which should be used
+ */
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned vmhub)
{
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
- if (!--id_mgr->reserved_use_count) {
- /* give the reserved ID back to normal round robin */
- list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
- id_mgr->reserved = NULL;
+ if (vm->reserved_vmid[vmhub]) {
+ list_add(&vm->reserved_vmid[vmhub]->list,
+ &id_mgr->ids_lru);
+ vm->reserved_vmid[vmhub] = NULL;
+ id_mgr->reserved_vmid = false;
}
-
mutex_unlock(&id_mgr->lock);
}
@@ -559,10 +594,17 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
mutex_init(&id_mgr->lock);
INIT_LIST_HEAD(&id_mgr->ids_lru);
- id_mgr->reserved_use_count = 0;
- /* manage only VMIDs not used by KFD */
- id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+ /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0))
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+ else if (AMDGPU_IS_MMHUB0(i) ||
+ AMDGPU_IS_MMHUB1(i))
+ id_mgr->num_ids = 16;
+ else
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
@@ -571,10 +613,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}
- /* alloc a default reserved vmid to enforce isolation */
- if (enforce_isolation)
- amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
-
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index fa8c42c83d5d..b3649cd3af56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -67,8 +67,7 @@ struct amdgpu_vmid_mgr {
unsigned num_ids;
struct list_head ids_lru;
struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
- struct amdgpu_vmid *reserved;
- unsigned int reserved_use_count;
+ bool reserved_vmid;
};
int amdgpu_pasid_alloc(unsigned int bits);
@@ -78,10 +77,11 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
-int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
- unsigned vmhub);
-void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
- unsigned vmhub);
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned vmhub);
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned vmhub);
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_job *job, struct dma_fence **fence);
void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index f3b0aaf3ebc6..a6419246e9c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_ih.h"
+#include "amdgpu_reset.h"
/**
* amdgpu_ih_ring_init - initialize the IH state
@@ -217,7 +218,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
restart_ih:
count = AMDGPU_IH_MAX_NUM_IVS;
- DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
+ dev_dbg(adev->dev, "%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
/* Order reading of wptr vs. reading of IH ring data */
rmb();
@@ -227,13 +228,23 @@ restart_ih:
ih->rptr &= ih->ptr_mask;
}
- amdgpu_ih_set_rptr(adev, ih);
+ if (!ih->overflow)
+ amdgpu_ih_set_rptr(adev, ih);
+
wake_up_all(&ih->wait_process);
/* make sure wptr hasn't changed while processing */
wptr = amdgpu_ih_get_wptr(adev, ih);
if (wptr != ih->rptr)
- goto restart_ih;
+ if (!ih->overflow)
+ goto restart_ih;
+
+ if (ih->overflow)
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
return IRQ_HANDLED;
}
@@ -298,3 +309,9 @@ uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
return dw1 | ((u64)(dw2 & 0xffff) << 32);
}
+
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+ return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
+ ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 6c6184f0dbc1..f58b6be7fccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -28,7 +28,7 @@
#define AMDGPU_IH_MAX_NUM_IVS 32
#define IH_RING_SIZE (256 * 1024)
-#define IH_SW_RING_SIZE (8 * 1024) /* enough for 256 CAM entries */
+#define IH_SW_RING_SIZE (16 * 1024) /* enough for 512 CAM entries */
struct amdgpu_device;
struct amdgpu_iv_entry;
@@ -56,14 +56,14 @@ struct amdgpu_ih_ring {
bool use_bus_addr;
struct amdgpu_bo *ring_obj;
- volatile uint32_t *ring;
+ uint32_t *ring;
uint64_t gpu_addr;
uint64_t wptr_addr;
- volatile uint32_t *wptr_cpu;
+ uint32_t *wptr_cpu;
uint64_t rptr_addr;
- volatile uint32_t *rptr_cpu;
+ uint32_t *rptr_cpu;
bool enabled;
unsigned rptr;
@@ -72,12 +72,16 @@ struct amdgpu_ih_ring {
/* For waiting on IH processing at checkpoint. */
wait_queue_head_t wait_process;
uint64_t processed_timestamp;
+ bool overflow;
};
/* return true if time stamp t2 is after t1 with 48bit wrap around */
#define amdgpu_ih_ts_after(t1, t2) \
(((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL)
+#define amdgpu_ih_ts_after_or_equal(t1, t2) \
+ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) >= 0LL)
+
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
@@ -110,4 +114,5 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
signed int offset);
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
new file mode 100644
index 000000000000..99e1cf4fc955
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ip.h"
+
+static int8_t amdgpu_logical_to_dev_inst(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst)
+{
+ int8_t dev_inst;
+
+ switch (block) {
+ case GC_HWIP:
+ case SDMA0_HWIP:
+ /* Covers both JPEG and VCN, as JPEG is only an alias of VCN */
+ case VCN_HWIP:
+ dev_inst = adev->ip_map.dev_inst[block][inst];
+ break;
+ default:
+ /* For the rest of the IPs, no lookup is required.
+ * Assume 'logical instance == physical instance' for all configs. */
+ dev_inst = inst;
+ break;
+ }
+
+ return dev_inst;
+}
+
+static uint32_t amdgpu_logical_to_dev_mask(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask)
+{
+ uint32_t dev_mask = 0;
+ int8_t log_inst, dev_inst;
+
+ while (mask) {
+ log_inst = ffs(mask) - 1;
+ dev_inst = amdgpu_logical_to_dev_inst(adev, block, log_inst);
+ dev_mask |= (1 << dev_inst);
+ mask &= ~(1 << log_inst);
+ }
+
+ return dev_mask;
+}
+
+static void amdgpu_populate_ip_map(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type ip_block,
+ uint32_t inst_mask)
+{
+ int l = 0, i;
+
+ while (inst_mask) {
+ i = ffs(inst_mask) - 1;
+ adev->ip_map.dev_inst[ip_block][l++] = i;
+ inst_mask &= ~(1 << i);
+ }
+ for (; l < HWIP_MAX_INSTANCE; l++)
+ adev->ip_map.dev_inst[ip_block][l] = -1;
+}
+
+void amdgpu_ip_map_init(struct amdgpu_device *adev)
+{
+ u32 ip_map[][2] = {
+ { GC_HWIP, adev->gfx.xcc_mask },
+ { SDMA0_HWIP, adev->sdma.sdma_mask },
+ { VCN_HWIP, adev->vcn.inst_mask },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ip_map); ++i)
+ amdgpu_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]);
+
+ adev->ip_map.logical_to_dev_inst = amdgpu_logical_to_dev_inst;
+ adev->ip_map.logical_to_dev_mask = amdgpu_logical_to_dev_mask;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
new file mode 100644
index 000000000000..2490fd322aec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IP_H__
+#define __AMDGPU_IP_H__
+
+void amdgpu_ip_map_init(struct amdgpu_device *adev);
+
+#endif /* __AMDGPU_IP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index fa6d0adcec20..8112ffc85995 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -142,8 +142,9 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
r = src->funcs->set(adev, src, k,
AMDGPU_IRQ_STATE_DISABLE);
if (r)
- DRM_ERROR("error disabling interrupt (%d)\n",
- r);
+ dev_err(adev->dev,
+ "error disabling interrupt (%d)\n",
+ r);
}
}
}
@@ -242,7 +243,7 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
return true;
}
-static void amdgpu_restore_msix(struct amdgpu_device *adev)
+void amdgpu_restore_msix(struct amdgpu_device *adev)
{
u16 ctrl;
@@ -270,29 +271,29 @@ static void amdgpu_restore_msix(struct amdgpu_device *adev)
*/
int amdgpu_irq_init(struct amdgpu_device *adev)
{
- int r = 0;
- unsigned int irq;
+ unsigned int irq, flags;
+ int r;
spin_lock_init(&adev->irq.lock);
/* Enable MSI if not disabled by module parameter */
adev->irq.msi_enabled = false;
+ if (!amdgpu_msi_ok(adev))
+ flags = PCI_IRQ_INTX;
+ else
+ flags = PCI_IRQ_ALL_TYPES;
+
+ /* we only need one vector */
+ r = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
+ if (r < 0) {
+ dev_err(adev->dev, "Failed to alloc msi vectors\n");
+ return r;
+ }
+
if (amdgpu_msi_ok(adev)) {
- int nvec = pci_msix_vec_count(adev->pdev);
- unsigned int flags;
-
- if (nvec <= 0)
- flags = PCI_IRQ_MSI;
- else
- flags = PCI_IRQ_MSI | PCI_IRQ_MSIX;
-
- /* we only need one vector */
- nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
- if (nvec > 0) {
- adev->irq.msi_enabled = true;
- dev_dbg(adev->dev, "using MSI/MSI-X.\n");
- }
+ adev->irq.msi_enabled = true;
+ dev_dbg(adev->dev, "using MSI/MSI-X.\n");
}
INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1);
@@ -302,22 +303,29 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
/* Use vector 0 for MSI-X. */
r = pci_irq_vector(adev->pdev, 0);
if (r < 0)
- return r;
+ goto free_vectors;
irq = r;
/* PCI devices require shared interrupts. */
r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name,
adev_to_drm(adev));
if (r)
- return r;
+ goto free_vectors;
+
adev->irq.installed = true;
adev->irq.irq = irq;
adev_to_drm(adev)->max_vblank_count = 0x00ffffff;
- DRM_DEBUG("amdgpu: irq initialized.\n");
+ dev_dbg(adev->dev, "amdgpu: irq initialized.\n");
return 0;
-}
+free_vectors:
+ if (adev->irq.msi_enabled)
+ pci_free_irq_vectors(adev->pdev);
+
+ adev->irq.msi_enabled = false;
+ return r;
+}
void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
{
@@ -438,6 +446,14 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
entry.ih = ih;
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
+
+ /*
+ * timestamp is not supported on some legacy SOCs (cik, cz, iceland,
+ * si and tonga), so initialize timestamp and timestamp_src to 0
+ */
+ entry.timestamp = 0;
+ entry.timestamp_src = 0;
+
amdgpu_ih_decode_iv(adev, &entry);
trace_amdgpu_iv(ih - &adev->irq.ih, &entry);
@@ -446,28 +462,32 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
src_id = entry.src_id;
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
- DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
+ dev_dbg(adev->dev, "Invalid client_id in IV: %d\n", client_id);
} else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
- DRM_DEBUG("Invalid src_id in IV: %d\n", src_id);
+ dev_dbg(adev->dev, "Invalid src_id in IV: %d\n", src_id);
- } else if ((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) &&
+ } else if (((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) ||
+ (client_id == SOC15_IH_CLIENTID_ISP)) &&
adev->irq.virq[src_id]) {
generic_handle_domain_irq(adev->irq.domain, src_id);
} else if (!adev->irq.client[client_id].sources) {
- DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
- client_id, src_id);
+ dev_dbg(adev->dev,
+ "Unregistered interrupt client_id: %d src_id: %d\n",
+ client_id, src_id);
} else if ((src = adev->irq.client[client_id].sources[src_id])) {
r = src->funcs->process(adev, src, &entry);
if (r < 0)
- DRM_ERROR("error processing interrupt (%d)\n", r);
+ dev_err(adev->dev, "error processing interrupt (%d)\n",
+ r);
else if (r)
handled = true;
} else {
- DRM_DEBUG("Unregistered interrupt src_id: %d of client_id:%d\n",
+ dev_dbg(adev->dev,
+ "Unregistered interrupt src_id: %d of client_id:%d\n",
src_id, client_id);
}
@@ -603,6 +623,10 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned int type)
{
+ /* When the threshold is reached, the interrupt source may not be enabled; return -EINVAL. */
+ if (amdgpu_ras_is_rma(adev) && !amdgpu_irq_enabled(adev, src, type))
+ return -EINVAL;
+
if (!adev->irq.installed)
return -ENOENT;
@@ -709,10 +733,10 @@ static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
*/
int amdgpu_irq_add_domain(struct amdgpu_device *adev)
{
- adev->irq.domain = irq_domain_add_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
- &amdgpu_hw_irqdomain_ops, adev);
+ adev->irq.domain = irq_domain_create_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
+ &amdgpu_hw_irqdomain_ops, adev);
if (!adev->irq.domain) {
- DRM_ERROR("GPU irq add domain failed\n");
+ dev_err(adev->dev, "GPU irq add domain failed\n");
return -ENODEV;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index 04c0b4fa17a4..9f0417456abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -146,5 +146,6 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev);
int amdgpu_irq_add_domain(struct amdgpu_device *adev);
void amdgpu_irq_remove_domain(struct amdgpu_device *adev);
unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id);
+void amdgpu_restore_msix(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
new file mode 100644
index 000000000000..9cddbf50442a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/mfd/core.h>
+
+#include "amdgpu.h"
+#include "amdgpu_isp.h"
+#include "isp_v4_1_0.h"
+#include "isp_v4_1_1.h"
+
+#define ISP_MC_ADDR_ALIGN (1024 * 32)
+
+/**
+ * isp_hw_init - start and test isp block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int isp_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_isp *isp = &adev->isp;
+
+ if (isp->funcs->hw_init != NULL)
+ return isp->funcs->hw_init(isp);
+
+ return -ENODEV;
+}
+
+/**
+ * isp_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int isp_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_isp *isp = &ip_block->adev->isp;
+
+ if (isp->funcs->hw_fini != NULL)
+ return isp->funcs->hw_fini(isp);
+
+ return -ENODEV;
+}
+
+static int isp_load_fw_by_psp(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *hdr;
+ char ucode_prefix[10];
+ int r = 0;
+
+ /* get isp fw binary name and path */
+ amdgpu_ucode_ip_version_decode(adev, ISP_HWIP, ucode_prefix,
+ sizeof(ucode_prefix));
+
+ /* read isp fw */
+ r = amdgpu_ucode_request(adev, &adev->isp.fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s.bin", ucode_prefix);
+ if (r) {
+ amdgpu_ucode_release(&adev->isp.fw);
+ return r;
+ }
+
+ hdr = (const struct common_firmware_header *)adev->isp.fw->data;
+
+ adev->firmware.ucode[AMDGPU_UCODE_ID_ISP].ucode_id =
+ AMDGPU_UCODE_ID_ISP;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_ISP].fw = adev->isp.fw;
+
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+
+ return r;
+}
+
+static int isp_early_init(struct amdgpu_ip_block *ip_block)
+{
+
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_isp *isp = &adev->isp;
+
+ switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ isp_v4_1_0_set_isp_funcs(isp);
+ break;
+ case IP_VERSION(4, 1, 1):
+ isp_v4_1_1_set_isp_funcs(isp);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ isp->adev = adev;
+ isp->parent = adev->dev;
+
+ if (isp_load_fw_by_psp(adev)) {
+ DRM_DEBUG_DRIVER("%s: isp fw load failed\n", __func__);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static bool isp_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int isp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static int is_valid_isp_device(struct device *isp_parent, struct device *amdgpu_dev)
+{
+ if (isp_parent != amdgpu_dev)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * isp_user_buffer_alloc - create user buffer object (BO) for isp
+ *
+ * @dev: isp device handle
+ * @dmabuf: DMABUF handle for isp buffer allocated in system memory
+ * @buf_obj: GPU buffer object handle to initialize
+ * @buf_addr: GPU addr of the pinned BO to initialize
+ *
+ * Imports the isp DMABUF to allocate and pin a user BO for isp internal use.
+ * A GART allocation provides the GPU address that makes the BO accessible
+ * to the ISP HW through the GART aperture.
+ *
+ * This function is exported to allow the V4L2 isp device external to drm device
+ * to create and access the isp user BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int isp_user_buffer_alloc(struct device *dev, void *dmabuf,
+ void **buf_obj, u64 *buf_addr)
+{
+ struct platform_device *ispdev = to_platform_device(dev);
+ const struct isp_platform_data *isp_pdata;
+ struct amdgpu_device *adev;
+ struct mfd_cell *mfd_cell;
+ struct amdgpu_bo *bo;
+ u64 gpu_addr;
+ int ret;
+
+ if (WARN_ON(!ispdev))
+ return -ENODEV;
+
+ if (WARN_ON(!buf_obj))
+ return -EINVAL;
+
+ if (WARN_ON(!buf_addr))
+ return -EINVAL;
+
+ mfd_cell = &ispdev->mfd_cell[0];
+ if (!mfd_cell)
+ return -ENODEV;
+
+ isp_pdata = mfd_cell->platform_data;
+ adev = isp_pdata->adev;
+
+ ret = is_valid_isp_device(ispdev->dev.parent, adev->dev);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_bo_create_isp_user(adev, dmabuf,
+ AMDGPU_GEM_DOMAIN_GTT, &bo, &gpu_addr);
+ if (ret) {
+ drm_err(&adev->ddev, "failed to alloc gart user buffer (%d)", ret);
+ return ret;
+ }
+
+ *buf_obj = (void *)bo;
+ *buf_addr = gpu_addr;
+
+ return 0;
+}
+EXPORT_SYMBOL(isp_user_buffer_alloc);
+
+/**
+ * isp_user_buffer_free - free isp user buffer object (BO)
+ *
+ * @buf_obj: amdgpu isp user BO to free, i.e. the handle that
+ *           isp_user_buffer_alloc() stored through its @buf_obj argument
+ *
+ * Passes @buf_obj to amdgpu_bo_free_isp_user(), which is expected to unpin
+ * and unref the BO that was created for isp internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the isp user BO.
+ */
+void isp_user_buffer_free(void *buf_obj)
+{
+ amdgpu_bo_free_isp_user(buf_obj);
+}
+EXPORT_SYMBOL(isp_user_buffer_free);
+
+/**
+ * isp_kernel_buffer_alloc - create kernel buffer object (BO) for isp
+ *
+ * @dev: isp device handle
+ * @size: size for the new BO
+ * @buf_obj: GPU BO handle to initialize
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: CPU address mapping of BO
+ *
+ * Allocates and pins a kernel BO for internal isp firmware use. The BO is
+ * requested in the GTT domain with ISP_MC_ADDR_ALIGN alignment.
+ *
+ * The amdgpu device is looked up through the platform data of the first MFD
+ * cell of @dev, and the request is rejected unless the parent of @dev is that
+ * amdgpu device.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to create and access the kernel BO.
+ *
+ * Returns:
+ * 0 on success, -ENODEV when @dev, its MFD cell or the platform data is
+ * missing, -EINVAL for a NULL output pointer or a parent mismatch, or the
+ * error returned by amdgpu_bo_create_kernel().
+ */
+int isp_kernel_buffer_alloc(struct device *dev, u64 size,
+			    void **buf_obj, u64 *gpu_addr, void **cpu_addr)
+{
+	struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj;
+	const struct isp_platform_data *isp_pdata;
+	const struct mfd_cell *mfd_cell;
+	struct platform_device *ispdev;
+	struct amdgpu_device *adev;
+	int ret;
+
+	/*
+	 * to_platform_device() is a container_of(), so its result cannot be
+	 * used to detect a NULL @dev; test @dev itself.
+	 */
+	if (WARN_ON(!dev))
+		return -ENODEV;
+
+	if (WARN_ON(!buf_obj))
+		return -EINVAL;
+
+	if (WARN_ON(!gpu_addr))
+		return -EINVAL;
+
+	if (WARN_ON(!cpu_addr))
+		return -EINVAL;
+
+	ispdev = to_platform_device(dev);
+
+	mfd_cell = ispdev->mfd_cell;
+	if (!mfd_cell)
+		return -ENODEV;
+
+	/* the platform data carries the amdgpu device; both must be present */
+	isp_pdata = mfd_cell->platform_data;
+	if (!isp_pdata || !isp_pdata->adev)
+		return -ENODEV;
+
+	adev = isp_pdata->adev;
+
+	ret = is_valid_isp_device(ispdev->dev.parent, adev->dev);
+	if (ret)
+		return ret;
+
+	ret = amdgpu_bo_create_kernel(adev,
+				      size,
+				      ISP_MC_ADDR_ALIGN,
+				      AMDGPU_GEM_DOMAIN_GTT,
+				      bo,
+				      gpu_addr,
+				      cpu_addr);
+	/* @cpu_addr was validated above, so only the return code can fail here */
+	if (ret) {
+		drm_err(&adev->ddev, "failed to alloc gart kernel buffer (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(isp_kernel_buffer_alloc);
+
+/**
+ * isp_kernel_buffer_free - free isp kernel buffer object (BO)
+ *
+ * @buf_obj: address of the isp kernel BO handle to free
+ * @gpu_addr: GPU addr of isp kernel BO
+ * @cpu_addr: CPU addr of isp kernel BO
+ *
+ * Hands the three handles to amdgpu_bo_free_kernel() to unmap and unpin the
+ * isp kernel BO.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the kernel BO.
+ */
+void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr)
+{
+	amdgpu_bo_free_kernel((struct amdgpu_bo **)buf_obj, gpu_addr, cpu_addr);
+}
+EXPORT_SYMBOL(isp_kernel_buffer_free);
+
+/*
+ * IP-level callbacks shared by every ISP version handled in this file; the
+ * idle, clockgating and powergating hooks are the stubs defined above.
+ */
+static const struct amd_ip_funcs isp_ip_funcs = {
+ .name = "isp_ip",
+ .early_init = isp_early_init,
+ .hw_init = isp_hw_init,
+ .hw_fini = isp_hw_fini,
+ .is_idle = isp_is_idle,
+ .set_clockgating_state = isp_set_clockgating_state,
+ .set_powergating_state = isp_set_powergating_state,
+};
+
+/* IP block descriptor for ISP 4.1.0, backed by the common isp_ip_funcs. */
+const struct amdgpu_ip_block_version isp_v4_1_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_ISP,
+ .major = 4,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &isp_ip_funcs,
+};
+
+/* IP block descriptor for ISP 4.1.1, backed by the common isp_ip_funcs. */
+const struct amdgpu_ip_block_version isp_v4_1_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_ISP,
+ .major = 4,
+ .minor = 1,
+ .rev = 1,
+ .funcs = &isp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
new file mode 100644
index 000000000000..d6f4ffa4c97c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef __AMDGPU_ISP_H__
+#define __AMDGPU_ISP_H__
+
+#include <drm/amd/isp.h>
+#include <linux/pm_domain.h>
+
+#define ISP_REGS_OFFSET_END 0x629A4
+
+struct amdgpu_isp;
+
+/*
+ * Version-specific ISP hardware callbacks; each takes the amdgpu_isp
+ * instance and returns an int status.
+ */
+struct isp_funcs {
+ int (*hw_init)(struct amdgpu_isp *isp);
+ int (*hw_fini)(struct amdgpu_isp *isp);
+};
+
+/* Per-device ISP state, embedded in struct amdgpu_device as adev->isp. */
+struct amdgpu_isp {
+ /* set to adev->dev by isp_early_init() */
+ struct device *parent;
+ /* owning amdgpu device, set by isp_early_init() */
+ struct amdgpu_device *adev;
+ /* version-specific hw_init/hw_fini callbacks */
+ const struct isp_funcs *funcs;
+ /* NOTE(review): presumably the MFD cells and resources of the ISP child devices - confirm */
+ struct mfd_cell *isp_cell;
+ struct resource *isp_res;
+ struct resource *isp_i2c_res;
+ struct resource *isp_gpio_res;
+ struct isp_platform_data *isp_pdata;
+ unsigned int harvest_config;
+ /* firmware image registered as AMDGPU_UCODE_ID_ISP in amdgpu_isp.c */
+ const struct firmware *fw;
+ struct generic_pm_domain ispgpd;
+};
+
+extern const struct amdgpu_ip_block_version isp_v4_1_0_ip_block;
+extern const struct amdgpu_ip_block_version isp_v4_1_1_ip_block;
+
+#endif /* __AMDGPU_ISP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 78476bc75b4e..d020a890a0ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -30,40 +30,124 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_reset.h"
+#include "amdgpu_dev_coredump.h"
+#include "amdgpu_xgmi.h"
+
+/*
+ * Capture the IP state of @adev and hand @job to amdgpu_coredump().
+ *
+ * Every IP block that implements dump_ip_state is asked to snapshot its
+ * state first; blocks without the hook are skipped.
+ */
+static void amdgpu_job_do_core_dump(struct amdgpu_device *adev,
+				    struct amdgpu_job *job)
+{
+	int idx;
+
+	dev_info(adev->dev, "Dumping IP State\n");
+	for (idx = 0; idx < adev->num_ip_blocks; idx++) {
+		struct amdgpu_ip_block *blk = &adev->ip_blocks[idx];
+
+		if (!blk->version->funcs->dump_ip_state)
+			continue;
+
+		blk->version->funcs->dump_ip_state((void *)blk);
+	}
+	dev_info(adev->dev, "Dumping IP State Completed\n");
+
+	amdgpu_coredump(adev, true, false, job);
+}
+
+/*
+ * Run amdgpu_job_do_core_dump() for @adev and, when @adev is part of an XGMI
+ * hive with more than one physical node, for every device in that hive.
+ *
+ * The hive is only looked up on non-SR-IOV devices. When one is found, its
+ * hive_lock is held for the whole walk and the reference is dropped at the
+ * end.
+ */
+static void amdgpu_job_core_dump(struct amdgpu_device *adev,
+ struct amdgpu_job *job)
+{
+ struct list_head device_list, *device_list_handle = NULL;
+ struct amdgpu_device *tmp_adev = NULL;
+ struct amdgpu_hive_info *hive = NULL;
+
+ if (!amdgpu_sriov_vf(adev))
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+ /*
+ * Reuse the logic in amdgpu_device_gpu_recover() to build list of
+ * devices for core dump
+ */
+ INIT_LIST_HEAD(&device_list);
+ if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
+ /* collect every hive member, then move @adev to the front of the list */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ if (!list_is_first(&adev->reset_list, &device_list))
+ list_rotate_to_front(&adev->reset_list, &device_list);
+ device_list_handle = &device_list;
+ } else {
+ /* no multi-node hive: the list holds @adev only */
+ list_add_tail(&adev->reset_list, &device_list);
+ device_list_handle = &device_list;
+ }
+
+ /* Do the coredump for each device */
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list)
+ amdgpu_job_do_core_dump(tmp_adev, job);
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
+}
static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
- struct amdgpu_task_info ti;
+ struct drm_wedge_task_info *info = NULL;
+ struct amdgpu_task_info *ti = NULL;
struct amdgpu_device *adev = ring->adev;
- int idx;
- int r;
+ int idx, r;
if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
- DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s",
+ dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s",
__func__, s_job->sched->name);
/* Effectively the job is aborted as the device is gone */
return DRM_GPU_SCHED_STAT_ENODEV;
}
- memset(&ti, 0, sizeof(struct amdgpu_task_info));
- adev->job_hang = true;
+ /*
+ * Do the coredump immediately after a job timeout to get a very
+ * close dump/snapshot/representation of GPU's current error status
+ * Skip it for SRIOV, since VF FLR will be triggered by host driver
+ * before job timeout
+ */
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_job_core_dump(adev, job);
if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_SOFT_RESET) &&
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
- DRM_ERROR("ring %s timeout, but soft recovered\n",
- s_job->sched->name);
+ dev_err(adev->dev, "ring %s timeout, but soft recovered\n",
+ s_job->sched->name);
goto exit;
}
- amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
- DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
- job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
- ring->fence_drv.sync_seq);
- DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
- ti.process_name, ti.tgid, ti.task_name, ti.pid);
+ dev_err(adev->dev, "ring %s timeout, signaled seq=%u, emitted seq=%u\n",
+ job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
+ ring->fence_drv.sync_seq);
+
+ ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
+ if (ti) {
+ amdgpu_vm_print_task_info(adev, ti);
+ info = &ti->task;
+ }
+
+ /* attempt a per ring reset */
+ if (unlikely(adev->debug_disable_gpu_ring_reset)) {
+ dev_err(adev->dev, "Ring reset disabled by debug mask\n");
+ } else if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
+ ring->funcs->reset) {
+ dev_err(adev->dev, "Starting %s ring reset\n",
+ s_job->sched->name);
+ r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);
+ if (!r) {
+ atomic_inc(&ring->adev->gpu_reset_counter);
+ dev_err(adev->dev, "Ring %s reset succeeded\n",
+ ring->sched.name);
+ drm_dev_wedged_event(adev_to_drm(adev),
+ DRM_WEDGE_RECOVERY_NONE, info);
+ goto exit;
+ }
+ dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
+ }
dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
@@ -73,11 +157,18 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_JOB;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ /*
+ * To avoid an unnecessary extra coredump, as we have already
+ * got the very close representation of GPU's error status
+ */
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
if (r)
- DRM_ERROR("GPU Recovery Failed: %d\n", r);
+ dev_err(adev->dev, "GPU Recovery Failed: %d\n", r);
} else {
drm_sched_suspend_timeout(&ring->sched);
if (amdgpu_sriov_vf(adev))
@@ -85,14 +176,15 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
exit:
- adev->job_hang = false;
+ amdgpu_vm_put_task_info(ti);
drm_dev_exit(idx);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
}
int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_sched_entity *entity, void *owner,
- unsigned int num_ibs, struct amdgpu_job **job)
+ unsigned int num_ibs, struct amdgpu_job **job,
+ u64 drm_client_id)
{
if (num_ibs == 0)
return -EINVAL;
@@ -101,11 +193,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (!*job)
return -ENOMEM;
- /*
- * Initialize the scheduler to at least some ring so that we always
- * have a pointer to adev.
- */
- (*job)->base.sched = &adev->rings[0]->sched;
(*job)->vm = vm;
amdgpu_sync_create(&(*job)->explicit_sync);
@@ -115,17 +202,19 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (!entity)
return 0;
- return drm_sched_job_init(&(*job)->base, entity, owner);
+ return drm_sched_job_init(&(*job)->base, entity, 1, owner,
+ drm_client_id);
}
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
struct drm_sched_entity *entity, void *owner,
size_t size, enum amdgpu_ib_pool_type pool_type,
- struct amdgpu_job **job)
+ struct amdgpu_job **job, u64 k_job_id)
{
int r;
- r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job);
+ r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job,
+ k_job_id);
if (r)
return r;
@@ -159,20 +248,19 @@ void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
unsigned i;
/* Check if any fences where initialized */
if (job->base.s_fence && job->base.s_fence->finished.ops)
f = &job->base.s_fence->finished;
- else if (job->hw_fence.ops)
- f = &job->hw_fence;
+ else if (job->hw_fence.base.ops)
+ f = &job->hw_fence.base;
else
f = NULL;
for (i = 0; i < job->num_ibs; ++i)
- amdgpu_ib_free(ring->adev, &job->ibs[i], f);
+ amdgpu_ib_free(&job->ibs[i], f);
}
static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
@@ -184,10 +272,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
amdgpu_sync_free(&job->explicit_sync);
/* only put the hw fence if has embedded fence */
- if (!job->hw_fence.ops)
+ if (!job->hw_fence.base.ops)
kfree(job);
else
- dma_fence_put(&job->hw_fence);
+ dma_fence_put(&job->hw_fence.base);
}
void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -216,10 +304,10 @@ void amdgpu_job_free(struct amdgpu_job *job)
if (job->gang_submit != &job->base.s_fence->scheduled)
dma_fence_put(job->gang_submit);
- if (!job->hw_fence.ops)
+ if (!job->hw_fence.base.ops)
kfree(job);
else
- dma_fence_put(&job->hw_fence);
+ dma_fence_put(&job->hw_fence.base);
}
struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
@@ -255,26 +343,33 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
struct amdgpu_job *job = to_amdgpu_job(sched_job);
- struct dma_fence *fence = NULL;
+ struct dma_fence *fence;
int r;
- /* Ignore soft recovered fences here */
r = drm_sched_entity_error(s_entity);
- if (r && r != -ENODATA)
+ if (r)
goto error;
- if (!fence && job->gang_submit)
+ if (job->gang_submit) {
fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+ if (fence)
+ return fence;
+ }
+
+ fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
+ if (fence)
+ return fence;
- while (!fence && job->vm && !job->vmid) {
+ if (job->vm && !job->vmid) {
r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
if (r) {
- DRM_ERROR("Error getting VM ID (%d)\n", r);
+ dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
goto error;
}
+ return fence;
}
- return fence;
+ return NULL;
error:
dma_fence_set_error(&job->base.s_fence->finished, r);
@@ -300,12 +395,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
dma_fence_set_error(finished, -ECANCELED);
if (finished->error < 0) {
- DRM_INFO("Skip scheduling IBs!\n");
+ dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
+ ring->name);
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev,
+ "Error scheduling IBs (%d) in ring(%s)", r,
+ ring->name);
}
job->job_run_counter++;
@@ -315,8 +413,24 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
return fence;
}
-#define to_drm_sched_job(sched_job) \
- container_of((sched_job), struct drm_sched_job, queue_node)
+/*
+ * Pop the next job from @entity's job queue, or return NULL when the queue
+ * is empty.
+ *
+ * This is a duplicate of the function in the DRM scheduler's
+ * sched_internal.h. The plan is to remove it together with
+ * amdgpu_job_stop_all_jobs_on_sched(), because the latter is incorrect and
+ * racy.
+ *
+ * See https://lore.kernel.org/amd-gfx/44edde63-7181-44fb-a4f7-94e50514f539@amd.com/
+ */
+static struct drm_sched_job *
+drm_sched_entity_queue_pop(struct drm_sched_entity *entity)
+{
+	struct spsc_node *popped = spsc_queue_pop(&entity->job_queue);
+
+	return popped ? container_of(popped, struct drm_sched_job, queue_node) :
+			NULL;
+}
void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
{
@@ -325,11 +439,11 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
int i;
/* Signal all jobs not yet scheduled */
- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
- struct drm_sched_rq *rq = &sched->sched_rq[i];
+ for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
+ struct drm_sched_rq *rq = sched->sched_rq[i];
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list) {
- while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
+ while ((s_job = drm_sched_entity_queue_pop(s_entity))) {
struct drm_sched_fence *s_fence = s_job->s_fence;
dma_fence_signal(&s_fence->scheduled);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index a963a25ddd62..4a6487eb6cb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -44,11 +44,27 @@
struct amdgpu_fence;
enum amdgpu_ib_pool_type;
+/*
+ * Internal kernel job ids (decreasing values, starting from U64_MAX and
+ * currently ending at U64_MAX - 13). One of these is passed as the k_job_id
+ * argument of amdgpu_job_alloc_with_ib() to identify the kind of
+ * kernel-internal job being submitted.
+ */
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE (18446744073709551615ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES (18446744073709551614ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE (18446744073709551613ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR (18446744073709551612ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER (18446744073709551611ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA (18446744073709551610ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER (18446744073709551609ULL)
+#define AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE (18446744073709551608ULL)
+#define AMDGPU_KERNEL_JOB_ID_MOVE_BLIT (18446744073709551607ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER (18446744073709551606ULL)
+#define AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER (18446744073709551605ULL)
+#define AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB (18446744073709551604ULL)
+#define AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP (18446744073709551603ULL)
+#define AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST (18446744073709551602ULL)
+
struct amdgpu_job {
struct drm_sched_job base;
struct amdgpu_vm *vm;
struct amdgpu_sync explicit_sync;
- struct dma_fence hw_fence;
+ struct amdgpu_fence hw_fence;
struct dma_fence *gang_submit;
uint32_t preamble_status;
uint32_t preemption_status;
@@ -76,6 +92,10 @@ struct amdgpu_job {
/* job_run_counter >= 1 means a resubmit job */
uint32_t job_run_counter;
+ /* enforce isolation */
+ bool enforce_isolation;
+ bool run_cleaner_shader;
+
uint32_t num_ibs;
struct amdgpu_ib ibs[];
};
@@ -87,11 +107,13 @@ static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job)
int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_sched_entity *entity, void *owner,
- unsigned int num_ibs, struct amdgpu_job **job);
+ unsigned int num_ibs, struct amdgpu_job **job,
+ u64 drm_client_id);
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
struct drm_sched_entity *entity, void *owner,
size_t size, enum amdgpu_ib_pool_type pool_type,
- struct amdgpu_job **job);
+ struct amdgpu_job **job,
+ u64 k_job_id);
void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
struct amdgpu_bo *gws, struct amdgpu_bo *oa);
void amdgpu_job_free_resources(struct amdgpu_job *job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 2ff2897fd1db..6b7d66b6d4cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -33,13 +33,39 @@
#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000)
static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
+static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev);
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
{
+ int i, r;
+
INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler);
mutex_init(&adev->jpeg.jpeg_pg_lock);
atomic_set(&adev->jpeg.total_submission_cnt, 0);
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG))
+ adev->jpeg.indirect_sram = true;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1U << i))
+ continue;
+
+ if (adev->jpeg.indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->jpeg.inst[i].dpg_sram_bo,
+ &adev->jpeg.inst[i].dpg_sram_gpu_addr,
+ &adev->jpeg.inst[i].dpg_sram_cpu_addr);
+ if (r) {
+ dev_err(adev->dev,
+ "JPEG %d (%d) failed to allocate DPG bo\n", i, r);
+ return r;
+ }
+ }
+ }
+
return 0;
}
@@ -48,13 +74,21 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- if (adev->jpeg.harvest_config & (1 << i))
+ if (adev->jpeg.harvest_config & (1U << i))
continue;
+ amdgpu_bo_free_kernel(
+ &adev->jpeg.inst[i].dpg_sram_bo,
+ &adev->jpeg.inst[i].dpg_sram_gpu_addr,
+ (void **)&adev->jpeg.inst[i].dpg_sram_cpu_addr);
+
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]);
}
+ if (adev->jpeg.reg_list)
+ amdgpu_jpeg_reg_dump_fini(adev);
+
mutex_destroy(&adev->jpeg.jpeg_pg_lock);
return 0;
@@ -80,17 +114,19 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
unsigned int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- if (adev->jpeg.harvest_config & (1 << i))
+ if (adev->jpeg.harvest_config & (1U << i))
continue;
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]);
}
- if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt))
+ if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) {
+ mutex_lock(&adev->jpeg.jpeg_pg_lock);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
AMD_PG_STATE_GATE);
- else
+ mutex_unlock(&adev->jpeg.jpeg_pg_lock);
+ } else
schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
}
@@ -160,7 +196,8 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -210,12 +247,15 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
} else {
r = 0;
}
+
if (!amdgpu_sriov_vf(adev)) {
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
+ if (amdgpu_emu_mode == 1)
+ udelay(10);
}
if (i >= adev->usec_timeout)
@@ -296,3 +336,272 @@ int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
return 0;
}
+
+/*
+ * amdgpu_jpeg_psp_update_sram - load the DPG SRAM buffer of one JPEG instance
+ * @adev: amdgpu device
+ * @inst_idx: JPEG instance whose DPG SRAM buffer is submitted
+ * @ucode_id: currently unused; the request is always issued as
+ *            AMDGPU_UCODE_ID_JPEG_RAM
+ *
+ * The size handed to PSP is the distance between the start of the
+ * instance's DPG SRAM CPU mapping and its current write cursor.
+ *
+ * Returns the result of psp_execute_ip_fw_load().
+ */
+int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+				enum AMDGPU_UCODE_ID ucode_id)
+{
+	uintptr_t sram_start = (uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr;
+	uintptr_t sram_cursor = (uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_curr_addr;
+	struct amdgpu_firmware_info ucode = {
+		.ucode_id = AMDGPU_UCODE_ID_JPEG_RAM,
+		.mc_addr = adev->jpeg.inst[inst_idx].dpg_sram_gpu_addr,
+		.ucode_size = sram_cursor - sram_start,
+	};
+
+	return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+/*
+ * debugfs interface to enable/disable jpeg job submission to a specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * debugfs write handler: enable or disable scheduling on each JPEG ring.
+ *
+ * @data: the amdgpu device registered with the debugfs file
+ * @val: bitmask; bit (inst * num_jpeg_rings + ring) selects ring_dec[ring] of
+ *       instance inst, the same layout amdgpu_debugfs_jpeg_sched_mask_get()
+ *       reports
+ *
+ * Returns 0 on success, -ENODEV without a device, or -EINVAL when @val would
+ * leave no ring enabled.
+ */
+static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	struct amdgpu_ring *ring;
+	u32 i, j, ring_idx;
+	u64 mask = 0;
+
+	if (!adev)
+		return -ENODEV;
+
+	mask = (1ULL << (adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings)) - 1;
+	if ((val & mask) == 0)
+		return -EINVAL;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			ring = &adev->jpeg.inst[i].ring_dec[j];
+			ring_idx = (i * adev->jpeg.num_jpeg_rings) + j;
+			/*
+			 * Test bit ring_idx itself. BIT_ULL(1) << ring_idx
+			 * would test bit ring_idx + 1 and disagree with both
+			 * the getter and the validation mask above.
+			 */
+			ring->sched.ready = !!(val & BIT_ULL(ring_idx));
+		}
+	}
+	/*
+	 * publish sched.ready flag update effective immediately across smp
+	 * NOTE(review): a read barrier does not order the stores above;
+	 * confirm whether smp_wmb()/smp_mb() was intended.
+	 */
+	smp_rmb();
+	return 0;
+}
+
+/*
+ * debugfs read handler: report which JPEG rings are currently schedulable.
+ *
+ * @data: the amdgpu device registered with the debugfs file
+ * @val: receives a bitmask in which bit (inst * num_jpeg_rings + ring) is set
+ *       when ring_dec[ring] of instance inst has sched.ready set
+ *
+ * Returns 0 on success or -ENODEV without a device.
+ */
+static int amdgpu_debugfs_jpeg_sched_mask_get(void *data, u64 *val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u64 ready_bits = 0;
+	u32 inst, idx;
+
+	if (!adev)
+		return -ENODEV;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		for (idx = 0; idx < adev->jpeg.num_jpeg_rings; ++idx) {
+			if (!adev->jpeg.inst[inst].ring_dec[idx].sched.ready)
+				continue;
+
+			ready_bits |= 1ULL << ((inst * adev->jpeg.num_jpeg_rings) + idx);
+		}
+	}
+
+	*val = ready_bits;
+	return 0;
+}
+
+/* File operations for the sched mask file: hex value, read via _get, written via _set. */
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_jpeg_sched_mask_fops,
+ amdgpu_debugfs_jpeg_sched_mask_get,
+ amdgpu_debugfs_jpeg_sched_mask_set, "%llx\n");
+
+#endif
+
+/*
+ * Create the "amdgpu_jpeg_sched_mask" debugfs file under the primary DRM
+ * minor's debugfs root.
+ *
+ * Nothing is created when the device has at most one JPEG instance and at
+ * most one JPEG ring, or when CONFIG_DEBUG_FS is disabled.
+ */
+void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	struct drm_minor *minor = adev_to_drm(adev)->primary;
+	struct dentry *root = minor->debugfs_root;
+
+	if (!(adev->jpeg.num_jpeg_inst > 1) && !(adev->jpeg.num_jpeg_rings > 1))
+		return;
+
+	/* the name is constant, so no scratch buffer or sprintf() is needed */
+	debugfs_create_file("amdgpu_jpeg_sched_mask", 0600, root, adev,
+			    &amdgpu_debugfs_jpeg_sched_mask_fops);
+#endif
+}
+
+/*
+ * sysfs show handler for jpeg_reset_mask: formats adev->jpeg.supported_reset
+ * into @buf through amdgpu_show_reset_mask().
+ *
+ * Returns the value of amdgpu_show_reset_mask(), or -ENODEV without a device.
+ */
+static ssize_t amdgpu_get_jpeg_reset_mask(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(dev));
+
+	if (adev)
+		return amdgpu_show_reset_mask(buf, adev->jpeg.supported_reset);
+
+	return -ENODEV;
+}
+
+static DEVICE_ATTR(jpeg_reset_mask, 0444,
+		   amdgpu_get_jpeg_reset_mask, NULL);
+
+/*
+ * Create the jpeg_reset_mask sysfs attribute on the device when at least one
+ * JPEG instance exists.
+ *
+ * Returns 0 when nothing had to be created, otherwise the result of
+ * device_create_file().
+ */
+int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+	if (!adev->jpeg.num_jpeg_inst)
+		return 0;
+
+	return device_create_file(adev->dev, &dev_attr_jpeg_reset_mask);
+}
+
+/*
+ * Remove the jpeg_reset_mask sysfs attribute. Nothing is done when the
+ * device kobject has no sysfs directory or when there is no JPEG instance.
+ */
+void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+	if (adev->dev->kobj.sd && adev->jpeg.num_jpeg_inst)
+		device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
+}
+
+/*
+ * amdgpu_jpeg_reg_dump_init - set up the JPEG register dump buffer
+ * @adev: amdgpu device
+ * @reg: register list to dump for every instance
+ * @count: number of entries in @reg
+ *
+ * Allocates a zeroed buffer of num_jpeg_inst * @count 32-bit slots and, on
+ * success, records @reg and @count in adev->jpeg.
+ *
+ * Returns 0 on success or -ENOMEM when the allocation fails.
+ */
+int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
+			      const struct amdgpu_hwip_reg_entry *reg, u32 count)
+{
+	adev->jpeg.ip_dump = kcalloc(adev->jpeg.num_jpeg_inst * count,
+				     sizeof(uint32_t), GFP_KERNEL);
+	if (adev->jpeg.ip_dump) {
+		adev->jpeg.reg_list = reg;
+		adev->jpeg.reg_count = count;
+		return 0;
+	}
+
+	dev_err(adev->dev,
+		"Failed to allocate memory for JPEG IP Dump\n");
+	return -ENOMEM;
+}
+
+/*
+ * Release the JPEG register dump buffer and forget the register list.
+ *
+ * ip_dump is reset to NULL because amdgpu_jpeg_dump_ip_state() and
+ * amdgpu_jpeg_print_ip_state() use that pointer to decide whether a dump
+ * buffer exists; leaving it dangling would let them touch freed memory.
+ */
+static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev)
+{
+	kfree(adev->jpeg.ip_dump);
+	adev->jpeg.ip_dump = NULL;
+	adev->jpeg.reg_list = NULL;
+	adev->jpeg.reg_count = 0;
+}
+
+/*
+ * amdgpu_jpeg_dump_ip_state - snapshot the JPEG registers into ip_dump
+ * @ip_block: JPEG IP block
+ *
+ * For every non-harvested instance the first register of reg_list is read
+ * and stored; the remaining registers are read only when bit 0 of that first
+ * value is clear. Each instance owns reg_count consecutive slots of ip_dump.
+ * Does nothing when no dump buffer was allocated.
+ */
+void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 inst_off, inst_id;
+	bool is_powered;
+	int i, j;
+
+	if (!adev->jpeg.ip_dump)
+		return;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+		/* unsigned shift, matching the other harvest_config tests */
+		if (adev->jpeg.harvest_config & (1U << i))
+			continue;
+
+		inst_id = GET_INST(JPEG, i);
+		inst_off = i * adev->jpeg.reg_count;
+		/* check power status from UVD_JPEG_POWER_STATUS */
+		adev->jpeg.ip_dump[inst_off] =
+			RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[0],
+							   inst_id));
+		is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
+		if (!is_powered)
+			continue;
+
+		for (j = 1; j < adev->jpeg.reg_count; j++)
+			adev->jpeg.ip_dump[inst_off + j] =
+				RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[j],
+								   inst_id));
+	}
+}
+
+/*
+ * amdgpu_jpeg_print_ip_state - print the snapshot taken by
+ * amdgpu_jpeg_dump_ip_state()
+ * @ip_block: JPEG IP block
+ * @p: printer that receives the output
+ *
+ * Harvested instances and instances whose first dumped value has bit 0 set
+ * are reported without register values; for all others every register name
+ * and dumped value is printed. Does nothing when no dump buffer exists.
+ */
+void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool is_powered;
+	u32 inst_off;
+	int i, j;
+
+	if (!adev->jpeg.ip_dump)
+		return;
+
+	drm_printf(p, "num_instances:%d\n", adev->jpeg.num_jpeg_inst);
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+		/* unsigned shift, matching the other harvest_config tests */
+		if (adev->jpeg.harvest_config & (1U << i)) {
+			drm_printf(p, "\nHarvested Instance:JPEG%d Skipping dump\n", i);
+			continue;
+		}
+
+		inst_off = i * adev->jpeg.reg_count;
+		is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
+
+		if (is_powered) {
+			drm_printf(p, "Active Instance:JPEG%d\n", i);
+			for (j = 0; j < adev->jpeg.reg_count; j++)
+				drm_printf(p, "%-50s \t 0x%08x\n", adev->jpeg.reg_list[j].reg_name,
+					   adev->jpeg.ip_dump[inst_off + j]);
+		} else {
+			drm_printf(p, "\nInactive Instance:JPEG%d\n", i);
+		}
+	}
+}
+
+/*
+ * A register offset is valid when it lies inside
+ * [JPEG_REG_RANGE_START, JPEG_REG_RANGE_END] and outside
+ * [JPEG_ATOMIC_RANGE_START, JPEG_ATOMIC_RANGE_END].
+ */
+static inline bool amdgpu_jpeg_reg_valid(u32 reg)
+{
+	const bool in_reg_range = reg >= JPEG_REG_RANGE_START &&
+				  reg <= JPEG_REG_RANGE_END;
+	const bool in_atomic_range = reg >= JPEG_ATOMIC_RANGE_START &&
+				     reg <= JPEG_ATOMIC_RANGE_END;
+
+	return in_reg_range && !in_atomic_range;
+}
+
+/**
+ * amdgpu_jpeg_dec_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Walks the IB in steps of two dwords and validates the first dword of each
+ * step as a packet header: the reserved field must be 0, TYPE0 and TYPE3
+ * packets must carry their matching condition and a valid register, and a
+ * TYPE6 packet must be exactly CP_PACKETJ_NOP.
+ *
+ * Parse the command stream, return -EINVAL for invalid packet,
+ * 0 otherwise
+ */
+
+int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser,
+			     struct amdgpu_job *job,
+			     struct amdgpu_ib *ib)
+{
+	struct amdgpu_device *adev = parser->adev;
+	u32 idx, reg, res, cond, type, want_cond;
+
+	for (idx = 0; idx < ib->length_dw; idx += 2) {
+		reg = CP_PACKETJ_GET_REG(ib->ptr[idx]);
+		res = CP_PACKETJ_GET_RES(ib->ptr[idx]);
+		cond = CP_PACKETJ_GET_COND(ib->ptr[idx]);
+		type = CP_PACKETJ_GET_TYPE(ib->ptr[idx]);
+
+		/* only support 0 at the moment */
+		if (res)
+			return -EINVAL;
+
+		switch (type) {
+		case PACKETJ_TYPE0:
+		case PACKETJ_TYPE3:
+			/* each of the two types requires its own condition code */
+			want_cond = (type == PACKETJ_TYPE0) ?
+				    PACKETJ_CONDITION_CHECK0 :
+				    PACKETJ_CONDITION_CHECK3;
+			if (cond == want_cond && amdgpu_jpeg_reg_valid(reg))
+				break;
+			dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[idx]);
+			return -EINVAL;
+		case PACKETJ_TYPE6:
+			if (ib->ptr[idx] != CP_PACKETJ_NOP) {
+				dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[idx]);
+				return -EINVAL;
+			}
+			break;
+		default:
+			dev_err(adev->dev, "Unknown packet type %d !\n", type);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index ffe47e9f5bf2..346ae0ab09d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -25,13 +25,88 @@
#define __AMDGPU_JPEG_H__
#include "amdgpu_ras.h"
+#include "amdgpu_cs.h"
#define AMDGPU_MAX_JPEG_INSTANCES 4
-#define AMDGPU_MAX_JPEG_RINGS 8
+#define AMDGPU_MAX_JPEG_RINGS 10
+#define AMDGPU_MAX_JPEG_RINGS_4_0_3 8
+
+/*
+ * Register offset window accepted by amdgpu_jpeg_reg_valid(): offsets must
+ * fall inside JPEG_REG_RANGE_* and outside JPEG_ATOMIC_RANGE_*.
+ */
+#define JPEG_REG_RANGE_START 0x4000
+#define JPEG_REG_RANGE_END 0x41c2
+#define JPEG_ATOMIC_RANGE_START 0x4120
+#define JPEG_ATOMIC_RANGE_END 0x412A
+
#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
+#define WREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ JPEG, GET_INST(JPEG, inst_idx), \
+ mmUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \
+ indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } else { \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
+ } while (0)
+
+#define RREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, mask_en) \
+ ({ \
+ WREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_CTL, \
+ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC24_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \
+ do { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_MASK, 0xFFFFFFFF); \
+ WREG32_SOC15( \
+ JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \
+ indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } while (0)
+
+#define RREG32_SOC24_JPEG_DPG_MODE(inst_idx, offset, mask_en) \
+ ({ /* statement expression: evaluates to the LMA_DATA read below */ \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_MASK, 0xFFFFFFFF); \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__MASK_EN_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), regUVD_DPG_LMA_DATA); \
+ })
+
+#define ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, offset, value, indirect) \
+ do { \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = offset; \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value; \
+ } while (0)
+
+struct amdgpu_hwip_reg_entry;
+
+enum amdgpu_jpeg_caps {
+ AMDGPU_JPEG_RRMT_ENABLED,
+};
+
+#define AMDGPU_JPEG_CAPS(caps) BIT(AMDGPU_JPEG_##caps)
+
struct amdgpu_jpeg_reg{
unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS];
};
@@ -41,6 +116,11 @@ struct amdgpu_jpeg_inst {
struct amdgpu_irq_src irq;
struct amdgpu_irq_src ras_poison_irq;
struct amdgpu_jpeg_reg external;
+ struct amdgpu_bo *dpg_sram_bo;
+ struct dpg_pause_state pause_state;
+ void *dpg_sram_cpu_addr;
+ uint64_t dpg_sram_gpu_addr;
+ uint32_t *dpg_sram_curr_addr;
uint8_t aid_id;
};
@@ -63,6 +143,12 @@ struct amdgpu_jpeg {
uint16_t inst_mask;
uint8_t num_inst_per_aid;
+ bool indirect_sram;
+ uint32_t supported_reset;
+ uint32_t caps;
+ u32 *ip_dump;
+ u32 reg_count;
+ const struct amdgpu_hwip_reg_entry *reg_list;
};
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
@@ -82,5 +168,17 @@ int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev,
struct ras_common_if *ras_block);
int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id);
+void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count);
+void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block);
+void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
+int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d30dc0b718c7..a9327472c651 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -43,7 +43,9 @@
#include "amdgpu_gem.h"
#include "amdgpu_display.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
#include "amd_pcie.h"
+#include "amdgpu_userq.h"
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
@@ -89,7 +91,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
if (adev->rmmio == NULL)
return;
- if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_UNLOAD))
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD))
DRM_WARN("smart shift update failed\n");
amdgpu_acpi_fini(adev);
@@ -149,38 +151,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
goto out;
}
- adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
- if (amdgpu_device_supports_px(dev) &&
- (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */
- adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
- dev_info(adev->dev, "Using ATPX for runtime pm\n");
- } else if (amdgpu_device_supports_boco(dev) &&
- (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */
- adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
- dev_info(adev->dev, "Using BOCO for runtime pm\n");
- } else if (amdgpu_device_supports_baco(dev) &&
- (amdgpu_runtime_pm != 0)) {
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- /* enable BACO as runpm mode if runpm=1 */
- if (amdgpu_runtime_pm > 0)
- adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
- break;
- case CHIP_VEGA10:
- /* enable BACO as runpm mode if noretry=0 */
- if (!adev->gmc.noretry)
- adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
- break;
- default:
- /* enable BACO as runpm mode on CI+ */
- adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
- break;
- }
-
- if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)
- dev_info(adev->dev, "Using BACO for runtime pm\n");
- }
+ amdgpu_device_detect_runtime_pm_mode(adev);
/* Call ACPI methods: require modeset init
* but failure is not fatal
@@ -190,7 +161,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
if (acpi_status)
dev_dbg(dev->dev, "Error during ACPI methods call\n");
- if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_LOAD))
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD))
DRM_WARN("smart shift update failed\n");
out:
@@ -200,6 +171,44 @@ out:
return r;
}
+static enum amd_ip_block_type
+ amdgpu_ip_get_block_type(struct amdgpu_device *adev, uint32_t ip)
+{
+ enum amd_ip_block_type type;
+
+ switch (ip) {
+ case AMDGPU_HW_IP_GFX:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ type = AMD_IP_BLOCK_TYPE_SDMA;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ type = AMD_IP_BLOCK_TYPE_VCE;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ case AMDGPU_HW_IP_VCN_ENC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ default:
+ type = AMD_IP_BLOCK_TYPE_NUM;
+ break;
+ }
+
+ return type;
+}
+
static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
struct drm_amdgpu_query_fw *query_fw,
struct amdgpu_device *adev)
@@ -352,12 +361,36 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->gfx.imu_fw_version;
fw_info->feature = 0;
break;
+ case AMDGPU_INFO_FW_VPE:
+ fw_info->ver = adev->vpe.fw_version;
+ fw_info->feature = adev->vpe.feature_version;
+ break;
default:
return -EINVAL;
}
return 0;
}
+static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_uq_metadata_gfx *meta)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow = {};
+
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true);
+ meta->shadow_size = shadow.shadow_size;
+ meta->shadow_alignment = shadow.shadow_alignment;
+ meta->csa_size = shadow.csa_size;
+ meta->csa_alignment = shadow.csa_alignment;
+ ret = 0;
+ }
+
+ return ret;
+}
+
static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
struct drm_amdgpu_info *info,
struct drm_amdgpu_info_hw_ip *result)
@@ -366,6 +399,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
uint32_t ib_size_alignment = 0;
enum amd_ip_block_type type;
unsigned int num_rings = 0;
+ uint32_t num_slots = 0;
unsigned int i, j;
if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
@@ -375,24 +409,45 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- if (adev->gfx.gfx_ring[i].sched.ready)
+ if (adev->gfx.gfx_ring[i].sched.ready &&
+ !adev->gfx.gfx_ring[i].no_user_submission)
++num_rings;
+
+ if (!adev->gfx.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
+ num_slots += hweight32(adev->mes.gfx_hqd_mask[i]);
+ }
+
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
- if (adev->gfx.compute_ring[i].sched.ready)
+ if (adev->gfx.compute_ring[i].sched.ready &&
+ !adev->gfx.compute_ring[i].no_user_submission)
++num_rings;
+
+ if (!adev->gfx.disable_uq) { /* compute rings live under adev->gfx */
+ for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++)
+ num_slots += hweight32(adev->mes.compute_hqd_mask[i]);
+ }
+
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
- if (adev->sdma.instance[i].ring.sched.ready)
+ if (adev->sdma.instance[i].ring.sched.ready &&
+ !adev->sdma.instance[i].ring.no_user_submission)
++num_rings;
+
+ if (!adev->sdma.disable_uq) { /* gate matches the sdma_hqd_mask counted below */
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
+ num_slots += hweight32(adev->mes.sdma_hqd_mask[i]);
+ }
+
ib_start_alignment = 256;
ib_size_alignment = 4;
break;
@@ -402,19 +457,21 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.harvest_config & (1 << i))
continue;
- if (adev->uvd.inst[i].ring.sched.ready)
+ if (adev->uvd.inst[i].ring.sched.ready &&
+ !adev->uvd.inst[i].ring.no_user_submission)
++num_rings;
}
- ib_start_alignment = 64;
+ ib_start_alignment = 256;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
- if (adev->vce.ring[i].sched.ready)
+ if (adev->vce.ring[i].sched.ready &&
+ !adev->vce.ring[i].no_user_submission)
++num_rings;
- ib_start_alignment = 4;
- ib_size_alignment = 1;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
@@ -423,11 +480,12 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
- if (adev->uvd.inst[i].ring_enc[j].sched.ready)
+ if (adev->uvd.inst[i].ring_enc[j].sched.ready &&
+ !adev->uvd.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
- ib_start_alignment = 64;
- ib_size_alignment = 64;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
@@ -435,11 +493,12 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.harvest_config & (1 << i))
continue;
- if (adev->vcn.inst[i].ring_dec.sched.ready)
+ if (adev->vcn.inst[i].ring_dec.sched.ready &&
+ !adev->vcn.inst[i].ring_dec.no_user_submission)
++num_rings;
}
- ib_start_alignment = 16;
- ib_size_alignment = 16;
+ ib_start_alignment = 256;
+ ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
@@ -447,12 +506,13 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.harvest_config & (1 << i))
continue;
- for (j = 0; j < adev->vcn.num_enc_rings; j++)
- if (adev->vcn.inst[i].ring_enc[j].sched.ready)
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++)
+ if (adev->vcn.inst[i].ring_enc[j].sched.ready &&
+ !adev->vcn.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
- ib_start_alignment = 64;
- ib_size_alignment = 1;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
@@ -463,11 +523,20 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
- if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
+ if (adev->jpeg.inst[i].ring_dec[j].sched.ready &&
+ !adev->jpeg.inst[i].ring_dec[j].no_user_submission)
++num_rings;
}
- ib_start_alignment = 16;
- ib_size_alignment = 16;
+ ib_start_alignment = 256;
+ ib_size_alignment = 64;
+ break;
+ case AMDGPU_HW_IP_VPE:
+ type = AMD_IP_BLOCK_TYPE_VPE;
+ if (adev->vpe.ring.sched.ready &&
+ !adev->vpe.ring.no_user_submission)
+ ++num_rings;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
break;
default:
return -EINVAL;
@@ -490,18 +559,26 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->asic_type >= CHIP_VEGA10) {
switch (type) {
case AMD_IP_BLOCK_TYPE_GFX:
- result->ip_discovery_version = adev->ip_versions[GC_HWIP][0];
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, GC_HWIP, 0));
break;
case AMD_IP_BLOCK_TYPE_SDMA:
- result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, SDMA0_HWIP, 0));
break;
case AMD_IP_BLOCK_TYPE_UVD:
case AMD_IP_BLOCK_TYPE_VCN:
case AMD_IP_BLOCK_TYPE_JPEG:
- result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, UVD_HWIP, 0));
break;
case AMD_IP_BLOCK_TYPE_VCE:
- result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VCE_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_VPE:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VPE_HWIP, 0));
break;
default:
result->ip_discovery_version = 0;
@@ -512,6 +589,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
}
result->capabilities_flags = 0;
result->available_rings = (1 << num_rings) - 1;
+ result->userq_num_slots = num_slots;
result->ib_start_alignment = ib_start_alignment;
result->ib_size_alignment = ib_size_alignment;
return 0;
@@ -538,11 +616,16 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
struct drm_amdgpu_info *info = data;
struct amdgpu_mode_info *minfo = &adev->mode_info;
void __user *out = (void __user *)(uintptr_t)info->return_pointer;
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_ip_block *ip_block;
+ enum amd_ip_block_type type;
+ struct amdgpu_xcp *xcp;
+ u32 count, inst_mask;
uint32_t size = info->return_size;
struct drm_crtc *crtc;
uint32_t ui32 = 0;
uint64_t ui64 = 0;
- int i, found;
+ int i, found, ret;
int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer)
@@ -570,7 +653,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
case AMDGPU_INFO_HW_IP_INFO: {
struct drm_amdgpu_info_hw_ip ip = {};
- int ret;
ret = amdgpu_hw_ip_info(adev, info, &ip);
if (ret)
@@ -580,46 +662,73 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return ret ? -EFAULT : 0;
}
case AMDGPU_INFO_HW_IP_COUNT: {
- enum amd_ip_block_type type;
- uint32_t count = 0;
+ fpriv = (struct amdgpu_fpriv *)filp->driver_priv;
+ type = amdgpu_ip_get_block_type(adev, info->query_hw_ip.type);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, type);
- switch (info->query_hw_ip.type) {
- case AMDGPU_HW_IP_GFX:
- type = AMD_IP_BLOCK_TYPE_GFX;
- break;
- case AMDGPU_HW_IP_COMPUTE:
- type = AMD_IP_BLOCK_TYPE_GFX;
- break;
- case AMDGPU_HW_IP_DMA:
- type = AMD_IP_BLOCK_TYPE_SDMA;
- break;
- case AMDGPU_HW_IP_UVD:
- type = AMD_IP_BLOCK_TYPE_UVD;
+ if (!ip_block || !ip_block->status.valid)
+ return -EINVAL;
+
+ if (adev->xcp_mgr && adev->xcp_mgr->num_xcps > 0 &&
+ fpriv->xcp_id < adev->xcp_mgr->num_xcps) {
+ xcp = &adev->xcp_mgr->xcp[fpriv->xcp_id];
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_SDMA, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask) * adev->jpeg.num_jpeg_rings;
+ break;
+ case AMD_IP_BLOCK_TYPE_VCN:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
+ }
+
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ case AMD_IP_BLOCK_TYPE_VCE:
+ count = 1;
break;
- case AMDGPU_HW_IP_VCE:
- type = AMD_IP_BLOCK_TYPE_VCE;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ count = adev->sdma.num_instances;
break;
- case AMDGPU_HW_IP_UVD_ENC:
- type = AMD_IP_BLOCK_TYPE_UVD;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ count = adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings;
break;
- case AMDGPU_HW_IP_VCN_DEC:
- case AMDGPU_HW_IP_VCN_ENC:
- type = AMD_IP_BLOCK_TYPE_VCN;
+ case AMD_IP_BLOCK_TYPE_VCN:
+ count = adev->vcn.num_vcn_inst;
break;
- case AMDGPU_HW_IP_VCN_JPEG:
- type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
- AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ case AMD_IP_BLOCK_TYPE_UVD:
+ count = adev->uvd.num_uvd_inst;
break;
+ /* For all other IP block types not listed in the switch statement
+ * the ip status is valid here and the instance count is one.
+ */
default:
- return -EINVAL;
+ count = 1;
+ break;
}
- for (i = 0; i < adev->num_ip_blocks; i++)
- if (adev->ip_blocks[i].version->type == type &&
- adev->ip_blocks[i].status.valid &&
- count < AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
- count++;
-
return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
}
case AMDGPU_INFO_TIMESTAMP:
@@ -627,7 +736,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_FW_VERSION: {
struct drm_amdgpu_info_firmware fw_info;
- int ret;
/* We only support one instance of each IP block right now. */
if (info->query_fw.ip_instance != 0)
@@ -722,6 +830,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
? -EFAULT : 0;
}
case AMDGPU_INFO_READ_MMR_REG: {
+ int ret = 0;
unsigned int n, alloc_size;
uint32_t *regs;
unsigned int se_num = (info->read_mmr_reg.instance >>
@@ -731,24 +840,37 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
AMDGPU_INFO_MMR_SH_INDEX_SHIFT) &
AMDGPU_INFO_MMR_SH_INDEX_MASK;
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -ENOENT;
+
/* set full masks if the userspace set all bits
* in the bitfields
*/
- if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK)
+ if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) {
se_num = 0xffffffff;
- else if (se_num >= AMDGPU_GFX_MAX_SE)
- return -EINVAL;
- if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
+ } else if (se_num >= AMDGPU_GFX_MAX_SE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) {
sh_num = 0xffffffff;
- else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE)
- return -EINVAL;
+ } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (info->read_mmr_reg.count > 128)
- return -EINVAL;
+ if (info->read_mmr_reg.count > 128) {
+ ret = -EINVAL;
+ goto out;
+ }
regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
- if (!regs)
- return -ENOMEM;
+ if (!regs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
amdgpu_gfx_off_ctrl(adev, false);
@@ -760,19 +882,22 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
info->read_mmr_reg.dword_offset + i);
kfree(regs);
amdgpu_gfx_off_ctrl(adev, true);
- return -EFAULT;
+ ret = -EFAULT;
+ goto out;
}
}
amdgpu_gfx_off_ctrl(adev, true);
n = copy_to_user(out, regs, min(size, alloc_size));
kfree(regs);
- return n ? -EFAULT : 0;
+ ret = (n ? -EFAULT : 0);
+out:
+ up_read(&adev->reset_domain->sem);
+ return ret;
}
case AMDGPU_INFO_DEV_INFO: {
struct drm_amdgpu_info_device *dev_info;
uint64_t vm_size;
- uint32_t pcie_gen_mask;
- int ret;
+ uint32_t pcie_gen_mask, pcie_width_mask;
dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
if (!dev_info)
@@ -814,15 +939,28 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (adev->gfx.config.ta_cntl2_truncate_coord_mode)
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
+ /* Gang submit is not supported under SRIOV currently */
+ if (!amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_GANG_SUBMIT;
+
+ if (amdgpu_passthrough(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
+ else if (amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_VF <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
+
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
- vm_size -= AMDGPU_VA_RESERVED_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_TOP;
/* Older VCE FW versions are buggy and can handle only 40bits */
if (adev->vce.fw_version &&
adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
vm_size = min(vm_size, 1ULL << 40);
- dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
+ dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_BOTTOM;
dev_info->virtual_address_max =
min(vm_size, AMDGPU_GMC_HOLE_START);
@@ -860,15 +998,18 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask;
/* Combine the chip gen mask with the platform (CPU/mobo) mask. */
- pcie_gen_mask = adev->pm.pcie_gen_mask & (adev->pm.pcie_gen_mask >> 16);
+ pcie_gen_mask = adev->pm.pcie_gen_mask &
+ (adev->pm.pcie_gen_mask >> CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT);
+ pcie_width_mask = adev->pm.pcie_mlw_mask &
+ (adev->pm.pcie_mlw_mask >> CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT);
dev_info->pcie_gen = fls(pcie_gen_mask);
dev_info->pcie_num_lanes =
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 :
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 :
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 :
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 :
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
dev_info->tcp_cache_size = adev->gfx.config.gc_tcp_l1_size;
dev_info->num_sqc_per_wgp = adev->gfx.config.gc_num_sqc_per_wgp;
@@ -892,6 +1033,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
}
}
+ dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+
ret = copy_to_user(out, dev_info,
min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
kfree(dev_info);
@@ -1026,6 +1169,20 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (amdgpu_dpm_read_sensor(adev,
AMDGPU_PP_SENSOR_GPU_AVG_POWER,
(void *)&ui32, &ui32_size)) {
+ /* fall back to input power for backwards compat */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ }
+ ui32 >>= 8;
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_INPUT_POWER:
+ /* get input GPU power */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+ (void *)&ui32, &ui32_size)) {
return -EINVAL;
}
ui32 >>= 8;
@@ -1173,6 +1330,42 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return copy_to_user(out, max_ibs,
min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
}
+ case AMDGPU_INFO_GPUVM_FAULT: {
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
+ unsigned long flags;
+
+ if (!vm)
+ return -EINVAL;
+
+ memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+ gpuvm_fault.addr = vm->fault_info.addr;
+ gpuvm_fault.status = vm->fault_info.status;
+ gpuvm_fault.vmhub = vm->fault_info.vmhub;
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+
+ return copy_to_user(out, &gpuvm_fault,
+ min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_UQ_FW_AREAS: {
+ struct drm_amdgpu_info_uq_metadata meta_info = {};
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_GFX:
+ ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx);
+ if (ret)
+ return ret;
+
+ ret = copy_to_user(out, &meta_info,
+ min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+ return ret; /* propagate -EFAULT instead of reporting success */
+ default:
+ return -EINVAL;
+ }
+ }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
@@ -1180,23 +1373,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return 0;
}
-
-/*
- * Outdated mess for old drm with Xorg being in charge (void function now).
- */
-/**
- * amdgpu_driver_lastclose_kms - drm callback for last close
- *
- * @dev: drm dev pointer
- *
- * Switch vga_switcheroo state after last close (all asics).
- */
-void amdgpu_driver_lastclose_kms(struct drm_device *dev)
-{
- drm_fb_helper_lastclose(dev);
- vga_switcheroo_process_delayed_switch();
-}
-
/**
* amdgpu_driver_open_kms - drm callback for open
*
@@ -1243,13 +1419,11 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
if (r)
goto error_pasid;
- r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
- if (r)
- goto error_pasid;
+ amdgpu_debugfs_vm_init(file_priv);
- r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
+ r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid);
if (r)
- goto error_vm;
+ goto error_pasid;
fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
if (!fpriv->prt_va) {
@@ -1266,9 +1440,21 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto error_vm;
}
+ r = amdgpu_seq64_map(adev, &fpriv->vm, &fpriv->seq64_va);
+ if (r)
+ goto error_vm;
+
mutex_init(&fpriv->bo_list_lock);
idr_init_base(&fpriv->bo_list_handles, 1);
+ r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev);
+ if (r)
+ DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n");
+
+ r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
+ if (r)
+ goto error_vm;
+
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
file_priv->driver_priv = fpriv;
@@ -1278,10 +1464,8 @@ error_vm:
amdgpu_vm_fini(adev, &fpriv->vm);
error_pasid:
- if (pasid) {
+ if (pasid)
amdgpu_pasid_free(pasid);
- amdgpu_vm_set_pasid(adev, &fpriv->vm, 0);
- }
kfree(fpriv);
@@ -1329,6 +1513,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
fpriv->csa_va = NULL;
}
+ amdgpu_seq64_unmap(adev, fpriv);
+
pasid = fpriv->vm.pasid;
pd = amdgpu_bo_ref(fpriv->vm.root.bo);
if (!WARN_ON(amdgpu_bo_reserve(pd, true))) {
@@ -1729,6 +1915,14 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
seq_printf(m, "MES feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+ /* VPE */
+ query_fw.fw_type = AMDGPU_INFO_FW_VPE;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "VPE feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
seq_printf(m, "VBIOS version: %s\n", ctx->vbios_pn);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 8d9ff9e151de..3ca03b5e0f91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -27,6 +27,16 @@
#include "umc/umc_6_7_0_offset.h"
#include "umc/umc_6_7_0_sh_mask.h"
+static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev,
+ uint64_t mc_status)
+{
+ if (adev->umc.ras->check_ecc_err_status)
+ return adev->umc.ras->check_ecc_err_status(adev,
+ AMDGPU_MCA_ERROR_TYPE_DE, &mc_status);
+
+ return false;
+}
+
void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
uint64_t mc_status_addr,
unsigned long *error_count)
@@ -142,3 +152,479 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev)
return 0;
}
+
+/* Turn @mca_set into an empty bank set: zero it, then set up its list head. */
+static void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set)
+{
+	if (mca_set) {
+		memset(mca_set, 0, sizeof(*mca_set));
+		INIT_LIST_HEAD(&mca_set->list);
+	}
+}
+
+/*
+ * Append a copy of @entry to the tail of @mca_set and bump its entry count.
+ *
+ * Returns 0 on success, -EINVAL when @entry is NULL, or -ENOMEM when the list
+ * node cannot be allocated.
+ */
+static int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry)
+{
+	struct mca_bank_node *new_node;
+
+	if (!entry)
+		return -EINVAL;
+
+	new_node = kvzalloc(sizeof(*new_node), GFP_KERNEL);
+	if (!new_node)
+		return -ENOMEM;
+
+	/* The set stores its own copy of the bank. */
+	memcpy(&new_node->entry, entry, sizeof(new_node->entry));
+	INIT_LIST_HEAD(&new_node->node);
+
+	list_add_tail(&new_node->node, &mca_set->list);
+	mca_set->nr_entries++;
+
+	return 0;
+}
+
+/*
+ * Append a copy of every entry of @new to @mca_set; @new itself is not changed.
+ *
+ * Returns 0 on success, or the first error reported by
+ * amdgpu_mca_bank_set_add_entry().  Entries appended before the failure stay
+ * in @mca_set.
+ */
+static int amdgpu_mca_bank_set_merge(struct mca_bank_set *mca_set, struct mca_bank_set *new)
+{
+	struct mca_bank_node *node;
+	int ret;
+
+	list_for_each_entry(node, &new->list, node) {
+		/* Report a failed copy instead of silently dropping the bank. */
+		ret = amdgpu_mca_bank_set_add_entry(mca_set, &node->entry);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Unlink @node from @mca_set, free it and decrement the set's entry count.
+ * A NULL @node is ignored.
+ */
+static void amdgpu_mca_bank_set_remove_node(struct mca_bank_set *mca_set, struct mca_bank_node *node)
+{
+	if (node) {
+		list_del(&node->node);
+		kvfree(node);
+
+		mca_set->nr_entries--;
+	}
+}
+
+/* Remove and free every node held by @mca_set. */
+static void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set)
+{
+	struct mca_bank_node *cur, *next;
+
+	/* The safe iterator does nothing on an empty list. */
+	list_for_each_entry_safe(cur, next, &mca_set->list, node)
+		amdgpu_mca_bank_set_remove_node(mca_set, cur);
+}
+
+/* Install @mca_funcs as the MCA SMU callback table of @adev. */
+void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs)
+{
+	adev->mca.mca_funcs = mca_funcs;
+}
+
+/*
+ * Prepare the MCA state of @adev: clear the UE update flag and give every
+ * bank cache an initialised lock and an empty bank set.  Always returns 0.
+ */
+int amdgpu_mca_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_mca *mca = &adev->mca;
+	struct mca_bank_cache *cache = mca->mca_caches;
+	struct mca_bank_cache *end = cache + ARRAY_SIZE(mca->mca_caches);
+
+	atomic_set(&mca->ue_update_flag, 0);
+
+	for (; cache < end; cache++) {
+		mutex_init(&cache->lock);
+		amdgpu_mca_bank_set_init(&cache->mca_set);
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down the MCA state of @adev: clear the UE update flag, free every
+ * cached bank and destroy the per-cache locks.
+ */
+void amdgpu_mca_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_mca *mca = &adev->mca;
+	struct mca_bank_cache *cache = mca->mca_caches;
+	struct mca_bank_cache *end = cache + ARRAY_SIZE(mca->mca_caches);
+
+	atomic_set(&mca->ue_update_flag, 0);
+
+	for (; cache < end; cache++) {
+		amdgpu_mca_bank_set_release(&cache->mca_set);
+		mutex_destroy(&cache->lock);
+	}
+}
+
+/*
+ * Drop all cached MCA banks and re-initialise the MCA state of @adev.
+ * Returns the result of amdgpu_mca_init().
+ */
+int amdgpu_mca_reset(struct amdgpu_device *adev)
+{
+	int ret;
+
+	amdgpu_mca_fini(adev);
+	ret = amdgpu_mca_init(adev);
+
+	return ret;
+}
+
+/*
+ * Forward a debug-mode switch to the SMU callback table.
+ *
+ * Returns the callback's result, or -EOPNOTSUPP when no table or no
+ * mca_set_debug_mode() callback is installed.
+ */
+int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+	const struct amdgpu_mca_smu_funcs *funcs = adev->mca.mca_funcs;
+
+	if (!funcs || !funcs->mca_set_debug_mode)
+		return -EOPNOTSUPP;
+
+	return funcs->mca_set_debug_mode(adev, enable);
+}
+
+/*
+ * Log the STATUS, ADDR, MISC0, IPID and SYND registers of bank @idx.
+ *
+ * The messages are tagged with the event id taken from @qctx, or with
+ * RAS_EVENT_INVALID_ID when @qctx is NULL.
+ */
+static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry,
+					 struct ras_query_context *qctx)
+{
+	const uint64_t *regs = entry->regs;
+	u64 event_id = RAS_EVENT_INVALID_ID;
+
+	if (qctx)
+		event_id = qctx->evid.event_id;
+
+	RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
+	RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n",
+		      idx, regs[MCA_REG_IDX_STATUS]);
+	RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n",
+		      idx, regs[MCA_REG_IDX_ADDR]);
+	RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n",
+		      idx, regs[MCA_REG_IDX_MISC0]);
+	RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n",
+		      idx, regs[MCA_REG_IDX_IPID]);
+	RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n",
+		      idx, regs[MCA_REG_IDX_SYND]);
+}
+
+/*
+ * Ask the SMU callback table how many valid MCA banks of class @type exist.
+ *
+ * Returns -EINVAL when @count is NULL, -EOPNOTSUPP when no
+ * mca_get_valid_mca_count() callback is installed, otherwise the callback's
+ * result.
+ */
+static int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count)
+{
+	const struct amdgpu_mca_smu_funcs *funcs;
+
+	if (!count)
+		return -EINVAL;
+
+	funcs = adev->mca.mca_funcs;
+	if (!funcs || !funcs->mca_get_valid_mca_count)
+		return -EOPNOTSUPP;
+
+	return funcs->mca_get_valid_mca_count(adev, type, count);
+}
+
+/*
+ * Fetch MCA bank @idx of error class @type into @entry via the SMU callback.
+ *
+ * Returns -EOPNOTSUPP when no mca_get_mca_entry() callback is installed,
+ * -EINVAL when @type is neither UE nor CE or when @idx lies outside
+ * [0, max_ue_count) / [0, max_ce_count), otherwise the callback's result.
+ */
+static int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+					int idx, struct mca_bank_entry *entry)
+{
+	const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+	int count;
+
+	if (!mca_funcs || !mca_funcs->mca_get_mca_entry)
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case AMDGPU_MCA_ERROR_TYPE_UE:
+		count = mca_funcs->max_ue_count;
+		break;
+	case AMDGPU_MCA_ERROR_TYPE_CE:
+		count = mca_funcs->max_ce_count;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* @idx is signed: reject negative values as well as values past the limit. */
+	if (idx < 0 || idx >= count)
+		return -EINVAL;
+
+	return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
+}
+
+/*
+ * Decide whether the banks of class @type should be fetched right now.
+ *
+ * Every class other than UE is always fetched.  For UE, see the comment in the
+ * body: while a RAS interrupt is pending only the first caller gets "true".
+ */
+static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgpu_mca_error_type type)
+{
+	struct amdgpu_mca *mca = &adev->mca;
+
+	if (type != AMDGPU_MCA_ERROR_TYPE_UE)
+		return true;
+
+	/*
+	 * Because the UE Valid MCA count will only be cleared after reset,
+	 * in order to avoid repeated counting of the error count,
+	 * the aca bank is only updated once during the gpu recovery stage.
+	 */
+	if (amdgpu_ras_intr_triggered())
+		return atomic_cmpxchg(&mca->ue_update_flag, 0, 1) == 0;
+
+	/* No interrupt pending: re-arm the flag and allow the update. */
+	atomic_set(&mca->ue_update_flag, 0);
+
+	return true;
+}
+
+/*
+ * Decide whether @entry should be written to the log.
+ *
+ * CE banks are dumped only when their STATUS register describes a deferred
+ * error; banks of every other class are always dumped.
+ */
+static bool amdgpu_mca_bank_should_dump(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+					struct mca_bank_entry *entry)
+{
+	if (type == AMDGPU_MCA_ERROR_TYPE_CE)
+		return amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]);
+
+	return true;
+}
+
+/*
+ * Read all valid MCA banks of class @type from the SMU into @mca_set.
+ *
+ * Each bank is appended to @mca_set and, when amdgpu_mca_bank_should_dump()
+ * agrees, also logged using @qctx.
+ *
+ * Returns 0 on success or when amdgpu_mca_bank_should_update() says no update
+ * is due, -EINVAL when @mca_set is NULL, or the first error reported while
+ * querying the count, fetching a bank or storing it.  Banks stored before a
+ * failure stay in @mca_set; the caller releases the set.
+ */
+static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set,
+				      struct ras_query_context *qctx)
+{
+	struct mca_bank_entry entry;
+	uint32_t count = 0, i;
+	int ret;
+
+	if (!mca_set)
+		return -EINVAL;
+
+	if (!amdgpu_mca_bank_should_update(adev, type))
+		return 0;
+
+	ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < count; i++) {
+		memset(&entry, 0, sizeof(entry));
+		ret = amdgpu_mca_smu_get_mca_entry(adev, type, i, &entry);
+		if (ret)
+			return ret;
+
+		ret = amdgpu_mca_bank_set_add_entry(mca_set, &entry);
+
+		/* Log the bank even if it could not be stored. */
+		if (amdgpu_mca_bank_should_dump(adev, type, &entry))
+			amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx);
+
+		/* Do not pretend success when a bank was lost. */
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Let the SMU callback table derive an error count for @entry.
+ *
+ * Returns -EINVAL when @entry or @count is NULL, -EOPNOTSUPP when no
+ * mca_parse_mca_error_count() callback is installed, otherwise the callback's
+ * result.
+ */
+static int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+						enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
+{
+	const struct amdgpu_mca_smu_funcs *funcs;
+
+	if (!count || !entry)
+		return -EINVAL;
+
+	funcs = adev->mca.mca_funcs;
+	if (funcs && funcs->mca_parse_mca_error_count)
+		return funcs->mca_parse_mca_error_count(adev, blk, type, entry, count);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Account the banks of @mca_set that belong to RAS block @blk in @err_data.
+ *
+ * For every bank the SMU callback is asked for an error count.  Banks with a
+ * non-zero count are added to the UE, DE or CE statistics of @err_data and
+ * then removed from @mca_set; banks with a zero count (including the case
+ * where the callback is unsupported) are left in the set.
+ *
+ * Returns 0 on success, -EINVAL when @mca_set is NULL, or the first parse
+ * error other than -EOPNOTSUPP.
+ */
+static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+				       struct mca_bank_set *mca_set, struct ras_err_data *err_data)
+{
+	struct amdgpu_smuio_mcm_config_info mcm_info;
+	struct mca_bank_node *cur, *next;
+	struct mca_bank_entry *bank;
+	uint32_t err_cnt;
+	int ret;
+
+	if (!mca_set)
+		return -EINVAL;
+
+	if (!mca_set->nr_entries)
+		return 0;
+
+	list_for_each_entry_safe(cur, next, &mca_set->list, node) {
+		bank = &cur->entry;
+		err_cnt = 0;
+
+		ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, bank, &err_cnt);
+		if (ret && ret != -EOPNOTSUPP)
+			return ret;
+
+		/* Nothing to account for this bank: keep it in the set. */
+		if (!err_cnt)
+			continue;
+
+		memset(&mcm_info, 0, sizeof(mcm_info));
+		mcm_info.socket_id = bank->info.socket_id;
+		mcm_info.die_id = bank->info.aid;
+
+		if (type == AMDGPU_MCA_ERROR_TYPE_UE)
+			amdgpu_ras_error_statistic_ue_count(err_data,
+							    &mcm_info, (uint64_t)err_cnt);
+		else if (amdgpu_mca_is_deferred_error(adev, bank->regs[MCA_REG_IDX_STATUS]))
+			amdgpu_ras_error_statistic_de_count(err_data,
+							    &mcm_info, (uint64_t)err_cnt);
+		else
+			amdgpu_ras_error_statistic_ce_count(err_data,
+							    &mcm_info, (uint64_t)err_cnt);
+
+		/* The bank has been accounted; drop it from the set. */
+		amdgpu_mca_bank_set_remove_node(mca_set, cur);
+	}
+
+	return 0;
+}
+
+/*
+ * Copy every bank of @new into the device-wide cache for error class @type,
+ * holding the cache lock while doing so.
+ *
+ * Returns -EINVAL when @type has no cache slot, otherwise the result of
+ * amdgpu_mca_bank_set_merge().
+ */
+static int amdgpu_mca_add_mca_set_to_cache(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *new)
+{
+	struct mca_bank_cache *mca_cache;
+	int ret;
+
+	/* Not every error type value has a slot in mca_caches[]. */
+	if ((unsigned int)type >= ARRAY_SIZE(adev->mca.mca_caches))
+		return -EINVAL;
+
+	mca_cache = &adev->mca.mca_caches[type];
+
+	mutex_lock(&mca_cache->lock);
+	ret = amdgpu_mca_bank_set_merge(&mca_cache->mca_set, new);
+	mutex_unlock(&mca_cache->lock);
+
+	return ret;
+}
+
+/*
+ * Collect the MCA banks of class @type from the SMU and account those that
+ * belong to RAS block @blk in @err_data.
+ *
+ * Freshly read banks that are not accounted for @blk are moved to the
+ * per-type cache; afterwards the cache itself is dispatched against @blk so
+ * that banks left over from earlier queries can be accounted as well.
+ *
+ * Returns 0 on success, -EINVAL when @type has no bank cache, or the first
+ * error reported by one of the steps.
+ */
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+				 struct ras_err_data *err_data, struct ras_query_context *qctx)
+{
+	struct mca_bank_set mca_set;
+	struct mca_bank_cache *mca_cache;
+	int ret;
+
+	/* Not every error type value has a slot in mca_caches[]. */
+	if ((unsigned int)type >= ARRAY_SIZE(adev->mca.mca_caches))
+		return -EINVAL;
+
+	mca_cache = &adev->mca.mca_caches[type];
+
+	amdgpu_mca_bank_set_init(&mca_set);
+
+	ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, qctx);
+	if (ret)
+		goto out_mca_release;
+
+	ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_set, err_data);
+	if (ret)
+		goto out_mca_release;
+
+	/* add remain mca bank to mca cache */
+	if (mca_set.nr_entries) {
+		ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set);
+		if (ret)
+			goto out_mca_release;
+	}
+
+	/* dispatch mca set again if mca cache has valid data */
+	mutex_lock(&mca_cache->lock);
+	if (mca_cache->mca_set.nr_entries)
+		ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_cache->mca_set, err_data);
+	mutex_unlock(&mca_cache->lock);
+
+out_mca_release:
+	/* The local set only ever holds copies; always free it. */
+	amdgpu_mca_bank_set_release(&mca_set);
+
+	return ret;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * debugfs setter: any non-zero @val turns MCA debug mode on, zero turns it
+ * off.  Returns the error from amdgpu_ras_set_mca_debug_mode(), or 0 after
+ * logging the new state.
+ */
+static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = data;
+	bool enable = !!val;
+	int ret;
+
+	ret = amdgpu_ras_set_mca_debug_mode(adev, enable);
+	if (ret)
+		return ret;
+
+	dev_info(adev->dev, "amdgpu set smu mca debug mode %s success\n", enable ? "on" : "off");
+
+	return 0;
+}
+
+/*
+ * Print one MCA bank to the seq_file: its type, IP, location info and the
+ * STATUS, ADDR, MISC0, IPID and SYND registers.
+ */
+static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry)
+{
+	static const int regs_to_dump[] = {
+		MCA_REG_IDX_STATUS,
+		MCA_REG_IDX_ADDR,
+		MCA_REG_IDX_MISC0,
+		MCA_REG_IDX_IPID,
+		MCA_REG_IDX_SYND,
+	};
+	const struct mca_bank_info *info = &entry->info;
+	int idx = entry->idx;
+	int reg;
+	int i;
+
+	seq_printf(m, "mca entry[%d].type: %s\n", idx, entry->type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE");
+	seq_printf(m, "mca entry[%d].ip: %d\n", idx, entry->ip);
+	seq_printf(m, "mca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
+		   idx, info->socket_id, info->aid, info->hwid, info->mcatype);
+
+	for (i = 0; i < ARRAY_SIZE(regs_to_dump); i++) {
+		reg = regs_to_dump[i];
+		seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, reg, entry->regs[reg]);
+	}
+}
+
+/*
+ * Common show routine of the mca_ue_dump / mca_ce_dump debugfs files.
+ *
+ * Reads the banks of class @type from the SMU, prints their count and
+ * contents to @m, and finally copies them into the per-type bank cache.
+ * Returns 0 on success or the first error reported along the way.
+ */
+static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+	struct mca_bank_node *node;
+	struct mca_bank_set mca_set;
+	struct ras_query_context qctx;
+	int ret;
+
+	amdgpu_mca_bank_set_init(&mca_set);
+
+	/* Hand out a fully defined context, not just a defined event id. */
+	memset(&qctx, 0, sizeof(qctx));
+	qctx.evid.event_id = RAS_EVENT_INVALID_ID;
+	ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, &qctx);
+	if (ret)
+		goto err_free_mca_set;
+
+	seq_printf(m, "amdgpu smu %s valid mca count: %d\n",
+		   type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", mca_set.nr_entries);
+
+	/* Nothing to print or cache; ret is still 0 here. */
+	if (!mca_set.nr_entries)
+		goto err_free_mca_set;
+
+	list_for_each_entry(node, &mca_set.list, node)
+		mca_dump_entry(m, &node->entry);
+
+	/* add mca bank to mca bank cache */
+	ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set);
+
+err_free_mca_set:
+	amdgpu_mca_bank_set_release(&mca_set);
+
+	return ret;
+}
+
+/* seq_file show callback: dump the MCA banks of class AMDGPU_MCA_ERROR_TYPE_CE. */
+static int mca_dump_ce_show(struct seq_file *m, void *unused)
+{
+ return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_CE);
+}
+
+/* open(): bind mca_dump_ce_show() to the file, passing inode->i_private as seq_file private data. */
+static int mca_dump_ce_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mca_dump_ce_show, inode->i_private);
+}
+
+/* File operations of the "mca_ce_dump" debugfs file (single_open based seq_file). */
+static const struct file_operations mca_ce_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = mca_dump_ce_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/* seq_file show callback: dump the MCA banks of class AMDGPU_MCA_ERROR_TYPE_UE. */
+static int mca_dump_ue_show(struct seq_file *m, void *unused)
+{
+ return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_UE);
+}
+
+/* open(): bind mca_dump_ue_show() to the file, passing inode->i_private as seq_file private data. */
+static int mca_dump_ue_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mca_dump_ue_show, inode->i_private);
+}
+
+/* File operations of the "mca_ue_dump" debugfs file (single_open based seq_file). */
+static const struct file_operations mca_ue_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = mca_dump_ue_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/* Attribute with a setter only (the getter argument is NULL); values are written via amdgpu_mca_smu_debug_mode_set(). */
+DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_set, "%llu\n");
+#endif
+
+/*
+ * Create the MCA debugfs files below @root: "mca_debug_mode" (0200),
+ * "mca_ue_dump" and "mca_ce_dump" (both 0400).  Does nothing when @root is
+ * NULL or when the kernel is built without CONFIG_DEBUG_FS.
+ */
+void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
+{
+#if defined(CONFIG_DEBUG_FS)
+	if (root) {
+		debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops);
+		debugfs_create_file("mca_ue_dump", 0400, root, adev, &mca_ue_dump_debug_fops);
+		debugfs_create_file("mca_ce_dump", 0400, root, adev, &mca_ce_dump_debug_fops);
+	}
+#endif
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 997a073e2409..e80323ff90c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -21,6 +21,53 @@
#ifndef __AMDGPU_MCA_H__
#define __AMDGPU_MCA_H__
+#include "amdgpu_ras.h"
+
+#define MCA_MAX_REGS_COUNT (16)
+
+#define MCA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
+#define MCA_REG__STATUS__VAL(x) MCA_REG_FIELD(x, 63, 63)
+#define MCA_REG__STATUS__OVERFLOW(x) MCA_REG_FIELD(x, 62, 62)
+#define MCA_REG__STATUS__UC(x) MCA_REG_FIELD(x, 61, 61)
+#define MCA_REG__STATUS__EN(x) MCA_REG_FIELD(x, 60, 60)
+#define MCA_REG__STATUS__MISCV(x) MCA_REG_FIELD(x, 59, 59)
+#define MCA_REG__STATUS__ADDRV(x) MCA_REG_FIELD(x, 58, 58)
+#define MCA_REG__STATUS__PCC(x) MCA_REG_FIELD(x, 57, 57)
+#define MCA_REG__STATUS__ERRCOREIDVAL(x) MCA_REG_FIELD(x, 56, 56)
+#define MCA_REG__STATUS__TCC(x) MCA_REG_FIELD(x, 55, 55)
+#define MCA_REG__STATUS__SYNDV(x) MCA_REG_FIELD(x, 53, 53)
+#define MCA_REG__STATUS__CECC(x) MCA_REG_FIELD(x, 46, 46)
+#define MCA_REG__STATUS__UECC(x) MCA_REG_FIELD(x, 45, 45)
+#define MCA_REG__STATUS__DEFERRED(x) MCA_REG_FIELD(x, 44, 44)
+#define MCA_REG__STATUS__POISON(x) MCA_REG_FIELD(x, 43, 43)
+#define MCA_REG__STATUS__SCRUB(x) MCA_REG_FIELD(x, 40, 40)
+#define MCA_REG__STATUS__ERRCOREID(x) MCA_REG_FIELD(x, 37, 32)
+#define MCA_REG__STATUS__ADDRLSB(x) MCA_REG_FIELD(x, 29, 24)
+#define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16)
+#define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0)
+
+#define MCA_REG__MISC0__ERRCNT(x) MCA_REG_FIELD(x, 43, 32)
+
+#define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0)
+
+/* IP identifiers stored in struct mca_bank_entry.ip. */
+enum amdgpu_mca_ip {
+ /* sentinel for "no known IP"; the only negative value */
+ AMDGPU_MCA_IP_UNKNOW = -1,
+ AMDGPU_MCA_IP_PSP = 0,
+ AMDGPU_MCA_IP_SDMA,
+ AMDGPU_MCA_IP_GC,
+ AMDGPU_MCA_IP_SMU,
+ AMDGPU_MCA_IP_MP5,
+ AMDGPU_MCA_IP_UMC,
+ AMDGPU_MCA_IP_PCS_XGMI,
+ /* number of IP values from PSP (0) onwards */
+ AMDGPU_MCA_IP_COUNT,
+};
+
+/*
+ * MCA error classes.  UE and CE also serve as indices into
+ * struct amdgpu_mca.mca_caches[], whose size is AMDGPU_MCA_ERROR_TYPE_DE.
+ */
+enum amdgpu_mca_error_type {
+ AMDGPU_MCA_ERROR_TYPE_UE = 0,
+ AMDGPU_MCA_ERROR_TYPE_CE,
+ AMDGPU_MCA_ERROR_TYPE_DE,
+};
+
struct amdgpu_mca_ras_block {
struct amdgpu_ras_block_object ras_block;
};
@@ -30,10 +77,64 @@ struct amdgpu_mca_ras {
struct amdgpu_mca_ras_block *ras;
};
+/* A list of MCA banks (struct mca_bank_node) together with its length. */
+struct mca_bank_set {
+ /* number of nodes currently linked on @list */
+ int nr_entries;
+ /* list head; nodes are linked through mca_bank_node.node */
+ struct list_head list;
+};
+
+/* A bank set kept in struct amdgpu_mca, with a mutex that amdgpu_mca.c takes around accesses to it. */
+struct mca_bank_cache {
+ struct mca_bank_set mca_set;
+ struct mutex lock;
+};
+
struct amdgpu_mca {
struct amdgpu_mca_ras mp0;
struct amdgpu_mca_ras mp1;
struct amdgpu_mca_ras mpio;
+ /* SMU callback table; assigned by amdgpu_mca_smu_init_funcs() */
+ const struct amdgpu_mca_smu_funcs *mca_funcs;
+ /*
+ * Bank caches indexed by enum amdgpu_mca_error_type.  The array is
+ * sized with AMDGPU_MCA_ERROR_TYPE_DE, so only the UE and CE values
+ * are valid indices.
+ */
+ struct mca_bank_cache mca_caches[AMDGPU_MCA_ERROR_TYPE_DE];
+ /* used by amdgpu_mca_bank_should_update() to fetch UE banks only once while a RAS interrupt is pending */
+ atomic_t ue_update_flag;
+};
+
+/*
+ * Indices into struct mca_bank_entry.regs[].  Only the listed slots are
+ * named; MCA_REG_IDX_COUNT has the same value as MCA_MAX_REGS_COUNT.
+ */
+enum mca_reg_idx {
+ MCA_REG_IDX_STATUS = 1,
+ MCA_REG_IDX_ADDR = 2,
+ MCA_REG_IDX_MISC0 = 3,
+ MCA_REG_IDX_IPID = 5,
+ MCA_REG_IDX_SYND = 6,
+ MCA_REG_IDX_COUNT = 16,
+};
+
+/*
+ * Location/identity information of an MCA bank.  amdgpu_mca.c copies
+ * socket_id and aid into the socket_id and die_id of the MCM config info
+ * used for error statistics.
+ */
+struct mca_bank_info {
+ int socket_id;
+ int aid;
+ int hwid;
+ int mcatype;
+};
+
+/* One MCA bank as reported through amdgpu_mca_smu_funcs.mca_get_mca_entry(). */
+struct mca_bank_entry {
+ /* entry index, printed as "mca entry[idx]" by the debugfs dump */
+ int idx;
+ enum amdgpu_mca_error_type type;
+ enum amdgpu_mca_ip ip;
+ struct mca_bank_info info;
+ /* raw register values, indexed by enum mca_reg_idx */
+ uint64_t regs[MCA_MAX_REGS_COUNT];
+};
+
+/* List node that carries one bank entry inside a struct mca_bank_set. */
+struct mca_bank_node {
+ struct mca_bank_entry entry;
+ /* link on mca_bank_set.list */
+ struct list_head node;
+};
+
+/*
+ * SMU-side MCA callbacks, installed with amdgpu_mca_smu_init_funcs().
+ * amdgpu_mca.c treats every callback as optional and returns -EOPNOTSUPP
+ * from its wrapper when one is missing.
+ */
+struct amdgpu_mca_smu_funcs {
+ /* exclusive upper bounds for the bank index accepted for UE / CE entries */
+ int max_ue_count;
+ int max_ce_count;
+ /* switch MCA debug mode on or off */
+ int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable);
+ /* derive the error count of @entry for RAS block @blk into @count */
+ int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct mca_bank_entry *entry, uint32_t *count);
+ /* report the number of valid banks of class @type in @count */
+ int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ uint32_t *count);
+ /* fill @entry with bank @idx of class @type */
+ int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ int idx, struct mca_bank_entry *entry);
};
void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
@@ -53,4 +154,16 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
+
+void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs);
+int amdgpu_mca_init(struct amdgpu_device *adev);
+void amdgpu_mca_fini(struct amdgpu_device *adev);
+int amdgpu_mca_reset(struct amdgpu_device *adev);
+int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, uint32_t *total);
+void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index b6015157763a..5bf9be073cdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -39,44 +39,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
PAGE_SIZE);
}
-static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
- struct amdgpu_mes_process *process,
- int ip_type, uint64_t *doorbell_index)
-{
- unsigned int offset, found;
- struct amdgpu_mes *mes = &adev->mes;
-
- if (ip_type == AMDGPU_RING_TYPE_SDMA)
- offset = adev->doorbell_index.sdma_engine[0];
- else
- offset = 0;
-
- found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset);
- if (found >= mes->num_mes_dbs) {
- DRM_WARN("No doorbell available\n");
- return -ENOSPC;
- }
-
- set_bit(found, mes->doorbell_bitmap);
-
- /* Get the absolute doorbell index on BAR */
- *doorbell_index = mes->db_start_dw_offset + found * 2;
- return 0;
-}
-
-static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,
- struct amdgpu_mes_process *process,
- uint32_t doorbell_index)
-{
- unsigned int old, rel_index;
- struct amdgpu_mes *mes = &adev->mes;
-
- /* Find the relative index of the doorbell in this object */
- rel_index = (doorbell_index - mes->db_start_dw_offset) / 2;
- old = test_and_clear_bit(rel_index, mes->doorbell_bitmap);
- WARN_ON(!old);
-}
-
static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
{
int i;
@@ -85,7 +47,7 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
/* Bitmap for dynamic allocation of kernel doorbells */
mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
if (!mes->doorbell_bitmap) {
- DRM_ERROR("Failed to allocate MES doorbell bitmap\n");
+ dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n");
return -ENOMEM;
}
@@ -98,6 +60,29 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
return 0;
}
+/*
+ * Allocate and clear the MES event log buffer.
+ *
+ * Does nothing and returns 0 when amdgpu_mes_log_enable is not set.
+ * Otherwise creates a kernel BO of adev->mes.event_log_size bytes in VRAM,
+ * zeroes it through its CPU mapping and returns 0, or returns the
+ * amdgpu_bo_create_kernel() error on failure.
+ */
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	if (!amdgpu_mes_log_enable)
+		return 0;
+
+	r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM,
+				    &adev->mes.event_log_gpu_obj,
+				    &adev->mes.event_log_gpu_addr,
+				    &adev->mes.event_log_cpu_addr);
+	if (r) {
+		/* Terminate the message so it is emitted as a complete log line. */
+		dev_warn(adev->dev, "failed to create MES event log buffer (%d)\n", r);
+		return r;
+	}
+
+	memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size);
+
+	return 0;
+}
+
static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
{
bitmap_free(adev->mes.doorbell_bitmap);
@@ -105,7 +90,7 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
int amdgpu_mes_init(struct amdgpu_device *adev)
{
- int i, r;
+ int i, r, num_pipes;
adev->mes.adev = adev;
@@ -114,80 +99,125 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
idr_init(&adev->mes.queue_id_idr);
ida_init(&adev->mes.doorbell_ida);
spin_lock_init(&adev->mes.queue_id_lock);
- spin_lock_init(&adev->mes.ring_lock);
mutex_init(&adev->mes.mutex_hidden);
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++)
+ spin_lock_init(&adev->mes.ring_lock[i]);
+
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
adev->mes.vmid_mask_mmhub = 0xffffff00;
- adev->mes.vmid_mask_gfxhub = 0xffffff00;
-
- for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
- /* use only 1st MEC pipes */
- if (i >= 4)
- continue;
- adev->mes.compute_hqd_mask[i] = 0xc;
- }
+ adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffffffe : 0xffffff00;
- for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
- adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
+ num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
+ if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
+ dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_GFX_PIPES);
- for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
- if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
- adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
- /* zero sdma_hqd_mask for non-existent engine */
- else if (adev->sdma.num_instances == 1)
- adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) {
+ if (i >= num_pipes)
+ break;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(12, 0, 0))
+ /*
+ * GFX V12 has only one GFX pipe, but 8 queues in it.
+ * GFX pipe 0 queue 0 is being used by Kernel queue.
+ * Set GFX pipe 0 queue 1-7 for MES scheduling
+ * mask = 1111 1110b
+ */
+ adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE;
else
- adev->mes.sdma_hqd_mask[i] = 0xfc;
+ /*
+ * GFX pipe 0 queue 0 is being used by Kernel queue.
+ * Set GFX pipe 0 queue 1 for MES scheduling
+ * mask = 10b
+ */
+ adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2;
}
- r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
- if (r) {
- dev_err(adev->dev,
- "(%d) ring trail_fence_offs wb alloc failed\n", r);
- goto error_ids;
+ num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec;
+ if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES)
+ dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES);
+
+ for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
+ if (i >= num_pipes)
+ break;
+ adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC;
}
- adev->mes.sch_ctx_gpu_addr =
- adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
- adev->mes.sch_ctx_ptr =
- (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
- r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
- if (r) {
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- dev_err(adev->dev,
- "(%d) query_status_fence_offs wb alloc failed\n", r);
- goto error_ids;
+ num_pipes = adev->sdma.num_instances;
+ if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES)
+ dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_SDMA_PIPES);
+
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
+ if (i >= num_pipes)
+ break;
+ adev->mes.sdma_hqd_mask[i] = 0xfc;
}
- adev->mes.query_status_fence_gpu_addr =
- adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
- adev->mes.query_status_fence_ptr =
- (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
- r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
- if (r) {
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
- dev_err(adev->dev,
- "(%d) read_val_offs alloc failed\n", r);
- goto error_ids;
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) ring trail_fence_offs wb alloc failed\n",
+ r);
+ goto error;
+ }
+ adev->mes.sch_ctx_gpu_addr[i] =
+ adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4);
+ adev->mes.sch_ctx_ptr[i] =
+ (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]];
+
+ r = amdgpu_device_wb_get(adev,
+ &adev->mes.query_status_fence_offs[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) query_status_fence_offs wb alloc failed\n",
+ r);
+ goto error;
+ }
+ adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr +
+ (adev->mes.query_status_fence_offs[i] * 4);
+ adev->mes.query_status_fence_ptr[i] =
+ (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]];
}
- adev->mes.read_val_gpu_addr =
- adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
- adev->mes.read_val_ptr =
- (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];
r = amdgpu_mes_doorbell_init(adev);
if (r)
goto error;
+ r = amdgpu_mes_event_log_init(adev);
+ if (r)
+ goto error_doorbell;
+
+ if (adev->mes.hung_queue_db_array_size) {
+ r = amdgpu_bo_create_kernel(adev,
+ adev->mes.hung_queue_db_array_size * sizeof(u32),
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.hung_queue_db_array_gpu_obj,
+ &adev->mes.hung_queue_db_array_gpu_addr,
+ &adev->mes.hung_queue_db_array_cpu_addr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r);
+ goto error_doorbell;
+ }
+ }
+
return 0;
+error_doorbell:
+ amdgpu_mes_doorbell_free(adev);
error:
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
- amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
-error_ids:
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ if (adev->mes.sch_ctx_ptr[i])
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+ if (adev->mes.query_status_fence_ptr[i])
+ amdgpu_device_wb_free(adev,
+ adev->mes.query_status_fence_offs[i]);
+ }
+
idr_destroy(&adev->mes.pasid_idr);
idr_destroy(&adev->mes.gang_id_idr);
idr_destroy(&adev->mes.queue_id_idr);
@@ -198,9 +228,24 @@ error_ids:
void amdgpu_mes_fini(struct amdgpu_device *adev)
{
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
- amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
+ int i;
+
+ amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj,
+ &adev->mes.hung_queue_db_array_gpu_addr,
+ &adev->mes.hung_queue_db_array_cpu_addr);
+
+ amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+ &adev->mes.event_log_gpu_addr,
+ &adev->mes.event_log_cpu_addr);
+
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ if (adev->mes.sch_ctx_ptr[i])
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+ if (adev->mes.query_status_fence_ptr[i])
+ amdgpu_device_wb_free(adev,
+ adev->mes.query_status_fence_offs[i]);
+ }
+
amdgpu_mes_doorbell_free(adev);
idr_destroy(&adev->mes.pasid_idr);
@@ -210,547 +255,178 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
mutex_destroy(&adev->mes.mutex_hidden);
}
-static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
-{
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
-}
-
-int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
- struct amdgpu_vm *vm)
+int amdgpu_mes_suspend(struct amdgpu_device *adev)
{
- struct amdgpu_mes_process *process;
+ struct mes_suspend_gang_input input;
int r;
- /* allocate the mes process buffer */
- process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
- if (!process) {
- DRM_ERROR("no more memory to create mes process\n");
- return -ENOMEM;
- }
+ if (!amdgpu_mes_suspend_resume_all_supported(adev))
+ return 0;
- /* allocate the process context bo and map it */
- r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &process->proc_ctx_bo,
- &process->proc_ctx_gpu_addr,
- &process->proc_ctx_cpu_ptr);
- if (r) {
- DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up_memory;
- }
- memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ memset(&input, 0x0, sizeof(struct mes_suspend_gang_input));
+ input.suspend_all_gangs = 1;
/*
* Avoid taking any other locks under MES lock to avoid circular
* lock dependencies.
*/
amdgpu_mes_lock(&adev->mes);
-
- /* add the mes process to idr list */
- r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
- GFP_KERNEL);
- if (r < 0) {
- DRM_ERROR("failed to lock pasid=%d\n", pasid);
- goto clean_up_ctx;
- }
-
- INIT_LIST_HEAD(&process->gang_list);
- process->vm = vm;
- process->pasid = pasid;
- process->process_quantum = adev->mes.default_process_quantum;
- process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
-
+ r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
amdgpu_mes_unlock(&adev->mes);
- return 0;
+ if (r)
+ dev_err(adev->dev, "failed to suspend all gangs");
-clean_up_ctx:
- amdgpu_mes_unlock(&adev->mes);
- amdgpu_bo_free_kernel(&process->proc_ctx_bo,
- &process->proc_ctx_gpu_addr,
- &process->proc_ctx_cpu_ptr);
-clean_up_memory:
- kfree(process);
return r;
}
-void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
-{
- struct amdgpu_mes_process *process;
- struct amdgpu_mes_gang *gang, *tmp1;
- struct amdgpu_mes_queue *queue, *tmp2;
- struct mes_remove_queue_input queue_input;
- unsigned long flags;
- int r;
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
-
- process = idr_find(&adev->mes.pasid_idr, pasid);
- if (!process) {
- DRM_WARN("pasid %d doesn't exist\n", pasid);
- amdgpu_mes_unlock(&adev->mes);
- return;
- }
-
- /* Remove all queues from hardware */
- list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
- list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
- spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
- idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
-
- queue_input.doorbell_offset = queue->doorbell_off;
- queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
-
- r = adev->mes.funcs->remove_hw_queue(&adev->mes,
- &queue_input);
- if (r)
- DRM_WARN("failed to remove hardware queue\n");
- }
-
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
- }
-
- idr_remove(&adev->mes.pasid_idr, pasid);
- amdgpu_mes_unlock(&adev->mes);
-
- /* free all memory allocated by the process */
- list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
- /* free all queues in the gang */
- list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
- amdgpu_mes_queue_free_mqd(queue);
- list_del(&queue->list);
- kfree(queue);
- }
- amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
- &gang->gang_ctx_gpu_addr,
- &gang->gang_ctx_cpu_ptr);
- list_del(&gang->list);
- kfree(gang);
-
- }
- amdgpu_bo_free_kernel(&process->proc_ctx_bo,
- &process->proc_ctx_gpu_addr,
- &process->proc_ctx_cpu_ptr);
- kfree(process);
-}
-
-int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
- struct amdgpu_mes_gang_properties *gprops,
- int *gang_id)
+int amdgpu_mes_resume(struct amdgpu_device *adev)
{
- struct amdgpu_mes_process *process;
- struct amdgpu_mes_gang *gang;
+ struct mes_resume_gang_input input;
int r;
- /* allocate the mes gang buffer */
- gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
- if (!gang) {
- return -ENOMEM;
- }
+ if (!amdgpu_mes_suspend_resume_all_supported(adev))
+ return 0;
- /* allocate the gang context bo and map it to cpu space */
- r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &gang->gang_ctx_bo,
- &gang->gang_ctx_gpu_addr,
- &gang->gang_ctx_cpu_ptr);
- if (r) {
- DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up_mem;
- }
- memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+ memset(&input, 0x0, sizeof(struct mes_resume_gang_input));
+ input.resume_all_gangs = 1;
/*
* Avoid taking any other locks under MES lock to avoid circular
* lock dependencies.
*/
amdgpu_mes_lock(&adev->mes);
-
- process = idr_find(&adev->mes.pasid_idr, pasid);
- if (!process) {
- DRM_ERROR("pasid %d doesn't exist\n", pasid);
- r = -EINVAL;
- goto clean_up_ctx;
- }
-
- /* add the mes gang to idr list */
- r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
- GFP_KERNEL);
- if (r < 0) {
- DRM_ERROR("failed to allocate idr for gang\n");
- goto clean_up_ctx;
- }
-
- gang->gang_id = r;
- *gang_id = r;
-
- INIT_LIST_HEAD(&gang->queue_list);
- gang->process = process;
- gang->priority = gprops->priority;
- gang->gang_quantum = gprops->gang_quantum ?
- gprops->gang_quantum : adev->mes.default_gang_quantum;
- gang->global_priority_level = gprops->global_priority_level;
- gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
- list_add_tail(&gang->list, &process->gang_list);
-
+ r = adev->mes.funcs->resume_gang(&adev->mes, &input);
amdgpu_mes_unlock(&adev->mes);
- return 0;
+ if (r)
+ dev_err(adev->dev, "failed to resume all gangs");
-clean_up_ctx:
- amdgpu_mes_unlock(&adev->mes);
- amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
- &gang->gang_ctx_gpu_addr,
- &gang->gang_ctx_cpu_ptr);
-clean_up_mem:
- kfree(gang);
return r;
}
-int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
+int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
{
- struct amdgpu_mes_gang *gang;
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
-
- gang = idr_find(&adev->mes.gang_id_idr, gang_id);
- if (!gang) {
- DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- amdgpu_mes_unlock(&adev->mes);
- return -EINVAL;
- }
-
- if (!list_empty(&gang->queue_list)) {
- DRM_ERROR("queue list is not empty\n");
- amdgpu_mes_unlock(&adev->mes);
- return -EBUSY;
- }
-
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
- list_del(&gang->list);
- amdgpu_mes_unlock(&adev->mes);
-
- amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
- &gang->gang_ctx_gpu_addr,
- &gang->gang_ctx_cpu_ptr);
+ struct mes_map_legacy_queue_input queue_input;
+ int r;
- kfree(gang);
+ memset(&queue_input, 0, sizeof(queue_input));
- return 0;
-}
-
-int amdgpu_mes_suspend(struct amdgpu_device *adev)
-{
- struct idr *idp;
- struct amdgpu_mes_process *process;
- struct amdgpu_mes_gang *gang;
- struct mes_suspend_gang_input input;
- int r, pasid;
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ queue_input.wptr_addr = ring->wptr_gpu_addr;
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
amdgpu_mes_lock(&adev->mes);
-
- idp = &adev->mes.pasid_idr;
-
- idr_for_each_entry(idp, process, pasid) {
- list_for_each_entry(gang, &process->gang_list, list) {
- r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
- if (r)
- DRM_ERROR("failed to suspend pasid %d gangid %d",
- pasid, gang->gang_id);
- }
- }
-
+ r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input);
amdgpu_mes_unlock(&adev->mes);
- return 0;
-}
-
-int amdgpu_mes_resume(struct amdgpu_device *adev)
-{
- struct idr *idp;
- struct amdgpu_mes_process *process;
- struct amdgpu_mes_gang *gang;
- struct mes_resume_gang_input input;
- int r, pasid;
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
-
- idp = &adev->mes.pasid_idr;
-
- idr_for_each_entry(idp, process, pasid) {
- list_for_each_entry(gang, &process->gang_list, list) {
- r = adev->mes.funcs->resume_gang(&adev->mes, &input);
- if (r)
- DRM_ERROR("failed to resume pasid %d gangid %d",
- pasid, gang->gang_id);
- }
- }
+ if (r)
+ dev_err(adev->dev, "failed to map legacy queue\n");
- amdgpu_mes_unlock(&adev->mes);
- return 0;
+ return r;
}
-static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
- struct amdgpu_mes_queue *q,
- struct amdgpu_mes_queue_properties *p)
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
{
- struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
- u32 mqd_size = mqd_mgr->mqd_size;
+ struct mes_unmap_legacy_queue_input queue_input;
int r;
- r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &q->mqd_obj,
- &q->mqd_gpu_addr, &q->mqd_cpu_ptr);
- if (r) {
- dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r);
- return r;
- }
- memset(q->mqd_cpu_ptr, 0, mqd_size);
-
- r = amdgpu_bo_reserve(q->mqd_obj, false);
- if (unlikely(r != 0))
- goto clean_up;
+ queue_input.action = action;
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.trail_fence_addr = gpu_addr;
+ queue_input.trail_fence_data = seq;
- return 0;
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to unmap legacy queue\n");
-clean_up:
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
return r;
}
-static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
- struct amdgpu_mes_queue *q,
- struct amdgpu_mes_queue_properties *p)
-{
- struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
- struct amdgpu_mqd_prop mqd_prop = {0};
-
- mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
- mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
- mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
- mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr;
- mqd_prop.queue_size = p->queue_size;
- mqd_prop.use_doorbell = true;
- mqd_prop.doorbell_index = p->doorbell_off;
- mqd_prop.eop_gpu_addr = p->eop_gpu_addr;
- mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority;
- mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
- mqd_prop.hqd_active = false;
-
- mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
-
- amdgpu_bo_unreserve(q->mqd_obj);
-}
-
-int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
- struct amdgpu_mes_queue_properties *qprops,
- int *queue_id)
+int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int vmid,
+ bool use_mmio)
{
- struct amdgpu_mes_queue *queue;
- struct amdgpu_mes_gang *gang;
- struct mes_add_queue_input queue_input;
- unsigned long flags;
+ struct mes_reset_queue_input queue_input;
int r;
- memset(&queue_input, 0, sizeof(struct mes_add_queue_input));
-
- /* allocate the mes queue buffer */
- queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
- if (!queue) {
- DRM_ERROR("Failed to allocate memory for queue\n");
- return -ENOMEM;
- }
+ memset(&queue_input, 0, sizeof(queue_input));
- /* Allocate the queue mqd */
- r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
- if (r)
- goto clean_up_memory;
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.me_id = ring->me;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.mqd_addr = ring->mqd_obj ? amdgpu_bo_gpu_offset(ring->mqd_obj) : 0;
+ queue_input.wptr_addr = ring->wptr_gpu_addr;
+ queue_input.vmid = vmid;
+ queue_input.use_mmio = use_mmio;
+ queue_input.is_kq = true;
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ queue_input.legacy_gfx = true;
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
amdgpu_mes_lock(&adev->mes);
-
- gang = idr_find(&adev->mes.gang_id_idr, gang_id);
- if (!gang) {
- DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- r = -EINVAL;
- goto clean_up_mqd;
- }
-
- /* add the mes gang to idr list */
- spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
- r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
- GFP_ATOMIC);
- if (r < 0) {
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- goto clean_up_mqd;
- }
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- *queue_id = queue->queue_id = r;
-
- /* allocate a doorbell index for the queue */
- r = amdgpu_mes_kernel_doorbell_get(adev, gang->process,
- qprops->queue_type,
- &qprops->doorbell_off);
- if (r)
- goto clean_up_queue_id;
-
- /* initialize the queue mqd */
- amdgpu_mes_queue_init_mqd(adev, queue, qprops);
-
- /* add hw queue to mes */
- queue_input.process_id = gang->process->pasid;
-
- queue_input.page_table_base_addr =
- adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
- adev->gmc.vram_start;
-
- queue_input.process_va_start = 0;
- queue_input.process_va_end =
- (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
- queue_input.process_quantum = gang->process->process_quantum;
- queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr;
- queue_input.gang_quantum = gang->gang_quantum;
- queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
- queue_input.inprocess_gang_priority = gang->inprocess_gang_priority;
- queue_input.gang_global_priority_level = gang->global_priority_level;
- queue_input.doorbell_offset = qprops->doorbell_off;
- queue_input.mqd_addr = queue->mqd_gpu_addr;
- queue_input.wptr_addr = qprops->wptr_gpu_addr;
- queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
- queue_input.queue_type = qprops->queue_type;
- queue_input.paging = qprops->paging;
- queue_input.is_kfd_process = 0;
-
- r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
- if (r) {
- DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
- qprops->doorbell_off);
- goto clean_up_doorbell;
- }
-
- DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
- "queue type=%d, doorbell=0x%llx\n",
- gang->process->pasid, gang_id, qprops->queue_type,
- qprops->doorbell_off);
-
- queue->ring = qprops->ring;
- queue->doorbell_off = qprops->doorbell_off;
- queue->wptr_gpu_addr = qprops->wptr_gpu_addr;
- queue->queue_type = qprops->queue_type;
- queue->paging = qprops->paging;
- queue->gang = gang;
- queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
- list_add_tail(&queue->list, &gang->queue_list);
-
+ r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
amdgpu_mes_unlock(&adev->mes);
- return 0;
+ if (r)
+ dev_err(adev->dev, "failed to reset legacy queue\n");
-clean_up_doorbell:
- amdgpu_mes_kernel_doorbell_free(adev, gang->process,
- qprops->doorbell_off);
-clean_up_queue_id:
- spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
- idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
-clean_up_mqd:
- amdgpu_mes_unlock(&adev->mes);
- amdgpu_mes_queue_free_mqd(queue);
-clean_up_memory:
- kfree(queue);
return r;
}
-int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
+int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev)
{
- unsigned long flags;
- struct amdgpu_mes_queue *queue;
- struct amdgpu_mes_gang *gang;
- struct mes_remove_queue_input queue_input;
- int r;
+ return adev->mes.hung_queue_db_array_size;
+}
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
+int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
+ int queue_type,
+ bool detect_only,
+ unsigned int *hung_db_num,
+ u32 *hung_db_array)
- /* remove the mes gang from idr list */
- spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
+{
+ struct mes_detect_and_reset_queue_input input;
+ u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr;
+ int r, i;
- queue = idr_find(&adev->mes.queue_id_idr, queue_id);
- if (!queue) {
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- amdgpu_mes_unlock(&adev->mes);
- DRM_ERROR("queue id %d doesn't exist\n", queue_id);
+ if (!hung_db_num || !hung_db_array)
return -EINVAL;
- }
-
- idr_remove(&adev->mes.queue_id_idr, queue_id);
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
-
- DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n",
- queue->doorbell_off);
-
- gang = queue->gang;
- queue_input.doorbell_offset = queue->doorbell_off;
- queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
-
- r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
- if (r)
- DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
- queue_id);
-
- list_del(&queue->list);
- amdgpu_mes_kernel_doorbell_free(adev, gang->process,
- queue->doorbell_off);
- amdgpu_mes_unlock(&adev->mes);
-
- amdgpu_mes_queue_free_mqd(queue);
- kfree(queue);
- return 0;
-}
-int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- enum amdgpu_unmap_queues_action action,
- u64 gpu_addr, u64 seq)
-{
- struct mes_unmap_legacy_queue_input queue_input;
- int r;
+ if ((queue_type != AMDGPU_RING_TYPE_GFX) &&
+ (queue_type != AMDGPU_RING_TYPE_COMPUTE) &&
+ (queue_type != AMDGPU_RING_TYPE_SDMA))
+ return -EINVAL;
- queue_input.action = action;
- queue_input.queue_type = ring->funcs->type;
- queue_input.doorbell_offset = ring->doorbell_index;
- queue_input.pipe_id = ring->pipe;
- queue_input.queue_id = ring->queue;
- queue_input.trail_fence_addr = gpu_addr;
- queue_input.trail_fence_data = seq;
+ /* Clear the doorbell array before detection */
+ memset(adev->mes.hung_queue_db_array_cpu_addr, 0,
+ adev->mes.hung_queue_db_array_size * sizeof(u32));
+ input.queue_type = queue_type;
+ input.detect_only = detect_only;
- r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
- if (r)
- DRM_ERROR("failed to unmap legacy queue\n");
+ r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
+ &input);
+ if (r) {
+ dev_err(adev->dev, "failed to detect and reset\n");
+ } else {
+ *hung_db_num = 0;
+ for (i = 0; i < adev->mes.hung_queue_db_array_size; i++) {
+ if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
+ hung_db_array[i] = db_array[i];
+ *hung_db_num += 1;
+ }
+ }
+ }
return r;
}
@@ -759,23 +435,36 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
{
struct mes_misc_op_input op_input;
int r, val = 0;
+ uint32_t addr_offset = 0;
+ uint64_t read_val_gpu_addr;
+ uint32_t *read_val_ptr;
+ if (amdgpu_device_wb_get(adev, &addr_offset)) {
+ dev_err(adev->dev, "critical bug! too many mes readers\n");
+ goto error;
+ }
+ read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4);
+ read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset];
op_input.op = MES_MISC_OP_READ_REG;
op_input.read_reg.reg_offset = reg;
- op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
+ op_input.read_reg.buffer_addr = read_val_gpu_addr;
if (!adev->mes.funcs->misc_op) {
- DRM_ERROR("mes rreg is not supported!\n");
+ dev_err(adev->dev, "mes rreg is not supported!\n");
goto error;
}
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
- DRM_ERROR("failed to read reg (0x%x)\n", reg);
+ dev_err(adev->dev, "failed to read reg (0x%x)\n", reg);
else
- val = *(adev->mes.read_val_ptr);
+ val = *(read_val_ptr);
error:
+ if (addr_offset)
+ amdgpu_device_wb_free(adev, addr_offset);
return val;
}
@@ -790,14 +479,16 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
op_input.write_reg.reg_value = val;
if (!adev->mes.funcs->misc_op) {
- DRM_ERROR("mes wreg is not supported!\n");
+ dev_err(adev->dev, "mes wreg is not supported!\n");
r = -EINVAL;
goto error;
}
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
- DRM_ERROR("failed to write reg (0x%x)\n", reg);
+ dev_err(adev->dev, "failed to write reg (0x%x)\n", reg);
error:
return r;
@@ -817,39 +508,16 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
op_input.wrm_reg.mask = mask;
if (!adev->mes.funcs->misc_op) {
- DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
- r = -EINVAL;
- goto error;
- }
-
- r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
- if (r)
- DRM_ERROR("failed to reg_write_reg_wait\n");
-
-error:
- return r;
-}
-
-int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
- uint32_t val, uint32_t mask)
-{
- struct mes_misc_op_input op_input;
- int r;
-
- op_input.op = MES_MISC_OP_WRM_REG_WAIT;
- op_input.wrm_reg.reg0 = reg;
- op_input.wrm_reg.ref = val;
- op_input.wrm_reg.mask = mask;
-
- if (!adev->mes.funcs->misc_op) {
- DRM_ERROR("mes reg wait is not supported!\n");
+ dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n");
r = -EINVAL;
goto error;
}
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
- DRM_ERROR("failed to reg_write_reg_wait\n");
+ dev_err(adev->dev, "failed to reg_write_reg_wait\n");
error:
return r;
@@ -866,13 +534,19 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
int r;
if (!adev->mes.funcs->misc_op) {
- DRM_ERROR("mes set shader debugger is not supported!\n");
+ dev_err(adev->dev,
+ "mes set shader debugger is not supported!\n");
return -EINVAL;
}
op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
op_input.set_shader_debugger.process_context_addr = process_context_addr;
op_input.set_shader_debugger.flags.u32all = flags;
+
+ /* use amdgpu mes_flush_shader_debugger instead */
+ if (op_input.set_shader_debugger.flags.process_ctx_flush)
+ return -EINVAL;
+
op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
@@ -885,519 +559,63 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
if (r)
- DRM_ERROR("failed to set_shader_debugger\n");
+ dev_err(adev->dev, "failed to set_shader_debugger\n");
amdgpu_mes_unlock(&adev->mes);
return r;
}
-static void
-amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_mes_queue_properties *props)
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr)
{
- props->queue_type = ring->funcs->type;
- props->hqd_base_gpu_addr = ring->gpu_addr;
- props->rptr_gpu_addr = ring->rptr_gpu_addr;
- props->wptr_gpu_addr = ring->wptr_gpu_addr;
- props->wptr_mc_addr =
- ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
- props->queue_size = ring->ring_size;
- props->eop_gpu_addr = ring->eop_gpu_addr;
- props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
- props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
- props->paging = false;
- props->ring = ring;
-}
-
-#define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \
-do { \
- if (id_offs < AMDGPU_MES_CTX_MAX_OFFS) \
- return offsetof(struct amdgpu_mes_ctx_meta_data, \
- _eng[ring->idx].slots[id_offs]); \
- else if (id_offs == AMDGPU_MES_CTX_RING_OFFS) \
- return offsetof(struct amdgpu_mes_ctx_meta_data, \
- _eng[ring->idx].ring); \
- else if (id_offs == AMDGPU_MES_CTX_IB_OFFS) \
- return offsetof(struct amdgpu_mes_ctx_meta_data, \
- _eng[ring->idx].ib); \
- else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS) \
- return offsetof(struct amdgpu_mes_ctx_meta_data, \
- _eng[ring->idx].padding); \
-} while(0)
-
-int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs)
-{
- switch (ring->funcs->type) {
- case AMDGPU_RING_TYPE_GFX:
- DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx);
- break;
- case AMDGPU_RING_TYPE_COMPUTE:
- DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute);
- break;
- case AMDGPU_RING_TYPE_SDMA:
- DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma);
- break;
- default:
- break;
- }
-
- WARN_ON(1);
- return -EINVAL;
-}
-
-int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
- int queue_type, int idx,
- struct amdgpu_mes_ctx_data *ctx_data,
- struct amdgpu_ring **out)
-{
- struct amdgpu_ring *ring;
- struct amdgpu_mes_gang *gang;
- struct amdgpu_mes_queue_properties qprops = {0};
- int r, queue_id, pasid;
+ struct mes_misc_op_input op_input = {0};
+ int r;
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
- gang = idr_find(&adev->mes.gang_id_idr, gang_id);
- if (!gang) {
- DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- amdgpu_mes_unlock(&adev->mes);
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev,
+ "mes flush shader debugger is not supported!\n");
return -EINVAL;
}
- pasid = gang->process->pasid;
- ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
- if (!ring) {
- amdgpu_mes_unlock(&adev->mes);
- return -ENOMEM;
- }
-
- ring->ring_obj = NULL;
- ring->use_doorbell = true;
- ring->is_mes_queue = true;
- ring->mes_ctx = ctx_data;
- ring->idx = idx;
- ring->no_scheduler = true;
-
- if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
- int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- compute[ring->idx].mec_hpd);
- ring->eop_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- }
+ op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+ op_input.set_shader_debugger.process_context_addr = process_context_addr;
+ op_input.set_shader_debugger.flags.process_ctx_flush = true;
- switch (queue_type) {
- case AMDGPU_RING_TYPE_GFX:
- ring->funcs = adev->gfx.gfx_ring[0].funcs;
- break;
- case AMDGPU_RING_TYPE_COMPUTE:
- ring->funcs = adev->gfx.compute_ring[0].funcs;
- break;
- case AMDGPU_RING_TYPE_SDMA:
- ring->funcs = adev->sdma.instance[0].ring.funcs;
- break;
- default:
- BUG();
- }
+ amdgpu_mes_lock(&adev->mes);
- r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
if (r)
- goto clean_up_memory;
-
- amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
+ dev_err(adev->dev, "failed to set_shader_debugger\n");
- dma_fence_wait(gang->process->vm->last_update, false);
- dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
amdgpu_mes_unlock(&adev->mes);
- r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
- if (r)
- goto clean_up_ring;
-
- ring->hw_queue_id = queue_id;
- ring->doorbell_index = qprops.doorbell_off;
-
- if (queue_type == AMDGPU_RING_TYPE_GFX)
- sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
- else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
- sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
- queue_id);
- else if (queue_type == AMDGPU_RING_TYPE_SDMA)
- sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
- queue_id);
- else
- BUG();
-
- *out = ring;
- return 0;
-
-clean_up_ring:
- amdgpu_ring_fini(ring);
-clean_up_memory:
- kfree(ring);
- amdgpu_mes_unlock(&adev->mes);
return r;
}
-void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring)
- return;
-
- amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
- amdgpu_ring_fini(ring);
- kfree(ring);
-}
-
uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
enum amdgpu_mes_priority_level prio)
{
return adev->mes.aggregated_doorbells[prio];
}
-int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
- struct amdgpu_mes_ctx_data *ctx_data)
-{
- int r;
-
- r = amdgpu_bo_create_kernel(adev,
- sizeof(struct amdgpu_mes_ctx_meta_data),
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &ctx_data->meta_data_obj,
- &ctx_data->meta_data_mc_addr,
- &ctx_data->meta_data_ptr);
- if (r) {
- dev_warn(adev->dev, "(%d) create CTX bo failed\n", r);
- return r;
- }
-
- if (!ctx_data->meta_data_obj)
- return -ENOMEM;
-
- memset(ctx_data->meta_data_ptr, 0,
- sizeof(struct amdgpu_mes_ctx_meta_data));
-
- return 0;
-}
-
-void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
-{
- if (ctx_data->meta_data_obj)
- amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
- &ctx_data->meta_data_mc_addr,
- &ctx_data->meta_data_ptr);
-}
-
-int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_mes_ctx_data *ctx_data)
-{
- struct amdgpu_bo_va *bo_va;
- struct amdgpu_sync sync;
- struct drm_exec exec;
- int r;
-
- amdgpu_sync_create(&sync);
-
- drm_exec_init(&exec, 0);
- drm_exec_until_all_locked(&exec) {
- r = drm_exec_lock_obj(&exec,
- &ctx_data->meta_data_obj->tbo.base);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto error_fini_exec;
-
- r = amdgpu_vm_lock_pd(vm, &exec, 0);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto error_fini_exec;
- }
-
- bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
- if (!bo_va) {
- DRM_ERROR("failed to create bo_va for meta data BO\n");
- r = -ENOMEM;
- goto error_fini_exec;
- }
-
- r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
- sizeof(struct amdgpu_mes_ctx_meta_data),
- AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
- AMDGPU_PTE_EXECUTABLE);
-
- if (r) {
- DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
- goto error_del_bo_va;
- }
-
- r = amdgpu_vm_bo_update(adev, bo_va, false);
- if (r) {
- DRM_ERROR("failed to do vm_bo_update on meta data\n");
- goto error_del_bo_va;
- }
- amdgpu_sync_fence(&sync, bo_va->last_pt_update);
-
- r = amdgpu_vm_update_pdes(adev, vm, false);
- if (r) {
- DRM_ERROR("failed to update pdes on meta data\n");
- goto error_del_bo_va;
- }
- amdgpu_sync_fence(&sync, vm->last_update);
-
- amdgpu_sync_wait(&sync, false);
- drm_exec_fini(&exec);
-
- amdgpu_sync_free(&sync);
- ctx_data->meta_data_va = bo_va;
- return 0;
-
-error_del_bo_va:
- amdgpu_vm_bo_del(adev, bo_va);
-
-error_fini_exec:
- drm_exec_fini(&exec);
- amdgpu_sync_free(&sync);
- return r;
-}
-
-int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
- struct amdgpu_mes_ctx_data *ctx_data)
-{
- struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
- struct amdgpu_bo *bo = ctx_data->meta_data_obj;
- struct amdgpu_vm *vm = bo_va->base.vm;
- struct dma_fence *fence;
- struct drm_exec exec;
- long r;
-
- drm_exec_init(&exec, 0);
- drm_exec_until_all_locked(&exec) {
- r = drm_exec_lock_obj(&exec,
- &ctx_data->meta_data_obj->tbo.base);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto out_unlock;
-
- r = amdgpu_vm_lock_pd(vm, &exec, 0);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto out_unlock;
- }
-
- amdgpu_vm_bo_del(adev, bo_va);
- if (!amdgpu_vm_ready(vm))
- goto out_unlock;
-
- r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
- &fence);
- if (r)
- goto out_unlock;
- if (fence) {
- amdgpu_bo_fence(bo, fence, true);
- fence = NULL;
- }
-
- r = amdgpu_vm_clear_freed(adev, vm, &fence);
- if (r || !fence)
- goto out_unlock;
-
- dma_fence_wait(fence, false);
- amdgpu_bo_fence(bo, fence, true);
- dma_fence_put(fence);
-
-out_unlock:
- if (unlikely(r < 0))
- dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
- drm_exec_fini(&exec);
-
- return r;
-}
-
-static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
- int pasid, int *gang_id,
- int queue_type, int num_queue,
- struct amdgpu_ring **added_rings,
- struct amdgpu_mes_ctx_data *ctx_data)
-{
- struct amdgpu_ring *ring;
- struct amdgpu_mes_gang_properties gprops = {0};
- int r, j;
-
- /* create a gang for the process */
- gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
- gprops.gang_quantum = adev->mes.default_gang_quantum;
- gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
- gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
- gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
-
- r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id);
- if (r) {
- DRM_ERROR("failed to add gang\n");
- return r;
- }
-
- /* create queues for the gang */
- for (j = 0; j < num_queue; j++) {
- r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j,
- ctx_data, &ring);
- if (r) {
- DRM_ERROR("failed to add ring\n");
- break;
- }
-
- DRM_INFO("ring %s was added\n", ring->name);
- added_rings[j] = ring;
- }
-
- return 0;
-}
-
-static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
-{
- struct amdgpu_ring *ring;
- int i, r;
-
- for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) {
- ring = added_rings[i];
- if (!ring)
- continue;
-
- r = amdgpu_ring_test_helper(ring);
- if (r)
- return r;
-
- r = amdgpu_ring_test_ib(ring, 1000 * 10);
- if (r) {
- DRM_DEV_ERROR(ring->adev->dev,
- "ring %s ib test failed (%d)\n",
- ring->name, r);
- return r;
- } else
- DRM_INFO("ring %s ib test pass\n", ring->name);
- }
-
- return 0;
-}
-
-int amdgpu_mes_self_test(struct amdgpu_device *adev)
-{
- struct amdgpu_vm *vm = NULL;
- struct amdgpu_mes_ctx_data ctx_data = {0};
- struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL };
- int gang_ids[3] = {0};
- int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 },
- { AMDGPU_RING_TYPE_COMPUTE, 1 },
- { AMDGPU_RING_TYPE_SDMA, 1} };
- int i, r, pasid, k = 0;
-
- pasid = amdgpu_pasid_alloc(16);
- if (pasid < 0) {
- dev_warn(adev->dev, "No more PASIDs available!");
- pasid = 0;
- }
-
- vm = kzalloc(sizeof(*vm), GFP_KERNEL);
- if (!vm) {
- r = -ENOMEM;
- goto error_pasid;
- }
-
- r = amdgpu_vm_init(adev, vm, -1);
- if (r) {
- DRM_ERROR("failed to initialize vm\n");
- goto error_pasid;
- }
-
- r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
- if (r) {
- DRM_ERROR("failed to alloc ctx meta data\n");
- goto error_fini;
- }
-
- ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
- r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
- if (r) {
- DRM_ERROR("failed to map ctx meta data\n");
- goto error_vm;
- }
-
- r = amdgpu_mes_create_process(adev, pasid, vm);
- if (r) {
- DRM_ERROR("failed to create MES process\n");
- goto error_vm;
- }
-
- for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
- /* On GFX v10.3, fw hasn't supported to map sdma queue. */
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
- adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
- queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
- continue;
-
- r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
- &gang_ids[i],
- queue_types[i][0],
- queue_types[i][1],
- &added_rings[k],
- &ctx_data);
- if (r)
- goto error_queues;
-
- k += queue_types[i][1];
- }
-
- /* start ring test and ib test for MES queues */
- amdgpu_mes_test_queues(added_rings);
-
-error_queues:
- /* remove all queues */
- for (i = 0; i < ARRAY_SIZE(added_rings); i++) {
- if (!added_rings[i])
- continue;
- amdgpu_mes_remove_ring(adev, added_rings[i]);
- }
-
- for (i = 0; i < ARRAY_SIZE(gang_ids); i++) {
- if (!gang_ids[i])
- continue;
- amdgpu_mes_remove_gang(adev, gang_ids[i]);
- }
-
- amdgpu_mes_destroy_process(adev, pasid);
-
-error_vm:
- amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
-
-error_fini:
- amdgpu_vm_fini(adev, vm);
-
-error_pasid:
- if (pasid)
- amdgpu_pasid_free(pasid);
-
- amdgpu_mes_ctx_free_meta_data(&ctx_data);
- kfree(vm);
- return 0;
-}
-
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
{
const struct mes_firmware_header_v1_0 *mes_hdr;
struct amdgpu_firmware_info *info;
char ucode_prefix[30];
- char fw_name[40];
+ char fw_name[50];
bool need_retry = false;
+ u32 *ucode_ptr;
int r;
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
sizeof(ucode_prefix));
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
+ if (adev->enable_uni_mes) {
+ snprintf(fw_name, sizeof(fw_name),
+ "amdgpu/%s_uni_mes.bin", ucode_prefix);
+ } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
ucode_prefix,
pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
@@ -1408,13 +626,13 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
}
- r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name);
+ r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
- ucode_prefix);
- DRM_INFO("try to fall back to %s\n", fw_name);
+ dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix);
r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
- fw_name);
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mes.bin", ucode_prefix);
}
if (r)
@@ -1428,6 +646,10 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
adev->mes.data_start_addr[pipe] =
le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
+ ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data +
+ sizeof(union amdgpu_firmware_header));
+ adev->mes.fw_version[pipe] =
+ le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
int ucode, ucode_data;
@@ -1460,3 +682,89 @@ out:
amdgpu_ucode_release(&adev->mes.fw[pipe]);
return r;
}
+
+bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
+{
+ uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+ bool is_supported = false;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
+ mes_rev >= 0x63)
+ is_supported = true;
+
+ return is_supported;
+}
+
+/* FIXME: node_id is currently unused; it is meant to identify the correct MES instance in the future */
+static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev,
+ uint32_t node_id, bool enable)
+{
+ struct mes_misc_op_input op_input = {0};
+ int r;
+
+ op_input.op = MES_MISC_OP_CHANGE_CONFIG;
+ op_input.change_config.option.limit_single_process = enable ? 1 : 0;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev, "mes change config is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to change_config.\n");
+
+error:
+ return r;
+}
+
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev)
+{
+ int i, r = 0;
+
+ if (adev->enable_mes && adev->gfx.enable_cleaner_shader) {
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+ if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ r |= amdgpu_mes_set_enforce_isolation(adev, i, true);
+ else
+ r |= amdgpu_mes_set_enforce_isolation(adev, i, false);
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ }
+ return r;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
+
+ seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+ mem, adev->mes.event_log_size, false);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ if (adev->enable_mes && amdgpu_mes_log_enable)
+ debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+ adev, &amdgpu_debugfs_mes_event_log_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index a27b424ffe00..6b506fc72f58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -40,6 +40,8 @@
#define AMDGPU_MES_VERSION_MASK 0x00000fff
#define AMDGPU_MES_API_VERSION_MASK 0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000
+#define AMDGPU_MES_MSCRATCH_SIZE 0x40000
+#define AMDGPU_MES_INVALID_DB_OFFSET 0xffffffff
enum amdgpu_mes_priority_level {
AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
@@ -55,7 +57,7 @@ enum amdgpu_mes_priority_level {
struct amdgpu_mes_funcs;
-enum admgpu_mes_pipe {
+enum amdgpu_mes_pipe {
AMDGPU_MES_SCHED_PIPE = 0,
AMDGPU_MES_KIQ_PIPE,
AMDGPU_MAX_MES_PIPES = 2,
@@ -75,6 +77,8 @@ struct amdgpu_mes {
uint32_t sched_version;
uint32_t kiq_version;
+ uint32_t fw_version[AMDGPU_MAX_MES_PIPES];
+ bool enable_legacy_queue_map;
uint32_t total_max_queue;
uint32_t max_doorbell_slices;
@@ -82,8 +86,8 @@ struct amdgpu_mes {
uint64_t default_process_quantum;
uint64_t default_gang_quantum;
- struct amdgpu_ring ring;
- spinlock_t ring_lock;
+ struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES];
+ spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES];
const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
@@ -108,19 +112,16 @@ struct amdgpu_mes {
uint32_t vmid_mask_gfxhub;
uint32_t vmid_mask_mmhub;
- uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
+ uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
- uint32_t sch_ctx_offs;
- uint64_t sch_ctx_gpu_addr;
- uint64_t *sch_ctx_ptr;
- uint32_t query_status_fence_offs;
- uint64_t query_status_fence_gpu_addr;
- uint64_t *query_status_fence_ptr;
- uint32_t read_val_offs;
- uint64_t read_val_gpu_addr;
- uint32_t *read_val_ptr;
+ uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
+ uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES];
+ uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
+ uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];
uint32_t saved_flags;
@@ -133,21 +134,24 @@ struct amdgpu_mes {
uint32_t num_mes_dbs;
unsigned long *doorbell_bitmap;
+ /* MES event log buffer */
+ uint32_t event_log_size;
+ struct amdgpu_bo *event_log_gpu_obj;
+ uint64_t event_log_gpu_addr;
+ void *event_log_cpu_addr;
+
/* ip specific functions */
const struct amdgpu_mes_funcs *funcs;
-};
-struct amdgpu_mes_process {
- int pasid;
- struct amdgpu_vm *vm;
- uint64_t pd_gpu_addr;
- struct amdgpu_bo *proc_ctx_bo;
- uint64_t proc_ctx_gpu_addr;
- void *proc_ctx_cpu_ptr;
- uint64_t process_quantum;
- struct list_head gang_list;
- uint32_t doorbell_index;
- struct mutex doorbell_lock;
+ /* mes resource_1 bo*/
+ struct amdgpu_bo *resource_1[AMDGPU_MAX_MES_PIPES];
+ uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ void *resource_1_addr[AMDGPU_MAX_MES_PIPES];
+
+ int hung_queue_db_array_size;
+ struct amdgpu_bo *hung_queue_db_array_gpu_obj;
+ uint64_t hung_queue_db_array_gpu_addr;
+ void *hung_queue_db_array_cpu_addr;
};
struct amdgpu_mes_gang {
@@ -236,6 +240,15 @@ struct mes_remove_queue_input {
uint64_t gang_context_addr;
};
+struct mes_map_legacy_queue_input {
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+};
+
struct mes_unmap_legacy_queue_input {
enum amdgpu_unmap_queues_action action;
uint32_t queue_type;
@@ -258,12 +271,39 @@ struct mes_resume_gang_input {
uint64_t gang_context_addr;
};
+struct mes_reset_queue_input {
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ bool use_mmio;
+ uint32_t me_id;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+ uint32_t vmid;
+ bool legacy_gfx;
+ bool is_kq;
+};
+
+struct mes_detect_and_reset_queue_input {
+ uint32_t queue_type;
+ bool detect_only;
+};
+
+struct mes_inv_tlbs_pasid_input {
+ uint32_t xcc_id;
+ uint16_t pasid;
+ uint8_t hub_id;
+ uint8_t flush_type;
+};
+
enum mes_misc_opcode {
MES_MISC_OP_WRITE_REG,
MES_MISC_OP_READ_REG,
MES_MISC_OP_WRM_REG_WAIT,
MES_MISC_OP_WRM_REG_WR_WAIT,
MES_MISC_OP_SET_SHADER_DEBUGGER,
+ MES_MISC_OP_CHANGE_CONFIG,
};
struct mes_misc_op_input {
@@ -291,9 +331,10 @@ struct mes_misc_op_input {
uint64_t process_context_addr;
union {
struct {
- uint64_t single_memop : 1;
- uint64_t single_alu_op : 1;
- uint64_t reserved: 30;
+ uint32_t single_memop : 1;
+ uint32_t single_alu_op : 1;
+ uint32_t reserved: 29;
+ uint32_t process_ctx_flush: 1;
};
uint32_t u32all;
} flags;
@@ -301,6 +342,21 @@ struct mes_misc_op_input {
uint32_t tcp_watch_cntl[4];
uint32_t trap_en;
} set_shader_debugger;
+
+ struct {
+ union {
+ struct {
+ uint32_t limit_single_process : 1;
+ uint32_t enable_hws_logging_buffer : 1;
+ uint32_t reserved : 30;
+ };
+ uint32_t all;
+ } option;
+ struct {
+ uint32_t tdr_level;
+ uint32_t tdr_delay;
+ } tdr_config;
+ } change_config;
};
};
@@ -311,6 +367,9 @@ struct amdgpu_mes_funcs {
int (*remove_hw_queue)(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input);
+ int (*map_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_map_legacy_queue_input *input);
+
int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
struct mes_unmap_legacy_queue_input *input);
@@ -322,44 +381,49 @@ struct amdgpu_mes_funcs {
int (*misc_op)(struct amdgpu_mes *mes,
struct mes_misc_op_input *input);
+
+ int (*reset_hw_queue)(struct amdgpu_mes *mes,
+ struct mes_reset_queue_input *input);
+
+ int (*detect_and_reset_hung_queues)(struct amdgpu_mes *mes,
+ struct mes_detect_and_reset_queue_input *input);
+
+
+ int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
-int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
-
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
int amdgpu_mes_init(struct amdgpu_device *adev);
void amdgpu_mes_fini(struct amdgpu_device *adev);
-int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
- struct amdgpu_vm *vm);
-void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);
-
-int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
- struct amdgpu_mes_gang_properties *gprops,
- int *gang_id);
-int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);
-
int amdgpu_mes_suspend(struct amdgpu_device *adev);
int amdgpu_mes_resume(struct amdgpu_device *adev);
-int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
- struct amdgpu_mes_queue_properties *qprops,
- int *queue_id);
-int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
-
+int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq);
+int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int vmid,
+ bool use_mmio);
+
+int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev);
+int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
+ int queue_type,
+ bool detect_only,
+ unsigned int *hung_db_num,
+ u32 *hung_db_array);
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
int amdgpu_mes_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t val);
-int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
- uint32_t val, uint32_t mask);
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask);
@@ -369,28 +433,12 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
const uint32_t *tcp_watch_cntl,
uint32_t flags,
bool trap_en);
-
-int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
- int queue_type, int idx,
- struct amdgpu_mes_ctx_data *ctx_data,
- struct amdgpu_ring **out);
-void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring);
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr);
uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
enum amdgpu_mes_priority_level prio);
-int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
- struct amdgpu_mes_ctx_data *ctx_data);
-void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
-int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_mes_ctx_data *ctx_data);
-int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
- struct amdgpu_mes_ctx_data *ctx_data);
-
-int amdgpu_mes_self_test(struct amdgpu_device *adev);
-
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
/*
@@ -450,4 +498,9 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
memalloc_noreclaim_restore(mes->saved_flags);
mutex_unlock(&mes->mutex_hidden);
}
+
+bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
+
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev);
+
#endif /* __AMDGPU_MES_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 32fe05c810c6..20460cfd09bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -32,7 +32,6 @@
#include <drm/display/drm_dp_helper.h>
#include <drm/drm_crtc.h>
-#include <drm/drm_edid.h>
#include <drm/drm_encoder.h>
#include <drm/drm_fixed.h>
#include <drm/drm_framebuffer.h>
@@ -51,6 +50,8 @@ struct amdgpu_device;
struct amdgpu_encoder;
struct amdgpu_router;
struct amdgpu_hpd;
+struct edid;
+struct drm_edid;
#define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base)
#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
@@ -300,6 +301,7 @@ struct amdgpu_framebuffer {
uint64_t tiling_flags;
bool tmz_surface;
+ bool gfx12_dcc;
/* caching for later use */
uint64_t address;
@@ -324,11 +326,8 @@ struct amdgpu_mode_info {
struct drm_property *audio_property;
/* FMT dithering */
struct drm_property *dither_property;
- /* Adaptive Backlight Modulation (power feature) */
- struct drm_property *abm_level_property;
/* hardcoded DFP edid from BIOS */
- struct edid *bios_hardcoded_edid;
- int bios_hardcoded_edid_size;
+ const struct drm_edid *bios_hardcoded_edid;
/* firmware flags */
u32 firmware_flags;
@@ -343,6 +342,97 @@ struct amdgpu_mode_info {
int disp_priority;
const struct amdgpu_display_funcs *funcs;
const enum drm_plane_type *plane_type;
+
+ /* Driver-private color mgmt props */
+
+ /* @plane_degamma_lut_property: Plane property to set a degamma LUT to
+ * convert encoded values to light linear values before sampling or
+ * blending.
+ */
+ struct drm_property *plane_degamma_lut_property;
+ /* @plane_degamma_lut_size_property: Plane property to define the max
+ * size of degamma LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_degamma_lut_size_property;
+ /**
+ * @plane_degamma_tf_property: Plane pre-defined transfer function to
+ * go from scanout/encoded values to linear values.
+ */
+ struct drm_property *plane_degamma_tf_property;
+ /**
+ * @plane_hdr_mult_property:
+ */
+ struct drm_property *plane_hdr_mult_property;
+
+ struct drm_property *plane_ctm_property;
+ /**
+ * @plane_shaper_lut_property: Plane property to set pre-blending shaper LUT
+ * that converts color content before 3D LUT. If
+ * plane_shaper_tf_property != Identity TF, AMD color module will
+ * combine the user LUT values with pre-defined TF into the LUT
+ * parameters to be programmed.
+ */
+ struct drm_property *plane_shaper_lut_property;
+ /**
+ * @plane_shaper_lut_size_property: Plane property for the size of
+ * pre-blending shaper LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_shaper_lut_size_property;
+ /**
+ * @plane_shaper_tf_property: Plane property to set a predefined
+ * transfer function for pre-blending shaper (before applying 3D LUT)
+ * with or without LUT. There is no shaper ROM, but we can use AMD
+ * color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *plane_shaper_tf_property;
+ /**
+ * @plane_lut3d_property: Plane property for color transformation using
+ * a 3D LUT (pre-blending), a three-dimensional array where each
+ * element is an RGB triplet. Each dimension has the size of
+ * lut3d_size. The array contains samples from the approximated
+ * function. On AMD, values between samples are estimated by
+ * tetrahedral interpolation. The array is accessed with three indices,
+ * one for each input dimension (color channel), blue being the
+ * outermost dimension, red the innermost.
+ */
+ struct drm_property *plane_lut3d_property;
+ /**
+ * @plane_lut3d_size_property: Plane property to define the max
+ * size of 3D LUT as supported by the driver (read-only). The max size
+ * is the max size of one dimension and, therefore, the max number of
+ * entries for 3D LUT array is the 3D LUT size cubed.
+ */
+ struct drm_property *plane_lut3d_size_property;
+ /**
+ * @plane_blend_lut_property: Plane property for output gamma before
+ * blending. Userspace sets a blend LUT to convert colors after 3D LUT
+ * conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they
+ * are sandwiching 3D LUT with two 1D LUTs. If plane_blend_tf_property
+ * != Identity TF, AMD color module will combine the user LUT values
+ * with pre-defined TF into the LUT parameters to be programmed.
+ */
+ struct drm_property *plane_blend_lut_property;
+ /**
+ * @plane_blend_lut_size_property: Plane property to define the max
+ * size of blend LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_blend_lut_size_property;
+ /**
+ * @plane_blend_tf_property: Plane property to set a predefined
+ * transfer function for pre-blending blend/out_gamma (after applying
+ * 3D LUT) with or without LUT. There is no blend ROM, but we can use
+ * AMD color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *plane_blend_tf_property;
+ /* @regamma_tf_property: Transfer function for CRTC regamma
+ * (post-blending). Possible values are defined by `enum
+ * amdgpu_transfer_function`. There is no regamma ROM, but we can use
+ * AMD color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *regamma_tf_property;
};
#define AMDGPU_MAX_BL_LEVEL 0xFF
@@ -406,8 +496,6 @@ struct amdgpu_crtc {
struct drm_connector *connector;
/* for dpm */
u32 line_time;
- u32 wm_low;
- u32 wm_high;
u32 lb_vblank_lead_lines;
struct drm_display_mode hw_mode;
/* for virtual dce */
@@ -416,6 +504,10 @@ struct amdgpu_crtc {
int otg_inst;
struct drm_pending_vblank_event *event;
+
+ bool wb_pending;
+ bool wb_enabled;
+ struct drm_writeback_connector *wb_conn;
};
struct amdgpu_encoder_atom_dig {
@@ -515,6 +607,7 @@ struct amdgpu_i2c_adapter {
struct i2c_adapter base;
struct ddc_service *ddc_service;
+ bool oem;
};
#define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index 51ca544a7094..a974265837f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -53,12 +53,14 @@ u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev)
return 0;
}
-void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
- uint64_t *count1)
+bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev)
{
- if (adev->nbio.funcs->get_pcie_usage)
- adev->nbio.funcs->get_pcie_usage(adev, count0, count1);
+ if (amdgpu_sriov_vf(adev) || !adev->asic_funcs ||
+ !adev->asic_funcs->get_pcie_replay_count ||
+ (!adev->nbio.funcs || !adev->nbio.funcs->get_pcie_replay_count))
+ return false;
+ return true;
}
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 6cf7a8829a52..b528de6a01f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -69,6 +69,8 @@ struct amdgpu_nbio_funcs {
u32 (*get_memsize)(struct amdgpu_device *adev);
void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index, int doorbell_size);
+ void (*vpe_doorbell_range)(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size);
void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
int doorbell_index, int instance);
void (*gc_doorbell_init)(struct amdgpu_device *adev);
@@ -99,9 +101,9 @@ struct amdgpu_nbio_funcs {
int (*get_compute_partition_mode)(struct amdgpu_device *adev);
u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
u32 *supp_modes);
+ bool (*is_nps_switch_requested)(struct amdgpu_device *adev);
u64 (*get_pcie_replay_count)(struct amdgpu_device *adev);
- void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
- uint64_t *count1);
+ void (*set_reg_remap)(struct amdgpu_device *adev);
};
struct amdgpu_nbio {
@@ -114,8 +116,9 @@ struct amdgpu_nbio {
};
int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);
-void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, uint64_t *count1);
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev);
+bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ace837cfa0a6..e08f58de4b17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -32,6 +32,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dma-buf.h>
+#include <linux/export.h>
#include <drm/drm_drv.h>
#include <drm/amdgpu_drm.h>
@@ -39,6 +40,9 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_dma_buf.h"
/**
* DOC: amdgpu_object
@@ -59,7 +63,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
amdgpu_bo_kunmap(bo);
- if (bo->tbo.base.import_attach)
+ if (drm_gem_is_imported(&bo->tbo.base))
drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
drm_gem_object_release(&bo->tbo.base);
amdgpu_bo_unref(&bo->parent);
@@ -76,24 +80,6 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
amdgpu_bo_destroy(tbo);
}
-static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
- struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo;
- struct amdgpu_bo_vm *vmbo;
-
- bo = shadow_bo->parent;
- vmbo = to_amdgpu_bo_vm(bo);
- /* in case amdgpu_device_recover_vram got NULL of bo->parent */
- if (!list_empty(&vmbo->shadow_list)) {
- mutex_lock(&adev->shadow_list_lock);
- list_del_init(&vmbo->shadow_list);
- mutex_unlock(&adev->shadow_list_lock);
- }
-
- amdgpu_bo_destroy(tbo);
-}
-
/**
* amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo
* @bo: buffer object to be checked
@@ -107,8 +93,7 @@ static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
if (bo->destroy == &amdgpu_bo_destroy ||
- bo->destroy == &amdgpu_bo_user_destroy ||
- bo->destroy == &amdgpu_bo_vm_destroy)
+ bo->destroy == &amdgpu_bo_user_destroy)
return true;
return false;
@@ -153,8 +138,10 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
else
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
- if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ if (abo->tbo.type == ttm_bo_type_kernel &&
+ flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
c++;
}
@@ -166,6 +153,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
c++;
}
+ if (domain & AMDGPU_GEM_DOMAIN_MMIO_REMAP) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_MMIO_REMAP;
+ places[c].flags = 0;
+ c++;
+ }
+
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
places[c].lpfn = 0;
@@ -173,6 +168,13 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ?
AMDGPU_PL_PREEMPT : TTM_PL_TT;
places[c].flags = 0;
+ /*
+ * When GTT is just an alternative to VRAM make sure that we
+ * only use it as fallback and still try to fill up VRAM first.
+ */
+ if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) &&
+ domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
+ places[c].flags |= TTM_PL_FLAG_FALLBACK;
c++;
}
@@ -220,9 +222,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
placement->num_placement = c;
placement->placement = places;
-
- placement->num_busy_placement = c;
- placement->busy_placement = places;
}
/**
@@ -335,6 +334,9 @@ error_free:
*
* Allocates and pins a BO for kernel internal use.
*
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to create and access the kernel BO.
+ *
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
* Returns:
@@ -360,6 +362,74 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
}
/**
+ * amdgpu_bo_create_isp_user - create user BO for isp
+ *
+ * @adev: amdgpu device object
+ * @dma_buf: DMABUF handle for isp buffer
+ * @domain: where to place it
+ * @bo: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
+ *
+ * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does
+ * GART alloc to generate gpu_addr for BO to make it accessible through the
+ * GART aperture for ISP HW.
+ *
+ * This function is exported to allow the V4L2 isp device external to drm device
+ * to create and access the isp user BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf, u32 domain, struct amdgpu_bo **bo,
+ u64 *gpu_addr)
+
+{
+ struct drm_gem_object *gem_obj;
+ int r;
+
+ gem_obj = amdgpu_gem_prime_import(&adev->ddev, dma_buf);
+ *bo = gem_to_amdgpu_bo(gem_obj);
+ if (!(*bo)) {
+ dev_err(adev->dev, "failed to get valid isp user bo\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_reserve(*bo, false);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve isp user bo\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_pin(*bo, domain);
+ if (r) {
+ dev_err(adev->dev, "(%d) isp user bo pin failed\n", r);
+ goto error_unreserve;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(*bo)->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", *bo);
+ goto error_unpin;
+ }
+
+ if (!WARN_ON(!gpu_addr))
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo);
+
+ amdgpu_bo_unreserve(*bo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(*bo);
+error_unreserve:
+ amdgpu_bo_unreserve(*bo);
+ amdgpu_bo_unref(bo);
+
+ return r;
+}
+
+/**
* amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
*
* @adev: amdgpu device object
@@ -434,6 +504,9 @@ error:
* @cpu_addr: pointer to where the BO's CPU memory space address was stored
*
* unmaps and unpin a BO for kernel internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the kernel BO.
*/
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr)
@@ -459,7 +532,29 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
*cpu_addr = NULL;
}
-/* Validate bo size is bit bigger then the request domain */
+/**
+ * amdgpu_bo_free_isp_user - free BO for isp use
+ *
+ * @bo: amdgpu isp user BO to free
+ *
+ * unpin and unref BO for isp internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the isp user BO.
+ */
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo)
+{
+ if (bo == NULL)
+ return;
+
+ if (amdgpu_bo_reserve(bo, true) == 0) {
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ }
+ amdgpu_bo_unref(&bo);
+}
+
+/* Validate that the BO size is smaller than the size of the requested domain */
static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
unsigned long size, u32 domain)
{
@@ -469,29 +564,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
* If GTT is part of requested domains the check must succeed to
* allow fall back to GTT.
*/
- if (domain & AMDGPU_GEM_DOMAIN_GTT) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
-
- if (man && size < man->size)
- return true;
- else if (!man)
- WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized");
- goto fail;
- } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+ else if (domain & AMDGPU_GEM_DOMAIN_VRAM)
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ else
+ return true;
- if (man && size < man->size)
- return true;
- goto fail;
+ if (!man) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
+ WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized");
+ return false;
}
/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */
- return true;
+ if (size < man->size)
+ return true;
-fail:
- if (man)
- DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
- man->size);
+ DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, man->size);
return false;
}
@@ -582,6 +672,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (bo == NULL)
return -ENOMEM;
drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
+ bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
bo->vm_bo = NULL;
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
bp->domain;
@@ -603,9 +694,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- if (adev->ras_enabled)
- bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
-
bo->tbo.bdev = &adev->mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
AMDGPU_GEM_DOMAIN_GDS))
@@ -613,6 +701,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
else
amdgpu_bo_placement_from_domain(bo, bp->domain);
if (bp->type == ttm_bo_type_kernel)
+ bo->tbo.priority = 2;
+ else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE))
bo->tbo.priority = 1;
if (!bp->destroy)
@@ -625,8 +715,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
return r;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- bo->tbo.resource->mem_type == TTM_PL_VRAM &&
- amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
ctx.bytes_moved);
else
@@ -636,7 +725,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+ r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
if (unlikely(r))
goto fail_unreserve;
@@ -724,52 +813,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
}
/**
- * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list
- *
- * @vmbo: BO that will be inserted into the shadow list
- *
- * Insert a BO to the shadow list.
- */
-void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev);
-
- mutex_lock(&adev->shadow_list_lock);
- list_add_tail(&vmbo->shadow_list, &adev->shadow_list);
- vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo);
- vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy;
- mutex_unlock(&adev->shadow_list_lock);
-}
-
-/**
- * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
- *
- * @shadow: &amdgpu_bo shadow to be restored
- * @fence: dma_fence associated with the operation
- *
- * Copies a buffer object's shadow content back to the object.
- * This is used for recovering a buffer from its shadow in case of a gpu
- * reset where vram context may be lost.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
-
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- uint64_t shadow_addr, parent_addr;
-
- shadow_addr = amdgpu_bo_gpu_offset(shadow);
- parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
-
- return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
- amdgpu_bo_size(shadow), NULL, fence,
- true, false, false);
-}
-
-/**
* amdgpu_bo_kmap - map an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be mapped
* @ptr: kernel virtual address to be returned
@@ -852,7 +895,7 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
if (bo == NULL)
return NULL;
- ttm_bo_get(&bo->tbo);
+ drm_gem_object_get(&bo->tbo.base);
return bo;
}
@@ -864,40 +907,30 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
*/
void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
- struct ttm_buffer_object *tbo;
-
if ((*bo) == NULL)
return;
- tbo = &((*bo)->tbo);
- ttm_bo_put(tbo);
+ drm_gem_object_put(&(*bo)->tbo.base);
*bo = NULL;
}
/**
- * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object
+ * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be pinned
* @domain: domain to be pinned to
- * @min_offset: the start of requested address range
- * @max_offset: the end of requested address range
*
- * Pins the buffer object according to requested domain and address range. If
- * the memory is unbound gart memory, binds the pages into gart table. Adjusts
- * pin_count and pin_size accordingly.
+ * Pins the buffer object according to requested domain. If the memory is
+ * unbound gart memory, binds the pages into gart table. Adjusts pin_count and
+ * pin_size accordingly.
*
* Pinning means to lock pages in memory along with keeping them at a fixed
* offset. It is required when a buffer can not be moved, for example, when
* a display buffer is being scanned out.
*
- * Compared with amdgpu_bo_pin(), this function gives more flexibility on
- * where to pin a buffer if there are specific restrictions on where a buffer
- * must be located.
- *
* Returns:
* 0 for success or a negative error code on failure.
*/
-int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset)
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { false, false };
@@ -906,15 +939,12 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return -EPERM;
- if (WARN_ON_ONCE(min_offset > max_offset))
- return -EINVAL;
-
/* Check domain to be pinned to against preferred domains */
if (bo->preferred_domains & domain)
domain = bo->preferred_domains & domain;
/* A shared bo cannot be migrated to VRAM */
- if (bo->tbo.base.import_attach) {
+ if (drm_gem_is_imported(&bo->tbo.base)) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)
domain = AMDGPU_GEM_DOMAIN_GTT;
else
@@ -934,14 +964,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
ttm_bo_pin(&bo->tbo);
-
- if (max_offset != 0) {
- u64 domain_start = amdgpu_ttm_domain_start(adev,
- mem_type);
- WARN_ON_ONCE(max_offset <
- (amdgpu_bo_gpu_offset(bo) - domain_start));
- }
-
return 0;
}
@@ -950,7 +972,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
*/
domain = amdgpu_bo_get_preferred_domain(adev, domain);
- if (bo->tbo.base.import_attach)
+ if (drm_gem_is_imported(&bo->tbo.base))
dma_buf_pin(bo->tbo.base.import_attach);
/* force to pin into visible video ram */
@@ -958,16 +980,9 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_bo_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) {
- unsigned int fpfn, lpfn;
-
- fpfn = min_offset >> PAGE_SHIFT;
- lpfn = max_offset >> PAGE_SHIFT;
-
- if (fpfn > bo->placements[i].fpfn)
- bo->placements[i].fpfn = fpfn;
- if (!bo->placements[i].lpfn ||
- (lpfn && lpfn < bo->placements[i].lpfn))
- bo->placements[i].lpfn = lpfn;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+ bo->placements[i].mem_type == TTM_PL_VRAM)
+ bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -978,12 +993,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
ttm_bo_pin(&bo->tbo);
- domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
- if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size);
atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo),
&adev->visible_pin_size);
- } else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+ } else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
@@ -992,24 +1006,6 @@ error:
}
/**
- * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
- * @bo: &amdgpu_bo buffer object to be pinned
- * @domain: domain to be pinned to
- *
- * A simple wrapper to amdgpu_bo_pin_restricted().
- * Provides a simpler API for buffers that do not have any strict restrictions
- * on where a buffer must be located.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
-{
- bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
-}
-
-/**
* amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be unpinned
*
@@ -1027,7 +1023,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
if (bo->tbo.pin_count)
return;
- if (bo->tbo.base.import_attach)
+ if (drm_gem_is_imported(&bo->tbo.base))
dma_buf_unpin(bo->tbo.base.import_attach);
if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
@@ -1053,7 +1049,8 @@ static const char * const amdgpu_vram_names[] = {
"GDDR6",
"DDR5",
"LPDDR4",
- "LPDDR5"
+ "LPDDR5",
+ "HBM3E"
};
/**
@@ -1250,7 +1247,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
* amdgpu_bo_move_notify - notification about a memory move
* @bo: pointer to a buffer object
* @evict: if this move is evicting the buffer from the graphics address space
- * @new_mem: new information of the bufer object
+ * @new_mem: new resource for backing the BO
*
* Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
* bookkeeping.
@@ -1260,73 +1257,24 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_resource *new_mem)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
- struct amdgpu_bo *abo;
struct ttm_resource *old_mem = bo->resource;
+ struct amdgpu_bo *abo;
if (!amdgpu_bo_is_amdgpu_bo(bo))
return;
abo = ttm_to_amdgpu_bo(bo);
- amdgpu_vm_bo_invalidate(adev, abo, evict);
+ amdgpu_vm_bo_move(abo, new_mem, evict);
amdgpu_bo_kunmap(abo);
- if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach &&
- bo->resource->mem_type != TTM_PL_SYSTEM)
+ if (abo->tbo.base.dma_buf && !drm_gem_is_imported(&abo->tbo.base) &&
+ old_mem && old_mem->mem_type != TTM_PL_SYSTEM)
dma_buf_move_notify(abo->tbo.base.dma_buf);
- /* remember the eviction */
- if (evict)
- atomic64_inc(&adev->num_evictions);
-
- /* update statistics */
- if (!new_mem)
- return;
-
/* move_notify is called before move happens */
- trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
-}
-
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
- struct amdgpu_mem_stats *stats)
-{
- uint64_t size = amdgpu_bo_size(bo);
- unsigned int domain;
-
- /* Abort if the BO doesn't currently have a backing store */
- if (!bo->tbo.resource)
- return;
-
- domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
- switch (domain) {
- case AMDGPU_GEM_DOMAIN_VRAM:
- stats->vram += size;
- if (amdgpu_bo_in_cpu_visible_vram(bo))
- stats->visible_vram += size;
- break;
- case AMDGPU_GEM_DOMAIN_GTT:
- stats->gtt += size;
- break;
- case AMDGPU_GEM_DOMAIN_CPU:
- default:
- stats->cpu += size;
- break;
- }
-
- if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
- stats->requested_vram += size;
- if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
- stats->requested_visible_vram += size;
-
- if (domain != AMDGPU_GEM_DOMAIN_VRAM) {
- stats->evicted_vram += size;
- if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
- stats->evicted_visible_vram += size;
- }
- } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) {
- stats->requested_gtt += size;
- }
+ trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
+ old_mem ? old_mem->mem_type : -1);
}
/**
@@ -1348,30 +1296,42 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
abo = ttm_to_amdgpu_bo(bo);
+ WARN_ON(abo->vm_bo);
+
if (abo->kfd_bo)
amdgpu_amdkfd_release_notify(abo);
- /* We only remove the fence if the resv has individualized. */
- WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
- && bo->base.resv != &bo->base._resv);
- if (bo->base.resv == &bo->base._resv)
- amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+ /*
+ * We lock the private dma_resv object here and since the BO is about to
+ * be released nobody else should have a pointer to it.
+ * So when this locking here fails something is wrong with the reference
+ * counting.
+ */
+ if (WARN_ON_ONCE(!dma_resv_trylock(&bo->base._resv)))
+ return;
+
+ amdgpu_amdkfd_remove_all_eviction_fences(abo);
if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM ||
!(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) ||
adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev)))
- return;
+ goto out;
- if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
- return;
+ r = dma_resv_reserve_fences(&bo->base._resv, 1);
+ if (r)
+ goto out;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
- if (!WARN_ON(r)) {
- amdgpu_bo_fence(abo, fence, false);
- dma_fence_put(fence);
- }
+ r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
+ AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+ if (WARN_ON(r))
+ goto out;
- dma_resv_unlock(bo->base.resv);
+ amdgpu_vram_mgr_set_cleared(bo->resource);
+ dma_resv_add_fence(&bo->base._resv, fence, DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(fence);
+
+out:
+ dma_resv_unlock(&bo->base._resv);
}
/**
@@ -1395,10 +1355,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* Remember that this BO was accessed by the CPU */
abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- if (bo->resource->mem_type != TTM_PL_VRAM)
- return 0;
-
- if (amdgpu_bo_in_cpu_visible_vram(abo))
+ if (amdgpu_res_cpu_visible(adev, bo->resource))
return 0;
/* Can't move a pinned BO to visible VRAM */
@@ -1411,8 +1368,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
AMDGPU_GEM_DOMAIN_GTT);
/* Avoid costly evictions; only set GTT as a busy placement */
- abo->placement.num_busy_placement = 1;
- abo->placement.busy_placement = &abo->placements[1];
+ abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
r = ttm_bo_validate(bo, &abo->placement, &ctx);
if (unlikely(r == -EBUSY || r == -ERESTARTSYS))
@@ -1422,7 +1378,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* this should never happen */
if (bo->resource->mem_type == TTM_PL_VRAM &&
- !amdgpu_bo_in_cpu_visible_vram(abo))
+ !amdgpu_res_cpu_visible(adev, bo->resource))
return VM_FAULT_SIGBUS;
ttm_bo_move_to_lru_tail_unlocked(bo);
@@ -1523,6 +1479,26 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
}
/**
+ * amdgpu_bo_fb_aper_addr - return FB aperture GPU offset of the VRAM bo
+ * @bo: amdgpu VRAM buffer object for which we query the offset
+ *
+ * Returns: gmc.fb_start + xgmi.physical_node_id * xgmi.node_segment_size +
+ * (resource start << PAGE_SHIFT), passed through amdgpu_gmc_sign_extend().
+ */
+u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ uint64_t offset, fb_base;
+
+ WARN_ON_ONCE(bo->tbo.resource->mem_type != TTM_PL_VRAM);
+ /* A non-VRAM BO only triggers the one-time warning above; the offset is still computed. */
+ fb_base = adev->gmc.fb_start;
+ fb_base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ offset = (bo->tbo.resource->start << PAGE_SHIFT) + fb_base;
+ return amdgpu_gmc_sign_extend(offset);
+}
+
+/**
* amdgpu_bo_gpu_offset_no_check - return GPU offset of bo
* @bo: amdgpu object for which we query the offset
*
@@ -1532,15 +1508,60 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- uint64_t offset;
+ uint64_t offset = AMDGPU_BO_INVALID_OFFSET;
+
+ if (bo->tbo.resource->mem_type == TTM_PL_TT)
+ offset = amdgpu_gmc_agp_addr(&bo->tbo);
- offset = (bo->tbo.resource->start << PAGE_SHIFT) +
- amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
+ if (offset == AMDGPU_BO_INVALID_OFFSET)
+ offset = (bo->tbo.resource->start << PAGE_SHIFT) +
+ amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
return amdgpu_gmc_sign_extend(offset);
}
/**
+ * amdgpu_bo_mem_stats_placement - bo placement for memory accounting
+ * @bo: the buffer object we should look at
+ *
+ * A BO can have multiple preferred placements; to avoid double counting it is
+ * filed under a single placement for memory stats.
+ * The highest set bit in preferred_domains (masked with
+ * AMDGPU_GEM_DOMAIN_MASK) is used, which gives a quite sensible result.
+ *
+ * Returns:
+ * Placement to account the BO under; TTM_PL_SYSTEM if no handled domain is set.
+ */
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
+{
+ uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
+
+ if (!domain)
+ return TTM_PL_SYSTEM;
+ /* Map the highest set domain bit to its placement. */
+ switch (rounddown_pow_of_two(domain)) {
+ case AMDGPU_GEM_DOMAIN_CPU:
+ return TTM_PL_SYSTEM;
+ case AMDGPU_GEM_DOMAIN_GTT:
+ return TTM_PL_TT;
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ return TTM_PL_VRAM;
+ case AMDGPU_GEM_DOMAIN_GDS:
+ return AMDGPU_PL_GDS;
+ case AMDGPU_GEM_DOMAIN_GWS:
+ return AMDGPU_PL_GWS;
+ case AMDGPU_GEM_DOMAIN_OA:
+ return AMDGPU_PL_OA;
+ case AMDGPU_GEM_DOMAIN_DOORBELL:
+ return AMDGPU_PL_DOORBELL;
+ case AMDGPU_GEM_DOMAIN_MMIO_REMAP:
+ return AMDGPU_PL_MMIO_REMAP;
+ default:
+ return TTM_PL_SYSTEM;
+ }
+}
+
+/**
* amdgpu_bo_get_preferred_domain - get preferred domain
* @adev: amdgpu device object
* @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
@@ -1582,6 +1603,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
*/
u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct dma_buf_attachment *attachment;
struct dma_buf *dma_buf;
const char *placement;
@@ -1589,22 +1611,42 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
u64 size;
if (dma_resv_trylock(bo->tbo.base.resv)) {
- unsigned int domain;
- domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
- switch (domain) {
- case AMDGPU_GEM_DOMAIN_VRAM:
- if (amdgpu_bo_in_cpu_visible_vram(bo))
- placement = "VRAM VISIBLE";
- else
- placement = "VRAM";
- break;
- case AMDGPU_GEM_DOMAIN_GTT:
- placement = "GTT";
- break;
- case AMDGPU_GEM_DOMAIN_CPU:
- default:
- placement = "CPU";
- break;
+ if (!bo->tbo.resource) {
+ placement = "NONE";
+ } else {
+ switch (bo->tbo.resource->mem_type) {
+ case TTM_PL_VRAM:
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ placement = "VRAM VISIBLE";
+ else
+ placement = "VRAM";
+ break;
+ case TTM_PL_TT:
+ placement = "GTT";
+ break;
+ case AMDGPU_PL_GDS:
+ placement = "GDS";
+ break;
+ case AMDGPU_PL_GWS:
+ placement = "GWS";
+ break;
+ case AMDGPU_PL_OA:
+ placement = "OA";
+ break;
+ case AMDGPU_PL_PREEMPT:
+ placement = "PREEMPTIBLE";
+ break;
+ case AMDGPU_PL_DOORBELL:
+ placement = "DOORBELL";
+ break;
+ case AMDGPU_PL_MMIO_REMAP:
+ placement = "MMIO REMAP";
+ break;
+ case TTM_PL_SYSTEM:
+ default:
+ placement = "CPU";
+ break;
+ }
}
dma_resv_unlock(bo->tbo.base.resv);
} else {
@@ -1634,7 +1676,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
-
+ /* Add the gem obj resv fence dump */
+ if (dma_resv_trylock(bo->tbo.base.resv)) {
+ dma_resv_describe(bo->tbo.base.resv, m);
+ dma_resv_unlock(bo->tbo.base.resv);
+ }
seq_puts(m, "\n");
return size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index f3ee83cdf97e..656b8a931dae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -69,7 +69,7 @@ struct amdgpu_bo_va_mapping {
uint64_t last;
uint64_t __subtree_last;
uint64_t offset;
- uint64_t flags;
+ uint32_t flags;
};
/* User space allocated BO in a VM */
@@ -90,6 +90,12 @@ struct amdgpu_bo_va {
bool cleared;
bool is_xgmi;
+
+ /*
+ * Protected by the VM reservation lock.
+ * If non-zero, cannot unmap from GPU because user queues may still access it.
+ */
+ unsigned int queue_refcount;
};
struct amdgpu_bo {
@@ -130,32 +136,9 @@ struct amdgpu_bo_user {
struct amdgpu_bo_vm {
struct amdgpu_bo bo;
- struct amdgpu_bo *shadow;
- struct list_head shadow_list;
struct amdgpu_vm_bo_base entries[];
};
-struct amdgpu_mem_stats {
- /* current VRAM usage, includes visible VRAM */
- uint64_t vram;
- /* current visible VRAM usage */
- uint64_t visible_vram;
- /* current GTT usage */
- uint64_t gtt;
- /* current system memory usage */
- uint64_t cpu;
- /* sum of evicted buffers, includes visible VRAM */
- uint64_t evicted_vram;
- /* sum of evicted buffers due to CPU access */
- uint64_t evicted_visible_vram;
- /* how much userspace asked for, includes vis.VRAM */
- uint64_t requested_vram;
- /* how much userspace asked for */
- uint64_t requested_visible_vram;
- /* how much userspace asked for */
- uint64_t requested_gtt;
-};
-
static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
{
return container_of(tbo, struct amdgpu_bo, tbo);
@@ -184,6 +167,8 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
return AMDGPU_GEM_DOMAIN_OA;
case AMDGPU_PL_DOORBELL:
return AMDGPU_GEM_DOMAIN_DOORBELL;
+ case AMDGPU_PL_MMIO_REMAP:
+ return AMDGPU_GEM_DOMAIN_MMIO_REMAP;
default:
break;
}
@@ -245,28 +230,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
}
/**
- * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
- */
-static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_res_cursor cursor;
-
- if (bo->tbo.resource->mem_type != TTM_PL_VRAM)
- return false;
-
- amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
- while (cursor.remaining) {
- if (cursor.start < adev->gmc.visible_vram_size)
- return true;
-
- amdgpu_res_next(&cursor, cursor.size);
- }
-
- return false;
-}
-
-/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/
static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
@@ -285,22 +248,6 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED;
}
-/**
- * amdgpu_bo_shadowed - check if the BO is shadowed
- *
- * @bo: BO to be tested.
- *
- * Returns:
- * NULL if not shadowed or else return a BO pointer.
- */
-static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo)
-{
- if (bo->tbo.type == ttm_bo_type_kernel)
- return to_amdgpu_bo_vm(bo)->shadow;
-
- return NULL;
-}
-
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
@@ -315,6 +262,10 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dbuf, u32 domain,
+ struct amdgpu_bo **bo,
+ u64 *gpu_addr);
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr);
@@ -326,14 +277,13 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
struct amdgpu_bo_vm **ubo_ptr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
void amdgpu_bo_unref(struct amdgpu_bo **bo);
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
-int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset);
void amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
@@ -356,12 +306,9 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
bool intr);
int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
+u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo);
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
- struct amdgpu_mem_stats *stats);
-void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo);
-int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
- struct dma_fence **fence);
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo);
uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
uint32_t domain);
@@ -396,8 +343,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
struct drm_suballoc **sa_bo,
unsigned int size);
-void amdgpu_sa_bo_free(struct amdgpu_device *adev,
- struct drm_suballoc **sa_bo,
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo,
struct dma_fence *fence);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
index 0bb2466d539a..675aa138ea11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -94,7 +94,7 @@ static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int n
ref_div_max = min(128 / post_div, ref_div_max);
/* get matching reference and feedback divider */
- *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
+ *ref_div = clamp(DIV_ROUND_CLOSEST(den, post_div), 1u, ref_div_max);
*fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
/* limit fb divider to its maximum */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
index e8adfd0a570a..34b5e22b44e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -137,7 +137,8 @@ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
if (ret)
return;
- device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+ if (adev->dev->kobj.sd)
+ device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 429ef212c1f2..8c0e5d03de50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -38,12 +38,13 @@
#include "psp_v12_0.h"
#include "psp_v13_0.h"
#include "psp_v13_0_4.h"
+#include "psp_v14_0.h"
#include "amdgpu_ras.h"
#include "amdgpu_securedisplay.h"
#include "amdgpu_atomfirmware.h"
-#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*3)
+#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*16)
static int psp_load_smu_fw(struct psp_context *psp);
static int psp_rap_terminate(struct psp_context *psp);
@@ -100,7 +101,7 @@ static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp
return;
}
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 0, 5):
@@ -128,7 +129,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
case IP_VERSION(11, 0, 7):
case IP_VERSION(11, 0, 9):
@@ -144,6 +145,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
adev->virt.autoload_ucode_id = 0;
break;
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
ret = psp_init_cap_microcode(psp, ucode_prefix);
ret &= psp_init_ta_microcode(psp, ucode_prefix);
break;
@@ -151,31 +153,40 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
ret = psp_init_cap_microcode(psp, ucode_prefix);
break;
+ case IP_VERSION(13, 0, 12):
+ ret = psp_init_ta_microcode(psp, ucode_prefix);
+ break;
default:
return -EINVAL;
}
return ret;
}
-static int psp_early_init(void *handle)
+static int psp_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ psp->autoload_supported = true;
+ psp->boot_time_tmr = true;
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
psp_v3_1_set_psp_funcs(psp);
psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(10, 0, 0):
case IP_VERSION(10, 0, 1):
psp_v10_0_set_psp_funcs(psp);
psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 4):
psp_v11_0_set_psp_funcs(psp);
psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 7):
@@ -185,18 +196,30 @@ static int psp_early_init(void *handle)
case IP_VERSION(11, 0, 9):
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
psp_v11_0_set_psp_funcs(psp);
- psp->autoload_supported = true;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(11, 0, 3):
case IP_VERSION(12, 0, 1):
psp_v12_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(13, 0, 2):
+ psp->boot_time_tmr = false;
+ fallthrough;
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ break;
+ case IP_VERSION(13, 0, 12):
psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
break;
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 3):
@@ -204,25 +227,37 @@ static int psp_early_init(void *handle)
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 11):
case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
psp_v13_0_set_psp_funcs(psp);
- psp->autoload_supported = true;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(11, 0, 8):
if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) {
psp_v11_0_8_set_psp_funcs(psp);
- psp->autoload_supported = false;
}
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 10):
psp_v13_0_set_psp_funcs(psp);
- psp->autoload_supported = true;
adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ psp->boot_time_tmr = false;
break;
case IP_VERSION(13, 0, 4):
psp_v13_0_4_set_psp_funcs(psp);
- psp->autoload_supported = true;
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ psp_v14_0_set_psp_funcs(psp);
+ break;
+ case IP_VERSION(14, 0, 5):
+ psp_v14_0_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
break;
default:
return -EINVAL;
@@ -230,6 +265,8 @@ static int psp_early_init(void *handle)
psp->adev = adev;
+ adev->psp_timeout = 20000;
+
psp_check_pmfw_centralized_cstate_management(psp);
if (amdgpu_sriov_vf(adev))
@@ -291,21 +328,22 @@ static int psp_memory_training_init(struct psp_context *psp)
struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
- DRM_DEBUG("memory training is not supported!\n");
+ dev_dbg(psp->adev->dev, "memory training is not supported!\n");
return 0;
}
ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
if (ctx->sys_cache == NULL) {
- DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
+ dev_err(psp->adev->dev, "alloc mem_train_ctx.sys_cache failed!\n");
ret = -ENOMEM;
goto Err_out;
}
- DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
- ctx->train_data_size,
- ctx->p2c_train_data_offset,
- ctx->c2p_train_data_offset);
+ dev_dbg(psp->adev->dev,
+ "train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
return 0;
@@ -334,7 +372,9 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
bool ret = false;
int i;
- if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6))
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))
return false;
db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET;
@@ -396,9 +436,9 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
return ret;
}
-static int psp_sw_init(void *handle)
+static int psp_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
int ret;
struct psp_runtime_boot_cfg_entry boot_cfg_entry;
@@ -407,13 +447,13 @@ static int psp_sw_init(void *handle)
psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!psp->cmd) {
- DRM_ERROR("Failed to allocate memory to command buffer!\n");
- ret = -ENOMEM;
+ dev_err(adev->dev, "Failed to allocate memory to command buffer!\n");
+ return -ENOMEM;
}
adev->psp.xgmi_context.supports_extended_data =
!adev->gmc.xgmi.connected_to_cpu &&
- adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2);
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2);
memset(&scpm_entry, 0, sizeof(scpm_entry));
if ((psp_get_runtime_db_entry(adev,
@@ -454,19 +494,19 @@ static int psp_sw_init(void *handle)
if (mem_training_ctx->enable_mem_training) {
ret = psp_memory_training_init(psp);
if (ret) {
- DRM_ERROR("Failed to initialize memory training!\n");
+ dev_err(adev->dev, "Failed to initialize memory training!\n");
return ret;
}
ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
if (ret) {
- DRM_ERROR("Failed to process memory training!\n");
+ dev_err(adev->dev, "Failed to process memory training!\n");
return ret;
}
}
ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- amdgpu_sriov_vf(adev) ?
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&psp->fw_pri_bo,
&psp->fw_pri_mc_addr,
@@ -502,11 +542,10 @@ failed1:
return ret;
}
-static int psp_sw_fini(void *handle)
+static int psp_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
- struct psp_gfx_cmd_resp *cmd = psp->cmd;
psp_memory_training_fini(psp);
@@ -516,8 +555,8 @@ static int psp_sw_fini(void *handle)
amdgpu_ucode_release(&psp->cap_fw);
amdgpu_ucode_release(&psp->toc_fw);
- kfree(cmd);
- cmd = NULL;
+ kfree(psp->cmd);
+ psp->cmd = NULL;
psp_free_shared_bufs(psp);
@@ -536,9 +575,11 @@ static int psp_sw_fini(void *handle)
return 0;
}
-int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
- uint32_t reg_val, uint32_t mask, bool check_changed)
+int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t reg_val,
+ uint32_t mask, uint32_t flags)
{
+ bool check_changed = flags & PSP_WAITREG_CHANGED;
+ bool verbose = !(flags & PSP_WAITREG_NOVERBOSE);
uint32_t val;
int i;
struct amdgpu_device *adev = psp->adev;
@@ -558,6 +599,11 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
udelay(1);
}
+ if (verbose)
+ dev_err(adev->dev,
+ "psp reg (0x%x) wait timed out, mask: %x, read: %x exp: %x",
+ reg_index, mask, val, reg_val);
+
return -ETIME;
}
@@ -614,11 +660,35 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
return "AUTOLOAD_RLC";
case GFX_CMD_ID_BOOT_CFG:
return "BOOT_CFG";
+ case GFX_CMD_ID_CONFIG_SQ_PERFMON:
+ return "CONFIG_SQ_PERFMON";
+ case GFX_CMD_ID_FB_FW_RESERV_ADDR:
+ return "FB_FW_RESERV_ADDR";
+ case GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR:
+ return "FB_FW_RESERV_EXT_ADDR";
+ case GFX_CMD_ID_SRIOV_SPATIAL_PART:
+ return "SPATIAL_PARTITION";
+ case GFX_CMD_ID_FB_NPS_MODE:
+ return "NPS_MODE_CHANGE";
default:
return "UNKNOWN CMD";
}
}
+static bool psp_err_warn(struct psp_context *psp)
+{
+ struct psp_gfx_cmd_resp *cmd = psp->cmd_buf_mem;
+
+ /* This response indicates reg list is already loaded */
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ cmd->cmd_id == GFX_CMD_ID_LOAD_IP_FW &&
+ cmd->cmd.cmd_load_ip_fw.fw_type == GFX_FW_TYPE_REG_LIST &&
+ cmd->resp.status == TEE_ERROR_CANCEL)
+ return false;
+
+ return true;
+}
+
static int
psp_cmd_submit_buf(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
@@ -626,7 +696,7 @@ psp_cmd_submit_buf(struct psp_context *psp,
{
int ret;
int index;
- int timeout = 20000;
+ int timeout = psp->adev->psp_timeout;
bool ras_intr = false;
bool skip_unsupport = false;
@@ -675,11 +745,16 @@ psp_cmd_submit_buf(struct psp_context *psp,
*/
if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) {
if (ucode)
- DRM_WARN("failed to load ucode %s(0x%X) ",
- amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
- DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
- psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id,
- psp->cmd_buf_mem->resp.status);
+ dev_warn(psp->adev->dev,
+ "failed to load ucode %s(0x%X) ",
+ amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
+ if (psp_err_warn(psp))
+ dev_warn(
+ psp->adev->dev,
+ "psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
+ psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id),
+ psp->cmd_buf_mem->cmd_id,
+ psp->cmd_buf_mem->resp.status);
/* If any firmware (including CAP) load fails under SRIOV, it should
* return failure to stop the VF from initializing.
* Also return failure in case of timeout
@@ -771,16 +846,6 @@ static int psp_load_toc(struct psp_context *psp,
return ret;
}
-static bool psp_boottime_tmr(struct psp_context *psp)
-{
- switch (psp->adev->ip_versions[MP0_HWIP][0]) {
- case IP_VERSION(13, 0, 6):
- return true;
- default:
- return false;
- }
-}
-
/* Set up Trusted Memory Region */
static int psp_tmr_init(struct psp_context *psp)
{
@@ -807,33 +872,35 @@ static int psp_tmr_init(struct psp_context *psp)
psp->fw_pri_buf) {
ret = psp_load_toc(psp, &tmr_size);
if (ret) {
- DRM_ERROR("Failed to load toc\n");
+ dev_err(psp->adev->dev, "Failed to load toc\n");
return ret;
}
}
- if (!psp->tmr_bo) {
+ if (!psp->tmr_bo && !psp->boot_time_tmr) {
pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
PSP_TMR_ALIGNMENT,
- AMDGPU_HAS_VRAM(psp->adev) ?
- AMDGPU_GEM_DOMAIN_VRAM :
- AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM,
&psp->tmr_bo, &psp->tmr_mc_addr,
pptr);
}
+ if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) && psp->tmr_bo)
+ psp->tmr_mc_addr = amdgpu_bo_fb_aper_addr(psp->tmr_bo);
return ret;
}
static bool psp_skip_tmr(struct psp_context *psp)
{
- switch (psp->adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) {
case IP_VERSION(11, 0, 9):
case IP_VERSION(11, 0, 7):
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
return true;
default:
return false;
@@ -855,7 +922,7 @@ static int psp_tmr_load(struct psp_context *psp)
psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
if (psp->tmr_bo)
- DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
+ dev_info(psp->adev->dev, "reserve 0x%lx from 0x%llx for PSP TMR\n",
amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
@@ -933,6 +1000,106 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp,
return ret;
}
+static int psp_get_fw_reservation_info(struct psp_context *psp,
+ uint32_t cmd_id,
+ uint64_t *addr,
+ uint32_t *size)
+{
+ int ret;
+ uint32_t status;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = cmd_id;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+ if (ret) {
+ release_psp_cmd_buf(psp);
+ return ret;
+ }
+
+ status = cmd->resp.status;
+ if (status == PSP_ERR_UNKNOWN_COMMAND) {
+ release_psp_cmd_buf(psp);
+ *addr = 0;
+ *size = 0;
+ return 0;
+ }
+
+ *addr = (uint64_t)cmd->resp.uresp.fw_reserve_info.reserve_base_address_hi << 32 |
+ cmd->resp.uresp.fw_reserve_info.reserve_base_address_lo;
+ *size = cmd->resp.uresp.fw_reserve_info.reserve_size;
+
+ release_psp_cmd_buf(psp);
+
+ return 0;
+}
+
+int psp_update_fw_reservation(struct psp_context *psp)
+{
+ int ret;
+ uint64_t reserv_addr, reserv_addr_ext;
+ uint32_t reserv_size, reserv_size_ext, mp0_ip_ver;
+ struct amdgpu_device *adev = psp->adev;
+
+ mp0_ip_ver = amdgpu_ip_version(adev, MP0_HWIP, 0);
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ switch (mp0_ip_ver) {
+ case IP_VERSION(14, 0, 2):
+ if (adev->psp.sos.fw_version < 0x3b0e0d)
+ return 0;
+ break;
+
+ case IP_VERSION(14, 0, 3):
+ if (adev->psp.sos.fw_version < 0x3a0e14)
+ return 0;
+ break;
+
+ default:
+ return 0;
+ }
+
+ ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_ADDR, &reserv_addr, &reserv_size);
+ if (ret)
+ return ret;
+ ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR, &reserv_addr_ext, &reserv_size_ext);
+ if (ret)
+ return ret;
+
+ if (reserv_addr != adev->gmc.real_vram_size - reserv_size) {
+ dev_warn(adev->dev, "reserve fw region is not valid!\n");
+ return 0;
+ }
+
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL);
+
+ reserv_size = roundup(reserv_size, SZ_1M);
+
+ ret = amdgpu_bo_create_kernel_at(adev, reserv_addr, reserv_size, &adev->mman.fw_reserved_memory, NULL);
+ if (ret) {
+ dev_err(adev->dev, "reserve fw region failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL);
+ return ret;
+ }
+
+ reserv_size_ext = roundup(reserv_size_ext, SZ_1M);
+
+ ret = amdgpu_bo_create_kernel_at(adev, reserv_addr_ext, reserv_size_ext,
+ &adev->mman.fw_reserved_memory_extend, NULL);
+ if (ret) {
+ dev_err(adev->dev, "reserve extend fw region failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, NULL);
+ return ret;
+ }
+
+ return 0;
+}
+
static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg)
{
struct psp_context *psp = &adev->psp;
@@ -1008,6 +1175,31 @@ static int psp_rl_load(struct amdgpu_device *adev)
return ret;
}
+int psp_memory_partition(struct psp_context *psp, int mode)
+{
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_FB_NPS_MODE;
+ cmd->cmd.cmd_memory_part.mode = mode;
+
+ dev_info(psp->adev->dev,
+ "Requesting %d memory partition change through PSP", mode);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "PSP request failed to change to NPS%d mode\n", mode);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
int psp_spatial_partition(struct psp_context *psp, int mode)
{
struct psp_gfx_cmd_resp *cmd;
@@ -1040,6 +1232,11 @@ static int psp_asd_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes)
return 0;
+ /* bypass asd if display hardware is not available */
+ if (!amdgpu_device_has_display_hardware(psp->adev) &&
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >= IP_VERSION(13, 0, 10))
+ return 0;
+
psp->asd_context.mem_context.shared_mc_addr = 0;
psp->asd_context.mem_context.shared_mem_size = PSP_ASD_SHARED_MEM_SIZE;
psp->asd_context.ta_load_type = GFX_CMD_ID_LOAD_ASD;
@@ -1113,7 +1310,7 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
psp_prep_reg_prog_cmd_buf(cmd, reg, value);
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
if (ret)
- DRM_ERROR("PSP failed to program reg id %d", reg);
+ dev_err(psp->adev->dev, "PSP failed to program reg id %d\n", reg);
release_psp_cmd_buf(psp);
@@ -1189,6 +1386,11 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context)
psp_copy_fw(psp, context->bin_desc.start_addr,
context->bin_desc.size_bytes);
+ if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) &&
+ context->mem_context.shared_bo)
+ context->mem_context.shared_mc_addr =
+ amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo);
+
psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, context);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
@@ -1215,8 +1417,8 @@ int psp_xgmi_terminate(struct psp_context *psp)
struct amdgpu_device *adev = psp->adev;
/* XGMI TA unload currently is not supported on Arcturus/Aldebaran A+A */
- if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) ||
- (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 4) ||
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
adev->gmc.xgmi.connected_to_cpu))
return 0;
@@ -1267,6 +1469,8 @@ invoke:
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE;
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ /* note down the capability flag for XGMI TA */
+ psp->xgmi_context.xgmi_ta_caps = xgmi_cmd->caps_flag;
return ret;
}
@@ -1313,9 +1517,11 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp)
{
- return (psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) &&
+ return (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 2) &&
psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b) ||
- psp->adev->ip_versions[MP0_HWIP][0] >= IP_VERSION(13, 0, 6);
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >=
+ IP_VERSION(13, 0, 6);
}
/*
@@ -1336,6 +1542,9 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp,
uint8_t dst_num_links = node_info.num_links;
hive = amdgpu_get_xgmi_hive(psp->adev);
+ if (WARN_ON(!hive))
+ return;
+
list_for_each_entry(mirror_adev, &hive->device_list, gmc.xgmi.head) {
struct psp_xgmi_topology_info *mirror_top_info;
int j;
@@ -1386,7 +1595,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
/* Fill in the shared memory with topology information as input */
topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
- xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_TOPOLOGY_INFO;
topology_info_input->num_nodes = number_devices;
for (i = 0; i < topology_info_input->num_nodes; i++) {
@@ -1397,7 +1606,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
}
/* Invoke xgmi ta to get the topology information */
- ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO);
+ ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_TOPOLOGY_INFO);
if (ret)
return ret;
@@ -1422,26 +1631,60 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
/* Invoke xgmi ta again to get the link information */
if (psp_xgmi_peer_link_info_supported(psp)) {
- struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output;
+ struct ta_xgmi_cmd_get_peer_link_info *link_info_output;
+ struct ta_xgmi_cmd_get_extend_peer_link_info *link_extend_info_output;
bool requires_reflection =
- (psp->xgmi_context.supports_extended_data && get_extended_data) ||
- psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6);
+ (psp->xgmi_context.supports_extended_data &&
+ get_extended_data) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 14);
+ bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 :
+ psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG;
+
+ /* populate the shared output buffer rather than the cmd input buffer
+ * with node_ids as the input for GET_PEER_LINKS command execution.
+ * This is required for GET_PEER_LINKS per xgmi ta implementation.
+ * The same requirement for GET_EXTEND_PEER_LINKS command.
+ */
+ if (ta_port_num_support) {
+ link_extend_info_output = &xgmi_cmd->xgmi_out_message.get_extend_link_info;
- xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS;
+ for (i = 0; i < topology->num_nodes; i++)
+ link_extend_info_output->nodes[i].node_id = topology->nodes[i].node_id;
- ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS);
+ link_extend_info_output->num_nodes = topology->num_nodes;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS;
+ } else {
+ link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info;
+
+ for (i = 0; i < topology->num_nodes; i++)
+ link_info_output->nodes[i].node_id = topology->nodes[i].node_id;
+
+ link_info_output->num_nodes = topology->num_nodes;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS;
+ }
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
if (ret)
return ret;
- link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info;
for (i = 0; i < topology->num_nodes; i++) {
+ uint8_t node_num_links = ta_port_num_support ?
+ link_extend_info_output->nodes[i].num_links : link_info_output->nodes[i].num_links;
/* accumulate num_links on extended data */
- topology->nodes[i].num_links = get_extended_data ?
- topology->nodes[i].num_links +
- link_info_output->nodes[i].num_links :
- ((requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links :
- link_info_output->nodes[i].num_links);
+ if (get_extended_data) {
+ topology->nodes[i].num_links = topology->nodes[i].num_links + node_num_links;
+ } else {
+ topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ?
+ topology->nodes[i].num_links : node_num_links;
+ }
+ /* populate the connected port num info if supported and available */
+ if (ta_port_num_support && topology->nodes[i].num_links) {
+ memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num,
+ sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM);
+ }
/* reflect the topology information for bi-directionality */
if (requires_reflection && topology->nodes[i].num_hops)
@@ -1490,24 +1733,84 @@ static void psp_ras_ta_check_status(struct psp_context *psp)
switch (ras_cmd->ras_status) {
case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP:
dev_warn(psp->adev->dev,
- "RAS WARNING: cmd failed due to unsupported ip\n");
+ "RAS WARNING: cmd failed due to unsupported ip\n");
break;
case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ:
dev_warn(psp->adev->dev,
- "RAS WARNING: cmd failed due to unsupported error injection\n");
+ "RAS WARNING: cmd failed due to unsupported error injection\n");
break;
case TA_RAS_STATUS__SUCCESS:
break;
case TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED:
if (ras_cmd->cmd_id == TA_RAS_COMMAND__TRIGGER_ERROR)
dev_warn(psp->adev->dev,
- "RAS WARNING: Inject error to critical region is not allowed\n");
+ "RAS WARNING: Inject error to critical region is not allowed\n");
break;
default:
dev_warn(psp->adev->dev,
- "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
+ "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
+ break;
+ }
+}
+
+static int psp_ras_send_cmd(struct psp_context *psp,
+ enum ras_command cmd_id, void *in, void *out)
+{
+ struct ta_ras_shared_memory *ras_cmd;
+ uint32_t cmd = cmd_id;
+ int ret = 0;
+
+ if (!in)
+ return -EINVAL;
+
+ mutex_lock(&psp->ras_context.mutex);
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+ memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+ switch (cmd) {
+ case TA_RAS_COMMAND__ENABLE_FEATURES:
+ case TA_RAS_COMMAND__DISABLE_FEATURES:
+ memcpy(&ras_cmd->ras_in_message,
+ in, sizeof(ras_cmd->ras_in_message));
+ break;
+ case TA_RAS_COMMAND__TRIGGER_ERROR:
+ memcpy(&ras_cmd->ras_in_message.trigger_error,
+ in, sizeof(ras_cmd->ras_in_message.trigger_error));
+ break;
+ case TA_RAS_COMMAND__QUERY_ADDRESS:
+ memcpy(&ras_cmd->ras_in_message.address,
+ in, sizeof(ras_cmd->ras_in_message.address));
+ break;
+ default:
+ dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ ras_cmd->cmd_id = cmd;
+ ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+ switch (cmd) {
+ case TA_RAS_COMMAND__TRIGGER_ERROR:
+ if (!ret && out)
+ memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status));
+ break;
+ case TA_RAS_COMMAND__QUERY_ADDRESS:
+ if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+ ret = -EINVAL;
+ else if (out)
+ memcpy(out,
+ &ras_cmd->ras_out_message.address,
+ sizeof(ras_cmd->ras_out_message.address));
+ break;
+ default:
break;
}
+
+err_out:
+ mutex_unlock(&psp->ras_context.mutex);
+
+ return ret;
}
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
@@ -1529,7 +1832,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
return ret;
if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
- DRM_WARN("RAS: Unsupported Interface");
+ dev_warn(psp->adev->dev, "RAS: Unsupported Interface\n");
return -EINVAL;
}
@@ -1551,23 +1854,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)
{
- struct ta_ras_shared_memory *ras_cmd;
+ enum ras_command cmd_id;
int ret;
- if (!psp->ras_context.context.initialized)
+ if (!psp->ras_context.context.initialized || !info)
return -EINVAL;
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
- memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
- if (enable)
- ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES;
- else
- ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES;
-
- ras_cmd->ras_in_message = *info;
-
- ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+ cmd_id = enable ?
+ TA_RAS_COMMAND__ENABLE_FEATURES : TA_RAS_COMMAND__DISABLE_FEATURES;
+ ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
if (ret)
return -EINVAL;
@@ -1591,6 +1886,8 @@ int psp_ras_terminate(struct psp_context *psp)
psp->ras_context.context.initialized = false;
+ mutex_destroy(&psp->ras_context.mutex);
+
return ret;
}
@@ -1621,34 +1918,47 @@ int psp_ras_initialize(struct psp_context *psp)
if (ret)
dev_warn(adev->dev, "PSP get boot config failed\n");
- if (!amdgpu_ras_is_supported(psp->adev, AMDGPU_RAS_BLOCK__UMC)) {
- if (!boot_cfg) {
- dev_info(adev->dev, "GECC is disabled\n");
- } else {
- /* disable GECC in next boot cycle if ras is
- * disabled by module parameter amdgpu_ras_enable
- * and/or amdgpu_ras_mask, or boot_config_get call
- * is failed
- */
- ret = psp_boot_config_set(adev, 0);
- if (ret)
- dev_warn(adev->dev, "PSP set boot config failed\n");
- else
- dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
- }
+ if (boot_cfg == 1 && !adev->ras_default_ecc_enabled &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ dev_warn(adev->dev, "GECC is currently enabled, which may affect performance\n");
+ dev_warn(adev->dev,
+ "To disable GECC, please reboot the system and load the amdgpu driver with the parameter amdgpu_ras_enable=0\n");
} else {
- if (boot_cfg == 1) {
- dev_info(adev->dev, "GECC is enabled\n");
+ if ((adev->ras_default_ecc_enabled || amdgpu_ras_enable == 1) &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ if (boot_cfg == 1) {
+ dev_info(adev->dev, "GECC is enabled\n");
+ } else {
+ /* enable GECC in next boot cycle if it is disabled
+ * in boot config, or force enable GECC if failed to
+ * get boot configuration
+ */
+ ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+ }
} else {
- /* enable GECC in next boot cycle if it is disabled
- * in boot config, or force enable GECC if failed to
- * get boot configuration
- */
- ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
- if (ret)
- dev_warn(adev->dev, "PSP set boot config failed\n");
- else
- dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+ if (!boot_cfg) {
+ if (!adev->ras_default_ecc_enabled &&
+ amdgpu_ras_enable != 1 &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ dev_warn(adev->dev, "GECC is disabled, set amdgpu_ras_enable=1 to enable GECC in next boot cycle if needed\n");
+ else
+ dev_info(adev->dev, "GECC is disabled\n");
+ } else {
+ /* disable GECC in next boot cycle if ras is
+ * disabled by module parameter amdgpu_ras_enable
+ * and/or amdgpu_ras_mask, or boot_config_get call
+ * is failed
+ */
+ ret = psp_boot_config_set(adev, 0);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
+ }
}
}
}
@@ -1672,14 +1982,19 @@ int psp_ras_initialize(struct psp_context *psp)
ras_cmd->ras_in_message.init_flags.xcc_mask =
adev->gfx.xcc_mask;
ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2;
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ ras_cmd->ras_in_message.init_flags.nps_mode =
+ adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask;
ret = psp_ta_load(psp, &psp->ras_context.context);
- if (!ret && !ras_cmd->ras_status)
+ if (!ret && !ras_cmd->ras_status) {
psp->ras_context.context.initialized = true;
- else {
+ mutex_init(&psp->ras_context.mutex);
+ } else {
if (ras_cmd->ras_status)
- dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
+ dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
/* fail to load RAS TA */
psp->ras_context.context.initialized = false;
@@ -1691,12 +2006,12 @@ int psp_ras_initialize(struct psp_context *psp)
int psp_ras_trigger_error(struct psp_context *psp,
struct ta_ras_trigger_error_input *info, uint32_t instance_mask)
{
- struct ta_ras_shared_memory *ras_cmd;
struct amdgpu_device *adev = psp->adev;
int ret;
uint32_t dev_mask;
+ uint32_t ras_status = 0;
- if (!psp->ras_context.context.initialized)
+ if (!psp->ras_context.context.initialized || !info)
return -EINVAL;
switch (info->block_id) {
@@ -1720,13 +2035,8 @@ int psp_ras_trigger_error(struct psp_context *psp,
dev_mask &= AMDGPU_RAS_INST_MASK;
info->sub_block_index |= dev_mask;
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
- memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
-
- ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR;
- ras_cmd->ras_in_message.trigger_error = *info;
-
- ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+ ret = psp_ras_send_cmd(psp,
+ TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status);
if (ret)
return -EINVAL;
@@ -1736,13 +2046,29 @@ int psp_ras_trigger_error(struct psp_context *psp,
if (amdgpu_ras_intr_triggered())
return 0;
- if (ras_cmd->ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED)
+ if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED)
return -EACCES;
- else if (ras_cmd->ras_status)
+ else if (ras_status)
return -EINVAL;
return 0;
}
+
+int psp_ras_query_address(struct psp_context *psp,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out)
+{
+ int ret;
+
+ if (!psp->ras_context.context.initialized ||
+ !addr_in || !addr_out)
+ return -EINVAL;
+
+ ret = psp_ras_send_cmd(psp,
+ TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out);
+
+ return ret;
+}
// ras end
// HDCP start
@@ -1756,6 +2082,10 @@ static int psp_hdcp_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
+ /* bypass hdcp initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
if (!psp->hdcp_context.context.bin_desc.size_bytes ||
!psp->hdcp_context.context.bin_desc.start_addr) {
dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n");
@@ -1788,6 +2118,9 @@ int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_sriov_vf(psp->adev))
return 0;
+ if (!psp->hdcp_context.context.initialized)
+ return 0;
+
return psp_ta_invoke(psp, ta_cmd_id, &psp->hdcp_context.context);
}
@@ -1823,6 +2156,10 @@ static int psp_dtm_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
+ /* bypass dtm initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
if (!psp->dtm_context.context.bin_desc.size_bytes ||
!psp->dtm_context.context.bin_desc.start_addr) {
dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n");
@@ -1855,6 +2192,9 @@ int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_sriov_vf(psp->adev))
return 0;
+ if (!psp->dtm_context.context.initialized)
+ return 0;
+
return psp_ta_invoke(psp, ta_cmd_id, &psp->dtm_context.context);
}
@@ -1989,9 +2329,14 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;
+ /* bypass securedisplay initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
if (!psp->securedisplay_context.context.bin_desc.size_bytes ||
!psp->securedisplay_context.context.bin_desc.start_addr) {
- dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n");
+ dev_info(psp->adev->dev,
+ "SECUREDISPLAY: optional securedisplay ta ucode is not available\n");
return 0;
}
@@ -2007,7 +2352,7 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
}
ret = psp_ta_load(psp, &psp->securedisplay_context.context);
- if (!ret) {
+ if (!ret && !psp->securedisplay_context.context.resp_status) {
psp->securedisplay_context.context.initialized = true;
mutex_init(&psp->securedisplay_context.mutex);
} else
@@ -2069,7 +2414,8 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
return -EINVAL;
if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA &&
- ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC)
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC &&
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2)
return -EINVAL;
ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context);
@@ -2089,17 +2435,56 @@ int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
return ret;
}
+bool amdgpu_psp_get_ras_capability(struct psp_context *psp)
+{
+ if (psp->funcs &&
+ psp->funcs->get_ras_capability) {
+ return psp->funcs->get_ras_capability(psp);
+ } else {
+ return false;
+ }
+}
+
+bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return false;
+
+ if (psp->funcs && psp->funcs->is_reload_needed)
+ return psp->funcs->is_reload_needed(psp);
+
+ return false;
+}
+
+static void psp_update_gpu_addresses(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+
+ if (psp->cmd_buf_bo && psp->cmd_buf_mem) {
+ psp->fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(psp->fw_pri_bo);
+ psp->fence_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->fence_buf_bo);
+ psp->cmd_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->cmd_buf_bo);
+ }
+ if (adev->firmware.rbuf && psp->km_ring.ring_mem)
+ psp->km_ring.ring_mem_mc_addr = amdgpu_bo_fb_aper_addr(adev->firmware.rbuf);
+}
+
static int psp_hw_start(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
int ret;
+ if (amdgpu_virt_xgmi_migrate_enabled(adev))
+ psp_update_gpu_addresses(adev);
+
if (!amdgpu_sriov_vf(adev)) {
if ((is_psp_fw_valid(psp->kdb)) &&
(psp->funcs->bootloader_load_kdb != NULL)) {
ret = psp_bootloader_load_kdb(psp);
if (ret) {
- DRM_ERROR("PSP load kdb failed!\n");
+ dev_err(adev->dev, "PSP load kdb failed!\n");
return ret;
}
}
@@ -2108,7 +2493,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_spl != NULL)) {
ret = psp_bootloader_load_spl(psp);
if (ret) {
- DRM_ERROR("PSP load spl failed!\n");
+ dev_err(adev->dev, "PSP load spl failed!\n");
return ret;
}
}
@@ -2117,7 +2502,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_sysdrv != NULL)) {
ret = psp_bootloader_load_sysdrv(psp);
if (ret) {
- DRM_ERROR("PSP load sys drv failed!\n");
+ dev_err(adev->dev, "PSP load sys drv failed!\n");
return ret;
}
}
@@ -2126,7 +2511,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_soc_drv != NULL)) {
ret = psp_bootloader_load_soc_drv(psp);
if (ret) {
- DRM_ERROR("PSP load soc drv failed!\n");
+ dev_err(adev->dev, "PSP load soc drv failed!\n");
return ret;
}
}
@@ -2135,7 +2520,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_intf_drv != NULL)) {
ret = psp_bootloader_load_intf_drv(psp);
if (ret) {
- DRM_ERROR("PSP load intf drv failed!\n");
+ dev_err(adev->dev, "PSP load intf drv failed!\n");
return ret;
}
}
@@ -2144,7 +2529,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_dbg_drv != NULL)) {
ret = psp_bootloader_load_dbg_drv(psp);
if (ret) {
- DRM_ERROR("PSP load dbg drv failed!\n");
+ dev_err(adev->dev, "PSP load dbg drv failed!\n");
return ret;
}
}
@@ -2153,7 +2538,25 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_ras_drv != NULL)) {
ret = psp_bootloader_load_ras_drv(psp);
if (ret) {
- DRM_ERROR("PSP load ras_drv failed!\n");
+ dev_err(adev->dev, "PSP load ras_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->ipkeymgr_drv)) &&
+ (psp->funcs->bootloader_load_ipkeymgr_drv != NULL)) {
+ ret = psp_bootloader_load_ipkeymgr_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load ipkeymgr_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->spdm_drv)) &&
+ (psp->funcs->bootloader_load_spdm_drv != NULL)) {
+ ret = psp_bootloader_load_spdm_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load spdm_drv failed!\n");
return ret;
}
}
@@ -2162,7 +2565,7 @@ static int psp_hw_start(struct psp_context *psp)
(psp->funcs->bootloader_load_sos != NULL)) {
ret = psp_bootloader_load_sos(psp);
if (ret) {
- DRM_ERROR("PSP load sos failed!\n");
+ dev_err(adev->dev, "PSP load sos failed!\n");
return ret;
}
}
@@ -2170,17 +2573,25 @@ static int psp_hw_start(struct psp_context *psp)
ret = psp_ring_create(psp, PSP_RING_TYPE__KM);
if (ret) {
- DRM_ERROR("PSP create ring failed!\n");
+ dev_err(adev->dev, "PSP create ring failed!\n");
return ret;
}
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ ret = psp_update_fw_reservation(psp);
+ if (ret) {
+ dev_err(adev->dev, "update fw reservation failed!\n");
+ return ret;
+ }
+ }
+
if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
goto skip_pin_bo;
- if (!psp_boottime_tmr(psp)) {
+ if (!psp->boot_time_tmr || psp->autoload_supported) {
ret = psp_tmr_init(psp);
if (ret) {
- DRM_ERROR("PSP tmr init failed!\n");
+ dev_err(adev->dev, "PSP tmr init failed!\n");
return ret;
}
}
@@ -2197,10 +2608,12 @@ skip_pin_bo:
return ret;
}
- ret = psp_tmr_load(psp);
- if (ret) {
- DRM_ERROR("PSP load tmr failed!\n");
- return ret;
+ if (!psp->boot_time_tmr || !psp->autoload_supported) {
+ ret = psp_tmr_load(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load tmr failed!\n");
+ return ret;
+ }
}
return 0;
@@ -2346,6 +2759,7 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
*type = GFX_FW_TYPE_DMUB;
break;
case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ case AMDGPU_UCODE_ID_SDMA_RS64:
*type = GFX_FW_TYPE_SDMA_UCODE_TH0;
break;
case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
@@ -2390,6 +2804,33 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
*type = GFX_FW_TYPE_RS64_MEC_P3_STACK;
break;
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ *type = GFX_FW_TYPE_VPEC_FW1;
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ *type = GFX_FW_TYPE_VPEC_FW2;
+ break;
+ case AMDGPU_UCODE_ID_VPE:
+ *type = GFX_FW_TYPE_VPE;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ *type = GFX_FW_TYPE_UMSCH_UCODE;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ *type = GFX_FW_TYPE_UMSCH_DATA;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
+ *type = GFX_FW_TYPE_UMSCH_CMD_BUFFER;
+ break;
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ *type = GFX_FW_TYPE_P2S_TABLE;
+ break;
+ case AMDGPU_UCODE_ID_JPEG_RAM:
+ *type = GFX_FW_TYPE_JPEG_RAM;
+ break;
+ case AMDGPU_UCODE_ID_ISP:
+ *type = GFX_FW_TYPE_ISP;
+ break;
case AMDGPU_UCODE_ID_MAXIMUM:
default:
return -EINVAL;
@@ -2446,7 +2887,8 @@ static void psp_print_fw_hdr(struct psp_context *psp,
}
}
-static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
+static int psp_prep_load_ip_fw_cmd_buf(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd)
{
int ret;
@@ -2459,7 +2901,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
if (ret)
- DRM_ERROR("Unknown firmware type\n");
+ dev_err(psp->adev->dev, "Unknown firmware type\n");
return ret;
}
@@ -2470,7 +2912,7 @@ int psp_execute_ip_fw_load(struct psp_context *psp,
int ret = 0;
struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
- ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd);
+ ret = psp_prep_load_ip_fw_cmd_buf(psp, ucode, cmd);
if (!ret) {
ret = psp_cmd_submit_buf(psp, ucode, cmd,
psp->fence_buf_mc_addr);
@@ -2481,6 +2923,33 @@ int psp_execute_ip_fw_load(struct psp_context *psp,
return ret;
}
+/*
+ * Load the P2S table through the PSP.
+ *
+ * Returns 0 without loading anything when one of the skip conditions below
+ * holds, otherwise the result of psp_execute_ip_fw_load().
+ */
+static int psp_load_p2s_table(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	struct amdgpu_firmware_info *p2s =
+		&adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE];
+
+	/* Skip during runtime PM when the mode is BACO or BAMACO. */
+	if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO ||
+			       adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO))
+		return 0;
+
+	/* MP0 13.0.6 / 13.0.14: skip when the SOS firmware is too old. */
+	if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+	    amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
+		uint32_t min_sos_ver = (adev->flags & AMD_IS_APU) ?
+				       0x0036013D : 0x0036003C;
+
+		if (psp->sos.fw_version < min_sos_ver)
+			return 0;
+	}
+
+	/* Skip when the table has no firmware image or amdgpu_sriov_vf() is true. */
+	if (!p2s->fw || amdgpu_sriov_vf(adev))
+		return 0;
+
+	return psp_execute_ip_fw_load(psp, p2s);
+}
+
static int psp_load_smu_fw(struct psp_context *psp)
{
int ret;
@@ -2493,25 +2962,25 @@ static int psp_load_smu_fw(struct psp_context *psp)
* Skip SMU FW reloading in case of using BACO for runpm only,
* as SMU is always alive.
*/
- if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO))
+ if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)))
return 0;
if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
return 0;
- if ((amdgpu_in_reset(adev) &&
- ras && adev->ras_enabled &&
- (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) ||
- adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 2)))) {
+ if ((amdgpu_in_reset(adev) && ras && adev->ras_enabled &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 4) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 2)))) {
ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
if (ret)
- DRM_WARN("Failed to set MP1 state prepare for reload\n");
+ dev_err(adev->dev, "Failed to set MP1 state prepare for reload\n");
}
ret = psp_execute_ip_fw_load(psp, ucode);
if (ret)
- DRM_ERROR("PSP load smu failed!\n");
+ dev_err(adev->dev, "PSP load smu failed!\n");
return ret;
}
@@ -2522,6 +2991,9 @@ static bool fw_load_skip_check(struct psp_context *psp,
if (!ucode->fw || !ucode->ucode_size)
return true;
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_P2S_TABLE)
+ return true;
+
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
(psp_smu_reload_quirk(psp) ||
psp->autoload_supported ||
@@ -2570,6 +3042,9 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
return ret;
}
+ /* Load P2S table first if it's available */
+ psp_load_p2s_table(psp);
+
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
@@ -2585,9 +3060,12 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
continue;
if (psp->autoload_supported &&
- (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7) ||
- adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 11) ||
- adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 12)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 7) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 11) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 12)) &&
(ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3))
@@ -2602,12 +3080,12 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
if (ret)
return ret;
- /* Start rlc autoload after psp recieved all the gfx firmware */
+ /* Start rlc autoload after psp received all the gfx firmware */
if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) {
ret = psp_rlc_autoload_start(psp);
if (ret) {
- DRM_ERROR("Failed to start rlc autoload\n");
+ dev_err(adev->dev, "Failed to start rlc autoload\n");
return ret;
}
}
@@ -2629,7 +3107,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
if (ret) {
- DRM_ERROR("PSP ring init failed!\n");
+ dev_err(adev->dev, "PSP ring init failed!\n");
goto failed;
}
}
@@ -2644,13 +3122,13 @@ static int psp_load_fw(struct amdgpu_device *adev)
ret = psp_asd_initialize(psp);
if (ret) {
- DRM_ERROR("PSP load asd failed!\n");
+ dev_err(adev->dev, "PSP load asd failed!\n");
goto failed1;
}
ret = psp_rl_load(adev);
if (ret) {
- DRM_ERROR("PSP load RL failed!\n");
+ dev_err(adev->dev, "PSP load RL failed!\n");
goto failed1;
}
@@ -2670,7 +3148,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
ret = psp_ras_initialize(psp);
if (ret)
dev_err(psp->adev->dev,
- "RAS: Failed to initialize RAS\n");
+ "RAS: Failed to initialize RAS\n");
ret = psp_hdcp_initialize(psp);
if (ret)
@@ -2707,23 +3185,20 @@ failed:
return ret;
}
-static int psp_hw_init(void *handle)
+static int psp_hw_init(struct amdgpu_ip_block *ip_block)
{
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
mutex_lock(&adev->firmware.mutex);
- /*
- * This sequence is just used on hw_init only once, no need on
- * resume.
- */
+
ret = amdgpu_ucode_init_bo(adev);
if (ret)
goto failed;
ret = psp_load_fw(adev);
if (ret) {
- DRM_ERROR("PSP firmware loading failed\n");
+ dev_err(adev->dev, "PSP firmware loading failed\n");
goto failed;
}
@@ -2736,9 +3211,9 @@ failed:
return -EINVAL;
}
-static int psp_hw_fini(void *handle)
+static int psp_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
if (psp->ta_fw) {
@@ -2760,17 +3235,17 @@ static int psp_hw_fini(void *handle)
return 0;
}
-static int psp_suspend(void *handle)
+static int psp_suspend(struct amdgpu_ip_block *ip_block)
{
int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
psp->xgmi_context.context.initialized) {
ret = psp_xgmi_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate xgmi ta\n");
+ dev_err(adev->dev, "Failed to terminate xgmi ta\n");
goto out;
}
}
@@ -2778,69 +3253,73 @@ static int psp_suspend(void *handle)
if (psp->ta_fw) {
ret = psp_ras_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate ras ta\n");
+ dev_err(adev->dev, "Failed to terminate ras ta\n");
goto out;
}
ret = psp_hdcp_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate hdcp ta\n");
+ dev_err(adev->dev, "Failed to terminate hdcp ta\n");
goto out;
}
ret = psp_dtm_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate dtm ta\n");
+ dev_err(adev->dev, "Failed to terminate dtm ta\n");
goto out;
}
ret = psp_rap_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate rap ta\n");
+ dev_err(adev->dev, "Failed to terminate rap ta\n");
goto out;
}
ret = psp_securedisplay_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate securedisplay ta\n");
+ dev_err(adev->dev, "Failed to terminate securedisplay ta\n");
goto out;
}
}
ret = psp_asd_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate asd\n");
+ dev_err(adev->dev, "Failed to terminate asd\n");
goto out;
}
ret = psp_tmr_terminate(psp);
if (ret) {
- DRM_ERROR("Failed to terminate tmr\n");
+ dev_err(adev->dev, "Failed to terminate tmr\n");
goto out;
}
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
if (ret)
- DRM_ERROR("PSP ring stop failed\n");
+ dev_err(adev->dev, "PSP ring stop failed\n");
out:
return ret;
}
-static int psp_resume(void *handle)
+static int psp_resume(struct amdgpu_ip_block *ip_block)
{
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
- DRM_INFO("PSP is resuming...\n");
+ dev_info(adev->dev, "PSP is resuming...\n");
if (psp->mem_train_ctx.enable_mem_training) {
ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME);
if (ret) {
- DRM_ERROR("Failed to process memory training!\n");
+ dev_err(adev->dev, "Failed to process memory training!\n");
return ret;
}
}
mutex_lock(&adev->firmware.mutex);
+ ret = amdgpu_ucode_init_bo(adev);
+ if (ret)
+ goto failed;
+
ret = psp_hw_start(psp);
if (ret)
goto failed;
@@ -2851,7 +3330,7 @@ static int psp_resume(void *handle)
ret = psp_asd_initialize(psp);
if (ret) {
- DRM_ERROR("PSP load asd failed!\n");
+ dev_err(adev->dev, "PSP load asd failed!\n");
goto failed;
}
@@ -2875,7 +3354,7 @@ static int psp_resume(void *handle)
ret = psp_ras_initialize(psp);
if (ret)
dev_err(psp->adev->dev,
- "RAS: Failed to initialize RAS\n");
+ "RAS: Failed to initialize RAS\n");
ret = psp_hdcp_initialize(psp);
if (ret)
@@ -2903,7 +3382,7 @@ static int psp_resume(void *handle)
return 0;
failed:
- DRM_ERROR("PSP resume failed\n");
+ dev_err(adev->dev, "PSP resume failed\n");
mutex_unlock(&adev->firmware.mutex);
return ret;
}
@@ -2964,9 +3443,11 @@ int psp_ring_cmd_submit(struct psp_context *psp,
write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
/* Check invalid write_frame ptr address */
if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
- DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
- ring_buffer_start, ring_buffer_end, write_frame);
- DRM_ERROR("write_frame is pointing to address out of bounds\n");
+ dev_err(adev->dev,
+ "ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+ ring_buffer_start, ring_buffer_end, write_frame);
+ dev_err(adev->dev,
+ "write_frame is pointing to address out of bounds\n");
return -EINVAL;
}
@@ -2990,12 +3471,11 @@ int psp_ring_cmd_submit(struct psp_context *psp,
int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name)
{
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
const struct psp_firmware_header_v1_0 *asd_hdr;
int err = 0;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_asd.bin", chip_name);
if (err)
goto out;
@@ -3014,12 +3494,11 @@ out:
int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name)
{
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
const struct psp_firmware_header_v1_0 *toc_hdr;
int err = 0;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", chip_name);
if (err)
goto out;
@@ -3109,6 +3588,18 @@ static int parse_sos_bin_descriptor(struct psp_context *psp,
psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes);
psp->ras_drv.start_addr = ucode_start_addr;
break;
+ case PSP_FW_TYPE_PSP_IPKEYMGR_DRV:
+ psp->ipkeymgr_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ipkeymgr_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ipkeymgr_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SPDM_DRV:
+ psp->spdm_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->spdm_drv.start_addr = ucode_start_addr;
+ break;
default:
dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type);
break;
@@ -3128,7 +3619,7 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev)
le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
if (adev->gmc.xgmi.connected_to_cpu ||
- (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 2))) {
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2))) {
adev->psp.sos.fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
adev->psp.sos.feature_version = le32_to_cpu(sos_hdr->sos.fw_version);
@@ -3165,18 +3656,23 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev)
int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
{
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
const struct psp_firmware_header_v1_0 *sos_hdr;
const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
const struct psp_firmware_header_v2_0 *sos_hdr_v2_0;
- int err = 0;
+ const struct psp_firmware_header_v2_1 *sos_hdr_v2_1;
+ int fw_index, fw_bin_count, start_index = 0;
+ const struct psp_fw_bin_desc *fw_bin;
uint8_t *ucode_array_start_addr;
- int fw_index = 0;
+ int err = 0;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, fw_name);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos.bin", chip_name);
if (err)
goto out;
@@ -3225,15 +3721,30 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
case 2:
sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data;
- if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
+ fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count);
+
+ if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) {
dev_err(adev->dev, "packed SOS count exceeds maximum limit\n");
err = -EINVAL;
goto out;
}
- for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) {
- err = parse_sos_bin_descriptor(psp,
- &sos_hdr_v2_0->psp_fw_bin[fw_index],
+ if (sos_hdr_v2_0->header.header_version_minor == 1) {
+ sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data;
+
+ fw_bin = sos_hdr_v2_1->psp_fw_bin;
+
+ if (psp_is_aux_sos_load_required(psp))
+ start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+ else
+ fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+
+ } else {
+ fw_bin = sos_hdr_v2_0->psp_fw_bin;
+ }
+
+ for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) {
+ err = parse_sos_bin_descriptor(psp, fw_bin + fw_index,
sos_hdr_v2_0);
if (err)
goto out;
@@ -3253,6 +3764,36 @@ out:
return err;
}
+/*
+ * Tell whether a TA binary descriptor should be parsed on this device.
+ *
+ * Only the XGMI and XGMI_AUX descriptor types on MP0 13.0.6 are filtered;
+ * every other type, and every other MP0 version, is accepted.
+ *
+ * Returns true when @desc should be parsed, false when it should be ignored.
+ */
+static bool is_ta_fw_applicable(struct psp_context *psp,
+				const struct psp_fw_bin_desc *desc)
+{
+	struct amdgpu_device *adev = psp->adev;
+	bool use_aux;
+
+	if (desc->fw_type != TA_FW_TYPE_PSP_XGMI &&
+	    desc->fw_type != TA_FW_TYPE_PSP_XGMI_AUX)
+		return true;
+
+	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
+		return true;
+
+	/*
+	 * For now, the AUX TA only exists in the 13.0.6 ta bin, from
+	 * v20.00.0x.14: an APU whose descriptor carries a low version byte of
+	 * at least 0x14 takes the AUX TA, everything else takes the regular
+	 * XGMI TA.
+	 */
+	use_aux = (adev->flags & AMD_IS_APU) &&
+		  (le32_to_cpu(desc->fw_version) & 0xff) >= 0x14;
+
+	return desc->fw_type == (use_aux ? TA_FW_TYPE_PSP_XGMI_AUX :
+					   TA_FW_TYPE_PSP_XGMI);
+}
+
static int parse_ta_bin_descriptor(struct psp_context *psp,
const struct psp_fw_bin_desc *desc,
const struct ta_firmware_header_v2_0 *ta_hdr)
@@ -3262,6 +3803,9 @@ static int parse_ta_bin_descriptor(struct psp_context *psp,
if (!psp || !desc || !ta_hdr)
return -EINVAL;
+ if (!is_ta_fw_applicable(psp, desc))
+ return 0;
+
ucode_start_addr = (uint8_t *)ta_hdr +
le32_to_cpu(desc->offset_bytes) +
le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
@@ -3274,6 +3818,7 @@ static int parse_ta_bin_descriptor(struct psp_context *psp,
psp->asd_context.bin_desc.start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_XGMI:
+ case TA_FW_TYPE_PSP_XGMI_AUX:
psp->xgmi_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
psp->xgmi_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
psp->xgmi_context.context.bin_desc.start_addr = ucode_start_addr;
@@ -3401,11 +3946,14 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name)
{
const struct common_firmware_header *hdr;
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
int err;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, fw_name);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta.bin", chip_name);
if (err)
return err;
@@ -3431,7 +3979,6 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name)
int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name)
{
struct amdgpu_device *adev = psp->adev;
- char fw_name[PSP_FW_NAME_LEN];
const struct psp_firmware_header_v1_0 *cap_hdr_v1_0;
struct amdgpu_firmware_info *info = NULL;
int err = 0;
@@ -3441,15 +3988,16 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name)
return -EINVAL;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_cap.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_cap.bin", chip_name);
if (err) {
if (err == -ENODEV) {
dev_warn(adev->dev, "cap microcode does not exist, skip\n");
err = 0;
- goto out;
+ } else {
+ dev_err(adev->dev, "fail to initialize cap microcode\n");
}
- dev_err(adev->dev, "fail to initialize cap microcode\n");
+ goto out;
}
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP];
@@ -3470,13 +4018,49 @@ out:
return err;
}
-static int psp_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
+/*
+ * Submit a GFX_CMD_ID_CONFIG_SQ_PERFMON command to the PSP for one XCP.
+ *
+ * @psp: PSP context
+ * @xcp_id: XCP index; the command carries it as the bit mask BIT_MASK(@xcp_id)
+ * @core_override_enable: value for the command's core_override field
+ * @reg_override_enable: value for the command's reg_override field
+ * @perfmon_override_enable: value for the command's perfmon_override field
+ *
+ * Returns 0 without submitting anything when amdgpu_sriov_vf() is true,
+ * -EINVAL when @xcp_id is above MAX_XCP or the MP0 version is not 13.0.6,
+ * otherwise the result of psp_cmd_submit_buf().
+ */
+int psp_config_sq_perfmon(struct psp_context *psp,
+			  uint32_t xcp_id, bool core_override_enable,
+			  bool reg_override_enable, bool perfmon_override_enable)
+{
+	struct psp_gfx_cmd_resp *cmd;
+	int ret;
+
+	if (amdgpu_sriov_vf(psp->adev))
+		return 0;
+
+	/*
+	 * NOTE(review): this accepts xcp_id == MAX_XCP. If MAX_XCP is a count
+	 * rather than the highest valid index, the test should be ">=" -
+	 * confirm against the MAX_XCP definition.
+	 */
+	if (xcp_id > MAX_XCP) {
+		dev_err(psp->adev->dev, "invalid xcp_id %u\n", xcp_id);
+		return -EINVAL;
+	}
+
+	if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) {
+		dev_err(psp->adev->dev, "Unsupported MP0 version 0x%x for CONFIG_SQ_PERFMON command\n",
+			amdgpu_ip_version(psp->adev, MP0_HWIP, 0));
+		return -EINVAL;
+	}
+
+	/* Take the command buffer only after every argument check has passed. */
+	cmd = acquire_psp_cmd_buf(psp);
+
+	cmd->cmd_id = GFX_CMD_ID_CONFIG_SQ_PERFMON;
+	cmd->cmd.config_sq_perfmon.gfx_xcp_mask = BIT_MASK(xcp_id);
+	cmd->cmd.config_sq_perfmon.core_override = core_override_enable;
+	cmd->cmd.config_sq_perfmon.reg_override = reg_override_enable;
+	cmd->cmd.config_sq_perfmon.perfmon_override = perfmon_override_enable;
+
+	ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+	if (ret)
+		dev_warn(psp->adev->dev, "PSP failed to config sq: xcp%u core%d reg%d perfmon%d\n",
+			 xcp_id, core_override_enable, reg_override_enable, perfmon_override_enable);
+
+	release_psp_cmd_buf(psp);
+	return ret;
+}
+
+static int psp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
return 0;
}
-static int psp_set_powergating_state(void *handle,
+static int psp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3488,11 +4072,13 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_ip_block *ip_block;
uint32_t fw_ver;
int ret;
- if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
- DRM_INFO("PSP block is not ready yet.");
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
+ dev_info(adev->dev, "PSP block is not ready yet.\n");
return -EBUSY;
}
@@ -3501,7 +4087,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
mutex_unlock(&adev->psp.mutex);
if (ret) {
- DRM_ERROR("Failed to read USBC PD FW, err = %d", ret);
+ dev_err(adev->dev, "Failed to read USBC PD FW, err = %d\n", ret);
return ret;
}
@@ -3516,22 +4102,23 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
int ret, idx;
- char fw_name[100];
const struct firmware *usbc_pd_fw;
struct amdgpu_bo *fw_buf_bo = NULL;
uint64_t fw_pri_mc_addr;
void *fw_pri_cpu_addr;
+ struct amdgpu_ip_block *ip_block;
- if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
- DRM_INFO("PSP block is not ready yet.");
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
+ dev_err(adev->dev, "PSP block is not ready yet.\n");
return -EBUSY;
}
if (!drm_dev_enter(ddev, &idx))
return -ENODEV;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s", buf);
- ret = request_firmware(&usbc_pd_fw, fw_name, adev->dev);
+ ret = amdgpu_ucode_request(adev, &usbc_pd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s", buf);
if (ret)
goto fail;
@@ -3553,10 +4140,10 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr);
rel_buf:
- release_firmware(usbc_pd_fw);
+ amdgpu_ucode_release(&usbc_pd_fw);
fail:
if (ret) {
- DRM_ERROR("Failed to load USBC PD FW, err = %d", ret);
+ dev_err(adev->dev, "Failed to load USBC PD FW, err = %d\n", ret);
count = ret;
}
@@ -3592,7 +4179,7 @@ int is_psp_fw_valid(struct psp_bin_desc bin)
}
static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buffer, loff_t pos, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
@@ -3603,7 +4190,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
/* Safeguard against memory drain */
if (adev->psp.vbflash_image_size > AMD_VBIOS_FILE_MAX_SIZE_B) {
- dev_err(adev->dev, "File size cannot exceed %u", AMD_VBIOS_FILE_MAX_SIZE_B);
+ dev_err(adev->dev, "File size cannot exceed %u\n", AMD_VBIOS_FILE_MAX_SIZE_B);
kvfree(adev->psp.vbflash_tmp_buf);
adev->psp.vbflash_tmp_buf = NULL;
adev->psp.vbflash_image_size = 0;
@@ -3622,13 +4209,13 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
adev->psp.vbflash_image_size += count;
mutex_unlock(&adev->psp.mutex);
- dev_dbg(adev->dev, "IFWI staged for update");
+ dev_dbg(adev->dev, "IFWI staged for update\n");
return count;
}
static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buffer,
+ const struct bin_attribute *bin_attr, char *buffer,
loff_t pos, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
@@ -3642,7 +4229,7 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
if (adev->psp.vbflash_image_size == 0)
return -EINVAL;
- dev_dbg(adev->dev, "PSP IFWI flash process initiated");
+ dev_dbg(adev->dev, "PSP IFWI flash process initiated\n");
ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size,
AMDGPU_GPU_PAGE_SIZE,
@@ -3667,11 +4254,11 @@ rel_buf:
adev->psp.vbflash_image_size = 0;
if (ret) {
- dev_err(adev->dev, "Failed to load IFWI, err = %d", ret);
+ dev_err(adev->dev, "Failed to load IFWI, err = %d\n", ret);
return ret;
}
- dev_dbg(adev->dev, "PSP IFWI flash process done");
+ dev_dbg(adev->dev, "PSP IFWI flash process done\n");
return 0;
}
@@ -3680,7 +4267,7 @@ rel_buf:
* Writing to this file will stage an IFWI for update. Reading from this file
* will trigger the update process.
*/
-static struct bin_attribute psp_vbflash_bin_attr = {
+static const struct bin_attribute psp_vbflash_bin_attr = {
.attr = {.name = "psp_vbflash", .mode = 0660},
.size = 0,
.write = amdgpu_psp_vbflash_write,
@@ -3711,7 +4298,7 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev,
}
static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL);
-static struct bin_attribute *bin_flash_attrs[] = {
+static const struct bin_attribute *const bin_flash_attrs[] = {
&psp_vbflash_bin_attr,
NULL
};
@@ -3735,7 +4322,7 @@ static umode_t amdgpu_flash_attr_is_visible(struct kobject *kobj, struct attribu
}
static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj,
- struct bin_attribute *attr,
+ const struct bin_attribute *attr,
int idx)
{
struct device *dev = kobj_to_dev(kobj);
@@ -3752,20 +4339,119 @@ const struct attribute_group amdgpu_flash_attr_group = {
.is_visible = amdgpu_flash_attr_is_visible,
};
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * open() handler of the "psp_spirom_dump" debugfs file.
+ *
+ * Allocates a GTT buffer of AMD_VBIOS_FILE_MAX_SIZE_B * 2 bytes, passes its
+ * MC address to psp_dump_spirom(), and on success publishes the buffer in
+ * adev->psp.spirom_dump_trip for the read() and release() handlers.
+ *
+ * Returns 0 on success, -EBUSY when adev->psp.mutex cannot be taken or a dump
+ * buffer is already held, -ENOMEM when the bookkeeping struct cannot be
+ * allocated, or the error from amdgpu_bo_create_kernel() / psp_dump_spirom().
+ */
+static int psp_read_spirom_debugfs_open(struct inode *inode, struct file *filp)
+{
+	struct amdgpu_device *adev = filp->f_inode->i_private;
+	struct spirom_bo *bo_triplet;
+	int ret;
+
+	/* serialize the open() file calling */
+	if (!mutex_trylock(&adev->psp.mutex))
+		return -EBUSY;
+
+	/*
+	 * Make sure only one userspace process is alive for dumping so that
+	 * only one memory buffer of AMD_VBIOS_FILE_MAX_SIZE_B * 2 is consumed.
+	 * Consider the case where one process tries opening the file while
+	 * another one has proceeded to read or release. Rejecting the second
+	 * opener here also eliminates the use of the mutex in the read() and
+	 * release() callbacks.
+	 */
+	if (adev->psp.spirom_dump_trip) {
+		mutex_unlock(&adev->psp.mutex);
+		return -EBUSY;
+	}
+
+	/* Size the allocation from the pointer so it follows the type. */
+	bo_triplet = kzalloc(sizeof(*bo_triplet), GFP_KERNEL);
+	if (!bo_triplet) {
+		mutex_unlock(&adev->psp.mutex);
+		return -ENOMEM;
+	}
+
+	ret = amdgpu_bo_create_kernel(adev, AMD_VBIOS_FILE_MAX_SIZE_B * 2,
+				      AMDGPU_GPU_PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_GTT,
+				      &bo_triplet->bo,
+				      &bo_triplet->mc_addr,
+				      &bo_triplet->cpu_addr);
+	if (ret)
+		goto rel_trip;
+
+	ret = psp_dump_spirom(&adev->psp, bo_triplet->mc_addr);
+	if (ret)
+		goto rel_bo;
+
+	adev->psp.spirom_dump_trip = bo_triplet;
+	mutex_unlock(&adev->psp.mutex);
+	return 0;
+rel_bo:
+	amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr,
+			      &bo_triplet->cpu_addr);
+rel_trip:
+	kfree(bo_triplet);
+	mutex_unlock(&adev->psp.mutex);
+	dev_err(adev->dev, "Trying IFWI dump fails, err = %d\n", ret);
+	return ret;
+}
+
+/*
+ * read() handler of the "psp_spirom_dump" debugfs file.
+ *
+ * Copies from the buffer stored in adev->psp.spirom_dump_trip via
+ * simple_read_from_buffer(), treating it as AMD_VBIOS_FILE_MAX_SIZE_B * 2
+ * bytes long. Returns -EINVAL when no dump buffer is stored.
+ */
+static ssize_t psp_read_spirom_debugfs_read(struct file *filp, char __user *buf, size_t size,
+					    loff_t *pos)
+{
+	struct amdgpu_device *adev = filp->f_inode->i_private;
+	struct spirom_bo *dump = adev->psp.spirom_dump_trip;
+
+	if (dump)
+		return simple_read_from_buffer(buf, size, pos, dump->cpu_addr,
+					       AMD_VBIOS_FILE_MAX_SIZE_B * 2);
+
+	return -EINVAL;
+}
+
+/*
+ * release() handler of the "psp_spirom_dump" debugfs file.
+ *
+ * Frees the dump buffer object and its bookkeeping struct if one is stored,
+ * then clears adev->psp.spirom_dump_trip. Always returns 0.
+ */
+static int psp_read_spirom_debugfs_release(struct inode *inode, struct file *filp)
+{
+	struct amdgpu_device *adev = filp->f_inode->i_private;
+	struct psp_context *psp = &adev->psp;
+	struct spirom_bo *dump = psp->spirom_dump_trip;
+
+	if (dump != NULL) {
+		amdgpu_bo_free_kernel(&dump->bo, &dump->mc_addr,
+				      &dump->cpu_addr);
+		kfree(dump);
+	}
+
+	/* Cleared only after the buffer is gone, as in the free path above. */
+	psp->spirom_dump_trip = NULL;
+
+	return 0;
+}
+
+/* debugfs file operations bound to "psp_spirom_dump" by amdgpu_psp_debugfs_init(). */
+static const struct file_operations psp_dump_spirom_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .open = psp_read_spirom_debugfs_open,
+ .read = psp_read_spirom_debugfs_read,
+ .release = psp_read_spirom_debugfs_release,
+ .llseek = default_llseek,
+};
+#endif
+
+/*
+ * Create the "psp_spirom_dump" debugfs file (mode 0444) under the primary
+ * DRM minor's debugfs root, with a size of AMD_VBIOS_FILE_MAX_SIZE_B * 2 and
+ * @adev as its private data. Compiles to an empty function when
+ * CONFIG_DEBUG_FS is not set.
+ */
+void amdgpu_psp_debugfs_init(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_DEBUG_FS
+	debugfs_create_file_size("psp_spirom_dump", 0444,
+				 adev_to_drm(adev)->primary->debugfs_root, adev,
+				 &psp_dump_spirom_debugfs_ops,
+				 AMD_VBIOS_FILE_MAX_SIZE_B * 2);
+#endif
+}
+
const struct amd_ip_funcs psp_ip_funcs = {
.name = "psp",
.early_init = psp_early_init,
- .late_init = NULL,
.sw_init = psp_sw_init,
.sw_fini = psp_sw_fini,
.hw_init = psp_hw_init,
.hw_fini = psp_hw_fini,
.suspend = psp_suspend,
.resume = psp_resume,
- .is_idle = NULL,
- .check_soft_reset = NULL,
- .wait_for_idle = NULL,
- .soft_reset = NULL,
.set_clockgating_state = psp_set_clockgating_state,
.set_powergating_state = psp_set_powergating_state,
};
@@ -3825,3 +4511,11 @@ const struct amdgpu_ip_block_version psp_v13_0_4_ip_block = {
.rev = 4,
.funcs = &psp_ip_funcs,
};
+
+/* PSP IP block descriptor for version 14.0.0; uses the shared psp_ip_funcs callbacks. */
+const struct amdgpu_ip_block_version psp_v14_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 14,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 3e67ed63e638..237b624aa51c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -39,6 +39,29 @@
#define PSP_TMR_ALIGNMENT 0x100000
#define PSP_FW_NAME_LEN 0x24
+/* VBIOS gfl defines */
+#define MBOX_READY_MASK 0x80000000
+#define MBOX_STATUS_MASK 0x0000FFFF
+#define MBOX_COMMAND_MASK 0x00FF0000
+#define MBOX_READY_FLAG 0x80000000
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
+#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO 0xf
+#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI 0x10
+#define C2PMSG_CMD_SPI_GET_FLASH_IMAGE 0x11
+
+/* Command register bit 31 set to indicate readiness */
+#define MBOX_TOS_READY_FLAG (GFX_FLAG_RESPONSE)
+#define MBOX_TOS_READY_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK)
+
+/* Values to check for a successful GFX_CMD response wait. Check against
+ * both status bits and response state - helps to detect a command failure
+ * or other unexpected cases like a device drop reading all 0xFFs
+ */
+#define MBOX_TOS_RESP_FLAG (GFX_FLAG_RESPONSE)
+#define MBOX_TOS_RESP_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK)
+
extern const struct attribute_group amdgpu_flash_attr_group;
enum psp_shared_mem_size {
@@ -73,11 +96,14 @@ enum psp_bootloader_cmd {
PSP_BL__LOAD_KEY_DATABASE = 0x80000,
PSP_BL__LOAD_SOCDRV = 0xB0000,
PSP_BL__LOAD_DBGDRV = 0xC0000,
+ PSP_BL__LOAD_HADDRV = PSP_BL__LOAD_DBGDRV,
PSP_BL__LOAD_INTFDRV = 0xD0000,
- PSP_BL__LOAD_RASDRV = 0xE0000,
+ PSP_BL__LOAD_RASDRV = 0xE0000,
+ PSP_BL__LOAD_IPKEYMGRDRV = 0xF0000,
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
+ PSP_BL__LOAD_SPDMDRV = 0x20000000,
};
enum psp_ring_type {
@@ -104,9 +130,13 @@ enum psp_reg_prog_id {
PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
+ PSP_REG_MMHUB_L1_TLB_CNTL = 25,
PSP_REG_LAST
};
+#define PSP_WAITREG_CHANGED BIT(0) /* check if the value has changed */
+#define PSP_WAITREG_NOVERBOSE BIT(1) /* No error verbose */
+
struct psp_funcs {
int (*init_microcode)(struct psp_context *psp);
int (*wait_for_bootloader)(struct psp_context *psp);
@@ -117,6 +147,8 @@ struct psp_funcs {
int (*bootloader_load_intf_drv)(struct psp_context *psp);
int (*bootloader_load_dbg_drv)(struct psp_context *psp);
int (*bootloader_load_ras_drv)(struct psp_context *psp);
+ int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp);
+ int (*bootloader_load_spdm_drv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
int (*ring_create)(struct psp_context *psp,
enum psp_ring_type ring_type);
@@ -132,8 +164,14 @@ struct psp_funcs {
int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver);
int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*dump_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
int (*vbflash_stat)(struct psp_context *psp);
int (*fatal_error_recovery_quirk)(struct psp_context *psp);
+ bool (*get_ras_capability)(struct psp_context *psp);
+ bool (*is_aux_sos_load_required)(struct psp_context *psp);
+ bool (*is_reload_needed)(struct psp_context *psp);
+ int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id);
};
struct ta_funcs {
@@ -149,6 +187,7 @@ struct psp_xgmi_node_info {
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
uint8_t num_links;
+ struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM];
};
struct psp_xgmi_topology_info {
@@ -189,18 +228,20 @@ struct psp_xgmi_context {
struct ta_context context;
struct psp_xgmi_topology_info top_info;
bool supports_extended_data;
+ uint8_t xgmi_ta_caps;
};
struct psp_ras_context {
struct ta_context context;
struct amdgpu_ras *ras;
+ struct mutex mutex;
};
#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942
#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000
#define GDDR6_MEM_TRAINING_OFFSET 0x8000
/*Define the VRAM size that will be encroached by BIST training.*/
-#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
+#define BIST_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
enum psp_memory_training_init_flag {
PSP_MEM_TRAIN_NOT_SUPPORT = 0x0,
@@ -308,6 +349,14 @@ struct psp_runtime_scpm_entry {
enum psp_runtime_scpm_authentication scpm_status;
};
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * Bookkeeping for the buffer behind the SPIROM dump debugfs file. The three
+ * members are exactly what the release handler passes to
+ * amdgpu_bo_free_kernel().
+ */
+struct spirom_bo {
+ struct amdgpu_bo *bo; /* buffer object */
+ uint64_t mc_addr; /* presumably the GPU (MC) address of @bo - confirm at the allocation site */
+ void *cpu_addr; /* presumably the CPU mapping of @bo - confirm at the allocation site */
+};
+#endif
+
struct psp_context {
struct amdgpu_device *adev;
struct psp_ring km_ring;
@@ -333,6 +382,8 @@ struct psp_context {
struct psp_bin_desc intf_drv;
struct psp_bin_desc dbg_drv;
struct psp_bin_desc ras_drv;
+ struct psp_bin_desc ipkeymgr_drv;
+ struct psp_bin_desc spdm_drv;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
@@ -361,6 +412,8 @@ struct psp_context {
atomic_t fence_value;
/* flag to mark whether gfx fw autoload is supported or not */
bool autoload_supported;
+ /* flag to mark whether psp uses runtime TMR or boot time TMR */
+ bool boot_time_tmr;
/* flag to mark whether df cstate management centralized to PMFW */
bool pmfw_centralized_cstate_management;
@@ -391,6 +444,9 @@ struct psp_context {
char *vbflash_tmp_buf;
size_t vbflash_image_size;
bool vbflash_done;
+#if defined(CONFIG_DEBUG_FS)
+ struct spirom_bo *spirom_dump_trip;
+#endif
};
struct amdgpu_psp_funcs {
@@ -419,6 +475,12 @@ struct amdgpu_psp_funcs {
#define psp_bootloader_load_ras_drv(psp) \
((psp)->funcs->bootloader_load_ras_drv ? \
(psp)->funcs->bootloader_load_ras_drv((psp)) : 0)
+#define psp_bootloader_load_ipkeymgr_drv(psp) \
+ ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \
+ (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0)
+#define psp_bootloader_load_spdm_drv(psp) \
+ ((psp)->funcs->bootloader_load_spdm_drv ? \
+ (psp)->funcs->bootloader_load_spdm_drv((psp)) : 0)
#define psp_bootloader_load_sos(psp) \
((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
#define psp_smu_reload_quirk(psp) \
@@ -443,6 +505,10 @@ struct amdgpu_psp_funcs {
((psp)->funcs->update_spirom ? \
(psp)->funcs->update_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+#define psp_dump_spirom(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->dump_spirom ? \
+ (psp)->funcs->dump_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+
#define psp_vbflash_status(psp) \
((psp)->funcs->vbflash_stat ? \
(psp)->funcs->vbflash_stat((psp)) : -EINVAL)
@@ -451,6 +517,13 @@ struct amdgpu_psp_funcs {
((psp)->funcs->fatal_error_recovery_quirk ? \
(psp)->funcs->fatal_error_recovery_quirk((psp)) : 0)
+#define psp_is_aux_sos_load_required(psp) \
+ ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0)
+
+#define psp_reg_program_no_ring(psp, val, id) \
+ ((psp)->funcs->reg_program_no_ring ? \
+ (psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL)
+
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
@@ -460,9 +533,10 @@ extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block;
extern const struct amdgpu_ip_block_version psp_v12_0_ip_block;
extern const struct amdgpu_ip_block_version psp_v13_0_ip_block;
extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block;
+extern const struct amdgpu_ip_block_version psp_v14_0_ip_block;
-extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
- uint32_t field_val, uint32_t mask, bool check_changed);
+int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, uint32_t flags);
extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
uint32_t field_val, uint32_t mask, uint32_t msec_timeout);
@@ -499,6 +573,9 @@ int psp_ras_enable_features(struct psp_context *psp,
int psp_ras_trigger_error(struct psp_context *psp,
struct ta_ras_trigger_error_input *info, uint32_t instance_mask);
int psp_ras_terminate(struct psp_context *psp);
+int psp_ras_query_address(struct psp_context *psp,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out);
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
@@ -525,15 +602,25 @@ int psp_init_cap_microcode(struct psp_context *psp,
const char *chip_name);
int psp_get_fw_attestation_records_addr(struct psp_context *psp,
uint64_t *output_ptr);
-
+int psp_update_fw_reservation(struct psp_context *psp);
int psp_load_fw_list(struct psp_context *psp,
struct amdgpu_firmware_info **ucode_list, int ucode_count);
void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size);
int psp_spatial_partition(struct psp_context *psp, int mode);
+int psp_memory_partition(struct psp_context *psp, int mode);
int is_psp_fw_valid(struct psp_bin_desc bin);
int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
+bool amdgpu_psp_get_ras_capability(struct psp_context *psp);
+
+int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
+bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev);
+int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id);
+void amdgpu_psp_debugfs_init(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
index 468a67b302d4..6e8aad91bcd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -166,15 +166,14 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t
if (ret)
return -EFAULT;
+ if (ta_bin_len > PSP_1_MEG)
+ return -EINVAL;
+
copy_pos += sizeof(uint32_t);
- ta_bin = kzalloc(ta_bin_len, GFP_KERNEL);
- if (!ta_bin)
- return -ENOMEM;
- if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) {
- ret = -EFAULT;
- goto err_free_bin;
- }
+ ta_bin = memdup_user(&buf[copy_pos], ta_bin_len);
+ if (IS_ERR(ta_bin))
+ return PTR_ERR(ta_bin);
/* Set TA context and functions */
set_ta_context_funcs(psp, ta_type, &context);
@@ -324,17 +323,16 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
return -EFAULT;
copy_pos += sizeof(uint32_t);
- shared_buf = kzalloc(shared_buf_len, GFP_KERNEL);
- if (!shared_buf)
- return -ENOMEM;
- if (copy_from_user((void *)shared_buf, &buf[copy_pos], shared_buf_len)) {
- ret = -EFAULT;
- goto err_free_shared_buf;
- }
+ shared_buf = memdup_user(&buf[copy_pos], shared_buf_len);
+ if (IS_ERR(shared_buf))
+ return PTR_ERR(shared_buf);
+
+ /* Lock before the first goto: err_free_shared_buf always unlocks. */
+ mutex_lock(&psp->ras_context.mutex);
set_ta_context_funcs(psp, ta_type, &context);
- if (!context->initialized) {
+ if (!context || !context->initialized) {
dev_err(adev->dev, "TA is not initialized\n");
ret = -EINVAL;
goto err_free_shared_buf;
@@ -362,10 +360,11 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
}
}
- if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len))
+ if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
ret = -EFAULT;
err_free_shared_buf:
+ mutex_unlock(&psp->ras_context.mutex);
kfree(shared_buf);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 937c54fc7174..e0ee21150860 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -28,6 +28,7 @@
#include <linux/reboot.h>
#include <linux/syscalls.h>
#include <linux/pm_runtime.h>
+#include <linux/list_sort.h>
#include "amdgpu.h"
#include "amdgpu_ras.h"
@@ -35,9 +36,11 @@
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include "nbio_v4_3.h"
+#include "nbif_v6_3_1.h"
#include "nbio_v7_9.h"
#include "atom.h"
#include "amdgpu_reset.h"
+#include "amdgpu_psp.h"
#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -72,6 +75,9 @@ const char *ras_block_string[] = {
"mca",
"vcn",
"jpeg",
+ "ih",
+ "mpio",
+ "mmsch",
};
const char *ras_mca_block_string[] = {
@@ -93,7 +99,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
if (!ras_block)
return "NULL";
- if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT)
+ if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT ||
+ ras_block->block >= ARRAY_SIZE(ras_block_string))
return "OUT OF RANGE";
if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
@@ -115,6 +122,15 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
/* typical ECC bad page rate is 1 bad page per 100MB VRAM */
#define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL)
+#define MAX_UMC_POISON_POLLING_TIME_ASYNC 10
+
+#define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms
+
+#define MAX_FLUSH_RETIRE_DWORK_TIMES 100
+
+#define BYPASS_ALLOCATED_ADDRESS 0x0
+#define BYPASS_INITIALIZATION_ADDRESS 0x1
+
enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -123,10 +139,14 @@ enum amdgpu_ras_retire_page_reservation {
atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
-static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
uint64_t addr);
-static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+
+static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev);
+static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev);
+
#ifdef CONFIG_X86_MCE_AMD
static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
struct mce_notifier_adev_list {
@@ -152,34 +172,39 @@ static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t address)
{
- struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_err_data err_data;
struct eeprom_table_record err_rec;
+ int ret;
- if ((address >= adev->gmc.mc_vram_size) ||
- (address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
+ ret = amdgpu_ras_check_bad_page(adev, address);
+ if (ret == -EINVAL) {
dev_warn(adev->dev,
- "RAS WARN: input address 0x%llx is invalid.\n",
- address);
+ "RAS WARN: input address 0x%llx is invalid.\n",
+ address);
return -EINVAL;
- }
-
- if (amdgpu_ras_check_bad_page(adev, address)) {
+ } else if (ret == 1) {
dev_warn(adev->dev,
- "RAS WARN: 0x%llx has already been marked as bad page!\n",
- address);
+ "RAS WARN: 0x%llx has already been marked as bad page!\n",
+ address);
return 0;
}
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
err_data.err_addr = &err_rec;
amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0);
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
- err_data.err_addr_cnt);
+ err_data.err_addr_cnt, false);
amdgpu_ras_save_bad_pages(adev, NULL);
}
+ amdgpu_ras_error_data_fini(&err_data);
+
dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n");
dev_warn(adev->dev, "Clear EEPROM:\n");
dev_warn(adev->dev, " echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n");
@@ -187,6 +212,56 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
return 0;
}
+/*
+ * Backend of the "check_address" RAS debugfs control command.
+ *
+ * @address: address to check
+ * @flags:   BYPASS_ALLOCATED_ADDRESS or BYPASS_INITIALIZATION_ADDRESS; any
+ *           other value disables the per-page owner checks
+ *
+ * Returns 0 when the address passes (always the case for UMC IP versions
+ * below 12.0.0), otherwise:
+ *   -EFAULT  @address is outside VRAM or at/above RAS_UMC_INJECT_ADDR_LIMIT
+ *   -EPERM   the VF critical-region query failed, or the bad-page row lookup
+ *            returned no pages
+ *   -EACCES  the address hits a VF critical region, or one of the looked-up
+ *            pages fails the owner check selected by @flags
+ */
+static int amdgpu_check_address_validity(struct amdgpu_device *adev,
+					 uint64_t address, uint64_t flags)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct amdgpu_vram_block_info blk_info;
+	uint64_t page_pfns[32] = {0};
+	bool hit = false;
+	int nr_pages, idx;
+
+	if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0))
+		return 0;
+
+	/* SR-IOV VF: the critical-region lookup is the only check performed */
+	if (amdgpu_sriov_vf(adev)) {
+		if (amdgpu_virt_check_vf_critical_region(adev, address, &hit))
+			return -EPERM;
+		if (hit)
+			return -EACCES;
+		return 0;
+	}
+
+	if (address >= adev->gmc.mc_vram_size ||
+	    address >= RAS_UMC_INJECT_ADDR_LIMIT)
+		return -EFAULT;
+
+	nr_pages = amdgpu_umc_lookup_bad_pages_in_a_row(adev, address, page_pfns,
+							ARRAY_SIZE(page_pfns));
+	if (nr_pages <= 0)
+		return -EPERM;
+
+	for (idx = 0; idx < nr_pages; idx++) {
+		memset(&blk_info, 0, sizeof(blk_info));
+
+		/* pages whose block-info query fails are not checked */
+		if (amdgpu_vram_mgr_query_address_block_info(&adev->mman.vram_mgr,
+				page_pfns[idx] << AMDGPU_GPU_PAGE_SHIFT, &blk_info))
+			continue;
+
+		if (flags == BYPASS_ALLOCATED_ADDRESS) {
+			/* The input address that needs to be checked is allocated by
+			 * current calling process, so it is necessary to exclude
+			 * the calling process.
+			 */
+			if (blk_info.task.pid != task_pid_nr(current) ||
+			    strncmp(blk_info.task.comm, current->comm, TASK_COMM_LEN))
+				return -EACCES;
+		} else if (flags == BYPASS_INITIALIZATION_ADDRESS) {
+			/* reject blocks owned by the recorded init task */
+			if (blk_info.task.pid == con->init_task_pid &&
+			    !strncmp(blk_info.task.comm, con->init_task_comm, TASK_COMM_LEN))
+				return -EACCES;
+		}
+	}
+
+	return 0;
+}
+
static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -201,8 +276,8 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
return -EINVAL;
/* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
- if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
- obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
}
@@ -277,6 +352,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
op = 2;
else if (strstr(str, "retire_page") != NULL)
op = 3;
+ else if (strstr(str, "check_address") != NULL)
+ op = 4;
else if (str[0] && str[1] && str[2] && str[3])
/* ascii string, but commands are not matched. */
return -EINVAL;
@@ -291,17 +368,28 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
data->inject.address = address;
return 0;
+ } else if (op == 4) {
+ if (sscanf(str, "%*s 0x%llx 0x%llx", &address, &value) != 2 &&
+ sscanf(str, "%*s %llu %llu", &address, &value) != 2)
+ return -EINVAL;
+
+ data->op = op;
+ data->inject.address = address;
+ data->inject.value = value;
+ return 0;
}
if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
return -EINVAL;
data->head.block = block_id;
- /* only ue and ce errors are supported */
+ /* only ue, ce and poison errors are supported */
if (!memcmp("ue", err, 2))
data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
else if (!memcmp("ce", err, 2))
data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+ else if (!memcmp("poison", err, 6))
+ data->head.type = AMDGPU_RAS_ERROR__POISON;
else
return -EINVAL;
@@ -423,9 +511,10 @@ static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
* The block is one of: umc, sdma, gfx, etc.
* see ras_block_string[] for details
*
- * The error type is one of: ue, ce, where,
+ * The error type is one of: ue, ce and poison where,
* ue is multi-uncorrectable
* ce is single-correctable
+ * poison is poison
*
* The sub-block is a the sub-block index, pass 0 if there is no sub-block.
* The address and value are hexadecimal numbers, leading 0x is optional.
@@ -477,6 +566,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
return size;
else
return ret;
+ } else if (data.op == 4) {
+ ret = amdgpu_check_address_validity(adev, data.inject.address, data.inject.value);
+ return ret ? ret : size;
}
if (!amdgpu_ras_is_supported(adev, data.head.block))
@@ -490,22 +582,16 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
- if ((data.inject.address >= adev->gmc.mc_vram_size &&
- adev->gmc.mc_vram_size) ||
- (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
- dev_warn(adev->dev, "RAS WARN: input address "
- "0x%llx is invalid.",
+ /* umc ce/ue error injection for a bad page is not allowed */
+ if (data.head.block == AMDGPU_RAS_BLOCK__UMC)
+ ret = amdgpu_ras_check_bad_page(adev, data.inject.address);
+ if (ret == -EINVAL) {
+ dev_warn(adev->dev, "RAS WARN: input address 0x%llx is invalid.",
data.inject.address);
- ret = -EINVAL;
break;
- }
-
- /* umc ce/ue error injection for a bad page is not allowed */
- if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
- amdgpu_ras_check_bad_page(adev, data.inject.address)) {
- dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has "
- "already been marked as bad!\n",
- data.inject.address);
+ } else if (ret == 1) {
+ dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has already been marked as bad!\n",
+ data.inject.address);
break;
}
@@ -611,14 +697,18 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
if (amdgpu_ras_query_error_status(obj->adev, &info))
return -EINVAL;
- if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
- obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
}
- return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
- "ce", info.ce_count);
+ if (info.head.block == AMDGPU_RAS_BLOCK__UMC)
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count, "de", info.de_count);
+ else
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count);
}
/* obj begin */
@@ -628,8 +718,11 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
static inline void put_obj(struct ras_manager *obj)
{
- if (obj && (--obj->use == 0))
+ if (obj && (--obj->use == 0)) {
list_del(&obj->node);
+ amdgpu_ras_error_data_fini(&obj->err_data);
+ }
+
if (obj && (obj->use < 0))
DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head));
}
@@ -659,6 +752,9 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
if (alive_obj(obj))
return NULL;
+ if (amdgpu_ras_error_data_init(&obj->err_data))
+ return NULL;
+
obj->head = *head;
obj->adev = adev;
list_add(&obj->node, &con->head);
@@ -769,9 +865,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!con)
return -EINVAL;
- /* Do not enable ras feature if it is not allowed */
- if (enable &&
- head->block != AMDGPU_RAS_BLOCK__GFX &&
+ /* For non-gfx ip, do not enable ras feature if it is not allowed */
+ /* For gfx ip, regardless of feature support status, */
+ /* Force issue enable or disable ras feature commands */
+ if (head->block != AMDGPU_RAS_BLOCK__GFX &&
!amdgpu_ras_is_feature_allowed(adev, head))
return 0;
@@ -801,6 +898,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
enable ? "enable":"disable",
get_ras_block_str(head),
amdgpu_ras_is_poison_mode_supported(adev), ret);
+ kfree(info);
return ret;
}
@@ -849,7 +947,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
if (ret)
return ret;
- /* gfx block ras dsiable cmd must send to ras-ta */
+ /* gfx block ras disable cmd must be sent to ras-ta */
if (head->block == AMDGPU_RAS_BLOCK__GFX)
con->features |= BIT(head->block);
@@ -1013,105 +1111,482 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d
}
}
-/* query/inject/cure begin */
-int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
- struct ras_query_if *info)
+static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
+ struct ras_manager *ras_mgr,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx,
+ const char *blk_name,
+ bool is_ue,
+ bool is_de)
{
- struct amdgpu_ras_block_object *block_obj = NULL;
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
- struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct amdgpu_smuio_mcm_config_info *mcm_info;
+ struct ras_err_node *err_node;
+ struct ras_err_info *err_info;
+ u64 event_id = qctx->evid.event_id;
+
+ if (is_ue) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->ue_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new uncorrectable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ue_count,
+ blk_name);
+ }
+ }
- if (!obj)
- return -EINVAL;
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld uncorrectable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name);
+ }
- if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
- amdgpu_ras_get_ecc_info(adev, &err_data);
} else {
- block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
- if (!block_obj || !block_obj->hw_ops) {
- dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
- get_ras_block_str(&info->head));
- return -EINVAL;
- }
+ if (is_de) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->de_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new deferred hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->de_count,
+ blk_name);
+ }
+ }
- if (block_obj->hw_ops->query_ras_error_count)
- block_obj->hw_ops->query_ras_error_count(adev, &err_data);
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld deferred hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id,
+ err_info->de_count, blk_name);
+ }
+ } else {
+ if (adev->debug_disable_ce_logs)
+ return;
+
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->ce_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new correctable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ce_count,
+ blk_name);
+ }
+ }
- if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
- (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
- (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
- if (block_obj->hw_ops->query_ras_error_status)
- block_obj->hw_ops->query_ras_error_status(adev);
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld correctable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id,
+ err_info->ce_count, blk_name);
}
+ }
}
+}
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
+/* Returns true when @data has at least one entry on its err_node_list. */
+static inline bool err_data_has_source_info(struct ras_err_data *data)
+{
+ return !list_empty(&data->err_node_list);
+}
- info->ue_count = obj->err_data.ue_count;
- info->ce_count = obj->err_data.ce_count;
+static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
+ struct ras_query_if *query_if,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head);
+ const char *blk_name = get_ras_block_str(&query_if->head);
+ u64 event_id = qctx->evid.event_id;
+
+ if (err_data->ce_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, false, false);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld correctable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.ce_count,
+ blk_name);
+ } else {
+ RAS_EVENT_LOG(adev, event_id, "%ld correctable hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.ce_count,
+ blk_name);
+ }
+ }
- if (err_data.ce_count) {
- if (!adev->aid_mask &&
- adev->smuio.funcs &&
- adev->smuio.funcs->get_socket_id &&
- adev->smuio.funcs->get_die_id) {
- dev_info(adev->dev, "socket: %d, die: %d "
- "%ld correctable hardware errors "
- "detected in %s block, no user "
- "action is needed.\n",
- adev->smuio.funcs->get_socket_id(adev),
- adev->smuio.funcs->get_die_id(adev),
- obj->err_data.ce_count,
- get_ras_block_str(&info->head));
+ if (err_data->ue_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, true, false);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.ue_count,
+ blk_name);
} else {
- dev_info(adev->dev, "%ld correctable hardware errors "
- "detected in %s block, no user "
- "action is needed.\n",
- obj->err_data.ce_count,
- get_ras_block_str(&info->head));
- }
- }
- if (err_data.ue_count) {
- if (!adev->aid_mask &&
- adev->smuio.funcs &&
- adev->smuio.funcs->get_socket_id &&
- adev->smuio.funcs->get_die_id) {
- dev_info(adev->dev, "socket: %d, die: %d "
- "%ld uncorrectable hardware errors "
- "detected in %s block\n",
- adev->smuio.funcs->get_socket_id(adev),
- adev->smuio.funcs->get_die_id(adev),
- obj->err_data.ue_count,
- get_ras_block_str(&info->head));
+ RAS_EVENT_LOG(adev, event_id, "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.ue_count,
+ blk_name);
+ }
+ }
+
+ if (err_data->de_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, false, true);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld deferred hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.de_count,
+ blk_name);
} else {
- dev_info(adev->dev, "%ld uncorrectable hardware errors "
- "detected in %s block\n",
- obj->err_data.ue_count,
- get_ras_block_str(&info->head));
+ RAS_EVENT_LOG(adev, event_id, "%ld deferred hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.de_count,
+ blk_name);
+ }
+ }
+}
+
+/*
+ * Log, per category, the difference between the counts in @err_data and the
+ * totals cached in the ras_manager of @query_if's block. A category whose
+ * difference is zero is not logged.
+ */
+static void amdgpu_ras_virt_error_generate_report(struct amdgpu_device *adev,
+						  struct ras_query_if *query_if,
+						  struct ras_err_data *err_data,
+						  struct ras_query_context *qctx)
+{
+	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &query_if->head);
+	const char *blk_name = get_ras_block_str(&query_if->head);
+	u64 event_id = qctx->evid.event_id;
+	unsigned long delta_ce = err_data->ce_count - obj->err_data.ce_count;
+	unsigned long delta_ue = err_data->ue_count - obj->err_data.ue_count;
+	unsigned long delta_de = err_data->de_count - obj->err_data.de_count;
+
+	if (delta_ce) {
+		RAS_EVENT_LOG(adev, event_id,
+			      "%lu correctable hardware errors detected in %s block\n",
+			      delta_ce, blk_name);
+	}
+
+	if (delta_ue) {
+		RAS_EVENT_LOG(adev, event_id,
+			      "%lu uncorrectable hardware errors detected in %s block\n",
+			      delta_ue, blk_name);
+	}
+
+	if (delta_de) {
+		RAS_EVENT_LOG(adev, event_id,
+			      "%lu deferred hardware errors detected in %s block\n",
+			      delta_de, blk_name);
+	}
+}
+
+/*
+ * Accumulate the counts carried by @err_data into @obj->err_data.
+ *
+ * If err_data_has_source_info() is true for @err_data, every error node is
+ * walked and its de/ce/ue counts are added through the
+ * amdgpu_ras_error_statistic_*_count() helpers together with that node's
+ * mcm_info. Otherwise the aggregate ue/ce/de counts are added directly.
+ */
+static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data)
+{
+ struct ras_err_node *err_node;
+ struct ras_err_info *err_info;
+
+ if (err_data_has_source_info(err_data)) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ amdgpu_ras_error_statistic_de_count(&obj->err_data,
+ &err_info->mcm_info, err_info->de_count);
+ amdgpu_ras_error_statistic_ce_count(&obj->err_data,
+ &err_info->mcm_info, err_info->ce_count);
+ amdgpu_ras_error_statistic_ue_count(&obj->err_data,
+ &err_info->mcm_info, err_info->ue_count);
}
+ } else {
+ /* for the legacy asic path which doesn't have error source info */
+ obj->err_data.ue_count += err_data->ue_count;
+ obj->err_data.ce_count += err_data->ce_count;
+ obj->err_data.de_count += err_data->de_count;
}
+}
+
+/*
+ * Replace the counts stored in @obj->err_data with those in @err_data.
+ * Unlike the accumulating update, this is a plain overwrite of the
+ * ue/ce/de counters.
+ */
+static void amdgpu_ras_mgr_virt_error_data_statistics_update(struct ras_manager *obj,
+							     struct ras_err_data *err_data)
+{
+	struct ras_err_data *stored = &obj->err_data;
+
+	/* Host reports absolute counts */
+	stored->ce_count = err_data->ce_count;
+	stored->ue_count = err_data->ue_count;
+	stored->de_count = err_data->de_count;
+}
+
+/*
+ * Look up the RAS manager for @blk by handing amdgpu_ras_find_obj() a
+ * ras_common_if whose only non-zero field is the block id.
+ */
+static struct ras_manager *get_ras_manager(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
+{
+	struct ras_common_if head = {
+		.block = blk,
+	};
+
+	return amdgpu_ras_find_obj(adev, &head);
+}
+
+/*
+ * Register an ACA handle for RAS block @blk.
+ *
+ * Nothing is done (and 0 is returned) while the device is in suspend or a
+ * reset recovery is in progress. Returns -EINVAL when no RAS manager exists
+ * for @blk, otherwise the result of amdgpu_aca_add_handle().
+ */
+int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+			const struct aca_info *aca_info, void *data)
+{
+	struct ras_manager *mgr;
+
+	/* in resume phase, no need to create aca fs node */
+	if (adev->in_suspend)
+		return 0;
+	if (amdgpu_reset_in_recovery(adev))
+		return 0;
+
+	mgr = get_ras_manager(adev, blk);
+
+	return mgr ? amdgpu_aca_add_handle(adev, &mgr->aca_handle, ras_block_str(blk),
+					   aca_info, data)
+		   : -EINVAL;
+}
+
+/*
+ * Remove the ACA handle embedded in the RAS manager of block @blk.
+ *
+ * Returns -EINVAL when get_ras_manager() finds no manager for @blk,
+ * 0 otherwise.
+ */
+int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
+{
+ struct ras_manager *obj;
+
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
+ return -EINVAL;
+
+ amdgpu_aca_remove_handle(&obj->aca_handle);
return 0;
}
-int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
- enum amdgpu_ras_block block)
+static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
{
- struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+ struct ras_manager *obj;
- if (!amdgpu_ras_is_supported(adev, block))
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
return -EINVAL;
- if (!block_obj || !block_obj->hw_ops) {
- dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
- ras_block_str(block));
+ return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx);
+}
+
+/*
+ * sysfs read helper for an ACA handle that is embedded in a RAS manager.
+ *
+ * Emits "Query currently inaccessible" while error queries are not ready.
+ * Otherwise it queries the block's error status and emits the ue/ce/de
+ * counts, one per line. Returns -EINVAL if the query fails.
+ */
+ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
+				  struct aca_handle *handle, char *buf, void *data)
+{
+	struct ras_manager *mgr = container_of(handle, struct ras_manager, aca_handle);
+	struct ras_query_if query = {
+		.head = mgr->head,
+	};
+
+	if (!amdgpu_ras_get_error_query_ready(mgr->adev))
+		return sysfs_emit(buf, "Query currently inaccessible\n");
+
+	if (amdgpu_ras_query_error_status(mgr->adev, &query) != 0)
+		return -EINVAL;
+
+	return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n",
+			  "ue", query.ue_count,
+			  "ce", query.ce_count,
+			  "de", query.de_count);
+}
+
+/*
+ * Fill @err_data for the block named by @info->head using the query path
+ * selected by @error_query_mode:
+ *
+ *  - AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY: return the result of
+ *    amdgpu_virt_req_ras_err_count().
+ *  - AMDGPU_RAS_DIRECT_ERROR_QUERY: UMC goes through
+ *    amdgpu_ras_get_ecc_info(); other blocks call their hw_ops
+ *    query_ras_error_count hook, and SDMA/GFX/MMHUB additionally call
+ *    query_ras_error_status when present.
+ *  - any other valid mode: with ACA enabled, log UE, CE and DEFERRED data
+ *    through ACA and return the first failure; otherwise log UE and CE
+ *    through the MCA SMU path, ignoring its return values.
+ *
+ * Returns -EINVAL when @info is NULL, the mode is
+ * AMDGPU_RAS_INVALID_ERROR_QUERY, or a directly queried non-UMC block has no
+ * block object / hw_ops; 0 on the remaining success paths.
+ */
+static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
+ struct ras_query_if *info,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx,
+ unsigned int error_query_mode)
+{
+ /* a NULL @info maps to the BLOCK_COUNT sentinel rejected just below */
+ enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
+ struct amdgpu_ras_block_object *block_obj = NULL;
+ int ret;
+
+ if (blk == AMDGPU_RAS_BLOCK_COUNT)
return -EINVAL;
+
+ if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
+ return -EINVAL;
+
+ if (error_query_mode == AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+ return amdgpu_virt_req_ras_err_count(adev, blk, err_data);
+ } else if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+ if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
+ amdgpu_ras_get_ecc_info(adev, err_data);
+ } else {
+ block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
+
+ if (block_obj->hw_ops->query_ras_error_count)
+ block_obj->hw_ops->query_ras_error_count(adev, err_data);
+
+ if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
+ (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
+ (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
+ }
+ }
+ } else {
+ if (amdgpu_aca_is_enabled(adev)) {
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx);
+ if (ret)
+ return ret;
+ } else {
+ /* FIXME: add code to check return value later */
+ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx);
+ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data, qctx);
+ }
}
+ return 0;
+}
+
+/* query/inject/cure begin */
+/*
+ * Query the error counters of the RAS block named by @info->head, fold the
+ * result into that block's RAS manager and log a report tagged with an
+ * event id acquired for @type.
+ *
+ * On success @info->ue_count, @info->ce_count and @info->de_count are set
+ * from the manager's counters.
+ *
+ * Returns 0 on success; -EINVAL when no RAS manager is found for the block
+ * or amdgpu_ras_get_error_query_mode() reports failure; -EIO when the reset
+ * domain semaphore cannot be taken for reading; otherwise the error from
+ * amdgpu_ras_error_data_init() or the query helper.
+ */
+static int amdgpu_ras_query_error_status_with_event(struct amdgpu_device *adev,
+						    struct ras_query_if *info,
+						    enum ras_event_type type)
+{
+	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+	struct ras_err_data err_data;
+	struct ras_query_context qctx;
+	unsigned int error_query_mode;
+	int ret;
+
+	if (!obj)
+		return -EINVAL;
+
+	ret = amdgpu_ras_error_data_init(&err_data);
+	if (ret)
+		return ret;
+
+	/* err_data is initialised from here on, so every exit below must go
+	 * through out_fini_err_data to pair the init with its fini.
+	 */
+	if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) {
+		ret = -EINVAL;
+		goto out_fini_err_data;
+	}
+
+	memset(&qctx, 0, sizeof(qctx));
+	qctx.evid.type = type;
+	qctx.evid.event_id = amdgpu_ras_acquire_event_id(adev, type);
+
+	if (!down_read_trylock(&adev->reset_domain->sem)) {
+		ret = -EIO;
+		goto out_fini_err_data;
+	}
+
+	ret = amdgpu_ras_query_error_status_helper(adev, info,
+						   &err_data,
+						   &qctx,
+						   error_query_mode);
+	up_read(&adev->reset_domain->sem);
+	if (ret)
+		goto out_fini_err_data;
+
+	if (error_query_mode != AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+		amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
+		amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx);
+	} else {
+		/* Host provides absolute error counts. First generate the report
+		 * using the previous VF internal count against new host count.
+		 * Then update VF internal count.
+		 */
+		amdgpu_ras_virt_error_generate_report(adev, info, &err_data, &qctx);
+		amdgpu_ras_mgr_virt_error_data_statistics_update(obj, &err_data);
+	}
+
+	info->ue_count = obj->err_data.ue_count;
+	info->ce_count = obj->err_data.ce_count;
+	info->de_count = obj->err_data.de_count;
+
+out_fini_err_data:
+	amdgpu_ras_error_data_fini(&err_data);
+
+	return ret;
+}
+
+/*
+ * Query the error status of the block described by @info.
+ *
+ * Thin wrapper around amdgpu_ras_query_error_status_with_event() that passes
+ * RAS_EVENT_TYPE_INVALID as the event type; the return value is forwarded
+ * unchanged.
+ */
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info)
+{
+ return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID);
+}
+
+/*
+ * Reset the error count of RAS block @block through its hw_ops hook.
+ *
+ * Returns -EOPNOTSUPP, without touching the hardware, when:
+ *  - the block has no block object or no hw_ops,
+ *  - RAS is not supported for the block or amdgpu_ras_get_aca_debug_mode()
+ *    is false,
+ *  - the device is an SR-IOV VF, or
+ *  - a GPU reset / RAS recovery is in progress and either the ACA or the
+ *    MCA SMU function table provides a set-debug-mode hook.
+ * Otherwise the reset_ras_error_count hook is called if present and 0 is
+ * returned.
+ */
+int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ ras_block_str(block));
+ return -EOPNOTSUPP;
+ }
+
+ if (!amdgpu_ras_is_supported(adev, block) ||
+ !amdgpu_ras_get_aca_debug_mode(adev))
+ return -EOPNOTSUPP;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EOPNOTSUPP;
+
+ /* skip ras error reset in gpu reset */
+ if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) &&
+ ((smu_funcs && smu_funcs->set_debug_mode) ||
+ (mca_funcs && mca_funcs->mca_set_debug_mode)))
+ return -EOPNOTSUPP;
+
if (block_obj->hw_ops->reset_ras_error_count)
block_obj->hw_ops->reset_ras_error_count(adev);
+ return 0;
+}
+
+int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+
+ if (amdgpu_ras_reset_error_count(adev, block) == -EOPNOTSUPP)
+ return 0;
+
if ((block == AMDGPU_RAS_BLOCK__GFX) ||
(block == AMDGPU_RAS_BLOCK__MMHUB)) {
if (block_obj->hw_ops->reset_ras_error_status)
@@ -1207,8 +1682,8 @@ static int amdgpu_ras_query_error_count_helper(struct amdgpu_device *adev,
/* some hardware/IP supports read to clear
* no need to explictly reset the err status after the query call */
- if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
- adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
if (amdgpu_ras_reset_error_status(adev, query_info->head.block))
dev_warn(adev->dev,
"Failed to reset error counter and error status\n");
@@ -1328,7 +1803,7 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
*/
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
- struct kobject *kobj, struct bin_attribute *attr,
+ struct kobject *kobj, const struct bin_attribute *attr,
char *buf, loff_t ppos, size_t count)
{
struct amdgpu_ras *con =
@@ -1368,20 +1843,73 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
}
+/*
+ * sysfs show callback for the "version" attribute: prints the version field
+ * of the EEPROM table header in hex.
+ */
+static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev,
+					     struct device_attribute *attr, char *buf)
+{
+	struct amdgpu_ras *ras = container_of(attr, struct amdgpu_ras, version_attr);
+
+	return sysfs_emit(buf, "table version: 0x%x\n",
+			  ras->eeprom_control.tbl_hdr.version);
+}
+
+/*
+ * sysfs show callback for the "schema" attribute: prints the RAS context's
+ * schema value in hex.
+ */
+static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
+					    struct device_attribute *attr, char *buf)
+{
+	struct amdgpu_ras *ras = container_of(attr, struct amdgpu_ras, schema_attr);
+
+	return sysfs_emit(buf, "schema: 0x%x\n", ras->schema);
+}
+
+/*
+ * Event types listed by the event_state sysfs attribute, paired with the
+ * label printed for each. The table is only ever read, so keep it const.
+ */
+static const struct {
+	enum ras_event_type type;
+	const char *name;
+} dump_event[] = {
+	{RAS_EVENT_TYPE_FATAL, "Fatal Error"},
+	{RAS_EVENT_TYPE_POISON_CREATION, "Poison Creation"},
+	{RAS_EVENT_TYPE_POISON_CONSUMPTION, "Poison Consumption"},
+};
+
+/*
+ * sysfs show callback for the "event_state" attribute.
+ *
+ * Prints the event manager's current sequence number followed by one line
+ * per dump_event[] entry with that event type's count and last sequence
+ * number. Returns -EINVAL when the RAS context has no event manager.
+ */
+static ssize_t amdgpu_ras_sysfs_event_state_show(struct device *dev,
+						 struct device_attribute *attr, char *buf)
+{
+	struct amdgpu_ras *ras = container_of(attr, struct amdgpu_ras, event_state_attr);
+	struct ras_event_manager *mgr = ras->event_mgr;
+	int idx, len;
+
+	if (!mgr)
+		return -EINVAL;
+
+	len = sysfs_emit_at(buf, 0, "current seqno: %llu\n", atomic64_read(&mgr->seqno));
+
+	for (idx = 0; idx < ARRAY_SIZE(dump_event); idx++) {
+		struct ras_event_state *state = &mgr->event_state[dump_event[idx].type];
+
+		len += sysfs_emit_at(buf, len, "%s: count:%llu, last_seqno:%llu\n",
+				     dump_event[idx].name,
+				     atomic64_read(&state->count),
+				     state->last_seqno);
+	}
+
+	return (ssize_t)len;
+}
+
static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- sysfs_remove_file_from_group(&adev->dev->kobj,
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
&con->badpages_attr.attr,
RAS_FS_NAME);
}
-static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
+static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct attribute *attrs[] = {
&con->features_attr.attr,
+ &con->version_attr.attr,
+ &con->schema_attr.attr,
+ &con->event_state_attr.attr,
NULL
};
struct attribute_group group = {
@@ -1389,7 +1917,8 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
.attrs = attrs,
};
- sysfs_remove_group(&adev->dev->kobj, &group);
+ if (adev->dev->kobj.sd)
+ sysfs_remove_group(&adev->dev->kobj, &group);
return 0;
}
@@ -1399,9 +1928,15 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+ if (amdgpu_aca_is_enabled(adev))
+ return 0;
+
if (!obj || obj->attr_inuse)
return -EINVAL;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_virt_ras_telemetry_block_en(adev, head->block))
+ return 0;
+
get_obj(obj);
snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
@@ -1433,10 +1968,14 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+ if (amdgpu_aca_is_enabled(adev))
+ return 0;
+
if (!obj || !obj->attr_inuse)
return -EINVAL;
- sysfs_remove_file_from_group(&adev->dev->kobj,
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
&obj->sysfs_attr.attr,
RAS_FS_NAME);
obj->attr_inuse = 0;
@@ -1457,7 +1996,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
if (amdgpu_bad_page_threshold != 0)
amdgpu_ras_sysfs_remove_bad_page_node(adev);
- amdgpu_ras_sysfs_remove_feature_node(adev);
+ amdgpu_ras_sysfs_remove_dev_attr_node(adev);
return 0;
}
@@ -1544,6 +2083,24 @@ static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
obj, &amdgpu_ras_debugfs_ops);
}
+/*
+ * Report whether the device's MP0 IP version is one of the versions listed
+ * below (13.0.6, 13.0.12, 13.0.14).
+ */
+static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev)
+{
+	switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 12):
+	case IP_VERSION(13, 0, 14):
+		return true;
+	default:
+		return false;
+	}
+}
+
void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1569,15 +2126,28 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
amdgpu_ras_debugfs_create(adev, &fs_info, dir);
}
}
+
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_aca_smu_debugfs_init(adev, dir);
+ else
+ amdgpu_mca_smu_debugfs_init(adev, dir);
+ }
}
/* debugfs end */
/* ras fs */
-static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
- amdgpu_ras_sysfs_badpages_read, NULL, 0);
+static const BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
+ amdgpu_ras_sysfs_badpages_read, NULL, 0);
static DEVICE_ATTR(features, S_IRUGO,
amdgpu_ras_sysfs_features_read, NULL);
+static DEVICE_ATTR(version, 0444,
+ amdgpu_ras_sysfs_version_show, NULL);
+static DEVICE_ATTR(schema, 0444,
+ amdgpu_ras_sysfs_schema_show, NULL);
+static DEVICE_ATTR(event_state, 0444,
+ amdgpu_ras_sysfs_event_state_show, NULL);
static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1586,26 +2156,41 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
};
struct attribute *attrs[] = {
&con->features_attr.attr,
+ &con->version_attr.attr,
+ &con->schema_attr.attr,
+ &con->event_state_attr.attr,
NULL
};
- struct bin_attribute *bin_attrs[] = {
+ const struct bin_attribute *bin_attrs[] = {
NULL,
NULL,
};
int r;
+ group.attrs = attrs;
+
/* add features entry */
con->features_attr = dev_attr_features;
- group.attrs = attrs;
sysfs_attr_init(attrs[0]);
+ /* add version entry */
+ con->version_attr = dev_attr_version;
+ sysfs_attr_init(attrs[1]);
+
+ /* add schema entry */
+ con->schema_attr = dev_attr_schema;
+ sysfs_attr_init(attrs[2]);
+
+ /* add event_state entry */
+ con->event_state_attr = dev_attr_event_state;
+ sysfs_attr_init(attrs[3]);
+
if (amdgpu_bad_page_threshold != 0) {
/* add bad_page_features entry */
- bin_attr_gpu_vram_bad_pages.private = NULL;
con->badpages_attr = bin_attr_gpu_vram_bad_pages;
+ sysfs_bin_attr_init(&con->badpages_attr);
bin_attrs[0] = &con->badpages_attr;
group.bin_attrs = bin_attrs;
- sysfs_bin_attr_init(bin_attrs[0]);
}
r = sysfs_create_group(&adev->dev->kobj, &group);
@@ -1645,6 +2230,16 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
/* Fatal error events are handled on host side */
if (amdgpu_sriov_vf(adev))
return;
+ /*
+ * If the current interrupt is caused by a non-fatal RAS error, skip
+ * check for fatal error. For fatal errors, FED status of all devices
+ * in XGMI hive gets set when the first device gets fatal error
+ * interrupt. The error gets propagated to other devices as well, so
+ * make sure to ack the interrupt regardless of FED status.
+ */
+ if (!amdgpu_ras_get_fed_status(adev) &&
+ amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY))
+ return;
if (adev->nbio.ras &&
adev->nbio.ras->handle_ras_controller_intr_no_bifring)
@@ -1662,10 +2257,19 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
struct amdgpu_device *adev = obj->adev;
struct amdgpu_ras_block_object *block_obj =
amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION;
+ u64 event_id;
+ int ret;
- if (!block_obj)
+ if (!block_obj || !con)
return;
+ ret = amdgpu_ras_mark_ras_event(adev, type);
+ if (ret)
+ return;
+
+ amdgpu_ras_set_err_poison(adev, block_obj->ras_comm.block);
/* both query_poison_status and handle_poison_consumption are optional,
* but at least one of them should be implemented if we need poison
* consumption handler
@@ -1681,41 +2285,69 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
}
}
- amdgpu_umc_poison_handler(adev, false);
+ amdgpu_umc_poison_handler(adev, obj->head.block, 0);
if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
- /* gpu reset is fallback for failed and default cases */
- if (poison_stat) {
- dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
- block_obj->ras_comm.name);
+ /* gpu reset is fallback for failed and default cases.
+ * For RMA case, amdgpu_umc_poison_handler will handle gpu reset.
+ */
+ if (poison_stat && !amdgpu_ras_is_rma(adev)) {
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+ RAS_EVENT_LOG(adev, event_id,
+ "GPU reset for %s RAS poison consumption is issued!\n",
+ block_obj->ras_comm.name);
amdgpu_ras_reset_gpu(adev);
- } else {
- amdgpu_gfx_poison_consumption_handler(adev, entry);
}
+
+ if (!poison_stat)
+ amdgpu_gfx_poison_consumption_handler(adev, entry);
}
static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
struct amdgpu_iv_entry *entry)
{
- dev_info(obj->adev->dev,
- "Poison is created, no user action is needed.\n");
+ struct amdgpu_device *adev = obj->adev;
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
+ u64 event_id;
+ int ret;
+
+ ret = amdgpu_ras_mark_ras_event(adev, type);
+ if (ret)
+ return;
+
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+ RAS_EVENT_LOG(adev, event_id, "Poison is created\n");
+
+ if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
+
+ atomic_inc(&con->page_retirement_req_cnt);
+ atomic_inc(&con->poison_creation_count);
+
+ wake_up(&con->page_retirement_wq);
+ }
}
static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
struct amdgpu_iv_entry *entry)
{
struct ras_ih_data *data = &obj->ih_data;
- struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_err_data err_data;
int ret;
if (!data->cb)
return;
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return;
+
/* Let IP handle its data, maybe we need get the output
* from the callback to update the error type/count, etc
*/
+ amdgpu_ras_set_fed(obj->adev, true);
ret = data->cb(obj->adev, &err_data, entry);
/* ue will trigger an interrupt, and in that case
* we need do a reset to recovery the whole system.
@@ -1728,7 +2360,10 @@ static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
*/
obj->err_data.ue_count += err_data.ue_count;
obj->err_data.ce_count += err_data.ce_count;
+ obj->err_data.de_count += err_data.de_count;
}
+
+ amdgpu_ras_error_data_fini(&err_data);
}
static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
@@ -1773,12 +2408,15 @@ static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_dispatch_if *info)
{
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
- struct ras_ih_data *data = &obj->ih_data;
+ struct ras_manager *obj;
+ struct ras_ih_data *data;
+ obj = amdgpu_ras_find_obj(adev, &info->head);
if (!obj)
return -EINVAL;
+ data = &obj->ih_data;
+
if (data->inuse == 0)
return 0;
@@ -1875,7 +2513,7 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
/* ih end */
/* traversal all IPs except NBIO to query error counter */
-static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
+static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev, enum ras_event_type type)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
@@ -1904,14 +2542,18 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
* should be removed until smu fix handle ecc_info table.
*/
if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) &&
- (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)))
+ (amdgpu_ip_version(adev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 2)))
continue;
- amdgpu_ras_query_error_status(adev, &info);
+ amdgpu_ras_query_error_status_with_event(adev, &info, type);
- if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
- adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4) &&
- adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 0)) {
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(11, 0, 4) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(13, 0, 0)) {
if (amdgpu_ras_reset_error_status(adev, info.head.block))
dev_warn(adev->dev, "Failed to reset error counter and error status");
}
@@ -1987,32 +2629,78 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
goto out;
}
- *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
+ *bps = kmalloc_array(data->count, sizeof(struct ras_badpage), GFP_KERNEL);
if (!*bps) {
ret = -ENOMEM;
goto out;
}
for (; i < data->count; i++) {
+ if (!data->bps[i].ts)
+ continue;
+
(*bps)[i] = (struct ras_badpage){
.bp = data->bps[i].retired_page,
.size = AMDGPU_GPU_PAGE_SIZE,
.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
};
+
+ if (amdgpu_ras_check_critical_address(adev,
+ data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ continue;
+
status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
- data->bps[i].retired_page);
+ data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT);
if (status == -EBUSY)
(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
else if (status == -ENOENT)
(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
}
- *count = data->count;
+ *count = con->bad_page_num;
out:
mutex_unlock(&con->recovery_lock);
return ret;
}
+/*
+ * Set the FED status to @status on every device in @hive's device list, or
+ * on @adev alone when @hive is NULL.
+ */
+static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev,
+				   struct amdgpu_hive_info *hive, bool status)
+{
+	struct amdgpu_device *member;
+
+	if (!hive) {
+		amdgpu_ras_set_fed(adev, status);
+		return;
+	}
+
+	list_for_each_entry(member, &hive->device_list, gmc.xgmi.head)
+		amdgpu_ras_set_fed(member, status);
+}
+
+/*
+ * Return true when a RAS context exists for @adev and either its own
+ * in_recovery flag or the ras_recovery flag of its XGMI hive (if any) is
+ * set. The hive reference taken for the check is dropped before returning.
+ */
+bool amdgpu_ras_in_recovery(struct amdgpu_device *adev)
+{
+	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	bool hive_recovering = false;
+
+	if (hive) {
+		hive_recovering = atomic_read(&hive->ras_recovery) != 0;
+		amdgpu_put_xgmi_hive(hive);
+	}
+
+	if (!ras)
+		return false;
+
+	return atomic_read(&ras->in_recovery) || hive_recovering;
+}
+
+/*
+ * Pick the event type used for recovery logging: fatal when
+ * amdgpu_ras_intr_triggered() is true, poison consumption otherwise.
+ */
+static enum ras_event_type amdgpu_ras_get_fatal_error_event(struct amdgpu_device *adev)
+{
+	return amdgpu_ras_intr_triggered() ? RAS_EVENT_TYPE_FATAL :
+					     RAS_EVENT_TYPE_POISON_CONSUMPTION;
+}
+
static void amdgpu_ras_do_recovery(struct work_struct *work)
{
struct amdgpu_ras *ras =
@@ -2020,9 +2708,26 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
struct amdgpu_device *remote_adev = NULL;
struct amdgpu_device *adev = ras->adev;
struct list_head device_list, *device_list_handle = NULL;
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ unsigned int error_query_mode;
+ enum ras_event_type type;
+ if (hive) {
+ atomic_set(&hive->ras_recovery, 1);
+
+ /* If any device which is part of the hive received RAS fatal
+ * error interrupt, set fatal error status on all. This
+ * condition will need a recovery, and flag will be cleared
+ * as part of recovery.
+ */
+ list_for_each_entry(remote_adev, &hive->device_list,
+ gmc.xgmi.head)
+ if (amdgpu_ras_get_fed_status(remote_adev)) {
+ amdgpu_ras_set_fed_all(adev, hive, true);
+ break;
+ }
+ }
if (!ras->disable_ras_err_cnt_harvest) {
- struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
/* Build list of devices to query RAS related errors */
if (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
@@ -2033,13 +2738,20 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
device_list_handle = &device_list;
}
+ if (amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) {
+ if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY) {
+ /* wait 500ms to ensure pmfw has finished polling mca bank info */
+ msleep(500);
+ }
+ }
+
+ type = amdgpu_ras_get_fatal_error_event(adev);
list_for_each_entry(remote_adev,
device_list_handle, gmc.xgmi.head) {
amdgpu_ras_query_err_status(remote_adev);
- amdgpu_ras_log_on_err_counter(remote_adev);
+ amdgpu_ras_log_on_err_counter(remote_adev, type);
}
- amdgpu_put_xgmi_hive(hive);
}
if (amdgpu_device_should_recover_gpu(ras->adev)) {
@@ -2048,6 +2760,8 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_RAS;
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
/* Perform full reset in fatal error mode */
if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
@@ -2074,6 +2788,10 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
}
atomic_set(&ras->in_recovery, 0);
+ if (hive) {
+ atomic_set(&hive->ras_recovery, 0);
+ amdgpu_put_xgmi_hive(hive);
+ }
}
/* alloc/realloc bps array */
@@ -2083,7 +2801,7 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
unsigned int old_space = data->count + data->space_left;
unsigned int new_space = old_space + pages;
unsigned int align_space = ALIGN(new_space, 512);
- void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
+ void *bps = kmalloc_array(align_space, sizeof(*data->bps), GFP_KERNEL);
if (!bps) {
return -ENOMEM;
@@ -2100,43 +2818,269 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
return 0;
}
-/* it deal with vram only. */
-int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages)
+static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct ras_err_handler_data *data;
+ struct ta_ras_query_address_input addr_in;
+ uint32_t socket = 0;
int ret = 0;
- uint32_t i;
- if (!con || !con->eh_data || !bps || pages <= 0)
- return 0;
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
- mutex_lock(&con->recovery_lock);
- data = con->eh_data;
- if (!data)
- goto out;
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.socket_id = socket;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ /* tell RAS TA the node instance is not used */
+ addr_in.ma.node_inst = TA_RAS_INV_NODE;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+
+ return ret;
+}
+
+static int amdgpu_ras_mca2pa(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
+{
+ struct ta_ras_query_address_input addr_in;
+ uint32_t die_id, socket = 0;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
- for (i = 0; i < pages; i++) {
+ /* although the die id is obtained from the PA in nps1 mode, the id is
+ * suitable for any nps mode
+ */
+ if (adev->umc.ras && adev->umc.ras->get_die_id_from_pa)
+ die_id = adev->umc.ras->get_die_id_from_pa(adev, bps->address,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT);
+ else
+ return -EINVAL;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ addr_in.ma.umc_inst = bps->mcumc_id;
+ addr_in.ma.node_inst = die_id;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+ else
+ return -EINVAL;
+}
+
+static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int count)
+{
+ int j;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data = con->eh_data;
+
+ for (j = 0; j < count; j++) {
if (amdgpu_ras_check_bad_page_unlock(con,
- bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
+ data->count++;
+ data->space_left--;
continue;
+ }
if (!data->space_left &&
- amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
- ret = -ENOMEM;
- goto out;
+ amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
+ return -ENOMEM;
}
- amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
- bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
- AMDGPU_GPU_PAGE_SIZE);
+ amdgpu_ras_reserve_page(adev, bps[j].retired_page);
- memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
+ memcpy(&data->bps[data->count], &(bps[j]),
+ sizeof(struct eeprom_table_record));
data->count++;
data->space_left--;
+ con->bad_page_num++;
+ }
+
+ return 0;
+}
+
+/*
+ * Convert a group of adev->umc.retire_unit EEPROM records starting at @bps
+ * into bad-page records in @err_data->err_addr and register them through
+ * __amdgpu_ras_restore_bad_pages().
+ *
+ * The NPS mode saved with the records is read from the UMC_NPS bits of
+ * bps[0].retired_page; on UMC major version >= 12 those bits are then
+ * cleared in every record of the group.
+ *
+ *  - UMC major version < 12: the records are copied as-is.
+ *  - saved NPS mode non-zero and equal to @nps: pages are expanded with
+ *    amdgpu_umc_pages_in_a_row() and the MCA fields of bps[0] are copied
+ *    into each resulting entry.
+ *  - saved NPS mode non-zero and different from @nps: converted with
+ *    amdgpu_ras_mca2pa_by_idx().
+ *  - saved NPS mode zero: the MCA address is first derived from the PA if
+ *    bps[0].address is 0, then amdgpu_ras_mca2pa() is tried; if that fails
+ *    the records are copied as-is in NPS1 mode, otherwise -EOPNOTSUPP.
+ *
+ * Returns -EINVAL / -EOPNOTSUPP on conversion failure, otherwise the result
+ * of __amdgpu_ras_restore_bad_pages().
+ */
+static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, struct ras_err_data *err_data,
+ enum amdgpu_memory_partition nps)
+{
+ int i = 0;
+ enum amdgpu_memory_partition save_nps;
+
+ save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+
+ /* old asics just have pa in eeprom */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
+ memcpy(err_data->err_addr, bps,
+ sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
+ goto out;
+ }
+
+ for (i = 0; i < adev->umc.retire_unit; i++)
+ bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+
+ if (save_nps) {
+ if (save_nps == nps) {
+ if (amdgpu_umc_pages_in_a_row(adev, err_data,
+ bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ err_data->err_addr[i].address = bps[0].address;
+ err_data->err_addr[i].mem_channel = bps[0].mem_channel;
+ err_data->err_addr[i].bank = bps[0].bank;
+ err_data->err_addr[i].err_type = bps[0].err_type;
+ err_data->err_addr[i].mcumc_id = bps[0].mcumc_id;
+ }
+ } else {
+ if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data))
+ return -EINVAL;
+ }
+ } else {
+ if (bps[0].address == 0) {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps[0].address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+ }
+
+ if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) {
+ if (nps == AMDGPU_NPS1_PARTITION_MODE)
+ memcpy(err_data->err_addr, bps,
+ sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
+ else
+ return -EOPNOTSUPP;
+ }
}
+
out:
+ return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit);
+}
+
+/*
+ * Convert the single EEPROM record @bps into adev->umc.retire_unit bad-page
+ * records in @err_data->err_addr and register them through
+ * __amdgpu_ras_restore_bad_pages().
+ *
+ * The NPS mode saved with the record is read from the UMC_NPS bits of
+ * bps->retired_page, which are then cleared.
+ *
+ *  - saved NPS mode equal to @nps: pages are expanded with
+ *    amdgpu_umc_pages_in_a_row() and the MCA fields of @bps are copied into
+ *    each resulting entry.
+ *  - otherwise, with a non-zero bps->address: converted with
+ *    amdgpu_ras_mca2pa_by_idx().
+ *  - otherwise: the MCA address is derived from the PA with
+ *    amdgpu_umc_pa2mca() and converted with amdgpu_ras_mca2pa().
+ *
+ * Returns -EINVAL / -EOPNOTSUPP on conversion failure, otherwise the result
+ * of __amdgpu_ras_restore_bad_pages().
+ */
+static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, struct ras_err_data *err_data,
+ enum amdgpu_memory_partition nps)
+{
+ int i = 0;
+ enum amdgpu_memory_partition save_nps;
+
+ save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+ bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+
+ if (save_nps == nps) {
+ if (amdgpu_umc_pages_in_a_row(adev, err_data,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ err_data->err_addr[i].address = bps->address;
+ err_data->err_addr[i].mem_channel = bps->mem_channel;
+ err_data->err_addr[i].bank = bps->bank;
+ err_data->err_addr[i].err_type = bps->err_type;
+ err_data->err_addr[i].mcumc_id = bps->mcumc_id;
+ }
+ } else {
+ if (bps->address) {
+ if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
+ return -EINVAL;
+ } else {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps->address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+
+ if (amdgpu_ras_mca2pa(adev, bps, err_data))
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
+ adev->umc.retire_unit);
+}
+
+/* It deals with vram only. */
+int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages, bool from_rom)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_data err_data;
+ struct amdgpu_ras_eeprom_control *control =
+ &adev->psp.ras_context.ras->eeprom_control;
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ int ret = 0;
+ uint32_t i = 0;
+
+ if (!con || !con->eh_data || !bps || pages <= 0)
+ return 0;
+
+ if (from_rom) {
+ err_data.err_addr =
+ kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc UMC error address record in mca2pa conversion!\n");
+ return -ENOMEM;
+ }
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ }
+
+ mutex_lock(&con->recovery_lock);
+
+ if (from_rom) {
+ /* there are no pa recs in V3, so skip pa recs processing */
+ if (control->tbl_hdr.version < RAS_TABLE_VER_V3) {
+ for (i = 0; i < pages; i++) {
+ if (control->ras_num_recs - i >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ /* deal with retire_unit records at a time */
+ ret = __amdgpu_ras_convert_rec_array_from_rom(adev,
+ &bps[i], &err_data, nps);
+ if (ret)
+ con->bad_page_num -= adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ for (; i < pages; i++) {
+ ret = __amdgpu_ras_convert_rec_from_rom(adev,
+ &bps[i], &err_data, nps);
+ if (ret)
+ con->bad_page_num -= adev->umc.retire_unit;
+ }
+
+ con->eh_data->count_saved = con->eh_data->count;
+ } else {
+ ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages);
+ }
+
+ if (from_rom)
+ kfree(err_data.err_addr);
mutex_unlock(&con->recovery_lock);
return ret;
@@ -2153,7 +3097,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
struct amdgpu_ras_eeprom_control *control;
- int save_count;
+ int save_count, unit_num, i;
if (!con || !con->eh_data) {
if (new_cnt)
@@ -2162,25 +3106,47 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
return 0;
}
+ if (!con->eeprom_control.is_eeprom_valid) {
+ dev_warn(adev->dev,
+ "Failed to save EEPROM table data because of EEPROM data corruption!");
+ if (new_cnt)
+ *new_cnt = 0;
+
+ return 0;
+ }
+
mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
- save_count = data->count - control->ras_num_recs;
+ unit_num = data->count / adev->umc.retire_unit - control->ras_num_recs;
+ save_count = con->bad_page_num - control->ras_num_bad_pages;
mutex_unlock(&con->recovery_lock);
if (new_cnt)
- *new_cnt = save_count / adev->umc.retire_unit;
+ *new_cnt = unit_num;
/* only new entries are saved */
- if (save_count > 0) {
- if (amdgpu_ras_eeprom_append(control,
- &data->bps[control->ras_num_recs],
- save_count)) {
- dev_err(adev->dev, "Failed to save EEPROM table data!");
- return -EIO;
+ if (unit_num > 0) {
+		/* old ASICs only save PA records to the EEPROM, as before */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[data->count_saved], unit_num)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ } else {
+ for (i = 0; i < unit_num; i++) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[data->count_saved +
+ i * adev->umc.retire_unit], 1)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ }
}
dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
+ data->count_saved = data->count;
}
return 0;
@@ -2195,7 +3161,7 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
struct amdgpu_ras_eeprom_control *control =
&adev->psp.ras_context.ras->eeprom_control;
struct eeprom_table_record *bps;
- int ret;
+ int ret, i = 0;
/* no bad page record, skip eeprom access */
if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
@@ -2206,27 +3172,71 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
return -ENOMEM;
ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs);
- if (ret)
+ if (ret) {
dev_err(adev->dev, "Failed to load EEPROM table records!");
- else
- ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs);
+ } else {
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+			/* In V3 there are no PA records, and some records (when address == 0) may be
+			 * misparsed as PA records, so add a version check to avoid that.
+			 */
+ if (control->tbl_hdr.version < RAS_TABLE_VER_V3) {
+ for (i = 0; i < control->ras_num_recs; i++) {
+ if ((control->ras_num_recs - i) >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ control->ras_num_pa_recs += adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ control->ras_num_mca_recs +=
+ (control->ras_num_recs - i);
+ break;
+ }
+ } else {
+ control->ras_num_mca_recs += (control->ras_num_recs - i);
+ break;
+ }
+ }
+ } else {
+ control->ras_num_mca_recs = control->ras_num_recs;
+ }
+ }
+
+ ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true);
+ if (ret)
+ goto out;
+ ret = amdgpu_ras_eeprom_check(control);
+ if (ret)
+ goto out;
+
+ /* HW not usable */
+ if (amdgpu_ras_is_rma(adev))
+ ret = -EHWPOISON;
+ }
+
+out:
kfree(bps);
return ret;
}
-static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
uint64_t addr)
{
struct ras_err_handler_data *data = con->eh_data;
+ struct amdgpu_device *adev = con->adev;
int i;
+ if ((addr >= adev->gmc.mc_vram_size &&
+ adev->gmc.mc_vram_size) ||
+ (addr >= RAS_UMC_INJECT_ADDR_LIMIT))
+ return -EINVAL;
+
addr >>= AMDGPU_GPU_PAGE_SHIFT;
for (i = 0; i < data->count; i++)
if (addr == data->bps[i].retired_page)
- return true;
+ return 1;
- return false;
+ return 0;
}
/*
@@ -2234,11 +3244,11 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
*
* Note: this check is only for umc block
*/
-static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- bool ret = false;
+ int ret = 0;
if (!con || !con->eh_data)
return ret;
@@ -2255,43 +3265,374 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
/*
- * Justification of value bad_page_cnt_threshold in ras structure
- *
- * Generally, 0 <= amdgpu_bad_page_threshold <= max record length
- * in eeprom or amdgpu_bad_page_threshold == -2, introduce two
- * scenarios accordingly.
- *
- * Bad page retirement enablement:
- * - If amdgpu_bad_page_threshold = -2,
- * bad_page_cnt_threshold = typical value by formula.
- *
- * - When the value from user is 0 < amdgpu_bad_page_threshold <
- * max record length in eeprom, use it directly.
- *
- * Bad page retirement disablement:
- * - If amdgpu_bad_page_threshold = 0, bad page retirement
- * functionality is disabled, and bad_page_cnt_threshold will
- * take no effect.
+ * amdgpu_bad_page_threshold is used to config
+ * the threshold for the number of bad pages.
+ * -1: Threshold is set to default value
+ * Driver will issue a warning message when threshold is reached
+ * and continue runtime services.
+ * 0: Disable bad page retirement
+ * Driver will not retire bad pages
+ * which is intended for debugging purpose.
+ * -2: Threshold is determined by a formula
+ * that assumes 1 bad page per 100M of local memory.
+	 * Driver will continue runtime services when threshold is reached.
+ * 0 < threshold < max number of bad page records in EEPROM,
+ * A user-defined threshold is set
+ * Driver will halt runtime services when this custom threshold is reached.
*/
-
- if (amdgpu_bad_page_threshold < 0) {
+ if (amdgpu_bad_page_threshold == -2) {
u64 val = adev->gmc.mc_vram_size;
do_div(val, RAS_BAD_PAGE_COVER);
con->bad_page_cnt_threshold = min(lower_32_bits(val),
max_count);
+ } else if (amdgpu_bad_page_threshold == -1) {
+ con->bad_page_cnt_threshold = ((con->reserved_pages_in_bytes) >> 21) << 4;
} else {
con->bad_page_cnt_threshold = min_t(int, max_count,
amdgpu_bad_page_threshold);
}
}
-int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
+/**
+ * amdgpu_ras_put_poison_req - queue a poison consumption request
+ * @adev: amdgpu device the request belongs to
+ * @block: RAS block stored in the message
+ * @pasid: PASID stored in the message; handed to @pasid_fn by the consumer
+ * @pasid_fn: optional callback the fifo consumer invokes for this message
+ * @data: opaque pointer handed back to @pasid_fn
+ * @reset: reset flags stored in the message
+ *
+ * Return: 0 on success, -EINVAL when no RAS context is available,
+ * -ENOSPC when the poison fifo is full.
+ */
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+		enum amdgpu_ras_block block, uint16_t pasid,
+		pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_poison_msg poison_msg;
+
+	/* Other entry points guard against a missing RAS context; do the same
+	 * here instead of dereferencing a NULL pointer below.
+	 */
+	if (!con)
+		return -EINVAL;
+
+	memset(&poison_msg, 0, sizeof(poison_msg));
+	poison_msg.block = block;
+	poison_msg.pasid = pasid;
+	poison_msg.reset = reset;
+	poison_msg.pasid_fn = pasid_fn;
+	poison_msg.data = data;
+
+	/* kfifo_put() returns 0 when there is no room left in the fifo */
+	if (!kfifo_put(&con->poison_fifo, poison_msg)) {
+		dev_err(adev->dev, "Poison message fifo is full!\n");
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Pop one poison message from the device's poison fifo into @poison_msg.
+ * The kfifo_get() result is returned unchanged (zero means the fifo was empty).
+ */
+static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
+		struct ras_poison_msg *poison_msg)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	int fetched;
+
+	fetched = kfifo_get(&con->poison_fifo, poison_msg);
+
+	return fetched;
+}
+
+/* Reset both ECC log counters and set up the log's page tree and lock. */
+static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log)
+{
+	ecc_log->consumption_q_count = 0;
+	ecc_log->de_queried_count = 0;
+
+	INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL);
+	mutex_init(&ecc_log->lock);
+}
+
+/*
+ * Tear down an ECC log: free every entry stored in the page tree (and the
+ * pfn array each entry owns), destroy the lock and zero the counters.
+ */
+static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
+{
+	struct radix_tree_iter it;
+	void __rcu **entry;
+
+	mutex_lock(&ecc_log->lock);
+	radix_tree_for_each_slot(entry, &ecc_log->de_page_tree, &it, 0) {
+		struct ras_ecc_err *err = radix_tree_deref_slot(entry);
+
+		kfree(err->err_pages.pfn);
+		kfree(err);
+		radix_tree_iter_delete(&ecc_log->de_page_tree, &it, entry);
+	}
+	mutex_unlock(&ecc_log->lock);
+	mutex_destroy(&ecc_log->lock);
+
+	ecc_log->consumption_q_count = 0;
+	ecc_log->de_queried_count = 0;
+}
+
+/*
+ * Queue the page retirement delayed work after @delayed_ms milliseconds, but
+ * only if the UMC ECC page tree holds entries tagged UMC_ECC_NEW_DETECTED_TAG.
+ *
+ * Returns true when the work was queued, false when there was nothing tagged.
+ */
+static bool amdgpu_ras_schedule_retirement_dwork(struct amdgpu_ras *con,
+				uint32_t delayed_ms)
+{
+	int has_new;
+
+	mutex_lock(&con->umc_ecc_log.lock);
+	has_new = radix_tree_tagged(&con->umc_ecc_log.de_page_tree,
+				    UMC_ECC_NEW_DETECTED_TAG);
+	mutex_unlock(&con->umc_ecc_log.lock);
+
+	if (!has_new)
+		return false;
+
+	schedule_delayed_work(&con->page_retirement_dwork,
+			      msecs_to_jiffies(delayed_ms));
+	return true;
+}
+
+/*
+ * Delayed-work handler for bad page retirement. While a GPU reset or RAS
+ * recovery is in progress the handling is skipped and the work is re-armed
+ * with three times the normal interval; otherwise bad pages are handled and
+ * the work is re-armed with the normal interval.
+ */
+static void amdgpu_ras_do_page_retirement(struct work_struct *work)
+{
+	struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
+					      page_retirement_dwork.work);
+	struct amdgpu_device *adev = con->adev;
+	struct ras_err_data err_data;
+
+	if (!amdgpu_in_reset(adev) && !amdgpu_ras_in_recovery(adev)) {
+		amdgpu_ras_error_data_init(&err_data);
+		amdgpu_umc_handle_bad_pages(adev, &err_data);
+		amdgpu_ras_error_data_fini(&err_data);
+
+		amdgpu_ras_schedule_retirement_dwork(con,
+				AMDGPU_RAS_RETIRE_PAGE_INTERVAL);
+		return;
+	}
+
+	/* If gpu reset is ongoing, delay retiring the bad pages */
+	amdgpu_ras_schedule_retirement_dwork(con,
+			AMDGPU_RAS_RETIRE_PAGE_INTERVAL * 3);
+}
+
+/*
+ * Poll the UMC error status for a poison creation event.
+ *
+ * The ECC log counters are cleared, then the error status is queried up to
+ * MAX_UMC_POISON_POLLING_TIME_ASYNC times, 100 ms apart, until both counters
+ * are non-zero. If any deferred error was queried, the page retirement work
+ * is kicked immediately. When the device is in RMA state a GPU reset is
+ * triggered once (guarded by rma_in_recovery).
+ *
+ * Returns 0, or the error returned by the status query.
+ */
+static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+				uint32_t poison_creation_count)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	struct ras_ecc_log_info *ecc_log = &ras->umc_ecc_log;
+	enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
+	u32 polls_left = MAX_UMC_POISON_POLLING_TIME_ASYNC;
+	struct ras_query_if info;
+	u64 queried, consumed;
+	int ret;
+
+	memset(&info, 0, sizeof(info));
+	info.head.block = AMDGPU_RAS_BLOCK__UMC;
+
+	ecc_log->de_queried_count = 0;
+	ecc_log->consumption_q_count = 0;
+
+	do {
+		ret = amdgpu_ras_query_error_status_with_event(adev, &info, type);
+		if (ret)
+			return ret;
+
+		queried = ecc_log->de_queried_count;
+		consumed = ecc_log->consumption_q_count;
+		if (queried && consumed)
+			break;
+
+		msleep(100);
+	} while (--polls_left);
+
+	if (queried)
+		schedule_delayed_work(&ras->page_retirement_dwork, 0);
+
+	if (amdgpu_ras_is_rma(adev) &&
+	    atomic_cmpxchg(&ras->rma_in_recovery, 0, 1) == 0)
+		amdgpu_ras_reset_gpu(adev);
+
+	return 0;
+}
+
+/* Drain the poison fifo, discarding every queued message. */
+static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_poison_msg discarded;
+
+	/* kfifo_get() returns 0 once the fifo is empty */
+	while (kfifo_get(&con->poison_fifo, &discarded))
+		;
+}
+
+/*
+ * Process up to @msg_count queued poison consumption messages.
+ *
+ * Each message's pasid callback (if any) is invoked and the requested reset
+ * flags are accumulated. After the page retirement work has been flushed, a
+ * GPU reset is triggered when any reset was requested and the device is not
+ * in RMA state; the chosen reset mode is written to @gpu_reset and the
+ * function waits for the recovery work to finish. @gpu_reset is left
+ * untouched when no reset is triggered.
+ *
+ * Always returns 0.
+ */
+static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
+			uint32_t msg_count, uint32_t *gpu_reset)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	uint32_t requested = 0, reset = 0;
+	struct ras_poison_msg msg;
+	int i;
+
+	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+	for (i = 0; i < msg_count; i++) {
+		if (amdgpu_ras_get_poison_req(adev, &msg)) {
+			if (msg.pasid_fn)
+				msg.pasid_fn(adev, msg.pasid, msg.data);
+
+			requested |= msg.reset;
+		}
+	}
+
+	/*
+	 * Try to ensure poison creation handler is completed first
+	 * to set rma if bad page exceed threshold.
+	 */
+	flush_delayed_work(&con->page_retirement_dwork);
+
+	/* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */
+	if (!requested || amdgpu_ras_is_rma(adev))
+		return 0;
+
+	/* Prefer mode-1 over mode-2; otherwise pass the flags through as-is */
+	if (requested & AMDGPU_RAS_GPU_RESET_MODE1_RESET)
+		reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+	else if (requested & AMDGPU_RAS_GPU_RESET_MODE2_RESET)
+		reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+	else
+		reset = requested;
+
+	con->gpu_reset_flags |= reset;
+	amdgpu_ras_reset_gpu(adev);
+
+	*gpu_reset = reset;
+
+	/* Wait for gpu recovery to complete */
+	flush_work(&con->recovery_work);
+
+	return 0;
+}
+
+/*
+ * Kernel thread that services page retirement requests.
+ *
+ * It sleeps until page_retirement_req_cnt is non-zero (or the thread is asked
+ * to stop), then, under poison_lock, runs the poison creation handler followed
+ * by the poison consumption handler, and finally cleans up the request
+ * counters and the poison fifo according to which kind of reset took place.
+ */
+static int amdgpu_ras_page_retirement_thread(void *param)
+{
+	struct amdgpu_device *adev = param;
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	uint32_t created, pending_msgs;
+	uint32_t gpu_reset;
+	bool kick_dwork;
+	int ret;
+
+	while (!kthread_should_stop()) {
+
+		wait_event_interruptible(con->page_retirement_wq,
+				kthread_should_stop() ||
+				atomic_read(&con->page_retirement_req_cnt));
+
+		if (kthread_should_stop())
+			break;
+
+		mutex_lock(&con->poison_lock);
+		gpu_reset = 0;
+		kick_dwork = false;
+
+		/* Handle poison creation until none is left or a consumption arrives */
+		do {
+			created = atomic_read(&con->poison_creation_count);
+			ret = amdgpu_ras_poison_creation_handler(adev, created);
+			if (ret == -EIO)
+				break;
+
+			if (created) {
+				atomic_sub(created, &con->poison_creation_count);
+				atomic_sub(created, &con->page_retirement_req_cnt);
+			}
+		} while (atomic_read(&con->poison_creation_count) &&
+			 !atomic_read(&con->poison_consumption_count));
+
+		if (ret != -EIO) {
+			pending_msgs = kfifo_len(&con->poison_fifo);
+			if (pending_msgs) {
+				ret = amdgpu_ras_poison_consumption_handler(adev,
+						pending_msgs, &gpu_reset);
+				if (ret != -EIO &&
+				    gpu_reset != AMDGPU_RAS_GPU_RESET_MODE1_RESET)
+					atomic_sub(pending_msgs,
+						   &con->page_retirement_req_cnt);
+			}
+		}
+
+		if (ret == -EIO || gpu_reset == AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+			/* gpu mode-1 reset is ongoing or just completed ras mode-1 reset */
+			/* Clear poison creation request */
+			atomic_set(&con->poison_creation_count, 0);
+			atomic_set(&con->poison_consumption_count, 0);
+
+			/* Clear poison fifo */
+			amdgpu_ras_clear_poison_fifo(adev);
+
+			/* Clear all poison requests */
+			atomic_set(&con->page_retirement_req_cnt, 0);
+
+			if (ret == -EIO) {
+				/* Wait for mode-1 reset to complete */
+				down_read(&adev->reset_domain->sem);
+				up_read(&adev->reset_domain->sem);
+			}
+
+			kick_dwork = true;
+		} else if (gpu_reset) {
+			/* gpu just completed mode-2 reset or other reset */
+			/* Clear poison consumption messages cached in fifo */
+			pending_msgs = kfifo_len(&con->poison_fifo);
+			if (pending_msgs) {
+				amdgpu_ras_clear_poison_fifo(adev);
+				atomic_sub(pending_msgs,
+					   &con->page_retirement_req_cnt);
+			}
+
+			atomic_set(&con->poison_consumption_count, 0);
+
+			kick_dwork = true;
+		}
+
+		/* Wake up work to save bad pages to eeprom */
+		if (kick_dwork)
+			schedule_delayed_work(&con->page_retirement_dwork, 0);
+
+		mutex_unlock(&con->poison_lock);
+	}
+
+	return 0;
+}
+
+/*
+ * Initialize the RAS EEPROM control block and load the stored bad page
+ * records.
+ *
+ * Does nothing when there is no RAS context or on an SR-IOV VF. A failure to
+ * initialize the EEPROM or to load its records only marks the EEPROM as
+ * invalid. After a successful load the bad page count (and, if flagged, the
+ * bad channel bitmap) is sent through the dpm interface, and on UMC major
+ * version >= 12 a table older than V3 is reset and saved again.
+ *
+ * Always returns 0.
+ */
+int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct amdgpu_ras_eeprom_control *control;
+	bool can_convert_addr;
+
+	if (!con || amdgpu_sriov_vf(adev))
+		return 0;
+
+	control = &con->eeprom_control;
+	control->is_eeprom_valid = !amdgpu_ras_eeprom_init(control);
+
+	/* Without an address conversion callback every record counts as a PA record */
+	can_convert_addr = adev->umc.ras && adev->umc.ras->convert_ras_err_addr;
+	if (!can_convert_addr)
+		control->ras_num_pa_recs = control->ras_num_recs;
+
+	if (adev->umc.ras && adev->umc.ras->get_retire_flip_bits)
+		adev->umc.ras->get_retire_flip_bits(adev);
+
+	if (!control->ras_num_recs || !control->is_eeprom_valid)
+		return 0;
+
+	if (amdgpu_ras_load_bad_pages(adev)) {
+		control->is_eeprom_valid = false;
+		return 0;
+	}
+
+	amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages);
+
+	if (con->update_channel_flag == true) {
+		amdgpu_dpm_send_hbm_bad_channel_flag(adev,
+				control->bad_channel_bitmap);
+		con->update_channel_flag = false;
+	}
+
+	/* The format action is only applied to new ASICs */
+	if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) >= 12 &&
+	    control->tbl_hdr.version < RAS_TABLE_VER_V3 &&
+	    !amdgpu_ras_eeprom_reset_table(control) &&
+	    amdgpu_ras_save_bad_pages(adev, NULL))
+		dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n");
+
+	return 0;
+}
+
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data **data;
u32 max_eeprom_records_count = 0;
- bool exc_err_limit = false;
int ret;
if (!con || amdgpu_sriov_vf(adev))
@@ -2308,47 +3649,44 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
return 0;
data = &con->eh_data;
- *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
+ *data = kzalloc(sizeof(**data), GFP_KERNEL);
if (!*data) {
ret = -ENOMEM;
goto out;
}
mutex_init(&con->recovery_lock);
+ mutex_init(&con->poison_lock);
INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
atomic_set(&con->in_recovery, 0);
+ atomic_set(&con->rma_in_recovery, 0);
con->eeprom_control.bad_channel_bitmap = 0;
max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
- /* Todo: During test the SMU might fail to read the eeprom through I2C
- * when the GPU is pending on XGMI reset during probe time
- * (Mostly after second bus reset), skip it now
- */
- if (adev->gmc.xgmi.pending_reset)
- return 0;
- ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit);
- /*
- * This calling fails when exc_err_limit is true or
- * ret != 0.
- */
- if (exc_err_limit || ret)
- goto free;
-
- if (con->eeprom_control.ras_num_recs) {
- ret = amdgpu_ras_load_bad_pages(adev);
+ if (init_bp_info) {
+ ret = amdgpu_ras_init_badpage_info(adev);
if (ret)
goto free;
+ }
- amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
-
- if (con->update_channel_flag == true) {
- amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
- con->update_channel_flag = false;
- }
+ mutex_init(&con->page_rsv_lock);
+ INIT_KFIFO(con->poison_fifo);
+ mutex_init(&con->page_retirement_lock);
+ init_waitqueue_head(&con->page_retirement_wq);
+ atomic_set(&con->page_retirement_req_cnt, 0);
+ atomic_set(&con->poison_creation_count, 0);
+ atomic_set(&con->poison_consumption_count, 0);
+ con->page_retirement_thread =
+ kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement");
+ if (IS_ERR(con->page_retirement_thread)) {
+ con->page_retirement_thread = NULL;
+ dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n");
}
+ INIT_DELAYED_WORK(&con->page_retirement_dwork, amdgpu_ras_do_page_retirement);
+ amdgpu_ras_ecc_log_init(&con->umc_ecc_log);
#ifdef CONFIG_X86_MCE_AMD
if ((adev->asic_type == CHIP_ALDEBARAN) &&
(adev->gmc.xgmi.connected_to_cpu))
@@ -2367,7 +3705,7 @@ out:
* Except error threshold exceeding case, other failure cases in this
* function would not fail amdgpu driver init.
*/
- if (!exc_err_limit)
+ if (!amdgpu_ras_is_rma(adev))
ret = 0;
else
ret = -EINVAL;
@@ -2379,19 +3717,41 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data = con->eh_data;
+ int max_flush_timeout = MAX_FLUSH_RETIRE_DWORK_TIMES;
+ bool ret;
/* recovery_init failed to init it, fini is useless */
if (!data)
return 0;
+ /* Save all cached bad pages to eeprom */
+ do {
+ flush_delayed_work(&con->page_retirement_dwork);
+ ret = amdgpu_ras_schedule_retirement_dwork(con, 0);
+ } while (ret && max_flush_timeout--);
+
+ if (con->page_retirement_thread)
+ kthread_stop(con->page_retirement_thread);
+
+ atomic_set(&con->page_retirement_req_cnt, 0);
+ atomic_set(&con->poison_creation_count, 0);
+
+ mutex_destroy(&con->page_rsv_lock);
+
cancel_work_sync(&con->recovery_work);
+ cancel_delayed_work_sync(&con->page_retirement_dwork);
+
+ amdgpu_ras_ecc_log_fini(&con->umc_ecc_log);
+
mutex_lock(&con->recovery_lock);
con->eh_data = NULL;
kfree(data->bps);
kfree(data);
mutex_unlock(&con->recovery_lock);
+ amdgpu_ras_critical_region_init(adev);
+
return 0;
}
/* recovery end */
@@ -2399,9 +3759,11 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
return true;
default:
return false;
@@ -2409,10 +3771,13 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
}
if (adev->asic_type == CHIP_IP_DISCOVERY) {
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(14, 0, 3):
return true;
default:
return false;
@@ -2445,6 +3810,88 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
}
+/*
+ * Query RAS capability via the atomfirmware interface and record the
+ * supported blocks in adev->ras_hw_enabled.
+ */
+static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev)
+{
+	const int mem_blocks = 1 << AMDGPU_RAS_BLOCK__UMC |
+			       1 << AMDGPU_RAS_BLOCK__DF;
+	const int video_blocks = 1 << AMDGPU_RAS_BLOCK__VCN |
+				 1 << AMDGPU_RAS_BLOCK__JPEG;
+
+	/* mem_ecc cap */
+	if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+		dev_info(adev->dev, "MEM ECC is active.\n");
+		adev->ras_hw_enabled |= mem_blocks;
+	} else {
+		dev_info(adev->dev, "MEM ECC is not presented.\n");
+	}
+
+	/* sram_ecc cap */
+	if (!amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+		dev_info(adev->dev, "SRAM ECC is not presented.\n");
+		return;
+	}
+
+	dev_info(adev->dev, "SRAM ECC is active.\n");
+	if (amdgpu_sriov_vf(adev))
+		adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
+					 1 << AMDGPU_RAS_BLOCK__SDMA |
+					 1 << AMDGPU_RAS_BLOCK__GFX);
+	else
+		adev->ras_hw_enabled |= ~mem_blocks;
+
+	/*
+	 * VCN/JPEG RAS can be supported on both bare metal and
+	 * SRIOV environment
+	 */
+	switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+	case IP_VERSION(2, 6, 0):
+	case IP_VERSION(4, 0, 0):
+	case IP_VERSION(4, 0, 3):
+	case IP_VERSION(5, 0, 1):
+		adev->ras_hw_enabled |= video_blocks;
+		break;
+	default:
+		adev->ras_hw_enabled &= ~video_blocks;
+		break;
+	}
+
+	/*
+	 * XGMI RAS is not supported if xgmi num physical nodes
+	 * is zero
+	 */
+	if (!adev->gmc.xgmi.num_physical_nodes)
+		adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
+}
+
+/*
+ * Query poison mode from the umc/df IP callbacks and set
+ * con->poison_supported accordingly. The flag is only ever set to true here;
+ * it is left untouched otherwise.
+ */
+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	bool df_poison, umc_poison;
+
+	/* poison setting is useless on SRIOV guest */
+	if (amdgpu_sriov_vf(adev) || !con)
+		return;
+
+	/* enabled by default when GPU is connected to CPU */
+	if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) {
+		con->poison_supported = true;
+		return;
+	}
+
+	/* Both the DF and the UMC callback are needed to decide */
+	if (!adev->df.funcs || !adev->df.funcs->query_ras_poison_mode ||
+	    !adev->umc.ras || !adev->umc.ras->query_ras_poison_mode)
+		return;
+
+	df_poison = adev->df.funcs->query_ras_poison_mode(adev);
+	umc_poison = adev->umc.ras->query_ras_poison_mode(adev);
+
+	/* Only poison is set in both DF and UMC, we can support it */
+	if (df_poison != umc_poison)
+		dev_warn(adev->dev,
+			"Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
+			df_poison, umc_poison);
+	else if (df_poison)
+		con->poison_supported = true;
+}
+
/*
* check hardware's ras ability which will be saved in hw_supported.
* if hardware does not support ras, we can skip some ras initializtion and
@@ -2461,45 +3908,18 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
if (!amdgpu_ras_asic_supported(adev))
return;
- if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
- if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
- dev_info(adev->dev, "MEM ECC is active.\n");
- adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
- 1 << AMDGPU_RAS_BLOCK__DF);
- } else {
- dev_info(adev->dev, "MEM ECC is not presented.\n");
- }
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_get_ras_capability(adev))
+ goto init_ras_enabled_flag;
+ }
- if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
- dev_info(adev->dev, "SRAM ECC is active.\n");
- if (!amdgpu_sriov_vf(adev))
- adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
- 1 << AMDGPU_RAS_BLOCK__DF);
- else
- adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
- 1 << AMDGPU_RAS_BLOCK__SDMA |
- 1 << AMDGPU_RAS_BLOCK__GFX);
+ /* query ras capability from psp */
+ if (amdgpu_psp_get_ras_capability(&adev->psp))
+ goto init_ras_enabled_flag;
- /* VCN/JPEG RAS can be supported on both bare metal and
- * SRIOV environment
- */
- if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) ||
- adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))
- adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
- 1 << AMDGPU_RAS_BLOCK__JPEG);
- else
- adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
- 1 << AMDGPU_RAS_BLOCK__JPEG);
-
- /*
- * XGMI RAS is not supported if xgmi num physical nodes
- * is zero
- */
- if (!adev->gmc.xgmi.num_physical_nodes)
- adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
- } else {
- dev_info(adev->dev, "SRAM ECC is not presented.\n");
- }
+	/* query ras capability from bios */
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ amdgpu_ras_query_ras_capablity_from_vbios(adev);
} else {
/* driver only manages a few IP blocks RAS feature
* when GPU is connected cpu through XGMI */
@@ -2508,23 +3928,31 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
1 << AMDGPU_RAS_BLOCK__MMHUB);
}
+ /* apply asic specific settings (vega20 only for now) */
amdgpu_ras_get_quirks(adev);
+ /* query poison mode from umc/df ip callback */
+ amdgpu_ras_query_poison_mode(adev);
+
+init_ras_enabled_flag:
/* hw_supported needs to be aligned with RAS block mask. */
adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
+ adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
+ adev->ras_hw_enabled & amdgpu_ras_mask;
- /*
- * Disable ras feature for aqua vanjaram
- * by default on apu platform.
- */
- if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6) &&
- adev->gmc.is_app_apu)
- adev->ras_enabled = amdgpu_ras_enable != 1 ? 0 :
- adev->ras_hw_enabled & amdgpu_ras_mask;
- else
- adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
- adev->ras_hw_enabled & amdgpu_ras_mask;
+ /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->aca.is_enabled =
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14));
+ }
+
+ /* bad page feature is not applicable to specific app platform */
+ if (adev->gmc.is_app_apu &&
+ amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(12, 0, 0))
+ amdgpu_bad_page_threshold = 0;
}
static void amdgpu_ras_counte_dw(struct work_struct *work)
@@ -2552,35 +3980,68 @@ Out:
pm_runtime_put_autosuspend(dev->dev);
}
-static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
+/*
+ * Build the RAS schema mask: the single-correctable, multi-uncorrectable and
+ * parity error bits are always set; the poison bit is added only when poison
+ * mode is supported.
+ */
+static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
 {
-	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	bool df_poison, umc_poison;
+	/*
+	 * The conditional must be parenthesized: "?:" binds more loosely than
+	 * "|", so without the parentheses the three bits below would be part
+	 * of the else-operand and be dropped whenever poison mode is supported.
+	 */
+	return (amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0) |
+		AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE |
+		AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE |
+		AMDGPU_RAS_ERROR__PARITY;
+}
- /* poison setting is useless on SRIOV guest */
- if (amdgpu_sriov_vf(adev) || !con)
+/*
+ * Reset an event manager: zero the whole structure, restart the sequence
+ * number at 0, and mark every event state as having no valid last sequence
+ * number and a zero count.
+ */
+static void ras_event_mgr_init(struct ras_event_manager *mgr)
+{
+	struct ras_event_state *state;
+	int idx;
+
+	memset(mgr, 0, sizeof(*mgr));
+	atomic64_set(&mgr->seqno, 0);
+
+	for (idx = 0; idx < ARRAY_SIZE(mgr->event_state); idx++) {
+		state = &mgr->event_state[idx];
+
+		atomic64_set(&state->count, 0);
+		state->last_seqno = RAS_EVENT_INVALID_ID;
+	}
+}
+
+static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_hive_info *hive;
+
+ if (!ras)
return;
- /* Init poison supported flag, the default value is false */
- if (adev->gmc.xgmi.connected_to_cpu) {
- /* enabled by default when GPU is connected to CPU */
- con->poison_supported = true;
- } else if (adev->df.funcs &&
- adev->df.funcs->query_ras_poison_mode &&
- adev->umc.ras &&
- adev->umc.ras->query_ras_poison_mode) {
- df_poison =
- adev->df.funcs->query_ras_poison_mode(adev);
- umc_poison =
- adev->umc.ras->query_ras_poison_mode(adev);
+ hive = amdgpu_get_xgmi_hive(adev);
+ ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr;
- /* Only poison is set in both DF and UMC, we can support it */
- if (df_poison && umc_poison)
- con->poison_supported = true;
- else if (df_poison != umc_poison)
- dev_warn(adev->dev,
- "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
- df_poison, umc_poison);
+ /* init event manager with node 0 on xgmi system */
+ if (!amdgpu_reset_in_recovery(adev)) {
+ if (!hive || adev->gmc.xgmi.node_id == 0)
+ ras_event_mgr_init(ras->event_mgr);
+ }
+
+ if (hive)
+ amdgpu_put_xgmi_hive(hive);
+}
+
+/*
+ * Set con->reserved_pages_in_bytes from the MP0 IP version. Nothing is done
+ * without a RAS context or on an APU; for IP versions not listed below the
+ * field is left unchanged.
+ */
+static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (!con)
+		return;
+
+	if (adev->flags & AMD_IS_APU)
+		return;
+
+	switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+	case IP_VERSION(13, 0, 14):
+		/* twice the default reservation */
+		con->reserved_pages_in_bytes = (AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT << 1);
+		break;
+	case IP_VERSION(13, 0, 2):
+	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 12):
+		con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT;
+		break;
+	default:
+		break;
 	}
 }
@@ -2592,10 +4053,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
if (con)
return 0;
- con = kmalloc(sizeof(struct amdgpu_ras) +
+ con = kzalloc(sizeof(*con) +
sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
- GFP_KERNEL|__GFP_ZERO);
+ GFP_KERNEL);
if (!con)
return -ENOMEM;
@@ -2626,6 +4087,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
con->update_channel_flag = false;
con->features = 0;
+ con->schema = 0;
INIT_LIST_HEAD(&con->head);
/* Might need get this flag from vbios. */
con->flags = RAS_DEFAULT_FLAGS;
@@ -2633,7 +4095,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* initialize nbio ras function ahead of any other
* ras functions so hardware fatal error interrupt
* can be enabled as early as possible */
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(7, 4, 0):
case IP_VERSION(7, 4, 1):
case IP_VERSION(7, 4, 4):
@@ -2650,7 +4112,19 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
* check DF RAS */
adev->nbio.ras = &nbio_v4_3_ras;
break;
+ case IP_VERSION(6, 3, 1):
+ if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+ /* unlike other generation of nbio ras,
+ * nbif v6_3_1 only support fatal error interrupt
+ * to inform software that DF is freezed due to
+ * system fatal error event. driver should not
+ * enable nbio ras in such case. Instead,
+ * check DF RAS
+ */
+ adev->nbio.ras = &nbif_v6_3_1_ras;
+ break;
case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
if (!adev->gmc.is_app_apu)
adev->nbio.ras = &nbio_v7_9_ras;
break;
@@ -2679,13 +4153,37 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
goto release_con;
}
- amdgpu_ras_query_poison_mode(adev);
+ /* Packed socket_id to ras feature mask bits[31:29] */
+ if (adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id)
+ con->features |= ((adev->smuio.funcs->get_socket_id(adev)) <<
+ AMDGPU_RAS_FEATURES_SOCKETID_SHIFT);
+
+ /* Get RAS schema for particular SOC */
+ con->schema = amdgpu_get_ras_schema(adev);
+
+ amdgpu_ras_init_reserved_vram_size(adev);
if (amdgpu_ras_fs_init(adev)) {
r = -EINVAL;
goto release_con;
}
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ r = amdgpu_aca_init(adev);
+ else
+ r = amdgpu_mca_init(adev);
+ if (r)
+ goto release_con;
+ }
+
+ con->init_task_pid = task_pid_nr(current);
+ get_task_comm(con->init_task_comm, current);
+
+ mutex_init(&con->critical_region_lock);
+ INIT_LIST_HEAD(&con->critical_region_head);
+
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
"hardware ability[%x] ras_mask[%x]\n",
adev->ras_hw_enabled, adev->ras_enabled);
@@ -2753,7 +4251,7 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
if (r) {
- if (adev->in_suspend || amdgpu_in_reset(adev)) {
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) {
/* in resume phase, if fail to enable ras,
* clean up all ras fs nodes, and disable ras */
goto cleanup;
@@ -2765,7 +4263,7 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
amdgpu_persistent_edc_harvesting(adev, ras_block);
/* in resume phase, no need to create ras fs node */
- if (adev->in_suspend || amdgpu_in_reset(adev))
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev))
return 0;
ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
@@ -2882,7 +4380,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
amdgpu_ras_disable_all_features(adev, 0);
/* Make sure all ras objects are disabled. */
- if (con->features)
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
amdgpu_ras_disable_all_features(adev, 1);
}
@@ -2892,17 +4390,40 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
struct amdgpu_ras_block_object *obj;
int r;
+ amdgpu_ras_event_mgr_init(adev);
+
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_reset_in_recovery(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ r = amdgpu_aca_reset(adev);
+ else
+ r = amdgpu_mca_reset(adev);
+ if (r)
+ return r;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_ras_set_aca_debug_mode(adev, false);
+ else
+ amdgpu_ras_set_mca_debug_mode(adev, false);
+ }
+ }
+
/* Guest side doesn't need init ras feature */
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_ras_telemetry_en(adev))
return 0;
list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
- if (!node->ras_obj) {
+ obj = node->ras_obj;
+ if (!obj) {
dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
continue;
}
- obj = node->ras_obj;
+ if (!amdgpu_ras_is_supported(adev, obj->ras_comm.block))
+ continue;
+
if (obj->ras_late_init) {
r = obj->ras_late_init(adev, &obj->ras_comm);
if (r) {
@@ -2927,7 +4448,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
/* Need disable ras on all IPs here before ip [hw/sw]fini */
- if (con->features)
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
amdgpu_ras_disable_all_features(adev, 0);
amdgpu_ras_recovery_fini(adev);
return 0;
@@ -2942,6 +4463,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
if (!adev->ras_enabled || !con)
return 0;
+ amdgpu_ras_critical_region_fini(adev);
+ mutex_destroy(&con->critical_region_lock);
+
list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
if (ras_node->ras_obj) {
obj = ras_node->ras_obj;
@@ -2960,10 +4484,17 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
amdgpu_ras_fs_fini(adev);
amdgpu_ras_interrupt_remove_all(adev);
- WARN(con->features, "Feature mask is not cleared");
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_aca_fini(adev);
+ else
+ amdgpu_mca_fini(adev);
+ }
+
+ WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
- if (con->features)
- amdgpu_ras_disable_all_features(adev, 1);
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
+ amdgpu_ras_disable_all_features(adev, 0);
cancel_delayed_work_sync(&con->ras_counte_delay_work);
@@ -2973,14 +4504,155 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
return 0;
}
+/*
+ * amdgpu_ras_get_fed_status - test the AMDGPU_RAS_BLOCK__LAST bit of
+ * ras_err_state, i.e. the bit amdgpu_ras_set_fed() sets and clears.
+ *
+ * Returns false when the device has no RAS context.
+ */
+bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (con)
+		return test_bit(AMDGPU_RAS_BLOCK__LAST, &con->ras_err_state);
+
+	return false;
+}
+
+/*
+ * amdgpu_ras_set_fed - set or clear the AMDGPU_RAS_BLOCK__LAST bit of
+ * ras_err_state.
+ * @status: true sets the bit, false clears it
+ *
+ * Does nothing when the device has no RAS context.
+ */
+void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (!con)
+		return;
+
+	if (status)
+		set_bit(AMDGPU_RAS_BLOCK__LAST, &con->ras_err_state);
+	else
+		clear_bit(AMDGPU_RAS_BLOCK__LAST, &con->ras_err_state);
+}
+
+/* Zero both ras_err_state and gpu_reset_flags; no-op without a RAS context. */
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (!con)
+		return;
+
+	con->ras_err_state = 0;
+	con->gpu_reset_flags = 0;
+}
+
+/* Set bit @block in ras_err_state; no-op without a RAS context. */
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+			       enum amdgpu_ras_block block)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (!con)
+		return;
+
+	set_bit(block, &con->ras_err_state);
+}
+
+/*
+ * amdgpu_ras_is_err_state - query the recorded RAS error state
+ * @block: a block index, or AMDGPU_RAS_BLOCK__ANY
+ *
+ * For AMDGPU_RAS_BLOCK__ANY, returns true when any bit of ras_err_state is
+ * set. Otherwise returns true when either the bit for @block or the
+ * AMDGPU_RAS_BLOCK__LAST bit is set. Returns false without a RAS context.
+ */
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (!con)
+		return false;
+
+	if (block == AMDGPU_RAS_BLOCK__ANY)
+		return con->ras_err_state != 0;
+
+	if (test_bit(block, &con->ras_err_state))
+		return true;
+
+	return test_bit(AMDGPU_RAS_BLOCK__LAST, &con->ras_err_state);
+}
+
+/* Return the event manager of @adev's RAS context, or NULL without a context. */
+static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	return con ? con->event_mgr : NULL;
+}
+
+/*
+ * amdgpu_ras_mark_ras_event_caller - record a new occurrence of a RAS event
+ * @type: event type; must be below RAS_EVENT_TYPE_COUNT
+ * @caller: address printed with %ps in the failure warning; NULL suppresses
+ *          the warning
+ *
+ * Takes the next value of the manager-wide sequence counter as the type's
+ * last_seqno and increments the type's occurrence count.
+ *
+ * Returns 0 on success, -EINVAL when @type is out of range or no event
+ * manager is available.
+ */
+int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,
+				     const void *caller)
+{
+	struct ras_event_manager *mgr = NULL;
+	struct ras_event_state *state;
+
+	/* Only look the manager up for an in-range type. */
+	if (type < RAS_EVENT_TYPE_COUNT)
+		mgr = __get_ras_event_mgr(adev);
+
+	if (!mgr) {
+		if (caller)
+			dev_warn(adev->dev, "failed mark ras event (%d) in %ps, ret:%d\n",
+				 (int)type, caller, -EINVAL);
+		return -EINVAL;
+	}
+
+	state = &mgr->event_state[type];
+	state->last_seqno = atomic64_inc_return(&mgr->seqno);
+	atomic64_inc(&state->count);
+
+	return 0;
+}
+
+/*
+ * amdgpu_ras_acquire_event_id - fetch the last sequence number marked for @type
+ *
+ * Only the FATAL, POISON_CREATION and POISON_CONSUMPTION types carry an id.
+ * Any other type, or a missing event manager, yields RAS_EVENT_INVALID_ID.
+ */
+u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type)
+{
+	struct ras_event_manager *mgr;
+
+	if (type != RAS_EVENT_TYPE_FATAL &&
+	    type != RAS_EVENT_TYPE_POISON_CREATION &&
+	    type != RAS_EVENT_TYPE_POISON_CONSUMPTION)
+		return RAS_EVENT_INVALID_ID;
+
+	mgr = __get_ras_event_mgr(adev);
+	if (!mgr)
+		return RAS_EVENT_INVALID_ID;
+
+	return mgr->event_state[type].last_seqno;
+}
+
void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
{
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ enum ras_event_type type = RAS_EVENT_TYPE_FATAL;
+ u64 event_id;
+
+ if (amdgpu_ras_mark_ras_event(adev, type)) {
+ dev_err(adev->dev,
+ "uncorrectable hardware error (ERREVENT_ATHUB_INTERRUPT) detected!\n");
+ return;
+ }
- dev_info(adev->dev, "uncorrectable hardware error"
- "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+ RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error"
+ "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+
+ amdgpu_ras_set_fed(adev, true);
ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
amdgpu_ras_reset_gpu(adev);
}
@@ -3153,6 +4825,7 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev,
block == AMDGPU_RAS_BLOCK__SDMA ||
block == AMDGPU_RAS_BLOCK__VCN ||
block == AMDGPU_RAS_BLOCK__JPEG) &&
+ (amdgpu_ras_mask & (1 << block)) &&
amdgpu_ras_is_poison_mode_supported(adev) &&
amdgpu_ras_get_ras_block(adev, block, 0))
ret = 1;
@@ -3164,11 +4837,106 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
{
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
- if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
+ /* mode1 reset is the only option when the device is in RMA status */
+ if (amdgpu_ras_is_rma(adev)) {
+ ras->gpu_reset_flags = 0;
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ }
+
+ if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ int hive_ras_recovery = 0;
+
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+ /* In the case of multiple GPUs, after a GPU has started
+ * resetting all GPUs on hive, other GPUs do not need to
+ * trigger GPU reset again.
+ */
+ if (!hive_ras_recovery)
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ else
+ atomic_set(&ras->in_recovery, 0);
+ } else {
+ flush_work(&ras->recovery_work);
amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ }
+
return 0;
}
+/*
+ * amdgpu_ras_set_mca_debug_mode - switch the SMU MCA debug mode on or off
+ *
+ * On success the requested state is cached in is_aca_debug_mode. Returns 0
+ * without doing anything when there is no RAS context, otherwise the result
+ * of amdgpu_mca_smu_set_debug_mode().
+ */
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	int ret;
+
+	if (!con)
+		return 0;
+
+	ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+	if (ret)
+		return ret;
+
+	con->is_aca_debug_mode = enable;
+	return 0;
+}
+
+/*
+ * amdgpu_ras_set_aca_debug_mode - switch the SMU debug mode on or off
+ *
+ * Uses the ACA SMU call when amdgpu_aca_is_enabled() reports true and the MCA
+ * one otherwise. On success the requested state is cached in
+ * is_aca_debug_mode. Returns 0 without doing anything when there is no RAS
+ * context, otherwise the result of the SMU call.
+ */
+int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	int ret;
+
+	if (!con)
+		return 0;
+
+	ret = amdgpu_aca_is_enabled(adev) ?
+		amdgpu_aca_smu_set_debug_mode(adev, enable) :
+		amdgpu_mca_smu_set_debug_mode(adev, enable);
+	if (ret)
+		return ret;
+
+	con->is_aca_debug_mode = enable;
+	return 0;
+}
+
+/*
+ * amdgpu_ras_get_aca_debug_mode - report the cached debug mode state
+ *
+ * Returns false without a RAS context. When the active backend (ACA if
+ * amdgpu_aca_is_enabled(), MCA otherwise) provides a set-debug-mode hook, the
+ * cached is_aca_debug_mode is returned; with no such hook the result is true.
+ */
+bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	const struct amdgpu_mca_smu_funcs *mca = adev->mca.mca_funcs;
+	const struct aca_smu_funcs *aca = adev->aca.smu_funcs;
+	bool can_switch;
+
+	if (!con)
+		return false;
+
+	if (amdgpu_aca_is_enabled(adev))
+		can_switch = aca && aca->set_debug_mode;
+	else
+		can_switch = mca && mca->mca_set_debug_mode;
+
+	return can_switch ? con->is_aca_debug_mode : true;
+}
+
+/*
+ * amdgpu_ras_get_error_query_mode - choose how error counts are queried
+ * @error_query_mode: receives an AMDGPU_RAS_*_ERROR_QUERY value
+ *
+ * Without a RAS context the mode is INVALID and false is returned. Otherwise
+ * true is returned and the mode is:
+ *   - VIRT_ERROR_COUNT for an SR-IOV VF,
+ *   - FIRMWARE when an ACA or MCA set-debug-mode hook exists and the cached
+ *     debug mode is off,
+ *   - DIRECT in every remaining case.
+ */
+bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
+				     unsigned int *error_query_mode)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	const struct aca_smu_funcs *aca = adev->aca.smu_funcs;
+	const struct amdgpu_mca_smu_funcs *mca = adev->mca.mca_funcs;
+	bool has_hook;
+
+	if (!con) {
+		*error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY;
+		return false;
+	}
+
+	if (amdgpu_sriov_vf(adev)) {
+		*error_query_mode = AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY;
+		return true;
+	}
+
+	has_hook = (aca && aca->set_debug_mode) ||
+		   (mca && mca->mca_set_debug_mode);
+
+	if (has_hook && !con->is_aca_debug_mode)
+		*error_query_mode = AMDGPU_RAS_FIRMWARE_ERROR_QUERY;
+	else
+		*error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY;
+
+	return true;
+}
/* Register each ip ras block into amdgpu ras */
int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
@@ -3328,3 +5096,397 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
WREG32(err_status_hi_offset, 0);
}
}
+
+/*
+ * Zero @err_data and initialise its err_node_list as an empty list.
+ * Always returns 0.
+ */
+int amdgpu_ras_error_data_init(struct ras_err_data *err_data)
+{
+ memset(err_data, 0, sizeof(*err_data));
+
+ INIT_LIST_HEAD(&err_data->err_node_list);
+
+ return 0;
+}
+
+/* Unlink @err_node from its list and free it; a NULL node is ignored. */
+static void amdgpu_ras_error_node_release(struct ras_err_node *err_node)
+{
+	if (err_node) {
+		list_del(&err_node->node);
+		kvfree(err_node);
+	}
+}
+
+/* Release every node on @err_data's err_node_list. */
+void amdgpu_ras_error_data_fini(struct ras_err_data *err_data)
+{
+	struct ras_err_node *cur, *next;
+
+	/* _safe variant: each node is unlinked and freed while walking */
+	list_for_each_entry_safe(cur, next, &err_data->err_node_list, node)
+		amdgpu_ras_error_node_release(cur);
+}
+
+/*
+ * Find the node of @err_data whose mcm_info has the same socket_id and die_id
+ * as @mcm_info. Returns NULL when either argument is NULL or nothing matches.
+ */
+static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data,
+		struct amdgpu_smuio_mcm_config_info *mcm_info)
+{
+	struct ras_err_node *cur;
+
+	if (!err_data || !mcm_info)
+		return NULL;
+
+	for_each_ras_error(cur, err_data) {
+		struct amdgpu_smuio_mcm_config_info *id = &cur->err_info.mcm_info;
+
+		if (id->socket_id != mcm_info->socket_id)
+			continue;
+		if (id->die_id == mcm_info->die_id)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate a zeroed error node with an initialised list head.
+ * Returns NULL when the allocation fails.
+ */
+static struct ras_err_node *amdgpu_ras_error_node_new(void)
+{
+	struct ras_err_node *fresh = kvzalloc(sizeof(*fresh), GFP_KERNEL);
+
+	if (fresh)
+		INIT_LIST_HEAD(&fresh->node);
+
+	return fresh;
+}
+
+/*
+ * list_sort() comparator for ras_err_node entries: order by socket_id first,
+ * then by die_id. @priv is unused.
+ *
+ * Returns a negative, zero or positive value when @a sorts before, equal to
+ * or after @b.
+ */
+static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b)
+{
+	struct ras_err_node *nodea = container_of(a, struct ras_err_node, node);
+	struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node);
+	struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info;
+	struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info;
+
+	if (unlikely(infoa->socket_id != infob->socket_id))
+		return infoa->socket_id - infob->socket_id;
+
+	return infoa->die_id - infob->die_id;
+}
+
+/*
+ * Look up the err_info for @mcm_info's (socket_id, die_id), creating a new
+ * node when none exists yet. A new node gets a copy of @mcm_info, bumps
+ * err_list_count and is inserted so the list stays sorted by
+ * ras_err_info_cmp(). Returns NULL when a needed node cannot be allocated.
+ */
+static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
+				struct amdgpu_smuio_mcm_config_info *mcm_info)
+{
+	struct ras_err_node *entry;
+
+	entry = amdgpu_ras_error_find_node_by_id(err_data, mcm_info);
+	if (!entry) {
+		entry = amdgpu_ras_error_node_new();
+		if (!entry)
+			return NULL;
+
+		memcpy(&entry->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
+
+		err_data->err_list_count++;
+		list_add_tail(&entry->node, &err_data->err_node_list);
+		list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
+	}
+
+	return &entry->err_info;
+}
+
+/*
+ * Add @count to the ue_count of both @err_data and the per-(socket, die)
+ * entry selected by @mcm_info. A zero @count is a successful no-op.
+ *
+ * Returns 0 on success, -EINVAL when an argument is NULL or the per-die entry
+ * cannot be obtained.
+ */
+int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
+					struct amdgpu_smuio_mcm_config_info *mcm_info,
+					u64 count)
+{
+	struct ras_err_info *info;
+
+	if (!err_data || !mcm_info)
+		return -EINVAL;
+
+	if (count) {
+		info = amdgpu_ras_error_get_info(err_data, mcm_info);
+		if (!info)
+			return -EINVAL;
+
+		info->ue_count += count;
+		err_data->ue_count += count;
+	}
+
+	return 0;
+}
+
+/*
+ * Add @count to the ce_count of both @err_data and the per-(socket, die)
+ * entry selected by @mcm_info. A zero @count is a successful no-op.
+ *
+ * Returns 0 on success, -EINVAL when an argument is NULL or the per-die entry
+ * cannot be obtained.
+ */
+int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
+					struct amdgpu_smuio_mcm_config_info *mcm_info,
+					u64 count)
+{
+	struct ras_err_info *info;
+
+	if (!err_data || !mcm_info)
+		return -EINVAL;
+
+	if (count) {
+		info = amdgpu_ras_error_get_info(err_data, mcm_info);
+		if (!info)
+			return -EINVAL;
+
+		info->ce_count += count;
+		err_data->ce_count += count;
+	}
+
+	return 0;
+}
+
+/*
+ * Add @count to the de_count of both @err_data and the per-(socket, die)
+ * entry selected by @mcm_info. A zero @count is a successful no-op.
+ *
+ * Returns 0 on success, -EINVAL when an argument is NULL or the per-die entry
+ * cannot be obtained.
+ */
+int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
+					struct amdgpu_smuio_mcm_config_info *mcm_info,
+					u64 count)
+{
+	struct ras_err_info *info;
+
+	if (!err_data || !mcm_info)
+		return -EINVAL;
+
+	if (count) {
+		info = amdgpu_ras_error_get_info(err_data, mcm_info);
+		if (!info)
+			return -EINVAL;
+
+		info->de_count += count;
+		err_data->de_count += count;
+	}
+
+	return 0;
+}
+
+/* MP0 SMN register offsets; shifted left by 2 below to form the SMN address. */
+#define mmMP0_SMN_C2PMSG_92 0x1609C
+#define mmMP0_SMN_C2PMSG_126 0x160BE
+/*
+ * Read the firmware status (MP0_SMN_C2PMSG_92) and the boot error value
+ * (MP0_SMN_C2PMSG_126) of @instance, then emit one dev_info() line for every
+ * error bit that is set in the boot error value.
+ */
+static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
+ u32 instance)
+{
+ u32 socket_id, aid_id, hbm_id;
+ u32 fw_status;
+ u32 boot_error;
+ u64 reg_addr;
+
+ /* The pattern for smn addressing in other SOCs could be different from
+ * the one for aqua_vanjaram. We should revisit the code if the pattern
+ * changes. In that case, replace the aqua_vanjaram implementation
+ * with a more common helper */
+ reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
+ aqua_vanjaram_encode_ext_smn_addressing(instance);
+ fw_status = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
+
+ reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
+ aqua_vanjaram_encode_ext_smn_addressing(instance);
+ boot_error = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
+
+ socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error);
+ aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error);
+ /* an HBM_ID field value of 1 is reported as hbm 0, anything else as hbm 1 */
+ hbm_id = ((1 == AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error)) ? 0 : 1);
+
+ if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, memory training failed\n",
+ socket_id, aid_id, hbm_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, firmware load failed at boot time\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, wafl link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, xgmi link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, usr cp link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, usr dp link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm memory test failed\n",
+ socket_id, aid_id, hbm_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n",
+ socket_id, aid_id, hbm_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_DATA_ABORT(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_GENERIC(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, Boot Controller Generic Error\n",
+ socket_id, aid_id, fw_status);
+}
+
+/*
+ * Poll MP0_SMN_C2PMSG_92 of @instance, up to
+ * AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT times with a 1 ms sleep after each
+ * miss, until its masked value equals AMDGPU_RAS_BOOT_STEADY_STATUS.
+ *
+ * Returns false as soon as the steady status is seen, true when the polling
+ * limit is exhausted.
+ */
+static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev,
+					   u32 instance)
+{
+	u64 status_addr;
+	u32 status;
+	int attempt = 0;
+
+	status_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
+		      aqua_vanjaram_encode_ext_smn_addressing(instance);
+
+	while (attempt < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT) {
+		status = amdgpu_device_indirect_rreg_ext(adev, status_addr);
+		if ((status & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS)
+			return false;
+
+		msleep(1);
+		attempt++;
+	}
+
+	return true;
+}
+
+/*
+ * For each of the first @num_instances instances, report boot-time errors
+ * when amdgpu_ras_boot_error_detected() flags that instance.
+ */
+void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances)
+{
+	u32 inst;
+
+	for (inst = 0; inst < num_instances; inst++) {
+		if (!amdgpu_ras_boot_error_detected(adev, inst))
+			continue;
+
+		amdgpu_ras_boot_time_error_reporting(adev, inst);
+	}
+}
+
+/*
+ * amdgpu_ras_reserve_page - reserve the GPU page with frame number @pfn
+ *
+ * Addresses inside a recorded critical region are skipped and reported as
+ * success. Otherwise, under page_rsv_lock, the page status is queried and the
+ * range is reserved only when that query returns -ENOENT.
+ *
+ * Returns 0 for a critical address, else the result of the query or of the
+ * reservation. The RAS context is dereferenced without a NULL check.
+ */
+int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+	uint64_t addr = pfn << AMDGPU_GPU_PAGE_SHIFT;
+	int status;
+
+	if (amdgpu_ras_check_critical_address(adev, addr))
+		return 0;
+
+	mutex_lock(&con->page_rsv_lock);
+	status = amdgpu_vram_mgr_query_page_status(mgr, addr);
+	if (status == -ENOENT)
+		status = amdgpu_vram_mgr_reserve_range(mgr, addr, AMDGPU_GPU_PAGE_SIZE);
+	mutex_unlock(&con->page_rsv_lock);
+
+	return status;
+}
+
+/*
+ * amdgpu_ras_event_log_print - printf-style KERN_INFO device log
+ *
+ * The message is prefixed with "{<event_id>}" when @event_id passes
+ * RAS_EVENT_ID_IS_VALID(), and printed without a prefix otherwise.
+ */
+void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
+				const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list ap;
+
+	va_start(ap, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &ap;
+
+	if (!RAS_EVENT_ID_IS_VALID(event_id))
+		dev_printk(KERN_INFO, adev->dev, "%pV", &vaf);
+	else
+		dev_printk(KERN_INFO, adev->dev, "{%llu}%pV", event_id, &vaf);
+
+	va_end(ap);
+}
+
+/* Return the is_rma flag of the RAS context, or false without a context. */
+bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	return con && con->is_rma;
+}
+
+/*
+ * amdgpu_ras_add_critical_region - record the VRAM ranges backing @bo
+ *
+ * One ras_critical_region is appended to critical_region_head for every buddy
+ * block of @bo's VRAM resource. A @bo that already has an entry on the list
+ * is left untouched and reported as success.
+ *
+ * If an allocation fails part-way through, the entries already added for
+ * @bo are removed again, so the list never holds a partial record and a later
+ * retry is not mistaken for "already recorded".
+ *
+ * Returns 0 on success, -EINVAL when @bo or its resource is NULL, -ENOMEM on
+ * allocation failure. The RAS context is dereferenced without a NULL check.
+ */
+int amdgpu_ras_add_critical_region(struct amdgpu_device *adev,
+			struct amdgpu_bo *bo)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct amdgpu_vram_mgr_resource *vres;
+	struct ras_critical_region *region, *tmp;
+	struct drm_buddy_block *block;
+	int ret = 0;
+
+	if (!bo || !bo->tbo.resource)
+		return -EINVAL;
+
+	vres = to_amdgpu_vram_mgr_resource(bo->tbo.resource);
+
+	mutex_lock(&con->critical_region_lock);
+
+	/* Check if the bo had been recorded */
+	list_for_each_entry(region, &con->critical_region_head, node)
+		if (region->bo == bo)
+			goto out;
+
+	/* Record new critical amdgpu bo */
+	list_for_each_entry(block, &vres->blocks, link) {
+		region = kzalloc(sizeof(*region), GFP_KERNEL);
+		if (!region) {
+			ret = -ENOMEM;
+			goto rollback;
+		}
+		region->bo = bo;
+		region->start = amdgpu_vram_mgr_block_start(block);
+		region->size = amdgpu_vram_mgr_block_size(block);
+		list_add_tail(&region->node, &con->critical_region_head);
+	}
+
+	goto out;
+
+rollback:
+	/*
+	 * No entry for this bo existed before the loop above, so every entry
+	 * that references it now was added by this call.
+	 */
+	list_for_each_entry_safe(region, tmp, &con->critical_region_head, node) {
+		if (region->bo == bo) {
+			list_del(&region->node);
+			kfree(region);
+		}
+	}
+
+out:
+	mutex_unlock(&con->critical_region_lock);
+
+	return ret;
+}
+
+/*
+ * Record adev->mman.fw_reserved_memory as a critical region. The return value
+ * of amdgpu_ras_add_critical_region() is ignored.
+ */
+static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev)
+{
+ amdgpu_ras_add_critical_region(adev, adev->mman.fw_reserved_memory);
+}
+
+/*
+ * Unlink and free every recorded critical region while holding
+ * critical_region_lock. The RAS context is dereferenced without a NULL check.
+ */
+static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_critical_region *cur;
+
+	mutex_lock(&con->critical_region_lock);
+	while (!list_empty(&con->critical_region_head)) {
+		cur = list_first_entry(&con->critical_region_head,
+				       struct ras_critical_region, node);
+		list_del(&cur->node);
+		kfree(cur);
+	}
+	mutex_unlock(&con->critical_region_lock);
+}
+
+/*
+ * Return true when @addr lies in [start, start + size) of any recorded
+ * critical region. The list is walked under critical_region_lock; the RAS
+ * context is dereferenced without a NULL check.
+ */
+bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_critical_region *cur;
+	bool hit = false;
+
+	mutex_lock(&con->critical_region_lock);
+	list_for_each_entry(cur, &con->critical_region_head, node) {
+		if (addr < cur->start)
+			continue;
+		if (addr >= (cur->start + cur->size))
+			continue;
+
+		hit = true;
+		break;
+	}
+	mutex_unlock(&con->critical_region_lock);
+
+	return hit;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index ffb49b2d533a..6cf0dfd38be8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -26,11 +26,33 @@
#include <linux/debugfs.h>
#include <linux/list.h>
+#include <linux/kfifo.h>
+#include <linux/radix-tree.h>
#include "ta_ras_if.h"
#include "amdgpu_ras_eeprom.h"
+#include "amdgpu_smuio.h"
+#include "amdgpu_aca.h"
struct amdgpu_iv_entry;
+/*
+ * Bit-field accessors that amdgpu_ras_boot_time_error_reporting() applies to
+ * the boot error value it reads from MP0_SMN_C2PMSG_126.
+ */
+#define AMDGPU_RAS_GPU_ERR_MEM_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 0, 0)
+#define AMDGPU_RAS_GPU_ERR_FW_LOAD(x) AMDGPU_GET_REG_FIELD(x, 1, 1)
+#define AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 2, 2)
+#define AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 3, 3)
+#define AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 4, 4)
+#define AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 5, 5)
+#define AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(x) AMDGPU_GET_REG_FIELD(x, 6, 6)
+#define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x) AMDGPU_GET_REG_FIELD(x, 7, 7)
+#define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8)
+#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11)
+#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13)
+#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x) AMDGPU_GET_REG_FIELD(x, 29, 29)
+#define AMDGPU_RAS_GPU_ERR_GENERIC(x) AMDGPU_GET_REG_FIELD(x, 30, 30)
+
+/*
+ * Used by amdgpu_ras_boot_error_detected(): the boot status is read up to
+ * POLLING_LIMIT times (1 ms sleep between reads) until the value masked with
+ * STATUS_MASK equals STEADY_STATUS.
+ */
+#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100
+#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA
+#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF
+
+
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
/* position of instance value in sub_block_index of
* ta_ras_trigger_error_input, the sub block uses lower 12 bits
@@ -38,6 +60,24 @@ struct amdgpu_iv_entry;
#define AMDGPU_RAS_INST_MASK 0xfffff000
#define AMDGPU_RAS_INST_SHIFT 0xc
+/* The socket id is packed into bits [31:29] of the ras feature mask */
+#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29
+#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000
+
+/* Reserve 8 physical dram rows for possible retirement.
+ * In the worst case, this costs 8 * 2MB of memory in the vram domain */
+#define AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT (16ULL << 20)
+/* The high three bits hold the socket id; mask them off to get the feature bits */
+#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
+
+/* An event id with bit 63 set is invalid */
+#define RAS_EVENT_INVALID_ID (BIT_ULL(63))
+#define RAS_EVENT_ID_IS_VALID(x) (!((x) & BIT_ULL(63)))
+
+/* printf-style wrapper around amdgpu_ras_event_log_print() */
+#define RAS_EVENT_LOG(adev, id, fmt, ...) \
+ amdgpu_ras_event_log_print((adev), (id), (fmt), ##__VA_ARGS__)
+
+/* Mark an event, passing the return address of the calling function as caller */
+#define amdgpu_ras_mark_ras_event(adev, type) \
+ (amdgpu_ras_mark_ras_event_caller((adev), (type), __builtin_return_address(0)))
+
+
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
AMDGPU_RAS_BLOCK__SDMA,
@@ -56,8 +96,12 @@ enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__MCA,
AMDGPU_RAS_BLOCK__VCN,
AMDGPU_RAS_BLOCK__JPEG,
+ AMDGPU_RAS_BLOCK__IH,
+ AMDGPU_RAS_BLOCK__MPIO,
+ AMDGPU_RAS_BLOCK__MMSCH,
- AMDGPU_RAS_BLOCK__LAST
+ AMDGPU_RAS_BLOCK__LAST,
+ AMDGPU_RAS_BLOCK__ANY = -1
};
enum amdgpu_ras_mca_block {
@@ -319,6 +363,13 @@ enum amdgpu_ras_ret {
AMDGPU_RAS_PT,
};
+/*
+ * Values written by amdgpu_ras_get_error_query_mode():
+ * INVALID when there is no RAS context, VIRT_ERROR_COUNT for an SR-IOV VF,
+ * FIRMWARE when a set-debug-mode hook exists and debug mode is off,
+ * DIRECT otherwise.
+ */
+enum amdgpu_ras_error_query_mode {
+ AMDGPU_RAS_INVALID_ERROR_QUERY = 0,
+ AMDGPU_RAS_DIRECT_ERROR_QUERY = 1,
+ AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2,
+ AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY = 3,
+};
+
/* ras error status reisger fields */
#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0
#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L
@@ -385,13 +436,84 @@ struct umc_ecc_info {
int record_ce_addr_supported;
};
+/*
+ * RAS event categories. RAS_EVENT_TYPE_COUNT is not an event: it sizes
+ * ras_event_manager.event_state[] and is the upper bound checked by
+ * amdgpu_ras_mark_ras_event_caller() and amdgpu_ras_acquire_event_id().
+ */
+enum ras_event_type {
+ RAS_EVENT_TYPE_INVALID = 0,
+ RAS_EVENT_TYPE_FATAL,
+ RAS_EVENT_TYPE_POISON_CREATION,
+ RAS_EVENT_TYPE_POISON_CONSUMPTION,
+ RAS_EVENT_TYPE_COUNT,
+};
+
+/*
+ * Per-event-type bookkeeping. amdgpu_ras_mark_ras_event_caller() stores the
+ * sequence number it draws from ras_event_manager.seqno in last_seqno and
+ * increments count; amdgpu_ras_acquire_event_id() returns last_seqno.
+ */
+struct ras_event_state {
+ u64 last_seqno;
+ atomic64_t count;
+};
+
+/*
+ * Event bookkeeping shared by all event types: seqno is the single counter
+ * that sequence numbers are drawn from, event_state[] is indexed by
+ * enum ras_event_type.
+ */
+struct ras_event_manager {
+ atomic64_t seqno;
+ struct ras_event_state event_state[RAS_EVENT_TYPE_COUNT];
+};
+
+/* Pairs an event type with an event id; embedded in struct ras_query_context. */
+struct ras_event_id {
+ enum ras_event_type type;
+ u64 event_id;
+};
+
+/*
+ * Carries the event id a query is associated with.
+ * NOTE(review): its users are not visible in this part of the patch - confirm.
+ */
+struct ras_query_context {
+ struct ras_event_id evid;
+};
+
+/*
+ * Callback type of ras_poison_msg.pasid_fn and of the pasid_fn argument of
+ * amdgpu_ras_put_poison_req().
+ */
+typedef int (*pasid_notify)(struct amdgpu_device *adev,
+ uint16_t pasid, void *data);
+
+/*
+ * Element type of the poison_fifo kfifo in struct amdgpu_ras. The fields
+ * mirror the block/pasid/pasid_fn/data/reset arguments of
+ * amdgpu_ras_put_poison_req().
+ */
+struct ras_poison_msg {
+ enum amdgpu_ras_block block;
+ uint16_t pasid;
+ uint32_t reset;
+ pasid_notify pasid_fn;
+ void *data;
+};
+
+/*
+ * Embedded in struct ras_ecc_err as err_pages.
+ * NOTE(review): presumably pfn points to an array of count page frame
+ * numbers - confirm against the code that fills it.
+ */
+struct ras_err_pages {
+ uint32_t count;
+ uint64_t *pfn;
+};
+
+/*
+ * One ECC error record.
+ * NOTE(review): the meaning of status/ipid/addr/pa_pfn is not established by
+ * the code visible in this part of the patch - confirm against its users.
+ */
+struct ras_ecc_err {
+ uint64_t status;
+ uint64_t ipid;
+ uint64_t addr;
+ uint64_t pa_pfn;
+ /* save global channel index across all UMC instances */
+ uint32_t channel_idx;
+ struct ras_err_pages err_pages;
+};
+
+/*
+ * Embedded in struct amdgpu_ras as umc_ecc_log.
+ * NOTE(review): presumably lock protects de_page_tree and the counters -
+ * confirm against the code that uses them.
+ */
+struct ras_ecc_log_info {
+ struct mutex lock;
+ struct radix_tree_root de_page_tree;
+ uint64_t de_queried_count;
+ uint64_t consumption_q_count;
+};
+
+/*
+ * One VRAM range recorded by amdgpu_ras_add_critical_region(): start and size
+ * come from amdgpu_vram_mgr_block_start()/amdgpu_vram_mgr_block_size() of one
+ * buddy block of bo; node links into amdgpu_ras.critical_region_head.
+ */
+struct ras_critical_region {
+ struct list_head node;
+ struct amdgpu_bo *bo;
+ uint64_t start;
+ uint64_t size;
+};
+
struct amdgpu_ras {
/* ras infrastructure */
/* for ras itself. */
uint32_t features;
+ uint32_t schema;
struct list_head head;
/* sysfs */
struct device_attribute features_attr;
+ struct device_attribute version_attr;
+ struct device_attribute schema_attr;
+ struct device_attribute event_state_attr;
struct bin_attribute badpages_attr;
struct dentry *de_ras_eeprom_table;
/* block array */
@@ -400,6 +522,7 @@ struct amdgpu_ras {
/* gpu recovery */
struct work_struct recovery_work;
atomic_t in_recovery;
+ atomic_t rma_in_recovery;
struct amdgpu_device *adev;
/* error handler data */
struct ras_err_handler_data *eh_data;
@@ -430,28 +553,82 @@ struct amdgpu_ras {
/* Indicates smu whether need update bad channel info */
bool update_channel_flag;
+ /* Record status of smu mca/aca debug mode */
+ bool is_aca_debug_mode;
+ bool is_rma;
/* Record special requirements of gpu reset caller */
uint32_t gpu_reset_flags;
+
+ struct task_struct *page_retirement_thread;
+ wait_queue_head_t page_retirement_wq;
+ struct mutex page_retirement_lock;
+ atomic_t page_retirement_req_cnt;
+ atomic_t poison_creation_count;
+ atomic_t poison_consumption_count;
+ struct mutex page_rsv_lock;
+ DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128);
+ struct ras_ecc_log_info umc_ecc_log;
+ struct delayed_work page_retirement_dwork;
+
+ /* ras errors detected */
+ unsigned long ras_err_state;
+
+ /* RAS event manager */
+ struct ras_event_manager __event_mgr;
+ struct ras_event_manager *event_mgr;
+
+ uint64_t reserved_pages_in_bytes;
+
+ pid_t init_task_pid;
+ char init_task_comm[TASK_COMM_LEN];
+
+ int bad_page_num;
+
+ struct list_head critical_region_head;
+ struct mutex critical_region_lock;
+
+ /* Protect poison injection */
+ struct mutex poison_lock;
};
struct ras_fs_data {
- char sysfs_name[32];
+ char sysfs_name[48];
char debugfs_name[32];
};
+/*
+ * Error counts for one (socket_id, die_id) pair identified by mcm_info. The
+ * amdgpu_ras_error_statistic_{ce,ue,de}_count() helpers add to the matching
+ * counter.
+ */
+struct ras_err_info {
+ struct amdgpu_smuio_mcm_config_info mcm_info;
+ u64 ce_count;
+ u64 ue_count;
+ u64 de_count;
+};
+
+/* List wrapper for a ras_err_info; node links into ras_err_data.err_node_list. */
+struct ras_err_node {
+ struct list_head node;
+ struct ras_err_info err_info;
+};
+
struct ras_err_data {
unsigned long ue_count;
unsigned long ce_count;
+ unsigned long de_count;
unsigned long err_addr_cnt;
struct eeprom_table_record *err_addr;
+ unsigned long err_addr_len;
+ u32 err_list_count;
+ struct list_head err_node_list;
};
+/* Iterate err_node over every ras_err_node on err_data's err_node_list. */
+#define for_each_ras_error(err_node, err_data) \
+ list_for_each_entry(err_node, &(err_data)->err_node_list, node)
+
struct ras_err_handler_data {
/* point to bad page records array */
struct eeprom_table_record *bps;
/* the count of entries */
int count;
+ int count_saved;
/* the space can place new entries */
int space_left;
};
@@ -494,6 +671,8 @@ struct ras_manager {
struct ras_ih_data ih_data;
struct ras_err_data err_data;
+
+ struct aca_handle aca_handle;
};
struct ras_badpage {
@@ -513,6 +692,7 @@ struct ras_query_if {
struct ras_common_if head;
unsigned long ue_count;
unsigned long ce_count;
+ unsigned long de_count;
};
struct ras_inject_if {
@@ -582,8 +762,8 @@ struct amdgpu_ras_block_hw_ops {
* 8: feature disable
*/
-
-int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
+int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev);
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info);
void amdgpu_ras_resume(struct amdgpu_device *adev);
void amdgpu_ras_suspend(struct amdgpu_device *adev);
@@ -595,7 +775,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages);
+ struct eeprom_table_record *bps, int pages, bool from_rom);
int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
unsigned long *new_cnt);
@@ -637,6 +817,12 @@ amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
return TA_RAS_BLOCK__VCN;
case AMDGPU_RAS_BLOCK__JPEG:
return TA_RAS_BLOCK__JPEG;
+ case AMDGPU_RAS_BLOCK__IH:
+ return TA_RAS_BLOCK__IH;
+ case AMDGPU_RAS_BLOCK__MPIO:
+ return TA_RAS_BLOCK__MPIO;
+ case AMDGPU_RAS_BLOCK__MMSCH:
+ return TA_RAS_BLOCK__MMSCH;
default:
WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block);
return TA_RAS_BLOCK__UMC;
@@ -691,6 +877,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
struct ras_query_if *info);
+int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
enum amdgpu_ras_block block);
@@ -743,6 +931,12 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev);
int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con);
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable);
+bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev);
+bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
+ unsigned int *mode);
+
int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *ras_block_obj);
void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
@@ -767,4 +961,51 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
const struct amdgpu_ras_err_status_reg_entry *reg_list,
uint32_t reg_list_size,
uint32_t instance);
+
+int amdgpu_ras_error_data_init(struct ras_err_data *err_data);
+void amdgpu_ras_error_data_fini(struct ras_err_data *err_data);
+int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances);
+int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ const struct aca_info *aca_info, void *data);
+int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk);
+
+ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
+ struct aca_handle *handle, char *buf, void *data);
+
+void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status);
+bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev);
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev);
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block);
+
+u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type);
+int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,
+ const void *caller);
+
+int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn);
+
+int amdgpu_ras_add_critical_region(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr);
+
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+
+bool amdgpu_ras_in_recovery(struct amdgpu_device *adev);
+
+__printf(3, 4)
+void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
+ const char *fmt, ...);
+
+bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 595d5e535aca..3eb3fb55ccb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -58,7 +58,7 @@
#define EEPROM_I2C_MADDR_4 0x40000
/*
- * The 2 macros bellow represent the actual size in bytes that
+ * The 2 macros below represent the actual size in bytes that
* those entities occupy in the EEPROM memory.
* RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which
* uses uint64 to store 6b fields such as retired_page.
@@ -149,11 +149,11 @@
RAS_TABLE_HEADER_SIZE - \
RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE)
-#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
+#define to_amdgpu_device(x) ((container_of(x, struct amdgpu_ras, eeprom_control))->adev)
static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */
case IP_VERSION(11, 0, 7): /* Sienna cichlid */
case IP_VERSION(13, 0, 0):
@@ -161,6 +161,8 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 10):
return true;
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
return (adev->gmc.is_app_apu) ? false : true;
default:
return false;
@@ -176,7 +178,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
if (!control)
return false;
- if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
+ if (adev->bios && amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
/* The address given by VBIOS is an 8-bit, wire-format
* address, i.e. the most significant byte.
*
@@ -191,7 +193,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
return true;
}
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(11, 0, 2):
/* VEGA20 and ARCTURUS */
if (adev->asic_type == CHIP_VEGA20)
@@ -214,8 +216,16 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
control->i2c_address = EEPROM_I2C_MADDR_0;
return true;
case IP_VERSION(13, 0, 0):
+ if (strnstr(atom_ctx->vbios_pn, "D707",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
control->i2c_address = EEPROM_I2C_MADDR_4;
return true;
default:
@@ -267,10 +277,11 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
up_read(&adev->reset_domain->sem);
if (res < 0) {
- DRM_ERROR("Failed to write EEPROM table header:%d", res);
+ dev_err(adev->dev, "Failed to write EEPROM table header:%d",
+ res);
} else if (res < RAS_TABLE_HEADER_SIZE) {
- DRM_ERROR("Short write:%d out of %d\n",
- res, RAS_TABLE_HEADER_SIZE);
+ dev_err(adev->dev, "Short write:%d out of %d\n", res,
+ RAS_TABLE_HEADER_SIZE);
res = -EIO;
} else {
res = 0;
@@ -313,7 +324,8 @@ static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control)
buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
if (!buf) {
- DRM_ERROR("Failed to alloc buf to write table ras info\n");
+ dev_err(adev->dev,
+ "Failed to alloc buf to write table ras info\n");
return -ENOMEM;
}
@@ -328,10 +340,11 @@ static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control)
up_read(&adev->reset_domain->sem);
if (res < 0) {
- DRM_ERROR("Failed to write EEPROM table ras info:%d", res);
+ dev_err(adev->dev, "Failed to write EEPROM table ras info:%d",
+ res);
} else if (res < RAS_TABLE_V2_1_INFO_SIZE) {
- DRM_ERROR("Short write:%d out of %d\n",
- res, RAS_TABLE_V2_1_INFO_SIZE);
+ dev_err(adev->dev, "Short write:%d out of %d\n", res,
+ RAS_TABLE_V2_1_INFO_SIZE);
res = -EIO;
} else {
res = 0;
@@ -398,6 +411,25 @@ static int amdgpu_ras_eeprom_correct_header_tag(
return res;
}
+/*
+ * Choose the RAS EEPROM table format from the UMC IP version and store it in
+ * the cached table header (control->tbl_hdr.version): UMC 8.10.0 selects
+ * v2.1, UMC 12.0.0 and 12.5.0 select v3, and anything else selects v1.
+ */
+static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control *control)
+{
+	switch (amdgpu_ip_version(to_amdgpu_device(control), UMC_HWIP, 0)) {
+	case IP_VERSION(12, 0, 0):
+	case IP_VERSION(12, 5, 0):
+		control->tbl_hdr.version = RAS_TABLE_VER_V3;
+		break;
+	case IP_VERSION(8, 10, 0):
+		control->tbl_hdr.version = RAS_TABLE_VER_V2_1;
+		break;
+	default:
+		control->tbl_hdr.version = RAS_TABLE_VER_V1;
+		break;
+	}
+}
+
/**
* amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table
* @control: pointer to control structure
@@ -417,13 +449,9 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
mutex_lock(&control->ras_tbl_mutex);
hdr->header = RAS_TABLE_HDR_VAL;
- if (adev->umc.ras &&
- adev->umc.ras->set_eeprom_table_version)
- adev->umc.ras->set_eeprom_table_version(hdr);
- else
- hdr->version = RAS_TABLE_VER_V1;
+ amdgpu_ras_set_eeprom_table_version(control);
- if (hdr->version == RAS_TABLE_VER_V2_1) {
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
hdr->first_rec_offset = RAS_RECORD_START_V2_1;
hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
RAS_TABLE_V2_1_INFO_SIZE;
@@ -441,7 +469,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
}
csum = __calc_hdr_byte_sum(control);
- if (hdr->version == RAS_TABLE_VER_V2_1)
+ if (hdr->version >= RAS_TABLE_VER_V2_1)
csum += __calc_ras_info_byte_sum(control);
csum = -csum;
hdr->checksum = csum;
@@ -450,9 +478,12 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
res = __write_table_ras_info(control);
control->ras_num_recs = 0;
+ control->ras_num_bad_pages = 0;
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
control->ras_fri = 0;
- amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs);
+ amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages);
control->bad_channel_bitmap = 0;
amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
@@ -537,16 +568,17 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
return false;
if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
- if (amdgpu_bad_page_threshold == -1) {
+ if (con->eeprom_control.ras_num_bad_pages > con->bad_page_cnt_threshold)
dev_warn(adev->dev, "RAS records:%d exceed threshold:%d",
- con->eeprom_control.ras_num_recs, con->bad_page_cnt_threshold);
+ con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
dev_warn(adev->dev,
- "But GPU can be operated due to bad_page_threshold = -1.\n");
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n");
return false;
} else {
- dev_warn(adev->dev, "This GPU is in BAD status.");
- dev_warn(adev->dev, "Please retire it or set a larger "
- "threshold value when reloading driver.\n");
+ dev_warn(adev->dev,
+ "Please consider adjusting the customized threshold.\n");
return true;
}
}
@@ -580,13 +612,13 @@ static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control,
buf, buf_size);
up_read(&adev->reset_domain->sem);
if (res < 0) {
- DRM_ERROR("Writing %d EEPROM table records error:%d",
- num, res);
+ dev_err(adev->dev, "Writing %d EEPROM table records error:%d",
+ num, res);
} else if (res < buf_size) {
/* Short write, return error.
*/
- DRM_ERROR("Wrote %d records out of %d",
- res / RAS_TABLE_RECORD_SIZE, num);
+ dev_err(adev->dev, "Wrote %d records out of %d",
+ res / RAS_TABLE_RECORD_SIZE, num);
res = -EIO;
} else {
res = 0;
@@ -601,6 +633,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
const u32 num)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
+ struct amdgpu_device *adev = to_amdgpu_device(control);
u32 a, b, i;
u8 *buf, *pp;
int res;
@@ -616,7 +649,8 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
__encode_table_record_to_buf(control, &record[i], pp);
/* update bad channel bitmap */
- if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+ !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
control->bad_channel_bitmap |= 1 << record[i].mem_channel;
con->update_channel_flag = true;
}
@@ -702,6 +736,14 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
control->ras_num_recs = 1 + (control->ras_max_record_count + b
- control->ras_fri)
% control->ras_max_record_count;
+
+ /* Older ASICs (UMC major version < 12) still save PA records to the EEPROM, as before. */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12)
+ control->ras_num_pa_recs += num;
+ else
+ control->ras_num_mca_recs += num;
+
+ control->ras_num_bad_pages = con->bad_page_num;
Out:
kfree(buf);
return res;
@@ -719,18 +761,29 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
/* Modify the header if it exceeds.
*/
if (amdgpu_bad_page_threshold != 0 &&
- control->ras_num_recs >= ras->bad_page_cnt_threshold) {
+ control->ras_num_bad_pages > ras->bad_page_cnt_threshold) {
dev_warn(adev->dev,
"Saved bad pages %d reaches threshold value %d\n",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
- control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
- if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) {
- control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
- control->tbl_rai.health_percent = 0;
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+
+ if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev))
+ dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
+
+ if ((amdgpu_bad_page_threshold != -1) &&
+ (amdgpu_bad_page_threshold != -2)) {
+ control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) {
+ control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
+ control->tbl_rai.health_percent = 0;
+ }
+ ras->is_rma = true;
}
+
+ /* ignore the -ENOTSUPP return value */
+ amdgpu_dpm_send_rma_reason(adev);
}
- if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
RAS_TABLE_V2_1_INFO_SIZE +
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
@@ -742,8 +795,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
if (!buf) {
- DRM_ERROR("allocating memory for table of size %d bytes failed\n",
- control->tbl_hdr.tbl_size);
+ dev_err(adev->dev,
+ "allocating memory for table of size %d bytes failed\n",
+ control->tbl_hdr.tbl_size);
res = -ENOMEM;
goto Out;
}
@@ -755,12 +809,11 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
buf, buf_size);
up_read(&adev->reset_domain->sem);
if (res < 0) {
- DRM_ERROR("EEPROM failed reading records:%d\n",
- res);
+ dev_err(adev->dev, "EEPROM failed reading records:%d\n", res);
goto Out;
} else if (res < buf_size) {
- DRM_ERROR("EEPROM read %d out of %d bytes\n",
- res, buf_size);
+ dev_err(adev->dev, "EEPROM read %d out of %d bytes\n", res,
+ buf_size);
res = -EIO;
goto Out;
}
@@ -770,10 +823,10 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
* now calculate gpu health percent
*/
if (amdgpu_bad_page_threshold != 0 &&
- control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
- control->ras_num_recs < ras->bad_page_cnt_threshold)
+ control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 &&
+ control->ras_num_bad_pages <= ras->bad_page_cnt_threshold)
control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
- control->ras_num_recs) * 100) /
+ control->ras_num_bad_pages) * 100) /
ras->bad_page_cnt_threshold;
/* Recalc the checksum.
@@ -783,7 +836,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
csum += *pp;
csum += __calc_hdr_byte_sum(control);
- if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
csum += __calc_ras_info_byte_sum(control);
/* avoid sign extension when assigning to "checksum" */
csum = -csum;
@@ -814,20 +867,29 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
const u32 num)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
- int res;
+ int res, i;
+ uint64_t nps = AMDGPU_NPS1_PARTITION_MODE;
if (!__is_ras_eeprom_supported(adev))
return 0;
if (num == 0) {
- DRM_ERROR("will not append 0 records\n");
+ dev_err(adev->dev, "will not append 0 records\n");
return -EINVAL;
} else if (num > control->ras_max_record_count) {
- DRM_ERROR("cannot append %d records than the size of table %d\n",
- num, control->ras_max_record_count);
+ dev_err(adev->dev,
+ "cannot append %d records than the size of table %d\n",
+ num, control->ras_max_record_count);
return -EINVAL;
}
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
+ /* set the new channel index flag */
+ for (i = 0; i < num; i++)
+ record[i].retired_page |= (nps << UMC_NPS_SHIFT);
+
mutex_lock(&control->ras_tbl_mutex);
res = amdgpu_ras_eeprom_append_table(control, record, num);
@@ -837,6 +899,11 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
amdgpu_ras_debugfs_set_ret_size(control);
mutex_unlock(&control->ras_tbl_mutex);
+
+ /* clear channel index flag, the flag is only saved on eeprom */
+ for (i = 0; i < num; i++)
+ record[i].retired_page &= ~(nps << UMC_NPS_SHIFT);
+
return res;
}
@@ -866,13 +933,13 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
buf, buf_size);
up_read(&adev->reset_domain->sem);
if (res < 0) {
- DRM_ERROR("Reading %d EEPROM table records error:%d",
- num, res);
+ dev_err(adev->dev, "Reading %d EEPROM table records error:%d",
+ num, res);
} else if (res < buf_size) {
/* Short read, return error.
*/
- DRM_ERROR("Read %d records out of %d",
- res / RAS_TABLE_RECORD_SIZE, num);
+ dev_err(adev->dev, "Read %d records out of %d",
+ res / RAS_TABLE_RECORD_SIZE, num);
res = -EIO;
} else {
res = 0;
@@ -906,11 +973,11 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
return 0;
if (num == 0) {
- DRM_ERROR("will not read 0 records\n");
+ dev_err(adev->dev, "will not read 0 records\n");
return -EINVAL;
} else if (num > control->ras_num_recs) {
- DRM_ERROR("too many records to read:%d available:%d\n",
- num, control->ras_num_recs);
+ dev_err(adev->dev, "too many records to read:%d available:%d\n",
+ num, control->ras_num_recs);
return -EINVAL;
}
@@ -969,7 +1036,8 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
__decode_table_record_from_buf(control, &record[i], pp);
/* update bad channel bitmap */
- if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+ !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
control->bad_channel_bitmap |= 1 << record[i].mem_channel;
con->update_channel_flag = true;
}
@@ -983,7 +1051,10 @@ Out:
uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control)
{
- if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ /* get available eeprom table version first before eeprom table init */
+ amdgpu_ras_set_eeprom_table_version(control);
+
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
return RAS_MAX_RECORD_COUNT_V2_1;
else
return RAS_MAX_RECORD_COUNT;
@@ -1228,7 +1299,7 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
int buf_size, res;
u8 csum, *buf, *pp;
- if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
buf_size = RAS_TABLE_HEADER_SIZE +
RAS_TABLE_V2_1_INFO_SIZE +
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
@@ -1238,7 +1309,8 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
buf = kzalloc(buf_size, GFP_KERNEL);
if (!buf) {
- DRM_ERROR("Out of memory checking RAS table checksum.\n");
+ dev_err(adev->dev,
+ "Out of memory checking RAS table checksum.\n");
return -ENOMEM;
}
@@ -1247,7 +1319,7 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
control->ras_header_offset,
buf, buf_size);
if (res < buf_size) {
- DRM_ERROR("Partial read for checksum, res:%d\n", res);
+ dev_err(adev->dev, "Partial read for checksum, res:%d\n", res);
/* On partial reads, return -EIO.
*/
if (res >= 0)
@@ -1272,7 +1344,8 @@ static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
if (!buf) {
- DRM_ERROR("Failed to alloc buf to read EEPROM table ras info\n");
+ dev_err(adev->dev,
+ "Failed to alloc buf to read EEPROM table ras info\n");
return -ENOMEM;
}
@@ -1284,7 +1357,8 @@ static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
control->i2c_address + control->ras_info_offset,
buf, RAS_TABLE_V2_1_INFO_SIZE);
if (res < RAS_TABLE_V2_1_INFO_SIZE) {
- DRM_ERROR("Failed to read EEPROM table ras info, res:%d", res);
+ dev_err(adev->dev,
+ "Failed to read EEPROM table ras info, res:%d", res);
res = res >= 0 ? -EIO : res;
goto Out;
}
@@ -1296,8 +1370,7 @@ Out:
return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
}
-int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
- bool *exceed_err_limit)
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 };
@@ -1305,7 +1378,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
int res;
- *exceed_err_limit = false;
+ ras->is_rma = false;
if (!__is_ras_eeprom_supported(adev))
return 0;
@@ -1326,28 +1399,76 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
control->i2c_address + control->ras_header_offset,
buf, RAS_TABLE_HEADER_SIZE);
if (res < RAS_TABLE_HEADER_SIZE) {
- DRM_ERROR("Failed to read EEPROM table header, res:%d", res);
+ dev_err(adev->dev, "Failed to read EEPROM table header, res:%d",
+ res);
return res >= 0 ? -EIO : res;
}
__decode_table_header_from_buf(hdr, buf);
- if (hdr->version == RAS_TABLE_VER_V2_1) {
+ if (hdr->header != RAS_TABLE_HDR_VAL &&
+ hdr->header != RAS_TABLE_HDR_BAD) {
+ dev_info(adev->dev, "Creating a new EEPROM table");
+ return amdgpu_ras_eeprom_reset_table(control);
+ }
+
+ switch (hdr->version) {
+ case RAS_TABLE_VER_V2_1:
+ case RAS_TABLE_VER_V3:
control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
control->ras_record_offset = RAS_RECORD_START_V2_1;
control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
- } else {
+ break;
+ case RAS_TABLE_VER_V1:
control->ras_num_recs = RAS_NUM_RECS(hdr);
control->ras_record_offset = RAS_RECORD_START;
control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ break;
+ default:
+ dev_err(adev->dev,
+ "RAS header invalid, unsupported version: %u",
+ hdr->version);
+ return -EINVAL;
}
+
+ if (control->ras_num_recs > control->ras_max_record_count) {
+ dev_err(adev->dev,
+ "RAS header invalid, records in header: %u max allowed :%u",
+ control->ras_num_recs, control->ras_max_record_count);
+ return -EINVAL;
+ }
+
control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
+ return 0;
+}
+
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int res = 0;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ /* Verify i2c adapter is initialized */
+ if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
+ return -ENOENT;
+
+ if (!__get_eeprom_i2c_addr(adev, control))
+ return -EINVAL;
+
+ control->ras_num_bad_pages = ras->bad_page_num;
if (hdr->header == RAS_TABLE_HDR_VAL) {
- DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
- control->ras_num_recs);
+ dev_dbg(adev->dev,
+ "Found existing EEPROM table with %d records",
+ control->ras_num_bad_pages);
- if (hdr->version == RAS_TABLE_VER_V2_1) {
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
res = __read_table_ras_info(control);
if (res)
return res;
@@ -1355,28 +1476,32 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
res = __verify_ras_table_checksum(control);
if (res)
- DRM_ERROR("RAS table incorrect checksum or error:%d\n",
- res);
+ dev_err(adev->dev,
+ "RAS table incorrect checksum or error:%d\n",
+ res);
/* Warn if we are at 90% of the threshold or above
*/
- if (10 * control->ras_num_recs >= 9 * ras->bad_page_cnt_threshold)
+ if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
- control->ras_num_recs,
+ control->ras_num_bad_pages,
ras->bad_page_cnt_threshold);
} else if (hdr->header == RAS_TABLE_HDR_BAD &&
amdgpu_bad_page_threshold != 0) {
- if (hdr->version == RAS_TABLE_VER_V2_1) {
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
res = __read_table_ras_info(control);
if (res)
return res;
}
res = __verify_ras_table_checksum(control);
- if (res)
- DRM_ERROR("RAS Table incorrect checksum or error:%d\n",
- res);
- if (ras->bad_page_cnt_threshold > control->ras_num_recs) {
+ if (res) {
+ dev_err(adev->dev,
+ "RAS Table incorrect checksum or error:%d\n",
+ res);
+ return -EINVAL;
+ }
+ if (ras->bad_page_cnt_threshold >= control->ras_num_bad_pages) {
/* This means that, the threshold was increased since
* the last time the system was booted, and now,
* ras->bad_page_cnt_threshold - control->num_recs > 0,
@@ -1386,29 +1511,54 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
dev_info(adev->dev,
"records:%d threshold:%d, resetting "
"RAS table header signature",
- control->ras_num_recs,
+ control->ras_num_bad_pages,
ras->bad_page_cnt_threshold);
res = amdgpu_ras_eeprom_correct_header_tag(control,
RAS_TABLE_HDR_VAL);
} else {
- dev_err(adev->dev, "RAS records:%d exceed threshold:%d",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
- if (amdgpu_bad_page_threshold == -1) {
- dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1.");
+ dev_warn(adev->dev,
+ "RAS records:%d exceed threshold:%d\n",
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
res = 0;
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
} else {
- *exceed_err_limit = true;
- dev_err(adev->dev,
- "RAS records:%d exceed threshold:%d, "
- "GPU will not be initialized. Replace this GPU or increase the threshold",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
+ ras->is_rma = true;
+ dev_warn(adev->dev,
+ "User defined threshold is set, runtime service will be halt when threshold is reached\n");
}
}
- } else {
- DRM_INFO("Creating a new EEPROM table");
-
- res = amdgpu_ras_eeprom_reset_table(control);
}
return res < 0 ? res : 0;
}
+
+/*
+ * Re-verify the RAS EEPROM table checksum and, if verification fails, try to
+ * rebuild the table: amdgpu_ras_eeprom_reset_table(), then
+ * amdgpu_ras_save_bad_pages(), then a second checksum verification.  If any
+ * of those steps returns non-zero the EEPROM is flagged as invalid.
+ *
+ * Does nothing when the RAS EEPROM is unsupported, when there is no RAS
+ * context, or when the EEPROM is already flagged as invalid.
+ */
+void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	struct amdgpu_ras_eeprom_control *control;
+	int res;
+
+	if (!__is_ras_eeprom_supported(adev) || !ras)
+		return;
+
+	control = &ras->eeprom_control;
+	if (!control->is_eeprom_valid)
+		return;
+
+	res = __verify_ras_table_checksum(control);
+	if (!res)
+		return;
+
+	dev_warn(adev->dev,
+		 "RAS table incorrect checksum or error:%d, try to recover\n",
+		 res);
+
+	/* Each recovery step only runs if the previous one returned 0. */
+	if (!amdgpu_ras_eeprom_reset_table(control) &&
+	    !amdgpu_ras_save_bad_pages(adev, NULL) &&
+	    !__verify_ras_table_checksum(control)) {
+		dev_info(adev->dev, "RAS table recovery succeed\n");
+		return;
+	}
+
+	dev_err(adev->dev, "RAS table recovery failed\n");
+	control->is_eeprom_valid = false;
+}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index 6dfd667f3013..ebfca4cb5688 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -28,6 +28,7 @@
#define RAS_TABLE_VER_V1 0x00010000
#define RAS_TABLE_VER_V2_1 0x00021000
+#define RAS_TABLE_VER_V3 0x00030000
struct amdgpu_device;
@@ -82,6 +83,17 @@ struct amdgpu_ras_eeprom_control {
*/
u32 ras_num_recs;
+ /* the bad page number is ras_num_recs or
+ * ras_num_recs * umc.retire_unit
+ */
+ u32 ras_num_bad_pages;
+
+ /* Number of records that store an MCA address */
+ u32 ras_num_mca_recs;
+
+ /* Number of records that store a physical address */
+ u32 ras_num_pa_recs;
+
/* First record index to read, 0-based.
* Range is [0, num_recs-1]. This is
* an absolute index, starting right after
@@ -102,6 +114,8 @@ struct amdgpu_ras_eeprom_control {
/* Record channel info which occurred bad pages
*/
u32 bad_channel_bitmap;
+
+ bool is_eeprom_valid;
};
/*
@@ -129,8 +143,7 @@ struct eeprom_table_record {
unsigned char mcumc_id;
} __packed;
-int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
- bool *exceed_err_limit);
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control);
int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
@@ -146,6 +159,10 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *co
void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control);
+
+void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev);
+
extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 3c988cc406e4..be2e56ce1355 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -91,6 +91,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
break;
case TTM_PL_TT:
case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
node = to_ttm_range_mgr_node(res)->mm_nodes;
while (start >= node->size << PAGE_SHIFT)
start -= node++->size << PAGE_SHIFT;
@@ -112,7 +113,6 @@ fallback:
cur->remaining = size;
cur->node = NULL;
WARN_ON(res && start + size > res->size);
- return;
}
/**
@@ -154,6 +154,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
break;
case TTM_PL_TT:
case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
node = cur->node;
cur->node = ++node;
@@ -165,4 +166,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
}
}
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the block the cursor currently points at is cleared.
+ *
+ * Returns: true only when the cursor is on a TTM_PL_VRAM resource and
+ * amdgpu_vram_mgr_is_cleared() reports its current block as cleared;
+ * false for every other memory type.
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+	struct drm_buddy_block *block;
+
+	/* Only VRAM resources are ever reported as cleared. */
+	if (cur->mem_type != TTM_PL_VRAM)
+		return false;
+
+	block = cur->node;
+	if (!amdgpu_vram_mgr_is_cleared(block))
+		return false;
+
+	return true;
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 5fed06ffcc6b..28c4ad62f50e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -26,21 +26,165 @@
#include "sienna_cichlid.h"
#include "smu_v13_0_10.h"
-int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
- struct amdgpu_reset_handler *handler)
+/*
+ * Suspend the hardware IP blocks of @adev ahead of an XGMI reset-on-init.
+ *
+ * Blocks are visited from last to first. Blocks that are not valid, whose
+ * status.hw is not set, or that are of type AMD_IP_BLOCK_TYPE_DCE are
+ * skipped; every other block is suspended and its status.hw is cleared.
+ * The return value of amdgpu_ip_block_suspend() is currently ignored, so
+ * this function always returns 0.
+ */
+static int amdgpu_reset_xgmi_reset_on_init_suspend(struct amdgpu_device *adev)
{
- /* TODO: Check if handler exists? */
- list_add_tail(&handler->handler_list, &reset_ctl->reset_handlers);
+ int i;
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+ /* displays are handled in phase1 */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
+ continue;
+
+ /* XXX handle errors */
+ amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ adev->ip_blocks[i].status.hw = false;
+ }
+
+ /* The VCN FW shared region is in the framebuffer, and some flags are
+ * initialized in that region during sw_init. Make sure the region is
+ * backed up.
+ */
+ amdgpu_vcn_save_vcpu_bo(adev);
+
return 0;
}
+/*
+ * Prepare every device on the reset list for an XGMI reset-on-init.
+ *
+ * @reset_ctl: reset control (unused here)
+ * @reset_context: context providing the list of devices to prepare
+ *
+ * Each device is unregistered as a GPU instance and then has its IP blocks
+ * suspended. Processing stops at the first device whose suspend fails.
+ *
+ * Returns 0 on success (including an empty device list) or the error
+ * reported by the failing suspend.
+ */
+static int amdgpu_reset_xgmi_reset_on_init_prep_hwctxt(
+	struct amdgpu_reset_control *reset_ctl,
+	struct amdgpu_reset_context *reset_context)
+{
+	struct list_head *reset_device_list = reset_context->reset_device_list;
+	struct amdgpu_device *tmp_adev;
+	/* Start from success so an empty list never returns an indeterminate value. */
+	int r = 0;
+
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		amdgpu_unregister_gpu_instance(tmp_adev);
+		r = amdgpu_reset_xgmi_reset_on_init_suspend(tmp_adev);
+		if (r) {
+			dev_err(tmp_adev->dev,
+				"xgmi reset on init: prepare for reset failed");
+			return r;
+		}
+	}
+
+	return r;
+}
+
+/*
+ * Bring the devices on the reset list back up after an XGMI reset-on-init.
+ *
+ * The devices are first re-initialised through
+ * amdgpu_device_reinit_after_reset(); if that fails its error is returned
+ * unchanged. Afterwards the KFD side is initialised for every device whose
+ * kfd.init_complete flag is not yet set.
+ */
+static int amdgpu_reset_xgmi_reset_on_init_restore_hwctxt(
+	struct amdgpu_reset_control *reset_ctl,
+	struct amdgpu_reset_context *reset_context)
+{
+	struct list_head *devices = reset_context->reset_device_list;
+	struct amdgpu_device *dev = NULL;
+	int ret = amdgpu_device_reinit_after_reset(reset_context);
+
+	if (ret)
+		return ret;
+
+	list_for_each_entry(dev, devices, reset_list) {
+		if (dev->kfd.init_complete)
+			continue;
+
+		kgd2kfd_init_zone_device(dev);
+		amdgpu_amdkfd_device_init(dev);
+		amdgpu_amdkfd_drm_client_create(dev);
+	}
+
+	return 0;
+}
+
+/*
+ * Trigger the hardware reset on every device of the reset list and wait for
+ * all of them to finish.
+ *
+ * @reset_ctl: reset control whose handle is the device used for the debug
+ *             message and for selecting the reset method
+ * @reset_context: context providing the list of devices to reset
+ *
+ * Each device's reset_lock is held, with active_reset set, for the whole
+ * duration of the reset. The per-device xgmi_reset_work items are queued on
+ * system_unbound_wq so the resets run in parallel, then flushed one by one.
+ *
+ * Returns 0 on success, -EALREADY if queue_work() refused a work item, or
+ * the first non-zero asic_reset_res reported by a device.
+ */
+static int amdgpu_reset_xgmi_reset_on_init_perform_reset(
+	struct amdgpu_reset_control *reset_ctl,
+	struct amdgpu_reset_context *reset_context)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+	struct list_head *reset_device_list = reset_context->reset_device_list;
+	struct amdgpu_device *tmp_adev = NULL;
+	int r = 0;
+
+	dev_dbg(adev->dev, "xgmi roi - hw reset\n");
+
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		mutex_lock(&tmp_adev->reset_cntl->reset_lock);
+		tmp_adev->reset_cntl->active_reset =
+			amdgpu_asic_reset_method(adev);
+	}
+
+	/* Mode1 reset needs to be triggered on all devices together */
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		/* For XGMI run all resets in parallel to speed up the process */
+		if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) {
+			r = -EALREADY;
+			dev_err(tmp_adev->dev,
+				"xgmi reset on init: reset failed with error, %d",
+				r);
+			/*
+			 * NOTE(review): work already queued for earlier
+			 * devices is not flushed on this path - confirm
+			 * whether that is intended.
+			 */
+			break;
+		}
+	}
+
+	/* For XGMI wait for all resets to complete before proceed */
+	if (!r) {
+		list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+			flush_work(&tmp_adev->xgmi_reset_work);
+			r = tmp_adev->asic_reset_res;
+			if (r)
+				break;
+		}
+	}
+
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		/*
+		 * Clear active_reset while reset_lock is still held, so the
+		 * write cannot overwrite a value set by the next lock holder.
+		 */
+		tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
+		mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
+	}
+
+	return r;
+}
+
+/*
+ * Run the XGMI reset-on-init sequence for the devices in @reset_context.
+ *
+ * The device list must exist and hold at least two devices, otherwise
+ * -EINVAL is returned. The first device on the list is used to drive the
+ * prepare-hwcontext step followed by the reset itself; the first error
+ * encountered is returned.
+ */
+int amdgpu_reset_do_xgmi_reset_on_init(
+	struct amdgpu_reset_context *reset_context)
+{
+	struct list_head *devices = reset_context->reset_device_list;
+	struct amdgpu_device *first;
+	int ret;
+
+	if (!devices)
+		return -EINVAL;
+	if (list_empty(devices) || list_is_singular(devices))
+		return -EINVAL;
+
+	first = list_first_entry(devices, struct amdgpu_device, reset_list);
+
+	ret = amdgpu_reset_prepare_hwcontext(first, reset_context);
+	if (ret)
+		return ret;
+
+	return amdgpu_reset_perform_reset(first, reset_context);
+}
+
+/*
+ * Reset handler for AMD_RESET_METHOD_ON_INIT. Only the hardware-context
+ * prepare, perform-reset and hardware-context restore steps are
+ * implemented; the prepare_env, restore_env and do_reset hooks are left
+ * NULL.
+ */
+struct amdgpu_reset_handler xgmi_reset_on_init_handler = {
+ .reset_method = AMD_RESET_METHOD_ON_INIT,
+ .prepare_env = NULL,
+ .prepare_hwcontext = amdgpu_reset_xgmi_reset_on_init_prep_hwctxt,
+ .perform_reset = amdgpu_reset_xgmi_reset_on_init_perform_reset,
+ .restore_hwcontext = amdgpu_reset_xgmi_reset_on_init_restore_hwctxt,
+ .restore_env = NULL,
+ .do_reset = NULL,
+};
+
int amdgpu_reset_init(struct amdgpu_device *adev)
{
int ret = 0;
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
ret = aldebaran_reset_init(adev);
break;
case IP_VERSION(11, 0, 7):
@@ -60,9 +204,11 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
{
int ret = 0;
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
ret = aldebaran_reset_fini(adev);
break;
case IP_VERSION(11, 0, 7):
@@ -167,5 +313,42 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
up_write(&reset_domain->sem);
}
+/*
+ * Write a short human-readable description of the reset source into @buf.
+ *
+ * @rst_ctxt: reset context whose src (and, for job resets, job) is described
+ * @buf: destination buffer; nothing is written when it is NULL
+ * @len: size of @buf in bytes; nothing is written when it is 0
+ *
+ * For a job-triggered reset with a job attached, the name of the job's
+ * scheduler is appended to the description.
+ */
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+			   size_t len)
+{
+	const char *desc;
+
+	if (!buf || !len)
+		return;
+
+	switch (rst_ctxt->src) {
+	case AMDGPU_RESET_SRC_JOB:
+		if (rst_ctxt->job) {
+			/* Only this case needs formatting; finish here. */
+			snprintf(buf, len, "job hang on ring:%s",
+				 rst_ctxt->job->base.sched->name);
+			return;
+		}
+		desc = "job hang";
+		break;
+	case AMDGPU_RESET_SRC_RAS:
+		desc = "RAS error";
+		break;
+	case AMDGPU_RESET_SRC_MES:
+		desc = "MES hang";
+		break;
+	case AMDGPU_RESET_SRC_HWS:
+		desc = "HWS hang";
+		break;
+	case AMDGPU_RESET_SRC_USER:
+		desc = "user trigger";
+		break;
+	case AMDGPU_RESET_SRC_USERQ:
+		desc = "user queue trigger";
+		break;
+	default:
+		desc = "unknown";
+		break;
+	}
+
+	strscpy(buf, desc, len);
+}
+/*
+ * Tell whether @adev's current init level is
+ * AMDGPU_INIT_LEVEL_RESET_RECOVERY.
+ */
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev)
+{
+	return adev->init_lvl->level == AMDGPU_INIT_LEVEL_RESET_RECOVERY;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index f4a501ff87d9..07b4d37f1db6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -26,11 +26,24 @@
#include "amdgpu.h"
+#define AMDGPU_RESET_MAX_HANDLERS 5
+
enum AMDGPU_RESET_FLAGS {
AMDGPU_NEED_FULL_RESET = 0,
AMDGPU_SKIP_HW_RESET = 1,
- AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
+ AMDGPU_SKIP_COREDUMP = 2,
+ AMDGPU_HOST_FLR = 3,
+};
+
+/*
+ * Source that requested a reset, stored in amdgpu_reset_context.src.
+ * amdgpu_reset_get_desc() turns these values into a description string.
+ */
+enum AMDGPU_RESET_SRCS {
+ AMDGPU_RESET_SRC_UNKNOWN,
+ AMDGPU_RESET_SRC_JOB,
+ AMDGPU_RESET_SRC_RAS,
+ AMDGPU_RESET_SRC_MES,
+ AMDGPU_RESET_SRC_HWS,
+ AMDGPU_RESET_SRC_USER,
+ AMDGPU_RESET_SRC_USERQ,
};
struct amdgpu_reset_context {
@@ -40,11 +53,11 @@ struct amdgpu_reset_context {
struct amdgpu_hive_info *hive;
struct list_head *reset_device_list;
unsigned long flags;
+ enum AMDGPU_RESET_SRCS src;
};
struct amdgpu_reset_handler {
enum amd_reset_method reset_method;
- struct list_head handler_list;
int (*prepare_env)(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *context);
int (*prepare_hwcontext)(struct amdgpu_reset_control *reset_ctl,
@@ -63,7 +76,8 @@ struct amdgpu_reset_control {
void *handle;
struct work_struct reset_work;
struct mutex reset_lock;
- struct list_head reset_handlers;
+ struct amdgpu_reset_handler *(
+ *reset_handlers)[AMDGPU_RESET_MAX_HANDLERS];
atomic_t in_reset;
enum amd_reset_method active_reset;
struct amdgpu_reset_handler *(*get_reset_handler)(
@@ -87,7 +101,6 @@ struct amdgpu_reset_domain {
atomic_t reset_res;
};
-
int amdgpu_reset_init(struct amdgpu_device *adev);
int amdgpu_reset_fini(struct amdgpu_device *adev);
@@ -97,8 +110,10 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context);
-int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
- struct amdgpu_reset_handler *handler);
+int amdgpu_reset_prepare_env(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+int amdgpu_reset_restore_env(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
char *wq_name);
@@ -122,8 +137,40 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
return queue_work(domain->wq, work);
}
+/*
+ * Report whether @domain's reset semaphore is contended. The caller must
+ * hold domain->sem; this is checked with lockdep.
+ * NOTE(review): presumably contention is taken to mean that a reset is
+ * waiting for the semaphore - confirm against the callers.
+ */
+static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain)
+{
+ lockdep_assert_held(&domain->sem);
+ return rwsem_is_contended(&domain->sem);
+}
+
void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+ size_t len);
+
+/*
+ * for_each_handler - walk the handlers registered in a reset control
+ * @i: integer lvalue used as the loop index
+ * @handler: struct amdgpu_reset_handler pointer lvalue set on each iteration
+ * @reset_ctl: pointer to the struct amdgpu_reset_control to walk
+ *
+ * Iteration stops after AMDGPU_RESET_MAX_HANDLERS entries or at the first
+ * NULL entry, whichever comes first. Every argument is parenthesised so
+ * that arbitrary expressions can be passed safely.
+ */
+#define for_each_handler(i, handler, reset_ctl)				\
+	for ((i) = 0; ((i) < AMDGPU_RESET_MAX_HANDLERS) &&		\
+		      ((handler) = (*(reset_ctl)->reset_handlers)[(i)]); \
+	     ++(i))
+
+extern struct amdgpu_reset_handler xgmi_reset_on_init_handler;
+int amdgpu_reset_do_xgmi_reset_on_init(
+ struct amdgpu_reset_context *reset_context);
+
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev);
+
+/*
+ * Store @status in adev->pcie_reset_ctx.occurs_dpc and mirror the same
+ * value into adev->no_hw_access.
+ */
+static inline void amdgpu_reset_set_dpc_status(struct amdgpu_device *adev,
+ bool status)
+{
+ adev->pcie_reset_ctx.occurs_dpc = status;
+ adev->no_hw_access = status;
+}
+
+/* Return the current value of adev->pcie_reset_ctx.occurs_dpc. */
+static inline bool amdgpu_reset_in_dpc(struct amdgpu_device *adev)
+{
+ return adev->pcie_reset_ctx.occurs_dpc;
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 80d6e132e409..8f6ce948c684 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -99,6 +99,29 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
return 0;
}
+/**
+ * amdgpu_ring_alloc_reemit - reserve ring buffer space for re-emission
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ndw: number of dwords to reserve in the ring buffer
+ *
+ * Reserve @ndw dwords, rounded up to the ring's alignment, in the ring
+ * buffer. No limit check against max_dw is made here, because several
+ * submissions may be re-emitted at once.
+ */
+static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
+{
+	const struct amdgpu_ring_funcs *funcs = ring->funcs;
+
+	/* Remember where this allocation started. */
+	ring->wptr_old = ring->wptr;
+
+	/* Round the request up to the alignment so the commit can pad safely. */
+	ring->count_dw = (ndw + funcs->align_mask) & ~funcs->align_mask;
+
+	if (funcs->begin_use)
+		funcs->begin_use(ring);
+}
+
/** amdgpu_ring_insert_nop - insert NOP packets
*
* @ring: amdgpu_ring structure holding ring information
@@ -108,10 +131,22 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
*/
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
- int i;
+ uint32_t occupied, chunk1, chunk2;
+
+ /* Split the fill where the buffer wraps: chunk1 runs from the current
+ * write offset towards the end of the buffer (at most @count dwords),
+ * chunk2 is whatever remains and starts again at offset 0.
+ */
+ occupied = ring->wptr & ring->buf_mask;
+ chunk1 = ring->buf_mask + 1 - occupied;
+ chunk1 = (chunk1 >= count) ? count : chunk1;
+ chunk2 = count - chunk1;
- for (i = 0; i < count; i++)
- amdgpu_ring_write(ring, ring->funcs->nop);
+ if (chunk1)
+ memset32(&ring->ring[occupied], ring->funcs->nop, chunk1);
+
+ if (chunk2)
+ memset32(ring->ring, ring->funcs->nop, chunk2);
+
+ /* Advance the write pointer and take the dwords off the reserved count. */
+ ring->wptr += count;
+ ring->wptr &= ring->ptr_mask;
+ ring->count_dw -= count;
}
/**
@@ -141,11 +176,16 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
uint32_t count;
+ if (ring->count_dw < 0)
+ DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+
/* We pad to match fetch size */
count = ring->funcs->align_mask + 1 -
(ring->wptr & ring->funcs->align_mask);
- count %= ring->funcs->align_mask + 1;
- ring->funcs->insert_nop(ring, count);
+ count &= ring->funcs->align_mask;
+
+ if (count != 0)
+ ring->funcs->insert_nop(ring, count);
mb();
amdgpu_ring_set_wptr(ring);
@@ -170,14 +210,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
}
#define amdgpu_ring_get_gpu_addr(ring, offset) \
- (ring->is_mes_queue ? \
- (ring->mes_ctx->meta_data_gpu_addr + offset) : \
- (ring->adev->wb.gpu_addr + offset * 4))
+ (ring->adev->wb.gpu_addr + offset * 4)
#define amdgpu_ring_get_cpu_addr(ring, offset) \
- (ring->is_mes_queue ? \
- (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
- (&ring->adev->wb.wb[offset]))
+ (&ring->adev->wb.wb[offset])
/**
* amdgpu_ring_init - init driver ring struct.
@@ -212,6 +248,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
*/
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
sched_hw_submission = max(sched_hw_submission, 256);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_MES)
+ sched_hw_submission = 8;
else if (ring == &adev->sdma.instance[0].page)
sched_hw_submission = 256;
@@ -224,57 +262,42 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->sched_score = sched_score;
ring->vmid_wait = dma_fence_get_stub();
- if (!ring->is_mes_queue) {
- ring->idx = adev->num_rings++;
- adev->rings[ring->idx] = ring;
- }
+ ring->idx = adev->num_rings++;
+ adev->rings[ring->idx] = ring;
r = amdgpu_fence_driver_init_ring(ring);
if (r)
return r;
}
- if (ring->is_mes_queue) {
- ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_RPTR_OFFS);
- ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_WPTR_OFFS);
- ring->fence_offs = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_FENCE_OFFS);
- ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_TRAIL_FENCE_OFFS);
- ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_COND_EXE_OFFS);
- } else {
- r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
+ return r;
+ }
- r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
+ return r;
+ }
- r = amdgpu_device_wb_get(adev, &ring->fence_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &ring->fence_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
+ return r;
+ }
- r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
+ return r;
+ }
- r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
+ return r;
}
ring->fence_gpu_addr =
@@ -305,45 +328,51 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
/* always set cond_exec_polling to CONTINUE */
*ring->cond_exe_cpu_addr = 1;
- r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
- if (r) {
- dev_err(adev->dev, "failed initializing fences (%d).\n", r);
- return r;
- }
+ if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) {
+ r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
+ if (r) {
+ dev_err(adev->dev, "failed initializing fences (%d).\n", r);
+ return r;
+ }
- max_ibs_dw = ring->funcs->emit_frame_size +
- amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
- max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+ max_ibs_dw = ring->funcs->emit_frame_size +
+ amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
+ max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
- if (WARN_ON(max_ibs_dw > max_dw))
- max_dw = max_ibs_dw;
+ if (WARN_ON(max_ibs_dw > max_dw))
+ max_dw = max_ibs_dw;
- ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+ ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+ } else {
+ ring->ring_size = roundup_pow_of_two(max_dw * 4);
+ ring->count_dw = (ring->ring_size - 4) >> 2;
+ /* ring buffer is empty now */
+ ring->wptr = *ring->rptr_cpu_addr = 0;
+ }
ring->buf_mask = (ring->ring_size / 4) - 1;
ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
0xffffffffffffffff : ring->buf_mask;
+ /* Initialize cached_rptr to 0 */
+ ring->cached_rptr = 0;
- /* Allocate ring buffer */
- if (ring->is_mes_queue) {
- int offset = 0;
-
- BUG_ON(ring->ring_size > PAGE_SIZE*4);
-
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_RING_OFFS);
- ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- amdgpu_ring_clear_ring(ring);
+ if (!ring->ring_backup) {
+ ring->ring_backup = kvzalloc(ring->ring_size, GFP_KERNEL);
+ if (!ring->ring_backup)
+ return -ENOMEM;
+ }
- } else if (ring->ring_obj == NULL) {
- r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
+ /* Allocate ring buffer */
+ if (ring->ring_obj == NULL) {
+ r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_bytes,
+ PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
&ring->gpu_addr,
(void **)&ring->ring);
if (r) {
dev_err(adev->dev, "(%d) ring create failed\n", r);
+ kvfree(ring->ring_backup);
return r;
}
amdgpu_ring_clear_ring(ring);
@@ -352,7 +381,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->max_dw = max_dw;
ring->hw_prio = hw_prio;
- if (!ring->no_scheduler) {
+ if (!ring->no_scheduler && ring->funcs->type < AMDGPU_HW_IP_NUM) {
hw_ip = ring->funcs->type;
num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
@@ -373,32 +402,26 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
/* Not to finish a ring which is not initialized */
- if (!(ring->adev) ||
- (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
+ if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
return;
ring->sched.ready = false;
- if (!ring->is_mes_queue) {
- amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
- amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
+ amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
+ amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
- amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
- amdgpu_device_wb_free(ring->adev, ring->fence_offs);
+ amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
+ amdgpu_device_wb_free(ring->adev, ring->fence_offs);
- amdgpu_bo_free_kernel(&ring->ring_obj,
- &ring->gpu_addr,
- (void **)&ring->ring);
- } else {
- kfree(ring->fence_drv.fences);
- }
+ amdgpu_bo_free_kernel(&ring->ring_obj,
+ &ring->gpu_addr,
+ (void **)&ring->ring);
+ kvfree(ring->ring_backup);
+ ring->ring_backup = NULL;
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = NULL;
ring->me = 0;
-
- if (!ring->is_mes_queue)
- ring->adev->rings[ring->idx] = NULL;
}
/**
@@ -434,8 +457,13 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence)
{
unsigned long flags;
+ ktime_t deadline;
+ bool ret;
- ktime_t deadline = ktime_add_us(ktime_get(), 10000);
+ if (unlikely(ring->adev->debug_disable_soft_recovery))
+ return false;
+
+ deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;
@@ -445,12 +473,16 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
dma_fence_set_error(fence, -ENODATA);
spin_unlock_irqrestore(fence->lock, flags);
- atomic_inc(&ring->adev->gpu_reset_counter);
while (!dma_fence_is_signaled(fence) &&
ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
ring->funcs->soft_recovery(ring, vmid);
- return dma_fence_is_signaled(fence);
+ ret = dma_fence_is_signaled(fence);
+ /* increment the counter only if soft reset worked */
+ if (ret)
+ atomic_inc(&ring->adev->gpu_reset_counter);
+
+ return ret;
}
/*
@@ -469,8 +501,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
- int r, i;
uint32_t value, result, early[3];
+ uint64_t p;
+ loff_t i;
+ int r;
if (*pos & 3 || size & 3)
return -EINVAL;
@@ -478,13 +512,18 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
result = 0;
if (*pos < 12) {
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_lock(&ring->adev->cper.ring_lock);
+
early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
early[2] = ring->wptr & ring->buf_mask;
for (i = *pos / 4; i < 3 && size; i++) {
r = put_user(early[i], (uint32_t *)buf);
- if (r)
- return r;
+ if (r) {
+ result = r;
+ goto out;
+ }
buf += 4;
result += 4;
size -= 4;
@@ -492,74 +531,94 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
}
}
- while (size) {
- if (*pos >= (ring->ring_size + 12))
- return result;
+ if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) {
+ while (size) {
+ if (*pos >= (ring->ring_size + 12))
+ return result;
- value = ring->ring[(*pos - 12)/4];
- r = put_user(value, (uint32_t *)buf);
- if (r)
- return r;
- buf += 4;
- result += 4;
- size -= 4;
- *pos += 4;
+ value = ring->ring[(*pos - 12)/4];
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ return r;
+ buf += 4;
+ result += 4;
+ size -= 4;
+ *pos += 4;
+ }
+ } else {
+ p = early[0];
+ if (early[0] <= early[1])
+ size = (early[1] - early[0]);
+ else
+ size = ring->ring_size - (early[0] - early[1]);
+
+ while (size) {
+ if (p == early[1])
+ goto out;
+
+ value = ring->ring[p];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto out;
+ }
+
+ buf += 4;
+ result += 4;
+ size--;
+ p++;
+ p &= ring->ptr_mask;
+ }
}
+out:
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_unlock(&ring->adev->cper.ring_lock);
+
return result;
}
+/*
+ * debugfs read handler used for rings on SR-IOV virtual functions.
+ *
+ * Rejects offsets or sizes that are not multiples of four with -EINVAL.
+ * For a CPER ring a RAS CPER dump is requested first; the read itself is
+ * then delegated to amdgpu_debugfs_ring_read().
+ */
+static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf,
+					     size_t size, loff_t *pos)
+{
+	struct amdgpu_ring *ring = file_inode(f)->i_private;
+	const bool misaligned = (*pos & 3) || (size & 3);
+
+	if (misaligned)
+		return -EINVAL;
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+		amdgpu_virt_req_ras_cper_dump(ring->adev, false);
+
+	return amdgpu_debugfs_ring_read(f, buf, size, pos);
+}
+
static const struct file_operations amdgpu_debugfs_ring_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_ring_read,
.llseek = default_llseek
};
+/* File operations for the ring debugfs file; reads go through
+ * amdgpu_debugfs_virt_ring_read().
+ */
+static const struct file_operations amdgpu_debugfs_virt_ring_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_virt_ring_read,
+ .llseek = default_llseek
+};
+
+/*
+ * debugfs read handler for a ring's MQD: copies up to @size bytes starting
+ * at offset *@pos of ring->mqd_ptr to user space and advances *@pos by the
+ * number of bytes copied. Returns that number of bytes, 0 when *@pos lies
+ * beyond ring->mqd_size, or -EFAULT when the copy to user space fails.
+ */
static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
- volatile u32 *mqd;
- int r;
- uint32_t value, result;
-
- if (*pos & 3 || size & 3)
- return -EINVAL;
-
- result = 0;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
+ /* Clamp the request to what is left of the MQD after *pos. Both values
+ * are only used once the range check below has passed.
+ */
+ ssize_t bytes = min_t(ssize_t, ring->mqd_size - *pos, size);
+ void *from = ((u8 *)ring->mqd_ptr) + *pos;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
- if (r) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
+ if (*pos > ring->mqd_size)
+ return 0;
- while (size) {
- if (*pos >= ring->mqd_size)
- goto done;
+ if (copy_to_user(buf, from, bytes))
+ return -EFAULT;
- value = mqd[*pos/4];
- r = put_user(value, (uint32_t *)buf);
- if (r)
- goto done;
- buf += 4;
- result += 4;
- size -= 4;
- *pos += 4;
- }
-
-done:
- amdgpu_bo_kunmap(ring->mqd_obj);
- mqd = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- if (r)
- return r;
-
- return result;
+ *pos += bytes;
+ return bytes;
}
static const struct file_operations amdgpu_debugfs_mqd_fops = {
@@ -590,9 +649,14 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
char name[32];
sprintf(name, "amdgpu_ring_%s", ring->name);
- debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
- &amdgpu_debugfs_ring_fops,
- ring->ring_size + 12);
+ if (amdgpu_sriov_vf(adev))
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_virt_ring_fops,
+ ring->ring_size + 12);
+ else
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_ring_fops,
+ ring->ring_size + 12);
if (ring->mqd_obj) {
sprintf(name, "amdgpu_mqd_%s", ring->name);
@@ -631,6 +695,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
ring->name);
ring->sched.ready = !r;
+
return r;
}
@@ -638,6 +703,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
struct amdgpu_mqd_prop *prop)
{
struct amdgpu_device *adev = ring->adev;
+ bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+ amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
+ bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+ amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
memset(prop, 0, sizeof(*prop));
@@ -649,16 +718,15 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
prop->eop_gpu_addr = ring->eop_gpu_addr;
prop->use_doorbell = ring->use_doorbell;
prop->doorbell_index = ring->doorbell_index;
+ prop->kernel_queue = true;
/* map_queues packet doesn't need activate the queue,
* so only kiq need set this field.
*/
prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
- if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
- amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
- (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
- amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
+ prop->allow_tunneling = is_high_prio_compute;
+ if (is_high_prio_compute || is_high_prio_gfx) {
prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
}
@@ -711,3 +779,80 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
}
+
+/*
+ * Tell whether @ring has a usable scheduler: the ring must exist, must not
+ * be flagged no_scheduler, and drm_sched_wqueue_ready() must report its
+ * scheduler as ready.
+ */
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
+{
+	return ring && !ring->no_scheduler &&
+	       drm_sched_wqueue_ready(&ring->sched);
+}
+
+/*
+ * First half of a ring reset: stop the ring's scheduler work queue, then
+ * back up the commands on the ring that are not attributed to
+ * @guilty_fence.
+ */
+void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ /* Stop the scheduler to prevent anybody else from touching the ring buffer. */
+ drm_sched_wqueue_stop(&ring->sched);
+ /* back up the non-guilty commands */
+ amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence);
+}
+
+/*
+ * Second half of a ring reset.
+ *
+ * The ring is tested first; if the test fails its error is returned and
+ * nothing else is done (in particular the scheduler is not restarted here).
+ * Otherwise the fence of the guilty job, if any, is force-completed, the
+ * backed-up dwords in ring->ring_backup are written back to the ring and
+ * committed, and the scheduler work queue is started again.
+ *
+ * Returns 0 on success or the error from amdgpu_ring_test_ring().
+ */
+int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ unsigned int i;
+ int r;
+
+ /* verify that the ring is functional */
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ return r;
+
+ /* signal the fence of the bad job */
+ if (guilty_fence)
+ amdgpu_fence_driver_guilty_force_completion(guilty_fence);
+ /* Re-emit the non-guilty commands */
+ if (ring->ring_backup_entries_to_copy) {
+ amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
+ for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
+ amdgpu_ring_write(ring, ring->ring_backup[i]);
+ amdgpu_ring_commit(ring);
+ }
+ /* Start the scheduler again */
+ drm_sched_wqueue_start(&ring->sched);
+ return 0;
+}
+
+/*
+ * Check whether @reset_type is supported for the IP that owns @ring.
+ *
+ * The supported-reset mask is taken from the gfx, compute, sdma, vcn or
+ * jpeg state of the device depending on the ring type. Ring types without
+ * such a mask never support any reset type.
+ *
+ * Returns true if any bit of @reset_type is set in the selected mask.
+ */
+bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring,
+					 u32 reset_type)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u32 supported;
+
+	switch (ring->funcs->type) {
+	case AMDGPU_RING_TYPE_GFX:
+		supported = adev->gfx.gfx_supported_reset;
+		break;
+	case AMDGPU_RING_TYPE_COMPUTE:
+		supported = adev->gfx.compute_supported_reset;
+		break;
+	case AMDGPU_RING_TYPE_SDMA:
+		supported = adev->sdma.supported_reset;
+		break;
+	case AMDGPU_RING_TYPE_VCN_DEC:
+	case AMDGPU_RING_TYPE_VCN_ENC:
+		supported = adev->vcn.supported_reset;
+		break;
+	case AMDGPU_RING_TYPE_VCN_JPEG:
+		supported = adev->jpeg.supported_reset;
+		break;
+	default:
+		return false;
+	}
+
+	return (supported & reset_type) != 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index e2ab303ad270..b6b649179776 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -37,13 +37,14 @@ struct amdgpu_job;
struct amdgpu_vm;
/* max number of rings */
-#define AMDGPU_MAX_RINGS 124
+#define AMDGPU_MAX_RINGS 149
#define AMDGPU_MAX_HWIP_RINGS 64
#define AMDGPU_MAX_GFX_RINGS 2
#define AMDGPU_MAX_SW_GFX_RINGS 2
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
#define AMDGPU_MAX_UVD_ENC_RINGS 2
+#define AMDGPU_MAX_VPE_RINGS 2
enum amdgpu_ring_priority_level {
AMDGPU_RING_PRIO_0,
@@ -77,8 +78,11 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC,
AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC,
AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG,
+ AMDGPU_RING_TYPE_VPE = AMDGPU_HW_IP_VPE,
AMDGPU_RING_TYPE_KIQ,
- AMDGPU_RING_TYPE_MES
+ AMDGPU_RING_TYPE_MES,
+ AMDGPU_RING_TYPE_UMSCH_MM,
+ AMDGPU_RING_TYPE_CPER,
};
enum amdgpu_ib_pool_type {
@@ -110,10 +114,11 @@ struct amdgpu_sched {
*/
struct amdgpu_fence_driver {
uint64_t gpu_addr;
- volatile uint32_t *cpu_addr;
+ uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
uint32_t sync_seq;
atomic_t last_seq;
+ u64 signalled_wptr;
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
@@ -123,11 +128,35 @@ struct amdgpu_fence_driver {
struct dma_fence **fences;
};
+/*
+ * Fences mark an event in the GPU's pipeline and are used
+ * for GPU/CPU synchronization. When the fence is written,
+ * it is expected that all buffers associated with that fence
+ * are no longer in use by the associated ring on the GPU and
+ * that the relevant GPU caches have been flushed.
+ */
+
+struct amdgpu_fence {
+ /* embedded generic dma_fence */
+ struct dma_fence base;
+
+ /* ring associated with this fence (RB, DMA, etc.) */
+ struct amdgpu_ring *ring;
+ ktime_t start_timestamp;
+
+ /* wptr for the fence for resets */
+ u64 wptr;
+ /* fence context for resets */
+ u64 context;
+ uint32_t seq;
+};
+
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence);
+void amdgpu_fence_save_wptr(struct dma_fence *fence);
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -137,8 +166,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, struct amdgpu_job *job,
- unsigned flags);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+ struct amdgpu_fence *af, unsigned int flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
uint32_t timeout);
bool amdgpu_fence_process(struct amdgpu_ring *ring);
@@ -160,13 +189,40 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
/* provided by hw blocks that expose a ring buffer for commands */
struct amdgpu_ring_funcs {
+ /**
+ * @type:
+ *
+ * GFX, Compute, SDMA, UVD, VCE, VCN, VPE, KIQ, MES, UMSCH, and CPER
+ * use ring buffers. The type field just identifies which component the
+ * ring buffer is associated with.
+ */
enum amdgpu_ring_type type;
uint32_t align_mask;
+
+ /**
+ * @nop:
+ *
+ * Every block in the amdgpu has no-op instructions (e.g., GFX 10
+ * uses PACKET3(PACKET3_NOP, 0x3FFF), VCN 5 uses VCN_ENC_CMD_NO_OP,
+ * etc). This field receives the specific no-op for the component
+ * that initializes the ring.
+ */
u32 nop;
bool support_64bit_ptrs;
bool no_user_fence;
bool secure_submission_supported;
- unsigned extra_dw;
+
+ /**
+ * @extra_bytes:
+ *
+ * Optional extra space in bytes that is added to the ring size
+ * when allocating the BO that holds the contents of the ring.
+ * This space isn't used for command submission to the ring,
+ * but is just there to satisfy some hardware requirements or
+ * implement workarounds. It's up to the implementation of each
+ * specific ring to initialize this space.
+ */
+ unsigned extra_bytes;
/* ring read/write ptr handling */
u64 (*get_rptr)(struct amdgpu_ring *ring);
@@ -206,8 +262,7 @@ struct amdgpu_ring_funcs {
void (*insert_end)(struct amdgpu_ring *ring);
/* pad the indirect buffer to the necessary number of dw */
void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
- unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
- void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+ unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr);
/* note usage for clock and power gating */
void (*begin_use)(struct amdgpu_ring *ring);
void (*end_use)(struct amdgpu_ring *ring);
@@ -233,8 +288,14 @@ struct amdgpu_ring_funcs {
void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset);
void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset);
void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
+ int (*reset)(struct amdgpu_ring *ring, unsigned int vmid,
+ struct amdgpu_fence *timedout_fence);
+ void (*emit_cleaner_shader)(struct amdgpu_ring *ring);
};
+/**
+ * struct amdgpu_ring - Holds ring information
+ */
struct amdgpu_ring {
struct amdgpu_device *adev;
const struct amdgpu_ring_funcs *funcs;
@@ -242,17 +303,68 @@ struct amdgpu_ring {
struct drm_gpu_scheduler sched;
struct amdgpu_bo *ring_obj;
- volatile uint32_t *ring;
+ uint32_t *ring;
+ /* backups for resets */
+ uint32_t *ring_backup;
+ unsigned int ring_backup_entries_to_copy;
unsigned rptr_offs;
u64 rptr_gpu_addr;
- volatile u32 *rptr_cpu_addr;
+ u32 *rptr_cpu_addr;
+
+ /**
+ * @wptr:
+ *
+ * This is part of the Ring buffer implementation and represents the
+ * write pointer. The wptr determines where the host has written.
+ */
u64 wptr;
+
+ /**
+ * @wptr_old:
+ *
+ * Before wptr is updated with a new value, the old value is usually
+ * stored in wptr_old.
+ */
u64 wptr_old;
unsigned ring_size;
+
+ /**
+ * @max_dw:
+ *
+ * Maximum number of DWords for ring allocation. This information is
+ * provided at the ring initialization time, and each IP block can
+ * specify a specific value. Check places that invoke
+ * amdgpu_ring_init() to see the maximum size per block.
+ */
unsigned max_dw;
+
+ /**
+ * @count_dw:
+ *
+ * This value starts with the maximum amount of DWords supported by the
+ * ring. This value is updated based on the ring manipulation.
+ */
int count_dw;
uint64_t gpu_addr;
+
+ /**
+ * @ptr_mask:
+ *
+ * Some IPs provide support for 64-bit pointers and others for 32-bit
+ * only; this behavior is component-specific and defined by the field
+ * support_64bit_ptrs. If the IP block supports 64-bit pointers, the mask
+ * 0xffffffffffffffff is set; otherwise, this field takes the value of buf_mask.
+ * Note that this field is used to keep wptr within a valid range.
+ */
uint64_t ptr_mask;
+
+ /**
+ * @buf_mask:
+ *
+ * Buffer mask is used to wrap wptr into the ring buffer when it is used
+ * as a DWord index. It is set at ring buffer initialization time and is
+ * defined as (ring_size / 4) - 1.
+ */
uint32_t buf_mask;
u32 idx;
u32 xcc_id;
@@ -270,36 +382,43 @@ struct amdgpu_ring {
bool use_pollmem;
unsigned wptr_offs;
u64 wptr_gpu_addr;
- volatile u32 *wptr_cpu_addr;
+
+ /**
+ * @wptr_cpu_addr:
+ *
+ * This is the CPU address pointer in the writeback slot. This is used
+ * to commit changes to the GPU.
+ */
+ u32 *wptr_cpu_addr;
unsigned fence_offs;
u64 fence_gpu_addr;
- volatile u32 *fence_cpu_addr;
+ u32 *fence_cpu_addr;
uint64_t current_ctx;
char name[16];
u32 trail_seq;
unsigned trail_fence_offs;
u64 trail_fence_gpu_addr;
- volatile u32 *trail_fence_cpu_addr;
+ u32 *trail_fence_cpu_addr;
unsigned cond_exe_offs;
u64 cond_exe_gpu_addr;
- volatile u32 *cond_exe_cpu_addr;
+ u32 *cond_exe_cpu_addr;
+ unsigned int set_q_mode_offs;
+ u32 *set_q_mode_ptr;
+ u64 set_q_mode_token;
unsigned vm_hub;
unsigned vm_inv_eng;
struct dma_fence *vmid_wait;
bool has_compute_vm_bug;
bool no_scheduler;
+ bool no_user_submission;
int hw_prio;
unsigned num_hw_submission;
atomic_t *sched_score;
- /* used for mes */
- bool is_mes_queue;
- uint32_t hw_queue_id;
- struct amdgpu_mes_ctx_data *mes_ctx;
-
bool is_sw_ring;
unsigned int entry_index;
-
+ /* store the cached rptr to restore after reset */
+ uint64_t cached_rptr;
};
#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
@@ -324,12 +443,12 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
-#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
-#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a))
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o)))
+#define amdgpu_ring_reset(r, v, f) (r)->funcs->reset((r), (v), (f))
unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type);
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
@@ -362,16 +481,11 @@ static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
- int i = 0;
- while (i <= ring->buf_mask)
- ring->ring[i++] = ring->funcs->nop;
-
+ memset32(ring->ring, ring->funcs->nop, ring->buf_mask + 1);
}
static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
{
- if (ring->count_dw <= 0)
- DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
ring->ring[ring->wptr++ & ring->buf_mask] = v;
ring->wptr &= ring->ptr_mask;
ring->count_dw--;
@@ -381,13 +495,8 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
void *src, int count_dw)
{
unsigned occupied, chunk1, chunk2;
- void *dst;
-
- if (unlikely(ring->count_dw < count_dw))
- DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
occupied = ring->wptr & ring->buf_mask;
- dst = (void *)&ring->ring[occupied];
chunk1 = ring->buf_mask + 1 - occupied;
chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
chunk2 = count_dw - chunk1;
@@ -395,12 +504,11 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
chunk2 <<= 2;
if (chunk1)
- memcpy(dst, src, chunk1);
+ memcpy(&ring->ring[occupied], src, chunk1);
if (chunk2) {
src += chunk1;
- dst = (void *)ring->ring;
- memcpy(dst, src, chunk2);
+ memcpy(ring->ring, src, chunk2);
}
ring->wptr += count_dw;
@@ -408,14 +516,29 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
ring->count_dw -= count_dw;
}
-#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \
- (ring->is_mes_queue && ring->mes_ctx ? \
- (ring->mes_ctx->meta_data_gpu_addr + offset) : 0)
+/**
+ * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute
+ * @ring: amdgpu_ring structure
+ * @offset: offset returned by amdgpu_ring_init_cond_exec
+ *
+ * Calculate the dw count and patch it into a cond_exec command.
+ */
+static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
+ unsigned int offset)
+{
+ unsigned cur;
+
+ if (!ring->funcs->init_cond_exec)
+ return;
-#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \
- (ring->is_mes_queue && ring->mes_ctx ? \
- (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
- NULL)
+ WARN_ON(offset > ring->buf_mask);
+ WARN_ON(ring->ring[offset] != 0);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (cur < offset)
+ cur += ring->ring_size >> 2;
+ ring->ring[offset] = cur - offset;
+}
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
@@ -439,13 +562,20 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size,
enum amdgpu_ib_pool_type pool,
struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
- struct dma_fence *f);
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f);
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_ib *ibs, struct amdgpu_job *job,
struct dma_fence **f);
int amdgpu_ib_pool_init(struct amdgpu_device *adev);
void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring,
+ u32 reset_type);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index e1ee1c7117fb..7e7d6c3865bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -124,7 +124,7 @@ static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
}
}
- del_timer(&mux->resubmit_timer);
+ timer_delete(&mux->resubmit_timer);
mux->s_resubmit = false;
}
@@ -135,7 +135,8 @@ static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
{
- struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
+ struct amdgpu_ring_mux *mux = timer_container_of(mux, t,
+ resubmit_timer);
if (!spin_trylock(&mux->lock)) {
amdgpu_ring_mux_schedule_resubmit(mux);
@@ -159,9 +160,7 @@ int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
mux->ring_entry_size = entry_size;
mux->s_resubmit = false;
- amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk",
- sizeof(struct amdgpu_mux_chunk), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ amdgpu_mux_chunk_slab = KMEM_CACHE(amdgpu_mux_chunk, SLAB_HWCACHE_ALIGN);
if (!amdgpu_mux_chunk_slab) {
DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
return -ENOMEM;
@@ -412,7 +411,7 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
WARN_ON(!ring->is_sw_ring);
- if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+ if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
return;
amdgpu_ring_mux_end_ib(mux, ring);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 35e0ae9acadc..5aa830a02d80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -89,7 +89,7 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id)
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
{
const u32 *src_ptr;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
u32 i;
int r;
@@ -189,7 +189,7 @@ int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
{
const __le32 *fw_data;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
int me, i, max_me;
u32 bo_offset = 0;
u32 table_offset, table_size;
@@ -241,7 +241,7 @@ void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
table_size = le32_to_cpu(hdr->jt_size);
}
- for (i = 0; i < table_size; i ++) {
+ for (i = 0; i < table_size; i++) {
dst_ptr[bo_offset + i] =
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
}
@@ -531,13 +531,12 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
if (version_major == 2 && version_minor == 1)
adev->gfx.rlc.is_rlc_v2_1 = true;
- if (version_minor >= 0) {
- err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
- if (err) {
- dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
- return err;
- }
+ err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
+ if (err) {
+ dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
+ return err;
}
+
if (version_minor >= 1)
amdgpu_gfx_rlc_init_microcode_v2_1(adev);
if (version_minor >= 2)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index b591d33af264..2ce310b31942 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -112,6 +112,53 @@ typedef enum _SOC21_FIRMWARE_ID_ {
SOC21_FIRMWARE_ID_MAX = 37
} SOC21_FIRMWARE_ID;
+typedef enum _SOC24_FIRMWARE_ID_ {
+ SOC24_FIRMWARE_ID_INVALID = 0,
+ SOC24_FIRMWARE_ID_RLC_G_UCODE = 1,
+ SOC24_FIRMWARE_ID_RLC_TOC = 2,
+ SOC24_FIRMWARE_ID_RLCG_SCRATCH = 3,
+ SOC24_FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ SOC24_FIRMWARE_ID_RLC_P_UCODE = 5,
+ SOC24_FIRMWARE_ID_RLC_V_UCODE = 6,
+ SOC24_FIRMWARE_ID_RLX6_UCODE = 7,
+ SOC24_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT = 9,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10,
+ SOC24_FIRMWARE_ID_SDMA_UCODE_TH0 = 11,
+ SOC24_FIRMWARE_ID_SDMA_UCODE_TH1 = 12,
+ SOC24_FIRMWARE_ID_CP_PFP = 13,
+ SOC24_FIRMWARE_ID_CP_ME = 14,
+ SOC24_FIRMWARE_ID_CP_MEC = 15,
+ SOC24_FIRMWARE_ID_RS64_MES_P0 = 16,
+ SOC24_FIRMWARE_ID_RS64_MES_P1 = 17,
+ SOC24_FIRMWARE_ID_RS64_PFP = 18,
+ SOC24_FIRMWARE_ID_RS64_ME = 19,
+ SOC24_FIRMWARE_ID_RS64_MEC = 20,
+ SOC24_FIRMWARE_ID_RS64_MES_P0_STACK = 21,
+ SOC24_FIRMWARE_ID_RS64_MES_P1_STACK = 22,
+ SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK = 23,
+ SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK = 24,
+ SOC24_FIRMWARE_ID_RS64_ME_P0_STACK = 25,
+ SOC24_FIRMWARE_ID_RS64_ME_P1_STACK = 26,
+ SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK = 27,
+ SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK = 28,
+ SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK = 29,
+ SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK = 30,
+ SOC24_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31,
+ SOC24_FIRMWARE_ID_RLCG_SCRATCH_SR = 32,
+ SOC24_FIRMWARE_ID_RLCP_SCRATCH_SR = 33,
+ SOC24_FIRMWARE_ID_RLCV_SCRATCH_SR = 34,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_SR = 35,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36,
+ SOC24_FIRMWARE_ID_RLCDEBUGLOG = 37,
+ SOC24_FIRMWARE_ID_SRIOV_DEBUG = 38,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_RLC = 39,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_SDMA = 40,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_CP = 41,
+ SOC24_FIRMWARE_ID_UMF_ZONE_PAD = 42,
+ SOC24_FIRMWARE_ID_MAX = 43
+} SOC24_FIRMWARE_ID;
+
typedef struct _RLC_TABLE_OF_CONTENT {
union {
unsigned int DW0;
@@ -155,6 +202,33 @@ typedef struct _RLC_TABLE_OF_CONTENT {
};
} RLC_TABLE_OF_CONTENT;
+typedef struct _RLC_TABLE_OF_CONTENT_V2 {
+ union {
+ unsigned int DW0;
+ struct {
+ uint32_t offset : 25;
+ uint32_t id : 7;
+ };
+ };
+
+ union {
+ unsigned int DW1;
+ struct {
+ uint32_t reserved0 : 1;
+ uint32_t reserved1 : 1;
+ uint32_t reserved2 : 1;
+ uint32_t memory_destination : 2;
+ uint32_t vfflr_image_code : 4;
+ uint32_t reserved9 : 1;
+ uint32_t reserved10 : 1;
+ uint32_t reserved11 : 1;
+ uint32_t size_x16 : 1;
+ uint32_t reserved13 : 1;
+ uint32_t size : 18;
+ };
+ };
+} RLC_TABLE_OF_CONTENT_V2;
+
#define RLC_TOC_MAX_SIZE 64
struct amdgpu_rlc_funcs {
@@ -163,13 +237,27 @@ struct amdgpu_rlc_funcs {
void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id);
int (*init)(struct amdgpu_device *adev);
u32 (*get_csb_size)(struct amdgpu_device *adev);
- void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
+
+ /**
+ * @get_csb_buffer: Get the clear state to be put into the hardware.
+ *
+ * The parameter adev is used to get the CS data and other gfx info,
+ * and buffer is the RLC CS pointer
+ *
+ * Sometimes, the user space puts a request to clear the state in the
+ * command buffer; this function provides the clear state that gets put
+ * into the hardware. Note that the driver programs Clear State
+ * Indirect Buffer (CSB) explicitly when it sets up the kernel rings,
+ * and it also provides a pointer to it which is used by the firmware
+ * to load the clear state in some cases.
+ */
+ void (*get_csb_buffer)(struct amdgpu_device *adev, u32 *buffer);
int (*get_cp_table_num)(struct amdgpu_device *adev);
int (*resume)(struct amdgpu_device *adev);
void (*stop)(struct amdgpu_device *adev);
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
- void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
+ void (*update_spm_vmid)(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid);
bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
};
@@ -187,19 +275,19 @@ struct amdgpu_rlc {
/* for power gating */
struct amdgpu_bo *save_restore_obj;
uint64_t save_restore_gpu_addr;
- volatile uint32_t *sr_ptr;
+ uint32_t *sr_ptr;
const u32 *reg_list;
u32 reg_list_size;
/* for clear state */
struct amdgpu_bo *clear_state_obj;
uint64_t clear_state_gpu_addr;
- volatile uint32_t *cs_ptr;
+ uint32_t *cs_ptr;
const struct cs_section_def *cs_data;
u32 clear_state_size;
/* for cp tables */
struct amdgpu_bo *cp_table_obj;
uint64_t cp_table_gpu_addr;
- volatile uint32_t *cp_table_ptr;
+ uint32_t *cp_table_ptr;
u32 cp_table_size;
/* safe mode for updating CG/PG state */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 10df731998b2..39070b2a4c04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -93,8 +93,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
return 0;
}
-void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct drm_suballoc **sa_bo,
- struct dma_fence *fence)
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence)
{
if (sa_bo == NULL || *sa_bo == NULL) {
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 863b2a34b2d6..341beec59537 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -22,7 +22,6 @@
* Authors: Andres Rodriguez <andresx7@gmail.com>
*/
-#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/pid.h>
@@ -36,21 +35,19 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
int fd,
int32_t priority)
{
- struct fd f = fdget(fd);
+ CLASS(fd, f)(fd);
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx_mgr *mgr;
struct amdgpu_ctx *ctx;
uint32_t id;
int r;
- if (!f.file)
+ if (fd_empty(f))
return -EINVAL;
- r = amdgpu_file_to_fpriv(f.file, &fpriv);
- if (r) {
- fdput(f);
+ r = amdgpu_file_to_fpriv(fd_file(f), &fpriv);
+ if (r)
return r;
- }
mgr = &fpriv->ctx_mgr;
mutex_lock(&mgr->lock);
@@ -58,7 +55,6 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
amdgpu_ctx_priority_override(ctx, priority);
mutex_unlock(&mgr->lock);
- fdput(f);
return 0;
}
@@ -67,31 +63,25 @@ static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
unsigned ctx_id,
int32_t priority)
{
- struct fd f = fdget(fd);
+ CLASS(fd, f)(fd);
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
int r;
- if (!f.file)
+ if (fd_empty(f))
return -EINVAL;
- r = amdgpu_file_to_fpriv(f.file, &fpriv);
- if (r) {
- fdput(f);
+ r = amdgpu_file_to_fpriv(fd_file(f), &fpriv);
+ if (r)
return r;
- }
ctx = amdgpu_ctx_get(fpriv, ctx_id);
- if (!ctx) {
- fdput(f);
+ if (!ctx)
return -EINVAL;
- }
amdgpu_ctx_priority_override(ctx, priority);
amdgpu_ctx_put(ctx);
- fdput(f);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index e2b9392d7f0d..8b8a04138711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -25,6 +25,9 @@
#include "amdgpu.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
#define AMDGPU_CSA_SDMA_SIZE 64
/* SDMA CSA reside in the 3rd page of CSA */
@@ -75,22 +78,14 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp)
return 0;
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
+ r = amdgpu_sdma_get_index_from_ring(ring, &index);
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- sdma[ring->idx].sdma_meta_data);
- csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- } else {
- r = amdgpu_sdma_get_index_from_ring(ring, &index);
-
- if (r || index > 31)
- csa_mc_addr = 0;
- else
- csa_mc_addr = amdgpu_csa_vaddr(adev) +
- AMDGPU_CSA_SDMA_OFFSET +
- index * AMDGPU_CSA_SDMA_SIZE;
- }
+ if (r || index > 31)
+ csa_mc_addr = 0;
+ else
+ csa_mc_addr = amdgpu_csa_vaddr(adev) +
+ AMDGPU_CSA_SDMA_OFFSET +
+ index * AMDGPU_CSA_SDMA_SIZE;
return csa_mc_addr;
}
@@ -158,6 +153,7 @@ static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
const struct common_firmware_header *header = NULL;
const struct sdma_firmware_header_v1_0 *hdr;
const struct sdma_firmware_header_v2_0 *hdr_v2;
+ const struct sdma_firmware_header_v3_0 *hdr_v3;
header = (const struct common_firmware_header *)
sdma_inst->fw->data;
@@ -174,6 +170,11 @@ static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version);
sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version);
break;
+ case 3:
+ hdr_v3 = (const struct sdma_firmware_header_v3_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr_v3->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr_v3->ucode_feature_version);
+ break;
default:
return -EINVAL;
}
@@ -206,16 +207,19 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
const struct common_firmware_header *header = NULL;
int err, i;
const struct sdma_firmware_header_v2_0 *sdma_hdr;
+ const struct sdma_firmware_header_v3_0 *sdma_hv3;
uint16_t version_major;
char ucode_prefix[30];
- char fw_name[40];
amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix));
if (instance == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s%d.bin", ucode_prefix, instance);
- err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s%d.bin", ucode_prefix, instance);
if (err)
goto out;
@@ -251,8 +255,14 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
else {
/* Use a single copy per SDMA firmware type. PSP uses the same instance for all
* groups of SDMAs */
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2) &&
- adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 5)) &&
+ adev->firmware.load_type ==
+ AMDGPU_FW_LOAD_PSP &&
adev->sdma.num_inst_per_aid == i) {
break;
}
@@ -278,6 +288,15 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
break;
+ case 3:
+ sdma_hv3 = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_RS64];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_RS64;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hv3->ucode_size_bytes), PAGE_SIZE);
+ break;
default:
err = -EINVAL;
}
@@ -289,27 +308,6 @@ out:
return err;
}
-void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *sdma;
- int i;
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- if (adev->sdma.has_page_queue) {
- sdma = &adev->sdma.instance[i].page;
- if (adev->mman.buffer_funcs_ring == sdma) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- break;
- }
- }
- sdma = &adev->sdma.instance[i].ring;
- if (adev->mman.buffer_funcs_ring == sdma) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- break;
- }
- }
-}
-
int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev)
{
int err = 0;
@@ -344,3 +342,270 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev)
return 0;
}
+
+/*
+ * debugfs interface to enable/disable SDMA job submission to specific rings.
+ */
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * debugfs write handler: bit (i * num_ring) of @val controls sched.ready for
+ * the gfx ring of SDMA instance i, and bit (i * num_ring + 1) does the same for
+ * its page ring when the device has page queues.
+ *
+ * Returns 0 on success, -ENODEV if @data is NULL, or -EINVAL if @val selects
+ * none of the existing rings.
+ */
+static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u64 i, num_ring;
+	u64 mask = 0;
+	struct amdgpu_ring *ring, *page = NULL;
+
+	if (!adev)
+		return -ENODEV;
+
+	/* 1 ring per SDMA instance (gfx), or 2 if the page queue exists */
+	num_ring = adev->sdma.has_page_queue ? 2 : 1;
+
+	/* Calculate the maximum possible mask value
+	 * based on the number of SDMA instances and rings
+	 */
+	mask = BIT_ULL(adev->sdma.num_instances * num_ring) - 1;
+
+	/* Refuse a value that has no bit set for any existing ring */
+	if ((val & mask) == 0)
+		return -EINVAL;
+
+	for (i = 0; i < adev->sdma.num_instances; ++i) {
+		ring = &adev->sdma.instance[i].ring;
+		if (adev->sdma.has_page_queue)
+			page = &adev->sdma.instance[i].page;
+		ring->sched.ready = !!(val & BIT_ULL(i * num_ring));
+		if (page)
+			page->sched.ready = !!(val & BIT_ULL(i * num_ring + 1));
+	}
+
+	/*
+	 * The loop above only stores to sched.ready, so order those stores
+	 * with a write barrier; a read barrier does not order stores.
+	 */
+	smp_wmb();
+	return 0;
+}
+
+/*
+ * debugfs read handler: report in @val which SDMA rings have sched.ready set.
+ * Bit (i * num_ring) stands for the gfx ring of SDMA instance i and bit
+ * (i * num_ring + 1) for its page ring when the device has page queues.
+ *
+ * Returns 0 on success or -ENODEV if @data is NULL.
+ */
+static int amdgpu_debugfs_sdma_sched_mask_get(void *data, u64 *val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u64 i, num_ring;
+	u64 mask = 0;
+	struct amdgpu_ring *ring, *page = NULL;
+
+	if (!adev)
+		return -ENODEV;
+
+	/* 1 ring per SDMA instance (gfx), or 2 if the page queue exists */
+	num_ring = adev->sdma.has_page_queue ? 2 : 1;
+
+	/*
+	 * mask starts out as zero, so only the bits of ready rings need to be
+	 * set; there is nothing to clear for rings that are not ready.
+	 */
+	for (i = 0; i < adev->sdma.num_instances; ++i) {
+		ring = &adev->sdma.instance[i].ring;
+		if (adev->sdma.has_page_queue)
+			page = &adev->sdma.instance[i].page;
+
+		if (ring->sched.ready)
+			mask |= BIT_ULL(i * num_ring);
+
+		if (page && page->sched.ready)
+			mask |= BIT_ULL(i * num_ring + 1);
+	}
+
+	*val = mask;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_sdma_sched_mask_fops,
+ amdgpu_debugfs_sdma_sched_mask_get,
+ amdgpu_debugfs_sdma_sched_mask_set, "%llx\n");
+
+#endif
+
+/* Expose amdgpu_sdma_sched_mask in debugfs when there are multiple SDMA instances. */
+void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	struct drm_minor *minor = adev_to_drm(adev)->primary;
+	struct dentry *root = minor->debugfs_root;
+
+	if (adev->sdma.num_instances <= 1)
+		return;
+
+	debugfs_create_file("amdgpu_sdma_sched_mask", 0600, root, adev,
+			    &amdgpu_debugfs_sdma_sched_mask_fops);
+#endif
+}
+
+/* sysfs show callback: format adev->sdma.supported_reset into @buf. */
+static ssize_t amdgpu_get_sdma_reset_mask(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(dev));
+
+	if (adev)
+		return amdgpu_show_reset_mask(buf, adev->sdma.supported_reset);
+
+	return -ENODEV;
+}
+
+static DEVICE_ATTR(sdma_reset_mask, 0444,
+ amdgpu_get_sdma_reset_mask, NULL);
+
+/*
+ * Create the sdma_reset_mask sysfs file on adev->dev. Nothing is created, and
+ * 0 is returned, when amdgpu_gpu_recovery is zero or there are no SDMA
+ * instances; otherwise the result of device_create_file() is returned.
+ */
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+	if (!amdgpu_gpu_recovery)
+		return 0;
+
+	if (!adev->sdma.num_instances)
+		return 0;
+
+	return device_create_file(adev->dev, &dev_attr_sdma_reset_mask);
+}
+
+/* Remove the sdma_reset_mask sysfs file, mirroring the checks done at init. */
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+	if (!amdgpu_gpu_recovery)
+		return;
+
+	/* Only remove the file while adev->dev->kobj.sd is still set */
+	if (adev->dev->kobj.sd && adev->sdma.num_instances)
+		device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
+}
+
+/* Return the page ring paired with SDMA gfx ring @ring, or NULL if there is none. */
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	if (!adev->sdma.has_page_queue || ring->me >= adev->sdma.num_instances)
+		return NULL;
+	if (ring != &adev->sdma.instance[ring->me].ring)
+		return NULL;
+	return &adev->sdma.instance[ring->me].page;
+}
+
+/**
+ * amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine
+ * @adev: Pointer to the AMDGPU device structure
+ * @ring: Pointer to the ring structure to check
+ *
+ * Return: true if @ring is the page ring of SDMA instance @ring->me on a device with page
+ * queues and GC IP version 9.4.3, 9.4.4 or 9.5.0; false otherwise.
+ */
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ int i = ring->me;
+
+ if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances)
+ return false;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ return (ring == &adev->sdma.instance[i].page);
+ else
+ return false;
+}
+
+/* Call the instance's soft_reset_kernel_queue hook; -EOPNOTSUPP if it has none. */
+static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id)
+{
+	const struct amdgpu_sdma_funcs *funcs = adev->sdma.instance[instance_id].funcs;
+
+	if (!funcs->soft_reset_kernel_queue)
+		return -EOPNOTSUPP;
+	return funcs->soft_reset_kernel_queue(adev, instance_id);
+}
+
+/**
+ * amdgpu_sdma_reset_engine - Reset a specific SDMA engine
+ * @adev: Pointer to the AMDGPU device
+ * @instance_id: Logical ID of the SDMA engine instance to reset
+ * @caller_handles_kernel_queues: Skip stopping/restarting the DRM scheduler
+ * work queues and forcing fence completion. Caller will handle it.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
+ bool caller_handles_kernel_queues)
+{
+ int ret = 0;
+ struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+ struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+ struct amdgpu_ring *page_ring = &sdma_instance->page;
+
+ mutex_lock(&sdma_instance->engine_reset_mutex);
+
+ if (!caller_handles_kernel_queues) {
+ /* Stop the scheduler's work queue for the GFX ring and, if present, the page ring.
+ * This ensures that no new tasks are submitted to the queues while
+ * the reset is in progress.
+ */
+ drm_sched_wqueue_stop(&gfx_ring->sched);
+
+ if (adev->sdma.has_page_queue)
+ drm_sched_wqueue_stop(&page_ring->sched);
+ }
+
+ if (sdma_instance->funcs->stop_kernel_queue) {
+ sdma_instance->funcs->stop_kernel_queue(gfx_ring);
+ if (adev->sdma.has_page_queue)
+ sdma_instance->funcs->stop_kernel_queue(page_ring);
+ }
+
+ /* Perform the SDMA reset for the specified instance */
+ ret = amdgpu_sdma_soft_reset(adev, instance_id);
+ if (ret) {
+ dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id);
+ goto exit;
+ }
+
+ if (sdma_instance->funcs->start_kernel_queue) {
+ sdma_instance->funcs->start_kernel_queue(gfx_ring);
+ if (adev->sdma.has_page_queue)
+ sdma_instance->funcs->start_kernel_queue(page_ring);
+ }
+
+exit:
+ if (!caller_handles_kernel_queues) {
+ /* Force fence completion and restart the scheduler's work queues
+ * for the GFX and page rings, but only if the reset succeeded.
+ * On failure the work queues stopped above are left stopped.
+ */
+ if (!ret) {
+ amdgpu_fence_driver_force_completion(gfx_ring);
+ drm_sched_wqueue_start(&gfx_ring->sched);
+ if (adev->sdma.has_page_queue) {
+ amdgpu_fence_driver_force_completion(page_ring);
+ drm_sched_wqueue_start(&page_ring->sched);
+ }
+ }
+ }
+ mutex_unlock(&sdma_instance->engine_reset_mutex);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 513ac22120c1..34311f32be4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -50,6 +50,12 @@ enum amdgpu_sdma_irq {
#define NUM_SDMA(x) hweight32(x)
+/*
+ * Per-instance SDMA callbacks, reached through amdgpu_sdma_instance.funcs.
+ *
+ * stop_kernel_queue and start_kernel_queue are optional: the engine reset
+ * path NULL-checks each hook, calls stop on the instance's gfx ring (and on
+ * the page ring when has_page_queue is set) before the soft reset, and calls
+ * start on the same rings only after the soft reset succeeded. That path
+ * does not look at the values these two hooks return.
+ *
+ * NOTE(review): soft_reset_kernel_queue has no caller in this hunk;
+ * presumably it resets the kernel queues of one SDMA instance - confirm.
+ */
+struct amdgpu_sdma_funcs {
+ int (*stop_kernel_queue)(struct amdgpu_ring *ring);
+ int (*start_kernel_queue)(struct amdgpu_ring *ring);
+ int (*soft_reset_kernel_queue)(struct amdgpu_device *adev, u32 instance_id);
+};
+
struct amdgpu_sdma_instance {
/* SDMA firmware */
const struct firmware *fw;
@@ -60,6 +66,15 @@ struct amdgpu_sdma_instance {
struct amdgpu_ring page;
bool burst_nop;
uint32_t aid_id;
+
+ struct amdgpu_bo *sdma_fw_obj;
+ uint64_t sdma_fw_gpu_addr;
+ uint32_t *sdma_fw_ptr;
+ struct mutex engine_reset_mutex;
+ /* track guilty state of GFX and PAGE queues */
+ bool gfx_guilty;
+ bool page_guilty;
+ const struct amdgpu_sdma_funcs *funcs;
};
enum amdgpu_sdma_ras_memory_id {
@@ -98,11 +113,13 @@ struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
+ struct amdgpu_irq_src fence_irq;
struct amdgpu_irq_src ecc_irq;
struct amdgpu_irq_src vm_hole_irq;
struct amdgpu_irq_src doorbell_invalid_irq;
struct amdgpu_irq_src pool_timeout_irq;
struct amdgpu_irq_src srbm_write_irq;
+ struct amdgpu_irq_src ctxt_empty_irq;
int num_instances;
uint32_t sdma_mask;
@@ -111,6 +128,11 @@ struct amdgpu_sdma {
bool has_page_queue;
struct ras_common_if *ras_if;
struct amdgpu_sdma_ras *ras;
+ uint32_t *ip_dump;
+ uint32_t supported_reset;
+ struct list_head reset_callback_list;
+ bool no_user_submission;
+ bool disable_uq;
};
/*
@@ -132,7 +154,7 @@ struct amdgpu_buffer_funcs {
uint64_t dst_offset,
/* number of byte to transfer */
uint32_t byte_count,
- bool tmz);
+ uint32_t copy_flags);
/* maximum bytes in a single operation */
uint32_t fill_max_bytes;
@@ -150,6 +172,9 @@ struct amdgpu_buffer_funcs {
uint32_t byte_count);
};
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
+ bool caller_handles_kernel_queues);
+
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
@@ -169,7 +194,11 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
bool duplicate);
void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
bool duplicate);
-void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
-
+void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index 8ed0e073656f..41ebe690eeff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -135,6 +135,10 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
mutex_unlock(&psp->securedisplay_context.mutex);
break;
case 2:
+ if (size < 3 || phy_id >= TA_SECUREDISPLAY_MAX_PHY) {
+ dev_err(adev->dev, "Invalid input: %s\n", str);
+ return -EINVAL;
+ }
mutex_lock(&psp->securedisplay_context.mutex);
psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
new file mode 100644
index 000000000000..a0b479d5fff1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_seq64.h"
+
+#include <drm/drm_exec.h>
+
+/**
+ * DOC: amdgpu_seq64
+ *
+ * amdgpu_seq64 allocates a 64bit memory on each request in sequence order.
+ * seq64 driver is required for user queue fence memory allocation, TLB
+ * counters and VM updates. It has maximum count of 32768 64 bit slots.
+ */
+
+/**
+ * amdgpu_seq64_get_va_base - Base virtual address of the seq64 range
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns:
+ * AMDGPU_VA_RESERVED_SEQ64_START(adev) after it has been passed through
+ * amdgpu_gmc_sign_extend()
+ */
+static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
+{
+	return amdgpu_gmc_sign_extend(AMDGPU_VA_RESERVED_SEQ64_START(adev));
+}
+
+/**
+ * amdgpu_seq64_map - Map the seq64 memory to VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: vm pointer
+ * @bo_va: out parameter receiving the bo_va created for the mapping
+ *
+ * Map the seq64 memory to the given VM. The locks are taken through
+ * amdgpu_vm_lock_pd() and drm_exec_lock_obj() on the seq64 BO and are
+ * handed back to drm_exec_fini() on every exit path past drm_exec_init().
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure. -EINVAL is returned
+ * when the seq64 BO has not been created. When the bo_va was created but
+ * mapping or updating it failed, it is deleted again and *@bo_va is reset
+ * to NULL so that the caller is never left holding a stale pointer.
+ */
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct amdgpu_bo_va **bo_va)
+{
+	struct amdgpu_bo *bo;
+	struct drm_exec exec;
+	u64 seq64_addr;
+	int r;
+
+	bo = adev->seq64.sbo;
+	if (!bo)
+		return -EINVAL;
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_until_all_locked(&exec) {
+		r = amdgpu_vm_lock_pd(vm, &exec, 0);
+		if (likely(!r))
+			r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(r))
+			goto error;
+	}
+
+	*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+	if (!*bo_va) {
+		r = -ENOMEM;
+		goto error;
+	}
+
+	/* The sign extended base is masked before it is used as map address */
+	seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK;
+
+	r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0,
+			     AMDGPU_VA_RESERVED_SEQ64_SIZE,
+			     AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
+	if (r) {
+		DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
+		amdgpu_vm_bo_del(adev, *bo_va);
+		/* Do not hand a deleted bo_va back to the caller */
+		*bo_va = NULL;
+		goto error;
+	}
+
+	r = amdgpu_vm_bo_update(adev, *bo_va, false);
+	if (r) {
+		DRM_ERROR("failed to do vm_bo_update on userq sem\n");
+		amdgpu_vm_bo_del(adev, *bo_va);
+		/* Do not hand a deleted bo_va back to the caller */
+		*bo_va = NULL;
+		goto error;
+	}
+
+error:
+	drm_exec_fini(&exec);
+	return r;
+}
+
+/**
+ * amdgpu_seq64_unmap - Unmap the seq64 memory
+ *
+ * @adev: amdgpu_device pointer
+ * @fpriv: DRM file private
+ *
+ * Delete the seq64 bo_va stored in @fpriv->seq64_va from @fpriv->vm and
+ * clear the pointer. Nothing is done when no bo_va is recorded or when the
+ * seq64 BO does not exist. If taking the locks fails the bo_va is left
+ * untouched and @fpriv->seq64_va keeps its value; no error is reported to
+ * the caller.
+ */
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
+{
+ struct amdgpu_vm *vm;
+ struct amdgpu_bo *bo;
+ struct drm_exec exec;
+ int r;
+
+ /* Never mapped, or already unmapped */
+ if (!fpriv->seq64_va)
+ return;
+
+ bo = adev->seq64.sbo;
+ if (!bo)
+ return;
+
+ vm = &fpriv->vm;
+
+ /* Unlike amdgpu_seq64_map() no DRM_EXEC_INTERRUPTIBLE_WAIT is passed */
+ drm_exec_init(&exec, 0, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ amdgpu_vm_bo_del(adev, fpriv->seq64_va);
+
+ /* Cleared only after the delete so a second call returns early */
+ fpriv->seq64_va = NULL;
+
+error:
+ drm_exec_fini(&exec);
+}
+
+/**
+ * amdgpu_seq64_alloc - Reserve one 64 bit slot
+ *
+ * @adev: amdgpu_device pointer
+ * @va: receives the slot address based on the seq64 VA base
+ * @gpu_addr: receives the slot address based on the GPU address of the
+ *            seq64 BO, may be NULL when the caller does not need it
+ * @cpu_addr: receives the CPU pointer to the slot
+ *
+ * Claims the lowest numbered free slot of the seq64 pool and reports its
+ * three addresses. @va and @cpu_addr are always written on success.
+ *
+ * Returns:
+ * 0 on success, -ENOSPC when every slot is already in use
+ */
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va,
+		       u64 *gpu_addr, u64 **cpu_addr)
+{
+	struct amdgpu_seq64 *seq64 = &adev->seq64;
+	unsigned long slot;
+	u64 offset;
+
+	slot = find_first_zero_bit(seq64->used, seq64->num_sem);
+	if (slot >= seq64->num_sem)
+		return -ENOSPC;
+
+	__set_bit(slot, seq64->used);
+
+	/* Byte offset of the slot inside the pool */
+	offset = slot * sizeof(u64);
+
+	*va = offset + amdgpu_seq64_get_va_base(adev);
+	if (gpu_addr)
+		*gpu_addr = offset + seq64->gpu_addr;
+	*cpu_addr = &seq64->cpu_base_addr[slot];
+
+	return 0;
+}
+
+/**
+ * amdgpu_seq64_free - Release a 64 bit slot
+ *
+ * @adev: amdgpu_device pointer
+ * @va: slot address as stored through the @va argument of
+ *      amdgpu_seq64_alloc()
+ *
+ * Marks the slot that @va refers to as free again. An address that does
+ * not translate to a slot index below the pool size is silently ignored.
+ */
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
+{
+	u64 offset = va - amdgpu_seq64_get_va_base(adev);
+	unsigned long slot = offset / sizeof(u64);
+
+	if (slot >= adev->seq64.num_sem)
+		return;
+
+	__clear_bit(slot, adev->seq64.used);
+}
+
+/**
+ * amdgpu_seq64_fini - Cleanup seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Hand the seq64 BO and its CPU mapping pointer to amdgpu_bo_free_kernel().
+ * No GPU address pointer is passed (NULL), so adev->seq64.gpu_addr is not
+ * touched here.
+ *
+ * NOTE(review): presumably amdgpu_bo_free_kernel() resets adev->seq64.sbo
+ * to NULL, which is what lets amdgpu_seq64_init() run again - confirm.
+ */
+void amdgpu_seq64_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->seq64.sbo,
+ NULL,
+ (void **)&adev->seq64.cpu_base_addr);
+}
+
+/**
+ * amdgpu_seq64_init - Initialize seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the required memory space for seq64, zero it and mark every
+ * slot as free. Calling this again while the BO exists is a no-op.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* Already initialized, keep the existing BO and bitmap */
+ if (adev->seq64.sbo)
+ return 0;
+
+ /*
+ * The BO is AMDGPU_VA_RESERVED_SEQ64_SIZE bytes large, which holds
+ * AMDGPU_MAX_SEQ64_SLOTS slots of sizeof(u64) bytes each.
+ */
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->seq64.sbo, &adev->seq64.gpu_addr,
+ (void **)&adev->seq64.cpu_base_addr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create seq64 failed\n", r);
+ return r;
+ }
+
+ /* Start with all slot contents cleared */
+ memset(adev->seq64.cpu_base_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE);
+
+ /* Every slot is available: full slot count, empty allocation bitmap */
+ adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS;
+ memset(&adev->seq64.used, 0, sizeof(adev->seq64.used));
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
new file mode 100644
index 000000000000..26a249aaaee1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SEQ64_H__
+#define __AMDGPU_SEQ64_H__
+
+#include "amdgpu_vm.h"
+
+/* Number of u64 sized slots that fit into the reserved seq64 range */
+#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_VA_RESERVED_SEQ64_SIZE / sizeof(u64))
+
+/*
+ * State of the seq64 slot pool, embedded in the device as adev->seq64.
+ * All fields are set up by amdgpu_seq64_init().
+ */
+struct amdgpu_seq64 {
+ /* BO backing the pool; non-NULL once amdgpu_seq64_init() succeeded */
+ struct amdgpu_bo *sbo;
+ /* number of slots the allocator hands out */
+ u32 num_sem;
+ /* GPU address of @sbo as filled in by amdgpu_bo_create_kernel() */
+ u64 gpu_addr;
+ /* CPU mapping of @sbo, indexed by slot number */
+ u64 *cpu_base_addr;
+ /* one bit per slot, set while the slot is allocated */
+ DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS);
+};
+
+/* Pool setup and teardown */
+void amdgpu_seq64_fini(struct amdgpu_device *adev);
+int amdgpu_seq64_init(struct amdgpu_device *adev);
+
+/*
+ * Slot allocation. amdgpu_seq64_free() takes the value that
+ * amdgpu_seq64_alloc() stored through @va, not the GPU address of the slot:
+ * the implementation subtracts the seq64 VA base to find the slot index.
+ */
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 *gpu_addr, u64 **cpu_addr);
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va);
+
+/* Per-VM mapping of the pool */
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct amdgpu_bo_va **bo_va);
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv);
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
index 89c38d864471..ec9d12f85f39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -23,6 +23,18 @@
#ifndef __AMDGPU_SMUIO_H__
#define __AMDGPU_SMUIO_H__
+/*
+ * Package type, the return type of the get_pkg_type() hook in
+ * struct amdgpu_smuio_funcs.
+ *
+ * NOTE(review): the explicit numbering starting at 2 presumably mirrors an
+ * encoding defined outside this header - confirm before renumbering.
+ */
+enum amdgpu_pkg_type {
+ AMDGPU_PKG_TYPE_APU = 2,
+ AMDGPU_PKG_TYPE_CEM = 3,
+ AMDGPU_PKG_TYPE_OAM = 4,
+ AMDGPU_PKG_TYPE_UNKNOWN, /* implicitly 5, one past AMDGPU_PKG_TYPE_OAM */
+};
+
+/*
+ * Socket and die identifier pair.
+ *
+ * NOTE(review): no user of this struct is visible in this hunk; presumably
+ * it carries the multi-chip-module position of the device - confirm.
+ */
+struct amdgpu_smuio_mcm_config_info {
+ int socket_id;
+ int die_id;
+};
+
struct amdgpu_smuio_funcs {
u32 (*get_rom_index_offset)(struct amdgpu_device *adev);
u32 (*get_rom_data_offset)(struct amdgpu_device *adev);
@@ -32,6 +44,7 @@ struct amdgpu_smuio_funcs {
u32 (*get_socket_id)(struct amdgpu_device *adev);
enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev);
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
+ u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev);
};
struct amdgpu_smuio {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index dcd8c066bc1f..d6ae9974c952 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -135,11 +135,16 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
struct amdgpu_sync_entry *e;
hash_for_each_possible(sync->fences, e, node, f->context) {
- if (unlikely(e->fence->context != f->context))
- continue;
+ if (dma_fence_is_signaled(e->fence)) {
+ dma_fence_put(e->fence);
+ e->fence = dma_fence_get(f);
+ return true;
+ }
- amdgpu_sync_keep_later(&e->fence, f);
- return true;
+ if (likely(e->fence->context == f->context)) {
+ amdgpu_sync_keep_later(&e->fence, f);
+ return true;
+ }
}
return false;
}
@@ -149,10 +154,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
*
* @sync: sync object to add fence to
* @f: fence to sync to
+ * @flags: memory allocation flags to use when allocating sync entry
*
* Add the fence to the sync object.
*/
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ gfp_t flags)
{
struct amdgpu_sync_entry *e;
@@ -162,7 +169,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
if (amdgpu_sync_add_later(sync, f))
return 0;
- e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
+ e = kmem_cache_alloc(amdgpu_sync_slab, flags);
if (!e)
return -ENOMEM;
@@ -191,7 +198,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
/* Never sync to VM updates either. */
if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
- owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
+ owner != AMDGPU_FENCE_OWNER_KFD)
return false;
/* Ignore fences depending on the sync mode */
@@ -241,14 +249,13 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
if (resv == NULL)
return -EINVAL;
-
- /* TODO: Use DMA_RESV_USAGE_READ here */
- dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
+ /* Implicitly sync only to KERNEL, WRITE and READ */
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
dma_fence_chain_for_each(f, f) {
struct dma_fence *tmp = dma_fence_chain_contained(f);
if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
- r = amdgpu_sync_fence(sync, f);
+ r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
dma_fence_put(f);
if (r)
return r;
@@ -259,6 +266,36 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
return 0;
}
+/**
+ * amdgpu_sync_kfd - collect the KFD fences of a reservation object
+ *
+ * @sync: sync object to add KFD fences to
+ * @resv: reservation object with KFD fences
+ *
+ * Iterate over the fences of @resv up to DMA_RESV_USAGE_BOOKKEEP with the
+ * unlocked iterator and add each fence whose owner is
+ * AMDGPU_FENCE_OWNER_KFD to @sync. Fences of any other owner are skipped.
+ *
+ * Returns:
+ * 0 on success, otherwise the first error reported by amdgpu_sync_fence()
+ */
+int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
+{
+	struct dma_resv_iter iter;
+	struct dma_fence *fence;
+	int ret = 0;
+
+	dma_resv_iter_begin(&iter, resv, DMA_RESV_USAGE_BOOKKEEP);
+	dma_resv_for_each_fence_unlocked(&iter, fence) {
+		if (amdgpu_sync_get_owner(fence) != AMDGPU_FENCE_OWNER_KFD)
+			continue;
+
+		ret = amdgpu_sync_fence(sync, fence, GFP_KERNEL);
+		if (ret)
+			break;
+	}
+	/* The iterator is ended on the error path as well */
+	dma_resv_iter_end(&iter);
+
+	return ret;
+}
+
/* Free the entry back to the slab */
static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
{
@@ -357,7 +394,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
- r = amdgpu_sync_fence(clone, f);
+ r = amdgpu_sync_fence(clone, f, GFP_KERNEL);
if (r)
return r;
} else {
@@ -369,6 +406,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
}
/**
+ * amdgpu_sync_move - move all fences from src to dst
+ *
+ * @src: source of the fences, empty after function
+ * @dst: destination for the fences
+ *
+ * Moves all fences from source to destination. All fences in destination are
+ * freed and source is empty after the function call.
+ */
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
+{
+	unsigned int bucket = 0;
+
+	/* Release whatever @dst holds before it takes over the lists of @src */
+	amdgpu_sync_free(dst);
+
+	/* Hand over the hash table one bucket at a time */
+	while (bucket < HASH_SIZE(src->fences)) {
+		hlist_move_list(&src->fences[bucket], &dst->fences[bucket]);
+		bucket++;
+	}
+}
+
+/**
* amdgpu_sync_push_to_job - push fences into job
* @sync: sync object to get the fences from
* @job: job to push the fences into
@@ -440,9 +496,7 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
*/
int amdgpu_sync_init(void)
{
- amdgpu_sync_slab = kmem_cache_create(
- "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ amdgpu_sync_slab = KMEM_CACHE(amdgpu_sync_entry, SLAB_HWCACHE_ALIGN);
if (!amdgpu_sync_slab)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index cf1e9e858efd..51eb4382c91e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -47,14 +47,17 @@ struct amdgpu_sync {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ gfp_t flags);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
+int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 2fd1bfb35916..d13e64a69e25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -167,25 +167,23 @@ TRACE_EVENT(amdgpu_cs_ioctl,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
- __field(uint64_t, sched_job_id)
__string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
- __field(unsigned int, context)
- __field(unsigned int, seqno)
+ __field(u64, context)
+ __field(u64, seqno)
__field(struct dma_fence *, fence)
__string(ring, to_amdgpu_ring(job->base.sched)->name)
__field(u32, num_ibs)
),
TP_fast_assign(
- __entry->sched_job_id = job->base.id;
- __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job));
+ __assign_str(timeline);
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __assign_str(ring, to_amdgpu_ring(job->base.sched)->name);
+ __assign_str(ring);
__entry->num_ibs = job->num_ibs;
),
- TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
- __entry->sched_job_id, __get_str(timeline), __entry->context,
+ TP_printk("timeline=%s, fence=%llu:%llu, ring_name=%s, num_ibs=%u",
+ __get_str(timeline), __entry->context,
__entry->seqno, __get_str(ring), __entry->num_ibs)
);
@@ -193,24 +191,22 @@ TRACE_EVENT(amdgpu_sched_run_job,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
- __field(uint64_t, sched_job_id)
__string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
- __field(unsigned int, context)
- __field(unsigned int, seqno)
+ __field(u64, context)
+ __field(u64, seqno)
__string(ring, to_amdgpu_ring(job->base.sched)->name)
__field(u32, num_ibs)
),
TP_fast_assign(
- __entry->sched_job_id = job->base.id;
- __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job));
+ __assign_str(timeline);
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __assign_str(ring, to_amdgpu_ring(job->base.sched)->name);
+ __assign_str(ring);
__entry->num_ibs = job->num_ibs;
),
- TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
- __entry->sched_job_id, __get_str(timeline), __entry->context,
+ TP_printk("timeline=%s, fence=%llu:%llu, ring_name=%s, num_ibs=%u",
+ __get_str(timeline), __entry->context,
__entry->seqno, __get_str(ring), __entry->num_ibs)
);
@@ -231,7 +227,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_fast_assign(
__entry->pasid = vm->pasid;
- __assign_str(ring, ring->name);
+ __assign_str(ring);
__entry->vmid = job->vmid;
__entry->vm_hub = ring->vm_hub,
__entry->pd_addr = job->vm_pd_addr;
@@ -425,7 +421,7 @@ TRACE_EVENT(amdgpu_vm_flush,
),
TP_fast_assign(
- __assign_str(ring, ring->name);
+ __assign_str(ring);
__entry->vmid = vmid;
__entry->vm_hub = ring->vm_hub;
__entry->pd_addr = pd_addr;
@@ -457,6 +453,38 @@ DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
TP_ARGS(pasid)
);
+/*
+ * amdgpu_isolation - records two opaque pointers, @prev and @next, and
+ * prints both with %p.
+ *
+ * NOTE(review): presumably these identify the previous and the next owner
+ * when isolation switches between them - confirm against the call site.
+ */
+TRACE_EVENT(amdgpu_isolation,
+ TP_PROTO(void *prev, void *next),
+ TP_ARGS(prev, next),
+ TP_STRUCT__entry(
+ __field(void *, prev)
+ __field(void *, next)
+ ),
+
+ TP_fast_assign(
+ __entry->prev = prev;
+ __entry->next = next;
+ ),
+ TP_printk("prev=%p, next=%p",
+ __entry->prev,
+ __entry->next)
+);
+
+/*
+ * amdgpu_cleaner_shader - records the name of @ring and the sequence
+ * number of @fence.
+ */
+TRACE_EVENT(amdgpu_cleaner_shader,
+	    TP_PROTO(struct amdgpu_ring *ring, struct dma_fence *fence),
+	    TP_ARGS(ring, fence),
+	    TP_STRUCT__entry(
+			     __string(ring, ring->name)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __assign_str(ring);
+			   __entry->seqno = fence->seqno;
+			   ),
+	    /* %llu instead of the non-standard %Lu, as the other events here */
+	    TP_printk("ring=%s, seqno=%llu", __get_str(ring), __entry->seqno)
+);
+
TRACE_EVENT(amdgpu_bo_list_set,
TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
TP_ARGS(list, bo),
@@ -519,23 +547,19 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
__string(ring, sched_job->base.sched->name)
- __field(uint64_t, id)
__field(struct dma_fence *, fence)
- __field(uint64_t, ctx)
- __field(unsigned, seqno)
+ __field(u64, ctx)
+ __field(u64, seqno)
),
TP_fast_assign(
- __assign_str(ring, sched_job->base.sched->name);
- __entry->id = sched_job->base.id;
+ __assign_str(ring);
__entry->fence = fence;
__entry->ctx = fence->context;
__entry->seqno = fence->seqno;
),
- TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
- __get_str(ring), __entry->id,
- __entry->fence, __entry->ctx,
- __entry->seqno)
+ TP_printk("job ring=%s need pipe sync to fence=%llu:%llu",
+ __get_str(ring), __entry->ctx, __entry->seqno)
);
TRACE_EVENT(amdgpu_reset_reg_dumps,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4e51dce3aab5..aa9ee5dffa45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -61,7 +61,7 @@
#include "amdgpu_res_cursor.h"
#include "bif/bif_4_1_d.h"
-MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS("DMA_BUF");
#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
@@ -102,23 +102,19 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
/* Don't handle scatter gather BOs */
if (bo->type == ttm_bo_type_sg) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
}
/* Object isn't an AMDGPU object so ignore */
if (!amdgpu_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
- placement->busy_placement = &placements;
placement->num_placement = 1;
- placement->num_busy_placement = 1;
return;
}
abo = ttm_to_amdgpu_bo(bo);
if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
}
@@ -127,17 +123,18 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case AMDGPU_PL_GWS:
case AMDGPU_PL_OA:
case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) {
/* Move to system memory */
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+
} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
- amdgpu_bo_in_cpu_visible_vram(abo)) {
+ amdgpu_res_cpu_visible(adev, bo->resource)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
@@ -149,8 +146,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
AMDGPU_GEM_DOMAIN_CPU);
abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
abo->placements[0].lpfn = 0;
- abo->placement.busy_placement = &abo->placements[1];
- abo->placement.num_busy_placement = 1;
+ abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
} else {
/* Move to GTT memory */
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
@@ -231,7 +227,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
- AMDGPU_IB_POOL_DELAYED, &job);
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER);
if (r)
return r;
@@ -241,7 +238,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -300,9 +297,12 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
struct amdgpu_res_cursor src_mm, dst_mm;
struct dma_fence *fence = NULL;
int r = 0;
+ uint32_t copy_flags = 0;
+ struct amdgpu_bo *abo_src, *abo_dst;
if (!adev->mman.buffer_funcs_enabled) {
- DRM_ERROR("Trying to move memory with ring turned off.\n");
+ dev_err(adev->dev,
+ "Trying to move memory with ring turned off.\n");
return -EINVAL;
}
@@ -311,7 +311,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
mutex_lock(&adev->mman.gtt_window_lock);
while (src_mm.remaining) {
- uint64_t from, to, cur_size;
+ uint64_t from, to, cur_size, tiling_flags;
+ uint32_t num_type, data_format, max_com, write_compress_disable;
struct dma_fence *next;
/* Never copy more than 256MiB at once to avoid a timeout */
@@ -328,8 +329,31 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
if (r)
goto error;
- r = amdgpu_copy_buffer(ring, from, to, cur_size,
- resv, &next, false, true, tmz);
+ abo_src = ttm_to_amdgpu_bo(src->bo);
+ abo_dst = ttm_to_amdgpu_bo(dst->bo);
+ if (tmz)
+ copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
+ if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (abo_src->tbo.resource->mem_type == TTM_PL_VRAM))
+ copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED;
+ if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (dst->mem->mem_type == TTM_PL_VRAM)) {
+ copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED;
+ amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags);
+ max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+ num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
+ data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
+ write_compress_disable =
+ AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE);
+ copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) |
+ AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) |
+ AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) |
+ AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE,
+ write_compress_disable));
+ }
+
+ r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ &next, false, true, copy_flags);
if (r)
goto error;
@@ -383,11 +407,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
- false);
+ r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+ false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
if (r) {
goto error;
} else if (wipe_fence) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
dma_fence_put(fence);
fence = wipe_fence;
}
@@ -408,40 +433,56 @@ error:
return r;
}
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
*
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
*/
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res)
{
- u64 mem_size = (u64)mem->size;
struct amdgpu_res_cursor cursor;
- u64 end;
- if (mem->mem_type == TTM_PL_SYSTEM ||
- mem->mem_type == TTM_PL_TT)
+ if (!res)
+ return false;
+
+ if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+ res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL ||
+ res->mem_type == AMDGPU_PL_MMIO_REMAP)
return true;
- if (mem->mem_type != TTM_PL_VRAM)
+
+ if (res->mem_type != TTM_PL_VRAM)
return false;
- amdgpu_res_first(mem, 0, mem_size, &cursor);
- end = cursor.start + cursor.size;
+ amdgpu_res_first(res, 0, res->size, &cursor);
while (cursor.remaining) {
+ if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size)
+ return false;
amdgpu_res_next(&cursor, cursor.size);
+ }
- if (!cursor.remaining)
- break;
+ return true;
+}
- /* ttm_resource_ioremap only supports contiguous memory */
- if (end != cursor.start)
- return false;
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ if (!amdgpu_res_cpu_visible(adev, mem))
+ return false;
- end = cursor.start + cursor.size;
- }
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (mem->mem_type == TTM_PL_VRAM &&
+ !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+ return false;
- return end <= adev->gmc.visible_vram_size;
+ return true;
}
/*
@@ -471,14 +512,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
bo->ttm == NULL)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if (old_mem->mem_type == TTM_PL_SYSTEM &&
(new_mem->mem_type == TTM_PL_TT ||
new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if ((old_mem->mem_type == TTM_PL_TT ||
old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
@@ -488,22 +531,26 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_resource_free(bo, &bo->resource);
ttm_bo_assign_mem(bo, new_mem);
- goto out;
+ return 0;
}
if (old_mem->mem_type == AMDGPU_PL_GDS ||
old_mem->mem_type == AMDGPU_PL_GWS ||
old_mem->mem_type == AMDGPU_PL_OA ||
old_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ old_mem->mem_type == AMDGPU_PL_MMIO_REMAP ||
new_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_GWS ||
new_mem->mem_type == AMDGPU_PL_OA ||
- new_mem->mem_type == AMDGPU_PL_DOORBELL) {
+ new_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ new_mem->mem_type == AMDGPU_PL_MMIO_REMAP) {
/* Nothing to save here */
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if (bo->type == ttm_bo_type_device &&
@@ -515,27 +562,28 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
}
- if (adev->mman.buffer_funcs_enabled) {
- if (((old_mem->mem_type == TTM_PL_SYSTEM &&
- new_mem->mem_type == TTM_PL_VRAM) ||
- (old_mem->mem_type == TTM_PL_VRAM &&
- new_mem->mem_type == TTM_PL_SYSTEM))) {
- hop->fpfn = 0;
- hop->lpfn = 0;
- hop->mem_type = TTM_PL_TT;
- hop->flags = TTM_PL_FLAG_TEMPORARY;
- return -EMULTIHOP;
- }
+ if (adev->mman.buffer_funcs_enabled &&
+ ((old_mem->mem_type == TTM_PL_SYSTEM &&
+ new_mem->mem_type == TTM_PL_VRAM) ||
+ (old_mem->mem_type == TTM_PL_VRAM &&
+ new_mem->mem_type == TTM_PL_SYSTEM))) {
+ hop->fpfn = 0;
+ hop->lpfn = 0;
+ hop->mem_type = TTM_PL_TT;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ return -EMULTIHOP;
+ }
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ if (adev->mman.buffer_funcs_enabled)
r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
- } else {
+ else
r = -ENODEV;
- }
if (r) {
/* Check that all memory is CPU accessible */
- if (!amdgpu_mem_visible(adev, old_mem) ||
- !amdgpu_mem_visible(adev, new_mem)) {
+ if (!amdgpu_res_copyable(adev, old_mem) ||
+ !amdgpu_res_copyable(adev, new_mem)) {
pr_err("Move buffer fallback to memcpy unavailable\n");
return r;
}
@@ -545,10 +593,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
}
-out:
- /* update statistics */
+ /* update statistics after the move */
+ if (evict)
+ atomic64_inc(&adev->num_evictions);
atomic64_add(bo->base.size, &adev->num_bytes_moved);
- amdgpu_bo_move_notify(bo, evict, new_mem);
return 0;
}
@@ -561,7 +609,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
struct ttm_resource *mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- size_t bus_size = (size_t)mem->size;
switch (mem->mem_type) {
case TTM_PL_SYSTEM:
@@ -572,9 +619,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
break;
case TTM_PL_VRAM:
mem->bus.offset = mem->start << PAGE_SHIFT;
- /* check if it's visible */
- if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
- return -EINVAL;
if (adev->mman.aper_base_kaddr &&
mem->placement & TTM_PL_FLAG_CONTIGUOUS)
@@ -590,6 +634,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.is_iomem = true;
mem->bus.caching = ttm_uncached;
break;
+ case AMDGPU_PL_MMIO_REMAP:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->rmmio_remap.bus_addr;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
default:
return -EINVAL;
}
@@ -607,6 +657,8 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
+ else if (bo->resource->mem_type == AMDGPU_PL_MMIO_REMAP)
+ return ((uint64_t)(adev->rmmio_remap.bus_addr + cursor.start)) >> PAGE_SHIFT;
return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
}
@@ -656,7 +708,7 @@ struct amdgpu_ttm_tt {
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
* once afterwards to stop HMM tracking
*/
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
struct hmm_range **range)
{
struct ttm_tt *ttm = bo->tbo.ttm;
@@ -693,7 +745,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
readonly = amdgpu_ttm_tt_is_readonly(ttm);
r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
- readonly, NULL, pages, range);
+ readonly, NULL, range);
out_unlock:
mmap_read_unlock(mm);
if (r)
@@ -745,12 +797,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
* that backs user memory and will ultimately be mapped into the device
* address space.
*/
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range)
{
unsigned long i;
for (i = 0; i < ttm->num_pages; ++i)
- ttm->pages[i] = pages ? pages[i] : NULL;
+ ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_pfns[i]) : NULL;
}
/*
@@ -778,7 +830,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
/* Map SG to device */
r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
if (r)
- goto release_sg;
+ goto release_sg_table;
/* convert SG to linear array of pages and dma addresses */
drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
@@ -786,6 +838,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
return 0;
+release_sg_table:
+ sg_free_table(ttm->sg);
release_sg:
kfree(ttm->sg);
ttm->sg = NULL;
@@ -828,8 +882,7 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
uint64_t page_idx, pages_per_xcc;
int i;
- uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
- AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
+ uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
pages_per_xcc = total_pages;
do_div(pages_per_xcc, num_xcc);
@@ -868,6 +921,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
gtt->ttm.dma_address, flags);
}
+ gtt->bound = true;
}
/*
@@ -894,7 +948,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
if (gtt->userptr) {
r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
if (r) {
- DRM_ERROR("failed to pin userptr\n");
+ dev_err(adev->dev, "failed to pin userptr\n");
return r;
}
} else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
@@ -959,16 +1013,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
return 0;
addr = amdgpu_gmc_agp_addr(bo);
- if (addr != AMDGPU_BO_INVALID_OFFSET) {
- bo->resource->start = addr >> PAGE_SHIFT;
+ if (addr != AMDGPU_BO_INVALID_OFFSET)
return 0;
- }
/* allocate GART space */
placement.num_placement = 1;
placement.placement = &placements;
- placement.num_busy_placement = 1;
- placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
placements.mem_type = TTM_PL_TT;
@@ -1024,7 +1074,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
/* if the pages have userptr pinning then clear that first */
if (gtt->userptr) {
amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
- } else if (ttm->sg && gtt->gobj->import_attach) {
+ } else if (ttm->sg && drm_gem_is_imported(gtt->gobj)) {
struct dma_buf_attachment *attach;
attach = gtt->gobj->import_attach;
@@ -1318,7 +1368,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
if (mem && (mem->mem_type == TTM_PL_TT ||
mem->mem_type == AMDGPU_PL_DOORBELL ||
- mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ mem->mem_type == AMDGPU_PL_PREEMPT ||
+ mem->mem_type == AMDGPU_PL_MMIO_REMAP)) {
flags |= AMDGPU_PTE_SYSTEM;
if (ttm->caching == ttm_cached)
@@ -1387,7 +1438,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
*/
dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP, f) {
- if (amdkfd_fence_check_mm(f, current->mm))
+ if (amdkfd_fence_check_mm(f, current->mm) &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS))
return false;
}
@@ -1472,7 +1524,8 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, AMDGPU_IB_POOL_DELAYED,
- &job);
+ &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA);
if (r)
goto out;
@@ -1484,7 +1537,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
swap(src_addr, dst_addr);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
- PAGE_SIZE, false);
+ PAGE_SIZE, 0);
amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -1727,7 +1780,10 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
reserve_size =
amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
- if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ if (!adev->bios &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)))
reserve_size = max(reserve_size, (uint32_t)280 << 20);
else if (!reserve_size)
reserve_size = DISCOVERY_TMR_OFFSET;
@@ -1741,7 +1797,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
&ctx->c2p_bo,
NULL);
if (ret) {
- DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
+ dev_err(adev->dev, "alloc c2p_bo failed(%d)!\n", ret);
amdgpu_ttm_training_reserve_vram_fini(adev);
return ret;
}
@@ -1753,7 +1809,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
adev, adev->gmc.real_vram_size - reserve_size,
reserve_size, &adev->mman.fw_reserved_memory, NULL);
if (ret) {
- DRM_ERROR("alloc tmr failed(%d)!\n", ret);
+ dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret);
amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
NULL, NULL);
return ret;
@@ -1800,6 +1856,59 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
adev->mman.ttm_pools = NULL;
}
+/**
+ * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the
+ * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host
+ * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular
+ * GEM object (amdgpu_bo_create).
+ *
+ * Return:
+ * * 0 on success or intentional skip (feature not present/unsupported)
+ * * negative errno on allocation failure
+ */
+static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo_param bp;
+ int r;
+
+ /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */
+ if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE)
+ return 0;
+
+ memset(&bp, 0, sizeof(bp));
+
+ /* Create exactly one GEM BO in the MMIO_REMAP domain. */
+ bp.type = ttm_bo_type_device; /* userspace-mappable GEM */
+ bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP;
+ bp.flags = 0;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Frees the kernel-owned MMIO_REMAP BO if it was allocated by
+ * amdgpu_ttm_mmio_remap_bo_init().
+ */
+static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_unref(&adev->rmmio_remap.bo);
+ adev->rmmio_remap.bo = NULL;
+}
+
/*
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
@@ -1816,6 +1925,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
mutex_init(&adev->mman.gtt_window_lock);
+ dma_set_max_seg_size(adev->dev, UINT_MAX);
/* No others user of address space so set it to 0 */
r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
adev_to_drm(adev)->anon_inode->i_mapping,
@@ -1823,22 +1933,25 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
adev->need_swiotlb,
dma_addressing_limited(adev->dev));
if (r) {
- DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
+ dev_err(adev->dev,
+ "failed initializing buffer object driver(%d).\n", r);
return r;
}
r = amdgpu_ttm_pools_init(adev);
if (r) {
- DRM_ERROR("failed to init ttm pools(%d).\n", r);
+ dev_err(adev->dev, "failed to init ttm pools(%d).\n", r);
return r;
}
adev->mman.initialized = true;
- /* Initialize VRAM pool with all of VRAM divided into pages */
- r = amdgpu_vram_mgr_init(adev);
- if (r) {
- DRM_ERROR("Failed initializing VRAM heap.\n");
- return r;
+ if (!adev->gmc.is_app_apu) {
+ /* Initialize VRAM pool with all of VRAM divided into pages */
+ r = amdgpu_vram_mgr_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing VRAM heap.\n");
+ return r;
+ }
}
/* Change the size here instead of the init above so only lpfn is affected */
@@ -1917,63 +2030,89 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
}
- DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
+ dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n",
(unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
/* Compute GTT size, either based on TTM limit
* or whatever the user passed on module init.
*/
- if (amdgpu_gtt_size == -1)
- gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
- else
- gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+ gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+ if (amdgpu_gtt_size != -1) {
+ uint64_t configured_size = (uint64_t)amdgpu_gtt_size << 20;
+
+ drm_warn(&adev->ddev,
+ "Configuring gttsize via module parameter is deprecated, please use ttm.pages_limit\n");
+ if (gtt_size != configured_size)
+ drm_warn(&adev->ddev,
+ "GTT size has been set as %llu but TTM size has been set as %llu, this is unusual\n",
+ configured_size, gtt_size);
+
+ gtt_size = configured_size;
+ }
/* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size);
if (r) {
- DRM_ERROR("Failed initializing GTT heap.\n");
+ dev_err(adev->dev, "Failed initializing GTT heap.\n");
return r;
}
- DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
+ dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n",
(unsigned int)(gtt_size / (1024 * 1024)));
- /* Initiailize doorbell pool on PCI BAR */
+ if (adev->flags & AMD_IS_APU) {
+ if (adev->gmc.real_vram_size < gtt_size)
+ adev->apu_prefer_gtt = true;
+ }
+
+ /* Initialize doorbell pool on PCI BAR */
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
if (r) {
- DRM_ERROR("Failed initializing doorbell heap.\n");
+ dev_err(adev->dev, "Failed initializing doorbell heap.\n");
return r;
}
/* Create a boorbell page for kernel usages */
r = amdgpu_doorbell_create_kernel_doorbells(adev);
if (r) {
- DRM_ERROR("Failed to initialize kernel doorbells.\n");
+ dev_err(adev->dev, "Failed to initialize kernel doorbells.\n");
+ return r;
+ }
+
+ /* Initialize MMIO-remap pool (single page 4K) */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing MMIO-remap heap.\n");
return r;
}
+ /* Allocate the singleton MMIO_REMAP BO (4K) if supported */
+ r = amdgpu_ttm_mmio_remap_bo_init(adev);
+ if (r)
+ return r;
+
/* Initialize preemptible memory pool */
r = amdgpu_preempt_mgr_init(adev);
if (r) {
- DRM_ERROR("Failed initializing PREEMPT heap.\n");
+ dev_err(adev->dev, "Failed initializing PREEMPT heap.\n");
return r;
}
/* Initialize various on-chip memory pools */
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
if (r) {
- DRM_ERROR("Failed initializing GDS heap.\n");
+ dev_err(adev->dev, "Failed initializing GDS heap.\n");
return r;
}
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
if (r) {
- DRM_ERROR("Failed initializing gws heap.\n");
+ dev_err(adev->dev, "Failed initializing gws heap.\n");
return r;
}
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
if (r) {
- DRM_ERROR("Failed initializing oa heap.\n");
+ dev_err(adev->dev, "Failed initializing oa heap.\n");
return r;
}
if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
@@ -2005,12 +2144,16 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
/* return the FW reserved memory back to VRAM */
amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
NULL);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL,
+ NULL);
if (adev->mman.stolen_reserved_size)
amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
NULL, NULL);
}
amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
&adev->mman.sdma_access_ptr);
+
+ amdgpu_ttm_mmio_remap_bo_fini(adev);
amdgpu_ttm_fw_reserve_vram_fini(adev);
amdgpu_ttm_drv_reserve_vram_fini(adev);
@@ -2023,15 +2166,20 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
drm_dev_exit(idx);
}
- amdgpu_vram_mgr_fini(adev);
+ if (!adev->gmc.is_app_apu)
+ amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);
+ amdgpu_doorbell_fini(adev);
+
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP);
ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
- DRM_INFO("amdgpu: ttm finalized\n");
+ dev_info(adev->dev, "amdgpu: ttm finalized\n");
}
/**
@@ -2063,8 +2211,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
DRM_SCHED_PRIORITY_KERNEL, &sched,
1, NULL);
if (r) {
- DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
- r);
+ dev_err(adev->dev,
+ "Failed setting up TTM BO move entity (%d)\n",
+ r);
return;
}
@@ -2072,8 +2221,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
DRM_SCHED_PRIORITY_NORMAL, &sched,
1, NULL);
if (r) {
- DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
- r);
+ dev_err(adev->dev,
+ "Failed setting up TTM BO move entity (%d)\n",
+ r);
goto error_free_entity;
}
} else {
@@ -2103,7 +2253,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
struct dma_resv *resv,
bool vm_needs_flush,
struct amdgpu_job **job,
- bool delayed)
+ bool delayed, u64 k_job_id)
{
enum amdgpu_ib_pool_type pool = direct_submit ?
AMDGPU_IB_POOL_DIRECT :
@@ -2113,7 +2263,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
&adev->mman.high_pr;
r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
- num_dw * 4, pool, job);
+ num_dw * 4, pool, job, k_job_id);
if (r)
return r;
@@ -2134,7 +2284,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz)
+ bool vm_needs_flush, uint32_t copy_flags)
{
struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
@@ -2144,7 +2294,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
int r;
if (!direct_submit && !ring->sched.ready) {
- DRM_ERROR("Trying to move memory with ring turned off.\n");
+ dev_err(adev->dev,
+ "Trying to move memory with ring turned off.\n");
return -EINVAL;
}
@@ -2152,7 +2303,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
- resv, vm_needs_flush, &job, false);
+ resv, vm_needs_flush, &job, false,
+ AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
if (r)
return r;
@@ -2160,8 +2312,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
- dst_offset, cur_size_in_bytes, tmz);
-
+ dst_offset, cur_size_in_bytes, copy_flags);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
@@ -2180,7 +2331,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
error_free:
amdgpu_job_free(job);
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev, "Error scheduling IBs (%d)\n", r);
return r;
}
@@ -2188,7 +2339,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
uint64_t dst_addr, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence,
- bool vm_needs_flush, bool delayed)
+ bool vm_needs_flush, bool delayed,
+ u64 k_job_id)
{
struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
@@ -2201,7 +2353,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
- &job, delayed);
+ &job, delayed, k_job_id);
if (r)
return r;
@@ -2221,11 +2373,78 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
return 0;
}
+/**
+ * amdgpu_ttm_clear_buffer - clear memory buffers
+ * @bo: amdgpu buffer object
+ * @resv: reservation object
+ * @fence: dma_fence associated with the operation
+ *
+ * Clear the memory buffer resource.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor cursor;
+ u64 addr;
+ int r = 0;
+
+ if (!adev->mman.buffer_funcs_enabled)
+ return -EINVAL;
+
+ if (!fence)
+ return -EINVAL;
+
+ *fence = dma_fence_get_stub();
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (cursor.remaining) {
+ struct dma_fence *next = NULL;
+ u64 size;
+
+ if (amdgpu_res_cleared(&cursor)) {
+ amdgpu_res_next(&cursor, cursor.size);
+ continue;
+ }
+
+ /* Never clear more than 256MiB at once to avoid timeouts */
+ size = min(cursor.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ 1, ring, false, &size, &addr);
+ if (r)
+ goto err;
+
+ r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+ &next, true, true,
+ AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
+ if (r)
+ goto err;
+
+ dma_fence_put(*fence);
+ *fence = next;
+
+ amdgpu_res_next(&cursor, size);
+ }
+err:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ return r;
+}
+
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
struct dma_fence **f,
- bool delayed)
+ bool delayed,
+ u64 k_job_id)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@@ -2234,7 +2453,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
int r;
if (!adev->mman.buffer_funcs_enabled) {
- DRM_ERROR("Trying to clear memory with ring turned off.\n");
+ dev_err(adev->dev,
+ "Trying to clear memory with ring turned off.\n");
return -EINVAL;
}
@@ -2254,7 +2474,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
goto error;
r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
- &next, true, delayed);
+ &next, true, delayed, k_job_id);
if (r)
goto error;
@@ -2294,7 +2514,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
man = ttm_manager_type(&adev->mman.bdev, mem_type);
break;
default:
- DRM_ERROR("Trying to evict invalid memory type\n");
+ dev_err(adev->dev, "Trying to evict invalid memory type\n");
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 65ec82141a8e..0be2728aa872 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -26,20 +26,20 @@
#include <linux/dma-direction.h>
#include <drm/gpu_scheduler.h>
+#include <drm/ttm/ttm_placement.h>
#include "amdgpu_vram_mgr.h"
-#include "amdgpu.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
+#define AMDGPU_PL_MMIO_REMAP (TTM_PL_PRIV + 5)
+#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6)
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
-#define AMDGPU_POISON 0xd0bed0be
-
extern const struct attribute_group amdgpu_vram_mgr_attr_group;
extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
@@ -87,6 +87,7 @@ struct amdgpu_mman {
uint32_t discovery_tmr_size;
/* fw reserved memory */
struct amdgpu_bo *fw_reserved_memory;
+ struct amdgpu_bo *fw_reserved_memory_extend;
/* firmware VRAM reservation */
u64 fw_vram_usage_start_offset;
@@ -111,6 +112,23 @@ struct amdgpu_copy_mem {
unsigned long offset;
};
+#define AMDGPU_COPY_FLAGS_TMZ (1 << 0)
+#define AMDGPU_COPY_FLAGS_READ_DECOMPRESSED (1 << 1)
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESSED (1 << 2)
+#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_SHIFT 3
+#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_MASK 0x03
+#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_SHIFT 5
+#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07
+#define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8
+#define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1
+
+#define AMDGPU_COPY_FLAGS_SET(field, value) \
+ (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT)
+#define AMDGPU_COPY_FLAGS_GET(value, field) \
+ (((__u32)(value) >> AMDGPU_COPY_FLAGS_##field##_SHIFT) & AMDGPU_COPY_FLAGS_##field##_MASK)
+
int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
@@ -138,35 +156,42 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
uint64_t start, uint64_t size);
int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start);
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev);
+
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res);
int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
bool enable);
-
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz);
+ bool vm_needs_flush, uint32_t copy_flags);
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
const struct amdgpu_copy_mem *src,
const struct amdgpu_copy_mem *dst,
uint64_t size, bool tmz,
struct dma_resv *resv,
struct dma_fence **f);
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
struct dma_fence **fence,
- bool delayed);
+ bool delayed,
+ u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
struct hmm_range **range);
void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
struct hmm_range *range);
@@ -174,7 +199,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
struct hmm_range *range);
#else
static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
- struct page **pages,
struct hmm_range **range)
{
return -EPERM;
@@ -190,7 +214,7 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
}
#endif
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range);
int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
uint64_t *user_addr);
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 8beefc045e14..e96f24e9ad57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -28,6 +28,13 @@
#include "amdgpu.h"
#include "amdgpu_ucode.h"
+#define AMDGPU_UCODE_NAME_MAX (128)
+
+static const struct kicker_device kicker_device_list[] = {
+ {0x744B, 0x00},
+ {0x7551, 0xC8}
+};
+
static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr)
{
DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes));
@@ -323,6 +330,12 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
DRM_DEBUG("ctl_ucode_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_ucode_offset));
DRM_DEBUG("ctl_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_offset));
DRM_DEBUG("ctl_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_size));
+ } else if (version_major == 3) {
+ const struct sdma_firmware_header_v3_0 *sdma_hdr =
+ container_of(hdr, struct sdma_firmware_header_v3_0, header);
+
+ DRM_DEBUG("ucode_reversion: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_feature_version));
} else {
DRM_ERROR("Unknown SDMA ucode version: %u.%u\n",
version_major, version_minor);
@@ -556,6 +569,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
default:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
+ else if (load_type == 3)
+ return AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO;
else
return AMDGPU_FW_LOAD_PSP;
}
@@ -642,6 +657,8 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "SMC";
case AMDGPU_UCODE_ID_PPTABLE:
return "PPTABLE";
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ return "P2STABLE";
case AMDGPU_UCODE_ID_UVD:
return "UVD";
case AMDGPU_UCODE_ID_UVD1:
@@ -664,20 +681,70 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "DMCUB";
case AMDGPU_UCODE_ID_CAP:
return "CAP";
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ return "VPE_CTX";
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ return "VPE_CTL";
+ case AMDGPU_UCODE_ID_VPE:
+ return "VPE";
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ return "UMSCH_MM_UCODE";
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ return "UMSCH_MM_DATA";
+ case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
+ return "UMSCH_MM_CMD_BUFFER";
+ case AMDGPU_UCODE_ID_JPEG_RAM:
+ return "JPEG";
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ return "RS64_SDMA";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ return "RS64_PFP";
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ return "RS64_ME";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ return "RS64_MEC";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ return "RS64_PFP_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ return "RS64_PFP_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ return "RS64_ME_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ return "RS64_ME_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ return "RS64_MEC_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ return "RS64_MEC_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ return "RS64_MEC_P2_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ return "RS64_MEC_P3_STACK";
+ case AMDGPU_UCODE_ID_ISP:
+ return "ISP";
default:
return "UNKNOWN UCODE";
}
}
+static inline int amdgpu_ucode_is_valid(uint32_t fw_version)
+{
+ if (!fw_version)
+ return -EINVAL;
+
+ return 0;
+}
+
#define FW_VERSION_ATTR(name, mode, field) \
static ssize_t show_##name(struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
+ struct device_attribute *attr, char *buf) \
{ \
struct drm_device *ddev = dev_get_drvdata(dev); \
struct amdgpu_device *adev = drm_to_adev(ddev); \
\
- return sysfs_emit(buf, "0x%08x\n", adev->field); \
+ if (!buf) \
+ return amdgpu_ucode_is_valid(adev->field); \
+ \
+ return sysfs_emit(buf, "0x%08x\n", adev->field); \
} \
static DEVICE_ATTR(name, mode, show_##name, NULL)
@@ -703,8 +770,10 @@ FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+FW_VERSION_ATTR(dmcub_fw_version, 0444, dm.dmcub_fw_version);
FW_VERSION_ATTR(mes_fw_version, 0444, mes.sched_version & AMDGPU_MES_VERSION_MASK);
FW_VERSION_ATTR(mes_kiq_fw_version, 0444, mes.kiq_version & AMDGPU_MES_VERSION_MASK);
+FW_VERSION_ATTR(pldm_fw_version, 0444, firmware.pldm_version);
static struct attribute *fw_attrs[] = {
&dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
@@ -717,14 +786,30 @@ static struct attribute *fw_attrs[] = {
&dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
- &dev_attr_dmcu_fw_version.attr, &dev_attr_imu_fw_version.attr,
- &dev_attr_mes_fw_version.attr, &dev_attr_mes_kiq_fw_version.attr,
+ &dev_attr_dmcu_fw_version.attr, &dev_attr_dmcub_fw_version.attr,
+ &dev_attr_imu_fw_version.attr, &dev_attr_mes_fw_version.attr,
+ &dev_attr_mes_kiq_fw_version.attr, &dev_attr_pldm_fw_version.attr,
NULL
};
+#define to_dev_attr(x) container_of(x, struct device_attribute, attr)
+
+static umode_t amdgpu_ucode_sys_visible(struct kobject *kobj,
+ struct attribute *attr, int idx)
+{
+ struct device_attribute *dev_attr = to_dev_attr(attr);
+ struct device *dev = kobj_to_dev(kobj);
+
+ if (dev_attr->show(dev, dev_attr, NULL) == -EINVAL)
+ return 0;
+
+ return attr->mode;
+}
+
static const struct attribute_group fw_attr_group = {
.name = "fw_version",
- .attrs = fw_attrs
+ .attrs = fw_attrs,
+ .is_visible = amdgpu_ucode_sys_visible
};
int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
@@ -748,7 +833,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
const struct mes_firmware_header_v1_0 *mes_hdr = NULL;
const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL;
+ const struct sdma_firmware_header_v3_0 *sdmav3_hdr = NULL;
const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
+ const struct vpe_firmware_header_v1_0 *vpe_hdr = NULL;
+ const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr = NULL;
u8 *ucode_addr;
if (!ucode->fw)
@@ -767,7 +855,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data;
sdma_hdr = (const struct sdma_firmware_header_v2_0 *)ucode->fw->data;
+ sdmav3_hdr = (const struct sdma_firmware_header_v3_0 *)ucode->fw->data;
imu_hdr = (const struct imu_firmware_header_v1_0 *)ucode->fw->data;
+ vpe_hdr = (const struct vpe_firmware_header_v1_0 *)ucode->fw->data;
+ umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
switch (ucode->ucode_id) {
@@ -781,6 +872,11 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(sdma_hdr->ctl_ucode_offset);
break;
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ ucode->ucode_size = le32_to_cpu(sdmav3_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdmav3_hdr->header.ucode_array_offset_bytes);
+ break;
case AMDGPU_UCODE_ID_CP_MEC1:
case AMDGPU_UCODE_ID_CP_MEC2:
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
@@ -884,6 +980,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_size = ucode->fw->size;
ucode_addr = (u8 *)ucode->fw->data;
break;
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ ucode->ucode_size = ucode->fw->size;
+ ucode_addr = (u8 *)ucode->fw->data;
+ break;
case AMDGPU_UCODE_ID_IMU_I:
ucode->ucode_size = le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
@@ -950,6 +1050,26 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(cpv2_hdr->data_offset_bytes);
break;
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ ucode->ucode_size = le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(vpe_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ ucode->ucode_size = le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(vpe_hdr->ctl_ucode_offset);
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(umsch_mm_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes);
+ break;
default:
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
@@ -993,9 +1113,11 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
{
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
+ if ((adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) &&
+ (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) {
amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
- amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&adev->firmware.fw_buf,
&adev->firmware.fw_buf_mc,
&adev->firmware.fw_buf_ptr);
@@ -1038,6 +1160,9 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM;
}
+ if (amdgpu_virt_xgmi_migrate_enabled(adev) && adev->firmware.fw_buf)
+ adev->firmware.fw_buf_mc = amdgpu_bo_fb_aper_addr(adev->firmware.fw_buf);
+
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (ucode->fw) {
@@ -1061,7 +1186,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int block_type)
{
if (block_type == MP0_HWIP) {
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
switch (adev->asic_type) {
case CHIP_VEGA10:
@@ -1102,6 +1227,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl
case IP_VERSION(11, 0, 13):
return "beige_goby";
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
return "vangogh";
case IP_VERSION(12, 0, 1):
return "green_sardine";
@@ -1112,7 +1238,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl
return "yellow_carp";
}
} else if (block_type == MP1_HWIP) {
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
case IP_VERSION(10, 0, 0):
case IP_VERSION(10, 0, 1):
@@ -1138,7 +1264,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl
return "aldebaran_smc";
}
} else if (block_type == SDMA0_HWIP) {
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
return "vega10_sdma";
case IP_VERSION(4, 0, 1):
@@ -1182,7 +1308,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl
return "vangogh_sdma";
}
} else if (block_type == UVD_HWIP) {
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
if (adev->apu_flags & AMD_APU_IS_RAVEN2)
@@ -1207,7 +1333,8 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 64):
case IP_VERSION(3, 0, 192):
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0))
return "sienna_cichlid_vcn";
return "navy_flounder_vcn";
case IP_VERSION(3, 0, 2):
@@ -1220,7 +1347,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl
return "yellow_carp_vcn";
}
} else if (block_type == GC_HWIP) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
return "vega10";
case IP_VERSION(9, 2, 1):
@@ -1268,12 +1395,25 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl
return NULL;
}
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) {
+ if (adev->pdev->device == kicker_device_list[i].device &&
+ adev->pdev->revision == kicker_device_list[i].revision)
+ return true;
+ }
+
+ return false;
+}
+
void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len)
{
int maj, min, rev;
char *ip_name;
const char *legacy;
- uint32_t version = adev->ip_versions[block_type][0];
+ uint32_t version = amdgpu_ip_version(adev, block_type, 0);
legacy = amdgpu_ucode_legacy_naming(adev, block_type);
if (legacy) {
@@ -1297,6 +1437,12 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type,
case UVD_HWIP:
ip_name = "vcn";
break;
+ case VPE_HWIP:
+ ip_name = "vpe";
+ break;
+ case ISP_HWIP:
+ ip_name = "isp";
+ break;
default:
BUG();
}
@@ -1313,24 +1459,49 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type,
*
* @adev: amdgpu device
* @fw: pointer to load firmware to
- * @fw_name: firmware to load
+ * @required: whether the firmware is required
+ * @fmt: firmware name format string
+ * @...: variable arguments
*
* This is a helper that will use request_firmware and amdgpu_ucode_validate
* to load and run basic validation on firmware. If the load fails, remap
* the error code to -ENODEV, so that early_init functions will fail to load.
*/
int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
- const char *fw_name)
+ enum amdgpu_ucode_required required, const char *fmt, ...)
{
- int err = request_firmware(fw, fw_name, adev->dev);
+ char fname[AMDGPU_UCODE_NAME_MAX];
+ va_list ap;
+ int r;
+
+ va_start(ap, fmt);
+ r = vsnprintf(fname, sizeof(fname), fmt, ap);
+ va_end(ap);
+ /* vsnprintf() returns the untruncated length, so >= size means truncation */
+ if (r < 0 || (size_t)r >= sizeof(fname)) {
+ dev_warn(adev->dev, "amdgpu firmware name buffer overflow\n");
+ return -EOVERFLOW;
+ }
- if (err)
+ if (required == AMDGPU_UCODE_REQUIRED)
+ r = request_firmware(fw, fname, adev->dev);
+ else {
+ r = firmware_request_nowarn(fw, fname, adev->dev);
+ if (r)
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fname);
+ }
+ if (r)
return -ENODEV;
- err = amdgpu_ucode_validate(*fw);
- if (err)
- dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name);
- return err;
+ r = amdgpu_ucode_validate(*fw);
+ if (r)
+ /*
+ * Do not release *fw here: callers pair amdgpu_ucode_request() with
+ * amdgpu_ucode_release() on success and failure alike, which frees it.
+ */
+ dev_dbg(adev->dev, "\"%s\" failed to validate\n", fname);
+
+ return r;
}
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index b03321e7d2d8..6349aad6da35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -25,6 +25,8 @@
#include "amdgpu_socbb.h"
+#define RS64_FW_UC_START_ADDR_LO 0x3000
+
struct common_firmware_header {
uint32_t size_bytes; /* size of the entire header+image(s) in bytes */
uint32_t header_size_bytes; /* size of just the header in bytes */
@@ -125,6 +127,8 @@ enum psp_fw_type {
PSP_FW_TYPE_PSP_INTF_DRV,
PSP_FW_TYPE_PSP_DBG_DRV,
PSP_FW_TYPE_PSP_RAS_DRV,
+ PSP_FW_TYPE_PSP_IPKEYMGR_DRV,
+ PSP_FW_TYPE_PSP_SPDM_DRV,
PSP_FW_TYPE_MAX_INDEX,
};
@@ -135,6 +139,14 @@ struct psp_firmware_header_v2_0 {
struct psp_fw_bin_desc psp_fw_bin[];
};
+/* version_major=2, version_minor=1 */
+struct psp_firmware_header_v2_1 {
+ struct common_firmware_header header;
+ uint32_t psp_fw_bin_count;
+ uint32_t psp_aux_fw_bin_index;
+ struct psp_fw_bin_desc psp_fw_bin[];
+};
+
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -154,6 +166,7 @@ enum ta_fw_type {
TA_FW_TYPE_PSP_DTM,
TA_FW_TYPE_PSP_RAP,
TA_FW_TYPE_PSP_SECUREDISPLAY,
+ TA_FW_TYPE_PSP_XGMI_AUX,
TA_FW_TYPE_MAX_INDEX,
};
@@ -315,6 +328,44 @@ struct sdma_firmware_header_v2_0 {
uint32_t ctl_jt_size; /* control thread size of jt */
};
+/* version_major=1, version_minor=0 */
+struct vpe_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ctx_ucode_size_bytes; /* context thread ucode size */
+ uint32_t ctx_jt_offset; /* context thread jt location */
+ uint32_t ctx_jt_size; /* context thread size of jt */
+ uint32_t ctl_ucode_offset;
+ uint32_t ctl_ucode_size_bytes; /* control thread ucode size */
+ uint32_t ctl_jt_offset; /* control thread jt location */
+ uint32_t ctl_jt_size; /* control thread size of jt */
+};
+
+/* version_major=1, version_minor=0 */
+struct umsch_mm_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t umsch_mm_ucode_version;
+ uint32_t umsch_mm_ucode_size_bytes;
+ uint32_t umsch_mm_ucode_offset_bytes;
+ uint32_t umsch_mm_ucode_data_version;
+ uint32_t umsch_mm_ucode_data_size_bytes;
+ uint32_t umsch_mm_ucode_data_offset_bytes;
+ uint32_t umsch_mm_irq_start_addr_lo;
+ uint32_t umsch_mm_irq_start_addr_hi;
+ uint32_t umsch_mm_uc_start_addr_lo;
+ uint32_t umsch_mm_uc_start_addr_hi;
+ uint32_t umsch_mm_data_start_addr_lo;
+ uint32_t umsch_mm_data_start_addr_hi;
+};
+
+/* version_major=3, version_minor=0 */
+struct sdma_firmware_header_v3_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ucode_offset_bytes;
+ uint32_t ucode_size_bytes;
+};
+
/* gpu info payload */
struct gpu_info_firmware_v1_0 {
uint32_t gc_num_se;
@@ -387,6 +438,7 @@ union amdgpu_firmware_header {
struct psp_firmware_header_v1_1 psp_v1_1;
struct psp_firmware_header_v1_3 psp_v1_3;
struct psp_firmware_header_v2_0 psp_v2_0;
+ struct psp_firmware_header_v2_1 psp_v2_1;
struct ta_firmware_header_v1_0 ta;
struct ta_firmware_header_v2_0 ta_v2_0;
struct gfx_firmware_header_v1_0 gfx;
@@ -400,6 +452,7 @@ union amdgpu_firmware_header {
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct sdma_firmware_header_v2_0 sdma_v2_0;
+ struct sdma_firmware_header_v3_0 sdma_v3_0;
struct gpu_info_firmware_header_v1_0 gpu_info;
struct dmcu_firmware_header_v1_0 dmcu;
struct dmcub_firmware_header_v1_0 dmcub;
@@ -407,7 +460,7 @@ union amdgpu_firmware_header {
uint8_t raw[0x100];
};
-#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc))
+#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2)
/*
* fw loading support
@@ -424,6 +477,7 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_SDMA7,
AMDGPU_UCODE_ID_SDMA_UCODE_TH0,
AMDGPU_UCODE_ID_SDMA_UCODE_TH1,
+ AMDGPU_UCODE_ID_SDMA_RS64,
AMDGPU_UCODE_ID_CP_CE,
AMDGPU_UCODE_ID_CP_PFP,
AMDGPU_UCODE_ID_CP_ME,
@@ -474,6 +528,15 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_VCN0_RAM,
AMDGPU_UCODE_ID_VCN1_RAM,
AMDGPU_UCODE_ID_DMCUB,
+ AMDGPU_UCODE_ID_VPE_CTX,
+ AMDGPU_UCODE_ID_VPE_CTL,
+ AMDGPU_UCODE_ID_VPE,
+ AMDGPU_UCODE_ID_UMSCH_MM_UCODE,
+ AMDGPU_UCODE_ID_UMSCH_MM_DATA,
+ AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
+ AMDGPU_UCODE_ID_P2S_TABLE,
+ AMDGPU_UCODE_ID_JPEG_RAM,
+ AMDGPU_UCODE_ID_ISP,
AMDGPU_UCODE_ID_MAXIMUM,
};
@@ -491,6 +554,11 @@ enum amdgpu_firmware_load_type {
AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO,
};
+enum amdgpu_ucode_required {
+ AMDGPU_UCODE_OPTIONAL,
+ AMDGPU_UCODE_REQUIRED,
+};
+
/* conform to smu_ucode_xfer_cz.h */
#define AMDGPU_SDMA0_UCODE_LOADED 0x00000001
#define AMDGPU_SDMA1_UCODE_LOADED 0x00000002
@@ -534,6 +602,12 @@ struct amdgpu_firmware {
void *fw_buf_ptr;
uint64_t fw_buf_mc;
+ uint32_t pldm_version;
+};
+
+struct kicker_device{
+ unsigned short device;
+ u8 revision;
};
void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
@@ -544,8 +618,9 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
+__printf(4, 5)
int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
- const char *fw_name);
+ enum amdgpu_ucode_required required, const char *fmt, ...);
void amdgpu_ucode_release(const struct firmware **fw);
bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
@@ -562,5 +637,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id);
void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len);
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index db0d94ca4ffc..2e039fb778ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -21,14 +21,18 @@
*
*/
+#include <linux/sort.h>
#include "amdgpu.h"
#include "umc_v6_7.h"
+#define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms
+
+#define MAX_UMC_HASH_STRING_SIZE 256
static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data, uint64_t err_addr,
uint32_t ch_inst, uint32_t umc_inst)
{
- switch (adev->ip_versions[UMC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(6, 7, 0):
umc_v6_7_convert_error_address(adev,
err_data, err_addr, ch_inst, umc_inst);
@@ -45,8 +49,12 @@ static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst)
{
- struct ras_err_data err_data = {0, 0, 0, NULL};
- int ret = AMDGPU_RAS_FAIL;
+ struct ras_err_data err_data;
+ int ret;
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
err_data.err_addr =
kcalloc(adev->umc.max_ras_err_cnt_per_query,
@@ -54,40 +62,50 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
if (!err_data.err_addr) {
dev_warn(adev->dev,
"Failed to alloc memory for umc error record in MCA notifier!\n");
- return AMDGPU_RAS_FAIL;
+ ret = AMDGPU_RAS_FAIL;
+ goto out_fini_err_data;
}
+ err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query;
+
/*
* Translate UMC channel address to Physical address
*/
ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr,
ch_inst, umc_inst);
if (ret)
- goto out;
+ goto out_free_err_addr;
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
- err_data.err_addr_cnt);
+ err_data.err_addr_cnt, false);
amdgpu_ras_save_bad_pages(adev, NULL);
}
-out:
+out_free_err_addr:
kfree(err_data.err_addr);
+
+out_fini_err_data:
+ amdgpu_ras_error_data_fini(&err_data);
+
return ret;
}
-static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
- void *ras_error_status,
- struct amdgpu_iv_entry *entry,
- bool reset)
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+ void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ unsigned int error_query_mode;
int ret = 0;
+ unsigned long err_count;
- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_ras_get_error_query_mode(adev, &error_query_mode);
+
+ mutex_lock(&con->page_retirement_lock);
ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
- if (ret == -EOPNOTSUPP) {
+ if (ret == -EOPNOTSUPP &&
+ error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);
@@ -105,13 +123,16 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
if(!err_data->err_addr)
dev_warn(adev->dev, "Failed to alloc memory for "
"umc error address record!\n");
+ else
+ err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
}
- } else if (!ret) {
+ } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY ||
+ (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) {
if (adev->umc.ras &&
adev->umc.ras->ecc_info_query_ras_error_count)
adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);
@@ -129,6 +150,8 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
if(!err_data->err_addr)
dev_warn(adev->dev, "Failed to alloc memory for "
"umc error address record!\n");
+ else
+ err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
/* umc query_ras_error_address is also responsible for clearing
* error status
@@ -138,34 +161,53 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
}
/* only uncorrectable error needs gpu reset */
- if (err_data->ue_count) {
- dev_info(adev->dev, "%ld uncorrectable hardware errors "
- "detected in UMC block\n",
- err_data->ue_count);
-
+ if (err_data->ue_count || err_data->de_count) {
+ err_count = err_data->ue_count + err_data->de_count;
if ((amdgpu_bad_page_threshold != 0) &&
err_data->err_addr_cnt) {
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
- err_data->err_addr_cnt);
- amdgpu_ras_save_bad_pages(adev, &(err_data->ue_count));
+ err_data->err_addr_cnt, false);
+ amdgpu_ras_save_bad_pages(adev, &err_count);
- amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
+ amdgpu_dpm_send_hbm_bad_pages_num(adev,
+ con->eeprom_control.ras_num_bad_pages);
if (con->update_channel_flag == true) {
amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
con->update_channel_flag = false;
}
}
-
- if (reset)
- amdgpu_ras_reset_gpu(adev);
}
kfree(err_data->err_addr);
+ err_data->err_addr = NULL;
+
+ mutex_unlock(&con->page_retirement_lock);
+}
+
+static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry,
+ uint32_t reset)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_umc_handle_bad_pages(adev, ras_error_status);
+
+ if ((err_data->ue_count || err_data->de_count) &&
+ (reset || amdgpu_ras_is_rma(adev))) {
+ con->gpu_reset_flags |= reset;
+ amdgpu_ras_reset_gpu(adev);
+ }
+
return AMDGPU_RAS_SUCCESS;
}
-int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
{
int ret = AMDGPU_RAS_SUCCESS;
@@ -182,21 +224,41 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
}
if (!amdgpu_sriov_vf(adev)) {
- struct ras_err_data err_data = {0, 0, 0, NULL};
- struct ras_common_if head = {
- .block = AMDGPU_RAS_BLOCK__UMC,
- };
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+ if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
+ struct ras_err_data err_data;
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__UMC,
+ };
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
- ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
+ if (ret == AMDGPU_RAS_SUCCESS && obj) {
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ obj->err_data.de_count += err_data.de_count;
+ }
- if (ret == AMDGPU_RAS_SUCCESS && obj) {
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
+ amdgpu_ras_error_data_fini(&err_data);
+ } else {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret;
+
+ ret = amdgpu_ras_put_poison_req(adev,
+ block, pasid, pasid_fn, data, reset);
+ if (!ret) {
+ atomic_inc(&con->page_retirement_req_cnt);
+ atomic_inc(&con->poison_consumption_count);
+ wake_up(&con->page_retirement_wq);
+ }
}
} else {
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
- adev->virt.ops->ras_poison_handler(adev);
+ adev->virt.ops->ras_poison_handler(adev, block);
else
dev_warn(adev->dev,
"No ras_poison_handler interface in SRIOV!\n");
@@ -205,11 +267,19 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
return ret;
}
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset)
+{
+ return amdgpu_umc_pasid_poison_handler(adev,
+ block, 0, NULL, NULL, reset);
+}
+
int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
void *ras_error_status,
struct amdgpu_iv_entry *entry)
{
- return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true);
+ return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry,
+ AMDGPU_RAS_GPU_RESET_MODE1_RESET);
}
int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev)
@@ -250,6 +320,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r
if (r)
return r;
+ if (amdgpu_sriov_vf(adev))
+ return r;
+
if (amdgpu_ras_is_supported(adev, ras_block->block)) {
r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
if (r)
@@ -286,14 +359,20 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
return 0;
}
-void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr,
uint64_t retired_page,
uint32_t channel_index,
uint32_t umc_inst)
{
- struct eeprom_table_record *err_rec =
- &err_data->err_addr[err_data->err_addr_cnt];
+ struct eeprom_table_record *err_rec;
+
+ if (!err_data ||
+ !err_data->err_addr ||
+ (err_data->err_addr_cnt >= err_data->err_addr_len))
+ return -EINVAL;
+
+ err_rec = &err_data->err_addr[err_data->err_addr_cnt];
err_rec->address = err_addr;
/* page frame address is saved */
@@ -305,6 +384,47 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
+
+ return 0;
+}
+
+static int amdgpu_umc_loop_all_aid(struct amdgpu_device *adev, umc_func func,
+ void *data)
+{
+ uint32_t umc_node_inst;
+ uint32_t node_inst;
+ uint32_t umc_inst;
+ uint32_t ch_inst;
+ int ret;
+
+ /*
+ * This loop is done based on the following -
+ * umc.active mask = mask of active umc instances across all nodes
+ * umc.umc_inst_num = maximum number of umc instances per node
+ * umc.node_inst_num = maximum number of node instances
+ * Channel instances are not assumed to be harvested.
+ */
+ dev_dbg(adev->dev, "active umcs :%lx umc_inst per node: %d",
+ adev->umc.active_mask, adev->umc.umc_inst_num);
+ for_each_set_bit(umc_node_inst, &(adev->umc.active_mask),
+ adev->umc.node_inst_num * adev->umc.umc_inst_num) {
+ node_inst = umc_node_inst / adev->umc.umc_inst_num;
+ umc_inst = umc_node_inst % adev->umc.umc_inst_num;
+ LOOP_UMC_CH_INST(ch_inst) {
+ dev_dbg(adev->dev,
+ "node_inst :%d umc_inst: %d ch_inst: %d",
+ node_inst, umc_inst, ch_inst);
+ ret = func(adev, node_inst, umc_inst, ch_inst, data);
+ if (ret) {
+ dev_err(adev->dev,
+ "Node %d umc %d ch %d func returns %d\n",
+ node_inst, umc_inst, ch_inst, ret);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
}
int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
@@ -315,6 +435,9 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
uint32_t ch_inst = 0;
int ret = 0;
+ if (adev->aid_mask)
+ return amdgpu_umc_loop_all_aid(adev, func, data);
+
if (adev->umc.node_inst_num) {
LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
ret = func(adev, node_inst, umc_inst, ch_inst, data);
@@ -337,3 +460,129 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
return 0;
}
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr)
+{
+ /* Guard umc.ras itself, like the other umc.ras callback users here do. */
+ if (adev->umc.ras && adev->umc.ras->update_ecc_status)
+ return adev->umc.ras->update_ecc_status(adev, status, ipid, addr);
+ return 0;
+}
+
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+ struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_ecc_log_info *ecc_log;
+ int ret;
+
+ ecc_log = &con->umc_ecc_log;
+
+ mutex_lock(&ecc_log->lock);
+ ret = radix_tree_insert(ecc_tree, ecc_err->pa_pfn, ecc_err);
+ if (!ret)
+ radix_tree_tag_set(ecc_tree,
+ ecc_err->pa_pfn, UMC_ECC_NEW_DETECTED_TAG);
+ mutex_unlock(&ecc_log->lock);
+
+ return ret;
+}
+
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr)
+{
+ struct ta_ras_query_address_output addr_out;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ addr_out.pa.pa = pa_addr;
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL,
+ &addr_out, false);
+ else
+ return -EINVAL;
+}
+
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len)
+{
+ int i, ret;
+ struct ras_err_data err_data;
+
+ err_data.err_addr = kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n");
+ return 0;
+ }
+
+ ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ if (i >= len)
+ goto out;
+
+ pfns[i] = err_data.err_addr[i].retired_page;
+ }
+ ret = i;
+ adev->umc.err_addr_cnt = err_data.err_addr_cnt;
+
+out:
+ kfree(err_data.err_addr);
+ return ret;
+}
+
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr)
+{
+ struct ta_ras_query_address_input addr_in;
+ int ret;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = err_addr;
+ addr_in.ma.ch_inst = ch;
+ addr_in.ma.umc_inst = umc;
+ addr_in.ma.node_inst = node;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
+ addr_out, dump_addr);
+ if (ret)
+ return ret;
+ } else {
+ return 0;
+ }
+
+ return 0;
+}
+
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps)
+{
+ struct ta_ras_query_address_input addr_in;
+ struct ta_ras_query_address_output addr_out;
+ int ret;
+
+ /* Zero the request so fields not set below do not carry stack garbage. */
+ memset(&addr_in, 0, sizeof(addr_in));
+ /* The NPS mode that pa belongs to is placed in the bits from 58 upwards. */
+ addr_in.pa.pa = pa | ((uint64_t)nps << 58);
+ addr_in.addr_type = TA_RAS_PA_TO_MCA;
+ ret = psp_ras_query_address(&adev->psp, &addr_in, &addr_out);
+ if (ret) {
+ dev_warn(adev->dev, "Failed to query RAS MCA address for 0x%llx", pa);
+ return ret;
+ }
+
+ *mca = addr_out.ma.err_addr;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 43321f57f557..ec203f9e5ffa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -21,7 +21,7 @@
#ifndef __AMDGPU_UMC_H__
#define __AMDGPU_UMC_H__
#include "amdgpu_ras.h"
-
+#include "amdgpu_mca.h"
/*
* (addr / 256) * 4096, the higher 26 bits in ErrorAddr
* is the index of 4KB block
@@ -32,6 +32,11 @@
* is the index of 8KB block
*/
#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
+/*
+ * (addr / 256) * 32768, the higher 26 bits in ErrorAddr
+ * is the index of 32KB block
+ */
+#define ADDR_OF_32KB_BLOCK(addr) (((addr) & ~0xffULL) << 7)
/* channel index is the index of 256B block */
#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
/* offset in 256B block */
@@ -47,6 +52,43 @@
#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
+/* Page retirement tag */
+#define UMC_ECC_NEW_DETECTED_TAG 0x1
+/*
+ * a flag to indicate v2 of channel index stored in eeprom
+ *
+ * v1 (legacy way): store channel index within a umc instance in eeprom
+ * range in UMC v12: 0 ~ 7
+ * v2: store global channel index in eeprom
+ * range in UMC v12: 0 ~ 127
+ *
+ * NOTE: it's better to store it in eeprom_table_record.mem_channel,
+ * but there is only 8 bits in mem_channel, and the channel number may
+ * increase in the future, we decide to save it in
+ * eeprom_table_record.retired_page. retired_page is useless in v2,
+ * we depend on eeprom_table_record.address instead of retired_page in v2.
+ * Only 48 bits are saved on eeprom, use bit 47 here.
+ */
+#define UMC_CHANNEL_IDX_V2 BIT_ULL(47)
+
+/*
+ * save nps value to eeprom_table_record.retired_page[47:40],
+ * the channel index flag above will be retired.
+ */
+#define UMC_NPS_SHIFT 40
+#define UMC_NPS_MASK 0xffULL
+
+/* three column bits and one row bit in MCA address flip
+ * in bad page retirement
+ */
+#define RETIRE_FLIP_BITS_NUM 4
+
+/*
+ * Bit positions used when flipping address bits for bad page retirement.
+ * NOTE(review): field meanings below are inferred from their names and the
+ * RETIRE_FLIP_BITS_NUM comment only - confirm against the UMC implementation.
+ */
+struct amdgpu_umc_flip_bits {
+	/* presumably positions in the physical address of the bits to flip */
+	uint32_t flip_bits_in_pa[RETIRE_FLIP_BITS_NUM];
+	/* presumably the row bit to flip - TODO confirm */
+	uint32_t flip_row_bit;
+	/* presumably position of row bit 13 in the physical address - TODO confirm */
+	uint32_t r13_in_pa;
+	/* presumably number of valid entries in flip_bits_in_pa - TODO confirm */
+	uint32_t bit_num;
+};
typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
uint32_t umc_inst, uint32_t ch_inst, void *data);
@@ -59,8 +101,18 @@ struct amdgpu_umc_ras {
void *ras_error_status);
void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
void *ras_error_status);
- /* support different eeprom table version for different asic */
- void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr);
+ bool (*check_ecc_err_status)(struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, void *ras_error_status);
+ int (*update_ecc_status)(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
+ int (*convert_ras_err_addr)(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out,
+ bool dump_addr);
+ uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page);
+ void (*get_retire_flip_bits)(struct amdgpu_device *adev);
};
struct amdgpu_umc_funcs {
@@ -91,15 +143,23 @@ struct amdgpu_umc {
/* active mask for umc node instance */
unsigned long active_mask;
+
+ struct amdgpu_umc_flip_bits flip_bits;
+
+ unsigned long err_addr_cnt;
};
int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
-int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset);
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset);
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
-void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr,
uint64_t retired_page,
uint32_t channel_index,
@@ -113,4 +173,22 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
umc_func func, void *data);
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+ struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err);
+
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+ void *ras_error_status);
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr);
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len);
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr);
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
index 107f9bb0e24f..5b27fc41ffbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
@@ -69,12 +69,12 @@ struct amdgpu_debugfs_gprwave_data {
};
enum AMDGPU_DEBUGFS_REGS2_CMDS {
- AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0,
+ AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE = 0,
AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2,
};
enum AMDGPU_DEBUGFS_GPRWAVE_CMDS {
- AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0,
+ AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE = 0,
};
//reg2 interface
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
new file mode 100644
index 000000000000..cd707d70a0bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_umsch_mm.h"
+#include "umsch_mm_v4_0.h"
+
+MODULE_FIRMWARE("amdgpu/umsch_mm_4_0_0.bin");
+
+/*
+ * Copy a packet of @ndws dwords from @pkt into the UMSCH ring and commit it.
+ *
+ * Returns 0 on success or -ENOMEM when ring space could not be allocated.
+ */
+int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws)
+{
+	int r;
+
+	r = amdgpu_ring_alloc(&umsch->ring, ndws);
+	if (r)
+		return -ENOMEM;
+
+	amdgpu_ring_write_multiple(&umsch->ring, pkt, ndws);
+	amdgpu_ring_commit(&umsch->ring);
+
+	return 0;
+}
+
+/*
+ * Poll until the most recently emitted fence on the UMSCH ring signals.
+ *
+ * Waits for ring->fence_drv.sync_seq with adev->usec_timeout as the timeout.
+ * Returns 0 when the wait reports a positive result, -ETIMEDOUT otherwise.
+ */
+int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch)
+{
+	struct amdgpu_ring *ring = &umsch->ring;
+	struct amdgpu_device *adev = ring->adev;
+	int r;
+
+	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
+				      adev->usec_timeout);
+	if (r >= 1)
+		return 0;
+
+	dev_err(adev->dev, "ring umsch timeout, emitted fence %u\n",
+		ring->fence_drv.sync_seq);
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Publish the ring write pointer to the hardware, either through the ring's
+ * doorbell or through the register whose offset is kept in umsch->rb_wptr.
+ * The value written is ring->wptr shifted left by two.
+ */
+static void umsch_mm_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	/* the ring is embedded as the first member of struct amdgpu_umsch_mm */
+	struct amdgpu_umsch_mm *umsch =
+		container_of(ring, struct amdgpu_umsch_mm, ring);
+	/* "adev" is referenced implicitly by the register access macros */
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell) {
+		WREG32(umsch->rb_wptr, ring->wptr << 2);
+		return;
+	}
+
+	WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
+}
+
+/*
+ * Return the ring read pointer as read from the register whose offset is
+ * kept in umsch->rb_rptr.
+ */
+static u64 umsch_mm_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	/* the ring is embedded as the first member of struct amdgpu_umsch_mm */
+	struct amdgpu_umsch_mm *umsch =
+		container_of(ring, struct amdgpu_umsch_mm, ring);
+	/* "adev" is referenced implicitly by RREG32() */
+	struct amdgpu_device *adev = ring->adev;
+
+	return RREG32(umsch->rb_rptr);
+}
+
+/*
+ * Return the ring write pointer as read from the register whose offset is
+ * kept in umsch->rb_wptr.
+ */
+static u64 umsch_mm_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	/* the ring is embedded as the first member of struct amdgpu_umsch_mm */
+	struct amdgpu_umsch_mm *umsch =
+		container_of(ring, struct amdgpu_umsch_mm, ring);
+	/* "adev" is referenced implicitly by RREG32() */
+	struct amdgpu_device *adev = ring->adev;
+
+	return RREG32(umsch->rb_wptr);
+}
+
+/*
+ * Ring callbacks for the UMSCH ring: only pointer accessors and NOP insertion
+ * are provided, and 64-bit ring pointers are not used.
+ */
+static const struct amdgpu_ring_funcs umsch_v4_0_ring_funcs = {
+	.type = AMDGPU_RING_TYPE_UMSCH_MM,
+	.align_mask = 0,
+	.nop = 0,
+	.support_64bit_ptrs = false,
+	.get_rptr = umsch_mm_ring_get_rptr,
+	.get_wptr = umsch_mm_ring_get_wptr,
+	.set_wptr = umsch_mm_ring_set_wptr,
+	.insert_nop = amdgpu_ring_insert_nop,
+};
+
+/*
+ * Configure the UMSCH ring (MMHUB0 VM hub, doorbell driven, no GPU scheduler)
+ * and register it with amdgpu_ring_init().
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch)
+{
+	struct amdgpu_ring *ring = &umsch->ring;
+	struct amdgpu_device *adev;
+
+	adev = container_of(umsch, struct amdgpu_device, umsch_mm);
+
+	ring->no_scheduler = true;
+	ring->use_doorbell = true;
+	ring->doorbell_index = (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1) + 6;
+	ring->vm_hub = AMDGPU_MMHUB0(0);
+
+	snprintf(ring->name, sizeof(ring->name), "umsch");
+
+	return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+				AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * Request the UMSCH firmware image matching the VCN IP version and cache the
+ * sizes and start addresses found in its header in adev->umsch_mm.
+ *
+ * When firmware is loaded through the PSP, the ucode and data sections are
+ * also registered in adev->firmware.ucode[] and accounted in fw_size.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported VCN IP version, or the
+ * error returned by amdgpu_ucode_request().
+ */
+int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
+{
+	struct amdgpu_device *adev = umsch->ring.adev;
+	struct amdgpu_umsch_mm *mm = &adev->umsch_mm;
+	const struct umsch_mm_firmware_header_v1_0 *hdr;
+	struct amdgpu_firmware_info *info;
+	const char *suffix = NULL;
+	int r;
+
+	switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+	case IP_VERSION(4, 0, 5):
+	case IP_VERSION(4, 0, 6):
+		suffix = "4_0_0";
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	r = amdgpu_ucode_request(adev, &mm->fw, AMDGPU_UCODE_REQUIRED,
+				 "amdgpu/umsch_mm_%s.bin", suffix);
+	if (r) {
+		release_firmware(mm->fw);
+		mm->fw = NULL;
+		return r;
+	}
+
+	hdr = (const struct umsch_mm_firmware_header_v1_0 *)mm->fw->data;
+
+	mm->ucode_size = le32_to_cpu(hdr->umsch_mm_ucode_size_bytes);
+	mm->data_size = le32_to_cpu(hdr->umsch_mm_ucode_data_size_bytes);
+
+	/* each 64-bit start address is stored as two little-endian halves */
+	mm->irq_start_addr =
+		((uint64_t)(le32_to_cpu(hdr->umsch_mm_irq_start_addr_hi)) << 32) |
+		le32_to_cpu(hdr->umsch_mm_irq_start_addr_lo);
+	mm->uc_start_addr =
+		((uint64_t)(le32_to_cpu(hdr->umsch_mm_uc_start_addr_hi)) << 32) |
+		le32_to_cpu(hdr->umsch_mm_uc_start_addr_lo);
+	mm->data_start_addr =
+		((uint64_t)(le32_to_cpu(hdr->umsch_mm_data_start_addr_hi)) << 32) |
+		le32_to_cpu(hdr->umsch_mm_data_start_addr_lo);
+
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
+	/* both PSP entries point at the same firmware image */
+	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_UCODE];
+	info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_UCODE;
+	info->fw = mm->fw;
+	adev->firmware.fw_size +=
+		ALIGN(le32_to_cpu(hdr->umsch_mm_ucode_size_bytes), PAGE_SIZE);
+
+	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_DATA];
+	info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_DATA;
+	info->fw = mm->fw;
+	adev->firmware.fw_size +=
+		ALIGN(le32_to_cpu(hdr->umsch_mm_ucode_data_size_bytes), PAGE_SIZE);
+
+	return 0;
+}
+
+/*
+ * Create a 4KB aligned VRAM buffer object and copy the ucode section of the
+ * UMSCH firmware image into it.
+ *
+ * The section offset and size are taken from the firmware header. The buffer
+ * is unmapped and unreserved again before returning.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch)
+{
+	struct amdgpu_device *adev = umsch->ring.adev;
+	struct amdgpu_umsch_mm *mm = &adev->umsch_mm;
+	const struct umsch_mm_firmware_header_v1_0 *hdr;
+	const __le32 *src;
+	uint32_t nbytes;
+	int r;
+
+	hdr = (const struct umsch_mm_firmware_header_v1_0 *)mm->fw->data;
+	nbytes = le32_to_cpu(hdr->umsch_mm_ucode_size_bytes);
+	src = (const __le32 *)(mm->fw->data +
+		le32_to_cpu(hdr->umsch_mm_ucode_offset_bytes));
+
+	r = amdgpu_bo_create_reserved(adev, nbytes,
+				      4 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+				      &mm->ucode_fw_obj,
+				      &mm->ucode_fw_gpu_addr,
+				      (void **)&mm->ucode_fw_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create umsch_mm fw ucode bo\n", r);
+		return r;
+	}
+
+	memcpy(mm->ucode_fw_ptr, src, nbytes);
+
+	amdgpu_bo_kunmap(mm->ucode_fw_obj);
+	amdgpu_bo_unreserve(mm->ucode_fw_obj);
+
+	return 0;
+}
+
+/*
+ * Create a 64KB aligned VRAM buffer object and copy the data section of the
+ * UMSCH firmware image into it.
+ *
+ * The section offset and size are taken from the firmware header. The buffer
+ * is unmapped and unreserved again before returning.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch)
+{
+	struct amdgpu_device *adev = umsch->ring.adev;
+	struct amdgpu_umsch_mm *mm = &adev->umsch_mm;
+	const struct umsch_mm_firmware_header_v1_0 *hdr;
+	const __le32 *src;
+	uint32_t nbytes;
+	int r;
+
+	hdr = (const struct umsch_mm_firmware_header_v1_0 *)mm->fw->data;
+	nbytes = le32_to_cpu(hdr->umsch_mm_ucode_data_size_bytes);
+	src = (const __le32 *)(mm->fw->data +
+		le32_to_cpu(hdr->umsch_mm_ucode_data_offset_bytes));
+
+	r = amdgpu_bo_create_reserved(adev, nbytes,
+				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+				      &mm->data_fw_obj,
+				      &mm->data_fw_gpu_addr,
+				      (void **)&mm->data_fw_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create umsch_mm fw data bo\n", r);
+		return r;
+	}
+
+	memcpy(mm->data_fw_ptr, src, nbytes);
+
+	amdgpu_bo_kunmap(mm->data_fw_obj);
+	amdgpu_bo_unreserve(mm->data_fw_obj);
+
+	return 0;
+}
+
+/*
+ * Hand the UMSCH command buffer to the PSP for execution.
+ *
+ * The submitted size is the distance in bytes between the start of the
+ * command buffer and its current fill pointer.
+ *
+ * Returns the result of psp_execute_ip_fw_load().
+ */
+int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch)
+{
+	struct amdgpu_device *adev = umsch->ring.adev;
+	struct amdgpu_umsch_mm *mm = &adev->umsch_mm;
+	struct amdgpu_firmware_info cmd = {
+		.ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
+		.mc_addr = mm->cmd_buf_gpu_addr,
+		.ucode_size = ((uintptr_t)mm->cmd_buf_curr_ptr -
+			       (uintptr_t)mm->cmd_buf_ptr),
+	};
+
+	return psp_execute_ip_fw_load(&adev->psp, &cmd);
+}
+
+/*
+ * Assign one doorbell index per context priority level.
+ *
+ * The first index is the slot after the highest assigned doorbell, rounded up
+ * to a multiple of 1024, plus (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1); the
+ * remaining levels follow consecutively.
+ */
+static void umsch_mm_agdb_index_init(struct amdgpu_device *adev)
+{
+	uint32_t base;
+	int level;
+
+	base = roundup(adev->doorbell_index.max_assignment + 1, 1024);
+	base += (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1);
+
+	for (level = 0; level < CONTEXT_PRIORITY_NUM_LEVELS; level++)
+		adev->umsch_mm.agdb_index[level] = base + level;
+}
+
+/*
+ * Set up the software state of the UMSCH block: default masks, a writeback
+ * slot for the scheduler context, the command buffer, the firmware debug log
+ * buffer, the block mutex and the per-priority doorbell indices.
+ *
+ * On failure everything allocated so far is released again, so the caller
+ * does not have to clean up after a non-zero return.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int umsch_mm_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	adev->umsch_mm.vmid_mask_mm_vpe = 0xf00;
+	adev->umsch_mm.engine_mask = (1 << UMSCH_SWIP_ENGINE_TYPE_VPE);
+	adev->umsch_mm.vpe_hqd_mask = 0xfe;
+
+	r = amdgpu_device_wb_get(adev, &adev->umsch_mm.wb_index);
+	if (r) {
+		dev_err(adev->dev, "failed to alloc wb for umsch: %d\n", r);
+		return r;
+	}
+
+	adev->umsch_mm.sch_ctx_gpu_addr = adev->wb.gpu_addr +
+		(adev->umsch_mm.wb_index * 4);
+
+	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_GTT,
+				    &adev->umsch_mm.cmd_buf_obj,
+				    &adev->umsch_mm.cmd_buf_gpu_addr,
+				    (void **)&adev->umsch_mm.cmd_buf_ptr);
+	if (r) {
+		dev_err(adev->dev, "failed to allocate cmdbuf bo %d\n", r);
+		goto err_free_wb;
+	}
+
+	r = amdgpu_bo_create_kernel(adev, AMDGPU_UMSCHFW_LOG_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_VRAM |
+				    AMDGPU_GEM_DOMAIN_GTT,
+				    &adev->umsch_mm.dbglog_bo,
+				    &adev->umsch_mm.log_gpu_addr,
+				    &adev->umsch_mm.log_cpu_addr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to allocate umsch debug bo\n", r);
+		/* do not leak the command buffer and the writeback slot */
+		goto err_free_cmd_buf;
+	}
+
+	mutex_init(&adev->umsch_mm.mutex_hidden);
+
+	umsch_mm_agdb_index_init(adev);
+
+	return 0;
+
+err_free_cmd_buf:
+	amdgpu_bo_free_kernel(&adev->umsch_mm.cmd_buf_obj,
+			      &adev->umsch_mm.cmd_buf_gpu_addr,
+			      (void **)&adev->umsch_mm.cmd_buf_ptr);
+err_free_wb:
+	amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
+	return r;
+}
+
+
+/*
+ * Early init: install the v4.0 function table for supported VCN IP versions,
+ * attach the ring callbacks and let the function table set up the registers.
+ *
+ * Returns 0 on success or -EINVAL for an unsupported VCN IP version.
+ */
+static int umsch_mm_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_umsch_mm *mm = &adev->umsch_mm;
+	uint32_t vcn_ver = amdgpu_ip_version(adev, VCN_HWIP, 0);
+
+	if (vcn_ver != IP_VERSION(4, 0, 5) &&
+	    vcn_ver != IP_VERSION(4, 0, 6))
+		return -EINVAL;
+
+	umsch_mm_v4_0_set_funcs(mm);
+
+	mm->ring.funcs = &umsch_v4_0_ring_funcs;
+	umsch_mm_set_regs(mm);
+
+	return 0;
+}
+
+/*
+ * Late init hook. Currently a no-op: it returns 0 both when the device is in
+ * reset/s0ix/suspend and otherwise.
+ */
+static int umsch_mm_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	/* NOTE(review): both paths return 0; presumably a placeholder */
+	if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend)
+		return 0;
+
+	return 0;
+}
+
+/*
+ * Software init: allocate the block's buffers, initialize the firmware log
+ * header, then run the ring_init and init_microcode callbacks.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int umsch_mm_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_umsch_mm *mm = &adev->umsch_mm;
+	int r;
+
+	r = umsch_mm_init(adev);
+	if (r)
+		return r;
+
+	/* the log buffer allocated by umsch_mm_init() gets its header here */
+	amdgpu_umsch_fwlog_init(mm);
+
+	r = umsch_mm_ring_init(mm);
+	if (r)
+		return r;
+
+	return umsch_mm_init_microcode(mm);
+}
+
+/*
+ * Software teardown: release the firmware image, the ring, the mutex, the
+ * command and debug log buffers and the writeback slot. Always returns 0.
+ */
+static int umsch_mm_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	release_firmware(adev->umsch_mm.fw);
+	/* clear the pointer so the released image cannot be reused */
+	adev->umsch_mm.fw = NULL;
+
+	amdgpu_ring_fini(&adev->umsch_mm.ring);
+
+	mutex_destroy(&adev->umsch_mm.mutex_hidden);
+
+	/* command buffer allocated in umsch_mm_init() */
+	amdgpu_bo_free_kernel(&adev->umsch_mm.cmd_buf_obj,
+			      &adev->umsch_mm.cmd_buf_gpu_addr,
+			      (void **)&adev->umsch_mm.cmd_buf_ptr);
+
+	/* firmware debug log buffer allocated in umsch_mm_init() */
+	amdgpu_bo_free_kernel(&adev->umsch_mm.dbglog_bo,
+			      &adev->umsch_mm.log_gpu_addr,
+			      (void **)&adev->umsch_mm.log_cpu_addr);
+
+	amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
+
+	return 0;
+}
+
+/*
+ * Hardware init: load the microcode, start the ring and program the hardware
+ * resources through the block's function table. The result of the ring_start
+ * callback is not checked.
+ *
+ * Returns 0 on success or the error from load_microcode/set_hw_resources.
+ */
+static int umsch_mm_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_umsch_mm *mm = &ip_block->adev->umsch_mm;
+	int r;
+
+	r = umsch_mm_load_microcode(mm);
+	if (r)
+		return r;
+
+	umsch_mm_ring_start(mm);
+
+	return umsch_mm_set_hw_resources(mm);
+}
+
+/*
+ * Hardware teardown: stop the ring and free the firmware data and ucode
+ * buffer objects. Always returns 0.
+ */
+static int umsch_mm_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	/* result of the ring_stop callback is not checked */
+	umsch_mm_ring_stop(&adev->umsch_mm);
+
+	amdgpu_bo_free_kernel(&adev->umsch_mm.data_fw_obj,
+			      &adev->umsch_mm.data_fw_gpu_addr,
+			      (void **)&adev->umsch_mm.data_fw_ptr);
+
+	amdgpu_bo_free_kernel(&adev->umsch_mm.ucode_fw_obj,
+			      &adev->umsch_mm.ucode_fw_gpu_addr,
+			      (void **)&adev->umsch_mm.ucode_fw_ptr);
+	return 0;
+}
+
+/* Suspend is implemented as a full hardware teardown. */
+static int umsch_mm_suspend(struct amdgpu_ip_block *ip_block)
+{
+	return umsch_mm_hw_fini(ip_block);
+}
+
+/* Resume is implemented as a full hardware init. */
+static int umsch_mm_resume(struct amdgpu_ip_block *ip_block)
+{
+	return umsch_mm_hw_init(ip_block);
+}
+
+/*
+ * Initialize the header at the start of the firmware log buffer: record the
+ * header and buffer sizes and place both read and write offsets right behind
+ * the header. Does nothing when debugfs support is not built in.
+ */
+void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm)
+{
+#if defined(CONFIG_DEBUG_FS)
+	volatile struct amdgpu_umsch_fwlog *hdr = umsch_mm->log_cpu_addr;
+
+	hdr->header_size = sizeof(struct amdgpu_umsch_fwlog);
+	hdr->buffer_size = AMDGPU_UMSCHFW_LOG_SIZE;
+	/* an empty log: both offsets point at the first byte after the header */
+	hdr->rptr = hdr->header_size;
+	hdr->wptr = hdr->header_size;
+	hdr->wrapped = 0;
+#endif
+}
+
+/*
+ * debugfs for mapping umsch firmware log buffer.
+ */
+#if defined(CONFIG_DEBUG_FS)
+/*
+ * debugfs read handler for the UMSCH firmware log.
+ *
+ * The log buffer starts with a struct amdgpu_umsch_fwlog header whose rptr and
+ * wptr are byte offsets into the buffer. Data between rptr and wptr is copied
+ * to user space in at most two chunks (the second one after wrapping back to
+ * the first byte behind the header) and rptr is advanced accordingly.
+ *
+ * Returns the number of bytes copied, 0 when @size is 0 or no data is pending,
+ * -ENODEV when the file has no private data, or -EFAULT when the buffer is not
+ * mapped, the offsets are out of range, or copying to user space fails. On
+ * -EFAULT the read offset is left unchanged, so no log data is dropped.
+ */
+static ssize_t amdgpu_debugfs_umsch_fwlog_read(struct file *f, char __user *buf,
+					       size_t size, loff_t *pos)
+{
+	struct amdgpu_umsch_mm *umsch_mm;
+	void *log_buf;
+	volatile struct amdgpu_umsch_fwlog *plog;
+	unsigned int read_pos, write_pos, available, i, read_bytes = 0;
+	unsigned int read_num[2] = {0};
+
+	umsch_mm = file_inode(f)->i_private;
+	if (!umsch_mm)
+		return -ENODEV;
+
+	if (!umsch_mm->log_cpu_addr)
+		return -EFAULT;
+
+	log_buf = umsch_mm->log_cpu_addr;
+
+	plog = (volatile struct amdgpu_umsch_fwlog *)log_buf;
+	read_pos = plog->rptr;
+	write_pos = plog->wptr;
+
+	if (read_pos > AMDGPU_UMSCHFW_LOG_SIZE || write_pos > AMDGPU_UMSCHFW_LOG_SIZE)
+		return -EFAULT;
+
+	if (!size || (read_pos == write_pos))
+		return 0;
+
+	if (write_pos > read_pos) {
+		/* contiguous data, a single chunk is enough */
+		available = write_pos - read_pos;
+		read_num[0] = min_t(size_t, size, available);
+	} else {
+		/* data wraps: tail of the buffer first, then from the header on */
+		read_num[0] = AMDGPU_UMSCHFW_LOG_SIZE - read_pos;
+		available = read_num[0] + write_pos - plog->header_size;
+		if (size > available)
+			read_num[1] = write_pos - plog->header_size;
+		else if (size > read_num[0])
+			read_num[1] = size - read_num[0];
+		else
+			read_num[0] = size;
+	}
+
+	for (i = 0; i < 2; i++) {
+		if (!read_num[i])
+			continue;
+
+		if (read_pos == AMDGPU_UMSCHFW_LOG_SIZE)
+			read_pos = plog->header_size;
+
+		/*
+		 * copy_to_user() returns the number of bytes it could NOT
+		 * copy; any non-zero value means the chunk was not delivered
+		 * completely and must not be accounted as read.
+		 */
+		if (copy_to_user((buf + read_bytes),
+				 (log_buf + read_pos), read_num[i]))
+			return -EFAULT;
+
+		read_bytes += read_num[i];
+		read_pos += read_num[i];
+	}
+
+	plog->rptr = read_pos;
+	*pos += read_bytes;
+	return read_bytes;
+}
+
+/* File operations of the read-only firmware log debugfs file. */
+static const struct file_operations amdgpu_debugfs_umschfwlog_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_umsch_fwlog_read,
+	.llseek = default_llseek
+};
+#endif
+
+/*
+ * Create the "amdgpu_umsch_fwlog" file in the primary DRM minor's debugfs
+ * directory, with @umsch_mm as private data and the log buffer size as file
+ * size. Does nothing when debugfs support is not built in.
+ */
+void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev,
+				     struct amdgpu_umsch_mm *umsch_mm)
+{
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
+
+	debugfs_create_file_size("amdgpu_umsch_fwlog", S_IFREG | 0444, root,
+				 umsch_mm, &amdgpu_debugfs_umschfwlog_fops,
+				 AMDGPU_UMSCHFW_LOG_SIZE);
+#endif
+}
+
+/* IP block callbacks for UMSCH MM v4.0; suspend/resume map to hw_fini/hw_init. */
+static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = {
+	.name = "umsch_mm_v4_0",
+	.early_init = umsch_mm_early_init,
+	.late_init = umsch_mm_late_init,
+	.sw_init = umsch_mm_sw_init,
+	.sw_fini = umsch_mm_sw_fini,
+	.hw_init = umsch_mm_hw_init,
+	.hw_fini = umsch_mm_hw_fini,
+	.suspend = umsch_mm_suspend,
+	.resume = umsch_mm_resume,
+};
+
+/* Exported IP block descriptor for UMSCH MM version 4.0.0. */
+const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = {
+	.type = AMD_IP_BLOCK_TYPE_UMSCH_MM,
+	.major = 4,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &umsch_mm_v4_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
new file mode 100644
index 000000000000..2c771a753778
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_UMSCH_MM_H__
+#define __AMDGPU_UMSCH_MM_H__
+
+/*
+ * Engine types known to UMSCH. The values are used as bit positions when
+ * building amdgpu_umsch_mm.engine_mask.
+ */
+enum UMSCH_SWIP_ENGINE_TYPE {
+	UMSCH_SWIP_ENGINE_TYPE_VCN0 = 0,
+	UMSCH_SWIP_ENGINE_TYPE_VCN1 = 1,
+	UMSCH_SWIP_ENGINE_TYPE_VCN = 2,
+	UMSCH_SWIP_ENGINE_TYPE_VPE = 3,
+	UMSCH_SWIP_ENGINE_TYPE_MAX	/* number of engine types, not a type */
+};
+
+/*
+ * Context priority levels. CONTEXT_PRIORITY_NUM_LEVELS sizes the per-priority
+ * doorbell index array amdgpu_umsch_mm.agdb_index[].
+ */
+enum UMSCH_CONTEXT_PRIORITY_LEVEL {
+	CONTEXT_PRIORITY_LEVEL_IDLE = 0,
+	CONTEXT_PRIORITY_LEVEL_NORMAL = 1,
+	CONTEXT_PRIORITY_LEVEL_FOCUS = 2,
+	CONTEXT_PRIORITY_LEVEL_REALTIME = 3,
+	CONTEXT_PRIORITY_NUM_LEVELS	/* number of levels, not a level */
+};
+
+/*
+ * Resource description with VMID masks, engine mask and feature flags.
+ * NOTE(review): presumably the input of the set_hw_resources path; it is not
+ * referenced by any prototype in this header - confirm against the users.
+ */
+struct umsch_mm_set_resource_input {
+	uint32_t vmid_mask_mm_vcn;
+	uint32_t vmid_mask_mm_vpe;
+	uint32_t collaboration_mask_vpe;
+	uint32_t logging_vmid;
+	uint32_t engine_mask;
+	/* flag bits, also accessible as one 32-bit word through uint32_all */
+	union {
+		struct {
+			uint32_t disable_reset : 1;
+			uint32_t disable_umsch_mm_log : 1;
+			uint32_t use_rs64mem_for_proc_ctx_csa : 1;
+			uint32_t reserved : 29;
+		};
+		uint32_t uint32_all;
+	};
+};
+
+/*
+ * Header located at the start of the firmware log buffer. It is initialized
+ * by amdgpu_umsch_fwlog_init() and consumed by the debugfs read handler.
+ */
+struct amdgpu_umsch_fwlog {
+	/* byte offset of the next data to read; advanced by the debugfs reader */
+	uint32_t rptr;
+	/* byte offset of the write position; presumably advanced by firmware */
+	uint32_t wptr;
+	/* total buffer size, set to AMDGPU_UMSCHFW_LOG_SIZE at init */
+	uint32_t buffer_size;
+	/* size of this header; log data starts at this offset */
+	uint32_t header_size;
+	/* initialized to 0; presumably set once the log has wrapped - TODO confirm */
+	uint32_t wrapped;
+};
+
+/*
+ * Input of the umsch_mm_funcs.add_queue() callback.
+ * NOTE(review): field semantics are defined by the UMSCH firmware interface
+ * and cannot be derived from this header - consult the firmware API.
+ */
+struct umsch_mm_add_queue_input {
+	uint32_t process_id;
+	uint64_t page_table_base_addr;
+	uint64_t process_va_start;
+	uint64_t process_va_end;
+	uint64_t process_quantum;
+	uint64_t process_csa_addr;
+	uint64_t context_quantum;
+	uint64_t context_csa_addr;
+	uint32_t inprocess_context_priority;
+	enum UMSCH_CONTEXT_PRIORITY_LEVEL context_global_priority_level;
+	uint32_t doorbell_offset_0;
+	uint32_t doorbell_offset_1;
+	enum UMSCH_SWIP_ENGINE_TYPE engine_type;
+	uint32_t affinity;
+	uint64_t mqd_addr;
+	uint64_t h_context;
+	uint64_t h_queue;
+	uint32_t vm_context_cntl;
+
+	uint32_t process_csa_array_index;
+	uint32_t context_csa_array_index;
+
+	/* single-bit flags packed into one 32-bit word */
+	struct {
+		uint32_t is_context_suspended : 1;
+		uint32_t collaboration_mode : 1;
+		uint32_t reserved : 30;
+	};
+};
+
+/*
+ * Input of the umsch_mm_funcs.remove_queue() callback. The fields carry the
+ * same names as their counterparts in struct umsch_mm_add_queue_input.
+ */
+struct umsch_mm_remove_queue_input {
+	uint32_t doorbell_offset_0;
+	uint32_t doorbell_offset_1;
+	uint64_t context_csa_addr;
+	uint32_t context_csa_array_index;
+};
+
+/*
+ * Ring buffer description (base, size, pointers, VMID).
+ * NOTE(review): presumably the memory queue descriptor layout shared with
+ * firmware; it is not referenced elsewhere in this header - confirm.
+ */
+struct MQD_INFO {
+	uint32_t rb_base_hi;
+	uint32_t rb_base_lo;
+	uint32_t rb_size;
+	uint32_t wptr_val;
+	uint32_t rptr_val;
+	uint32_t unmapped;
+	uint32_t vmid;
+};
+
+struct amdgpu_umsch_mm;
+
+/*
+ * Version specific UMSCH operations. Callers go through the umsch_mm_*()
+ * wrapper macros in this header, which treat a NULL callback as success (0).
+ */
+struct umsch_mm_funcs {
+	int (*set_hw_resources)(struct amdgpu_umsch_mm *umsch);
+	int (*add_queue)(struct amdgpu_umsch_mm *umsch,
+			 struct umsch_mm_add_queue_input *input);
+	int (*remove_queue)(struct amdgpu_umsch_mm *umsch,
+			    struct umsch_mm_remove_queue_input *input);
+	int (*set_regs)(struct amdgpu_umsch_mm *umsch);
+	int (*init_microcode)(struct amdgpu_umsch_mm *umsch);
+	int (*load_microcode)(struct amdgpu_umsch_mm *umsch);
+	int (*ring_init)(struct amdgpu_umsch_mm *umsch);
+	int (*ring_start)(struct amdgpu_umsch_mm *umsch);
+	int (*ring_stop)(struct amdgpu_umsch_mm *umsch);
+	int (*ring_fini)(struct amdgpu_umsch_mm *umsch);
+};
+
+/*
+ * Per-device state of the UMSCH block.
+ */
+struct amdgpu_umsch_mm {
+	/* kept as the first member; ring callbacks convert a ring pointer back */
+	struct amdgpu_ring ring;
+
+	/* register offsets of the ring write/read pointer registers */
+	uint32_t rb_wptr;
+	uint32_t rb_rptr;
+
+	/* version specific operations, see struct umsch_mm_funcs */
+	const struct umsch_mm_funcs *funcs;
+
+	/* firmware image and values taken from its header */
+	const struct firmware *fw;
+	uint32_t fw_version;
+	uint32_t feature_version;
+
+	/* buffer object holding the ucode section of the firmware */
+	struct amdgpu_bo *ucode_fw_obj;
+	uint64_t ucode_fw_gpu_addr;
+	uint32_t *ucode_fw_ptr;
+	uint64_t irq_start_addr;
+	uint64_t uc_start_addr;
+	uint32_t ucode_size;
+
+	/* buffer object holding the data section of the firmware */
+	struct amdgpu_bo *data_fw_obj;
+	uint64_t data_fw_gpu_addr;
+	uint32_t *data_fw_ptr;
+	uint64_t data_start_addr;
+	uint32_t data_size;
+
+	/* command buffer; cmd_buf_curr_ptr is the current fill position */
+	struct amdgpu_bo *cmd_buf_obj;
+	uint64_t cmd_buf_gpu_addr;
+	uint32_t *cmd_buf_ptr;
+	uint32_t *cmd_buf_curr_ptr;
+
+	/* writeback slot and the GPU address derived from it */
+	uint32_t wb_index;
+	uint64_t sch_ctx_gpu_addr;
+	uint32_t *sch_ctx_cpu_addr;
+
+	uint32_t vmid_mask_mm_vcn;
+	uint32_t vmid_mask_mm_vpe;
+	/* bit mask indexed by enum UMSCH_SWIP_ENGINE_TYPE */
+	uint32_t engine_mask;
+	uint32_t vcn0_hqd_mask;
+	uint32_t vcn1_hqd_mask;
+	uint32_t vcn_hqd_mask[2];
+	uint32_t vpe_hqd_mask;
+	/* one doorbell index per context priority level */
+	uint32_t agdb_index[CONTEXT_PRIORITY_NUM_LEVELS];
+
+	/* taken through amdgpu_umsch_mm_lock()/amdgpu_umsch_mm_unlock() */
+	struct mutex mutex_hidden;
+	/* firmware debug log buffer, starts with struct amdgpu_umsch_fwlog */
+	struct amdgpu_bo *dbglog_bo;
+	void *log_cpu_addr;
+	uint64_t log_gpu_addr;
+	uint32_t mem_size;
+	uint32_t log_offset;
+};
+
+int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws);
+int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch);
+int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch);
+int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch);
+
+void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev,
+ struct amdgpu_umsch_mm *umsch);
+
+void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm);
+
+/*
+ * Write @value to the VCN register @reg (instance 0).
+ *
+ * With PSP firmware loading the write is not performed directly; instead the
+ * register offset shifted left by two and the value are appended as a pair of
+ * dwords to the UMSCH command buffer. Otherwise WREG32() is used.
+ *
+ * Expects a variable named "adev" in scope and a reg##_BASE_IDX definition.
+ */
+#define WREG32_SOC15_UMSCH(reg, value)								\
+	do {											\
+		uint32_t reg_offset = adev->reg_offset[VCN_HWIP][0][reg##_BASE_IDX] + reg;	\
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {				\
+			*adev->umsch_mm.cmd_buf_curr_ptr++ = (reg_offset << 2);			\
+			*adev->umsch_mm.cmd_buf_curr_ptr++ = value;				\
+		} else	{									\
+			WREG32(reg_offset, value);						\
+		}										\
+	} while (0)
+
+/*
+ * Wrappers around the struct umsch_mm_funcs callbacks. Each one evaluates to
+ * the callback's return value, or to 0 when the callback is not set. Note that
+ * (umsch)->funcs itself is dereferenced unconditionally.
+ */
+#define umsch_mm_set_hw_resources(umsch) \
+	((umsch)->funcs->set_hw_resources ? (umsch)->funcs->set_hw_resources((umsch)) : 0)
+#define umsch_mm_add_queue(umsch, input) \
+	((umsch)->funcs->add_queue ? (umsch)->funcs->add_queue((umsch), (input)) : 0)
+#define umsch_mm_remove_queue(umsch, input) \
+	((umsch)->funcs->remove_queue ? (umsch)->funcs->remove_queue((umsch), (input)) : 0)
+
+#define umsch_mm_set_regs(umsch) \
+	((umsch)->funcs->set_regs ? (umsch)->funcs->set_regs((umsch)) : 0)
+#define umsch_mm_init_microcode(umsch) \
+	((umsch)->funcs->init_microcode ? (umsch)->funcs->init_microcode((umsch)) : 0)
+#define umsch_mm_load_microcode(umsch) \
+	((umsch)->funcs->load_microcode ? (umsch)->funcs->load_microcode((umsch)) : 0)
+
+#define umsch_mm_ring_init(umsch) \
+	((umsch)->funcs->ring_init ? (umsch)->funcs->ring_init((umsch)) : 0)
+#define umsch_mm_ring_start(umsch) \
+	((umsch)->funcs->ring_start ? (umsch)->funcs->ring_start((umsch)) : 0)
+#define umsch_mm_ring_stop(umsch) \
+	((umsch)->funcs->ring_stop ? (umsch)->funcs->ring_stop((umsch)) : 0)
+#define umsch_mm_ring_fini(umsch) \
+	((umsch)->funcs->ring_fini ? (umsch)->funcs->ring_fini((umsch)) : 0)
+
+/* Acquire the UMSCH block mutex; pair with amdgpu_umsch_mm_unlock(). */
+static inline void amdgpu_umsch_mm_lock(struct amdgpu_umsch_mm *umsch)
+{
+	mutex_lock(&umsch->mutex_hidden);
+}
+
+/* Release the UMSCH block mutex taken by amdgpu_umsch_mm_lock(). */
+static inline void amdgpu_umsch_mm_unlock(struct amdgpu_umsch_mm *umsch)
+{
+	mutex_unlock(&umsch->mutex_hidden);
+}
+
+extern const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
new file mode 100644
index 000000000000..1add21160d21
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -0,0 +1,1090 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_auth.h>
+#include <drm/drm_exec.h>
+#include <linux/pm_runtime.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_userq_fence.h"
+
+u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
+{
+	u32 mask = 0;
+	int ip;
+
+	/* Bit n is set when adev->userq_funcs[n] is populated. */
+	for (ip = AMDGPU_HW_IP_NUM - 1; ip >= 0; ip--)
+		if (adev->userq_funcs[ip])
+			mask |= (1 << ip);
+
+	return mask;
+}
+
+/*
+ * Check that a user-supplied GPU virtual address range is backed by a mapping
+ * in @vm. @addr is the start address and @expected_size the length in bytes;
+ * both are converted to GPU page units before the lookup.
+ * Returns 0 when the range fits, -EINVAL otherwise, or the reserve error.
+ */
+int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
+				   u64 expected_size)
+{
+	const u64 first_page = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+	const u64 num_pages = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
+	struct amdgpu_bo_va_mapping *mapping;
+	int ret;
+
+	/* The mapping lookup is done with the VM root BO reserved. */
+	ret = amdgpu_bo_reserve(vm->root.bo, false);
+	if (ret)
+		return ret;
+
+	/*
+	 * Accept the range only when a mapping covers @addr and at least
+	 * num_pages GPU pages remain between @addr and the mapping's last page.
+	 */
+	ret = -EINVAL;
+	mapping = amdgpu_vm_bo_lookup_mapping(vm, first_page);
+	if (mapping && first_page >= mapping->start &&
+	    mapping->last - first_page + 1 >= num_pages)
+		ret = 0;
+
+	amdgpu_bo_unreserve(vm->root.bo);
+	return ret;
+}
+
+/*
+ * Preempt @queue through its IP's preempt callback. Only acts on a queue in
+ * the MAPPED state; other states are left alone and 0 is returned. The queue
+ * ends up PREEMPTED on success and HUNG when the callback fails.
+ */
+static int
+amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
+			    struct amdgpu_usermode_queue *queue)
+{
+	const struct amdgpu_userq_funcs *funcs;
+	int err;
+
+	if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
+		return 0;
+
+	funcs = uq_mgr->adev->userq_funcs[queue->queue_type];
+	err = funcs->preempt(uq_mgr, queue);
+	queue->state = err ? AMDGPU_USERQ_STATE_HUNG : AMDGPU_USERQ_STATE_PREEMPTED;
+	return err;
+}
+
+/*
+ * Restore @queue through its IP's restore callback. Only acts on a queue in
+ * the PREEMPTED state; other states are left alone and 0 is returned. The
+ * queue ends up MAPPED on success and HUNG when the callback fails.
+ */
+static int
+amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
+			    struct amdgpu_usermode_queue *queue)
+{
+	const struct amdgpu_userq_funcs *funcs;
+	int err;
+
+	if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
+		return 0;
+
+	funcs = uq_mgr->adev->userq_funcs[queue->queue_type];
+	err = funcs->restore(uq_mgr, queue);
+	queue->state = err ? AMDGPU_USERQ_STATE_HUNG : AMDGPU_USERQ_STATE_MAPPED;
+	return err;
+}
+
+/*
+ * Unmap a MAPPED or PREEMPTED @queue via its IP's unmap callback: UNMAPPED on
+ * success, HUNG on failure. Queues in any other state are skipped (returns 0).
+ */
+static int
+amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
+			  struct amdgpu_usermode_queue *queue)
+{
+	const struct amdgpu_userq_funcs *funcs;
+	int err;
+
+	if (queue->state != AMDGPU_USERQ_STATE_MAPPED &&
+	    queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
+		return 0;
+	funcs = uq_mgr->adev->userq_funcs[queue->queue_type];
+	err = funcs->unmap(uq_mgr, queue);
+	queue->state = err ? AMDGPU_USERQ_STATE_HUNG : AMDGPU_USERQ_STATE_UNMAPPED;
+	return err;
+}
+
+/*
+ * Map an UNMAPPED @queue via its IP's map callback: MAPPED on success, HUNG
+ * on failure. Queues in any other state are skipped and 0 is returned.
+ */
+static int
+amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
+			struct amdgpu_usermode_queue *queue)
+{
+	const struct amdgpu_userq_funcs *funcs;
+	int err;
+
+	if (queue->state != AMDGPU_USERQ_STATE_UNMAPPED)
+		return 0;
+
+	funcs = uq_mgr->adev->userq_funcs[queue->queue_type];
+	err = funcs->map(uq_mgr, queue);
+	queue->state = err ? AMDGPU_USERQ_STATE_HUNG : AMDGPU_USERQ_STATE_MAPPED;
+	return err;
+}
+
+/* Give @queue's last fence up to 100ms to signal; log when the wait fails. */
+static void
+amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
+				 struct amdgpu_usermode_queue *queue)
+{
+	struct dma_fence *fence = queue->last_fence;
+	int ret = 1;
+
+	if (fence && !dma_fence_is_signaled(fence))
+		ret = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(100));
+	if (ret <= 0)
+		drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
+			     fence->context, fence->seqno);
+}
+
+/* Destroy @queue's MQD and fence driver, drop @queue_id from the IDR, free it. */
+static void
+amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
+		     struct amdgpu_usermode_queue *queue, int queue_id)
+{
+	const struct amdgpu_userq_funcs *funcs =
+		uq_mgr->adev->userq_funcs[queue->queue_type];
+
+	funcs->mqd_destroy(uq_mgr, queue);
+	amdgpu_userq_fence_driver_free(queue);
+	idr_remove(&uq_mgr->userq_idr, queue_id);
+	kfree(queue);
+}
+
+static struct amdgpu_usermode_queue *
+amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
+{
+ return idr_find(&uq_mgr->userq_idr, qid); /* NULL when @qid is not in the IDR */
+}
+
+void
+amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+retry:
+ /* Flush any pending resume work to create ev_fence */
+ flush_delayed_work(&uq_mgr->resume_work);
+
+ mutex_lock(&uq_mgr->userq_mutex); /* still held when this function returns */
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+ if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
+ mutex_unlock(&uq_mgr->userq_mutex); /* dropped before flushing again at retry */
+ /*
+ * No unsignaled eviction fence is installed, so there was no
+ * pending resume work: queue one now to create a valid fence
+ */
+ schedule_delayed_work(&uq_mgr->resume_work, 0);
+ goto retry;
+ }
+}
+
+/*
+ * Create a zeroed, CPU-mapped kernel BO of @size bytes in GTT and record its
+ * BO pointer, CPU address and GPU offset in @userq_obj. Returns 0 or an error.
+ */
+int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
+			       struct amdgpu_userq_obj *userq_obj,
+			       int size)
+{
+	struct amdgpu_bo_param bp = {
+		.byte_align = PAGE_SIZE,
+		.domain = AMDGPU_GEM_DOMAIN_GTT,
+		.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+			 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+		.type = ttm_bo_type_kernel,
+		.size = size,
+		.bo_ptr_size = sizeof(struct amdgpu_bo),
+	};
+	int ret;
+
+	ret = amdgpu_bo_create(uq_mgr->adev, &bp, &userq_obj->obj);
+	if (ret) {
+		drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", ret);
+		return ret;
+	}
+
+	ret = amdgpu_bo_reserve(userq_obj->obj, true);
+	if (ret) {
+		drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", ret);
+		goto drop_ref;
+	}
+
+	ret = amdgpu_ttm_alloc_gart(&userq_obj->obj->tbo);
+	if (ret) {
+		drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", ret);
+		goto unreserve;
+	}
+
+	ret = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr);
+	if (ret) {
+		drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", ret);
+		goto unreserve;
+	}
+
+	userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
+	amdgpu_bo_unreserve(userq_obj->obj);
+	memset(userq_obj->cpu_ptr, 0, size);
+	return 0;
+
+unreserve:
+	amdgpu_bo_unreserve(userq_obj->obj);
+drop_ref:
+	amdgpu_bo_unref(&userq_obj->obj);
+	return ret;
+}
+
+void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj)
+{
+ amdgpu_bo_kunmap(userq_obj->obj); /* drop the CPU mapping first */
+ amdgpu_bo_unref(&userq_obj->obj); /* then release the BO reference; @uq_mgr is unused */
+}
+
+/*
+ * Pin the doorbell BO and return its BAR doorbell index; on failure returns
+ * a negative errno cast to uint64_t. Queue destroy unpins the BO again.
+ */
+uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+					 struct amdgpu_db_info *db_info,
+					 struct drm_file *filp)
+{
+	uint64_t index;
+	struct drm_gem_object *gobj;
+	struct amdgpu_userq_obj *db_obj = db_info->db_obj;
+	int r, db_size;
+
+	gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
+	if (gobj == NULL) {
+		drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n");
+		return -EINVAL;
+	}
+
+	db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	drm_gem_object_put(gobj);
+
+	r = amdgpu_bo_reserve(db_obj->obj, true);
+	if (r) {
+		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to reserve doorbell object\n");
+		goto unref_bo;
+	}
+
+	/* Pin the BO before generating the index, unpin in queue destroy */
+	r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
+	if (r) {
+		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
+		goto unresv_bo;
+	}
+
+	switch (db_info->queue_type) {
+	case AMDGPU_HW_IP_GFX:
+	case AMDGPU_HW_IP_COMPUTE:
+	case AMDGPU_HW_IP_DMA:
+		db_size = sizeof(u64);
+		break;
+	case AMDGPU_HW_IP_VCN_ENC:
+		db_size = sizeof(u32);
+		db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1;
+		break;
+	case AMDGPU_HW_IP_VPE:
+		db_size = sizeof(u32);
+		db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1;
+		break;
+	default:
+		drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n",
+			     db_info->queue_type);
+		r = -EINVAL;
+		goto unpin_bo;
+	}
+
+	index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
+					     db_info->doorbell_offset, db_size);
+	drm_dbg_driver(adev_to_drm(uq_mgr->adev),
+		       "[Usermode queues] doorbell index=%llu\n", index);
+	amdgpu_bo_unreserve(db_obj->obj);
+	return index;
+
+unpin_bo:
+	amdgpu_bo_unpin(db_obj->obj);
+unresv_bo:
+	amdgpu_bo_unreserve(db_obj->obj);
+unref_bo:
+	amdgpu_bo_unref(&db_obj->obj);
+	return r;
+}
+
+/*
+ * Destroy queue @queue_id: -EINVAL for an unknown id, else the unmap result.
+ */
+static int amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
+{
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct amdgpu_usermode_queue *queue;
+	int r;
+
+	cancel_delayed_work_sync(&uq_mgr->resume_work);
+	mutex_lock(&uq_mgr->userq_mutex);
+
+	queue = amdgpu_userq_find(uq_mgr, queue_id);
+	if (!queue) {
+		drm_dbg_driver(adev_to_drm(adev), "Invalid queue id to destroy\n");
+		mutex_unlock(&uq_mgr->userq_mutex);
+		return -EINVAL;
+	}
+	amdgpu_userq_wait_for_last_fence(uq_mgr, queue);
+	if (!amdgpu_bo_reserve(queue->db_obj.obj, true)) {
+		amdgpu_bo_unpin(queue->db_obj.obj);
+		amdgpu_bo_unreserve(queue->db_obj.obj);
+	}
+	amdgpu_bo_unref(&queue->db_obj.obj);
+
+#if defined(CONFIG_DEBUG_FS)
+	debugfs_remove_recursive(queue->debugfs_queue);
+#endif
+	r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+	/* TODO: a failed HW unmap needs a reset. r is an errno, not a queue state. */
+	if (unlikely(r)) {
+		drm_warn(adev_to_drm(adev), "trying to destroy a HW mapping userq\n");
+		queue->state = AMDGPU_USERQ_STATE_HUNG;
+	}
+	amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
+	mutex_unlock(&uq_mgr->userq_mutex);
+
+	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+	return r;
+}
+
+/*
+ * Priorities below AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH are open to
+ * everyone. Anything at or above it needs CAP_SYS_NICE or the current DRM
+ * master; otherwise -EACCES is returned.
+ */
+static int amdgpu_userq_priority_permit(struct drm_file *filp, int priority)
+{
+	bool allowed;
+
+	allowed = priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH ||
+		  capable(CAP_SYS_NICE) ||
+		  drm_is_current_master(filp);
+	return allowed ? 0 : -EACCES;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+/* debugfs "mqd_info" show: print the queue type and the MQD BO's GPU offset. */
+static int amdgpu_mqd_info_read(struct seq_file *m, void *unused)
+{
+	struct amdgpu_usermode_queue *uq = m->private;
+	struct amdgpu_bo *mqd_bo;
+	int ret = -EINVAL;
+
+	if (!uq || !uq->mqd.obj)
+		return ret;
+
+	/* Hold a reference across the dump; the offset is read while reserved. */
+	mqd_bo = amdgpu_bo_ref(uq->mqd.obj);
+	if (amdgpu_bo_reserve(mqd_bo, true))
+		goto put_bo;
+
+	seq_printf(m, "queue_type: %d\n", uq->queue_type);
+	seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(uq->mqd.obj));
+	amdgpu_bo_unreserve(mqd_bo);
+	ret = 0;
+
+put_bo:
+	amdgpu_bo_unref(&mqd_bo);
+	return ret;
+}
+
+static int amdgpu_mqd_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, amdgpu_mqd_info_read, inode->i_private); /* i_private becomes m->private */
+}
+
+static const struct file_operations amdgpu_mqd_info_fops = { /* used for the "mqd_info" debugfs file */
+ .owner = THIS_MODULE,
+ .open = amdgpu_mqd_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release, /* pairs with single_open() in the open handler */
+};
+#endif
+
+/*
+ * Create a user mode queue for @filp and return its id in
+ * @args->out.queue_id. A successful create keeps the runtime-PM reference
+ * until amdgpu_userq_destroy(); on failure everything acquired here is
+ * released again and a negative errno is returned.
+ */
+static int amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
+{
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+	struct amdgpu_device *adev = uq_mgr->adev;
+	const struct amdgpu_userq_funcs *uq_funcs;
+	struct amdgpu_usermode_queue *queue;
+	struct amdgpu_db_info db_info;
+	char queue_name[32];	/* "queue-<id>"; a fixed buffer cannot fail to allocate */
+	bool skip_map_queue;
+	uint64_t index;
+	int qid, r;
+	int priority = (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
+		       AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
+
+	r = amdgpu_userq_priority_permit(filp, priority);
+	if (r)
+		return r;
+
+	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+	if (r < 0) {
+		drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
+		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+		return r;
+	}
+
+	/*
+	 * There could be a situation that we are creating a new queue while
+	 * the other queues under this UQ_mgr are suspended. So if there is any
+	 * resume work pending, wait for it to get done.
+	 *
+	 * This also leaves a valid eviction fence and uq_mgr->userq_mutex held.
+	 */
+	mutex_lock(&adev->userq_mutex);
+	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+	uq_funcs = adev->userq_funcs[args->in.ip_type];
+	if (!uq_funcs) {
+		drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
+			     args->in.ip_type);
+		r = -EINVAL;
+		goto unlock;
+	}
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue) {
+		drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
+		r = -ENOMEM;
+		goto unlock;
+	}
+
+	/* Validate the userq virtual address.*/
+	if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) ||
+	    amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
+	    amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+		r = -EINVAL;
+		goto free_queue;
+	}
+	queue->doorbell_handle = args->in.doorbell_handle;
+	queue->queue_type = args->in.ip_type;
+	queue->vm = &fpriv->vm;
+	queue->priority = priority;
+
+	db_info.queue_type = queue->queue_type;
+	db_info.doorbell_handle = queue->doorbell_handle;
+	db_info.db_obj = &queue->db_obj;
+	db_info.doorbell_offset = args->in.doorbell_offset;
+
+	/* Convert relative doorbell offset into absolute doorbell index */
+	index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
+	if ((int64_t)index < 0) {	/* any negative errno, not only -EINVAL */
+		drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
+		r = (int)(int64_t)index;
+		goto free_queue;
+	}
+
+	queue->doorbell_index = index;
+	xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+	r = amdgpu_userq_fence_driver_alloc(adev, queue);
+	if (r) {
+		drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
+		goto release_doorbell;
+	}
+
+	r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
+	if (r) {
+		drm_file_err(uq_mgr->file, "Failed to create Queue\n");
+		goto free_fence_drv;
+	}
+
+	qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
+	if (qid < 0) {
+		drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
+		r = -ENOMEM;
+		goto destroy_mqd;
+	}
+
+	/* don't map the queue if scheduling is halted */
+	skip_map_queue = adev->userq_halt_for_enforce_isolation &&
+			 ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+			  (queue->queue_type == AMDGPU_HW_IP_COMPUTE));
+	if (!skip_map_queue) {
+		r = amdgpu_userq_map_helper(uq_mgr, queue);
+		if (r) {
+			drm_file_err(uq_mgr->file, "Failed to map Queue\n");
+			goto remove_id;
+		}
+	}
+
+	snprintf(queue_name, sizeof(queue_name), "queue-%d", qid);
+#if defined(CONFIG_DEBUG_FS)
+	/* Queue dentry per client to hold MQD information */
+	queue->debugfs_queue = debugfs_create_dir(queue_name, filp->debugfs_client);
+	debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops);
+#endif
+
+	args->out.queue_id = qid;
+	goto unlock;
+
+remove_id:
+	idr_remove(&uq_mgr->userq_idr, qid);
+destroy_mqd:
+	uq_funcs->mqd_destroy(uq_mgr, queue);
+free_fence_drv:
+	amdgpu_userq_fence_driver_free(queue);
+release_doorbell:
+	if (!amdgpu_bo_reserve(queue->db_obj.obj, true)) {
+		amdgpu_bo_unpin(queue->db_obj.obj);
+		amdgpu_bo_unreserve(queue->db_obj.obj);
+	}
+	amdgpu_bo_unref(&queue->db_obj.obj);
+free_queue:
+	kfree(queue);
+unlock:
+	mutex_unlock(&uq_mgr->userq_mutex);
+	mutex_unlock(&adev->userq_mutex);
+	/* Only a successfully created queue keeps the runtime-PM reference. */
+	if (r)
+		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+	return r;
+}
+
+/* Check the ioctl arguments for the requested op; returns 0 or -EINVAL. */
+static int amdgpu_userq_input_args_validate(struct drm_device *dev,
+					    union drm_amdgpu_userq *args, struct drm_file *filp)
+{
+	struct amdgpu_device *adev = drm_to_adev(dev);
+
+	if (args->in.op == AMDGPU_USERQ_OP_FREE) {
+		/* A free request must leave all of these create fields zero. */
+		if (args->in.ip_type || args->in.doorbell_handle ||
+		    args->in.doorbell_offset || args->in.flags ||
+		    args->in.queue_va || args->in.queue_size ||
+		    args->in.rptr_va || args->in.wptr_va ||
+		    args->in.mqd || args->in.mqd_size)
+			return -EINVAL;
+		return 0;
+	}
+	if (args->in.op != AMDGPU_USERQ_OP_CREATE)
+		return -EINVAL;
+
+	if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
+			       AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
+		return -EINVAL;
+
+	/* Usermode queues are accepted for the GFX, DMA and COMPUTE IPs only. */
+	switch (args->in.ip_type) {
+	case AMDGPU_HW_IP_GFX:
+	case AMDGPU_HW_IP_DMA:
+	case AMDGPU_HW_IP_COMPUTE:
+		break;
+	default:
+		drm_file_err(filp, "Usermode queue doesn't support IP type %u\n",
+			     args->in.ip_type);
+		return -EINVAL;
+	}
+
+	/*
+	 * NOTE(review): as written this rejects a secure queue only when the IP
+	 * is neither GFX nor COMPUTE and TMZ is disabled - confirm the intent.
+	 */
+	if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
+	    (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
+	    (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
+	    !amdgpu_is_tmz(adev)) {
+		drm_file_err(filp, "Secure only supported on GFX/Compute queues\n");
+		return -EINVAL;
+	}
+
+	if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET ||
+	    args->in.queue_va == 0 || args->in.queue_size == 0) {
+		drm_file_err(filp, "invalidate userq queue va or size\n");
+		return -EINVAL;
+	}
+	if (!args->in.wptr_va || !args->in.rptr_va) {
+		drm_file_err(filp, "invalidate userq queue rptr or wptr\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * DRM ioctl entry point for user queue create and free requests. The
+ * arguments are validated first; the result of the selected operation is
+ * returned, or -EINVAL for invalid arguments or an unknown op.
+ */
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *filp)
+{
+	union drm_amdgpu_userq *args = data;
+	int ret;
+
+	if (amdgpu_userq_input_args_validate(dev, args, filp) < 0)
+		return -EINVAL;
+
+	if (args->in.op == AMDGPU_USERQ_OP_CREATE) {
+		ret = amdgpu_userq_create(filp, args);
+		if (ret)
+			drm_file_err(filp, "Failed to create usermode queue\n");
+	} else if (args->in.op == AMDGPU_USERQ_OP_FREE) {
+		ret = amdgpu_userq_destroy(filp, args->in.queue_id);
+		if (ret)
+			drm_file_err(filp, "Failed to destroy usermode queue\n");
+	} else {
+		drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+/*
+ * Restore every queue owned by @uq_mgr. All queues are attempted even after
+ * a failure; the last error seen is logged once and returned.
+ */
+static int amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_usermode_queue *uq;
+	int id, err, last_err = 0;
+
+	idr_for_each_entry(&uq_mgr->userq_idr, uq, id) {
+		err = amdgpu_userq_restore_helper(uq_mgr, uq);
+		last_err = err ? err : last_err;
+	}
+	if (!last_err)
+		return 0;
+	drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
+	return last_err;
+}
+
+static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); /* placement from the BO's allowed domains */
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); /* @param is unused */
+}
+
+/* Lock, validate and update the PTs of every BO on vm->invalidated; 0 or first error */
+static int
+amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
+ struct amdgpu_vm *vm)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated,
+ struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock); /* the calls below run without the spinlock */
+
+ bo = bo_va->base.bo;
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
+ if (unlikely(ret))
+ return ret; /* status_lock is not held on any early return */
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ return ret;
+
+ /* This moves the bo_va to the done list */
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (ret)
+ return ret;
+
+ spin_lock(&vm->status_lock); /* retaken before the list is checked again */
+ }
+ spin_unlock(&vm->status_lock);
+
+ return 0;
+}
+
+/* Make sure the whole VM is ready to be used; returns 0 or the first error hit */
+static int
+amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_va *bo_va;
+ struct drm_exec exec;
+ int ret;
+
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); /* finalised at unlock_all on every path */
+ drm_exec_until_all_locked(&exec) {
+ ret = amdgpu_vm_lock_pd(vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+
+ ret = amdgpu_vm_lock_done_list(vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+
+ /* This validates PDs, PTs and per VM BOs */
+ ret = amdgpu_vm_validate(adev, vm, NULL,
+ amdgpu_userq_validate_vm,
+ NULL);
+ if (unlikely(ret))
+ goto unlock_all;
+
+ /* This locks and validates the remaining evicted BOs */
+ ret = amdgpu_userq_bo_validate(adev, &exec, vm);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+ }
+
+ ret = amdgpu_vm_handle_moved(adev, vm, NULL);
+ if (ret)
+ goto unlock_all;
+
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
+ if (ret)
+ goto unlock_all;
+
+ /*
+ * We need to wait for all VM updates to finish before restarting the
+ * queues. Using the done list like that is now ok since everything is
+ * locked in place.
+ */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status)
+ dma_fence_wait(bo_va->last_pt_update, false); /* result ignored */
+ dma_fence_wait(vm->last_update, false); /* result ignored */
+
+ ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
+ if (ret)
+ drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");
+
+unlock_all:
+ drm_exec_fini(&exec);
+ return ret;
+}
+
+/*
+ * Delayed-work handler for uq_mgr->resume_work. After flushing any pending
+ * eviction suspend work it revalidates the VM and, only if that succeeded,
+ * restores the queues. Both steps run under userq_mutex; failures are logged.
+ */
+static void amdgpu_userq_restore_worker(struct work_struct *work)
+{
+	struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
+	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+	int ret;
+
+	flush_delayed_work(&fpriv->evf_mgr.suspend_work);
+
+	mutex_lock(&uq_mgr->userq_mutex);
+	ret = amdgpu_userq_vm_validate(uq_mgr);
+	if (ret) {
+		drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
+	} else {
+		/* Queues are only restored on top of a validated VM. */
+		ret = amdgpu_userq_restore_all(uq_mgr);
+		if (ret)
+			drm_file_err(uq_mgr->file, "Failed to restore all queues\n");
+	}
+	mutex_unlock(&uq_mgr->userq_mutex);
+}
+
+/*
+ * Preempt every queue owned by @uq_mgr. All queues are attempted even after
+ * a failure; the last error seen is logged once and returned.
+ */
+static int amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_usermode_queue *uq;
+	int id, err, last_err = 0;
+
+	idr_for_each_entry(&uq_mgr->userq_idr, uq, id) {
+		err = amdgpu_userq_preempt_helper(uq_mgr, uq);
+		if (err)
+			last_err = err;
+	}
+	if (last_err)
+		drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n");
+	return last_err;
+}
+
+/* Wait up to 100ms per queue for its last fence; -ETIMEDOUT on the first failure. */
+static int amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_usermode_queue *uq;
+	struct dma_fence *fence;
+	int id, ret;
+
+	idr_for_each_entry(&uq_mgr->userq_idr, uq, id) {
+		fence = uq->last_fence;
+		if (fence && !dma_fence_is_signaled(fence)) {
+			ret = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(100));
+			if (ret > 0)
+				continue;
+			drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
+				     fence->context, fence->seqno);
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Evict all queues of @uq_mgr and signal @ev_fence. Outstanding queue fences
+ * are waited for first; if that wait or the preemption fails the eviction is
+ * abandoned and the fence is left unsignaled. Afterwards the resume work is
+ * scheduled, or cancelled when the file is closing.
+ */
+void
+amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
+		   struct amdgpu_eviction_fence *ev_fence)
+{
+	struct amdgpu_eviction_fence_mgr *evf_mgr = &uq_mgr_to_fpriv(uq_mgr)->evf_mgr;
+
+	/* Wait for any pending userqueue fence work to finish */
+	if (amdgpu_userq_wait_for_signal(uq_mgr)) {
+		drm_file_err(uq_mgr->file, "Not evicting userqueue, timeout waiting for work\n");
+		return;
+	}
+
+	if (amdgpu_userq_evict_all(uq_mgr)) {
+		drm_file_err(uq_mgr->file, "Failed to evict userqueue\n");
+		return;
+	}
+
+	/* Signal current eviction fence */
+	amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
+
+	/* No resume for a closing file; otherwise bring the queues back. */
+	if (evf_mgr->fd_closing)
+		cancel_delayed_work_sync(&uq_mgr->resume_work);
+	else
+		schedule_delayed_work(&uq_mgr->resume_work, 0);
+}
+
+/* Set up @userq_mgr; the work is initialised before list walkers can see it. */
+int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
+			  struct amdgpu_device *adev)
+{
+	mutex_init(&userq_mgr->userq_mutex);
+	idr_init_base(&userq_mgr->userq_idr, 1);
+	userq_mgr->adev = adev;
+	userq_mgr->file = file_priv;
+	INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
+
+	mutex_lock(&adev->userq_mutex);
+	list_add(&userq_mgr->list, &adev->userq_mgr_list);
+	mutex_unlock(&adev->userq_mutex);
+	return 0;
+}
+
+/* Unmap and free all remaining queues, then unlink and destroy @userq_mgr. */
+void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
+{
+	struct amdgpu_device *adev = userq_mgr->adev;
+	struct amdgpu_usermode_queue *queue;
+	struct amdgpu_userq_mgr *uqm, *tmp;
+	int queue_id;	/* int, as idr_for_each_entry() and amdgpu_userq_cleanup() expect */
+
+	cancel_delayed_work_sync(&userq_mgr->resume_work);
+	mutex_lock(&adev->userq_mutex);
+	mutex_lock(&userq_mgr->userq_mutex);
+	idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) {
+		amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
+		amdgpu_userq_unmap_helper(userq_mgr, queue);
+		amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
+	}
+
+	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+		if (uqm != userq_mgr)
+			continue;
+		list_del(&uqm->list);
+		break;
+	}
+	idr_destroy(&userq_mgr->userq_idr);
+	mutex_unlock(&userq_mgr->userq_mutex);
+	mutex_unlock(&adev->userq_mutex);
+	mutex_destroy(&userq_mgr->userq_mutex);
+}
+
+/*
+ * Suspend path: preempt (adev->in_s0ix set) or unmap every user queue on
+ * @adev. Returns 0, or the last error reported for a queue.
+ */
+int amdgpu_userq_suspend(struct amdgpu_device *adev)
+{
+	struct amdgpu_usermode_queue *uq;
+	struct amdgpu_userq_mgr *mgr, *next;
+	int id, err, last_err = 0;
+
+	if (!amdgpu_userq_get_supported_ip_mask(adev))
+		return 0;
+
+	mutex_lock(&adev->userq_mutex);
+	list_for_each_entry_safe(mgr, next, &adev->userq_mgr_list, list) {
+		cancel_delayed_work_sync(&mgr->resume_work);
+		mutex_lock(&mgr->userq_mutex);
+		idr_for_each_entry(&mgr->userq_idr, uq, id) {
+			err = adev->in_s0ix ? amdgpu_userq_preempt_helper(mgr, uq) :
+					      amdgpu_userq_unmap_helper(mgr, uq);
+			if (err)
+				last_err = err;
+		}
+		mutex_unlock(&mgr->userq_mutex);
+	}
+	mutex_unlock(&adev->userq_mutex);
+	return last_err;
+}
+
+/*
+ * Resume path: restore (adev->in_s0ix set) or map every user queue on
+ * @adev. Returns 0, or the last error reported for a queue.
+ */
+int amdgpu_userq_resume(struct amdgpu_device *adev)
+{
+	struct amdgpu_usermode_queue *uq;
+	struct amdgpu_userq_mgr *mgr, *next;
+	int id, err, last_err = 0;
+
+	if (!amdgpu_userq_get_supported_ip_mask(adev))
+		return 0;
+
+	mutex_lock(&adev->userq_mutex);
+	list_for_each_entry_safe(mgr, next, &adev->userq_mgr_list, list) {
+		mutex_lock(&mgr->userq_mutex);
+		idr_for_each_entry(&mgr->userq_idr, uq, id) {
+			err = adev->in_s0ix ? amdgpu_userq_restore_helper(mgr, uq) :
+					      amdgpu_userq_map_helper(mgr, uq);
+			if (err)
+				last_err = err;
+		}
+		mutex_unlock(&mgr->userq_mutex);
+	}
+	mutex_unlock(&adev->userq_mutex);
+	return last_err;
+}
+
+/*
+ * Set the enforce-isolation halt flag and preempt every GFX/COMPUTE user
+ * queue whose xcp_id is @idx. Returns 0 or the last preempt error.
+ */
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, u32 idx)
+{
+	const u32 gfx_compute = (1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE);
+	struct amdgpu_usermode_queue *uq;
+	struct amdgpu_userq_mgr *mgr, *next;
+	int id, err, last_err = 0;
+
+	if (!(amdgpu_userq_get_supported_ip_mask(adev) & gfx_compute))
+		return 0;
+
+	mutex_lock(&adev->userq_mutex);
+	if (adev->userq_halt_for_enforce_isolation)
+		dev_warn(adev->dev, "userq scheduling already stopped!\n");
+	adev->userq_halt_for_enforce_isolation = true;
+	list_for_each_entry_safe(mgr, next, &adev->userq_mgr_list, list) {
+		cancel_delayed_work_sync(&mgr->resume_work);
+		mutex_lock(&mgr->userq_mutex);
+		idr_for_each_entry(&mgr->userq_idr, uq, id) {
+			if ((uq->queue_type != AMDGPU_HW_IP_GFX &&
+			     uq->queue_type != AMDGPU_HW_IP_COMPUTE) || uq->xcp_id != idx)
+				continue;
+			err = amdgpu_userq_preempt_helper(mgr, uq);
+			if (err)
+				last_err = err;
+		}
+		mutex_unlock(&mgr->userq_mutex);
+	}
+	mutex_unlock(&adev->userq_mutex);
+	return last_err;
+}
+
+/*
+ * Clear the enforce-isolation halt flag and restore every GFX/COMPUTE user
+ * queue whose xcp_id is @idx. Returns 0 or the last restore error.
+ */
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, u32 idx)
+{
+	const u32 gfx_compute = (1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE);
+	struct amdgpu_usermode_queue *uq;
+	struct amdgpu_userq_mgr *mgr, *next;
+	int id, err, last_err = 0;
+
+	if (!(amdgpu_userq_get_supported_ip_mask(adev) & gfx_compute))
+		return 0;
+
+	mutex_lock(&adev->userq_mutex);
+	if (!adev->userq_halt_for_enforce_isolation)
+		dev_warn(adev->dev, "userq scheduling already started!\n");
+	adev->userq_halt_for_enforce_isolation = false;
+	list_for_each_entry_safe(mgr, next, &adev->userq_mgr_list, list) {
+		mutex_lock(&mgr->userq_mutex);
+		idr_for_each_entry(&mgr->userq_idr, uq, id) {
+			if ((uq->queue_type != AMDGPU_HW_IP_GFX &&
+			     uq->queue_type != AMDGPU_HW_IP_COMPUTE) || uq->xcp_id != idx)
+				continue;
+			err = amdgpu_userq_restore_helper(mgr, uq);
+			if (err)
+				last_err = err;
+		}
+		mutex_unlock(&mgr->userq_mutex);
+	}
+	mutex_unlock(&adev->userq_mutex);
+	return last_err;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
new file mode 100644
index 000000000000..c027dd916672
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_USERQ_H_
+#define AMDGPU_USERQ_H_
+#include "amdgpu_eviction_fence.h"
+
+/*
+ * NOTE(review): presumably the upper bound on user queue IDs handed out per
+ * manager -- confirm against the queue-ID allocator, which is not in this header.
+ */
+#define AMDGPU_MAX_USERQ_COUNT 512
+
+/*
+ * container_of() helpers: each recovers the enclosing object from a pointer
+ * to one of its embedded members.
+ */
+/* pointer to the embedded "base" member -> struct amdgpu_eviction_fence */
+#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
+/* pointer to the embedded "userq_mgr" member -> struct amdgpu_fpriv */
+#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
+/* pointer to the work member called @name -> struct amdgpu_userq_mgr */
+#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
+
+/*
+ * State recorded in amdgpu_usermode_queue.state.
+ *
+ * UNMAPPED is explicitly 0, so a zero-initialised queue starts out unmapped.
+ * NOTE(review): the transitions are driven by code outside this header; the
+ * meaning of the remaining values is inferred from their names and from the
+ * map/unmap/preempt/restore/detect_and_reset callbacks in
+ * struct amdgpu_userq_funcs -- confirm against amdgpu_userq.c.
+ */
+enum amdgpu_userq_state {
+ AMDGPU_USERQ_STATE_UNMAPPED = 0,
+ AMDGPU_USERQ_STATE_MAPPED,
+ AMDGPU_USERQ_STATE_PREEMPTED,
+ AMDGPU_USERQ_STATE_HUNG,
+};
+
+struct amdgpu_mqd_prop;
+
+/*
+ * A buffer object used by a user queue, handled through
+ * amdgpu_userq_create_object() / amdgpu_userq_destroy_object().
+ *
+ * NOTE(review): @cpu_ptr and @gpu_addr are presumably the kernel CPU mapping
+ * and the GPU address of @obj -- confirm in amdgpu_userq_create_object().
+ */
+struct amdgpu_userq_obj {
+ void *cpu_ptr;
+ uint64_t gpu_addr;
+ struct amdgpu_bo *obj;
+};
+
+/*
+ * struct amdgpu_usermode_queue - one user mode queue
+ *
+ * @queue_type:     compared against AMDGPU_HW_IP_GFX / AMDGPU_HW_IP_COMPUTE
+ *                  by the enforce-isolation stop/start helpers
+ * @state:          current enum amdgpu_userq_state
+ * @doorbell_index: key under which the queue's fence driver is stored in
+ *                  adev->userq_xa
+ * @userq_prop:     MQD properties; wptr_gpu_addr is read from here by the
+ *                  signal ioctl to locate the write pointer
+ * @userq_mgr:      owning manager
+ * @vm:             VM used to look up the wptr mapping
+ * @mqd, @db_obj, @fw_obj, @wptr_obj: buffer objects belonging to the queue
+ *                  (NOTE(review): roles inferred from the names)
+ * @fence_drv_xa:   fence drivers this queue waits on; filled by the wait
+ *                  ioctl and drained into the next fence by the signal ioctl
+ * @fence_drv:      the queue's own fence driver, set by
+ *                  amdgpu_userq_fence_driver_alloc()
+ * @last_fence:     reference to the fence created by the most recent signal
+ *                  ioctl
+ * @xcp_id:         partition id, matched against the idx argument of the
+ *                  enforce-isolation stop/start helpers
+ *
+ * @doorbell_handle, @flags, @priority and @debugfs_queue are not used by the
+ * code visible next to this header; see amdgpu_userq.c for their meaning.
+ */
+struct amdgpu_usermode_queue {
+ int queue_type;
+ enum amdgpu_userq_state state;
+ uint64_t doorbell_handle;
+ uint64_t doorbell_index;
+ uint64_t flags;
+ struct amdgpu_mqd_prop *userq_prop;
+ struct amdgpu_userq_mgr *userq_mgr;
+ struct amdgpu_vm *vm;
+ struct amdgpu_userq_obj mqd;
+ struct amdgpu_userq_obj db_obj;
+ struct amdgpu_userq_obj fw_obj;
+ struct amdgpu_userq_obj wptr_obj;
+ struct xarray fence_drv_xa;
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct dma_fence *last_fence;
+ u32 xcp_id;
+ int priority;
+ struct dentry *debugfs_queue;
+};
+
+/*
+ * Per-IP backend operations for user queues.
+ *
+ * All callbacks except @mqd_destroy return an int; the callers visible in
+ * this file treat a non-zero result as an error.
+ * NOTE(review): the exact contract of each hook (locking, which queue state
+ * it expects and leaves behind) is defined by the implementations, which are
+ * not part of this header -- the summaries below follow the member names.
+ */
+struct amdgpu_userq_funcs {
+ /* build the MQD for @queue from the userspace creation arguments */
+ int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr,
+ struct drm_amdgpu_userq_in *args,
+ struct amdgpu_usermode_queue *queue);
+ /* release whatever mqd_create set up */
+ void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *uq);
+ /* take the queue off the hardware */
+ int (*unmap)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ /* put the queue on the hardware */
+ int (*map)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ /* preempt a queue */
+ int (*preempt)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ /* restore a queue */
+ int (*restore)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ /* device-wide hang detection/reset for all queues of @queue_type */
+ int (*detect_and_reset)(struct amdgpu_device *adev,
+ int queue_type);
+};
+
+/*
+ * User mode queue manager, embedded in struct amdgpu_fpriv as "userq_mgr"
+ * (see uq_mgr_to_fpriv()).
+ *
+ * @userq_idr:   queue id -> struct amdgpu_usermode_queue lookup
+ * @userq_mutex: taken around walks of @userq_idr by the enforce-isolation
+ *               helpers and released by the signal ioctl after it updates
+ *               a queue's last_fence
+ * @adev:        owning device
+ * @resume_work: delayed work, cancelled synchronously before queues are
+ *               preempted for enforce-isolation
+ * @list:        link in adev->userq_mgr_list
+ * @file:        DRM file this manager was initialised for
+ *               (see amdgpu_userq_mgr_init())
+ */
+struct amdgpu_userq_mgr {
+ struct idr userq_idr;
+ struct mutex userq_mutex;
+ struct amdgpu_device *adev;
+ struct delayed_work resume_work;
+ struct list_head list;
+ struct drm_file *file;
+};
+
+/*
+ * Doorbell description passed to amdgpu_userq_get_doorbell_index().
+ *
+ * NOTE(review): field meanings follow their names (a doorbell handle, the
+ * queue type, an offset and the doorbell's userq object); the consumer is
+ * not part of this header -- confirm there.
+ */
+struct amdgpu_db_info {
+ uint64_t doorbell_handle;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ struct amdgpu_userq_obj *db_obj;
+};
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
+ struct amdgpu_device *adev);
+
+void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr);
+
+int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj,
+ int size);
+
+void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj);
+
+void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence *ev_fence);
+
+void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
+ struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_db_info *db_info,
+ struct drm_file *filp);
+
+u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev);
+
+int amdgpu_userq_suspend(struct amdgpu_device *adev);
+int amdgpu_userq_resume(struct amdgpu_device *adev);
+
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx);
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx);
+
+int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
+ u64 expected_size);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
new file mode 100644
index 000000000000..761bad98da3e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -0,0 +1,1009 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <linux/dma-fence-unwrap.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_syncobj.h>
+
+#include "amdgpu.h"
+#include "amdgpu_userq_fence.h"
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops;
+static struct kmem_cache *amdgpu_userq_fence_slab;
+
+/*
+ * Create the slab cache that backs struct amdgpu_userq_fence allocations.
+ *
+ * Returns 0 on success or -ENOMEM if the cache could not be created.
+ */
+int amdgpu_userq_fence_slab_init(void)
+{
+	struct kmem_cache *cache;
+
+	cache = kmem_cache_create("amdgpu_userq_fence",
+				  sizeof(struct amdgpu_userq_fence), 0,
+				  SLAB_HWCACHE_ALIGN, NULL);
+	amdgpu_userq_fence_slab = cache;
+
+	return cache ? 0 : -ENOMEM;
+}
+
+/* Destroy the user-queue fence slab cache. */
+void amdgpu_userq_fence_slab_fini(void)
+{
+ /*
+ * Fences are freed from an RCU callback (amdgpu_userq_fence_release()
+ * uses call_rcu()), so wait for pending callbacks before the cache
+ * they free into goes away.
+ */
+ rcu_barrier();
+ kmem_cache_destroy(amdgpu_userq_fence_slab);
+}
+
+/*
+ * Downcast a dma_fence to the user-queue fence that embeds it.
+ *
+ * Returns NULL when @f is NULL or was not created with
+ * amdgpu_userq_fence_ops.
+ */
+static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
+{
+	if (f && f->ops == &amdgpu_userq_fence_ops)
+		return container_of(f, struct amdgpu_userq_fence, base);
+
+	return NULL;
+}
+
+/*
+ * Read the current sequence value from the driver's seq64 slot.
+ * The slot is stored little-endian; @fence_drv->cpu_addr is dereferenced
+ * without a NULL check.
+ */
+static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
+{
+	u64 raw = *fence_drv->cpu_addr;
+
+	return le64_to_cpu(raw);
+}
+
+/*
+ * Store @seq (little-endian) into the driver's seq64 slot.
+ * Does nothing when the slot has no CPU mapping.
+ */
+static void
+amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver *fence_drv,
+			 u64 seq)
+{
+	u64 *slot = fence_drv->cpu_addr;
+
+	if (!slot)
+		return;
+
+	*slot = cpu_to_le64(seq);
+}
+
+/*
+ * Allocate and publish the fence driver of a user queue.
+ *
+ * @adev:  device the queue belongs to
+ * @userq: queue that receives the new driver in userq->fence_drv
+ *
+ * The driver gets a zeroed seq64 slot, its own dma-fence context and the
+ * current task's comm as timeline name, and is stored in adev->userq_xa
+ * under the queue's doorbell index.
+ *
+ * Returns 0 on success, -ENOMEM or the error from the seq64 allocation /
+ * xarray store on failure (nothing stays allocated in that case).
+ */
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+				    struct amdgpu_usermode_queue *userq)
+{
+	struct amdgpu_userq_fence_driver *drv;
+	unsigned long irq_flags;
+	void *prev;
+	int err;
+
+	drv = kzalloc(sizeof(*drv), GFP_KERNEL);
+	if (!drv)
+		return -ENOMEM;
+
+	/* seq64 slot the hardware writes the completed sequence into */
+	err = amdgpu_seq64_alloc(adev, &drv->va, &drv->gpu_addr,
+				 &drv->cpu_addr);
+	if (err) {
+		kfree(drv);
+		return err;
+	}
+
+	memset(drv->cpu_addr, 0, sizeof(u64));
+
+	kref_init(&drv->refcount);
+	INIT_LIST_HEAD(&drv->fences);
+	spin_lock_init(&drv->fence_list_lock);
+
+	drv->adev = adev;
+	drv->context = dma_fence_context_alloc(1);
+	get_task_comm(drv->timeline_name, current);
+
+	xa_lock_irqsave(&adev->userq_xa, irq_flags);
+	prev = __xa_store(&adev->userq_xa, userq->doorbell_index,
+			  drv, GFP_KERNEL);
+	xa_unlock_irqrestore(&adev->userq_xa, irq_flags);
+
+	err = xa_err(prev);
+	if (err) {
+		amdgpu_seq64_free(adev, drv->va);
+		kfree(drv);
+		return err;
+	}
+
+	userq->fence_drv = drv;
+
+	return 0;
+}
+
+/*
+ * Erase every fence driver stored in @xa and drop the reference that
+ * each entry held.
+ */
+static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
+{
+	struct amdgpu_userq_fence_driver *drv;
+	unsigned long idx;
+
+	if (!xa_empty(xa)) {
+		xa_lock(xa);
+		xa_for_each(xa, idx, drv) {
+			__xa_erase(xa, idx);
+			amdgpu_userq_fence_driver_put(drv);
+		}
+		xa_unlock(xa);
+	}
+}
+
+/*
+ * Release the fence-driver state of a user queue: drop the references to
+ * the drivers collected in userq->fence_drv_xa, destroy that xarray, then
+ * drop the queue's reference to its own fence driver.
+ */
+void
+amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
+{
+ amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
+ xa_destroy(&userq->fence_drv_xa);
+ /* Drop the fence_drv reference held by user queue */
+ amdgpu_userq_fence_driver_put(userq->fence_drv);
+}
+
+/*
+ * Signal every pending fence whose sequence number has been reached.
+ *
+ * @fence_drv: fence driver to process; NULL is tolerated and ignored
+ *
+ * Reads the completed sequence from the seq64 slot and walks the pending
+ * list in order, stopping at the first fence that is not yet reached.
+ * For each completed fence it signals the fence, drops the references to
+ * the fence drivers captured in fence_drv_array, unlinks the fence and
+ * drops the list's reference to it.
+ *
+ * fence_list_lock is taken with the irqsave variant, as every other user
+ * of that lock in this file does, so the function is safe to call with
+ * interrupts enabled as well as from interrupt context.
+ */
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
+{
+	struct amdgpu_userq_fence *userq_fence, *tmp;
+	struct dma_fence *fence;
+	unsigned long flags;
+	unsigned long i;
+	u64 rptr;
+
+	if (!fence_drv)
+		return;
+
+	rptr = amdgpu_userq_fence_read(fence_drv);
+
+	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+	list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
+		fence = &userq_fence->base;
+
+		/* the list is in submission order: nothing later is done either */
+		if (rptr < fence->seqno)
+			break;
+
+		dma_fence_signal(fence);
+
+		/* fence_drv_array_count is unsigned long, so is the index */
+		for (i = 0; i < userq_fence->fence_drv_array_count; i++)
+			amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
+
+		list_del(&userq_fence->link);
+		/* drop the reference the pending list held */
+		dma_fence_put(fence);
+	}
+	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+}
+
+/*
+ * kref release callback for a fence driver.
+ *
+ * Cancels what is still pending (unsignaled fences are completed with
+ * -ECANCELED), removes every adev->userq_xa entry that points at this
+ * driver, releases the seq64 slot and frees the driver.
+ */
+void amdgpu_userq_fence_driver_destroy(struct kref *ref)
+{
+	struct amdgpu_userq_fence_driver *drv =
+		container_of(ref, struct amdgpu_userq_fence_driver, refcount);
+	struct amdgpu_device *adev = drv->adev;
+	struct amdgpu_userq_fence_driver *entry;
+	struct amdgpu_userq_fence *uf, *next;
+	unsigned long idx, irq_flags;
+
+	spin_lock_irqsave(&drv->fence_list_lock, irq_flags);
+	list_for_each_entry_safe(uf, next, &drv->fences, link) {
+		struct dma_fence *base = &uf->base;
+
+		if (!dma_fence_is_signaled(base)) {
+			dma_fence_set_error(base, -ECANCELED);
+			dma_fence_signal(base);
+		}
+
+		list_del(&uf->link);
+		/* reference owned by the pending list */
+		dma_fence_put(base);
+	}
+	spin_unlock_irqrestore(&drv->fence_list_lock, irq_flags);
+
+	xa_lock_irqsave(&adev->userq_xa, irq_flags);
+	xa_for_each(&adev->userq_xa, idx, entry) {
+		if (entry == drv)
+			__xa_erase(&adev->userq_xa, idx);
+	}
+	xa_unlock_irqrestore(&adev->userq_xa, irq_flags);
+
+	/* give the seq64 slot back before the driver itself goes away */
+	amdgpu_seq64_free(adev, drv->va);
+	kfree(drv);
+}
+
+/* Take an additional reference on @fence_drv (must not be NULL). */
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_get(&fence_drv->refcount);
+}
+
+/*
+ * Drop a reference on @fence_drv (must not be NULL); the last put runs
+ * amdgpu_userq_fence_driver_destroy().
+ */
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
+}
+
+/*
+ * Allocate an uninitialised fence object from the slab cache (GFP_ATOMIC).
+ *
+ * @userq_fence: receives the new object, or NULL on failure
+ *
+ * Returns 0 on success, -ENOMEM otherwise.
+ */
+static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
+{
+	struct amdgpu_userq_fence *obj;
+
+	obj = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
+	*userq_fence = obj;
+	if (!obj)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Initialise @userq_fence as a dma_fence on @userq's timeline.
+ *
+ * @userq:       queue the fence belongs to
+ * @userq_fence: pre-allocated fence object (see amdgpu_userq_fence_alloc())
+ * @seq:         sequence number the fence signals at
+ * @f:           receives the initialised dma_fence on success
+ *
+ * Returns -EINVAL if the queue has no fence driver, 0 otherwise. On the
+ * -EINVAL path @userq_fence is left untouched and still owned by the caller.
+ */
+static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
+ struct amdgpu_userq_fence *userq_fence,
+ u64 seq, struct dma_fence **f)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct dma_fence *fence;
+ unsigned long flags;
+
+ fence_drv = userq->fence_drv;
+ if (!fence_drv)
+ return -EINVAL;
+
+ spin_lock_init(&userq_fence->lock);
+ INIT_LIST_HEAD(&userq_fence->link);
+ fence = &userq_fence->base;
+ userq_fence->fence_drv = fence_drv;
+
+ dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
+ fence_drv->context, seq);
+
+ /* driver reference is released in amdgpu_userq_fence_free() */
+ amdgpu_userq_fence_driver_get(fence_drv);
+ /* extra fence reference owned by the driver's pending list (see below) */
+ dma_fence_get(fence);
+
+ /*
+ * Move the fence drivers this queue has been waiting on out of
+ * fence_drv_xa and into the fence; their references are dropped when
+ * the fence is processed in amdgpu_userq_fence_driver_process().
+ */
+ if (!xa_empty(&userq->fence_drv_xa)) {
+ struct amdgpu_userq_fence_driver *stored_fence_drv;
+ unsigned long index, count = 0;
+ int i = 0;
+
+ xa_lock(&userq->fence_drv_xa);
+ /* first pass: count the entries to size the array */
+ xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
+ count++;
+
+ userq_fence->fence_drv_array =
+ kvmalloc_array(count,
+ sizeof(struct amdgpu_userq_fence_driver *),
+ GFP_ATOMIC);
+
+ /* on allocation failure the entries simply stay in the xarray */
+ if (userq_fence->fence_drv_array) {
+ xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
+ userq_fence->fence_drv_array[i] = stored_fence_drv;
+ __xa_erase(&userq->fence_drv_xa, index);
+ i++;
+ }
+ }
+
+ userq_fence->fence_drv_array_count = i;
+ xa_unlock(&userq->fence_drv_xa);
+ } else {
+ userq_fence->fence_drv_array = NULL;
+ userq_fence->fence_drv_array_count = 0;
+ }
+
+ /* Check if hardware has already processed the job */
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ if (!dma_fence_is_signaled(fence))
+ list_add_tail(&userq_fence->link, &fence_drv->fences);
+ else
+ /* never queued: give back the pending-list reference right away */
+ dma_fence_put(fence);
+
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+
+ *f = fence;
+
+ return 0;
+}
+
+/* dma_fence_ops.get_driver_name: constant driver name for all userq fences. */
+static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
+{
+ return "amdgpu_userq_fence";
+}
+
+/*
+ * dma_fence_ops.get_timeline_name: name stored in the fence's driver
+ * (filled from the creating task's comm in amdgpu_userq_fence_driver_alloc()).
+ * The result of to_amdgpu_userq_fence() is used without a NULL check, so @f
+ * must be a user-queue fence.
+ */
+static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+
+ return fence->fence_drv->timeline_name;
+}
+
+/*
+ * dma_fence_ops.signaled: a fence is done once the value in the driver's
+ * seq64 slot has reached the fence's sequence number.
+ */
+static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
+{
+	struct amdgpu_userq_fence *uf = to_amdgpu_userq_fence(f);
+	u64 completed = amdgpu_userq_fence_read(uf->fence_drv);
+
+	return completed >= uf->base.seqno;
+}
+
+/*
+ * RCU callback that finally frees a user-queue fence: drops the fence's
+ * reference on its driver, frees the captured driver array and returns
+ * the object to the slab cache.
+ */
+static void amdgpu_userq_fence_free(struct rcu_head *rcu)
+{
+	struct amdgpu_userq_fence *uf =
+		to_amdgpu_userq_fence(container_of(rcu, struct dma_fence, rcu));
+
+	/* pairs with the driver_get in amdgpu_userq_fence_create() */
+	amdgpu_userq_fence_driver_put(uf->fence_drv);
+
+	kvfree(uf->fence_drv_array);
+	kmem_cache_free(amdgpu_userq_fence_slab, uf);
+}
+
+/*
+ * dma_fence_ops.release: defer the actual free to an RCU callback
+ * (amdgpu_userq_fence_free()).
+ */
+static void amdgpu_userq_fence_release(struct dma_fence *f)
+{
+ call_rcu(&f->rcu, amdgpu_userq_fence_free);
+}
+
+/*
+ * dma_fence operations for user-queue fences. The address of this table is
+ * also what to_amdgpu_userq_fence() checks to recognise such a fence.
+ */
+static const struct dma_fence_ops amdgpu_userq_fence_ops = {
+ .get_driver_name = amdgpu_userq_fence_get_driver_name,
+ .get_timeline_name = amdgpu_userq_fence_get_timeline_name,
+ .signaled = amdgpu_userq_fence_signaled,
+ .release = amdgpu_userq_fence_release,
+};
+
+/**
+ * amdgpu_userq_fence_read_wptr - Read the userq wptr value
+ *
+ * @queue: user mode queue structure pointer
+ * @wptr: receives the write pointer value on success
+ *
+ * Read the wptr value from the buffer that backs the queue's
+ * wptr_gpu_addr. The userq signal IOCTL creates a dma_fence for the
+ * shared buffers that expects the RPTR value written to seq64 memory
+ * >= WPTR.
+ *
+ * The wptr BO is looked up through the queue's VM while the VM root BO is
+ * reserved; a reference is taken on the BO before the root is unreserved
+ * and that reference is dropped again on every exit path.
+ *
+ * Returns 0 on success with *@wptr filled in, -EINVAL if no mapping
+ * covers the wptr address, or the error from reserving/mapping a BO.
+ */
+static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
+					u64 *wptr)
+{
+	struct amdgpu_bo_va_mapping *mapping;
+	struct amdgpu_bo *bo;
+	u64 addr, *ptr;
+	int r;
+
+	r = amdgpu_bo_reserve(queue->vm->root.bo, false);
+	if (r)
+		return r;
+
+	addr = queue->userq_prop->wptr_gpu_addr;
+	addr &= AMDGPU_GMC_HOLE_MASK;
+
+	mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
+	if (!mapping) {
+		amdgpu_bo_unreserve(queue->vm->root.bo);
+		DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
+		return -EINVAL;
+	}
+
+	/* pin the BO with a reference before letting go of the VM root */
+	bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
+	amdgpu_bo_unreserve(queue->vm->root.bo);
+
+	r = amdgpu_bo_reserve(bo, true);
+	if (r) {
+		DRM_ERROR("Failed to reserve userqueue wptr bo");
+		/* the reference taken above must not leak */
+		goto unref_bo;
+	}
+
+	r = amdgpu_bo_kmap(bo, (void **)&ptr);
+	if (r) {
+		DRM_ERROR("Failed mapping the userqueue wptr bo");
+		goto unreserve_bo;
+	}
+
+	*wptr = le64_to_cpu(*ptr);
+
+	amdgpu_bo_kunmap(bo);
+	/* r is 0 here; fall through the common cleanup */
+
+unreserve_bo:
+	amdgpu_bo_unreserve(bo);
+unref_bo:
+	amdgpu_bo_unref(&bo);
+
+	return r;
+}
+
+/*
+ * Error-path helper for the signal ioctl: drop the caller's reference on
+ * a fence that will not be attached to any BO or syncobj.
+ */
+static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
+{
+ dma_fence_put(fence);
+}
+
+/*
+ * Record @error on @fence unless it has already signaled.
+ *
+ * The check and the update run under the owning driver's fence_list_lock
+ * with interrupts disabled.
+ */
+static void
+amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence,
+ int error)
+{
+ struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+ unsigned long flags;
+ struct dma_fence *f;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+
+ /*
+ * NOTE(review): the argument is the address of the embedded base
+ * member, not an RCU-managed pointer, so f can never be NULL here --
+ * confirm whether the rcu accessor is intentional.
+ */
+ f = rcu_dereference_protected(&fence->base,
+ lockdep_is_held(&fence_drv->fence_list_lock));
+ if (f && !dma_fence_is_signaled_locked(f))
+ dma_fence_set_error(f, error);
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+}
+
+/*
+ * Force the queue's timeline to complete up to its last fence.
+ *
+ * If the queue has a last_fence, that fence is marked with -ECANCELED
+ * (unless already signaled), its sequence number is written into the
+ * seq64 slot and the driver is processed so every pending fence up to
+ * that sequence number is signaled. Without a last_fence this is a no-op.
+ */
+void
+amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq)
+{
+	struct dma_fence *f = userq->last_fence;
+	struct amdgpu_userq_fence_driver *drv;
+	struct amdgpu_userq_fence *last;
+	u64 seq;
+
+	if (!f)
+		return;
+
+	last = to_amdgpu_userq_fence(f);
+	drv = last->fence_drv;
+	seq = last->base.seqno;
+
+	amdgpu_userq_fence_driver_set_error(last, -ECANCELED);
+	amdgpu_userq_fence_write(drv, seq);
+	amdgpu_userq_fence_driver_process(drv);
+}
+
+/*
+ * amdgpu_userq_signal_ioctl - attach a user-queue fence to BOs and syncobjs
+ *
+ * @dev:  DRM device (unused here)
+ * @data: struct drm_amdgpu_userq_signal from userspace
+ * @filp: DRM file of the caller
+ *
+ * Copies the syncobj and read/write BO handle arrays from userspace, looks
+ * up every object, reads the queue's current write pointer and creates a
+ * fence that signals once the queue has reached it. The fence becomes the
+ * queue's last_fence, is added to the reservation object of every BO
+ * (READ usage for the read list, WRITE usage for the write list) and
+ * replaces the fence of every listed syncobj.
+ *
+ * All user-supplied element counts are turned into byte sizes with
+ * size_mul() so that an oversized count saturates instead of wrapping.
+ *
+ * Returns 0 on success or a negative error code; every looked-up object
+ * and temporary array is released on all paths.
+ */
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *filp)
+{
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+	struct drm_amdgpu_userq_signal *args = data;
+	struct drm_gem_object **gobj_write = NULL;
+	struct drm_gem_object **gobj_read = NULL;
+	struct amdgpu_usermode_queue *queue;
+	struct amdgpu_userq_fence *userq_fence;
+	struct drm_syncobj **syncobj = NULL;
+	u32 *bo_handles_write, num_write_bo_handles;
+	u32 *syncobj_handles, num_syncobj_handles;
+	u32 *bo_handles_read, num_read_bo_handles;
+	int r, i, entry, rentry, wentry;
+	struct dma_fence *fence;
+	struct drm_exec exec;
+	u64 wptr;
+
+	num_syncobj_handles = args->num_syncobj_handles;
+	syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
+				      size_mul(sizeof(u32), num_syncobj_handles));
+	if (IS_ERR(syncobj_handles))
+		return PTR_ERR(syncobj_handles);
+
+	/* Array of pointers to the looked up syncobjs */
+	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
+	if (!syncobj) {
+		r = -ENOMEM;
+		goto free_syncobj_handles;
+	}
+
+	for (entry = 0; entry < num_syncobj_handles; entry++) {
+		syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
+		if (!syncobj[entry]) {
+			r = -ENOENT;
+			goto free_syncobj;
+		}
+	}
+
+	num_read_bo_handles = args->num_bo_read_handles;
+	bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
+				      size_mul(sizeof(u32), num_read_bo_handles));
+	if (IS_ERR(bo_handles_read)) {
+		r = PTR_ERR(bo_handles_read);
+		goto free_syncobj;
+	}
+
+	/* Array of pointers to the GEM read objects */
+	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
+	if (!gobj_read) {
+		r = -ENOMEM;
+		goto free_bo_handles_read;
+	}
+
+	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
+		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
+		if (!gobj_read[rentry]) {
+			r = -ENOENT;
+			goto put_gobj_read;
+		}
+	}
+
+	num_write_bo_handles = args->num_bo_write_handles;
+	bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
+				       size_mul(sizeof(u32), num_write_bo_handles));
+	if (IS_ERR(bo_handles_write)) {
+		r = PTR_ERR(bo_handles_write);
+		goto put_gobj_read;
+	}
+
+	/* Array of pointers to the GEM write objects */
+	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
+	if (!gobj_write) {
+		r = -ENOMEM;
+		goto free_bo_handles_write;
+	}
+
+	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
+		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
+		if (!gobj_write[wentry]) {
+			r = -ENOENT;
+			goto put_gobj_write;
+		}
+	}
+
+	/* Retrieve the user queue */
+	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+	if (!queue) {
+		r = -ENOENT;
+		goto put_gobj_write;
+	}
+
+	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
+	if (r)
+		goto put_gobj_write;
+
+	r = amdgpu_userq_fence_alloc(&userq_fence);
+	if (r)
+		goto put_gobj_write;
+
+	/*
+	 * We are here means UQ is active, make sure the eviction fence is valid.
+	 * NOTE(review): userq_mutex is unlocked below without being locked in
+	 * this function, so this helper is expected to return with it held.
+	 */
+	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+	/* Create a new fence */
+	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
+	if (r) {
+		mutex_unlock(&userq_mgr->userq_mutex);
+		kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+		goto put_gobj_write;
+	}
+
+	/* the queue keeps its own reference to the newest fence */
+	dma_fence_put(queue->last_fence);
+	queue->last_fence = dma_fence_get(fence);
+	mutex_unlock(&userq_mgr->userq_mutex);
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+		      (num_read_bo_handles + num_write_bo_handles));
+
+	/* Lock all BOs with retry handling */
+	drm_exec_until_all_locked(&exec) {
+		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+		drm_exec_retry_on_contention(&exec);
+		if (r) {
+			amdgpu_userq_fence_cleanup(fence);
+			goto exec_fini;
+		}
+
+		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+		drm_exec_retry_on_contention(&exec);
+		if (r) {
+			amdgpu_userq_fence_cleanup(fence);
+			goto exec_fini;
+		}
+	}
+
+	for (i = 0; i < num_read_bo_handles; i++) {
+		if (!gobj_read || !gobj_read[i]->resv)
+			continue;
+
+		dma_resv_add_fence(gobj_read[i]->resv, fence,
+				   DMA_RESV_USAGE_READ);
+	}
+
+	for (i = 0; i < num_write_bo_handles; i++) {
+		if (!gobj_write || !gobj_write[i]->resv)
+			continue;
+
+		dma_resv_add_fence(gobj_write[i]->resv, fence,
+				   DMA_RESV_USAGE_WRITE);
+	}
+
+	/* Add the created fence to syncobj/BO's */
+	for (i = 0; i < num_syncobj_handles; i++)
+		drm_syncobj_replace_fence(syncobj[i], fence);
+
+	/* drop the reference acquired in fence creation function */
+	dma_fence_put(fence);
+
+exec_fini:
+	drm_exec_fini(&exec);
+put_gobj_write:
+	/* wentry/rentry/entry count the objects successfully looked up */
+	while (wentry-- > 0)
+		drm_gem_object_put(gobj_write[wentry]);
+	kfree(gobj_write);
+free_bo_handles_write:
+	kfree(bo_handles_write);
+put_gobj_read:
+	while (rentry-- > 0)
+		drm_gem_object_put(gobj_read[rentry]);
+	kfree(gobj_read);
+free_bo_handles_read:
+	kfree(bo_handles_read);
+free_syncobj:
+	while (entry-- > 0)
+		if (syncobj[entry])
+			drm_syncobj_put(syncobj[entry]);
+	kfree(syncobj);
+free_syncobj_handles:
+	kfree(syncobj_handles);
+
+	return r;
+}
+
+/*
+ * amdgpu_userq_wait_ioctl - collect the fences a user queue has to wait on
+ *
+ * @dev:  DRM device (unused here)
+ * @data: struct drm_amdgpu_userq_wait from userspace
+ * @filp: DRM file of the caller
+ *
+ * Works in two modes, selected by wait_info->num_fences on entry:
+ *
+ * - 0: only count the fences found on the given timeline points, syncobjs
+ *   and BO reservation objects and report the count in num_fences.
+ * - non-zero: gather up to num_fences fences, de-duplicate them, block on
+ *   fences that are not user-queue fences, and for each user-queue fence
+ *   store its driver in the wait queue's fence_drv_xa and report the
+ *   driver's seq64 address and the fence's sequence number to userspace.
+ *   num_fences is updated to the number of entries written.
+ *
+ * Ownership of the gathered fence references: each slot of @fences owns one
+ * reference. When the final loop is done with a slot it drops the reference
+ * and clears the slot, so the shared free_fences error path (which puts
+ * every slot; dma_fence_put() ignores NULL) never drops a reference twice.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *filp)
+{
+	u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
+	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
+	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+	struct drm_amdgpu_userq_wait *wait_info = data;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+	struct amdgpu_usermode_queue *waitq;
+	struct drm_gem_object **gobj_write;
+	struct drm_gem_object **gobj_read;
+	struct dma_fence **fences = NULL;
+	u16 num_points, num_fences = 0;
+	int r, i, rentry, wentry, cnt;
+	struct drm_exec exec;
+
+	num_read_bo_handles = wait_info->num_bo_read_handles;
+	bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
+				      size_mul(sizeof(u32), num_read_bo_handles));
+	if (IS_ERR(bo_handles_read))
+		return PTR_ERR(bo_handles_read);
+
+	num_write_bo_handles = wait_info->num_bo_write_handles;
+	bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
+				       size_mul(sizeof(u32), num_write_bo_handles));
+	if (IS_ERR(bo_handles_write)) {
+		r = PTR_ERR(bo_handles_write);
+		goto free_bo_handles_read;
+	}
+
+	num_syncobj = wait_info->num_syncobj_handles;
+	syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
+				      size_mul(sizeof(u32), num_syncobj));
+	if (IS_ERR(syncobj_handles)) {
+		r = PTR_ERR(syncobj_handles);
+		goto free_bo_handles_write;
+	}
+
+	num_points = wait_info->num_syncobj_timeline_handles;
+	timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
+				       sizeof(u32) * num_points);
+	if (IS_ERR(timeline_handles)) {
+		r = PTR_ERR(timeline_handles);
+		goto free_syncobj_handles;
+	}
+
+	timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
+				      sizeof(u32) * num_points);
+	if (IS_ERR(timeline_points)) {
+		r = PTR_ERR(timeline_points);
+		goto free_timeline_handles;
+	}
+
+	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
+	if (!gobj_read) {
+		r = -ENOMEM;
+		goto free_timeline_points;
+	}
+
+	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
+		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
+		if (!gobj_read[rentry]) {
+			r = -ENOENT;
+			goto put_gobj_read;
+		}
+	}
+
+	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
+	if (!gobj_write) {
+		r = -ENOMEM;
+		goto put_gobj_read;
+	}
+
+	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
+		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
+		if (!gobj_write[wentry]) {
+			r = -ENOENT;
+			goto put_gobj_write;
+		}
+	}
+
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+		      (num_read_bo_handles + num_write_bo_handles));
+
+	/* Lock all BOs with retry handling */
+	drm_exec_until_all_locked(&exec) {
+		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+		drm_exec_retry_on_contention(&exec);
+		if (r) {
+			drm_exec_fini(&exec);
+			goto put_gobj_write;
+		}
+
+		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+		drm_exec_retry_on_contention(&exec);
+		if (r) {
+			drm_exec_fini(&exec);
+			goto put_gobj_write;
+		}
+	}
+
+	if (!wait_info->num_fences) {
+		if (num_points) {
+			struct dma_fence_unwrap iter;
+			struct dma_fence *fence;
+			struct dma_fence *f;
+
+			for (i = 0; i < num_points; i++) {
+				r = drm_syncobj_find_fence(filp, timeline_handles[i],
+							   timeline_points[i],
+							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+							   &fence);
+				if (r)
+					goto exec_fini;
+
+				dma_fence_unwrap_for_each(f, &iter, fence)
+					num_fences++;
+
+				dma_fence_put(fence);
+			}
+		}
+
+		/* Count syncobj's fence */
+		for (i = 0; i < num_syncobj; i++) {
+			struct dma_fence *fence;
+
+			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+						   0,
+						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						   &fence);
+			if (r)
+				goto exec_fini;
+
+			num_fences++;
+			dma_fence_put(fence);
+		}
+
+		/* Count GEM objects fence */
+		for (i = 0; i < num_read_bo_handles; i++) {
+			struct dma_resv_iter resv_cursor;
+			struct dma_fence *fence;
+
+			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+						DMA_RESV_USAGE_READ, fence)
+				num_fences++;
+		}
+
+		for (i = 0; i < num_write_bo_handles; i++) {
+			struct dma_resv_iter resv_cursor;
+			struct dma_fence *fence;
+
+			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+						DMA_RESV_USAGE_WRITE, fence)
+				num_fences++;
+		}
+
+		/*
+		 * Passing num_fences = 0 means that userspace doesn't want to
+		 * retrieve userq_fence_info. If num_fences = 0 we skip filling
+		 * userq_fence_info and return the actual number of fences on
+		 * args->num_fences.
+		 */
+		wait_info->num_fences = num_fences;
+	} else {
+		/* Array of fence info */
+		fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
+		if (!fence_info) {
+			r = -ENOMEM;
+			goto exec_fini;
+		}
+
+		/* Array of fences */
+		fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
+		if (!fences) {
+			r = -ENOMEM;
+			goto free_fence_info;
+		}
+
+		/* Retrieve GEM read objects fence */
+		for (i = 0; i < num_read_bo_handles; i++) {
+			struct dma_resv_iter resv_cursor;
+			struct dma_fence *fence;
+
+			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+						DMA_RESV_USAGE_READ, fence) {
+				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+					r = -EINVAL;
+					goto free_fences;
+				}
+
+				fences[num_fences++] = fence;
+				dma_fence_get(fence);
+			}
+		}
+
+		/* Retrieve GEM write objects fence */
+		for (i = 0; i < num_write_bo_handles; i++) {
+			struct dma_resv_iter resv_cursor;
+			struct dma_fence *fence;
+
+			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+						DMA_RESV_USAGE_WRITE, fence) {
+				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+					r = -EINVAL;
+					goto free_fences;
+				}
+
+				fences[num_fences++] = fence;
+				dma_fence_get(fence);
+			}
+		}
+
+		if (num_points) {
+			struct dma_fence_unwrap iter;
+			struct dma_fence *fence;
+			struct dma_fence *f;
+
+			for (i = 0; i < num_points; i++) {
+				r = drm_syncobj_find_fence(filp, timeline_handles[i],
+							   timeline_points[i],
+							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+							   &fence);
+				if (r)
+					goto free_fences;
+
+				dma_fence_unwrap_for_each(f, &iter, fence) {
+					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+						/* the container fence is not in the array yet */
+						dma_fence_put(fence);
+						r = -EINVAL;
+						goto free_fences;
+					}
+
+					dma_fence_get(f);
+					fences[num_fences++] = f;
+				}
+
+				dma_fence_put(fence);
+			}
+		}
+
+		/* Retrieve syncobj's fence */
+		for (i = 0; i < num_syncobj; i++) {
+			struct dma_fence *fence;
+
+			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+						   0,
+						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+						   &fence);
+			if (r)
+				goto free_fences;
+
+			if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+				/* no room to hand the reference to the array */
+				dma_fence_put(fence);
+				r = -EINVAL;
+				goto free_fences;
+			}
+
+			fences[num_fences++] = fence;
+		}
+
+		/*
+		 * Keep only the latest fences to reduce the number of values
+		 * given back to userspace.
+		 */
+		num_fences = dma_fence_dedup_array(fences, num_fences);
+
+		waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
+		if (!waitq) {
+			r = -EINVAL;
+			goto free_fences;
+		}
+
+		for (i = 0, cnt = 0; i < num_fences; i++) {
+			struct amdgpu_userq_fence_driver *fence_drv;
+			struct amdgpu_userq_fence *userq_fence;
+			u32 index;
+
+			userq_fence = to_amdgpu_userq_fence(fences[i]);
+			if (!userq_fence) {
+				/*
+				 * Just waiting on other driver fences should
+				 * be good for now
+				 */
+				r = dma_fence_wait(fences[i], true);
+				if (r)
+					/* slot i is still owned by the array */
+					goto free_fences;
+
+				dma_fence_put(fences[i]);
+				fences[i] = NULL;
+				continue;
+			}
+
+			fence_drv = userq_fence->fence_drv;
+			/*
+			 * We need to make sure the user queue release their reference
+			 * to the fence drivers at some point before queue destruction.
+			 * Otherwise, we would gather those references until we don't
+			 * have any more space left and crash.
+			 */
+			r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
+				     xa_limit_32b, GFP_KERNEL);
+			if (r)
+				goto free_fences;
+
+			amdgpu_userq_fence_driver_get(fence_drv);
+
+			/* Store drm syncobj's gpu va address and value */
+			fence_info[cnt].va = fence_drv->va;
+			fence_info[cnt].value = fences[i]->seqno;
+
+			/* done with this slot: clear it so free_fences skips it */
+			dma_fence_put(fences[i]);
+			fences[i] = NULL;
+			/* Increment the actual userq fence count */
+			cnt++;
+		}
+
+		wait_info->num_fences = cnt;
+		/* Copy userq fence info to user space */
+		if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
+				 fence_info, wait_info->num_fences * sizeof(*fence_info))) {
+			r = -EFAULT;
+			goto free_fences;
+		}
+
+		kfree(fences);
+		kfree(fence_info);
+	}
+
+	drm_exec_fini(&exec);
+	for (i = 0; i < num_read_bo_handles; i++)
+		drm_gem_object_put(gobj_read[i]);
+	kfree(gobj_read);
+
+	for (i = 0; i < num_write_bo_handles; i++)
+		drm_gem_object_put(gobj_write[i]);
+	kfree(gobj_write);
+
+	kfree(timeline_points);
+	kfree(timeline_handles);
+	kfree(syncobj_handles);
+	kfree(bo_handles_write);
+	kfree(bo_handles_read);
+
+	return 0;
+
+free_fences:
+	/* slots already consumed above are NULL; dma_fence_put() ignores them */
+	while (num_fences-- > 0)
+		dma_fence_put(fences[num_fences]);
+	kfree(fences);
+free_fence_info:
+	kfree(fence_info);
+exec_fini:
+	drm_exec_fini(&exec);
+put_gobj_write:
+	while (wentry-- > 0)
+		drm_gem_object_put(gobj_write[wentry]);
+	kfree(gobj_write);
+put_gobj_read:
+	while (rentry-- > 0)
+		drm_gem_object_put(gobj_read[rentry]);
+	kfree(gobj_read);
+free_timeline_points:
+	kfree(timeline_points);
+free_timeline_handles:
+	kfree(timeline_handles);
+free_syncobj_handles:
+	kfree(syncobj_handles);
+free_bo_handles_write:
+	kfree(bo_handles_write);
+free_bo_handles_read:
+	kfree(bo_handles_read);
+
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
new file mode 100644
index 000000000000..d76add2afc77
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_USERQ_FENCE_H__
+#define __AMDGPU_USERQ_FENCE_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_userq.h"
+
+struct amdgpu_userq_fence {
+ struct dma_fence base;
+ /*
+ * This lock is necessary to synchronize the
+ * userqueue dma fence operations.
+ */
+ spinlock_t lock;
+ struct list_head link;
+ unsigned long fence_drv_array_count;
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_userq_fence_driver **fence_drv_array;
+};
+
+struct amdgpu_userq_fence_driver {
+ struct kref refcount;
+ u64 va;
+ u64 gpu_addr;
+ u64 *cpu_addr;
+ u64 context;
+ /*
+ * This lock is necessary to synchronize the access
+ * to the fences list by the fence driver.
+ */
+ spinlock_t fence_list_lock;
+ struct list_head fences;
+ struct amdgpu_device *adev;
+ char timeline_name[TASK_COMM_LEN];
+};
+
+int amdgpu_userq_fence_slab_init(void);
+void amdgpu_userq_fence_slab_fini(void);
+
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_destroy(struct kref *ref);
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h
new file mode 100644
index 000000000000..1e40ca3b1584
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_UTILS_H_
+#define AMDGPU_UTILS_H_
+
+/* ---------- Generic 2-bit capability attribute encoding ----------
+ * 00 INVALID, 01 RO, 10 WO, 11 RW
+ */
+enum amdgpu_cap_attr {
+ AMDGPU_CAP_ATTR_INVALID = 0,
+ AMDGPU_CAP_ATTR_RO = 1 << 0,
+ AMDGPU_CAP_ATTR_WO = 1 << 1,
+ AMDGPU_CAP_ATTR_RW = (AMDGPU_CAP_ATTR_RO | AMDGPU_CAP_ATTR_WO),
+};
+
+#define AMDGPU_CAP_ATTR_BITS 2
+#define AMDGPU_CAP_ATTR_MAX ((1U << AMDGPU_CAP_ATTR_BITS) - 1)
+
+/* Internal helper to build helpers for a given enum NAME */
+#define DECLARE_ATTR_CAP_CLASS_HELPERS(NAME) \
+enum { NAME##_BITMAP_BITS = NAME##_COUNT * AMDGPU_CAP_ATTR_BITS }; \
+struct NAME##_caps { \
+ DECLARE_BITMAP(bmap, NAME##_BITMAP_BITS); \
+}; \
+static inline unsigned int NAME##_ATTR_START(enum NAME##_cap_id cap) \
+{ return (unsigned int)cap * AMDGPU_CAP_ATTR_BITS; } \
+static inline void NAME##_attr_init(struct NAME##_caps *c) \
+{ if (c) bitmap_zero(c->bmap, NAME##_BITMAP_BITS); } \
+static inline int NAME##_attr_set(struct NAME##_caps *c, \
+ enum NAME##_cap_id cap, enum amdgpu_cap_attr attr) \
+{ \
+ if (!c) \
+ return -EINVAL; \
+ if (cap >= NAME##_COUNT) \
+ return -EINVAL; \
+ if ((unsigned int)attr > AMDGPU_CAP_ATTR_MAX) \
+ return -EINVAL; \
+ bitmap_write(c->bmap, (unsigned long)attr, \
+ NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \
+ return 0; \
+} \
+static inline int NAME##_attr_get(const struct NAME##_caps *c, \
+ enum NAME##_cap_id cap, enum amdgpu_cap_attr *out) \
+{ \
+ unsigned long v; \
+ if (!c || !out) \
+ return -EINVAL; \
+ if (cap >= NAME##_COUNT) \
+ return -EINVAL; \
+ v = bitmap_read(c->bmap, NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \
+ *out = (enum amdgpu_cap_attr)v; \
+ return 0; \
+} \
+static inline bool NAME##_cap_is_ro(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RO; } \
+static inline bool NAME##_cap_is_wo(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_WO; } \
+static inline bool NAME##_cap_is_rw(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RW; }
+
+/* Element expander for enum creation */
+#define _CAP_ENUM_ELEM(x) x,
+
+/* Public macro: declare enum + helpers from an X-macro list */
+#define DECLARE_ATTR_CAP_CLASS(NAME, LIST_MACRO) \
+ enum NAME##_cap_id { LIST_MACRO(_CAP_ENUM_ELEM) NAME##_COUNT }; \
+ DECLARE_ATTR_CAP_CLASS_HELPERS(NAME)
+
+#endif /* AMDGPU_UTILS_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index b7441654e6fa..5c38f0d30c87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -260,7 +260,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
return -EINVAL;
}
- r = amdgpu_ucode_request(adev, &adev->uvd.fw, fw_name);
+ r = amdgpu_ucode_request(adev, &adev->uvd.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
fw_name);
@@ -398,32 +398,32 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
* amdgpu_uvd_entity_init - init entity
*
* @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
*
+ * Initialize the entity used for handle management in the kernel driver.
*/
-int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
- struct amdgpu_ring *ring;
- struct drm_gpu_scheduler *sched;
- int r;
+ if (ring == &adev->uvd.inst[0].ring) {
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ int r;
- ring = &adev->uvd.inst[0].ring;
- sched = &ring->sched;
- r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
- &sched, 1, NULL);
- if (r) {
- DRM_ERROR("Failed setting up UVD kernel entity.\n");
- return r;
+ r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up UVD kernel entity.\n");
+ return r;
+ }
}
return 0;
}
-int amdgpu_uvd_suspend(struct amdgpu_device *adev)
+int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev)
{
unsigned int size;
void *ptr;
int i, j, idx;
- bool in_ras_intr = amdgpu_ras_intr_triggered();
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -452,7 +452,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
/* re-write 0 since err_event_athub will corrupt VCPU buffer */
- if (in_ras_intr)
+ if (amdgpu_ras_intr_triggered())
memset(adev->uvd.inst[j].saved_bo, 0, size);
else
memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
@@ -461,7 +461,12 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
}
}
- if (in_ras_intr)
+ return 0;
+}
+
+int amdgpu_uvd_suspend(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_intr_triggered())
DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
return 0;
@@ -546,6 +551,8 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
for (i = 0; i < abo->placement.num_placement; ++i) {
abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+ if (abo->placements[i].mem_type == TTM_PL_VRAM)
+ abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
}
@@ -1083,7 +1090,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
int r;
job->vm = NULL;
- ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
if (ib->length_dw % 16) {
DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
@@ -1130,7 +1136,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
64, direct ? AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED, &job);
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 9f89bb7cd60b..9dfad2f48ef4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -73,7 +73,8 @@ struct amdgpu_uvd {
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
-int amdgpu_uvd_entity_init(struct amdgpu_device *adev);
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev);
int amdgpu_uvd_suspend(struct amdgpu_device *adev);
int amdgpu_uvd_resume(struct amdgpu_device *adev);
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 1904edf68407..ce318f5de047 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -158,7 +158,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
return -EINVAL;
}
- r = amdgpu_ucode_request(adev, &adev->vce.fw, fw_name);
+ r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
fw_name);
@@ -214,15 +214,15 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
drm_sched_entity_destroy(&adev->vce.entity);
- amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
- (void **)&adev->vce.cpu_addr);
-
for (i = 0; i < adev->vce.num_rings; i++)
amdgpu_ring_fini(&adev->vce.ring[i]);
amdgpu_ucode_release(&adev->vce.fw);
mutex_destroy(&adev->vce.idle_mutex);
+ amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
+ (void **)&adev->vce.cpu_addr);
+
return 0;
}
@@ -230,21 +230,22 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
* amdgpu_vce_entity_init - init entity
*
* @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
*
+ * Initialize the entity used for handle management in the kernel driver.
*/
-int amdgpu_vce_entity_init(struct amdgpu_device *adev)
+int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
- struct amdgpu_ring *ring;
- struct drm_gpu_scheduler *sched;
- int r;
-
- ring = &adev->vce.ring[0];
- sched = &ring->sched;
- r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
- &sched, 1, NULL);
- if (r != 0) {
- DRM_ERROR("Failed setting up VCE run queue.\n");
- return r;
+ if (ring == &adev->vce.ring[0]) {
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ int r;
+
+ r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r != 0) {
+ DRM_ERROR("Failed setting up VCE run queue.\n");
+ return r;
+ }
}
return 0;
@@ -448,7 +449,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
- &job);
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -502,7 +503,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[i] = 0x0;
r = amdgpu_job_submit_direct(job, ring, &f);
- amdgpu_ib_free(ring->adev, &ib_msg, f);
+ amdgpu_ib_free(&ib_msg, f);
if (r)
goto err;
@@ -539,7 +540,8 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
AMDGPU_FENCE_OWNER_UNDEFINED,
ib_size_dw * 4,
direct ? AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED, &job);
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -742,12 +744,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
uint32_t created = 0;
uint32_t allocated = 0;
uint32_t tmp, handle = 0;
- uint32_t *size = &tmp;
+ uint32_t dummy = 0xffffffff;
+ uint32_t *size = &dummy;
unsigned int idx;
int i, r = 0;
job->vm = NULL;
- ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
for (idx = 0; idx < ib->length_dw;) {
uint32_t len = amdgpu_ib_get_value(ib, idx);
@@ -1042,7 +1044,6 @@ out:
if (!r) {
/* No error, free all destroyed handle slots */
tmp = destroyed;
- amdgpu_ib_free(p->adev, ib, NULL);
} else {
/* Error during parsing, free all allocated handle slots */
tmp = allocated;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index ea680fc9a6c3..6e53f872d084 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -55,7 +55,7 @@ struct amdgpu_vce {
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
-int amdgpu_vce_entity_init(struct amdgpu_device *adev);
+int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 36b55d2bd51a..5e0786ea911b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2024 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -58,6 +58,11 @@
#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin"
#define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin"
#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
+#define FIRMWARE_VCN4_0_5 "amdgpu/vcn_4_0_5.bin"
+#define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin"
+#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin"
+#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin"
+#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
@@ -80,42 +85,63 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_5);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
+static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev);
-int amdgpu_vcn_early_init(struct amdgpu_device *adev)
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i)
{
- char ucode_prefix[30];
- char fw_name[40];
+ char ucode_prefix[25];
int r;
+ adev->vcn.inst[i].adev = adev;
+ adev->vcn.inst[i].inst = i;
amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
- r = amdgpu_ucode_request(adev, &adev->vcn.fw, fw_name);
- if (r)
- amdgpu_ucode_release(&adev->vcn.fw);
+
+ if (i != 0 && adev->vcn.per_inst_fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_%d.bin", ucode_prefix, i);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ if (!adev->vcn.inst[0].fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[0].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ } else {
+ r = 0;
+ }
+ adev->vcn.inst[i].fw = adev->vcn.inst[0].fw;
+ }
return r;
}
-int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
{
unsigned long bo_size;
const struct common_firmware_header *hdr;
unsigned char fw_check;
unsigned int fw_shared_size, log_offset;
- int i, r;
-
- INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
- mutex_init(&adev->vcn.vcn_pg_lock);
- mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
- atomic_set(&adev->vcn.total_submission_cnt, 0);
- for (i = 0; i < adev->vcn.num_vcn_inst; i++)
- atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
+ int r;
+ mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
+ mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
+ mutex_init(&adev->vcn.inst[i].engine_reset_mutex);
+ atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
+ INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
+ atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
+ adev->vcn.inst[i].indirect_sram = true;
/*
* Some Steam Deck's BIOS versions are incompatible with the
@@ -124,18 +150,23 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
* Hence, check for these versions here - notice this is
* restricted to Vangogh (Deck's APU).
*/
- if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 2)) {
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 0, 2)) {
const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
- !strncmp("F7A0114", bios_ver, 7))) {
- adev->vcn.indirect_sram = false;
+ !strncmp("F7A0114", bios_ver, 7))) {
+ adev->vcn.inst[i].indirect_sram = false;
dev_info(adev->dev,
- "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
+ "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
}
}
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ /* from vcn4 and above, only unified queue is used */
+ adev->vcn.inst[i].using_unified_queue =
+ amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0);
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ adev->vcn.inst[i].fw_version = le32_to_cpu(hdr->ucode_version);
adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
/* Bit 20-23, it is encode major and non-zero for new naming convention.
@@ -153,23 +184,27 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
enc_major = fw_check;
dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
- DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
- enc_major, enc_minor, dec_ver, vep, fw_rev);
+ dev_info(adev->dev,
+ "[VCN instance %d] Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
+ i, enc_major, enc_minor, dec_ver, vep, fw_rev);
} else {
unsigned int version_major, version_minor, family_id;
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
- DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
- version_major, version_minor, family_id);
+ dev_info(adev->dev, "[VCN instance %d] Found VCN firmware Version: %u.%u Family ID: %u\n",
+ i, version_major, version_minor, family_id);
}
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
- if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) {
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(5, 0, 0)) {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared));
+ log_offset = offsetof(struct amdgpu_vcn5_fw_shared, fw_log);
+ } else if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) {
fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
} else {
@@ -182,99 +217,87 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
if (amdgpu_vcnfw_log)
bo_size += AMDGPU_VCNFW_LOG_SIZE;
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ &adev->vcn.inst[i].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ return r;
+ }
+
+ adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
+ bo_size - fw_shared_size;
+ adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
+ bo_size - fw_shared_size;
+
+ adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
+
+ if (amdgpu_vcnfw_log) {
+ adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.log_offset = log_offset;
+ }
- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ if (adev->vcn.inst[i].indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
- &adev->vcn.inst[i].vcpu_bo,
- &adev->vcn.inst[i].gpu_addr,
- &adev->vcn.inst[i].cpu_addr);
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ &adev->vcn.inst[i].dpg_sram_cpu_addr);
if (r) {
- dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
return r;
}
-
- adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
- bo_size - fw_shared_size;
- adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
- bo_size - fw_shared_size;
-
- adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
-
- if (amdgpu_vcnfw_log) {
- adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
- adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
- adev->vcn.inst[i].fw_shared.log_offset = log_offset;
- }
-
- if (adev->vcn.indirect_sram) {
- r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->vcn.inst[i].dpg_sram_bo,
- &adev->vcn.inst[i].dpg_sram_gpu_addr,
- &adev->vcn.inst[i].dpg_sram_cpu_addr);
- if (r) {
- dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
- return r;
- }
- }
}
return 0;
}
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
+void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
{
- int i, j;
-
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
+ int j;
- amdgpu_bo_free_kernel(
- &adev->vcn.inst[j].dpg_sram_bo,
- &adev->vcn.inst[j].dpg_sram_gpu_addr,
- (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- kvfree(adev->vcn.inst[j].saved_bo);
+ amdgpu_bo_free_kernel(
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ (void **)&adev->vcn.inst[i].dpg_sram_cpu_addr);
- amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
- &adev->vcn.inst[j].gpu_addr,
- (void **)&adev->vcn.inst[j].cpu_addr);
+ kvfree(adev->vcn.inst[i].saved_bo);
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);
+ amdgpu_bo_free_kernel(&adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ (void **)&adev->vcn.inst[i].cpu_addr);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
- }
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec);
- amdgpu_ucode_release(&adev->vcn.fw);
- mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
- mutex_destroy(&adev->vcn.vcn_pg_lock);
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_enc[j]);
- return 0;
-}
-
-/* from vcn4 and above, only unified queue is used */
-static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- bool ret = false;
+ if (adev->vcn.per_inst_fw) {
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ adev->vcn.inst[i].fw = NULL;
+ }
- if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0))
- ret = true;
+ if (adev->vcn.reg_list)
+ amdgpu_vcn_reg_dump_fini(adev);
- return ret;
+ mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock);
+ mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
}
bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
bool ret = false;
- int vcn_config = adev->vcn.vcn_config[vcn_instance];
+ int vcn_config = adev->vcn.inst[vcn_instance].vcn_config;
if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
ret = true;
@@ -286,172 +309,246 @@ bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type t
return ret;
}
-int amdgpu_vcn_suspend(struct amdgpu_device *adev)
+static int amdgpu_vcn_save_vcpu_bo_inst(struct amdgpu_device *adev, int i)
{
unsigned int size;
void *ptr;
- int i, idx;
+ int idx;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->vcn.inst[i].vcpu_bo == NULL)
- return 0;
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
- size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
- ptr = adev->vcn.inst[i].cpu_addr;
+ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->vcn.inst[i].saved_bo)
+ return -ENOMEM;
- adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
- if (!adev->vcn.inst[i].saved_bo)
- return -ENOMEM;
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+ drm_dev_exit(idx);
+ }
- if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
- drm_dev_exit(idx);
- }
+ return 0;
+}
+
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
+{
+ int ret, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+ if (ret)
+ return ret;
}
+
return 0;
}
-int amdgpu_vcn_resume(struct amdgpu_device *adev)
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i)
+{
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to
+ * restore fw data and clear buffer in amdgpu_vcn_resume() */
+ if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset)
+ return 0;
+
+ return amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+}
+
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i)
{
unsigned int size;
void *ptr;
- int i, idx;
+ int idx;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->vcn.inst[i].vcpu_bo == NULL)
- return -EINVAL;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
- size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
- ptr = adev->vcn.inst[i].cpu_addr;
+ if (adev->vcn.inst[i].saved_bo != NULL) {
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ drm_dev_exit(idx);
+ }
+ kvfree(adev->vcn.inst[i].saved_bo);
+ adev->vcn.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
- if (adev->vcn.inst[i].saved_bo != NULL) {
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ memcpy_toio(adev->vcn.inst[i].cpu_addr,
+ adev->vcn.inst[i].fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
drm_dev_exit(idx);
}
- kvfree(adev->vcn.inst[i].saved_bo);
- adev->vcn.inst[i].saved_bo = NULL;
- } else {
- const struct common_firmware_header *hdr;
- unsigned int offset;
-
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
- le32_to_cpu(hdr->ucode_size_bytes));
- drm_dev_exit(idx);
- }
- size -= le32_to_cpu(hdr->ucode_size_bytes);
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
- }
- memset_io(ptr, 0, size);
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
}
+ memset_io(ptr, 0, size);
}
+
return 0;
}
+void amdgpu_vcn_get_profile(struct amdgpu_device *adev)
+{
+ int r;
+
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+
+ if (adev->vcn.workload_profile_active) {
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+ return;
+ }
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ true);
+ if (r)
+ dev_warn(adev->dev,
+ "(%d) failed to enable video power profile mode\n", r);
+ else
+ adev->vcn.workload_profile_active = true;
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+}
+
+void amdgpu_vcn_put_profile(struct amdgpu_device *adev)
+{
+ bool pg = true;
+ int r, i;
+
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.inst[i].cur_state != AMD_PG_STATE_GATE) {
+ pg = false;
+ break;
+ }
+ }
+
+ if (pg) {
+ r = amdgpu_dpm_switch_power_profile(
+ adev, PP_SMC_POWER_PROFILE_VIDEO, false);
+ if (r)
+ dev_warn(
+ adev->dev,
+ "(%d) failed to disable video power profile mode\n",
+ r);
+ else
+ adev->vcn.workload_profile_active = false;
+ }
+
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+}
+
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
- struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
- unsigned int i, j;
- int r = 0;
-
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
+ unsigned int i = vcn_inst->inst, j;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- struct dpg_pause_state new_state;
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);
- if (fence[j] ||
- unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
- new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else
- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !adev->vcn.inst[i].using_unified_queue) {
+ struct dpg_pause_state new_state;
- adev->vcn.pause_dpg_mode(adev, j, &new_state);
- }
+ if (fence[i] ||
+ unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
- fences += fence[j];
+ adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state);
}
- if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_GATE);
- r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- false);
- if (r)
- dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
+ fences += fence[i];
+
+ if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
+ mutex_lock(&vcn_inst->vcn_pg_lock);
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
+ mutex_unlock(&vcn_inst->vcn_pg_lock);
+ amdgpu_vcn_put_profile(adev);
+
} else {
- schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
}
}
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- int r = 0;
+ struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me];
- atomic_inc(&adev->vcn.total_submission_cnt);
+ atomic_inc(&vcn_inst->total_submission_cnt);
- if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
- r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- true);
- if (r)
- dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
- }
+ cancel_delayed_work_sync(&vcn_inst->idle_work);
- mutex_lock(&adev->vcn.vcn_pg_lock);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
+ mutex_lock(&vcn_inst->vcn_pg_lock);
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !vcn_inst->using_unified_queue) {
struct dpg_pause_state new_state;
if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
- atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+ atomic_inc(&vcn_inst->dpg_enc_submission_cnt);
new_state.fw_based = VCN_DPG_STATE__PAUSE;
} else {
unsigned int fences = 0;
unsigned int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+ for (i = 0; i < vcn_inst->num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]);
- if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
+ if (fences || atomic_read(&vcn_inst->dpg_enc_submission_cnt))
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
}
- adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
+ vcn_inst->pause_dpg_mode(vcn_inst, &new_state);
}
- mutex_unlock(&adev->vcn.vcn_pg_lock);
+ mutex_unlock(&vcn_inst->vcn_pg_lock);
+ amdgpu_vcn_get_profile(adev);
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
- ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
+ !adev->vcn.inst[ring->me].using_unified_queue)
atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
- atomic_dec(&ring->adev->vcn.total_submission_cnt);
+ atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt);
- schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work,
+ VCN_IDLE_TIMEOUT);
}
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
@@ -469,7 +566,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
@@ -529,19 +626,19 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
64, AMDGPU_IB_POOL_DIRECT,
- &job);
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
goto err;
ib = &job->ibs[0];
- ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
+ ib->ptr[0] = PACKET0(adev->vcn.inst[ring->me].internal.data0, 0);
ib->ptr[1] = addr;
- ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
+ ib->ptr[2] = PACKET0(adev->vcn.inst[ring->me].internal.data1, 0);
ib->ptr[3] = addr >> 32;
- ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
+ ib->ptr[4] = PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0);
ib->ptr[5] = 0;
for (i = 6; i < 16; i += 2) {
- ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
+ ib->ptr[i] = PACKET0(adev->vcn.inst[ring->me].internal.nop, 0);
ib->ptr[i+1] = 0;
}
ib->length_dw = 16;
@@ -550,7 +647,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
if (r)
goto err_free;
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
if (fence)
*fence = dma_fence_get(f);
@@ -561,7 +658,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
err_free:
amdgpu_job_free(job);
err:
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
return r;
}
@@ -700,17 +797,16 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
struct amdgpu_job *job;
struct amdgpu_ib *ib;
uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
- bool sq = amdgpu_vcn_using_unified_queue(ring);
uint32_t *ib_checksum;
uint32_t ib_pack_in_dw;
int i, r;
- if (sq)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
- &job);
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
goto err;
@@ -718,7 +814,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
ib->length_dw = 0;
/* single queue headers */
- if (sq) {
+ if (adev->vcn.inst[ring->me].using_unified_queue) {
ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
+ 4 + 2; /* engine info + decoding ib in dw */
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
@@ -737,14 +833,14 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (sq)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
if (fence)
*fence = dma_fence_get(f);
@@ -755,7 +851,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
err_free:
amdgpu_job_free(job);
err:
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
return r;
}
@@ -827,20 +923,20 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
struct dma_fence **fence)
{
unsigned int ib_size_dw = 16;
+ struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
uint32_t *ib_checksum = NULL;
uint64_t addr;
- bool sq = amdgpu_vcn_using_unified_queue(ring);
int i, r;
- if (sq)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
- &job);
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -849,7 +945,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
ib->length_dw = 0;
- if (sq)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
ib->ptr[ib->length_dw++] = 0x00000018;
@@ -857,7 +953,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = addr;
- ib->ptr[ib->length_dw++] = 0x0000000b;
+ ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -871,7 +967,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (sq)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
r = amdgpu_job_submit_direct(job, ring, &f);
@@ -894,20 +990,20 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
struct dma_fence **fence)
{
unsigned int ib_size_dw = 16;
+ struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
uint32_t *ib_checksum = NULL;
uint64_t addr;
- bool sq = amdgpu_vcn_using_unified_queue(ring);
int i, r;
- if (sq)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
- &job);
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -916,7 +1012,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
ib->length_dw = 0;
- if (sq)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
ib->ptr[ib->length_dw++] = 0x00000018;
@@ -924,7 +1020,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = addr;
- ib->ptr[ib->length_dw++] = 0x0000000b;
+ ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002;
@@ -938,7 +1034,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (sq)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
r = amdgpu_job_submit_direct(job, ring, &f);
@@ -985,7 +1081,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = 0;
error:
- amdgpu_ib_free(adev, &ib, fence);
+ amdgpu_ib_free(&ib, fence);
dma_fence_put(fence);
return r;
@@ -996,7 +1092,8 @@ int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
long r;
- if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(4, 0, 3)) {
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) &&
+ (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) {
r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
if (r)
goto error;
@@ -1022,34 +1119,32 @@ enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
}
}
-void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i)
{
- int i;
unsigned int idx;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
-
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* currently only support 2 FW instances */
- if (i >= 2) {
- dev_info(adev->dev, "More then 2 VCN FW instances!\n");
- break;
- }
- idx = AMDGPU_UCODE_ID_VCN + i;
- adev->firmware.ucode[idx].ucode_id = idx;
- adev->firmware.ucode[idx].fw = adev->vcn.fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
-
- if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(4, 0, 3))
- break;
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1))
+ && (i > 0))
+ return;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ /* currently only support 2 FW instances */
+ if (i >= 2) {
+ dev_info(adev->dev, "More then 2 VCN FW instances!\n");
+ return;
}
- dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
+ idx = AMDGPU_UCODE_ID_VCN + i;
+ adev->firmware.ucode[idx].ucode_id = idx;
+ adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
}
}
@@ -1062,7 +1157,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
{
struct amdgpu_vcn_inst *vcn;
void *log_buf;
- volatile struct amdgpu_vcn_fwlog *plog;
+ struct amdgpu_vcn_fwlog *plog;
unsigned int read_pos, write_pos, available, i, read_bytes = 0;
unsigned int read_num[2] = {0};
@@ -1075,7 +1170,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
- plog = (volatile struct amdgpu_vcn_fwlog *)log_buf;
+ plog = (struct amdgpu_vcn_fwlog *)log_buf;
read_pos = plog->rptr;
write_pos = plog->wptr;
@@ -1087,7 +1182,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
if (write_pos > read_pos) {
available = write_pos - read_pos;
- read_num[0] = min(size, (size_t)available);
+ read_num[0] = min_t(size_t, size, available);
} else {
read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos;
available = read_num[0] + write_pos - plog->header_size;
@@ -1142,11 +1237,11 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
{
#if defined(CONFIG_DEBUG_FS)
- volatile uint32_t *flag = vcn->fw_shared.cpu_addr;
+ uint32_t *flag = vcn->fw_shared.cpu_addr;
void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
- volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
- volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
+ struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
+ struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
+ vcn->fw_shared.log_offset;
*flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
fw_log->is_enabled = 1;
@@ -1179,7 +1274,7 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
} else {
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
- adev->virt.ops->ras_poison_handler(adev);
+ adev->virt.ops->ras_poison_handler(adev, ras_if->block);
else
dev_warn(adev->dev,
"No ras_poison_handler interface in SRIOV for VCN!\n");
@@ -1254,3 +1349,289 @@ int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
return psp_execute_ip_fw_load(&adev->psp, &ucode);
}
+
+static ssize_t amdgpu_get_vcn_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->vcn.supported_reset);
+}
+
+static DEVICE_ATTR(vcn_reset_mask, 0444,
+ amdgpu_get_vcn_reset_mask, NULL);
+
+int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->vcn.num_vcn_inst) {
+ r = device_create_file(adev->dev, &dev_attr_vcn_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->vcn.num_vcn_inst)
+ device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
+ }
+}
+
+/*
+ * debugfs to enable/disable vcn job submission to specific core or
+ * instance. It is created only if the queue type is unified.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	u32 i;
+	u64 mask;
+	struct amdgpu_ring *ring;
+
+	if (!adev)
+		return -ENODEV;
+
+	mask = (1ULL << adev->vcn.num_vcn_inst) - 1;
+	if ((val & mask) == 0)
+		return -EINVAL;
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		ring = &adev->vcn.inst[i].ring_enc[0];
+		if (val & (1ULL << i))
+			ring->sched.ready = true;
+		else
+			ring->sched.ready = false;
+	}
+	/* stores above need a write barrier (not a read barrier) to be ordered for other CPUs */
+	smp_wmb();
+	return 0;
+}
+
+static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops,
+ amdgpu_debugfs_vcn_sched_mask_get,
+ amdgpu_debugfs_vcn_sched_mask_set, "%llx\n");
+#endif
+
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.inst[0].using_unified_queue)
+ return;
+ sprintf(name, "amdgpu_vcn_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_vcn_sched_mask_fops);
+#endif
+}
+
+/**
+ * vcn_set_powergating_state - set VCN block powergating state
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: power gating state
+ *
+ * Set VCN block powergating state
+ */
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+			      enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret = 0, i;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		int r = adev->vcn.inst[i].set_pg_state(&adev->vcn.inst[i], state);
+
+		ret = ret ? ret : r;	/* keep first error; OR-ing errnos yields a bogus code */
+	}
+
+	return ret;
+}
+
+/**
+ * amdgpu_vcn_reset_engine - Reset a specific VCN engine
+ * @adev: Pointer to the AMDGPU device
+ * @instance_id: VCN engine instance to reset
+ *
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+static int amdgpu_vcn_reset_engine(struct amdgpu_device *adev,
+ uint32_t instance_id)
+{
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[instance_id];
+ int r, i;
+
+ mutex_lock(&vinst->engine_reset_mutex);
+ /* Stop the scheduler's work queue for the dec and enc rings if they are running.
+ * This ensures that no new tasks are submitted to the queues while
+ * the reset is in progress.
+ */
+ drm_sched_wqueue_stop(&vinst->ring_dec.sched);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ drm_sched_wqueue_stop(&vinst->ring_enc[i].sched);
+
+ /* Perform the VCN reset for the specified instance */
+ r = vinst->reset(vinst);
+ if (r)
+ goto unlock;
+ r = amdgpu_ring_test_ring(&vinst->ring_dec);
+ if (r)
+ goto unlock;
+ for (i = 0; i < vinst->num_enc_rings; i++) {
+ r = amdgpu_ring_test_ring(&vinst->ring_enc[i]);
+ if (r)
+ goto unlock;
+ }
+ amdgpu_fence_driver_force_completion(&vinst->ring_dec);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ amdgpu_fence_driver_force_completion(&vinst->ring_enc[i]);
+
+ /* Restart the scheduler's work queue for the dec and enc rings
+ * if they were stopped by this function. This allows new tasks
+ * to be submitted to the queues after the reset is complete.
+ */
+ drm_sched_wqueue_start(&vinst->ring_dec.sched);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ drm_sched_wqueue_start(&vinst->ring_enc[i].sched);
+
+unlock:
+ mutex_unlock(&vinst->engine_reset_mutex);
+
+ return r;
+}
+
+/**
+ * amdgpu_vcn_ring_reset - Reset a VCN ring
+ * @ring: ring to reset
+ * @vmid: vmid of guilty job
+ * @timedout_fence: fence of timed out job
+ *
+ * This helper is for VCN blocks without unified queues because
+ * resetting the engine resets all queues in that case. With
+ * unified queues we have one queue per engine.
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ return -EINVAL;
+
+ return amdgpu_vcn_reset_engine(adev, ring->me);
+}
+
+int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count)
+{
+ adev->vcn.ip_dump = kcalloc(adev->vcn.num_vcn_inst * count,
+ sizeof(uint32_t), GFP_KERNEL);
+ if (!adev->vcn.ip_dump)
+ return -ENOMEM;
+ adev->vcn.reg_list = reg;
+ adev->vcn.reg_count = count;
+
+ return 0;
+}
+
+static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->vcn.ip_dump);
+ adev->vcn.ip_dump = NULL;
+ adev->vcn.reg_list = NULL;
+ adev->vcn.reg_count = 0;
+}
+
+void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ bool is_powered;
+ u32 inst_off;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ inst_off = i * adev->vcn.reg_count;
+ /* mmUVD_POWER_STATUS is always readable and is the first in reg_list */
+ adev->vcn.ip_dump[inst_off] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[0], i));
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) !=
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+
+ if (is_powered)
+ for (j = 1; j < adev->vcn.reg_count; j++)
+ adev->vcn.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[j], i));
+ }
+}
+
+void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ bool is_powered;
+ u32 inst_off;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * adev->vcn.reg_count;
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) !=
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+
+ if (is_powered) {
+ drm_printf(p, "\nActive Instance:VCN%d\n", i);
+ for (j = 0; j < adev->vcn.reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", adev->vcn.reg_list[j].reg_name,
+ adev->vcn.ip_dump[inst_off + j]);
+ } else {
+ drm_printf(p, "\nInactive Instance:VCN%d\n", i);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index a3eed90b6af0..dc8a17bcc3c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -33,7 +33,7 @@
#define AMDGPU_VCN_MAX_ENC_RINGS 3
#define AMDGPU_MAX_VCN_INSTANCES 4
-#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
+#define AMDGPU_MAX_VCN_ENC_RINGS (AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES)
#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
@@ -66,7 +66,6 @@
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
-#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define VCN_VID_IP_ADDRESS_2_0 0x0
#define VCN_AON_IP_ADDRESS_2_0 0x30000
@@ -160,6 +159,58 @@
} \
} while (0)
+#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \
+ ({ \
+ uint32_t internal_reg_offset, addr; \
+ bool video_range, video1_range, aon_range, aon1_range; \
+ \
+ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
+ addr <<= 2; \
+ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \
+ video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS + 0x2600))))); \
+ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \
+ aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS + 0x600))))); \
+ if (video_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \
+ (VCN_VID_IP_ADDRESS)); \
+ else if (aon_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \
+ (VCN_AON_IP_ADDRESS)); \
+ else if (video1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS) + \
+ (VCN_VID_IP_ADDRESS)); \
+ else if (aon1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS) + \
+ (VCN_AON_IP_ADDRESS)); \
+ else \
+ internal_reg_offset = (0xFFFFF & addr); \
+ \
+ internal_reg_offset >>= 2; \
+ })
+
+#define WREG32_SOC24_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
+ regUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ VCN, GET_INST(VCN, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
+ } while (0)
+
#define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2)
#define AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT (1 << 4)
#define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6)
@@ -169,6 +220,9 @@
#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)
#define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11)
#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14)
+#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15)
+
+#define MAX_NUM_VCN_RB_SETUP 4
#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001
#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001
@@ -183,6 +237,14 @@
#define AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING 2
+struct amdgpu_hwip_reg_entry;
+
+enum amdgpu_vcn_caps {
+ AMDGPU_VCN_RRMT_ENABLED,
+};
+
+#define AMDGPU_VCN_CAPS(caps) BIT(AMDGPU_VCN_##caps)
+
enum fw_queue_mode {
FW_QUEUE_RING_RESET = 1,
FW_QUEUE_DPG_HOLD_OFF = 2,
@@ -234,6 +296,8 @@ struct amdgpu_vcn_fw_shared {
};
struct amdgpu_vcn_inst {
+ struct amdgpu_device *adev;
+ int inst;
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
@@ -252,6 +316,25 @@ struct amdgpu_vcn_inst {
atomic_t dpg_enc_submission_cnt;
struct amdgpu_vcn_fw_shared fw_shared;
uint8_t aid_id;
+ const struct firmware *fw; /* VCN firmware */
+ uint8_t vcn_config;
+ uint32_t vcn_codec_disable_mask;
+ atomic_t total_submission_cnt;
+ struct mutex vcn_pg_lock;
+ enum amd_powergating_state cur_state;
+ struct delayed_work idle_work;
+ unsigned fw_version;
+ unsigned num_enc_rings;
+ bool indirect_sram;
+ struct amdgpu_vcn_reg internal;
+ struct mutex vcn1_jpeg1_workaround;
+ int (*pause_dpg_mode)(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+ int (*set_pg_state)(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+ int (*reset)(struct amdgpu_vcn_inst *vinst);
+ bool using_unified_queue;
+ struct mutex engine_reset_mutex;
};
struct amdgpu_vcn_ras {
@@ -259,31 +342,30 @@ struct amdgpu_vcn_ras {
};
struct amdgpu_vcn {
- unsigned fw_version;
- struct delayed_work idle_work;
- const struct firmware *fw; /* VCN firmware */
- unsigned num_enc_rings;
- enum amd_powergating_state cur_state;
- bool indirect_sram;
-
uint8_t num_vcn_inst;
struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
- uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES];
- uint32_t vcn_codec_disable_mask[AMDGPU_MAX_VCN_INSTANCES];
- struct amdgpu_vcn_reg internal;
- struct mutex vcn_pg_lock;
- struct mutex vcn1_jpeg1_workaround;
- atomic_t total_submission_cnt;
unsigned harvest_config;
- int (*pause_dpg_mode)(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
struct ras_common_if *ras_if;
struct amdgpu_vcn_ras *ras;
uint16_t inst_mask;
uint8_t num_inst_per_aid;
+
+ /* IP reg dump */
+ uint32_t *ip_dump;
+
+ uint32_t supported_reset;
+ uint32_t caps;
+
+ bool per_inst_fw;
+ unsigned fw_version;
+
+ bool workload_profile_active;
+ struct mutex workload_profile_mutex;
+ u32 reg_count;
+ const struct amdgpu_hwip_reg_entry *reg_list;
};
struct amdgpu_fw_shared_rb_ptrs_struct {
@@ -335,15 +417,30 @@ struct amdgpu_fw_shared {
struct amdgpu_fw_shared_smu_interface_info smu_interface_info;
};
+struct amdgpu_vcn_rb_setup_info {
+ uint32_t rb_addr_lo;
+ uint32_t rb_addr_hi;
+ uint32_t rb_size;
+};
+
struct amdgpu_fw_shared_rb_setup {
uint32_t is_rb_enabled_flags;
- uint32_t rb_addr_lo;
- uint32_t rb_addr_hi;
- uint32_t rb_size;
- uint32_t rb4_addr_lo;
- uint32_t rb4_addr_hi;
- uint32_t rb4_size;
- uint32_t reserved[6];
+
+ union {
+ struct {
+ uint32_t rb_addr_lo;
+ uint32_t rb_addr_hi;
+ uint32_t rb_size;
+ uint32_t rb4_addr_lo;
+ uint32_t rb4_addr_hi;
+ uint32_t rb4_size;
+ uint32_t reserved[6];
+ };
+
+ struct {
+ struct amdgpu_vcn_rb_setup_info rb_info[MAX_NUM_VCN_RB_SETUP];
+ };
+ };
};
struct amdgpu_fw_shared_drm_key_wa {
@@ -351,6 +448,11 @@ struct amdgpu_fw_shared_drm_key_wa {
uint8_t reserved[3];
};
+struct amdgpu_fw_shared_queue_decouple {
+ uint8_t is_enabled;
+ uint8_t reserved[7];
+};
+
struct amdgpu_vcn4_fw_shared {
uint32_t present_flag_0;
uint8_t pad[12];
@@ -361,6 +463,8 @@ struct amdgpu_vcn4_fw_shared {
struct amdgpu_fw_shared_rb_setup rb_setup;
struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+ uint8_t pad3[9];
+ struct amdgpu_fw_shared_queue_decouple decouple;
};
struct amdgpu_vcn_fwlog {
@@ -378,6 +482,28 @@ struct amdgpu_vcn_decode_buffer {
uint32_t pad[30];
};
+struct amdgpu_vcn_rb_metadata {
+ uint32_t size;
+ uint32_t present_flag_0;
+
+ uint8_t version;
+ uint8_t ring_id;
+ uint8_t pad[26];
+};
+
+struct amdgpu_vcn5_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[12];
+ struct amdgpu_fw_shared_unified_queue_struct sq;
+ uint8_t pad1[8];
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ uint8_t pad2[20];
+ struct amdgpu_fw_shared_rb_setup rb_setup;
+ struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
+ struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+ uint8_t pad3[404];
+};
+
#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
#define VCN_BLOCK_DECODE_DISABLE_MASK 0x40
#define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0
@@ -388,11 +514,11 @@ enum vcn_ring_type {
VCN_UNIFIED_RING,
};
-int amdgpu_vcn_early_init(struct amdgpu_device *adev);
-int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
-int amdgpu_vcn_suspend(struct amdgpu_device *adev);
-int amdgpu_vcn_resume(struct amdgpu_device *adev);
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i);
+void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i);
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
@@ -410,7 +536,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring);
-void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev);
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i);
void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn);
void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
@@ -425,5 +551,21 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
enum AMDGPU_UCODE_ID ucode_id);
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev);
+int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev);
+
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *guilty_fence);
+int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count);
+void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block);
+void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
+void amdgpu_vcn_get_profile(struct amdgpu_device *adev);
+void amdgpu_vcn_put_profile(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 96857ae7fb5b..3328ab63376b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -32,6 +32,8 @@
#include "amdgpu.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_dpm.h"
#include "vi.h"
#include "soc15.h"
#include "nv.h"
@@ -71,58 +73,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
amdgpu_num_kcq = 2;
}
-void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
- uint32_t reg0, uint32_t reg1,
- uint32_t ref, uint32_t mask)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
- struct amdgpu_ring *ring = &kiq->ring;
- signed long r, cnt = 0;
- unsigned long flags;
- uint32_t seq;
-
- if (adev->mes.ring.sched.ready) {
- amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
- ref, mask);
- return;
- }
-
- spin_lock_irqsave(&kiq->ring_lock, flags);
- amdgpu_ring_alloc(ring, 32);
- amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
- ref, mask);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r)
- goto failed_undo;
-
- amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-
- /* don't wait anymore for IRQ context */
- if (r < 1 && in_interrupt())
- goto failed_kiq;
-
- might_sleep();
- while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
-
- msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
- }
-
- if (cnt > MAX_KIQ_REG_TRY)
- goto failed_kiq;
-
- return;
-
-failed_undo:
- amdgpu_ring_undo(ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-failed_kiq:
- dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
-}
-
/**
* amdgpu_virt_request_full_gpu() - request full gpu access
* @adev: amdgpu device.
@@ -137,8 +87,10 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init)
if (virt->ops && virt->ops->req_full_gpu) {
r = virt->ops->req_full_gpu(adev, init);
- if (r)
+ if (r) {
+ adev->no_hw_access = true;
return r;
+ }
adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
}
@@ -204,6 +156,20 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
}
/**
+ * amdgpu_virt_ready_to_reset() - send ready to reset to host
+ * @adev: amdgpu device.
+ * Send the ready-to-reset message to the GPU hypervisor to signal that we have
+ * stopped GPU activity and are ready for host FLR. Does nothing when no virt
+ * ops or no ready_to_reset callback is installed.
+ */
+void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+
+	/* Guard the callback that is actually invoked, not reset_gpu. */
+	if (virt->ops && virt->ops->ready_to_reset)
+		virt->ops->ready_to_reset(adev);
+}
+
+/**
* amdgpu_virt_wait_reset() - wait for reset gpu completed
* @adev: amdgpu device.
* Wait for GPU reset completed.
@@ -266,6 +232,22 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
adev->virt.mm_table.gpu_addr = 0;
}
+/**
+ * amdgpu_virt_rcvd_ras_interrupt() - query the host RAS interrupt state
+ * @adev: amdgpu device.
+ *
+ * Forwards to the backend's rcvd_ras_intr callback.
+ *
+ * Return: the callback's result, or false when no virt ops or no
+ * rcvd_ras_intr callback is installed.
+ */
+bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev)
+{
+	const struct amdgpu_virt_ops *ops = adev->virt.ops;
+
+	if (ops && ops->rcvd_ras_intr)
+		return ops->rcvd_ras_intr(adev);
+
+	return false;
+}
+
unsigned int amd_sriov_msg_checksum(void *obj,
unsigned long obj_size,
@@ -302,11 +284,11 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
if (!*data)
goto data_failure;
- bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL);
+ bps = kmalloc_array(align_space, sizeof(*(*data)->bps), GFP_KERNEL);
if (!bps)
goto bps_failure;
- bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL);
+ bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL);
if (!bps_bo)
goto bps_bo_failure;
@@ -339,8 +321,10 @@ static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
for (i = data->last_reserved - 1; i >= 0; i--) {
bo = data->bps_bo[i];
- amdgpu_bo_free_kernel(&bo, NULL, NULL);
- data->bps_bo[i] = bo;
+ if (bo) {
+ amdgpu_bo_free_kernel(&bo, NULL, NULL);
+ data->bps_bo[i] = bo;
+ }
data->last_reserved = i;
}
}
@@ -380,6 +364,8 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
{
struct amdgpu_virt *virt = &adev->virt;
struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
struct amdgpu_bo *bo = NULL;
uint64_t bp;
int i;
@@ -395,12 +381,18 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
* 2) a ras bad page has been reserved (duplicate error injection
* for one page);
*/
- if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
- AMDGPU_GPU_PAGE_SIZE,
- &bo, NULL))
- DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
-
- data->bps_bo[i] = bo;
+ if (ttm_resource_manager_used(man)) {
+ amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
+ bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE);
+ data->bps_bo[i] = NULL;
+ } else {
+ if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE,
+ &bo, NULL))
+ DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+ data->bps_bo[i] = bo;
+ }
data->last_reserved = i + 1;
bo = NULL;
}
@@ -436,6 +428,8 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
else
vram_usage_va = adev->mman.drv_vram_usage_va;
+ memset(&bp, 0, sizeof(bp));
+
if (bp_block_size) {
bp_cnt = bp_block_size / sizeof(uint64_t);
for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
@@ -466,7 +460,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
return -EINVAL;
if (pf2vf_info->size > 1024) {
- DRM_ERROR("invalid pf2vf message size\n");
+ dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size);
return -EINVAL;
}
@@ -477,7 +471,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
adev->virt.fw_reserve.checksum_key, checksum);
if (checksum != checkval) {
- DRM_ERROR("invalid pf2vf message\n");
+ dev_err(adev->dev,
+ "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
+ checksum, checkval);
return -EINVAL;
}
@@ -491,7 +487,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
0, checksum);
if (checksum != checkval) {
- DRM_ERROR("invalid pf2vf message\n");
+ dev_err(adev->dev,
+ "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
+ checksum, checkval);
return -EINVAL;
}
@@ -525,9 +523,12 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
adev->unique_id =
((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid;
+ adev->virt.ras_en_caps.all = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_en_caps.all;
+ adev->virt.ras_telemetry_en_caps.all =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_telemetry_en_caps.all;
break;
default:
- DRM_ERROR("invalid pf2vf version\n");
+ dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version);
return -EINVAL;
}
@@ -613,9 +614,15 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
vf2pf_info->decode_usage = 0;
vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
+ if (amdgpu_sriov_is_mes_info_enable(adev)) {
+ vf2pf_info->mes_info_addr =
+ (uint64_t)(adev->mes.resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE);
+ vf2pf_info->mes_info_size =
+ adev->mes.resource_1[0]->tbo.base.size - AMDGPU_GPU_PAGE_SIZE;
+ }
vf2pf_info->checksum =
amd_sriov_msg_checksum(
- vf2pf_info, vf2pf_info->header.size, 0, 0);
+ vf2pf_info, sizeof(*vf2pf_info), 0, 0);
return 0;
}
@@ -626,8 +633,25 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
int ret;
ret = amdgpu_virt_read_pf2vf_data(adev);
- if (ret)
+ if (ret) {
+ adev->virt.vf2pf_update_retry_cnt++;
+
+ if ((amdgpu_virt_rcvd_ras_interrupt(adev) ||
+ adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
+ amdgpu_sriov_runtime(adev)) {
+
+ amdgpu_ras_set_fed(adev, true);
+ if (amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->kfd.reset_work))
+ return;
+ else
+ dev_err(adev->dev, "Failed to queue work! at %s", __func__);
+ }
+
goto out;
+ }
+
+ adev->virt.vf2pf_update_retry_cnt = 0;
amdgpu_virt_write_vf2pf_data(adev);
out:
@@ -648,6 +672,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
+ adev->virt.vf2pf_update_retry_cnt = 0;
if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
@@ -682,6 +707,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_vf2pf =
(struct amd_sriov_msg_vf2pf_info_header *)
(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
} else if (adev->mman.drv_vram_usage_va) {
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
@@ -689,6 +716,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_vf2pf =
(struct amd_sriov_msg_vf2pf_info_header *)
(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
}
amdgpu_virt_read_pf2vf_data(adev);
@@ -711,7 +740,7 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
}
}
-void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev)
{
uint32_t reg;
@@ -747,14 +776,17 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
- if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
- /* VF MMIO access (except mailbox range) from CPU
- * will be blocked during sriov runtime
- */
- adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
+ return reg;
+}
+
+static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
+{
+ bool is_sriov = false;
/* we have the ability to check now */
if (amdgpu_sriov_vf(adev)) {
+ is_sriov = true;
+
switch (adev->asic_type) {
case CHIP_TONGA:
case CHIP_FIJI:
@@ -783,10 +815,42 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
amdgpu_virt_request_init_data(adev);
break;
default: /* other chip doesn't support SRIOV */
+ is_sriov = false;
DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
break;
}
}
+
+ return is_sriov;
+}
+
+static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
+{
+ ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1);
+ ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1);
+ ratelimit_state_init(&adev->virt.ras.ras_chk_criti_rs, 5 * HZ, 1);
+
+ ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+ ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+ ratelimit_set_flags(&adev->virt.ras.ras_chk_criti_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+
+ mutex_init(&adev->virt.ras.ras_telemetry_mutex);
+
+ adev->virt.ras.cper_rptr = 0;
+}
+
+/*
+ * Detect the virtualization mode, request init data where applicable, and
+ * set up the RAS state only when amdgpu_virt_init_req_data() reports SR-IOV.
+ */
+void amdgpu_virt_init(struct amdgpu_device *adev)
+{
+	u32 reg = amdgpu_virt_init_detect_asic(adev);
+
+	if (amdgpu_virt_init_req_data(adev, reg))
+		amdgpu_virt_init_ras(adev);
}
static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
@@ -835,19 +899,28 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad
return mode;
}
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
+{
+ /* stop the data exchange thread */
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
+}
+
void amdgpu_virt_post_reset(struct amdgpu_device *adev)
{
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) {
/* force set to GFXOFF state after reset,
* to avoid some invalid operation before GC enable
*/
adev->gfx.is_poweron = false;
}
+
+ adev->mes.ring[0].sched.ready = false;
}
bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id)
{
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 0):
/* no vf autoload, white list */
if (ucode_id == AMDGPU_UCODE_ID_VCN1 ||
@@ -942,7 +1015,7 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
}
}
-static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
+bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
u32 acc_flags, u32 hwip,
bool write, u32 *rlcg_flag)
{
@@ -975,7 +1048,7 @@ static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
return ret;
}
-static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
+u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
{
struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
uint32_t timeout = 50000;
@@ -986,6 +1059,7 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v
void *scratch_reg2;
void *scratch_reg3;
void *spare_int;
+ unsigned long flags;
if (!adev->gfx.rlc.rlcg_reg_access_supported) {
dev_err(adev->dev,
@@ -998,11 +1072,17 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v
return 0;
}
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id];
scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0;
scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1;
scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3;
+
+ spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);
+
if (reg_access_ctrl->spare_int)
spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int;
@@ -1021,7 +1101,7 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v
* SCRATCH_REG0 = read/write value
* SCRATCH_REG1[30:28] = command
* SCRATCH_REG1[19:0] = address in dword
- * SCRATCH_REG1[26:24] = Error reporting
+ * SCRATCH_REG1[27:24] = Error reporting
*/
writel(v, scratch_reg0);
writel((offset | flag), scratch_reg1);
@@ -1035,7 +1115,8 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v
udelay(10);
}
- if (i >= timeout) {
+ tmp = readl(scratch_reg1);
+ if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) {
if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
dev_err(adev->dev,
@@ -1058,6 +1139,9 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v
}
ret = readl(scratch_reg0);
+
+ spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);
+
return ret;
}
@@ -1067,6 +1151,9 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,
{
u32 rlcg_flag;
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) {
amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id);
@@ -1084,6 +1171,9 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
{
u32 rlcg_flag;
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
if (!amdgpu_sriov_runtime(adev) &&
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag))
return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, xcc_id);
@@ -1093,3 +1183,376 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
else
return RREG32(offset);
}
+
+/*
+ * Report XNACK support: false only for an SR-IOV VF whose GC IP version is
+ * 9.4.2, true in every other case.
+ */
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
+{
+	if (!amdgpu_sriov_vf(adev))
+		return true;
+
+	return amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2);
+}
+
+bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_sriov_ras_caps_en(adev))
+ return false;
+
+ if (adev->virt.ras_en_caps.bits.block_umc)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__UMC);
+ if (adev->virt.ras_en_caps.bits.block_sdma)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SDMA);
+ if (adev->virt.ras_en_caps.bits.block_gfx)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__GFX);
+ if (adev->virt.ras_en_caps.bits.block_mmhub)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MMHUB);
+ if (adev->virt.ras_en_caps.bits.block_athub)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__ATHUB);
+ if (adev->virt.ras_en_caps.bits.block_pcie_bif)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__PCIE_BIF);
+ if (adev->virt.ras_en_caps.bits.block_hdp)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__HDP);
+ if (adev->virt.ras_en_caps.bits.block_xgmi_wafl)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__XGMI_WAFL);
+ if (adev->virt.ras_en_caps.bits.block_df)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__DF);
+ if (adev->virt.ras_en_caps.bits.block_smn)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SMN);
+ if (adev->virt.ras_en_caps.bits.block_sem)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SEM);
+ if (adev->virt.ras_en_caps.bits.block_mp0)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP0);
+ if (adev->virt.ras_en_caps.bits.block_mp1)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP1);
+ if (adev->virt.ras_en_caps.bits.block_fuse)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__FUSE);
+ if (adev->virt.ras_en_caps.bits.block_mca)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MCA);
+ if (adev->virt.ras_en_caps.bits.block_vcn)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__VCN);
+ if (adev->virt.ras_en_caps.bits.block_jpeg)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__JPEG);
+ if (adev->virt.ras_en_caps.bits.block_ih)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__IH);
+ if (adev->virt.ras_en_caps.bits.block_mpio)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MPIO);
+
+ if (adev->virt.ras_en_caps.bits.poison_propogation_mode)
+ con->poison_supported = true; /* Poison is handled by host */
+
+ return true;
+}
+
+static inline enum amd_sriov_ras_telemetry_gpu_block
+amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block block) {
+ switch (block) {
+ case AMDGPU_RAS_BLOCK__UMC:
+ return RAS_TELEMETRY_GPU_BLOCK_UMC;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ return RAS_TELEMETRY_GPU_BLOCK_SDMA;
+ case AMDGPU_RAS_BLOCK__GFX:
+ return RAS_TELEMETRY_GPU_BLOCK_GFX;
+ case AMDGPU_RAS_BLOCK__MMHUB:
+ return RAS_TELEMETRY_GPU_BLOCK_MMHUB;
+ case AMDGPU_RAS_BLOCK__ATHUB:
+ return RAS_TELEMETRY_GPU_BLOCK_ATHUB;
+ case AMDGPU_RAS_BLOCK__PCIE_BIF:
+ return RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF;
+ case AMDGPU_RAS_BLOCK__HDP:
+ return RAS_TELEMETRY_GPU_BLOCK_HDP;
+ case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+ return RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL;
+ case AMDGPU_RAS_BLOCK__DF:
+ return RAS_TELEMETRY_GPU_BLOCK_DF;
+ case AMDGPU_RAS_BLOCK__SMN:
+ return RAS_TELEMETRY_GPU_BLOCK_SMN;
+ case AMDGPU_RAS_BLOCK__SEM:
+ return RAS_TELEMETRY_GPU_BLOCK_SEM;
+ case AMDGPU_RAS_BLOCK__MP0:
+ return RAS_TELEMETRY_GPU_BLOCK_MP0;
+ case AMDGPU_RAS_BLOCK__MP1:
+ return RAS_TELEMETRY_GPU_BLOCK_MP1;
+ case AMDGPU_RAS_BLOCK__FUSE:
+ return RAS_TELEMETRY_GPU_BLOCK_FUSE;
+ case AMDGPU_RAS_BLOCK__MCA:
+ return RAS_TELEMETRY_GPU_BLOCK_MCA;
+ case AMDGPU_RAS_BLOCK__VCN:
+ return RAS_TELEMETRY_GPU_BLOCK_VCN;
+ case AMDGPU_RAS_BLOCK__JPEG:
+ return RAS_TELEMETRY_GPU_BLOCK_JPEG;
+ case AMDGPU_RAS_BLOCK__IH:
+ return RAS_TELEMETRY_GPU_BLOCK_IH;
+ case AMDGPU_RAS_BLOCK__MPIO:
+ return RAS_TELEMETRY_GPU_BLOCK_MPIO;
+ default:
+ DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
+ block);
+ return RAS_TELEMETRY_GPU_BLOCK_COUNT;
+ }
+}
+
+static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry)
+{
+ struct amd_sriov_ras_telemetry_error_count *tmp = NULL;
+ uint32_t checksum, used_size;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ return 0;
+
+ tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
+ goto out;
+
+ memcpy(&adev->virt.count_cache, tmp,
+ min(used_size, sizeof(adev->virt.count_cache)));
+out:
+ kfree(tmp);
+
+ return 0;
+}
+
+static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bool force_update)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (!virt->ops || !virt->ops->req_ras_err_count)
+ return -EOPNOTSUPP;
+
+ /* Host allows 15 ras telemetry requests per 60 seconds, after which the Host
+ * will ignore incoming guest messages. Ratelimit the guest messages to
+ * prevent a guest self-inflicted DoS.
+ */
+ if (__ratelimit(&virt->ras.ras_error_cnt_rs) || force_update) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ if (!virt->ops->req_ras_err_count(adev))
+ amdgpu_virt_cache_host_error_counts(adev,
+ virt->fw_reserve.ras_telemetry);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ }
+
+ return 0;
+}
+
+/* Bypass ACA interface and query ECC counts directly from host */
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+ struct ras_err_data *err_data)
+{
+ enum amd_sriov_ras_telemetry_gpu_block sriov_block;
+
+ sriov_block = amdgpu_ras_block_to_sriov(adev, block);
+
+ if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
+ !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
+ return -EOPNOTSUPP;
+
+ /* Host Access may be lost during reset, just return last cached data. */
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_req_ras_err_count_internal(adev, false);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ err_data->ue_count = adev->virt.count_cache.block[sriov_block].ue_count;
+ err_data->ce_count = adev->virt.count_cache.block[sriov_block].ce_count;
+ err_data->de_count = adev->virt.count_cache.block[sriov_block].de_count;
+
+ return 0;
+}
+
+static int
+amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry,
+ u32 *more)
+{
+ struct amd_sriov_ras_cper_dump *cper_dump = NULL;
+ struct cper_hdr *entry = NULL;
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ uint32_t checksum, used_size, i;
+ int ret = 0;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ return -EINVAL;
+
+ cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
+ if (!cper_dump)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ *more = cper_dump->more;
+
+ if (cper_dump->wptr < adev->virt.ras.cper_rptr) {
+ dev_warn(
+ adev->dev,
+ "guest specified rptr that was too high! guest rptr: 0x%llx, host rptr: 0x%llx\n",
+ adev->virt.ras.cper_rptr, cper_dump->wptr);
+
+ adev->virt.ras.cper_rptr = cper_dump->wptr;
+ goto out;
+ }
+
+ entry = (struct cper_hdr *)&cper_dump->buf[0];
+
+ for (i = 0; i < cper_dump->count; i++) {
+ amdgpu_cper_ring_write(ring, entry, entry->record_length);
+ entry = (struct cper_hdr *)((char *)entry +
+ entry->record_length);
+ }
+
+ if (cper_dump->overflow_count)
+ dev_warn(adev->dev,
+ "host reported CPER overflow of 0x%llx entries!\n",
+ cper_dump->overflow_count);
+
+ adev->virt.ras.cper_rptr = cper_dump->wptr;
+out:
+ kfree(cper_dump);
+
+ return ret;
+}
+
+/*
+ * Ask the host for CPER records starting at the cached read pointer and hand
+ * each returned dump to amdgpu_virt_write_cpers_to_ring(), repeating while the
+ * host flags that more records are pending.
+ *
+ * Return: -EOPNOTSUPP when no req_ras_cper_dump callback is installed, the
+ * error from amdgpu_virt_write_cpers_to_ring() if a dump is rejected, 0
+ * otherwise (including when a host request returns non-zero).
+ */
+static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+	int ret = 0;
+	uint32_t more = 0;
+
+	if (!virt->ops || !virt->ops->req_ras_cper_dump)
+		return -EOPNOTSUPP;
+
+	do {
+		if (virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr)) {
+			/*
+			 * The request returned non-zero, so no new dump was
+			 * parsed and "more" still holds the value from the
+			 * previous batch. Leave the loop instead of
+			 * re-requesting forever on that stale flag; ret is
+			 * still 0 here, matching the previous return value.
+			 */
+			break;
+		}
+
+		ret = amdgpu_virt_write_cpers_to_ring(
+			adev, virt->fw_reserve.ras_telemetry, &more);
+	} while (more && !ret);
+
+	return ret;
+}
+
+int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int ret = 0;
+
+ if (!amdgpu_sriov_ras_cper_en(adev))
+ return -EOPNOTSUPP;
+
+ if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ return ret;
+}
+
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)
+{
+ unsigned long ue_count, ce_count;
+
+ if (amdgpu_sriov_ras_telemetry_en(adev)) {
+ amdgpu_virt_req_ras_err_count_internal(adev, true);
+ amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL);
+ }
+
+ return 0;
+}
+
+/*
+ * Tell whether RAS telemetry is enabled for @block: the block must map to a
+ * valid SR-IOV telemetry block id and pass
+ * amdgpu_sriov_ras_telemetry_block_en().
+ */
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+					enum amdgpu_ras_block block)
+{
+	enum amd_sriov_ras_telemetry_gpu_block id =
+		amdgpu_ras_block_to_sriov(adev, block);
+
+	return id < RAS_TELEMETRY_GPU_BLOCK_COUNT &&
+	       amdgpu_sriov_ras_telemetry_block_en(adev, id);
+}
+
+/**
+ * amdgpu_virt_request_bad_pages() - request bad pages
+ * @adev: amdgpu device.
+ *
+ * Send a command to the GPU hypervisor to write new bad pages into the shared
+ * PF2VF region. Does nothing when no virt ops or no req_bad_pages callback is
+ * installed.
+ */
+void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev)
+{
+	const struct amdgpu_virt_ops *ops = adev->virt.ops;
+
+	if (!ops || !ops->req_bad_pages)
+		return;
+
+	ops->req_bad_pages(adev);
+}
+
+static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry,
+ bool *hit)
+{
+ struct amd_sriov_ras_chk_criti *tmp = NULL;
+ uint32_t checksum, used_size;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ return 0;
+
+ tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
+ goto out;
+
+ if (hit)
+ *hit = tmp->hit ? true : false;
+
+out:
+ kfree(tmp);
+
+ return 0;
+}
+
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r = -EPERM;
+
+ if (!virt->ops || !virt->ops->req_ras_chk_criti)
+ return -EOPNOTSUPP;
+
+ /* Host allows 15 ras telemetry requests per 60 seconds, after which the Host
+ * will ignore incoming guest messages. Ratelimit the guest messages to
+ * prevent a guest self-inflicted DoS.
+ */
+ if (__ratelimit(&virt->ras.ras_chk_criti_rs)) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ if (!virt->ops->req_ras_chk_criti(adev, addr))
+ r = amdgpu_virt_cache_chk_criti_hit(
+ adev, virt->fw_reserve.ras_telemetry, hit);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index fabb83e9d9ae..d1172c8e58c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -45,12 +45,15 @@
#define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000
#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF
+#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000
/* all asic after AI use this offset */
#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
/* tonga/fiji use this offset */
#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
+#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
+
enum amdgpu_sriov_vf_mode {
SRIOV_VF_MODE_BARE_METAL = 0,
SRIOV_VF_MODE_ONE_VF,
@@ -85,10 +88,17 @@ struct amdgpu_virt_ops {
int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
int (*req_init_data)(struct amdgpu_device *adev);
int (*reset_gpu)(struct amdgpu_device *adev);
+ void (*ready_to_reset)(struct amdgpu_device *adev);
int (*wait_reset)(struct amdgpu_device *adev);
void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
u32 data1, u32 data2, u32 data3);
- void (*ras_poison_handler)(struct amdgpu_device *adev);
+ void (*ras_poison_handler)(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+ bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
+ int (*req_ras_err_count)(struct amdgpu_device *adev);
+ int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
+ int (*req_bad_pages)(struct amdgpu_device *adev);
+ int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr);
};
/*
@@ -97,6 +107,7 @@ struct amdgpu_virt_ops {
struct amdgpu_virt_fw_reserve {
struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
+ void *ras_telemetry;
unsigned int checksum_key;
};
@@ -126,15 +137,26 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5),
/* AV1 Support MODE*/
AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
+ /* VCN RB decouple */
+ AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7),
+ /* MES info */
+ AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
+ AMDGIM_FEATURE_RAS_CAPS = (1 << 9),
+ AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
+ AMDGIM_FEATURE_RAS_CPER = (1 << 11),
};
enum AMDGIM_REG_ACCESS_FLAG {
/* Use PSP to program IH_RB_CNTL */
- AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
+ AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
/* Use RLC to program MMHUB regs */
- AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
+ AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
/* Use RLC to program GC regs */
- AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
+ AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
+ /* Use PSP to program L1_TLB_CNTL */
+ AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3),
+ /* Use RLCG to program SQ_CONFIG1 */
+ AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG = (1 << 4),
};
struct amdgim_pf2vf_info_v1 {
@@ -228,6 +250,18 @@ struct amdgpu_virt_ras_err_handler_data {
int last_reserved;
};
+struct amdgpu_virt_ras {
+ struct ratelimit_state ras_error_cnt_rs;
+ struct ratelimit_state ras_cper_dump_rs;
+ struct ratelimit_state ras_chk_criti_rs;
+ struct mutex ras_telemetry_mutex;
+ uint64_t cper_rptr;
+};
+
+#define AMDGPU_VIRT_CAPS_LIST(X) X(AMDGPU_VIRT_CAP_POWER_LIMIT)
+
+DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
+
/* GPU virtualization */
struct amdgpu_virt {
uint32_t caps;
@@ -237,11 +271,16 @@ struct amdgpu_virt {
uint32_t reg_val_offs;
struct amdgpu_irq_src ack_irq;
struct amdgpu_irq_src rcv_irq;
+
struct work_struct flr_work;
+ struct work_struct req_bad_pages_work;
+ struct work_struct handle_bad_pages_work;
+
struct amdgpu_mm_table mm_table;
const struct amdgpu_virt_ops *ops;
struct amdgpu_vf_error_buffer vf_errors;
struct amdgpu_virt_fw_reserve fw_reserve;
+ struct amdgpu_virt_caps virt_caps;
uint32_t gim_feature;
uint32_t reg_access_mode;
int req_init_data_ver;
@@ -253,6 +292,7 @@ struct amdgpu_virt {
/* vf2pf message */
struct delayed_work vf2pf_work;
uint32_t vf2pf_update_interval_ms;
+ int vf2pf_update_retry_cnt;
/* multimedia bandwidth config */
bool is_mm_bw_enabled;
@@ -263,6 +303,17 @@ struct amdgpu_virt {
/* the ucode id to signal the autoload */
uint32_t autoload_ucode_id;
+
+ /* Spinlock to protect access to the RLCG register interface */
+ spinlock_t rlcg_reg_lock;
+
+ union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+ struct amdgpu_virt_ras ras;
+ struct amd_sriov_ras_telemetry_error_count count_cache;
+
+ /* hibernate and resume with different VF feature for xgmi enabled system */
+ bool is_xgmi_node_migrate_enabled;
};
struct amdgpu_video_codec_info;
@@ -298,15 +349,35 @@ struct amdgpu_video_codec_info;
(amdgpu_sriov_vf((adev)) && \
((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN)))
+#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN)))
+
#define amdgpu_sriov_rlcg_error_report_enabled(adev) \
(amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))
+#define amdgpu_sriov_reg_access_sq_config(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG)))
+
#define amdgpu_passthrough(adev) \
((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)
#define amdgpu_sriov_vf_mmio_access_protection(adev) \
((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)
+#define amdgpu_sriov_ras_caps_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS)
+
+#define amdgpu_sriov_ras_telemetry_en(adev) \
+(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && (adev)->virt.fw_reserve.ras_telemetry)
+
+#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
+(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk))
+
+#define amdgpu_sriov_ras_cper_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
+
static inline bool is_virtual_machine(void)
{
#if defined(CONFIG_X86)
@@ -320,29 +391,38 @@ static inline bool is_virtual_machine(void)
#define amdgpu_sriov_is_pp_one_vf(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_multi_vf_mode(adev) \
+ (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
#define amdgpu_sriov_is_debug(adev) \
((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
#define amdgpu_sriov_is_normal(adev) \
((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
#define amdgpu_sriov_is_av1_support(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
+#define amdgpu_sriov_is_vcn_rb_decouple(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
+#define amdgpu_sriov_is_mes_info_enable(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE)
+
+#define amdgpu_virt_xgmi_migrate_enabled(adev) \
+ ((adev)->virt.is_xgmi_node_migrate_enabled && (adev)->gmc.xgmi.node_segment_size != 0)
+
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
-void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
- uint32_t reg0, uint32_t rreg1,
- uint32_t ref, uint32_t mask);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
+void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev);
int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
+bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev);
void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
-void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+void amdgpu_virt_init(struct amdgpu_device *adev);
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
@@ -360,5 +440,20 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
uint32_t ucode_id);
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
void amdgpu_virt_post_reset(struct amdgpu_device *adev);
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
+bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
+ u32 acc_flags, u32 hwip,
+ bool write, u32 *rlcg_flag);
+u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id);
+bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+ struct ras_err_data *err_data);
+int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update);
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev);
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 7148a216ae2f..79bad9cbe2ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0+
#include <drm/drm_atomic_helper.h>
+#include <drm/drm_edid.h>
#include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_vblank.h>
#include "amdgpu.h"
@@ -12,7 +14,6 @@
#include "dce_v8_0.h"
#endif
#include "dce_v10_0.h"
-#include "dce_v11_0.h"
#include "ivsrcid/ivsrcid_vislands30.h"
#include "amdgpu_vkms.h"
#include "amdgpu_display.h"
@@ -64,9 +65,7 @@ static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer)
static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc)
{
- struct drm_device *dev = crtc->dev;
- unsigned int pipe = drm_crtc_index(crtc);
- struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);
struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc);
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
@@ -90,10 +89,8 @@ static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc,
ktime_t *vblank_time,
bool in_vblank_irq)
{
- struct drm_device *dev = crtc->dev;
- unsigned int pipe = crtc->index;
struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
- struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
if (!READ_ONCE(vblank->enabled)) {
@@ -190,8 +187,8 @@ static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
amdgpu_crtc->connector = NULL;
amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE;
- hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- amdgpu_crtc->vblank_timer.function = &amdgpu_vkms_vblank_simulate;
+ hrtimer_setup(&amdgpu_crtc->vblank_timer, &amdgpu_vkms_vblank_simulate, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
return ret;
}
@@ -239,6 +236,8 @@ static int amdgpu_vkms_conn_get_modes(struct drm_connector *connector)
for (i = 0; i < ARRAY_SIZE(common_modes); i++) {
mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+ if (!mode)
+ continue;
drm_mode_probed_add(connector, mode);
}
@@ -311,7 +310,13 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
return 0;
}
afb = to_amdgpu_framebuffer(new_state->fb);
- obj = new_state->fb->obj[0];
+
+ obj = drm_gem_fb_get_obj(new_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return -EINVAL;
+ }
+
rbo = gem_to_amdgpu_bo(obj);
adev = amdgpu_ttm_adev(rbo->tbo.bdev);
@@ -332,6 +337,7 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
else
domain = AMDGPU_GEM_DOMAIN_VRAM;
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(rbo, domain);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
@@ -365,12 +371,19 @@ static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
struct amdgpu_bo *rbo;
+ struct drm_gem_object *obj;
int r;
if (!old_state->fb)
return;
- rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]);
+ obj = drm_gem_fb_get_obj(old_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return;
+ }
+
+ rbo = gem_to_amdgpu_bo(obj);
r = amdgpu_bo_reserve(rbo, false);
if (unlikely(r)) {
DRM_ERROR("failed to reserve rbo before unpin\n");
@@ -479,10 +492,10 @@ const struct drm_mode_config_funcs amdgpu_vkms_mode_funcs = {
.atomic_commit = drm_atomic_helper_commit,
};
-static int amdgpu_vkms_sw_init(void *handle)
+static int amdgpu_vkms_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc,
sizeof(struct amdgpu_vkms_output), GFP_KERNEL);
@@ -522,9 +535,9 @@ static int amdgpu_vkms_sw_init(void *handle)
return 0;
}
-static int amdgpu_vkms_sw_fini(void *handle)
+static int amdgpu_vkms_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i = 0;
for (i = 0; i < adev->mode_info.num_crtc; i++)
@@ -536,14 +549,14 @@ static int amdgpu_vkms_sw_fini(void *handle)
adev->mode_info.mode_config_initialized = false;
- kfree(adev->mode_info.bios_hardcoded_edid);
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
kfree(adev->amdgpu_vkms_output);
return 0;
}
-static int amdgpu_vkms_hw_init(void *handle)
+static int amdgpu_vkms_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
@@ -567,13 +580,6 @@ static int amdgpu_vkms_hw_init(void *handle)
case CHIP_TONGA:
dce_v10_0_disable_dce(adev);
break;
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_VEGAM:
- dce_v11_0_disable_dce(adev);
- break;
case CHIP_TOPAZ:
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_HAINAN:
@@ -586,55 +592,45 @@ static int amdgpu_vkms_hw_init(void *handle)
return 0;
}
-static int amdgpu_vkms_hw_fini(void *handle)
+static int amdgpu_vkms_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int amdgpu_vkms_suspend(void *handle)
+static int amdgpu_vkms_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = drm_mode_config_helper_suspend(adev_to_drm(adev));
if (r)
return r;
- return amdgpu_vkms_hw_fini(handle);
+
+ return 0;
}
-static int amdgpu_vkms_resume(void *handle)
+static int amdgpu_vkms_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_vkms_hw_init(handle);
+ r = amdgpu_vkms_hw_init(ip_block);
if (r)
return r;
- return drm_mode_config_helper_resume(adev_to_drm(adev));
+ return drm_mode_config_helper_resume(adev_to_drm(ip_block->adev));
}
-static bool amdgpu_vkms_is_idle(void *handle)
+static bool amdgpu_vkms_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int amdgpu_vkms_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int amdgpu_vkms_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int amdgpu_vkms_set_clockgating_state(void *handle,
+static int amdgpu_vkms_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int amdgpu_vkms_set_powergating_state(void *handle,
+static int amdgpu_vkms_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -642,8 +638,6 @@ static int amdgpu_vkms_set_powergating_state(void *handle,
static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
.name = "amdgpu_vkms",
- .early_init = NULL,
- .late_init = NULL,
.sw_init = amdgpu_vkms_sw_init,
.sw_fini = amdgpu_vkms_sw_fini,
.hw_init = amdgpu_vkms_hw_init,
@@ -651,14 +645,11 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
.suspend = amdgpu_vkms_suspend,
.resume = amdgpu_vkms_resume,
.is_idle = amdgpu_vkms_is_idle,
- .wait_for_idle = amdgpu_vkms_wait_for_idle,
- .soft_reset = amdgpu_vkms_soft_reset,
.set_clockgating_state = amdgpu_vkms_set_clockgating_state,
.set_powergating_state = amdgpu_vkms_set_powergating_state,
};
-const struct amdgpu_ip_block_version amdgpu_vkms_ip_block =
-{
+const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 1,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f5daadcec865..c1a801203949 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -36,6 +36,7 @@
#include <drm/ttm/ttm_tt.h>
#include <drm/drm_exec.h>
#include "amdgpu.h"
+#include "amdgpu_vm.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_gmc.h"
@@ -127,43 +128,14 @@ struct amdgpu_vm_tlb_seq_struct {
};
/**
- * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
- *
- * @adev: amdgpu_device pointer
- * @vm: amdgpu_vm pointer
- * @pasid: the pasid the VM is using on this GPU
- *
- * Set the pasid this VM is using on this GPU, can also be used to remove the
- * pasid by passing in zero.
+ * amdgpu_vm_assert_locked - check if VM is correctly locked
+ * @vm: the VM which should be tested
*
+ * Asserts that the VM root PD is locked.
*/
-int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u32 pasid)
+static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
{
- int r;
-
- if (vm->pasid == pasid)
- return 0;
-
- if (vm->pasid) {
- r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid));
- if (r < 0)
- return r;
-
- vm->pasid = 0;
- }
-
- if (pasid) {
- r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm,
- GFP_KERNEL));
- if (r < 0)
- return r;
-
- vm->pasid = pasid;
- }
-
-
- return 0;
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
}
/**
@@ -180,6 +152,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
struct amdgpu_bo *bo = vm_bo->bo;
vm_bo->moved = true;
+ amdgpu_vm_assert_locked(vm);
spin_lock(&vm_bo->vm->status_lock);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&vm_bo->vm_status, &vm->evicted);
@@ -197,6 +170,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
spin_unlock(&vm_bo->vm->status_lock);
@@ -212,6 +186,7 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
spin_unlock(&vm_bo->vm->status_lock);
@@ -234,6 +209,22 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
}
/**
+ * amdgpu_vm_bo_evicted_user - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for BOs used by user mode queues which are not at the location they
+ * should be.
+ */
+static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
+{
+ vm_bo->moved = true;
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
* amdgpu_vm_bo_relocated - vm_bo is reloacted
*
* @vm_bo: vm_bo which is relocated
@@ -243,6 +234,7 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
if (vm_bo->bo->parent) {
spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
@@ -262,6 +254,7 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
{
+ amdgpu_vm_assert_locked(vm_bo->vm);
spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->done);
spin_unlock(&vm_bo->vm->status_lock);
@@ -278,13 +271,17 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
{
struct amdgpu_vm_bo_base *vm_bo, *tmp;
+ amdgpu_vm_assert_locked(vm);
+
spin_lock(&vm->status_lock);
list_splice_init(&vm->done, &vm->invalidated);
list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
vm_bo->moved = true;
+
list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
struct amdgpu_bo *bo = vm_bo->bo;
+ vm_bo->moved = true;
if (!bo || bo->tbo.type != ttm_bo_type_kernel)
list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
else if (bo->parent)
@@ -294,6 +291,112 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
}
/**
+ * amdgpu_vm_update_shared - helper to update shared memory stat
+ * @base: base structure for tracking BO usage in a VM
+ *
+ * Takes the vm status_lock and updates the shared memory stat. If the basic
+ * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats() needs to be
+ * called as well.
+ */
+static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
+{
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ uint64_t size = amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+ bool shared;
+
+ dma_resv_assert_held(bo->tbo.base.resv);
+ spin_lock(&vm->status_lock);
+ shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ if (base->shared != shared) {
+ base->shared = shared;
+ if (shared) {
+ vm->stats[bo_memtype].drm.shared += size;
+ vm->stats[bo_memtype].drm.private -= size;
+ } else {
+ vm->stats[bo_memtype].drm.shared -= size;
+ vm->stats[bo_memtype].drm.private += size;
+ }
+ }
+ spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
+ * @bo: amdgpu buffer object
+ *
+ * Update the per-VM stats of every VM the BO is tracked in, moving its size
+ * from private to shared or vice versa if needed.
+ */
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *base;
+
+ for (base = bo->vm_bo; base; base = base->next)
+ amdgpu_vm_update_shared(base);
+}
+
+/**
+ * amdgpu_vm_update_stats_locked - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Caller needs to hold the vm status_lock. Useful when multiple updates need
+ * to happen at the same time.
+ */
+static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
+{
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ int64_t size = sign * amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+
+ /* For drm-total- and drm-shared-, BO are accounted by their preferred
+ * placement, see also amdgpu_bo_mem_stats_placement.
+ */
+ if (base->shared)
+ vm->stats[bo_memtype].drm.shared += size;
+ else
+ vm->stats[bo_memtype].drm.private += size;
+
+ if (res && res->mem_type < __AMDGPU_PL_NUM) {
+ uint32_t res_memtype = res->mem_type;
+
+ vm->stats[res_memtype].drm.resident += size;
+ /* BO only count as purgeable if it is resident,
+ * since otherwise there's nothing to purge.
+ */
+ if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
+ vm->stats[res_memtype].drm.purgeable += size;
+ if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
+ vm->stats[bo_memtype].evicted += size;
+ }
+}
+
+/**
+ * amdgpu_vm_update_stats - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Updates the basic memory stat when bo is added/deleted/moved.
+ */
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
+{
+ struct amdgpu_vm *vm = base->vm;
+
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(base, res, sign);
+ spin_unlock(&vm->status_lock);
+}
+
+/**
* amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
*
* @base: base structure for tracking BO usage in a VM
@@ -316,7 +419,12 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->next = bo->vm_bo;
bo->vm_bo = base;
- if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
+ spin_lock(&vm->status_lock);
+ base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
+ spin_unlock(&vm->status_lock);
+
+ if (!amdgpu_vm_is_bo_always_valid(vm, bo))
return;
dma_resv_assert_held(vm->root.bo->tbo.base.resv);
@@ -357,6 +465,42 @@ int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
}
/**
+ * amdgpu_vm_lock_done_list - lock all BOs on the done list
+ * @vm: vm providing the BOs
+ * @exec: drm execution context
+ * @num_fences: number of extra fences to reserve
+ *
+ * Lock the BOs on the done list in the DRM execution context.
+ */
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct list_head *prev = &vm->done;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ /* We can only trust prev->next while holding the lock */
+ spin_lock(&vm->status_lock);
+ while (!list_is_head(prev->next, &vm->done)) {
+ bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_va->base.bo;
+ if (bo) {
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1);
+ if (unlikely(ret))
+ return ret;
+ }
+ spin_lock(&vm->status_lock);
+ prev = prev->next;
+ }
+ spin_unlock(&vm->status_lock);
+
+ return 0;
+}
+
+/**
* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
*
* @adev: amdgpu device pointer
@@ -417,7 +561,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
if (!vm)
return result;
- result += vm->generation;
+ result += lower_32_bits(vm->generation);
/* Add one if the page tables will be re-generated on next CS */
if (drm_sched_entity_error(&vm->delayed))
++result;
@@ -426,29 +570,33 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
/**
- * amdgpu_vm_validate_pt_bos - validate the page table BOs
+ * amdgpu_vm_validate - validate evicted BOs tracked in the VM
*
* @adev: amdgpu device pointer
* @vm: vm providing the BOs
+ * @ticket: optional reservation ticket used to reserve the VM
* @validate: callback to do the validation
* @param: parameter for the validation callback
*
- * Validate the page table BOs on command submission if neccessary.
+ * Validate the page table BOs and per-VM BOs on command submission if
+ * necessary. If a ticket is given, also try to validate evicted user queue
+ * BOs. They must already be reserved with the given ticket.
*
* Returns:
* Validation result.
*/
-int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int (*validate)(void *p, struct amdgpu_bo *bo),
- void *param)
+int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket,
+ int (*validate)(void *p, struct amdgpu_bo *bo),
+ void *param)
{
+ uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
struct amdgpu_vm_bo_base *bo_base;
- struct amdgpu_bo *shadow;
struct amdgpu_bo *bo;
int r;
- if (drm_sched_entity_error(&vm->delayed)) {
- ++vm->generation;
+ if (vm->generation != new_vm_generation) {
+ vm->generation = new_vm_generation;
amdgpu_vm_bo_reset_state_machine(vm);
amdgpu_vm_fini_entities(vm);
r = amdgpu_vm_init_entities(adev, vm);
@@ -464,16 +612,10 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
spin_unlock(&vm->status_lock);
bo = bo_base->bo;
- shadow = amdgpu_bo_shadowed(bo);
r = validate(param, bo);
if (r)
return r;
- if (shadow) {
- r = validate(param, shadow);
- if (r)
- return r;
- }
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
@@ -483,6 +625,23 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
spin_lock(&vm->status_lock);
}
+ while (ticket && !list_empty(&vm->evicted_user)) {
+ bo_base = list_first_entry(&vm->evicted_user,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_base->bo;
+ dma_resv_assert_held(bo->tbo.base.resv);
+
+ r = validate(param, bo);
+ if (r)
+ return r;
+
+ amdgpu_vm_bo_invalidated(bo_base);
+
+ spin_lock(&vm->status_lock);
+ }
spin_unlock(&vm->status_lock);
amdgpu_vm_eviction_lock(vm);
@@ -500,22 +659,31 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* Check if all VM PDs/PTs are ready for updates
*
* Returns:
- * True if VM is not evicting.
+ * True if VM is not evicting and none of the VM entities is stopped
*/
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
- bool empty;
bool ret;
+ amdgpu_vm_assert_locked(vm);
+
amdgpu_vm_eviction_lock(vm);
ret = !vm->evicting;
amdgpu_vm_eviction_unlock(vm);
spin_lock(&vm->status_lock);
- empty = list_empty(&vm->evicted);
+ ret &= list_empty(&vm->evicted);
spin_unlock(&vm->status_lock);
- return ret && empty;
+ spin_lock(&vm->immediate.lock);
+ ret &= !vm->immediate.stopped;
+ spin_unlock(&vm->immediate.lock);
+
+ spin_lock(&vm->delayed.lock);
+ ret &= !vm->delayed.stopped;
+ spin_unlock(&vm->delayed.lock);
+
+ return ret;
}
/**
@@ -600,6 +768,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool need_pipe_sync)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
@@ -607,9 +776,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool gds_switch_needed = ring->funcs->emit_gds_switch &&
job->gds_switch_needed;
bool vm_flush_needed = job->vm_needs_flush;
- struct dma_fence *fence = NULL;
+ bool cleaner_shader_needed = false;
bool pasid_mapping_needed = false;
- unsigned patch_offset = 0;
+ struct dma_fence *fence = NULL;
+ struct amdgpu_fence *af;
+ unsigned int patch;
int r;
if (amdgpu_vmid_had_gpu_reset(adev, id)) {
@@ -631,16 +802,26 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg;
- if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
+ cleaner_shader_needed = job->run_cleaner_shader &&
+ adev->gfx.enable_cleaner_shader &&
+ ring->funcs->emit_cleaner_shader && job->base.s_fence &&
+ &job->base.s_fence->scheduled == isolation->spearhead;
+
+ if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
+ !cleaner_shader_needed)
return 0;
amdgpu_ring_ib_begin(ring);
if (ring->funcs->init_cond_exec)
- patch_offset = amdgpu_ring_init_cond_exec(ring);
+ patch = amdgpu_ring_init_cond_exec(ring,
+ ring->cond_exe_gpu_addr);
if (need_pipe_sync)
amdgpu_ring_emit_pipeline_sync(ring);
+ if (cleaner_shader_needed)
+ ring->funcs->emit_cleaner_shader(ring);
+
if (vm_flush_needed) {
trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
@@ -650,9 +831,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
- if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
+ if (ring->funcs->emit_gds_switch &&
gds_switch_needed) {
amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
job->gds_size, job->gws_base,
@@ -660,10 +841,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
job->oa_size);
}
- if (vm_flush_needed || pasid_mapping_needed) {
+ if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
r = amdgpu_fence_emit(ring, &fence, NULL, 0);
if (r)
return r;
+ /* this is part of the job's context */
+ af = container_of(fence, struct amdgpu_fence, base);
+ af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
}
if (vm_flush_needed) {
@@ -682,16 +866,28 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
id->pasid_mapping = dma_fence_get(fence);
mutex_unlock(&id_mgr->lock);
}
+
+ /*
+ * Make sure that all other submissions wait for the cleaner shader to
+ * finish before we push them to the HW.
+ */
+ if (cleaner_shader_needed) {
+ trace_amdgpu_cleaner_shader(ring, fence);
+ mutex_lock(&adev->enforce_isolation_mutex);
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(fence);
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ }
dma_fence_put(fence);
- if (ring->funcs->patch_cond_exec)
- amdgpu_ring_patch_cond_exec(ring, patch_offset);
+ amdgpu_ring_patch_cond_exec(ring, patch);
/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
if (ring->funcs->emit_switch_buffer) {
amdgpu_ring_emit_switch_buffer(ring);
amdgpu_ring_emit_switch_buffer(ring);
}
+
amdgpu_ring_ib_end(ring);
return 0;
}
@@ -773,6 +969,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
LIST_HEAD(relocated);
int r, idx;
+ amdgpu_vm_assert_locked(vm);
+
spin_lock(&vm->status_lock);
list_splice_init(&vm->relocated, &relocated);
spin_unlock(&vm->status_lock);
@@ -788,7 +986,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
params.vm = vm;
params.immediate = immediate;
- r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
+ r = vm->update_funcs->prepare(&params, NULL,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES);
if (r)
goto error;
@@ -837,6 +1036,46 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
}
/**
+ * amdgpu_vm_tlb_flush - prepare TLB flush
+ *
+ * @params: parameters for update
+ * @fence: input fence to sync TLB flush with
+ * @tlb_cb: the callback structure
+ *
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
+ */
+static void
+amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
+ struct dma_fence **fence,
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb)
+{
+ struct amdgpu_vm *vm = params->vm;
+
+ tlb_cb->vm = vm;
+ if (!fence || !*fence) {
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+ return;
+ }
+
+ if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
+ amdgpu_vm_tlb_seq_cb)) {
+ dma_fence_put(vm->last_tlb_flush);
+ vm->last_tlb_flush = dma_fence_get(*fence);
+ } else {
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+ }
+
+ /* Prepare a TLB flush fence to be attached to PTs */
+ if (!params->unlocked && vm->is_compute_context) {
+ amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
+
+ /* Makes sure no PD/PT is freed before the flush */
+ dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+}
+
+/**
* amdgpu_vm_update_range - update a range in the vm page table
*
* @adev: amdgpu_device pointer to use for commands
@@ -844,7 +1083,8 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
* @immediate: immediate submission in a page fault
* @unlocked: unlocked invalidation during MM callback
* @flush_tlb: trigger tlb invalidation after update completed
- * @resv: fences we need to sync to
+ * @allow_override: change MTYPE for local NUMA nodes
+ * @sync: fences we need to sync to
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
@@ -861,15 +1101,15 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
*/
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bool immediate, bool unlocked, bool flush_tlb,
- struct dma_resv *resv, uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset, uint64_t vram_base,
+ bool allow_override, struct amdgpu_sync *sync,
+ uint64_t start, uint64_t last, uint64_t flags,
+ uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
struct dma_fence **fence)
{
- struct amdgpu_vm_update_params params;
struct amdgpu_vm_tlb_seq_struct *tlb_cb;
+ struct amdgpu_vm_update_params params;
struct amdgpu_res_cursor cursor;
- enum amdgpu_sync_mode sync_mode;
int r, idx;
if (!drm_dev_enter(adev_to_drm(adev), &idx))
@@ -877,20 +1117,20 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
if (!tlb_cb) {
- r = -ENOMEM;
- goto error_unlock;
+ drm_dev_exit(idx);
+ return -ENOMEM;
}
/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
* heavy-weight flush TLB unconditionally.
*/
flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0);
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0);
/*
* On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
*/
- flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0);
+ flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0);
memset(&params, 0, sizeof(params));
params.adev = adev;
@@ -898,14 +1138,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.immediate = immediate;
params.pages_addr = pages_addr;
params.unlocked = unlocked;
-
- /* Implicitly sync to command submissions in the same VM before
- * unmapping. Sync to moving fences before mapping.
- */
- if (!(flags & AMDGPU_PTE_VALID))
- sync_mode = AMDGPU_SYNC_EQ_OWNER;
- else
- sync_mode = AMDGPU_SYNC_EXPLICIT;
+ params.needs_flush = flush_tlb;
+ params.allow_override = allow_override;
+ INIT_LIST_HEAD(&params.tlb_flush_waitlist);
amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
@@ -921,7 +1156,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
dma_fence_put(tmp);
}
- r = vm->update_funcs->prepare(&params, resv, sync_mode);
+ r = vm->update_funcs->prepare(&params, sync,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE);
if (r)
goto error_free;
@@ -964,7 +1200,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.pages_addr = NULL;
}
- } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
+ } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) {
addr = vram_base + cursor.start;
} else {
addr = 0;
@@ -980,74 +1216,28 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
r = vm->update_funcs->commit(&params, fence);
+ if (r)
+ goto error_free;
- if (flush_tlb || params.table_freed) {
- tlb_cb->vm = vm;
- if (fence && *fence &&
- !dma_fence_add_callback(*fence, &tlb_cb->cb,
- amdgpu_vm_tlb_seq_cb)) {
- dma_fence_put(vm->last_tlb_flush);
- vm->last_tlb_flush = dma_fence_get(*fence);
- } else {
- amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
- }
+ if (params.needs_flush) {
+ amdgpu_vm_tlb_flush(&params, fence, tlb_cb);
tlb_cb = NULL;
}
+ amdgpu_vm_pt_free_list(adev, &params);
+
error_free:
kfree(tlb_cb);
-
-error_unlock:
amdgpu_vm_eviction_unlock(vm);
drm_dev_exit(idx);
return r;
}
-static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
- struct amdgpu_mem_stats *stats)
-{
- struct amdgpu_vm *vm = bo_va->base.vm;
- struct amdgpu_bo *bo = bo_va->base.bo;
-
- if (!bo)
- return;
-
- /*
- * For now ignore BOs which are currently locked and potentially
- * changing their location.
- */
- if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv &&
- !dma_resv_trylock(bo->tbo.base.resv))
- return;
-
- amdgpu_bo_get_memory(bo, stats);
- if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
- dma_resv_unlock(bo->tbo.base.resv);
-}
-
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
- struct amdgpu_mem_stats *stats)
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
{
- struct amdgpu_bo_va *bo_va, *tmp;
-
spin_lock(&vm->status_lock);
- list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
+ memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
spin_unlock(&vm->status_lock);
}
@@ -1069,34 +1259,59 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo_va_mapping *mapping;
+ struct dma_fence **last_update;
dma_addr_t *pages_addr = NULL;
struct ttm_resource *mem;
- struct dma_fence **last_update;
+ struct amdgpu_sync sync;
bool flush_tlb = clear;
- struct dma_resv *resv;
uint64_t vram_base;
uint64_t flags;
+ bool uncached;
int r;
- if (clear || !bo) {
+ amdgpu_sync_create(&sync);
+ if (clear) {
+ mem = NULL;
+
+ /* Implicitly sync to command submissions in the same VM before
+ * unmapping.
+ */
+ r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
+ AMDGPU_SYNC_EQ_OWNER, vm);
+ if (r)
+ goto error_free;
+ if (bo) {
+ r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
+ if (r)
+ goto error_free;
+ }
+ } else if (!bo) {
mem = NULL;
- resv = vm->root.bo->tbo.base.resv;
+
+ /* PRT map operations don't need to sync to anything. */
+
} else {
struct drm_gem_object *obj = &bo->tbo.base;
- resv = bo->tbo.base.resv;
- if (obj->import_attach && bo_va->is_xgmi) {
+ if (drm_gem_is_imported(obj) && bo_va->is_xgmi) {
struct dma_buf *dma_buf = obj->import_attach->dmabuf;
struct drm_gem_object *gobj = dma_buf->priv;
struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
- if (abo->tbo.resource->mem_type == TTM_PL_VRAM)
+ if (abo->tbo.resource &&
+ abo->tbo.resource->mem_type == TTM_PL_VRAM)
bo = gem_to_amdgpu_bo(gobj);
}
mem = bo->tbo.resource;
- if (mem->mem_type == TTM_PL_TT ||
- mem->mem_type == AMDGPU_PL_PREEMPT)
+ if (mem && (mem->mem_type == TTM_PL_TT ||
+ mem->mem_type == AMDGPU_PL_PREEMPT))
pages_addr = bo->tbo.ttm->dma_address;
+
+ /* Implicitly sync to moving fences before mapping anything */
+ r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
+ AMDGPU_SYNC_EXPLICIT, vm);
+ if (r)
+ goto error_free;
}
if (bo) {
@@ -1109,13 +1324,14 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
vram_base = bo_adev->vm_manager.vram_base_offset;
+ uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0;
} else {
flags = 0x0;
vram_base = 0;
+ uncached = false;
}
- if (clear || (bo && bo->tbo.base.resv ==
- vm->root.bo->tbo.base.resv))
+ if (clear || amdgpu_vm_is_bo_always_valid(vm, bo))
last_update = &vm->last_update;
else
last_update = &bo_va->last_pt_update;
@@ -1134,34 +1350,34 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
* but in case of something, we filter the flags in first place
*/
- if (!(mapping->flags & AMDGPU_PTE_READABLE))
+ if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE))
update_flags &= ~AMDGPU_PTE_READABLE;
- if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+ if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE))
update_flags &= ~AMDGPU_PTE_WRITEABLE;
/* Apply ASIC specific mapping flags */
- amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
+ amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags,
+ &update_flags);
trace_amdgpu_vm_bo_update(mapping);
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
- resv, mapping->start, mapping->last,
- update_flags, mapping->offset,
- vram_base, mem, pages_addr,
- last_update);
+ !uncached, &sync, mapping->start,
+ mapping->last, update_flags,
+ mapping->offset, vram_base, mem,
+ pages_addr, last_update);
if (r)
- return r;
+ goto error_free;
}
/* If the BO is not in its preferred location add it back to
* the evicted list so that it gets validated again on the
* next command submission.
*/
- if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
- uint32_t mem_type = bo->tbo.resource->mem_type;
-
- if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(mem_type)))
+ if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
+ if (bo->tbo.resource &&
+ !(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
amdgpu_vm_bo_evicted(&bo_va->base);
else
amdgpu_vm_bo_idle(&bo_va->base);
@@ -1178,7 +1394,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
trace_amdgpu_vm_bo_mapping(mapping);
}
- return 0;
+error_free:
+ amdgpu_sync_free(&sync);
+ return r;
}
/**
@@ -1280,7 +1498,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
struct dma_fence *fence)
{
- if (mapping->flags & AMDGPU_PTE_PRT)
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_add_prt_cb(adev, fence);
kfree(mapping);
}
@@ -1325,29 +1543,34 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence)
{
- struct dma_resv *resv = vm->root.bo->tbo.base.resv;
struct amdgpu_bo_va_mapping *mapping;
- uint64_t init_pte_value = 0;
struct dma_fence *f = NULL;
+ struct amdgpu_sync sync;
int r;
+
+ /*
+ * Implicitly sync to command submissions in the same VM before
+ * unmapping.
+ */
+ amdgpu_sync_create(&sync);
+ r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
+ AMDGPU_SYNC_EQ_OWNER, vm);
+ if (r)
+ goto error_free;
+
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
- if (vm->pte_support_ats &&
- mapping->start < AMDGPU_GMC_HOLE_START)
- init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
-
- r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
- mapping->start, mapping->last,
- init_pte_value, 0, 0, NULL, NULL,
- &f);
+ r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
+ &sync, mapping->start, mapping->last,
+ 0, 0, 0, NULL, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
- return r;
+ goto error_free;
}
}
@@ -1358,7 +1581,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
dma_fence_put(f);
}
- return 0;
+error_free:
+ amdgpu_sync_free(&sync);
+ return r;
}
@@ -1367,6 +1592,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @ticket: optional reservation ticket used to reserve the VM
*
* Make sure all BOs which are moved are updated in the PTs.
*
@@ -1376,11 +1602,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
* PTs have to be reserved!
*/
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+ struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket)
{
struct amdgpu_bo_va *bo_va;
struct dma_resv *resv;
- bool clear;
+ bool clear, unlock;
int r;
spin_lock(&vm->status_lock);
@@ -1403,18 +1630,35 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
spin_unlock(&vm->status_lock);
/* Try to reserve the BO to avoid clearing its ptes */
- if (!amdgpu_vm_debug && dma_resv_trylock(resv))
+ if (!adev->debug_vm && dma_resv_trylock(resv)) {
+ clear = false;
+ unlock = true;
+ /* The caller is already holding the reservation lock */
+ } else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
clear = false;
+ unlock = false;
/* Somebody else is using the BO right now */
- else
+ } else {
clear = true;
+ unlock = false;
+ }
r = amdgpu_vm_bo_update(adev, bo_va, clear);
+
+ if (unlock)
+ dma_resv_unlock(resv);
if (r)
return r;
- if (!clear)
- dma_resv_unlock(resv);
+ /* Remember evicted DMABuf imports in compute VMs for later
+ * validation
+ */
+ if (vm->is_compute_context &&
+ drm_gem_is_imported(&bo_va->base.bo->tbo.base) &&
+ (!bo_va->base.bo->tbo.resource ||
+ bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
+ amdgpu_vm_bo_evicted_user(&bo_va->base);
+
spin_lock(&vm->status_lock);
}
spin_unlock(&vm->status_lock);
@@ -1423,6 +1667,51 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @flush_type: flush type
+ * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush.
+ *
+ * Flush TLB if needed for a compute VM.
+ *
+ * Returns:
+ * 0 for success.
+ */
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint32_t flush_type,
+ uint32_t xcc_mask)
+{
+ uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+ bool all_hub = false;
+ int xcc = 0, r = 0;
+
+ WARN_ON_ONCE(!vm->is_compute_context);
+
+ /*
+ * It can be that we race and lose here, but that is extremely unlikely
+ * and the worst thing which could happen is that we flush the changes
+ * into the TLB once more which is harmless.
+ */
+ if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+ return 0;
+
+ if (adev->family == AMDGPU_FAMILY_AI ||
+ adev->family == AMDGPU_FAMILY_RV)
+ all_hub = true;
+
+ for_each_inst(xcc, xcc_mask) {
+ r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
+ all_hub, xcc);
+ if (r)
+ break;
+ }
+ return r;
+}
+
+/**
* amdgpu_vm_bo_add - add a bo to a specific vm
*
* @adev: amdgpu_device pointer
@@ -1488,16 +1777,46 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
list_add(&mapping->list, &bo_va->invalids);
amdgpu_vm_it_insert(mapping, &vm->va);
- if (mapping->flags & AMDGPU_PTE_PRT)
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT)
amdgpu_vm_prt_get(adev);
- if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
- !bo_va->base.moved) {
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
amdgpu_vm_bo_moved(&bo_va->base);
- }
+
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
+/* Validate operation parameters to prevent potential abuse */
+static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo,
+ uint64_t saddr,
+ uint64_t offset,
+ uint64_t size)
+{
+ uint64_t tmp, lpfn;
+
+ if (saddr & AMDGPU_GPU_PAGE_MASK
+ || offset & AMDGPU_GPU_PAGE_MASK
+ || size & AMDGPU_GPU_PAGE_MASK)
+ return -EINVAL;
+
+ if (check_add_overflow(saddr, size, &tmp)
+ || check_add_overflow(offset, size, &tmp)
+ || size == 0 /* which also leads to end < begin */)
+ return -EINVAL;
+
+ /* make sure object fit at this offset */
+ if (bo && offset + size > amdgpu_bo_size(bo))
+ return -EINVAL;
+
+ /* Ensure last pfn not exceed max_pfn */
+ lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
+ if (lpfn >= adev->vm_manager.max_pfn)
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* amdgpu_vm_bo_map - map bo inside a vm
*
@@ -1518,27 +1837,20 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t saddr, uint64_t offset,
- uint64_t size, uint64_t flags)
+ uint64_t size, uint32_t flags)
{
struct amdgpu_bo_va_mapping *mapping, *tmp;
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
uint64_t eaddr;
+ int r;
- /* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
- return -EINVAL;
- if (saddr + size <= saddr || offset + size <= offset)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if ((bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
if (tmp) {
@@ -1584,24 +1896,16 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t saddr, uint64_t offset,
- uint64_t size, uint64_t flags)
+ uint64_t size, uint32_t flags)
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo = bo_va->base.bo;
uint64_t eaddr;
int r;
- /* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
- return -EINVAL;
- if (saddr + size <= saddr || offset + size <= offset)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if ((bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
/* Allocate all the needed memory */
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
@@ -1615,7 +1919,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
}
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
mapping->start = saddr;
mapping->last = eaddr;
@@ -1702,10 +2006,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
LIST_HEAD(removed);
uint64_t eaddr;
+ int r;
+
+ r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
+ if (r)
+ return r;
- eaddr = saddr + size - 1;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
/* Allocate all the needed memory */
before = kzalloc(sizeof(*before), GFP_KERNEL);
@@ -1770,10 +2078,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo *bo = before->bo_va->base.bo;
amdgpu_vm_it_insert(before, &vm->va);
- if (before->flags & AMDGPU_PTE_PRT)
+ if (before->flags & AMDGPU_PTE_PRT_FLAG(adev))
amdgpu_vm_prt_get(adev);
- if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
!before->bo_va->base.moved)
amdgpu_vm_bo_moved(&before->bo_va->base);
} else {
@@ -1785,10 +2093,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo *bo = after->bo_va->base.bo;
amdgpu_vm_it_insert(after, &vm->va);
- if (after->flags & AMDGPU_PTE_PRT)
+ if (after->flags & AMDGPU_PTE_PRT_FLAG(adev))
amdgpu_vm_prt_get(adev);
- if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
!after->bo_va->base.moved)
amdgpu_vm_bo_moved(&after->bo_va->base);
} else {
@@ -1868,7 +2176,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
if (bo) {
dma_resv_assert_held(bo->tbo.base.resv);
- if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
+ if (amdgpu_vm_is_bo_always_valid(vm, bo))
ttm_bo_set_bulk_move(&bo->tbo, NULL);
for (base = &bo_va->base.bo->vm_bo; *base;
@@ -1876,6 +2184,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
if (*base != &bo_va->base)
continue;
+ amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
*base = bo_va->base.next;
break;
}
@@ -1944,25 +2253,19 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
/**
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
- * @adev: amdgpu_device pointer
* @bo: amdgpu buffer object
* @evicted: is the BO evicted
*
* Mark @bo as invalid.
*/
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo, bool evicted)
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
{
struct amdgpu_vm_bo_base *bo_base;
- /* shadow bo doesn't have bo base, its validation needs its parent */
- if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo))
- bo = bo->parent;
-
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
- if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
+ if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) {
amdgpu_vm_bo_evicted(bo_base);
continue;
}
@@ -1973,7 +2276,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
if (bo->tbo.type == ttm_bo_type_kernel)
amdgpu_vm_bo_relocated(bo_base);
- else if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
+ else if (amdgpu_vm_is_bo_always_valid(vm, bo))
amdgpu_vm_bo_moved(bo_base);
else
amdgpu_vm_bo_invalidated(bo_base);
@@ -1981,6 +2284,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vm_bo_move - handle BO move
+ *
+ * @bo: amdgpu buffer object
+ * @new_mem: the new placement of the BO move
+ * @evicted: is the BO evicted
+ *
+ * Update the memory stats for the new placement and mark @bo as invalid.
+ */
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted)
+{
+ struct amdgpu_vm_bo_base *bo_base;
+
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
+ amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
+ spin_unlock(&vm->status_lock);
+ }
+
+ amdgpu_vm_bo_invalidate(bo, evicted);
+}
+
+/**
* amdgpu_vm_get_block_size - calculate VM page table size as power of two
*
* @vm_size: VM size
@@ -2050,7 +2379,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
(1 << 30) - 1) >> 30;
vm_size = roundup_pow_of_two(
- min(max(phys_ram_gb * 3, min_vm_size), max_size));
+ clamp(phys_ram_gb * 3, min_vm_size, max_size));
}
adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
@@ -2059,7 +2388,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
if (amdgpu_vm_block_size != -1)
tmp >>= amdgpu_vm_block_size - 9;
tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
- adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
+ adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp);
switch (adev->vm_manager.num_level) {
case 3:
adev->vm_manager.root_level = AMDGPU_VM_PDB2;
@@ -2089,10 +2418,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
else
adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
- DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
- vm_size, adev->vm_manager.num_level + 1,
- adev->vm_manager.block_size,
- adev->vm_manager.fragment_size);
+ dev_info(
+ adev->dev,
+ "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
+ vm_size, adev->vm_manager.num_level + 1,
+ adev->vm_manager.block_size, adev->vm_manager.fragment_size);
}
/**
@@ -2103,13 +2433,114 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
*/
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
{
- timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv,
- DMA_RESV_USAGE_BOOKKEEP,
- true, timeout);
+ timeout = drm_sched_entity_flush(&vm->immediate, timeout);
if (timeout <= 0)
return timeout;
- return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
+ return drm_sched_entity_flush(&vm->delayed, timeout);
+}
+
+static void amdgpu_vm_destroy_task_info(struct kref *kref)
+{
+ struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount);
+
+ kfree(ti);
+}
+
+static inline struct amdgpu_vm *
+amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
+{
+ struct amdgpu_vm *vm;
+ unsigned long flags;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+
+ return vm;
+}
+
+/**
+ * amdgpu_vm_put_task_info - reference down the vm task_info ptr
+ *
+ * @task_info: task_info struct under discussion.
+ *
+ * frees the vm task_info ptr at the last put
+ */
+void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info)
+{
+ if (task_info)
+ kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info);
+}
+
+/**
+ * amdgpu_vm_get_task_info_vm - Extracts task info for a vm.
+ *
+ * @vm: VM to get info from
+ *
+ * Returns the reference counted task_info structure, which must be
+ * referenced down with amdgpu_vm_put_task_info.
+ */
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
+{
+ struct amdgpu_task_info *ti = NULL;
+
+ if (vm) {
+ ti = vm->task_info;
+ kref_get(&vm->task_info->refcount);
+ }
+
+ return ti;
+}
+
+/**
+ * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID.
+ *
+ * @adev: drm device pointer
+ * @pasid: PASID identifier for VM
+ *
+ * Returns the reference counted task_info structure, which must be
+ * referenced down with amdgpu_vm_put_task_info.
+ */
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
+{
+ return amdgpu_vm_get_task_info_vm(
+ amdgpu_vm_get_vm_from_pasid(adev, pasid));
+}
+
+static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
+{
+ vm->task_info = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL);
+ if (!vm->task_info)
+ return -ENOMEM;
+
+ kref_init(&vm->task_info->refcount);
+ return 0;
+}
+
+/**
+ * amdgpu_vm_set_task_info - Sets VMs task info.
+ *
+ * @vm: vm for which to set the info
+ */
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
+{
+ if (!vm->task_info)
+ return;
+
+ if (vm->task_info->task.pid == current->pid)
+ return;
+
+ vm->task_info->task.pid = current->pid;
+ get_task_comm(vm->task_info->task.comm, current);
+
+ if (current->group_leader->mm != current->mm)
+ return;
+
+ vm->task_info->tgid = current->group_leader->pid;
+ get_task_comm(vm->task_info->process_name, current->group_leader);
}
/**
@@ -2118,13 +2549,15 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
* @adev: amdgpu_device pointer
* @vm: requested vm
* @xcp_id: GPU partition selection id
+ * @pasid: the pasid the VM is using on this GPU
*
* Init @vm fields.
*
* Returns:
* 0 for success, error for failure.
*/
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int32_t xcp_id, uint32_t pasid)
{
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
@@ -2134,6 +2567,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
vm->reserved_vmid[i] = NULL;
INIT_LIST_HEAD(&vm->evicted);
+ INIT_LIST_HEAD(&vm->evicted_user);
INIT_LIST_HEAD(&vm->relocated);
INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->idle);
@@ -2141,21 +2575,21 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->freed);
INIT_LIST_HEAD(&vm->done);
- INIT_LIST_HEAD(&vm->pt_freed);
- INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
+ INIT_KFIFO(vm->faults);
r = amdgpu_vm_init_entities(adev, vm);
if (r)
return r;
- vm->pte_support_ats = false;
+ ttm_lru_bulk_move_init(&vm->lru_bulk_move);
+
vm->is_compute_context = false;
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_GFX);
- DRM_DEBUG_DRIVER("VM update mode is %s\n",
- vm->use_cpu_for_update ? "CPU" : "SDMA");
+ dev_dbg(adev->dev, "VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update &&
!amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
@@ -2168,47 +2602,65 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
vm->last_update = dma_fence_get_stub();
vm->last_unlocked = dma_fence_get_stub();
vm->last_tlb_flush = dma_fence_get_stub();
- vm->generation = 0;
+ vm->generation = amdgpu_vm_generation(adev, NULL);
mutex_init(&vm->eviction_lock);
vm->evicting = false;
+ vm->tlb_fence_context = dma_fence_context_alloc(1);
r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
false, &root, xcp_id);
if (r)
goto error_free_delayed;
- root_bo = &root->bo;
+
+ root_bo = amdgpu_bo_ref(&root->bo);
r = amdgpu_bo_reserve(root_bo, true);
- if (r)
- goto error_free_root;
+ if (r) {
+ amdgpu_bo_unref(&root_bo);
+ goto error_free_delayed;
+ }
+ amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
if (r)
- goto error_unreserve;
-
- amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
+ goto error_free_root;
r = amdgpu_vm_pt_clear(adev, vm, root, false);
if (r)
- goto error_unreserve;
+ goto error_free_root;
- amdgpu_bo_unreserve(vm->root.bo);
+ r = amdgpu_vm_create_task_info(vm);
+ if (r)
+ dev_dbg(adev->dev, "Failed to create task info for VM\n");
- INIT_KFIFO(vm->faults);
+ /* Store new PASID in XArray (if non-zero) */
+ if (pasid != 0) {
+ r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL));
+ if (r < 0)
+ goto error_free_root;
- return 0;
+ vm->pasid = pasid;
+ }
-error_unreserve:
amdgpu_bo_unreserve(vm->root.bo);
+ amdgpu_bo_unref(&root_bo);
+
+ return 0;
error_free_root:
- amdgpu_bo_unref(&root->shadow);
+ /* If PASID was partially set, erase it from XArray before failing */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
+ amdgpu_vm_pt_free_root(adev, vm);
+ amdgpu_bo_unreserve(vm->root.bo);
amdgpu_bo_unref(&root_bo);
- vm->root.bo = NULL;
error_free_delayed:
dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
+ ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
amdgpu_vm_fini_entities(vm);
return r;
@@ -2235,35 +2687,17 @@ error_free_delayed:
*/
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
- bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
int r;
r = amdgpu_bo_reserve(vm->root.bo, true);
if (r)
return r;
- /* Check if PD needs to be reinitialized and do it before
- * changing any other state, in case it fails.
- */
- if (pte_support_ats != vm->pte_support_ats) {
- /* Sanity checks */
- if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
- r = -EINVAL;
- goto unreserve_bo;
- }
-
- vm->pte_support_ats = pte_support_ats;
- r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
- false);
- if (r)
- goto unreserve_bo;
- }
-
/* Update VM state */
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
- DRM_DEBUG_DRIVER("VM update mode is %s\n",
- vm->use_cpu_for_update ? "CPU" : "SDMA");
+ dev_dbg(adev->dev, "VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update &&
!amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
@@ -2288,28 +2722,19 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->last_update = dma_fence_get_stub();
vm->is_compute_context = true;
- /* Free the shadow bo for compute VM */
- amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
-
- goto unreserve_bo;
-
unreserve_bo:
amdgpu_bo_unreserve(vm->root.bo);
return r;
}
-/**
- * amdgpu_vm_release_compute - release a compute vm
- * @adev: amdgpu_device pointer
- * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute
- *
- * This is a correspondant of amdgpu_vm_make_compute. It decouples compute
- * pasid from vm. Compute should stop use of vm after this call.
- */
-void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+static bool amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
 {
- amdgpu_vm_set_pasid(adev, vm, 0);
- vm->is_compute_context = false;
+ for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
+ if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
+ vm->stats[i].evicted == 0))
+ return false;
+ }
+ return true;
 }
/**
@@ -2331,11 +2756,13 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
- flush_work(&vm->pt_free_work);
-
root = amdgpu_bo_ref(vm->root.bo);
amdgpu_bo_reserve(root, true);
- amdgpu_vm_set_pasid(adev, vm, 0);
+ /* Remove PASID mapping before destroying VM */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
dma_fence_wait(vm->last_unlocked, false);
dma_fence_put(vm->last_unlocked);
dma_fence_wait(vm->last_tlb_flush, false);
@@ -2345,7 +2772,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
dma_fence_put(vm->last_tlb_flush);
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
- if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) {
amdgpu_vm_prt_fini(adev, vm);
prt_fini_needed = false;
}
@@ -2376,12 +2803,20 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
dma_fence_put(vm->last_update);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
- if (vm->reserved_vmid[i]) {
- amdgpu_vmid_free_reserved(adev, i);
- vm->reserved_vmid[i] = false;
- }
+ amdgpu_vmid_free_reserved(adev, vm, i);
}
+ ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+
+ if (!amdgpu_vm_stats_is_zero(vm)) {
+ struct amdgpu_task_info *ti = vm->task_info;
+
+ dev_warn(adev->dev,
+ "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
+ ti->process_name, ti->tgid, ti->task.comm, ti->task.pid);
+ }
+
+ amdgpu_vm_put_task_info(vm->task_info);
}
/**
@@ -2464,6 +2899,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
union drm_amdgpu_vm *args = data;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
/* No valid flags defined yet */
if (args->in.flags)
@@ -2472,17 +2908,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
/* We only have requirement to reserve vmid from gfxhub */
- if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
- amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
- fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
- }
-
+ amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
break;
case AMDGPU_VM_OP_UNRESERVE_VMID:
- if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
- amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
- fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
- }
+ amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
break;
default:
return -EINVAL;
@@ -2492,51 +2921,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
}
/**
- * amdgpu_vm_get_task_info - Extracts task info for a PASID.
- *
- * @adev: drm device pointer
- * @pasid: PASID identifier for VM
- * @task_info: task_info to fill.
- */
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
- struct amdgpu_task_info *task_info)
-{
- struct amdgpu_vm *vm;
- unsigned long flags;
-
- xa_lock_irqsave(&adev->vm_manager.pasids, flags);
-
- vm = xa_load(&adev->vm_manager.pasids, pasid);
- if (vm)
- *task_info = vm->task_info;
-
- xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
-}
-
-/**
- * amdgpu_vm_set_task_info - Sets VMs task info.
- *
- * @vm: vm for which to set the info
- */
-void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
-{
- if (vm->task_info.pid)
- return;
-
- vm->task_info.pid = current->pid;
- get_task_comm(vm->task_info.task_name, current);
-
- if (current->group_leader->mm != current->mm)
- return;
-
- vm->task_info.tgid = current->group_leader->pid;
- get_task_comm(vm->task_info.process_name, current->group_leader);
-}
-
-/**
* amdgpu_vm_handle_fault - graceful handling of VM faults.
* @adev: amdgpu device pointer
* @pasid: PASID of the VM
+ * @ts: Timestamp of the fault
* @vmid: VMID, only used for GFX 9.4.3.
* @node_id: Node_id received in IH cookie. Only applicable for
* GFX 9.4.3.
@@ -2547,7 +2935,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* shouldn't be reported any more.
*/
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- u32 vmid, u32 node_id, uint64_t addr,
+ u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
bool write_fault)
{
bool is_compute_context = false;
@@ -2573,7 +2961,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
addr /= AMDGPU_GPU_PAGE_SIZE;
if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
- node_id, addr, write_fault)) {
+ node_id, addr, ts, write_fault)) {
amdgpu_bo_unref(&root);
return true;
}
@@ -2617,8 +3005,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
goto error_unlock;
}
- r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
- addr, flags, value, 0, NULL, NULL, NULL);
+ r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
+ NULL, addr, addr, flags, value, 0, NULL, NULL, NULL);
if (r)
goto error_unlock;
@@ -2627,7 +3015,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
error_unlock:
amdgpu_bo_unreserve(root);
if (r < 0)
- DRM_ERROR("Can't handle page fault (%d)\n", r);
+ dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
error_unref:
amdgpu_bo_unref(&root);
@@ -2661,6 +3049,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
unsigned int total_done_objs = 0;
unsigned int id = 0;
+ amdgpu_vm_assert_locked(vm);
+
spin_lock(&vm->status_lock);
seq_puts(m, "\tIdle BOs:\n");
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
@@ -2730,3 +3120,83 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
total_done_objs);
}
#endif
+
+/**
+ * amdgpu_vm_update_fault_cache - update cached fault info.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ * @status: GPUVM fault status register
+ * @vmhub: which vmhub got the fault
+ *
+ * Cache the fault info for later use by userspace in debugging.
+ */
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub)
+{
+ struct amdgpu_vm *vm;
+ unsigned long flags;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ /* Don't update the fault cache if status is 0. In the multiple
+ * fault case, subsequent faults will return a 0 status which is
+ * useless for userspace and replaces the useful fault status, so
+ * only update if status is non-0.
+ */
+ if (vm && status) {
+ vm->fault_info.addr = addr;
+ vm->fault_info.status = status;
+ /*
+ * Update the fault information globally for later usage
+ * when vm could be stale or freed.
+ */
+ adev->vm_manager.fault_info.addr = addr;
+ adev->vm_manager.fault_info.vmhub = vmhub;
+ adev->vm_manager.fault_info.status = status;
+
+ if (AMDGPU_IS_GFXHUB(vmhub)) {
+ vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
+ vm->fault_info.vmhub |=
+ (vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT;
+ } else if (AMDGPU_IS_MMHUB0(vmhub)) {
+ vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0;
+ vm->fault_info.vmhub |=
+ (vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT;
+ } else if (AMDGPU_IS_MMHUB1(vmhub)) {
+ vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1;
+ vm->fault_info.vmhub |=
+ (vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT;
+ } else {
+ WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
+ }
+ }
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+}
+
+/**
+ * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid
+ *
+ * @vm: VM to test against.
+ * @bo: BO to be tested.
+ *
+ * Returns true if the BO shares the dma_resv object with the root PD and is
+ * always guaranteed to be valid inside the VM.
+ */
+bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
+{
+ return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv;
+}
+
+void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
+ struct amdgpu_task_info *task_info)
+{
+ dev_err(adev->dev,
+ " Process %s pid %d thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task.comm, task_info->task.pid);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 204ab13184ed..cf0ec94e8a07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -35,6 +35,7 @@
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
#include "amdgpu_ids.h"
+#include "amdgpu_ttm.h"
struct drm_exec;
@@ -42,7 +43,6 @@ struct amdgpu_bo_va;
struct amdgpu_job;
struct amdgpu_bo_list_entry;
struct amdgpu_bo_vm;
-struct amdgpu_mem_stats;
/*
* GPUVM handling
@@ -94,8 +94,11 @@ struct amdgpu_mem_stats;
#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \
AMDGPU_PTE_PRT)
/* For GFX9 */
-#define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57)
-#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL)
+#define AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype) ((uint64_t)(mtype) << 57)
+#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10_SHIFT(3ULL)
+#define AMDGPU_PTE_MTYPE_VG10(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_VG10_MASK)) | \
+ AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype))
#define AMDGPU_MTYPE_NC 0
#define AMDGPU_MTYPE_CC 2
@@ -108,15 +111,41 @@ struct amdgpu_mem_stats;
| AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC))
/* gfx10 */
-#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48)
-#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL)
+#define AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype) ((uint64_t)(mtype) << 48)
+#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10_SHIFT(7ULL)
+#define AMDGPU_PTE_MTYPE_NV10(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_NV10_MASK)) | \
+ AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype))
+
+/* gfx12 */
+#define AMDGPU_PTE_PRT_GFX12 (1ULL << 56)
+#define AMDGPU_PTE_PRT_FLAG(adev) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PTE_PRT_GFX12 : AMDGPU_PTE_PRT)
+
+#define AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype) ((uint64_t)(mtype) << 54)
+#define AMDGPU_PTE_MTYPE_GFX12_MASK AMDGPU_PTE_MTYPE_GFX12_SHIFT(3ULL)
+#define AMDGPU_PTE_MTYPE_GFX12(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_GFX12_MASK)) | \
+ AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype))
+
+#define AMDGPU_PTE_DCC (1ULL << 58)
+#define AMDGPU_PTE_IS_PTE (1ULL << 63)
+
+/* PDE Block Fragment Size for gfx v12 */
+#define AMDGPU_PDE_BFS_GFX12(a) ((uint64_t)((a) & 0x1fULL) << 58)
+#define AMDGPU_PDE_BFS_FLAG(adev, a) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_BFS_GFX12(a) : AMDGPU_PDE_BFS(a))
+/* PDE is handled as PTE for gfx v12 */
+#define AMDGPU_PDE_PTE_GFX12 (1ULL << 63)
+#define AMDGPU_PDE_PTE_FLAG(adev) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_PTE_GFX12 : AMDGPU_PDE_PTE)
/* How to program VM fault handling */
#define AMDGPU_VM_FAULT_STOP_NEVER 0
#define AMDGPU_VM_FAULT_STOP_FIRST 1
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
-/* Reserve 4MB VRAM for page tables */
+/* Amount of VRAM reserved for page tables */
#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20)
/*
@@ -124,12 +153,32 @@ struct amdgpu_mem_stats;
* layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
*/
#define AMDGPU_MAX_VMHUBS 13
-#define AMDGPU_GFXHUB(x) (x)
-#define AMDGPU_MMHUB0(x) (8 + x)
-#define AMDGPU_MMHUB1(x) (8 + 4 + x)
-
-/* Reserve 2MB at top/bottom of address space for kernel use */
-#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20)
+#define AMDGPU_GFXHUB_START 0
+#define AMDGPU_MMHUB0_START 8
+#define AMDGPU_MMHUB1_START 12
+#define AMDGPU_GFXHUB(x) (AMDGPU_GFXHUB_START + (x))
+#define AMDGPU_MMHUB0(x) (AMDGPU_MMHUB0_START + (x))
+#define AMDGPU_MMHUB1(x) (AMDGPU_MMHUB1_START + (x))
+
+#define AMDGPU_IS_GFXHUB(x) ((x) >= AMDGPU_GFXHUB_START && (x) < AMDGPU_MMHUB0_START)
+#define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) < AMDGPU_MMHUB1_START)
+#define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < AMDGPU_MAX_VMHUBS)
+
+/* Reserve space at top/bottom of address space for kernel use */
+#define AMDGPU_VA_RESERVED_CSA_SIZE (2ULL << 20)
+#define AMDGPU_VA_RESERVED_CSA_START(adev) (((adev)->vm_manager.max_pfn \
+ << AMDGPU_GPU_PAGE_SHIFT) \
+ - AMDGPU_VA_RESERVED_CSA_SIZE)
+#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
+#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \
+ - AMDGPU_VA_RESERVED_SEQ64_SIZE)
+#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
+#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \
+ - AMDGPU_VA_RESERVED_TRAP_SIZE)
+#define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16)
+#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_TRAP_SIZE + \
+ AMDGPU_VA_RESERVED_SEQ64_SIZE + \
+ AMDGPU_VA_RESERVED_CSA_SIZE)
/* See vm_update_mode */
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
@@ -154,9 +203,13 @@ struct amdgpu_vm_bo_base {
/* protected by bo being reserved */
struct amdgpu_vm_bo_base *next;
- /* protected by spinlock */
+ /* protected by vm status_lock */
struct list_head vm_status;
+ /* if the bo is counted as shared in mem stats
+ * protected by vm status_lock */
+ bool shared;
+
/* protected by the BO being reserved */
bool moved;
};
@@ -183,10 +236,10 @@ struct amdgpu_vm_pte_funcs {
};
struct amdgpu_task_info {
- char process_name[TASK_COMM_LEN];
- char task_name[TASK_COMM_LEN];
- pid_t pid;
- pid_t tgid;
+ struct drm_wedge_task_info task;
+ char process_name[TASK_COMM_LEN];
+ pid_t tgid;
+ struct kref refcount;
};
/**
@@ -236,15 +289,26 @@ struct amdgpu_vm_update_params {
unsigned int num_dw_left;
/**
- * @table_freed: return true if page table is freed when updating
+ * @needs_flush: true whenever we need to invalidate the TLB
+ */
+ bool needs_flush;
+
+ /**
+ * @allow_override: true for memory that is not uncached: allows MTYPE
+ * to be overridden for NUMA local memory.
*/
- bool table_freed;
+ bool allow_override;
+
+ /**
+ * @tlb_flush_waitlist: temporary storage for BOs until tlb_flush
+ */
+ struct list_head tlb_flush_waitlist;
};
struct amdgpu_vm_update_funcs {
int (*map_table)(struct amdgpu_bo_vm *bo);
- int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode);
+ int (*prepare)(struct amdgpu_vm_update_params *p,
+ struct amdgpu_sync *sync, u64 k_job_id);
int (*update)(struct amdgpu_vm_update_params *p,
struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr, uint64_t flags);
@@ -252,6 +316,22 @@ struct amdgpu_vm_update_funcs {
struct dma_fence **fence);
};
+struct amdgpu_vm_fault_info {
+ /* fault address */
+ uint64_t addr;
+ /* fault status register */
+ uint32_t status;
+ /* which vmhub? gfxhub, mmhub, etc. */
+ unsigned int vmhub;
+};
+
+struct amdgpu_mem_stats {
+ struct drm_memory_stats drm;
+
+ /* buffers that requested this placement but are currently evicted */
+ uint64_t evicted;
+};
+
struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
@@ -266,7 +346,17 @@ struct amdgpu_vm {
/* Lock to protect vm_bo add/del/move on all lists of vm */
spinlock_t status_lock;
- /* BOs who needs a validation */
+ /* Memory statistics for this vm, protected by status_lock */
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+
+ /*
+ * The following lists contain amdgpu_vm_bo_base objects for either
+ * PDs, PTs or per VM BOs. The state transits are:
+ *
+ * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle
+ */
+
+ /* Per-VM and PT BOs which need validation */
struct list_head evicted;
/* PT BOs which relocated and their parent need an update */
@@ -278,18 +368,28 @@ struct amdgpu_vm {
/* All BOs of this VM not currently in the state machine */
struct list_head idle;
+ /*
+ * The following lists contain amdgpu_vm_bo_base objects for BOs which
+ * have their own dma_resv object and not depend on the root PD. Their
+ * state transits are:
+ *
+ * evicted_user or invalidated -> done
+ */
+
+ /* BOs for user mode queues that need a validation */
+ struct list_head evicted_user;
+
/* regular invalidated BOs, but not yet updated in the PT */
struct list_head invalidated;
- /* BO mappings freed, but not yet updated in the PT */
- struct list_head freed;
-
/* BOs which are invalidated, has been updated in the PTs */
struct list_head done;
- /* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
- struct list_head pt_freed;
- struct work_struct pt_free_work;
+ /*
+ * This list contains amdgpu_bo_va_mapping objects which have been freed
+ * but not updated in the PTs
+ */
+ struct list_head freed;
/* contains the page directory */
struct amdgpu_vm_bo_base root;
@@ -302,6 +402,8 @@ struct amdgpu_vm {
/* Last finished delayed update */
atomic64_t tlb_seq;
struct dma_fence *last_tlb_flush;
+ atomic64_t kfd_last_flushed_seq;
+ uint64_t tlb_fence_context;
/* How many times we had to re-generate the page tables */
uint64_t generation;
@@ -310,7 +412,7 @@ struct amdgpu_vm {
struct dma_fence *last_unlocked;
unsigned int pasid;
- bool reserved_vmid[AMDGPU_MAX_VMHUBS];
+ struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS];
/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
bool use_cpu_for_update;
@@ -318,9 +420,6 @@ struct amdgpu_vm {
/* Functions to use for VM table updates */
const struct amdgpu_vm_update_funcs *update_funcs;
- /* Flag to indicate ATS support from PTE for GFX9 */
- bool pte_support_ats;
-
/* Up to 128 pending retry page faults */
DECLARE_KFIFO(faults, u64, 128);
@@ -334,7 +433,7 @@ struct amdgpu_vm {
uint64_t pd_phys_addr;
/* Some basic info about the task */
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
/* Store positions of group of BOs */
struct ttm_lru_bulk_move lru_bulk_move;
@@ -343,6 +442,9 @@ struct amdgpu_vm {
/* Memory partition number, -1 means any partition */
int8_t mem_id;
+
+ /* cached fault info */
+ struct amdgpu_vm_fault_info fault_info;
};
struct amdgpu_vm_manager {
@@ -382,6 +484,8 @@ struct amdgpu_vm_manager {
* look up VM of a page fault
*/
struct xarray pasids;
+ /* Global registration of recent page fault information */
+ struct amdgpu_vm_fault_info fault_info;
};
struct amdgpu_bo_va_mapping;
@@ -396,21 +500,20 @@ extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs;
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
-int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u32 pasid);
-
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id, uint32_t pasid);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
unsigned int num_fences);
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
bool amdgpu_vm_ready(struct amdgpu_vm *vm);
uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int (*callback)(void *p, struct amdgpu_bo *bo),
- void *param);
+int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket,
+ int (*callback)(void *p, struct amdgpu_bo *bo),
+ void *param);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate);
@@ -418,21 +521,31 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence);
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
+ struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket);
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint32_t flush_type,
+ uint32_t xcc_mask);
void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
struct amdgpu_vm *vm, struct amdgpu_bo *bo);
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bool immediate, bool unlocked, bool flush_tlb,
- struct dma_resv *resv, uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset, uint64_t vram_base,
+ bool allow_override, struct amdgpu_sync *sync,
+ uint64_t start, uint64_t last, uint64_t flags,
+ uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *new_res, int sign);
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo);
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted);
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo);
@@ -442,11 +555,11 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr, uint64_t offset,
- uint64_t size, uint64_t flags);
+ uint64_t size, uint32_t flags);
int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr, uint64_t offset,
- uint64_t size, uint64_t flags);
+ uint64_t size, uint32_t flags);
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr);
@@ -466,10 +579,16 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job);
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
- struct amdgpu_task_info *task_info);
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid);
+
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
+
+void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info);
+
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- u32 vmid, u32 node_id, uint64_t addr,
+ u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
bool write_fault);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
@@ -477,7 +596,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
- struct amdgpu_mem_stats *stats);
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]);
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_vm *vmbo, bool immediate);
@@ -485,8 +604,6 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int level, bool immediate, struct amdgpu_bo_vm **vmbo,
int32_t xcp_id);
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
struct amdgpu_vm_bo_base *entry);
@@ -494,6 +611,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags);
void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+ struct amdgpu_vm_update_params *params);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
@@ -501,6 +620,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo);
+
/**
* amdgpu_vm_tlb_seq - return tlb flush sequence number
* @vm: the amdgpu_vm structure to query
@@ -554,4 +675,21 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
mutex_unlock(&vm->eviction_lock);
}
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
+
+void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
+ struct amdgpu_task_info *task_info);
+
+#define amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) \
+ list_for_each_entry(mapping, &(bo_va)->valids, list)
+#define amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) \
+ list_for_each_entry(mapping, &(bo_va)->invalids, list)
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 6e31621452de..22e2e5b47341 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -39,20 +39,20 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table)
* amdgpu_vm_cpu_prepare - prepare page table update with the CPU
*
* @p: see amdgpu_vm_update_params definition
- * @resv: reservation object with embedded fence
- * @sync_mode: synchronization mode
+ * @sync: sync obj with fences to wait on
+ * @k_job_id: the id for tracing/debug purposes
*
* Returns:
* Negativ errno, 0 for success.
*/
static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p,
- struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode)
+ struct amdgpu_sync *sync,
+ u64 k_job_id)
{
- if (!resv)
+ if (!sync)
return 0;
- return amdgpu_bo_sync_wait_resv(p->adev, resv, sync_mode, p->vm, true);
+ return amdgpu_sync_wait(sync, true);
}
/**
@@ -108,7 +108,9 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
struct dma_fence **fence)
{
- /* Flush HDP */
+ if (p->needs_flush)
+ atomic64_inc(&p->vm->tlb_seq);
+
mb();
amdgpu_device_flush_hdp(p->adev, NULL);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 96d601e209b8..f794fb1cc06e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_vm.h"
+#include "amdgpu_job.h"
/*
* amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
@@ -90,22 +91,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
}
/**
- * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
- *
- * @adev: amdgpu_device pointer
- *
- * Returns:
- * The number of entries in the root page directory which needs the ATS setting.
- */
-static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
-{
- unsigned int shift;
-
- shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
- return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
-}
-
-/**
* amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
*
* @adev: amdgpu_device pointer
@@ -379,7 +364,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_vm_update_params params;
struct amdgpu_bo *ancestor = &vmbo->bo;
- unsigned int entries, ats_entries;
+ unsigned int entries;
struct amdgpu_bo *bo = &vmbo->bo;
uint64_t addr;
int r, idx;
@@ -394,40 +379,11 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
entries = amdgpu_bo_size(bo) / 8;
- if (!vm->pte_support_ats) {
- ats_entries = 0;
-
- } else if (!bo->parent) {
- ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
- ats_entries = min(ats_entries, entries);
- entries -= ats_entries;
-
- } else {
- struct amdgpu_vm_bo_base *pt;
-
- pt = ancestor->vm_bo;
- ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
- if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
- ats_entries) {
- ats_entries = 0;
- } else {
- ats_entries = entries;
- entries = 0;
- }
- }
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
- if (vmbo->shadow) {
- struct amdgpu_bo *shadow = vmbo->shadow;
-
- r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
- if (r)
- return r;
- }
-
if (!drm_dev_enter(adev_to_drm(adev), &idx))
return -ENODEV;
@@ -440,49 +396,30 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.vm = vm;
params.immediate = immediate;
- r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
+ r = vm->update_funcs->prepare(&params, NULL,
+ AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR);
if (r)
goto exit;
addr = 0;
- if (ats_entries) {
- uint64_t value = 0, flags;
- flags = AMDGPU_PTE_DEFAULT_ATC;
+ uint64_t value = 0, flags = 0;
+ if (adev->asic_type >= CHIP_VEGA10) {
if (level != AMDGPU_VM_PTB) {
/* Handle leaf PDEs as PTEs */
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
+ flags |= AMDGPU_PDE_PTE_FLAG(adev);
+ amdgpu_gmc_get_vm_pde(adev, level,
+ &value, &flags);
+ } else {
+ /* Workaround for fault priority problem on GMC9 */
+ flags = AMDGPU_PTE_EXECUTABLE;
}
-
- r = vm->update_funcs->update(&params, vmbo, addr, 0,
- ats_entries, value, flags);
- if (r)
- goto exit;
-
- addr += ats_entries * 8;
}
- if (entries) {
- uint64_t value = 0, flags = 0;
-
- if (adev->asic_type >= CHIP_VEGA10) {
- if (level != AMDGPU_VM_PTB) {
- /* Handle leaf PDEs as PTEs */
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(adev, level,
- &value, &flags);
- } else {
- /* Workaround for fault priority problem on GMC9 */
- flags = AMDGPU_PTE_EXECUTABLE;
- }
- }
-
- r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
- value, flags);
- if (r)
- goto exit;
- }
+ r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
+ value, flags);
+ if (r)
+ goto exit;
r = vm->update_funcs->commit(&params, NULL);
exit:
@@ -505,10 +442,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int32_t xcp_id)
{
struct amdgpu_bo_param bp;
- struct amdgpu_bo *bo;
- struct dma_resv *resv;
unsigned int num_entries;
- int r;
memset(&bp, 0, sizeof(bp));
@@ -541,42 +475,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (vm->root.bo)
bp.resv = vm->root.bo->tbo.base.resv;
- r = amdgpu_bo_create_vm(adev, &bp, vmbo);
- if (r)
- return r;
-
- bo = &(*vmbo)->bo;
- if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
- (*vmbo)->shadow = NULL;
- return 0;
- }
-
- if (!bp.resv)
- WARN_ON(dma_resv_lock(bo->tbo.base.resv,
- NULL));
- resv = bp.resv;
- memset(&bp, 0, sizeof(bp));
- bp.size = amdgpu_vm_pt_size(adev, level);
- bp.domain = AMDGPU_GEM_DOMAIN_GTT;
- bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- bp.type = ttm_bo_type_kernel;
- bp.resv = bo->tbo.base.resv;
- bp.bo_ptr_size = sizeof(struct amdgpu_bo);
- bp.xcp_id_plus1 = xcp_id + 1;
-
- r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
-
- if (!resv)
- dma_resv_unlock(bo->tbo.base.resv);
-
- if (r) {
- amdgpu_bo_unref(&bo);
- return r;
- }
-
- amdgpu_bo_add_to_shadow_list(*vmbo);
-
- return 0;
+ return amdgpu_bo_create_vm(adev, &bp, vmbo);
}
/**
@@ -626,7 +525,6 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
return 0;
error_free_pt:
- amdgpu_bo_unref(&pt->shadow);
amdgpu_bo_unref(&pt_bo);
return r;
}
@@ -638,17 +536,12 @@ error_free_pt:
*/
static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
{
- struct amdgpu_bo *shadow;
-
if (!entry->bo)
return;
- shadow = amdgpu_bo_shadowed(entry->bo);
- if (shadow) {
- ttm_bo_set_bulk_move(&shadow->tbo, NULL);
- amdgpu_bo_unref(&shadow);
- }
- ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
+
+ amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1);
entry->bo->vm_bo = NULL;
+ ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
spin_lock(&entry->vm->status_lock);
list_del(&entry->vm_status);
@@ -656,62 +549,55 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
amdgpu_bo_unref(&entry->bo);
}
-void amdgpu_vm_pt_free_work(struct work_struct *work)
+/**
+ * amdgpu_vm_pt_free_list - free PD/PT levels
+ *
+ * @adev: amdgpu device structure
+ * @params: see amdgpu_vm_update_params definition
+ *
+ * Free the page directory objects saved in the flush list
+ */
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+ struct amdgpu_vm_update_params *params)
{
struct amdgpu_vm_bo_base *entry, *next;
- struct amdgpu_vm *vm;
- LIST_HEAD(pt_freed);
-
- vm = container_of(work, struct amdgpu_vm, pt_free_work);
+ bool unlocked = params->unlocked;
- spin_lock(&vm->status_lock);
- list_splice_init(&vm->pt_freed, &pt_freed);
- spin_unlock(&vm->status_lock);
+ if (list_empty(&params->tlb_flush_waitlist))
+ return;
- /* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
- amdgpu_bo_reserve(vm->root.bo, true);
+ /*
+ * An unlocked unmap only clears page table leaves, so warn if it gets here with page table entries to free.
+ */
+ WARN_ON(unlocked);
- list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
+ list_for_each_entry_safe(entry, next, &params->tlb_flush_waitlist, vm_status)
amdgpu_vm_pt_free(entry);
-
- amdgpu_bo_unreserve(vm->root.bo);
}
/**
- * amdgpu_vm_pt_free_dfs - free PD/PT levels
+ * amdgpu_vm_pt_add_list - add PD/PT level to the flush list
*
- * @adev: amdgpu device structure
- * @vm: amdgpu vm structure
- * @start: optional cursor where to start freeing PDs/PTs
- * @unlocked: vm resv unlock status
+ * @params: parameters for the update
+ * @cursor: first PT entry to start DF search from, non NULL
*
- * Free the page directory or page table level and all sub levels.
+ * This list will be freed after TLB flush.
*/
-static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start,
- bool unlocked)
+static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_pt_cursor *cursor)
{
- struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_pt_cursor seek;
struct amdgpu_vm_bo_base *entry;
- if (unlocked) {
- spin_lock(&vm->status_lock);
- for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
- list_move(&entry->vm_status, &vm->pt_freed);
-
- if (start)
- list_move(&start->entry->vm_status, &vm->pt_freed);
- spin_unlock(&vm->status_lock);
- schedule_work(&vm->pt_free_work);
- return;
+ spin_lock(&params->vm->status_lock);
+ for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
+ if (entry && entry->bo)
+ list_move(&entry->vm_status, &params->tlb_flush_waitlist);
}
- for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
- amdgpu_vm_pt_free(entry);
-
- if (start)
- amdgpu_vm_pt_free(start->entry);
+ /* enter start node now */
+ list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
+ spin_unlock(&params->vm->status_lock);
}
/**
@@ -723,34 +609,13 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
*/
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
- amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
-}
-
-/**
- * amdgpu_vm_pt_is_root_clean - check if a root PD is clean
- *
- * @adev: amdgpu_device pointer
- * @vm: the VM to check
- *
- * Check all entries of the root PD, if any subsequent PDs are allocated,
- * it means there are page table creating and filling, and is no a clean
- * VM
- *
- * Returns:
- * 0 if this VM is clean
- */
-bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
-{
- enum amdgpu_vm_level root = adev->vm_manager.root_level;
- unsigned int entries = amdgpu_vm_pt_num_entries(adev, root);
- unsigned int i = 0;
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
- for (i = 0; i < entries; i++) {
- if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
- return false;
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
+ if (entry)
+ amdgpu_vm_pt_free(entry);
}
- return true;
}
/**
@@ -765,11 +630,15 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
struct amdgpu_vm_bo_base *entry)
{
struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
- struct amdgpu_bo *bo = parent->bo, *pbo;
+ struct amdgpu_bo *bo, *pbo;
struct amdgpu_vm *vm = params->vm;
uint64_t pde, pt, flags;
unsigned int level;
+ if (WARN_ON(!parent))
+ return -EINVAL;
+
+ bo = parent->bo;
for (level = 0, pbo = bo->parent; pbo; ++level)
pbo = pbo->parent;
@@ -816,12 +685,12 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
struct amdgpu_device *adev = params->adev;
if (level != AMDGPU_VM_PTB) {
- flags |= AMDGPU_PDE_PTE;
+ flags |= AMDGPU_PDE_PTE_FLAG(params->adev);
amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
} else if (adev->asic_type >= CHIP_VEGA10 &&
!(flags & AMDGPU_PTE_VALID) &&
- !(flags & AMDGPU_PTE_PRT)) {
+ !(flags & AMDGPU_PTE_PRT_FLAG(params->adev))) {
/* Workaround for fault priority problem on GMC9 */
flags |= AMDGPU_PTE_EXECUTABLE;
@@ -843,14 +712,8 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
*/
if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
adev->gmc.gmc_funcs->override_vm_pte_flags &&
- num_possible_nodes() > 1) {
- if (!params->pages_addr)
- amdgpu_gmc_override_vm_pte_flags(adev, params->vm,
- addr, &flags);
- else
- dev_dbg(adev->dev,
- "override_vm_pte_flags skipped: non-contiguous\n");
- }
+ num_possible_nodes() > 1 && !params->pages_addr && params->allow_override)
+ amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags);
params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
flags);
@@ -1014,7 +877,15 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
pe_start = ((cursor.pfn >> shift) & mask) * 8;
- entry_end = ((uint64_t)mask + 1) << shift;
+
+ if (cursor.level < AMDGPU_VM_PTB && params->unlocked)
+ /*
+ * An MMU notifier callback does an unlocked unmap of a huge page: the leaf is a
+ * PDE entry, so only clear one entry. Search again for the next PDE or PTE leaf.
+ */
+ entry_end = 1ULL << shift;
+ else
+ entry_end = ((uint64_t)mask + 1) << shift;
entry_end += cursor.pfn & ~(entry_end - 1);
entry_end = min(entry_end, end);
@@ -1032,7 +903,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
min(nptes, 32u), dst, incr,
upd_flags,
- vm->task_info.tgid,
+ vm->task_info ? vm->task_info->tgid : 0,
vm->immediate.fence_context);
amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
cursor.level, pe_start, dst,
@@ -1061,10 +932,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
while (cursor.pfn < frag_start) {
/* Make sure previous mapping is freed */
if (cursor.entry->bo) {
- params->table_freed = true;
- amdgpu_vm_pt_free_dfs(adev, params->vm,
- &cursor,
- params->unlocked);
+ params->needs_flush = true;
+ amdgpu_vm_pt_add_list(params, &cursor);
}
amdgpu_vm_pt_next(adev, &cursor);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 349416e176a1..36805dcfa159 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -35,21 +35,12 @@
*/
static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
{
- int r;
-
- r = amdgpu_ttm_alloc_gart(&table->bo.tbo);
- if (r)
- return r;
-
- if (table->shadow)
- r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);
-
- return r;
+ return amdgpu_ttm_alloc_gart(&table->bo.tbo);
}
/* Allocate a new job for @count PTE updates */
static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
- unsigned int count)
+ unsigned int count, u64 k_job_id)
{
enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
: AMDGPU_IB_POOL_DELAYED;
@@ -65,7 +56,7 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM,
- ndw * 4, pool, &p->job);
+ ndw * 4, pool, &p->job, k_job_id);
if (r)
return r;
@@ -77,31 +68,29 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
* amdgpu_vm_sdma_prepare - prepare SDMA command submission
*
* @p: see amdgpu_vm_update_params definition
- * @resv: reservation object with embedded fence
- * @sync_mode: synchronization mode
+ * @sync: amdgpu_sync object with fences to wait for
+ * @k_job_id: identifier of the job, for tracing purpose
*
* Returns:
* Negativ errno, 0 for success.
*/
static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
- struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode)
+ struct amdgpu_sync *sync, u64 k_job_id)
{
- struct amdgpu_sync sync;
int r;
- r = amdgpu_vm_sdma_alloc_job(p, 0);
+ r = amdgpu_vm_sdma_alloc_job(p, 0, k_job_id);
if (r)
return r;
- if (!resv)
+ if (!sync)
return 0;
- amdgpu_sync_create(&sync);
- r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm);
- if (!r)
- r = amdgpu_sync_push_to_job(&sync, p->job);
- amdgpu_sync_free(&sync);
+ r = amdgpu_sync_push_to_job(sync, p->job);
+ if (r) {
+ p->num_dw_left = 0;
+ amdgpu_job_free(p->job);
+ }
return r;
}
@@ -126,6 +115,10 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
WARN_ON(ib->length_dw == 0);
amdgpu_ring_pad_ib(ring, ib);
+
+ if (p->needs_flush)
+ atomic64_inc(&p->vm->tlb_seq);
+
WARN_ON(ib->length_dw > p->num_dw_left);
f = amdgpu_job_submit(p->job);
@@ -257,24 +250,21 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
if (r)
return r;
- r = amdgpu_vm_sdma_alloc_job(p, count);
+ r = amdgpu_vm_sdma_alloc_job(p, count,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE);
if (r)
return r;
}
if (!p->pages_addr) {
/* set page commands needed */
- if (vmbo->shadow)
- amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr,
- count, incr, flags);
amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
incr, flags);
return 0;
}
/* copy commands needed */
- ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
- (vmbo->shadow ? 2 : 1);
+ ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
/* for padding */
ndw -= 7;
@@ -289,8 +279,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
pte[i] |= flags;
}
- if (vmbo->shadow)
- amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes);
amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);
pe += nptes * 8;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index 000000000000..5d26797356a3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_gmc.h"
+
+/*
+ * A fence which is signaled by amdgpu_tlb_fence_work() once the TLB flush for
+ * one PASID has been executed.
+ */
+struct amdgpu_tlb_fence {
+ struct dma_fence base; /* embedded fence handed out to the caller */
+ struct amdgpu_device *adev; /* device the TLB flush is issued on */
+ struct dma_fence *dependency; /* optional fence waited for (and released) before flushing */
+ struct work_struct work; /* work item running amdgpu_tlb_fence_work() */
+ spinlock_t lock; /* lock passed to dma_fence_init64() for @base */
+ uint16_t pasid; /* PASID passed to the TLB flush */
+
+};
+
+/* dma_fence_ops.get_driver_name callback: returns a constant string. */
+static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu tlb fence";
+}
+
+/* dma_fence_ops.get_timeline_name callback: returns a constant string. */
+static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f)
+{
+ return "amdgpu tlb timeline";
+}
+
+/*
+ * Worker behind a TLB fence: waits for the optional dependency, flushes the
+ * TLB for the stored PASID and then signals the fence.
+ */
+static void amdgpu_tlb_fence_work(struct work_struct *work)
+{
+ struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work);
+ int r;
+
+ /* The dependency is optional; its reference is dropped once it has been waited for */
+ if (f->dependency) {
+ dma_fence_wait(f->dependency, false);
+ dma_fence_put(f->dependency);
+ f->dependency = NULL;
+ }
+
+ r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0);
+ if (r) {
+ dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n",
+ f->pasid);
+ /* Record the failure on the fence; it is still signaled below */
+ dma_fence_set_error(&f->base, r);
+ }
+
+ dma_fence_signal(&f->base);
+ /* NOTE(review): presumably balances the dma_fence_get() taken before schedule_work() in amdgpu_vm_tlb_fence_create() */
+ dma_fence_put(&f->base);
+}
+
+/* Fence operations of the TLB fence; only the two name callbacks are provided. */
+static const struct dma_fence_ops amdgpu_tlb_fence_ops = {
+ .get_driver_name = amdgpu_tlb_fence_get_driver_name,
+ .get_timeline_name = amdgpu_tlb_fence_get_timeline_name
+};
+
+/**
+ * amdgpu_vm_tlb_fence_create - create a fence which signals after a TLB flush
+ *
+ * @adev: amdgpu device the flush is executed on
+ * @vm: VM providing the PASID, fence context and TLB sequence number
+ * @fence: in: optional dependency to wait for before flushing (may point to
+ *         NULL); out: fence which is signaled once the flush was executed
+ *
+ * The reference to the incoming dependency is always consumed. Normally it is
+ * handed to a worker which waits for it, flushes the TLB and signals the new
+ * fence. If the fence object cannot be allocated, the wait and the flush are
+ * done synchronously and the stub fence is returned instead.
+ */
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+				struct dma_fence **fence)
+{
+	struct amdgpu_tlb_fence *f;
+
+	f = kmalloc(sizeof(*f), GFP_KERNEL);
+	if (!f) {
+		/*
+		 * We can't fail since the PDEs and PTEs are already updated, so
+		 * just block for the dependency and execute the TLB flush
+		 */
+		if (*fence) {
+			dma_fence_wait(*fence, false);
+			/*
+			 * The worker path consumes the dependency reference,
+			 * so drop it here as well instead of leaking it when
+			 * *fence is overwritten below.
+			 */
+			dma_fence_put(*fence);
+		}
+
+		amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0);
+		*fence = dma_fence_get_stub();
+		return;
+	}
+
+	/* Ownership of the dependency reference moves to the worker */
+	f->adev = adev;
+	f->dependency = *fence;
+	f->pasid = vm->pasid;
+	INIT_WORK(&f->work, amdgpu_tlb_fence_work);
+	spin_lock_init(&f->lock);
+
+	dma_fence_init64(&f->base, &amdgpu_tlb_fence_ops, &f->lock,
+			 vm->tlb_fence_context, atomic64_read(&vm->tlb_seq));
+
+	/* TODO: We probably need a separate wq here */
+	/* Extra reference for the worker, dropped at the end of amdgpu_tlb_fence_work() */
+	dma_fence_get(&f->base);
+	schedule_work(&f->work);
+
+	*fence = &f->base;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
new file mode 100644
index 000000000000..474bfe36c0c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -0,0 +1,992 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_vpe.h"
+#include "amdgpu_smu.h"
+#include "soc15_common.h"
+#include "vpe_v6_1.h"
+
+/* NOTE(review): presumably the size in bytes of one VPE slot in the CSA; used as per-index stride in vpe_get_csa_mc_addr() */
+#define AMDGPU_CSA_VPE_SIZE 64
+/* VPE CSA resides in the 4th page of CSA */
+#define AMDGPU_CSA_VPE_OFFSET (4096 * 3)
+
+/* 1 second timeout */
+#define VPE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+/* Number of DPM levels evaluated by amdgpu_vpe_configure_dpm() */
+#define VPE_MAX_DPM_LEVEL 4
+/* Number of fractional bits in the fixed-point ratios built by vpe_u1_8_from_fraction() */
+#define FIXED1_8_BITS_PER_FRACTIONAL_PART 8
+/* Integer part of such a fixed-point ratio (shifts the fractional bits away) */
+#define GET_PRATIO_INTEGER_PART(x) ((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART)
+
+static void vpe_set_ring_funcs(struct amdgpu_device *adev);
+
+/*
+ * 16-bit unsigned division: returns dividend / divisor and stores
+ * dividend % divisor in @remainder. @divisor must not be zero.
+ */
+static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder)
+{
+	const uint16_t quotient = dividend / divisor;
+
+	*remainder = dividend % divisor;
+
+	return quotient;
+}
+
+/*
+ * Thin wrapper around div16_u16_rem(): returns the quotient and stores the
+ * remainder of dividend / divisor in @remainder.
+ */
+static inline uint16_t complete_integer_division_u16(uint16_t dividend,
+						     uint16_t divisor,
+						     uint16_t *remainder)
+{
+	return div16_u16_rem(dividend, divisor, remainder);
+}
+
+/*
+ * Convert numerator / denominator into a fixed-point value with
+ * FIXED1_8_BITS_PER_FRACTIONAL_PART fractional bits, rounded to nearest.
+ *
+ * Returns 0 when the result cannot be represented: the denominator is zero,
+ * the integer part is larger than 127, or rounding would exceed 32767.
+ */
+static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
+{
+	uint16_t int_remainder;
+	/*
+	 * Kept in 32 bits: the remainder is doubled on every step and would
+	 * be truncated in 16 bits for denominators above 32767.
+	 */
+	uint32_t remainder;
+	uint16_t res_value;
+	uint16_t summand;
+	unsigned int i;
+
+	/* Avoid dividing by zero; 0 is already the "not representable" result */
+	if (!denominator)
+		return 0;
+
+	/* determine integer part */
+	res_value = complete_integer_division_u16(numerator, denominator,
+						  &int_remainder);
+
+	if (res_value > 127 /* CHAR_MAX */)
+		return 0;
+
+	/* determine fractional part, one bit per iteration (long division) */
+	remainder = int_remainder;
+	for (i = 0; i < FIXED1_8_BITS_PER_FRACTIONAL_PART; i++) {
+		remainder <<= 1;
+		res_value <<= 1;
+
+		if (remainder >= denominator) {
+			res_value |= 1;
+			remainder -= denominator;
+		}
+	}
+
+	/* round up LSB */
+	summand = (remainder << 1) >= denominator;
+
+	if ((res_value + summand) > 32767 /* SHRT_MAX */)
+		return 0;
+
+	res_value += summand;
+
+	return res_value;
+}
+
+/*
+ * Fixed-point ratio from_frequency / to_frequency as computed by
+ * vpe_u1_8_from_fraction(); ratios with an integer part above 1 yield 0.
+ */
+static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency)
+{
+	const uint16_t ratio = vpe_u1_8_from_fraction(from_frequency, to_frequency);
+
+	return GET_PRATIO_INTEGER_PART(ratio) > 1 ? 0 : ratio;
+}
+
+/*
+ * VPE has 4 DPM levels from level 0 (lowest) to 3 (highest),
+ * VPE FW will dynamically decide which level should be used according to current loading.
+ *
+ * Get VPE and SOC clocks from PM, and select the appropriate four clock values,
+ * calculate the ratios of adjusting from one clock to another.
+ * The VPE FW can then request the appropriate frequency from the PMFW.
+ *
+ * Returns 0 when DPM is not enabled on the device or when the ratios were
+ * programmed. Returns -EINVAL, after clearing the DPM enable bit again, when
+ * the clock table cannot be read or one of the four selected frequencies is 0.
+ */
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+ uint32_t dpm_ctl;
+
+ if (adev->pm.dpm_enabled) {
+ struct dpm_clocks clock_table = { 0 };
+ struct dpm_clock *VPEClks;
+ struct dpm_clock *SOCClks;
+ uint32_t idx;
+ uint32_t vpeclk_enalbled_num = 0;
+ uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0;
+ uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0;
+
+ dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+ dpm_ctl |= 1; /* DPM enablement */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+
+ /* Get VPECLK and SOCCLK */
+ if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) {
+ dev_dbg(adev->dev, "%s: get clock failed!\n", __func__);
+ goto disable_dpm;
+ }
+
+ SOCClks = clock_table.SocClocks;
+ VPEClks = clock_table.VPEClocks;
+
+ /* Confirm the number of enabled VPE clocks
+ * Enabled VPE clocks are ordered from low to high in VPEClks
+ * The highest valid clock index+1 is the number of VPEClks
+ */
+ for (idx = PP_SMU_NUM_VPECLK_DPM_LEVELS; idx && !vpeclk_enalbled_num; idx--)
+ if (VPEClks[idx-1].Freq)
+ vpeclk_enalbled_num = idx;
+
+ /* VPE DPM only cares about 4 levels. */
+ for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) {
+ uint32_t soc_dpm_level;
+ uint32_t min_freq;
+
+ /* idx 0..3 selects clock table index 0, 3, 5, 7 before clamping */
+ if (idx == 0)
+ soc_dpm_level = 0;
+ else
+ soc_dpm_level = (idx * 2) + 1;
+
+ /* clamp the max level */
+ if (soc_dpm_level > vpeclk_enalbled_num - 1)
+ soc_dpm_level = vpeclk_enalbled_num - 1;
+
+ /* use the lower of the SOC and VPE clock at that level */
+ min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ?
+ SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq;
+
+ switch (idx) {
+ case 0:
+ pratio_vmin_freq = min_freq;
+ break;
+ case 1:
+ pratio_vmid_freq = min_freq;
+ break;
+ case 2:
+ pratio_vnorm_freq = min_freq;
+ break;
+ case 3:
+ pratio_vmax_freq = min_freq;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* All four frequencies must be non-zero, they are used as divisors below */
+ if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) {
+ uint32_t pratio_ctl;
+
+ pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq);
+ pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq);
+ pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq);
+
+ /* the three ratios are packed at bit offsets 0, 9 and 18 */
+ pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18);
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl); /* PRatio */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000); /* 1ms, unit=1/24MHz */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000); /* 50ms */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */
+ dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__);
+ } else {
+ dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__);
+ goto disable_dpm;
+ }
+ }
+ return 0;
+
+disable_dpm:
+ /* Undo the enablement done at the top: clear bit 0 of the DPM enable register */
+ dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+ dpm_ctl &= 0xfffffffe; /* Disable DPM */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+ dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__);
+ return -EINVAL;
+}
+
+/*
+ * Hand an 8 byte VPE firmware descriptor located at the VPE command buffer
+ * address to psp_execute_ip_fw_load() and return its result.
+ */
+int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
+{
+	struct amdgpu_firmware_info ucode = { 0 };
+
+	ucode.ucode_id = AMDGPU_UCODE_ID_VPE;
+	ucode.mc_addr = adev->vpe.cmdbuf_gpu_addr;
+	ucode.ucode_size = 8;
+
+	return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+/*
+ * Request the VPE firmware image, record its version information and, when
+ * the firmware is loaded through PSP, register the CTX and CTL parts in
+ * adev->firmware.
+ *
+ * Returns 0 on success or the error from amdgpu_ucode_request(); in the
+ * latter case the firmware reference is released again.
+ */
+int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = vpe->ring.adev;
+	const struct vpe_firmware_header_v1_0 *hdr;
+	struct amdgpu_firmware_info *ctx_info, *ctl_info;
+	char fw_prefix[32];
+	int ret;
+
+	amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix));
+	ret = amdgpu_ucode_request(adev, &adev->vpe.fw, AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s.bin", fw_prefix);
+	if (ret) {
+		dev_err(adev->dev, "fail to initialize vpe microcode\n");
+		release_firmware(adev->vpe.fw);
+		adev->vpe.fw = NULL;
+		return ret;
+	}
+
+	hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
+	adev->vpe.fw_version = le32_to_cpu(hdr->header.ucode_version);
+	adev->vpe.feature_version = le32_to_cpu(hdr->ucode_feature_version);
+
+	/* Nothing more to register unless PSP loads the firmware */
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
+	/* Both parts live in the same image; each adds its page-aligned size */
+	ctx_info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTX];
+	ctx_info->ucode_id = AMDGPU_UCODE_ID_VPE_CTX;
+	ctx_info->fw = adev->vpe.fw;
+	adev->firmware.fw_size +=
+		ALIGN(le32_to_cpu(hdr->ctx_ucode_size_bytes), PAGE_SIZE);
+
+	ctl_info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTL];
+	ctl_info->ucode_id = AMDGPU_UCODE_ID_VPE_CTL;
+	ctl_info->fw = adev->vpe.fw;
+	adev->firmware.fw_size +=
+		ALIGN(le32_to_cpu(hdr->ctl_ucode_size_bytes), PAGE_SIZE);
+
+	return 0;
+}
+
+/*
+ * Set up the VPE ring (doorbell based, on MMHUB0(0), named "vpe") and
+ * initialize it through amdgpu_ring_init(), whose result is returned.
+ */
+int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+	struct amdgpu_ring *vpe_ring = &vpe->ring;
+
+	vpe_ring->ring_obj = NULL;
+	vpe_ring->use_doorbell = true;
+	vpe_ring->vm_hub = AMDGPU_MMHUB0(0);
+	vpe_ring->doorbell_index = (adev->doorbell_index.vpe_ring << 1);
+	snprintf(vpe_ring->name, 4, "vpe");
+
+	return amdgpu_ring_init(adev, vpe_ring, 1024, &vpe->trap_irq, 0,
+				AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/* Tear down the VPE ring with amdgpu_ring_fini(); always returns 0. */
+int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe)
+{
+ amdgpu_ring_fini(&vpe->ring);
+
+ return 0;
+}
+
+/*
+ * Early init of the VPE IP block: installs the version specific callbacks,
+ * the ring functions and the register set.
+ *
+ * Returns 0 on success or -EINVAL for a VPE IP version not handled here.
+ */
+static int vpe_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ /* All supported versions share the v6.1 callbacks; only 6.1.1 enables collaborate mode */
+ switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 3):
+ vpe_v6_1_set_funcs(vpe);
+ break;
+ case IP_VERSION(6, 1, 1):
+ vpe_v6_1_set_funcs(vpe);
+ vpe->collaborate_mode = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ vpe_set_ring_funcs(adev);
+ vpe_set_regs(vpe);
+
+ dev_info(adev->dev, "VPE: collaborate mode %s", vpe->collaborate_mode ? "true" : "false");
+
+ return 0;
+}
+
+/*
+ * Delayed work which power gates VPE once no fences are emitted on its ring
+ * any more; otherwise it re-arms itself after VPE_IDLE_TIMEOUT.
+ */
+static void vpe_idle_work_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, vpe.idle_work.work);
+	const unsigned int emitted = amdgpu_fence_count_emitted(&adev->vpe.ring);
+
+	if (emitted != 0) {
+		/* Still busy, check again later */
+		schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+		return;
+	}
+
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+}
+
+/*
+ * Allocate the page sized VPE command buffer in GTT and initialize the
+ * context state and the idle work.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_kernel().
+ */
+static int vpe_common_init(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+	const int err = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+						AMDGPU_GEM_DOMAIN_GTT,
+						&adev->vpe.cmdbuf_obj,
+						&adev->vpe.cmdbuf_gpu_addr,
+						(void **)&adev->vpe.cmdbuf_cpu_addr);
+
+	if (!err) {
+		vpe->context_started = false;
+		INIT_DELAYED_WORK(&adev->vpe.idle_work, vpe_idle_work_handler);
+		return 0;
+	}
+
+	dev_err(adev->dev, "VPE: failed to allocate cmdbuf bo %d\n", err);
+	return err;
+}
+
+/*
+ * Software init of the VPE IP block: common state, interrupt, ring and
+ * microcode, then the supported reset mask and its sysfs initialization.
+ *
+ * Returns 0 on success or the error of the first step that failed; nothing
+ * is undone here on failure.
+ */
+static int vpe_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	int ret;
+
+	ret = vpe_common_init(vpe);
+	if (ret)
+		return ret;
+
+	ret = vpe_irq_init(vpe);
+	if (ret)
+		return ret;
+
+	ret = vpe_ring_init(vpe);
+	if (ret)
+		return ret;
+
+	ret = vpe_init_microcode(vpe);
+	if (ret)
+		return ret;
+
+	vpe->supported_reset = amdgpu_get_soft_full_reset_mask(&vpe->ring);
+	/* Per queue reset is only advertised when not running as SR-IOV VF */
+	if (!amdgpu_sriov_vf(adev))
+		vpe->supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+	return amdgpu_vpe_sysfs_reset_mask_init(adev);
+}
+
+/*
+ * Software teardown of the VPE IP block: releases the firmware, calls
+ * amdgpu_vpe_sysfs_reset_mask_fini(), finalizes the ring and frees the
+ * command buffer BO. Always returns 0.
+ */
+static int vpe_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ release_firmware(vpe->fw);
+ vpe->fw = NULL;
+
+ amdgpu_vpe_sysfs_reset_mask_fini(adev);
+ vpe_ring_fini(vpe);
+
+ amdgpu_bo_free_kernel(&adev->vpe.cmdbuf_obj,
+ &adev->vpe.cmdbuf_gpu_addr,
+ (void **)&adev->vpe.cmdbuf_cpu_addr);
+
+ return 0;
+}
+
+/*
+ * Hardware init of the VPE IP block: ungate power, load the microcode and
+ * start the ring.
+ *
+ * Returns 0 on success or the error of the first step that failed.
+ */
+static int vpe_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	int err;
+
+	/* Power on VPE */
+	err = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+						     AMD_PG_STATE_UNGATE);
+	if (err)
+		return err;
+
+	err = vpe_load_microcode(vpe);
+	if (err)
+		return err;
+
+	return vpe_ring_start(vpe);
+}
+
+/*
+ * Hardware teardown of the VPE IP block: cancels the idle work, stops the
+ * ring and gates power. The result of the power gating request is ignored;
+ * always returns 0.
+ */
+static int vpe_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ /* Wait for a running idle handler so it cannot run after the ring is stopped */
+ cancel_delayed_work_sync(&adev->vpe.idle_work);
+
+ vpe_ring_stop(vpe);
+
+ /* Power off VPE */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+
+ return 0;
+}
+
+/* Suspend is implemented as a full hardware teardown, see vpe_hw_fini(). */
+static int vpe_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return vpe_hw_fini(ip_block);
+}
+
+/* Resume is implemented as a full hardware init, see vpe_hw_init(). */
+static int vpe_resume(struct amdgpu_ip_block *ip_block)
+{
+ return vpe_hw_init(ip_block);
+}
+
+/*
+ * Write @count NOP dwords to @ring. The first dword additionally carries
+ * count - 1 in its header, the remaining ones are plain NOPs. Nothing is
+ * written for a @count of 0.
+ */
+static void vpe_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	/* Same type as @count, avoids a signed/unsigned comparison */
+	uint32_t i;
+
+	if (!count)
+		return;
+
+	amdgpu_ring_write(ring, ring->funcs->nop |
+			  VPE_CMD_NOP_HEADER_COUNT(count - 1));
+
+	for (i = 1; i < count; i++)
+		amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/*
+ * Address of the VPE area inside the CSA for a job running with @vmid.
+ *
+ * Returns 0 when running as SR-IOV VF, for VMID 0 or when adev->gfx.mcbp is
+ * not set.
+ */
+static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t slot = 0;
+
+	if (!amdgpu_sriov_vf(adev) && vmid != 0 && adev->gfx.mcbp)
+		return amdgpu_csa_vaddr(adev) + AMDGPU_CSA_VPE_OFFSET +
+		       slot * AMDGPU_CSA_VPE_SIZE;
+
+	return 0;
+}
+
+/*
+ * Emit a PRED_EXE command (header with @device_select at bit 16, then the
+ * low 13 bits of @exec_count). Only emitted in collaborate mode, otherwise
+ * nothing is written to the ring.
+ */
+static void vpe_ring_emit_pred_exec(struct amdgpu_ring *ring,
+				    uint32_t device_select,
+				    uint32_t exec_count)
+{
+	if (ring->adev->vpe.collaborate_mode) {
+		amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_PRED_EXE, 0) |
+				  (device_select << 16));
+		amdgpu_ring_write(ring, exec_count & 0x1fff);
+	}
+}
+
+/*
+ * Emit an INDIRECT command (6 dwords) which points the engine at @ib:
+ * header with the low 4 bits of the job's VMID, IB address, IB length in
+ * dwords and the CSA address from vpe_get_csa_mc_addr(). @flags is not used.
+ */
+static void vpe_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = vpe_get_csa_mc_addr(ring, vmid);
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0) |
+ VPE_CMD_INDIRECT_HEADER_VMID(vmid & 0xf));
+
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ /* csa_mc_addr is 0 when vpe_get_csa_mc_addr() provides no CSA address */
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/*
+ * Emit FENCE commands writing @seq to @addr: one command for the lower 32
+ * bits and, with AMDGPU_FENCE_FLAG_64BIT, a second one writing the upper 32
+ * bits to @addr + 4. With AMDGPU_FENCE_FLAG_INT a TRAP command follows.
+ */
+static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
+				uint64_t seq, unsigned int flags)
+{
+	const int num_writes = (flags & AMDGPU_FENCE_FLAG_64BIT) ? 2 : 1;
+	int dw;
+
+	for (dw = 0; dw < num_writes; dw++) {
+		/* write the fence */
+		amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
+		/* zero in first two bits */
+		WARN_ON_ONCE(addr & 0x3);
+		amdgpu_ring_write(ring, lower_32_bits(addr));
+		amdgpu_ring_write(ring, upper_32_bits(addr));
+		amdgpu_ring_write(ring, dw == 0 ? lower_32_bits(seq) : upper_32_bits(seq));
+		addr += 4;
+	}
+
+	if (!(flags & AMDGPU_FENCE_FLAG_INT))
+		return;
+
+	/* generate an interrupt */
+	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_TRAP, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
+/*
+ * Emit a POLL_REGMEM command which polls the ring's fence address until the
+ * value equals the last emitted sequence number (ring->fence_drv.sync_seq).
+ */
+static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* exec_count 6 matches the 6 dwords written below */
+ vpe_ring_emit_pred_exec(ring, 0, 6);
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
+ VPE_POLL_REGMEM_SUBOP_REGMEM) |
+ VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ VPE_CMD_POLL_REGMEM_HEADER_MEM(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ VPE_CMD_POLL_REGMEM_DW5_INTERVAL(4));
+}
+
+static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ vpe_ring_emit_pred_exec(ring, 0, 3);
+ /* REG_WRITE packet, three dwords: header, reg << 2, value */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+				   uint32_t val, uint32_t mask)
+{
+	const uint32_t poll_ctl = VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+				  VPE_CMD_POLL_REGMEM_DW5_INTERVAL(10);
+
+	vpe_ring_emit_pred_exec(ring, 0, 6);
+	amdgpu_ring_write(ring, /* compare function 3: equal */
+			  VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) |
+			  VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | VPE_CMD_POLL_REGMEM_HEADER_MEM(0));
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val); /* reference */
+	amdgpu_ring_write(ring, mask); /* mask */
+	amdgpu_ring_write(ring, poll_ctl);
+}
+
+static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid,
+ uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* flush is fully delegated to the GMC helper */
+}
+
+static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring,
+					    uint64_t addr)
+{
+	unsigned int placeholder_pos;
+
+	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, 1);
+	/* masked wptr of the zero dword written next; handed back to the caller */
+	placeholder_pos = ring->wptr & ring->buf_mask;
+	amdgpu_ring_write(ring, 0);
+	return placeholder_pos;
+}
+
+static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint32_t preempt_reg = vpe->regs.queue0_preempt;
+	bool signaled = false;
+	int usec;
+
+	/* assert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+	/* emit the trailing fence */
+	ring->trail_seq += 1;
+	/* NOTE(review): the amdgpu_ring_alloc() result is not checked here */
+	amdgpu_ring_alloc(ring, 10);
+	vpe_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, 0);
+	amdgpu_ring_commit(ring);
+
+	/* assert IB preemption */
+	WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 1);
+
+	/* poll the trailing fence, one microsecond per attempt */
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		signaled = ring->trail_seq == le32_to_cpu(*(ring->trail_fence_cpu_addr));
+		if (signaled)
+			break;
+		udelay(1);
+	}
+
+	if (!signaled)
+		dev_err(adev->dev, "ring %d failed to be preempted\n", ring->idx);
+
+	/* deassert IB preemption */
+	WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 0);
+
+	/* deassert the preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, true);
+
+	return signaled ? 0 : -EINVAL;
+}
+
+static int vpe_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0; /* both arguments are ignored; always reports success */
+}
+
+static int vpe_set_powergating_state(struct amdgpu_ip_block *ip_block,
+				     enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const bool gate = (state == AMD_PG_STATE_GATE);
+
+	/* only logged: the request is still forwarded when DPM is disabled */
+	if (!adev->pm.dpm_enabled)
+		dev_err(adev->dev, "Without PM, cannot support powergating\n");
+
+	dev_dbg(adev->dev, "%s: %s!\n", __func__, gate ? "GATE":"UNGATE");
+
+	/* GATE disables VPE through DPM, any other state enables it */
+	amdgpu_dpm_enable_vpe(adev, !gate);
+	/* after gating, vpe_ring_begin_use() must power up and notify again */
+	if (gate)
+		adev->vpe.context_started = false;
+
+	return 0;
+}
+
+static uint64_t vpe_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint64_t raw;
+
+	if (ring->use_doorbell) {
+		raw = atomic64_read((atomic64_t *)ring->rptr_cpu_addr);
+		dev_dbg(adev->dev, "rptr/doorbell before shift == 0x%016llx\n", raw);
+		return raw >> 2;
+	}
+
+	raw = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_hi));
+	raw = (raw << 32) | /* high dword is read first, then the low one */
+	      RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_lo));
+	dev_dbg(adev->dev, "rptr before shift [%i] == 0x%016llx\n", ring->me, raw);
+	return raw >> 2;
+}
+
+static uint64_t vpe_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint64_t raw;
+
+	if (ring->use_doorbell) {
+		raw = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+		dev_dbg(adev->dev, "wptr/doorbell before shift == 0x%016llx\n", raw);
+		return raw >> 2;
+	}
+
+	raw = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi));
+	raw = (raw << 32) | /* high dword is read first, then the low one */
+	      RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo));
+	dev_dbg(adev->dev, "wptr before shift [%i] == 0x%016llx\n", ring->me, raw);
+	return raw >> 2;
+}
+
+static void vpe_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	const uint64_t wptr = ring->wptr << 2;	/* the value written everywhere below */
+
+	/* adjacent literals keep source indentation out of the debug text */
+	if (ring->use_doorbell) {
+		dev_dbg(adev->dev, "Using doorbell, "
+			"wptr_offs == 0x%08x, "
+			"lower_32_bits(ring->wptr) << 2 == 0x%08x, "
+			"upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+			ring->wptr_offs, lower_32_bits(wptr), upper_32_bits(wptr));
+		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr);
+		WDOORBELL64(ring->doorbell_index, wptr);
+		if (vpe->collaborate_mode)
+			WDOORBELL64(ring->doorbell_index + 4, wptr);
+	} else {
+		int i;
+
+		/* no doorbell: program the WPTR lo/hi registers of every instance */
+		for (i = 0; i < vpe->num_instances; i++) {
+			dev_dbg(adev->dev, "Not using doorbell, "
+				"regVPEC_QUEUE0_RB_WPTR == 0x%08x, "
+				"regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n",
+				lower_32_bits(wptr), upper_32_bits(wptr));
+			WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_lo),
+			       lower_32_bits(wptr));
+			WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_hi),
+			       upper_32_bits(wptr));
+		}
+	}
+}
+
+static int vpe_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t test_pattern = 0xdeadbeef;
+	uint32_t wb_slot, usec;
+	uint64_t wb_gpu_addr;
+	int ret;
+
+	ret = amdgpu_device_wb_get(adev, &wb_slot);
+	if (ret) {
+		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
+		return ret;
+	}
+
+	adev->wb.wb[wb_slot] = 0;
+	wb_gpu_addr = adev->wb.gpu_addr + (wb_slot * 4);
+
+	ret = amdgpu_ring_alloc(ring, 4);
+	if (ret) {
+		dev_err(adev->dev, "amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, ret);
+		goto out;
+	}
+	/* FENCE packet that writes test_pattern to the writeback slot */
+	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
+	amdgpu_ring_write(ring, lower_32_bits(wb_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(wb_gpu_addr));
+	amdgpu_ring_write(ring, test_pattern);
+	amdgpu_ring_commit(ring);
+
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		if (le32_to_cpu(adev->wb.wb[wb_slot]) == test_pattern)
+			break;
+		udelay(1);
+	}
+	if (usec >= adev->usec_timeout)
+		ret = -ETIMEDOUT;
+out:
+	amdgpu_device_wb_free(adev, wb_slot);
+
+	return ret;
+}
+
+static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const uint32_t test_pattern = 0xdeadbeef;
+	struct dma_fence *f = NULL;
+	struct amdgpu_ib ib = {};
+	uint32_t index, dw;
+	uint64_t wb_addr;
+	long wait;
+	int ret;
+
+	ret = amdgpu_device_wb_get(adev, &index);
+	if (ret) {
+		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
+		return ret;
+	}
+
+	adev->wb.wb[index] = 0;
+	wb_addr = adev->wb.gpu_addr + (index * 4);
+
+	ret = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+	if (ret)
+		goto err0;
+
+	/* FENCE packet writing test_pattern to the wb slot, NOP-padded to 8 dwords */
+	ib.ptr[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0);
+	ib.ptr[1] = lower_32_bits(wb_addr);
+	ib.ptr[2] = upper_32_bits(wb_addr);
+	ib.ptr[3] = test_pattern;
+	for (dw = 4; dw < 8; dw++)
+		ib.ptr[dw] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+	ib.length_dw = 8;
+
+	ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	if (ret)
+		goto err1;
+	/* dma_fence_wait_timeout() returns a signed long; do not narrow it to int */
+	wait = dma_fence_wait_timeout(f, false, timeout);
+	if (wait <= 0) {
+		ret = wait ? (int)wait : -ETIMEDOUT;
+		goto err1;
+	}
+
+	ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL;
+
+err1:
+	amdgpu_ib_free(&ib, NULL);
+	dma_fence_put(f);
+err0:
+	amdgpu_device_wb_free(adev, index);
+	/* 0 on success, negative error code otherwise */
+	return ret;
+}
+
+static void vpe_ring_begin_use(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint32_t indicator;
+
+	cancel_delayed_work_sync(&adev->vpe.idle_work);
+
+	/* context_started set: power-up and notification were already done */
+	if (vpe->context_started)
+		return;
+
+	/* Power on VPE */
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_UNGATE);
+
+	/*
+	 * Indicates that a job from a new context has been submitted:
+	 * bit 0 of the context indicator register is toggled.
+	 */
+	indicator = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator));
+	indicator ^= 0x1;
+	WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator), indicator);
+	vpe->context_started = true;
+}
+
+static void vpe_ring_end_use(struct amdgpu_ring *ring)
+{
+	struct delayed_work *idle_work = &ring->adev->vpe.idle_work;
+
+	schedule_delayed_work(idle_work, VPE_IDLE_TIMEOUT);
+}
+
+static int vpe_ring_reset(struct amdgpu_ring *ring,
+			  unsigned int vmid,
+			  struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int r;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	/* the reset itself is a power cycle: gate VPE, then ungate it */
+	r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+						   AMD_PG_STATE_GATE);
+	if (!r)
+		r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+							   AMD_PG_STATE_UNGATE);
+	if (r)
+		return r;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static ssize_t amdgpu_get_vpe_reset_mask(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+
+	/* sysfs show: print adev->vpe.supported_reset via amdgpu_show_reset_mask() */
+	if (adev)
+		return amdgpu_show_reset_mask(buf, adev->vpe.supported_reset);
+	return -ENODEV;
+}
+
+static DEVICE_ATTR(vpe_reset_mask, 0444, /* mode 0444, no store callback */
+ amdgpu_get_vpe_reset_mask, NULL);
+
+int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+	/*
+	 * The vpe_reset_mask attribute is only created when at least one
+	 * VPE instance is present; otherwise this is a successful no-op.
+	 */
+	if (!adev->vpe.num_instances)
+		return 0;
+
+	/* 0 on success or the error from device_create_file() */
+	return device_create_file(adev->dev, &dev_attr_vpe_reset_mask);
+}
+
+void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+	if (!adev->dev->kobj.sd || !adev->vpe.num_instances)
+		return;
+
+	device_remove_file(adev->dev, &dev_attr_vpe_reset_mask);
+}
+
+static const struct amdgpu_ring_funcs vpe_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_VPE,
+ .align_mask = 0xf,
+ .nop = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0),
+ .support_64bit_ptrs = true,
+ .get_rptr = vpe_ring_get_rptr,
+ .get_wptr = vpe_ring_get_wptr,
+ .set_wptr = vpe_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* vpe_ring_init_cond_exec: writes five dwords */
+ 6 + /* vpe_ring_emit_pipeline_sync: six dwords written directly */
+ 10 + 10 + 10 + /* vpe_ring_emit_fence: at most 2 * 4 + 2 dwords per call */
+ /* vpe_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + /* 3 = dwords vpe_ring_emit_wreg writes directly */
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6, /* 6 = dwords vpe_ring_emit_reg_wait writes directly */
+ .emit_ib_size = 7 + 6, /* vpe_ring_emit_ib writes six dwords; NOTE(review): confirm what the 7 covers */
+ .emit_ib = vpe_ring_emit_ib,
+ .emit_pipeline_sync = vpe_ring_emit_pipeline_sync,
+ .emit_fence = vpe_ring_emit_fence,
+ .emit_vm_flush = vpe_ring_emit_vm_flush,
+ .emit_wreg = vpe_ring_emit_wreg,
+ .emit_reg_wait = vpe_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .insert_nop = vpe_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .test_ring = vpe_ring_test_ring,
+ .test_ib = vpe_ring_test_ib,
+ .init_cond_exec = vpe_ring_init_cond_exec,
+ .preempt_ib = vpe_ring_preempt_ib,
+ .begin_use = vpe_ring_begin_use, /* cancels idle_work, powers up on a new context */
+ .end_use = vpe_ring_end_use, /* schedules idle_work after VPE_IDLE_TIMEOUT */
+ .reset = vpe_ring_reset, /* gate/ungate power cycle between the reset helpers */
+};
+
+static void vpe_set_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->vpe.ring.funcs = &vpe_ring_funcs; /* install the VPE ring callback table */
+}
+
+const struct amd_ip_funcs vpe_ip_funcs = {
+ .name = "vpe_v6_1",
+ .early_init = vpe_early_init,
+ .sw_init = vpe_sw_init,
+ .sw_fini = vpe_sw_fini,
+ .hw_init = vpe_hw_init,
+ .hw_fini = vpe_hw_fini,
+ .suspend = vpe_suspend,
+ .resume = vpe_resume,
+ .set_clockgating_state = vpe_set_clockgating_state, /* stub that always returns 0 */
+ .set_powergating_state = vpe_set_powergating_state, /* enables/disables VPE through DPM */
+};
+
+const struct amdgpu_ip_block_version vpe_v6_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VPE,
+ .major = 6,
+ .minor = 1,
+ .rev = 0, /* major.minor.rev together: 6.1.0 */
+ .funcs = &vpe_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
new file mode 100644
index 000000000000..695da740a97e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __AMDGPU_VPE_H__
+#define __AMDGPU_VPE_H__
+
+#include "amdgpu_ring.h"
+#include "amdgpu_irq.h"
+#include "vpe_6_1_fw_if.h"
+
+#define AMDGPU_MAX_VPE_INSTANCES 2
+
+struct amdgpu_vpe;
+
+struct vpe_funcs { /* hooks are optional: the vpe_*() wrapper macros in this header yield 0 for a NULL hook */
+ uint32_t (*get_reg_offset)(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset);
+ int (*set_regs)(struct amdgpu_vpe *vpe);
+ int (*irq_init)(struct amdgpu_vpe *vpe);
+ int (*init_microcode)(struct amdgpu_vpe *vpe);
+ int (*load_microcode)(struct amdgpu_vpe *vpe);
+ int (*ring_init)(struct amdgpu_vpe *vpe);
+ int (*ring_start)(struct amdgpu_vpe *vpe);
+ int (*ring_stop)(struct amdgpu_vpe *vpe);
+ int (*ring_fini)(struct amdgpu_vpe *vpe);
+};
+
+struct vpe_regs { /* offsets handed to vpe_get_reg_offset() together with an instance index */
+ uint32_t queue0_rb_rptr_lo;
+ uint32_t queue0_rb_rptr_hi;
+ uint32_t queue0_rb_wptr_lo;
+ uint32_t queue0_rb_wptr_hi;
+ uint32_t queue0_preempt; /* written 1, then 0, by vpe_ring_preempt_ib() */
+
+ uint32_t dpm_enable;
+ uint32_t dpm_pratio;
+ uint32_t dpm_request_interval;
+ uint32_t dpm_decision_threshold;
+ uint32_t dpm_busy_clamp_threshold;
+ uint32_t dpm_idle_clamp_threshold;
+ uint32_t dpm_request_lv;
+ uint32_t context_indicator; /* bit 0 is toggled by vpe_ring_begin_use() */
+};
+
+struct amdgpu_vpe {
+ struct amdgpu_ring ring;
+ struct amdgpu_irq_src trap_irq;
+
+ const struct vpe_funcs *funcs;
+ struct vpe_regs regs;
+
+ const struct firmware *fw;
+ uint32_t fw_version;
+ uint32_t feature_version;
+
+ struct amdgpu_bo *cmdbuf_obj;
+ uint64_t cmdbuf_gpu_addr;
+ uint32_t *cmdbuf_cpu_addr;
+ struct delayed_work idle_work; /* scheduled by vpe_ring_end_use(), cancelled by vpe_ring_begin_use() */
+ bool context_started; /* set by vpe_ring_begin_use(), cleared when VPE is power gated */
+
+ uint32_t num_instances; /* instances whose WPTR registers vpe_ring_set_wptr() programs */
+ bool collaborate_mode; /* when set, the doorbell at doorbell_index + 4 is rung as well */
+ uint32_t supported_reset; /* shown through the vpe_reset_mask sysfs attribute */
+};
+
+int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev);
+int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe);
+void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev);
+
+#define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0)
+#define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? (vpe)->funcs->ring_start((vpe)) : 0)
+#define vpe_ring_stop(vpe) ((vpe)->funcs->ring_stop ? (vpe)->funcs->ring_stop((vpe)) : 0)
+#define vpe_ring_fini(vpe) ((vpe)->funcs->ring_fini ? (vpe)->funcs->ring_fini((vpe)) : 0)
+/* As above: each wrapper evaluates to 0 when the hook is NULL; (vpe) is evaluated more than once. */
+#define vpe_get_reg_offset(vpe, inst, offset) \
+ ((vpe)->funcs->get_reg_offset ? (vpe)->funcs->get_reg_offset((vpe), (inst), (offset)) : 0)
+#define vpe_set_regs(vpe) \
+ ((vpe)->funcs->set_regs ? (vpe)->funcs->set_regs((vpe)) : 0)
+#define vpe_irq_init(vpe) \
+ ((vpe)->funcs->irq_init ? (vpe)->funcs->irq_init((vpe)) : 0)
+#define vpe_init_microcode(vpe) \
+ ((vpe)->funcs->init_microcode ? (vpe)->funcs->init_microcode((vpe)) : 0)
+#define vpe_load_microcode(vpe) \
+ ((vpe)->funcs->load_microcode ? (vpe)->funcs->load_microcode((vpe)) : 0)
+
+extern const struct amdgpu_ip_block_version vpe_v6_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index c7085a747b03..a5adb2ed9b3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -24,13 +24,15 @@
#include <linux/dma-mapping.h>
#include <drm/ttm/ttm_range_manager.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "amdgpu_res_cursor.h"
-#include "amdgpu_atomfirmware.h"
#include "atom.h"
+#define AMDGPU_MAX_SG_SEGMENT_SIZE (2UL << 30)
+
struct amdgpu_vram_reservation {
u64 start;
u64 size;
@@ -77,7 +79,16 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
return true;
}
+static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head)
+{
+	u64 total = 0;	/* bytes summed over every block linked on @head */
+	struct drm_buddy_block *blk;
+
+	list_for_each_entry(blk, head, link)
+		total += amdgpu_vram_mgr_block_size(blk);
+	return total;
+}
/**
* DOC: mem_info_vram_total
@@ -212,8 +223,23 @@ static struct attribute *amdgpu_vram_mgr_attributes[] = {
NULL
};
+static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj,
+					    struct attribute *attr, int i)
+{
+	struct drm_device *ddev = dev_get_drvdata(kobj_to_dev(kobj));
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	bool is_vendor_attr = (attr == &dev_attr_mem_info_vram_vendor.attr);
+
+	/* hide mem_info_vram_vendor while gmc.vram_vendor is zero */
+	if (is_vendor_attr && !adev->gmc.vram_vendor)
+		return 0;
+
+	return attr->mode;
+}
+
const struct attribute_group amdgpu_vram_mgr_attr_group = {
- .attrs = amdgpu_vram_mgr_attributes
+ .attrs = amdgpu_vram_mgr_attributes,
+ .is_visible = amdgpu_vram_attrs_is_visible
};
/**
@@ -370,43 +396,33 @@ out:
return ret;
}
-static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man,
- struct drm_printer *printer)
+int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
+ uint64_t address, struct amdgpu_vram_block_info *info)
{
- DRM_DEBUG_DRIVER("Dummy vram mgr debug\n");
-}
-
-static bool amdgpu_dummy_vram_mgr_compatible(struct ttm_resource_manager *man,
- struct ttm_resource *res,
- const struct ttm_place *place,
- size_t size)
-{
- DRM_DEBUG_DRIVER("Dummy vram mgr compatible\n");
- return false;
-}
+ struct amdgpu_vram_mgr_resource *vres;
+ struct drm_buddy_block *block;
+ u64 start, size;
+ int ret = -ENOENT;
-static bool amdgpu_dummy_vram_mgr_intersects(struct ttm_resource_manager *man,
- struct ttm_resource *res,
- const struct ttm_place *place,
- size_t size)
-{
- DRM_DEBUG_DRIVER("Dummy vram mgr intersects\n");
- return true;
-}
+ mutex_lock(&mgr->lock);
+ list_for_each_entry(vres, &mgr->allocated_vres_list, vres_node) {
+ list_for_each_entry(block, &vres->blocks, link) {
+ start = amdgpu_vram_mgr_block_start(block);
+ size = amdgpu_vram_mgr_block_size(block);
+ if ((start <= address) && (address < (start + size))) {
+ info->start = start;
+ info->size = size;
+ memcpy(&info->task, &vres->task, sizeof(vres->task));
+ ret = 0;
+ goto out;
+ }
+ }
+ }
-static void amdgpu_dummy_vram_mgr_del(struct ttm_resource_manager *man,
- struct ttm_resource *res)
-{
- DRM_DEBUG_DRIVER("Dummy vram mgr deleted\n");
-}
+out:
+ mutex_unlock(&mgr->lock);
-static int amdgpu_dummy_vram_mgr_new(struct ttm_resource_manager *man,
- struct ttm_buffer_object *tbo,
- const struct ttm_place *place,
- struct ttm_resource **res)
-{
- DRM_DEBUG_DRIVER("Dummy vram mgr new\n");
- return -ENOSPC;
+ return ret;
}
/**
@@ -424,18 +440,20 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
const struct ttm_place *place,
struct ttm_resource **res)
{
- u64 vis_usage = 0, max_bytes, cur_size, min_block_size;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ u64 vis_usage = 0, max_bytes, min_block_size;
struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn;
+ unsigned int adjust_dcc_size = 0;
struct drm_buddy *mm = &mgr->mm;
struct drm_buddy_block *block;
unsigned long pages_per_block;
int r;
lpfn = (u64)place->lpfn << PAGE_SHIFT;
- if (!lpfn)
+ if (!lpfn || lpfn > man->size)
lpfn = man->size;
fpfn = (u64)place->fpfn << PAGE_SHIFT;
@@ -444,7 +462,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM;
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
pages_per_block = ~0ul;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -453,7 +471,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
/* default to 2MB */
pages_per_block = 2UL << (20UL - PAGE_SHIFT);
#endif
- pages_per_block = max_t(uint32_t, pages_per_block,
+ pages_per_block = max_t(u32, pages_per_block,
tbo->page_alignment);
}
@@ -474,11 +492,29 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (place->flags & TTM_PL_FLAG_TOPDOWN)
vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+ vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+
if (fpfn || lpfn != mgr->mm.size)
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+ if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC &&
+ adev->gmc.gmc_funcs->get_dcc_alignment)
+ adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev);
+
remaining_size = (u64)vres->base.size;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
+ unsigned int dcc_size;
+
+ dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size);
+ remaining_size = (u64)dcc_size;
+
+ vres->flags |= DRM_BUDDY_TRIM_DISABLE;
+ }
mutex_lock(&mgr->lock);
while (remaining_size) {
@@ -487,33 +523,15 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
else
min_block_size = mgr->default_page_size;
- BUG_ON(min_block_size < mm->chunk_size);
-
- /* Limit maximum size to 2GiB due to SG table limitations */
- size = min(remaining_size, 2ULL << 30);
+ size = remaining_size;
- if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
- !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size)
+ min_block_size = size;
+ else if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
+ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
- cur_size = size;
-
- if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
- /*
- * Except for actual range allocation, modify the size and
- * min_block_size conforming to continuous flag enablement
- */
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- size = roundup_pow_of_two(size);
- min_block_size = size;
- /*
- * Modify the size value if size is not
- * aligned with min_block_size
- */
- } else if (!IS_ALIGNED(size, min_block_size)) {
- size = round_up(size, min_block_size);
- }
- }
+ BUG_ON(min_block_size < mm->chunk_size);
r = drm_buddy_alloc_blocks(mm, fpfn,
lpfn,
@@ -521,6 +539,16 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
min_block_size,
&vres->blocks,
vres->flags);
+
+ if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
+ vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
+ tbo->page_alignment);
+
+ continue;
+ }
+
if (unlikely(r))
goto error_free_blocks;
@@ -529,43 +557,31 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
else
remaining_size -= size;
}
- mutex_unlock(&mgr->lock);
-
- if (cur_size != size) {
- struct drm_buddy_block *block;
- struct list_head *trim_list;
- u64 original_size;
- LIST_HEAD(temp);
-
- trim_list = &vres->blocks;
- original_size = (u64)vres->base.size;
-
- /*
- * If size value is rounded up to min_block_size, trim the last
- * block to the required size
- */
- if (!list_is_singular(&vres->blocks)) {
- block = list_last_entry(&vres->blocks, typeof(*block), link);
- list_move_tail(&block->link, &temp);
- trim_list = &temp;
- /*
- * Compute the original_size value by subtracting the
- * last block size with (aligned size - original size)
- */
- original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size);
- }
- mutex_lock(&mgr->lock);
- drm_buddy_block_trim(mm,
- original_size,
- trim_list);
- mutex_unlock(&mgr->lock);
-
- if (!list_empty(&temp))
- list_splice_tail(trim_list, &vres->blocks);
+ vres->task.pid = task_pid_nr(current);
+ get_task_comm(vres->task.comm, current);
+ list_add_tail(&vres->vres_node, &mgr->allocated_vres_list);
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
+ struct drm_buddy_block *dcc_block;
+ unsigned long dcc_start;
+ u64 trim_start;
+
+ dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks);
+ /* Adjust the start address for DCC buffers only */
+ dcc_start =
+ roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block),
+ adjust_dcc_size);
+ trim_start = (u64)dcc_start;
+ drm_buddy_block_trim(mm, &trim_start,
+ (u64)vres->base.size,
+ &vres->blocks);
}
+ mutex_unlock(&mgr->lock);
vres->base.start = 0;
+ size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
+ vres->base.size);
list_for_each_entry(block, &vres->blocks, link) {
unsigned long start;
@@ -573,8 +589,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
amdgpu_vram_mgr_block_size(block);
start >>= PAGE_SHIFT;
- if (start > PFN_UP(vres->base.size))
- start -= PFN_UP(vres->base.size);
+ if (start > PFN_UP(size))
+ start -= PFN_UP(size);
else
start = 0;
vres->base.start = max(vres->base.start, start);
@@ -595,7 +611,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
return 0;
error_free_blocks:
- drm_buddy_free_list(mm, &vres->blocks);
+ drm_buddy_free_list(mm, &vres->blocks, 0);
mutex_unlock(&mgr->lock);
error_fini:
ttm_resource_fini(man, &vres->base);
@@ -623,12 +639,15 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
uint64_t vis_usage = 0;
mutex_lock(&mgr->lock);
+
+ list_del(&vres->vres_node);
+ memset(&vres->task, 0, sizeof(vres->task));
+
list_for_each_entry(block, &vres->blocks, link)
vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+ drm_buddy_free_list(mm, &vres->blocks, vres->flags);
amdgpu_vram_mgr_do_reserve(man);
-
- drm_buddy_free_list(mm, &vres->blocks);
mutex_unlock(&mgr->lock);
atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -670,7 +689,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
amdgpu_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
- amdgpu_res_next(&cursor, cursor.size);
+ amdgpu_res_next(&cursor, min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE));
}
r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
@@ -690,7 +709,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
phys_addr_t phys = cursor.start + adev->gmc.aper_base;
- size_t size = cursor.size;
+ unsigned long size = min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE);
dma_addr_t addr;
addr = dma_map_resource(dev, phys, size, dir,
@@ -703,7 +722,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
sg_dma_address(sg) = addr;
sg_dma_len(sg) = size;
- amdgpu_res_next(&cursor, cursor.size);
+ amdgpu_res_next(&cursor, size);
}
return 0;
@@ -761,6 +780,23 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
}
/**
+ * amdgpu_vram_mgr_clear_reset_blocks - reset clear blocks
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Reset the cleared drm buddy blocks.
+ */
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev)
+{
+	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+
+	/* take mgr->lock around the buddy call, as the other paths in this file do */
+	mutex_lock(&mgr->lock);
+	drm_buddy_reset_clear(&mgr->mm, false);
+	mutex_unlock(&mgr->lock);
+}
+
+/**
* amdgpu_vram_mgr_intersects - test each drm buddy block for intersection
*
* @man: TTM memory type manager
@@ -857,14 +893,6 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
mutex_unlock(&mgr->lock);
}
-static const struct ttm_resource_manager_func amdgpu_dummy_vram_mgr_func = {
- .alloc = amdgpu_dummy_vram_mgr_new,
- .free = amdgpu_dummy_vram_mgr_del,
- .intersects = amdgpu_dummy_vram_mgr_intersects,
- .compatible = amdgpu_dummy_vram_mgr_compatible,
- .debug = amdgpu_dummy_vram_mgr_debug
-};
-
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
.alloc = amdgpu_vram_mgr_new,
.free = amdgpu_vram_mgr_del,
@@ -886,24 +914,22 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
struct ttm_resource_manager *man = &mgr->manager;
int err;
+ man->cg = drmm_cgroup_register_region(adev_to_drm(adev), "vram", adev->gmc.real_vram_size);
+ if (IS_ERR(man->cg))
+ return PTR_ERR(man->cg);
ttm_resource_manager_init(man, &adev->mman.bdev,
adev->gmc.real_vram_size);
mutex_init(&mgr->lock);
INIT_LIST_HEAD(&mgr->reservations_pending);
INIT_LIST_HEAD(&mgr->reserved_pages);
+ INIT_LIST_HEAD(&mgr->allocated_vres_list);
mgr->default_page_size = PAGE_SIZE;
- if (!adev->gmc.is_app_apu) {
- man->func = &amdgpu_vram_mgr_func;
-
- err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
- if (err)
- return err;
- } else {
- man->func = &amdgpu_dummy_vram_mgr_func;
- DRM_INFO("Setup dummy vram mgr\n");
- }
+ man->func = &amdgpu_vram_mgr_func;
+ err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ if (err)
+ return err;
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
@@ -936,7 +962,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
- drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+ drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 0e04e42cf809..5f5fd9a911c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -35,12 +35,26 @@ struct amdgpu_vram_mgr {
struct list_head reserved_pages;
atomic64_t vis_usage;
u64 default_page_size;
+ struct list_head allocated_vres_list;
+};
+
+struct amdgpu_vres_task {
+ pid_t pid; /* task_pid_nr() of the task that made the allocation */
+ char comm[TASK_COMM_LEN]; /* filled with get_task_comm() at allocation time */
+};
+
+struct amdgpu_vram_block_info { /* result of amdgpu_vram_mgr_query_address_block_info() */
+ u64 start; /* start of the buddy block containing the queried address */
+ u64 size; /* size of that block */
+ struct amdgpu_vres_task task; /* copy of the owning resource's task record */
+};
struct amdgpu_vram_mgr_resource {
struct ttm_resource base;
struct list_head blocks;
unsigned long flags;
+ struct list_head vres_node;
+ struct amdgpu_vres_task task;
};
static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
@@ -53,10 +67,26 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
return (u64)PAGE_SIZE << drm_buddy_block_order(block);
}
+static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_is_clear(block); /* thin wrapper over the drm_buddy query */
+}
+
static inline struct amdgpu_vram_mgr_resource *
to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
{
return container_of(res, struct amdgpu_vram_mgr_resource, base);
}
+static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res)
+{
+	struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
+
+	WARN_ON(vres->flags & DRM_BUDDY_CLEARED);	/* flag was already set */
+	vres->flags |= DRM_BUDDY_CLEARED;
+}
+
+int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
+ uint64_t address, struct amdgpu_vram_block_info *info);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index 565a1fa436d4..1083db8cea2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -27,6 +27,9 @@
#include <drm/drm_drv.h>
#include "../amdxcp/amdgpu_xcp_drv.h"
+static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr);
+static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr);
+
static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr,
struct amdgpu_xcp_ip *xcp_ip, int xcp_state)
{
@@ -117,6 +120,25 @@ static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
xcp->valid = true;
}
+static void __amdgpu_xcp_set_unique_id(struct amdgpu_xcp_mgr *xcp_mgr,
+ int xcp_id)
+{
+ struct amdgpu_xcp *xcp = &xcp_mgr->xcp[xcp_id];
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ uint32_t inst_mask;
+ uint64_t uid;
+ int i;
+
+ if (!amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask) &&
+ inst_mask) {
+ i = GET_INST(GC, (ffs(inst_mask) - 1));
+ uid = amdgpu_device_get_uid(xcp_mgr->adev->uid_info,
+ AMDGPU_UID_TYPE_XCD, i);
+ if (uid)
+ xcp->unique_id = uid;
+ }
+}
+
int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
{
struct amdgpu_device *adev = xcp_mgr->adev;
@@ -155,6 +177,7 @@ int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
else
xcp_mgr->xcp[i].mem_id = mem_id;
}
+ __amdgpu_xcp_set_unique_id(xcp_mgr, i);
}
xcp_mgr->num_xcps = num_xcps;
@@ -163,16 +186,11 @@ int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
return 0;
}
-int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+static int __amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode)
{
int ret, curr_mode, num_xcps = 0;
- if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
- return -EINVAL;
-
- if (xcp_mgr->mode == mode)
- return 0;
-
if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode)
return 0;
@@ -194,26 +212,64 @@ int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
goto out;
}
-
+ amdgpu_xcp_sysfs_entries_update(xcp_mgr);
out:
mutex_unlock(&xcp_mgr->xcp_lock);
return ret;
}
-int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
{
- int mode;
+ if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
+ return -EINVAL;
- if (xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
- return xcp_mgr->mode;
+ if (xcp_mgr->mode == mode)
+ return 0;
+
+ return __amdgpu_xcp_switch_partition_mode(xcp_mgr, mode);
+}
+int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr || xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return 0;
+
+ return __amdgpu_xcp_switch_partition_mode(xcp_mgr, xcp_mgr->mode);
+}
+
+static bool __amdgpu_xcp_is_cached_mode_valid(struct amdgpu_xcp_mgr *xcp_mgr)
+{
if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode)
+ return true;
+
+ if (!amdgpu_sriov_vf(xcp_mgr->adev) &&
+ xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return true;
+
+ if (xcp_mgr->mode != AMDGPU_XCP_MODE_NONE &&
+ xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS)
+ return true;
+
+ return false;
+}
+
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int mode;
+
+ if (__amdgpu_xcp_is_cached_mode_valid(xcp_mgr))
return xcp_mgr->mode;
if (!(flags & AMDGPU_XCP_FL_LOCKED))
mutex_lock(&xcp_mgr->xcp_lock);
mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
+
+ /* First time query for VF, set the mode here */
+ if (amdgpu_sriov_vf(xcp_mgr->adev) &&
+ xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ xcp_mgr->mode = mode;
+
if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode)
dev_WARN(
xcp_mgr->adev->dev,
@@ -242,9 +298,10 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
if (ret == -ENOSPC) {
dev_warn(adev->dev,
"Skip xcp node #%d when out of drm node resource.", i);
- return 0;
+ ret = 0;
+ goto out;
} else if (ret) {
- return ret;
+ goto out;
}
/* Redirect all IOCTLs to the primary device */
@@ -257,9 +314,14 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
p_ddev->vma_offset_manager = ddev->vma_offset_manager;
p_ddev->driver = &amdgpu_partition_driver;
adev->xcp_mgr->xcp[i].ddev = p_ddev;
+
+ dev_set_drvdata(p_ddev->dev, &adev->xcp_mgr->xcp[i]);
}
+ ret = 0;
+out:
+ amdgpu_xcp_sysfs_entries_init(adev->xcp_mgr);
- return 0;
+ return ret;
}
int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
@@ -267,9 +329,9 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
struct amdgpu_xcp_mgr_funcs *xcp_funcs)
{
struct amdgpu_xcp_mgr *xcp_mgr;
+ int i;
- if (!xcp_funcs || !xcp_funcs->switch_partition_mode ||
- !xcp_funcs->get_ip_details)
+ if (!xcp_funcs || !xcp_funcs->get_ip_details)
return -EINVAL;
xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL);
@@ -286,6 +348,8 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode);
adev->xcp_mgr = xcp_mgr;
+ for (i = 0; i < MAX_XCP; ++i)
+ xcp_mgr->xcp[i].xcp_mgr = xcp_mgr;
return amdgpu_xcp_dev_alloc(adev);
}
@@ -362,6 +426,7 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
p_ddev->driver = adev->xcp_mgr->xcp[i].driver;
p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
+ amdgpu_xcp_drm_dev_free(p_ddev);
}
}
@@ -407,9 +472,636 @@ void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
return;
sched = entity->entity.rq->sched;
- if (sched->ready) {
+ if (drm_sched_wqueue_ready(sched)) {
ring = to_amdgpu_ring(entity->entity.rq->sched);
atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
}
}
+int amdgpu_xcp_select_scheds(struct amdgpu_device *adev,
+ u32 hw_ip, u32 hw_prio,
+ struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds,
+ struct drm_gpu_scheduler ***scheds)
+{
+ u32 sel_xcp_id;
+ int i;
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+
+ if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) {
+ u32 least_ref_cnt = ~0;
+
+ fpriv->xcp_id = 0;
+ for (i = 0; i < xcp_mgr->num_xcps; i++) {
+ u32 total_ref_cnt;
+
+ total_ref_cnt = atomic_read(&xcp_mgr->xcp[i].ref_cnt);
+ if (total_ref_cnt < least_ref_cnt) {
+ fpriv->xcp_id = i;
+ least_ref_cnt = total_ref_cnt;
+ }
+ }
+ }
+ sel_xcp_id = fpriv->xcp_id;
+
+ if (xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) {
+ *num_scheds =
+ xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds;
+ *scheds =
+ xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched;
+ atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt);
+ dev_dbg(adev->dev, "Selected partition #%d", sel_xcp_id);
+ } else {
+ dev_err(adev->dev, "Failed to schedule partition #%d.", sel_xcp_id);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static void amdgpu_set_xcp_id(struct amdgpu_device *adev,
+ uint32_t inst_idx,
+ struct amdgpu_ring *ring)
+{
+ int xcp_id;
+ enum AMDGPU_XCP_IP_BLOCK ip_blk;
+ uint32_t inst_mask;
+
+ ring->xcp_id = AMDGPU_XCP_NO_PARTITION;
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+ adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id;
+ if ((adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) ||
+ (ring->funcs->type == AMDGPU_RING_TYPE_CPER))
+ return;
+
+ inst_mask = 1 << inst_idx;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ ip_blk = AMDGPU_XCP_GFX;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ ip_blk = AMDGPU_XCP_SDMA;
+ break;
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ ip_blk = AMDGPU_XCP_VCN;
+ break;
+ default:
+ dev_err(adev->dev, "Not support ring type %d!", ring->funcs->type);
+ return;
+ }
+
+ for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) {
+ if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) {
+ ring->xcp_id = xcp_id;
+ dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name,
+ ring->xcp_id);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+ adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id;
+ break;
+ }
+ }
+}
+
+static void amdgpu_xcp_gpu_sched_update(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int sel_xcp_id)
+{
+ unsigned int *num_gpu_sched;
+
+ num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id]
+ .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds;
+ adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio]
+ .sched[(*num_gpu_sched)++] = &ring->sched;
+ dev_dbg(adev->dev, "%s :[%d] gpu_sched[%d][%d] = %d",
+ ring->name, sel_xcp_id, ring->funcs->type,
+ ring->hw_prio, *num_gpu_sched);
+}
+
+static int amdgpu_xcp_sched_list_update(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0);
+ memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched));
+ }
+
+ if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return 0;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ ring = adev->rings[i];
+ if (!ring || !ring->sched.ready || ring->no_scheduler)
+ continue;
+
+ amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
+
+ /* VCN may be shared by two partitions under CPX MODE in certain
+ * configs.
+ */
+ if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+ (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst))
+ amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
+ }
+
+ return 0;
+}
+
+int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->num_rings; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ||
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ amdgpu_set_xcp_id(adev, ring->xcc_id, ring);
+ else
+ amdgpu_set_xcp_id(adev, ring->me, ring);
+ }
+
+ return amdgpu_xcp_sched_list_update(adev);
+}
+
+void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ xcp_mgr->supp_xcp_modes = 0;
+
+ switch (NUM_XCC(adev->gfx.xcc_mask)) {
+ case 8:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_DPX_PARTITION_MODE) |
+ BIT(AMDGPU_QPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 6:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_TPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 4:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_DPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 2:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 1:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+
+ default:
+ break;
+ }
+}
+
+int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ /* TODO:
+ * Stop user queues and threads, and make sure GPU is empty of work.
+ */
+
+ if (flags & AMDGPU_XCP_OPS_KFD)
+ amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev);
+
+ return 0;
+}
+
+int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int ret = 0;
+
+ if (flags & AMDGPU_XCP_OPS_KFD) {
+ amdgpu_amdkfd_device_probe(xcp_mgr->adev);
+ amdgpu_amdkfd_device_init(xcp_mgr->adev);
+ /* If KFD init failed, return failure */
+ if (!xcp_mgr->adev->kfd.init_complete)
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+/*====================== xcp sysfs - configuration ======================*/
+#define XCP_CFG_SYSFS_RES_ATTR_SHOW(_name) \
+ static ssize_t amdgpu_xcp_res_sysfs_##_name##_show( \
+ struct amdgpu_xcp_res_details *xcp_res, char *buf) \
+ { \
+ return sysfs_emit(buf, "%d\n", xcp_res->_name); \
+ }
+
+struct amdgpu_xcp_res_sysfs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct amdgpu_xcp_res_details *xcp_res, char *buf);
+};
+
+#define XCP_CFG_SYSFS_RES_ATTR(_name) \
+ struct amdgpu_xcp_res_sysfs_attribute xcp_res_sysfs_attr_##_name = { \
+ .attr = { .name = __stringify(_name), .mode = 0400 }, \
+ .show = amdgpu_xcp_res_sysfs_##_name##_show, \
+ }
+
+XCP_CFG_SYSFS_RES_ATTR_SHOW(num_inst)
+XCP_CFG_SYSFS_RES_ATTR(num_inst);
+XCP_CFG_SYSFS_RES_ATTR_SHOW(num_shared)
+XCP_CFG_SYSFS_RES_ATTR(num_shared);
+
+#define XCP_CFG_SYSFS_RES_ATTR_PTR(_name) xcp_res_sysfs_attr_##_name.attr
+
+static struct attribute *xcp_cfg_res_sysfs_attrs[] = {
+ &XCP_CFG_SYSFS_RES_ATTR_PTR(num_inst),
+ &XCP_CFG_SYSFS_RES_ATTR_PTR(num_shared), NULL
+};
+
+static const char *xcp_desc[] = {
+ [AMDGPU_SPX_PARTITION_MODE] = "SPX",
+ [AMDGPU_DPX_PARTITION_MODE] = "DPX",
+ [AMDGPU_TPX_PARTITION_MODE] = "TPX",
+ [AMDGPU_QPX_PARTITION_MODE] = "QPX",
+ [AMDGPU_CPX_PARTITION_MODE] = "CPX",
+};
+
+static const char *nps_desc[] = {
+ [UNKNOWN_MEMORY_PARTITION_MODE] = "UNKNOWN",
+ [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+ [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+ [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+ [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+ [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+ [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+ATTRIBUTE_GROUPS(xcp_cfg_res_sysfs);
+
+#define to_xcp_attr(x) \
+ container_of(x, struct amdgpu_xcp_res_sysfs_attribute, attr)
+#define to_xcp_res(x) container_of(x, struct amdgpu_xcp_res_details, kobj)
+
+static ssize_t xcp_cfg_res_sysfs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_res_sysfs_attribute *attribute;
+ struct amdgpu_xcp_res_details *xcp_res;
+
+ attribute = to_xcp_attr(attr);
+ xcp_res = to_xcp_res(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(xcp_res, buf);
+}
+
+static const struct sysfs_ops xcp_cfg_res_sysfs_ops = {
+ .show = xcp_cfg_res_sysfs_attr_show,
+};
+
+static const struct kobj_type xcp_cfg_res_sysfs_ktype = {
+ .sysfs_ops = &xcp_cfg_res_sysfs_ops,
+ .default_groups = xcp_cfg_res_sysfs_groups,
+};
+
+const char *xcp_res_names[] = {
+ [AMDGPU_XCP_RES_XCC] = "xcc",
+ [AMDGPU_XCP_RES_DMA] = "dma",
+ [AMDGPU_XCP_RES_DEC] = "dec",
+ [AMDGPU_XCP_RES_JPEG] = "jpeg",
+};
+
+static int amdgpu_xcp_get_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg)
+{
+ if (xcp_mgr->funcs && xcp_mgr->funcs->get_xcp_res_info)
+ return xcp_mgr->funcs->get_xcp_res_info(xcp_mgr, mode, xcp_cfg);
+
+ return -EOPNOTSUPP;
+}
+
+#define to_xcp_cfg(x) container_of(x, struct amdgpu_xcp_cfg, kobj)
+static ssize_t supported_xcp_configs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ struct amdgpu_xcp_mgr *xcp_mgr = xcp_cfg->xcp_mgr;
+ int size = 0, mode;
+ char *sep = "";
+
+ if (!xcp_mgr || !xcp_mgr->supp_xcp_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t supported_nps_configs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ int size = 0, mode;
+ char *sep = "";
+
+ if (!xcp_cfg || !xcp_cfg->compatible_nps_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_cfg->compatible_nps_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t xcp_config_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ amdgpu_gfx_compute_mode_desc(xcp_cfg->mode));
+}
+
+static ssize_t xcp_config_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ int mode, r;
+
+ if (!strncasecmp("SPX", buf, strlen("SPX")))
+ mode = AMDGPU_SPX_PARTITION_MODE;
+ else if (!strncasecmp("DPX", buf, strlen("DPX")))
+ mode = AMDGPU_DPX_PARTITION_MODE;
+ else if (!strncasecmp("TPX", buf, strlen("TPX")))
+ mode = AMDGPU_TPX_PARTITION_MODE;
+ else if (!strncasecmp("QPX", buf, strlen("QPX")))
+ mode = AMDGPU_QPX_PARTITION_MODE;
+ else if (!strncasecmp("CPX", buf, strlen("CPX")))
+ mode = AMDGPU_CPX_PARTITION_MODE;
+ else
+ return -EINVAL;
+
+ r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
+
+ if (r)
+ return r;
+
+ xcp_cfg->mode = mode;
+ return size;
+}
+
+static struct kobj_attribute xcp_cfg_sysfs_mode =
+ __ATTR_RW_MODE(xcp_config, 0644);
+
+static void xcp_cfg_sysfs_release(struct kobject *kobj)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+
+ kfree(xcp_cfg);
+}
+
+static const struct kobj_type xcp_cfg_sysfs_ktype = {
+ .release = xcp_cfg_sysfs_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static struct kobj_attribute supp_part_sysfs_mode =
+ __ATTR_RO(supported_xcp_configs);
+
+static struct kobj_attribute supp_nps_sysfs_mode =
+ __ATTR_RO(supported_nps_configs);
+
+static const struct attribute *xcp_attrs[] = {
+ &supp_part_sysfs_mode.attr,
+ &xcp_cfg_sysfs_mode.attr,
+ NULL,
+};
+
+/*
+ * Create the "compute_partition_config" sysfs directory under the device
+ * kobject, populate its attribute files, and add one child kobject per
+ * resource reported by get_xcp_res_info(). On any failure everything that
+ * was created is torn down again and xcp_mgr->xcp_cfg is left unset.
+ */
+static void amdgpu_xcp_cfg_sysfs_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_xcp_res_details *xcp_res;
+	struct amdgpu_xcp_cfg *xcp_cfg;
+	int i, r, j, rid, mode;
+
+	if (!adev->xcp_mgr)
+		return;
+
+	xcp_cfg = kzalloc(sizeof(*xcp_cfg), GFP_KERNEL);
+	if (!xcp_cfg)
+		return;
+	xcp_cfg->xcp_mgr = adev->xcp_mgr;
+
+	/* From here on xcp_cfg is freed by the ktype release via kobject_put() */
+	r = kobject_init_and_add(&xcp_cfg->kobj, &xcp_cfg_sysfs_ktype,
+				 &adev->dev->kobj, "compute_partition_config");
+	if (r)
+		goto err1;
+
+	r = sysfs_create_files(&xcp_cfg->kobj, xcp_attrs);
+	if (r)
+		goto err1;
+
+	if (adev->gmc.supported_nps_modes != 0) {
+		r = sysfs_create_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+		if (r) {
+			sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+			goto err1;
+		}
+	}
+
+	/* Fall back to SPX when the manager has no known partition mode yet */
+	mode = (xcp_cfg->xcp_mgr->mode ==
+		AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) ?
+		       AMDGPU_SPX_PARTITION_MODE :
+		       xcp_cfg->xcp_mgr->mode;
+	r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
+	if (r) {
+		sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+		sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+		goto err1;
+	}
+
+	xcp_cfg->mode = mode;
+	for (i = 0; i < xcp_cfg->num_res; i++) {
+		xcp_res = &xcp_cfg->xcp_res[i];
+		rid = xcp_res->id;
+		r = kobject_init_and_add(&xcp_res->kobj,
+					 &xcp_cfg_res_sysfs_ktype,
+					 &xcp_cfg->kobj, "%s",
+					 xcp_res_names[rid]);
+		if (r)
+			goto err;
+	}
+
+	adev->xcp_mgr->xcp_cfg = xcp_cfg;
+	return;
+err:
+	/*
+	 * Entries 0..i have all been through kobject_init_and_add() (entry i
+	 * is the one that failed), so each of them needs its reference
+	 * dropped exactly once.
+	 */
+	for (j = 0; j <= i; j++) {
+		xcp_res = &xcp_cfg->xcp_res[j];
+		kobject_put(&xcp_res->kobj);
+	}
+
+	sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+	sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+err1:
+	kobject_put(&xcp_cfg->kobj);
+}
+
+static void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_res_details *xcp_res;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ int i;
+
+ if (!adev->xcp_mgr || !adev->xcp_mgr->xcp_cfg)
+ return;
+
+ xcp_cfg = adev->xcp_mgr->xcp_cfg;
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ xcp_res = &xcp_cfg->xcp_res[i];
+ kobject_put(&xcp_res->kobj);
+ }
+
+ sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+ kobject_put(&xcp_cfg->kobj);
+}
+
+/*====================== xcp sysfs - data entries ======================*/
+
+#define to_xcp(x) container_of(x, struct amdgpu_xcp, kobj)
+
+static ssize_t xcp_metrics_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp *xcp = to_xcp(kobj);
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ ssize_t size;
+
+ xcp_mgr = xcp->xcp_mgr;
+ size = amdgpu_dpm_get_xcp_metrics(xcp_mgr->adev, xcp->id, NULL);
+ if (size <= 0)
+ return size;
+
+ if (size > PAGE_SIZE)
+ return -ENOSPC;
+
+ return amdgpu_dpm_get_xcp_metrics(xcp_mgr->adev, xcp->id, buf);
+}
+
+static umode_t amdgpu_xcp_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct amdgpu_xcp *xcp = to_xcp(kobj);
+
+ if (!xcp || !xcp->valid)
+ return 0;
+
+ return attr->mode;
+}
+
+static struct kobj_attribute xcp_sysfs_metrics = __ATTR_RO(xcp_metrics);
+
+static struct attribute *amdgpu_xcp_attrs[] = {
+ &xcp_sysfs_metrics.attr,
+ NULL,
+};
+
+static const struct attribute_group amdgpu_xcp_attrs_group = {
+ .attrs = amdgpu_xcp_attrs,
+ .is_visible = amdgpu_xcp_attrs_is_visible
+};
+
+static const struct kobj_type xcp_sysfs_ktype = {
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void amdgpu_xcp_sysfs_entries_fini(struct amdgpu_xcp_mgr *xcp_mgr, int n)
+{
+ struct amdgpu_xcp *xcp;
+
+ for (n--; n >= 0; n--) {
+ xcp = &xcp_mgr->xcp[n];
+ if (!xcp->ddev || !xcp->valid)
+ continue;
+ sysfs_remove_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ kobject_put(&xcp->kobj);
+ }
+}
+
+static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_xcp *xcp;
+ int i, r;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ /* Add an "xcp" kobject per partition; stop at the first one without a DRM device */
+ xcp = &xcp_mgr->xcp[i];
+ if (!xcp->ddev)
+ break;
+ r = kobject_init_and_add(&xcp->kobj, &xcp_sysfs_ktype,
+ &xcp->ddev->dev->kobj, "xcp");
+ if (r)
+ goto out;
+
+ r = sysfs_create_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ if (r)
+ goto out;
+ }
+
+ return;
+out:
+ kobject_put(&xcp->kobj); /* only the entry that failed is dropped here */
+}
+
+static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_xcp *xcp;
+ int i;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ /* Update the attribute group of every partition that has a DRM device */
+ xcp = &xcp_mgr->xcp[i];
+ if (!xcp->ddev)
+ continue;
+ sysfs_update_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ }
+
+ return;
+}
+
+void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev)
+{
+ if (!adev->xcp_mgr)
+ return;
+
+ amdgpu_xcp_cfg_sysfs_init(adev);
+
+ return;
+}
+
+void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!adev->xcp_mgr)
+ return;
+ amdgpu_xcp_sysfs_entries_fini(adev->xcp_mgr, MAX_XCP);
+ amdgpu_xcp_cfg_sysfs_fini(adev);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 9a1036aeec2a..1928d9e224fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -39,6 +39,8 @@
#define AMDGPU_XCP_NO_PARTITION (~0)
+#define AMDGPU_XCP_OPS_KFD (1 << 0)
+
struct amdgpu_fpriv;
enum AMDGPU_XCP_IP_BLOCK {
@@ -56,6 +58,30 @@ enum AMDGPU_XCP_STATE {
AMDGPU_XCP_RESUME,
};
+enum amdgpu_xcp_res_id {
+ AMDGPU_XCP_RES_XCC,
+ AMDGPU_XCP_RES_DMA,
+ AMDGPU_XCP_RES_DEC,
+ AMDGPU_XCP_RES_JPEG,
+ AMDGPU_XCP_RES_MAX,
+};
+
+struct amdgpu_xcp_res_details {
+ enum amdgpu_xcp_res_id id;
+ u8 num_inst;
+ u8 num_shared;
+ struct kobject kobj;
+};
+
+struct amdgpu_xcp_cfg {
+ u8 mode;
+ struct amdgpu_xcp_res_details xcp_res[AMDGPU_XCP_RES_MAX];
+ u8 num_res;
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ struct kobject kobj;
+ u16 compatible_nps_modes;
+};
+
struct amdgpu_xcp_ip_funcs {
int (*prepare_suspend)(void *handle, uint32_t inst_mask);
int (*suspend)(void *handle, uint32_t inst_mask);
@@ -84,6 +110,9 @@ struct amdgpu_xcp {
struct drm_driver *driver;
struct drm_vma_offset_manager *vma_offset_manager;
struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ struct kobject kobj;
+ uint64_t unique_id;
};
struct amdgpu_xcp_mgr {
@@ -97,6 +126,9 @@ struct amdgpu_xcp_mgr {
/* Used to determine KFD memory size limits per XCP */
unsigned int num_xcp_per_mem_partition;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ uint32_t supp_xcp_modes;
+ uint32_t avail_xcp_modes;
};
struct amdgpu_xcp_mgr_funcs {
@@ -108,15 +140,13 @@ struct amdgpu_xcp_mgr_funcs {
struct amdgpu_xcp_ip *ip);
int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr,
struct amdgpu_xcp *xcp, uint8_t *mem_id);
-
+ int (*get_xcp_res_info)(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg);
int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
- int (*select_scheds)(struct amdgpu_device *adev,
- u32 hw_ip, u32 hw_prio, struct amdgpu_fpriv *fpriv,
- unsigned int *num_scheds, struct drm_gpu_scheduler ***scheds);
- int (*update_partition_sched_list)(struct amdgpu_device *adev);
};
int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
@@ -129,6 +159,7 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode);
int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode);
+int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr);
int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
enum AMDGPU_XCP_IP_BLOCK ip, int instance);
@@ -144,15 +175,17 @@ int amdgpu_xcp_open_device(struct amdgpu_device *adev,
struct drm_file *file_priv);
void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
struct amdgpu_ctx_entity *entity);
-
-#define amdgpu_xcp_select_scheds(adev, e, c, d, x, y) \
- ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
- (adev)->xcp_mgr->funcs->select_scheds ? \
- (adev)->xcp_mgr->funcs->select_scheds((adev), (e), (c), (d), (x), (y)) : -ENOENT)
-#define amdgpu_xcp_update_partition_sched_list(adev) \
- ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
- (adev)->xcp_mgr->funcs->update_partition_sched_list ? \
- (adev)->xcp_mgr->funcs->update_partition_sched_list(adev) : 0)
+int amdgpu_xcp_select_scheds(struct amdgpu_device *adev,
+ u32 hw_ip, u32 hw_prio,
+ struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds,
+ struct drm_gpu_scheduler ***scheds);
+void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr);
+int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev);
+int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev);
static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr)
{
@@ -179,6 +212,6 @@ amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
#define for_each_xcp(xcp_mgr, xcp, i) \
for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
- xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
+ ++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 7e91b24784e5..1ede308a7c67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -40,6 +40,11 @@
#define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210
#define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218
+#define XGMI_STATE_DISABLE 0xD1
+#define XGMI_STATE_LS0 0x81
+#define XGMI_LINK_ACTIVE 1
+#define XGMI_LINK_INACTIVE 0
+
static DEFINE_MUTEX(xgmi_mutex);
#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
@@ -103,6 +108,53 @@ static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = {
smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
};
+static const int xgmi3x16_pcs_err_status_reg_v6_4[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000
+};
+
+static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
+};
+
+static const u64 xgmi_v6_4_0_mca_base_array[] = {
+ 0x11a09200,
+ 0x11b09200,
+};
+
+static const char *xgmi_v6_4_0_ras_error_code_ext[32] = {
+ [0x00] = "XGMI PCS DataLossErr",
+ [0x01] = "XGMI PCS TrainingErr",
+ [0x02] = "XGMI PCS FlowCtrlAckErr",
+ [0x03] = "XGMI PCS RxFifoUnderflowErr",
+ [0x04] = "XGMI PCS RxFifoOverflowErr",
+ [0x05] = "XGMI PCS CRCErr",
+ [0x06] = "XGMI PCS BERExceededErr",
+ [0x07] = "XGMI PCS TxMetaDataErr",
+ [0x08] = "XGMI PCS ReplayBufParityErr",
+ [0x09] = "XGMI PCS DataParityErr",
+ [0x0a] = "XGMI PCS ReplayFifoOverflowErr",
+ [0x0b] = "XGMI PCS ReplayFifoUnderflowErr",
+ [0x0c] = "XGMI PCS ElasticFifoOverflowErr",
+ [0x0d] = "XGMI PCS DeskewErr",
+ [0x0e] = "XGMI PCS FlowCtrlCRCErr",
+ [0x0f] = "XGMI PCS DataStartupLimitErr",
+ [0x10] = "XGMI PCS FCInitTimeoutErr",
+ [0x11] = "XGMI PCS RecoveryTimeoutErr",
+ [0x12] = "XGMI PCS ReadySerialTimeoutErr",
+ [0x13] = "XGMI PCS ReadySerialAttemptErr",
+ [0x14] = "XGMI PCS RecoveryAttemptErr",
+ [0x15] = "XGMI PCS RecoveryRelockAttemptErr",
+ [0x16] = "XGMI PCS ReplayAttemptErr",
+ [0x17] = "XGMI PCS SyncHdrErr",
+ [0x18] = "XGMI PCS TxReplayTimeoutErr",
+ [0x19] = "XGMI PCS RxReplayTimeoutErr",
+ [0x1a] = "XGMI PCS LinkSubTxTimeoutErr",
+ [0x1b] = "XGMI PCS LinkSubRxTimeoutErr",
+ [0x1c] = "XGMI PCS RxCMDPktErr",
+};
+
static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = {
{"XGMI PCS DataLossErr",
SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)},
@@ -242,6 +294,72 @@ static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = {
SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)},
};
+/*
+ * Map @link_num through the fixed XGMI 6.4.x link table.
+ *
+ * Returns the table entry for XGMI IP versions 6.4.0 and 6.4.1, or -EINVAL
+ * when @link_num is outside the table or the IP version has no table.
+ */
+int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num)
+{
+	static const int link_map_6_4_x[8] = { 0, 3, 1, 2, 7, 6, 4, 5 };
+
+	switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+	case IP_VERSION(6, 4, 0):
+	case IP_VERSION(6, 4, 1):
+		/* Reject negative values explicitly rather than via unsigned conversion */
+		if (link_num >= 0 && link_num < ARRAY_SIZE(link_map_6_4_x))
+			return link_map_6_4_x[link_num];
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return -EINVAL;
+}
+
+static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+ const u32 smn_xgmi_6_4_pcs_state_hist1[2] = { 0x11a00070, 0x11b00070 };
+ const u32 smn_xgmi_6_4_1_pcs_state_hist1[2] = { 0x12100070,
+ 0x11b00070 };
+ u32 i, n;
+ u64 addr;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ n = ARRAY_SIZE(smn_xgmi_6_4_pcs_state_hist1);
+ addr = smn_xgmi_6_4_pcs_state_hist1[global_link_num % n];
+ break;
+ case IP_VERSION(6, 4, 1):
+ n = ARRAY_SIZE(smn_xgmi_6_4_1_pcs_state_hist1);
+ addr = smn_xgmi_6_4_1_pcs_state_hist1[global_link_num % n];
+ break;
+ default:
+ return U32_MAX;
+ }
+
+ i = global_link_num / n;
+ addr += adev->asic_funcs->encode_ext_smn_addressing(i);
+
+ return RREG32_PCIE_EXT(addr);
+}
+
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+ u32 xgmi_state_reg_val;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ xgmi_state_reg_val = xgmi_v6_4_get_link_status(adev, global_link_num);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_DISABLE)
+ return -ENOLINK;
+
+ if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_LS0)
+ return XGMI_LINK_ACTIVE;
+
+ return XGMI_LINK_INACTIVE;
+}
+
/**
* DOC: AMDGPU XGMI Support
*
@@ -325,6 +443,17 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
}
+static ssize_t amdgpu_xgmi_show_physical_id(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%u\n", adev->gmc.xgmi.physical_node_id);
+
+}
+
static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -355,6 +484,41 @@ static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
return sysfs_emit(buf, "%s\n", buf);
}
+/*
+ * sysfs show callback: list every XGMI link in this device's topology as
+ * "<this node>:<src port> -> <peer node>:<dst port>", one link per line.
+ *
+ * Returns the number of bytes written, or -EINVAL when this device's own
+ * node id is not present in its topology table.
+ */
+static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev,
+						   struct device_attribute *attr,
+						   char *buf)
+{
+	struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(dev));
+	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+	int node, link, len = 0;
+	int self_idx = -1;
+
+	/*
+	 * Find this socket's position in the topology table. That index is
+	 * what gets printed as the source node, so the output is easy to
+	 * read; it is NOT the node id retrieved from the xgmi ta.
+	 */
+	for (node = 0; node < top->num_nodes; node++) {
+		if (top->nodes[node].node_id == adev->gmc.xgmi.node_id) {
+			self_idx = node;
+			break;
+		}
+	}
+
+	if (self_idx < 0)
+		return -EINVAL;
+
+	for (node = 0; node < top->num_nodes; node++) {
+		for (link = 0; link < top->nodes[node].num_links; link++) {
+			/* sysfs node ids are 1-based, hence the +1 on both ends */
+			len += sysfs_emit_at(buf, len, "%02x:%02x -> %02x:%02x\n",
+					     self_idx + 1,
+					     top->nodes[node].port_num[link].src_xgmi_port_num,
+					     node + 1,
+					     top->nodes[node].port_num[link].dst_xgmi_port_num);
+		}
+	}
+
+	return len;
+}
+
#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801)
static ssize_t amdgpu_xgmi_show_error(struct device *dev,
struct device_attribute *attr,
@@ -390,9 +554,11 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
+static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL);
static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
+static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL);
static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
@@ -407,6 +573,12 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
return ret;
}
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_physical_id);
+ if (ret) {
+ dev_err(adev->dev, "XGMI: Failed to create device file xgmi_physical_id\n");
+ return ret;
+ }
+
/* Create xgmi error file */
ret = device_create_file(adev->dev, &dev_attr_xgmi_error);
if (ret)
@@ -422,6 +594,13 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
if (ret)
pr_err("failed to create xgmi_num_links\n");
+ /* Create xgmi port num file if supported */
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num);
+ if (ret)
+ dev_err(adev->dev, "failed to create xgmi_port_num\n");
+ }
+
/* Create sysfs link to hive info folder on the first device */
if (hive->kobj.parent != (&adev->dev->kobj)) {
ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj,
@@ -448,9 +627,12 @@ remove_link:
remove_file:
device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_physical_id);
device_remove_file(adev->dev, &dev_attr_xgmi_error);
device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+ device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
success:
return ret;
@@ -463,9 +645,12 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
memset(node, 0, sizeof(node));
device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_physical_id);
device_remove_file(adev->dev, &dev_attr_xgmi_error);
device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+ device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
if (hive->kobj.parent != (&adev->dev->kobj))
sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
@@ -553,6 +738,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
task_barrier_init(&hive->tb);
hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
hive->hi_req_gpu = NULL;
+ atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
/*
* hive pstate on boot is high in vega20 so we have to go to low
@@ -662,28 +848,88 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
* num_hops[2:0] = number of hops
*/
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev)
+ struct amdgpu_device *peer_adev)
{
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
uint8_t num_hops_mask = 0x7;
int i;
+ if (!adev->gmc.xgmi.supported)
+ return 0;
+
for (i = 0 ; i < top->num_nodes; ++i)
if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
return top->nodes[i].num_hops & num_hops_mask;
- return -EINVAL;
+
+ dev_err(adev->dev, "Failed to get xgmi hops count for peer %d.\n",
+ peer_adev->gmc.xgmi.physical_node_id);
+
+ return 0;
+}
+
+/*
+ * Compute the XGMI bandwidth range between @adev and, in per-peer mode,
+ * @peer_adev.
+ *
+ * Per-link bandwidth is max_speed * max_width * unit_scale / BITS_PER_BYTE,
+ * where unit_scale is 1000 for AMDGPU_XGMI_BW_UNIT_MBYTES and 1 otherwise.
+ * *min_bw is one link's bandwidth. *max_bw is that value times the link
+ * count: 1 in per-link mode, or the topology entry's num_links for the peer
+ * in per-peer mode.
+ *
+ * Returns -EINVAL if @min_bw or @max_bw is NULL, or if per-peer mode is
+ * requested without @peer_adev; -ENODATA if XGMI is not supported (outputs
+ * are zeroed first); 0 otherwise. If the peer is not found in the topology
+ * table an error is logged, both outputs stay 0 and 0 is still returned.
+ */
+int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
+ enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
+ uint32_t *min_bw, uint32_t *max_bw)
+{
+ bool peer_mode = bw_mode == AMDGPU_XGMI_BW_MODE_PER_PEER;
+ int unit_scale = bw_unit == AMDGPU_XGMI_BW_UNIT_MBYTES ? 1000 : 1;
+ int num_lanes = adev->gmc.xgmi.max_width;
+ int speed = adev->gmc.xgmi.max_speed;
+ /* -1 marks "peer not found yet"; per-link mode always uses one link */
+ int num_links = !peer_mode ? 1 : -1;
+
+ if (!(min_bw && max_bw))
+ return -EINVAL;
+
+ *min_bw = 0;
+ *max_bw = 0;
+
+ if (!adev->gmc.xgmi.supported)
+ return -ENODATA;
+
+ if (peer_mode && !peer_adev)
+ return -EINVAL;
+
+ if (peer_mode) {
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0 ; i < top->num_nodes; ++i) {
+ if (top->nodes[i].node_id != peer_adev->gmc.xgmi.node_id)
+ continue;
+
+ num_links = top->nodes[i].num_links;
+ break;
+ }
+ }
+
+ /* num_links == -1 is only reachable in peer mode, where peer_adev is non-NULL */
+ if (num_links == -1) {
+ dev_err(adev->dev, "Failed to get number of xgmi links for peer %d.\n",
+ peer_adev->gmc.xgmi.physical_node_id);
+ } else if (num_links) {
+ int per_link_bw = (speed * num_lanes * unit_scale)/BITS_PER_BYTE;
+
+ *min_bw = per_link_bw;
+ *max_bw = num_links * per_link_bw;
+ }
+
+ return 0;
}
-int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev)
+bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
{
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
int i;
+ /* Sharing should always be enabled for non-SRIOV. */
+ if (!amdgpu_sriov_vf(adev))
+ return true;
+
for (i = 0 ; i < top->num_nodes; ++i)
if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
- return top->nodes[i].num_links;
- return -EINVAL;
+ return !!top->nodes[i].is_sharing_enabled;
+
+ return false;
}
/*
@@ -712,6 +958,28 @@ static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf
return 0;
}
+/*
+ * Mirror @adev's view of its link to @peer_adev into @peer_adev's topology
+ * table: the peer's entry for @adev receives num_hops, is_sharing_enabled
+ * and num_links from @adev's entry for the peer. Nothing is changed if
+ * either side has no entry for the other.
+ */
+static void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev,
+					   struct amdgpu_device *peer_adev)
+{
+	struct psp_xgmi_topology_info *local = &adev->psp.xgmi_context.top_info;
+	struct psp_xgmi_topology_info *remote = &peer_adev->psp.xgmi_context.top_info;
+
+	for (int p = 0; p < remote->num_nodes; p++) {
+		/* skip peer entries that do not describe adev */
+		if (remote->nodes[p].node_id != adev->gmc.xgmi.node_id)
+			continue;
+
+		for (int l = 0; l < local->num_nodes; l++) {
+			/* skip local entries that do not describe the peer */
+			if (local->nodes[l].node_id != peer_adev->gmc.xgmi.node_id)
+				continue;
+
+			remote->nodes[p].num_hops = local->nodes[l].num_hops;
+			remote->nodes[p].is_sharing_enabled =
+				local->nodes[l].is_sharing_enabled;
+			remote->nodes[p].num_links = local->nodes[l].num_links;
+			return;
+		}
+	}
+}
+
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
struct psp_xgmi_topology_info *top_info;
@@ -724,8 +992,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
if (!adev->gmc.xgmi.supported)
return 0;
- if (!adev->gmc.xgmi.pending_reset &&
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
ret = psp_xgmi_initialize(&adev->psp, false, true);
if (ret) {
dev_err(adev->dev,
@@ -771,8 +1038,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
task_barrier_add_task(&hive->tb);
- if (!adev->gmc.xgmi.pending_reset &&
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
/* update node list for other device in the hive */
if (tmp_adev != adev) {
@@ -786,18 +1052,38 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit_unlock;
}
- /* get latest topology info for each device from psp */
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
- &tmp_adev->psp.xgmi_context.top_info, false);
+ if (amdgpu_sriov_vf(adev) &&
+ adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+ /* only get topology for VF being init if it can support full duplex */
+ ret = psp_xgmi_get_topology_info(&adev->psp, count,
+ &adev->psp.xgmi_context.top_info, false);
if (ret) {
- dev_err(tmp_adev->dev,
+ dev_err(adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
- tmp_adev->gmc.xgmi.node_id,
- tmp_adev->gmc.xgmi.hive_id, ret);
- /* To do : continue with some node failed or disable the whole hive */
+ adev->gmc.xgmi.node_id,
+ adev->gmc.xgmi.hive_id, ret);
+ /* To do: continue with some node failed or disable the whole hive*/
goto exit_unlock;
}
+
+ /* fill the topology info for peers instead of getting from PSP */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ amdgpu_xgmi_fill_topology_info(adev, tmp_adev);
+ }
+ } else {
+ /* get latest topology info for each device from psp */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info, false);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+ tmp_adev->gmc.xgmi.node_id,
+ tmp_adev->gmc.xgmi.hive_id, ret);
+ /* To do : continue with some node failed or disable the whole hive */
+ goto exit_unlock;
+ }
+ }
}
/* get topology again for hives that support extended data */
@@ -829,7 +1115,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
}
}
- if (!ret && !adev->gmc.xgmi.pending_reset)
+ if (!ret)
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
exit_unlock:
@@ -882,15 +1168,91 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
return 0;
}
+/*
+ * ACA bank parser for XGMI 6.4.x.
+ *
+ * Decodes the bank info, logs the extended error code's name when it has
+ * one in xgmi_v6_4_0_ras_error_code_ext, and records the bank's MISC0
+ * error count in the ACA error cache. The count is forced to 0 unless the
+ * extended error code is 0 or 9 (for UE banks) or 6 (for CE banks).
+ *
+ * Returns the result of aca_bank_info_decode() if that fails, -EINVAL for
+ * an SMU type other than UE/CE, otherwise the result of
+ * aca_error_cache_log_bank_error().
+ */
+static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct amdgpu_device *adev = handle->adev;
+ struct aca_bank_info info;
+ const char *error_str;
+ u64 status, count;
+ int ret, ext_error_code;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ status = bank->regs[ACA_REG_IDX_STATUS];
+ ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
+
+ /* codes outside the name table are simply not logged */
+ error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ?
+ xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL;
+ if (error_str)
+ dev_info(adev->dev, "%s detected\n", error_str);
+
+ count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ /* only extended codes 0 and 9 contribute to the UE count */
+ if (ext_error_code != 0 && ext_error_code != 9)
+ count = 0ULL;
+
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count);
+ break;
+ case ACA_SMU_TYPE_CE:
+ /* only extended code 6 contributes to the CE count */
+ count = ext_error_code == 6 ? count : 0ULL;
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* ACA bank callbacks for XGMI 6.4.x: only a bank parser is provided. */
+static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = {
+ .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser,
+};
+
+/*
+ * ACA description for the PCS XGMI hardware IP, covering both UE and CE
+ * banks. Bound to AMDGPU_RAS_BLOCK__XGMI_WAFL by amdgpu_xgmi_ras_late_init()
+ * for XGMI 6.4.0 and 6.4.1.
+ */
+static const struct aca_info xgmi_v6_4_0_aca_info = {
+ .hwip = ACA_HWIP_TYPE_PCS_XGMI,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
+ .bank_ops = &xgmi_v6_4_0_aca_bank_ops,
+};
+
static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
+ int r;
+
if (!adev->gmc.xgmi.supported ||
adev->gmc.xgmi.num_physical_nodes == 0)
return 0;
- adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL);
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL,
+ &xgmi_v6_4_0_aca_info, NULL);
+ if (r)
+ goto late_fini;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
- return amdgpu_ras_block_late_init(adev, ras_block);
+ return r;
}
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
@@ -906,7 +1268,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg
WREG32_PCIE(pcs_status_reg, 0);
}
-static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
+static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev)
{
uint32_t i;
@@ -932,6 +1294,51 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
default:
break;
}
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++)
+ pcs_clear_status(adev,
+ xgmi3x16_pcs_err_status_reg_v6_4[i]);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Write 0 to the ACA STATUS register of one MCA bank of one XGMI instance. */
+static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base)
+{
+ WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
+}
+
+/* Clear STATUS for every bank in xgmi_v6_4_0_mca_base_array on one XGMI instance. */
+static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++)
+ __xgmi_v6_4_0_reset_error_count(adev, xgmi_inst, xgmi_v6_4_0_mca_base_array[i]);
+}
+
+/* Clear the XGMI 6.4.x error status on every instance set in adev->aid_mask. */
+static void xgmi_v6_4_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ int i;
+
+ for_each_inst(i, adev->aid_mask)
+ xgmi_v6_4_0_reset_error_count(adev, i);
+}
+
+/*
+ * Reset the XGMI RAS error counters: XGMI 6.4.0/6.4.1 use the MCA based
+ * path, every other IP version uses the legacy PCS register path.
+ */
+static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
+{
+	u32 xgmi_ver = amdgpu_ip_version(adev, XGMI_HWIP, 0);
+
+	if (xgmi_ver == IP_VERSION(6, 4, 0) || xgmi_ver == IP_VERSION(6, 4, 1))
+		xgmi_v6_4_0_reset_ras_error_count(adev);
+	else
+		amdgpu_xgmi_legacy_reset_ras_error_count(adev);
}
static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
@@ -948,7 +1355,12 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
uint32_t field_array_size = 0;
if (is_xgmi_pcs) {
- if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
+ if (amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 1, 0) ||
+ amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 4, 0) ||
+ amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 4, 1)) {
pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0];
field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields);
} else {
@@ -982,11 +1394,11 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
return 0;
}
-static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
- void *ras_error_status)
+static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
- int i;
+ int i, supported = 1;
uint32_t data, mask_data = 0;
uint32_t ue_cnt = 0, ce_cnt = 0;
@@ -1050,42 +1462,146 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
}
break;
default:
- dev_warn(adev->dev, "XGMI RAS error query not supported");
+ supported = 0;
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ /* check xgmi3x16 pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) {
+ data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]);
+ mask_data =
+ RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, true);
+ }
+ break;
+ default:
+ if (!supported)
+ dev_warn(adev->dev, "XGMI RAS error query not supported");
break;
}
- adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL);
err_data->ue_count += ue_cnt;
err_data->ce_count += ce_cnt;
}
+/*
+ * Classify an ACA STATUS value by its extended error code: 0 is an
+ * uncorrectable error, 6 is a correctable error, and anything else yields
+ * -EINVAL. The error's name is logged when the code has an entry in
+ * xgmi_v6_4_0_ras_error_code_ext.
+ */
+static enum aca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status)
+{
+	int code = ACA_REG__STATUS__ERRORCODEEXT(status);
+	const char *name = NULL;
+
+	if (code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext))
+		name = xgmi_v6_4_0_ras_error_code_ext[code];
+	if (name)
+		dev_info(adev->dev, "%s detected\n", name);
+
+	switch (code) {
+	case 0:
+		return ACA_ERROR_TYPE_UE;
+	case 6:
+		return ACA_ERROR_TYPE_CE;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Check one MCA bank (@mca_base) of the XGMI instance given by
+ * @mcm_info->die_id. If the STATUS register's VAL field is set, one UE or
+ * one CE is accounted in @err_data according to the decoded error type, and
+ * STATUS is then written back to 0. A valid status that decodes to neither
+ * type is cleared without being counted.
+ */
+static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 mca_base, struct ras_err_data *err_data)
+{
+ int xgmi_inst = mcm_info->die_id;
+ u64 status = 0;
+
+ status = RREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS);
+ /* nothing latched in this bank */
+ if (!ACA_REG__STATUS__VAL(status))
+ return;
+
+ switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) {
+ case ACA_ERROR_TYPE_UE:
+ amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL);
+ break;
+ case ACA_ERROR_TYPE_CE:
+ amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL);
+ break;
+ default:
+ break;
+ }
+
+ WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
+}
+
+/*
+ * Query every bank in xgmi_v6_4_0_mca_base_array for one XGMI instance.
+ * Errors are attributed to the socket id reported by the SMUIO callbacks,
+ * with @xgmi_inst used as the die id.
+ */
+static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data)
+{
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = xgmi_inst,
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++)
+ __xgmi_v6_4_0_query_error_count(adev, &mcm_info, xgmi_v6_4_0_mca_base_array[i], err_data);
+}
+
+/*
+ * Accumulate XGMI 6.4.x error counts for every instance set in
+ * adev->aid_mask. @ras_error_status is treated as a struct ras_err_data.
+ */
+static void xgmi_v6_4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ int i;
+
+ for_each_inst(i, adev->aid_mask)
+ xgmi_v6_4_0_query_error_count(adev, i, err_data);
+}
+
+/*
+ * Query the XGMI RAS error counters: XGMI 6.4.0/6.4.1 use the MCA based
+ * path, every other IP version uses the legacy PCS register path.
+ */
+static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+					      void *ras_error_status)
+{
+	u32 xgmi_ver = amdgpu_ip_version(adev, XGMI_HWIP, 0);
+
+	if (xgmi_ver == IP_VERSION(6, 4, 0) || xgmi_ver == IP_VERSION(6, 4, 1))
+		xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status);
+	else
+		amdgpu_xgmi_legacy_query_ras_error_count(adev, ras_error_status);
+}
+
/* Trigger XGMI/WAFL error */
static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
void *inject_if, uint32_t instance_mask)
{
- int ret = 0;
+ int ret1, ret2;
struct ta_ras_trigger_error_input *block_info =
(struct ta_ras_trigger_error_input *)inject_if;
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
dev_warn(adev->dev, "Failed to disallow df cstate");
- if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
+ ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DISALLOW);
+ if (ret1 && ret1 != -EOPNOTSUPP)
dev_warn(adev->dev, "Failed to disallow XGMI power down");
- ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
+ ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
if (amdgpu_ras_intr_triggered())
- return ret;
+ return ret2;
- if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
+ ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DEFAULT);
+ if (ret1 && ret1 != -EOPNOTSUPP)
dev_warn(adev->dev, "Failed to allow XGMI power down");
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
dev_warn(adev->dev, "Failed to allow df cstate");
- return ret;
+ return ret2;
}
struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
@@ -1123,3 +1639,157 @@ int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev)
return 0;
}
+
+/*
+ * Work handler that performs the "reset on init" for a whole XGMI hive.
+ *
+ * Under hive_lock, every device of the hive is queued on a local reset
+ * list, the reset domain of the first device is locked, and
+ * amdgpu_reset_do_xgmi_reset_on_init() is run with a context requesting a
+ * full reset without coredump. After the locks are released, bad page
+ * information is (re)initialized for every device in the hive; failures
+ * other than -EHWPOISON are logged.
+ */
+static void amdgpu_xgmi_reset_on_init_work(struct work_struct *work)
+{
+	struct amdgpu_hive_info *hive =
+		container_of(work, struct amdgpu_hive_info, reset_on_init_work);
+	struct amdgpu_reset_context reset_context;
+	struct amdgpu_device *tmp_adev;
+	struct list_head device_list;
+	int r;
+
+	mutex_lock(&hive->hive_lock);
+
+	INIT_LIST_HEAD(&device_list);
+	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+		list_add_tail(&tmp_adev->reset_list, &device_list);
+
+	tmp_adev = list_first_entry(&device_list, struct amdgpu_device,
+				    reset_list);
+	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+
+	/*
+	 * The context lives on the stack: zero it before use. set_bit() only
+	 * ORs bits into reset_context.flags, so without this the flag word
+	 * and every field not assigned below would hold stack garbage.
+	 */
+	memset(&reset_context, 0, sizeof(reset_context));
+	reset_context.method = AMD_RESET_METHOD_ON_INIT;
+	reset_context.reset_req_dev = tmp_adev;
+	reset_context.hive = hive;
+	reset_context.reset_device_list = &device_list;
+	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+	amdgpu_reset_do_xgmi_reset_on_init(&reset_context);
+	mutex_unlock(&hive->hive_lock);
+	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+
+	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+		r = amdgpu_ras_init_badpage_info(tmp_adev);
+		/* -EHWPOISON is deliberately not reported as an error here */
+		if (r && r != -EHWPOISON)
+			dev_err(tmp_adev->dev,
+				"error during bad page data initialization");
+	}
+}
+
+/* Initialize the hive's reset-on-init work item and queue it on the hive's reset domain. */
+static void amdgpu_xgmi_schedule_reset_on_init(struct amdgpu_hive_info *hive)
+{
+ INIT_WORK(&hive->reset_on_init_work, amdgpu_xgmi_reset_on_init_work);
+ amdgpu_reset_domain_schedule(hive->reset_domain,
+ &hive->reset_on_init_work);
+}
+
+/*
+ * Trigger the hive-wide reset on init once every device has joined.
+ *
+ * The reset work is scheduled only when the hive's device count equals
+ * adev's num_physical_nodes; in that case this call waits for the work to
+ * finish before returning.
+ *
+ * Returns -EINVAL if no hive can be obtained for @adev, 0 otherwise.
+ */
+int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_hive_info *hive;
+	bool reset_scheduled;
+	int num_devs;
+
+	hive = amdgpu_get_xgmi_hive(adev);
+	if (!hive)
+		return -EINVAL;
+
+	mutex_lock(&hive->hive_lock);
+	num_devs = atomic_read(&hive->number_devices);
+	reset_scheduled = false;
+	if (num_devs == adev->gmc.xgmi.num_physical_nodes) {
+		amdgpu_xgmi_schedule_reset_on_init(hive);
+		reset_scheduled = true;
+	}
+
+	mutex_unlock(&hive->hive_lock);
+
+	/*
+	 * Flush after dropping hive_lock (the work handler takes it) but
+	 * before dropping our reference, so hive is still pinned by this
+	 * function while hive->reset_on_init_work is dereferenced.
+	 */
+	if (reset_scheduled)
+		flush_work(&hive->reset_on_init_work);
+
+	amdgpu_put_xgmi_hive(hive);
+
+	return 0;
+}
+
+/*
+ * Request memory partition (NPS) mode @req_nps_mode on every device of
+ * @hive.
+ *
+ * Does nothing and returns 0 if the hive's requested_nps_mode is already
+ * UNKNOWN_MEMORY_PARTITION_MODE. If a device rejects the request, the
+ * devices that had already accepted it are asked to go back to the mode
+ * currently reported by the failing device. In all cases the hive's
+ * requested mode is set to UNKNOWN afterwards so that the request is
+ * placed only once per hive.
+ *
+ * Returns 0 on success or the error of the first failing request.
+ */
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+				   struct amdgpu_hive_info *hive,
+				   int req_nps_mode)
+{
+	struct amdgpu_device *tmp_adev;
+	/* r must start at 0: the request loop may not run for an empty list */
+	int cur_nps_mode, r = 0;
+
+	/* This is expected to be called only during unload of the driver. The
+	 * request needs to be placed only once for all devices in the hive. If
+	 * one of them fails, revert the request for the previously successful
+	 * devices. After placing the request, mark the hive mode as UNKNOWN so
+	 * that other devices don't request it again.
+	 */
+	mutex_lock(&hive->hive_lock);
+	if (atomic_read(&hive->requested_nps_mode) ==
+	    UNKNOWN_MEMORY_PARTITION_MODE) {
+		dev_dbg(adev->dev, "Unexpected entry for hive NPS change");
+		mutex_unlock(&hive->hive_lock);
+		return 0;
+	}
+	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+		r = adev->gmc.gmc_funcs->request_mem_partition_mode(
+			tmp_adev, req_nps_mode);
+		if (r)
+			break;
+	}
+	if (r) {
+		/* Request back current mode if one of the requests failed */
+		cur_nps_mode =
+			adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev);
+		/* walks backwards from the device before the failing one */
+		list_for_each_entry_continue_reverse(
+			tmp_adev, &hive->device_list, gmc.xgmi.head)
+			adev->gmc.gmc_funcs->request_mem_partition_mode(
+				tmp_adev, cur_nps_mode);
+	}
+	/* Set to UNKNOWN so that other devices don't request anymore */
+	atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
+	mutex_unlock(&hive->hive_lock);
+
+	return r;
+}
+
+/*
+ * True when XGMI P2P is enabled and @adev and @bo_adev are two different
+ * devices sharing the same non-zero hive id.
+ */
+bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+			   struct amdgpu_device *bo_adev)
+{
+	if (!amdgpu_use_xgmi_p2p || adev == bo_adev)
+		return false;
+
+	/* a zero hive id never matches */
+	if (!adev->gmc.xgmi.hive_id)
+		return false;
+
+	return adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id;
+}
+
+/*
+ * Set the default XGMI max link speed and width from the GC IP version.
+ * Does nothing when XGMI is not supported; GC versions not listed here
+ * leave both values untouched.
+ */
+void amdgpu_xgmi_early_init(struct amdgpu_device *adev)
+{
+	u32 gc_ver;
+
+	if (!adev->gmc.xgmi.supported)
+		return;
+
+	gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+	if (gc_ver == IP_VERSION(9, 4, 0) ||
+	    gc_ver == IP_VERSION(9, 4, 1) ||
+	    gc_ver == IP_VERSION(9, 4, 2)) {
+		/* 25 GT/s */
+		adev->gmc.xgmi.max_speed = 25;
+		adev->gmc.xgmi.max_width = 16;
+	} else if (gc_ver == IP_VERSION(9, 4, 3) ||
+		   gc_ver == IP_VERSION(9, 4, 4) ||
+		   gc_ver == IP_VERSION(9, 5, 0)) {
+		/* 32 GT/s */
+		adev->gmc.xgmi.max_speed = 32;
+		adev->gmc.xgmi.max_width = 16;
+	}
+}
+
+/*
+ * Override the XGMI max link speed and width stored in adev->gmc.xgmi.
+ *
+ * NOTE(review): the name starts with "amgpu_" while every other symbol in
+ * this file uses "amdgpu_" - looks like a typo, but it is part of the
+ * exported interface, so renaming needs a coordinated change.
+ */
+void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev,
+ uint16_t max_speed, uint8_t max_width)
+{
+ adev->gmc.xgmi.max_speed = max_speed;
+ adev->gmc.xgmi.max_width = max_width;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 86fbf56938f4..5f36aff17e79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -23,7 +23,6 @@
#define __AMDGPU_XGMI_H__
#include <drm/task_barrier.h>
-#include "amdgpu_psp.h"
#include "amdgpu_ras.h"
struct amdgpu_hive_info {
@@ -43,7 +42,10 @@ struct amdgpu_hive_info {
} pstate;
struct amdgpu_reset_domain *reset_domain;
- uint32_t device_remove_count;
+ atomic_t ras_recovery;
+ struct ras_event_manager event_mgr;
+ struct work_struct reset_on_init_work;
+ atomic_t requested_nps_mode;
};
struct amdgpu_pcs_ras_field {
@@ -52,27 +54,80 @@ struct amdgpu_pcs_ras_field {
uint32_t pcs_err_shift;
};
-extern struct amdgpu_xgmi_ras xgmi_ras;
+/**
+ * enum amdgpu_xgmi_bw_mode - how an XGMI bandwidth range is reported
+ *
+ * @AMDGPU_XGMI_BW_MODE_PER_LINK: range for any single xgmi link
+ * @AMDGPU_XGMI_BW_MODE_PER_PEER: range from the max of a single xgmi link
+ *	to the max of multiple links, based on the source peer
+ */
+enum amdgpu_xgmi_bw_mode {
+ AMDGPU_XGMI_BW_MODE_PER_LINK = 0,
+ AMDGPU_XGMI_BW_MODE_PER_PEER
+};
+
+/*
+ * Unit selector for amdgpu_xgmi_get_bandwidth(): MBYTES scales the result
+ * by 1000 relative to GBYTES.
+ */
+enum amdgpu_xgmi_bw_unit {
+ AMDGPU_XGMI_BW_UNIT_GBYTES = 0,
+ AMDGPU_XGMI_BW_UNIT_MBYTES
+};
+
+struct amdgpu_xgmi_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+extern struct amdgpu_xgmi_ras xgmi_ras;
+
+/* Per-device XGMI state, embedded in the device's gmc data as gmc.xgmi. */
+struct amdgpu_xgmi {
+ /* from psp */
+ u64 node_id;
+ u64 hive_id;
+ /* fixed per family */
+ u64 node_segment_size;
+ /* physical node (0-3) */
+ unsigned physical_node_id;
+ /* number of nodes (0-4) */
+ unsigned num_physical_nodes;
+ /* gpu list in the same hive */
+ struct list_head head;
+ bool supported;
+ struct ras_common_if *ras_if;
+ bool connected_to_cpu;
+ struct amdgpu_xgmi_ras *ras;
+ /* max link speed; defaults set in amdgpu_xgmi_early_init() are annotated there as GT/s */
+ uint16_t max_speed;
+ /* max link width; used as the lane count in the bandwidth calculation */
+ uint8_t max_width;
+};
+
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
-int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev);
-int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
+ enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
+ uint32_t *min_bw, uint32_t *max_bw);
+bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
uint64_t addr);
-static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev)
-{
- return (amdgpu_use_xgmi_p2p &&
- adev != bo_adev &&
- adev->gmc.xgmi.hive_id &&
- adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
-}
+bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev);
int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);
+
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive,
+ int req_nps_mode);
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev,
+ int global_link_num);
+int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num);
+
+void amdgpu_xgmi_early_init(struct amdgpu_device *adev);
+uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev);
+
+void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev,
+ uint16_t max_speed, uint8_t max_width);
+
+/* Cleanup macro for use with __free(xgmi_put_hive) */
+DEFINE_FREE(xgmi_put_hive, struct amdgpu_hive_info *, if (_T) amdgpu_put_xgmi_hive(_T))
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 104a5ad8397d..3a79ed7d8031 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -28,17 +28,21 @@
#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64
#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4
-
+#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048
+#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB 2
+#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB 64
/*
* layout
- * 0 64KB 65KB 66KB
- * | VBIOS | PF2VF | VF2PF | Bad Page | ...
- * | 64KB | 1KB | 1KB |
+ * 0 64KB 65KB 66KB 68KB 132KB
+ * | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
+ * | 64KB | 1KB | 1KB | 2KB | 64KB | ...
*/
+
#define AMD_SRIOV_MSG_SIZE_KB 1
#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)
/*
* PF2VF history log:
@@ -86,28 +90,62 @@ enum amd_sriov_ucode_engine_id {
union amd_sriov_msg_feature_flags {
struct {
- uint32_t error_log_collect : 1;
- uint32_t host_load_ucodes : 1;
- uint32_t host_flr_vramlost : 1;
- uint32_t mm_bw_management : 1;
- uint32_t pp_one_vf_mode : 1;
- uint32_t reg_indirect_acc : 1;
- uint32_t av1_support : 1;
- uint32_t reserved : 25;
+ uint32_t error_log_collect : 1;
+ uint32_t host_load_ucodes : 1;
+ uint32_t host_flr_vramlost : 1;
+ uint32_t mm_bw_management : 1;
+ uint32_t pp_one_vf_mode : 1;
+ uint32_t reg_indirect_acc : 1;
+ uint32_t av1_support : 1;
+ uint32_t vcn_rb_decouple : 1;
+ uint32_t mes_info_dump_enable : 1;
+ uint32_t ras_caps : 1;
+ uint32_t ras_telemetry : 1;
+ uint32_t ras_cper : 1;
+ uint32_t reserved : 20;
} flags;
uint32_t all;
};
union amd_sriov_reg_access_flags {
struct {
- uint32_t vf_reg_access_ih : 1;
- uint32_t vf_reg_access_mmhub : 1;
- uint32_t vf_reg_access_gc : 1;
- uint32_t reserved : 29;
+ uint32_t vf_reg_access_ih : 1;
+ uint32_t vf_reg_access_mmhub : 1;
+ uint32_t vf_reg_access_gc : 1;
+ uint32_t vf_reg_access_l1_tlb_cntl : 1;
+ uint32_t vf_reg_access_sq_config : 1;
+ uint32_t reserved : 27;
} flags;
uint32_t all;
};
+/*
+ * RAS capability bits carried in the PF2VF message (see the ras_en_caps and
+ * ras_telemetry_en_caps members of struct amd_sriov_msg_pf2vf_info). Each
+ * block_* bit names one RAS block; @all overlays the bit-field as a single
+ * 64-bit word.
+ */
+union amd_sriov_ras_caps {
+ struct {
+ uint64_t block_umc : 1;
+ uint64_t block_sdma : 1;
+ uint64_t block_gfx : 1;
+ uint64_t block_mmhub : 1;
+ uint64_t block_athub : 1;
+ uint64_t block_pcie_bif : 1;
+ uint64_t block_hdp : 1;
+ uint64_t block_xgmi_wafl : 1;
+ uint64_t block_df : 1;
+ uint64_t block_smn : 1;
+ uint64_t block_sem : 1;
+ uint64_t block_mp0 : 1;
+ uint64_t block_mp1 : 1;
+ uint64_t block_fuse : 1;
+ uint64_t block_mca : 1;
+ uint64_t block_vcn : 1;
+ uint64_t block_jpeg : 1;
+ uint64_t block_ih : 1;
+ uint64_t block_mpio : 1;
+ uint64_t poison_propogation_mode : 1;
+ /* 20 bits defined above + 44 reserved = 64 */
+ uint64_t reserved : 44;
+ } bits;
+ uint64_t all;
+};
+
union amd_sriov_msg_os_info {
struct {
uint32_t windows : 1;
@@ -156,7 +194,7 @@ struct amd_sriov_msg_pf2vf_info_header {
uint32_t reserved[2];
};
-#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48)
+#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (55)
struct amd_sriov_msg_pf2vf_info {
/* header contains size and version */
struct amd_sriov_msg_pf2vf_info_header header;
@@ -207,9 +245,17 @@ struct amd_sriov_msg_pf2vf_info {
struct amd_sriov_msg_uuid_info uuid_info;
/* PCIE atomic ops support flag */
uint32_t pcie_atomic_ops_support_flags;
+ /* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */
+ uint32_t gpu_capacity;
+ /* vf bdf on host pci tree for debug only */
+ uint32_t bdf_on_host;
+ uint32_t more_bp; //Reserved for future use.
+ union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+
/* reserved */
uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE];
-};
+} __packed;
struct amd_sriov_msg_vf2pf_info_header {
/* the total structure size in byte */
@@ -220,7 +266,7 @@ struct amd_sriov_msg_vf2pf_info_header {
uint32_t reserved[2];
};
-#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70)
+#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73)
struct amd_sriov_msg_vf2pf_info {
/* header contains size and version */
struct amd_sriov_msg_vf2pf_info_header header;
@@ -264,10 +310,12 @@ struct amd_sriov_msg_vf2pf_info {
uint32_t version;
} ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE];
uint64_t dummy_page_addr;
-
+ /* FB allocated for guest MES to record UQ info */
+ uint64_t mes_info_addr;
+ uint32_t mes_info_size;
/* reserved */
uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE];
-};
+} __packed;
/* mailbox message send from guest to host */
enum amd_sriov_mailbox_request_message {
@@ -277,22 +325,98 @@ enum amd_sriov_mailbox_request_message {
MB_REQ_MSG_REL_GPU_FINI_ACCESS,
MB_REQ_MSG_REQ_GPU_RESET_ACCESS,
MB_REQ_MSG_REQ_GPU_INIT_DATA,
+ MB_REQ_MSG_PSP_VF_CMD_RELAY,
MB_REQ_MSG_LOG_VF_ERROR = 200,
+ MB_REQ_MSG_READY_TO_RESET = 201,
+ MB_REQ_MSG_RAS_POISON = 202,
+ MB_REQ_RAS_ERROR_COUNT = 203,
+ MB_REQ_RAS_CPER_DUMP = 204,
+ MB_REQ_RAS_BAD_PAGES = 205,
};
/* mailbox message send from host to guest */
enum amd_sriov_mailbox_response_message {
- MB_RES_MSG_CLR_MSG_BUF = 0,
- MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
- MB_RES_MSG_FLR_NOTIFICATION,
- MB_RES_MSG_FLR_NOTIFICATION_COMPLETION,
- MB_RES_MSG_SUCCESS,
- MB_RES_MSG_FAIL,
- MB_RES_MSG_QUERY_ALIVE,
- MB_RES_MSG_GPU_INIT_DATA_READY,
-
- MB_RES_MSG_TEXT_MESSAGE = 255
+ MB_RES_MSG_CLR_MSG_BUF = 0,
+ MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
+ MB_RES_MSG_FLR_NOTIFICATION = 2,
+ MB_RES_MSG_FLR_NOTIFICATION_COMPLETION = 3,
+ MB_RES_MSG_SUCCESS = 4,
+ MB_RES_MSG_FAIL = 5,
+ MB_RES_MSG_QUERY_ALIVE = 6,
+ MB_RES_MSG_GPU_INIT_DATA_READY = 7,
+ MB_RES_MSG_RAS_POISON_READY = 8,
+ MB_RES_MSG_PF_SOFT_FLR_NOTIFICATION = 9,
+ MB_RES_MSG_GPU_RMA = 10,
+ MB_RES_MSG_RAS_ERROR_COUNT_READY = 11,
+ MB_REQ_RAS_CPER_DUMP_READY = 14,
+ MB_RES_MSG_RAS_BAD_PAGES_READY = 15,
+ MB_RES_MSG_RAS_BAD_PAGES_NOTIFICATION = 16,
+ MB_RES_MSG_UNRECOV_ERR_NOTIFICATION = 17,
+ MB_RES_MSG_TEXT_MESSAGE = 255
+};
+
+enum amd_sriov_ras_telemetry_gpu_block {
+ RAS_TELEMETRY_GPU_BLOCK_UMC = 0,
+ RAS_TELEMETRY_GPU_BLOCK_SDMA = 1,
+ RAS_TELEMETRY_GPU_BLOCK_GFX = 2,
+ RAS_TELEMETRY_GPU_BLOCK_MMHUB = 3,
+ RAS_TELEMETRY_GPU_BLOCK_ATHUB = 4,
+ RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF = 5,
+ RAS_TELEMETRY_GPU_BLOCK_HDP = 6,
+ RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL = 7,
+ RAS_TELEMETRY_GPU_BLOCK_DF = 8,
+ RAS_TELEMETRY_GPU_BLOCK_SMN = 9,
+ RAS_TELEMETRY_GPU_BLOCK_SEM = 10,
+ RAS_TELEMETRY_GPU_BLOCK_MP0 = 11,
+ RAS_TELEMETRY_GPU_BLOCK_MP1 = 12,
+ RAS_TELEMETRY_GPU_BLOCK_FUSE = 13,
+ RAS_TELEMETRY_GPU_BLOCK_MCA = 14,
+ RAS_TELEMETRY_GPU_BLOCK_VCN = 15,
+ RAS_TELEMETRY_GPU_BLOCK_JPEG = 16,
+ RAS_TELEMETRY_GPU_BLOCK_IH = 17,
+ RAS_TELEMETRY_GPU_BLOCK_MPIO = 18,
+ RAS_TELEMETRY_GPU_BLOCK_COUNT = 19,
+};
+
+struct amd_sriov_ras_telemetry_header {
+ uint32_t checksum;
+ uint32_t used_size;
+ uint32_t reserved[2];
+};
+
+struct amd_sriov_ras_telemetry_error_count {
+ struct {
+ uint32_t ce_count;
+ uint32_t ue_count;
+ uint32_t de_count;
+ uint32_t ce_overflow_count;
+ uint32_t ue_overflow_count;
+ uint32_t de_overflow_count;
+ uint32_t reserved[6];
+ } block[RAS_TELEMETRY_GPU_BLOCK_COUNT];
+};
+
+struct amd_sriov_ras_cper_dump {
+ uint32_t more;
+ uint64_t overflow_count;
+ uint64_t count;
+ uint64_t wptr;
+ uint32_t buf[];
+};
+
+struct amd_sriov_ras_chk_criti {
+ uint32_t hit;
+};
+
+struct amdsriov_ras_telemetry {
+ struct amd_sriov_ras_telemetry_header header;
+
+ union {
+ struct amd_sriov_ras_telemetry_error_count error_count;
+ struct amd_sriov_ras_cper_dump cper_dump;
+ struct amd_sriov_ras_chk_criti chk_criti;
+ } body;
};
/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index d0fc62784e82..811124ff88a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -24,16 +24,16 @@
#include "soc15.h"
#include "soc15_common.h"
+#include "amdgpu_reg_state.h"
#include "amdgpu_xcp.h"
#include "gfx_v9_4_3.h"
#include "gfxhub_v1_2.h"
#include "sdma_v4_4_2.h"
+#include "amdgpu_ip.h"
#define XCP_INST_MASK(num_inst, xcp_id) \
(num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0)
-#define AMDGPU_XCP_OPS_KFD (1 << 0)
-
void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
{
int i;
@@ -61,222 +61,6 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
}
-static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
- uint32_t inst_idx, struct amdgpu_ring *ring)
-{
- int xcp_id;
- enum AMDGPU_XCP_IP_BLOCK ip_blk;
- uint32_t inst_mask;
-
- ring->xcp_id = AMDGPU_XCP_NO_PARTITION;
- if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
- return;
-
- inst_mask = 1 << inst_idx;
-
- switch (ring->funcs->type) {
- case AMDGPU_HW_IP_GFX:
- case AMDGPU_RING_TYPE_COMPUTE:
- case AMDGPU_RING_TYPE_KIQ:
- ip_blk = AMDGPU_XCP_GFX;
- break;
- case AMDGPU_RING_TYPE_SDMA:
- ip_blk = AMDGPU_XCP_SDMA;
- break;
- case AMDGPU_RING_TYPE_VCN_ENC:
- case AMDGPU_RING_TYPE_VCN_JPEG:
- ip_blk = AMDGPU_XCP_VCN;
- if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
- inst_mask = 1 << (inst_idx * 2);
- break;
- default:
- DRM_ERROR("Not support ring type %d!", ring->funcs->type);
- return;
- }
-
- for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) {
- if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) {
- ring->xcp_id = xcp_id;
- break;
- }
- }
-}
-
-static void aqua_vanjaram_xcp_gpu_sched_update(
- struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- unsigned int sel_xcp_id)
-{
- unsigned int *num_gpu_sched;
-
- num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id]
- .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds;
- adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio]
- .sched[(*num_gpu_sched)++] = &ring->sched;
- DRM_DEBUG("%s :[%d] gpu_sched[%d][%d] = %d", ring->name,
- sel_xcp_id, ring->funcs->type,
- ring->hw_prio, *num_gpu_sched);
-}
-
-static int aqua_vanjaram_xcp_sched_list_update(
- struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring;
- int i;
-
- for (i = 0; i < MAX_XCP; i++) {
- atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0);
- memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched));
- }
-
- if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
- return 0;
-
- for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
- ring = adev->rings[i];
- if (!ring || !ring->sched.ready || ring->no_scheduler)
- continue;
-
- aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
-
- /* VCN is shared by two partitions under CPX MODE */
- if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
- ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
- adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
- aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
- }
-
- return 0;
-}
-
-static int aqua_vanjaram_update_partition_sched_list(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->num_rings; i++) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ||
- ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
- aqua_vanjaram_set_xcp_id(adev, ring->xcc_id, ring);
- else
- aqua_vanjaram_set_xcp_id(adev, ring->me, ring);
- }
-
- return aqua_vanjaram_xcp_sched_list_update(adev);
-}
-
-static int aqua_vanjaram_select_scheds(
- struct amdgpu_device *adev,
- u32 hw_ip,
- u32 hw_prio,
- struct amdgpu_fpriv *fpriv,
- unsigned int *num_scheds,
- struct drm_gpu_scheduler ***scheds)
-{
- u32 sel_xcp_id;
- int i;
-
- if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) {
- u32 least_ref_cnt = ~0;
-
- fpriv->xcp_id = 0;
- for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
- u32 total_ref_cnt;
-
- total_ref_cnt = atomic_read(&adev->xcp_mgr->xcp[i].ref_cnt);
- if (total_ref_cnt < least_ref_cnt) {
- fpriv->xcp_id = i;
- least_ref_cnt = total_ref_cnt;
- }
- }
- }
- sel_xcp_id = fpriv->xcp_id;
-
- if (adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) {
- *num_scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds;
- *scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched;
- atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt);
- DRM_DEBUG("Selected partition #%d", sel_xcp_id);
- } else {
- DRM_ERROR("Failed to schedule partition #%d.", sel_xcp_id);
- return -ENOENT;
- }
-
- return 0;
-}
-
-static int8_t aqua_vanjaram_logical_to_dev_inst(struct amdgpu_device *adev,
- enum amd_hw_ip_block_type block,
- int8_t inst)
-{
- int8_t dev_inst;
-
- switch (block) {
- case GC_HWIP:
- case SDMA0_HWIP:
- /* Both JPEG and VCN as JPEG is only alias of VCN */
- case VCN_HWIP:
- dev_inst = adev->ip_map.dev_inst[block][inst];
- break;
- default:
- /* For rest of the IPs, no look up required.
- * Assume 'logical instance == physical instance' for all configs. */
- dev_inst = inst;
- break;
- }
-
- return dev_inst;
-}
-
-static uint32_t aqua_vanjaram_logical_to_dev_mask(struct amdgpu_device *adev,
- enum amd_hw_ip_block_type block,
- uint32_t mask)
-{
- uint32_t dev_mask = 0;
- int8_t log_inst, dev_inst;
-
- while (mask) {
- log_inst = ffs(mask) - 1;
- dev_inst = aqua_vanjaram_logical_to_dev_inst(adev, block, log_inst);
- dev_mask |= (1 << dev_inst);
- mask &= ~(1 << log_inst);
- }
-
- return dev_mask;
-}
-
-static void aqua_vanjaram_populate_ip_map(struct amdgpu_device *adev,
- enum amd_hw_ip_block_type ip_block,
- uint32_t inst_mask)
-{
- int l = 0, i;
-
- while (inst_mask) {
- i = ffs(inst_mask) - 1;
- adev->ip_map.dev_inst[ip_block][l++] = i;
- inst_mask &= ~(1 << i);
- }
- for (; l < HWIP_MAX_INSTANCE; l++)
- adev->ip_map.dev_inst[ip_block][l] = -1;
-}
-
-void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev)
-{
- u32 ip_map[][2] = {
- { GC_HWIP, adev->gfx.xcc_mask },
- { SDMA0_HWIP, adev->sdma.sdma_mask },
- { VCN_HWIP, adev->vcn.inst_mask },
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ip_map); ++i)
- aqua_vanjaram_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]);
-
- adev->ip_map.logical_to_dev_inst = aqua_vanjaram_logical_to_dev_inst;
- adev->ip_map.logical_to_dev_mask = aqua_vanjaram_logical_to_dev_mask;
-}
-
/* Fixed pattern for smn addressing on different AIDs:
* bit[34]: indicate cross AID access
* bit[33:32]: indicate target AID id
@@ -296,13 +80,59 @@ u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id)
return ext_offset;
}
+static enum amdgpu_gfx_partition
+__aqua_vanjaram_calc_xcp_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc, num_xcc_per_xcp = 0, mode = 0;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+ if (adev->gfx.funcs->get_xccs_per_xcp)
+ num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
+ if ((num_xcc_per_xcp) && (num_xcc % num_xcc_per_xcp == 0))
+ mode = num_xcc / num_xcc_per_xcp;
+
+ if (num_xcc_per_xcp == 1)
+ return AMDGPU_CPX_PARTITION_MODE;
+
+ switch (mode) {
+ case 1:
+ return AMDGPU_SPX_PARTITION_MODE;
+ case 2:
+ return AMDGPU_DPX_PARTITION_MODE;
+ case 3:
+ return AMDGPU_TPX_PARTITION_MODE;
+ case 4:
+ return AMDGPU_QPX_PARTITION_MODE;
+ default:
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ }
+
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
{
- enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ enum amdgpu_gfx_partition derv_mode,
+ mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
struct amdgpu_device *adev = xcp_mgr->adev;
- if (adev->nbio.funcs->get_compute_partition_mode)
+ derv_mode = __aqua_vanjaram_calc_xcp_mode(xcp_mgr);
+
+ if (amdgpu_sriov_vf(adev))
+ return derv_mode;
+
+ if (adev->nbio.funcs->get_compute_partition_mode) {
mode = adev->nbio.funcs->get_compute_partition_mode(adev);
+ if (mode != derv_mode) {
+ dev_warn(
+ adev->dev,
+ "Mismatch in compute partition mode - reported : %d derived : %d",
+ mode, derv_mode);
+ if (derv_mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ amdgpu_device_bus_status_check(adev);
+ }
+ }
return mode;
}
@@ -339,38 +169,31 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x
struct amdgpu_xcp_ip *ip)
{
struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_sdma, num_vcn, num_shared_vcn, num_xcp;
int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
- int num_sdma, num_vcn;
num_sdma = adev->sdma.num_instances;
num_vcn = adev->vcn.num_vcn_inst;
+ num_shared_vcn = 1;
+
+ num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
+ num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp;
switch (xcp_mgr->mode) {
case AMDGPU_SPX_PARTITION_MODE:
- num_sdma_xcp = num_sdma;
- num_vcn_xcp = num_vcn;
- break;
case AMDGPU_DPX_PARTITION_MODE:
- num_sdma_xcp = num_sdma / 2;
- num_vcn_xcp = num_vcn / 2;
- break;
case AMDGPU_TPX_PARTITION_MODE:
- num_sdma_xcp = num_sdma / 3;
- num_vcn_xcp = num_vcn / 3;
- break;
case AMDGPU_QPX_PARTITION_MODE:
- num_sdma_xcp = num_sdma / 4;
- num_vcn_xcp = num_vcn / 4;
- break;
case AMDGPU_CPX_PARTITION_MODE:
- num_sdma_xcp = 2;
- num_vcn_xcp = num_vcn ? 1 : 0;
+ num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp);
+ num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp);
break;
default:
return -EINVAL;
}
- num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
+ if (num_vcn && num_xcp > num_vcn)
+ num_shared_vcn = num_xcp / num_vcn;
switch (ip_id) {
case AMDGPU_XCP_GFXHUB:
@@ -386,7 +209,8 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x
ip->ip_funcs = &sdma_v4_4_2_xcp_funcs;
break;
case AMDGPU_XCP_VCN:
- ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id);
+ ip->inst_mask =
+ XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn);
/* TODO : Assign IP funcs */
break;
default:
@@ -398,6 +222,94 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x
return 0;
}
+static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int px_mode, int *num_xcp,
+ uint16_t *nps_modes)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+ if (!num_xcp || !nps_modes || !(xcp_mgr->supp_xcp_modes & BIT(px_mode)))
+ return -EINVAL;
+
+ switch (px_mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ *num_xcp = 1;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE);
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ *num_xcp = 2;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ *num_xcp = 3;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ *num_xcp = 4;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ if (gc_ver == IP_VERSION(9, 5, 0))
+ *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ *num_xcp = NUM_XCC(adev->gfx.xcc_mask);
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ if (gc_ver == IP_VERSION(9, 5, 0))
+ *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int max_res[AMDGPU_XCP_RES_MAX] = {};
+ bool res_lt_xcp;
+ int num_xcp, i, r;
+ u16 nps_modes;
+
+ if (!(xcp_mgr->supp_xcp_modes & BIT(mode)))
+ return -EINVAL;
+
+ max_res[AMDGPU_XCP_RES_XCC] = NUM_XCC(adev->gfx.xcc_mask);
+ max_res[AMDGPU_XCP_RES_DMA] = adev->sdma.num_instances;
+ max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst;
+ max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst;
+
+ r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp, &nps_modes);
+ if (r)
+ return r;
+
+ xcp_cfg->compatible_nps_modes =
+ (adev->gmc.supported_nps_modes & nps_modes);
+ xcp_cfg->num_res = ARRAY_SIZE(max_res);
+
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ res_lt_xcp = max_res[i] < num_xcp;
+ xcp_cfg->xcp_res[i].id = i;
+ xcp_cfg->xcp_res[i].num_inst =
+ res_lt_xcp ? 1 : max_res[i] / num_xcp;
+ xcp_cfg->xcp_res[i].num_inst =
+ i == AMDGPU_XCP_RES_JPEG ?
+ xcp_cfg->xcp_res[i].num_inst *
+ adev->jpeg.num_jpeg_rings : xcp_cfg->xcp_res[i].num_inst;
+ xcp_cfg->xcp_res[i].num_shared =
+ res_lt_xcp ? num_xcp / max_res[i] : 1;
+ }
+
+ return 0;
+}
+
static enum amdgpu_gfx_partition
__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
{
@@ -414,7 +326,7 @@ __aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
if (adev->gmc.num_mem_partitions == num_xcc / 2)
return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE :
- AMDGPU_QPX_PARTITION_MODE;
+ AMDGPU_CPX_PARTITION_MODE;
if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU))
return AMDGPU_DPX_PARTITION_MODE;
@@ -426,27 +338,31 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
enum amdgpu_gfx_partition mode)
{
struct amdgpu_device *adev = xcp_mgr->adev;
- int num_xcc, num_xccs_per_xcp;
+ int num_xcc, num_xccs_per_xcp, r;
+ int num_xcp, nps_mode;
+ u16 supp_nps_modes;
+ bool comp_mode;
+
+ nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp,
+ &supp_nps_modes);
+ if (r)
+ return false;
+ comp_mode = !!(BIT(nps_mode) & supp_nps_modes);
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
switch (mode) {
case AMDGPU_SPX_PARTITION_MODE:
- return adev->gmc.num_mem_partitions == 1 && num_xcc > 0;
+ return comp_mode && num_xcc > 0;
case AMDGPU_DPX_PARTITION_MODE:
- return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0;
+ return comp_mode && (num_xcc % 4) == 0;
case AMDGPU_TPX_PARTITION_MODE:
- return (adev->gmc.num_mem_partitions == 1 ||
- adev->gmc.num_mem_partitions == 3) &&
- ((num_xcc % 3) == 0);
+ return comp_mode && ((num_xcc % 3) == 0);
case AMDGPU_QPX_PARTITION_MODE:
num_xccs_per_xcp = num_xcc / 4;
- return (adev->gmc.num_mem_partitions == 1 ||
- adev->gmc.num_mem_partitions == 4) &&
- (num_xccs_per_xcp >= 2);
+ return comp_mode && (num_xccs_per_xcp >= 2);
case AMDGPU_CPX_PARTITION_MODE:
- return ((num_xcc > 1) &&
- (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 4) &&
- (num_xcc % adev->gmc.num_mem_partitions) == 0);
+ return comp_mode && (num_xcc > 1);
default:
return false;
}
@@ -454,31 +370,16 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
return false;
}
-static int __aqua_vanjaram_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+static void __aqua_vanjaram_update_available_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
{
- /* TODO:
- * Stop user queues and threads, and make sure GPU is empty of work.
- */
-
- if (flags & AMDGPU_XCP_OPS_KFD)
- amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev);
-
- return 0;
-}
+ int mode;
-static int __aqua_vanjaram_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
-{
- int ret = 0;
+ xcp_mgr->avail_xcp_modes = 0;
- if (flags & AMDGPU_XCP_OPS_KFD) {
- amdgpu_amdkfd_device_probe(xcp_mgr->adev);
- amdgpu_amdkfd_device_init(xcp_mgr->adev);
- /* If KFD init failed, return failure */
- if (!xcp_mgr->adev->kfd.init_complete)
- ret = -EIO;
+ for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+ if (__aqua_vanjaram_is_valid_mode(xcp_mgr, mode))
+ xcp_mgr->avail_xcp_modes |= BIT(mode);
}
-
- return ret;
}
static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
@@ -493,6 +394,12 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
+ if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) {
+ dev_err(adev->dev,
+ "Invalid config, no compatible compute partition mode found, available memory partitions: %d",
+ adev->gmc.num_mem_partitions);
+ return -EINVAL;
+ }
} else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) {
dev_err(adev->dev,
"Invalid compute partition mode requested, requested: %s, available memory partitions: %d",
@@ -500,7 +407,7 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
return -EINVAL;
}
- if (adev->kfd.init_complete)
+ if (adev->kfd.init_complete && !amdgpu_in_reset(adev))
flags |= AMDGPU_XCP_OPS_KFD;
if (flags & AMDGPU_XCP_OPS_KFD) {
@@ -509,7 +416,7 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
goto out;
}
- ret = __aqua_vanjaram_pre_partition_switch(xcp_mgr, flags);
+ ret = amdgpu_xcp_pre_partition_switch(xcp_mgr, flags);
if (ret)
goto unlock;
@@ -522,7 +429,9 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
*num_xcps = num_xcc / num_xcc_per_xcp;
amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);
- ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags);
+ ret = amdgpu_xcp_post_partition_switch(xcp_mgr, flags);
+ if (!ret)
+ __aqua_vanjaram_update_available_partition_mode(xcp_mgr);
unlock:
if (flags & AMDGPU_XCP_OPS_KFD)
amdgpu_amdkfd_unlock_kfd(adev);
@@ -601,20 +510,23 @@ struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = {
.switch_partition_mode = &aqua_vanjaram_switch_partition_mode,
.query_partition_mode = &aqua_vanjaram_query_partition_mode,
.get_ip_details = &aqua_vanjaram_get_xcp_ip_details,
+ .get_xcp_res_info = &aqua_vanjaram_get_xcp_res_info,
.get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id,
- .select_scheds = &aqua_vanjaram_select_scheds,
- .update_partition_sched_list = &aqua_vanjaram_update_partition_sched_list
};
static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
{
int ret;
+ if (amdgpu_sriov_vf(adev))
+ aqua_vanjaram_xcp_funcs.switch_partition_mode = NULL;
+
ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1,
&aqua_vanjaram_xcp_funcs);
if (ret)
return ret;
+ amdgpu_xcp_update_supported_modes(adev->xcp_mgr);
/* TODO: Default memory node affinity init */
return ret;
@@ -622,7 +534,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
{
- u32 mask, inst_mask = adev->sdma.sdma_mask;
+ u32 mask, avail_inst, inst_mask = adev->sdma.sdma_mask;
int ret, i;
/* generally 1 AID supports 4 instances */
@@ -634,7 +546,9 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask;
inst_mask >>= adev->sdma.num_inst_per_aid, ++i) {
- if ((inst_mask & mask) == mask)
+ avail_inst = inst_mask & mask;
+ if (avail_inst == mask || avail_inst == 0x3 ||
+ avail_inst == 0xc)
adev->aid_mask |= (1 << i);
}
@@ -652,7 +566,420 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
if (ret)
return ret;
- aqua_vanjaram_ip_map_init(adev);
+ amdgpu_ip_map_init(adev);
return 0;
}
+
+static void aqua_read_smn(struct amdgpu_device *adev,
+ struct amdgpu_smn_reg_data *regdata,
+ uint64_t smn_addr)
+{
+ regdata->addr = smn_addr;
+ regdata->value = RREG32_PCIE(smn_addr);
+}
+
+struct aqua_reg_list {
+ uint64_t start_addr;
+ uint32_t num_regs;
+ uint32_t incrx;
+};
+
+#define DW_ADDR_INCR 4
+
+static void aqua_read_smn_ext(struct amdgpu_device *adev,
+ struct amdgpu_smn_reg_data *regdata,
+ uint64_t smn_addr, int i)
+{
+ regdata->addr =
+ smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i);
+ regdata->value = RREG32_PCIE_EXT(regdata->addr);
+}
+
+#define smnreg_0x1A340218 0x1A340218
+#define smnreg_0x1A3402E4 0x1A3402E4
+#define smnreg_0x1A340294 0x1A340294
+#define smreg_0x1A380088 0x1A380088
+
+#define NUM_PCIE_SMN_REGS 14
+
+static struct aqua_reg_list pcie_reg_addrs[] = {
+ { smnreg_0x1A340218, 1, 0 },
+ { smnreg_0x1A3402E4, 1, 0 },
+ { smnreg_0x1A340294, 6, DW_ADDR_INCR },
+ { smreg_0x1A380088, 6, DW_ADDR_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_pcie_v1_0 *pcie_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_pcie_v1_0 *pcie_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ struct pci_dev *us_pdev, *ds_pdev;
+ int aer_cap, r, n;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ pcie_reg_state = (struct amdgpu_reg_state_pcie_v1_0 *)buf;
+
+ szbuf = sizeof(*pcie_reg_state) +
+ amdgpu_reginst_size(1, sizeof(*pcie_regs), NUM_PCIE_SMN_REGS);
+ /* Only one instance of pcie regs */
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ pcie_regs = (struct amdgpu_regs_pcie_v1_0 *)((uint8_t *)buf +
+ sizeof(*pcie_reg_state));
+ pcie_regs->inst_header.instance = 0;
+ pcie_regs->inst_header.state = AMDGPU_INST_S_OK;
+ pcie_regs->inst_header.num_smn_regs = NUM_PCIE_SMN_REGS;
+
+ reg_data = pcie_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(pcie_reg_addrs); r++) {
+ start_addr = pcie_reg_addrs[r].start_addr;
+ incrx = pcie_reg_addrs[r].incrx;
+ num_regs = pcie_reg_addrs[r].num_regs;
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn(adev, reg_data, start_addr + n * incrx);
+ ++reg_data;
+ }
+ }
+
+ ds_pdev = pci_upstream_bridge(adev->pdev);
+ us_pdev = pci_upstream_bridge(ds_pdev);
+
+ pcie_capability_read_word(us_pdev, PCI_EXP_DEVSTA,
+ &pcie_regs->device_status);
+ pcie_capability_read_word(us_pdev, PCI_EXP_LNKSTA,
+ &pcie_regs->link_status);
+
+ aer_cap = pci_find_ext_capability(us_pdev, PCI_EXT_CAP_ID_ERR);
+ if (aer_cap) {
+ pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_COR_STATUS,
+ &pcie_regs->pcie_corr_err_status);
+ pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_UNCOR_STATUS,
+ &pcie_regs->pcie_uncorr_err_status);
+ }
+
+ pci_read_config_dword(us_pdev, PCI_PRIMARY_BUS,
+ &pcie_regs->sub_bus_number_latency);
+
+ pcie_reg_state->common_header.structure_size = szbuf;
+ pcie_reg_state->common_header.format_revision = 1;
+ pcie_reg_state->common_header.content_revision = 0;
+ pcie_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_PCIE;
+ pcie_reg_state->common_header.num_instances = 1;
+
+ return pcie_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x11A00050 0x11A00050
+#define smnreg_0x11A00180 0x11A00180
+#define smnreg_0x11A00070 0x11A00070
+#define smnreg_0x11A00200 0x11A00200
+#define smnreg_0x11A0020C 0x11A0020C
+#define smnreg_0x11A00210 0x11A00210
+#define smnreg_0x11A00108 0x11A00108
+
+#define XGMI_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_XGMI_SMN_REGS 25
+
+static struct aqua_reg_list xgmi_reg_addrs[] = {
+ { smnreg_0x11A00050, 1, 0 },
+ { smnreg_0x11A00180, 16, DW_ADDR_INCR },
+ { smnreg_0x11A00070, 4, DW_ADDR_INCR },
+ { smnreg_0x11A00200, 1, 0 },
+ { smnreg_0x11A0020C, 1, 0 },
+ { smnreg_0x11A00210, 1, 0 },
+ { smnreg_0x11A00108, 1, 0 },
+};
+
+static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_xgmi_v1_0 *xgmi_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_xgmi_v1_0 *xgmi_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_xgmi_instances = 8;
+ int inst = 0, i, j, r, n;
+ const int xgmi_inst = 2;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ xgmi_reg_state = (struct amdgpu_reg_state_xgmi_v1_0 *)buf;
+
+ szbuf = sizeof(*xgmi_reg_state) +
+ amdgpu_reginst_size(max_xgmi_instances, sizeof(*xgmi_regs),
+ NUM_XGMI_SMN_REGS);
+ /* Buffer must hold all max_xgmi_instances XGMI register instances */
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &xgmi_reg_state->xgmi_state_regs[0];
+ for_each_inst(i, adev->aid_mask) {
+ for (j = 0; j < xgmi_inst; ++j) {
+ xgmi_regs = (struct amdgpu_regs_xgmi_v1_0 *)p;
+ xgmi_regs->inst_header.instance = inst++;
+
+ xgmi_regs->inst_header.state = AMDGPU_INST_S_OK;
+ xgmi_regs->inst_header.num_smn_regs = NUM_XGMI_SMN_REGS;
+
+ reg_data = xgmi_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(xgmi_reg_addrs); r++) {
+ start_addr = xgmi_reg_addrs[r].start_addr;
+ incrx = xgmi_reg_addrs[r].incrx;
+ num_regs = xgmi_reg_addrs[r].num_regs;
+
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(
+ adev, reg_data,
+ XGMI_LINK_REG(start_addr, j) +
+ n * incrx,
+ i);
+ ++reg_data;
+ }
+ }
+ p = reg_data;
+ }
+ }
+
+ xgmi_reg_state->common_header.structure_size = szbuf;
+ xgmi_reg_state->common_header.format_revision = 1;
+ xgmi_reg_state->common_header.content_revision = 0;
+ xgmi_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_XGMI;
+ xgmi_reg_state->common_header.num_instances = max_xgmi_instances;
+
+ return xgmi_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x11C00070 0x11C00070
+#define smnreg_0x11C00210 0x11C00210
+
+static struct aqua_reg_list wafl_reg_addrs[] = {
+ { smnreg_0x11C00070, 4, DW_ADDR_INCR },
+ { smnreg_0x11C00210, 1, 0 },
+};
+
+#define WAFL_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_WAFL_SMN_REGS 5
+
+static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_wafl_v1_0 *wafl_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_wafl_v1_0 *wafl_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_wafl_instances = 8;
+ int inst = 0, i, j, r, n;
+ const int wafl_inst = 2;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ wafl_reg_state = (struct amdgpu_reg_state_wafl_v1_0 *)buf;
+
+ szbuf = sizeof(*wafl_reg_state) +
+ amdgpu_reginst_size(max_wafl_instances, sizeof(*wafl_regs),
+ NUM_WAFL_SMN_REGS);
+
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &wafl_reg_state->wafl_state_regs[0];
+ for_each_inst(i, adev->aid_mask) {
+ for (j = 0; j < wafl_inst; ++j) {
+ wafl_regs = (struct amdgpu_regs_wafl_v1_0 *)p;
+ wafl_regs->inst_header.instance = inst++;
+
+ wafl_regs->inst_header.state = AMDGPU_INST_S_OK;
+ wafl_regs->inst_header.num_smn_regs = NUM_WAFL_SMN_REGS;
+
+ reg_data = wafl_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(wafl_reg_addrs); r++) {
+ start_addr = wafl_reg_addrs[r].start_addr;
+ incrx = wafl_reg_addrs[r].incrx;
+ num_regs = wafl_reg_addrs[r].num_regs;
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(
+ adev, reg_data,
+ WAFL_LINK_REG(start_addr, j) +
+ n * incrx,
+ i);
+ ++reg_data;
+ }
+ }
+ p = reg_data;
+ }
+ }
+
+ wafl_reg_state->common_header.structure_size = szbuf;
+ wafl_reg_state->common_header.format_revision = 1;
+ wafl_reg_state->common_header.content_revision = 0;
+ wafl_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_WAFL;
+ wafl_reg_state->common_header.num_instances = max_wafl_instances;
+
+ return wafl_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x1B311060 0x1B311060
+#define smnreg_0x1B411060 0x1B411060
+#define smnreg_0x1B511060 0x1B511060
+#define smnreg_0x1B611060 0x1B611060
+/* The four addresses above and the two below are the first fields of the usr_reg_addrs[] entries */
+#define smnreg_0x1C307120 0x1C307120
+#define smnreg_0x1C317120 0x1C317120
+/* First fields of usr1_reg_addrs[] entries that are paired with USR_CAKE_INCR */
+#define smnreg_0x1C320830 0x1C320830
+#define smnreg_0x1C380830 0x1C380830
+#define smnreg_0x1C3D0830 0x1C3D0830
+#define smnreg_0x1C420830 0x1C420830
+/* Second usr1_reg_addrs[] group, also paired with USR_CAKE_INCR */
+#define smnreg_0x1C320100 0x1C320100
+#define smnreg_0x1C380100 0x1C380100
+#define smnreg_0x1C3D0100 0x1C3D0100
+#define smnreg_0x1C420100 0x1C420100
+/* Used by usr1_reg_addrs[] with USR_LINK_INCR and USR_CP_INCR respectively */
+#define smnreg_0x1B310500 0x1B310500
+#define smnreg_0x1C300400 0x1C300400
+/* Third fields of usr1_reg_addrs[] entries; presumably the address step (incrx) - TODO confirm against struct aqua_reg_list */
+#define USR_CAKE_INCR 0x11000
+#define USR_LINK_INCR 0x100000
+#define USR_CP_INCR 0x10000
+/* Used as num_smn for AMDGPU_REG_STATE_TYPE_USR; matches the usr_reg_addrs[] second fields summed (4 * 4 + 2 * 2) */
+#define NUM_USR_SMN_REGS 20
+
+struct aqua_reg_list usr_reg_addrs[] = { /* register list selected for AMDGPU_REG_STATE_TYPE_USR */
+ { smnreg_0x1B311060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B411060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B511060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B611060, 4, DW_ADDR_INCR },
+ { smnreg_0x1C307120, 2, DW_ADDR_INCR },
+ { smnreg_0x1C317120, 2, DW_ADDR_INCR },
+}; /* NOTE(review): fields are presumably { start_addr, num_regs, incrx } - confirm; second fields sum to 20 == NUM_USR_SMN_REGS */
+
+#define NUM_USR1_SMN_REGS 46 /* used as num_smn for AMDGPU_REG_STATE_TYPE_USR_1 */
+struct aqua_reg_list usr1_reg_addrs[] = { /* register list selected for AMDGPU_REG_STATE_TYPE_USR_1 */
+ { smnreg_0x1C320830, 6, USR_CAKE_INCR },
+ { smnreg_0x1C380830, 5, USR_CAKE_INCR },
+ { smnreg_0x1C3D0830, 5, USR_CAKE_INCR },
+ { smnreg_0x1C420830, 4, USR_CAKE_INCR },
+ { smnreg_0x1C320100, 6, USR_CAKE_INCR },
+ { smnreg_0x1C380100, 5, USR_CAKE_INCR },
+ { smnreg_0x1C3D0100, 5, USR_CAKE_INCR },
+ { smnreg_0x1C420100, 4, USR_CAKE_INCR },
+ { smnreg_0x1B310500, 4, USR_LINK_INCR },
+ { smnreg_0x1C300400, 2, USR_CP_INCR },
+}; /* NOTE(review): fields are presumably { start_addr, num_regs, incrx } - confirm; second fields sum to 46 == NUM_USR1_SMN_REGS */
+/*
+ * Dump the USR or USR_1 SMN register ranges of each AID in adev->aid_mask into
+ * buf. Returns the reported structure size, or -EINVAL / -EOVERFLOW on error.
+ */
+static ssize_t aqua_vanjaram_read_usr_state(struct amdgpu_device *adev,
+					    void *buf, size_t max_size, int reg_state)
+{
+	uint32_t start_addr, incrx, num_regs, szbuf, num_smn;
+	struct amdgpu_reg_state_usr_v1_0 *usr_reg_state;
+	struct amdgpu_regs_usr_v1_0 *usr_regs;
+	struct amdgpu_smn_reg_data *reg_data;
+	const int max_usr_instances = 4;
+	struct aqua_reg_list *reg_addrs;
+	int inst = 0, i, n, r, arr_size;
+	void *p;
+
+	if (!buf || !max_size)
+		return -EINVAL;
+
+	switch (reg_state) {
+	case AMDGPU_REG_STATE_TYPE_USR:
+		arr_size = ARRAY_SIZE(usr_reg_addrs);
+		reg_addrs = usr_reg_addrs;
+		num_smn = NUM_USR_SMN_REGS;
+		break;
+	case AMDGPU_REG_STATE_TYPE_USR_1:
+		arr_size = ARRAY_SIZE(usr1_reg_addrs);
+		reg_addrs = usr1_reg_addrs;
+		num_smn = NUM_USR1_SMN_REGS;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	usr_reg_state = (struct amdgpu_reg_state_usr_v1_0 *)buf;
+	/* The reported size always covers max_usr_instances instances */
+	szbuf = sizeof(*usr_reg_state) +
+		amdgpu_reginst_size(max_usr_instances, sizeof(*usr_regs), num_smn);
+	if (max_size < szbuf)
+		return -EOVERFLOW;
+
+	p = &usr_reg_state->usr_state_regs[0];
+	for_each_inst(i, adev->aid_mask) {
+		usr_regs = (struct amdgpu_regs_usr_v1_0 *)p;
+		usr_regs->inst_header.instance = inst++;
+		usr_regs->inst_header.state = AMDGPU_INST_S_OK;
+		usr_regs->inst_header.num_smn_regs = num_smn;
+		reg_data = usr_regs->smn_reg_values;
+		for (r = 0; r < arr_size; r++) {
+			start_addr = reg_addrs[r].start_addr;
+			incrx = reg_addrs[r].incrx;
+			num_regs = reg_addrs[r].num_regs;
+			for (n = 0; n < num_regs; n++) {
+				aqua_read_smn_ext(adev, reg_data, start_addr + n * incrx, i);
+				reg_data++;
+			}
+		}
+		p = reg_data;	/* the next instance starts right after this one's data */
+	}
+
+	usr_reg_state->common_header.structure_size = szbuf;
+	usr_reg_state->common_header.format_revision = 1;
+	usr_reg_state->common_header.content_revision = 0;
+	/* Report the type actually dumped, so USR_1 blobs are not labelled USR */
+	usr_reg_state->common_header.state_type = reg_state;
+	usr_reg_state->common_header.num_instances = max_usr_instances;
+
+	return usr_reg_state->common_header.structure_size;
+}
+
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+				    enum amdgpu_reg_state reg_state, void *buf,
+				    size_t max_size)
+{
+	/*
+	 * Route the request to the reader for the requested register state
+	 * type and hand that reader's ssize_t result back unchanged. The two
+	 * USR variants share one reader, which is told explicitly which
+	 * variant to dump.
+	 */
+	switch (reg_state) {
+	case AMDGPU_REG_STATE_TYPE_PCIE:
+		return aqua_vanjaram_read_pcie_state(adev, buf, max_size);
+	case AMDGPU_REG_STATE_TYPE_XGMI:
+		return aqua_vanjaram_read_xgmi_state(adev, buf, max_size);
+	case AMDGPU_REG_STATE_TYPE_WAFL:
+		return aqua_vanjaram_read_wafl_state(adev, buf, max_size);
+	case AMDGPU_REG_STATE_TYPE_USR:
+		return aqua_vanjaram_read_usr_state(adev, buf, max_size,
+						    AMDGPU_REG_STATE_TYPE_USR);
+	case AMDGPU_REG_STATE_TYPE_USR_1:
+		return aqua_vanjaram_read_usr_state(adev, buf, max_size,
+						    AMDGPU_REG_STATE_TYPE_USR_1);
+	default:
+		break;
+	}
+
+	/* Any other register state type is not handled here */
+	return -EINVAL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
index a13c443ea10f..42f4e163e251 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
@@ -68,7 +68,7 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 0):
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
index a9521c98e7f7..5a122f50a6e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -77,7 +77,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
case IP_VERSION(1, 3, 1):
case IP_VERSION(2, 0, 0):
case IP_VERSION(2, 0, 2):
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
index 78508ae6a670..e143fcc46148 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
@@ -70,7 +70,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
index f0e235f98afb..d1bba9c64e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -30,16 +30,21 @@
#define regATHUB_MISC_CNTL_V3_0_1 0x00d7
#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0
+#define regATHUB_MISC_CNTL_V3_3_0 0x00d8
+#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX 0
static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
{
uint32_t data;
- switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
case IP_VERSION(3, 0, 1):
data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
break;
+ case IP_VERSION(3, 3, 0):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0);
+ break;
default:
data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
break;
@@ -49,10 +54,13 @@ static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
{
- switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
case IP_VERSION(3, 0, 1):
WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
break;
+ case IP_VERSION(3, 3, 0):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data);
+ break;
default:
WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
break;
@@ -99,10 +107,11 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 1):
case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 3, 0):
athub_v3_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
athub_v3_0_update_medium_grain_light_sleep(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c
new file mode 100644
index 000000000000..8a0773b80864
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v4_1_0.h"
+#include "athub/athub_4_1_0_offset.h"
+#include "athub/athub_4_1_0_sh_mask.h"
+#include "soc15_common.h"
+
+static uint32_t athub_v4_1_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+	uint32_t cg_cntl = 0;
+
+	/*
+	 * Fetch the ATHUB_MISC_CNTL value that carries the clock gating bits.
+	 * The register is only read when the ATHUB IP version is 4.1.0; every
+	 * other version skips the access and reports 0.
+	 */
+	if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) == IP_VERSION(4, 1, 0))
+		cg_cntl = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+
+	return cg_cntl;
+}
+
+static void athub_v4_1_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+	/*
+	 * Only ATHUB 4.1.0 is programmed; for any other IP version data is dropped.
+	 */
+	if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) != IP_VERSION(4, 1, 0))
+		return;
+
+	WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+}
+
+static void
+athub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+					      bool enable)
+{
+	const uint32_t cur = athub_v4_1_0_get_cg_cntl(adev);
+	uint32_t want = cur & ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+	/* CG_ENABLE ends up set only if gating is requested and MGCG is supported */
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
+		want |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+	/* Leave the register alone when the value would not change */
+	if (want == cur)
+		return;
+	athub_v4_1_0_set_cg_cntl(adev, want);
+}
+
+static void
+athub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+					     bool enable)
+{
+	const uint32_t cur = athub_v4_1_0_get_cg_cntl(adev);
+	uint32_t want = cur & ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+	/* CG_MEM_LS_ENABLE ends up set only if requested and ATHUB LS is supported */
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
+		want |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+	/* Leave the register alone when the value would not change */
+	if (want == cur)
+		return;
+	athub_v4_1_0_set_cg_cntl(adev, want);
+}
+
+int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+				 enum amd_clockgating_state state)
+{
+	const bool gate = (state == AMD_CG_STATE_GATE);
+
+	/*
+	 * Nothing is programmed for SR-IOV virtual functions or for ATHUB IP
+	 * versions other than 4.1.0; both cases still report success.
+	 */
+	if (amdgpu_sriov_vf(adev) ||
+	    amdgpu_ip_version(adev, ATHUB_HWIP, 0) != IP_VERSION(4, 1, 0))
+		return 0;
+
+	/* Clock gating is updated first, then memory light sleep */
+	athub_v4_1_0_update_medium_grain_clock_gating(adev, gate);
+	athub_v4_1_0_update_medium_grain_light_sleep(adev, gate);
+
+	return 0;
+}
+
+void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+	/* Bits are only ORed into *flags; uint32_t matches athub_v4_1_0_get_cg_cntl() */
+	uint32_t data = athub_v4_1_0_get_cg_cntl(adev);
+
+	/* AMD_CG_SUPPORT_ATHUB_MGCG: reported when CG_ENABLE is set */
+	if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+		*flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+	/* AMD_CG_SUPPORT_ATHUB_LS: reported when CG_MEM_LS_ENABLE is set */
+	if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+		*flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h
new file mode 100644
index 000000000000..4d18d0998fa8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V4_1_0_H__
+#define __ATHUB_V4_1_0_H__
+/* ATHUB v4.1.0 clock gating entry points implemented in athub_v4_1_0.c */
+int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state); /* gates when state == AMD_CG_STATE_GATE, ungates otherwise; returns 0 */
+void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); /* ORs AMD_CG_SUPPORT_ATHUB_MGCG / _LS into *flags when the matching MISC_CNTL bit is set */
+
+#endif /* __ATHUB_V4_1_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 9f63ddb89b75..7a063e44d429 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -27,7 +27,7 @@
#include <linux/slab.h>
#include <linux/string_helpers.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <drm/drm_util.h>
@@ -62,6 +62,7 @@
typedef struct {
struct atom_context *ctx;
uint32_t *ps, *ws;
+ int ps_size, ws_size;
int ps_shift;
uint16_t start;
unsigned last_jump;
@@ -70,8 +71,8 @@ typedef struct {
} atom_exec_context;
int amdgpu_atom_debug;
-static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params);
-int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params);
+static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size);
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size);
static uint32_t atom_arg_mask[8] =
{ 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000,
@@ -223,7 +224,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr)++;
/* get_unaligned_le32 avoids unaligned accesses from atombios
* tables, noticed on a DEC Alpha. */
- val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
+ if (idx < ctx->ps_size)
+ val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
+ else
+ pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size);
if (print)
DEBUG("PS[0x%02X,0x%04X]", idx, val);
break;
@@ -261,7 +265,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
val = gctx->reg_block;
break;
default:
- val = ctx->ws[idx];
+ if (idx < ctx->ws_size)
+ val = ctx->ws[idx];
+ else
+ pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size);
}
break;
case ATOM_ARG_ID:
@@ -294,7 +301,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr) += 4;
if (print)
DEBUG("IMM 0x%08X\n", val);
- return val;
+ break;
case ATOM_SRC_WORD0:
case ATOM_SRC_WORD8:
case ATOM_SRC_WORD16:
@@ -302,7 +309,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr) += 2;
if (print)
DEBUG("IMM 0x%04X\n", val);
- return val;
+ break;
case ATOM_SRC_BYTE0:
case ATOM_SRC_BYTE8:
case ATOM_SRC_BYTE16:
@@ -311,9 +318,9 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
(*ptr)++;
if (print)
DEBUG("IMM 0x%02X\n", val);
- return val;
+ break;
}
- return 0;
+ return val;
case ATOM_ARG_PLL:
idx = U8(*ptr);
(*ptr)++;
@@ -395,7 +402,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr)
(*ptr)++;
return;
}
- return;
}
}
@@ -496,6 +502,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
idx = U8(*ptr);
(*ptr)++;
DEBUG("PS[0x%02X]", idx);
+ if (idx >= ctx->ps_size) {
+ pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size);
+ return;
+ }
ctx->ps[idx] = cpu_to_le32(val);
break;
case ATOM_ARG_WS:
@@ -528,6 +538,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
gctx->reg_block = val;
break;
default:
+ if (idx >= ctx->ws_size) {
+ pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size);
+ return;
+ }
ctx->ws[idx] = val;
}
break;
@@ -625,7 +639,7 @@ static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg)
else
SDEBUG(" table: %d\n", idx);
if (U16(ctx->ctx->cmd_table + 4 + 2 * idx))
- r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift);
+ r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift, ctx->ps_size - ctx->ps_shift);
if (r) {
ctx->abort = true;
}
@@ -1204,7 +1218,7 @@ static struct {
atom_op_div32, ATOM_ARG_WS},
};
-static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params)
+static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size)
{
int base = CU16(ctx->cmd_table + 4 + 2 * index);
int len, ws, ps, ptr;
@@ -1226,12 +1240,21 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index,
ectx.ps_shift = ps / 4;
ectx.start = base;
ectx.ps = params;
+ ectx.ps_size = params_size;
ectx.abort = false;
ectx.last_jump = 0;
- if (ws)
+ ectx.last_jump_jiffies = 0;
+ if (ws) {
ectx.ws = kcalloc(4, ws, GFP_KERNEL);
- else
+ if (!ectx.ws) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ ectx.ws_size = ws;
+ } else {
ectx.ws = NULL;
+ ectx.ws_size = 0;
+ }
debug_depth++;
while (1) {
@@ -1265,7 +1288,7 @@ free:
return ret;
}
-int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params)
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size)
{
int r;
@@ -1281,7 +1304,7 @@ int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *par
/* reset divmul */
ctx->divmul[0] = 0;
ctx->divmul[1] = 0;
- r = amdgpu_atom_execute_table_locked(ctx, index, params);
+ r = amdgpu_atom_execute_table_locked(ctx, index, params, params_size);
mutex_unlock(&ctx->mutex);
return r;
}
@@ -1425,6 +1448,7 @@ static void atom_get_vbios_pn(struct atom_context *ctx)
if (vbios_str == NULL)
vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1;
}
+ OPTIMIZER_HIDE_VAR(vbios_str);
if (vbios_str != NULL && *vbios_str == 0)
vbios_str++;
@@ -1444,10 +1468,27 @@ static void atom_get_vbios_pn(struct atom_context *ctx)
static void atom_get_vbios_version(struct atom_context *ctx)
{
+ unsigned short start = 3, end;
unsigned char *vbios_ver;
+ unsigned char *p_rom;
+
+ p_rom = ctx->bios;
+ /* Search from strings offset if it's present */
+ start = *(unsigned short *)(p_rom +
+ OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+ /* Search till atom rom header start point */
+ end = *(unsigned short *)(p_rom + OFFSET_TO_ATOM_ROM_HEADER_POINTER);
+
+ /* Use hardcoded offsets, if the offsets are not populated */
+ if (end <= start) {
+ start = 3;
+ end = 1024;
+ }
/* find anchor ATOMBIOSBK-AMD */
- vbios_ver = atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, 3, 1024, 64);
+ vbios_ver =
+ atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, start, end, 64);
if (vbios_ver != NULL) {
/* skip ATOMBIOSBK-AMD VER */
vbios_ver += 18;
@@ -1457,6 +1498,28 @@ static void atom_get_vbios_version(struct atom_context *ctx)
}
}
+static void atom_get_vbios_build(struct atom_context *ctx)
+{
+	unsigned char *atom_rom_hdr, *str;
+	uint16_t base, len;
+
+	/* Store the string that follows the config and change list strings in ctx->build_num */
+	base = CU16(ATOM_ROM_TABLE_PTR);
+	atom_rom_hdr = CSTR(base);
+
+	str = CSTR(CU16(base + ATOM_ROM_CFG_PTR));
+	/* Skip config string */
+	while (str < atom_rom_hdr && *str++)
+		;
+	/* Skip change list string */
+	while (str < atom_rom_hdr && *str++)
+		;
+	/* str at or past the ROM header leaves nothing to copy; never let the length wrap */
+	len = str < atom_rom_hdr ? min(atom_rom_hdr - str, STRLEN_NORMAL) : 0;
+	if (len)
+		strscpy(ctx->build_num, str, len);
+}
+
struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
{
int base;
@@ -1517,6 +1580,7 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
atom_get_vbios_pn(ctx);
atom_get_vbios_date(ctx);
atom_get_vbios_version(ctx);
+ atom_get_vbios_build(ctx);
return ctx;
}
@@ -1536,7 +1600,7 @@ int amdgpu_atom_asic_init(struct atom_context *ctx)
if (!CU16(ctx->cmd_table + 4 + 2 * ATOM_CMD_INIT))
return 1;
- ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps);
+ ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps, 16);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
index c11cf18a0f18..825ff28731f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.h
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -37,6 +37,7 @@ struct drm_device;
#define ATOM_ROM_MAGIC "ATOM"
#define ATOM_ROM_MAGIC_PTR 4
+#define ATOM_ROM_CFG_PTR 0xC
#define ATOM_ROM_MSG_PTR 0x10
#define ATOM_ROM_CMD_PTR 0x1E
#define ATOM_ROM_DATA_PTR 0x20
@@ -151,12 +152,13 @@ struct atom_context {
uint32_t version;
uint8_t vbios_ver_str[STRLEN_NORMAL];
uint8_t date[STRLEN_NORMAL];
+ uint8_t build_num[STRLEN_NORMAL];
};
extern int amdgpu_atom_debug;
struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios);
-int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params);
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size);
int amdgpu_atom_asic_init(struct atom_context *ctx);
void amdgpu_atom_destroy(struct atom_context *ctx);
bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size,
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
index 10098fdd33fc..3dfc28840a7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -77,7 +77,7 @@ void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc,
args.usOverscanTop = cpu_to_le16(amdgpu_crtc->v_border);
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc)
@@ -106,7 +106,7 @@ void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc)
args.ucEnable = ATOM_SCALER_DISABLE;
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock)
@@ -123,7 +123,7 @@ void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock)
args.ucCRTC = amdgpu_crtc->crtc_id;
args.ucEnable = lock;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state)
@@ -139,7 +139,7 @@ void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state)
args.ucCRTC = amdgpu_crtc->crtc_id;
args.ucEnable = state;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state)
@@ -155,7 +155,7 @@ void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state)
args.ucCRTC = amdgpu_crtc->crtc_id;
args.ucBlanking = state;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
@@ -171,7 +171,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
args.ucDispPipeId = amdgpu_crtc->crtc_id;
args.ucEnable = state;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
@@ -183,7 +183,7 @@ void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
args.ucEnable = ATOM_INIT;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
@@ -228,7 +228,7 @@ void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
args.susModeMiscInfo.usAccess = cpu_to_le16(misc);
args.ucCRTC = amdgpu_crtc->crtc_id;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
union atom_enable_ss {
@@ -293,7 +293,7 @@ static void amdgpu_atombios_crtc_program_ss(struct amdgpu_device *adev,
args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step);
args.v3.ucEnable = enable;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
union adjust_pixel_clock {
@@ -395,7 +395,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc,
ADJUST_DISPLAY_CONFIG_SS_ENABLE;
amdgpu_atom_execute_table(adev->mode_info.atom_context,
- index, (uint32_t *)&args);
+ index, (uint32_t *)&args, sizeof(args));
adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10;
break;
case 3:
@@ -428,7 +428,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc,
args.v3.sInput.ucExtTransmitterID = 0;
amdgpu_atom_execute_table(adev->mode_info.atom_context,
- index, (uint32_t *)&args);
+ index, (uint32_t *)&args, sizeof(args));
adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10;
if (args.v3.sOutput.ucRefDiv) {
amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV;
@@ -514,7 +514,7 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
DRM_ERROR("Unknown table version %d %d\n", frev, crev);
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
union set_dce_clock {
@@ -544,7 +544,7 @@ u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev,
args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */
args.v2_1.asParam.ucDCEClkType = clk_type;
args.v2_1.asParam.ucDCEClkSrc = clk_src;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10;
break;
default:
@@ -740,7 +740,7 @@ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 87c41e0e9b7c..492813ab1b54 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -83,7 +83,7 @@ static int amdgpu_atombios_dp_process_aux_ch(struct amdgpu_i2c_chan *chan,
args.v2.ucDelay = delay / 10;
args.v2.ucHPD_ID = chan->rec.hpd;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
*ack = args.v2.ucReplyStatus;
@@ -301,7 +301,7 @@ static u8 amdgpu_atombios_dp_encoder_service(struct amdgpu_device *adev,
args.ucLaneNum = lane_num;
args.ucStatus = 0;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
return args.ucStatus;
}
@@ -430,7 +430,7 @@ void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
}
int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *dig_connector;
@@ -458,8 +458,8 @@ bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connect
u8 link_status[DP_LINK_STATUS_SIZE];
struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv;
- if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux, link_status)
- <= 0)
+ if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux,
+ link_status) < 0)
return false;
if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count))
return false;
@@ -616,7 +616,7 @@ amdgpu_atombios_dp_link_train_cr(struct amdgpu_atombios_dp_link_train_info *dp_i
drm_dp_link_train_clock_recovery_delay(dp_info->aux, dp_info->dpcd);
if (drm_dp_dpcd_read_link_status(dp_info->aux,
- dp_info->link_status) <= 0) {
+ dp_info->link_status) < 0) {
DRM_ERROR("displayport link status failed\n");
break;
}
@@ -681,7 +681,7 @@ amdgpu_atombios_dp_link_train_ce(struct amdgpu_atombios_dp_link_train_info *dp_i
drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd);
if (drm_dp_dpcd_read_link_status(dp_info->aux,
- dp_info->link_status) <= 0) {
+ dp_info->link_status) < 0) {
DRM_ERROR("displayport link status failed\n");
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
index f59d85eaddf0..3e24acf8133f 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
@@ -32,7 +32,7 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
const struct drm_display_mode *mode);
int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
- struct drm_display_mode *mode);
+ const struct drm_display_mode *mode);
bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connector);
void amdgpu_atombios_dp_set_rx_power_state(struct drm_connector *connector,
u8 power_state);
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index d95b2dc78063..a51f3414b65d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -28,6 +28,7 @@
#include <acpi/video.h>
+#include <drm/drm_edid.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_connectors.h"
@@ -214,7 +215,7 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode
dig->bl_dev = bd;
bd->props.brightness = amdgpu_atombios_encoder_get_backlight_brightness(bd);
- bd->props.power = FB_BLANK_UNBLANK;
+ bd->props.power = BACKLIGHT_POWER_ON;
backlight_update_status(bd);
DRM_INFO("amdgpu atom DIG backlight initialized\n");
@@ -228,7 +229,6 @@ error:
register_acpi_backlight:
/* Try registering an ACPI video backlight device instead. */
acpi_video_register_backlight();
- return;
}
void
@@ -335,7 +335,7 @@ amdgpu_atombios_encoder_setup_dac(struct drm_encoder *encoder, int action)
args.ucDacStandard = ATOM_DAC1_PS2;
args.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
@@ -432,7 +432,7 @@ amdgpu_atombios_encoder_setup_dvo(struct drm_encoder *encoder, int action)
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
@@ -732,7 +732,7 @@ amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder,
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
@@ -1136,7 +1136,7 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
break;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
bool
@@ -1164,7 +1164,7 @@ amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector,
args.v1.ucAction = action;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
/* wait for the panel to power up */
if (action == ATOM_TRANSMITTER_ACTION_POWER_ON) {
@@ -1288,7 +1288,7 @@ amdgpu_atombios_encoder_setup_external_encoder(struct drm_encoder *encoder,
DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
static void
@@ -1633,7 +1633,7 @@ amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder)
return;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
/* This only needs to be called once at startup */
@@ -1706,7 +1706,7 @@ amdgpu_atombios_encoder_dac_load_detect(struct drm_encoder *encoder,
args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb;
}
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
return true;
} else
@@ -2064,27 +2064,25 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder)
case LCD_FAKE_EDID_PATCH_RECORD_TYPE:
fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record;
if (fake_edid_record->ucFakeEDIDLength) {
- struct edid *edid;
- int edid_size =
- max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength);
- edid = kmalloc(edid_size, GFP_KERNEL);
- if (edid) {
- memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0],
- fake_edid_record->ucFakeEDIDLength);
-
- if (drm_edid_is_valid(edid)) {
- adev->mode_info.bios_hardcoded_edid = edid;
- adev->mode_info.bios_hardcoded_edid_size = edid_size;
- } else
- kfree(edid);
- }
+ const struct drm_edid *edid;
+ int edid_size;
+
+ if (fake_edid_record->ucFakeEDIDLength == 128)
+ edid_size = fake_edid_record->ucFakeEDIDLength;
+ else
+ edid_size = fake_edid_record->ucFakeEDIDLength * 128;
+ edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size);
+ if (drm_edid_valid(edid))
+ adev->mode_info.bios_hardcoded_edid = edid;
+ else
+ drm_edid_free(edid);
+ record += struct_size(fake_edid_record,
+ ucFakeEDIDString,
+ edid_size);
+ } else {
+ /* empty fake edid record must be 3 bytes long */
+ record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;
}
- record += fake_edid_record->ucFakeEDIDLength ?
- struct_size(fake_edid_record,
- ucFakeEDIDString,
- fake_edid_record->ucFakeEDIDLength) :
- /* empty fake edid record must be 3 bytes long */
- sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;
break;
case LCD_PANEL_RESOLUTION_RECORD_TYPE:
panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record;
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index af0335535f82..a6501114322f 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -86,7 +86,7 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan,
args.ucSlaveAddr = slave_addr << 1;
args.ucLineNumber = chan->rec.i2c_id;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
/* error */
if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) {
@@ -172,5 +172,5 @@ void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr
args.ucSlaveAddr = slave_addr;
args.ucLineNumber = line_number;
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index e63abdf52b6c..9cd63b4177bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1375,14 +1375,14 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool cik_asic_supports_baco(struct amdgpu_device *adev)
+static int cik_asic_supports_baco(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_BONAIRE:
case CHIP_HAWAII:
return amdgpu_dpm_is_baco_supported(adev);
default:
- return false;
+ return 0;
}
}
@@ -1638,28 +1638,18 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
PCI_EXP_LNKCTL_HAWD);
/* linkctl2 */
- pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (bridge_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(root,
- PCI_EXP_LNKCTL2,
- tmp16);
-
- pcie_capability_read_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (gpu_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- tmp16);
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1674,16 +1664,15 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
-
+ tmp16 = 0;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS, tmp16);
speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
@@ -1709,10 +1698,6 @@ static void cik_program_aspm(struct amdgpu_device *adev)
if (pci_is_root_bus(adev->pdev->bus))
return;
- /* XXX double check APUs */
- if (adev->flags & AMD_IS_APU)
- return;
-
orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) |
@@ -2000,9 +1985,9 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.query_video_codecs = &cik_query_video_codecs,
};
-static int cik_common_early_init(void *handle)
+static int cik_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->smc_rreg = &cik_smc_rreg;
adev->smc_wreg = &cik_smc_wreg;
@@ -2139,19 +2124,9 @@ static int cik_common_early_init(void *handle)
return 0;
}
-static int cik_common_sw_init(void *handle)
+static int cik_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
-
-static int cik_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-static int cik_common_hw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* move the golden regs per IP block */
cik_init_golden_registers(adev);
@@ -2163,48 +2138,36 @@ static int cik_common_hw_init(void *handle)
return 0;
}
-static int cik_common_hw_fini(void *handle)
+static int cik_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int cik_common_suspend(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_common_hw_fini(adev);
-}
-
-static int cik_common_resume(void *handle)
+static int cik_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_common_hw_init(adev);
+ return cik_common_hw_init(ip_block);
}
-static bool cik_common_is_idle(void *handle)
+static bool cik_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int cik_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-static int cik_common_soft_reset(void *handle)
+
+static int cik_common_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* XXX hard reset?? */
return 0;
}
-static int cik_common_set_clockgating_state(void *handle,
+static int cik_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int cik_common_set_powergating_state(void *handle,
+static int cik_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -2213,15 +2176,10 @@ static int cik_common_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_common_ip_funcs = {
.name = "cik_common",
.early_init = cik_common_early_init,
- .late_init = NULL,
- .sw_init = cik_common_sw_init,
- .sw_fini = cik_common_sw_fini,
.hw_init = cik_common_hw_init,
.hw_fini = cik_common_hw_fini,
- .suspend = cik_common_suspend,
.resume = cik_common_resume,
.is_idle = cik_common_is_idle,
- .wait_for_idle = cik_common_wait_for_idle,
.soft_reset = cik_common_soft_reset,
.set_clockgating_state = cik_common_set_clockgating_state,
.set_powergating_state = cik_common_set_powergating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 6f7c031dd197..41f4705bdbbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(mmIH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(mmIH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(mmIH_RB_CNTL, tmp);
}
return (wptr & ih->ptr_mask);
}
@@ -277,9 +283,9 @@ static void cik_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int cik_ih_early_init(void *handle)
+static int cik_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -291,10 +297,10 @@ static int cik_ih_early_init(void *handle)
return 0;
}
-static int cik_ih_sw_init(void *handle)
+static int cik_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
@@ -305,9 +311,9 @@ static int cik_ih_sw_init(void *handle)
return r;
}
-static int cik_ih_sw_fini(void *handle)
+static int cik_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -315,39 +321,33 @@ static int cik_ih_sw_fini(void *handle)
return 0;
}
-static int cik_ih_hw_init(void *handle)
+static int cik_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return cik_ih_irq_init(adev);
}
-static int cik_ih_hw_fini(void *handle)
+static int cik_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cik_ih_irq_disable(adev);
+ cik_ih_irq_disable(ip_block->adev);
return 0;
}
-static int cik_ih_suspend(void *handle)
+static int cik_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_ih_hw_fini(adev);
+ return cik_ih_hw_fini(ip_block);
}
-static int cik_ih_resume(void *handle)
+static int cik_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_ih_hw_init(adev);
+ return cik_ih_hw_init(ip_block);
}
-static bool cik_ih_is_idle(void *handle)
+static bool cik_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -356,11 +356,11 @@ static bool cik_ih_is_idle(void *handle)
return true;
}
-static int cik_ih_wait_for_idle(void *handle)
+static int cik_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -372,9 +372,9 @@ static int cik_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int cik_ih_soft_reset(void *handle)
+static int cik_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -402,13 +402,13 @@ static int cik_ih_soft_reset(void *handle)
return 0;
}
-static int cik_ih_set_clockgating_state(void *handle,
+static int cik_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int cik_ih_set_powergating_state(void *handle,
+static int cik_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -417,7 +417,6 @@ static int cik_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_ih_ip_funcs = {
.name = "cik_ih",
.early_init = cik_ih_early_init,
- .late_init = NULL,
.sw_init = cik_ih_sw_init,
.sw_fini = cik_ih_sw_fini,
.hw_init = cik_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 52598fbc9b39..9e8715b4739d 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -54,7 +54,9 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
-static int cik_sdma_soft_reset(void *handle);
+static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block);
+
+u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin");
MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin");
@@ -67,9 +69,6 @@ MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin");
MODULE_FIRMWARE("amdgpu/mullins_sdma.bin");
MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin");
-u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
-
-
static void cik_sdma_free_microcode(struct amdgpu_device *adev)
{
int i;
@@ -107,7 +106,6 @@ static void cik_sdma_free_microcode(struct amdgpu_device *adev)
static int cik_sdma_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err = 0, i;
DRM_DEBUG("\n");
@@ -133,16 +131,20 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
}
out:
if (err) {
- pr_err("cik_sdma: Failed to load firmware \"%s\"\n", fw_name);
+ pr_err("cik_sdma: Failed to load firmware \"%s_sdma%s.bin\"\n",
+ chip_name, i == 0 ? "" : "1");
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
@@ -308,8 +310,6 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
u32 rb_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl &= ~SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK;
@@ -498,9 +498,6 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -700,7 +697,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -922,12 +919,17 @@ static void cik_enable_sdma_mgls(struct amdgpu_device *adev,
}
}
-static int cik_sdma_early_init(void *handle)
+static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+ r = cik_sdma_init_microcode(adev);
+ if (r)
+ return r;
+
cik_sdma_set_ring_funcs(adev);
cik_sdma_set_irq_funcs(adev);
cik_sdma_set_buffer_funcs(adev);
@@ -936,18 +938,12 @@ static int cik_sdma_early_init(void *handle)
return 0;
}
-static int cik_sdma_sw_init(void *handle)
+static int cik_sdma_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r, i;
- r = cik_sdma_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
&adev->sdma.trap_irq);
@@ -982,9 +978,9 @@ static int cik_sdma_sw_init(void *handle)
return r;
}
-static int cik_sdma_sw_fini(void *handle)
+static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -994,21 +990,16 @@ static int cik_sdma_sw_fini(void *handle)
return 0;
}
-static int cik_sdma_hw_init(void *handle)
+static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = cik_sdma_start(adev);
- if (r)
- return r;
+ struct amdgpu_device *adev = ip_block->adev;
- return r;
+ return cik_sdma_start(adev);
}
-static int cik_sdma_hw_fini(void *handle)
+static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cik_ctx_switch_enable(adev, false);
cik_sdma_enable(adev, false);
@@ -1016,25 +1007,21 @@ static int cik_sdma_hw_fini(void *handle)
return 0;
}
-static int cik_sdma_suspend(void *handle)
+static int cik_sdma_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_sdma_hw_fini(adev);
+ return cik_sdma_hw_fini(ip_block);
}
-static int cik_sdma_resume(void *handle)
+static int cik_sdma_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ cik_sdma_soft_reset(ip_block);
- cik_sdma_soft_reset(handle);
-
- return cik_sdma_hw_init(adev);
+ return cik_sdma_hw_init(ip_block);
}
-static bool cik_sdma_is_idle(void *handle)
+static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1044,27 +1031,23 @@ static bool cik_sdma_is_idle(void *handle)
return true;
}
-static int cik_sdma_wait_for_idle(void *handle)
+static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
- SRBM_STATUS2__SDMA1_BUSY_MASK);
-
- if (!tmp)
+ if (cik_sdma_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int cik_sdma_soft_reset(void *handle)
+static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp;
/* sdma0 */
@@ -1198,11 +1181,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int cik_sdma_set_clockgating_state(void *handle,
+static int cik_sdma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -1213,7 +1196,7 @@ static int cik_sdma_set_clockgating_state(void *handle,
return 0;
}
-static int cik_sdma_set_powergating_state(void *handle,
+static int cik_sdma_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1222,7 +1205,6 @@ static int cik_sdma_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_sdma_ip_funcs = {
.name = "cik_sdma",
.early_init = cik_sdma_early_init,
- .late_init = NULL,
.sw_init = cik_sdma_sw_init,
.sw_fini = cik_sdma_sw_fini,
.hw_init = cik_sdma_hw_init,
@@ -1296,7 +1278,7 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: is this a secure operation
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (CIK).
* Used by the amdgpu ttm implementation to move pages if
@@ -1306,7 +1288,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
ib->ptr[ib->length_dw++] = byte_count;
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 55982c0064b5..8aca4f2734f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -51,8 +51,14 @@
#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
-#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
-#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
+/* audio endpt instance offsets from 0x1780; consecutive instances are 0x6 apart */
+#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780)
+#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780)
+#define AUD2_REGISTER_OFFSET (0x178c - 0x1780)
+#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780)
+#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780)
+#define AUD5_REGISTER_OFFSET (0x179e - 0x1780)
+#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
#define PIPEID(x) ((x) << 0)
#define MEID(x) ((x) << 2)
@@ -364,6 +370,7 @@
* 1 - Stream
* 2 - Bypass
*/
+#define EOP_EXEC (1 << 28) /* For Trailing Fence */
#define DATA_SEL(x) ((x) << 29)
/* 0 - discard
* 1 - send low 32bit data
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h
new file mode 100644
index 000000000000..2f6c9d11d5ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __CLEARSTATE_GFX12_H_
+#define __CLEARSTATE_GFX12_H_
+
+static const unsigned int gfx12_SECT_CONTEXT_def_1[] = {
+0x00000000, //mmSC_MEM_TEMPORAL
+0x00000000, //mmSC_MEM_SPEC_READ
+0x00000000, //mmPA_SC_VPORT_0_TL
+0x00000000, //mmPA_SC_VPORT_0_BR
+0x00000000, //mmPA_SC_VPORT_1_TL
+0x00000000, //mmPA_SC_VPORT_1_BR
+0x00000000, //mmPA_SC_VPORT_2_TL
+0x00000000, //mmPA_SC_VPORT_2_BR
+0x00000000, //mmPA_SC_VPORT_3_TL
+0x00000000, //mmPA_SC_VPORT_3_BR
+0x00000000, //mmPA_SC_VPORT_4_TL
+0x00000000, //mmPA_SC_VPORT_4_BR
+0x00000000, //mmPA_SC_VPORT_5_TL
+0x00000000, //mmPA_SC_VPORT_5_BR
+0x00000000, //mmPA_SC_VPORT_6_TL
+0x00000000, //mmPA_SC_VPORT_6_BR
+0x00000000, //mmPA_SC_VPORT_7_TL
+0x00000000, //mmPA_SC_VPORT_7_BR
+0x00000000, //mmPA_SC_VPORT_8_TL
+0x00000000, //mmPA_SC_VPORT_8_BR
+0x00000000, //mmPA_SC_VPORT_9_TL
+0x00000000, //mmPA_SC_VPORT_9_BR
+0x00000000, //mmPA_SC_VPORT_10_TL
+0x00000000, //mmPA_SC_VPORT_10_BR
+0x00000000, //mmPA_SC_VPORT_11_TL
+0x00000000, //mmPA_SC_VPORT_11_BR
+0x00000000, //mmPA_SC_VPORT_12_TL
+0x00000000, //mmPA_SC_VPORT_12_BR
+0x00000000, //mmPA_SC_VPORT_13_TL
+0x00000000, //mmPA_SC_VPORT_13_BR
+0x00000000, //mmPA_SC_VPORT_14_TL
+0x00000000, //mmPA_SC_VPORT_14_BR
+0x00000000, //mmPA_SC_VPORT_15_TL
+0x00000000, //mmPA_SC_VPORT_15_BR
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_2[] = {
+0x00000000, //mmPA_CL_PROG_NEAR_CLIP_Z
+0x00000000, //mmPA_RATE_CNTL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_3[] = {
+0x00000000, //mmCP_PERFMON_CNTX_CNTL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_4[] = {
+0x00000000, //mmCONTEXT_RESERVED_REG0
+0x00000000, //mmCONTEXT_RESERVED_REG1
+0x00000000, //mmPA_SC_CLIPRECT_0_EXT
+0x00000000, //mmPA_SC_CLIPRECT_1_EXT
+0x00000000, //mmPA_SC_CLIPRECT_2_EXT
+0x00000000, //mmPA_SC_CLIPRECT_3_EXT
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_5[] = {
+0x00000000, //mmPA_SC_HIZ_INFO
+0x00000000, //mmPA_SC_HIS_INFO
+0x00000000, //mmPA_SC_HIZ_BASE
+0x00000000, //mmPA_SC_HIZ_BASE_EXT
+0x00000000, //mmPA_SC_HIZ_SIZE_XY
+0x00000000, //mmPA_SC_HIS_BASE
+0x00000000, //mmPA_SC_HIS_BASE_EXT
+0x00000000, //mmPA_SC_HIS_SIZE_XY
+0x00000000, //mmPA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
+0x00000000, //mmPA_SC_BINNER_DYNAMIC_BATCH_LIMIT
+0x00000000, //mmPA_SC_HISZ_CONTROL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_6[] = {
+0x00000000, //mmCB_MEM0_INFO
+0x00000000, //mmCB_MEM1_INFO
+0x00000000, //mmCB_MEM2_INFO
+0x00000000, //mmCB_MEM3_INFO
+0x00000000, //mmCB_MEM4_INFO
+0x00000000, //mmCB_MEM5_INFO
+0x00000000, //mmCB_MEM6_INFO
+0x00000000, //mmCB_MEM7_INFO
+};
+
+static const struct cs_extent_def gfx12_SECT_CONTEXT_defs[] = {
+ {gfx12_SECT_CONTEXT_def_1, 0x0000a03e, 34 },
+ {gfx12_SECT_CONTEXT_def_2, 0x0000a0cc, 2 },
+ {gfx12_SECT_CONTEXT_def_3, 0x0000a0d8, 1 },
+ {gfx12_SECT_CONTEXT_def_4, 0x0000a0db, 6 },
+ {gfx12_SECT_CONTEXT_def_5, 0x0000a2e5, 11 },
+ {gfx12_SECT_CONTEXT_def_6, 0x0000a3c0, 8 },
+ { 0, 0, 0 }
+};
+
+static const struct cs_section_def gfx12_cs_data[] = {
+ { gfx12_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
+
+#endif /* __CLEARSTATE_GFX12_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
index 567a904804bc..9c85ca6358c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
@@ -21,8 +21,7 @@
*
*/
-static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_1[] = {
0x00000000, // DB_RENDER_CONTROL
0x00000000, // DB_COUNT_CONTROL
0x00000000, // DB_DEPTH_VIEW
@@ -236,8 +235,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_1[] =
0x00000000, // PA_SC_VPORT_ZMIN_15
0x3f800000, // PA_SC_VPORT_ZMAX_15
};
-static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_2[] = {
0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
0x00000000, // PA_SC_TILE_STEERING_OVERRIDE
0x00000000, // CP_PERFMON_CNTX_CNTL
@@ -521,15 +519,13 @@ static const unsigned int gfx9_SECT_CONTEXT_def_2[] =
0x00000000, // CB_MRT6_EPITCH
0x00000000, // CB_MRT7_EPITCH
};
-static const unsigned int gfx9_SECT_CONTEXT_def_3[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_3[] = {
0x00000000, // PA_CL_POINT_X_RAD
0x00000000, // PA_CL_POINT_Y_RAD
0x00000000, // PA_CL_POINT_SIZE
0x00000000, // PA_CL_POINT_CULL_RAD
};
-static const unsigned int gfx9_SECT_CONTEXT_def_4[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_4[] = {
0x00000000, // DB_DEPTH_CONTROL
0x00000000, // DB_EQAA
0x00000000, // CB_COLOR_CONTROL
@@ -688,17 +684,14 @@ static const unsigned int gfx9_SECT_CONTEXT_def_4[] =
0x00000000, // VGT_GS_OUT_PRIM_TYPE
0x00000000, // IA_ENHANCE
};
-static const unsigned int gfx9_SECT_CONTEXT_def_5[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_5[] = {
0x00000000, // WD_ENHANCE
0x00000000, // VGT_PRIMITIVEID_EN
};
-static const unsigned int gfx9_SECT_CONTEXT_def_6[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_6[] = {
0x00000000, // VGT_PRIMITIVEID_RESET
};
-static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_7[] = {
0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
0x00000000, // VGT_DRAW_PAYLOAD_CNTL
0, // HOLE
@@ -766,8 +759,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_7[] =
0x00000000, // VGT_STRMOUT_CONFIG
0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
};
-static const unsigned int gfx9_SECT_CONTEXT_def_8[] =
-{
+static const unsigned int gfx9_SECT_CONTEXT_def_8[] = {
0x00000000, // PA_SC_CENTROID_PRIORITY_0
0x00000000, // PA_SC_CENTROID_PRIORITY_1
0x00001000, // PA_SC_LINE_CNTL
@@ -924,8 +916,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_8[] =
0x00000000, // CB_COLOR7_DCC_BASE
0x00000000, // CB_COLOR7_DCC_BASE_EXT
};
-static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] =
-{
+static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = {
{gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 },
{gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 },
{gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
index 66e39cdb5cb0..5fd96ddd7f0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
@@ -21,8 +21,7 @@
*
*/
-static const u32 si_SECT_CONTEXT_def_1[] =
-{
+static const u32 si_SECT_CONTEXT_def_1[] = {
0x00000000, // DB_RENDER_CONTROL
0x00000000, // DB_COUNT_CONTROL
0x00000000, // DB_DEPTH_VIEW
@@ -236,8 +235,7 @@ static const u32 si_SECT_CONTEXT_def_1[] =
0x00000000, // PA_SC_VPORT_ZMIN_15
0x3f800000, // PA_SC_VPORT_ZMAX_15
};
-static const u32 si_SECT_CONTEXT_def_2[] =
-{
+static const u32 si_SECT_CONTEXT_def_2[] = {
0x00000000, // CP_PERFMON_CNTX_CNTL
0x00000000, // CP_RINGID
0x00000000, // CP_VMID
@@ -511,8 +509,7 @@ static const u32 si_SECT_CONTEXT_def_2[] =
0x00000000, // CB_BLEND6_CONTROL
0x00000000, // CB_BLEND7_CONTROL
};
-static const u32 si_SECT_CONTEXT_def_3[] =
-{
+static const u32 si_SECT_CONTEXT_def_3[] = {
0x00000000, // PA_CL_POINT_X_RAD
0x00000000, // PA_CL_POINT_Y_RAD
0x00000000, // PA_CL_POINT_SIZE
@@ -520,8 +517,7 @@ static const u32 si_SECT_CONTEXT_def_3[] =
0x00000000, // VGT_DMA_BASE_HI
0x00000000, // VGT_DMA_BASE
};
-static const u32 si_SECT_CONTEXT_def_4[] =
-{
+static const u32 si_SECT_CONTEXT_def_4[] = {
0x00000000, // DB_DEPTH_CONTROL
0x00000000, // DB_EQAA
0x00000000, // CB_COLOR_CONTROL
@@ -680,16 +676,13 @@ static const u32 si_SECT_CONTEXT_def_4[] =
0x00000000, // VGT_GS_OUT_PRIM_TYPE
0x00000000, // IA_ENHANCE
};
-static const u32 si_SECT_CONTEXT_def_5[] =
-{
+static const u32 si_SECT_CONTEXT_def_5[] = {
0x00000000, // VGT_PRIMITIVEID_EN
};
-static const u32 si_SECT_CONTEXT_def_6[] =
-{
+static const u32 si_SECT_CONTEXT_def_6[] = {
0x00000000, // VGT_PRIMITIVEID_RESET
};
-static const u32 si_SECT_CONTEXT_def_7[] =
-{
+static const u32 si_SECT_CONTEXT_def_7[] = {
0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
0, // HOLE
0, // HOLE
@@ -924,8 +917,7 @@ static const u32 si_SECT_CONTEXT_def_7[] =
0x00000000, // CB_COLOR7_CLEAR_WORD0
0x00000000, // CB_COLOR7_CLEAR_WORD1
};
-static const struct cs_extent_def si_SECT_CONTEXT_defs[] =
-{
+static const struct cs_extent_def si_SECT_CONTEXT_defs[] = {
{si_SECT_CONTEXT_def_1, 0x0000a000, 212 },
{si_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
{si_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
new file mode 100644
index 000000000000..96616a865aac
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "cyan_skillfish_ip_offset.h"
+
+int cyan_skillfish_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks; only initialize the blocks needed by the driver */
+ uint32_t i;
+
+ adev->gfx.xcc_mask = 1;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index b8c47e0cf37a..2f891fb846d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the WPTR_OVERFLOW_CLEAR bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
out:
return (wptr & ih->ptr_mask);
@@ -269,9 +274,9 @@ static void cz_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int cz_ih_early_init(void *handle)
+static int cz_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -283,10 +288,10 @@ static int cz_ih_early_init(void *handle)
return 0;
}
-static int cz_ih_sw_init(void *handle)
+static int cz_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
@@ -297,9 +302,9 @@ static int cz_ih_sw_init(void *handle)
return r;
}
-static int cz_ih_sw_fini(void *handle)
+static int cz_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -307,10 +312,10 @@ static int cz_ih_sw_fini(void *handle)
return 0;
}
-static int cz_ih_hw_init(void *handle)
+static int cz_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = cz_ih_irq_init(adev);
if (r)
@@ -319,32 +324,26 @@ static int cz_ih_hw_init(void *handle)
return 0;
}
-static int cz_ih_hw_fini(void *handle)
+static int cz_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cz_ih_irq_disable(adev);
+ cz_ih_irq_disable(ip_block->adev);
return 0;
}
-static int cz_ih_suspend(void *handle)
+static int cz_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cz_ih_hw_fini(adev);
+ return cz_ih_hw_fini(ip_block);
}
-static int cz_ih_resume(void *handle)
+static int cz_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cz_ih_hw_init(adev);
+ return cz_ih_hw_init(ip_block);
}
-static bool cz_ih_is_idle(void *handle)
+static bool cz_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -353,11 +352,11 @@ static bool cz_ih_is_idle(void *handle)
return true;
}
-static int cz_ih_wait_for_idle(void *handle)
+static int cz_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -369,10 +368,10 @@ static int cz_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int cz_ih_soft_reset(void *handle)
+static int cz_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -399,14 +398,14 @@ static int cz_ih_soft_reset(void *handle)
return 0;
}
-static int cz_ih_set_clockgating_state(void *handle,
+static int cz_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
// TODO
return 0;
}
-static int cz_ih_set_powergating_state(void *handle,
+static int cz_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
// TODO
@@ -416,7 +415,6 @@ static int cz_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs cz_ih_ip_funcs = {
.name = "cz_ih",
.early_init = cz_ih_early_init,
- .late_init = NULL,
.sw_init = cz_ih_sw_init,
.sw_fini = cz_ih_sw_fini,
.hw_init = cz_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 584cd5277f92..72ca6538b2e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -21,6 +21,7 @@
*
*/
+#include <drm/drm_edid.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_modeset_helper.h>
#include <drm/drm_modeset_helper_vtables.h>
@@ -51,6 +52,7 @@
static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev);
static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev);
+static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, int hpd);
static const u32 crtc_offsets[] = {
CRTC0_REGISTER_OFFSET,
@@ -363,6 +365,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+ dce_v10_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
dce_v10_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq,
amdgpu_connector->hpd.hpd);
@@ -1036,7 +1039,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
(u32)mode->clock);
line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
(u32)mode->clock);
- line_time = min(line_time, (u32)65535);
+ line_time = min_t(u32, line_time, 65535);
/* watermark for high clocks */
if (adev->pm.dpm_enabled) {
@@ -1066,7 +1069,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
wm_high.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce_v10_0_latency_watermark(&wm_high), (u32)65535);
+ latency_watermark_a = min_t(u32, dce_v10_0_latency_watermark(&wm_high), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1105,7 +1108,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
wm_low.num_heads = num_heads;
/* set for low clocks */
- latency_watermark_b = min(dce_v10_0_latency_watermark(&wm_low), (u32)65535);
+ latency_watermark_b = min_t(u32, dce_v10_0_latency_watermark(&wm_low), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1138,8 +1141,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
/* save values for DPM */
amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
- amdgpu_crtc->wm_low = latency_watermark_b;
+
/* Save number of lines the linebuffer leads before the scanout */
amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
@@ -1296,7 +1298,7 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1366,7 +1368,7 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
@@ -1459,17 +1461,12 @@ static int dce_v10_0_audio_init(struct amdgpu_device *adev)
static void dce_v10_0_audio_fini(struct amdgpu_device *adev)
{
- int i;
-
if (!amdgpu_audio)
return;
if (!adev->mode_info.audio.enabled)
return;
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
adev->mode_info.audio.enabled = false;
}
@@ -1878,6 +1875,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -2398,6 +2396,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
@@ -2682,6 +2681,32 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v10_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v10_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v10_0_panic_flush,
+};
+
static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2729,13 +2754,14 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v10_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v10_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v10_0_early_init(void *handle)
+static int dce_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v10_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v10_0_audio_endpt_wreg;
@@ -2760,10 +2786,10 @@ static int dce_v10_0_early_init(void *handle)
return 0;
}
-static int dce_v10_0_sw_init(void *handle)
+static int dce_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2839,11 +2865,11 @@ static int dce_v10_0_sw_init(void *handle)
return 0;
}
-static int dce_v10_0_sw_fini(void *handle)
+static int dce_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- kfree(adev->mode_info.bios_hardcoded_edid);
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
drm_kms_helper_poll_fini(adev_to_drm(adev));
@@ -2857,10 +2883,10 @@ static int dce_v10_0_sw_fini(void *handle)
return 0;
}
-static int dce_v10_0_hw_init(void *handle)
+static int dce_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v10_0_init_golden_registers(adev);
@@ -2882,10 +2908,10 @@ static int dce_v10_0_hw_init(void *handle)
return 0;
}
-static int dce_v10_0_hw_fini(void *handle)
+static int dce_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v10_0_hpd_fini(adev);
@@ -2900,9 +2926,9 @@ static int dce_v10_0_hw_fini(void *handle)
return 0;
}
-static int dce_v10_0_suspend(void *handle)
+static int dce_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_display_suspend_helper(adev);
@@ -2912,18 +2938,18 @@ static int dce_v10_0_suspend(void *handle)
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v10_0_hw_fini(handle);
+ return dce_v10_0_hw_fini(ip_block);
}
-static int dce_v10_0_resume(void *handle)
+static int dce_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v10_0_hw_init(handle);
+ ret = dce_v10_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2938,27 +2964,22 @@ static int dce_v10_0_resume(void *handle)
return amdgpu_display_resume_helper(adev);
}
-static bool dce_v10_0_is_idle(void *handle)
+static bool dce_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v10_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static bool dce_v10_0_check_soft_reset(void *handle)
+static bool dce_v10_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return dce_v10_0_is_display_hung(adev);
}
-static int dce_v10_0_soft_reset(void *handle)
+static int dce_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (dce_v10_0_is_display_hung(adev))
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
@@ -3048,7 +3069,7 @@ static int dce_v10_0_set_hpd_irq_state(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return 0;
}
@@ -3200,7 +3221,7 @@ static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return;
}
@@ -3302,13 +3323,13 @@ static int dce_v10_0_hpd_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v10_0_set_clockgating_state(void *handle,
+static int dce_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v10_0_set_powergating_state(void *handle,
+static int dce_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3317,7 +3338,6 @@ static int dce_v10_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
.name = "dce_v10_0",
.early_init = dce_v10_0_early_init,
- .late_init = NULL,
.sw_init = dce_v10_0_sw_init,
.sw_fini = dce_v10_0_sw_fini,
.hw_init = dce_v10_0_hw_init,
@@ -3325,7 +3345,6 @@ static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
.suspend = dce_v10_0_suspend,
.resume = dce_v10_0_resume,
.is_idle = dce_v10_0_is_idle,
- .wait_for_idle = dce_v10_0_wait_for_idle,
.check_soft_reset = dce_v10_0_check_soft_reset,
.soft_reset = dce_v10_0_soft_reset,
.set_clockgating_state = dce_v10_0_set_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
deleted file mode 100644
index c14b70350a51..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ /dev/null
@@ -1,3799 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <drm/drm_fourcc.h>
-#include <drm/drm_modeset_helper.h>
-#include <drm/drm_modeset_helper_vtables.h>
-#include <drm/drm_vblank.h>
-
-#include "amdgpu.h"
-#include "amdgpu_pm.h"
-#include "amdgpu_i2c.h"
-#include "vid.h"
-#include "atom.h"
-#include "amdgpu_atombios.h"
-#include "atombios_crtc.h"
-#include "atombios_encoders.h"
-#include "amdgpu_pll.h"
-#include "amdgpu_connectors.h"
-#include "amdgpu_display.h"
-#include "dce_v11_0.h"
-
-#include "dce/dce_11_0_d.h"
-#include "dce/dce_11_0_sh_mask.h"
-#include "dce/dce_11_0_enum.h"
-#include "oss/oss_3_0_d.h"
-#include "oss/oss_3_0_sh_mask.h"
-#include "gmc/gmc_8_1_d.h"
-#include "gmc/gmc_8_1_sh_mask.h"
-
-#include "ivsrcid/ivsrcid_vislands30.h"
-
-static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev);
-static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev);
-
-static const u32 crtc_offsets[] =
-{
- CRTC0_REGISTER_OFFSET,
- CRTC1_REGISTER_OFFSET,
- CRTC2_REGISTER_OFFSET,
- CRTC3_REGISTER_OFFSET,
- CRTC4_REGISTER_OFFSET,
- CRTC5_REGISTER_OFFSET,
- CRTC6_REGISTER_OFFSET
-};
-
-static const u32 hpd_offsets[] =
-{
- HPD0_REGISTER_OFFSET,
- HPD1_REGISTER_OFFSET,
- HPD2_REGISTER_OFFSET,
- HPD3_REGISTER_OFFSET,
- HPD4_REGISTER_OFFSET,
- HPD5_REGISTER_OFFSET
-};
-
-static const uint32_t dig_offsets[] = {
- DIG0_REGISTER_OFFSET,
- DIG1_REGISTER_OFFSET,
- DIG2_REGISTER_OFFSET,
- DIG3_REGISTER_OFFSET,
- DIG4_REGISTER_OFFSET,
- DIG5_REGISTER_OFFSET,
- DIG6_REGISTER_OFFSET,
- DIG7_REGISTER_OFFSET,
- DIG8_REGISTER_OFFSET
-};
-
-static const struct {
- uint32_t reg;
- uint32_t vblank;
- uint32_t vline;
- uint32_t hpd;
-
-} interrupt_status_offsets[] = { {
- .reg = mmDISP_INTERRUPT_STATUS,
- .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
-}, {
- .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
- .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
- .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
- .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
-} };
-
-static const u32 cz_golden_settings_a11[] =
-{
- mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
- mmFBC_MISC, 0x1f311fff, 0x14300000,
-};
-
-static const u32 cz_mgcg_cgcg_init[] =
-{
- mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
- mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
-};
-
-static const u32 stoney_golden_settings_a11[] =
-{
- mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
- mmFBC_MISC, 0x1f311fff, 0x14302000,
-};
-
-static const u32 polaris11_golden_settings_a11[] =
-{
- mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
- mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
- mmFBC_DEBUG1, 0xffffffff, 0x00000008,
- mmFBC_MISC, 0x9f313fff, 0x14302008,
- mmHDMI_CONTROL, 0x313f031f, 0x00000011,
-};
-
-static const u32 polaris10_golden_settings_a11[] =
-{
- mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
- mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
- mmFBC_MISC, 0x9f313fff, 0x14302008,
- mmHDMI_CONTROL, 0x313f031f, 0x00000011,
-};
-
-static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
-{
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- amdgpu_device_program_register_sequence(adev,
- cz_mgcg_cgcg_init,
- ARRAY_SIZE(cz_mgcg_cgcg_init));
- amdgpu_device_program_register_sequence(adev,
- cz_golden_settings_a11,
- ARRAY_SIZE(cz_golden_settings_a11));
- break;
- case CHIP_STONEY:
- amdgpu_device_program_register_sequence(adev,
- stoney_golden_settings_a11,
- ARRAY_SIZE(stoney_golden_settings_a11));
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- amdgpu_device_program_register_sequence(adev,
- polaris11_golden_settings_a11,
- ARRAY_SIZE(polaris11_golden_settings_a11));
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- amdgpu_device_program_register_sequence(adev,
- polaris10_golden_settings_a11,
- ARRAY_SIZE(polaris10_golden_settings_a11));
- break;
- default:
- break;
- }
-}
-
-static u32 dce_v11_0_audio_endpt_rreg(struct amdgpu_device *adev,
- u32 block_offset, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
- r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
-
- return r;
-}
-
-static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev,
- u32 block_offset, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
- WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
- spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
-}
-
-static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
-{
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
- return 0;
- else
- return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
-}
-
-static void dce_v11_0_pageflip_interrupt_init(struct amdgpu_device *adev)
-{
- unsigned i;
-
- /* Enable pflip interrupts */
- for (i = 0; i < adev->mode_info.num_crtc; i++)
- amdgpu_irq_get(adev, &adev->pageflip_irq, i);
-}
-
-static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
-{
- unsigned i;
-
- /* Disable pflip interrupts */
- for (i = 0; i < adev->mode_info.num_crtc; i++)
- amdgpu_irq_put(adev, &adev->pageflip_irq, i);
-}
-
-/**
- * dce_v11_0_page_flip - pageflip callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc_id: crtc to cleanup pageflip on
- * @crtc_base: new address of the crtc (GPU MC address)
- * @async: asynchronous flip
- *
- * Triggers the actual pageflip by updating the primary
- * surface base address.
- */
-static void dce_v11_0_page_flip(struct amdgpu_device *adev,
- int crtc_id, u64 crtc_base, bool async)
-{
- struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
- struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
- u32 tmp;
-
- /* flip immediate for async, default is vsync */
- tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
- GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
- WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- /* update pitch */
- WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
- fb->pitches[0] / fb->format->cpp[0]);
- /* update the scanout addresses */
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(crtc_base));
- /* writing to the low address triggers the update */
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- lower_32_bits(crtc_base));
- /* post the write */
- RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
-}
-
-static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
- u32 *vbl, u32 *position)
-{
- if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
- return -EINVAL;
-
- *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
- *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
- return 0;
-}
-
-/**
- * dce_v11_0_hpd_sense - hpd sense callback.
- *
- * @adev: amdgpu_device pointer
- * @hpd: hpd (hotplug detect) pin
- *
- * Checks if a digital monitor is connected (evergreen+).
- * Returns true if connected, false if not connected.
- */
-static bool dce_v11_0_hpd_sense(struct amdgpu_device *adev,
- enum amdgpu_hpd_id hpd)
-{
- bool connected = false;
-
- if (hpd >= adev->mode_info.num_hpd)
- return connected;
-
- if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) &
- DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK)
- connected = true;
-
- return connected;
-}
-
-/**
- * dce_v11_0_hpd_set_polarity - hpd set polarity callback.
- *
- * @adev: amdgpu_device pointer
- * @hpd: hpd (hotplug detect) pin
- *
- * Set the polarity of the hpd pin (evergreen+).
- */
-static void dce_v11_0_hpd_set_polarity(struct amdgpu_device *adev,
- enum amdgpu_hpd_id hpd)
-{
- u32 tmp;
- bool connected = dce_v11_0_hpd_sense(adev, hpd);
-
- if (hpd >= adev->mode_info.num_hpd)
- return;
-
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- if (connected)
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0);
- else
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
-}
-
-/**
- * dce_v11_0_hpd_init - hpd setup callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Setup the hpd pins used by the card (evergreen+).
- * Enable the pin, set the polarity, and enable the hpd interrupts.
- */
-static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- u32 tmp;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
- continue;
-
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
- connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
- /* don't try to enable hpd on eDP or LVDS avoid breaking the
- * aux dp channel on imac and help (but not completely fix)
- * https://bugzilla.redhat.com/show_bug.cgi?id=726143
- * also avoid interrupt storms during dpms.
- */
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
- continue;
- }
-
- tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1);
- WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
- DC_HPD_CONNECT_INT_DELAY,
- AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS);
- tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
- DC_HPD_DISCONNECT_INT_DELAY,
- AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
- WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
- amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
- }
- drm_connector_list_iter_end(&iter);
-}
-
-/**
- * dce_v11_0_hpd_fini - hpd tear down callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Tear down the hpd pins used by the card (evergreen+).
- * Disable the hpd interrupts.
- */
-static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- u32 tmp;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
- continue;
-
- tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0);
- WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
-
- amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
- }
- drm_connector_list_iter_end(&iter);
-}
-
-static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
-{
- return mmDC_GPIO_HPD_A;
-}
-
-static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev)
-{
- u32 crtc_hung = 0;
- u32 crtc_status[6];
- u32 i, j, tmp;
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
- if (REG_GET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN)) {
- crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
- crtc_hung |= (1 << i);
- }
- }
-
- for (j = 0; j < 10; j++) {
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (crtc_hung & (1 << i)) {
- tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
- if (tmp != crtc_status[i])
- crtc_hung &= ~(1 << i);
- }
- }
- if (crtc_hung == 0)
- return false;
- udelay(100);
- }
-
- return true;
-}
-
-static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev,
- bool render)
-{
- u32 tmp;
-
- /* Lockout access through VGA aperture*/
- tmp = RREG32(mmVGA_HDP_CONTROL);
- if (render)
- tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0);
- else
- tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
- WREG32(mmVGA_HDP_CONTROL, tmp);
-
- /* disable VGA render */
- tmp = RREG32(mmVGA_RENDER_CONTROL);
- if (render)
- tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1);
- else
- tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
- WREG32(mmVGA_RENDER_CONTROL, tmp);
-}
-
-static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
-{
- int num_crtc = 0;
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- num_crtc = 3;
- break;
- case CHIP_STONEY:
- num_crtc = 2;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- num_crtc = 6;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- num_crtc = 5;
- break;
- default:
- num_crtc = 0;
- }
- return num_crtc;
-}
-
-void dce_v11_0_disable_dce(struct amdgpu_device *adev)
-{
- /*Disable VGA render and enabled crtc, if has DCE engine*/
- if (amdgpu_atombios_has_dce_engine_info(adev)) {
- u32 tmp;
- int crtc_enabled, i;
-
- dce_v11_0_set_vga_render_state(adev, false);
-
- /*Disable crtc*/
- for (i = 0; i < dce_v11_0_get_num_crtc(adev); i++) {
- crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
- CRTC_CONTROL, CRTC_MASTER_EN);
- if (crtc_enabled) {
- WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
- tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
- tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
- WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
- WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
- }
- }
- }
-}
-
-static void dce_v11_0_program_fmt(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
- int bpc = 0;
- u32 tmp = 0;
- enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
-
- if (connector) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- bpc = amdgpu_connector_get_monitor_bpc(connector);
- dither = amdgpu_connector->dither;
- }
-
- /* LVDS/eDP FMT is set up by atom */
- if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
- return;
-
- /* not needed for analog */
- if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
- (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
- return;
-
- if (bpc == 0)
- return;
-
- switch (bpc) {
- case 6:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 0);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 0);
- }
- break;
- case 8:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 1);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 1);
- }
- break;
- case 10:
- if (dither == AMDGPU_FMT_DITHER_ENABLE) {
- /* XXX sort out optimal dither settings */
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 2);
- } else {
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
- tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 2);
- }
- break;
- default:
- /* not needed */
- break;
- }
-
- WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-
-/* display watermark setup */
-/**
- * dce_v11_0_line_buffer_adjust - Set up the line buffer
- *
- * @adev: amdgpu_device pointer
- * @amdgpu_crtc: the selected display controller
- * @mode: the current display mode on the selected display
- * controller
- *
- * Setup up the line buffer allocation for
- * the selected display controller (CIK).
- * Returns the line buffer size in pixels.
- */
-static u32 dce_v11_0_line_buffer_adjust(struct amdgpu_device *adev,
- struct amdgpu_crtc *amdgpu_crtc,
- struct drm_display_mode *mode)
-{
- u32 tmp, buffer_alloc, i, mem_cfg;
- u32 pipe_offset = amdgpu_crtc->crtc_id;
- /*
- * Line Buffer Setup
- * There are 6 line buffers, one for each display controllers.
- * There are 3 partitions per LB. Select the number of partitions
- * to enable based on the display width. For display widths larger
- * than 4096, you need use to use 2 display controllers and combine
- * them using the stereo blender.
- */
- if (amdgpu_crtc->base.enabled && mode) {
- if (mode->crtc_hdisplay < 1920) {
- mem_cfg = 1;
- buffer_alloc = 2;
- } else if (mode->crtc_hdisplay < 2560) {
- mem_cfg = 2;
- buffer_alloc = 2;
- } else if (mode->crtc_hdisplay < 4096) {
- mem_cfg = 0;
- buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
- } else {
- DRM_DEBUG_KMS("Mode too big for LB!\n");
- mem_cfg = 0;
- buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
- }
- } else {
- mem_cfg = 1;
- buffer_alloc = 0;
- }
-
- tmp = RREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, LB_MEMORY_CTRL, LB_MEMORY_CONFIG, mem_cfg);
- WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
- tmp = REG_SET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATED, buffer_alloc);
- WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, tmp);
-
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
- if (REG_GET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATION_COMPLETED))
- break;
- udelay(1);
- }
-
- if (amdgpu_crtc->base.enabled && mode) {
- switch (mem_cfg) {
- case 0:
- default:
- return 4096 * 2;
- case 1:
- return 1920 * 2;
- case 2:
- return 2560 * 2;
- }
- }
-
- /* controller not enabled, so no lb used */
- return 0;
-}
-
-/**
- * cik_get_number_of_dram_channels - get the number of dram channels
- *
- * @adev: amdgpu_device pointer
- *
- * Look up the number of video ram channels (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the number of dram channels
- */
-static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev)
-{
- u32 tmp = RREG32(mmMC_SHARED_CHMAP);
-
- switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
- case 0:
- default:
- return 1;
- case 1:
- return 2;
- case 2:
- return 4;
- case 3:
- return 8;
- case 4:
- return 3;
- case 5:
- return 6;
- case 6:
- return 10;
- case 7:
- return 12;
- case 8:
- return 16;
- }
-}
-
-struct dce10_wm_params {
- u32 dram_channels; /* number of dram channels */
- u32 yclk; /* bandwidth per dram data pin in kHz */
- u32 sclk; /* engine clock in kHz */
- u32 disp_clk; /* display clock in kHz */
- u32 src_width; /* viewport width */
- u32 active_time; /* active display time in ns */
- u32 blank_time; /* blank time in ns */
- bool interlaced; /* mode is interlaced */
- fixed20_12 vsc; /* vertical scale ratio */
- u32 num_heads; /* number of active crtcs */
- u32 bytes_per_pixel; /* bytes per pixel display + overlay */
- u32 lb_size; /* line buffer allocated to pipe */
- u32 vtaps; /* vertical scaler taps */
-};
-
-/**
- * dce_v11_0_dram_bandwidth - get the dram bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the raw dram bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dram bandwidth in MBytes/s
- */
-static u32 dce_v11_0_dram_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate raw DRAM Bandwidth */
- fixed20_12 dram_efficiency; /* 0.7 */
- fixed20_12 yclk, dram_channels, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- yclk.full = dfixed_const(wm->yclk);
- yclk.full = dfixed_div(yclk, a);
- dram_channels.full = dfixed_const(wm->dram_channels * 4);
- a.full = dfixed_const(10);
- dram_efficiency.full = dfixed_const(7);
- dram_efficiency.full = dfixed_div(dram_efficiency, a);
- bandwidth.full = dfixed_mul(dram_channels, yclk);
- bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_dram_bandwidth_for_display - get the dram bandwidth for display
- *
- * @wm: watermark calculation data
- *
- * Calculate the dram bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dram bandwidth for display in MBytes/s
- */
-static u32 dce_v11_0_dram_bandwidth_for_display(struct dce10_wm_params *wm)
-{
- /* Calculate DRAM Bandwidth and the part allocated to display. */
- fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
- fixed20_12 yclk, dram_channels, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- yclk.full = dfixed_const(wm->yclk);
- yclk.full = dfixed_div(yclk, a);
- dram_channels.full = dfixed_const(wm->dram_channels * 4);
- a.full = dfixed_const(10);
- disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
- disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
- bandwidth.full = dfixed_mul(dram_channels, yclk);
- bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_data_return_bandwidth - get the data return bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the data return bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the data return bandwidth in MBytes/s
- */
-static u32 dce_v11_0_data_return_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the display Data return Bandwidth */
- fixed20_12 return_efficiency; /* 0.8 */
- fixed20_12 sclk, bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- sclk.full = dfixed_const(wm->sclk);
- sclk.full = dfixed_div(sclk, a);
- a.full = dfixed_const(10);
- return_efficiency.full = dfixed_const(8);
- return_efficiency.full = dfixed_div(return_efficiency, a);
- a.full = dfixed_const(32);
- bandwidth.full = dfixed_mul(a, sclk);
- bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_dmif_request_bandwidth - get the dmif bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the dmif bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the dmif bandwidth in MBytes/s
- */
-static u32 dce_v11_0_dmif_request_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the DMIF Request Bandwidth */
- fixed20_12 disp_clk_request_efficiency; /* 0.8 */
- fixed20_12 disp_clk, bandwidth;
- fixed20_12 a, b;
-
- a.full = dfixed_const(1000);
- disp_clk.full = dfixed_const(wm->disp_clk);
- disp_clk.full = dfixed_div(disp_clk, a);
- a.full = dfixed_const(32);
- b.full = dfixed_mul(a, disp_clk);
-
- a.full = dfixed_const(10);
- disp_clk_request_efficiency.full = dfixed_const(8);
- disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
-
- bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_available_bandwidth - get the min available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the min available bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the min available bandwidth in MBytes/s
- */
-static u32 dce_v11_0_available_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
- u32 dram_bandwidth = dce_v11_0_dram_bandwidth(wm);
- u32 data_return_bandwidth = dce_v11_0_data_return_bandwidth(wm);
- u32 dmif_req_bandwidth = dce_v11_0_dmif_request_bandwidth(wm);
-
- return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
-}
-
-/**
- * dce_v11_0_average_bandwidth - get the average available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Calculate the average available bandwidth used for display (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the average available bandwidth in MBytes/s
- */
-static u32 dce_v11_0_average_bandwidth(struct dce10_wm_params *wm)
-{
- /* Calculate the display mode Average Bandwidth
- * DisplayMode should contain the source and destination dimensions,
- * timing, etc.
- */
- fixed20_12 bpp;
- fixed20_12 line_time;
- fixed20_12 src_width;
- fixed20_12 bandwidth;
- fixed20_12 a;
-
- a.full = dfixed_const(1000);
- line_time.full = dfixed_const(wm->active_time + wm->blank_time);
- line_time.full = dfixed_div(line_time, a);
- bpp.full = dfixed_const(wm->bytes_per_pixel);
- src_width.full = dfixed_const(wm->src_width);
- bandwidth.full = dfixed_mul(src_width, bpp);
- bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
- bandwidth.full = dfixed_div(bandwidth, line_time);
-
- return dfixed_trunc(bandwidth);
-}
-
-/**
- * dce_v11_0_latency_watermark - get the latency watermark
- *
- * @wm: watermark calculation data
- *
- * Calculate the latency watermark (CIK).
- * Used for display watermark bandwidth calculations
- * Returns the latency watermark in ns
- */
-static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm)
-{
- /* First calculate the latency in ns */
- u32 mc_latency = 2000; /* 2000 ns. */
- u32 available_bandwidth = dce_v11_0_available_bandwidth(wm);
- u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
- u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
- u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
- u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
- (wm->num_heads * cursor_line_pair_return_time);
- u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
- u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
- u32 tmp, dmif_size = 12288;
- fixed20_12 a, b, c;
-
- if (wm->num_heads == 0)
- return 0;
-
- a.full = dfixed_const(2);
- b.full = dfixed_const(1);
- if ((wm->vsc.full > a.full) ||
- ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
- (wm->vtaps >= 5) ||
- ((wm->vsc.full >= a.full) && wm->interlaced))
- max_src_lines_per_dst_line = 4;
- else
- max_src_lines_per_dst_line = 2;
-
- a.full = dfixed_const(available_bandwidth);
- b.full = dfixed_const(wm->num_heads);
- a.full = dfixed_div(a, b);
- tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
- tmp = min(dfixed_trunc(a), tmp);
-
- lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
-
- a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
- b.full = dfixed_const(1000);
- c.full = dfixed_const(lb_fill_bw);
- b.full = dfixed_div(c, b);
- a.full = dfixed_div(a, b);
- line_fill_time = dfixed_trunc(a);
-
- if (line_fill_time < wm->active_time)
- return latency;
- else
- return latency + (line_fill_time - wm->active_time);
-
-}
-
-/**
- * dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display - check
- * average and available dram bandwidth
- *
- * @wm: watermark calculation data
- *
- * Check if the display average bandwidth fits in the display
- * dram bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns true if the display fits, false if not.
- */
-static bool dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce10_wm_params *wm)
-{
- if (dce_v11_0_average_bandwidth(wm) <=
- (dce_v11_0_dram_bandwidth_for_display(wm) / wm->num_heads))
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_average_bandwidth_vs_available_bandwidth - check
- * average and available bandwidth
- *
- * @wm: watermark calculation data
- *
- * Check if the display average bandwidth fits in the display
- * available bandwidth (CIK).
- * Used for display watermark bandwidth calculations
- * Returns true if the display fits, false if not.
- */
-static bool dce_v11_0_average_bandwidth_vs_available_bandwidth(struct dce10_wm_params *wm)
-{
- if (dce_v11_0_average_bandwidth(wm) <=
- (dce_v11_0_available_bandwidth(wm) / wm->num_heads))
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_check_latency_hiding - check latency hiding
- *
- * @wm: watermark calculation data
- *
- * Check latency hiding (CIK).
- * Used for display watermark bandwidth calculations
- * Returns true if the display fits, false if not.
- */
-static bool dce_v11_0_check_latency_hiding(struct dce10_wm_params *wm)
-{
- u32 lb_partitions = wm->lb_size / wm->src_width;
- u32 line_time = wm->active_time + wm->blank_time;
- u32 latency_tolerant_lines;
- u32 latency_hiding;
- fixed20_12 a;
-
- a.full = dfixed_const(1);
- if (wm->vsc.full > a.full)
- latency_tolerant_lines = 1;
- else {
- if (lb_partitions <= (wm->vtaps + 1))
- latency_tolerant_lines = 1;
- else
- latency_tolerant_lines = 2;
- }
-
- latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
-
- if (dce_v11_0_latency_watermark(wm) <= latency_hiding)
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_program_watermarks - program display watermarks
- *
- * @adev: amdgpu_device pointer
- * @amdgpu_crtc: the selected display controller
- * @lb_size: line buffer size
- * @num_heads: number of display controllers in use
- *
- * Calculate and program the display watermarks for the
- * selected display controller (CIK).
- */
-static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
- struct amdgpu_crtc *amdgpu_crtc,
- u32 lb_size, u32 num_heads)
-{
- struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
- struct dce10_wm_params wm_low, wm_high;
- u32 active_time;
- u32 line_time = 0;
- u32 latency_watermark_a = 0, latency_watermark_b = 0;
- u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
-
- if (amdgpu_crtc->base.enabled && num_heads && mode) {
- active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
- (u32)mode->clock);
- line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
- (u32)mode->clock);
- line_time = min(line_time, (u32)65535);
-
- /* watermark for high clocks */
- if (adev->pm.dpm_enabled) {
- wm_high.yclk =
- amdgpu_dpm_get_mclk(adev, false) * 10;
- wm_high.sclk =
- amdgpu_dpm_get_sclk(adev, false) * 10;
- } else {
- wm_high.yclk = adev->pm.current_mclk * 10;
- wm_high.sclk = adev->pm.current_sclk * 10;
- }
-
- wm_high.disp_clk = mode->clock;
- wm_high.src_width = mode->crtc_hdisplay;
- wm_high.active_time = active_time;
- wm_high.blank_time = line_time - wm_high.active_time;
- wm_high.interlaced = false;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- wm_high.interlaced = true;
- wm_high.vsc = amdgpu_crtc->vsc;
- wm_high.vtaps = 1;
- if (amdgpu_crtc->rmx_type != RMX_OFF)
- wm_high.vtaps = 2;
- wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
- wm_high.lb_size = lb_size;
- wm_high.dram_channels = cik_get_number_of_dram_channels(adev);
- wm_high.num_heads = num_heads;
-
- /* set for high clocks */
- latency_watermark_a = min(dce_v11_0_latency_watermark(&wm_high), (u32)65535);
-
- /* possibly force display priority to high */
- /* should really do this at mode validation time... */
- if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
- !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_high) ||
- !dce_v11_0_check_latency_hiding(&wm_high) ||
- (adev->mode_info.disp_priority == 2)) {
- DRM_DEBUG_KMS("force priority to high\n");
- }
-
- /* watermark for low clocks */
- if (adev->pm.dpm_enabled) {
- wm_low.yclk =
- amdgpu_dpm_get_mclk(adev, true) * 10;
- wm_low.sclk =
- amdgpu_dpm_get_sclk(adev, true) * 10;
- } else {
- wm_low.yclk = adev->pm.current_mclk * 10;
- wm_low.sclk = adev->pm.current_sclk * 10;
- }
-
- wm_low.disp_clk = mode->clock;
- wm_low.src_width = mode->crtc_hdisplay;
- wm_low.active_time = active_time;
- wm_low.blank_time = line_time - wm_low.active_time;
- wm_low.interlaced = false;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- wm_low.interlaced = true;
- wm_low.vsc = amdgpu_crtc->vsc;
- wm_low.vtaps = 1;
- if (amdgpu_crtc->rmx_type != RMX_OFF)
- wm_low.vtaps = 2;
- wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
- wm_low.lb_size = lb_size;
- wm_low.dram_channels = cik_get_number_of_dram_channels(adev);
- wm_low.num_heads = num_heads;
-
- /* set for low clocks */
- latency_watermark_b = min(dce_v11_0_latency_watermark(&wm_low), (u32)65535);
-
- /* possibly force display priority to high */
- /* should really do this at mode validation time... */
- if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
- !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_low) ||
- !dce_v11_0_check_latency_hiding(&wm_low) ||
- (adev->mode_info.disp_priority == 2)) {
- DRM_DEBUG_KMS("force priority to high\n");
- }
- lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
- }
-
- /* select wm A */
- wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 1);
- WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_a);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
- WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- /* select wm B */
- tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 2);
- WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_b);
- tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
- WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
- /* restore original selection */
- WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask);
-
- /* save values for DPM */
- amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
- amdgpu_crtc->wm_low = latency_watermark_b;
- /* Save number of lines the linebuffer leads before the scanout */
- amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
-}
-
-/**
- * dce_v11_0_bandwidth_update - program display watermarks
- *
- * @adev: amdgpu_device pointer
- *
- * Calculate and program the display watermarks and line
- * buffer allocation (CIK).
- */
-static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev)
-{
- struct drm_display_mode *mode = NULL;
- u32 num_heads = 0, lb_size;
- int i;
-
- amdgpu_display_update_priority(adev);
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->mode_info.crtcs[i]->base.enabled)
- num_heads++;
- }
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- mode = &adev->mode_info.crtcs[i]->base.mode;
- lb_size = dce_v11_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode);
- dce_v11_0_program_watermarks(adev, adev->mode_info.crtcs[i],
- lb_size, num_heads);
- }
-}
-
-static void dce_v11_0_audio_get_connected_pins(struct amdgpu_device *adev)
-{
- int i;
- u32 offset, tmp;
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- offset = adev->mode_info.audio.pin[i].offset;
- tmp = RREG32_AUDIO_ENDPT(offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
- if (((tmp &
- AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >>
- AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT) == 1)
- adev->mode_info.audio.pin[i].connected = false;
- else
- adev->mode_info.audio.pin[i].connected = true;
- }
-}
-
-static struct amdgpu_audio_pin *dce_v11_0_audio_get_pin(struct amdgpu_device *adev)
-{
- int i;
-
- dce_v11_0_audio_get_connected_pins(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- if (adev->mode_info.audio.pin[i].connected)
- return &adev->mode_info.audio.pin[i];
- }
- DRM_ERROR("No connected audio pins found!\n");
- return NULL;
-}
-
-static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder)
-{
- struct amdgpu_device *adev = drm_to_adev(encoder->dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- u32 tmp;
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- tmp = RREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, dig->afmt->pin->id);
- WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, tmp);
-}
-
-static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
- struct drm_display_mode *mode)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *amdgpu_connector = NULL;
- u32 tmp;
- int interlace = 0;
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- if (connector->encoder == encoder) {
- amdgpu_connector = to_amdgpu_connector(connector);
- break;
- }
- }
- drm_connector_list_iter_end(&iter);
-
- if (!amdgpu_connector) {
- DRM_ERROR("Couldn't find encoder's connector\n");
- return;
- }
-
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- interlace = 1;
- if (connector->latency_present[interlace]) {
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- VIDEO_LIPSYNC, connector->video_latency[interlace]);
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- AUDIO_LIPSYNC, connector->audio_latency[interlace]);
- } else {
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- VIDEO_LIPSYNC, 0);
- tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
- AUDIO_LIPSYNC, 0);
- }
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
-}
-
-static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *amdgpu_connector = NULL;
- u32 tmp;
- u8 *sadb = NULL;
- int sad_count;
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- if (connector->encoder == encoder) {
- amdgpu_connector = to_amdgpu_connector(connector);
- break;
- }
- }
- drm_connector_list_iter_end(&iter);
-
- if (!amdgpu_connector) {
- DRM_ERROR("Couldn't find encoder's connector\n");
- return;
- }
-
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
- if (sad_count < 0) {
- DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
- sad_count = 0;
- }
-
- /* program the speaker allocation */
- tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- DP_CONNECTION, 0);
- /* set HDMI mode */
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- HDMI_CONNECTION, 1);
- if (sad_count)
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- SPEAKER_ALLOCATION, sadb[0]);
- else
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
- SPEAKER_ALLOCATION, 5); /* stereo */
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
- ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
-
- kfree(sadb);
-}
-
-static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector;
- struct drm_connector_list_iter iter;
- struct amdgpu_connector *amdgpu_connector = NULL;
- struct cea_sad *sads;
- int i, sad_count;
-
- static const u16 eld_reg_to_type[][2] = {
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
- { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
- };
-
- if (!dig || !dig->afmt || !dig->afmt->pin)
- return;
-
- drm_connector_list_iter_begin(dev, &iter);
- drm_for_each_connector_iter(connector, &iter) {
- if (connector->encoder == encoder) {
- amdgpu_connector = to_amdgpu_connector(connector);
- break;
- }
- }
- drm_connector_list_iter_end(&iter);
-
- if (!amdgpu_connector) {
- DRM_ERROR("Couldn't find encoder's connector\n");
- return;
- }
-
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
- if (sad_count < 0)
- DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
- if (sad_count <= 0)
- return;
- BUG_ON(!sads);
-
- for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
- u32 tmp = 0;
- u8 stereo_freqs = 0;
- int max_channels = -1;
- int j;
-
- for (j = 0; j < sad_count; j++) {
- struct cea_sad *sad = &sads[j];
-
- if (sad->format == eld_reg_to_type[i][1]) {
- if (sad->channels > max_channels) {
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- MAX_CHANNELS, sad->channels);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- DESCRIPTOR_BYTE_2, sad->byte2);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES, sad->freq);
- max_channels = sad->channels;
- }
-
- if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
- stereo_freqs |= sad->freq;
- else
- break;
- }
- }
-
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
- }
-
- kfree(sads);
-}
-
-static void dce_v11_0_audio_enable(struct amdgpu_device *adev,
- struct amdgpu_audio_pin *pin,
- bool enable)
-{
- if (!pin)
- return;
-
- WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
- enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
-}
-
-static const u32 pin_offsets[] =
-{
- AUD0_REGISTER_OFFSET,
- AUD1_REGISTER_OFFSET,
- AUD2_REGISTER_OFFSET,
- AUD3_REGISTER_OFFSET,
- AUD4_REGISTER_OFFSET,
- AUD5_REGISTER_OFFSET,
- AUD6_REGISTER_OFFSET,
- AUD7_REGISTER_OFFSET,
-};
-
-static int dce_v11_0_audio_init(struct amdgpu_device *adev)
-{
- int i;
-
- if (!amdgpu_audio)
- return 0;
-
- adev->mode_info.audio.enabled = true;
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- adev->mode_info.audio.num_pins = 7;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- adev->mode_info.audio.num_pins = 8;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- adev->mode_info.audio.num_pins = 6;
- break;
- default:
- return -EINVAL;
- }
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- adev->mode_info.audio.pin[i].channels = -1;
- adev->mode_info.audio.pin[i].rate = -1;
- adev->mode_info.audio.pin[i].bits_per_sample = -1;
- adev->mode_info.audio.pin[i].status_bits = 0;
- adev->mode_info.audio.pin[i].category_code = 0;
- adev->mode_info.audio.pin[i].connected = false;
- adev->mode_info.audio.pin[i].offset = pin_offsets[i];
- adev->mode_info.audio.pin[i].id = i;
- /* disable audio. it will be set up later */
- /* XXX remove once we switch to ip funcs */
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- return 0;
-}
-
-static void dce_v11_0_audio_fini(struct amdgpu_device *adev)
-{
- int i;
-
- if (!amdgpu_audio)
- return;
-
- if (!adev->mode_info.audio.enabled)
- return;
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
- adev->mode_info.audio.enabled = false;
-}
-
-/*
- * update the N and CTS parameters for a given pixel clock rate
- */
-static void dce_v11_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- u32 tmp;
-
- tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
- WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
- tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
- WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
- WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
- tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
- WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
- WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
- tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
- WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
-
-}
-
-/*
- * build a HDMI Video Info Frame
- */
-static void dce_v11_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
- void *buffer, size_t size)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- uint8_t *frame = buffer + 3;
- uint8_t *header = buffer;
-
- WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
- frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24));
- WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
- frame[0x4] | (frame[0x5] << 8) | (frame[0x6] << 16) | (frame[0x7] << 24));
- WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
- frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24));
- WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
- frame[0xC] | (frame[0xD] << 8) | (header[1] << 24));
-}
-
-static void dce_v11_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
- u32 dto_phase = 24 * 1000;
- u32 dto_modulo = clock;
- u32 tmp;
-
- if (!dig || !dig->afmt)
- return;
-
- /* XXX two dtos; generally use dto0 for hdmi */
- /* Express [24MHz / target pixel clock] as an exact rational
- * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
- * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
- */
- tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
- tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL,
- amdgpu_crtc->crtc_id);
- WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
- WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase);
- WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo);
-}
-
-/*
- * update the info frames with the data from the current display mode
- */
-static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder,
- struct drm_display_mode *mode)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
- u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
- struct hdmi_avi_infoframe frame;
- ssize_t err;
- u32 tmp;
- int bpc = 8;
-
- if (!dig || !dig->afmt)
- return;
-
- /* Silent, r600_hdmi_enable will raise WARN for us */
- if (!dig->afmt->enabled)
- return;
-
- /* hdmi deep color mode general control packets setup, if bpc > 8 */
- if (encoder->crtc) {
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
- bpc = amdgpu_crtc->bpc;
- }
-
- /* disable audio prior to setting up hw */
- dig->afmt->pin = dce_v11_0_audio_get_pin(adev);
- dce_v11_0_audio_enable(adev, dig->afmt->pin, false);
-
- dce_v11_0_audio_set_dto(encoder, mode->clock);
-
- tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
- WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); /* send null packets when required */
-
- WREG32(mmAFMT_AUDIO_CRC_CONTROL + dig->afmt->offset, 0x1000);
-
- tmp = RREG32(mmHDMI_CONTROL + dig->afmt->offset);
- switch (bpc) {
- case 0:
- case 6:
- case 8:
- case 16:
- default:
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 0);
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
- DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n",
- connector->name, bpc);
- break;
- case 10:
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1);
- DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n",
- connector->name);
- break;
- case 12:
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
- tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2);
- DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n",
- connector->name);
- break;
- }
- WREG32(mmHDMI_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); /* send null packets when required */
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); /* send general control packets */
- tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); /* send general control packets every frame */
- WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
- /* enable audio info frames (frames won't be set until audio is enabled) */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
- /* required for audio info values to be updated */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
- WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
- /* required for audio info values to be updated */
- tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
- WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
- /* anything other than 0 */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, 2);
- WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
-
- WREG32(mmHDMI_GC + dig->afmt->offset, 0); /* unset HDMI_GC_AVMUTE */
-
- tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
- /* set the default audio delay */
- tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
- /* should be suffient for all audio modes and small enough for all hblanks */
- tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
- WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
- /* allow 60958 channel status fields to be updated */
- tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
- WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
- if (bpc > 8)
- /* clear SW CTS value */
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 0);
- else
- /* select SW CTS value */
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 1);
- /* allow hw to sent ACR packets when required */
- tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
- WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- dce_v11_0_afmt_update_ACR(encoder, mode->clock);
-
- tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
- WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
- WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
- tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
- WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
-
- dce_v11_0_audio_write_speaker_allocation(encoder);
-
- WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset,
- (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT));
-
- dce_v11_0_afmt_audio_select_pin(encoder);
- dce_v11_0_audio_write_sad_regs(encoder);
- dce_v11_0_audio_write_latency_fields(encoder, mode);
-
- err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
- if (err < 0) {
- DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
- return;
- }
-
- err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
- if (err < 0) {
- DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
- return;
- }
-
- dce_v11_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer));
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
- /* enable AVI info frames */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
- /* required for audio info values to be updated */
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
- WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
- tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
- WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
-
- tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
- /* send audio packets */
- tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
- WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
-
- WREG32(mmAFMT_RAMP_CONTROL0 + dig->afmt->offset, 0x00FFFFFF);
- WREG32(mmAFMT_RAMP_CONTROL1 + dig->afmt->offset, 0x007FFFFF);
- WREG32(mmAFMT_RAMP_CONTROL2 + dig->afmt->offset, 0x00000001);
- WREG32(mmAFMT_RAMP_CONTROL3 + dig->afmt->offset, 0x00000001);
-
- /* enable audio after to setting up hw */
- dce_v11_0_audio_enable(adev, dig->afmt->pin, true);
-}
-
-static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- if (!dig || !dig->afmt)
- return;
-
- /* Silent, r600_hdmi_enable will raise WARN for us */
- if (enable && dig->afmt->enabled)
- return;
- if (!enable && !dig->afmt->enabled)
- return;
-
- if (!enable && dig->afmt->pin) {
- dce_v11_0_audio_enable(adev, dig->afmt->pin, false);
- dig->afmt->pin = NULL;
- }
-
- dig->afmt->enabled = enable;
-
- DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
- enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
-}
-
-static int dce_v11_0_afmt_init(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->mode_info.num_dig; i++)
- adev->mode_info.afmt[i] = NULL;
-
- /* DCE11 has audio blocks tied to DIG encoders */
- for (i = 0; i < adev->mode_info.num_dig; i++) {
- adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
- if (adev->mode_info.afmt[i]) {
- adev->mode_info.afmt[i]->offset = dig_offsets[i];
- adev->mode_info.afmt[i]->id = i;
- } else {
- int j;
- for (j = 0; j < i; j++) {
- kfree(adev->mode_info.afmt[j]);
- adev->mode_info.afmt[j] = NULL;
- }
- return -ENOMEM;
- }
- }
- return 0;
-}
-
-static void dce_v11_0_afmt_fini(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->mode_info.num_dig; i++) {
- kfree(adev->mode_info.afmt[i]);
- adev->mode_info.afmt[i] = NULL;
- }
-}
-
-static const u32 vga_control_regs[6] =
-{
- mmD1VGA_CONTROL,
- mmD2VGA_CONTROL,
- mmD3VGA_CONTROL,
- mmD4VGA_CONTROL,
- mmD5VGA_CONTROL,
- mmD6VGA_CONTROL,
-};
-
-static void dce_v11_0_vga_enable(struct drm_crtc *crtc, bool enable)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u32 vga_control;
-
- vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
- if (enable)
- WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | 1);
- else
- WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control);
-}
-
-static void dce_v11_0_grph_enable(struct drm_crtc *crtc, bool enable)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- if (enable)
- WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 1);
- else
- WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 0);
-}
-
-static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- int x, int y, int atomic)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct drm_framebuffer *target_fb;
- struct drm_gem_object *obj;
- struct amdgpu_bo *abo;
- uint64_t fb_location, tiling_flags;
- uint32_t fb_format, fb_pitch_pixels;
- u32 fb_swap = REG_SET_FIELD(0, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, ENDIAN_NONE);
- u32 pipe_config;
- u32 tmp, viewport_w, viewport_h;
- int r;
- bool bypass_lut = false;
-
- /* no fb bound */
- if (!atomic && !crtc->primary->fb) {
- DRM_DEBUG_KMS("No FB bound\n");
- return 0;
- }
-
- if (atomic)
- target_fb = fb;
- else
- target_fb = crtc->primary->fb;
-
- /* If atomic, assume fb object is pinned & idle & fenced and
- * just update base pointers
- */
- obj = target_fb->obj[0];
- abo = gem_to_amdgpu_bo(obj);
- r = amdgpu_bo_reserve(abo, false);
- if (unlikely(r != 0))
- return r;
-
- if (!atomic) {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(abo);
- return -EINVAL;
- }
- }
- fb_location = amdgpu_bo_gpu_offset(abo);
-
- amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
- amdgpu_bo_unreserve(abo);
-
- pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
-
- switch (target_fb->format->format) {
- case DRM_FORMAT_C8:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 0);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
- break;
- case DRM_FORMAT_XRGB4444:
- case DRM_FORMAT_ARGB4444:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 2);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_XRGB1555:
- case DRM_FORMAT_ARGB1555:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_BGRX5551:
- case DRM_FORMAT_BGRA5551:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 5);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_RGB565:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN16);
-#endif
- break;
- case DRM_FORMAT_XRGB8888:
- case DRM_FORMAT_ARGB8888:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- break;
- case DRM_FORMAT_XRGB2101010:
- case DRM_FORMAT_ARGB2101010:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
- bypass_lut = true;
- break;
- case DRM_FORMAT_BGRX1010102:
- case DRM_FORMAT_BGRA1010102:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 4);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
- bypass_lut = true;
- break;
- case DRM_FORMAT_XBGR8888:
- case DRM_FORMAT_ABGR8888:
- fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
-#ifdef __BIG_ENDIAN
- fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
- ENDIAN_8IN32);
-#endif
- break;
- default:
- DRM_ERROR("Unsupported screen format %p4cc\n",
- &target_fb->format->format);
- return -EINVAL;
- }
-
- if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
- unsigned bankw, bankh, mtaspect, tile_split, num_banks;
-
- bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
- bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
- mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
- tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
- num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
-
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_NUM_BANKS, num_banks);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
- ARRAY_2D_TILED_THIN1);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_TILE_SPLIT,
- tile_split);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_WIDTH, bankw);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_HEIGHT, bankh);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MACRO_TILE_ASPECT,
- mtaspect);
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MICRO_TILE_MODE,
- ADDR_SURF_MICRO_TILING_DISPLAY);
- } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
- ARRAY_1D_TILED_THIN1);
- }
-
- fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_PIPE_CONFIG,
- pipe_config);
-
- dce_v11_0_vga_enable(crtc, false);
-
- /* Make sure surface address is updated at vertical blank rather than
- * horizontal blank
- */
- tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
- GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0);
- WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(fb_location));
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(fb_location));
- WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
- WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
- WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
- WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
-
- /*
- * The LUT only has 256 slots for indexing by a 8 bpc fb. Bypass the LUT
- * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to
- * retain the full precision throughout the pipeline.
- */
- tmp = RREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset);
- if (bypass_lut)
- tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 1);
- else
- tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 0);
- WREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset, tmp);
-
- if (bypass_lut)
- DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
-
- WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
- WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
-
- fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
- WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
-
- dce_v11_0_grph_enable(crtc, true);
-
- WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
- target_fb->height);
-
- x &= ~3;
- y &= ~1;
- WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
- (x << 16) | y);
- viewport_w = crtc->mode.hdisplay;
- viewport_h = (crtc->mode.vdisplay + 1) & ~1;
- WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
- (viewport_w << 16) | viewport_h);
-
- /* set pageflip to happen anywhere in vblank interval */
- WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
-
- if (!atomic && fb && fb != crtc->primary->fb) {
- abo = gem_to_amdgpu_bo(fb->obj[0]);
- r = amdgpu_bo_reserve(abo, true);
- if (unlikely(r != 0))
- return r;
- amdgpu_bo_unpin(abo);
- amdgpu_bo_unreserve(abo);
- }
-
- /* Bytes per pixel may have changed */
- dce_v11_0_bandwidth_update(adev);
-
- return 0;
-}
-
-static void dce_v11_0_set_interleave(struct drm_crtc *crtc,
- struct drm_display_mode *mode)
-{
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- u32 tmp;
-
- tmp = RREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset);
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 1);
- else
- tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 0);
- WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u16 *r, *g, *b;
- int i;
- u32 tmp;
-
- DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
-
- tmp = RREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_GRPH_MODE, 0);
- WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, PRESCALE_GRPH_CONTROL, GRPH_PRESCALE_BYPASS, 1);
- WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, GRPH_INPUT_GAMMA_MODE, 0);
- WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
-
- WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
-
- WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
- WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
- WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
-
- WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
- WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
-
- WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
- r = crtc->gamma_store;
- g = r + crtc->gamma_size;
- b = g + crtc->gamma_size;
- for (i = 0; i < 256; i++) {
- WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
- ((*r++ & 0xffc0) << 14) |
- ((*g++ & 0xffc0) << 4) |
- (*b++ >> 6));
- }
-
- tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, GRPH_DEGAMMA_MODE, 0);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR_DEGAMMA_MODE, 0);
- tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR2_DEGAMMA_MODE, 0);
- WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, GRPH_GAMUT_REMAP_MODE, 0);
- WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, GRPH_REGAMMA_MODE, 0);
- WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- tmp = RREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_GRPH_MODE, 0);
- WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-
- /* XXX match this to the depth of the crtc fmt block, move to modeset? */
- WREG32(mmDENORM_CONTROL + amdgpu_crtc->crtc_offset, 0);
- /* XXX this only needs to be programmed once per crtc at startup,
- * not sure where the best place for it is
- */
- tmp = RREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, ALPHA_CONTROL, CURSOR_ALPHA_BLND_ENA, 1);
- WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static int dce_v11_0_pick_dig_encoder(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- if (dig->linkb)
- return 1;
- else
- return 0;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- if (dig->linkb)
- return 3;
- else
- return 2;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- if (dig->linkb)
- return 5;
- else
- return 4;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
- return 6;
- default:
- DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
- return 0;
- }
-}
-
-/**
- * dce_v11_0_pick_pll - Allocate a PPLL for use by the crtc.
- *
- * @crtc: drm crtc
- *
- * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
- * a single PPLL can be used for all DP crtcs/encoders. For non-DP
- * monitors a dedicated PPLL must be used. If a particular board has
- * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
- * as there is no need to program the PLL itself. If we are not able to
- * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
- * avoid messing up an existing monitor.
- *
- * Asic specific PLL information
- *
- * DCE 10.x
- * Tonga
- * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP)
- * CI
- * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC
- *
- */
-static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- u32 pll_in_use;
- int pll;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- struct amdgpu_encoder *amdgpu_encoder =
- to_amdgpu_encoder(amdgpu_crtc->encoder);
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-
- if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
- return ATOM_DP_DTO;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL1;
- else
- return ATOM_COMBOPHY_PLL0;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL3;
- else
- return ATOM_COMBOPHY_PLL2;
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- if (dig->linkb)
- return ATOM_COMBOPHY_PLL5;
- else
- return ATOM_COMBOPHY_PLL4;
- default:
- DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
- return ATOM_PPLL_INVALID;
- }
- }
-
- if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
- if (adev->clock.dp_extclk)
- /* skip PPLL programming if using ext clock */
- return ATOM_PPLL_INVALID;
- else {
- /* use the same PPLL for all DP monitors */
- pll = amdgpu_pll_get_shared_dp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- }
- } else {
- /* use the same PPLL for all monitors with the same clock */
- pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- }
-
- /* XXX need to determine what plls are available on each DCE11 part */
- pll_in_use = amdgpu_pll_get_use_mask(crtc);
- if (adev->flags & AMD_IS_APU) {
- if (!(pll_in_use & (1 << ATOM_PPLL1)))
- return ATOM_PPLL1;
- if (!(pll_in_use & (1 << ATOM_PPLL0)))
- return ATOM_PPLL0;
- DRM_ERROR("unable to allocate a PPLL\n");
- return ATOM_PPLL_INVALID;
- } else {
- if (!(pll_in_use & (1 << ATOM_PPLL2)))
- return ATOM_PPLL2;
- if (!(pll_in_use & (1 << ATOM_PPLL1)))
- return ATOM_PPLL1;
- if (!(pll_in_use & (1 << ATOM_PPLL0)))
- return ATOM_PPLL0;
- DRM_ERROR("unable to allocate a PPLL\n");
- return ATOM_PPLL_INVALID;
- }
- return ATOM_PPLL_INVALID;
-}
-
-static void dce_v11_0_lock_cursor(struct drm_crtc *crtc, bool lock)
-{
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- uint32_t cur_lock;
-
- cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
- if (lock)
- cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 1);
- else
- cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 0);
- WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
-}
-
-static void dce_v11_0_hide_cursor(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- u32 tmp;
-
- tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0);
- WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static void dce_v11_0_show_cursor(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- u32 tmp;
-
- WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
- upper_32_bits(amdgpu_crtc->cursor_addr));
- WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- lower_32_bits(amdgpu_crtc->cursor_addr));
-
- tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
- tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
- WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
-}
-
-static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc,
- int x, int y)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct amdgpu_device *adev = drm_to_adev(crtc->dev);
- int xorigin = 0, yorigin = 0;
-
- amdgpu_crtc->cursor_x = x;
- amdgpu_crtc->cursor_y = y;
-
- /* avivo cursor are offset into the total surface */
- x += crtc->x;
- y += crtc->y;
- DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
-
- if (x < 0) {
- xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
- x = 0;
- }
- if (y < 0) {
- yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
- y = 0;
- }
-
- WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
- WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
- WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
- ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
-
- return 0;
-}
-
-static int dce_v11_0_crtc_cursor_move(struct drm_crtc *crtc,
- int x, int y)
-{
- int ret;
-
- dce_v11_0_lock_cursor(crtc, true);
- ret = dce_v11_0_cursor_move_locked(crtc, x, y);
- dce_v11_0_lock_cursor(crtc, false);
-
- return ret;
-}
-
-static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
- struct drm_file *file_priv,
- uint32_t handle,
- uint32_t width,
- uint32_t height,
- int32_t hot_x,
- int32_t hot_y)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_gem_object *obj;
- struct amdgpu_bo *aobj;
- int ret;
-
- if (!handle) {
- /* turn off cursor */
- dce_v11_0_hide_cursor(crtc);
- obj = NULL;
- goto unpin;
- }
-
- if ((width > amdgpu_crtc->max_cursor_width) ||
- (height > amdgpu_crtc->max_cursor_height)) {
- DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
- return -EINVAL;
- }
-
- obj = drm_gem_object_lookup(file_priv, handle);
- if (!obj) {
- DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
- return -ENOENT;
- }
-
- aobj = gem_to_amdgpu_bo(obj);
- ret = amdgpu_bo_reserve(aobj, false);
- if (ret != 0) {
- drm_gem_object_put(obj);
- return ret;
- }
-
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
- amdgpu_bo_unreserve(aobj);
- if (ret) {
- DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
- drm_gem_object_put(obj);
- return ret;
- }
- amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
-
- dce_v11_0_lock_cursor(crtc, true);
-
- if (width != amdgpu_crtc->cursor_width ||
- height != amdgpu_crtc->cursor_height ||
- hot_x != amdgpu_crtc->cursor_hot_x ||
- hot_y != amdgpu_crtc->cursor_hot_y) {
- int x, y;
-
- x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
- y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
-
- dce_v11_0_cursor_move_locked(crtc, x, y);
-
- amdgpu_crtc->cursor_width = width;
- amdgpu_crtc->cursor_height = height;
- amdgpu_crtc->cursor_hot_x = hot_x;
- amdgpu_crtc->cursor_hot_y = hot_y;
- }
-
- dce_v11_0_show_cursor(crtc);
- dce_v11_0_lock_cursor(crtc, false);
-
-unpin:
- if (amdgpu_crtc->cursor_bo) {
- struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
- ret = amdgpu_bo_reserve(aobj, true);
- if (likely(ret == 0)) {
- amdgpu_bo_unpin(aobj);
- amdgpu_bo_unreserve(aobj);
- }
- drm_gem_object_put(amdgpu_crtc->cursor_bo);
- }
-
- amdgpu_crtc->cursor_bo = obj;
- return 0;
-}
-
-static void dce_v11_0_cursor_reset(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- if (amdgpu_crtc->cursor_bo) {
- dce_v11_0_lock_cursor(crtc, true);
-
- dce_v11_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
- amdgpu_crtc->cursor_y);
-
- dce_v11_0_show_cursor(crtc);
-
- dce_v11_0_lock_cursor(crtc, false);
- }
-}
-
-static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
- u16 *blue, uint32_t size,
- struct drm_modeset_acquire_ctx *ctx)
-{
- dce_v11_0_crtc_load_lut(crtc);
-
- return 0;
-}
-
-static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- drm_crtc_cleanup(crtc);
- kfree(amdgpu_crtc);
-}
-
-static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = {
- .cursor_set2 = dce_v11_0_crtc_cursor_set2,
- .cursor_move = dce_v11_0_crtc_cursor_move,
- .gamma_set = dce_v11_0_crtc_gamma_set,
- .set_config = amdgpu_display_crtc_set_config,
- .destroy = dce_v11_0_crtc_destroy,
- .page_flip_target = amdgpu_display_crtc_page_flip_target,
- .get_vblank_counter = amdgpu_get_vblank_counter_kms,
- .enable_vblank = amdgpu_enable_vblank_kms,
- .disable_vblank = amdgpu_disable_vblank_kms,
- .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
-};
-
-static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
-{
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- unsigned type;
-
- switch (mode) {
- case DRM_MODE_DPMS_ON:
- amdgpu_crtc->enabled = true;
- amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
- dce_v11_0_vga_enable(crtc, true);
- amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
- dce_v11_0_vga_enable(crtc, false);
- /* Make sure VBLANK and PFLIP interrupts are still enabled */
- type = amdgpu_display_crtc_idx_to_irq_type(adev,
- amdgpu_crtc->crtc_id);
- amdgpu_irq_update(adev, &adev->crtc_irq, type);
- amdgpu_irq_update(adev, &adev->pageflip_irq, type);
- drm_crtc_vblank_on(crtc);
- dce_v11_0_crtc_load_lut(crtc);
- break;
- case DRM_MODE_DPMS_STANDBY:
- case DRM_MODE_DPMS_SUSPEND:
- case DRM_MODE_DPMS_OFF:
- drm_crtc_vblank_off(crtc);
- if (amdgpu_crtc->enabled) {
- dce_v11_0_vga_enable(crtc, true);
- amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
- dce_v11_0_vga_enable(crtc, false);
- }
- amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
- amdgpu_crtc->enabled = false;
- break;
- }
- /* adjust pm to dpms */
- amdgpu_dpm_compute_clocks(adev);
-}
-
-static void dce_v11_0_crtc_prepare(struct drm_crtc *crtc)
-{
- /* disable crtc pair power gating before programming */
- amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
- amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
-}
-
-static void dce_v11_0_crtc_commit(struct drm_crtc *crtc)
-{
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
- amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
-}
-
-static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_atom_ss ss;
- int i;
-
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
- if (crtc->primary->fb) {
- int r;
- struct amdgpu_bo *abo;
-
- abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
- r = amdgpu_bo_reserve(abo, true);
- if (unlikely(r))
- DRM_ERROR("failed to reserve abo before unpin\n");
- else {
- amdgpu_bo_unpin(abo);
- amdgpu_bo_unreserve(abo);
- }
- }
- /* disable the GRPH */
- dce_v11_0_grph_enable(crtc, false);
-
- amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->mode_info.crtcs[i] &&
- adev->mode_info.crtcs[i]->enabled &&
- i != amdgpu_crtc->crtc_id &&
- amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
- /* one other crtc is using this pll don't turn
- * off the pll
- */
- goto done;
- }
- }
-
- switch (amdgpu_crtc->pll_id) {
- case ATOM_PPLL0:
- case ATOM_PPLL1:
- case ATOM_PPLL2:
- /* disable the ppll */
- amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
- 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
- break;
- case ATOM_COMBOPHY_PLL0:
- case ATOM_COMBOPHY_PLL1:
- case ATOM_COMBOPHY_PLL2:
- case ATOM_COMBOPHY_PLL3:
- case ATOM_COMBOPHY_PLL4:
- case ATOM_COMBOPHY_PLL5:
- /* disable the ppll */
- amdgpu_atombios_crtc_program_pll(crtc, ATOM_CRTC_INVALID, amdgpu_crtc->pll_id,
- 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
- break;
- default:
- break;
- }
-done:
- amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
- amdgpu_crtc->adjusted_clock = 0;
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
-}
-
-static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode,
- int x, int y, struct drm_framebuffer *old_fb)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- if (!amdgpu_crtc->adjusted_clock)
- return -EINVAL;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- struct amdgpu_encoder *amdgpu_encoder =
- to_amdgpu_encoder(amdgpu_crtc->encoder);
- int encoder_mode =
- amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder);
-
- /* SetPixelClock calculates the plls and ss values now */
- amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id,
- amdgpu_crtc->pll_id,
- encoder_mode, amdgpu_encoder->encoder_id,
- adjusted_mode->clock, 0, 0, 0, 0,
- amdgpu_crtc->bpc, amdgpu_crtc->ss_enabled, &amdgpu_crtc->ss);
- } else {
- amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
- }
- amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
- dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
- amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
- amdgpu_atombios_crtc_scaler_setup(crtc);
- dce_v11_0_cursor_reset(crtc);
- /* update the hw version fpr dpm */
- amdgpu_crtc->hw_mode = *adjusted_mode;
-
- return 0;
-}
-
-static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct drm_encoder *encoder;
-
- /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
- list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
- if (encoder->crtc == crtc) {
- amdgpu_crtc->encoder = encoder;
- amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
- break;
- }
- }
- if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
- return false;
- }
- if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
- return false;
- if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
- return false;
- /* pick pll */
- amdgpu_crtc->pll_id = dce_v11_0_pick_pll(crtc);
- /* if we can't get a PPLL for a non-DP encoder, fail */
- if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
- !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
- return false;
-
- return true;
-}
-
-static int dce_v11_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
- struct drm_framebuffer *old_fb)
-{
- return dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
-}
-
-static int dce_v11_0_crtc_set_base_atomic(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- int x, int y, enum mode_set_atomic state)
-{
- return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1);
-}
-
-static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = {
- .dpms = dce_v11_0_crtc_dpms,
- .mode_fixup = dce_v11_0_crtc_mode_fixup,
- .mode_set = dce_v11_0_crtc_mode_set,
- .mode_set_base = dce_v11_0_crtc_set_base,
- .mode_set_base_atomic = dce_v11_0_crtc_set_base_atomic,
- .prepare = dce_v11_0_crtc_prepare,
- .commit = dce_v11_0_crtc_commit,
- .disable = dce_v11_0_crtc_disable,
- .get_scanout_position = amdgpu_crtc_get_scanout_position,
-};
-
-static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
-{
- struct amdgpu_crtc *amdgpu_crtc;
-
- amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
- (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
- if (amdgpu_crtc == NULL)
- return -ENOMEM;
-
- drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v11_0_crtc_funcs);
-
- drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
- amdgpu_crtc->crtc_id = index;
- adev->mode_info.crtcs[index] = amdgpu_crtc;
-
- amdgpu_crtc->max_cursor_width = 128;
- amdgpu_crtc->max_cursor_height = 128;
- adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
- adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
-
- switch (amdgpu_crtc->crtc_id) {
- case 0:
- default:
- amdgpu_crtc->crtc_offset = CRTC0_REGISTER_OFFSET;
- break;
- case 1:
- amdgpu_crtc->crtc_offset = CRTC1_REGISTER_OFFSET;
- break;
- case 2:
- amdgpu_crtc->crtc_offset = CRTC2_REGISTER_OFFSET;
- break;
- case 3:
- amdgpu_crtc->crtc_offset = CRTC3_REGISTER_OFFSET;
- break;
- case 4:
- amdgpu_crtc->crtc_offset = CRTC4_REGISTER_OFFSET;
- break;
- case 5:
- amdgpu_crtc->crtc_offset = CRTC5_REGISTER_OFFSET;
- break;
- }
-
- amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
- amdgpu_crtc->adjusted_clock = 0;
- amdgpu_crtc->encoder = NULL;
- amdgpu_crtc->connector = NULL;
- drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs);
-
- return 0;
-}
-
-static int dce_v11_0_early_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- adev->audio_endpt_rreg = &dce_v11_0_audio_endpt_rreg;
- adev->audio_endpt_wreg = &dce_v11_0_audio_endpt_wreg;
-
- dce_v11_0_set_display_funcs(adev);
-
- adev->mode_info.num_crtc = dce_v11_0_get_num_crtc(adev);
-
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 9;
- break;
- case CHIP_STONEY:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 9;
- break;
- case CHIP_POLARIS10:
- case CHIP_VEGAM:
- adev->mode_info.num_hpd = 6;
- adev->mode_info.num_dig = 6;
- break;
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- adev->mode_info.num_hpd = 5;
- adev->mode_info.num_dig = 5;
- break;
- default:
- /* FIXME: not supported yet */
- return -EINVAL;
- }
-
- dce_v11_0_set_irq_funcs(adev);
-
- return 0;
-}
-
-static int dce_v11_0_sw_init(void *handle)
-{
- int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
- if (r)
- return r;
- }
-
- for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
- if (r)
- return r;
- }
-
- /* HPD hotplug */
- r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
- if (r)
- return r;
-
- adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
-
- adev_to_drm(adev)->mode_config.async_page_flip = true;
-
- adev_to_drm(adev)->mode_config.max_width = 16384;
- adev_to_drm(adev)->mode_config.max_height = 16384;
-
- adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
-
- adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
-
- r = amdgpu_display_modeset_create_props(adev);
- if (r)
- return r;
-
- adev_to_drm(adev)->mode_config.max_width = 16384;
- adev_to_drm(adev)->mode_config.max_height = 16384;
-
-
- /* allocate crtcs */
- for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = dce_v11_0_crtc_init(adev, i);
- if (r)
- return r;
- }
-
- if (amdgpu_atombios_get_connector_info_from_object_table(adev))
- amdgpu_display_print_display_setup(adev_to_drm(adev));
- else
- return -EINVAL;
-
- /* setup afmt */
- r = dce_v11_0_afmt_init(adev);
- if (r)
- return r;
-
- r = dce_v11_0_audio_init(adev);
- if (r)
- return r;
-
- /* Disable vblank IRQs aggressively for power-saving */
- /* XXX: can this be enabled for DC? */
- adev_to_drm(adev)->vblank_disable_immediate = true;
-
- r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
- if (r)
- return r;
-
- INIT_DELAYED_WORK(&adev->hotplug_work,
- amdgpu_display_hotplug_work_func);
-
- drm_kms_helper_poll_init(adev_to_drm(adev));
-
- adev->mode_info.mode_config_initialized = true;
- return 0;
-}
-
-static int dce_v11_0_sw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- kfree(adev->mode_info.bios_hardcoded_edid);
-
- drm_kms_helper_poll_fini(adev_to_drm(adev));
-
- dce_v11_0_audio_fini(adev);
-
- dce_v11_0_afmt_fini(adev);
-
- drm_mode_config_cleanup(adev_to_drm(adev));
- adev->mode_info.mode_config_initialized = false;
-
- return 0;
-}
-
-static int dce_v11_0_hw_init(void *handle)
-{
- int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- dce_v11_0_init_golden_registers(adev);
-
- /* disable vga render */
- dce_v11_0_set_vga_render_state(adev, false);
- /* init dig PHYs, disp eng pll */
- amdgpu_atombios_crtc_powergate_init(adev);
- amdgpu_atombios_encoder_init_dig(adev);
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_POLARIS11) ||
- (adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM)) {
- amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
- DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
- amdgpu_atombios_crtc_set_dce_clock(adev, 0,
- DCE_CLOCK_TYPE_DPREFCLK, ATOM_GCK_DFS);
- } else {
- amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
- }
-
- /* initialize hpd */
- dce_v11_0_hpd_init(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- dce_v11_0_pageflip_interrupt_init(adev);
-
- return 0;
-}
-
-static int dce_v11_0_hw_fini(void *handle)
-{
- int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- dce_v11_0_hpd_fini(adev);
-
- for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
- dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
- }
-
- dce_v11_0_pageflip_interrupt_fini(adev);
-
- flush_delayed_work(&adev->hotplug_work);
-
- return 0;
-}
-
-static int dce_v11_0_suspend(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
-
- r = amdgpu_display_suspend_helper(adev);
- if (r)
- return r;
-
- adev->mode_info.bl_level =
- amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
-
- return dce_v11_0_hw_fini(handle);
-}
-
-static int dce_v11_0_resume(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
-
- amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
- adev->mode_info.bl_level);
-
- ret = dce_v11_0_hw_init(handle);
-
- /* turn on the BL */
- if (adev->mode_info.bl_encoder) {
- u8 bl_level = amdgpu_display_backlight_get_level(adev,
- adev->mode_info.bl_encoder);
- amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
- bl_level);
- }
- if (ret)
- return ret;
-
- return amdgpu_display_resume_helper(adev);
-}
-
-static bool dce_v11_0_is_idle(void *handle)
-{
- return true;
-}
-
-static int dce_v11_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int dce_v11_0_soft_reset(void *handle)
-{
- u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (dce_v11_0_is_display_hung(adev))
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
-
- if (srbm_soft_reset) {
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
- return 0;
-}
-
-static void dce_v11_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
- int crtc,
- enum amdgpu_interrupt_state state)
-{
- u32 lb_interrupt_mask;
-
- if (crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VBLANK_INTERRUPT_MASK, 0);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VBLANK_INTERRUPT_MASK, 1);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- default:
- break;
- }
-}
-
-static void dce_v11_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
- int crtc,
- enum amdgpu_interrupt_state state)
-{
- u32 lb_interrupt_mask;
-
- if (crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VLINE_INTERRUPT_MASK, 0);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
- lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
- VLINE_INTERRUPT_MASK, 1);
- WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
- break;
- default:
- break;
- }
-}
-
-static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned hpd,
- enum amdgpu_interrupt_state state)
-{
- u32 tmp;
-
- if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
- return 0;
- }
-
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
- break;
- case AMDGPU_IRQ_STATE_ENABLE:
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
- break;
- default:
- break;
- }
-
- return 0;
-}
-
-static int dce_v11_0_set_crtc_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- switch (type) {
- case AMDGPU_CRTC_IRQ_VBLANK1:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 0, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK2:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 1, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK3:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 2, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK4:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 3, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK5:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 4, state);
- break;
- case AMDGPU_CRTC_IRQ_VBLANK6:
- dce_v11_0_set_crtc_vblank_interrupt_state(adev, 5, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE1:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 0, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE2:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 1, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE3:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 2, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE4:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 3, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE5:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 4, state);
- break;
- case AMDGPU_CRTC_IRQ_VLINE6:
- dce_v11_0_set_crtc_vline_interrupt_state(adev, 5, state);
- break;
- default:
- break;
- }
- return 0;
-}
-
-static int dce_v11_0_set_pageflip_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- u32 reg;
-
- if (type >= adev->mode_info.num_crtc) {
- DRM_ERROR("invalid pageflip crtc %d\n", type);
- return -EINVAL;
- }
-
- reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
- if (state == AMDGPU_IRQ_STATE_DISABLE)
- WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
- reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
- else
- WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
- reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
-
- return 0;
-}
-
-static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- unsigned long flags;
- unsigned crtc_id;
- struct amdgpu_crtc *amdgpu_crtc;
- struct amdgpu_flip_work *works;
-
- crtc_id = (entry->src_id - 8) >> 1;
- amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
-
- if (crtc_id >= adev->mode_info.num_crtc) {
- DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
- return -EINVAL;
- }
-
- if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
- GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
- WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
- GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
-
- /* IRQ could occur when in initial stage */
- if(amdgpu_crtc == NULL)
- return 0;
-
- spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
- DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
- "AMDGPU_FLIP_SUBMITTED(%d)\n",
- amdgpu_crtc->pflip_status,
- AMDGPU_FLIP_SUBMITTED);
- spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
- return 0;
- }
-
- /* page flip completed. clean up */
- amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
- amdgpu_crtc->pflip_works = NULL;
-
- /* wakeup usersapce */
- if(works->event)
- drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
-
- spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
-
- drm_crtc_vblank_put(&amdgpu_crtc->base);
- schedule_work(&works->unpin_work);
-
- return 0;
-}
-
-static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev,
- int hpd)
-{
- u32 tmp;
-
- if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
- return;
- }
-
- tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
- tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_ACK, 1);
- WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
-}
-
-static void dce_v11_0_crtc_vblank_int_ack(struct amdgpu_device *adev,
- int crtc)
-{
- u32 tmp;
-
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- tmp = RREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc]);
- tmp = REG_SET_FIELD(tmp, LB_VBLANK_STATUS, VBLANK_ACK, 1);
- WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], tmp);
-}
-
-static void dce_v11_0_crtc_vline_int_ack(struct amdgpu_device *adev,
- int crtc)
-{
- u32 tmp;
-
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc) {
- DRM_DEBUG("invalid crtc %d\n", crtc);
- return;
- }
-
- tmp = RREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc]);
- tmp = REG_SET_FIELD(tmp, LB_VLINE_STATUS, VLINE_ACK, 1);
- WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], tmp);
-}
-
-static int dce_v11_0_crtc_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- unsigned crtc = entry->src_id - 1;
- uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
- unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
- crtc);
-
- switch (entry->src_data[0]) {
- case 0: /* vblank */
- if (disp_int & interrupt_status_offsets[crtc].vblank)
- dce_v11_0_crtc_vblank_int_ack(adev, crtc);
- else
- DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
-
- if (amdgpu_irq_enabled(adev, source, irq_type)) {
- drm_handle_vblank(adev_to_drm(adev), crtc);
- }
- DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
-
- break;
- case 1: /* vline */
- if (disp_int & interrupt_status_offsets[crtc].vline)
- dce_v11_0_crtc_vline_int_ack(adev, crtc);
- else
- DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
-
- DRM_DEBUG("IH: D%d vline\n", crtc + 1);
-
- break;
- default:
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
- break;
- }
-
- return 0;
-}
-
-static int dce_v11_0_hpd_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- uint32_t disp_int, mask;
- unsigned hpd;
-
- if (entry->src_data[0] >= adev->mode_info.num_hpd) {
- DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
- return 0;
- }
-
- hpd = entry->src_data[0];
- disp_int = RREG32(interrupt_status_offsets[hpd].reg);
- mask = interrupt_status_offsets[hpd].hpd;
-
- if (disp_int & mask) {
- dce_v11_0_hpd_int_ack(adev, hpd);
- schedule_delayed_work(&adev->hotplug_work, 0);
- DRM_DEBUG("IH: HPD%d\n", hpd + 1);
- }
-
- return 0;
-}
-
-static int dce_v11_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- return 0;
-}
-
-static int dce_v11_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
-{
- return 0;
-}
-
-static const struct amd_ip_funcs dce_v11_0_ip_funcs = {
- .name = "dce_v11_0",
- .early_init = dce_v11_0_early_init,
- .late_init = NULL,
- .sw_init = dce_v11_0_sw_init,
- .sw_fini = dce_v11_0_sw_fini,
- .hw_init = dce_v11_0_hw_init,
- .hw_fini = dce_v11_0_hw_fini,
- .suspend = dce_v11_0_suspend,
- .resume = dce_v11_0_resume,
- .is_idle = dce_v11_0_is_idle,
- .wait_for_idle = dce_v11_0_wait_for_idle,
- .soft_reset = dce_v11_0_soft_reset,
- .set_clockgating_state = dce_v11_0_set_clockgating_state,
- .set_powergating_state = dce_v11_0_set_powergating_state,
-};
-
-static void
-dce_v11_0_encoder_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-
- amdgpu_encoder->pixel_clock = adjusted_mode->clock;
-
- /* need to call this here rather than in prepare() since we need some crtc info */
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
-
- /* set scaler clears this on some chips */
- dce_v11_0_set_interleave(encoder->crtc, mode);
-
- if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
- dce_v11_0_afmt_enable(encoder, true);
- dce_v11_0_afmt_setmode(encoder, adjusted_mode);
- }
-}
-
-static void dce_v11_0_encoder_prepare(struct drm_encoder *encoder)
-{
- struct amdgpu_device *adev = drm_to_adev(encoder->dev);
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
-
- if ((amdgpu_encoder->active_device &
- (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
- (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
- ENCODER_OBJECT_ID_NONE)) {
- struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
- if (dig) {
- dig->dig_encoder = dce_v11_0_pick_dig_encoder(encoder);
- if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
- dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
- }
- }
-
- amdgpu_atombios_scratch_regs_lock(adev, true);
-
- if (connector) {
- struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
-
- /* select the clock/data port if it uses a router */
- if (amdgpu_connector->router.cd_valid)
- amdgpu_i2c_router_select_cd_port(amdgpu_connector);
-
- /* turn eDP panel on for mode set */
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
- amdgpu_atombios_encoder_set_edp_panel_power(connector,
- ATOM_TRANSMITTER_ACTION_POWER_ON);
- }
-
- /* this is needed for the pll/ss setup to work correctly in some cases */
- amdgpu_atombios_encoder_set_crtc_source(encoder);
- /* set up the FMT blocks */
- dce_v11_0_program_fmt(encoder);
-}
-
-static void dce_v11_0_encoder_commit(struct drm_encoder *encoder)
-{
- struct drm_device *dev = encoder->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
-
- /* need to call this here as we need the crtc set up */
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
- amdgpu_atombios_scratch_regs_lock(adev, false);
-}
-
-static void dce_v11_0_encoder_disable(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig;
-
- amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
-
- if (amdgpu_atombios_encoder_is_digital(encoder)) {
- if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
- dce_v11_0_afmt_enable(encoder, false);
- dig = amdgpu_encoder->enc_priv;
- dig->dig_encoder = -1;
- }
- amdgpu_encoder->active_device = 0;
-}
-
-/* these are handled by the primary encoders */
-static void dce_v11_0_ext_prepare(struct drm_encoder *encoder)
-{
-
-}
-
-static void dce_v11_0_ext_commit(struct drm_encoder *encoder)
-{
-
-}
-
-static void
-dce_v11_0_ext_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
-
-}
-
-static void dce_v11_0_ext_disable(struct drm_encoder *encoder)
-{
-
-}
-
-static void
-dce_v11_0_ext_dpms(struct drm_encoder *encoder, int mode)
-{
-
-}
-
-static const struct drm_encoder_helper_funcs dce_v11_0_ext_helper_funcs = {
- .dpms = dce_v11_0_ext_dpms,
- .prepare = dce_v11_0_ext_prepare,
- .mode_set = dce_v11_0_ext_mode_set,
- .commit = dce_v11_0_ext_commit,
- .disable = dce_v11_0_ext_disable,
- /* no detect for TMDS/LVDS yet */
-};
-
-static const struct drm_encoder_helper_funcs dce_v11_0_dig_helper_funcs = {
- .dpms = amdgpu_atombios_encoder_dpms,
- .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
- .prepare = dce_v11_0_encoder_prepare,
- .mode_set = dce_v11_0_encoder_mode_set,
- .commit = dce_v11_0_encoder_commit,
- .disable = dce_v11_0_encoder_disable,
- .detect = amdgpu_atombios_encoder_dig_detect,
-};
-
-static const struct drm_encoder_helper_funcs dce_v11_0_dac_helper_funcs = {
- .dpms = amdgpu_atombios_encoder_dpms,
- .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
- .prepare = dce_v11_0_encoder_prepare,
- .mode_set = dce_v11_0_encoder_mode_set,
- .commit = dce_v11_0_encoder_commit,
- .detect = amdgpu_atombios_encoder_dac_detect,
-};
-
-static void dce_v11_0_encoder_destroy(struct drm_encoder *encoder)
-{
- struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
- amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
- kfree(amdgpu_encoder->enc_priv);
- drm_encoder_cleanup(encoder);
- kfree(amdgpu_encoder);
-}
-
-static const struct drm_encoder_funcs dce_v11_0_encoder_funcs = {
- .destroy = dce_v11_0_encoder_destroy,
-};
-
-static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
- uint32_t encoder_enum,
- uint32_t supported_device,
- u16 caps)
-{
- struct drm_device *dev = adev_to_drm(adev);
- struct drm_encoder *encoder;
- struct amdgpu_encoder *amdgpu_encoder;
-
- /* see if we already added it */
- list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
- amdgpu_encoder = to_amdgpu_encoder(encoder);
- if (amdgpu_encoder->encoder_enum == encoder_enum) {
- amdgpu_encoder->devices |= supported_device;
- return;
- }
-
- }
-
- /* add a new one */
- amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
- if (!amdgpu_encoder)
- return;
-
- encoder = &amdgpu_encoder->base;
- switch (adev->mode_info.num_crtc) {
- case 1:
- encoder->possible_crtcs = 0x1;
- break;
- case 2:
- default:
- encoder->possible_crtcs = 0x3;
- break;
- case 3:
- encoder->possible_crtcs = 0x7;
- break;
- case 4:
- encoder->possible_crtcs = 0xf;
- break;
- case 5:
- encoder->possible_crtcs = 0x1f;
- break;
- case 6:
- encoder->possible_crtcs = 0x3f;
- break;
- }
-
- amdgpu_encoder->enc_priv = NULL;
-
- amdgpu_encoder->encoder_enum = encoder_enum;
- amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
- amdgpu_encoder->devices = supported_device;
- amdgpu_encoder->rmx_type = RMX_OFF;
- amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
- amdgpu_encoder->is_ext_encoder = false;
- amdgpu_encoder->caps = caps;
-
- switch (amdgpu_encoder->encoder_id) {
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- drm_encoder_helper_add(encoder, &dce_v11_0_dac_helper_funcs);
- break;
- case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
- case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
- amdgpu_encoder->rmx_type = RMX_FULL;
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
- } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
- } else {
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
- amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
- }
- drm_encoder_helper_add(encoder, &dce_v11_0_dig_helper_funcs);
- break;
- case ENCODER_OBJECT_ID_SI170B:
- case ENCODER_OBJECT_ID_CH7303:
- case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
- case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
- case ENCODER_OBJECT_ID_TITFP513:
- case ENCODER_OBJECT_ID_VT1623:
- case ENCODER_OBJECT_ID_HDMI_SI1930:
- case ENCODER_OBJECT_ID_TRAVIS:
- case ENCODER_OBJECT_ID_NUTMEG:
- /* these are handled by the primary encoders */
- amdgpu_encoder->is_ext_encoder = true;
- if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_LVDS, NULL);
- else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_DAC, NULL);
- else
- drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
- DRM_MODE_ENCODER_TMDS, NULL);
- drm_encoder_helper_add(encoder, &dce_v11_0_ext_helper_funcs);
- break;
- }
-}
-
-static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
- .bandwidth_update = &dce_v11_0_bandwidth_update,
- .vblank_get_counter = &dce_v11_0_vblank_get_counter,
- .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
- .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
- .hpd_sense = &dce_v11_0_hpd_sense,
- .hpd_set_polarity = &dce_v11_0_hpd_set_polarity,
- .hpd_get_gpio_reg = &dce_v11_0_hpd_get_gpio_reg,
- .page_flip = &dce_v11_0_page_flip,
- .page_flip_get_scanoutpos = &dce_v11_0_crtc_get_scanoutpos,
- .add_encoder = &dce_v11_0_encoder_add,
- .add_connector = &amdgpu_connector_add,
-};
-
-static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev)
-{
- adev->mode_info.funcs = &dce_v11_0_display_funcs;
-}
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = {
- .set = dce_v11_0_set_crtc_irq_state,
- .process = dce_v11_0_crtc_irq,
-};
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_pageflip_irq_funcs = {
- .set = dce_v11_0_set_pageflip_irq_state,
- .process = dce_v11_0_pageflip_irq,
-};
-
-static const struct amdgpu_irq_src_funcs dce_v11_0_hpd_irq_funcs = {
- .set = dce_v11_0_set_hpd_irq_state,
- .process = dce_v11_0_hpd_irq,
-};
-
-static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev)
-{
- if (adev->mode_info.num_crtc > 0)
- adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
- else
- adev->crtc_irq.num_types = 0;
- adev->crtc_irq.funcs = &dce_v11_0_crtc_irq_funcs;
-
- adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
- adev->pageflip_irq.funcs = &dce_v11_0_pageflip_irq_funcs;
-
- adev->hpd_irq.num_types = adev->mode_info.num_hpd;
- adev->hpd_irq.funcs = &dce_v11_0_hpd_irq_funcs;
-}
-
-const struct amdgpu_ip_block_version dce_v11_0_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_DCE,
- .major = 11,
- .minor = 0,
- .rev = 0,
- .funcs = &dce_v11_0_ip_funcs,
-};
-
-const struct amdgpu_ip_block_version dce_v11_2_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_DCE,
- .major = 11,
- .minor = 2,
- .rev = 0,
- .funcs = &dce_v11_0_ip_funcs,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 7f85ba5b726f..acc887a58518 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -23,6 +23,7 @@
#include <linux/pci.h>
+#include <drm/drm_edid.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_modeset_helper.h>
#include <drm/drm_modeset_helper_vtables.h>
@@ -39,18 +40,25 @@
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
+#include "dce_v6_0.h"
+#include "sid.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
#include "gca/gfx_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+
#include "gmc/gmc_6_0_d.h"
#include "gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
#include "dce/dce_6_0_sh_mask.h"
-#include "gca/gfx_7_2_enum.h"
-#include "dce_v6_0.h"
+
#include "si_enums.h"
static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev);
@@ -58,31 +66,31 @@ static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static const u32 crtc_offsets[6] =
{
- SI_CRTC0_REGISTER_OFFSET,
- SI_CRTC1_REGISTER_OFFSET,
- SI_CRTC2_REGISTER_OFFSET,
- SI_CRTC3_REGISTER_OFFSET,
- SI_CRTC4_REGISTER_OFFSET,
- SI_CRTC5_REGISTER_OFFSET
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET
};
static const u32 hpd_offsets[] =
{
- mmDC_HPD1_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD2_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD3_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD4_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD5_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD6_INT_STATUS - mmDC_HPD1_INT_STATUS,
+ HPD0_REGISTER_OFFSET,
+ HPD1_REGISTER_OFFSET,
+ HPD2_REGISTER_OFFSET,
+ HPD3_REGISTER_OFFSET,
+ HPD4_REGISTER_OFFSET,
+ HPD5_REGISTER_OFFSET
};
static const uint32_t dig_offsets[] = {
- SI_CRTC0_REGISTER_OFFSET,
- SI_CRTC1_REGISTER_OFFSET,
- SI_CRTC2_REGISTER_OFFSET,
- SI_CRTC3_REGISTER_OFFSET,
- SI_CRTC4_REGISTER_OFFSET,
- SI_CRTC5_REGISTER_OFFSET,
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET,
(0x13830 - 0x7030) >> 2,
};
@@ -205,9 +213,9 @@ static void dce_v6_0_page_flip(struct amdgpu_device *adev,
/* update the scanout addresses */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
(u32)crtc_base);
-
/* post the write */
RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
}
@@ -217,11 +225,11 @@ static int dce_v6_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
{
if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
return -EINVAL;
+
*vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
*position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
return 0;
-
}
/**
@@ -241,7 +249,8 @@ static bool dce_v6_0_hpd_sense(struct amdgpu_device *adev,
if (hpd >= adev->mode_info.num_hpd)
return connected;
- if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) & DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
+ if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) &
+ DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
connected = true;
return connected;
@@ -272,6 +281,21 @@ static void dce_v6_0_hpd_set_polarity(struct amdgpu_device *adev,
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
}
+static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
/**
* dce_v6_0_hpd_init - hpd setup callback.
*
@@ -311,6 +335,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
continue;
}
+ dce_v6_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -353,13 +378,41 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
return mmDC_GPIO_HPD_A;
}
+static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
+{
+ u32 crtc_hung = 0;
+ u32 crtc_status[6];
+ u32 i, j, tmp;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
+ crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ crtc_hung |= (1 << i);
+ }
+ }
+
+ for (j = 0; j < 10; j++) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (crtc_hung & (1 << i)) {
+ tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ if (tmp != crtc_status[i])
+ crtc_hung &= ~(1 << i);
+ }
+ }
+ if (crtc_hung == 0)
+ return false;
+ udelay(100);
+ }
+
+ return true;
+}
+
static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
bool render)
{
if (!render)
WREG32(mmVGA_RENDER_CONTROL,
- RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL);
-
+ RREG32(mmVGA_RENDER_CONTROL) & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK);
}
static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
@@ -402,7 +455,6 @@ void dce_v6_0_disable_dce(struct amdgpu_device *adev)
static void dce_v6_0_program_fmt(struct drm_encoder *encoder)
{
-
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -845,7 +897,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
(u32)mode->clock);
line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
(u32)mode->clock);
- line_time = min(line_time, (u32)65535);
+ line_time = min_t(u32, line_time, 65535);
priority_a_cnt = 0;
priority_b_cnt = 0;
@@ -878,8 +930,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
wm_high.dram_channels = dram_channels;
wm_high.num_heads = num_heads;
- if (adev->pm.dpm_enabled) {
/* watermark for low clocks */
+ if (adev->pm.dpm_enabled) {
wm_low.yclk =
amdgpu_dpm_get_mclk(adev, true) * 10;
wm_low.sclk =
@@ -906,9 +958,9 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
wm_low.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce_v6_0_latency_watermark(&wm_high), (u32)65535);
+ latency_watermark_a = min_t(u32, dce_v6_0_latency_watermark(&wm_high), 65535);
/* set for low clocks */
- latency_watermark_b = min(dce_v6_0_latency_watermark(&wm_low), (u32)65535);
+ latency_watermark_b = min_t(u32, dce_v6_0_latency_watermark(&wm_low), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -959,16 +1011,16 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
/* select wm A */
arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
tmp = arb_control3;
- tmp &= ~LATENCY_WATERMARK_MASK(3);
- tmp |= LATENCY_WATERMARK_MASK(1);
+ tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (1 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
(line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
/* select wm B */
tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
- tmp &= ~LATENCY_WATERMARK_MASK(3);
- tmp |= LATENCY_WATERMARK_MASK(2);
+ tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (2 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
@@ -982,13 +1034,26 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
/* save values for DPM */
amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
/* Save number of lines the linebuffer leads before the scanout */
amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
/* watermark setup */
+/**
+ * dce_v6_0_line_buffer_adjust - Set up the line buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ * @other_mode: the display mode of another display controller
+ * that may be sharing the line buffer
+ *
+ * Set up the line buffer allocation for
+ * the selected display controller (SI).
+ * Returns the line buffer size in pixels.
+ */
static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
struct amdgpu_crtc *amdgpu_crtc,
struct drm_display_mode *mode,
@@ -1023,7 +1088,7 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
}
WREG32(mmDC_LB_MEMORY_SPLIT + amdgpu_crtc->crtc_offset,
- DC_LB_MEMORY_CONFIG(tmp));
+ (tmp << DC_LB_MEMORY_SPLIT__DC_LB_MEMORY_CONFIG__SHIFT));
WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
(buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT));
@@ -1200,7 +1265,7 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1240,6 +1305,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 offset;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
@@ -1261,6 +1327,11 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
};
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
@@ -1275,14 +1346,14 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
return;
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
- u32 tmp = 0;
+ u32 value = 0;
u8 stereo_freqs = 0;
int max_channels = -1;
int j;
@@ -1292,12 +1363,12 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
if (sad->format == eld_reg_to_type[i][1]) {
if (sad->channels > max_channels) {
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- MAX_CHANNELS, sad->channels);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- DESCRIPTOR_BYTE_2, sad->byte2);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES, sad->freq);
+ value = (sad->channels <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
+ (sad->byte2 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
+ (sad->freq <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
max_channels = sad->channels;
}
@@ -1308,13 +1379,13 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
}
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
+ value |= (stereo_freqs <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT);
+
+ WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value);
}
kfree(sads);
-
}
static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
@@ -1330,13 +1401,13 @@ static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
static const u32 pin_offsets[7] =
{
- (0x1780 - 0x1780),
- (0x1786 - 0x1780),
- (0x178c - 0x1780),
- (0x1792 - 0x1780),
- (0x1798 - 0x1780),
- (0x179d - 0x1780),
- (0x17a4 - 0x1780),
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
};
static int dce_v6_0_audio_init(struct amdgpu_device *adev)
@@ -1369,6 +1440,8 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev)
adev->mode_info.audio.pin[i].connected = false;
adev->mode_info.audio.pin[i].offset = pin_offsets[i];
adev->mode_info.audio.pin[i].id = i;
+ /* disable audio. it will be set up later */
+ /* XXX remove once we switch to ip funcs */
dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
@@ -1377,17 +1450,12 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev)
static void dce_v6_0_audio_fini(struct amdgpu_device *adev)
{
- int i;
-
if (!amdgpu_audio)
return;
if (!adev->mode_info.audio.enabled)
return;
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
adev->mode_info.audio.enabled = false;
}
@@ -1818,7 +1886,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_bo *abo;
uint64_t fb_location, tiling_flags;
uint32_t fb_format, fb_pitch_pixels, pipe_config;
- u32 fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_NONE);
+ u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
u32 viewport_w, viewport_h;
int r;
bool bypass_lut = false;
@@ -1844,6 +1912,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -1857,76 +1926,76 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
switch (target_fb->format->format) {
case DRM_FORMAT_C8:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_8BPP) |
- GRPH_FORMAT(GRPH_FORMAT_INDEXED));
+ fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
break;
case DRM_FORMAT_XRGB4444:
case DRM_FORMAT_ARGB4444:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB4444));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB1555:
case DRM_FORMAT_ARGB1555:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB1555));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_BGRX5551:
case DRM_FORMAT_BGRA5551:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_BGRA5551));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_RGB565:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB565));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB8888:
case DRM_FORMAT_ARGB8888:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB2101010:
case DRM_FORMAT_ARGB2101010:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB2101010));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
case DRM_FORMAT_BGRX1010102:
case DRM_FORMAT_BGRA1010102:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_BGRA1010102));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
- fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) |
- GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
+ (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
default:
@@ -1944,18 +2013,18 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
- fb_format |= GRPH_NUM_BANKS(num_banks);
- fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_2D_TILED_THIN1);
- fb_format |= GRPH_TILE_SPLIT(tile_split);
- fb_format |= GRPH_BANK_WIDTH(bankw);
- fb_format |= GRPH_BANK_HEIGHT(bankh);
- fb_format |= GRPH_MACRO_TILE_ASPECT(mtaspect);
+ fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT);
+ fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
+ fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT);
+ fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT);
+ fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT);
+ fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT);
} else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
- fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_1D_TILED_THIN1);
+ fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
}
pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
- fb_format |= GRPH_PIPE_CONFIG(pipe_config);
+ fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT);
dce_v6_0_vga_enable(crtc, false);
@@ -1971,7 +2040,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
(u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32) fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
@@ -2039,14 +2108,13 @@ static void dce_v6_0_set_interleave(struct drm_crtc *crtc,
if (mode->flags & DRM_MODE_FLAG_INTERLACE)
WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset,
- INTERLEAVE_EN);
+ DATA_FORMAT__INTERLEAVE_EN_MASK);
else
WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0);
}
static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
{
-
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -2056,15 +2124,15 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
- (0 << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
+ ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
+ (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset,
PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK);
WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset,
PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK);
WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
- (0 << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
+ ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
+ (INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
@@ -2091,19 +2159,19 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
}
WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
- (0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
- ICON_DEGAMMA_MODE(0) |
- (0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
+ ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
- (0 << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
+ ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
+ (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
- (0 << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
+ ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
+ (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
- (0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
+ ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
+ (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
/* XXX match this to the depth of the crtc fmt block, move to modeset? */
WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0);
@@ -2198,8 +2266,6 @@ static void dce_v6_0_hide_cursor(struct drm_crtc *crtc)
WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
(CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
(CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
-
-
}
static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
@@ -2216,7 +2282,6 @@ static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
CUR_CONTROL__CURSOR_EN_MASK |
(CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
(CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
-
}
static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc,
@@ -2304,6 +2369,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
@@ -2526,7 +2592,6 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
-
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct drm_encoder *encoder;
@@ -2583,6 +2648,32 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v6_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v6_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v6_0_panic_flush,
+};
+
static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2610,13 +2701,14 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v6_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v6_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v6_0_early_init(void *handle)
+static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v6_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v6_0_audio_endpt_wreg;
@@ -2645,11 +2737,10 @@ static int dce_v6_0_early_init(void *handle)
return 0;
}
-static int dce_v6_0_sw_init(void *handle)
+static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- bool ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2692,8 +2783,7 @@ static int dce_v6_0_sw_init(void *handle)
return r;
}
- ret = amdgpu_atombios_get_connector_info_from_object_table(adev);
- if (ret)
+ if (amdgpu_atombios_get_connector_info_from_object_table(adev))
amdgpu_display_print_display_setup(adev_to_drm(adev));
else
return -EINVAL;
@@ -2724,11 +2814,11 @@ static int dce_v6_0_sw_init(void *handle)
return r;
}
-static int dce_v6_0_sw_fini(void *handle)
+static int dce_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- kfree(adev->mode_info.bios_hardcoded_edid);
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
drm_kms_helper_poll_fini(adev_to_drm(adev));
@@ -2741,10 +2831,10 @@ static int dce_v6_0_sw_fini(void *handle)
return 0;
}
-static int dce_v6_0_hw_init(void *handle)
+static int dce_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* disable vga render */
dce_v6_0_set_vga_render_state(adev, false);
@@ -2764,10 +2854,10 @@ static int dce_v6_0_hw_init(void *handle)
return 0;
}
-static int dce_v6_0_hw_fini(void *handle)
+static int dce_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v6_0_hpd_fini(adev);
@@ -2782,9 +2872,9 @@ static int dce_v6_0_hw_fini(void *handle)
return 0;
}
-static int dce_v6_0_suspend(void *handle)
+static int dce_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_display_suspend_helper(adev);
@@ -2793,18 +2883,18 @@ static int dce_v6_0_suspend(void *handle)
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v6_0_hw_fini(handle);
+ return dce_v6_0_hw_fini(ip_block);
}
-static int dce_v6_0_resume(void *handle)
+static int dce_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v6_0_hw_init(handle);
+ ret = dce_v6_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2819,19 +2909,35 @@ static int dce_v6_0_resume(void *handle)
return amdgpu_display_resume_helper(adev);
}
-static bool dce_v6_0_is_idle(void *handle)
+static bool dce_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v6_0_wait_for_idle(void *handle)
+static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
+ u32 srbm_soft_reset = 0, tmp;
+ struct amdgpu_device *adev = ip_block->adev;
-static int dce_v6_0_soft_reset(void *handle)
-{
- DRM_INFO("xxxx: dce_v6_0_soft_reset --- no impl!!\n");
+ if (dce_v6_0_is_display_hung(adev))
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
return 0;
}
@@ -2848,22 +2954,22 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
switch (crtc) {
case 0:
- reg_block = SI_CRTC0_REGISTER_OFFSET;
+ reg_block = CRTC0_REGISTER_OFFSET;
break;
case 1:
- reg_block = SI_CRTC1_REGISTER_OFFSET;
+ reg_block = CRTC1_REGISTER_OFFSET;
break;
case 2:
- reg_block = SI_CRTC2_REGISTER_OFFSET;
+ reg_block = CRTC2_REGISTER_OFFSET;
break;
case 3:
- reg_block = SI_CRTC3_REGISTER_OFFSET;
+ reg_block = CRTC3_REGISTER_OFFSET;
break;
case 4:
- reg_block = SI_CRTC4_REGISTER_OFFSET;
+ reg_block = CRTC4_REGISTER_OFFSET;
break;
case 5:
- reg_block = SI_CRTC5_REGISTER_OFFSET;
+ reg_block = CRTC5_REGISTER_OFFSET;
break;
default:
DRM_DEBUG("invalid crtc %d\n", crtc);
@@ -2873,12 +2979,12 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
interrupt_mask = RREG32(mmINT_MASK + reg_block);
- interrupt_mask &= ~VBLANK_INT_MASK;
+ interrupt_mask &= ~INT_MASK__VBLANK_INT_MASK;
WREG32(mmINT_MASK + reg_block, interrupt_mask);
break;
case AMDGPU_IRQ_STATE_ENABLE:
interrupt_mask = RREG32(mmINT_MASK + reg_block);
- interrupt_mask |= VBLANK_INT_MASK;
+ interrupt_mask |= INT_MASK__VBLANK_INT_MASK;
WREG32(mmINT_MASK + reg_block, interrupt_mask);
break;
default:
@@ -2893,28 +2999,28 @@ static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
}
-static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_hpd_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned hpd,
enum amdgpu_interrupt_state state)
{
u32 dc_hpd_int_cntl;
- if (type >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", type);
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return 0;
}
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
- dc_hpd_int_cntl &= ~DC_HPDx_INT_EN;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
- dc_hpd_int_cntl |= DC_HPDx_INT_EN;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
break;
default:
break;
@@ -2923,7 +3029,7 @@ static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
return 0;
}
-static int dce_v6_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_crtc_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -2983,7 +3089,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
switch (entry->src_data[0]) {
case 0: /* vblank */
if (disp_int & interrupt_status_offsets[crtc].vblank)
- WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK);
+ WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_STATUS__VBLANK_ACK_MASK);
else
DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
@@ -2994,7 +3100,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
break;
case 1: /* vline */
if (disp_int & interrupt_status_offsets[crtc].vline)
- WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_ACK);
+ WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_STATUS__VLINE_ACK_MASK);
else
DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
@@ -3008,7 +3114,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v6_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_pageflip_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3059,7 +3165,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev,
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
"AMDGPU_FLIP_SUBMITTED(%d)\n",
amdgpu_crtc->pflip_status,
@@ -3088,7 +3194,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t disp_int, mask, tmp;
+ uint32_t disp_int, mask;
unsigned hpd;
if (entry->src_data[0] >= adev->mode_info.num_hpd) {
@@ -3101,24 +3207,21 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
mask = interrupt_status_offsets[hpd].hpd;
if (disp_int & mask) {
- tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
- tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+ dce_v6_0_hpd_int_ack(adev, hpd);
schedule_delayed_work(&adev->hotplug_work, 0);
DRM_DEBUG("IH: HPD%d\n", hpd + 1);
}
return 0;
-
}
-static int dce_v6_0_set_clockgating_state(void *handle,
+static int dce_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v6_0_set_powergating_state(void *handle,
+static int dce_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3127,7 +3230,6 @@ static int dce_v6_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
.name = "dce_v6_0",
.early_init = dce_v6_0_early_init,
- .late_init = NULL,
.sw_init = dce_v6_0_sw_init,
.sw_fini = dce_v6_0_sw_fini,
.hw_init = dce_v6_0_hw_init,
@@ -3135,18 +3237,15 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
.suspend = dce_v6_0_suspend,
.resume = dce_v6_0_resume,
.is_idle = dce_v6_0_is_idle,
- .wait_for_idle = dce_v6_0_wait_for_idle,
.soft_reset = dce_v6_0_soft_reset,
.set_clockgating_state = dce_v6_0_set_clockgating_state,
.set_powergating_state = dce_v6_0_set_powergating_state,
};
-static void
-dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
+static void dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
-
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
@@ -3166,7 +3265,6 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
{
-
struct amdgpu_device *adev = drm_to_adev(encoder->dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
@@ -3206,7 +3304,6 @@ static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
{
-
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -3217,7 +3314,6 @@ static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
static void dce_v6_0_encoder_disable(struct drm_encoder *encoder)
{
-
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig;
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
@@ -3244,8 +3340,7 @@ static void dce_v6_0_ext_commit(struct drm_encoder *encoder)
}
-static void
-dce_v6_0_ext_mode_set(struct drm_encoder *encoder,
+static void dce_v6_0_ext_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
@@ -3257,8 +3352,7 @@ static void dce_v6_0_ext_disable(struct drm_encoder *encoder)
}
-static void
-dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode)
+static void dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode)
{
}
@@ -3329,7 +3423,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev,
amdgpu_encoder->devices |= supported_device;
return;
}
-
}
/* add a new one */
@@ -3436,17 +3529,17 @@ static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = {
- .set = dce_v6_0_set_crtc_interrupt_state,
+ .set = dce_v6_0_set_crtc_irq_state,
.process = dce_v6_0_crtc_irq,
};
static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = {
- .set = dce_v6_0_set_pageflip_interrupt_state,
+ .set = dce_v6_0_set_pageflip_irq_state,
.process = dce_v6_0_pageflip_irq,
};
static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = {
- .set = dce_v6_0_set_hpd_interrupt_state,
+ .set = dce_v6_0_set_hpd_irq_state,
.process = dce_v6_0_hpd_irq,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index f2b3cb5ed6be..2ccd6aad8dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -21,6 +21,7 @@
*
*/
+#include <drm/drm_edid.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_modeset_helper.h>
#include <drm/drm_modeset_helper_vtables.h>
@@ -264,6 +265,21 @@ static void dce_v8_0_hpd_set_polarity(struct amdgpu_device *adev,
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
}
+static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
/**
* dce_v8_0_hpd_init - hpd setup callback.
*
@@ -303,6 +319,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
continue;
}
+ dce_v8_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -975,7 +992,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
(u32)mode->clock);
line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
(u32)mode->clock);
- line_time = min(line_time, (u32)65535);
+ line_time = min_t(u32, line_time, 65535);
/* watermark for high clocks */
if (adev->pm.dpm_enabled) {
@@ -1005,7 +1022,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
wm_high.num_heads = num_heads;
/* set for high clocks */
- latency_watermark_a = min(dce_v8_0_latency_watermark(&wm_high), (u32)65535);
+ latency_watermark_a = min_t(u32, dce_v8_0_latency_watermark(&wm_high), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1044,7 +1061,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
wm_low.num_heads = num_heads;
/* set for low clocks */
- latency_watermark_b = min(dce_v8_0_latency_watermark(&wm_low), (u32)65535);
+ latency_watermark_b = min_t(u32, dce_v8_0_latency_watermark(&wm_low), 65535);
/* possibly force display priority to high */
/* should really do this at mode validation time... */
@@ -1079,8 +1096,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
/* save values for DPM */
amdgpu_crtc->line_time = line_time;
- amdgpu_crtc->wm_high = latency_watermark_a;
- amdgpu_crtc->wm_low = latency_watermark_b;
+
/* Save number of lines the linebuffer leads before the scanout */
amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
@@ -1255,7 +1271,7 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
if (sad_count < 0) {
DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
sad_count = 0;
@@ -1323,7 +1339,7 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
}
- sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
if (sad_count <= 0)
@@ -1378,13 +1394,13 @@ static void dce_v8_0_audio_enable(struct amdgpu_device *adev,
}
static const u32 pin_offsets[7] = {
- (0x1780 - 0x1780),
- (0x1786 - 0x1780),
- (0x178c - 0x1780),
- (0x1792 - 0x1780),
- (0x1798 - 0x1780),
- (0x179d - 0x1780),
- (0x17a4 - 0x1780),
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
};
static int dce_v8_0_audio_init(struct amdgpu_device *adev)
@@ -1426,17 +1442,12 @@ static int dce_v8_0_audio_init(struct amdgpu_device *adev)
static void dce_v8_0_audio_fini(struct amdgpu_device *adev)
{
- int i;
-
if (!amdgpu_audio)
return;
if (!adev->mode_info.audio.enabled)
return;
- for (i = 0; i < adev->mode_info.audio.num_pins; i++)
- dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
adev->mode_info.audio.enabled = false;
}
@@ -1811,6 +1822,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@@ -2303,6 +2315,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
@@ -2594,6 +2607,31 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v8_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+}
+
+static const struct drm_plane_helper_funcs dce_v8_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v8_0_panic_flush,
+};
+
static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2621,13 +2659,14 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v8_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v8_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v8_0_early_init(void *handle)
+static int dce_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v8_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v8_0_audio_endpt_wreg;
@@ -2661,10 +2700,10 @@ static int dce_v8_0_early_init(void *handle)
return 0;
}
-static int dce_v8_0_sw_init(void *handle)
+static int dce_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2745,11 +2784,11 @@ static int dce_v8_0_sw_init(void *handle)
return 0;
}
-static int dce_v8_0_sw_fini(void *handle)
+static int dce_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- kfree(adev->mode_info.bios_hardcoded_edid);
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
drm_kms_helper_poll_fini(adev_to_drm(adev));
@@ -2763,10 +2802,10 @@ static int dce_v8_0_sw_fini(void *handle)
return 0;
}
-static int dce_v8_0_hw_init(void *handle)
+static int dce_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* disable vga render */
dce_v8_0_set_vga_render_state(adev, false);
@@ -2786,10 +2825,10 @@ static int dce_v8_0_hw_init(void *handle)
return 0;
}
-static int dce_v8_0_hw_fini(void *handle)
+static int dce_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v8_0_hpd_fini(adev);
@@ -2804,9 +2843,9 @@ static int dce_v8_0_hw_fini(void *handle)
return 0;
}
-static int dce_v8_0_suspend(void *handle)
+static int dce_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_display_suspend_helper(adev);
@@ -2816,18 +2855,18 @@ static int dce_v8_0_suspend(void *handle)
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v8_0_hw_fini(handle);
+ return dce_v8_0_hw_fini(ip_block);
}
-static int dce_v8_0_resume(void *handle)
+static int dce_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v8_0_hw_init(handle);
+ ret = dce_v8_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2842,20 +2881,15 @@ static int dce_v8_0_resume(void *handle)
return amdgpu_display_resume_helper(adev);
}
-static bool dce_v8_0_is_idle(void *handle)
+static bool dce_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v8_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int dce_v8_0_soft_reset(void *handle)
+static int dce_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (dce_v8_0_is_display_hung(adev))
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
@@ -2981,7 +3015,7 @@ static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
}
}
-static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_hpd_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -2989,7 +3023,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
u32 dc_hpd_int_cntl;
if (type >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", type);
+ DRM_DEBUG("invalid hpd %d\n", type);
return 0;
}
@@ -3011,7 +3045,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
return 0;
}
-static int dce_v8_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_crtc_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3096,7 +3130,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_pageflip_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3176,7 +3210,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t disp_int, mask, tmp;
+ uint32_t disp_int, mask;
unsigned hpd;
if (entry->src_data[0] >= adev->mode_info.num_hpd) {
@@ -3189,9 +3223,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
mask = interrupt_status_offsets[hpd].hpd;
if (disp_int & mask) {
- tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
- tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+ dce_v8_0_hpd_int_ack(adev, hpd);
schedule_delayed_work(&adev->hotplug_work, 0);
DRM_DEBUG("IH: HPD%d\n", hpd + 1);
}
@@ -3200,13 +3232,13 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
}
-static int dce_v8_0_set_clockgating_state(void *handle,
+static int dce_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v8_0_set_powergating_state(void *handle,
+static int dce_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3215,7 +3247,6 @@ static int dce_v8_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
.name = "dce_v8_0",
.early_init = dce_v8_0_early_init,
- .late_init = NULL,
.sw_init = dce_v8_0_sw_init,
.sw_fini = dce_v8_0_sw_fini,
.hw_init = dce_v8_0_hw_init,
@@ -3223,7 +3254,6 @@ static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
.suspend = dce_v8_0_suspend,
.resume = dce_v8_0_resume,
.is_idle = dce_v8_0_is_idle,
- .wait_for_idle = dce_v8_0_wait_for_idle,
.soft_reset = dce_v8_0_soft_reset,
.set_clockgating_state = dce_v8_0_set_clockgating_state,
.set_powergating_state = dce_v8_0_set_powergating_state,
@@ -3511,17 +3541,17 @@ static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = {
- .set = dce_v8_0_set_crtc_interrupt_state,
+ .set = dce_v8_0_set_crtc_irq_state,
.process = dce_v8_0_crtc_irq,
};
static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = {
- .set = dce_v8_0_set_pageflip_interrupt_state,
+ .set = dce_v8_0_set_pageflip_irq_state,
.process = dce_v8_0_pageflip_irq,
};
static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = {
- .set = dce_v8_0_set_hpd_interrupt_state,
+ .set = dce_v8_0_set_hpd_irq_state,
.process = dce_v8_0_hpd_irq,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
index 5dfab80ffff2..cd298556f7a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -70,6 +70,8 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
int fb_channel_number;
fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
+ if (fb_channel_number >= ARRAY_SIZE(df_v1_7_channel_number))
+ fb_channel_number = 0;
return df_v1_7_channel_number[fb_channel_number];
}
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 483a441b46aa..621aeca53880 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -254,8 +254,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev)
static void df_v3_6_sw_fini(struct amdgpu_device *adev)
{
-
- device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
+ if (adev->dev->kobj.sd)
+ device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c
new file mode 100644
index 000000000000..2a573e33908b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_15.h"
+
+#include "df/df_4_15_offset.h"
+#include "df/df_4_15_sh_mask.h"
+
+static void df_v4_15_hw_init(struct amdgpu_device *adev)
+{
+ if (adev->have_atomics_support) { /* nothing is programmed unless the device is flagged as having atomics support */
+ uint32_t tmp;
+ uint32_t dis_lcl_proc = (1 << 1 | /* bits 1, 2 and 13 of the DisIntAtomicsLclProcessing field */
+ 1 << 2 |
+ 1 << 13);
+
+ tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1); /* read-modify-write: bits are only ORed in, never cleared */
+ tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT);
+ WREG32_SOC15(DF, 0, regNCSConfigurationRegister1, tmp);
+ }
+}
+
+const struct amdgpu_df_funcs df_v4_15_funcs = { /* DF v4.15 callback table; hw_init is the only callback set here */
+ .hw_init = df_v4_15_hw_init
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.h b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h
new file mode 100644
index 000000000000..dddf2422112a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_15_H__
+#define __DF_V4_15_H__
+
+extern const struct amdgpu_df_funcs df_v4_15_funcs;
+
+#endif /* __DF_V4_15_H__ */
+
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c
new file mode 100644
index 000000000000..a47960a0babd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_6_2.h"
+
+static bool df_v4_6_2_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /* unconditionally return true: the related registers are inaccessible, so nothing is read from hardware */
+ return true;
+}
+
+const struct amdgpu_df_funcs df_v4_6_2_funcs = { /* DF v4.6.2 callback table; only the RAS poison mode query is set here */
+ .query_ras_poison_mode = df_v4_6_2_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h
index 0d878ca3acba..3bc3e6d216e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -21,12 +21,11 @@
*
*/
-#ifndef __DCE_V11_0_H__
-#define __DCE_V11_0_H__
+#ifndef __DF_V4_6_2_H__
+#define __DF_V4_6_2_H__
-extern const struct amdgpu_ip_block_version dce_v11_0_ip_block;
-extern const struct amdgpu_ip_block_version dce_v11_2_ip_block;
+#include "soc15_common.h"
-void dce_v11_0_disable_dce(struct amdgpu_device *adev);
+extern const struct amdgpu_df_funcs df_v4_6_2_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9032d7a24d7c..8841d7213de4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,11 +40,11 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
-#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
+#include "gfx_v10_0_cleaner_shader.h"
#include "nbio_v2_3.h"
/*
@@ -102,6 +102,11 @@
#define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580
#define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0
+#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish 0x0105
+#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish 0x0106
+#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish_BASE_IDX 1
+
#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1
#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026
@@ -271,6 +276,210 @@ MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin");
MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin");
+static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = { /* core GC registers for the GFX IP dump; ARRAY_SIZE() of this list sizes adev->gfx.ip_dump_core */
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), /* NOTE(review): duplicates the previous entry; RB0/RB1 list BASE/RPTR/WPTR, so mmCP_RB2_RPTR was probably intended — confirm */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST),
+ /* CP header register (MES header dump) */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP),
+ /* per-shader-engine status registers, SE0 through SE3 */
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = { /* per-compute-queue registers; list length times the mec*pipe*queue count sizes adev->gfx.ip_dump_compute_queues */
+ /* compute queue (HQD) registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers: the same dump register is listed eight times — NOTE(review): presumably each read returns the next header dword; confirm */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { /* per-gfx-queue registers; list length is used when sizing the gfx queue IP dump buffer */
+ /* gfx queue (HQD/MQD) registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI),
+ /* gfx header registers: CE, PFP and ME dump registers, eight entries each — NOTE(review): presumably each read returns the next header dword; confirm */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+};
+
static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
@@ -3493,15 +3702,23 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
unsigned int vmid);
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -3551,14 +3768,8 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq)
{
- struct amdgpu_device *adev = kiq_ring->adev;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
- if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
- amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
- return;
- }
-
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_UNMAP_QUEUES_ACTION(action) |
@@ -3607,12 +3818,65 @@ static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}
+static void gfx_v10_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+ uint32_t tmp;
+
+ /* Reset one compute or gfx hardware queue by direct register writes: enter safe mode, then select the target me/pipe/queue under srbm_mutex */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* poll bit 0 of CP_HQD_ACTIVE, 1us per iteration, for at most adev->usec_timeout iterations */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, mmCP_VMID_RESET, tmp);
+
+ /* poll bit 0 of CP_GFX_HQD_ACTIVE, 1us per iteration, for at most adev->usec_timeout iterations */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode on the same xcc_id it was entered with, so the enter/exit pair stays balanced */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+}
+
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx10_kiq_set_resources,
.kiq_map_queues = gfx10_kiq_map_queues,
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v10_0_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -3627,7 +3891,7 @@ static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
soc15_program_register_sequence(adev,
golden_settings_gc_rlc_spm_10_0_nv10,
@@ -3650,7 +3914,10 @@ static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
soc15_program_register_sequence(adev,
golden_settings_gc_10_1,
@@ -3808,38 +4075,23 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct dma_fence *f = NULL;
unsigned int index;
uint64_t gpu_addr;
- volatile uint32_t *cpu_ptr;
+ uint32_t *cpu_ptr;
long r;
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t padding, offset;
-
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- padding = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
-
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
- *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r)
- return r;
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
- cpu_ptr = &adev->wb.wb[index];
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
- r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err1;
- }
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
@@ -3866,12 +4118,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
- if (!ring->is_mes_queue)
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -3891,7 +4141,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
{
adev->gfx.cp_fw_write_wait = false;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 1):
@@ -3942,7 +4192,7 @@ static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
@@ -3954,7 +4204,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
- char fw_name[40];
+ char fw_name[53];
char ucode_prefix[30];
const char *wks = "";
int err;
@@ -3964,41 +4214,41 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) &&
- (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) &&
+ (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)))
wks = "_wks";
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
- err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", ucode_prefix, wks);
- err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", ucode_prefix, wks);
- err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
if (!amdgpu_sriov_vf(adev)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
- /* don't check this. There are apparently firmwares in the wild with
- * incorrect size in the header
- */
- if (err == -ENODEV)
- goto out;
+ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
- dev_dbg(adev->dev,
- "gfx10: amdgpu_ucode_request() failed \"%s\"\n",
- fw_name);
+ goto out;
+
+ /* don't validate this firmware. There are apparently firmwares
+ * in the wild with incorrect size in the header
+ */
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
@@ -4007,15 +4257,17 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
goto out;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", ucode_prefix, wks);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
if (!err) {
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
@@ -4023,8 +4275,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
err = 0;
adev->gfx.mec2_fw = NULL;
}
- amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
- amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
gfx_v10_0_check_fw_write_wait(adev);
out:
@@ -4072,12 +4322,9 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
int ctx_reg_offset;
if (adev->gfx.rlc.cs_data == NULL)
@@ -4085,39 +4332,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
- ctx_reg_offset =
- SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
@@ -4144,7 +4367,7 @@ static void gfx_v10_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
reg_access_ctrl->spare_int =
SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT_0_Sienna_Cichlid);
@@ -4358,7 +4581,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -4485,13 +4708,57 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
hw_prio, NULL);
}
-static int gfx_v10_0_sw_init(void *handle)
+/*
+ * Allocate the buffers used for the GFX IP dump.
+ *
+ * Three buffers of uint32_t entries are requested with kcalloc():
+ *  - adev->gfx.ip_dump_core: one entry per gc_reg_list_10_1 register;
+ *  - adev->gfx.ip_dump_compute_queues: one gc_cp_reg_list_10 set per
+ *    compute queue instance (num_mec * num_pipe_per_mec * num_queue_per_pipe);
+ *  - adev->gfx.ip_dump_gfx_queues: one gc_gfx_queue_reg_list_10 set per
+ *    gfx queue instance (num_me * num_pipe_per_me * num_queue_per_pipe).
+ *
+ * An allocation failure is only logged: the corresponding pointer ends up
+ * NULL and no error is returned to the caller.
+ */
+static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+	uint32_t num_regs, num_inst;
+
+	/* Core GC registers */
+	num_regs = ARRAY_SIZE(gc_reg_list_10_1);
+	adev->gfx.ip_dump_core = kcalloc(num_regs, sizeof(uint32_t), GFP_KERNEL);
+	if (!adev->gfx.ip_dump_core)
+		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+
+	/* Compute queue registers, for every MEC/pipe/queue instance */
+	num_regs = ARRAY_SIZE(gc_cp_reg_list_10);
+	num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+		   adev->gfx.mec.num_queue_per_pipe;
+	adev->gfx.ip_dump_compute_queues =
+		kcalloc(num_regs * num_inst, sizeof(uint32_t), GFP_KERNEL);
+	if (!adev->gfx.ip_dump_compute_queues)
+		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+
+	/* Gfx queue registers, for every ME/pipe/queue instance */
+	num_regs = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+	num_inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+		   adev->gfx.me.num_queue_per_pipe;
+	adev->gfx.ip_dump_gfx_queues =
+		kcalloc(num_regs * num_inst, sizeof(uint32_t), GFP_KERNEL);
+	if (!adev->gfx.ip_dump_gfx_queues)
+		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+}
+
+static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id = 0;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -4499,7 +4766,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 1, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
@@ -4513,8 +4780,8 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_queue_per_pipe = 2;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -4528,6 +4795,74 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 101 &&
+ adev->gfx.pfp_fw_version >= 158 &&
+ adev->gfx.mec_fw_version >= 151) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 64 &&
+ adev->gfx.pfp_fw_version >= 100 &&
+ adev->gfx.mec_fw_version >= 122) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 14 &&
+ adev->gfx.pfp_fw_version >= 17 &&
+ adev->gfx.mec_fw_version >= 24) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 4 &&
+ adev->gfx.pfp_fw_version >= 9 &&
+ adev->gfx.mec_fw_version >= 12) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
/* KIQ event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
@@ -4543,6 +4878,13 @@ static int gfx_v10_0_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
@@ -4577,7 +4919,7 @@ static int gfx_v10_0_sw_init(void *handle)
/* set up the gfx ring */
for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
continue;
@@ -4610,19 +4952,25 @@ static int gfx_v10_0_sw_init(void *handle)
}
}
- if (!adev->enable_mes_kiq) {
- r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
- if (r) {
- DRM_ERROR("Failed to init KIQ BOs!\n");
- return r;
- }
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
- kiq = &adev->gfx.kiq[0];
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
- if (r)
- return r;
+ r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
}
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+
r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0);
if (r)
return r;
@@ -4638,6 +4986,12 @@ static int gfx_v10_0_sw_init(void *handle)
gfx_v10_0_gpu_early_init(adev);
+ gfx_v10_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -4662,10 +5016,10 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.me.me_fw_ptr);
}
-static int gfx_v10_0_sw_fini(void *handle)
+static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -4674,10 +5028,10 @@ static int gfx_v10_0_sw_fini(void *handle)
amdgpu_gfx_mqd_sw_fini(adev, 0);
- if (!adev->enable_mes_kiq) {
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
- amdgpu_gfx_kiq_fini(adev, 0);
- }
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
gfx_v10_0_pfp_fini(adev);
gfx_v10_0_ce_fini(adev);
@@ -4689,6 +5043,11 @@ static int gfx_v10_0_sw_fini(void *handle)
gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
gfx_v10_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
return 0;
}
@@ -4749,9 +5108,12 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
bitmap = i * adev->gfx.config.max_sh_per_se + j;
- if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) &&
+ if (((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 3)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 6))) &&
((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
continue;
gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
@@ -4779,7 +5141,7 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
/* for ASICs that integrates GFX v10.3
* pa_sc_tile_steering_override should be set to 0
*/
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
return 0;
/* init num_sc */
@@ -4960,7 +5322,7 @@ static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
/* TCCs are global (not instanced). */
uint32_t tcc_disable;
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) {
tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE_gc_10_3) |
RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE_gc_10_3);
} else {
@@ -4978,7 +5340,8 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
gfx_v10_0_setup_rb(adev);
gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
@@ -5010,26 +5373,74 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
}
+/*
+ * Return the register offset of the CP interrupt control register for the
+ * given gfx ME/pipe pair, or 0 when there is none.
+ *
+ * Only ME 0 is handled: pipe 0 maps to mmCP_INT_CNTL_RING0 and pipe 1 to
+ * mmCP_INT_CNTL_RING1. Every other combination yields 0.
+ */
+static u32 gfx_v10_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+				      int me, int pipe)
+{
+	if (me == 0 && pipe == 0)
+		return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
+
+	if (me == 0 && pipe == 1)
+		return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
+
+	return 0;
+}
+
+/*
+ * Return the register offset of the per-pipe interrupt control register
+ * (mmCP_ME1_PIPE<n>_INT_CNTL) for the given compute ME/pipe pair, or 0 when
+ * the pair is not handled here.
+ *
+ * amdgpu controls only the first MEC, so only me == 1 with pipes 0-3 is
+ * translated; the interrupts of all other pipes are set by amdkfd.
+ */
+static u32 gfx_v10_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+				      int me, int pipe)
+{
+	u32 reg = 0;
+
+	if (me == 1) {
+		switch (pipe) {
+		case 0:
+			reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+			break;
+		case 1:
+			reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+			break;
+		case 2:
+			reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+			break;
+		case 3:
+			reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return reg;
+}
+
static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
bool enable)
{
- u32 tmp;
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
if (amdgpu_sriov_vf(adev))
return;
- tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
-
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
- enable ? 1 : 0);
-
- WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
}
static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
@@ -5037,7 +5448,7 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) {
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
@@ -5666,12 +6077,12 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2))
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
else
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- if (adev->job_hang && !enable)
+ if (amdgpu_in_reset(adev) && !enable)
return 0;
for (i = 0; i < adev->usec_timeout; i++) {
@@ -5740,7 +6151,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -5818,7 +6229,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@@ -5895,7 +6306,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -6057,7 +6468,7 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
}
WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
@@ -6116,7 +6527,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -6154,7 +6565,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
+ /* Set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -6190,7 +6601,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
if (enable) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
@@ -6206,7 +6617,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
break;
}
} else {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
@@ -6270,7 +6681,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -6306,7 +6717,7 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* tell RLC which is KIQ queue */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
@@ -6318,17 +6729,13 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80);
break;
default:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
break;
}
}
@@ -6434,13 +6841,13 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
return 0;
}
-static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
+static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
{
struct amdgpu_device *adev = ring->adev;
struct v10_gfx_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.gfx_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -6457,11 +6864,18 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else {
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
/* restore mqd with the backup copy */
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
*ring->wptr_cpu_addr = 0;
@@ -6474,22 +6888,9 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *ring;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_gfx_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
return r;
}
@@ -6575,8 +6976,9 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
mqd->cp_hqd_pq_control = tmp;
@@ -6735,7 +7137,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
if (adev->gfx.kiq[0].mqd_backup)
- memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
@@ -6758,19 +7160,19 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.kiq[0].mqd_backup)
- memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
}
return 0;
}
-static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v10_compute_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!restore && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -6779,11 +7181,11 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else {
/* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
@@ -6795,55 +7197,24 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v10_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
+ gfx_v10_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v10_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kcq_init_queue(&adev->gfx.compute_ring[i],
+ false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, 0);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
@@ -6865,10 +7236,7 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
return r;
}
- if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
- r = amdgpu_mes_kiq_hw_init(adev);
- else
- r = gfx_v10_0_kiq_resume(adev);
+ r = gfx_v10_0_kiq_resume(adev);
if (r)
return r;
@@ -6917,7 +7285,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
* check if mmVGT_ESGS_RING_SIZE_UMD
* has been remapped to mmVGT_ESGS_RING_SIZE
*/
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 4):
@@ -6966,7 +7334,7 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
*/
WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
@@ -7103,25 +7471,26 @@ static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data);
}
-static int gfx_v10_0_hw_init(void *handle)
+static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
/**
* For gfx 10, rlc firmware loading relies on smu firmware is
* loaded firstly, so in direct type, it has to load smc ucode
* here before rlc.
*/
- if (!(adev->flags & AMD_IS_APU)) {
- r = amdgpu_pm_load_smu_firmware(adev, NULL);
- if (r)
- return r;
- }
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
gfx_v10_0_disable_gpa_mode(adev);
}
@@ -7139,30 +7508,40 @@ static int gfx_v10_0_hw_init(void *handle)
* init golden registers and rlc resume may override some registers,
* reconfig them here
*/
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 10) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2))
gfx_v10_0_tcp_harvest(adev);
r = gfx_v10_0_cp_resume(adev);
if (r)
return r;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0))
gfx_v10_3_program_pbb_mode(adev);
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0) && !amdgpu_sriov_vf(adev))
gfx_v10_3_set_power_brake_sequence(adev);
return r;
}
-static int gfx_v10_0_hw_fini(void *handle)
+static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+
+ /* Workaround for the SMU suspend failure on the Vangogh ASIC:
+ * power gating has to be set again during gfxoff control,
+ * otherwise disallowing gfxoff fails.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1))
+ gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE);
if (!adev->no_hw_access) {
if (amdgpu_async_gfx_ring) {
@@ -7187,19 +7566,19 @@ static int gfx_v10_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v10_0_suspend(void *handle)
+static int gfx_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_fini(handle);
+ return gfx_v10_0_hw_fini(ip_block);
}
-static int gfx_v10_0_resume(void *handle)
+static int gfx_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_init(handle);
+ return gfx_v10_0_hw_init(ip_block);
}
-static bool gfx_v10_0_is_idle(void *handle)
+static bool gfx_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -7208,11 +7587,11 @@ static bool gfx_v10_0_is_idle(void *handle)
return true;
}
-static int gfx_v10_0_wait_for_idle(void *handle)
+static int gfx_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -7226,11 +7605,11 @@ static int gfx_v10_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v10_0_soft_reset(void *handle)
+static int gfx_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
@@ -7255,7 +7634,7 @@ static int gfx_v10_0_soft_reset(void *handle)
/* GRBM_STATUS2 */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
@@ -7288,19 +7667,17 @@ static int gfx_v10_0_soft_reset(void *handle)
/* Disable MEC parsing/prefetching */
gfx_v10_0_cp_compute_enable(adev, false);
- if (grbm_soft_reset) {
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- }
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
/* Wait a little for things to settle down */
udelay(50);
@@ -7312,7 +7689,23 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock, clock_lo, clock_hi, hi_check;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish);
+ /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
+ * roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
@@ -7393,13 +7786,13 @@ static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v10_0_early_init(void *handle)
+static int gfx_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -7437,9 +7830,9 @@ static int gfx_v10_0_early_init(void *handle)
return gfx_v10_0_init_microcode(adev);
}
-static int gfx_v10_0_late_init(void *handle)
+static int gfx_v10_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -7450,6 +7843,10 @@ static int gfx_v10_0_late_init(void *handle)
if (r)
return r;
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
return 0;
}
@@ -7470,7 +7867,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
data = RLC_SAFE_MODE__CMD_MASK;
data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
@@ -7508,7 +7905,7 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
uint32_t data;
data = RLC_SAFE_MODE__CMD_MASK;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
@@ -7819,7 +8216,7 @@ static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_d
mmCGTS_SA1_QUAD1_SM_CTRL_REG
};
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) {
for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) {
reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
tcp_ctrl_regs_nv12[i];
@@ -7864,9 +8261,12 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === CGCG + CGLS === */
gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
- if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)))
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 10)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 1)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 2)))
gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev);
} else {
/* CGCG/CGLS should be disabled before MGCG/MGLS
@@ -7897,25 +8297,27 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
unsigned int vmid)
{
- u32 reg, data;
+ u32 reg, pre_data, data;
- /* not for *_SOC15 */
reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
- if (amdgpu_sriov_is_pp_one_vf(adev))
- data = RREG32_NO_KIQ(reg);
+ /* not for *_SOC15 */
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
else
- data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
+ pre_data = RREG32(reg);
- data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- if (amdgpu_sriov_is_pp_one_vf(adev))
- WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
- else
- WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ }
}
-static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid)
+static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
{
amdgpu_gfx_off_ctrl(adev, false);
@@ -7973,7 +8375,7 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable)
* Power/performance team will optimize it and might give a new value later.
*/
if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 6):
@@ -8025,16 +8427,16 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -8063,15 +8465,15 @@ static int gfx_v10_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v10_0_set_clockgating_state(void *handle,
+static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -8092,9 +8494,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_FGCG */
@@ -8161,45 +8563,17 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always being used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- if (ring->use_doorbell) {
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
- lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
- upper_32_bits(ring->wptr));
- }
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
}
}
@@ -8224,42 +8598,13 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- /* XXX check if swapping is necessary on BE */
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- BUG(); /* only DOORBELL method supported on gfx10 now */
- }
+ BUG(); /* only DOORBELL method supported on gfx10 now */
}
}
@@ -8282,7 +8627,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}
@@ -8318,10 +8663,6 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
}
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x400000;
-
amdgpu_ring_write(ring, header);
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -8341,10 +8682,6 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x40000000;
-
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
@@ -8402,8 +8739,7 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq));
- amdgpu_ring_write(ring, ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
+ amdgpu_ring_write(ring, 0);
}
static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -8431,10 +8767,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
- if (ring->is_mes_queue)
- gfx_v10_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
- else
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* compute doesn't have PFP */
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
@@ -8511,34 +8844,23 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0);
}
-static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned int ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
-{
- unsigned int cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
@@ -8596,19 +8918,9 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v10_gfx_meta_data, ce_payload);
- ce_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ce_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v10_gfx_meta_data, ce_payload);
- ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- }
+ offset = offsetof(struct v10_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
@@ -8634,28 +8946,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
void *de_payload_cpu_addr;
int cnt;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v10_gfx_meta_data, de_payload);
- de_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- de_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gds_backup) +
- offsetof(struct v10_gfx_meta_data, de_payload);
- gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v10_gfx_meta_data, de_payload);
- de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
- AMDGPU_CSA_SIZE - adev->gds.gds_size,
- PAGE_SIZE);
- }
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -8750,19 +9047,6 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
-static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
- unsigned int vmid)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t value = 0;
-
- value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
- value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
- value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
- value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
- WREG32_SOC15(GC, 0, mmSQ_CMD, value);
-}
-
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -8907,49 +9191,34 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
int i;
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
- uint32_t mes_queue_id = entry->src_data[0];
DRM_DEBUG("IH: CP EOP\n");
- if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
- struct amdgpu_mes_queue *queue;
-
- mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
-
- spin_lock(&adev->mes.queue_id_lock);
- queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
- if (queue) {
- DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
- amdgpu_fence_process(queue->ring);
- }
- spin_unlock(&adev->mes.queue_id_lock);
- } else {
- me_id = (entry->ring_id & 0x0c) >> 2;
- pipe_id = (entry->ring_id & 0x03) >> 0;
- queue_id = (entry->ring_id & 0x70) >> 4;
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
- switch (me_id) {
- case 0:
- if (pipe_id == 0)
- amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
- else
- amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
- break;
- case 1:
- case 2:
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- /* Per-queue interrupt is supported for MEC starting from VI.
- * The interrupt can only be enabled/disabled per pipe instead
- * of per queue.
- */
- if ((ring->me == me_id) &&
- (ring->pipe == pipe_id) &&
- (ring->queue == queue_id))
- amdgpu_fence_process(ring);
- }
- break;
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
}
+ break;
}
return 0;
@@ -8960,12 +9229,39 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -8974,17 +9270,75 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v10_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source, /* not read in this function */
+ unsigned type, /* not read in this function */
+ enum amdgpu_interrupt_state state)
+{ /* Sets or clears OPCODE_ERROR_INT_ENABLE in each ME and MEC pipe interrupt-control register */
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) { /* DISABLE and ENABLE share one path; the written bit is picked per state below */
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) { /* walk every ME/pipe pair */
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) { /* a zero result is skipped: nothing is read or written for this pair */
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); /* 1 for ENABLE, 0 for DISABLE */
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) { /* then every MEC/pipe pair */
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1, hence i + 1 as the ME argument */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) { /* same zero-result skip as in the ME loop */
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default: /* any other state: no register is touched */
+ break;
+ }
+ return 0; /* there is no failure path; the result is always 0 */
+}
+
static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
- PRIV_INSTR_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -9008,8 +9362,8 @@ static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev,
case 0:
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
- /* we only enabled 1 gfx queue per pipe for now */
- if (ring->me == me_id && ring->pipe == pipe_id)
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
drm_sched_fault(&ring->sched);
}
break;
@@ -9036,6 +9390,15 @@ static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source, /* not read in this function */
+ struct amdgpu_iv_entry *entry)
+{ /* Interrupt callback: logs an illegal-opcode error, then passes the entry to the priv-fault handler */
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v10_0_handle_priv_fault(adev, entry); /* reuses the handler defined earlier in this file */
+ return 0; /* always returns 0 */
+}
+
static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -9130,6 +9493,319 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
+static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* For a single NOP, the ring's nop value (ring->funcs->nop) is itself the whole packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* One PACKET3_NOP header with its count capped at 0x3ffe; remaining NOPs are added one at a time */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* The header sits at index 0; it is followed by num_nop - 1 NOP packets */
+ amdgpu_ring_insert_nop(ring, num_nop - 1); /* NOTE(review): num_nop == 0 would wrap both unsigned subtractions - presumably never passed; confirm against callers */
+}
+
+static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{ /* Resets one gfx ring: CP_VMID_RESET request via the KIQ ring, queue re-init, then KIQ map_queues */
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ u32 tmp;
+ u64 addr;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL; /* required KIQ packet functions are missing */
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags); /* held across alloc, emit, commit and ring test */
+
+ if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7)) { /* NOTE(review): presumably the dword cost of the three packets emitted below - confirm */
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ addr = amdgpu_bo_gpu_offset(ring->mqd_obj) +
+ offsetof(struct v10_gfx_mqd, cp_gfx_hqd_active); /* address of the cp_gfx_hqd_active field inside this ring's MQD object */
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); /* one request bit per VMID */
+ if (ring->pipe == 0) /* queue bit goes in PIPE0_QUEUES for pipe 0, PIPE1_QUEUES for any other pipe */
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << ring->queue);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << ring->queue);
+
+ gfx_v10_0_ring_emit_wreg(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); /* the register write is emitted on the KIQ ring, not done by MMIO here */
+ gfx_v10_0_wait_reg_mem(kiq_ring, 0, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ 0, 1, 0x20); /* NOTE(review): presumably a memory wait on the MQD word at addr - confirm argument meaning */
+ gfx_v10_0_ring_emit_reg_wait(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff); /* NOTE(review): presumably waits for CP_VMID_RESET to read back 0 - confirm */
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_kgq_init_queue(ring, true); /* queue init runs with the KIQ ring lock released */
+ if (r) {
+ DRM_ERROR("fail to init kgq\n");
+ return r;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags); /* second KIQ submission: map the queue */
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence); /* the helper's result is this function's result */
+}
+
+static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid, /* not read in this function */
+ struct amdgpu_fence *timedout_fence)
+{ /* Resets one compute ring: KIQ unmap with RESET_QUEUES, wait for the HQD to go inactive, re-init, KIQ map */
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ int i, r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL; /* required KIQ packet functions are missing */
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags); /* held across alloc, emit, commit and ring test */
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ /* Make sure the dequeue is complete: poll CP_HQD_ACTIVE for this ring's me/pipe/queue */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ for (i = 0; i < adev->usec_timeout; i++) { /* at most usec_timeout polls, 1 us apart */
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) /* bit 0 clear: stop polling */
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) /* loop ran to the end without seeing bit 0 clear */
+ r = -ETIMEDOUT;
+ nv_grbm_select(adev, 0, 0, 0, 0); /* selection is put back to 0/0/0/0 before the mutex is dropped */
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ if (r) { /* r is only non-zero here when the poll above timed out */
+ dev_err(adev->dev, "fail to wait on hqd deactivate\n");
+ return r;
+ }
+
+ r = gfx_v10_0_kcq_init_queue(ring, true); /* queue init runs with the KIQ ring lock released */
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags); /* second KIQ submission: map the queue */
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence); /* the helper's result is this function's result */
+}
+
+static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{ /* Prints the values stored in the three adev->gfx.ip_dump_* arrays; reads no hardware registers */
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core) /* no core array: print nothing at all */
+ return;
+
+ for (i = 0; i < reg_count; i++) /* one "name value" line per gc_reg_list_10_1 entry */
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_10_1[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* Print compute queue registers for every mec/pipe/queue; stop here if that array is absent */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) /* for any mec after the first, label this entry with the ME2 name */
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count; /* each queue occupies reg_count consecutive array slots */
+ }
+ }
+ }
+
+ /* Print gfx queue registers for every me/pipe/queue; stop here if that array is absent */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0; /* restart at slot 0: this is a different array */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count; /* each queue occupies reg_count consecutive array slots */
+ }
+ }
+ }
+}
+
+static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
+{ /* Reads register values into the three adev->gfx.ip_dump_* arrays; each stage is skipped if its array is absent */
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core) /* no core array: capture nothing at all */
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false); /* NOTE(review): presumably keeps GFXOFF disallowed while registers are read - confirm */
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* Dump compute queue registers for every mec/pipe/queue; stop here if that array is absent */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex); /* held for the whole nv_grbm_select sequence below */
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* GFX MEs are selected first, so MEC i is selected as ME index num_me + i */
+ nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) /* for any mec after the first, read the ME2 register instead */
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_10[reg]));
+ }
+ index += reg_count; /* each queue occupies reg_count consecutive array slots */
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0); /* selection is put back to 0/0/0/0 before the mutex is dropped */
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* Dump gfx queue registers for every me/pipe/queue; stop here if that array is absent */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0; /* restart at slot 0: this is a different array */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ nv_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_10[reg]));
+ }
+ index += reg_count; /* each queue occupies reg_count consecutive array slots */
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0); /* selection is put back to 0/0/0/0 before the mutex is dropped */
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader packet: a PACKET3_RUN_CLEANER_SHADER header plus one dword (two ring writes) */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v10_0_ring_begin_use(struct amdgpu_ring *ring)
+{ /* Begin-use hook: forwards to the GFX profile helper, then to the enforce-isolation helper */
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v10_0_ring_end_use(struct amdgpu_ring *ring)
+{ /* End-use hook: calls the profile helper, then the enforce-isolation helper (same order as begin_use, not reversed) */
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.name = "gfx_v10_0",
.early_init = gfx_v10_0_early_init,
@@ -9146,6 +9822,8 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.set_clockgating_state = gfx_v10_0_set_clockgating_state,
.set_powergating_state = gfx_v10_0_set_powergating_state,
.get_clockgating_state = gfx_v10_0_get_clockgating_state,
+ .dump_ip_state = gfx_v10_ip_dump,
+ .print_ip_state = gfx_v10_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
@@ -9162,7 +9840,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
@@ -9178,7 +9856,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -9188,19 +9867,21 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v10_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -9220,7 +9901,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -9230,12 +9912,16 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -9253,7 +9939,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
7 + /* gfx_v10_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
@@ -9291,6 +9976,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
.process = gfx_v10_0_priv_reg_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v10_0_bad_op_irq_funcs = {
+ .set = gfx_v10_0_set_bad_op_fault_state,
+ .process = gfx_v10_0_bad_op_irq,
+};
+
static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
.set = gfx_v10_0_set_priv_inst_fault_state,
.process = gfx_v10_0_priv_inst_irq,
@@ -9312,13 +10002,16 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v10_0_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
}
static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 3):
@@ -9435,10 +10128,14 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
bitmap = i * adev->gfx.config.max_sh_per_se + j;
- if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6)) ||
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 7))) &&
+ if (((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 3)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 6)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 7))) &&
((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
continue;
mask = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
new file mode 100644
index 000000000000..f67569ccf9f6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Pre-assembled dwords of the gfx_10_1_10 cleaner shader; assembly listing in gfx_v10_1_10_cleaner_shader.asm */
+static const u32 gfx_10_1_10_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbf068100, 0xbf840023,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020102,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803000,
+ 0xbe813000, 0xbe823000,
+ 0xbe833000, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
+
+/* Pre-assembled dwords of the gfx_10_3_0 cleaner shader; assembly listing in gfx_v10_3_0_cleaner_shader.asm */
+static const u32 gfx_10_3_0_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020002,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803080,
+ 0xbe813080, 0xbe823080,
+ 0xbe833080, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
new file mode 100644
index 000000000000..54f7ed9e2801
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 Dwords (256 bytes) of the 256-Dword cleaner shader.
+
+// GFX10.1 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10.1)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_cmp_eq_u32 s0, 1 // FW sets sgpr0 through COMPUTE_USER_DATA_0; VGPRs and LDS are cleared only when s0 == 1
+ s_cbranch_scc0 label_0023 // scc = (s0 == 1); when it is 0, skip straight to the SGPR clear
+
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // tested against s1 here (s0 is used for the user-data check above); tg_size (first_wave) term, in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clear LDS only if this is the first wave of the ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, s0 // NOTE(review): source operand is s0 (hex 0xbe803000), not literal 0 as in the gfx10.3 listing - confirm intended
+ s_movreld_b32 s1, s0
+ s_movreld_b32 s2, s0
+ s_movreld_b32 s3, s0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc and the ttmp registers
+ s_mov_b64 vcc, 0 //clear vcc
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
new file mode 100644
index 000000000000..0e1c246166c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 Dwords (256 bytes) of the 192-Dword cleaner shader.
+// To turn this shader program on for compilation, change this to main and the lower shader main to main_1
+
+// GFX10.3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clear LDS only if this is the first wave of the ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear flat_scratch, vcc and the ttmp registers
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR; NOTE(review): the two flat_scratch moves have no matching dwords in gfx_10_3_0_cleaner_shader_hex - confirm the hex is in sync
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 762d7a19f1be..66c47c466532 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -29,7 +29,6 @@
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
-#include "amdgpu_atomfirmware.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"
@@ -42,13 +41,15 @@
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
#include "soc15.h"
-#include "soc15d.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
+#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
#define GFX11_NUM_GFX_RINGS 1
#define GFX11_MEC_HPD_SIZE 2048
@@ -60,11 +61,32 @@
#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
+#define regPC_CONFIG_CNTL_1 0x194d
+#define regPC_CONFIG_CNTL_1_BASE_IDX 1
+
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 /* NOTE(review): identical duplicate of the define two lines up; can be dropped */
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
@@ -82,6 +104,200 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ /* cp header registers: the same register is listed 8 times (presumably so 8 consecutive reads return successive header entries - confirm) */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers: the same register is listed 8 times (presumably so 8 consecutive reads return successive header entries - confirm) */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers: PFP then ME dump register, each listed 8 times (presumably so 8 consecutive reads return successive header entries - confirm) */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
+};
static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
@@ -130,13 +346,20 @@ static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address, shifted right by 8 before it is written into the packet */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -265,7 +488,10 @@ static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
soc15_program_register_sequence(adev,
@@ -275,6 +501,10 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
default:
break;
}
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_11_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_11_0));
+
}
static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
@@ -310,6 +540,21 @@ static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
amdgpu_ring_write(ring, inv); /* poll interval */
}
+static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization covers up to 0x3ffe; anything beyond that is filled one NOP at a time */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nop - 1 NOP packets */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -358,7 +603,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct dma_fence *f = NULL;
unsigned index;
uint64_t gpu_addr;
- volatile uint32_t *cpu_ptr;
+ uint32_t *cpu_ptr;
long r;
/* MES KIQ fw hasn't indirect buffer support for now */
@@ -368,33 +613,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t padding, offset;
-
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- padding = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
-
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
- *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r)
- return r;
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
- cpu_ptr = &adev->wb.wb[index];
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
- r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err1;
- }
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
@@ -421,12 +651,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
- if (!ring->is_mes_queue)
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -444,10 +672,10 @@ static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *
{
const struct psp_firmware_header_v1_0 *toc_hdr;
int err = 0;
- char fw_name[40];
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", ucode_prefix);
if (err)
goto out;
@@ -465,7 +693,7 @@ out:
static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
@@ -486,8 +714,7 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
- char fw_name[40];
- char ucode_prefix[30];
+ char ucode_prefix[25];
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
@@ -496,9 +723,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", ucode_prefix);
if (err)
goto out;
/* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
@@ -514,8 +741,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", ucode_prefix);
if (err)
goto out;
if (adev->gfx.rs64_enable) {
@@ -527,8 +755,19 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
}
if (!amdgpu_sriov_vf(adev)) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
+ adev->pdev->revision == 0xCE)
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/gc_11_0_0_rlc_1.bin");
+ else if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", ucode_prefix);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
@@ -539,8 +778,9 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
goto out;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", ucode_prefix);
if (err)
goto out;
if (adev->gfx.rs64_enable) {
@@ -561,6 +801,14 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.mec2_fw = NULL;
gfx_v11_0_check_fw_cp_gfx_shadow(adev);
+
+ if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
+ err = adev->gfx.imu.funcs->init_microcode(adev);
+ if (err)
+ DRM_ERROR("Failed to init imu firmware!\n");
+ return err;
+ }
+
out:
if (err) {
amdgpu_ucode_release(&adev->gfx.pfp_fw);
@@ -602,12 +850,9 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
int ctx_reg_offset;
if (adev->gfx.rlc.cs_data == NULL)
@@ -615,39 +860,15 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- ctx_reg_offset =
- SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
@@ -696,7 +917,7 @@ static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
return 0;
}
@@ -828,14 +1049,21 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
#define MQD_FWWORKAREA_SIZE 484
#define MQD_FWWORKAREA_ALIGNMENT 256
-static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
struct amdgpu_gfx_shadow_info *shadow_info)
{
- if (adev->gfx.cp_gfx_shadow) {
- shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
- shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
- shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
- shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check)
+{
+ if (adev->gfx.cp_gfx_shadow || skip_check) {
+ gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
return 0;
} else {
memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
@@ -856,8 +1084,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{
-
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 2):
adev->gfx.config.max_hw_contexts = 8;
@@ -876,6 +1103,10 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
break;
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -893,9 +1124,9 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
int me, int pipe, int queue)
{
- int r;
struct amdgpu_ring *ring;
unsigned int irq_type;
+ unsigned int hw_prio;
ring = &adev->gfx.gfx_ring[ring_id];
@@ -905,6 +1136,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
ring->ring_obj = NULL;
ring->use_doorbell = true;
+ if (adev->gfx.disable_kq) {
+ ring->no_scheduler = true;
+ ring->no_user_submission = true;
+ }
if (!ring_id)
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
@@ -914,11 +1149,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
- r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
- if (r)
- return r;
- return 0;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
}
static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
@@ -1295,30 +1529,69 @@ static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v11_0_sw_init(void *handle)
+static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
+}
+
+static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- int i, j, k, r, ring_id = 0;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, j, k, r, ring_id;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
- adev->gfxhub.funcs->init(adev);
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
- adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
- adev->gfx.mec.num_mec = 2;
- adev->gfx.mec.num_pipe_per_mec = 4;
- adev->gfx.mec.num_queue_per_pipe = 4;
- break;
- case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 2;
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -1333,9 +1606,118 @@ static int gfx_v11_0_sw_init(void *handle)
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2420 &&
+ adev->gfx.pfp_fw_version >= 2580 &&
+ adev->gfx.mec_fw_version >= 2650 &&
+ adev->mes.fw_version[0] >= 120) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* add firmware version checks here */
+ if (0 && !adev->gfx.disable_uq) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 2280 &&
+ adev->gfx.pfp_fw_version >= 2370 &&
+ adev->gfx.mec_fw_version >= 2450 &&
+ adev->mes.fw_version[0] >= 99) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.pfp_fw_version >= 102 &&
+ adev->gfx.mec_fw_version >= 66 &&
+ adev->mes.fw_version[0] >= 128) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 26 &&
+ adev->mes.fw_version[0] >= 114) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 12 &&
+ adev->gfx.pfp_fw_version >= 15 &&
+ adev->gfx.mec_fw_version >= 15) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 7 &&
+ adev->gfx.pfp_fw_version >= 8 &&
+ adev->gfx.mec_fw_version >= 8) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
/* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) &&
- amdgpu_sriov_is_pp_one_vf(adev))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
+ amdgpu_sriov_is_pp_one_vf(adev))
adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
/* EOP Event */
@@ -1345,6 +1727,13 @@ static int gfx_v11_0_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
@@ -1368,14 +1757,6 @@ static int gfx_v11_0_sw_init(void *handle)
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
- if (adev->gfx.imu.funcs) {
- if (adev->gfx.imu.funcs->init_microcode) {
- r = adev->gfx.imu.funcs->init_microcode(adev);
- if (r)
- DRM_ERROR("Failed to load imu firmware!\n");
- }
- }
-
gfx_v11_0_me_init(adev);
r = gfx_v11_0_rlc_init(adev);
@@ -1390,41 +1771,69 @@ static int gfx_v11_0_sw_init(void *handle)
return r;
}
- /* set up the gfx ring */
- for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
- for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
- if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
- continue;
-
- r = gfx_v11_0_gfx_ring_init(adev, ring_id,
- i, k, j);
- if (r)
- return r;
- ring_id++;
+ if (adev->gfx.num_gfx_rings) {
+ ring_id = 0;
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v11_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
}
}
}
- ring_id = 0;
- /* set up the compute queues - allocate horizontally across pipes */
- for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
- for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
- for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
- k, j))
- continue;
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
- r = gfx_v11_0_compute_ring_init(adev, ring_id,
- i, k, j);
- if (r)
- return r;
+ r = gfx_v11_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
- ring_id++;
+ ring_id++;
+ }
}
}
}
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if ((adev->gfx.me_fw_version >= 2280) &&
+ (adev->gfx.mec_fw_version >= 2410) &&
+ !amdgpu_sriov_vf(adev)) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ default:
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ }
+
if (!adev->enable_mes_kiq) {
r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
if (r) {
@@ -1432,8 +1841,7 @@ static int gfx_v11_0_sw_init(void *handle)
return r;
}
- kiq = &adev->gfx.kiq[0];
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
}
@@ -1458,6 +1866,12 @@ static int gfx_v11_0_sw_init(void *handle)
return -EINVAL;
}
+ gfx_v11_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -1490,10 +1904,10 @@ static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.rlc.rlc_autoload_ptr);
}
-static int gfx_v11_0_sw_fini(void *handle)
+static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -1507,6 +1921,8 @@ static int gfx_v11_0_sw_fini(void *handle)
amdgpu_gfx_kiq_fini(adev, 0);
}
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
gfx_v11_0_pfp_fini(adev);
gfx_v11_0_me_fini(adev);
gfx_v11_0_rlc_fini(adev);
@@ -1517,6 +1933,12 @@ static int gfx_v11_0_sw_fini(void *handle)
gfx_v11_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
+
return 0;
}
@@ -1586,6 +2008,7 @@ static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
{
+ u32 rb_bitmap_per_sa;
u32 rb_bitmap_width_per_sa;
u32 max_sa;
u32 active_sa_bitmap;
@@ -1603,12 +2026,14 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
adev->gfx.config.max_sh_per_se;
rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se;
+ rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
for (i = 0; i < max_sa; i++) {
if (active_sa_bitmap & (1 << i))
- active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
+ active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
}
- active_rb_bitmap |= global_active_rb_bitmap;
+ active_rb_bitmap &= global_active_rb_bitmap;
adev->gfx.config.backend_enable_mask = active_rb_bitmap;
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}
@@ -1647,8 +2072,10 @@ static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- /* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
@@ -1732,26 +2159,74 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
gfx_v11_0_init_gds_vmid(adev);
}
+static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
bool enable)
{
- u32 tmp;
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
if (amdgpu_sriov_vf(adev))
return;
- tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
-
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
- enable ? 1 : 0);
- tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
- enable ? 1 : 0);
-
- WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
}
static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
@@ -2033,7 +2508,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -2077,7 +2552,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -2122,7 +2597,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -2562,8 +3037,14 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
for (i = 0; i < adev->usec_timeout; i++) {
cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(11, 0, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(11, 0, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3))
bootload_status = RREG32_SOC15(GC, 0,
regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
else
@@ -2752,7 +3233,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
@@ -2970,7 +3451,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
@@ -3256,7 +3737,7 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -3294,7 +3775,7 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
+ /* Set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -3564,9 +4045,7 @@ static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
@@ -3584,6 +4063,24 @@ static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
(adev->doorbell_index.userqueue_end * 2) << 2);
}
+static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v11_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ bool priority = 0;
+ u32 tmp;
+
+ /* Choose the PRIORITY_LEVEL stored in the MQD: 0x0 = low priority,
+ * 0x1 = high priority (only when prop asks for AMDGPU_GFX_PIPE_PRIO_HIGH).
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = 1;
+
+ tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
@@ -3601,25 +4098,22 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set up mqd control */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
+ tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
mqd->cp_gfx_mqd_control = tmp;
/* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
+ tmp = regCP_GFX_HQD_VMID_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
- /* set up default queue priority level
- * 0x0 = low priority, 0x1 = high priority */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
- tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
- mqd->cp_gfx_hqd_queue_priority = tmp;
+ /* set up gfx queue priority */
+ gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
/* set up time quantum */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
+ tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
mqd->cp_gfx_hqd_quantum = tmp;
@@ -3641,16 +4135,20 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
+ tmp = regCP_GFX_HQD_CNTL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+ if (!prop->kernel_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
mqd->cp_gfx_hqd_cntl = tmp;
/* set up cp_doorbell_control */
- tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+ tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
DOORBELL_OFFSET, prop->doorbell_index);
@@ -3662,21 +4160,31 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_rb_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
+ mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
/* active the queue */
mqd->cp_gfx_hqd_active = 1;
+ /* set gfx UQ items */
+ mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+ mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+ mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
+ mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
+ mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+ mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
return 0;
}
-static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
+static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
{
struct amdgpu_device *adev = ring->adev;
struct v11_gfx_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.gfx_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -3684,11 +4192,11 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else {
/* restore mqd with the backup copy */
if (adev->gfx.me.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
*ring->wptr_cpu_addr = 0;
@@ -3701,22 +4209,9 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *ring;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v11_0_gfx_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
return r;
}
@@ -3748,14 +4243,14 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
mqd->cp_hqd_eop_control = tmp;
/* enable doorbell? */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -3784,7 +4279,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set MQD vmid to 0 */
- tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ tmp = regCP_MQD_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
mqd->cp_mqd_control = tmp;
@@ -3794,15 +4289,20 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up the HQD, this is similar to CP_RB0_CNTL */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
+ if (prop->kernel_queue) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ }
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
@@ -3819,7 +4319,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = 0;
/* enable the doorbell if requested */
if (prop->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, prop->doorbell_index);
@@ -3834,17 +4334,17 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_pq_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
+ mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
/* set the vmid for the queue */
mqd->cp_hqd_vmid = 0;
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
mqd->cp_hqd_persistent_state = tmp;
/* set MIN_IB_AVAIL_SIZE */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
+ tmp = regCP_HQD_IB_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
@@ -3854,6 +4354,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_active = prop->hqd_active;
+ /* set UQ fence address */
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -3977,7 +4481,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
/* reset MQD to a clean status */
if (adev->gfx.kiq[0].mqd_backup)
- memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
@@ -4000,19 +4504,19 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.kiq[0].mqd_backup)
- memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
}
return 0;
}
-static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
{
struct amdgpu_device *adev = ring->adev;
struct v11_compute_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
- if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -4021,11 +4525,11 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
mutex_unlock(&adev->srbm_mutex);
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else {
/* restore MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
- memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset ring buffer */
ring->wptr = 0;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
@@ -4037,57 +4541,24 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v11_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
if (!amdgpu_async_gfx_ring)
gfx_v11_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v11_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, 0);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
@@ -4140,11 +4611,23 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
return r;
}
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- r = amdgpu_ring_test_helper(ring);
- if (r)
- return r;
+ if (adev->gfx.disable_kq) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ /* we don't want to set ring->ready */
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ return r;
+ }
+ if (amdgpu_async_gfx_ring)
+ amdgpu_gfx_disable_kgq(adev, 0);
+ } else {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
@@ -4172,13 +4655,14 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
if (r)
return r;
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
- value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
- false : true;
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
adev->gfxhub.funcs->set_fault_enable_default(adev, value);
- amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+ /* TODO investigate why this and the hdp flush above are needed,
+ * are we missing a flush somewhere else? */
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
return 0;
}
@@ -4251,21 +4735,24 @@ static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
}
-static int gfx_v11_0_hw_init(void *handle)
+static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
if (adev->gfx.imu.funcs) {
/* RLC autoload sequence 1: Program rlc ram */
if (adev->gfx.imu.funcs->program_rlc_ram)
adev->gfx.imu.funcs->program_rlc_ram(adev);
+ /* rlc autoload firmware */
+ r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
}
- /* rlc autoload firmware */
- r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
- if (r)
- return r;
} else {
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
@@ -4314,11 +4801,9 @@ static int gfx_v11_0_hw_init(void *handle)
* loaded firstly, so in direct type, it has to load smc ucode
* here before rlc.
*/
- if (!(adev->flags & AMD_IS_APU)) {
- r = amdgpu_pm_load_smu_firmware(adev, NULL);
- if (r)
- return r;
- }
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
}
gfx_v11_0_constants_init(adev);
@@ -4343,18 +4828,70 @@ static int gfx_v11_0_hw_init(void *handle)
if (r)
return r;
+ /* get IMU version from HW if it's not set */
+ if (!adev->gfx.imu_fw_version)
+ adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
+
return r;
}
-static int gfx_v11_0_hw_fini(void *handle)
+static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+ for (m = 0; m < adev->gfx.me.num_me; m++) {
+ for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+ gfx_v11_0_set_userq_eop_interrupts(adev, false);
if (!adev->no_hw_access) {
- if (amdgpu_async_gfx_ring) {
+ if (amdgpu_async_gfx_ring &&
+ !adev->gfx.disable_kq) {
if (amdgpu_gfx_disable_kgq(adev, 0))
DRM_ERROR("KGQ disable failed\n");
}
@@ -4383,19 +4920,19 @@ static int gfx_v11_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v11_0_suspend(void *handle)
+static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v11_0_hw_fini(handle);
+ return gfx_v11_0_hw_fini(ip_block);
}
-static int gfx_v11_0_resume(void *handle)
+static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v11_0_hw_init(handle);
+ return gfx_v11_0_hw_init(ip_block);
}
-static bool gfx_v11_0_is_idle(void *handle)
+static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -4404,11 +4941,11 @@ static bool gfx_v11_0_is_idle(void *handle)
return true;
}
-static int gfx_v11_0_wait_for_idle(void *handle)
+static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -4422,12 +4959,46 @@ static int gfx_v11_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v11_0_soft_reset(void *handle)
+int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req)
+{
+ u32 i, tmp, val;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* Post REQUEST=req as CLIENTID 4 (per the original note: MeId=2, PipeId=0) */
+ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+ WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+ val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+ if (req) {
+ if (val == tmp)
+ break;
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+ REQUEST, 1);
+
+ /* released once readback differs from our held value: unlocked or locked by firmware */
+ if (val != tmp)
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- int i, j, k;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r, i, j, k;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
@@ -4436,16 +5007,11 @@ static int gfx_v11_0_soft_reset(void *handle)
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
- gfx_v11_0_set_safe_mode(adev, 0);
-
+ mutex_lock(&adev->srbm_mutex);
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
- WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+ soc21_grbm_select(adev, i, k, j, 0);
WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
@@ -4455,16 +5021,23 @@ static int gfx_v11_0_soft_reset(void *handle)
for (i = 0; i < adev->gfx.me.num_me; ++i) {
for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
- tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
- WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+ soc21_grbm_select(adev, i, k, j, 0);
WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
}
}
}
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Try to acquire the gfx mutex before accessing CP_VMID_RESET */
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ r = gfx_v11_0_request_gfx_index_mutex(adev, true);
+ if (r) {
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+ DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
+ return r;
+ }
WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
@@ -4474,6 +5047,14 @@ static int gfx_v11_0_soft_reset(void *handle)
RREG32_SOC15(GC, 0, regCP_VMID_RESET);
RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+ /* release the gfx mutex */
+ r = gfx_v11_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+ if (r) {
+ DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
+ return r;
+ }
+
for (i = 0; i < adev->usec_timeout; i++) {
if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
!RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
@@ -4536,15 +5117,15 @@ static int gfx_v11_0_soft_reset(void *handle)
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
- gfx_v11_0_unset_safe_mode(adev, 0);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return gfx_v11_0_cp_resume(adev);
}
-static bool gfx_v11_0_check_soft_reset(void *handle)
+static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
long tmo = msecs_to_jiffies(1000);
@@ -4565,12 +5146,13 @@ static bool gfx_v11_0_check_soft_reset(void *handle)
return false;
}
-static int gfx_v11_0_post_soft_reset(void *handle)
+static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
/**
* GFX soft reset will impact MES, need resume MES when do GFX soft reset
*/
- return amdgpu_mes_resume((struct amdgpu_device *)handle);
+ return amdgpu_mes_resume(adev);
}
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
@@ -4631,15 +5213,40 @@ static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v11_0_early_init(void *handle)
+static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ case 1:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = false;
+ break;
+ case 2:
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = false;
+ break;
+ }
adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
- adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
- AMDGPU_MAX_COMPUTE_RINGS);
+ if (adev->gfx.disable_kq) {
+ /* We need one GFX ring temporarily to set up
+ * the clear state.
+ */
+ adev->gfx.num_gfx_rings = 1;
+ adev->gfx.num_compute_rings = 0;
+ } else {
+ adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ }
gfx_v11_0_set_kiq_pm4_funcs(adev);
gfx_v11_0_set_ring_funcs(adev);
@@ -4654,9 +5261,9 @@ static int gfx_v11_0_early_init(void *handle)
return gfx_v11_0_init_microcode(adev);
}
-static int gfx_v11_0_late_init(void *handle)
+static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -4667,6 +5274,14 @@ static int gfx_v11_0_late_init(void *handle)
if (r)
return r;
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
+ r = gfx_v11_0_set_userq_eop_interrupts(adev, true);
+ if (r)
+ return r;
+
return 0;
}
@@ -4951,27 +5566,35 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
-static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
- u32 reg, data;
+ u32 reg, pre_data, data;
amdgpu_gfx_off_ctrl(adev, false);
-
reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
- if (amdgpu_sriov_is_pp_one_vf(adev))
- data = RREG32_NO_KIQ(reg);
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
else
- data = RREG32(reg);
+ pre_data = RREG32(reg);
- data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- if (amdgpu_sriov_is_pp_one_vf(adev))
- WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
- else
- WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
-
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+ }
amdgpu_gfx_off_ctrl(adev, true);
+
+ if (ring
+ && amdgpu_sriov_is_pp_one_vf(adev)
+ && (pre_data != data)
+ && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
+ amdgpu_ring_emit_wreg(ring, reg, data);
+ }
}
static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
@@ -5001,9 +5624,13 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
// Program RLC_PG_DELAY3 for CGPG hysteresis
if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
break;
default:
@@ -5021,16 +5648,16 @@ static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
-static int gfx_v11_0_set_powergating_state(void *handle,
+static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
@@ -5038,6 +5665,10 @@ static int gfx_v11_0_set_powergating_state(void *handle,
break;
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
if (!enable)
amdgpu_gfx_off_ctrl(adev, false);
@@ -5054,20 +5685,24 @@ static int gfx_v11_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v11_0_set_clockgating_state(void *handle,
+static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -5078,9 +5713,9 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_MGCG */
@@ -5144,45 +5779,17 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- ring->hw_prio);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always being used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- if (ring->use_doorbell) {
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
- lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
- upper_32_bits(ring->wptr));
- }
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
}
}
@@ -5207,42 +5814,14 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- ring->hw_prio);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- /* XXX check if swapping is necessary on BE */
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- BUG(); /* only DOORBELL method supported on gfx11 now */
- }
+ BUG(); /* only DOORBELL method supported on gfx11 now */
}
}
@@ -5265,7 +5844,7 @@ static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}
@@ -5300,10 +5879,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
}
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x400000;
-
amdgpu_ring_write(ring, header);
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -5323,10 +5898,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x40000000;
-
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
@@ -5364,11 +5935,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
PACKET3_RELEASE_MEM_GCR_GL2_WB |
- PACKET3_RELEASE_MEM_GCR_GL2_INV |
- PACKET3_RELEASE_MEM_GCR_GL2_US |
- PACKET3_RELEASE_MEM_GCR_GL1_INV |
- PACKET3_RELEASE_MEM_GCR_GLV_INV |
- PACKET3_RELEASE_MEM_GCR_GLM_INV |
+ PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
PACKET3_RELEASE_MEM_GCR_GLM_WB |
PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
@@ -5388,8 +5955,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq));
- amdgpu_ring_write(ring, ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
+ amdgpu_ring_write(ring, 0);
}
static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -5417,10 +5983,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- if (ring->is_mes_queue)
- gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
- else
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* compute doesn't have PFP */
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
@@ -5428,6 +5991,12 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
amdgpu_ring_write(ring, 0x0);
}
+
+ /* Make sure that we can't skip the SET_Q_MODE packets when the VM
+ * changed in any way.
+ */
+ ring->set_q_mode_offs = 0;
+ ring->set_q_mode_ptr = NULL;
}
static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
@@ -5477,16 +6046,81 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0);
}
+static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
+ ret = ring->wptr & ring->buf_mask;
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
u64 shadow_va, u64 csa_va,
u64 gds_va, bool init_shadow,
int vmid)
{
struct amdgpu_device *adev = ring->adev;
+ unsigned int offs, end;
- if (!adev->gfx.cp_gfx_shadow)
+ if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
return;
+ /*
+ * The logic here isn't easy to understand because we need to keep state
+ * across multiple executions of the function as well as between the
+ * CPU and GPU. The general idea is that the newly written GPU command
+ * has a condition on the previous one and is only executed if really
+ * necessary.
+ */
+
+ /*
+ * The dw in the NOP controls if the next SET_Q_MODE packet should be
+ * executed or not. Reserve 64 bits just to be on the safe side.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
+ offs = ring->wptr & ring->buf_mask;
+
+ /*
+ * We start with skipping the prefix SET_Q_MODE and always executing
+ * the postfix SET_Q_MODE packet. This is changed below with a
+ * WRITE_DATA command when the postfix is executed.
+ */
+ amdgpu_ring_write(ring, shadow_va ? 1 : 0);
+ amdgpu_ring_write(ring, 0);
+
+ if (ring->set_q_mode_offs) {
+ uint64_t addr;
+
+ addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+ addr += ring->set_q_mode_offs << 2;
+ end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
+ }
+
+ /*
+ * When the postfix SET_Q_MODE packet executes we need to make sure that the
+ * next prefix SET_Q_MODE packet executes as well.
+ */
+ if (!shadow_va) {
+ uint64_t addr;
+
+ addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+ addr += offs << 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 0x1);
+ }
+
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
amdgpu_ring_write(ring, lower_32_bits(shadow_va));
amdgpu_ring_write(ring, upper_32_bits(shadow_va));
@@ -5498,33 +6132,26 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
amdgpu_ring_write(ring, init_shadow ?
PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
-}
-static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
-{
- unsigned ret;
+ if (ring->set_q_mode_offs)
+ amdgpu_ring_patch_cond_exec(ring, end);
- amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
- ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ if (shadow_va) {
+ uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
- return ret;
-}
+ /*
+ * If the tokens match try to skip the last postfix SET_Q_MODE
+ * packet to avoid saving/restoring the state all the time.
+ */
+ if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
+ *ring->set_q_mode_ptr = 0;
-static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
+ ring->set_q_mode_token = token;
+ } else {
+ ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
+ }
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
+ ring->set_q_mode_offs = offs;
}
static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
@@ -5535,6 +6162,9 @@ static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
struct amdgpu_ring *kiq_ring = &kiq->ring;
unsigned long flags;
+ if (adev->enable_mes)
+ return -EINVAL;
+
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
@@ -5582,28 +6212,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
void *de_payload_cpu_addr;
int cnt;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v10_gfx_meta_data, de_payload);
- de_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- de_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gds_backup) +
- offsetof(struct v10_gfx_meta_data, de_payload);
- gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v10_gfx_meta_data, de_payload);
- de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
- AMDGPU_CSA_SIZE - adev->gds.gds_size,
- PAGE_SIZE);
- }
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -5690,19 +6305,6 @@ static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask, 0x20);
}
-static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
- unsigned vmid)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t value = 0;
-
- value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
- value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
- value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
- value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
- WREG32_SOC15(GC, 0, regSQ_CMD, value);
-}
-
static void
gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -5840,25 +6442,23 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- int i;
+ u32 doorbell_offset = entry->src_data[0];
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
- uint32_t mes_queue_id = entry->src_data[0];
+ int i;
DRM_DEBUG("IH: CP EOP\n");
- if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
- struct amdgpu_mes_queue *queue;
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
- mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
-
- spin_lock(&adev->mes.queue_id_lock);
- queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
- if (queue) {
- DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
- amdgpu_fence_process(queue->ring);
- }
- spin_unlock(&adev->mes.queue_id_lock);
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -5893,15 +6493,42 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -5910,17 +6537,75 @@ static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
- unsigned type,
+ unsigned int type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
- PRIV_INSTR_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -5940,27 +6625,29 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
- switch (me_id) {
- case 0:
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- /* we only enabled 1 gfx queue per pipe for now */
- if (ring->me == me_id && ring->pipe == pipe_id)
- drm_sched_fault(&ring->sched);
- }
- break;
- case 1:
- case 2:
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- if (ring->me == me_id && ring->pipe == pipe_id &&
- ring->queue == queue_id)
- drm_sched_fault(&ring->sched);
+ if (!adev->gfx.disable_kq) {
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ break;
}
- break;
- default:
- BUG();
- break;
}
}
@@ -5973,6 +6660,15 @@ static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -6059,6 +6755,426 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
+static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable the pipe reset until the CPFW fully supports it. */
+ dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
+ return false;
+}
+
+
+static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly,
+ * so the pipe reset status relies on the later gfx ring test result.
+ */
+ return 0;
+}
+
+static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
+ if (r) {
+
+ dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
+ r = gfx_v11_reset_gfx_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_kgq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "failed to init kgq\n");
+ return r;
+ }
+
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kgq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
+{
+
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ } else {
+ if (ring->me == 1) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ /* mec1 fw pc: CP_MEC1_INSTR_PNTR */
+ } else {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ /* mec2 fw pc: CP_MEC2_INSTR_PNTR */
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* FIXME: Sometimes the driver can't cache the MEC firmware start PC correctly, so the pipe
+ * reset status relies on the compute ring test result.
+ */
+ return 0;
+}
+
+static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r = 0;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ if (r) {
+ dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ r = gfx_v11_0_reset_compute_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kcq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_11_0[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "regCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_11[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_11[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i &&
+ gc_cp_reg_list_11[reg].reg_offset ==
+ regCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0,
+ regCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_11[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ soc21_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_11[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.name = "gfx_v11_0",
.early_init = gfx_v11_0_early_init,
@@ -6077,6 +7193,8 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.set_clockgating_state = gfx_v11_0_set_clockgating_state,
.set_powergating_state = gfx_v11_0_set_powergating_state,
.get_clockgating_state = gfx_v11_0_get_clockgating_state,
+ .dump_ip_state = gfx_v11_ip_dump,
+ .print_ip_state = gfx_v11_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
@@ -6088,13 +7206,14 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
- .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ .emit_frame_size = /* totally 247 maximum if 16 IBs */
+ 5 + /* update_spm_vmid */
5 + /* COND_EXEC */
- 9 + /* SET_Q_PREEMPTION_MODE */
+ 22 + /* SET_Q_PREEMPTION_MODE */
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
5 + /* COND_EXEC */
@@ -6103,8 +7222,10 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
31 + /* DE_META */
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
+ 22 + /* SET_Q_PREEMPTION_MODE */
8 + 8 + /* FENCE x2 */
- 8, /* gfx_v11_0_emit_mem_sync */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
.emit_fence = gfx_v11_0_ring_emit_fence,
@@ -6114,19 +7235,21 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
.test_ring = gfx_v11_0_ring_test_ring,
.test_ib = gfx_v11_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v11_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v11_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v11_0_ring_emit_wreg,
.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v11_0_ring_soft_recovery,
.emit_mem_sync = gfx_v11_0_emit_mem_sync,
+ .reset = gfx_v11_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
@@ -6138,6 +7261,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
.get_wptr = gfx_v11_0_ring_get_wptr_compute,
.set_wptr = gfx_v11_0_ring_set_wptr_compute,
.emit_frame_size =
+ 5 + /* update_spm_vmid */
20 + /* gfx_v11_0_ring_emit_gds_switch */
7 + /* gfx_v11_0_ring_emit_hdp_flush */
5 + /* hdp invalidate */
@@ -6146,7 +7270,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v11_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
- 8, /* gfx_v11_0_emit_mem_sync */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
.emit_ib = gfx_v11_0_ring_emit_ib_compute,
.emit_fence = gfx_v11_0_ring_emit_fence,
@@ -6156,12 +7281,16 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
.test_ring = gfx_v11_0_ring_test_ring,
.test_ib = gfx_v11_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v11_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v11_0_ring_emit_wreg,
.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
.emit_mem_sync = gfx_v11_0_emit_mem_sync,
+ .reset = gfx_v11_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
@@ -6179,7 +7308,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
7 + /* gfx_v11_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v11_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
.emit_ib = gfx_v11_0_ring_emit_ib_compute,
@@ -6217,6 +7345,11 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
.process = gfx_v11_0_priv_reg_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { /* installed as adev->gfx.bad_op_irq.funcs */
+ .set = gfx_v11_0_set_bad_op_fault_state,
+ .process = gfx_v11_0_bad_op_irq,
+};
+
static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
.set = gfx_v11_0_set_priv_inst_fault_state,
.process = gfx_v11_0_priv_inst_irq,
@@ -6234,6 +7367,9 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
@@ -6345,6 +7481,9 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
mask = 1;
counter = 0;
gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
index 10cfc29c27c9..157a5c812259 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
@@ -26,4 +26,7 @@
extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block;
+int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
index 26d6286d86c9..999bb3cc88b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
@@ -69,7 +69,7 @@ static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
} else {
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
- adev->virt.ops->ras_poison_handler(adev);
+ adev->virt.ops->ras_poison_handler(adev, ras_if->block);
else
dev_warn(adev->dev,
"No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name);
@@ -85,6 +85,7 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev,
if (entry && (entry->client_id == SOC21_IH_CLIENTID_GFX) &&
(entry->src_id == GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT) &&
!entry->vmid && !entry->pasid) {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
uint32_t rlc_status0 = 0;
rlc_status0 = RREG32_SOC15(GC, 0, regRLC_RLCS_FED_STATUS_0);
@@ -96,7 +97,8 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev,
ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
}
- amdgpu_ras_reset_gpu(adev);
+ if (con && !amdgpu_ras_is_rma(adev))
+ amdgpu_ras_reset_gpu(adev);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm
new file mode 100644
index 000000000000..9b90b66368c7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 Dwords (256 bytes) of the 192-Dword cleaner shader.
+// To turn this shader program on for compilation, change this to main and the lower shader main to main_1
+
+// Navi3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+shader main
+ asic(GFX11)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+
+ //
+ // CLEAR VGPRs
+ //
+ s_mov_b32 m0, 0x00000058 // Loop 96/8=12 times (loop unrolled for performance); m0 counts down from 0x58 in steps of 8
+
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_sub_u32 m0, m0, 8
+ s_cbranch_scc0 label_0005
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Skip the LDS clear unless this is the first wave of the ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63); NOTE(review): wave_size is 32 above, so presumably 0..31 - confirm
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance); m0 counts down from 0x68 in steps of 4
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ // Clear the remaining special registers: vcc, flat_scratch and ttmp0-ttmp15
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h
new file mode 100644
index 000000000000..3218cc04f543
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* gfx_11_0_3 cleaner shader as raw dwords; presumably the assembled form of gfx_v11_0_3_cleaner_shader.asm -- TODO confirm */
+static const u32 gfx_11_0_3_cleaner_shader_hex[] = {
+ 0xb0804006, 0xbe8200ff,
+ 0x00000058, 0xbefd0080,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefd0002, 0x80828802,
+ 0xbfa1fff5, 0xbe8200ff,
+ 0x80000000, 0x8b020002,
+ 0xbfa10012, 0xbefe00c1,
+ 0xbeff00c1, 0xd71f0001,
+ 0x0001007f, 0xd7200001,
+ 0x0002027e, 0x16020288,
+ 0xbe8200bf, 0xbefd00c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd7006a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbfa1fff7, 0xbefd00ff,
+ 0x00000068, 0xbe804280,
+ 0xbe814280, 0xbe824280,
+ 0xbe834280, 0x80fd847d,
+ 0xbfa1fffa, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbfb00000, 0xbf9f0000, /* NOTE(review): the 0xbf9f0000 words from here on look like trailing padding -- confirm */
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
new file mode 100644
index 000000000000..710ec9c34e43
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -0,0 +1,5793 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "imu_v12_0.h"
+#include "soc24.h"
+#include "nvd.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h"
+
+#include "soc15.h"
+#include "clearstate_gfx12.h"
+#include "v12_structs.h"
+#include "gfx_v12_0.h"
+#include "nbif_v6_3_1.h"
+#include "mes_v12_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define GFX12_NUM_GFX_RINGS 1
+#define GFX12_MEC_HPD_SIZE 2048
+
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00f00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 /* NOTE(review): identical redefinition of the macro three lines up; redundant */
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
+
+
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin");
+/* GC (instance 0) register entries: GRBM/CP status, ring and IB state, UTCL1/VM fault status, instruction pointers. */
+static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
+ /* cp header registers: same register listed 8 times; presumably each read returns the next dump word -- confirm */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
+};
+/* GC (instance 0) register entries for compute queues: CP_HQD_* state followed by the MEC ME1 header dump. */
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers: same register listed 8 times; presumably each read returns the next dump word -- confirm */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+};
+/* GC (instance 0) register entries for gfx queues: CP_GFX_HQD/MQD and RB/IB1 state, then PFP and ME header dumps. */
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers: PFP then ME, each listed 8 times; presumably each read returns the next dump word -- confirm */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+/* Golden register values for GC 12.0 "rev0"; the two hex arguments are presumably (and_mask, or_mask) -- confirm. */
+static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
+};
+/* Golden register values for GC 12.0: a single regDB_MEM_CONFIG entry. */
+static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000),
+};
+/* SH_MEM_CONFIG value: 64-bit address mode, unaligned alignment mode, INITIAL_INST_PREFETCH field = 3. */
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev);
+static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev);
+static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
+/* Write a SET_RESOURCES packet (header + 7 dwords) carrying @queue_mask to @kiq_ring; all other fields are zero. */
+static void gfx_v12_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* final dword of the packet, written as zero */
+}
+/* Write a MAP_QUEUES packet (7 dwords) to @kiq_ring describing @ring: doorbell, MQD address and wptr address. */
+static void gfx_v12_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t me = 0, eng_sel = 0;
+
+ switch (ring->funcs->type) { /* pick the ME and ENGINE_SEL fields from the ring type */
+ case AMDGPU_RING_TYPE_COMPUTE:
+ me = 1;
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ me = 0;
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ me = 2;
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1); /* unexpected ring type: warn and continue with me = 0, eng_sel = 0 */
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* control dword: queue/pipe come from @ring, me and engine_sel from the switch above */
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((me)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+/* Write an UNMAP_QUEUES packet (6 dwords) for @ring to @kiq_ring, or hand off to MES when the KIQ ring is not ready. */
+static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; /* 4 for gfx rings, 0 otherwise */
+
+ if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { /* nothing is written to kiq_ring on this path */
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ return;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, engine: eng_sel, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) { /* only this action passes gpu_addr and seq in the last three dwords */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+/* Write a QUERY_STATUS packet (7 dwords) for @ring to @kiq_ring, carrying the 64-bit @addr and @seq. */
+static void gfx_v12_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; /* 4 for gfx rings, 0 otherwise */
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* doorbell offset of the queried ring and engine select */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+/* KIQ wrapper: forwards to gfx_v12_0_ring_invalidate_tlbs() on @kiq_ring with dst_sel fixed to 1. */
+static void gfx_v12_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid,
+ uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v12_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+/* KIQ PM4 callback table for gfx v12; each *_size is the packet length in dwords. */
+static const struct kiq_pm4_funcs gfx_v12_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v12_0_kiq_set_resources,
+ .kiq_map_queues = gfx_v12_0_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v12_0_kiq_unmap_queues,
+ .kiq_query_status = gfx_v12_0_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v12_0_kiq_invalidate_tlbs,
+ .set_resources_size = 8, /* matches the 8 ring writes in kiq_set_resources */
+ .map_queues_size = 7, /* matches the 7 ring writes in kiq_map_queues */
+ .unmap_queues_size = 6, /* matches the 6 ring writes in kiq_unmap_queues */
+ .query_status_size = 7, /* matches the 7 ring writes in kiq_query_status */
+ .invalidate_tlbs_size = 2, /* NOTE(review): verify against gfx_v12_0_ring_invalidate_tlbs() */
+};
+/* Point the first KIQ instance (adev->gfx.kiq[0]) at the gfx v12 PM4 callback table. */
+static void gfx_v12_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v12_0_kiq_pm4_funcs;
+}
+/* Write a WAIT_REG_MEM packet (7 dwords) to @ring; function field is fixed to 3 (equal), @inv is the poll interval. */
+static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref,
+ uint32_t mask, uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space) /* alignment is only checked when mem_space is non-zero */
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+/* Ring smoke test: submit a write of 0xDEADBEEF to SCRATCH_REG0 and poll for it; returns 0, alloc error or -ETIMEDOUT. */
+static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(scratch, 0xCAFEDEAD); /* seed a value different from the one the ring will write */
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) { /* KIQ uses the emit_wreg helper, other rings SET_UCONFIG_REG */
+ gfx_v12_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
+ } else {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ }
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) { /* poll the scratch register until the ring's write shows up */
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1) /* emulation mode waits 1 ms per iteration instead of 1 us */
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) /* loop ran to completion without seeing the value */
+ r = -ETIMEDOUT;
+ return r;
+}
+
+/*
+ * Indirect-buffer test: submit a WRITE_DATA IB that stores 0xDEADBEEF into a
+ * writeback slot seeded with 0xCAFEDEAD and wait up to @timeout for its fence.
+ * Returns 0 on success or when skipped for the MES KIQ, -ETIMEDOUT if the fence
+ * wait times out, -EINVAL if the slot is unchanged, else the helper's error.
+ */
+static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const unsigned int ib_dw = 5;	/* dwords in the WRITE_DATA packet below */
+	struct amdgpu_ib ib;
+	struct dma_fence *f = NULL;
+	unsigned index;
+	uint64_t gpu_addr;
+	uint32_t *cpu_ptr;
+	long r;
+
+	/* MES KIQ fw hasn't indirect buffer support for now */
+	if (adev->enable_mes_kiq && ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		return 0;
+
+	memset(&ib, 0, sizeof(ib));
+	r = amdgpu_device_wb_get(adev, &index);
+	if (r)
+		return r;
+
+	gpu_addr = adev->wb.gpu_addr + (index * 4);
+	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+	cpu_ptr = &adev->wb.wb[index];
+
+	/* request room for all ib_dw dwords written below */
+	r = amdgpu_ib_get(adev, NULL, ib_dw * sizeof(uint32_t), AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r) {
+		dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
+		goto err1;
+	}
+
+	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+	ib.ptr[2] = lower_32_bits(gpu_addr);
+	ib.ptr[3] = upper_32_bits(gpu_addr);
+	ib.ptr[4] = 0xDEADBEEF;
+	ib.length_dw = ib_dw;
+
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	if (r)
+		goto err2;
+
+	r = dma_fence_wait_timeout(f, false, timeout);
+	if (r == 0)
+		r = -ETIMEDOUT;
+	if (r < 0)
+		goto err2;
+	r = le32_to_cpu(*cpu_ptr) == 0xDEADBEEF ? 0 : -EINVAL;
+err2:
+	amdgpu_ib_free(&ib, NULL);
+	dma_fence_put(f);
+err1:
+	amdgpu_device_wb_free(adev, index);
+	return r;
+}
+
+/* Release the PFP, ME, RLC and MEC firmware images and free the RLC register list format. */
+static void gfx_v12_0_free_microcode(struct amdgpu_device *adev)
+{
+	amdgpu_ucode_release(&adev->gfx.pfp_fw);
+	amdgpu_ucode_release(&adev->gfx.me_fw);
+	amdgpu_ucode_release(&adev->gfx.rlc_fw);
+	amdgpu_ucode_release(&adev->gfx.mec_fw);
+	kfree(adev->gfx.rlc.register_list_format);
+}
+
+/* Request the TOC image and cache its header fields in adev->psp.toc (released on failure). */
+static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
+{
+	const struct psp_firmware_header_v1_0 *hdr;
+	int ret;
+
+	ret = amdgpu_ucode_request(adev, &adev->psp.toc_fw, AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_toc.bin", ucode_prefix);
+	if (ret) {
+		amdgpu_ucode_release(&adev->psp.toc_fw);
+		return ret;
+	}
+
+	hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+	adev->psp.toc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+	adev->psp.toc.feature_version = le32_to_cpu(hdr->sos.fw_version);
+	adev->psp.toc.size_bytes = le32_to_cpu(hdr->header.ucode_size_bytes);
+	adev->psp.toc.start_addr = (uint8_t *)hdr +
+				   le32_to_cpu(hdr->header.ucode_array_offset_bytes);
+
+	return 0;
+}
+
+/*
+ * Request the PFP, ME, RLC (skipped for SR-IOV VFs) and MEC firmware, the TOC
+ * when RLC backdoor autoload is used, and the IMU firmware if an init hook
+ * exists. Returns 0 or the first error; on error the gfx images are released.
+ */
+static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
+{
+	char ucode_prefix[30];
+	int err;
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	uint16_t version_major;
+	uint16_t version_minor;
+
+	DRM_DEBUG("\n");
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_pfp.bin", ucode_prefix);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_me.bin", ucode_prefix);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
+
+	if (!amdgpu_sriov_vf(adev)) {
+		if (amdgpu_is_kicker_fw(adev))
+			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED,
+						   "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
+		else
+			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED,
+						   "amdgpu/%s_rlc.bin", ucode_prefix);
+		if (err)
+			goto out;
+		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+		if (err)
+			goto out;
+	}
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s_mec.bin", ucode_prefix);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+
+	/* only one MEC for gfx 12 */
+	adev->gfx.mec2_fw = NULL;
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+		err = gfx_v12_0_init_toc_microcode(adev, ucode_prefix);
+		/* bail out so the IMU result below cannot hide a TOC failure */
+		if (err)
+			goto out;
+	}
+
+	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
+		err = adev->gfx.imu.funcs->init_microcode(adev);
+		if (err)
+			dev_err(adev->dev, "Failed to load imu firmware!\n");
+	}
+
+out:
+	if (err) {
+		amdgpu_ucode_release(&adev->gfx.pfp_fw);
+		amdgpu_ucode_release(&adev->gfx.me_fw);
+		amdgpu_ucode_release(&adev->gfx.rlc_fw);
+		amdgpu_ucode_release(&adev->gfx.mec_fw);
+	}
+	return err;
+}
+
+/*
+ * Clear-state buffer size in dwords: 1 + sum over SECT_CONTEXT extents of
+ * (2 + reg_count). Returns 0 if gfx12_cs_data has a non-SECT_CONTEXT section.
+ */
+static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev)
+{
+	const struct cs_section_def *section;
+	const struct cs_extent_def *extent;
+	u32 dwords = 1;
+
+	for (section = gfx12_cs_data; section->section; section++) {
+		if (section->id != SECT_CONTEXT)
+			return 0;
+		for (extent = section->section; extent->extent; extent++)
+			dwords += 2 + extent->reg_count;
+	}
+	return dwords;
+}
+
+/*
+ * Write the clear-state data from adev->gfx.rlc.cs_data into @buffer: the
+ * extent count in buffer[0], then reg_count, reg_index and values per extent.
+ * No-op if cs_data or @buffer is NULL; bails out before writing buffer[0] on
+ * the first section that is not SECT_CONTEXT.
+ */
+static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
+{
+	const struct cs_section_def *section;
+	const struct cs_extent_def *extent;
+	u32 pos = 1;	/* slot 0 is filled with the extent count at the end */
+	u32 extents = 0, reg;
+
+	if (!adev->gfx.rlc.cs_data || !buffer)
+		return;
+
+	for (section = adev->gfx.rlc.cs_data; section->section; section++) {
+		if (section->id != SECT_CONTEXT)
+			return;
+		for (extent = section->section; extent->extent; extent++) {
+			extents++;
+			buffer[pos++] = extent->reg_count;
+			buffer[pos++] = extent->reg_index;
+			for (reg = 0; reg < extent->reg_count; reg++)
+				buffer[pos++] = cpu_to_le32(extent->extent[reg]);
+		}
+	}
+	buffer[0] = extents;
+}
+
+/* Free the RLC clear-state and jump-table kernel buffer objects. */
+static void gfx_v12_0_rlc_fini(struct amdgpu_device *adev)
+{
+	/* clear state block */
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+			      &adev->gfx.rlc.clear_state_gpu_addr,
+			      (void **)&adev->gfx.rlc.cs_ptr);
+	/* jump table block */
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+			      &adev->gfx.rlc.cp_table_gpu_addr,
+			      (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+/* Record the register offsets used for RLCG register access and flag it as supported. */
+static void gfx_v12_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+	struct amdgpu_rlcg_reg_access_ctrl *ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+
+	ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+	ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
+	ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
+	ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
+	ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
+	ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
+	ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
+	adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+/*
+ * Select gfx12_cs_data as the RLC clear-state data, build the clear-state
+ * block and, when the RLC provides the hook, set the SPM VMID to 0xf.
+ * Returns 0 or the error from amdgpu_gfx_rlc_init_csb().
+ */
+static int gfx_v12_0_rlc_init(struct amdgpu_device *adev)
+{
+	int ret;
+
+	adev->gfx.rlc.cs_data = gfx12_cs_data;
+	if (adev->gfx.rlc.cs_data) {
+		/* init clear state block */
+		ret = amdgpu_gfx_rlc_init_csb(adev);
+		if (ret)
+			return ret;
+	}
+
+	/* init spm vmid with 0xf */
+	if (adev->gfx.rlc.funcs->update_spm_vmid)
+		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+	return 0;
+}
+
+static void gfx_v12_0_mec_fini(struct amdgpu_device *adev)
+{ /* Free the MEC buffer objects: HPD/EOP, MEC firmware and MEC firmware data. */
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
+}
+
+/* Clear the ME queue bitmap, then call the common graphics queue acquire helper. */
+static void gfx_v12_0_me_init(struct amdgpu_device *adev)
+{
+	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+	amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
+/*
+ * Clear the MEC queue bitmap, acquire the compute queues and, if any compute
+ * rings are configured, allocate and zero one GFX12_MEC_HPD_SIZE slot per ring
+ * in a GTT buffer object. Returns 0 or the allocation error.
+ */
+static int gfx_v12_0_mec_init(struct amdgpu_device *adev)
+{
+	size_t hpd_bytes;
+	u32 *hpd_cpu;
+	int ret;
+
+	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+	hpd_bytes = adev->gfx.num_compute_rings * GFX12_MEC_HPD_SIZE;
+	if (!hpd_bytes)
+		return 0;
+
+	ret = amdgpu_bo_create_reserved(adev, hpd_bytes, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+					&adev->gfx.mec.hpd_eop_obj,
+					&adev->gfx.mec.hpd_eop_gpu_addr, (void **)&hpd_cpu);
+	if (ret) {
+		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", ret);
+		gfx_v12_0_mec_fini(adev);
+		return ret;
+	}
+
+	memset(hpd_cpu, 0, hpd_bytes);
+	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	return 0;
+}
+
+/* Select (@wave, @address) through SQ_IND_INDEX and return the value read from SQ_IND_DATA. */
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+	WREG32_SOC15(GC, 0, regSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+		     (address << SQ_IND_INDEX__INDEX__SHIFT));
+	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+/* Read @num registers from @regno for @wave/@thread into @out (SQ_IND_INDEX auto-increment). */
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, uint32_t thread,
+			   uint32_t regno, uint32_t num, uint32_t *out)
+{
+	uint32_t i;
+
+	WREG32_SOC15(GC, 0, regSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+		     (regno << SQ_IND_INDEX__INDEX__SHIFT) | SQ_IND_INDEX__AUTO_INCR_MASK |
+		     (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT));
+	for (i = 0; i < num; i++)
+		out[i] = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+/*
+ * Append one wave's state to @dst at index *@no_fields: the value 4 (the wave
+ * data type) followed by 23 SQ_WAVE_* registers read indirectly for @wave.
+ * *@no_fields is advanced once per value written.
+ */
+static void gfx_v12_0_read_wave_data(struct amdgpu_device *adev,
+				     uint32_t xcc_id,
+				     uint32_t simd, uint32_t wave,
+				     uint32_t *dst, int *no_fields)
+{
+	/* indirect register indices, in the order their values are stored */
+	static const uint32_t wave_regs[] = {
+		ixSQ_WAVE_STATUS,
+		ixSQ_WAVE_PC_LO, ixSQ_WAVE_PC_HI,
+		ixSQ_WAVE_EXEC_LO, ixSQ_WAVE_EXEC_HI,
+		ixSQ_WAVE_HW_ID1, ixSQ_WAVE_HW_ID2,
+		ixSQ_WAVE_GPR_ALLOC, ixSQ_WAVE_LDS_ALLOC,
+		ixSQ_WAVE_IB_STS, ixSQ_WAVE_IB_STS2, ixSQ_WAVE_IB_DBG1,
+		ixSQ_WAVE_M0, ixSQ_WAVE_MODE, ixSQ_WAVE_STATE_PRIV,
+		ixSQ_WAVE_EXCP_FLAG_PRIV, ixSQ_WAVE_EXCP_FLAG_USER,
+		ixSQ_WAVE_TRAP_CTRL, ixSQ_WAVE_ACTIVE, ixSQ_WAVE_VALID_AND_IDLE,
+		ixSQ_WAVE_DVGPR_ALLOC_LO, ixSQ_WAVE_DVGPR_ALLOC_HI, ixSQ_WAVE_SCHED_MODE,
+	};
+	unsigned int i;
+
+	/*
+	 * in gfx12 the SIMD_ID is specified as part of the INSTANCE field when
+	 * performing a select_se_sh so it should be zero here
+	 */
+	WARN_ON(simd != 0);
+
+	/* type 4 wave data */
+	dst[(*no_fields)++] = 4;
+	for (i = 0; i < ARRAY_SIZE(wave_regs); i++)
+		dst[(*no_fields)++] = wave_read_ind(adev, wave, wave_regs[i]);
+}
+
+/* Read @size SGPRs of @wave, starting at @start, into @dst; @simd is expected to be 0. */
+static void gfx_v12_0_read_wave_sgprs(struct amdgpu_device *adev,
+				      uint32_t xcc_id, uint32_t simd,
+				      uint32_t wave, uint32_t start,
+				      uint32_t size, uint32_t *dst)
+{
+	const uint32_t first_reg = start + SQIND_WAVE_SGPRS_OFFSET;
+
+	WARN_ON(simd != 0);
+	wave_read_regs(adev, wave, 0, first_reg, size, dst);
+}
+
+/* Read @size VGPRs of @thread in @wave, starting at @start, into @dst. */
+static void gfx_v12_0_read_wave_vgprs(struct amdgpu_device *adev,
+				      uint32_t xcc_id, uint32_t simd,
+				      uint32_t wave, uint32_t thread,
+				      uint32_t start, uint32_t size, uint32_t *dst)
+{
+	const uint32_t first_reg = start + SQIND_WAVE_VGPRS_OFFSET;
+
+	wave_read_regs(adev, wave, thread, first_reg, size, dst);
+}
+
+static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc24_grbm_select(adev, me, pipe, q, vm); /* @xcc_id is accepted but not used */
+}
+
+/* Shadow / CSA sizes and alignments reported by gfx_v12_0_get_gfx_shadow_info*(); all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info)
+{ /* Fill @shadow_info with the fixed MQD shadow and FW work-area (CSA) sizes/alignments. */
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+/* Fill @shadow_info if cp_gfx_shadow or @skip_check is set, else zero it and return -EINVAL. */
+static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+					 struct amdgpu_gfx_shadow_info *shadow_info,
+					 bool skip_check)
+{
+	if (!adev->gfx.cp_gfx_shadow && !skip_check) {
+		memset(shadow_info, 0, sizeof(*shadow_info));
+		return -EINVAL;
+	}
+	gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
+	return 0;
+}
+
+static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { /* gfx v12 implementations of the amdgpu_gfx_funcs hooks */
+ .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v12_0_select_se_sh,
+ .read_wave_data = &gfx_v12_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v12_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
+ .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
+ .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info,
+};
+
+/*
+ * Set the fixed gfx configuration limits. Only GC 12.0.0 and 12.0.1 are
+ * handled; any other GC IP version triggers BUG(). Always returns 0.
+ */
+static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
+{
+	const u32 gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+	if (gc_ver != IP_VERSION(12, 0, 0) && gc_ver != IP_VERSION(12, 0, 1))
+		BUG();
+
+	adev->gfx.config.max_hw_contexts = 8;
+	adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+	adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+	adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+	adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+
+	return 0;
+}
+
+/*
+ * Set up kernel gfx ring @ring_id.
+ * @me, @pipe, @queue: hardware queue the ring is bound to.
+ * Fills in doorbell, GFXHUB 0, the ring name and the per-pipe EOP interrupt
+ * type, then returns the result of amdgpu_ring_init() called with the default
+ * ring priority.
+ */
+static int gfx_v12_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+				   int me, int pipe, int queue)
+{
+	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[ring_id];
+	unsigned int irq_type;
+
+	ring->me = me;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	/* ring 0 takes the gfx_ring0 doorbell, any other ring gfx_ring1 */
+	ring->doorbell_index = (ring_id ? adev->doorbell_index.gfx_ring1 :
+					  adev->doorbell_index.gfx_ring0) << 1;
+	ring->vm_hub = AMDGPU_GFXHUB(0);
+	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	/* offset the PIPE0 EOP interrupt type by the ring's pipe */
+	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+				AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * Set up kernel compute ring @ring_id on MEC @mec, @pipe, @queue: doorbell,
+ * per-ring EOP slot inside the HPD buffer, GFXHUB 0, ring name, EOP interrupt
+ * type and pipe priority, then return the result of amdgpu_ring_init().
+ * The ring's me field is @mec + 1.
+ */
+static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+				       int mec, int pipe, int queue)
+{
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+	unsigned int irq_type, hw_prio;
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
+			     (ring_id * GFX12_MEC_HPD_SIZE);
+	ring->vm_hub = AMDGPU_GFXHUB(0);
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP +
+		   ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe;
+	if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring))
+		hw_prio = AMDGPU_GFX_PIPE_PRIO_HIGH;
+	else
+		hw_prio = AMDGPU_GFX_PIPE_PRIO_NORMAL;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+				hw_prio, NULL);
+}
+
+static struct { /* per-firmware-id placement in the RLC autoload buffer, filled from the RLC TOC */
+ SOC24_FIRMWARE_ID id;
+ unsigned int offset; /* byte offset: TOC offset * RLC_TOC_OFFSET_DWUNIT * 4 */
+ unsigned int size; /* slot size in bytes */
+ unsigned int size_x16; /* NOTE(review): not written by the code visible here - confirm it is used */
+} rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
+
+#define RLC_TOC_OFFSET_DWUNIT 8 /* TOC offsets are multiplied by this many dwords */
+#define RLC_SIZE_MULTIPLE 1024 /* dword multiplier applied when a TOC entry sets size_x16 */
+#define RLC_TOC_UMF_SIZE_inM 23ULL /* lower bound for the autoload buffer size, in MiB */
+#define RLC_TOC_FORMAT_API 165ULL /* placed in bits 31:24 of the dword patched into the TOC image */
+
+/* Record byte offset/size of each RLC TOC entry; ids beyond rlc_autoload_info[] are skipped. */
+static void gfx_v12_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
+{
+	RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
+
+	for (; ucode && ucode->id > SOC24_FIRMWARE_ID_INVALID; ucode++) {
+		if (ucode->id >= SOC24_FIRMWARE_ID_MAX)
+			continue;	/* would index past rlc_autoload_info[] */
+		rlc_autoload_info[ucode->id].id = ucode->id;
+		rlc_autoload_info[ucode->id].offset = ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
+		rlc_autoload_info[ucode->id].size = ucode->size_x16 ?
+			ucode->size * RLC_SIZE_MULTIPLE * 4 : ucode->size * 4;
+	}
+}
+
+/*
+ * Parse the RLC TOC and return the autoload buffer size in bytes: the summed
+ * slot sizes, extended to the end of the last slot, at least RLC_TOC_UMF_SIZE_inM MiB.
+ */
+static uint32_t gfx_v12_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+	const unsigned int last = SOC24_FIRMWARE_ID_MAX - 1;
+	const uint32_t min_bytes = RLC_TOC_UMF_SIZE_inM << 20;
+	uint32_t bytes = 0;
+	unsigned int fw;
+
+	gfx_v12_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
+	for (fw = SOC24_FIRMWARE_ID_RLC_G_UCODE; fw < SOC24_FIRMWARE_ID_MAX; fw++)
+		bytes += rlc_autoload_info[fw].size;
+	/* In case the offset in rlc toc ucode is aligned */
+	if (bytes < rlc_autoload_info[last].offset)
+		bytes = rlc_autoload_info[last].offset + rlc_autoload_info[last].size;
+	return bytes < min_bytes ? min_bytes : bytes;
+}
+
+/*
+ * Create the VRAM buffer object (64 KiB aligned) that receives the RLC
+ * backdoor autoload images; its size comes from the parsed TOC.
+ * Returns 0 or the amdgpu_bo_create_reserved() error.
+ */
+static int gfx_v12_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
+{
+	const uint32_t bo_bytes = gfx_v12_0_calc_toc_total_size(adev);
+	int ret;
+
+	ret = amdgpu_bo_create_reserved(adev, bo_bytes, 64 * 1024,
+					AMDGPU_GEM_DOMAIN_VRAM,
+					&adev->gfx.rlc.rlc_autoload_bo,
+					&adev->gfx.rlc.rlc_autoload_gpu_addr,
+					(void **)&adev->gfx.rlc.rlc_autoload_ptr);
+	if (ret)
+		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", ret);
+
+	return ret;
+}
+
+/*
+ * Copy a firmware image into the slot the RLC TOC assigned to @id inside the
+ * autoload buffer. @fw_size == 0 means "use the slot size"; larger images are
+ * truncated to the slot and the unused tail of the slot is zeroed. Ids outside
+ * (SOC24_FIRMWARE_ID_INVALID, SOC24_FIRMWARE_ID_MAX) are ignored.
+ */
+static void gfx_v12_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+						       SOC24_FIRMWARE_ID id,
+						       const void *fw_data,
+						       uint32_t fw_size)
+{
+	char *slot = adev->gfx.rlc.rlc_autoload_ptr;
+	uint32_t slot_size, copy_size;
+
+	if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
+		return;
+
+	slot += rlc_autoload_info[id].offset;
+	slot_size = rlc_autoload_info[id].size;
+	/* zero-length requests and oversized images both use the full slot size */
+	copy_size = (fw_size == 0 || fw_size > slot_size) ? slot_size : fw_size;
+
+	memcpy(slot, fw_data, copy_size);
+	if (copy_size < slot_size)
+		memset(slot + copy_size, 0, slot_size - copy_size);
+}
+
+/*
+ * Patch the second-to-last dword of the TOC image (as sized by its own TOC
+ * entry) with (RLC_TOC_FORMAT_API << 24) | 0x1, then copy the TOC into its
+ * autoload slot.
+ */
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
+{
+	void *toc = adev->psp.toc.start_addr;
+	const uint32_t toc_bytes = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;
+	uint32_t *dwords = toc;
+
+	*(dwords + toc_bytes / 4 - 2) = (RLC_TOC_FORMAT_API << 24) | 0x1;
+	gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
+						   toc, toc_bytes);
+}
+
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
+{ /* Copy the PFP, ME, MEC and RLC firmware sections into their RLC autoload slots. */
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ const struct rlc_firmware_header_v2_1 *rlcv21_hdr;
+ const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
+ uint16_t version_major, version_minor;
+
+ /* pfp ucode: the v2.0 gfx header gives offset/size of the instruction and data sections */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP,
+ fw_data, fw_size);
+ /* data: the same section is copied to both the P0 and P1 stack slots */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK,
+ fw_data, fw_size);
+ /* me ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME,
+ fw_data, fw_size);
+ /* data: the same section is copied to both the P0 and P1 stack slots */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P1_STACK,
+ fw_data, fw_size);
+ /* mec ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
+ fw_data, fw_size);
+ /* data: the same section is copied to the P0, P1, P2 and P3 stack slots */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
+ fw_data, fw_size);
+
+ /* rlc ucode: the main image is located through the common firmware header */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+ adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
+ fw_data, fw_size);
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2) {
+ if (version_minor >= 1) { /* v2.1+ headers also describe the GPM and SRM save/restore lists */
+ rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes));
+ fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
+ fw_data, fw_size);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes));
+ fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
+ fw_data, fw_size);
+ }
+ if (version_minor >= 2) { /* v2.2+ headers also describe the RLC IRAM and DRAM ucode */
+ rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
+ fw_data, fw_size);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
+ fw_data, fw_size);
+ }
+ }
+}
+
+/*
+ * Copy the ucode section of SDMA instance 0's firmware (located through its
+ * v3.0 SDMA header) into the SDMA_UCODE_TH0 autoload slot.
+ */
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
+{
+	const struct sdma_firmware_header_v3_0 *hdr =
+		(const struct sdma_firmware_header_v3_0 *)adev->sdma.instance[0].fw->data;
+	const uint32_t ucode_bytes = le32_to_cpu(hdr->ucode_size_bytes);
+	const __le32 *ucode = (const __le32 *)(adev->sdma.instance[0].fw->data +
+					       le32_to_cpu(hdr->ucode_offset_bytes));
+
+	gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
+						   ucode, ucode_bytes);
+}
+
+/*
+ * For both MES pipes, copy the ucode and the ucode-data sections of the
+ * pipe's firmware into the matching RS64_MES_Px / RS64_MES_Px_STACK autoload
+ * slots.
+ */
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
+{
+	static const int ucode_ids[] = {
+		SOC24_FIRMWARE_ID_RS64_MES_P0, SOC24_FIRMWARE_ID_RS64_MES_P1,
+	};
+	static const int data_ids[] = {
+		SOC24_FIRMWARE_ID_RS64_MES_P0_STACK, SOC24_FIRMWARE_ID_RS64_MES_P1_STACK,
+	};
+	const struct mes_firmware_header_v1_0 *hdr;
+	const __le32 *section;
+	unsigned int bytes;
+	int pipe;
+
+	for (pipe = 0; pipe < 2; pipe++) {
+		hdr = (const struct mes_firmware_header_v1_0 *)adev->mes.fw[pipe]->data;
+
+		section = (const __le32 *)(adev->mes.fw[pipe]->data +
+					   le32_to_cpu(hdr->mes_ucode_offset_bytes));
+		bytes = le32_to_cpu(hdr->mes_ucode_size_bytes);
+		gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, ucode_ids[pipe], section, bytes);
+
+		section = (const __le32 *)(adev->mes.fw[pipe]->data +
+					   le32_to_cpu(hdr->mes_ucode_data_offset_bytes));
+		bytes = le32_to_cpu(hdr->mes_ucode_data_size_bytes);
+		gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, data_ids[pipe], section, bytes);
+	}
+}
+
+/*
+ * Copy all firmware images into the autoload buffer, point the IMU RLC
+ * bootloader registers at the RLC_G ucode, then start autoload through the IMU
+ * (IMU funcs present and amdgpu_dpm > 0) or by enabling the RLC. Returns 0.
+ */
+static int gfx_v12_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+	uint32_t fw_offset, fw_size;
+	uint64_t fw_addr;
+	uint32_t thread_en;
+
+	/* RLC autoload sequence 2: copy ucode */
+	gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
+	gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
+	gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(adev);
+	gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
+	fw_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
+	fw_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
+	fw_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + fw_offset - adev->gmc.vram_start;
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(fw_addr));
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(fw_addr));
+	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, fw_size);
+
+	if (!adev->gfx.imu.funcs || amdgpu_dpm <= 0) {
+		/* unhalt rlc to start autoload without imu */
+		thread_en = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
+		thread_en = REG_SET_FIELD(thread_en, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
+		thread_en = REG_SET_FIELD(thread_en, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
+		WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, thread_en);
+		WREG32_SOC15(GC, 0, regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
+	} else {
+		/* RLC autoload sequence 3: load IMU fw */
+		if (adev->gfx.imu.funcs->load_microcode)
+			adev->gfx.imu.funcs->load_microcode(adev);
+		/* RLC autoload sequence 4 init IMU fw */
+		if (adev->gfx.imu.funcs->setup_imu)
+			adev->gfx.imu.funcs->setup_imu(adev);
+		if (adev->gfx.imu.funcs->start_imu)
+			adev->gfx.imu.funcs->start_imu(adev);
+		/* RLC autoload sequence 5 disable gpa mode */
+		gfx_v12_0_disable_gpa_mode(adev);
+	}
+	return 0;
+}
+
+/*
+ * Allocate the zero-initialised buffers that hold register snapshots for IP
+ * dumps:
+ *  - ip_dump_core: one value per gc_reg_list_12_0 entry
+ *  - ip_dump_compute_queues: gc_cp_reg_list_12 entries per MEC queue instance
+ *  - ip_dump_gfx_queues: gc_gfx_queue_reg_list_12 entries per ME queue instance
+ * A failed allocation is logged and leaves the matching pointer NULL.
+ */
+static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+	uint32_t *ptr;
+	uint32_t inst;
+
+	/* on failure ptr is NULL, so the same assignment covers both outcomes */
+	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+	if (!ptr)
+		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+	adev->gfx.ip_dump_core = ptr;
+
+	/* Allocate memory for compute queue registers for all the instances */
+	reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+		adev->gfx.mec.num_queue_per_pipe;
+
+	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+	if (!ptr)
+		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+	adev->gfx.ip_dump_compute_queues = ptr;
+
+	/* Allocate memory for gfx queue registers for all the instances */
+	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+	inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+		adev->gfx.me.num_queue_per_pipe;
+
+	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+	if (!ptr)
+		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+	adev->gfx.ip_dump_gfx_queues = ptr;
+}
+
+static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i, j, k, r, ring_id = 0;
+ unsigned num_compute_rings;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 2;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2780 &&
+ adev->gfx.pfp_fw_version >= 2840 &&
+ adev->gfx.mec_fw_version >= 3050 &&
+ adev->mes.fw_version[0] >= 123) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (adev->gfx.me_fw_version >= 2480 &&
+ adev->gfx.pfp_fw_version >= 2530 &&
+ adev->gfx.mec_fw_version >= 2680 &&
+ adev->mes.fw_version[0] >= 100)
+ adev->gfx.enable_cleaner_shader = true;
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ /* recalculate compute rings to use based on hardware configuration */
+ num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe) / 2;
+ adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
+ num_compute_rings);
+ }
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v12_0_me_init(adev);
+
+ r = gfx_v12_0_rlc_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v12_0_mec_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ if (adev->gfx.num_gfx_rings) {
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v12_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev,
+ 0, i, k, j))
+ continue;
+
+ r = gfx_v12_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if ((adev->gfx.me_fw_version >= 2660) &&
+ (adev->gfx.mec_fw_version >= 2920) &&
+ !amdgpu_sriov_vf(adev)) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (!adev->enable_mes_kiq) {
+ r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
+ if (r) {
+ dev_err(adev->dev, "Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_compute_mqd), 0);
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v12_0_rlc_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_0_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ gfx_v12_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/*
+ * gfx_v12_0_pfp_fini - release the PFP firmware buffer objects
+ * @adev: amdgpu device
+ *
+ * Hands the PFP ucode BO and the PFP data BO, each together with its GPU
+ * address and CPU mapping pointer, to amdgpu_bo_free_kernel().
+ */
+static void gfx_v12_0_pfp_fini(struct amdgpu_device *adev)
+{
+ /* PFP instruction (ucode) buffer */
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+
+ /* PFP data buffer */
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+}
+
+/*
+ * gfx_v12_0_me_fini - release the ME firmware buffer objects
+ * @adev: amdgpu device
+ *
+ * Hands the ME ucode BO and the ME data BO, each together with its GPU
+ * address and CPU mapping pointer, to amdgpu_bo_free_kernel().
+ */
+static void gfx_v12_0_me_fini(struct amdgpu_device *adev)
+{
+ /* ME instruction (ucode) buffer */
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+
+ /* ME data buffer */
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+}
+
+/*
+ * gfx_v12_0_rlc_autoload_buffer_fini - release the RLC autoload buffer
+ * @adev: amdgpu device
+ *
+ * Hands the RLC autoload BO, its GPU address and its CPU mapping pointer to
+ * amdgpu_bo_free_kernel().
+ */
+static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
+/*
+ * gfx_v12_0_sw_fini - software teardown for the GFX v12 IP block
+ * @ip_block: IP block whose ->adev is torn down
+ *
+ * Finalizes all gfx and compute rings, the MQD state, the KIQ (only when
+ * MES KIQ is not in use), the PFP/ME/RLC/MEC firmware objects, the RLC
+ * autoload buffer (only for the RLC backdoor autoload type), the microcode,
+ * the gfx sysfs entries and the IP dump buffers.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* Tear down every gfx ring, then every compute ring. */
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ /* The driver-side KIQ is only torn down when MES KIQ is not enabled. */
+ if (!adev->enable_mes_kiq) {
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+ }
+
+ /* Firmware buffer objects of the individual engines. */
+ gfx_v12_0_pfp_fini(adev);
+ gfx_v12_0_me_fini(adev);
+ gfx_v12_0_rlc_fini(adev);
+ gfx_v12_0_mec_fini(adev);
+
+ /* The autoload buffer is only released for the backdoor autoload type. */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v12_0_rlc_autoload_buffer_fini(adev);
+
+ gfx_v12_0_free_microcode(adev);
+
+ amdgpu_gfx_sysfs_fini(adev);
+
+ /* IP dump buffers; kfree() accepts NULL, so no guards are needed. */
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
+
+ return 0;
+}
+
+/*
+ * gfx_v12_0_select_se_sh - program GRBM_GFX_INDEX for an SE/SA/instance
+ * @adev: amdgpu device
+ * @se_num: shader engine index, or 0xffffffff to set SE_BROADCAST_WRITES
+ * @sh_num: shader array index, or 0xffffffff to set SA_BROADCAST_WRITES
+ * @instance: instance index, or 0xffffffff to set INSTANCE_BROADCAST_WRITES
+ * @xcc_id: not used by this implementation
+ *
+ * Builds the register value from zero and writes it to GRBM_GFX_INDEX.
+ */
+static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+				   u32 sh_num, u32 instance, int xcc_id)
+{
+	const u32 broadcast = 0xffffffff;
+	u32 index_val = 0;
+
+	/* instance selection */
+	if (instance != broadcast)
+		index_val = REG_SET_FIELD(index_val, GRBM_GFX_INDEX,
+					  INSTANCE_INDEX, instance);
+	else
+		index_val = REG_SET_FIELD(index_val, GRBM_GFX_INDEX,
+					  INSTANCE_BROADCAST_WRITES, 1);
+
+	/* shader engine selection */
+	if (se_num != broadcast)
+		index_val = REG_SET_FIELD(index_val, GRBM_GFX_INDEX,
+					  SE_INDEX, se_num);
+	else
+		index_val = REG_SET_FIELD(index_val, GRBM_GFX_INDEX,
+					  SE_BROADCAST_WRITES, 1);
+
+	/* shader array selection */
+	if (sh_num != broadcast)
+		index_val = REG_SET_FIELD(index_val, GRBM_GFX_INDEX,
+					  SA_INDEX, sh_num);
+	else
+		index_val = REG_SET_FIELD(index_val, GRBM_GFX_INDEX,
+					  SA_BROADCAST_WRITES, 1);
+
+	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, index_val);
+}
+
+/*
+ * gfx_v12_0_get_sa_active_bitmap - bitmap of shader arrays that are enabled
+ * @adev: amdgpu device
+ *
+ * Reads the SA_DISABLE field of GRBM_CC_GC_SA_UNIT_DISABLE and of
+ * GRBM_GC_USER_SA_UNIT_DISABLE, and removes both from a bitmask that is
+ * (max_sh_per_se * max_shader_engines) bits wide.
+ *
+ * Returns the resulting bitmap.
+ */
+static u32 gfx_v12_0_get_sa_active_bitmap(struct amdgpu_device *adev)
+{
+	u32 cc_disabled, user_disabled, all_sa;
+
+	cc_disabled = REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_CC_GC_SA_UNIT_DISABLE),
+				    GRBM_CC_GC_SA_UNIT_DISABLE, SA_DISABLE);
+	user_disabled = REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_GC_USER_SA_UNIT_DISABLE),
+				      GRBM_GC_USER_SA_UNIT_DISABLE, SA_DISABLE);
+
+	all_sa = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+					   adev->gfx.config.max_shader_engines);
+
+	return all_sa & ~(cc_disabled | user_disabled);
+}
+
+/*
+ * gfx_v12_0_get_rb_active_bitmap - bitmap of render backends that are enabled
+ * @adev: amdgpu device
+ *
+ * Reads the BACKEND_DISABLE field of CC_RB_BACKEND_DISABLE and of
+ * GC_USER_RB_BACKEND_DISABLE, and removes both from a bitmask that is
+ * (max_backends_per_se * max_shader_engines) bits wide.
+ *
+ * Returns the resulting bitmap.
+ */
+static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+	u32 cc_disabled, user_disabled, all_rb;
+
+	cc_disabled = REG_GET_FIELD(RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE),
+				    CC_RB_BACKEND_DISABLE, BACKEND_DISABLE);
+	user_disabled = REG_GET_FIELD(RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE),
+				      GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
+
+	all_rb = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+					   adev->gfx.config.max_shader_engines);
+
+	return all_rb & ~(cc_disabled | user_disabled);
+}
+
+/*
+ * gfx_v12_0_setup_rb - compute the active render backend configuration
+ * @adev: amdgpu device
+ *
+ * Derives the active RB bitmap from the active SA bitmap: every active SA
+ * contributes (max_backends_per_se / max_sh_per_se) RB bits at its position.
+ * The result is then masked with the RB bitmap read from the backend-disable
+ * registers and stored in adev->gfx.config.backend_enable_mask, with the
+ * number of set bits stored in adev->gfx.config.num_rbs.
+ */
+static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
+{
+	u32 rb_bitmap_per_sa;
+	u32 rb_bitmap_width_per_sa;
+	u32 max_sa;
+	u32 active_sa_bitmap;
+	u32 global_active_rb_bitmap;
+	u32 active_rb_bitmap = 0;
+	u32 i;
+
+	/* query sa bitmap from SA_UNIT_DISABLE registers */
+	active_sa_bitmap = gfx_v12_0_get_sa_active_bitmap(adev);
+	/* query rb bitmap from RB_BACKEND_DISABLE registers */
+	global_active_rb_bitmap = gfx_v12_0_get_rb_active_bitmap(adev);
+
+	/* generate active rb bitmap according to active sa bitmap */
+	max_sa = adev->gfx.config.max_shader_engines *
+		 adev->gfx.config.max_sh_per_se;
+	rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+				 adev->gfx.config.max_sh_per_se;
+	rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
+	for (i = 0; i < max_sa; i++) {
+		/*
+		 * Test the bit with an unsigned constant: shifting a signed
+		 * 1 into bit 31 is undefined behaviour.
+		 */
+		if (active_sa_bitmap & (1U << i))
+			active_rb_bitmap |= (rb_bitmap_per_sa <<
+					     (i * rb_bitmap_width_per_sa));
+	}
+
+	active_rb_bitmap &= global_active_rb_bitmap;
+	adev->gfx.config.backend_enable_mask = active_rb_bitmap;
+	adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
+}
+
+/* Aperture base values packed into SH_MEM_BASES by gfx_v12_0_init_compute_vmid() */
+#define LDS_APP_BASE 0x1
+#define SCRATCH_APP_BASE 0x2
+
+/*
+ * gfx_v12_0_init_compute_vmid - program SH_MEM and trap settings for KFD VMIDs
+ * @adev: amdgpu device
+ *
+ * For every VMID from adev->vm_manager.first_kfd_vmid up to AMDGPU_NUM_VMID,
+ * selects the VMID with soc24_grbm_select(), writes SH_MEM_CONFIG and
+ * SH_MEM_BASES, and sets TRAP_EN in SPI_GDBG_PER_VMID_CNTL. The whole
+ * sequence runs under adev->srbm_mutex and ends by selecting VMID 0 again.
+ */
+static void gfx_v12_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ /* LDS base goes into the SHARED_BASE field, scratch base into the low bits. */
+ sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ SCRATCH_APP_BASE;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc24_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
+ }
+ /* Restore the default selection before dropping the lock. */
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * gfx_v12_0_tcp_harvest - placeholder for TCP harvesting
+ * @adev: amdgpu device (unused)
+ *
+ * Currently an empty stub; it performs no register access.
+ */
+static void gfx_v12_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ /* TODO: harvest feature to be added later. */
+}
+
+/*
+ * gfx_v12_0_get_tcc_info - placeholder for TCC information query
+ * @adev: amdgpu device (unused)
+ *
+ * Currently an empty stub; it is called from gfx_v12_0_constants_init() but
+ * does nothing.
+ */
+static void gfx_v12_0_get_tcc_info(struct amdgpu_device *adev)
+{
+}
+
+/*
+ * gfx_v12_0_constants_init - program static GFX configuration
+ * @adev: amdgpu device
+ *
+ * Sets the GRBM read timeout (skipped for SR-IOV VFs), computes the RB and
+ * CU configuration, then programs SH_MEM_CONFIG for every VMID of GFX hub 0
+ * and SH_MEM_BASES for every VMID except 0, under adev->srbm_mutex.
+ * Finishes by calling gfx_v12_0_init_compute_vmid().
+ */
+static void gfx_v12_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v12_0_setup_rb(adev);
+ gfx_v12_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v12_0_get_tcc_info(adev);
+ adev->gfx.config.pa_sc_tile_steering_override = 0;
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc24_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ /* VMID 0 keeps its SH_MEM_BASES value; only the others are written. */
+ if (i != 0) {
+ /* Bits 63:48 of each aperture start go into the base fields. */
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
+ }
+ }
+ /* Restore the default selection before dropping the lock. */
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v12_0_init_compute_vmid(adev);
+}
+
+/*
+ * gfx_v12_0_get_cpg_int_cntl - look up the CP gfx interrupt control register
+ * @adev: amdgpu device
+ * @me: micro engine index
+ * @pipe: pipe index
+ *
+ * Returns the offset of CP_INT_CNTL_RING0 for me 0 / pipe 0, and 0 for any
+ * other combination.
+ */
+static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+				      int me, int pipe)
+{
+	if (me == 0 && pipe == 0)
+		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+
+	return 0;
+}
+
+/*
+ * gfx_v12_0_get_cpc_int_cntl - look up a CP compute interrupt control register
+ * @adev: amdgpu device
+ * @me: micro engine index
+ * @pipe: pipe index
+ *
+ * Returns the offset of CP_ME1_PIPE0_INT_CNTL or CP_ME1_PIPE1_INT_CNTL for
+ * me 1 / pipe 0 or 1, and 0 for any other combination.
+ */
+static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+				      int me, int pipe)
+{
+	/*
+	 * Only the first MEC is driven by amdgpu, so only its pipes are
+	 * resolved here; interrupts of the remaining pipes are set up by
+	 * amdkfd.
+	 */
+	if (me != 1)
+		return 0;
+
+	if (pipe == 0)
+		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+	if (pipe == 1)
+		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+
+	return 0;
+}
+
+/*
+ * gfx_v12_0_enable_gui_idle_interrupt - toggle the CP busy/idle interrupts
+ * @adev: amdgpu device
+ * @enable: true to set the enable bits, false to clear them
+ *
+ * Does nothing for SR-IOV VFs. Otherwise, for every me/pipe pair that
+ * gfx_v12_0_get_cpg_int_cntl() resolves to a register, updates the
+ * CNTX_BUSY, CNTX_EMPTY, CMP_BUSY and GFX_IDLE interrupt enable fields.
+ */
+static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+						bool enable)
+{
+	const u32 en = enable ? 1 : 0;
+	u32 reg_off, val;
+	int me, pipe;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	for (me = 0; me < adev->gfx.me.num_me; me++) {
+		for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
+			reg_off = gfx_v12_0_get_cpg_int_cntl(adev, me, pipe);
+			/* no interrupt control register for this me/pipe */
+			if (!reg_off)
+				continue;
+
+			val = RREG32_SOC15_IP(GC, reg_off);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    CNTX_BUSY_INT_ENABLE, en);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    CNTX_EMPTY_INT_ENABLE, en);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    CMP_BUSY_INT_ENABLE, en);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    GFX_IDLE_INT_ENABLE, en);
+			WREG32_SOC15_IP(GC, reg_off, val);
+		}
+	}
+}
+
+/*
+ * gfx_v12_0_init_csb - fill the clear state buffer and point the RLC at it
+ * @adev: amdgpu device
+ *
+ * Calls the RLC get_csb_buffer() callback on adev->gfx.rlc.cs_ptr, then
+ * writes the clear state GPU address and size to the RLC_CSIB registers.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ /* The low two address bits are masked off before the write. */
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+
+ return 0;
+}
+
+/*
+ * gfx_v12_0_rlc_stop - clear the RLC enable bit
+ * @adev: amdgpu device
+ *
+ * Read-modify-write of RLC_CNTL that clears RLC_ENABLE_F32.
+ */
+static void gfx_v12_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
+}
+
+/*
+ * gfx_v12_0_rlc_reset - pulse the RLC soft reset bit
+ * @adev: amdgpu device
+ *
+ * Sets SOFT_RESET_RLC in GRBM_SOFT_RESET, waits 50us, clears it again and
+ * waits another 50us.
+ */
+static void gfx_v12_0_rlc_reset(struct amdgpu_device *adev)
+{
+ /* assert reset */
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ /* deassert reset */
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+/*
+ * gfx_v12_0_rlc_smu_handshake_cntl - enable or disable the RLC/SMU handshake
+ * @adev: amdgpu device
+ * @enable: true clears SMU_HANDSHAKE_DISABLE, false sets it
+ *
+ * Read-modify-write of RLC_PG_CNTL.
+ *
+ * RLC_PG_CNTL[23] = 0 (default)
+ *   RLC will wait for handshake acks with SMU
+ *   GFXOFF will be enabled
+ * RLC_PG_CNTL[23] = 1
+ *   RLC will not issue any message to SMU
+ *   hence no handshake between SMU & RLC
+ *   GFXOFF will be disabled
+ */
+static void gfx_v12_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+					     bool enable)
+{
+	uint32_t pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+	if (enable)
+		pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+	else
+		pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+
+	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, pg_cntl);
+}
+
+/*
+ * gfx_v12_0_rlc_start - set the RLC enable bit
+ * @adev: amdgpu device
+ *
+ * When PP_GFXOFF_MASK is not set in amdgpu_pp_feature_mask, the RLC/SMU
+ * handshake is disabled first. RLC_ENABLE_F32 is then set in RLC_CNTL,
+ * followed by a 50us delay.
+ */
+static void gfx_v12_0_rlc_start(struct amdgpu_device *adev)
+{
+ /* TODO: enable rlc & smu handshake until smu
+ * and gfxoff feature works as expected */
+ /* Note: the call below passes false, i.e. it turns the handshake off. */
+ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
+ gfx_v12_0_rlc_smu_handshake_cntl(adev, false);
+
+ WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+}
+
+/*
+ * gfx_v12_0_rlc_enable_srm - enable the RLC Save Restore Machine
+ * @adev: amdgpu device
+ *
+ * Read-modify-write of RLC_SRM_CNTL that sets the AUTO_INCR_ADDR and
+ * SRM_ENABLE bits.
+ */
+static void gfx_v12_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* enable Save Restore Machine */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
+ tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+ tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
+}
+
+/*
+ * gfx_v12_0_load_rlcg_microcode - write the RLCG ucode through registers
+ * @adev: amdgpu device
+ *
+ * Locates the ucode array inside adev->gfx.rlc_fw using the v2.0 header,
+ * sets RLC_GPM_UCODE_ADDR to RLCG_UCODE_LOADING_START_ADDRESS, streams the
+ * image one dword at a time into RLC_GPM_UCODE_DATA, and finally writes the
+ * RLC firmware version to RLC_GPM_UCODE_ADDR.
+ *
+ * The caller must ensure adev->gfx.rlc_fw is non-NULL; it is dereferenced
+ * without a check here.
+ */
+static void gfx_v12_0_load_rlcg_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ /* size in dwords */
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ /* The address register is left holding the firmware version. */
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+}
+
+/*
+ * gfx_v12_0_load_rlc_iram_dram_microcode - write the RLC LX6 IRAM/DRAM images
+ * @adev: amdgpu device
+ *
+ * Uses the v2.2 RLC firmware header to locate the IRAM and DRAM images in
+ * adev->gfx.rlc_fw and streams each, one dword at a time, into the
+ * RLC_LX6_IRAM_DATA / RLC_LX6_DRAM_DATA registers. Afterwards sets
+ * PDEBUG_ENABLE and clears BRESET in RLC_LX6_CNTL.
+ *
+ * In emulation mode (amdgpu_emu_mode == 1) the loops sleep 1ms after every
+ * 100 dwords.
+ */
+static void gfx_v12_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ /* IRAM image: location and size in dwords */
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ /* DRAM image: location and size in dwords */
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ /*
+ * NOTE(review): this writes the version to the IRAM address register a
+ * second time after the DRAM upload; confirm whether RLC_LX6_DRAM_ADDR
+ * was intended here.
+ */
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
+ WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
+}
+
+/*
+ * gfx_v12_0_rlc_load_microcode - legacy (direct) RLC firmware load
+ * @adev: amdgpu device
+ *
+ * Prints the RLC firmware header, then for a major header version of 2
+ * loads the RLCG ucode and, when amdgpu_dpm == 1 and the minor version is
+ * at least 2, the IRAM/DRAM images as well.
+ *
+ * Returns 0 on success, -EINVAL when no RLC firmware is present or the
+ * major header version is not 2.
+ */
+static int gfx_v12_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	uint16_t major, minor;
+
+	if (!adev->gfx.rlc_fw)
+		return -EINVAL;
+
+	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+	amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header);
+
+	major = le16_to_cpu(rlc_hdr->header.header_version_major);
+	minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+
+	/* only v2.x headers are understood */
+	if (major != 2)
+		return -EINVAL;
+
+	gfx_v12_0_load_rlcg_microcode(adev);
+
+	if (amdgpu_dpm == 1 && minor >= 2)
+		gfx_v12_0_load_rlc_iram_dram_microcode(adev);
+
+	return 0;
+}
+
+/*
+ * gfx_v12_0_rlc_resume - bring up the RLC
+ * @adev: amdgpu device
+ *
+ * PSP load type: initializes the clear state buffer and, unless running as
+ * an SR-IOV VF, enables the RLC SRM.
+ *
+ * Other load types: an SR-IOV VF only initializes the clear state buffer.
+ * Otherwise the RLC is stopped, CG and PG control registers are zeroed, the
+ * RLC microcode is loaded (direct load type only), the clear state buffer
+ * is initialized and the RLC is started.
+ *
+ * Returns 0 on success or the error from gfx_v12_0_rlc_load_microcode().
+ */
+static int gfx_v12_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ gfx_v12_0_init_csb(adev);
+
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v12_0_rlc_enable_srm(adev);
+ } else {
+ /* A VF does not touch the RLC beyond the clear state buffer. */
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v12_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
+
+ /* disable PG */
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy rlc firmware loading */
+ r = gfx_v12_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v12_0_init_csb(adev);
+
+ adev->gfx.rlc.funcs->start(adev);
+ }
+
+ return 0;
+}
+
+/*
+ * gfx_v12_0_config_gfx_rs64 - program RS64 start addresses and reset pipes
+ * @adev: amdgpu device
+ *
+ * For PFP, ME and MEC in turn: writes the program counter start registers
+ * of every pipe from the respective firmware header, then sets and clears
+ * the pipe reset bits (CP_ME_CNTL for PFP/ME, CP_MEC_RS64_CNTL for MEC).
+ *
+ * The start address fields are converted with le32_to_cpu(), matching how
+ * the other firmware header fields are read in this file.
+ *
+ * NOTE(review): unlike the gfx_v12_0_set_*_ucode_start_addr() helpers this
+ * function calls soc24_grbm_select() without taking adev->srbm_mutex
+ * itself, and it uses fixed pipe counts (2 gfx, 4 MEC) instead of
+ * adev->gfx.me.num_pipe_per_me / adev->gfx.mec.num_pipe_per_mec. Confirm
+ * against the caller whether either is intentional.
+ */
+static void gfx_v12_0_config_gfx_rs64(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v2_0 *pfp_hdr;
+	const struct gfx_firmware_header_v2_0 *me_hdr;
+	const struct gfx_firmware_header_v2_0 *mec_hdr;
+	uint32_t pipe_id, tmp;
+	uint32_t start_lo, start_hi;
+
+	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+		adev->gfx.mec_fw->data;
+	me_hdr = (const struct gfx_firmware_header_v2_0 *)
+		adev->gfx.me_fw->data;
+	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+		adev->gfx.pfp_fw->data;
+
+	/* config pfp program start addr */
+	start_lo = le32_to_cpu(pfp_hdr->ucode_start_addr_lo);
+	start_hi = le32_to_cpu(pfp_hdr->ucode_start_addr_hi);
+	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+		soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+			     (start_hi << 30) | (start_lo >> 2));
+		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+			     start_hi >> 2);
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+
+	/* reset pfp pipe */
+	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
+	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+	/* clear pfp pipe reset */
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
+	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+	/* config me program start addr */
+	start_lo = le32_to_cpu(me_hdr->ucode_start_addr_lo);
+	start_hi = le32_to_cpu(me_hdr->ucode_start_addr_hi);
+	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+		soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+			     (start_hi << 30) | (start_lo >> 2));
+		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+			     start_hi >> 2);
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+
+	/* reset me pipe */
+	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
+	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+	/* clear me pipe reset */
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
+	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+	/* config mec program start addr */
+	start_lo = le32_to_cpu(mec_hdr->ucode_start_addr_lo);
+	start_hi = le32_to_cpu(mec_hdr->ucode_start_addr_hi);
+	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
+		soc24_grbm_select(adev, 1, pipe_id, 0, 0);
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+			     (start_lo >> 2) | (start_hi << 30));
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+			     start_hi >> 2);
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+
+	/* reset mec pipe */
+	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
+	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+
+	/* clear mec pipe reset */
+	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
+	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+}
+
+/*
+ * gfx_v12_0_set_pfp_ucode_start_addr - program the PFP start address per pipe
+ * @adev: amdgpu device
+ *
+ * Under adev->srbm_mutex, for every gfx pipe (adev->gfx.me.num_pipe_per_me):
+ * selects the pipe with soc24_grbm_select(), writes the PFP program counter
+ * start registers from the PFP firmware header, then sets and clears that
+ * pipe's PFP reset bit in CP_ME_CNTL so the new start address takes effect.
+ * Pipe 0 uses PFP_PIPE0_RESET; every other pipe uses PFP_PIPE1_RESET.
+ *
+ * The start address fields are converted with le32_to_cpu(), matching how
+ * the other firmware header fields are read in this file.
+ */
+static void gfx_v12_0_set_pfp_ucode_start_addr(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v2_0 *cp_hdr;
+	unsigned pipe_id, tmp;
+	u32 start_lo, start_hi;
+
+	cp_hdr = (const struct gfx_firmware_header_v2_0 *)
+		adev->gfx.pfp_fw->data;
+	start_lo = le32_to_cpu(cp_hdr->ucode_start_addr_lo);
+	start_hi = le32_to_cpu(cp_hdr->ucode_start_addr_hi);
+
+	mutex_lock(&adev->srbm_mutex);
+	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+		soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+			     (start_hi << 30) | (start_lo >> 2));
+		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+			     start_hi >> 2);
+
+		/*
+		 * Program CP_ME_CNTL to reset given PIPE to take
+		 * effect of CP_PFP_PRGRM_CNTR_START.
+		 */
+		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+		if (pipe_id == 0)
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					    PFP_PIPE0_RESET, 1);
+		else
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					    PFP_PIPE1_RESET, 1);
+		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+		/* Clear the PFP reset bit of the same pipe again. */
+		if (pipe_id == 0)
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					    PFP_PIPE0_RESET, 0);
+		else
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					    PFP_PIPE1_RESET, 0);
+		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * gfx_v12_0_set_me_ucode_start_addr - program the ME start address per pipe
+ * @adev: amdgpu device
+ *
+ * Under adev->srbm_mutex, for every gfx pipe (adev->gfx.me.num_pipe_per_me):
+ * selects the pipe with soc24_grbm_select(), writes the ME program counter
+ * start registers from the ME firmware header, then sets and clears that
+ * pipe's ME reset bit in CP_ME_CNTL so the new start address takes effect.
+ * Pipe 0 uses ME_PIPE0_RESET; every other pipe uses ME_PIPE1_RESET.
+ *
+ * The start address fields are converted with le32_to_cpu(), matching how
+ * the other firmware header fields are read in this file.
+ */
+static void gfx_v12_0_set_me_ucode_start_addr(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v2_0 *cp_hdr;
+	unsigned pipe_id, tmp;
+	u32 start_lo, start_hi;
+
+	cp_hdr = (const struct gfx_firmware_header_v2_0 *)
+		adev->gfx.me_fw->data;
+	start_lo = le32_to_cpu(cp_hdr->ucode_start_addr_lo);
+	start_hi = le32_to_cpu(cp_hdr->ucode_start_addr_hi);
+
+	mutex_lock(&adev->srbm_mutex);
+	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+		soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+			     (start_hi << 30) | (start_lo >> 2));
+		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+			     start_hi >> 2);
+
+		/*
+		 * Program CP_ME_CNTL to reset given PIPE to take
+		 * effect of CP_ME_PRGRM_CNTR_START.
+		 */
+		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+		if (pipe_id == 0)
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					    ME_PIPE0_RESET, 1);
+		else
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					    ME_PIPE1_RESET, 1);
+		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+		/* Clear the ME reset bit of the same pipe again. */
+		if (pipe_id == 0)
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					    ME_PIPE0_RESET, 0);
+		else
+			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+					    ME_PIPE1_RESET, 0);
+		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * gfx_v12_0_set_mec_ucode_start_addr - program the MEC start address per pipe
+ * @adev: amdgpu device
+ *
+ * Under adev->srbm_mutex, for every MEC pipe
+ * (adev->gfx.mec.num_pipe_per_mec): selects me 1 / the pipe with
+ * soc24_grbm_select() and writes the MEC RS64 program counter start
+ * registers from the MEC firmware header. No pipe reset is issued here.
+ *
+ * The start address fields are converted with le32_to_cpu(), matching how
+ * the other firmware header fields are read in this file.
+ */
+static void gfx_v12_0_set_mec_ucode_start_addr(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v2_0 *cp_hdr;
+	unsigned pipe_id;
+	u32 start_lo, start_hi;
+
+	cp_hdr = (const struct gfx_firmware_header_v2_0 *)
+		adev->gfx.mec_fw->data;
+	start_lo = le32_to_cpu(cp_hdr->ucode_start_addr_lo);
+	start_hi = le32_to_cpu(cp_hdr->ucode_start_addr_hi);
+
+	mutex_lock(&adev->srbm_mutex);
+	for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) {
+		soc24_grbm_select(adev, 1, pipe_id, 0, 0);
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+			     (start_lo >> 2) | (start_hi << 30));
+		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+			     start_hi >> 2);
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * gfx_v12_0_wait_for_rlc_autoload_complete - poll until RLC autoload is done
+ * @adev: amdgpu device
+ *
+ * Polls CP_STAT and RLC_RLCS_BOOTLOAD_STATUS up to adev->usec_timeout times,
+ * with a 1us delay per iteration (plus 10ms when amdgpu_emu_mode is set),
+ * until CP_STAT reads 0 and BOOTLOAD_COMPLETE reads 1. For the RLC backdoor
+ * autoload type the PFP, ME and MEC start addresses are programmed next.
+ *
+ * Returns 0 on success, -ETIMEDOUT when the poll limit is reached.
+ */
+static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+	uint32_t cp_stat, boot_stat;
+	bool loaded;
+	int tries;
+
+	for (tries = 0; tries < adev->usec_timeout; tries++) {
+		cp_stat = RREG32_SOC15(GC, 0, regCP_STAT);
+		boot_stat = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
+
+		loaded = REG_GET_FIELD(boot_stat, RLC_RLCS_BOOTLOAD_STATUS,
+				       BOOTLOAD_COMPLETE) == 1;
+		if (cp_stat == 0 && loaded)
+			break;
+
+		udelay(1);
+		if (amdgpu_emu_mode)
+			msleep(10);
+	}
+
+	if (tries >= adev->usec_timeout) {
+		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+		return 0;
+
+	gfx_v12_0_set_pfp_ucode_start_addr(adev);
+	gfx_v12_0_set_me_ucode_start_addr(adev);
+	gfx_v12_0_set_mec_ucode_start_addr(adev);
+
+	return 0;
+}
+
+/*
+ * gfx_v12_0_cp_gfx_enable - halt or unhalt the gfx ME and PFP
+ * @adev: amdgpu device
+ * @enable: true clears ME_HALT/PFP_HALT, false sets them
+ *
+ * Updates CP_ME_CNTL, then polls CP_STAT up to adev->usec_timeout times
+ * (1us apart) until it reads 0. A timeout is only logged.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+	const u32 halt = enable ? 0 : 1;
+	u32 me_cntl;
+	int tries = 0;
+
+	me_cntl = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+	me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL, ME_HALT, halt);
+	me_cntl = REG_SET_FIELD(me_cntl, CP_ME_CNTL, PFP_HALT, halt);
+	WREG32_SOC15(GC, 0, regCP_ME_CNTL, me_cntl);
+
+	while (tries < adev->usec_timeout) {
+		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
+			break;
+		udelay(1);
+		tries++;
+	}
+
+	if (tries >= adev->usec_timeout)
+		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+	return 0;
+}
+
+/*
+ * gfx_v12_0_cp_gfx_load_pfp_microcode_rs64 - load the RS64 PFP firmware
+ * @adev: amdgpu device
+ *
+ * Copies the PFP instruction and data images from adev->gfx.pfp_fw into two
+ * newly created VRAM buffer objects (64KB aligned), points the PFP
+ * instruction cache base at the ucode BO, waits for the instruction cache
+ * invalidation and priming to complete, programs the per-pipe data cache
+ * base with the data BO, invalidates the data cache, and finally programs
+ * the PFP start address.
+ *
+ * Returns 0 on success, the error from amdgpu_bo_create_reserved() when a
+ * BO cannot be created, or -EINVAL when one of the 50ms polls times out.
+ *
+ * NOTE(review): the -EINVAL paths return without freeing the BOs; within
+ * this file they are released by gfx_v12_0_pfp_fini(), which
+ * gfx_v12_0_sw_fini() calls.
+ */
+static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
+
+ /* 64kb align */
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
+ gfx_v12_0_pfp_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+ if (r) {
+ /*
+ * NOTE(review): the ucode BO created above has not been
+ * unreserved yet at this point (that only happens further
+ * down); confirm amdgpu_bo_free_kernel() copes with that.
+ */
+ dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
+ gfx_v12_0_pfp_fini(adev);
+ return r;
+ }
+
+ /* Copy both images into the BOs through their CPU mappings. */
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* Point the PFP instruction cache base at the ucode BO. */
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the L1 I$. Wait for the
+ * invalidation to complete.
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Wait for the cache to be primed */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program the data cache base (BASE0) of every gfx pipe with the data BO. */
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ /* Wait for the data cache invalidation to complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ gfx_v12_0_set_pfp_ucode_start_addr(adev);
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
+
+ /* 64kb align*/
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
+ gfx_v12_0_me_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
+ gfx_v12_0_me_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Waiting for instruction cache primed*/
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ gfx_v12_0_set_me_ucode_start_addr(adev);
+
+ return 0;
+}
+
+/*
+ * Load the gfx CP microcode: the gfx CP is disabled first, then the PFP
+ * image is loaded, followed by the ME image.
+ *
+ * Returns -EINVAL when either firmware image is missing, the error of the
+ * first loader that fails, or 0 on success.
+ */
+static int gfx_v12_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+	int ret;
+
+	if (!(adev->gfx.me_fw && adev->gfx.pfp_fw))
+		return -EINVAL;
+
+	/* the CP is disabled before its ucode is replaced */
+	gfx_v12_0_cp_gfx_enable(adev, false);
+
+	ret = gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(adev);
+	if (ret) {
+		dev_err(adev->dev, "(%d) failed to load pfp fw\n", ret);
+		return ret;
+	}
+
+	ret = gfx_v12_0_cp_gfx_load_me_microcode_rs64(adev);
+	if (ret)
+		dev_err(adev->dev, "(%d) failed to load me fw\n", ret);
+
+	return ret;
+}
+
+/*
+ * Program CP_MAX_CONTEXT and CP_DEVICE_ID and, unless async gfx rings are
+ * in use, enable the gfx CP. Always returns 0.
+ */
+static int gfx_v12_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+	u32 max_ctx = adev->gfx.config.max_hw_contexts - 1;
+
+	/* init the CP */
+	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, max_ctx);
+	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
+
+	if (amdgpu_async_gfx_ring)
+		return 0;
+
+	gfx_v12_0_cp_gfx_enable(adev, true);
+	return 0;
+}
+
+/*
+ * Select @pipe in GRBM_GFX_CNTL.PIPEID (read-modify-write). The callers in
+ * this file hold adev->srbm_mutex around the call.
+ */
+static void gfx_v12_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+					 CP_PIPE_ID pipe)
+{
+	u32 grbm_cntl = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+
+	grbm_cntl = REG_SET_FIELD(grbm_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_cntl);
+}
+
+/*
+ * Program the ring-buffer doorbell for @ring: CP_RB_DOORBELL_CONTROL gets
+ * the ring's doorbell offset and enable bit (or just a cleared enable bit
+ * when the ring does not use a doorbell), then the doorbell range registers
+ * are written.
+ */
+static void gfx_v12_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+					  struct amdgpu_ring *ring)
+{
+	u32 db_ctrl, db_lower;
+
+	db_ctrl = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+	if (!ring->use_doorbell) {
+		db_ctrl = REG_SET_FIELD(db_ctrl, CP_RB_DOORBELL_CONTROL,
+					DOORBELL_EN, 0);
+	} else {
+		db_ctrl = REG_SET_FIELD(db_ctrl, CP_RB_DOORBELL_CONTROL,
+					DOORBELL_OFFSET, ring->doorbell_index);
+		db_ctrl = REG_SET_FIELD(db_ctrl, CP_RB_DOORBELL_CONTROL,
+					DOORBELL_EN, 1);
+	}
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, db_ctrl);
+
+	/* the range starts at this ring's doorbell and is open-ended above */
+	db_lower = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+				 DOORBELL_RANGE_LOWER, ring->doorbell_index);
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, db_lower);
+
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
+
+/*
+ * Bring up gfx ring 0 on pipe 0 by programming the CP_RB0_* registers
+ * directly (size, write pointer, rptr/wptr write-back addresses, base
+ * address, doorbell), then start the CP through gfx_v12_0_cp_gfx_start().
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
+
+ /* Init gfx ring 0 for pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ /* RB_BUFSZ is log2 of the ring size expressed in 8-byte units */
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ /* CP_RB0_CNTL is written a second time, with the same value, after a 1 ms delay */
+ mdelay(1);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ /* the base registers take the ring address shifted right by 8 bits */
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
+
+ gfx_v12_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Switch to pipe 0 */
+ /*
+  * NOTE(review): pipe 0 was already selected above under the same mutex
+  * and nothing in between selects another pipe, so this second
+  * lock/select/unlock looks redundant - confirm before removing.
+  */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* start the ring */
+ gfx_v12_0_cp_gfx_start(adev);
+ return 0;
+}
+
+/*
+ * Enable or disable the compute CP (MEC) through CP_MEC_RS64_CNTL.
+ *
+ * Enabling clears the icache-invalidate, per-pipe reset and halt bits and
+ * sets the per-pipe active bits; disabling does the opposite. The KIQ ring's
+ * scheduler ready flag is set to @enable, and the function then delays 50us.
+ */
+static void gfx_v12_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+	const u32 on = enable ? 1 : 0;
+	const u32 off = enable ? 0 : 1;
+	u32 cntl;
+
+	cntl = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, off);
+
+	/* pipes are held in reset while disabled */
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, off);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, off);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, off);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, off);
+
+	/* pipes are marked active only while enabled */
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, on);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, on);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, on);
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, on);
+
+	cntl = REG_SET_FIELD(cntl, CP_MEC_RS64_CNTL, MEC_HALT, off);
+
+	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, cntl);
+
+	adev->gfx.kiq[0].ring.sched.ready = enable;
+
+	udelay(50);
+}
+
+/*
+ * Load the RS64 MEC (compute) microcode; called from gfx_v12_0_cp_resume()
+ * for AMDGPU_FW_LOAD_DIRECT.
+ *
+ * The ucode image is copied into one VRAM buffer. The data image is copied
+ * once per MEC pipe into a second VRAM buffer, each copy starting on a 64KB
+ * boundary. The per-pipe base registers are then pointed at those buffers
+ * and the MEC data cache and the CPC instruction cache are invalidated.
+ *
+ * Returns 0 on success, -EINVAL when no MEC firmware is present or a cache
+ * invalidation does not complete within 50 ms, or the error returned by
+ * amdgpu_bo_create_reserved().
+ */
+static int gfx_v12_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
+{
+	const struct gfx_firmware_header_v2_0 *mec_hdr;
+	const __le32 *fw_ucode, *fw_data;
+	u32 tmp, fw_ucode_size, fw_data_size, fw_data_stride;
+	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
+	u32 *fw_ucode_ptr, *fw_data_ptr;
+	u64 pipe_data_addr;
+	int r;
+
+	if (!adev->gfx.mec_fw)
+		return -EINVAL;
+
+	gfx_v12_0_cp_compute_enable(adev, false);
+
+	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
+				le32_to_cpu(mec_hdr->ucode_offset_bytes));
+	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
+
+	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+				le32_to_cpu(mec_hdr->data_offset_bytes));
+	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
+
+	/* distance in bytes between the per-pipe copies of the data image */
+	fw_data_stride = ALIGN(fw_data_size, 64 * 1024);
+
+	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+				      &adev->gfx.mec.mec_fw_obj,
+				      &adev->gfx.mec.mec_fw_gpu_addr,
+				      (void **)&fw_ucode_ptr);
+	if (r) {
+		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+		gfx_v12_0_mec_fini(adev);
+		return r;
+	}
+
+	r = amdgpu_bo_create_reserved(adev,
+				      fw_data_stride *
+				      adev->gfx.mec.num_pipe_per_mec,
+				      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+				      &adev->gfx.mec.mec_fw_data_obj,
+				      &adev->gfx.mec.mec_fw_data_gpu_addr,
+				      (void **)&fw_data_ptr);
+	if (r) {
+		/* was a copy of the ucode-bo message; name the bo that failed */
+		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
+		gfx_v12_0_mec_fini(adev);
+		return r;
+	}
+
+	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
+	/* fw_data_ptr is a u32 pointer, hence the byte stride divided by 4 */
+	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++)
+		memcpy(fw_data_ptr + i * fw_data_stride / 4, fw_data, fw_data_size);
+
+	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
+
+	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+		soc24_grbm_select(adev, 1, i, 0, 0);
+
+		pipe_data_addr = adev->gfx.mec.mec_fw_data_gpu_addr +
+				 i * fw_data_stride;
+		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
+			     lower_32_bits(pipe_data_addr));
+		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+			     upper_32_bits(pipe_data_addr));
+
+		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
+			     lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+	}
+	/*
+	 * Restore the default GRBM selection while still holding srbm_mutex,
+	 * so that no other srbm_mutex holder can run with (or have its own
+	 * selection overwritten by) a stale me/pipe selection.
+	 */
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* Trigger an invalidation of the MEC data cache */
+	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+	/* Wait for invalidation complete */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+				       INVALIDATE_DCACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+		return -EINVAL;
+	}
+
+	/* Trigger an invalidation of the L1 instruction caches */
+	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+	/* Wait for invalidation complete */
+	for (i = 0; i < usec_timeout; i++) {
+		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+				       INVALIDATE_CACHE_COMPLETE))
+			break;
+		udelay(1);
+	}
+
+	if (i >= usec_timeout) {
+		dev_err(adev->dev, "failed to invalidate instruction cache\n");
+		return -EINVAL;
+	}
+
+	gfx_v12_0_set_mec_ucode_start_addr(adev);
+
+	return 0;
+}
+
+/*
+ * Tell the RLC which queue is the KIQ: the low byte of RLC_CP_SCHEDULERS is
+ * replaced with the ring's me/pipe/queue selector, with bit 7 set.
+ */
+static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t sched, kiq_sel;
+
+	/* selector layout: me at bit 5, pipe at bit 3, queue at bit 0 */
+	kiq_sel = (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+
+	sched = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+	sched = (sched & 0xffffff00) | kiq_sel;
+	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, sched | 0x80);
+}
+
+/*
+ * Program the doorbell ranges of the graphics and compute engines from the
+ * device's doorbell index table. Each index is converted with
+ * (index * 2) << 2 before being written.
+ */
+static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev)
+{
+	u32 gfx_lower = (adev->doorbell_index.gfx_ring0 * 2) << 2;
+	u32 gfx_upper = (adev->doorbell_index.gfx_userqueue_end * 2) << 2;
+	u32 mec_lower = (adev->doorbell_index.kiq * 2) << 2;
+	u32 mec_upper = (adev->doorbell_index.userqueue_end * 2) << 2;
+
+	/* graphics engine: gfx_ring0 .. gfx_userqueue_end */
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, gfx_lower);
+	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, gfx_upper);
+
+	/* compute engine: kiq .. userqueue_end */
+	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, mec_lower);
+	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, mec_upper);
+}
+
+/*
+ * Fill a gfx MQD (struct v12_gfx_mqd) from the queue properties in @prop.
+ *
+ * @adev: device the queue belongs to
+ * @m:    MQD memory to fill, treated as struct v12_gfx_mqd
+ * @prop: queue properties (MQD/ring/rptr/wptr addresses, queue size,
+ *        doorbell, tmz/kernel flags, user-queue addresses)
+ *
+ * Only the MQD memory is written; no registers are touched here.
+ * Always returns 0.
+ */
+static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+				  struct amdgpu_mqd_prop *prop)
+{
+	struct v12_gfx_mqd *mqd = m;
+	uint64_t hqd_gpu_addr, wb_gpu_addr;
+	uint32_t tmp;
+	uint32_t rb_bufsz;
+
+	/* set up gfx hqd wptr */
+	mqd->cp_gfx_hqd_wptr = 0;
+	mqd->cp_gfx_hqd_wptr_hi = 0;
+
+	/* set the pointer to the MQD */
+	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+	/* set up mqd control */
+	tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+	mqd->cp_gfx_mqd_control = tmp;
+
+	/*
+	 * set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid.
+	 * The value used to be built from regCP_GFX_HQD_VMID_DEFAULT into a
+	 * temporary that was never stored; the field has always been written
+	 * as a literal 0, so only the dead computation was dropped.
+	 */
+	mqd->cp_gfx_hqd_vmid = 0;
+
+	/* set up default queue priority level
+	 * 0x0 = low priority, 0x1 = high priority */
+	tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
+	mqd->cp_gfx_hqd_queue_priority = tmp;
+
+	/* set up time quantum */
+	tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+	mqd->cp_gfx_hqd_quantum = tmp;
+
+	/* set up gfx hqd base. this is similar as CP_RB_BASE */
+	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+	/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+	wb_gpu_addr = prop->rptr_gpu_addr;
+	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_gfx_hqd_rptr_addr_hi =
+		upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* set up rb_wptr_poll addr */
+	wb_gpu_addr = prop->wptr_gpu_addr;
+	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
+	tmp = regCP_GFX_HQD_CNTL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+	if (prop->tmz_queue)
+		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+	if (!prop->kernel_queue)
+		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
+	mqd->cp_gfx_hqd_cntl = tmp;
+
+	/* set up cp_doorbell_control */
+	tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
+	if (prop->use_doorbell) {
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+				    DOORBELL_OFFSET, prop->doorbell_index);
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+				    DOORBELL_EN, 1);
+	} else {
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+				    DOORBELL_EN, 0);
+	}
+	mqd->cp_rb_doorbell_control = tmp;
+
+	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+	mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
+
+	/* active the queue */
+	mqd->cp_gfx_hqd_active = 1;
+
+	/* set gfx UQ items */
+	mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+	mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+	return 0;
+}
+
+/*
+ * Prepare the MQD of a kernel gfx queue.
+ *
+ * When @reset is set, the device is in reset, or the device is suspending,
+ * the MQD is restored from its backup copy (if one exists) and the ring is
+ * reset. Otherwise the MQD is zeroed, initialised through
+ * amdgpu_ring_init_mqd() with the queue selected, and then saved to the
+ * backup copy (if one exists).
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v12_gfx_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
+
+	if (reset || amdgpu_in_reset(adev) || adev->in_suspend) {
+		/* restore mqd with the backup copy */
+		if (adev->gfx.me.mqd_backup[mqd_idx])
+			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx],
+				    sizeof(*mqd));
+
+		/* reset the ring */
+		ring->wptr = 0;
+		*ring->wptr_cpu_addr = 0;
+		amdgpu_ring_clear_ring(ring);
+
+		return 0;
+	}
+
+	/* first-time init: build the MQD from scratch */
+	memset((void *)mqd, 0, sizeof(*mqd));
+
+	mutex_lock(&adev->srbm_mutex);
+	soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+	amdgpu_ring_init_mqd(ring);
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	if (adev->gfx.me.mqd_backup[mqd_idx])
+		memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd,
+			      sizeof(*mqd));
+
+	return 0;
+}
+
+/*
+ * Resume path for async gfx rings: initialise every kernel gfx queue,
+ * enable them through amdgpu_gfx_enable_kgq(), then start the CP.
+ *
+ * Returns the first error encountered, or the result of
+ * gfx_v12_0_cp_gfx_start().
+ */
+static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+	int idx, ret;
+
+	for (idx = 0; idx < adev->gfx.num_gfx_rings; idx++) {
+		ret = gfx_v12_0_kgq_init_queue(&adev->gfx.gfx_ring[idx], false);
+		if (ret)
+			return ret;
+	}
+
+	ret = amdgpu_gfx_enable_kgq(adev, 0);
+	if (ret)
+		return ret;
+
+	ret = gfx_v12_0_cp_gfx_start(adev);
+	return ret;
+}
+
+/*
+ * Fill a compute MQD (struct v12_compute_mqd) from the queue properties in
+ * @prop.
+ *
+ * @adev: device the queue belongs to
+ * @m:    MQD memory to fill, treated as struct v12_compute_mqd
+ * @prop: queue properties (EOP/MQD/ring/rptr/wptr addresses, queue size,
+ *        doorbell, priorities, tmz/kernel flags, fence address)
+ *
+ * Only the MQD memory is written; no registers are touched here.
+ * Always returns 0.
+ */
+static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+				      struct amdgpu_mqd_prop *prop)
+{
+	struct v12_compute_mqd *mqd = m;
+	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+	uint32_t tmp;
+
+	mqd->header = 0xC0310800;
+	mqd->compute_pipelinestat_enable = 0x00000001;
+	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+	mqd->compute_misc_reserved = 0x00000007;
+
+	eop_base_addr = prop->eop_gpu_addr >> 8;
+	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+			(order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
+
+	mqd->cp_hqd_eop_control = tmp;
+
+	/*
+	 * cp_hqd_pq_doorbell_control used to be computed and stored here as
+	 * well; that store was unconditionally overwritten further down
+	 * before anything read it, so only the later computation is kept.
+	 */
+
+	/* disable the queue if it's active */
+	mqd->cp_hqd_dequeue_request = 0;
+	mqd->cp_hqd_pq_rptr = 0;
+	mqd->cp_hqd_pq_wptr_lo = 0;
+	mqd->cp_hqd_pq_wptr_hi = 0;
+
+	/* set the pointer to the MQD */
+	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+	/* set MQD vmid to 0 */
+	tmp = regCP_MQD_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+	mqd->cp_mqd_control = tmp;
+
+	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+	/* set up the HQD, this is similar to CP_RB0_CNTL */
+	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+			    (order_base_2(prop->queue_size / 4) - 1));
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+	if (prop->kernel_queue) {
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+	}
+	if (prop->tmz_queue)
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
+	mqd->cp_hqd_pq_control = tmp;
+
+	/* set the wb address whether it's enabled or not */
+	wb_gpu_addr = prop->rptr_gpu_addr;
+	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_rptr_report_addr_hi =
+		upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+	wb_gpu_addr = prop->wptr_gpu_addr;
+	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/*
+	 * enable the doorbell if requested; without a doorbell the control
+	 * word is stored as 0 rather than the register default
+	 */
+	tmp = 0;
+	if (prop->use_doorbell) {
+		tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				DOORBELL_OFFSET, prop->doorbell_index);
+
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_EN, 1);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_SOURCE, 0);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+				    DOORBELL_HIT, 0);
+	}
+
+	mqd->cp_hqd_pq_doorbell_control = tmp;
+
+	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+	mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
+
+	/* set the vmid for the queue */
+	mqd->cp_hqd_vmid = 0;
+
+	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
+	mqd->cp_hqd_persistent_state = tmp;
+
+	/* set MIN_IB_AVAIL_SIZE */
+	tmp = regCP_HQD_IB_CONTROL_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+	mqd->cp_hqd_ib_control = tmp;
+
+	/* set static priority for a compute queue/ring */
+	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+	mqd->cp_hqd_active = prop->hqd_active;
+
+	/* set UQ fenceaddress */
+	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+	return 0;
+}
+
+/*
+ * Program the CP_HQD_* / CP_MQD_* registers from the values stored in the
+ * ring's compute MQD (ring->mqd_ptr).
+ *
+ * The callers in this file select the ring's me/pipe/queue with
+ * soc24_grbm_select() under adev->srbm_mutex before calling this function.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* inactivate the queue */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ /*
+  * Poll up to adev->usec_timeout times, 1us apart, for the queue to
+  * go inactive. NOTE(review): a timeout is not detected or reported
+  * here; programming continues either way.
+  */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ /* same MQD value as the doorbell-control write near the top of the function */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+/*
+ * Initialise the KIQ: register the queue with the RLC, prepare its MQD and
+ * program the HQD registers from it.
+ *
+ * During a GPU reset the MQD is restored from its backup copy and the ring
+ * is cleared. Otherwise the MQD is rebuilt from scratch and afterwards saved
+ * to the backup copy (if one exists).
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v12_compute_mqd *mqd = ring->mqd_ptr;
+	/* the KIQ uses backup slot AMDGPU_MAX_COMPUTE_RINGS */
+	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
+	bool gpu_reset;
+
+	gfx_v12_0_kiq_setting(ring);
+
+	gpu_reset = amdgpu_in_reset(adev);
+
+	if (gpu_reset) {
+		/* reset MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx],
+			       sizeof(*mqd));
+
+		/* reset ring buffer */
+		ring->wptr = 0;
+		amdgpu_ring_clear_ring(ring);
+	} else {
+		memset((void *)mqd, 0, sizeof(*mqd));
+		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+			amdgpu_ring_clear_ring(ring);
+	}
+
+	/* program the queue's registers with the queue selected */
+	mutex_lock(&adev->srbm_mutex);
+	soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+	if (!gpu_reset)
+		amdgpu_ring_init_mqd(ring);
+	gfx_v12_0_kiq_init_register(ring);
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* keep a copy of the freshly built MQD for later resets */
+	if (!gpu_reset && adev->gfx.mec.mqd_backup[mqd_idx])
+		memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+
+	return 0;
+}
+
+/*
+ * Prepare the MQD of a kernel compute queue.
+ *
+ * When @reset is set, the device is in reset, or the device is suspending,
+ * the MQD is restored from its backup copy (if one exists) and the ring is
+ * reset. Otherwise the MQD is zeroed, initialised through
+ * amdgpu_ring_init_mqd() with the queue selected, and then saved to the
+ * backup copy (if one exists).
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v12_compute_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+	if (reset || amdgpu_in_reset(adev) || adev->in_suspend) {
+		/* restore MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx],
+				    sizeof(*mqd));
+
+		/* reset ring buffer */
+		ring->wptr = 0;
+		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+		amdgpu_ring_clear_ring(ring);
+
+		return 0;
+	}
+
+	/* first-time init: build the MQD from scratch */
+	memset((void *)mqd, 0, sizeof(*mqd));
+
+	mutex_lock(&adev->srbm_mutex);
+	soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+	amdgpu_ring_init_mqd(ring);
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	if (adev->gfx.mec.mqd_backup[mqd_idx])
+		memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd,
+			      sizeof(*mqd));
+
+	return 0;
+}
+
+/*
+ * Initialise the KIQ ring and mark its scheduler as ready.
+ * Always returns 0.
+ */
+static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+
+	gfx_v12_0_kiq_init_queue(kiq_ring);
+	kiq_ring->sched.ready = true;
+
+	return 0;
+}
+
+/*
+ * Resume the kernel compute queues: enable the compute CP here unless async
+ * gfx rings are in use, initialise every compute ring's queue, then enable
+ * the queues through amdgpu_gfx_enable_kcq().
+ *
+ * Returns the first queue-init error, otherwise the result of
+ * amdgpu_gfx_enable_kcq().
+ */
+static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int idx, ret;
+
+	if (!amdgpu_async_gfx_ring)
+		gfx_v12_0_cp_compute_enable(adev, true);
+
+	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
+		ring = &adev->gfx.compute_ring[idx];
+		ret = gfx_v12_0_kcq_init_queue(ring, false);
+		if (ret)
+			return ret;
+	}
+
+	ret = amdgpu_gfx_enable_kcq(adev, 0);
+	return ret;
+}
+
+/*
+ * Bring up the command processor:
+ *  - disable the GUI idle interrupt on non-APU parts,
+ *  - load the gfx and compute microcode when firmware is loaded directly,
+ *  - program the doorbell ranges,
+ *  - with async gfx rings, enable the compute and gfx CPs up front,
+ *  - resume the KIQ (through MES when enabled), the compute queues and the
+ *    gfx ring(s),
+ *  - run a ring test on every gfx and compute ring.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int gfx_v12_0_cp_resume(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int idx, ret;
+
+	if (!(adev->flags & AMD_IS_APU))
+		gfx_v12_0_enable_gui_idle_interrupt(adev, false);
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		/* legacy firmware loading */
+		ret = gfx_v12_0_cp_gfx_load_microcode(adev);
+		if (ret)
+			return ret;
+
+		ret = gfx_v12_0_cp_compute_load_microcode_rs64(adev);
+		if (ret)
+			return ret;
+	}
+
+	gfx_v12_0_cp_set_doorbell_range(adev);
+
+	if (amdgpu_async_gfx_ring) {
+		gfx_v12_0_cp_compute_enable(adev, true);
+		gfx_v12_0_cp_gfx_enable(adev, true);
+	}
+
+	/* KIQ is brought up by MES when MES provides a kiq_hw_init hook */
+	ret = (adev->enable_mes_kiq && adev->mes.kiq_hw_init) ?
+		amdgpu_mes_kiq_hw_init(adev) :
+		gfx_v12_0_kiq_resume(adev);
+	if (ret)
+		return ret;
+
+	ret = gfx_v12_0_kcq_resume(adev);
+	if (ret)
+		return ret;
+
+	ret = amdgpu_async_gfx_ring ?
+		gfx_v12_0_cp_async_gfx_ring_resume(adev) :
+		gfx_v12_0_cp_gfx_resume(adev);
+	if (ret)
+		return ret;
+
+	/* verify every gfx ring, then every compute ring */
+	for (idx = 0; idx < adev->gfx.num_gfx_rings; idx++) {
+		ring = &adev->gfx.gfx_ring[idx];
+		ret = amdgpu_ring_test_helper(ring);
+		if (ret)
+			return ret;
+	}
+
+	for (idx = 0; idx < adev->gfx.num_compute_rings; idx++) {
+		ring = &adev->gfx.compute_ring[idx];
+		ret = amdgpu_ring_test_helper(ring);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable or disable both command processors: the gfx CP first, then the
+ * compute CP.
+ */
+static void gfx_v12_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v12_0_cp_gfx_enable(adev, enable);
+ gfx_v12_0_cp_compute_enable(adev, enable);
+}
+
+/*
+ * Enable the GFX hub's GART, flush HDP, set the default fault-enable state
+ * (enabled unless amdgpu_vm_fault_stop is AMDGPU_VM_FAULT_STOP_ALWAYS) and
+ * flush the GFX hub TLB.
+ *
+ * Returns the error from gart_enable(), or 0 on success.
+ */
+static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev)
+{
+	bool fault_default;
+	int ret;
+
+	ret = adev->gfxhub.funcs->gart_enable(adev);
+	if (ret)
+		return ret;
+
+	amdgpu_device_flush_hdp(adev, NULL);
+
+	fault_default = (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS);
+	adev->gfxhub.funcs->set_fault_enable_default(adev, fault_default);
+
+	/* TODO investigate why this and the hdp flush above is needed,
+	 * are we missing a flush somewhere else? */
+	adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+
+	return 0;
+}
+
+/*
+ * Read GB_ADDR_CONFIG and cache both the raw value and its decoded fields
+ * in adev->gfx.config. Most fields are stored as 1 << field;
+ * pipe_interleave_size is stored as 1 << (8 + field).
+ *
+ * Returns -EINVAL when the register reads back as 0 (nothing is stored in
+ * that case), otherwise 0.
+ */
+static int get_gb_addr_config(struct amdgpu_device *adev)
+{
+	u32 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
+
+	if (!gb_addr_config)
+		return -EINVAL;
+
+	adev->gfx.config.gb_addr_config = gb_addr_config;
+
+	adev->gfx.config.gb_addr_config_fields.num_pkrs =
+		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+
+	adev->gfx.config.gb_addr_config_fields.num_pipes =
+		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES);
+
+	/* the tile pipe count mirrors the decoded pipe count */
+	adev->gfx.config.max_tile_pipes =
+		adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+	adev->gfx.config.gb_addr_config_fields.max_compress_frags =
+		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG,
+				   MAX_COMPRESSED_FRAGS);
+
+	adev->gfx.config.gb_addr_config_fields.num_rb_per_se =
+		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG,
+				   NUM_RB_PER_SE);
+
+	adev->gfx.config.gb_addr_config_fields.num_se =
+		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG,
+				   NUM_SHADER_ENGINES);
+
+	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size =
+		1 << (8 + REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG,
+					PIPE_INTERLEAVE_SIZE));
+
+	return 0;
+}
+
+/*
+ * Set the GPA_OVERRIDE bit in both CPC_PSP_DEBUG and CPG_PSP_DEBUG
+ * (read-modify-write of each register in turn).
+ */
+static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev)
+{
+	uint32_t cpc_dbg, cpg_dbg;
+
+	cpc_dbg = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
+	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG,
+		     cpc_dbg | CPC_PSP_DEBUG__GPA_OVERRIDE_MASK);
+
+	cpg_dbg = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
+	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG,
+		     cpg_dbg | CPG_PSP_DEBUG__GPA_OVERRIDE_MASK);
+}
+
+/*
+ * Apply the golden register settings for GC 12.0.0 / 12.0.1, plus the rev0
+ * overrides when adev->rev_id is 0. Does nothing for SR-IOV virtual
+ * functions or for other GC IP versions.
+ */
+static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
+{
+	u32 gc_ver;
+
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+	if (gc_ver != IP_VERSION(12, 0, 0) && gc_ver != IP_VERSION(12, 0, 1))
+		return;
+
+	soc15_program_register_sequence(adev,
+					golden_settings_gc_12_0,
+					(const u32)ARRAY_SIZE(golden_settings_gc_12_0));
+
+	if (adev->rev_id != 0)
+		return;
+
+	soc15_program_register_sequence(adev,
+					golden_settings_gc_12_0_rev0,
+					(const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0));
+}
+
+/*
+ * Bring up the GFX 12 block: load/await firmware according to
+ * adev->firmware.load_type, program golden registers, enable the gfxhub and
+ * then resume RLC and CP.
+ *
+ * Returns 0 on success or the first error reported by a bring-up step. An
+ * invalid GB_ADDR_CONFIG only produces a warning.
+ */
+static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int r;
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+		/* RLC autoload sequence 1: Program rlc ram */
+		if (adev->gfx.imu.funcs && (amdgpu_dpm > 0) &&
+		    adev->gfx.imu.funcs->program_rlc_ram)
+			adev->gfx.imu.funcs->program_rlc_ram(adev);
+
+		/* rlc autoload firmware */
+		r = gfx_v12_0_rlc_backdoor_autoload_enable(adev);
+		if (r)
+			return r;
+	} else if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+			/* each IMU hook is optional */
+			if (adev->gfx.imu.funcs->load_microcode)
+				adev->gfx.imu.funcs->load_microcode(adev);
+			if (adev->gfx.imu.funcs->setup_imu)
+				adev->gfx.imu.funcs->setup_imu(adev);
+			if (adev->gfx.imu.funcs->start_imu)
+				adev->gfx.imu.funcs->start_imu(adev);
+		}
+
+		/* disable gpa mode in backdoor loading */
+		gfx_v12_0_disable_gpa_mode(adev);
+	}
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO ||
+	    adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		r = gfx_v12_0_wait_for_rlc_autoload_complete(adev);
+		if (r) {
+			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
+			return r;
+		}
+	}
+
+	if (!amdgpu_emu_mode)
+		gfx_v12_0_init_golden_registers(adev);
+
+	adev->gfx.is_poweron = true;
+
+	/* a bad address config is reported but does not abort bring-up */
+	if (get_gb_addr_config(adev))
+		DRM_WARN("Invalid gb_addr_config !\n");
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+		gfx_v12_0_config_gfx_rs64(adev);
+
+	r = gfx_v12_0_gfxhub_enable(adev);
+	if (r)
+		return r;
+
+	if ((amdgpu_dpm == 1) &&
+	    (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT ||
+	     adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) {
+		/*
+		 * On gfx 12 the RLC firmware load depends on the SMU firmware
+		 * already being loaded, so for these load types the SMC ucode
+		 * has to be loaded here, before the RLC is resumed.
+		 */
+		r = amdgpu_pm_load_smu_firmware(adev, NULL);
+		if (r)
+			return r;
+	}
+
+	gfx_v12_0_constants_init(adev);
+
+	if (adev->nbio.funcs->gc_doorbell_init)
+		adev->nbio.funcs->gc_doorbell_init(adev);
+
+	r = gfx_v12_0_rlc_resume(adev);
+	if (r)
+		return r;
+
+	/*
+	 * Golden register init and RLC resume may have overridden some
+	 * registers; reprogram them here.
+	 */
+	gfx_v12_0_tcp_harvest(adev);
+
+	return gfx_v12_0_cp_resume(adev);
+}
+
+/*
+ * Take (enable == true) or drop (enable == false) a reference on the EOP
+ * interrupt of every GFX and compute pipe, for each IP type that has user
+ * queue functions registered in adev->userq_funcs.
+ *
+ * Returns 0 on success, or the first non-zero result from
+ * amdgpu_irq_get()/amdgpu_irq_put(); references already taken or dropped
+ * before the failure are not rolled back.
+ */
+static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+					      bool enable)
+{
+	unsigned int irq_type;
+	int me, pipe, ret;
+
+	if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+		for (me = 0; me < adev->gfx.me.num_me; me++) {
+			for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
+				/* the GFX irq type depends on the pipe index only */
+				irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + pipe;
+				ret = enable ?
+					amdgpu_irq_get(adev, &adev->gfx.eop_irq, irq_type) :
+					amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+
+	if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+		for (me = 0; me < adev->gfx.mec.num_mec; ++me) {
+			for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+				/* compute irq types are laid out MEC-major, pipe-minor */
+				irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+					+ (me * adev->gfx.mec.num_pipe_per_mec)
+					+ pipe;
+				ret = enable ?
+					amdgpu_irq_get(adev, &adev->gfx.eop_irq, irq_type) :
+					amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down the GFX 12 block: stop the idle work, drop the interrupt
+ * references, disable the kernel queues and halt the CP.
+ *
+ * For SR-IOV virtual functions only the gfx CP is disabled and the low byte
+ * of RLC_CP_SCHEDULERS is cleared before returning. Always returns 0.
+ */
+static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t sched;
+
+	cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+	gfx_v12_0_set_userq_eop_interrupts(adev, false);
+
+	if (!adev->no_hw_access) {
+		/* KGQs are only torn down when async gfx rings are in use */
+		if (amdgpu_async_gfx_ring && amdgpu_gfx_disable_kgq(adev, 0))
+			DRM_ERROR("KGQ disable failed\n");
+
+		if (amdgpu_gfx_disable_kcq(adev, 0))
+			DRM_ERROR("KCQ disable failed\n");
+
+		amdgpu_mes_kiq_hw_fini(adev);
+	}
+
+	if (amdgpu_sriov_vf(adev)) {
+		gfx_v12_0_cp_gfx_enable(adev, false);
+
+		/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
+		sched = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+		sched &= 0xffffff00;
+		WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, sched);
+
+		return 0;
+	}
+
+	gfx_v12_0_cp_enable(adev, false);
+	gfx_v12_0_enable_gui_idle_interrupt(adev, false);
+
+	adev->gfxhub.funcs->gart_disable(adev);
+
+	adev->gfx.is_poweron = false;
+
+	return 0;
+}
+
+/* Suspend is a plain hardware teardown; returns gfx_v12_0_hw_fini()'s result. */
+static int gfx_v12_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int ret = gfx_v12_0_hw_fini(ip_block);
+
+	return ret;
+}
+
+/* Resume is a plain hardware bring-up; returns gfx_v12_0_hw_init()'s result. */
+static int gfx_v12_0_resume(struct amdgpu_ip_block *ip_block)
+{
+	int ret = gfx_v12_0_hw_init(ip_block);
+
+	return ret;
+}
+
+/* Returns true when the GUI_ACTIVE field of GRBM_STATUS reads back as zero. */
+static bool gfx_v12_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 grbm_status = RREG32_SOC15(GC, 0, regGRBM_STATUS);
+
+	return !REG_GET_FIELD(grbm_status, GRBM_STATUS, GUI_ACTIVE);
+}
+
+/*
+ * Poll GRBM_STATUS once per microsecond, for at most adev->usec_timeout
+ * iterations, until GUI_ACTIVE reads back clear.
+ *
+ * Returns 0 once the bit is clear, or -ETIMEDOUT if it never clears.
+ */
+static int gfx_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	unsigned int usec;
+
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		/* read GRBM_STATUS and test GUI_ACTIVE */
+		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
+				   GRBM_STATUS, GUI_ACTIVE))
+			return 0;
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Return the GPU clock counter via the SMUIO callback. When no callback is
+ * registered a warning is logged and 0 is returned.
+ */
+static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+	if (!adev->smuio.funcs || !adev->smuio.funcs->get_gpu_clock_counter) {
+		dev_warn(adev->dev, "query gpu clock counter is not supported\n");
+		return 0;
+	}
+
+	return adev->smuio.funcs->get_gpu_clock_counter(adev);
+}
+
+/*
+ * Early software setup: choose between kernel and user queues, size the ring
+ * counts, install the per-IP function tables and load the microcode images.
+ *
+ * Returns the result of gfx_v12_0_init_microcode().
+ */
+static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	/*
+	 * amdgpu_user_queue:
+	 *   1     - kernel queues and user queues
+	 *   2     - user queues only
+	 *   other - kernel queues only (this includes -1 and 0)
+	 */
+	adev->gfx.disable_kq = (amdgpu_user_queue == 2);
+	adev->gfx.disable_uq = (amdgpu_user_queue != 1 &&
+				amdgpu_user_queue != 2);
+
+	adev->gfx.funcs = &gfx_v12_0_gfx_funcs;
+
+	if (!adev->gfx.disable_kq) {
+		adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
+		adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+						  AMDGPU_MAX_COMPUTE_RINGS);
+	} else {
+		/* no kernel rings at all when kernel queues are disabled */
+		adev->gfx.num_gfx_rings = 0;
+		adev->gfx.num_compute_rings = 0;
+	}
+
+	gfx_v12_0_set_kiq_pm4_funcs(adev);
+	gfx_v12_0_set_ring_funcs(adev);
+	gfx_v12_0_set_irq_funcs(adev);
+	gfx_v12_0_set_rlc_funcs(adev);
+	gfx_v12_0_set_mqd_funcs(adev);
+	gfx_v12_0_set_imu_funcs(adev);
+
+	gfx_v12_0_init_rlcg_reg_access_ctrl(adev);
+
+	return gfx_v12_0_init_microcode(adev);
+}
+
+/*
+ * Take a reference on the privileged-register, privileged-instruction and
+ * bad-opcode interrupts (type 0 each), then enable the user queue EOP
+ * interrupts.
+ *
+ * Returns 0 on success or the first error encountered; references taken
+ * before a failure are not released here.
+ */
+static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_irq_src *fault_irqs[] = {
+		&adev->gfx.priv_reg_irq,
+		&adev->gfx.priv_inst_irq,
+		&adev->gfx.bad_op_irq,
+	};
+	unsigned int idx;
+	int ret;
+
+	for (idx = 0; idx < ARRAY_SIZE(fault_irqs); idx++) {
+		ret = amdgpu_irq_get(adev, fault_irqs[idx], 0);
+		if (ret)
+			return ret;
+	}
+
+	return gfx_v12_0_set_userq_eop_interrupts(adev, true);
+}
+
+/* Returns true when the RLC_ENABLE_F32 field of RLC_CNTL is non-zero. */
+static bool gfx_v12_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+	uint32_t cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
+
+	return REG_GET_FIELD(cntl, RLC_CNTL, RLC_ENABLE_F32) != 0;
+}
+
+/*
+ * Write RLC_SAFE_MODE with CMD set and MESSAGE = 1, then poll (1 us per
+ * iteration, at most adev->usec_timeout iterations) until the CMD field
+ * reads back as zero. A timeout is not reported. @xcc_id is unused.
+ */
+static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev,
+				    int xcc_id)
+{
+	uint32_t request = RLC_SAFE_MODE__CMD_MASK |
+			   (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+	unsigned int usec;
+
+	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, request);
+
+	/* wait for RLC_SAFE_MODE */
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		uint32_t status = RREG32_SOC15(GC, 0, regRLC_SAFE_MODE);
+
+		if (!REG_GET_FIELD(status, RLC_SAFE_MODE, CMD))
+			break;
+		udelay(1);
+	}
+}
+
+/*
+ * Write RLC_SAFE_MODE with only the CMD bit set (MESSAGE left at zero).
+ * No completion polling is done. @xcc_id is unused.
+ */
+static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev,
+				      int xcc_id)
+{
+	uint32_t request = RLC_SAFE_MODE__CMD_MASK;
+
+	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, request);
+}
+
+/*
+ * Clear (enable) or set (disable) PERFMON_CLOCK_STATE in
+ * RLC_CGTT_MGCG_OVERRIDE. Does nothing unless AMD_CG_SUPPORT_GFX_PERF_CLK is
+ * set in adev->cg_flags; the register is only written when its value changes.
+ */
+static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
+				      bool enable)
+{
+	uint32_t old, val;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
+		return;
+
+	old = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	val = enable ?
+		(old & ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK) :
+		(old | RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK);
+
+	if (val != old)
+		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, val);
+}
+
+/*
+ * Program the RLC_SPM_VMID field of RLC_SPM_MC_CNTL with @vmid.
+ *
+ * @adev: amdgpu device
+ * @ring: optional ring; when non-NULL, of GFX or COMPUTE type, and the device
+ *        is in SR-IOV pp_one_vf mode, the same register write is also
+ *        emitted on the ring
+ * @vmid: VMID to store in the field
+ *
+ * In SR-IOV pp_one_vf mode the register is accessed with the NO_KIQ
+ * accessors; otherwise the regular accessors are used.
+ */
+static void gfx_v12_0_update_spm_vmid(struct amdgpu_device *adev,
+				      struct amdgpu_ring *ring,
+				      unsigned vmid)
+{
+	u32 reg, data;
+
+	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+	if (amdgpu_sriov_is_pp_one_vf(adev))
+		data = RREG32_NO_KIQ(reg);
+	else
+		data = RREG32(reg);
+
+	/*
+	 * REG_SET_FIELD shifts the value into place before masking. The
+	 * previous open-coded form masked the unshifted vmid with the
+	 * in-register mask, which is only correct for a field at bit 0.
+	 */
+	data = REG_SET_FIELD(data, RLC_SPM_MC_CNTL, RLC_SPM_VMID, vmid);
+
+	if (amdgpu_sriov_is_pp_one_vf(adev))
+		WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+	else
+		WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+
+	/* reuse the offset computed above instead of shadowing it */
+	if (ring
+	    && amdgpu_sriov_is_pp_one_vf(adev)
+	    && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+		|| (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)))
+		amdgpu_ring_emit_wreg(ring, reg, data);
+}
+
+/* RLC callback table for GFX 12 */
+static const struct amdgpu_rlc_funcs gfx_v12_0_rlc_funcs = {
+	/* state query and safe mode */
+	.is_rlc_enabled = gfx_v12_0_is_rlc_enabled,
+	.set_safe_mode = gfx_v12_0_set_safe_mode,
+	.unset_safe_mode = gfx_v12_0_unset_safe_mode,
+
+	/* lifecycle */
+	.init = gfx_v12_0_rlc_init,
+	.start = gfx_v12_0_rlc_start,
+	.stop = gfx_v12_0_rlc_stop,
+	.reset = gfx_v12_0_rlc_reset,
+	.resume = gfx_v12_0_rlc_resume,
+
+	/* clear state buffer */
+	.get_csb_size = gfx_v12_0_get_csb_size,
+	.get_csb_buffer = gfx_v12_0_get_csb_buffer,
+
+	/* SPM */
+	.update_spm_vmid = gfx_v12_0_update_spm_vmid,
+};
+
+/*
+ * Placeholder power-gating helpers. Both are empty stubs and the whole
+ * region is compiled out by the "#if 0" guard below.
+ */
+#if 0
+static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ /* TODO */
+}
+
+static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ /* TODO */
+}
+#endif
+
+/*
+ * Powergating entry point: on GC 12.0.0 / 12.0.1 this forwards to
+ * amdgpu_gfx_off_ctrl() with true for AMD_PG_STATE_GATE and false otherwise.
+ * SR-IOV virtual functions and other IP versions are left untouched.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t gc_ver;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+	if (gc_ver == IP_VERSION(12, 0, 0) || gc_ver == IP_VERSION(12, 0, 1))
+		amdgpu_gfx_off_ctrl(adev, state == AMD_PG_STATE_GATE);
+
+	return 0;
+}
+
+/*
+ * Enable or disable coarse grain clock gating (CGCG/CGLS and their 3D
+ * variants) according to the AMD_CG_SUPPORT_GFX_* bits in adev->cg_flags.
+ *
+ * Does nothing when none of the four CGCG/CGLS flags is set. Registers that
+ * are compared against their previous value are only written on change.
+ */
+static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+						       bool enable)
+{
+	uint32_t def, data;
+
+	if (!(adev->cg_flags &
+	      (AMD_CG_SUPPORT_GFX_CGCG |
+	       AMD_CG_SUPPORT_GFX_CGLS |
+	       AMD_CG_SUPPORT_GFX_3D_CGCG |
+	       AMD_CG_SUPPORT_GFX_3D_CGLS)))
+		return;
+
+	if (!enable) {
+		/* Disable path: clear the enable bits in RLC_CGCG_CGLS_CTRL */
+		def = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+		data = def;
+
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+		if (def != data)
+			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+		/* ... and in RLC_CGCG_CGLS_CTRL_3D */
+		def = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+		data = def;
+
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+		if (def != data)
+			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+		return;
+	}
+
+	/* Enable path: first drop the CGCG/CGLS/3D override bits */
+	def = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	data = def;
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+	if (adev->cg_flags &
+	    (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS))
+		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+	/* update CGCG override bits */
+	if (def != data)
+		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+
+	/* enable cgcg FSM(0x0000363F) */
+	def = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+	data = def;
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+		data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
+		data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+	}
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+		data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
+		data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+			RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+	}
+
+	if (def != data)
+		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+	/* Program RLC_CGCG_CGLS_CTRL_3D */
+	def = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+	data = def;
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
+		data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
+		data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+	}
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
+		data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
+		data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+			RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+	}
+
+	if (def != data)
+		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+	/* set IDLE_POLL_COUNT(0x00900100) */
+	def = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
+	data = def;
+
+	data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK |
+		  CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
+	data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+		(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+
+	if (def != data)
+		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
+
+	/* The interrupt enables below are written unconditionally */
+	data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+	data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+	data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+	data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+	data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+	WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
+
+	data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+	data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+	WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+	/* Some ASICs only have one SDMA instance; skip SDMA1 there */
+	if (adev->sdma.num_instances > 1) {
+		data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+		data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+		WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+	}
+}
+
+/*
+ * Enable or disable medium grain clock gating by clearing (enable) or
+ * setting (disable) the GRBM/RLC SCLK and GFXIP MGCG override bits in
+ * RLC_CGTT_MGCG_OVERRIDE.
+ *
+ * The register is only touched when AMD_CG_SUPPORT_GFX_MGCG is set, and only
+ * written when the value changes.
+ */
+static void gfx_v12_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+						       bool enable)
+{
+	const uint32_t override_bits =
+		RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+		RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+		RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK;
+	uint32_t old, val;
+
+	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+		return;
+
+	/* MGLS on its own has nothing to program here */
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG))
+		return;
+
+	old = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	val = enable ? (old & ~override_bits) : (old | override_bits);
+
+	if (val != old)
+		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, val);
+}
+
+/*
+ * Clear (enable) or set (disable) the GFXIP and RLC repeater FGCG override
+ * bits in RLC_CGTT_MGCG_OVERRIDE. Does nothing unless
+ * AMD_CG_SUPPORT_REPEATER_FGCG is set; writes only when the value changes.
+ */
+static void gfx_v12_0_update_repeater_fgcg(struct amdgpu_device *adev,
+					   bool enable)
+{
+	const uint32_t override_bits =
+		RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
+		RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK;
+	uint32_t old, val;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+		return;
+
+	old = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	val = enable ? (old & ~override_bits) : (old | override_bits);
+
+	if (val != old)
+		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, val);
+}
+
+/*
+ * Clear (enable) or set (disable) the GFXIP FGCG override bit in
+ * RLC_CGTT_MGCG_OVERRIDE. Does nothing unless AMD_CG_SUPPORT_GFX_FGCG is
+ * set; writes only when the value changes.
+ */
+static void gfx_v12_0_update_sram_fgcg(struct amdgpu_device *adev,
+				       bool enable)
+{
+	uint32_t old, val;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+		return;
+
+	old = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+	val = enable ?
+		(old & ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK) :
+		(old | RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK);
+
+	if (val != old)
+		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, val);
+}
+
+/*
+ * Apply every GFX clock gating feature update between an RLC safe mode
+ * enter/exit pair, then switch the GUI idle interrupt to match when any of
+ * the MGCG/CGCG/CGLS (incl. 3D) features is supported.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+					     bool enable)
+{
+	const u64 gui_idle_cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+				      AMD_CG_SUPPORT_GFX_CGLS |
+				      AMD_CG_SUPPORT_GFX_CGCG |
+				      AMD_CG_SUPPORT_GFX_3D_CGCG |
+				      AMD_CG_SUPPORT_GFX_3D_CGLS;
+
+	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+	gfx_v12_0_update_coarse_grain_clock_gating(adev, enable);
+	gfx_v12_0_update_medium_grain_clock_gating(adev, enable);
+	gfx_v12_0_update_repeater_fgcg(adev, enable);
+	gfx_v12_0_update_sram_fgcg(adev, enable);
+	gfx_v12_0_update_perf_clk(adev, enable);
+
+	if (adev->cg_flags & gui_idle_cg_flags)
+		gfx_v12_0_enable_gui_idle_interrupt(adev, enable);
+
+	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+	return 0;
+}
+
+/*
+ * Clockgating entry point: on GC 12.0.0 / 12.0.1 this updates GFX clock
+ * gating, enabling it for AMD_CG_STATE_GATE and disabling it otherwise.
+ * SR-IOV virtual functions and other IP versions are left untouched.
+ *
+ * Always returns 0.
+ */
+static int gfx_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					   enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t gc_ver;
+
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+	if (gc_ver == IP_VERSION(12, 0, 0) || gc_ver == IP_VERSION(12, 0, 1))
+		gfx_v12_0_update_gfx_clock_gating(adev,
+						  state == AMD_CG_STATE_GATE);
+
+	return 0;
+}
+
+/*
+ * Report the clock gating features currently active in hardware by OR-ing
+ * the matching AMD_CG_SUPPORT_* bits into *flags.
+ *
+ * *flags is only ever OR-ed into, never cleared. A feature controlled by an
+ * override bit counts as active when that bit is clear; a feature controlled
+ * by an enable bit counts as active when that bit is set.
+ */
+static void gfx_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int data;
+
+	/* Override-controlled features: MGCG, repeater FGCG, FGCG, perf clk */
+	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+	if ((data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK) == 0)
+		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+	if ((data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK) == 0)
+		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
+
+	if ((data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK) == 0)
+		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
+
+	if ((data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK) == 0)
+		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
+
+	/* Enable-controlled features: CGCG and CGLS */
+	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+	/* Enable-controlled features: 3D CGCG and 3D CGLS */
+	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+/* Return the gfx ring read pointer from its CPU-visible slot (32 bits wide). */
+static u64 gfx_v12_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+	uint32_t rptr = *(uint32_t *)ring->rptr_cpu_addr;
+
+	return rptr;
+}
+
+/*
+ * Return the gfx ring write pointer: from the CPU-visible 64-bit slot when
+ * the ring uses a doorbell, otherwise from CP_RB0_WPTR / CP_RB0_WPTR_HI.
+ */
+static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u64 lo, hi;
+
+	/* XXX check if swapping is necessary on BE */
+	if (ring->use_doorbell)
+		return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+
+	lo = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
+	hi = (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
+
+	return lo + hi;
+}
+
+/*
+ * Publish ring->wptr for a gfx ring: through the CPU-visible slot plus a
+ * 64-bit doorbell write when the ring uses a doorbell, otherwise through
+ * CP_RB0_WPTR / CP_RB0_WPTR_HI.
+ */
+static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+			     lower_32_bits(ring->wptr));
+		WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+			     upper_32_bits(ring->wptr));
+		return;
+	}
+
+	/* XXX check if swapping is necessary on BE */
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+
+/* Return the compute ring read pointer from its CPU-visible slot (32 bits wide). */
+static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+	uint32_t rptr = *(uint32_t *)ring->rptr_cpu_addr;
+
+	return rptr;
+}
+
+/*
+ * Return the compute ring write pointer from its CPU-visible 64-bit slot.
+ * Rings that do not use a doorbell hit BUG().
+ */
+static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+	if (!ring->use_doorbell)
+		BUG();
+
+	/* XXX check if swapping is necessary on BE */
+	return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+}
+
+/*
+ * Publish ring->wptr for a compute ring through the CPU-visible slot and a
+ * 64-bit doorbell write. Rings that do not use a doorbell hit BUG().
+ */
+static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		BUG(); /* only DOORBELL method supported on gfx12 now */
+
+	/* XXX check if swapping is necessary on BE */
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+
+/*
+ * Emit an HDP flush on @ring as a write-then-wait on the NBIO HDP flush
+ * request/done registers.
+ *
+ * Compute rings use the cp2 (ME 1) or cp6 (ME 2) mask shifted by the ring's
+ * pipe and engine 0; a compute ring on any other ME emits nothing. All other
+ * ring types use the cp0 mask and engine 1 (PFP).
+ */
+static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const struct nbio_hdp_flush_reg *flush_reg = adev->nbio.hdp_flush_reg;
+	u32 mask, engine;
+
+	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) {
+		mask = flush_reg->ref_and_mask_cp0;
+		engine = 1; /* pfp */
+	} else if (ring->me == 1) {
+		mask = flush_reg->ref_and_mask_cp2 << ring->pipe;
+		engine = 0;
+	} else if (ring->me == 2) {
+		mask = flush_reg->ref_and_mask_cp6 << ring->pipe;
+		engine = 0;
+	} else {
+		return;
+	}
+
+	gfx_v12_0_wait_reg_mem(ring, engine, 0, 1,
+			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+			       mask, mask, 0x20);
+}
+
+/*
+ * Emit an INDIRECT_BUFFER packet for @ib on a gfx ring.
+ *
+ * The control dword carries the IB length in dwords and the job's VMID in
+ * bits 24 and up. CE IBs and IB addresses that are not dword aligned hit
+ * BUG_ON(). @flags is unused.
+ */
+static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+				       struct amdgpu_job *job,
+				       struct amdgpu_ib *ib,
+				       uint32_t flags)
+{
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+	u32 ib_addr_lo = lower_32_bits(ib->gpu_addr);
+	u32 control = ib->length_dw | (vmid << 24);
+
+#ifdef __BIG_ENDIAN
+	ib_addr_lo |= (2 << 0);
+#endif
+
+	BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+	amdgpu_ring_write(ring, ib_addr_lo);
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, control);
+}
+
+/*
+ * Emit an INDIRECT_BUFFER packet for @ib on a compute ring.
+ *
+ * The control dword carries INDIRECT_BUFFER_VALID, the IB length in dwords
+ * and the job's VMID in bits 24 and up. An IB address that is not dword
+ * aligned hits BUG_ON(). @flags is unused.
+ */
+static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+					   struct amdgpu_job *job,
+					   struct amdgpu_ib *ib,
+					   uint32_t flags)
+{
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+	u32 ib_addr_lo = lower_32_bits(ib->gpu_addr);
+	u32 control;
+
+#ifdef __BIG_ENDIAN
+	ib_addr_lo |= (2 << 0);
+#endif
+
+	control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+	amdgpu_ring_write(ring, ib_addr_lo);
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, control);
+}
+
+/*
+ * Emit a RELEASE_MEM packet that writes @seq to @addr.
+ *
+ * AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write (DATA_SEL 2, else 1)
+ * and AMDGPU_FENCE_FLAG_INT selects INT_SEL 2 (else 0). @addr must be 8-byte
+ * aligned for a 64-bit write and 4-byte aligned otherwise; a misaligned
+ * address hits BUG_ON().
+ */
+static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+				      u64 seq, unsigned flags)
+{
+	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+	u32 event_dw, sel_dw;
+
+	event_dw = PACKET3_RELEASE_MEM_GCR_SEQ |
+		   PACKET3_RELEASE_MEM_GCR_GL2_WB |
+		   PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+		   PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+		   PACKET3_RELEASE_MEM_EVENT_INDEX(5);
+	sel_dw = PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+		 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0);
+
+	/* RELEASE_MEM - flush caches, send int */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+	amdgpu_ring_write(ring, event_dw);
+	amdgpu_ring_write(ring, sel_dw);
+
+	/*
+	 * Qword alignment is required for a 64-bit write; Dword alignment is
+	 * enough when only the low 32 bits are written (high data discarded).
+	 */
+	BUG_ON(addr & (write64bit ? 0x7 : 0x3));
+
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, lower_32_bits(seq));
+	amdgpu_ring_write(ring, upper_32_bits(seq));
+	amdgpu_ring_write(ring, 0);
+}
+
+/*
+ * Emit a memory wait on the ring's fence address for the latest sync
+ * sequence number (full 32-bit mask, interval 4). The first engine argument
+ * is 1 for gfx rings and 0 for all others.
+ */
+static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint64_t fence_addr = ring->fence_drv.gpu_addr;
+	uint32_t fence_seq = ring->fence_drv.sync_seq;
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+	gfx_v12_0_wait_reg_mem(ring, usepfp, 1, 0,
+			       lower_32_bits(fence_addr),
+			       upper_32_bits(fence_addr),
+			       fence_seq, 0xffffffff, 4);
+}
+
+/*
+ * Emit an INVALIDATE_TLBS packet whose single payload dword encodes
+ * @dst_sel, @all_hub, @pasid and @flush_type.
+ */
+static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+					   uint16_t pasid, uint32_t flush_type,
+					   bool all_hub, uint8_t dst_sel)
+{
+	uint32_t payload = PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+			   PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			   PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			   PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type);
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(ring, payload);
+}
+
+/*
+ * Emit a GPU TLB flush for @vmid / @pd_addr; on gfx rings this is followed
+ * by a PFP_SYNC_ME packet.
+ */
+static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+					 unsigned vmid, uint64_t pd_addr)
+{
+	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+	/* compute doesn't have PFP */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
+		return;
+
+	/* sync PFP to ME, otherwise we might get invalid PFP reads */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+	amdgpu_ring_write(ring, 0x0);
+}
+
+/*
+ * Emit a KIQ fence: a WRITE_DATA packet storing the low 32 bits of @seq at
+ * @addr and, when AMDGPU_FENCE_FLAG_INT is set, a second WRITE_DATA packet
+ * writing 0x20000000 to CPC_INT_STATUS to raise the interrupt.
+ *
+ * 64-bit fences are not supported here and hit BUG_ON().
+ */
+static void gfx_v12_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+					  u64 seq, unsigned int flags)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* we only allocate 32bit for each seq wb address */
+	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+	/* write fence seq to the "addr" */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, lower_32_bits(seq));
+
+	if (!(flags & AMDGPU_FENCE_FLAG_INT))
+		return;
+
+	/* set register to trigger INT */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+				 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+	amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+}
+
+/*
+ * Emit a CONTEXT_CONTROL packet. load_enable is always set; the individual
+ * load bits are added only when @flags has AMDGPU_HAVE_CTX_SWITCH.
+ */
+static void gfx_v12_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+					 uint32_t flags)
+{
+	/* set load_enable otherwise this package is just NOPs */
+	uint32_t load_ctl = 0x80000000;
+
+	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+		/* set load_global_config & load_global_uconfig */
+		load_ctl |= 0x8001;
+		/* set load_cs_sh_regs */
+		load_ctl |= 0x01000000;
+		/* set load_per_context_state & load_gfx_sh_regs for GFX */
+		load_ctl |= 0x10002;
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, load_ctl);
+	amdgpu_ring_write(ring, 0);
+}
+
+/*
+ * Emit a COND_EXEC packet keyed on the value at @addr, with a placeholder
+ * for its final dword.
+ *
+ * Returns the ring offset (wptr masked with buf_mask) of that placeholder
+ * dword so that it can be patched later.
+ */
+static unsigned gfx_v12_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+						   uint64_t addr)
+{
+	unsigned patch_offset;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	/* discard following DWs if *cond_exec_gpu_addr==0 */
+	amdgpu_ring_write(ring, 0);
+
+	/* remember where the dummy value lands; it is patched later */
+	patch_offset = ring->wptr & ring->buf_mask;
+	amdgpu_ring_write(ring, 0);
+
+	return patch_offset;
+}
+
+/*
+ * Preempt the IB currently running on @ring by submitting a
+ * PREEMPT_QUEUES_NO_UNMAP request on the KIQ ring, then poll the ring's
+ * trailing fence for up to adev->usec_timeout microseconds.
+ *
+ * Returns 0 on success, -EINVAL when MES is enabled, when the KIQ has no
+ * unmap-queues hook, or when the trailing fence never signals, and -ENOMEM
+ * when space on the KIQ ring cannot be allocated.
+ */
+static int gfx_v12_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	unsigned long irq_flags;
+	int usec, ret = 0;
+
+	if (adev->enable_mes)
+		return -EINVAL;
+
+	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+		return -EINVAL;
+
+	spin_lock_irqsave(&kiq->ring_lock, irq_flags);
+
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, irq_flags);
+		return -ENOMEM;
+	}
+
+	/* assert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+	/* assert IB preemption, emit the trailing fence */
+	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+				   ring->trail_fence_gpu_addr,
+				   ++ring->trail_seq);
+	amdgpu_ring_commit(kiq_ring);
+
+	spin_unlock_irqrestore(&kiq->ring_lock, irq_flags);
+
+	/* poll the trailing fence */
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		if (ring->trail_seq ==
+		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+			break;
+		udelay(1);
+	}
+
+	/* the loop ran to completion without seeing the fence value */
+	if (usec >= adev->usec_timeout) {
+		ret = -EINVAL;
+		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+	}
+
+	/* deassert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, true);
+
+	return ret;
+}
+
+/*
+ * Emit a FRAME_CONTROL packet: FRAME_CMD is 0 when @start is true and 1
+ * otherwise, and FRAME_TMZ is set when @secure is true.
+ */
+static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring,
+					   bool start,
+					   bool secure)
+{
+	uint32_t payload = FRAME_CMD(start ? 0 : 1);
+
+	if (secure)
+		payload |= FRAME_TMZ;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, payload);
+}
+
+/*
+ * Emit a COPY_DATA packet that copies register @reg into the writeback
+ * buffer at dword offset @reg_val_offs (adev->wb.gpu_addr +
+ * reg_val_offs * 4), with write confirmation.
+ */
+static void gfx_v12_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+				     uint32_t reg_val_offs)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint64_t dst_addr = adev->wb.gpu_addr + reg_val_offs * 4;
+	uint32_t ctl = 0 |		/* src: register */
+		       (5 << 8) |	/* dst: memory */
+		       (1 << 20);	/* write confirm */
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+	amdgpu_ring_write(ring, ctl);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, lower_32_bits(dst_addr));
+	amdgpu_ring_write(ring, upper_32_bits(dst_addr));
+}
+
+/*
+ * Emit a WRITE_DATA packet that writes @val to register @reg. The control
+ * dword depends on the ring type: engine select 1 with write confirm for
+ * gfx, bit 16 (no address increment) for KIQ, write confirm only otherwise.
+ */
+static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring,
+				     uint32_t reg,
+				     uint32_t val)
+{
+	uint32_t ctl;
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+		ctl = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+	else if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		ctl = (1 << 16); /* no inc addr */
+	else
+		ctl = WR_CONFIRM;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, ctl);
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val);
+}
+
+/*
+ * Emit a wait on register @reg for reference value @val under @mask, by
+ * forwarding to gfx_v12_0_wait_reg_mem() with fixed arguments
+ * (0, 0, 0, reg, 0, val, mask, 0x20).
+ */
+static void gfx_v12_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+					 uint32_t val, uint32_t mask)
+{
+	const uint32_t no_second_reg = 0;
+
+	gfx_v12_0_wait_reg_mem(ring, 0, 0, 0, reg, no_second_reg,
+			       val, mask, 0x20);
+}
+
+/*
+ * Emit a combined register write/wait on @reg0 and @reg1 with reference
+ * value @ref under @mask, by forwarding to gfx_v12_0_wait_reg_mem(). The
+ * first engine argument is 1 for gfx rings and 0 for all others.
+ */
+static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						   uint32_t reg0, uint32_t reg1,
+						   uint32_t ref, uint32_t mask)
+{
+	int engine = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+	gfx_v12_0_wait_reg_mem(ring, engine, 0, 1, reg0, reg1,
+			       ref, mask, 0x20);
+}
+
+/*
+ * Enable or disable the TIME_STAMP and GENERIC0 interrupts in
+ * CP_INT_CNTL_RING0. Only ME 0 / pipe 0 is handled; any other ME or pipe is
+ * logged at debug level and ignored, and states other than ENABLE/DISABLE
+ * leave the register untouched.
+ */
+static void
+gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+				      uint32_t me, uint32_t pipe,
+				      enum amdgpu_interrupt_state state)
+{
+	uint32_t int_cntl, int_cntl_reg, on;
+
+	if (me) {
+		DRM_DEBUG("invalid me %d\n", me);
+		return;
+	}
+
+	if (pipe != 0) {
+		DRM_DEBUG("invalid pipe %d\n", pipe);
+		return;
+	}
+
+	int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE &&
+	    state != AMDGPU_IRQ_STATE_ENABLE)
+		return;
+
+	on = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
+
+	int_cntl = RREG32_SOC15_IP(GC, int_cntl_reg);
+	int_cntl = REG_SET_FIELD(int_cntl, CP_INT_CNTL_RING0,
+				 TIME_STAMP_INT_ENABLE, on);
+	int_cntl = REG_SET_FIELD(int_cntl, CP_INT_CNTL_RING0,
+				 GENERIC0_INT_ENABLE, on);
+	WREG32_SOC15_IP(GC, int_cntl_reg, int_cntl);
+}
+
+/*
+ * Enable or disable the TIME_STAMP and GENERIC0 interrupts of a compute
+ * pipe. Only ME 1, pipes 0 and 1 are handled; any other ME or pipe is logged
+ * at debug level and ignored, and states other than ENABLE/DISABLE leave the
+ * register untouched.
+ */
+static void gfx_v12_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+						      int me, int pipe,
+						      enum amdgpu_interrupt_state state)
+{
+	u32 int_cntl, int_cntl_reg, on;
+
+	/*
+	 * amdgpu drives only the first MEC, so only that MEC's pipe
+	 * interrupts are configured here; interrupts of all other pipes are
+	 * set by amdkfd.
+	 */
+	if (me != 1) {
+		DRM_DEBUG("invalid me %d\n", me);
+		return;
+	}
+
+	switch (pipe) {
+	case 0:
+		int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+		break;
+	case 1:
+		int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+		break;
+	default:
+		DRM_DEBUG("invalid pipe %d\n", pipe);
+		return;
+	}
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE &&
+	    state != AMDGPU_IRQ_STATE_ENABLE)
+		return;
+
+	on = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
+
+	/* the PIPE0 field layout is used for both pipe registers */
+	int_cntl = RREG32_SOC15_IP(GC, int_cntl_reg);
+	int_cntl = REG_SET_FIELD(int_cntl, CP_ME1_PIPE0_INT_CNTL,
+				 TIME_STAMP_INT_ENABLE, on);
+	int_cntl = REG_SET_FIELD(int_cntl, CP_ME1_PIPE0_INT_CNTL,
+				 GENERIC0_INT_ENABLE, on);
+	WREG32_SOC15_IP(GC, int_cntl_reg, int_cntl);
+}
+
+static int gfx_v12_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) { /* map the CP IRQ type to (me, pipe) and forward @state */
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+ gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); /* helper only handles pipe 0 and logs "invalid pipe" here */
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 2, state); /* helper only handles pipes 0 and 1 */
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 3, state); /* helper only handles pipes 0 and 1 */
+ break;
+ default:
+ break; /* unknown types are ignored */
+ }
+ return 0; /* never reports failure */
+}
+
+static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0]; /* non-zero selects the user-queue path when MES is enabled */
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ DRM_DEBUG("IH: CP EOP\n");
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ xa_lock_irqsave(xa, flags); /* lookup and processing both happen under the xarray lock */
+ fence_drv = xa_load(xa, doorbell_offset); /* fence driver is keyed by doorbell offset */
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ } else {
+ me_id = (entry->ring_id & 0x0c) >> 2; /* ring_id bits 3:2 */
+ pipe_id = (entry->ring_id & 0x03) >> 0; /* ring_id bits 1:0 */
+ queue_id = (entry->ring_id & 0x70) >> 4; /* ring_id bits 6:4 */
+
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]); /* any non-zero pipe maps to gfx_ring[1] */
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupts are supported for MEC starting from VI,
+ * but the interrupt can only be enabled/disabled per pipe, not
+ * per queue, so match on me, pipe and queue here.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ } /* no default: me_id 3 is silently ignored */
+ }
+
+ return 0;
+}
+
+/*
+ * Set PRIV_REG_INT_ENABLE on every CPG (ME) and CPC (MEC) pipe that has an
+ * interrupt control register; states other than ENABLE/DISABLE are ignored.
+ */
+static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+					      struct amdgpu_irq_src *source,
+					      unsigned int type,
+					      enum amdgpu_interrupt_state state)
+{
+	const u32 enable = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
+	u32 reg, val;
+	int me, mec, pipe;
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE && state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	for (me = 0; me < adev->gfx.me.num_me; me++) {
+		for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
+			reg = gfx_v12_0_get_cpg_int_cntl(adev, me, pipe);
+			if (!reg)
+				continue;
+
+			val = RREG32_SOC15_IP(GC, reg);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    PRIV_REG_INT_ENABLE, enable);
+			WREG32_SOC15_IP(GC, reg, val);
+		}
+	}
+
+	for (mec = 0; mec < adev->gfx.mec.num_mec; mec++) {
+		for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+			/* MECs start at 1 */
+			reg = gfx_v12_0_get_cpc_int_cntl(adev, mec + 1, pipe);
+			if (!reg)
+				continue;
+
+			val = RREG32_SOC15_IP(GC, reg);
+			val = REG_SET_FIELD(val, CP_ME1_PIPE0_INT_CNTL,
+					    PRIV_REG_INT_ENABLE, enable);
+			WREG32_SOC15_IP(GC, reg, val);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Set OPCODE_ERROR_INT_ENABLE on every CPG (ME) and CPC (MEC) pipe that has
+ * an interrupt control register; other states than ENABLE/DISABLE are ignored.
+ */
+static int gfx_v12_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+					    struct amdgpu_irq_src *source,
+					    unsigned type,
+					    enum amdgpu_interrupt_state state)
+{
+	const u32 enable = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
+	u32 reg, val;
+	int me, mec, pipe;
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE && state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	for (me = 0; me < adev->gfx.me.num_me; me++) {
+		for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
+			reg = gfx_v12_0_get_cpg_int_cntl(adev, me, pipe);
+			if (!reg)
+				continue;
+
+			val = RREG32_SOC15_IP(GC, reg);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    OPCODE_ERROR_INT_ENABLE, enable);
+			WREG32_SOC15_IP(GC, reg, val);
+		}
+	}
+
+	for (mec = 0; mec < adev->gfx.mec.num_mec; mec++) {
+		for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+			/* MECs start at 1 */
+			reg = gfx_v12_0_get_cpc_int_cntl(adev, mec + 1, pipe);
+			if (!reg)
+				continue;
+
+			val = RREG32_SOC15_IP(GC, reg);
+			val = REG_SET_FIELD(val, CP_ME1_PIPE0_INT_CNTL,
+					    OPCODE_ERROR_INT_ENABLE, enable);
+			WREG32_SOC15_IP(GC, reg, val);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Set PRIV_INSTR_INT_ENABLE on every CPG (ME) pipe that has an interrupt
+ * control register; states other than ENABLE/DISABLE are ignored.
+ */
+static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+					       struct amdgpu_irq_src *source,
+					       unsigned int type,
+					       enum amdgpu_interrupt_state state)
+{
+	const u32 enable = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
+	u32 reg, val;
+	int me, pipe;
+
+	if (state != AMDGPU_IRQ_STATE_DISABLE && state != AMDGPU_IRQ_STATE_ENABLE)
+		return 0;
+
+	for (me = 0; me < adev->gfx.me.num_me; me++) {
+		for (pipe = 0; pipe < adev->gfx.me.num_pipe_per_me; pipe++) {
+			reg = gfx_v12_0_get_cpg_int_cntl(adev, me, pipe);
+			if (!reg)
+				continue;
+
+			val = RREG32_SOC15_IP(GC, reg);
+			val = REG_SET_FIELD(val, CP_INT_CNTL_RING0,
+					    PRIV_INSTR_INT_ENABLE, enable);
+			WREG32_SOC15_IP(GC, reg, val);
+		}
+	}
+
+	return 0;
+}
+
+/* Raise a scheduler fault on the kernel ring named by the IV entry's ring_id. */
+static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev,
+					struct amdgpu_iv_entry *entry)
+{
+	u8 me_id, pipe_id, queue_id;
+	struct amdgpu_ring *rings;
+	unsigned int i, num_rings;
+
+	me_id = (entry->ring_id & 0x0c) >> 2;
+	pipe_id = (entry->ring_id & 0x03) >> 0;
+	queue_id = (entry->ring_id & 0x70) >> 4;
+
+	if (adev->gfx.disable_kq)	/* no kernel queues: nothing to fault */
+		return;
+
+	switch (me_id) {
+	case 0:
+		rings = adev->gfx.gfx_ring;
+		num_rings = adev->gfx.num_gfx_rings;
+		break;
+	case 1:
+	case 2:
+		rings = adev->gfx.compute_ring;
+		num_rings = adev->gfx.num_compute_rings;
+		break;
+	default:
+		WARN_ONCE(1, "unexpected me_id %u\n", me_id);	/* bad IV data must not crash the kernel */
+		return;
+	}
+
+	for (i = 0; i < num_rings; i++) {
+		if (rings[i].me == me_id && rings[i].pipe == pipe_id &&
+		    rings[i].queue == queue_id)
+			drm_sched_fault(&rings[i].sched);
+	}
+}
+
+static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v12_0_handle_priv_fault(adev, entry); /* flags a scheduler fault on the ring matching entry->ring_id */
+ return 0;
+}
+
+static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev,
+				struct amdgpu_irq_src *source,
+				struct amdgpu_iv_entry *entry)
+{
+	DRM_ERROR("Illegal opcode in command stream\n");
+	gfx_v12_0_handle_priv_fault(adev, entry);	/* flags a scheduler fault on the ring matching entry->ring_id */
+	return 0;
+}
+
+static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v12_0_handle_priv_fault(adev, entry); /* flags a scheduler fault on the ring matching entry->ring_id */
+ return 0;
+}
+
+static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int gcr_cntl = /* invalidate and write-back bits OR-ed into the final GCR_CNTL dword */
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); /* header; 7 payload dwords follow (8 dwords total) */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
+static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* A single NOP: the packet header itself is the NOP */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* One header with count min(num_nop - 2, 0x3ffe); assumes num_nop >= 1 (0 would underflow) - TODO confirm callers */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* The header is at index 0; the remaining num_nop - 1 dwords are written as NOP packets */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static void gfx_v12_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader: RUN_CLEANER_SHADER header plus one dword (2 dwords total) */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v12_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+
+ if (!adev->gfx.ip_dump_core) /* no core register buffer: nothing to print */
+ return;
+
+ for (i = 0; i < reg_count; i++) /* one "name value" line per core register */
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_12_0[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_12[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count; /* advance to the next queue's slice of the flat buffer */
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0; /* the gfx queue buffer is indexed from its own start */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_12[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count; /* advance to the next queue's slice of the flat buffer */
+ }
+ }
+ }
+}
+
+static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+
+ if (!adev->gfx.ip_dump_core) /* no core register buffer: nothing to capture */
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false); /* presumably keeps GFXOFF disabled while registers are read - TODO confirm */
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_12_0[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex); /* held across every soc24_grbm_select() in this loop */
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* the first num_me ids belong to GFX, so compute MEC i is selected as num_me + i */
+ soc24_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_12[reg]));
+ }
+ index += reg_count; /* next queue's slice of the flat buffer */
+ }
+ }
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0); /* select me/pipe/queue 0 again before unlocking */
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0; /* the gfx queue buffer is indexed from its own start */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ soc24_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_12[reg]));
+ }
+ index += reg_count; /* next queue's slice of the flat buffer */
+ }
+ }
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0); /* select me/pipe/queue 0 again before unlocking */
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Pipe reset stays disabled until the CP firmware fully supports it. */
+ dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
+ return false; /* unconditional: callers always take their -EOPNOTSUPP path */
+}
+
+static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v12_pipe_reset_support(adev)) /* currently always true, so the code below is not reached */
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) { /* build the "reset asserted" and "reset released" CP_ME_CNTL values */
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break; /* unknown pipe: both values stay 0 */
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); /* assert the reset bits */
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); /* then release them */
+
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO; /* 0 when the instruction pointer is back at the firmware start address */
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* The ME start PC is not always latched correctly, so the PC check
+ * above is informational only; the actual pipe reset result is
+ * determined by the later ring test.
+ */
+ return 0;
+}
+
+static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); /* first attempt: queue reset through MES */
+ if (r) {
+ dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
+ r = gfx_v12_reset_gfx_pipe(ring); /* fallback: reset the whole pipe */
+ if (r)
+ return r; /* note: returns without calling amdgpu_ring_reset_helper_end() */
+ }
+
+ r = gfx_v12_0_kgq_init_queue(ring, true); /* re-initialise the queue after the reset */
+ if (r) {
+ dev_err(adev->dev, "failed to init kgq\n");
+ return r;
+ }
+
+ r = amdgpu_mes_map_legacy_queue(adev, ring); /* map the queue again through MES */
+ if (r) {
+ dev_err(adev->dev, "failed to remap kgq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r = 0; /* stays 0 on the non-RS64 path, which has no instruction-pointer check */
+
+ if (!gfx_v12_pipe_reset_support(adev)) /* currently always true, so the code below is not reached */
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); /* start from the current register value */
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+ switch (ring->pipe) { /* set the pipe's reset bit in one value, clear it in the other */
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break; /* unknown pipe: the register value is written back unchanged */
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); /* assert the reset bit */
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); /* then release it */
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO; /* 0 when the instruction pointer is back at the firmware start address */
+ } else {
+ switch (ring->pipe) { /* NOTE(review): values read from CP_MEC_RS64_CNTL are written to CP_MEC_CNTL here - confirm intended */
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ /* No F32 MEC instruction pointer register was found, and the driver
+ * is not expected to run in F32 mode, so no PC check is done here.
+ */
+ }
+
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* The ring test is needed to verify the pipe reset result, so always return 0 here. */
+ return 0;
+}
+
+static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); /* first attempt: queue reset through MES */
+ if (r) {
+ dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ r = gfx_v12_0_reset_compute_pipe(ring); /* fallback: reset the whole pipe */
+ if (r)
+ return r; /* note: returns without calling amdgpu_ring_reset_helper_end() */
+ }
+
+ r = gfx_v12_0_kcq_init_queue(ring, true); /* re-initialise the queue after the reset */
+ if (r) {
+ dev_err(adev->dev, "failed to init kcq\n");
+ return r;
+ }
+ r = amdgpu_mes_map_legacy_queue(adev, ring); /* map the queue again through MES */
+ if (r) {
+ dev_err(adev->dev, "failed to remap kcq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring); /* paired with the profile end_use call in gfx_v12_0_ring_end_use() */
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring); /* paired with the isolation end_use call */
+}
+
+static void gfx_v12_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring); /* same helper order as gfx_v12_0_ring_begin_use() */
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
+static const struct amd_ip_funcs gfx_v12_0_ip_funcs = { /* IP-level callbacks referenced by gfx_v12_0_ip_block */
+ .name = "gfx_v12_0",
+ .early_init = gfx_v12_0_early_init,
+ .late_init = gfx_v12_0_late_init,
+ .sw_init = gfx_v12_0_sw_init,
+ .sw_fini = gfx_v12_0_sw_fini,
+ .hw_init = gfx_v12_0_hw_init,
+ .hw_fini = gfx_v12_0_hw_fini,
+ .suspend = gfx_v12_0_suspend,
+ .resume = gfx_v12_0_resume,
+ .is_idle = gfx_v12_0_is_idle,
+ .wait_for_idle = gfx_v12_0_wait_for_idle,
+ .set_clockgating_state = gfx_v12_0_set_clockgating_state,
+ .set_powergating_state = gfx_v12_0_set_powergating_state,
+ .get_clockgating_state = gfx_v12_0_get_clockgating_state,
+ .dump_ip_state = gfx_v12_ip_dump, /* captures registers into the adev->gfx.ip_dump_* buffers */
+ .print_ip_state = gfx_v12_ip_print, /* prints what dump_ip_state captured */
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { /* installed on every GFX ring by gfx_v12_0_set_ring_funcs() */
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v12_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v12_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 8 + /* gfx_v12_0_emit_mem_sync */
+ 2, /* gfx_v12_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 4, /* gfx_v12_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v12_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v12_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = gfx_v12_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec,
+ .preempt_ib = gfx_v12_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v12_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v12_0_emit_mem_sync,
+ .reset = gfx_v12_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v12_0_ring_begin_use,
+ .end_use = gfx_v12_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { /* installed on every compute ring by gfx_v12_0_set_ring_funcs() */
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v12_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v12_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 7 + /* gfx_v12_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v12_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v12_0_ring_emit_fence x3 for user fence, vm fence */
+ 8 + /* gfx_v12_0_emit_mem_sync */
+ 2, /* gfx_v12_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v12_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v12_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = gfx_v12_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v12_0_emit_mem_sync,
+ .reset = gfx_v12_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v12_0_ring_begin_use,
+ .end_use = gfx_v12_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = { /* installed on adev->gfx.kiq[0].ring by gfx_v12_0_set_ring_funcs() */
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v12_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v12_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 7 + /* gfx_v12_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v12_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v12_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v12_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v12_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop, /* generic helper, unlike the gfx/compute tables */
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v12_0_ring_emit_rreg, /* only the KIQ table provides emit_rreg */
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+};
+
+static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v12_0_ring_funcs_kiq; /* only KIQ instance 0 is set up */
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) /* every GFX ring shares one function table */
+ adev->gfx.gfx_ring[i].funcs = &gfx_v12_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) /* every compute ring shares one function table */
+ adev->gfx.compute_ring[i].funcs = &gfx_v12_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_eop_irq_funcs = { /* installed on adev->gfx.eop_irq */
+ .set = gfx_v12_0_set_eop_interrupt_state,
+ .process = gfx_v12_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = { /* installed on adev->gfx.priv_reg_irq */
+ .set = gfx_v12_0_set_priv_reg_fault_state,
+ .process = gfx_v12_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_bad_op_irq_funcs = { /* installed on adev->gfx.bad_op_irq */
+ .set = gfx_v12_0_set_bad_op_fault_state,
+ .process = gfx_v12_0_bad_op_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = { /* installed on adev->gfx.priv_inst_irq */
+ .set = gfx_v12_0_set_priv_inst_fault_state,
+ .process = gfx_v12_0_priv_inst_irq,
+};
+
+static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; /* one type per AMDGPU_CP_IRQ_* value */
+ adev->gfx.eop_irq.funcs = &gfx_v12_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1; /* the fault sources each expose a single type */
+ adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs;
+
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v12_0_bad_op_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs;
+}
+
+static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev)
+{
+	const bool is_apu = adev->flags & AMD_IS_APU;
+
+	/* APUs use MISSION_MODE for the IMU, everything else DEBUG_MODE. */
+	adev->gfx.imu.mode = is_apu ? MISSION_MODE : DEBUG_MODE;
+
+	adev->gfx.imu.funcs = &gfx_v12_0_imu_funcs;
+}
+
+static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v12_0_rlc_funcs; /* install the v12.0 RLC callback table */
+}
+
+static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* GFX engine MQD: size of struct v12_gfx_mqd and its init callback */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v12_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v12_0_gfx_mqd_init;
+ /* compute engine MQD: size of struct v12_compute_mqd and its init callback */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v12_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v12_0_compute_mqd_init;
+}
+
+/* Write @bitmap into the INACTIVE_WGPS field of GC_USER_SHADER_ARRAY_CONFIG. */
+static void gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+							  u32 bitmap)
+{
+	/* An empty bitmap disables nothing, so the register is left as is. */
+	if (bitmap) {
+		const u32 inactive_wgps =
+			(bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT) &
+			GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+		WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, inactive_wgps);
+	}
+}
+
+static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+	u32 inactive, valid_wgps;
+
+	/* A WGP is inactive if either shader array config register marks it so. */
+	inactive = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
+	inactive |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
+	inactive = (inactive & CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK) >>
+		   CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+	/* The WGP mask is max_cu_per_sh / 2 bits wide. */
+	valid_wgps = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+	return ~inactive & valid_wgps;
+}
+
+static u32 gfx_v12_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+	u32 wgp_idx, wgp_active_bitmap;
+	u32 cu_bitmap_per_wgp, cu_active_bitmap;
+
+	wgp_active_bitmap = gfx_v12_0_get_wgp_active_bitmap_per_sh(adev);
+	cu_active_bitmap = 0;
+
+	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+		/* one enabled WGP means 2 enabled CUs; unsigned so that 3 << 30 cannot overflow */
+		cu_bitmap_per_wgp = 3U << (2 * wgp_idx);
+		if (wgp_active_bitmap & (1U << wgp_idx))
+			cu_active_bitmap |= cu_bitmap_per_wgp;
+	}
+
+	return cu_active_bitmap;
+}
+
+static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap;
+ unsigned disable_masks[8 * 2]; /* one entry per (SE, SH) for up to 8 SEs x 2 SHs */
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex); /* held while SE/SH selection is changed below */
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j; /* here: bit index of this SA, not a CU bitmap */
+ if (!((gfx_v12_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
+ mask = 1;
+ counter = 0;
+ gfx_v12_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 8 && j < 2) /* stay inside disable_masks[8 * 2] */
+ gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v12_0_get_cu_active_bitmap_per_sh(adev); /* from here on: active CU bitmap of this SH */
+
+ /*
+ * GFX12 could support more than 4 SEs, while the bitmap
+ * in cu_info struct is 4x4 and ioctl interface struct
+ * drm_amdgpu_info_device should keep stable.
+ * So we use last two columns of bitmap to store cu mask for
+ * SEs 4 to 7, the layout of the bitmap is as below:
+ * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
+ * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
+ * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
+ * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
+ * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
+ * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
+ * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
+ * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
+ */
+ cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap; /* NOTE(review): assumes i < 8 and j < 2 - confirm against config limits */
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { /* count set bits among the first max_cu_per_sh */
+ if (bitmap & mask)
+ counter++;
+
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ }
+ }
+ gfx_v12_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); /* presumably restores broadcast selection - TODO confirm */
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v12_0_ip_block = { /* GFX IP block descriptor, version 12.0.0; declared extern in gfx_v12_0.h */
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
new file mode 100644
index 000000000000..f7184b2dc4e8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V12_0_H__
+#define __GFX_V12_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block;
+
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 34f9211b2679..7693b7953426 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -28,19 +28,33 @@
#include "amdgpu_gfx.h"
#include "amdgpu_ucode.h"
#include "clearstate_si.h"
+#include "si.h"
+#include "sid.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
#include "gca/gfx_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+
#include "gmc/gmc_6_0_d.h"
#include "gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
#include "dce/dce_6_0_sh_mask.h"
-#include "gca/gfx_7_2_enum.h"
+
#include "si_enums.h"
-#include "si.h"
+
+#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
+#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
+#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
+
+#define GFX6_NUM_GFX_RINGS 1
+#define GFX6_NUM_COMPUTE_RINGS 2
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -72,7 +86,7 @@ MODULE_FIRMWARE("amdgpu/hainan_ce.bin");
MODULE_FIRMWARE("amdgpu/hainan_rlc.bin");
static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev);
-static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer);
//static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev);
static void gfx_v6_0_init_pg(struct amdgpu_device *adev);
@@ -311,7 +325,6 @@ static const u32 verde_rlc_save_restore_register_list[] =
static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
const struct gfx_firmware_header_v1_0 *cp_hdr;
const struct rlc_firmware_header_v1_0 *rlc_hdr;
@@ -337,32 +350,36 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
@@ -371,7 +388,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- pr_err("gfx6: Failed to load firmware \"%s\"\n", fw_name);
+ pr_err("gfx6: Failed to load firmware %s gfx firmware\n", chip_name);
amdgpu_ucode_release(&adev->gfx.pfp_fw);
amdgpu_ucode_release(&adev->gfx.me_fw);
amdgpu_ucode_release(&adev->gfx.ce_fw);
@@ -1718,10 +1735,14 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
gfx_v6_0_get_cu_info(adev);
gfx_v6_0_config_init(adev);
- WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
- (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
- WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
- (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
+ WREG32(mmCP_QUEUE_THRESHOLDS,
+ ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
+ (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
+
+ /* set HW defaults for 3D engine */
+ WREG32(mmCP_MEQ_THRESHOLDS,
+ (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
+ (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
sx_debug_1 = RREG32(mmSX_DEBUG_1);
WREG32(mmSX_DEBUG_1, sx_debug_1);
@@ -1907,7 +1928,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
error:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
}
@@ -2333,7 +2354,7 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
{
const u32 *src_ptr;
- volatile u32 *dst_ptr;
+ u32 *dst_ptr;
u32 dws;
u64 reg_list_mc_addr;
const struct cs_section_def *cs_data;
@@ -2834,47 +2855,23 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v6_0_init_pg(struct amdgpu_device *adev)
@@ -3024,9 +3021,9 @@ static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = {
.start = gfx_v6_0_rlc_start
};
-static int gfx_v6_0_early_init(void *handle)
+static int gfx_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
@@ -3040,10 +3037,10 @@ static int gfx_v6_0_early_init(void *handle)
return 0;
}
-static int gfx_v6_0_sw_init(void *handle)
+static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r;
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -3108,10 +3105,10 @@ static int gfx_v6_0_sw_init(void *handle)
return r;
}
-static int gfx_v6_0_sw_fini(void *handle)
+static int gfx_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -3123,10 +3120,10 @@ static int gfx_v6_0_sw_fini(void *handle)
return 0;
}
-static int gfx_v6_0_hw_init(void *handle)
+static int gfx_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v6_0_constants_init(adev);
@@ -3143,9 +3140,9 @@ static int gfx_v6_0_hw_init(void *handle)
return r;
}
-static int gfx_v6_0_hw_fini(void *handle)
+static int gfx_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v6_0_cp_enable(adev, false);
adev->gfx.rlc.funcs->stop(adev);
@@ -3154,23 +3151,19 @@ static int gfx_v6_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v6_0_suspend(void *handle)
+static int gfx_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v6_0_hw_fini(adev);
+ return gfx_v6_0_hw_fini(ip_block);
}
-static int gfx_v6_0_resume(void *handle)
+static int gfx_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v6_0_hw_init(adev);
+ return gfx_v6_0_hw_init(ip_block);
}
-static bool gfx_v6_0_is_idle(void *handle)
+static bool gfx_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
return false;
@@ -3178,24 +3171,19 @@ static bool gfx_v6_0_is_idle(void *handle)
return true;
}
-static int gfx_v6_0_wait_for_idle(void *handle)
+static int gfx_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v6_0_is_idle(handle))
+ if (gfx_v6_0_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v6_0_soft_reset(void *handle)
-{
- return 0;
-}
-
static void gfx_v6_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -3383,11 +3371,11 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v6_0_set_clockgating_state(void *handle,
+static int gfx_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -3405,11 +3393,11 @@ static int gfx_v6_0_set_clockgating_state(void *handle,
return 0;
}
-static int gfx_v6_0_set_powergating_state(void *handle,
+static int gfx_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
gate = true;
@@ -3445,7 +3433,6 @@ static void gfx_v6_0_emit_mem_sync(struct amdgpu_ring *ring)
static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.name = "gfx_v6_0",
.early_init = gfx_v6_0_early_init,
- .late_init = NULL,
.sw_init = gfx_v6_0_sw_init,
.sw_fini = gfx_v6_0_sw_fini,
.hw_init = gfx_v6_0_hw_init,
@@ -3454,7 +3441,6 @@ static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.resume = gfx_v6_0_resume,
.is_idle = gfx_v6_0_is_idle,
.wait_for_idle = gfx_v6_0_wait_for_idle,
- .soft_reset = gfx_v6_0_soft_reset,
.set_clockgating_state = gfx_v6_0_set_clockgating_state,
.set_powergating_state = gfx_v6_0_set_powergating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index c2faf6b4c2fc..5976ed55d9db 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -55,6 +55,9 @@
#define GFX7_NUM_GFX_RINGS 1
#define GFX7_MEC_HPD_SIZE 2048
+#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
+#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
+
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
@@ -880,7 +883,7 @@ static const u32 kalindi_rlc_save_restore_register_list[] = {
};
static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
-static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
+static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer);
static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
@@ -909,7 +912,6 @@ static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
DRM_DEBUG("\n");
@@ -934,40 +936,44 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
BUG();
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
if (adev->asic_type == CHIP_KAVERI) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
if (err)
goto out;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
- if (err)
- goto out;
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
out:
if (err) {
- pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
+ pr_err("gfx7: Failed to load firmware %s gfx firmware\n", chip_name);
gfx_v7_0_free_microcode(adev);
}
return err;
@@ -2117,6 +2123,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+
/* Workaround for cache flush problems. First send a dummy EOP
* event down the pipe with seq one below.
*/
@@ -2136,7 +2144,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
EOP_TC_ACTION_EN |
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- EVENT_INDEX(5)));
+ EVENT_INDEX(5) |
+ (exec ? EOP_EXEC : 0)));
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -2324,7 +2333,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
error:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
}
@@ -2559,7 +2568,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2757,44 +2766,6 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
return 0;
}
-struct hqd_registers {
- u32 cp_mqd_base_addr;
- u32 cp_mqd_base_addr_hi;
- u32 cp_hqd_active;
- u32 cp_hqd_vmid;
- u32 cp_hqd_persistent_state;
- u32 cp_hqd_pipe_priority;
- u32 cp_hqd_queue_priority;
- u32 cp_hqd_quantum;
- u32 cp_hqd_pq_base;
- u32 cp_hqd_pq_base_hi;
- u32 cp_hqd_pq_rptr;
- u32 cp_hqd_pq_rptr_report_addr;
- u32 cp_hqd_pq_rptr_report_addr_hi;
- u32 cp_hqd_pq_wptr_poll_addr;
- u32 cp_hqd_pq_wptr_poll_addr_hi;
- u32 cp_hqd_pq_doorbell_control;
- u32 cp_hqd_pq_wptr;
- u32 cp_hqd_pq_control;
- u32 cp_hqd_ib_base_addr;
- u32 cp_hqd_ib_base_addr_hi;
- u32 cp_hqd_ib_rptr;
- u32 cp_hqd_ib_control;
- u32 cp_hqd_iq_timer;
- u32 cp_hqd_iq_rptr;
- u32 cp_hqd_dequeue_request;
- u32 cp_hqd_dma_offload;
- u32 cp_hqd_sema_cmd;
- u32 cp_hqd_msg_type;
- u32 cp_hqd_atomic0_preop_lo;
- u32 cp_hqd_atomic0_preop_hi;
- u32 cp_hqd_atomic1_preop_lo;
- u32 cp_hqd_atomic1_preop_hi;
- u32 cp_hqd_hq_scheduler0;
- u32 cp_hqd_hq_scheduler1;
- u32 cp_mqd_control;
-};
-
static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
int mec, int pipe)
{
@@ -2914,7 +2885,7 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
@@ -3274,7 +3245,7 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
return 0;
}
@@ -3500,7 +3471,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
u32 data;
@@ -3911,70 +3882,24 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
- switch (adev->asic_type) {
- case CHIP_BONAIRE:
- buffer[count++] = cpu_to_le32(0x16000012);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_KAVERI:
- buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_KABINI:
- case CHIP_MULLINS:
- buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_HAWAII:
- buffer[count++] = cpu_to_le32(0x3a00161a);
- buffer[count++] = cpu_to_le32(0x0000002e);
- break;
- default:
- buffer[count++] = cpu_to_le32(0x00000000);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- }
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
@@ -4172,9 +4097,9 @@ static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
.update_spm_vmid = gfx_v7_0_update_spm_vmid
};
-static int gfx_v7_0_early_init(void *handle)
+static int gfx_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
@@ -4189,9 +4114,9 @@ static int gfx_v7_0_early_init(void *handle)
return 0;
}
-static int gfx_v7_0_late_init(void *handle)
+static int gfx_v7_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -4381,10 +4306,10 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
return 0;
}
-static int gfx_v7_0_sw_init(void *handle)
+static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j, k, r, ring_id;
switch (adev->asic_type) {
@@ -4477,9 +4402,9 @@ static int gfx_v7_0_sw_init(void *handle)
return r;
}
-static int gfx_v7_0_sw_fini(void *handle)
+static int gfx_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
@@ -4503,10 +4428,10 @@ static int gfx_v7_0_sw_fini(void *handle)
return 0;
}
-static int gfx_v7_0_hw_init(void *handle)
+static int gfx_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v7_0_constants_init(adev);
@@ -4524,9 +4449,9 @@ static int gfx_v7_0_hw_init(void *handle)
return r;
}
-static int gfx_v7_0_hw_fini(void *handle)
+static int gfx_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -4537,23 +4462,19 @@ static int gfx_v7_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v7_0_suspend(void *handle)
+static int gfx_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v7_0_hw_fini(adev);
+ return gfx_v7_0_hw_fini(ip_block);
}
-static int gfx_v7_0_resume(void *handle)
+static int gfx_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v7_0_hw_init(adev);
+ return gfx_v7_0_hw_init(ip_block);
}
-static bool gfx_v7_0_is_idle(void *handle)
+static bool gfx_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
return false;
@@ -4561,11 +4482,11 @@ static bool gfx_v7_0_is_idle(void *handle)
return true;
}
-static int gfx_v7_0_wait_for_idle(void *handle)
+static int gfx_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -4578,11 +4499,11 @@ static int gfx_v7_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v7_0_soft_reset(void *handle)
+static int gfx_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32(mmGRBM_STATUS);
@@ -4888,11 +4809,11 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v7_0_set_clockgating_state(void *handle,
+static int gfx_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -4911,11 +4832,11 @@ static int gfx_v7_0_set_clockgating_state(void *handle,
return 0;
}
-static int gfx_v7_0_set_powergating_state(void *handle,
+static int gfx_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
gate = true;
@@ -5041,6 +4962,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
+ .soft_recovery = gfx_v7_0_ring_soft_recovery,
.emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 885ebd703260..0856ff65288c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -883,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -914,7 +914,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -939,7 +939,6 @@ static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
@@ -982,15 +981,18 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
}
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_pfp_2.bin", chip_name);
if (err == -ENODEV) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
}
if (err)
goto out;
@@ -999,15 +1001,18 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_me_2.bin", chip_name);
if (err == -ENODEV) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
}
if (err)
goto out;
@@ -1017,15 +1022,18 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_ce_2.bin", chip_name);
if (err == -ENODEV) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
}
if (err)
goto out;
@@ -1044,8 +1052,9 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
} else
adev->virt.chained_ib_support = false;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
@@ -1093,15 +1102,18 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_mec_2.bin", chip_name);
if (err == -ENODEV) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
}
if (err)
goto out;
@@ -1112,15 +1124,18 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if ((adev->asic_type != CHIP_STONEY) &&
(adev->asic_type != CHIP_TOPAZ)) {
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_mec2_2.bin", chip_name);
if (err == -ENODEV) {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
}
} else {
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
}
if (!err) {
cp_hdr = (const struct gfx_firmware_header_v1_0 *)
@@ -1194,9 +1209,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- dev_err(adev->dev,
- "gfx8: Failed to load firmware \"%s\"\n",
- fw_name);
+ dev_err(adev->dev, "gfx8: Failed to load firmware %s gfx firmware\n", chip_name);
amdgpu_ucode_release(&adev->gfx.pfp_fw);
amdgpu_ucode_release(&adev->gfx.me_fw);
amdgpu_ucode_release(&adev->gfx.ce_fw);
@@ -1207,51 +1220,24 @@ out:
return err;
}
-static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
- buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
- PACKET3_SET_CONTEXT_REG_START);
+ buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
@@ -1288,7 +1274,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
return 0;
}
@@ -1643,7 +1629,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
RREG32(sec_ded_counter_registers[i]);
fail:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
@@ -1897,12 +1883,12 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
-static int gfx_v8_0_sw_init(void *handle)
+static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id;
+ int xcc_id = 0;
struct amdgpu_ring *ring;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (adev->asic_type) {
case CHIP_TONGA:
@@ -2022,8 +2008,7 @@ static int gfx_v8_0_sw_init(void *handle)
return r;
}
- kiq = &adev->gfx.kiq[0];
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
@@ -2041,9 +2026,9 @@ static int gfx_v8_0_sw_init(void *handle)
return 0;
}
-static int gfx_v8_0_sw_fini(void *handle)
+static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
@@ -4264,7 +4249,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -4308,9 +4293,7 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32(mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32(mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32(mmRLC_CP_SCHEDULERS, tmp);
+ WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
@@ -4656,6 +4639,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
/* reset ring buffer */
ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
amdgpu_ring_clear_ring(ring);
}
return 0;
@@ -4673,60 +4657,25 @@ static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v8_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
+ gfx_v8_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v8_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
- if (!r) {
- r = gfx_v8_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v8_0_kcq_init_queue(&adev->gfx.compute_ring[i]);
if (r)
- goto done;
+ return r;
}
gfx_v8_0_set_mec_doorbell_range(adev);
- r = gfx_v8_0_kiq_kcq_enable(adev);
- if (r)
- goto done;
-
-done:
- return r;
+ return gfx_v8_0_kiq_kcq_enable(adev);
}
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
@@ -4787,10 +4736,10 @@ static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
gfx_v8_0_cp_compute_enable(adev, enable);
}
-static int gfx_v8_0_hw_init(void *handle)
+static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v8_0_init_golden_registers(adev);
gfx_v8_0_constants_init(adev);
@@ -4827,6 +4776,13 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
}
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * The ring test does a basic scratch register change check. Run it here
+ * to ensure that the unmap queues packet submitted above has been
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ disable failed\n");
@@ -4834,9 +4790,9 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
return r;
}
-static bool gfx_v8_0_is_idle(void *handle)
+static bool gfx_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
|| RREG32(mmGRBM_STATUS2) != 0x8)
@@ -4869,13 +4825,13 @@ static int gfx_v8_0_wait_for_rlc_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v8_0_wait_for_idle(void *handle)
+static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v8_0_is_idle(handle))
+ if (gfx_v8_0_is_idle(ip_block))
return 0;
udelay(1);
@@ -4883,9 +4839,9 @@ static int gfx_v8_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v8_0_hw_fini(void *handle)
+static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -4901,8 +4857,9 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
+
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
- if (!gfx_v8_0_wait_for_idle(adev))
+ if (!gfx_v8_0_wait_for_idle(ip_block))
gfx_v8_0_cp_enable(adev, false);
else
pr_err("cp is busy, skip halt cp\n");
@@ -4915,19 +4872,19 @@ static int gfx_v8_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v8_0_suspend(void *handle)
+static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v8_0_hw_fini(handle);
+ return gfx_v8_0_hw_fini(ip_block);
}
-static int gfx_v8_0_resume(void *handle)
+static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v8_0_hw_init(handle);
+ return gfx_v8_0_hw_init(ip_block);
}
-static bool gfx_v8_0_check_soft_reset(void *handle)
+static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
@@ -4987,9 +4944,9 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
}
}
-static int gfx_v8_0_pre_soft_reset(void *handle)
+static int gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0;
if ((!adev->gfx.grbm_soft_reset) &&
@@ -5028,9 +4985,9 @@ static int gfx_v8_0_pre_soft_reset(void *handle)
return 0;
}
-static int gfx_v8_0_soft_reset(void *handle)
+static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
@@ -5090,9 +5047,9 @@ static int gfx_v8_0_soft_reset(void *handle)
return 0;
}
-static int gfx_v8_0_post_soft_reset(void *handle)
+static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0;
if ((!adev->gfx.grbm_soft_reset) &&
@@ -5258,9 +5215,9 @@ static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
-static int gfx_v8_0_early_init(void *handle)
+static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
@@ -5275,9 +5232,9 @@ static int gfx_v8_0_early_init(void *handle)
return 0;
}
-static int gfx_v8_0_late_init(void *handle)
+static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -5317,7 +5274,7 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
(adev->asic_type == CHIP_POLARIS12) ||
(adev->asic_type == CHIP_VEGAM))
/* Send msg to SMU via Powerplay */
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0);
WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
@@ -5363,10 +5320,10 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
}
}
-static int gfx_v8_0_set_powergating_state(void *handle,
+static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -5434,9 +5391,9 @@ static int gfx_v8_0_set_powergating_state(void *handle,
return 0;
}
-static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -5579,7 +5536,7 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
}
}
-static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
u32 data;
@@ -5621,8 +5578,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t temp, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
@@ -5716,8 +5671,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
gfx_v8_0_wait_for_rlc_serdes(adev);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5727,8 +5680,6 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
@@ -5809,12 +5760,12 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
}
gfx_v8_0_wait_for_rlc_serdes(adev);
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
if (enable) {
/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
* === MGCG + MGLS + TS(CG/LS) ===
@@ -5828,6 +5779,8 @@ static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
}
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -5978,10 +5931,10 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v8_0_set_clockgating_state(void *handle,
+static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -6153,6 +6106,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
{
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+ bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
/* Workaround for cache flush problems. First send a dummy EOP
* event down the pipe with seq one below.
@@ -6176,7 +6130,8 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
EOP_TC_ACTION_EN |
EOP_TC_WB_ACTION_EN |
EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- EVENT_INDEX(5)));
+ EVENT_INDEX(5) |
+ (exec ? EOP_EXEC : 0)));
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -6327,33 +6282,22 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr & ring->buf_mask) - 1;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
-}
-
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
@@ -6933,7 +6877,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.emit_switch_buffer = gfx_v8_ring_emit_sb,
.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
.soft_recovery = gfx_v8_0_ring_soft_recovery,
.emit_mem_sync = gfx_v8_0_emit_mem_sync,
@@ -6969,6 +6912,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .soft_recovery = gfx_v8_0_ring_soft_recovery,
.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
.emit_wave_limit = gfx_v8_0_emit_wave_limit,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index fd61574a737c..dd19a97436db 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -50,6 +50,7 @@
#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
+#include "gfx_v9_0_cleaner_shader.h"
#include "gfx_v9_4_2.h"
#include "asic_reg/pwr/pwr_10_0_offset.h"
@@ -149,6 +150,162 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
+static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3),
+ /* packet headers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
+ /* compute queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP)
+};
+
enum ta_ras_gfx_subblock {
/*CPC*/
TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
@@ -764,10 +921,18 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
unsigned int vmid);
+static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring,
PACKET3_SET_RESOURCES_VMID_MASK(0) |
@@ -777,8 +942,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring,
upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -875,12 +1040,47 @@ static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}
+
+static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+
+ /* enter safe mode */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* wait until the dequeue takes effect */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+ }
+
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+}
+
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx_v9_0_kiq_set_resources,
.kiq_map_queues = gfx_v9_0_kiq_map_queues,
.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
.kiq_query_status = gfx_v9_0_kiq_query_status,
.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -895,7 +1095,7 @@ static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
soc15_program_register_sequence(adev,
golden_settings_gc_9_0,
@@ -951,8 +1151,8 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
break;
}
- if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
- (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}
@@ -1039,8 +1239,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -1070,7 +1270,7 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -1095,14 +1295,15 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
adev->gfx.me_fw_write_wait = false;
adev->gfx.mec_fw_write_wait = false;
- if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) &&
((adev->gfx.mec_fw_version < 0x000001a5) ||
- (adev->gfx.mec_feature_version < 46) ||
- (adev->gfx.pfp_fw_version < 0x000000b7) ||
- (adev->gfx.pfp_feature_version < 46)))
+ (adev->gfx.mec_feature_version < 46) ||
+ (adev->gfx.pfp_fw_version < 0x000000b7) ||
+ (adev->gfx.pfp_feature_version < 46)))
DRM_WARN_ONCE("CP firmware version too old, please update!");
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 42) &&
@@ -1172,6 +1373,10 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
+ /* https://bbs.openkylin.top/t/topic/171497 */
+ { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
+ /* HP 705G4 DM with R5 2400G */
+ { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
{ 0, 0, 0, 0, 0 },
};
@@ -1202,7 +1407,7 @@ static bool is_raven_kicker(struct amdgpu_device *adev)
static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
{
- if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
(adev->gfx.me_fw_version >= 0x000000a5) &&
(adev->gfx.me_feature_version >= 52))
return true;
@@ -1215,7 +1420,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
@@ -1249,23 +1454,25 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
char *chip_name)
{
- char fw_name[30];
int err;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
@@ -1282,7 +1489,6 @@ out:
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
char *chip_name)
{
- char fw_name[30];
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
@@ -1300,20 +1506,25 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
if (!strcmp(chip_name, "picasso") &&
(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_am4.bin", chip_name);
else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
(smu_version >= 0x41e2b))
/**
*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
*/
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_kicker_rlc.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
- rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
@@ -1326,9 +1537,9 @@ out:
static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
{
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
return false;
return true;
@@ -1337,28 +1548,31 @@ static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
char *chip_name)
{
- char fw_name[30];
int err;
if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
-
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
+
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec2.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
-
- /* ignore failures to load */
- err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2.bin", chip_name);
if (!err) {
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
@@ -1434,45 +1648,18 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
return count;
}
-static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
- volatile u32 *buffer)
+static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
@@ -1485,7 +1672,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
always_on_cu_num = 4;
- else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
always_on_cu_num = 8;
else
always_on_cu_num = 12;
@@ -1836,7 +2023,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
u32 gb_addr_config;
int err;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
@@ -1994,15 +2181,43 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
hw_prio, NULL);
}
-static int gfx_v9_0_sw_init(void *handle)
+static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+}
+
+static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id;
+ int xcc_id = 0;
struct amdgpu_ring *ring;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
unsigned int hw_prio;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
@@ -2018,6 +2233,43 @@ static int gfx_v9_0_sw_init(void *handle)
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 3, 0):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 167 &&
+ adev->gfx.pfp_fw_version >= 196 &&
+ adev->gfx.mec_fw_version >= 474) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(9, 4, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 88) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
@@ -2026,6 +2278,13 @@ static int gfx_v9_0_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
@@ -2080,7 +2339,7 @@ static int gfx_v9_0_sw_init(void *handle)
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
/* disable scheduler on the real ring */
- ring->no_scheduler = true;
+ ring->no_scheduler = adev->gfx.mcbp;
ring->vm_hub = AMDGPU_GFXHUB(0);
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
@@ -2090,7 +2349,7 @@ static int gfx_v9_0_sw_init(void *handle)
}
/* set up the software rings */
- if (adev->gfx.num_gfx_rings) {
+ if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
ring = &adev->gfx.sw_gfx_ring[i];
ring->ring_obj = NULL;
@@ -2145,14 +2404,21 @@ static int gfx_v9_0_sw_init(void *handle)
}
}
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
- kiq = &adev->gfx.kiq[0];
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
@@ -2172,16 +2438,22 @@ static int gfx_v9_0_sw_init(void *handle)
return -EINVAL;
}
+ gfx_v9_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
-static int gfx_v9_0_sw_fini(void *handle)
+static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (adev->gfx.num_gfx_rings) {
+ if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
amdgpu_ring_mux_fini(&adev->gfx.muxer);
@@ -2196,6 +2468,8 @@ static int gfx_v9_0_sw_fini(void *handle)
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
amdgpu_gfx_kiq_fini(adev, 0);
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
gfx_v9_0_mec_fini(adev);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
@@ -2207,6 +2481,11 @@ static int gfx_v9_0_sw_fini(void *handle)
}
gfx_v9_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+
return 0;
}
@@ -2363,13 +2642,16 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
{
uint32_t tmp;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 1):
tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
!READ_ONCE(adev->barrier_has_auto_waitcnt));
WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
break;
+ case IP_VERSION(9, 4, 2):
+ gfx_v9_4_2_init_sq(adev);
+ break;
default:
break;
}
@@ -2380,7 +2662,10 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ if (!amdgpu_sriov_vf(adev) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) {
+ WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ }
gfx_v9_0_tiling_mode_table_init(adev);
@@ -2474,7 +2759,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
- if(adev->gfx.num_gfx_rings)
+ if (adev->gfx.num_gfx_rings)
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
@@ -2700,7 +2985,7 @@ static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
pwr_10_0_gfxip_control_over_cgpg(adev, true);
}
}
@@ -2812,7 +3097,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
* And it's needed by gfxoff feature.
*/
if (adev->gfx.rlc.is_rlc_v2_1) {
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 2, 1) ||
(adev->apu_flags & AMD_APU_IS_RAVEN2))
gfx_v9_1_init_rlc_save_restore_list(adev);
gfx_v9_0_enable_save_restore_machine(adev);
@@ -2925,7 +3211,7 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 2, 2):
case IP_VERSION(9, 1, 0):
gfx_v9_0_init_lbpw(adev);
@@ -2956,6 +3242,15 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
@@ -3033,6 +3328,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
gfx_v9_0_cp_gfx_enable(adev, true);
+ /* For now this quirk is limited to the gfx9 APU series; it has already
+ * been confirmed that gfx10/gfx11 APUs do not need such an update.
+ */
+ if (adev->flags & AMD_IS_APU &&
+ adev->in_s3 && !pm_resume_via_firmware()) {
+ DRM_INFO("Will skip the CSB packet resubmit\n");
+ return 0;
+ }
r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
@@ -3110,7 +3413,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
@@ -3157,7 +3460,15 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
} else {
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
- (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
@@ -3215,9 +3526,7 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
@@ -3566,7 +3875,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
return 0;
}
-static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
+static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v9_mqd *mqd = ring->mqd_ptr;
@@ -3578,8 +3887,8 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
*/
tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
- if (!tmp_mqd->cp_hqd_pq_control ||
- (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
+ if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
+ (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
@@ -3606,55 +3915,23 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v9_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
+ gfx_v9_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v9_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v9_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v9_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, 0);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
@@ -3678,6 +3955,10 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return r;
}
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_cp_gfx_enable(adev, false);
+ gfx_v9_0_cp_compute_enable(adev, false);
+
r = gfx_v9_0_kiq_resume(adev);
if (r)
return r;
@@ -3713,8 +3994,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
{
u32 tmp;
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
- adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
return;
tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
@@ -3734,10 +4015,13 @@ static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
gfx_v9_0_cp_compute_enable(adev, enable);
}
-static int gfx_v9_0_hw_init(void *handle)
+static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
if (!amdgpu_sriov_vf(adev))
gfx_v9_0_init_golden_registers(adev);
@@ -3754,20 +4038,22 @@ static int gfx_v9_0_hw_init(void *handle)
if (r)
return r;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) &&
+ !amdgpu_sriov_vf(adev))
gfx_v9_4_2_set_power_brake_sequence(adev);
return r;
}
-static int gfx_v9_0_hw_fini(void *handle)
+static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
/* DF freeze and kcq disable will fail */
if (!amdgpu_ras_intr_triggered())
@@ -3802,7 +4088,7 @@ static int gfx_v9_0_hw_fini(void *handle)
/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
- (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
dev_dbg(adev->dev, "Skipping RLC halt\n");
return 0;
}
@@ -3811,19 +4097,19 @@ static int gfx_v9_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v9_0_suspend(void *handle)
+static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_0_hw_fini(handle);
+ return gfx_v9_0_hw_fini(ip_block);
}
-static int gfx_v9_0_resume(void *handle)
+static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_0_hw_init(handle);
+ return gfx_v9_0_hw_init(ip_block);
}
-static bool gfx_v9_0_is_idle(void *handle)
+static bool gfx_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -3832,24 +4118,24 @@ static bool gfx_v9_0_is_idle(void *handle)
return true;
}
-static int gfx_v9_0_wait_for_idle(void *handle)
+static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v9_0_is_idle(handle))
+ if (gfx_v9_0_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v9_0_soft_reset(void *handle)
+static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
@@ -3888,19 +4174,17 @@ static int gfx_v9_0_soft_reset(void *handle)
/* Disable MEC parsing/prefetching */
gfx_v9_0_cp_compute_enable(adev, false);
- if (grbm_soft_reset) {
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- }
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
/* Wait a little for things to settle down */
udelay(50);
@@ -3986,7 +4270,7 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock, clock_lo, clock_hi, hi_check;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 3, 0):
preempt_disable();
clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
@@ -4005,7 +4289,9 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
default:
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->gfx.gpu_clock_mutex);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 0, 1) &&
+ amdgpu_sriov_runtime(adev)) {
clock = gfx_v9_0_kiq_read_clock(adev);
} else {
WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
@@ -4357,7 +4643,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
if (!ring->sched.ready)
return 0;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
@@ -4497,20 +4783,20 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
}
fail:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
}
-static int gfx_v9_0_early_init(void *handle)
+static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
adev->gfx.num_gfx_rings = 0;
else
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
@@ -4529,9 +4815,9 @@ static int gfx_v9_0_early_init(void *handle)
return gfx_v9_0_init_microcode(adev);
}
-static int gfx_v9_0_ecc_late_init(void *handle)
+static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/*
@@ -4548,7 +4834,7 @@ static int gfx_v9_0_ecc_late_init(void *handle)
}
/* requires IBs so do in late init after IB pool is initialized */
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
else
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
@@ -4563,9 +4849,9 @@ static int gfx_v9_0_ecc_late_init(void *handle)
return 0;
}
-static int gfx_v9_0_late_init(void *handle)
+static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -4576,11 +4862,15 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
- r = gfx_v9_0_ecc_late_init(handle);
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
+ r = gfx_v9_0_ecc_late_init(ip_block);
if (r)
return r;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
gfx_v9_4_2_debug_trap_config_init(adev,
adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
else
@@ -4669,14 +4959,12 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t data, def;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 1 - RLC_CGTT_MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
@@ -4710,7 +4998,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
/* 1 - MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
@@ -4735,8 +5023,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
}
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
@@ -4747,8 +5033,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
if (!adev->gfx.num_gfx_rings)
return;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
/* Enable 3D CGCG/CGLS */
if (enable) {
/* write cmd to clear cgcg/cgls ov */
@@ -4790,8 +5074,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -4799,8 +5081,6 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t def, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset CGCG override */
@@ -4816,7 +5096,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
/* enable cgcg FSM(0x0000363F) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
else
@@ -4842,13 +5122,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable) {
/* CGCG/CGLS should be enabled after MGCG/MGLS
* === MGCG + MGLS ===
@@ -4868,6 +5147,7 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === MGCG + MGLS === */
gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
}
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -4891,7 +5171,7 @@ static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
}
-static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid)
+static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
{
amdgpu_gfx_off_ctrl(adev, false);
@@ -4945,18 +5225,18 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
};
-static int gfx_v9_0_set_powergating_state(void *handle,
+static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 2, 2):
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 3, 0):
if (!enable)
- amdgpu_gfx_off_ctrl(adev, false);
+ amdgpu_gfx_off_ctrl_immediate(adev, false);
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
@@ -4978,10 +5258,10 @@ static int gfx_v9_0_set_powergating_state(void *handle,
gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
if (enable)
- amdgpu_gfx_off_ctrl(adev, true);
+ amdgpu_gfx_off_ctrl_immediate(adev, true);
break;
case IP_VERSION(9, 2, 1):
- amdgpu_gfx_off_ctrl(adev, enable);
+ amdgpu_gfx_off_ctrl_immediate(adev, enable);
break;
default:
break;
@@ -4990,15 +5270,15 @@ static int gfx_v9_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v9_0_set_clockgating_state(void *handle,
+static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
@@ -5016,9 +5296,9 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -5048,7 +5328,7 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
/* AMD_CG_SUPPORT_GFX_3D_CGCG */
data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
@@ -5183,16 +5463,8 @@ static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
payload_size = sizeof(struct v9_ce_ib_state);
- if (ring->is_mes_queue) {
- payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v9_gfx_meta_data, ce_payload);
- ce_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
- } else {
- payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
- ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
- }
+ payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
@@ -5215,16 +5487,8 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
payload_size = sizeof(struct v9_de_ib_state);
- if (ring->is_mes_queue) {
- payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v9_gfx_meta_data, de_payload);
- de_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
- } else {
- payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
- de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
- }
+ payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
IB_COMPLETION_STATUS_PREEMPTED;
@@ -5414,19 +5678,9 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v9_gfx_meta_data, ce_payload);
- ce_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ce_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v9_gfx_meta_data, ce_payload);
- ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- }
+ offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
@@ -5512,28 +5766,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo
void *de_payload_cpu_addr;
int cnt;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v9_gfx_meta_data, de_payload);
- de_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- de_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gds_backup) +
- offsetof(struct v9_gfx_meta_data, de_payload);
- gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v9_gfx_meta_data, de_payload);
- de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
- AMDGPU_CSA_SIZE - adev->gds.gds_size,
- PAGE_SIZE);
- }
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
if (usegds) {
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
@@ -5600,31 +5839,21 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
amdgpu_ring_write(ring, 0);
}
-static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *addr == 0 */
+ amdgpu_ring_write(ring, 0);
ret = ring->wptr & ring->buf_mask;
- amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
- unsigned cur;
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (likely(cur > offset))
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
-}
-
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
@@ -5697,7 +5926,9 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
WREG32_SOC15(GC, 0, mmSQ_CMD, value);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
@@ -5768,17 +5999,95 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
}
}
+static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
enum amdgpu_interrupt_state state)
{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
PRIV_REG_INT_ENABLE,
state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
break;
default:
break;
@@ -5899,11 +6208,14 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
switch (me_id) {
case 0:
- if (adev->gfx.num_gfx_rings &&
- !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
- /* Fence signals are handled on the software rings*/
- for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
- amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
+ if (adev->gfx.num_gfx_rings) {
+ if (!adev->gfx.mcbp) {
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
+ /* Fence signals are handled on the software rings*/
+ for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+ amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
+ }
}
break;
case 1:
@@ -5957,6 +6269,15 @@ static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream\n");
+ gfx_v9_0_fault(adev, entry);
+ return 0;
+}
+
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -6454,7 +6775,7 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
return ret;
}
-static const char *vml2_mems[] = {
+static const char * const vml2_mems[] = {
"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
"UTC_VML2_BANK_CACHE_0_4K_MEM0",
@@ -6473,7 +6794,7 @@ static const char *vml2_mems[] = {
"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
-static const char *vml2_walker_mems[] = {
+static const char * const vml2_walker_mems[] = {
"UTC_VML2_CACHE_PDE0_MEM0",
"UTC_VML2_CACHE_PDE0_MEM1",
"UTC_VML2_CACHE_PDE1_MEM0",
@@ -6483,7 +6804,7 @@ static const char *vml2_walker_mems[] = {
"UTC_VML2_RDIF_LOG_FIFO",
};
-static const char *atc_l2_cache_2m_mems[] = {
+static const char * const atc_l2_cache_2m_mems[] = {
"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
@@ -6837,6 +7158,230 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
}
}
+static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by the remaining NOPs one at a time */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nop - 1 NOP packets */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ int i, r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r)
+ return r;
+
+ /* make sure dequeue is complete */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ if (r) {
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ return r;
+ }
+
+ r = gfx_v9_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r) {
+ DRM_ERROR("fail to remap queue\n");
+ return r;
+ }
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_9[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_9[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+}
+
+static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
+
+ if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_9[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+}
+
+static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* Emit the cleaner shader */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ else
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0));
+
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ip_block *gfx_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+
+ /* Raven and PCO APUs seem to have stability issues
+ * with compute and gfxoff and gfx pg. Disable gfx pg during
+ * submission and allow again afterwards.
+ */
+ if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
+ gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE);
+}
+
+static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ip_block *gfx_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+
+ /* Raven and PCO APUs seem to have stability issues
+ * with compute and gfxoff and gfx pg. Disable gfx pg during
+ * submission and allow again afterwards.
+ */
+ if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
+ gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init,
@@ -6853,6 +7398,8 @@ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.set_clockgating_state = gfx_v9_0_set_clockgating_state,
.set_powergating_state = gfx_v9_0_set_powergating_state,
.get_clockgating_state = gfx_v9_0_get_clockgating_state,
+ .dump_ip_state = gfx_v9_ip_dump,
+ .print_ip_state = gfx_v9_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
@@ -6884,7 +7431,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
- 7, /* gfx_v9_0_emit_mem_sync */
+ 7 + /* gfx_v9_0_emit_mem_sync */
+ 2, /* gfx_v9_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6893,12 +7441,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
.test_ring = gfx_v9_0_ring_test_ring,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v9_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v9_ring_emit_sb,
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v9_0_ring_preempt_ib,
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
@@ -6906,6 +7453,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
+ .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+ .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
+ .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
@@ -6938,7 +7488,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
- 7, /* gfx_v9_0_emit_mem_sync */
+ 7 + /* gfx_v9_0_emit_mem_sync */
+ 2, /* gfx_v9_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6948,12 +7499,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
- .insert_nop = amdgpu_sw_ring_insert_nop,
+ .insert_nop = gfx_v9_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v9_ring_emit_sb,
.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
- .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
@@ -6963,6 +7513,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
.patch_cntl = gfx_v9_0_ring_patch_cntl,
.patch_de = gfx_v9_0_ring_patch_de_meta,
.patch_ce = gfx_v9_0_ring_patch_ce_meta,
+ .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+ .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
+ .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -6980,11 +7533,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7 + /* gfx_v9_0_emit_mem_sync */
5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
- 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
+ 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
+ 2, /* gfx_v9_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6994,13 +7547,18 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v9_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
.emit_wave_limit = gfx_v9_0_emit_wave_limit,
+ .reset = gfx_v9_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v9_0_ring_begin_use_compute,
+ .end_use = gfx_v9_0_ring_end_use_compute,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -7018,7 +7576,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
@@ -7040,7 +7597,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
- if (adev->gfx.num_gfx_rings) {
+ if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
}
@@ -7059,6 +7616,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
.process = gfx_v9_0_priv_reg_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
+ .set = gfx_v9_0_set_bad_op_fault_state,
+ .process = gfx_v9_0_bad_op_irq,
+};
+
static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
.set = gfx_v9_0_set_priv_inst_fault_state,
.process = gfx_v9_0_priv_inst_irq,
@@ -7078,6 +7640,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
@@ -7087,7 +7652,7 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
@@ -7106,7 +7671,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asci gds info */
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 2, 1):
case IP_VERSION(9, 4, 0):
@@ -7128,7 +7693,7 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
break;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
case IP_VERSION(9, 4, 0):
adev->gds.gds_compute_max_wave_id = 0x7ff;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h
new file mode 100644
index 000000000000..0b6bd09b7529
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+/* Define the cleaner shader gfx_9_0 */
+static const u32 __maybe_unused gfx_9_0_cleaner_shader_hex[] = {
+ /* Add the cleaner shader code here */
+};
+
+/* Define the cleaner shader gfx_9_4_2 */
+static const u32 gfx_9_4_2_cleaner_shader_hex[] = {
+ 0xbf068100, 0xbf84003b,
+ 0xbf8a0000, 0xb07c0000,
+ 0xbe8200ff, 0x00000078,
+ 0xbf110802, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0x7e060280, 0x7e080280,
+ 0x7e0a0280, 0x7e0c0280,
+ 0x7e0e0280, 0x80828802,
+ 0xbe803202, 0xbf84fff5,
+ 0xbf9c0000, 0xbe8200ff,
+ 0x80000000, 0x86020102,
+ 0xbf840011, 0xbefe00c1,
+ 0xbeff00c1, 0xd28c0001,
+ 0x0001007f, 0xd28d0001,
+ 0x0002027e, 0x10020288,
+ 0xbe8200bf, 0xbefc00c1,
+ 0xd89c2000, 0x00020201,
+ 0xd89c6040, 0x00040401,
+ 0x320202ff, 0x00000400,
+ 0x80828102, 0xbf84fff8,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbf810000, 0xbf8d0001,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea01ff,
+ 0x000000ee, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index bc8416afb62c..6028afd81690 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -27,7 +27,6 @@
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
-#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"
#include "gc/gc_9_4_1_offset.h"
@@ -970,8 +969,9 @@ static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
}
-static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs =
- { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 };
+static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = {
+ SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32
+};
static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 63f6843a069e..8058ea91ecaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -412,7 +412,7 @@ static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
if (r) {
dev_err(adev->dev, "ib submit failed (%d).\n", r);
- amdgpu_ib_free(adev, ib, NULL);
+ amdgpu_ib_free(ib, NULL);
}
return r;
}
@@ -611,16 +611,16 @@ static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
}
disp2_failed:
- amdgpu_ib_free(adev, &disp_ibs[2], NULL);
+ amdgpu_ib_free(&disp_ibs[2], NULL);
dma_fence_put(fences[2]);
disp1_failed:
- amdgpu_ib_free(adev, &disp_ibs[1], NULL);
+ amdgpu_ib_free(&disp_ibs[1], NULL);
dma_fence_put(fences[1]);
disp0_failed:
- amdgpu_ib_free(adev, &disp_ibs[0], NULL);
+ amdgpu_ib_free(&disp_ibs[0], NULL);
dma_fence_put(fences[0]);
pro_end:
- amdgpu_ib_free(adev, &wb_ib, NULL);
+ amdgpu_ib_free(&wb_ib, NULL);
if (r)
dev_info(adev->dev, "Init SGPRS Failed\n");
@@ -687,10 +687,10 @@ static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
}
disp_failed:
- amdgpu_ib_free(adev, &disp_ib, NULL);
+ amdgpu_ib_free(&disp_ib, NULL);
dma_fence_put(fence);
pro_end:
- amdgpu_ib_free(adev, &wb_ib, NULL);
+ amdgpu_ib_free(&wb_ib, NULL);
if (r)
dev_info(adev->dev, "Init VGPRS Failed\n");
@@ -746,8 +746,18 @@ void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
die_id);
break;
}
+}
+
+void gfx_v9_4_2_init_sq(struct amdgpu_device *adev)
+{
+ uint32_t data;
- return;
+ if (adev->gfx.mec_fw_version >= 98) {
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ data = RREG32_SOC15(GC, 0, regSQ_CONFIG1);
+ data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regSQ_CONFIG1, data);
+ }
}
void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
@@ -1548,8 +1558,8 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev,
uint32_t ded_cnt)
{
uint32_t bank, way, mem;
- static const char *vml2_way_str[] = { "BIGK", "4K" };
- static const char *utcl2_rounter_str[] = { "VMC", "APT" };
+ static const char * const vml2_way_str[] = { "BIGK", "4K" };
+ static const char * const utcl2_router_str[] = { "VMC", "APT" };
mem = instance % blk->num_mem_blocks;
way = (instance / blk->num_mem_blocks) % blk->num_ways;
@@ -1570,7 +1580,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev,
dev_info(
adev->dev,
"GFX SubBlock UTCL2_ROUTER_IFIF%d_GROUP0_%s, SED %d, DED %d\n",
- bank, utcl2_rounter_str[mem], sec_cnt, ded_cnt);
+ bank, utcl2_router_str[mem], sec_cnt, ded_cnt);
break;
case ATC_L2_CACHE_2M:
dev_info(
@@ -1911,18 +1921,7 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
mutex_unlock(&adev->grbm_idx_mutex);
}
-static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev)
-{
- u32 status = 0;
- struct amdgpu_vmhub *hub;
-
- hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
- status = RREG32(hub->vm_l2_pro_fault_status);
- /* reset page fault status */
- WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
- return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
-}
struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = {
.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
@@ -1936,5 +1935,4 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = {
.hw_ops = &gfx_v9_4_2_ras_ops,
},
.enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
- .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
index 7584624b641c..a603724c1dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
@@ -28,6 +28,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
uint32_t first_vmid, uint32_t last_vmid);
void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
uint32_t die_id);
+void gfx_v9_4_2_init_sq(struct amdgpu_device *adev);
void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev);
int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm
new file mode 100644
index 000000000000..35b8cf9070bd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 dwords (256 bytes) of the 192-dword cleaner shader.
+// To turn this shader program on for compilation, change this to main and the lower shader main to main_1
+
+// MI200 : Clear SGPRs, VGPRs and LDS
+// Uses two kernels launched separately:
+// 1. Clean VGPRs, LDS, and lower SGPRs
+// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU
+// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD
+// Waves in the workgroup share the 64KB of LDS
+// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383
+// Each wave clears 128 VGPRs, so all 512 in the SIMD
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+// 2. Clean remaining SGPRs
+// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU
+// Waves are allocating 96 SGPRs
+// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223.
+// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799
+// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER
+// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command
+// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799
+
+shader main
+ asic(MI200)
+ type(CS)
+ wave_size(64)
+// Note: original source code from SQ team
+
+// (theoretical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks)
+
+ s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3
+ s_cbranch_scc0 label_0023 // scc = (s0 == 1): fall through to clean VGPRs and LDS, otherwise take the SGPR-only path at label_0023
+ S_BARRIER
+
+ s_movk_i32 m0, 0x0000
+ s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance)
+ //
+ // CLEAR VGPRs
+ //
+ s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing
+label_0005:
+ v_mov_b32 v0, 0
+ v_mov_b32 v1, 0
+ v_mov_b32 v2, 0
+ v_mov_b32 v3, 0
+ v_mov_b32 v4, 0
+ v_mov_b32 v5, 0
+ v_mov_b32 v6, 0
+ v_mov_b32 v7, 0
+ s_sub_u32 s2, s2, 8
+ s_set_gpr_idx_idx s2
+ s_cbranch_scc0 label_0005
+ s_set_gpr_idx_off
+
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // tg_size (first_wave) term, as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set; NOTE(review): operand is s1 but the original comment said sgpr0 - confirm
+ s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS only if this is the first wave of the ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_clean_sgpr_1:
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+s_endpgm
+
+label_0023:
+
+ s_sethalt 1
+
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop1:
+
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop1
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0xee // NOTE(review): loads 0xee into vcc, not 0 (the first kernel writes 0) - confirm this is intended
+
+s_endpgm
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 18ce5fe45f6f..77f9d5b9a556 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -37,10 +37,18 @@
#include "gc/gc_9_4_3_sh_mask.h"
#include "gfx_v9_4_3.h"
+#include "gfx_v9_4_3_cleaner_shader.h"
#include "amdgpu_xcp.h"
+#include "amdgpu_aca.h"
MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_5_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_5_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin");
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -48,6 +56,111 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
#define GOLDEN_GB_ADDR_CONFIG 0x2a114042
#define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301
+#define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */
+#define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */
+#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */
+#define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */
+
+#define NORMALIZE_XCC_REG_OFFSET(offset) \
+	((offset) & 0xFFFF) /* keep the low 16 bits; argument parenthesized so expressions expand safely */
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_DCACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_ICACHE_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQ_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regTCP_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = {
+ /* compute queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), /* NOTE(review): same register listed 8 times - presumably each read returns the next header dword; confirm it is not a copy/paste slip */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+};
+
struct amdgpu_gfx_ras gfx_v9_4_3_ras;
static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev);
@@ -56,10 +169,18 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
+static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring,
uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring,
PACKET3_SET_RESOURCES_VMID_MASK(0) |
@@ -69,8 +190,8 @@ static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring,
lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring,
upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -167,12 +288,46 @@ static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}
+/* Reset one hardware queue via direct register writes; only compute queues are handled. */
+static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+					  uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+					  uint32_t xcc_id, uint32_t vmid)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	unsigned int i;
+
+	/* enter safe mode */
+	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+	mutex_lock(&adev->srbm_mutex);
+	/* NOTE(review): gfx_v9_4_3_select_me_pipe_q() passes GET_INST(GC, xcc_id) as the last argument - confirm raw xcc_id is right */
+	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, xcc_id);
+	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 0x2);
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_COMPUTE_QUEUE_RESET, 0x1);
+		/* poll until CP_HQD_ACTIVE bit 0 clears or usec_timeout iterations elapse */
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
+		}
+		if (i >= adev->usec_timeout)
+			dev_err(adev->dev, "fail to wait on hqd deactive\n");
+	} else {
+		dev_err(adev->dev, "reset queue_type(%u) not supported\n", queue_type);
+	}
+	soc15_grbm_select(adev, 0, 0, 0, 0, 0); /* back to the all-zero me/pipe/queue/vmid selection */
+	mutex_unlock(&adev->srbm_mutex);
+	/* exit safe mode */
+	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+}
+
static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = {
.kiq_set_resources = gfx_v9_4_3_kiq_set_resources,
.kiq_map_queues = gfx_v9_4_3_kiq_map_queues,
.kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues,
.kiq_query_status = gfx_v9_4_3_kiq_query_status,
.kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v9_4_3_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -199,20 +354,28 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG,
GOLDEN_GB_ADDR_CONFIG);
- /* Golden settings applied by driver for ASIC with rev_id 0 */
- if (adev->rev_id == 0) {
- WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1,
- REDUCE_FIFO_DEPTH_BY_2, 2);
- } else {
- WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2,
- SPARE, 0x1);
- }
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, SPARE, 0x1);
}
}
+/* Reduce @reg to its low 16 bits when those bits fall inside an XCC register window. */
+static uint32_t gfx_v9_4_3_normalize_xcc_reg_offset(uint32_t reg)
+{
+	const uint32_t local = NORMALIZE_XCC_REG_OFFSET(reg);
+
+	/* gfxdec0 window: return the XCC-local offset */
+	if (local >= XCC_REG_RANGE_0_LOW && local < XCC_REG_RANGE_0_HIGH)
+		return local;
+	/* gfxdec1 window: same treatment */
+	if (local >= XCC_REG_RANGE_1_LOW && local < XCC_REG_RANGE_1_HIGH)
+		return local;
+	return reg; /* outside both windows: pass the offset through unchanged */
+}
+
static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
bool wc, uint32_t reg, uint32_t val)
{
+ reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
WRITE_DATA_DST_SEL(0) |
@@ -227,6 +390,12 @@ static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
uint32_t addr1, uint32_t ref, uint32_t mask,
uint32_t inv)
{
+ /* Only do the normalization on regspace */
+ if (mem_space == 0) {
+ addr0 = gfx_v9_4_3_normalize_xcc_reg_offset(addr0);
+ addr1 = gfx_v9_4_3_normalize_xcc_reg_offset(addr1);
+ }
+
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
amdgpu_ring_write(ring,
/* memory (1) or register (0) */
@@ -256,6 +425,7 @@ static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring)
xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0);
WREG32(scratch_reg0_offset, 0xCAFEDEAD);
+ tmp = RREG32(scratch_reg0_offset);
r = amdgpu_ring_alloc(ring, 3);
if (r)
@@ -296,8 +466,8 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -327,7 +497,7 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -364,15 +534,15 @@ static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev)
static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
const char *chip_name)
{
- char fw_name[30];
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
uint16_t version_minor;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
@@ -387,26 +557,24 @@ out:
return err;
}
-static bool gfx_v9_4_3_should_disable_gfxoff(struct pci_dev *pdev)
-{
- return true;
-}
-
-static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev)
-{
- if (gfx_v9_4_3_should_disable_gfxoff(adev->pdev))
- adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
-}
-
static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
const char *chip_name)
{
- char fw_name[30];
int err;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
-
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ if (amdgpu_sriov_vf(adev)) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec.bin", chip_name);
+
+ if (err)
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
+ } else
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
@@ -415,8 +583,6 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
- gfx_v9_4_3_check_if_need_gfxoff(adev);
-
out:
if (err)
amdgpu_ucode_release(&adev->gfx.mec_fw);
@@ -425,16 +591,16 @@ out:
static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev)
{
- const char *chip_name;
+ char ucode_prefix[15];
int r;
- chip_name = "gc_9_4_3";
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name);
+ r = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix);
if (r)
return r;
- r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name);
+ r = gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix);
if (r)
return r;
@@ -618,12 +784,21 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
}
+/* Return the NUM_XCC_IN_XCP field of the CP_HYP_XCP_CTL register. */
+static int gfx_v9_4_3_get_xccs_per_xcp(struct amdgpu_device *adev)
+{
+	/* All instances are expected to hold the same value, so sample the first */
+	const u32 hyp_xcp_ctl = RREG32_SOC15(GC, GET_INST(GC, 0),
+					     regCP_HYP_XCP_CTL);
+
+	return REG_GET_FIELD(hyp_xcp_ctl, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP);
+}
static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
int num_xccs_per_xcp)
{
int ret, i, num_xcc;
- u32 tmp = 0, regval;
+ u32 tmp = 0;
if (adev->psp.funcs) {
ret = psp_spatial_partition(&adev->psp,
@@ -631,24 +806,23 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
num_xccs_per_xcp);
if (ret)
return ret;
- }
-
- num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ } else {
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
- for (i = 0; i < num_xcc; i++) {
- tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP,
- num_xccs_per_xcp);
- tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID,
- i % num_xccs_per_xcp);
- regval = RREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL);
- if (regval != tmp)
+ for (i = 0; i < num_xcc; i++) {
+ tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP,
+ num_xccs_per_xcp);
+ tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID,
+ i % num_xccs_per_xcp);
WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL,
tmp);
+ }
+ ret = 0;
}
adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp;
- return 0;
+ return ret;
}
static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
@@ -673,30 +847,87 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
.select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
.switch_partition_mode = &gfx_v9_4_3_switch_compute_partition,
.ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
+ .get_xccs_per_xcp = &gfx_v9_4_3_get_xccs_per_xcp,
};
-static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
+static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,
+ struct aca_bank *bank, enum aca_smu_type type,
+ void *data)
{
- u32 gb_addr_config;
+ struct aca_bank_info info;
+ u64 misc0;
+ u32 instlo;
+ int ret;
- adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs;
- adev->gfx.ras = &gfx_v9_4_3_ras;
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(9, 4, 3):
- adev->gfx.config.max_hw_contexts = 8;
- adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
- adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
- adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
- adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
- gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG);
+ /* NOTE: overwrite info.die_id with xcd id for gfx */
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+ info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
break;
default:
- BUG();
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+ switch (instlo) {
+ case mmSMNAID_XCD0_MCA_SMU:
+ case mmSMNAID_XCD1_MCA_SMU:
+ case mmSMNXCD_XCD0_MCA_SMU:
+ return true;
+ default:
break;
}
- adev->gfx.config.gb_addr_config = gb_addr_config;
+ return false;
+}
+
+static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = {
+ .aca_bank_parser = gfx_v9_4_3_aca_bank_parser,
+ .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid,
+};
+
+static const struct aca_info gfx_v9_4_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
+ .bank_ops = &gfx_v9_4_3_aca_bank_ops,
+};
+
+static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
+{
+ adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs;
+ adev->gfx.ras = &gfx_v9_4_3_ras;
+
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ adev->gfx.config.gb_addr_config = GOLDEN_GB_ADDR_CONFIG;
adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
REG_GET_FIELD(
@@ -775,11 +1006,58 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id,
hw_prio, NULL);
}
-static int gfx_v9_4_3_sw_init(void *handle)
+static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3);
+ uint32_t *ptr, num_xcc, inst;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ /* Core dump buffer: one zeroed u32 per gc_reg_list_9_4_3 entry per XCC */
+ ptr = kcalloc(reg_count * num_xcc, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) { /* not propagated: log it and leave the pointer NULL */
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+ /* inst = MECs * pipes per MEC * queues per pipe; scaled by XCC count below */
+ ptr = kcalloc(reg_count * inst * num_xcc, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) { /* same policy as above: log only, pointer stays NULL */
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+}
+
+static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id, xcc_id, num_xcc;
- struct amdgpu_kiq *kiq;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_3_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 153) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@@ -792,6 +1070,13 @@ static int gfx_v9_4_3_sw_init(void *handle)
if (r)
return r;
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
@@ -847,8 +1132,7 @@ static int gfx_v9_4_3_sw_init(void *handle)
return r;
}
- kiq = &adev->gfx.kiq[xcc_id];
- r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id);
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
@@ -859,6 +1143,27 @@ static int gfx_v9_4_3_sw_init(void *handle)
return r;
}
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ if ((adev->gfx.mec_fw_version >= 155) &&
+ !amdgpu_sriov_vf(adev)) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
+ }
+ break;
+ case IP_VERSION(9, 5, 0):
+ if ((adev->gfx.mec_fw_version >= 21) &&
+ !amdgpu_sriov_vf(adev)) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
+ }
+ break;
+ default:
+ break;
+ }
r = gfx_v9_4_3_gpu_early_init(adev);
if (r)
return r;
@@ -867,17 +1172,19 @@ static int gfx_v9_4_3_sw_init(void *handle)
if (r)
return r;
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
- if (!amdgpu_sriov_vf(adev))
- r = amdgpu_gfx_sysfs_init(adev);
+ gfx_v9_4_3_alloc_ip_dump(adev);
- return r;
+ return 0;
}
-static int gfx_v9_4_3_sw_fini(void *handle)
+static int gfx_v9_4_3_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, num_xcc;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++)
@@ -889,11 +1196,15 @@ static int gfx_v9_4_3_sw_fini(void *handle)
amdgpu_gfx_kiq_fini(adev, i);
}
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
gfx_v9_4_3_mec_fini(adev);
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
gfx_v9_4_3_free_microcode(adev);
- if (!amdgpu_sriov_vf(adev))
- amdgpu_gfx_sysfs_fini(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
return 0;
}
@@ -934,8 +1245,10 @@ static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev,
soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
mutex_unlock(&adev->srbm_mutex);
- /* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * i, 0);
@@ -962,6 +1275,22 @@ static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id)
}
}
+/* For ASICs that need the xnack chain and whose MEC version supports it
+ * (AMDGPU_GMC_XNACK_FLAG_CHAIN set), set the SQ_CONFIG1
+ * DISABLE_XNACK_CHECK_IN_RETRY_DISABLE bit and inform KFD to set the
+ * xnack_chain bit in SET_RESOURCES. Does nothing when the flag is clear. */
+static void gfx_v9_4_3_xcc_init_sq(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+
+ if (!(adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
+ return;
+ /* Read-modify-write must hit one instance: both accesses use GET_INST() */
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_CONFIG1);
+ data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_CONFIG1, data);
+}
+
static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
int xcc_id)
{
@@ -1006,6 +1335,7 @@ static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id);
gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id);
+ gfx_v9_4_3_xcc_init_sq(adev, xcc_id);
}
static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
@@ -1018,6 +1348,22 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
adev->gfx.config.db_debug2 =
RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ /* ToDo: GC 9.4.4 */
+ case IP_VERSION(9, 4, 3):
+ if (adev->gfx.mec_fw_version >= 184 &&
+ (amdgpu_sriov_reg_access_sq_config(adev) ||
+ !amdgpu_sriov_vf(adev)))
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ break;
+ case IP_VERSION(9, 5, 0):
+ if (adev->gfx.mec_fw_version >= 23)
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ break;
+ default:
+ break;
+ }
+
for (i = 0; i < num_xcc; i++)
gfx_v9_4_3_xcc_constants_init(adev, i);
}
@@ -1033,10 +1379,8 @@ static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id)
{
/*
* Rlc save restore list is workable since v2_1.
- * And it's needed by gfxoff feature.
*/
- if (adev->gfx.rlc.is_rlc_v2_1)
- gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id);
+ gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id);
}
static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id)
@@ -1102,13 +1446,14 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX);
reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPARE_INT);
}
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
}
static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev)
{
/* init spm vmid with 0xf */
if (adev->gfx.rlc.funcs->update_spm_vmid)
- adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
return 0;
}
@@ -1319,24 +1664,26 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev,
+static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned vmid)
{
- u32 reg, data;
+ u32 reg, pre_data, data;
reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL);
- if (amdgpu_sriov_is_pp_one_vf(adev))
- data = RREG32_NO_KIQ(reg);
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
else
- data = RREG32(reg);
+ pre_data = RREG32(reg);
- data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- if (amdgpu_sriov_is_pp_one_vf(adev))
- WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
- else
- WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ }
}
static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
@@ -1385,7 +1732,15 @@ static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev,
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0);
} else {
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL,
- (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
adev->gfx.kiq[xcc_id].ring.sched.ready = false;
}
udelay(50);
@@ -1449,9 +1804,7 @@ static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
@@ -1512,6 +1865,9 @@ static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id)
DOORBELL_SOURCE, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 0);
+ if (amdgpu_sriov_multi_vf_mode(adev))
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_MODE, 1);
} else {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 0);
@@ -1796,7 +2152,7 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id)
return 0;
}
-static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id)
+static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v9_mqd *mqd = ring->mqd_ptr;
@@ -1808,8 +2164,8 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id)
*/
tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
- if (!tmp_mqd->cp_hqd_pq_control ||
- (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
+ if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
+ (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
@@ -1857,55 +2213,27 @@ static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_
static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[xcc_id].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v9_4_3_xcc_kiq_init_queue(ring, xcc_id);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
+ gfx_v9_4_3_xcc_kiq_init_queue(&adev->gfx.kiq[xcc_id].ring, xcc_id);
return 0;
}
static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ struct amdgpu_ring *ring;
+ int i, r;
gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ ring = &adev->gfx.compute_ring[i + xcc_id *
+ adev->gfx.num_compute_rings];
+
+ r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, xcc_id);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, xcc_id);
}
static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
@@ -1921,6 +2249,8 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id);
if (r)
return r;
+ } else {
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
}
r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id);
@@ -1946,18 +2276,31 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
{
- int r = 0, i, num_xcc;
+ int r = 0, i, num_xcc, num_xcp, num_xcc_per_xcp;
- if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
- AMDGPU_XCP_FL_NONE) ==
- AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
- r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr,
- amdgpu_user_partt_mode);
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ if (amdgpu_sriov_vf(adev)) {
+ enum amdgpu_gfx_partition mode;
+ mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE);
+ if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ return -EINVAL;
+ num_xcc_per_xcp = gfx_v9_4_3_get_xccs_per_xcp(adev);
+ adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp;
+ num_xcp = num_xcc / num_xcc_per_xcp;
+ r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode);
+
+ } else {
+ if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE) ==
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ r = amdgpu_xcp_switch_partition_mode(
+ adev->xcp_mgr, amdgpu_user_partt_mode);
+ }
if (r)
return r;
- num_xcc = NUM_XCC(adev->gfx.xcc_mask);
for (i = 0; i < num_xcc; i++) {
r = gfx_v9_4_3_xcc_cp_resume(adev, i);
if (r)
@@ -1967,12 +2310,6 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable,
- int xcc_id)
-{
- gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id);
-}
-
static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
{
if (amdgpu_gfx_disable_kcq(adev, xcc_id))
@@ -2004,13 +2341,16 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
}
gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id);
- gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id);
+ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
}
-static int gfx_v9_4_3_hw_init(void *handle)
+static int gfx_v9_4_3_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
if (!amdgpu_sriov_vf(adev))
gfx_v9_4_3_init_golden_registers(adev);
@@ -2028,13 +2368,14 @@ static int gfx_v9_4_3_hw_init(void *handle)
return r;
}
-static int gfx_v9_4_3_hw_fini(void *handle)
+static int gfx_v9_4_3_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, num_xcc;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
for (i = 0; i < num_xcc; i++) {
@@ -2044,19 +2385,19 @@ static int gfx_v9_4_3_hw_fini(void *handle)
return 0;
}
-static int gfx_v9_4_3_suspend(void *handle)
+static int gfx_v9_4_3_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_4_3_hw_fini(handle);
+ return gfx_v9_4_3_hw_fini(ip_block);
}
-static int gfx_v9_4_3_resume(void *handle)
+static int gfx_v9_4_3_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_4_3_hw_init(handle);
+ return gfx_v9_4_3_hw_init(ip_block);
}
-static bool gfx_v9_4_3_is_idle(void *handle)
+static bool gfx_v9_4_3_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, num_xcc;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
@@ -2068,24 +2409,24 @@ static bool gfx_v9_4_3_is_idle(void *handle)
return true;
}
-static int gfx_v9_4_3_wait_for_idle(void *handle)
+static int gfx_v9_4_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v9_4_3_is_idle(handle))
+ if (gfx_v9_4_3_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v9_4_3_soft_reset(void *handle)
+static int gfx_v9_4_3_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS);
@@ -2120,19 +2461,17 @@ static int gfx_v9_4_3_soft_reset(void *handle)
/* Disable MEC parsing/prefetching */
gfx_v9_4_3_xcc_cp_compute_enable(adev, false, 0);
- if (grbm_soft_reset) {
- tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
- }
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET);
/* Wait a little for things to settle down */
udelay(50);
@@ -2169,9 +2508,9 @@ static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v9_4_3_early_init(void *handle)
+static int gfx_v9_4_3_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
@@ -2187,9 +2526,9 @@ static int gfx_v9_4_3_early_init(void *handle)
return gfx_v9_4_3_init_microcode(adev);
}
-static int gfx_v9_4_3_late_init(void *handle)
+static int gfx_v9_4_3_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -2200,6 +2539,10 @@ static int gfx_v9_4_3_late_init(void *handle)
if (r)
return r;
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
if (adev->gfx.ras &&
adev->gfx.ras->enable_watchdog_timer)
adev->gfx.ras->enable_watchdog_timer(adev);
@@ -2334,10 +2677,10 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
- /* enable cgcg FSM(0x0000363F) */
+ /* CGCG Hysteresis: 400us */
def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
- data = (0x36
+ data = (0x2710
<< RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
@@ -2346,10 +2689,10 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
- /* set IDLE_POLL_COUNT(0x00900100) */
+ /* set IDLE_POLL_COUNT(0x33450100)*/
def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL);
data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
- (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ (0x3345 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
if (def != data)
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data);
} else {
@@ -2414,37 +2757,32 @@ static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
.is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range,
};
-static int gfx_v9_4_3_set_powergating_state(void *handle,
+static int gfx_v9_4_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static int gfx_v9_4_3_set_clockgating_state(void *handle,
+static int gfx_v9_4_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, num_xcc;
if (amdgpu_sriov_vf(adev))
return 0;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(9, 4, 3):
- for (i = 0; i < num_xcc; i++)
- gfx_v9_4_3_xcc_update_gfx_clock_gating(
- adev, state == AMD_CG_STATE_GATE, i);
- break;
- default:
- break;
- }
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_update_gfx_clock_gating(
+ adev, state == AMD_CG_STATE_GATE, i);
+
return 0;
}
-static void gfx_v9_4_3_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v9_4_3_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -2652,6 +2990,8 @@ static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
{
struct amdgpu_device *adev = ring->adev;
+ reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
(5 << 8) | /* dst: memory */
@@ -2669,6 +3009,8 @@ static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
{
uint32_t cmd = 0;
+ reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_GFX:
cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
@@ -2701,6 +3043,21 @@ static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
+static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring,
+ unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id);
+ WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regSQ_CMD, value);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id);
+}
+
static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
struct amdgpu_device *adev, int me, int pipe,
enum amdgpu_interrupt_state state, int xcc_id)
@@ -2738,37 +3095,119 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
TIME_STAMP_INT_ENABLE, 0);
- WREG32(mec_int_cntl_reg, mec_int_cntl);
+ WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id);
mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
TIME_STAMP_INT_ENABLE, 1);
- WREG32(mec_int_cntl_reg, mec_int_cntl);
+ WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
break;
default:
break;
}
}
+static u32 gfx_v9_4_3_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int xcc_id, int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC, so only ME1 pipes 0-3 resolve to
+ * a CP_ME1_PIPEn_INT_CNTL register offset; all other pipes' interrupts
+ * are set by amdkfd. Returns 0 when (me, pipe) has no register here.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
enum amdgpu_interrupt_state state)
{
- int i, num_xcc;
+ u32 mec_int_cntl_reg, mec_int_cntl;
+ int i, j, k, num_xcc;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
case AMDGPU_IRQ_STATE_ENABLE:
- for (i = 0; i < num_xcc; i++)
+ for (i = 0; i < num_xcc; i++) {
WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (j = 0; j < adev->gfx.mec.num_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ /* MECs start at 1 */
+ mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k);
+
+ if (mec_int_cntl_reg) {
+ mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ?
+ 1 : 0);
+ WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i);
+ }
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v9_4_3_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl_reg, mec_int_cntl;
+ int i, j, k, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < num_xcc; i++) {
+ WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ for (j = 0; j < adev->gfx.mec.num_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ /* MECs start at 1 */
+ mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k);
+
+ if (mec_int_cntl_reg) {
+ mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ?
+ 1 : 0);
+ WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i);
+ }
+ }
+ }
+ }
break;
default:
break;
@@ -2929,6 +3368,15 @@ static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v9_4_3_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream\n");
+ gfx_v9_4_3_fault(adev, entry);
+ return 0;
+}
+
static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -3015,6 +3463,181 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
}
}
+static int gfx_v9_4_3_unmap_done(struct amdgpu_device *adev, uint32_t me,
+				  uint32_t pipe, uint32_t queue,
+				  uint32_t xcc_id)
+{
+	int usec, ret = -ETIMEDOUT;
+
+	/*
+	 * With the queue's HQD selected, read CP_HQD_ACTIVE with a udelay(1)
+	 * between reads; report -ETIMEDOUT unless bit 0 clears in time.
+	 */
+	gfx_v9_4_3_xcc_set_safe_mode(adev, xcc_id);
+	mutex_lock(&adev->srbm_mutex);
+	soc15_grbm_select(adev, me, pipe, queue, 0, GET_INST(GC, xcc_id));
+	for (usec = 0; usec < adev->usec_timeout; usec++) {
+		if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) {
+			ret = 0;
+			break;
+		}
+		udelay(1);
+	}
+	soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+	mutex_unlock(&adev->srbm_mutex);
+	gfx_v9_4_3_xcc_unset_safe_mode(adev, xcc_id);
+	return ret;
+}
+
+/* True when compute_supported_reset has AMDGPU_RESET_TYPE_PER_PIPE set; warns once if not. */
+static bool gfx_v9_4_3_pipe_reset_support(struct amdgpu_device *adev)
+{
+	if (adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
+		return true;
+
+	dev_warn_once(adev->dev, "Please use the latest MEC version to see whether support pipe reset\n");
+	return false;
+}
+
+/*
+ * Pulse the CP_MEC_CNTL reset bit of the pipe that @ring runs on, then wait
+ * for the ring's HQD to go inactive.
+ *
+ * Returns -EINVAL when per-pipe reset is not supported, otherwise whatever
+ * gfx_v9_4_3_unmap_done() reports.
+ */
+static int gfx_v9_4_3_reset_hw_pipe(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t mec_cntl, mec_cntl_reset;
+
+	if (!gfx_v9_4_3_pipe_reset_support(adev))
+		return -EINVAL;
+
+	gfx_v9_4_3_xcc_set_safe_mode(adev, ring->xcc_id);
+	mutex_lock(&adev->srbm_mutex);
+
+	/* Keep the value read back so it can be written again after the pulse. */
+	mec_cntl = RREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL);
+	mec_cntl_reset = mec_cntl;
+
+	if (ring->me != 1) {
+		/* Not ME1: the ME2 bits are used and any non-zero pipe selects PIPE1. */
+		if (ring->pipe == 0)
+			mec_cntl_reset = REG_SET_FIELD(mec_cntl_reset, CP_MEC_CNTL,
+						       MEC_ME2_PIPE0_RESET, 1);
+		else
+			mec_cntl_reset = REG_SET_FIELD(mec_cntl_reset, CP_MEC_CNTL,
+						       MEC_ME2_PIPE1_RESET, 1);
+	} else if (ring->pipe == 0) {
+		mec_cntl_reset = REG_SET_FIELD(mec_cntl_reset, CP_MEC_CNTL,
+					       MEC_ME1_PIPE0_RESET, 1);
+	} else if (ring->pipe == 1) {
+		mec_cntl_reset = REG_SET_FIELD(mec_cntl_reset, CP_MEC_CNTL,
+					       MEC_ME1_PIPE1_RESET, 1);
+	} else if (ring->pipe == 2) {
+		mec_cntl_reset = REG_SET_FIELD(mec_cntl_reset, CP_MEC_CNTL,
+					       MEC_ME1_PIPE2_RESET, 1);
+	} else if (ring->pipe == 3) {
+		mec_cntl_reset = REG_SET_FIELD(mec_cntl_reset, CP_MEC_CNTL,
+					       MEC_ME1_PIPE3_RESET, 1);
+	}
+	/* An ME1 pipe above 3 sets no bit, so both writes below carry the same value. */
+
+	/* Write the value with the reset bit set, then the original value again. */
+	WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, mec_cntl_reset);
+	WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, mec_cntl);
+	mutex_unlock(&adev->srbm_mutex);
+	gfx_v9_4_3_xcc_unset_safe_mode(adev, ring->xcc_id);
+
+	return gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id);
+}
+
+static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid, /* not referenced in this function */
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int reset_mode = AMDGPU_RESET_TYPE_PER_QUEUE; /* switched to PER_PIPE once the pipe_reset branch has run */
+ unsigned long flags;
+ int r;
+ /* Reset one compute queue: per-queue reset through the KIQ first, pipe reset as the fallback. */
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ /* NOTE(review): the error returns below leave without amdgpu_ring_reset_helper_end() - confirm that is intended */
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ /* step 1: queue reset, submitted as a KIQ unmap packet with RESET_QUEUES */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r) {
+ dev_err(adev->dev, "kiq ring test failed after ring: %s queue reset\n",
+ ring->name);
+ goto pipe_reset;
+ }
+
+ r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id);
+ if (r)
+ dev_err(adev->dev, "fail to wait on hqd deactive and will try pipe reset\n");
+ /* step 2: runs only with r != 0, i.e. after a failure above or a backward jump from below */
+pipe_reset:
+ if (r) {
+ if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE))
+ return -EOPNOTSUPP;
+ r = gfx_v9_4_3_reset_hw_pipe(ring);
+ reset_mode = AMDGPU_RESET_TYPE_PER_PIPE;
+ dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name,
+ r ? "failed" : "successfully");
+ if (r)
+ return r;
+ }
+ /* step 3: run the KCQ init for this ring, then map the queue again through the KIQ */
+ r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring); /* unlike step 1, tested while kiq->ring_lock is still held */
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r) {
+ if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE)
+ goto pipe_reset; /* r != 0, so the pipe-reset branch runs; taken at most once */
+
+ dev_err(adev->dev, "fail to remap queue\n");
+ return r;
+ }
+
+ if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) {
+ r = amdgpu_ring_test_ring(ring); /* the compute ring itself is only tested after a per-queue reset */
+ if (r)
+ goto pipe_reset;
+ }
+
+ /* success: the return value comes from the common reset helper */
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
enum amdgpu_gfx_cp_ras_mem_id {
AMDGPU_GFX_CP_MEM1 = 1,
AMDGPU_GFX_CP_MEM2,
@@ -3653,19 +4276,19 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ce_reg_list[] = {
AMDGPU_GFX_GC_CANE_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO, regSPI_CE_ERR_STATUS_HI),
1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
- AMDGPU_GFX_SPI_MEM, 8},
+ AMDGPU_GFX_SPI_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO, regSP0_CE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
- AMDGPU_GFX_SP_MEM, 1},
+ AMDGPU_GFX_SP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO, regSP1_CE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
- AMDGPU_GFX_SP_MEM, 1},
+ AMDGPU_GFX_SP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO, regSQ_CE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
- AMDGPU_GFX_SQ_MEM, 8},
+ AMDGPU_GFX_SQ_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO, regSQC_CE_EDC_HI),
5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
- AMDGPU_GFX_SQC_MEM, 8},
+ AMDGPU_GFX_SQC_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO, regTCX_CE_ERR_STATUS_HI),
2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
AMDGPU_GFX_TCX_MEM, 1},
@@ -3674,22 +4297,22 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ce_reg_list[] = {
AMDGPU_GFX_TCC_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO, regTA_CE_EDC_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
- AMDGPU_GFX_TA_MEM, 8},
+ AMDGPU_GFX_TA_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG, regTCI_CE_EDC_HI_REG),
- 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
+ 27, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
AMDGPU_GFX_TCI_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG, regTCP_CE_EDC_HI_REG),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
- AMDGPU_GFX_TCP_MEM, 8},
+ AMDGPU_GFX_TCP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO, regTD_CE_EDC_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
- AMDGPU_GFX_TD_MEM, 8},
+ AMDGPU_GFX_TD_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO, regGCEA_CE_ERR_STATUS_HI),
16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
AMDGPU_GFX_GCEA_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO, regLDS_CE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
- AMDGPU_GFX_LDS_MEM, 1},
+ AMDGPU_GFX_LDS_MEM, 4},
};
static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
@@ -3713,19 +4336,19 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
AMDGPU_GFX_GC_CANE_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO, regSPI_UE_ERR_STATUS_HI),
1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
- AMDGPU_GFX_SPI_MEM, 8},
+ AMDGPU_GFX_SPI_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_UE_ERR_STATUS_LO, regSP0_UE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
- AMDGPU_GFX_SP_MEM, 1},
+ AMDGPU_GFX_SP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_UE_ERR_STATUS_LO, regSP1_UE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
- AMDGPU_GFX_SP_MEM, 1},
+ AMDGPU_GFX_SP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_UE_ERR_STATUS_LO, regSQ_UE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
- AMDGPU_GFX_SQ_MEM, 8},
+ AMDGPU_GFX_SQ_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_UE_EDC_LO, regSQC_UE_EDC_HI),
5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
- AMDGPU_GFX_SQC_MEM, 8},
+ AMDGPU_GFX_SQC_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_UE_ERR_STATUS_LO, regTCX_UE_ERR_STATUS_HI),
2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
AMDGPU_GFX_TCX_MEM, 1},
@@ -3734,16 +4357,16 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
AMDGPU_GFX_TCC_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_UE_EDC_LO, regTA_UE_EDC_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
- AMDGPU_GFX_TA_MEM, 8},
+ AMDGPU_GFX_TA_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_UE_EDC_LO_REG, regTCI_UE_EDC_HI_REG),
- 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
+ 27, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
AMDGPU_GFX_TCI_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_UE_EDC_LO_REG, regTCP_UE_EDC_HI_REG),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
- AMDGPU_GFX_TCP_MEM, 8},
+ AMDGPU_GFX_TCP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_UE_EDC_LO, regTD_UE_EDC_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
- AMDGPU_GFX_TD_MEM, 8},
+ AMDGPU_GFX_TD_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCA_UE_ERR_STATUS_LO, regTCA_UE_ERR_STATUS_HI),
2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCA"},
AMDGPU_GFX_TCA_MEM, 1},
@@ -3752,11 +4375,7 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
AMDGPU_GFX_GCEA_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_UE_ERR_STATUS_LO, regLDS_UE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
- AMDGPU_GFX_LDS_MEM, 1},
-};
-
-static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = {
- SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
+ AMDGPU_GFX_LDS_MEM, 4},
};
static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
@@ -3766,6 +4385,12 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
unsigned long ce_count = 0, ue_count = 0;
uint32_t i, j, k;
+ /* NOTE: convert xcc_id to physical XCD ID (XCD0 or XCD1) */
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = xcc_id & 0x01 ? 1 : 0,
+ };
+
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) {
@@ -3797,6 +4422,27 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
}
}
+ /* handle extra register entries of UE */
+ for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent,
+ gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size,
+ GET_INST(GC, xcc_id),
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &ue_count);
+ }
+ }
+ }
+
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
@@ -3804,8 +4450,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
/* the caller should make sure initialize value of
* err_data->ue_count and err_data->ce_count
*/
- err_data->ce_count += ce_count;
- err_data->ue_count += ue_count;
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+ amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
}
static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
@@ -3836,217 +4482,37 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
}
}
- gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
- xcc_id);
- mutex_unlock(&adev->grbm_idx_mutex);
-}
-
-static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev,
- int xcc_id)
-{
- uint32_t i, j;
- uint32_t reg_value;
-
- mutex_lock(&adev->grbm_idx_mutex);
-
- for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
- for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
- gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
- reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
- regGCEA_ERR_STATUS);
- if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
- REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
- REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
- dev_warn(adev->dev,
- "GCEA err detected at instance: %d, status: 0x%x!\n",
- j, reg_value);
- }
- /* clear after read */
- reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS,
- CLEAR_ERROR_STATUS, 0x1);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS,
- reg_value);
- }
- }
-
- gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
- xcc_id);
- mutex_unlock(&adev->grbm_idx_mutex);
-}
-
-static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev,
- int xcc_id)
-{
- uint32_t data;
-
- data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS);
- if (data) {
- dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
- }
-
- data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS);
- if (data) {
- dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
- }
-
- data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
- regVML2_WALKER_MEM_ECC_STATUS);
- if (data) {
- dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS,
- 0x3);
- }
-}
-
-static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev,
- uint32_t status, int xcc_id)
-{
- struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
- uint32_t i, simd, wave;
- uint32_t wave_status;
- uint32_t wave_pc_lo, wave_pc_hi;
- uint32_t wave_exec_lo, wave_exec_hi;
- uint32_t wave_inst_dw0, wave_inst_dw1;
- uint32_t wave_ib_sts;
-
- for (i = 0; i < 32; i++) {
- if (!((i << 1) & status))
- continue;
-
- simd = i / cu_info->max_waves_per_simd;
- wave = i % cu_info->max_waves_per_simd;
-
- wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
- wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
- wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
- wave_exec_lo =
- wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
- wave_exec_hi =
- wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
- wave_inst_dw0 =
- wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
- wave_inst_dw1 =
- wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
- wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
-
- dev_info(
- adev->dev,
- "\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n",
- simd, wave, wave_status,
- ((uint64_t)wave_pc_hi << 32 | wave_pc_lo),
- ((uint64_t)wave_exec_hi << 32 | wave_exec_lo),
- ((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0),
- wave_ib_sts);
- }
-}
-
-static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev,
- int xcc_id)
-{
- uint32_t se_idx, sh_idx, cu_idx;
- uint32_t status;
+ /* handle extra register entries of UE */
+ for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+ for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
+ /* no need to select if instance number is 1 */
+ if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
+ gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
+ gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
- mutex_lock(&adev->grbm_idx_mutex);
- for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
- for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
- for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
- gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
- cu_idx, xcc_id);
- status = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
- regSQ_TIMEOUT_STATUS);
- if (status != 0) {
- dev_info(
- adev->dev,
- "GFX Watchdog Timeout: SE %d, SH %d, CU %d\n",
- se_idx, sh_idx, cu_idx);
- gfx_v9_4_3_log_cu_timeout_status(
- adev, status, xcc_id);
- }
- /* clear old status */
- WREG32_SOC15(GC, GET_INST(GC, xcc_id),
- regSQ_TIMEOUT_STATUS, 0);
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+ 1,
+ GET_INST(GC, xcc_id));
}
}
}
- gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
- xcc_id);
- mutex_unlock(&adev->grbm_idx_mutex);
-}
-
-static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev,
- void *ras_error_status, int xcc_id)
-{
- gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id);
- gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id);
- gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id);
-}
-
-static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev,
- int xcc_id)
-{
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3);
-}
-static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev,
- int xcc_id)
-{
- uint32_t i, j;
- uint32_t value;
-
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
- for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
- gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
- value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS);
- value = REG_SET_FIELD(value, GCEA_ERR_STATUS,
- CLEAR_ERROR_STATUS, 0x1);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value);
- }
- }
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
}
-static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev,
- int xcc_id)
-{
- uint32_t se_idx, sh_idx, cu_idx;
-
- mutex_lock(&adev->grbm_idx_mutex);
- for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) {
- for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) {
- for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) {
- gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx,
- cu_idx, xcc_id);
- WREG32_SOC15(GC, GET_INST(GC, xcc_id),
- regSQ_TIMEOUT_STATUS, 0);
- }
- }
- }
- gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
- xcc_id);
- mutex_unlock(&adev->grbm_idx_mutex);
-}
-
-static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev,
- void *ras_error_status, int xcc_id)
-{
- gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id);
- gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id);
- gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id);
-}
-
static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev,
void *ras_error_status, int xcc_id)
{
uint32_t i;
uint32_t data;
+ if (amdgpu_sriov_vf(adev))
+ return;
+
data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG);
data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0);
@@ -4082,19 +4548,164 @@ static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev)
amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count);
}
-static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev)
+static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev)
{
- amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status);
+ amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer); /* NULL is presumably forwarded as the worker's ras_error_status - confirm in amdgpu_gfx_ras_error_func */
}
-static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev)
+static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
- amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status);
+	/* Emit num_nop padding dwords; 0 must bail out because num_nop - 2 below would wrap. */
+	if (num_nop <= 1) {
+		if (num_nop)	/* a single dword: the header itself is a NOP packet */
+			amdgpu_ring_write(ring, ring->funcs->nop);
+		return;
+	}
+
+	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+	/* Header is at index 0, followed by num_nop - 1 NOP packets */
+	amdgpu_ring_insert_nop(ring, num_nop - 1);
}
-static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev)
+static void gfx_v9_4_3_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer);
+	/*
+	 * Print the register snapshots stored in adev->gfx.ip_dump_core and
+	 * adev->gfx.ip_dump_compute_queues.  Core values are indexed [xcc][reg],
+	 * compute-queue values [xcc][mec][pipe][queue][reg].  Output stops at
+	 * the first of the two buffers that is NULL.
+	 */
+	struct amdgpu_device *adev = ip_block->adev;
+	const uint32_t core_regs = ARRAY_SIZE(gc_reg_list_9_4_3);
+	const uint32_t cp_regs = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
+	uint32_t num_xcc, num_inst, base;
+	uint32_t xcc, mec, pipe, queue, reg;
+	const char *name;
+
+	if (!adev->gfx.ip_dump_core)
+		return;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	drm_printf(p, "Number of Instances:%d\n", num_xcc);
+	/* core registers: one run of core_regs values per XCC */
+	for (xcc = 0; xcc < num_xcc; xcc++) {
+		drm_printf(p, "\nInstance id:%d\n", xcc);
+		for (reg = 0; reg < core_regs; reg++)
+			drm_printf(p, "%-50s \t 0x%08x\n",
+				   gc_reg_list_9_4_3[reg].reg_name,
+				   adev->gfx.ip_dump_core[xcc * core_regs + reg]);
+	}
+
+	/* print compute queue registers for all instances */
+	if (!adev->gfx.ip_dump_compute_queues)
+		return;
+
+	num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+		   adev->gfx.mec.num_queue_per_pipe;
+
+	drm_printf(p, "\nnum_xcc: %d num_mec: %d num_pipe: %d num_queue: %d\n",
+		   num_xcc,
+		   adev->gfx.mec.num_mec,
+		   adev->gfx.mec.num_pipe_per_mec,
+		   adev->gfx.mec.num_queue_per_pipe);
+
+	for (xcc = 0; xcc < num_xcc; xcc++) {
+		/* base steps through this XCC's slice, cp_regs values per queue */
+		base = xcc * cp_regs * num_inst;
+		for (mec = 0; mec < adev->gfx.mec.num_mec; mec++) {
+			for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+				for (queue = 0; queue < adev->gfx.mec.num_queue_per_pipe; queue++) {
+					drm_printf(p,
+						   "\nxcc:%d mec:%d, pipe:%d, queue:%d\n",
+						   xcc, mec, pipe, queue);
+					for (reg = 0; reg < cp_regs; reg++) {
+						/* for mec > 0 the ME1 header dump entry is labelled as ME2's */
+						if (mec && gc_cp_reg_list_9_4_3[reg].reg_offset ==
+							   regCP_MEC_ME1_HEADER_DUMP)
+							name = "regCP_MEC_ME2_HEADER_DUMP";
+						else
+							name = gc_cp_reg_list_9_4_3[reg].reg_name;
+
+						drm_printf(p, "%-50s \t 0x%08x\n", name,
+							   adev->gfx.ip_dump_compute_queues[base + reg]);
+					}
+					base += cp_regs;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Snapshot GC registers for gfx_v9_4_3_ip_print(): gc_reg_list_9_4_3 per XCC
+ * into ip_dump_core, gc_cp_reg_list_9_4_3 per XCC/MEC/pipe/queue into
+ * ip_dump_compute_queues.  Returns early when the respective buffer is NULL.
+ */
+static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t i, j, k;
+	uint32_t num_xcc, reg, num_inst;
+	uint32_t xcc_id, xcc_offset, inst_offset;
+	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3);
+
+	if (!adev->gfx.ip_dump_core)
+		return;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+		xcc_offset = xcc_id * reg_count;
+		for (i = 0; i < reg_count; i++)
+			adev->gfx.ip_dump_core[xcc_offset + i] =
+				RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i],
+								   GET_INST(GC, xcc_id)));
+	}
+
+	/* dump compute queue registers for all instances */
+	if (!adev->gfx.ip_dump_compute_queues)
+		return;
+
+	num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+		adev->gfx.mec.num_queue_per_pipe;
+	reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
+	mutex_lock(&adev->srbm_mutex);
+	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+		xcc_offset = xcc_id * reg_count * num_inst;
+		inst_offset = 0;
+		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+				for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+					/* ME0 is for GFX so start from 1 for CP */
+					soc15_grbm_select(adev, 1 + i, j, k, 0,
+							  GET_INST(GC, xcc_id));
+					for (reg = 0; reg < reg_count; reg++) {
+						/* for MEC index > 0 read the ME2 header dump in place of ME1's */
+						if (i && gc_cp_reg_list_9_4_3[reg].reg_offset ==
+							 regCP_MEC_ME1_HEADER_DUMP)
+							adev->gfx.ip_dump_compute_queues[xcc_offset + inst_offset + reg] =
+								RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+											regCP_MEC_ME2_HEADER_DUMP));
+						else
+							adev->gfx.ip_dump_compute_queues[xcc_offset + inst_offset + reg] =
+								RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_cp_reg_list_9_4_3[reg],
+												   GET_INST(GC, xcc_id)));
+					}
+					inst_offset += reg_count;
+				}
+			}
+		}
+		/* the selection above was made on this instance: put this XCC back to the default */
+		soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+	}
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader: a RUN_CLEANER_SHADER packet header followed by one payload dword */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
}
static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = {
@@ -4113,6 +4724,8 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = {
.set_clockgating_state = gfx_v9_4_3_set_clockgating_state,
.set_powergating_state = gfx_v9_4_3_set_powergating_state,
.get_clockgating_state = gfx_v9_4_3_get_clockgating_state,
+ .dump_ip_state = gfx_v9_4_3_ip_dump,
+ .print_ip_state = gfx_v9_4_3_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = {
@@ -4134,7 +4747,8 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = {
8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */
7 + /* gfx_v9_4_3_emit_mem_sync */
5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */
- 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */
+ 15 + /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */
+ 2, /* gfx_v9_4_3_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */
.emit_ib = gfx_v9_4_3_ring_emit_ib_compute,
.emit_fence = gfx_v9_4_3_ring_emit_fence,
@@ -4144,13 +4758,18 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = {
.emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
.test_ring = gfx_v9_4_3_ring_test_ring,
.test_ib = gfx_v9_4_3_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v9_4_3_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v9_4_3_ring_emit_wreg,
.emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_4_3_ring_soft_recovery,
.emit_mem_sync = gfx_v9_4_3_emit_mem_sync,
.emit_wave_limit = gfx_v9_4_3_emit_wave_limit,
+ .reset = gfx_v9_4_3_reset_kcq,
+ .emit_cleaner_shader = gfx_v9_4_3_ring_emit_cleaner_shader,
+ .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
+ .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = {
@@ -4205,6 +4824,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = {
.process = gfx_v9_4_3_priv_reg_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v9_4_3_bad_op_irq_funcs = {
+ .set = gfx_v9_4_3_set_bad_op_fault_state, /* writes OPCODE_ERROR_INT_ENABLE for the requested state */
+ .process = gfx_v9_4_3_bad_op_irq, /* logs "Illegal opcode in command stream", then calls gfx_v9_4_3_fault() */
+};
+
static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = {
.set = gfx_v9_4_3_set_priv_inst_fault_state,
.process = gfx_v9_4_3_priv_inst_irq,
@@ -4218,6 +4842,9 @@ static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_reg_irq.num_types = 1;
adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs;
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v9_4_3_bad_op_irq_funcs;
+
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs;
}
@@ -4230,30 +4857,13 @@ static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
{
- /* init asci gds info */
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(9, 4, 3):
- /* 9.4.3 removed all the GDS internal memory,
- * only support GWS opcode in kernel, like barrier
- * semaphore.etc */
- adev->gds.gds_size = 0;
- break;
- default:
- adev->gds.gds_size = 0x10000;
- break;
- }
-
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(9, 4, 3):
- /* deprecated for 9.4.3, no usage at all */
- adev->gds.gds_compute_max_wave_id = 0;
- break;
- default:
- /* this really depends on the chip */
- adev->gds.gds_compute_max_wave_id = 0x7ff;
- break;
- }
+ /* 9.4.3 variants removed all the GDS internal memory;
+ * only the GWS opcodes (barrier, semaphore, etc.) are
+ * still supported in the kernel */
+ /* init asic gds info */
+ adev->gds.gds_size = 0;
+ adev->gds.gds_compute_max_wave_id = 0;
adev->gds.gws_size = 64;
adev->gds.oa_size = 16;
}
@@ -4290,9 +4900,10 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_i
static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info)
{
- int i, j, k, counter, xcc_id, active_cu_number = 0;
- u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ int i, j, k, prev_counter, counter, xcc_id, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0, tmp;
unsigned disable_masks[4 * 4];
+ bool is_symmetric_cus;
if (!adev || !cu_info)
return -EINVAL;
@@ -4310,6 +4921,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex);
for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
+ is_symmetric_cus = true;
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
@@ -4337,6 +4949,15 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
}
+ if (i && is_symmetric_cus && prev_counter != counter)
+ is_symmetric_cus = false;
+ prev_counter = counter;
+ }
+ if (is_symmetric_cus) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_RELAUNCH_DISABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_DEBUG, CPC_HARVESTING_DISPATCH_DISABLE, 1);
+ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_DEBUG, tmp);
}
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
xcc_id);
@@ -4354,7 +4975,7 @@ const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = {
.type = AMD_IP_BLOCK_TYPE_GFX,
.major = 9,
.minor = 4,
- .rev = 0,
+ .rev = 3,
.funcs = &gfx_v9_4_3_ip_funcs,
};
@@ -4409,13 +5030,34 @@ struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = {
struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = {
.query_ras_error_count = &gfx_v9_4_3_query_ras_error_count,
.reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count,
- .query_ras_error_status = &gfx_v9_4_3_query_ras_error_status,
- .reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status,
};
+/*
+ * Late-init hook of the GFX RAS block: run the common block late init, then
+ * bind gfx_v9_4_3_aca_info for AMDGPU_RAS_BLOCK__GFX.  A failed bind undoes
+ * the block late init before the error is returned.
+ */
+static int gfx_v9_4_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+	int ret = amdgpu_ras_block_late_init(adev, ras_block);
+
+	if (ret)
+		return ret;
+
+	ret = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__GFX,
+				  &gfx_v9_4_3_aca_info,
+				  NULL);
+	/* bind failed: tear the block late init down again */
+	if (ret)
+		amdgpu_ras_block_late_fini(adev, ras_block);
+
+	return ret;
+}
+
struct amdgpu_gfx_ras gfx_v9_4_3_ras = {
.ras_block = {
.hw_ops = &gfx_v9_4_3_ras_ops,
+ .ras_late_init = &gfx_v9_4_3_ras_late_init,
},
.enable_watchdog_timer = &gfx_v9_4_3_enable_watchdog_timer,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm
new file mode 100644
index 000000000000..d5325ef80ab0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
+//To turn this shader program on for compilation change this to main and lower shader main to main_1
+
+// MI300 : Clear SGPRs, VGPRs and LDS
+// Uses two kernels launched separately:
+// 1. Clean VGPRs, LDS, and lower SGPRs
+// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU
+// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD
+// Waves in the workgroup share the 64KB of LDS
+// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383
+// Each wave clears 128 VGPRs, so all 512 in the SIMD
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+// 2. Clean remaining SGPRs
+// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU
+// Waves are allocating 96 SGPRs
+// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223.
+// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799
+// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER
+// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command
+// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799
+
+shader main
+ asic(MI300)
+ type(CS)
+ wave_size(64)
+// Note: original source code from SQ team
+
+// (theoretical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks)
+
+ s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3
+ s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s0 == 1); otherwise branch to label_0023
+ S_BARRIER
+
+ s_movk_i32 m0, 0x0000
+ s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance)
+ //
+ // CLEAR VGPRs
+ //
+ s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing
+label_0005:
+ v_mov_b32 v0, 0
+ v_mov_b32 v1, 0
+ v_mov_b32 v2, 0
+ v_mov_b32 v3, 0
+ v_mov_b32 v4, 0
+ v_mov_b32 v5, 0
+ v_mov_b32 v6, 0
+ v_mov_b32 v7, 0
+ s_sub_u32 s2, s2, 8
+ s_set_gpr_idx_idx s2
+ s_cbranch_scc0 label_0005
+ s_set_gpr_idx_off
+
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // tg_size (first_wave) term is read from s1 here, as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set (NOTE(review): earlier note said sgpr0 - confirm)
+ s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS only if this is the first wave of the ThreadGroup/WorkGroup, otherwise skip to SGPR clearing
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_clean_sgpr_1:
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+s_endpgm
+
+label_0023:
+
+ s_sethalt 1
+
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop1:
+
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop1
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0xee //sets vcc to 0xee rather than 0 - NOTE(review): confirm this non-zero value is intended
+
+s_endpgm
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h
new file mode 100644
index 000000000000..69aa567c6c1d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_9_4_3 */
+static const u32 gfx_9_4_3_cleaner_shader_hex[] = {
+ 0xbf068100, 0xbf84003b,
+ 0xbf8a0000, 0xb07c0000,
+ 0xbe8200ff, 0x00000078,
+ 0xbf110802, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0x7e060280, 0x7e080280,
+ 0x7e0a0280, 0x7e0c0280,
+ 0x7e0e0280, 0x80828802,
+ 0xbe803202, 0xbf84fff5,
+ 0xbf9c0000, 0xbe8200ff,
+ 0x80000000, 0x86020102,
+ 0xbf840011, 0xbefe00c1,
+ 0xbeff00c1, 0xd28c0001,
+ 0x0001007f, 0xd28d0001,
+ 0x0002027e, 0x10020288,
+ 0xbe8200bf, 0xbefc00c1,
+ 0xd89c2000, 0x00020201,
+ 0xd89c6040, 0x00040401,
+ 0x320202ff, 0x00000400,
+ 0x80828102, 0xbf84fff8,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbf810000, 0xbf8d0001,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea01ff,
+ 0x000000ee, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
new file mode 100644
index 000000000000..f9949fedfbb9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c
@@ -0,0 +1,516 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v11_5_0.h"
+
+#include "gc/gc_11_5_0_offset.h"
+#include "gc/gc_11_5_0_sh_mask.h"
+
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regGCVM_L2_CNTL3_DEFAULT 0x80100007
+#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+
+static const char *gfxhub_client_ids[] = {
+ "CB/DB",
+ "Reserved",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "SDMA2",
+ "SDMA3",
+};
+
+static uint32_t gfxhub_v11_5_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v11_5_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS, RW));
+}
+
+static u64 gfxhub_v11_5_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v11_5_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v11_5_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v11_5_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v11_5_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v11_5_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+static void gfxhub_v11_5_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v11_5_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v11_5_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v11_5_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v11_5_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v11_5_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v11_5_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP are NULL for a VF because they are
+ * VF copy registers that the vbios post doesn't program, so the
+ * SRIOV driver needs to program them here.
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v11_5_0_init_gart_aperture_regs(adev);
+ gfxhub_v11_5_0_init_system_aperture_regs(adev);
+ gfxhub_v11_5_0_init_tlb_regs(adev);
+ gfxhub_v11_5_0_init_cache_regs(adev);
+
+ gfxhub_v11_5_0_enable_system_domain(adev);
+ gfxhub_v11_5_0_disable_identity_aperture(adev);
+ gfxhub_v11_5_0_setup_vmid_config(adev);
+ gfxhub_v11_5_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v11_5_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Disable the L1 TLB and the advanced driver model */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Disable the L2 cache and zero GCVM_L2_CNTL3 */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v11_5_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v11_5_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* Do not halt the CP on a page fault */
+ tmp = RREG32_SOC15(GC, 0, regCP_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_DEBUG, tmp);
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v11_5_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v11_5_0_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v11_5_0_get_invalidate_req,
+};
+
+static void gfxhub_v11_5_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v11_5_0_vmhub_funcs;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v11_5_0_funcs = {
+ .get_fb_location = gfxhub_v11_5_0_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v11_5_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v11_5_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v11_5_0_gart_enable,
+ .gart_disable = gfxhub_v11_5_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v11_5_0_set_fault_enable_default,
+ .init = gfxhub_v11_5_0_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h
new file mode 100644
index 000000000000..265ab631b3d0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V11_5_0_H__
+#define __GFXHUB_V11_5_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v11_5_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c
new file mode 100644
index 000000000000..7609b9cecae8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c
@@ -0,0 +1,521 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v12_0.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+#include "soc15_common.h"
+
+#define regGCVM_L2_CNTL3_DEFAULT 0x80120007
+#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
+#define regGRBM_GFX_INDEX_DEFAULT 0xe0000000
+
+static const char *gfxhub_client_ids[] = {
+ "CB",
+ "DB",
+ "GE1",
+ "GE2",
+ "CPF",
+ "CPC",
+ "CPG",
+ "RLC",
+ "TCP",
+ "SQC (inst)",
+ "SQC (data)",
+ "SQG/PC/SC",
+ "Reserved",
+ "SDMA0",
+ "SDMA1",
+ "GCR",
+ "Reserved",
+ "Reserved",
+ "WGS",
+ "DSM",
+ "PA"
+};
+
+static uint32_t gfxhub_v12_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+gfxhub_v12_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ u32 cid = REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
+
+ dev_err(adev->dev,
+ "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
+ cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%lx\n",
+ REG_GET_FIELD(status,
+ GCVM_L2_PROTECTION_FAULT_STATUS_LO32, RW));
+}
+
+static u64 gfxhub_v12_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 gfxhub_v12_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v12_0_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void gfxhub_v12_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v12_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v12_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start
+ + adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v12_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v12_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
+
+ tmp = regGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
+
+ tmp = regGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
+
+ tmp = regGCVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
+}
+
+static void gfxhub_v12_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v12_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+/* Enable and program the 15 per-VMID contexts that start at GCVM_CONTEXT1. */
+static void gfxhub_v12_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ /* Read the CNTL register of context i + 1, i.e. the one written below. */
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void gfxhub_v12_0_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ unsigned i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int gfxhub_v12_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP are NULL for a VF because they are
+ * VF copy registers that the vbios post doesn't program, so under
+ * SRIOV the driver needs to program them.
+ */
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v12_0_init_gart_aperture_regs(adev);
+ gfxhub_v12_0_init_system_aperture_regs(adev);
+ gfxhub_v12_0_init_tlb_regs(adev);
+ gfxhub_v12_0_init_cache_regs(adev);
+
+ gfxhub_v12_0_enable_system_domain(adev);
+ gfxhub_v12_0_disable_identity_aperture(adev);
+ gfxhub_v12_0_setup_vmid_config(adev);
+ gfxhub_v12_0_program_invalidation(adev);
+
+ return 0;
+}
+
+static void gfxhub_v12_0_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v12_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void gfxhub_v12_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ /* NO halt CP when page fault */
+ tmp = RREG32_SOC15(GC, 0, regCP_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_DEBUG, tmp);
+
+ /* These registers are not accessible to VF-SRIOV.
+ * The PF will program them instead.
+ */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs gfxhub_v12_0_vmhub_funcs = {
+ .print_l2_protection_fault_status = gfxhub_v12_0_print_l2_protection_fault_status,
+ .get_invalidate_req = gfxhub_v12_0_get_invalidate_req,
+};
+
+static void gfxhub_v12_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
+ regGCVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &gfxhub_v12_0_vmhub_funcs;
+}
+
+const struct amdgpu_gfxhub_funcs gfxhub_v12_0_funcs = {
+ .get_fb_location = gfxhub_v12_0_get_fb_location,
+ .get_mc_fb_offset = gfxhub_v12_0_get_mc_fb_offset,
+ .setup_vm_pt_regs = gfxhub_v12_0_setup_vm_pt_regs,
+ .gart_enable = gfxhub_v12_0_gart_enable,
+ .gart_disable = gfxhub_v12_0_gart_disable,
+ .set_fault_enable_default = gfxhub_v12_0_set_fault_enable_default,
+ .init = gfxhub_v12_0_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h
new file mode 100644
index 000000000000..f1258265f802
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V12_0_H__
+#define __GFXHUB_V12_0_H__
+
+extern const struct amdgpu_gfxhub_funcs gfxhub_v12_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index cdc290a474a9..a7bfc9f41d0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -92,17 +92,19 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
- /* Program the AGP BAR */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
-
if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
/* Program the system aperture low logical page number. */
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
/*
* Raven2 has a HW issue that it is unable to use the
* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
@@ -260,7 +262,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
block_size -= 9;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
@@ -441,7 +443,6 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev)
mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
}
-
const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = {
.get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset,
.setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 0834af771549..6c03bf9f1ae8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -74,6 +74,8 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev,
static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev,
uint32_t xcc_mask)
{
+ uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ?
+ adev->gmc.vram_start : adev->gmc.fb_start;
uint64_t pt_base;
int i;
@@ -91,10 +93,10 @@ static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev,
if (adev->gmc.pdb0_bo) {
WREG32_SOC15(GC, GET_INST(GC, i),
regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
- (u32)(adev->gmc.fb_start >> 12));
+ (u32)(gart_start >> 12));
WREG32_SOC15(GC, GET_INST(GC, i),
regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
- (u32)(adev->gmc.fb_start >> 44));
+ (u32)(gart_start >> 44));
WREG32_SOC15(GC, GET_INST(GC, i),
regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
@@ -139,7 +141,9 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
/*
* Raven2 has a HW issue that it is unable to use the
* vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
@@ -178,7 +182,7 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev,
/* In the case squeezing vram into GART aperture, we don't use
* FB aperture and AGP aperture. Disable them.
*/
- if (adev->gmc.pdb0_bo) {
+ if (adev->gmc.pdb0_bo && adev->gmc.xgmi.connected_to_cpu) {
WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0);
WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF);
WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0);
@@ -311,6 +315,16 @@ gfxhub_v1_2_xcc_disable_identity_aperture(struct amdgpu_device *adev,
}
}
+static inline bool
+gfxhub_v1_2_per_process_xnack_support(struct amdgpu_device *adev)
+{
+ /*
+ * TODO: Check if this function is really needed, so far only 9.4.3
+ * variants use GFXHUB 1.2
+ */
+ return !!adev->aid_mask;
+}
+
static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
uint32_t xcc_mask)
{
@@ -329,7 +343,8 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
for_each_inst(j, xcc_mask) {
hub = &adev->vmhub[AMDGPU_GFXHUB(j)];
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
@@ -352,15 +367,16 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
PAGE_TABLE_BLOCK_SIZE,
block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm.
- * On 9.4.2 and 9.4.3, XNACK can be enabled in
+ * On 9.4.3 variants, XNACK can be enabled in
* the SQ per-process.
* Retry faults need to be enabled for that to work.
*/
- tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
- RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
- !adev->gmc.noretry ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3));
+ tmp = REG_SET_FIELD(
+ tmp, VM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !adev->gmc.noretry ||
+ gfxhub_v1_2_per_process_xnack_support(
+ adev));
WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
@@ -450,10 +466,12 @@ static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev,
WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
- tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
- WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
- WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+ if (!amdgpu_sriov_vf(adev)) {
+ tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
+ WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index a041c6c970e1..793faf62cb07 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -287,7 +287,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev)
GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+ /* TODO: This is only needed on some Navi 1x revisions */
+ hub->sdma_invalidation_workaround = true;
+
hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index 7708d5ded7b8..deb95fab02df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -155,6 +155,9 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
+ if (amdgpu_sriov_vf(adev))
+ return;
+
/* Program the AGP BAR */
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
@@ -296,7 +299,7 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev)
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -502,42 +505,6 @@ static void gfxhub_v2_1_init(struct amdgpu_device *adev)
hub->vmhub_funcs = &gfxhub_v2_1_vmhub_funcs;
}
-static int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev)
-{
- u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL);
- u32 max_region =
- REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
- u32 max_num_physical_nodes = 0;
- u32 max_physical_node_id = 0;
-
- switch (adev->ip_versions[XGMI_HWIP][0]) {
- case IP_VERSION(4, 8, 0):
- max_num_physical_nodes = 4;
- max_physical_node_id = 3;
- break;
- default:
- return -EINVAL;
- }
-
- /* PF_MAX_REGION=0 means xgmi is disabled */
- if (max_region) {
- adev->gmc.xgmi.num_physical_nodes = max_region + 1;
- if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
- return -EINVAL;
-
- adev->gmc.xgmi.physical_node_id =
- REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
- if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
- return -EINVAL;
-
- adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
- RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_SIZE),
- GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
- }
-
- return 0;
-}
-
static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev)
{
int i;
@@ -548,7 +515,7 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev)
adev->gfx.config.max_sh_per_se *
adev->gfx.config.max_shader_engines);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 3):
/* Get SA disabled bitmap from eFuse setting */
@@ -693,7 +660,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
.gart_disable = gfxhub_v2_1_gart_disable,
.set_fault_enable_default = gfxhub_v2_1_set_fault_enable_default,
.init = gfxhub_v2_1_init,
- .get_xgmi_info = gfxhub_v2_1_get_xgmi_info,
.utcl2_harvest = gfxhub_v2_1_utcl2_harvest,
.mode2_save_regs = gfxhub_v2_1_save_regs,
.mode2_restore_regs = gfxhub_v2_1_restore_regs,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
index e1c76c070ba9..abe30c8bd2ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
@@ -164,8 +164,7 @@ static void gfxhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start
- + adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -295,7 +294,7 @@ static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
index 07f369c7a1ed..b3ef6e71811f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
@@ -169,8 +169,7 @@ static void gfxhub_v3_0_3_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start
- + adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -300,7 +299,7 @@ static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev)
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fa87a85e1017..d7499be8c4bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -51,8 +51,6 @@
#include "athub_v2_0.h"
#include "athub_v2_1.h"
-#include "amdgpu_reset.h"
-
static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned int type,
@@ -107,7 +105,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
bool retry_fault = !!(entry->src_data[1] & 0x80);
bool write_fault = !!(entry->src_data[1] & 0x20);
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
uint32_t status = 0;
u64 addr;
@@ -134,7 +132,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
/* Try to handle the recoverable page faults by filling page
* tables
*/
- if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault))
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+ entry->timestamp, write_fault))
return 1;
}
@@ -145,30 +144,38 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
* the new fast GRBM interface.
*/
if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
+ (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(10, 3, 0)))
RREG32(hub->vm_l2_pro_fault_status);
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
}
if (!printk_ratelimit())
return 0;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
dev_err(adev->dev,
- "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
- entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
dev_err(adev->dev, " in page starting at address 0x%016llx from client 0x%x (%s)\n",
- addr, entry->client_id,
- soc15_ih_clientid_name[entry->client_id]);
+ addr, entry->client_id,
+ soc15_ih_clientid_name[entry->client_id]);
- if (!amdgpu_sriov_vf(adev))
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
hub->vmhub_funcs->print_l2_protection_fault_status(adev,
status);
@@ -230,20 +237,48 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
* by the amdgpu vm/hsa code.
*/
-static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
- unsigned int vmhub, uint32_t flush_type)
+/**
+ * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: vmhub type
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
- u32 tmp;
/* Use register 17 for GART */
const unsigned int eng = 17;
- unsigned int i;
unsigned char hub_ip = 0;
+ u32 sem, req, ack;
+ unsigned int i;
+ u32 tmp;
- hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
- GC_HWIP : MMHUB_HWIP;
+ sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+ req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ /* flush hdp cache */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
+ */
+ if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, GET_INST(GC, 0));
+ return;
+ }
+
+ /* This path is needed before KIQ/MES/GFXOFF are set up */
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
/*
@@ -257,9 +292,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acuqire */
- tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, hub_ip);
-
+ tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
if (tmp & 0x1)
break;
udelay(1);
@@ -269,24 +302,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
- WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng,
- inv_req, hub_ip);
+ WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
/*
* Issue a dummy read to wait for the ACK register to be cleared
* to avoid a false ACK due to the new fast GRBM interface.
*/
if ((vmhub == AMDGPU_GFXHUB(0)) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
- RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, hub_ip);
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0)))
+ RREG32_RLC_NO_KIQ(req, hub_ip);
/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
- hub->eng_distance * eng, hub_ip);
-
+ tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
tmp &= 1 << vmid;
if (tmp)
break;
@@ -296,109 +324,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore)
- /*
- * add semaphore release after invalidation,
- * write with 0 means semaphore release
- */
- WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, 0, hub_ip);
+ WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
spin_unlock(&adev->gmc.invalidate_lock);
- if (i < adev->usec_timeout)
- return;
-
- DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
-}
-
-/**
- * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
- *
- * @adev: amdgpu_device pointer
- * @vmid: vm instance to flush
- * @vmhub: vmhub type
- * @flush_type: the flush type
- *
- * Flush the TLB for the requested page table.
- */
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
- uint32_t vmhub, uint32_t flush_type)
-{
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- struct dma_fence *fence;
- struct amdgpu_job *job;
-
- int r;
-
- /* flush hdp cache */
- adev->hdp.funcs->flush_hdp(adev, NULL);
-
- /* For SRIOV run time, driver shouldn't access the register through MMIO
- * Directly use kiq to do the vm invalidation instead
- */
- if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
- down_read_trylock(&adev->reset_domain->sem)) {
- struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
- const unsigned int eng = 17;
- u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
- u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
- u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
-
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
- 1 << vmid);
-
- up_read(&adev->reset_domain->sem);
- return;
- }
-
- mutex_lock(&adev->mman.gtt_window_lock);
-
- if (vmhub == AMDGPU_MMHUB0(0)) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
- return;
- }
-
- BUG_ON(vmhub != AMDGPU_GFXHUB(0));
-
- if (!adev->mman.buffer_funcs_enabled ||
- !adev->ib_pool_ready ||
- amdgpu_in_reset(adev) ||
- ring->sched.ready == false) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
- return;
- }
-
- /* The SDMA on Navi has a bug which can theoretically result in memory
- * corruption if an invalidation happens at the same time as an VA
- * translation. Avoid this by doing the invalidation from the SDMA
- * itself.
- */
- r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
- &job);
- if (r)
- goto error_alloc;
-
- job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
- job->vm_needs_flush = true;
- job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
- fence = amdgpu_job_submit(job);
-
- mutex_unlock(&adev->mman.gtt_window_lock);
-
- dma_fence_wait(fence, false);
- dma_fence_put(fence);
-
- return;
-
-error_alloc:
- mutex_unlock(&adev->mman.gtt_window_lock);
- DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n",
+ vmhub);
}
/**
@@ -412,62 +344,31 @@ error_alloc:
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub, uint32_t inst)
+static void gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ uint16_t queried;
int vmid, i;
- signed long r;
- uint32_t seq;
- uint16_t queried_pasid;
- bool ret;
- u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
- struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
-
- if (amdgpu_emu_mode == 0 && ring->sched.ready) {
- spin_lock(&adev->gfx.kiq[0].ring_lock);
- /* 2 dwords flush + 8 dwords fence */
- amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, flush_type, all_hub);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r) {
- amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq[0].ring_lock);
- return -ETIME;
- }
-
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq[0].ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
- if (r < 1) {
- dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
- }
for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
-
- ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- if (all_hub) {
- for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
- gmc_v10_0_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- } else {
- gmc_v10_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB(0), flush_type);
- }
- if (!adev->enable_mes)
- break;
+ bool valid;
+
+ valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v10_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+ flush_type);
}
}
-
- return 0;
}
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
@@ -524,10 +425,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
- /* MES fw manages IH_VMID_x_LUT updating */
- if (ring->is_mes_queue)
- return;
-
if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else
@@ -569,24 +466,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
* 0 valid
*/
-static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-{
- switch (flags) {
- case AMDGPU_VM_MTYPE_DEFAULT:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_NC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_WC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
- case AMDGPU_VM_MTYPE_CC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
- case AMDGPU_VM_MTYPE_UC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
- default:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- }
-}
-
static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
@@ -611,21 +490,39 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
- struct amdgpu_bo *bo = mapping->bo_va->base.bo;
-
- *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
- *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+ break;
+ }
- *flags &= ~AMDGPU_PTE_NOALLOC;
- *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
+ if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+ *flags |= AMDGPU_PTE_NOALLOC;
+ else
+ *flags &= ~AMDGPU_PTE_NOALLOC;
- if (mapping->flags & AMDGPU_PTE_PRT) {
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
*flags |= AMDGPU_PTE_PRT;
*flags |= AMDGPU_PTE_SNOOPED;
*flags |= AMDGPU_PTE_LOG;
@@ -634,9 +531,9 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
}
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
AMDGPU_GEM_CREATE_UNCACHED))
- *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
- AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
}
static unsigned int gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
@@ -666,7 +563,6 @@ static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
- .map_mtype = gmc_v10_0_map_mtype,
.get_vm_pde = gmc_v10_0_get_vm_pde,
.get_vm_pte = gmc_v10_0_get_vm_pte,
.get_vbios_fb_size = gmc_v10_0_get_vbios_fb_size,
@@ -680,7 +576,7 @@ static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[UMC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(8, 7, 0):
adev->umc.max_ras_err_cnt_per_query = UMC_V8_7_TOTAL_CHANNEL_NUM;
adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
@@ -697,7 +593,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 3, 0):
case IP_VERSION(2, 4, 0):
case IP_VERSION(2, 4, 1):
@@ -711,7 +607,7 @@ static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
@@ -729,9 +625,9 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
}
-static int gmc_v10_0_early_init(void *handle)
+static int gmc_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v10_0_set_mmhub_funcs(adev);
gmc_v10_0_set_gfxhub_funcs(adev);
@@ -750,9 +646,9 @@ static int gmc_v10_0_early_init(void *handle)
return 0;
}
-static int gmc_v10_0_late_init(void *handle)
+static int gmc_v10_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -776,9 +672,11 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
/* add the xgmi offset of the physical node */
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
- amdgpu_gmc_gart_location(adev, mc);
- amdgpu_gmc_agp_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
@@ -825,7 +723,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
/* set the gart size */
if (amdgpu_gart_size == -1) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@@ -860,16 +758,16 @@ static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
- adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
return amdgpu_gart_table_vram_alloc(adev);
}
-static int gmc_v10_0_sw_init(void *handle)
+static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfxhub.funcs->init(adev);
@@ -892,7 +790,7 @@ static int gmc_v10_0_sw_init(void *handle)
adev->gmc.vram_vendor = vram_vendor;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
adev->gmc.mall_size = 128 * 1024 * 1024;
break;
@@ -910,7 +808,7 @@ static int gmc_v10_0_sw_init(void *handle)
break;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -1017,9 +915,9 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
amdgpu_gart_table_vram_free(adev);
}
-static int gmc_v10_0_sw_fini(void *handle)
+static int gmc_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_vm_manager_fini(adev);
gmc_v10_0_gart_fini(adev);
@@ -1063,10 +961,9 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
adev->hdp.funcs->init_registers(adev);
/* Flush HDP after it is initialized */
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
- value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
- false : true;
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
if (!adev->in_s0ix)
adev->gfxhub.funcs->set_fault_enable_default(adev, value);
@@ -1082,10 +979,12 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
return 0;
}
-static int gmc_v10_0_hw_init(void *handle)
+static int gmc_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
/* The sequence of these two function calls matters.*/
gmc_v10_0_init_golden_registers(adev);
@@ -1127,9 +1026,9 @@ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
adev->mmhub.funcs->gart_disable(adev);
}
-static int gmc_v10_0_hw_fini(void *handle)
+static int gmc_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v10_0_gart_disable(adev);
@@ -1141,61 +1040,58 @@ static int gmc_v10_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
return 0;
}
-static int gmc_v10_0_suspend(void *handle)
+static int gmc_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v10_0_hw_fini(adev);
+ gmc_v10_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v10_0_resume(void *handle)
+static int gmc_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v10_0_hw_init(adev);
+ r = gmc_v10_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v10_0_is_idle(void *handle)
+static bool gmc_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v10.*/
return true;
}
-static int gmc_v10_0_wait_for_idle(void *handle)
+static int gmc_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v10.*/
return 0;
}
-static int gmc_v10_0_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int gmc_v10_0_set_clockgating_state(void *handle,
+static int gmc_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled
* is a new problem observed at DF 3.0.3, however with the same suspend sequence not
* seen any issue on the DF 3.0.2 series platform.
*/
- if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) {
+ if (adev->in_s0ix &&
+ amdgpu_ip_version(adev, DF_HWIP, 0) > IP_VERSION(3, 0, 2)) {
dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
return 0;
}
@@ -1204,29 +1100,29 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
if (r)
return r;
- if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
+ if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
return athub_v2_1_set_clockgating(adev, state);
else
return athub_v2_0_set_clockgating(adev, state);
}
-static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags)
+static void gmc_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 4))
return;
adev->mmhub.funcs->get_clockgating(adev, flags);
- if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
+ if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
athub_v2_1_get_clockgating(adev, flags);
else
athub_v2_0_get_clockgating(adev, flags);
}
-static int gmc_v10_0_set_powergating_state(void *handle,
+static int gmc_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1244,7 +1140,6 @@ const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
.resume = gmc_v10_0_resume,
.is_idle = gmc_v10_0_is_idle,
.wait_for_idle = gmc_v10_0_wait_for_idle,
- .soft_reset = gmc_v10_0_soft_reset,
.set_clockgating_state = gmc_v10_0_set_clockgating_state,
.set_powergating_state = gmc_v10_0_set_powergating_state,
.get_clockgating_state = gmc_v10_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index e3b76fd28d15..7bc389d9f5c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -42,9 +42,11 @@
#include "nbio_v4_3.h"
#include "gfxhub_v3_0.h"
#include "gfxhub_v3_0_3.h"
+#include "gfxhub_v11_5_0.h"
#include "mmhub_v3_0.h"
#include "mmhub_v3_0_1.h"
#include "mmhub_v3_0_2.h"
+#include "mmhub_v3_3.h"
#include "athub_v3_0.h"
@@ -71,7 +73,8 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
* fini/suspend, so the overall state doesn't
* change over the course of suspend/resume.
*/
- if (!adev->in_s0ix)
+ if (!adev->in_s0ix && (adev->in_runpm || adev->in_suspend ||
+ amdgpu_in_reset(adev)))
amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
break;
case AMDGPU_IRQ_STATE_ENABLE:
@@ -117,23 +120,31 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
}
if (printk_ratelimit()) {
- struct amdgpu_task_info task_info;
-
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+ struct amdgpu_task_info *task_info;
dev_err(adev->dev,
- "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
- entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
- addr, entry->client_id);
- if (!amdgpu_sriov_vf(adev))
+ addr, entry->client_id);
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
}
@@ -184,27 +195,51 @@ static bool gmc_v11_0_get_vmid_pasid_mapping_info(
return !!(*p_pasid);
}
-/*
- * GART
- * VMID 0 is the physical GPU addresses as used by the kernel.
- * VMIDs 1-15 are used for userspace clients and are handled
- * by the amdgpu vm/hsa code.
+/**
+ * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
*/
-
-static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
- unsigned int vmhub, uint32_t flush_type)
+static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
- u32 tmp;
/* Use register 17 for GART */
const unsigned int eng = 17;
+ unsigned char hub_ip;
+ u32 sem, req, ack;
unsigned int i;
- unsigned char hub_ip = 0;
+ u32 tmp;
+
+ if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
+ return;
+
+ sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+ req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ /* flush hdp cache */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
+ */
+ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, GET_INST(GC, 0));
+ return;
+ }
- hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
- GC_HWIP : MMHUB_HWIP;
+ /* This path is needed before KIQ/MES/GFXOFF are set up */
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
/*
@@ -218,8 +253,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acuqire */
- tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, hub_ip);
+ tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
if (tmp & 0x1)
break;
udelay(1);
@@ -229,12 +263,11 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
- WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);
+ WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
- hub->eng_distance * eng, hub_ip);
+ tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
tmp &= 1 << vmid;
if (tmp)
break;
@@ -244,12 +277,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore)
- /*
- * add semaphore release after invalidation,
- * write with 0 means semaphore release
- */
- WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, 0, hub_ip);
+ WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
/* Issue additional private vm invalidation to MMHUB */
if ((vmhub != AMDGPU_GFXHUB(0)) &&
@@ -266,50 +294,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
spin_unlock(&adev->gmc.invalidate_lock);
- if (i < adev->usec_timeout)
- return;
-
- DRM_ERROR("Timeout waiting for VM flush ACK!\n");
-}
-
-/**
- * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback
- *
- * @adev: amdgpu_device pointer
- * @vmid: vm instance to flush
- * @vmhub: which hub to flush
- * @flush_type: the flush type
- *
- * Flush the TLB for the requested page table.
- */
-static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
- uint32_t vmhub, uint32_t flush_type)
-{
- if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
- return;
-
- /* flush hdp cache */
- adev->hdp.funcs->flush_hdp(adev, NULL);
-
- /* For SRIOV run time, driver shouldn't access the register through MMIO
- * Directly use kiq to do the vm invalidation instead
- */
- if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) &&
- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
- struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
- const unsigned int eng = 17;
- u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
- u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
- u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
-
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
- 1 << vmid);
- return;
- }
-
- mutex_lock(&adev->mman.gtt_window_lock);
- gmc_v11_0_flush_vm_hub(adev, vmid, vmhub, 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
}
/**
@@ -323,59 +309,31 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub, uint32_t inst)
+static void gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ uint16_t queried;
int vmid, i;
- signed long r;
- uint32_t seq;
- uint16_t queried_pasid;
- bool ret;
- struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
-
- if (amdgpu_emu_mode == 0 && ring->sched.ready) {
- spin_lock(&adev->gfx.kiq[0].ring_lock);
- /* 2 dwords flush + 8 dwords fence */
- amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, flush_type, all_hub);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r) {
- amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq[0].ring_lock);
- return -ETIME;
- }
-
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq[0].ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
- }
for (vmid = 1; vmid < 16; vmid++) {
-
- ret = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- if (all_hub) {
- for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
- gmc_v11_0_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- } else {
- gmc_v11_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB(0), flush_type);
- }
+ bool valid;
+
+ valid = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v11_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v11_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+ flush_type);
}
}
-
- return 0;
}
static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
@@ -432,10 +390,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
- /* MES fw manages IH_VMID_x_LUT updating */
- if (ring->is_mes_queue)
- return;
-
if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
else
@@ -476,30 +430,11 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
* 0 valid
*/
-static uint64_t gmc_v11_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-{
- switch (flags) {
- case AMDGPU_VM_MTYPE_DEFAULT:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_NC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_WC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
- case AMDGPU_VM_MTYPE_CC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
- case AMDGPU_VM_MTYPE_UC:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
- default:
- return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- }
-}
-
static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
- *addr = adev->vm_manager.vram_base_offset + *addr -
- adev->gmc.vram_start;
+ *addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
BUG_ON(*addr & 0xFFFF00000000003FULL);
if (!adev->gmc.translate_further)
@@ -519,21 +454,39 @@ static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
- struct amdgpu_bo *bo = mapping->bo_va->base.bo;
-
- *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
- *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+ break;
+ }
- *flags &= ~AMDGPU_PTE_NOALLOC;
- *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
+ if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+ *flags |= AMDGPU_PTE_NOALLOC;
+ else
+ *flags &= ~AMDGPU_PTE_NOALLOC;
- if (mapping->flags & AMDGPU_PTE_PRT) {
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
*flags |= AMDGPU_PTE_PRT;
*flags |= AMDGPU_PTE_SNOOPED;
*flags |= AMDGPU_PTE_LOG;
@@ -542,9 +495,9 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
}
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
AMDGPU_GEM_CREATE_UNCACHED))
- *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
- AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
+ *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
}
static unsigned int gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev)
@@ -574,7 +527,6 @@ static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = {
.flush_gpu_tlb_pasid = gmc_v11_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v11_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v11_0_emit_pasid_mapping,
- .map_mtype = gmc_v11_0_map_mtype,
.get_vm_pde = gmc_v11_0_get_vm_pde,
.get_vm_pte = gmc_v11_0_get_vm_pte,
.get_vbios_fb_size = gmc_v11_0_get_vbios_fb_size,
@@ -587,7 +539,7 @@ static void gmc_v11_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[UMC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(8, 10, 0):
adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM;
@@ -610,13 +562,18 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(3, 0, 1):
adev->mmhub.funcs = &mmhub_v3_0_1_funcs;
break;
case IP_VERSION(3, 0, 2):
adev->mmhub.funcs = &mmhub_v3_0_2_funcs;
break;
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 1):
+ case IP_VERSION(3, 3, 2):
+ adev->mmhub.funcs = &mmhub_v3_3_funcs;
+ break;
default:
adev->mmhub.funcs = &mmhub_v3_0_funcs;
break;
@@ -625,19 +582,25 @@ static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 3):
adev->gfxhub.funcs = &gfxhub_v3_0_3_funcs;
break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs;
+ break;
default:
adev->gfxhub.funcs = &gfxhub_v3_0_funcs;
break;
}
}
-static int gmc_v11_0_early_init(void *handle)
+static int gmc_v11_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v11_0_set_gfxhub_funcs(adev);
gmc_v11_0_set_mmhub_funcs(adev);
@@ -656,9 +619,9 @@ static int gmc_v11_0_early_init(void *handle)
return 0;
}
-static int gmc_v11_0_late_init(void *handle)
+static int gmc_v11_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -679,9 +642,13 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev,
base = adev->mmhub.funcs->get_fb_location(adev);
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
- amdgpu_gmc_gart_location(adev, mc);
- amdgpu_gmc_agp_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH);
+ if (!amdgpu_sriov_vf(adev) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) &&
+ (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
if (amdgpu_sriov_vf(adev))
@@ -753,19 +720,21 @@ static int gmc_v11_0_gart_init(struct amdgpu_device *adev)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
- adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
return amdgpu_gart_table_vram_alloc(adev);
}
-static int gmc_v11_0_sw_init(void *handle)
+static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->init(adev);
+ adev->gfxhub.funcs->init(adev);
+
spin_lock_init(&adev->gmc.invalidate_lock);
r = amdgpu_atomfirmware_get_vram_info(adev,
@@ -775,12 +744,28 @@ static int gmc_v11_0_sw_init(void *handle)
adev->gmc.vram_type = vram_type;
adev->gmc.vram_vendor = vram_vendor;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ /* The mall_size is already calculated as mall_size_per_umc * num_umc.
+ * However, for gfx1151, which features a 2-to-1 UMC mapping,
+ * the result must be multiplied by 2 to determine the actual mall size.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 5, 1):
+ adev->gmc.mall_size *= 2;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
@@ -851,7 +836,7 @@ static int gmc_v11_0_sw_init(void *handle)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
- adev->vm_manager.first_kfd_vmid = 8;
+ adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
amdgpu_vm_manager_init(adev);
@@ -874,9 +859,9 @@ static void gmc_v11_0_gart_fini(struct amdgpu_device *adev)
amdgpu_gart_table_vram_free(adev);
}
-static int gmc_v11_0_sw_fini(void *handle)
+static int gmc_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_vm_manager_fini(adev);
gmc_v11_0_gart_fini(adev);
@@ -918,10 +903,9 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
return r;
/* Flush HDP after it is initialized */
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
- value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
- false : true;
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
adev->mmhub.funcs->set_fault_enable_default(adev, value);
gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
@@ -933,10 +917,12 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
return 0;
}
-static int gmc_v11_0_hw_init(void *handle)
+static int gmc_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
/* The sequence of these two function calls matters.*/
gmc_v11_0_init_golden_registers(adev);
@@ -963,9 +949,9 @@ static void gmc_v11_0_gart_disable(struct amdgpu_device *adev)
adev->mmhub.funcs->gart_disable(adev);
}
-static int gmc_v11_0_hw_fini(void *handle)
+static int gmc_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) {
/* full access mode, so don't touch any GMC register */
@@ -974,56 +960,53 @@ static int gmc_v11_0_hw_fini(void *handle)
}
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
gmc_v11_0_gart_disable(adev);
return 0;
}
-static int gmc_v11_0_suspend(void *handle)
+static int gmc_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v11_0_hw_fini(adev);
+ gmc_v11_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v11_0_resume(void *handle)
+static int gmc_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v11_0_hw_init(adev);
+ r = gmc_v11_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v11_0_is_idle(void *handle)
+static bool gmc_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v11.*/
return true;
}
-static int gmc_v11_0_wait_for_idle(void *handle)
+static int gmc_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v11.*/
return 0;
}
-static int gmc_v11_0_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int gmc_v11_0_set_clockgating_state(void *handle,
+static int gmc_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = adev->mmhub.funcs->set_clockgating(adev, state);
if (r)
@@ -1032,16 +1015,16 @@ static int gmc_v11_0_set_clockgating_state(void *handle,
return athub_v3_0_set_clockgating(adev, state);
}
-static void gmc_v11_0_get_clockgating_state(void *handle, u64 *flags)
+static void gmc_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->get_clockgating(adev, flags);
athub_v3_0_get_clockgating(adev, flags);
}
-static int gmc_v11_0_set_powergating_state(void *handle,
+static int gmc_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1059,7 +1042,6 @@ const struct amd_ip_funcs gmc_v11_0_ip_funcs = {
.resume = gmc_v11_0_resume,
.is_idle = gmc_v11_0_is_idle,
.wait_for_idle = gmc_v11_0_wait_for_idle,
- .soft_reset = gmc_v11_0_soft_reset,
.set_clockgating_state = gmc_v11_0_set_clockgating_state,
.set_powergating_state = gmc_v11_0_set_powergating_state,
.get_clockgating_state = gmc_v11_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
new file mode 100644
index 000000000000..f4a19357ccbc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -0,0 +1,1043 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atomfirmware.h"
+#include "gmc_v12_0.h"
+#include "athub/athub_4_1_0_sh_mask.h"
+#include "athub/athub_4_1_0_offset.h"
+#include "oss/osssys_7_0_0_offset.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "soc24_enum.h"
+#include "soc24.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+#include "nbif_v6_3_1.h"
+#include "gfxhub_v12_0.h"
+#include "mmhub_v4_1_0.h"
+#include "athub_v4_1_0.h"
+#include "umc_v8_14.h"
+
+static int gmc_v12_0_ecc_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int gmc_v12_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src, unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ /* MM HUB */
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
+ /* GFX HUB */
+ /* This works because this interrupt is only
+ * enabled at init/resume and disabled in
+ * fini/suspend, so the overall state doesn't
+ * change over the course of suspend/resume.
+ */
+ if (!adev->in_s0ix)
+ amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_vmhub *hub;
+ uint32_t status = 0;
+ u64 addr;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ if (entry->client_id == SOC21_IH_CLIENTID_VMC)
+ hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ else
+ hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * Issue a dummy read to wait for the status register to
+ * be updated to avoid reading an incorrect value due to
+ * the new fast GRBM interface.
+ */
+ if (entry->vmid_src == AMDGPU_GFXHUB(0))
+ RREG32(hub->vm_l2_pro_fault_status);
+
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
+ }
+
+ if (printk_ratelimit()) {
+ struct amdgpu_task_info *task_info;
+
+ dev_err(adev->dev,
+ "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+ entry->vmid_src ? "mmhub" : "gfxhub",
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
+ dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
+ addr, entry->client_id);
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
+ hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs gmc_v12_0_irq_funcs = {
+ .set = gmc_v12_0_vm_fault_interrupt_state,
+ .process = gmc_v12_0_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs gmc_v12_0_ecc_funcs = {
+ .set = gmc_v12_0_ecc_interrupt_state,
+ .process = amdgpu_umc_process_ecc_irq,
+};
+
+static void gmc_v12_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v12_0_irq_funcs;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v12_0_ecc_funcs;
+ }
+}
+
+/**
+ * gmc_v12_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v12_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB0(0)) &&
+ (!amdgpu_sriov_vf(adev)));
+}
+
+static bool gmc_v12_0_get_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ *p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff;
+
+ return !!(*p_pasid);
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+
+static void gmc_v12_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
+ unsigned int vmhub, uint32_t flush_type)
+{
+ bool use_semaphore = gmc_v12_0_use_invalidate_semaphore(adev, vmhub);
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 tmp;
+ /* Use register 17 for GART */
+ const unsigned eng = 17;
+ unsigned int i;
+ unsigned char hub_ip = 0;
+
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
+ GC_HWIP : MMHUB_HWIP;
+
+ spin_lock(&adev->gmc.invalidate_lock);
+ /*
+ * It may lose gpuvm invalidate acknowledge state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* a read return value of 1 means semaphore acquire */
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, hub_ip);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev,
+ "Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);
+
+ /* Wait for ACK with a delay.*/
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng, hub_ip);
+ tmp &= 1 << vmid;
+ if (tmp)
+ break;
+
+ udelay(1);
+ }
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0, hub_ip);
+
+ /* Issue additional private vm invalidation to MMHUB */
+ if ((vmhub != AMDGPU_GFXHUB(0)) &&
+ (hub->vm_l2_bank_select_reserved_cid2) &&
+ !amdgpu_sriov_vf(adev)) {
+ inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ /* bit 25: RESERVED_CACHE_PRIVATE_INVALIDATION */
+ inv_req |= (1 << 25);
+ /* Issue private invalidation */
+ WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
+ /* Read back to ensure invalidation is done */
+ RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
+ }
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
+ if (i < adev->usec_timeout)
+ return;
+
+ dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
+}
+
+/**
+ * gmc_v12_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: which hub to flush
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
+ return;
+
+ /* flush hdp cache */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
+ */
+ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ const unsigned eng = 17;
+ u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
+ u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, GET_INST(GC, 0));
+ return;
+ }
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ gmc_v12_0_flush_vm_hub(adev, vmid, vmhub, 0);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ return;
+}
+
+/**
+ * gmc_v12_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ * @flush_type: the flush type
+ * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
+{
+ uint16_t queried;
+ int vmid, i;
+
+ if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready &&
+ (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x84) {
+ struct mes_inv_tlbs_pasid_input input = {0};
+ input.pasid = pasid;
+ input.flush_type = flush_type;
+ input.hub_id = AMDGPU_GFXHUB(0);
+ /* MES will invalidate all gc_hub for the device from master */
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ if (all_hub) {
+ /* Only need to invalidate mm_hub now, gfx12 only supports one mmhub */
+ input.hub_id = AMDGPU_MMHUB0(0);
+ adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input);
+ }
+ return;
+ }
+
+ for (vmid = 1; vmid < 16; vmid++) {
+ bool valid;
+
+ valid = gmc_v12_0_get_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v12_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v12_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+ flush_type);
+ }
+ }
+}
+
+static uint64_t gmc_v12_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ bool use_semaphore = gmc_v12_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+ unsigned eng = ring->vm_inv_eng;
+
+ /*
+ * It may lose gpuvm invalidate acknowledge state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means semaphore acquire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0x1, 0x1);
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
+ hub->eng_distance * eng,
+ hub->vm_inv_eng0_ack +
+ hub->eng_distance * eng,
+ req, 1 << vmid);
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0);
+
+ return pd_addr;
+}
+
+static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
+ unsigned pasid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg;
+
+ if (ring->vm_hub == AMDGPU_GFXHUB(0))
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
+ else
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid;
+
+ amdgpu_ring_emit_wreg(ring, reg, pasid);
+}
+
+/*
+ * PTE format:
+ * 63 P
+ * 62:59 reserved
+ * 58 D
+ * 57 G
+ * 56 T
+ * 55:54 M
+ * 53:52 SW
+ * 51:48 reserved for future
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format:
+ * 63 P
+ * 62:58 block fragment size
+ * 57 reserved
+ * 56 A
+ * 55:54 M
+ * 53:52 reserved
+ * 51:48 reserved for future
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+
+static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ if (!(*flags & AMDGPU_PDE_PTE_GFX12) && !(*flags & AMDGPU_PTE_SYSTEM))
+ *addr = adev->vm_manager.vram_base_offset + *addr -
+ adev->gmc.vram_start;
+ BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+ if (!adev->gmc.translate_further)
+ return;
+
+ if (level == AMDGPU_VM_PDB1) {
+ /* Set the block fragment size */
+ if (!(*flags & AMDGPU_PDE_PTE_GFX12))
+ *flags |= AMDGPU_PDE_BFS_GFX12(0x9);
+
+ } else if (level == AMDGPU_VM_PDB0) {
+ if (*flags & AMDGPU_PDE_PTE_GFX12)
+ *flags &= ~AMDGPU_PDE_PTE_GFX12;
+ }
+}
+
+static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *flags)
+{
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
+ break;
+ }
+
+ if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+ *flags |= AMDGPU_PTE_NOALLOC;
+ else
+ *flags &= ~AMDGPU_PTE_NOALLOC;
+
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
+ *flags |= AMDGPU_PTE_PRT_GFX12;
+ *flags |= AMDGPU_PTE_SNOOPED;
+ *flags |= AMDGPU_PTE_SYSTEM;
+ *flags |= AMDGPU_PTE_IS_PTE;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+
+ if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
+ *flags |= AMDGPU_PTE_DCC;
+
+ if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED)
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
+}
+
+static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev)
+{
+ unsigned int max_tex_channel_caches, alignment;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1))
+ return 0;
+
+ max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches;
+ if (is_power_of_2(max_tex_channel_caches))
+ alignment = (unsigned int)(max_tex_channel_caches / SZ_4);
+ else
+ alignment = roundup_pow_of_two(max_tex_channel_caches);
+
+ return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K);
+}
+
+static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
+ .emit_flush_gpu_tlb = gmc_v12_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v12_0_emit_pasid_mapping,
+ .get_vm_pde = gmc_v12_0_get_vm_pde,
+ .get_vm_pte = gmc_v12_0_get_vm_pte,
+ .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
+ .get_dcc_alignment = gmc_v12_0_get_dcc_alignment,
+};
+
+static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.gmc_funcs = &gmc_v12_0_gmc_funcs;
+}
+
+static void gmc_v12_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 14, 0): /* UMC 8.14.0: fill in instance counts, channel offset and RAS block */
+ adev->umc.channel_inst_num = UMC_V8_14_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V8_14_UMC_INSTANCE_NUM(adev);
+ adev->umc.node_inst_num = 0;
+ adev->umc.max_ras_err_cnt_per_query = UMC_V8_14_TOTAL_CHANNEL_NUM(adev);
+ adev->umc.channel_offs = UMC_V8_14_PER_CHANNEL_OFFSET;
+ adev->umc.ras = &umc_v8_14_ras;
+ break;
+ default:
+ break; /* any other UMC version: adev->umc is left untouched */
+ }
+}
+
+
+static void gmc_v12_0_set_mmhub_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0): /* MMHUB 4.1.0 is the only version handled here */
+ adev->mmhub.funcs = &mmhub_v4_1_0_funcs;
+ break;
+ default:
+ break; /* NOTE(review): funcs is not assigned here, yet gmc_v12_0_sw_init() calls adev->mmhub.funcs->init() unchecked */
+ }
+}
+
+static void gmc_v12_0_set_gfxhub_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1): /* GC 12.0.0 and 12.0.1 share the same gfxhub callbacks */
+ adev->gfxhub.funcs = &gfxhub_v12_0_funcs;
+ break;
+ default:
+ break; /* NOTE(review): funcs is not assigned here, yet gmc_v12_0_sw_init() calls adev->gfxhub.funcs->init() unchecked */
+ }
+}
+
+static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ gmc_v12_0_set_gfxhub_funcs(adev); /* install the per-IP callback tables first */
+ gmc_v12_0_set_mmhub_funcs(adev);
+ gmc_v12_0_set_gmc_funcs(adev);
+ gmc_v12_0_set_irq_funcs(adev);
+ gmc_v12_0_set_umc_funcs(adev);
+
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1; /* inclusive end of a 4 GiB window */
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1; /* inclusive end of a 4 GiB window */
+
+ return 0; /* no failure path: always returns 0 */
+}
+
+static int gmc_v12_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+ if (r)
+ return r; /* stop at the first failing step and propagate its error */
+
+ r = amdgpu_gmc_ras_late_init(adev);
+ if (r)
+ return r;
+
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); /* last step: its result is the return value */
+}
+
+static void gmc_v12_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 base = 0;
+
+ base = adev->mmhub.funcs->get_fb_location(adev); /* overwrites the 0 initializer above */
+
+ amdgpu_gmc_set_agp_default(adev, mc);
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base); /* NOTE(review): passes &adev->gmc, not mc; gmc_v12_0_mc_init() passes &adev->gmc as mc, so they coincide there */
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_LOW);
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) /* AGP location only when not an SR-IOV VF and amdgpu_agp == 1 */
+ amdgpu_gmc_agp_location(adev, mc);
+
+ /* base offset of vram pages: 0 for SR-IOV VFs, otherwise queried from the MMHUB */
+ if (amdgpu_sriov_vf(adev))
+ adev->vm_manager.vram_base_offset = 0;
+ else
+ adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev);
+}
+
+/**
+ * gmc_v12_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, size the visible vram aperture and the GART,
+ * and place vram and gart within the GPU's address space.
+ * Returns 0 for success, or the error from amdgpu_device_resize_fb_bar().
+ */
+static int gmc_v12_0_mc_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* get_memsize() result is scaled by 1024 * 1024 (presumably MB -> bytes) */
+ adev->gmc.mc_vram_size =
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+
+ if (!(adev->flags & AMD_IS_APU)) { /* BAR resize is attempted on non-APU parts only */
+ r = amdgpu_device_resize_fb_bar(adev);
+ if (r)
+ return r;
+ }
+
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); /* aperture taken from PCI resource 0 */
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { /* APU without passthrough: override the BAR values */
+ adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev);
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+#endif
+ /* In case the PCI BAR is larger than the actual amount of vram */
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+ if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+ adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
+
+ /* set the gart size: 512 MiB when amdgpu_gart_size is -1, else amdgpu_gart_size MiB */
+ if (amdgpu_gart_size == -1) {
+ adev->gmc.gart_size = 512ULL << 20;
+ } else
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+
+ gmc_v12_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+static int gmc_v12_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) { /* a GART BO already exists: warn and report success without redoing the setup */
+ WARN(1, "PCIE GART already initialized\n");
+ return 0;
+ }
+
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8; /* presumably 8 bytes per page-table entry - TODO confirm */
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_UC) |
+ AMDGPU_PTE_EXECUTABLE |
+ AMDGPU_PTE_IS_PTE; /* default GART PTE flags: UC mtype, executable, IS_PTE */
+
+ return amdgpu_gart_table_vram_alloc(adev); /* result of the table allocation is the return value */
+}
+
+static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->mmhub.funcs->init(adev); /* funcs pointers are dereferenced without a NULL check */
+
+ adev->gfxhub.funcs->init(adev);
+
+ spin_lock_init(&adev->gmc.invalidate_lock);
+
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor); /* NOTE(review): r is not checked before being reassigned below */
+ adev->gmc.vram_width = vram_width;
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+ set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+ /*
+ * To fulfill 4-level page support,
+ * vm size is 256TB (48bit), maximum size,
+ * block size 512 (9bit)
+ */
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ break;
+ default:
+ break;
+ }
+
+ /* VMC page fault interrupt, routed to the vm_fault source. */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VMC,
+ VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault); /* GFX UTCL2 faults share the same vm_fault source */
+ if (r)
+ return r;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* interrupt sent to DF; registered only when not an SR-IOV VF */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ if (r)
+ return r;
+ }
+
+ /*
+ * Set the internal MC address mask. This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+
+ r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); /* 44-bit DMA mask */
+ if (r) {
+ printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ return r;
+ }
+
+ adev->need_swiotlb = drm_need_swiotlb(44); /* same 44-bit width as the DMA mask above */
+
+ r = gmc_v12_0_mc_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_gmc_get_vbios_allocations(adev);
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v12_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7 and amdkfd VMIDs 8-15,
+ * unless adev->gfx.disable_kq is set, in which case KFD VMIDs start at 1
+ */
+ adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
+
+ amdgpu_vm_manager_init(adev);
+
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * gmc_v12_0_gart_fini - free the GART table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Releases the GART table via amdgpu_gart_table_vram_free().
+ */
+static void gmc_v12_0_gart_fini(struct amdgpu_device *adev)
+{
+ amdgpu_gart_table_vram_free(adev);
+}
+
+static int gmc_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_vm_manager_fini(adev); /* VM manager, GART, then BO layer: reverse of the init order in gmc_v12_0_sw_init() */
+ gmc_v12_0_gart_fini(adev);
+ amdgpu_gem_force_release(adev);
+ amdgpu_bo_fini(adev);
+
+ return 0; /* always returns 0 */
+}
+
+static void gmc_v12_0_init_golden_registers(struct amdgpu_device *adev)
+{
+}
+
+/**
+ * gmc_v12_0_gart_enable - enable the GART through the MMHUB callbacks
+ *
+ * @adev: amdgpu_device pointer
+ */
+static int gmc_v12_0_gart_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ if (adev->gart.bo == NULL) { /* the GART table BO must exist before enabling */
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
+
+ r = adev->mmhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ /* Flush HDP after it is initialized */
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; /* false only when fault stop is set to ALWAYS */
+
+ adev->mmhub.funcs->set_fault_enable_default(adev, value);
+ gmc_v12_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); /* TLB flush for VMID 0 on MMHUB0 instance 0 */
+
+ dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+
+ return 0;
+}
+
+static int gmc_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* Golden registers first, then GART enable: the order of these two calls matters. */
+ gmc_v12_0_init_golden_registers(adev);
+
+ r = gmc_v12_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ if (adev->umc.funcs && adev->umc.funcs->init_registers) /* UMC register init is optional */
+ adev->umc.funcs->init_registers(adev);
+
+ return 0;
+}
+
+/**
+ * gmc_v12_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disables the GART by calling the MMHUB gart_disable callback.
+ */
+static void gmc_v12_0_gart_disable(struct amdgpu_device *adev)
+{
+ adev->mmhub.funcs->gart_disable(adev);
+}
+
+static int gmc_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* full access mode, so don't touch any GMC register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ return 0; /* SR-IOV VF: skip the interrupt release and GART disable below */
+ }
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); /* pairs with amdgpu_irq_get() in gmc_v12_0_late_init() */
+
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) /* ECC irq only when it has funcs and UMC RAS is supported */
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+
+ gmc_v12_0_gart_disable(adev);
+
+ return 0;
+}
+
+static int gmc_v12_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ gmc_v12_0_hw_fini(ip_block); /* suspend is hw_fini; its return value (always 0) is ignored */
+
+ return 0;
+}
+
+static int gmc_v12_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = gmc_v12_0_hw_init(ip_block); /* resume re-runs the hardware init path */
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(ip_block->adev); /* reached only after hw_init succeeded */
+
+ return 0;
+}
+
+static bool gmc_v12_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* Always reports idle: no MC busy check is done for GMC v12. */
+ return true;
+}
+
+static int gmc_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* Nothing to wait for in GMC v12: returns success immediately. */
+ return 0;
+}
+
+static int gmc_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = adev->mmhub.funcs->set_clockgating(adev, state); /* MMHUB first; ATHUB is skipped if this fails */
+ if (r)
+ return r;
+
+ return athub_v4_1_0_set_clockgating(adev, state); /* ATHUB result is the return value */
+}
+
+static void gmc_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->mmhub.funcs->get_clockgating(adev, flags); /* MMHUB and ATHUB both report through the same flags pointer */
+
+ athub_v4_1_0_get_clockgating(adev, flags);
+}
+
+static int gmc_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0; /* no-op: state is ignored and success is always reported */
+}
+
+const struct amd_ip_funcs gmc_v12_0_ip_funcs = {
+ .name = "gmc_v12_0",
+ .early_init = gmc_v12_0_early_init,
+ .sw_init = gmc_v12_0_sw_init,
+ .hw_init = gmc_v12_0_hw_init,
+ .late_init = gmc_v12_0_late_init,
+ .sw_fini = gmc_v12_0_sw_fini,
+ .hw_fini = gmc_v12_0_hw_fini,
+ .suspend = gmc_v12_0_suspend,
+ .resume = gmc_v12_0_resume,
+ .is_idle = gmc_v12_0_is_idle,
+ .wait_for_idle = gmc_v12_0_wait_for_idle,
+ .set_clockgating_state = gmc_v12_0_set_clockgating_state,
+ .set_powergating_state = gmc_v12_0_set_powergating_state,
+ .get_clockgating_state = gmc_v12_0_get_clockgating_state,
+};
+
+const struct amdgpu_ip_block_version gmc_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h
new file mode 100644
index 000000000000..deca93e4a156
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V12_0_H__
+#define __GMC_V12_0_H__
+
+extern const struct amd_ip_funcs gmc_v12_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v12_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 5b837a65fad2..f6ad7911f1e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -43,7 +43,7 @@
static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v6_0_wait_for_idle(void *handle);
+static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/tahiti_mc.bin");
MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin");
@@ -64,8 +64,13 @@ MODULE_FIRMWARE("amdgpu/si58_mc.bin");
static void gmc_v6_0_mc_stop(struct amdgpu_device *adev)
{
u32 blackout;
+ struct amdgpu_ip_block *ip_block;
- gmc_v6_0_wait_for_idle((void *)adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v6_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -98,9 +103,7 @@ static void gmc_v6_0_mc_resume(struct amdgpu_device *adev)
static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
- bool is_58_fw = false;
DRM_DEBUG("\n");
@@ -126,17 +129,14 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
/* this memory configuration requires special firmware */
if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
- is_58_fw = true;
+ chip_name = "si58";
- if (is_58_fw)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/si58_mc.bin");
- else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gmc.fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
dev_err(adev->dev,
- "si_mc: Failed to load firmware \"%s\"\n",
- fw_name);
+ "si_mc: Failed to load firmware \"%s_mc.bin\"\n",
+ chip_name);
amdgpu_ucode_release(&adev->gmc.fw);
}
return err;
@@ -211,13 +211,16 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
base <<= 24;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
}
static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
{
int i, j;
+ struct amdgpu_ip_block *ip_block;
+
/* Initialize HDP */
for (i = 0, j = 0; i < 32; i++, j += 0x6) {
@@ -229,7 +232,11 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v6_0_wait_for_idle((void *)adev))
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ if (gmc_v6_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
if (adev->mode_info.num_crtc) {
@@ -242,7 +249,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
/* disable VGA render */
tmp = RREG32(mmVGA_RENDER_CONTROL);
- tmp &= ~VGA_VSTATUS_CNTL;
+ tmp &= VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK;
WREG32(mmVGA_RENDER_CONTROL, tmp);
}
/* Update configuration */
@@ -253,10 +260,10 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
adev->mem_scratch.gpu_addr >> 12);
WREG32(mmMC_VM_AGP_BASE, 0);
- WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
- WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
+ WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
+ WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
- if (gmc_v6_0_wait_for_idle((void *)adev))
+ if (gmc_v6_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
}
@@ -375,7 +382,9 @@ static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
*flags &= ~AMDGPU_PTE_EXECUTABLE;
@@ -434,9 +443,10 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
- uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
+ AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
- (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+ (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -619,17 +629,16 @@ static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
"write" : "read", block, mc_client, mc_id);
}
-/*
static const u32 mc_cg_registers[] = {
- MC_HUB_MISC_HUB_CG,
- MC_HUB_MISC_SIP_CG,
- MC_HUB_MISC_VM_CG,
- MC_XPB_CLK_GAT,
- ATC_MISC_CG,
- MC_CITF_MISC_WR_CG,
- MC_CITF_MISC_RD_CG,
- MC_CITF_MISC_VM_CG,
- VM_L2_CG,
+ mmMC_HUB_MISC_HUB_CG,
+ mmMC_HUB_MISC_SIP_CG,
+ mmMC_HUB_MISC_VM_CG,
+ mmMC_XPB_CLK_GAT,
+ mmATC_MISC_CG,
+ mmMC_CITF_MISC_WR_CG,
+ mmMC_CITF_MISC_RD_CG,
+ mmMC_CITF_MISC_VM_CG,
+ mmVM_L2_CG,
};
static const u32 mc_cg_ls_en[] = {
@@ -664,7 +673,7 @@ static void gmc_v6_0_enable_mc_ls(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
orig = data = RREG32(mc_cg_registers[i]);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
data |= mc_cg_ls_en[i];
else
data &= ~mc_cg_ls_en[i];
@@ -681,7 +690,7 @@ static void gmc_v6_0_enable_mc_mgcg(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
orig = data = RREG32(mc_cg_registers[i]);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
data |= mc_cg_en[i];
else
data &= ~mc_cg_en[i];
@@ -697,7 +706,7 @@ static void gmc_v6_0_enable_bif_mgls(struct amdgpu_device *adev,
orig = data = RREG32_PCIE(ixPCIE_CNTL2);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1);
data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1);
data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1);
@@ -720,7 +729,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev,
orig = data = RREG32(mmHDP_HOST_PATH_CNTL);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
else
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);
@@ -736,7 +745,7 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
orig = data = RREG32(mmHDP_MEM_POWER_LS);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
else
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);
@@ -744,7 +753,6 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
if (orig != data)
WREG32(mmHDP_MEM_POWER_LS, data);
}
-*/
static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type)
{
@@ -766,9 +774,9 @@ static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v6_0_early_init(void *handle)
+static int gmc_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v6_0_set_gmc_funcs(adev);
gmc_v6_0_set_irq_funcs(adev);
@@ -776,9 +784,9 @@ static int gmc_v6_0_early_init(void *handle)
return 0;
}
-static int gmc_v6_0_late_init(void *handle)
+static int gmc_v6_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -803,10 +811,10 @@ static unsigned int gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
-static int gmc_v6_0_sw_init(void *handle)
+static int gmc_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
@@ -880,9 +888,9 @@ static int gmc_v6_0_sw_init(void *handle)
return 0;
}
-static int gmc_v6_0_sw_fini(void *handle)
+static int gmc_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
@@ -893,10 +901,10 @@ static int gmc_v6_0_sw_fini(void *handle)
return 0;
}
-static int gmc_v6_0_hw_init(void *handle)
+static int gmc_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v6_0_mc_program(adev);
@@ -914,13 +922,13 @@ static int gmc_v6_0_hw_init(void *handle)
if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
- else
- return r;
+
+ return 0;
}
-static int gmc_v6_0_hw_fini(void *handle)
+static int gmc_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v6_0_gart_disable(adev);
@@ -928,21 +936,19 @@ static int gmc_v6_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v6_0_suspend(void *handle)
+static int gmc_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v6_0_hw_fini(adev);
+ gmc_v6_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v6_0_resume(void *handle)
+static int gmc_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = gmc_v6_0_hw_init(adev);
+ r = gmc_v6_0_hw_init(ip_block);
if (r)
return r;
@@ -951,9 +957,10 @@ static int gmc_v6_0_resume(void *handle)
return 0;
}
-static bool gmc_v6_0_is_idle(void *handle)
+static bool gmc_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -963,13 +970,13 @@ static bool gmc_v6_0_is_idle(void *handle)
return true;
}
-static int gmc_v6_0_wait_for_idle(void *handle)
+static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gmc_v6_0_is_idle(handle))
+ if (gmc_v6_0_is_idle(ip_block))
return 0;
udelay(1);
}
@@ -977,9 +984,10 @@ static int gmc_v6_0_wait_for_idle(void *handle)
}
-static int gmc_v6_0_soft_reset(void *handle)
+static int gmc_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -996,7 +1004,8 @@ static int gmc_v6_0_soft_reset(void *handle)
if (srbm_soft_reset) {
gmc_v6_0_mc_stop(adev);
- if (gmc_v6_0_wait_for_idle(adev))
+
+ if (gmc_v6_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
tmp = RREG32(mmSRBM_SOFT_RESET);
@@ -1086,13 +1095,27 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int gmc_v6_0_set_clockgating_state(void *handle,
+static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool gate = false;
+
+ if (state == AMD_CG_STATE_GATE)
+ gate = true;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ gmc_v6_0_enable_mc_mgcg(adev, gate);
+ gmc_v6_0_enable_mc_ls(adev, gate);
+ }
+ gmc_v6_0_enable_bif_mgls(adev, gate);
+ gmc_v6_0_enable_hdp_mgcg(adev, gate);
+ gmc_v6_0_enable_hdp_ls(adev, gate);
+
return 0;
}
-static int gmc_v6_0_set_powergating_state(void *handle,
+static int gmc_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 6a6929ac2748..93d7ccb7d013 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -52,7 +52,7 @@
static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v7_0_wait_for_idle(void *handle);
+static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/bonaire_mc.bin");
MODULE_FIRMWARE("amdgpu/hawaii_mc.bin");
@@ -87,9 +87,14 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
static void gmc_v7_0_mc_stop(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 blackout;
- gmc_v7_0_wait_for_idle((void *)adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v7_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -130,7 +135,6 @@ static void gmc_v7_0_mc_resume(struct amdgpu_device *adev)
static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
DRM_DEBUG("\n");
@@ -153,11 +157,10 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
return -EINVAL;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
-
- err = amdgpu_ucode_request(adev, &adev->gmc.fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
- pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name);
+ pr_err("cik_mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name);
amdgpu_ucode_release(&adev->gmc.fw);
}
return err;
@@ -239,8 +242,9 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
base <<= 24;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
}
/**
@@ -253,9 +257,14 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 tmp;
int i, j;
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
/* Initialize HDP */
for (i = 0, j = 0; i < 32; i++, j += 0x6) {
WREG32((0xb05 + j), 0x00000000);
@@ -266,7 +275,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v7_0_wait_for_idle((void *)adev))
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
if (adev->mode_info.num_crtc) {
@@ -288,9 +297,9 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
adev->mem_scratch.gpu_addr >> 12);
WREG32(mmMC_VM_AGP_BASE, 0);
- WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
- WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
- if (gmc_v7_0_wait_for_idle((void *)adev))
+ WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
+ WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
@@ -422,28 +431,23 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub, uint32_t inst)
+static void gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ u32 mask = 0x0;
int vmid;
- unsigned int tmp;
-
- if (amdgpu_in_reset(adev))
- return -EIO;
for (vmid = 1; vmid < 16; vmid++) {
+ u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
+ mask |= 1 << vmid;
}
- return 0;
+ WREG32(mmVM_INVALIDATE_REQUEST, mask);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
}
/*
@@ -500,7 +504,9 @@ static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
*flags &= ~AMDGPU_PTE_EXECUTABLE;
@@ -567,9 +573,10 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
- uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
+ AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
- (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+ (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -927,9 +934,9 @@ static int gmc_v7_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v7_0_early_init(void *handle)
+static int gmc_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v7_0_set_gmc_funcs(adev);
gmc_v7_0_set_irq_funcs(adev);
@@ -946,9 +953,9 @@ static int gmc_v7_0_early_init(void *handle)
return 0;
}
-static int gmc_v7_0_late_init(void *handle)
+static int gmc_v7_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -974,10 +981,10 @@ static unsigned int gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
-static int gmc_v7_0_sw_init(void *handle)
+static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
@@ -1066,9 +1073,9 @@ static int gmc_v7_0_sw_init(void *handle)
return 0;
}
-static int gmc_v7_0_sw_fini(void *handle)
+static int gmc_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
@@ -1080,10 +1087,10 @@ static int gmc_v7_0_sw_fini(void *handle)
return 0;
}
-static int gmc_v7_0_hw_init(void *handle)
+static int gmc_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v7_0_init_golden_registers(adev);
@@ -1103,13 +1110,13 @@ static int gmc_v7_0_hw_init(void *handle)
if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
- else
- return r;
+
+ return 0;
}
-static int gmc_v7_0_hw_fini(void *handle)
+static int gmc_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v7_0_gart_disable(adev);
@@ -1117,32 +1124,29 @@ static int gmc_v7_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v7_0_suspend(void *handle)
+static int gmc_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v7_0_hw_fini(adev);
+ gmc_v7_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v7_0_resume(void *handle)
+static int gmc_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v7_0_hw_init(adev);
+ r = gmc_v7_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v7_0_is_idle(void *handle)
+static bool gmc_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -1152,20 +1156,13 @@ static bool gmc_v7_0_is_idle(void *handle)
return true;
}
-static int gmc_v7_0_wait_for_idle(void *handle)
+static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
- u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- /* read MC_STATUS */
- tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
- SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
- SRBM_STATUS__MCC_BUSY_MASK |
- SRBM_STATUS__MCD_BUSY_MASK |
- SRBM_STATUS__VMC_BUSY_MASK);
- if (!tmp)
+ if (gmc_v7_0_is_idle(ip_block))
return 0;
udelay(1);
}
@@ -1173,9 +1170,9 @@ static int gmc_v7_0_wait_for_idle(void *handle)
}
-static int gmc_v7_0_soft_reset(void *handle)
+static int gmc_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -1192,7 +1189,7 @@ static int gmc_v7_0_soft_reset(void *handle)
if (srbm_soft_reset) {
gmc_v7_0_mc_stop(adev);
- if (gmc_v7_0_wait_for_idle((void *)adev))
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
tmp = RREG32(mmSRBM_SOFT_RESET);
@@ -1273,6 +1270,9 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
if (!addr && !status)
return 0;
+ amdgpu_vm_update_fault_cache(adev, entry->pasid,
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v7_0_set_fault_enable_default(adev, false);
@@ -1313,11 +1313,11 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int gmc_v7_0_set_clockgating_state(void *handle,
+static int gmc_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -1333,7 +1333,7 @@ static int gmc_v7_0_set_clockgating_state(void *handle,
return 0;
}
-static int gmc_v7_0_set_powergating_state(void *handle,
+static int gmc_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 5af235202513..c5e2a2c41e06 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -53,7 +53,7 @@
static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v8_0_wait_for_idle(void *handle);
+static int gmc_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
@@ -170,8 +170,13 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
static void gmc_v8_0_mc_stop(struct amdgpu_device *adev)
{
u32 blackout;
+ struct amdgpu_ip_block *ip_block;
- gmc_v8_0_wait_for_idle(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v8_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -212,7 +217,6 @@ static void gmc_v8_0_mc_resume(struct amdgpu_device *adev)
static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err;
DRM_DEBUG("\n");
@@ -255,10 +259,10 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
return -EINVAL;
}
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gmc.fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
- pr_err("mc: Failed to load firmware \"%s\"\n", fw_name);
+ pr_err("mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name);
amdgpu_ucode_release(&adev->gmc.fw);
}
return err;
@@ -413,8 +417,9 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24;
+ amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
}
/**
@@ -427,6 +432,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 tmp;
int i, j;
@@ -440,7 +446,11 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v8_0_wait_for_idle((void *)adev))
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
if (adev->mode_info.num_crtc) {
@@ -473,9 +483,9 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmMC_VM_AGP_BASE, 0);
- WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
- WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
- if (gmc_v8_0_wait_for_idle((void *)adev))
+ WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
+ WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
@@ -612,29 +622,23 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub, uint32_t inst)
+static void gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ u32 mask = 0x0;
int vmid;
- unsigned int tmp;
-
- if (amdgpu_in_reset(adev))
- return -EIO;
for (vmid = 1; vmid < 16; vmid++) {
+ u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid)
+ mask |= 1 << vmid;
}
- return 0;
-
+ WREG32(mmVM_INVALIDATE_REQUEST, mask);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
}
/*
@@ -712,11 +716,15 @@ static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
- *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
*flags &= ~AMDGPU_PTE_PRT;
}
@@ -782,9 +790,10 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
WREG32(mmVM_PRT_CNTL, tmp);
if (enable) {
- uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >>
+ AMDGPU_GPU_PAGE_SHIFT;
uint32_t high = adev->vm_manager.max_pfn -
- (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
+ (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -1033,9 +1042,9 @@ static int gmc_v8_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v8_0_early_init(void *handle)
+static int gmc_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v8_0_set_gmc_funcs(adev);
gmc_v8_0_set_irq_funcs(adev);
@@ -1052,9 +1061,9 @@ static int gmc_v8_0_early_init(void *handle)
return 0;
}
-static int gmc_v8_0_late_init(void *handle)
+static int gmc_v8_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -1082,10 +1091,10 @@ static unsigned int gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
#define mmMC_SEQ_MISC0_FIJI 0xA71
-static int gmc_v8_0_sw_init(void *handle)
+static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
@@ -1179,9 +1188,9 @@ static int gmc_v8_0_sw_init(void *handle)
return 0;
}
-static int gmc_v8_0_sw_fini(void *handle)
+static int gmc_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
@@ -1193,10 +1202,10 @@ static int gmc_v8_0_sw_fini(void *handle)
return 0;
}
-static int gmc_v8_0_hw_init(void *handle)
+static int gmc_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v8_0_init_golden_registers(adev);
@@ -1224,13 +1233,13 @@ static int gmc_v8_0_hw_init(void *handle)
if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
- else
- return r;
+
+ return 0;
}
-static int gmc_v8_0_hw_fini(void *handle)
+static int gmc_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v8_0_gart_disable(adev);
@@ -1238,32 +1247,29 @@ static int gmc_v8_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v8_0_suspend(void *handle)
+static int gmc_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v8_0_hw_fini(adev);
+ gmc_v8_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v8_0_resume(void *handle)
+static int gmc_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v8_0_hw_init(adev);
+ r = gmc_v8_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v8_0_is_idle(void *handle)
+static bool gmc_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -1273,11 +1279,11 @@ static bool gmc_v8_0_is_idle(void *handle)
return true;
}
-static int gmc_v8_0_wait_for_idle(void *handle)
+static int gmc_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -1295,10 +1301,10 @@ static int gmc_v8_0_wait_for_idle(void *handle)
}
-static bool gmc_v8_0_check_soft_reset(void *handle)
+static bool gmc_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
@@ -1322,23 +1328,23 @@ static bool gmc_v8_0_check_soft_reset(void *handle)
return false;
}
-static int gmc_v8_0_pre_soft_reset(void *handle)
+static int gmc_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->gmc.srbm_soft_reset)
return 0;
gmc_v8_0_mc_stop(adev);
- if (gmc_v8_0_wait_for_idle(adev))
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
return 0;
}
-static int gmc_v8_0_soft_reset(void *handle)
+static int gmc_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->gmc.srbm_soft_reset)
@@ -1367,9 +1373,9 @@ static int gmc_v8_0_soft_reset(void *handle)
return 0;
}
-static int gmc_v8_0_post_soft_reset(void *handle)
+static int gmc_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->gmc.srbm_soft_reset)
return 0;
@@ -1442,22 +1448,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
if (!addr && !status)
return 0;
+ amdgpu_vm_update_fault_cache(adev, entry->pasid,
+ ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0));
+
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
gmc_v8_0_set_fault_enable_default(adev, false);
if (printk_ratelimit()) {
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+ entry->src_id, entry->src_data[0]);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
- dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",
- entry->src_id, entry->src_data[0], task_info.process_name,
- task_info.tgid, task_info.task_name, task_info.pid);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
- addr);
+ addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
status);
+
gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client,
entry->pasid);
}
@@ -1648,10 +1661,10 @@ static void fiji_update_mc_light_sleep(struct amdgpu_device *adev,
}
}
-static int gmc_v8_0_set_clockgating_state(void *handle,
+static int gmc_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1669,15 +1682,15 @@ static int gmc_v8_0_set_clockgating_state(void *handle,
return 0;
}
-static int gmc_v8_0_set_powergating_state(void *handle,
+static int gmc_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void gmc_v8_0_get_clockgating_state(void *handle, u64 *flags)
+static void gmc_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f9a5a2c0573e..0d1dd587db5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -56,6 +56,7 @@
#include "umc_v6_1.h"
#include "umc_v6_0.h"
#include "umc_v6_7.h"
+#include "umc_v12_0.h"
#include "hdp_v4_0.h"
#include "mca_v3_0.h"
@@ -64,8 +65,6 @@
#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"
-#include "amdgpu_reset.h"
-
/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
@@ -79,8 +78,6 @@
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2
-#define MAX_MEM_RANGES 8
-
static const char * const gfxhub_client_ids[] = {
"CB",
"DB",
@@ -497,14 +494,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
if (j >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP(MMHUB, reg);
else
- tmp = RREG32_SOC15_IP(GC, reg);
+ tmp = RREG32_XCC(reg, j);
tmp &= ~bits;
if (j >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP(MMHUB, reg, tmp);
else
- WREG32_SOC15_IP(GC, reg, tmp);
+ WREG32_XCC(reg, tmp, j);
}
}
break;
@@ -525,14 +522,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
if (j >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP(MMHUB, reg);
else
- tmp = RREG32_SOC15_IP(GC, reg);
+ tmp = RREG32_XCC(reg, j);
tmp |= bits;
if (j >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP(MMHUB, reg, tmp);
else
- WREG32_SOC15_IP(GC, reg, tmp);
+ WREG32_XCC(reg, tmp, j);
}
}
break;
@@ -549,11 +546,12 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
{
bool retry_fault = !!(entry->src_data[1] & 0x80);
bool write_fault = !!(entry->src_data[1] & 0x20);
- uint32_t status = 0, cid = 0, rw = 0;
- struct amdgpu_task_info task_info;
+ uint32_t status = 0, cid = 0, rw = 0, fed = 0;
+ struct amdgpu_task_info *task_info;
struct amdgpu_vmhub *hub;
const char *mmhub_cid;
const char *hub_name;
+ unsigned int vmhub;
u64 addr;
uint32_t cam_index = 0;
int ret, xcc_id = 0;
@@ -566,10 +564,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
hub_name = "mmhub0";
- hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)];
+ vmhub = AMDGPU_MMHUB0(node_id / 4);
} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
hub_name = "mmhub1";
- hub = &adev->vmhub[AMDGPU_MMHUB1(0)];
+ vmhub = AMDGPU_MMHUB1(0);
} else {
hub_name = "gfxhub0";
if (adev->gfx.funcs->ih_node_to_logical_xcc) {
@@ -578,8 +576,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
if (xcc_id < 0)
xcc_id = 0;
}
- hub = &adev->vmhub[xcc_id];
+ vmhub = xcc_id;
}
+ hub = &adev->vmhub[vmhub];
if (retry_fault) {
if (adev->irq.retry_cam_enabled) {
@@ -594,7 +593,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
cam_index = entry->src_data[2] & 0x3ff;
ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
- addr, write_fault);
+ addr, entry->timestamp, write_fault);
WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
if (ret)
return 1;
@@ -617,29 +616,33 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
* tables
*/
if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
- addr, write_fault))
+ addr, entry->timestamp, write_fault))
return 1;
}
}
+ if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
+ return 1;
+
if (!printk_ratelimit())
return 0;
-
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
dev_err(adev->dev,
- "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
- hub_name, retry_fault ? "retry" : "no-retry",
- entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name,
+ retry_fault ? "retry" : "no-retry",
+ entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ amdgpu_vm_print_task_info(adev, task_info);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n",
addr, entry->client_id,
soc15_ih_clientid_name[entry->client_id]);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ if (amdgpu_is_multi_aid(adev))
dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n",
node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4,
node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : "");
@@ -653,13 +656,29 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
* the new fast GRBM interface.
*/
if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
RREG32(hub->vm_l2_pro_fault_status);
status = RREG32(hub->vm_l2_pro_fault_status);
cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
- WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+ fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+
+ /* for fed error, kfd will handle it, return directly */
+ if (fed && amdgpu_ras_is_poison_mode_supported(adev) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)))
+ return 0;
+
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (!status)
+ return 0;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
dev_err(adev->dev,
"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
@@ -670,7 +689,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
gfxhub_client_ids[cid],
cid);
} else {
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
mmhub_cid = mmhub_client_ids_vega10[cid][rw];
break;
@@ -735,7 +754,8 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
if (!amdgpu_sriov_vf(adev) &&
- !adev->gmc.xgmi.connected_to_cpu) {
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !adev->gmc.is_app_apu) {
adev->gmc.ecc_irq.num_types = 1;
adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
}
@@ -770,8 +790,8 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
uint32_t vmhub)
{
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_is_multi_aid(adev))
return false;
return ((vmhub == AMDGPU_MMHUB0(0) ||
@@ -814,48 +834,37 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
+ u32 j, inv_req, tmp, sem, req, ack, inst;
const unsigned int eng = 17;
- u32 j, inv_req, inv_req2, tmp;
struct amdgpu_vmhub *hub;
BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
hub = &adev->vmhub[vmhub];
- if (adev->gmc.xgmi.num_physical_nodes &&
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
- /* Vega20+XGMI caches PTEs in TC and TLB. Add a
- * heavy-weight TLB flush (type 2), which flushes
- * both. Due to a race condition with concurrent
- * memory accesses using the same TLB cache line, we
- * still need a second TLB flush after this.
- */
- inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
- inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
- } else if (flush_type == 2 &&
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
- adev->rev_id == 0) {
- inv_req = gmc_v9_0_get_invalidate_req(vmid, 0);
- inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
- } else {
- inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
- inv_req2 = 0;
- }
+ inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+ sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+ req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ inst = 0;
+ else
+ inst = vmhub;
- /* This is necessary for a HW workaround under SRIOV as well
- * as GFXOFF under bare metal
+ /* This is necessary for SRIOV as well as for GFXOFF to function
+ * properly under bare metal
*/
- if (adev->gfx.kiq[0].ring.sched.ready &&
- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
- down_read_trylock(&adev->reset_domain->sem)) {
+ if (adev->gfx.kiq[inst].ring.sched.ready &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
- 1 << vmid);
- up_read(&adev->reset_domain->sem);
+ amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid, inst);
return;
}
+ /* This path is needed before KIQ/MES/GFXOFF are set up */
spin_lock(&adev->gmc.invalidate_lock);
/*
@@ -870,9 +879,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acquire */
if (vmhub >= AMDGPU_MMHUB0(0))
- tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+ tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, GET_INST(GC, inst));
else
- tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+ tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, GET_INST(GC, inst));
if (tmp & 0x1)
break;
udelay(1);
@@ -882,35 +891,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
- do {
- if (vmhub >= AMDGPU_MMHUB0(0))
- WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
- else
- WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
-
- /*
- * Issue a dummy read to wait for the ACK register to
- * be cleared to avoid a false ACK due to the new fast
- * GRBM interface.
- */
- if ((vmhub == AMDGPU_GFXHUB(0)) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
- RREG32_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng);
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, GET_INST(GC, inst));
+ else
+ WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, GET_INST(GC, inst));
- for (j = 0; j < adev->usec_timeout; j++) {
- if (vmhub >= AMDGPU_MMHUB0(0))
- tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
- else
- tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
- if (tmp & (1 << vmid))
- break;
- udelay(1);
- }
+ /*
+ * Issue a dummy read to wait for the ACK register to
+ * be cleared to avoid a false ACK due to the new fast
+ * GRBM interface.
+ */
+ if ((vmhub == AMDGPU_GFXHUB(0)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
+ RREG32_NO_KIQ(req);
- inv_req = inv_req2;
- inv_req2 = 0;
- } while (inv_req);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (vmhub >= AMDGPU_MMHUB0(0))
+ tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, GET_INST(GC, inst));
+ else
+ tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, GET_INST(GC, inst));
+ if (tmp & (1 << vmid))
+ break;
+ udelay(1);
+ }
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore) {
@@ -919,9 +922,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* write with 0 means semaphore release
*/
if (vmhub >= AMDGPU_MMHUB0(0))
- WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
+ WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, GET_INST(GC, inst));
else
- WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
+ WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, GET_INST(GC, inst));
}
spin_unlock(&adev->gmc.invalidate_lock);
@@ -943,91 +946,32 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub, uint32_t inst)
+static void gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
- int vmid, i;
- signed long r;
- uint32_t seq;
- uint16_t queried_pasid;
- bool ret;
- u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
- struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
-
- if (amdgpu_in_reset(adev))
- return -EIO;
-
- if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) {
- /* Vega20+XGMI caches PTEs in TC and TLB. Add a
- * heavy-weight TLB flush (type 2), which flushes
- * both. Due to a race condition with concurrent
- * memory accesses using the same TLB cache line, we
- * still need a second TLB flush after this.
- */
- bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0));
- /* 2 dwords flush + 8 dwords fence */
- unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
-
- if (vega20_xgmi_wa)
- ndw += kiq->pmf->invalidate_tlbs_size;
-
- spin_lock(&adev->gfx.kiq[inst].ring_lock);
- /* 2 dwords flush + 8 dwords fence */
- amdgpu_ring_alloc(ring, ndw);
- if (vega20_xgmi_wa)
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, 2, all_hub);
-
- if (flush_type == 2 &&
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
- adev->rev_id == 0)
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, 0, all_hub);
-
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, flush_type, all_hub);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r) {
- amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq[inst].ring_lock);
- up_read(&adev->reset_domain->sem);
- return -ETIME;
- }
-
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq[inst].ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
- if (r < 1) {
- dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
- up_read(&adev->reset_domain->sem);
- return -ETIME;
- }
- up_read(&adev->reset_domain->sem);
- return 0;
- }
+ uint16_t queried;
+ int i, vmid;
for (vmid = 1; vmid < 16; vmid++) {
+ bool valid;
- ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- if (all_hub) {
- for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
- gmc_v9_0_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- } else {
- gmc_v9_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB(0), flush_type);
- }
- break;
+ valid = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v9_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB(0),
+ flush_type);
}
}
-
- return 0;
-
}
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
@@ -1129,27 +1073,6 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int v
* 0 valid
*/
-static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-
-{
- switch (flags) {
- case AMDGPU_VM_MTYPE_DEFAULT:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_NC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- case AMDGPU_VM_MTYPE_WC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
- case AMDGPU_VM_MTYPE_RW:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
- case AMDGPU_VM_MTYPE_CC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
- case AMDGPU_VM_MTYPE_UC:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
- default:
- return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- }
-}
-
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
@@ -1177,20 +1100,26 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
struct amdgpu_bo *bo,
- struct amdgpu_bo_va_mapping *mapping,
+ uint32_t vm_flags,
uint64_t *flags)
{
struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
- bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
- bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+ bool is_vram = bo->tbo.resource &&
+ bo->tbo.resource->mem_type == TTM_PL_VRAM;
+ bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT);
+ bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
- struct amdgpu_vm *vm = mapping->bo_va->base.vm;
unsigned int mtype_local, mtype;
+ uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
bool snoop = false;
bool is_local;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ dma_resv_assert_held(bo->tbo.base.resv);
+
+ switch (gc_ip_version) {
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
if (is_vram) {
@@ -1204,8 +1133,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
/* FIXME: is this still needed? Or does
* amdgpu_ttm_tt_pde_flags already handle this?
*/
- if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) &&
+ if (gc_ip_version == IP_VERSION(9, 4, 2) &&
adev->gmc.xgmi.connected_to_cpu)
snoop = true;
} else {
@@ -1213,7 +1141,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
mtype = MTYPE_UC;
else
mtype = MTYPE_NC;
- if (mapping->bo_va->is_xgmi)
+ if (amdgpu_xgmi_same_hive(adev, bo_adev))
snoop = true;
}
} else {
@@ -1228,6 +1156,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
}
break;
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
/* Only local VRAM BOs or system memory on non-NUMA APUs
* can be assumed to be local in their entirety. Choose
* MTYPE_NC as safe fallback for all system memory BOs on
@@ -1251,16 +1181,18 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
snoop = true;
if (uncached) {
mtype = MTYPE_UC;
+ } else if (ext_coherent) {
+ mtype = is_local ? MTYPE_CC : MTYPE_UC;
} else if (adev->flags & AMD_IS_APU) {
mtype = is_local ? mtype_local : MTYPE_NC;
} else {
/* dGPU */
if (is_local)
mtype = mtype_local;
- else if (is_vram)
- mtype = MTYPE_NC;
- else
+ else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram)
mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
}
break;
@@ -1278,31 +1210,49 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
}
if (mtype != MTYPE_NC)
- *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
- AMDGPU_PTE_MTYPE_VG10(mtype);
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype);
+
*flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
}
static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
- struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
uint64_t *flags)
{
- struct amdgpu_bo *bo = mapping->bo_va->base.bo;
-
- *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ *flags |= AMDGPU_PTE_EXECUTABLE;
+ else
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
- *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
- *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
+ switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { /* each case replaces (not ORs) the MTYPE field */
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ case AMDGPU_VM_MTYPE_NC:
+ default:
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_RW:
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
+ break;
+ }
- if (mapping->flags & AMDGPU_PTE_PRT) {
+ if (vm_flags & AMDGPU_VM_PAGE_PRT) {
*flags |= AMDGPU_PTE_PRT;
*flags &= ~AMDGPU_PTE_VALID;
}
- if (bo && bo->tbo.resource)
- gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo,
- mapping, flags);
+ if ((*flags & AMDGPU_PTE_VALID) && bo)
+ gmc_v9_0_get_coherence_flags(adev, vm, bo, vm_flags, flags);
}
static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
@@ -1314,23 +1264,27 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
/* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
* memory can use more efficient MTYPEs.
*/
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+ if (!(adev->flags & AMD_IS_APU) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3))
return;
/* Only direct-mapped memory allows us to determine the NUMA node from
* the DMA address.
*/
if (!adev->ram_is_direct_mapped) {
- dev_dbg(adev->dev, "RAM is not direct mapped\n");
+ dev_dbg_ratelimited(adev->dev, "RAM is not direct mapped\n");
return;
}
- /* Only override mappings with MTYPE_NC, which is the safe default for
- * cacheable memory.
+ /* MTYPE_NC is the safe default and can be overridden.
+ * MTYPE_UC will be present if the memory is extended-coherent
+ * and can also be overridden.
*/
if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
- AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
- dev_dbg(adev->dev, "MTYPE is not NC\n");
+ AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC) &&
+ (*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+ AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC)) {
+ dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n");
return;
}
@@ -1341,7 +1295,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
if (adev->gmc.is_app_apu && vm->mem_id >= 0) {
local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node;
} else {
- dev_dbg(adev->dev, "Only native mode APU is supported.\n");
+ dev_dbg_ratelimited(adev->dev, "Only native mode APU is supported.\n");
return;
}
@@ -1349,25 +1303,31 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
* page or NUMA nodes.
*/
if (!page_is_ram(addr >> PAGE_SHIFT)) {
- dev_dbg(adev->dev, "Page is not RAM.\n");
+ dev_dbg_ratelimited(adev->dev, "Page is not RAM.\n");
return;
}
nid = pfn_to_nid(addr >> PAGE_SHIFT);
- dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
- vm->mem_id, local_node, nid);
+ dev_dbg_ratelimited(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
+ vm->mem_id, local_node, nid);
if (nid == local_node) {
uint64_t old_flags = *flags;
- unsigned int mtype_local = MTYPE_RW;
+ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) ==
+ AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC)) {
+ unsigned int mtype_local = MTYPE_RW;
- if (amdgpu_mtype_local == 1)
- mtype_local = MTYPE_NC;
- else if (amdgpu_mtype_local == 2)
- mtype_local = MTYPE_CC;
+ if (amdgpu_mtype_local == 1)
+ mtype_local = MTYPE_NC;
+ else if (amdgpu_mtype_local == 2)
+ mtype_local = MTYPE_CC;
+
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local);
+ } else {
+ /* MTYPE_UC case */
+ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+ }
- *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
- AMDGPU_PTE_MTYPE_VG10(mtype_local);
- dev_dbg(adev->dev, "flags updated from %llx to %llx\n",
- old_flags, *flags);
+ dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n",
+ old_flags, *flags);
}
}
@@ -1383,7 +1343,7 @@ static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
} else {
u32 viewport;
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
@@ -1413,25 +1373,15 @@ static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
-static enum amdgpu_memory_partition
-gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
+static bool gmc_v9_0_need_reset_on_init(struct amdgpu_device *adev)
{
- enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE;
-
- if (adev->nbio.funcs->get_memory_partition_mode)
- mode = adev->nbio.funcs->get_memory_partition_mode(adev,
- supp_modes);
-
- return mode;
-}
-
-static enum amdgpu_memory_partition
-gmc_v9_0_query_memory_partition(struct amdgpu_device *adev)
-{
- if (amdgpu_sriov_vf(adev))
- return AMDGPU_NPS1_PARTITION_MODE;
+ if (adev->nbio.funcs && adev->nbio.funcs->is_nps_switch_requested &&
+ adev->nbio.funcs->is_nps_switch_requested(adev)) {
+ adev->gmc.reset_flags |= AMDGPU_GMC_INIT_RESET_NPS;
+ return true;
+ }
- return gmc_v9_0_get_memory_partition(adev, NULL);
+ return false;
}
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
@@ -1439,12 +1389,13 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
- .map_mtype = gmc_v9_0_map_mtype,
.get_vm_pde = gmc_v9_0_get_vm_pde,
.get_vm_pte = gmc_v9_0_get_vm_pte,
.override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
- .query_mem_partition_mode = &gmc_v9_0_query_memory_partition,
+ .query_mem_partition_mode = &amdgpu_gmc_query_memory_partition,
+ .request_mem_partition_mode = &amdgpu_gmc_request_memory_partition,
+ .need_reset_on_init = &gmc_v9_0_need_reset_on_init,
};
static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -1454,7 +1405,7 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[UMC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(6, 0, 0):
adev->umc.funcs = &umc_v6_0_funcs;
break;
@@ -1490,6 +1441,17 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
else
adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0];
break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 5, 0):
+ adev->umc.max_ras_err_cnt_per_query =
+ UMC_V12_0_TOTAL_CHANNEL_NUM(adev) * UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL;
+ adev->umc.channel_inst_num = UMC_V12_0_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM;
+ adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET;
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
+ adev->umc.ras = &umc_v12_0_ras;
+ break;
default:
break;
}
@@ -1497,7 +1459,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(9, 4, 1):
adev->mmhub.funcs = &mmhub_v9_4_funcs;
break;
@@ -1505,6 +1467,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
adev->mmhub.funcs = &mmhub_v1_7_funcs;
break;
case IP_VERSION(1, 8, 0):
+ case IP_VERSION(1, 8, 1):
adev->mmhub.funcs = &mmhub_v1_8_funcs;
break;
default:
@@ -1515,7 +1478,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(9, 4, 0):
adev->mmhub.ras = &mmhub_v1_0_ras;
break;
@@ -1526,6 +1489,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
adev->mmhub.ras = &mmhub_v1_7_ras;
break;
case IP_VERSION(1, 8, 0):
+ case IP_VERSION(1, 8, 1):
adev->mmhub.ras = &mmhub_v1_8_ras;
break;
default:
@@ -1536,7 +1500,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ if (amdgpu_is_multi_aid(adev))
adev->gfxhub.funcs = &gfxhub_v1_2_funcs;
else
adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
@@ -1552,7 +1516,7 @@ static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev)
struct amdgpu_mca *mca = &adev->mca;
/* is UMC the right IP to check for MCA? Maybe DF? */
- switch (adev->ip_versions[UMC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(6, 7, 0):
if (!adev->gmc.xgmi.connected_to_cpu) {
mca->mp0.ras = &mca_v3_0_mp0_ras;
@@ -1571,26 +1535,63 @@ static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev)
adev->gmc.xgmi.ras = &xgmi_ras;
}
-static int gmc_v9_0_early_init(void *handle)
+static void gmc_v9_0_init_nps_details(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ enum amdgpu_memory_partition mode;
+ uint32_t supp_modes;
+ int i;
+
+ adev->gmc.supported_nps_modes = 0;
+
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return;
+
+ mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware and supported modes available */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && supp_modes) {
+ while ((i = ffs(supp_modes))) {
+ if (AMDGPU_ALL_NPS_MASK & BIT(i))
+ adev->gmc.supported_nps_modes |= BIT(i);
+ supp_modes &= supp_modes - 1;
+ }
+ } else {
+ /* TODO: Also check whether the PSP version supports NPS switch. Otherwise keep
+ * supported modes as 0.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ adev->gmc.supported_nps_modes =
+ BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
/*
* 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined
* in their IP discovery tables
*/
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_is_multi_aid(adev))
adev->gmc.xgmi.supported = true;
- if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
+ if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) {
adev->gmc.xgmi.supported = true;
adev->gmc.xgmi.connected_to_cpu =
adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
}
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
enum amdgpu_pkg_type pkg_type =
adev->smuio.funcs->get_pkg_type(adev);
/* On GFXIP 9.4.3. APU, there is no physical VRAM domain present
@@ -1625,9 +1626,9 @@ static int gmc_v9_0_early_init(void *handle)
return 0;
}
-static int gmc_v9_0_late_init(void *handle)
+static int gmc_v9_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -1639,7 +1640,7 @@ static int gmc_v9_0_late_init(void *handle)
* writes, while disables HBM ECC for vega10.
*/
if (!amdgpu_sriov_vf(adev) &&
- (adev->ip_versions[UMC_HWIP][0] == IP_VERSION(6, 0, 0))) {
+ (amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(6, 0, 0))) {
if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
if (adev->df.funcs &&
adev->df.funcs->enable_ecc_force_par_wr_rmw)
@@ -1648,13 +1649,8 @@ static int gmc_v9_0_late_init(void *handle)
}
if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
- if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
- adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
- adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
-
- if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops &&
- adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count)
- adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__HDP);
}
r = amdgpu_gmc_ras_late_init(adev);
@@ -1669,14 +1665,17 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
{
u64 base = adev->mmhub.funcs->get_fb_location(adev);
+ amdgpu_gmc_set_agp_default(adev, mc);
+
/* add the xgmi offset of the physical node */
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
- if (adev->gmc.xgmi.connected_to_cpu) {
+ if (amdgpu_gmc_is_pdb0_enabled(adev)) {
amdgpu_gmc_sysvm_location(adev, mc);
} else {
amdgpu_gmc_vram_location(adev, mc, base);
- amdgpu_gmc_gart_location(adev, mc);
- amdgpu_gmc_agp_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+ amdgpu_gmc_agp_location(adev, mc);
}
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
@@ -1747,13 +1746,15 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
/* set the gart size */
if (amdgpu_gart_size == -1) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1): /* all engines support GPUVM */
case IP_VERSION(9, 2, 1): /* all engines support GPUVM */
case IP_VERSION(9, 4, 0):
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@@ -1783,7 +1784,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
return 0;
}
- if (adev->gmc.xgmi.connected_to_cpu) {
+ if (amdgpu_gmc_is_pdb0_enabled(adev)) {
adev->gmc.vmid0_page_table_depth = 1;
adev->gmc.vmid0_page_table_block_size = 12;
} else {
@@ -1796,7 +1797,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
if (r)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
- adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
if (!adev->gmc.real_vram_size) {
@@ -1809,7 +1810,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
if (r)
return r;
- if (adev->gmc.xgmi.connected_to_cpu)
+ if (amdgpu_gmc_is_pdb0_enabled(adev))
r = amdgpu_gmc_pdb0_alloc(adev);
}
@@ -1826,195 +1827,32 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
*/
static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
{
- if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
- (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1)))
+ if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+ (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1)))
adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
}
-static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev)
-{
- enum amdgpu_memory_partition mode;
- u32 supp_modes;
- bool valid;
-
- mode = gmc_v9_0_get_memory_partition(adev, &supp_modes);
-
- /* Mode detected by hardware not present in supported modes */
- if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) &&
- !(BIT(mode - 1) & supp_modes))
- return false;
-
- switch (mode) {
- case UNKNOWN_MEMORY_PARTITION_MODE:
- case AMDGPU_NPS1_PARTITION_MODE:
- valid = (adev->gmc.num_mem_partitions == 1);
- break;
- case AMDGPU_NPS2_PARTITION_MODE:
- valid = (adev->gmc.num_mem_partitions == 2);
- break;
- case AMDGPU_NPS4_PARTITION_MODE:
- valid = (adev->gmc.num_mem_partitions == 3 ||
- adev->gmc.num_mem_partitions == 4);
- break;
- default:
- valid = false;
- }
-
- return valid;
-}
-
-static bool gmc_v9_0_is_node_present(int *node_ids, int num_ids, int nid)
-{
- int i;
-
- /* Check if node with id 'nid' is present in 'node_ids' array */
- for (i = 0; i < num_ids; ++i)
- if (node_ids[i] == nid)
- return true;
-
- return false;
-}
-
-static void
-gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
- struct amdgpu_mem_partition_info *mem_ranges)
-{
- int num_ranges = 0, ret, mem_groups;
- struct amdgpu_numa_info numa_info;
- int node_ids[MAX_MEM_RANGES];
- int num_xcc, xcc_id;
- uint32_t xcc_mask;
-
- num_xcc = NUM_XCC(adev->gfx.xcc_mask);
- xcc_mask = (1U << num_xcc) - 1;
- mem_groups = hweight32(adev->aid_mask);
-
- for_each_inst(xcc_id, xcc_mask) {
- ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
- if (ret)
- continue;
-
- if (numa_info.nid == NUMA_NO_NODE) {
- mem_ranges[0].size = numa_info.size;
- mem_ranges[0].numa.node = numa_info.nid;
- num_ranges = 1;
- break;
- }
-
- if (gmc_v9_0_is_node_present(node_ids, num_ranges,
- numa_info.nid))
- continue;
-
- node_ids[num_ranges] = numa_info.nid;
- mem_ranges[num_ranges].numa.node = numa_info.nid;
- mem_ranges[num_ranges].size = numa_info.size;
- ++num_ranges;
- }
-
- adev->gmc.num_mem_partitions = num_ranges;
-
- /* If there is only partition, don't use entire size */
- if (adev->gmc.num_mem_partitions == 1) {
- mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1);
- do_div(mem_ranges[0].size, mem_groups);
- }
-}
-
-static void
-gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
- struct amdgpu_mem_partition_info *mem_ranges)
-{
- enum amdgpu_memory_partition mode;
- u32 start_addr = 0, size;
- int i;
-
- mode = gmc_v9_0_query_memory_partition(adev);
-
- switch (mode) {
- case UNKNOWN_MEMORY_PARTITION_MODE:
- case AMDGPU_NPS1_PARTITION_MODE:
- adev->gmc.num_mem_partitions = 1;
- break;
- case AMDGPU_NPS2_PARTITION_MODE:
- adev->gmc.num_mem_partitions = 2;
- break;
- case AMDGPU_NPS4_PARTITION_MODE:
- if (adev->flags & AMD_IS_APU)
- adev->gmc.num_mem_partitions = 3;
- else
- adev->gmc.num_mem_partitions = 4;
- break;
- default:
- adev->gmc.num_mem_partitions = 1;
- break;
- }
-
- size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT;
- size /= adev->gmc.num_mem_partitions;
-
- for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
- mem_ranges[i].range.fpfn = start_addr;
- mem_ranges[i].size = ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
- mem_ranges[i].range.lpfn = start_addr + size - 1;
- start_addr += size;
- }
-
- /* Adjust the last one */
- mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn =
- (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
- mem_ranges[adev->gmc.num_mem_partitions - 1].size =
- adev->gmc.real_vram_size -
- ((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn
- << AMDGPU_GPU_PAGE_SHIFT);
-}
-
-static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev)
-{
- bool valid;
-
- adev->gmc.mem_partitions = kzalloc(
- MAX_MEM_RANGES * sizeof(struct amdgpu_mem_partition_info),
- GFP_KERNEL);
-
- if (!adev->gmc.mem_partitions)
- return -ENOMEM;
-
- /* TODO : Get the range from PSP/Discovery for dGPU */
- if (adev->gmc.is_app_apu)
- gmc_v9_0_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions);
- else
- gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
-
- if (amdgpu_sriov_vf(adev))
- valid = true;
- else
- valid = gmc_v9_0_validate_partition_info(adev);
- if (!valid) {
- /* TODO: handle invalid case */
- dev_WARN(adev->dev,
- "Mem ranges not matching with hardware config");
- }
-
- return 0;
-}
-
static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
{
static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
u32 vram_info;
- if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ adev->gmc.vram_width = 128 * 64;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+ if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) {
vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
adev->gmc.vram_vendor = vram_info & 0xF;
}
- adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
- adev->gmc.vram_width = 128 * 64;
}
-static int gmc_v9_0_sw_init(void *handle)
+static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
unsigned long inst_mask = adev->aid_mask;
adev->gfxhub.funcs->init(adev);
@@ -2023,7 +1861,7 @@ static int gmc_v9_0_sw_init(void *handle)
spin_lock_init(&adev->gmc.invalidate_lock);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
+ if (amdgpu_is_multi_aid(adev)) {
gmc_v9_4_3_init_vram_info(adev);
} else if (!adev->bios) {
if (adev->flags & AMD_IS_APU) {
@@ -2063,7 +1901,7 @@ static int gmc_v9_0_sw_init(void *handle)
adev->gmc.vram_type = vram_type;
adev->gmc.vram_vendor = vram_vendor;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
@@ -2091,12 +1929,9 @@ static int gmc_v9_0_sw_init(void *handle)
* vm size is 256TB (48bit), maximum size of Vega10,
* block size 512 (9bit)
*/
- /* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
- if (amdgpu_sriov_vf(adev))
- amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
- else
- amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
case IP_VERSION(9, 4, 1):
@@ -2109,6 +1944,8 @@ static int gmc_v9_0_sw_init(void *handle)
adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
NUM_XCC(adev->gfx.xcc_mask));
@@ -2128,7 +1965,7 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
if (r)
@@ -2142,7 +1979,8 @@ static int gmc_v9_0_sw_init(void *handle)
return r;
if (!amdgpu_sriov_vf(adev) &&
- !adev->gmc.xgmi.connected_to_cpu) {
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !adev->gmc.is_app_apu) {
/* interrupt sent to DF. */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
&adev->gmc.ecc_irq);
@@ -2156,7 +1994,10 @@ static int gmc_v9_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- dma_addr_bits = adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) ? 48:44;
+ dma_addr_bits = amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(9, 4, 2) ?
+ 48 :
+ 44;
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
if (r) {
dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
@@ -2170,8 +2011,8 @@ static int gmc_v9_0_sw_init(void *handle)
amdgpu_gmc_get_vbios_allocations(adev);
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
- r = gmc_v9_0_init_mem_ranges(adev);
+ if (amdgpu_is_multi_aid(adev)) {
+ r = amdgpu_gmc_init_mem_ranges(adev);
if (r)
return r;
}
@@ -2185,6 +2026,7 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
+ gmc_v9_0_init_nps_details(adev);
/*
* number of VMs
* VMID 0 is reserved for System
@@ -2196,9 +2038,11 @@ static int gmc_v9_0_sw_init(void *handle)
* for video processing.
*/
adev->vm_manager.first_kfd_vmid =
- (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) ? 3 : 8;
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_is_multi_aid(adev)) ?
+ 3 :
+ 8;
amdgpu_vm_manager_init(adev);
@@ -2208,20 +2052,18 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ if (amdgpu_is_multi_aid(adev))
amdgpu_gmc_sysfs_init(adev);
return 0;
}
-static int gmc_v9_0_sw_fini(void *handle)
+static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+ if (amdgpu_is_multi_aid(adev))
amdgpu_gmc_sysfs_fini(adev);
- adev->gmc.num_mem_partitions = 0;
- kfree(adev->gmc.mem_partitions);
amdgpu_gmc_ras_fini(adev);
amdgpu_gem_force_release(adev);
@@ -2235,13 +2077,15 @@ static int gmc_v9_0_sw_fini(void *handle)
amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
amdgpu_bo_fini(adev);
+ adev->gmc.num_mem_partitions = 0;
+ kfree(adev->gmc.mem_partitions);
+
return 0;
}
static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
-
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
if (amdgpu_sriov_vf(adev))
break;
@@ -2275,8 +2119,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
*/
void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
{
- if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
- (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) {
+ if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+ (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1))) {
WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
WARN_ON(adev->gmc.sdpif_register !=
RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
@@ -2292,7 +2136,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
int r;
- if (adev->gmc.xgmi.connected_to_cpu)
+ if (amdgpu_gmc_is_pdb0_enabled(adev))
amdgpu_gmc_init_pdb0(adev);
if (adev->gart.bo == NULL) {
@@ -2323,12 +2167,23 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
return 0;
}
-static int gmc_v9_0_hw_init(void *handle)
+static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool value;
int i, r;
+ adev->gmc.flush_pasid_uses_kiq = true;
+
+ /* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB flush
+ * (type 2), which flushes both. Due to a race condition with
+ * concurrent memory accesses using the same TLB cache line, we still
+ * need a second TLB flush after this.
+ */
+ adev->gmc.flush_tlb_needs_extra_type_2 =
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) &&
+ adev->gmc.xgmi.num_physical_nodes;
+
/* The sequence of these two function calls matters.*/
gmc_v9_0_init_golden_registers(adev);
@@ -2345,7 +2200,7 @@ static int gmc_v9_0_hw_init(void *handle)
adev->hdp.funcs->init_registers(adev);
/* After HDP is initialized, flush HDP.*/
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
value = false;
@@ -2372,8 +2227,8 @@ static int gmc_v9_0_hw_init(void *handle)
if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
- else
- return r;
+
+ return 0;
}
/**
@@ -2390,9 +2245,9 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
adev->mmhub.funcs->gart_disable(adev);
}
-static int gmc_v9_0_hw_fini(void *handle)
+static int gmc_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v9_0_gart_disable(adev);
@@ -2410,54 +2265,70 @@ static int gmc_v9_0_hw_fini(void *handle)
if (adev->mmhub.funcs->update_power_gating)
adev->mmhub.funcs->update_power_gating(adev, false);
- amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ /*
+ * For minimal init, late_init is not called, hence VM fault/RAS irqs
+ * are not enabled.
+ */
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+ }
return 0;
}
-static int gmc_v9_0_suspend(void *handle)
+static int gmc_v9_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gmc_v9_0_hw_fini(adev);
+ return gmc_v9_0_hw_fini(ip_block);
}
-static int gmc_v9_0_resume(void *handle)
+static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v9_0_hw_init(adev);
+ /* If a reset is done for NPS mode switch, read the memory range
+ * information again.
+ */
+ if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS) {
+ amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+ adev->gmc.reset_flags &= ~AMDGPU_GMC_INIT_RESET_NPS;
+ }
+
+ r = gmc_v9_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v9_0_is_idle(void *handle)
+static bool gmc_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v9.*/
return true;
}
-static int gmc_v9_0_wait_for_idle(void *handle)
+static int gmc_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v9.*/
return 0;
}
-static int gmc_v9_0_soft_reset(void *handle)
+static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* XXX for emulation.*/
return 0;
}
-static int gmc_v9_0_set_clockgating_state(void *handle,
+static int gmc_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->set_clockgating(adev, state);
@@ -2466,16 +2337,16 @@ static int gmc_v9_0_set_clockgating_state(void *handle,
return 0;
}
-static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags)
+static void gmc_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->get_clockgating(adev, flags);
athub_v1_0_get_clockgating(adev, flags);
}
-static int gmc_v9_0_set_powergating_state(void *handle,
+static int gmc_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 71d1a2e3bac9..e6c0d86d3486 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v4_0.h"
#include "amdgpu_ras.h"
@@ -37,27 +36,21 @@
#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L
#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0
-static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
-
static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
- if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 0) ||
- adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 2))
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5))
return;
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
- else
+ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
+ } else {
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
+ }
}
static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
@@ -80,7 +73,7 @@ static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
return;
- if (adev->ip_versions[HDP_HWIP][0] >= IP_VERSION(4, 4, 0))
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) >= IP_VERSION(4, 4, 0))
WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
else
/*read back hdp ras counter to reset it to 0 */
@@ -92,10 +85,10 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
{
uint32_t def, data;
- if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 0, 0) ||
- adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 0, 1) ||
- adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 1, 1) ||
- adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 1, 0)) {
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 0, 0) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 0, 1) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 1, 1) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 1, 0)) {
def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
@@ -129,6 +122,12 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
{
int data;
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5)) {
+ /* Default enabled */
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+ return;
+ }
/* AMD_CG_SUPPORT_HDP_LS */
data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK)
@@ -137,7 +136,7 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[HDP_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, HDP_HWIP, 0)) {
case IP_VERSION(4, 2, 1):
WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
break;
@@ -145,9 +144,13 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
break;
}
+ /* Do not program registers if VF */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
- if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 0))
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0))
WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, READ_BUFFER_WATERMARK, 2);
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
@@ -166,7 +169,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = {
};
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
- .flush_hdp = hdp_v4_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
.update_clock_gating = hdp_v4_0_update_clock_gating,
.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
index a9ea23fa0def..8bc001dc9f63 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
@@ -21,27 +21,18 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v5_0.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
-static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
-
static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg) {
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
} else {
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
@@ -215,7 +206,7 @@ static void hdp_v5_0_init_registers(struct amdgpu_device *adev)
}
const struct amdgpu_hdp_funcs hdp_v5_0_funcs = {
- .flush_hdp = hdp_v5_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.invalidate_hdp = hdp_v5_0_invalidate_hdp,
.update_clock_gating = hdp_v5_0_update_clock_gating,
.get_clock_gating_state = hdp_v5_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
index 29c3484ae1f1..40940b4ab400 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v5_2.h"
#include "hdp/hdp_5_2_1_offset.h"
@@ -31,13 +30,25 @@
static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
0);
- else
+ if (amdgpu_sriov_vf(adev)) {
+ /* this is fine because SR_IOV doesn't remap the register */
+ RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
+ } else {
+ /* We just need to read back a register to post the write.
+ * Reading back the remapped register causes problems on
+ * some platforms so just read back the memory size register.
+ */
+ if (adev->nbio.funcs->get_memsize)
+ adev->nbio.funcs->get_memsize(adev);
+ }
+ } else {
amdgpu_ring_emit_wreg(ring,
(adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
0);
+ }
}
static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
index 063eba619f2f..ec20daf4272c 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
@@ -21,26 +21,19 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v6_0.h"
#include "hdp/hdp_6_0_0_offset.h"
#include "hdp/hdp_6_0_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
-static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
+#define regHDP_CLK_CNTL_V6_1 0xd5
+#define regHDP_CLK_CNTL_V6_1_BASE_IDX 0
static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
bool enable)
{
- uint32_t hdp_clk_cntl, hdp_clk_cntl1;
+ uint32_t hdp_clk_cntl;
uint32_t hdp_mem_pwr_cntl;
if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
@@ -48,14 +41,20 @@ static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
AMD_CG_SUPPORT_HDP_SD)))
return;
- hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0,regHDP_CLK_CNTL);
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0))
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1);
+ else
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
/* Before doing clock/power mode switch,
* forced on IPH & RC clock */
hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
RC_MEM_CLK_SOFT_OVERRIDE, 1);
- WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0))
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1, hdp_clk_cntl);
+ else
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
/* disable clock and power gating before any changing */
hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
@@ -117,7 +116,10 @@ static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
/* disable IPH & RC clock override after clock/power mode changing */
hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
RC_MEM_CLK_SOFT_OVERRIDE, 0);
- WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+ if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0))
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1, hdp_clk_cntl);
+ else
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
}
static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev,
@@ -136,7 +138,7 @@ static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev,
}
const struct amdgpu_hdp_funcs hdp_v6_0_funcs = {
- .flush_hdp = hdp_v6_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.update_clock_gating = hdp_v6_0_update_clock_gating,
.get_clock_gating_state = hdp_v6_0_get_clockgating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
new file mode 100644
index 000000000000..ed1debc03507
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "hdp_v7_0.h"
+
+#include "hdp/hdp_7_0_0_offset.h"
+#include "hdp/hdp_7_0_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/* Reprogram the HDP_MEM_POWER_CTRL LS/DS/SD enables from adev->cg_flags:
+ * every enable is cleared first, and one mode is set again only if @enable.
+ */
+static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev,
+					 bool enable)
+{
+	uint32_t hdp_clk_cntl, hdp_mem_pwr_cntl;
+
+	if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+				AMD_CG_SUPPORT_HDP_SD)))
+		return;
+
+	hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+	hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+	/* Set the RC memory clock soft override before the mode switch */
+	hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+				     RC_MEM_CLK_SOFT_OVERRIDE, 1);
+	WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+	/* disable clock and power gating before any changing */
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 ATOMIC_MEM_POWER_CTRL_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 ATOMIC_MEM_POWER_LS_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 ATOMIC_MEM_POWER_DS_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 ATOMIC_MEM_POWER_SD_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 RC_MEM_POWER_CTRL_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 RC_MEM_POWER_LS_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 RC_MEM_POWER_DS_EN, 0);
+	hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+					 RC_MEM_POWER_SD_EN, 0);
+	WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+	/* Already disabled above. The actions below are for "enabled" only */
+	if (enable) {
+		/* only one clock gating mode (LS/DS/SD) can be enabled */
+		if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 ATOMIC_MEM_POWER_SD_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_SD_EN, 1);
+		} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 ATOMIC_MEM_POWER_LS_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_LS_EN, 1);
+		} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 ATOMIC_MEM_POWER_DS_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_DS_EN, 1);
+		}
+
+		/* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+		 * be set for SRAM LS/DS/SD */
+		if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+				      AMD_CG_SUPPORT_HDP_SD)) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+							 ATOMIC_MEM_POWER_CTRL_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_CTRL_EN, 1);
+			WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+		}
+	}
+
+	/* Clear the RC memory clock soft override after the mode change */
+	hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+				     RC_MEM_CLK_SOFT_OVERRIDE, 0);
+	WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD: report the first ATOMIC_MEM_POWER_*_EN bit set */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+const struct amdgpu_hdp_funcs hdp_v7_0_funcs = {
+ .flush_hdp = amdgpu_hdp_generic_flush,
+ .update_clock_gating = hdp_v7_0_update_clock_gating,
+ .get_clock_gating_state = hdp_v7_0_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h
new file mode 100644
index 000000000000..25b69201402d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __HDP_V7_0_H__
+#define __HDP_V7_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_hdp_funcs hdp_v7_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index aecad530b10a..1317ede131b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
out:
return (wptr & ih->ptr_mask);
@@ -268,9 +273,9 @@ static void iceland_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int iceland_ih_early_init(void *handle)
+static int iceland_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -282,10 +287,10 @@ static int iceland_ih_early_init(void *handle)
return 0;
}
-static int iceland_ih_sw_init(void *handle)
+static int iceland_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
@@ -296,9 +301,9 @@ static int iceland_ih_sw_init(void *handle)
return r;
}
-static int iceland_ih_sw_fini(void *handle)
+static int iceland_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -306,39 +311,33 @@ static int iceland_ih_sw_fini(void *handle)
return 0;
}
-static int iceland_ih_hw_init(void *handle)
+static int iceland_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return iceland_ih_irq_init(adev);
}
-static int iceland_ih_hw_fini(void *handle)
+static int iceland_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- iceland_ih_irq_disable(adev);
+ iceland_ih_irq_disable(ip_block->adev);
return 0;
}
-static int iceland_ih_suspend(void *handle)
+static int iceland_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return iceland_ih_hw_fini(adev);
+ return iceland_ih_hw_fini(ip_block);
}
-static int iceland_ih_resume(void *handle)
+static int iceland_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return iceland_ih_hw_init(adev);
+ return iceland_ih_hw_init(ip_block);
}
-static bool iceland_ih_is_idle(void *handle)
+static bool iceland_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -347,11 +346,11 @@ static bool iceland_ih_is_idle(void *handle)
return true;
}
-static int iceland_ih_wait_for_idle(void *handle)
+static int iceland_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -363,10 +362,10 @@ static int iceland_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int iceland_ih_soft_reset(void *handle)
+static int iceland_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -393,13 +392,13 @@ static int iceland_ih_soft_reset(void *handle)
return 0;
}
-static int iceland_ih_set_clockgating_state(void *handle,
+static int iceland_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int iceland_ih_set_powergating_state(void *handle,
+static int iceland_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -408,7 +407,6 @@ static int iceland_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs iceland_ih_ip_funcs = {
.name = "iceland_ih",
.early_init = iceland_ih_early_init,
- .late_init = NULL,
.sw_init = iceland_ih_sw_init,
.sw_fini = iceland_ih_sw_fini,
.hw_init = iceland_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index ec0c8f8b465a..333e9c30c091 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -135,6 +135,34 @@ static int ih_v6_0_toggle_ring_interrupts(struct amdgpu_device *adev,
tmp = RREG32(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+
+ if (enable) {
+ /* Unset the CLEAR_OVERFLOW bit to make sure the next step
+ * is switching the bit from 0 to 1
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Clear RB_OVERFLOW bit */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ }
+
/* enable_intr field is only valid in ring0 */
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
@@ -321,6 +349,7 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev)
if (ret)
return ret;
}
+ ih[i]->overflow = false;
}
/* update doorbell range for ih ring 0 */
@@ -346,6 +375,21 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev)
DELAY, 3);
WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+ /* Redirect the interrupts to IH RB1 for dGPU */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
pci_set_master(adev->pdev);
/* enable interrupts */
@@ -403,7 +447,10 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
- wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ if (!amdgpu_sriov_vf(adev))
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ else
+ ih->overflow = true;
/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 32). Hopefully
@@ -418,6 +465,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
out:
return (wptr & ih->ptr_mask);
}
@@ -510,19 +563,19 @@ static void ih_v6_0_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &ih_v6_0_self_irq_funcs;
}
-static int ih_v6_0_early_init(void *handle)
+static int ih_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v6_0_set_interrupt_funcs(adev);
ih_v6_0_set_self_irq_funcs(adev);
return 0;
}
-static int ih_v6_0_sw_init(void *handle)
+static int ih_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr;
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
@@ -534,8 +587,7 @@ static int ih_v6_0_sw_init(void *handle)
/* use gpu virtual address for ih ring
* until ih_checken is programmed to allow
* use bus address for ih ring by psp bl */
- use_bus_addr =
- (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
+ use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
if (r)
return r;
@@ -543,8 +595,15 @@ static int ih_v6_0_sw_init(void *handle)
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
- adev->irq.ih1.ring_size = 0;
- adev->irq.ih2.ring_size = 0;
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+ use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+ }
/* initialize ih control register offset */
ih_v6_0_init_register_offset(adev);
@@ -558,19 +617,19 @@ static int ih_v6_0_sw_init(void *handle)
return r;
}
-static int ih_v6_0_sw_fini(void *handle)
+static int ih_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int ih_v6_0_hw_init(void *handle)
+static int ih_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = ih_v6_0_irq_init(adev);
if (r)
@@ -579,42 +638,36 @@ static int ih_v6_0_hw_init(void *handle)
return 0;
}
-static int ih_v6_0_hw_fini(void *handle)
+static int ih_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- ih_v6_0_irq_disable(adev);
+ ih_v6_0_irq_disable(ip_block->adev);
return 0;
}
-static int ih_v6_0_suspend(void *handle)
+static int ih_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return ih_v6_0_hw_fini(adev);
+ return ih_v6_0_hw_fini(ip_block);
}
-static int ih_v6_0_resume(void *handle)
+static int ih_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return ih_v6_0_hw_init(adev);
+ return ih_v6_0_hw_init(ip_block);
}
-static bool ih_v6_0_is_idle(void *handle)
+static bool ih_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int ih_v6_0_wait_for_idle(void *handle)
+static int ih_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int ih_v6_0_soft_reset(void *handle)
+static int ih_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
@@ -641,14 +694,12 @@ static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data);
}
-
- return;
}
-static int ih_v6_0_set_clockgating_state(void *handle,
+static int ih_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v6_0_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -708,10 +759,10 @@ static void ih_v6_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
}
-static int ih_v6_0_set_powergating_state(void *handle,
+static int ih_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
@@ -720,20 +771,17 @@ static int ih_v6_0_set_powergating_state(void *handle,
return 0;
}
-static void ih_v6_0_get_clockgating_state(void *handle, u64 *flags)
+static void ih_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
*flags |= AMD_CG_SUPPORT_IH_CG;
-
- return;
}
static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
.name = "ih_v6_0",
.early_init = ih_v6_0_early_init,
- .late_init = NULL,
.sw_init = ih_v6_0_sw_init,
.sw_fini = ih_v6_0_sw_fini,
.hw_init = ih_v6_0_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
index 8fb05eae340a..95b3f4e55ec3 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -346,6 +346,21 @@ static int ih_v6_1_irq_init(struct amdgpu_device *adev)
DELAY, 3);
WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+ /* Redirect the interrupts to IH RB1 for dGPU */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
pci_set_master(adev->pdev);
/* enable interrupts */
@@ -418,6 +433,13 @@ static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev,
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
@@ -510,19 +532,25 @@ static void ih_v6_1_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &ih_v6_1_self_irq_funcs;
}
-static int ih_v6_1_early_init(void *handle)
+static int ih_v6_1_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ ret = amdgpu_irq_add_domain(adev);
+ if (ret) {
+ return ret;
+ }
ih_v6_1_set_interrupt_funcs(adev);
ih_v6_1_set_self_irq_funcs(adev);
return 0;
}
-static int ih_v6_1_sw_init(void *handle)
+static int ih_v6_1_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr;
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
@@ -534,8 +562,7 @@ static int ih_v6_1_sw_init(void *handle)
/* use gpu virtual address for ih ring
* until ih_checken is programmed to allow
* use bus address for ih ring by psp bl */
- use_bus_addr =
- (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
+ use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
if (r)
return r;
@@ -543,8 +570,15 @@ static int ih_v6_1_sw_init(void *handle)
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
- adev->irq.ih1.ring_size = 0;
- adev->irq.ih2.ring_size = 0;
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+ use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+ }
/* initialize ih control register offset */
ih_v6_1_init_register_offset(adev);
@@ -558,19 +592,19 @@ static int ih_v6_1_sw_init(void *handle)
return r;
}
-static int ih_v6_1_sw_fini(void *handle)
+static int ih_v6_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int ih_v6_1_hw_init(void *handle)
+static int ih_v6_1_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = ih_v6_1_irq_init(adev);
if (r)
@@ -579,42 +613,36 @@ static int ih_v6_1_hw_init(void *handle)
return 0;
}
-static int ih_v6_1_hw_fini(void *handle)
+static int ih_v6_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- ih_v6_1_irq_disable(adev);
+ ih_v6_1_irq_disable(ip_block->adev);
return 0;
}
-static int ih_v6_1_suspend(void *handle)
+static int ih_v6_1_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return ih_v6_1_hw_fini(adev);
+ return ih_v6_1_hw_fini(ip_block);
}
-static int ih_v6_1_resume(void *handle)
+static int ih_v6_1_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return ih_v6_1_hw_init(adev);
+ return ih_v6_1_hw_init(ip_block);
}
-static bool ih_v6_1_is_idle(void *handle)
+static bool ih_v6_1_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int ih_v6_1_wait_for_idle(void *handle)
+static int ih_v6_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int ih_v6_1_soft_reset(void *handle)
+static int ih_v6_1_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
@@ -645,10 +673,10 @@ static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev,
return;
}
-static int ih_v6_1_set_clockgating_state(void *handle,
+static int ih_v6_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v6_1_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -708,10 +736,10 @@ static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev,
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
}
-static int ih_v6_1_set_powergating_state(void *handle,
+static int ih_v6_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
@@ -720,9 +748,9 @@ static int ih_v6_1_set_powergating_state(void *handle,
return 0;
}
-static void ih_v6_1_get_clockgating_state(void *handle, u64 *flags)
+static void ih_v6_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
*flags |= AMD_CG_SUPPORT_IH_CG;
@@ -733,7 +761,6 @@ static void ih_v6_1_get_clockgating_state(void *handle, u64 *flags)
static const struct amd_ip_funcs ih_v6_1_ip_funcs = {
.name = "ih_v6_1",
.early_init = ih_v6_1_early_init,
- .late_init = NULL,
.sw_init = ih_v6_1_sw_init,
.sw_fini = ih_v6_1_sw_fini,
.hw_init = ih_v6_1_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
new file mode 100644
index 000000000000..b32ea4129c61
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
@@ -0,0 +1,787 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_7_0_0_offset.h"
+#include "oss/osssys_7_0_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "ih_v7_0.h"
+
+#define MAX_REARM_RETRY 10
+
+static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * ih_v7_0_init_register_offset - Initialize register offset for ih rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize register offset ih rings (IH_V7_0).
+ */
+static void ih_v7_0_init_register_offset(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ /* ih ring 2 is removed
+ * ih ring and ih ring 1 are available */
+ if (adev->irq.ih.ring_size) {
+ ih_regs = &adev->irq.ih.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
+ ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
+ ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
+ }
+
+ if (adev->irq.ih1.ring_size) {
+ ih_regs = &adev->irq.ih1.ih_regs;
+ ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
+ ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
+ ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
+ ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
+ ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
+ ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
+ ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
+ }
+}
+
+/**
+ * force_update_wptr_for_self_int - Force update the wptr for self interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @threshold: threshold to trigger the wptr reporting
+ * @timeout: timeout to trigger the wptr reporting
+ * @enabled: Enable/disable timeout flush mechanism
+ *
+ * threshold input range: 0 ~ 15, default 0,
+ * real_threshold = 2^threshold
+ * timeout input range: 0 ~ 20, default 8,
+ * real_timeout = (2^timeout) * 1024 / (socclk_freq)
+ *
+ * Force update wptr for self interrupt ( >= SIENNA_CICHLID).
+ */
+static void
+force_update_wptr_for_self_int(struct amdgpu_device *adev,
+ u32 threshold, u32 timeout, bool enabled)
+{
+ u32 ih_cntl, ih_rb_cntl;
+
+ ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
+ SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
+ RB_USED_INT_THRESHOLD, threshold);
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
+ return;
+ } else {
+ WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl);
+}
+
+/**
+ * ih_v7_0_toggle_ring_interrupts - toggle the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ * @enable: true - enable the interrupts, false - disable the interrupts
+ *
+ * Toggle the interrupt ring buffer (IH_V7_0)
+ */
+static int ih_v7_0_toggle_ring_interrupts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ bool enable)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ /* enable_intr field is only valid in ring0 */
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (enable) {
+ ih->enabled = true;
+ } else {
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_rptr, 0);
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ ih->enabled = false;
+ ih->rptr = 0;
+ }
+
+ return 0;
+}
+
+/**
+ * ih_v7_0_toggle_interrupts - Toggle all the available interrupt ring buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable or disable interrupt ring buffers
+ *
+ * Toggle all the available interrupt ring buffers (IH_V7_0).
+ */
+static int ih_v7_0_toggle_interrupts(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ int i;
+ int r;
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ r = ih_v7_0_toggle_ring_interrupts(adev, ih[i], enable);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t ih_v7_0_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+ int rb_bufsz = order_base_2(ih->ring_size / 4);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ MC_SPACE, ih->use_bus_addr ? 2 : 4);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+ * value is written to memory
+ */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ return ih_rb_cntl;
+}
+
+static uint32_t ih_v7_0_doorbell_rptr(struct amdgpu_ih_ring *ih)
+{
+ u32 ih_doorbell_rtpr = 0;
+
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR,
+ ENABLE, 0);
+ }
+ return ih_doorbell_rtpr;
+}
+
+/**
+ * ih_v7_0_enable_ring - enable an ih ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: amdgpu_ih_ring pointer
+ *
+ * Enable an ih ring buffer (IH_V7_0)
+ */
+static int ih_v7_0_enable_ring(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+ uint32_t tmp;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Ring Buffer base: address bits [39:8] go to ih_rb_base, bits [47:40] to ih_rb_base_hi */
+ WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
+ WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
+
+ tmp = RREG32(ih_regs->ih_rb_cntl);
+ tmp = ih_v7_0_rb_cntl(ih, tmp);
+ if (ih == &adev->irq.ih)
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
+ if (ih == &adev->irq.ih1) {
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ if (ih == &adev->irq.ih) {
+ /* set the ih ring 0 writeback address whether it's enabled or not */
+ WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
+ WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
+ }
+
+ /* set rptr, wptr to 0 */
+ WREG32(ih_regs->ih_rb_wptr, 0);
+ WREG32(ih_regs->ih_rb_rptr, 0);
+
+ WREG32(ih_regs->ih_doorbell_rptr, ih_v7_0_doorbell_rptr(ih));
+
+ return 0;
+}
+
+/**
+ * ih_v7_0_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts, program every IH ring that has a
+ * non-zero size, set up the storm/flood controls, then
+ * re-enable interrupts.
+ * Called at device load and resume.
+ * Returns 0 for success, errors for failure.
+ */
+static int ih_v7_0_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
+ u32 ih_chicken;
+ u32 tmp;
+ int ret;
+ int i;
+
+ /* disable irqs */
+ ret = ih_v7_0_toggle_interrupts(adev, false);
+ if (ret)
+ return ret;
+
+ adev->nbio.funcs->ih_control(adev);
+
+ if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
+ if (ih[0]->use_bus_addr) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ih); i++) {
+ if (ih[i]->ring_size) {
+ ret = ih_v7_0_enable_ring(adev, ih[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* update doorbell range for ih ring 0 */
+ adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
+ ih[0]->doorbell_index);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
+ CLIENT18_IS_STORM_CLIENT, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
+
+ /* GC/MMHUB UTCL2 page fault interrupts are configured as
+ * MSI storm capable interrupts by default. The delay is
+ * used to avoid the ISR being called too frequently
+ * when page faults happen on several contiguous pages
+ * and thus avoid an MSI storm */
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
+ tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
+ DELAY, 3);
+ WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+
+ /* Redirect the interrupts to IH RB1 for dGPU */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ ret = ih_v7_0_toggle_interrupts(adev, true);
+ if (ret)
+ return ret;
+ /* enable wptr force update for self int */
+ force_update_wptr_for_self_int(adev, 0, 8, true);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * ih_v7_0_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw.
+ */
+static void ih_v7_0_irq_disable(struct amdgpu_device *adev)
+{
+ force_update_wptr_for_self_int(adev, 0, 8, false);
+ ih_v7_0_toggle_interrupts(adev, false);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * ih_v7_0_get_wptr() - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer. Also check for
+ * ring buffer overflow and deal with it.
+ * Returns the value of the wptr.
+ */
+static u32 ih_v7_0_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+ struct amdgpu_ih_regs *ih_regs;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+ ih_regs = &ih->ih_regs;
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happens, start parsing interrupts
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * ih_v7_0_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring to match
+ *
+ */
+static void ih_v7_0_irq_rearm(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t v = 0;
+ uint32_t i = 0;
+ struct amdgpu_ih_regs *ih_regs;
+
+ ih_regs = &ih->ih_regs;
+
+ /* Rearm IRQ / re-write doorbell if doorbell write is lost */
+ for (i = 0; i < MAX_REARM_RETRY; i++) {
+ v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
+ if ((v < ih->ring_size) && (v != ih->rptr))
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ else
+ break;
+ }
+}
+
+/**
+ * ih_v7_0_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set rptr
+ */
+static void ih_v7_0_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ struct amdgpu_ih_regs *ih_regs;
+
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+ if (amdgpu_sriov_vf(adev))
+ ih_v7_0_irq_rearm(adev, ih);
+ } else {
+ ih_regs = &ih->ih_regs;
+ WREG32(ih_regs->ih_rb_rptr, ih->rptr);
+ }
+}
+
+/**
+ * ih_v7_0_self_irq - dispatch work for ring 1
+ *
+ * @adev: amdgpu_device pointer
+ * @source: irq source
+ * @entry: IV with WPTR update
+ *
+ * Update the WPTR from the IV and schedule work to handle the entries.
+ */
+static int ih_v7_0_self_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t wptr = cpu_to_le32(entry->src_data[0]);
+
+ switch (entry->ring_id) {
+ case 1:
+ *adev->irq.ih1.wptr_cpu = wptr;
+ schedule_work(&adev->irq.ih1_work);
+ break;
+ default: break;
+ }
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs ih_v7_0_self_irq_funcs = {
+ .process = ih_v7_0_self_irq,
+};
+
+static void ih_v7_0_set_self_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.self_irq.num_types = 0;
+ adev->irq.self_irq.funcs = &ih_v7_0_self_irq_funcs;
+}
+
+static int ih_v7_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ ih_v7_0_set_interrupt_funcs(adev);
+ ih_v7_0_set_self_irq_funcs(adev);
+ return 0;
+}
+
+static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ bool use_bus_addr;
+
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
+ &adev->irq.self_irq);
+
+ if (r)
+ return r;
+
+ /* use gpu virtual address for ih ring
+ * until ih_chicken is programmed to allow
+ * use of bus address for ih ring by psp bl */
+ use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+ use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+ }
+
+ /* initialize ih control register offset */
+ ih_v7_0_init_register_offset(adev);
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int ih_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_irq_fini_sw(adev);
+
+ return 0;
+}
+
+static int ih_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = ih_v7_0_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int ih_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ ih_v7_0_irq_disable(ip_block->adev);
+
+ return 0;
+}
+
+static int ih_v7_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v7_0_hw_fini(ip_block);
+}
+
+static int ih_v7_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return ih_v7_0_hw_init(ip_block);
+}
+
+static bool ih_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return true;
+}
+
+static int ih_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return -ETIMEDOUT;
+}
+
+static int ih_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ /* todo */
+ return 0;
+}
+
+static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data);
+ }
+
+ return;
+}
+
+static int ih_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ ih_v7_0_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t ih_mem_pwr_cntl;
+
+ /* Disable ih sram power cntl before switch powergating mode */
+ ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+
+ /* It is recommended to set mem powergating mode to DS mode */
+ if (enable) {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ } else {
+ /* mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_SD_EN, 0);
+ /* cam mem power mode */
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0);
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
+ /* re-enable power cntl*/
+ ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
+ IH_BUFFER_MEM_POWER_CTRL_EN, 1);
+ }
+
+ WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
+}
+
+static int ih_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
+ ih_v7_0_update_ih_mem_power_gating(adev, enable);
+
+ return 0;
+}
+
+static void ih_v7_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
+ *flags |= AMD_CG_SUPPORT_IH_CG;
+
+ return;
+}
+
+static const struct amd_ip_funcs ih_v7_0_ip_funcs = {
+ .name = "ih_v7_0",
+ .early_init = ih_v7_0_early_init,
+ .sw_init = ih_v7_0_sw_init,
+ .sw_fini = ih_v7_0_sw_fini,
+ .hw_init = ih_v7_0_hw_init,
+ .hw_fini = ih_v7_0_hw_fini,
+ .suspend = ih_v7_0_suspend,
+ .resume = ih_v7_0_resume,
+ .is_idle = ih_v7_0_is_idle,
+ .wait_for_idle = ih_v7_0_wait_for_idle,
+ .soft_reset = ih_v7_0_soft_reset,
+ .set_clockgating_state = ih_v7_0_set_clockgating_state,
+ .set_powergating_state = ih_v7_0_set_powergating_state,
+ .get_clockgating_state = ih_v7_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ih_funcs ih_v7_0_funcs = {
+ .get_wptr = ih_v7_0_get_wptr,
+ .decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
+ .set_rptr = ih_v7_0_set_rptr
+};
+
+static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &ih_v7_0_funcs;
+}
+
+const struct amdgpu_ip_block_version ih_v7_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 7,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &ih_v7_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h
new file mode 100644
index 000000000000..af9dcbc451fd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __IH_V7_0_IH_H__
+#define __IH_V7_0_IH_H__
+
+/* IP block descriptor for the IH v7.0 block (type AMD_IP_BLOCK_TYPE_IH). */
+extern const struct amdgpu_ip_block_version ih_v7_0_ip_block;
+
+#endif /* __IH_V7_0_IH_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index 4ab90c7852c3..cc626036ed9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -32,14 +32,18 @@
#include "gc/gc_11_0_0_sh_mask.h"
MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_imu.bin");
static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
{
- char fw_name[40];
char ucode_prefix[30];
int err;
const struct imu_firmware_header_v1_0 *imu_hdr;
@@ -48,13 +52,16 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
-
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_imu.bin", ucode_prefix);
- err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, fw_name);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu.bin", ucode_prefix);
if (err)
goto out;
+
imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
- adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version);
//adev->gfx.imu_feature_version = le32_to_cpu(imu_hdr->ucode_feature_version);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
@@ -68,13 +75,14 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
info->fw = adev->gfx.imu_fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE);
- }
+ } else
+ adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version);
out:
if (err) {
dev_err(adev->dev,
- "gfx11: Failed to load firmware \"%s\"\n",
- fw_name);
+ "gfx11: Failed to load firmware \"%s_imu.bin\"\n",
+ ucode_prefix);
amdgpu_ucode_release(&adev->gfx.imu_fw);
}
@@ -152,7 +160,7 @@ static void imu_v11_0_setup(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
}
- //disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB
+ //disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
imu_reg_val |= 0x10007;
WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
@@ -352,7 +360,7 @@ static void imu_v11_0_program_rlc_ram(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2);
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
program_imu_rlc_ram(adev, imu_rlc_ram_golden_11,
(const u32)ARRAY_SIZE(imu_rlc_ram_golden_11));
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
new file mode 100644
index 000000000000..58cd87db8061
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_imu.h"
+#include "amdgpu_dpm.h"
+
+#include "imu_v12_0.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "mmhub/mmhub_4_1_0_offset.h"
+
+/* IMU firmware images this module may request (see imu_v12_0_init_microcode). */
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu_kicker.bin");
+
+/*
+ * Default flag bits OR'ed into the low address word written to
+ * GFX_IMU_RLC_RAM_ADDR_LOW by program_imu_rlc_ram(); same value as the
+ * 0x1c0000 addr_mask used by most golden-table entries.
+ */
+#define TRANSFER_RAM_MASK 0x001c0000
+
+/*
+ * imu_v12_0_init_microcode - request the GFX12 IMU firmware image
+ * @adev: amdgpu device
+ *
+ * Requests "amdgpu/<gc prefix>_imu.bin", or the "_imu_kicker" variant when
+ * amdgpu_is_kicker_fw() reports true, and records the ucode version from the
+ * image header.  When firmware is loaded through PSP, the IMU_I and IMU_D
+ * ucode entries are registered and their page-aligned sizes are added to
+ * adev->firmware.fw_size.
+ *
+ * Returns 0 on success or the error from amdgpu_ucode_request(); on failure
+ * the firmware reference is released and the requested file name is logged.
+ */
+static int imu_v12_0_init_microcode(struct amdgpu_device *adev)
+{
+	char ucode_prefix[30];
+	int err;
+	const char *fw_suffix;
+	const struct imu_firmware_header_v1_0 *imu_hdr;
+	struct amdgpu_firmware_info *info = NULL;
+
+	DRM_DEBUG("\n");
+
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+	/* Remember which variant is requested so a failure names the right file. */
+	fw_suffix = amdgpu_is_kicker_fw(adev) ? "_imu_kicker" : "_imu";
+	err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+				   "amdgpu/%s%s.bin", ucode_prefix, fw_suffix);
+	if (err)
+		goto out;
+
+	imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+	adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version);
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		/* Both entries point at the same image; sizes come from its header. */
+		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I];
+		info->ucode_id = AMDGPU_UCODE_ID_IMU_I;
+		info->fw = adev->gfx.imu_fw;
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes), PAGE_SIZE);
+		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D];
+		info->ucode_id = AMDGPU_UCODE_ID_IMU_D;
+		info->fw = adev->gfx.imu_fw;
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE);
+	}
+
+out:
+	if (err) {
+		dev_err(adev->dev,
+			"gfx12: Failed to load firmware \"%s%s.bin\"\n",
+			ucode_prefix, fw_suffix);
+		amdgpu_ucode_release(&adev->gfx.imu_fw);
+	}
+
+	return err;
+}
+
+/*
+ * imu_v12_0_load_microcode - write the IMU firmware into IMU I-RAM and D-RAM
+ * @adev: amdgpu device
+ *
+ * The I-RAM image starts at the header's ucode_array_offset_bytes and the
+ * D-RAM image follows it directly.  Each image is written one 32-bit word at
+ * a time through the matching *_RAM_DATA register after the *_RAM_ADDR
+ * register has been set to 0; afterwards *_RAM_ADDR is written with the
+ * firmware version.
+ *
+ * Returns 0 on success or -EINVAL when no IMU firmware has been requested.
+ */
+static int imu_v12_0_load_microcode(struct amdgpu_device *adev)
+{
+	const struct imu_firmware_header_v1_0 *hdr;
+	const __le32 *word;
+	u32 ucode_offset, iram_bytes, dram_bytes;
+	unsigned int idx, nwords;
+
+	if (!adev->gfx.imu_fw)
+		return -EINVAL;
+
+	hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
+	ucode_offset = le32_to_cpu(hdr->header.ucode_array_offset_bytes);
+	iram_bytes = le32_to_cpu(hdr->imu_iram_ucode_size_bytes);
+	dram_bytes = le32_to_cpu(hdr->imu_dram_ucode_size_bytes);
+
+	/* I-RAM image */
+	word = (const __le32 *)(adev->gfx.imu_fw->data + ucode_offset);
+	nwords = iram_bytes / 4;
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, 0);
+
+	for (idx = 0; idx < nwords; idx++, word++)
+		WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_DATA, le32_to_cpup(word));
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, adev->gfx.imu_fw_version);
+
+	/* D-RAM image, located right after the I-RAM image */
+	word = (const __le32 *)(adev->gfx.imu_fw->data + ucode_offset + iram_bytes);
+	nwords = dram_bytes / 4;
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, 0);
+
+	for (idx = 0; idx < nwords; idx++, word++)
+		WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_DATA, le32_to_cpup(word));
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, adev->gfx.imu_fw_version);
+
+	return 0;
+}
+
+/*
+ * imu_v12_0_wait_for_reset_status - poll until the IMU reports reset done
+ * @adev: amdgpu device
+ *
+ * Reads GFX_IMU_GFX_RESET_CTRL up to adev->usec_timeout times, 1us apart,
+ * until its low five bits are all set.
+ *
+ * Returns 0 once the bits are set, or -ETIMEDOUT (after logging an error)
+ * if they never are.
+ */
+static int imu_v12_0_wait_for_reset_status(struct amdgpu_device *adev)
+{
+	int waited_us;
+
+	for (waited_us = 0; waited_us < adev->usec_timeout; waited_us++) {
+		u32 reset_ctrl = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
+
+		if ((reset_ctrl & 0x1f) == 0x1f)
+			return 0;
+
+		udelay(1);
+	}
+
+	dev_err(adev->dev, "init imu: IMU start timeout\n");
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * imu_v12_0_setup - program the IMU C2PMSG access controls
+ * @adev: amdgpu device
+ *
+ * Always writes the two C2PMSG_ACCESS_CTRL registers.  In DEBUG_MODE it
+ * additionally sets bit 0 of GFX_IMU_C2PMSG_16 and ORs 0x20010007 into
+ * GFX_IMU_SCRATCH_10.
+ */
+static void imu_v12_0_setup(struct amdgpu_device *adev)
+{
+	u32 val;
+
+	WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
+	WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
+
+	if (adev->gfx.imu.mode != DEBUG_MODE)
+		return;
+
+	val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
+	val |= 0x1;
+	WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, val);
+
+	val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
+	val |= 0x20010007;
+	WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, val);
+}
+
+/*
+ * imu_v12_0_start - start the IMU core and wait for reset completion
+ * @adev: amdgpu device
+ *
+ * Clears bit 0 of GFX_IMU_CORE_CTRL, asks the DPM layer to power up GFX via
+ * the IMU on APUs, then returns the result of
+ * imu_v12_0_wait_for_reset_status().
+ */
+static int imu_v12_0_start(struct amdgpu_device *adev)
+{
+	u32 core_ctrl = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
+
+	core_ctrl &= 0xfffffffe;
+	WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, core_ctrl);
+
+	if (adev->flags & AMD_IS_APU)
+		amdgpu_dpm_set_gfx_power_up_by_imu(adev);
+
+	return imu_v12_0_wait_for_reset_status(adev);
+}
+
+/*
+ * Golden register list written into IMU RLC RAM by program_imu_rlc_ram_old().
+ * Each entry names a GC register, the data to store and an address mask that
+ * is OR'ed into the register offset.  Entries are written in table order;
+ * some registers (e.g. regGRBM_GFX_INDEX, regGCMC_VM_MX_L1_TLB_CNTL) appear
+ * more than once, so the order is presumably significant - do not sort.
+ * program_imu_rlc_ram_old() overrides the data of the AGP_BASE, AGP_TOP,
+ * FB_LOCATION_BASE and FB_LOCATION_TOP entries at run time.
+ */
+static const struct imu_rlc_ram_golden imu_rlc_ram_golden_12_0_1[] = {
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCH_PIPE_STEER, 0x1e4, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1X_PIPE_STEER, 0x1e4, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1_PIPE_STEER, 0x1e4, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x13571357, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_1, 0x64206420, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x2460246, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_3, 0x75317531, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xc0d41183, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_CHICKEN_BITS, 0x507d1c0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_CHICKEN_BITS, 0x507d1c0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_RB_WPTR_POLL_CNTL, 0x600100, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_CREDITS, 0x3f7fff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_CREDITS, 0x3f7ebf, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE0, 0x2e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE1, 0x1a078, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE2, 0x0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE0, 0x0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE1, 0x12030, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE2, 0x0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_VCC_RESERVE0, 0x19041000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_VCC_RESERVE1, 0x80000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_VCC_RESERVE0, 0x1e080000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_VCC_RESERVE1, 0x80000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_PRIORITY, 0x880, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_PRIORITY, 0x8880, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_ARB_FINAL, 0x17, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_ARB_FINAL, 0x77, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_ENABLE, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_ENABLE, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x20000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0c, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xfffff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_MISC, 0x0091, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_MISC, 0x0091, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0xe0000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x00008500, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0x00880007, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regTD_CNTL, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000100, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000101, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0xe0000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x08200545, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBMH_CP_PERFMON_CNTL, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCB_PERFCOUNTER0_SELECT1, 0x000fffff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_DEBUG_2, 0x00020000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_CPC_DEBUG, 0x00500010, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x0000000f, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x0000600f, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0x1c0000),
+ /* NOTE(review): only entry whose addr_mask is 0xe0000000 rather than 0x1c0000 - confirm intended. */
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x0000ffff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000551, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x0003d000, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x0003d7ff, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, 0, 0x1c0000),
+ IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, 0, 0x1c0000)
+};
+
+/*
+ * program_imu_rlc_ram_old - write a golden register table into IMU RLC RAM
+ * @adev: amdgpu device
+ * @regs: table of golden entries
+ * @array_size: number of entries in @regs
+ *
+ * For every entry the target address is the register's absolute offset with
+ * the entry's addr_mask OR'ed in.  The table data is used as is, except for
+ * regGCMC_VM_AGP_BASE / regGCMC_VM_AGP_TOP (fixed values) and
+ * regGCMC_VM_FB_LOCATION_BASE / _TOP (derived from adev->gmc.vram_start /
+ * vram_end, shifted right by 24).  Each entry is emitted as an
+ * ADDR_HIGH (0), ADDR_LOW, DATA register write sequence.
+ */
+static void program_imu_rlc_ram_old(struct amdgpu_device *adev,
+				    const struct imu_rlc_ram_golden *regs,
+				    const u32 array_size)
+{
+	u32 idx;
+
+	for (idx = 0; idx < array_size; idx++) {
+		const struct imu_rlc_ram_golden *entry = &regs[idx];
+		u32 addr, value;
+
+		addr = adev->reg_offset[entry->hwip][entry->instance][entry->segment] +
+		       entry->reg;
+		addr |= entry->addr_mask;
+
+		/* Run-time overrides for the address-range registers. */
+		if (entry->reg == regGCMC_VM_AGP_BASE)
+			value = 0x00ffffff;
+		else if (entry->reg == regGCMC_VM_AGP_TOP)
+			value = 0x0;
+		else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE)
+			value = adev->gmc.vram_start >> 24;
+		else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP)
+			value = adev->gmc.vram_end >> 24;
+		else
+			value = entry->data;
+
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, addr);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, value);
+	}
+}
+
+/*
+ * imu_v12_0_grbm_gfx_index_remap - repack a GRBM_GFX_INDEX value
+ * @adev: amdgpu device (not referenced directly in this function)
+ * @data: GRBM_GFX_INDEX register value
+ * @high: select the high (true) or low (false) remapped word
+ *
+ * High word: INSTANCE_INDEX shifted right by 5.
+ * Low word: SE/SA/INSTANCE broadcast flags at bits 18/19/20, SE_INDEX at
+ * bit 21, SA_INDEX at bit 25 and the low five bits of INSTANCE_INDEX.
+ */
+static u32 imu_v12_0_grbm_gfx_index_remap(struct amdgpu_device *adev,
+					  u32 data, bool high)
+{
+	u32 inst_index = REG_GET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX);
+
+	if (high)
+		return inst_index >> 5;
+
+	return REG_GET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES) << 18 |
+	       REG_GET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES) << 19 |
+	       REG_GET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES) << 20 |
+	       REG_GET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX) << 21 |
+	       REG_GET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX) << 25 |
+	       (inst_index & 0x1f);
+}
+
+/*
+ * imu_v12_init_gfxhub_settings - pick the data for a GFXHUB register entry
+ * @adev: amdgpu device
+ * @reg: absolute GC register offset of the entry
+ * @data: data supplied with the entry
+ *
+ * For the GCMC_VM_* registers listed below, returns the current value of the
+ * matching MMHUB MMMC_VM_* register instead of @data; for every other
+ * register @data is returned unchanged.
+ */
+static u32 imu_v12_init_gfxhub_settings(struct amdgpu_device *adev,
+					u32 reg, u32 data)
+{
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_LOCATION_BASE))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_LOCATION_TOP))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_TOP);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_OFFSET))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_BASE))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_BOT))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_TOP))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_FB_ADDRESS_START);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_FB_ADDRESS_END);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_SYSMEM_ADDRESS_START))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_SYSMEM_ADDRESS_START);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_SYSMEM_ADDRESS_END))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_SYSMEM_ADDRESS_END);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB);
+
+	if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB))
+		return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB);
+
+	return data;
+}
+
+/*
+ * program_imu_rlc_ram - write a flat register list into IMU RLC RAM
+ * @adev: amdgpu device
+ * @regs: flat array of u32 triples; element 0 of each triple is the register
+ *        offset and element 2 the data (element 1 is not read here)
+ * @array_size: number of u32 elements in @regs; nothing is written unless it
+ *              is a multiple of 3
+ *
+ * A regGRBM_GFX_INDEX triple is not written to RAM; it only updates the
+ * address-high value and the flags OR'ed into the address-low value of the
+ * entries that follow.  Data for GFXHUB registers is substituted by
+ * imu_v12_init_gfxhub_settings().
+ */
+static void program_imu_rlc_ram(struct amdgpu_device *adev,
+				const u32 *regs,
+				const u32 array_size)
+{
+	u32 addr_hi = 0, addr_lo_flags = TRANSFER_RAM_MASK;
+	u32 idx;
+
+	if (array_size % 3)
+		return;
+
+	for (idx = 0; idx < array_size; idx += 3) {
+		u32 reg = regs[idx + 0];
+		u32 data = imu_v12_init_gfxhub_settings(adev, reg, regs[idx + 2]);
+
+		if (reg == SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX)) {
+			addr_lo_flags = imu_v12_0_grbm_gfx_index_remap(adev, data, false);
+			addr_hi = imu_v12_0_grbm_gfx_index_remap(adev, data, true);
+			continue;
+		}
+
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, addr_hi);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg | addr_lo_flags);
+		WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
+	}
+}
+
+/*
+ * imu_v12_0_program_rlc_ram - load the golden register list into IMU RLC RAM
+ * @adev: amdgpu device
+ *
+ * Writes 0x2 to GFX_IMU_RLC_RAM_INDEX, programs the register list for the
+ * detected GC IP version, appends an all-zero entry and finally sets
+ * RAM_VALID in the index register.  Calls BUG() for any GC IP version other
+ * than 12.0.0 / 12.0.1.
+ */
+static void imu_v12_0_program_rlc_ram(struct amdgpu_device *adev)
+{
+ u32 reg_data, size = 0;
+ const u32 *data = NULL;
+ /*
+  * NOTE(review): r is never reassigned, so the "!r" branch below is dead
+  * and the static golden table is always used; data/size stay NULL/0.
+  */
+ int r = -EINVAL;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (!r)
+ program_imu_rlc_ram(adev, data, (const u32)size);
+ else
+ program_imu_rlc_ram_old(adev, imu_rlc_ram_golden_12_0_1,
+ (const u32)ARRAY_SIZE(imu_rlc_ram_golden_12_0_1));
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ /* Terminate the list with an all-zero (address high/low, data) entry. */
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
+
+ /* Read-modify-write the index register to set RAM_VALID. */
+ reg_data = RREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX);
+ reg_data |= GFX_IMU_RLC_RAM_INDEX__RAM_VALID_MASK;
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, reg_data);
+}
+
+/* IMU callback table for GFX v12.0; declared extern in imu_v12_0.h. */
+const struct amdgpu_imu_funcs gfx_v12_0_imu_funcs = {
+ .init_microcode = imu_v12_0_init_microcode,
+ .load_microcode = imu_v12_0_load_microcode,
+ .setup_imu = imu_v12_0_setup,
+ .start_imu = imu_v12_0_start,
+ .program_rlc_ram = imu_v12_0_program_rlc_ram,
+ .wait_for_reset_status = imu_v12_0_wait_for_reset_status,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h
index 9afd6ddb01e9..a1f50cb1aeab 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2019 Advanced Micro Devices, Inc.
+ * Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -21,9 +21,10 @@
*
*/
-#ifndef __MES_V10_1_H__
-#define __MES_V10_1_H__
+#ifndef __IMU_V12_0_H__
+#define __IMU_V12_0_H__
-extern const struct amdgpu_ip_block_version mes_v10_1_ip_block;
+extern const struct amdgpu_imu_funcs gfx_v12_0_imu_funcs;
#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c
new file mode 100644
index 000000000000..0027a639c7e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+#include "isp_v4_1_0.h"
+
+/*
+ * Interrupt source IDs (ring-buffer WPT9..WPT16) that isp_v4_1_0_hw_init()
+ * maps, in this order, to the IRQ resources of the capture cell.
+ */
+static const unsigned int isp_4_1_0_int_srcid[MAX_ISP410_INT_SRC] = {
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT10,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT11,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT12,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT13,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT14,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT15,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT16
+};
+
+/*
+ * isp_v4_1_0_hw_init - describe the ISP 4.1.0 sub-devices and register them
+ * @isp: ISP instance; isp->adev and isp->parent are read here
+ *
+ * Allocates the MFD cell array, the resource arrays and the platform data,
+ * fills in three cells ("amd_isp_capture", "amd_isp_i2c_designware" and
+ * "amdisp-pinctrl") whose MMIO resources are offsets from adev->rmmio_base,
+ * and registers them with mfd_add_hotplug_devices().
+ *
+ * Returns 0 on success, -EINVAL if adev->rmmio_size is below 0x5289, -ENOMEM
+ * on allocation failure, or the error from mfd_add_hotplug_devices().  On
+ * failure every buffer is freed and its pointer reset to NULL.
+ */
+static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp)
+{
+	struct amdgpu_device *adev = isp->adev;
+	int idx, int_idx, num_res, r;
+	u64 isp_base;
+
+	/* A zero-sized aperture is covered by the minimum-size check. */
+	if (adev->rmmio_size < 0x5289)
+		return -EINVAL;
+
+	isp_base = adev->rmmio_base;
+
+	isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL);
+	if (!isp->isp_cell) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev,
+			"%s: isp mfd cell alloc failed\n", __func__);
+		goto failure;
+	}
+
+	num_res = MAX_ISP410_MEM_RES + MAX_ISP410_INT_SRC;
+	isp->isp_res = kcalloc(num_res, sizeof(struct resource),
+			       GFP_KERNEL);
+	if (!isp->isp_res) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev,
+			"%s: isp mfd res alloc failed\n", __func__);
+		goto failure;
+	}
+
+	isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL);
+	if (!isp->isp_pdata) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev,
+			"%s: isp platform data alloc failed\n", __func__);
+		goto failure;
+	}
+
+	/* initialize isp platform data */
+	isp->isp_pdata->adev = (void *)adev;
+	isp->isp_pdata->asic_type = adev->asic_type;
+	isp->isp_pdata->base_rmmio_size = adev->rmmio_size;
+
+	isp->isp_res[0].name = "isp_4_1_0_reg";
+	isp->isp_res[0].flags = IORESOURCE_MEM;
+	isp->isp_res[0].start = isp_base;
+	isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END;
+
+	isp->isp_res[1].name = "isp_4_1_phy0_reg";
+	isp->isp_res[1].flags = IORESOURCE_MEM;
+	isp->isp_res[1].start = isp_base + ISP410_PHY0_OFFSET;
+	isp->isp_res[1].end = isp_base + ISP410_PHY0_OFFSET + ISP410_PHY0_SIZE;
+
+	/* The IRQ resources follow the MEM resources, one per source ID. */
+	for (idx = MAX_ISP410_MEM_RES, int_idx = 0; idx < num_res; idx++, int_idx++) {
+		isp->isp_res[idx].name = "isp_4_1_0_irq";
+		isp->isp_res[idx].flags = IORESOURCE_IRQ;
+		isp->isp_res[idx].start =
+			amdgpu_irq_create_mapping(adev, isp_4_1_0_int_srcid[int_idx]);
+		isp->isp_res[idx].end =
+			isp->isp_res[idx].start;
+	}
+
+	isp->isp_cell[0].name = "amd_isp_capture";
+	isp->isp_cell[0].num_resources = num_res;
+	isp->isp_cell[0].resources = &isp->isp_res[0];
+	isp->isp_cell[0].platform_data = isp->isp_pdata;
+	isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data);
+
+	/* initialize isp i2c platform data */
+	isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+	if (!isp->isp_i2c_res) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev,
+			"%s: isp i2c res alloc failed\n", __func__);
+		goto failure;
+	}
+
+	isp->isp_i2c_res[0].name = "isp_i2c0_reg";
+	isp->isp_i2c_res[0].flags = IORESOURCE_MEM;
+	isp->isp_i2c_res[0].start = isp_base + ISP410_I2C0_OFFSET;
+	isp->isp_i2c_res[0].end = isp_base + ISP410_I2C0_OFFSET + ISP410_I2C0_SIZE;
+
+	isp->isp_cell[1].name = "amd_isp_i2c_designware";
+	isp->isp_cell[1].num_resources = 1;
+	isp->isp_cell[1].resources = &isp->isp_i2c_res[0];
+	isp->isp_cell[1].platform_data = isp->isp_pdata;
+	isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data);
+
+	/* initialize isp gpiochip platform data */
+	isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+	if (!isp->isp_gpio_res) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev,
+			"%s: isp gpio res alloc failed\n", __func__);
+		goto failure;
+	}
+
+	isp->isp_gpio_res[0].name = "isp_gpio_reg";
+	isp->isp_gpio_res[0].flags = IORESOURCE_MEM;
+	isp->isp_gpio_res[0].start = isp_base + ISP410_GPIO_SENSOR_OFFSET;
+	isp->isp_gpio_res[0].end = isp_base + ISP410_GPIO_SENSOR_OFFSET +
+				   ISP410_GPIO_SENSOR_SIZE;
+
+	isp->isp_cell[2].name = "amdisp-pinctrl";
+	isp->isp_cell[2].num_resources = 1;
+	isp->isp_cell[2].resources = &isp->isp_gpio_res[0];
+	isp->isp_cell[2].platform_data = isp->isp_pdata;
+	isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data);
+
+	r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 3);
+	if (r) {
+		drm_err(&adev->ddev,
+			"%s: add mfd hotplug device failed\n", __func__);
+		goto failure;
+	}
+
+	return 0;
+
+failure:
+	/*
+	 * Reset each pointer after freeing it so that a later hw_fini() or a
+	 * repeated hw_init() never frees a dangling pointer a second time.
+	 */
+	kfree(isp->isp_pdata);
+	isp->isp_pdata = NULL;
+	kfree(isp->isp_res);
+	isp->isp_res = NULL;
+	kfree(isp->isp_cell);
+	isp->isp_cell = NULL;
+	kfree(isp->isp_i2c_res);
+	isp->isp_i2c_res = NULL;
+	kfree(isp->isp_gpio_res);
+	isp->isp_gpio_res = NULL;
+
+	return r;
+}
+
+/*
+ * isp_v4_1_0_hw_fini - unregister the ISP sub-devices and free their data
+ * @isp: ISP instance set up by isp_v4_1_0_hw_init()
+ *
+ * Removes the MFD children of isp->parent first, then frees the cell,
+ * resource and platform-data buffers.  Each pointer is reset to NULL after
+ * being freed so that the failure path of a later hw_init(), or a second
+ * hw_fini(), cannot free it again.  Always returns 0.
+ */
+static int isp_v4_1_0_hw_fini(struct amdgpu_isp *isp)
+{
+	mfd_remove_devices(isp->parent);
+
+	kfree(isp->isp_res);
+	isp->isp_res = NULL;
+	kfree(isp->isp_cell);
+	isp->isp_cell = NULL;
+	kfree(isp->isp_pdata);
+	isp->isp_pdata = NULL;
+	kfree(isp->isp_i2c_res);
+	isp->isp_i2c_res = NULL;
+	kfree(isp->isp_gpio_res);
+	isp->isp_gpio_res = NULL;
+
+	return 0;
+}
+
+/* ISP 4.1.0 hardware init/fini callbacks, installed by isp_v4_1_0_set_isp_funcs(). */
+static const struct isp_funcs isp_v4_1_0_funcs = {
+ .hw_init = isp_v4_1_0_hw_init,
+ .hw_fini = isp_v4_1_0_hw_fini,
+};
+
+/* Point @isp at the ISP 4.1.0 callback table (isp_v4_1_0_funcs). */
+void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp)
+{
+ isp->funcs = &isp_v4_1_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h
new file mode 100644
index 000000000000..4d239198edd0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef __ISP_V4_1_0_H__
+#define __ISP_V4_1_0_H__
+
+#include "amdgpu_isp.h"
+
+#include "ivsrcid/isp/irqsrcs_isp_4_1.h"
+
+/*
+ * Resource counts used by isp_v4_1_0_hw_init(): the capture cell gets
+ * MAX_ISP410_MEM_RES memory resources followed by MAX_ISP410_INT_SRC IRQ
+ * resources.
+ * NOTE(review): MAX_ISP410_SENSOR_RES is not referenced in isp_v4_1_0.c.
+ */
+#define MAX_ISP410_MEM_RES 2
+#define MAX_ISP410_SENSOR_RES 1
+#define MAX_ISP410_INT_SRC 8
+
+/*
+ * Sub-block windows: each *_OFFSET is added to adev->rmmio_base for the
+ * resource start, and *_OFFSET + *_SIZE for the resource end.
+ */
+#define ISP410_PHY0_OFFSET 0x66700
+#define ISP410_PHY0_SIZE 0xD30
+
+#define ISP410_I2C0_OFFSET 0x66400
+#define ISP410_I2C0_SIZE 0x100
+
+#define ISP410_GPIO_SENSOR_OFFSET 0x6613C
+#define ISP410_GPIO_SENSOR_SIZE 0x54
+
+/* Install the ISP 4.1.0 hw_init/hw_fini callbacks on @isp. */
+void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp);
+
+#endif /* __ISP_V4_1_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
new file mode 100644
index 000000000000..4258d3e0b706
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/gpio/machine.h>
+#include "amdgpu.h"
+#include "isp_v4_1_1.h"
+
+MODULE_FIRMWARE("amdgpu/isp_4_1_1.bin");
+
+#define ISP_PERFORMANCE_STATE_LOW 0
+#define ISP_PERFORMANCE_STATE_HIGH 1
+
+#define ISP_HIGH_PERFORMANC_XCLK 788
+#define ISP_HIGH_PERFORMANC_ICLK 788
+
+static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = { /* indexed by int_idx in isp_v4_1_1_hw_init() */
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT10,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT11,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT12,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT13,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT14,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT15,
+ ISP_4_1__SRCID__ISP_RINGBUFFER_WPT16 /* each entry is mapped to one "isp_4_1_1_irq" resource */
+};
+
+static struct gpiod_lookup_table isp_gpio_table = { /* registered by isp_v4_1_1_hw_init() for the OMNI5C10 ACPI HID */
+ .dev_id = "amd_isp_capture", /* same string as isp_cell[0].name in isp_v4_1_1_hw_init() */
+ .table = {
+ GPIO_LOOKUP("AMDI0030:00", 85, "enable_isp", GPIO_ACTIVE_HIGH),
+ { } /* empty terminating entry */
+ },
+};
+
+static struct gpiod_lookup_table isp_sensor_gpio_table = { /* registered together with isp_gpio_table */
+ .dev_id = "i2c-ov05c10",
+ .table = {
+ GPIO_LOOKUP("amdisp-pinctrl", 0, "enable", GPIO_ACTIVE_HIGH), /* "amdisp-pinctrl" is also isp_cell[2].name */
+ { } /* empty terminating entry */
+ },
+};
+
+static int isp_poweroff(struct generic_pm_domain *genpd)
+{
+	/* genpd .power_off hook: forwards a powergating request with gate = true */
+	struct amdgpu_isp *isp_ctx = container_of(genpd, struct amdgpu_isp, ispgpd);
+
+	return amdgpu_dpm_set_powergating_by_smu(isp_ctx->adev, AMD_IP_BLOCK_TYPE_ISP, true, 0);
+}
+
+static int isp_poweron(struct generic_pm_domain *genpd)
+{
+	/* genpd .power_on hook: forwards a powergating request with gate = false */
+	struct amdgpu_isp *isp_ctx = container_of(genpd, struct amdgpu_isp, ispgpd);
+
+	return amdgpu_dpm_set_powergating_by_smu(isp_ctx->adev, AMD_IP_BLOCK_TYPE_ISP, false, 0);
+}
+
+static int isp_set_performance_state(struct generic_pm_domain *genpd,
+				     unsigned int state)
+{
+	struct amdgpu_isp *isp_ctx = container_of(genpd, struct amdgpu_isp, ispgpd);
+	struct amdgpu_device *gpu = isp_ctx->adev;
+	u32 want_iclk, want_xclk;
+	int err;
+
+	/* isp runs at default lowest clock-rate on power-on, do nothing */
+	if (state == ISP_PERFORMANCE_STATE_LOW)
+		return 0;
+
+	/* only the low and high states are known */
+	if (state != ISP_PERFORMANCE_STATE_HIGH)
+		return -EINVAL;
+
+	want_xclk = ISP_HIGH_PERFORMANC_XCLK;
+	want_iclk = ISP_HIGH_PERFORMANC_ICLK;
+
+	/* xclk is requested first; iclk is not touched when that fails */
+	err = amdgpu_dpm_set_soft_freq_range(gpu, PP_ISPXCLK, want_xclk, 0);
+	if (err) {
+		drm_err(&gpu->ddev, "failed to set xclk %u to %u: %d\n",
+			want_xclk, state, err);
+		return err;
+	}
+
+	err = amdgpu_dpm_set_soft_freq_range(gpu, PP_ISPICLK, want_iclk, 0);
+	if (err) {
+		drm_err(&gpu->ddev, "failed to set iclk %u to %u: %d\n",
+			want_iclk, state, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/* device_for_each_child() callback: add "mfd_device" children to the genpd in @data */
+static int isp_genpd_add_device(struct device *dev, void *data)
+{
+	struct generic_pm_domain *gpd = data;
+	struct platform_device *pdev = container_of(dev, struct platform_device, dev);
+	struct amdgpu_isp *isp = container_of(gpd, struct amdgpu_isp, ispgpd);
+	struct amdgpu_device *adev = isp->adev;
+	int ret;
+
+	if (!dev->type || !dev->type->name) {
+		drm_dbg(&adev->ddev, "Invalid device type to add\n");
+		goto exit;
+	}
+
+	/* child may not be a platform device: do not read pdev->mfd_cell here */
+	if (strcmp(dev->type->name, "mfd_device")) {
+		drm_dbg(&adev->ddev, "Invalid isp mfd device %s to add\n", dev_name(dev));
+		goto exit;
+	}
+
+	/* a non-zero return below ends the child walk and is seen by the caller */
+	ret = pm_genpd_add_device(gpd, dev);
+	if (ret) {
+		drm_err(&adev->ddev, "Failed to add dev %s to genpd %d\n",
+			pdev->mfd_cell->name, ret);
+		return -ENODEV;
+	}
+
+exit:
+	/* Continue to add */
+	return 0;
+}
+
+/* device_for_each_child() callback: detach "mfd_device" children from their genpd */
+static int isp_genpd_remove_device(struct device *dev, void *data)
+{
+	int ret;
+
+	/*
+	 * @data is never dereferenced because callers may pass NULL; messages
+	 * are therefore logged against @dev. Children that are not MFD cells
+	 * may not be platform devices, so they are named with dev_name().
+	 */
+	if (!dev->type || !dev->type->name) {
+		dev_dbg(dev, "Invalid device type to remove\n");
+		goto exit;
+	}
+
+	if (strcmp(dev->type->name, "mfd_device")) {
+		dev_dbg(dev, "Invalid isp mfd device %s to remove\n",
+			dev_name(dev));
+		goto exit;
+	}
+
+	/* a non-zero return below ends the child walk */
+	ret = pm_genpd_remove_device(dev);
+	if (ret) {
+		dev_err(dev, "Failed to remove dev from genpd %d\n", ret);
+		return -ENODEV;
+	}
+
+exit:
+	/* Continue to remove */
+	return 0;
+}
+
+/* Set up the ISP genpd and MFD children; 0 on success or when no ISP ACPI device exists. */
+static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp)
+{
+	const struct software_node *amd_camera_node, *isp4_node;
+	struct amdgpu_device *adev = isp->adev;
+	struct acpi_device *acpi_dev;
+	int idx, int_idx, num_res, r;
+	u64 isp_base;
+
+	if (adev->rmmio_size < 0x5289)
+		return -EINVAL;
+
+	r = amdgpu_acpi_get_isp4_dev(&acpi_dev);
+	if (r) {
+		drm_dbg(&adev->ddev, "Invalid isp platform detected (%d)\n", r);
+		/* allow GPU init to progress */
+		return 0;
+	}
+
+	/* add GPIO resources required for OMNI5C10 sensor */
+	if (!strcmp("OMNI5C10", acpi_device_hid(acpi_dev))) {
+		gpiod_add_lookup_table(&isp_gpio_table);
+		gpiod_add_lookup_table(&isp_sensor_gpio_table);
+	}
+
+	isp_base = adev->rmmio_base;
+
+	isp->ispgpd.name = "ISP_v_4_1_1";
+	isp->ispgpd.power_off = isp_poweroff;
+	isp->ispgpd.power_on = isp_poweron;
+	isp->ispgpd.set_performance_state = isp_set_performance_state;
+
+	r = pm_genpd_init(&isp->ispgpd, NULL, true);
+	if (r) {
+		drm_err(&adev->ddev, "failed to initialize genpd (%d)\n", r);
+		return -EINVAL;
+	}
+
+	isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL);
+	if (!isp->isp_cell) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev, "isp mfd cell alloc failed (%d)\n", r);
+		goto failure;
+	}
+
+	num_res = MAX_ISP411_MEM_RES + MAX_ISP411_INT_SRC;
+	isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL);
+	if (!isp->isp_res) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev, "isp mfd resource alloc failed (%d)\n", r);
+		goto failure;
+	}
+
+	isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL);
+	if (!isp->isp_pdata) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev, "isp platform data alloc failed (%d)\n", r);
+		goto failure;
+	}
+
+	amd_camera_node = (const struct software_node *)acpi_dev->driver_data;
+	isp4_node = software_node_find_by_name(amd_camera_node, "isp4");
+
+	/* initialize isp platform data */
+	isp->isp_pdata->adev = (void *)adev;
+	isp->isp_pdata->asic_type = adev->asic_type;
+	isp->isp_pdata->base_rmmio_size = adev->rmmio_size;
+
+	isp->isp_res[0].name = "isp_4_1_1_reg";
+	isp->isp_res[0].flags = IORESOURCE_MEM;
+	isp->isp_res[0].start = isp_base;
+	isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END;
+
+	isp->isp_res[1].name = "isp_4_1_1_phy0_reg";
+	isp->isp_res[1].flags = IORESOURCE_MEM;
+	isp->isp_res[1].start = isp_base + ISP411_PHY0_OFFSET;
+	isp->isp_res[1].end = isp_base + ISP411_PHY0_OFFSET + ISP411_PHY0_SIZE;
+
+	for (idx = MAX_ISP411_MEM_RES, int_idx = 0; idx < num_res; idx++, int_idx++) {
+		isp->isp_res[idx].name = "isp_4_1_1_irq";
+		isp->isp_res[idx].flags = IORESOURCE_IRQ;
+		isp->isp_res[idx].start =
+			amdgpu_irq_create_mapping(adev, isp_4_1_1_int_srcid[int_idx]);
+		isp->isp_res[idx].end = isp->isp_res[idx].start;
+	}
+
+	isp->isp_cell[0].name = "amd_isp_capture";
+	isp->isp_cell[0].num_resources = num_res;
+	isp->isp_cell[0].resources = &isp->isp_res[0];
+	isp->isp_cell[0].platform_data = isp->isp_pdata;
+	isp->isp_cell[0].swnode = isp4_node;
+	isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data);
+
+	/* initialize isp i2c platform data */
+	isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+	if (!isp->isp_i2c_res) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev, "isp mfd res alloc failed (%d)\n", r);
+		goto failure;
+	}
+
+	isp->isp_i2c_res[0].name = "isp_i2c0_reg";
+	isp->isp_i2c_res[0].flags = IORESOURCE_MEM;
+	isp->isp_i2c_res[0].start = isp_base + ISP411_I2C0_OFFSET;
+	isp->isp_i2c_res[0].end = isp_base + ISP411_I2C0_OFFSET + ISP411_I2C0_SIZE;
+
+	isp->isp_cell[1].name = "amd_isp_i2c_designware";
+	isp->isp_cell[1].num_resources = 1;
+	isp->isp_cell[1].resources = &isp->isp_i2c_res[0];
+	isp->isp_cell[1].platform_data = isp->isp_pdata;
+	isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data);
+
+	/* initialize isp gpiochip platform data */
+	isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+	if (!isp->isp_gpio_res) {
+		r = -ENOMEM;
+		drm_err(&adev->ddev, "isp gpio resource alloc failed (%d)\n", r);
+		goto failure;
+	}
+
+	isp->isp_gpio_res[0].name = "isp_gpio_reg";
+	isp->isp_gpio_res[0].flags = IORESOURCE_MEM;
+	isp->isp_gpio_res[0].start = isp_base + ISP411_GPIO_SENSOR_OFFSET;
+	isp->isp_gpio_res[0].end = isp_base + ISP411_GPIO_SENSOR_OFFSET + ISP411_GPIO_SENSOR_SIZE;
+
+	isp->isp_cell[2].name = "amdisp-pinctrl";
+	isp->isp_cell[2].num_resources = 1;
+	isp->isp_cell[2].resources = &isp->isp_gpio_res[0];
+	isp->isp_cell[2].platform_data = isp->isp_pdata;
+	isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data);
+
+	/* add only amd_isp_capture and amd_isp_i2c_designware to genpd */
+	r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2);
+	if (r) {
+		drm_err(&adev->ddev, "add mfd hotplug device failed (%d)\n", r);
+		goto failure;
+	}
+
+	r = device_for_each_child(isp->parent, &isp->ispgpd, isp_genpd_add_device);
+	if (r) {
+		drm_err(&adev->ddev, "failed to add devices to genpd (%d)\n", r);
+		goto remove_devices;
+	}
+
+	r = mfd_add_hotplug_devices(isp->parent, &isp->isp_cell[2], 1);
+	if (r) {
+		drm_err(&adev->ddev, "add pinctl hotplug device failed (%d)\n", r);
+		goto remove_devices;
+	}
+
+	return 0;
+
+remove_devices:
+	/* children already exist: detach them from the genpd, then unregister them */
+	device_for_each_child(isp->parent, &isp->ispgpd, isp_genpd_remove_device);
+	mfd_remove_devices(isp->parent);
+failure:
+	pm_genpd_remove(&isp->ispgpd);
+	kfree(isp->isp_pdata);
+	kfree(isp->isp_res);
+	kfree(isp->isp_cell);
+	kfree(isp->isp_i2c_res);
+	kfree(isp->isp_gpio_res);
+
+	return r;
+}
+
+/* Detach and remove the MFD children, then free the hw_init allocations; returns 0. */
+static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp)
+{
+	/* hand the genpd to the callback as @data, as the add path in hw_init does */
+	device_for_each_child(isp->parent, &isp->ispgpd, isp_genpd_remove_device);
+	mfd_remove_devices(isp->parent);
+
+	kfree(isp->isp_res);
+	kfree(isp->isp_cell);
+	kfree(isp->isp_pdata);
+	kfree(isp->isp_i2c_res);
+	kfree(isp->isp_gpio_res);
+
+	return 0;
+}
+
+static const struct isp_funcs isp_v4_1_1_funcs = { /* installed by isp_v4_1_1_set_isp_funcs() */
+ .hw_init = isp_v4_1_1_hw_init,
+ .hw_fini = isp_v4_1_1_hw_fini,
+};
+
+void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp) /* only non-static symbol defined in this file */
+{
+ isp->funcs = &isp_v4_1_1_funcs; /* point @isp at the v4.1.1 hw_init/hw_fini table */
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h
new file mode 100644
index 000000000000..fe45d70d87f1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef __ISP_V4_1_1_H__
+#define __ISP_V4_1_1_H__
+
+#include "amdgpu_isp.h"
+
+#include "ivsrcid/isp/irqsrcs_isp_4_1.h"
+
+#define MAX_ISP411_MEM_RES 2 /* leading IORESOURCE_MEM entries of isp_res[] in isp_v4_1_1_hw_init() */
+#define MAX_ISP411_INT_SRC 8 /* IORESOURCE_IRQ entries of isp_res[]; length of isp_4_1_1_int_srcid[] */
+
+#define ISP411_PHY0_OFFSET 0x66700 /* added to adev->rmmio_base in isp_v4_1_1_hw_init() */
+#define ISP411_PHY0_SIZE 0xD30 /* resource end is start + size as written in hw_init */
+
+#define ISP411_I2C0_OFFSET 0x66400 /* added to adev->rmmio_base for "isp_i2c0_reg" */
+#define ISP411_I2C0_SIZE 0x100
+
+#define ISP411_GPIO_SENSOR_OFFSET 0x6613C /* added to adev->rmmio_base for "isp_gpio_reg" */
+#define ISP411_GPIO_SENSOR_SIZE 0x54
+
+void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index 77595e9622da..b5bb7f4d607c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v1_0.h"
@@ -34,6 +35,9 @@
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
{
@@ -300,7 +304,10 @@ static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
@@ -451,13 +458,13 @@ static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
/**
* jpeg_v1_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-int jpeg_v1_0_early_init(void *handle)
+int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
adev->jpeg.num_jpeg_rings = 1;
@@ -471,12 +478,12 @@ int jpeg_v1_0_early_init(void *handle)
/**
* jpeg_v1_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-int jpeg_v1_0_sw_init(void *handle)
+int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -502,13 +509,13 @@ int jpeg_v1_0_sw_init(void *handle)
/**
* jpeg_v1_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG free up sw allocation
*/
-void jpeg_v1_0_sw_fini(void *handle)
+void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_ring_fini(adev->jpeg.inst->ring_dec);
}
@@ -550,10 +557,11 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
.nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
.no_user_fence = true,
- .extra_dw = 64,
+ .extra_bytes = 256,
.get_rptr = jpeg_v1_0_decode_ring_get_rptr,
.get_wptr = jpeg_v1_0_decode_ring_get_wptr,
.set_wptr = jpeg_v1_0_decode_ring_set_wptr,
+ .parse_cs = jpeg_v1_dec_ring_parse_cs,
.emit_frame_size =
6 + 6 + /* hdp invalidate / flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
@@ -581,7 +589,6 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs;
- DRM_INFO("JPEG decode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = {
@@ -597,18 +604,84 @@ static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
int cnt = 0;
- mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+ mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround);
if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_dec))
DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n");
- for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) {
+ for (cnt = 0; cnt < adev->vcn.inst[0].num_enc_rings; cnt++) {
if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_enc[cnt]))
DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n", cnt);
}
vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
}
+
+/**
+ * jpeg_v1_dec_ring_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Parse the command stream, return -EINVAL for invalid packet,
+ * 0 otherwise
+ */
+static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+				     struct amdgpu_job *job,
+				     struct amdgpu_ib *ib)
+{
+	struct amdgpu_device *adev = parser->adev;
+	u32 idx;
+
+	/* every second dword is decoded as a PACKETJ header */
+	for (idx = 0; idx < ib->length_dw; idx += 2) {
+		u32 hdr = ib->ptr[idx];
+		u32 reg = CP_PACKETJ_GET_REG(hdr);
+		u32 type = CP_PACKETJ_GET_TYPE(hdr);
+		bool allowed;
+
+		/* only allow 0 for now */
+		if (CP_PACKETJ_GET_RES(hdr) ||
+		    CP_PACKETJ_GET_COND(hdr) != PACKETJ_CONDITION_CHECK0)
+			return -EINVAL;
+
+		/* registers inside this window are accepted for any packet type */
+		if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END)
+			continue;
+
+		switch (type) {
+		case PACKETJ_TYPE0:
+			allowed = reg == JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH ||
+				  reg == JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW ||
+				  reg == JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH ||
+				  reg == JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW ||
+				  reg == JPEG_V1_REG_CTX_INDEX ||
+				  reg == JPEG_V1_REG_CTX_DATA;
+			break;
+		case PACKETJ_TYPE1:
+			allowed = reg == JPEG_V1_REG_CTX_DATA;
+			break;
+		case PACKETJ_TYPE3:
+			allowed = reg == JPEG_V1_REG_SOFT_RESET;
+			break;
+		case PACKETJ_TYPE6:
+			allowed = hdr == CP_PACKETJ_NOP;
+			break;
+		default:
+			allowed = false;
+			break;
+		}
+
+		/* the first rejected packet is logged and ends the parse */
+		if (!allowed) {
+			dev_err(adev->dev, "Invalid packet [0x%08x]!\n", hdr);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
index bbf33a6a3972..097328635083 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
@@ -24,9 +24,20 @@
#ifndef __JPEG_V1_0_H__
#define __JPEG_V1_0_H__
-int jpeg_v1_0_early_init(void *handle);
-int jpeg_v1_0_sw_init(void *handle);
-void jpeg_v1_0_sw_fini(void *handle);
+int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block);
+int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block);
+void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block);
void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
+#define JPEG_V1_REG_RANGE_START 0x8000
+#define JPEG_V1_REG_RANGE_END 0x803f
+
+#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x8238
+#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x8239
+#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH 0x825a
+#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW 0x825b
+#define JPEG_V1_REG_CTX_INDEX 0x8328
+#define JPEG_V1_REG_CTX_DATA 0x8329
+#define JPEG_V1_REG_SOFT_RESET 0x83a0
+
#endif /*__JPEG_V1_0_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 1c8116d75f63..27c76bd424cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -32,21 +32,37 @@
#include "vcn/vcn_2_0_0_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_2_0[] = { /* handed to amdgpu_jpeg_reg_dump_init() in jpeg_v2_0_sw_init() */
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+}; /* entry count is taken with ARRAY_SIZE() at the call site */
+
static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v2_0_set_powergating_state(void *handle,
+static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
* jpeg_v2_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v2_0_early_init(void *handle)
+static int jpeg_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
adev->jpeg.num_jpeg_rings = 1;
@@ -60,13 +76,13 @@ static int jpeg_v2_0_early_init(void *handle)
/**
* jpeg_v2_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v2_0_sw_init(void *handle)
+static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -97,25 +113,37 @@ static int jpeg_v2_0_sw_init(void *handle)
adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_2_0, ARRAY_SIZE(jpeg_reg_list_2_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v2_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v2_0_sw_fini(void *handle)
+static int jpeg_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -124,41 +152,36 @@ static int jpeg_v2_0_sw_fini(void *handle)
/**
* jpeg_v2_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v2_0_hw_init(void *handle)
+static int jpeg_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
- int r;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
- r = amdgpu_ring_test_helper(ring);
- if (!r)
- DRM_INFO("JPEG decode initialized successfully.\n");
-
- return r;
+ return amdgpu_ring_test_helper(ring);
}
/**
* jpeg_v2_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v2_0_hw_fini(void *handle)
+static int jpeg_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
- jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v2_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -166,20 +189,19 @@ static int jpeg_v2_0_hw_fini(void *handle)
/**
* jpeg_v2_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v2_0_suspend(void *handle)
+static int jpeg_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v2_0_hw_fini(adev);
+ r = jpeg_v2_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -187,20 +209,19 @@ static int jpeg_v2_0_suspend(void *handle)
/**
* jpeg_v2_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v2_0_resume(void *handle)
+static int jpeg_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v2_0_hw_init(adev);
+ r = jpeg_v2_0_hw_init(ip_block);
return r;
}
@@ -543,11 +564,15 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
@@ -657,18 +682,18 @@ void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
}
}
-static bool jpeg_v2_0_is_idle(void *handle)
+static bool jpeg_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v2_0_wait_for_idle(void *handle)
+static int jpeg_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
@@ -677,14 +702,14 @@ static int jpeg_v2_0_wait_for_idle(void *handle)
return ret;
}
-static int jpeg_v2_0_set_clockgating_state(void *handle,
+static int jpeg_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
- if (!jpeg_v2_0_is_idle(handle))
+ if (!jpeg_v2_0_is_idle(ip_block))
return -EBUSY;
jpeg_v2_0_enable_clock_gating(adev);
} else {
@@ -694,10 +719,10 @@ static int jpeg_v2_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v2_0_set_powergating_state(void *handle,
+static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->jpeg.cur_state)
@@ -741,10 +766,25 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring,
+				unsigned int vmid,
+				struct amdgpu_fence *timedout_fence)
+{
+	int err;
+
+	/* stop then start the block between the reset begin/end helpers */
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+	err = jpeg_v2_0_stop(ring->adev);
+	if (!err)
+		err = jpeg_v2_0_start(ring->adev);
+	if (err)
+		return err;
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
.name = "jpeg_v2_0",
.early_init = jpeg_v2_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_0_sw_init,
.sw_fini = jpeg_v2_0_sw_fini,
.hw_init = jpeg_v2_0_hw_init,
@@ -753,12 +793,10 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
.resume = jpeg_v2_0_resume,
.is_idle = jpeg_v2_0_is_idle,
.wait_for_idle = jpeg_v2_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_0_set_clockgating_state,
.set_powergating_state = jpeg_v2_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
@@ -767,6 +805,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_0_dec_ring_get_rptr,
.get_wptr = jpeg_v2_0_dec_ring_get_wptr,
.set_wptr = jpeg_v2_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -788,12 +827,12 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_0_ring_reset,
};
static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->ring_dec->funcs = &jpeg_v2_0_dec_ring_vm_funcs;
- DRM_INFO("JPEG decode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs jpeg_v2_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index aadb74de52bc..20983f126b49 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -36,9 +36,25 @@
#define JPEG25_MAX_HW_INSTANCES_ARCTURUS 2
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_2_5[] = { /* handed to amdgpu_jpeg_reg_dump_init() in jpeg_v2_5_sw_init() */
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+}; /* entry count is taken with ARRAY_SIZE() at the call site */
+
static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v2_5_set_powergating_state(void *handle,
+static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev);
@@ -50,13 +66,13 @@ static int amdgpu_ih_clientid_jpeg[] = {
/**
* jpeg_v2_5_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v2_5_early_init(void *handle)
+static int jpeg_v2_5_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 harvest;
int i;
@@ -81,15 +97,15 @@ static int jpeg_v2_5_early_init(void *handle)
/**
* jpeg_v2_5_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v2_5_sw_init(void *handle)
+static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
@@ -128,7 +144,7 @@ static int jpeg_v2_5_sw_init(void *handle)
ring = adev->jpeg.inst[i].ring_dec;
ring->use_doorbell = true;
- if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(2, 5, 0))
ring->vm_hub = AMDGPU_MMHUB1(0);
else
ring->vm_hub = AMDGPU_MMHUB0(0);
@@ -147,25 +163,37 @@ static int jpeg_v2_5_sw_init(void *handle)
if (r)
return r;
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_2_5, ARRAY_SIZE(jpeg_reg_list_2_5));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v2_5_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v2_5_sw_fini(void *handle)
+static int jpeg_v2_5_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -174,12 +202,12 @@ static int jpeg_v2_5_sw_fini(void *handle)
/**
* jpeg_v2_5_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v2_5_hw_init(void *handle)
+static int jpeg_v2_5_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
@@ -196,24 +224,22 @@ static int jpeg_v2_5_hw_init(void *handle)
return r;
}
- DRM_INFO("JPEG decode initialized successfully.\n");
-
return 0;
}
/**
* jpeg_v2_5_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v2_5_hw_fini(void *handle)
+static int jpeg_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
@@ -221,7 +247,7 @@ static int jpeg_v2_5_hw_fini(void *handle)
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
- jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
@@ -233,20 +259,19 @@ static int jpeg_v2_5_hw_fini(void *handle)
/**
* jpeg_v2_5_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v2_5_suspend(void *handle)
+static int jpeg_v2_5_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v2_5_hw_fini(adev);
+ r = jpeg_v2_5_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -254,20 +279,19 @@ static int jpeg_v2_5_suspend(void *handle)
/**
* jpeg_v2_5_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v2_5_resume(void *handle)
+static int jpeg_v2_5_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v2_5_hw_init(adev);
+ r = jpeg_v2_5_hw_init(ip_block);
return r;
}
@@ -314,6 +338,44 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
}
+static void jpeg_v2_5_start_inst(struct amdgpu_device *adev, int i)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[i].ring_dec;
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v2_5_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
+}
+
/**
* jpeg_v2_5_start - start JPEG block
*
@@ -323,52 +385,33 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
*/
static int jpeg_v2_5_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
int i;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
+ jpeg_v2_5_start_inst(adev, i);
- ring = adev->jpeg.inst[i].ring_dec;
- /* disable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
- /* JPEG disable CGC */
- jpeg_v2_5_disable_clock_gating(adev, i);
-
- /* MJPEG global tiling registers */
- WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* enable JMI channel */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- /* enable System Interrupt for JRBC */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
- JPEG_SYS_INT_EN__DJRBC_MASK,
- ~JPEG_SYS_INT_EN__DJRBC_MASK);
-
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
- ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
}
return 0;
}
+static void jpeg_v2_5_stop_inst(struct amdgpu_device *adev, int i)
+{
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v2_5_enable_clock_gating(adev, i);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+}
+
/**
* jpeg_v2_5_stop - stop JPEG block
*
@@ -383,18 +426,7 @@ static int jpeg_v2_5_stop(struct amdgpu_device *adev)
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
-
- /* reset JMI */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
- UVD_JMI_CNTL__SOFT_RESET_MASK,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- jpeg_v2_5_enable_clock_gating(adev, i);
-
- /* enable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
- UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+ jpeg_v2_5_stop_inst(adev, i);
}
return 0;
@@ -486,9 +518,9 @@ static void jpeg_v2_6_dec_ring_insert_end(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, (1 << (ring->me * 2 + 14)));
}
-static bool jpeg_v2_5_is_idle(void *handle)
+static bool jpeg_v2_5_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -503,9 +535,9 @@ static bool jpeg_v2_5_is_idle(void *handle)
return ret;
}
-static int jpeg_v2_5_wait_for_idle(void *handle)
+static int jpeg_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -522,10 +554,10 @@ static int jpeg_v2_5_wait_for_idle(void *handle)
return 0;
}
-static int jpeg_v2_5_set_clockgating_state(void *handle,
+static int jpeg_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
int i;
@@ -534,7 +566,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
continue;
if (enable) {
- if (!jpeg_v2_5_is_idle(handle))
+ if (!jpeg_v2_5_is_idle(ip_block))
return -EBUSY;
jpeg_v2_5_enable_clock_gating(adev, i);
} else {
@@ -545,13 +577,13 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v2_5_set_powergating_state(void *handle,
+static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
- if(state == adev->jpeg.cur_state)
+ if (state == adev->jpeg.cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
@@ -559,7 +591,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle,
else
ret = jpeg_v2_5_start(adev);
- if(!ret)
+ if (!ret)
adev->jpeg.cur_state = state;
return ret;
@@ -614,10 +646,19 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ jpeg_v2_5_stop_inst(ring->adev, ring->me);
+ jpeg_v2_5_start_inst(ring->adev, ring->me);
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
.name = "jpeg_v2_5",
.early_init = jpeg_v2_5_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_5_sw_init,
.sw_fini = jpeg_v2_5_sw_fini,
.hw_init = jpeg_v2_5_hw_init,
@@ -626,18 +667,15 @@ static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
.resume = jpeg_v2_5_resume,
.is_idle = jpeg_v2_5_is_idle,
.wait_for_idle = jpeg_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_5_set_clockgating_state,
.set_powergating_state = jpeg_v2_5_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
.name = "jpeg_v2_6",
.early_init = jpeg_v2_5_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_5_sw_init,
.sw_fini = jpeg_v2_5_sw_fini,
.hw_init = jpeg_v2_5_hw_init,
@@ -646,12 +684,10 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
.resume = jpeg_v2_5_resume,
.is_idle = jpeg_v2_5_is_idle,
.wait_for_idle = jpeg_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_5_set_clockgating_state,
.set_powergating_state = jpeg_v2_5_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
@@ -660,6 +696,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -681,6 +718,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_5_ring_reset,
};
static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
@@ -689,6 +727,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -710,6 +749,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_5_ring_reset,
};
static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
@@ -724,7 +764,6 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
else /* CHIP_ALDEBARAN */
adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_6_dec_ring_vm_funcs;
adev->jpeg.inst[i].ring_dec->me = i;
- DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i);
}
}
@@ -754,8 +793,7 @@ static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
}
}
-const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
-{
+const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = {
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 2,
.minor = 5,
@@ -763,8 +801,7 @@ const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
.funcs = &jpeg_v2_5_ip_funcs,
};
-const struct amdgpu_ip_block_version jpeg_v2_6_ip_block =
-{
+const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = {
.type = AMD_IP_BLOCK_TYPE_JPEG,
.major = 2,
.minor = 6,
@@ -822,7 +859,7 @@ static struct amdgpu_jpeg_ras jpeg_v2_6_ras = {
static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[JPEG_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, JPEG_HWIP, 0)) {
case IP_VERSION(2, 6, 0):
adev->jpeg.ras = &jpeg_v2_6_ras;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index df4440c21bbf..d1a011c40ba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -34,25 +34,41 @@
#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_3_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v3_0_set_powergating_state(void *handle,
+static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
* jpeg_v3_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v3_0_early_init(void *handle)
+static int jpeg_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 harvest;
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(3, 1, 1):
case IP_VERSION(3, 1, 2):
break;
@@ -75,13 +91,13 @@ static int jpeg_v3_0_early_init(void *handle)
/**
* jpeg_v3_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v3_0_sw_init(void *handle)
+static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -112,25 +128,37 @@ static int jpeg_v3_0_sw_init(void *handle)
adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_3_0, ARRAY_SIZE(jpeg_reg_list_3_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v3_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v3_0_sw_fini(void *handle)
+static int jpeg_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -139,43 +167,36 @@ static int jpeg_v3_0_sw_fini(void *handle)
/**
* jpeg_v3_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v3_0_hw_init(void *handle)
+static int jpeg_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
- int r;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
- r = amdgpu_ring_test_helper(ring);
- if (r)
- return r;
-
- DRM_INFO("JPEG decode initialized successfully.\n");
-
- return 0;
+ return amdgpu_ring_test_helper(ring);
}
/**
* jpeg_v3_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v3_0_hw_fini(void *handle)
+static int jpeg_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
- jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -183,20 +204,19 @@ static int jpeg_v3_0_hw_fini(void *handle)
/**
* jpeg_v3_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v3_0_suspend(void *handle)
+static int jpeg_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v3_0_hw_fini(adev);
+ r = jpeg_v3_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -204,20 +224,19 @@ static int jpeg_v3_0_suspend(void *handle)
/**
* jpeg_v3_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v3_0_resume(void *handle)
+static int jpeg_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v3_0_hw_init(adev);
+ r = jpeg_v3_0_hw_init(ip_block);
return r;
}
@@ -454,9 +473,9 @@ static void jpeg_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static bool jpeg_v3_0_is_idle(void *handle)
+static bool jpeg_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 1;
ret &= (((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
@@ -466,23 +485,23 @@ static bool jpeg_v3_0_is_idle(void *handle)
return ret;
}
-static int jpeg_v3_0_wait_for_idle(void *handle)
+static int jpeg_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v3_0_set_clockgating_state(void *handle,
+static int jpeg_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
if (enable) {
- if (!jpeg_v3_0_is_idle(handle))
+ if (!jpeg_v3_0_is_idle(ip_block))
return -EBUSY;
jpeg_v3_0_enable_clock_gating(adev);
} else {
@@ -492,10 +511,10 @@ static int jpeg_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v3_0_set_powergating_state(void *handle,
+static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if(state == adev->jpeg.cur_state)
@@ -539,10 +558,25 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = jpeg_v3_0_stop(ring->adev);
+ if (r)
+ return r;
+ r = jpeg_v3_0_start(ring->adev);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
.name = "jpeg_v3_0",
.early_init = jpeg_v3_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v3_0_sw_init,
.sw_fini = jpeg_v3_0_sw_fini,
.hw_init = jpeg_v3_0_hw_init,
@@ -551,12 +585,10 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
.resume = jpeg_v3_0_resume,
.is_idle = jpeg_v3_0_is_idle,
.wait_for_idle = jpeg_v3_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v3_0_set_clockgating_state,
.set_powergating_state = jpeg_v3_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
@@ -565,6 +597,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v3_0_dec_ring_get_rptr,
.get_wptr = jpeg_v3_0_dec_ring_get_wptr,
.set_wptr = jpeg_v3_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -586,12 +619,12 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v3_0_ring_reset,
};
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->ring_dec->funcs = &jpeg_v3_0_dec_ring_vm_funcs;
- DRM_INFO("JPEG decode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs jpeg_v3_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index 3eb3dcd56b57..33db2c1ae6cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -36,25 +36,40 @@
#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev);
static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v4_0_set_powergating_state(void *handle,
+static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev);
-
static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
/**
* jpeg_v4_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v4_0_early_init(void *handle)
+static int jpeg_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
@@ -70,13 +85,13 @@ static int jpeg_v4_0_early_init(void *handle)
/**
* jpeg_v4_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v4_0_sw_init(void *handle)
+static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -124,25 +139,36 @@ static int jpeg_v4_0_sw_init(void *handle)
if (r)
return r;
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0, ARRAY_SIZE(jpeg_reg_list_4_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v4_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v4_0_sw_fini(void *handle)
+static int jpeg_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -151,12 +177,12 @@ static int jpeg_v4_0_sw_fini(void *handle)
/**
* jpeg_v4_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v4_0_hw_init(void *handle)
+static int jpeg_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
@@ -181,27 +207,25 @@ static int jpeg_v4_0_hw_init(void *handle)
return r;
}
- DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
-
return 0;
}
/**
* jpeg_v4_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v4_0_hw_fini(void *handle)
+static int jpeg_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (!amdgpu_sriov_vf(adev)) {
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
- jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
@@ -212,20 +236,19 @@ static int jpeg_v4_0_hw_fini(void *handle)
/**
* jpeg_v4_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v4_0_suspend(void *handle)
+static int jpeg_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v4_0_hw_fini(adev);
+ r = jpeg_v4_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -233,20 +256,19 @@ static int jpeg_v4_0_suspend(void *handle)
/**
* jpeg_v4_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v4_0_resume(void *handle)
+static int jpeg_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v4_0_hw_init(adev);
+ r = jpeg_v4_0_hw_init(ip_block);
return r;
}
@@ -431,6 +453,10 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev)
end.cmd_header.command_type =
MMSCH_COMMAND__END;
+ size = sizeof(struct mmsch_v4_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy(&header, (void *)table_loc, size);
+
header.version = MMSCH_VERSION;
header.total_size = RREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE);
@@ -468,6 +494,9 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev)
table_loc = (uint32_t *)table->cpu_addr;
memcpy((void *)table_loc, &header, size);
+ /* Perform HDP flush before writing to MMSCH registers */
+ amdgpu_device_flush_hdp(adev, NULL);
+
/* message MMSCH (in VCN[0]) to initialize this client
* 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
* of memory descriptor location
@@ -515,8 +544,11 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev)
return -EBUSY;
}
}
- if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE && init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", resp, init_status);
+ return -EINVAL;
+ }
return 0;
@@ -601,9 +633,9 @@ static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static bool jpeg_v4_0_is_idle(void *handle)
+static bool jpeg_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 1;
ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) &
@@ -613,23 +645,23 @@ static bool jpeg_v4_0_is_idle(void *handle)
return ret;
}
-static int jpeg_v4_0_wait_for_idle(void *handle)
+static int jpeg_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v4_0_set_clockgating_state(void *handle,
+static int jpeg_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
if (enable) {
- if (!jpeg_v4_0_is_idle(handle))
+ if (!jpeg_v4_0_is_idle(ip_block))
return -EBUSY;
jpeg_v4_0_enable_clock_gating(adev);
} else {
@@ -639,10 +671,10 @@ static int jpeg_v4_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v4_0_set_powergating_state(void *handle,
+static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (amdgpu_sriov_vf(adev)) {
@@ -664,14 +696,6 @@ static int jpeg_v4_0_set_powergating_state(void *handle,
return ret;
}
-static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- return 0;
-}
-
static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned int type,
@@ -699,10 +723,25 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, /* .reset callback of jpeg_v4_0_dec_ring_vm_funcs */
+ unsigned int vmid, /* not referenced in this function */
+ struct amdgpu_fence *timedout_fence)
+{
+ int r; /* first non-zero status from stop/start, if any */
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = jpeg_v4_0_stop(ring->adev); /* the reset itself is a stop followed by a start on ring->adev */
+ if (r)
+ return r; /* NOTE(review): returns without amdgpu_ring_reset_helper_end() - confirm intended */
+ r = jpeg_v4_0_start(ring->adev);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence); /* reset result is whatever the end helper reports */
+}
+
static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
.name = "jpeg_v4_0",
.early_init = jpeg_v4_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v4_0_sw_init,
.sw_fini = jpeg_v4_0_sw_fini,
.hw_init = jpeg_v4_0_hw_init,
@@ -711,12 +750,10 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
.resume = jpeg_v4_0_resume,
.is_idle = jpeg_v4_0_is_idle,
.wait_for_idle = jpeg_v4_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v4_0_set_clockgating_state,
.set_powergating_state = jpeg_v4_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
@@ -725,6 +762,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -746,16 +784,15 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_ring_reset,
};
static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_dec_ring_vm_funcs;
- DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = {
- .set = jpeg_v4_0_set_interrupt_state,
.process = jpeg_v4_0_process_interrupt,
};
@@ -831,7 +868,7 @@ static struct amdgpu_jpeg_ras jpeg_v4_0_ras = {
static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[JPEG_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, JPEG_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
adev->jpeg.ras = &jpeg_v4_0_ras;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
index 07d36c2abd6b..47638fd4d4e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
@@ -32,5 +32,4 @@ enum amdgpu_jpeg_v4_0_sub_block {
};
extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block;
-
#endif /* __JPEG_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 1de79d660285..aae7328973d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -25,6 +25,7 @@
#include "amdgpu_jpeg.h"
#include "soc15.h"
#include "soc15d.h"
+#include "jpeg_v2_0.h"
#include "jpeg_v4_0_3.h"
#include "mmsch_v4_0_3.h"
@@ -32,6 +33,9 @@
#include "vcn/vcn_4_0_3_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+/* Keep only the low 17 bits (mask 0x1FFFF) of a register offset; the argument is parenthesized so expressions expand safely. */
+#define NORMALIZE_JPEG_REG_OFFSET(offset) ((offset) & 0x1FFFF)
+
enum jpeg_engin_status {
UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0,
UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
@@ -39,7 +43,7 @@ enum jpeg_engin_status {
static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v4_0_3_set_powergating_state(void *handle,
+static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
@@ -55,18 +59,67 @@ static int amdgpu_ih_srcid_jpeg[] = {
VCN_4_0__SRCID__JPEG7_DECODE
};
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_3[] = { /* handed to amdgpu_jpeg_reg_dump_init() by jpeg_v4_0_3_sw_init() */
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_SYS_INT_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR), /* JRBC0: RPTR, WPTR, STATUS */
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR), /* JRBC1..JRBC7: the same three registers per ring */
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
+};
+
+static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
+{
+ return !(adev->jpeg.caps & AMDGPU_JPEG_CAPS(RRMT_ENABLED)); /* true while the RRMT_ENABLED cap bit is clear */
+}
+
+static inline int jpeg_v4_0_3_core_reg_offset(u32 pipe)
+{
+ /* pipe 0 uses the base registers as-is; any other pipe is offset by 0x40 * pipe - 0xc80 */
+ if (!pipe)
+ return 0;
+ return (0x40 * pipe) - 0xc80;
+}
+
/**
* jpeg_v4_0_3_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v4_0_3_early_init(void *handle)
+static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+ adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3;
jpeg_v4_0_3_set_dec_ring_funcs(adev);
jpeg_v4_0_3_set_irq_funcs(adev);
@@ -78,13 +131,13 @@ static int jpeg_v4_0_3_early_init(void *handle)
/**
* jpeg_v4_0_3_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v4_0_3_sw_init(void *handle)
+static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r, jpeg_inst;
@@ -96,6 +149,18 @@ static int jpeg_v4_0_3_sw_init(void *handle)
return r;
}
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
r = amdgpu_jpeg_sw_init(adev);
if (r)
return r;
@@ -134,10 +199,8 @@ static int jpeg_v4_0_3_sw_init(void *handle)
adev->jpeg.internal.jpeg_pitch[j] =
regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
adev->jpeg.inst[i].external.jpeg_pitch[j] =
- SOC15_REG_OFFSET1(
- JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_SCRATCH0,
- (j ? (0x40 * j - 0xc80) : 0));
+ SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_SCRATCH0,
+ jpeg_v4_0_3_core_reg_offset(j));
}
}
@@ -149,25 +212,37 @@ static int jpeg_v4_0_3_sw_init(void *handle)
}
}
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_3, ARRAY_SIZE(jpeg_reg_list_4_0_3));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
- return 0;
+ return r; /* report a sysfs reset-mask init failure instead of discarding the status stored in r */
}
/**
* jpeg_v4_0_3_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v4_0_3_sw_fini(void *handle)
+static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -289,12 +364,12 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
/**
* jpeg_v4_0_3_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v4_0_3_hw_init(void *handle)
+static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r, jpeg_inst;
@@ -303,7 +378,7 @@ static int jpeg_v4_0_3_hw_init(void *handle)
if (r)
return r;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
ring = &adev->jpeg.inst[i].ring_dec[j];
ring->wptr = 0;
@@ -313,6 +388,11 @@ static int jpeg_v4_0_3_hw_init(void *handle)
}
}
} else {
+ /* This flag is not set for VF, assumed to be disabled always */
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) &
+ 0x100)
+ adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);
+
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
jpeg_inst = GET_INST(JPEG, i);
@@ -341,7 +421,6 @@ static int jpeg_v4_0_3_hw_init(void *handle)
}
}
}
- DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
return 0;
}
@@ -349,42 +428,44 @@ static int jpeg_v4_0_3_hw_init(void *handle)
/**
* jpeg_v4_0_3_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v4_0_3_hw_fini(void *handle)
+static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (!amdgpu_sriov_vf(adev)) {
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
- ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG) && !amdgpu_sriov_vf(adev))
+ amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
return ret;
}
/**
* jpeg_v4_0_3_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v4_0_3_suspend(void *handle)
+static int jpeg_v4_0_3_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v4_0_3_hw_fini(adev);
+ r = jpeg_v4_0_3_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -392,20 +473,19 @@ static int jpeg_v4_0_3_suspend(void *handle)
/**
* jpeg_v4_0_3_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v4_0_3_resume(void *handle)
+static int jpeg_v4_0_3_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v4_0_3_hw_init(adev);
+ r = jpeg_v4_0_3_hw_init(ip_block);
return r;
}
@@ -460,6 +540,75 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst
WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
}
+static void jpeg_v4_0_3_start_inst(struct amdgpu_device *adev, int inst) /* per-instance half of jpeg_v4_0_3_start() */
+{
+ int jpeg_inst = GET_INST(JPEG, inst); /* GET_INST() translation of the caller's loop index */
+
+ WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
+ 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); /* write 1 into the UVDJ_PWR_CONFIG field ... */
+ SOC15_WAIT_ON_RREG(JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDJ_PWR_ON <<
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); /* ... then wait until the UVDJ_PWR_STATUS field reads UVDJ_PWR_ON */
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC (the helper takes the untranslated inst index, not jpeg_inst) */
+ jpeg_v4_0_3_disable_clock_gating(adev, inst);
+
+ /* MJPEG global tiling registers: both are loaded from gfx.config.gb_addr_config */
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel (writes 0 under ~SOFT_RESET_MASK; presumably clears the soft-reset field - confirm WREG32_P semantics) */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+}
+
+static void jpeg_v4_0_3_start_jrbc(struct amdgpu_ring *ring) /* per-ring half of jpeg_v4_0_3_start() */
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me); /* instance comes from ring->me */
+ int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe); /* applied to every JRBC0/JMI0 register access below */
+
+ /* enable System Interrupt for JRBC: DJRBC0 mask shifted left by ring->pipe */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe,
+ ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe));
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
+ reg_offset, 0); /* ring buffer VMID register set to 0 */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset,
+ (0x00000001L | 0x00000002L)); /* RB_CNTL = 0x3 while base and pointers are programmed; NOTE(review): bit meanings not visible here */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ reg_offset, lower_32_bits(ring->gpu_addr)); /* low 32 bits of ring->gpu_addr */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ reg_offset, upper_32_bits(ring->gpu_addr)); /* high 32 bits of ring->gpu_addr */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ reg_offset, 0); /* read pointer zeroed */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset, 0); /* write pointer zeroed */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset, 0x00000002L); /* RB_CNTL rewritten with only 0x2 once the pointers are set */
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_SIZE,
+ reg_offset, ring->ring_size / 4); /* presumably a dword count (ring_size / 4) - confirm units */
+ ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset); /* cache the write pointer as read back from the register */
+}
+
/**
* jpeg_v4_0_3_start - start JPEG block
*
@@ -470,84 +619,36 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst
static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- int i, j, jpeg_inst;
+ int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- jpeg_inst = GET_INST(JPEG, i);
-
- WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
- 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
- SOC15_WAIT_ON_RREG(
- JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
- UVD_PGFSM_STATUS__UVDJ_PWR_ON
- << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
- UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
-
- /* disable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
- regUVD_JPEG_POWER_STATUS),
- 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
- /* JPEG disable CGC */
- jpeg_v4_0_3_disable_clock_gating(adev, i);
-
- /* MJPEG global tiling registers */
- WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* enable JMI channel */
- WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
+ jpeg_v4_0_3_start_inst(adev, i);
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
- unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
-
ring = &adev->jpeg.inst[i].ring_dec[j];
-
- /* enable System Interrupt for JRBC */
- WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
- regJPEG_SYS_INT_EN),
- JPEG_SYS_INT_EN__DJRBC0_MASK << j,
- ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j));
-
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
- reg_offset, 0);
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_RB_CNTL,
- reg_offset,
- (0x00000001L | 0x00000002L));
- WREG32_SOC15_OFFSET(
- JPEG, jpeg_inst,
- regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- reg_offset, lower_32_bits(ring->gpu_addr));
- WREG32_SOC15_OFFSET(
- JPEG, jpeg_inst,
- regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- reg_offset, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_RB_RPTR,
- reg_offset, 0);
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_RB_WPTR,
- reg_offset, 0);
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_RB_CNTL,
- reg_offset, 0x00000002L);
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_RB_SIZE,
- reg_offset, ring->ring_size / 4);
- ring->wptr = RREG32_SOC15_OFFSET(
- JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
- reg_offset);
+ jpeg_v4_0_3_start_jrbc(ring);
}
}
return 0;
}
+static void jpeg_v4_0_3_stop_inst(struct amdgpu_device *adev, int inst) /* per-instance body of jpeg_v4_0_3_stop() */
+{
+ int jpeg_inst = GET_INST(JPEG, inst); /* GET_INST() translation of the caller's loop index */
+ /* reset JMI: write SOFT_RESET_MASK under its own mask */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_3_enable_clock_gating(adev, inst); /* takes the untranslated inst index, not jpeg_inst */
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+ /* NOTE(review): the inline loop body removed from jpeg_v4_0_3_stop() also wrote 2 to UVDJ_PWR_CONFIG and waited for UVDJ_PWR_OFF; that step is absent here - confirm intended */
+}
+
/**
* jpeg_v4_0_3_stop - stop JPEG block
*
@@ -557,31 +658,10 @@ static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
*/
static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
{
- int i, jpeg_inst;
+ int i;
- for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- jpeg_inst = GET_INST(JPEG, i);
- /* reset JMI */
- WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
- UVD_JMI_CNTL__SOFT_RESET_MASK,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- jpeg_v4_0_3_enable_clock_gating(adev, i);
-
- /* enable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
- regUVD_JPEG_POWER_STATUS),
- UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
- WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
- 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
- SOC15_WAIT_ON_RREG(
- JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
- UVD_PGFSM_STATUS__UVDJ_PWR_OFF
- << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
- UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
- }
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ jpeg_v4_0_3_stop_inst(adev, i);
return 0;
}
@@ -597,9 +677,8 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- return RREG32_SOC15_OFFSET(
- JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR,
- ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ jpeg_v4_0_3_core_reg_offset(ring->pipe));
}
/**
@@ -615,11 +694,16 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
- else
- return RREG32_SOC15_OFFSET(
- JPEG, GET_INST(JPEG, ring->me),
- regUVD_JRBC0_UVD_JRBC_RB_WPTR,
- ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ jpeg_v4_0_3_core_reg_offset(ring->pipe));
+}
+
+void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) /* non-static; ring is not used */
+{
+ /* Intentionally empty: JPEG engine access for HDP flush doesn't work
+ * when RRMT is enabled, so this function writes nothing to the ring.
+ */
+}
/**
@@ -637,10 +721,8 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
- WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
- regUVD_JRBC0_UVD_JRBC_RB_WPTR,
- (ring->pipe ? (0x40 * ring->pipe - 0xc80) :
- 0),
+ WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ jpeg_v4_0_3_core_reg_offset(ring->pipe),
lower_32_bits(ring->wptr));
}
}
@@ -652,15 +734,18 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
*
* Write a start command to the ring.
*/
-static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
+void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
{
- amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
+ if (!amdgpu_sriov_vf(ring->adev)) {
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x80004000);
+ amdgpu_ring_write(ring,
+ PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
+ 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80004000);
+ }
}
/**
@@ -670,15 +755,18 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
*
* Write a end command to the ring.
*/
-static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
+void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
{
- amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x62a04);
+ if (!amdgpu_sriov_vf(ring->adev)) {
+ amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x62a04);
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x00004000);
+ amdgpu_ring_write(ring,
+ PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
+ 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00004000);
+ }
}
/**
@@ -691,7 +779,7 @@ static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
*
* Write a fence and a trap command to the ring.
*/
-static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned int flags)
{
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
@@ -720,31 +808,11 @@ static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
amdgpu_ring_write(ring, 0);
- if (ring->adev->jpeg.inst[ring->me].aid_id) {
- amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
- 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x4);
- } else {
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
- amdgpu_ring_write(ring, 0);
- }
-
- amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x3fbc);
-
- if (ring->adev->jpeg.inst[ring->me].aid_id) {
- amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
- 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x0);
- } else {
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
- amdgpu_ring_write(ring, 0);
- }
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x1);
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
amdgpu_ring_write(ring, 0);
@@ -760,7 +828,7 @@ static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
*
* Write ring commands to execute the indirect buffer.
*/
-static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
+void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
uint32_t flags)
@@ -769,11 +837,15 @@ static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
@@ -811,10 +883,16 @@ static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0x2);
}
-static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask)
{
- uint32_t reg_offset = (reg << 2);
+ uint32_t reg_offset;
+
+ /* Use normalized offsets if required */
+ if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
+ reg = NORMALIZE_JPEG_REG_OFFSET(reg);
+
+ reg_offset = (reg << 2);
amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
@@ -838,7 +916,7 @@ static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_
amdgpu_ring_write(ring, mask);
}
-static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
@@ -853,9 +931,15 @@ static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask);
}
-static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
{
- uint32_t reg_offset = (reg << 2);
+ uint32_t reg_offset;
+
+ /* Use normalized offsets if required */
+ if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
+ reg = NORMALIZE_JPEG_REG_OFFSET(reg);
+
+ reg_offset = (reg << 2);
amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
@@ -871,7 +955,7 @@ static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t re
amdgpu_ring_write(ring, val);
}
-static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
{
int i;
@@ -883,21 +967,17 @@ static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
}
}
-static bool jpeg_v4_0_3_is_idle(void *handle)
+static bool jpeg_v4_0_3_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool ret = false;
int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
- unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
-
- ret &= ((RREG32_SOC15_OFFSET(
- JPEG, GET_INST(JPEG, i),
- regUVD_JRBC0_UVD_JRBC_STATUS,
- reg_offset) &
- UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j)) &
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
}
@@ -905,36 +985,33 @@ static bool jpeg_v4_0_3_is_idle(void *handle)
return ret;
}
-static int jpeg_v4_0_3_wait_for_idle(void *handle)
+static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
- unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
-
- ret &= SOC15_WAIT_ON_RREG_OFFSET(
- JPEG, GET_INST(JPEG, i),
- regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset,
+ ret &= (SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j),
UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
- UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
}
}
return ret;
}
-static int jpeg_v4_0_3_set_clockgating_state(void *handle,
+static int jpeg_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (enable) {
- if (!jpeg_v4_0_3_is_idle(handle))
+ if (!jpeg_v4_0_3_is_idle(ip_block))
return -EBUSY;
jpeg_v4_0_3_enable_clock_gating(adev, i);
} else {
@@ -944,12 +1021,17 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v4_0_3_set_powergating_state(void *handle,
+static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
+ if (amdgpu_sriov_vf(adev)) {
+ adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
if (state == adev->jpeg.cur_state)
return 0;
@@ -972,6 +1054,14 @@ static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v4_0_3_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -1026,10 +1116,47 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+/* Stall, reset and release the JPEG core selected by ring->me / ring->pipe. */
+static void jpeg_v4_0_3_core_stall_reset(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int inst = GET_INST(JPEG, ring->me);
+	int core_off = jpeg_v4_0_3_core_reg_offset(ring->pipe);
+
+	/* Set the JMI client stall bits, then wait for the clean status bits. */
+	WREG32_SOC15_OFFSET(JPEG, inst, regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+			    core_off, 0x1F);
+	SOC15_WAIT_ON_RREG_OFFSET(JPEG, inst,
+				  regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS,
+				  core_off, 0x1F, 0x1F);
+	/* Set the LMI drop bits and write this pipe's bit to the core reset. */
+	WREG32_SOC15_OFFSET(JPEG, inst, regUVD_JMI0_JPEG_LMI_DROP,
+			    core_off, 0x1F);
+	WREG32_SOC15(JPEG, inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe);
+	/* Clear stall, drop and reset control again, in that order. */
+	WREG32_SOC15_OFFSET(JPEG, inst, regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+			    core_off, 0x00);
+	WREG32_SOC15_OFFSET(JPEG, inst, regUVD_JMI0_JPEG_LMI_DROP,
+			    core_off, 0x00);
+	WREG32_SOC15(JPEG, inst, regJPEG_CORE_RST_CTRL, 0x00);
+}
+
+static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ if (amdgpu_sriov_vf(ring->adev))
+ return -EOPNOTSUPP;
+ /* core stall/reset and JRBC start run between the ring reset begin/end helpers */
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ jpeg_v4_0_3_core_stall_reset(ring);
+ jpeg_v4_0_3_start_jrbc(ring);
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
.name = "jpeg_v4_0_3",
.early_init = jpeg_v4_0_3_early_init,
- .late_init = NULL,
.sw_init = jpeg_v4_0_3_sw_init,
.sw_fini = jpeg_v4_0_3_sw_fini,
.hw_init = jpeg_v4_0_3_hw_init,
@@ -1038,12 +1165,10 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
.resume = jpeg_v4_0_3_resume,
.is_idle = jpeg_v4_0_3_is_idle,
.wait_for_idle = jpeg_v4_0_3_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
.set_powergating_state = jpeg_v4_0_3_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
@@ -1052,16 +1177,18 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
- 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
+ 18 + 18 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
8 + 16,
.emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
.emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
.emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
.emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush,
.test_ring = amdgpu_jpeg_dec_ring_test_ring,
.test_ib = amdgpu_jpeg_dec_ring_test_ib,
.insert_nop = jpeg_v4_0_3_dec_ring_nop,
@@ -1073,6 +1200,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_3_ring_reset,
};
static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
@@ -1089,7 +1217,6 @@ static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
adev->jpeg.inst[i].aid_id =
jpeg_inst / adev->jpeg.num_inst_per_aid;
}
- DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
@@ -1097,6 +1224,11 @@ static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
.process = jpeg_v4_0_3_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_ras_irq_funcs = {
+ .set = jpeg_v4_0_3_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
{
int i;
@@ -1105,6 +1237,9 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
}
adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs;
+
+ adev->jpeg.inst->ras_poison_irq.num_types = 1;
+ adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v4_0_3_ras_irq_funcs;
}
const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
@@ -1201,14 +1336,147 @@ static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
jpeg_v4_0_3_inst_reset_ras_error_count(adev, i);
}
+static uint32_t jpeg_v4_0_3_query_poison_by_instance(struct amdgpu_device *adev,
+		uint32_t instance, uint32_t sub_block)
+{
+	uint32_t status, poisoned;
+
+	/* Each known sub-block has its own RAS status register; others yield 0. */
+	if (sub_block == AMDGPU_JPEG_V4_0_3_JPEG0) {
+		status = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+		poisoned = REG_GET_FIELD(status, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+	} else if (sub_block == AMDGPU_JPEG_V4_0_3_JPEG1) {
+		status = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+		poisoned = REG_GET_FIELD(status, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+	} else {
+		return 0;
+	}
+
+	if (!poisoned)
+		return 0;
+
+	dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+		 instance, sub_block);
+
+	return poisoned;
+}
+
+static bool jpeg_v4_0_3_query_ras_poison_status(struct amdgpu_device *adev)
+{
+	uint32_t i, blk, hits = 0;
+
+	/* Query every sub-block of every instance; there is no early exit. */
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+		for (blk = 0; blk < AMDGPU_JPEG_V4_0_3_MAX_SUB_BLOCK; blk++)
+			hits += jpeg_v4_0_3_query_poison_by_instance(adev, i, blk);
+	}
+	return hits != 0;
+}
+
static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
.query_ras_error_count = jpeg_v4_0_3_query_ras_error_count,
.reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
+ .query_poison_status = jpeg_v4_0_3_query_ras_poison_status,
+};
+
+static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+				       enum aca_smu_type type, void *data)
+{
+	struct aca_bank_info info;
+	u64 err_count;
+	int ret;
+
+	ret = aca_bank_info_decode(bank, &info);
+	if (ret)
+		return ret;
+
+	switch (type) {
+	case ACA_SMU_TYPE_UE:
+		/* An uncorrectable bank is always logged with a count of one. */
+		bank->aca_err_type = ACA_ERROR_TYPE_UE;
+		err_count = 1ULL;
+		break;
+	case ACA_SMU_TYPE_CE:
+		/* Correctable banks take their count from the MISC0 register. */
+		bank->aca_err_type = ACA_ERROR_TYPE_CE;
+		err_count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+					      err_count);
+}
+
+/* JPEG error codes; refer to the SMU driver interface header file */
+static int jpeg_v4_0_3_err_codes[] = {
+ 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */
+ 24, 25, 26, 27, 28, 29, 30, 31
+};
+
+static bool jpeg_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+					  enum aca_smu_type type, void *data)
+{
+	u32 inst_id = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+
+	/* Bit 0 of the instance id is masked off before the comparison. */
+	inst_id &= GENMASK(31, 1);
+	if (inst_id != mmSMNAID_AID0_MCA_SMU)
+		return false;
+
+	/*
+	 * The bank is rejected when aca_bank_check_error_codes() returns
+	 * non-zero for the JPEG error-code table.
+	 */
+	return !aca_bank_check_error_codes(handle->adev, bank,
+					   jpeg_v4_0_3_err_codes,
+					   ARRAY_SIZE(jpeg_v4_0_3_err_codes));
+}
+
+static const struct aca_bank_ops jpeg_v4_0_3_aca_bank_ops = {
+ .aca_bank_parser = jpeg_v4_0_3_aca_bank_parser,
+ .aca_bank_is_valid = jpeg_v4_0_3_aca_bank_is_valid,
};
+static const struct aca_info jpeg_v4_0_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &jpeg_v4_0_3_aca_bank_ops,
+};
+
+static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+ /* take the poison irq only when RAS covers this block and irq funcs are set */
+ if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+ adev->jpeg.inst->ras_poison_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ /* NOTE(review): a bind failure below skips amdgpu_irq_put(); confirm late_fini covers it */
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
+ &jpeg_v4_0_3_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
.ras_block = {
.hw_ops = &jpeg_v4_0_3_ras_hw_ops,
+ .ras_late_init = jpeg_v4_0_3_ras_late_init,
},
};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
index 22483dc66351..2e110d04af84 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
@@ -46,6 +46,29 @@
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+enum amdgpu_jpeg_v4_0_3_sub_block {
+ AMDGPU_JPEG_V4_0_3_JPEG0 = 0,
+ AMDGPU_JPEG_V4_0_3_JPEG1,
+
+ AMDGPU_JPEG_V4_0_3_MAX_SUB_BLOCK,
+};
+
extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;
+void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags);
+void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned int flags);
+void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr);
+void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring);
+void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
+void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring);
+void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring);
+void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+
#endif /* __JPEG_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
new file mode 100644
index 000000000000..54fd9c800c40
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -0,0 +1,872 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0_5.h"
+#include "mmsch_v4_0.h"
+
+#include "vcn/vcn_4_0_5_offset.h"
+#include "vcn/vcn_4_0_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL
+#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
+#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA
+#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX
+
+#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+#define regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET 0x4026
+#define regJPEG_SYS_INT_EN_INTERNAL_OFFSET 0x4141
+#define regJPEG_CGC_CTRL_INTERNAL_OFFSET 0x4161
+#define regJPEG_CGC_GATE_INTERNAL_OFFSET 0x4160
+#define regUVD_NO_OP_INTERNAL_OFFSET 0x0029
+
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_5[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
+static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+/* IH client id used for each JPEG instance's interrupt sources,
+ * indexed by instance number (see jpeg_v4_0_5_sw_init()). */
+static const int amdgpu_ih_clientid_jpeg[] = {
+	SOC15_IH_CLIENTID_VCN,
+	SOC15_IH_CLIENTID_VCN1
+};
+
+/**
+ * jpeg_v4_0_5_early_init - set instance count and function pointers
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * One JPEG instance for IP 4.0.5, two for 4.0.6, one ring per instance.
+ * Return: 0 on success, -EINVAL for any other UVD_HWIP version.
+ */
+static int jpeg_v4_0_5_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(4, 0, 5):
+ adev->jpeg.num_jpeg_inst = 1;
+ break;
+ case IP_VERSION(4, 0, 6):
+ adev->jpeg.num_jpeg_inst = 2;
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev,
+ "Failed to init vcn ip block(UVD_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, UVD_HWIP, 0));
+ return -EINVAL;
+ }
+
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v4_0_5_set_dec_ring_funcs(adev);
+ jpeg_v4_0_5_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_sw_init - sw init for JPEG block
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Register the JPEG trap and poison interrupt sources, run the common JPEG sw
+ * init and resume, then initialise one decode ring per non-harvested instance.
+ */
+static int jpeg_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int r, i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq);
+ if (r)
+ return r;
+
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = adev->jpeg.inst[i].ring_dec;
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
+ sprintf(ring->name, "jpeg_dec_%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq,
+ 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_PITCH);
+ }
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_5, ARRAY_SIZE(jpeg_reg_list_4_0_5));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_sw_fini - sw fini for JPEG block
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspend JPEG, then run amdgpu_jpeg_sysfs_reset_mask_fini() and the common
+ * amdgpu_jpeg_sw_fini().
+ *
+ * Return: 0 on success, otherwise the error of the failing helper.
+ */
+static int jpeg_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err = amdgpu_jpeg_suspend(adev);
+
+	/* nothing is torn down when the suspend step fails */
+	if (err)
+		return err;
+
+	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+	return amdgpu_jpeg_sw_fini(adev);
+}
+
+/**
+ * jpeg_v4_0_5_hw_init - test the JPEG decode rings
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run the ring test on every non-harvested instance.
+ *
+ * Return: 0 on success or when DPG is supported (ring test skipped),
+ * otherwise the error from amdgpu_ring_test_helper().
+ */
+static int jpeg_v4_0_5_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, err;
+
+	/* TODO: Enable ring test with DPG support */
+	if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)
+		return 0;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		err = amdgpu_ring_test_helper(adev->jpeg.inst[inst].ring_dec);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_5_hw_fini - stop the hardware block
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancel the idle work and, outside SR-IOV, power-gate JPEG when an instance
+ * is not gated yet and its UVD_JRBC_STATUS register reads non-zero. Returns 0.
+ */
+static int jpeg_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst;
+
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+		if (adev->jpeg.cur_state == AMD_PG_STATE_GATE)
+			continue;
+		if (RREG32_SOC15(JPEG, inst, regUVD_JRBC_STATUS))
+			jpeg_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+	}
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_5_suspend - suspend JPEG block
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run hw_fini first; the common JPEG suspend is only attempted when that
+ * succeeds.
+ *
+ * Return: 0 on success, otherwise the error of the failing step.
+ */
+static int jpeg_v4_0_5_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int err = jpeg_v4_0_5_hw_fini(ip_block);
+
+	if (err)
+		return err;
+
+	/* hw_fini succeeded: hand over to the common JPEG suspend */
+	return amdgpu_jpeg_suspend(ip_block->adev);
+}
+
+/**
+ * jpeg_v4_0_5_resume - resume JPEG block
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run the common JPEG resume first; hw_init is only attempted when that
+ * succeeds.
+ *
+ * Return: 0 on success, otherwise the error of the failing step.
+ */
+static int jpeg_v4_0_5_resume(struct amdgpu_ip_block *ip_block)
+{
+	int err = amdgpu_jpeg_resume(ip_block->adev);
+
+	if (err)
+		return err;
+
+	/* common resume succeeded: continue with hw_init */
+	return jpeg_v4_0_5_hw_init(ip_block);
+}
+
+/* Set up JPEG_CGC_CTRL and clear the JPEG_CGC_GATE bits of instance @inst. */
+static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+	uint32_t data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL);
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+		data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK);
+	} else {
+		/* use the field mask: ~..__SHIFT inverts a bit position, not the field */
+		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+	}
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE);
+	data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+		| JPEG_CGC_GATE__JPEG2_DEC_MASK
+		| JPEG_CGC_GATE__JMCIF_MASK
+		| JPEG_CGC_GATE__JRBBM_MASK);
+	WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data);
+}
+
+/* Set up JPEG_CGC_CTRL and set the JPEG_CGC_GATE bits of instance @inst. */
+static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+	uint32_t data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL);
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) {
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+		data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK;
+	} else {
+		/* use the field mask: ~..__SHIFT inverts a bit position, not the field */
+		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+	}
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE);
+	data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+		| JPEG_CGC_GATE__JPEG2_DEC_MASK
+		| JPEG_CGC_GATE__JMCIF_MASK
+		| JPEG_CGC_GATE__JRBBM_MASK);
+	WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data);
+}
+
+static void jpeg_engine_4_0_5_dpg_clock_gating_mode(struct amdgpu_device *adev,
+			int inst_idx, uint8_t indirect)
+{
+	/* DYN_CLOCK_MODE follows MGCG support; the delay fields use fixed values. */
+	uint32_t dyn_mode = (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) ? 1 : 0;
+	uint32_t ctrl = 0;
+	uint32_t gate = 0;
+
+	ctrl |= dyn_mode << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	ctrl |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	ctrl |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_CTRL_INTERNAL_OFFSET,
+				   ctrl, indirect);
+
+	/* JPEG_CGC_GATE is written as zero. */
+	WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_GATE_INTERNAL_OFFSET,
+				   gate, indirect);
+}
+
+static int jpeg_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG),
+ 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ /* disable anti hang mechanism: JPEG_POWER_STATUS field written as 0 */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode: JPEG_PG_MODE field written as 0 */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+ /* always 0: the result of the DLDO status wait above is not checked */
+ return 0;
+}
+
+static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+{
+ /* enable anti hang mechanism: JPEG_POWER_STATUS field written with its mask */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG),
+ 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+ /* always 0: the result of the DLDO status wait above is not checked */
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_start_dpg_mode - start JPEG block in DPG mode
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Enable the anti hang mechanism, set JPEG_PG_MODE, program the DPG-mode
+ * registers and set up the JRBC ring buffer registers from the decode ring.
+ */
+static void jpeg_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec;
+ uint32_t reg_data = 0;
+
+ /* enable anti hang mechanism */
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
+ reg_data |= 0x1;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, inst_idx, regUVD_IPX_DLDO_CONFIG),
+ 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, inst_idx, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+ if (indirect)
+ adev->jpeg.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr;
+
+ jpeg_engine_4_0_5_dpg_clock_gating_mode(adev, inst_idx, indirect);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET,
+ adev->gfx.config.gb_addr_config, indirect);
+ /* enable System Interrupt for JRBC */
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_SYS_INT_EN_INTERNAL_OFFSET,
+ JPEG_SYS_INT_EN__DJRBC_MASK, indirect);
+
+ /* add nop to workaround PSP size check */
+ WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regUVD_NO_OP_INTERNAL_OFFSET, 0, indirect);
+
+ if (indirect)
+ amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0);
+
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v4_0_5_stop_dpg_mode - stop JPEG block in DPG mode
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Read-modify-write UVD_JPEG_POWER_STATUS with the JPEG_PG_MODE field
+ * cleared; no other register is touched.
+ */
+static void jpeg_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+	uint32_t power_status;
+
+	power_status = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+	power_status &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+
+	WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, power_status);
+}
+
+/**
+ * jpeg_v4_0_5_start - start JPEG block
+ * @adev: amdgpu_device pointer
+ *
+ * Enable JPEG in DPM when DPM is on, then set up every non-harvested instance.
+ * Return: 0 on success or the error from disabling static power gating.
+ */
+static int jpeg_v4_0_5_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = adev->jpeg.inst[i].ring_dec;
+ /* doorbell programming is done for every playback */
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i);
+
+ WREG32_SOC15(VCN, i, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ jpeg_v4_0_5_start_dpg_mode(adev, i, adev->jpeg.indirect_sram);
+ continue;
+ }
+
+ /* disable power gating */
+ r = jpeg_v4_0_5_disable_static_power_gating(adev, i);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_5_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_stop - stop JPEG block
+ * @adev: amdgpu_device pointer
+ *
+ * Stop every non-harvested instance, then disable JPEG in DPM when DPM is on.
+ * Return: 0 on success or the error from enabling static power gating.
+ */
+static int jpeg_v4_0_5_stop(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ jpeg_v4_0_5_stop_dpg_mode(adev, i);
+ continue;
+ }
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_5_enable_clock_gating(adev, i);
+
+ /* enable power gating */
+ r = jpeg_v4_0_5_enable_static_power_gating(adev, i);
+ if (r)
+ return r;
+ }
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v4_0_5_dec_ring_get_rptr - get read pointer
+ * @ring: amdgpu_ring pointer
+ *
+ * Return: value of UVD_JRBC_RB_RPTR for the instance selected by @ring->me.
+ */
+static uint64_t jpeg_v4_0_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;	/* used by RREG32_SOC15() */
+	uint32_t rptr = RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_RPTR);
+
+	return rptr;
+}
+
+/**
+ * jpeg_v4_0_5_dec_ring_get_wptr - read the decode ring write pointer
+ *
+ * @ring: amdgpu_ring pointer; ring->me selects the JPEG instance
+ *
+ * Return: the value stored at ring->wptr_cpu_addr when the ring uses a
+ * doorbell, otherwise the value read from regUVD_JRBC_RB_WPTR.
+ */
+static uint64_t jpeg_v4_0_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	/* the register accessor macro picks up the local named adev */
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/**
+ * jpeg_v4_0_5_dec_ring_set_wptr - publish the decode ring write pointer
+ *
+ * @ring: amdgpu_ring pointer; ring->me selects the JPEG instance
+ *
+ * Writes the low 32 bits of ring->wptr either to the CPU-side copy plus the
+ * doorbell (doorbell rings) or to regUVD_JRBC_RB_WPTR.
+ */
+static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	/* the register/doorbell macros pick up the local named adev */
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t wptr = lower_32_bits(ring->wptr);
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR, wptr);
+		return;
+	}
+
+	*ring->wptr_cpu_addr = wptr;
+	WDOORBELL32(ring->doorbell_index, wptr);
+}
+
+/*
+ * Report whether all non-harvested JPEG instances have RB_JOB_DONE set in
+ * regUVD_JRBC_STATUS.  Every such instance is read, even after one has
+ * already been found busy.
+ */
+static bool jpeg_v4_0_5_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool idle = true;
+	int inst;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) {
+		uint32_t status;
+
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		status = RREG32_SOC15(JPEG, inst, regUVD_JRBC_STATUS);
+		if ((status & UVD_JRBC_STATUS__RB_JOB_DONE_MASK) !=
+		    UVD_JRBC_STATUS__RB_JOB_DONE_MASK)
+			idle = false;
+	}
+
+	return idle;
+}
+
+/**
+ * jpeg_v4_0_5_wait_for_idle - wait until every JPEG instance is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Waits for RB_JOB_DONE in regUVD_JRBC_STATUS on each instance that is not
+ * masked out by harvest_config.  The wait stops at the first instance whose
+ * wait fails.
+ *
+ * Return: 0 once all instances report idle, otherwise the first non-zero
+ * value returned by SOC15_WAIT_ON_RREG().
+ */
+static int jpeg_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int i, r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+
+		/*
+		 * Do not return on success here: the remaining instances
+		 * still have to be waited on.
+		 */
+		r = SOC15_WAIT_ON_RREG(JPEG, i, regUVD_JRBC_STATUS,
+				       UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+				       UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/*
+ * Gate or ungate the clocks of every non-harvested JPEG instance.
+ * Gating is refused with -EBUSY when jpeg_v4_0_5_is_idle() reports the
+ * block busy; ungating is unconditional.  Returns 0 otherwise.
+ */
+static int jpeg_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					     enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		if (state != AMD_CG_STATE_GATE) {
+			jpeg_v4_0_5_disable_clock_gating(adev, inst);
+			continue;
+		}
+
+		if (!jpeg_v4_0_5_is_idle(ip_block))
+			return -EBUSY;
+
+		jpeg_v4_0_5_enable_clock_gating(adev, inst);
+	}
+
+	return 0;
+}
+
+/*
+ * Move the JPEG block to the requested power-gating state by stopping
+ * (gate) or starting (ungate) it.  Under SR-IOV VF the state is simply
+ * recorded as ungated.  adev->jpeg.cur_state is updated only when the
+ * stop/start call succeeds.
+ */
+static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					     enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err;
+
+	if (amdgpu_sriov_vf(adev)) {
+		adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
+	/* nothing to do when already in the requested state */
+	if (adev->jpeg.cur_state == state)
+		return 0;
+
+	if (state != AMD_PG_STATE_GATE)
+		err = jpeg_v4_0_5_start(adev);
+	else
+		err = jpeg_v4_0_5_stop(adev);
+
+	if (err)
+		return err;
+
+	adev->jpeg.cur_state = state;
+
+	return 0;
+}
+
+/*
+ * Interrupt handler for the JPEG 4.0.5 IRQ source.  The IH client id
+ * selects the JPEG instance (VCN -> 0, VCN1 -> 1) and the source id selects
+ * the action: fence processing for decode traps, poison handling for the
+ * two poison sources.  Unknown ids are logged.  Always returns 0.
+ */
+static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev,
+					 struct amdgpu_irq_src *source,
+					 struct amdgpu_iv_entry *entry)
+{
+	uint32_t ip_instance;
+
+	DRM_DEBUG("IH: JPEG TRAP\n");
+
+	if (entry->client_id == SOC15_IH_CLIENTID_VCN) {
+		ip_instance = 0;
+	} else if (entry->client_id == SOC15_IH_CLIENTID_VCN1) {
+		ip_instance = 1;
+	} else {
+		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+		return 0;
+	}
+
+	switch (entry->src_id) {
+	case VCN_4_0__SRCID__JPEG_DECODE:
+		amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec);
+		break;
+	case VCN_4_0__SRCID_DJPEG0_POISON:
+	case VCN_4_0__SRCID_EJPEG0_POISON:
+		amdgpu_jpeg_process_poison_irq(adev, source, entry);
+		break;
+	default:
+		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+			      entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Ring reset callback: brackets a full jpeg_v4_0_5_stop()/start() cycle
+ * with the common ring reset begin/end helpers.  If stop or start fails its
+ * error is returned and the end helper is not called.
+ */
+static int jpeg_v4_0_5_ring_reset(struct amdgpu_ring *ring,
+				  unsigned int vmid,
+				  struct amdgpu_fence *timedout_fence)
+{
+	int err;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	err = jpeg_v4_0_5_stop(ring->adev);
+	if (!err)
+		err = jpeg_v4_0_5_start(ring->adev);
+	if (err)
+		return err;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * IP-level callbacks of the JPEG 4.0.5 block; referenced through
+ * jpeg_v4_0_5_ip_block.funcs.  The dump/print hooks are the shared
+ * amdgpu_jpeg_* helpers, all others are local to this file.
+ */
+static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = {
+ .name = "jpeg_v4_0_5",
+ .early_init = jpeg_v4_0_5_early_init,
+ .sw_init = jpeg_v4_0_5_sw_init,
+ .sw_fini = jpeg_v4_0_5_sw_fini,
+ .hw_init = jpeg_v4_0_5_hw_init,
+ .hw_fini = jpeg_v4_0_5_hw_fini,
+ .suspend = jpeg_v4_0_5_suspend,
+ .resume = jpeg_v4_0_5_resume,
+ .is_idle = jpeg_v4_0_5_is_idle,
+ .wait_for_idle = jpeg_v4_0_5_wait_for_idle,
+ .set_clockgating_state = jpeg_v4_0_5_set_clockgating_state,
+ .set_powergating_state = jpeg_v4_0_5_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+/*
+ * Decode ring callbacks for JPEG 4.0.5.  Pointer handling and reset are
+ * local to this file; the packet emitters are reused from jpeg_v2_0 and the
+ * tests/begin/end hooks from the common amdgpu_jpeg code.
+ */
+static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr,
+ .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr,
+ .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_5_ring_reset,
+};
+
+/*
+ * Attach the decode ring callbacks to each non-harvested instance and
+ * record the instance index in ring->me.
+ */
+static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int inst;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		ring = adev->jpeg.inst[inst].ring_dec;
+		ring->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs;
+		ring->me = inst;
+	}
+}
+
+/* IRQ source callbacks; only .process is provided for this block. */
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = {
+ .process = jpeg_v4_0_5_process_interrupt,
+};
+
+/*
+ * Set up the IRQ source of each non-harvested instance: one interrupt type,
+ * handled through jpeg_v4_0_5_irq_funcs.
+ */
+static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_irq_src *irq;
+	int inst;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) {
+		if (adev->jpeg.harvest_config & (1 << inst))
+			continue;
+
+		irq = &adev->jpeg.inst[inst].irq;
+		irq->num_types = 1;
+		irq->funcs = &jpeg_v4_0_5_irq_funcs;
+	}
+}
+
+/* Exported IP block descriptor for JPEG 4.0.5 (declared in jpeg_v4_0_5.h). */
+const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 4,
+ .minor = 0,
+ .rev = 5,
+ .funcs = &jpeg_v4_0_5_ip_funcs,
+};
+
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h
new file mode 100644
index 000000000000..c5eee572079c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V4_0_5_H__
+#define __JPEG_V4_0_5_H__
+
+/*
+ * Sub-block identifiers for JPEG 4.0.5.  AMDGPU_JPEG_V4_0_5_MAX_SUB_BLOCK
+ * comes last and therefore equals the number of sub-blocks listed before it.
+ */
+enum amdgpu_jpeg_v4_0_5_sub_block {
+ AMDGPU_JPEG_V4_0_5_JPEG0 = 0,
+
+ AMDGPU_JPEG_V4_0_5_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block;
+
+#endif /* __JPEG_V4_0_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
new file mode 100644
index 000000000000..46bf15dce2bd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -0,0 +1,733 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+#include "jpeg_v4_0_3.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "jpeg_v5_0_0.h"
+
+/*
+ * JPEG registers (instance 0) handed to amdgpu_jpeg_reg_dump_init() from
+ * jpeg_v5_0_0_sw_init().
+ */
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
+static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+
+/**
+ * jpeg_v5_0_0_early_init - set instance counts and function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Declares one JPEG instance with one ring, then installs the decode ring
+ * and irq function pointers.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v5_0_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* this IP version is driven as a single instance with a single ring */
+ adev->jpeg.num_jpeg_inst = 1;
+ adev->jpeg.num_jpeg_rings = 1;
+
+ jpeg_v5_0_0_set_dec_ring_funcs(adev);
+ jpeg_v5_0_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Registers the decode interrupt source, runs the common amdgpu_jpeg
+ * sw_init/resume helpers, initializes the decode ring, records the pitch
+ * register offsets, sets up the register dump list and the reset mask.
+ *
+ * Return: 0 on success, or the first error returned by one of the helpers.
+ */
+static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ /* one slot above the doorbell base (vcn_ring0_1 << 1) used in hw_init */
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ /* internal and external offsets of the JPEG pitch register */
+ adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0, ARRAY_SIZE(jpeg_reg_list_5_0));
+ if (r)
+ return r;
+
+ /* per-queue reset is advertised only when not running as an SR-IOV VF */
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_0_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspends JPEG, removes the reset-mask sysfs entry and releases the common
+ * software state.  If the suspend step fails, nothing else is torn down.
+ *
+ * Return: 0 on success, otherwise the error from amdgpu_jpeg_suspend() or
+ * amdgpu_jpeg_sw_fini().
+ */
+static int jpeg_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err = amdgpu_jpeg_suspend(adev);
+
+	if (err)
+		return err;
+
+	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+	return amdgpu_jpeg_sw_fini(adev);
+}
+
+/**
+ * jpeg_v5_0_0_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Programs the VCN doorbell range and, unless JPEG DPG is supported, runs
+ * the ring test.
+ *
+ * Return: 0 on success or when the test is skipped, otherwise the error
+ * from amdgpu_ring_test_helper().
+ */
+static int jpeg_v5_0_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+
+	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+					     adev->doorbell_index.vcn.vcn_ring0_1 << 1,
+					     0);
+
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG))
+		return amdgpu_ring_test_helper(ring);
+
+	/* Skip ring test because pause DPG is not implemented. */
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_0_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancels the idle work, then gates the block if it is not already gated
+ * and regUVD_JRBC_STATUS reads non-zero.  The result of the gating request
+ * is not propagated.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	/* already gated: do not touch the status register */
+	if (adev->jpeg.cur_state == AMD_PG_STATE_GATE)
+		return 0;
+
+	if (RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) != 0)
+		jpeg_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_0_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs hw_fini first and, if that succeeds, the common JPEG suspend.
+ *
+ * Return: 0 on success, otherwise the error of the step that failed.
+ */
+static int jpeg_v5_0_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int err = jpeg_v5_0_0_hw_fini(ip_block);
+
+	if (err)
+		return err;
+
+	return amdgpu_jpeg_suspend(ip_block->adev);
+}
+
+/**
+ * jpeg_v5_0_0_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Runs the common JPEG resume first and, if that succeeds, hw_init.
+ *
+ * Return: 0 on success, otherwise the error of the step that failed.
+ */
+static int jpeg_v5_0_0_resume(struct amdgpu_ip_block *ip_block)
+{
+	int err = amdgpu_jpeg_resume(ip_block->adev);
+
+	if (err)
+		return err;
+
+	return jpeg_v5_0_0_hw_init(ip_block);
+}
+
+/*
+ * Disable JPEG clock gating on instance 0: write 0 to regJPEG_CGC_GATE,
+ * then clear the JPEG0_DEC_MODE and JPEG_ENC_MODE fields of
+ * regJPEG_CGC_CTRL with a read-modify-write.
+ */
+static void jpeg_v5_0_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ /* clear every gate bit first */
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+ data &= ~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JPEG_ENC_MODE_MASK);
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
+}
+
+/*
+ * Enable JPEG clock gating on instance 0: set the JPEG0_DEC_MODE field in
+ * regJPEG_CGC_CTRL, then set the JPEG0_DEC, JPEG_ENC, JMCIF and JRBBM gate
+ * bits in regJPEG_CGC_GATE.  Both updates are read-modify-write.
+ *
+ * NOTE(review): jpeg_v5_0_0_disable_clock_gating() also clears
+ * JPEG_ENC_MODE, which is not set here - confirm the asymmetry is intended.
+ */
+static void jpeg_v5_0_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL);
+
+ data |= 1 << JPEG_CGC_CTRL__JPEG0_DEC_MODE__SHIFT;
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data);
+}
+
+/*
+ * Disable power gating on instance 0: write ONO1_PWR_CONFIG = 1 to
+ * regUVD_IPX_DLDO_CONFIG, wait for the ONO1_PWR_STATUS field of
+ * regUVD_IPX_DLDO_STATUS to read 0, then clear the JPEG_POWER_STATUS field.
+ *
+ * Always returns 0.
+ * NOTE(review): the result of SOC15_WAIT_ON_RREG() is discarded, so a
+ * failed wait is not reported to the caller - confirm this is intentional.
+ */
+static int jpeg_v5_0_0_disable_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(JPEG, 0, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ return 0;
+}
+
+/*
+ * Enable power gating on instance 0: set the JPEG_POWER_STATUS field and,
+ * only when AMD_PG_SUPPORT_JPEG is set in pg_flags, write
+ * ONO1_PWR_CONFIG = 2 to regUVD_IPX_DLDO_CONFIG and wait for the
+ * ONO1_PWR_STATUS field to read 1.
+ *
+ * Always returns 0.
+ * NOTE(review): the result of SOC15_WAIT_ON_RREG() is discarded, so a
+ * failed wait is not reported to the caller - confirm this is intentional.
+ */
+static int jpeg_v5_0_0_enable_power_gating(struct amdgpu_device *adev)
+{
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG),
+ 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+/*
+ * Program the JPEG clock-gating control for DPG mode.
+ *
+ * The JPEG_CGC_CTRL value has DYN_CLOCK_MODE set only when
+ * AMD_CG_SUPPORT_JPEG_MGCG is present in cg_flags, CLK_GATE_DLY_TIMER = 1
+ * and CLK_OFF_DELAY = 4.  JPEG_CGC_GATE is then written as 0.
+ *
+ * @indirect selects the write path: non-zero queues both values with
+ * ADD_SOC24_JPEG_TO_DPG_SRAM(), zero issues them through
+ * WREG32_SOC24_JPEG_DPG_MODE().
+ */
+static void jpeg_engine_5_0_0_dpg_clock_gating_mode(struct amdgpu_device *adev,
+ int inst_idx, uint8_t indirect)
+{
+ uint32_t data = 0;
+
+ // JPEG disable CGC
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data = 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+
+ if (indirect) {
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_CTRL, data, indirect);
+
+ // Turn on All JPEG clocks
+ data = 0;
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_CGC_GATE, data, indirect);
+ } else {
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_CTRL, data, indirect);
+
+ // Turn on All JPEG clocks
+ data = 0;
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_CGC_GATE, data, indirect);
+ }
+}
+
+/**
+ * jpeg_v5_0_0_start_dpg_mode - Jpeg start with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Start JPEG block with dpg mode: enable power gating and the PG mode bit,
+ * program clock gating, tiling config and the JRBC interrupt enable (either
+ * queued into the DPG SRAM image or written directly), then set up the
+ * doorbell and the JRBC ring buffer registers.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec;
+ uint32_t reg_data = 0;
+
+ jpeg_v5_0_0_enable_power_gating(adev);
+
+ // enable dynamic power gating mode
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+
+ /* rewind the SRAM write cursor to the start of the CPU-side buffer */
+ if (indirect)
+ adev->jpeg.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr;
+
+ jpeg_engine_5_0_0_dpg_clock_gating_mode(adev, inst_idx, indirect);
+
+ /*
+ * NOTE(review): the direct-write branches below pass a literal 1 as the
+ * last macro argument, whereas jpeg_engine_5_0_0_dpg_clock_gating_mode()
+ * passes @indirect (0 on that path) - confirm which value is intended.
+ */
+
+ /* MJPEG global tiling registers */
+ if (indirect)
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, indirect);
+ else
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 1);
+
+ /* enable System Interrupt for JRBC */
+ if (indirect)
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipJPEG_SYS_INT_EN,
+ JPEG_SYS_INT_EN__DJRBC0_MASK, indirect);
+ else
+ WREG32_SOC24_JPEG_DPG_MODE(inst_idx, vcnipJPEG_SYS_INT_EN,
+ JPEG_SYS_INT_EN__DJRBC0_MASK, 1);
+
+ if (indirect) {
+ /* add nop to workaround PSP size check */
+ ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, vcnipUVD_NO_OP, 0, indirect);
+
+ amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0);
+ }
+
+ /* enable the JPEG doorbell at the ring's doorbell index */
+ WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ /* program ring base and size, zero both pointers */
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ /* resync the software write pointer with the hardware value */
+ ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_stop_dpg_mode - Jpeg stop with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Stop JPEG block with dpg mode by clearing the JPEG_PG_MODE bit of
+ * regUVD_JPEG_POWER_STATUS (read-modify-write).
+ */
+static void jpeg_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t reg_data = 0;
+
+ reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS);
+ reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK;
+ WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data);
+}
+
+/**
+ * jpeg_v5_0_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block.  When JPEG DPG is supported the whole
+ * start is delegated to jpeg_v5_0_0_start_dpg_mode(); otherwise power and
+ * clock gating are disabled and the JRBC ring is programmed directly.
+ *
+ * Return: 0 on success, otherwise the error from the DPG start or from
+ * jpeg_v5_0_0_disable_power_gating().
+ */
+static int jpeg_v5_0_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ r = jpeg_v5_0_0_start_dpg_mode(adev, 0, adev->jpeg.indirect_sram);
+ return r;
+ }
+
+ /* disable power gating */
+ r = jpeg_v5_0_0_disable_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v5_0_0_disable_clock_gating(adev);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC0_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC0_MASK);
+
+ /* enable the JPEG doorbell at the ring's doorbell index */
+ WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+ ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+
+ /* program ring base and size, zero both pointers */
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ /* resync the software write pointer with the hardware value */
+ ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the JPEG block.  With JPEG DPG support only the DPG stop is run;
+ * otherwise JMI is put in soft reset and clock and power gating are
+ * enabled.  Afterwards amdgpu_dpm_enable_jpeg(adev, false) is called when
+ * DPM is enabled.
+ *
+ * Return: 0 on success, or the error from
+ * jpeg_v5_0_0_enable_power_gating(), in which case the DPM call is skipped.
+ */
+static int jpeg_v5_0_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {
+ jpeg_v5_0_0_stop_dpg_mode(adev, 0);
+ } else {
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v5_0_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v5_0_0_enable_power_gating(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_0_dec_ring_get_rptr - read the decode ring read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Return: the value read from regUVD_JRBC_RB_RPTR of instance 0.
+ */
+static uint64_t jpeg_v5_0_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	/* the register accessor macro picks up the local named adev */
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t rptr;
+
+	rptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR);
+
+	return rptr;
+}
+
+/**
+ * jpeg_v5_0_0_dec_ring_get_wptr - read the decode ring write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Return: the value stored at ring->wptr_cpu_addr when the ring uses a
+ * doorbell, otherwise the value read from regUVD_JRBC_RB_WPTR of instance 0.
+ */
+static uint64_t jpeg_v5_0_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	/* the register accessor macro picks up the local named adev */
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR);
+
+	return *ring->wptr_cpu_addr;
+}
+
+/**
+ * jpeg_v5_0_0_dec_ring_set_wptr - publish the decode ring write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Writes the low 32 bits of ring->wptr either to the CPU-side copy plus the
+ * doorbell (doorbell rings) or to regUVD_JRBC_RB_WPTR of instance 0.
+ */
+static void jpeg_v5_0_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	/* the register/doorbell macros pick up the local named adev */
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t wptr = lower_32_bits(ring->wptr);
+
+	if (!ring->use_doorbell) {
+		WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, wptr);
+		return;
+	}
+
+	*ring->wptr_cpu_addr = wptr;
+	WDOORBELL32(ring->doorbell_index, wptr);
+}
+
+/*
+ * Report whether RB_JOB_DONE is set in regUVD_JRBC_STATUS of instance 0.
+ */
+static bool jpeg_v5_0_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	/* the register accessor macro picks up the local named adev */
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t status = RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS);
+
+	return (status & UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+	       UVD_JRBC_STATUS__RB_JOB_DONE_MASK;
+}
+
+/*
+ * Wait for RB_JOB_DONE to be set in regUVD_JRBC_STATUS of instance 0 and
+ * return the result of SOC15_WAIT_ON_RREG().
+ */
+static int jpeg_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+/*
+ * Gate or ungate the JPEG clocks.  Gating is refused with -EBUSY when
+ * jpeg_v5_0_0_is_idle() reports the block busy; ungating is unconditional.
+ * Returns 0 otherwise.
+ */
+static int jpeg_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					     enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (state != AMD_CG_STATE_GATE) {
+		jpeg_v5_0_0_disable_clock_gating(adev);
+		return 0;
+	}
+
+	if (!jpeg_v5_0_0_is_idle(ip_block))
+		return -EBUSY;
+
+	jpeg_v5_0_0_enable_clock_gating(adev);
+
+	return 0;
+}
+
+/*
+ * Move the JPEG block to the requested power-gating state by stopping
+ * (gate) or starting (ungate) it.  adev->jpeg.cur_state is updated only
+ * when the stop/start call succeeds.
+ */
+static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					     enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err;
+
+	/* nothing to do when already in the requested state */
+	if (adev->jpeg.cur_state == state)
+		return 0;
+
+	if (state != AMD_PG_STATE_GATE)
+		err = jpeg_v5_0_0_start(adev);
+	else
+		err = jpeg_v5_0_0_stop(adev);
+
+	if (err)
+		return err;
+
+	adev->jpeg.cur_state = state;
+
+	return 0;
+}
+
+/*
+ * .set callback of the JPEG IRQ source.  Intentionally a no-op: none of the
+ * arguments are used and 0 is returned unconditionally.
+ */
+static int jpeg_v5_0_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/*
+ * Interrupt handler for the JPEG 5.0.0 IRQ source: decode traps trigger
+ * fence processing on the decode ring, any other source id is logged.
+ * Always returns 0.
+ */
+static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev,
+					 struct amdgpu_irq_src *source,
+					 struct amdgpu_iv_entry *entry)
+{
+	DRM_DEBUG("IH: JPEG TRAP\n");
+
+	if (entry->src_id == VCN_5_0__SRCID__JPEG_DECODE) {
+		amdgpu_fence_process(adev->jpeg.inst->ring_dec);
+		return 0;
+	}
+
+	DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+		      entry->src_id, entry->src_data[0]);
+
+	return 0;
+}
+
+/*
+ * Ring reset callback: brackets a full jpeg_v5_0_0_stop()/start() cycle
+ * with the common ring reset begin/end helpers.  If stop or start fails its
+ * error is returned and the end helper is not called.
+ */
+static int jpeg_v5_0_0_ring_reset(struct amdgpu_ring *ring,
+				  unsigned int vmid,
+				  struct amdgpu_fence *timedout_fence)
+{
+	int err;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	err = jpeg_v5_0_0_stop(ring->adev);
+	if (!err)
+		err = jpeg_v5_0_0_start(ring->adev);
+	if (err)
+		return err;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * IP-level callbacks of the JPEG 5.0.0 block; referenced through
+ * jpeg_v5_0_0_ip_block.funcs.  The dump/print hooks are the shared
+ * amdgpu_jpeg_* helpers, all others are local to this file.
+ */
+static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = {
+ .name = "jpeg_v5_0_0",
+ .early_init = jpeg_v5_0_0_early_init,
+ .sw_init = jpeg_v5_0_0_sw_init,
+ .sw_fini = jpeg_v5_0_0_sw_fini,
+ .hw_init = jpeg_v5_0_0_hw_init,
+ .hw_fini = jpeg_v5_0_0_hw_fini,
+ .suspend = jpeg_v5_0_0_suspend,
+ .resume = jpeg_v5_0_0_resume,
+ .is_idle = jpeg_v5_0_0_is_idle,
+ .wait_for_idle = jpeg_v5_0_0_wait_for_idle,
+ .set_clockgating_state = jpeg_v5_0_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v5_0_0_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+/*
+ * Decode ring callbacks for JPEG 5.0.0.  Pointer handling and reset are
+ * local to this file; the packet emitters are reused from jpeg_v4_0_3 and
+ * the tests/begin/end hooks from the common amdgpu_jpeg code.
+ */
+static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr,
+ .parse_cs = amdgpu_jpeg_dec_parse_cs,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
+ 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v5_0_0_ring_reset,
+};
+
+/* Attach the decode ring callbacks to the decode ring of the first instance. */
+static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec->funcs = &jpeg_v5_0_0_dec_ring_vm_funcs;
+}
+
+/* IRQ source callbacks; .set is a no-op stub, .process handles the traps. */
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_0_irq_funcs = {
+ .set = jpeg_v5_0_0_set_interrupt_state,
+ .process = jpeg_v5_0_0_process_interrupt,
+};
+
+/*
+ * Set up the IRQ source of the first instance: one interrupt type, handled
+ * through jpeg_v5_0_0_irq_funcs.
+ */
+static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v5_0_0_irq_funcs;
+}
+
+/* Exported IP block descriptor for JPEG 5.0.0 (declared in jpeg_v5_0_0.h). */
+const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 5,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v5_0_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h
new file mode 100644
index 000000000000..5abb96159814
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V5_0_0_H__
+#define __JPEG_V5_0_0_H__
+
+/*
+ * Values passed as the register argument of ADD_SOC24_JPEG_TO_DPG_SRAM() and
+ * WREG32_SOC24_JPEG_DPG_MODE() in jpeg_v5_0_0.c.
+ */
+#define vcnipJPEG_CGC_GATE 0x4160
+#define vcnipJPEG_CGC_CTRL 0x4161
+#define vcnipJPEG_SYS_INT_EN 0x4141
+#define vcnipUVD_NO_OP 0x0029
+#define vcnipJPEG_DEC_GFX10_ADDR_CONFIG 0x404A
+
+extern const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block;
+
+#endif /* __JPEG_V5_0_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
new file mode 100644
index 000000000000..baf097d2e1ac
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
@@ -0,0 +1,1100 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v4_0_3.h"
+#include "jpeg_v5_0_1.h"
+#include "mmsch_v5_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+
+static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v5_0_1_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+static int amdgpu_ih_srcid_jpeg[] = { /* trap source id of each decode ring, indexed by ring number */
+	VCN_5_0__SRCID__JPEG_DECODE,
+	VCN_5_0__SRCID__JPEG1_DECODE,
+	VCN_5_0__SRCID__JPEG2_DECODE,
+	VCN_5_0__SRCID__JPEG3_DECODE,
+	VCN_5_0__SRCID__JPEG4_DECODE,
+	VCN_5_0__SRCID__JPEG5_DECODE,
+	VCN_5_0__SRCID__JPEG6_DECODE,
+	VCN_5_0__SRCID__JPEG7_DECODE,
+	VCN_5_0__SRCID__JPEG8_DECODE,
+	VCN_5_0__SRCID__JPEG9_DECODE,
+};
+
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0_1[] = { /* list handed to amdgpu_jpeg_reg_dump_init() by sw_init */
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_RPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_WPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_STATUS),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_RPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_WPTR),
+	SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_STATUS),
+};
+
+/* Register offset for decode pipe @pipe; callers in this file pass 0 themselves for pipe 0. */
+static int jpeg_v5_0_1_core_reg_offset(u32 pipe)
+{
+	u32 bias = (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3) ? 0xc80 : 0x440;
+
+	return (0x40 * pipe) - bias;
+}
+
+/**
+ * jpeg_v5_0_1_early_init - validate instance count and install callbacks
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * Return: 0 on success, -ENOENT if the JPEG instance count is 0 or too large.
+ */
+static int jpeg_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (adev->jpeg.num_jpeg_inst == 0)
+		return -ENOENT;
+	if (adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES)
+		return -ENOENT;
+
+	adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+	jpeg_v5_0_1_set_dec_ring_funcs(adev);
+	jpeg_v5_0_1_set_irq_funcs(adev);
+	jpeg_v5_0_1_set_ras_funcs(adev);
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Add trap and poison irq ids, init all decode rings, then RAS, reg dump and reset mask.
+ */
+static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int i, j, r, jpeg_inst;
+
+	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+		/* JPEG TRAP: one source id per ring, all added to the first instance's irq source */
+		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+				amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+		if (r)
+			return r;
+	}
+	/* JPEG DJPEG POISON EVENT */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+			VCN_5_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+	if (r)
+		return r;
+
+	/* JPEG EJPEG POISON EVENT (same irq source as the DJPEG poison event) */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+			VCN_5_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_sw_init(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_resume(adev);
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		jpeg_inst = GET_INST(JPEG, i);
+
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			ring = &adev->jpeg.inst[i].ring_dec[j];
+			ring->use_doorbell = true;
+			ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+			if (!amdgpu_sriov_vf(adev)) { /* bare metal: stride 11 per instance, ring j at 1 + j */
+				ring->doorbell_index =
+					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+					1 + j + 11 * jpeg_inst;
+			} else { /* SR-IOV VF: stride 32 per instance, ring j at 2 + j */
+				ring->doorbell_index =
+					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+					2 + j + 32 * jpeg_inst;
+			}
+			sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+			r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+						AMDGPU_RING_PRIO_DEFAULT, NULL);
+			if (r)
+				return r;
+
+			adev->jpeg.internal.jpeg_pitch[j] = /* same internal offset for every ring */
+				regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+			adev->jpeg.inst[i].external.jpeg_pitch[j] =
+				SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0,
+						  (j ? jpeg_v5_0_1_core_reg_offset(j) : 0));
+		}
+	}
+
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) {
+		r = amdgpu_jpeg_ras_sw_init(adev);
+		if (r) {
+			dev_err(adev->dev, "Failed to initialize jpeg ras block!\n");
+			return r;
+		}
+	}
+
+	r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0_1, ARRAY_SIZE(jpeg_reg_list_5_0_1));
+	if (r)
+		return r;
+
+	adev->jpeg.supported_reset = /* mask is taken from the first ring of the first instance */
+		amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+	if (!amdgpu_sriov_vf(adev))
+		adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+	r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+	return r;
+}
+
+/**
+ * jpeg_v5_0_1_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspend JPEG, remove the reset-mask sysfs entry and free sw state.
+ *
+ * Return: 0 on success, otherwise the error of the failing step.
+ */
+static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int err = amdgpu_jpeg_suspend(adev);
+
+	/* Nothing further is torn down when the suspend step fails. */
+	if (err)
+		return err;
+
+	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+	return amdgpu_jpeg_sw_fini(adev);
+}
+
+/**
+ * jpeg_v5_0_1_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * Return: 0 on success, else the error from the SR-IOV start or from a ring test.
+ */
+static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int i, j, r, jpeg_inst;
+
+	if (amdgpu_sriov_vf(adev)) { /* VF path: no doorbell registers, no ring tests */
+		r = jpeg_v5_0_1_start_sriov(adev);
+		if (r)
+			return r;
+
+		for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+			for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+				ring = &adev->jpeg.inst[i].ring_dec[j];
+				ring->wptr = 0;
+				ring->wptr_old = 0;
+				jpeg_v5_0_1_dec_ring_set_wptr(ring);
+				ring->sched.ready = true; /* marked ready without a ring test */
+			}
+		}
+		return 0;
+	}
+	if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+		adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		jpeg_inst = GET_INST(JPEG, i);
+		ring = adev->jpeg.inst[i].ring_dec; /* first ring of the instance */
+		if (ring->use_doorbell)
+			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst,
+				adev->jpeg.inst[i].aid_id);
+
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			ring = &adev->jpeg.inst[i].ring_dec[j];
+			if (ring->use_doorbell) /* the pipe number is the register offset here */
+				WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL,
+						    ring->pipe,
+						    ring->doorbell_index <<
+						    VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+						    VCN_JPEG_DB_CTRL__EN_MASK);
+			r = amdgpu_ring_test_helper(ring);
+			if (r)
+				return r;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancel idle work, power-gate JPEG and drop the RAS poison irq (both non SR-IOV only).
+ */
+static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret = 0;
+
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	/* Gate the block unless it already is; never done for an SR-IOV VF. */
+	if (!amdgpu_sriov_vf(adev) && adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+		ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+
+	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG) || amdgpu_sriov_vf(adev))
+		return ret;
+
+	amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+	return ret;
+}
+
+/**
+ * jpeg_v5_0_1_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run hw_fini first; the common JPEG suspend only runs when that
+ * succeeded.
+ *
+ * Return: 0 on success, otherwise the error from hw_fini or from
+ * amdgpu_jpeg_suspend().
+ */
+static int jpeg_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+	int err = jpeg_v5_0_1_hw_fini(ip_block);
+
+	if (err)
+		return err;
+
+	return amdgpu_jpeg_suspend(ip_block->adev);
+}
+
+/**
+ * jpeg_v5_0_1_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run the common JPEG resume first; hw_init only runs when that
+ * succeeded.
+ *
+ * Return: 0 on success, otherwise the error from amdgpu_jpeg_resume()
+ * or from hw_init.
+ */
+static int jpeg_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+	int err = amdgpu_jpeg_resume(ip_block->adev);
+
+	if (err)
+		return err;
+
+	return jpeg_v5_0_1_hw_init(ip_block);
+}
+
+/* Program the power-status, tiling and JMI registers of logical JPEG instance @i. */
+static void jpeg_v5_0_1_init_inst(struct amdgpu_device *adev, int i)
+{
+	int jpeg_inst = GET_INST(JPEG, i);
+
+	/* disable anti hang mechanism */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+		 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+	/* keep the JPEG in static PG mode */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+		 ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+	/* MJPEG global tiling registers: target this instance, not always instance 0 */
+	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
+		     adev->gfx.config.gb_addr_config);
+
+	/* enable JMI channel */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+		 ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+}
+
+static void jpeg_v5_0_1_deinit_inst(struct amdgpu_device *adev, int i)
+{
+	int jpeg_inst = GET_INST(JPEG, i);
+	/* reset JMI: set the soft-reset bit that jpeg_v5_0_1_init_inst() clears */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+		 UVD_JMI_CNTL__SOFT_RESET_MASK,
+		 ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+	/* enable anti hang mechanism: set the power-status bit that init_inst clears */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+		 UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+		 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+}
+
+static void jpeg_v5_0_1_init_jrbc(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u32 reg, data, mask;
+	int jpeg_inst = GET_INST(JPEG, ring->me);
+	int reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+
+	/* enable System Interrupt for JRBC: one enable bit per pipe in JPEG_SYS_INT_EN */
+	reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN);
+	if (ring->pipe < AMDGPU_MAX_JPEG_RINGS_4_0_3) { /* NOTE(review): '<' here, '<=' in core_reg_offset() - confirm */
+		data = JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe;
+		mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe);
+		WREG32_P(reg, data, mask);
+	} else { /* remaining pipes use the bit 12 positions further up */
+		data = JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12);
+		mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12));
+		WREG32_P(reg, data, mask);
+	}
+
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_LMI_JRBC_RB_VMID,
+			    reg_offset, 0);
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_JRBC_RB_CNTL,
+			    reg_offset,
+			    (0x00000001L | 0x00000002L)); /* bit 0 is dropped again by the RB_CNTL write below */
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+			    reg_offset, lower_32_bits(ring->gpu_addr));
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+			    reg_offset, upper_32_bits(ring->gpu_addr));
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_JRBC_RB_RPTR,
+			    reg_offset, 0);
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_JRBC_RB_WPTR,
+			    reg_offset, 0);
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_JRBC_RB_CNTL,
+			    reg_offset, 0x00000002L);
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_JRBC_RB_SIZE,
+			    reg_offset, ring->ring_size / 4);
+	ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR,
+					 reg_offset); /* sw wptr is re-read from the register */
+}
+
+static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	uint64_t ctx_addr;
+	uint32_t param, resp, expected;
+	uint32_t tmp, timeout;
+
+	struct amdgpu_mm_table *table = &adev->virt.mm_table;
+	uint32_t *table_loc;
+	uint32_t table_size;
+	uint32_t size, size_dw, item_offset; /* size_dw: presumably used by the MMSCH_V5_0_INSERT_* macros - confirm */
+	uint32_t init_status;
+	int i, j, jpeg_inst;
+
+	struct mmsch_v5_0_cmd_direct_write
+		direct_wt = { {0} };
+	struct mmsch_v5_0_cmd_end end = { {0} };
+	struct mmsch_v5_0_init_header header;
+
+	direct_wt.cmd_header.command_type =
+		MMSCH_COMMAND__DIRECT_REG_WRITE;
+	end.cmd_header.command_type =
+		MMSCH_COMMAND__END;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { /* one init table and one mailbox handshake per instance */
+		jpeg_inst = GET_INST(JPEG, i);
+
+		memset(&header, 0, sizeof(struct mmsch_v5_0_init_header));
+		header.version = MMSCH_VERSION;
+		header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2; /* size in dwords */
+
+		table_loc = (uint32_t *)table->cpu_addr;
+		table_loc += header.total_size; /* entries start right after the header */
+
+		item_offset = header.total_size;
+
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+			ring = &adev->jpeg.inst[i].ring_dec[j];
+			table_size = 0; /* restarted for every ring */
+
+			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+			MMSCH_V5_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
+			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+			MMSCH_V5_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
+			tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC_RB_SIZE);
+			MMSCH_V5_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+			if (j < 5) { /* rings 0-4 are described in mjpegdec0[], the rest in mjpegdec1[] */
+				header.mjpegdec0[j].table_offset = item_offset;
+				header.mjpegdec0[j].init_status = 0;
+				header.mjpegdec0[j].table_size = table_size;
+			} else {
+				header.mjpegdec1[j - 5].table_offset = item_offset;
+				header.mjpegdec1[j - 5].init_status = 0;
+				header.mjpegdec1[j - 5].table_size = table_size;
+			}
+			header.total_size += table_size;
+			item_offset += table_size;
+		}
+
+		MMSCH_V5_0_INSERT_END();
+
+		/* send init table to MMSCH: the finished header goes to the start of the table */
+		size = sizeof(struct mmsch_v5_0_init_header);
+		table_loc = (uint32_t *)table->cpu_addr;
+		memcpy((void *)table_loc, &header, size);
+
+		ctx_addr = table->gpu_addr;
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+		tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+		size = header.total_size;
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+		param = 0x00000001;
+		WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+		tmp = 0;
+		timeout = 1000; /* compared against tmp, which grows by 10 per udelay(10) */
+		resp = 0;
+		expected = MMSCH_VF_MAILBOX_RESP__OK;
+		init_status = /* NOTE(review): indexed by instance i and sampled before the poll loop - confirm */
+			((struct mmsch_v5_0_init_header *)(table_loc))->mjpegdec0[i].init_status;
+		while (resp != expected) {
+			resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);
+
+			if (resp != 0) /* any non-zero response ends the wait */
+				break;
+			udelay(10);
+			tmp = tmp + 10;
+			if (tmp >= timeout) {
+				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+					" waiting for regMMSCH_VF_MAILBOX_RESP "\
+					"(expected=0x%08x, readback=0x%08x)\n",
+					tmp, expected, resp);
+				return -EBUSY;
+			}
+		}
+		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
+				init_status != MMSCH_VF_ENGINE_STATUS__PASS) /* only logged, not returned */
+			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
+					resp, init_status);
+
+	}
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Init each instance, then the ring buffer registers of its decode rings.
+ *
+ * Return: always 0.
+ */
+static int jpeg_v5_0_1_start(struct amdgpu_device *adev)
+{
+	int inst, pipe;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) {
+		jpeg_v5_0_1_init_inst(adev, inst);
+
+		for (pipe = 0; pipe < adev->jpeg.num_jpeg_rings; ++pipe)
+			jpeg_v5_0_1_init_jrbc(&adev->jpeg.inst[inst].ring_dec[pipe]);
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Run the per-instance teardown on every JPEG instance. Always returns 0.
+ */
+static int jpeg_v5_0_1_stop(struct amdgpu_device *adev)
+{
+	int inst = 0;
+
+	while (inst < adev->jpeg.num_jpeg_inst)
+		jpeg_v5_0_1_deinit_inst(adev, inst++);
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Return: the UVD_JRBC_RB_RPTR register value for the ring's instance and pipe.
+ */
+static uint64_t jpeg_v5_0_1_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+
+	return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_RPTR, reg_offset);
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Return: the writeback copy for doorbell rings, else the UVD_JRBC_RB_WPTR register.
+ */
+static uint64_t jpeg_v5_0_1_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+
+	if (!ring->use_doorbell)
+		return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+					   regUVD_JRBC_RB_WPTR, reg_offset);
+	return adev->wb.wb[ring->wptr_offs];
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Publish the low 32 bits of ring->wptr via writeback + doorbell, or via the register.
+ */
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u32 wptr_lo = lower_32_bits(ring->wptr);
+
+	if (ring->use_doorbell) {
+		adev->wb.wb[ring->wptr_offs] = wptr_lo;
+		WDOORBELL32(ring->doorbell_index, wptr_lo);
+		return;
+	}
+
+	WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_WPTR,
+			    (ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0), wptr_lo);
+}
+
+/* Idle only when every decode ring of every instance reports RB_JOB_DONE. */
+static bool jpeg_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const u32 done = UVD_JRBC_STATUS__RB_JOB_DONE_MASK;
+	bool ret = true; /* AND-accumulator: starting from false could never report idle */
+	int i, j;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+			ret &= (RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+				regUVD_JRBC_STATUS, reg_offset) & done) == done;
+		}
+	}
+
+	return ret;
+}
+
+static int jpeg_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+ ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC_STATUS, reg_offset,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+ return ret;
+}
+
+/*
+ * Only a gate request is checked: it fails with -EBUSY while is_idle() reports busy.
+ */
+static int jpeg_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst;
+
+	if (state != AMD_CG_STATE_GATE)
+		return 0;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
+		if (!jpeg_v5_0_1_is_idle(ip_block))
+			return -EBUSY;
+
+	return 0;
+}
+
+/* Stop (gate) or start (ungate) JPEG; cur_state follows only a successful change. */
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					      enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev)) {
+		/* under SR-IOV the block is only recorded as ungated */
+		adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+		return 0;
+	}
+
+	if (adev->jpeg.cur_state == state)
+		return 0;
+
+	ret = (state == AMD_PG_STATE_GATE) ? jpeg_v5_0_1_stop(adev) :
+					     jpeg_v5_0_1_start(adev);
+	if (ret)
+		return ret;
+
+	adev->jpeg.cur_state = state;
+	return 0;
+}
+
+static int jpeg_v5_0_1_set_interrupt_state(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned int type,
+					enum amdgpu_interrupt_state state)
+{
+	return 0; /* stub: no argument is used, every request reports success */
+}
+
+static int jpeg_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned int type,
+					enum amdgpu_interrupt_state state)
+{
+	return 0; /* stub: no argument is used, every request reports success */
+}
+
+/* Trap handler: map the IH node id to a JPEG instance, then process fences on the signalled ring. */
+static int jpeg_v5_0_1_process_interrupt(struct amdgpu_device *adev,
+					 struct amdgpu_irq_src *source,
+					 struct amdgpu_iv_entry *entry)
+{
+	struct amdgpu_ring *rings;
+	u32 aid, inst;
+
+	aid = node_id_to_phys_map[entry->node_id];
+	DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
+		if (adev->jpeg.inst[inst].aid_id == aid)
+			break;
+
+	if (inst >= adev->jpeg.num_jpeg_inst) {
+		dev_WARN_ONCE(adev->dev, 1, "Interrupt received for unknown JPEG instance %d",
+			      entry->node_id);
+		return 0;
+	}
+
+	rings = adev->jpeg.inst[inst].ring_dec;
+	switch (entry->src_id) {
+	case VCN_5_0__SRCID__JPEG_DECODE:
+		amdgpu_fence_process(&rings[0]);
+		break;
+	case VCN_5_0__SRCID__JPEG1_DECODE:
+		amdgpu_fence_process(&rings[1]);
+		break;
+	case VCN_5_0__SRCID__JPEG2_DECODE:
+		amdgpu_fence_process(&rings[2]);
+		break;
+	case VCN_5_0__SRCID__JPEG3_DECODE:
+		amdgpu_fence_process(&rings[3]);
+		break;
+	case VCN_5_0__SRCID__JPEG4_DECODE:
+		amdgpu_fence_process(&rings[4]);
+		break;
+	case VCN_5_0__SRCID__JPEG5_DECODE:
+		amdgpu_fence_process(&rings[5]);
+		break;
+	case VCN_5_0__SRCID__JPEG6_DECODE:
+		amdgpu_fence_process(&rings[6]);
+		break;
+	case VCN_5_0__SRCID__JPEG7_DECODE:
+		amdgpu_fence_process(&rings[7]);
+		break;
+	case VCN_5_0__SRCID__JPEG8_DECODE:
+		amdgpu_fence_process(&rings[8]);
+		break;
+	case VCN_5_0__SRCID__JPEG9_DECODE:
+		amdgpu_fence_process(&rings[9]);
+		break;
+	default:
+		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+			      entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+static void jpeg_v5_0_1_core_stall_reset(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int jpeg_inst = GET_INST(JPEG, ring->me);
+	int reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+			    reg_offset, 0x1F); /* the same 0x1F pattern is awaited in CLIENT_CLEAN_STATUS next */
+	SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst,
+				  regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS,
+				  reg_offset, 0x1F, 0x1F); /* result of the wait is not checked */
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_JMI0_JPEG_LMI_DROP,
+			    reg_offset, 0x1F);
+	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe); /* bit selected by pipe number */
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+			    reg_offset, 0x00); /* CLIENT_STALL back to 0 */
+	WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+			    regUVD_JMI0_JPEG_LMI_DROP,
+			    reg_offset, 0x00); /* LMI_DROP back to 0 */
+	WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00); /* clear JPEG_CORE_RST_CTRL again */
+}
+
+static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring,
+				  unsigned int vmid, /* not used by this implementation */
+				  struct amdgpu_fence *timedout_fence)
+{
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+	jpeg_v5_0_1_core_stall_reset(ring); /* reset the core behind this ring's pipe */
+	jpeg_v5_0_1_init_jrbc(ring); /* then reprogram its ring buffer registers */
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { /* IP callbacks referenced by jpeg_v5_0_1_ip_block */
+	.name = "jpeg_v5_0_1",
+	.early_init = jpeg_v5_0_1_early_init,
+	.late_init = NULL,
+	.sw_init = jpeg_v5_0_1_sw_init,
+	.sw_fini = jpeg_v5_0_1_sw_fini,
+	.hw_init = jpeg_v5_0_1_hw_init,
+	.hw_fini = jpeg_v5_0_1_hw_fini,
+	.suspend = jpeg_v5_0_1_suspend,
+	.resume = jpeg_v5_0_1_resume,
+	.is_idle = jpeg_v5_0_1_is_idle,
+	.wait_for_idle = jpeg_v5_0_1_wait_for_idle,
+	.check_soft_reset = NULL, /* no soft-reset hooks are provided */
+	.pre_soft_reset = NULL,
+	.soft_reset = NULL,
+	.post_soft_reset = NULL,
+	.set_clockgating_state = jpeg_v5_0_1_set_clockgating_state,
+	.set_powergating_state = jpeg_v5_0_1_set_powergating_state,
+	.dump_ip_state = amdgpu_jpeg_dump_ip_state,
+	.print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { /* pointer ops are v5.0.1, emit helpers come from jpeg_v4_0_3 */
+	.type = AMDGPU_RING_TYPE_VCN_JPEG,
+	.align_mask = 0xf,
+	.get_rptr = jpeg_v5_0_1_dec_ring_get_rptr,
+	.get_wptr = jpeg_v5_0_1_dec_ring_get_wptr,
+	.set_wptr = jpeg_v5_0_1_dec_ring_set_wptr,
+	.emit_frame_size =
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+		8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
+		22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
+		8 + 16,
+	.emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
+	.emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+	.emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+	.emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+	.emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush,
+	.test_ring = amdgpu_jpeg_dec_ring_test_ring,
+	.test_ib = amdgpu_jpeg_dec_ring_test_ib,
+	.insert_nop = jpeg_v4_0_3_dec_ring_nop,
+	.insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+	.insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_jpeg_ring_begin_use,
+	.end_use = amdgpu_jpeg_ring_end_use,
+	.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+	.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+	.reset = jpeg_v5_0_1_ring_reset,
+};
+
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+	int i, j, jpeg_inst;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v5_0_1_dec_ring_vm_funcs;
+			adev->jpeg.inst[i].ring_dec[j].me = i; /* logical instance; users feed it to GET_INST() */
+			adev->jpeg.inst[i].ring_dec[j].pipe = j; /* ring number within the instance */
+		}
+		jpeg_inst = GET_INST(JPEG, i);
+		adev->jpeg.inst[i].aid_id = /* derived from the GET_INST() result, not from i */
+			jpeg_inst / adev->jpeg.num_inst_per_aid;
+	}
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_irq_funcs = { /* trap irq source callbacks */
+	.set = jpeg_v5_0_1_set_interrupt_state,
+	.process = jpeg_v5_0_1_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_ras_irq_funcs = { /* RAS poison irq source callbacks */
+	.set = jpeg_v5_0_1_set_ras_interrupt_state,
+	.process = amdgpu_jpeg_process_poison_irq,
+};
+
+/* Size the trap irq source kept on the first instance and hook up both irq sources. */
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_irq_src *trap = &adev->jpeg.inst->irq;
+	struct amdgpu_irq_src *poison = &adev->jpeg.inst->ras_poison_irq;
+	int i;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+		trap->num_types += adev->jpeg.num_jpeg_rings;
+	trap->funcs = &jpeg_v5_0_1_irq_funcs;
+	poison->num_types = 1;
+	poison->funcs = &jpeg_v5_0_1_ras_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block = { /* JPEG IP block descriptor, version 5.0.1 */
+	.type = AMD_IP_BLOCK_TYPE_JPEG,
+	.major = 5,
+	.minor = 0,
+	.rev = 1,
+	.funcs = &jpeg_v5_0_1_ip_funcs,
+};
+
+/* Read the POISONED_PF field of the RAS status register of one JPEG sub-block. */
+static uint32_t jpeg_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev,
+		uint32_t instance, uint32_t sub_block)
+{
+	uint32_t reg_value, poison_stat;
+
+	if (sub_block == AMDGPU_JPEG_V5_0_1_JPEG0) {
+		reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+		poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+	} else if (sub_block == AMDGPU_JPEG_V5_0_1_JPEG1) {
+		reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+		poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+	} else {
+		/* unknown sub-block: nothing is read and no poison is reported */
+		return 0;
+	}
+
+	if (!poison_stat)
+		return 0;
+
+	dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+		 instance, sub_block);
+	return poison_stat;
+}
+
+/* True when any sub-block of any JPEG instance reports poison. */
+static bool jpeg_v5_0_1_query_ras_poison_status(struct amdgpu_device *adev)
+{
+	uint32_t inst, sub, hits = 0;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+		for (sub = 0; sub < AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK; sub++)
+			hits += jpeg_v5_0_1_query_poison_by_instance(adev, inst, sub);
+
+	return hits != 0;
+}
+
+static const struct amdgpu_ras_block_hw_ops jpeg_v5_0_1_ras_hw_ops = { /* only the poison query is provided */
+	.query_poison_status = jpeg_v5_0_1_query_ras_poison_status,
+};
+
+/*
+ * Decode an ACA bank and log it in the error cache: an uncorrectable bank
+ * counts as one error, a correctable bank uses the MISC0 error count.
+ *
+ * Returns the decode/log result, or -EINVAL for any other SMU type.
+ */
+static int jpeg_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+				       enum aca_smu_type type, void *data)
+{
+	struct aca_bank_info info;
+	u64 count;
+	int ret;
+
+	ret = aca_bank_info_decode(bank, &info);
+	if (ret)
+		return ret;
+
+	if (type == ACA_SMU_TYPE_UE) {
+		bank->aca_err_type = ACA_ERROR_TYPE_UE;
+		count = 1ULL;
+	} else if (type == ACA_SMU_TYPE_CE) {
+		bank->aca_err_type = ACA_ERROR_TYPE_CE;
+		count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+	} else {
+		return -EINVAL;
+	}
+
+	return aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count);
+}
+
+/* Error codes checked by jpeg_v5_0_1_aca_bank_is_valid(); see the SMU driver "if" (presumably interface) header. */
+static int jpeg_v5_0_1_err_codes[] = {
+	16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-9][S|D] */
+	24, 25, 26, 27, 28, 29, 30, 31,
+	48, 49, 50, 51,
+};
+
+/*
+ * A bank is valid for JPEG when it comes from the AID0 MCA SMU instance and
+ * aca_bank_check_error_codes() returns 0 for the JPEG error code list.
+ */
+static bool jpeg_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+					  enum aca_smu_type type, void *data)
+{
+	u32 instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+
+	/* bit 0 of the instance id is ignored for the comparison */
+	if ((instlo & GENMASK(31, 1)) != mmSMNAID_AID0_MCA_SMU)
+		return false;
+
+	/* a non-zero result from the code check rejects the bank */
+	return !aca_bank_check_error_codes(handle->adev, bank,
+					   jpeg_v5_0_1_err_codes,
+					   ARRAY_SIZE(jpeg_v5_0_1_err_codes));
+}
+
+static const struct aca_bank_ops jpeg_v5_0_1_aca_bank_ops = { /* bank callbacks used via jpeg_v5_0_1_aca_info */
+	.aca_bank_parser = jpeg_v5_0_1_aca_bank_parser,
+	.aca_bank_is_valid = jpeg_v5_0_1_aca_bank_is_valid,
+};
+
+static const struct aca_info jpeg_v5_0_1_aca_info = { /* bound to the JPEG RAS block in ras_late_init */
+	.hwip = ACA_HWIP_TYPE_SMU,
+	.mask = ACA_ERROR_UE_MASK,
+	.bank_ops = &jpeg_v5_0_1_aca_bank_ops,
+};
+
+/* RAS late init: common block init, ACA binding, then take the poison irq if usable. */
+static int jpeg_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+	int r = amdgpu_ras_block_late_init(adev, ras_block);
+
+	if (r)
+		return r;
+
+	r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
+				&jpeg_v5_0_1_aca_info, NULL);
+	if (r)
+		goto late_fini;
+
+	/* the poison irq is only taken when RAS is supported and the source has funcs */
+	if (!amdgpu_ras_is_supported(adev, ras_block->block) ||
+	    !adev->jpeg.inst->ras_poison_irq.funcs)
+		return 0;
+
+	r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+	if (!r)
+		return 0;
+
+late_fini:
+	/* undo amdgpu_ras_block_late_init() after any later failure */
+	amdgpu_ras_block_late_fini(adev, ras_block);
+	return r;
+}
+
+static struct amdgpu_jpeg_ras jpeg_v5_0_1_ras = { /* installed into adev->jpeg.ras by set_ras_funcs */
+	.ras_block = {
+		.hw_ops = &jpeg_v5_0_1_ras_hw_ops,
+		.ras_late_init = jpeg_v5_0_1_ras_late_init,
+	},
+};
+
+static void jpeg_v5_0_1_set_ras_funcs(struct amdgpu_device *adev)
+{
+	adev->jpeg.ras = &jpeg_v5_0_1_ras; /* called from early_init; done unconditionally */
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h
new file mode 100644
index 000000000000..a7e58d5fb246
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V5_0_1_H__
+#define __JPEG_V5_0_1_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block;
+
+#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094
+#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe
+
+#define regUVD_JRBC0_UVD_JRBC_RB_WPTR 0x0640
+#define regUVD_JRBC0_UVD_JRBC_RB_WPTR_BASE_IDX 1
+#define regUVD_JRBC0_UVD_JRBC_STATUS 0x0649
+#define regUVD_JRBC0_UVD_JRBC_STATUS_BASE_IDX 1
+#define regUVD_JRBC0_UVD_JRBC_RB_RPTR 0x064a
+#define regUVD_JRBC0_UVD_JRBC_RB_RPTR_BASE_IDX 1
+#define regUVD_JRBC1_UVD_JRBC_RB_WPTR 0x0000
+#define regUVD_JRBC1_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC1_UVD_JRBC_STATUS 0x0009
+#define regUVD_JRBC1_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC1_UVD_JRBC_RB_RPTR 0x000a
+#define regUVD_JRBC1_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_RB_WPTR 0x0040
+#define regUVD_JRBC2_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_STATUS 0x0049
+#define regUVD_JRBC2_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_RB_RPTR 0x004a
+#define regUVD_JRBC2_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_RB_WPTR 0x0080
+#define regUVD_JRBC3_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_STATUS 0x0089
+#define regUVD_JRBC3_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_RB_RPTR 0x008a
+#define regUVD_JRBC3_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_RB_WPTR 0x00c0
+#define regUVD_JRBC4_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_STATUS 0x00c9
+#define regUVD_JRBC4_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_RB_RPTR 0x00ca
+#define regUVD_JRBC4_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_RB_WPTR 0x0100
+#define regUVD_JRBC5_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_STATUS 0x0109
+#define regUVD_JRBC5_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_RB_RPTR 0x010a
+#define regUVD_JRBC5_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_RB_WPTR 0x0140
+#define regUVD_JRBC6_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_STATUS 0x0149
+#define regUVD_JRBC6_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_RB_RPTR 0x014a
+#define regUVD_JRBC6_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_RB_WPTR 0x0180
+#define regUVD_JRBC7_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_STATUS 0x0189
+#define regUVD_JRBC7_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_RB_RPTR 0x018a
+#define regUVD_JRBC7_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_RB_WPTR 0x01c0
+#define regUVD_JRBC8_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_STATUS 0x01c9
+#define regUVD_JRBC8_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_RB_RPTR 0x01ca
+#define regUVD_JRBC8_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC9_UVD_JRBC_RB_WPTR 0x0440
+#define regUVD_JRBC9_UVD_JRBC_RB_WPTR_BASE_IDX 1
+#define regUVD_JRBC9_UVD_JRBC_STATUS 0x0449
+#define regUVD_JRBC9_UVD_JRBC_STATUS_BASE_IDX 1
+#define regUVD_JRBC9_UVD_JRBC_RB_RPTR 0x044a
+#define regUVD_JRBC9_UVD_JRBC_RB_RPTR_BASE_IDX 1
+#define regUVD_JMI0_JPEG_LMI_DROP 0x0663
+#define regUVD_JMI0_JPEG_LMI_DROP_BASE_IDX 1
+#define regUVD_JMI0_UVD_JMI_CLIENT_STALL 0x067a
+#define regUVD_JMI0_UVD_JMI_CLIENT_STALL_BASE_IDX 1
+#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS 0x067b
+#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS_BASE_IDX 1
+#define regJPEG_CORE_RST_CTRL 0x072e
+#define regJPEG_CORE_RST_CTRL_BASE_IDX 1
+
+#define regVCN_RRMT_CNTL 0x0940
+#define regVCN_RRMT_CNTL_BASE_IDX 1
+
+enum amdgpu_jpeg_v5_0_1_sub_block { /* JPEG v5.0.1 sub-block identifiers */
+ AMDGPU_JPEG_V5_0_1_JPEG0 = 0,
+ AMDGPU_JPEG_V5_0_1_JPEG1,
+
+ AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK, /* count of the entries above; must stay last */
+};
+
+#endif /* __JPEG_V5_0_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c
new file mode 100644
index 000000000000..396262044ea8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include "amdgpu.h"
+#include "lsdma_v7_0.h"
+#include "amdgpu_lsdma.h"
+
+#include "lsdma/lsdma_7_0_0_offset.h"
+#include "lsdma/lsdma_7_0_0_sh_mask.h"
+
+static int lsdma_v7_0_wait_pio_status(struct amdgpu_device *adev)
+{
+ return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS), /* register to wait on */
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK, /* NOTE(review): presumably the expected value - confirm */
+ LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK); /* NOTE(review): presumably the mask - confirm */
+}
+
+/* Program a PIO copy of @size bytes from @src_addr to @dst_addr and wait for completion. */
+static int lsdma_v7_0_copy_mem(struct amdgpu_device *adev, uint64_t src_addr,
+			       uint64_t dst_addr, uint64_t size)
+{
+	uint32_t cmd;
+	int err;
+
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr));
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr));
+
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+	/* Read-modify-write of the command register; CONSTANT_FILL is left at 0 here. */
+	cmd = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0);
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, cmd);
+
+	err = lsdma_v7_0_wait_pio_status(adev);
+	if (err)
+		dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n");
+
+	return err;
+}
+
+/* Program a PIO constant fill of @size bytes at @dst_addr with @data and wait for completion. */
+static int lsdma_v7_0_fill_mem(struct amdgpu_device *adev, uint64_t dst_addr,
+			       uint32_t data, uint64_t size)
+{
+	uint32_t cmd;
+	int err;
+
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data);
+
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr));
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr));
+
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0);
+
+	/* Read-modify-write of the command register; CONSTANT_FILL is set to 1 here. */
+	cmd = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, BYTE_COUNT, size);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, SRC_LOCATION, 0);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, DST_LOCATION, 0);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0);
+	cmd = REG_SET_FIELD(cmd, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1);
+	WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, cmd);
+
+	err = lsdma_v7_0_wait_pio_status(adev);
+	if (err)
+		dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n");
+
+	return err;
+}
+
+/* Clear MEM_POWER_CTRL_EN first, then write it again with the requested @enable value. */
+static void lsdma_v7_0_update_memory_power_gating(struct amdgpu_device *adev, bool enable)
+{
+	uint32_t ctrl = RREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL);
+
+	ctrl = REG_SET_FIELD(ctrl, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, 0);
+	WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, ctrl);
+
+	/* Second write applies the caller's setting on top of the cleared value. */
+	ctrl = REG_SET_FIELD(ctrl, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, enable);
+	WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, ctrl);
+}
+
+const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs = { /* LSDMA v7.0 callback table, declared extern in lsdma_v7_0.h */
+ .copy_mem = lsdma_v7_0_copy_mem,
+ .fill_mem = lsdma_v7_0_fill_mem,
+ .update_memory_power_gating = lsdma_v7_0_update_memory_power_gating
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h
new file mode 100644
index 000000000000..52b4485cdd98
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __LSDMA_V7_0_H__
+#define __LSDMA_V7_0_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs;
+
+#endif /* __LSDMA_V7_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
new file mode 100644
index 000000000000..2db9b2c63693
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -0,0 +1,485 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drm_drv.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
+#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
+
+/*
+ * Reserve @bo, bind it into GART and, on success, take an extra reference on it.
+ * Returns 0 on success or the error reported by the reserve/bind step.
+ */
+static int
+mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
+{
+	int err = amdgpu_bo_reserve(bo, true);
+
+	if (err) {
+		DRM_ERROR("Failed to reserve bo. ret %d\n", err);
+		return err;
+	}
+
+	err = amdgpu_ttm_alloc_gart(&bo->tbo);
+	if (err)
+		DRM_ERROR("Failed to bind bo to GART. ret %d\n", err);
+
+	/* The reservation is released whether or not the bind succeeded. */
+	amdgpu_bo_unreserve(bo);
+	if (err)
+		return err;
+
+	/* Extra reference on the BO; the returned pointer is not needed here. */
+	amdgpu_bo_ref(bo);
+	return 0;
+}
+
+/*
+ * Find the BO mapped at the user write-pointer VA in the queue's VM, reject it if it is
+ * larger than one page, bind it into GART and record it in queue->wptr_obj.
+ */
+static int
+mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
+			      struct amdgpu_usermode_queue *queue,
+			      uint64_t wptr)
+{
+	struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
+	struct amdgpu_vm *vm = queue->vm;
+	struct amdgpu_bo_va_mapping *mapping;
+	int err;
+
+	/* The mapping lookup runs with the VM root BO reserved. */
+	err = amdgpu_bo_reserve(vm->root.bo, false);
+	if (err)
+		return err;
+	mapping = amdgpu_vm_bo_lookup_mapping(vm, (wptr & AMDGPU_GMC_HOLE_MASK) >> PAGE_SHIFT);
+	amdgpu_bo_unreserve(vm->root.bo);
+	if (!mapping) {
+		DRM_ERROR("Failed to lookup wptr bo\n");
+		return -EINVAL;
+	}
+
+	wptr_obj->obj = mapping->bo_va->base.bo;
+	if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
+		DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
+		return -EINVAL;
+	}
+	err = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
+	if (err) {
+		DRM_ERROR("Failed to map wptr bo to GART\n");
+		return err;
+	}
+	wptr_obj->gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
+	return 0;
+}
+
+/* Map a userq create-flag priority to a MES priority level; unlisted values map to NORMAL. */
+static int convert_to_mes_priority(int priority)
+{
+	switch (priority) {
+	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW:
+		return AMDGPU_MES_PRIORITY_LEVEL_LOW;
+	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH:
+		return AMDGPU_MES_PRIORITY_LEVEL_MEDIUM;
+	case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH:
+		return AMDGPU_MES_PRIORITY_LEVEL_HIGH;
+	default: /* includes AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW */
+		return AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+	}
+}
+
+/* Fill a MES add-queue request from the queue's properties and submit it under the MES lock. */
+static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_mqd_prop *props = queue->userq_prop;
+	struct amdgpu_userq_obj *fw_ctx = &queue->fw_obj;
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct mes_add_queue_input in;
+	int err;
+
+	memset(&in, 0x0, sizeof(in));
+
+	in.process_id = queue->vm->pasid;
+	in.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
+	in.process_va_start = 0;
+	in.process_va_end = adev->vm_manager.max_pfn - 1;
+	in.process_context_addr = fw_ctx->gpu_addr;
+	/* set process quantum to 10 ms and gang quantum to 1 ms as default */
+	in.process_quantum = 100000;
+	in.gang_quantum = 10000;
+
+	/* The gang context address is the FW object address plus the process context size. */
+	in.gang_context_addr = fw_ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+	in.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+	in.gang_global_priority_level = convert_to_mes_priority(queue->priority);
+	in.paging = false;
+
+	in.queue_type = queue->queue_type;
+	in.mqd_addr = queue->mqd.gpu_addr;
+	in.wptr_addr = props->wptr_gpu_addr;
+	in.wptr_mc_addr = queue->wptr_obj.gpu_addr;
+	in.queue_size = props->queue_size >> 2;
+	in.doorbell_offset = props->doorbell_index;
+
+	amdgpu_mes_lock(&adev->mes);
+	err = adev->mes.funcs->add_hw_queue(&adev->mes, &in);
+	amdgpu_mes_unlock(&adev->mes);
+	if (err) {
+		DRM_ERROR("Failed to map queue in HW, err (%d)\n", err);
+		return err;
+	}
+
+	DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", props->doorbell_index);
+	return 0;
+}
+
+/* Ask MES to remove the queue identified by its doorbell and gang context address. */
+static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct mes_remove_queue_input in;
+	int err;
+
+	memset(&in, 0x0, sizeof(in));
+	in.gang_context_addr = queue->fw_obj.gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+	in.doorbell_offset = queue->doorbell_index;
+
+	amdgpu_mes_lock(&adev->mes);
+	err = adev->mes.funcs->remove_hw_queue(&adev->mes, &in);
+	amdgpu_mes_unlock(&adev->mes);
+	/* A failure is logged and its error code is handed back unchanged. */
+	if (err)
+		DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", err);
+	return err;
+}
+
+/*
+ * Allocate the firmware context object for a user queue in queue->fw_obj. The FW
+ * expects at least one page each for the process ctx and the gang ctx, so the
+ * object is sized as the sum of both.
+ * @mqd_user is not used here.
+ * Returns 0 on success or the error from amdgpu_userq_create_object().
+ */
+static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+				      struct amdgpu_usermode_queue *queue,
+				      struct drm_amdgpu_userq_in *mqd_user)
+{
+	const int ctx_bytes = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
+	int err;
+
+	err = amdgpu_userq_create_object(uq_mgr, &queue->fw_obj, ctx_bytes);
+	if (!err)
+		return 0;
+
+	DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", err);
+	return err;
+}
+
+/*
+ * Ask MES to detect and reset hung queues of @queue_type. Every user queue of
+ * that type whose doorbell index is in the reported list is marked HUNG, its
+ * fences are force-completed and a wedged event is sent for the device.
+ * Returns 0 on success (also when nothing was hung) or a negative error code.
+ */
+static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
+				      int queue_type)
+{
+	int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev);
+	struct amdgpu_usermode_queue *queue;
+	struct amdgpu_userq_mgr *uqm, *tmp;
+	unsigned int hung_db_num = 0, i;
+	u32 db_array[4];
+	int queue_id, r;
+
+	if (db_array_size > 4) {
+		dev_err(adev->dev, "DB array size (%d vs 4) too small\n", db_array_size);
+		return -EINVAL;
+	}
+
+	amdgpu_mes_lock(&adev->mes);
+	r = amdgpu_mes_detect_and_reset_hung_queues(adev, queue_type, false,
+						    &hung_db_num, db_array);
+	amdgpu_mes_unlock(&adev->mes);
+	if (r)
+		dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r);
+	if (r || !hung_db_num)
+		return r;
+
+	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
+			if (queue->queue_type != queue_type)
+				continue;
+			for (i = 0; i < hung_db_num; i++) {
+				if (queue->doorbell_index != db_array[i])
+					continue;
+				queue->state = AMDGPU_USERQ_STATE_HUNG;
+				atomic_inc(&adev->gpu_reset_counter);
+				amdgpu_userq_fence_driver_force_completion(queue);
+				drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Create the MQD for a user queue: allocate the MQD object, fill an amdgpu_mqd_prop
+ * from the ioctl arguments and the IP-specific user MQD, run init_mqd(), then
+ * allocate the FW context object and map the wptr BO into GART. Returns 0 with
+ * queue->userq_prop set, or a negative error code after freeing what was allocated.
+ */
+static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+				struct drm_amdgpu_userq_in *args_in,
+				struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
+	struct drm_amdgpu_userq_in *mqd_user = args_in;
+	struct amdgpu_mqd_prop *userq_props;
+	struct amdgpu_gfx_shadow_info shadow_info = {0}; /* stays zeroed without the gfx hook */
+	int r;
+
+	/* Structure to initialize MQD for userqueue using generic MQD init function */
+	userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL);
+	if (!userq_props) {
+		DRM_ERROR("Failed to allocate memory for userq_props\n");
+		return -ENOMEM;
+	}
+
+	r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size);
+	if (r) {
+		DRM_ERROR("Failed to create MQD object for userqueue\n");
+		goto free_props;
+	}
+
+	/* Initialize the MQD BO with user given values */
+	userq_props->wptr_gpu_addr = mqd_user->wptr_va;
+	userq_props->rptr_gpu_addr = mqd_user->rptr_va;
+	userq_props->queue_size = mqd_user->queue_size;
+	userq_props->hqd_base_gpu_addr = mqd_user->queue_va;
+	userq_props->mqd_gpu_addr = queue->mqd.gpu_addr;
+	userq_props->use_doorbell = true;
+	userq_props->doorbell_index = queue->doorbell_index;
+	userq_props->fence_address = queue->fence_drv->gpu_addr;
+
+	if (adev->gfx.funcs->get_gfx_shadow_info)
+		adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
+	if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
+		struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
+
+		if (mqd_user->mqd_size != sizeof(*compute_mqd)) {
+			DRM_ERROR("Invalid compute IP MQD size\n");
+			r = -EINVAL;
+			goto free_mqd;
+		}
+		compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+		if (IS_ERR(compute_mqd)) {
+			DRM_ERROR("Failed to read user MQD\n");
+			r = PTR_ERR(compute_mqd);
+			goto free_mqd;
+		}
+
+		userq_props->eop_gpu_addr = compute_mqd->eop_va;
+		kfree(compute_mqd); /* freed before validation so no exit path can leak it */
+		userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
+		userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
+		userq_props->hqd_active = false;
+		userq_props->tmz_queue = mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+
+		r = amdgpu_userq_input_va_validate(queue->vm, userq_props->eop_gpu_addr,
+						   max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE));
+		if (r)
+			goto free_mqd;
+	} else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
+		struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
+
+		if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
+			DRM_ERROR("Invalid GFX MQD\n");
+			r = -EINVAL;
+			goto free_mqd;
+		}
+		mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+		if (IS_ERR(mqd_gfx_v11)) {
+			DRM_ERROR("Failed to read user MQD\n");
+			r = PTR_ERR(mqd_gfx_v11);
+			goto free_mqd;
+		}
+
+		userq_props->shadow_addr = mqd_gfx_v11->shadow_va;
+		userq_props->csa_addr = mqd_gfx_v11->csa_va;
+		userq_props->tmz_queue = mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+		kfree(mqd_gfx_v11); /* freed before validation so no exit path can leak it */
+
+		r = amdgpu_userq_input_va_validate(queue->vm, userq_props->shadow_addr,
+						   shadow_info.shadow_size);
+		if (r)
+			goto free_mqd;
+	} else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
+		struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11;
+
+		if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) {
+			DRM_ERROR("Invalid SDMA MQD\n");
+			r = -EINVAL;
+			goto free_mqd;
+		}
+		mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+		if (IS_ERR(mqd_sdma_v11)) {
+			DRM_ERROR("Failed to read sdma user MQD\n");
+			r = PTR_ERR(mqd_sdma_v11);
+			goto free_mqd;
+		}
+
+		userq_props->csa_addr = mqd_sdma_v11->csa_va;
+		kfree(mqd_sdma_v11); /* freed before validation so no exit path can leak it */
+
+		r = amdgpu_userq_input_va_validate(queue->vm, userq_props->csa_addr,
+						   shadow_info.csa_size);
+		if (r)
+			goto free_mqd;
+	}
+
+	r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props);
+	if (r) {
+		DRM_ERROR("Failed to initialize MQD for userqueue\n");
+		goto free_mqd;
+	}
+
+	/* Create BO for FW operations */
+	r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
+	if (r) {
+		DRM_ERROR("Failed to allocate BO for userqueue (%d)\n", r);
+		goto free_mqd;
+	}
+
+	/* FW expects WPTR BOs to be mapped into GART */
+	r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
+	if (r) {
+		DRM_ERROR("Failed to create WPTR mapping\n");
+		goto free_ctx;
+	}
+
+	/* Stored only on success, so a failed create never leaves a pointer to freed memory. */
+	queue->userq_prop = userq_props;
+	return 0;
+
+free_ctx:
+	amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+free_mqd:
+	amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+free_props:
+	kfree(userq_props);
+	return r;
+}
+
+static void
+mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); /* FW context object */
+ kfree(queue->userq_prop); /* property struct stored by mes_userq_mqd_create() */
+ amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); /* MQD object */
+}
+
+/*
+ * Suspend the queue's gang through MES, then poll a writeback slot until it
+ * holds the suspend fence value handed to MES or the poll budget runs out.
+ * Queues that are not in the MAPPED state are left alone and 0 is returned.
+ */
+static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct mes_suspend_gang_input in;
+	signed long budget = 2100000; /* 2100 ms */
+	u32 wb_slot;
+	u64 *fence;
+	int i, err;
+
+	if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
+		return 0;
+	err = amdgpu_device_wb_get(adev, &wb_slot);
+	if (err)
+		return err;
+
+	fence = (u64 *)&adev->wb.wb[wb_slot];
+	*fence = 0;
+
+	memset(&in, 0x0, sizeof(in));
+	in.gang_context_addr = queue->fw_obj.gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+	in.suspend_fence_addr = adev->wb.gpu_addr + (wb_slot * 4);
+	in.suspend_fence_value = 1;
+	amdgpu_mes_lock(&adev->mes);
+	err = adev->mes.funcs->suspend_gang(&adev->mes, &in);
+	amdgpu_mes_unlock(&adev->mes);
+	if (err) {
+		DRM_ERROR("Failed to suspend gang: %d\n", err);
+		goto release_slot;
+	}
+
+	/* One udelay(1) per miss; the loop ends early as soon as the fence reads 1. */
+	for (i = 0; i < budget && *fence != 1; i++)
+		udelay(1);
+	if (i == budget)
+		err = -ETIMEDOUT;
+
+release_slot:
+	amdgpu_device_wb_free(adev, wb_slot);
+	return err;
+}
+
+/*
+ * Resume a preempted queue's gang through MES. HUNG queues are refused with
+ * -EINVAL; queues in any state other than PREEMPTED are a no-op returning 0.
+ */
+static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct mes_resume_gang_input in;
+	int err;
+
+	if (queue->state == AMDGPU_USERQ_STATE_HUNG)
+		return -EINVAL;
+	if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED)
+		return 0;
+
+	memset(&in, 0x0, sizeof(in));
+	in.gang_context_addr = queue->fw_obj.gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+	amdgpu_mes_lock(&adev->mes);
+	err = adev->mes.funcs->resume_gang(&adev->mes, &in);
+	amdgpu_mes_unlock(&adev->mes);
+	if (err)
+		dev_err(adev->dev, "Failed to resume queue, err (%d)\n", err);
+	return err;
+}
+
+const struct amdgpu_userq_funcs userq_mes_funcs = { /* MES-backed user queue callbacks, declared extern in mes_userqueue.h */
+ .mqd_create = mes_userq_mqd_create,
+ .mqd_destroy = mes_userq_mqd_destroy,
+ .unmap = mes_userq_unmap,
+ .map = mes_userq_map,
+ .detect_and_reset = mes_userq_detect_and_reset,
+ .preempt = mes_userq_preempt,
+ .restore = mes_userq_restore,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
new file mode 100644
index 000000000000..090ae8897770
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef MES_USERQ_H
+#define MES_USERQ_H
+#include "amdgpu_userq.h"
+
+extern const struct amdgpu_userq_funcs userq_mes_funcs;
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
deleted file mode 100644
index eb06d749876f..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ /dev/null
@@ -1,1187 +0,0 @@
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <linux/firmware.h>
-#include <linux/module.h>
-#include "amdgpu.h"
-#include "soc15_common.h"
-#include "nv.h"
-#include "gc/gc_10_1_0_offset.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-#include "gc/gc_10_1_0_default.h"
-#include "v10_structs.h"
-#include "mes_api_def.h"
-
-#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid 0x2820
-#define mmCP_MES_IC_OP_CNTL_Sienna_Cichlid_BASE_IDX 1
-#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid 0x4ca1
-#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX 1
-
-MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
-MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes.bin");
-MODULE_FIRMWARE("amdgpu/sienna_cichlid_mes1.bin");
-
-static int mes_v10_1_hw_fini(void *handle);
-static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev);
-
-#define MES_EOP_SIZE 2048
-
-static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- BUG();
- }
-}
-
-static u64 mes_v10_1_ring_get_rptr(struct amdgpu_ring *ring)
-{
- return *ring->rptr_cpu_addr;
-}
-
-static u64 mes_v10_1_ring_get_wptr(struct amdgpu_ring *ring)
-{
- u64 wptr;
-
- if (ring->use_doorbell)
- wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
- else
- BUG();
- return wptr;
-}
-
-static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
- .type = AMDGPU_RING_TYPE_MES,
- .align_mask = 1,
- .nop = 0,
- .support_64bit_ptrs = true,
- .get_rptr = mes_v10_1_ring_get_rptr,
- .get_wptr = mes_v10_1_ring_get_wptr,
- .set_wptr = mes_v10_1_ring_set_wptr,
- .insert_nop = amdgpu_ring_insert_nop,
-};
-
-static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
- void *pkt, int size,
- int api_status_off)
-{
- int ndw = size / 4;
- signed long r;
- union MESAPI__ADD_QUEUE *x_pkt = pkt;
- struct MES_API_STATUS *api_status;
- struct amdgpu_device *adev = mes->adev;
- struct amdgpu_ring *ring = &mes->ring;
- unsigned long flags;
-
- BUG_ON(size % 4 != 0);
-
- spin_lock_irqsave(&mes->ring_lock, flags);
- if (amdgpu_ring_alloc(ring, ndw)) {
- spin_unlock_irqrestore(&mes->ring_lock, flags);
- return -ENOMEM;
- }
-
- api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
- api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
- api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
-
- amdgpu_ring_write_multiple(ring, pkt, ndw);
- amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&mes->ring_lock, flags);
-
- DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
-
- r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
- adev->usec_timeout);
- if (r < 1) {
- DRM_ERROR("MES failed to response msg=%d\n",
- x_pkt->header.opcode);
-
- while (halt_if_hws_hang)
- schedule();
-
- return -ETIMEDOUT;
- }
-
- return 0;
-}
-
-static int convert_to_mes_queue_type(int queue_type)
-{
- if (queue_type == AMDGPU_RING_TYPE_GFX)
- return MES_QUEUE_TYPE_GFX;
- else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
- return MES_QUEUE_TYPE_COMPUTE;
- else if (queue_type == AMDGPU_RING_TYPE_SDMA)
- return MES_QUEUE_TYPE_SDMA;
- else
- BUG();
- return -1;
-}
-
-static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
- struct mes_add_queue_input *input)
-{
- struct amdgpu_device *adev = mes->adev;
- union MESAPI__ADD_QUEUE mes_add_queue_pkt;
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
- uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
-
- memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
-
- mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
- mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_add_queue_pkt.process_id = input->process_id;
- mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
- mes_add_queue_pkt.process_va_start = input->process_va_start;
- mes_add_queue_pkt.process_va_end = input->process_va_end;
- mes_add_queue_pkt.process_quantum = input->process_quantum;
- mes_add_queue_pkt.process_context_addr = input->process_context_addr;
- mes_add_queue_pkt.gang_quantum = input->gang_quantum;
- mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
- mes_add_queue_pkt.inprocess_gang_priority =
- input->inprocess_gang_priority;
- mes_add_queue_pkt.gang_global_priority_level =
- input->gang_global_priority_level;
- mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
- mes_add_queue_pkt.mqd_addr = input->mqd_addr;
- mes_add_queue_pkt.wptr_addr = input->wptr_addr;
- mes_add_queue_pkt.queue_type =
- convert_to_mes_queue_type(input->queue_type);
- mes_add_queue_pkt.paging = input->paging;
- mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
- mes_add_queue_pkt.gws_base = input->gws_base;
- mes_add_queue_pkt.gws_size = input->gws_size;
- mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
- offsetof(union MESAPI__ADD_QUEUE, api_status));
-}
-
-static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
- struct mes_remove_queue_input *input)
-{
- union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
-
- memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
-
- mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
- mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
- mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
- offsetof(union MESAPI__REMOVE_QUEUE, api_status));
-}
-
-static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
- struct mes_unmap_legacy_queue_input *input)
-{
- union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
-
- memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
-
- mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
- mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
- mes_remove_queue_pkt.gang_context_addr = 0;
-
- mes_remove_queue_pkt.pipe_id = input->pipe_id;
- mes_remove_queue_pkt.queue_id = input->queue_id;
-
- if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
- mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
- mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
- mes_remove_queue_pkt.tf_data =
- lower_32_bits(input->trail_fence_data);
- } else {
- if (input->queue_type == AMDGPU_RING_TYPE_GFX)
- mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
- else
- mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
- }
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
- offsetof(union MESAPI__REMOVE_QUEUE, api_status));
-}
-
-static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
- struct mes_suspend_gang_input *input)
-{
- return 0;
-}
-
-static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
- struct mes_resume_gang_input *input)
-{
- return 0;
-}
-
-static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
-{
- union MESAPI__QUERY_MES_STATUS mes_status_pkt;
-
- memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
-
- mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
- mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_status_pkt, sizeof(mes_status_pkt),
- offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
-}
-
-static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
-{
- int i;
- struct amdgpu_device *adev = mes->adev;
- union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
-
- memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
-
- mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
- mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
- mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
- mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
- mes_set_hw_res_pkt.paging_vmid = 0;
- mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
- mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
- mes->query_status_fence_gpu_addr;
-
- for (i = 0; i < MAX_COMPUTE_PIPES; i++)
- mes_set_hw_res_pkt.compute_hqd_mask[i] =
- mes->compute_hqd_mask[i];
-
- for (i = 0; i < MAX_GFX_PIPES; i++)
- mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
-
- for (i = 0; i < MAX_SDMA_PIPES; i++)
- mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
-
- for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
- mes_set_hw_res_pkt.aggregated_doorbells[i] =
- mes->aggregated_doorbells[i];
-
- for (i = 0; i < 5; i++) {
- mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
- mes_set_hw_res_pkt.mmhub_base[i] =
- adev->reg_offset[MMHUB_HWIP][0][i];
- mes_set_hw_res_pkt.osssys_base[i] =
- adev->reg_offset[OSSSYS_HWIP][0][i];
- }
-
- mes_set_hw_res_pkt.disable_reset = 1;
- mes_set_hw_res_pkt.disable_mes_log = 1;
- mes_set_hw_res_pkt.use_different_vmid_compute = 1;
-
- return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
- offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
-}
-
-static void mes_v10_1_init_aggregated_doorbell(struct amdgpu_mes *mes)
-{
- struct amdgpu_device *adev = mes->adev;
- uint32_t data;
-
- data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1);
- data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
- CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
- CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
- data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
- CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
- data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
- WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1, data);
-
- data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2);
- data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
- CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
- CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
- data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
- CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
- data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
- WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2, data);
-
- data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3);
- data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
- CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
- CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
- data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
- CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
- data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
- WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3, data);
-
- data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4);
- data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
- CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
- CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
- data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
- CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
- data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
- WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4, data);
-
- data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5);
- data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
- CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
- CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
- data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
- CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
- data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
- WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5, data);
-
- data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
- WREG32_SOC15(GC, 0, mmCP_HQD_GFX_CONTROL, data);
-}
-
-static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
- .add_hw_queue = mes_v10_1_add_hw_queue,
- .remove_hw_queue = mes_v10_1_remove_hw_queue,
- .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue,
- .suspend_gang = mes_v10_1_suspend_gang,
- .resume_gang = mes_v10_1_resume_gang,
-};
-
-static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
-{
- int r;
- const struct mes_firmware_header_v1_0 *mes_hdr;
- const __le32 *fw_data;
- unsigned fw_size;
-
- mes_hdr = (const struct mes_firmware_header_v1_0 *)
- adev->mes.fw[pipe]->data;
-
- fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
- le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
- fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
-
- r = amdgpu_bo_create_reserved(adev, fw_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.ucode_fw_obj[pipe],
- &adev->mes.ucode_fw_gpu_addr[pipe],
- (void **)&adev->mes.ucode_fw_ptr[pipe]);
- if (r) {
- dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
- return r;
- }
-
- memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);
-
- amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
- amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);
-
- return 0;
-}
-
-static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
-{
- int r;
- const struct mes_firmware_header_v1_0 *mes_hdr;
- const __le32 *fw_data;
- unsigned fw_size;
-
- mes_hdr = (const struct mes_firmware_header_v1_0 *)
- adev->mes.fw[pipe]->data;
-
- fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
- le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
- fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
-
- r = amdgpu_bo_create_reserved(adev, fw_size,
- 64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.data_fw_obj[pipe],
- &adev->mes.data_fw_gpu_addr[pipe],
- (void **)&adev->mes.data_fw_ptr[pipe]);
- if (r) {
- dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
- return r;
- }
-
- memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);
-
- amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
- amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);
-
- return 0;
-}
-
-static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
-{
- amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
- &adev->mes.data_fw_gpu_addr[pipe],
- (void **)&adev->mes.data_fw_ptr[pipe]);
-
- amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj[pipe],
- &adev->mes.ucode_fw_gpu_addr[pipe],
- (void **)&adev->mes.ucode_fw_ptr[pipe]);
-}
-
-static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
-{
- uint32_t pipe, data = 0;
-
- if (enable) {
- data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL,
- MES_PIPE1_RESET, adev->enable_mes_kiq ? 1 : 0);
- WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
-
- mutex_lock(&adev->srbm_mutex);
- for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
- if (!adev->enable_mes_kiq &&
- pipe == AMDGPU_MES_KIQ_PIPE)
- continue;
-
- nv_grbm_select(adev, 3, pipe, 0, 0);
- WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
- (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2);
- }
- nv_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-
- /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
- data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
- data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
- BYPASS_UNCACHED, 0);
- WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);
-
- /* unhalt MES and activate pipe0 */
- data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE,
- adev->enable_mes_kiq ? 1 : 0);
- WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
- udelay(100);
- } else {
- data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
- data = REG_SET_FIELD(data, CP_MES_CNTL,
- MES_INVALIDATE_ICACHE, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET,
- adev->enable_mes_kiq ? 1 : 0);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
- WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
- }
-}
-
-/* This function is for backdoor MES firmware */
-static int mes_v10_1_load_microcode(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
-{
- int r;
- uint32_t data;
-
- mes_v10_1_enable(adev, false);
-
- if (!adev->mes.fw[pipe])
- return -EINVAL;
-
- r = mes_v10_1_allocate_ucode_buffer(adev, pipe);
- if (r)
- return r;
-
- r = mes_v10_1_allocate_ucode_data_buffer(adev, pipe);
- if (r) {
- mes_v10_1_free_ucode_buffers(adev, pipe);
- return r;
- }
-
- WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);
-
- mutex_lock(&adev->srbm_mutex);
- /* me=3, pipe=0, queue=0 */
- nv_grbm_select(adev, 3, pipe, 0, 0);
-
- /* set ucode start address */
- WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
- (uint32_t)(adev->mes.uc_start_addr[pipe]) >> 2);
-
- /* set ucode fimrware address */
- WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
- lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
- WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
- upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
-
- /* set ucode instruction cache boundary to 2M-1 */
- WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);
-
- /* set ucode data firmware address */
- WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
- lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
- WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
- upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
-
- /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
- WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);
-
- /* invalidate ICACHE */
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(10, 3, 0):
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
- break;
- default:
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
- break;
- }
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(10, 3, 0):
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
- break;
- default:
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
- break;
- }
-
- /* prime the ICACHE. */
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(10, 3, 0):
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid);
- break;
- default:
- data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
- break;
- }
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(10, 3, 0):
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data);
- break;
- default:
- WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
- break;
- }
-
- nv_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-
- return 0;
-}
-
-static int mes_v10_1_allocate_eop_buf(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
-{
- int r;
- u32 *eop;
-
- r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->mes.eop_gpu_obj[pipe],
- &adev->mes.eop_gpu_addr[pipe],
- (void **)&eop);
- if (r) {
- dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
- return r;
- }
-
- memset(eop, 0, adev->mes.eop_gpu_obj[pipe]->tbo.base.size);
-
- amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
- amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);
-
- return 0;
-}
-
-static int mes_v10_1_mqd_init(struct amdgpu_ring *ring)
-{
- struct v10_compute_mqd *mqd = ring->mqd_ptr;
- uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
- uint32_t tmp;
-
- memset(mqd, 0, sizeof(*mqd));
-
- mqd->header = 0xC0310800;
- mqd->compute_pipelinestat_enable = 0x00000001;
- mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
- mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
- mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
- mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
- mqd->compute_misc_reserved = 0x00000003;
-
- eop_base_addr = ring->eop_gpu_addr >> 8;
-
- /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- tmp = mmCP_HQD_EOP_CONTROL_DEFAULT;
- tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
- (order_base_2(MES_EOP_SIZE / 4) - 1));
-
- mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
- mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
- mqd->cp_hqd_eop_control = tmp;
-
- /* disable the queue if it's active */
- ring->wptr = 0;
- mqd->cp_hqd_pq_rptr = 0;
- mqd->cp_hqd_pq_wptr_lo = 0;
- mqd->cp_hqd_pq_wptr_hi = 0;
-
- /* set the pointer to the MQD */
- mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
- mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
-
- /* set MQD vmid to 0 */
- tmp = mmCP_MQD_CONTROL_DEFAULT;
- tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
- mqd->cp_mqd_control = tmp;
-
- /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
- hqd_gpu_addr = ring->gpu_addr >> 8;
- mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
- mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-
- /* set the wb address whether it's enabled or not */
- wb_gpu_addr = ring->rptr_gpu_addr;
- mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
- mqd->cp_hqd_pq_rptr_report_addr_hi =
- upper_32_bits(wb_gpu_addr) & 0xffff;
-
- /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = ring->wptr_gpu_addr;
- mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
- mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-
- /* set up the HQD, this is similar to CP_RB0_CNTL */
- tmp = mmCP_HQD_PQ_CONTROL_DEFAULT;
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
- (order_base_2(ring->ring_size / 4) - 1));
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
- ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
-#ifdef __BIG_ENDIAN
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
-#endif
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
- mqd->cp_hqd_pq_control = tmp;
-
- /* enable doorbell? */
- tmp = 0;
- if (ring->use_doorbell) {
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_OFFSET, ring->doorbell_index);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 1);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_SOURCE, 0);
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_HIT, 0);
- }
- else
- tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 0);
- mqd->cp_hqd_pq_doorbell_control = tmp;
-
- mqd->cp_hqd_vmid = 0;
- /* activate the queue */
- mqd->cp_hqd_active = 1;
- mqd->cp_hqd_persistent_state = mmCP_HQD_PERSISTENT_STATE_DEFAULT;
- mqd->cp_hqd_ib_control = mmCP_HQD_IB_CONTROL_DEFAULT;
- mqd->cp_hqd_iq_timer = mmCP_HQD_IQ_TIMER_DEFAULT;
- mqd->cp_hqd_quantum = mmCP_HQD_QUANTUM_DEFAULT;
-
- tmp = mmCP_HQD_GFX_CONTROL_DEFAULT;
- tmp = REG_SET_FIELD(tmp, CP_HQD_GFX_CONTROL, DB_UPDATED_MSG_EN, 1);
- /* offset: 184 - this is used for CP_HQD_GFX_CONTROL */
- mqd->cp_hqd_suspend_cntl_stack_offset = tmp;
-
- amdgpu_device_flush_hdp(ring->adev, NULL);
- return 0;
-}
-
-#if 0
-static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring)
-{
- struct v10_compute_mqd *mqd = ring->mqd_ptr;
- struct amdgpu_device *adev = ring->adev;
- uint32_t data = 0;
-
- mutex_lock(&adev->srbm_mutex);
- nv_grbm_select(adev, 3, ring->pipe, 0, 0);
-
- /* set CP_HQD_VMID.VMID = 0. */
- data = RREG32_SOC15(GC, 0, mmCP_HQD_VMID);
- data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_VMID, data);
-
- /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
- data = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
- data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
- DOORBELL_EN, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
-
- /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
- /* set CP_MQD_CONTROL.VMID=0 */
- data = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
- data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
- WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 0);
-
- /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
- /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
- mqd->cp_hqd_pq_rptr_report_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
- mqd->cp_hqd_pq_rptr_report_addr_hi);
-
- /* set CP_HQD_PQ_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
-
- /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
- mqd->cp_hqd_pq_wptr_poll_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
- mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
- /* set CP_HQD_PQ_DOORBELL_CONTROL */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
- mqd->cp_hqd_pq_doorbell_control);
-
- /* set CP_HQD_PERSISTENT_STATE.PRELOAD_SIZE=0x53 */
- WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
-
- /* set CP_HQD_ACTIVE.ACTIVE=1 */
- WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
-
- nv_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-}
-#endif
-
-static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev)
-{
- struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
- int r;
-
- if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
- return -EINVAL;
-
- r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
-
- return amdgpu_ring_test_helper(kiq_ring);
-}
-
-static int mes_v10_1_queue_init(struct amdgpu_device *adev)
-{
- int r;
-
- r = mes_v10_1_mqd_init(&adev->mes.ring);
- if (r)
- return r;
-
- r = mes_v10_1_kiq_enable_queue(adev);
- if (r)
- return r;
-
- return 0;
-}
-
-static int mes_v10_1_ring_init(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring;
-
- ring = &adev->mes.ring;
-
- ring->funcs = &mes_v10_1_ring_funcs;
-
- ring->me = 3;
- ring->pipe = 0;
- ring->queue = 0;
-
- ring->ring_obj = NULL;
- ring->use_doorbell = true;
- ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
- ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE];
- ring->no_scheduler = true;
- sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-
- return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
-}
-
-static int mes_v10_1_kiq_ring_init(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring;
-
- spin_lock_init(&adev->gfx.kiq[0].ring_lock);
-
- ring = &adev->gfx.kiq[0].ring;
-
- ring->me = 3;
- ring->pipe = 1;
- ring->queue = 0;
-
- ring->adev = NULL;
- ring->ring_obj = NULL;
- ring->use_doorbell = true;
- ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
- ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
- ring->no_scheduler = true;
- sprintf(ring->name, "mes_kiq_%d.%d.%d",
- ring->me, ring->pipe, ring->queue);
-
- return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
-}
-
-static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
-{
- int r, mqd_size = sizeof(struct v10_compute_mqd);
- struct amdgpu_ring *ring;
-
- if (pipe == AMDGPU_MES_KIQ_PIPE)
- ring = &adev->gfx.kiq[0].ring;
- else if (pipe == AMDGPU_MES_SCHED_PIPE)
- ring = &adev->mes.ring;
- else
- BUG();
-
- if (ring->mqd_obj)
- return 0;
-
- r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
- &ring->mqd_gpu_addr, &ring->mqd_ptr);
- if (r) {
- dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
- return r;
- }
- memset(ring->mqd_ptr, 0, mqd_size);
-
- /* prepare MQD backup */
- adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
- if (!adev->mes.mqd_backup[pipe]) {
- dev_warn(adev->dev,
- "no memory to create MQD backup for ring %s\n",
- ring->name);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static int mes_v10_1_sw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int pipe, r;
-
- adev->mes.funcs = &mes_v10_1_funcs;
- adev->mes.kiq_hw_init = &mes_v10_1_kiq_hw_init;
-
- r = amdgpu_mes_init(adev);
- if (r)
- return r;
-
- for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
- if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
- continue;
-
- r = mes_v10_1_allocate_eop_buf(adev, pipe);
- if (r)
- return r;
-
- r = mes_v10_1_mqd_sw_init(adev, pipe);
- if (r)
- return r;
- }
-
- if (adev->enable_mes_kiq) {
- r = mes_v10_1_kiq_ring_init(adev);
- if (r)
- return r;
- }
-
- r = mes_v10_1_ring_init(adev);
- if (r)
- return r;
-
- return 0;
-}
-
-static int mes_v10_1_sw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int pipe;
-
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
-
- for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
- kfree(adev->mes.mqd_backup[pipe]);
-
- amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
- &adev->mes.eop_gpu_addr[pipe],
- NULL);
- amdgpu_ucode_release(&adev->mes.fw[pipe]);
- }
-
- amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
- &adev->gfx.kiq[0].ring.mqd_gpu_addr,
- &adev->gfx.kiq[0].ring.mqd_ptr);
-
- amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
- &adev->mes.ring.mqd_gpu_addr,
- &adev->mes.ring.mqd_ptr);
-
- amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
- amdgpu_ring_fini(&adev->mes.ring);
-
- amdgpu_mes_fini(adev);
- return 0;
-}
-
-static void mes_v10_1_kiq_setting(struct amdgpu_ring *ring)
-{
- uint32_t tmp;
- struct amdgpu_device *adev = ring->adev;
-
- /* tell RLC which is KIQ queue */
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(10, 3, 0):
- case IP_VERSION(10, 3, 2):
- case IP_VERSION(10, 3, 1):
- case IP_VERSION(10, 3, 4):
- tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
- tmp &= 0xffffff00;
- tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- break;
- default:
- tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
- tmp &= 0xffffff00;
- tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- break;
- }
-}
-
-static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev)
-{
- int r = 0;
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- r = mes_v10_1_load_microcode(adev, AMDGPU_MES_KIQ_PIPE);
- if (r) {
- DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
- return r;
- }
-
- r = mes_v10_1_load_microcode(adev, AMDGPU_MES_SCHED_PIPE);
- if (r) {
- DRM_ERROR("failed to load MES fw, r=%d\n", r);
- return r;
- }
- }
-
- mes_v10_1_enable(adev, true);
-
- mes_v10_1_kiq_setting(&adev->gfx.kiq[0].ring);
-
- r = mes_v10_1_queue_init(adev);
- if (r)
- goto failure;
-
- return r;
-
-failure:
- mes_v10_1_hw_fini(adev);
- return r;
-}
-
-static int mes_v10_1_hw_init(void *handle)
-{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->enable_mes_kiq) {
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- r = mes_v10_1_load_microcode(adev,
- AMDGPU_MES_SCHED_PIPE);
- if (r) {
- DRM_ERROR("failed to MES fw, r=%d\n", r);
- return r;
- }
- }
-
- mes_v10_1_enable(adev, true);
- }
-
- r = mes_v10_1_queue_init(adev);
- if (r)
- goto failure;
-
- r = mes_v10_1_set_hw_resources(&adev->mes);
- if (r)
- goto failure;
-
- mes_v10_1_init_aggregated_doorbell(&adev->mes);
-
- r = mes_v10_1_query_sched_status(&adev->mes);
- if (r) {
- DRM_ERROR("MES is busy\n");
- goto failure;
- }
-
- /*
- * Disable KIQ ring usage from the driver once MES is enabled.
- * MES uses KIQ ring exclusively so driver cannot access KIQ ring
- * with MES enabled.
- */
- adev->gfx.kiq[0].ring.sched.ready = false;
- adev->mes.ring.sched.ready = true;
-
- return 0;
-
-failure:
- mes_v10_1_hw_fini(adev);
- return r;
-}
-
-static int mes_v10_1_hw_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- adev->mes.ring.sched.ready = false;
-
- mes_v10_1_enable(adev, false);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
- mes_v10_1_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
- }
-
- return 0;
-}
-
-static int mes_v10_1_suspend(void *handle)
-{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = amdgpu_mes_suspend(adev);
- if (r)
- return r;
-
- return mes_v10_1_hw_fini(adev);
-}
-
-static int mes_v10_1_resume(void *handle)
-{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = mes_v10_1_hw_init(adev);
- if (r)
- return r;
-
- return amdgpu_mes_resume(adev);
-}
-
-static int mes_v10_0_early_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int pipe, r;
-
- for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
- if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
- continue;
- r = amdgpu_mes_init_microcode(adev, pipe);
- if (r)
- return r;
- }
-
- return 0;
-}
-
-static int mes_v10_0_late_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!amdgpu_in_reset(adev))
- amdgpu_mes_self_test(adev);
-
- return 0;
-}
-
-static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
- .name = "mes_v10_1",
- .early_init = mes_v10_0_early_init,
- .late_init = mes_v10_0_late_init,
- .sw_init = mes_v10_1_sw_init,
- .sw_fini = mes_v10_1_sw_fini,
- .hw_init = mes_v10_1_hw_init,
- .hw_fini = mes_v10_1_hw_fini,
- .suspend = mes_v10_1_suspend,
- .resume = mes_v10_1_resume,
-};
-
-const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
- .type = AMD_IP_BLOCK_TYPE_MES,
- .major = 10,
- .minor = 1,
- .rev = 0,
- .funcs = &mes_v10_1_ip_funcs,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 6827d547042e..e82188431f79 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "soc15_common.h"
#include "soc21.h"
+#include "gfx_v11_0.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "gc/gc_11_0_0_default.h"
@@ -47,12 +48,25 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin");
-
-static int mes_v11_0_hw_fini(void *handle);
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes1.bin");
+
+static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block);
+static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block);
static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
#define MES_EOP_SIZE 2048
+#define GFX_MES_DRAM_SIZE 0x80000
+#define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE)
+
+#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 4
static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring)
{
@@ -94,18 +108,79 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
};
+static const char *mes_v11_0_opcodes[] = {
+ "SET_HW_RSRC",
+ "SET_SCHEDULING_CONFIG",
+ "ADD_QUEUE",
+ "REMOVE_QUEUE",
+ "PERFORM_YIELD",
+ "SET_GANG_PRIORITY_LEVEL",
+ "SUSPEND",
+ "RESUME",
+ "RESET",
+ "SET_LOG_BUFFER",
+ "CHANGE_GANG_PRORITY",
+ "QUERY_SCHEDULER_STATUS",
+ "PROGRAM_GDS",
+ "SET_DEBUG_VMID",
+ "MISC",
+ "UPDATE_ROOT_PAGE_TABLE",
+ "AMD_LOG",
+ "unused",
+ "unused",
+ "SET_HW_RSRC_1",
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+ "WRITE_REG",
+ "INV_GART",
+ "QUERY_STATUS",
+ "READ_REG",
+ "WAIT_REG_MEM",
+ "SET_SHADER_DEBUGGER",
+};
+
+static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+ op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
+
+ return op_str;
+}
+
+static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
+ (x_pkt->opcode < ARRAY_SIZE(mes_v11_0_misc_opcodes)))
+ op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
+
+ return op_str;
+}
+
static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size,
int api_status_off)
{
- int ndw = size / 4;
- signed long r;
- union MESAPI__ADD_QUEUE *x_pkt = pkt;
- struct MES_API_STATUS *api_status;
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+ signed long timeout = 2100000; /* 2100 ms */
struct amdgpu_device *adev = mes->adev;
- struct amdgpu_ring *ring = &mes->ring;
+ struct amdgpu_ring *ring = &mes->ring[0];
+ struct MES_API_STATUS *api_status;
+ union MESAPI__MISC *x_pkt = pkt;
+ const char *op_str, *misc_op_str;
unsigned long flags;
- signed long timeout = adev->usec_timeout;
+ u64 status_gpu_addr;
+ u32 seq, status_offset;
+ u64 *status_ptr;
+ signed long r;
+ int ret;
+
+ if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+ return -EINVAL;
if (amdgpu_emu_mode) {
timeout *= 100;
@@ -113,37 +188,92 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
timeout = 15 * 600 * 1000;
}
- BUG_ON(size % 4 != 0);
- spin_lock_irqsave(&mes->ring_lock, flags);
- if (amdgpu_ring_alloc(ring, ndw)) {
- spin_unlock_irqrestore(&mes->ring_lock, flags);
- return -ENOMEM;
- }
+ ret = amdgpu_device_wb_get(adev, &status_offset);
+ if (ret)
+ return ret;
+
+ status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+ status_ptr = (u64 *)&adev->wb.wb[status_offset];
+ *status_ptr = 0;
+
+ spin_lock_irqsave(&mes->ring_lock[0], flags);
+ r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+ if (r)
+ goto error_unlock_free;
+
+ seq = ++ring->fence_drv.sync_seq;
+ r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+ if (r < 1)
+ goto error_undo;
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
- api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
- api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
+ api_status->api_completion_fence_addr = status_gpu_addr;
+ api_status->api_completion_fence_value = 1;
+
+ amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ mes_status_pkt.api_status.api_completion_fence_addr =
+ ring->fence_drv.gpu_addr;
+ mes_status_pkt.api_status.api_completion_fence_value = seq;
+
+ amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+ sizeof(mes_status_pkt) / 4);
- amdgpu_ring_write_multiple(ring, pkt, ndw);
amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&mes->ring_lock, flags);
+ spin_unlock_irqrestore(&mes->ring_lock[0], flags);
- DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+ op_str = mes_v11_0_get_op_string(x_pkt);
+ misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
- r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
- timeout);
- if (r < 1) {
- DRM_ERROR("MES failed to response msg=%d\n",
- x_pkt->header.opcode);
+ if (misc_op_str)
+ dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
+ misc_op_str);
+ else if (op_str)
+ dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
+ else
+ dev_dbg(adev->dev, "MES msg=%d was emitted\n",
+ x_pkt->header.opcode);
+
+ r = amdgpu_fence_wait_polling(ring, seq, timeout);
+ if (r < 1 || !*status_ptr) {
+
+ if (misc_op_str)
+ dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
+ op_str, misc_op_str);
+ else if (op_str)
+ dev_err(adev->dev, "MES failed to respond to msg=%s\n",
+ op_str);
+ else
+ dev_err(adev->dev, "MES failed to respond to msg=%d\n",
+ x_pkt->header.opcode);
while (halt_if_hws_hang)
schedule();
- return -ETIMEDOUT;
+ r = -ETIMEDOUT;
+ goto error_wb_free;
}
+ amdgpu_device_wb_free(adev, status_offset);
return 0;
+
+error_undo:
+ dev_err(adev->dev, "MES ring buffer is full.\n");
+ amdgpu_ring_undo(ring);
+
+error_unlock_free:
+ spin_unlock_irqrestore(&mes->ring_lock[0], flags);
+
+error_wb_free:
+ amdgpu_device_wb_free(adev, status_offset);
+ return r;
}
static int convert_to_mes_queue_type(int queue_type)
@@ -159,6 +289,23 @@ static int convert_to_mes_queue_type(int queue_type)
return -1;
}
+static int convert_to_mes_priority_level(int priority_level)
+{
+ switch (priority_level) {
+ case AMDGPU_MES_PRIORITY_LEVEL_LOW:
+ return AMD_PRIORITY_LEVEL_LOW;
+ case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
+ default:
+ return AMD_PRIORITY_LEVEL_NORMAL;
+ case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
+ return AMD_PRIORITY_LEVEL_MEDIUM;
+ case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
+ return AMD_PRIORITY_LEVEL_HIGH;
+ case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
+ return AMD_PRIORITY_LEVEL_REALTIME;
+ }
+}
+
static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
struct mes_add_queue_input *input)
{
@@ -182,9 +329,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gang_quantum = input->gang_quantum;
mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
mes_add_queue_pkt.inprocess_gang_priority =
- input->inprocess_gang_priority;
+ convert_to_mes_priority_level(input->inprocess_gang_priority);
mes_add_queue_pkt.gang_global_priority_level =
- input->gang_global_priority_level;
+ convert_to_mes_priority_level(input->gang_global_priority_level);
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
@@ -236,6 +383,125 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
+static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t value, reg;
+ int i, r = 0;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
+ me_id, pipe_id, queue_id, vmid);
+
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ gfx_v11_0_request_gfx_index_mutex(adev, true);
+ /* enable broadcast writes to all SEs */
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
+ gfx_v11_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ /* wait until the dequeue takes effect */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ /* wait until the dequeue takes effect */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ switch (me_id) {
+ case 1:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
+ break;
+ case 0:
+ default:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
+ break;
+ }
+
+ value = 1 << queue_id;
+ WREG32(reg, value);
+ /* wait for queue reset done */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(reg) & value))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
+ r = -ETIMEDOUT;
+ }
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_map_legacy_queue_input *input)
+{
+ union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+
+ memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+ mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+ mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_add_queue_pkt.pipe_id = input->pipe_id;
+ mes_add_queue_pkt.queue_id = input->queue_id;
+ mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+ mes_add_queue_pkt.wptr_addr = input->wptr_addr;
+ mes_add_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_add_queue_pkt.map_legacy_kq = 1;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+ offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
struct mes_unmap_legacy_queue_input *input)
{
@@ -272,13 +538,41 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input)
{
- return 0;
+ union MESAPI__SUSPEND mes_suspend_gang_pkt;
+
+ memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt));
+
+ mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND;
+ mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs;
+ mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr;
+ mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr;
+ mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt),
+ offsetof(union MESAPI__SUSPEND, api_status));
}
static int mes_v11_0_resume_gang(struct amdgpu_mes *mes,
struct mes_resume_gang_input *input)
{
- return 0;
+ union MESAPI__RESUME mes_resume_gang_pkt;
+
+ memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt));
+
+ mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME;
+ mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs;
+ mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt),
+ offsetof(union MESAPI__RESUME, api_status));
}
static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
@@ -347,6 +641,19 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
break;
+ case MES_MISC_OP_CHANGE_CONFIG:
+ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) {
+ dev_warn_once(mes->adev->dev,
+ "MES FW version must be larger than 0x63 to support limit single process feature.\n");
+ return 0;
+ }
+ misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
+ misc_pkt.change_config.opcode =
+ MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
+ misc_pkt.change_config.option.bits.limit_single_process =
+ input->change_config.option.limit_single_process;
+ break;
+
default:
DRM_ERROR("unsupported misc op (%d) \n", input->op);
return -EINVAL;
@@ -373,16 +680,17 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
mes_set_hw_res_pkt.paging_vmid = 0;
- mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr;
+ mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0];
mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
- mes->query_status_fence_gpu_addr;
+ mes->query_status_fence_gpu_addr[0];
for (i = 0; i < MAX_COMPUTE_PIPES; i++)
mes_set_hw_res_pkt.compute_hqd_mask[i] =
mes->compute_hqd_mask[i];
for (i = 0; i < MAX_GFX_PIPES; i++)
- mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
+ mes_set_hw_res_pkt.gfx_hqd_mask[i] =
+ mes->gfx_hqd_mask[i];
for (i = 0; i < MAX_SDMA_PIPES; i++)
mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
@@ -403,78 +711,127 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
+ mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
+ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f)
+ mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
+ else
+ dev_info_once(mes->adev->dev,
+ "MES FW version must be >= 0x7f to enable LR compute workaround.\n");
+
+ if (amdgpu_mes_log_enable) {
+ mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
+ mes->event_log_gpu_addr;
+ }
+
+ if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ mes_set_hw_res_pkt.limit_single_process = 1;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
-static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes)
+static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
{
- struct amdgpu_device *adev = mes->adev;
- uint32_t data;
+ union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt;
+ memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+
+ mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
+ mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ mes_set_hw_res_pkt.enable_mes_info_ctx = 1;
+
+ mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0];
+ if (amdgpu_sriov_is_mes_info_enable(mes->adev)) {
+ mes_set_hw_res_pkt.mes_info_ctx_mc_addr =
+ mes->resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE;
+ mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE;
+ }
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
+}
+
+static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
+ struct mes_reset_queue_input *input)
+{
+ union MESAPI__RESET mes_reset_queue_pkt;
+
+ if (input->use_mmio)
+ return mes_v11_0_reset_queue_mmio(mes, input->queue_type,
+ input->me_id, input->pipe_id,
+ input->queue_id, input->vmid);
+
+ memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
+
+ mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
+ mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_reset_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
- data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1);
- data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
- CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
- CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
- data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
- CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
- data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
- WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data);
-
- data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2);
- data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
- CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
- CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
- data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
- CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
- data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
- WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data);
-
- data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3);
- data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
- CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
- CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
- data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
- CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
- data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
- WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data);
-
- data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4);
- data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
- CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
- CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
- data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
- CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
- data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
- WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data);
-
- data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5);
- data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
- CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
- CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
- data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
- CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
- data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
- WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data);
-
- data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
- WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
+ if (input->legacy_gfx) {
+ mes_reset_queue_pkt.reset_legacy_gfx = 1;
+ mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
+ mes_reset_queue_pkt.queue_id_lp = input->queue_id;
+ mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
+ mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
+ mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
+ mes_reset_queue_pkt.vmid_id_lp = input->vmid;
+ } else {
+ mes_reset_queue_pkt.reset_queue_only = 1;
+ mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
+ }
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
+ offsetof(union MESAPI__RESET, api_status));
+}
+
+static int mes_v11_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
+ struct mes_detect_and_reset_queue_input *input)
+{
+ union MESAPI__RESET mes_reset_queue_pkt;
+
+ memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
+
+ mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
+ mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_reset_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+ mes_reset_queue_pkt.doorbell_offset_addr =
+ mes->hung_queue_db_array_gpu_addr;
+
+ if (input->detect_only)
+ mes_reset_queue_pkt.hang_detect_only = 1;
+ else
+ mes_reset_queue_pkt.hang_detect_then_reset = 1;
+
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
+ offsetof(union MESAPI__RESET, api_status));
}
static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
.add_hw_queue = mes_v11_0_add_hw_queue,
.remove_hw_queue = mes_v11_0_remove_hw_queue,
+ .map_legacy_queue = mes_v11_0_map_legacy_queue,
.unmap_legacy_queue = mes_v11_0_unmap_legacy_queue,
.suspend_gang = mes_v11_0_suspend_gang,
.resume_gang = mes_v11_0_resume_gang,
.misc_op = mes_v11_0_misc_op,
+ .reset_hw_queue = mes_v11_0_reset_hw_queue,
+ .detect_and_reset_hung_queues = mes_v11_0_detect_and_reset_hung_queues,
};
static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r;
const struct mes_firmware_header_v1_0 *mes_hdr;
@@ -509,7 +866,7 @@ static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev,
}
static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r;
const struct mes_firmware_header_v1_0 *mes_hdr;
@@ -523,7 +880,13 @@ static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
- r = amdgpu_bo_create_reserved(adev, fw_size,
+ if (fw_size > GFX_MES_DRAM_SIZE) {
+ dev_err(adev->dev, "PIPE%d ucode data fw size (%d) is greater than dram size (%d)\n",
+ pipe, fw_size, GFX_MES_DRAM_SIZE);
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, GFX_MES_DRAM_SIZE,
64 * 1024,
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
@@ -544,7 +907,7 @@ static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
}
static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
&adev->mes.data_fw_gpu_addr[pipe],
@@ -555,12 +918,48 @@ static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev,
(void **)&adev->mes.ucode_fw_ptr[pipe]);
}
+static void mes_v11_0_get_fw_version(struct amdgpu_device *adev)
+{
+ int pipe;
+
+ /* return early if we have already fetched these */
+ if (adev->mes.sched_version && adev->mes.kiq_version)
+ return;
+
+ /* get MES scheduler/KIQ versions */
+ mutex_lock(&adev->srbm_mutex);
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ adev->mes.sched_version =
+ RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+ adev->mes.kiq_version =
+ RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
{
uint64_t ucode_addr;
uint32_t pipe, data = 0;
if (enable) {
+ if (amdgpu_mes_log_enable) {
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
+ lower_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE));
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
+ upper_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE));
+ dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n",
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
+ }
+
data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
data = REG_SET_FIELD(data, CP_MES_CNTL,
@@ -593,7 +992,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
if (amdgpu_emu_mode)
msleep(100);
else
- udelay(50);
+ udelay(500);
} else {
data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
@@ -610,7 +1009,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
/* This function is for backdoor MES firmware */
static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe, bool prime_icache)
+ enum amdgpu_mes_pipe pipe, bool prime_icache)
{
int r;
uint32_t data;
@@ -659,8 +1058,8 @@ static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
- /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
- WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x3FFFF);
+ /* Set 0x7FFFF (512K-1) to CP_MES_MDBOUND_LO */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF);
if (prime_icache) {
/* invalidate ICACHE */
@@ -682,7 +1081,7 @@ static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
}
static int mes_v11_0_allocate_eop_buf(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r;
u32 *eop;
@@ -887,13 +1286,13 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
return r;
}
- kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring);
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);
return amdgpu_ring_test_helper(kiq_ring);
}
static int mes_v11_0_queue_init(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
struct amdgpu_ring *ring;
int r;
@@ -901,7 +1300,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev,
if (pipe == AMDGPU_MES_KIQ_PIPE)
ring = &adev->gfx.kiq[0].ring;
else if (pipe == AMDGPU_MES_SCHED_PIPE)
- ring = &adev->mes.ring;
+ ring = &adev->mes.ring[0];
else
BUG();
@@ -924,18 +1323,6 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev,
mes_v11_0_queue_init_register(ring);
}
- /* get MES scheduler/KIQ versions */
- mutex_lock(&adev->srbm_mutex);
- soc21_grbm_select(adev, 3, pipe, 0, 0);
-
- if (pipe == AMDGPU_MES_SCHED_PIPE)
- adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
- else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
- adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
-
- soc21_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
-
return 0;
}
@@ -943,7 +1330,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- ring = &adev->mes.ring;
+ ring = &adev->mes.ring[0];
ring->funcs = &mes_v11_0_ring_funcs;
@@ -988,7 +1375,7 @@ static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev)
}
static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r, mqd_size = sizeof(struct v11_compute_mqd);
struct amdgpu_ring *ring;
@@ -996,7 +1383,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
if (pipe == AMDGPU_MES_KIQ_PIPE)
ring = &adev->gfx.kiq[0].ring;
else if (pipe == AMDGPU_MES_SCHED_PIPE)
- ring = &adev->mes.ring;
+ ring = &adev->mes.ring[0];
else
BUG();
@@ -1026,15 +1413,17 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
return 0;
}
-static int mes_v11_0_sw_init(void *handle)
+static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int pipe, r;
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe, r, bo_size;
adev->mes.funcs = &mes_v11_0_funcs;
adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
+ adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;
+
r = amdgpu_mes_init(adev);
if (r)
return r;
@@ -1062,16 +1451,33 @@ static int mes_v11_0_sw_init(void *handle)
if (r)
return r;
+ bo_size = AMDGPU_GPU_PAGE_SIZE;
+ if (amdgpu_sriov_is_mes_info_enable(adev))
+ bo_size += MES11_HW_RESOURCE_1_SIZE;
+
+ /* Only needed for AMDGPU_MES_SCHED_PIPE on MES 11*/
+ r = amdgpu_bo_create_kernel(adev,
+ bo_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.resource_1[0],
+ &adev->mes.resource_1_gpu_addr[0],
+ &adev->mes.resource_1_addr[0]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
+ return r;
+ }
+
return 0;
}
-static int mes_v11_0_sw_fini(void *handle)
+static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int pipe;
- amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
- amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
+ amdgpu_bo_free_kernel(&adev->mes.resource_1[0], &adev->mes.resource_1_gpu_addr[0],
+ &adev->mes.resource_1_addr[0]);
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
kfree(adev->mes.mqd_backup[pipe]);
@@ -1086,12 +1492,12 @@ static int mes_v11_0_sw_fini(void *handle)
&adev->gfx.kiq[0].ring.mqd_gpu_addr,
&adev->gfx.kiq[0].ring.mqd_ptr);
- amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj,
- &adev->mes.ring.mqd_gpu_addr,
- &adev->mes.ring.mqd_ptr);
+ amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj,
+ &adev->mes.ring[0].mqd_gpu_addr,
+ &adev->mes.ring[0].mqd_ptr);
amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
- amdgpu_ring_fini(&adev->mes.ring);
+ amdgpu_ring_fini(&adev->mes.ring[0]);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
@@ -1146,9 +1552,7 @@ static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void mes_v11_0_kiq_clear(struct amdgpu_device *adev)
@@ -1164,6 +1568,7 @@ static void mes_v11_0_kiq_clear(struct amdgpu_device *adev)
static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
{
int r = 0;
+ struct amdgpu_ip_block *ip_block;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
@@ -1183,24 +1588,43 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
mes_v11_0_enable(adev, true);
+ mes_v11_0_get_fw_version(adev);
+
mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
+ if (unlikely(!ip_block)) {
+ dev_err(adev->dev, "Failed to get MES handle\n");
+ return -EINVAL;
+ }
+
r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
if (r)
goto failure;
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x47)
+ adev->mes.enable_legacy_queue_map = true;
+ else
+ adev->mes.enable_legacy_queue_map = false;
+
+ if (adev->mes.enable_legacy_queue_map) {
+ r = mes_v11_0_hw_init(ip_block);
+ if (r)
+ goto failure;
+ }
+
return r;
failure:
- mes_v11_0_hw_fini(adev);
+ mes_v11_0_hw_fini(ip_block);
return r;
}
static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
{
- if (adev->mes.ring.sched.ready) {
- mes_v11_0_kiq_dequeue(&adev->mes.ring);
- adev->mes.ring.sched.ready = false;
+ if (adev->mes.ring[0].sched.ready) {
+ mes_v11_0_kiq_dequeue(&adev->mes.ring[0]);
+ adev->mes.ring[0].sched.ready = false;
}
if (amdgpu_sriov_vf(adev)) {
@@ -1213,10 +1637,13 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
return 0;
}
-static int mes_v11_0_hw_init(void *handle)
+static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (adev->mes.ring[0].sched.ready)
+ goto out;
if (!adev->enable_mes_kiq) {
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
@@ -1239,7 +1666,13 @@ static int mes_v11_0_hw_init(void *handle)
if (r)
goto failure;
- mes_v11_0_init_aggregated_doorbell(&adev->mes);
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) {
+ r = mes_v11_0_set_hw_resources_1(&adev->mes);
+ if (r) {
+ DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
+ goto failure;
+ }
+ }
r = mes_v11_0_query_sched_status(&adev->mes);
if (r) {
@@ -1247,55 +1680,48 @@ static int mes_v11_0_hw_init(void *handle)
goto failure;
}
+ r = amdgpu_mes_update_enforce_isolation(adev);
+ if (r)
+ goto failure;
+
+out:
/*
* Disable KIQ ring usage from the driver once MES is enabled.
* MES uses KIQ ring exclusively so driver cannot access KIQ ring
* with MES enabled.
*/
adev->gfx.kiq[0].ring.sched.ready = false;
- adev->mes.ring.sched.ready = true;
+ adev->mes.ring[0].sched.ready = true;
return 0;
failure:
- mes_v11_0_hw_fini(adev);
+ mes_v11_0_hw_fini(ip_block);
return r;
}
-static int mes_v11_0_hw_fini(void *handle)
+static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int mes_v11_0_suspend(void *handle)
+static int mes_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = amdgpu_mes_suspend(adev);
- if (r)
- return r;
-
- return mes_v11_0_hw_fini(adev);
+ return mes_v11_0_hw_fini(ip_block);
}
-static int mes_v11_0_resume(void *handle)
+static int mes_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = mes_v11_0_hw_init(adev);
- if (r)
- return r;
-
- return amdgpu_mes_resume(adev);
+ return mes_v11_0_hw_init(ip_block);
}
-static int mes_v11_0_early_init(void *handle)
+static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int pipe, r;
+ adev->mes.hung_queue_db_array_size =
+ MES11_HUNG_DB_OFFSET_ARRAY_SIZE;
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
continue;
@@ -1307,22 +1733,10 @@ static int mes_v11_0_early_init(void *handle)
return 0;
}
-static int mes_v11_0_late_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- /* it's only intended for use in mes_self_test case, not for s0ix and reset */
- if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend &&
- (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)))
- amdgpu_mes_self_test(adev);
-
- return 0;
-}
-
static const struct amd_ip_funcs mes_v11_0_ip_funcs = {
.name = "mes_v11_0",
.early_init = mes_v11_0_early_init,
- .late_init = mes_v11_0_late_init,
+ .late_init = NULL,
.sw_init = mes_v11_0_sw_init,
.sw_fini = mes_v11_0_sw_fini,
.hw_init = mes_v11_0_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
new file mode 100644
index 000000000000..aff06f06aeee
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -0,0 +1,1931 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "gfx_v12_0.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
+#include "v12_structs.h"
+#include "mes_v12_api_def.h"
+
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_uni_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_uni_mes.bin");
+
+static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block);
+static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block);
+static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev);
+static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev);
+
+#define MES_EOP_SIZE 2048
+
+#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 4
+
+/*
+ * Store the ring's write pointer at wptr_cpu_addr and ring the doorbell.
+ * A ring that does not use a doorbell hits BUG().
+ */
+static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (!ring->use_doorbell)
+		BUG();
+
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+
+/* Return the read pointer value currently stored at ring->rptr_cpu_addr. */
+static u64 mes_v12_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	u64 rptr = *ring->rptr_cpu_addr;
+
+	return rptr;
+}
+
+/*
+ * Read the write pointer back from wptr_cpu_addr.
+ * A ring that does not use a doorbell hits BUG().
+ */
+static u64 mes_v12_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	if (!ring->use_doorbell)
+		BUG();
+
+	return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+}
+
+/*
+ * Ring callback table for rings of type AMDGPU_RING_TYPE_MES: read/write
+ * pointer accessors defined above plus the generic NOP inserter.
+ */
+static const struct amdgpu_ring_funcs mes_v12_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_MES,
+ .align_mask = 1,
+ .nop = 0,
+ .support_64bit_ptrs = true,
+ .get_rptr = mes_v12_0_ring_get_rptr,
+ .get_wptr = mes_v12_0_ring_get_wptr,
+ .set_wptr = mes_v12_0_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+/*
+ * Printable names for the scheduler API opcodes, indexed by the opcode value
+ * found in the packet header (see mes_v12_0_get_op_string()). The entry order
+ * must therefore follow the opcode numbering; the strings only appear in
+ * debug/error log messages.
+ */
+static const char * const mes_v12_0_opcodes[] = {
+	"SET_HW_RSRC",
+	"SET_SCHEDULING_CONFIG",
+	"ADD_QUEUE",
+	"REMOVE_QUEUE",
+	"PERFORM_YIELD",
+	"SET_GANG_PRIORITY_LEVEL",
+	"SUSPEND",
+	"RESUME",
+	"RESET",
+	"SET_LOG_BUFFER",
+	"CHANGE_GANG_PRIORITY",
+	"QUERY_SCHEDULER_STATUS",
+	"unused",
+	"SET_DEBUG_VMID",
+	"MISC",
+	"UPDATE_ROOT_PAGE_TABLE",
+	"AMD_LOG",
+	"SET_SE_MODE",
+	"SET_GANG_SUBMIT",
+	"SET_HW_RSRC_1",
+	"INVALIDATE_TLBS",
+};
+
+/*
+ * Printable names for MISC sub-opcodes, indexed by the packet's opcode field
+ * (see mes_v12_0_get_misc_op_string()). Sub-opcodes beyond the end of this
+ * table are logged without a sub-opcode name.
+ * NOTE(review): mes_v12_0_misc_op() also issues MESAPI_MISC__CHANGE_CONFIG,
+ * which has no entry here - confirm the table matches the enum numbering.
+ */
+static const char *mes_v12_0_misc_opcodes[] = {
+ "WRITE_REG",
+ "INV_GART",
+ "QUERY_STATUS",
+ "READ_REG",
+ "WAIT_REG_MEM",
+ "SET_SHADER_DEBUGGER",
+ "NOTIFY_WORK_ON_UNMAPPED_QUEUE",
+ "NOTIFY_TO_UNMAP_PROCESSES",
+};
+
+/*
+ * Map the header opcode of @x_pkt to its printable name, or NULL when the
+ * opcode has no entry in mes_v12_0_opcodes[].
+ */
+static const char *mes_v12_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+	if (x_pkt->header.opcode >= ARRAY_SIZE(mes_v12_0_opcodes))
+		return NULL;
+
+	return mes_v12_0_opcodes[x_pkt->header.opcode];
+}
+
+/*
+ * Map the MISC sub-opcode of @x_pkt to its printable name. Returns NULL when
+ * the packet is not a MES_SCH_API_MISC packet or the sub-opcode has no entry
+ * in mes_v12_0_misc_opcodes[].
+ */
+static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+	if (x_pkt->header.opcode != MES_SCH_API_MISC)
+		return NULL;
+
+	if (x_pkt->opcode >= ARRAY_SIZE(mes_v12_0_misc_opcodes))
+		return NULL;
+
+	return mes_v12_0_misc_opcodes[x_pkt->opcode];
+}
+
+/*
+ * Write one MES API packet, followed by a QUERY_SCHEDULER_STATUS packet, to
+ * the ring of @pipe and poll until the firmware has signalled completion.
+ *
+ * @mes: MES instance owning the rings
+ * @pipe: index into mes->ring[] and mes->ring_lock[]
+ * @pkt: packet to submit; its completion fence address/value are filled here
+ * @size: size of @pkt in bytes
+ * @api_status_off: byte offset of the struct MES_API_STATUS inside @pkt
+ *
+ * Completion is detected twice over: the ring fence must reach the new
+ * sequence number and the per-call writeback slot must have become non-zero.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range opcode, -ETIMEDOUT when
+ * either fence wait expires, or the error code reported by the writeback
+ * slot / ring allocation.
+ */
+static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
+						     int pipe, void *pkt, int size,
+						     int api_status_off)
+{
+	union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+	signed long timeout = 2100000; /* 2100 ms */
+	struct amdgpu_device *adev = mes->adev;
+	struct amdgpu_ring *ring = &mes->ring[pipe];
+	spinlock_t *ring_lock = &mes->ring_lock[pipe];
+	struct MES_API_STATUS *api_status;
+	union MESAPI__MISC *x_pkt = pkt;
+	const char *op_str, *misc_op_str;
+	unsigned long flags;
+	u64 status_gpu_addr;
+	u32 seq, status_offset;
+	u64 *status_ptr;
+	signed long r;
+	int ret;
+
+	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+		return -EINVAL;
+
+	if (amdgpu_emu_mode) {
+		timeout *= 100;
+	} else if (amdgpu_sriov_vf(adev)) {
+		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
+		timeout = 15 * 600 * 1000;
+	}
+
+	/* Per-call writeback slot the firmware writes to on completion. */
+	ret = amdgpu_device_wb_get(adev, &status_offset);
+	if (ret)
+		return ret;
+
+	status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+	status_ptr = (u64 *)&adev->wb.wb[status_offset];
+	*status_ptr = 0;
+
+	spin_lock_irqsave(ring_lock, flags);
+	r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+	if (r)
+		goto error_unlock_free;
+
+	seq = ++ring->fence_drv.sync_seq;
+	r = amdgpu_fence_wait_polling(ring,
+				      seq - ring->fence_drv.num_fences_mask,
+				      timeout);
+	if (r < 1) {
+		/*
+		 * A timed-out wait is reported as a value below 1, which may
+		 * be 0. Convert it to an errno so that the caller is not told
+		 * a packet that was never written has succeeded.
+		 */
+		r = -ETIMEDOUT;
+		goto error_undo;
+	}
+
+	api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
+	api_status->api_completion_fence_addr = status_gpu_addr;
+	api_status->api_completion_fence_value = 1;
+
+	amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+	/* Trailing status query whose completion bumps the ring fence to seq. */
+	memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+	mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+	mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+	mes_status_pkt.api_status.api_completion_fence_addr =
+		ring->fence_drv.gpu_addr;
+	mes_status_pkt.api_status.api_completion_fence_value = seq;
+
+	amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+				   sizeof(mes_status_pkt) / 4);
+
+	amdgpu_ring_commit(ring);
+	spin_unlock_irqrestore(ring_lock, flags);
+
+	op_str = mes_v12_0_get_op_string(x_pkt);
+	misc_op_str = mes_v12_0_get_misc_op_string(x_pkt);
+
+	if (misc_op_str)
+		dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n",
+			pipe, op_str, misc_op_str);
+	else if (op_str)
+		dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n",
+			pipe, op_str);
+	else
+		dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n",
+			pipe, x_pkt->header.opcode);
+
+	r = amdgpu_fence_wait_polling(ring, seq, timeout);
+	if (r < 1 || !*status_ptr) {
+		if (misc_op_str)
+			dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n",
+				pipe, op_str, misc_op_str);
+		else if (op_str)
+			dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n",
+				pipe, op_str);
+		else
+			dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n",
+				pipe, x_pkt->header.opcode);
+
+		/* Debug aid: spin here while halt_if_hws_hang is set. */
+		while (halt_if_hws_hang)
+			schedule();
+
+		r = -ETIMEDOUT;
+		goto error_wb_free;
+	}
+
+	amdgpu_device_wb_free(adev, status_offset);
+	return 0;
+
+error_undo:
+	dev_err(adev->dev, "MES ring buffer is full.\n");
+	amdgpu_ring_undo(ring);
+
+error_unlock_free:
+	spin_unlock_irqrestore(ring_lock, flags);
+
+error_wb_free:
+	amdgpu_device_wb_free(adev, status_offset);
+	return r;
+}
+
+/*
+ * Translate an AMDGPU_RING_TYPE_* value into the matching MES_QUEUE_TYPE_*
+ * value. Any ring type other than GFX, COMPUTE, SDMA or MES hits BUG().
+ */
+static int convert_to_mes_queue_type(int queue_type)
+{
+	switch (queue_type) {
+	case AMDGPU_RING_TYPE_GFX:
+		return MES_QUEUE_TYPE_GFX;
+	case AMDGPU_RING_TYPE_COMPUTE:
+		return MES_QUEUE_TYPE_COMPUTE;
+	case AMDGPU_RING_TYPE_SDMA:
+		return MES_QUEUE_TYPE_SDMA;
+	case AMDGPU_RING_TYPE_MES:
+		return MES_QUEUE_TYPE_SCHQ;
+	default:
+		BUG();
+	}
+
+	return -1;
+}
+
+/*
+ * Translate an AMDGPU_MES_PRIORITY_LEVEL_* value into the matching
+ * AMD_PRIORITY_LEVEL_* value; unknown levels map to NORMAL.
+ */
+static int convert_to_mes_priority_level(int priority_level)
+{
+	switch (priority_level) {
+	case AMDGPU_MES_PRIORITY_LEVEL_LOW:
+		return AMD_PRIORITY_LEVEL_LOW;
+	case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
+		return AMD_PRIORITY_LEVEL_MEDIUM;
+	case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
+		return AMD_PRIORITY_LEVEL_HIGH;
+	case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
+		return AMD_PRIORITY_LEVEL_REALTIME;
+	case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
+	default:
+		return AMD_PRIORITY_LEVEL_NORMAL;
+	}
+}
+
+/*
+ * Build an ADD_QUEUE packet from @input and submit it on the scheduler pipe.
+ *
+ * Priority levels and the queue type are converted to their MES encodings;
+ * the VM context control value is taken from GFX hub 0.
+ *
+ * Returns the result of mes_v12_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
+				  struct mes_add_queue_input *input)
+{
+	struct amdgpu_device *adev = mes->adev;
+	union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+	uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
+
+	memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
+
+	mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+	mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+	mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	mes_add_queue_pkt.process_id = input->process_id;
+	mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
+	mes_add_queue_pkt.process_va_start = input->process_va_start;
+	mes_add_queue_pkt.process_va_end = input->process_va_end;
+	mes_add_queue_pkt.process_quantum = input->process_quantum;
+	mes_add_queue_pkt.process_context_addr = input->process_context_addr;
+	mes_add_queue_pkt.gang_quantum = input->gang_quantum;
+	mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
+	mes_add_queue_pkt.inprocess_gang_priority =
+		convert_to_mes_priority_level(input->inprocess_gang_priority);
+	mes_add_queue_pkt.gang_global_priority_level =
+		convert_to_mes_priority_level(input->gang_global_priority_level);
+	mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
+	mes_add_queue_pkt.mqd_addr = input->mqd_addr;
+
+	mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
+
+	mes_add_queue_pkt.queue_type =
+		convert_to_mes_queue_type(input->queue_type);
+	mes_add_queue_pkt.paging = input->paging;
+	mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+	mes_add_queue_pkt.gws_base = input->gws_base;
+	mes_add_queue_pkt.gws_size = input->gws_size;
+	mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
+	mes_add_queue_pkt.tma_addr = input->tma_addr;
+	mes_add_queue_pkt.trap_en = input->trap_en;
+	mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear;
+	mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+
+	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
+	mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
+	mes_add_queue_pkt.gds_size = input->queue_size;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_SCHED_PIPE,
+			&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+			offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+/*
+ * Build a REMOVE_QUEUE packet carrying the doorbell offset and gang context
+ * address from @input and submit it on the scheduler pipe.
+ */
+static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
+				     struct mes_remove_queue_input *input)
+{
+	union MESAPI__REMOVE_QUEUE pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.gang_context_addr = input->gang_context_addr;
+	pkt.doorbell_offset = input->doorbell_offset;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_SCHED_PIPE, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+/*
+ * Request (@req true) or release (@req false) CP_GFX_INDEX_MUTEX.
+ *
+ * Each iteration writes the REQUEST field together with CLIENTID 4, reads the
+ * register back and, if the expected state is not yet visible, delays 1 us.
+ * At most adev->usec_timeout iterations are attempted.
+ *
+ * Returns 0 once the register shows the expected state, -EINVAL on timeout.
+ */
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req)
+{
+ u32 i, tmp, val;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* Request with MeId=2, PipeId=0 */
+ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+ WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+ val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+ if (req) {
+ /* acquired once the read-back equals what was written */
+ if (val == tmp)
+ break;
+ } else {
+ /* released once the register no longer shows our own pending request */
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+ REQUEST, 1);
+
+ /* unlocked or locked by firmware */
+ if (val != tmp)
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * Reset one hardware queue through direct register access.
+ *
+ * The whole sequence runs between amdgpu_gfx_rlc_enter_safe_mode() and
+ * amdgpu_gfx_rlc_exit_safe_mode(). Depending on @queue_type:
+ * - GFX: writes CP_VMID_RESET for @vmid and @queue_id while holding the GFX
+ * index mutex, then polls CP_GFX_HQD_ACTIVE of the selected queue.
+ * - COMPUTE: selects the queue, writes CP_HQD_DEQUEUE_REQUEST and
+ * SPI_COMPUTE_QUEUE_RESET, then polls CP_HQD_ACTIVE.
+ * - SDMA: writes the queue bit to SDMA0/SDMA1_QUEUE_RESET_REQ (chosen by
+ * @me_id) and polls until the bit clears.
+ * Any other @queue_type performs no reset and returns 0.
+ *
+ * Each poll loop runs for at most adev->usec_timeout iterations of 1 us.
+ *
+ * Returns 0 on success or -ETIMEDOUT when the poll loop expires.
+ */
+static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t value, reg;
+ int i, r = 0;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
+ me_id, pipe_id, queue_id, vmid);
+
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ /* NOTE(review): the return value of the mutex request is not checked */
+ gfx_v12_0_request_gfx_index_mutex(adev, true);
+ /* all se allow writes */
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
+ gfx_v12_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+
+ /* restore the default GRBM selection before dropping the lock */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+ /* restore the default GRBM selection before dropping the lock */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ /* me_id selects the SDMA instance; anything but 1 uses SDMA0 */
+ switch (me_id) {
+ case 1:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
+ break;
+ case 0:
+ default:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
+ break;
+ }
+
+ value = 1 << queue_id;
+ WREG32(reg, value);
+ /* wait for queue reset done */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(reg) & value))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
+ r = -ETIMEDOUT;
+ }
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+/*
+ * Map a legacy kernel queue by sending an ADD_QUEUE packet with
+ * map_legacy_kq set. The packet goes to the KIQ pipe when unified MES is
+ * enabled and to the scheduler pipe otherwise.
+ */
+static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
+				      struct mes_map_legacy_queue_input *input)
+{
+	union MESAPI__ADD_QUEUE pkt;
+	int pipe = mes->adev->enable_uni_mes ? AMDGPU_MES_KIQ_PIPE :
+					       AMDGPU_MES_SCHED_PIPE;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.map_legacy_kq = 1;
+	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+	pkt.pipe_id = input->pipe_id;
+	pkt.queue_id = input->queue_id;
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.mqd_addr = input->mqd_addr;
+	pkt.wptr_addr = input->wptr_addr;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__ADD_QUEUE, api_status));
+}
+
+/*
+ * Unmap or preempt a legacy kernel queue with a REMOVE_QUEUE packet.
+ *
+ * PREEMPT_QUEUES_NO_UNMAP requests a legacy GFX queue preemption and passes
+ * the trailing fence address/value; every other action requests an unmap of
+ * the given queue type. The packet goes to the KIQ pipe when unified MES is
+ * enabled and to the scheduler pipe otherwise.
+ */
+static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes,
+			struct mes_unmap_legacy_queue_input *input)
+{
+	union MESAPI__REMOVE_QUEUE pkt;
+	int pipe = mes->adev->enable_uni_mes ? AMDGPU_MES_KIQ_PIPE :
+					       AMDGPU_MES_SCHED_PIPE;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.pipe_id = input->pipe_id;
+	pkt.queue_id = input->queue_id;
+	pkt.doorbell_offset = input->doorbell_offset;
+	pkt.gang_context_addr = 0;
+
+	if (input->action != PREEMPT_QUEUES_NO_UNMAP) {
+		pkt.unmap_legacy_queue = 1;
+		pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+	} else {
+		pkt.preempt_legacy_gfx_queue = 1;
+		pkt.tf_addr = input->trail_fence_addr;
+		pkt.tf_data = lower_32_bits(input->trail_fence_data);
+	}
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+}
+
+/*
+ * Send a SUSPEND packet on the scheduler pipe, forwarding the gang selection
+ * and the suspend fence address/value from @input.
+ */
+static int mes_v12_0_suspend_gang(struct amdgpu_mes *mes,
+				  struct mes_suspend_gang_input *input)
+{
+	union MESAPI__SUSPEND pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_SUSPEND;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.gang_context_addr = input->gang_context_addr;
+	pkt.suspend_all_gangs = input->suspend_all_gangs;
+	pkt.suspend_fence_value = input->suspend_fence_value;
+	pkt.suspend_fence_addr = input->suspend_fence_addr;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_SCHED_PIPE, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__SUSPEND, api_status));
+}
+
+/*
+ * Send a RESUME packet on the scheduler pipe, forwarding the gang selection
+ * from @input.
+ */
+static int mes_v12_0_resume_gang(struct amdgpu_mes *mes,
+				 struct mes_resume_gang_input *input)
+{
+	union MESAPI__RESUME pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_RESUME;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.gang_context_addr = input->gang_context_addr;
+	pkt.resume_all_gangs = input->resume_all_gangs;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_SCHED_PIPE, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__RESUME, api_status));
+}
+
+/*
+ * Send a QUERY_SCHEDULER_STATUS packet on @pipe and wait for its completion.
+ */
+static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe)
+{
+	union MESAPI__QUERY_MES_STATUS pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
+}
+
+/*
+ * Build and submit a MISC packet for the operation selected by input->op.
+ *
+ * The packet goes to the KIQ pipe when unified MES is enabled and to the
+ * scheduler pipe otherwise; MES_MISC_OP_SET_SHADER_DEBUGGER always uses the
+ * scheduler pipe. An unsupported op is logged and rejected with -EINVAL.
+ */
+static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
+			     struct mes_misc_op_input *input)
+{
+	union MESAPI__MISC pkt;
+	int pipe = mes->adev->enable_uni_mes ? AMDGPU_MES_KIQ_PIPE :
+					       AMDGPU_MES_SCHED_PIPE;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_MISC;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	switch (input->op) {
+	case MES_MISC_OP_READ_REG:
+		pkt.opcode = MESAPI_MISC__READ_REG;
+		pkt.read_reg.reg_offset = input->read_reg.reg_offset;
+		pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
+		break;
+	case MES_MISC_OP_WRITE_REG:
+		pkt.opcode = MESAPI_MISC__WRITE_REG;
+		pkt.write_reg.reg_offset = input->write_reg.reg_offset;
+		pkt.write_reg.reg_value = input->write_reg.reg_value;
+		break;
+	case MES_MISC_OP_WRM_REG_WAIT:
+		/* plain wait: only the first register offset is used */
+		pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+		pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
+		pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+		pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+		pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+		pkt.wait_reg_mem.reg_offset2 = 0;
+		break;
+	case MES_MISC_OP_WRM_REG_WR_WAIT:
+		/* write-then-wait: both register offsets are passed on */
+		pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+		pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
+		pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+		pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+		pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+		pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
+		break;
+	case MES_MISC_OP_SET_SHADER_DEBUGGER:
+		pipe = AMDGPU_MES_SCHED_PIPE;
+		pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
+		pkt.set_shader_debugger.process_context_addr =
+			input->set_shader_debugger.process_context_addr;
+		pkt.set_shader_debugger.flags.u32all =
+			input->set_shader_debugger.flags.u32all;
+		pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
+			input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
+		memcpy(pkt.set_shader_debugger.tcp_watch_cntl,
+		       input->set_shader_debugger.tcp_watch_cntl,
+		       sizeof(pkt.set_shader_debugger.tcp_watch_cntl));
+		pkt.set_shader_debugger.trap_en =
+			input->set_shader_debugger.trap_en;
+		break;
+	case MES_MISC_OP_CHANGE_CONFIG:
+		pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
+		pkt.change_config.opcode =
+			MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
+		pkt.change_config.option.bits.limit_single_process =
+			input->change_config.option.limit_single_process;
+		break;
+	default:
+		DRM_ERROR("unsupported misc op (%d) \n", input->op);
+		return -EINVAL;
+	}
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI__MISC, api_status));
+}
+
+/*
+ * Send a SET_HW_RSRC_1 packet on @pipe carrying the KIQ unmap timeout (0xa)
+ * and the resource_1 GPU address of that pipe as the cleaner shader fence
+ * address.
+ */
+static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe)
+{
+	union MESAPI_SET_HW_RESOURCES_1 pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[pipe];
+	pkt.mes_kiq_unmap_timeout = 0xa;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+			&pkt, sizeof(pkt),
+			offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
+}
+
+/*
+ * mes_v12_0_set_hw_resources - send the SET_HW_RSRC packet for one MES pipe
+ *
+ * @mes: MES state supplying masks, doorbells and per-pipe GPU addresses
+ * @pipe: pipe whose per-pipe addresses are used and to which the packet
+ * is submitted
+ *
+ * The VMID masks, GDS size, HQD masks and aggregated doorbells are only
+ * filled in when @pipe is AMDGPU_MES_SCHED_PIPE; all other fields are set
+ * for any pipe.
+ *
+ * Returns the result of mes_v12_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
+{
+ int i;
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI_SET_HW_RESOURCES mes_set_hw_res_pkt;
+
+ memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+
+ mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC;
+ mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ /* scheduling resources are only described to the scheduler pipe */
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub;
+ mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub;
+ mes_set_hw_res_pkt.gds_size = adev->gds.gds_size;
+ mes_set_hw_res_pkt.paging_vmid = 0;
+
+ for (i = 0; i < MAX_COMPUTE_PIPES; i++)
+ mes_set_hw_res_pkt.compute_hqd_mask[i] =
+ mes->compute_hqd_mask[i];
+
+ for (i = 0; i < MAX_GFX_PIPES; i++)
+ mes_set_hw_res_pkt.gfx_hqd_mask[i] =
+ mes->gfx_hqd_mask[i];
+
+ for (i = 0; i < MAX_SDMA_PIPES; i++)
+ mes_set_hw_res_pkt.sdma_hqd_mask[i] =
+ mes->sdma_hqd_mask[i];
+
+ for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
+ mes->aggregated_doorbells[i];
+ }
+
+ mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr =
+ mes->sch_ctx_gpu_addr[pipe];
+ mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr =
+ mes->query_status_fence_gpu_addr[pipe];
+
+ /* copy five reg_offset entries each for the GC, MMHUB and OSSSYS blocks */
+ for (i = 0; i < 5; i++) {
+ mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+ mes_set_hw_res_pkt.mmhub_base[i] =
+ adev->reg_offset[MMHUB_HWIP][0][i];
+ mes_set_hw_res_pkt.osssys_base[i] =
+ adev->reg_offset[OSSSYS_HWIP][0][i];
+ }
+
+ mes_set_hw_res_pkt.disable_reset = 1;
+ mes_set_hw_res_pkt.disable_mes_log = 1;
+ mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+ mes_set_hw_res_pkt.enable_reg_active_poll = 1;
+ mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
+ /* the LR compute workaround is only requested from firmware >= 0x82 */
+ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82)
+ mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
+ else
+ dev_info_once(adev->dev,
+ "MES FW version must be >= 0x82 to enable LR compute workaround.\n");
+
+ /*
+ * Keep the oversubscribe timer for SDMA. Once unmapped doorbell
+ * handling is supported, other queues will not use the oversubscribe
+ * timer.
+ * handling mode - 0: disabled; 1: basic version; 2: basic+ version
+ */
+ mes_set_hw_res_pkt.oversubscription_timer = 50;
+ mes_set_hw_res_pkt.unmapped_doorbell_handling = 1;
+
+ /* each pipe gets its own (log buffer + mscratch) slice of the event log */
+ if (amdgpu_mes_log_enable) {
+ mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr +
+ pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
+ }
+
+ if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ mes_set_hw_res_pkt.limit_single_process = 1;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+}
+
+/*
+ * mes_v12_0_init_aggregated_doorbell - program the aggregated doorbells
+ *
+ * @mes: MES state holding the aggregated doorbell of each priority level
+ *
+ * CP_MES_DOORBELL_CONTROL1..5 receive the doorbell of the LOW, NORMAL,
+ * MEDIUM, HIGH and REALTIME priority levels, in that order. For each
+ * register the OFFSET, EN and HIT fields are cleared, then the new offset
+ * is written and EN is set. Finally CP_HQD_GFX_CONTROL is written with only
+ * DB_UPDATED_MSG_EN set.
+ */
+static void mes_v12_0_init_aggregated_doorbell(struct amdgpu_mes *mes)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t data;
+
+ /* CONTROL1: low priority */
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1);
+ data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data);
+
+ /* CONTROL2: normal priority */
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2);
+ data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data);
+
+ /* CONTROL3: medium priority */
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3);
+ data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data);
+
+ /* CONTROL4: high priority */
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4);
+ data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data);
+
+ /* CONTROL5: realtime priority */
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5);
+ data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data);
+
+ /* not a read-modify-write: every other bit of the register becomes 0 */
+ data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
+}
+
+
+/*
+ * mes_v12_0_enable_unmapped_doorbell_handling - program CP_UNMAPPED_DOORBELL
+ *
+ * @mes: MES state; only mes->adev is used, for register access
+ * @enable: true to set the ENABLE bit, false to clear it
+ *
+ * Besides the ENABLE bit this always rewrites PROC_LSB to 0xd (see below).
+ */
+static void mes_v12_0_enable_unmapped_doorbell_handling(
+		struct amdgpu_mes *mes, bool enable)
+{
+	struct amdgpu_device *adev = mes->adev;
+	uint32_t data = RREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL);
+
+	/*
+	 * The default PROC_LSB setting is 0xc which means doorbell
+	 * addr[16:12] gives the doorbell page number. For kfd, each
+	 * process will use 2 pages of doorbell, we need to change the
+	 * setting to 0xd
+	 */
+	data &= ~CP_UNMAPPED_DOORBELL__PROC_LSB_MASK;
+	data |= 0xd << CP_UNMAPPED_DOORBELL__PROC_LSB__SHIFT;
+
+	/*
+	 * Clear ENABLE before applying the request. OR-ing the new value
+	 * into the value read back could never switch the feature off once
+	 * the bit was set in the register.
+	 */
+	data &= ~(1U << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT);
+	if (enable)
+		data |= 1U << CP_UNMAPPED_DOORBELL__ENABLE__SHIFT;
+
+	WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data);
+}
+
+/*
+ * mes_v12_0_reset_hw_queue - reset a single hardware queue
+ *
+ * @mes: MES state
+ * @input: description of the queue to reset
+ *
+ * When @input->use_mmio is set the reset is delegated to
+ * mes_v12_0_reset_queue_mmio() and no packet is built. Otherwise a
+ * MES_SCH_API_RESET packet is submitted: on AMDGPU_MES_KIQ_PIPE when
+ * @input->is_kq is set, on AMDGPU_MES_SCHED_PIPE otherwise.
+ *
+ * Returns the result of whichever path was taken.
+ */
+static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
+ struct mes_reset_queue_input *input)
+{
+ union MESAPI__RESET mes_reset_queue_pkt;
+ int pipe;
+
+ if (input->use_mmio)
+ return mes_v12_0_reset_queue_mmio(mes, input->queue_type,
+ input->me_id, input->pipe_id,
+ input->queue_id, input->vmid);
+
+ memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
+
+ mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
+ mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_reset_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
+
+ /*
+ * Legacy gfx queues are described in full through the *_lp fields;
+ * any other queue is identified by its doorbell offset alone.
+ */
+ if (input->legacy_gfx) {
+ mes_reset_queue_pkt.reset_legacy_gfx = 1;
+ mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
+ mes_reset_queue_pkt.queue_id_lp = input->queue_id;
+ mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr;
+ mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
+ mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
+ mes_reset_queue_pkt.vmid_id_lp = input->vmid;
+ } else {
+ mes_reset_queue_pkt.reset_queue_only = 1;
+ mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
+ }
+
+ if (input->is_kq)
+ pipe = AMDGPU_MES_KIQ_PIPE;
+ else
+ pipe = AMDGPU_MES_SCHED_PIPE;
+
+ return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
+ &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
+ offsetof(union MESAPI__RESET, api_status));
+}
+
+/*
+ * Submit a MES_SCH_API_RESET packet on AMDGPU_MES_SCHED_PIPE that asks the
+ * firmware to look for hung queues of @input->queue_type.
+ *
+ * The packet requests detection only when @input->detect_only is set and
+ * detection followed by reset otherwise. The packet's doorbell_offset_addr
+ * is set to mes->hung_queue_db_array_gpu_addr.
+ *
+ * Returns the result of mes_v12_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v12_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes,
+		struct mes_detect_and_reset_queue_input *input)
+{
+	union MESAPI__RESET pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	/* API frame header */
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_RESET;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	/* exactly one of the two hang-handling modes is requested */
+	if (!input->detect_only)
+		pkt.hang_detect_then_reset = 1;
+	else
+		pkt.hang_detect_only = 1;
+
+	pkt.doorbell_offset_addr = mes->hung_queue_db_array_gpu_addr;
+	pkt.queue_type = convert_to_mes_queue_type(input->queue_type);
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_SCHED_PIPE, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__RESET, api_status));
+}
+
+/*
+ * Map an amdgpu hub id onto the hub id used in MES TLB invalidation
+ * packets: 0 for a GFX hub, 1 for MMHUB0, -EINVAL for anything else.
+ */
+static int mes_v12_inv_tlb_convert_hub_id(uint8_t id)
+{
+	/*
+	 * MES cannot invalidate the gc_hub of a slave xcc on its own; the
+	 * master xcc invalidates every gc_hub of the partition, so all GFX
+	 * hubs share one id.
+	 */
+	if (AMDGPU_IS_GFXHUB(id))
+		return 0;
+
+	if (AMDGPU_IS_MMHUB0(id))
+		return 1;
+
+	return -EINVAL;
+}
+
+/*
+ * Submit a MES_SCH_API_INV_TLBS packet on AMDGPU_MES_KIQ_PIPE for the PASID
+ * and flush type given in @input.
+ *
+ * Returns -EINVAL when @input->hub_id has no MES equivalent, otherwise the
+ * result of mes_v12_0_submit_pkt_and_poll_completion().
+ */
+static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes,
+				    struct mes_inv_tlbs_pasid_input *input)
+{
+	union MESAPI__INV_TLBS pkt;
+	int hub;
+
+	/* translate the amdgpu hub id into the one MES expects */
+	hub = mes_v12_inv_tlb_convert_hub_id(input->hub_id);
+	if (hub < 0)
+		return -EINVAL;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	/* API frame header */
+	pkt.header.type = MES_API_TYPE_SCHEDULER;
+	pkt.header.opcode = MES_SCH_API_INV_TLBS;
+	pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+	/* invalidation request */
+	pkt.invalidate_tlbs.hub_id = hub;
+	pkt.invalidate_tlbs.inv_sel = 0;
+	pkt.invalidate_tlbs.inv_sel_id = input->pasid;
+	pkt.invalidate_tlbs.flush_type = input->flush_type;
+
+	return mes_v12_0_submit_pkt_and_poll_completion(mes,
+			AMDGPU_MES_KIQ_PIPE, &pkt, sizeof(pkt),
+			offsetof(union MESAPI__INV_TLBS, api_status));
+}
+
+/*
+ * MES v12.0 implementation of the amdgpu_mes_funcs callbacks. The table is
+ * installed into adev->mes.funcs by mes_v12_0_sw_init().
+ */
+static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
+ .add_hw_queue = mes_v12_0_add_hw_queue,
+ .remove_hw_queue = mes_v12_0_remove_hw_queue,
+ .map_legacy_queue = mes_v12_0_map_legacy_queue,
+ .unmap_legacy_queue = mes_v12_0_unmap_legacy_queue,
+ .suspend_gang = mes_v12_0_suspend_gang,
+ .resume_gang = mes_v12_0_resume_gang,
+ .misc_op = mes_v12_0_misc_op,
+ .reset_hw_queue = mes_v12_0_reset_hw_queue,
+ .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid,
+ .detect_and_reset_hung_queues = mes_v12_0_detect_and_reset_hung_queues,
+};
+
+/*
+ * mes_v12_0_allocate_ucode_buffer - copy a pipe's MES ucode into VRAM
+ *
+ * @adev: amdgpu device
+ * @pipe: MES pipe whose firmware image (adev->mes.fw[pipe]) is used
+ *
+ * Creates a PAGE_SIZE-aligned VRAM buffer object sized from the firmware
+ * header's mes_ucode_size_bytes and copies the ucode section into it.
+ * The caller must ensure adev->mes.fw[pipe] is non-NULL; it is dereferenced
+ * unconditionally. The offset and size come straight from the firmware
+ * header and are not validated here.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ /* locate the ucode section inside the firmware image */
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.ucode_fw_obj[pipe],
+ &adev->mes.ucode_fw_gpu_addr[pipe],
+ (void **)&adev->mes.ucode_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.ucode_fw_ptr[pipe], fw_data, fw_size);
+
+ /* the CPU mapping and reservation are dropped once the copy is done */
+ amdgpu_bo_kunmap(adev->mes.ucode_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.ucode_fw_obj[pipe]);
+
+ return 0;
+}
+
+/*
+ * mes_v12_0_allocate_ucode_data_buffer - copy a pipe's MES ucode data into VRAM
+ *
+ * @adev: amdgpu device
+ * @pipe: MES pipe whose firmware image (adev->mes.fw[pipe]) is used
+ *
+ * Same flow as mes_v12_0_allocate_ucode_buffer(), but for the data section
+ * (mes_ucode_data_offset_bytes / mes_ucode_data_size_bytes) and with a
+ * 64 KiB alignment. The caller must ensure adev->mes.fw[pipe] is non-NULL;
+ * the header values are not validated here.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int mes_v12_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ /* locate the data section inside the firmware image */
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.data_fw_obj[pipe],
+ &adev->mes.data_fw_gpu_addr[pipe],
+ (void **)&adev->mes.data_fw_ptr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.data_fw_ptr[pipe], fw_data, fw_size);
+
+ /* the CPU mapping and reservation are dropped once the copy is done */
+ amdgpu_bo_kunmap(adev->mes.data_fw_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.data_fw_obj[pipe]);
+
+ return 0;
+}
+
+/*
+ * Release the ucode data and ucode instruction buffer objects of @pipe,
+ * in that order, through amdgpu_bo_free_kernel().
+ */
+static void mes_v12_0_free_ucode_buffers(struct amdgpu_device *adev,
+					 enum amdgpu_mes_pipe pipe)
+{
+	struct amdgpu_mes *mes = &adev->mes;
+
+	/* data section */
+	amdgpu_bo_free_kernel(&mes->data_fw_obj[pipe],
+			      &mes->data_fw_gpu_addr[pipe],
+			      (void **)&mes->data_fw_ptr[pipe]);
+
+	/* instruction section */
+	amdgpu_bo_free_kernel(&mes->ucode_fw_obj[pipe],
+			      &mes->ucode_fw_gpu_addr[pipe],
+			      (void **)&mes->ucode_fw_ptr[pipe]);
+}
+
+/*
+ * mes_v12_0_enable - start or halt the MES pipes
+ *
+ * @adev: amdgpu device
+ * @enable: true to start every pipe, false to halt and reset both pipes
+ *
+ * Enable path, under srbm_mutex and once per pipe: select the pipe with
+ * soc21_grbm_select(me=3), optionally program the MSCRATCH address, set
+ * that pipe's RESET bit, program the program counter start address, then
+ * write CP_MES_CNTL with only the ACTIVE bits set. Afterwards the function
+ * waits: 100 ms in emulation mode, 500 us with unified MES, 50 us otherwise.
+ *
+ * Disable path: a single read-modify-write of CP_MES_CNTL that clears both
+ * ACTIVE bits and sets both RESET bits, MES_INVALIDATE_ICACHE and MES_HALT.
+ * This path takes no lock and does not select a pipe.
+ */
+static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ uint64_t ucode_addr;
+ uint32_t pipe, data = 0;
+
+ if (enable) {
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+ if (amdgpu_mes_log_enable) {
+ u32 log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;
+ /* In case uni mes is not enabled, only program for pipe 0 */
+ if (adev->mes.event_log_size >= (pipe + 1) * log_size) {
+ /* mscratch sits right after this pipe's log buffer */
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
+ lower_32_bits(adev->mes.event_log_gpu_addr +
+ pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
+ upper_32_bits(adev->mes.event_log_gpu_addr +
+ pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
+ dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n",
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
+ }
+ }
+
+ /* put this pipe into reset before touching its start address */
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ if (pipe == 0)
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ else
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ /* the start address is programmed shifted right by two bits */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+
+ /*
+ * unhalt MES and activate one pipe each loop; the value is built
+ * from 0, so the RESET bits set above are cleared by this write
+ */
+ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ if (pipe)
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
+ dev_info(adev->dev, "program CP_MES_CNTL : 0x%x\n", data);
+
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* fixed delay after activation; the length depends on the mode */
+ if (amdgpu_emu_mode)
+ msleep(100);
+ else if (adev->enable_uni_mes)
+ udelay(500);
+ else
+ udelay(50);
+ } else {
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+ }
+}
+
+/*
+ * mes_v12_0_set_ucode_start_addr - program the ucode start address per pipe
+ *
+ * @adev: amdgpu device
+ *
+ * Halts MES through mes_v12_0_enable(adev, false), then, under srbm_mutex,
+ * selects each pipe in turn and writes adev->mes.uc_start_addr[pipe] >> 2
+ * to CP_MES_PRGRM_CNTR_START / _HI.
+ */
+static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev)
+{
+ uint64_t ucode_addr;
+ int pipe;
+
+ mes_v12_0_enable(adev, false);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ /* me=3, queue=0 */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ /* set ucode start address */
+ ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
+ lower_32_bits(ucode_addr));
+ WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(ucode_addr));
+
+ /* restore the default selection before the next pipe / unlock */
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ }
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * mes_v12_0_load_microcode - backdoor-load the MES firmware of one pipe
+ *
+ * @adev: amdgpu device
+ * @pipe: MES pipe to load
+ * @prime_icache: when true, invalidate and then prime the instruction cache
+ *
+ * MES is halted first, even when the function then fails. The ucode and
+ * ucode data sections are copied into VRAM buffers and their addresses and
+ * cache bounds are programmed for @pipe under srbm_mutex.
+ *
+ * Returns 0 on success, -EINVAL when no firmware image is present for
+ * @pipe, or the error from the buffer allocation helpers.
+ */
+static int mes_v12_0_load_microcode(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe, bool prime_icache)
+{
+ int r;
+ uint32_t data;
+
+ mes_v12_0_enable(adev, false);
+
+ if (!adev->mes.fw[pipe])
+ return -EINVAL;
+
+ r = mes_v12_0_allocate_ucode_buffer(adev, pipe);
+ if (r)
+ return r;
+
+ r = mes_v12_0_allocate_ucode_data_buffer(adev, pipe);
+ if (r) {
+ /* also releases the ucode buffer allocated just above */
+ mes_v12_0_free_ucode_buffers(adev, pipe);
+ return r;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ /* me=3, pipe=<pipe>, queue=0 */
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_CNTL, 0);
+
+ /* set ucode firmware address */
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_LO,
+ lower_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_IC_BASE_HI,
+ upper_32_bits(adev->mes.ucode_fw_gpu_addr[pipe]));
+
+ /* set ucode instruction cache boundary to 2M-1 */
+ WREG32_SOC15(GC, 0, regCP_MES_MIBOUND_LO, 0x1FFFFF);
+
+ /* set ucode data firmware address */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_LO,
+ lower_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+ WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI,
+ upper_32_bits(adev->mes.data_fw_gpu_addr[pipe]));
+
+ /* Set data cache boundary CP_MES_MDBOUND_LO */
+ WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF);
+
+ if (prime_icache) {
+ /* invalidate ICACHE */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+
+ /* prime the ICACHE. */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+/*
+ * mes_v12_0_allocate_eop_buf - allocate and zero the EOP buffer of a pipe
+ *
+ * @adev: amdgpu device
+ * @pipe: MES pipe the buffer belongs to
+ *
+ * Creates a MES_EOP_SIZE, PAGE_SIZE-aligned GTT buffer object, recording
+ * the object and its GPU address in adev->mes.eop_gpu_obj[pipe] and
+ * adev->mes.eop_gpu_addr[pipe]. The CPU mapping is only used to clear the
+ * buffer and is dropped again before returning.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_reserved().
+ */
+static int mes_v12_0_allocate_eop_buf(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r;
+ u32 *eop;
+
+ r = amdgpu_bo_create_reserved(adev, MES_EOP_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ (void **)&eop);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create EOP bo failed\n", r);
+ return r;
+ }
+
+ /* clear the whole object as sized by the BO, not just MES_EOP_SIZE */
+ memset(eop, 0,
+ adev->mes.eop_gpu_obj[pipe]->tbo.base.size);
+
+ amdgpu_bo_kunmap(adev->mes.eop_gpu_obj[pipe]);
+ amdgpu_bo_unreserve(adev->mes.eop_gpu_obj[pipe]);
+
+ return 0;
+}
+
+/*
+ * mes_v12_0_mqd_init - fill in the MQD of a MES ring
+ *
+ * @ring: ring whose MQD (ring->mqd_ptr) is initialised
+ *
+ * Only memory is written here: the MQD structure and ring->wptr. No
+ * hardware register is touched. The values come from the ring's addresses
+ * and sizes, its doorbell settings and the register default constants.
+ *
+ * Always returns 0.
+ */
+static int mes_v12_0_mqd_init(struct amdgpu_ring *ring)
+{
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ /* the EOP base is stored shifted right by 8 bits */
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(MES_EOP_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* start from an empty ring: zero the software wptr and the MQD rptr/wptr */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = lower_32_bits(hqd_gpu_addr);
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = ring->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = ring->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffff8;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, NO_UPDATE_RPTR, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* doorbell control: enabled only when the ring uses a doorbell */
+ tmp = 0;
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ mqd->cp_hqd_vmid = 0;
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE,
+ PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ mqd->cp_hqd_ib_control = regCP_HQD_IB_CONTROL_DEFAULT;
+ mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT;
+ mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT;
+
+ /*
+ * Set CP_HQD_GFX_CONTROL.DB_UPDATED_MSG_EN[15] to enable unmapped
+ * doorbell handling. This is a reserved CP internal register that
+ * cannot be accessed by others.
+ */
+ mqd->reserved_184 = BIT(15);
+
+ return 0;
+}
+
+/*
+ * mes_v12_0_queue_init_register - program a ring's HQD registers from its MQD
+ *
+ * @ring: ring whose MQD (ring->mqd_ptr) was filled by mes_v12_0_mqd_init()
+ *
+ * Runs under srbm_mutex with me=3 / ring->pipe selected. The doorbell is
+ * disabled first, the MQD and queue registers are then written, the
+ * doorbell control from the MQD is written afterwards and the queue is
+ * activated last.
+ */
+static void mes_v12_0_queue_init_register(struct amdgpu_ring *ring)
+{
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data = 0;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, ring->pipe, 0, 0);
+
+ /* set CP_HQD_VMID.VMID = 0. */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_VMID);
+ data = REG_SET_FIELD(data, CP_HQD_VMID, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, data);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_EN=0 */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* set CP_MQD_BASE_ADDR/HI with the MQD base address */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+ /*
+ * set CP_MQD_CONTROL.VMID=0
+ *
+ * NOTE(review): the read-modify result in 'data' is not what gets
+ * written; the register is written with a literal 0, which clears
+ * every field and not only VMID. Confirm whether 'data' (or
+ * mqd->cp_mqd_control) was intended.
+ */
+ data = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ data = REG_SET_FIELD(data, CP_MQD_CONTROL, VMID, 0);
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 0);
+
+ /* set CP_HQD_PQ_BASE/HI with the ring buffer base address */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+
+ /* set CP_HQD_PQ_RPTR_REPORT_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* set CP_HQD_PQ_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+
+ /* set CP_HQD_PQ_WPTR_POLL_ADDR/HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* set CP_HQD_PQ_DOORBELL_CONTROL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* set CP_HQD_PERSISTENT_STATE from the MQD (mes_v12_0_mqd_init() uses PRELOAD_SIZE=0x55) */
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+
+ /* set CP_HQD_ACTIVE.ACTIVE=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, mqd->cp_hqd_active);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * mes_v12_0_kiq_enable_queue - map MES ring 0 through the gfx KIQ
+ *
+ * @adev: amdgpu device
+ *
+ * Reserves map_queues_size entries on the KIQ ring (adev->gfx.kiq[0]),
+ * emits a map-queues request for adev->mes.ring[0] and runs the KIQ ring
+ * test. If the test fails the KIQ ring is marked not ready.
+ *
+ * Returns 0 on success, -EINVAL when the KIQ has no kiq_map_queues
+ * callback, or the error from amdgpu_ring_alloc() / amdgpu_ring_test_ring().
+ */
+static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ return r;
+ }
+
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]);
+
+ /* NOTE(review): presumably the ring test also commits the packet above - confirm */
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r) {
+ DRM_ERROR("kfq enable failed\n");
+ kiq_ring->sched.ready = false;
+ }
+ return r;
+}
+
+/*
+ * mes_v12_0_queue_init - bring up the hardware queue of one MES pipe
+ *
+ * @adev: amdgpu device
+ * @pipe: AMDGPU_MES_SCHED_PIPE or AMDGPU_MES_KIQ_PIPE
+ *
+ * The ring used is the gfx KIQ ring for the KIQ pipe without unified MES,
+ * and adev->mes.ring[pipe] otherwise. The scheduler pipe is mapped either
+ * through amdgpu_mes_map_legacy_queue() (unified MES) or through the KIQ;
+ * any other pipe is programmed directly through registers. The firmware
+ * version of the pipe is read from CP_MES_GP3_LO if not yet known.
+ *
+ * Returns 0 on success or the error from MQD init / queue mapping.
+ */
+static int mes_v12_0_queue_init(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else
+ ring = &adev->mes.ring[pipe];
+
+ /* on reset/resume start from a clean ring: zero rptr/wptr and the ring contents */
+ if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) &&
+ (amdgpu_in_reset(adev) || adev->in_suspend)) {
+ *(ring->wptr_cpu_addr) = 0;
+ *(ring->rptr_cpu_addr) = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ r = mes_v12_0_mqd_init(ring);
+ if (r)
+ return r;
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ if (adev->enable_uni_mes)
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ else
+ r = mes_v12_0_kiq_enable_queue(adev);
+ if (r)
+ return r;
+ } else {
+ mes_v12_0_queue_init_register(ring);
+ }
+
+ /* read the version only once per pipe (a cached value of 0 means unknown) */
+ if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) ||
+ ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) {
+ /* get MES scheduler/KIQ versions */
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+ adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ return 0;
+}
+
+/*
+ * mes_v12_0_ring_init - set up adev->mes.ring[pipe] and register it
+ *
+ * @adev: amdgpu device
+ * @pipe: MES pipe the ring belongs to
+ *
+ * The ring is placed at me=3, pipe=@pipe, queue=0, uses a doorbell
+ * (mes_ring0 for the scheduler pipe, mes_ring1 otherwise, shifted left by
+ * one) and is created without a scheduler.
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe)
+{
+ struct amdgpu_ring *ring;
+
+ ring = &adev->mes.ring[pipe];
+
+ ring->funcs = &mes_v12_0_ring_funcs;
+
+ ring->me = 3;
+ ring->pipe = pipe;
+ ring->queue = 0;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * mes_v12_0_kiq_ring_init - set up the gfx KIQ ring for use as the MES KIQ
+ *
+ * @adev: amdgpu device
+ *
+ * Initialises the KIQ ring lock and configures adev->gfx.kiq[0].ring at
+ * me=3, pipe=1, queue=0 with the mes_ring1 doorbell and the EOP buffer of
+ * AMDGPU_MES_KIQ_PIPE. Unlike mes_v12_0_ring_init(), ring->funcs is not
+ * set here.
+ *
+ * Returns the result of amdgpu_ring_init().
+ */
+static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+
+ spin_lock_init(&adev->gfx.kiq[0].ring_lock);
+
+ ring = &adev->gfx.kiq[0].ring;
+
+ ring->me = 3;
+ ring->pipe = 1;
+ ring->queue = 0;
+
+ /* NOTE(review): presumably a NULL adev marks the ring as not yet initialised for amdgpu_ring_init() - confirm */
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1;
+ ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_KIQ_PIPE];
+ ring->no_scheduler = true;
+ sprintf(ring->name, "mes_kiq_%d.%d.%d",
+ ring->me, ring->pipe, ring->queue);
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+/*
+ * mes_v12_0_mqd_sw_init - allocate the MQD buffer and its backup for a pipe
+ *
+ * @adev: amdgpu device
+ * @pipe: MES pipe; selects the gfx KIQ ring for the KIQ pipe without
+ * unified MES and adev->mes.ring[pipe] otherwise
+ *
+ * Does nothing if the ring already has an MQD object. Otherwise a zeroed
+ * GTT buffer the size of struct v12_compute_mqd is created and a backup
+ * buffer of the same size is allocated with kmalloc(). A failed backup
+ * allocation is only logged; it does not fail the function.
+ *
+ * Returns 0 on success or the error from amdgpu_bo_create_kernel().
+ */
+static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev,
+ enum amdgpu_mes_pipe pipe)
+{
+ int r, mqd_size = sizeof(struct v12_compute_mqd);
+ struct amdgpu_ring *ring;
+
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
+ ring = &adev->gfx.kiq[0].ring;
+ else
+ ring = &adev->mes.ring[pipe];
+
+ /* already allocated, nothing to do */
+ if (ring->mqd_obj)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+
+ memset(ring->mqd_ptr, 0, mqd_size);
+
+ /* prepare MQD backup */
+ adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->mes.mqd_backup[pipe])
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n",
+ ring->name);
+
+ return 0;
+}
+
+/*
+ * mes_v12_0_sw_init - software-side initialisation of the MES v12 IP block
+ *
+ * @ip_block: MES IP block; ip_block->adev is the device being set up
+ *
+ * Installs the MES callbacks, sizes the event log (one log + scratch region
+ * per pipe with unified MES, a single region otherwise), runs
+ * amdgpu_mes_init() and then, for every MES pipe, allocates the EOP buffer,
+ * the MQD and the ring. Without unified MES the KIQ pipe uses the gfx KIQ
+ * ring; every other ring additionally gets a resource_1 VRAM buffer.
+ *
+ * Returns 0 on success or the first error encountered. Resources allocated
+ * before a failure are not released here.
+ */
+static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int pipe, r;
+
+	adev->mes.funcs = &mes_v12_0_funcs;
+	adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init;
+	adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini;
+	adev->mes.enable_legacy_queue_map = true;
+
+	adev->mes.event_log_size = adev->enable_uni_mes ?
+		(AMDGPU_MAX_MES_PIPES * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE)) :
+		(AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
+	r = amdgpu_mes_init(adev);
+	if (r)
+		return r;
+
+	for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+		r = mes_v12_0_allocate_eop_buf(adev, pipe);
+		if (r)
+			return r;
+
+		r = mes_v12_0_mqd_sw_init(adev, pipe);
+		if (r)
+			return r;
+
+		if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) {
+			/*
+			 * The result must be checked here as well, otherwise
+			 * a failed KIQ ring init is reported as success.
+			 */
+			r = mes_v12_0_kiq_ring_init(adev);
+			if (r)
+				return r;
+		} else {
+			r = mes_v12_0_ring_init(adev, pipe);
+			if (r)
+				return r;
+			r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
+						    AMDGPU_GEM_DOMAIN_VRAM,
+						    &adev->mes.resource_1[pipe],
+						    &adev->mes.resource_1_gpu_addr[pipe],
+						    &adev->mes.resource_1_addr[pipe]);
+			if (r) {
+				dev_err(adev->dev, "(%d) failed to create mes resource_1 bo pipe[%d]\n", r, pipe);
+				return r;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * mes_v12_0_sw_fini - release everything set up by mes_v12_0_sw_init()
+ *
+ * @ip_block: MES IP block; ip_block->adev is the device being torn down
+ *
+ * Per pipe: frees the resource_1 buffer, the MQD backup, the EOP buffer and
+ * the firmware image, plus the MQD and ring of adev->mes.ring[pipe] where
+ * that ring is used (all pipes with unified MES, only the scheduler pipe
+ * otherwise). Without unified MES the gfx KIQ ring and its MQD are freed
+ * too. The ucode buffers are only freed for direct firmware loading.
+ *
+ * Always returns 0.
+ */
+static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe;
+
+ for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ amdgpu_bo_free_kernel(&adev->mes.resource_1[pipe],
+ &adev->mes.resource_1_gpu_addr[pipe],
+ &adev->mes.resource_1_addr[pipe]);
+
+ kfree(adev->mes.mqd_backup[pipe]);
+
+ amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
+ &adev->mes.eop_gpu_addr[pipe],
+ NULL);
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+
+ /* mes.ring[pipe] is only in use for these pipes, see mes_v12_0_sw_init() */
+ if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) {
+ amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj,
+ &adev->mes.ring[pipe].mqd_gpu_addr,
+ &adev->mes.ring[pipe].mqd_ptr);
+ amdgpu_ring_fini(&adev->mes.ring[pipe]);
+ }
+ }
+
+ /* without unified MES the KIQ pipe used the gfx KIQ ring instead */
+ if (!adev->enable_uni_mes) {
+ amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj,
+ &adev->gfx.kiq[0].ring.mqd_gpu_addr,
+ &adev->gfx.kiq[0].ring.mqd_ptr);
+ amdgpu_ring_fini(&adev->gfx.kiq[0].ring);
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE);
+ mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_SCHED_PIPE);
+ }
+
+ amdgpu_mes_fini(adev);
+ return 0;
+}
+
+/*
+ * mes_v12_0_kiq_dequeue_sched - tear down the scheduler pipe's hardware queue
+ *
+ * @adev: amdgpu device
+ *
+ * Under srbm_mutex with me=3 / AMDGPU_MES_SCHED_PIPE selected: if the queue
+ * is active a dequeue is requested and CP_HQD_ACTIVE is polled for up to
+ * adev->usec_timeout microseconds. A timeout is not reported; the function
+ * carries on regardless. The doorbell control and the ring pointers are
+ * then cleared and adev->mes.ring[0] is marked not ready.
+ */
+static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev)
+{
+ uint32_t data;
+ int i;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ }
+ /* first write: doorbell disabled with DOORBELL_HIT set ... */
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 1);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* ... second write: the whole register is cleared */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ adev->mes.ring[0].sched.ready = false;
+}
+
+/*
+ * Tell the RLC which queue is the KIQ: the low byte of RLC_CP_SCHEDULERS is
+ * replaced with the ring's me/pipe/queue selector and bit 7 (0x80) is set.
+ * The upper 24 bits of the register are preserved.
+ */
+static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t tmp;
+
+	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+
+	/* drop the previous selector, then insert this ring's */
+	tmp &= 0xffffff00;
+	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+	tmp |= 0x80;
+
+	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+}
+
+/*
+ * mes_v12_0_kiq_hw_init - hardware bring-up of the MES KIQ pipe
+ *
+ * @adev: amdgpu device
+ *
+ * Steps: tell the RLC which ring is the KIQ; for direct loading, load the
+ * firmware of both pipes and set the start addresses (for RLC backdoor auto
+ * loading only the start addresses are set); enable MES; initialise the KIQ
+ * pipe queue; with unified MES, send the hardware resource packets on the
+ * KIQ pipe; and, when legacy queue mapping is enabled, run
+ * mes_v12_0_hw_init().
+ *
+ * Returns 0 on success or a negative error code. Failures after the MES IP
+ * block has been looked up are unwound with mes_v12_0_hw_fini(); the
+ * earlier failures return directly.
+ */
+static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+ struct amdgpu_ip_block *ip_block;
+
+ if (adev->enable_uni_mes)
+ mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]);
+ else
+ mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+
+ /* the icache is only primed on the second (KIQ pipe) load */
+ r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
+ if (r) {
+ DRM_ERROR("failed to load MES fw, r=%d\n", r);
+ return r;
+ }
+
+ r = mes_v12_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
+ if (r) {
+ DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
+ return r;
+ }
+
+ mes_v12_0_set_ucode_start_addr(adev);
+
+ } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ mes_v12_0_set_ucode_start_addr(adev);
+
+ mes_v12_0_enable(adev, true);
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
+ if (unlikely(!ip_block)) {
+ dev_err(adev->dev, "Failed to get MES handle\n");
+ return -EINVAL;
+ }
+
+ r = mes_v12_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
+ if (r)
+ goto failure;
+
+ if (adev->enable_uni_mes) {
+ r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE);
+ if (r)
+ goto failure;
+
+ /* NOTE(review): the return value is ignored here, unlike the call above - confirm this is intended */
+ mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE);
+ }
+
+ if (adev->mes.enable_legacy_queue_map) {
+ r = mes_v12_0_hw_init(ip_block);
+ if (r)
+ goto failure;
+ }
+
+ return r;
+
+failure:
+ mes_v12_0_hw_fini(ip_block);
+ return r;
+}
+
+/*
+ * Hardware teardown of the MES KIQ pipe.
+ *
+ * If the scheduler ring (adev->mes.ring[0]) is still marked ready it is
+ * taken down first: with unified MES the scheduler-pipe ring is unmapped
+ * through amdgpu_mes_unmap_legacy_queue() (its result is ignored), otherwise
+ * it is dequeued by direct register programming. MES is then disabled
+ * unconditionally.
+ *
+ * Return: always 0.
+ */
+static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *sched_ring = &adev->mes.ring[AMDGPU_MES_SCHED_PIPE];
+
+	if (!adev->mes.ring[0].sched.ready)
+		goto disable_mes;
+
+	if (!adev->enable_uni_mes)
+		mes_v12_0_kiq_dequeue_sched(adev);
+	else
+		amdgpu_mes_unmap_legacy_queue(adev, sched_ring,
+					      RESET_QUEUES, 0, 0);
+
+	adev->mes.ring[0].sched.ready = false;
+
+disable_mes:
+	mes_v12_0_enable(adev, false);
+
+	return 0;
+}
+
+/*
+ * Hardware bring-up of the MES scheduler pipe.
+ *
+ * @ip_block: MES IP block; only ip_block->adev is used directly, the block
+ *            itself is handed to mes_v12_0_hw_fini() on failure.
+ *
+ * If adev->mes.ring[0] is already marked ready, only the ready flags are
+ * refreshed. Otherwise:
+ *  - when MES KIQ is not enabled, the scheduler microcode is loaded/started
+ *    according to the firmware load type and MES is enabled here;
+ *  - unmapped-doorbell handling is enabled;
+ *  - the scheduler-pipe queue is initialised and its hardware resources are
+ *    programmed;
+ *  - the scheduler status is queried and enforce-isolation is updated.
+ *
+ * Return: 0 on success or a negative error code. A microcode load failure
+ * returns directly; every later failure goes through mes_v12_0_hw_fini()
+ * first.
+ */
+static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	int r;
+	struct amdgpu_device *adev = ip_block->adev;
+
+	/* Already up: skip the bring-up and just refresh the ready flags. */
+	if (adev->mes.ring[0].sched.ready)
+		goto out;
+
+	/* MES KIQ disabled: load the ucode and enable MES from here. */
+	if (!adev->enable_mes_kiq) {
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+			r = mes_v12_0_load_microcode(adev,
+					     AMDGPU_MES_SCHED_PIPE, true);
+			if (r) {
+				DRM_ERROR("failed to load MES fw, r=%d\n", r);
+				return r;
+			}
+
+			mes_v12_0_set_ucode_start_addr(adev);
+
+		} else if (adev->firmware.load_type ==
+			   AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+
+			mes_v12_0_set_ucode_start_addr(adev);
+		}
+
+		mes_v12_0_enable(adev, true);
+	}
+
+	/* Enable the MES to handle doorbell ring on unmapped queue */
+	mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true);
+
+	r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE);
+	if (r)
+		goto failure;
+
+	r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE);
+	if (r)
+		goto failure;
+
+	/*
+	 * The second hw-resources packet is only sent when the scheduler
+	 * firmware version is 0x4b or newer; its result is not checked.
+	 */
+	if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b)
+		mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
+
+	mes_v12_0_init_aggregated_doorbell(&adev->mes);
+
+	r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE);
+	if (r) {
+		DRM_ERROR("MES is busy\n");
+		goto failure;
+	}
+
+	r = amdgpu_mes_update_enforce_isolation(adev);
+	if (r)
+		goto failure;
+
+out:
+	/*
+	 * Disable KIQ ring usage from the driver once MES is enabled.
+	 * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+	 * with MES enabled.
+	 */
+	adev->gfx.kiq[0].ring.sched.ready = false;
+	adev->mes.ring[0].sched.ready = true;
+
+	return 0;
+
+failure:
+	mes_v12_0_hw_fini(ip_block);
+	return r;
+}
+
+/*
+ * hw_fini callback of the MES IP block.
+ *
+ * Performs no hardware teardown: @ip_block is unused and the function
+ * always returns 0.
+ */
+static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ return 0;
+}
+
+/* suspend callback: forwards to mes_v12_0_hw_fini() and returns its result. */
+static int mes_v12_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return mes_v12_0_hw_fini(ip_block);
+}
+
+/* resume callback: forwards to mes_v12_0_hw_init() and returns its result. */
+static int mes_v12_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return mes_v12_0_hw_init(ip_block);
+}
+
+/*
+ * early_init callback of the MES IP block.
+ *
+ * Records the size of the hung-queue doorbell array and initialises the
+ * microcode for every MES pipe, stopping at the first pipe that fails.
+ *
+ * Return: 0 on success, otherwise the error returned by
+ * amdgpu_mes_init_microcode() for the failing pipe.
+ */
+static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int pipe = 0;
+	int r = 0;
+
+	adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE;
+
+	while (!r && pipe < AMDGPU_MAX_MES_PIPES) {
+		r = amdgpu_mes_init_microcode(adev, pipe);
+		pipe++;
+	}
+
+	return r;
+}
+
+/*
+ * IP-level callback table for MES v12.0. No late_init hook is provided;
+ * suspend and resume are thin wrappers around hw_fini and hw_init.
+ */
+static const struct amd_ip_funcs mes_v12_0_ip_funcs = {
+ .name = "mes_v12_0",
+ .early_init = mes_v12_0_early_init,
+ .late_init = NULL,
+ .sw_init = mes_v12_0_sw_init,
+ .sw_fini = mes_v12_0_sw_fini,
+ .hw_init = mes_v12_0_hw_init,
+ .hw_fini = mes_v12_0_hw_fini,
+ .suspend = mes_v12_0_suspend,
+ .resume = mes_v12_0_resume,
+};
+
+/*
+ * Exported IP block descriptor: MES, version 12.0.0, served by
+ * mes_v12_0_ip_funcs.
+ */
+const struct amdgpu_ip_block_version mes_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_MES,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &mes_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h
new file mode 100644
index 000000000000..ac3740f353aa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MES_V12_0_H__
+#define __MES_V12_0_H__
+
+extern const struct amdgpu_ip_block_version mes_v12_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index fb91b31056ca..243eabda0607 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -96,7 +96,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
/*
* Raven2 has a HW issue that it is unable to use the vram which
* is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -227,6 +229,52 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
0);
}
+/*
+ * Program the MMHUB VM_L2_SAW context 0 so that it covers the GART range.
+ *
+ * The page table base comes from amdgpu_gmc_pd_addr(adev->gart.bo) and the
+ * start/end addresses from adev->gmc.gart_start/gart_end. All three are
+ * written shifted right by 12, split into LO32/HI32 register pairs (the
+ * ">> 44" writes are the upper 32 bits of the ">> 12" value). Context 0 is
+ * then enabled with its page-table-depth bits cleared, all other SAW
+ * contexts are disabled, and the PDE/PTE request snoop bits in SAW_CNTL4
+ * are set.
+ */
+static void mmhub_v1_0_init_saw(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+ uint32_t tmp;
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ lower_32_bits(pt_base >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ upper_32_bits(pt_base >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ /* Program SAW CONTEXT0 CNTL: set the enable bit, clear the 2-bit depth field */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_CNTL);
+ tmp |= 1 << CONTEXT0_CNTL_ENABLE_OFFSET;
+ tmp &= ~(3 << CONTEXT0_CNTL_PAGE_TABLE_DEPTH_OFFSET);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_CNTL, tmp);
+
+ /* Disable all Contexts except Context0 (bits 1-15 set, bit 0 clear) */
+ tmp = 0xfffe;
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXTS_DISABLE, tmp);
+
+ /* Program SAW CNTL4: turn on snooping for PDE and PTE requests */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CNTL4);
+ tmp |= 1 << VMC_TAP_PDE_REQUEST_SNOOP_OFFSET;
+ tmp |= 1 << VMC_TAP_PTE_REQUEST_SNOOP_OFFSET;
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CNTL4, tmp);
+}
+
static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
@@ -242,7 +290,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
block_size -= 9;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
@@ -281,6 +329,9 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ if (amdgpu_ip_version(adev, ISP_HWIP, 0))
+ mmhub_v1_0_init_saw(adev);
}
static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
@@ -305,7 +356,7 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB)
amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_GMC,
- enable);
+ enable, 0);
}
static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index 9086f2fdfaf4..2adee2b94c37 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -172,6 +172,30 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
}
+/*
+ * Set the snoop bit for SDMA so that SDMA writes probe-invalidate RW lines.
+ *
+ * For each of the five DAGB instances, bit 15 (the SDMA client) is set in
+ * both WRCLI_GPU_SNOOP_OVERRIDE and WRCLI_GPU_SNOOP_OVERRIDE_VALUE using a
+ * read-modify-write, leaving the other client bits untouched.
+ */
+static void mmhub_v1_7_init_snoop_override_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int i;
+ /*
+ * Register stride between consecutive DAGB instances.
+ * NOTE(review): the same stride is applied to the OVERRIDE_VALUE
+ * registers and to all five instances -- assumes uniform spacing.
+ */
+ uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+
+ for (i = 0; i < 5; i++) { /* DAGB instances */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, i * distance);
+ tmp |= (1 << 15); /* SDMA client is BIT15 */
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, i * distance, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, i * distance);
+ tmp |= (1 << 15);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, i * distance, tmp);
+ }
+
+}
+
static void mmhub_v1_7_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
@@ -274,7 +298,7 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
block_size -= 9;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
num_level);
@@ -337,6 +361,7 @@ static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev)
mmhub_v1_7_init_system_aperture_regs(adev);
mmhub_v1_7_init_tlb_regs(adev);
mmhub_v1_7_init_cache_regs(adev);
+ mmhub_v1_7_init_snoop_override_regs(adev);
mmhub_v1_7_enable_system_domain(adev);
mmhub_v1_7_disable_identity_aperture(adev);
@@ -544,7 +569,7 @@ static int mmhub_v1_7_set_clockgating(struct amdgpu_device *adev,
static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
- int data, data1;
+ u32 data, data1;
if (amdgpu_sriov_vf(adev))
*flags = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index 784c4e077470..cc688ae79e84 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -30,6 +30,7 @@
#include "soc15_common.h"
#include "soc15.h"
#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
#define regVM_L2_CNTL3_DEFAULT 0x80100007
#define regVM_L2_CNTL4_DEFAULT 0x000000c1
@@ -75,6 +76,8 @@ static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmi
static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev)
{
+ uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ?
+ adev->gmc.vram_start : adev->gmc.fb_start;
uint64_t pt_base;
u32 inst_mask;
int i;
@@ -94,10 +97,10 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev)
if (adev->gmc.pdb0_bo) {
WREG32_SOC15(MMHUB, i,
regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
- (u32)(adev->gmc.fb_start >> 12));
+ (u32)(gart_start >> 12));
WREG32_SOC15(MMHUB, i,
regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
- (u32)(adev->gmc.fb_start >> 44));
+ (u32)(gart_start >> 44));
WREG32_SOC15(MMHUB, i,
regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
@@ -130,6 +133,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
uint64_t value;
int i;
+ if (amdgpu_sriov_vf(adev))
+ return;
+
inst_mask = adev->aid_mask;
for_each_inst(i, inst_mask) {
/* Program the AGP BAR */
@@ -139,9 +145,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
adev->gmc.agp_end >> 24);
- if (amdgpu_sriov_vf(adev))
- return;
-
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
@@ -192,10 +195,8 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev)
uint32_t tmp, inst_mask;
int i;
- /* Setup TLB control */
- inst_mask = adev->aid_mask;
- for_each_inst(i, inst_mask) {
- tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+ if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) {
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
1);
@@ -209,7 +210,55 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev)
MTYPE, MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
- WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL);
+ } else {
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+ }
+}
+
+/*
+ * Set the snoop bit for SDMA so that SDMA writes probe-invalidate RW lines.
+ *
+ * Does nothing on an SR-IOV virtual function. Otherwise, for every MMHUB
+ * instance in adev->aid_mask and each of its five DAGB instances, bit 15
+ * (the SDMA client) is set in both WRCLI_GPU_SNOOP_OVERRIDE and
+ * WRCLI_GPU_SNOOP_OVERRIDE_VALUE using a read-modify-write.
+ */
+static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i, j;
+ /* register stride between consecutive DAGB instances */
+ uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ for (j = 0; j < 5; j++) { /* DAGB instances */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance);
+ tmp |= (1 << 15); /* SDMA client is BIT15 */
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance);
+ tmp |= (1 << 15);
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance, tmp);
+ }
}
}
@@ -344,7 +393,7 @@ static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev)
hub = &adev->vmhub[AMDGPU_MMHUB0(j)];
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL,
- i);
+ i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
@@ -418,6 +467,7 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev)
mmhub_v1_8_init_system_aperture_regs(adev);
mmhub_v1_8_init_tlb_regs(adev);
mmhub_v1_8_init_cache_regs(adev);
+ mmhub_v1_8_init_snoop_override_regs(adev);
mmhub_v1_8_enable_system_domain(adev);
mmhub_v1_8_disable_identity_aperture(adev);
@@ -427,6 +477,30 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev)
return 0;
}
+/*
+ * Turn off the MMHUB L1 TLB by clearing ENABLE_L1_TLB and
+ * ENABLE_ADVANCED_DRIVER_MODEL in MC_VM_MX_L1_TLB_CNTL.
+ *
+ * When amdgpu_sriov_reg_indirect_l1_tlb_cntl() reports true, the value is
+ * built from instance 0's register and handed to the PSP via
+ * psp_reg_program_no_ring() instead of being written directly. Otherwise
+ * the register of every MMHUB instance in adev->aid_mask is updated with a
+ * read-modify-write.
+ */
+static void mmhub_v1_8_disable_l1_tlb(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ u32 i, inst_mask;
+
+ if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) {
+ /* indirect path: only instance 0 is read, the PSP applies the value */
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL);
+ } else {
+ /* direct path: update each MMHUB instance's register in turn */
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+ }
+}
+
static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub;
@@ -440,15 +514,6 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
-
- /* Setup TLB control */
- tmp = RREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
- 0);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
- ENABLE_ADVANCED_DRIVER_MODEL, 0);
- WREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL, tmp);
-
if (!amdgpu_sriov_vf(adev)) {
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL);
@@ -458,6 +523,8 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0);
}
}
+
+ mmhub_v1_8_disable_l1_tlb(adev);
}
/**
@@ -626,6 +693,14 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
void *ras_err_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+ unsigned long ue_count = 0, ce_count = 0;
+
+ /* NOTE: mmhub_inst is derived from aid_mask and ranges from 0 to 3,
+ * so it can be used directly as the die ID */
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = mmhub_inst,
+ };
amdgpu_ras_inst_query_ras_error_count(adev,
mmhub_v1_8_ce_reg_list,
@@ -634,7 +709,7 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
mmhub_inst,
AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
- &err_data->ce_count);
+ &ce_count);
amdgpu_ras_inst_query_ras_error_count(adev,
mmhub_v1_8_ue_reg_list,
ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
@@ -642,7 +717,10 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
mmhub_inst,
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
- &err_data->ue_count);
+ &ue_count);
+
+ amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
}
static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev,
@@ -689,156 +767,105 @@ static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev)
mmhub_v1_8_inst_reset_ras_error_count(adev, i);
}
-static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = {
- regMMEA0_ERR_STATUS,
- regMMEA1_ERR_STATUS,
- regMMEA2_ERR_STATUS,
- regMMEA3_ERR_STATUS,
- regMMEA4_ERR_STATUS,
+static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
+ .query_ras_error_count = mmhub_v1_8_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
};
-static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev,
- uint32_t mmhub_inst)
+static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
{
- uint32_t reg_value;
- uint32_t mmea_err_status_addr_dist;
- uint32_t i;
-
- /* query mmea ras err status */
- mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
- for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
- reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
- regMMEA0_ERR_STATUS,
- i * mmea_err_status_addr_dist);
- if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
- REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
- REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
- dev_warn(adev->dev,
- "Detected MMEA%d err in MMHUB%d, status: 0x%x\n",
- i, mmhub_inst, reg_value);
- }
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
}
- /* query mm_cane ras err status */
- reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
- if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) ||
- REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) ||
- REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) {
- dev_warn(adev->dev,
- "Detected MM CANE err in MMHUB%d, status: 0x%x\n",
- mmhub_inst, reg_value);
- }
+ return ret;
}
-static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev)
+/* error codes; refer to the SMU driver interface (if) header file */
+static int mmhub_v1_8_err_codes[] = {
+ 0, 1, 2, 3, 4, /* CODE_DAGB0 - 4 */
+ 5, 6, 7, 8, 9, /* CODE_EA0 - 4 */
+ 10, /* CODE_UTCL2_ROUTER */
+ 11, /* CODE_VML2 */
+ 12, /* CODE_VML2_WALKER */
+ 13, /* CODE_MMCANE */
+};
+
+static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
{
- uint32_t inst_mask;
- uint32_t i;
+ u32 instlo;
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
- dev_warn(adev->dev, "MMHUB RAS is not supported\n");
- return;
- }
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
- inst_mask = adev->aid_mask;
- for_each_inst(i, inst_mask)
- mmhub_v1_8_inst_query_ras_err_status(adev, i);
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ mmhub_v1_8_err_codes,
+ ARRAY_SIZE(mmhub_v1_8_err_codes)))
+ return false;
+
+ return true;
}
-static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev,
- uint32_t mmhub_inst)
+static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = {
+ .aca_bank_parser = mmhub_v1_8_aca_bank_parser,
+ .aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid,
+};
+
+static const struct aca_info mmhub_v1_8_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &mmhub_v1_8_aca_bank_ops,
+};
+
+static int mmhub_v1_8_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
- uint32_t mmea_cgtt_clk_cntl_addr_dist;
- uint32_t mmea_err_status_addr_dist;
- uint32_t reg_value;
- uint32_t i;
+ int r;
- /* reset mmea ras err status */
- mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL;
- mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
- for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
- /* force clk branch on for response path
- * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1
- */
- reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
- regMMEA0_CGTT_CLK_CTRL,
- i * mmea_cgtt_clk_cntl_addr_dist);
- reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
- SOFT_OVERRIDE_RETURN, 1);
- WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
- regMMEA0_CGTT_CLK_CTRL,
- i * mmea_cgtt_clk_cntl_addr_dist,
- reg_value);
-
- /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
- reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
- regMMEA0_ERR_STATUS,
- i * mmea_err_status_addr_dist);
- reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
- CLEAR_ERROR_STATUS, 1);
- WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
- regMMEA0_ERR_STATUS,
- i * mmea_err_status_addr_dist,
- reg_value);
-
- /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */
- reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
- regMMEA0_CGTT_CLK_CTRL,
- i * mmea_cgtt_clk_cntl_addr_dist);
- reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
- SOFT_OVERRIDE_RETURN, 0);
- WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
- regMMEA0_CGTT_CLK_CTRL,
- i * mmea_cgtt_clk_cntl_addr_dist,
- reg_value);
- }
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
- /* reset mm_cane ras err status
- * force clk branch on for response path
- * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1
- */
- reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
- reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
- SOFT_OVERRIDE_ATRET, 1);
- WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
-
- /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
- reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
- reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS,
- CLEAR_ERROR_STATUS, 1);
- WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value);
-
- /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */
- reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
- reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
- SOFT_OVERRIDE_ATRET, 0);
- WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
-}
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__MMHUB,
+ &mmhub_v1_8_aca_info, NULL);
+ if (r)
+ goto late_fini;
-static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev)
-{
- uint32_t inst_mask;
- uint32_t i;
+ return 0;
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
- dev_warn(adev->dev, "MMHUB RAS is not supported\n");
- return;
- }
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
- inst_mask = adev->aid_mask;
- for_each_inst(i, inst_mask)
- mmhub_v1_8_inst_reset_ras_err_status(adev, i);
+ return r;
}
-static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
- .query_ras_error_count = mmhub_v1_8_query_ras_error_count,
- .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
- .query_ras_error_status = mmhub_v1_8_query_ras_error_status,
- .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status,
-};
-
struct amdgpu_mmhub_ras mmhub_v1_8_ras = {
.ras_block = {
.hw_ops = &mmhub_v1_8_ras_hw_ops,
+ .ras_late_init = mmhub_v1_8_ras_late_init,
},
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 8f76c6ecf50a..a0cc8e218ca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -151,7 +151,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 0, 0):
case IP_VERSION(2, 0, 2):
mmhub_cid = mmhub_client_ids_navi1x[cid][rw];
@@ -367,7 +367,7 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
@@ -568,7 +568,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
return;
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
@@ -601,7 +601,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
}
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
@@ -625,7 +625,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
return;
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
@@ -651,7 +651,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 0, 0):
case IP_VERSION(2, 0, 2):
case IP_VERSION(2, 1, 0):
@@ -671,12 +671,12 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
- int data, data1;
+ u32 data, data1;
if (amdgpu_sriov_vf(adev))
*flags = 0;
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
index 1dce053a4c4d..5eb8122e2746 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -90,7 +90,7 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 3, 0):
case IP_VERSION(2, 4, 0):
case IP_VERSION(2, 4, 1):
@@ -285,7 +285,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
index 441379e91cfa..7d5242df58a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
@@ -107,7 +107,7 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 1):
mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw];
@@ -189,8 +189,7 @@ static void mmhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
- adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -324,7 +323,7 @@ static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev)
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
index 12c7f4b46ea9..910337dc28d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -36,40 +36,47 @@
static const char *mmhub_client_ids_v3_0_1[][2] = {
[0][0] = "VMC",
+ [1][0] = "ISPXT",
+ [2][0] = "ISPIXT",
[4][0] = "DCEDMC",
[5][0] = "DCEVGA",
[6][0] = "MP0",
[7][0] = "MP1",
- [8][0] = "MPIO",
- [16][0] = "HDP",
- [17][0] = "LSDMA",
- [18][0] = "JPEG",
- [19][0] = "VCNU0",
- [21][0] = "VSCH",
- [22][0] = "VCNU1",
- [23][0] = "VCN1",
- [32+20][0] = "VCN0",
- [2][1] = "DBGUNBIO",
+ [8][0] = "MPM",
+ [12][0] = "ISPTNR",
+ [14][0] = "ISPCRD0",
+ [15][0] = "ISPCRD1",
+ [16][0] = "ISPCRD2",
+ [22][0] = "HDP",
+ [23][0] = "LSDMA",
+ [24][0] = "JPEG",
+ [27][0] = "VSCH",
+ [28][0] = "VCNU",
+ [29][0] = "VCN",
+ [1][1] = "ISPXT",
+ [2][1] = "ISPIXT",
[3][1] = "DCEDWB",
[4][1] = "DCEDMC",
[5][1] = "DCEVGA",
[6][1] = "MP0",
[7][1] = "MP1",
- [8][1] = "MPIO",
- [10][1] = "DBGU0",
- [11][1] = "DBGU1",
- [12][1] = "DBGU2",
- [13][1] = "DBGU3",
- [14][1] = "XDP",
- [15][1] = "OSSSYS",
- [16][1] = "HDP",
- [17][1] = "LSDMA",
- [18][1] = "JPEG",
- [19][1] = "VCNU0",
- [20][1] = "VCN0",
- [21][1] = "VSCH",
- [22][1] = "VCNU1",
- [23][1] = "VCN1",
+ [8][1] = "MPM",
+ [10][1] = "ISPMWR0",
+ [11][1] = "ISPMWR1",
+ [12][1] = "ISPTNR",
+ [13][1] = "ISPSWR",
+ [14][1] = "ISPCWR0",
+ [15][1] = "ISPCWR1",
+ [16][1] = "ISPCWR2",
+ [17][1] = "ISPCWR3",
+ [18][1] = "XDP",
+ [21][1] = "OSSSYS",
+ [22][1] = "HDP",
+ [23][1] = "LSDMA",
+ [24][1] = "JPEG",
+ [27][1] = "VSCH",
+ [28][1] = "VCNU",
+ [29][1] = "VCN",
};
static uint32_t mmhub_v3_0_1_get_invalidate_req(unsigned int vmid,
@@ -108,7 +115,7 @@ mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(3, 0, 1):
mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw];
break;
@@ -188,8 +195,7 @@ static void mmhub_v3_0_1_init_system_aperture_regs(struct amdgpu_device *adev)
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
- value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
- adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -311,7 +317,7 @@ static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev)
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
index 5dadc85abf7e..f0f182f033b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
@@ -181,8 +181,7 @@ static void mmhub_v3_0_2_init_system_aperture_regs(struct amdgpu_device *adev)
}
/* Set default page address. */
- value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
- adev->vm_manager.vram_base_offset;
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
@@ -316,7 +315,7 @@ static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev)
uint32_t tmp;
for (i = 0; i <= 14; i++) {
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
new file mode 100644
index 000000000000..f6fc9778bc30
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
@@ -0,0 +1,746 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_3.h"
+
+#include "mmhub/mmhub_3_3_0_offset.h"
+#include "mmhub/mmhub_3_3_0_sh_mask.h"
+
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+#define regDAGB0_L1TLB_REG_RW_3_3 0x00a4
+#define regDAGB0_L1TLB_REG_RW_3_3_BASE_IDX 1
+#define regDAGB1_L1TLB_REG_RW_3_3 0x0163
+#define regDAGB1_L1TLB_REG_RW_3_3_BASE_IDX 1
+
+static const char *mmhub_client_ids_v3_3[][2] = {
+ [0][0] = "VMC",
+ [1][0] = "ISPXT",
+ [2][0] = "ISPIXT",
+ [4][0] = "DCEDMC",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPM",
+ [9][0] = "ISPPDPRD",
+ [10][0] = "ISPCSTATRD",
+ [11][0] = "ISPBYRPRD",
+ [12][0] = "ISPRGBPRD",
+ [13][0] = "ISPMCFPRD",
+ [14][0] = "ISPMCFPRD1",
+ [15][0] = "ISPYUVPRD",
+ [16][0] = "ISPMCSCRD",
+ [17][0] = "ISPGDCRD",
+ [18][0] = "ISPLMERD",
+ [22][0] = "ISPXT1",
+ [23][0] = "ISPIXT1",
+ [24][0] = "HDP",
+ [25][0] = "LSDMA",
+ [26][0] = "JPEG",
+ [27][0] = "VPE",
+ [28][0] = "VSCH",
+ [29][0] = "VCNU",
+ [30][0] = "VCN",
+ [1][1] = "ISPXT",
+ [2][1] = "ISPIXT",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "ISPCSISWR",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPM",
+ [9][1] = "ISPPDPWR",
+ [10][1] = "ISPCSTATWR",
+ [11][1] = "ISPBYRPWR",
+ [12][1] = "ISPRGBPWR",
+ [13][1] = "ISPMCFPWR",
+ [14][1] = "ISPMWR0",
+ [15][1] = "ISPYUVPWR",
+ [16][1] = "ISPMCSCWR",
+ [17][1] = "ISPGDCWR",
+ [18][1] = "ISPLMEWR",
+ [20][1] = "ISPMWR2",
+ [21][1] = "OSSSYS",
+ [22][1] = "ISPXT1",
+ [23][1] = "ISPIXT1",
+ [24][1] = "HDP",
+ [25][1] = "LSDMA",
+ [26][1] = "JPEG",
+ [27][1] = "VPE",
+ [28][1] = "VSCH",
+ [29][1] = "VCNU",
+ [30][1] = "VCN",
+};
+
+static const char *mmhub_client_ids_v3_3_1[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPM",
+ [24][0] = "HDP",
+ [25][0] = "LSDMA",
+ [26][0] = "JPEG0",
+ [27][0] = "VPE0",
+ [28][0] = "VSCH",
+ [29][0] = "VCNU0",
+ [30][0] = "VCN0",
+ [32+1][0] = "ISPXT",
+ [32+2][0] = "ISPIXT",
+ [32+9][0] = "ISPPDPRD",
+ [32+10][0] = "ISPCSTATRD",
+ [32+11][0] = "ISPBYRPRD",
+ [32+12][0] = "ISPRGBPRD",
+ [32+13][0] = "ISPMCFPRD",
+ [32+14][0] = "ISPMCFPRD1",
+ [32+15][0] = "ISPYUVPRD",
+ [32+16][0] = "ISPMCSCRD",
+ [32+17][0] = "ISPGDCRD",
+ [32+18][0] = "ISPLMERD",
+ [32+22][0] = "ISPXT1",
+ [32+23][0] = "ISPIXT1",
+ [32+26][0] = "JPEG1",
+ [32+27][0] = "VPE1",
+ [32+29][0] = "VCNU1",
+ [32+30][0] = "VCN1",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPM",
+ [21][1] = "OSSSYS",
+ [24][1] = "HDP",
+ [25][1] = "LSDMA",
+ [26][1] = "JPEG0",
+ [27][1] = "VPE0",
+ [28][1] = "VSCH",
+ [29][1] = "VCNU0",
+ [30][1] = "VCN0",
+ [32+1][1] = "ISPXT",
+ [32+2][1] = "ISPIXT",
+ [32+5][1] = "ISPCSISWR",
+ [32+9][1] = "ISPPDPWR",
+ [32+10][1] = "ISPCSTATWR",
+ [32+11][1] = "ISPBYRPWR",
+ [32+12][1] = "ISPRGBPWR",
+ [32+13][1] = "ISPMCFPWR",
+ [32+14][1] = "ISPMWR0",
+ [32+15][1] = "ISPYUVPWR",
+ [32+16][1] = "ISPMCSCWR",
+ [32+17][1] = "ISPGDCWR",
+ [32+18][1] = "ISPLMEWR",
+ [32+19][1] = "ISPMWR1",
+ [32+20][1] = "ISPMWR2",
+ [32+22][1] = "ISPXT1",
+ [32+23][1] = "ISPIXT1",
+ [32+26][1] = "JPEG1",
+ [32+27][1] = "VPE1",
+ [32+29][1] = "VCNU1",
+ [32+30][1] = "VCN1",
+};
+
+static uint32_t mmhub_v3_3_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* Invalidate using legacy mode on vmid; a flush_type of 0 is sent as 1. */
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type ? : 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw; /* CID and RW fields extracted from @status */
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 2):
+ mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ? /* bounds check before the lookup */
+ mmhub_client_ids_v3_3[cid][rw] :
+ cid == 0x140 ? "UMSCH" : NULL; /* only evaluated for ids outside the table */
+ break;
+ case IP_VERSION(3, 3, 1):
+ mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3_1) ? /* bounds check before the lookup */
+ mmhub_client_ids_v3_3_1[cid][rw] :
+ cid == 0x140 ? "UMSCH" : NULL; /* only evaluated for ids outside the table */
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid); /* NULL covers unset table slots and unlisted IP versions */
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v3_3_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+
+}
+
+static void mmhub_v3_3_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v3_3_setup_vm_pt_regs(adev, 0, pt_base); /* vmid 0 gets the GART page table base */
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12)); /* address bits 43:12 */
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44)); /* address bits 44 and above */
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12)); /* address bits 43:12 */
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44)); /* address bits 44 and above */
+}
+
+static void mmhub_v3_3_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /*
+ * The new L1 policy blocks SR-IOV guests from writing these registers;
+ * the host programs them instead. NOTE(review): nothing below checks
+ * for SR-IOV, so the writes are always issued - confirm intent.
+ */
+ /* Program the system aperture low and high logical page numbers. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address (scratch memory converted from MC to physical). */
+ value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program the "protection fault" default address with the dummy page. */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v3_3_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v3_3_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v3_3_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v3_3_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v3_3_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) { /* 15 slots, addressed relative to the CONTEXT1 registers */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp; /* keeps the CNTL value written on the final loop pass */
+}
+
+static void mmhub_v3_3_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ unsigned int i;
+
+ for (i = 0; i < 18; ++i) { /* same ADDR_RANGE value written to 18 engine slots */
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static void mmhub_v3_3_init_saw_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+ uint32_t tmp;
+
+ /* Program page table base, gart start, gart end */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ lower_32_bits(pt_base >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ upper_32_bits(pt_base >> 12));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_CNTL, tmp);
+
+ /* Disable all contexts except context 0 */
+ tmp = 0xfffe;
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXTS_DISABLE, tmp);
+
+ /* Program saw cntl4 */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CNTL4);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CNTL4, VMC_TAP_CONTEXT0_PDE_REQUEST_SNOOP, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CNTL4, VMC_TAP_CONTEXT0_PTE_REQUEST_SNOOP, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CNTL4, tmp);
+}
+
+static void mmhub_v3_3_enable_tls(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0, regDAGB0_L1TLB_REG_RW_3_3, 0);
+ WREG32_SOC15(MMHUB, 0, regDAGB1_L1TLB_REG_RW_3_3, 3);
+}
+
+static int mmhub_v3_3_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_3_init_gart_aperture_regs(adev);
+ mmhub_v3_3_init_system_aperture_regs(adev);
+ mmhub_v3_3_init_tlb_regs(adev);
+ mmhub_v3_3_init_cache_regs(adev);
+
+ mmhub_v3_3_enable_system_domain(adev);
+ mmhub_v3_3_disable_identity_aperture(adev);
+ mmhub_v3_3_setup_vmid_config(adev);
+ mmhub_v3_3_program_invalidation(adev);
+
+ /* stand-alone walker (SAW) init */
+ mmhub_v3_3_init_saw_regs(adev);
+
+ /* enable mmhub tls */
+ mmhub_v3_3_enable_tls(adev);
+
+ return 0; /* always succeeds; no step above reports an error */
+}
+
+static void mmhub_v3_3_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all 16 contexts by zeroing each CONTEXTn_CNTL register */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Turn off the L1 TLB and the advanced driver model */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Turn off the L2 cache and clear L2_CNTL3 */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_3_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v3_3_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v3_3_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_3_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_3_get_invalidate_req,
+};
+
+static void mmhub_v3_3_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL; /* CNTL register stride between contexts 0 and 1 */
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; /* page-table address register stride between contexts */
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ; /* REQ register stride between engines 0 and 1 */
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; /* ADDR_RANGE register stride between engines */
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &mmhub_v3_3_vmhub_funcs;
+}
+
+static u64 mmhub_v3_3_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; /* keep only the FB_BASE field */
+ base <<= 24; /* scale the field by 2^24 (16 MiB) before returning it */
+
+ return base;
+}
+
+static u64 mmhub_v3_3_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ u64 offset;
+
+ offset = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET);
+ offset &= MMMC_VM_FB_OFFSET__FB_OFFSET_MASK; /* keep only the FB_OFFSET field */
+ offset <<= 24; /* scale the field by 2^24 (16 MiB) before returning it */
+
+ return offset;
+}
+
+static void mmhub_v3_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static void mmhub_v3_3_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v3_3_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ mmhub_v3_3_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v3_3_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void mmhub_v3_3_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ u32 data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0; /* NOTE(review): no return here, so the register is still read below - intended? */
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG: reported when the ENABLE bit is set */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS: reported when the MEM_LS_ENABLE bit is set */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v3_3_funcs = {
+ .init = mmhub_v3_3_init,
+ .get_fb_location = mmhub_v3_3_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_3_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_3_gart_enable,
+ .set_fault_enable_default = mmhub_v3_3_set_fault_enable_default,
+ .gart_disable = mmhub_v3_3_gart_disable,
+ .set_clockgating = mmhub_v3_3_set_clockgating,
+ .get_clockgating = mmhub_v3_3_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_3_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h
new file mode 100644
index 000000000000..37b62c7e5a4a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMHUB_V3_3_H__
+#define __MMHUB_V3_3_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
new file mode 100644
index 000000000000..951998454b25
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
@@ -0,0 +1,647 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v4_1_0.h"
+
+#include "mmhub/mmhub_4_1_0_offset.h"
+#include "mmhub/mmhub_4_1_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "soc24_enum.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+/*
+ * Client names for the fault report, indexed as [cid][rw] with the CID and RW
+ * fields decoded from MMVM_L2_PROTECTION_FAULT_STATUS_LO32. Slots that are not
+ * listed below are implicitly NULL.
+ */
+static const char *mmhub_client_ids_v4_1_0[][2] = {
+	[0][0] = "VMC",
+	[4][0] = "DCEDMC",
+	[6][0] = "MP0",
+	[7][0] = "MP1",
+	[8][0] = "MPIO",
+	[16][0] = "LSDMA",
+	[17][0] = "JPEG",
+	[19][0] = "VCNU",
+	[22][0] = "VSCH",
+	[23][0] = "HDP",
+	[32+23][0] = "VCNRD",
+	[3][1] = "DCEDWB",
+	[4][1] = "DCEDMC",
+	[6][1] = "MP0",
+	[7][1] = "MP1",
+	[8][1] = "MPIO",
+	[10][1] = "DBGU0",
+	[11][1] = "DBGU1",
+	[12][1] = "DBGUNBIO",
+	[14][1] = "XDP",
+	[15][1] = "OSSSYS",
+	[16][1] = "LSDMA",
+	[17][1] = "JPEG",
+	[18][1] = "VCNWR",
+	[19][1] = "VCNU",
+	[22][1] = "VSCH",
+	[23][1] = "HDP",
+};
+
+/*
+ * Build an invalidation request in the MMVM_INVALIDATE_ENG0_REQ layout that
+ * targets @vmid and invalidates the L1 PTEs and the L2 PTE/PDE0/PDE1/PDE2
+ * entries. @flush_type is accepted but not used here: FLUSH_TYPE is always
+ * programmed to 0.
+ */
+static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid,
+						uint32_t flush_type)
+{
+	u32 req = 0;
+
+	/* invalidate using legacy mode on vmid */
+	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
+	/* Only use legacy inv on mmhub side */
+	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
+	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+	return req;
+}
+
+/**
+ * mmhub_v4_1_0_print_l2_protection_fault_status - decode and log a VM fault
+ *
+ * @adev: amdgpu_device pointer
+ * @status: raw MMVM_L2_PROTECTION_FAULT_STATUS_LO32 value
+ *
+ * Logs the raw status followed by the decoded client name/ID and the
+ * MORE_FAULTS, WALKER_ERROR, PERMISSION_FAULTS, MAPPING_ERROR and RW fields.
+ * A client ID without a table entry is reported as "unknown".
+ */
+static void
+mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
+					      uint32_t status)
+{
+	uint32_t cid, rw;
+	const char *mmhub_cid = NULL;
+
+	cid = REG_GET_FIELD(status,
+			    MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
+	rw = REG_GET_FIELD(status,
+			   MMVM_L2_PROTECTION_FAULT_STATUS_LO32, RW);
+
+	dev_err(adev->dev,
+		"MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
+		status);
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+	case IP_VERSION(4, 1, 0):
+		/*
+		 * CID is decoded from a hardware register and is not
+		 * range-checked anywhere else, so bound it against the
+		 * table before indexing to avoid an out-of-bounds read.
+		 */
+		mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v4_1_0) ?
+			mmhub_client_ids_v4_1_0[cid][rw] : NULL;
+		break;
+	default:
+		mmhub_cid = NULL;
+		break;
+	}
+	dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+		mmhub_cid ? mmhub_cid : "unknown", cid);
+	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS));
+	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR));
+	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS));
+	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+		REG_GET_FIELD(status,
+		MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR));
+	dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+/*
+ * Program the page table base address for context @vmid: the lower and upper
+ * 32 bits of @page_table_base are written to the CONTEXT0 base-address
+ * registers offset by @vmid * hub->ctx_addr_distance.
+ */
+static void mmhub_v4_1_0_setup_vm_pt_regs(struct amdgpu_device *adev,
+					  uint32_t vmid, uint64_t page_table_base)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+	WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+			    hub->ctx_addr_distance * vmid,
+			    lower_32_bits(page_table_base));
+
+	WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+			    hub->ctx_addr_distance * vmid,
+			    upper_32_bits(page_table_base));
+}
+
+/*
+ * Point context 0 at the GART page table and program its address range from
+ * gmc.gart_start / gmc.gart_end. The range registers take the address shifted
+ * right by 12 (LO32) and by 44 (HI32).
+ */
+static void mmhub_v4_1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+	uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+	mmhub_v4_1_0_setup_vm_pt_regs(adev, 0, pt_base);
+
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+		     (u32)(adev->gmc.gart_start >> 12));
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+		     (u32)(adev->gmc.gart_start >> 44));
+
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+		     (u32)(adev->gmc.gart_end >> 12));
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+		     (u32)(adev->gmc.gart_end >> 44));
+}
+
+/*
+ * Program the AGP window, the system aperture bounds and default page, and
+ * the protection-fault default address. Does nothing on an SR-IOV VF.
+ */
+static void mmhub_v4_1_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+	uint64_t value;
+	uint32_t tmp;
+
+	/*
+	 * the new L1 policy will block SRIOV guest from writing
+	 * these regs, and they will be programmed at host.
+	 * so skip programming these regs.
+	 */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	/* Program the AGP BAR */
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+	/* Program the system aperture low logical page number. */
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+		     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+	/* Set default page address. */
+	value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
+		adev->vm_manager.vram_base_offset;
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+		     (u32)(value >> 12));
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+		     (u32)(value >> 44));
+
+	/* Program "protection fault". */
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+		     (u32)(adev->dummy_page_addr >> 12));
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+		     (u32)((u64)adev->dummy_page_addr >> 44));
+
+	/* Read-modify-write so only the PTE read retry bit changes. */
+	tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+			    ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+/*
+ * Enable the L1 TLB through a read-modify-write of MMMC_VM_MX_L1_TLB_CNTL:
+ * system access mode 3, advanced driver model on, unmapped system-aperture
+ * access field 0, ECO bits cleared and MTYPE set to uncached.
+ */
+static void mmhub_v4_1_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
+
+	/* Setup TLB control */
+	tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+			    ENABLE_ADVANCED_DRIVER_MODEL, 1);
+	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+			    SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+			    MTYPE, MTYPE_UC); /* UC, uncached */
+
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+/*
+ * Program the L2 cache control registers (MMVM_L2_CNTL .. MMVM_L2_CNTL5).
+ * CNTL and CNTL2 are read-modify-written; CNTL3/4/5 start from the
+ * regMMVM_L2_CNTLn_DEFAULT constants defined in this file. Does nothing on an
+ * SR-IOV VF.
+ */
+static void mmhub_v4_1_0_init_cache_regs(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
+
+	/* These registers are not accessible to VF-SRIOV.
+	 * The PF will program them instead.
+	 */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	/* Setup L2 cache */
+	tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+			    ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+	/* XXX for emulation, Refer to closed source code.*/
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+			    0);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+	/* Request invalidation of all L1 TLBs and of the L2 cache. */
+	tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+	/* Bank select and big-K fragment size depend on translate_further. */
+	tmp = regMMVM_L2_CNTL3_DEFAULT;
+	if (adev->gmc.translate_further) {
+		tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+		tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+	} else {
+		tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+		tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+	}
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+	tmp = regMMVM_L2_CNTL4_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+	tmp = regMMVM_L2_CNTL5_DEFAULT;
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+/*
+ * Enable context 0 with PAGE_TABLE_DEPTH 0 and
+ * RETRY_PERMISSION_OR_INVALID_PAGE_FAULT cleared (read-modify-write of
+ * MMVM_CONTEXT0_CNTL).
+ */
+static void mmhub_v4_1_0_enable_system_domain(struct amdgpu_device *adev)
+{
+	uint32_t tmp;
+
+	tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+	tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+	tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+	tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+			    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+	WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+/*
+ * Program the CONTEXT1 identity aperture with LOW = 0xF_FFFFFFFF and HIGH = 0,
+ * and zero the identity physical offset. Does nothing on an SR-IOV VF.
+ * NOTE(review): LOW above HIGH presumably leaves the aperture empty, i.e.
+ * disabled, as the function name suggests - confirm against the register spec.
+ */
+static void mmhub_v4_1_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+	/* These registers are not accessible to VF-SRIOV.
+	 * The PF will program them instead.
+	 */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	WREG32_SOC15(MMHUB, 0,
+		     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+		     0xFFFFFFFF);
+	WREG32_SOC15(MMHUB, 0,
+		     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+		     0x0000000F);
+
+	WREG32_SOC15(MMHUB, 0,
+		     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+	WREG32_SOC15(MMHUB, 0,
+		     regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+		     0);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+		     0);
+}
+
+/*
+ * Configure the 15 user contexts (CONTEXT1 and the 14 that follow it): enable
+ * each context, set the page table depth and block size from vm_manager,
+ * enable the default protection-fault handling bits, and program a page table
+ * range of [0, max_pfn - 1]. The last control value written is cached in
+ * hub->vm_cntx_cntl.
+ */
+static void mmhub_v4_1_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+	int i;
+	uint32_t tmp;
+
+	for (i = 0; i <= 14; i++) {
+		/*
+		 * Read the same register instance that is written below;
+		 * both accesses must use the per-context stride.
+		 */
+		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+					  i * hub->ctx_distance);
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+				    adev->vm_manager.num_level);
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    1);
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+				    PAGE_TABLE_BLOCK_SIZE,
+				    adev->vm_manager.block_size - 9);
+		/* Send no-retry XNACK on fault to suppress VM fault storm. */
+		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+				    !amdgpu_noretry);
+		WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+				    i * hub->ctx_distance, tmp);
+		WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+				    i * hub->ctx_addr_distance, 0);
+		WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+				    i * hub->ctx_addr_distance, 0);
+		WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+				    i * hub->ctx_addr_distance,
+				    lower_32_bits(adev->vm_manager.max_pfn - 1));
+		WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+				    i * hub->ctx_addr_distance,
+				    upper_32_bits(adev->vm_manager.max_pfn - 1));
+	}
+
+	hub->vm_cntx_cntl = tmp;
+}
+
+/*
+ * Program the address range of invalidation engines 0..17 to
+ * LO32 = 0xffffffff, HI32 = 0x1f. Each engine's registers sit
+ * hub->eng_addr_distance apart from the previous engine's.
+ */
+static void mmhub_v4_1_0_program_invalidation(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+	unsigned i;
+
+	for (i = 0; i < 18; ++i) {
+		WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+				    i * hub->eng_addr_distance, 0xffffffff);
+		WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+				    i * hub->eng_addr_distance, 0x1f);
+	}
+}
+
+/*
+ * Bring up address translation on the MMHUB by running the init helpers in
+ * order: GART aperture, system aperture, TLB, L2 cache, context 0, identity
+ * aperture, user contexts, invalidation engines. Always returns 0.
+ */
+static int mmhub_v4_1_0_gart_enable(struct amdgpu_device *adev)
+{
+	/* GART Enable. */
+	mmhub_v4_1_0_init_gart_aperture_regs(adev);
+	mmhub_v4_1_0_init_system_aperture_regs(adev);
+	mmhub_v4_1_0_init_tlb_regs(adev);
+	mmhub_v4_1_0_init_cache_regs(adev);
+
+	mmhub_v4_1_0_enable_system_domain(adev);
+	mmhub_v4_1_0_disable_identity_aperture(adev);
+	mmhub_v4_1_0_setup_vmid_config(adev);
+	mmhub_v4_1_0_program_invalidation(adev);
+
+	return 0;
+}
+
+/*
+ * Tear down address translation: zero the control register of all 16
+ * contexts, disable the L1 TLB and the advanced driver model, disable the L2
+ * cache and clear MMVM_L2_CNTL3.
+ */
+static void mmhub_v4_1_0_gart_disable(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+	u32 tmp;
+	u32 i;
+
+	/* Disable all tables */
+	for (i = 0; i < 16; i++)
+		WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+				    i * hub->ctx_distance, 0);
+
+	/* Setup TLB control */
+	tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+			    ENABLE_ADVANCED_DRIVER_MODEL, 0);
+	WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+	/* Setup L2 cache */
+	tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v4_1_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ *
+ * Writes @value into every *_PROTECTION_FAULT_ENABLE_DEFAULT field of
+ * MMVM_L2_PROTECTION_FAULT_CNTL. When @value is false, the
+ * CRASH_ON_NO_RETRY_FAULT and CRASH_ON_RETRY_FAULT bits are additionally set.
+ * Does nothing on an SR-IOV VF.
+ */
+static void
+mmhub_v4_1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+	u32 tmp;
+
+	/* These registers are not accessible to VF-SRIOV.
+	 * The PF will program them instead.
+	 */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+			    value);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+			    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	/* The crash bits are only ever set here, never cleared. */
+	if (!value) {
+		tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+				    CRASH_ON_NO_RETRY_FAULT, 1);
+		tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+				    CRASH_ON_RETRY_FAULT, 1);
+	}
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+/* Per-hub callbacks; installed into hub->vmhub_funcs by mmhub_v4_1_0_init(). */
+static const struct amdgpu_vmhub_funcs mmhub_v4_1_0_vmhub_funcs = {
+	.print_l2_protection_fault_status = mmhub_v4_1_0_print_l2_protection_fault_status,
+	.get_invalidate_req = mmhub_v4_1_0_get_invalidate_req,
+};
+
+/*
+ * Fill in the amdgpu_vmhub descriptor for MMHUB0 instance 0: register offsets,
+ * the strides between consecutive context and invalidation-engine registers,
+ * the fault interrupt enable mask, and the vmhub callback table. No hardware
+ * register is read or written here.
+ */
+static void mmhub_v4_1_0_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
+
+	hub->ctx0_ptb_addr_lo32 =
+		SOC15_REG_OFFSET(MMHUB, 0,
+				 regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+	hub->ctx0_ptb_addr_hi32 =
+		SOC15_REG_OFFSET(MMHUB, 0,
+				 regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+	hub->vm_inv_eng0_sem =
+		SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+	hub->vm_inv_eng0_req =
+		SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+	hub->vm_inv_eng0_ack =
+		SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+	hub->vm_context0_cntl =
+		SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+	hub->vm_l2_pro_fault_status =
+		SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS_LO32);
+	hub->vm_l2_pro_fault_cntl =
+		SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+	/* Strides are derived from the distance between instance 0 and 1. */
+	hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+	hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+		regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+	hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+		regMMVM_INVALIDATE_ENG0_REQ;
+	hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+		regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+	hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+	hub->vm_l2_bank_select_reserved_cid2 =
+		SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
+
+	hub->vm_contexts_disable =
+		SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE);
+
+	hub->vmhub_funcs = &mmhub_v4_1_0_vmhub_funcs;
+}
+
+/*
+ * Return the framebuffer base: the FB_BASE field of MMMC_VM_FB_LOCATION_BASE
+ * shifted left by 24. The value is widened to u64 before the shift.
+ */
+static u64 mmhub_v4_1_0_get_fb_location(struct amdgpu_device *adev)
+{
+	u64 base;
+
+	base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+
+	base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+	base <<= 24;
+
+	return base;
+}
+
+/* Return the MMMC_VM_FB_OFFSET register value widened to u64 and shifted left by 24. */
+static u64 mmhub_v4_1_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+	return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
+}
+
+/*
+ * Enable or disable medium grain clock gating. With @enable the
+ * DISABLE_RDRET/WRRET_TAP_CHAIN_FGCG bits of DAGB0/DAGB1_CNTL_MISC2 are
+ * cleared, otherwise they are set; each register is written back only if its
+ * value changed. All MM_ATC_L2_MISC_CG handling is compiled out with #if 0.
+ */
+static void
+mmhub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+					      bool enable)
+{
+#if 0
+	uint32_t def, data;
+#endif
+	uint32_t def1, data1, def2 = 0, data2 = 0;
+#if 0
+	def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+#endif
+	def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
+	def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2);
+
+	if (enable) {
+#if 0
+		data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#endif
+		data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+			   DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+
+		data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+			   DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+	} else {
+#if 0
+		data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+#endif
+		data1 |= (DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+			  DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+
+		data2 |= (DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
+			  DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
+	}
+
+#if 0
+	if (def != data)
+		WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+#endif
+	/* Skip the register write when nothing changed. */
+	if (def1 != data1)
+		WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1);
+
+	if (def2 != data2)
+		WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2);
+}
+
+/*
+ * Currently a no-op: the whole body (toggling MEM_LS_ENABLE in
+ * MM_ATC_L2_MISC_CG according to @enable) is compiled out with #if 0.
+ */
+static void
+mmhub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+					     bool enable)
+{
+#if 0
+	uint32_t def, data;
+
+	def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+	if (enable)
+		data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+	else
+		data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+	if (def != data)
+		WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+#endif
+}
+
+/*
+ * Apply the requested clockgating @state. Gating is enabled when @state is
+ * AMD_CG_STATE_GATE and disabled otherwise, and each feature is only touched
+ * if its AMD_CG_SUPPORT_MC_* bit is set in adev->cg_flags. Does nothing on an
+ * SR-IOV VF. Always returns 0.
+ */
+static int mmhub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+					enum amd_clockgating_state state)
+{
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
+		mmhub_v4_1_0_update_medium_grain_clock_gating(adev,
+				state == AMD_CG_STATE_GATE);
+
+	if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
+		mmhub_v4_1_0_update_medium_grain_light_sleep(adev,
+				state == AMD_CG_STATE_GATE);
+
+	return 0;
+}
+
+/*
+ * Currently a no-op that leaves *@flags untouched: the whole body is compiled
+ * out with #if 0.
+ * NOTE(review): in the disabled code the SR-IOV branch zeroes *flags but does
+ * not return, so the register read below it would still run on a VF if this
+ * block were re-enabled as is - confirm the intent before enabling.
+ */
+static void mmhub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+#if 0
+	int data;
+
+	if (amdgpu_sriov_vf(adev))
+		*flags = 0;
+
+	data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+	/* AMD_CG_SUPPORT_MC_MGCG */
+	if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+		*flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+	/* AMD_CG_SUPPORT_MC_LS */
+	if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+		*flags |= AMD_CG_SUPPORT_MC_LS;
+#endif
+}
+
+/* MMHUB 4.1.0 callback table; the only non-static symbol defined in this file. */
+const struct amdgpu_mmhub_funcs mmhub_v4_1_0_funcs = {
+	.init = mmhub_v4_1_0_init,
+	.get_fb_location = mmhub_v4_1_0_get_fb_location,
+	.get_mc_fb_offset = mmhub_v4_1_0_get_mc_fb_offset,
+	.gart_enable = mmhub_v4_1_0_gart_enable,
+	.set_fault_enable_default = mmhub_v4_1_0_set_fault_enable_default,
+	.gart_disable = mmhub_v4_1_0_gart_disable,
+	.set_clockgating = mmhub_v4_1_0_set_clockgating,
+	.get_clockgating = mmhub_v4_1_0_get_clockgating,
+	.setup_vm_pt_regs = mmhub_v4_1_0_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h
new file mode 100644
index 000000000000..3902d653353c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V4_1_0_H__
+#define __MMHUB_V4_1_0_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v4_1_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 5718e4d40e66..fe0710b55c3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -198,6 +198,36 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
}
+/*
+ * Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines.
+ *
+ * For each of the (5 - 2 * @hubid) DAGB instances handled for hub @hubid,
+ * i.e. 5 for hub 0 and 3 for hub 1, set bit 15 in both
+ * WRCLI_GPU_SNOOP_OVERRIDE and WRCLI_GPU_SNOOP_OVERRIDE_VALUE using a
+ * read-modify-write.
+ */
+static void mmhub_v9_4_init_snoop_override_regs(struct amdgpu_device *adev, int hubid)
+{
+	uint32_t tmp;
+	int i;
+	/* Register stride between consecutive DAGB instances. */
+	uint32_t distance = mmDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+			    mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+	uint32_t huboffset = hubid * MMHUB_INSTANCE_REGISTER_OFFSET;
+
+	for (i = 0; i < 5 - (2 * hubid); i++) {
+		/* DAGB instances 0 to 4 are in hub0 and 5 to 7 are in hub1 */
+		tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+			mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE,
+			huboffset + i * distance);
+		tmp |= (1 << 15); /* SDMA client is BIT15 */
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+			mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE,
+			huboffset + i * distance, tmp);
+
+		tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+			mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE,
+			huboffset + i * distance);
+		tmp |= (1 << 15);
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+			mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE,
+			huboffset + i * distance, tmp);
+	}
+
+}
+
static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
{
uint32_t tmp;
@@ -308,7 +338,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
- hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i * hub->ctx_distance);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
@@ -392,6 +422,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev))
mmhub_v9_4_init_cache_regs(adev, i);
+ mmhub_v9_4_init_snoop_override_regs(adev, i);
mmhub_v9_4_enable_system_domain(adev, i);
if (!amdgpu_sriov_vf(adev))
mmhub_v9_4_disable_identity_aperture(adev, i);
@@ -657,7 +688,7 @@ static int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
- int data, data1;
+ u32 data, data1;
if (amdgpu_sriov_vf(adev))
*flags = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
index 796d4f8791e5..ced26cc5123a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
@@ -35,13 +35,11 @@
#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
-#define MMSCH_VF_MAILBOX_RESP__OK 0x1
-#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
-
-#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
-
-#define MMSCH_VF_MAILBOX_RESP__OK 0x1
-#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+#define MMSCH_VF_MAILBOX_RESP__OK 0x1
+#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+#define MMSCH_VF_MAILBOX_RESP__FAILED 0x3
+#define MMSCH_VF_MAILBOX_RESP__FAILED_SMALL_CTX_SIZE 0x4
+#define MMSCH_VF_MAILBOX_RESP__UNKNOWN_CMD 0x5
#define MMSCH_V4_0_VCN_INSTANCES 0x2
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h
new file mode 100644
index 000000000000..6f749814929f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V5_0_H__
+#define __MMSCH_V5_0_H__
+
+#include "amdgpu_vcn.h"
+
+#define MMSCH_VERSION_MAJOR 5
+#define MMSCH_VERSION_MINOR 0
+#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+
+#define RB_ENABLED (1 << 0)
+#define RB4_ENABLED (1 << 1)
+
+#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
+
+#define MMSCH_VF_MAILBOX_RESP__OK 0x1
+#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+#define MMSCH_VF_MAILBOX_RESP__FAILED 0x3
+#define MMSCH_VF_MAILBOX_RESP__FAILED_SMALL_CTX_SIZE 0x4
+#define MMSCH_VF_MAILBOX_RESP__UNKNOWN_CMD 0x5
+
+/*
+ * Command type codes. All values fit in the 4-bit command_type field of the
+ * command headers declared in this file.
+ */
+enum mmsch_v5_0_command_type {
+	MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+	MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+	MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+	MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+	MMSCH_COMMAND__END = 0xf
+};
+
+/* Per-engine table descriptor: status word plus the table's offset and size. */
+struct mmsch_v5_0_table_info {
+	uint32_t init_status;
+	uint32_t table_offset;
+	uint32_t table_size;
+};
+
+/*
+ * Init header: version and total size, followed by one table descriptor for
+ * vcn0 and five each for mjpegdec0 and mjpegdec1.
+ */
+struct mmsch_v5_0_init_header {
+	uint32_t version;
+	uint32_t total_size;
+	struct mmsch_v5_0_table_info vcn0;
+	struct mmsch_v5_0_table_info mjpegdec0[5];
+	struct mmsch_v5_0_table_info mjpegdec1[5];
+};
+
+/* Direct command header: 28-bit register offset and 4-bit command type (32 bits total). */
+struct mmsch_v5_0_cmd_direct_reg_header {
+	uint32_t reg_offset : 28;
+	uint32_t command_type : 4;
+};
+
+/*
+ * Indirect command header: 20-bit register offset, 8-bit register index space
+ * and 4-bit command type (32 bits total).
+ */
+struct mmsch_v5_0_cmd_indirect_reg_header {
+	uint32_t reg_offset : 20;
+	uint32_t reg_idx_space : 8;
+	uint32_t command_type : 4;
+};
+
+/* Direct register write command: direct header followed by the register value. */
+struct mmsch_v5_0_cmd_direct_write {
+	struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+	uint32_t reg_value;
+};
+
+/* Direct read-modify-write command: direct header, then write data, then mask. */
+struct mmsch_v5_0_cmd_direct_read_modify_write {
+	struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+	uint32_t write_data;
+	uint32_t mask_value;
+};
+
+/* Direct register polling command: direct header, then mask, then wait value. */
+struct mmsch_v5_0_cmd_direct_polling {
+	struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+	uint32_t mask_value;
+	uint32_t wait_value;
+};
+
+/* End command: consists of a direct header only. */
+struct mmsch_v5_0_cmd_end {
+	struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+};
+
+/* Indirect register write command: indirect header followed by the register value. */
+struct mmsch_v5_0_cmd_indirect_write {
+	struct mmsch_v5_0_cmd_indirect_reg_header cmd_header;
+	uint32_t reg_value;
+};
+
+/*
+ * Append a direct read-modify-write command at table_loc and advance
+ * table_loc and table_size by the command size in dwords.
+ *
+ * Not hygienic: it uses the caller's variables size, size_dw, table_loc,
+ * table_size and direct_rd_mod_wt, and it does not set
+ * cmd_header.command_type. It expands to a bare { } block, not
+ * do { } while (0).
+ */
+#define MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+	size = sizeof(struct mmsch_v5_0_cmd_direct_read_modify_write); \
+	size_dw = size / 4; \
+	direct_rd_mod_wt.cmd_header.reg_offset = reg; \
+	direct_rd_mod_wt.mask_value = mask; \
+	direct_rd_mod_wt.write_data = data; \
+	memcpy((void *)table_loc, &direct_rd_mod_wt, size); \
+	table_loc += size_dw; \
+	table_size += size_dw; \
+}
+
+/*
+ * Append a direct write command at table_loc and advance table_loc and
+ * table_size by the command size in dwords.
+ *
+ * Not hygienic: it uses the caller's variables size, size_dw, table_loc,
+ * table_size and direct_wt, and it does not set cmd_header.command_type.
+ * It expands to a bare { } block, not do { } while (0).
+ */
+#define MMSCH_V5_0_INSERT_DIRECT_WT(reg, value) { \
+	size = sizeof(struct mmsch_v5_0_cmd_direct_write); \
+	size_dw = size / 4; \
+	direct_wt.cmd_header.reg_offset = reg; \
+	direct_wt.reg_value = value; \
+	memcpy((void *)table_loc, &direct_wt, size); \
+	table_loc += size_dw; \
+	table_size += size_dw; \
+}
+
+/*
+ * Append a direct polling command at table_loc and advance table_loc and
+ * table_size by the command size in dwords.
+ *
+ * Not hygienic: it uses the caller's variables size, size_dw, table_loc,
+ * table_size and direct_poll, and it does not set cmd_header.command_type.
+ * It expands to a bare { } block, not do { } while (0).
+ */
+#define MMSCH_V5_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
+	size = sizeof(struct mmsch_v5_0_cmd_direct_polling); \
+	size_dw = size / 4; \
+	direct_poll.cmd_header.reg_offset = reg; \
+	direct_poll.mask_value = mask; \
+	direct_poll.wait_value = wait; \
+	memcpy((void *)table_loc, &direct_poll, size); \
+	table_loc += size_dw; \
+	table_size += size_dw; \
+}
+
+/*
+ * Append the caller's pre-filled `end` command at table_loc and advance
+ * table_loc and table_size by the command size in dwords.
+ *
+ * Not hygienic: it uses the caller's variables size, size_dw, table_loc,
+ * table_size and end. It expands to a bare { } block, not do { } while (0).
+ */
+#define MMSCH_V5_0_INSERT_END() { \
+	size = sizeof(struct mmsch_v5_0_cmd_end); \
+	size_dw = size / 4; \
+	memcpy((void *)table_loc, &end, size); \
+	table_loc += size_dw; \
+	table_size += size_dw; \
+}
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 63725b2ebc03..9a40107a0869 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -93,7 +93,7 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
timeout -= 5;
} while (timeout > 1);
- pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
+ dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
return -ETIME;
}
@@ -111,7 +111,7 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
timeout -= 10;
} while (timeout > 1);
- pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+ dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r);
return -ETIME;
}
@@ -132,7 +132,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
xgpu_ai_mailbox_set_valid(adev, false);
trn = xgpu_ai_peek_ack(adev);
if (trn) {
- pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ dev_err_ratelimited(adev->dev, "trn=%x ACK should not assert! wait again !\n", trn);
msleep(1);
}
} while(trn);
@@ -155,7 +155,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
/* start to poll ack */
r = xgpu_ai_poll_ack(adev);
if (r)
- pr_err("Doesn't get ack from pf, continue\n");
+ dev_err(adev->dev, "Doesn't get ack from pf, continue\n");
xgpu_ai_mailbox_set_valid(adev, false);
}
@@ -173,7 +173,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
req == IDH_REQ_GPU_RESET_ACCESS) {
r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
if (r) {
- pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
+ dev_err(adev->dev, "Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
return r;
}
/* Retrieve checksum from mailbox2 */
@@ -231,7 +231,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- DRM_DEBUG("get ack intr and do nothing.\n");
+ dev_dbg(adev->dev, "get ack intr and do nothing.\n");
return 0;
}
@@ -249,52 +249,80 @@ static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
return 0;
}
-static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+static void xgpu_ai_ready_to_reset(struct amdgpu_device *adev)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
- struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
- int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
-
- /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
- * otherwise the mailbox msg will be ruined/reseted by
- * the VF FLR.
- */
- if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
- return;
-
- down_write(&adev->reset_domain->sem);
-
- amdgpu_virt_fini_data_exchange(adev);
-
xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+}
+static int xgpu_ai_wait_reset(struct amdgpu_device *adev)
+{
+ int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
do {
- if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
- goto flr_done;
-
+ if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) {
+ dev_dbg(adev->dev, "Got AI IDH_FLR_NOTIFICATION_CMPL after %d ms\n", AI_MAILBOX_POLL_FLR_TIMEDOUT - timeout);
+ return 0;
+ }
msleep(10);
timeout -= 10;
} while (timeout > 1);
-flr_done:
- atomic_set(&adev->reset_domain->in_gpu_reset, 0);
- up_write(&adev->reset_domain->sem);
+ dev_dbg(adev->dev, "waiting AI IDH_FLR_NOTIFICATION_CMPL timeout\n");
+ return -ETIME;
+}
+
+static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_reset_context reset_context = { 0 };
+
+ amdgpu_virt_fini_data_exchange(adev);
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
- struct amdgpu_reset_context reset_context;
- memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
}
+/*
+ * Work handler for virt.req_bad_pages_work.
+ *
+ * Tears down the data exchange region and calls
+ * amdgpu_virt_request_bad_pages(). The whole operation is skipped when the
+ * reset domain semaphore cannot be taken for reading without blocking.
+ */
+static void xgpu_ai_mailbox_req_bad_pages_work(struct work_struct *work)
+{
+	struct amdgpu_virt *v;
+	struct amdgpu_device *adev;
+
+	v = container_of(work, struct amdgpu_virt, req_bad_pages_work);
+	adev = container_of(v, struct amdgpu_device, virt);
+
+	/* Semaphore not available: leave without touching anything. */
+	if (!down_read_trylock(&adev->reset_domain->sem))
+		return;
+
+	amdgpu_virt_fini_data_exchange(adev);
+	amdgpu_virt_request_bad_pages(adev);
+	up_read(&adev->reset_domain->sem);
+}
+
+/**
+ * xgpu_ai_mailbox_handle_bad_pages_work - Refresh the data exchange region
+ * @work: work_struct embedded in struct amdgpu_virt as handle_bad_pages_work
+ *
+ * Scheduled when bad pages are ready. Tears down and re-initializes the data
+ * exchange region so that updated bad page information can be retrieved from
+ * the host. Nothing is done when the reset domain semaphore cannot be taken
+ * for reading without blocking.
+ */
+static void xgpu_ai_mailbox_handle_bad_pages_work(struct work_struct *work)
+{
+	struct amdgpu_virt *v;
+	struct amdgpu_device *adev;
+
+	v = container_of(work, struct amdgpu_virt, handle_bad_pages_work);
+	adev = container_of(v, struct amdgpu_device, virt);
+
+	/* Semaphore not available: leave without touching anything. */
+	if (!down_read_trylock(&adev->reset_domain->sem))
+		return;
+
+	amdgpu_virt_fini_data_exchange(adev);
+	amdgpu_virt_init_data_exchange(adev);
+	up_read(&adev->reset_domain->sem);
+}
+
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
@@ -314,26 +342,47 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (event) {
- case IDH_FLR_NOTIFICATION:
- if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
+ case IDH_RAS_BAD_PAGES_READY:
+ xgpu_ai_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.handle_bad_pages_work);
+ break;
+ case IDH_RAS_BAD_PAGES_NOTIFICATION:
+ xgpu_ai_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.req_bad_pages_work);
+ break;
+ case IDH_UNRECOV_ERR_NOTIFICATION:
+ xgpu_ai_mailbox_send_ack(adev);
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n");
+ if (amdgpu_sriov_runtime(adev))
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
- &adev->virt.flr_work),
- "Failed to queue work! at %s",
- __func__);
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
break;
- case IDH_QUERY_ALIVE:
- xgpu_ai_mailbox_send_ack(adev);
- break;
- /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
- * it byfar since that polling thread will handle it,
- * other msg like flr complete is not handled here.
- */
- case IDH_CLR_MSG_BUF:
- case IDH_FLR_NOTIFICATION_CMPL:
- case IDH_READY_TO_ACCESS_GPU:
- default:
+ case IDH_FLR_NOTIFICATION:
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
+ case IDH_QUERY_ALIVE:
+ xgpu_ai_mailbox_send_ack(adev);
+ break;
+	/* READY_TO_ACCESS_GPU is fetched by kernel polling, so the IRQ handler
+	 * can ignore it for now since that polling thread will handle it;
+	 * other messages such as FLR complete are not handled here.
+	 */
+ case IDH_CLR_MSG_BUF:
+ case IDH_FLR_NOTIFICATION_CMPL:
+ case IDH_READY_TO_ACCESS_GPU:
+ default:
break;
}
@@ -389,6 +438,8 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
}
INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
+ INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_ai_mailbox_req_bad_pages_work);
+ INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_ai_mailbox_handle_bad_pages_work);
return 0;
}
@@ -404,17 +455,27 @@ static int xgpu_ai_request_init_data(struct amdgpu_device *adev)
return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
}
-static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev)
+static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
{
xgpu_ai_send_access_requests(adev, IDH_RAS_POISON);
}
+/*
+ * Peek at the mailbox and report whether the pending message is
+ * IDH_RAS_ERROR_DETECTED or reads back as 0xFFFFFFFF.
+ */
+static bool xgpu_ai_rcvd_ras_intr(struct amdgpu_device *adev)
+{
+	enum idh_event pending = xgpu_ai_mailbox_peek_msg(adev);
+
+	if (pending == IDH_RAS_ERROR_DETECTED)
+		return true;
+
+	return pending == 0xFFFFFFFF;
+}
+
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.req_full_gpu = xgpu_ai_request_full_gpu_access,
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
.reset_gpu = xgpu_ai_request_reset,
- .wait_reset = NULL,
+ .ready_to_reset = xgpu_ai_ready_to_reset,
+ .wait_reset = xgpu_ai_wait_reset,
.trans_msg = xgpu_ai_mailbox_trans_msg,
.req_init_data = xgpu_ai_request_init_data,
.ras_poison_handler = xgpu_ai_ras_poison_handler,
+ .rcvd_ras_intr = xgpu_ai_rcvd_ras_intr,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index af1a784696bd..874b9f8f9804 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -40,6 +40,7 @@ enum idh_request {
IDH_LOG_VF_ERROR = 200,
IDH_READY_TO_RESET = 201,
IDH_RAS_POISON = 202,
+ IDH_REQ_RAS_BAD_PAGES = 205,
};
enum idh_event {
@@ -51,7 +52,12 @@ enum idh_event {
IDH_FAIL,
IDH_QUERY_ALIVE,
IDH_REQ_GPU_INIT_DATA_READY,
-
+ IDH_RAS_POISON_READY,
+ IDH_PF_SOFT_FLR_NOTIFICATION,
+ IDH_RAS_ERROR_DETECTED,
+ IDH_RAS_BAD_PAGES_READY = 15,
+ IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
+ IDH_UNRECOV_ERR_NOTIFICATION = 17,
IDH_TEXT_MESSAGE = 255,
};
@@ -62,7 +68,9 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev);
int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev);
void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev);
-#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4
-#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1
+#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE \
+ (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4)
+#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE \
+ (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1)
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 6a68ee946f1c..e5282a5d05d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -61,15 +61,20 @@ static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
+/*
+ * Check the receive mailbox for @event.
+ *
+ * Reads mmMAILBOX_MSGBUF_RCV_DW0. When the value is neither
+ * IDH_UNRECOV_ERR_NOTIFICATION nor @event, returns -ENOENT without sending an
+ * ack. Otherwise the message is acked and the function returns -ENODEV for
+ * IDH_UNRECOV_ERR_NOTIFICATION, or the status recorded for @event.
+ */
static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
enum idh_event event)
{
+ int r = 0;
u32 reg;
reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
- if (reg != event)
+ /*
+  * NOTE(review): the IDH_FAIL check below is not chained to the following
+  * if/else-if, so -EINVAL only reaches the caller when @event itself is
+  * IDH_FAIL; for any other @event an IDH_FAIL message ends in -ENOENT.
+  * Confirm this is intended.
+  */
+ if (reg == IDH_FAIL)
+ r = -EINVAL;
+ if (reg == IDH_UNRECOV_ERR_NOTIFICATION)
+ r = -ENODEV;
+ else if (reg != event)
return -ENOENT;
xgpu_nv_mailbox_send_ack(adev);
- return 0;
+ return r;
}
static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
@@ -91,7 +96,7 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
timeout -= 5;
} while (timeout > 1);
- pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
+ dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec \n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
return -ETIME;
}
@@ -100,19 +105,31 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
{
int r;
uint64_t timeout, now;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
now = (uint64_t)ktime_to_ms(ktime_get());
timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT;
do {
r = xgpu_nv_mailbox_rcv_msg(adev, event);
- if (!r)
+ if (!r) {
+ dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n",
+ event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now);
return 0;
+ } else if (r == -ENODEV) {
+ if (!amdgpu_ras_is_rma(adev)) {
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. "
+ "Runtime Services are halted.\n");
+ }
+ return r;
+ }
msleep(10);
now = (uint64_t)ktime_to_ms(ktime_get());
} while (timeout > now);
+ dev_dbg(adev->dev, "nv_poll_msg timed out\n");
return -ETIME;
}
@@ -133,11 +150,12 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
xgpu_nv_mailbox_set_valid(adev, false);
trn = xgpu_nv_peek_ack(adev);
if (trn) {
- pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ dev_err_ratelimited(adev->dev, "trn=%x ACK should not assert! wait again !\n", trn);
msleep(1);
}
} while (trn);
+ dev_dbg(adev->dev, "trans_msg req = 0x%x, data1 = 0x%x\n", req, data1);
WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0, req);
WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW1, data1);
WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW2, data2);
@@ -147,19 +165,23 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
/* start to poll ack */
r = xgpu_nv_poll_ack(adev);
if (r)
- pr_err("Doesn't get ack from pf, continue\n");
+ dev_err(adev->dev, "Doesn't get ack from pf, continue\n");
xgpu_nv_mailbox_set_valid(adev, false);
}
-static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
- enum idh_request req)
+static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
+ enum idh_request req, u32 data1, u32 data2, u32 data3)
{
int r, retry = 1;
enum idh_event event = -1;
send_request:
- xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
+
+ if (amdgpu_ras_is_rma(adev))
+ return -ENODEV;
+
+ xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3);
switch (req) {
case IDH_REQ_GPU_INIT_ACCESS:
@@ -170,6 +192,19 @@ send_request:
case IDH_REQ_GPU_INIT_DATA:
event = IDH_REQ_GPU_INIT_DATA_READY;
break;
+ case IDH_RAS_POISON:
+ if (data1 != 0)
+ event = IDH_RAS_POISON_READY;
+ break;
+ case IDH_REQ_RAS_ERROR_COUNT:
+ event = IDH_RAS_ERROR_COUNT_READY;
+ break;
+ case IDH_REQ_RAS_CPER_DUMP:
+ event = IDH_RAS_CPER_DUMP_READY;
+ break;
+ case IDH_REQ_RAS_CHK_CRITI:
+ event = IDH_REQ_RAS_CHK_CRITI_READY;
+ break;
default:
break;
}
@@ -177,11 +212,11 @@ send_request:
if (event != -1) {
r = xgpu_nv_poll_msg(adev, event);
if (r) {
- if (retry++ < 2)
+ if (retry++ < 5)
goto send_request;
if (req != IDH_REQ_GPU_INIT_DATA) {
- pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+ dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r);
return r;
} else /* host doesn't support REQ_GPU_INIT_DATA handshake */
adev->virt.req_init_data_ver = 0;
@@ -206,6 +241,13 @@ send_request:
return 0;
}
+/*
+ * Issue @req through xgpu_nv_send_access_requests_with_param() with all three
+ * data arguments set to zero, and return its result.
+ */
+static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
+					enum idh_request req)
+{
+	const u32 no_data = 0;
+
+	return xgpu_nv_send_access_requests_with_param(adev, req, no_data,
+						       no_data, no_data);
+}
+
static int xgpu_nv_request_reset(struct amdgpu_device *adev)
{
int ret, i = 0;
@@ -250,7 +292,7 @@ static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- DRM_DEBUG("get ack intr and do nothing.\n");
+ dev_dbg(adev->dev, "get ack intr and do nothing.\n");
return 0;
}
@@ -271,36 +313,34 @@ static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
return 0;
}
-static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+static void xgpu_nv_ready_to_reset(struct amdgpu_device *adev)
{
- struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
- struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
- int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
-
- /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
- * otherwise the mailbox msg will be ruined/reseted by
- * the VF FLR.
- */
- if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
- return;
-
- down_write(&adev->reset_domain->sem);
-
- amdgpu_virt_fini_data_exchange(adev);
-
xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+}
+static int xgpu_nv_wait_reset(struct amdgpu_device *adev)
+{
+ int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
do {
- if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
- goto flr_done;
-
+ if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) {
+ dev_dbg(adev->dev, "Got NV IDH_FLR_NOTIFICATION_CMPL after %d ms\n", NV_MAILBOX_POLL_FLR_TIMEDOUT - timeout);
+ return 0;
+ }
msleep(10);
timeout -= 10;
} while (timeout > 1);
-flr_done:
- atomic_set(&adev->reset_domain->in_gpu_reset, 0);
- up_write(&adev->reset_domain->sem);
+ dev_dbg(adev->dev, "waiting NV IDH_FLR_NOTIFICATION_CMPL timeout\n");
+ return -ETIME;
+}
+
+static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_reset_context reset_context = { 0 };
+
+ amdgpu_virt_fini_data_exchange(adev);
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
@@ -309,17 +349,47 @@ flr_done:
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) {
- struct amdgpu_reset_context reset_context;
- memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
}
+/*
+ * Work handler for virt.req_bad_pages_work.
+ *
+ * Tears down the data exchange region and calls
+ * amdgpu_virt_request_bad_pages(). The whole operation is skipped when the
+ * reset domain semaphore cannot be taken for reading without blocking.
+ */
+static void xgpu_nv_mailbox_req_bad_pages_work(struct work_struct *work)
+{
+	struct amdgpu_virt *v;
+	struct amdgpu_device *adev;
+
+	v = container_of(work, struct amdgpu_virt, req_bad_pages_work);
+	adev = container_of(v, struct amdgpu_device, virt);
+
+	/* Semaphore not available: leave without touching anything. */
+	if (!down_read_trylock(&adev->reset_domain->sem))
+		return;
+
+	amdgpu_virt_fini_data_exchange(adev);
+	amdgpu_virt_request_bad_pages(adev);
+	up_read(&adev->reset_domain->sem);
+}
+
+/**
+ * xgpu_nv_mailbox_handle_bad_pages_work - Refresh the data exchange region
+ * @work: work_struct embedded in struct amdgpu_virt as handle_bad_pages_work
+ *
+ * Scheduled when bad pages are ready. Tears down and re-initializes the data
+ * exchange region so that updated bad page information can be retrieved from
+ * the host. Nothing is done when the reset domain semaphore cannot be taken
+ * for reading without blocking.
+ */
+static void xgpu_nv_mailbox_handle_bad_pages_work(struct work_struct *work)
+{
+	struct amdgpu_virt *v;
+	struct amdgpu_device *adev;
+
+	v = container_of(work, struct amdgpu_virt, handle_bad_pages_work);
+	adev = container_of(v, struct amdgpu_device, virt);
+
+	/* Semaphore not available: leave without touching anything. */
+	if (!down_read_trylock(&adev->reset_domain->sem))
+		return;
+
+	amdgpu_virt_fini_data_exchange(adev);
+	amdgpu_virt_init_data_exchange(adev);
+	up_read(&adev->reset_domain->sem);
+}
+
static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
@@ -342,10 +412,34 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (event) {
+ case IDH_RAS_BAD_PAGES_READY:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.handle_bad_pages_work);
+ break;
+ case IDH_RAS_BAD_PAGES_NOTIFICATION:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.req_bad_pages_work);
+ break;
+ case IDH_UNRECOV_ERR_NOTIFICATION:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (!amdgpu_ras_is_rma(adev)) {
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n");
+ }
+
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
case IDH_FLR_NOTIFICATION:
- if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
+ if (amdgpu_sriov_runtime(adev))
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
&adev->virt.flr_work),
"Failed to queue work! at %s",
@@ -414,6 +508,8 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
}
INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
+ INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_nv_mailbox_req_bad_pages_work);
+ INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_nv_mailbox_handle_bad_pages_work);
return 0;
}
@@ -424,9 +520,53 @@ void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
-static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev)
+/*
+ * Send an IDH_RAS_POISON request.
+ *
+ * For UMC IP versions below 12.0.0 the request carries no data. Otherwise the
+ * data exchange region is torn down first and @block is passed as the first
+ * data argument. The result of the request is not propagated.
+ */
+static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev,
+				       enum amdgpu_ras_block block)
+{
+	if (amdgpu_ip_version(adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
+		amdgpu_virt_fini_data_exchange(adev);
+		xgpu_nv_send_access_requests_with_param(adev, IDH_RAS_POISON,
+							block, 0, 0);
+		return;
+	}
+
+	xgpu_nv_send_access_requests(adev, IDH_RAS_POISON);
+}
+
+/*
+ * Peek at the mailbox and report whether the pending message is
+ * IDH_RAS_ERROR_DETECTED or reads back as 0xFFFFFFFF.
+ */
+static bool xgpu_nv_rcvd_ras_intr(struct amdgpu_device *adev)
+{
+	enum idh_event pending = xgpu_nv_mailbox_peek_msg(adev);
+
+	if (pending == IDH_RAS_ERROR_DETECTED)
+		return true;
+
+	return pending == 0xFFFFFFFF;
+}
+
+/* Issue an IDH_REQ_RAS_ERROR_COUNT request and return its status. */
+static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev)
+{
+	int status = xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT);
+
+	return status;
+}
+
+/*
+ * Issue an IDH_REQ_RAS_CPER_DUMP request. The 64-bit @vf_rptr is split into
+ * its upper 32 bits (first data argument) and lower 32 bits (second data
+ * argument); the third data argument is zero. Returns the request status.
+ */
+static int xgpu_nv_req_ras_cper_dump(struct amdgpu_device *adev, u64 vf_rptr)
+{
+	const uint32_t rptr_upper = (uint32_t)(vf_rptr >> 32);
+	const uint32_t rptr_lower = (uint32_t)(vf_rptr & 0xFFFFFFFF);
+
+	return xgpu_nv_send_access_requests_with_param(adev,
+						       IDH_REQ_RAS_CPER_DUMP,
+						       rptr_upper, rptr_lower,
+						       0);
+}
+
+/*
+ * Issue an IDH_REQ_RAS_BAD_PAGES request through
+ * xgpu_nv_send_access_requests() and return its result.
+ */
+static int xgpu_nv_req_ras_bad_pages(struct amdgpu_device *adev)
{
- xgpu_nv_send_access_requests(adev, IDH_RAS_POISON);
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_BAD_PAGES);
+}
+
+/*
+ * Issue an IDH_REQ_RAS_CHK_CRITI request for @addr. The 64-bit address is
+ * split into its upper 32 bits (first data argument) and lower 32 bits (second
+ * data argument); the third data argument is zero. Returns the request status.
+ */
+static int xgpu_nv_check_vf_critical_region(struct amdgpu_device *adev, u64 addr)
+{
+	const uint32_t addr_upper = (uint32_t)(addr >> 32);
+	const uint32_t addr_lower = (uint32_t)(addr & 0xFFFFFFFF);
+
+	return xgpu_nv_send_access_requests_with_param(adev,
+						       IDH_REQ_RAS_CHK_CRITI,
+						       addr_upper, addr_lower,
+						       0);
}
const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
@@ -434,7 +574,13 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.rel_full_gpu = xgpu_nv_release_full_gpu_access,
.req_init_data = xgpu_nv_request_init_data,
.reset_gpu = xgpu_nv_request_reset,
- .wait_reset = NULL,
+ .ready_to_reset = xgpu_nv_ready_to_reset,
+ .wait_reset = xgpu_nv_wait_reset,
.trans_msg = xgpu_nv_mailbox_trans_msg,
.ras_poison_handler = xgpu_nv_ras_poison_handler,
+ .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr,
+ .req_ras_err_count = xgpu_nv_req_ras_err_count,
+ .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump,
+ .req_bad_pages = xgpu_nv_req_ras_bad_pages,
+ .req_ras_chk_criti = xgpu_nv_check_vf_critical_region
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index d0221ce08769..c1083e5e41e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -25,8 +25,8 @@
#define __MXGPU_NV_H__
#define NV_MAILBOX_POLL_ACK_TIMEDOUT 500
-#define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000
-#define NV_MAILBOX_POLL_FLR_TIMEDOUT 5000
+#define NV_MAILBOX_POLL_MSG_TIMEDOUT 15000
+#define NV_MAILBOX_POLL_FLR_TIMEDOUT 10000
#define NV_MAILBOX_POLL_MSG_REP_MAX 11
enum idh_request {
@@ -40,6 +40,10 @@ enum idh_request {
IDH_LOG_VF_ERROR = 200,
IDH_READY_TO_RESET = 201,
IDH_RAS_POISON = 202,
+ IDH_REQ_RAS_ERROR_COUNT = 203,
+ IDH_REQ_RAS_CPER_DUMP = 204,
+ IDH_REQ_RAS_BAD_PAGES = 205,
+ IDH_REQ_RAS_CHK_CRITI = 206
};
enum idh_event {
@@ -51,6 +55,15 @@ enum idh_event {
IDH_FAIL,
IDH_QUERY_ALIVE,
IDH_REQ_GPU_INIT_DATA_READY,
+ IDH_RAS_POISON_READY,
+ IDH_PF_SOFT_FLR_NOTIFICATION,
+ IDH_RAS_ERROR_DETECTED,
+ IDH_RAS_ERROR_COUNT_READY = 11,
+ IDH_RAS_CPER_DUMP_READY = 14,
+ IDH_RAS_BAD_PAGES_READY = 15,
+ IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
+ IDH_UNRECOV_ERR_NOTIFICATION = 17,
+ IDH_REQ_RAS_CHK_CRITI_READY = 18,
IDH_TEXT_MESSAGE = 255,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 59f53c743362..e1d63bed84bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -515,12 +515,6 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
- /* wait until RCV_MSG become 3 */
- if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
- pr_err("failed to receive FLR_CMPL\n");
- return;
- }
-
/* Trigger recovery due to world switch failure */
if (amdgpu_device_should_recover_gpu(adev)) {
struct amdgpu_reset_context reset_context;
@@ -529,6 +523,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
@@ -560,7 +555,7 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
/* only handle FLR_NOTIFY now */
- if (!r && !amdgpu_in_reset(adev))
+ if (!r)
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
&adev->virt.flr_work),
"Failed to queue work! at %s",
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index b6a8478dabf4..4cd325149b63 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -107,7 +107,7 @@ force_update_wptr_for_self_int(struct amdgpu_device *adev,
{
u32 ih_cntl, ih_rb_cntl;
- if (adev->ip_versions[OSSSYS_HWIP][0] < IP_VERSION(5, 0, 3))
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) < IP_VERSION(5, 0, 3))
return;
ih_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_CNTL2);
@@ -330,7 +330,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) {
if (ih[0]->use_bus_addr) {
- switch (adev->ip_versions[OSSSYS_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, OSSSYS_HWIP, 0)) {
case IP_VERSION(5, 0, 3):
case IP_VERSION(5, 2, 0):
case IP_VERSION(5, 2, 1):
@@ -434,14 +434,19 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
* this should allow us to catch up.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
out:
return (wptr & ih->ptr_mask);
}
@@ -536,19 +541,19 @@ static void navi10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &navi10_ih_self_irq_funcs;
}
-static int navi10_ih_early_init(void *handle)
+static int navi10_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
navi10_ih_set_interrupt_funcs(adev);
navi10_ih_set_self_irq_funcs(adev);
return 0;
}
-static int navi10_ih_sw_init(void *handle)
+static int navi10_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
@@ -587,58 +592,52 @@ static int navi10_ih_sw_init(void *handle)
return r;
}
-static int navi10_ih_sw_fini(void *handle)
+static int navi10_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int navi10_ih_hw_init(void *handle)
+static int navi10_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return navi10_ih_irq_init(adev);
}
-static int navi10_ih_hw_fini(void *handle)
+static int navi10_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- navi10_ih_irq_disable(adev);
+ navi10_ih_irq_disable(ip_block->adev);
return 0;
}
-static int navi10_ih_suspend(void *handle)
+static int navi10_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return navi10_ih_hw_fini(adev);
+ return navi10_ih_hw_fini(ip_block);
}
-static int navi10_ih_resume(void *handle)
+static int navi10_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return navi10_ih_hw_init(adev);
+ return navi10_ih_hw_init(ip_block);
}
-static bool navi10_ih_is_idle(void *handle)
+static bool navi10_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int navi10_ih_wait_for_idle(void *handle)
+static int navi10_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int navi10_ih_soft_reset(void *handle)
+static int navi10_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
@@ -665,40 +664,35 @@ static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data);
}
-
- return;
}
-static int navi10_ih_set_clockgating_state(void *handle,
+static int navi10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
navi10_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
return 0;
}
-static int navi10_ih_set_powergating_state(void *handle,
+static int navi10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void navi10_ih_get_clockgating_state(void *handle, u64 *flags)
+static void navi10_ih_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL))
*flags |= AMD_CG_SUPPORT_IH_CG;
-
- return;
}
static const struct amd_ip_funcs navi10_ih_ip_funcs = {
.name = "navi10_ih",
.early_init = navi10_ih_early_init,
- .late_init = NULL,
.sw_init = navi10_ih_sw_init,
.sw_fini = navi10_ih_sw_fini,
.hw_init = navi10_ih_hw_init,
@@ -726,8 +720,7 @@ static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &navi10_ih_funcs;
}
-const struct amdgpu_ip_block_version navi10_ih_ip_block =
-{
+const struct amdgpu_ip_block_version navi10_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 5,
.minor = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
index a5b60c9a2418..c88284ff92d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
@@ -68,6 +68,7 @@
#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define SDMA_SUBOP_VM_INVALIDATION 4
#define HEADER_AGENT_DISPATCH 4
#define HEADER_BARRIER 5
#define SDMA_OP_AQL_COPY 0
@@ -4041,6 +4042,69 @@
/*
+** Definitions for SDMA_PKT_VM_INVALIDATION packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8
+#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift)
+
+/*define for gfx_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift 16
+#define SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift)
+
+/*define for mm_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift 24
+#define SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift)
+
+/*define for INVALIDATEREQ word*/
+/*define for invalidatereq field*/
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift)
+
+/*define for ADDRESSRANGELO word*/
+/*define for addressrangelo field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift)
+
+/*define for ADDRESSRANGEHI word*/
+/*define for invalidateack field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift)
+
+/*define for addressrangehi field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift)
+
+/*define for reserved field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift)
+
+
+/*
** Definitions for SDMA_PKT_ATOMIC packet
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
new file mode 100644
index 000000000000..9b4025c39e44
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
@@ -0,0 +1,554 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbif_v6_3_1.h"
+
+#include "nbif/nbif_6_3_1_offset.h"
+#include "nbif/nbif_6_3_1_sh_mask.h"
+#include "pcie/pcie_6_1_0_offset.h"
+#include "pcie/pcie_6_1_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+static u32 nbif_v6_3_1_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+
+ tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbif_v6_3_1_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN,
+ BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0);
+}
+
+static u32 nbif_v6_3_1_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbif_v6_3_1_sdma_doorbell_range(struct amdgpu_device *adev,
+ int instance, bool use_doorbell,
+ int doorbell_index,
+ int doorbell_size)
+{
+ if (instance == 0) {
+ u32 doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_ENABLE,
+ 0x1);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_AWID,
+ 0xe);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_SIZE,
+ doorbell_size);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE,
+ 0x3);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL,
+ S2A_DOORBELL_PORT2_RANGE_SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, doorbell_range);
+ }
+}
+
+static void nbif_v6_3_1_vcn_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index,
+ int instance)
+{
+ u32 doorbell_range;
+
+ if (instance)
+ doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL);
+ else
+ doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_ENABLE,
+ 0x1);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_AWID,
+ instance ? 0x7 : 0x4);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_SIZE,
+ 8);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE,
+ instance ? 0x7 : 0x4);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL,
+ S2A_DOORBELL_PORT4_RANGE_SIZE,
+ 0);
+
+ if (instance)
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, doorbell_range);
+ else
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, doorbell_range);
+}
+
+static void nbif_v6_3_1_gc_doorbell_init(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, 0x30000007);
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d);
+}
+
+static void nbif_v6_3_1_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN,
+ BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+static void
+nbif_v6_3_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp);
+}
+
+static void nbif_v6_3_1_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_ENABLE,
+ 0x1);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWID,
+ 0x0);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE,
+ 2);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE,
+ 0x0);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL,
+ S2A_DOORBELL_PORT1_RANGE_SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range);
+}
+
+static void nbif_v6_3_1_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL);
+ /*
+ * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* BIF_BX0_INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl);
+}
+
+static void
+nbif_v6_3_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+static void
+nbif_v6_3_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+}
+
+static void
+nbif_v6_3_1_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+}
+
+static u32 nbif_v6_3_1_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbif_v6_3_1_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbif_v6_3_1_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX);
+}
+
+static u32 nbif_v6_3_1_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA);
+}
+
+const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbif_v6_3_1_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2);
+ data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
+}
+
+static u32 nbif_v6_3_1_get_rom_offset(struct amdgpu_device *adev)
+{
+ u32 data, rom_offset;
+
+ data = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL);
+ rom_offset = REG_GET_FIELD(data, REGS_ROM_OFFSET_CTRL, ROM_OFFSET);
+
+ return rom_offset;
+}
+
+#ifdef CONFIG_PCIEASPM
+/* Program the LTR control/strap registers and make DEVCTL2.LTR_EN follow pdev->ltr_path. */
+static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev)
+{
+	uint32_t def, data;
+	u16 devctl2;
+
+	def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+	data = 0x35EB;
+	data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+	data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+	def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2);
+	data &= ~RCC_STRAP0_RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data);
+
+	pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2);
+	/* ltr_path is used as a boolean; reduce the masked LTR_EN bit to 0/1 before comparing. */
+	if (adev->pdev->ltr_path == !!(devctl2 & PCI_EXP_DEVCTL2_LTR_EN))
+		return;
+	if (adev->pdev->ltr_path)
+		pcie_capability_set_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN);
+	else
+		pcie_capability_clear_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN);
+}
+#endif
+
+/* Program ASPM-related PCIe link and strap registers; the body is empty without CONFIG_PCIEASPM. */
+static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+	uint32_t def, data;
+	u16 devctl2, ltr;
+
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL);
+	data &= ~(PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK);
+	data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data);
+
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7);
+	data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7, data);
+
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3);
+	data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data);
+
+	def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+	data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+	data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+	def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+	data &= ~RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+	/* Clear LTR_EN before the LTR latency write below; set_word() can only
+	 * OR bits in, so the bit has to be dropped with clear_word(). */
+	pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2);
+	if (devctl2 & PCI_EXP_DEVCTL2_LTR_EN)
+		pcie_capability_clear_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN);
+
+	ltr = pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_LTR);
+
+	/* NOTE(review): the dword write presumably covers max snoop and no-snoop latency - confirm */
+	if (ltr)
+		pci_write_config_dword(adev->pdev, ltr + PCI_LTR_MAX_SNOOP_LAT, 0x10011001);
+
+#if 0
+	/* regPSWUSP0_PCIE_LC_CNTL2 should be replaced by PCIE_LC_CNTL2 or another register? */
+	def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2);
+	data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+		PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+	data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data);
+#endif
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4);
+	data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4, data);
+
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL);
+	data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data);
+
+	nbif_v6_3_1_program_ltr(adev);
+
+	def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+	data |= 0x5DE0 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+	data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+	def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+	data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+	if (def != data)
+		WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL);
+	data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+	data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+	data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data);
+
+	def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3);
+	data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+	if (def != data)
+		WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data);
+#endif
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbif_v6_3_1_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
+const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = {
+ .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset,
+ .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset,
+ .get_rev_id = nbif_v6_3_1_get_rev_id,
+ .mc_access_enable = nbif_v6_3_1_mc_access_enable,
+ .get_memsize = nbif_v6_3_1_get_memsize,
+ .sdma_doorbell_range = nbif_v6_3_1_sdma_doorbell_range,
+ .vcn_doorbell_range = nbif_v6_3_1_vcn_doorbell_range,
+ .gc_doorbell_init = nbif_v6_3_1_gc_doorbell_init,
+ .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbif_v6_3_1_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbif_v6_3_1_get_clockgating_state,
+ .ih_control = nbif_v6_3_1_ih_control,
+ .init_registers = nbif_v6_3_1_init_registers,
+ .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers,
+ .get_rom_offset = nbif_v6_3_1_get_rom_offset,
+ .program_aspm = nbif_v6_3_1_program_aspm,
+ .set_reg_remap = nbif_v6_3_1_set_reg_remap,
+};
+
+
+static int nbif_v6_3_1_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable the ras feature. The driver only needs to set the correct
+ * interrupt vector for the bare-metal and sriov use cases respectively.
+ */
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 0 : 1); /* DISABLE field: 0 when enabling, 1 otherwise */
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+
+ return 0;
+}
+
+static int nbif_v6_3_1_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to the bif ring. Since the bif ring is not enabled, the process callback
+ * is left as a dummy that always returns 0.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbif_v6_3_1_ras_err_event_athub_irq_funcs = {
+ .set = nbif_v6_3_1_set_ras_err_event_athub_irq_state,
+ .process = nbif_v6_3_1_process_err_event_athub_irq,
+};
+
+static void nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+static int nbif_v6_3_1_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbif_v6_3_1_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt
+ * nbif v6_3_1 uses the same irq source as nbio v7_4
+ */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+struct amdgpu_nbio_ras nbif_v6_3_1_ras = {
+ .handle_ras_err_event_athub_intr_no_bifring =
+ nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_err_event_athub_interrupt =
+ nbif_v6_3_1_init_ras_err_event_athub_interrupt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
new file mode 100644
index 000000000000..3afec715a9fe
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V6_3_1_H__
+#define __NBIO_V6_3_1_H__
+
+#include "soc15_common.h"
+
+extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs;
+extern struct amdgpu_nbio_ras nbif_v6_3_1_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 4038455d7998..04041b398781 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -21,13 +21,13 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v2_3.h"
#include "nbio/nbio_2_3_default.h"
#include "nbio/nbio_2_3_offset.h"
#include "nbio/nbio_2_3_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
+#include <linux/device.h>
#include <linux/pci.h>
#define smnPCIE_CONFIG_CNTL 0x11180044
@@ -338,10 +338,6 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
-
- if (amdgpu_sriov_vf(adev))
- adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
#define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by default, no gains over L1
@@ -361,14 +357,14 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
- if (pci_is_thunderbolt_attached(adev->pdev))
+ if (dev_is_removable(&adev->pdev->dev))
data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
else
data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
} else {
- /* Disbale ASPM L1 */
+ /* Disable ASPM L1 */
data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
/* Disable ASPM TxL0s */
data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
@@ -480,7 +476,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
- if (pci_is_thunderbolt_attached(adev->pdev))
+ if (dev_is_removable(&adev->pdev->dev))
data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
else
data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
@@ -536,7 +532,7 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
{
uint32_t reg, reg_data;
- if (adev->ip_versions[NBIO_HWIP][0] != IP_VERSION(3, 3, 0))
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) != IP_VERSION(3, 3, 0))
return;
reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL);
@@ -552,6 +548,20 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
}
}
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v2_3_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
@@ -576,4 +586,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
.apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa,
.clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt,
+ .set_reg_remap = nbio_v2_3_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
index e5b5b0f4940f..f89e5f40e1a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v4_3.h"
#include "nbio/nbio_4_3_0_offset.h"
@@ -338,16 +337,13 @@ const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg = {
static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
{
- if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(4, 3, 0)) {
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(4, 3, 0)) {
uint32_t data;
data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2);
data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
}
- if (amdgpu_sriov_vf(adev))
- adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
- regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
@@ -392,8 +388,8 @@ static void nbio_v4_3_program_aspm(struct amdgpu_device *adev)
#ifdef CONFIG_PCIEASPM
uint32_t def, data;
- if (!(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 4, 0)) &&
- !(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 6, 0)))
+ if (!(amdgpu_ip_version(adev, PCIE_HWIP, 0) == IP_VERSION(7, 4, 0)) &&
+ !(amdgpu_ip_version(adev, PCIE_HWIP, 0) == IP_VERSION(7, 6, 0)))
return;
def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
@@ -475,6 +471,20 @@ static void nbio_v4_3_program_aspm(struct amdgpu_device *adev)
#endif
}
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v4_3_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+ regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
+}
+
const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
@@ -497,6 +507,7 @@ const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
.get_rom_offset = nbio_v4_3_get_rom_offset,
.program_aspm = nbio_v4_3_program_aspm,
+ .set_reg_remap = nbio_v4_3_set_reg_remap,
};
@@ -541,6 +552,7 @@ const struct amdgpu_nbio_funcs nbio_v4_3_sriov_funcs = {
.init_registers = nbio_v4_3_init_registers,
.remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
.get_rom_offset = nbio_v4_3_get_rom_offset,
+ .set_reg_remap = nbio_v4_3_set_reg_remap,
};
static int nbio_v4_3_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 37615a77287b..e911368c1aeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v6_1.h"
#include "nbio/nbio_6_1_default.h"
@@ -276,10 +275,6 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_CI_CNTL, data);
-
- if (amdgpu_sriov_vf(adev))
- adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
#ifdef CONFIG_PCIEASPM
@@ -394,6 +389,21 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev)
#endif
}
+/* Remap target used on bare metal: the last page below offset 0x80000. */
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+/*
+ * nbio_v6_1_set_reg_remap - fill in adev->rmmio_remap
+ *
+ * SR-IOV VFs, and kernels whose PAGE_SIZE is larger than 4096, get the
+ * SOC15 offset of HDP_MEM_COHERENCY_FLUSH_CNTL shifted left by 2 and a bus
+ * address of 0. Every other configuration uses MMIO_REG_HOLE_OFFSET, with
+ * the bus address computed relative to adev->rmmio_base.
+ */
+static void nbio_v6_1_set_reg_remap(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev) || PAGE_SIZE > 4096) {
+		adev->rmmio_remap.bus_addr = 0;
+		adev->rmmio_remap.reg_offset =
+			SOC15_REG_OFFSET(NBIO, 0,
+				mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+		return;
+	}
+
+	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+}
+
const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
.get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset,
@@ -412,5 +422,6 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
.ih_control = nbio_v6_1_ih_control,
.init_registers = nbio_v6_1_init_registers,
.remap_hdp_registers = nbio_v6_1_remap_hdp_registers,
- .program_aspm = nbio_v6_1_program_aspm,
+ .program_aspm = nbio_v6_1_program_aspm,
+ .set_reg_remap = nbio_v6_1_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index aa0326d00c72..1569a1e934ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_0.h"
#include "nbio/nbio_7_0_default.h"
@@ -271,11 +270,33 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
+#define regRCC_DEV0_EPF6_STRAP4 0xd304
+#define regRCC_DEV0_EPF6_STRAP4_BASE_IDX 5
+
static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
{
- if (amdgpu_sriov_vf(adev))
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(2, 5, 0):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4, data);
+ break;
+ }
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+static void nbio_v7_0_set_reg_remap(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
adev->rmmio_remap.reg_offset =
SOC15_REG_OFFSET(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
}
const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
@@ -297,4 +318,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
.ih_control = nbio_v7_0_ih_control,
.init_registers = nbio_v7_0_init_registers,
.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_0_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
new file mode 100644
index 000000000000..bed5ef4d8788
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nbio_v7_11.h"
+
+#include "nbio/nbio_7_11_0_offset.h"
+#include "nbio/nbio_7_11_0_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * Program the two BIF_BX0 REMAP_HDP_*_FLUSH_CNTL registers with the remap
+ * base held in adev->rmmio_remap.reg_offset plus the matching
+ * KFD_MMIO_REMAP_HDP_* constant (MEM flush first, then REG flush).
+ */
+static void nbio_v7_11_remap_hdp_registers(struct amdgpu_device *adev)
+{
+	u32 remap_base = adev->rmmio_remap.reg_offset;
+
+	WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
+		     remap_base + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+	WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL,
+		     remap_base + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
+/*
+ * Return the STRAP_ATI_REV_ID_DEV0_F0 field of the DEV0/EPF0 STRAP0
+ * register.
+ *
+ * NOTE(review): the register read is RCC_STRAP1_* while the mask/shift
+ * macros are RCC_STRAP0_*; presumably both share one field layout - confirm.
+ */
+static u32 nbio_v7_11_get_rev_id(struct amdgpu_device *adev)
+{
+	u32 strap0 = RREG32_SOC15(NBIO, 0, regRCC_STRAP1_RCC_DEV0_EPF0_STRAP0);
+
+	strap0 &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+
+	return strap0 >> RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+}
+
+/*
+ * Write BIF_BX1_BIF_FB_EN: both FB_READ_EN and FB_WRITE_EN set when
+ * @enable is true, the whole register zeroed otherwise.
+ */
+static void nbio_v7_11_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+	u32 fb_en = 0;
+
+	if (enable)
+		fb_en = BIF_BX1_BIF_FB_EN__FB_READ_EN_MASK |
+			BIF_BX1_BIF_FB_EN__FB_WRITE_EN_MASK;
+
+	WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN, fb_en);
+}
+
+/* Return the raw value read from RCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE. */
+static u32 nbio_v7_11_get_memsize(struct amdgpu_device *adev)
+{
+	u32 memsize = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE);
+
+	return memsize;
+}
+
+/*
+ * Read-modify-write GDC0_BIF_CSDMA_DOORBELL_RANGE through the PCIE port
+ * accessors.
+ *
+ * With @use_doorbell set, OFFSET becomes @doorbell_index and SIZE becomes
+ * @doorbell_size; otherwise only SIZE is changed, to 0. @instance is not
+ * consulted by this implementation.
+ */
+static void nbio_v7_11_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+					   bool use_doorbell, int doorbell_index,
+					   int doorbell_size)
+{
+	u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE);
+	u32 range = RREG32_PCIE_PORT(reg);
+
+	if (!use_doorbell) {
+		range = REG_SET_FIELD(range, GDC0_BIF_CSDMA_DOORBELL_RANGE,
+				      SIZE, 0);
+	} else {
+		range = REG_SET_FIELD(range, GDC0_BIF_CSDMA_DOORBELL_RANGE,
+				      OFFSET, doorbell_index);
+		range = REG_SET_FIELD(range, GDC0_BIF_CSDMA_DOORBELL_RANGE,
+				      SIZE, doorbell_size);
+	}
+
+	WREG32_PCIE_PORT(reg, range);
+}
+
+/*
+ * Read-modify-write the VPE doorbell range register through the PCIE port
+ * accessors: GDC0_BIF_VPE_DOORBELL_RANGE for @instance 0, the VPE1 variant
+ * for any other instance. Both registers are edited with the
+ * GDC0_BIF_VPE_DOORBELL_RANGE field definitions.
+ *
+ * With @use_doorbell set, OFFSET becomes @doorbell_index and SIZE becomes
+ * @doorbell_size; otherwise only SIZE is changed, to 0.
+ */
+static void nbio_v7_11_vpe_doorbell_range(struct amdgpu_device *adev, int instance,
+					  bool use_doorbell, int doorbell_index,
+					  int doorbell_size)
+{
+	u32 reg, range;
+
+	if (instance == 0)
+		reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE_DOORBELL_RANGE);
+	else
+		reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE1_DOORBELL_RANGE);
+
+	range = RREG32_PCIE_PORT(reg);
+
+	if (!use_doorbell) {
+		range = REG_SET_FIELD(range, GDC0_BIF_VPE_DOORBELL_RANGE,
+				      SIZE, 0);
+	} else {
+		range = REG_SET_FIELD(range, GDC0_BIF_VPE_DOORBELL_RANGE,
+				      OFFSET, doorbell_index);
+		range = REG_SET_FIELD(range, GDC0_BIF_VPE_DOORBELL_RANGE,
+				      SIZE, doorbell_size);
+	}
+
+	WREG32_PCIE_PORT(reg, range);
+}
+
+/*
+ * Read-modify-write the VCN doorbell range register through the PCIE port
+ * accessors: GDC0_BIF_VCN0_DOORBELL_RANGE for @instance 0, the VCN1 variant
+ * for any other instance. Both registers are edited with the VCN0 field
+ * definitions.
+ *
+ * With @use_doorbell set, OFFSET becomes @doorbell_index and SIZE becomes 8;
+ * otherwise only SIZE is changed, to 0.
+ */
+static void nbio_v7_11_vcn_doorbell_range(struct amdgpu_device *adev,
+					  bool use_doorbell,
+					  int doorbell_index, int instance)
+{
+	u32 reg, range;
+
+	if (instance == 0)
+		reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE);
+	else
+		reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN1_DOORBELL_RANGE);
+
+	range = RREG32_PCIE_PORT(reg);
+
+	if (use_doorbell)
+		range = REG_SET_FIELD(range, GDC0_BIF_VCN0_DOORBELL_RANGE,
+				      OFFSET, doorbell_index);
+
+	range = REG_SET_FIELD(range, GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE,
+			      use_doorbell ? 8 : 0);
+
+	WREG32_PCIE_PORT(reg, range);
+}
+
+/*
+ * Set the BIF_DOORBELL_APER_EN field of RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN
+ * to 1 when @enable is true and to 0 otherwise, leaving the register's
+ * other bits as read.
+ */
+static void nbio_v7_11_enable_doorbell_aperture(struct amdgpu_device *adev,
+						bool enable)
+{
+	u32 aper_en = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN);
+
+	aper_en = REG_SET_FIELD(aper_en, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN,
+				BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+	WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, aper_en);
+}
+
+/*
+ * Configure the doorbell self-ring GPA aperture.
+ *
+ * When @enable is true the control value has APER_EN = 1, APER_MODE = 1 and
+ * APER_SIZE = 0, and the aperture base registers are loaded with the low and
+ * high halves of adev->doorbell.base before the control register is
+ * written. When @enable is false only the control register is written, as 0.
+ */
+static void nbio_v7_11_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+							 bool enable)
+{
+	u32 cntl = 0;
+
+	if (enable) {
+		cntl = REG_SET_FIELD(cntl, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL,
+				     DOORBELL_SELFRING_GPA_APER_EN, 1);
+		cntl = REG_SET_FIELD(cntl, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL,
+				     DOORBELL_SELFRING_GPA_APER_MODE, 1);
+		cntl = REG_SET_FIELD(cntl, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL,
+				     DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+		WREG32_SOC15(NBIO, 0,
+			     regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+			     lower_32_bits(adev->doorbell.base));
+		WREG32_SOC15(NBIO, 0,
+			     regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+			     upper_32_bits(adev->doorbell.base));
+	}
+
+	WREG32_SOC15(NBIO, 0, regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL, cntl);
+}
+
+
+/*
+ * Read-modify-write GDC0_BIF_IH_DOORBELL_RANGE.
+ *
+ * With @use_doorbell set, OFFSET becomes @doorbell_index and SIZE becomes 2;
+ * otherwise only SIZE is changed, to 0.
+ */
+static void nbio_v7_11_ih_doorbell_range(struct amdgpu_device *adev,
+					 bool use_doorbell, int doorbell_index)
+{
+	u32 range = RREG32_SOC15(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE);
+
+	if (use_doorbell)
+		range = REG_SET_FIELD(range, GDC0_BIF_IH_DOORBELL_RANGE,
+				      OFFSET, doorbell_index);
+
+	range = REG_SET_FIELD(range, GDC0_BIF_IH_DOORBELL_RANGE, SIZE,
+			      use_doorbell ? 2 : 0);
+
+	WREG32_SOC15(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE, range);
+}
+
+/*
+ * Set up the BIF interrupt control registers: INTERRUPT_CNTL2 receives
+ * adev->dummy_page_addr >> 8, then INTERRUPT_CNTL is rewritten with
+ * IH_DUMMY_RD_OVERRIDE and IH_REQ_NONSNOOP_EN both cleared.
+ */
+static void nbio_v7_11_ih_control(struct amdgpu_device *adev)
+{
+	u32 cntl;
+
+	WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL2,
+		     adev->dummy_page_addr >> 8);
+
+	cntl = RREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL);
+
+	/*
+	 * IH_DUMMY_RD_OVERRIDE = 0: dummy read disabled with MSI, enabled without.
+	 * IH_DUMMY_RD_OVERRIDE = 1: dummy read controlled by IH_DUMMY_RD_EN.
+	 */
+	cntl = REG_SET_FIELD(cntl, BIF_BX1_INTERRUPT_CNTL,
+			     IH_DUMMY_RD_OVERRIDE, 0);
+
+	/*
+	 * IH_REQ_NONSNOOP_EN would be 1 if the ring lived in non-cacheable
+	 * memory, e.g. vram.
+	 */
+	cntl = REG_SET_FIELD(cntl, BIF_BX1_INTERRUPT_CNTL,
+			     IH_REQ_NONSNOOP_EN, 0);
+
+	WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL, cntl);
+}
+
+/* Return the SOC15 offset of BIF_BX_PF1_GPU_HDP_FLUSH_REQ (NBIO instance 0). */
+static u32 nbio_v7_11_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+	const u32 offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_GPU_HDP_FLUSH_REQ);
+
+	return offset;
+}
+
+/* Return the SOC15 offset of BIF_BX_PF1_GPU_HDP_FLUSH_DONE (NBIO instance 0). */
+static u32 nbio_v7_11_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+	const u32 offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_GPU_HDP_FLUSH_DONE);
+
+	return offset;
+}
+
+/* Return the SOC15 offset of BIF_BX1_PCIE_INDEX2 (NBIO instance 0). */
+static u32 nbio_v7_11_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+	const u32 offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX1_PCIE_INDEX2);
+
+	return offset;
+}
+
+/* Return the SOC15 offset of BIF_BX1_PCIE_DATA2 (NBIO instance 0). */
+static u32 nbio_v7_11_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+	const u32 offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX1_PCIE_DATA2);
+
+	return offset;
+}
+
+/* Return the SOC15 offset of BIF_BX_PF1_RSMU_INDEX (NBIO instance 0). */
+static u32 nbio_v7_11_get_pcie_port_index_offset(struct amdgpu_device *adev)
+{
+	const u32 offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_RSMU_INDEX);
+
+	return offset;
+}
+
+/* Return the SOC15 offset of BIF_BX_PF1_RSMU_DATA (NBIO instance 0). */
+static u32 nbio_v7_11_get_pcie_port_data_offset(struct amdgpu_device *adev)
+{
+	const u32 offset = SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_RSMU_DATA);
+
+	return offset;
+}
+
+/*
+ * Per-client ref-and-mask values for NBIO v7.11, one entry for each of
+ * CP0..CP9 and SDMA0/SDMA1, each taken from the corresponding
+ * BIF_BX_PF1_GPU_HDP_FLUSH_DONE bit mask.
+ */
+const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+/*
+ * One-time NBIO register setup.
+ *
+ * Sets CI_SWUS_MAX_READ_REQUEST_SIZE_MODE and
+ * CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV to 1 in PCIE_MST_CTRL_3, writing the
+ * register back only if the value changed. Then, for NBIO IP versions
+ * 7.11.0 through 7.11.3, clears bit 23 of RCC_DEV0_EPF5_STRAP4.
+ */
+static void nbio_v7_11_init_registers(struct amdgpu_device *adev)
+{
+	u32 before, val;
+
+	before = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3);
+	val = REG_SET_FIELD(before, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+			    CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+	val = REG_SET_FIELD(val, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3,
+			    CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+	/* Skip the register write when nothing would change. */
+	if (val != before)
+		WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, val);
+
+	switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+	case IP_VERSION(7, 11, 0):
+	case IP_VERSION(7, 11, 1):
+	case IP_VERSION(7, 11, 2):
+	case IP_VERSION(7, 11, 3):
+		val = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4);
+		val &= ~BIT(23);
+		WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, val);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Set (@enable true) or clear (@enable false) the six dynamic clock-gate
+ * enable bits in CPM_CONTROL. Does nothing unless adev->cg_flags has
+ * AMD_CG_SUPPORT_BIF_MGCG; the register is written only if its value
+ * actually changes.
+ */
+static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+							bool enable)
+{
+	const u32 gate_bits =
+		BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+		BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+		BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+		BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+		BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+		BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK;
+	u32 before, val;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+		return;
+
+	before = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL);
+	val = enable ? (before | gate_bits) : (before & ~gate_bits);
+
+	if (val != before)
+		WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL, val);
+}
+
+/*
+ * Set (@enable true) or clear (@enable false) the memory light-sleep bits:
+ * SLV_MEM_LS_EN in PCIE_CNTL2, then MST_MEM_LS_EN and REPLAY_MEM_LS_EN in
+ * PCIE_TX_POWER_CTRL_1. Does nothing unless adev->cg_flags has
+ * AMD_CG_SUPPORT_BIF_LS; each register is written only if its value
+ * actually changes.
+ */
+static void nbio_v7_11_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+						       bool enable)
+{
+	const u32 slv_bits =
+		BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+	const u32 tx_bits =
+		BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+		BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK;
+	u32 before, val;
+
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+		return;
+
+	before = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2);
+	val = enable ? (before | slv_bits) : (before & ~slv_bits);
+	if (val != before)
+		WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2, val);
+
+	before = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1);
+	val = enable ? (before | tx_bits) : (before & ~tx_bits);
+	if (val != before)
+		WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1, val);
+}
+
+/*
+ * OR the currently enabled BIF clock-gating features into *@flags:
+ * AMD_CG_SUPPORT_BIF_MGCG when LCLK_DYN_GATE_ENABLE is set in CPM_CONTROL,
+ * AMD_CG_SUPPORT_BIF_LS when SLV_MEM_LS_EN is set in PCIE_CNTL2. Bits
+ * already present in *@flags are left alone.
+ */
+static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev,
+					     u64 *flags)
+{
+	u32 cpm_control, pcie_cntl2;
+
+	cpm_control = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL);
+	if (cpm_control & BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+		*flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+	pcie_cntl2 = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2);
+	if (pcie_cntl2 & BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+		*flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+/* Remap target used on bare metal for NBIO v7.11. */
+#define MMIO_REG_HOLE_OFFSET 0x44000
+
+/*
+ * nbio_v7_11_set_reg_remap - fill in adev->rmmio_remap
+ *
+ * SR-IOV VFs, and kernels whose PAGE_SIZE is larger than 4096, get the
+ * SOC15 offset of BIF_BX_PF1_HDP_MEM_COHERENCY_FLUSH_CNTL shifted left by 2
+ * and a bus address of 0. Every other configuration uses
+ * MMIO_REG_HOLE_OFFSET, with the bus address computed relative to
+ * adev->rmmio_base.
+ */
+static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev) || PAGE_SIZE > 4096) {
+		adev->rmmio_remap.bus_addr = 0;
+		adev->rmmio_remap.reg_offset =
+			SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+		return;
+	}
+
+	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+}
+
+/*
+ * NBIO v7.11 callback table: binds each amdgpu_nbio_funcs hook listed below
+ * to the nbio_v7_11_* implementation in this file.
+ */
+const struct amdgpu_nbio_funcs nbio_v7_11_funcs = {
+ .get_hdp_flush_req_offset = nbio_v7_11_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_11_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_11_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_11_get_pcie_data_offset,
+ .get_pcie_port_index_offset = nbio_v7_11_get_pcie_port_index_offset,
+ .get_pcie_port_data_offset = nbio_v7_11_get_pcie_port_data_offset,
+ .get_rev_id = nbio_v7_11_get_rev_id,
+ .mc_access_enable = nbio_v7_11_mc_access_enable,
+ .get_memsize = nbio_v7_11_get_memsize,
+ .sdma_doorbell_range = nbio_v7_11_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_11_vcn_doorbell_range,
+ .vpe_doorbell_range = nbio_v7_11_vpe_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_11_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_11_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_11_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_11_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_11_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_11_get_clockgating_state,
+ .ih_control = nbio_v7_11_ih_control,
+ .init_registers = nbio_v7_11_init_registers,
+ .remap_hdp_registers = nbio_v7_11_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_11_set_reg_remap,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h
new file mode 100644
index 000000000000..9d8258ed3f0a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/* Public declarations for the NBIO v7.11 block (see nbio_v7_11.c). */
+#ifndef __NBIO_V7_11_H__
+#define __NBIO_V7_11_H__
+
+#include "soc15_common.h"
+
+/* HDP flush ref-and-mask table and the NBIO callback table. */
+extern const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg;
+extern const struct amdgpu_nbio_funcs nbio_v7_11_funcs;
+/*
+ * NOTE(review): no definition of nbio_v7_11_ras_funcs appears in the
+ * nbio_v7_11.c added alongside this header - confirm it is defined
+ * elsewhere before referencing it.
+ */
+extern const struct amdgpu_nbio_ras_funcs nbio_v7_11_ras_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
index 4ef1fa4603c8..acc5f363684a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_2.h"
#include "nbio/nbio_7_2_0_offset.h"
@@ -59,7 +58,7 @@ static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp;
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(7, 2, 1):
case IP_VERSION(7, 3, 0):
case IP_VERSION(7, 5, 0):
@@ -78,7 +77,7 @@ static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev)
static void nbio_v7_2_mc_access_enable(struct amdgpu_device *adev, bool enable)
{
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(7, 2, 1):
case IP_VERSION(7, 3, 0):
case IP_VERSION(7, 5, 0):
@@ -262,7 +261,7 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(7, 2, 1):
case IP_VERSION(7, 3, 0):
case IP_VERSION(7, 5, 0):
@@ -369,7 +368,7 @@ const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg = {
static void nbio_v7_2_init_registers(struct amdgpu_device *adev)
{
uint32_t def, data;
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(7, 2, 1):
case IP_VERSION(7, 3, 0):
case IP_VERSION(7, 5, 0):
@@ -394,7 +393,7 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev)
break;
}
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(7, 3, 0):
case IP_VERSION(7, 5, 1):
data = RREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2);
@@ -402,10 +401,21 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev)
WREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2, data);
break;
}
+}
+
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
- if (amdgpu_sriov_vf(adev))
- adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
- regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+/*
+ * nbio_v7_2_set_reg_remap - fill in adev->rmmio_remap
+ *
+ * SR-IOV VFs, and kernels whose PAGE_SIZE is larger than 4096, get the
+ * SOC15 offset of BIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL shifted left by 2
+ * and a bus address of 0. Every other configuration uses
+ * MMIO_REG_HOLE_OFFSET, with the bus address computed relative to
+ * adev->rmmio_base.
+ */
+static void nbio_v7_2_set_reg_remap(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev) || PAGE_SIZE > 4096) {
+		adev->rmmio_remap.bus_addr = 0;
+		adev->rmmio_remap.reg_offset =
+			SOC15_REG_OFFSET(NBIO, 0,
+				regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+		return;
+	}
+
+	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
}
const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
@@ -429,4 +439,5 @@ const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
.ih_control = nbio_v7_2_ih_control,
.init_registers = nbio_v7_2_init_registers,
.remap_hdp_registers = nbio_v7_2_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_2_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 685abf57ffdd..860bc5cb03c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_4.h"
#include "amdgpu_ras.h"
@@ -152,9 +151,9 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan
* BIF_SDMA0_DOORBELL_RANGE: 0x3bc0
* BIF_SDMA1_DOORBELL_RANGE: 0x3bc4
* BIF_SDMA2_DOORBELL_RANGE: 0x3bd8
-+ * BIF_SDMA4_DOORBELL_RANGE:
-+ * ARCTURUS: 0x3be0
-+ * ALDEBARAN: 0x3be4
+ * BIF_SDMA4_DOORBELL_RANGE:
+ * ARCTURUS: 0x3be0
+ * ALDEBARAN: 0x3be4
*/
if (adev->asic_type == CHIP_ALDEBARAN && instance == 4)
reg = instance + 0x4 + 0x1 +
@@ -343,11 +342,7 @@ static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
{
uint32_t baco_cntl;
- if (amdgpu_sriov_vf(adev))
- adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
-
- if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4) &&
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 4, 4) &&
!amdgpu_sriov_vf(adev)) {
baco_cntl = RREG32_SOC15(NBIO, 0, mmBACO_CNTL);
if (baco_cntl &
@@ -365,9 +360,12 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
{
uint32_t bif_doorbell_intr_cntl;
struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
- struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_err_data err_data;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ if (amdgpu_ras_error_data_init(&err_data))
+ return;
+
if (adev->asic_type == CHIP_ALDEBARAN)
bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE);
else
@@ -384,7 +382,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
else
WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
- if (!ras->disable_ras_err_cnt_harvest) {
+ if (ras && !ras->disable_ras_err_cnt_harvest && obj) {
/*
* clear error status after ras_controller_intr
* according to hw team and count ue number
@@ -398,8 +396,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
if (err_data.ce_count)
dev_info(adev->dev, "%ld correctable hardware "
- "errors detected in %s block, "
- "no user action is needed.\n",
+ "errors detected in %s block\n",
obj->err_data.ce_count,
get_ras_block_str(adev->nbio.ras_if));
@@ -416,8 +413,10 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
/* ras_controller_int is dedicated for nbif ras error,
* not the global interrupt for sync flood
*/
- amdgpu_ras_reset_gpu(adev);
+ amdgpu_ras_global_ras_isr(adev);
}
+
+ amdgpu_ras_error_data_fini(&err_data);
}
static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
@@ -702,7 +701,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
#ifdef CONFIG_PCIEASPM
uint32_t def, data;
- if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4))
+ if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 4, 4))
return;
def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
@@ -788,6 +787,21 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
#endif
}
+/* Remap target used on bare metal: the last page below offset 0x80000. */
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+/*
+ * nbio_v7_4_set_reg_remap - fill in adev->rmmio_remap
+ *
+ * SR-IOV VFs, and kernels whose PAGE_SIZE is larger than 4096, get the
+ * SOC15 offset of HDP_MEM_COHERENCY_FLUSH_CNTL shifted left by 2 and a bus
+ * address of 0. Every other configuration uses MMIO_REG_HOLE_OFFSET, with
+ * the bus address computed relative to adev->rmmio_base.
+ */
+static void nbio_v7_4_set_reg_remap(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev) || PAGE_SIZE > 4096) {
+		adev->rmmio_remap.bus_addr = 0;
+		adev->rmmio_remap.reg_offset =
+			SOC15_REG_OFFSET(NBIO, 0,
+				mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+		return;
+	}
+
+	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+}
+
const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
@@ -809,4 +823,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.init_registers = nbio_v7_4_init_registers,
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
.program_aspm = nbio_v7_4_program_aspm,
+ .set_reg_remap = nbio_v7_4_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
index def89379b51a..2ee60b8746a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_7.h"
#include "nbio/nbio_7_7_0_offset.h"
@@ -247,6 +246,12 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data);
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 7, 0):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data);
+ break;
+ }
}
static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@@ -254,7 +259,7 @@ static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
- if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
return;
def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
@@ -283,7 +288,7 @@ static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;
- if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
return;
def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
@@ -324,6 +329,21 @@ static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev,
*flags |= AMD_CG_SUPPORT_BIF_LS;
}
+/* Remap target used on bare metal: the last page below offset 0x80000. */
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+
+/*
+ * nbio_v7_7_set_reg_remap - fill in adev->rmmio_remap
+ *
+ * SR-IOV VFs, and kernels whose PAGE_SIZE is larger than 4096, get the
+ * SOC15 offset of BIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL shifted left by 2
+ * and a bus address of 0. Every other configuration uses
+ * MMIO_REG_HOLE_OFFSET, with the bus address computed relative to
+ * adev->rmmio_base.
+ */
+static void nbio_v7_7_set_reg_remap(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev) || PAGE_SIZE > 4096) {
+		adev->rmmio_remap.bus_addr = 0;
+		adev->rmmio_remap.reg_offset =
+			SOC15_REG_OFFSET(NBIO, 0,
+				regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
+		return;
+	}
+
+	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+}
+
const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset,
@@ -345,4 +365,5 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.ih_control = nbio_v7_7_ih_control,
.init_registers = nbio_v7_7_init_registers,
.remap_hdp_registers = nbio_v7_7_remap_hdp_registers,
+ .set_reg_remap = nbio_v7_7_set_reg_remap,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index f85eec05d218..1c22bc11c1f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_9.h"
#include "amdgpu_ras.h"
@@ -32,18 +31,6 @@
#define NPS_MODE_MASK 0x000000FFL
-/* Core 0 Port 0 counter */
-#define smnPCIEP_NAK_COUNTER 0x1A340218
-
-#define smnPCIE_PERF_CNTL_TXCLK3 0x1A38021c
-#define smnPCIE_PERF_CNTL_TXCLK7 0x1A380888
-#define smnPCIE_PERF_COUNT_CNTL 0x1A380200
-#define smnPCIE_PERF_COUNT0_TXCLK3 0x1A380220
-#define smnPCIE_PERF_COUNT0_TXCLK7 0x1A38088C
-#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK3 0x1A3808F8
-#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK7 0x1A380918
-
-
static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
@@ -56,8 +43,15 @@ static u32 nbio_v7_9_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp;
+ tmp = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0));
+ /* If it is VF or subrevision holds a non-zero value, that should be used */
+ if (tmp || amdgpu_sriov_vf(adev))
+ return tmp;
+
+ /* If discovery subrev is not updated, use register version */
tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
- tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0, STRAP_ATI_REV_ID_DEV0_F0);
+ tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0,
+ STRAP_ATI_REV_ID_DEV0_F0);
return tmp;
}
@@ -173,8 +167,6 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan
default:
break;
}
-
- return;
}
static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
@@ -182,8 +174,12 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
{
u32 doorbell_range = 0, doorbell_ctrl = 0;
u32 aid_id = instance;
+ u32 range_size;
if (use_doorbell) {
+ range_size = (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 5, 0)) ?
+ 0xb : 0x9;
doorbell_range = REG_SET_FIELD(doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
BIF_DOORBELL0_RANGE_OFFSET_ENTRY,
@@ -191,7 +187,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
doorbell_range = REG_SET_FIELD(doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
BIF_DOORBELL0_RANGE_SIZE_ENTRY,
- 0x9);
+ range_size);
if (aid_id)
doorbell_range = REG_SET_FIELD(doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
@@ -209,7 +205,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
- S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9);
+ S2A_DOORBELL_PORT1_RANGE_SIZE, range_size);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4);
@@ -405,6 +401,17 @@ static int nbio_v7_9_get_compute_partition_mode(struct amdgpu_device *adev)
return px;
}
+static bool nbio_v7_9_is_nps_switch_requested(struct amdgpu_device *adev)
+{
+	u32 status;
+
+	status = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS);
+	status = REG_GET_FIELD(status, BIF_BX_PF0_PARTITION_MEM_STATUS,
+			       CHANGE_STATUE);
+
+	/* A CHANGE_STATUE value of 0x8 means an NPS switch was requested */
+	return status == 0x8;
+}
static u32 nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev,
u32 *supp_modes)
{
@@ -457,73 +464,21 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
}
}
-static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev)
-{
- u32 val, nak_r, nak_g;
-
- if (adev->flags & AMD_IS_APU)
- return 0;
+#define MMIO_REG_HOLE_OFFSET 0x1A000
- /* Get the number of NAKs received and generated */
- val = RREG32_PCIE(smnPCIEP_NAK_COUNTER);
- nak_r = val & 0xFFFF;
- nak_g = val >> 16;
-
- /* Add the total number of NAKs, i.e the number of replays */
- return (nak_r + nak_g);
-}
-
-static void nbio_v7_9_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
- uint64_t *count1)
+static void nbio_v7_9_set_reg_remap(struct amdgpu_device *adev)
{
- uint32_t perfctrrx = 0;
- uint32_t perfctrtx = 0;
-
- /* This reports 0 on APUs, so return to avoid writing/reading registers
- * that may or may not be different from their GPU counterparts
- */
- if (adev->flags & AMD_IS_APU)
- return;
-
- /* Use TXCLK3 counter group for rx event */
- /* Use TXCLK7 counter group for tx event */
- /* Set the 2 events that we wish to watch, defined above */
- /* 40 is event# for received msgs */
- /* 2 is event# of posted requests sent */
- perfctrrx = REG_SET_FIELD(perfctrrx, PCIE_PERF_CNTL_TXCLK3, EVENT0_SEL, 40);
- perfctrtx = REG_SET_FIELD(perfctrtx, PCIE_PERF_CNTL_TXCLK7, EVENT0_SEL, 2);
-
- /* Write to enable desired perf counters */
- WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctrrx);
- WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK7, perfctrtx);
-
- /* Zero out and enable SHADOW_WR
- * Write 0x6:
- * Bit 1 = Global Shadow wr(1)
- * Bit 2 = Global counter reset enable(1)
- */
- WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006);
-
- /* Enable Gloabl Counter
- * Write 0x1:
- * Bit 0 = Global Counter Enable(1)
- */
- WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000001);
-
- msleep(1000);
-
- /* Disable Global Counter, Reset and enable SHADOW_WR
- * Write 0x6:
- * Bit 1 = Global Shadow wr(1)
- * Bit 2 = Global counter reset enable(1)
- */
- WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006);
-
- /* Get the upper and lower count */
- *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) |
- ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK3) << 32);
- *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK7) |
- ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK7) << 32);
+ if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ } else {
+ adev->rmmio_remap.reg_offset =
+ SOC15_REG_OFFSET(
+ NBIO, 0,
+ regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL)
+ << 2;
+ adev->rmmio_remap.bus_addr = 0;
+ }
}
const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
@@ -548,24 +503,26 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
.remap_hdp_registers = nbio_v7_9_remap_hdp_registers,
.get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode,
.get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode,
+ .is_nps_switch_requested = nbio_v7_9_is_nps_switch_requested,
.init_registers = nbio_v7_9_init_registers,
- .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count,
- .get_pcie_usage = nbio_v7_9_get_pcie_usage,
+ .set_reg_remap = nbio_v7_9_set_reg_remap,
};
static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
- return;
}
static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
{
uint32_t bif_doorbell_intr_cntl;
struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
- struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_err_data err_data;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ if (amdgpu_ras_error_data_init(&err_data))
+ return;
+
bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
if (REG_GET_FIELD(bif_doorbell_intr_cntl,
@@ -590,8 +547,7 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device
if (err_data.ce_count)
dev_info(adev->dev, "%ld correctable hardware "
- "errors detected in %s block, "
- "no user action is needed.\n",
+ "errors detected in %s block\n",
obj->err_data.ce_count,
get_ras_block_str(adev->nbio.ras_if));
@@ -604,12 +560,9 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device
dev_info(adev->dev, "RAS controller interrupt triggered "
"by NBIF error\n");
-
- /* ras_controller_int is dedicated for nbif ras error,
- * not the global interrupt for sync flood
- */
- amdgpu_ras_reset_gpu(adev);
}
+
+ amdgpu_ras_error_data_fini(&err_data);
}
static void nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 13aca808ecab..50e77d9b30af 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -59,7 +59,6 @@
#include "vcn_v3_0.h"
#include "jpeg_v3_0.h"
#include "amdgpu_vkms.h"
-#include "mes_v10_1.h"
#include "mxgpu_nv.h"
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
@@ -68,8 +67,8 @@ static const struct amd_ip_funcs nv_common_ip_funcs;
/* Navi */
static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 0)},
};
static const struct amdgpu_video_codecs nv_video_codecs_encode = {
@@ -79,12 +78,12 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = {
/* Navi1x */
static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -95,8 +94,8 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode = {
/* Sienna Cichlid */
static const struct amdgpu_video_codec_info sc_video_codecs_encode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2160, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 7680, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};
static const struct amdgpu_video_codecs sc_video_codecs_encode = {
@@ -105,23 +104,23 @@ static const struct amdgpu_video_codecs sc_video_codecs_encode = {
};
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -137,28 +136,28 @@ static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn1 = {
/* SRIOV Sienna Cichlid, not const since data is controlled by host */
static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2160, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 7680, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -199,7 +198,7 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
@@ -214,7 +213,7 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config))
return -EINVAL;
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 0, 64):
case IP_VERSION(3, 0, 192):
@@ -453,8 +452,9 @@ nv_asic_reset_method(struct amdgpu_device *adev)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 3):
case IP_VERSION(13, 0, 5):
@@ -513,11 +513,10 @@ static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
static void nv_program_aspm(struct amdgpu_device *adev)
{
- if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->program_aspm))
+ if (adev->nbio.funcs->program_aspm)
adev->nbio.funcs->program_aspm(adev);
}
@@ -609,9 +608,8 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev,
if (adev->gfx.funcs->update_perfmon_mgcg)
adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->enable_aspm) &&
- amdgpu_device_should_use_aspm(adev))
+ if (adev->nbio.funcs->enable_aspm &&
+ amdgpu_device_should_use_aspm(adev))
adev->nbio.funcs->enable_aspm(adev, !enter);
return 0;
@@ -637,15 +635,11 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = {
.query_video_codecs = &nv_query_video_codecs,
};
-static int nv_common_early_init(void *handle)
+static int nv_common_early_init(struct amdgpu_ip_block *ip_block)
{
-#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (!amdgpu_sriov_vf(adev)) {
- adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
- adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
- }
+ adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &amdgpu_device_indirect_rreg;
@@ -669,7 +663,7 @@ static int nv_common_early_init(void *handle)
/* TODO: split the GC and PG flags based on the relevant IP version for which
* they are relevant.
*/
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
@@ -951,9 +945,9 @@ static int nv_common_early_init(void *handle)
return 0;
}
-static int nv_common_late_init(void *handle)
+static int nv_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_get_irq(adev);
@@ -980,9 +974,9 @@ static int nv_common_late_init(void *handle)
return 0;
}
-static int nv_common_sw_init(void *handle)
+static int nv_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_add_irq_id(adev);
@@ -990,14 +984,9 @@ static int nv_common_sw_init(void *handle)
return 0;
}
-static int nv_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-static int nv_common_hw_init(void *handle)
+static int nv_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->nbio.funcs->apply_lc_spc_mode_wa)
adev->nbio.funcs->apply_lc_spc_mode_wa(adev);
@@ -1021,9 +1010,9 @@ static int nv_common_hw_init(void *handle)
return 0;
}
-static int nv_common_hw_fini(void *handle)
+static int nv_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* Disable the doorbell aperture and selfring doorbell aperture
* separately in hw_fini because nv_enable_doorbell_aperture
@@ -1036,44 +1025,30 @@ static int nv_common_hw_fini(void *handle)
return 0;
}
-static int nv_common_suspend(void *handle)
+static int nv_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return nv_common_hw_fini(adev);
+ return nv_common_hw_fini(ip_block);
}
-static int nv_common_resume(void *handle)
+static int nv_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return nv_common_hw_init(adev);
+ return nv_common_hw_init(ip_block);
}
-static bool nv_common_is_idle(void *handle)
+static bool nv_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int nv_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int nv_common_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int nv_common_set_clockgating_state(void *handle,
+static int nv_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(2, 3, 0):
case IP_VERSION(2, 3, 1):
case IP_VERSION(2, 3, 2):
@@ -1096,16 +1071,16 @@ static int nv_common_set_clockgating_state(void *handle,
return 0;
}
-static int nv_common_set_powergating_state(void *handle,
+static int nv_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* TODO */
return 0;
}
-static void nv_common_get_clockgating_state(void *handle, u64 *flags)
+static void nv_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
*flags = 0;
@@ -1115,8 +1090,6 @@ static void nv_common_get_clockgating_state(void *handle, u64 *flags)
adev->hdp.funcs->get_clock_gating_state(adev, flags);
adev->smuio.funcs->get_clock_gating_state(adev, flags);
-
- return;
}
static const struct amd_ip_funcs nv_common_ip_funcs = {
@@ -1124,14 +1097,11 @@ static const struct amd_ip_funcs nv_common_ip_funcs = {
.early_init = nv_common_early_init,
.late_init = nv_common_late_init,
.sw_init = nv_common_sw_init,
- .sw_fini = nv_common_sw_fini,
.hw_init = nv_common_hw_init,
.hw_fini = nv_common_hw_fini,
.suspend = nv_common_suspend,
.resume = nv_common_resume,
.is_idle = nv_common_is_idle,
- .wait_for_idle = nv_common_wait_for_idle,
- .soft_reset = nv_common_soft_reset,
.set_clockgating_state = nv_common_set_clockgating_state,
.set_powergating_state = nv_common_set_powergating_state,
.get_clockgating_state = nv_common_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h
index 83e9782aef39..8f4817404f10 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -31,5 +31,6 @@ extern const struct amdgpu_ip_block_version nv_common_ip_block;
void nv_grbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
void nv_set_virt_ops(struct amdgpu_device *adev);
+int cyan_skillfish_reg_base_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
index 631dafb92299..56f1bfac0b20 100644
--- a/drivers/gpu/drm/amd/amdgpu/nvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -64,6 +64,24 @@
#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
#define PACKET3_ATOMIC_GDS 0x1D
#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x7F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
+#define PACKET3_ATOMIC_MEM__COMMAND__WAIT_FOR_WRITE_CONFIRMATION 2
+#define PACKET3_ATOMIC_MEM__COMMAND__SEND_AND_CONTINUE 3
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__LRU 0
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__STREAM 1
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__NOA 2
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__BYPASS 3
#define PACKET3_OCCLUSION_QUERY 0x1F
#define PACKET3_SET_PREDICATION 0x20
#define PACKET3_REG_RMW 0x21
@@ -105,6 +123,38 @@
* 1 - pfp
* 2 - ce
*/
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_WRITE_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WRITE_DATA__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 24)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_LO(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__MODE__PF_VF_DISABLED 0
+#define PACKET3_WRITE_DATA__MODE__PF_VF_ENABLED 1
+#define PACKET3_WRITE_DATA__TEMPORAL__RT 0
+#define PACKET3_WRITE_DATA__TEMPORAL__NT 1
+#define PACKET3_WRITE_DATA__TEMPORAL__HT 2
+#define PACKET3_WRITE_DATA__TEMPORAL__LU 3
+#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
+#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
+#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PACKET3_MEM_SEMAPHORE 0x39
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
@@ -135,6 +185,42 @@
/* 0 - me
* 1 - pfp
*/
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__LRU 0
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__STREAM 1
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__NOA 2
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__BYPASS 3
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__RT 0
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__NT 1
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__HT 2
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__LU 3
#define PACKET3_INDIRECT_BUFFER 0x3F
#define INDIRECT_BUFFER_VALID (1 << 23)
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
@@ -144,8 +230,94 @@
*/
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__RT 0
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__NT 1
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__HT 2
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__LU 3
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__NOA 2
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__BYPASS 3
#define PACKET3_COND_INDIRECT_BUFFER 0x3F
#define PACKET3_COPY_DATA 0x40
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__SRC_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_COPY_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 23)
+#define PACKET3_COPY_DATA__DST_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2_OBSOLETE 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__SRC_SEL__SYSTEM_CLOCK_COUNT 10
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__GDS 3
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2_OBSOLETE 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__NOA 2
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__BYPASS 3
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__MODE__PF_VF_DISABLED 0
+#define PACKET3_COPY_DATA__MODE__PF_VF_ENABLED 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__DST_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__DST_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__NOA 2
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__BYPASS 3
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
#define PACKET3_CP_DMA 0x41
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_SURFACE_SYNC 0x43
@@ -160,6 +332,23 @@
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
*/
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 0)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) ((unsigned)(x))
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTAT 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS 8
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS1 9
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS2 10
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS3 11
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__LEGACY_MODE 0
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE1 1
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__NEW_MODE 2
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE3 3
#define PACKET3_EVENT_WRITE_EOP 0x47
#define PACKET3_EVENT_WRITE_EOS 0x48
#define PACKET3_RELEASE_MEM 0x49
@@ -304,6 +493,12 @@
* 2: REVERSE
*/
#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FFFF) << 0)
#define PACKET3_REWIND 0x59
#define PACKET3_INTERRUPT 0x5A
#define PACKET3_GEN_PDEPTE 0x5B
@@ -330,11 +525,17 @@
#define PACKET3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00
#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
+#define PACKET3_SET_SH_REG__INDEX__DEFAULT 0
+#define PACKET3_SET_SH_REG__INDEX__INSERT_VMID 1
#define PACKET3_SET_SH_REG_OFFSET 0x77
#define PACKET3_SET_QUEUE_REG 0x78
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
#define PACKET3_FORWARD_HEADER 0x7C
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
@@ -369,6 +570,7 @@
# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
#define PACKET3_AQL_PACKET 0x99
#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
#define PACKET3_SET_SH_REG_INDEX 0x9B
@@ -462,6 +664,12 @@
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
#define PACKET3_RUN_LIST 0xA5
#define PACKET3_MAP_PROCESS_VM 0xA6
+
+#define PACKET3_RUN_CLEANER_SHADER 0xD2
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
+
/* GFX11 */
#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 18917df785ec..73f87131a7e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -103,6 +103,12 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */
GFX_CMD_ID_BOOT_CFG = 0x00000022, /* Boot Config */
GFX_CMD_ID_SRIOV_SPATIAL_PART = 0x00000027, /* Configure spatial partitioning mode */
+ /*IDs of performance monitoring/profiling*/
+ GFX_CMD_ID_CONFIG_SQ_PERFMON = 0x00000046, /* Config CGTT_SQ_CLK_CTRL */
+ /* Dynamic memory partitioning (NPS mode change)*/
+ GFX_CMD_ID_FB_NPS_MODE = 0x00000048, /* Configure memory partitioning mode */
+ GFX_CMD_ID_FB_FW_RESERV_ADDR = 0x00000050, /* Query FW reservation addr */
+ GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR = 0x00000051, /* Query FW reservation extended addr */
};
/* PSP boot config sub-commands */
@@ -293,6 +299,11 @@ enum psp_gfx_fw_type {
GFX_FW_TYPE_RS64_MEC_P1_STACK = 95, /* RS64 MEC stack P1 SOC21 */
GFX_FW_TYPE_RS64_MEC_P2_STACK = 96, /* RS64 MEC stack P2 SOC21 */
GFX_FW_TYPE_RS64_MEC_P3_STACK = 97, /* RS64 MEC stack P3 SOC21 */
+ GFX_FW_TYPE_VPEC_FW1 = 100, /* VPEC FW1 To Save VPE */
+ GFX_FW_TYPE_VPEC_FW2 = 101, /* VPEC FW2 To Save VPE */
+ GFX_FW_TYPE_VPE = 102,
+ GFX_FW_TYPE_JPEG_RAM = 128, /**< JPEG Command buffer */
+ GFX_FW_TYPE_P2S_TABLE = 129,
GFX_FW_TYPE_MAX
};
@@ -346,6 +357,20 @@ struct psp_gfx_cmd_sriov_spatial_part {
uint32_t override_this_aid;
};
+/*Structure for sq performance monitoring/profiling enable/disable*/
+struct psp_gfx_cmd_config_sq_perfmon {
+ uint32_t gfx_xcp_mask;
+ uint8_t core_override;
+ uint8_t reg_override;
+ uint8_t perfmon_override;
+ uint8_t reserved[5];
+};
+
+struct psp_gfx_cmd_fb_memory_part {
+ uint32_t mode; /* requested NPS mode */
+ uint32_t resvd;
+};
+
/* All GFX ring buffer commands. */
union psp_gfx_commands
{
@@ -360,6 +385,8 @@ union psp_gfx_commands
struct psp_gfx_cmd_load_toc cmd_load_toc;
struct psp_gfx_cmd_boot_cfg boot_cfg;
struct psp_gfx_cmd_sriov_spatial_part cmd_spatial_part;
+ struct psp_gfx_cmd_config_sq_perfmon config_sq_perfmon;
+ struct psp_gfx_cmd_fb_memory_part cmd_memory_part;
};
struct psp_gfx_uresp_reserved
@@ -379,11 +406,19 @@ struct psp_gfx_uresp_bootcfg {
uint32_t boot_cfg; /* boot config data */
};
+/* Command-specific response for fw reserve info */
+struct psp_gfx_uresp_fw_reserve_info {
+ uint32_t reserve_base_address_hi;
+ uint32_t reserve_base_address_lo;
+ uint32_t reserve_size;
+};
+
/* Union of command-specific responses for GPCOM ring. */
union psp_gfx_uresp {
struct psp_gfx_uresp_reserved reserved;
struct psp_gfx_uresp_bootcfg boot_cfg;
struct psp_gfx_uresp_fwar_db_info fwar_db_info;
+ struct psp_gfx_uresp_fw_reserve_info fw_reserve_info;
};
/* Structure of GFX Response buffer.
@@ -459,8 +494,9 @@ struct psp_gfx_rb_frame
#define PSP_ERR_UNKNOWN_COMMAND 0x00000100
enum tee_error_code {
- TEE_SUCCESS = 0x00000000,
- TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
+ TEE_SUCCESS = 0x00000000,
+ TEE_ERROR_CANCEL = 0xFFFF0002,
+ TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
};
#endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 5f10883da6a2..3584b8c18fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -58,9 +58,10 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
return err;
err = psp_init_ta_microcode(psp, ucode_prefix);
- if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 1, 0)) &&
- (adev->pdev->revision == 0xa1) &&
- (psp->securedisplay_context.context.bin_desc.fw_version >= 0x27000008)) {
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) &&
+ (adev->pdev->revision == 0xa1) &&
+ (psp->securedisplay_context.context.bin_desc.fw_version >=
+ 0x27000008)) {
adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
}
return err;
@@ -93,7 +94,7 @@ static int psp_v10_0_ring_create(struct psp_context *psp,
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -114,7 +115,7 @@ static int psp_v10_0_ring_stop(struct psp_context *psp,
/* Wait for response flag (bit 31) in C2PMSG_64 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 8f84fe40abbb..64b240b51f1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -95,7 +95,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 4):
err = psp_init_sos_microcode(psp, ucode_prefix);
@@ -129,6 +129,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
err = psp_init_ta_microcode(psp, ucode_prefix);
break;
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
@@ -148,14 +149,12 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
int ret;
int retry_loop;
- for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ for (retry_loop = 0; retry_loop < 20; retry_loop++) {
/* Wait for bootloader to signify that is
ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp,
- SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000,
- 0x80000000,
- false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x8000FFFF, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
return 0;
@@ -251,8 +250,8 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
return ret;
}
@@ -276,11 +275,13 @@ static int psp_v11_0_ring_stop(struct psp_context *psp,
/* Wait for response flag (bit 31) */
if (amdgpu_sriov_vf(adev))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -316,13 +317,15 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Wait for sOS ready for ring creation */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_ERROR("Failed to wait for sOS ready for ring creation\n");
return ret;
@@ -346,8 +349,9 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -380,7 +384,8 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG,
+ MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_INFO("psp is not working correctly before mode1 reset!\n");
@@ -392,17 +397,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
msleep(500);
- offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
-
- ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
-
- if (ret) {
- DRM_INFO("psp mode 1 reset failed!\n");
- return -EINVAL;
- }
-
- DRM_INFO("psp mode1 reset succeed \n");
-
return 0;
}
@@ -420,8 +414,9 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
for (i = 0; i < max_wait; i++) {
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
break;
}
@@ -506,7 +501,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
* before training, and restore it after training to avoid
* VRAM corruption.
*/
- sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+ sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;
if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
@@ -532,7 +527,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
}
memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
vfree(buf);
drm_dev_exit(idx);
} else {
@@ -600,7 +595,7 @@ static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -637,7 +632,7 @@ static int psp_v11_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (!ret)
*fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36);
@@ -658,7 +653,8 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ring_get_wptr = psp_v11_0_ring_get_wptr,
.ring_set_wptr = psp_v11_0_ring_set_wptr,
.load_usbc_pd_fw = psp_v11_0_load_usbc_pd_fw,
- .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw
+ .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw,
+ .wait_for_bootloader = psp_v11_0_wait_for_bootloader
};
void psp_v11_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
index 5697760a819b..93787a90d598 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
@@ -41,8 +41,9 @@ static int psp_v11_0_8_ring_stop(struct psp_context *psp,
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Write the ring destroy command*/
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
@@ -50,8 +51,9 @@ static int psp_v11_0_8_ring_stop(struct psp_context *psp,
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -87,13 +89,15 @@ static int psp_v11_0_8_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Wait for sOS ready for ring creation */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
return ret;
@@ -117,8 +121,9 @@ static int psp_v11_0_8_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index fcd708eae75c..4c6450d62299 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -34,9 +34,6 @@
#include "sdma0/sdma0_4_0_offset.h"
#include "nbio/nbio_7_4_offset.h"
-#include "oss/osssys_4_0_offset.h"
-#include "oss/osssys_4_0_sh_mask.h"
-
MODULE_FIRMWARE("amdgpu/renoir_asd.bin");
MODULE_FIRMWARE("amdgpu/renoir_ta.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin");
@@ -85,7 +82,7 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -99,11 +96,8 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
return ret;
}
@@ -124,7 +118,7 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -138,46 +132,13 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
return ret;
}
-static void psp_v12_0_reroute_ih(struct psp_context *psp)
-{
- struct amdgpu_device *adev = psp->adev;
- uint32_t tmp;
-
- /* Change IH ring for VMC */
- tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
-
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
-
- mdelay(20);
- psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
-
- /* Change IH ring for UMC */
- tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
-
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
-
- mdelay(20);
- psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
-}
-
static int psp_v12_0_ring_create(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -186,49 +147,23 @@ static int psp_v12_0_ring_create(struct psp_context *psp,
struct psp_ring *ring = &psp->km_ring;
struct amdgpu_device *adev = psp->adev;
- psp_v12_0_reroute_ih(psp);
-
- if (amdgpu_sriov_vf(psp->adev)) {
- /* Write low address of the ring to C2PMSG_102 */
- psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
- /* Write high address of the ring to C2PMSG_103 */
- psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
-
- /* Write the ring initialization command to C2PMSG_101 */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
- GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
-
- } else {
- /* Write low address of the ring to C2PMSG_69 */
- psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
- /* Write high address of the ring to C2PMSG_70 */
- psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
- /* Write size of ring to C2PMSG_71 */
- psp_ring_reg = ring->ring_size;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
- /* Write the ring initialization command to C2PMSG_64 */
- psp_ring_reg = ring_type;
- psp_ring_reg = psp_ring_reg << 16;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
- }
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -247,16 +182,15 @@ static int psp_v12_0_ring_stop(struct psp_context *psp,
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
GFX_CTRL_CMD_ID_DESTROY_RINGS);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
/* Wait for response flag (bit 31) */
if (amdgpu_sriov_vf(adev))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
return ret;
}
@@ -287,7 +221,8 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG,
+ MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_INFO("psp is not working correctly before mode1 reset!\n");
@@ -301,7 +236,8 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK,
+ 0);
if (ret) {
DRM_INFO("psp mode 1 reset failed!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 469eed084976..af4a7d7c4abd 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -27,6 +27,7 @@
#include "amdgpu_ucode.h"
#include "soc15_common.h"
#include "psp_v13_0.h"
+#include "amdgpu_ras.h"
#include "mp/mp_13_0_2_offset.h"
#include "mp/mp_13_0_2_sh_mask.h"
@@ -41,7 +42,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
@@ -50,8 +53,16 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_12_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_12_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_14_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_14_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_1_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_1_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_4_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@@ -59,18 +70,16 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin");
/* Read USB-PD from LFB */
#define GFX_CMD_USB_PD_USE_LFB 0x480
-/* VBIOS gfl defines */
-#define MBOX_READY_MASK 0x80000000
-#define MBOX_STATUS_MASK 0x0000FFFF
-#define MBOX_COMMAND_MASK 0x00FF0000
-#define MBOX_READY_FLAG 0x80000000
-#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
-#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
-#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+/* Retry times for vmbx ready wait */
+#define PSP_VMBX_POLLING_LIMIT 3000
/* memory training timeout define */
#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
+#define regMP1_PUB_SCRATCH0 0x3b10090
+
+#define PSP13_BL_STATUS_SIZE 100
+
static int psp_v13_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -79,7 +88,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
@@ -97,6 +106,8 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 11):
case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
err = psp_init_toc_microcode(psp, ucode_prefix);
if (err)
return err;
@@ -108,6 +119,8 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
return err;
@@ -133,17 +146,43 @@ static bool psp_v13_0_is_sos_alive(struct psp_context *psp)
return sol_reg != 0x0;
}
+static void psp_v13_0_bootloader_print_status(struct psp_context *psp,
+ const char *msg)
+{
+ struct amdgpu_device *adev = psp->adev;
+ u32 bl_status_reg;
+ char bl_status_msg[PSP13_BL_STATUS_SIZE] = ""; /* empty if no AID is visited */
+ int i, at;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
+ at = 0; /* write offset into bl_status_msg */
+ for_each_inst(i, adev->aid_mask) {
+ bl_status_reg =
+ (SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_92)
+ << 2) +
+ adev->asic_funcs->encode_ext_smn_addressing(i);
+ at += scnprintf(bl_status_msg + at, /* bytes stored, so at stays in bounds */
+ PSP13_BL_STATUS_SIZE - at,
+ " status(%02i): 0x%08x", i,
+ RREG32_PCIE_EXT(bl_status_reg));
+ }
+ dev_info(adev->dev, "%s - %s\n", msg, bl_status_msg);
+ }
+}
+
static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
int retry_loop, ret;
- for (retry_loop = 0; retry_loop < 70; retry_loop++) {
+ for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) {
/* Wait for bootloader to signify that is
ready having bit 31 of C2PMSG_33 set to 1 */
ret = psp_wait_for(
psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33),
- 0x80000000, 0xffffffff, false);
+ 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
break;
@@ -158,20 +197,29 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- int retry_loop, ret;
-
+ int retry_loop, retry_cnt, ret;
+
+ retry_cnt =
+ ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))) ?
+ PSP_VMBX_POLLING_LIMIT :
+ 10;
/* Wait for bootloader to signify that it is ready having bit 31 of
* C2PMSG_35 set to 1. All other bits are expected to be cleared.
* If there is an error in processing command, bits[7:0] will be set.
* This is applicable for PSP v13.0.6 and newer.
*/
- for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) {
ret = psp_wait_for(
psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
- 0x80000000, 0xffffffff, false);
+ 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
return 0;
+ if (retry_loop && !(retry_loop % 10))
+ psp_v13_0_bootloader_print_status(
+ psp, "Waiting for bootloader completion");
}
return ret;
@@ -180,11 +228,20 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
+ ret = psp_v13_0_wait_for_vmbx_ready(psp);
+ if (ret)
+ amdgpu_ras_query_boot_status(adev, 4);
- if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6)) {
- psp_v13_0_wait_for_vmbx_ready(psp);
+ ret = psp_v13_0_wait_for_bootloader(psp);
+ if (ret)
+ amdgpu_ras_query_boot_status(adev, 4);
- return psp_v13_0_wait_for_bootloader(psp);
+ return ret;
}
return 0;
@@ -260,6 +317,17 @@ static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp)
return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV);
}
+static int psp_v13_0_bootloader_load_spdm_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->spdm_drv, PSP_BL__LOAD_SPDMDRV);
+}
+
+static inline void psp_v13_0_init_sos_version(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ psp->sos.fw_version = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_58);
+}
static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)
{
@@ -270,8 +338,10 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)
/* Check sOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
- if (psp_v13_0_is_sos_alive(psp))
+ if (psp_v13_0_is_sos_alive(psp)) {
+ psp_v13_0_init_sos_version(psp);
return 0;
+ }
ret = psp_v13_0_wait_for_bootloader(psp);
if (ret)
@@ -292,8 +362,11 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
+
+ if (!ret)
+ psp_v13_0_init_sos_version(psp);
return ret;
}
@@ -311,8 +384,9 @@ static int psp_v13_0_ring_stop(struct psp_context *psp,
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Write the ring destroy command*/
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64,
@@ -320,8 +394,9 @@ static int psp_v13_0_ring_stop(struct psp_context *psp,
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -357,13 +432,15 @@ static int psp_v13_0_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Wait for sOS ready for ring creation */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
return ret;
@@ -387,8 +464,9 @@ static int psp_v13_0_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -451,8 +529,9 @@ static int psp_v13_0_memory_training_send_msg(struct psp_context *psp, int msg)
max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
for (i = 0; i < max_wait; i++) {
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
break;
}
@@ -535,7 +614,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops)
* before training, and restore it after training to avoid
* VRAM corruption.
*/
- sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+ sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;
if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
@@ -561,7 +640,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops)
}
memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
vfree(buf);
drm_dev_exit(idx);
} else {
@@ -604,7 +683,7 @@ static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -641,7 +720,7 @@ static int psp_v13_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (!ret)
*fw_ver = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36);
@@ -661,12 +740,14 @@ static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd)
/* Ring the doorbell */
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_73, 1);
- if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE)
+ if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE ||
+ cmd == C2PMSG_CMD_SPI_GET_FLASH_IMAGE)
ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT);
else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
- MBOX_READY_FLAG, MBOX_READY_MASK, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, 0);
if (ret) {
dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret);
return ret;
@@ -690,7 +771,7 @@ static int psp_v13_0_update_spirom(struct psp_context *psp,
/* Confirm PSP is ready to start */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
- MBOX_READY_FLAG, MBOX_READY_MASK, false);
+ MBOX_READY_FLAG, MBOX_READY_MASK, 0);
if (ret) {
dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret);
return ret;
@@ -717,6 +798,37 @@ static int psp_v13_0_update_spirom(struct psp_context *psp,
return 0;
}
+/*
+ * Hand @fw_pri_mc_addr to the PSP in two 32-bit halves via C2PMSG_116, then
+ * issue the GET_FLASH_IMAGE SPI command. Returns 0 or the first error hit.
+ */
+static int psp_v13_0_dump_spirom(struct psp_context *psp,
+				 uint64_t fw_pri_mc_addr)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int rc;
+
+	/* Confirm PSP is ready to start */
+	rc = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+			  MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+	if (rc) {
+		dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", rc);
+		return rc;
+	}
+
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
+	rc = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO);
+	if (rc)
+		return rc;
+
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
+	rc = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI);
+	if (rc)
+		return rc;
+
+	return psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_FLASH_IMAGE);
+}
+
static int psp_v13_0_vbflash_status(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -728,7 +840,7 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
- if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 10)) {
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 10)) {
uint32_t reg_data;
/* MP1 fatal error: trigger PSP dram read to unhalt PSP
* during MP1 triggered sync flood.
@@ -745,6 +857,90 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp)
return 0;
}
+/* Read RAS capability bits from C2PMSG_127; false means nothing was updated. */
+static bool psp_v13_0_get_ras_capability(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	u32 caps;
+
+	/* VFs leave the query to the host; no RAS context or an APU: skip too */
+	if (amdgpu_sriov_vf(adev) || !con || (adev->flags & AMD_IS_APU))
+		return false;
+
+	switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 12):
+	case IP_VERSION(13, 0, 14):
+		break;
+	default:
+		return false;
+	}
+
+	caps = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127);
+	adev->ras_hw_enabled = caps & GENMASK_ULL(23, 0);
+	con->poison_supported = !!(caps & GENMASK_ULL(24, 24));
+	return true;
+}
+
+/*
+ * True only on MP0 13.0.6 whose MP1_PUB_SCRATCH0 value is below 0x557300.
+ */
+static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
+		return false;
+
+	/* load 4e version of sos if pmfw version less than 85.115.0 */
+	return RREG32(regMP1_PUB_SCRATCH0 / 4) < 0x557300;
+}
+
+/* True when the live sOS version differs from psp->sos.fw_version on entry. */
+static bool psp_v13_0_is_reload_needed(struct psp_context *psp)
+{
+	uint32_t ucode_ver;
+
+	if (!psp_v13_0_is_sos_alive(psp))
+		return false;
+
+	/* Restrict reload support only to specific IP versions */
+	switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) {
+	case IP_VERSION(13, 0, 2):
+	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 14):
+		break;
+	default:
+		return false;
+	}
+
+	/* Keep the microcode header's TOS version, then re-read it from hardware */
+	ucode_ver = psp->sos.fw_version;
+	psp_v13_0_init_sos_version(psp);
+	return ucode_ver != psp->sos.fw_version;
+}
+
+/*
+ * Program @id = @val through C2PMSG_101..103; -EOPNOTSUPP unless SR-IOV VF.
+ */
+static int psp_v13_0_reg_program_no_ring(struct psp_context *psp, uint32_t val,
+					 enum psp_reg_prog_id id)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (!amdgpu_sriov_vf(adev))
+		return -EOPNOTSUPP;
+
+	/* PSP will broadcast the value to all instances */
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_GBR_IH_SET);
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, id);
+	WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, val);
+
+	return psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+			    0x80000000, 0x80000000, 0);
+}
+
static const struct psp_funcs psp_v13_0_funcs = {
.init_microcode = psp_v13_0_init_microcode,
.wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
@@ -755,6 +951,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
.bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv,
.bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv,
.bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv,
+ .bootloader_load_spdm_drv = psp_v13_0_bootloader_load_spdm_drv,
.bootloader_load_sos = psp_v13_0_bootloader_load_sos,
.ring_create = psp_v13_0_ring_create,
.ring_stop = psp_v13_0_ring_stop,
@@ -765,8 +962,13 @@ static const struct psp_funcs psp_v13_0_funcs = {
.load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw,
.read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw,
.update_spirom = psp_v13_0_update_spirom,
+ .dump_spirom = psp_v13_0_dump_spirom,
.vbflash_stat = psp_v13_0_vbflash_status,
.fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
+ .get_ras_capability = psp_v13_0_get_ras_capability,
+ .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required,
+ .is_reload_needed = psp_v13_0_is_reload_needed,
+ .reg_program_no_ring = psp_v13_0_reg_program_no_ring,
};
void psp_v13_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
index d5ba58eba3e2..5f39a2edcc95 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
@@ -40,7 +40,7 @@ static int psp_v13_0_4_init_microcode(struct psp_context *psp)
amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
- switch (adev->ip_versions[MP0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 4):
err = psp_init_toc_microcode(psp, ucode_prefix);
if (err)
@@ -76,11 +76,9 @@ static int psp_v13_0_4_wait_for_bootloader(struct psp_context *psp)
for (retry_loop = 0; retry_loop < 10; retry_loop++) {
/* Wait for bootloader to signify that is
ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp,
- SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
- 0x80000000,
- 0x80000000,
- false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
return 0;
@@ -185,8 +183,8 @@ static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
return ret;
}
@@ -204,8 +202,9 @@ static int psp_v13_0_4_ring_stop(struct psp_context *psp,
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Write the ring destroy command*/
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64,
@@ -213,8 +212,9 @@ static int psp_v13_0_4_ring_stop(struct psp_context *psp,
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
/* Wait for response flag (bit 31) */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
@@ -250,13 +250,15 @@ static int psp_v13_0_4_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
} else {
/* Wait for sOS ready for ring creation */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
if (ret) {
DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
return ret;
@@ -280,8 +282,9 @@ static int psp_v13_0_4_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
}
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
new file mode 100644
index 000000000000..38dfc5c19f2a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
@@ -0,0 +1,705 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drm_drv.h>
+#include <linux/vmalloc.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v14_0.h"
+
+#include "mp/mp_14_0_2_offset.h"
+#include "mp/mp_14_0_2_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos_kicker.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta_kicker.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_5_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_5_ta.bin");
+
+/* For large FW files the time to complete can be very long */
+#define USBC_PD_POLLING_LIMIT_S 240
+
+/* Read USB-PD from LFB */
+#define GFX_CMD_USB_PD_USE_LFB 0x480
+
+/* VBIOS gfl defines */
+#define MBOX_READY_MASK 0x80000000
+#define MBOX_STATUS_MASK 0x0000FFFF
+#define MBOX_COMMAND_MASK 0x00FF0000
+#define MBOX_READY_FLAG 0x80000000
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
+#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
+
+/*
+ * Initialise PSP microcode for the detected MP0 IP version: calls
+ * psp_init_sos_microcode() for 14.0.2/14.0.3, psp_init_toc_microcode() for
+ * 14.0.5, then psp_init_ta_microcode(). Returns 0, the first error, or -EINVAL.
+ */
+static int psp_v14_0_init_microcode(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	char ucode_prefix[30];
+	int err;
+
+	amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+	switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+	case IP_VERSION(14, 0, 2):
+	case IP_VERSION(14, 0, 3):
+		err = psp_init_sos_microcode(psp, ucode_prefix);
+		break;
+	case IP_VERSION(14, 0, 5):
+		err = psp_init_toc_microcode(psp, ucode_prefix);
+		break;
+	default:
+		/* Unknown IP: fail init rather than BUG() the kernel */
+		dev_err(adev->dev, "unsupported PSP IP version\n");
+		return -EINVAL;
+	}
+	if (err)
+		return err;
+
+	/* Every supported version also needs the TA microcode */
+	return psp_init_ta_microcode(psp, ucode_prefix);
+}
+
+/*
+ * sOS is reported alive once C2PMSG_81 reads back non-zero.
+ */
+static bool psp_v14_0_is_sos_alive(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81) != 0x0;
+}
+
+/*
+ * Poll until the bootloader signals ready by setting bit 31 of C2PMSG_35.
+ * psp_wait_for() is tried up to 10 times with PSP_WAITREG_NOVERBOSE.
+ * Returns 0 on success or the error from the final attempt.
+ */
+static int psp_v14_0_wait_for_bootloader(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int attempts_left = 10;
+	int ret;
+
+	do {
+		ret = psp_wait_for(psp,
+				   SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+				   0x80000000, 0x80000000,
+				   PSP_WAITREG_NOVERBOSE);
+	} while (ret && --attempts_left);
+
+	return ret;
+}
+
+/*
+ * Stage @bin_desc in the fw_pri_buf staging buffer and pass it to the
+ * bootloader with command @bl_cmd. Returns 0 without doing anything when sOS
+ * is already alive, otherwise the result of the bootloader waits.
+ */
+static int psp_v14_0_bootloader_load_component(struct psp_context *psp,
+					       struct psp_bin_desc *bin_desc,
+					       enum psp_bootloader_cmd bl_cmd)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int ret;
+
+	/* Check the sOS sign of life register: if it is set, the sys driver
+	 * and sOS have already been loaded.
+	 */
+	if (psp_v14_0_is_sos_alive(psp))
+		return 0;
+
+	ret = psp_v14_0_wait_for_bootloader(psp);
+	if (ret)
+		return ret;
+
+	/* Copy the binary into the zeroed staging buffer */
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+	memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes);
+
+	/* Give the bootloader the buffer address (shifted right by 20 bits) */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36,
+		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
+	/* ... followed by the load command itself */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, bl_cmd);
+
+	return psp_v14_0_wait_for_bootloader(psp);
+}
+
+static int psp_v14_0_bootloader_load_kdb(struct psp_context *psp)
+{
+	return psp_v14_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
+
+static int psp_v14_0_bootloader_load_spl(struct psp_context *psp)
+{
+	return psp_v14_0_bootloader_load_component(psp, &psp->spl, PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
+static int psp_v14_0_bootloader_load_sysdrv(struct psp_context *psp)
+{
+	return psp_v14_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
+}
+
+static int psp_v14_0_bootloader_load_soc_drv(struct psp_context *psp)
+{
+	return psp_v14_0_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV);
+}
+
+static int psp_v14_0_bootloader_load_intf_drv(struct psp_context *psp)
+{
+	return psp_v14_0_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV);
+}
+
+static int psp_v14_0_bootloader_load_dbg_drv(struct psp_context *psp)
+{
+	/* psp v14 renamed dbg_drv to had_drv, hence the HADDRV load command */
+	return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_HADDRV);
+}
+
+static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp)
+{
+	return psp_v14_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV);
+}
+
+static int psp_v14_0_bootloader_load_ipkeymgr_drv(struct psp_context *psp)
+{
+	return psp_v14_0_bootloader_load_component(psp, &psp->ipkeymgr_drv, PSP_BL__LOAD_IPKEYMGRDRV);
+}
+
+/*
+ * Hand the secure OS image to the bootloader and wait on C2PMSG_81 with
+ * PSP_WAITREG_CHANGED (presumably: until it differs from the value sampled
+ * after the 20 ms delay). Returns 0 at once if sOS is already alive.
+ */
+static int psp_v14_0_bootloader_load_sos(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t sol_before;
+	int ret;
+
+	/* A non-zero sign of life register means sys driver and sOS are loaded */
+	if (psp_v14_0_is_sos_alive(psp))
+		return 0;
+
+	ret = psp_v14_0_wait_for_bootloader(psp);
+	if (ret)
+		return ret;
+
+	/* Copy Secure OS binary to the zeroed PSP staging buffer */
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+	memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes);
+
+	/* Provide the PSP secure OS to bootloader */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36,
+		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, PSP_BL__LOAD_SOSDRV);
+
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+
+	/* Sample the current sign of life value as the wait's reference */
+	sol_before = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81);
+	return psp_wait_for(psp,
+			    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81),
+			    sol_before, 0, PSP_WAITREG_CHANGED);
+}
+
+/*
+ * Send the ring destroy command and wait for the TOS response flag. An SR-IOV
+ * VF uses C2PMSG_101 (DESTROY_GPCOM_RING), everything else C2PMSG_64
+ * (DESTROY_RINGS). @ring_type is not used. Returns the psp_wait_for() result.
+ */
+static int psp_v14_0_ring_stop(struct psp_context *psp,
+			       enum psp_ring_type ring_type)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_sriov_vf(adev)) {
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+			     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+		/* Wait for response flag (bit 31) */
+		return psp_wait_for(psp,
+				    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101),
+				    MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+	}
+
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64,
+		     GFX_CTRL_CMD_ID_DESTROY_RINGS);
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+	/* Wait for response flag (bit 31) */
+	return psp_wait_for(psp,
+			    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+			    MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+}
+
+/*
+ * Register the km ring with the PSP. An SR-IOV VF stops the ring first and
+ * goes through the C2PMSG_101..103 mailbox; otherwise the ring address, size
+ * and type are written to C2PMSG_69/70/71/64 after the TOS reports ready.
+ * Returns the final psp_wait_for() result, or the first earlier error.
+ */
+static int psp_v14_0_ring_create(struct psp_context *psp,
+				 enum psp_ring_type ring_type)
+{
+	struct psp_ring *ring = &psp->km_ring;
+	struct amdgpu_device *adev = psp->adev;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev)) {
+		ret = psp_v14_0_ring_stop(psp, ring_type);
+		if (ret) {
+			DRM_ERROR("psp_v14_0_ring_stop_sriov failed!\n");
+			return ret;
+		}
+
+		/* Write low address of the ring to C2PMSG_102 */
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102,
+			     lower_32_bits(ring->ring_mem_mc_addr));
+		/* Write high address of the ring to C2PMSG_103 */
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_103,
+			     upper_32_bits(ring->ring_mem_mc_addr));
+
+		/* Write the ring initialization command to C2PMSG_101 */
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101,
+			     GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_101 */
+		return psp_wait_for(psp,
+				    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101),
+				    MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+	}
+
+	/* Wait for sOS ready for ring creation */
+	ret = psp_wait_for(psp,
+			   SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+			   MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0);
+	if (ret) {
+		DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+		return ret;
+	}
+
+	/* Write low address of the ring to C2PMSG_69 */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69,
+		     lower_32_bits(ring->ring_mem_mc_addr));
+	/* Write high address of the ring to C2PMSG_70 */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70,
+		     upper_32_bits(ring->ring_mem_mc_addr));
+	/* Write size of ring to C2PMSG_71 */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, ring->ring_size);
+	/* The ring initialization command for C2PMSG_64 is the type in bits 16+ */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64,
+		     (unsigned int)ring_type << 16);
+
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+
+	/* Wait for response flag (bit 31) in C2PMSG_64 */
+	return psp_wait_for(psp,
+			    SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64),
+			    MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+}
+
+/* Stop the km ring, then free firmware.rbuf regardless; returns stop result. */
+static int psp_v14_0_ring_destroy(struct psp_context *psp,
+				  enum psp_ring_type ring_type)
+{
+	struct amdgpu_device *adev = psp->adev;
+	struct psp_ring *ring = &psp->km_ring;
+	int ret = psp_v14_0_ring_stop(psp, ring_type);
+
+	if (ret)
+		DRM_ERROR("Fail to stop psp ring\n");
+
+	amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+			      &ring->ring_mem_mc_addr,
+			      (void **)&ring->ring_mem);
+
+	return ret;
+}
+
+/*
+ * Read the ring write pointer: C2PMSG_102 on an SR-IOV VF, else C2PMSG_67.
+ */
+static uint32_t psp_v14_0_ring_get_wptr(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_sriov_vf(adev))
+		return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102);
+
+	return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67);
+}
+
+/* Set the ring write pointer; an SR-IOV VF also sends CONSUME_CMD. */
+static void psp_v14_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+	struct amdgpu_device *adev = psp->adev;
+
+	if (amdgpu_sriov_vf(adev)) {
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, value);
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+	} else
+		WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value);
+}
+
+/*
+ * Send training message @msg and retry psp_wait_for() on bit 31 of C2PMSG_35
+ * up to MEM_TRAIN_SEND_MSG_TIMEOUT_US / usec_timeout times: 0 or -ETIME.
+ */
+static int psp_v14_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int ret = -ETIME;
+	int max_wait;
+	int i;
+
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36,
+		     (uint32_t)(psp->mem_train_ctx.c2p_train_data_offset >> 20));
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, msg);
+
+	max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+	for (i = 0; i < max_wait; i++) {
+		if (!psp_wait_for(psp,
+				  SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+				  0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE)) {
+			ret = 0;
+			break;
+		}
+	}
+
+	dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n",
+		(msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+		(ret == 0) ? "succeed" : "failed",
+		i, adev->usec_timeout/1000);
+	return ret;
+}
+
+/* Run the memory training steps selected by @ops; returns 0 or a -errno. */
+static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+	struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+	uint32_t *pcache = (uint32_t *)ctx->sys_cache;
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t p2c_header[4];
+	uint32_t sz;
+	void *buf;
+	int ret, idx;
+
+	if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+		dev_dbg(adev->dev, "Memory training is not supported.\n");
+		return 0;
+	} else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+		dev_err(adev->dev, "Memory training initialization failure.\n");
+		return -EINVAL;
+	}
+
+	if (psp_v14_0_is_sos_alive(psp)) {
+		dev_dbg(adev->dev, "SOS is alive, skip memory training.\n");
+		return 0;
+	}
+	/* Fetch the 4-word p2c header so it can be compared with sys_cache */
+	amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+	dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+		pcache[0], pcache[1], pcache[2], pcache[3],
+		p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+	/* Extend @ops below so that each requested step gets what it depends on */
+	if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+		dev_dbg(adev->dev, "Short training depends on restore.\n");
+		ops |= PSP_MEM_TRAIN_RESTORE;
+	}
+
+	if ((ops & PSP_MEM_TRAIN_RESTORE) &&
+	    pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+		dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n");
+		ops |= PSP_MEM_TRAIN_SAVE;
+	}
+
+	if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+	    !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+	      pcache[3] == p2c_header[3])) {
+		dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+		ops |= PSP_MEM_TRAIN_SAVE;
+	}
+
+	if ((ops & PSP_MEM_TRAIN_SAVE) &&
+	    p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+		dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n");
+		ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+	}
+
+	if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+		ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+		ops |= PSP_MEM_TRAIN_SAVE;
+	}
+
+	dev_dbg(adev->dev, "Memory training ops:%x.\n", ops);
+
+	if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+		/*
+		 * Long training will encroach a certain amount on the bottom of VRAM;
+		 * save the content from the bottom of VRAM to system memory
+		 * before training, and restore it after training to avoid
+		 * VRAM corruption.
+		 */
+		sz = BIST_MEM_TRAINING_ENCROACHED_SIZE;
+
+		if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
+			dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+				adev->gmc.visible_vram_size,
+				adev->mman.aper_base_kaddr);
+			return -EINVAL;
+		}
+
+		buf = vmalloc(sz);
+		if (!buf) {
+			dev_err(adev->dev, "failed to allocate system memory.\n");
+			return -ENOMEM;
+		}
+
+		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+			memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
+			ret = psp_v14_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+			if (ret) {
+				DRM_ERROR("Send long training msg failed.\n");
+				vfree(buf);
+				drm_dev_exit(idx);
+				return ret;
+			}
+
+			memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+			amdgpu_device_flush_hdp(adev, NULL);
+			vfree(buf);
+			drm_dev_exit(idx);
+		} else {
+			vfree(buf);
+			return -ENODEV;
+		}
+	}
+
+	if (ops & PSP_MEM_TRAIN_SAVE) {
+		amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+	}
+
+	if (ops & PSP_MEM_TRAIN_RESTORE) {
+		amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+	}
+
+	if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+		ret = psp_v14_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
+							 PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
+		if (ret) {
+			dev_err(adev->dev, "send training msg failed.\n");
+			return ret;
+		}
+	}
+	ctx->training_cnt++;
+	return 0;
+}
+
+/*
+ * Pass @fw_pri_mc_addr to the PSP via C2PMSG_36, issue GFX_CMD_USB_PD_USE_LFB
+ * and poll C2PMSG_35 once a second, USBC_PD_POLLING_LIMIT_S times at most.
+ * Returns 0, the initial wait error, -ETIME if bit 31 never sets, or -EIO
+ * when bits [15:0] of the final status are non-zero.
+ */
+static int psp_v14_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr)
+{
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t reg_status;
+	int ret, i;
+
+	/*
+	 * The LFB address is 1MB aligned; shift it right by 20 to fit a 32-bit reg
+	 */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20));
+
+	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+			   0x80000000, 0x80000000, 0);
+	if (ret)
+		return ret;
+
+	/* Fireup interrupt so PSP can pick up the address */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16));
+
+	/* FW load takes very long time */
+	for (i = 0; i < USBC_PD_POLLING_LIMIT_S; i++) {
+		msleep(1000);
+		reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35);
+		if (reg_status & 0x80000000)
+			break;
+	}
+	if (i == USBC_PD_POLLING_LIMIT_S)
+		return -ETIME;
+
+	if ((reg_status & 0xFFFF) != 0) {
+		DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %04x\n",
+			  reg_status & 0xFFFF);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* Query the USB-C PD firmware version; *fw_ver is written only when the wait succeeds. */
+static int psp_v14_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int err;
+
+	/* Post the version query in C2PMSG_35, then wait for bit 31 of the same register */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER);
+	err = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35),
+			   0x80000000, 0x80000000, 0);
+	if (err)
+		return err;
+	*fw_ver = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36);
+	return 0;
+}
+
+/*
+ * Post one SPI command to the PSP mailbox, ring the doorbell and wait for MBOX_READY.
+ * Returns 0, the wait helper's error code, or -EIO when status bits [15:0] are set.
+ */
+static int psp_v14_0_exec_spi_cmd(struct psp_context *psp, int cmd)
+{
+	uint32_t reg_status = 0, reg_val = 0;
+	struct amdgpu_device *adev = psp->adev;
+	int ret;
+
+	/* clear MBX ready (MBOX_READY_MASK bit is 0) and set update command */
+	reg_val |= (cmd << 16);
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115, reg_val);
+
+	/* Ring the doorbell */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_73, 1);
+
+	/* Wait exactly once so the result of the chosen wait is never overwritten */
+	if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE)
+		ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115),
+						 MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT);
+	else
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115),
+				   MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+	if (ret) {
+		dev_err(adev->dev, "SPI cmd %x timed out, ret = %d\n", cmd, ret);
+		return ret;
+	}
+
+	reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115);
+	if ((reg_status & 0xFFFF) != 0) {
+		dev_err(adev->dev, "SPI cmd %x failed, fail status = %04x\n",
+			cmd, reg_status & 0xFFFF);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Flash a new SPI ROM image: pass the 64-bit image address to the PSP in two
+ * halves through C2PMSG_116, then start the flash update. Returns 0 or the
+ * first error reported by a wait/command step.
+ */
+static int psp_v14_0_update_spirom(struct psp_context *psp,
+				   uint64_t fw_pri_mc_addr)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int ret;
+
+	/* Confirm PSP is ready to start */
+	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115),
+			   MBOX_READY_FLAG, MBOX_READY_MASK, 0);
+	if (ret) {
+		dev_err(adev->dev, "PSP Not ready to start processing, ret = %d\n", ret);
+		return ret;
+	}
+
+	/* Low half of the image address */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
+	ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO);
+	if (ret)
+		return ret;
+
+	/* High half of the image address */
+	WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
+	ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI);
+	if (ret)
+		return ret;
+
+	/* Set before the flash command is issued; not cleared here if that command fails */
+	psp->vbflash_done = true;
+	return psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE);
+}
+
+static int psp_v14_0_vbflash_status(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev; /* presumably consumed by the SOC15 register macro below */
+
+ return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115); /* raw value of C2PMSG_115, returned as-is */
+}
+
+static const struct psp_funcs psp_v14_0_funcs = {
+	.init_microcode = psp_v14_0_init_microcode,
+	.bootloader_load_kdb = psp_v14_0_bootloader_load_kdb, /* bootloader_load_* stages */
+	.bootloader_load_spl = psp_v14_0_bootloader_load_spl,
+	.bootloader_load_sysdrv = psp_v14_0_bootloader_load_sysdrv,
+	.bootloader_load_soc_drv = psp_v14_0_bootloader_load_soc_drv,
+	.bootloader_load_intf_drv = psp_v14_0_bootloader_load_intf_drv,
+	.bootloader_load_dbg_drv = psp_v14_0_bootloader_load_dbg_drv,
+	.bootloader_load_ras_drv = psp_v14_0_bootloader_load_ras_drv,
+	.bootloader_load_ipkeymgr_drv = psp_v14_0_bootloader_load_ipkeymgr_drv,
+	.bootloader_load_sos = psp_v14_0_bootloader_load_sos,
+	.ring_create = psp_v14_0_ring_create, /* ring_* handlers */
+	.ring_stop = psp_v14_0_ring_stop,
+	.ring_destroy = psp_v14_0_ring_destroy,
+	.ring_get_wptr = psp_v14_0_ring_get_wptr,
+	.ring_set_wptr = psp_v14_0_ring_set_wptr,
+	.mem_training = psp_v14_0_memory_training,
+	.load_usbc_pd_fw = psp_v14_0_load_usbc_pd_fw, /* USB-C PD firmware */
+	.read_usbc_pd_fw = psp_v14_0_read_usbc_pd_fw,
+	.update_spirom = psp_v14_0_update_spirom, /* SPI ROM flashing */
+	.vbflash_stat = psp_v14_0_vbflash_status,
+};
+
+void psp_v14_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v14_0_funcs; /* point this context at the PSP v14.0 callback table */
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h
new file mode 100644
index 000000000000..dd18ba2cfad5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V14_0_H__
+#define __PSP_V14_0_H__
+
+#include "amdgpu_psp.h"
+
+#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */
+
+void psp_v14_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index f6b75e3e47ff..833830bc3e2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -91,7 +91,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -109,7 +109,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
return ret;
}
@@ -130,7 +130,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ 0x80000000, 0x80000000, 0);
if (ret)
return ret;
@@ -147,8 +147,8 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
- RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
- 0, true);
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0,
+ PSP_WAITREG_CHANGED);
return ret;
}
@@ -168,7 +168,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp)
mdelay(20);
psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ 0x80000000, 0x8000FFFF, 0);
/* Change IH ring for UMC */
tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
@@ -180,7 +180,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp)
mdelay(20);
psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ 0x80000000, 0x8000FFFF, 0);
}
static int psp_v3_1_ring_create(struct psp_context *psp,
@@ -217,9 +217,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
- mmMP0_SMN_C2PMSG_101), 0x80000000,
- 0x8000FFFF, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x8000FFFF, 0);
} else {
/* Write low address of the ring to C2PMSG_69 */
@@ -240,10 +240,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
mdelay(20);
/* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
- mmMP0_SMN_C2PMSG_64), 0x80000000,
- 0x8000FFFF, false);
-
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, 0);
}
return ret;
}
@@ -267,11 +266,13 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
/* Wait for response flag (bit 31) */
if (amdgpu_sriov_vf(adev))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, 0);
else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, 0);
return ret;
}
@@ -311,7 +312,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, 0);
if (ret) {
DRM_INFO("psp is not working correctly before mode1 reset!\n");
@@ -325,7 +326,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
- ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, 0);
if (ret) {
DRM_INFO("psp mode 1 reset failed!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 51afc92994a8..92ce580647cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -57,22 +57,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);
MODULE_FIRMWARE("amdgpu/topaz_sdma.bin");
MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin");
-static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
-{
+static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = {
SDMA0_REGISTER_OFFSET,
SDMA1_REGISTER_OFFSET
};
-static const u32 golden_settings_iceland_a11[] =
-{
+static const u32 golden_settings_iceland_a11[] = {
mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
};
-static const u32 iceland_mgcg_cgcg_init[] =
-{
+static const u32 iceland_mgcg_cgcg_init[] = {
mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};
@@ -130,7 +127,6 @@ static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err = 0, i;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
@@ -142,15 +138,19 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
case CHIP_TOPAZ:
chip_name = "topaz";
break;
- default: BUG();
+ default:
+ BUG();
}
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
@@ -171,7 +171,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name);
+ pr_err("sdma_v2_4: Failed to load firmware \"%s_sdma%s.bin\"\n",
+ chip_name, i == 0 ? "" : "1");
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
@@ -339,8 +340,6 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
@@ -474,9 +473,6 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -637,7 +633,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -813,12 +809,17 @@ static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int sdma_v2_4_early_init(void *handle)
+static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+ r = sdma_v2_4_init_microcode(adev);
+ if (r)
+ return r;
+
sdma_v2_4_set_ring_funcs(adev);
sdma_v2_4_set_buffer_funcs(adev);
sdma_v2_4_set_vm_pte_funcs(adev);
@@ -827,11 +828,11 @@ static int sdma_v2_4_early_init(void *handle)
return 0;
}
-static int sdma_v2_4_sw_init(void *handle)
+static int sdma_v2_4_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
@@ -851,12 +852,6 @@ static int sdma_v2_4_sw_init(void *handle)
if (r)
return r;
- r = sdma_v2_4_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
@@ -873,9 +868,9 @@ static int sdma_v2_4_sw_init(void *handle)
return r;
}
-static int sdma_v2_4_sw_fini(void *handle)
+static int sdma_v2_4_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -885,10 +880,10 @@ static int sdma_v2_4_sw_fini(void *handle)
return 0;
}
-static int sdma_v2_4_hw_init(void *handle)
+static int sdma_v2_4_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v2_4_init_golden_registers(adev);
@@ -899,32 +894,26 @@ static int sdma_v2_4_hw_init(void *handle)
return r;
}
-static int sdma_v2_4_hw_fini(void *handle)
+static int sdma_v2_4_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- sdma_v2_4_enable(adev, false);
+ sdma_v2_4_enable(ip_block->adev, false);
return 0;
}
-static int sdma_v2_4_suspend(void *handle)
+static int sdma_v2_4_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v2_4_hw_fini(adev);
+ return sdma_v2_4_hw_fini(ip_block);
}
-static int sdma_v2_4_resume(void *handle)
+static int sdma_v2_4_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v2_4_hw_init(adev);
+ return sdma_v2_4_hw_init(ip_block);
}
-static bool sdma_v2_4_is_idle(void *handle)
+static bool sdma_v2_4_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -934,11 +923,11 @@ static bool sdma_v2_4_is_idle(void *handle)
return true;
}
-static int sdma_v2_4_wait_for_idle(void *handle)
+static int sdma_v2_4_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -951,10 +940,10 @@ static int sdma_v2_4_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v2_4_soft_reset(void *handle)
+static int sdma_v2_4_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
@@ -1093,14 +1082,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int sdma_v2_4_set_clockgating_state(void *handle,
+static int sdma_v2_4_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* XXX handled via the smc on VI */
return 0;
}
-static int sdma_v2_4_set_powergating_state(void *handle,
+static int sdma_v2_4_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1109,7 +1098,6 @@ static int sdma_v2_4_set_powergating_state(void *handle,
static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
.name = "sdma_v2_4",
.early_init = sdma_v2_4_early_init,
- .late_init = NULL,
.sw_init = sdma_v2_4_sw_init,
.sw_fini = sdma_v2_4_sw_fini,
.hw_init = sdma_v2_4_hw_init,
@@ -1184,7 +1172,7 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: unused
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -1194,7 +1182,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
@@ -1264,8 +1252,7 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
-const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
-{
+const struct amdgpu_ip_block_version sdma_v2_4_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 2,
.minor = 4,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 344202870aeb..1c076bd1cf73 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -267,7 +267,6 @@ static void sdma_v3_0_free_microcode(struct amdgpu_device *adev)
static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
- char fw_name[30];
int err = 0, i;
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
@@ -305,10 +304,13 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
@@ -327,7 +329,8 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
}
out:
if (err) {
- pr_err("sdma_v3_0: Failed to load firmware \"%s\"\n", fw_name);
+ pr_err("sdma_v3_0: Failed to load firmware \"%s_sdma%s.bin\"\n",
+ chip_name, i == 0 ? "" : "1");
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ucode_release(&adev->sdma.instance[i].fw);
}
@@ -513,8 +516,6 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
@@ -711,7 +712,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
upper_32_bits(wptr_gpu_addr));
wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
if (ring->use_pollmem) {
- /*wptr polling is not enogh fast, directly clean the wptr register */
+ /* wptr polling is not fast enough, directly clean the wptr register */
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
SDMA0_GFX_RB_WPTR_POLL_CNTL,
@@ -746,9 +747,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -908,7 +906,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1084,9 +1082,10 @@ static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int sdma_v3_0_early_init(void *handle)
+static int sdma_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
switch (adev->asic_type) {
case CHIP_STONEY:
@@ -1097,6 +1096,10 @@ static int sdma_v3_0_early_init(void *handle)
break;
}
+ r = sdma_v3_0_init_microcode(adev);
+ if (r)
+ return r;
+
sdma_v3_0_set_ring_funcs(adev);
sdma_v3_0_set_buffer_funcs(adev);
sdma_v3_0_set_vm_pte_funcs(adev);
@@ -1105,11 +1108,11 @@ static int sdma_v3_0_early_init(void *handle)
return 0;
}
-static int sdma_v3_0_sw_init(void *handle)
+static int sdma_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
@@ -1129,12 +1132,6 @@ static int sdma_v3_0_sw_init(void *handle)
if (r)
return r;
- r = sdma_v3_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
@@ -1157,9 +1154,9 @@ static int sdma_v3_0_sw_init(void *handle)
return r;
}
-static int sdma_v3_0_sw_fini(void *handle)
+static int sdma_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -1169,10 +1166,10 @@ static int sdma_v3_0_sw_fini(void *handle)
return 0;
}
-static int sdma_v3_0_hw_init(void *handle)
+static int sdma_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v3_0_init_golden_registers(adev);
@@ -1183,9 +1180,9 @@ static int sdma_v3_0_hw_init(void *handle)
return r;
}
-static int sdma_v3_0_hw_fini(void *handle)
+static int sdma_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v3_0_ctx_switch_enable(adev, false);
sdma_v3_0_enable(adev, false);
@@ -1193,23 +1190,19 @@ static int sdma_v3_0_hw_fini(void *handle)
return 0;
}
-static int sdma_v3_0_suspend(void *handle)
+static int sdma_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v3_0_hw_fini(adev);
+ return sdma_v3_0_hw_fini(ip_block);
}
-static int sdma_v3_0_resume(void *handle)
+static int sdma_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v3_0_hw_init(adev);
+ return sdma_v3_0_hw_init(ip_block);
}
-static bool sdma_v3_0_is_idle(void *handle)
+static bool sdma_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1219,11 +1212,11 @@ static bool sdma_v3_0_is_idle(void *handle)
return true;
}
-static int sdma_v3_0_wait_for_idle(void *handle)
+static int sdma_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1236,9 +1229,9 @@ static int sdma_v3_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static bool sdma_v3_0_check_soft_reset(void *handle)
+static bool sdma_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS2);
@@ -1257,9 +1250,9 @@ static bool sdma_v3_0_check_soft_reset(void *handle)
}
}
-static int sdma_v3_0_pre_soft_reset(void *handle)
+static int sdma_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
if (!adev->sdma.srbm_soft_reset)
@@ -1276,9 +1269,9 @@ static int sdma_v3_0_pre_soft_reset(void *handle)
return 0;
}
-static int sdma_v3_0_post_soft_reset(void *handle)
+static int sdma_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
if (!adev->sdma.srbm_soft_reset)
@@ -1295,9 +1288,9 @@ static int sdma_v3_0_post_soft_reset(void *handle)
return 0;
}
-static int sdma_v3_0_soft_reset(void *handle)
+static int sdma_v3_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp;
@@ -1492,10 +1485,10 @@ static void sdma_v3_0_update_sdma_medium_grain_light_sleep(
}
}
-static int sdma_v3_0_set_clockgating_state(void *handle,
+static int sdma_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1515,15 +1508,15 @@ static int sdma_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v3_0_set_powergating_state(void *handle,
+static int sdma_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1543,7 +1536,6 @@ static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags)
static const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
.name = "sdma_v3_0",
.early_init = sdma_v3_0_early_init,
- .late_init = NULL,
.sw_init = sdma_v3_0_sw_init,
.sw_fini = sdma_v3_0_sw_fini,
.hw_init = sdma_v3_0_hw_init,
@@ -1622,7 +1614,7 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: unused
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -1632,7 +1624,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index cd37f45e01a1..f38004e6064e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -72,6 +72,53 @@ MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin");
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_0[] = { /* SDMA0 register list; NOTE(review): consumer is not visible in this hunk */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), /* engine-level status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), /* UTCL1_* read/write status and XNACK registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), /* GFX_* RB/IB registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), /* PAGE_* RB/IB registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), /* RLC0_* RB/IB registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL)
+};
+
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
@@ -469,7 +516,7 @@ static int sdma_v4_0_irq_id_to_seq(unsigned client_id)
static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
soc15_program_register_sequence(adev,
golden_settings_sdma_4,
@@ -539,7 +586,7 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev)
* The only chips with SDMAv4 and ULV are VG10 and VG20.
* Server SKUs take a different hysteresis setting from other SKUs.
*/
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
if (adev->pdev->device == 0x6860)
break;
@@ -578,8 +625,10 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
int ret, i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) ||
- adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) {
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 0)) {
/* Acturus & Aldebaran will leverage the same FW memory
for every SDMA instance */
ret = amdgpu_sdma_init_microcode(adev, 0, true);
@@ -875,8 +924,6 @@ static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
@@ -911,8 +958,6 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
@@ -978,7 +1023,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
* Arcturus for the moment and firmware version 14
* and above.
*/
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) &&
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) &&
adev->sdma.instance[i].fw_version >= 14)
WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
/* Extend page fault timeout to avoid interrupt storm */
@@ -1255,7 +1301,7 @@ static void sdma_v4_0_init_pg(struct amdgpu_device *adev)
if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA))
return;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 1, 0):
case IP_VERSION(4, 1, 1):
case IP_VERSION(4, 1, 2):
@@ -1399,13 +1445,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
r = amdgpu_ring_test_helper(page);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == page)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return r;
@@ -1525,7 +1565,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1698,7 +1738,7 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
{
uint fw_version = adev->sdma.instance[0].fw_version;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
return fw_version >= 430;
case IP_VERSION(4, 0, 1):
@@ -1711,19 +1751,17 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
}
}
-static int sdma_v4_0_early_init(void *handle)
+static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = sdma_v4_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
+ if (r)
return r;
- }
/* TODO: Page queue breaks driver reload under SRIOV */
- if ((adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 0, 0)) &&
+ if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 0, 0)) &&
amdgpu_sriov_vf((adev)))
adev->sdma.has_page_queue = false;
else if (sdma_v4_0_fw_support_paging_queue(adev))
@@ -1742,26 +1780,25 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
-static int sdma_v4_0_late_init(void *handle)
+static int sdma_v4_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v4_0_setup_ulv(adev);
- if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
- if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
- adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count)
- adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev);
- }
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
return 0;
}
-static int sdma_v4_0_sw_init(void *handle)
+static int sdma_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
+ uint32_t *ptr;
/* SDMA trap event */
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1823,7 +1860,9 @@ static int sdma_v4_0_sw_init(void *handle)
* On Arcturus, SDMA instance 5~7 has a different vmhub
* type(AMDGPU_MMHUB1).
*/
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5)
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) &&
+ i >= 5)
ring->vm_hub = AMDGPU_MMHUB1(0);
else
ring->vm_hub = AMDGPU_MMHUB0(0);
@@ -1843,8 +1882,10 @@ static int sdma_v4_0_sw_init(void *handle)
/* paging queue use same doorbell index/routing as gfx queue
* with 0x400 (4096 dwords) offset on second doorbell page
*/
- if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) &&
- adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0)) {
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(4, 0, 0) &&
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) <
+ IP_VERSION(4, 2, 0)) {
ring->doorbell_index =
adev->doorbell_index.sdma_engine[i] << 1;
ring->doorbell_index += 0x400;
@@ -1856,7 +1897,9 @@ static int sdma_v4_0_sw_init(void *handle)
(adev->doorbell_index.sdma_engine[i] + 1) << 1;
}
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5)
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 2) &&
+ i >= 5)
ring->vm_hub = AMDGPU_MMHUB1(0);
else
ring->vm_hub = AMDGPU_MMHUB0(0);
@@ -1876,12 +1919,19 @@ static int sdma_v4_0_sw_init(void *handle)
return -EINVAL;
}
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
return r;
}
-static int sdma_v4_0_sw_fini(void *handle)
+static int sdma_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1890,21 +1940,23 @@ static int sdma_v4_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->sdma.instance[i].page);
}
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) ||
- adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0))
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 2, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 0))
amdgpu_sdma_destroy_inst_ctx(adev, true);
else
amdgpu_sdma_destroy_inst_ctx(adev, false);
+ kfree(adev->sdma.ip_dump);
+
return 0;
}
-static int sdma_v4_0_hw_init(void *handle)
+static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false, 0);
if (!amdgpu_sriov_vf(adev))
sdma_v4_0_init_golden_registers(adev);
@@ -1912,16 +1964,13 @@ static int sdma_v4_0_hw_init(void *handle)
return sdma_v4_0_start(adev);
}
-static int sdma_v4_0_hw_fini(void *handle)
+static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- if (amdgpu_sriov_vf(adev)) {
- /* disable the scheduler for SDMA */
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ if (amdgpu_sriov_vf(adev))
return 0;
- }
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1934,14 +1983,14 @@ static int sdma_v4_0_hw_fini(void *handle)
sdma_v4_0_enable(adev, false);
if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true, 0);
return 0;
}
-static int sdma_v4_0_suspend(void *handle)
+static int sdma_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SMU saves SDMA state for us */
if (adev->in_s0ix) {
@@ -1949,27 +1998,26 @@ static int sdma_v4_0_suspend(void *handle)
return 0;
}
- return sdma_v4_0_hw_fini(adev);
+ return sdma_v4_0_hw_fini(ip_block);
}
-static int sdma_v4_0_resume(void *handle)
+static int sdma_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SMU restores SDMA state for us */
if (adev->in_s0ix) {
sdma_v4_0_enable(adev, true);
sdma_v4_0_gfx_enable(adev, true);
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
return 0;
}
- return sdma_v4_0_hw_init(adev);
+ return sdma_v4_0_hw_init(ip_block);
}
-static bool sdma_v4_0_is_idle(void *handle)
+static bool sdma_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1982,11 +2030,11 @@ static bool sdma_v4_0_is_idle(void *handle)
return true;
}
-static int sdma_v4_0_wait_for_idle(void *handle)
+static int sdma_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i, j;
u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
for (j = 0; j < adev->sdma.num_instances; j++) {
@@ -2001,7 +2049,7 @@ static int sdma_v4_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v4_0_soft_reset(void *handle)
+static int sdma_v4_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
@@ -2027,23 +2075,28 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- uint32_t instance;
+ int instance;
DRM_DEBUG("IH: SDMA trap\n");
instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0)
+ return instance;
+
switch (entry->ring_id) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[instance].ring);
break;
case 1:
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 0))
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 2, 0))
amdgpu_fence_process(&adev->sdma.instance[instance].page);
break;
case 2:
/* XXX compute */
break;
case 3:
- if (adev->ip_versions[SDMA0_HWIP][0] != IP_VERSION(4, 2, 0))
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) !=
+ IP_VERSION(4, 2, 0))
amdgpu_fence_process(&adev->sdma.instance[instance].page);
break;
}
@@ -2112,7 +2165,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
int instance;
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
u64 addr;
instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
@@ -2124,15 +2177,20 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
dev_dbg_ratelimited(adev->dev,
- "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
- "pasid:%u, for process %s pid %d thread %s pid %d\n",
- instance, addr, entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n",
+ instance, addr, entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ dev_dbg_ratelimited(adev->dev,
+ " for process %s pid %d thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task.comm, task_info->task.pid);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
return 0;
}
@@ -2239,10 +2297,10 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
}
}
-static int sdma_v4_0_set_clockgating_state(void *handle,
+static int sdma_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -2254,12 +2312,12 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v4_0_set_powergating_state(void *handle,
+static int sdma_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 1, 0):
case IP_VERSION(4, 1, 1):
case IP_VERSION(4, 1, 2):
@@ -2273,9 +2331,9 @@ static int sdma_v4_0_set_powergating_state(void *handle,
return 0;
}
-static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v4_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -2292,6 +2350,46 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
+/**
+ * sdma_v4_0_print_ip_state - print the saved SDMA register snapshot
+ * @ip_block: IP block whose adev owns the snapshot buffer (sdma.ip_dump)
+ * @p: drm_printer that receives the output
+ *
+ * Prints the instance count, then for every instance each register name from
+ * sdma_reg_list_4_0 next to the value stored in adev->sdma.ip_dump. Only the
+ * buffer is read here; this function performs no register reads itself.
+ * Does nothing when ip_dump is NULL.
+ */
+static void sdma_v4_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
+ uint32_t instance_offset;
+
+ /* No snapshot buffer, nothing to print */
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* ip_dump holds reg_count consecutive values per instance */
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_0[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+/**
+ * sdma_v4_0_dump_ip_state - capture SDMA registers into the snapshot buffer
+ * @ip_block: IP block whose adev owns the snapshot buffer (sdma.ip_dump)
+ *
+ * For every SDMA instance, reads each register listed in sdma_reg_list_4_0
+ * with RREG32() and stores the value in adev->sdma.ip_dump, using the same
+ * per-instance layout that sdma_v4_0_print_ip_state() reads back.
+ * Does nothing when ip_dump is NULL.
+ */
+static void sdma_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
+
+ /* No snapshot buffer, nowhere to store the values */
+ if (!adev->sdma.ip_dump)
+ return;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* ip_dump holds reg_count consecutive values per instance */
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ /* offset for instance i is resolved by sdma_v4_0_get_reg_offset() */
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v4_0_get_reg_offset(adev, i,
+ sdma_reg_list_4_0[j].reg_offset));
+ }
+}
+
const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
.name = "sdma_v4_0",
.early_init = sdma_v4_0_early_init,
@@ -2308,6 +2406,8 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
.set_clockgating_state = sdma_v4_0_set_clockgating_state,
.set_powergating_state = sdma_v4_0_set_powergating_state,
.get_clockgating_state = sdma_v4_0_get_clockgating_state,
+ .dump_ip_state = sdma_v4_0_dump_ip_state,
+ .print_ip_state = sdma_v4_0_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
@@ -2451,7 +2551,7 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine (VEGA10/12).
* Used by the amdgpu ttm implementation to move pages if
@@ -2461,11 +2561,11 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
@@ -2622,7 +2722,7 @@ static struct amdgpu_sdma_ras sdma_v4_0_ras = {
static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 2, 0):
case IP_VERSION(4, 2, 2):
adev->sdma.ras = &sdma_v4_0_ras;
@@ -2633,7 +2733,6 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
default:
break;
}
-
}
const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index f413898dda37..36b1ca73c2ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -30,6 +30,7 @@
#include "amdgpu_xcp.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
#include "sdma/sdma_4_4_2_offset.h"
#include "sdma/sdma_4_4_2_sh_mask.h"
@@ -44,6 +45,57 @@
#include "amdgpu_ras.h"
MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin");
+MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin");
+
+/*
+ * Named SDMA 4.4.2 register entries: engine status and ucode checksum,
+ * ring read-pointer fetch, UTCL1 read/write status, the GFX, PAGE and RLC0
+ * queue ring-buffer and IB registers, and VM_CNTL.
+ * NOTE(review): presumably the set captured by this file's IP-state dump
+ * callbacks, mirroring how sdma_v4_0.c uses sdma_reg_list_4_0 - confirm.
+ */
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA_VM_CNTL)
+};
+
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400
#define WREG32_SDMA(instance, offset, value) \
WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value)
@@ -55,6 +107,11 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev);
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring);
+static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev,
+ u32 instance_id);
static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
u32 instance, u32 offset)
@@ -80,7 +137,7 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
}
}
-static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
+static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned client_id)
{
switch (client_id) {
case SOC15_IH_CLIENTID_SDMA0:
@@ -88,9 +145,15 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
case SOC15_IH_CLIENTID_SDMA1:
return 1;
case SOC15_IH_CLIENTID_SDMA2:
- return 2;
+ if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+ return 0;
+ else
+ return 2;
case SOC15_IH_CLIENTID_SDMA3:
- return 3;
+ if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+ return 1;
+ else
+ return 3;
default:
return -EINVAL;
}
@@ -132,7 +195,9 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev)
int ret, i;
for (i = 0; i < adev->sdma.num_instances; i++) {
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2)) {
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
ret = amdgpu_sdma_init_microcode(adev, 0, true);
break;
} else {
@@ -154,13 +219,13 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev)
*/
static uint64_t sdma_v4_4_2_ring_get_rptr(struct amdgpu_ring *ring)
{
- u64 *rptr;
+ u64 rptr;
/* XXX check if swapping is necessary on BE */
- rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+ rptr = READ_ONCE(*((u64 *)&ring->adev->wb.wb[ring->rptr_offs]));
- DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
- return ((*rptr) >> 2);
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", rptr);
+ return rptr >> 2;
}
/**
@@ -365,7 +430,8 @@ static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0
+ << (ring->me % adev->sdma.num_inst_per_aid);
sdma_v4_4_2_wait_reg_mem(ring, 0, 1,
adev->nbio.funcs->get_hdp_flush_done_offset(adev),
@@ -426,23 +492,34 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
uint32_t inst_mask)
{
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
- u32 rb_cntl, ib_cntl;
- int i, unset = 0;
+ u32 doorbell_offset, doorbell;
+ u32 rb_cntl, ib_cntl, sdma_cntl;
+ int i;
for_each_inst(i, inst_mask) {
sdma[i] = &adev->sdma.instance[i].ring;
- if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- unset = 1;
- }
-
rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 0);
WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+ sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl);
+
+ if (sdma[i]->use_doorbell) {
+ doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
+ doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
+
+ doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE, 0);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset,
+ SDMA_GFX_DOORBELL_OFFSET,
+ OFFSET, 0);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell);
+ WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset);
+ }
}
}
@@ -471,20 +548,10 @@ static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev,
static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
uint32_t inst_mask)
{
- struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 rb_cntl, ib_cntl;
int i;
- bool unset = false;
for_each_inst(i, inst_mask) {
- sdma[i] = &adev->sdma.instance[i].page;
-
- if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
- (!unset)) {
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
- unset = true;
- }
-
rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
RB_ENABLE, 0);
@@ -596,7 +663,7 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
/* Set ring buffer size in dwords */
uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
- barrier(); /* work around https://bugs.llvm.org/show_bug.cgi?id=42576 */
+ barrier(); /* work around https://llvm.org/pr42576 */
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
@@ -611,11 +678,12 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
*
* @adev: amdgpu_device pointer
* @i: instance to resume
+ * @restore: if true, restore the ring pointers instead of resetting them to 0
*
* Set up the gfx DMA ring buffers and enable them.
* Returns 0 for success, error for failure.
*/
-static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
+static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
@@ -623,6 +691,7 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
+ u64 rwptr;
wb_offset = (ring->rptr_offs * 4);
@@ -630,12 +699,6 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
- WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
- WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
- WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
-
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI,
upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
@@ -648,11 +711,33 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8);
WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
- ring->wptr = 0;
+ if (!restore)
+ ring->wptr = 0;
/* before programing wptr to a less value, need set minor_ptr_update first */
WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);
+ /* For a guilty queue, set RPTR to the current wptr to skip the bad commands.
+ * For a queue that is not guilty, restore cached_rptr and continue execution.
+ */
+ if (adev->sdma.instance[i].gfx_guilty)
+ rwptr = ring->wptr;
+ else
+ rwptr = ring->cached_rptr;
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(rwptr << 2));
+ } else {
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
+ }
+
doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
@@ -699,11 +784,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
*
* @adev: amdgpu_device pointer
* @i: instance to resume
+ * @restore: if true, restore the ring pointers instead of resetting them to 0
*
* Set up the page DMA ring buffers and enable them.
* Returns 0 for success, error for failure.
*/
-static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
+static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
@@ -711,6 +797,7 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
+ u64 rwptr;
wb_offset = (ring->rptr_offs * 4);
@@ -718,11 +805,26 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+ /* For a guilty queue, set RPTR to the current wptr to skip the bad commands.
+ * For a queue that is not guilty, restore cached_rptr and continue execution.
+ */
+ if (adev->sdma.instance[i].page_guilty)
+ rwptr = ring->wptr;
+ else
+ rwptr = ring->cached_rptr;
+
/* Initialize the ring buffer's read and write pointers */
- WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
- WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
- WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
- WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+ if (restore) {
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(rwptr << 2));
+ } else {
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+ }
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI,
@@ -736,7 +838,8 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8);
WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
- ring->wptr = 0;
+ if (!restore)
+ ring->wptr = 0;
/* before programing wptr to a less value, need set minor_ptr_update first */
WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1);
@@ -855,12 +958,13 @@ static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @inst_mask: mask of dma engine instances to be enabled
+ * @restore: if true, skip the microcode load and restore the ring pointers
*
* Set up the DMA engines and enable them.
* Returns 0 for success, error for failure.
*/
static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
- uint32_t inst_mask)
+ uint32_t inst_mask, bool restore)
{
struct amdgpu_ring *ring;
uint32_t tmp_mask;
@@ -871,7 +975,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
sdma_v4_4_2_inst_enable(adev, false, inst_mask);
} else {
/* bypass sdma microcode loading on Gopher */
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
+ if (!restore && adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
adev->sdma.instance[0].fw) {
r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
if (r)
@@ -890,17 +994,20 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
uint32_t temp;
WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
- sdma_v4_4_2_gfx_resume(adev, i);
+ sdma_v4_4_2_gfx_resume(adev, i, restore);
if (adev->sdma.has_page_queue)
- sdma_v4_4_2_page_resume(adev, i);
+ sdma_v4_4_2_page_resume(adev, i, restore);
/* set utc l1 enable flag always to 1 */
temp = RREG32_SDMA(i, regSDMA_CNTL);
temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
- /* enable context empty interrupt during initialization */
- temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
WREG32_SDMA(i, regSDMA_CNTL, temp);
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) {
+ /* enable context empty interrupt during initialization */
+ temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_CNTL, temp);
+ }
if (!amdgpu_sriov_vf(adev)) {
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
/* unhalt engine */
@@ -934,13 +1041,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
r = amdgpu_ring_test_helper(page);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == page)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return r;
@@ -1060,7 +1161,7 @@ static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1231,24 +1332,29 @@ static void sdma_v4_4_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t re
static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
return false;
default:
return false;
}
}
-static int sdma_v4_4_2_early_init(void *handle)
+/*
+ * SDMA engine-reset callbacks for this IP version: stop the kernel queues,
+ * restart them, and trigger the engine soft reset. sdma_v4_4_2_sw_init()
+ * stores a pointer to this table in every SDMA instance's .funcs field.
+ */
+static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v4_4_2_stop_queue,
+ .start_kernel_queue = &sdma_v4_4_2_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v4_4_2_soft_reset_engine,
+};
+
+static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = sdma_v4_4_2_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
+ if (r)
return r;
- }
/* TODO: Page queue breaks driver reload under SRIOV */
if (sdma_v4_4_2_fw_support_paging_queue(adev))
@@ -1259,7 +1365,6 @@ static int sdma_v4_4_2_early_init(void *handle)
sdma_v4_4_2_set_vm_pte_funcs(adev);
sdma_v4_4_2_set_irq_funcs(adev);
sdma_v4_4_2_set_ras_funcs(adev);
-
return 0;
}
@@ -1269,29 +1374,34 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
#endif
-static int sdma_v4_4_2_late_init(void *handle)
+static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
#if 0
struct ras_ih_if ih_info = {
.cb = sdma_v4_4_2_process_ras_data_cb,
};
#endif
- if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
- if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
- adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count)
- adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev);
- }
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
+
+ /* The initialization is done in the late_init stage to ensure that the SMU
+ * initialization and capability setup are completed before we check the SDMA
+ * reset capability
+ */
+ sdma_v4_4_2_update_reset_mask(adev);
return 0;
}
-static int sdma_v4_4_2_sw_init(void *handle)
+static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 aid_id;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2);
+ uint32_t *ptr;
/* SDMA trap event */
for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
@@ -1336,9 +1446,21 @@ static int sdma_v4_4_2_sw_init(void *handle)
&adev->sdma.srbm_write_irq);
if (r)
return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_CTXEMPTY,
+ &adev->sdma.ctxt_empty_irq);
+ if (r)
+ return r;
}
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ /* Initialize guilty flags for GFX and PAGE queues */
+ adev->sdma.instance[i].gfx_guilty = false;
+ adev->sdma.instance[i].page_guilty = false;
+ adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs;
+
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -1382,17 +1504,31 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+
if (amdgpu_sdma_ras_sw_init(adev)) {
dev_err(adev->dev, "fail to initialize sdma ras block\n");
return -EINVAL;
}
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v4_4_2_sw_fini(void *handle)
+static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1401,32 +1537,37 @@ static int sdma_v4_4_2_sw_fini(void *handle)
amdgpu_ring_fini(&adev->sdma.instance[i].page);
}
- if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2))
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5))
amdgpu_sdma_destroy_inst_ctx(adev, true);
else
amdgpu_sdma_destroy_inst_ctx(adev, false);
+ kfree(adev->sdma.ip_dump);
+
return 0;
}
-static int sdma_v4_4_2_hw_init(void *handle)
+static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t inst_mask;
inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
if (!amdgpu_sriov_vf(adev))
sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
- r = sdma_v4_4_2_inst_start(adev, inst_mask);
+ r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
return r;
}
-static int sdma_v4_4_2_hw_fini(void *handle)
+static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t inst_mask;
int i;
@@ -1447,29 +1588,27 @@ static int sdma_v4_4_2_hw_fini(void *handle)
return 0;
}
-static int sdma_v4_4_2_set_clockgating_state(void *handle,
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
-static int sdma_v4_4_2_suspend(void *handle)
+static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_in_reset(adev))
- sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ sdma_v4_4_2_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
- return sdma_v4_4_2_hw_fini(adev);
+ return sdma_v4_4_2_hw_fini(ip_block);
}
-static int sdma_v4_4_2_resume(void *handle)
+static int sdma_v4_4_2_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v4_4_2_hw_init(adev);
+ return sdma_v4_4_2_hw_init(ip_block);
}
-static bool sdma_v4_4_2_is_idle(void *handle)
+static bool sdma_v4_4_2_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1482,11 +1621,11 @@ static bool sdma_v4_4_2_is_idle(void *handle)
return true;
}
-static int sdma_v4_4_2_wait_for_idle(void *handle)
+static int sdma_v4_4_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i, j;
u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
for (j = 0; j < adev->sdma.num_instances; j++) {
@@ -1501,13 +1640,109 @@ static int sdma_v4_4_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v4_4_2_soft_reset(void *handle)
+static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
}
+/**
+ * sdma_v4_4_2_is_queue_selected - test the SELECTED bit of a queue's context status
+ * @adev: amdgpu_device pointer
+ * @instance_id: SDMA instance whose register is read
+ * @is_page_queue: true to read the PAGE queue status, false for the GFX queue
+ *
+ * Returns true when the SELECTED bit is set in the chosen CONTEXT_STATUS
+ * register, false otherwise.
+ */
+static bool sdma_v4_4_2_is_queue_selected(struct amdgpu_device *adev, uint32_t instance_id, bool is_page_queue)
+{
+	uint32_t status_reg;
+	uint32_t status;
+
+	if (is_page_queue)
+		status_reg = regSDMA_PAGE_CONTEXT_STATUS;
+	else
+		status_reg = regSDMA_GFX_CONTEXT_STATUS;
+
+	status = RREG32(sdma_v4_4_2_get_reg_offset(adev, instance_id, status_reg));
+
+	/*
+	 * The GFX mask is applied to both registers.
+	 * NOTE(review): presumably the PAGE register keeps SELECTED at the
+	 * same bit position - confirm against the register headers.
+	 */
+	return !!(status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK);
+}
+
+/**
+ * sdma_v4_4_2_reset_queue - ring .reset callback for the SDMA gfx and page rings
+ * @ring: ring whose SDMA instance (ring->me) is reset
+ * @vmid: not used by this implementation
+ * @timedout_fence: not used by this implementation
+ *
+ * KFD is suspended before the engine reset and resumed afterwards whether or
+ * not the reset succeeded.
+ * Returns the result of amdgpu_sdma_reset_engine().
+ */
+static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring,
+				   unsigned int vmid,
+				   struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int ret;
+
+	amdgpu_amdkfd_suspend(adev, true);
+	ret = amdgpu_sdma_reset_engine(adev, ring->me, false);
+	amdgpu_amdkfd_resume(adev, true);
+
+	return ret;
+}
+
+/**
+ * sdma_v4_4_2_stop_queue - record queue state and stop an instance's queues
+ * @ring: ring belonging to the SDMA instance (ring->me) to stop
+ *
+ * Records which queue is currently selected (treated as the guilty one),
+ * caches the read pointers, then stops the gfx queue and, when present, the
+ * page queue of the instance.
+ * Returns 0 on success, -EINVAL when running as an SR-IOV VF.
+ */
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const u32 inst = ring->me;
+	const u32 inst_mask = 1 << ring->me;
+	const bool has_page = adev->sdma.has_page_queue;
+	struct amdgpu_ring *page_ring = &adev->sdma.instance[inst].page;
+
+	if (amdgpu_sriov_vf(adev))
+		return -EINVAL;
+
+	/* A queue whose SELECTED status bit is set is flagged as guilty */
+	adev->sdma.instance[inst].gfx_guilty =
+		sdma_v4_4_2_is_queue_selected(adev, inst, false);
+	if (has_page)
+		adev->sdma.instance[inst].page_guilty =
+			sdma_v4_4_2_is_queue_selected(adev, inst, true);
+
+	/*
+	 * Save the read pointers now: once the engine has been reset all of
+	 * its registers read back as 0.
+	 */
+	ring->cached_rptr = amdgpu_ring_get_rptr(ring);
+	if (has_page)
+		page_ring->cached_rptr = amdgpu_ring_get_rptr(page_ring);
+
+	/* Finally stop the gfx queue, then the page queue if there is one */
+	sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+	if (has_page)
+		sdma_v4_4_2_inst_page_stop(adev, inst_mask);
+
+	return 0;
+}
+
+/**
+ * sdma_v4_4_2_restore_queue - restart an SDMA instance after an engine reset
+ * @ring: ring belonging to the SDMA instance (ring->me) to restart
+ *
+ * Waits 50us, then polls the HALT field of SDMA_F32_CNTL once per microsecond
+ * for up to adev->usec_timeout iterations. As soon as HALT reads as clear the
+ * instance is started again in restore mode.
+ * Returns the result of sdma_v4_4_2_inst_start(), or -ETIMEDOUT if HALT never
+ * cleared.
+ */
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const u32 inst_mask = 1 << ring->me;
+	int tries;
+
+	udelay(50);
+
+	for (tries = 0; tries < adev->usec_timeout; tries++) {
+		/* Engine left the halted state: bring the queues back up */
+		if (!REG_GET_FIELD(RREG32_SDMA(ring->me, regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT))
+			return sdma_v4_4_2_inst_start(adev, inst_mask, true);
+		udelay(1);
+	}
+
+	dev_err(adev->dev, "timed out waiting for SDMA%d unhalt after reset\n",
+		ring->me);
+	return -ETIMEDOUT;
+}
+
+/*
+ * Soft-reset one SDMA engine through the DPM interface.
+ * @adev: amdgpu_device pointer
+ * @instance_id: logical SDMA instance to reset
+ * Returns the result of amdgpu_dpm_reset_sdma().
+ */
+static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev,
+ u32 instance_id)
+{
+ /* For SDMA 4.x, use the existing DPM interface for backward compatibility.
+ * The logical instance ID is converted to the physical instance ID with
+ * GET_INST() and passed as a single-bit mask.
+ */
+ return amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id));
+}
+
static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1530,7 +1765,7 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
uint32_t instance, i;
DRM_DEBUG("IH: SDMA trap\n");
- instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
/* Client id gives the SDMA instance in AID. To know the exact SDMA
* instance, interrupt entry gives the node id which corresponds to the AID instance.
@@ -1553,6 +1788,9 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
case 0:
amdgpu_fence_process(&adev->sdma.instance[i].ring);
break;
+ case 1:
+ amdgpu_fence_process(&adev->sdma.instance[i].page);
+ break;
default:
break;
}
@@ -1573,7 +1811,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
goto out;
- instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0)
goto out;
@@ -1592,7 +1830,7 @@ static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev,
DRM_ERROR("Illegal instruction in SDMA command stream\n");
- instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0)
return 0;
@@ -1612,19 +1850,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
u32 sdma_cntl;
sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL,
- DRAM_ECC_INT_ENABLE, 0);
- WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
- break;
- /* sdma ecc interrupt is enabled by default
- * driver doesn't need to do anything to
- * enable the interrupt */
- case AMDGPU_IRQ_STATE_ENABLE:
- default:
- break;
- }
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
return 0;
}
@@ -1633,10 +1861,10 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
int instance;
- struct amdgpu_task_info task_info;
+ struct amdgpu_task_info *task_info;
u64 addr;
- instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+ instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
if (instance < 0 || instance >= adev->sdma.num_instances) {
dev_err(adev->dev, "sdma instance invalid %d\n", instance);
return -EINVAL;
@@ -1645,15 +1873,19 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
addr = (u64)entry->src_data[0] << 12;
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
dev_dbg_ratelimited(adev->dev,
- "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
- "pasid:%u, for process %s pid %d thread %s pid %d\n",
- instance, addr, entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid, task_info.process_name, task_info.tgid,
- task_info.task_name, task_info.pid);
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n",
+ instance, addr, entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid);
+
+ task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+ if (task_info) {
+ dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task.comm, task_info->task.pid);
+ amdgpu_vm_put_task_info(task_info);
+ }
+
return 0;
}
@@ -1696,6 +1928,16 @@ static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev,
return 0;
}
+/*
+ * Handler for the SDMA context-empty interrupt.
+ * @adev: amdgpu_device pointer
+ * @source: interrupt source (unused)
+ * @entry: interrupt vector entry, printed for debugging
+ *
+ * Only emits debug output; always returns 0.
+ */
+static int sdma_v4_4_2_process_ctxt_empty_irq(struct amdgpu_device *adev,
+					      struct amdgpu_irq_src *source,
+					      struct amdgpu_iv_entry *entry)
+{
+	/* There is nothing useful to be done here, only kept for debug */
+	/* Newline-terminated like every other log message in this file */
+	dev_dbg_ratelimited(adev->dev, "SDMA context empty interrupt\n");
+	sdma_v4_4_2_print_iv_entry(adev, entry);
+	return 0;
+}
+
static void sdma_v4_4_2_inst_update_medium_grain_light_sleep(
struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
{
@@ -1762,10 +2004,10 @@ static void sdma_v4_4_2_inst_update_medium_grain_clock_gating(
}
}
-static int sdma_v4_4_2_set_clockgating_state(void *handle,
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t inst_mask;
if (amdgpu_sriov_vf(adev))
@@ -1780,15 +2022,15 @@ static int sdma_v4_4_2_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v4_4_2_set_powergating_state(void *handle,
+static int sdma_v4_4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v4_4_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1805,6 +2047,46 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
+/*
+ * Print the register snapshot held in adev->sdma.ip_dump.
+ * @ip_block: SDMA IP block
+ * @p: drm printer receiving the output
+ *
+ * Does nothing when no dump buffer was allocated. The buffer holds one row
+ * of ARRAY_SIZE(sdma_reg_list_4_4_2) values per SDMA instance.
+ */
+static void sdma_v4_4_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const uint32_t nregs = ARRAY_SIZE(sdma_reg_list_4_4_2);
+	uint32_t *inst_vals;
+	int inst, reg;
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		/* Start of this instance's row in the flat dump buffer */
+		inst_vals = &adev->sdma.ip_dump[inst * nregs];
+		drm_printf(p, "\nInstance:%d\n", inst);
+
+		for (reg = 0; reg < nregs; reg++)
+			drm_printf(p, "%-50s \t 0x%08x\n",
+				   sdma_reg_list_4_4_2[reg].reg_name,
+				   inst_vals[reg]);
+	}
+}
+
+/*
+ * Snapshot every register in sdma_reg_list_4_4_2 for each SDMA instance
+ * into adev->sdma.ip_dump.
+ * @ip_block: SDMA IP block
+ *
+ * Does nothing when no dump buffer was allocated. Values are stored
+ * instance-major: one row of ARRAY_SIZE(sdma_reg_list_4_4_2) values per
+ * instance.
+ */
+static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const uint32_t nregs = ARRAY_SIZE(sdma_reg_list_4_4_2);
+	uint32_t *inst_vals;
+	int inst, reg;
+
+	if (!adev->sdma.ip_dump)
+		return;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		/* Start of this instance's row in the flat dump buffer */
+		inst_vals = &adev->sdma.ip_dump[inst * nregs];
+
+		for (reg = 0; reg < nregs; reg++)
+			inst_vals[reg] =
+				RREG32(sdma_v4_4_2_get_reg_offset(adev, inst,
+					sdma_reg_list_4_4_2[reg].reg_offset));
+	}
+}
+
const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = {
.name = "sdma_v4_4_2",
.early_init = sdma_v4_4_2_early_init,
@@ -1821,6 +2103,8 @@ const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = {
.set_clockgating_state = sdma_v4_4_2_set_clockgating_state,
.set_powergating_state = sdma_v4_4_2_set_powergating_state,
.get_clockgating_state = sdma_v4_4_2_get_clockgating_state,
+ .dump_ip_state = sdma_v4_4_2_dump_ip_state,
+ .print_ip_state = sdma_v4_4_2_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
@@ -1852,6 +2136,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = sdma_v4_4_2_reset_queue,
};
static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
@@ -1883,6 +2168,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = sdma_v4_4_2_reset_queue,
};
static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
@@ -1935,6 +2221,10 @@ static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = {
.process = sdma_v4_4_2_process_srbm_write_irq,
};
+/*
+ * Callbacks for the SDMA context-empty interrupt source; only .process is
+ * provided. Installed on adev->sdma.ctxt_empty_irq by
+ * sdma_v4_4_2_set_irq_funcs().
+ */
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ctxt_empty_irq_funcs = {
+ .process = sdma_v4_4_2_process_ctxt_empty_irq,
+};
+
static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
{
adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
@@ -1943,6 +2233,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.ctxt_empty_irq.num_types = adev->sdma.num_instances;
adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs;
@@ -1951,6 +2242,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs;
adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs;
adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs;
+ adev->sdma.ctxt_empty_irq.funcs = &sdma_v4_4_2_ctxt_empty_irq_funcs;
}
/**
@@ -1960,7 +2252,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine.
* Used by the amdgpu ttm implementation to move pages if
@@ -1970,11 +2262,11 @@ static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
@@ -2048,11 +2340,45 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
+/**
+ * sdma_v4_4_2_update_reset_mask - update the supported reset mask for SDMA
+ * @adev: Pointer to the AMDGPU device structure
+ *
+ * Adds AMDGPU_RESET_TYPE_PER_QUEUE to adev->sdma.supported_reset when the GC
+ * IP version is one of the handled versions, the MEC firmware is at least the
+ * minimum version for that GC version, and the DPM layer reports that SDMA
+ * reset is supported. Nothing is changed for SR-IOV VFs.
+ */
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
+{
+	int min_mec_fw;
+
+	/* per queue reset not supported for SRIOV */
+	if (amdgpu_sriov_vf(adev))
+		return;
+
+	/*
+	 * The user queue relies on both MEC fw and pmfw when an sdma queue is
+	 * reset, so both are checked here to skip old MEC fw and old pmfw.
+	 */
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(9, 4, 3):
+	case IP_VERSION(9, 4, 4):
+		min_mec_fw = 0xb0;
+		break;
+	case IP_VERSION(9, 5, 0):
+		min_mec_fw = 0xf;
+		break;
+	default:
+		return;
+	}
+
+	if (adev->gfx.mec_fw_version < min_mec_fw)
+		return;
+
+	if (amdgpu_dpm_reset_sdma_is_supported(adev))
+		adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+}
+
const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 4,
.minor = 4,
- .rev = 0,
+ .rev = 2,
.funcs = &sdma_v4_4_2_ip_funcs,
};
@@ -2064,7 +2390,7 @@ static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask)
if (!amdgpu_sriov_vf(adev))
sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
- r = sdma_v4_4_2_inst_start(adev, inst_mask);
+ r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
return r;
}
@@ -2131,6 +2457,11 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev,
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst);
+ unsigned long ue_count = 0;
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = adev->sdma.instance[sdma_inst].aid_id,
+ };
/* sdma v4_4_2 doesn't support query ce counts */
amdgpu_ras_inst_query_ras_error_count(adev,
@@ -2140,7 +2471,9 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev,
ARRAY_SIZE(sdma_v4_4_2_ras_memory_list),
sdma_dev_inst,
AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
- &err_data->ue_count);
+ &ue_count);
+
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
}
static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev,
@@ -2188,9 +2521,85 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = {
.reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count,
};
+/*
+ * ACA bank parser for SDMA.
+ * @handle: ACA handle the error is logged against
+ * @bank: bank being parsed; its aca_err_type is set according to @type
+ * @type: SMU error type (UE or CE)
+ * @data: unused
+ *
+ * A UE bank is logged with a count of 1; a CE bank is logged with the error
+ * count taken from the bank's MISC0 register.
+ * Returns the bank-info decode error if decoding fails, -EINVAL for any other
+ * @type, otherwise the result of aca_error_cache_log_bank_error().
+ */
+static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+				       enum aca_smu_type type, void *data)
+{
+	struct aca_bank_info info;
+	int err;
+
+	err = aca_bank_info_decode(bank, &info);
+	if (err)
+		return err;
+
+	if (type == ACA_SMU_TYPE_UE) {
+		bank->aca_err_type = ACA_ERROR_TYPE_UE;
+		return aca_error_cache_log_bank_error(handle, &info,
+						      ACA_ERROR_TYPE_UE, 1ULL);
+	}
+
+	if (type == ACA_SMU_TYPE_CE) {
+		u64 misc0 = bank->regs[ACA_REG_IDX_MISC0];
+
+		bank->aca_err_type = ACA_ERROR_TYPE_CE;
+		return aca_error_cache_log_bank_error(handle, &info,
+						      bank->aca_err_type,
+						      ACA_REG__MISC0__ERRCNT(misc0));
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * SMU error codes treated as SDMA errors; passed to
+ * aca_bank_check_error_codes() by sdma_v4_4_2_aca_bank_is_valid().
+ * NOTE(review): the previous note read "CODE_SDMA0 - CODE_SDMA4", but only
+ * four codes are listed - confirm the range against the smu driver if header
+ * file.
+ */
+static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 };
+
+/*
+ * Decide whether an ACA bank should be handled by the SDMA parser.
+ * @handle: ACA handle (supplies the device pointer)
+ * @bank: bank to check
+ * @type: SMU error type (unused)
+ * @data: unused
+ *
+ * Returns true only when the bank's IPID low instance id, with bit 0 masked
+ * off, equals mmSMNAID_AID0_MCA_SMU and aca_bank_check_error_codes() returns
+ * 0 for the SDMA error code list.
+ */
+static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+					  enum aca_smu_type type, void *data)
+{
+	u32 inst_id = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+
+	inst_id &= GENMASK(31, 1);
+	if (inst_id != mmSMNAID_AID0_MCA_SMU)
+		return false;
+
+	return !aca_bank_check_error_codes(handle->adev, bank,
+					   sdma_v4_4_2_err_codes,
+					   ARRAY_SIZE(sdma_v4_4_2_err_codes));
+}
+
+/* ACA bank callbacks for SDMA (parser and validity check); referenced by sdma_v4_4_2_aca_info */
+static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = {
+ .aca_bank_parser = sdma_v4_4_2_aca_bank_parser,
+ .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid,
+};
+
+/*
+ * ACA description bound to the SDMA RAS block by sdma_v4_4_2_ras_late_init():
+ * SMU hardware IP type, UE error mask, and the SDMA bank callbacks.
+ */
+static const struct aca_info sdma_v4_4_2_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &sdma_v4_4_2_aca_bank_ops,
+};
+
+/*
+ * RAS late-init hook for SDMA.
+ * @adev: amdgpu_device pointer
+ * @ras_block: RAS block descriptor forwarded to the common SDMA late init
+ *
+ * Runs the common SDMA RAS late init and, if that succeeds, binds the SDMA
+ * ACA info to the SDMA RAS block.
+ * Returns 0 on success or the first error encountered.
+ */
+static int sdma_v4_4_2_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+	int err;
+
+	err = amdgpu_sdma_ras_late_init(adev, ras_block);
+	if (!err)
+		err = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__SDMA,
+					  &sdma_v4_4_2_aca_info, NULL);
+
+	return err;
+}
+
static struct amdgpu_sdma_ras sdma_v4_4_2_ras = {
.ras_block = {
.hw_ops = &sdma_v4_4_2_ras_hw_ops,
+ .ras_late_init = sdma_v4_4_2_ras_late_init,
},
};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 1cc34efb455b..7dc67a22a7a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -59,10 +59,61 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin");
#define SDMA0_HYP_DEC_REG_END 0x5893
#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+/*
+ * SDMA v5.0 register list: engine status, UTCL1 status, the GFX, PAGE and RLC0
+ * ring-buffer / IB registers, interrupt status, VM control and GRBM_STATUS2.
+ * NOTE(review): presumably consumed by the sdma_v5_0 IP-state dump/print
+ * callbacks, in the same way sdma_reg_list_4_4_2 is used by sdma_v4_4_2 -
+ * confirm against the rest of the file.
+ */
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2)
+};
+
static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring);
static const struct soc15_reg_golden golden_settings_sdma_5[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
@@ -184,7 +235,7 @@ static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(5, 0, 0):
soc15_program_register_sequence(adev,
golden_settings_sdma_5,
@@ -249,35 +300,23 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
return ret;
}
-static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ /* this is the offset we need patch later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert dummy here and patch it later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
- unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
/**
* sdma_v5_0_ring_get_rptr - get the current read pointer
*
@@ -332,67 +371,36 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
DRM_DEBUG("Setting write pointer\n");
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
-
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
- *wptr_saved = ring->wptr << 2;
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, ring->wptr << 2);
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
- } else {
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
-
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index,
- ring->wptr << 2);
- }
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
- if (ring->use_doorbell) {
- DRM_DEBUG("Using doorbell -- "
- "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
- ring->wptr_offs,
- lower_32_bits(ring->wptr << 2),
- upper_32_bits(ring->wptr << 2));
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr << 2);
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
- } else {
- DRM_DEBUG("Not using doorbell -- "
- "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
- "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
- ring->me,
- lower_32_bits(ring->wptr << 2),
- ring->me,
- upper_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
- ring->me, mmSDMA0_GFX_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
- ring->me, mmSDMA0_GFX_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
- }
+ DRM_DEBUG("Not using doorbell -- "
+ "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
}
}
@@ -538,11 +546,9 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
}
if (flags & AMDGPU_FENCE_FLAG_INT) {
- uint32_t ctx = ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
- amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
}
@@ -551,17 +557,15 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
* sdma_v5_0_gfx_stop - stop the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
+ * @inst_mask: mask of dma engine instances to be disabled
* Stop the gfx async dma ring buffers (NAVI10).
*/
-static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask)
{
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
@@ -653,9 +657,11 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
int i;
+ uint32_t inst_mask;
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); /* one bit per SDMA instance */
if (!enable) {
- sdma_v5_0_gfx_stop(adev);
+ sdma_v5_0_gfx_stop(adev, inst_mask); /* already a bitmask: must not be shifted again */
sdma_v5_0_rlc_stop(adev);
}
@@ -670,14 +676,16 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v5_0_gfx_resume - setup and start the async dma engines
+ * sdma_v5_0_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them (NAVI10).
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+static int sdma_v5_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
@@ -687,147 +695,165 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
u32 temp;
u32 wptr_poll_cntl;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
+ ring = &adev->sdma.instance[i].ring;
- if (!amdgpu_sriov_vf(adev))
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
-
- /* setup the wptr shadow polling */
- wptr_gpu_addr = ring->wptr_gpu_addr;
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
- mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
- SDMA0_GFX_RB_WPTR_POLL_CNTL,
- F32_POLL_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
- wptr_poll_cntl);
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
-
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE),
- ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI),
- ring->gpu_addr >> 40);
-
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE),
+ ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI),
+ ring->gpu_addr >> 40);
+
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
- doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
- mmSDMA0_GFX_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET),
- doorbell_offset);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET),
+ doorbell_offset);
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index, 20);
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index, 20);
- if (amdgpu_sriov_vf(adev))
- sdma_v5_0_ring_set_wptr(ring);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_0_ring_set_wptr(ring);
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
- if (!amdgpu_sriov_vf(adev)) {
- /* set utc l1 enable flag always to 1 */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-
- /* enable MCBP */
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
- /* Set up RESP_MODE to non-copy addresses */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
-
- /* program default cache read and write policy */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- temp &= 0xFF0FFF;
- temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
- }
+ if (!amdgpu_sriov_vf(adev)) {
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+ }
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v5_0_ctx_switch_enable(adev, true);
- sdma_v5_0_enable(adev, true);
- }
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_0_ctx_switch_enable(adev, true);
+ sdma_v5_0_enable(adev, true);
+ }
+
+ return amdgpu_ring_test_helper(ring);
+}
- r = amdgpu_ring_test_helper(ring);
+/**
+ * sdma_v5_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v5_0_gfx_resume_instance(adev, i, false);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -991,28 +1017,18 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
u32 tmp;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
- return r;
- }
-
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
}
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
@@ -1029,10 +1045,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@@ -1044,8 +1057,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1068,38 +1080,24 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
u32 tmp = 0;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ib_get(adev, NULL, 256,
- AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err0;
- }
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
@@ -1127,10 +1125,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err1;
}
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
r = 0;
@@ -1138,11 +1133,10 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1333,9 +1327,44 @@ static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
-static int sdma_v5_0_early_init(void *handle)
+/**
+ * sdma_v5_0_soft_reset_engine - pulse the GRBM soft-reset bit of one SDMA engine
+ *
+ * @adev: amdgpu_device pointer
+ * @instance_id: SDMA instance; the SOFT_RESET_SDMA0 field value is shifted
+ *               left by this amount to select the bit that is toggled
+ *
+ * Sets the selected bit in GRBM_SOFT_RESET, waits 50us and clears it again.
+ * Always returns 0.
+ */
+static int sdma_v5_0_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id)
+{
+ u32 grbm_soft_reset;
+ u32 tmp;
+
+ /* build the SDMA0 reset bit, then move it to the requested instance */
+ grbm_soft_reset = REG_SET_FIELD(0,
+ GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
+ 1);
+ grbm_soft_reset <<= instance_id;
+
+ /* assert reset (read-modify-write so other reset bits are preserved) */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ /* NOTE(review): read-back presumably flushes the posted write - confirm */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ /* de-assert reset, starting from the value just read back */
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ return 0;
+}
+
+/*
+ * SDMA callbacks for v5.0: stop, restart and soft-reset of the kernel queue.
+ * sw_init stores a pointer to this table in every SDMA instance.
+ */
+static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v5_0_stop_queue,
+ .start_kernel_queue = &sdma_v5_0_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine,
+};
+
+static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = sdma_v5_0_init_microcode(adev);
+ if (r)
+ return r;
sdma_v5_0_set_ring_funcs(adev);
sdma_v5_0_set_buffer_funcs(adev);
@@ -1347,11 +1376,13 @@ static int sdma_v5_0_early_init(void *handle)
}
-static int sdma_v5_0_sw_init(void *handle)
+static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
+ uint32_t *ptr;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0,
@@ -1367,13 +1398,9 @@ static int sdma_v5_0_sw_init(void *handle)
if (r)
return r;
- r = sdma_v5_0_init_microcode(adev);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -1395,26 +1422,54 @@ static int sdma_v5_0_sw_init(void *handle)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 5):
+ if ((adev->sdma.instance[0].fw_version >= 35) &&
+ !amdgpu_sriov_vf(adev))
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v5_0_sw_fini(void *handle)
+static int sdma_v5_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, false);
+ kfree(adev->sdma.ip_dump);
+
return 0;
}
-static int sdma_v5_0_hw_init(void *handle)
+static int sdma_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v5_0_init_golden_registers(adev);
@@ -1423,15 +1478,12 @@ static int sdma_v5_0_hw_init(void *handle)
return r;
}
-static int sdma_v5_0_hw_fini(void *handle)
+static int sdma_v5_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (amdgpu_sriov_vf(adev)) {
- /* disable the scheduler for SDMA */
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ if (amdgpu_sriov_vf(adev))
return 0;
- }
sdma_v5_0_ctx_switch_enable(adev, false);
sdma_v5_0_enable(adev, false);
@@ -1439,23 +1491,19 @@ static int sdma_v5_0_hw_fini(void *handle)
return 0;
}
-static int sdma_v5_0_suspend(void *handle)
+static int sdma_v5_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_0_hw_fini(adev);
+ return sdma_v5_0_hw_fini(ip_block);
}
-static int sdma_v5_0_resume(void *handle)
+static int sdma_v5_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_0_hw_init(adev);
+ return sdma_v5_0_hw_init(ip_block);
}
-static bool sdma_v5_0_is_idle(void *handle)
+static bool sdma_v5_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1468,11 +1516,11 @@ static bool sdma_v5_0_is_idle(void *handle)
return true;
}
-static int sdma_v5_0_wait_for_idle(void *handle)
+static int sdma_v5_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
@@ -1485,13 +1533,104 @@ static int sdma_v5_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v5_0_soft_reset(void *handle)
+static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
}
+/**
+ * sdma_v5_0_reset_queue - reset the SDMA engine that backs a ring
+ *
+ * @ring: ring whose engine instance (ring->me) is reset
+ * @vmid: not used by this function
+ * @timedout_fence: passed through to the ring reset begin/end helpers
+ *
+ * Returns -EINVAL when ring->me is not a valid SDMA instance, the error from
+ * amdgpu_sdma_reset_engine() when the reset fails, otherwise the result of
+ * amdgpu_ring_reset_helper_end().
+ */
+static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ /* KFD is suspended around the reset and resumed whether or not it succeeded */
+ amdgpu_amdkfd_suspend(adev, true);
+ r = amdgpu_sdma_reset_engine(adev, ring->me, true);
+ amdgpu_amdkfd_resume(adev, true);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/**
+ * sdma_v5_0_stop_queue - freeze and halt the SDMA engine that backs a ring
+ *
+ * @ring: ring whose engine instance (ring->me) is stopped
+ *
+ * Runs inside RLC safe mode: stops the gfx ring buffer of the instance,
+ * requests a freeze, then sets F32_CNTL.HALT and clears CNTL.UTC_L1_ENABLE.
+ *
+ * Returns 0 on success, -EINVAL for an SR-IOV VF, or -ETIMEDOUT when the
+ * engine neither reports FROZEN nor passes the STATUS1 check.
+ */
+static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring)
+{
+ u32 f32_cntl, freeze, cntl, stat1_reg;
+ struct amdgpu_device *adev = ring->adev;
+ int i, j, r = 0;
+
+ /* rejected before safe mode is entered, so there is nothing to undo */
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ i = ring->me;
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* stop queue; gfx_stop takes an instance bitmask, hence 1 << i */
+ sdma_v5_0_gfx_stop(adev, 1 << i);
+
+ /* request a freeze: set SDMAx_FREEZE.FREEZE (F32_CNTL.HALT is set further down) */
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);
+
+ /* poll FROZEN, 1us per iteration, for at most adev->usec_timeout iterations */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1)
+ break;
+ udelay(1);
+ }
+
+ /*
+ * FROZEN was never observed: only continue if the low 10 bits of
+ * STATUS1 are all set (copy engine idle check).
+ * NOTE(review): exact meaning of the 0x3FF bits is not visible here - confirm.
+ */
+ if (j == adev->usec_timeout) {
+ stat1_reg = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG));
+ if ((stat1_reg & 0x3FF) != 0x3FF) {
+ DRM_ERROR("cannot soft reset as sdma not idle\n");
+ r = -ETIMEDOUT;
+ goto err0;
+ }
+ }
+
+ /* halt the F32 */
+ f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+
+ /* clear UTC_L1_ENABLE */
+ cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl);
+err0:
+ /* safe mode is left on both the success and the timeout path */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+/**
+ * sdma_v5_0_restore_queue - unfreeze and restart the SDMA engine of a ring
+ *
+ * @ring: ring whose engine instance (ring->me) is restarted
+ *
+ * Inside RLC safe mode, clears SDMAx_FREEZE.FREEZE and then re-runs the
+ * per-instance gfx resume with restore == true.
+ *
+ * Returns the result of sdma_v5_0_gfx_resume_instance().
+ */
+static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_id = ring->me;
+ u32 freeze;
+ int r;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ /* unfreeze */
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze);
+
+ /* restore == true: ring pointers are reprogrammed from ring->wptr, not zeroed */
+ r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return r;
+}
+
static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
@@ -1689,15 +1828,15 @@ static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
}
-static int sdma_v5_0_set_clockgating_state(void *handle,
+static int sdma_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(5, 0, 0):
case IP_VERSION(5, 0, 2):
case IP_VERSION(5, 0, 5):
@@ -1713,15 +1852,15 @@ static int sdma_v5_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v5_0_set_powergating_state(void *handle,
+static int sdma_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1738,10 +1877,51 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
-const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
+/**
+ * sdma_v5_0_print_ip_state - print the captured SDMA register values
+ *
+ * @ip_block: IP block whose ->adev owns the SDMA state
+ * @p: drm_printer that receives the output
+ *
+ * Prints the instance count and then, per instance, one line for each entry
+ * of sdma_reg_list_5_0 (register name and the value stored in
+ * adev->sdma.ip_dump). Does nothing when adev->sdma.ip_dump is NULL.
+ */
+static void sdma_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
+ uint32_t instance_offset;
+
+ /* no dump buffer, nothing to print */
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ /* ip_dump is indexed as instance * reg_count + register index */
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_0[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+/**
+ * sdma_v5_0_dump_ip_state - capture SDMA register values into the dump buffer
+ *
+ * @ip_block: IP block whose ->adev owns the SDMA state
+ *
+ * For every SDMA instance, reads each register listed in sdma_reg_list_5_0
+ * and stores the value in adev->sdma.ip_dump, using the same
+ * instance * reg_count + index layout that sdma_v5_0_print_ip_state() reads.
+ * Does nothing when adev->sdma.ip_dump is NULL.
+ */
+static void sdma_v5_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
+
+ /* no dump buffer, nothing to capture into */
+ if (!adev->sdma.ip_dump)
+ return;
+
+ /* NOTE(review): presumably disallows GFXOFF while the registers are read, confirm against amdgpu_gfx_off_ctrl() */
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v5_0_get_reg_offset(adev, i,
+ sdma_reg_list_5_0[j].reg_offset));
+ }
+ /* paired with the amdgpu_gfx_off_ctrl(adev, false) call above */
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
.name = "sdma_v5_0",
.early_init = sdma_v5_0_early_init,
- .late_init = NULL,
.sw_init = sdma_v5_0_sw_init,
.sw_fini = sdma_v5_0_sw_fini,
.hw_init = sdma_v5_0_hw_init,
@@ -1754,6 +1934,8 @@ const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
.set_clockgating_state = sdma_v5_0_set_clockgating_state,
.set_powergating_state = sdma_v5_0_set_powergating_state,
.get_clockgating_state = sdma_v5_0_get_clockgating_state,
+ .dump_ip_state = sdma_v5_0_dump_ip_state,
+ .print_ip_state = sdma_v5_0_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
@@ -1789,8 +1971,8 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
- .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
.preempt_ib = sdma_v5_0_ring_preempt_ib,
+ .reset = sdma_v5_0_reset_queue,
};
static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1827,7 +2009,7 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine (NAVI10).
* Used by the amdgpu ttm implementation to move pages if
@@ -1837,11 +2019,11 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
index d4e3c2e696f6..2ab71f21755a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
@@ -24,7 +24,6 @@
#ifndef __SDMA_V5_0_H__
#define __SDMA_V5_0_H__
-extern const struct amd_ip_funcs sdma_v5_0_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block;
#endif /* __SDMA_V5_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 2b3ebebc4299..3bd44c24f692 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -60,10 +60,61 @@ MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin");
#define SDMA0_HYP_DEC_REG_END 0x5893
#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+/*
+ * SDMA 5.2 register table: status, UTCL1, GFX/PAGE/RLC0 ring and IB registers,
+ * plus GRBM_STATUS2. Every entry is built with SOC15_REG_ENTRY_STR(GC, 0, ...).
+ * NOTE(review): presumably consumed by the 5.2 dump_ip_state/print_ip_state
+ * callbacks (not visible in this chunk); verify against those users.
+ */
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_2[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2)
+};
+
static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring);
static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
@@ -89,35 +140,23 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
return base + internal_offset;
}
-static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ /* this is the offset we need patch later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert dummy here and patch it later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
- unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
/**
* sdma_v5_2_ring_get_rptr - get the current read pointer
*
@@ -188,6 +227,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) {
+ /* SDMA seems to miss doorbells sometimes when powergating kicks in.
+ * Updating the wptr directly will wake it. This is only safe because
+ * we disallow gfxoff in begin_use() and then allow it again in end_use().
+ */
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
} else {
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
@@ -292,17 +341,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
- amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
- amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
- amdgpu_ring_write(ring, ref_and_mask); /* reference */
- amdgpu_ring_write(ring, ref_and_mask); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ if (ring->me > 1) {
+ amdgpu_asic_flush_hdp(adev, ring);
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ }
}
/**
@@ -343,11 +396,9 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
}
if ((flags & AMDGPU_FENCE_FLAG_INT)) {
- uint32_t ctx = ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
- amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
}
@@ -356,17 +407,15 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
* sdma_v5_2_gfx_stop - stop the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
+ * @inst_mask: mask of dma engine instances to be disabled
+ *
* Stop the gfx async dma ring buffers.
*/
-static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask)
{
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
@@ -457,9 +506,11 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
int i;
+ uint32_t inst_mask;
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
if (!enable) {
- sdma_v5_2_gfx_stop(adev);
+ sdma_v5_2_gfx_stop(adev, inst_mask);
sdma_v5_2_rlc_stop(adev);
}
@@ -473,14 +524,17 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ * sdma_v5_2_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: when true, keep ring->wptr and program the hardware rptr/wptr from it instead of resetting them to 0
*
- * Set up the gfx DMA ring buffers and enable them.
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, the hardware
+ * rptr and wptr are both reprogrammed from the ring's saved wptr.
+ * Returns 0 for success, error for failure.
*/
-static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+
+static int sdma_v5_2_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
@@ -490,144 +544,163 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
u32 temp;
u32 wptr_poll_cntl;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
+ ring = &adev->sdma.instance[i].ring;
- if (!amdgpu_sriov_vf(adev))
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+ }
- /* setup the wptr shadow polling */
- wptr_gpu_addr = ring->wptr_gpu_addr;
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
- mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
- SDMA0_GFX_RB_WPTR_POLL_CNTL,
- F32_POLL_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
- wptr_poll_cntl);
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
-
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
-
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ /* before programming wptr to a lower value, minor_ptr_update must be set first */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ }
- doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range);
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
- if (amdgpu_sriov_vf(adev))
- sdma_v5_2_ring_set_wptr(ring);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_2_ring_set_wptr(ring);
- /* set minor_ptr_update to 0 after wptr programed */
+ /* set minor_ptr_update to 0 after wptr is programmed */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
- /* SRIOV VF has no control of any of registers below */
- if (!amdgpu_sriov_vf(adev)) {
- /* set utc l1 enable flag always to 1 */
- temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-
- /* enable MCBP */
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
- /* Set up RESP_MODE to non-copy addresses */
- temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
-
- /* program default cache read and write policy */
- temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- temp &= 0xFF0FFF;
- temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
- (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
- SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
-
- /* unhalt engine */
- temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ /* SRIOV VF has no control of any of registers below */
+ if (!amdgpu_sriov_vf(adev)) {
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
+ SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v5_2_ctx_switch_enable(adev, true);
- sdma_v5_2_enable(adev, true);
- }
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need the two lines below */
+ sdma_v5_2_ctx_switch_enable(adev, true);
+ sdma_v5_2_enable(adev, true);
+ }
+
+ return amdgpu_ring_test_helper(ring);
+}
- r = amdgpu_ring_test_helper(ring);
+/**
+ * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v5_2_gfx_resume_instance(adev, i, false);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -690,37 +763,49 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
return 0;
}
-static int sdma_v5_2_soft_reset(void *handle)
+static int sdma_v5_2_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 grbm_soft_reset;
u32 tmp;
- int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- grbm_soft_reset = REG_SET_FIELD(0,
- GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
- 1);
- grbm_soft_reset <<= i;
+ grbm_soft_reset = REG_SET_FIELD(0,
+ GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
+ 1);
+ grbm_soft_reset <<= instance_id;
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ return 0;
+}
+static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sdma_v5_2_soft_reset_engine(adev, i);
udelay(50);
}
return 0;
}
+/*
+ * Per-queue reset hooks for SDMA 5.2: stop the kernel queue, restore
+ * (restart) it, and soft-reset a single engine instance.
+ * NOTE(review): presumably installed and invoked by the common amdgpu SDMA
+ * reset code; the users are not visible in this chunk.
+ */
+static const struct amdgpu_sdma_funcs sdma_v5_2_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v5_2_stop_queue,
+ .start_kernel_queue = &sdma_v5_2_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v5_2_soft_reset_engine,
+};
+
/**
* sdma_v5_2_start - setup and start the async dma engines
*
@@ -732,6 +817,7 @@ static int sdma_v5_2_soft_reset(void *handle)
static int sdma_v5_2_start(struct amdgpu_device *adev)
{
int r = 0;
+ struct amdgpu_ip_block *ip_block;
if (amdgpu_sriov_vf(adev)) {
sdma_v5_2_ctx_switch_enable(adev, false);
@@ -752,7 +838,11 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
msleep(1000);
}
- sdma_v5_2_soft_reset(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA);
+ if (!ip_block)
+ return -EINVAL;
+
+ sdma_v5_2_soft_reset(ip_block);
/* unhalt the MEs */
sdma_v5_2_enable(adev, true);
/* enable sdma ring preemption */
@@ -827,28 +917,18 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
int r;
u32 tmp;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
- return r;
- }
-
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
}
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
@@ -865,10 +945,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@@ -880,8 +957,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -904,37 +980,23 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
u32 tmp = 0;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err0;
- }
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
@@ -962,10 +1024,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err1;
}
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
r = 0;
@@ -973,11 +1032,10 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1133,7 +1191,28 @@ static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+
+ /* Update the PD address for this VMID. */
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ /* Trigger invalidation. */
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f));
+ amdgpu_ring_write(ring, req);
+ amdgpu_ring_write(ring, 0xFFFFFFFF);
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) |
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F));
}
static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -1169,9 +1248,14 @@ static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
-static int sdma_v5_2_early_init(void *handle)
+static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_sdma_init_microcode(adev, 0, true);
+ if (r)
+ return r;
sdma_v5_2_set_ring_funcs(adev);
sdma_v5_2_set_buffer_funcs(adev);
@@ -1216,11 +1300,13 @@ static unsigned sdma_v5_2_seq_to_trap_id(int seq_num)
return -EINVAL;
}
-static int sdma_v5_2_sw_init(void *handle)
+static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+ uint32_t *ptr;
/* SDMA trap event */
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1231,13 +1317,9 @@ static int sdma_v5_2_sw_init(void *handle)
return r;
}
- r = amdgpu_sdma_init_microcode(adev, 0, true);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
- return r;
- }
-
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -1258,38 +1340,69 @@ static int sdma_v5_2_sw_init(void *handle)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 4):
+ if ((adev->sdma.instance[0].fw_version >= 76) &&
+ !amdgpu_sriov_vf(adev))
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ case IP_VERSION(5, 2, 5):
+ if ((adev->sdma.instance[0].fw_version >= 34) &&
+ !amdgpu_sriov_vf(adev))
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v5_2_sw_fini(void *handle)
+static int sdma_v5_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, true);
+ kfree(adev->sdma.ip_dump);
+
return 0;
}
-static int sdma_v5_2_hw_init(void *handle)
+static int sdma_v5_2_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return sdma_v5_2_start(adev);
}
-static int sdma_v5_2_hw_fini(void *handle)
+static int sdma_v5_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (amdgpu_sriov_vf(adev)) {
- /* disable the scheduler for SDMA */
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ if (amdgpu_sriov_vf(adev))
return 0;
- }
sdma_v5_2_ctx_switch_enable(adev, false);
sdma_v5_2_enable(adev, false);
@@ -1297,23 +1410,19 @@ static int sdma_v5_2_hw_fini(void *handle)
return 0;
}
-static int sdma_v5_2_suspend(void *handle)
+static int sdma_v5_2_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_2_hw_fini(adev);
+ return sdma_v5_2_hw_fini(ip_block);
}
-static int sdma_v5_2_resume(void *handle)
+static int sdma_v5_2_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_2_hw_init(adev);
+ return sdma_v5_2_hw_init(ip_block);
}
-static bool sdma_v5_2_is_idle(void *handle)
+static bool sdma_v5_2_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1326,11 +1435,11 @@ static bool sdma_v5_2_is_idle(void *handle)
return true;
}
-static int sdma_v5_2_wait_for_idle(void *handle)
+static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1, sdma2, sdma3;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
@@ -1345,6 +1454,100 @@ static int sdma_v5_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
+static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ amdgpu_amdkfd_suspend(adev, true);
+ r = amdgpu_sdma_reset_engine(adev, ring->me, true);
+ amdgpu_amdkfd_resume(adev, true);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring)
+{
+ u32 f32_cntl, freeze, cntl, stat1_reg;
+ struct amdgpu_device *adev = ring->adev;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ i = ring->me;
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* stop queue */
+ sdma_v5_2_gfx_stop(adev, 1 << i);
+
+ /*engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+
+ if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1)
+ break;
+ udelay(1);
+ }
+
+
+ if (j == adev->usec_timeout) {
+ stat1_reg = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG));
+ if ((stat1_reg & 0x3FF) != 0x3FF) {
+ DRM_ERROR("cannot soft reset as sdma not idle\n");
+ r = -ETIMEDOUT;
+ goto err0;
+ }
+ }
+
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+
+ cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl);
+
+err0:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_id = ring->me;
+ u32 freeze;
+ int r;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ /* unfreeze and unhalt */
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze);
+
+ r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return r;
+}
+
static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
@@ -1510,7 +1713,7 @@ static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev,
static bool sdma_v5_2_firmware_mgcg_support(struct amdgpu_device *adev,
int i)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(5, 2, 1):
if (adev->sdma.instance[i].fw_version < 70)
return false;
@@ -1575,8 +1778,9 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
-
- if (adev->sdma.instance[i].fw_version < 70 && adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 1))
+ if (adev->sdma.instance[i].fw_version < 70 &&
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(5, 2, 1))
adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
@@ -1597,15 +1801,15 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
}
-static int sdma_v5_2_set_clockgating_state(void *handle,
+static int sdma_v5_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(5, 2, 0):
case IP_VERSION(5, 2, 2):
case IP_VERSION(5, 2, 1):
@@ -1626,15 +1830,15 @@ static int sdma_v5_2_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v5_2_set_powergating_state(void *handle,
+static int sdma_v5_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v5_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1651,10 +1855,81 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
-const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Disallow GFXOFF while SDMA is active.
+ * We can probably just limit this to 5.2.3,
+ * but it shouldn't hurt for other parts since
+ * this GFXOFF will be disallowed anyway when SDMA is
+ * active, this just makes it explicit.
+ * sdma_v5_2_ring_set_wptr() takes advantage of this
+ * to update the wptr because sometimes SDMA seems to miss
+ * doorbells when entering PG. If you remove this, update
+ * sdma_v5_2_ring_set_wptr() as well!
+ */
+ amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Allow GFXOFF when SDMA is complete.
+ */
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void sdma_v5_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_2[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v5_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v5_2_get_reg_offset(adev, i,
+ sdma_reg_list_5_2[j].reg_offset));
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.name = "sdma_v5_2",
.early_init = sdma_v5_2_early_init,
- .late_init = NULL,
.sw_init = sdma_v5_2_sw_init,
.sw_fini = sdma_v5_2_sw_fini,
.hw_init = sdma_v5_2_hw_init,
@@ -1667,6 +1942,8 @@ const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.set_clockgating_state = sdma_v5_2_set_clockgating_state,
.set_powergating_state = sdma_v5_2_set_powergating_state,
.get_clockgating_state = sdma_v5_2_get_clockgating_state,
+ .dump_ip_state = sdma_v5_2_dump_ip_state,
+ .print_ip_state = sdma_v5_2_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
@@ -1698,12 +1975,14 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.test_ib = sdma_v5_2_ring_test_ib,
.insert_nop = sdma_v5_2_ring_insert_nop,
.pad_ib = sdma_v5_2_ring_pad_ib,
+ .begin_use = sdma_v5_2_ring_begin_use,
+ .end_use = sdma_v5_2_ring_end_use,
.emit_wreg = sdma_v5_2_ring_emit_wreg,
.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_2_ring_init_cond_exec,
- .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
.preempt_ib = sdma_v5_2_ring_preempt_ib,
+ .reset = sdma_v5_2_reset_queue,
};
static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)
@@ -1740,7 +2019,7 @@ static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine.
* Used by the amdgpu ttm implementation to move pages if
@@ -1750,11 +2029,11 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
index b70414fef2a1..863145b3a77e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h
@@ -24,7 +24,6 @@
#ifndef __SDMA_V5_2_H__
#define __SDMA_V5_2_H__
-extern const struct amd_ip_funcs sdma_v5_2_ip_funcs;
extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block;
#endif /* __SDMA_V5_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 45be0af2570b..db6e41967f12 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -43,18 +43,80 @@
#include "sdma_common.h"
#include "sdma_v6_0.h"
#include "v11_structs.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_3.bin");
#define SDMA1_REG_OFFSET 0x600
#define SDMA0_HYP_DEC_REG_START 0x5880
#define SDMA0_HYP_DEC_REG_END 0x589a
#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_6_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_CHECKSUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS),
+};
+
static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev);
@@ -79,35 +141,23 @@ static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
return base + internal_offset;
}
-static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
{
unsigned ret;
amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
- amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, 1);
- ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
- amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+ /* this is the offset we need patch later */
+ ret = ring->wptr & ring->buf_mask;
+ /* insert dummy here and patch it later */
+ amdgpu_ring_write(ring, 0);
return ret;
}
-static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
- unsigned offset)
-{
- unsigned cur;
-
- BUG_ON(offset > ring->buf_mask);
- BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
- cur = (ring->wptr - 1) & ring->buf_mask;
- if (cur > offset)
- ring->ring[offset] = cur - offset;
- else
- ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
/**
* sdma_v6_0_ring_get_rptr - get the current read pointer
*
@@ -156,68 +206,35 @@ static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
-
- DRM_DEBUG("Setting write pointer\n");
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- ring->hw_prio);
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
- *wptr_saved = ring->wptr << 2;
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, ring->wptr << 2);
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
- } else {
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
-
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index,
- ring->wptr << 2);
- }
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
- if (ring->use_doorbell) {
- DRM_DEBUG("Using doorbell -- "
- "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
- ring->wptr_offs,
- lower_32_bits(ring->wptr << 2),
- upper_32_bits(ring->wptr << 2));
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr << 2);
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
- } else {
- DRM_DEBUG("Not using doorbell -- "
- "regSDMA%i_GFX_RB_WPTR == 0x%08x "
- "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
- ring->me,
- lower_32_bits(ring->wptr << 2),
- ring->me,
- upper_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
- ring->me, regSDMA0_QUEUE0_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
- ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
- }
+ DRM_DEBUG("Not using doorbell -- "
+ "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
+ ring->me, regSDMA0_QUEUE0_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
+ ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
}
}
@@ -234,7 +251,7 @@ static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
amdgpu_ring_write(ring, ring->funcs->nop);
}
-/**
+/*
* sdma_v6_0_ring_emit_ib - Schedule an IB on the DMA engine
*
* @ring: amdgpu ring pointer
@@ -361,11 +378,9 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
}
if (flags & AMDGPU_FENCE_FLAG_INT) {
- uint32_t ctx = ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
- amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
}
@@ -381,8 +396,6 @@ static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev)
u32 rb_cntl, ib_cntl;
int i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0);
@@ -457,14 +470,16 @@ static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v6_0_gfx_resume - setup and start the async dma engines
+ * sdma_v6_0_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them.
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
+static int sdma_v6_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
@@ -473,130 +488,154 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
u32 doorbell_offset;
u32 temp;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
+ ring = &adev->sdma.instance[i].ring;
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- if (!amdgpu_sriov_vf(adev))
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
-
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
+ ring->wptr = 0;
- /* setup the wptr shadow polling */
- wptr_gpu_addr = ring->wptr_gpu_addr;
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
- upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
- lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1);
+ /* before programming wptr to a lower value, minor_ptr_update must be set first */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ }
- ring->wptr = 0;
+ doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
- }
+ if (i == 0)
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
- doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+ if (amdgpu_sriov_vf(adev))
+ sdma_v6_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update back to 0 after wptr has been programmed */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+ /* Set up sdma hang watchdog */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+ /* 100ms per unit */
+ temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+ max(adev->usec_timeout/100000, 1));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+ /* clear read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
+ SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
-
- if (i == 0)
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
-
- if (amdgpu_sriov_vf(adev))
- sdma_v6_0_ring_set_wptr(ring);
-
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
-
- /* Set up RESP_MODE to non-copy addresses */
- temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
-
- /* program default cache read and write policy */
- temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- temp &= 0xFF0FFF;
- temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
- (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
- SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
-
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp);
- }
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
- if (amdgpu_sriov_vf(adev))
- sdma_v6_0_enable(adev, true);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v6_0_enable(adev, true);
+
+ return amdgpu_ring_test_helper(ring);
+}
- r = amdgpu_ring_test_helper(ring);
+/**
+ * sdma_v6_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v6_0_gfx_resume_instance(adev, i, false);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -717,9 +756,9 @@ static int sdma_v6_0_load_microcode(struct amdgpu_device *adev)
return 0;
}
-static int sdma_v6_0_soft_reset(void *handle)
+static int sdma_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp;
int i;
@@ -753,9 +792,9 @@ static int sdma_v6_0_soft_reset(void *handle)
return sdma_v6_0_start(adev);
}
-static bool sdma_v6_0_check_soft_reset(void *handle)
+static bool sdma_v6_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
long tmo = msecs_to_jiffies(1000);
@@ -852,6 +891,12 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd,
m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
+ m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+ m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
+ m->sdmax_rlcx_f32_dbg0 = lower_32_bits(prop->fence_address);
+ m->sdmax_rlcx_f32_dbg1 = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -878,28 +923,18 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
u32 tmp;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
- return r;
- }
-
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
}
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
@@ -916,10 +951,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@@ -931,13 +963,12 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
-/**
+/*
* sdma_v6_0_ring_test_ib - test an IB on the DMA engine
*
* @ring: amdgpu_ring structure holding ring information
@@ -955,37 +986,23 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
u32 tmp = 0;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err0;
- }
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
}
ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
@@ -1013,10 +1030,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err1;
}
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
r = 0;
@@ -1024,11 +1038,10 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1119,7 +1132,7 @@ static void sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib *ib,
ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
}
-/**
+/*
* sdma_v6_0_ring_pad_ib - pad the IB
* @ib: indirect buffer to fill with padding
* @ring: amdgpu ring pointer
@@ -1168,7 +1181,7 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}
-/**
+/*
* sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA
*
* @ring: amdgpu_ring pointer
@@ -1246,19 +1259,40 @@ static struct amdgpu_sdma_ras sdma_v6_0_3_ras = {
static void sdma_v6_0_set_ras_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[SDMA0_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(6, 0, 3):
adev->sdma.ras = &sdma_v6_0_3_ras;
break;
default:
break;
}
-
}
-static int sdma_v6_0_early_init(void *handle)
+static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = true;
+ break;
+ case 1:
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = false;
+ break;
+ case 2:
+ adev->sdma.no_user_submission = true;
+ adev->sdma.disable_uq = false;
+ break;
+ }
+
+ r = amdgpu_sdma_init_microcode(adev, 0, true);
+ if (r)
+ return r;
sdma_v6_0_set_ring_funcs(adev);
sdma_v6_0_set_buffer_funcs(adev);
@@ -1270,11 +1304,13 @@ static int sdma_v6_0_early_init(void *handle)
return 0;
}
-static int sdma_v6_0_sw_init(void *handle)
+static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+ uint32_t *ptr;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
@@ -1283,17 +1319,19 @@ static int sdma_v6_0_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_sdma_init_microcode(adev, 0, true);
- if (r) {
- DRM_ERROR("Failed to load sdma firmware!\n");
+ /* SDMA user fence event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_11_0_0__SRCID__SDMA_FENCE,
+ &adev->sdma.fence_irq);
+ if (r)
return r;
- }
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->me = i;
+ ring->no_user_submission = adev->sdma.no_user_submission;
DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
@@ -1311,67 +1349,154 @@ static int sdma_v6_0_sw_init(void *handle)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ if ((adev->sdma.instance[0].fw_version >= 21) &&
+ !amdgpu_sriov_vf(adev))
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
if (amdgpu_sdma_ras_sw_init(adev)) {
dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
return -EINVAL;
}
+ /* Allocate memory for SDMA IP Dump buffer */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 0, 1):
+ if ((adev->sdma.instance[0].fw_version >= 18) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 0, 2):
+ if ((adev->sdma.instance[0].fw_version >= 23) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 0, 3):
+ if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 1, 0):
+ if ((adev->sdma.instance[0].fw_version >= 14) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 1, 1):
+ if ((adev->sdma.instance[0].fw_version >= 17) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 1, 2):
+ if ((adev->sdma.instance[0].fw_version >= 15) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 1, 3):
+ if ((adev->sdma.instance[0].fw_version >= 10) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ default:
+ break;
+ }
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v6_0_sw_fini(void *handle)
+static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, true);
+ kfree(adev->sdma.ip_dump);
+
return 0;
}
-static int sdma_v6_0_hw_init(void *handle)
+static int sdma_v6_0_set_userq_trap_interrupts(struct amdgpu_device *adev,
+ bool enable)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int irq_type;
+ int i, r;
- return sdma_v6_0_start(adev);
+ if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->sdma.trap_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->sdma.trap_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
}
-static int sdma_v6_0_hw_fini(void *handle)
+static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
- if (amdgpu_sriov_vf(adev)) {
- /* disable the scheduler for SDMA */
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
+ r = sdma_v6_0_start(adev);
+ if (r)
+ return r;
+
+ return sdma_v6_0_set_userq_trap_interrupts(adev, true);
+}
+
+static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
return 0;
- }
sdma_v6_0_ctxempty_int_enable(adev, false);
sdma_v6_0_enable(adev, false);
+ sdma_v6_0_set_userq_trap_interrupts(adev, false);
return 0;
}
-static int sdma_v6_0_suspend(void *handle)
+static int sdma_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v6_0_hw_fini(adev);
+ return sdma_v6_0_hw_fini(ip_block);
}
-static int sdma_v6_0_resume(void *handle)
+static int sdma_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v6_0_hw_init(adev);
+ return sdma_v6_0_hw_init(ip_block);
}
-static bool sdma_v6_0_is_idle(void *handle)
+static bool sdma_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1384,11 +1509,11 @@ static bool sdma_v6_0_is_idle(void *handle)
return true;
}
-static int sdma_v6_0_wait_for_idle(void *handle)
+static int sdma_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG));
@@ -1446,6 +1571,31 @@ static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring)
return r;
}
+static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (ring->me >= adev->sdma.num_instances) {
+ dev_err(adev->dev, "sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+ if (r)
+ return r;
+
+ r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1470,25 +1620,9 @@ static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
int instances, queue;
- uint32_t mes_queue_id = entry->src_data[0];
DRM_DEBUG("IH: SDMA trap\n");
- if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
- struct amdgpu_mes_queue *queue;
-
- mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
-
- spin_lock(&adev->mes.queue_id_lock);
- queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
- if (queue) {
- DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
- amdgpu_fence_process(queue->ring);
- }
- spin_unlock(&adev->mes.queue_id_lock);
- return 0;
- }
-
queue = entry->ring_id & 0xf;
instances = (entry->ring_id & 0xf0) >> 4;
if (instances > 1) {
@@ -1510,6 +1644,29 @@ static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
return 0;
}
+static int sdma_v6_0_process_fence_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ }
+
+ return 0;
+}
+
static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -1517,26 +1674,67 @@ static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int sdma_v6_0_set_clockgating_state(void *handle,
+static int sdma_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int sdma_v6_0_set_powergating_state(void *handle,
+static int sdma_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+}
+
+static void sdma_v6_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+ uint32_t instance_offset;
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ drm_printf(p, "\nInstance:%d\n", i);
+
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_6_0[j].reg_name,
+ adev->sdma.ip_dump[instance_offset + j]);
+ }
+}
+
+static void sdma_v6_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
+
+ if (!adev->sdma.ip_dump)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count;
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v6_0_get_reg_offset(adev, i,
+ sdma_reg_list_6_0[j].reg_offset));
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
}
const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
.name = "sdma_v6_0",
.early_init = sdma_v6_0_early_init,
- .late_init = NULL,
.sw_init = sdma_v6_0_sw_init,
.sw_fini = sdma_v6_0_sw_fini,
.hw_init = sdma_v6_0_hw_init,
@@ -1550,6 +1748,8 @@ const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
.set_clockgating_state = sdma_v6_0_set_clockgating_state,
.set_powergating_state = sdma_v6_0_set_powergating_state,
.get_clockgating_state = sdma_v6_0_get_clockgating_state,
+ .dump_ip_state = sdma_v6_0_dump_ip_state,
+ .print_ip_state = sdma_v6_0_print_ip_state,
};
static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
@@ -1584,8 +1784,8 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
.emit_reg_wait = sdma_v6_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v6_0_ring_init_cond_exec,
- .patch_cond_exec = sdma_v6_0_ring_patch_cond_exec,
.preempt_ib = sdma_v6_0_ring_preempt_ib,
+ .reset = sdma_v6_0_reset_queue,
};
static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1603,6 +1803,10 @@ static const struct amdgpu_irq_src_funcs sdma_v6_0_trap_irq_funcs = {
.process = sdma_v6_0_process_trap_irq,
};
+static const struct amdgpu_irq_src_funcs sdma_v6_0_fence_irq_funcs = {
+ .process = sdma_v6_0_process_fence_irq,
+};
+
static const struct amdgpu_irq_src_funcs sdma_v6_0_illegal_inst_irq_funcs = {
.process = sdma_v6_0_process_illegal_inst_irq,
};
@@ -1612,6 +1816,7 @@ static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev)
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
adev->sdma.num_instances;
adev->sdma.trap_irq.funcs = &sdma_v6_0_trap_irq_funcs;
+ adev->sdma.fence_irq.funcs = &sdma_v6_0_fence_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &sdma_v6_0_illegal_inst_irq_funcs;
}
@@ -1622,7 +1827,7 @@ static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine.
* Used by the amdgpu ttm implementation to move pages if
@@ -1632,11 +1837,11 @@ static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
@@ -1660,7 +1865,7 @@ static void sdma_v6_0_emit_fill_buffer(struct amdgpu_ib *ib,
uint64_t dst_offset,
uint32_t byte_count)
{
- ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL);
ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
ib->ptr[ib->length_dw++] = src_data;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h
index 6af23e7888ca..d8cf830916b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0_0_pkt_open.h
@@ -91,6 +91,14 @@
#define SDMA_GCR_GLM_WB (1 << 4)
#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2)
#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0)
+
+#define SDMA_DCC_DATA_FORMAT(x) ((x) & 0x3f)
+#define SDMA_DCC_NUM_TYPE(x) (((x) & 0x7) << 9)
+#define SDMA_DCC_READ_CM(x) (((x) & 0x3) << 16)
+#define SDMA_DCC_WRITE_CM(x) (((x) & 0x3) << 18)
+#define SDMA_DCC_MAX_COM(x) (((x) & 0x3) << 24)
+#define SDMA_DCC_MAX_UCOM(x) (((x) & 0x1) << 26)
+
/*
** Definitions for SDMA_PKT_COPY_LINEAR packet
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
new file mode 100644
index 000000000000..326ecc8d37d2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -0,0 +1,1858 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "hdp/hdp_6_0_0_offset.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "sdma_v6_0_0_pkt_open.h"
+#include "nbio_v4_3.h"
+#include "sdma_common.h"
+#include "sdma_v7_0.h"
+#include "v12_structs.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin");
+MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin");
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA0_HYP_DEC_REG_START 0x5880
+#define SDMA0_HYP_DEC_REG_END 0x589a
+#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+
+/* Definitions for the compression field of the sdma7 CONSTANT_FILL packet header */
+#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_mask 0x00000001
+#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_COMPRESS(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_compress_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_compress_shift)
+
+static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_REV),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_VM_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS),
+};
+
+static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v7_0_start(struct amdgpu_device *adev);
+
+static u32 sdma_v7_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+	/* registers inside the SDMA0_HYP_DEC window are addressed differently */
+	bool hyp_dec = internal_offset >= SDMA0_HYP_DEC_REG_START &&
+		       internal_offset <= SDMA0_HYP_DEC_REG_END;
+
+	if (hyp_dec) {
+		/* base index 1, fixed stride per instance (zero for instance 0) */
+		internal_offset += SDMA1_HYP_DEC_REG_OFFSET * instance;
+		return adev->reg_offset[GC_HWIP][0][1] + internal_offset;
+	}
+
+	/* base index 0; only instance 1 is moved by SDMA1_REG_OFFSET */
+	if (instance == 1)
+		internal_offset += SDMA1_REG_OFFSET;
+	return adev->reg_offset[GC_HWIP][0][0] + internal_offset;
+}
+
+static unsigned sdma_v7_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+					      uint64_t addr)
+{
+	unsigned patch_offset;
+
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, 1);
+	/* remember the ring position of the next dword so it can be patched later */
+	patch_offset = ring->wptr & ring->buf_mask;
+	/* placeholder dword, to be patched later */
+	amdgpu_ring_write(ring, 0);
+
+	return patch_offset;
+}
+
+/**
+ * sdma_v7_0_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Read the value stored at ring->rptr_cpu_addr and return it shifted right by two.
+ */
+static uint64_t sdma_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	/* XXX check if swapping is necessary on BE */
+	u64 *rptr = (u64 *)ring->rptr_cpu_addr;
+
+	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+
+	/* callers get the stored value without its two low bits */
+	return *rptr >> 2;
+}
+
+/**
+ * sdma_v7_0_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Read the wptr stored at ring->wptr_cpu_addr; rings without a doorbell report 0.
+ */
+static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	u64 raw;
+
+	if (!ring->use_doorbell)
+		return 0;
+
+	/* XXX check if swapping is necessary on BE */
+	raw = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
+	DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", raw);
+	return raw >> 2;
+}
+
+/**
+ * sdma_v7_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Publish ring->wptr << 2 through the doorbell or the QUEUE0 wptr registers.
+ */
+static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u64 wptr = ring->wptr << 2;
+
+	DRM_DEBUG("Setting write pointer\n");
+
+	/* no doorbell: program the RB_WPTR/RB_WPTR_HI register pair directly */
+	if (!ring->use_doorbell) {
+		DRM_DEBUG("Not using doorbell -- "
+			  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+			  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+			  ring->me, lower_32_bits(wptr),
+			  ring->me, upper_32_bits(wptr));
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, ring->me,
+							     regSDMA0_QUEUE0_RB_WPTR),
+				lower_32_bits(wptr));
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, ring->me,
+							     regSDMA0_QUEUE0_RB_WPTR_HI),
+				upper_32_bits(wptr));
+		return;
+	}
+
+	/* doorbell: store the value at wptr_cpu_addr first, then ring the doorbell */
+	DRM_DEBUG("Using doorbell -- "
+		  "wptr_offs == 0x%08x "
+		  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+		  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+		  ring->wptr_offs,
+		  lower_32_bits(wptr),
+		  upper_32_bits(wptr));
+
+	/* XXX check if swapping is necessary on BE */
+	atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr);
+	DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+		  ring->doorbell_index, wptr);
+	WDOORBELL64(ring->doorbell_index, wptr);
+}
+
+static void sdma_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+	bool burst = sdma && sdma->burst_nop;
+	uint32_t n;
+
+	/* with burst NOPs the first NOP header carries count - 1 in its COUNT field */
+	for (n = 0; n < count; n++)
+		amdgpu_ring_write(ring, (burst && n == 0) ?
+				  ring->funcs->nop | SDMA_PKT_NOP_HEADER_COUNT(count - 1) :
+				  ring->funcs->nop);
+}
+
+/**
+ * sdma_v7_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Pad the ring with NOPs and emit an INDIRECT packet pointing at @ib.
+ */
+static void sdma_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
+				   struct amdgpu_job *job,
+				   struct amdgpu_ib *ib,
+				   uint32_t flags)
+{
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+	uint32_t header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
+			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf);
+
+	/* The INDIRECT packet below is 6 dwords long and has to end on an
+	 * 8-dword boundary, i.e. the dword following it must be 8-dword
+	 * aligned. Pad with x NOPs chosen so that
+	 *   (wptr + 6 + x) % 8 == 0
+	 * which gives
+	 *   x = (2 - wptr) & 7.
+	 */
+	sdma_v7_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
+
+	amdgpu_ring_write(ring, header);
+	/* base must be 32 byte aligned */
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, ib->length_dw);
+	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v7_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit a five-dword GCR_REQ packet carrying gcr_cntl; all address ranges and the VMID are 0.
+ */
+static void sdma_v7_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+ SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+ SDMA_GCR_GLI_INV(1);
+
+ /* flush entire cache L0/L1/L2; this could be narrowed later if performance requires it */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+ SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+
+/**
+ * sdma_v7_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+	/* the sdma0 bit shifted left by the ring's instance index (ring->me) */
+	u32 ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+	/* done and request register offsets, each shifted left by two */
+	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+	amdgpu_ring_write(ring, ref_and_mask); /* reference */
+	amdgpu_ring_write(ring, ref_and_mask); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v7_0_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: fence seq number
+ * @flags: fence flags
+ *
+ * Emit a FENCE packet writing the low 32 bits of @seq to @addr, a second one
+ * writing the high 32 bits to @addr + 4 when AMDGPU_FENCE_FLAG_64BIT is set,
+ * and a TRAP packet when AMDGPU_FENCE_FLAG_INT is set.
+ */
+static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Uncached (UC) */
+ /* the two low address bits must be zero */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
+ /* the two low address bits must be zero */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+ }
+}
+
+/**
+ * sdma_v7_0_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Clear RB_ENABLE and IB_ENABLE on QUEUE0 of every sdma instance.
+ */
+static void sdma_v7_0_gfx_stop(struct amdgpu_device *adev)
+{
+	u32 rb_reg, ib_reg, val;
+	int inst;
+
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		rb_reg = sdma_v7_0_get_reg_offset(adev, inst, regSDMA0_QUEUE0_RB_CNTL);
+		ib_reg = sdma_v7_0_get_reg_offset(adev, inst, regSDMA0_QUEUE0_IB_CNTL);
+		val = RREG32_SOC15_IP(GC, rb_reg);
+		WREG32_SOC15_IP(GC, rb_reg, REG_SET_FIELD(val, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0));
+		val = RREG32_SOC15_IP(GC, ib_reg);
+		WREG32_SOC15_IP(GC, ib_reg, REG_SET_FIELD(val, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0));
+	}
+}
+
+/**
+ * sdma_v7_0_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Placeholder for stopping the compute async dma queues; currently does nothing.
+ */
+static void sdma_v7_0_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v7_0_ctx_switch_enable - enable/disable the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Meant to halt or unhalt the engines' context switch; the body is currently empty.
+ */
+static void sdma_v7_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+}
+
+/**
+ * sdma_v7_0_enable - halt or unhalt the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Stop the gfx/rlc queues when disabling, then program MCU_CNTL.HALT per instance unless amdgpu_sriov_vf().
+ */
+static void sdma_v7_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 mcu_cntl;
+ int i;
+
+ if (!enable) {
+ sdma_v7_0_gfx_stop(adev);
+ sdma_v7_0_rlc_stop(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
+ mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_MCU_CNTL, HALT, enable ? 0 : 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), mcu_cntl);
+ }
+}
+
+/**
+ * sdma_v7_0_gfx_resume_instance - start/restart a certain sdma engine
+ *
+ * @adev: amdgpu_device pointer
+ * @i: index of the sdma instance to program
+ * @restore: true to reprogram rptr/wptr from ring->wptr on restart, false to start from 0
+ *
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success, or the error returned by the ring test.
+ */
+static int sdma_v7_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
+{
+	struct amdgpu_ring *ring;
+	u32 rb_cntl, ib_cntl;
+	u32 rb_bufsz;
+	u32 doorbell;
+	u32 doorbell_offset;
+	u32 temp;
+	u64 wptr_gpu_addr;
+	int r;
+
+	ring = &adev->sdma.instance[i].ring;
+
+	/* Set ring buffer size in dwords */
+	rb_bufsz = order_base_2(ring->ring_size / 4);
+	rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+				RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+	/* Initialize the ring buffer's read and write pointers */
+	if (restore) {
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+	} else {
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+	}
+	/* setup the wptr shadow polling */
+	wptr_gpu_addr = ring->wptr_gpu_addr;
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+			lower_32_bits(wptr_gpu_addr));
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+			upper_32_bits(wptr_gpu_addr));
+
+	/* set the wb address whether it's enabled or not */
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+			upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+			lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+	if (amdgpu_sriov_vf(adev))
+		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
+	else
+		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
+
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
+
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+	if (!restore)
+		ring->wptr = 0;
+
+	/* before programming wptr to a lower value, minor_ptr_update must be set first */
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+
+	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal writes wptr via registers; shift the 64-bit value before splitting it */
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+	}
+
+	doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+	doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+
+	if (ring->use_doorbell) {
+		doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+				OFFSET, ring->doorbell_index);
+	} else {
+		doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+	}
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
+
+	if (i == 0)
+		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+					      ring->doorbell_index,
+					      adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
+
+	if (amdgpu_sriov_vf(adev))
+		sdma_v7_0_ring_set_wptr(ring);
+
+	/* set minor_ptr_update to 0 after wptr is programmed */
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+	/* Set up sdma hang watchdog */
+	temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+	/* 100ms per unit */
+	temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+			     max(adev->usec_timeout/100000, 1));
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
+
+	/* Set up RESP_MODE to non-copy addresses */
+	temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+	temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+	temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
+
+	/* program default cache read and write policy */
+	temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+	/* clean read policy and write policy bits */
+	temp &= 0xFF0FFF;
+	temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+		 (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
+
+	if (!amdgpu_sriov_vf(adev)) {
+		/* unhalt engine */
+		temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
+		temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, HALT, 0);
+		temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, RESET, 0);
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), temp);
+	}
+
+	/* enable DMA RB */
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+	ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+	/* enable DMA IBs */
+	WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+	ring->sched.ready = true;
+
+	if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need the two lines below */
+		sdma_v7_0_ctx_switch_enable(adev, true);
+		sdma_v7_0_enable(adev, true);
+	}
+
+	r = amdgpu_ring_test_helper(ring);
+	if (r)
+		ring->sched.ready = false;
+
+	return r;
+}
+
+/**
+ * sdma_v7_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers of every instance and enable them.
+ * Returns 0 for success, or the error of the first instance that fails.
+ */
+static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev)
+{
+	int inst = 0;
+	int ret = 0;
+
+	/* bring the instances up in order and stop at the first failure */
+	while (!ret && inst < adev->sdma.num_instances) {
+		ret = sdma_v7_0_gfx_resume_instance(adev, inst, false);
+		inst++;
+	}
+
+	return ret;
+}
+
+/**
+ * sdma_v7_0_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Placeholder for setting up the compute DMA queues; currently does nothing.
+ * Always returns 0.
+ */
+static int sdma_v7_0_rlc_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+static void sdma_v12_0_free_ucode_buffer(struct amdgpu_device *adev)
+{
+	struct amdgpu_sdma_instance *inst = adev->sdma.instance;
+	struct amdgpu_sdma_instance *end = inst + adev->sdma.num_instances;
+
+	/* hand each instance's firmware BO, GPU address and CPU pointer to amdgpu_bo_free_kernel() */
+	for (; inst < end; inst++)
+		amdgpu_bo_free_kernel(&inst->sdma_fw_obj, &inst->sdma_fw_gpu_addr,
+				      (void **)&inst->sdma_fw_ptr);
+}
+
+/**
+ * sdma_v7_0_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Halts the engines, copies instance 0's ucode into a VRAM BO per instance and primes each icache.
+ * Returns 0 for success, -EINVAL if the ucode is missing or init times out, or the BO creation error.
+ */
+static int sdma_v7_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v3_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ uint32_t tmp, sdma_status, ic_op_cntl;
+ int i, r, j;
+
+ /* halt the MEs */
+ sdma_v7_0_enable(adev, false);
+
+ if (!adev->sdma.instance[0].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
+ le32_to_cpu(hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->ucode_size_bytes);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->sdma.instance[i].sdma_fw_obj,
+ &adev->sdma.instance[i].sdma_fw_gpu_addr,
+ (void **)&adev->sdma.instance[i].sdma_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
+ amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);
+
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_CNTL));
+ tmp = REG_SET_FIELD(tmp, SDMA0_IC_CNTL, GPA, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_CNTL), tmp);
+
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_BASE_LO),
+ lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_BASE_HI),
+ upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
+
+ tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL));
+ tmp = REG_SET_FIELD(tmp, SDMA0_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL), tmp);
+
+ /* Poll once per microsecond, up to usec_timeout, for ICACHE_PRIMED and UCODE_INIT_DONE */
+ for (j = 0; j < adev->usec_timeout; j++) {
+ ic_op_cntl = RREG32_SOC15_IP(GC,
+ sdma_v7_0_get_reg_offset(adev, i, regSDMA0_IC_OP_CNTL));
+ sdma_status = RREG32_SOC15_IP(GC,
+ sdma_v7_0_get_reg_offset(adev, i, regSDMA0_STATUS_REG));
+ if ((REG_GET_FIELD(ic_op_cntl, SDMA0_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
+ (REG_GET_FIELD(sdma_status, SDMA0_STATUS_REG, UCODE_INIT_DONE) == 1))
+ break;
+ udelay(1);
+ }
+
+ if (j >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to init sdma ucode\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int sdma_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 tmp, mcu_cntl;
+	int i;
+
+	sdma_v7_0_gfx_stop(adev);
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		//tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
+		//tmp |= SDMA0_FREEZE__FREEZE_MASK;
+		//WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
+		mcu_cntl = sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL);
+		tmp = RREG32_SOC15_IP(GC, mcu_cntl);
+		tmp |= SDMA0_MCU_CNTL__HALT_MASK | SDMA0_MCU_CNTL__RESET_MASK;
+		WREG32_SOC15_IP(GC, mcu_cntl, tmp);
+
+		WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);
+
+		udelay(100);
+
+		/* set this instance's GRBM soft-reset bit, then clear it; read back after each write */
+		tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
+		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
+		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+		udelay(100);
+
+		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
+		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+
+		udelay(100);
+	}
+
+	return sdma_v7_0_start(adev);
+}
+
+static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	long timeout = msecs_to_jiffies(1000);
+	int idx;
+
+	/* Run an IB test (1000 ms timeout) on every instance; any failure
+	 * reports that a soft reset is needed.
+	 */
+	for (idx = 0; idx < adev->sdma.num_instances; idx++) {
+		if (amdgpu_ring_test_ib(&adev->sdma.instance[idx].ring, timeout))
+			return true;
+	}
+
+	return false;
+}
+
+static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
+				 unsigned int vmid,
+				 struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int ret;
+
+	/* ring->me selects the sdma instance below, so it must be in range */
+	if (ring->me >= adev->sdma.num_instances) {
+		dev_err(adev->dev, "sdma instance not found\n");
+		return -EINVAL;
+	}
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+	/* reset the legacy queue via MES, then reprogram the instance in restore mode */
+	ret = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+	if (!ret)
+		ret = sdma_v7_0_gfx_resume_instance(adev, ring->me, true);
+	if (ret)
+		return ret;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/**
+ * sdma_v7_0_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Load the microcode when required, unhalt the engines and resume the queues.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_start(struct amdgpu_device *adev)
+{
+	int ret;
+
+	/* with amdgpu_sriov_vf() there is no microcode load or unhalt here */
+	if (amdgpu_sriov_vf(adev)) {
+		sdma_v7_0_ctx_switch_enable(adev, false);
+		sdma_v7_0_enable(adev, false);
+
+		/* set RB registers */
+		return sdma_v7_0_gfx_resume(adev);
+	}
+
+	/* AMDGPU_FW_LOAD_DIRECT: load the microcode from the driver */
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+		ret = sdma_v7_0_load_microcode(adev);
+		if (ret) {
+			sdma_v12_0_free_ucode_buffer(adev);
+			return ret;
+		}
+
+		if (amdgpu_emu_mode == 1)
+			msleep(1000);
+	}
+
+	/* unhalt the MEs */
+	sdma_v7_0_enable(adev, true);
+	/* enable sdma ring preemption */
+	sdma_v7_0_ctx_switch_enable(adev, true);
+
+	/* start the gfx rings and rlc compute queues */
+	ret = sdma_v7_0_gfx_resume(adev);
+	if (!ret)
+		ret = sdma_v7_0_rlc_resume(adev);
+
+	return ret;
+}
+
+static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd,
+			      struct amdgpu_mqd_prop *prop)
+{
+	struct v12_sdma_mqd *m = mqd;
+	uint64_t rb_base = prop->hqd_base_gpu_addr >> 8;
+
+	/* ring size, rptr write-back enable (timer value 4) and MCU wptr poll enable */
+	m->sdmax_rlcx_rb_cntl =
+		(order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT) |
+		(1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT) |
+		(4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT) |
+		(1 << SDMA0_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT);
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(rb_base);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(rb_base);
+
+	/* wptr poll and rptr write-back addresses are taken from @prop */
+	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(prop->wptr_gpu_addr);
+	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(prop->wptr_gpu_addr);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(prop->rptr_gpu_addr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(prop->rptr_gpu_addr);
+
+	/* IB control is read back from instance 0's QUEUE0_IB_CNTL register */
+	m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, 0,
+							regSDMA0_QUEUE0_IB_CNTL));
+
+	m->sdmax_rlcx_doorbell_offset =
+		prop->doorbell_index << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+
+	m->sdmax_rlcx_doorbell_log = 0;
+	m->sdmax_rlcx_rb_aql_cntl = 0x4000; /* regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT */
+	m->sdmax_rlcx_dummy_reg = 0xf; /* regSDMA0_QUEUE0_DUMMY_REG_DEFAULT */
+
+	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
+	m->sdmax_rlcx_mcu_dbg0 = lower_32_bits(prop->fence_address);
+	m->sdmax_rlcx_mcu_dbg1 = upper_32_bits(prop->fence_address);
+
+	return 0;
+}
+
+static void sdma_v7_0_set_mqd_funcs(struct amdgpu_device *adev)
+{ /* Publish the MQD size and init callback for the DMA hardware IP. */
+ adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
+ adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_0_mqd_init;
+}
+
+/**
+ * sdma_v7_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by having it write a known value to a
+ * writeback slot and polling until that value shows up.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ tmp = 0xCAFEDEAD; /* seed value that the engine must overwrite */
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4); /* slots are 4 bytes apart */
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5); /* 5 dwords: the write packet below */
+ if (r) {
+ DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ amdgpu_device_wb_free(adev, index);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF); /* value expected in the wb slot */
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1); /* emulation mode polls with a 1 ms sleep instead */
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) /* loop ran out without seeing the value */
+ r = -ETIMEDOUT;
+
+ amdgpu_device_wb_free(adev, index);
+
+ return r;
+}
+
+/**
+ * sdma_v7_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Schedule a small IB that writes a known value to a writeback slot.
+ * Returns 0 on success, -ETIMEDOUT/-EINVAL or another error on failure.
+ */
+static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+
+ tmp = 0xCAFEDEAD; /* seed value that the IB must overwrite */
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4); /* slots are 4 bytes apart */
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
+ }
+ /* 5-dword linear write packet followed by 3 NOPs, 8 dwords in total */
+ ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) { /* a zero return is treated as a timeout */
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err1;
+ }
+
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ /* the fence signalled; the slot must now hold the written value */
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1: /* release the IB and the fence reference (f is still NULL if scheduling failed) */
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err0: /* release the writeback slot */
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v7_0_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Append an 8-dword linear copy packet to @ib that copies the PTEs via sDMA.
+ */
+static void sdma_v7_0_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8; /* 8 bytes per page entry */
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_CPV(1);
+
+ ib->ptr[ib->length_dw++] = bytes - 1; /* byte count is encoded minus one */
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = 0;
+
+}
+
+/**
+ * sdma_v7_0_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Append a linear write packet to @ib carrying one 64-bit value per entry.
+ */
+static void sdma_v7_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2; /* two dwords per page entry */
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1; /* dword count is encoded minus one */
+ for (; ndw > 0; ndw -= 2) { /* emit low/high halves, then step @value by @incr */
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v7_0_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Append a 10-dword PTEPDE packet to @ib that updates the page tables.
+ */
+static void sdma_v7_0_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0; /* upper dword after the 32-bit increment is always zero */
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v7_0_ring_pad_ib - pad the IB
+ *
+ * @ring: amdgpu ring pointer
+ * @ib: indirect buffer to fill with padding
+ *
+ * Pad the IB with NOPs until its length is a multiple of 8 dwords.
+ */
+static void sdma_v7_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 0x7; /* dwords missing to the next multiple of 8 */
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0)) /* first NOP carries the remaining count */
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
+}
+
+/**
+ * sdma_v7_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emit a POLL_REGMEM that waits for the ring's fence to reach sync_seq.
+ */
+static void sdma_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc); /* low address bits, two lowest cleared */
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v7_0_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: page table base address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA; the work is delegated to the GMC helper.
+ */
+static void sdma_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static void sdma_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ /* The SRBM WRITE command is not supported on sdma v7.
+ * Use the register WRITE command instead, whose opcode is the same as SRBM WRITE.
+ */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE));
+ amdgpu_ring_write(ring, reg << 2); /* reg * 4: presumably dword index to byte offset */
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{ /* Emit a POLL_REGMEM that waits until (@reg & @mask) == @val. */
+ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+ amdgpu_ring_write(ring, reg << 2); /* reg * 4: presumably dword index to byte offset */
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+static void sdma_v7_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref); /* write @ref to @reg0 */
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); /* then wait for all @mask bits in @reg1 */
+}
+
+static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret;
+
+	/*
+	 * amdgpu_user_queue policy: 1 leaves both flags clear, 2 sets
+	 * no_user_submission, and every other value (including -1 and 0)
+	 * sets disable_uq.
+	 */
+	if (amdgpu_user_queue == 1) {
+		adev->sdma.no_user_submission = false;
+		adev->sdma.disable_uq = false;
+	} else if (amdgpu_user_queue == 2) {
+		adev->sdma.no_user_submission = true;
+		adev->sdma.disable_uq = false;
+	} else {
+		adev->sdma.no_user_submission = false;
+		adev->sdma.disable_uq = true;
+	}
+
+	ret = amdgpu_sdma_init_microcode(adev, 0, true);
+	if (ret) {
+		DRM_ERROR("Failed to init sdma firmware!\n");
+		return ret;
+	}
+
+	/* hook up the callback tables defined in this file */
+	sdma_v7_0_set_ring_funcs(adev);
+	sdma_v7_0_set_buffer_funcs(adev);
+	sdma_v7_0_set_vm_pte_funcs(adev);
+	sdma_v7_0_set_irq_funcs(adev);
+	sdma_v7_0_set_mqd_funcs(adev);
+
+	return 0;
+}
+
+static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0);
+ uint32_t *ptr;
+ /* Register IRQ sources, init one ring per instance, allocate the IP dump buffer. */
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_12_0_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA user fence event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_12_0_0__SRCID__SDMA_FENCE,
+ &adev->sdma.fence_irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->me = i;
+ ring->no_user_submission = adev->sdma.no_user_submission;
+
+ DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
+ ring->use_doorbell?"true":"false");
+
+ ring->doorbell_index =
+ (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
+
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+ /* reset mask comes from instance 0; per-queue reset is added unless running as SR-IOV VF */
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ if (!amdgpu_sriov_vf(adev))
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+ /* Allocate memory for SDMA IP Dump buffer; a failure is only logged, not fatal */
+ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr)
+ adev->sdma.ip_dump = ptr;
+ else
+ DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+ /* user queue funcs are registered only for new enough firmware with user queues enabled */
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ if ((adev->sdma.instance[0].fw_version >= 7966358) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ default:
+ break;
+ }
+
+ return r; /* r is 0 here: the last assignment was checked above */
+}
+
+static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+ /* Tear down the rings, sysfs reset mask, instance context and dump buffer; returns 0. */
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
+ amdgpu_sdma_destroy_inst_ctx(adev, true);
+ /* the ucode buffer is only freed when firmware was loaded directly */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
+ sdma_v12_0_free_ucode_buffer(adev);
+
+ kfree(adev->sdma.ip_dump); /* allocated in sdma_v7_0_sw_init */
+
+ return 0;
+}
+
+static int sdma_v7_0_set_userq_trap_interrupts(struct amdgpu_device *adev,
+					       bool enable)
+{
+	int inst, ret;
+
+	/* nothing to do unless user queue funcs are registered for DMA */
+	if (!adev->userq_funcs[AMDGPU_HW_IP_DMA])
+		return 0;
+
+	/* get or put the trap IRQ once per SDMA instance */
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		unsigned int irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + inst;
+
+		ret = enable ?
+			amdgpu_irq_get(adev, &adev->sdma.trap_irq, irq_type) :
+			amdgpu_irq_put(adev, &adev->sdma.trap_irq, irq_type);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int sdma_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret = sdma_v7_0_start(adev);
+
+	/* only enable the user queue trap interrupts once the engines started */
+	if (!ret)
+		ret = sdma_v7_0_set_userq_trap_interrupts(adev, true);
+
+	return ret;
+}
+
+static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ /* Stop the engines and drop the user queue trap interrupts; always returns 0. */
+ if (amdgpu_sriov_vf(adev))
+ return 0; /* nothing is torn down when running as an SR-IOV VF */
+
+ sdma_v7_0_ctx_switch_enable(adev, false);
+ sdma_v7_0_enable(adev, false);
+ sdma_v7_0_set_userq_trap_interrupts(adev, false); /* result is not checked */
+
+ return 0;
+}
+
+static int sdma_v7_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v7_0_hw_fini(ip_block); /* suspend is the same as hw teardown */
+}
+
+static int sdma_v7_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return sdma_v7_0_hw_init(ip_block); /* resume is the same as hw bring-up */
+}
+
+static bool sdma_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	u32 inst, status;
+
+	/* idle only when every instance reports IDLE in its status register */
+	for (inst = 0; inst < adev->sdma.num_instances; inst++) {
+		status = RREG32(sdma_v7_0_get_reg_offset(adev, inst, regSDMA0_STATUS_REG));
+		if ((status & SDMA0_STATUS_REG__IDLE_MASK) == 0)
+			return false;
+	}
+
+	return true;
+}
+
+static int sdma_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	unsigned i;
+
+	/*
+	 * Poll (1us apart, up to usec_timeout tries) until sdma_v7_0_is_idle()
+	 * reports all num_instances engines idle, not just instances 0 and 1.
+	 */
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (sdma_v7_0_is_idle(ip_block))
+			return 0;
+		udelay(1);
+	}
+	return -ETIMEDOUT;
+}
+
+static int sdma_v7_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ u32 index = 0;
+ u64 sdma_gfx_preempt;
+ /* Request IB preemption and wait for the trailing fence; 0 on success, error otherwise. */
+ amdgpu_sdma_get_index_from_ring(ring, &index);
+ sdma_gfx_preempt =
+ sdma_v7_0_get_reg_offset(adev, index, regSDMA0_QUEUE0_PREEMPT);
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ r = amdgpu_ring_alloc(ring, 10);
+ if (r) {
+ DRM_ERROR("ring %d failed to be allocated \n", ring->idx);
+ return r; /* NOTE(review): preempt condition stays asserted on this path - confirm intended */
+ }
+ sdma_v7_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(sdma_gfx_preempt, 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) { /* the trailing fence never reached trail_seq */
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(sdma_gfx_preempt, 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static int sdma_v7_0_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+ /* @type is passed as the instance argument when locating SDMA0_CNTL */
+ u32 reg_offset = sdma_v7_0_get_reg_offset(adev, type, regSDMA0_CNTL);
+
+ sdma_cntl = RREG32(reg_offset); /* read-modify-write of the TRAP_ENABLE field */
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+
+ return 0;
+}
+
+static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	int instances, queue;
+
+	DRM_DEBUG("IH: SDMA trap\n");
+
+	/* ring_id: bits 3:0 select the queue, bits 7:4 the SDMA instance */
+	queue = entry->ring_id & 0xf;
+	instances = (entry->ring_id & 0xf0) >> 4;
+	/* reject instances this device does not have, not just those above 1 */
+	if (instances >= adev->sdma.num_instances) {
+		DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
+		return -EINVAL;
+	}
+
+	switch (entry->client_id) {
+	case SOC21_IH_CLIENTID_GFX:
+		/* only queue 0 has a ring whose fences are processed here */
+		if (queue == 0)
+			amdgpu_fence_process(&adev->sdma.instance[instances].ring);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static int sdma_v7_0_process_fence_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0]; /* doorbell offset is carried in src_data[0] */
+
+ if (adev->enable_mes && doorbell_offset) { /* only handled with MES enabled and a non-zero offset */
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+ /* the shifted offset is the key into the user queue xarray */
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ }
+
+ return 0;
+}
+
+static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ return 0; /* stub: the interrupt is accepted and no action is taken */
+}
+
+static int sdma_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0; /* stub: @state is ignored and success is reported */
+}
+
+static int sdma_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0; /* stub: @state is ignored and success is reported */
+}
+
+static void sdma_v7_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{ /* empty body: @flags is left untouched */
+}
+
+static void sdma_v7_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0);
+	uint32_t *regs = adev->sdma.ip_dump;
+	int inst, idx;
+
+	/* the dump buffer is optional; there is nothing to print without it */
+	if (!regs)
+		return;
+
+	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
+	/* each instance owns reg_count consecutive entries of the buffer */
+	for (inst = 0; inst < adev->sdma.num_instances; inst++, regs += reg_count) {
+		drm_printf(p, "\nInstance:%d\n", inst);
+		for (idx = 0; idx < reg_count; idx++)
+			drm_printf(p, "%-50s \t 0x%08x\n",
+				   sdma_reg_list_7_0[idx].reg_name, regs[idx]);
+	}
+}
+
+static void sdma_v7_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t instance_offset;
+ uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0);
+ /* Snapshot every register in sdma_reg_list_7_0 for each instance into ip_dump. */
+ if (!adev->sdma.ip_dump)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false); /* NOTE(review): presumably keeps GFXOFF out of the way during the reads */
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ instance_offset = i * reg_count; /* each instance owns reg_count consecutive slots */
+ for (j = 0; j < reg_count; j++)
+ adev->sdma.ip_dump[instance_offset + j] =
+ RREG32(sdma_v7_0_get_reg_offset(adev, i,
+ sdma_reg_list_7_0[j].reg_offset));
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+const struct amd_ip_funcs sdma_v7_0_ip_funcs = { /* IP-level callbacks for SDMA v7.0 */
+ .name = "sdma_v7_0",
+ .early_init = sdma_v7_0_early_init,
+ .late_init = NULL, /* no late init step */
+ .sw_init = sdma_v7_0_sw_init,
+ .sw_fini = sdma_v7_0_sw_fini,
+ .hw_init = sdma_v7_0_hw_init,
+ .hw_fini = sdma_v7_0_hw_fini,
+ .suspend = sdma_v7_0_suspend, /* forwards to hw_fini */
+ .resume = sdma_v7_0_resume, /* forwards to hw_init */
+ .is_idle = sdma_v7_0_is_idle,
+ .wait_for_idle = sdma_v7_0_wait_for_idle,
+ .soft_reset = sdma_v7_0_soft_reset,
+ .check_soft_reset = sdma_v7_0_check_soft_reset,
+ .set_clockgating_state = sdma_v7_0_set_clockgating_state, /* stub */
+ .set_powergating_state = sdma_v7_0_set_powergating_state, /* stub */
+ .get_clockgating_state = sdma_v7_0_get_clockgating_state, /* stub */
+ .dump_ip_state = sdma_v7_0_dump_ip_state,
+ .print_ip_state = sdma_v7_0_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = { /* installed on every instance ring by sdma_v7_0_set_ring_funcs */
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = sdma_v7_0_ring_get_rptr,
+ .get_wptr = sdma_v7_0_ring_get_wptr,
+ .set_wptr = sdma_v7_0_ring_set_wptr,
+ .emit_frame_size = /* sum of the dword counts of the packets listed below */
+ 5 + /* sdma_v7_0_ring_init_cond_exec */
+ 6 + /* sdma_v7_0_ring_emit_hdp_flush */
+ 6 + /* sdma_v7_0_ring_emit_pipeline_sync */
+ /* sdma_v7_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v7_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 5 + 7 + 6, /* sdma_v7_0_ring_emit_ib */
+ .emit_ib = sdma_v7_0_ring_emit_ib,
+ .emit_mem_sync = sdma_v7_0_ring_emit_mem_sync,
+ .emit_fence = sdma_v7_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v7_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v7_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v7_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v7_0_ring_test_ring,
+ .test_ib = sdma_v7_0_ring_test_ib,
+ .insert_nop = sdma_v7_0_ring_insert_nop,
+ .pad_ib = sdma_v7_0_ring_pad_ib,
+ .emit_wreg = sdma_v7_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v7_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v7_0_ring_emit_reg_write_reg_wait,
+ .init_cond_exec = sdma_v7_0_ring_init_cond_exec,
+ .preempt_ib = sdma_v7_0_ring_preempt_ib,
+ .reset = sdma_v7_0_reset_queue,
+};
+
+static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+ /* Point every instance ring at the shared funcs table and record its instance index. */
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v7_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v7_0_trap_irq_funcs = { /* trap IRQ: enable control plus handler */
+ .set = sdma_v7_0_set_trap_irq_state,
+ .process = sdma_v7_0_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v7_0_fence_irq_funcs = { /* fence IRQ: handler only, no .set */
+ .process = sdma_v7_0_process_fence_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v7_0_illegal_inst_irq_funcs = { /* handler is a stub returning 0 */
+ .process = sdma_v7_0_process_illegal_inst_irq,
+};
+
+static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev)
+{ /* Install the IRQ func tables; the trap source gets one type per SDMA instance. */
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
+ adev->sdma.num_instances;
+ adev->sdma.trap_irq.funcs = &sdma_v7_0_trap_irq_funcs;
+ adev->sdma.fence_irq.funcs = &sdma_v7_0_fence_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v7_0_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v7_0_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill with commands
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @copy_flags: copy flags for the buffers
+ *
+ * Append an 8-dword linear copy packet to @ib.
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ uint32_t copy_flags)
+{
+ uint32_t num_type, data_format, max_com, write_cm;
+ /* decode the compression-related fields packed into @copy_flags */
+ max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED);
+ data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT);
+ num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE);
+ write_cm = AMDGPU_COPY_FLAGS_GET(copy_flags, WRITE_COMPRESS_DISABLE) ? 2 : 1;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) |
+ SDMA_PKT_COPY_LINEAR_HEADER_CPV(1);
+
+ ib->ptr[ib->length_dw++] = byte_count - 1; /* byte count is encoded minus one */
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ /* last dword: DCC settings when a compressed read or write is requested, else 0 */
+ if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)))
+ ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) |
+ ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) |
+ ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(write_cm) : 0) |
+ SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1);
+ else
+ ib->ptr[ib->length_dw++] = 0;
+}
+
+/**
+ * sdma_v7_0_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Append a 5-dword constant fill packet to @ib.
+ */
+static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL) |
+ SDMA_PKT_CONSTANT_FILL_HEADER_COMPRESS(1);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1; /* byte count is encoded minus one */
+}
+
+static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = {
+ .copy_max_bytes = 0x400000, /* 4 MiB */
+ .copy_num_dw = 8, /* dwords appended by sdma_v7_0_emit_copy_buffer */
+ .emit_copy_buffer = sdma_v7_0_emit_copy_buffer,
+ .fill_max_bytes = 0x400000, /* 4 MiB */
+ .fill_num_dw = 5, /* dwords appended by sdma_v7_0_emit_fill_buffer */
+ .emit_fill_buffer = sdma_v7_0_emit_fill_buffer,
+};
+
+static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev)
+{ /* Register the copy/fill callbacks and use the instance 0 ring for them. */
+ adev->mman.buffer_funcs = &sdma_v7_0_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 8, /* dwords appended by sdma_v7_0_vm_copy_pte */
+ .copy_pte = sdma_v7_0_vm_copy_pte,
+ .write_pte = sdma_v7_0_vm_write_pte,
+ .set_pte_pde = sdma_v7_0_vm_set_pte_pde,
+};
+
+static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+ /* Register the PTE callbacks and expose every instance's ring scheduler to the VM manager. */
+ adev->vm_manager.vm_pte_funcs = &sdma_v7_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+const struct amdgpu_ip_block_version sdma_v7_0_ip_block = { /* SDMA IP block descriptor, version 7.0.0 */
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 7,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &sdma_v7_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h
new file mode 100644
index 000000000000..5af863bb39c4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V7_0_H__
+#define __SDMA_V7_0_H__
+
+extern const struct amd_ip_funcs sdma_v7_0_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v7_0_ip_block;
+
+#endif /* __SDMA_V7_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 4b81f29e5fd5..e0f139de7991 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -35,6 +35,7 @@
#include "amdgpu_vce.h"
#include "atom.h"
#include "amd_pcie.h"
+
#include "si_dpm.h"
#include "sid.h"
#include "si_ih.h"
@@ -44,17 +45,30 @@
#include "dce_v6_0.h"
#include "si.h"
#include "uvd_v3_1.h"
-#include "amdgpu_vkms.h"
+
+#include "uvd/uvd_4_0_d.h"
+
+#include "smu/smu_6_0_d.h"
+#include "smu/smu_6_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
+#include "gca/gfx_6_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gmc/gmc_6_0_d.h"
+#include "gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
-#include "uvd/uvd_4_0_d.h"
+#include "dce/dce_6_0_sh_mask.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+#include "si_enums.h"
#include "amdgpu_dm.h"
+#include "amdgpu_vkms.h"
static const u32 tahiti_golden_registers[] =
{
@@ -909,7 +923,7 @@ static const u32 hainan_mgcg_cgcg_init[] =
/* XXX: update when we support VCE */
#if 0
-/* tahiti, pitcarin, verde */
+/* tahiti, pitcairn, verde */
static const struct amdgpu_video_codec_info tahiti_video_codecs_encode_array[] =
{
{
@@ -940,7 +954,7 @@ static const struct amdgpu_video_codecs hainan_video_codecs_encode =
.codec_array = NULL,
};
-/* tahiti, pitcarin, verde, oland */
+/* tahiti, pitcairn, verde, oland */
static const struct amdgpu_video_codec_info tahiti_video_codecs_decode_array[] =
{
{
@@ -1071,8 +1085,8 @@ static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg)
u32 r;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
- WREG32(SMC_IND_INDEX_0, (reg));
- r = RREG32(SMC_IND_DATA_0);
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ r = RREG32(mmSMC_IND_DATA_0);
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
return r;
}
@@ -1082,8 +1096,8 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
unsigned long flags;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
- WREG32(SMC_IND_INDEX_0, (reg));
- WREG32(SMC_IND_DATA_0, (v));
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ WREG32(mmSMC_IND_DATA_0, (v));
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
}
@@ -1110,55 +1124,55 @@ static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
}
static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
- {GRBM_STATUS},
+ {mmGRBM_STATUS},
{mmGRBM_STATUS2},
{mmGRBM_STATUS_SE0},
{mmGRBM_STATUS_SE1},
{mmSRBM_STATUS},
{mmSRBM_STATUS2},
- {DMA_STATUS_REG + DMA0_REGISTER_OFFSET},
- {DMA_STATUS_REG + DMA1_REGISTER_OFFSET},
+ {mmDMA_STATUS_REG + DMA0_REGISTER_OFFSET},
+ {mmDMA_STATUS_REG + DMA1_REGISTER_OFFSET},
{mmCP_STAT},
{mmCP_STALLED_STAT1},
{mmCP_STALLED_STAT2},
{mmCP_STALLED_STAT3},
- {GB_ADDR_CONFIG},
- {MC_ARB_RAMCFG},
- {GB_TILE_MODE0},
- {GB_TILE_MODE1},
- {GB_TILE_MODE2},
- {GB_TILE_MODE3},
- {GB_TILE_MODE4},
- {GB_TILE_MODE5},
- {GB_TILE_MODE6},
- {GB_TILE_MODE7},
- {GB_TILE_MODE8},
- {GB_TILE_MODE9},
- {GB_TILE_MODE10},
- {GB_TILE_MODE11},
- {GB_TILE_MODE12},
- {GB_TILE_MODE13},
- {GB_TILE_MODE14},
- {GB_TILE_MODE15},
- {GB_TILE_MODE16},
- {GB_TILE_MODE17},
- {GB_TILE_MODE18},
- {GB_TILE_MODE19},
- {GB_TILE_MODE20},
- {GB_TILE_MODE21},
- {GB_TILE_MODE22},
- {GB_TILE_MODE23},
- {GB_TILE_MODE24},
- {GB_TILE_MODE25},
- {GB_TILE_MODE26},
- {GB_TILE_MODE27},
- {GB_TILE_MODE28},
- {GB_TILE_MODE29},
- {GB_TILE_MODE30},
- {GB_TILE_MODE31},
- {CC_RB_BACKEND_DISABLE, true},
- {GC_USER_RB_BACKEND_DISABLE, true},
- {PA_SC_RASTER_CONFIG, true},
+ {mmGB_ADDR_CONFIG},
+ {mmMC_ARB_RAMCFG},
+ {mmGB_TILE_MODE0},
+ {mmGB_TILE_MODE1},
+ {mmGB_TILE_MODE2},
+ {mmGB_TILE_MODE3},
+ {mmGB_TILE_MODE4},
+ {mmGB_TILE_MODE5},
+ {mmGB_TILE_MODE6},
+ {mmGB_TILE_MODE7},
+ {mmGB_TILE_MODE8},
+ {mmGB_TILE_MODE9},
+ {mmGB_TILE_MODE10},
+ {mmGB_TILE_MODE11},
+ {mmGB_TILE_MODE12},
+ {mmGB_TILE_MODE13},
+ {mmGB_TILE_MODE14},
+ {mmGB_TILE_MODE15},
+ {mmGB_TILE_MODE16},
+ {mmGB_TILE_MODE17},
+ {mmGB_TILE_MODE18},
+ {mmGB_TILE_MODE19},
+ {mmGB_TILE_MODE20},
+ {mmGB_TILE_MODE21},
+ {mmGB_TILE_MODE22},
+ {mmGB_TILE_MODE23},
+ {mmGB_TILE_MODE24},
+ {mmGB_TILE_MODE25},
+ {mmGB_TILE_MODE26},
+ {mmGB_TILE_MODE27},
+ {mmGB_TILE_MODE28},
+ {mmGB_TILE_MODE29},
+ {mmGB_TILE_MODE30},
+ {mmGB_TILE_MODE31},
+ {mmCC_RB_BACKEND_DISABLE, true},
+ {mmGC_USER_RB_BACKEND_DISABLE, true},
+ {mmPA_SC_RASTER_CONFIG, true},
};
static uint32_t si_get_register_value(struct amdgpu_device *adev,
@@ -1264,37 +1278,37 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev)
u32 rom_cntl;
bool r;
- bus_cntl = RREG32(R600_BUS_CNTL);
+ bus_cntl = RREG32(mmBUS_CNTL);
if (adev->mode_info.num_crtc) {
- d1vga_control = RREG32(AVIVO_D1VGA_CONTROL);
- d2vga_control = RREG32(AVIVO_D2VGA_CONTROL);
- vga_render_control = RREG32(VGA_RENDER_CONTROL);
+ d1vga_control = RREG32(mmD1VGA_CONTROL);
+ d2vga_control = RREG32(mmD2VGA_CONTROL);
+ vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
}
rom_cntl = RREG32(R600_ROM_CNTL);
/* enable the rom */
- WREG32(R600_BUS_CNTL, (bus_cntl & ~R600_BIOS_ROM_DIS));
+ WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK));
if (adev->mode_info.num_crtc) {
/* Disable VGA mode */
- WREG32(AVIVO_D1VGA_CONTROL,
- (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
- AVIVO_DVGA_CONTROL_TIMING_SELECT)));
- WREG32(AVIVO_D2VGA_CONTROL,
- (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
- AVIVO_DVGA_CONTROL_TIMING_SELECT)));
- WREG32(VGA_RENDER_CONTROL,
- (vga_render_control & C_000300_VGA_VSTATUS_CNTL));
+ WREG32(mmD1VGA_CONTROL,
+ (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmD2VGA_CONTROL,
+ (d2vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmVGA_RENDER_CONTROL,
+ (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK));
}
WREG32(R600_ROM_CNTL, rom_cntl | R600_SCK_OVERWRITE);
r = amdgpu_read_bios(adev);
/* restore regs */
- WREG32(R600_BUS_CNTL, bus_cntl);
+ WREG32(mmBUS_CNTL, bus_cntl);
if (adev->mode_info.num_crtc) {
- WREG32(AVIVO_D1VGA_CONTROL, d1vga_control);
- WREG32(AVIVO_D2VGA_CONTROL, d2vga_control);
- WREG32(VGA_RENDER_CONTROL, vga_render_control);
+ WREG32(mmD1VGA_CONTROL, d1vga_control);
+ WREG32(mmD2VGA_CONTROL, d2vga_control);
+ WREG32(mmVGA_RENDER_CONTROL, vga_render_control);
}
WREG32(R600_ROM_CNTL, rom_cntl);
return r;
@@ -1331,23 +1345,24 @@ static void si_set_clk_bypass_mode(struct amdgpu_device *adev)
{
u32 tmp, i;
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_BYPASS_EN;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
- tmp |= SPLL_CTLREQ_CHG;
- WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2);
+ tmp |= CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp);
for (i = 0; i < adev->usec_timeout; i++) {
- if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
+ if (RREG32(mmCG_SPLL_STATUS) & CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK)
break;
udelay(1);
}
- tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
- tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
- WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2);
+ tmp &= ~(CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK |
+ CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK);
+ WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp);
tmp = RREG32(MPLL_CNTL_MODE);
tmp &= ~MPLL_MCLK_SEL;
@@ -1358,21 +1373,21 @@ static void si_spll_powerdown(struct amdgpu_device *adev)
{
u32 tmp;
- tmp = RREG32(SPLL_CNTL_MODE);
- tmp |= SPLL_SW_DIR_CONTROL;
- WREG32(SPLL_CNTL_MODE, tmp);
+ tmp = RREG32(mmSPLL_CNTL_MODE);
+ tmp |= SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK;
+ WREG32(mmSPLL_CNTL_MODE, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_RESET;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_SLEEP;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(SPLL_CNTL_MODE);
- tmp &= ~SPLL_SW_DIR_CONTROL;
- WREG32(SPLL_CNTL_MODE, tmp);
+ tmp = RREG32(mmSPLL_CNTL_MODE);
+ tmp &= ~SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK;
+ WREG32(mmSPLL_CNTL_MODE, tmp);
}
static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
@@ -1409,9 +1424,9 @@ static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool si_asic_supports_baco(struct amdgpu_device *adev)
+static int si_asic_supports_baco(struct amdgpu_device *adev)
{
- return false;
+ return 0;
}
static enum amd_reset_method
@@ -1454,14 +1469,14 @@ static void si_vga_set_state(struct amdgpu_device *adev, bool state)
{
uint32_t temp;
- temp = RREG32(CONFIG_CNTL);
+ temp = RREG32(mmCONFIG_CNTL);
if (!state) {
temp &= ~(1<<0);
temp |= (1<<1);
} else {
temp &= ~(1<<1);
}
- WREG32(CONFIG_CNTL, temp);
+ WREG32(mmCONFIG_CNTL, temp);
}
static u32 si_get_xclk(struct amdgpu_device *adev)
@@ -1469,12 +1484,12 @@ static u32 si_get_xclk(struct amdgpu_device *adev)
u32 reference_clock = adev->clock.spll.reference_freq;
u32 tmp;
- tmp = RREG32(CG_CLKPIN_CNTL_2);
- if (tmp & MUX_TCLK_TO_XCLK)
+ tmp = RREG32(mmCG_CLKPIN_CNTL_2);
+ if (tmp & CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK)
return TCLK;
- tmp = RREG32(CG_CLKPIN_CNTL);
- if (tmp & XTALIN_DIVIDE)
+ tmp = RREG32(mmCG_CLKPIN_CNTL);
+ if (tmp & CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK)
return reference_clock / 4;
return reference_clock;
@@ -1519,9 +1534,9 @@ static int si_get_pcie_lanes(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return 0;
- link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
- switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) {
+ switch ((link_width_cntl & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT) {
case LC_LINK_WIDTH_X1:
return 1;
case LC_LINK_WIDTH_X2:
@@ -1568,13 +1583,13 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
return;
}
- link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- link_width_cntl &= ~LC_LINK_WIDTH_MASK;
- link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT;
- link_width_cntl |= (LC_RECONFIG_NOW |
- LC_RECONFIG_ARC_MISSING_ESCAPE);
+ link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ link_width_cntl &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK;
+ link_width_cntl |= mask << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT;
+ link_width_cntl |= (PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK |
+ PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_ARC_MISSING_ESCAPE_MASK);
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
}
static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
@@ -1888,7 +1903,7 @@ static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev)
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
- DRM_ERROR("Timeout setting UVD clocks!\n");
+ DRM_ERROR("Timeout setting VCE clocks!\n");
return -ETIMEDOUT;
}
@@ -2018,13 +2033,13 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
{
- return (RREG32(CC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
+ return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
>> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT;
}
-static int si_common_early_init(void *handle)
+static int si_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->smc_rreg = &si_smc_rreg;
adev->smc_wreg = &si_smc_wreg;
@@ -2148,17 +2163,6 @@ static int si_common_early_init(void *handle)
return 0;
}
-static int si_common_sw_init(void *handle)
-{
- return 0;
-}
-
-static int si_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-
static void si_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
@@ -2250,9 +2254,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
return;
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
- LC_CURRENT_DATA_RATE_SHIFT;
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ current_data_rate = (speed_cntl & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) >>
+ PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
if (current_data_rate == 2) {
DRM_INFO("PCIE gen 3 link speeds already enabled\n");
@@ -2279,17 +2283,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
- tmp = RREG32_PCIE(PCIE_LC_STATUS1);
- max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
- current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+ tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH__SHIFT;
+ current_lw = (tmp & PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH__SHIFT;
if (current_lw < max_lw) {
- tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- if (tmp & LC_RENEGOTIATION_SUPPORT) {
- tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
- tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
- tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ if (tmp & PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATION_SUPPORT_MASK) {
+ tmp &= ~(PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_DIS_MASK);
+ tmp |= (max_lw << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT);
+ tmp |= PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_SUPPORT_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATE_EN_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, tmp);
}
}
@@ -2312,13 +2316,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
PCI_EXP_LNKCTL2,
&gpu_cfg2);
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp |= LC_SET_QUIESCE;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp |= LC_REDO_EQ;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
mdelay(100);
@@ -2331,58 +2335,47 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
gpu_cfg &
PCI_EXP_LNKCTL_HAWD);
- pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (bridge_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(root,
- PCI_EXP_LNKCTL2,
- tmp16);
-
- pcie_capability_read_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- &tmp16);
- tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN);
- tmp16 |= (gpu_cfg2 &
- (PCI_EXP_LNKCTL2_ENTER_COMP |
- PCI_EXP_LNKCTL2_TX_MARGIN));
- pcie_capability_write_word(adev->pdev,
- PCI_EXP_LNKCTL2,
- tmp16);
-
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp &= ~LC_SET_QUIESCE;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
}
}
}
- speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
- speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
- WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
-
- pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_FORCE_EN_SW_SPEED_CHANGE_MASK | PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_HW_SPEED_CHANGE_MASK;
+ speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl);
+ tmp16 = 0;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
- pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS, tmp16);
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
- WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl);
for (i = 0; i < adev->usec_timeout; i++) {
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ if ((speed_cntl & PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK) == 0)
break;
udelay(1);
}
@@ -2440,123 +2433,121 @@ static void si_program_aspm(struct amdgpu_device *adev)
if (!amdgpu_device_should_use_aspm(adev))
return;
- if (adev->flags & AMD_IS_APU)
- return;
- orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
- data &= ~LC_XMIT_N_FTS_MASK;
- data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL);
+ data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
+ data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) | PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL, data);
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
- data |= LC_GO_TO_RECOVERY;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL3, data);
- orig = data = RREG32_PCIE(PCIE_P_CNTL);
- data |= P_IGNORE_EDB_ERR;
+ orig = data = RREG32_PCIE(ixPCIE_P_CNTL);
+ data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK;
if (orig != data)
- WREG32_PCIE(PCIE_P_CNTL, data);
+ WREG32_PCIE(ixPCIE_P_CNTL, data);
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
- data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
- data |= LC_PMI_TO_L1_DIS;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL);
+ data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK);
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (!disable_l0s)
- data |= LC_L0S_INACTIVITY(7);
+ data |= (7 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT);
if (!disable_l1) {
- data |= LC_L1_INACTIVITY(7);
- data &= ~LC_PMI_TO_L1_DIS;
+ data |= (7 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT);
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
if (!disable_plloff_in_l1) {
bool clk_req_support;
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
- data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
- data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0);
+ data &= ~(PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1);
- data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
- data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1);
+ data &= ~(PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0);
- data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
- data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0);
+ data &= ~(PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1);
- data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
- data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1);
+ data &= ~(PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data);
if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) {
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
- data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0);
+ data &= ~PB0_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1);
- data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1);
+ data &= ~PB0_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_2);
- data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_2);
+ data &= ~PB0_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_2, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_2, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_3);
- data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_3);
+ data &= ~PB0_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_3, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_3, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0);
- data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0);
+ data &= ~PB1_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1);
- data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1);
+ data &= ~PB1_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_2);
- data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_2);
+ data &= ~PB1_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_2, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_2, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_3);
- data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_3);
+ data &= ~PB1_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_3, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_3, data);
}
- orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- data &= ~LC_DYN_LANES_PWR_STATE_MASK;
- data |= LC_DYN_LANES_PWR_STATE(3);
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ data &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK;
+ data |= (3 << PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE__SHIFT);
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL);
- data &= ~LS2_EXIT_TIME_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_CNTL);
+ data &= ~PB0_PIF_CNTL__LS2_EXIT_TIME_MASK;
if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
- data |= LS2_EXIT_TIME(5);
+ data |= (5 << PB0_PIF_CNTL__LS2_EXIT_TIME__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_CNTL, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL);
- data &= ~LS2_EXIT_TIME_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_CNTL);
+ data &= ~PB1_PIF_CNTL__LS2_EXIT_TIME_MASK;
if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
- data |= LS2_EXIT_TIME(5);
+ data |= (5 << PB1_PIF_CNTL__LS2_EXIT_TIME__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_CNTL, data);
if (!disable_clkreq &&
!pci_is_root_bus(adev->pdev->bus)) {
@@ -2572,64 +2563,64 @@ static void si_program_aspm(struct amdgpu_device *adev)
}
if (clk_req_support) {
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
- data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL2);
+ data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL2, data);
- orig = data = RREG32(THM_CLK_CNTL);
- data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
- data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+ orig = data = RREG32(mmTHM_CLK_CNTL);
+ data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK | THM_CLK_CNTL__TMON_CLK_SEL_MASK);
+ data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) | (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT);
if (orig != data)
- WREG32(THM_CLK_CNTL, data);
+ WREG32(mmTHM_CLK_CNTL, data);
- orig = data = RREG32(MISC_CLK_CNTL);
- data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
- data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+ orig = data = RREG32(mmMISC_CLK_CNTL);
+ data &= ~(MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK | MISC_CLK_CNTL__ZCLK_SEL_MASK);
+ data |= (1 << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT) | (1 << MISC_CLK_CNTL__ZCLK_SEL__SHIFT);
if (orig != data)
- WREG32(MISC_CLK_CNTL, data);
+ WREG32(mmMISC_CLK_CNTL, data);
- orig = data = RREG32(CG_CLKPIN_CNTL);
- data &= ~BCLK_AS_XCLK;
+ orig = data = RREG32(mmCG_CLKPIN_CNTL);
+ data &= ~CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK;
if (orig != data)
- WREG32(CG_CLKPIN_CNTL, data);
+ WREG32(mmCG_CLKPIN_CNTL, data);
- orig = data = RREG32(CG_CLKPIN_CNTL_2);
- data &= ~FORCE_BIF_REFCLK_EN;
+ orig = data = RREG32(mmCG_CLKPIN_CNTL_2);
+ data &= ~CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK;
if (orig != data)
- WREG32(CG_CLKPIN_CNTL_2, data);
+ WREG32(mmCG_CLKPIN_CNTL_2, data);
- orig = data = RREG32(MPLL_BYPASSCLK_SEL);
- data &= ~MPLL_CLKOUT_SEL_MASK;
- data |= MPLL_CLKOUT_SEL(4);
+ orig = data = RREG32(mmMPLL_BYPASSCLK_SEL);
+ data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK;
+ data |= 4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT;
if (orig != data)
- WREG32(MPLL_BYPASSCLK_SEL, data);
+ WREG32(mmMPLL_BYPASSCLK_SEL, data);
- orig = data = RREG32(SPLL_CNTL_MODE);
- data &= ~SPLL_REFCLK_SEL_MASK;
+ orig = data = RREG32(mmSPLL_CNTL_MODE);
+ data &= ~SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK;
if (orig != data)
- WREG32(SPLL_CNTL_MODE, data);
+ WREG32(mmSPLL_CNTL_MODE, data);
}
}
} else {
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
}
- orig = data = RREG32_PCIE(PCIE_CNTL2);
- data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+ orig = data = RREG32_PCIE(ixPCIE_CNTL2);
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK;
if (orig != data)
- WREG32_PCIE(PCIE_CNTL2, data);
+ WREG32_PCIE(ixPCIE_CNTL2, data);
if (!disable_l0s) {
- data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
- if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
- data = RREG32_PCIE(PCIE_LC_STATUS1);
- if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
- data &= ~LC_L0S_INACTIVITY_MASK;
+ data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL);
+ if((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) == PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) {
+ data = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ if ((data & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK) && (data & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK)) {
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
}
}
}
@@ -2646,9 +2637,9 @@ static void si_fix_pci_max_read_req_size(struct amdgpu_device *adev)
pcie_set_readrq(adev->pdev, 512);
}
-static int si_common_hw_init(void *handle)
+static int si_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
si_fix_pci_max_read_req_size(adev);
si_init_golden_registers(adev);
@@ -2658,47 +2649,28 @@ static int si_common_hw_init(void *handle)
return 0;
}
-static int si_common_hw_fini(void *handle)
+static int si_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int si_common_suspend(void *handle)
+static int si_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_common_hw_fini(adev);
+ return si_common_hw_init(ip_block);
}
-static int si_common_resume(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_common_hw_init(adev);
-}
-
-static bool si_common_is_idle(void *handle)
+static bool si_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int si_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int si_common_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int si_common_set_clockgating_state(void *handle,
+static int si_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_common_set_powergating_state(void *handle,
+static int si_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -2707,16 +2679,10 @@ static int si_common_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_common_ip_funcs = {
.name = "si_common",
.early_init = si_common_early_init,
- .late_init = NULL,
- .sw_init = si_common_sw_init,
- .sw_fini = si_common_sw_fini,
.hw_init = si_common_hw_init,
.hw_fini = si_common_hw_fini,
- .suspend = si_common_suspend,
.resume = si_common_resume,
.is_idle = si_common_is_idle,
- .wait_for_idle = si_common_wait_for_idle,
- .soft_reset = si_common_soft_reset,
.set_clockgating_state = si_common_set_clockgating_state,
.set_powergating_state = si_common_set_powergating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 42c4547f32ec..7f18e4875287 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -27,6 +27,8 @@
#include "si.h"
#include "sid.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
DMA0_REGISTER_OFFSET,
@@ -38,17 +40,31 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);
+/**
+ * si_dma_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (SI).
+ */
static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
{
return *ring->rptr_cpu_addr;
}
+/**
+ * si_dma_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (SI).
+ */
static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
+ return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
}
static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
@@ -56,7 +72,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
}
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
@@ -115,13 +131,11 @@ static void si_dma_stop(struct amdgpu_device *adev)
u32 rb_cntl;
unsigned i;
- amdgpu_sdma_unset_buffer_funcs_helper(adev);
-
for (i = 0; i < adev->sdma.num_instances; i++) {
/* dma0 */
- rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
- rb_cntl &= ~DMA_RB_ENABLE;
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
+ rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]);
+ rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
}
}
@@ -135,51 +149,48 @@ static int si_dma_start(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
- WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
- WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
/* Set ring buffer size in dwords */
rb_bufsz = order_base_2(ring->ring_size / 4);
rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
- rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+ rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK;
#endif
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
/* Initialize the ring buffer's read and write pointers */
- WREG32(DMA_RB_RPTR + sdma_offsets[i], 0);
- WREG32(DMA_RB_WPTR + sdma_offsets[i], 0);
+ WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0);
rptr_addr = ring->rptr_gpu_addr;
- WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
- WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);
+ WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
+ WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);
- rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+ rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;
- WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
+ WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
/* enable DMA IBs */
- ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
+ ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK;
#ifdef __BIG_ENDIAN
- ib_cntl |= DMA_IB_SWAP_ENABLE;
+ ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK;
#endif
- WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl);
+ WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
- dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]);
- dma_cntl &= ~CTXEMPTY_INT_ENABLE;
- WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
+ dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]);
+ dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl);
ring->wptr = 0;
- WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK);
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
-
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -291,7 +302,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -462,11 +473,11 @@ static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int si_dma_early_init(void *handle)
+static int si_dma_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->sdma.num_instances = 2;
+ adev->sdma.num_instances = SDMA_MAX_INSTANCE;
si_dma_set_ring_funcs(adev);
si_dma_set_buffer_funcs(adev);
@@ -476,11 +487,11 @@ static int si_dma_early_init(void *handle)
return 0;
}
-static int si_dma_sw_init(void *handle)
+static int si_dma_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* DMA0 trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
@@ -511,9 +522,9 @@ static int si_dma_sw_init(void *handle)
return r;
}
-static int si_dma_sw_fini(void *handle)
+static int si_dma_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -522,61 +533,56 @@ static int si_dma_sw_fini(void *handle)
return 0;
}
-static int si_dma_hw_init(void *handle)
+static int si_dma_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return si_dma_start(adev);
}
-static int si_dma_hw_fini(void *handle)
+static int si_dma_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- si_dma_stop(adev);
+ si_dma_stop(ip_block->adev);
return 0;
}
-static int si_dma_suspend(void *handle)
+static int si_dma_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_dma_hw_fini(adev);
+ return si_dma_hw_fini(ip_block);
}
-static int si_dma_resume(void *handle)
+static int si_dma_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_dma_hw_init(adev);
+ return si_dma_hw_init(ip_block);
}
-static bool si_dma_is_idle(void *handle)
+static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 tmp = RREG32(SRBM_STATUS2);
+ struct amdgpu_device *adev = ip_block->adev;
+
+ u32 tmp = RREG32(mmSRBM_STATUS2);
- if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK))
+ if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK))
return false;
return true;
}
-static int si_dma_wait_for_idle(void *handle)
+static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (si_dma_is_idle(handle))
+ if (si_dma_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int si_dma_soft_reset(void *handle)
+static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block)
{
DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
return 0;
@@ -593,14 +599,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
case AMDGPU_SDMA_IRQ_INSTANCE0:
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
- sdma_cntl &= ~TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
+ sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
- sdma_cntl |= TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
+ sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
break;
default:
break;
@@ -609,14 +615,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
case AMDGPU_SDMA_IRQ_INSTANCE1:
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
- sdma_cntl &= ~TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
+ sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
- sdma_cntl |= TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
+ sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
break;
default:
break;
@@ -639,13 +645,13 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
return 0;
}
-static int si_dma_set_clockgating_state(void *handle,
+static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
u32 orig, data, offset;
int i;
bool enable;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
enable = (state == AMD_CG_STATE_GATE);
@@ -655,11 +661,11 @@ static int si_dma_set_clockgating_state(void *handle,
offset = DMA0_REGISTER_OFFSET;
else
offset = DMA1_REGISTER_OFFSET;
- orig = data = RREG32(DMA_POWER_CNTL + offset);
- data &= ~MEM_POWER_OVERRIDE;
+ orig = data = RREG32(mmDMA_POWER_CNTL + offset);
+ data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (data != orig)
- WREG32(DMA_POWER_CNTL + offset, data);
- WREG32(DMA_CLK_CTRL + offset, 0x00000100);
+ WREG32(mmDMA_POWER_CNTL + offset, data);
+ WREG32(mmDMA_CLK_CTRL + offset, 0x00000100);
}
} else {
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -667,33 +673,33 @@ static int si_dma_set_clockgating_state(void *handle,
offset = DMA0_REGISTER_OFFSET;
else
offset = DMA1_REGISTER_OFFSET;
- orig = data = RREG32(DMA_POWER_CNTL + offset);
- data |= MEM_POWER_OVERRIDE;
+ orig = data = RREG32(mmDMA_POWER_CNTL + offset);
+ data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (data != orig)
- WREG32(DMA_POWER_CNTL + offset, data);
+ WREG32(mmDMA_POWER_CNTL + offset, data);
- orig = data = RREG32(DMA_CLK_CTRL + offset);
+ orig = data = RREG32(mmDMA_CLK_CTRL + offset);
data = 0xff000000;
if (data != orig)
- WREG32(DMA_CLK_CTRL + offset, data);
+ WREG32(mmDMA_CLK_CTRL + offset, data);
}
}
return 0;
}
-static int si_dma_set_powergating_state(void *handle,
+static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- WREG32(DMA_PGFSM_WRITE, 0x00002000);
- WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
+ WREG32(mmDMA_PGFSM_WRITE, 0x00002000);
+ WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff);
for (tmp = 0; tmp < 5; tmp++)
- WREG32(DMA_PGFSM_WRITE, 0);
+ WREG32(mmDMA_PGFSM_WRITE, 0);
return 0;
}
@@ -701,7 +707,6 @@ static int si_dma_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_dma_ip_funcs = {
.name = "si_dma",
.early_init = si_dma_early_init,
- .late_init = NULL,
.sw_init = si_dma_sw_init,
.sw_fini = si_dma_sw_fini,
.hw_init = si_dma_hw_init,
@@ -766,7 +771,7 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: is this a secure operation
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -776,7 +781,7 @@ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
1, 0, 0, byte_count);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h
index 4e935baa7b91..6da65778292b 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_enums.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h
@@ -23,123 +23,15 @@
#ifndef SI_ENUMS_H
#define SI_ENUMS_H
-#define VBLANK_INT_MASK (1 << 0)
-#define DC_HPDx_INT_EN (1 << 16)
-#define VBLANK_ACK (1 << 4)
-#define VLINE_ACK (1 << 4)
-
-#define CURSOR_WIDTH 64
-#define CURSOR_HEIGHT 64
-
-#define VGA_VSTATUS_CNTL 0xFFFCFFFF
#define PRIORITY_MARK_MASK 0x7fff
#define PRIORITY_OFF (1 << 16)
#define PRIORITY_ALWAYS_ON (1 << 20)
-#define INTERLEAVE_EN (1 << 0)
-
-#define LATENCY_WATERMARK_MASK(x) ((x) << 16)
-#define DC_LB_MEMORY_CONFIG(x) ((x) << 20)
-#define ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8)
-
-#define GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0)
-#define GRPH_ENDIAN_NONE 0
-#define GRPH_ENDIAN_8IN16 1
-#define GRPH_ENDIAN_8IN32 2
-#define GRPH_ENDIAN_8IN64 3
-#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
-#define GRPH_RED_SEL_R 0
-#define GRPH_RED_SEL_G 1
-#define GRPH_RED_SEL_B 2
-#define GRPH_RED_SEL_A 3
-#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
-#define GRPH_GREEN_SEL_G 0
-#define GRPH_GREEN_SEL_B 1
-#define GRPH_GREEN_SEL_A 2
-#define GRPH_GREEN_SEL_R 3
-#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
-#define GRPH_BLUE_SEL_B 0
-#define GRPH_BLUE_SEL_A 1
-#define GRPH_BLUE_SEL_R 2
-#define GRPH_BLUE_SEL_G 3
-#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
-#define GRPH_ALPHA_SEL_A 0
-#define GRPH_ALPHA_SEL_R 1
-#define GRPH_ALPHA_SEL_G 2
-#define GRPH_ALPHA_SEL_B 3
-
-#define GRPH_DEPTH(x) (((x) & 0x3) << 0)
-#define GRPH_DEPTH_8BPP 0
-#define GRPH_DEPTH_16BPP 1
-#define GRPH_DEPTH_32BPP 2
-
-#define GRPH_FORMAT(x) (((x) & 0x7) << 8)
-#define GRPH_FORMAT_INDEXED 0
-#define GRPH_FORMAT_ARGB1555 0
-#define GRPH_FORMAT_ARGB565 1
-#define GRPH_FORMAT_ARGB4444 2
-#define GRPH_FORMAT_AI88 3
-#define GRPH_FORMAT_MONO16 4
-#define GRPH_FORMAT_BGRA5551 5
-#define GRPH_FORMAT_ARGB8888 0
-#define GRPH_FORMAT_ARGB2101010 1
-#define GRPH_FORMAT_32BPP_DIG 2
-#define GRPH_FORMAT_8B_ARGB2101010 3
-#define GRPH_FORMAT_BGRA1010102 4
-#define GRPH_FORMAT_8B_BGRA1010102 5
-#define GRPH_FORMAT_RGB111110 6
-#define GRPH_FORMAT_BGR101111 7
-
-#define GRPH_NUM_BANKS(x) (((x) & 0x3) << 2)
-#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define GRPH_ARRAY_LINEAR_GENERAL 0
-#define GRPH_ARRAY_LINEAR_ALIGNED 1
-#define GRPH_ARRAY_1D_TILED_THIN1 2
-#define GRPH_ARRAY_2D_TILED_THIN1 4
-#define GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13)
-#define GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6)
-#define GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11)
-#define GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18)
-#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define GRPH_PIPE_CONFIG(x) (((x) & 0x1f) << 24)
-
-#define CURSOR_EN (1 << 0)
-#define CURSOR_MODE(x) (((x) & 0x3) << 8)
-#define CURSOR_MONO 0
-#define CURSOR_24_1 1
-#define CURSOR_24_8_PRE_MULT 2
-#define CURSOR_24_8_UNPRE_MULT 3
-#define CURSOR_2X_MAGNIFY (1 << 16)
-#define CURSOR_FORCE_MC_ON (1 << 20)
-#define CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24)
-#define CURSOR_URGENT_ALWAYS 0
-#define CURSOR_URGENT_1_8 1
-#define CURSOR_URGENT_1_4 2
-#define CURSOR_URGENT_3_8 3
-#define CURSOR_URGENT_1_2 4
-#define CURSOR_UPDATE_PENDING (1 << 0)
-#define CURSOR_UPDATE_TAKEN (1 << 1)
-#define CURSOR_UPDATE_LOCK (1 << 16)
-#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
-
-#define SI_CRTC0_REGISTER_OFFSET 0
-#define SI_CRTC1_REGISTER_OFFSET 0x300
-#define SI_CRTC2_REGISTER_OFFSET 0x2600
-#define SI_CRTC3_REGISTER_OFFSET 0x2900
-#define SI_CRTC4_REGISTER_OFFSET 0x2c00
-#define SI_CRTC5_REGISTER_OFFSET 0x2f00
-
-#define DMA0_REGISTER_OFFSET 0x000
-#define DMA1_REGISTER_OFFSET 0x200
-#define ES_AND_GS_AUTO 3
-#define RADEON_PACKET_TYPE3 3
-#define CE_PARTITION_BASE 3
-#define BUF_SWAP_32BIT (2 << 16)
#define GFX_POWER_STATUS (1 << 1)
#define GFX_CLOCK_STATUS (1 << 2)
#define GFX_LS_STATUS (1 << 3)
-#define RLC_BUSY_STATUS (1 << 0)
+#define RLC_BUSY_STATUS (1 << 0)
#define RLC_PUD(x) ((x) << 0)
#define RLC_PUD_MASK (0xff << 0)
#define RLC_PDD(x) ((x) << 8)
@@ -148,144 +40,8 @@
#define RLC_TTPD_MASK (0xff << 16)
#define RLC_MSD(x) ((x) << 24)
#define RLC_MSD_MASK (0xff << 24)
-#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
-#define WRITE_DATA_DST_SEL(x) ((x) << 8)
-#define EVENT_TYPE(x) ((x) << 0)
-#define EVENT_INDEX(x) ((x) << 8)
-#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
-#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
-#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
-#define GFX6_NUM_GFX_RINGS 1
-#define GFX6_NUM_COMPUTE_RINGS 2
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
-#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
-#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x02010002
-#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02011003
-
-#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \
- (((op) & 0xFF) << 8) | \
- ((n) & 0x3FFF) << 16)
-#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
-#define PACKET3_NOP 0x10
-#define PACKET3_SET_BASE 0x11
-#define PACKET3_BASE_INDEX(x) ((x) << 0)
-#define PACKET3_CLEAR_STATE 0x12
-#define PACKET3_INDEX_BUFFER_SIZE 0x13
-#define PACKET3_DISPATCH_DIRECT 0x15
-#define PACKET3_DISPATCH_INDIRECT 0x16
-#define PACKET3_ALLOC_GDS 0x1B
-#define PACKET3_WRITE_GDS_RAM 0x1C
-#define PACKET3_ATOMIC_GDS 0x1D
-#define PACKET3_ATOMIC 0x1E
-#define PACKET3_OCCLUSION_QUERY 0x1F
-#define PACKET3_SET_PREDICATION 0x20
-#define PACKET3_REG_RMW 0x21
-#define PACKET3_COND_EXEC 0x22
-#define PACKET3_PRED_EXEC 0x23
-#define PACKET3_DRAW_INDIRECT 0x24
-#define PACKET3_DRAW_INDEX_INDIRECT 0x25
-#define PACKET3_INDEX_BASE 0x26
-#define PACKET3_DRAW_INDEX_2 0x27
-#define PACKET3_CONTEXT_CONTROL 0x28
-#define PACKET3_INDEX_TYPE 0x2A
-#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
-#define PACKET3_DRAW_INDEX_AUTO 0x2D
-#define PACKET3_DRAW_INDEX_IMMD 0x2E
-#define PACKET3_NUM_INSTANCES 0x2F
-#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
-#define PACKET3_INDIRECT_BUFFER_CONST 0x31
-#define PACKET3_INDIRECT_BUFFER 0x3F
-#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
-#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
-#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36
-#define PACKET3_WRITE_DATA 0x37
-#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
-#define PACKET3_MEM_SEMAPHORE 0x39
-#define PACKET3_MPEG_INDEX 0x3A
-#define PACKET3_COPY_DW 0x3B
-#define PACKET3_WAIT_REG_MEM 0x3C
-#define PACKET3_MEM_WRITE 0x3D
-#define PACKET3_COPY_DATA 0x40
-#define PACKET3_CP_DMA 0x41
-# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
-# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
-# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
-# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
-# define PACKET3_CP_DMA_DIS_WC (1 << 21)
-# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
-# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
-# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
-# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
-# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
-# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
-# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30)
-#define PACKET3_PFP_SYNC_ME 0x42
-#define PACKET3_SURFACE_SYNC 0x43
-# define PACKET3_DEST_BASE_0_ENA (1 << 0)
-# define PACKET3_DEST_BASE_1_ENA (1 << 1)
-# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
-# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
-# define PACKET3_CB2_DEST_BASE_ENA (1 << 8)
-# define PACKET3_CB3_DEST_BASE_ENA (1 << 9)
-# define PACKET3_CB4_DEST_BASE_ENA (1 << 10)
-# define PACKET3_CB5_DEST_BASE_ENA (1 << 11)
-# define PACKET3_CB6_DEST_BASE_ENA (1 << 12)
-# define PACKET3_CB7_DEST_BASE_ENA (1 << 13)
-# define PACKET3_DB_DEST_BASE_ENA (1 << 14)
-# define PACKET3_DEST_BASE_2_ENA (1 << 19)
-# define PACKET3_DEST_BASE_3_ENA (1 << 21)
-# define PACKET3_TCL1_ACTION_ENA (1 << 22)
-# define PACKET3_TC_ACTION_ENA (1 << 23)
-# define PACKET3_CB_ACTION_ENA (1 << 25)
-# define PACKET3_DB_ACTION_ENA (1 << 26)
-# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
-# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
-#define PACKET3_ME_INITIALIZE 0x44
-#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
-#define PACKET3_COND_WRITE 0x45
-#define PACKET3_EVENT_WRITE 0x46
-#define PACKET3_EVENT_WRITE_EOP 0x47
-#define PACKET3_EVENT_WRITE_EOS 0x48
-#define PACKET3_PREAMBLE_CNTL 0x4A
-# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
-# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
-#define PACKET3_ONE_REG_WRITE 0x57
-#define PACKET3_LOAD_CONFIG_REG 0x5F
-#define PACKET3_LOAD_CONTEXT_REG 0x60
-#define PACKET3_LOAD_SH_REG 0x61
-#define PACKET3_SET_CONFIG_REG 0x68
-#define PACKET3_SET_CONFIG_REG_START 0x00002000
-#define PACKET3_SET_CONFIG_REG_END 0x00002c00
-#define PACKET3_SET_CONTEXT_REG 0x69
-#define PACKET3_SET_CONTEXT_REG_START 0x000a000
-#define PACKET3_SET_CONTEXT_REG_END 0x000a400
-#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
-#define PACKET3_SET_RESOURCE_INDIRECT 0x74
-#define PACKET3_SET_SH_REG 0x76
-#define PACKET3_SET_SH_REG_START 0x00002c00
-#define PACKET3_SET_SH_REG_END 0x00003000
-#define PACKET3_SET_SH_REG_OFFSET 0x77
-#define PACKET3_ME_WRITE 0x7A
-#define PACKET3_SCRATCH_RAM_WRITE 0x7D
-#define PACKET3_SCRATCH_RAM_READ 0x7E
-#define PACKET3_CE_WRITE 0x7F
-#define PACKET3_LOAD_CONST_RAM 0x80
-#define PACKET3_WRITE_CONST_RAM 0x81
-#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82
-#define PACKET3_DUMP_CONST_RAM 0x83
-#define PACKET3_INCREMENT_CE_COUNTER 0x84
-#define PACKET3_INCREMENT_DE_COUNTER 0x85
-#define PACKET3_WAIT_ON_CE_COUNTER 0x86
-#define PACKET3_WAIT_ON_DE_COUNTER 0x87
-#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
-#define PACKET3_SET_CE_DE_COUNTERS 0x89
-#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
-#define PACKET3_SWITCH_BUFFER 0x8B
-#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
-#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
-#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 9a24f17a5750..1df00f8a2406 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -27,6 +27,7 @@
#include "amdgpu_ih.h"
#include "sid.h"
#include "si_ih.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
@@ -119,6 +120,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(IH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(IH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(IH_RB_CNTL, tmp);
}
return (wptr & ih->ptr_mask);
}
@@ -150,19 +157,19 @@ static void si_ih_set_rptr(struct amdgpu_device *adev,
WREG32(IH_RB_RPTR, ih->rptr);
}
-static int si_ih_early_init(void *handle)
+static int si_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
si_ih_set_interrupt_funcs(adev);
return 0;
}
-static int si_ih_sw_init(void *handle)
+static int si_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
@@ -171,49 +178,43 @@ static int si_ih_sw_init(void *handle)
return amdgpu_irq_init(adev);
}
-static int si_ih_sw_fini(void *handle)
+static int si_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int si_ih_hw_init(void *handle)
+static int si_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return si_ih_irq_init(adev);
}
-static int si_ih_hw_fini(void *handle)
+static int si_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- si_ih_irq_disable(adev);
+ si_ih_irq_disable(ip_block->adev);
return 0;
}
-static int si_ih_suspend(void *handle)
+static int si_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_ih_hw_fini(adev);
+ return si_ih_hw_fini(ip_block);
}
-static int si_ih_resume(void *handle)
+static int si_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_ih_hw_init(adev);
+ return si_ih_hw_init(ip_block);
}
-static bool si_ih_is_idle(void *handle)
+static bool si_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 tmp = RREG32(SRBM_STATUS);
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
return false;
@@ -221,41 +222,41 @@ static bool si_ih_is_idle(void *handle)
return true;
}
-static int si_ih_wait_for_idle(void *handle)
+static int si_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (si_ih_is_idle(handle))
+ if (si_ih_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int si_ih_soft_reset(void *handle)
+static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
- u32 tmp = RREG32(SRBM_STATUS);
+ u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK;
if (srbm_soft_reset) {
- tmp = RREG32(SRBM_SOFT_RESET);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(SRBM_SOFT_RESET, tmp);
- tmp = RREG32(SRBM_SOFT_RESET);
+ dev_info(adev->dev, "mmSRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
udelay(50);
tmp &= ~srbm_soft_reset;
- WREG32(SRBM_SOFT_RESET, tmp);
- tmp = RREG32(SRBM_SOFT_RESET);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
udelay(50);
}
@@ -263,13 +264,13 @@ static int si_ih_soft_reset(void *handle)
return 0;
}
-static int si_ih_set_clockgating_state(void *handle,
+static int si_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_ih_set_powergating_state(void *handle,
+static int si_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -278,7 +279,6 @@ static int si_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_ih_ip_funcs = {
.name = "si_ih",
.early_init = si_ih_early_init,
- .late_init = NULL,
.sw_init = si_ih_sw_init,
.sw_fini = si_ih_sw_fini,
.hw_init = si_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h
index 9a39cbfe6db9..cbd4f8951cfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/sid.h
+++ b/drivers/gpu/drm/amd/amdgpu/sid.h
@@ -24,47 +24,12 @@
#ifndef SI_H
#define SI_H
-#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2
-
-#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
-#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
-#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
-
-#define SI_MAX_SH_GPRS 256
-#define SI_MAX_TEMP_GPRS 16
-#define SI_MAX_SH_THREADS 256
-#define SI_MAX_SH_STACK_ENTRIES 4096
-#define SI_MAX_FRC_EOV_CNT 16384
-#define SI_MAX_BACKENDS 8
-#define SI_MAX_BACKENDS_MASK 0xFF
-#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F
-#define SI_MAX_SIMDS 12
-#define SI_MAX_SIMDS_MASK 0x0FFF
-#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF
-#define SI_MAX_PIPES 8
-#define SI_MAX_PIPES_MASK 0xFF
-#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F
-#define SI_MAX_LDS_NUM 0xFFFF
-#define SI_MAX_TCC 16
-#define SI_MAX_TCC_MASK 0xFFFF
#define SI_MAX_CTLACKS_ASSERTION_WAIT 100
-/* SMC IND accessor regs */
-#define SMC_IND_INDEX_0 0x80
-#define SMC_IND_DATA_0 0x81
-
-#define SMC_IND_ACCESS_CNTL 0x8A
-# define AUTO_INCREMENT_IND_0 (1 << 0)
-#define SMC_MESSAGE_0 0x8B
-#define SMC_RESP_0 0x8C
-
/* CG IND registers are accessed via SMC indirect space + SMC_CG_IND_START */
#define SMC_CG_IND_START 0xc0030000
#define SMC_CG_IND_END 0xc0040000
-#define CG_CGTT_LOCAL_0 0x400
-#define CG_CGTT_LOCAL_1 0x401
-
/* SMC IND registers */
#define SMC_SYSCON_RESET_CNTL 0x80000000
# define RST_REG (1 << 0)
@@ -72,9 +37,6 @@
# define CK_DISABLE (1 << 0)
# define CKEN (1 << 24)
-#define VGA_HDP_CONTROL 0xCA
-#define VGA_MEMORY_DISABLE (1 << 4)
-
#define DCCG_DISP_SLOW_SELECT_REG 0x13F
#define DCCG_DISP1_SLOW_SELECT(x) ((x) << 0)
#define DCCG_DISP1_SLOW_SELECT_MASK (7 << 0)
@@ -83,47 +45,6 @@
#define DCCG_DISP2_SLOW_SELECT_MASK (7 << 4)
#define DCCG_DISP2_SLOW_SELECT_SHIFT 4
-#define CG_SPLL_FUNC_CNTL 0x180
-#define SPLL_RESET (1 << 0)
-#define SPLL_SLEEP (1 << 1)
-#define SPLL_BYPASS_EN (1 << 3)
-#define SPLL_REF_DIV(x) ((x) << 4)
-#define SPLL_REF_DIV_MASK (0x3f << 4)
-#define SPLL_PDIV_A(x) ((x) << 20)
-#define SPLL_PDIV_A_MASK (0x7f << 20)
-#define SPLL_PDIV_A_SHIFT 20
-#define CG_SPLL_FUNC_CNTL_2 0x181
-#define SCLK_MUX_SEL(x) ((x) << 0)
-#define SCLK_MUX_SEL_MASK (0x1ff << 0)
-#define SPLL_CTLREQ_CHG (1 << 23)
-#define SCLK_MUX_UPDATE (1 << 26)
-#define CG_SPLL_FUNC_CNTL_3 0x182
-#define SPLL_FB_DIV(x) ((x) << 0)
-#define SPLL_FB_DIV_MASK (0x3ffffff << 0)
-#define SPLL_FB_DIV_SHIFT 0
-#define SPLL_DITHEN (1 << 28)
-#define CG_SPLL_FUNC_CNTL_4 0x183
-
-#define SPLL_STATUS 0x185
-#define SPLL_CHG_STATUS (1 << 1)
-#define SPLL_CNTL_MODE 0x186
-#define SPLL_SW_DIR_CONTROL (1 << 0)
-# define SPLL_REFCLK_SEL(x) ((x) << 26)
-# define SPLL_REFCLK_SEL_MASK (3 << 26)
-
-#define CG_SPLL_SPREAD_SPECTRUM 0x188
-#define SSEN (1 << 0)
-#define CLK_S(x) ((x) << 4)
-#define CLK_S_MASK (0xfff << 4)
-#define CLK_S_SHIFT 4
-#define CG_SPLL_SPREAD_SPECTRUM_2 0x189
-#define CLK_V(x) ((x) << 0)
-#define CLK_V_MASK (0x3ffffff << 0)
-#define CLK_V_SHIFT 0
-
-#define CG_SPLL_AUTOSCALE_CNTL 0x18b
-# define AUTOSCALE_ON_SS_CLEAR (1 << 9)
-
/* discrete uvd clocks */
#define CG_UPLL_FUNC_CNTL 0x18d
# define UPLL_RESET_MASK 0x00000001
@@ -153,317 +74,13 @@
#define CG_UPLL_SPREAD_SPECTRUM 0x194
# define SSEN_MASK 0x00000001
-#define MPLL_BYPASSCLK_SEL 0x197
-# define MPLL_CLKOUT_SEL(x) ((x) << 8)
-# define MPLL_CLKOUT_SEL_MASK 0xFF00
-
-#define CG_CLKPIN_CNTL 0x198
-# define XTALIN_DIVIDE (1 << 1)
-# define BCLK_AS_XCLK (1 << 2)
-#define CG_CLKPIN_CNTL_2 0x199
-# define FORCE_BIF_REFCLK_EN (1 << 3)
-# define MUX_TCLK_TO_XCLK (1 << 8)
-
-#define THM_CLK_CNTL 0x19b
-# define CMON_CLK_SEL(x) ((x) << 0)
-# define CMON_CLK_SEL_MASK 0xFF
-# define TMON_CLK_SEL(x) ((x) << 8)
-# define TMON_CLK_SEL_MASK 0xFF00
-#define MISC_CLK_CNTL 0x19c
-# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0)
-# define DEEP_SLEEP_CLK_SEL_MASK 0xFF
-# define ZCLK_SEL(x) ((x) << 8)
-# define ZCLK_SEL_MASK 0xFF00
-
-#define CG_THERMAL_CTRL 0x1c0
-#define DPM_EVENT_SRC(x) ((x) << 0)
-#define DPM_EVENT_SRC_MASK (7 << 0)
-#define DIG_THERM_DPM(x) ((x) << 14)
-#define DIG_THERM_DPM_MASK 0x003FC000
-#define DIG_THERM_DPM_SHIFT 14
-#define CG_THERMAL_STATUS 0x1c1
-#define FDO_PWM_DUTY(x) ((x) << 9)
-#define FDO_PWM_DUTY_MASK (0xff << 9)
-#define FDO_PWM_DUTY_SHIFT 9
-#define CG_THERMAL_INT 0x1c2
-#define DIG_THERM_INTH(x) ((x) << 8)
-#define DIG_THERM_INTH_MASK 0x0000FF00
-#define DIG_THERM_INTH_SHIFT 8
-#define DIG_THERM_INTL(x) ((x) << 16)
-#define DIG_THERM_INTL_MASK 0x00FF0000
-#define DIG_THERM_INTL_SHIFT 16
-#define THERM_INT_MASK_HIGH (1 << 24)
-#define THERM_INT_MASK_LOW (1 << 25)
-
-#define CG_MULT_THERMAL_CTRL 0x1c4
-#define TEMP_SEL(x) ((x) << 20)
-#define TEMP_SEL_MASK (0xff << 20)
-#define TEMP_SEL_SHIFT 20
-#define CG_MULT_THERMAL_STATUS 0x1c5
-#define ASIC_MAX_TEMP(x) ((x) << 0)
-#define ASIC_MAX_TEMP_MASK 0x000001ff
-#define ASIC_MAX_TEMP_SHIFT 0
-#define CTF_TEMP(x) ((x) << 9)
-#define CTF_TEMP_MASK 0x0003fe00
-#define CTF_TEMP_SHIFT 9
-
-#define CG_FDO_CTRL0 0x1d5
-#define FDO_STATIC_DUTY(x) ((x) << 0)
-#define FDO_STATIC_DUTY_MASK 0x000000FF
-#define FDO_STATIC_DUTY_SHIFT 0
-#define CG_FDO_CTRL1 0x1d6
-#define FMAX_DUTY100(x) ((x) << 0)
-#define FMAX_DUTY100_MASK 0x000000FF
-#define FMAX_DUTY100_SHIFT 0
-#define CG_FDO_CTRL2 0x1d7
-#define TMIN(x) ((x) << 0)
-#define TMIN_MASK 0x000000FF
-#define TMIN_SHIFT 0
-#define FDO_PWM_MODE(x) ((x) << 11)
-#define FDO_PWM_MODE_MASK (7 << 11)
-#define FDO_PWM_MODE_SHIFT 11
-#define TACH_PWM_RESP_RATE(x) ((x) << 25)
-#define TACH_PWM_RESP_RATE_MASK (0x7f << 25)
-#define TACH_PWM_RESP_RATE_SHIFT 25
-
-#define CG_TACH_CTRL 0x1dc
-# define EDGE_PER_REV(x) ((x) << 0)
-# define EDGE_PER_REV_MASK (0x7 << 0)
-# define EDGE_PER_REV_SHIFT 0
-# define TARGET_PERIOD(x) ((x) << 3)
-# define TARGET_PERIOD_MASK 0xfffffff8
-# define TARGET_PERIOD_SHIFT 3
-#define CG_TACH_STATUS 0x1dd
-# define TACH_PERIOD(x) ((x) << 0)
-# define TACH_PERIOD_MASK 0xffffffff
-# define TACH_PERIOD_SHIFT 0
-
-#define GENERAL_PWRMGT 0x1e0
-# define GLOBAL_PWRMGT_EN (1 << 0)
-# define STATIC_PM_EN (1 << 1)
-# define THERMAL_PROTECTION_DIS (1 << 2)
-# define THERMAL_PROTECTION_TYPE (1 << 3)
-# define SW_SMIO_INDEX(x) ((x) << 6)
-# define SW_SMIO_INDEX_MASK (1 << 6)
-# define SW_SMIO_INDEX_SHIFT 6
-# define VOLT_PWRMGT_EN (1 << 10)
-# define DYN_SPREAD_SPECTRUM_EN (1 << 23)
-#define CG_TPC 0x1e1
-#define SCLK_PWRMGT_CNTL 0x1e2
-# define SCLK_PWRMGT_OFF (1 << 0)
-# define SCLK_LOW_D1 (1 << 1)
-# define FIR_RESET (1 << 4)
-# define FIR_FORCE_TREND_SEL (1 << 5)
-# define FIR_TREND_MODE (1 << 6)
-# define DYN_GFX_CLK_OFF_EN (1 << 7)
-# define GFX_CLK_FORCE_ON (1 << 8)
-# define GFX_CLK_REQUEST_OFF (1 << 9)
-# define GFX_CLK_FORCE_OFF (1 << 10)
-# define GFX_CLK_OFF_ACPI_D1 (1 << 11)
-# define GFX_CLK_OFF_ACPI_D2 (1 << 12)
-# define GFX_CLK_OFF_ACPI_D3 (1 << 13)
-# define DYN_LIGHT_SLEEP_EN (1 << 14)
-
-#define TARGET_AND_CURRENT_PROFILE_INDEX 0x1e6
-# define CURRENT_STATE_INDEX_MASK (0xf << 4)
-# define CURRENT_STATE_INDEX_SHIFT 4
-
-#define CG_FTV 0x1ef
-
-#define CG_FFCT_0 0x1f0
-# define UTC_0(x) ((x) << 0)
-# define UTC_0_MASK (0x3ff << 0)
-# define DTC_0(x) ((x) << 10)
-# define DTC_0_MASK (0x3ff << 10)
-
-#define CG_BSP 0x1ff
-# define BSP(x) ((x) << 0)
-# define BSP_MASK (0xffff << 0)
-# define BSU(x) ((x) << 16)
-# define BSU_MASK (0xf << 16)
-#define CG_AT 0x200
-# define CG_R(x) ((x) << 0)
-# define CG_R_MASK (0xffff << 0)
-# define CG_L(x) ((x) << 16)
-# define CG_L_MASK (0xffff << 16)
-
-#define CG_GIT 0x201
-# define CG_GICST(x) ((x) << 0)
-# define CG_GICST_MASK (0xffff << 0)
-# define CG_GIPOT(x) ((x) << 16)
-# define CG_GIPOT_MASK (0xffff << 16)
-
-#define CG_SSP 0x203
-# define SST(x) ((x) << 0)
-# define SST_MASK (0xffff << 0)
-# define SSTU(x) ((x) << 16)
-# define SSTU_MASK (0xf << 16)
-
-#define CG_DISPLAY_GAP_CNTL 0x20a
-# define DISP1_GAP(x) ((x) << 0)
-# define DISP1_GAP_MASK (3 << 0)
-# define DISP2_GAP(x) ((x) << 2)
-# define DISP2_GAP_MASK (3 << 2)
-# define VBI_TIMER_COUNT(x) ((x) << 4)
-# define VBI_TIMER_COUNT_MASK (0x3fff << 4)
-# define VBI_TIMER_UNIT(x) ((x) << 20)
-# define VBI_TIMER_UNIT_MASK (7 << 20)
-# define DISP1_GAP_MCHG(x) ((x) << 24)
-# define DISP1_GAP_MCHG_MASK (3 << 24)
-# define DISP2_GAP_MCHG(x) ((x) << 26)
-# define DISP2_GAP_MCHG_MASK (3 << 26)
-
-#define CG_ULV_CONTROL 0x21e
-#define CG_ULV_PARAMETER 0x21f
-
-#define SMC_SCRATCH0 0x221
-
-#define CG_CAC_CTRL 0x22e
-# define CAC_WINDOW(x) ((x) << 0)
-# define CAC_WINDOW_MASK 0x00ffffff
-
-#define DMIF_ADDR_CONFIG 0x2F5
-
-#define DMIF_ADDR_CALC 0x300
-
-#define PIPE0_DMIF_BUFFER_CONTROL 0x0328
-# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0)
-# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4)
-
-#define SRBM_STATUS 0x394
-#define GRBM_RQ_PENDING (1 << 5)
-#define VMC_BUSY (1 << 8)
-#define MCB_BUSY (1 << 9)
-#define MCB_NON_DISPLAY_BUSY (1 << 10)
-#define MCC_BUSY (1 << 11)
-#define MCD_BUSY (1 << 12)
-#define SEM_BUSY (1 << 14)
-#define IH_BUSY (1 << 17)
-
-#define SRBM_SOFT_RESET 0x398
-#define SOFT_RESET_BIF (1 << 1)
-#define SOFT_RESET_DC (1 << 5)
-#define SOFT_RESET_DMA1 (1 << 6)
-#define SOFT_RESET_GRBM (1 << 8)
-#define SOFT_RESET_HDP (1 << 9)
-#define SOFT_RESET_IH (1 << 10)
-#define SOFT_RESET_MC (1 << 11)
-#define SOFT_RESET_ROM (1 << 14)
-#define SOFT_RESET_SEM (1 << 15)
-#define SOFT_RESET_VMC (1 << 17)
-#define SOFT_RESET_DMA (1 << 20)
-#define SOFT_RESET_TST (1 << 21)
-#define SOFT_RESET_REGBB (1 << 22)
-#define SOFT_RESET_ORB (1 << 23)
-
-#define CC_SYS_RB_BACKEND_DISABLE 0x3A0
-#define GC_USER_SYS_RB_BACKEND_DISABLE 0x3A1
-
-#define SRBM_READ_ERROR 0x3A6
-#define SRBM_INT_CNTL 0x3A8
-#define SRBM_INT_ACK 0x3AA
-
-#define SRBM_STATUS2 0x3B1
-#define DMA_BUSY (1 << 5)
-#define DMA1_BUSY (1 << 6)
-
-#define VM_L2_CNTL 0x500
-#define ENABLE_L2_CACHE (1 << 0)
-#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1)
-#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2)
-#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4)
-#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9)
-#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10)
-#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15)
-#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19)
-#define VM_L2_CNTL2 0x501
-#define INVALIDATE_ALL_L1_TLBS (1 << 0)
-#define INVALIDATE_L2_CACHE (1 << 1)
-#define INVALIDATE_CACHE_MODE(x) ((x) << 26)
-#define INVALIDATE_PTE_AND_PDE_CACHES 0
-#define INVALIDATE_ONLY_PTE_CACHES 1
-#define INVALIDATE_ONLY_PDE_CACHES 2
-#define VM_L2_CNTL3 0x502
-#define BANK_SELECT(x) ((x) << 0)
-#define L2_CACHE_UPDATE_MODE(x) ((x) << 6)
-#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15)
-#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20)
-#define VM_L2_STATUS 0x503
-#define L2_BUSY (1 << 0)
-#define VM_CONTEXT0_CNTL 0x504
-#define ENABLE_CONTEXT (1 << 0)
-#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
-#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
-#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
-#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
-#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
-#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
-#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
-#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
-#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
-#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
-#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
-#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
-#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
-#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24)
-#define VM_CONTEXT1_CNTL 0x505
-#define VM_CONTEXT0_CNTL2 0x50C
-#define VM_CONTEXT1_CNTL2 0x50D
-#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x50E
-#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x50F
-#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x510
-#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x511
-#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x512
-#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x513
-#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x514
-#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x515
-
-#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x53f
-#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x537
-#define PROTECTIONS_MASK (0xf << 0)
-#define PROTECTIONS_SHIFT 0
- /* bit 0: range
- * bit 1: pde0
- * bit 2: valid
- * bit 3: read
- * bit 4: write
- */
-#define MEMORY_CLIENT_ID_MASK (0xff << 12)
-#define MEMORY_CLIENT_ID_SHIFT 12
-#define MEMORY_CLIENT_RW_MASK (1 << 24)
-#define MEMORY_CLIENT_RW_SHIFT 24
-#define FAULT_VMID_MASK (0xf << 25)
-#define FAULT_VMID_SHIFT 25
-
#define VM_INVALIDATE_REQUEST 0x51E
#define VM_INVALIDATE_RESPONSE 0x51F
-#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x546
-#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x547
-
-#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54F
-#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 0x550
-#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x551
-#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x552
-#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x553
-#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x554
-#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x555
-#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x556
-#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x557
-#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x558
-
-#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x55F
-#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x560
-
#define VM_L2_CG 0x570
#define MC_CG_ENABLE (1 << 18)
#define MC_LS_ENABLE (1 << 19)
-#define MC_SHARED_CHMAP 0x801
-#define NOOFCHAN_SHIFT 12
-#define NOOFCHAN_MASK 0x0000f000
-#define MC_SHARED_CHREMAP 0x802
-
#define MC_VM_FB_LOCATION 0x809
#define MC_VM_AGP_TOP 0x80A
#define MC_VM_AGP_BOT 0x80B
@@ -495,21 +112,6 @@
#define MC_CITF_MISC_WR_CG 0x993
#define MC_CITF_MISC_VM_CG 0x994
-#define MC_ARB_RAMCFG 0x9D8
-#define NOOFBANK_SHIFT 0
-#define NOOFBANK_MASK 0x00000003
-#define NOOFRANK_SHIFT 2
-#define NOOFRANK_MASK 0x00000004
-#define NOOFROWS_SHIFT 3
-#define NOOFROWS_MASK 0x00000038
-#define NOOFCOLS_SHIFT 6
-#define NOOFCOLS_MASK 0x000000C0
-#define CHANSIZE_SHIFT 8
-#define CHANSIZE_MASK 0x00000100
-#define CHANSIZE_OVERRIDE (1 << 11)
-#define NOOFGROUPS_SHIFT 12
-#define NOOFGROUPS_MASK 0x00001000
-
#define MC_ARB_DRAM_TIMING 0x9DD
#define MC_ARB_DRAM_TIMING2 0x9DE
@@ -635,20 +237,6 @@
#define CLKS(x) ((x) << 0)
#define CLKS_MASK (0xfff << 0)
-#define HDP_HOST_PATH_CNTL 0xB00
-#define CLOCK_GATING_DIS (1 << 23)
-#define HDP_NONSURFACE_BASE 0xB01
-#define HDP_NONSURFACE_INFO 0xB02
-#define HDP_NONSURFACE_SIZE 0xB03
-
-#define HDP_DEBUG0 0xBCC
-
-#define HDP_ADDR_CONFIG 0xBD2
-#define HDP_MISC_CNTL 0xBD3
-#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0)
-#define HDP_MEM_POWER_LS 0xBD4
-#define HDP_LS_ENABLE (1 << 0)
-
#define ATC_MISC_CG 0xCD4
#define IH_RB_CNTL 0xF80
@@ -678,8 +266,6 @@
# define MC_WR_CLEAN_CNT(x) ((x) << 20)
# define MC_VMID(x) ((x) << 25)
-#define CONFIG_MEMSIZE 0x150A
-
#define INTERRUPT_CNTL 0x151A
# define IH_DUMMY_RD_OVERRIDE (1 << 0)
# define IH_DUMMY_RD_EN (1 << 1)
@@ -687,734 +273,28 @@
# define GEN_IH_INT_EN (1 << 8)
#define INTERRUPT_CNTL2 0x151B
-#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x1520
-
-#define BIF_FB_EN 0x1524
-#define FB_READ_EN (1 << 0)
-#define FB_WRITE_EN (1 << 1)
-
-#define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528
-
-/* DCE6 ELD audio interface */
-#define AZ_F0_CODEC_ENDPOINT_INDEX 0x1780
-# define AZ_ENDPOINT_REG_INDEX(x) (((x) & 0xff) << 0)
-# define AZ_ENDPOINT_REG_WRITE_EN (1 << 8)
-#define AZ_F0_CODEC_ENDPOINT_DATA 0x1781
-
-#define AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER 0x25
-#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0)
-#define SPEAKER_ALLOCATION_MASK (0x7f << 0)
-#define SPEAKER_ALLOCATION_SHIFT 0
-#define HDMI_CONNECTION (1 << 16)
-#define DP_CONNECTION (1 << 17)
-
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */
-# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
-/* max channels minus one. 7 = 8 channels */
-# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
-# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
-# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
-/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
- * bit0 = 32 kHz
- * bit1 = 44.1 kHz
- * bit2 = 48 kHz
- * bit3 = 88.2 kHz
- * bit4 = 96 kHz
- * bit5 = 176.4 kHz
- * bit6 = 192 kHz
- */
-
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC 0x37
-# define VIDEO_LIPSYNC(x) (((x) & 0xff) << 0)
-# define AUDIO_LIPSYNC(x) (((x) & 0xff) << 8)
-/* VIDEO_LIPSYNC, AUDIO_LIPSYNC
- * 0 = invalid
- * x = legal delay value
- * 255 = sync not supported
- */
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_HBR 0x38
-# define HBR_CAPABLE (1 << 0) /* enabled by default */
-
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO0 0x3a
-# define MANUFACTURER_ID(x) (((x) & 0xffff) << 0)
-# define PRODUCT_ID(x) (((x) & 0xffff) << 16)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO1 0x3b
-# define SINK_DESCRIPTION_LEN(x) (((x) & 0xff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO2 0x3c
-# define PORT_ID0(x) (((x) & 0xffffffff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO3 0x3d
-# define PORT_ID1(x) (((x) & 0xffffffff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO4 0x3e
-# define DESCRIPTION0(x) (((x) & 0xff) << 0)
-# define DESCRIPTION1(x) (((x) & 0xff) << 8)
-# define DESCRIPTION2(x) (((x) & 0xff) << 16)
-# define DESCRIPTION3(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO5 0x3f
-# define DESCRIPTION4(x) (((x) & 0xff) << 0)
-# define DESCRIPTION5(x) (((x) & 0xff) << 8)
-# define DESCRIPTION6(x) (((x) & 0xff) << 16)
-# define DESCRIPTION7(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO6 0x40
-# define DESCRIPTION8(x) (((x) & 0xff) << 0)
-# define DESCRIPTION9(x) (((x) & 0xff) << 8)
-# define DESCRIPTION10(x) (((x) & 0xff) << 16)
-# define DESCRIPTION11(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO7 0x41
-# define DESCRIPTION12(x) (((x) & 0xff) << 0)
-# define DESCRIPTION13(x) (((x) & 0xff) << 8)
-# define DESCRIPTION14(x) (((x) & 0xff) << 16)
-# define DESCRIPTION15(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO8 0x42
-# define DESCRIPTION16(x) (((x) & 0xff) << 0)
-# define DESCRIPTION17(x) (((x) & 0xff) << 8)
-
-#define AZ_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL 0x54
-# define AUDIO_ENABLED (1 << 31)
-
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56
-#define PORT_CONNECTIVITY_MASK (3 << 30)
-#define PORT_CONNECTIVITY_SHIFT 30
-
-#define DC_LB_MEMORY_SPLIT 0x1AC3
-#define DC_LB_MEMORY_CONFIG(x) ((x) << 20)
-
-#define PRIORITY_A_CNT 0x1AC6
-#define PRIORITY_MARK_MASK 0x7fff
-#define PRIORITY_OFF (1 << 16)
-#define PRIORITY_ALWAYS_ON (1 << 20)
-#define PRIORITY_B_CNT 0x1AC7
-
-#define DPG_PIPE_ARBITRATION_CONTROL3 0x1B32
-# define LATENCY_WATERMARK_MASK(x) ((x) << 16)
-#define DPG_PIPE_LATENCY_CONTROL 0x1B33
-# define LATENCY_LOW_WATERMARK(x) ((x) << 0)
-# define LATENCY_HIGH_WATERMARK(x) ((x) << 16)
-
-/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */
-#define VLINE_STATUS 0x1AEE
-# define VLINE_OCCURRED (1 << 0)
-# define VLINE_ACK (1 << 4)
-# define VLINE_STAT (1 << 12)
-# define VLINE_INTERRUPT (1 << 16)
-# define VLINE_INTERRUPT_TYPE (1 << 17)
-/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */
-#define VBLANK_STATUS 0x1AEF
-# define VBLANK_OCCURRED (1 << 0)
-# define VBLANK_ACK (1 << 4)
-# define VBLANK_STAT (1 << 12)
-# define VBLANK_INTERRUPT (1 << 16)
-# define VBLANK_INTERRUPT_TYPE (1 << 17)
-
-/* 0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */
-#define INT_MASK 0x1AD0
-# define VBLANK_INT_MASK (1 << 0)
-# define VLINE_INT_MASK (1 << 4)
-
-#define DISP_INTERRUPT_STATUS 0x183D
-# define LB_D1_VLINE_INTERRUPT (1 << 2)
-# define LB_D1_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD1_INTERRUPT (1 << 17)
-# define DC_HPD1_RX_INTERRUPT (1 << 18)
-# define DACA_AUTODETECT_INTERRUPT (1 << 22)
-# define DACB_AUTODETECT_INTERRUPT (1 << 23)
-# define DC_I2C_SW_DONE_INTERRUPT (1 << 24)
-# define DC_I2C_HW_DONE_INTERRUPT (1 << 25)
-#define DISP_INTERRUPT_STATUS_CONTINUE 0x183E
-# define LB_D2_VLINE_INTERRUPT (1 << 2)
-# define LB_D2_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD2_INTERRUPT (1 << 17)
-# define DC_HPD2_RX_INTERRUPT (1 << 18)
-# define DISP_TIMER_INTERRUPT (1 << 24)
-#define DISP_INTERRUPT_STATUS_CONTINUE2 0x183F
-# define LB_D3_VLINE_INTERRUPT (1 << 2)
-# define LB_D3_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD3_INTERRUPT (1 << 17)
-# define DC_HPD3_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE3 0x1840
-# define LB_D4_VLINE_INTERRUPT (1 << 2)
-# define LB_D4_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD4_INTERRUPT (1 << 17)
-# define DC_HPD4_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE4 0x1853
-# define LB_D5_VLINE_INTERRUPT (1 << 2)
-# define LB_D5_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD5_INTERRUPT (1 << 17)
-# define DC_HPD5_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE5 0x1854
-# define LB_D6_VLINE_INTERRUPT (1 << 2)
-# define LB_D6_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD6_INTERRUPT (1 << 17)
-# define DC_HPD6_RX_INTERRUPT (1 << 18)
-
-/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */
-#define GRPH_INT_STATUS 0x1A16
-# define GRPH_PFLIP_INT_OCCURRED (1 << 0)
-# define GRPH_PFLIP_INT_CLEAR (1 << 8)
-/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */
-#define GRPH_INT_CONTROL 0x1A17
-# define GRPH_PFLIP_INT_MASK (1 << 0)
-# define GRPH_PFLIP_INT_TYPE (1 << 8)
-
-#define DAC_AUTODETECT_INT_CONTROL 0x19F2
-
-#define DC_HPD1_INT_STATUS 0x1807
-#define DC_HPD2_INT_STATUS 0x180A
-#define DC_HPD3_INT_STATUS 0x180D
-#define DC_HPD4_INT_STATUS 0x1810
-#define DC_HPD5_INT_STATUS 0x1813
-#define DC_HPD6_INT_STATUS 0x1816
-# define DC_HPDx_INT_STATUS (1 << 0)
-# define DC_HPDx_SENSE (1 << 1)
-# define DC_HPDx_RX_INT_STATUS (1 << 8)
-
-#define DC_HPD1_INT_CONTROL 0x1808
-#define DC_HPD2_INT_CONTROL 0x180B
-#define DC_HPD3_INT_CONTROL 0x180E
-#define DC_HPD4_INT_CONTROL 0x1811
-#define DC_HPD5_INT_CONTROL 0x1814
-#define DC_HPD6_INT_CONTROL 0x1817
-# define DC_HPDx_INT_ACK (1 << 0)
-# define DC_HPDx_INT_POLARITY (1 << 8)
-# define DC_HPDx_INT_EN (1 << 16)
-# define DC_HPDx_RX_INT_ACK (1 << 20)
-# define DC_HPDx_RX_INT_EN (1 << 24)
-
-#define DC_HPD1_CONTROL 0x1809
-#define DC_HPD2_CONTROL 0x180C
-#define DC_HPD3_CONTROL 0x180F
-#define DC_HPD4_CONTROL 0x1812
-#define DC_HPD5_CONTROL 0x1815
-#define DC_HPD6_CONTROL 0x1818
-# define DC_HPDx_CONNECTION_TIMER(x) ((x) << 0)
-# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16)
-# define DC_HPDx_EN (1 << 28)
-
-#define DPG_PIPE_STUTTER_CONTROL 0x1B35
-# define STUTTER_ENABLE (1 << 0)
-
-/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */
-#define CRTC_STATUS_FRAME_COUNT 0x1BA6
-
-/* Audio clocks */
-#define DCCG_AUDIO_DTO_SOURCE 0x05ac
-# define DCCG_AUDIO_DTO0_SOURCE_SEL(x) ((x) << 0) /* crtc0 - crtc5 */
-# define DCCG_AUDIO_DTO_SEL (1 << 4) /* 0=dto0 1=dto1 */
-
-#define DCCG_AUDIO_DTO0_PHASE 0x05b0
-#define DCCG_AUDIO_DTO0_MODULE 0x05b4
-#define DCCG_AUDIO_DTO1_PHASE 0x05c0
-#define DCCG_AUDIO_DTO1_MODULE 0x05c4
-
-#define AFMT_AUDIO_SRC_CONTROL 0x1c4f
-#define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0)
-/* AFMT_AUDIO_SRC_SELECT
- * 0 = stream0
- * 1 = stream1
- * 2 = stream2
- * 3 = stream3
- * 4 = stream4
- * 5 = stream5
- */
-
-#define GRBM_CNTL 0x2000
-#define GRBM_READ_TIMEOUT(x) ((x) << 0)
-
-#define GRBM_STATUS2 0x2002
-#define RLC_RQ_PENDING (1 << 0)
-#define RLC_BUSY (1 << 8)
-#define TC_BUSY (1 << 9)
-
-#define GRBM_STATUS 0x2004
-#define CMDFIFO_AVAIL_MASK 0x0000000F
-#define RING2_RQ_PENDING (1 << 4)
-#define SRBM_RQ_PENDING (1 << 5)
-#define RING1_RQ_PENDING (1 << 6)
-#define CF_RQ_PENDING (1 << 7)
-#define PF_RQ_PENDING (1 << 8)
-#define GDS_DMA_RQ_PENDING (1 << 9)
-#define GRBM_EE_BUSY (1 << 10)
-#define DB_CLEAN (1 << 12)
-#define CB_CLEAN (1 << 13)
-#define TA_BUSY (1 << 14)
-#define GDS_BUSY (1 << 15)
-#define VGT_BUSY (1 << 17)
-#define IA_BUSY_NO_DMA (1 << 18)
-#define IA_BUSY (1 << 19)
-#define SX_BUSY (1 << 20)
-#define SPI_BUSY (1 << 22)
-#define BCI_BUSY (1 << 23)
-#define SC_BUSY (1 << 24)
-#define PA_BUSY (1 << 25)
-#define DB_BUSY (1 << 26)
-#define CP_COHERENCY_BUSY (1 << 28)
-#define CP_BUSY (1 << 29)
-#define CB_BUSY (1 << 30)
-#define GUI_ACTIVE (1 << 31)
-#define GRBM_STATUS_SE0 0x2005
-#define GRBM_STATUS_SE1 0x2006
-#define SE_DB_CLEAN (1 << 1)
-#define SE_CB_CLEAN (1 << 2)
-#define SE_BCI_BUSY (1 << 22)
-#define SE_VGT_BUSY (1 << 23)
-#define SE_PA_BUSY (1 << 24)
-#define SE_TA_BUSY (1 << 25)
-#define SE_SX_BUSY (1 << 26)
-#define SE_SPI_BUSY (1 << 27)
-#define SE_SC_BUSY (1 << 29)
-#define SE_DB_BUSY (1 << 30)
-#define SE_CB_BUSY (1 << 31)
-
-#define GRBM_SOFT_RESET 0x2008
-#define SOFT_RESET_CP (1 << 0)
-#define SOFT_RESET_CB (1 << 1)
-#define SOFT_RESET_RLC (1 << 2)
-#define SOFT_RESET_DB (1 << 3)
-#define SOFT_RESET_GDS (1 << 4)
-#define SOFT_RESET_PA (1 << 5)
-#define SOFT_RESET_SC (1 << 6)
-#define SOFT_RESET_BCI (1 << 7)
-#define SOFT_RESET_SPI (1 << 8)
-#define SOFT_RESET_SX (1 << 10)
-#define SOFT_RESET_TC (1 << 11)
-#define SOFT_RESET_TA (1 << 12)
-#define SOFT_RESET_VGT (1 << 14)
-#define SOFT_RESET_IA (1 << 15)
-
-#define GRBM_GFX_INDEX 0x200B
-#define INSTANCE_INDEX(x) ((x) << 0)
-#define SH_INDEX(x) ((x) << 8)
-#define SE_INDEX(x) ((x) << 16)
-#define SH_BROADCAST_WRITES (1 << 29)
-#define INSTANCE_BROADCAST_WRITES (1 << 30)
-#define SE_BROADCAST_WRITES (1 << 31)
-
-#define GRBM_INT_CNTL 0x2018
-# define RDERR_INT_ENABLE (1 << 0)
-# define GUI_IDLE_INT_ENABLE (1 << 19)
-
-#define CP_STRMOUT_CNTL 0x213F
-#define SCRATCH_REG0 0x2140
-#define SCRATCH_REG1 0x2141
-#define SCRATCH_REG2 0x2142
-#define SCRATCH_REG3 0x2143
-#define SCRATCH_REG4 0x2144
-#define SCRATCH_REG5 0x2145
-#define SCRATCH_REG6 0x2146
-#define SCRATCH_REG7 0x2147
-
-#define SCRATCH_UMSK 0x2150
-#define SCRATCH_ADDR 0x2151
-
-#define CP_SEM_WAIT_TIMER 0x216F
-
-#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x2172
-
-#define CP_ME_CNTL 0x21B6
-#define CP_CE_HALT (1 << 24)
-#define CP_PFP_HALT (1 << 26)
-#define CP_ME_HALT (1 << 28)
-
-#define CP_COHER_CNTL2 0x217A
-
-#define CP_RB2_RPTR 0x21BE
-#define CP_RB1_RPTR 0x21BF
-#define CP_RB0_RPTR 0x21C0
-#define CP_RB_WPTR_DELAY 0x21C1
-
-#define CP_QUEUE_THRESHOLDS 0x21D8
-#define ROQ_IB1_START(x) ((x) << 0)
-#define ROQ_IB2_START(x) ((x) << 8)
-#define CP_MEQ_THRESHOLDS 0x21D9
-#define MEQ1_START(x) ((x) << 0)
-#define MEQ2_START(x) ((x) << 8)
-
-#define CP_PERFMON_CNTL 0x21FF
-
#define VGT_VTX_VECT_EJECT_REG 0x222C
-
-#define VGT_CACHE_INVALIDATION 0x2231
-#define CACHE_INVALIDATION(x) ((x) << 0)
-#define VC_ONLY 0
-#define TC_ONLY 1
-#define VC_AND_TC 2
-#define AUTO_INVLD_EN(x) ((x) << 6)
-#define NO_AUTO 0
-#define ES_AUTO 1
-#define GS_AUTO 2
-#define ES_AND_GS_AUTO 3
#define VGT_ESGS_RING_SIZE 0x2232
#define VGT_GSVS_RING_SIZE 0x2233
-
#define VGT_GS_VERTEX_REUSE 0x2235
-
#define VGT_PRIMITIVE_TYPE 0x2256
#define VGT_INDEX_TYPE 0x2257
-
#define VGT_NUM_INDICES 0x225C
#define VGT_NUM_INSTANCES 0x225D
-
#define VGT_TF_RING_SIZE 0x2262
-
#define VGT_HS_OFFCHIP_PARAM 0x226C
-
#define VGT_TF_MEMORY_BASE 0x226E
-#define CC_GC_SHADER_ARRAY_CONFIG 0x226F
-#define INACTIVE_CUS_MASK 0xFFFF0000
-#define INACTIVE_CUS_SHIFT 16
-#define GC_USER_SHADER_ARRAY_CONFIG 0x2270
-
-#define PA_CL_ENHANCE 0x2285
-#define CLIP_VTX_REORDER_ENA (1 << 0)
-#define NUM_CLIP_SEQ(x) ((x) << 1)
-
-#define PA_SU_LINE_STIPPLE_VALUE 0x2298
-
-#define PA_SC_LINE_STIPPLE_STATE 0x22C4
-
-#define PA_SC_FORCE_EOV_MAX_CNTS 0x22C9
-#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0)
-#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16)
-
-#define PA_SC_FIFO_SIZE 0x22F3
-#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0)
-#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6)
-#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15)
-#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23)
-
#define PA_SC_ENHANCE 0x22FC
-#define SQ_CONFIG 0x2300
-
-#define SQC_CACHES 0x2302
-
-#define SQ_POWER_THROTTLE 0x2396
-#define MIN_POWER(x) ((x) << 0)
-#define MIN_POWER_MASK (0x3fff << 0)
-#define MIN_POWER_SHIFT 0
-#define MAX_POWER(x) ((x) << 16)
-#define MAX_POWER_MASK (0x3fff << 16)
-#define MAX_POWER_SHIFT 0
-#define SQ_POWER_THROTTLE2 0x2397
-#define MAX_POWER_DELTA(x) ((x) << 0)
-#define MAX_POWER_DELTA_MASK (0x3fff << 0)
-#define MAX_POWER_DELTA_SHIFT 0
-#define STI_SIZE(x) ((x) << 16)
-#define STI_SIZE_MASK (0x3ff << 16)
-#define STI_SIZE_SHIFT 16
-#define LTI_RATIO(x) ((x) << 27)
-#define LTI_RATIO_MASK (0xf << 27)
-#define LTI_RATIO_SHIFT 27
-
-#define SX_DEBUG_1 0x2418
-
-#define SPI_STATIC_THREAD_MGMT_1 0x2438
-#define SPI_STATIC_THREAD_MGMT_2 0x2439
-#define SPI_STATIC_THREAD_MGMT_3 0x243A
-#define SPI_PS_MAX_WAVE_ID 0x243B
-
-#define SPI_CONFIG_CNTL 0x2440
-
-#define SPI_CONFIG_CNTL_1 0x244F
-#define VTX_DONE_DELAY(x) ((x) << 0)
-#define INTERP_ONE_PRIM_PER_ROW (1 << 4)
-
-#define CGTS_TCC_DISABLE 0x2452
-#define CGTS_USER_TCC_DISABLE 0x2453
-#define TCC_DISABLE_MASK 0xFFFF0000
-#define TCC_DISABLE_SHIFT 16
-#define CGTS_SM_CTRL_REG 0x2454
-#define OVERRIDE (1 << 21)
-#define LS_OVERRIDE (1 << 22)
-
-#define SPI_LB_CU_MASK 0x24D5
-
#define TA_CNTL_AUX 0x2542
-#define CC_RB_BACKEND_DISABLE 0x263D
-#define BACKEND_DISABLE(x) ((x) << 16)
-#define GB_ADDR_CONFIG 0x263E
-#define NUM_PIPES(x) ((x) << 0)
-#define NUM_PIPES_MASK 0x00000007
-#define NUM_PIPES_SHIFT 0
-#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4)
-#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070
-#define PIPE_INTERLEAVE_SIZE_SHIFT 4
-#define NUM_SHADER_ENGINES(x) ((x) << 12)
-#define NUM_SHADER_ENGINES_MASK 0x00003000
-#define NUM_SHADER_ENGINES_SHIFT 12
-#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16)
-#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000
-#define SHADER_ENGINE_TILE_SIZE_SHIFT 16
-#define NUM_GPUS(x) ((x) << 20)
-#define NUM_GPUS_MASK 0x00700000
-#define NUM_GPUS_SHIFT 20
-#define MULTI_GPU_TILE_SIZE(x) ((x) << 24)
-#define MULTI_GPU_TILE_SIZE_MASK 0x03000000
-#define MULTI_GPU_TILE_SIZE_SHIFT 24
-#define ROW_SIZE(x) ((x) << 28)
-#define ROW_SIZE_MASK 0x30000000
-#define ROW_SIZE_SHIFT 28
-
-#define GB_TILE_MODE0 0x2644
-# define MICRO_TILE_MODE(x) ((x) << 0)
-# define ADDR_SURF_DISPLAY_MICRO_TILING 0
-# define ADDR_SURF_THIN_MICRO_TILING 1
-# define ADDR_SURF_DEPTH_MICRO_TILING 2
-# define ARRAY_MODE(x) ((x) << 2)
-# define ARRAY_LINEAR_GENERAL 0
-# define ARRAY_LINEAR_ALIGNED 1
-# define ARRAY_1D_TILED_THIN1 2
-# define ARRAY_2D_TILED_THIN1 4
-# define PIPE_CONFIG(x) ((x) << 6)
-# define ADDR_SURF_P2 0
-# define ADDR_SURF_P4_8x16 4
-# define ADDR_SURF_P4_16x16 5
-# define ADDR_SURF_P4_16x32 6
-# define ADDR_SURF_P4_32x32 7
-# define ADDR_SURF_P8_16x16_8x16 8
-# define ADDR_SURF_P8_16x32_8x16 9
-# define ADDR_SURF_P8_32x32_8x16 10
-# define ADDR_SURF_P8_16x32_16x16 11
-# define ADDR_SURF_P8_32x32_16x16 12
-# define ADDR_SURF_P8_32x32_16x32 13
-# define ADDR_SURF_P8_32x64_32x32 14
-# define TILE_SPLIT(x) ((x) << 11)
-# define ADDR_SURF_TILE_SPLIT_64B 0
-# define ADDR_SURF_TILE_SPLIT_128B 1
-# define ADDR_SURF_TILE_SPLIT_256B 2
-# define ADDR_SURF_TILE_SPLIT_512B 3
-# define ADDR_SURF_TILE_SPLIT_1KB 4
-# define ADDR_SURF_TILE_SPLIT_2KB 5
-# define ADDR_SURF_TILE_SPLIT_4KB 6
-# define BANK_WIDTH(x) ((x) << 14)
-# define ADDR_SURF_BANK_WIDTH_1 0
-# define ADDR_SURF_BANK_WIDTH_2 1
-# define ADDR_SURF_BANK_WIDTH_4 2
-# define ADDR_SURF_BANK_WIDTH_8 3
-# define BANK_HEIGHT(x) ((x) << 16)
-# define ADDR_SURF_BANK_HEIGHT_1 0
-# define ADDR_SURF_BANK_HEIGHT_2 1
-# define ADDR_SURF_BANK_HEIGHT_4 2
-# define ADDR_SURF_BANK_HEIGHT_8 3
-# define MACRO_TILE_ASPECT(x) ((x) << 18)
-# define ADDR_SURF_MACRO_ASPECT_1 0
-# define ADDR_SURF_MACRO_ASPECT_2 1
-# define ADDR_SURF_MACRO_ASPECT_4 2
-# define ADDR_SURF_MACRO_ASPECT_8 3
-# define NUM_BANKS(x) ((x) << 20)
-# define ADDR_SURF_2_BANK 0
-# define ADDR_SURF_4_BANK 1
-# define ADDR_SURF_8_BANK 2
-# define ADDR_SURF_16_BANK 3
-#define GB_TILE_MODE1 0x2645
-#define GB_TILE_MODE2 0x2646
-#define GB_TILE_MODE3 0x2647
-#define GB_TILE_MODE4 0x2648
-#define GB_TILE_MODE5 0x2649
-#define GB_TILE_MODE6 0x264a
-#define GB_TILE_MODE7 0x264b
-#define GB_TILE_MODE8 0x264c
-#define GB_TILE_MODE9 0x264d
-#define GB_TILE_MODE10 0x264e
-#define GB_TILE_MODE11 0x264f
-#define GB_TILE_MODE12 0x2650
-#define GB_TILE_MODE13 0x2651
-#define GB_TILE_MODE14 0x2652
-#define GB_TILE_MODE15 0x2653
-#define GB_TILE_MODE16 0x2654
-#define GB_TILE_MODE17 0x2655
-#define GB_TILE_MODE18 0x2656
-#define GB_TILE_MODE19 0x2657
-#define GB_TILE_MODE20 0x2658
-#define GB_TILE_MODE21 0x2659
-#define GB_TILE_MODE22 0x265a
-#define GB_TILE_MODE23 0x265b
-#define GB_TILE_MODE24 0x265c
-#define GB_TILE_MODE25 0x265d
-#define GB_TILE_MODE26 0x265e
-#define GB_TILE_MODE27 0x265f
-#define GB_TILE_MODE28 0x2660
-#define GB_TILE_MODE29 0x2661
-#define GB_TILE_MODE30 0x2662
-#define GB_TILE_MODE31 0x2663
-
-#define CB_PERFCOUNTER0_SELECT0 0x2688
-#define CB_PERFCOUNTER0_SELECT1 0x2689
-#define CB_PERFCOUNTER1_SELECT0 0x268A
-#define CB_PERFCOUNTER1_SELECT1 0x268B
-#define CB_PERFCOUNTER2_SELECT0 0x268C
-#define CB_PERFCOUNTER2_SELECT1 0x268D
-#define CB_PERFCOUNTER3_SELECT0 0x268E
-#define CB_PERFCOUNTER3_SELECT1 0x268F
-
-#define CB_CGTT_SCLK_CTRL 0x2698
-
-#define GC_USER_RB_BACKEND_DISABLE 0x26DF
-#define BACKEND_DISABLE_MASK 0x00FF0000
-#define BACKEND_DISABLE_SHIFT 16
-
-#define TCP_CHAN_STEER_LO 0x2B03
-#define TCP_CHAN_STEER_HI 0x2B94
-
-#define CP_RB0_BASE 0x3040
-#define CP_RB0_CNTL 0x3041
-#define RB_BUFSZ(x) ((x) << 0)
-#define RB_BLKSZ(x) ((x) << 8)
-#define BUF_SWAP_32BIT (2 << 16)
-#define RB_NO_UPDATE (1 << 27)
-#define RB_RPTR_WR_ENA (1 << 31)
-
-#define CP_RB0_RPTR_ADDR 0x3043
-#define CP_RB0_RPTR_ADDR_HI 0x3044
-#define CP_RB0_WPTR 0x3045
-
-#define CP_PFP_UCODE_ADDR 0x3054
-#define CP_PFP_UCODE_DATA 0x3055
-#define CP_ME_RAM_RADDR 0x3056
-#define CP_ME_RAM_WADDR 0x3057
-#define CP_ME_RAM_DATA 0x3058
-
-#define CP_CE_UCODE_ADDR 0x305A
-#define CP_CE_UCODE_DATA 0x305B
-
-#define CP_RB1_BASE 0x3060
-#define CP_RB1_CNTL 0x3061
-#define CP_RB1_RPTR_ADDR 0x3062
-#define CP_RB1_RPTR_ADDR_HI 0x3063
-#define CP_RB1_WPTR 0x3064
-#define CP_RB2_BASE 0x3065
-#define CP_RB2_CNTL 0x3066
-#define CP_RB2_RPTR_ADDR 0x3067
-#define CP_RB2_RPTR_ADDR_HI 0x3068
-#define CP_RB2_WPTR 0x3069
-#define CP_INT_CNTL_RING0 0x306A
-#define CP_INT_CNTL_RING1 0x306B
-#define CP_INT_CNTL_RING2 0x306C
-# define CNTX_BUSY_INT_ENABLE (1 << 19)
-# define CNTX_EMPTY_INT_ENABLE (1 << 20)
-# define WAIT_MEM_SEM_INT_ENABLE (1 << 21)
-# define TIME_STAMP_INT_ENABLE (1 << 26)
-# define CP_RINGID2_INT_ENABLE (1 << 29)
-# define CP_RINGID1_INT_ENABLE (1 << 30)
-# define CP_RINGID0_INT_ENABLE (1 << 31)
-#define CP_INT_STATUS_RING0 0x306D
-#define CP_INT_STATUS_RING1 0x306E
-#define CP_INT_STATUS_RING2 0x306F
-# define WAIT_MEM_SEM_INT_STAT (1 << 21)
-# define TIME_STAMP_INT_STAT (1 << 26)
-# define CP_RINGID2_INT_STAT (1 << 29)
-# define CP_RINGID1_INT_STAT (1 << 30)
-# define CP_RINGID0_INT_STAT (1 << 31)
-
-#define CP_MEM_SLP_CNTL 0x3079
-# define CP_MEM_LS_EN (1 << 0)
-
-#define CP_DEBUG 0x307F
-
-#define RLC_CNTL 0x30C0
-# define RLC_ENABLE (1 << 0)
-#define RLC_RL_BASE 0x30C1
-#define RLC_RL_SIZE 0x30C2
-#define RLC_LB_CNTL 0x30C3
-# define LOAD_BALANCE_ENABLE (1 << 0)
-#define RLC_SAVE_AND_RESTORE_BASE 0x30C4
-#define RLC_LB_CNTR_MAX 0x30C5
-#define RLC_LB_CNTR_INIT 0x30C6
-
-#define RLC_CLEAR_STATE_RESTORE_BASE 0x30C8
-
-#define RLC_UCODE_ADDR 0x30CB
-#define RLC_UCODE_DATA 0x30CC
-
-#define RLC_GPU_CLOCK_COUNT_LSB 0x30CE
-#define RLC_GPU_CLOCK_COUNT_MSB 0x30CF
-#define RLC_CAPTURE_GPU_CLOCK_COUNT 0x30D0
-#define RLC_MC_CNTL 0x30D1
-#define RLC_UCODE_CNTL 0x30D2
-#define RLC_STAT 0x30D3
-# define RLC_BUSY_STATUS (1 << 0)
-# define GFX_POWER_STATUS (1 << 1)
-# define GFX_CLOCK_STATUS (1 << 2)
-# define GFX_LS_STATUS (1 << 3)
-
-#define RLC_PG_CNTL 0x30D7
-# define GFX_PG_ENABLE (1 << 0)
-# define GFX_PG_SRC (1 << 1)
-
-#define RLC_CGTT_MGCG_OVERRIDE 0x3100
-#define RLC_CGCG_CGLS_CTRL 0x3101
-# define CGCG_EN (1 << 0)
-# define CGLS_EN (1 << 1)
-
-#define RLC_TTOP_D 0x3105
-# define RLC_PUD(x) ((x) << 0)
-# define RLC_PUD_MASK (0xff << 0)
-# define RLC_PDD(x) ((x) << 8)
-# define RLC_PDD_MASK (0xff << 8)
-# define RLC_TTPD(x) ((x) << 16)
-# define RLC_TTPD_MASK (0xff << 16)
-# define RLC_MSD(x) ((x) << 24)
-# define RLC_MSD_MASK (0xff << 24)
-
-#define RLC_LB_INIT_CU_MASK 0x3107
-
-#define RLC_PG_AO_CU_MASK 0x310B
-#define RLC_MAX_PG_CU 0x310C
-# define MAX_PU_CU(x) ((x) << 0)
-# define MAX_PU_CU_MASK (0xff << 0)
-#define RLC_AUTO_PG_CTRL 0x310C
-# define AUTO_PG_EN (1 << 0)
-# define GRBM_REG_SGIT(x) ((x) << 3)
-# define GRBM_REG_SGIT_MASK (0xffff << 3)
-# define PG_AFTER_GRBM_REG_ST(x) ((x) << 19)
-# define PG_AFTER_GRBM_REG_ST_MASK (0x1fff << 19)
-
-#define RLC_SERDES_WR_MASTER_MASK_0 0x3115
-#define RLC_SERDES_WR_MASTER_MASK_1 0x3116
-#define RLC_SERDES_WR_CTRL 0x3117
-
-#define RLC_SERDES_MASTER_BUSY_0 0x3119
-#define RLC_SERDES_MASTER_BUSY_1 0x311A
-
-#define RLC_GCPM_GENERAL_3 0x311E
-
-#define DB_RENDER_CONTROL 0xA000
-
-#define DB_DEPTH_INFO 0xA00F
-
-#define PA_SC_RASTER_CONFIG 0xA0D4
-# define RB_MAP_PKR0(x) ((x) << 0)
-# define RB_MAP_PKR0_MASK (0x3 << 0)
-# define RB_MAP_PKR1(x) ((x) << 2)
-# define RB_MAP_PKR1_MASK (0x3 << 2)
-# define RASTER_CONFIG_RB_MAP_0 0
-# define RASTER_CONFIG_RB_MAP_1 1
-# define RASTER_CONFIG_RB_MAP_2 2
-# define RASTER_CONFIG_RB_MAP_3 3
+// #define PA_SC_RASTER_CONFIG 0xA0D4
# define RB_XSEL2(x) ((x) << 4)
# define RB_XSEL2_MASK (0x3 << 4)
# define RB_XSEL (1 << 6)
# define RB_YSEL (1 << 7)
# define PKR_MAP(x) ((x) << 8)
-# define PKR_MAP_MASK (0x3 << 8)
-# define RASTER_CONFIG_PKR_MAP_0 0
-# define RASTER_CONFIG_PKR_MAP_1 1
-# define RASTER_CONFIG_PKR_MAP_2 2
-# define RASTER_CONFIG_PKR_MAP_3 3
# define PKR_XSEL(x) ((x) << 10)
# define PKR_XSEL_MASK (0x3 << 10)
# define PKR_YSEL(x) ((x) << 12)
@@ -1426,221 +306,19 @@
# define SC_YSEL(x) ((x) << 20)
# define SC_YSEL_MASK (0x3 << 20)
# define SE_MAP(x) ((x) << 24)
-# define SE_MAP_MASK (0x3 << 24)
-# define RASTER_CONFIG_SE_MAP_0 0
-# define RASTER_CONFIG_SE_MAP_1 1
-# define RASTER_CONFIG_SE_MAP_2 2
-# define RASTER_CONFIG_SE_MAP_3 3
# define SE_XSEL(x) ((x) << 26)
# define SE_XSEL_MASK (0x3 << 26)
# define SE_YSEL(x) ((x) << 28)
# define SE_YSEL_MASK (0x3 << 28)
-
-#define VGT_EVENT_INITIATOR 0xA2A4
-# define SAMPLE_STREAMOUTSTATS1 (1 << 0)
-# define SAMPLE_STREAMOUTSTATS2 (2 << 0)
-# define SAMPLE_STREAMOUTSTATS3 (3 << 0)
-# define CACHE_FLUSH_TS (4 << 0)
-# define CACHE_FLUSH (6 << 0)
-# define CS_PARTIAL_FLUSH (7 << 0)
-# define VGT_STREAMOUT_RESET (10 << 0)
-# define END_OF_PIPE_INCR_DE (11 << 0)
-# define END_OF_PIPE_IB_END (12 << 0)
-# define RST_PIX_CNT (13 << 0)
-# define VS_PARTIAL_FLUSH (15 << 0)
-# define PS_PARTIAL_FLUSH (16 << 0)
-# define CACHE_FLUSH_AND_INV_TS_EVENT (20 << 0)
-# define ZPASS_DONE (21 << 0)
-# define CACHE_FLUSH_AND_INV_EVENT (22 << 0)
-# define PERFCOUNTER_START (23 << 0)
-# define PERFCOUNTER_STOP (24 << 0)
-# define PIPELINESTAT_START (25 << 0)
-# define PIPELINESTAT_STOP (26 << 0)
-# define PERFCOUNTER_SAMPLE (27 << 0)
-# define SAMPLE_PIPELINESTAT (30 << 0)
-# define SAMPLE_STREAMOUTSTATS (32 << 0)
-# define RESET_VTX_CNT (33 << 0)
-# define VGT_FLUSH (36 << 0)
-# define BOTTOM_OF_PIPE_TS (40 << 0)
-# define DB_CACHE_FLUSH_AND_INV (42 << 0)
-# define FLUSH_AND_INV_DB_DATA_TS (43 << 0)
-# define FLUSH_AND_INV_DB_META (44 << 0)
-# define FLUSH_AND_INV_CB_DATA_TS (45 << 0)
-# define FLUSH_AND_INV_CB_META (46 << 0)
-# define CS_DONE (47 << 0)
-# define PS_DONE (48 << 0)
-# define FLUSH_AND_INV_CB_PIXEL_DATA (49 << 0)
-# define THREAD_TRACE_START (51 << 0)
-# define THREAD_TRACE_STOP (52 << 0)
-# define THREAD_TRACE_FLUSH (54 << 0)
-# define THREAD_TRACE_FINISH (55 << 0)
-
-/* PIF PHY0 registers idx/data 0x8/0xc */
-#define PB0_PIF_CNTL 0x10
-# define LS2_EXIT_TIME(x) ((x) << 17)
-# define LS2_EXIT_TIME_MASK (0x7 << 17)
-# define LS2_EXIT_TIME_SHIFT 17
-#define PB0_PIF_PAIRING 0x11
-# define MULTI_PIF (1 << 25)
-#define PB0_PIF_PWRDOWN_0 0x12
-# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10
-# define PLL_RAMP_UP_TIME_0(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_0_SHIFT 24
-#define PB0_PIF_PWRDOWN_1 0x13
-# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10
-# define PLL_RAMP_UP_TIME_1(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_1_SHIFT 24
-
-#define PB0_PIF_PWRDOWN_2 0x17
-# define PLL_POWER_STATE_IN_TXS2_2(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_2_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_2_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_2(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_2_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_2_SHIFT 10
-# define PLL_RAMP_UP_TIME_2(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_2_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_2_SHIFT 24
-#define PB0_PIF_PWRDOWN_3 0x18
-# define PLL_POWER_STATE_IN_TXS2_3(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_3_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_3_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_3(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_3_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_3_SHIFT 10
-# define PLL_RAMP_UP_TIME_3(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_3_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_3_SHIFT 24
-/* PIF PHY1 registers idx/data 0x10/0x14 */
-#define PB1_PIF_CNTL 0x10
-#define PB1_PIF_PAIRING 0x11
-#define PB1_PIF_PWRDOWN_0 0x12
-#define PB1_PIF_PWRDOWN_1 0x13
-
-#define PB1_PIF_PWRDOWN_2 0x17
-#define PB1_PIF_PWRDOWN_3 0x18
-/* PCIE registers idx/data 0x30/0x34 */
-#define PCIE_CNTL2 0x1c /* PCIE */
-# define SLV_MEM_LS_EN (1 << 16)
-# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17)
-# define MST_MEM_LS_EN (1 << 18)
-# define REPLAY_MEM_LS_EN (1 << 19)
-#define PCIE_LC_STATUS1 0x28 /* PCIE */
-# define LC_REVERSE_RCVR (1 << 0)
-# define LC_REVERSE_XMIT (1 << 1)
-# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2)
-# define LC_OPERATING_LINK_WIDTH_SHIFT 2
-# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5)
-# define LC_DETECTED_LINK_WIDTH_SHIFT 5
-
-#define PCIE_P_CNTL 0x40 /* PCIE */
-# define P_IGNORE_EDB_ERR (1 << 6)
-
/* PCIE PORT registers idx/data 0x38/0x3c */
-#define PCIE_LC_CNTL 0xa0
-# define LC_L0S_INACTIVITY(x) ((x) << 8)
-# define LC_L0S_INACTIVITY_MASK (0xf << 8)
-# define LC_L0S_INACTIVITY_SHIFT 8
-# define LC_L1_INACTIVITY(x) ((x) << 12)
-# define LC_L1_INACTIVITY_MASK (0xf << 12)
-# define LC_L1_INACTIVITY_SHIFT 12
-# define LC_PMI_TO_L1_DIS (1 << 16)
-# define LC_ASPM_TO_L1_DIS (1 << 24)
-#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
-# define LC_LINK_WIDTH_SHIFT 0
-# define LC_LINK_WIDTH_MASK 0x7
+// #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
# define LC_LINK_WIDTH_X0 0
# define LC_LINK_WIDTH_X1 1
# define LC_LINK_WIDTH_X2 2
# define LC_LINK_WIDTH_X4 3
# define LC_LINK_WIDTH_X8 4
# define LC_LINK_WIDTH_X16 6
-# define LC_LINK_WIDTH_RD_SHIFT 4
-# define LC_LINK_WIDTH_RD_MASK 0x70
-# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7)
-# define LC_RECONFIG_NOW (1 << 8)
-# define LC_RENEGOTIATION_SUPPORT (1 << 9)
-# define LC_RENEGOTIATE_EN (1 << 10)
-# define LC_SHORT_RECONFIG_EN (1 << 11)
-# define LC_UPCONFIGURE_SUPPORT (1 << 12)
-# define LC_UPCONFIGURE_DIS (1 << 13)
-# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21)
-# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21)
-# define LC_DYN_LANES_PWR_STATE_SHIFT 21
-#define PCIE_LC_N_FTS_CNTL 0xa3 /* PCIE_P */
-# define LC_XMIT_N_FTS(x) ((x) << 0)
-# define LC_XMIT_N_FTS_MASK (0xff << 0)
-# define LC_XMIT_N_FTS_SHIFT 0
-# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8)
-# define LC_N_FTS_MASK (0xff << 24)
-#define PCIE_LC_SPEED_CNTL 0xa4 /* PCIE_P */
-# define LC_GEN2_EN_STRAP (1 << 0)
-# define LC_GEN3_EN_STRAP (1 << 1)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3
-# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5)
-# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6)
-# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7)
-# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8)
-# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9)
-# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10)
-# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10
-# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */
-# define LC_CURRENT_DATA_RATE_SHIFT 13
-# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16)
-# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18)
-# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19)
-# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20)
-# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21)
-
-#define PCIE_LC_CNTL2 0xb1
-# define LC_ALLOW_PDWN_IN_L1 (1 << 17)
-# define LC_ALLOW_PDWN_IN_L23 (1 << 18)
-
-#define PCIE_LC_CNTL3 0xb5 /* PCIE_P */
-# define LC_GO_TO_RECOVERY (1 << 30)
-#define PCIE_LC_CNTL4 0xb6 /* PCIE_P */
-# define LC_REDO_EQ (1 << 5)
-# define LC_SET_QUIESCE (1 << 13)
-
-/*
- * UVD
- */
-#define UVD_UDEC_ADDR_CONFIG 0x3bd3
-#define UVD_UDEC_DB_ADDR_CONFIG 0x3bd4
-#define UVD_UDEC_DBW_ADDR_CONFIG 0x3bd5
-#define UVD_RBC_RB_RPTR 0x3da4
-#define UVD_RBC_RB_WPTR 0x3da5
-#define UVD_STATUS 0x3daf
-
-#define UVD_CGC_CTRL 0x3dc2
-# define DCM (1 << 0)
-# define CG_DT(x) ((x) << 2)
-# define CG_DT_MASK (0xf << 2)
-# define CLK_OD(x) ((x) << 6)
-# define CLK_OD_MASK (0x1f << 6)
-
- /* UVD CTX indirect */
-#define UVD_CGC_MEM_CTRL 0xC0
-#define UVD_CGC_CTRL2 0xC1
-# define DYN_OR_EN (1 << 0)
-# define DYN_RR_EN (1 << 1)
-# define G_DIV_ID(x) ((x) << 2)
-# define G_DIV_ID_MASK (0x7 << 2)
/*
* PM4
@@ -1874,45 +552,7 @@
/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
#define DMA1_REGISTER_OFFSET 0x200 /* not a register */
-
-#define DMA_RB_CNTL 0x3400
-# define DMA_RB_ENABLE (1 << 0)
-# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
-# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
-# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
-# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
-# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
-#define DMA_RB_BASE 0x3401
-#define DMA_RB_RPTR 0x3402
-#define DMA_RB_WPTR 0x3403
-
-#define DMA_RB_RPTR_ADDR_HI 0x3407
-#define DMA_RB_RPTR_ADDR_LO 0x3408
-
-#define DMA_IB_CNTL 0x3409
-# define DMA_IB_ENABLE (1 << 0)
-# define DMA_IB_SWAP_ENABLE (1 << 4)
-# define CMD_VMID_FORCE (1 << 31)
-#define DMA_IB_RPTR 0x340a
-#define DMA_CNTL 0x340b
-# define TRAP_ENABLE (1 << 0)
-# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
-# define SEM_WAIT_INT_ENABLE (1 << 2)
-# define DATA_SWAP_ENABLE (1 << 3)
-# define FENCE_SWAP_ENABLE (1 << 4)
-# define CTXEMPTY_INT_ENABLE (1 << 28)
-#define DMA_STATUS_REG 0x340d
-# define DMA_IDLE (1 << 0)
-#define DMA_TILING_CONFIG 0x342e
-
-#define DMA_POWER_CNTL 0x342f
-# define MEM_POWER_OVERRIDE (1 << 8)
-#define DMA_CLK_CTRL 0x3430
-
-#define DMA_PG 0x3435
-# define PG_CNTL_ENABLE (1 << 0)
-#define DMA_PGFSM_CONFIG 0x3436
-#define DMA_PGFSM_WRITE 0x3437
+#define SDMA_MAX_INSTANCE 2
#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \
(((b) & 0x1) << 26) | \
@@ -1941,6 +581,7 @@
#define DMA_PACKET_POLL_REG_MEM 0xe
#define DMA_PACKET_NOP 0xf
+/* VCE */
#define VCE_STATUS 0x20004
#define VCE_VCPU_CNTL 0x20014
#define VCE_CLK_EN (1 << 0)
@@ -1991,431 +632,144 @@
//#dce stupp
/* display controller offsets used for crtc/cur/lut/grph/viewport/etc. */
-#define SI_CRTC0_REGISTER_OFFSET 0 //(0x6df0 - 0x6df0)/4
-#define SI_CRTC1_REGISTER_OFFSET 0x300 //(0x79f0 - 0x6df0)/4
-#define SI_CRTC2_REGISTER_OFFSET 0x2600 //(0x105f0 - 0x6df0)/4
-#define SI_CRTC3_REGISTER_OFFSET 0x2900 //(0x111f0 - 0x6df0)/4
-#define SI_CRTC4_REGISTER_OFFSET 0x2c00 //(0x11df0 - 0x6df0)/4
-#define SI_CRTC5_REGISTER_OFFSET 0x2f00 //(0x129f0 - 0x6df0)/4
+#define CRTC0_REGISTER_OFFSET (0x1b7c - 0x1b7c) //(0x6df0 - 0x6df0)/4
+#define CRTC1_REGISTER_OFFSET (0x1e7c - 0x1b7c) //(0x79f0 - 0x6df0)/4
+#define CRTC2_REGISTER_OFFSET (0x417c - 0x1b7c) //(0x105f0 - 0x6df0)/4
+#define CRTC3_REGISTER_OFFSET (0x447c - 0x1b7c) //(0x111f0 - 0x6df0)/4
+#define CRTC4_REGISTER_OFFSET (0x477c - 0x1b7c) //(0x11df0 - 0x6df0)/4
+#define CRTC5_REGISTER_OFFSET (0x4a7c - 0x1b7c) //(0x129f0 - 0x6df0)/4
+
+/* hpd instance offsets */
+#define HPD0_REGISTER_OFFSET (0x1807 - 0x1807)
+#define HPD1_REGISTER_OFFSET (0x180a - 0x1807)
+#define HPD2_REGISTER_OFFSET (0x180d - 0x1807)
+#define HPD3_REGISTER_OFFSET (0x1810 - 0x1807)
+#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
+#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
+
+/* audio endpt instance offsets */
+#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780)
+#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780)
+#define AUD2_REGISTER_OFFSET (0x178c - 0x1780)
+#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780)
+#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780)
+#define AUD5_REGISTER_OFFSET (0x179d - 0x1780)
+#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
#define CURSOR_WIDTH 64
#define CURSOR_HEIGHT 64
-#define AMDGPU_MM_INDEX 0x0000
-#define AMDGPU_MM_DATA 0x0001
-
-#define VERDE_NUM_CRTC 6
-#define BLACKOUT_MODE_MASK 0x00000007
-#define VGA_RENDER_CONTROL 0xC0
-#define R_000300_VGA_RENDER_CONTROL 0xC0
-#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF
-#define EVERGREEN_CRTC_STATUS 0x1BA3
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4
-/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
-#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d
-#define EVERGREEN_CRTC_CONTROL 0x1b9c
-#define EVERGREEN_CRTC_MASTER_EN (1 << 0)
-#define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24)
-#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d
-#define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8)
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8
-#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5
-#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd
-#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-
-#define EVERGREEN_DATA_FORMAT 0x1ac0
-# define EVERGREEN_INTERLEAVE_EN (1 << 0)
-
-#define MC_SHARED_CHMAP__NOOFCHAN_MASK 0xf000
-#define MC_SHARED_CHMAP__NOOFCHAN__SHIFT 0xc
-
-#define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20)
-#define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20)
-#define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20)
-#define R600_D1GRPH_ARRAY_MODE_2D_TILED_THIN1 (4 << 20)
-
-#define R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a45
-#define R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1845
-
-#define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847
-#define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47
-
-#define DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK 0x8
-
-#define DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK 0x4
-
-#define DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK 0x20000
-
-#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK 0x1
-#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK 0x100
-
-#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK 0x1
-
-#define R600_D1GRPH_SWAP_CONTROL 0x1843
-#define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_32BIT (2 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_64BIT (3 << 0)
-
-#define AVIVO_D1VGA_CONTROL 0x00cc
-# define AVIVO_DVGA_CONTROL_MODE_ENABLE (1 << 0)
-# define AVIVO_DVGA_CONTROL_TIMING_SELECT (1 << 8)
-# define AVIVO_DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9)
-# define AVIVO_DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10)
-# define AVIVO_DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16)
-# define AVIVO_DVGA_CONTROL_ROTATE (1 << 24)
-#define AVIVO_D2VGA_CONTROL 0x00ce
-
-#define R600_BUS_CNTL 0x1508
-# define R600_BIOS_ROM_DIS (1 << 1)
+
#define R600_ROM_CNTL 0x580
# define R600_SCK_OVERWRITE (1 << 1)
# define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28
# define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28)
-#define GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK 0x1
-
-#define FMT_BIT_DEPTH_CONTROL 0x1bf2
-#define FMT_TRUNCATE_EN (1 << 0)
-#define FMT_TRUNCATE_DEPTH (1 << 4)
-#define FMT_SPATIAL_DITHER_EN (1 << 8)
-#define FMT_SPATIAL_DITHER_MODE(x) ((x) << 9)
-#define FMT_SPATIAL_DITHER_DEPTH (1 << 12)
-#define FMT_FRAME_RANDOM_ENABLE (1 << 13)
-#define FMT_RGB_RANDOM_ENABLE (1 << 14)
-#define FMT_HIGHPASS_RANDOM_ENABLE (1 << 15)
-#define FMT_TEMPORAL_DITHER_EN (1 << 16)
-#define FMT_TEMPORAL_DITHER_DEPTH (1 << 20)
-#define FMT_TEMPORAL_DITHER_OFFSET(x) ((x) << 21)
-#define FMT_TEMPORAL_LEVEL (1 << 24)
-#define FMT_TEMPORAL_DITHER_RESET (1 << 25)
-#define FMT_25FRC_SEL(x) ((x) << 26)
-#define FMT_50FRC_SEL(x) ((x) << 28)
-#define FMT_75FRC_SEL(x) ((x) << 30)
-
-#define EVERGREEN_DC_LUT_CONTROL 0x1a80
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE 0x1a81
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN 0x1a82
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED 0x1a83
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE 0x1a84
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN 0x1a85
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED 0x1a86
-#define EVERGREEN_DC_LUT_30_COLOR 0x1a7c
-#define EVERGREEN_DC_LUT_RW_INDEX 0x1a79
-#define EVERGREEN_DC_LUT_WRITE_EN_MASK 0x1a7e
-#define EVERGREEN_DC_LUT_RW_MODE 0x1a78
-
-#define EVERGREEN_GRPH_ENABLE 0x1a00
-#define EVERGREEN_GRPH_CONTROL 0x1a01
-#define EVERGREEN_GRPH_DEPTH(x) (((x) & 0x3) << 0)
-#define EVERGREEN_GRPH_DEPTH_8BPP 0
-#define EVERGREEN_GRPH_DEPTH_16BPP 1
-#define EVERGREEN_GRPH_DEPTH_32BPP 2
-#define EVERGREEN_GRPH_NUM_BANKS(x) (((x) & 0x3) << 2)
-#define EVERGREEN_ADDR_SURF_2_BANK 0
-#define EVERGREEN_ADDR_SURF_4_BANK 1
-#define EVERGREEN_ADDR_SURF_8_BANK 2
-#define EVERGREEN_ADDR_SURF_16_BANK 3
-#define EVERGREEN_GRPH_Z(x) (((x) & 0x3) << 4)
-#define EVERGREEN_GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6)
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_1 0
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_2 1
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_4 2
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_8 3
-#define EVERGREEN_GRPH_FORMAT(x) (((x) & 0x7) << 8)
-
-#define EVERGREEN_GRPH_FORMAT_INDEXED 0
-#define EVERGREEN_GRPH_FORMAT_ARGB1555 0
-#define EVERGREEN_GRPH_FORMAT_ARGB565 1
-#define EVERGREEN_GRPH_FORMAT_ARGB4444 2
-#define EVERGREEN_GRPH_FORMAT_AI88 3
-#define EVERGREEN_GRPH_FORMAT_MONO16 4
-#define EVERGREEN_GRPH_FORMAT_BGRA5551 5
+#define GRPH_ARRAY_LINEAR_GENERAL 0
+#define GRPH_ARRAY_LINEAR_ALIGNED 1
+#define GRPH_ARRAY_1D_TILED_THIN1 2
+#define GRPH_ARRAY_2D_TILED_THIN1 4
+
+#define ES_AND_GS_AUTO 3
+#define BUF_SWAP_32BIT (2 << 16)
+
+#define GRPH_DEPTH_8BPP 0
+#define GRPH_DEPTH_16BPP 1
+#define GRPH_DEPTH_32BPP 2
+
+/* 8 BPP */
+#define GRPH_FORMAT_INDEXED 0
+
+/* 16 BPP */
+#define GRPH_FORMAT_ARGB1555 0
+#define GRPH_FORMAT_ARGB565 1
+#define GRPH_FORMAT_ARGB4444 2
+#define GRPH_FORMAT_AI88 3
+#define GRPH_FORMAT_MONO16 4
+#define GRPH_FORMAT_BGRA5551 5
/* 32 BPP */
-#define EVERGREEN_GRPH_FORMAT_ARGB8888 0
-#define EVERGREEN_GRPH_FORMAT_ARGB2101010 1
-#define EVERGREEN_GRPH_FORMAT_32BPP_DIG 2
-#define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010 3
-#define EVERGREEN_GRPH_FORMAT_BGRA1010102 4
-#define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102 5
-#define EVERGREEN_GRPH_FORMAT_RGB111110 6
-#define EVERGREEN_GRPH_FORMAT_BGR101111 7
-#define EVERGREEN_GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11)
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_1 0
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_2 1
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_4 2
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_8 3
-#define EVERGREEN_GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13)
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_64B 0
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_128B 1
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_256B 2
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_512B 3
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_1KB 4
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_2KB 5
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_4KB 6
-#define EVERGREEN_GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18)
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3
-#define EVERGREEN_GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define EVERGREEN_GRPH_ARRAY_LINEAR_GENERAL 0
-#define EVERGREEN_GRPH_ARRAY_LINEAR_ALIGNED 1
-#define EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1 2
-#define EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1 4
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3
-
-#define EVERGREEN_GRPH_SWAP_CONTROL 0x1a03
-#define EVERGREEN_GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0)
-# define EVERGREEN_GRPH_ENDIAN_NONE 0
-# define EVERGREEN_GRPH_ENDIAN_8IN16 1
-# define EVERGREEN_GRPH_ENDIAN_8IN32 2
-# define EVERGREEN_GRPH_ENDIAN_8IN64 3
-#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
-# define EVERGREEN_GRPH_RED_SEL_R 0
-# define EVERGREEN_GRPH_RED_SEL_G 1
-# define EVERGREEN_GRPH_RED_SEL_B 2
-# define EVERGREEN_GRPH_RED_SEL_A 3
-#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
-# define EVERGREEN_GRPH_GREEN_SEL_G 0
-# define EVERGREEN_GRPH_GREEN_SEL_B 1
-# define EVERGREEN_GRPH_GREEN_SEL_A 2
-# define EVERGREEN_GRPH_GREEN_SEL_R 3
-#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
-# define EVERGREEN_GRPH_BLUE_SEL_B 0
-# define EVERGREEN_GRPH_BLUE_SEL_A 1
-# define EVERGREEN_GRPH_BLUE_SEL_R 2
-# define EVERGREEN_GRPH_BLUE_SEL_G 3
-#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
-# define EVERGREEN_GRPH_ALPHA_SEL_A 0
-# define EVERGREEN_GRPH_ALPHA_SEL_R 1
-# define EVERGREEN_GRPH_ALPHA_SEL_G 2
-# define EVERGREEN_GRPH_ALPHA_SEL_B 3
-
-#define EVERGREEN_D3VGA_CONTROL 0xf8
-#define EVERGREEN_D4VGA_CONTROL 0xf9
-#define EVERGREEN_D5VGA_CONTROL 0xfa
-#define EVERGREEN_D6VGA_CONTROL 0xfb
-
-#define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK 0xffffff00
-
-#define EVERGREEN_GRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02
-#define EVERGREEN_LUT_10BIT_BYPASS_EN (1 << 8)
-
-#define EVERGREEN_GRPH_PITCH 0x1a06
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_SURFACE_OFFSET_X 0x1a09
-#define EVERGREEN_GRPH_SURFACE_OFFSET_Y 0x1a0a
-#define EVERGREEN_GRPH_X_START 0x1a0b
-#define EVERGREEN_GRPH_Y_START 0x1a0c
-#define EVERGREEN_GRPH_X_END 0x1a0d
-#define EVERGREEN_GRPH_Y_END 0x1a0e
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_FLIP_CONTROL 0x1a12
-#define EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0)
-
-#define EVERGREEN_VIEWPORT_START 0x1b5c
-#define EVERGREEN_VIEWPORT_SIZE 0x1b5d
-#define EVERGREEN_DESKTOP_HEIGHT 0x1ac1
-
-/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */
-#define EVERGREEN_CUR_CONTROL 0x1a66
-# define EVERGREEN_CURSOR_EN (1 << 0)
-# define EVERGREEN_CURSOR_MODE(x) (((x) & 0x3) << 8)
-# define EVERGREEN_CURSOR_MONO 0
-# define EVERGREEN_CURSOR_24_1 1
-# define EVERGREEN_CURSOR_24_8_PRE_MULT 2
-# define EVERGREEN_CURSOR_24_8_UNPRE_MULT 3
-# define EVERGREEN_CURSOR_2X_MAGNIFY (1 << 16)
-# define EVERGREEN_CURSOR_FORCE_MC_ON (1 << 20)
-# define EVERGREEN_CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24)
-# define EVERGREEN_CURSOR_URGENT_ALWAYS 0
-# define EVERGREEN_CURSOR_URGENT_1_8 1
-# define EVERGREEN_CURSOR_URGENT_1_4 2
-# define EVERGREEN_CURSOR_URGENT_3_8 3
-# define EVERGREEN_CURSOR_URGENT_1_2 4
-#define EVERGREEN_CUR_SURFACE_ADDRESS 0x1a67
-# define EVERGREEN_CUR_SURFACE_ADDRESS_MASK 0xfffff000
-#define EVERGREEN_CUR_SIZE 0x1a68
-#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH 0x1a69
-#define EVERGREEN_CUR_POSITION 0x1a6a
-#define EVERGREEN_CUR_HOT_SPOT 0x1a6b
-#define EVERGREEN_CUR_COLOR1 0x1a6c
-#define EVERGREEN_CUR_COLOR2 0x1a6d
-#define EVERGREEN_CUR_UPDATE 0x1a6e
-# define EVERGREEN_CURSOR_UPDATE_PENDING (1 << 0)
-# define EVERGREEN_CURSOR_UPDATE_TAKEN (1 << 1)
-# define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16)
-# define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
-
-
-#define NI_INPUT_CSC_CONTROL 0x1a35
-# define NI_INPUT_CSC_GRPH_MODE(x) (((x) & 0x3) << 0)
-# define NI_INPUT_CSC_BYPASS 0
-# define NI_INPUT_CSC_PROG_COEFF 1
-# define NI_INPUT_CSC_PROG_SHARED_MATRIXA 2
-# define NI_INPUT_CSC_OVL_MODE(x) (((x) & 0x3) << 4)
-
-#define NI_OUTPUT_CSC_CONTROL 0x1a3c
-# define NI_OUTPUT_CSC_GRPH_MODE(x) (((x) & 0x7) << 0)
-# define NI_OUTPUT_CSC_BYPASS 0
-# define NI_OUTPUT_CSC_TV_RGB 1
-# define NI_OUTPUT_CSC_YCBCR_601 2
-# define NI_OUTPUT_CSC_YCBCR_709 3
-# define NI_OUTPUT_CSC_PROG_COEFF 4
-# define NI_OUTPUT_CSC_PROG_SHARED_MATRIXB 5
-# define NI_OUTPUT_CSC_OVL_MODE(x) (((x) & 0x7) << 4)
-
-#define NI_DEGAMMA_CONTROL 0x1a58
-# define NI_GRPH_DEGAMMA_MODE(x) (((x) & 0x3) << 0)
-# define NI_DEGAMMA_BYPASS 0
-# define NI_DEGAMMA_SRGB_24 1
-# define NI_DEGAMMA_XVYCC_222 2
-# define NI_OVL_DEGAMMA_MODE(x) (((x) & 0x3) << 4)
-# define NI_ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8)
-# define NI_CURSOR_DEGAMMA_MODE(x) (((x) & 0x3) << 12)
-
-#define NI_GAMUT_REMAP_CONTROL 0x1a59
-# define NI_GRPH_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 0)
-# define NI_GAMUT_REMAP_BYPASS 0
-# define NI_GAMUT_REMAP_PROG_COEFF 1
-# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXA 2
-# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXB 3
-# define NI_OVL_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 4)
-
-#define NI_REGAMMA_CONTROL 0x1aa0
-# define NI_GRPH_REGAMMA_MODE(x) (((x) & 0x7) << 0)
-# define NI_REGAMMA_BYPASS 0
-# define NI_REGAMMA_SRGB_24 1
-# define NI_REGAMMA_XVYCC_222 2
-# define NI_REGAMMA_PROG_A 3
-# define NI_REGAMMA_PROG_B 4
-# define NI_OVL_REGAMMA_MODE(x) (((x) & 0x7) << 4)
-
-
-#define NI_PRESCALE_GRPH_CONTROL 0x1a2d
-# define NI_GRPH_PRESCALE_BYPASS (1 << 4)
-
-#define NI_PRESCALE_OVL_CONTROL 0x1a31
-# define NI_OVL_PRESCALE_BYPASS (1 << 4)
-
-#define NI_INPUT_GAMMA_CONTROL 0x1a10
-# define NI_GRPH_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 0)
-# define NI_INPUT_GAMMA_USE_LUT 0
-# define NI_INPUT_GAMMA_BYPASS 1
-# define NI_INPUT_GAMMA_SRGB_24 2
-# define NI_INPUT_GAMMA_XVYCC_222 3
-# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4)
-
-#define BLACKOUT_MODE_MASK 0x00000007
-#define VGA_RENDER_CONTROL 0xC0
-#define R_000300_VGA_RENDER_CONTROL 0xC0
-#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF
-#define EVERGREEN_CRTC_STATUS 0x1BA3
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4
-/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
-#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d
-#define EVERGREEN_CRTC_CONTROL 0x1b9c
-# define EVERGREEN_CRTC_MASTER_EN (1 << 0)
-# define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24)
-#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d
-# define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8)
-# define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8
-#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5
-#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd
-#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-
-#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10
-#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x4
-#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80
-#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x7
-#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x400
-#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xa
-#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x2000
-#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xd
-#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10000
-#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x10
-#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80000
-#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x13
-
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID_MASK 0x1e000000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID__SHIFT 0x19
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS_MASK 0xff
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS__SHIFT 0x0
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID_MASK 0xff000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID__SHIFT 0xc
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW_MASK 0x1000000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW__SHIFT 0x18
-
-#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE_MASK 0x7
-#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE__SHIFT 0x0
-
-#define mmBIF_FB_EN__xxFB_READ_EN_MASK 0x1
-#define mmBIF_FB_EN__xxFB_READ_EN__SHIFT 0x0
-#define mmBIF_FB_EN__xxFB_WRITE_EN_MASK 0x2
-#define mmBIF_FB_EN__xxFB_WRITE_EN__SHIFT 0x1
-
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC_MASK 0x20000
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC__SHIFT 0x11
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb
-
-#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8
-#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x3
-#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40
-#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x6
-#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x200
-#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x9
-#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x1000
-#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xc
-#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8000
-#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xf
-#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40000
-#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x12
+#define GRPH_FORMAT_ARGB8888 0
+#define GRPH_FORMAT_ARGB2101010 1
+#define GRPH_FORMAT_32BPP_DIG 2
+#define GRPH_FORMAT_8B_ARGB2101010 3
+#define GRPH_FORMAT_BGRA1010102 4
+#define GRPH_FORMAT_8B_BGRA1010102 5
+#define GRPH_FORMAT_RGB111110 6
+#define GRPH_FORMAT_BGR101111 7
+
+#define GRPH_ENDIAN_NONE 0
+#define GRPH_ENDIAN_8IN16 1
+#define GRPH_ENDIAN_8IN32 2
+#define GRPH_ENDIAN_8IN64 3
+#define GRPH_RED_SEL_R 0
+#define GRPH_RED_SEL_G 1
+#define GRPH_RED_SEL_B 2
+#define GRPH_RED_SEL_A 3
+
+#define GRPH_GREEN_SEL_G 0
+#define GRPH_GREEN_SEL_B 1
+#define GRPH_GREEN_SEL_A 2
+#define GRPH_GREEN_SEL_R 3
+
+#define GRPH_BLUE_SEL_B 0
+#define GRPH_BLUE_SEL_A 1
+#define GRPH_BLUE_SEL_R 2
+#define GRPH_BLUE_SEL_G 3
+
+#define GRPH_ALPHA_SEL_A 0
+#define GRPH_ALPHA_SEL_R 1
+#define GRPH_ALPHA_SEL_G 2
+#define GRPH_ALPHA_SEL_B 3
+
+/* CUR_CONTROL */
+ #define CURSOR_MONO 0
+ #define CURSOR_24_1 1
+ #define CURSOR_24_8_PRE_MULT 2
+ #define CURSOR_24_8_UNPRE_MULT 3
+ #define CURSOR_URGENT_ALWAYS 0
+ #define CURSOR_URGENT_1_8 1
+ #define CURSOR_URGENT_1_4 2
+ #define CURSOR_URGENT_3_8 3
+ #define CURSOR_URGENT_1_2 4
+
+/* INPUT_CSC_CONTROL */
+# define INPUT_CSC_BYPASS 0
+# define INPUT_CSC_PROG_COEFF 1
+# define INPUT_CSC_PROG_SHARED_MATRIXA 2
+
+/* OUTPUT_CSC_CONTROL */
+# define OUTPUT_CSC_BYPASS 0
+# define OUTPUT_CSC_TV_RGB 1
+# define OUTPUT_CSC_YCBCR_601 2
+# define OUTPUT_CSC_YCBCR_709 3
+# define OUTPUT_CSC_PROG_COEFF 4
+# define OUTPUT_CSC_PROG_SHARED_MATRIXB 5
+
+/* DEGAMMA_CONTROL */
+# define DEGAMMA_BYPASS 0
+# define DEGAMMA_SRGB_24 1
+# define DEGAMMA_XVYCC_222 2
+
+/* GAMUT_REMAP_CONTROL */
+# define GAMUT_REMAP_BYPASS 0
+# define GAMUT_REMAP_PROG_COEFF 1
+# define GAMUT_REMAP_PROG_SHARED_MATRIXA 2
+# define GAMUT_REMAP_PROG_SHARED_MATRIXB 3
+
+/* REGAMMA_CONTROL */
+# define REGAMMA_BYPASS 0
+# define REGAMMA_SRGB_24 1
+# define REGAMMA_XVYCC_222 2
+# define REGAMMA_PROG_A 3
+# define REGAMMA_PROG_B 4
+
+
+/* INPUT_GAMMA_CONTROL */
+# define INPUT_GAMMA_USE_LUT 0
+# define INPUT_GAMMA_BYPASS 1
+# define INPUT_GAMMA_SRGB_24 2
+# define INPUT_GAMMA_XVYCC_222 3
#define MC_SEQ_MISC0__MT__MASK 0xf0000000
#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
@@ -2426,28 +780,14 @@
#define MC_SEQ_MISC0__MT__HBM 0x60000000
#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
-#define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000
#define CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK 0x4000000
-#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000
-#define CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK 0x400000
#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
-#define CONFIG_CNTL 0x1509
-#define CC_DRM_ID_STRAPS 0X1559
#define AMDGPU_PCIE_INDEX 0xc
#define AMDGPU_PCIE_DATA 0xd
-#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411
-#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412
-#define DMA_MODE 0x342f
-#define DMA_RB_RPTR_ADDR_HI 0x3407
-#define DMA_RB_RPTR_ADDR_LO 0x3408
-#define DMA_BUSY_MASK 0x20
-#define DMA1_BUSY_MASK 0X40
-#define SDMA_MAX_INSTANCE 2
-
#define PCIE_BUS_CLK 10000
#define TCLK (PCIE_BUS_CLK / 10)
#define PCIE_PORT_INDEX 0xe
@@ -2457,8 +797,6 @@
#define EVERGREEN_PIF_PHY1_INDEX 0x10
#define EVERGREEN_PIF_PHY1_DATA 0x14
-#define MC_VM_FB_OFFSET 0x81a
-
/* Discrete VCE clocks */
#define CG_VCEPLL_FUNC_CNTL 0xc0030600
#define VCEPLL_RESET_MASK 0x00000001
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
index 8b8086d5c864..2594467bdd87 100644
--- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
@@ -36,7 +36,7 @@ static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_c
#if 0
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7) &&
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 0, 7) &&
adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev))
return true;
#endif
@@ -48,18 +48,17 @@ sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
struct amdgpu_reset_handler *handler;
+ int i;
if (reset_context->method != AMD_RESET_METHOD_NONE) {
- list_for_each_entry(handler, &reset_ctl->reset_handlers,
- handler_list) {
+ for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_context->method)
return handler;
}
}
if (sienna_cichlid_is_mode2_default(reset_ctl)) {
- list_for_each_entry (handler, &reset_ctl->reset_handlers,
- handler_list) {
+ for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == AMD_RESET_METHOD_MODE2)
return handler;
}
@@ -82,18 +81,12 @@ static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_SDMA))
continue;
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
-
- if (r) {
- dev_err(adev->dev,
- "suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
- adev->ip_blocks[i].status.hw = false;
}
- return r;
+ return 0;
}
static int
@@ -120,9 +113,9 @@ static void sienna_cichlid_async_reset(struct work_struct *work)
struct amdgpu_reset_control *reset_ctl =
container_of(work, struct amdgpu_reset_control, reset_work);
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
- list_for_each_entry(handler, &reset_ctl->reset_handlers,
- handler_list) {
+ for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_ctl->active_reset) {
dev_dbg(adev->dev, "Resetting device\n");
handler->do_reset(adev);
@@ -176,15 +169,9 @@ static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
for (i = 0; i < adev->num_ip_blocks; i++) {
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- dev_err(adev->dev,
- "resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = true;
}
}
@@ -194,15 +181,9 @@ static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type ==
AMD_IP_BLOCK_TYPE_SDMA))
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- dev_err(adev->dev,
- "resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = true;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -214,7 +195,7 @@ static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->funcs->late_init) {
r = adev->ip_blocks[i].version->funcs->late_init(
- (void *)adev);
+ &adev->ip_blocks[i]);
if (r) {
dev_err(adev->dev,
"late_init of IP block <%s> failed %d after reset\n",
@@ -239,6 +220,7 @@ sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
int r;
struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
r = sienna_cichlid_mode2_restore_ip(tmp_adev);
@@ -256,6 +238,7 @@ sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
r = amdgpu_ib_ring_tests(tmp_adev);
if (r) {
dev_err(tmp_adev->dev,
@@ -281,6 +264,11 @@ static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = {
.do_reset = sienna_cichlid_mode2_reset,
};
+static struct amdgpu_reset_handler
+ *sienna_cichlid_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+ &sienna_cichlid_mode2_handler,
+ };
+
int sienna_cichlid_reset_init(struct amdgpu_device *adev)
{
struct amdgpu_reset_control *reset_ctl;
@@ -294,11 +282,9 @@ int sienna_cichlid_reset_init(struct amdgpu_device *adev)
reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler;
- INIT_LIST_HEAD(&reset_ctl->reset_handlers);
INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
/* Only mode2 is handled through reset control now */
- amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler);
-
+ reset_ctl->reset_handlers = &sienna_cichlid_rst_handlers;
adev->reset_cntl = reset_ctl;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
index dd2d66090d23..68aef47254a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -743,7 +743,7 @@ int smu_v11_0_i2c_control_init(struct amdgpu_device *adev)
adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
- res = i2c_add_adapter(control);
+ res = devm_i2c_add_adapter(adev->dev, control);
if (res)
DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
@@ -752,9 +752,6 @@ int smu_v11_0_i2c_control_init(struct amdgpu_device *adev)
void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev)
{
- struct i2c_adapter *control = adev->pm.ras_eeprom_i2c_bus;
-
- i2c_del_adapter(control);
adev->pm.ras_eeprom_i2c_bus = NULL;
adev->pm.fru_eeprom_i2c_bus = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
index ae29620b1ea4..70569ea906bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
@@ -44,10 +44,10 @@ smu_v13_0_10_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
{
struct amdgpu_reset_handler *handler;
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
if (reset_context->method != AMD_RESET_METHOD_NONE) {
- list_for_each_entry(handler, &reset_ctl->reset_handlers,
- handler_list) {
+ for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_context->method)
return handler;
}
@@ -55,8 +55,7 @@ smu_v13_0_10_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
if (smu_v13_0_10_is_mode2_default(reset_ctl) &&
amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_MODE2) {
- list_for_each_entry (handler, &reset_ctl->reset_handlers,
- handler_list) {
+ for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == AMD_RESET_METHOD_MODE2)
return handler;
}
@@ -81,18 +80,12 @@ static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_MES))
continue;
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
-
- if (r) {
- dev_err(adev->dev,
- "suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
- adev->ip_blocks[i].status.hw = false;
}
- return r;
+ return 0;
}
static int
@@ -119,9 +112,9 @@ static void smu_v13_0_10_async_reset(struct work_struct *work)
struct amdgpu_reset_control *reset_ctl =
container_of(work, struct amdgpu_reset_control, reset_work);
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
- list_for_each_entry(handler, &reset_ctl->reset_handlers,
- handler_list) {
+ for_each_handler(i, handler, reset_ctl) {
if (handler->reset_method == reset_ctl->active_reset) {
dev_dbg(adev->dev, "Resetting device\n");
handler->do_reset(adev);
@@ -187,15 +180,9 @@ static int smu_v13_0_10_mode2_restore_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type ==
AMD_IP_BLOCK_TYPE_SDMA))
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- dev_err(adev->dev,
- "resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = true;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -209,7 +196,7 @@ static int smu_v13_0_10_mode2_restore_ip(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->funcs->late_init) {
r = adev->ip_blocks[i].version->funcs->late_init(
- (void *)adev);
+ &adev->ip_blocks[i]);
if (r) {
dev_err(adev->dev,
"late_init of IP block <%s> failed %d after reset\n",
@@ -234,6 +221,7 @@ smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
int r;
struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
r = smu_v13_0_10_mode2_restore_ip(tmp_adev);
@@ -247,6 +235,7 @@ smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
r = amdgpu_ib_ring_tests(tmp_adev);
if (r) {
dev_err(tmp_adev->dev,
@@ -272,6 +261,11 @@ static struct amdgpu_reset_handler smu_v13_0_10_mode2_handler = {
.do_reset = smu_v13_0_10_mode2_reset,
};
+static struct amdgpu_reset_handler
+ *smu_v13_0_10_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+ &smu_v13_0_10_mode2_handler,
+ };
+
int smu_v13_0_10_reset_init(struct amdgpu_device *adev)
{
struct amdgpu_reset_control *reset_ctl;
@@ -285,10 +279,9 @@ int smu_v13_0_10_reset_init(struct amdgpu_device *adev)
reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
reset_ctl->get_reset_handler = smu_v13_0_10_get_reset_handler;
- INIT_LIST_HEAD(&reset_ctl->reset_handlers);
INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
/* Only mode2 is handled through reset control now */
- amdgpu_reset_add_handler(reset_ctl, &smu_v13_0_10_mode2_handler);
+ reset_ctl->reset_handlers = &smu_v13_0_10_rst_handlers;
adev->reset_cntl = reset_ctl;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
index 13e905c22592..bf8b8e5ddf5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
@@ -128,6 +128,27 @@ static bool smuio_v13_0_is_host_gpu_xgmi_supported(struct amdgpu_device *adev)
return data ? true : false;
}
+static enum amdgpu_pkg_type smuio_v13_0_get_pkg_type(struct amdgpu_device *adev)
+{
+ enum amdgpu_pkg_type pkg_type;
+ u32 data;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, TOPOLOGY_ID);
+
+ switch (data) {
+ case 0x4:
+ case 0xC:
+ pkg_type = AMDGPU_PKG_TYPE_CEM;
+ break;
+ default:
+ pkg_type = AMDGPU_PKG_TYPE_OAM;
+ break;
+ }
+
+ return pkg_type;
+}
+
const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
.get_rom_index_offset = smuio_v13_0_get_rom_index_offset,
.get_rom_data_offset = smuio_v13_0_get_rom_data_offset,
@@ -136,4 +157,5 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
.is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported,
.update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating,
.get_clock_gating_state = smuio_v13_0_get_clock_gating_state,
+ .get_pkg_type = smuio_v13_0_get_pkg_type,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
index 4368a5891eeb..5461b5289793 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c
@@ -84,6 +84,12 @@ static enum amdgpu_pkg_type smuio_v13_0_3_get_pkg_type(struct amdgpu_device *ade
* b0100 - b1111 - Reserved
*/
switch (data & PKG_TYPE_MASK) {
+ case 0x0:
+ pkg_type = AMDGPU_PKG_TYPE_CEM;
+ break;
+ case 0x1:
+ pkg_type = AMDGPU_PKG_TYPE_OAM;
+ break;
case 0x2:
pkg_type = AMDGPU_PKG_TYPE_APU;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c
new file mode 100644
index 000000000000..2a51a70d4846
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v14_0_2.h"
+#include "smuio/smuio_14_0_2_offset.h"
+#include "smuio/smuio_14_0_2_sh_mask.h"
+#include <linux/preempt.h>
+
+static u32 smuio_v14_0_2_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX);
+}
+
+static u32 smuio_v14_0_2_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA);
+}
+
+static u64 smuio_v14_0_2_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ u64 clock;
+ u64 clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
+
+ preempt_disable();
+ clock_counter_hi_pre = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ /* the clock counter may be updated while the counters are being polled */
+ clock_counter_hi_after = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ preempt_enable();
+
+ clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
+
+ return clock;
+}
+
+const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs = {
+ .get_rom_index_offset = smuio_v14_0_2_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v14_0_2_get_rom_data_offset,
+ .get_gpu_clock_counter = smuio_v14_0_2_get_gpu_clock_counter,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h
new file mode 100644
index 000000000000..6e617f832d90
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V14_0_2_H__
+#define __SMUIO_V14_0_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs;
+
+#endif /* __SMUIO_V14_0_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
index e4e30b9d481b..c04fdd2d5b38 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
@@ -60,7 +60,7 @@ static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *f
{
u32 data;
- /* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */
+ /* CGTT_ROM_CLK_CTRL0 is not available for APUs */
if (adev->flags & AMD_IS_APU)
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index f5be40d7ba36..9785fada4fa7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -28,7 +28,6 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
@@ -90,8 +89,8 @@ static const struct amd_ip_funcs soc15_common_ip_funcs;
/* Vega, Raven, Arcturus */
static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 0)},
};
static const struct amdgpu_video_codecs vega_video_codecs_encode =
@@ -103,12 +102,11 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode =
/* Vega */
static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
};
static const struct amdgpu_video_codecs vega_video_codecs_decode =
@@ -120,12 +118,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode =
/* Raven */
static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
};
@@ -138,12 +136,12 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode =
/* Renoir, Arcturus */
static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -156,7 +154,7 @@ static const struct amdgpu_video_codecs rn_video_codecs_decode =
static const struct amdgpu_video_codec_info vcn_4_0_3_video_codecs_decode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
@@ -171,11 +169,29 @@ static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = {
.codec_array = NULL,
};
+static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_encode_vcn0 = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static const struct amdgpu_video_codec_info vcn_5_0_1_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_0_1_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_5_0_1_video_codecs_decode_array_vcn0,
+};
+
static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
- if (adev->ip_versions[VCE_HWIP][0]) {
- switch (adev->ip_versions[VCE_HWIP][0]) {
+ if (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 1, 0):
if (encode)
@@ -187,7 +203,7 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
return -EINVAL;
}
} else {
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
if (encode)
@@ -209,6 +225,12 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
else
*codecs = &vcn_4_0_3_video_codecs_decode;
return 0;
+ case IP_VERSION(5, 0, 1):
+ if (encode)
+ *codecs = &vcn_5_0_1_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_5_0_1_video_codecs_decode_vcn0;
+ return 0;
default:
return -EINVAL;
}
@@ -324,11 +346,14 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev)
{
u32 reference_clock = adev->clock.spll.reference_freq;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0) ||
- adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1))
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 1) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 14))
return 10000;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) ||
- adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1))
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 1))
return reference_clock / 4;
return reference_clock;
@@ -501,7 +526,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
static enum amd_reset_method
soc15_asic_reset_method(struct amdgpu_device *adev)
{
- bool baco_reset = false;
+ int baco_reset = 0;
bool connected_to_cpu = false;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -522,7 +547,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(10, 0, 0):
case IP_VERSION(10, 0, 1):
case IP_VERSION(12, 0, 0):
@@ -539,7 +564,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
*/
if (ras && adev->ras_enabled &&
adev->pm.fw_version <= 0x283400)
- baco_reset = false;
+ baco_reset = 0;
} else {
baco_reset = amdgpu_dpm_is_baco_supported(adev);
}
@@ -553,10 +578,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
return AMD_RESET_METHOD_MODE2;
break;
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
/* Use gpu_recovery param to target a reset method.
* Enable triggering of GPU reset only if specified
* by module parameter.
*/
+ if (adev->pcie_reset_ctx.in_link_reset)
+ return AMD_RESET_METHOD_LINK;
if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
return AMD_RESET_METHOD_MODE2;
else if (!(adev->flags & AMD_IS_APU))
@@ -573,13 +602,36 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
return AMD_RESET_METHOD_MODE1;
}
+static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
+{
+ /* A reset is needed in the following suspend abort cases:
+ * 1) S3 suspend aborted during a normal S3 suspend
+ * 2) S3 suspend aborted while performing the pm core test.
+ */
+ if (adev->in_s3 && !pm_resume_via_firmware())
+ return true;
+ else
+ return false;
+}
+
static int soc15_asic_reset(struct amdgpu_device *adev)
{
/* original raven doesn't have full asic reset */
+ /* On the latest Raven, the GPU reset can be performed
+ * successfully. So now, temporarily enable it for the
+ * S3 suspend abort case.
+ */
+
+ if ((adev->apu_flags & AMD_APU_IS_PICASSO ||
+ !(adev->apu_flags & AMD_APU_IS_RAVEN)) &&
+ soc15_need_reset_on_resume(adev))
+ goto asic_reset;
+
if ((adev->apu_flags & AMD_APU_IS_RAVEN) ||
- (adev->apu_flags & AMD_APU_IS_RAVEN2))
+ (adev->apu_flags & AMD_APU_IS_RAVEN2))
return 0;
+asic_reset:
switch (soc15_asic_reset_method(adev)) {
case AMD_RESET_METHOD_PCI:
dev_info(adev->dev, "PCI reset\n");
@@ -590,27 +642,30 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
case AMD_RESET_METHOD_MODE2:
dev_info(adev->dev, "MODE2 reset\n");
return amdgpu_dpm_mode2_reset(adev);
+ case AMD_RESET_METHOD_LINK:
+ dev_info(adev->dev, "Link reset\n");
+ return amdgpu_device_link_reset(adev);
default:
dev_info(adev->dev, "MODE1 reset\n");
return amdgpu_device_mode1_reset(adev);
}
}
-static bool soc15_supports_baco(struct amdgpu_device *adev)
+static int soc15_supports_baco(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
case IP_VERSION(11, 0, 2):
if (adev->asic_type == CHIP_VEGA20) {
if (adev->psp.sos.fw_version >= 0x80067)
return amdgpu_dpm_is_baco_supported(adev);
- return false;
+ return 0;
} else {
return amdgpu_dpm_is_baco_supported(adev);
}
break;
default:
- return false;
+ return 0;
}
}
@@ -645,8 +700,7 @@ static void soc15_program_aspm(struct amdgpu_device *adev)
if (!amdgpu_device_should_use_aspm(adev))
return;
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->program_aspm))
+ if (adev->nbio.funcs->program_aspm)
adev->nbio.funcs->program_aspm(adev);
}
@@ -687,7 +741,6 @@ static void soc15_reg_base_init(struct amdgpu_device *adev)
void soc15_set_virt_ops(struct amdgpu_device *adev)
{
adev->virt.ops = &xgpu_ai_virt_ops;
-
/* init soc15 reg base early enough so we can
* request request full access for sriov before
* set_ip_blocks. */
@@ -804,6 +857,10 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_RENOIR)
return true;
+ if (amdgpu_gmc_need_reset_on_init(adev))
+ return true;
+ if (amdgpu_psp_tos_reload_needed(adev))
+ return true;
/* Just return false for soc15 GPUs. Reset does not seem to
* be necessary.
*/
@@ -895,24 +952,20 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
.get_config_memsize = &soc15_get_config_memsize,
.need_full_reset = &soc15_need_full_reset,
.init_doorbell_index = &aqua_vanjaram_doorbell_index_init,
- .get_pcie_usage = &amdgpu_nbio_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
.get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
.supports_baco = &soc15_supports_baco,
.pre_asic_init = &soc15_pre_asic_init,
.query_video_codecs = &soc15_query_video_codecs,
.encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
+ .get_reg_state = &aqua_vanjaram_get_reg_state,
};
-static int soc15_common_early_init(void *handle)
+static int soc15_common_early_init(struct amdgpu_ip_block *ip_block)
{
-#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (!amdgpu_sriov_vf(adev)) {
- adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
- adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
- }
+ adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &amdgpu_device_indirect_rreg;
@@ -921,6 +974,8 @@ static int soc15_common_early_init(void *handle)
adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext;
adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pcie_rreg64_ext = &amdgpu_device_indirect_rreg64_ext;
+ adev->pcie_wreg64_ext = &amdgpu_device_indirect_wreg64_ext;
adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;
adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg;
adev->didt_rreg = &soc15_didt_rreg;
@@ -935,7 +990,7 @@ static int soc15_common_early_init(void *handle)
/* TODO: split the GC and PG flags based on the relevant IP version for which
* they are relevant.
*/
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 0, 1):
adev->asic_funcs = &soc15_asic_funcs;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
@@ -1147,6 +1202,8 @@ static int soc15_common_early_init(void *handle)
adev->external_rev_id = adev->rev_id + 0x3c;
break;
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
adev->asic_funcs = &aqua_vanjaram_asic_funcs;
adev->cg_flags =
AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG |
@@ -1158,7 +1215,10 @@ static int soc15_common_early_init(void *handle)
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG;
+ /*TODO: need a new external_rev_id for GC 9.4.4? */
adev->external_rev_id = adev->rev_id + 0x46;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ adev->external_rev_id = adev->rev_id + 0x50;
break;
default:
/* FIXME: not supported yet */
@@ -1173,9 +1233,9 @@ static int soc15_common_early_init(void *handle)
return 0;
}
-static int soc15_common_late_init(void *handle)
+static int soc15_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_get_irq(adev);
@@ -1188,9 +1248,9 @@ static int soc15_common_late_init(void *handle)
return 0;
}
-static int soc15_common_sw_init(void *handle)
+static int soc15_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_add_irq_id(adev);
@@ -1202,9 +1262,9 @@ static int soc15_common_sw_init(void *handle)
return 0;
}
-static int soc15_common_sw_fini(void *handle)
+static int soc15_common_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->df.funcs &&
adev->df.funcs->sw_fini)
@@ -1226,9 +1286,9 @@ static void soc15_sdma_doorbell_range_init(struct amdgpu_device *adev)
}
}
-static int soc15_common_hw_init(void *handle)
+static int soc15_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* enable aspm */
soc15_program_aspm(adev);
@@ -1255,9 +1315,9 @@ static int soc15_common_hw_init(void *handle)
return 0;
}
-static int soc15_common_hw_fini(void *handle)
+static int soc15_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* Disable the doorbell aperture and selfring doorbell aperture
* separately in hw_fini because soc15_enable_doorbell_aperture
@@ -1270,7 +1330,13 @@ static int soc15_common_hw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_put_irq(adev);
- if (adev->nbio.ras_if &&
+ /*
+ * For minimal init, late_init is not called, hence RAS irqs are not
+ * enabled.
+ */
+ if ((!amdgpu_sriov_vf(adev)) &&
+ (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
+ adev->nbio.ras_if &&
amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
if (adev->nbio.ras &&
adev->nbio.ras->init_ras_controller_interrupt)
@@ -1283,35 +1349,27 @@ static int soc15_common_hw_fini(void *handle)
return 0;
}
-static int soc15_common_suspend(void *handle)
+static int soc15_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return soc15_common_hw_fini(adev);
+ return soc15_common_hw_fini(ip_block);
}
-static int soc15_common_resume(void *handle)
+static int soc15_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- return soc15_common_hw_init(adev);
+ if (soc15_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n");
+ soc15_asic_reset(adev);
+ }
+ return soc15_common_hw_init(ip_block);
}
-static bool soc15_common_is_idle(void *handle)
+static bool soc15_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int soc15_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int soc15_common_soft_reset(void *handle)
-{
- return 0;
-}
-
static void soc15_update_drm_clock_gating(struct amdgpu_device *adev, bool enable)
{
uint32_t def, data;
@@ -1356,15 +1414,15 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable
WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data);
}
-static int soc15_common_set_clockgating_state(void *handle,
+static int soc15_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 2, 0):
case IP_VERSION(7, 4, 0):
@@ -1408,20 +1466,24 @@ static int soc15_common_set_clockgating_state(void *handle,
return 0;
}
-static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
+static void soc15_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
*flags = 0;
- adev->nbio.funcs->get_clockgating_state(adev, flags);
-
- adev->hdp.funcs->get_clock_gating_state(adev, flags);
+ if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state)
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
- if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 2)) {
+ if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state)
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+ if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 12)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 14))) {
/* AMD_CG_SUPPORT_DRM_MGCG */
data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
if (!(data & 0x01000000))
@@ -1434,12 +1496,14 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
}
/* AMD_CG_SUPPORT_ROM_MGCG */
- adev->smuio.funcs->get_clock_gating_state(adev, flags);
+ if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state)
+ adev->smuio.funcs->get_clock_gating_state(adev, flags);
- adev->df.funcs->get_clockgating_state(adev, flags);
+ if (adev->df.funcs && adev->df.funcs->get_clockgating_state)
+ adev->df.funcs->get_clockgating_state(adev, flags);
}
-static int soc15_common_set_powergating_state(void *handle,
+static int soc15_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* todo */
@@ -1457,8 +1521,6 @@ static const struct amd_ip_funcs soc15_common_ip_funcs = {
.suspend = soc15_common_suspend,
.resume = soc15_common_resume,
.is_idle = soc15_common_is_idle,
- .wait_for_idle = soc15_common_wait_for_idle,
- .soft_reset = soc15_common_soft_reset,
.set_clockgating_state = soc15_common_set_clockgating_state,
.set_powergating_state = soc15_common_set_powergating_state,
.get_clockgating_state= soc15_common_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index eac54042c6c0..c8ac11a9cdef 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -27,6 +27,7 @@
#include "nbio_v6_1.h"
#include "nbio_v7_0.h"
#include "nbio_v7_4.h"
+#include "amdgpu_reg_state.h"
extern const struct amdgpu_ip_block_version vega10_common_ip_block;
@@ -87,9 +88,15 @@ struct soc15_ras_field_entry {
};
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
+#define SOC15_REG_ENTRY_STR(ip, inst, reg) \
+ { ip##_HWIP, inst, reg##_BASE_IDX, reg, #reg }
#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
+/* Override the instance id */
+#define SOC15_REG_ENTRY_OFFSET_INST(entry, inst) \
+ (adev->reg_offset[entry.hwip][inst][entry.seg] + entry.reg_offset)
+
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
@@ -111,9 +118,11 @@ int vega10_reg_base_init(struct amdgpu_device *adev);
int vega20_reg_base_init(struct amdgpu_device *adev);
int arct_reg_base_init(struct amdgpu_device *adev);
int aldebaran_reg_base_init(struct amdgpu_device *adev);
-void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev);
u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id);
int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev);
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+ enum amdgpu_reg_state reg_state, void *buf,
+ size_t max_size);
void vega10_doorbell_index_init(struct amdgpu_device *adev);
void vega20_doorbell_index_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index da683afa0222..242b24f73c17 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -69,7 +69,7 @@
#define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0)
-#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
#define RREG32_SOC15_NO_KIQ(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
@@ -86,8 +86,8 @@
#define WREG32_SOC15_IP(ip, reg, value) \
__WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0)
-#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \
- __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0)
+#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \
+ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst)
#define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \
__WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
@@ -140,7 +140,7 @@
/* for GC only */
#define RREG32_RLC(reg) \
- __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP)
+ __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP, 0)
#define WREG32_RLC_NO_KIQ(reg, value, hwip) \
__WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0)
@@ -204,4 +204,10 @@
+ adev->asic_funcs->encode_ext_smn_addressing(ext), \
value) \
+#define RREG64_MCA(ext, mca_base, idx) \
+ RREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8))
+
+#define WREG64_MCA(ext, mca_base, idx, val) \
+ WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val)
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 2357ff39323f..cf93fa477674 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -76,6 +76,12 @@
((cond & 0xF) << 24) | \
((type & 0xF) << 28))
+#define CP_PACKETJ_NOP 0x60000000
+#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF)
+#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F)
+#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF)
+#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF)
+
/* Packet 3 types */
#define PACKET3_NOP 0x10
#define PACKET3_SET_BASE 0x11
@@ -87,11 +93,25 @@
#define PACKET3_DISPATCH_INDIRECT 0x16
#define PACKET3_ATOMIC_GDS 0x1D
#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
#define PACKET3_OCCLUSION_QUERY 0x1F
#define PACKET3_SET_PREDICATION 0x20
#define PACKET3_REG_RMW 0x21
#define PACKET3_COND_EXEC 0x22
#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_PRED_EXEC__EXEC_COUNT(x) ((((unsigned)(x)) & 0x3FFF) << 0)
+#define PACKET3_PRED_EXEC__VIRTUAL_XCC_ID_SELECT(x) ((((unsigned)(x)) & 0xFF) << 24)
#define PACKET3_DRAW_INDIRECT 0x24
#define PACKET3_DRAW_INDEX_INDIRECT 0x25
#define PACKET3_INDEX_BASE 0x26
@@ -126,6 +146,28 @@
* 1 - pfp
* 2 - ce
*/
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__RESUME_VF_MI300(x) ((((unsigned)(x)) & 0x1) << 19)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
+#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
+#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PACKET3_MEM_SEMAPHORE 0x39
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
@@ -154,6 +196,33 @@
/* 0 - me
* 1 - pfp
*/
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
#define PACKET3_INDIRECT_BUFFER 0x3F
#define INDIRECT_BUFFER_VALID (1 << 23)
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
@@ -163,7 +232,63 @@
*/
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
#define PACKET3_COPY_DATA 0x40
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__MEMORY 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__GDS 3
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__MEMORY 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_COND_WRITE 0x45
#define PACKET3_EVENT_WRITE 0x46
@@ -175,6 +300,15 @@
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
*/
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTATS 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
#define PACKET3_RELEASE_MEM 0x49
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_INDEX(x) ((x) << 8)
@@ -280,6 +414,13 @@
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
#define PACKET3_REWIND 0x59
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI_VG10(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FF) << 0)
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F
#define PACKET3_LOAD_CONFIG_REG 0x60
@@ -294,12 +435,16 @@
#define PACKET3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00
#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
#define PACKET3_SET_SH_REG_OFFSET 0x77
#define PACKET3_SET_QUEUE_REG 0x78
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
#define PACKET3_SCRATCH_RAM_READ 0x7E
#define PACKET3_LOAD_CONST_RAM 0x80
@@ -407,6 +552,15 @@
# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
+#define PACKET3_RUN_CLEANER_SHADER_9_0 0xD7
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
+
+#define PACKET3_RUN_CLEANER_SHADER 0xD2
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
#define VCE_CMD_NO_OP 0x00000000
#define VCE_CMD_END 0x00000001
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 8b2ff2b281b0..ad36c96478a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -49,14 +49,14 @@ static const struct amd_ip_funcs soc21_common_ip_funcs;
/* SOC21 */
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};
static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = {
@@ -72,7 +72,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = {
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
@@ -80,7 +80,7 @@ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -96,14 +96,14 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = {
/* SRIOV SOC21, not const since data is controlled by host */
static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};
static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = {
@@ -117,23 +117,17 @@ static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = {
};
static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -153,10 +147,11 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config))
return -EINVAL;
- switch (adev->ip_versions[UVD_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
case IP_VERSION(4, 0, 2):
case IP_VERSION(4, 0, 4):
+ case IP_VERSION(4, 0, 5):
if (amdgpu_sriov_vf(adev)) {
if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
!amdgpu_sriov_is_av1_support(adev)) {
@@ -184,6 +179,12 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
}
}
return 0;
+ case IP_VERSION(4, 0, 6):
+ if (encode)
+ *codecs = &vcn_4_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
+ return 0;
default:
return -EINVAL;
}
@@ -373,13 +374,17 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
amdgpu_reset_method);
- switch (adev->ip_versions[MP1_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 10):
return AMD_RESET_METHOD_MODE1;
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
+ case IP_VERSION(14, 0, 5):
return AMD_RESET_METHOD_MODE2;
default:
if (amdgpu_dpm_is_baco_supported(adev))
@@ -432,8 +437,7 @@ static void soc21_program_aspm(struct amdgpu_device *adev)
if (!amdgpu_device_should_use_aspm(adev))
return;
- if (!(adev->flags & AMD_IS_APU) &&
- (adev->nbio.funcs->program_aspm))
+ if (adev->nbio.funcs->program_aspm)
adev->nbio.funcs->program_aspm(adev);
}
@@ -447,12 +451,10 @@ const struct amdgpu_ip_block_version soc21_common_ip_block = {
static bool soc21_need_full_reset(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
- return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
- return false;
default:
return true;
}
@@ -503,6 +505,7 @@ static void soc21_init_doorbell_index(struct amdgpu_device *adev)
adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5;
adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7;
+ adev->doorbell_index.vpe_ring = AMDGPU_NAVI10_DOORBELL64_VPE;
adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP;
adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP;
@@ -548,13 +551,11 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = {
.update_umd_stable_pstate = &soc21_update_umd_stable_pstate,
};
-static int soc21_common_early_init(void *handle)
+static int soc21_common_early_init(struct amdgpu_ip_block *ip_block)
{
-#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
- adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &amdgpu_device_indirect_rreg;
@@ -575,7 +576,7 @@ static int soc21_common_early_init(void *handle)
adev->rev_id = amdgpu_device_get_rev_id(adev);
adev->external_rev_id = 0xff;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
@@ -687,7 +688,122 @@ static int soc21_common_early_init(void *handle)
AMD_PG_SUPPORT_JPEG;
adev->external_rev_id = adev->rev_id + 0x80;
break;
-
+ case IP_VERSION(11, 5, 0):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ if (adev->rev_id == 0)
+ adev->external_rev_id = 0x1;
+ else
+ adev->external_rev_id = adev->rev_id + 0x10;
+ break;
+ case IP_VERSION(11, 5, 1):
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG;
+ adev->external_rev_id = adev->rev_id + 0xc1;
+ break;
+ case IP_VERSION(11, 5, 2):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ adev->external_rev_id = adev->rev_id + 0x40;
+ break;
+ case IP_VERSION(11, 5, 3):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -701,9 +817,9 @@ static int soc21_common_early_init(void *handle)
return 0;
}
-static int soc21_common_late_init(void *handle)
+static int soc21_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_get_irq(adev);
@@ -739,9 +855,9 @@ static int soc21_common_late_init(void *handle)
return 0;
}
-static int soc21_common_sw_init(void *handle)
+static int soc21_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_add_irq_id(adev);
@@ -749,14 +865,9 @@ static int soc21_common_sw_init(void *handle)
return 0;
}
-static int soc21_common_sw_fini(void *handle)
+static int soc21_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
-
-static int soc21_common_hw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* enable aspm */
soc21_program_aspm(adev);
@@ -774,9 +885,9 @@ static int soc21_common_hw_init(void *handle)
return 0;
}
-static int soc21_common_hw_fini(void *handle)
+static int soc21_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* Disable the doorbell aperture and selfring doorbell aperture
* separately in hw_fini because soc21_enable_doorbell_aperture
@@ -797,44 +908,63 @@ static int soc21_common_hw_fini(void *handle)
return 0;
}
-static int soc21_common_suspend(void *handle)
+static int soc21_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return soc21_common_hw_fini(adev);
+ return soc21_common_hw_fini(ip_block);
}
-static int soc21_common_resume(void *handle)
+static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 sol_reg1, sol_reg2;
- return soc21_common_hw_init(adev);
-}
+ /* Will reset for the following suspend abort cases.
+ * 1) Only reset dGPU side.
+ * 2) S3 suspend got aborted and TOS is active.
+ * As for dGPU suspend abort cases the SOL value
+ * will be kept as zero at this resume point.
+ */
+ if (!(adev->flags & AMD_IS_APU) && adev->in_s3) {
+ sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+ msleep(100);
+ sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
-static bool soc21_common_is_idle(void *handle)
-{
- return true;
+ return (sol_reg1 != sol_reg2);
+ }
+
+ return false;
}
-static int soc21_common_wait_for_idle(void *handle)
+static int soc21_common_resume(struct amdgpu_ip_block *ip_block)
{
- return 0;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (soc21_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend aborted, resetting...\n");
+ soc21_asic_reset(adev);
+ }
+
+ return soc21_common_hw_init(ip_block);
}
-static int soc21_common_soft_reset(void *handle)
+static bool soc21_common_is_idle(struct amdgpu_ip_block *ip_block)
{
- return 0;
+ return true;
}
-static int soc21_common_set_clockgating_state(void *handle,
+static int soc21_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(4, 3, 0):
case IP_VERSION(4, 3, 1):
case IP_VERSION(7, 7, 0):
+ case IP_VERSION(7, 7, 1):
+ case IP_VERSION(7, 11, 0):
+ case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
+ case IP_VERSION(7, 11, 3):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,
@@ -848,12 +978,12 @@ static int soc21_common_set_clockgating_state(void *handle,
return 0;
}
-static int soc21_common_set_powergating_state(void *handle,
+static int soc21_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- switch (adev->ip_versions[LSDMA_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
case IP_VERSION(6, 0, 0):
case IP_VERSION(6, 0, 2):
adev->lsdma.funcs->update_memory_power_gating(adev,
@@ -866,15 +996,13 @@ static int soc21_common_set_powergating_state(void *handle,
return 0;
}
-static void soc21_common_get_clockgating_state(void *handle, u64 *flags)
+static void soc21_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->get_clockgating_state(adev, flags);
adev->hdp.funcs->get_clock_gating_state(adev, flags);
-
- return;
}
static const struct amd_ip_funcs soc21_common_ip_funcs = {
@@ -882,14 +1010,11 @@ static const struct amd_ip_funcs soc21_common_ip_funcs = {
.early_init = soc21_common_early_init,
.late_init = soc21_common_late_init,
.sw_init = soc21_common_sw_init,
- .sw_fini = soc21_common_sw_fini,
.hw_init = soc21_common_hw_init,
.hw_fini = soc21_common_hw_fini,
.suspend = soc21_common_suspend,
.resume = soc21_common_resume,
.is_idle = soc21_common_is_idle,
- .wait_for_idle = soc21_common_wait_for_idle,
- .soft_reset = soc21_common_soft_reset,
.set_clockgating_state = soc21_common_set_clockgating_state,
.set_powergating_state = soc21_common_set_powergating_state,
.get_clockgating_state = soc21_common_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c
new file mode 100644
index 000000000000..972b449ab89f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.c
@@ -0,0 +1,601 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "mp/mp_14_0_2_offset.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "soc24.h"
+#include "mxgpu_nv.h"
+
+static const struct amd_ip_funcs soc24_common_ip_funcs;
+
+static const struct amdgpu_video_codec_info vcn_5_0_0_video_codecs_encode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_0_video_codecs_encode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_0_0_video_codecs_encode_array_vcn0),
+ .codec_array = vcn_5_0_0_video_codecs_encode_array_vcn0,
+};
+
+static const struct amdgpu_video_codec_info vcn_5_0_0_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_0_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_0_0_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_5_0_0_video_codecs_decode_array_vcn0,
+};
+
+static int soc24_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config))
+ return -EINVAL;
+
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(5, 0, 0):
+ if (encode)
+ *codecs = &vcn_5_0_0_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_5_0_0_video_codecs_decode_vcn0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static u32 soc24_get_config_memsize(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_memsize(adev);
+}
+
+static u32 soc24_get_xclk(struct amdgpu_device *adev)
+{
+ return adev->clock.spll.reference_freq;
+}
+
+void soc24_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+ u32 grbm_gfx_cntl = 0;
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_gfx_cntl);
+}
+
+static struct soc15_allowed_register_entry soc24_allowed_read_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE3)},
+ { SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_STATUS_REG)},
+ { SOC15_REG_ENTRY(SDMA1, 0, regSDMA1_STATUS_REG)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG)},
+};
+
+static uint32_t soc24_read_indexed_register(struct amdgpu_device *adev,
+ u32 se_num,
+ u32 sh_num,
+ u32 reg_offset)
+{
+ uint32_t val;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+}
+
+static uint32_t soc24_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ return soc24_read_indexed_register(adev, se_num, sh_num, reg_offset);
+ } else {
+ if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG) &&
+ adev->gfx.config.gb_addr_config)
+ return adev->gfx.config.gb_addr_config;
+ return RREG32(reg_offset);
+ }
+}
+
+static int soc24_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+ struct soc15_allowed_register_entry *en;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(soc24_allowed_read_registers); i++) {
+ en = &soc24_allowed_read_registers[i];
+ if (!adev->reg_offset[en->hwip][en->inst])
+ continue;
+ else if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg]
+ + en->reg_offset))
+ continue;
+
+ *value = soc24_get_register_value(adev,
+ soc24_allowed_read_registers[i].grbm_indexed,
+ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static enum amd_reset_method
+soc24_asic_reset_method(struct amdgpu_device *adev)
+{
+ if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+ return amdgpu_reset_method;
+
+ if (amdgpu_reset_method != -1)
+ dev_warn(adev->dev,
+ "Specified reset method:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ return AMD_RESET_METHOD_MODE1;
+ default:
+ if (amdgpu_dpm_is_baco_supported(adev))
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_MODE1;
+ }
+}
+
+static int soc24_asic_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (soc24_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_PCI:
+ dev_info(adev->dev, "PCI reset\n");
+ ret = amdgpu_device_pci_reset(adev);
+ break;
+ case AMD_RESET_METHOD_BACO:
+ dev_info(adev->dev, "BACO reset\n");
+ ret = amdgpu_dpm_baco_reset(adev);
+ break;
+ case AMD_RESET_METHOD_MODE2:
+ dev_info(adev->dev, "MODE2 reset\n");
+ ret = amdgpu_dpm_mode2_reset(adev);
+ break;
+ default:
+ dev_info(adev->dev, "MODE1 reset\n");
+ ret = amdgpu_device_mode1_reset(adev);
+ break;
+ }
+
+ return ret;
+}
+
+static void soc24_program_aspm(struct amdgpu_device *adev)
+{
+ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (!(adev->flags & AMD_IS_APU) &&
+ (adev->nbio.funcs->program_aspm))
+ adev->nbio.funcs->program_aspm(adev);
+}
+
+const struct amdgpu_ip_block_version soc24_common_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &soc24_common_ip_funcs,
+};
+
+static bool soc24_need_full_reset(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ default:
+ return true;
+ }
+}
+
+static bool soc24_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 sol_reg;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* Check the sOS sign-of-life register: a non-zero value means the
+ * sys driver and sOS have already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81);
+ if (sol_reg)
+ return true;
+
+ return false;
+}
+
+static uint64_t soc24_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ /* TODO
+ * dummy implementation for the pcie_replay_count sysfs interface
+ */
+ return 0;
+}
+
+static void soc24_init_doorbell_index(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
+ adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
+ adev->doorbell_index.gfx_userqueue_start =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START;
+ adev->doorbell_index.gfx_userqueue_end =
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END;
+ adev->doorbell_index.mes_ring0 = AMDGPU_NAVI10_DOORBELL_MES_RING0;
+ adev->doorbell_index.mes_ring1 = AMDGPU_NAVI10_DOORBELL_MES_RING1;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7;
+ adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1;
+ adev->doorbell_index.sdma_doorbell_range = 20;
+}
+
+static void soc24_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+static int soc24_update_umd_stable_pstate(struct amdgpu_device *adev,
+ bool enter)
+{
+ if (enter)
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ else
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ if (adev->gfx.funcs->update_perfmon_mgcg)
+ adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
+
+ return 0;
+}
+
+static const struct amdgpu_asic_funcs soc24_asic_funcs = {
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc24_read_register,
+ .reset = &soc24_asic_reset,
+ .reset_method = &soc24_asic_reset_method,
+ .get_xclk = &soc24_get_xclk,
+ .get_config_memsize = &soc24_get_config_memsize,
+ .init_doorbell_index = &soc24_init_doorbell_index,
+ .need_full_reset = &soc24_need_full_reset,
+ .need_reset_on_init = &soc24_need_reset_on_init,
+ .get_pcie_replay_count = &soc24_get_pcie_replay_count,
+ .supports_baco = &amdgpu_dpm_is_baco_supported,
+ .pre_asic_init = &soc24_pre_asic_init,
+ .query_video_codecs = &soc24_query_video_codecs,
+ .update_umd_stable_pstate = &soc24_update_umd_stable_pstate,
+};
+
+static int soc24_common_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->set_reg_remap(adev);
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &amdgpu_device_indirect_rreg;
+ adev->pcie_wreg = &amdgpu_device_indirect_wreg;
+ adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
+ adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
+ adev->pciep_rreg = amdgpu_device_pcie_port_rreg;
+ adev->pciep_wreg = amdgpu_device_pcie_port_wreg;
+ adev->uvd_ctx_rreg = NULL;
+ adev->uvd_ctx_wreg = NULL;
+ adev->didt_rreg = NULL;
+ adev->didt_wreg = NULL;
+
+ adev->asic_funcs = &soc24_asic_funcs;
+
+ adev->rev_id = amdgpu_device_get_rev_id(adev);
+ adev->external_rev_id = 0xff;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_MC_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 0x40;
+ break;
+ case IP_VERSION(12, 0, 1):
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_MC_LS;
+
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_VCN_DPG;
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_setting(adev);
+ xgpu_nv_mailbox_set_irq_funcs(adev);
+ }
+
+ return 0;
+}
+
+static int soc24_common_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_get_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ /* No need to fail gpu late init if enabling the
+ * athub_err_event interrupt fails; nbif v6_3_1 only
+ * supports fatal error handling, so just enable the
+ * interrupt directly and ignore the result.
+ */
+ amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ /* Enable selfring doorbell aperture late because the doorbell BAR
+ * aperture will change if the BAR is resized successfully in gmc sw_init.
+ */
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc24_common_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_add_irq_id(adev);
+
+ return 0;
+}
+
+static int soc24_common_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* enable aspm */
+ soc24_program_aspm(adev);
+ /* setup nbio registers */
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers)
+ adev->nbio.funcs->remap_hdp_registers(adev);
+
+ if (adev->df.funcs->hw_init)
+ adev->df.funcs->hw_init(adev);
+
+ /* enable the doorbell aperture */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc24_common_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* Disable the doorbell aperture and selfring doorbell aperture
+ * separately in hw_fini because soc21_enable_doorbell_aperture
+ * has been removed and there is no need to delay disabling
+ * selfring doorbell.
+ */
+ adev->nbio.funcs->enable_doorbell_aperture(adev, false);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
+
+ if (amdgpu_sriov_vf(adev)) {
+ xgpu_nv_mailbox_put_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
+ return 0;
+}
+
+static int soc24_common_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return soc24_common_hw_fini(ip_block);
+}
+
+static int soc24_common_resume(struct amdgpu_ip_block *ip_block)
+{
+ return soc24_common_hw_init(ip_block);
+}
+
+static bool soc24_common_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int soc24_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(6, 3, 1):
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int soc24_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ adev->lsdma.funcs->update_memory_power_gating(adev,
+ state == AMD_PG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void soc24_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
+
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+
+ return;
+}
+
+static const struct amd_ip_funcs soc24_common_ip_funcs = {
+ .name = "soc24_common",
+ .early_init = soc24_common_early_init,
+ .late_init = soc24_common_late_init,
+ .sw_init = soc24_common_sw_init,
+ .hw_init = soc24_common_hw_init,
+ .hw_fini = soc24_common_hw_fini,
+ .suspend = soc24_common_suspend,
+ .resume = soc24_common_resume,
+ .is_idle = soc24_common_is_idle,
+ .set_clockgating_state = soc24_common_set_clockgating_state,
+ .set_powergating_state = soc24_common_set_powergating_state,
+ .get_clockgating_state = soc24_common_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.h b/drivers/gpu/drm/amd/amdgpu/soc24.h
new file mode 100644
index 000000000000..fa7e442e0b62
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SOC24_H__
+#define __SOC24_H__
+
+extern const struct amdgpu_ip_block_version soc24_common_ip_block;
+
+void soc24_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 879bb7af297c..8a3f326474e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -30,12 +30,18 @@
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+/* invalid node instance value */
+#define TA_RAS_INV_NODE 0xffff
+
/* RAS related enumerations */
/**********************************************************/
enum ras_command {
TA_RAS_COMMAND__ENABLE_FEATURES = 0,
TA_RAS_COMMAND__DISABLE_FEATURES,
TA_RAS_COMMAND__TRIGGER_ERROR,
+ TA_RAS_COMMAND__QUERY_BLOCK_INFO,
+ TA_RAS_COMMAND__QUERY_SUB_BLOCK_INFO,
+ TA_RAS_COMMAND__QUERY_ADDRESS,
};
enum ta_ras_status {
@@ -86,6 +92,9 @@ enum ta_ras_block {
TA_RAS_BLOCK__MCA,
TA_RAS_BLOCK__VCN,
TA_RAS_BLOCK__JPEG,
+ TA_RAS_BLOCK__IH,
+ TA_RAS_BLOCK__MPIO,
+ TA_RAS_BLOCK__MMSCH,
TA_NUM_BLOCK_MAX
};
@@ -105,6 +114,19 @@ enum ta_ras_error_type {
TA_RAS_ERROR__POISON = 8,
};
+enum ta_ras_address_type {
+ TA_RAS_MCA_TO_PA,
+ TA_RAS_PA_TO_MCA,
+};
+
+enum ta_ras_nps_mode {
+ TA_RAS_UNKNOWN_MODE = 0,
+ TA_RAS_NPS1_MODE = 1,
+ TA_RAS_NPS2_MODE = 2,
+ TA_RAS_NPS4_MODE = 4,
+ TA_RAS_NPS8_MODE = 8,
+};
+
/* Input/output structures for RAS commands */
/**********************************************************/
@@ -131,6 +153,28 @@ struct ta_ras_init_flags {
uint8_t dgpu_mode;
uint16_t xcc_mask;
uint8_t channel_dis_num;
+ uint8_t nps_mode;
+ uint32_t active_umc_mask;
+};
+
+struct ta_ras_mca_addr {
+ uint64_t err_addr;
+ uint32_t ch_inst;
+ uint32_t umc_inst;
+ uint32_t node_inst;
+ uint32_t socket_id;
+};
+
+struct ta_ras_phy_addr {
+ uint64_t pa;
+ uint32_t bank;
+ uint32_t channel_idx;
+};
+
+struct ta_ras_query_address_input {
+ enum ta_ras_address_type addr_type;
+ struct ta_ras_mca_addr ma;
+ struct ta_ras_phy_addr pa;
};
struct ta_ras_output_flags {
@@ -139,6 +183,13 @@ struct ta_ras_output_flags {
uint8_t reg_access_failure_flag;
};
+struct ta_ras_query_address_output {
+ /* don't use the flags here */
+ struct ta_ras_output_flags flags;
+ struct ta_ras_mca_addr ma;
+ struct ta_ras_phy_addr pa;
+};
+
/* Common input structure for RAS callbacks */
/**********************************************************/
union ta_ras_cmd_input {
@@ -146,12 +197,14 @@ union ta_ras_cmd_input {
struct ta_ras_enable_features_input enable_features;
struct ta_ras_disable_features_input disable_features;
struct ta_ras_trigger_error_input trigger_error;
+ struct ta_ras_query_address_input address;
uint32_t reserve_pad[256];
};
union ta_ras_cmd_output {
struct ta_ras_output_flags flags;
+ struct ta_ras_query_address_output address;
uint32_t reserve_pad[256];
};
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
index 00d8bdb8254f..9ec2e03d41c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
@@ -31,10 +31,12 @@
* Secure Display Command ID
*/
enum ta_securedisplay_command {
- /* Query whether TA is responding used only for validation purpose */
+ /* Query whether TA is responding. It is used only for validation purpose */
TA_SECUREDISPLAY_COMMAND__QUERY_TA = 1,
/* Send region of Interest and CRC value to I2C */
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC = 2,
+ /* V2 to send multiple regions of Interest and CRC value to I2C */
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2 = 3,
/* Maximum Command ID */
TA_SECUREDISPLAY_COMMAND__MAX_ID = 0x7FFFFFFF,
};
@@ -83,6 +85,8 @@ enum ta_securedisplay_ta_query_cmd_ret {
enum ta_securedisplay_buffer_size {
/* 15 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) */
TA_SECUREDISPLAY_I2C_BUFFER_SIZE = 15,
+ /* 16 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) + 1 byte(roi_idx) */
+ TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE = 16,
};
/** Input/output structures for Secure Display commands */
@@ -95,7 +99,15 @@ enum ta_securedisplay_buffer_size {
* Physical ID to determine which DIO scratch register should be used to get ROI
*/
struct ta_securedisplay_send_roi_crc_input {
- uint32_t phy_id; /* Physical ID */
+ /* Physical ID */
+ uint32_t phy_id;
+};
+
+struct ta_securedisplay_send_roi_crc_v2_input {
+ /* Physical ID */
+ uint32_t phy_id;
+ /* Region of interest index */
+ uint8_t roi_idx;
};
/** @union ta_securedisplay_cmd_input
@@ -104,6 +116,8 @@ struct ta_securedisplay_send_roi_crc_input {
union ta_securedisplay_cmd_input {
/* send ROI and CRC input buffer format */
struct ta_securedisplay_send_roi_crc_input send_roi_crc;
+ /* send ROI and CRC input buffer format, v2 adds a ROI index */
+ struct ta_securedisplay_send_roi_crc_v2_input send_roi_crc_v2;
uint32_t reserved[4];
};
@@ -128,6 +142,10 @@ struct ta_securedisplay_send_roi_crc_output {
uint8_t reserved;
};
+struct ta_securedisplay_send_roi_crc_v2_output {
+ uint8_t i2c_buf[TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE]; /* I2C buffer */
+};
+
/** @union ta_securedisplay_cmd_output
* Output buffer
*/
@@ -136,6 +154,8 @@ union ta_securedisplay_cmd_output {
struct ta_securedisplay_query_ta_output query_ta;
/* Send ROI CRC output buffer format used only for validation purpose */
struct ta_securedisplay_send_roi_crc_output send_roi_crc;
+ /* Send ROI CRC output buffer format used only for validation purpose */
+ struct ta_securedisplay_send_roi_crc_v2_output send_roi_crc_v2;
uint32_t reserved[4];
};
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
index da815a93d46e..d5748032674e 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,7 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-
#ifndef _TA_XGMI_IF_H
#define _TA_XGMI_IF_H
@@ -28,20 +27,31 @@
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+#define EXTEND_PEER_LINK_INFO_CMD_FLAG 1
+
enum ta_command_xgmi {
+ /* Initialize the Context and Session Topology */
TA_COMMAND_XGMI__INITIALIZE = 0x00,
+ /* Gets the current GPU's node ID */
TA_COMMAND_XGMI__GET_NODE_ID = 0x01,
+ /* Gets the current GPU's hive ID */
TA_COMMAND_XGMI__GET_HIVE_ID = 0x02,
- TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03,
+ /* Gets the Peer's topology Information */
+ TA_COMMAND_XGMI__GET_TOPOLOGY_INFO = 0x03,
+ /* Sets the Peer's topology Information */
TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04,
- TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B
+ /* Gets the total links between adjacent peer dies in hive */
+ TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B,
+ /* Gets the total links and connected port numbers between adjacent peer dies in hive */
+ TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS = 0x0C
};
/* XGMI related enumerations */
/**********************************************************/;
-enum ta_xgmi_connected_nodes {
- TA_XGMI__MAX_CONNECTED_NODES = 64
-};
+enum { TA_XGMI__MAX_CONNECTED_NODES = 64 };
+enum { TA_XGMI__MAX_INTERNAL_STATE = 32 };
+enum { TA_XGMI__MAX_INTERNAL_STATE_BUFFER = 128 };
+enum { TA_XGMI__MAX_PORT_NUM = 8 };
enum ta_xgmi_status {
TA_XGMI_STATUS__SUCCESS = 0x00,
@@ -81,6 +91,18 @@ struct ta_xgmi_peer_link_info {
uint8_t num_links;
};
+struct xgmi_connected_port_num {
+ uint8_t dst_xgmi_port_num;
+ uint8_t src_xgmi_port_num;
+};
+
+/* support both the port num and num_links */
+struct ta_xgmi_extend_peer_link_info {
+ uint64_t node_id;
+ uint8_t num_links;
+ struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM];
+};
+
struct ta_xgmi_cmd_initialize_output {
uint32_t status;
};
@@ -103,16 +125,21 @@ struct ta_xgmi_cmd_get_topology_info_output {
struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
-struct ta_xgmi_cmd_get_peer_link_info_output {
+struct ta_xgmi_cmd_set_topology_info_input {
uint32_t num_nodes;
- struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+ struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
-struct ta_xgmi_cmd_set_topology_info_input {
+/* support XGMI TA w/ and w/o port_num both so two similar structs defined */
+struct ta_xgmi_cmd_get_peer_link_info {
uint32_t num_nodes;
- struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+ struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
};
+struct ta_xgmi_cmd_get_extend_peer_link_info {
+ uint32_t num_nodes;
+ struct ta_xgmi_extend_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
/**********************************************************/
/* Common input structure for XGMI callbacks */
union ta_xgmi_cmd_input {
@@ -126,16 +153,23 @@ union ta_xgmi_cmd_output {
struct ta_xgmi_cmd_get_node_id_output get_node_id;
struct ta_xgmi_cmd_get_hive_id_output get_hive_id;
struct ta_xgmi_cmd_get_topology_info_output get_topology_info;
- struct ta_xgmi_cmd_get_peer_link_info_output get_link_info;
+ struct ta_xgmi_cmd_get_peer_link_info get_link_info;
+ struct ta_xgmi_cmd_get_extend_peer_link_info get_extend_link_info;
};
-/**********************************************************/
struct ta_xgmi_shared_memory {
uint32_t cmd_id;
uint32_t resp_id;
enum ta_xgmi_status xgmi_status;
+
+ /* if the number of xgmi link record is more than 128, driver will set the
+ * flag 0 to get the first 128 of the link records and will set to 1, to get
+ * the second set
+ */
uint8_t flag_extend_link_record;
- uint8_t reserved0[3];
+ /* bit0: port_num info support flag for GET_EXTEND_PEER_LINKS command */
+ uint8_t caps_flag;
+ uint8_t reserved[2];
union ta_xgmi_cmd_input xgmi_in_message;
union ta_xgmi_cmd_output xgmi_out_message;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 917707bba7f3..7d17ae56f901 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
@@ -277,9 +283,9 @@ static void tonga_ih_set_rptr(struct amdgpu_device *adev,
}
}
-static int tonga_ih_early_init(void *handle)
+static int tonga_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -291,10 +297,10 @@ static int tonga_ih_early_init(void *handle)
return 0;
}
-static int tonga_ih_sw_init(void *handle)
+static int tonga_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true);
if (r)
@@ -308,9 +314,9 @@ static int tonga_ih_sw_init(void *handle)
return r;
}
-static int tonga_ih_sw_fini(void *handle)
+static int tonga_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -318,10 +324,10 @@ static int tonga_ih_sw_fini(void *handle)
return 0;
}
-static int tonga_ih_hw_init(void *handle)
+static int tonga_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = tonga_ih_irq_init(adev);
if (r)
@@ -330,32 +336,26 @@ static int tonga_ih_hw_init(void *handle)
return 0;
}
-static int tonga_ih_hw_fini(void *handle)
+static int tonga_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- tonga_ih_irq_disable(adev);
+ tonga_ih_irq_disable(ip_block->adev);
return 0;
}
-static int tonga_ih_suspend(void *handle)
+static int tonga_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return tonga_ih_hw_fini(adev);
+ return tonga_ih_hw_fini(ip_block);
}
-static int tonga_ih_resume(void *handle)
+static int tonga_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return tonga_ih_hw_init(adev);
+ return tonga_ih_hw_init(ip_block);
}
-static bool tonga_ih_is_idle(void *handle)
+static bool tonga_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -364,11 +364,11 @@ static bool tonga_ih_is_idle(void *handle)
return true;
}
-static int tonga_ih_wait_for_idle(void *handle)
+static int tonga_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -380,9 +380,9 @@ static int tonga_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static bool tonga_ih_check_soft_reset(void *handle)
+static bool tonga_ih_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -399,29 +399,27 @@ static bool tonga_ih_check_soft_reset(void *handle)
}
}
-static int tonga_ih_pre_soft_reset(void *handle)
+static int tonga_ih_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->irq.srbm_soft_reset)
+ if (!ip_block->adev->irq.srbm_soft_reset)
return 0;
- return tonga_ih_hw_fini(adev);
+ return tonga_ih_hw_fini(ip_block);
}
-static int tonga_ih_post_soft_reset(void *handle)
+static int tonga_ih_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->irq.srbm_soft_reset)
return 0;
- return tonga_ih_hw_init(adev);
+ return tonga_ih_hw_init(ip_block);
}
-static int tonga_ih_soft_reset(void *handle)
+static int tonga_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->irq.srbm_soft_reset)
@@ -450,13 +448,13 @@ static int tonga_ih_soft_reset(void *handle)
return 0;
}
-static int tonga_ih_set_clockgating_state(void *handle,
+static int tonga_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int tonga_ih_set_powergating_state(void *handle,
+static int tonga_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -465,7 +463,6 @@ static int tonga_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs tonga_ih_ip_funcs = {
.name = "tonga_ih",
.early_init = tonga_ih_early_init,
- .late_init = NULL,
.sw_init = tonga_ih_sw_init,
.sw_fini = tonga_ih_sw_fini,
.hw_init = tonga_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
new file mode 100644
index 000000000000..8dc32787d625
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -0,0 +1,728 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v12_0.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_12_0_0_offset.h"
+#include "umc/umc_12_0_0_sh_mask.h"
+#include "mp/mp_13_0_6_sh_mask.h"
+
+#define MAX_ECC_NUM_PER_RETIREMENT 32
+#define DELAYED_TIME_FOR_GPU_RESET 1000 //ms
+
+/*
+ * Compute the register offset of one UMC channel.
+ *
+ * The (umc_inst, ch_inst) pair is flattened using adev->umc.channel_inst_num
+ * and then re-split into groups of four channels before the per-channel,
+ * per-instance and per-node distances are applied.  Any node other than
+ * node 0 additionally gets UMC_V12_0_CROSS_NODE_OFFSET.
+ */
+static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev,
+					    uint32_t node_inst,
+					    uint32_t umc_inst,
+					    uint32_t ch_inst)
+{
+	uint32_t flat_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+	uint64_t offset;
+
+	umc_inst = flat_idx / 4;
+	ch_inst = flat_idx % 4;
+
+	offset = adev->umc.channel_offs * ch_inst + UMC_V12_0_INST_DIST * umc_inst +
+		 UMC_V12_0_NODE_DIST * node_inst;
+
+	if (node_inst != 0)
+		offset += UMC_V12_0_CROSS_NODE_OFFSET;
+
+	return offset;
+}
+
+/*
+ * Per-channel callback: write UMC_V12_0_CE_CNT_INIT to the channel's
+ * OdEccErrCnt register.  @data is unused.  Always returns 0.
+ */
+static int umc_v12_0_reset_error_count_per_channel(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
+{
+	uint64_t chan_offset =
+		get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+	uint64_t err_cnt_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccErrCnt);
+
+	/* clear error count */
+	WREG32_PCIE_EXT((err_cnt_reg + chan_offset) * 4, UMC_V12_0_CE_CNT_INIT);
+
+	return 0;
+}
+
+/*
+ * Invoke umc_v12_0_reset_error_count_per_channel() through
+ * amdgpu_umc_loop_channels(); no private data is passed to the callback.
+ */
+static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_reset_error_count_per_channel, NULL);
+}
+
+/*
+ * Tell whether an MCA UMC status word describes a deferred error.
+ *
+ * The decoded status fields are first emitted with dev_dbg().  The status is
+ * treated as deferred only when poison mode is supported, the Val field is
+ * set, and at least one of the Deferred or Poison fields is set.
+ */
+bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
+{
+	dev_dbg(adev->dev,
+		"MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, PCC:%llu, UC:%llu, TCC:%llu\n",
+		mc_umc_status,
+		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val),
+		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison),
+		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred),
+		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC),
+		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC),
+		REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC)
+	);
+
+	if (!amdgpu_ras_is_poison_mode_supported(adev))
+		return false;
+
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1)
+		return false;
+
+	return REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+	       REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison) == 1;
+}
+
+/*
+ * Tell whether an MCA UMC status word describes an uncorrectable error.
+ *
+ * Deferred errors are excluded first.  Otherwise the status is uncorrectable
+ * when Val is set together with any of PCC, UC or TCC.
+ */
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
+{
+	if (umc_v12_0_is_deferred_error(adev, mc_umc_status))
+		return false;
+
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1)
+		return false;
+
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1)
+		return true;
+
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1)
+		return true;
+
+	return REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1;
+}
+
+/*
+ * Tell whether an MCA UMC status word describes a correctable error.
+ *
+ * Deferred errors are excluded first and Val must be set.  The status is then
+ * correctable when CECC is set, when UECC is set while UC is clear, or when
+ * ErrorCodeExt is 0x5 or 0xb and the status is not uncorrectable.
+ */
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
+{
+	uint64_t ext_code;
+
+	if (umc_v12_0_is_deferred_error(adev, mc_umc_status))
+		return false;
+
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) != 1)
+		return false;
+
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+		return true;
+
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
+	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0)
+		return true;
+
+	/* Identify data parity error in replay mode */
+	ext_code = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt);
+	if (ext_code != 0x5 && ext_code != 0xb)
+		return false;
+
+	return !umc_v12_0_is_uncorrectable_error(adev, mc_umc_status);
+}
+
+/*
+ * Read the channel's MCUMC_STATUS register and bump *@error_count by one when
+ * @error_type_func classifies the status as a match.
+ */
+static void umc_v12_0_query_error_count_per_type(struct amdgpu_device *adev,
+					      uint64_t umc_reg_offset,
+					      unsigned long *error_count,
+					      check_error_type_func error_type_func)
+{
+	uint64_t status_reg =
+		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+	/* Check MCUMC_STATUS */
+	uint64_t status = RREG64_PCIE_EXT((status_reg + umc_reg_offset) * 4);
+
+	if (!error_type_func(adev, status))
+		return;
+
+	(*error_count)++;
+}
+
+/*
+ * Per-channel callback: classify the channel's MCUMC_STATUS as correctable,
+ * uncorrectable and/or deferred and add the resulting counts to the
+ * ras_err_data passed in @data.  Always returns 0.
+ */
+static int umc_v12_0_query_error_count(struct amdgpu_device *adev,
+				   uint32_t node_inst, uint32_t umc_inst,
+				   uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+	unsigned long ue_count = 0, ce_count = 0, de_count = 0;
+
+	/* NOTE: node_inst is converted by adev->umc.active_mask and the range is [0-3],
+	 * which can be used as die ID directly */
+	struct amdgpu_smuio_mcm_config_info mcm_info = {
+		.socket_id = 0,
+		.die_id = node_inst,
+	};
+
+	uint64_t umc_reg_offset =
+		get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+	/*
+	 * The smuio callback table is treated as optional by
+	 * umc_v12_0_query_error_address(); do the same here and fall back to
+	 * socket 0 instead of dereferencing a missing hook.
+	 */
+	if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+		mcm_info.socket_id = adev->smuio.funcs->get_socket_id(adev);
+
+	umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
+					    &ce_count, umc_v12_0_is_correctable_error);
+	umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
+					    &ue_count, umc_v12_0_is_uncorrectable_error);
+	umc_v12_0_query_error_count_per_type(adev, umc_reg_offset,
+					    &de_count, umc_v12_0_is_deferred_error);
+
+	amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count);
+	amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count);
+	amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, de_count);
+
+	return 0;
+}
+
+/*
+ * Accumulate error counts into @ras_error_status by running
+ * umc_v12_0_query_error_count() through amdgpu_umc_loop_channels(), then
+ * reset the hardware error counters.
+ */
+static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_query_error_count, ras_error_status);
+
+ /* counters are reset only after every channel has been read */
+ umc_v12_0_reset_error_count(adev);
+}
+
+/*
+ * Fill adev->umc.flip_bits and adev->umc.retire_unit according to the memory
+ * partition (NPS) mode and the VRAM type.
+ */
+static void umc_v12_0_get_retire_flip_bits(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ uint32_t vram_type = adev->gmc.vram_type;
+ struct amdgpu_umc_flip_bits *flip_bits = &(adev->umc.flip_bits);
+
+ /* NPS1 is kept when the GMC does not provide a partition-mode query */
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
+ /* default setting */
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT;
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT;
+ flip_bits->flip_row_bit = 13;
+ flip_bits->bit_num = 4;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT;
+
+ /* NPS2 and NPS4 override the first three flip bits and r13_in_pa */
+ if (nps == AMDGPU_NPS2_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT;
+ } else if (nps == AMDGPU_NPS4_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT;
+ }
+
+ /* the fourth flip bit (and, for HBM3E, flip_row_bit) depends on the VRAM type */
+ switch (vram_type) {
+ case AMDGPU_VRAM_TYPE_HBM:
+ /* other nps modes are taken as nps1 */
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+ else if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+
+ break;
+ case AMDGPU_VRAM_TYPE_HBM3E:
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+ flip_bits->flip_row_bit = 12;
+
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+ else if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT;
+
+ break;
+ default:
+ /* settings chosen above are left untouched for other VRAM types */
+ dev_warn(adev->dev,
+ "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n");
+ break;
+ }
+
+ /* one retirement unit covers every combination of the flip bits: 2^bit_num */
+ adev->umc.retire_unit = 0x1 << flip_bits->bit_num;
+}
+
+/*
+ * Convert an MCA error address into SoC physical addresses and, optionally,
+ * log them and/or record them in @err_data.
+ *
+ * @adev:      amdgpu device
+ * @err_data:  when non-NULL, receives one error record per address in the
+ *             retirement unit
+ * @addr_in:   when non-NULL, the MCA address to translate through
+ *             psp_ras_query_address(); its addr_type is set to
+ *             TA_RAS_MCA_TO_PA here.  When NULL no query is issued and the
+ *             physical address is taken from @addr_out as provided.
+ * @addr_out:  when non-NULL, receives the query result; pa.pa is then
+ *             rewritten with the flip bits cleared.  When NULL a zeroed local
+ *             scratch copy is used.
+ * @dump_addr: when true, every expanded address is printed with dev_info()
+ *
+ * Returns 0 on success or the error returned by psp_ras_query_address().
+ */
+static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
+					struct ras_err_data *err_data,
+					struct ta_ras_query_address_input *addr_in,
+					struct ta_ras_query_address_output *addr_out,
+					bool dump_addr)
+{
+	uint32_t col, col_lower, row, row_lower, row_high, bank;
+	uint32_t channel_index = 0, umc_inst = 0;
+	uint32_t i, bit_num, retire_unit, *flip_bits;
+	uint64_t soc_pa, column, err_addr;
+	/* zeroed so that nothing indeterminate is read when no query is made */
+	struct ta_ras_query_address_output addr_out_tmp = {0};
+	struct ta_ras_query_address_output *paddr_out;
+	int ret = 0;
+
+	if (!addr_out)
+		paddr_out = &addr_out_tmp;
+	else
+		paddr_out = addr_out;
+
+	err_addr = bank = 0;
+	if (addr_in) {
+		err_addr = addr_in->ma.err_addr;
+		addr_in->addr_type = TA_RAS_MCA_TO_PA;
+		ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out);
+		if (ret) {
+			dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx\n",
+				 err_addr);
+
+			goto out;
+		}
+
+		bank = paddr_out->pa.bank;
+		/* no need to care about umc inst if addr_in is NULL */
+		umc_inst = addr_in->ma.umc_inst;
+	}
+
+	flip_bits = adev->umc.flip_bits.flip_bits_in_pa;
+	bit_num = adev->umc.flip_bits.bit_num;
+	retire_unit = adev->umc.retire_unit;
+
+	soc_pa = paddr_out->pa.pa;
+	channel_index = paddr_out->pa.channel_idx;
+	/* clear loop bits in soc physical address */
+	for (i = 0; i < bit_num; i++)
+		soc_pa &= ~BIT_ULL(flip_bits[i]);
+
+	paddr_out->pa.pa = soc_pa;
+	/* get column bit 0 and 1 in mca address */
+	col_lower = (err_addr >> 1) & 0x3ULL;
+	/* extra row bit will be handled later */
+	row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL;
+	row_lower &= ~BIT_ULL(adev->umc.flip_bits.flip_row_bit);
+
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 5, 0)) {
+		row_high = (soc_pa >> adev->umc.flip_bits.r13_in_pa) & 0x3ULL;
+		/* it's 2.25GB in each channel, from MCA address to PA
+		 * [R14 R13] is converted if the two bits value are 0x3,
+		 * get them from PA instead of MCA address.
+		 */
+		row_lower |= (row_high << 13);
+	}
+
+	if (!err_data && !dump_addr)
+		goto out;
+
+	/*
+	 * Without an extra flipped row bit the row is just row_lower; this also
+	 * keeps the dev_info() below from printing an uninitialised value.
+	 */
+	row = row_lower;
+
+	/* loop for all possibilities of retired bits */
+	for (column = 0; column < retire_unit; column++) {
+		soc_pa = paddr_out->pa.pa;
+		for (i = 0; i < bit_num; i++)
+			soc_pa |= (((column >> i) & 0x1ULL) << flip_bits[i]);
+
+		col = ((column & 0x7) << 2) | col_lower;
+		/* handle extra row bit */
+		if (bit_num == RETIRE_FLIP_BITS_NUM)
+			row = ((column >> 3) << adev->umc.flip_bits.flip_row_bit) |
+			      row_lower;
+
+		if (dump_addr)
+			dev_info(adev->dev,
+				 "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
+				 soc_pa, row, col, bank, channel_index);
+
+		if (err_data)
+			amdgpu_umc_fill_error_record(err_data, err_addr,
+						     soc_pa, channel_index, umc_inst);
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Per-channel callback: when the channel's MCUMC_STATUS reports an
+ * uncorrectable or deferred error, read the MCA error address, convert it and
+ * record the resulting addresses in the ras_err_data passed in @data.  A
+ * non-zero status register is cleared before returning.  Always returns 0.
+ */
+static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+	/*
+	 * Zero the whole request: only the "ma" member is filled in below, and
+	 * the structure is handed on to the PSP address query as a whole.
+	 */
+	struct ta_ras_query_address_input addr_in = {0};
+	uint64_t mc_umc_status_addr;
+	uint64_t mc_umc_status, err_addr;
+	uint64_t mc_umc_addrt0;
+	uint64_t umc_reg_offset =
+		get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+	mc_umc_status_addr =
+		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+	mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
+
+	if (mc_umc_status == 0)
+		return 0;
+
+	if (!err_data->err_addr) {
+		/* clear umc status */
+		WREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+		return 0;
+	}
+
+	/* calculate error address if ue error is detected */
+	if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) ||
+	    umc_v12_0_is_deferred_error(adev, mc_umc_status)) {
+		mc_umc_addrt0 =
+			SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+
+		err_addr = RREG64_PCIE_EXT((mc_umc_addrt0 + umc_reg_offset) * 4);
+
+		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+		/* socket_id stays 0 unless the smuio hook can be queried */
+		if (!adev->aid_mask &&
+		    adev->smuio.funcs &&
+		    adev->smuio.funcs->get_socket_id)
+			addr_in.ma.socket_id = adev->smuio.funcs->get_socket_id(adev);
+
+		addr_in.ma.err_addr = err_addr;
+		addr_in.ma.ch_inst = ch_inst;
+		addr_in.ma.umc_inst = umc_inst;
+		addr_in.ma.node_inst = node_inst;
+
+		umc_v12_0_convert_error_address(adev, err_data, &addr_in, NULL, true);
+	}
+
+	/* clear umc status */
+	WREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+	return 0;
+}
+
+/*
+ * Run umc_v12_0_query_error_address() through amdgpu_umc_loop_channels(),
+ * handing @ras_error_status to the callback as its private data.
+ */
+static void umc_v12_0_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_query_error_address, ras_error_status);
+}
+
+/*
+ * Per-channel callback: set the OdEccErrInt field of OdEccCntSel to 1 and
+ * load OdEccErrCnt with UMC_V12_0_CE_CNT_INIT.  @data is unused.  Always
+ * returns 0.
+ */
+static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
+{
+	uint64_t chan_offset =
+		get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
+	uint64_t cnt_sel_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccCntSel);
+	uint64_t err_cnt_reg = SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccErrCnt);
+	uint32_t cnt_sel;
+
+	/* set ce error interrupt type to APIC based interrupt */
+	cnt_sel = RREG32_PCIE_EXT((cnt_sel_reg + chan_offset) * 4);
+	cnt_sel = REG_SET_FIELD(cnt_sel, UMCCH0_OdEccCntSel, OdEccErrInt, 0x1);
+	WREG32_PCIE_EXT((cnt_sel_reg + chan_offset) * 4, cnt_sel);
+
+	/* set error count to initial value */
+	WREG32_PCIE_EXT((err_cnt_reg + chan_offset) * 4, UMC_V12_0_CE_CNT_INIT);
+
+	return 0;
+}
+
+/*
+ * Check whether the 64-bit status word pointed to by @ras_error_status
+ * matches the requested MCA error @type.  Unknown types never match.
+ */
+static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev,
+			enum amdgpu_mca_error_type type, void *ras_error_status)
+{
+	uint64_t mc_umc_status = *(uint64_t *)ras_error_status;
+
+	/* every arm returns, so nothing is needed after the switch */
+	switch (type) {
+	case AMDGPU_MCA_ERROR_TYPE_UE:
+		return umc_v12_0_is_uncorrectable_error(adev, mc_umc_status);
+	case AMDGPU_MCA_ERROR_TYPE_CE:
+		return umc_v12_0_is_correctable_error(adev, mc_umc_status);
+	case AMDGPU_MCA_ERROR_TYPE_DE:
+		return umc_v12_0_is_deferred_error(adev, mc_umc_status);
+	default:
+		return false;
+	}
+}
+
+/*
+ * Run umc_v12_0_err_cnt_init_per_channel() through
+ * amdgpu_umc_loop_channels(); no private data is passed to the callback.
+ */
+static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v12_0_err_cnt_init_per_channel, NULL);
+}
+
+/* Report RAS poison mode as enabled unconditionally; @adev is not consulted. */
+static bool umc_v12_0_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /*
+ * Force return true, because regUMCCH0_EccCtrl
+ * is not accessible from host side
+ */
+ return true;
+}
+
+/* RAS hardware callbacks for UMC v12.0: error count and error address queries. */
+const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = {
+ .query_ras_error_count = umc_v12_0_query_ras_error_count,
+ .query_ras_error_address = umc_v12_0_query_ras_error_address,
+};
+
+/*
+ * ACA bank parser for UMC v12.0.
+ *
+ * Classifies the bank's status register as deferred, uncorrectable or
+ * correctable (in that order of precedence), stores the type in the bank,
+ * forwards the status/ipid/addr registers to amdgpu_umc_update_ecc_status()
+ * and logs the error count to the ACA error cache.  @type and @data are
+ * unused.
+ *
+ * Returns 0 when the status matches none of the three types, the error from
+ * aca_bank_info_decode() if decoding fails, otherwise the result of
+ * aca_error_cache_log_bank_error().
+ */
+static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+				     enum aca_smu_type type, void *data)
+{
+	struct amdgpu_device *adev = handle->adev;
+	struct aca_bank_info info;
+	enum aca_error_type err_type;
+	u64 status, count;
+	u32 ext_error_code;
+	int ret;
+
+	status = bank->regs[ACA_REG_IDX_STATUS];
+	if (umc_v12_0_is_deferred_error(adev, status))
+		err_type = ACA_ERROR_TYPE_DEFERRED;
+	else if (umc_v12_0_is_uncorrectable_error(adev, status))
+		err_type = ACA_ERROR_TYPE_UE;
+	else if (umc_v12_0_is_correctable_error(adev, status))
+		err_type = ACA_ERROR_TYPE_CE;
+	else
+		return 0;
+	bank->aca_err_type = err_type;
+
+	ret = aca_bank_info_decode(bank, &info);
+	if (ret)
+		return ret;
+
+	amdgpu_umc_update_ecc_status(adev,
+				     bank->regs[ACA_REG_IDX_STATUS],
+				     bank->regs[ACA_REG_IDX_IPID],
+				     bank->regs[ACA_REG_IDX_ADDR]);
+
+	ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
+	/*
+	 * Reuse the classification made above rather than decoding (and
+	 * debug-logging) the same status word a second time.
+	 */
+	if (err_type == ACA_ERROR_TYPE_DEFERRED)
+		count = ext_error_code == 0 ?
+			adev->umc.err_addr_cnt / adev->umc.retire_unit : 1ULL;
+	else
+		count = ext_error_code == 0 ?
+			ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
+
+	return aca_error_cache_log_bank_error(handle, &info, err_type, count);
+}
+
+/* ACA bank callbacks for UMC v12.0; only the bank parser is provided. */
+static const struct aca_bank_ops umc_v12_0_aca_bank_ops = {
+ .aca_bank_parser = umc_v12_0_aca_bank_parser,
+};
+
+/* ACA description for the UMC block: UE, CE and deferred errors, parsed by umc_v12_0_aca_bank_ops. */
+const struct aca_info umc_v12_0_aca_info = {
+ .hwip = ACA_HWIP_TYPE_UMC,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK | ACA_ERROR_DEFERRED_MASK,
+ .bank_ops = &umc_v12_0_aca_bank_ops,
+};
+
+/*
+ * Late RAS init for UMC v12.0: run the common UMC late init, then bind the
+ * UMC RAS block to umc_v12_0_aca_info.  Returns 0 on success or the first
+ * error encountered.
+ */
+static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+	int err = amdgpu_umc_ras_late_init(adev, ras_block);
+
+	if (err)
+		return err;
+
+	return amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__UMC,
+				   &umc_v12_0_aca_info, NULL);
+}
+
+static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr) /* returns 0 or an error code */
+{ /* Log one deferred UMC error described by MCA status/ipid/addr and reserve its bad pages. */
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint16_t hwid, mcatype;
+ uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
+ uint64_t err_addr, pa_addr = 0;
+ struct ras_ecc_err *ecc_err;
+ struct ta_ras_query_address_output addr_out;
+ uint32_t shift_bit = adev->umc.flip_bits.flip_bits_in_pa[2];
+ int count, ret, i;
+
+ hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
+ mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
+
+ /* Not a UMC v12.0 bank (original note: "the IP block decode of consumption is SMU"): only count it */
+ if (hwid != MCA_UMC_HWID_V12_0 || mcatype != MCA_UMC_MCATYPE_V12_0) {
+ con->umc_ecc_log.consumption_q_count++;
+ return 0;
+ }
+ /* Nothing to do for an empty status or for an error that is not deferred. */
+ if (!status)
+ return 0;
+
+ if (!umc_v12_0_is_deferred_error(adev, status))
+ return 0;
+
+ err_addr = REG_GET_FIELD(addr,
+ MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ dev_dbg(adev->dev,
+ "UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, err_addr:0x%llx\n",
+ ipid,
+ MCA_IPID_2_SOCKET_ID(ipid),
+ MCA_IPID_2_DIE_ID(ipid),
+ MCA_IPID_2_UMC_INST(ipid),
+ MCA_IPID_2_UMC_CH(ipid),
+ err_addr);
+ /* Convert the MCA error address; the result is read from addr_out below. */
+ ret = amdgpu_umc_mca_to_addr(adev,
+ err_addr, MCA_IPID_2_UMC_CH(ipid),
+ MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid),
+ MCA_IPID_2_SOCKET_ID(ipid), &addr_out, true);
+ if (ret)
+ return ret;
+
+ ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL);
+ if (!ecc_err)
+ return -ENOMEM;
+
+ pa_addr = addr_out.pa.pa;
+ ecc_err->status = status;
+ ecc_err->ipid = ipid;
+ ecc_err->addr = addr;
+ ecc_err->pa_pfn = pa_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ ecc_err->channel_idx = addr_out.pa.channel_idx;
+
+ /* If the converted pa_pfn is 0, use the pfn of bit flip_bits_in_pa[2] instead (the "pa C4" pfn). */
+ if (!ecc_err->pa_pfn)
+ ecc_err->pa_pfn = BIT_ULL(shift_bit) >> AMDGPU_GPU_PAGE_SHIFT;
+ /* Record the error in the deferred-error tree of the ECC log. */
+ ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err);
+ if (ret) {
+ if (ret == -EEXIST) /* still counted as a queried DE */
+ con->umc_ecc_log.de_queried_count++;
+ else
+ dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret);
+
+ kfree(ecc_err); /* logging failed: release the record */
+ return ret;
+ }
+
+ con->umc_ecc_log.de_queried_count++;
+ /* Look up the pages associated with this physical address (at most ARRAY_SIZE(page_pfn)). */
+ memset(page_pfn, 0, sizeof(page_pfn));
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+ pa_addr,
+ page_pfn, ARRAY_SIZE(page_pfn));
+ if (count <= 0) {
+ dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count);
+ return 0; /* the error stays logged; no page is reserved */
+ }
+
+ /* Reserve every page returned by the lookup */
+ for (i = 0; i < count; i++)
+ amdgpu_ras_reserve_page(adev, page_pfn[i]);
+
+ /* The problem case is as follows:
+ * 1. GPU A triggers a GPU RAS reset, and GPU A drives
+ * GPU B to also perform a GPU RAS reset.
+ * 2. After GPU B's RAS reset started, GPU B queried a DE
+ * data. Since the DE data was queried in the RAS reset
+ * thread instead of the page retirement thread, bad
+ * page retirement work would not be triggered. Then
+ * even if all GPU resets are completed, the bad pages
+ * will be cached in RAM until GPU B's bad page retirement
+ * work is triggered again and then saved to eeprom.
+ * Trigger delayed work to save the bad pages to eeprom in time
+ * after the GPU RAS reset is completed.
+ */
+ if (amdgpu_ras_in_recovery(adev))
+ schedule_delayed_work(&con->page_retirement_dwork,
+ msecs_to_jiffies(DELAYED_TIME_FOR_GPU_RESET));
+
+ return 0;
+}
+
+static int umc_v12_0_fill_error_record(struct amdgpu_device *adev,
+ struct ras_ecc_err *ecc_err, void *ras_error_status) /* returns 0, -EINVAL or the first fill error */
+{ /* Add one error record per page found for @ecc_err to @ras_error_status and count one DE. */
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
+ int ret = 0, i, count; /* ret must start at 0: the loop is skipped when the lookup yields count <= 0 */
+
+ if (!err_data || !ecc_err)
+ return -EINVAL;
+
+ memset(page_pfn, 0, sizeof(page_pfn));
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+ ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT,
+ page_pfn, ARRAY_SIZE(page_pfn));
+
+ for (i = 0; i < count; i++) {
+ ret = amdgpu_umc_fill_error_record(err_data,
+ ecc_err->addr,
+ page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT,
+ ecc_err->channel_idx,
+ MCA_IPID_2_UMC_INST(ecc_err->ipid));
+ if (ret)
+ break; /* stop at the first failure; ret is returned below */
+ }
+
+ err_data->de_count++; /* counted once per call, even if the loop stopped early or did not run */
+
+ return ret;
+}
+
+static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
+ void *ras_error_status)
+{ /* Turn entries tagged UMC_ECC_NEW_DETECTED_TAG in the DE log into records in @ras_error_status. */
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_ecc_err *entries[MAX_ECC_NUM_PER_RETIREMENT];
+ struct radix_tree_root *ecc_tree;
+ int new_detected, ret, i;
+
+ ecc_tree = &con->umc_ecc_log.de_page_tree;
+ /* umc_ecc_log.lock is held across the tagged lookup and the per-entry processing. */
+ mutex_lock(&con->umc_ecc_log.lock);
+ new_detected = radix_tree_gang_lookup_tag(ecc_tree, (void **)entries,
+ 0, ARRAY_SIZE(entries), UMC_ECC_NEW_DETECTED_TAG);
+ for (i = 0; i < new_detected; i++) {
+ if (!entries[i])
+ continue;
+
+ ret = umc_v12_0_fill_error_record(adev, entries[i], ras_error_status);
+ if (ret) {
+ dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret);
+ break; /* this entry and the remaining ones keep their tag */
+ }
+ radix_tree_tag_clear(ecc_tree, /* record filled: drop the tag for this pfn */
+ entries[i]->pa_pfn, UMC_ECC_NEW_DETECTED_TAG);
+ }
+ mutex_unlock(&con->umc_ecc_log.lock);
+}
+
+static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page)
+{ /* Compute a 2-bit die id by XOR-ing selected bits of @retired_page and @mca_addr; @adev is unused. */
+ uint32_t die = 0;
+
+ /* we only calculate die id for nps1 mode right now */
+ die += ((((retired_page >> 12) & 0x1ULL)^
+ ((retired_page >> 20) & 0x1ULL) ^
+ ((retired_page >> 27) & 0x1ULL) ^
+ ((retired_page >> 34) & 0x1ULL) ^
+ ((retired_page >> 41) & 0x1ULL)) << 0); /* die bit 0 */
+
+ /* the original PA_C4 and PA_R13 may be cleared in retired_page, so
+ * get them from mca_addr (bits 5 and 23 below).
+ */
+ die += ((((retired_page >> 13) & 0x1ULL) ^
+ ((mca_addr >> 5) & 0x1ULL) ^
+ ((retired_page >> 28) & 0x1ULL) ^
+ ((mca_addr >> 23) & 0x1ULL) ^
+ ((retired_page >> 42) & 0x1ULL)) << 1); /* die bit 1 */
+ die &= 3; /* keep only the two die-id bits */
+
+ return die;
+}
+
+struct amdgpu_umc_ras umc_v12_0_ras = { /* UMC v12.0 RAS callback table */
+ .ras_block = {
+ .hw_ops = &umc_v12_0_ras_hw_ops,
+ .ras_late_init = umc_v12_0_ras_late_init, /* also binds the UMC ACA info */
+ },
+ .err_cnt_init = umc_v12_0_err_cnt_init,
+ .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
+ .ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr, /* fills records from the DE log */
+ .check_ecc_err_status = umc_v12_0_check_ecc_err_status,
+ .update_ecc_status = umc_v12_0_update_ecc_status, /* logs a deferred error and reserves its pages */
+ .convert_ras_err_addr = umc_v12_0_convert_error_address,
+ .get_die_id_from_pa = umc_v12_0_get_die_id,
+ .get_retire_flip_bits = umc_v12_0_get_retire_flip_bits,
+};
+
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
new file mode 100644
index 000000000000..63b7e7254526
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V12_0_H__
+#define __UMC_V12_0_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+#define UMC_V12_0_NODE_DIST 0x40000000
+#define UMC_V12_0_INST_DIST 0x40000
+
+/* UMC register offset between channels */
+#define UMC_V12_0_PER_CHANNEL_OFFSET 0x400
+
+/* UMC cross-node offset (2^32, so the value does not fit in 32 bits) */
+#define UMC_V12_0_CROSS_NODE_OFFSET 0x100000000
+
+/* OdEccErrCnt max value */
+#define UMC_V12_0_CE_CNT_MAX 0xffff
+/* UMC CE interrupt threshold */
+#define UMC_V12_0_CE_INT_THRESHOLD 0xffff
+/* UMC CE count initial value (max - threshold, i.e. 0 with the values above) */
+#define UMC_V12_0_CE_CNT_INIT (UMC_V12_0_CE_CNT_MAX - UMC_V12_0_CE_INT_THRESHOLD)
+
+/* number of UMC channel instances with memory-mapped register access */
+#define UMC_V12_0_CHANNEL_INSTANCE_NUM 8
+/* number of UMC instances with memory-mapped register access */
+#define UMC_V12_0_UMC_INSTANCE_NUM 4
+
+/* Total channel instances for all available UMC nodes */
+#define UMC_V12_0_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V12_0_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc)
+
+/* one normalized address maps to 8 physical addresses */
+#define UMC_V12_0_NA_MAP_PA_NUM 8
+/* the R13 bit flip must be considered as well, which doubles the number */
+#define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2)
+
+/* column bits in SOC physical address */
+#define UMC_V12_0_PA_C2_BIT 15
+#define UMC_V12_0_PA_C3_BIT 16
+#define UMC_V12_0_PA_C4_BIT 21
+/* row bits in SOC physical address */
+#define UMC_V12_0_PA_R0_BIT 22
+#define UMC_V12_0_PA_R10_BIT 32
+#define UMC_V12_0_PA_R11_BIT 33
+#define UMC_V12_0_PA_R12_BIT 34
+#define UMC_V12_0_PA_R13_BIT 35
+/* channel bits in SOC physical address */
+#define UMC_V12_0_PA_CH4_BIT 12
+#define UMC_V12_0_PA_CH5_BIT 13
+/* bank bits in SOC physical address */
+#define UMC_V12_0_PA_B0_BIT 19
+#define UMC_V12_0_PA_B1_BIT 20
+/* row bits in MCA address */
+#define UMC_V12_0_MA_R0_BIT 10
+
+#define MCA_UMC_HWID_V12_0 0x96 /* compared against the IPID HardwareID field */
+#define MCA_UMC_MCATYPE_V12_0 0x0 /* compared against the IPID McaType field */
+
+#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \
+ (((_ipid_lo) >> 12) & 0xF)) /* channel = bit 20 * 4 + bits 15:12 */
+#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7) /* instance = bits 23:21 */
+
+#define MCA_IPID_2_DIE_ID(ipid) ((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) >> 2) & 0x03) /* InstanceIdHi bits 3:2 */
+
+#define MCA_IPID_2_UMC_CH(ipid) \
+ (MCA_IPID_LO_2_UMC_CH(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo)))
+
+#define MCA_IPID_2_UMC_INST(ipid) \
+ (MCA_IPID_LO_2_UMC_INST(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo)))
+
+#define MCA_IPID_2_SOCKET_ID(ipid) \
+ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \
+ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) /* (InstanceIdLo bit 0 << 2) | InstanceIdHi bits 1:0 */
+
+bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
+
+typedef bool (*check_error_type_func)(struct amdgpu_device *adev, uint64_t mc_umc_status); /* same signature as the three predicates above */
+
+extern struct amdgpu_umc_ras umc_v12_0_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
index 0d6b50528d76..97fa88ed770c 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
@@ -25,7 +25,7 @@
static void umc_v6_0_init_registers(struct amdgpu_device *adev)
{
- unsigned i,j;
+ unsigned i, j;
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 530549314ce4..a3ee3c4c650f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -64,7 +64,7 @@ static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
uint64_t reg_value;
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
- dev_info(adev->dev, "Deferred error, no user action is needed.\n");
+ dev_info(adev->dev, "Deferred error\n");
if (mc_umc_status)
dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
index 46bfdee79bfd..a32f87992f20 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -336,7 +336,7 @@ static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_devic
uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst,
unsigned long *error_count)
{
- uint64_t mc_umc_status;
+ uint16_t ecc_ce_cnt;
uint32_t eccinfo_table_idx;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -345,12 +345,10 @@ static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_devic
umc_inst * adev->umc.channel_inst_num +
ch_inst;
- /* check the MCUMC_STATUS */
- mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
- *error_count += 1;
- }
+ /* Retrieve CE count */
+ ecc_ce_cnt = ras->umc_ecc.ecc[eccinfo_table_idx].ce_count_lo_chip;
+ if (ecc_ce_cnt)
+ *error_count += ecc_ce_cnt;
}
static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
@@ -444,11 +442,6 @@ static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *ade
umc_v8_10_ecc_info_query_error_address, ras_error_status);
}
-static void umc_v8_10_set_eeprom_table_version(struct amdgpu_ras_eeprom_table_header *hdr)
-{
- hdr->version = RAS_TABLE_VER_V2_1;
-}
-
const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
.query_ras_error_count = umc_v8_10_query_ras_error_count,
.query_ras_error_address = umc_v8_10_query_ras_error_address,
@@ -462,5 +455,4 @@ struct amdgpu_umc_ras umc_v8_10_ras = {
.query_ras_poison_mode = umc_v8_10_query_ras_poison_mode,
.ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count,
.ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address,
- .set_eeprom_table_version = umc_v8_10_set_eeprom_table_version,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
new file mode 100644
index 000000000000..eaca10a3c4a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v8_14.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_8_14_0_offset.h"
+#include "umc/umc_8_14_0_sh_mask.h"
+
+static inline uint32_t get_umc_v8_14_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{ /* Offset of channel @ch_inst in UMC instance @umc_inst; callers add it to a register offset. */
+ return adev->umc.channel_offs * ch_inst + UMC_V8_14_INST_DIST * umc_inst;
+}
+
+static int umc_v8_14_clear_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{ /* Per-channel callback: reset GeccErrCnt; @node_inst and @data are unused. Always returns 0. */
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ /* clear error count: the whole register is written with the initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V8_14_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_14_clear_error_count(struct amdgpu_device *adev)
+{ /* Run the per-channel clear callback through amdgpu_umc_loop_channels(). */
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_clear_error_count_per_channel, NULL);
+}
+
+static void umc_v8_14_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{ /* Add the GeccErrCnt field of the channel at @umc_reg_offset to *@error_count. */
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+
+ /* UMC 8_14 registers */
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccErrCnt) -
+ UMC_V8_14_CE_CNT_INIT); /* subtract the value the counter was initialised to */
+}
+
+static void umc_v8_14_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{ /* Add the GeccUnCorrErrCnt field of the channel at @umc_reg_offset to *@error_count. */
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ /* UMC 8_14 registers: same GeccErrCnt register as the correctable count */
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccUnCorrErrCnt) -
+ UMC_V8_14_CE_CNT_INIT); /* the CE initial value is the baseline here as well */
+}
+
+static int umc_v8_14_query_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{ /* Per-channel callback: accumulate CE/UE counts into the ras_err_data passed as @data. Always returns 0. */
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ umc_v8_14_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v8_14_query_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+
+ return 0;
+}
+
+static void umc_v8_14_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{ /* Collect error counts from every channel into @ras_error_status ... */
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_query_error_count_per_channel, ras_error_status);
+ /* ... then reset the hardware counters so the next query starts from the initial value. */
+ umc_v8_14_clear_error_count(adev);
+}
+
+static int umc_v8_14_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{ /* Per-channel callback: set GeccErrInt and initialise GeccErrCnt; @node_inst and @data are unused. */
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+
+ /* set CE error interrupt type to APIC based interrupt (read-modify-write of GeccErrCntSel) */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_GeccErrCntSel,
+ GeccErrInt, 0x1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ /* set error count to initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_14_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_14_err_cnt_init(struct amdgpu_device *adev)
+{ /* Run the per-channel counter initialisation through amdgpu_umc_loop_channels(). */
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_err_cnt_init_per_channel, NULL);
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v8_14_ras_hw_ops = {
+ .query_ras_error_count = umc_v8_14_query_ras_error_count, /* the only hw op set for UMC v8.14 */
+};
+
+struct amdgpu_umc_ras umc_v8_14_ras = { /* UMC v8.14 RAS callback table */
+ .ras_block = {
+ .hw_ops = &umc_v8_14_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_14_err_cnt_init, /* programs GeccErrCntSel/GeccErrCnt on every channel */
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
new file mode 100644
index 000000000000..20a258f0017a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V8_14_H__
+#define __UMC_V8_14_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* number of UMC channel instances with memory-mapped register access */
+#define UMC_V8_14_CHANNEL_INSTANCE_NUM 2
+/* number of UMC instances with memory-mapped register access (read from umc.node_inst_num) */
+#define UMC_V8_14_UMC_INSTANCE_NUM(adev) ((adev)->umc.node_inst_num)
+
+/* Total channel instances for all available UMC nodes */
+#define UMC_V8_14_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V8_14_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc)
+
+/* UMC register offset between channels */
+#define UMC_V8_14_PER_CHANNEL_OFFSET 0x400
+
+#define UMC_V8_14_INST_DIST 0x40000 /* multiplied by the UMC instance index in get_umc_v8_14_reg_offset() */
+
+/* EccErrCnt max value */
+#define UMC_V8_14_CE_CNT_MAX 0xffff
+/* UMC CE interrupt threshold */
+#define UMC_V8_14_CE_INT_THRESHOLD 0xffff
+/* UMC CE count initial value (max - threshold, i.e. 0 with the values above) */
+#define UMC_V8_14_CE_CNT_INIT (UMC_V8_14_CE_CNT_MAX - UMC_V8_14_CE_INT_THRESHOLD)
+
+extern struct amdgpu_umc_ras umc_v8_14_ras;
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
new file mode 100644
index 000000000000..ce3bb12e3572
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "soc21.h"
+#include "vcn/vcn_4_0_0_offset.h"
+#include "vcn/vcn_4_0_0_sh_mask.h"
+
+#include "amdgpu_umsch_mm.h"
+#include "umsch_mm_4_0_api_def.h"
+#include "umsch_mm_v4_0.h"
+
+#define regUVD_IPX_DLDO_CONFIG 0x0064 /* DLDO registers and fields are defined locally in this file */
+#define regUVD_IPX_DLDO_CONFIG_BASE_IDX 1
+#define regUVD_IPX_DLDO_STATUS 0x0065
+#define regUVD_IPX_DLDO_STATUS_BASE_IDX 1
+
+#define UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT 0x00000002
+#define UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG_MASK 0x0000000cUL /* bits 3:2 */
+#define UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT 0x00000001
+#define UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK 0x00000002UL /* bit 1 */
+
+static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch)
+{ /* Allocate the firmware buffers, program the UMSCH/MES registers and wait for MSTATUS; returns 0 or an error. */
+ struct amdgpu_device *adev = umsch->ring.adev;
+ uint64_t data;
+ int r;
+
+ r = amdgpu_umsch_mm_allocate_ucode_buffer(umsch);
+ if (r)
+ return r;
+
+ r = amdgpu_umsch_mm_allocate_ucode_data_buffer(umsch);
+ if (r)
+ goto err_free_ucode_bo;
+
+ umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr;
+ /* VCN 4.0.5+: write 1 to ONO0_PWR_CONFIG, then wait on ONO0_PWR_STATUS == 0 (wait result is not checked). */
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) {
+ WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
+ 0 << UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK);
+ }
+
+ data = RREG32_SOC15(VCN, 0, regUMSCH_MES_RESET_CTRL);
+ data = REG_SET_FIELD(data, UMSCH_MES_RESET_CTRL, MES_CORE_SOFT_RESET, 0);
+ WREG32_SOC15_UMSCH(regUMSCH_MES_RESET_CTRL, data);
+ /* Invalidate the icache, put pipe0 in reset and inactive, and halt MES (undone near the end). */
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_CNTL, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, VMID, 0);
+ data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, EXE_DISABLE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_CNTL, data);
+ /* Interrupt routine and program counter start addresses are written shifted right by 2. */
+ WREG32_SOC15_UMSCH(regVCN_MES_INTR_ROUTINE_START,
+ lower_32_bits(adev->umsch_mm.irq_start_addr >> 2));
+ WREG32_SOC15_UMSCH(regVCN_MES_INTR_ROUTINE_START_HI,
+ upper_32_bits(adev->umsch_mm.irq_start_addr >> 2));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_PRGRM_CNTR_START,
+ lower_32_bits(adev->umsch_mm.uc_start_addr >> 2));
+ WREG32_SOC15_UMSCH(regVCN_MES_PRGRM_CNTR_START_HI,
+ upper_32_bits(adev->umsch_mm.uc_start_addr >> 2));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_BASE_LO, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_BASE_HI, 0);
+
+ data = adev->umsch_mm.uc_start_addr + adev->umsch_mm.ucode_size - 1;
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_MASK_LO, lower_32_bits(data));
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_MASK_HI, upper_32_bits(data));
+ /* With PSP firmware loading the IC/DC base registers are written as 0 instead of the driver buffer addresses. */
+ data = adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ?
+ 0 : adev->umsch_mm.ucode_fw_gpu_addr;
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_LO, lower_32_bits(data));
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_HI, upper_32_bits(data));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_MIBOUND_LO, 0x1FFFFF);
+
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_BASE0_LO,
+ lower_32_bits(adev->umsch_mm.data_start_addr));
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_BASE0_HI,
+ upper_32_bits(adev->umsch_mm.data_start_addr));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_LO,
+ adev->umsch_mm.data_size - 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_HI, 0);
+
+ data = adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ?
+ 0 : adev->umsch_mm.data_fw_gpu_addr;
+ WREG32_SOC15_UMSCH(regVCN_MES_DC_BASE_LO, lower_32_bits(data));
+ WREG32_SOC15_UMSCH(regVCN_MES_DC_BASE_HI, upper_32_bits(data));
+
+ WREG32_SOC15_UMSCH(regVCN_MES_MDBOUND_LO, 0x3FFFF);
+
+ data = RREG32_SOC15(VCN, 0, regUVD_UMSCH_FORCE);
+ data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, IC_FORCE_GPUVM, 1);
+ data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, DC_FORCE_GPUVM, 1);
+ WREG32_SOC15_UMSCH(regUVD_UMSCH_FORCE, data);
+ /* Two separate writes to IC_OP_CNTL: first INVALIDATE_CACHE with PRIME_ICACHE clear, then PRIME_ICACHE set. */
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_OP_CNTL, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_IC_OP_CNTL, data);
+ /* GP0 is cleared first; with CONFIG_DEBUG_FS it is then overwritten with the log buffer address. */
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_LO, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_HI, 0);
+
+#if defined(CONFIG_DEBUG_FS)
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_LO, lower_32_bits(umsch->log_gpu_addr));
+ WREG32_SOC15_UMSCH(regVCN_MES_GP0_HI, upper_32_bits(umsch->log_gpu_addr));
+#endif
+
+ WREG32_SOC15_UMSCH(regVCN_MES_GP1_LO, 0);
+ WREG32_SOC15_UMSCH(regVCN_MES_GP1_HI, 0);
+ /* Release MES: clear the invalidate, reset and halt bits and mark pipe0 active. */
+ data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 0);
+ data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ WREG32_SOC15_UMSCH(regVCN_MES_CNTL, data);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ amdgpu_umsch_mm_psp_execute_cmd_buf(umsch);
+ /* Wait on regVCN_MES_MSTATUS_LO against 0xAAAAAAAA; on failure both firmware buffers are freed. */
+ r = SOC15_WAIT_ON_RREG(VCN, 0, regVCN_MES_MSTATUS_LO, 0xAAAAAAAA, 0xFFFFFFFF);
+ if (r) {
+ dev_err(adev->dev, "UMSCH FW Load: Failed, regVCN_MES_MSTATUS_LO: 0x%08x\n",
+ RREG32_SOC15(VCN, 0, regVCN_MES_MSTATUS_LO));
+ goto err_free_data_bo;
+ }
+
+ return 0;
+
+err_free_data_bo:
+ amdgpu_bo_free_kernel(&adev->umsch_mm.data_fw_obj,
+ &adev->umsch_mm.data_fw_gpu_addr,
+ (void **)&adev->umsch_mm.data_fw_ptr);
+err_free_ucode_bo: /* falls through from the label above, so the ucode buffer is freed on both error paths */
+ amdgpu_bo_free_kernel(&adev->umsch_mm.ucode_fw_obj,
+ &adev->umsch_mm.ucode_fw_gpu_addr,
+ (void **)&adev->umsch_mm.ucode_fw_ptr);
+ return r;
+}
+
+static void umsch_mm_v4_0_aggregated_doorbell_init(struct amdgpu_umsch_mm *umsch)
+{ /* Program and enable VCN_AGDB_CTRL0..3 with the agdb_index of each priority level. */
+ struct amdgpu_device *adev = umsch->ring.adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL0); /* CTRL0: realtime */
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL0, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_REALTIME]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL0, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL0, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL1); /* CTRL1: focus */
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL1, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_FOCUS]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL1, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL1, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL2); /* CTRL2: normal */
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL2, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL2, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL2, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL3); /* CTRL3: idle */
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL3, OFFSET,
+ umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_IDLE]);
+ data = REG_SET_FIELD(data, VCN_AGDB_CTRL3, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL3, data);
+}
+
+static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch)
+{ /* Program the UMSCH ring doorbell, base and size, then set up the aggregated doorbells. Always returns 0. */
+ struct amdgpu_ring *ring = &umsch->ring;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL);
+ data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, OFFSET, ring->doorbell_index);
+ data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 1);
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data);
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size);
+
+ ring->wptr = 0;
+ /* NOTE(review): only AUDIO_RB_EN is cleared here; UMSCH_RB_EN (cleared in ring_stop) is not set in this function. */
+ data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
+ data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK);
+ WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
+
+ umsch_mm_v4_0_aggregated_doorbell_init(umsch);
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch)
+{ /* Disable the UMSCH ring buffer and its doorbell. Always returns 0. */
+ struct amdgpu_ring *ring = &umsch->ring;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
+ data = REG_SET_FIELD(data, VCN_RB_ENABLE, UMSCH_RB_EN, 0);
+ WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
+
+ data = RREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL);
+ data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0);
+ WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data);
+ /* VCN 4.0.5+: write 2 to ONO0_PWR_CONFIG, then wait on ONO0_PWR_STATUS == 1 (wait result is not checked). */
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) {
+ WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch)
+{ /* Build a SET_HW_RSRC packet from driver state, submit it and query its fence; returns 0 or an error. */
+ union UMSCHAPI__SET_HW_RESOURCES set_hw_resources = {};
+ struct amdgpu_device *adev = umsch->ring.adev;
+ int r;
+
+ set_hw_resources.header.type = UMSCH_API_TYPE_SCHEDULER;
+ set_hw_resources.header.opcode = UMSCH_API_SET_HW_RSRC;
+ set_hw_resources.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn;
+ set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe;
+ set_hw_resources.collaboration_mask_vpe =
+ adev->vpe.collaborate_mode ? 0x3 : 0x0;
+ set_hw_resources.engine_mask = umsch->engine_mask;
+
+ set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask;
+ set_hw_resources.vcn1_hqd_mask[0] = umsch->vcn1_hqd_mask;
+ set_hw_resources.vcn_hqd_mask[0] = umsch->vcn_hqd_mask[0];
+ set_hw_resources.vcn_hqd_mask[1] = umsch->vcn_hqd_mask[1];
+ set_hw_resources.vpe_hqd_mask[0] = umsch->vpe_hqd_mask;
+
+ set_hw_resources.g_sch_ctx_gpu_mc_ptr = umsch->sch_ctx_gpu_addr;
+
+ set_hw_resources.enable_level_process_quantum_check = 1;
+ /* MMHUB and OSSSYS: copy 5 dwords of instance-0 register base offsets plus the IP version. */
+ memcpy(set_hw_resources.mmhub_base, adev->reg_offset[MMHUB_HWIP][0],
+ sizeof(uint32_t) * 5);
+ set_hw_resources.mmhub_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, MMHUB_HWIP, 0));
+
+ memcpy(set_hw_resources.osssys_base, adev->reg_offset[OSSSYS_HWIP][0],
+ sizeof(uint32_t) * 5);
+ set_hw_resources.osssys_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, OSSSYS_HWIP, 0));
+
+ set_hw_resources.vcn_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VCN_HWIP, 0));
+ set_hw_resources.vpe_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VPE_HWIP, 0));
+ /* Completion fence: the ring's fence address and the pre-incremented sync_seq value. */
+ set_hw_resources.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
+ set_hw_resources.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq;
+
+ r = amdgpu_umsch_mm_submit_pkt(umsch, &set_hw_resources.max_dwords_in_api,
+ API_FRAME_SIZE_IN_DWORDS);
+ if (r)
+ return r;
+
+ r = amdgpu_umsch_mm_query_fence(umsch);
+ if (r) {
+ dev_err(adev->dev, "UMSCH SET_HW_RESOURCES: Failed\n");
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * umsch_mm_v4_0_add_queue - send the ADD_QUEUE packet
+ *
+ * @umsch: UMSCH MM instance whose ring carries the packet
+ * @input_ptr: caller-supplied description of the queue to add
+ *
+ * Copies the fields of @input_ptr into a zero-initialised ADD_QUEUE API
+ * frame, sets the collaboration mode from the device's VPE state, submits
+ * the frame with amdgpu_umsch_mm_submit_pkt() and then calls
+ * amdgpu_umsch_mm_query_fence().
+ *
+ * Return: 0 on success, otherwise the non-zero value returned by the
+ * submit or by the fence query.
+ */
+static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_add_queue_input *input_ptr)
+{
+ struct amdgpu_device *adev = umsch->ring.adev;
+ union UMSCHAPI__ADD_QUEUE add_queue = {};
+ int r;
+
+ /* Common API header: scheduler packet, ADD_QUEUE opcode. */
+ add_queue.header.type = UMSCH_API_TYPE_SCHEDULER;
+ add_queue.header.opcode = UMSCH_API_ADD_QUEUE;
+ add_queue.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ /*
+ * Field-by-field copy from the caller's input; the priority level and
+ * engine type are cast to the enum types used by the API frame.
+ */
+ add_queue.process_id = input_ptr->process_id;
+ add_queue.page_table_base_addr = input_ptr->page_table_base_addr;
+ add_queue.process_va_start = input_ptr->process_va_start;
+ add_queue.process_va_end = input_ptr->process_va_end;
+ add_queue.process_quantum = input_ptr->process_quantum;
+ add_queue.process_csa_addr = input_ptr->process_csa_addr;
+ add_queue.context_quantum = input_ptr->context_quantum;
+ add_queue.context_csa_addr = input_ptr->context_csa_addr;
+ add_queue.inprocess_context_priority = input_ptr->inprocess_context_priority;
+ add_queue.context_global_priority_level =
+ (enum UMSCH_AMD_PRIORITY_LEVEL)input_ptr->context_global_priority_level;
+ add_queue.doorbell_offset_0 = input_ptr->doorbell_offset_0;
+ add_queue.doorbell_offset_1 = input_ptr->doorbell_offset_1;
+ add_queue.affinity.u32All = input_ptr->affinity;
+ add_queue.mqd_addr = input_ptr->mqd_addr;
+ add_queue.engine_type = (enum UMSCH_ENGINE_TYPE)input_ptr->engine_type;
+ add_queue.h_context = input_ptr->h_context;
+ add_queue.h_queue = input_ptr->h_queue;
+ add_queue.vm_context_cntl = input_ptr->vm_context_cntl;
+ add_queue.is_context_suspended = input_ptr->is_context_suspended;
+ /* Taken from the device, not from @input_ptr. */
+ add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0;
+
+ /*
+ * Completion fence: address of the ring's fence and the next sequence
+ * number. sync_seq is incremented here, before the submit, so it stays
+ * incremented even when the submit below fails.
+ */
+ add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
+ add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq;
+
+ r = amdgpu_umsch_mm_submit_pkt(umsch, &add_queue.max_dwords_in_api,
+ API_FRAME_SIZE_IN_DWORDS);
+ if (r)
+ return r;
+
+ r = amdgpu_umsch_mm_query_fence(umsch);
+ if (r) {
+ dev_err(adev->dev, "UMSCH ADD_QUEUE: Failed\n");
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * umsch_mm_v4_0_remove_queue - send the REMOVE_QUEUE packet
+ *
+ * @umsch: UMSCH MM instance whose ring carries the packet
+ * @input_ptr: caller-supplied doorbell offsets and context CSA address
+ *
+ * Builds a zero-initialised REMOVE_QUEUE API frame from @input_ptr, submits
+ * it with amdgpu_umsch_mm_submit_pkt() and then calls
+ * amdgpu_umsch_mm_query_fence().
+ *
+ * Return: 0 on success, otherwise the non-zero value returned by the
+ * submit or by the fence query.
+ */
+static int umsch_mm_v4_0_remove_queue(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_remove_queue_input *input_ptr)
+{
+ union UMSCHAPI__REMOVE_QUEUE remove_queue = {};
+ struct amdgpu_device *adev = umsch->ring.adev;
+ int r;
+
+ /* Common API header: scheduler packet, REMOVE_QUEUE opcode. */
+ remove_queue.header.type = UMSCH_API_TYPE_SCHEDULER;
+ remove_queue.header.opcode = UMSCH_API_REMOVE_QUEUE;
+ remove_queue.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ /* Only these three fields are copied from the caller's input. */
+ remove_queue.doorbell_offset_0 = input_ptr->doorbell_offset_0;
+ remove_queue.doorbell_offset_1 = input_ptr->doorbell_offset_1;
+ remove_queue.context_csa_addr = input_ptr->context_csa_addr;
+
+ /*
+ * Completion fence: address of the ring's fence and the next sequence
+ * number. sync_seq is incremented here, before the submit, so it stays
+ * incremented even when the submit below fails.
+ */
+ remove_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
+ remove_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq;
+
+ r = amdgpu_umsch_mm_submit_pkt(umsch, &remove_queue.max_dwords_in_api,
+ API_FRAME_SIZE_IN_DWORDS);
+ if (r)
+ return r;
+
+ r = amdgpu_umsch_mm_query_fence(umsch);
+ if (r) {
+ dev_err(adev->dev, "UMSCH REMOVE_QUEUE: Failed\n");
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * umsch_mm_v4_0_set_regs - record the ring pointer register offsets
+ *
+ * @umsch: UMSCH MM instance to update
+ *
+ * Stores the offsets of VCN_UMSCH_RB_WPTR and VCN_UMSCH_RB_RPTR (VCN
+ * instance 0) in @umsch->rb_wptr and @umsch->rb_rptr.
+ *
+ * Return: always 0.
+ */
+static int umsch_mm_v4_0_set_regs(struct amdgpu_umsch_mm *umsch)
+{
+ /*
+ * @umsch is taken to be the umsch_mm member embedded in struct
+ * amdgpu_device. adev is not named again below.
+ * NOTE(review): presumably SOC15_REG_OFFSET() expands to an expression
+ * that references a local called adev - confirm before removing it.
+ */
+ struct amdgpu_device *adev = container_of(umsch, struct amdgpu_device, umsch_mm);
+
+ umsch->rb_wptr = SOC15_REG_OFFSET(VCN, 0, regVCN_UMSCH_RB_WPTR);
+ umsch->rb_rptr = SOC15_REG_OFFSET(VCN, 0, regVCN_UMSCH_RB_RPTR);
+
+ return 0;
+}
+
+/*
+ * UMSCH MM callback table for version 4.0. init_microcode and ring_init
+ * point at amdgpu_umsch_mm_* functions; every other entry points at a
+ * umsch_mm_v4_0_* function.
+ */
+static const struct umsch_mm_funcs umsch_mm_v4_0_funcs = {
+ .set_hw_resources = umsch_mm_v4_0_set_hw_resources,
+ .add_queue = umsch_mm_v4_0_add_queue,
+ .remove_queue = umsch_mm_v4_0_remove_queue,
+ .set_regs = umsch_mm_v4_0_set_regs,
+ .init_microcode = amdgpu_umsch_mm_init_microcode,
+ .load_microcode = umsch_mm_v4_0_load_microcode,
+ .ring_init = amdgpu_umsch_mm_ring_init,
+ .ring_start = umsch_mm_v4_0_ring_start,
+ .ring_stop = umsch_mm_v4_0_ring_stop,
+};
+
+/**
+ * umsch_mm_v4_0_set_funcs - install the v4.0 callback table
+ *
+ * @umsch: UMSCH MM instance whose funcs pointer is set
+ *
+ * Points @umsch->funcs at umsch_mm_v4_0_funcs.
+ */
+void umsch_mm_v4_0_set_funcs(struct amdgpu_umsch_mm *umsch)
+{
+ umsch->funcs = &umsch_mm_v4_0_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h
new file mode 100644
index 000000000000..06bc0fa74996
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __UMSCH_MM_V4_0_H__
+#define __UMSCH_MM_V4_0_H__
+
+void umsch_mm_v4_0_set_funcs(struct amdgpu_umsch_mm *umsch);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
index 5534c769b655..2e79a3afc774 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -98,7 +98,7 @@ static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring,
}
/**
- * uvd_v3_1_ring_emit_fence - emit an fence & trap command
+ * uvd_v3_1_ring_emit_fence - emit a fence & trap command
*
* @ring: amdgpu_ring pointer
* @addr: address
@@ -242,7 +242,7 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev)
uint64_t addr;
uint32_t size;
- /* programm the VCPU memory controller bits 0-27 */
+ /* program the VCPU memory controller bits 0-27 */
addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
@@ -416,7 +416,7 @@ static int uvd_v3_1_start(struct amdgpu_device *adev)
/* Set the write pointer delay */
WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0);
- /* programm the 4GB memory segment for rptr and ring buffer */
+ /* program the 4GB memory segment for rptr and ring buffer */
WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
(0x7 << 16) | (0x1 << 31));
@@ -531,9 +531,9 @@ static void uvd_v3_1_set_irq_funcs(struct amdgpu_device *adev)
}
-static int uvd_v3_1_early_init(void *handle)
+static int uvd_v3_1_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v3_1_set_ring_funcs(adev);
@@ -542,10 +542,10 @@ static int uvd_v3_1_early_init(void *handle)
return 0;
}
-static int uvd_v3_1_sw_init(void *handle)
+static int uvd_v3_1_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
void *ptr;
uint32_t ucode_len;
@@ -577,15 +577,13 @@ static int uvd_v3_1_sw_init(void *handle)
ptr += ucode_len;
memcpy(&adev->uvd.keyselect, ptr, 4);
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v3_1_sw_fini(void *handle)
+static int uvd_v3_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -623,18 +621,42 @@ static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev,
/**
* uvd_v3_1_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing.
*
- * Initialize the hardware, boot up the VCPU and do some testing
+ * On SI, the UVD is meant to be used in a specific power state,
+ * or alternatively the driver can manually enable its clock.
+ * In amdgpu we use the dedicated UVD power state when DPM is enabled.
+ * Calling amdgpu_dpm_enable_uvd makes DPM select the UVD power state
+ * for the SMU and afterwards enables the UVD clock.
+ * This is automatically done by amdgpu_uvd_ring_begin_use when work
+ * is submitted to the UVD ring. Here, we have to call it manually
+ * in order to power up UVD before firmware validation.
+ *
+ * Note that we must not disable the UVD clock here, as that would
+ * cause the ring test to fail. However, UVD is powered off
+ * automatically after the ring test: amdgpu_uvd_ring_end_use calls
+ * the UVD idle work handler which will disable the UVD clock when
+ * all fences are signalled.
*/
-static int uvd_v3_1_hw_init(void *handle)
+static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
uvd_v3_1_mc_resume(adev);
+ uvd_v3_1_enable_mgcg(adev, true);
+
+ /* Make sure UVD is powered during FW validation.
+ * It's going to be automatically powered off after the ring test.
+ */
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+ else
+ amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
r = uvd_v3_1_fw_validate(adev);
if (r) {
@@ -642,9 +664,6 @@ static int uvd_v3_1_hw_init(void *handle)
return r;
}
- uvd_v3_1_enable_mgcg(adev, true);
- amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
-
uvd_v3_1_start(adev);
r = amdgpu_ring_test_helper(ring);
@@ -690,13 +709,13 @@ done:
/**
* uvd_v3_1_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v3_1_hw_fini(void *handle)
+static int uvd_v3_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -706,10 +725,17 @@ static int uvd_v3_1_hw_fini(void *handle)
return 0;
}
-static int uvd_v3_1_suspend(void *handle)
+/**
+ * uvd_v3_1_prepare_suspend - prepare_suspend callback of the UVD 3.1 block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Passes the block's device to amdgpu_uvd_prepare_suspend().
+ *
+ * Return: the value returned by amdgpu_uvd_prepare_suspend().
+ */
+static int uvd_v3_1_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v3_1_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -735,36 +761,35 @@ static int uvd_v3_1_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v3_1_hw_fini(adev);
+ r = uvd_v3_1_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v3_1_resume(void *handle)
+static int uvd_v3_1_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v3_1_hw_init(adev);
+ return uvd_v3_1_hw_init(ip_block);
}
-static bool uvd_v3_1_is_idle(void *handle)
+static bool uvd_v3_1_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v3_1_wait_for_idle(void *handle)
+static int uvd_v3_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -773,9 +798,9 @@ static int uvd_v3_1_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v3_1_soft_reset(void *handle)
+static int uvd_v3_1_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v3_1_stop(adev);
@@ -786,13 +811,13 @@ static int uvd_v3_1_soft_reset(void *handle)
return uvd_v3_1_start(adev);
}
-static int uvd_v3_1_set_clockgating_state(void *handle,
+static int uvd_v3_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int uvd_v3_1_set_powergating_state(void *handle,
+static int uvd_v3_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -801,11 +826,11 @@ static int uvd_v3_1_set_powergating_state(void *handle,
static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
.name = "uvd_v3_1",
.early_init = uvd_v3_1_early_init,
- .late_init = NULL,
.sw_init = uvd_v3_1_sw_init,
.sw_fini = uvd_v3_1_sw_fini,
.hw_init = uvd_v3_1_hw_init,
.hw_fini = uvd_v3_1_hw_fini,
+ .prepare_suspend = uvd_v3_1_prepare_suspend,
.suspend = uvd_v3_1_suspend,
.resume = uvd_v3_1_resume,
.is_idle = uvd_v3_1_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index c108b8381795..4b96fd583772 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -44,7 +44,7 @@ static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v4_2_start(struct amdgpu_device *adev);
static void uvd_v4_2_stop(struct amdgpu_device *adev);
-static int uvd_v4_2_set_clockgating_state(void *handle,
+static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
bool sw_mode);
@@ -90,9 +90,9 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
-static int uvd_v4_2_early_init(void *handle)
+static int uvd_v4_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v4_2_set_ring_funcs(adev);
@@ -101,10 +101,10 @@ static int uvd_v4_2_early_init(void *handle)
return 0;
}
-static int uvd_v4_2_sw_init(void *handle)
+static int uvd_v4_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/* UVD TRAP */
@@ -127,15 +127,13 @@ static int uvd_v4_2_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v4_2_sw_fini(void *handle)
+static int uvd_v4_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -149,13 +147,13 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
/**
* uvd_v4_2_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v4_2_hw_init(void *handle)
+static int uvd_v4_2_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
@@ -204,13 +202,13 @@ done:
/**
* uvd_v4_2_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v4_2_hw_fini(void *handle)
+static int uvd_v4_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -220,10 +218,17 @@ static int uvd_v4_2_hw_fini(void *handle)
return 0;
}
-static int uvd_v4_2_suspend(void *handle)
+/**
+ * uvd_v4_2_prepare_suspend - prepare_suspend callback of the UVD 4.2 block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Passes the block's device to amdgpu_uvd_prepare_suspend().
+ *
+ * Return: the value returned by amdgpu_uvd_prepare_suspend().
+ */
+static int uvd_v4_2_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v4_2_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -249,23 +254,22 @@ static int uvd_v4_2_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v4_2_hw_fini(adev);
+ r = uvd_v4_2_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v4_2_resume(void *handle)
+static int uvd_v4_2_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v4_2_hw_init(adev);
+ return uvd_v4_2_hw_init(ip_block);
}
/**
@@ -298,7 +302,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
/* enable VCPU clock */
WREG32(mmUVD_VCPU_CNTL, 1 << 9);
- /* disable interupt */
+ /* disable interrupt */
WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
#ifdef __BIG_ENDIAN
@@ -308,6 +312,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
#endif
WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+
/* initialize UVD memory controller */
WREG32(mmUVD_LMI_CTRL, 0x203108);
@@ -654,17 +659,17 @@ static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2);
}
-static bool uvd_v4_2_is_idle(void *handle)
+static bool uvd_v4_2_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v4_2_wait_for_idle(void *handle)
+static int uvd_v4_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -673,9 +678,9 @@ static int uvd_v4_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v4_2_soft_reset(void *handle)
+static int uvd_v4_2_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v4_2_stop(adev);
@@ -704,13 +709,13 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int uvd_v4_2_set_clockgating_state(void *handle,
+static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int uvd_v4_2_set_powergating_state(void *handle,
+static int uvd_v4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -720,7 +725,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE) {
uvd_v4_2_stop(adev);
@@ -751,11 +756,11 @@ static int uvd_v4_2_set_powergating_state(void *handle,
static const struct amd_ip_funcs uvd_v4_2_ip_funcs = {
.name = "uvd_v4_2",
.early_init = uvd_v4_2_early_init,
- .late_init = NULL,
.sw_init = uvd_v4_2_sw_init,
.sw_fini = uvd_v4_2_sw_fini,
.hw_init = uvd_v4_2_hw_init,
.hw_fini = uvd_v4_2_hw_fini,
+ .prepare_suspend = uvd_v4_2_prepare_suspend,
.suspend = uvd_v4_2_suspend,
.resume = uvd_v4_2_resume,
.is_idle = uvd_v4_2_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index d7e31e48a2b8..71409ad8b7ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -42,7 +42,7 @@ static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v5_0_start(struct amdgpu_device *adev);
static void uvd_v5_0_stop(struct amdgpu_device *adev);
-static int uvd_v5_0_set_clockgating_state(void *handle,
+static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
bool enable);
@@ -88,9 +88,9 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
-static int uvd_v5_0_early_init(void *handle)
+static int uvd_v5_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v5_0_set_ring_funcs(adev);
@@ -99,10 +99,10 @@ static int uvd_v5_0_early_init(void *handle)
return 0;
}
-static int uvd_v5_0_sw_init(void *handle)
+static int uvd_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/* UVD TRAP */
@@ -125,15 +125,13 @@ static int uvd_v5_0_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v5_0_sw_fini(void *handle)
+static int uvd_v5_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -145,19 +143,19 @@ static int uvd_v5_0_sw_fini(void *handle)
/**
* uvd_v5_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v5_0_hw_init(void *handle)
+static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
- uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v5_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
uvd_v5_0_enable_mgcg(adev, true);
r = amdgpu_ring_test_helper(ring);
@@ -202,13 +200,13 @@ done:
/**
* uvd_v5_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v5_0_hw_fini(void *handle)
+static int uvd_v5_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -218,10 +216,17 @@ static int uvd_v5_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v5_0_suspend(void *handle)
+/**
+ * uvd_v5_0_prepare_suspend - prepare_suspend callback of the UVD 5.0 block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Passes the block's device to amdgpu_uvd_prepare_suspend().
+ *
+ * Return: the value returned by amdgpu_uvd_prepare_suspend().
+ */
+static int uvd_v5_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v5_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -247,23 +252,22 @@ static int uvd_v5_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v5_0_hw_fini(adev);
+ r = uvd_v5_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v5_0_resume(void *handle)
+static int uvd_v5_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v5_0_hw_init(adev);
+ return uvd_v5_0_hw_init(ip_block);
}
/**
@@ -576,17 +580,17 @@ static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
}
}
-static bool uvd_v5_0_is_idle(void *handle)
+static bool uvd_v5_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v5_0_wait_for_idle(void *handle)
+static int uvd_v5_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -595,9 +599,9 @@ static int uvd_v5_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v5_0_soft_reset(void *handle)
+static int uvd_v5_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v5_0_stop(adev);
@@ -786,15 +790,15 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
}
}
-static int uvd_v5_0_set_clockgating_state(void *handle,
+static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (uvd_v5_0_wait_for_idle(handle))
+ if (uvd_v5_0_wait_for_idle(ip_block))
return -EBUSY;
uvd_v5_0_enable_clock_gating(adev, true);
@@ -808,7 +812,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
return 0;
}
-static int uvd_v5_0_set_powergating_state(void *handle,
+static int uvd_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -818,7 +822,7 @@ static int uvd_v5_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
if (state == AMD_PG_STATE_GATE) {
@@ -833,9 +837,9 @@ out:
return ret;
}
-static void uvd_v5_0_get_clockgating_state(void *handle, u64 *flags)
+static void uvd_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -858,11 +862,11 @@ out:
static const struct amd_ip_funcs uvd_v5_0_ip_funcs = {
.name = "uvd_v5_0",
.early_init = uvd_v5_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v5_0_sw_init,
.sw_fini = uvd_v5_0_sw_fini,
.hw_init = uvd_v5_0_hw_init,
.hw_fini = uvd_v5_0_hw_fini,
+ .prepare_suspend = uvd_v5_0_prepare_suspend,
.suspend = uvd_v5_0_suspend,
.resume = uvd_v5_0_resume,
.is_idle = uvd_v5_0_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 5fe872f4bea7..ceb94bbb03a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -48,7 +48,7 @@ static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v6_0_start(struct amdgpu_device *adev);
static void uvd_v6_0_stop(struct amdgpu_device *adev);
static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev);
-static int uvd_v6_0_set_clockgating_state(void *handle,
+static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
bool enable);
@@ -217,7 +217,8 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -281,7 +282,8 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -354,9 +356,9 @@ error:
return r;
}
-static int uvd_v6_0_early_init(void *handle)
+static int uvd_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
if (!(adev->flags & AMD_IS_APU) &&
@@ -375,11 +377,11 @@ static int uvd_v6_0_early_init(void *handle)
return 0;
}
-static int uvd_v6_0_sw_init(void *handle)
+static int uvd_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* UVD TRAP */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);
@@ -432,15 +434,13 @@ static int uvd_v6_0_sw_init(void *handle)
}
}
- r = amdgpu_uvd_entity_init(adev);
-
return r;
}
-static int uvd_v6_0_sw_fini(void *handle)
+static int uvd_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -457,19 +457,19 @@ static int uvd_v6_0_sw_fini(void *handle)
/**
* uvd_v6_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v6_0_hw_init(void *handle)
+static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int i, r;
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
- uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v6_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
uvd_v6_0_enable_mgcg(adev, true);
r = amdgpu_ring_test_helper(ring);
@@ -526,13 +526,13 @@ done:
/**
* uvd_v6_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v6_0_hw_fini(void *handle)
+static int uvd_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -542,10 +542,17 @@ static int uvd_v6_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v6_0_suspend(void *handle)
+static int uvd_v6_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -571,23 +578,22 @@ static int uvd_v6_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v6_0_hw_fini(adev);
+ r = uvd_v6_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v6_0_resume(void *handle)
+static int uvd_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v6_0_hw_init(adev);
+ return uvd_v6_0_hw_init(ip_block);
}
/**
@@ -1139,29 +1145,29 @@ static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, vmid);
}
-static bool uvd_v6_0_is_idle(void *handle)
+static bool uvd_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v6_0_wait_for_idle(void *handle)
+static int uvd_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (uvd_v6_0_is_idle(handle))
+ if (uvd_v6_0_is_idle(ip_block))
return 0;
}
return -ETIMEDOUT;
}
#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd
-static bool uvd_v6_0_check_soft_reset(void *handle)
+static bool uvd_v6_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -1179,9 +1185,9 @@ static bool uvd_v6_0_check_soft_reset(void *handle)
}
}
-static int uvd_v6_0_pre_soft_reset(void *handle)
+static int uvd_v6_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->uvd.inst->srbm_soft_reset)
return 0;
@@ -1190,9 +1196,9 @@ static int uvd_v6_0_pre_soft_reset(void *handle)
return 0;
}
-static int uvd_v6_0_soft_reset(void *handle)
+static int uvd_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->uvd.inst->srbm_soft_reset)
@@ -1221,9 +1227,9 @@ static int uvd_v6_0_soft_reset(void *handle)
return 0;
}
-static int uvd_v6_0_post_soft_reset(void *handle)
+static int uvd_v6_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->uvd.inst->srbm_soft_reset)
return 0;
@@ -1446,15 +1452,15 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
}
}
-static int uvd_v6_0_set_clockgating_state(void *handle,
+static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (uvd_v6_0_wait_for_idle(handle))
+ if (uvd_v6_0_wait_for_idle(ip_block))
return -EBUSY;
uvd_v6_0_enable_clock_gating(adev, true);
/* enable HW gates because UVD is idle */
@@ -1467,7 +1473,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
return 0;
}
-static int uvd_v6_0_set_powergating_state(void *handle,
+static int uvd_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -1477,7 +1483,7 @@ static int uvd_v6_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
@@ -1494,9 +1500,9 @@ out:
return ret;
}
-static void uvd_v6_0_get_clockgating_state(void *handle, u64 *flags)
+static void uvd_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -1523,11 +1529,11 @@ out:
static const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
.name = "uvd_v6_0",
.early_init = uvd_v6_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v6_0_sw_init,
.sw_fini = uvd_v6_0_sw_fini,
.hw_init = uvd_v6_0_hw_init,
.hw_fini = uvd_v6_0_hw_fini,
+ .prepare_suspend = uvd_v6_0_prepare_suspend,
.suspend = uvd_v6_0_suspend,
.resume = uvd_v6_0_resume,
.is_idle = uvd_v6_0_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 86d1d46e1e5e..1f8866f3f63c 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -225,7 +225,8 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle,
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -288,7 +289,8 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle,
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
- AMDGPU_IB_POOL_DIRECT, &job);
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
if (r)
return r;
@@ -361,9 +363,9 @@ error:
return r;
}
-static int uvd_v7_0_early_init(void *handle)
+static int uvd_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->asic_type == CHIP_VEGA20) {
u32 harvest;
@@ -395,12 +397,12 @@ static int uvd_v7_0_early_init(void *handle)
return 0;
}
-static int uvd_v7_0_sw_init(void *handle)
+static int uvd_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, j, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
if (adev->uvd.harvest_config & (1 << j))
@@ -480,10 +482,6 @@ static int uvd_v7_0_sw_init(void *handle)
if (r)
return r;
- r = amdgpu_uvd_entity_init(adev);
- if (r)
- return r;
-
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
@@ -491,10 +489,10 @@ static int uvd_v7_0_sw_init(void *handle)
return r;
}
-static int uvd_v7_0_sw_fini(void *handle)
+static int uvd_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, j, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_virt_free_mm_table(adev);
@@ -514,13 +512,13 @@ static int uvd_v7_0_sw_fini(void *handle)
/**
* uvd_v7_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v7_0_hw_init(void *handle)
+static int uvd_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
uint32_t tmp;
int i, j, r;
@@ -592,13 +590,13 @@ done:
/**
* uvd_v7_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v7_0_hw_fini(void *handle)
+static int uvd_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -612,10 +610,17 @@ static int uvd_v7_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v7_0_suspend(void *handle)
+static int uvd_v7_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return amdgpu_uvd_prepare_suspend(adev);
+}
+
+static int uvd_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -641,23 +646,22 @@ static int uvd_v7_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v7_0_hw_fini(adev);
+ r = uvd_v7_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v7_0_resume(void *handle)
+static int uvd_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v7_0_hw_init(adev);
+ return uvd_v7_0_hw_init(ip_block);
}
/**
@@ -1286,7 +1290,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
unsigned i;
/* No patching necessary for the first instance */
@@ -1460,104 +1464,6 @@ static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-#if 0
-static bool uvd_v7_0_is_idle(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
-}
-
-static int uvd_v7_0_wait_for_idle(void *handle)
-{
- unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->usec_timeout; i++) {
- if (uvd_v7_0_is_idle(handle))
- return 0;
- }
- return -ETIMEDOUT;
-}
-
-#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd
-static bool uvd_v7_0_check_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset = 0;
- u32 tmp = RREG32(mmSRBM_STATUS);
-
- if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
- REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
- (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
- AMDGPU_UVD_STATUS_BUSY_MASK))
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
- SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
-
- if (srbm_soft_reset) {
- adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
- return true;
- } else {
- adev->uvd.inst[ring->me].srbm_soft_reset = 0;
- return false;
- }
-}
-
-static int uvd_v7_0_pre_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
-
- uvd_v7_0_stop(adev);
- return 0;
-}
-
-static int uvd_v7_0_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
- srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
-
- if (srbm_soft_reset) {
- u32 tmp;
-
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
-
- return 0;
-}
-
-static int uvd_v7_0_post_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return uvd_v7_0_start(adev);
-}
-#endif
-
static int uvd_v7_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1607,172 +1513,7 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-#if 0
-static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
-{
- uint32_t data, data1, data2, suvd_flags;
-
- data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
- data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
- data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
-
- data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
- UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
-
- suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
- UVD_SUVD_CGC_GATE__SIT_MASK |
- UVD_SUVD_CGC_GATE__SMP_MASK |
- UVD_SUVD_CGC_GATE__SCM_MASK |
- UVD_SUVD_CGC_GATE__SDB_MASK;
-
- data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
- (1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) |
- (4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY));
-
- data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
- UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
- UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
- UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
- UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
- UVD_CGC_CTRL__SYS_MODE_MASK |
- UVD_CGC_CTRL__UDEC_MODE_MASK |
- UVD_CGC_CTRL__MPEG2_MODE_MASK |
- UVD_CGC_CTRL__REGS_MODE_MASK |
- UVD_CGC_CTRL__RBC_MODE_MASK |
- UVD_CGC_CTRL__LMI_MC_MODE_MASK |
- UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
- UVD_CGC_CTRL__IDCT_MODE_MASK |
- UVD_CGC_CTRL__MPRD_MODE_MASK |
- UVD_CGC_CTRL__MPC_MODE_MASK |
- UVD_CGC_CTRL__LBSI_MODE_MASK |
- UVD_CGC_CTRL__LRBBM_MODE_MASK |
- UVD_CGC_CTRL__WCB_MODE_MASK |
- UVD_CGC_CTRL__VCPU_MODE_MASK |
- UVD_CGC_CTRL__JPEG_MODE_MASK |
- UVD_CGC_CTRL__JPEG2_MODE_MASK |
- UVD_CGC_CTRL__SCPU_MODE_MASK);
- data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
- data1 |= suvd_flags;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
-}
-
-static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
-{
- uint32_t data, data1, cgc_flags, suvd_flags;
-
- data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
- data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
-
- cgc_flags = UVD_CGC_GATE__SYS_MASK |
- UVD_CGC_GATE__UDEC_MASK |
- UVD_CGC_GATE__MPEG2_MASK |
- UVD_CGC_GATE__RBC_MASK |
- UVD_CGC_GATE__LMI_MC_MASK |
- UVD_CGC_GATE__IDCT_MASK |
- UVD_CGC_GATE__MPRD_MASK |
- UVD_CGC_GATE__MPC_MASK |
- UVD_CGC_GATE__LBSI_MASK |
- UVD_CGC_GATE__LRBBM_MASK |
- UVD_CGC_GATE__UDEC_RE_MASK |
- UVD_CGC_GATE__UDEC_CM_MASK |
- UVD_CGC_GATE__UDEC_IT_MASK |
- UVD_CGC_GATE__UDEC_DB_MASK |
- UVD_CGC_GATE__UDEC_MP_MASK |
- UVD_CGC_GATE__WCB_MASK |
- UVD_CGC_GATE__VCPU_MASK |
- UVD_CGC_GATE__SCPU_MASK |
- UVD_CGC_GATE__JPEG_MASK |
- UVD_CGC_GATE__JPEG2_MASK;
-
- suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
- UVD_SUVD_CGC_GATE__SIT_MASK |
- UVD_SUVD_CGC_GATE__SMP_MASK |
- UVD_SUVD_CGC_GATE__SCM_MASK |
- UVD_SUVD_CGC_GATE__SDB_MASK;
-
- data |= cgc_flags;
- data1 |= suvd_flags;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
-}
-
-static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
- u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
- if (enable)
- tmp |= (GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
- else
- tmp &= ~(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
-
- WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
-
-
-static int uvd_v7_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE);
-
- uvd_v7_0_set_bypass_mode(adev, enable);
-
- if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
- return 0;
-
- if (enable) {
- /* disable HW gating and enable Sw gating */
- uvd_v7_0_set_sw_clock_gating(adev);
- } else {
- /* wait for STATUS to clear */
- if (uvd_v7_0_wait_for_idle(handle))
- return -EBUSY;
-
- /* enable HW gates because UVD is idle */
- /* uvd_v7_0_set_hw_clock_gating(adev); */
- }
-
- return 0;
-}
-
-static int uvd_v7_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
-{
- /* This doesn't actually powergate the UVD block.
- * That's done in the dpm code via the SMC. This
- * just re-inits the block as necessary. The actual
- * gating still happens in the dpm code. We should
- * revisit this when there is a cleaner line between
- * the smc and the hw blocks
- */
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
- return 0;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
-
- if (state == AMD_PG_STATE_GATE) {
- uvd_v7_0_stop(adev);
- return 0;
- } else {
- return uvd_v7_0_start(adev);
- }
-}
-#endif
-
-static int uvd_v7_0_set_clockgating_state(void *handle,
+static int uvd_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* needed for driver unload*/
@@ -1782,19 +1523,13 @@ static int uvd_v7_0_set_clockgating_state(void *handle,
const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
.name = "uvd_v7_0",
.early_init = uvd_v7_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v7_0_sw_init,
.sw_fini = uvd_v7_0_sw_fini,
.hw_init = uvd_v7_0_hw_init,
.hw_fini = uvd_v7_0_hw_fini,
+ .prepare_suspend = uvd_v7_0_prepare_suspend,
.suspend = uvd_v7_0_suspend,
.resume = uvd_v7_0_resume,
- .is_idle = NULL /* uvd_v7_0_is_idle */,
- .wait_for_idle = NULL /* uvd_v7_0_wait_for_idle */,
- .check_soft_reset = NULL /* uvd_v7_0_check_soft_reset */,
- .pre_soft_reset = NULL /* uvd_v7_0_pre_soft_reset */,
- .soft_reset = NULL /* uvd_v7_0_soft_reset */,
- .post_soft_reset = NULL /* uvd_v7_0_post_soft_reset */,
.set_clockgating_state = uvd_v7_0_set_clockgating_state,
.set_powergating_state = NULL /* uvd_v7_0_set_powergating_state */,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 67eb01fef789..bee3e904a6bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -201,20 +201,20 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}
-static bool vce_v2_0_is_idle(void *handle)
+static bool vce_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
}
-static int vce_v2_0_wait_for_idle(void *handle)
+static int vce_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
unsigned i;
for (i = 0; i < adev->usec_timeout; i++) {
- if (vce_v2_0_is_idle(handle))
+ if (vce_v2_0_is_idle(ip_block))
return 0;
}
return -ETIMEDOUT;
@@ -274,15 +274,21 @@ static int vce_v2_0_start(struct amdgpu_device *adev)
static int vce_v2_0_stop(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
int i;
int status;
+
if (vce_v2_0_lmi_clean(adev)) {
- DRM_INFO("vce is not idle \n");
+ DRM_INFO("VCE is not idle \n");
return 0;
}
- if (vce_v2_0_wait_for_idle(adev)) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
+ if (!ip_block)
+ return -EINVAL;
+
+ if (vce_v2_0_wait_for_idle(ip_block)) {
DRM_INFO("VCE is busy, Can't set clock gating");
return 0;
}
@@ -398,9 +404,9 @@ static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable,
}
}
-static int vce_v2_0_early_init(void *handle)
+static int vce_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->vce.num_rings = 2;
@@ -410,11 +416,11 @@ static int vce_v2_0_early_init(void *handle)
return 0;
}
-static int vce_v2_0_sw_init(void *handle)
+static int vce_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* VCE */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
@@ -441,15 +447,13 @@ static int vce_v2_0_sw_init(void *handle)
return r;
}
- r = amdgpu_vce_entity_init(adev);
-
return r;
}
-static int vce_v2_0_sw_fini(void *handle)
+static int vce_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_vce_suspend(adev);
if (r)
@@ -458,10 +462,10 @@ static int vce_v2_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v2_0_hw_init(void *handle)
+static int vce_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
vce_v2_0_enable_mgcg(adev, true, false);
@@ -477,19 +481,17 @@ static int vce_v2_0_hw_init(void *handle)
return 0;
}
-static int vce_v2_0_hw_fini(void *handle)
+static int vce_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cancel_delayed_work_sync(&adev->vce.idle_work);
+ cancel_delayed_work_sync(&ip_block->adev->vce.idle_work);
return 0;
}
-static int vce_v2_0_suspend(void *handle)
+static int vce_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
@@ -515,28 +517,27 @@ static int vce_v2_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = vce_v2_0_hw_fini(adev);
+ r = vce_v2_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v2_0_resume(void *handle)
+static int vce_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vce_resume(adev);
+ r = amdgpu_vce_resume(ip_block->adev);
if (r)
return r;
- return vce_v2_0_hw_init(adev);
+ return vce_v2_0_hw_init(ip_block);
}
-static int vce_v2_0_soft_reset(void *handle)
+static int vce_v2_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
WREG32_FIELD(SRBM_SOFT_RESET, SOFT_RESET_VCE, 1);
mdelay(5);
@@ -577,13 +578,13 @@ static int vce_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vce_v2_0_set_clockgating_state(void *handle,
+static int vce_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
bool sw_cg = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE) {
gate = true;
@@ -595,7 +596,7 @@ static int vce_v2_0_set_clockgating_state(void *handle,
return 0;
}
-static int vce_v2_0_set_powergating_state(void *handle,
+static int vce_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -605,7 +606,7 @@ static int vce_v2_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
return vce_v2_0_stop(adev);
@@ -616,7 +617,6 @@ static int vce_v2_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs vce_v2_0_ip_funcs = {
.name = "vce_v2_0",
.early_init = vce_v2_0_early_init,
- .late_init = NULL,
.sw_init = vce_v2_0_sw_init,
.sw_fini = vce_v2_0_sw_fini,
.hw_init = vce_v2_0_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 18f6e62af339..708123899c41 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -64,8 +64,8 @@
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vce_v3_0_wait_for_idle(void *handle);
-static int vce_v3_0_set_clockgating_state(void *handle,
+static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
+static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
/**
* vce_v3_0_ring_get_rptr - get read pointer
@@ -396,9 +396,9 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
}
}
-static int vce_v3_0_early_init(void *handle)
+static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
@@ -415,9 +415,9 @@ static int vce_v3_0_early_init(void *handle)
return 0;
}
-static int vce_v3_0_sw_init(void *handle)
+static int vce_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r, i;
@@ -450,15 +450,13 @@ static int vce_v3_0_sw_init(void *handle)
return r;
}
- r = amdgpu_vce_entity_init(adev);
-
return r;
}
-static int vce_v3_0_sw_fini(void *handle)
+static int vce_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_vce_suspend(adev);
if (r)
@@ -467,10 +465,10 @@ static int vce_v3_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v3_0_hw_init(void *handle)
+static int vce_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vce_v3_0_override_vce_clock_gating(adev, true);
@@ -487,25 +485,25 @@ static int vce_v3_0_hw_init(void *handle)
return 0;
}
-static int vce_v3_0_hw_fini(void *handle)
+static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->vce.idle_work);
- r = vce_v3_0_wait_for_idle(handle);
+ r = vce_v3_0_wait_for_idle(ip_block);
if (r)
return r;
vce_v3_0_stop(adev);
- return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
+ return vce_v3_0_set_clockgating_state(ip_block, AMD_CG_STATE_GATE);
}
-static int vce_v3_0_suspend(void *handle)
+static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -530,23 +528,22 @@ static int vce_v3_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = vce_v3_0_hw_fini(adev);
+ r = vce_v3_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v3_0_resume(void *handle)
+static int vce_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vce_resume(adev);
+ r = amdgpu_vce_resume(ip_block->adev);
if (r)
return r;
- return vce_v3_0_hw_init(adev);
+ return vce_v3_0_hw_init(ip_block);
}
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
@@ -600,9 +597,9 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}
-static bool vce_v3_0_is_idle(void *handle)
+static bool vce_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 mask = 0;
mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
@@ -611,13 +608,13 @@ static bool vce_v3_0_is_idle(void *handle)
return !(RREG32(mmSRBM_STATUS2) & mask);
}
-static int vce_v3_0_wait_for_idle(void *handle)
+static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++)
- if (vce_v3_0_is_idle(handle))
+ if (vce_v3_0_is_idle(ip_block))
return 0;
return -ETIMEDOUT;
@@ -629,9 +626,9 @@ static int vce_v3_0_wait_for_idle(void *handle)
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
-static bool vce_v3_0_check_soft_reset(void *handle)
+static bool vce_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
/* According to VCE team , we should use VCE_STATUS instead
@@ -670,9 +667,9 @@ static bool vce_v3_0_check_soft_reset(void *handle)
}
}
-static int vce_v3_0_soft_reset(void *handle)
+static int vce_v3_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->vce.srbm_soft_reset)
@@ -701,29 +698,29 @@ static int vce_v3_0_soft_reset(void *handle)
return 0;
}
-static int vce_v3_0_pre_soft_reset(void *handle)
+static int vce_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->vce.srbm_soft_reset)
return 0;
mdelay(5);
- return vce_v3_0_suspend(adev);
+ return vce_v3_0_suspend(ip_block);
}
-static int vce_v3_0_post_soft_reset(void *handle)
+static int vce_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->vce.srbm_soft_reset)
return 0;
mdelay(5);
- return vce_v3_0_resume(adev);
+ return vce_v3_0_resume(ip_block);
}
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -763,10 +760,10 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vce_v3_0_set_clockgating_state(void *handle,
+static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
int i;
@@ -804,7 +801,7 @@ static int vce_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int vce_v3_0_set_powergating_state(void *handle,
+static int vce_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -814,7 +811,7 @@ static int vce_v3_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
if (state == AMD_PG_STATE_GATE) {
@@ -831,9 +828,9 @@ out:
return ret;
}
-static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags)
+static void vce_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -899,7 +896,6 @@ static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
.name = "vce_v3_0",
.early_init = vce_v3_0_early_init,
- .late_init = NULL,
.sw_init = vce_v3_0_sw_init,
.sw_fini = vce_v3_0_sw_fini,
.hw_init = vce_v3_0_hw_init,
@@ -950,7 +946,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
.get_rptr = vce_v3_0_ring_get_rptr,
.get_wptr = vce_v3_0_ring_get_wptr,
.set_wptr = vce_v3_0_ring_set_wptr,
- .parse_cs = amdgpu_vce_ring_parse_cs_vm,
+ .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
.emit_frame_size =
6 + /* vce_v3_0_emit_vm_flush */
4 + /* vce_v3_0_emit_pipeline_sync */
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index e0b70cd3b697..335bda64ff5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -407,9 +407,9 @@ static int vce_v4_0_stop(struct amdgpu_device *adev)
return 0;
}
-static int vce_v4_0_early_init(void *handle)
+static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
adev->vce.num_rings = 1;
@@ -422,9 +422,9 @@ static int vce_v4_0_early_init(void *handle)
return 0;
}
-static int vce_v4_0_sw_init(void *handle)
+static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
unsigned size;
@@ -486,11 +486,6 @@ static int vce_v4_0_sw_init(void *handle)
return r;
}
-
- r = amdgpu_vce_entity_init(adev);
- if (r)
- return r;
-
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
@@ -498,10 +493,10 @@ static int vce_v4_0_sw_init(void *handle)
return r;
}
-static int vce_v4_0_sw_fini(void *handle)
+static int vce_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* free MM table */
amdgpu_virt_free_mm_table(adev);
@@ -518,10 +513,10 @@ static int vce_v4_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v4_0_hw_init(void *handle)
+static int vce_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
r = vce_v4_0_sriov_start(adev);
@@ -541,14 +536,14 @@ static int vce_v4_0_hw_init(void *handle)
return 0;
}
-static int vce_v4_0_hw_fini(void *handle)
+static int vce_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->vce.idle_work);
if (!amdgpu_sriov_vf(adev)) {
- /* vce_v4_0_wait_for_idle(handle); */
+ /* vce_v4_0_wait_for_idle(ip_block); */
vce_v4_0_stop(adev);
} else {
/* full access mode, so don't touch any VCE register */
@@ -558,9 +553,9 @@ static int vce_v4_0_hw_fini(void *handle)
return 0;
}
-static int vce_v4_0_suspend(void *handle)
+static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r, idx;
if (adev->vce.vcpu_bo == NULL)
@@ -599,16 +594,16 @@ static int vce_v4_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = vce_v4_0_hw_fini(adev);
+ r = vce_v4_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v4_0_resume(void *handle)
+static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r, idx;
if (adev->vce.vcpu_bo == NULL)
@@ -629,7 +624,7 @@ static int vce_v4_0_resume(void *handle)
return r;
}
- return vce_v4_0_hw_init(adev);
+ return vce_v4_0_hw_init(ip_block);
}
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
@@ -689,281 +684,14 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
-static int vce_v4_0_set_clockgating_state(void *handle,
+static int vce_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* needed for driver unload*/
return 0;
}
-#if 0
-static bool vce_v4_0_is_idle(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 mask = 0;
-
- mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
- mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
-
- return !(RREG32(mmSRBM_STATUS2) & mask);
-}
-
-static int vce_v4_0_wait_for_idle(void *handle)
-{
- unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->usec_timeout; i++)
- if (vce_v4_0_is_idle(handle))
- return 0;
-
- return -ETIMEDOUT;
-}
-
-#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
-#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
-#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
-#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
- VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
-
-static bool vce_v4_0_check_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset = 0;
-
- /* According to VCE team , we should use VCE_STATUS instead
- * SRBM_STATUS.VCE_BUSY bit for busy status checking.
- * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
- * instance's registers are accessed
- * (0 for 1st instance, 10 for 2nd instance).
- *
- *VCE_STATUS
- *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
- *|----+----+-----------+----+----+----+----------+---------+----|
- *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
- *
- * VCE team suggest use bit 3--bit 6 for busy status check
- */
- mutex_lock(&adev->grbm_idx_mutex);
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
- if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
- }
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
- if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
- }
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- if (srbm_soft_reset) {
- adev->vce.srbm_soft_reset = srbm_soft_reset;
- return true;
- } else {
- adev->vce.srbm_soft_reset = 0;
- return false;
- }
-}
-
-static int vce_v4_0_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
- srbm_soft_reset = adev->vce.srbm_soft_reset;
-
- if (srbm_soft_reset) {
- u32 tmp;
-
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
-
- return 0;
-}
-
-static int vce_v4_0_pre_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return vce_v4_0_suspend(adev);
-}
-
-
-static int vce_v4_0_post_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return vce_v4_0_resume(adev);
-}
-
-static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
-{
- u32 tmp, data;
-
- tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
- if (override)
- data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
- else
- data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
-
- if (tmp != data)
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
-}
-
-static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
- bool gated)
-{
- u32 data;
-
- /* Set Override to disable Clock Gating */
- vce_v4_0_override_vce_clock_gating(adev, true);
-
- /* This function enables MGCG which is controlled by firmware.
- With the clocks in the gated state the core is still
- accessible but the firmware will throttle the clocks on the
- fly as necessary.
- */
- if (gated) {
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
- data |= 0x1ff;
- data &= ~0xef0000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
- data |= 0x3ff000;
- data &= ~0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
- data |= 0x2;
- data &= ~0x00010000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
- data |= 0x37f;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
- data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
- 0x8;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
- } else {
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
- data &= ~0x80010;
- data |= 0xe70008;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
- data |= 0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
- data |= 0x10000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
- data &= ~0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
- data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
- 0x8);
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
- }
- vce_v4_0_override_vce_clock_gating(adev, false);
-}
-
-static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
- u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
- if (enable)
- tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
- else
- tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
-
- WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
-
-static int vce_v4_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE);
- int i;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_TONGA) ||
- (adev->asic_type == CHIP_FIJI))
- vce_v4_0_set_bypass_mode(adev, enable);
-
- if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
- return 0;
-
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < 2; i++) {
- /* Program VCE Instance 0 or 1 if not harvested */
- if (adev->vce.harvest_config & (1 << i))
- continue;
-
- WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
-
- if (enable) {
- /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
- uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
- data &= ~(0xf | 0xff0);
- data |= ((0x0 << 0) | (0x04 << 4));
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
-
- /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
- data &= ~(0xf | 0xff0);
- data |= ((0x0 << 0) | (0x04 << 4));
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
- }
-
- vce_v4_0_set_vce_sw_clock_gating(adev, enable);
- }
-
- WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-#endif
-
-static int vce_v4_0_set_powergating_state(void *handle,
+static int vce_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -973,7 +701,7 @@ static int vce_v4_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
return vce_v4_0_stop(adev);
@@ -1081,19 +809,12 @@ static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
.name = "vce_v4_0",
.early_init = vce_v4_0_early_init,
- .late_init = NULL,
.sw_init = vce_v4_0_sw_init,
.sw_fini = vce_v4_0_sw_fini,
.hw_init = vce_v4_0_hw_init,
.hw_fini = vce_v4_0_hw_fini,
.suspend = vce_v4_0_suspend,
.resume = vce_v4_0_resume,
- .is_idle = NULL /* vce_v4_0_is_idle */,
- .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
- .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
- .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
- .soft_reset = NULL /* vce_v4_0_soft_reset */,
- .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
.set_clockgating_state = vce_v4_0_set_clockgating_state,
.set_powergating_state = vce_v4_0_set_powergating_state,
};
@@ -1107,7 +828,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.get_rptr = vce_v4_0_ring_get_rptr,
.get_wptr = vce_v4_0_ring_get_wptr,
.set_wptr = vce_v4_0_ring_set_wptr,
- .parse_cs = amdgpu_vce_ring_parse_cs_vm,
+ .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 25ba27151ac0..a316797875a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -45,13 +45,50 @@
#define mmUVD_REG_XX_MASK_1_0 0x05ac
#define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1
-static int vcn_v1_0_stop(struct amdgpu_device *adev);
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_1_0[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
+};
+
+static int vcn_v1_0_stop(struct amdgpu_vcn_inst *vinst);
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
-static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v1_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v1_0_idle_work_handler(struct work_struct *work);
static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
@@ -59,38 +96,41 @@ static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
/**
* vcn_v1_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v1_0_early_init(void *handle)
+static int vcn_v1_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->vcn.num_enc_rings = 2;
+ adev->vcn.inst[0].num_enc_rings = 2;
+ adev->vcn.inst[0].set_pg_state = vcn_v1_0_set_pg_state;
vcn_v1_0_set_dec_ring_funcs(adev);
vcn_v1_0_set_enc_ring_funcs(adev);
vcn_v1_0_set_irq_funcs(adev);
- jpeg_v1_0_early_init(handle);
+ jpeg_v1_0_early_init(ip_block);
- return amdgpu_vcn_early_init(adev);
+ return amdgpu_vcn_early_init(adev, 0);
}
/**
* vcn_v1_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v1_0_sw_init(void *handle)
+static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
+ uint32_t *ptr;
+ struct amdgpu_device *adev = ip_block->adev;
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
@@ -99,23 +139,23 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
&adev->vcn.inst->irq);
if (r)
return r;
}
- r = amdgpu_vcn_sw_init(adev);
+ r = amdgpu_vcn_sw_init(adev, 0);
if (r)
return r;
/* Override the work func */
- adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler;
+ adev->vcn.inst[0].idle_work.work.func = vcn_v1_0_idle_work_handler;
- amdgpu_vcn_setup_ucode(adev);
+ amdgpu_vcn_setup_ucode(adev, 0);
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(adev, 0);
if (r)
return r;
@@ -127,18 +167,18 @@ static int vcn_v1_0_sw_init(void *handle)
if (r)
return r;
- adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 =
+ adev->vcn.inst[0].internal.scratch9 = adev->vcn.inst->external.scratch9 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = adev->vcn.inst->external.data0 =
+ adev->vcn.inst[0].internal.data0 = adev->vcn.inst->external.data0 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = adev->vcn.inst->external.data1 =
+ adev->vcn.inst[0].internal.data1 = adev->vcn.inst->external.data1 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = adev->vcn.inst->external.cmd =
+ adev->vcn.inst[0].internal.cmd = adev->vcn.inst->external.cmd =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = adev->vcn.inst->external.nop =
+ adev->vcn.inst[0].internal.nop = adev->vcn.inst->external.nop =
SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
ring = &adev->vcn.inst->ring_enc[i];
@@ -150,76 +190,79 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
}
- adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
+ adev->vcn.inst[0].pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
if (amdgpu_vcnfw_log) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
amdgpu_vcn_fwlog_init(adev->vcn.inst);
}
- r = jpeg_v1_0_sw_init(handle);
+ r = jpeg_v1_0_sw_init(ip_block);
+ /* Allocate memory for VCN IP Dump buffer */
+ ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for VCN IP Dump\n");
+ adev->vcn.ip_dump = NULL;
+ } else {
+ adev->vcn.ip_dump = ptr;
+ }
return r;
}
/**
* vcn_v1_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v1_0_sw_fini(void *handle)
+static int vcn_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
if (r)
return r;
- jpeg_v1_0_sw_fini(handle);
+ jpeg_v1_0_sw_fini(ip_block);
- r = amdgpu_vcn_sw_fini(adev);
+ amdgpu_vcn_sw_fini(adev, 0);
- return r;
+ kfree(adev->vcn.ip_dump);
+
+ return 0;
}
/**
* vcn_v1_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v1_0_hw_init(void *handle)
+static int vcn_v1_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
ring = adev->jpeg.inst->ring_dec;
r = amdgpu_ring_test_helper(ring);
- if (r)
- goto done;
-
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
return r;
}
@@ -227,20 +270,21 @@ done:
/**
* vcn_v1_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v1_0_hw_fini(void *handle)
+static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&vinst->idle_work);
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
- vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
}
return 0;
@@ -249,27 +293,27 @@ static int vcn_v1_0_hw_fini(void *handle)
/**
* vcn_v1_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v1_0_suspend(void *handle)
+static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool idle_work_unexecuted;
- idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work);
+ idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
if (idle_work_unexecuted) {
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
}
- r = vcn_v1_0_hw_fini(adev);
+ r = vcn_v1_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
return r;
}
@@ -277,20 +321,19 @@ static int vcn_v1_0_suspend(void *handle)
/**
* vcn_v1_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v1_0_resume(void *handle)
+static int vcn_v1_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(ip_block->adev, 0);
if (r)
return r;
- r = vcn_v1_0_hw_init(adev);
+ r = vcn_v1_0_hw_init(ip_block);
return r;
}
@@ -298,13 +341,14 @@ static int vcn_v1_0_resume(void *handle)
/**
* vcn_v1_0_mc_resume_spg_mode - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
+static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -369,9 +413,10 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config);
}
-static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
+static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -444,12 +489,13 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
/**
* vcn_v1_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v1_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data;
/* JPEG disable CGC */
@@ -570,12 +616,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
/**
* vcn_v1_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: Pointer to the VCN instance structure
*
* Enable clock gating for VCN block
*/
-static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v1_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
/* enable JPEG CGC */
@@ -639,8 +686,10 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel)
+static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t reg_data = 0;
/* disable JPEG CGC */
@@ -693,8 +742,9 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel);
}
-static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_1_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -738,8 +788,9 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
}
-static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_1_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -782,12 +833,13 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
/**
* vcn_v1_0_start_spg_mode - start VCN block
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Setup and start the VCN block
*/
-static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_start_spg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -796,13 +848,13 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
/* disable byte swapping */
lmi_swap_cntl = 0;
- vcn_1_0_disable_static_power_gating(adev);
+ vcn_1_0_disable_static_power_gating(vinst);
tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
/* disable clock gating */
- vcn_v1_0_disable_clock_gating(adev);
+ vcn_v1_0_disable_clock_gating(vinst);
/* disable interupt */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
@@ -844,7 +896,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- vcn_v1_0_mc_resume_spg_mode(adev);
+ vcn_v1_0_mc_resume_spg_mode(vinst);
WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10);
WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0,
@@ -957,11 +1009,17 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
jpeg_v1_0_start(adev, 0);
+ /* Keep one read-back to ensure all register writes are done;
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -969,7 +1027,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
/* disable byte swapping */
lmi_swap_cntl = 0;
- vcn_1_0_enable_static_power_gating(adev);
+ vcn_1_0_enable_static_power_gating(vinst);
/* enable dynamic power gating mode */
tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
@@ -978,7 +1036,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
/* enable clock gating */
- vcn_v1_0_clock_gating_dpg_mode(adev, 0);
+ vcn_v1_0_clock_gating_dpg_mode(vinst, 0);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -1027,7 +1085,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0);
- vcn_v1_0_mc_resume_dpg_mode(adev);
+ vcn_v1_0_mc_resume_dpg_mode(vinst);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0);
@@ -1044,7 +1102,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN,
UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
- vcn_v1_0_clock_gating_dpg_mode(adev, 1);
+ vcn_v1_0_clock_gating_dpg_mode(vinst, 1);
/* setup mmUVD_LMI_CTRL */
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL,
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
@@ -1101,24 +1159,32 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
jpeg_v1_0_start(adev, 1);
+ /* Keep one read-back of UVD_STATUS to ensure all register writes are
+ * done before returning; otherwise race conditions may be introduced.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_start(struct amdgpu_device *adev)
+static int vcn_v1_0_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+
return (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ?
- vcn_v1_0_start_dpg_mode(adev) : vcn_v1_0_start_spg_mode(adev);
+ vcn_v1_0_start_dpg_mode(vinst) : vcn_v1_0_start_spg_mode(vinst);
}
/**
* vcn_v1_0_stop_spg_mode - stop VCN block
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* stop the VCN block
*/
-static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_stop_spg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
int tmp;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
@@ -1158,13 +1224,20 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0);
- vcn_v1_0_enable_clock_gating(adev);
- vcn_1_0_enable_static_power_gating(adev);
+ vcn_v1_0_enable_clock_gating(vinst);
+ vcn_1_0_enable_static_power_gating(vinst);
+
+ /* Keep one read-back of UVD_STATUS to ensure all register writes are
+ * done before returning; otherwise race conditions may be introduced.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t tmp;
/* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */
@@ -1193,24 +1266,32 @@ static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keep one read-back of UVD_STATUS to ensure all register writes are
+ * done before returning; otherwise race conditions may be introduced.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_stop(struct amdgpu_device *adev)
+static int vcn_v1_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
int r;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- r = vcn_v1_0_stop_dpg_mode(adev);
+ r = vcn_v1_0_stop_dpg_mode(vinst);
else
- r = vcn_v1_0_stop_spg_mode(adev);
+ r = vcn_v1_0_stop_spg_mode(vinst);
return r;
}
-static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
int ret_code;
uint32_t reg_data = 0;
uint32_t reg_data2 = 0;
@@ -1257,7 +1338,6 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- ring = &adev->vcn.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
@@ -1318,7 +1398,6 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
- ring = &adev->vcn.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
@@ -1336,16 +1415,16 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
return 0;
}
-static bool vcn_v1_0_is_idle(void *handle)
+static bool vcn_v1_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
-static int vcn_v1_0_wait_for_idle(void *handle)
+static int vcn_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
@@ -1354,20 +1433,21 @@ static int vcn_v1_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v1_0_set_clockgating_state(void *handle,
+static int vcn_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (!vcn_v1_0_is_idle(handle))
+ if (!vcn_v1_0_is_idle(ip_block))
return -EBUSY;
- vcn_v1_0_enable_clock_gating(adev);
+ vcn_v1_0_enable_clock_gating(vinst);
} else {
/* disable HW gating and enable Sw gating */
- vcn_v1_0_disable_clock_gating(adev);
+ vcn_v1_0_disable_clock_gating(vinst);
}
return 0;
}
@@ -1759,8 +1839,8 @@ static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun
}
}
-static int vcn_v1_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v1_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCN block.
* That's done in the dpm code via the SMC. This
@@ -1770,28 +1850,29 @@ static int vcn_v1_0_set_powergating_state(void *handle,
* the smc and the hw blocks
*/
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v1_0_stop(adev);
+ ret = vcn_v1_0_stop(vinst);
else
- ret = vcn_v1_0_start(adev);
+ ret = vcn_v1_0_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
+
return ret;
}
static void vcn_v1_0_idle_work_handler(struct work_struct *work)
{
- struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
unsigned int fences = 0, i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
@@ -1807,7 +1888,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
- adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ adev->vcn.inst->pause_dpg_mode(vcn_inst, &new_state);
}
fences += amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec);
@@ -1816,21 +1897,21 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
} else {
- schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
}
}
static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
- mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+ mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround);
if (amdgpu_fence_wait_empty(ring->adev->jpeg.inst->ring_dec))
DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n");
@@ -1846,7 +1927,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, 0);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
@@ -1856,7 +1937,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
struct dpg_pause_state new_state;
unsigned int fences = 0, i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
if (fences)
@@ -1874,20 +1955,79 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
new_state.jpeg = VCN_DPG_STATE__PAUSE;
- adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ adev->vcn.inst->pause_dpg_mode(adev->vcn.inst, &new_state);
}
}
void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring)
{
- schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
- mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround);
+ schedule_delayed_work(&ring->adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
+ mutex_unlock(&ring->adev->vcn.inst[0].vcn1_jpeg1_workaround);
+}
+
+static void vcn_v1_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
+ uint32_t inst_off, is_powered;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * reg_count;
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1;
+
+ if (is_powered) {
+ drm_printf(p, "\nActive Instance:VCN%d\n", i);
+ for (j = 0; j < reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_1_0[j].reg_name,
+ adev->vcn.ip_dump[inst_off + j]);
+ } else {
+ drm_printf(p, "\nInactive Instance:VCN%d\n", i);
+ }
+ }
+}
+
+static void vcn_v1_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ bool is_powered;
+ uint32_t inst_off;
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ inst_off = i * reg_count;
+ /* mmUVD_POWER_STATUS is always readable and is first element of the array */
+ adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS);
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1;
+
+ if (is_powered)
+ for (j = 1; j < reg_count; j++)
+ adev->vcn.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_1_0[j], i));
+ }
}
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.name = "vcn_v1_0",
.early_init = vcn_v1_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v1_0_sw_init,
.sw_fini = vcn_v1_0_sw_fini,
.hw_init = vcn_v1_0_hw_init,
@@ -1896,12 +2036,10 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.resume = vcn_v1_0_resume,
.is_idle = vcn_v1_0_is_idle,
.wait_for_idle = vcn_v1_0_wait_for_idle,
- .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
- .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
- .soft_reset = NULL /* vcn_v1_0_soft_reset */,
- .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
.set_clockgating_state = vcn_v1_0_set_clockgating_state,
- .set_powergating_state = vcn_v1_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = vcn_v1_0_dump_ip_state,
+ .print_ip_state = vcn_v1_0_print_ip_state,
};
/*
@@ -1958,11 +2096,11 @@ static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
uint32_t reg = amdgpu_ib_get_value(ib, i);
uint32_t val = amdgpu_ib_get_value(ib, i + 1);
- if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
+ if (reg == PACKET0(p->adev->vcn.inst[0].internal.data0, 0)) {
msg_lo = val;
- } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
+ } else if (reg == PACKET0(p->adev->vcn.inst[0].internal.data1, 0)) {
msg_hi = val;
- } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0)) {
+ } else if (reg == PACKET0(p->adev->vcn.inst[0].internal.cmd, 0)) {
r = vcn_v1_0_validate_bo(p, job,
((u64)msg_hi) << 32 | msg_lo);
if (r)
@@ -2041,17 +2179,14 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
- DRM_INFO("VCN decode is enabled in VM mode\n");
}
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
-
- DRM_INFO("VCN encode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
@@ -2061,7 +2196,7 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2;
+ adev->vcn.inst->irq.num_types = adev->vcn.inst[0].num_enc_rings + 2;
adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 18794394c5a0..8897dcc9c1a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -39,6 +39,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503
@@ -53,51 +54,90 @@
#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7
#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
+};
+
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v2_0_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static int vcn_v2_0_start_sriov(struct amdgpu_device *adev);
+static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst);
+
/**
* vcn_v2_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v2_0_early_init(void *handle)
+static int vcn_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
- adev->vcn.num_enc_rings = 1;
+ adev->vcn.inst[0].num_enc_rings = 1;
else
- adev->vcn.num_enc_rings = 2;
+ adev->vcn.inst[0].num_enc_rings = 2;
+ adev->vcn.inst->set_pg_state = vcn_v2_0_set_pg_state;
vcn_v2_0_set_dec_ring_funcs(adev);
vcn_v2_0_set_enc_ring_funcs(adev);
vcn_v2_0_set_irq_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ return amdgpu_vcn_early_init(adev, 0);
}
/**
* vcn_v2_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v2_0_sw_init(void *handle)
+static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_fw_shared *fw_shared;
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
@@ -107,7 +147,7 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
&adev->vcn.inst->irq);
@@ -115,13 +155,13 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
}
- r = amdgpu_vcn_sw_init(adev);
+ r = amdgpu_vcn_sw_init(adev, 0);
if (r)
return r;
- amdgpu_vcn_setup_ucode(adev);
+ amdgpu_vcn_setup_ucode(adev, 0);
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(adev, 0);
if (r)
return r;
@@ -137,25 +177,25 @@ static int vcn_v2_0_sw_init(void *handle)
if (r)
return r;
- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
ring = &adev->vcn.inst->ring_enc[i];
@@ -172,7 +212,13 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
}
- adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
+ adev->vcn.inst[0].pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
+ adev->vcn.inst[0].reset = vcn_v2_0_reset;
+
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
@@ -184,21 +230,29 @@ static int vcn_v2_0_sw_init(void *handle)
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(adev->vcn.inst);
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_2_0, ARRAY_SIZE(vcn_reg_list_2_0));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return 0;
}
/**
* vcn_v2_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v2_0_sw_fini(void *handle)
+static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r, idx;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
fw_shared->present_flag_0 = 0;
@@ -207,25 +261,27 @@ static int vcn_v2_0_sw_fini(void *handle)
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
if (r)
return r;
- r = amdgpu_vcn_sw_fini(adev);
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
- return r;
+ amdgpu_vcn_sw_fini(adev, 0);
+
+ return 0;
}
/**
* vcn_v2_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v2_0_hw_init(void *handle)
+static int vcn_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
@@ -237,44 +293,40 @@ static int vcn_v2_0_hw_init(void *handle)
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
//Disable vcn decode for sriov
if (amdgpu_sriov_vf(adev))
ring->sched.ready = false;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
-
- return r;
+ return 0;
}
/**
* vcn_v2_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v2_0_hw_fini(void *handle)
+static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&vinst->idle_work);
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
- vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
return 0;
}
@@ -282,20 +334,19 @@ static int vcn_v2_0_hw_fini(void *handle)
/**
* vcn_v2_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v2_0_suspend(void *handle)
+static int vcn_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = vcn_v2_0_hw_fini(adev);
+ r = vcn_v2_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(ip_block->adev, 0);
return r;
}
@@ -303,20 +354,19 @@ static int vcn_v2_0_suspend(void *handle)
/**
* vcn_v2_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v2_0_resume(void *handle)
+static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(ip_block->adev, 0);
if (r)
return r;
- r = vcn_v2_0_hw_init(adev);
+ r = vcn_v2_0_hw_init(ip_block);
return r;
}
@@ -324,13 +374,14 @@ static int vcn_v2_0_resume(void *handle)
/**
* vcn_v2_0_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: Pointer to the VCN instance structure
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
+static void vcn_v2_0_mc_resume(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
if (amdgpu_sriov_vf(adev))
@@ -384,9 +435,11 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
}
-static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
+static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -483,12 +536,13 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/**
* vcn_v2_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data;
if (amdgpu_sriov_vf(adev))
@@ -592,9 +646,10 @@ static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
+static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
uint8_t sram_sel, uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t reg_data = 0;
/* enable sw clock gating control */
@@ -643,12 +698,13 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v2_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -701,8 +757,9 @@ static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -750,8 +807,9 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
}
-static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -792,13 +850,15 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
}
}
-static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
+static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
+ int ret;
- vcn_v2_0_enable_static_power_gating(adev);
+ vcn_v2_0_enable_static_power_gating(vinst);
/* enable dynamic power gating mode */
tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
@@ -810,7 +870,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
adev->vcn.inst->dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst->dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
+ vcn_v2_0_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -859,7 +919,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
+ vcn_v2_0_mc_resume_dpg_mode(vinst, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
@@ -880,8 +940,13 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
UVD, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
- if (indirect)
- amdgpu_vcn_psp_update_sram(adev, 0, 0);
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, 0, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
/* force RBC into idle state */
rb_bufsz = order_base_2(ring->ring_size);
@@ -924,31 +989,38 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
/* Unstall DPG */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_start(struct amdgpu_device *adev)
+static int vcn_v2_0_start(struct amdgpu_vcn_inst *vinst)
{
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
int i, j, r;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, 0);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
+ return vcn_v2_0_start_dpg_mode(vinst, adev->vcn.inst->indirect_sram);
- vcn_v2_0_disable_static_power_gating(adev);
+ vcn_v2_0_disable_static_power_gating(vinst);
/* set uvd status busy */
tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
/*SW clock gating */
- vcn_v2_0_disable_clock_gating(adev);
+ vcn_v2_0_disable_clock_gating(vinst);
/* enable VCPU clock */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
@@ -992,7 +1064,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- vcn_v2_0_mc_resume(adev);
+ vcn_v2_0_mc_resume(vinst);
/* release VCPU reset to boot */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
@@ -1097,15 +1169,21 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v2_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
uint32_t tmp;
- vcn_v2_0_pause_dpg_mode(adev, 0, &state);
+ vcn_v2_0_pause_dpg_mode(vinst, &state);
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -1127,16 +1205,22 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_stop(struct amdgpu_device *adev)
+static int vcn_v2_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t tmp;
int r;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_0_stop_dpg_mode(adev);
+ r = vcn_v2_0_stop_dpg_mode(vinst);
if (r)
return r;
goto power_off;
@@ -1188,19 +1272,26 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
/* clear status */
WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
- vcn_v2_0_enable_clock_gating(adev);
- vcn_v2_0_enable_static_power_gating(adev);
+ vcn_v2_0_enable_clock_gating(vinst);
+ vcn_v2_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS);
power_off:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
return 0;
}
-static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code;
@@ -1217,7 +1308,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
@@ -1275,16 +1366,26 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
return 0;
}
-static bool vcn_v2_0_is_idle(void *handle)
+static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst)
+{
+ int r;
+
+ r = vcn_v2_0_stop(vinst);
+ if (r)
+ return r;
+ return vcn_v2_0_start(vinst);
+}
+
+static bool vcn_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
-static int vcn_v2_0_wait_for_idle(void *handle)
+static int vcn_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
@@ -1293,10 +1394,10 @@ static int vcn_v2_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v2_0_set_clockgating_state(void *handle,
+static int vcn_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -1304,12 +1405,12 @@ static int vcn_v2_0_set_clockgating_state(void *handle,
if (enable) {
/* wait for STATUS to clear */
- if (!vcn_v2_0_is_idle(handle))
+ if (!vcn_v2_0_is_idle(ip_block))
return -EBUSY;
- vcn_v2_0_enable_clock_gating(adev);
+ vcn_v2_0_enable_clock_gating(&adev->vcn.inst[0]);
} else {
/* disable HW gating and enable Sw gating */
- vcn_v2_0_disable_clock_gating(adev);
+ vcn_v2_0_disable_clock_gating(&adev->vcn.inst[0]);
}
return 0;
}
@@ -1379,9 +1480,9 @@ void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
}
@@ -1396,7 +1497,7 @@ void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1));
}
@@ -1416,7 +1517,7 @@ void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
WARN_ON(ring->wptr % 2 || count % 2);
for (i = 0; i < count / 2; i++) {
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.nop, 0));
amdgpu_ring_write(ring, 0);
}
}
@@ -1437,25 +1538,25 @@ void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
struct amdgpu_device *adev = ring->adev;
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.context_id, 0));
amdgpu_ring_write(ring, seq);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, addr & 0xffffffff);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1));
}
@@ -1478,14 +1579,14 @@ void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_vmid, 0));
amdgpu_ring_write(ring, vmid);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_bar_low, 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_bar_high, 0));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_size, 0));
amdgpu_ring_write(ring, ib->length_dw);
}
@@ -1494,16 +1595,16 @@ void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, val);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.gp_scratch8, 0));
amdgpu_ring_write(ring, mask);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1));
}
@@ -1528,13 +1629,13 @@ void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, val);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1));
}
@@ -1735,9 +1836,9 @@ int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 4);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
@@ -1754,8 +1855,8 @@ int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
}
-static int vcn_v2_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCN block.
* That's done in the dpm code via the SMC. This
@@ -1765,23 +1866,24 @@ static int vcn_v2_0_set_powergating_state(void *handle,
* the smc and the hw blocks
*/
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = vinst->adev;
if (amdgpu_sriov_vf(adev)) {
- adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v2_0_stop(adev);
+ ret = vcn_v2_0_stop(vinst);
else
- ret = vcn_v2_0_start(adev);
+ ret = vcn_v2_0_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
+
return ret;
}
@@ -1820,7 +1922,7 @@ static int vcn_v2_0_start_mmsch(struct amdgpu_device *adev,
adev->vcn.inst->ring_dec.wptr_old = 0;
vcn_v2_0_dec_ring_set_wptr(&adev->vcn.inst->ring_dec);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
adev->vcn.inst->ring_enc[i].wptr = 0;
adev->vcn.inst->ring_enc[i].wptr_old = 0;
vcn_v2_0_enc_ring_set_wptr(&adev->vcn.inst->ring_enc[i]);
@@ -1878,7 +1980,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
init_table += header->vcn_table_offset;
- size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT(
SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
@@ -1946,7 +2048,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
AMDGPU_VCN_CONTEXT_SIZE);
- for (r = 0; r < adev->vcn.num_enc_rings; ++r) {
+ for (r = 0; r < adev->vcn.inst[0].num_enc_rings; ++r) {
ring = &adev->vcn.inst->ring_enc[r];
ring->wptr = 0;
MMSCH_V2_0_INSERT_DIRECT_WT(
@@ -1993,7 +2095,6 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
.name = "vcn_v2_0",
.early_init = vcn_v2_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v2_0_sw_init,
.sw_fini = vcn_v2_0_sw_fini,
.hw_init = vcn_v2_0_hw_init,
@@ -2002,12 +2103,10 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
.resume = vcn_v2_0_resume,
.is_idle = vcn_v2_0_is_idle,
.wait_for_idle = vcn_v2_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_0_set_clockgating_state,
- .set_powergating_state = vcn_v2_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
};
static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
@@ -2038,6 +2137,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
@@ -2067,22 +2167,20 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
- DRM_INFO("VCN decode is enabled in VM mode\n");
}
static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;
-
- DRM_INFO("VCN encode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
@@ -2092,7 +2190,7 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst->irq.num_types = adev->vcn.inst[0].num_enc_rings + 1;
adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 6fbea38f4d3e..cebee453871c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -39,6 +39,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
@@ -55,37 +56,181 @@
#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
+};
+
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v2_5_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev);
+static int vcn_v2_5_reset(struct amdgpu_vcn_inst *vinst);
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
};
+static void vcn_v2_5_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
+ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
+ unsigned int i, j;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *v = &adev->vcn.inst[i];
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < v->num_enc_rings; ++j)
+ fence[i] += amdgpu_fence_count_emitted(&v->ring_enc[j]);
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !v->using_unified_queue) {
+ struct dpg_pause_state new_state;
+
+ if (fence[i] ||
+ unlikely(atomic_read(&v->dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ v->pause_dpg_mode(v, &new_state);
+ }
+
+ fence[i] += amdgpu_fence_count_emitted(&v->ring_dec);
+ fences += fence[i];
+
+ }
+
+ if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) {
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
+ amdgpu_vcn_put_profile(adev);
+ } else {
+ schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
+ }
+}
+
+static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *v = &adev->vcn.inst[ring->me];
+
+ atomic_inc(&adev->vcn.inst[0].total_submission_cnt);
+
+ cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
+
+ /* We can safely return early here because we've cancelled the
+ * delayed work so there is no one else to set it to false
+ * and we don't care if someone else sets it to true.
+ */
+ mutex_lock(&adev->vcn.inst[0].vcn_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !v->using_unified_queue) {
+ struct dpg_pause_state new_state;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+ atomic_inc(&v->dpg_enc_submission_cnt);
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ } else {
+ unsigned int fences = 0;
+ unsigned int i;
+
+ for (i = 0; i < v->num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&v->ring_enc[i]);
+
+ if (fences || atomic_read(&v->dpg_enc_submission_cnt))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ }
+ v->pause_dpg_mode(v, &new_state);
+ }
+ mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock);
+ amdgpu_vcn_get_profile(adev);
+}
+
+static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
+ !adev->vcn.inst[ring->me].using_unified_queue)
+ atomic_dec(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
+ atomic_dec(&adev->vcn.inst[0].total_submission_cnt);
+
+ schedule_delayed_work(&adev->vcn.inst[0].idle_work,
+ VCN_IDLE_TIMEOUT);
+}
+
/**
* vcn_v2_5_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v2_5_early_init(void *handle)
+static int vcn_v2_5_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
adev->vcn.num_vcn_inst = 2;
adev->vcn.harvest_config = 0;
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ adev->vcn.inst[i].num_enc_rings = 1;
} else {
u32 harvest;
int i;
@@ -94,13 +239,12 @@ static int vcn_v2_5_early_init(void *handle)
harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
adev->vcn.harvest_config |= 1 << i;
+ adev->vcn.inst[i].num_enc_rings = 2;
}
if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
- AMDGPU_VCN_HARVEST_VCN1))
+ AMDGPU_VCN_HARVEST_VCN1))
/* both instances are harvested, disable the block */
return -ENOENT;
-
- adev->vcn.num_enc_rings = 2;
}
vcn_v2_5_set_dec_ring_funcs(adev);
@@ -108,23 +252,33 @@ static int vcn_v2_5_early_init(void *handle)
vcn_v2_5_set_irq_funcs(adev);
vcn_v2_5_set_ras_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ adev->vcn.inst[i].set_pg_state = vcn_v2_5_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
}
/**
* vcn_v2_5_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v2_5_sw_init(void *handle)
+static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, j, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
+ struct amdgpu_fw_shared *fw_shared;
+
if (adev->vcn.harvest_config & (1 << j))
continue;
/* VCN DEC TRAP */
@@ -134,7 +288,7 @@ static int vcn_v2_5_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
if (r)
@@ -146,39 +300,36 @@ static int vcn_v2_5_sw_init(void *handle)
VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].ras_poison_irq);
if (r)
return r;
- }
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
+ r = amdgpu_vcn_sw_init(adev, j);
+ if (r)
+ return r;
- amdgpu_vcn_setup_ucode(adev);
+ /* Override the work func */
+ adev->vcn.inst[j].idle_work.work.func = vcn_v2_5_idle_work_handler;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ amdgpu_vcn_setup_ucode(adev, j);
- for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
- volatile struct amdgpu_fw_shared *fw_shared;
+ r = amdgpu_vcn_resume(adev, j);
+ if (r)
+ return r;
- if (adev->vcn.harvest_config & (1 << j))
- continue;
- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
-
- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+
+ adev->vcn.inst[j].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP);
ring = &adev->vcn.inst[j].ring_dec;
@@ -187,7 +338,7 @@ static int vcn_v2_5_sw_init(void *handle)
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
(amdgpu_sriov_vf(adev) ? 2*j : 8*j);
- if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(2, 5, 0))
ring->vm_hub = AMDGPU_MMHUB1(0);
else
ring->vm_hub = AMDGPU_MMHUB0(0);
@@ -198,7 +349,7 @@ static int vcn_v2_5_sw_init(void *handle)
if (r)
return r;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
ring = &adev->vcn.inst[j].ring_enc[i];
@@ -207,7 +358,8 @@ static int vcn_v2_5_sw_init(void *handle)
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
(amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
- if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0))
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
+ IP_VERSION(2, 5, 0))
ring->vm_hub = AMDGPU_MMHUB1(0);
else
ring->vm_hub = AMDGPU_MMHUB0(0);
@@ -225,36 +377,50 @@ static int vcn_v2_5_sw_init(void *handle)
if (amdgpu_vcnfw_log)
- amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[j]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[j].pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
+ adev->vcn.inst[j].reset = vcn_v2_5_reset;
}
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
-
r = amdgpu_vcn_ras_sw_init(adev);
if (r)
return r;
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_2_5, ARRAY_SIZE(vcn_reg_list_2_5));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return 0;
}
/**
* vcn_v2_5_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v2_5_sw_fini(void *handle)
+static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, r, idx;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_fw_shared *fw_shared;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
@@ -270,25 +436,28 @@ static int vcn_v2_5_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
- r = amdgpu_vcn_sw_fini(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ amdgpu_vcn_sw_fini(adev, i);
+ }
- return r;
+ return 0;
}
/**
* vcn_v2_5_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v2_5_hw_init(void *handle)
+static int vcn_v2_5_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r = 0;
@@ -313,50 +482,47 @@ static int vcn_v2_5_hw_init(void *handle)
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
ring = &adev->vcn.inst[j].ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
}
}
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
-
return r;
}
/**
* vcn_v2_5_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v2_5_hw_fini(void *handle)
+static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
+ cancel_delayed_work_sync(&vinst->idle_work);
+
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, mmUVD_STATUS)))
- vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
- amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ amdgpu_irq_put(adev, &vinst->ras_poison_irq, 0);
}
return 0;
@@ -365,41 +531,47 @@ static int vcn_v2_5_hw_fini(void *handle)
/**
* vcn_v2_5_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v2_5_suspend(void *handle)
+static int vcn_v2_5_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v2_5_hw_fini(adev);
+ r = vcn_v2_5_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- return r;
+ return 0;
}
/**
* vcn_v2_5_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v2_5_resume(void *handle)
+static int vcn_v2_5_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v2_5_hw_init(adev);
+ r = vcn_v2_5_hw_init(ip_block);
return r;
}
@@ -407,68 +579,72 @@ static int vcn_v2_5_resume(void *handle)
/**
* vcn_v2_5_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
+static void vcn_v2_5_mc_resume(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ uint32_t size;
uint32_t offset;
- int i;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* cache window 0: fw */
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
- offset = 0;
- } else {
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].gpu_addr));
- offset = size;
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
- }
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- /* cache window 1: stack */
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
-
- /* cache window 2: context */
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
-
- /* non-cache window */
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
- WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
- AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
}
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
}
-static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -565,123 +741,124 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/**
* vcn_v2_5_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_5_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t data;
- int i;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* UVD disable CGC */
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
- data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- else
- data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
- data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
- data &= ~(UVD_CGC_GATE__SYS_MASK
- | UVD_CGC_GATE__UDEC_MASK
- | UVD_CGC_GATE__MPEG2_MASK
- | UVD_CGC_GATE__REGS_MASK
- | UVD_CGC_GATE__RBC_MASK
- | UVD_CGC_GATE__LMI_MC_MASK
- | UVD_CGC_GATE__LMI_UMC_MASK
- | UVD_CGC_GATE__IDCT_MASK
- | UVD_CGC_GATE__MPRD_MASK
- | UVD_CGC_GATE__MPC_MASK
- | UVD_CGC_GATE__LBSI_MASK
- | UVD_CGC_GATE__LRBBM_MASK
- | UVD_CGC_GATE__UDEC_RE_MASK
- | UVD_CGC_GATE__UDEC_CM_MASK
- | UVD_CGC_GATE__UDEC_IT_MASK
- | UVD_CGC_GATE__UDEC_DB_MASK
- | UVD_CGC_GATE__UDEC_MP_MASK
- | UVD_CGC_GATE__WCB_MASK
- | UVD_CGC_GATE__VCPU_MASK
- | UVD_CGC_GATE__MMSCH_MASK);
-
- WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
-
- SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
-
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
- | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
- | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
- | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
- | UVD_CGC_CTRL__SYS_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MODE_MASK
- | UVD_CGC_CTRL__MPEG2_MODE_MASK
- | UVD_CGC_CTRL__REGS_MODE_MASK
- | UVD_CGC_CTRL__RBC_MODE_MASK
- | UVD_CGC_CTRL__LMI_MC_MODE_MASK
- | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
- | UVD_CGC_CTRL__IDCT_MODE_MASK
- | UVD_CGC_CTRL__MPRD_MODE_MASK
- | UVD_CGC_CTRL__MPC_MODE_MASK
- | UVD_CGC_CTRL__LBSI_MODE_MASK
- | UVD_CGC_CTRL__LRBBM_MODE_MASK
- | UVD_CGC_CTRL__WCB_MODE_MASK
- | UVD_CGC_CTRL__VCPU_MODE_MASK
- | UVD_CGC_CTRL__MMSCH_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- /* turn on */
- data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
- data |= (UVD_SUVD_CGC_GATE__SRE_MASK
- | UVD_SUVD_CGC_GATE__SIT_MASK
- | UVD_SUVD_CGC_GATE__SMP_MASK
- | UVD_SUVD_CGC_GATE__SCM_MASK
- | UVD_SUVD_CGC_GATE__SDB_MASK
- | UVD_SUVD_CGC_GATE__SRE_H264_MASK
- | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SIT_H264_MASK
- | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SCM_H264_MASK
- | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SDB_H264_MASK
- | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SCLR_MASK
- | UVD_SUVD_CGC_GATE__UVD_SC_MASK
- | UVD_SUVD_CGC_GATE__ENT_MASK
- | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
- | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
- | UVD_SUVD_CGC_GATE__SITE_MASK
- | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
- | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
- | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
- | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
- | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
- WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
- data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
- | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
- | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+ /* UVD disable CGC */
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
+
+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ /* turn on */
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
- }
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
- uint8_t sram_sel, int inst_idx, uint8_t indirect)
+static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel, uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
/* enable sw clock gating control */
@@ -730,71 +907,72 @@ static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v2_5_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_5_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t data = 0;
- int i;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* enable UVD CGC */
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
- data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- else
- data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
- | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
- | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
- | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
- | UVD_CGC_CTRL__SYS_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MODE_MASK
- | UVD_CGC_CTRL__MPEG2_MODE_MASK
- | UVD_CGC_CTRL__REGS_MODE_MASK
- | UVD_CGC_CTRL__RBC_MODE_MASK
- | UVD_CGC_CTRL__LMI_MC_MODE_MASK
- | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
- | UVD_CGC_CTRL__IDCT_MODE_MASK
- | UVD_CGC_CTRL__MPRD_MODE_MASK
- | UVD_CGC_CTRL__MPC_MODE_MASK
- | UVD_CGC_CTRL__LBSI_MODE_MASK
- | UVD_CGC_CTRL__LRBBM_MODE_MASK
- | UVD_CGC_CTRL__WCB_MODE_MASK
- | UVD_CGC_CTRL__VCPU_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
- data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
- | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
- | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
- | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
- }
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+ /* enable UVD CGC */
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx,
+static void vcn_v2_6_enable_ras(struct amdgpu_vcn_inst *vinst,
bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t tmp;
- if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(2, 6, 0))
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(2, 6, 0))
return;
tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
@@ -816,11 +994,14 @@ static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx,
tmp, 0, indirect);
}
-static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
+ int ret;
/* disable register anti-hang mechanism */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
@@ -835,7 +1016,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v2_5_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -884,7 +1065,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v2_5_mc_resume_dpg_mode(vinst, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
@@ -895,7 +1076,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
- vcn_v2_6_enable_ras(adev, inst_idx, indirect);
+ vcn_v2_6_enable_ras(vinst, indirect);
/* unblock VCPU register access */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
@@ -911,8 +1092,13 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
VCN, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
- if (indirect)
- amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
ring = &adev->vcn.inst[inst_idx].ring_dec;
/* force RBC into idle state */
@@ -957,198 +1143,200 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_5_start(struct amdgpu_device *adev)
+static int vcn_v2_5_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_fw_shared *fw_shared =
+ adev->vcn.inst[i].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
- int i, j, k, r;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, i);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v2_5_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
- /* disable register anti-hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
- ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
- /* set uvd status busy */
- tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
- }
+ /* set uvd status busy */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
return 0;
- /*SW clock gating */
- vcn_v2_5_disable_clock_gating(adev);
+ /* SW clock gating */
+ vcn_v2_5_disable_clock_gating(vinst);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* setup mmUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
- tmp &= ~0xff;
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup mmUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup mmUVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- }
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
- vcn_v2_5_mc_resume(adev);
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
+ tmp &= ~0xff;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v2_5_mc_resume(vinst);
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- for (k = 0; k < 10; ++k) {
- uint32_t status;
-
- for (j = 0; j < 100; ++j) {
- status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
- if (status & 2)
- break;
- if (amdgpu_emu_mode == 1)
- msleep(500);
- else
- mdelay(10);
- }
- r = 0;
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (k = 0; k < 10; ++k) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
if (status & 2)
break;
+ if (amdgpu_emu_mode == 1)
+ msleep(500);
+ else
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
- DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- r = -1;
- }
+ mdelay(10);
+ r = -1;
+ }
- if (r) {
- DRM_ERROR("VCN decode not responding, giving up!!!\n");
- return r;
- }
+ if (r) {
+ DRM_ERROR("VCN decode not responding, giving up!!!\n");
+ return r;
+ }
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
- ring = &adev->vcn.inst[i].ring_dec;
- /* force RBC into idle state */
- rb_bufsz = order_base_2(ring->ring_size);
- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
+ ring = &adev->vcn.inst[i].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
- fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
- /* program the RB_BASE for ring buffer */
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
- ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
- lower_32_bits(ring->wptr));
- fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
- fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
- fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
-
- fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
- ring = &adev->vcn.inst[i].ring_enc[1];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
- fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
- }
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[i].ring_enc[1];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
return 0;
}
@@ -1239,7 +1427,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
/* mc resume*/
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V1_0_INSERT_DIRECT_WT(
@@ -1349,8 +1537,10 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
}
-static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static int vcn_v2_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t tmp;
/* Wait for power status to be 1 */
@@ -1374,80 +1564,93 @@ static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_5_stop(struct amdgpu_device *adev)
+static int vcn_v2_5_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t tmp;
- int i, r = 0;
+ int r;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_5_stop_dpg_mode(adev, i);
- continue;
- }
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
- if (r)
- return r;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_5_stop_dpg_mode(vinst);
+ goto done;
+ }
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
- /* block LMI UMC channel */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* block LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
- /* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- /* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* clear status */
- WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- vcn_v2_5_enable_clock_gating(adev);
+ /* clear status */
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
- /* enable register anti-hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
- ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
- }
+ vcn_v2_5_enable_clock_gating(vinst);
+
+ /* enable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
+done:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, i);
- return 0;
+ return r;
}
-static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code = 0;
@@ -1464,7 +1667,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
if (!ret_code) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
/* pause DPG */
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
@@ -1593,11 +1796,12 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
.insert_start = vcn_v2_0_dec_ring_insert_start,
.insert_end = vcn_v2_0_dec_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
+ .begin_use = vcn_v2_5_ring_begin_use,
+ .end_use = vcn_v2_5_ring_end_use,
.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
/**
@@ -1691,11 +1895,12 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
.insert_end = vcn_v2_0_enc_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
+ .begin_use = vcn_v2_5_ring_begin_use,
+ .end_use = vcn_v2_5_ring_end_use,
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
@@ -1707,7 +1912,6 @@ static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
continue;
adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
adev->vcn.inst[i].ring_dec.me = i;
- DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
}
}
@@ -1718,31 +1922,41 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
adev->vcn.inst[j].ring_enc[i].me = j;
}
- DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j);
}
}
-static bool vcn_v2_5_is_idle(void *handle)
+static int vcn_v2_5_reset(struct amdgpu_vcn_inst *vinst)
+{
+ int r;
+
+ r = vcn_v2_5_stop(vinst);
+ if (r)
+ return r;
+ return vcn_v2_5_start(vinst);
+}
+
+static bool vcn_v2_5_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
+
ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
return ret;
}
-static int vcn_v2_5_wait_for_idle(void *handle)
+static int vcn_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1757,45 +1971,50 @@ static int vcn_v2_5_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v2_5_set_clockgating_state(void *handle,
+static int vcn_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
+ int i;
if (amdgpu_sriov_vf(adev))
return 0;
- if (enable) {
- if (!vcn_v2_5_is_idle(handle))
- return -EBUSY;
- vcn_v2_5_enable_clock_gating(adev);
- } else {
- vcn_v2_5_disable_clock_gating(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (enable) {
+ if (!vcn_v2_5_is_idle(ip_block))
+ return -EBUSY;
+ vcn_v2_5_enable_clock_gating(vinst);
+ } else {
+ vcn_v2_5_disable_clock_gating(vinst);
+ }
}
return 0;
}
-static int vcn_v2_5_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v2_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = vinst->adev;
int ret;
if (amdgpu_sriov_vf(adev))
return 0;
- if(state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v2_5_stop(adev);
+ ret = vcn_v2_5_stop(vinst);
else
- ret = vcn_v2_5_start(adev);
+ ret = vcn_v2_5_start(vinst);
- if(!ret)
- adev->vcn.cur_state = state;
+ if (!ret)
+ vinst->cur_state = state;
return ret;
}
@@ -1872,10 +2091,10 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
- adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v2_6_ras_irq_funcs;
}
}
@@ -1883,7 +2102,6 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
.name = "vcn_v2_5",
.early_init = vcn_v2_5_early_init,
- .late_init = NULL,
.sw_init = vcn_v2_5_sw_init,
.sw_fini = vcn_v2_5_sw_fini,
.hw_init = vcn_v2_5_hw_init,
@@ -1892,18 +2110,15 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
.resume = vcn_v2_5_resume,
.is_idle = vcn_v2_5_is_idle,
.wait_for_idle = vcn_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_5_set_clockgating_state,
- .set_powergating_state = vcn_v2_5_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
};
static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
.name = "vcn_v2_6",
.early_init = vcn_v2_5_early_init,
- .late_init = NULL,
.sw_init = vcn_v2_5_sw_init,
.sw_fini = vcn_v2_5_sw_fini,
.hw_init = vcn_v2_5_hw_init,
@@ -1912,12 +2127,10 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
.resume = vcn_v2_5_resume,
.is_idle = vcn_v2_5_is_idle,
.wait_for_idle = vcn_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_5_set_clockgating_state,
- .set_powergating_state = vcn_v2_5_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
};
const struct amdgpu_ip_block_version vcn_v2_5_ip_block =
@@ -1985,7 +2198,7 @@ static struct amdgpu_vcn_ras vcn_v2_6_ras = {
static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[VCN_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(2, 6, 0):
adev->vcn.ras = &vcn_v2_6_ras;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index a61ecefdafc5..d9cf8f0feeb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -40,6 +40,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
@@ -60,6 +61,42 @@
#define RDECODE_MSG_CREATE 0x00000000
#define RDECODE_MESSAGE_CREATE 0x00000001
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
+};
+
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
@@ -69,10 +106,11 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v3_0_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst);
static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
@@ -80,19 +118,21 @@ static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
/**
* vcn_v3_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v3_0_early_init(void *handle)
+static int vcn_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
adev->vcn.harvest_config = 0;
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ adev->vcn.inst[i].num_enc_rings = 1;
} else {
if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
@@ -100,42 +140,42 @@ static int vcn_v3_0_early_init(void *handle)
/* both instances are harvested, disable the block */
return -ENOENT;
- if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 33))
- adev->vcn.num_enc_rings = 0;
- else
- adev->vcn.num_enc_rings = 2;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
+ IP_VERSION(3, 0, 33))
+ adev->vcn.inst[i].num_enc_rings = 0;
+ else
+ adev->vcn.inst[i].num_enc_rings = 2;
+ }
}
vcn_v3_0_set_dec_ring_funcs(adev);
vcn_v3_0_set_enc_ring_funcs(adev);
vcn_v3_0_set_irq_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ adev->vcn.inst[i].set_pg_state = vcn_v3_0_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+ return 0;
}
/**
* vcn_v3_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v3_0_sw_init(void *handle)
+static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, j, r;
int vcn_doorbell_index = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
-
- amdgpu_vcn_setup_ucode(adev);
-
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Note: doorbell assignment is fixed for SRIOV multiple VCN engines
@@ -151,27 +191,37 @@ static int vcn_v3_0_sw_init(void *handle)
}
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ adev->vcn.inst[i].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP);
/* VCN DEC TRAP */
@@ -185,7 +235,7 @@ static int vcn_v3_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_dec;
ring->use_doorbell = true;
if (amdgpu_sriov_vf(adev)) {
- ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1);
+ ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.inst[i].num_enc_rings + 1);
} else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
}
@@ -197,7 +247,7 @@ static int vcn_v3_0_sw_init(void *handle)
if (r)
return r;
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
/* VCN ENC TRAP */
@@ -209,7 +259,7 @@ static int vcn_v3_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_enc[j];
ring->use_doorbell = true;
if (amdgpu_sriov_vf(adev)) {
- ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j;
+ ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.inst[i].num_enc_rings + 1) + 1 + j;
} else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
}
@@ -227,22 +277,38 @@ static int vcn_v3_0_sw_init(void *handle)
cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB);
fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG;
- if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 2))
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 1, 2))
fw_shared->smu_interface_info.smu_interface_type = 2;
- else if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 1))
+ else if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
+ IP_VERSION(3, 1, 1))
fw_shared->smu_interface_info.smu_interface_type = 1;
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
+ adev->vcn.inst[i].reset = vcn_v3_0_reset;
}
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
+
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_3_0, ARRAY_SIZE(vcn_reg_list_3_0));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
return 0;
}
@@ -250,18 +316,18 @@ static int vcn_v3_0_sw_init(void *handle)
/**
* vcn_v3_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v3_0_sw_fini(void *handle)
+static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r, idx;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -276,32 +342,36 @@ static int vcn_v3_0_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
- r = amdgpu_vcn_sw_fini(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
- return r;
+ amdgpu_vcn_sw_fini(adev, i);
+ }
+
+ return 0;
}
/**
* vcn_v3_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v3_0_hw_init(void *handle)
+static int vcn_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r;
if (amdgpu_sriov_vf(adev)) {
r = vcn_v3_0_start_sriov(adev);
if (r)
- goto done;
+ return r;
/* initialize VCN dec and enc ring buffers */
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -320,7 +390,7 @@ static int vcn_v3_0_hw_init(void *handle)
ring->sched.ready = true;
}
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
ring->sched.ready = false;
@@ -346,48 +416,45 @@ static int vcn_v3_0_hw_init(void *handle)
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
}
}
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
-
- return r;
+ return 0;
}
/**
* vcn_v3_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v3_0_hw_fini(void *handle)
+static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
+ cancel_delayed_work_sync(&vinst->idle_work);
+
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
- vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
}
}
}
@@ -398,41 +465,47 @@ static int vcn_v3_0_hw_fini(void *handle)
/**
* vcn_v3_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v3_0_suspend(void *handle)
+static int vcn_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v3_0_hw_fini(adev);
+ r = vcn_v3_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- return r;
+ return 0;
}
/**
* vcn_v3_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v3_0_resume(void *handle)
+static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v3_0_hw_init(adev);
+ r = vcn_v3_0_hw_init(ip_block);
return r;
}
@@ -440,14 +513,15 @@ static int vcn_v3_0_resume(void *handle)
/**
* vcn_v3_0_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_mc_resume(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -495,9 +569,12 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
}
-static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -591,8 +668,10 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
-static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -642,8 +721,10 @@ static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int
WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
}
-static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -690,13 +771,14 @@ static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int
/**
* vcn_v3_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: Pointer to the VCN instance structure
*
* Disable clock gating for VCN block
*/
-static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
/* VCN disable CGC */
@@ -823,9 +905,12 @@ static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
- uint8_t sram_sel, int inst_idx, uint8_t indirect)
+static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
/* enable sw clock gating control */
@@ -874,13 +959,14 @@ static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v3_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: Pointer to the VCN instance structure
*
* Enable clock gating for VCN block
*/
-static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
/* enable VCN CGC */
@@ -939,11 +1025,14 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
}
-static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
+ int ret;
/* disable register anti-hang mechanism */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
@@ -958,7 +1047,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v3_0_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -1007,7 +1096,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v3_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v3_0_mc_resume_dpg_mode(vinst, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
@@ -1036,8 +1125,13 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
- if (indirect)
- amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
ring = &adev->vcn.inst[inst_idx].ring_dec;
/* force RBC into idle state */
@@ -1088,194 +1182,203 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v3_0_start(struct amdgpu_device *adev)
+static int vcn_v3_0_start(struct amdgpu_vcn_inst *vinst)
{
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
- int i, j, k, r;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, i);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v3_0_start_dpg_mode(vinst, vinst->indirect_sram);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ /* disable VCN power gating */
+ vcn_v3_0_disable_static_power_gating(vinst);
- /* disable VCN power gating */
- vcn_v3_0_disable_static_power_gating(adev, i);
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
- /* set VCN status busy */
- tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
+ /* SW clock gating */
+ vcn_v3_0_disable_clock_gating(vinst);
- /*SW clock gating */
- vcn_v3_0_disable_clock_gating(adev, i);
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
- tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
-
- /* setup mmUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup mmUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup mmUVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
-
- vcn_v3_0_mc_resume(adev, i);
-
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
-
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- for (j = 0; j < 10; ++j) {
- uint32_t status;
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- for (k = 0; k < 100; ++k) {
- status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
- if (status & 2)
- break;
- mdelay(10);
- }
- r = 0;
- if (status & 2)
- break;
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
- DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v3_0_mc_resume(vinst);
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
+ if (status & 2)
+ break;
mdelay(10);
- r = -1;
}
+ r = 0;
+ if (status & 2)
+ break;
- if (r) {
- DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
- return r;
- }
+ DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ mdelay(10);
+ r = -1;
+ }
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ if (r) {
+ DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
+ return r;
+ }
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- ring = &adev->vcn.inst[i].ring_dec;
- /* force RBC into idle state */
- rb_bufsz = order_base_2(ring->ring_size);
- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
- /* programm the RB_BASE for ring buffer */
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
+ ring = &adev->vcn.inst[i].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
- WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
- ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
- lower_32_bits(ring->wptr));
- fw_shared->rb.wptr = lower_32_bits(ring->wptr);
- fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
-
- if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
- fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
- fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
-
- fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
- ring = &adev->vcn.inst[i].ring_enc[1];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
- fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
- }
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
+ ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ fw_shared->rb.wptr = lower_32_bits(ring->wptr);
+ fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) !=
+ IP_VERSION(3, 0, 33)) {
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ ring = &adev->vcn.inst[i].ring_enc[1];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
}
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
+
return 0;
}
@@ -1329,7 +1432,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
mmUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
@@ -1388,7 +1491,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
mmUVD_VCPU_CACHE_SIZE2),
AMDGPU_VCN_CONTEXT_SIZE);
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
ring->wptr = 0;
rb_addr = ring->gpu_addr;
@@ -1488,12 +1591,14 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
return 0;
}
-static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static int vcn_v3_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
uint32_t tmp;
- vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state);
+ vcn_v3_0_pause_dpg_mode(vinst, &state);
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
@@ -1516,88 +1621,101 @@ static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v3_0_stop(struct amdgpu_device *adev)
+static int vcn_v3_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t tmp;
- int i, r = 0;
+ int r = 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v3_0_stop_dpg_mode(adev, i);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v3_0_stop_dpg_mode(vinst);
+ goto done;
+ }
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
- if (r)
- return r;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- /* disable LMI UMC channel */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
- /* apply soft reset */
- tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+ /* clear status */
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
- /* clear status */
- WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+ /* apply HW clock gating */
+ vcn_v3_0_enable_clock_gating(vinst);
- /* apply HW clock gating */
- vcn_v3_0_enable_clock_gating(adev, i);
+ /* enable VCN power gating */
+ vcn_v3_0_enable_static_power_gating(vinst);
- /* enable VCN power gating */
- vcn_v3_0_enable_static_power_gating(adev, i);
- }
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
+done:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, i);
- return 0;
+ return r;
}
-static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code;
@@ -1628,7 +1746,8 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
- if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) {
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) !=
+ IP_VERSION(3, 0, 33)) {
/* Restore */
fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
@@ -1715,7 +1834,7 @@ static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- volatile struct amdgpu_fw_shared *fw_shared;
+ struct amdgpu_fw_shared *fw_shared;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
/*whenever update RBC_RB_WPTR, we save the wptr in shared rb.wptr and scratch2 */
@@ -1765,15 +1884,19 @@ static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
struct amdgpu_job *job)
{
struct drm_gpu_scheduler **scheds;
-
- /* The create msg must be in the first IB submitted */
- if (atomic_read(&job->base.entity->fence_seq))
- return -EINVAL;
+ struct dma_fence *fence;
/* if VCN0 is harvested, we can't support AV1 */
if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
return -EINVAL;
+ /* wait for all jobs to finish before switching to instance 0 */
+ fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull);
+ if (fence) {
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
[AMDGPU_RING_PRIO_DEFAULT].sched;
drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
@@ -1879,11 +2002,11 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
uint32_t reg = amdgpu_ib_get_value(ib, i);
uint32_t val = amdgpu_ib_get_value(ib, i + 1);
- if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
+ if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.data0, 0)) {
msg_lo = val;
- } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
+ } else if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.data1, 0)) {
msg_hi = val;
- } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
+ } else if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.cmd, 0) &&
val == 0) {
r = vcn_v3_0_dec_msg(p, job,
((u64)msg_hi) << 32 | msg_lo);
@@ -1923,6 +2046,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
/**
@@ -2021,6 +2145,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = amdgpu_vcn_ring_reset,
};
static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
@@ -2036,8 +2161,6 @@ static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
else
adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs;
adev->vcn.inst[i].ring_dec.me = i;
- DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i,
- DEC_SW_RING_ENABLED?"(Software Ring)":"");
}
}
@@ -2049,18 +2172,28 @@ static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs;
adev->vcn.inst[i].ring_enc[j].me = i;
}
- if (adev->vcn.num_enc_rings > 0)
- DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
}
}
-static bool vcn_v3_0_is_idle(void *handle)
+static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst)
+{
+ int r;
+
+ r = vcn_v3_0_stop(vinst);
+ if (r)
+ return r;
+ vcn_v3_0_enable_clock_gating(vinst);
+ vcn_v3_0_enable_static_power_gating(vinst);
+ return vcn_v3_0_start(vinst);
+}
+
+static bool vcn_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -2073,9 +2206,9 @@ static bool vcn_v3_0_is_idle(void *handle)
return ret;
}
-static int vcn_v3_0_wait_for_idle(void *handle)
+static int vcn_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -2091,54 +2224,55 @@ static int vcn_v3_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v3_0_set_clockgating_state(void *handle,
+static int vcn_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
if (adev->vcn.harvest_config & (1 << i))
continue;
if (enable) {
if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE)
return -EBUSY;
- vcn_v3_0_enable_clock_gating(adev, i);
+ vcn_v3_0_enable_clock_gating(vinst);
} else {
- vcn_v3_0_disable_clock_gating(adev, i);
+ vcn_v3_0_disable_clock_gating(vinst);
}
}
return 0;
}
-static int vcn_v3_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
/* for SRIOV, guest should not control VCN Power-gating
* MMSCH FW should control Power-gating and clock-gating
* guest should avoid touching CGC and PG
*/
if (amdgpu_sriov_vf(adev)) {
- adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v3_0_stop(adev);
+ ret = vcn_v3_0_stop(vinst);
else
- ret = vcn_v3_0_start(adev);
+ ret = vcn_v3_0_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
return ret;
}
@@ -2203,7 +2337,7 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs;
}
}
@@ -2211,7 +2345,6 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
.name = "vcn_v3_0",
.early_init = vcn_v3_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v3_0_sw_init,
.sw_fini = vcn_v3_0_sw_fini,
.hw_init = vcn_v3_0_hw_init,
@@ -2220,12 +2353,10 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
.resume = vcn_v3_0_resume,
.is_idle = vcn_v3_0_is_idle,
.wait_for_idle = vcn_v3_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v3_0_set_clockgating_state,
- .set_powergating_state = vcn_v3_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
};
const struct amdgpu_ip_block_version vcn_v3_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 29164289c5f3..3ae666522d57 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -46,12 +46,49 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define VCN_HARVEST_MMSCH 0
#define RDECODE_MSG_CREATE 0x00000000
#define RDECODE_MESSAGE_CREATE 0x00000001
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
@@ -60,25 +97,25 @@ static int amdgpu_ih_clientid_vcns[] = {
static int vcn_v4_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v4_0_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v4_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v4_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev);
/**
* vcn_v4_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v4_0_early_init(void *handle)
+static int vcn_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
adev->vcn.harvest_config = VCN_HARVEST_MMSCH;
@@ -90,45 +127,77 @@ static int vcn_v4_0_early_init(void *handle)
}
}
- /* re-use enc ring as unified ring */
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
vcn_v4_0_set_unified_ring_funcs(adev);
vcn_v4_0_set_irq_funcs(adev);
vcn_v4_0_set_ras_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v4_0_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+ fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+ AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) ==
+ IP_VERSION(4, 0, 2)) {
+ fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT;
+ fw_shared->drm_key_wa.method =
+ AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
+ }
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+
+ return 0;
}
/**
* vcn_v4_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v4_0_sw_init(void *handle)
+static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r;
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
-
- amdgpu_vcn_setup_ucode(adev);
-
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
-
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
-
if (adev->vcn.harvest_config & (1 << i))
continue;
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
/* Init instance 0 sched_score to 1, so it's scheduled after other instances */
if (i == 0)
atomic_set(&adev->vcn.inst[i].sched_score, 1);
@@ -150,7 +219,8 @@ static int vcn_v4_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_enc[0];
ring->use_doorbell = true;
if (amdgpu_sriov_vf(adev))
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i *
+ (adev->vcn.inst[i].num_enc_rings + 1) + 1;
else
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
ring->vm_hub = AMDGPU_MMHUB0(0);
@@ -161,58 +231,54 @@ static int vcn_v4_0_sw_init(void *handle)
if (r)
return r;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
- fw_shared->sq.is_enabled = 1;
+ vcn_v4_0_fw_shared_init(adev, i);
- fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
- fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
- AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
-
- if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 2)) {
- fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT;
- fw_shared->drm_key_wa.method =
- AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
- }
-
- if (amdgpu_sriov_vf(adev))
- fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
-
- if (amdgpu_vcnfw_log)
- amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
}
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
r = amdgpu_vcn_ras_sw_init(adev);
if (r)
return r;
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0, ARRAY_SIZE(vcn_reg_list_4_0));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return 0;
}
/**
* vcn_v4_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v4_0_sw_fini(void *handle)
+static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r, idx;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -228,32 +294,37 @@ static int vcn_v4_0_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
- r = amdgpu_vcn_sw_fini(adev);
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ amdgpu_vcn_sw_fini(adev, i);
+
+ return 0;
}
/**
* vcn_v4_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v4_0_hw_init(void *handle)
+static int vcn_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
if (amdgpu_sriov_vf(adev)) {
r = vcn_v4_0_start_sriov(adev);
if (r)
- goto done;
+ return r;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
@@ -264,7 +335,6 @@ static int vcn_v4_0_hw_init(void *handle)
ring->wptr_old = 0;
vcn_v4_0_unified_ring_set_wptr(ring);
ring->sched.ready = true;
-
}
} else {
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -278,45 +348,42 @@ static int vcn_v4_0_hw_init(void *handle)
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
-
+ return r;
}
}
-done:
- if (!r)
- DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
-
- return r;
+ return 0;
}
/**
* vcn_v4_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v4_0_hw_fini(void *handle)
+static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
+
+ cancel_delayed_work_sync(&vinst->idle_work);
+
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, i, regUVD_STATUS))) {
- vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, regUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
}
}
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
- amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ amdgpu_irq_put(adev, &vinst->ras_poison_irq, 0);
}
return 0;
@@ -325,41 +392,47 @@ static int vcn_v4_0_hw_fini(void *handle)
/**
* vcn_v4_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v4_0_suspend(void *handle)
+static int vcn_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v4_0_hw_fini(adev);
+ r = vcn_v4_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- return r;
+ return 0;
}
/**
* vcn_v4_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v4_0_resume(void *handle)
+static int vcn_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v4_0_hw_init(adev);
+ r = vcn_v4_0_hw_init(ip_block);
return r;
}
@@ -367,17 +440,18 @@ static int vcn_v4_0_resume(void *handle)
/**
* vcn_v4_0_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_mc_resume(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -427,17 +501,19 @@ static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst)
/**
* vcn_v4_0_mc_resume_dpg_mode - memory controller programming for dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Let the VCN memory controller know it's offsets with dpg mode
*/
-static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -528,19 +604,21 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/* VCN global tiling registers */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
- VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+ VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG),
+ adev->gfx.config.gb_addr_config, 0, indirect);
}
/**
* vcn_v4_0_disable_static_power_gating - disable VCN static power gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Disable static power gating for VCN block
*/
-static void vcn_v4_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -599,13 +677,14 @@ static void vcn_v4_0_disable_static_power_gating(struct amdgpu_device *adev, int
/**
* vcn_v4_0_enable_static_power_gating - enable VCN static power gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Enable static power gating for VCN block
*/
-static void vcn_v4_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -654,13 +733,14 @@ static void vcn_v4_0_enable_static_power_gating(struct amdgpu_device *adev, int
/**
* vcn_v4_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v4_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -765,16 +845,18 @@ static void vcn_v4_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
/**
* vcn_v4_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
* @sram_sel: sram select
- * @inst_idx: instance number index
* @indirect: indirectly write sram
*
* Disable clock gating for VCN block with dpg mode
*/
-static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
- int inst_idx, uint8_t indirect)
+static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -822,13 +904,14 @@ static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, u
/**
* vcn_v4_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -876,13 +959,13 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
-
- return;
}
-static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
+static void vcn_v4_0_enable_ras(struct amdgpu_vcn_inst *vinst,
bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t tmp;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
@@ -905,17 +988,19 @@ static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
/**
* vcn_v4_0_start_dpg_mode - VCN start with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Start VCN block with dpg mode
*/
-static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
- volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
+ int ret;
/* disable register anti-hang mechanism */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
@@ -930,7 +1015,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v4_0_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v4_0_disable_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -978,7 +1063,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v4_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v4_0_mc_resume_dpg_mode(vinst, indirect);
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
@@ -990,7 +1075,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
- vcn_v4_0_enable_ras(adev, inst_idx, indirect);
+ vcn_v4_0_enable_ras(vinst, indirect);
/* enable master interrupt */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
@@ -998,8 +1083,13 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
- if (indirect)
- amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
ring = &adev->vcn.inst[inst_idx].ring_enc[0];
@@ -1027,6 +1117,11 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+
return 0;
}
@@ -1034,178 +1129,202 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
/**
* vcn_v4_0_start - VCN start
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Start VCN block
*/
-static int vcn_v4_0_start(struct amdgpu_device *adev)
+static int vcn_v4_0_start(struct amdgpu_vcn_inst *vinst)
{
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
- int i, j, k, r;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, i);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v4_0_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
- /* disable VCN power gating */
- vcn_v4_0_disable_static_power_gating(adev, i);
-
- /* set VCN status busy */
- tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
-
- /*SW clock gating */
- vcn_v4_0_disable_clock_gating(adev, i);
-
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
-
- /* setup regUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
- WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup regUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
-
- vcn_v4_0_mc_resume(adev, i);
-
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
-
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- for (j = 0; j < 10; ++j) {
- uint32_t status;
-
- for (k = 0; k < 100; ++k) {
- status = RREG32_SOC15(VCN, i, regUVD_STATUS);
- if (status & 2)
- break;
- mdelay(10);
- if (amdgpu_emu_mode == 1)
- msleep(1);
- }
+ /* disable VCN power gating */
+ vcn_v4_0_disable_static_power_gating(vinst);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
+
+ /*SW clock gating */
+ vcn_v4_0_disable_clock_gating(vinst);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- if (amdgpu_emu_mode == 1) {
- r = -1;
- if (status & 2) {
- r = 0;
- break;
- }
- } else {
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ r = -1;
+ if (status & 2) {
r = 0;
- if (status & 2)
- break;
-
- dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- mdelay(10);
- r = -1;
+ break;
}
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
}
+ }
- if (r) {
- dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
- return r;
- }
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
- ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
- VCN_RB1_DB_CTRL__EN_MASK);
-
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
-
- tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
- tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
- WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
- fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
- WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
- WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
-
- tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
- WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
- ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
-
- tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
- tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
- WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
- fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
- }
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+
+ return 0;
+}
+
+static int vcn_v4_0_init_ring_metadata(struct amdgpu_device *adev, uint32_t vcn_inst, struct amdgpu_ring *ring_enc)
+{
+ struct amdgpu_vcn_rb_metadata *rb_metadata = NULL;
+ uint8_t *rb_ptr = (uint8_t *)ring_enc->ring;
+
+ rb_ptr += ring_enc->ring_size;
+ rb_metadata = (struct amdgpu_vcn_rb_metadata *)rb_ptr;
+
+ memset(rb_metadata, 0, sizeof(struct amdgpu_vcn_rb_metadata));
+ rb_metadata->size = sizeof(struct amdgpu_vcn_rb_metadata);
+ rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+ rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
+ rb_metadata->version = 1;
+ rb_metadata->ring_id = vcn_inst & 0xFF;
return 0;
}
@@ -1235,8 +1354,8 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
struct mmsch_v4_0_cmd_end end = { {0} };
struct mmsch_v4_0_init_header header;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
- volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_fw_shared_rb_setup *rb_setup;
direct_wt.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_WRITE;
@@ -1259,13 +1378,16 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
+ // Must re/init fw_shared at beginning
+ vcn_v4_0_fw_shared_init(adev, i);
+
table_size = 0;
MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
regUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
@@ -1332,11 +1454,30 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
rb_enc_addr = ring_enc->gpu_addr;
rb_setup->is_rb_enabled_flags |= RB_ENABLED;
- rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
- rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
- rb_setup->rb_size = ring_enc->ring_size / 4;
fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+ if (amdgpu_sriov_is_vcn_rb_decouple(adev)) {
+ vcn_v4_0_init_ring_metadata(adev, i, ring_enc);
+
+ memset((void *)&rb_setup->rb_info, 0, sizeof(struct amdgpu_vcn_rb_setup_info) * MAX_NUM_VCN_RB_SETUP);
+ if (!(adev->vcn.harvest_config & (1 << 0))) {
+ rb_setup->rb_info[0].rb_addr_lo = lower_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
+ rb_setup->rb_info[0].rb_addr_hi = upper_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr);
+ rb_setup->rb_info[0].rb_size = adev->vcn.inst[0].ring_enc[0].ring_size / 4;
+ }
+ if (!(adev->vcn.harvest_config & (1 << 1))) {
+ rb_setup->rb_info[2].rb_addr_lo = lower_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
+ rb_setup->rb_info[2].rb_addr_hi = upper_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr);
+ rb_setup->rb_info[2].rb_size = adev->vcn.inst[1].ring_enc[0].ring_size / 4;
+ }
+ fw_shared->decouple.is_enabled = 1;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG);
+ } else {
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ }
+
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
@@ -1421,17 +1562,18 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
/**
* vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
*
* Stop VCN block with dpg mode
*/
-static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
uint32_t tmp;
- vcn_v4_0_pause_dpg_mode(adev, inst_idx, &state);
+ vcn_v4_0_pause_dpg_mode(vinst, &state);
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -1446,87 +1588,101 @@ static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
}
/**
* vcn_v4_0_stop - VCN stop
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Stop VCN block
*/
-static int vcn_v4_0_stop(struct amdgpu_device *adev)
+static int vcn_v4_0_stop(struct amdgpu_vcn_inst *vinst)
{
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
uint32_t tmp;
- int i, r = 0;
+ int r = 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- vcn_v4_0_stop_dpg_mode(adev, i);
- continue;
- }
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
- if (r)
- return r;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_stop_dpg_mode(vinst);
+ goto done;
+ }
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
- /* disable LMI UMC channel */
- tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- /* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- /* apply soft reset */
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
- /* clear status */
- WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
+ /* clear status */
+ WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
- /* apply HW clock gating */
- vcn_v4_0_enable_clock_gating(adev, i);
+ /* apply HW clock gating */
+ vcn_v4_0_enable_clock_gating(vinst);
- /* enable VCN power gating */
- vcn_v4_0_enable_static_power_gating(adev, i);
- }
+ /* enable VCN power gating */
+ vcn_v4_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+done:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, i);
return 0;
}
@@ -1534,15 +1690,16 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev)
/**
* vcn_v4_0_pause_dpg_mode - VCN pause with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @new_state: pause state
*
* Pause dpg mode for VCN block
*/
-static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
- struct dpg_pause_state *new_state)
+static int vcn_v4_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
int ret_code;
@@ -1644,15 +1801,19 @@ static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
struct amdgpu_job *job)
{
struct drm_gpu_scheduler **scheds;
-
- /* The create msg must be in the first IB submitted */
- if (atomic_read(&job->base.entity->fence_seq))
- return -EINVAL;
+ struct dma_fence *fence;
/* if VCN0 is harvested, we can't support AV1 */
if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
return -EINVAL;
+ /* wait for all jobs to finish before switching to instance 0 */
+ fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull);
+ if (fence) {
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
[AMDGPU_RING_PRIO_0].sched;
drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
@@ -1743,22 +1904,16 @@ out:
#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002)
#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
-
#define RADEON_VCN_ENGINE_INFO (0x30000001)
-#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16
-
#define RENCODE_ENCODE_STANDARD_AV1 2
#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003
-#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64
-/* return the offset in ib if id is found, -1 otherwise
- * to speed up the searching we only search upto max_offset
- */
-static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
+/* return the offset in ib if id is found, -1 otherwise */
+static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int start)
{
int i;
- for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
+ for (i = start; i < ib->length_dw && ib->ptr[i] >= 8; i += ib->ptr[i] / 4) {
if (ib->ptr[i + 1] == id)
return i;
}
@@ -1773,41 +1928,56 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_vcn_decode_buffer *decode_buffer;
uint64_t addr;
uint32_t val;
- int idx;
+ int idx = 0, sidx;
/* The first instance can decode anything */
if (!ring->me)
return 0;
- /* RADEON_VCN_ENGINE_INFO is at the top of ib block */
- idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
- RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
- if (idx < 0) /* engine info is missing */
- return 0;
-
- val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
- if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
- decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
-
- if (!(decode_buffer->valid_buf_flag & 0x1))
- return 0;
-
- addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
- decode_buffer->msg_buffer_address_lo;
- return vcn_v4_0_dec_msg(p, job, addr);
- } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
- idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
- RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
- if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
- return vcn_v4_0_limit_sched(p, job);
+ while ((idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, idx)) >= 0) {
+ val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
+ if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
+ decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
+
+ if (!(decode_buffer->valid_buf_flag & 0x1))
+ return 0;
+
+ addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+ decode_buffer->msg_buffer_address_lo;
+ return vcn_v4_0_dec_msg(p, job, addr);
+ } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
+ sidx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, idx);
+ if (sidx >= 0 && ib->ptr[sidx + 2] == RENCODE_ENCODE_STANDARD_AV1)
+ return vcn_v4_0_limit_sched(p, job);
+ }
+ idx += ib->ptr[idx] / 4;
}
return 0;
}
+static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = vcn_v4_0_stop(vinst);
+ if (r)
+ return r;
+ r = vcn_v4_0_start(vinst);
+ if (r)
+ return r;
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+ .extra_bytes = sizeof(struct amdgpu_vcn_rb_metadata),
.get_rptr = vcn_v4_0_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_unified_ring_set_wptr,
@@ -1832,6 +2002,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v4_0_ring_reset,
};
/**
@@ -1849,27 +2020,25 @@ static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 2))
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 2))
vcn_v4_0_unified_ring_vm_funcs.secure_submission_supported = true;
adev->vcn.inst[i].ring_enc[0].funcs =
(const struct amdgpu_ring_funcs *)&vcn_v4_0_unified_ring_vm_funcs;
adev->vcn.inst[i].ring_enc[0].me = i;
-
- DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i);
}
}
/**
* vcn_v4_0_is_idle - check VCN block is idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block structure
*
* Check whether VCN block is idle
*/
-static bool vcn_v4_0_is_idle(void *handle)
+static bool vcn_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1885,13 +2054,13 @@ static bool vcn_v4_0_is_idle(void *handle)
/**
* vcn_v4_0_wait_for_idle - wait for VCN block idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Wait for VCN block idle
*/
-static int vcn_v4_0_wait_for_idle(void *handle)
+static int vcn_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1910,86 +2079,66 @@ static int vcn_v4_0_wait_for_idle(void *handle)
/**
* vcn_v4_0_set_clockgating_state - set VCN block clockgating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: clock gating state
*
* Set VCN block clockgating state
*/
-static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
+static int vcn_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
if (enable) {
if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
return -EBUSY;
- vcn_v4_0_enable_clock_gating(adev, i);
+ vcn_v4_0_enable_clock_gating(vinst);
} else {
- vcn_v4_0_disable_clock_gating(adev, i);
+ vcn_v4_0_disable_clock_gating(vinst);
}
}
return 0;
}
-/**
- * vcn_v4_0_set_powergating_state - set VCN block powergating state
- *
- * @handle: amdgpu_device pointer
- * @state: power gating state
- *
- * Set VCN block powergating state
- */
-static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state)
+static int vcn_v4_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
/* for SRIOV, guest should not control VCN Power-gating
* MMSCH FW should control Power-gating and clock-gating
* guest should avoid touching CGC and PG
*/
if (amdgpu_sriov_vf(adev)) {
- adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v4_0_stop(adev);
+ ret = vcn_v4_0_stop(vinst);
else
- ret = vcn_v4_0_start(adev);
+ ret = vcn_v4_0_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
return ret;
}
/**
- * vcn_v4_0_set_interrupt_state - set VCN block interrupt state
- *
- * @adev: amdgpu_device pointer
- * @source: interrupt sources
- * @type: interrupt types
- * @state: interrupt states
- *
- * Set VCN block interrupt state
- */
-static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
- unsigned type, enum amdgpu_interrupt_state state)
-{
- return 0;
-}
-
-/**
* vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state
*
* @adev: amdgpu_device pointer
@@ -2021,16 +2170,20 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
{
uint32_t ip_instance;
- switch (entry->client_id) {
- case SOC15_IH_CLIENTID_VCN:
- ip_instance = 0;
- break;
- case SOC15_IH_CLIENTID_VCN1:
- ip_instance = 1;
- break;
- default:
- DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
- return 0;
+ if (amdgpu_sriov_is_vcn_rb_decouple(adev)) {
+ ip_instance = entry->ring_id;
+ } else {
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
}
DRM_DEBUG("IH: VCN TRAP\n");
@@ -2049,7 +2202,6 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
}
static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = {
- .set = vcn_v4_0_set_interrupt_state,
.process = vcn_v4_0_process_interrupt,
};
@@ -2073,10 +2225,10 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v4_0_irq_funcs;
- adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v4_0_ras_irq_funcs;
}
}
@@ -2084,7 +2236,6 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev)
static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
.name = "vcn_v4_0",
.early_init = vcn_v4_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v4_0_sw_init,
.sw_fini = vcn_v4_0_sw_fini,
.hw_init = vcn_v4_0_hw_init,
@@ -2093,12 +2244,10 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
.resume = vcn_v4_0_resume,
.is_idle = vcn_v4_0_is_idle,
.wait_for_idle = vcn_v4_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v4_0_set_clockgating_state,
- .set_powergating_state = vcn_v4_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
};
const struct amdgpu_ip_block_version vcn_v4_0_ip_block = {
@@ -2156,7 +2305,7 @@ static struct amdgpu_vcn_ras vcn_v4_0_ras = {
static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[VCN_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 0):
adev->vcn.ras = &vcn_v4_0_ras;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index f85d18cd74ec..eacf4e93ba2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -31,6 +31,7 @@
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
+#include "vcn_v4_0_3.h"
#include "mmsch_v4_0_3.h"
#include "vcn/vcn_4_0_3_offset.h"
@@ -44,70 +45,156 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
+
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
+#define NORMALIZE_VCN_REG_OFFSET(offset) \
+ (offset & 0x1FFFF)
static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v4_0_3_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v4_0_3_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
int inst_idx, bool indirect);
+
+static inline bool vcn_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
+{
+ return (adev->vcn.caps & AMDGPU_VCN_CAPS(RRMT_ENABLED)) == 0;
+}
+
/**
* vcn_v4_0_3_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int vcn_v4_0_3_early_init(void *handle)
+static int vcn_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
- /* re-use enc ring as unified ring */
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
vcn_v4_0_3_set_unified_ring_funcs(adev);
vcn_v4_0_3_set_irq_funcs(adev);
vcn_v4_0_3_set_ras_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v4_0_3_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int vcn_v4_0_3_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+
+ if (amdgpu_dpm_reset_vcn_is_supported(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ return 0;
+}
+
+static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+
+ return 0;
}
/**
* vcn_v4_0_3_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v4_0_3_sw_init(void *handle)
+static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r, vcn_inst;
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
-
- amdgpu_vcn_setup_ucode(adev);
-
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
-
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
if (r)
return r;
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst->ras_poison_irq);
+
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
vcn_inst = GET_INST(VCN, i);
@@ -125,18 +212,20 @@ static int vcn_v4_0_3_sw_init(void *handle)
ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
+
+ /* There are no per-instance irq source IDs on 4.0.3, the IH
+ * packets use a separate field to differentiate instances.
+ */
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0,
AMDGPU_RING_PRIO_DEFAULT,
&adev->vcn.inst[i].sched_score);
if (r)
return r;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
- fw_shared->sq.is_enabled = true;
+ vcn_v4_0_3_fw_shared_init(adev, i);
- if (amdgpu_vcnfw_log)
- amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
}
if (amdgpu_sriov_vf(adev)) {
@@ -145,9 +234,6 @@ static int vcn_v4_0_3_sw_init(void *handle)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
-
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
r = amdgpu_vcn_ras_sw_init(adev);
if (r) {
@@ -156,24 +242,28 @@ static int vcn_v4_0_3_sw_init(void *handle)
}
}
- return 0;
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0_3, ARRAY_SIZE(vcn_reg_list_4_0_3));
+ if (r)
+ return r;
+
+ return amdgpu_vcn_sysfs_reset_mask_init(adev);
}
/**
* vcn_v4_0_3_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v4_0_3_sw_fini(void *handle)
+static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r, idx;
if (drm_dev_enter(&adev->ddev, &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->present_flag_0 = 0;
@@ -185,32 +275,63 @@ static int vcn_v4_0_3_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
- r = amdgpu_vcn_sw_fini(adev);
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ amdgpu_vcn_sw_fini(adev, i);
+
+ return 0;
+}
+
+static int vcn_v4_0_3_hw_init_inst(struct amdgpu_vcn_inst *vinst)
+{
+ int vcn_inst;
+ struct amdgpu_device *adev = vinst->adev;
+ struct amdgpu_ring *ring;
+ int inst_idx = vinst->inst;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+ if (ring->use_doorbell) {
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst,
+ adev->vcn.inst[inst_idx].aid_id);
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+ }
+
+ return 0;
}
/**
* vcn_v4_0_3_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v4_0_3_hw_init(void *handle)
+static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
- int i, r, vcn_inst;
+ struct amdgpu_vcn_inst *vinst;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
r = vcn_v4_0_3_start_sriov(adev);
if (r)
- goto done;
+ return r;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
ring = &adev->vcn.inst[i].ring_enc[0];
@@ -220,59 +341,55 @@ static int vcn_v4_0_3_hw_init(void *handle)
ring->sched.ready = true;
}
} else {
+ /* This flag is not set for VF, assumed to be disabled always */
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) &
+ 0x100)
+ adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- vcn_inst = GET_INST(VCN, i);
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
ring = &adev->vcn.inst[i].ring_enc[0];
+ vinst = &adev->vcn.inst[i];
+ vcn_v4_0_3_hw_init_inst(vinst);
- if (ring->use_doorbell) {
- adev->nbio.funcs->vcn_doorbell_range(
- adev, ring->use_doorbell,
- (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
- 9 * vcn_inst,
- adev->vcn.inst[i].aid_id);
-
- WREG32_SOC15(
- VCN, GET_INST(VCN, ring->me),
- regVCN_RB1_DB_CTRL,
- ring->doorbell_index
- << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
- VCN_RB1_DB_CTRL__EN_MASK);
-
- /* Read DB_CTRL to flush the write DB_CTRL command. */
- RREG32_SOC15(
- VCN, GET_INST(VCN, ring->me),
- regVCN_RB1_DB_CTRL);
- }
+ /* Re-init fw_shared when RAS fatal error occurred */
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ if (!fw_shared->sq.is_enabled)
+ vcn_v4_0_3_fw_shared_init(adev, i);
r = amdgpu_ring_test_helper(ring);
if (r)
- goto done;
+ return r;
}
}
-done:
- if (!r)
- DRM_DEV_INFO(adev->dev, "VCN decode initialized successfully(under %s).\n",
- (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
-
return r;
}
/**
* vcn_v4_0_3_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v4_0_3_hw_fini(void *handle)
+static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
- if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
- vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ cancel_delayed_work_sync(&vinst->idle_work);
+
+ if (vinst->cur_state != AMD_PG_STATE_GATE)
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN) && !amdgpu_sriov_vf(adev))
+ amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0);
return 0;
}
@@ -280,41 +397,47 @@ static int vcn_v4_0_3_hw_fini(void *handle)
/**
* vcn_v4_0_3_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v4_0_3_suspend(void *handle)
+static int vcn_v4_0_3_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v4_0_3_hw_fini(adev);
+ r = vcn_v4_0_3_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
- return r;
+ return 0;
}
/**
* vcn_v4_0_3_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v4_0_3_resume(void *handle)
+static int vcn_v4_0_3_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v4_0_3_hw_init(adev);
+ r = vcn_v4_0_3_hw_init(ip_block);
return r;
}
@@ -322,17 +445,18 @@ static int vcn_v4_0_3_resume(void *handle)
/**
* vcn_v4_0_3_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_3_mc_resume(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t offset, size, vcn_inst;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
vcn_inst = GET_INST(VCN, inst_idx);
@@ -396,18 +520,20 @@ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
/**
* vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Let the VCN memory controller know it's offsets with dpg mode
*/
-static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -510,13 +636,14 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i
/**
* vcn_v4_0_3_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t data;
int vcn_inst;
@@ -603,16 +730,18 @@ static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst
/**
* vcn_v4_0_3_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
* @sram_sel: sram select
- * @inst_idx: instance number index
* @indirect: indirectly write sram
*
* Disable clock gating for VCN block with dpg mode
*/
-static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
- int inst_idx, uint8_t indirect)
+static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -654,13 +783,14 @@ static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v4_0_3_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t data;
int vcn_inst;
@@ -705,18 +835,20 @@ static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_
/**
* vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Start VCN block with dpg mode
*/
-static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- volatile struct amdgpu_vcn4_fw_shared *fw_shared =
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared =
adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
- int vcn_inst;
+ int vcn_inst, ret;
uint32_t tmp;
vcn_inst = GET_INST(VCN, inst_idx);
@@ -740,7 +872,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
}
/* enable clock gating */
- vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v4_0_3_disable_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -790,7 +922,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v4_0_3_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v4_0_3_mc_resume_dpg_mode(vinst, indirect);
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
@@ -809,8 +941,13 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
VCN, 0, regUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
- if (indirect)
- amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
ring = &adev->vcn.inst[inst_idx].ring_enc[0];
@@ -842,6 +979,11 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
/*resetting done, fw can check RB ring */
fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
return 0;
}
@@ -870,8 +1012,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
struct mmsch_v4_0_cmd_end end = { {0} };
struct mmsch_v4_0_3_init_header header;
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
- volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_fw_shared_rb_setup *rb_setup;
direct_wt.cmd_header.command_type =
MMSCH_COMMAND__DIRECT_REG_WRITE;
@@ -882,6 +1024,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
vcn_inst = GET_INST(VCN, i);
+ vcn_v4_0_3_fw_shared_init(adev, vcn_inst);
+
memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
header.version = MMSCH_VERSION;
header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
@@ -894,7 +1038,7 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
@@ -1035,189 +1179,185 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
/**
* vcn_v4_0_3_start - VCN start
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Start VCN block
*/
-static int vcn_v4_0_3_start(struct amdgpu_device *adev)
+static int vcn_v4_0_3_start(struct amdgpu_vcn_inst *vinst)
{
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
struct amdgpu_ring *ring;
- int i, j, k, r, vcn_inst;
+ int j, k, r, vcn_inst;
uint32_t tmp;
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ vcn_inst = GET_INST(VCN, i);
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
+ UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
- vcn_inst = GET_INST(VCN, i);
- /* set VCN status busy */
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
- UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
-
- /*SW clock gating */
- vcn_v4_0_3_disable_clock_gating(adev, i);
-
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK,
- ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
- tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
-
- /* setup regUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
- WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
- tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup regUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
-
- vcn_v4_0_3_mc_resume(adev, i);
-
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
-
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* SW clock gating */
+ vcn_v4_0_3_disable_clock_gating(vinst);
- for (j = 0; j < 10; ++j) {
- uint32_t status;
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK,
+ ~UVD_VCPU_CNTL__CLK_EN_MASK);
- for (k = 0; k < 100; ++k) {
- status = RREG32_SOC15(VCN, vcn_inst,
- regUVD_STATUS);
- if (status & 2)
- break;
- mdelay(10);
- }
- r = 0;
- if (status & 2)
- break;
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- DRM_DEV_ERROR(adev->dev,
- "VCN decode not responding, trying to reset the VCPU!!!\n");
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
- regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
- regUVD_VCPU_CNTL),
- 0, ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
+ tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_3_mc_resume(vinst);
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, vcn_inst,
+ regUVD_STATUS);
+ if (status & 2)
+ break;
mdelay(10);
- r = -1;
}
+ r = 0;
+ if (status & 2)
+ break;
- if (r) {
- DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
- return r;
- }
+ DRM_DEV_ERROR(adev->dev,
+ "VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
+ regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
+ regUVD_VCPU_CNTL),
+ 0, ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ mdelay(10);
+ r = -1;
+ }
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
+ return r;
+ }
- ring = &adev->vcn.inst[i].ring_enc[0];
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* program the RB_BASE for ring buffer */
- WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
- upper_32_bits(ring->gpu_addr));
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
- ring->ring_size / sizeof(uint32_t));
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- /* resetting ring, fw should not check RB ring */
- tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
- tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
- WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
+ upper_32_bits(ring->gpu_addr));
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
- WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
+ ring->ring_size / sizeof(uint32_t));
- tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
- tmp |= VCN_RB_ENABLE__RB_EN_MASK;
- WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ /* resetting ring, fw should not check RB ring */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
- ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
- fw_shared->sq.queue_mode &=
- cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ fw_shared->sq.queue_mode &=
+ cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));
- }
return 0;
}
/**
* vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
*
* Stop VCN block with dpg mode
*/
-static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t tmp;
int vcn_inst;
@@ -1237,106 +1377,114 @@ static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
return 0;
}
/**
* vcn_v4_0_3_stop - VCN stop
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Stop VCN block
*/
-static int vcn_v4_0_3_stop(struct amdgpu_device *adev)
+static int vcn_v4_0_3_stop(struct amdgpu_vcn_inst *vinst)
{
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
- int i, r = 0, vcn_inst;
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ int r = 0, vcn_inst;
uint32_t tmp;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- vcn_inst = GET_INST(VCN, i);
+ vcn_inst = GET_INST(VCN, i);
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- vcn_v4_0_3_stop_dpg_mode(adev, i);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_3_stop_dpg_mode(vinst);
+ goto Done;
+ }
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
- UVD_STATUS__IDLE, 0x7);
- if (r)
- goto Done;
-
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
- tmp);
- if (r)
- goto Done;
-
- /* stall UMC channel */
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
- tmp);
- if (r)
- goto Done;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
+ UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto Done;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
+ tmp);
+ if (r)
+ goto Done;
+
+ /* stall UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
+ tmp);
+ if (r)
+ goto Done;
- /* Unblock VCPU Register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* Block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* Hold VCPU in reset */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- /* reset LMI UMC/LMI/VCPU */
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+ /* reset LMI UMC/LMI/VCPU */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
- /* clear VCN status */
- WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+ /* clear VCN status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
- /* apply HW clock gating */
- vcn_v4_0_3_enable_clock_gating(adev, i);
- }
-Done:
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ /* apply HW clock gating */
+ vcn_v4_0_3_enable_clock_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+Done:
return 0;
}
/**
* vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @new_state: pause state
*
* Pause dpg mode for VCN block
*/
-static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
- struct dpg_pause_state *new_state)
+static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
return 0;
@@ -1380,6 +1528,51 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
regUVD_RB_WPTR);
}
+void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ /* Use normalized offsets when required */
+ if (vcn_v4_0_3_normalizn_reqd(ring->adev))
+ reg = NORMALIZE_VCN_REG_OFFSET(reg);
+
+ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
+void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ /* Use normalized offsets when required */
+ if (vcn_v4_0_3_normalizn_reqd(ring->adev))
+ reg = NORMALIZE_VCN_REG_OFFSET(reg);
+
+ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for reg writes */
+ vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
+ vmid * hub->ctx_addr_distance,
+ lower_32_bits(pd_addr), 0xffffffff);
+}
+
+void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ /* VCN engine access for HDP flush doesn't work when RRMT is enabled.
+ * This is a workaround to avoid any HDP flush through VCN ring.
+ */
+}
+
/**
* vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer
*
@@ -1403,6 +1596,34 @@ static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
+static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r = 0;
+ int vcn_inst;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ vcn_inst = GET_INST(VCN, ring->me);
+ r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst);
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r);
+ return r;
+ }
+
+ /* This flag is not set for VF, assumed to be disabled always */
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+ adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+ vcn_v4_0_3_hw_init_inst(vinst);
+ vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram);
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
@@ -1419,7 +1640,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
.emit_ib = vcn_v2_0_enc_ring_emit_ib,
.emit_fence = vcn_v2_0_enc_ring_emit_fence,
- .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush,
+ .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
.test_ring = amdgpu_vcn_enc_ring_test_ring,
.test_ib = amdgpu_vcn_unified_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
@@ -1427,9 +1649,10 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_vcn_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
- .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v4_0_3_ring_reset,
};
/**
@@ -1450,19 +1673,18 @@ static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev)
adev->vcn.inst[i].aid_id =
vcn_inst / adev->vcn.num_inst_per_aid;
}
- DRM_DEV_INFO(adev->dev, "VCN decode is enabled in VM mode\n");
}
/**
* vcn_v4_0_3_is_idle - check VCN block is idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block structure
*
* Check whether VCN block is idle
*/
-static bool vcn_v4_0_3_is_idle(void *handle)
+static bool vcn_v4_0_3_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1476,13 +1698,13 @@ static bool vcn_v4_0_3_is_idle(void *handle)
/**
* vcn_v4_0_3_wait_for_idle - wait for VCN block idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Wait for VCN block idle
*/
-static int vcn_v4_0_3_wait_for_idle(void *handle)
+static int vcn_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1497,64 +1719,58 @@ static int vcn_v4_0_3_wait_for_idle(void *handle)
/* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: clock gating state
*
* Set VCN block clockgating state
*/
-static int vcn_v4_0_3_set_clockgating_state(void *handle,
+static int vcn_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (enable) {
if (RREG32_SOC15(VCN, GET_INST(VCN, i),
regUVD_STATUS) != UVD_STATUS__IDLE)
return -EBUSY;
- vcn_v4_0_3_enable_clock_gating(adev, i);
+ vcn_v4_0_3_enable_clock_gating(vinst);
} else {
- vcn_v4_0_3_disable_clock_gating(adev, i);
+ vcn_v4_0_3_disable_clock_gating(vinst);
}
}
return 0;
}
-/**
- * vcn_v4_0_3_set_powergating_state - set VCN block powergating state
- *
- * @handle: amdgpu_device pointer
- * @state: power gating state
- *
- * Set VCN block powergating state
- */
-static int vcn_v4_0_3_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v4_0_3_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
/* for SRIOV, guest should not control VCN Power-gating
* MMSCH FW should control Power-gating and clock-gating
* guest should avoid touching CGC and PG
*/
if (amdgpu_sriov_vf(adev)) {
- adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v4_0_3_stop(adev);
+ ret = vcn_v4_0_3_stop(vinst);
else
- ret = vcn_v4_0_3_start(adev);
+ ret = vcn_v4_0_3_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
return ret;
}
@@ -1620,11 +1836,24 @@ static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int vcn_v4_0_3_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = {
.set = vcn_v4_0_3_set_interrupt_state,
.process = vcn_v4_0_3_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs vcn_v4_0_3_ras_irq_funcs = {
+ .set = vcn_v4_0_3_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
/**
* vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions
*
@@ -1640,12 +1869,15 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
adev->vcn.inst->irq.num_types++;
}
adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
+
+ adev->vcn.inst->ras_poison_irq.num_types = 1;
+ adev->vcn.inst->ras_poison_irq.funcs = &vcn_v4_0_3_ras_irq_funcs;
}
static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
.name = "vcn_v4_0_3",
.early_init = vcn_v4_0_3_early_init,
- .late_init = NULL,
+ .late_init = vcn_v4_0_3_late_init,
.sw_init = vcn_v4_0_3_sw_init,
.sw_fini = vcn_v4_0_3_sw_fini,
.hw_init = vcn_v4_0_3_hw_init,
@@ -1654,12 +1886,10 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
.resume = vcn_v4_0_3_resume,
.is_idle = vcn_v4_0_3_is_idle,
.wait_for_idle = vcn_v4_0_3_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
- .set_powergating_state = vcn_v4_0_3_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
};
const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = {
@@ -1728,14 +1958,143 @@ static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
}
+static uint32_t vcn_v4_0_3_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V4_0_3_VCPU_VCODEC:
+ reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v4_0_3_query_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V4_0_3_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v4_0_3_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = {
.query_ras_error_count = vcn_v4_0_3_query_ras_error_count,
.reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
+ .query_poison_status = vcn_v4_0_3_query_poison_status,
+};
+
+static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* Error codes come from the SMU driver interface (driver_if) header file */
+static int vcn_v4_0_3_err_codes[] = {
+ 14, 15, /* VCN */
};
+static bool vcn_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ vcn_v4_0_3_err_codes,
+ ARRAY_SIZE(vcn_v4_0_3_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops vcn_v4_0_3_aca_bank_ops = {
+ .aca_bank_parser = vcn_v4_0_3_aca_bank_parser,
+ .aca_bank_is_valid = vcn_v4_0_3_aca_bank_is_valid,
+};
+
+static const struct aca_info vcn_v4_0_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &vcn_v4_0_3_aca_bank_ops,
+};
+
+static int vcn_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+ adev->vcn.inst->ras_poison_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->vcn.inst->ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN,
+ &vcn_v4_0_3_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
static struct amdgpu_vcn_ras vcn_v4_0_3_ras = {
.ras_block = {
.hw_ops = &vcn_v4_0_3_ras_hw_ops,
+ .ras_late_init = vcn_v4_0_3_ras_late_init,
},
};
@@ -1760,6 +2119,11 @@ static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
tmp, 0, indirect);
+ tmp = UVD_VCPU_INT_EN2__RASCNTL_VCPU_VCODEC_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx,
+ SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_VCPU_INT_EN2),
+ tmp, 0, indirect);
+
tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
WREG32_SOC15_DPG_MODE(inst_idx,
SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
index 0b046114373a..aeab89853a92 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
@@ -24,6 +24,21 @@
#ifndef __VCN_V4_0_3_H__
#define __VCN_V4_0_3_H__
+enum amdgpu_vcn_v4_0_3_sub_block {
+ AMDGPU_VCN_V4_0_3_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V4_0_3_MAX_SUB_BLOCK,
+};
+
extern const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block;
+void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+
+void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr);
+void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring);
+
#endif /* __VCN_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
new file mode 100644
index 000000000000..b107ee80e472
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -0,0 +1,1723 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_cs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "mmsch_v4_0.h"
+#include "vcn_v4_0_5.h"
+
+#include "vcn/vcn_4_0_5_offset.h"
+#include "vcn/vcn_4_0_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+
+#include <drm/drm_drv.h>
+
+#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL
+#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX
+#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA
+#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX
+
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
+#define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000)
+#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 0x38000)
+
+#define VCN_HARVEST_MMSCH 0
+
+#define RDECODE_MSG_CREATE 0x00000000
+#define RDECODE_MESSAGE_CREATE 0x00000001
+
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_5[] = {
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
+static int amdgpu_ih_clientid_vcns[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v4_0_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring);
+
+/**
+ * vcn_v4_0_5_early_init - set function pointers and load microcode
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set the ring, irq and power-gating callbacks, then run
+ * amdgpu_vcn_early_init() per instance (presumably the microcode load).
+ */
+static int vcn_v4_0_5_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6))
+ adev->vcn.per_inst_fw = true; /* only set for IP version 4.0.6 */
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
+ vcn_v4_0_5_set_unified_ring_funcs(adev); /* runs once, after the brace-less loop above */
+ vcn_v4_0_5_set_irq_funcs(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v4_0_5_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r; /* first failing instance aborts early init */
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_5_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Per instance: sw init, ucode setup, irq sources, unified ring and fw_shared
+ */
+static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue; /* harvested instances get no sw state, irq or ring */
+
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ atomic_set(&adev->vcn.inst[i].sched_score, 0);
+
+ /* VCN UNIFIED TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+ if (amdgpu_sriov_vf(adev))
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ i * (adev->vcn.inst[i].num_enc_rings + 1) + 1;
+ else
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 2 + 8 * i;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ snprintf(ring->name, sizeof(ring->name), "vcn_unified_%d", i); /* bounded by the name buffer */
+
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG);
+ fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ?
+ AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU;
+
+ if (amdgpu_sriov_vf(adev))
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT); /* same byte order as the other flag writes */
+ fw_shared->drm_key_wa.method =
+ AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode;
+ }
+
+ adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; /* per-queue reset is not advertised on SR-IOV VFs */
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0_5, ARRAY_SIZE(vcn_reg_list_4_0_5));
+
+ return r; /* 0 on success, otherwise the first error hit above */
+}
+
+/**
+ * vcn_v4_0_5_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Clear the fw_shared flags, then suspend each instance and free its sw state
+ */
+static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r, idx;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) { /* paired with drm_dev_exit() below */
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = 0;
+ }
+
+ drm_dev_exit(idx);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { /* NOTE(review): harvested instances are not skipped here - confirm */
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r; /* this and later instances are not freed on this path */
+
+ amdgpu_vcn_sw_fini(adev, i);
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_5_hw_init - start and test VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Program the doorbell range and ring-test every non-harvested instance
+ */
+static int vcn_v4_0_5_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); /* base of the 8-wide stride used per instance */
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r; /* first failing ring test aborts hw init */
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_5_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancel each instance idle work and power gate it if it may still be up
+ */
+static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ cancel_delayed_work_sync(&vinst->idle_work);
+
+ if (!amdgpu_sriov_vf(adev)) { /* power gating is not touched on an SR-IOV VF */
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, regUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); /* return value is ignored */
+ }
+ }
+ }
+
+ return 0; /* always reports success */
+}
+
+/**
+ * vcn_v4_0_5_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run hw_fini, then amdgpu_vcn_suspend() per instance; stops at first error
+ */
+static int vcn_v4_0_5_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ r = vcn_v4_0_5_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
+
+ return r; /* 0 here: every earlier error returned already */
+}
+
+/**
+ * vcn_v4_0_5_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run amdgpu_vcn_resume() per instance, then hw_init; stops at first error
+ */
+static int vcn_v4_0_5_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
+
+ r = vcn_v4_0_5_hw_init(ip_block);
+
+ return r; /* result of hw_init */
+}
+
+/**
+ * vcn_v4_0_5_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Let the VCN memory controller know its offsets
+ */
+static void vcn_v4_0_5_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* page-aligned ucode size plus 8 bytes */
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0; /* window 0 uses the TMR address, so window 1 starts at gpu_addr */
+ } else {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size; /* window 0 also uses gpu_addr, so window 1 starts size bytes in */
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+}
+
+/**
+ * vcn_v4_0_5_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know its offsets with dpg mode
+ */
+static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo),
+ 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi),
+ 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
+ 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
+ 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG),
+ adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v4_0_5_disable_static_power_gating - disable VCN static power gating
+ *
+ * @vinst: VCN instance
+ *
+ * Write a power config for ONO2..ONO5 in turn, waiting on each status field
+ */
+static void vcn_v4_0_5_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { /* ONO2 gets config 1, ONO3..ONO5 get config 2 */
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+ } else { /* every ONO gets config 1 and is waited on until its status field reads 0 */
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 1 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 0, UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+ }
+
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~0x103; /* clears bits 0, 1 and 8 */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | /* NOTE(review): PGFSM_CONFIG constant ORed into POWER_STATUS - confirm */
+ UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+}
+
+/**
+ * vcn_v4_0_5_enable_static_power_gating - enable VCN static power gating
+ *
+ * @vinst: VCN instance
+ *
+ * With AMD_PG_SUPPORT_VCN: mark tiles off, then write config 2 to ONO5..ONO2
+ */
+static void vcn_v4_0_5_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { /* nothing is written when the flag is clear */
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT); /* ONO5 first, ONO2 last */
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG,
+ 2 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+ }
+}
+
+/**
+ * vcn_v4_0_5_disable_clock_gating - disable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Returns early if AMD_CG_SUPPORT_VCN_MGCG is set; else reprograms UVD/SUVD CGC_CTRL and CGC_GATE
+ */
+static void vcn_v4_0_5_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* VCN disable CGC: clear DYN_CLOCK_MODE, set gate delay timer and clock-off delay */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, inst, regUVD_CGC_GATE, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_5_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
+ *
+ * @vinst: VCN instance
+ * @sram_sel: sram select, passed through to WREG32_SOC15_DPG_MODE
+ * @indirect: indirectly write sram, passed through to WREG32_SOC15_DPG_MODE
+ *
+ * Returns early if AMD_CG_SUPPORT_VCN_MGCG is set; else writes the four CGC registers in DPG mode
+ */
+static void vcn_v4_0_5_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t reg_data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* enable sw clock gating control; value is built from zero, not read back */
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating: UVD_CGC_GATE is written as 0 */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating: UVD_SUVD_CGC_GATE is written as 1 */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL (written as 0) */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * vcn_v4_0_5_enable_clock_gating - enable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Returns early if AMD_CG_SUPPORT_VCN_MGCG is set; else sets the UVD/SUVD CGC_CTRL mode bits
+ */
+static void vcn_v4_0_5_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ return;
+
+ /* enable VCN CGC; the DYN_CLOCK_MODE line ORs in 0 and so leaves that bit as read */
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v4_0_5_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode. Returns 0, or the amdgpu_vcn_psp_update_sram() error code.
+ */
+static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int ret;
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* program clock gating through the DPG-mode disable helper (sram_sel = 0) */
+ vcn_v4_0_5_disable_clock_gating_dpg_mode(vinst, 0, indirect);
+
+ /* enable VCPU clock, with BLK_RST still set */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v4_0_5_mc_resume_dpg_mode(vinst, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL);
+
+ return 0;
+}
+
+
+/**
+ * vcn_v4_0_5_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block. Returns 0 on success or if harvested, -1 if UVD_STATUS bit 1 never gets set.
+ */
+static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, true, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v4_0_5_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+ /* disable VCN power gating */
+ vcn_v4_0_5_disable_static_power_gating(vinst);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
+
+ /* SW clock gating */
+ vcn_v4_0_5_disable_clock_gating(vinst);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_5_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) { /* up to 10 attempts; r is set on every pass */
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) { /* poll UVD_STATUS bit 1, up to 100 times */
+ status = RREG32_SOC15(VCN, i, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ }
+
+ if (amdgpu_emu_mode == 1) { /* emulation: retry without resetting the VCPU */
+ r = -1;
+ if (status & 2) {
+ r = 0;
+ break;
+ }
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev,
+ "VCN[%d] is not responding, trying to reset VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+ }
+
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_5_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block with dpg mode; the results of the three register waits are not checked.
+ */
+static void vcn_v4_0_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t tmp;
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+}
+
+/**
+ * vcn_v4_0_5_stop - VCN stop
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block. Returns 0 on success or if harvested, else the failing register-wait result.
+ */
+static int vcn_v4_0_5_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+ uint32_t tmp;
+ int r = 0;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_5_stop_dpg_mode(vinst);
+ r = 0;
+ goto done;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset: LMI_UMC first, then LMI, as two separate writes */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
+
+ /* apply HW clock gating */
+ vcn_v4_0_5_enable_clock_gating(vinst);
+
+ /* enable VCN power gating */
+ vcn_v4_0_5_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+
+done: /* reached on success and on wait failure alike; only the harvested case skips it */
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+
+ return r;
+}
+
+/**
+ * vcn_v4_0_5_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @vinst: VCN instance
+ * @new_state: pause state
+ *
+ * Acts only if @new_state->fw_based differs from the cached pause state. Always returns 0.
+ */
+static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t reg_data = 0;
+ int ret_code;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
+ DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d",
+ adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+ reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ if (!ret_code) {
+ /* pause DPG; skipped when the wait above fails, but the cache is still updated */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ }
+ } else {
+ /* unpause dpg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v4_0_5_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns regUVD_RB_RPTR of instance ring->me; a ring other than ring_enc[0] is only logged
+ */
+static uint64_t vcn_v4_0_5_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v4_0_5_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns *ring->wptr_cpu_addr when ring->use_doorbell is set, else regUVD_RB_WPTR
+ */
+static uint64_t vcn_v4_0_5_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
+}
+
+/**
+ * vcn_v4_0_5_unified_ring_set_wptr - set unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the low 32 bits of ring->wptr via the doorbell if enabled, else via regUVD_RB_WPTR
+ */
+static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid, /* not referenced in this function */
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ r = vcn_v4_0_5_stop(vinst); /* reset = full stop followed by full start of the instance */
+ if (r)
+ return r; /* NOTE(review): returns without amdgpu_ring_reset_helper_end() - confirm intended */
+ r = vcn_v4_0_5_start(vinst);
+ if (r)
+ return r; /* NOTE(review): same early return as above */
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC, /* table is non-const: secure_submission_supported is set at init */
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v4_0_5_unified_ring_get_rptr,
+ .get_wptr = vcn_v4_0_5_unified_ring_get_wptr,
+ .set_wptr = vcn_v4_0_5_unified_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v4_0_5_ring_reset,
+};
+
+/**
+ * vcn_v4_0_5_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For each non-harvested instance, points ring_enc[0] at the unified funcs and sets its me index
+ */
+static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5))
+ vcn_v4_0_5_unified_ring_vm_funcs.secure_submission_supported = true;
+
+ adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_5_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i;
+ }
+}
+
+/**
+ * vcn_v4_0_5_is_idle - check VCN block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Returns true only if every non-harvested instance reads UVD_STATUS == UVD_STATUS__IDLE
+ */
+static bool vcn_v4_0_5_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ret &= (RREG32_SOC15(VCN, i, regUVD_STATUS) == UVD_STATUS__IDLE);
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_5_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Returns 0, or the first non-zero SOC15_WAIT_ON_RREG result (later instances are then skipped)
+ */
+static int vcn_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ ret = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_5_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: clock gating state
+ *
+ * Returns 0, or -EBUSY if gating is requested while an instance's UVD_STATUS is not idle
+ */
+static int vcn_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (enable) {
+ if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+ vcn_v4_0_5_enable_clock_gating(vinst);
+ } else {
+ vcn_v4_0_5_disable_clock_gating(vinst);
+ }
+ }
+
+ return 0;
+}
+
+static int vcn_v4_0_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
+{
+ int ret = 0;
+
+ if (state == vinst->cur_state) /* already in the requested state */
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE) /* gate = stop the instance; any other state = start it */
+ ret = vcn_v4_0_5_stop(vinst);
+ else
+ ret = vcn_v4_0_5_start(vinst);
+
+ if (!ret) /* record the new state only when the transition succeeded */
+ vinst->cur_state = state;
+
+ return ret;
+}
+
+/**
+ * vcn_v4_0_5_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Always returns 0; unhandled client or source ids are only logged
+ */
+static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+ break;
+ case VCN_4_0__SRCID_UVD_POISON:
+ amdgpu_vcn_process_poison_irq(adev, source, entry);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v4_0_5_irq_funcs = {
+ .process = vcn_v4_0_5_process_interrupt, /* table is installed by vcn_v4_0_5_set_irq_funcs() */
+};
+
+/**
+ * vcn_v4_0_5_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Per non-harvested instance: irq.num_types = num_enc_rings + 1, irq.funcs = vcn_v4_0_5_irq_funcs
+ */
+static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
+ adev->vcn.inst[i].irq.funcs = &vcn_v4_0_5_irq_funcs;
+ }
+}
+
+static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = {
+ .name = "vcn_v4_0_5", /* table is referenced by vcn_v4_0_5_ip_block.funcs */
+ .early_init = vcn_v4_0_5_early_init,
+ .sw_init = vcn_v4_0_5_sw_init,
+ .sw_fini = vcn_v4_0_5_sw_fini,
+ .hw_init = vcn_v4_0_5_hw_init,
+ .hw_fini = vcn_v4_0_5_hw_fini,
+ .suspend = vcn_v4_0_5_suspend,
+ .resume = vcn_v4_0_5_resume,
+ .is_idle = vcn_v4_0_5_is_idle,
+ .wait_for_idle = vcn_v4_0_5_wait_for_idle,
+ .set_clockgating_state = vcn_v4_0_5_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN, /* non-static: declared extern in vcn_v4_0_5.h */
+ .major = 4,
+ .minor = 0,
+ .rev = 5,
+ .funcs = &vcn_v4_0_5_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h
new file mode 100644
index 000000000000..ff9b3d6f6a47
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V4_0_5_H__
+#define __VCN_V4_0_5_H__
+
+enum amdgpu_vcn_v4_0_5_sub_block {
+ AMDGPU_VCN_V4_0_5_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V4_0_5_MAX_SUB_BLOCK, /* number of sub-blocks listed above; keep last */
+};
+
+extern const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block;
+
+#endif /* __VCN_V4_0_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
new file mode 100644
index 000000000000..0202df5db1e1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -0,0 +1,1442 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "vcn_v5_0_0.h"
+
+#include <drm/drm_drv.h>
+
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { /* passed to amdgpu_vcn_reg_dump_init() in sw_init */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
+static int amdgpu_ih_clientid_vcns[] = { /* indexed by VCN instance in sw_init; two entries only */
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v5_0_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
+
+/**
+ * vcn_v5_0_0_early_init - set function pointers and load microcode
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Give every instance one enc ring, install the ring and irq function
+ * pointers, then run amdgpu_vcn_early_init() per instance
+ */
+static int vcn_v5_0_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, ret;
+
+	/* re-use enc ring as unified ring */
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+		adev->vcn.inst[inst].num_enc_rings = 1;
+
+	vcn_v5_0_0_set_unified_ring_funcs(adev);
+	vcn_v5_0_0_set_irq_funcs(adev);
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[inst];
+
+		vinst->set_pg_state = vcn_v5_0_0_set_pg_state;
+		ret = amdgpu_vcn_early_init(adev, inst);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Per-instance firmware, IRQ source and unified ring setup; 0 or first error
+ */
+static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i)) /* skip instances flagged in harvest_config */
+ continue;
+
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ atomic_set(&adev->vcn.inst[i].sched_score, 0);
+
+ /* VCN UNIFIED TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ /* VCN POISON TRAP (registered on the same irq source as the trap above) */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+ VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; /* hw_init's range base + 2 */
+
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ sprintf(ring->name, "vcn_unified_%d", i); /* NOTE(review): unbounded sprintf; presumably ring->name is large enough - confirm */
+
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode;
+ }
+
+ adev->vcn.supported_reset = /* derived from instance 0's ring only */
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_5_0, ARRAY_SIZE(vcn_reg_list_5_0));
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_0_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Clear the fw_shared flags, suspend each instance and free up sw allocation
+ */
+static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r, idx;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) { /* fw_shared is only touched when drm_dev_enter() succeeds */
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = 0;
+ }
+
+ drm_dev_exit(idx);
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { /* no harvest_config check here, unlike the loop above */
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r; /* NOTE(review): early return skips amdgpu_vcn_sw_fini() below - confirm intended */
+ }
+
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ amdgpu_vcn_sw_fini(adev, i);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_0_hw_init - start and test VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Program the doorbell range of every non-harvested instance and run the
+ * ring test on its unified ring
+ */
+static int vcn_v5_0_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, ret;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		struct amdgpu_ring *ring = &adev->vcn.inst[inst].ring_enc[0];
+
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+
+		adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+			((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * inst), inst);
+
+		ret = amdgpu_ring_test_helper(ring);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancel pending idle work and request power gating where needed; returns 0
+ */
+static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (adev->vcn.harvest_config & (1 << i)) /* skip instances flagged in harvest_config */
+ continue;
+
+ cancel_delayed_work_sync(&vinst->idle_work);
+
+ if (!amdgpu_sriov_vf(adev)) { /* nothing is gated when amdgpu_sriov_vf() is true */
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || /* DPG: always gate */
+ (vinst->cur_state != AMD_PG_STATE_GATE && /* else: only if not already gated ... */
+ RREG32_SOC15(VCN, i, regUVD_STATUS))) { /* ... and UVD_STATUS reads non-zero */
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_0_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Run hw_fini, then amdgpu_vcn_suspend() per instance; stops at the first error
+ */
+static int vcn_v5_0_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret, inst;
+
+	ret = vcn_v5_0_0_hw_fini(ip_block);
+	if (ret)
+		return ret;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		ret = amdgpu_vcn_suspend(adev, inst);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * vcn_v5_0_0_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Call amdgpu_vcn_resume() for every instance, then hw init the VCN block.
+ * The first failing step ends the resume and its error code is returned.
+ */
+static int vcn_v5_0_0_resume(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, ret;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		ret = amdgpu_vcn_resume(adev, inst);
+		if (ret)
+			return ret;
+	}
+
+	/* all instances resumed: the hw_init result is the final status */
+	return vcn_v5_0_0_hw_init(ip_block);
+}
+
+/**
+ * vcn_v5_0_0_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Let the VCN memory controller know its offsets
+ */
+static void vcn_v5_0_0_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0; /* PSP load: windows 1 and 2 start at gpu_addr itself */
+ } else {
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size; /* windows 1 and 2 start 'size' bytes past gpu_addr */
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context (placed right after the stack window) */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window: the fw_shared buffer */
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)));
+}
+
+/**
+ * vcn_v5_0_0_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know its offsets with dpg mode
+ */
+static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else { /* indirect + PSP load: fw window BARs and offset are written as 0 */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size; /* windows 1 and 2 start 'size' bytes past gpu_addr */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect) /* window 0 size: real size when direct, 0 when indirect */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else { /* indirect: stack window BARs and offset are written as 0 */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context (written the same way for direct and indirect) */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window: the fw_shared buffer */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG),
+ adev->gfx.config.gb_addr_config, 0, indirect);
+
+ return;
+}
+
+/**
+ * vcn_v5_0_0_disable_static_power_gating - disable VCN static power gating
+ *
+ * @vinst: VCN instance
+ *
+ * Write each ONO2..ONO5 power config in turn, waiting on its status each time
+ */
+static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { /* ONO2 gets config 1, ONO3..ONO5 get config 2 */
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+ } else { /* no AMD_PG_SUPPORT_VCN: config 1 for ONO2..ONO5, each waited down to status 0 */
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+
+ data = 1 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0,
+ UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+ }
+
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~0x103; /* clears bits 0, 1 and 8 of UVD_POWER_STATUS */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
+ UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+ return;
+}
+
+/**
+ * vcn_v5_0_0_enable_static_power_gating - enable VCN static power gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable static power gating; does nothing unless AMD_PG_SUPPORT_VCN is set
+ */
+static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t data;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT; /* ONO5 first, then down to ONO2 */
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK);
+
+ data = 2 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT;
+ WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS,
+ 1 << UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS__SHIFT,
+ UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK);
+ }
+ return;
+}
+
+/**
+ * vcn_v5_0_0_disable_clock_gating - disable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Stub: no clock gating registers are programmed here
+ */
+static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+	/* intentionally empty */
+}
+
+#if 0
+/**
+ * vcn_v5_0_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
+ *
+ * @vinst: VCN instance
+ * @sram_sel: sram select
+ * @indirect: indirectly write sram
+ *
+ * Compiled out by the surrounding #if 0; the body is an empty stub
+ */
+static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
+{
+ return;
+}
+#endif /* 0 */
+
+/**
+ * vcn_v5_0_0_enable_clock_gating - enable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Stub: no clock gating registers are programmed here
+ */
+static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+	/* intentionally empty */
+}
+
+/**
+ * vcn_v5_0_0_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode. Returns 0, or the PSP sram update error.
+ */
+static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+	bool indirect)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	int inst_idx = vinst->inst;
+	struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+	struct amdgpu_ring *ring;
+	uint32_t tmp;
+	int ret;
+
+	/* disable register anti-hang mechanism */
+	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
+		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+	/* enable dynamic power gating mode */
+	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS);
+	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+	WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp);
+
+	if (indirect) /* rewind the DPG sram write cursor to the start of the buffer */
+		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+	/* enable VCPU clock */
+	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+	WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+		VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+	/* disable master interrupt */
+	WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+		VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect);
+
+	/* setup regUVD_LMI_CTRL */
+	tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+		UVD_LMI_CTRL__REQ_MODE_MASK |
+		UVD_LMI_CTRL__CRC_RESET_MASK |
+		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+		0x00100000L);
+	WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+		VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+	vcn_v5_0_0_mc_resume_dpg_mode(vinst, indirect);
+
+	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; /* same as above but without BLK_RST */
+	WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+		VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+	/* enable LMI MC and UMC channels */
+	tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+	WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+		VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+	/* enable master interrupt */
+	WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+		VCN, inst_idx, regUVD_MASTINT_EN),
+		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+	if (indirect) {
+		ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
+		if (ret) {
+			dev_err(adev->dev, "%s: vcn sram load failed %d\n", __func__, ret);
+			return ret;
+		}
+	}
+
+	ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+	WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
+
+	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
+	tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); /* RB1 stays disabled while its pointers are reset */
+	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+	WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
+	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);
+
+	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
+	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
+	ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+
+	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
+	tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+	WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
+		ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+		VCN_RB1_DB_CTRL__EN_MASK);
+
+	/* Keeping one read-back to ensure all register writes are done,
+	 * otherwise it may introduce race conditions.
+	 */
+	RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block. Returns 0 on success, non-zero if start-up fails.
+ */
+static int vcn_v5_0_0_start(struct amdgpu_vcn_inst *vinst)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	int i = vinst->inst;
+	struct amdgpu_vcn5_fw_shared *fw_shared;
+	struct amdgpu_ring *ring;
+	uint32_t tmp;
+	int j, k, r;
+
+	if (adev->vcn.harvest_config & (1 << i)) /* instance flagged in harvest_config */
+		return 0;
+
+	if (adev->pm.dpm_enabled)
+		amdgpu_dpm_enable_vcn(adev, true, i);
+
+	fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) /* DPG has its own start sequence */
+		return vcn_v5_0_0_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+	/* disable VCN power gating */
+	vcn_v5_0_0_disable_static_power_gating(vinst);
+
+	/* set VCN status busy */
+	tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+	WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
+
+	/* enable VCPU clock */
+	WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+		UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+	/* disable master interrupt */
+	WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
+		~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+	/* enable LMI MC and UMC channels */
+	WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
+		~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+	tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+	tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+	tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+	WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+	/* setup regUVD_LMI_CTRL */
+	tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
+	WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
+		UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+	vcn_v5_0_0_mc_resume(vinst);
+
+	/* VCN global tiling registers */
+	WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
+		adev->gfx.config.gb_addr_config);
+
+	/* unblock VCPU register access */
+	WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
+		~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+	/* release VCPU reset to boot */
+	WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+		~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+	for (j = 0; j < 10; ++j) { /* up to 10 attempts, each polling UVD_STATUS up to 100 times */
+		uint32_t status;
+
+		for (k = 0; k < 100; ++k) {
+			status = RREG32_SOC15(VCN, i, regUVD_STATUS);
+			if (status & 2)
+				break;
+			mdelay(10);
+			if (amdgpu_emu_mode == 1)
+				msleep(1);
+		}
+
+		if (amdgpu_emu_mode == 1) { /* emulation: retry without resetting the VCPU */
+			r = -1;
+			if (status & 2) {
+				r = 0;
+				break;
+			}
+		} else {
+			r = 0;
+			if (status & 2)
+				break;
+
+			dev_err(adev->dev,
+				"VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+			WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+				UVD_VCPU_CNTL__BLK_RST_MASK,
+				~UVD_VCPU_CNTL__BLK_RST_MASK);
+			mdelay(10);
+			WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+				~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+			mdelay(10);
+			r = -1;
+		}
+	}
+
+	if (r) {
+		dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+		return r;
+	}
+
+	/* enable master interrupt */
+	WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
+		UVD_MASTINT_EN__VCPU_EN_MASK,
+		~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+	/* clear the busy bit of UVD_STATUS */
+	WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
+		~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+	ring = &adev->vcn.inst[i].ring_enc[0];
+	WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+		ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+		VCN_RB1_DB_CTRL__EN_MASK);
+
+	WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+	WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+	WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
+
+	tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+	tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); /* RB1 stays disabled while its pointers are reset */
+	WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+	WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+	WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
+
+	tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
+	WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
+	ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
+
+	tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+	tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+	WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+	/* Keeping one read-back to ensure all register writes are done,
+	 * otherwise it may introduce race conditions.
+	 */
+	RREG32_SOC15(VCN, i, regUVD_STATUS);
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_stop_dpg_mode - stop a VCN instance running in DPG mode
+ *
+ * @vinst: VCN instance
+ *
+ * Unpause DPG, wait for the ring to drain, then clear the DPG mode bit.
+ */
+static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+	struct dpg_pause_state unpause = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+	struct amdgpu_device *adev = vinst->adev;
+	int inst_idx = vinst->inst;
+	uint32_t wptr;
+
+	/* request the unpaused state before waiting on the hardware */
+	vcn_v5_0_0_pause_dpg_mode(vinst, &unpause);
+
+	/* the UVD_POWER_STATUS field has to read back as 1 */
+	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+	/* wait until the read pointer has caught up with the write pointer */
+	wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, wptr, 0xFFFFFFFF);
+
+	/* clear UVD_PG_MODE to leave dynamic power gating mode */
+	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
+		~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+	/*
+	 * Keep one read-back so that all register writes above are done
+	 * before returning, otherwise it may introduce race conditions.
+	 */
+	RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+}
+
+/**
+ * vcn_v5_0_0_stop - stop one VCN instance
+ *
+ * @vinst: VCN instance
+ *
+ * Returns 0 on success or for a harvested instance, else the failed wait's result.
+ */
+static int vcn_v5_0_0_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ uint32_t tmp;
+ int r = 0;
+
+ if (adev->vcn.harvest_config & (1 << i)) /* harvested instance: nothing to stop */
+ return 0;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { /* DPG uses its own stop helper */
+ vcn_v5_0_0_stop_dpg_mode(vinst);
+ r = 0;
+ goto done;
+ }
+
+ /* wait for vcn idle: low three bits of UVD_STATUS must equal UVD_STATUS__IDLE */
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); /* all four bits set */
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel, then wait for its read/write clean bits */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset: LMI UMC first, then LMI, as two separate writes */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
+
+ /* enable VCN power gating */
+ vcn_v5_0_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+
+done: /* every exit after the harvest check lands here, success or failure */
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_0_pause_dpg_mode - pause or unpause DPG for a VCN instance
+ *
+ * @vinst: VCN instance
+ * @new_state: requested pause state, acted on only if it differs from the cached one
+ *
+ * Always returns 0; the results of the register waits are not propagated.
+ */
+static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+	struct dpg_pause_state *new_state)
+{
+	struct amdgpu_device *adev = vinst->adev;
+	int inst_idx = vinst->inst;
+	uint32_t reg_data = 0;
+	int ret_code;
+
+	/* pause/unpause if state is changed */
+	if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
+		DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d\n",
+			adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
+		reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) &
+			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+			ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
+				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+			if (!ret_code) {
+				/* pause DPG: only requested when the wait above succeeded */
+				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+				WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
+
+				/* wait for ACK (result intentionally not checked here) */
+				SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE,
+					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+			}
+		} else {
+			/* unpause dpg, no need to wait */
+			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+			WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
+		}
+		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified read pointer (UVD_RB_RPTR of ring->me)
+ */
+static uint64_t vcn_v5_0_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) /* logged only, not fatal */
+		DRM_ERROR("wrong ring id is identified in %s\n", __func__);
+
+	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v5_0_0_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns *ring->wptr_cpu_addr when doorbells are used, else UVD_RB_WPTR
+ */
+static uint64_t vcn_v5_0_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) /* logged only, not fatal */
+		DRM_ERROR("wrong ring id is identified in %s\n", __func__);
+
+	if (ring->use_doorbell)
+		return *ring->wptr_cpu_addr;
+	else
+		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
+}
+
+/**
+ * vcn_v5_0_0_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the low 32 bits of ring->wptr via doorbell or via UVD_RB_WPTR
+ */
+static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) /* logged only, not fatal */
+		DRM_ERROR("wrong ring id is identified in %s\n", __func__);
+
+	if (ring->use_doorbell) {
+		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr); /* CPU copy first */
+		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+	} else {
+		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
+	}
+}
+
+/* Reset the unified ring by stopping and restarting its whole VCN instance. */
+static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid,
+				 struct amdgpu_fence *timedout_fence)
+{
+	struct amdgpu_vcn_inst *vinst = &ring->adev->vcn.inst[ring->me];
+	int err;
+
+	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+	/* vmid is not used here: the instance is restarted as a whole */
+	err = vcn_v5_0_0_stop(vinst);
+	if (!err)
+		err = vcn_v5_0_0_start(vinst);
+	if (err)
+		return err;
+
+	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC, /* the unified ring is registered as an ENC-type ring */
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v5_0_0_unified_ring_get_rptr, /* pointer accessors are v5.0.0-local */
+ .get_wptr = vcn_v5_0_0_unified_ring_get_wptr,
+ .set_wptr = vcn_v5_0_0_unified_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib, /* packet emission reuses the vcn_v2_0 enc helpers */
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v5_0_0_ring_reset, /* stop + start of the owning VCN instance */
+};
+
+/**
+ * vcn_v5_0_0_set_unified_ring_funcs - set unified ring functions
+ * @adev: amdgpu_device pointer
+ *
+ * Points ring_enc[0] of every non-harvested instance at the unified ring ops.
+ */
+static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+		ring = &adev->vcn.inst[inst].ring_enc[0];
+		ring->funcs = &vcn_v5_0_0_unified_ring_vm_funcs;
+		ring->me = inst;
+	}
+}
+
+/**
+ * vcn_v5_0_0_is_idle - check VCN block is idle
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Reads UVD_STATUS of every non-harvested instance; true only if all are idle.
+ */
+static bool vcn_v5_0_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	bool all_idle = true;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+		if (RREG32_SOC15(VCN, inst, regUVD_STATUS) != UVD_STATUS__IDLE)
+			all_idle = false;
+	}
+
+	return all_idle;
+}
+
+/**
+ * vcn_v5_0_0_wait_for_idle - wait for VCN block idle
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Returns 0 once every non-harvested instance is idle, else the first wait error.
+ */
+static int vcn_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		int err;
+
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+		err = SOC15_WAIT_ON_RREG(VCN, inst, regUVD_STATUS,
+			UVD_STATUS__IDLE, UVD_STATUS__IDLE);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_set_clockgating_state - set VCN block clockgating state
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: clock gating state
+ *
+ * Returns 0, or -EBUSY if gating is requested while an instance is not idle.
+ */
+static int vcn_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+	enum amd_clockgating_state state)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	const bool gate = (state == AMD_CG_STATE_GATE);
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[inst];
+
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+		if (!gate) {
+			vcn_v5_0_0_disable_clock_gating(vinst);
+			continue;
+		}
+
+		/* gating is refused unless the instance currently reads idle */
+		if (RREG32_SOC15(VCN, inst, regUVD_STATUS) != UVD_STATUS__IDLE)
+			return -EBUSY;
+		vcn_v5_0_0_enable_clock_gating(vinst);
+	}
+
+	return 0;
+}
+
+/* Gate (stop) or ungate (start) one instance; cur_state changes only on success. */
+static int vcn_v5_0_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+	enum amd_powergating_state state)
+{
+	int err;
+
+	/* already in the requested state */
+	if (vinst->cur_state == state)
+		return 0;
+
+	err = (state == AMD_PG_STATE_GATE) ? vcn_v5_0_0_stop(vinst) :
+					     vcn_v5_0_0_start(vinst);
+	if (err)
+		return err;
+
+	vinst->cur_state = state;
+	return 0;
+}
+
+/**
+ * vcn_v5_0_0_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Always returns 0; unknown client or source ids are only logged.
+ */
+static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev,
+	struct amdgpu_irq_src *source,
+	struct amdgpu_iv_entry *entry)
+{
+	struct amdgpu_ring *ring;
+
+	/* the IH client id selects which instance's unified ring is concerned */
+	if (entry->client_id == SOC15_IH_CLIENTID_VCN) {
+		ring = &adev->vcn.inst[0].ring_enc[0];
+	} else if (entry->client_id == SOC15_IH_CLIENTID_VCN1) {
+		ring = &adev->vcn.inst[1].ring_enc[0];
+	} else {
+		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+		return 0;
+	}
+
+	DRM_DEBUG("IH: VCN TRAP\n");
+
+	/* dispatch on the interrupt source id */
+	switch (entry->src_id) {
+	case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+		amdgpu_fence_process(ring);
+		break;
+	case VCN_5_0__SRCID_UVD_POISON:
+		amdgpu_vcn_process_poison_irq(adev, source, entry);
+		break;
+	default:
+		DRM_ERROR("Unhandled interrupt: %d %d\n",
+			entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_0_irq_funcs = {
+ .process = vcn_v5_0_0_process_interrupt, /* the only callback set; all others stay NULL */
+};
+
+/**
+ * vcn_v5_0_0_set_irq_funcs - set VCN block interrupt irq functions
+ * @adev: amdgpu_device pointer
+ *
+ * For each non-harvested instance: num_types = num_enc_rings + 1, funcs = irq ops.
+ */
+static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[inst];
+
+		if (adev->vcn.harvest_config & (1 << inst))
+			continue;
+		vinst->irq.num_types = vinst->num_enc_rings + 1;
+		vinst->irq.funcs = &vcn_v5_0_0_irq_funcs;
+	}
+}
+
+static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = {
+ .name = "vcn_v5_0_0",
+ .early_init = vcn_v5_0_0_early_init,
+ .sw_init = vcn_v5_0_0_sw_init,
+ .sw_fini = vcn_v5_0_0_sw_fini,
+ .hw_init = vcn_v5_0_0_hw_init,
+ .hw_fini = vcn_v5_0_0_hw_fini,
+ .suspend = vcn_v5_0_0_suspend,
+ .resume = vcn_v5_0_0_resume,
+ .is_idle = vcn_v5_0_0_is_idle, /* true only if every active instance reads idle */
+ .wait_for_idle = vcn_v5_0_0_wait_for_idle,
+ .set_clockgating_state = vcn_v5_0_0_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state, /* generic helper, not v5_0_0-prefixed */
+ .dump_ip_state = amdgpu_vcn_dump_ip_state, /* generic amdgpu_vcn_* helpers */
+ .print_ip_state = amdgpu_vcn_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 5, /* major.minor.rev = 5.0.0 */
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vcn_v5_0_0_ip_funcs, /* non-static: declared extern in vcn_v5_0_0.h */
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
new file mode 100644
index 000000000000..51bbccd4360f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V5_0_0_H__
+#define __VCN_V5_0_0_H__
+
+#define VCN_VID_SOC_ADDRESS 0x1FC00
+#define VCN_AON_SOC_ADDRESS 0x1F800
+#define VCN1_VID_SOC_ADDRESS 0x48300
+#define VCN1_AON_SOC_ADDRESS 0x48000
+
+#define VCN_VID_IP_ADDRESS 0x0
+#define VCN_AON_IP_ADDRESS 0x30000
+
+extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block; /* VCN IP block, version 5.0.0 */
+
+#endif /* __VCN_V5_0_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
new file mode 100644
index 000000000000..714350cabf2f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -0,0 +1,1727 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "vcn_v4_0_3.h"
+#include "mmsch_v5_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
+
+#include <drm/drm_drv.h>
+
+static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0_1[] = { /* passed to amdgpu_vcn_reg_dump_init() */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), /* ring buffer base, sets 1-4 */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), /* ring read/write pointers, sets 1-4 */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), /* ring sizes, sets 1-4 */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), /* DPG registers */
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+ SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE)
+};
+
+static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev); /* called by hw_init on SR-IOV VFs */
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev); /* called by early_init */
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); /* called by early_init */
+static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state); /* installed as each instance's set_pg_state hook */
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring); /* used by hw_init */
+static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev); /* called by early_init */
+/**
+ * vcn_v5_0_1_early_init - set function pointers and run common early init
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring, irq and RAS function pointers, install the set_pg_state hook and
+ * call amdgpu_vcn_early_init() for every instance. Returns its first error.
+ */
+static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, err;
+
+	/* re-use enc ring as unified ring: one "enc" ring per instance */
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+		adev->vcn.inst[inst].num_enc_rings = 1;
+
+	vcn_v5_0_1_set_unified_ring_funcs(adev);
+	vcn_v5_0_1_set_irq_funcs(adev);
+	vcn_v5_0_1_set_ras_funcs(adev);
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		adev->vcn.inst[inst].set_pg_state = vcn_v5_0_1_set_pg_state;
+
+		err = amdgpu_vcn_early_init(adev, inst);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Fill in adev->vcn.supported_reset with the reset types VCN can offer. */
+static int vcn_v5_0_1_late_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_enc[0];
+
+	adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(ring);
+
+	/*
+	 * Per-queue reset: MP0 13.0.12, SOS fw >= 0x00450025 and DPM support.
+	 */
+	if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) &&
+	    adev->psp.sos.fw_version >= 0x00450025 &&
+	    amdgpu_dpm_reset_vcn_is_supported(adev))
+		adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+	return 0;
+}
+
+/* Set up the firmware shared area once; a no-op while sq.is_enabled is set. */
+static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+	struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[inst_idx];
+	struct amdgpu_vcn5_fw_shared *shared = vinst->fw_shared.cpu_addr;
+
+	if (shared->sq.is_enabled)
+		return;
+
+	shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+	shared->sq.is_enabled = 1;
+
+	if (amdgpu_vcnfw_log)
+		amdgpu_vcn_fwlog_init(vinst);
+}
+
+/**
+ * vcn_v5_0_1_sw_init - sw init for VCN block
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Register interrupts, init every instance and its unified ring; 0 or first error.
+ */
+static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int i, r, vcn_inst;
+
+	/* VCN UNIFIED TRAP */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+		VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
+	if (r)
+		return r;
+
+	/* VCN POISON TRAP: a failure here must not be overwritten by the loop below */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+		VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst->ras_poison_irq);
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		vcn_inst = GET_INST(VCN, i);
+		r = amdgpu_vcn_sw_init(adev, i);
+		if (r)
+			return r;
+
+		amdgpu_vcn_setup_ucode(adev, i);
+
+		r = amdgpu_vcn_resume(adev, i);
+		if (r)
+			return r;
+
+		ring = &adev->vcn.inst[i].ring_enc[0];
+		ring->use_doorbell = true;
+		if (!amdgpu_sriov_vf(adev)) /* stride of 11 doorbells per instance */
+			ring->doorbell_index =
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				11 * vcn_inst;
+		else /* SR-IOV VF: stride of 32 doorbells per instance */
+			ring->doorbell_index =
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+				32 * vcn_inst;
+
+		ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
+		sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
+
+		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+			AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score);
+		if (r)
+			return r;
+
+		vcn_v5_0_1_fw_shared_init(adev, i);
+	}
+
+	if (amdgpu_sriov_vf(adev)) {
+		r = amdgpu_virt_alloc_mm_table(adev);
+		if (r)
+			return r;
+	}
+
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
+		r = amdgpu_vcn_ras_sw_init(adev);
+		if (r) {
+			dev_err(adev->dev, "Failed to initialize vcn ras block!\n");
+			return r;
+		}
+	}
+
+	r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_5_0_1, ARRAY_SIZE(vcn_reg_list_5_0_1));
+	if (r)
+		return r;
+
+	return amdgpu_vcn_sysfs_reset_mask_init(adev);
+}
+
+/**
+ * vcn_v5_0_1_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Clear the fw_shared flags, suspend every instance and free sw allocations.
+ */
+static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, err, drm_idx;
+
+	/* the shared-area writes are skipped when drm_dev_enter() fails */
+	if (drm_dev_enter(adev_to_drm(adev), &drm_idx)) {
+		for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+			struct amdgpu_vcn5_fw_shared *shared =
+				adev->vcn.inst[inst].fw_shared.cpu_addr;
+
+			shared->present_flag_0 = 0;
+			shared->sq.is_enabled = 0;
+		}
+		drm_dev_exit(drm_idx);
+	}
+
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_free_mm_table(adev);
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		err = amdgpu_vcn_suspend(adev, inst);
+		if (err)
+			return err;
+	}
+
+	amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+		amdgpu_vcn_sw_fini(adev, inst);
+
+	return 0;
+}
+
+/* Program the NBIO doorbell range for instance @i's unified ring; returns 0. */
+static int vcn_v5_0_1_hw_init_inst(struct amdgpu_device *adev, int i)
+{
+	struct amdgpu_ring *ring = &adev->vcn.inst[i].ring_enc[0];
+	int vcn_inst = GET_INST(VCN, i);
+
+	/* nothing to program for a ring that does not use doorbells */
+	if (!ring->use_doorbell)
+		return 0;
+
+	adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+		((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * vcn_inst),
+		adev->vcn.inst[i].aid_id);
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_1_hw_init - start and test VCN block
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * SR-IOV: start via vcn_v5_0_1_start_sriov(). Otherwise init and test each ring.
+ */
+static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int inst, err;
+
+	if (amdgpu_sriov_vf(adev)) {
+		err = vcn_v5_0_1_start_sriov(adev);
+		if (err)
+			return err;
+
+		/* VF: zero the write pointers and mark the rings ready, no ring test */
+		for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+			ring = &adev->vcn.inst[inst].ring_enc[0];
+			ring->wptr = 0;
+			ring->wptr_old = 0;
+			vcn_v5_0_1_unified_ring_set_wptr(ring);
+			ring->sched.ready = true;
+		}
+		return 0;
+	}
+
+	/* record the RRMT capability when bit 0x100 of VCN_RRMT_CNTL is set */
+	if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+		adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		vcn_v5_0_1_hw_init_inst(adev, inst);
+		/* Re-init fw_shared, if required */
+		vcn_v5_0_1_fw_shared_init(adev, inst);
+		err = amdgpu_ring_test_helper(&adev->vcn.inst[inst].ring_enc[0]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_1_hw_fini - stop the hardware block
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Cancels idle work and gates every instance not already gated; returns 0.
+ */
+static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_vcn_inst *vinst;
+	int inst;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) {
+		vinst = &adev->vcn.inst[inst];
+		cancel_delayed_work_sync(&vinst->idle_work);
+		if (vinst->cur_state != AMD_PG_STATE_GATE)
+			vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+	}
+
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN) &&
+	    !amdgpu_sriov_vf(adev))
+		amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0);
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_1_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini, then amdgpu_vcn_suspend() per instance; 0 or the first error.
+ */
+static int vcn_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst;
+	int err = vcn_v5_0_1_hw_fini(ip_block);
+
+	if (err)
+		return err;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		err = amdgpu_vcn_suspend(adev, inst);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v5_0_1_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware for every instance, then hw init the VCN block.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int vcn_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int inst, err;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) {
+		/* while amdgpu_in_reset() is true the cached state is forced to gated */
+		if (amdgpu_in_reset(adev))
+			adev->vcn.inst[inst].cur_state = AMD_PG_STATE_GATE;
+
+		err = amdgpu_vcn_resume(adev, inst);
+		if (err)
+			return err;
+	}
+
+	/* all instances resumed: run hw init and hand back its result */
+	return vcn_v5_0_1_hw_init(ip_block);
+}
+
+/**
+ * vcn_v5_0_1_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Let the VCN memory controller know its offsets
+ */
+static void vcn_v5_0_1_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t offset, size, vcn_inst;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* ucode size + 8, page aligned */
+
+ vcn_inst = GET_INST(VCN, inst); /* registers use vcn_inst; driver arrays keep using inst */
+ /* cache window 0: fw (PSP load: TMR address; otherwise this instance's own buffer) */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0; /* the following windows start at gpu_addr itself */
+ } else {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size; /* the following windows start after the firmware image */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack, at gpu_addr + offset */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context, directly after the stack */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window: the fw_shared buffer */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)));
+}
+
+/**
+ * vcn_v5_0_1_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know its offsets with dpg mode
+ */
+static void vcn_v5_0_1_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v5_0_1_disable_clock_gating - disable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable clock gating for VCN block
+ *
+ * The body is empty: no register is programmed here.
+ */
+static void vcn_v5_0_1_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ /* no-op in this implementation */
+}
+
+/**
+ * vcn_v5_0_1_enable_clock_gating - enable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable clock gating for VCN block
+ *
+ * The body is empty: no register is programmed here.
+ */
+static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+ /* no-op in this implementation */
+}
+
+/**
+ * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @vinst: VCN instance
+ * @new_state: pause state
+ *
+ * Requests or releases a firmware-based DPG pause when @new_state differs
+ * from the state cached in @vinst, then caches the new state.
+ *
+ * Return: always 0.
+ */
+static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int vcn_inst = GET_INST(VCN, vinst->inst);
+ uint32_t pause_ctl;
+
+ /* nothing to do unless the requested state differs from the cached one */
+ if (vinst->pause_state.fw_based == new_state->fw_based)
+ return 0;
+
+ DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n",
+ vinst->pause_state.fw_based, new_state->fw_based,
+ new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE");
+
+ /* start from the current register value with the ACK bit masked out */
+ pause_ctl = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE);
+ pause_ctl &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK;
+
+ if (new_state->fw_based != VCN_DPG_STATE__PAUSE) {
+ /* unpause: drop the request bit, no acknowledge is awaited */
+ pause_ctl &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, pause_ctl);
+ } else {
+ /* pause: raise the request bit, then wait for the ACK bit */
+ pause_ctl |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, pause_ctl);
+
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+ }
+
+ vinst->pause_state.fw_based = new_state->fw_based;
+
+ return 0;
+}
+
+
+/**
+ * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ *
+ * Enables dynamic power gating in UVD_POWER_STATUS, issues the DPG-mode
+ * register programming (handed to amdgpu_vcn_psp_update_sram() when
+ * @indirect is set), pauses DPG and then programs the unified ring buffer
+ * and its doorbell.
+ *
+ * Return: 0 on success, or the error returned by
+ * amdgpu_vcn_psp_update_sram() when the indirect SRAM load fails.
+ */
+static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared =
+ adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE};
+ int vcn_inst, ret;
+ uint32_t tmp;
+
+ /* vcn_inst is used for direct SOC15 accesses, inst_idx for driver state */
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
+
+ if (indirect) {
+ /* rewind the DPG SRAM write cursor to the start of the buffer */
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+ /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
+ WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF,
+ adev->vcn.inst[inst_idx].aid_id, 0, true);
+ }
+
+ /* enable VCPU clock */
+ /* BLK_RST is set together with CLK_EN here; it is dropped again below */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ /* program cache/non-cache windows and tiling config */
+ vcn_v5_0_1_mc_resume_dpg_mode(vinst, indirect);
+
+ /* rewrite VCPU_CNTL with the clock enabled and BLK_RST no longer set */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ /* in indirect mode, hand the accumulated register writes over via PSP */
+ if (indirect) {
+ ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+ if (ret) {
+ dev_err(adev->dev, "vcn sram load failed %d\n", ret);
+ return ret;
+ }
+ }
+
+ /* resetting ring, fw should not check RB ring */
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+
+ /* Pause dpg */
+ /* the return value is not checked; the helper in this file always returns 0 */
+ vcn_v5_0_1_pause_dpg_mode(vinst, &state);
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ /* ring base address and size (size is programmed in dwords) */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t));
+
+ /* keep RB1 disabled while its read/write pointers are reset */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ /* make WPTR match what RPTR reads back and mirror it in ring->wptr */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ /* resetting done, fw can check RB ring */
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* program the doorbell offset for RB1 and enable it */
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_start_sriov - start VCN through the MMSCH init table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For each VCN instance, builds an MMSCH init table in adev->virt.mm_table
+ * (cache window, non-cache window and ring buffer setup), publishes the
+ * table address/size through the MMSCH_VF_* registers and polls the mailbox
+ * response register.
+ *
+ * Return: 0 once every instance has been processed, -EBUSY if the mailbox
+ * response stays 0 for the whole polling budget.
+ */
+static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+ struct amdgpu_ring *ring_enc;
+ uint64_t cache_addr;
+ uint64_t rb_enc_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ /* size_dw is not referenced directly below; presumably used by the MMSCH_V5_0_INSERT_* macros */
+ uint32_t size, size_dw;
+ uint32_t init_status;
+ uint32_t enabled_vcn;
+
+ /* command templates; direct_wt/direct_rd_mod_wt/end are presumably consumed by the INSERT macros */
+ struct mmsch_v5_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v5_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v5_0_cmd_end end = { {0} };
+ struct mmsch_v5_0_init_header header;
+
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ vcn_inst = GET_INST(VCN, i);
+
+ vcn_v5_0_1_fw_shared_init(adev, vcn_inst);
+
+ /* fresh header; total_size is counted in dwords */
+ memset(&header, 0, sizeof(struct mmsch_v5_0_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2;
+
+ /* commands are appended right after the header in the shared table */
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ table_size = 0;
+
+ MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ /* firmware window size: whole firmware image size plus 4, page aligned */
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
+
+ /* cache window 0: TMR address when PSP loaded the firmware, else the instance buffer */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+
+ offset = 0;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ /*
+ * cache window 1 (stack).
+ * NOTE(review): from here on adev->vcn.inst[] is indexed with
+ * vcn_inst (the GET_INST() result) while the code above indexes it
+ * with i - confirm the two are interchangeable on this path.
+ */
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET1), 0);
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2 (context) follows the stack window */
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET2), 0);
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* describe the ring buffer through the fw_shared rb_setup block */
+ fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
+ rb_setup = &fw_shared->rb_setup;
+
+ ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
+ ring_enc->wptr = 0;
+ rb_enc_addr = ring_enc->gpu_addr;
+
+ rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ /*
+ * non-cache window backed by fw_shared.
+ * NOTE(review): the size uses struct amdgpu_vcn4_fw_shared while
+ * fw_shared is declared as struct amdgpu_vcn5_fw_shared and the
+ * non-SRIOV paths size the window with the vcn5 struct - confirm
+ * this is intended.
+ */
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+ MMSCH_V5_0_INSERT_END();
+
+ /* finalize the header: one table (vcn0) placed right after the header */
+ header.vcn0.init_status = 0;
+ header.vcn0.table_offset = header.total_size;
+ header.vcn0.table_size = table_size;
+ header.total_size += table_size;
+
+ /* Send init table to mmsch */
+ size = sizeof(struct mmsch_v5_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ /* clear the VF_CTX_VMID field (VMID 0) */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ /* clear the response register before writing the host mailbox */
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ /*
+ * Poll the response in 10 usec steps for up to 1000 usec. The loop
+ * exits on ANY non-zero response, not only on the expected one; the
+ * value is examined after the loop.
+ */
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
+ if (resp != 0)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+
+ /*
+ * NOTE(review): enabled_vcn is 1 when amdgpu_vcn_is_disabled_vcn()
+ * returns true and is only used as the "VCN%x" value in the message
+ * below - confirm the naming/intent.
+ * An unexpected response is only logged; the loop continues and the
+ * function still returns 0.
+ */
+ enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+ init_status = ((struct mmsch_v5_0_init_header *)(table_loc))->vcn0.init_status;
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+ "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block
+ *
+ * When AMD_PG_SUPPORT_VCN_DPG is set in adev->pg_flags the work is delegated
+ * to vcn_v5_0_1_start_dpg_mode(). Otherwise the VCPU is brought up with
+ * direct register writes, polled until it reports in UVD_STATUS, and the
+ * unified ring buffer is programmed.
+ *
+ * Return: 0 on success, the DPG start result in DPG mode, or -1 when the
+ * VCPU never reports after all retries.
+ */
+static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int j, k, r, vcn_inst;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v5_0_1_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+ /* vcn_inst is used for register accesses, i for driver state */
+ vcn_inst = GET_INST(VCN, i);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ /* clear the LMI and LMI UMC soft reset bits */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* program the cache / non-cache windows */
+ vcn_v5_0_1_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /*
+ * Up to 10 attempts, each polling UVD_STATUS up to 100 times with a
+ * 100 ms delay (plus a 20 ms sleep in emulation mode) for bit 1
+ * (presumably the VCPU report bit - compare the VCPU_REPORT clear
+ * further down). r is assigned on every pass, so it is always
+ * initialized when tested after the loop.
+ */
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(100);
+ if (amdgpu_emu_mode == 1)
+ msleep(20);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ /* emulation: no VCPU reset between attempts, just keep polling */
+ r = -1;
+ if (status & 2) {
+ r = 0;
+ break;
+ }
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ /* pulse BLK_RST to reset the VCPU before the next attempt */
+ dev_err(adev->dev,
+ "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+ }
+
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ /* program the doorbell offset for RB1 and enable it */
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ /*
+ * ring base address and size (size in dwords).
+ * NOTE(review): BASE_LO is passed the full ring->gpu_addr here, whereas
+ * the DPG path wraps it in lower_32_bits() - presumably relies on
+ * implicit truncation to 32 bits; confirm.
+ */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ /* disable RB1 and flag the ring reset to firmware while pointers are reset */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ /* make WPTR match what RPTR reads back and mirror it in ring->wptr */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ /* re-enable RB1 and clear the reset / hold-off flags for firmware */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @vinst: VCN instance
+ *
+ * Unpauses DPG, waits for the power status and for the ring read pointer to
+ * catch up with the write pointer, then turns dynamic power gating mode off.
+ */
+static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+ struct dpg_pause_state unpause = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+ struct amdgpu_device *adev = vinst->adev;
+ int vcn_inst = GET_INST(VCN, vinst->inst);
+ uint32_t wptr;
+
+ /* release any firmware-based pause first */
+ vcn_v5_0_1_pause_dpg_mode(vinst, &unpause);
+
+ /* the power status field has to read back as 1 */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* let the read pointer reach the current write pointer */
+ wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, wptr, 0xFFFFFFFF);
+
+ /* leave dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* One read-back so that all register writes above have completed;
+ * skipping it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+}
+
+/**
+ * vcn_v5_0_1_stop - VCN stop
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block
+ *
+ * Sets FW_QUEUE_DPG_HOLD_OFF in the shared queue mode first. In DPG mode the
+ * rest is delegated to vcn_v5_0_1_stop_dpg_mode(); otherwise the block is
+ * drained, the VCPU is reset, its clock disabled and the LMI soft resets
+ * applied.
+ *
+ * Return: 0 on success, or the non-zero result of a SOC15_WAIT_ON_RREG()
+ * that did not complete.
+ */
+static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+ uint32_t tmp;
+ int r = 0, vcn_inst;
+
+ vcn_inst = GET_INST(VCN, i);
+
+ /* set the DPG hold-off flag; the start paths clear it again */
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v5_0_1_stop_dpg_mode(vinst);
+ return 0;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ /* wait until the LMI read/write clean status bits are all set */
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ /* then wait for the UMC read/write clean bits */
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ /* two separate read-modify-write cycles: LMI UMC first, then LMI */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Reads UVD_RB_RPTR of the instance selected by @ring->me. An error is
+ * logged, but the read still happens, when @ring is not that instance's
+ * ring_enc[0].
+ *
+ * Return: the current hardware unified read pointer.
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+ uint64_t rptr;
+
+ if (unified != ring)
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ rptr = RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
+
+ return rptr;
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * With doorbells in use the value comes from @ring->wptr_cpu_addr, otherwise
+ * from the UVD_RB_WPTR register. An error is logged when @ring is not the
+ * instance's ring_enc[0].
+ *
+ * Return: the current hardware unified write pointer.
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+ if (unified != ring)
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ /* no doorbell: the register holds the write pointer */
+ if (!ring->use_doorbell)
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR);
+
+ return *ring->wptr_cpu_addr;
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the low 32 bits of @ring->wptr to the hardware: through the CPU
+ * write-pointer slot plus a doorbell write when doorbells are in use,
+ * otherwise through the UVD_RB_WPTR register.
+ */
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring *unified = &adev->vcn.inst[ring->me].ring_enc[0];
+
+ if (unified != ring)
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (!ring->use_doorbell) {
+ WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ return;
+ }
+
+ /* update the CPU-visible copy, then write the doorbell */
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+}
+
+/**
+ * vcn_v5_0_1_ring_reset - reset the VCN instance behind a ring
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: unused by this implementation
+ * @timedout_fence: fence handed to the ring reset begin/end helpers
+ *
+ * Resets the VCN instance selected by @ring->me through
+ * amdgpu_dpm_reset_vcn(), re-initializes it and restarts it in DPG mode.
+ *
+ * Return: 0 on success, otherwise the error from the reset request, from the
+ * DPG restart or from amdgpu_ring_reset_helper_end().
+ */
+static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ int r = 0;
+ int vcn_inst;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ /* the reset request takes a bitmask of physical instance numbers */
+ vcn_inst = GET_INST(VCN, ring->me);
+ r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst);
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r);
+ return r;
+ }
+
+ vcn_v5_0_1_hw_init_inst(adev, ring->me);
+
+ /*
+ * The DPG restart can fail (e.g. when the indirect SRAM load fails);
+ * report that instead of declaring the ring reset successful.
+ */
+ r = vcn_v5_0_1_start_dpg_mode(vinst, vinst->indirect_sram);
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN restart after reset fail : %d\n", r);
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+/*
+ * Ring callbacks for the unified VCN ring. Pointer accessors and reset are
+ * implemented in this file; packet emission reuses the vcn_v2_0_* and
+ * vcn_v4_0_3_* helpers and the generic amdgpu ring helpers.
+ */
+static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v5_0_1_unified_ring_get_rptr,
+ .get_wptr = vcn_v5_0_1_unified_ring_get_wptr,
+ .set_wptr = vcn_v5_0_1_unified_ring_set_wptr,
+ /* sum of the per-helper contributions named in the comments below */
+ .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 +
+ 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush,
+ .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v5_0_1_ring_reset,
+};
+
+/**
+ * vcn_v5_0_1_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For every VCN instance, hooks up the unified ring callbacks on ring_enc[0],
+ * records the instance index in the ring and derives the instance's AID id
+ * from its physical instance number.
+ */
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+ struct amdgpu_ring *ring = &vinst->ring_enc[0];
+
+ ring->funcs = &vcn_v5_0_1_unified_ring_vm_funcs;
+ ring->me = i;
+
+ /* AID id = physical instance number / instances per AID */
+ vinst->aid_id = GET_INST(VCN, i) / adev->vcn.num_inst_per_aid;
+ }
+}
+
+/**
+ * vcn_v5_0_1_is_idle - check VCN block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Reads UVD_STATUS of every VCN instance.
+ *
+ * Return: true only when all instances read back UVD_STATUS__IDLE.
+ */
+static bool vcn_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool all_idle = true;
+ int idx;
+
+ /* every instance is sampled, even after a busy one has been seen */
+ for (idx = 0; idx < adev->vcn.num_vcn_inst; ++idx) {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, idx), regUVD_STATUS) != UVD_STATUS__IDLE)
+ all_idle = false;
+ }
+
+ return all_idle;
+}
+
+/**
+ * vcn_v5_0_1_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Waits, instance by instance, for UVD_STATUS to report idle and stops at
+ * the first instance whose wait fails.
+ *
+ * Return: 0 when all instances became idle, otherwise the failing wait's
+ * result.
+ */
+static int vcn_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int idx;
+
+ for (idx = 0; idx < adev->vcn.num_vcn_inst; ++idx) {
+ int status;
+
+ status = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, idx), regUVD_STATUS,
+ UVD_STATUS__IDLE, UVD_STATUS__IDLE);
+ if (status)
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: clock gating state
+ *
+ * Applies @state to each VCN instance in turn. Gating is refused as soon as
+ * an instance is found that is not idle.
+ *
+ * Return: 0 on success, -EBUSY when gating was requested for a busy instance.
+ */
+static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (state != AMD_CG_STATE_GATE) {
+ vcn_v5_0_1_disable_clock_gating(vinst);
+ continue;
+ }
+
+ /* only an idle instance may be clock gated */
+ if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+
+ vcn_v5_0_1_enable_clock_gating(vinst);
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_set_pg_state - set power gating state of one VCN instance
+ *
+ * @vinst: VCN instance
+ * @state: requested power gating state
+ *
+ * Stops the instance for AMD_PG_STATE_GATE and starts it for any other
+ * state, recording the new state only when that succeeded. Nothing is done
+ * when @state already matches the cached state.
+ *
+ * Return: 0 on success or when nothing had to change, otherwise the error
+ * from the stop/start call.
+ */
+static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int err;
+
+ /* Under SRIOV the guest must not control VCN power gating: the MMSCH
+ * firmware owns power gating and clock gating, so CGC and PG are left
+ * untouched and the instance is simply reported as ungated.
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (vinst->cur_state == state)
+ return 0;
+
+ err = (state == AMD_PG_STATE_GATE) ? vcn_v5_0_1_stop(vinst) :
+ vcn_v5_0_1_start(vinst);
+ if (err)
+ return err;
+
+ vinst->cur_state = state;
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Maps the entry's node id through node_id_to_phys_map, picks the first VCN
+ * instance whose aid_id matches and runs fence processing on its unified
+ * ring for the general-purpose encode source id.
+ *
+ * Return: always 0.
+ */
+static int vcn_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t aid = node_id_to_phys_map[entry->node_id];
+ uint32_t idx = 0;
+
+ DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
+
+ /* first instance that lives on the AID the interrupt came from */
+ while (idx < adev->vcn.num_vcn_inst && adev->vcn.inst[idx].aid_id != aid)
+ ++idx;
+
+ if (idx >= adev->vcn.num_vcn_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown VCN instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ if (entry->src_id == VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE)
+ amdgpu_fence_process(&adev->vcn.inst[idx].ring_enc[0]);
+ else
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+
+ return 0;
+}
+
+static int vcn_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_1_irq_funcs = {
+ .process = vcn_v5_0_1_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_1_ras_irq_funcs = {
+ .set = vcn_v5_0_1_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
+
+/**
+ * vcn_v5_0_1_set_irq_funcs - set VCN block interrupt irq functions
+ * @adev: amdgpu_device pointer
+ *
+ * Installs the interrupt and RAS poison irq funcs on the first VCN instance
+ * only; its irq.num_types is incremented once for every VCN instance.
+ */
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ adev->vcn.inst->irq.num_types++;
+
+ adev->vcn.inst->irq.funcs = &vcn_v5_0_1_irq_funcs;
+
+ adev->vcn.inst->ras_poison_irq.num_types = 1;
+ adev->vcn.inst->ras_poison_irq.funcs = &vcn_v5_0_1_ras_irq_funcs;
+
+}
+
+static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = {
+ .name = "vcn_v5_0_1",
+ .early_init = vcn_v5_0_1_early_init,
+ .late_init = vcn_v5_0_1_late_init,
+ .sw_init = vcn_v5_0_1_sw_init,
+ .sw_fini = vcn_v5_0_1_sw_fini,
+ .hw_init = vcn_v5_0_1_hw_init,
+ .hw_fini = vcn_v5_0_1_hw_fini,
+ .suspend = vcn_v5_0_1_suspend,
+ .resume = vcn_v5_0_1_resume,
+ .is_idle = vcn_v5_0_1_is_idle,
+ .wait_for_idle = vcn_v5_0_1_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v5_0_1_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = amdgpu_vcn_dump_ip_state,
+ .print_ip_state = amdgpu_vcn_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 5,
+ .minor = 0,
+ .rev = 1,
+ .funcs = &vcn_v5_0_1_ip_funcs,
+};
+
+static uint32_t vcn_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V5_0_1_VCPU_VCODEC: /* NOTE(review): no GET_INST() remap here, unlike the UVD_STATUS read; confirm */
+ reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default: /* any other sub-block reports no poison */
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v5_0_1_query_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V5_0_1_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v5_0_1_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat; /* true if any instance/sub-block reported poison */
+}
+
+static const struct amdgpu_ras_block_hw_ops vcn_v5_0_1_ras_hw_ops = {
+ .query_poison_status = vcn_v5_0_1_query_poison_status,
+};
+
+static int vcn_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL); /* a UE bank is logged with a fixed count of 1 */
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0)); /* CE count comes from MISC0 */
+ break;
+ default:
+ return -EINVAL; /* only UE and CE banks are handled */
+ }
+
+ return ret;
+}
+
+/* Error codes: refer to the SMU driver interface ("driver if") header file */
+static int vcn_v5_0_1_err_codes[] = {
+ 14, 15, 47, /* VCN [D|V|S] */
+};
+
+static bool vcn_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1); /* bit 0 is ignored in the comparison below */
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank, /* nonzero result rejects the bank */
+ vcn_v5_0_1_err_codes,
+ ARRAY_SIZE(vcn_v5_0_1_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops vcn_v5_0_1_aca_bank_ops = {
+ .aca_bank_parser = vcn_v5_0_1_aca_bank_parser,
+ .aca_bank_is_valid = vcn_v5_0_1_aca_bank_is_valid,
+};
+
+static const struct aca_info vcn_v5_0_1_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &vcn_v5_0_1_aca_bank_ops,
+};
+
+static int vcn_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN,
+ &vcn_v5_0_1_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+ adev->vcn.inst->ras_poison_irq.funcs) { /* poison irq source of the first instance */
+ r = amdgpu_irq_get(adev, &adev->vcn.inst->ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ return 0;
+
+late_fini: /* reached when the ACA bind or the poison irq get fails */
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+static struct amdgpu_vcn_ras vcn_v5_0_1_ras = {
+ .ras_block = {
+ .hw_ops = &vcn_v5_0_1_ras_hw_ops,
+ .ras_late_init = vcn_v5_0_1_ras_late_init,
+ },
+};
+
+static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.ras = &vcn_v5_0_1_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h
new file mode 100644
index 000000000000..b72e4da68317
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_v5_0_1_H__
+#define __VCN_v5_0_1_H__
+
+#define regVCN_RRMT_CNTL 0x0940
+#define regVCN_RRMT_CNTL_BASE_IDX 1
+
+
+enum amdgpu_vcn_v5_0_1_sub_block {
+ AMDGPU_VCN_V5_0_1_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V5_0_1_MAX_SUB_BLOCK, /* sub-block count, used as a loop bound */
+};
+
+extern const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block;
+
+#endif /* __VCN_v5_0_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index d364c6dd152c..eb16916c6473 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -364,15 +364,20 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
* this should allow us to catchup.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
@@ -466,18 +471,18 @@ static void vega10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &vega10_ih_self_irq_funcs;
}
-static int vega10_ih_early_init(void *handle)
+static int vega10_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega10_ih_set_interrupt_funcs(adev);
vega10_ih_set_self_irq_funcs(adev);
return 0;
}
-static int vega10_ih_sw_init(void *handle)
+static int vega10_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
@@ -519,58 +524,50 @@ static int vega10_ih_sw_init(void *handle)
return r;
}
-static int vega10_ih_sw_fini(void *handle)
+static int vega10_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int vega10_ih_hw_init(void *handle)
+static int vega10_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega10_ih_irq_init(adev);
+ return vega10_ih_irq_init(ip_block->adev);
}
-static int vega10_ih_hw_fini(void *handle)
+static int vega10_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- vega10_ih_irq_disable(adev);
+ vega10_ih_irq_disable(ip_block->adev);
return 0;
}
-static int vega10_ih_suspend(void *handle)
+static int vega10_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega10_ih_hw_fini(adev);
+ return vega10_ih_hw_fini(ip_block);
}
-static int vega10_ih_resume(void *handle)
+static int vega10_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega10_ih_hw_init(adev);
+ return vega10_ih_hw_init(ip_block);
}
-static bool vega10_ih_is_idle(void *handle)
+static bool vega10_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int vega10_ih_wait_for_idle(void *handle)
+static int vega10_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int vega10_ih_soft_reset(void *handle)
+static int vega10_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
@@ -607,10 +604,10 @@ static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int vega10_ih_set_clockgating_state(void *handle,
+static int vega10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega10_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -618,7 +615,7 @@ static int vega10_ih_set_clockgating_state(void *handle,
}
-static int vega10_ih_set_powergating_state(void *handle,
+static int vega10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -627,7 +624,6 @@ static int vega10_ih_set_powergating_state(void *handle,
const struct amd_ip_funcs vega10_ih_ip_funcs = {
.name = "vega10_ih",
.early_init = vega10_ih_early_init,
- .late_init = NULL,
.sw_init = vega10_ih_sw_init,
.sw_fini = vega10_ih_sw_fini,
.hw_init = vega10_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index dbc99536440f..85846fd08ce4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -114,6 +114,33 @@ static int vega20_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
+ if (enable) {
+ /* Unset the CLEAR_OVERFLOW bit to make sure the next step
+ * is switching the bit from 0 to 1
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Clear RB_OVERFLOW bit */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ }
+
/* enable_intr field is only valid in ring0 */
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
@@ -291,27 +318,30 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
adev->nbio.funcs->ih_control(adev);
- if ((adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 2, 1)) &&
- adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
- if (adev->irq.ih.use_bus_addr) {
- ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
- MC_SPACE_GPA_ENABLE, 1);
+ if (!amdgpu_sriov_vf(adev)) {
+ if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 2, 1)) &&
+ adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+ if (adev->irq.ih.use_bus_addr) {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_GPA_ENABLE, 1);
+ }
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
}
- WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
- }
- /* psp firmware won't program IH_CHICKEN for aldebaran
- * driver needs to program it properly according to
- * MC_SPACE type in IH_RB_CNTL */
- if ((adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) ||
- (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2))) {
- ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN);
- if (adev->irq.ih.use_bus_addr) {
- ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
- MC_SPACE_GPA_ENABLE, 1);
+ /* psp firmware won't program IH_CHICKEN for aldebaran
+ * driver needs to program it properly according to
+ * MC_SPACE type in IH_RB_CNTL */
+ if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) ||
+ (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2)) ||
+ (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5))) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN);
+ if (adev->irq.ih.use_bus_addr) {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_GPA_ENABLE, 1);
+ }
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken);
}
- WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken);
}
for (i = 0; i < ARRAY_SIZE(ih); i++) {
@@ -320,6 +350,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
if (ret)
return ret;
}
+ ih[i]->overflow = false;
}
if (!amdgpu_sriov_vf(adev))
@@ -334,8 +365,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
vega20_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index));
/* Enable IH Retry CAM */
- if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0) ||
- adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2))
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0) ||
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5))
WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN,
ENABLE, 1);
else
@@ -405,22 +438,30 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
- wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ if (!amdgpu_sriov_vf(adev))
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ else
+ ih->overflow = true;
/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 32). Hopefully
* this should allow us to catchup.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+
out:
return (wptr & ih->ptr_mask);
}
@@ -516,18 +557,18 @@ static void vega20_ih_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &vega20_ih_self_irq_funcs;
}
-static int vega20_ih_early_init(void *handle)
+static int vega20_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega20_ih_set_interrupt_funcs(adev);
vega20_ih_set_self_irq_funcs(adev);
return 0;
}
-static int vega20_ih_sw_init(void *handle)
+static int vega20_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr = true;
int r;
@@ -537,7 +578,7 @@ static int vega20_ih_sw_init(void *handle)
return r;
if ((adev->flags & AMD_IS_APU) &&
- (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2)))
+ (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2)))
use_bus_addr = false;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
@@ -554,7 +595,8 @@ static int vega20_ih_sw_init(void *handle)
adev->irq.ih1.use_doorbell = true;
adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
- if (adev->ip_versions[OSSSYS_HWIP][0] != IP_VERSION(4, 4, 2)) {
+ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) != IP_VERSION(4, 4, 2) &&
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0) != IP_VERSION(4, 4, 5)) {
r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true);
if (r)
return r;
@@ -575,19 +617,19 @@ static int vega20_ih_sw_init(void *handle)
return r;
}
-static int vega20_ih_sw_fini(void *handle)
+static int vega20_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int vega20_ih_hw_init(void *handle)
+static int vega20_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = vega20_ih_irq_init(adev);
if (r)
@@ -596,42 +638,36 @@ static int vega20_ih_hw_init(void *handle)
return 0;
}
-static int vega20_ih_hw_fini(void *handle)
+static int vega20_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- vega20_ih_irq_disable(adev);
+ vega20_ih_irq_disable(ip_block->adev);
return 0;
}
-static int vega20_ih_suspend(void *handle)
+static int vega20_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega20_ih_hw_fini(adev);
+ return vega20_ih_hw_fini(ip_block);
}
-static int vega20_ih_resume(void *handle)
+static int vega20_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega20_ih_hw_init(adev);
+ return vega20_ih_hw_init(ip_block);
}
-static bool vega20_ih_is_idle(void *handle)
+static bool vega20_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int vega20_ih_wait_for_idle(void *handle)
+static int vega20_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int vega20_ih_soft_reset(void *handle)
+static int vega20_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
@@ -665,10 +701,10 @@ static void vega20_ih_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int vega20_ih_set_clockgating_state(void *handle,
+static int vega20_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega20_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -676,7 +712,7 @@ static int vega20_ih_set_clockgating_state(void *handle,
}
-static int vega20_ih_set_powergating_state(void *handle,
+static int vega20_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -685,7 +721,6 @@ static int vega20_ih_set_powergating_state(void *handle,
const struct amd_ip_funcs vega20_ih_ip_funcs = {
.name = "vega20_ih",
.early_init = vega20_ih_early_init,
- .late_init = NULL,
.sw_init = vega20_ih_sw_init,
.sw_fini = vega20_ih_sw_fini,
.hw_init = vega20_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 6a8494f98d3e..a611a7345125 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -67,7 +67,6 @@
#include "sdma_v2_4.h"
#include "sdma_v3_0.h"
#include "dce_v10_0.h"
-#include "dce_v11_0.h"
#include "iceland_ih.h"
#include "tonga_ih.h"
#include "cz_ih.h"
@@ -136,15 +135,15 @@ static const struct amdgpu_video_codec_info polaris_video_codecs_encode_array[]
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
.max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
.max_level = 0,
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
.max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
.max_level = 0,
},
};
@@ -167,16 +166,16 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] =
{
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 3,
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 5,
},
{
@@ -188,9 +187,9 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] =
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 4,
},
};
@@ -206,16 +205,16 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] =
{
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 3,
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 5,
},
{
@@ -227,9 +226,9 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] =
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 4,
},
{
@@ -897,7 +896,7 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool vi_asic_supports_baco(struct amdgpu_device *adev)
+static int vi_asic_supports_baco(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_FIJI:
@@ -908,14 +907,14 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev)
case CHIP_TOPAZ:
return amdgpu_dpm_is_baco_supported(adev);
default:
- return false;
+ return 0;
}
}
static enum amd_reset_method
vi_asic_reset_method(struct amdgpu_device *adev)
{
- bool baco_reset;
+ int baco_reset;
if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO)
@@ -935,7 +934,7 @@ vi_asic_reset_method(struct amdgpu_device *adev)
baco_reset = amdgpu_dpm_is_baco_supported(adev);
break;
default:
- baco_reset = false;
+ baco_reset = 0;
break;
}
@@ -1124,11 +1123,10 @@ static void vi_program_aspm(struct amdgpu_device *adev)
bool bL1SS = false;
bool bClkReqSupport = true;
- if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk())
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- if (adev->flags & AMD_IS_APU ||
- adev->asic_type < CHIP_POLARIS10)
+ if (adev->asic_type < CHIP_POLARIS10)
return;
orig = data = RREG32_PCIE(ixPCIE_LC_CNTL);
@@ -1456,9 +1454,9 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
#define CZ_REV_BRISTOL(rev) \
((rev >= 0xC8 && rev <= 0xCE) || (rev >= 0xE1 && rev <= 0xE6))
-static int vi_common_early_init(void *handle)
+static int vi_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->flags & AMD_IS_APU) {
adev->smc_rreg = &cz_smc_rreg;
@@ -1680,9 +1678,9 @@ static int vi_common_early_init(void *handle)
return 0;
}
-static int vi_common_late_init(void *handle)
+static int vi_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_vi_mailbox_get_irq(adev);
@@ -1690,9 +1688,9 @@ static int vi_common_late_init(void *handle)
return 0;
}
-static int vi_common_sw_init(void *handle)
+static int vi_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_vi_mailbox_add_irq_id(adev);
@@ -1700,14 +1698,9 @@ static int vi_common_sw_init(void *handle)
return 0;
}
-static int vi_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-static int vi_common_hw_init(void *handle)
+static int vi_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* move the golden regs per IP block */
vi_init_golden_registers(adev);
@@ -1719,9 +1712,9 @@ static int vi_common_hw_init(void *handle)
return 0;
}
-static int vi_common_hw_fini(void *handle)
+static int vi_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* enable the doorbell aperture */
vi_enable_doorbell_aperture(adev, false);
@@ -1732,35 +1725,21 @@ static int vi_common_hw_fini(void *handle)
return 0;
}
-static int vi_common_suspend(void *handle)
+static int vi_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vi_common_hw_fini(adev);
+ return vi_common_hw_fini(ip_block);
}
-static int vi_common_resume(void *handle)
+static int vi_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vi_common_hw_init(adev);
+ return vi_common_hw_init(ip_block);
}
-static bool vi_common_is_idle(void *handle)
+static bool vi_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int vi_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int vi_common_soft_reset(void *handle)
-{
- return 0;
-}
-
static void vi_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
@@ -1965,10 +1944,10 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,
return 0;
}
-static int vi_common_set_clockgating_state(void *handle,
+static int vi_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -2008,15 +1987,15 @@ static int vi_common_set_clockgating_state(void *handle,
return 0;
}
-static int vi_common_set_powergating_state(void *handle,
+static int vi_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void vi_common_get_clockgating_state(void *handle, u64 *flags)
+static void vi_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -2048,14 +2027,11 @@ static const struct amd_ip_funcs vi_common_ip_funcs = {
.early_init = vi_common_early_init,
.late_init = vi_common_late_init,
.sw_init = vi_common_sw_init,
- .sw_fini = vi_common_sw_fini,
.hw_init = vi_common_hw_init,
.hw_fini = vi_common_hw_fini,
.suspend = vi_common_suspend,
.resume = vi_common_resume,
.is_idle = vi_common_is_idle,
- .wait_for_idle = vi_common_wait_for_idle,
- .soft_reset = vi_common_soft_reset,
.set_clockgating_state = vi_common_set_clockgating_state,
.set_powergating_state = vi_common_set_powergating_state,
.get_clockgating_state = vi_common_get_clockgating_state,
@@ -2147,8 +2123,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
- else
- amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
break;
@@ -2165,8 +2139,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
- else
- amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_1_ip_block);
#if defined(CONFIG_DRM_AMD_ACP)
@@ -2186,8 +2158,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
else if (amdgpu_device_has_dc_support(adev))
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
- else
- amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
#if defined(CONFIG_DRM_AMD_ACP)
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 80ce42aacc0c..b61f6b838ec2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -246,6 +246,7 @@
* 1 - Stream
* 2 - Bypass
*/
+#define EOP_EXEC (1 << 28) /* For Trailing Fence */
#define DATA_SEL(x) ((x) << 29)
/* 0 - discard
* 1 - send low 32bit data
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h
new file mode 100644
index 000000000000..47534dbbd137
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h
@@ -0,0 +1,218 @@
+/* Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __VPE_6_1_FW_IF_H_
+#define __VPE_6_1_FW_IF_H_
+
+/****************
+ * VPE OP Codes
+ ****************/
+enum VPE_CMD_OPCODE {
+ VPE_CMD_OPCODE_NOP = 0x0,
+ VPE_CMD_OPCODE_VPE_DESC = 0x1,
+ VPE_CMD_OPCODE_PLANE_CFG = 0x2,
+ VPE_CMD_OPCODE_VPEP_CFG = 0x3,
+ VPE_CMD_OPCODE_INDIRECT = 0x4,
+ VPE_CMD_OPCODE_FENCE = 0x5,
+ VPE_CMD_OPCODE_TRAP = 0x6,
+ VPE_CMD_OPCODE_REG_WRITE = 0x7,
+ VPE_CMD_OPCODE_POLL_REGMEM = 0x8,
+ VPE_CMD_OPCODE_COND_EXE = 0x9,
+ VPE_CMD_OPCODE_ATOMIC = 0xA,
+ VPE_CMD_OPCODE_PRED_EXE = 0xB,
+ VPE_CMD_OPCODE_COLLAB_SYNC = 0xC,
+ VPE_CMD_OPCODE_TIMESTAMP = 0xD
+};
+
+/** Generic Command Header
+ * Generic Commands include:
+ * Noop, Fence, Trap,
+ * RegisterWrite, PollRegisterWriteMemory,
+ * SetLocalTimestamp, GetLocalTimestamp
+ * GetGlobalGPUTimestamp */
+#define VPE_HEADER_SUB_OPCODE__SHIFT 8
+#define VPE_HEADER_SUB_OPCODE_MASK 0x0000FF00
+#define VPE_HEADER_OPCODE__SHIFT 0
+#define VPE_HEADER_OPCODE_MASK 0x000000FF
+
+#define VPE_CMD_HEADER(op, subop) /* (op)/(subop) parenthesized: safe for expression arguments */ \
+	((((subop) << VPE_HEADER_SUB_OPCODE__SHIFT) & VPE_HEADER_SUB_OPCODE_MASK) | \
+	 (((op) << VPE_HEADER_OPCODE__SHIFT) & VPE_HEADER_OPCODE_MASK))
+
+
+ /***************************
+ * VPE NOP
+ ***************************/
+#define VPE_CMD_NOP_HEADER_COUNT__SHIFT 16
+#define VPE_CMD_NOP_HEADER_COUNT_MASK 0x00003FFF
+
+#define VPE_CMD_NOP_HEADER_COUNT(count) \
+ (((count) & VPE_CMD_NOP_HEADER_COUNT_MASK) << VPE_CMD_NOP_HEADER_COUNT__SHIFT)
+
+ /***************************
+ * VPE Descriptor
+ ***************************/
+#define VPE_DESC_CD__SHIFT 16
+#define VPE_DESC_CD_MASK 0x000F0000
+
+#define VPE_DESC_CMD_HEADER(cd) \
+ (VPE_CMD_HEADER(VPE_CMD_OPCODE_VPE_DESC, 0) | \
+ (((cd) << VPE_DESC_CD__SHIFT) & VPE_DESC_CD_MASK))
+
+ /***************************
+ * VPE Plane Config
+ ***************************/
+enum VPE_PLANE_CFG_SUBOP {
+ VPE_PLANE_CFG_SUBOP_1_TO_1 = 0x0,
+ VPE_PLANE_CFG_SUBOP_2_TO_1 = 0x1,
+ VPE_PLANE_CFG_SUBOP_2_TO_2 = 0x2
+};
+
+#define VPE_PLANE_CFG_ONE_PLANE 0
+#define VPE_PLANE_CFG_TWO_PLANES 1
+
+#define VPE_PLANE_CFG_NPS0__SHIFT 16
+#define VPE_PLANE_CFG_NPS0_MASK 0x00030000
+
+#define VPE_PLANE_CFG_NPD0__SHIFT 18
+#define VPE_PLANE_CFG_NPD0_MASK 0x000C0000
+
+#define VPE_PLANE_CFG_NPS1__SHIFT 20
+#define VPE_PLANE_CFG_NPS1_MASK 0x00300000
+
+#define VPE_PLANE_CFG_NPD1__SHIFT 22
+#define VPE_PLANE_CFG_NPD1_MASK 0x00C00000
+
+#define VPE_PLANE_CFG_TMZ__SHIFT 16
+#define VPE_PLANE_CFG_TMZ_MASK 0x00010000
+
+#define VPE_PLANE_CFG_SWIZZLE_MODE__SHIFT 3
+#define VPE_PLANE_CFG_SWIZZLE_MODE_MASK 0x000000F8
+
+#define VPE_PLANE_CFG_ROTATION__SHIFT 0
+#define VPE_PLANE_CFG_ROTATION_MASK 0x00000003
+
+#define VPE_PLANE_ADDR_LO__SHIFT 0
+#define VPE_PLANE_ADDR_LO_MASK 0xFFFFFF00
+
+#define VPE_PLANE_CFG_PITCH__SHIFT 0
+#define VPE_PLANE_CFG_PITCH_MASK 0x00003FFF
+
+#define VPE_PLANE_CFG_VIEWPORT_Y__SHIFT 16
+#define VPE_PLANE_CFG_VIEWPORT_Y_MASK 0x3FFF0000
+#define VPE_PLANE_CFG_VIEWPORT_X__SHIFT 0
+#define VPE_PLANE_CFG_VIEWPORT_X_MASK 0x00003FFF
+
+
+#define VPE_PLANE_CFG_VIEWPORT_HEIGHT__SHIFT 16
+#define VPE_PLANE_CFG_VIEWPORT_HEIGHT_MASK 0x1FFF0000
+#define VPE_PLANE_CFG_VIEWPORT_ELEMENT_SIZE__SHIFT 13
+#define VPE_PLANE_CFG_VIEWPORT_ELEMENT_SIZE_MASK 0x0000E000
+#define VPE_PLANE_CFG_VIEWPORT_WIDTH__SHIFT 0
+#define VPE_PLANE_CFG_VIEWPORT_WIDTH_MASK 0x00001FFF
+
+enum VPE_PLANE_CFG_ELEMENT_SIZE {
+ VPE_PLANE_CFG_ELEMENT_SIZE_8BPE = 0,
+ VPE_PLANE_CFG_ELEMENT_SIZE_16BPE = 1,
+ VPE_PLANE_CFG_ELEMENT_SIZE_32BPE = 2,
+ VPE_PLANE_CFG_ELEMENT_SIZE_64BPE = 3
+};
+
+#define VPE_PLANE_CFG_CMD_HEADER(subop, nps0, npd0, nps1, npd1) /* each field from its own argument */ \
+	(VPE_CMD_HEADER(VPE_CMD_OPCODE_PLANE_CFG, (subop)) | \
+	 (((nps0) << VPE_PLANE_CFG_NPS0__SHIFT) & VPE_PLANE_CFG_NPS0_MASK) | \
+	 (((npd0) << VPE_PLANE_CFG_NPD0__SHIFT) & VPE_PLANE_CFG_NPD0_MASK) | \
+	 (((nps1) << VPE_PLANE_CFG_NPS1__SHIFT) & VPE_PLANE_CFG_NPS1_MASK) | \
+	 (((npd1) << VPE_PLANE_CFG_NPD1__SHIFT) & VPE_PLANE_CFG_NPD1_MASK))
+
+
+/************************
+ * VPEP Config
+ ************************/
+enum VPE_VPEP_CFG_SUBOP {
+ VPE_VPEP_CFG_SUBOP_DIR_CFG = 0x0,
+ VPE_VPEP_CFG_SUBOP_IND_CFG = 0x1
+};
+
+
+// Direct Config Command Header
+#define VPE_DIR_CFG_HEADER_ARRAY_SIZE__SHIFT 16
+#define VPE_DIR_CFG_HEADER_ARRAY_SIZE_MASK 0xFFFF0000
+
+#define VPE_DIR_CFG_CMD_HEADER(subop, arr_sz) \
+ (VPE_CMD_HEADER(VPE_CMD_OPCODE_VPEP_CFG, subop) | \
+ (((arr_sz) << VPE_DIR_CFG_HEADER_ARRAY_SIZE__SHIFT) & VPE_DIR_CFG_HEADER_ARRAY_SIZE_MASK))
+
+
+#define VPE_DIR_CFG_PKT_REGISTER_OFFSET__SHIFT 2
+#define VPE_DIR_CFG_PKT_REGISTER_OFFSET_MASK 0x000FFFFC
+
+#define VPE_DIR_CFG_PKT_DATA_SIZE__SHIFT 20
+#define VPE_DIR_CFG_PKT_DATA_SIZE_MASK 0xFFF00000
+
+
+// Indirect Config Command Header
+#define VPE_IND_CFG_HEADER_NUM_DST__SHIFT 28
+#define VPE_IND_CFG_HEADER_NUM_DST_MASK 0xF0000000
+
+#define VPE_IND_CFG_CMD_HEADER(subop, num_dst) \
+ (VPE_CMD_HEADER(VPE_CMD_OPCODE_VPEP_CFG, subop) | \
+ (((num_dst) << VPE_IND_CFG_HEADER_NUM_DST__SHIFT) & VPE_IND_CFG_HEADER_NUM_DST_MASK))
+
+// Indirect Buffer Command Header
+#define VPE_CMD_INDIRECT_HEADER_VMID__SHIFT 16
+#define VPE_CMD_INDIRECT_HEADER_VMID_MASK 0x0000000F
+#define VPE_CMD_INDIRECT_HEADER_VMID(vmid) \
+ (((vmid) & VPE_CMD_INDIRECT_HEADER_VMID_MASK) << VPE_CMD_INDIRECT_HEADER_VMID__SHIFT)
+
+
+/**************************
+ * Poll Reg/Mem Sub-OpCode
+ **************************/
+enum VPE_POLL_REGMEM_SUBOP {
+ VPE_POLL_REGMEM_SUBOP_REGMEM = 0x0,
+ VPE_POLL_REGMEM_SUBOP_REGMEM_WRITE = 0x1
+};
+
+#define VPE_CMD_POLL_REGMEM_HEADER_FUNC__SHIFT 28
+#define VPE_CMD_POLL_REGMEM_HEADER_FUNC_MASK 0x00000007
+#define VPE_CMD_POLL_REGMEM_HEADER_FUNC(func) \
+ (((func) & VPE_CMD_POLL_REGMEM_HEADER_FUNC_MASK) << VPE_CMD_POLL_REGMEM_HEADER_FUNC__SHIFT)
+
+#define VPE_CMD_POLL_REGMEM_HEADER_MEM__SHIFT 31
+#define VPE_CMD_POLL_REGMEM_HEADER_MEM_MASK 0x00000001
+#define VPE_CMD_POLL_REGMEM_HEADER_MEM(mem) \
+ (((mem) & VPE_CMD_POLL_REGMEM_HEADER_MEM_MASK) << VPE_CMD_POLL_REGMEM_HEADER_MEM__SHIFT)
+
+#define VPE_CMD_POLL_REGMEM_DW5_INTERVAL__SHIFT 0
+#define VPE_CMD_POLL_REGMEM_DW5_INTERVAL_MASK 0x0000FFFF
+#define VPE_CMD_POLL_REGMEM_DW5_INTERVAL(interval) \
+ (((interval) & VPE_CMD_POLL_REGMEM_DW5_INTERVAL_MASK) << VPE_CMD_POLL_REGMEM_DW5_INTERVAL__SHIFT)
+
+#define VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT__SHIFT 16
+#define VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT_MASK 0x00000FFF
+#define VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(count) \
+ (((count) & VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT_MASK) << VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT__SHIFT)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
new file mode 100644
index 000000000000..45876883bbf3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_vpe.h"
+#include "vpe_v6_1.h"
+#include "soc15_common.h"
+#include "ivsrcid/vpe/irqsrcs_vpe_6_1.h"
+#include "vpe/vpe_6_1_0_offset.h"
+#include "vpe/vpe_6_1_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/vpe_6_1_0.bin");
+MODULE_FIRMWARE("amdgpu/vpe_6_1_1.bin");
+MODULE_FIRMWARE("amdgpu/vpe_6_1_3.bin");
+
+#define VPE_THREAD1_UCODE_OFFSET 0x8000
+
+#define regVPEC_COLLABORATE_CNTL 0x0013
+#define regVPEC_COLLABORATE_CNTL_BASE_IDX 0
+#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN__SHIFT 0x0
+#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN_MASK 0x00000001L
+
+#define regVPEC_COLLABORATE_CFG 0x0014
+#define regVPEC_COLLABORATE_CFG_BASE_IDX 0
+#define VPEC_COLLABORATE_CFG__MASTER_ID__SHIFT 0x0
+#define VPEC_COLLABORATE_CFG__MASTER_EN__SHIFT 0x3
+#define VPEC_COLLABORATE_CFG__SLAVE0_ID__SHIFT 0x4
+#define VPEC_COLLABORATE_CFG__SLAVE0_EN__SHIFT 0x7
+#define VPEC_COLLABORATE_CFG__MASTER_ID_MASK 0x00000007L
+#define VPEC_COLLABORATE_CFG__MASTER_EN_MASK 0x00000008L
+#define VPEC_COLLABORATE_CFG__SLAVE0_ID_MASK 0x00000070L
+#define VPEC_COLLABORATE_CFG__SLAVE0_EN_MASK 0x00000080L
+
+#define regVPEC_CNTL_6_1_1 0x0016
+#define regVPEC_CNTL_6_1_1_BASE_IDX 0
+#define regVPEC_QUEUE_RESET_REQ_6_1_1 0x002c
+#define regVPEC_QUEUE_RESET_REQ_6_1_1_BASE_IDX 0
+#define regVPEC_PUB_DUMMY2_6_1_1 0x004c
+#define regVPEC_PUB_DUMMY2_6_1_1_BASE_IDX 0
+
+/* Return @offset added to the base stored at adev->reg_offset[VPE_HWIP][inst][0]. */
+static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset)
+{
+	/* The device is reached through the ring embedded in @vpe. */
+	const uint32_t inst_base = vpe->ring.adev->reg_offset[VPE_HWIP][inst][0];
+
+	return inst_base + offset;
+}
+
+static void vpe_v6_1_halt(struct amdgpu_vpe *vpe, bool halt)
+{
+	struct amdgpu_device *adev = vpe->ring.adev; /* presumably consumed by RREG32/WREG32 */
+	uint32_t inst, cntl, bit = halt ? 1 : 0; /* HALT and TH1_RESET always get the same value */
+
+	for (inst = 0; inst < vpe->num_instances; ++inst) {
+		cntl = RREG32(vpe_get_reg_offset(vpe, inst, regVPEC_F32_CNTL));
+		cntl = REG_SET_FIELD(cntl, VPEC_F32_CNTL, TH1_RESET, bit);
+		cntl = REG_SET_FIELD(cntl, VPEC_F32_CNTL, HALT, bit);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_F32_CNTL), cntl);
+	}
+}
+
+/*
+ * Register the VPE trap interrupt source (client SOC21_IH_CLIENTID_VPE,
+ * source VPE_6_1_SRCID__VPE_TRAP) against adev->vpe.trap_irq.
+ *
+ * Returns 0 on success, otherwise the non-zero status of amdgpu_irq_add_id().
+ */
+static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+
+	return amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VPE,
+				 VPE_6_1_SRCID__VPE_TRAP, &adev->vpe.trap_irq);
+}
+
+/* If vpe->collaborate_mode is set, program COLLABORATE_CNTL/CFG on every instance (master ID 0, slave0 ID 1). */
+static void vpe_v6_1_set_collaborate_mode(struct amdgpu_vpe *vpe, bool enable)
+{
+	struct amdgpu_device *adev = vpe->ring.adev;
+	uint32_t inst, cntl, cfg, en = enable ? 1 : 0; /* en feeds all three *_EN fields */
+
+	if (!vpe->collaborate_mode)
+		return;
+
+	for (inst = 0; inst < vpe->num_instances; inst++) {
+		cntl = RREG32(vpe_get_reg_offset(vpe, inst, regVPEC_COLLABORATE_CNTL));
+		cntl = REG_SET_FIELD(cntl, VPEC_COLLABORATE_CNTL, COLLABORATE_MODE_EN, en);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_COLLABORATE_CNTL), cntl);
+
+		cfg = RREG32(vpe_get_reg_offset(vpe, inst, regVPEC_COLLABORATE_CFG));
+		cfg = REG_SET_FIELD(cfg, VPEC_COLLABORATE_CFG, MASTER_ID, 0);
+		cfg = REG_SET_FIELD(cfg, VPEC_COLLABORATE_CFG, MASTER_EN, en);
+		cfg = REG_SET_FIELD(cfg, VPEC_COLLABORATE_CFG, SLAVE0_ID, 1);
+		cfg = REG_SET_FIELD(cfg, VPEC_COLLABORATE_CFG, SLAVE0_EN, en);
+		WREG32(vpe_get_reg_offset(vpe, inst, regVPEC_COLLABORATE_CFG), cfg);
+	}
+}
+
+/* Prepare all instances, then load the VPE ucode through PSP or by writing both thread images directly. */
+static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = vpe->ring.adev;
+	const struct vpe_firmware_header_v1_0 *vpe_hdr;
+	const __le32 *data;
+	uint32_t ucode_offset[2], ucode_size[2];
+	uint32_t i, j, size_dw;
+	uint32_t ret; /* scratch copy of the VPEC_CNTL value, not a status code */
+
+	/* clear UMSCH_INT_ENABLE in VPEC_CNTL on every instance; IP 6.1.1 uses regVPEC_CNTL_6_1_1 */
+	for (j = 0; j < vpe->num_instances; j++) {
+		if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1))
+			ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1));
+		else
+			ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL));
+
+		ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0);
+
+		if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1))
+			WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1), ret);
+		else
+			WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret);
+	}
+
+	/* set up collaborate mode (a no-op unless vpe->collaborate_mode is set) */
+	vpe_v6_1_set_collaborate_mode(vpe, true);
+	/* set up DPM; a failure is only warned about and loading continues */
+	if (amdgpu_vpe_configure_dpm(vpe))
+		dev_warn(adev->dev, "VPE failed to enable DPM\n");
+
+	/*
+	 * For VPE 6.1.1, only the master's offset needs to be added; PSP applies it to the slave as well.
+	 * Instance 0 is used as the master here.
+	 */
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		uint32_t f32_offset, f32_cntl;
+
+		f32_offset = vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL);
+		f32_cntl = RREG32(f32_offset);
+		f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, 0);
+		f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, 0);
+		/* the un-halted value is not written here; offset and value are stored, presumably for the PSP call */
+		adev->vpe.cmdbuf_cpu_addr[0] = f32_offset;
+		adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl;
+
+		return amdgpu_vpe_psp_update_sram(adev);
+	}
+
+	vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
+
+	/* Thread 0 (command thread) ucode offset/size */
+	ucode_offset[0] = le32_to_cpu(vpe_hdr->header.ucode_array_offset_bytes);
+	ucode_size[0] = le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes);
+	/* Thread 1 (control thread) ucode offset/size */
+	ucode_offset[1] = le32_to_cpu(vpe_hdr->ctl_ucode_offset);
+	ucode_size[1] = le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes);
+
+	vpe_v6_1_halt(vpe, true); /* set HALT/TH1_RESET on all instances before writing ucode */
+
+	for (j = 0; j < vpe->num_instances; j++) {
+		for (i = 0; i < 2; i++) {
+			if (i > 0) /* thread 1 is written from VPE_THREAD1_UCODE_OFFSET, thread 0 from 0 */
+				WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET);
+			else
+				WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), 0);
+
+			data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]);
+			size_dw = ucode_size[i] / sizeof(__le32);
+
+			while (size_dw--) {
+				if (amdgpu_emu_mode && size_dw % 500 == 0) /* with amdgpu_emu_mode set: 1 ms pause every 500 dwords */
+					msleep(1);
+				WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_DATA), le32_to_cpup(data++));
+			}
+		}
+	}
+
+	vpe_v6_1_halt(vpe, false); /* clear HALT/TH1_RESET again on all instances */
+
+	return 0;
+}
+
+/* Program queue 0 (ring buffer, doorbell, IB) on every instance, then run the ring test once. */
+static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_ring *ring = &vpe->ring;
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t doorbell, doorbell_offset;
+	uint32_t rb_bufsz, rb_cntl;
+	uint32_t ib_cntl, i;
+	int ret;
+
+	for (i = 0; i < vpe->num_instances; i++) {
+		/* RB_SIZE takes order_base_2() of the ring size in dwords; RB_PRIV = 1, RB_VMID = 0 */
+		rb_bufsz = order_base_2(ring->ring_size / 4);
+		rb_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL));
+		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1);
+		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl);
+
+		/* Initialize the ring buffer's read and write pointers */
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR), 0);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_HI), 0);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), 0);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), 0);
+
+		/* set the rptr writeback address whether writeback is enabled or not (low two bits masked off) */
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_LO),
+			lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_HI),
+			upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+
+		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); /* local copy only; written to RB_CNTL further down */
+		/* RB_BASE receives gpu_addr >> 8, RB_BASE_HI receives gpu_addr >> 40 */
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+		ring->wptr = 0;
+		/* before programming wptr to a smaller value, minor_ptr_update must be set first */
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+		/* clear minor_ptr_update again once wptr has been programmed */
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+		doorbell_offset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET));
+		doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index + i*4);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
+
+		doorbell = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL));
+		doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 1 : 0);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL), doorbell);
+		/* NOTE(review): the trailing 4 is presumably the doorbell range size, matching the i*4 stride - confirm */
+		adev->nbio.funcs->vpe_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index + i*4, 4);
+
+		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl);
+
+		ib_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL));
+		ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL), ib_cntl);
+	}
+
+	ret = amdgpu_ring_test_helper(ring); /* runs once, after every instance has been programmed */
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/* Request a queue 0 reset on every instance and mark the ring not ready; returns the first non-zero wait status, or 0. */
+static int vpe_v_6_1_ring_stop(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = vpe->ring.adev;
+	uint32_t queue_reset, i;
+	int r, ret = 0; /* ret is well defined even when the loop body never runs */
+
+	for (i = 0; i < vpe->num_instances; i++) {
+		if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1))
+			queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1));
+		else
+			queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ));
+
+		queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1);
+		if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) {
+			WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1), queue_reset);
+			r = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ_6_1_1, 0,
+					       VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK);
+		} else {
+			WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ), queue_reset);
+			r = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ, 0,
+					       VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK);
+		}
+		if (r)
+			dev_err(adev->dev, "VPE queue reset failed\n");
+		ret = ret ? ret : r; /* keep the first failure; a later success must not hide it */
+	}
+
+	vpe->ring.sched.ready = false;
+
+	return ret;
+}
+
+/*
+ * Set VPEC_CNTL.TRAP_ENABLE on instance 0 from @state (IP 6.1.1 uses regVPEC_CNTL_6_1_1); always returns 0.
+ */
+static int vpe_v6_1_set_trap_irq_state(struct amdgpu_device *adev,
+				       struct amdgpu_irq_src *source,
+				       unsigned int type,
+				       enum amdgpu_interrupt_state state)
+{
+	struct amdgpu_vpe *vpe = &adev->vpe;
+	uint32_t cntl_reg, cntl_val;
+
+	if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1))
+		cntl_reg = vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1);
+	else
+		cntl_reg = vpe_get_reg_offset(vpe, 0, regVPEC_CNTL);
+
+	cntl_val = RREG32(cntl_reg);
+	cntl_val = REG_SET_FIELD(cntl_val, VPEC_CNTL, TRAP_ENABLE,
+				 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+	WREG32(cntl_reg, cntl_val);
+
+	return 0;
+}
+
+/*
+ * Trap interrupt handler: runs fence processing on the VPE ring when the IV
+ * entry's client ID is SOC21_IH_CLIENTID_VPE and ignores any other client.
+ * Always returns 0.
+ */
+static int vpe_v6_1_process_trap_irq(struct amdgpu_device *adev,
+				     struct amdgpu_irq_src *source,
+				     struct amdgpu_iv_entry *entry)
+{
+	const bool from_vpe = entry->client_id == SOC21_IH_CLIENTID_VPE;
+
+	dev_dbg(adev->dev, "IH: VPE trap\n");
+	if (from_vpe)
+		amdgpu_fence_process(&adev->vpe.ring);
+
+	return 0;
+}
+
+/* Record the VPE 6.1 register offsets in vpe->regs; only dpm_enable depends on the IP version. Returns 0. */
+static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe)
+{
+	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+	const bool is_6_1_1 = amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1);
+
+	/* queue 0 ring pointers and preemption */
+	vpe->regs.queue0_rb_rptr_lo = regVPEC_QUEUE0_RB_RPTR;
+	vpe->regs.queue0_rb_rptr_hi = regVPEC_QUEUE0_RB_RPTR_HI;
+	vpe->regs.queue0_rb_wptr_lo = regVPEC_QUEUE0_RB_WPTR;
+	vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI;
+	vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT;
+
+	/* DPM settings and the context indicator are all mapped onto *_DUMMY* registers */
+	vpe->regs.dpm_enable = is_6_1_1 ? regVPEC_PUB_DUMMY2_6_1_1 : regVPEC_PUB_DUMMY2;
+	vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4;
+	vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3;
+	vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4;
+	vpe->regs.dpm_busy_clamp_threshold = regVPEC_QUEUE7_DUMMY2;
+	vpe->regs.dpm_idle_clamp_threshold = regVPEC_QUEUE7_DUMMY3;
+	vpe->regs.dpm_request_lv = regVPEC_QUEUE7_DUMMY1;
+	vpe->regs.context_indicator = regVPEC_QUEUE6_DUMMY3;
+
+	return 0;
+}
+
+static const struct vpe_funcs vpe_v6_1_funcs = {
+	.init_microcode = amdgpu_vpe_init_microcode, /* amdgpu_vpe_* callbacks are not defined in this file */
+	.ring_init = amdgpu_vpe_ring_init,
+	.ring_fini = amdgpu_vpe_ring_fini,
+	.get_reg_offset = vpe_v6_1_get_reg_offset, /* the remaining callbacks are the 6.1 versions defined above */
+	.set_regs = vpe_v6_1_set_regs,
+	.irq_init = vpe_v6_1_irq_init,
+	.load_microcode = vpe_v6_1_load_microcode,
+	.ring_start = vpe_v6_1_ring_start,
+	.ring_stop = vpe_v_6_1_ring_stop,
+};
+
+static const struct amdgpu_irq_src_funcs vpe_v6_1_trap_irq_funcs = {
+	.process = vpe_v6_1_process_trap_irq, /* handles a trap interrupt entry */
+	.set = vpe_v6_1_set_trap_irq_state, /* writes VPEC_CNTL.TRAP_ENABLE */
+};
+
+void vpe_v6_1_set_funcs(struct amdgpu_vpe *vpe)
+{
+	vpe->trap_irq.funcs = &vpe_v6_1_trap_irq_funcs; /* trap IRQ set/process callbacks */
+	vpe->funcs = &vpe_v6_1_funcs; /* VPE 6.1 callback table */
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h
new file mode 100644
index 000000000000..a9bea7905a77
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __VPE_V6_1_H__
+#define __VPE_V6_1_H__
+
+#include "amdgpu_vpe.h"
+
+void vpe_v6_1_set_funcs(struct amdgpu_vpe *vpe);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index d3c3d3ab7225..16e12c9913f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -5,7 +5,7 @@
config HSA_AMD
bool "HSA kernel driver for AMD GPU devices"
- depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
+ depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT) || (LOONGARCH && 64BIT))
select HMM_MIRROR
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index a5ae7bcf44eb..0ce08113c9f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -27,7 +27,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_device.o \
$(AMDKFD_PATH)/kfd_chardev.o \
$(AMDKFD_PATH)/kfd_topology.o \
- $(AMDKFD_PATH)/kfd_pasid.o \
$(AMDKFD_PATH)/kfd_doorbell.o \
$(AMDKFD_PATH)/kfd_flat_memory.o \
$(AMDKFD_PATH)/kfd_process.o \
@@ -38,6 +37,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v11.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_v12.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
$(AMDKFD_PATH)/kfd_packet_manager_vi.o \
@@ -49,6 +49,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_v9.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_v10.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_v11.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_v12.o \
$(AMDKFD_PATH)/kfd_interrupt.o \
$(AMDKFD_PATH)/kfd_events.o \
$(AMDKFD_PATH)/cik_event_interrupt.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 795382b55e0a..73acbe0b7c21 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -91,7 +91,6 @@ static void cik_event_interrupt_wq(struct kfd_node *dev,
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
uint32_t context_id = ihre->data & 0xfffffff;
- unsigned int vmid = (ihre->ring_id & 0x0000ff00) >> 8;
u32 pasid = (ihre->ring_id & 0xffff0000) >> 16;
if (pasid == 0)
@@ -107,20 +106,26 @@ static void cik_event_interrupt_wq(struct kfd_node *dev,
kfd_signal_hw_exception_event(pasid);
else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
+ struct kfd_process_device *pdd = NULL;
struct kfd_vm_fault_info info;
+ struct kfd_process *p;
kfd_smi_event_update_vmfault(dev, pasid);
- kfd_dqm_evict_pasid(dev->dqm, pasid);
+ p = kfd_lookup_process_by_pasid(pasid, &pdd);
+ if (!pdd)
+ return;
+
+ kfd_evict_process_device(pdd);
memset(&info, 0, sizeof(info));
amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
- if (!info.page_addr && !info.status)
+ if (!info.page_addr && !info.status) {
+ kfd_unref_process(p);
return;
+ }
- if (info.vmid == vmid)
- kfd_signal_vm_fault_event(dev, pasid, &info, NULL);
- else
- kfd_signal_vm_fault_event(dev, pasid, NULL, NULL);
+ kfd_signal_vm_fault_event(pdd, &info, NULL);
+ kfd_unref_process(p);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index d7cd5fa313ff..0320163b6e74 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -274,7 +274,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0xbf820001, 0xbf820258,
+ 0xbf820001, 0xbf820259,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -390,141 +390,98 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b847b, 0x8e76827b,
- 0xbef600ff, 0x01000000,
- 0xbef20174, 0x80747074,
- 0x82758075, 0xbefc0080,
- 0xbf800000, 0xbe802b00,
- 0xbe822b02, 0xbe842b04,
- 0xbe862b06, 0xbe882b08,
- 0xbe8a2b0a, 0xbe8c2b0c,
- 0xbe8e2b0e, 0xc06b003a,
- 0x00000000, 0xbf8cc07f,
- 0xc06b013a, 0x00000010,
- 0xbf8cc07f, 0xc06b023a,
- 0x00000020, 0xbf8cc07f,
- 0xc06b033a, 0x00000030,
- 0xbf8cc07f, 0x8074c074,
- 0x82758075, 0x807c907c,
- 0xbf0a7b7c, 0xbf85ffe7,
- 0xbef40172, 0xbef00080,
- 0xbefe00c1, 0xbeff00c1,
- 0xbee80080, 0xbee90080,
- 0xbef600ff, 0x01000000,
- 0x867aff78, 0x00400000,
- 0xbf850003, 0xb8faf803,
- 0x897a7aff, 0x10000000,
- 0xbf85004d, 0xbe840080,
- 0xd2890000, 0x00000900,
- 0x80048104, 0xd2890001,
- 0x00000900, 0x80048104,
- 0xd2890002, 0x00000900,
- 0x80048104, 0xd2890003,
- 0x00000900, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0xb8fb1605,
+ 0x807b817b, 0x8e7b847b,
+ 0x8e76827b, 0xbef600ff,
+ 0x01000000, 0xbef20174,
+ 0x80747074, 0x82758075,
+ 0xbefc0080, 0xbf800000,
+ 0xbe802b00, 0xbe822b02,
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+ 0xc06b003a, 0x00000000,
+ 0xbf8cc07f, 0xc06b013a,
+ 0x00000010, 0xbf8cc07f,
+ 0xc06b023a, 0x00000020,
+ 0xbf8cc07f, 0xc06b033a,
+ 0x00000030, 0xbf8cc07f,
+ 0x8074c074, 0x82758075,
+ 0x807c907c, 0xbf0a7b7c,
+ 0xbf85ffe7, 0xbef40172,
+ 0xbef00080, 0xbefe00c1,
+ 0xbeff00c1, 0xbee80080,
+ 0xbee90080, 0xbef600ff,
+ 0x01000000, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85004d,
0xbe840080, 0xd2890000,
- 0x00000901, 0x80048104,
- 0xd2890001, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
0x80048104, 0xd2890002,
- 0x00000901, 0x80048104,
- 0xd2890003, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000902,
+ 0xd2890000, 0x00000901,
0x80048104, 0xd2890001,
- 0x00000902, 0x80048104,
- 0xd2890002, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
0x80048104, 0xd2890003,
- 0x00000902, 0x80048104,
+ 0x00000901, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000903, 0x80048104,
- 0xd2890001, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
0x80048104, 0xd2890002,
- 0x00000903, 0x80048104,
- 0xd2890003, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbf820008,
- 0xe0724000, 0x701d0000,
- 0xe0724100, 0x701d0100,
- 0xe0724200, 0x701d0200,
- 0xe0724300, 0x701d0300,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8fb4306, 0x867bc17b,
- 0xbf840063, 0xbf8a0000,
- 0x867aff6f, 0x04000000,
- 0xbf84005f, 0x8e7b867b,
- 0x8e7b827b, 0xbef6007b,
- 0xb8f02a05, 0x80708170,
- 0x8e708a70, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
- 0x867aff78, 0x00400000,
- 0xbf850003, 0xb8faf803,
- 0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
- 0xd2890000, 0x00000900,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
0x80048104, 0xd2890001,
- 0x00000900, 0x80048104,
- 0xd2890002, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
0x80048104, 0xd2890003,
- 0x00000900, 0x80048104,
+ 0x00000903, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000901, 0x80048104,
- 0xd2890001, 0x00000901,
- 0x80048104, 0xd2890002,
- 0x00000901, 0x80048104,
- 0xd2890003, 0x00000901,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8fb2a05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xbf820008, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb4306,
+ 0x867bc17b, 0xbf840063,
+ 0xbf8a0000, 0x867aff6f,
+ 0x04000000, 0xbf84005f,
+ 0x8e7b867b, 0x8e7b827b,
+ 0xbef6007b, 0xb8f02a05,
+ 0x80708170, 0x8e708a70,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -544,141 +501,185 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2a05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xbf8200c7,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x04000000,
- 0xbf84001e, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf840019,
- 0x8e6f866f, 0x8e6f826f,
- 0xbef6006f, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x8078ff78, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
- 0x781d0000, 0xe0510100,
- 0x781d0000, 0x807cff7c,
- 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbefe00c1,
- 0xbeff00c1, 0xbef600ff,
- 0x01000000, 0xb8ef2a05,
- 0x806f816f, 0x8e6f826f,
- 0x806fff6f, 0x00008000,
- 0xbef80080, 0xbeee0078,
- 0x8078ff78, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xbf9c0000, 0xe0524000,
- 0x6e1d0000, 0xe0524100,
- 0x6e1d0100, 0xe0524200,
- 0x6e1d0200, 0xe0524300,
- 0x6e1d0300, 0xbf8c0f70,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbf8200c7, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x866eff7f,
+ 0x04000000, 0xbf84001e,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8ef4306, 0x866fc16f,
+ 0xbf840019, 0x8e6f866f,
+ 0x8e6f826f, 0xbef6006f,
0xb8f82a05, 0x80788178,
0x8e788a78, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
+ 0xbefe00c1, 0xbeff00c1,
0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82a05,
+ 0xb8ef2a05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xbf9c0000,
+ 0xe0524000, 0x6e1d0000,
+ 0xe0524100, 0x6e1d0100,
+ 0xe0524200, 0x6e1d0200,
+ 0xe0524300, 0x6e1d0300,
+ 0xbf8c0f70, 0xb8f82a05,
0x80788178, 0x8e788a78,
0xb8ee1605, 0x806e816e,
0x8e6e866e, 0x80786e78,
- 0xbef60084, 0xbef600ff,
- 0x01000000, 0xc0211bfa,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0xbef60084,
+ 0xbef600ff, 0x01000000,
+ 0xc0211bfa, 0x00000078,
+ 0x80788478, 0xc0211b3a,
0x00000078, 0x80788478,
- 0xc0211b3a, 0x00000078,
- 0x80788478, 0xc0211b7a,
+ 0xc0211b7a, 0x00000078,
+ 0x80788478, 0xc0211c3a,
0x00000078, 0x80788478,
- 0xc0211c3a, 0x00000078,
- 0x80788478, 0xc0211c7a,
+ 0xc0211c7a, 0x00000078,
+ 0x80788478, 0xc0211eba,
0x00000078, 0x80788478,
- 0xc0211eba, 0x00000078,
- 0x80788478, 0xc0211efa,
+ 0xc0211efa, 0x00000078,
+ 0x80788478, 0xc0211a3a,
0x00000078, 0x80788478,
- 0xc0211a3a, 0x00000078,
- 0x80788478, 0xc0211a7a,
+ 0xc0211a7a, 0x00000078,
+ 0x80788478, 0xc0211cfa,
0x00000078, 0x80788478,
- 0xc0211cfa, 0x00000078,
- 0x80788478, 0xbf8cc07f,
- 0xbefc006f, 0xbefe0070,
- 0xbeff0071, 0x866f7bff,
- 0x000003ff, 0xb96f4803,
- 0x866f7bff, 0xfffff800,
- 0x8f6f8b6f, 0xb96fa2c3,
- 0xb973f801, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf810000,
+ 0xbf8cc07f, 0xbefc006f,
+ 0xbefe0070, 0xbeff0071,
+ 0x866f7bff, 0x000003ff,
+ 0xb96f4803, 0x866f7bff,
+ 0xfffff800, 0x8f6f8b6f,
+ 0xb96fa2c3, 0xb973f801,
+ 0xb8ee2a05, 0x806e816e,
+ 0x8e6e8a6e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_nv1x_hex[] = {
- 0xbf820001, 0xbf8201f5,
+ 0xbf820001, 0xbf820393,
0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78,
@@ -711,19 +712,19 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf0d8f7b, 0xbf840002,
0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010,
- 0xbf8cc07f, 0x8f6e976e,
+ 0xbf8c0000, 0x8f6e976e,
0x8a77ff77, 0x00800000,
0x88776e77, 0xf4051bbd,
- 0xfa000000, 0xbf8cc07f,
+ 0xfa000000, 0xbf8c0000,
0xf4051ebd, 0xfa000008,
- 0xbf8cc07f, 0x87ee6e6e,
+ 0xbf8c0000, 0x87ee6e6e,
0xbf840001, 0xbe80206e,
- 0x876eff6d, 0x01ff0000,
- 0xbf850005, 0x8878ff78,
- 0x00002000, 0x80ec886c,
- 0x82ed806d, 0xbf820005,
- 0x876eff6d, 0x01000000,
- 0xbf850002, 0x806c846c,
+ 0x876eff6d, 0x00ff0000,
+ 0xbf850008, 0x876eff6d,
+ 0x01000000, 0xbf850007,
+ 0x8878ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xbf820002, 0x806c846c,
0x826d806d, 0x876dff6d,
0x0000ffff, 0x907a8977,
0x877bff7a, 0x003f8000,
@@ -769,13 +770,90 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820002, 0xbeff03c1,
- 0xbf82000b, 0xbef603ff,
- 0x01000000, 0xe0704000,
- 0x705d0000, 0xe0704080,
- 0x705d0100, 0xe0704100,
- 0x705d0200, 0xe0704180,
- 0x705d0300, 0xbf82000a,
- 0xbef603ff, 0x01000000,
+ 0xbf820058, 0xbef603ff,
+ 0x01000000, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850049, 0xbe840380,
+ 0xd7600000, 0x00000900,
+ 0x80048104, 0xd7600001,
+ 0x00000900, 0x80048104,
+ 0xd7600002, 0x00000900,
+ 0x80048104, 0xd7600003,
+ 0x00000900, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000902,
+ 0x80048104, 0xd7600001,
+ 0x00000902, 0x80048104,
+ 0xd7600002, 0x00000902,
+ 0x80048104, 0xd7600003,
+ 0x00000902, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000903,
+ 0x80048104, 0xd7600001,
+ 0x00000903, 0x80048104,
+ 0xd7600002, 0x00000903,
+ 0x80048104, 0xd7600003,
+ 0x00000903, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06a004,
+ 0xbf84ffef, 0xbf820060,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0xbf820057, 0xbef603ff,
+ 0x01000000, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850049, 0xbe840380,
+ 0xd7600000, 0x00000900,
+ 0x80048104, 0xd7600001,
+ 0x00000900, 0x80048104,
+ 0xd7600002, 0x00000900,
+ 0x80048104, 0xd7600003,
+ 0x00000900, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000902,
+ 0x80048104, 0xd7600001,
+ 0x00000902, 0x80048104,
+ 0xd7600002, 0x00000902,
+ 0x80048104, 0xd7600003,
+ 0x00000902, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000903,
+ 0x80048104, 0xd7600001,
+ 0x00000903, 0x80048104,
+ 0xd7600002, 0x00000903,
+ 0x80048104, 0xd7600003,
+ 0x00000903, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbf820008,
0xe0704000, 0x705d0000,
0xe0704100, 0x705d0100,
0xe0704200, 0x705d0200,
@@ -855,250 +933,377 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb97b4306, 0x877bc17b,
- 0xbf840044, 0xbf8a0000,
+ 0xbf840085, 0xbf8a0000,
0x877aff6d, 0x80000000,
- 0xbf840040, 0x8f7b867b,
- 0x8f7b827b, 0xbef6037b,
- 0xb9703a05, 0x80708170,
- 0xbf0d9973, 0xbf850002,
- 0x8f708970, 0xbf820001,
- 0x8f708a70, 0xb97a1e06,
- 0x8f7a8a7a, 0x80707a70,
- 0x8070ff70, 0x00000200,
- 0x8070ff70, 0x00000080,
- 0xbef603ff, 0x01000000,
- 0xd7650000, 0x000100c1,
- 0xd7660000, 0x000200c1,
- 0x16000084, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbefc0380, 0xbf850012,
- 0xbe8303ff, 0x00000080,
- 0xbf800000, 0xbf800000,
- 0xbf800000, 0xd8d80000,
+ 0xbf840081, 0x8f7b887b,
+ 0xbef6037b, 0xb9703a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0x8070ff70,
+ 0x00000080, 0xbef603ff,
+ 0x01000000, 0xd7650000,
+ 0x000100c1, 0xd7660000,
+ 0x000200c1, 0x16000084,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbefc0380,
+ 0xbf850033, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf85001d, 0xd8d80000,
0x01000000, 0xbf8c0000,
- 0xe0704000, 0x705d0100,
- 0x807c037c, 0x80700370,
+ 0xbe840380, 0xd7600000,
+ 0x00000901, 0x80048104,
+ 0xd7600001, 0x00000901,
+ 0x80048104, 0xd7600002,
+ 0x00000901, 0x80048104,
+ 0xd7600003, 0x00000901,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0x807cff7c, 0x00000080,
0xd5250000, 0x0001ff00,
0x00000080, 0xbf0a7b7c,
- 0xbf85fff4, 0xbf820011,
- 0xbe8303ff, 0x00000100,
+ 0xbf85ffe4, 0xbf820044,
+ 0xbe8303ff, 0x00000080,
0xbf800000, 0xbf800000,
0xbf800000, 0xd8d80000,
0x01000000, 0xbf8c0000,
0xe0704000, 0x705d0100,
0x807c037c, 0x80700370,
0xd5250000, 0x0001ff00,
- 0x00000100, 0xbf0a7b7c,
- 0xbf85fff4, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850004,
- 0xbef003ff, 0x00000200,
- 0xbeff0380, 0xbf820003,
- 0xbef003ff, 0x00000400,
- 0xbeff03c1, 0xb97b3a05,
- 0x807b817b, 0x8f7b827b,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850017,
+ 0x00000080, 0xbf0a7b7c,
+ 0xbf85fff4, 0xbf820032,
+ 0xb97af803, 0x8a7a7aff,
+ 0x10000000, 0xbf85001d,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0x807cff7c,
+ 0x00000100, 0xd5250000,
+ 0x0001ff00, 0x00000100,
+ 0xbf0a7b7c, 0xbf85ffe4,
+ 0xbf820011, 0xbe8303ff,
+ 0x00000100, 0xbf800000,
+ 0xbf800000, 0xbf800000,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xe0704000,
+ 0x705d0100, 0x807c037c,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000100,
+ 0xbf0a7b7c, 0xbf85fff4,
+ 0xbefe03c1, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850004, 0xbef003ff,
+ 0x00000200, 0xbeff0380,
+ 0xbf820003, 0xbef003ff,
+ 0x00000400, 0xbeff03c1,
+ 0xb97b3a05, 0x807b817b,
+ 0x8f7b827b, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf85006b, 0xbef603ff,
+ 0x01000000, 0xbefc0384,
+ 0xbf0a7b7c, 0xbf8400fa,
+ 0xb97af803, 0x8a7a7aff,
+ 0x10000000, 0xbf850050,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xbe840380, 0xd7600000,
+ 0x00000900, 0x80048104,
+ 0xd7600001, 0x00000900,
+ 0x80048104, 0xd7600002,
+ 0x00000900, 0x80048104,
+ 0xd7600003, 0x00000900,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000901, 0x80048104,
+ 0xd7600001, 0x00000901,
+ 0x80048104, 0xd7600002,
+ 0x00000901, 0x80048104,
+ 0xd7600003, 0x00000901,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000902, 0x80048104,
+ 0xd7600001, 0x00000902,
+ 0x80048104, 0xd7600002,
+ 0x00000902, 0x80048104,
+ 0xd7600003, 0x00000902,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000903, 0x80048104,
+ 0xd7600001, 0x00000903,
+ 0x80048104, 0xd7600002,
+ 0x00000903, 0x80048104,
+ 0xd7600003, 0x00000903,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffb1, 0xbf8200a6,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000200, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf820094,
0xbef603ff, 0x01000000,
0xbefc0384, 0xbf0a7b7c,
- 0xbf840037, 0x7e008700,
+ 0xbf840065, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850050, 0x7e008700,
+ 0x7e028701, 0x7e048702,
+ 0x7e068703, 0xbe840380,
+ 0xd7600000, 0x00000900,
+ 0x80048104, 0xd7600001,
+ 0x00000900, 0x80048104,
+ 0xd7600002, 0x00000900,
+ 0x80048104, 0xd7600003,
+ 0x00000900, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000901,
+ 0x80048104, 0xd7600001,
+ 0x00000901, 0x80048104,
+ 0xd7600002, 0x00000901,
+ 0x80048104, 0xd7600003,
+ 0x00000901, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000902,
+ 0x80048104, 0xd7600001,
+ 0x00000902, 0x80048104,
+ 0xd7600002, 0x00000902,
+ 0x80048104, 0xd7600003,
+ 0x00000902, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0xbe840380,
+ 0xd7600000, 0x00000903,
+ 0x80048104, 0xd7600001,
+ 0x00000903, 0x80048104,
+ 0xd7600002, 0x00000903,
+ 0x80048104, 0xd7600003,
+ 0x00000903, 0x80048104,
+ 0xf469003a, 0xe0000000,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffef, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf82003b, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
- 0x705d0000, 0xe0704080,
- 0x705d0100, 0xe0704100,
- 0x705d0200, 0xe0704180,
+ 0x705d0000, 0xe0704100,
+ 0x705d0100, 0xe0704200,
+ 0x705d0200, 0xe0704300,
0x705d0300, 0x807c847c,
- 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffef,
- 0xbf820025, 0xbef603ff,
- 0x01000000, 0xbefc0384,
- 0xbf0a7b7c, 0xbf840011,
- 0x7e008700, 0x7e028701,
- 0x7e048702, 0x7e068703,
- 0xe0704000, 0x705d0000,
- 0xe0704100, 0x705d0100,
- 0xe0704200, 0x705d0200,
- 0xe0704300, 0x705d0300,
- 0x807c847c, 0x8070ff70,
- 0x00000400, 0xbf0a7b7c,
- 0xbf85ffef, 0xb97b1e06,
- 0x877bc17b, 0xbf84000c,
- 0x8f7b837b, 0x807b7c7b,
- 0xbefe03c1, 0xbeff0380,
+ 0xb97b1e06, 0x877bc17b,
+ 0xbf840027, 0x8f7b837b,
+ 0x807b7c7b, 0xbefe03c1,
+ 0xbeff0380, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850017, 0x7e008700,
+ 0xbe840380, 0xd7600000,
+ 0x00000900, 0x80048104,
+ 0xd7600001, 0x00000900,
+ 0x80048104, 0xd7600002,
+ 0x00000900, 0x80048104,
+ 0xd7600003, 0x00000900,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06c004, 0xbf84ffef,
+ 0x807c817c, 0xbf0a7b7c,
+ 0xbf85ffea, 0xbf820008,
0x7e008700, 0xe0704000,
0x705d0000, 0x807c817c,
0x8070ff70, 0x00000080,
0xbf0a7b7c, 0xbf85fff8,
- 0xbf820144, 0xbef4037e,
+ 0xbf82013f, 0xbef4037e,
0x8775ff7f, 0x0000ffff,
0x8875ff75, 0x00040000,
0xbef60380, 0xbef703ff,
0x10807fac, 0xb97202dc,
0x8f729972, 0x876eff7f,
- 0x04000000, 0xbf840034,
+ 0x04000000, 0xbf840033,
0xbefe03c1, 0x907c9972,
0x877c817c, 0xbf06817c,
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb96f4306, 0x876fc16f,
- 0xbf840029, 0x8f6f866f,
- 0x8f6f826f, 0xbef6036f,
- 0xb9783a05, 0x80788178,
- 0xbf0d9972, 0xbf850002,
- 0x8f788978, 0xbf820001,
- 0x8f788a78, 0xb96e1e06,
- 0x8f6e8a6e, 0x80786e78,
- 0x8078ff78, 0x00000200,
- 0x8078ff78, 0x00000080,
- 0xbef603ff, 0x01000000,
- 0x907c9972, 0x877c817c,
- 0xbf06817c, 0xbefc0380,
- 0xbf850009, 0xe0310000,
- 0x781d0000, 0x807cff7c,
- 0x00000080, 0x8078ff78,
- 0x00000080, 0xbf0a6f7c,
- 0xbf85fff8, 0xbf820008,
+ 0xbf840028, 0x8f6f886f,
+ 0xbef6036f, 0xb9783a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef603ff,
+ 0x01000000, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbefc0380, 0xbf850009,
0xe0310000, 0x781d0000,
- 0x807cff7c, 0x00000100,
- 0x8078ff78, 0x00000100,
+ 0x807cff7c, 0x00000080,
+ 0x8078ff78, 0x00000080,
0xbf0a6f7c, 0xbf85fff8,
- 0xbef80380, 0xbefe03c1,
- 0x907c9972, 0x877c817c,
- 0xbf06817c, 0xbf850002,
- 0xbeff0380, 0xbf820001,
- 0xbeff03c1, 0xb96f3a05,
- 0x806f816f, 0x8f6f826f,
- 0x907c9972, 0x877c817c,
- 0xbf06817c, 0xbf850024,
- 0xbef603ff, 0x01000000,
- 0xbeee0378, 0x8078ff78,
- 0x00000200, 0xbefc0384,
- 0xbf0a6f7c, 0xbf840050,
- 0xe0304000, 0x785d0000,
- 0xe0304080, 0x785d0100,
- 0xe0304100, 0x785d0200,
- 0xe0304180, 0x785d0300,
- 0xbf8c3f70, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807c847c,
+ 0xbf820008, 0xe0310000,
+ 0x781d0000, 0x807cff7c,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7c,
+ 0xbf85fff8, 0xbef80380,
+ 0xbefe03c1, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850002, 0xbeff0380,
+ 0xbf820001, 0xbeff03c1,
+ 0xb96f3a05, 0x806f816f,
+ 0x8f6f826f, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850024, 0xbef603ff,
+ 0x01000000, 0xbeee0378,
0x8078ff78, 0x00000200,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xe0304000, 0x6e5d0000,
- 0xe0304080, 0x6e5d0100,
- 0xe0304100, 0x6e5d0200,
- 0xe0304180, 0x6e5d0300,
- 0xbf8c3f70, 0xbf820034,
- 0xbef603ff, 0x01000000,
- 0xbeee0378, 0x8078ff78,
- 0x00000400, 0xbefc0384,
- 0xbf0a6f7c, 0xbf840012,
- 0xe0304000, 0x785d0000,
- 0xe0304100, 0x785d0100,
- 0xe0304200, 0x785d0200,
- 0xe0304300, 0x785d0300,
- 0xbf8c3f70, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807c847c,
+ 0xbefc0384, 0xbf0a6f7c,
+ 0xbf840050, 0xe0304000,
+ 0x785d0000, 0xe0304080,
+ 0x785d0100, 0xe0304100,
+ 0x785d0200, 0xe0304180,
+ 0x785d0300, 0xbf8c0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807c847c, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85ffee, 0xe0304000,
+ 0x6e5d0000, 0xe0304080,
+ 0x6e5d0100, 0xe0304100,
+ 0x6e5d0200, 0xe0304180,
+ 0x6e5d0300, 0xbf8c0000,
+ 0xbf820034, 0xbef603ff,
+ 0x01000000, 0xbeee0378,
0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xb96f1e06, 0x876fc16f,
- 0xbf84000e, 0x8f6f836f,
- 0x806f7c6f, 0xbefe03c1,
- 0xbeff0380, 0xe0304000,
- 0x785d0000, 0xbf8c3f70,
- 0x7e008500, 0x807c817c,
- 0x8078ff78, 0x00000080,
- 0xbf0a6f7c, 0xbf85fff7,
- 0xbeff03c1, 0xe0304000,
- 0x6e5d0000, 0xe0304100,
- 0x6e5d0100, 0xe0304200,
- 0x6e5d0200, 0xe0304300,
- 0x6e5d0300, 0xbf8c3f70,
+ 0xbefc0384, 0xbf0a6f7c,
+ 0xbf840012, 0xe0304000,
+ 0x785d0000, 0xe0304100,
+ 0x785d0100, 0xe0304200,
+ 0x785d0200, 0xe0304300,
+ 0x785d0300, 0xbf8c0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xb96f1e06,
+ 0x876fc16f, 0xbf84000e,
+ 0x8f6f836f, 0x806f7c6f,
+ 0xbefe03c1, 0xbeff0380,
+ 0xe0304000, 0x785d0000,
+ 0xbf8c0000, 0x7e008500,
+ 0x807c817c, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7c,
+ 0xbf85fff7, 0xbeff03c1,
+ 0xe0304000, 0x6e5d0000,
+ 0xe0304100, 0x6e5d0100,
+ 0xe0304200, 0x6e5d0200,
+ 0xe0304300, 0x6e5d0300,
+ 0xbf8c0000, 0xb9783a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x80f8ff78,
+ 0x00000050, 0xbef603ff,
+ 0x01000000, 0xbefc03ff,
+ 0x0000006c, 0x80f89078,
+ 0xf429003a, 0xf0000000,
+ 0xbf8c0000, 0x80fc847c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0x80f8a078,
+ 0xf42d003a, 0xf0000000,
+ 0xbf8c0000, 0x80fc887c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0xbe843104,
+ 0xbe863106, 0x80f8c078,
+ 0xf431003a, 0xf0000000,
+ 0xbf8c0000, 0x80fc907c,
+ 0xbf800000, 0xbe803100,
+ 0xbe823102, 0xbe843104,
+ 0xbe863106, 0xbe883108,
+ 0xbe8a310a, 0xbe8c310c,
+ 0xbe8e310e, 0xbf06807c,
+ 0xbf84fff0, 0xba80f801,
+ 0x00000000, 0xbf8a0000,
0xb9783a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
- 0x80f8ff78, 0x00000050,
0xbef603ff, 0x01000000,
- 0xbefc03ff, 0x0000006c,
- 0x80f89078, 0xf429003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc847c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0x80f8a078, 0xf42d003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc887c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0xbe843104, 0xbe863106,
- 0x80f8c078, 0xf431003a,
- 0xf0000000, 0xbf8cc07f,
- 0x80fc907c, 0xbf800000,
- 0xbe803100, 0xbe823102,
- 0xbe843104, 0xbe863106,
- 0xbe883108, 0xbe8a310a,
- 0xbe8c310c, 0xbe8e310e,
- 0xbf06807c, 0xbf84fff0,
- 0xba80f801, 0x00000000,
- 0xbf8a0000, 0xb9783a05,
- 0x80788178, 0xbf0d9972,
- 0xbf850002, 0x8f788978,
- 0xbf820001, 0x8f788a78,
- 0xb96e1e06, 0x8f6e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0xbef603ff,
- 0x01000000, 0xf4211bfa,
+ 0xf4211bfa, 0xf0000000,
+ 0x80788478, 0xf4211b3a,
0xf0000000, 0x80788478,
- 0xf4211b3a, 0xf0000000,
- 0x80788478, 0xf4211b7a,
+ 0xf4211b7a, 0xf0000000,
+ 0x80788478, 0xf4211c3a,
0xf0000000, 0x80788478,
- 0xf4211c3a, 0xf0000000,
- 0x80788478, 0xf4211c7a,
+ 0xf4211c7a, 0xf0000000,
+ 0x80788478, 0xf4211eba,
0xf0000000, 0x80788478,
- 0xf4211eba, 0xf0000000,
- 0x80788478, 0xf4211efa,
+ 0xf4211efa, 0xf0000000,
+ 0x80788478, 0xf4211e7a,
0xf0000000, 0x80788478,
- 0xf4211e7a, 0xf0000000,
- 0x80788478, 0xf4211cfa,
+ 0xf4211cfa, 0xf0000000,
+ 0x80788478, 0xf4211bba,
0xf0000000, 0x80788478,
+ 0xbf8c0000, 0xb9eef814,
0xf4211bba, 0xf0000000,
- 0x80788478, 0xbf8cc07f,
- 0xb9eef814, 0xf4211bba,
- 0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef815,
- 0xbefc036f, 0xbefe0370,
- 0xbeff0371, 0x876f7bff,
- 0x000003ff, 0xb9ef4803,
- 0xb9f9f816, 0x876f7bff,
- 0xfffff800, 0x906f8b6f,
- 0xb9efa2c3, 0xb9f3f801,
- 0xb96e3a05, 0x806e816e,
- 0xbf0d9972, 0xbf850002,
- 0x8f6e896e, 0xbf820001,
- 0x8f6e8a6e, 0xb96f1e06,
- 0x8f6f8a6f, 0x806e6f6e,
- 0x806eff6e, 0x00000200,
- 0x806e746e, 0x826f8075,
- 0x876fff6f, 0x0000ffff,
- 0xf4091c37, 0xfa000050,
- 0xf4091d37, 0xfa000060,
- 0xf4011e77, 0xfa000074,
- 0xbf8cc07f, 0x906e8977,
- 0x876fff6e, 0x003f8000,
- 0x906e8677, 0x876eff6e,
- 0x02000000, 0x886e6f6e,
- 0xb9eef807, 0x876dff6d,
- 0x0000ffff, 0x87fe7e7e,
- 0x87ea6a6a, 0xb9faf802,
- 0xbe80226c, 0xbf810000,
+ 0x80788478, 0xbf8c0000,
+ 0xb9eef815, 0xbefc036f,
+ 0xbefe0370, 0xbeff0371,
+ 0xb9f9f816, 0xb9fb4803,
+ 0x907b8b7b, 0xb9fba2c3,
+ 0xb9f3f801, 0xb96e3a05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbf850002, 0x8f6e896e,
+ 0xbf820001, 0x8f6e8a6e,
+ 0xb96f1e06, 0x8f6f8a6f,
+ 0x806e6f6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x876fff6f,
+ 0x0000ffff, 0xf4091c37,
+ 0xfa000050, 0xf4091d37,
+ 0xfa000060, 0xf4011e77,
+ 0xfa000074, 0xbf8c0000,
+ 0x906e8977, 0x876fff6e,
+ 0x003f8000, 0x906e8677,
+ 0x876eff6e, 0x02000000,
+ 0x886e6f6e, 0xb9eef807,
+ 0x876dff6d, 0x0000ffff,
+ 0x87fe7e7e, 0x87ea6a6a,
+ 0xb9faf802, 0xbe80226c,
+ 0xbf9b0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
- 0xbf9f0000, 0x00000000,
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
- 0xbf820001, 0xbf8202d4,
+ 0xbf820001, 0xbf8202d5,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -1215,99 +1420,37 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fb1605,
- 0x807b817b, 0x8e7b847b,
- 0x8e76827b, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747074, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a7b7c,
- 0xbf85ffe7, 0xbef40172,
- 0xbef00080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf85004d,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
- 0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
- 0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbf820008, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb4306,
- 0x867bc17b, 0xbf840064,
- 0xbf8a0000, 0x867aff6f,
- 0x04000000, 0xbf840060,
- 0x8e7b867b, 0x8e7b827b,
- 0xbef6007b, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
+ 0xbf85004d, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -1326,31 +1469,50 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
0xbefe00c1, 0xbeff00c1,
- 0xb8fb2a05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840064, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840060, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -1370,215 +1532,259 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2a05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xbefc0080,
- 0xbf11017c, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850059,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbefc0080, 0xbf11017c,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850059, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xbe840080,
+ 0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffa9, 0xbf9c0000,
- 0xbf820016, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffeb,
- 0xbf9c0000, 0xbf8200e3,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x04000000,
- 0xbf84001f, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf84001a,
- 0x8e6f866f, 0x8e6f826f,
- 0xbef6006f, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x8078ff78,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xe0510000, 0x781d0000,
- 0xe0510100, 0x781d0000,
- 0x807cff7c, 0x00000200,
- 0x8078ff78, 0x00000200,
- 0xbf0a6f7c, 0xbf85fff6,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffa9,
+ 0xbf9c0000, 0xbf820016,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffeb, 0xbf9c0000,
+ 0xbf8200e3, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x866eff7f,
+ 0x04000000, 0xbf84001f,
0xbefe00c1, 0xbeff00c1,
+ 0xb8ef4306, 0x866fc16f,
+ 0xbf84001a, 0x8e6f866f,
+ 0x8e6f826f, 0xbef6006f,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xb8ef2a05, 0x806f816f,
- 0x8e6f826f, 0x806fff6f,
- 0x00008000, 0xbef80080,
- 0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbefc0084,
- 0xbf11087c, 0xe0524000,
- 0x781d0000, 0xe0524100,
- 0x781d0100, 0xe0524200,
- 0x781d0200, 0xe0524300,
- 0x781d0300, 0xbf8c0f70,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
- 0x807c847c, 0x8078ff78,
- 0x00000400, 0xbf0a6f7c,
- 0xbf85ffee, 0xbefc0080,
- 0xbf11087c, 0xe0524000,
- 0x781d0000, 0xe0524100,
- 0x781d0100, 0xe0524200,
- 0x781d0200, 0xe0524300,
- 0x781d0300, 0xbf8c0f70,
- 0xd3d94000, 0x18000100,
- 0xd3d94001, 0x18000101,
- 0xd3d94002, 0x18000102,
- 0xd3d94003, 0x18000103,
- 0x807c847c, 0x8078ff78,
- 0x00000400, 0xbf0a6f7c,
- 0xbf85ffea, 0xbf9c0000,
- 0xe0524000, 0x6e1d0000,
- 0xe0524100, 0x6e1d0100,
- 0xe0524200, 0x6e1d0200,
- 0xe0524300, 0x6e1d0300,
- 0xbf8c0f70, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0xbefc0080, 0xe0510000,
+ 0x781d0000, 0xe0510100,
+ 0x781d0000, 0x807cff7c,
+ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85fff6, 0xbefe00c1,
+ 0xbeff00c1, 0xbef600ff,
+ 0x01000000, 0xb8ef2a05,
+ 0x806f816f, 0x8e6f826f,
+ 0x806fff6f, 0x00008000,
+ 0xbef80080, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe0070, 0xbeff0071,
- 0x866f7bff, 0x000003ff,
- 0xb96f4803, 0x866f7bff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2a05, 0x806e816e,
- 0x8e6e8a6e, 0x8e6e816e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf810000,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_aldebaran_hex[] = {
- 0xbf820001, 0xbf8202df,
+ 0xbf820001, 0xbf8202e0,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -1695,99 +1901,37 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fb1605,
- 0x807b817b, 0x8e7b847b,
- 0x8e76827b, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747074, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a7b7c,
- 0xbf85ffe7, 0xbef40172,
- 0xbef00080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf85004d,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
- 0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
- 0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbf820008, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb4306,
- 0x867bc17b, 0xbf840064,
- 0xbf8a0000, 0x867aff6f,
- 0x04000000, 0xbf840060,
- 0x8e7b867b, 0x8e7b827b,
- 0xbef6007b, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
+ 0xbf85004d, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -1806,31 +1950,50 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
0xbefe00c1, 0xbeff00c1,
- 0xb8fb2b05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840064, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840060, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -1850,51 +2013,31 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xb8fb2985,
- 0x807b817b, 0x8e7b837b,
- 0xb8fa2b05, 0x807a817a,
- 0x8e7a827a, 0x80fb7a7b,
- 0x867b7b7b, 0xbf84007a,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
0x807bff7b, 0x00001000,
- 0xbefc0080, 0xbf11017c,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850059, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xbe840080,
+ 0xbf850051, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -1933,139 +2076,203 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0x807c847c,
- 0xbf0a7b7c, 0xbf85ffa9,
- 0xbf9c0000, 0xbf820016,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0xe0724000, 0x701d0000,
0xe0724100, 0x701d0100,
0xe0724200, 0x701d0200,
0xe0724300, 0x701d0300,
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
- 0xbf85ffeb, 0xbf9c0000,
- 0xbf8200ee, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x866eff7f,
- 0x04000000, 0xbf84001f,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200ee,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001f, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf84001a,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
0xbefe00c1, 0xbeff00c1,
- 0xb8ef4306, 0x866fc16f,
- 0xbf84001a, 0x8e6f866f,
- 0x8e6f826f, 0xbef6006f,
- 0xb8f82985, 0x80788178,
- 0x8e788a78, 0x8e788178,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
- 0x781d0000, 0xe0510100,
- 0x781d0000, 0x807cff7c,
- 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbefe00c1,
- 0xbeff00c1, 0xbef600ff,
- 0x01000000, 0xb8ef2b05,
- 0x806f816f, 0x8e6f826f,
- 0x806fff6f, 0x00008000,
- 0xbef80080, 0xbeee0078,
- 0x8078ff78, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xb8ef2985, 0x806f816f,
- 0x8e6f836f, 0xb8f92b05,
- 0x80798179, 0x8e798279,
- 0x80ef796f, 0x866f6f6f,
- 0xbf84001a, 0x806fff6f,
- 0x00008000, 0xbefc0080,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
0xbf11087c, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
0x781d0300, 0xbf8c0f70,
- 0xd3d94000, 0x18000100,
- 0xd3d94001, 0x18000101,
- 0xd3d94002, 0x18000102,
- 0xd3d94003, 0x18000103,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
- 0xbf85ffea, 0xbf9c0000,
- 0xe0524000, 0x6e1d0000,
- 0xe0524100, 0x6e1d0100,
- 0xe0524200, 0x6e1d0200,
- 0xe0524300, 0x6e1d0300,
- 0xbf8c0f70, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe0070, 0xbeff0071,
- 0x866f7bff, 0x000003ff,
- 0xb96f4803, 0x866f7bff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2985, 0x806e816e,
- 0x8e6e8a6e, 0x8e6e816e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf810000,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_gfx10_hex[] = {
@@ -2095,19 +2302,19 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf0d8f7b, 0xbf840002,
0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010,
- 0xbf8cc07f, 0x8f6e976e,
+ 0xbf8c0000, 0x8f6e976e,
0x8a77ff77, 0x00800000,
0x88776e77, 0xf4051bbd,
- 0xfa000000, 0xbf8cc07f,
+ 0xfa000000, 0xbf8c0000,
0xf4051ebd, 0xfa000008,
- 0xbf8cc07f, 0x87ee6e6e,
+ 0xbf8c0000, 0x87ee6e6e,
0xbf840001, 0xbe80206e,
- 0x876eff6d, 0x01ff0000,
- 0xbf850005, 0x8878ff78,
- 0x00002000, 0x80ec886c,
- 0x82ed806d, 0xbf820005,
- 0x876eff6d, 0x01000000,
- 0xbf850002, 0x806c846c,
+ 0x876eff6d, 0x00ff0000,
+ 0xbf850008, 0x876eff6d,
+ 0x01000000, 0xbf850007,
+ 0x8878ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xbf820002, 0x806c846c,
0x826d806d, 0x876dff6d,
0x0000ffff, 0x87fe7e7e,
0x87ea6a6a, 0xb9f8f802,
@@ -2115,162 +2322,162 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x0000ffff, 0xbefa0380,
0xb9fa0283, 0xbeee037e,
0xbeef037f, 0xbefe0480,
- 0xbf900004, 0xbf8cc07f,
+ 0xbf900004, 0xbf8c0000,
0x877aff7f, 0x04000000,
0x8f7a857a, 0x886d7a6d,
- 0xbefa037e, 0x877bff7f,
- 0x0000ffff, 0xbefe03c1,
- 0xbeff03c1, 0xdc5f8000,
- 0x007a0000, 0x7e000280,
- 0xbefe037a, 0xbeff037b,
- 0xb97b02dc, 0x8f7b997b,
- 0xb97a3a05, 0x807a817a,
- 0xbf0d997b, 0xbf850002,
- 0x8f7a897a, 0xbf820001,
- 0x8f7a8a7a, 0xb97b1e06,
- 0x8f7b8a7b, 0x807a7b7a,
+ 0x7e008200, 0xbefa037e,
0x877bff7f, 0x0000ffff,
- 0x807aff7a, 0x00000200,
- 0x807a7e7a, 0x827b807b,
- 0xd7610000, 0x00010870,
- 0xd7610000, 0x00010a71,
- 0xd7610000, 0x00010c72,
- 0xd7610000, 0x00010e73,
- 0xd7610000, 0x00011074,
- 0xd7610000, 0x00011275,
- 0xd7610000, 0x00011476,
- 0xd7610000, 0x00011677,
- 0xd7610000, 0x00011a79,
- 0xd7610000, 0x00011c7e,
- 0xd7610000, 0x00011e7f,
- 0xbefe03ff, 0x00003fff,
- 0xbeff0380, 0xdc5f8040,
- 0x007a0000, 0xd760007a,
- 0x00011d00, 0xd760007b,
- 0x00011f00, 0xbefe037a,
- 0xbeff037b, 0xbef4037e,
- 0x8775ff7f, 0x0000ffff,
- 0x8875ff75, 0x00040000,
- 0xbef60380, 0xbef703ff,
- 0x10807fac, 0xbef1037c,
- 0xbef00380, 0xb97302dc,
- 0x8f739973, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850002,
- 0xbeff0380, 0xbf820002,
- 0xbeff03c1, 0xbf820009,
+ 0xbefe03c1, 0xbeff03c1,
+ 0xdc5f8000, 0x007a0000,
+ 0x7e000280, 0xbefe037a,
+ 0xbeff037b, 0xb97b02dc,
+ 0x8f7b997b, 0xb97a3a05,
+ 0x807a817a, 0xbf0d997b,
+ 0xbf850002, 0x8f7a897a,
+ 0xbf820001, 0x8f7a8a7a,
+ 0xb97b1e06, 0x8f7b8a7b,
+ 0x807a7b7a, 0x877bff7f,
+ 0x0000ffff, 0x807aff7a,
+ 0x00000200, 0x807a7e7a,
+ 0x827b807b, 0xd7610000,
+ 0x00010870, 0xd7610000,
+ 0x00010a71, 0xd7610000,
+ 0x00010c72, 0xd7610000,
+ 0x00010e73, 0xd7610000,
+ 0x00011074, 0xd7610000,
+ 0x00011275, 0xd7610000,
+ 0x00011476, 0xd7610000,
+ 0x00011677, 0xd7610000,
+ 0x00011a79, 0xd7610000,
+ 0x00011c7e, 0xd7610000,
+ 0x00011e7f, 0xbefe03ff,
+ 0x00003fff, 0xbeff0380,
+ 0xdc5f8040, 0x007a0000,
+ 0xd760007a, 0x00011d00,
+ 0xd760007b, 0x00011f00,
+ 0xbefe037a, 0xbeff037b,
+ 0xbef4037e, 0x8775ff7f,
+ 0x0000ffff, 0x8875ff75,
+ 0x00040000, 0xbef60380,
+ 0xbef703ff, 0x10807fac,
+ 0xbef1037c, 0xbef00380,
+ 0xb97302dc, 0x8f739973,
+ 0xbefe03c1, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850002, 0xbeff0380,
+ 0xbf820002, 0xbeff03c1,
+ 0xbf820009, 0xbef603ff,
+ 0x01000000, 0xe0704080,
+ 0x705d0100, 0xe0704100,
+ 0x705d0200, 0xe0704180,
+ 0x705d0300, 0xbf820008,
0xbef603ff, 0x01000000,
- 0xe0704080, 0x705d0100,
- 0xe0704100, 0x705d0200,
- 0xe0704180, 0x705d0300,
- 0xbf820008, 0xbef603ff,
- 0x01000000, 0xe0704100,
- 0x705d0100, 0xe0704200,
- 0x705d0200, 0xe0704300,
- 0x705d0300, 0xb9703a05,
- 0x80708170, 0xbf0d9973,
- 0xbf850002, 0x8f708970,
- 0xbf820001, 0x8f708a70,
- 0xb97a1e06, 0x8f7a8a7a,
- 0x80707a70, 0x8070ff70,
- 0x00000200, 0xbef603ff,
- 0x01000000, 0x7e000280,
- 0x7e020280, 0x7e040280,
- 0xbefc0380, 0xd7610002,
- 0x0000f871, 0x807c817c,
- 0xd7610002, 0x0000f86c,
- 0x807c817c, 0x8a7aff6d,
- 0x80000000, 0xd7610002,
- 0x0000f87a, 0x807c817c,
- 0xd7610002, 0x0000f86e,
- 0x807c817c, 0xd7610002,
- 0x0000f86f, 0x807c817c,
- 0xd7610002, 0x0000f878,
- 0x807c817c, 0xb97af803,
- 0xd7610002, 0x0000f87a,
- 0x807c817c, 0xd7610002,
- 0x0000f87b, 0x807c817c,
- 0xb971f801, 0xd7610002,
- 0x0000f871, 0x807c817c,
- 0xb971f814, 0xd7610002,
- 0x0000f871, 0x807c817c,
- 0xb971f815, 0xd7610002,
- 0x0000f871, 0x807c817c,
- 0xbefe03ff, 0x0000ffff,
- 0xbeff0380, 0xe0704000,
- 0x705d0200, 0xbefe03c1,
+ 0xe0704100, 0x705d0100,
+ 0xe0704200, 0x705d0200,
+ 0xe0704300, 0x705d0300,
0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
0x8f7a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
0xbef603ff, 0x01000000,
- 0xbef90380, 0xbefc0380,
- 0xbf800000, 0xbe802f00,
- 0xbe822f02, 0xbe842f04,
- 0xbe862f06, 0xbe882f08,
- 0xbe8a2f0a, 0xbe8c2f0c,
- 0xbe8e2f0e, 0xd7610002,
- 0x0000f200, 0x80798179,
- 0xd7610002, 0x0000f201,
+ 0x7e000280, 0x7e020280,
+ 0x7e040280, 0xbefc0380,
+ 0xd7610002, 0x0000f871,
+ 0x807c817c, 0xd7610002,
+ 0x0000f86c, 0x807c817c,
+ 0x8a7aff6d, 0x80000000,
+ 0xd7610002, 0x0000f87a,
+ 0x807c817c, 0xd7610002,
+ 0x0000f86e, 0x807c817c,
+ 0xd7610002, 0x0000f86f,
+ 0x807c817c, 0xd7610002,
+ 0x0000f878, 0x807c817c,
+ 0xb97af803, 0xd7610002,
+ 0x0000f87a, 0x807c817c,
+ 0xd7610002, 0x0000f87b,
+ 0x807c817c, 0xb971f801,
+ 0xd7610002, 0x0000f871,
+ 0x807c817c, 0xb971f814,
+ 0xd7610002, 0x0000f871,
+ 0x807c817c, 0xb971f815,
+ 0xd7610002, 0x0000f871,
+ 0x807c817c, 0xbefe03ff,
+ 0x0000ffff, 0xbeff0380,
+ 0xe0704000, 0x705d0200,
+ 0xbefe03c1, 0xb9703a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0xbef603ff,
+ 0x01000000, 0xbef90380,
+ 0xbefc0380, 0xbf800000,
+ 0xbe802f00, 0xbe822f02,
+ 0xbe842f04, 0xbe862f06,
+ 0xbe882f08, 0xbe8a2f0a,
+ 0xbe8c2f0c, 0xbe8e2f0e,
+ 0xd7610002, 0x0000f200,
0x80798179, 0xd7610002,
- 0x0000f202, 0x80798179,
- 0xd7610002, 0x0000f203,
+ 0x0000f201, 0x80798179,
+ 0xd7610002, 0x0000f202,
0x80798179, 0xd7610002,
- 0x0000f204, 0x80798179,
- 0xd7610002, 0x0000f205,
+ 0x0000f203, 0x80798179,
+ 0xd7610002, 0x0000f204,
0x80798179, 0xd7610002,
- 0x0000f206, 0x80798179,
- 0xd7610002, 0x0000f207,
+ 0x0000f205, 0x80798179,
+ 0xd7610002, 0x0000f206,
0x80798179, 0xd7610002,
- 0x0000f208, 0x80798179,
- 0xd7610002, 0x0000f209,
+ 0x0000f207, 0x80798179,
+ 0xd7610002, 0x0000f208,
0x80798179, 0xd7610002,
- 0x0000f20a, 0x80798179,
- 0xd7610002, 0x0000f20b,
+ 0x0000f209, 0x80798179,
+ 0xd7610002, 0x0000f20a,
0x80798179, 0xd7610002,
- 0x0000f20c, 0x80798179,
- 0xd7610002, 0x0000f20d,
+ 0x0000f20b, 0x80798179,
+ 0xd7610002, 0x0000f20c,
0x80798179, 0xd7610002,
- 0x0000f20e, 0x80798179,
- 0xd7610002, 0x0000f20f,
- 0x80798179, 0xbf06a079,
- 0xbf840006, 0xe0704000,
- 0x705d0200, 0x8070ff70,
- 0x00000080, 0xbef90380,
- 0x7e040280, 0x807c907c,
- 0xbf0aff7c, 0x00000060,
- 0xbf85ffbc, 0xbe802f00,
- 0xbe822f02, 0xbe842f04,
- 0xbe862f06, 0xbe882f08,
- 0xbe8a2f0a, 0xd7610002,
- 0x0000f200, 0x80798179,
- 0xd7610002, 0x0000f201,
+ 0x0000f20d, 0x80798179,
+ 0xd7610002, 0x0000f20e,
0x80798179, 0xd7610002,
- 0x0000f202, 0x80798179,
- 0xd7610002, 0x0000f203,
+ 0x0000f20f, 0x80798179,
+ 0xbf06a079, 0xbf840006,
+ 0xe0704000, 0x705d0200,
+ 0x8070ff70, 0x00000080,
+ 0xbef90380, 0x7e040280,
+ 0x807c907c, 0xbf0aff7c,
+ 0x00000060, 0xbf85ffbc,
+ 0xbe802f00, 0xbe822f02,
+ 0xbe842f04, 0xbe862f06,
+ 0xbe882f08, 0xbe8a2f0a,
+ 0xd7610002, 0x0000f200,
0x80798179, 0xd7610002,
- 0x0000f204, 0x80798179,
- 0xd7610002, 0x0000f205,
+ 0x0000f201, 0x80798179,
+ 0xd7610002, 0x0000f202,
0x80798179, 0xd7610002,
- 0x0000f206, 0x80798179,
- 0xd7610002, 0x0000f207,
+ 0x0000f203, 0x80798179,
+ 0xd7610002, 0x0000f204,
0x80798179, 0xd7610002,
- 0x0000f208, 0x80798179,
- 0xd7610002, 0x0000f209,
+ 0x0000f205, 0x80798179,
+ 0xd7610002, 0x0000f206,
0x80798179, 0xd7610002,
- 0x0000f20a, 0x80798179,
- 0xd7610002, 0x0000f20b,
- 0x80798179, 0xe0704000,
- 0x705d0200, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850002,
- 0xbeff0380, 0xbf820001,
- 0xbeff03c1, 0xb97b4306,
- 0x877bc17b, 0xbf840044,
- 0xbf8a0000, 0x877aff6d,
- 0x80000000, 0xbf840040,
- 0x8f7b867b, 0x8f7b827b,
+ 0x0000f207, 0x80798179,
+ 0xd7610002, 0x0000f208,
+ 0x80798179, 0xd7610002,
+ 0x0000f209, 0x80798179,
+ 0xd7610002, 0x0000f20a,
+ 0x80798179, 0xd7610002,
+ 0x0000f20b, 0x80798179,
+ 0xe0704000, 0x705d0200,
+ 0xbefe03c1, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850002, 0xbeff0380,
+ 0xbf820001, 0xbeff03c1,
+ 0xb97b4306, 0x877bc17b,
+ 0xbf840043, 0xbf8a0000,
+ 0x877aff6d, 0x80000000,
+ 0xbf84003f, 0x8f7b887b,
0xbef6037b, 0xb9703a05,
0x80708170, 0xbf0d9973,
0xbf850002, 0x8f708970,
@@ -2341,172 +2548,169 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xe0704000, 0x705d0000,
0x807c817c, 0x8070ff70,
0x00000080, 0xbf0a7b7c,
- 0xbf85fff8, 0xbf82013b,
+ 0xbf85fff8, 0xbf820136,
0xbef4037e, 0x8775ff7f,
0x0000ffff, 0x8875ff75,
0x00040000, 0xbef60380,
0xbef703ff, 0x10807fac,
0xb97202dc, 0x8f729972,
0x876eff7f, 0x04000000,
- 0xbf840034, 0xbefe03c1,
+ 0xbf840033, 0xbefe03c1,
0x907c9972, 0x877c817c,
0xbf06817c, 0xbf850002,
0xbeff0380, 0xbf820001,
0xbeff03c1, 0xb96f4306,
- 0x876fc16f, 0xbf840029,
- 0x8f6f866f, 0x8f6f826f,
- 0xbef6036f, 0xb9783a05,
- 0x80788178, 0xbf0d9972,
- 0xbf850002, 0x8f788978,
- 0xbf820001, 0x8f788a78,
- 0xb96e1e06, 0x8f6e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0x8078ff78,
- 0x00000080, 0xbef603ff,
- 0x01000000, 0x907c9972,
- 0x877c817c, 0xbf06817c,
- 0xbefc0380, 0xbf850009,
- 0xe0310000, 0x781d0000,
- 0x807cff7c, 0x00000080,
+ 0x876fc16f, 0xbf840028,
+ 0x8f6f886f, 0xbef6036f,
+ 0xb9783a05, 0x80788178,
+ 0xbf0d9972, 0xbf850002,
+ 0x8f788978, 0xbf820001,
+ 0x8f788a78, 0xb96e1e06,
+ 0x8f6e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
0x8078ff78, 0x00000080,
- 0xbf0a6f7c, 0xbf85fff8,
- 0xbf820008, 0xe0310000,
+ 0xbef603ff, 0x01000000,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbefc0380,
+ 0xbf850009, 0xe0310000,
0x781d0000, 0x807cff7c,
- 0x00000100, 0x8078ff78,
- 0x00000100, 0xbf0a6f7c,
- 0xbf85fff8, 0xbef80380,
- 0xbefe03c1, 0x907c9972,
- 0x877c817c, 0xbf06817c,
- 0xbf850002, 0xbeff0380,
- 0xbf820001, 0xbeff03c1,
- 0xb96f3a05, 0x806f816f,
- 0x8f6f826f, 0x907c9972,
- 0x877c817c, 0xbf06817c,
- 0xbf850024, 0xbef603ff,
- 0x01000000, 0xbeee0378,
- 0x8078ff78, 0x00000200,
- 0xbefc0384, 0xbf0a6f7c,
- 0xbf840050, 0xe0304000,
- 0x785d0000, 0xe0304080,
- 0x785d0100, 0xe0304100,
- 0x785d0200, 0xe0304180,
- 0x785d0300, 0xbf8c3f70,
- 0x7e008500, 0x7e028501,
- 0x7e048502, 0x7e068503,
- 0x807c847c, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85ffee, 0xe0304000,
- 0x6e5d0000, 0xe0304080,
- 0x6e5d0100, 0xe0304100,
- 0x6e5d0200, 0xe0304180,
- 0x6e5d0300, 0xbf8c3f70,
- 0xbf820034, 0xbef603ff,
- 0x01000000, 0xbeee0378,
- 0x8078ff78, 0x00000400,
- 0xbefc0384, 0xbf0a6f7c,
- 0xbf840012, 0xe0304000,
- 0x785d0000, 0xe0304100,
- 0x785d0100, 0xe0304200,
- 0x785d0200, 0xe0304300,
- 0x785d0300, 0xbf8c3f70,
- 0x7e008500, 0x7e028501,
- 0x7e048502, 0x7e068503,
- 0x807c847c, 0x8078ff78,
- 0x00000400, 0xbf0a6f7c,
- 0xbf85ffee, 0xb96f1e06,
- 0x876fc16f, 0xbf84000e,
- 0x8f6f836f, 0x806f7c6f,
- 0xbefe03c1, 0xbeff0380,
- 0xe0304000, 0x785d0000,
- 0xbf8c3f70, 0x7e008500,
- 0x807c817c, 0x8078ff78,
+ 0x00000080, 0x8078ff78,
0x00000080, 0xbf0a6f7c,
- 0xbf85fff7, 0xbeff03c1,
+ 0xbf85fff8, 0xbf820008,
+ 0xe0310000, 0x781d0000,
+ 0x807cff7c, 0x00000100,
+ 0x8078ff78, 0x00000100,
+ 0xbf0a6f7c, 0xbf85fff8,
+ 0xbef80380, 0xbefe03c1,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0xbeff0380, 0xbf820001,
+ 0xbeff03c1, 0xb96f3a05,
+ 0x806f816f, 0x8f6f826f,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbf850024,
+ 0xbef603ff, 0x01000000,
+ 0xbeee0378, 0x8078ff78,
+ 0x00000200, 0xbefc0384,
+ 0xbf0a6f7c, 0xbf840050,
+ 0xe0304000, 0x785d0000,
+ 0xe0304080, 0x785d0100,
+ 0xe0304100, 0x785d0200,
+ 0xe0304180, 0x785d0300,
+ 0xbf8c0000, 0x7e008500,
+ 0x7e028501, 0x7e048502,
+ 0x7e068503, 0x807c847c,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85ffee,
0xe0304000, 0x6e5d0000,
- 0xe0304100, 0x6e5d0100,
- 0xe0304200, 0x6e5d0200,
- 0xe0304300, 0x6e5d0300,
- 0xbf8c3f70, 0xb9783a05,
- 0x80788178, 0xbf0d9972,
- 0xbf850002, 0x8f788978,
- 0xbf820001, 0x8f788a78,
- 0xb96e1e06, 0x8f6e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0x80f8ff78,
- 0x00000050, 0xbef603ff,
- 0x01000000, 0xbefc03ff,
- 0x0000006c, 0x80f89078,
- 0xf429003a, 0xf0000000,
- 0xbf8cc07f, 0x80fc847c,
- 0xbf800000, 0xbe803100,
- 0xbe823102, 0x80f8a078,
- 0xf42d003a, 0xf0000000,
- 0xbf8cc07f, 0x80fc887c,
- 0xbf800000, 0xbe803100,
- 0xbe823102, 0xbe843104,
- 0xbe863106, 0x80f8c078,
- 0xf431003a, 0xf0000000,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe803100,
- 0xbe823102, 0xbe843104,
- 0xbe863106, 0xbe883108,
- 0xbe8a310a, 0xbe8c310c,
- 0xbe8e310e, 0xbf06807c,
- 0xbf84fff0, 0xba80f801,
- 0x00000000, 0xbf8a0000,
+ 0xe0304080, 0x6e5d0100,
+ 0xe0304100, 0x6e5d0200,
+ 0xe0304180, 0x6e5d0300,
+ 0xbf8c0000, 0xbf820034,
+ 0xbef603ff, 0x01000000,
+ 0xbeee0378, 0x8078ff78,
+ 0x00000400, 0xbefc0384,
+ 0xbf0a6f7c, 0xbf840012,
+ 0xe0304000, 0x785d0000,
+ 0xe0304100, 0x785d0100,
+ 0xe0304200, 0x785d0200,
+ 0xe0304300, 0x785d0300,
+ 0xbf8c0000, 0x7e008500,
+ 0x7e028501, 0x7e048502,
+ 0x7e068503, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xb96f1e06, 0x876fc16f,
+ 0xbf84000e, 0x8f6f836f,
+ 0x806f7c6f, 0xbefe03c1,
+ 0xbeff0380, 0xe0304000,
+ 0x785d0000, 0xbf8c0000,
+ 0x7e008500, 0x807c817c,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7c, 0xbf85fff7,
+ 0xbeff03c1, 0xe0304000,
+ 0x6e5d0000, 0xe0304100,
+ 0x6e5d0100, 0xe0304200,
+ 0x6e5d0200, 0xe0304300,
+ 0x6e5d0300, 0xbf8c0000,
0xb9783a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
0x8f788a78, 0xb96e1e06,
0x8f6e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
+ 0x80f8ff78, 0x00000050,
0xbef603ff, 0x01000000,
- 0xf4211bfa, 0xf0000000,
- 0x80788478, 0xf4211b3a,
+ 0xbefc03ff, 0x0000006c,
+ 0x80f89078, 0xf429003a,
+ 0xf0000000, 0xbf8c0000,
+ 0x80fc847c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0x80f8a078, 0xf42d003a,
+ 0xf0000000, 0xbf8c0000,
+ 0x80fc887c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0xbe843104, 0xbe863106,
+ 0x80f8c078, 0xf431003a,
+ 0xf0000000, 0xbf8c0000,
+ 0x80fc907c, 0xbf800000,
+ 0xbe803100, 0xbe823102,
+ 0xbe843104, 0xbe863106,
+ 0xbe883108, 0xbe8a310a,
+ 0xbe8c310c, 0xbe8e310e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xba80f801, 0x00000000,
+ 0xbf8a0000, 0xb9783a05,
+ 0x80788178, 0xbf0d9972,
+ 0xbf850002, 0x8f788978,
+ 0xbf820001, 0x8f788a78,
+ 0xb96e1e06, 0x8f6e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0xbef603ff,
+ 0x01000000, 0xf4211bfa,
0xf0000000, 0x80788478,
- 0xf4211b7a, 0xf0000000,
- 0x80788478, 0xf4211c3a,
+ 0xf4211b3a, 0xf0000000,
+ 0x80788478, 0xf4211b7a,
0xf0000000, 0x80788478,
- 0xf4211c7a, 0xf0000000,
- 0x80788478, 0xf4211eba,
+ 0xf4211c3a, 0xf0000000,
+ 0x80788478, 0xf4211c7a,
0xf0000000, 0x80788478,
- 0xf4211efa, 0xf0000000,
- 0x80788478, 0xf4211e7a,
+ 0xf4211eba, 0xf0000000,
+ 0x80788478, 0xf4211efa,
0xf0000000, 0x80788478,
- 0xf4211cfa, 0xf0000000,
- 0x80788478, 0xf4211bba,
+ 0xf4211e7a, 0xf0000000,
+ 0x80788478, 0xf4211cfa,
0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef814,
0xf4211bba, 0xf0000000,
- 0x80788478, 0xbf8cc07f,
- 0xb9eef815, 0xbefc036f,
- 0xbefe0370, 0xbeff0371,
- 0x876f7bff, 0x000003ff,
- 0xb9ef4803, 0x876f7bff,
- 0xfffff800, 0x906f8b6f,
- 0xb9efa2c3, 0xb9f3f801,
- 0xb96e3a05, 0x806e816e,
- 0xbf0d9972, 0xbf850002,
- 0x8f6e896e, 0xbf820001,
- 0x8f6e8a6e, 0xb96f1e06,
- 0x8f6f8a6f, 0x806e6f6e,
- 0x806eff6e, 0x00000200,
- 0x806e746e, 0x826f8075,
- 0x876fff6f, 0x0000ffff,
- 0xf4091c37, 0xfa000050,
- 0xf4091d37, 0xfa000060,
- 0xf4011e77, 0xfa000074,
- 0xbf8cc07f, 0x876dff6d,
- 0x0000ffff, 0x87fe7e7e,
- 0x87ea6a6a, 0xb9faf802,
- 0xbe80226c, 0xbf810000,
+ 0x80788478, 0xbf8c0000,
+ 0xb9eef814, 0xf4211bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8c0000, 0xb9eef815,
+ 0xbefc036f, 0xbefe0370,
+ 0xbeff0371, 0xb9fb4803,
+ 0x907b8b7b, 0xb9fba2c3,
+ 0xb9f3f801, 0xb96e3a05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbf850002, 0x8f6e896e,
+ 0xbf820001, 0x8f6e8a6e,
+ 0xb96f1e06, 0x8f6f8a6f,
+ 0x806e6f6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x876fff6f,
+ 0x0000ffff, 0xf4091c37,
+ 0xfa000050, 0xf4091d37,
+ 0xfa000060, 0xf4011e77,
+ 0xfa000074, 0xbf8c0000,
+ 0x876dff6d, 0x0000ffff,
+ 0x87fe7e7e, 0x87ea6a6a,
+ 0xb9faf802, 0xbe80226c,
+ 0xbf9b0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
- 0xbf9f0000, 0x00000000,
};
static const uint32_t cwsr_trap_gfx11_hex[] = {
- 0xbfa00001, 0xbfa00225,
+ 0xbfa00001, 0xbfa00227,
0xb0804006, 0xb8f8f802,
0x9178ff78, 0x00020006,
0xb8fbf803, 0xbf0d9e6d,
@@ -2518,7 +2722,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0x8b6eff7b, 0x00000400,
0xbfa20045, 0xbf830010,
0xb8fbf803, 0xbfa0fffa,
- 0x8b6eff7b, 0x00000900,
+ 0x8b6eff7b, 0x00160900,
0xbfa20015, 0x8b6eff7b,
0x000071ff, 0xbfa10008,
0x8b6fff7b, 0x00007080,
@@ -2530,187 +2734,188 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0x8b6eff6e, 0x00000800,
0xbfa20003, 0x8b6eff7b,
0x00000400, 0xbfa2002a,
- 0xbefa4d82, 0xbf89fc07,
+ 0xbefa4d82, 0xbf890000,
0x84fa887a, 0xbf0d8f7b,
0xbfa10002, 0x8c7bff7b,
0xffff0000, 0xf4005bbd,
- 0xf8000010, 0xbf89fc07,
+ 0xf8000010, 0xbf890000,
0x846e976e, 0x9177ff77,
0x00800000, 0x8c776e77,
0xf4045bbd, 0xf8000000,
- 0xbf89fc07, 0xf4045ebd,
- 0xf8000008, 0xbf89fc07,
+ 0xbf890000, 0xf4045ebd,
+ 0xf8000008, 0xbf890000,
0x8bee6e6e, 0xbfa10001,
0xbe80486e, 0x8b6eff6d,
- 0x01ff0000, 0xbfa20005,
- 0x8c78ff78, 0x00002000,
- 0x80ec886c, 0x82ed806d,
- 0xbfa00005, 0x8b6eff6d,
- 0x01000000, 0xbfa20002,
+ 0x00ff0000, 0xbfa20008,
+ 0x8b6eff6d, 0x01000000,
+ 0xbfa20007, 0x8c78ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbfa00002,
0x806c846c, 0x826d806d,
0x8b6dff6d, 0x0000ffff,
0x8bfe7e7e, 0x8bea6a6a,
0xb978f802, 0xbe804a6c,
- 0x8b6dff6d, 0x0000ffff,
- 0xbefa0080, 0xb97a0283,
- 0xbeee007e, 0xbeef007f,
- 0xbefe0180, 0xbefe4d84,
- 0xbf89fc07, 0x8b7aff7f,
- 0x04000000, 0x847a857a,
- 0x8c6d7a6d, 0xbefa007e,
- 0x8b7bff7f, 0x0000ffff,
- 0xbefe00c1, 0xbeff00c1,
- 0xdca6c000, 0x007a0000,
- 0x7e000280, 0xbefe007a,
- 0xbeff007b, 0xb8fb02dc,
- 0x847b997b, 0xb8fa3b05,
- 0x807a817a, 0xbf0d997b,
- 0xbfa20002, 0x847a897a,
- 0xbfa00001, 0x847a8a7a,
- 0xb8fb1e06, 0x847b8a7b,
- 0x807a7b7a, 0x8b7bff7f,
- 0x0000ffff, 0x807aff7a,
- 0x00000200, 0x807a7e7a,
- 0x827b807b, 0xd7610000,
- 0x00010870, 0xd7610000,
- 0x00010a71, 0xd7610000,
- 0x00010c72, 0xd7610000,
- 0x00010e73, 0xd7610000,
- 0x00011074, 0xd7610000,
- 0x00011275, 0xd7610000,
- 0x00011476, 0xd7610000,
- 0x00011677, 0xd7610000,
- 0x00011a79, 0xd7610000,
- 0x00011c7e, 0xd7610000,
- 0x00011e7f, 0xbefe00ff,
- 0x00003fff, 0xbeff0080,
- 0xdca6c040, 0x007a0000,
- 0xd760007a, 0x00011d00,
- 0xd760007b, 0x00011f00,
+ 0xbf0d9878, 0xbfa10001,
+ 0xbfb00000, 0x8b6dff6d,
+ 0x0000ffff, 0xbefa0080,
+ 0xb97a0283, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbefe4d84, 0xbf890000,
+ 0x8b7aff7f, 0x04000000,
+ 0x847a857a, 0x8c6d7a6d,
+ 0xbefa007e, 0x8b7bff7f,
+ 0x0000ffff, 0xbefe00c1,
+ 0xbeff00c1, 0xdca6c000,
+ 0x007a0000, 0x7e000280,
0xbefe007a, 0xbeff007b,
- 0xbef4007e, 0x8b75ff7f,
- 0x0000ffff, 0x8c75ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x10807fac,
- 0xbef1007d, 0xbef00080,
- 0xb8f302dc, 0x84739973,
- 0xbefe00c1, 0x857d9973,
- 0x8b7d817d, 0xbf06817d,
- 0xbfa20002, 0xbeff0080,
- 0xbfa00002, 0xbeff00c1,
- 0xbfa00009, 0xbef600ff,
- 0x01000000, 0xe0685080,
- 0x701d0100, 0xe0685100,
- 0x701d0200, 0xe0685180,
- 0x701d0300, 0xbfa00008,
+ 0xb8fb02dc, 0x847b997b,
+ 0xb8fa3b05, 0x807a817a,
+ 0xbf0d997b, 0xbfa20002,
+ 0x847a897a, 0xbfa00001,
+ 0x847a8a7a, 0xb8fb1e06,
+ 0x847b8a7b, 0x807a7b7a,
+ 0x8b7bff7f, 0x0000ffff,
+ 0x807aff7a, 0x00000200,
+ 0x807a7e7a, 0x827b807b,
+ 0xd7610000, 0x00010870,
+ 0xd7610000, 0x00010a71,
+ 0xd7610000, 0x00010c72,
+ 0xd7610000, 0x00010e73,
+ 0xd7610000, 0x00011074,
+ 0xd7610000, 0x00011275,
+ 0xd7610000, 0x00011476,
+ 0xd7610000, 0x00011677,
+ 0xd7610000, 0x00011a79,
+ 0xd7610000, 0x00011c7e,
+ 0xd7610000, 0x00011e7f,
+ 0xbefe00ff, 0x00003fff,
+ 0xbeff0080, 0xdca6c040,
+ 0x007a0000, 0xd760007a,
+ 0x00011d00, 0xd760007b,
+ 0x00011f00, 0xbefe007a,
+ 0xbeff007b, 0xbef4007e,
+ 0x8b75ff7f, 0x0000ffff,
+ 0x8c75ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x10807fac, 0xbef1007d,
+ 0xbef00080, 0xb8f302dc,
+ 0x84739973, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00002,
+ 0xbeff00c1, 0xbfa00009,
0xbef600ff, 0x01000000,
- 0xe0685100, 0x701d0100,
- 0xe0685200, 0x701d0200,
- 0xe0685300, 0x701d0300,
+ 0xe0685080, 0x701d0100,
+ 0xe0685100, 0x701d0200,
+ 0xe0685180, 0x701d0300,
+ 0xbfa00008, 0xbef600ff,
+ 0x01000000, 0xe0685100,
+ 0x701d0100, 0xe0685200,
+ 0x701d0200, 0xe0685300,
+ 0x701d0300, 0xb8f03b05,
+ 0x80708170, 0xbf0d9973,
+ 0xbfa20002, 0x84708970,
+ 0xbfa00001, 0x84708a70,
+ 0xb8fa1e06, 0x847a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0xbef600ff,
+ 0x01000000, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0xbefd0080, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xd7610002, 0x0000fa6c,
+ 0x807d817d, 0x917aff6d,
+ 0x80000000, 0xd7610002,
+ 0x0000fa7a, 0x807d817d,
+ 0xd7610002, 0x0000fa6e,
+ 0x807d817d, 0xd7610002,
+ 0x0000fa6f, 0x807d817d,
+ 0xd7610002, 0x0000fa78,
+ 0x807d817d, 0xb8faf803,
+ 0xd7610002, 0x0000fa7a,
+ 0x807d817d, 0xd7610002,
+ 0x0000fa7b, 0x807d817d,
+ 0xb8f1f801, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8f1f814, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8f1f815, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xbefe00ff, 0x0000ffff,
+ 0xbeff0080, 0xe0685000,
+ 0x701d0200, 0xbefe00c1,
0xb8f03b05, 0x80708170,
0xbf0d9973, 0xbfa20002,
0x84708970, 0xbfa00001,
0x84708a70, 0xb8fa1e06,
0x847a8a7a, 0x80707a70,
- 0x8070ff70, 0x00000200,
0xbef600ff, 0x01000000,
- 0x7e000280, 0x7e020280,
- 0x7e040280, 0xbefd0080,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xd7610002,
- 0x0000fa6c, 0x807d817d,
- 0x917aff6d, 0x80000000,
- 0xd7610002, 0x0000fa7a,
- 0x807d817d, 0xd7610002,
- 0x0000fa6e, 0x807d817d,
- 0xd7610002, 0x0000fa6f,
- 0x807d817d, 0xd7610002,
- 0x0000fa78, 0x807d817d,
- 0xb8faf803, 0xd7610002,
- 0x0000fa7a, 0x807d817d,
- 0xd7610002, 0x0000fa7b,
- 0x807d817d, 0xb8f1f801,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xb8f1f814,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xb8f1f815,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xbefe00ff,
- 0x0000ffff, 0xbeff0080,
- 0xe0685000, 0x701d0200,
- 0xbefe00c1, 0xb8f03b05,
- 0x80708170, 0xbf0d9973,
- 0xbfa20002, 0x84708970,
- 0xbfa00001, 0x84708a70,
- 0xb8fa1e06, 0x847a8a7a,
- 0x80707a70, 0xbef600ff,
- 0x01000000, 0xbef90080,
- 0xbefd0080, 0xbf800000,
- 0xbe804100, 0xbe824102,
- 0xbe844104, 0xbe864106,
- 0xbe884108, 0xbe8a410a,
- 0xbe8c410c, 0xbe8e410e,
- 0xd7610002, 0x0000f200,
- 0x80798179, 0xd7610002,
- 0x0000f201, 0x80798179,
- 0xd7610002, 0x0000f202,
- 0x80798179, 0xd7610002,
- 0x0000f203, 0x80798179,
- 0xd7610002, 0x0000f204,
+ 0xbef90080, 0xbefd0080,
+ 0xbf800000, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xbe8c410c,
+ 0xbe8e410e, 0xd7610002,
+ 0x0000f200, 0x80798179,
+ 0xd7610002, 0x0000f201,
0x80798179, 0xd7610002,
- 0x0000f205, 0x80798179,
- 0xd7610002, 0x0000f206,
+ 0x0000f202, 0x80798179,
+ 0xd7610002, 0x0000f203,
0x80798179, 0xd7610002,
- 0x0000f207, 0x80798179,
- 0xd7610002, 0x0000f208,
+ 0x0000f204, 0x80798179,
+ 0xd7610002, 0x0000f205,
0x80798179, 0xd7610002,
- 0x0000f209, 0x80798179,
- 0xd7610002, 0x0000f20a,
+ 0x0000f206, 0x80798179,
+ 0xd7610002, 0x0000f207,
0x80798179, 0xd7610002,
- 0x0000f20b, 0x80798179,
- 0xd7610002, 0x0000f20c,
+ 0x0000f208, 0x80798179,
+ 0xd7610002, 0x0000f209,
0x80798179, 0xd7610002,
- 0x0000f20d, 0x80798179,
- 0xd7610002, 0x0000f20e,
+ 0x0000f20a, 0x80798179,
+ 0xd7610002, 0x0000f20b,
0x80798179, 0xd7610002,
- 0x0000f20f, 0x80798179,
- 0xbf06a079, 0xbfa10006,
- 0xe0685000, 0x701d0200,
- 0x8070ff70, 0x00000080,
- 0xbef90080, 0x7e040280,
- 0x807d907d, 0xbf0aff7d,
- 0x00000060, 0xbfa2ffbc,
- 0xbe804100, 0xbe824102,
- 0xbe844104, 0xbe864106,
- 0xbe884108, 0xbe8a410a,
- 0xd7610002, 0x0000f200,
+ 0x0000f20c, 0x80798179,
+ 0xd7610002, 0x0000f20d,
0x80798179, 0xd7610002,
- 0x0000f201, 0x80798179,
- 0xd7610002, 0x0000f202,
+ 0x0000f20e, 0x80798179,
+ 0xd7610002, 0x0000f20f,
+ 0x80798179, 0xbf06a079,
+ 0xbfa10006, 0xe0685000,
+ 0x701d0200, 0x8070ff70,
+ 0x00000080, 0xbef90080,
+ 0x7e040280, 0x807d907d,
+ 0xbf0aff7d, 0x00000060,
+ 0xbfa2ffbc, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xd7610002,
+ 0x0000f200, 0x80798179,
+ 0xd7610002, 0x0000f201,
0x80798179, 0xd7610002,
- 0x0000f203, 0x80798179,
- 0xd7610002, 0x0000f204,
+ 0x0000f202, 0x80798179,
+ 0xd7610002, 0x0000f203,
0x80798179, 0xd7610002,
- 0x0000f205, 0x80798179,
- 0xd7610002, 0x0000f206,
+ 0x0000f204, 0x80798179,
+ 0xd7610002, 0x0000f205,
0x80798179, 0xd7610002,
- 0x0000f207, 0x80798179,
- 0xd7610002, 0x0000f208,
+ 0x0000f206, 0x80798179,
+ 0xd7610002, 0x0000f207,
0x80798179, 0xd7610002,
- 0x0000f209, 0x80798179,
- 0xd7610002, 0x0000f20a,
+ 0x0000f208, 0x80798179,
+ 0xd7610002, 0x0000f209,
0x80798179, 0xd7610002,
- 0x0000f20b, 0x80798179,
- 0xe0685000, 0x701d0200,
- 0xbefe00c1, 0x857d9973,
- 0x8b7d817d, 0xbf06817d,
- 0xbfa20002, 0xbeff0080,
- 0xbfa00001, 0xbeff00c1,
- 0xb8fb4306, 0x8b7bc17b,
- 0xbfa10044, 0xbfbd0000,
- 0x8b7aff6d, 0x80000000,
- 0xbfa10040, 0x847b867b,
- 0x847b827b, 0xbef6007b,
+ 0x0000f20a, 0x80798179,
+ 0xd7610002, 0x0000f20b,
+ 0x80798179, 0xe0685000,
+ 0x701d0200, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00001,
+ 0xbeff00c1, 0xb8fb4306,
+ 0x8b7bc17b, 0xbfa10043,
+ 0xbfbd0000, 0x8b7aff6d,
+ 0x80000000, 0xbfa1003f,
+ 0x847b887b, 0xbef6007b,
0xb8f03b05, 0x80708170,
0xbf0d9973, 0xbfa20002,
0x84708970, 0xbfa00001,
@@ -2781,177 +2986,175 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0x701d0000, 0x807d817d,
0x8070ff70, 0x00000080,
0xbf0a7b7d, 0xbfa2fff8,
- 0xbfa00146, 0xbef4007e,
+ 0xbfa00143, 0xbef4007e,
0x8b75ff7f, 0x0000ffff,
0x8c75ff75, 0x00040000,
0xbef60080, 0xbef700ff,
0x10807fac, 0xb8f202dc,
0x84729972, 0x8b6eff7f,
- 0x04000000, 0xbfa1003a,
+ 0x04000000, 0xbfa10039,
0xbefe00c1, 0x857d9972,
0x8b7d817d, 0xbf06817d,
0xbfa20002, 0xbeff0080,
0xbfa00001, 0xbeff00c1,
0xb8ef4306, 0x8b6fc16f,
- 0xbfa1002f, 0x846f866f,
- 0x846f826f, 0xbef6006f,
- 0xb8f83b05, 0x80788178,
- 0xbf0d9972, 0xbfa20002,
- 0x84788978, 0xbfa00001,
- 0x84788a78, 0xb8ee1e06,
- 0x846e8a6e, 0x80786e78,
+ 0xbfa1002e, 0x846f886f,
+ 0xbef6006f, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa2000c,
+ 0xe0500000, 0x781d0000,
+ 0xbf890000, 0xdac00000,
+ 0x00000000, 0x807dff7d,
+ 0x00000080, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7d,
+ 0xbfa2fff5, 0xbfa0000b,
+ 0xe0500000, 0x781d0000,
+ 0xbf890000, 0xdac00000,
+ 0x00000000, 0x807dff7d,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7d,
+ 0xbfa2fff5, 0xbef80080,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef3b05, 0x806f816f,
+ 0x846f826f, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20024, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
0x8078ff78, 0x00000200,
- 0x8078ff78, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0x857d9972, 0x8b7d817d,
- 0xbf06817d, 0xbefd0080,
- 0xbfa2000c, 0xe0500000,
- 0x781d0000, 0xbf8903f7,
- 0xdac00000, 0x00000000,
- 0x807dff7d, 0x00000080,
- 0x8078ff78, 0x00000080,
- 0xbf0a6f7d, 0xbfa2fff5,
- 0xbfa0000b, 0xe0500000,
- 0x781d0000, 0xbf8903f7,
- 0xdac00000, 0x00000000,
- 0x807dff7d, 0x00000100,
- 0x8078ff78, 0x00000100,
- 0xbf0a6f7d, 0xbfa2fff5,
- 0xbef80080, 0xbefe00c1,
- 0x857d9972, 0x8b7d817d,
- 0xbf06817d, 0xbfa20002,
- 0xbeff0080, 0xbfa00001,
- 0xbeff00c1, 0xb8ef3b05,
- 0x806f816f, 0x846f826f,
- 0x857d9972, 0x8b7d817d,
- 0xbf06817d, 0xbfa20024,
- 0xbef600ff, 0x01000000,
- 0xbeee0078, 0x8078ff78,
- 0x00000200, 0xbefd0084,
- 0xbf0a6f7d, 0xbfa10050,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10050, 0xe0505000,
+ 0x781d0000, 0xe0505080,
+ 0x781d0100, 0xe0505100,
+ 0x781d0200, 0xe0505180,
+ 0x781d0300, 0xbf890000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7d,
+ 0xbfa2ffee, 0xe0505000,
+ 0x6e1d0000, 0xe0505080,
+ 0x6e1d0100, 0xe0505100,
+ 0x6e1d0200, 0xe0505180,
+ 0x6e1d0300, 0xbf890000,
+ 0xbfa00034, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10012, 0xe0505000,
+ 0x781d0000, 0xe0505100,
+ 0x781d0100, 0xe0505200,
+ 0x781d0200, 0xe0505300,
+ 0x781d0300, 0xbf890000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7d,
+ 0xbfa2ffee, 0xb8ef1e06,
+ 0x8b6fc16f, 0xbfa1000e,
+ 0x846f836f, 0x806f7d6f,
+ 0xbefe00c1, 0xbeff0080,
0xe0505000, 0x781d0000,
- 0xe0505080, 0x781d0100,
- 0xe0505100, 0x781d0200,
- 0xe0505180, 0x781d0300,
- 0xbf8903f7, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807d847d,
- 0x8078ff78, 0x00000200,
- 0xbf0a6f7d, 0xbfa2ffee,
+ 0xbf890000, 0x7e008500,
+ 0x807d817d, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7d,
+ 0xbfa2fff7, 0xbeff00c1,
0xe0505000, 0x6e1d0000,
- 0xe0505080, 0x6e1d0100,
- 0xe0505100, 0x6e1d0200,
- 0xe0505180, 0x6e1d0300,
- 0xbf8903f7, 0xbfa00034,
- 0xbef600ff, 0x01000000,
- 0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbefd0084,
- 0xbf0a6f7d, 0xbfa10012,
- 0xe0505000, 0x781d0000,
- 0xe0505100, 0x781d0100,
- 0xe0505200, 0x781d0200,
- 0xe0505300, 0x781d0300,
- 0xbf8903f7, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807d847d,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7d, 0xbfa2ffee,
- 0xb8ef1e06, 0x8b6fc16f,
- 0xbfa1000e, 0x846f836f,
- 0x806f7d6f, 0xbefe00c1,
- 0xbeff0080, 0xe0505000,
- 0x781d0000, 0xbf8903f7,
- 0x7e008500, 0x807d817d,
- 0x8078ff78, 0x00000080,
- 0xbf0a6f7d, 0xbfa2fff7,
- 0xbeff00c1, 0xe0505000,
- 0x6e1d0000, 0xe0505100,
- 0x6e1d0100, 0xe0505200,
- 0x6e1d0200, 0xe0505300,
- 0x6e1d0300, 0xbf8903f7,
+ 0xe0505100, 0x6e1d0100,
+ 0xe0505200, 0x6e1d0200,
+ 0xe0505300, 0x6e1d0300,
+ 0xbf890000, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x80f8ff78,
+ 0x00000050, 0xbef600ff,
+ 0x01000000, 0xbefd00ff,
+ 0x0000006c, 0x80f89078,
+ 0xf428403a, 0xf0000000,
+ 0xbf890000, 0x80fd847d,
+ 0xbf800000, 0xbe804300,
+ 0xbe824302, 0x80f8a078,
+ 0xf42c403a, 0xf0000000,
+ 0xbf890000, 0x80fd887d,
+ 0xbf800000, 0xbe804300,
+ 0xbe824302, 0xbe844304,
+ 0xbe864306, 0x80f8c078,
+ 0xf430403a, 0xf0000000,
+ 0xbf890000, 0x80fd907d,
+ 0xbf800000, 0xbe804300,
+ 0xbe824302, 0xbe844304,
+ 0xbe864306, 0xbe884308,
+ 0xbe8a430a, 0xbe8c430c,
+ 0xbe8e430e, 0xbf06807d,
+ 0xbfa1fff0, 0xb980f801,
+ 0x00000000, 0xbfbd0000,
0xb8f83b05, 0x80788178,
0xbf0d9972, 0xbfa20002,
0x84788978, 0xbfa00001,
0x84788a78, 0xb8ee1e06,
0x846e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
- 0x80f8ff78, 0x00000050,
0xbef600ff, 0x01000000,
- 0xbefd00ff, 0x0000006c,
- 0x80f89078, 0xf428403a,
- 0xf0000000, 0xbf89fc07,
- 0x80fd847d, 0xbf800000,
- 0xbe804300, 0xbe824302,
- 0x80f8a078, 0xf42c403a,
- 0xf0000000, 0xbf89fc07,
- 0x80fd887d, 0xbf800000,
- 0xbe804300, 0xbe824302,
- 0xbe844304, 0xbe864306,
- 0x80f8c078, 0xf430403a,
- 0xf0000000, 0xbf89fc07,
- 0x80fd907d, 0xbf800000,
- 0xbe804300, 0xbe824302,
- 0xbe844304, 0xbe864306,
- 0xbe884308, 0xbe8a430a,
- 0xbe8c430c, 0xbe8e430e,
- 0xbf06807d, 0xbfa1fff0,
- 0xb980f801, 0x00000000,
- 0xbfbd0000, 0xb8f83b05,
- 0x80788178, 0xbf0d9972,
- 0xbfa20002, 0x84788978,
- 0xbfa00001, 0x84788a78,
- 0xb8ee1e06, 0x846e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0xbef600ff,
- 0x01000000, 0xf4205bfa,
+ 0xf4205bfa, 0xf0000000,
+ 0x80788478, 0xf4205b3a,
0xf0000000, 0x80788478,
- 0xf4205b3a, 0xf0000000,
- 0x80788478, 0xf4205b7a,
+ 0xf4205b7a, 0xf0000000,
+ 0x80788478, 0xf4205c3a,
0xf0000000, 0x80788478,
- 0xf4205c3a, 0xf0000000,
- 0x80788478, 0xf4205c7a,
+ 0xf4205c7a, 0xf0000000,
+ 0x80788478, 0xf4205eba,
0xf0000000, 0x80788478,
- 0xf4205eba, 0xf0000000,
- 0x80788478, 0xf4205efa,
+ 0xf4205efa, 0xf0000000,
+ 0x80788478, 0xf4205e7a,
0xf0000000, 0x80788478,
- 0xf4205e7a, 0xf0000000,
- 0x80788478, 0xf4205cfa,
+ 0xf4205cfa, 0xf0000000,
+ 0x80788478, 0xf4205bba,
0xf0000000, 0x80788478,
+ 0xbf890000, 0xb96ef814,
0xf4205bba, 0xf0000000,
- 0x80788478, 0xbf89fc07,
- 0xb96ef814, 0xf4205bba,
- 0xf0000000, 0x80788478,
- 0xbf89fc07, 0xb96ef815,
- 0xbefd006f, 0xbefe0070,
- 0xbeff0071, 0x8b6f7bff,
- 0x000003ff, 0xb96f4803,
- 0x8b6f7bff, 0xfffff800,
- 0x856f8b6f, 0xb96fa2c3,
- 0xb973f801, 0xb8ee3b05,
- 0x806e816e, 0xbf0d9972,
- 0xbfa20002, 0x846e896e,
- 0xbfa00001, 0x846e8a6e,
- 0xb8ef1e06, 0x846f8a6f,
- 0x806e6f6e, 0x806eff6e,
- 0x00000200, 0x806e746e,
- 0x826f8075, 0x8b6fff6f,
- 0x0000ffff, 0xf4085c37,
- 0xf8000050, 0xf4085d37,
- 0xf8000060, 0xf4005e77,
- 0xf8000074, 0xbf89fc07,
- 0x8b6dff6d, 0x0000ffff,
- 0x8bfe7e7e, 0x8bea6a6a,
- 0xb8eef802, 0xbf0d866e,
- 0xbfa20002, 0xb97af802,
- 0xbe80486c, 0xb97af802,
- 0xbe804a6c, 0xbfb00000,
+ 0x80788478, 0xbf890000,
+ 0xb96ef815, 0xbefd006f,
+ 0xbefe0070, 0xbeff0071,
+ 0xb97b4803, 0x857b8b7b,
+ 0xb97b22c3, 0x857b867b,
+ 0xb97b7443, 0xb973f801,
+ 0xb8ee3b05, 0x806e816e,
+ 0xbf0d9972, 0xbfa20002,
+ 0x846e896e, 0xbfa00001,
+ 0x846e8a6e, 0xb8ef1e06,
+ 0x846f8a6f, 0x806e6f6e,
+ 0x806eff6e, 0x00000200,
+ 0x806e746e, 0x826f8075,
+ 0x8b6fff6f, 0x0000ffff,
+ 0xf4085c37, 0xf8000050,
+ 0xf4085d37, 0xf8000060,
+ 0xf4005e77, 0xf8000074,
+ 0xbf890000, 0x8b6dff6d,
+ 0x0000ffff, 0x8bfe7e7e,
+ 0x8bea6a6a, 0xb8eef802,
+ 0xbf0d866e, 0xbfa20002,
+ 0xb97af802, 0xbe80486c,
+ 0xb97af802, 0xbe804a6c,
+ 0xbfb10000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
- 0xbf9f0000, 0x00000000,
};
static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
- 0xbf820001, 0xbf8202db,
+ 0xbf820001, 0xbf8202dc,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -3066,99 +3269,143 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fb1605,
- 0x807b817b, 0x8e7b847b,
- 0x8e76827b, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747074, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a7b7c,
- 0xbf85ffe7, 0xbef40172,
- 0xbef00080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf85004d,
- 0xbe840080, 0xd2890000,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf85004d, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840064, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840060, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000901,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000901, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0xbf820008, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb4306,
- 0x867bc17b, 0xbf840064,
- 0xbf8a0000, 0x867aff6f,
- 0x04000000, 0xbf840060,
- 0x8e7b867b, 0x8e7b827b,
- 0xbef6007b, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
+ 0xbf850051, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -3177,31 +3424,51 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8fb2b05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -3241,31 +3508,913 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
0x701d0000, 0xe0724100,
0x701d0100, 0xe0724200,
0x701d0200, 0xe0724300,
0x701d0300, 0x807c847c,
0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xb8fb2985,
- 0x807b817b, 0x8e7b837b,
- 0xb8fa2b05, 0x807a817a,
- 0x8e7a827a, 0x80fb7a7b,
- 0x867b7b7b, 0xbf84007a,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200ee,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001f, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf84001a,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbef600ff, 0x01000000,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
+ 0x00000078, 0x80788478,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
+ 0x00000078, 0x80788478,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
+ 0x00000078, 0x80788478,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
+ 0x00000078, 0x80788478,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
+ 0x00000078, 0x80788478,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b79, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
+};
+
+static const uint32_t cwsr_trap_gfx12_hex[] = {
+ 0xbfa00001, 0xbfa002a2,
+ 0xb0804009, 0xb8f8f804,
+ 0x9178ff78, 0x00008c00,
+ 0xb8fbf811, 0x8b6eff78,
+ 0x00004000, 0xbfa10008,
+ 0x8b6eff7b, 0x00000080,
+ 0xbfa20018, 0x8b6ea07b,
+ 0xbfa20042, 0xbf830010,
+ 0xb8fbf811, 0xbfa0fffb,
+ 0x8b6eff7b, 0x00000bd0,
+ 0xbfa20010, 0xb8eef812,
+ 0x8b6f8f7b, 0xbfa10002,
+ 0x8c6eff6e, 0x00000080,
+ 0xb8eff813, 0x8b6e6e6f,
+ 0xbfa20008, 0x8b6eff6d,
+ 0xf0000000, 0xbfa20005,
+ 0x8b6fff6f, 0x00000200,
+ 0xbfa20002, 0x8b6ea07b,
+ 0xbfa2002c, 0xbefa4d82,
+ 0xbf8a0000, 0x84fa887a,
+ 0xbf0d8f7b, 0xbfa10002,
+ 0x8c7bff7b, 0xffff0000,
+ 0xf4601bbd, 0xf8000010,
+ 0xbf8a0000, 0x846e976e,
+ 0x9177ff77, 0x00800000,
+ 0x8c776e77, 0xf4603bbd,
+ 0xf8000000, 0xbf8a0000,
+ 0xf4603ebd, 0xf8000008,
+ 0xbf8a0000, 0x8bee6e6e,
+ 0xbfa10001, 0xbe80486e,
+ 0x8b6eff6d, 0xf0000000,
+ 0xbfa20009, 0xb8eef811,
+ 0x8b6eff6e, 0x00000080,
+ 0xbfa20007, 0x8c78ff78,
+ 0x00004000, 0x80ec886c,
+ 0x82ed806d, 0xbfa00002,
+ 0x806c846c, 0x826d806d,
+ 0x8b6dff6d, 0x0000ffff,
+ 0x8bfe7e7e, 0x8bea6a6a,
+ 0x85788978, 0xb9783244,
+ 0xbe804a6c, 0xb8faf802,
+ 0xbf0d987a, 0xbfa10001,
+ 0xbfb00000, 0x8b6dff6d,
+ 0x0000ffff, 0xbefa0080,
+ 0xb97a0151, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbefe4d84, 0xbf8a0000,
+ 0x8b7aff7f, 0x04000000,
+ 0x847a857a, 0x8c6d7a6d,
+ 0xbefa007e, 0x8b7bff7f,
+ 0x0000ffff, 0xbefe00c1,
+ 0xbeff00c1, 0xee0a407a,
+ 0x000c0000, 0x00000000,
+ 0x7e000280, 0xbefe007a,
+ 0xbeff007b, 0xb8fb0742,
+ 0x847b997b, 0xb8fa3b05,
+ 0x807a817a, 0xbf0d997b,
+ 0xbfa20002, 0x847a897a,
+ 0xbfa00001, 0x847a8a7a,
+ 0xb8fb1e06, 0x847b8a7b,
+ 0x807a7b7a, 0x8b7bff7f,
+ 0x0000ffff, 0x807aff7a,
+ 0x00000200, 0x807a7e7a,
+ 0x827b807b, 0xd7610000,
+ 0x00010870, 0xd7610000,
+ 0x00010a71, 0xd7610000,
+ 0x00010c72, 0xd7610000,
+ 0x00010e73, 0xd7610000,
+ 0x00011074, 0xd7610000,
+ 0x00011275, 0xd7610000,
+ 0x00011476, 0xd7610000,
+ 0x00011677, 0xd7610000,
+ 0x00011a79, 0xd7610000,
+ 0x00011c7e, 0xd7610000,
+ 0x00011e7f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xbefe00ff,
+ 0x00003fff, 0xbeff0080,
+ 0xee0a407a, 0x000c0000,
+ 0x00004000, 0xd760007a,
+ 0x00011d00, 0xd760007b,
+ 0x00011f00, 0xbefe007a,
+ 0xbeff007b, 0xbef4007e,
+ 0x8b75ff7f, 0x0000ffff,
+ 0x8c75ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x10807fac, 0xbef1007d,
+ 0xbef00080, 0xb8f30742,
+ 0x84739973, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00002,
+ 0xbeff00c1, 0xbfa0000c,
+ 0xbef600ff, 0x01000000,
+ 0xc4068070, 0x008ce801,
+ 0x00008000, 0xc4068070,
+ 0x008ce802, 0x00010000,
+ 0xc4068070, 0x008ce803,
+ 0x00018000, 0xbfa0000b,
+ 0xbef600ff, 0x01000000,
+ 0xc4068070, 0x008ce801,
+ 0x00010000, 0xc4068070,
+ 0x008ce802, 0x00020000,
+ 0xc4068070, 0x008ce803,
+ 0x00030000, 0xb8f03b05,
+ 0x80708170, 0xbf0d9973,
+ 0xbfa20002, 0x84708970,
+ 0xbfa00001, 0x84708a70,
+ 0xb8fa1e06, 0x847a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0xbef600ff,
+ 0x01000000, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0xbe804ec2, 0xbf94fffe,
+ 0xb8faf804, 0x8b7a847a,
+ 0x91788478, 0x8c787a78,
+ 0x917aff6d, 0x80000000,
+ 0xd7610002, 0x00010071,
+ 0xd7610002, 0x0001026c,
+ 0xd7610002, 0x0001047a,
+ 0xd7610002, 0x0001066e,
+ 0xd7610002, 0x0001086f,
+ 0xd7610002, 0x00010a78,
+ 0xd7610002, 0x00010e7b,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xb8faf811, 0xd7610002,
+ 0x00010c7a, 0xb8faf801,
+ 0xd7610002, 0x0001107a,
+ 0xb8faf814, 0xd7610002,
+ 0x0001127a, 0xb8faf815,
+ 0xd7610002, 0x0001147a,
+ 0xb8faf812, 0xd7610002,
+ 0x0001167a, 0xb8faf813,
+ 0xd7610002, 0x0001187a,
+ 0xb8faf802, 0xd7610002,
+ 0x00011a7a, 0xbefa50c1,
+ 0xbfc70000, 0xd7610002,
+ 0x00011c7a, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xbefe00ff,
+ 0x0000ffff, 0xbeff0080,
+ 0xc4068070, 0x008ce802,
+ 0x00000000, 0xbefe00c1,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0xbef600ff, 0x01000000,
+ 0xbef90080, 0xbefd0080,
+ 0xbf800000, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xbe8c410c,
+ 0xbe8e410e, 0xbf068079,
+ 0xbfa10032, 0xd7610002,
+ 0x00010000, 0xd7610002,
+ 0x00010201, 0xd7610002,
+ 0x00010402, 0xd7610002,
+ 0x00010603, 0xd7610002,
+ 0x00010804, 0xd7610002,
+ 0x00010a05, 0xd7610002,
+ 0x00010c06, 0xd7610002,
+ 0x00010e07, 0xd7610002,
+ 0x00011008, 0xd7610002,
+ 0x00011209, 0xd7610002,
+ 0x0001140a, 0xd7610002,
+ 0x0001160b, 0xd7610002,
+ 0x0001180c, 0xd7610002,
+ 0x00011a0d, 0xd7610002,
+ 0x00011c0e, 0xd7610002,
+ 0x00011e0f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80799079,
+ 0xbfa00038, 0xd7610002,
+ 0x00012000, 0xd7610002,
+ 0x00012201, 0xd7610002,
+ 0x00012402, 0xd7610002,
+ 0x00012603, 0xd7610002,
+ 0x00012804, 0xd7610002,
+ 0x00012a05, 0xd7610002,
+ 0x00012c06, 0xd7610002,
+ 0x00012e07, 0xd7610002,
+ 0x00013008, 0xd7610002,
+ 0x00013209, 0xd7610002,
+ 0x0001340a, 0xd7610002,
+ 0x0001360b, 0xd7610002,
+ 0x0001380c, 0xd7610002,
+ 0x00013a0d, 0xd7610002,
+ 0x00013c0e, 0xd7610002,
+ 0x00013e0f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80799079,
+ 0xc4068070, 0x008ce802,
+ 0x00000000, 0x8070ff70,
+ 0x00000080, 0xbef90080,
+ 0x7e040280, 0x807d907d,
+ 0xbf0aff7d, 0x00000060,
+ 0xbfa2ff88, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xd7610002,
+ 0x00010000, 0xd7610002,
+ 0x00010201, 0xd7610002,
+ 0x00010402, 0xd7610002,
+ 0x00010603, 0xd7610002,
+ 0x00010804, 0xd7610002,
+ 0x00010a05, 0xd7610002,
+ 0x00010c06, 0xd7610002,
+ 0x00010e07, 0xd7610002,
+ 0x00011008, 0xd7610002,
+ 0x00011209, 0xd7610002,
+ 0x0001140a, 0xd7610002,
+ 0x0001160b, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xc4068070,
+ 0x008ce802, 0x00000000,
+ 0xbefe00c1, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8fb4306, 0x8b7bc17b,
+ 0xbfa10044, 0x8b7aff6d,
+ 0x80000000, 0xbfa10041,
+ 0x847b897b, 0xbef6007b,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xd71f0000, 0x000100c1,
+ 0xd7200000, 0x000200c1,
+ 0x16000084, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa20013,
+ 0xbe8300ff, 0x00000080,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf8a0000,
+ 0xc4068070, 0x008ce801,
+ 0x00000000, 0x807d037d,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000080,
+ 0xbf0a7b7d, 0xbfa2fff3,
+ 0xbfa00012, 0xbe8300ff,
+ 0x00000100, 0xbf800000,
+ 0xbf800000, 0xbf800000,
+ 0xd8d80000, 0x01000000,
+ 0xbf8a0000, 0xc4068070,
+ 0x008ce801, 0x00000000,
+ 0x807d037d, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000100, 0xbf0a7b7d,
+ 0xbfa2fff3, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20004,
+ 0xbef000ff, 0x00000200,
+ 0xbeff0080, 0xbfa00003,
+ 0xbef000ff, 0x00000400,
+ 0xbeff00c1, 0xb8fb3b05,
+ 0x807b817b, 0x847b827b,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa2001b,
+ 0xbef600ff, 0x01000000,
+ 0xbefd0084, 0xbf0a7b7d,
+ 0xbfa10040, 0x7e008700,
+ 0x7e028701, 0x7e048702,
+ 0x7e068703, 0xc4068070,
+ 0x008ce800, 0x00000000,
+ 0xc4068070, 0x008ce801,
+ 0x00008000, 0xc4068070,
+ 0x008ce802, 0x00010000,
+ 0xc4068070, 0x008ce803,
+ 0x00018000, 0x807d847d,
+ 0x8070ff70, 0x00000200,
+ 0xbf0a7b7d, 0xbfa2ffeb,
+ 0xbfa0002a, 0xbef600ff,
+ 0x01000000, 0xbefd0084,
+ 0xbf0a7b7d, 0xbfa10015,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xc4068070, 0x008ce800,
+ 0x00000000, 0xc4068070,
+ 0x008ce801, 0x00010000,
+ 0xc4068070, 0x008ce802,
+ 0x00020000, 0xc4068070,
+ 0x008ce803, 0x00030000,
+ 0x807d847d, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7d,
+ 0xbfa2ffeb, 0xb8fb1e06,
+ 0x8b7bc17b, 0xbfa1000d,
+ 0x847b837b, 0x807b7d7b,
+ 0xbefe00c1, 0xbeff0080,
+ 0x7e008700, 0xc4068070,
+ 0x008ce800, 0x00000000,
+ 0x807d817d, 0x8070ff70,
+ 0x00000080, 0xbf0a7b7d,
+ 0xbfa2fff7, 0xbfa0016e,
+ 0xbef4007e, 0x8b75ff7f,
+ 0x0000ffff, 0x8c75ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x10807fac,
+ 0xbef1007f, 0xb8f20742,
+ 0x84729972, 0x8b6eff7f,
+ 0x04000000, 0xbfa1003b,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef4306, 0x8b6fc16f,
+ 0xbfa10030, 0x846f896f,
+ 0xbef6006f, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa2000d,
+ 0xc4050078, 0x0080e800,
+ 0x00000000, 0xbf8a0000,
+ 0xdac00000, 0x00000000,
+ 0x807dff7d, 0x00000080,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7d, 0xbfa2fff4,
+ 0xbfa0000c, 0xc4050078,
+ 0x0080e800, 0x00000000,
+ 0xbf8a0000, 0xdac00000,
+ 0x00000000, 0x807dff7d,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7d,
+ 0xbfa2fff4, 0xbef80080,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef3b05, 0x806f816f,
+ 0x846f826f, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa2002c, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000200,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10061, 0xc4050078,
+ 0x008ce800, 0x00000000,
+ 0xc4050078, 0x008ce801,
+ 0x00008000, 0xc4050078,
+ 0x008ce802, 0x00010000,
+ 0xc4050078, 0x008ce803,
+ 0x00018000, 0xbf8a0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7d,
+ 0xbfa2ffea, 0xc405006e,
+ 0x008ce800, 0x00000000,
+ 0xc405006e, 0x008ce801,
+ 0x00008000, 0xc405006e,
+ 0x008ce802, 0x00010000,
+ 0xc405006e, 0x008ce803,
+ 0x00018000, 0xbf8a0000,
+ 0xbfa0003d, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10016, 0xc4050078,
+ 0x008ce800, 0x00000000,
+ 0xc4050078, 0x008ce801,
+ 0x00010000, 0xc4050078,
+ 0x008ce802, 0x00020000,
+ 0xc4050078, 0x008ce803,
+ 0x00030000, 0xbf8a0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7d,
+ 0xbfa2ffea, 0xb8ef1e06,
+ 0x8b6fc16f, 0xbfa1000f,
+ 0x846f836f, 0x806f7d6f,
+ 0xbefe00c1, 0xbeff0080,
+ 0xc4050078, 0x008ce800,
+ 0x00000000, 0xbf8a0000,
+ 0x7e008500, 0x807d817d,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7d, 0xbfa2fff6,
+ 0xbeff00c1, 0xc405006e,
+ 0x008ce800, 0x00000000,
+ 0xc405006e, 0x008ce801,
+ 0x00010000, 0xc405006e,
+ 0x008ce802, 0x00020000,
+ 0xc405006e, 0x008ce803,
+ 0x00030000, 0xbf8a0000,
+ 0xb8f83b05, 0x80788178,
+ 0xbf0d9972, 0xbfa20002,
+ 0x84788978, 0xbfa00001,
+ 0x84788a78, 0xb8ee1e06,
+ 0x846e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0x80f8ff78, 0x00000050,
+ 0xbef600ff, 0x01000000,
+ 0xbefd00ff, 0x0000006c,
+ 0x80f89078, 0xf462403a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd847d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0x80f8a078, 0xf462603a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd887d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0xbe844304, 0xbe864306,
+ 0x80f8c078, 0xf462803a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd907d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0xbe844304, 0xbe864306,
+ 0xbe884308, 0xbe8a430a,
+ 0xbe8c430c, 0xbe8e430e,
+ 0xbf06807d, 0xbfa1fff0,
+ 0xb980f801, 0x00000000,
+ 0xb8f83b05, 0x80788178,
+ 0xbf0d9972, 0xbfa20002,
+ 0x84788978, 0xbfa00001,
+ 0x84788a78, 0xb8ee1e06,
+ 0x846e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0xbef600ff, 0x01000000,
+ 0xbeff0071, 0xf4621bfa,
+ 0xf0000000, 0x80788478,
+ 0xf4621b3a, 0xf0000000,
+ 0x80788478, 0xf4621b7a,
+ 0xf0000000, 0x80788478,
+ 0xf4621c3a, 0xf0000000,
+ 0x80788478, 0xf4621c7a,
+ 0xf0000000, 0x80788478,
+ 0xf4621eba, 0xf0000000,
+ 0x80788478, 0xf4621efa,
+ 0xf0000000, 0x80788478,
+ 0xf4621e7a, 0xf0000000,
+ 0x80788478, 0xf4621cfa,
+ 0xf0000000, 0x80788478,
+ 0xf4621bba, 0xf0000000,
+ 0x80788478, 0xbf8a0000,
+ 0xb96ef814, 0xf4621bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8a0000, 0xb96ef815,
+ 0xf4621bba, 0xf0000000,
+ 0x80788478, 0xbf8a0000,
+ 0xb96ef812, 0xf4621bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8a0000, 0xb96ef813,
+ 0x8b6eff7f, 0x04000000,
+ 0xbfa1000d, 0x80788478,
+ 0xf4621bba, 0xf0000000,
+ 0x80788478, 0xbf8a0000,
+ 0xbf0d806e, 0xbfa10006,
+ 0x856e906e, 0x8b6e6e6e,
+ 0xbfa10003, 0xbe804ec1,
+ 0x816ec16e, 0xbfa0fffb,
+ 0xbefd006f, 0xbefe0070,
+ 0xbeff0071, 0xb97b2011,
+ 0x857b867b, 0xb97b0191,
+ 0x857b827b, 0xb97bba11,
+ 0xb973f801, 0xb8ee3b05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbfa20002, 0x846e896e,
+ 0xbfa00001, 0x846e8a6e,
+ 0xb8ef1e06, 0x846f8a6f,
+ 0x806e6f6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x8b6fff6f,
+ 0x0000ffff, 0xf4605c37,
+ 0xf8000050, 0xf4605d37,
+ 0xf8000060, 0xf4601e77,
+ 0xf8000074, 0xbf8a0000,
+ 0x8b6dff6d, 0x0000ffff,
+ 0x8bfe7e7e, 0x8bea6a6a,
+ 0xb97af804, 0xbe804ec2,
+ 0xbf94fffe, 0xbe804a6c,
+ 0xbe804ec2, 0xbf94fffe,
+ 0xbfb10000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
+
+static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
+ 0xbf820001, 0xbf8202ca,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
+ 0x866eff78, 0x00002000,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001a,
+ 0x866eff7b, 0x00000400,
+ 0xbf850051, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x03c00900,
+ 0xbf850011, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf850006,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf85003a,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
+ 0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8979ff79, 0x00800000,
+ 0x87796e79, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
+ 0xbf850002, 0x806c846c,
+ 0x826d806d, 0x866dff6d,
+ 0x0000ffff, 0x8f7a8b79,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f6e8378,
+ 0xb96ee0c2, 0xbf800002,
+ 0xb9780002, 0xbe801f6c,
+ 0x866dff6d, 0x0000ffff,
+ 0xbefa0080, 0xb97a0283,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2985,
+ 0x807a817a, 0x8e7a8a7a,
+ 0x8e7a817a, 0xb8fb1605,
+ 0x807b817b, 0x8e7b867b,
+ 0x807a7b7a, 0x807a7e7a,
+ 0x827b807f, 0x867bff7b,
+ 0x0000ffff, 0xc04b1c3d,
+ 0x00000050, 0xbf8cc07f,
+ 0xc04b1d3d, 0x00000060,
+ 0xbf8cc07f, 0xc0431e7d,
+ 0x00000074, 0xbf8cc07f,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0xbef1007c, 0xbef00080,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611b3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611b7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611bba, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611bfa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611a3a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf85004d, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb5306, 0x867bc17b,
+ 0xbf840052, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf84004e, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85001d,
+ 0x24040682, 0xd86c0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x680404ff,
+ 0x00000100, 0xd0c9006a,
+ 0x0000f702, 0xbf87ffe5,
+ 0xbf820016, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe800077,
+ 0x8677ff77, 0xff7fffff,
+ 0x8777ff77, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8cc07f, 0xe0765000,
+ 0x701d0002, 0x68040702,
+ 0xd0c9006a, 0x0000f702,
+ 0xbefe016a, 0xbf87fff6,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
0x807bff7b, 0x00001000,
- 0xbefc0080, 0xbf11017c,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850059, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xbe840080,
+ 0xbf850051, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -3304,137 +4453,204 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0x807c847c,
- 0xbf0a7b7c, 0xbf85ffa9,
- 0xbf9c0000, 0xbf820016,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0xe0724000, 0x701d0000,
0xe0724100, 0x701d0100,
0xe0724200, 0x701d0200,
0xe0724300, 0x701d0300,
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
- 0xbf85ffeb, 0xbf9c0000,
- 0xbf8200ee, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x866eff7f,
- 0x04000000, 0xbf84001f,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200f4,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf840025, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef5306,
+ 0x866fc16f, 0xbf840020,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0xe0510200, 0x781d0000,
+ 0xe0510300, 0x781d0000,
+ 0xe0510400, 0x781d0000,
+ 0x807cff7c, 0x00000500,
+ 0x8078ff78, 0x00000500,
+ 0xbf0a6f7c, 0xbf85fff0,
0xbefe00c1, 0xbeff00c1,
- 0xb8ef4306, 0x866fc16f,
- 0xbf84001a, 0x8e6f866f,
- 0x8e6f826f, 0xbef6006f,
- 0xb8f82985, 0x80788178,
- 0x8e788a78, 0x8e788178,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
- 0x781d0000, 0xe0510100,
- 0x781d0000, 0x807cff7c,
- 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbefe00c1,
- 0xbeff00c1, 0xbef600ff,
- 0x01000000, 0xb8ef2b05,
- 0x806f816f, 0x8e6f826f,
- 0x806fff6f, 0x00008000,
- 0xbef80080, 0xbeee0078,
- 0x8078ff78, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xb8ef2985, 0x806f816f,
- 0x8e6f836f, 0xb8f92b05,
- 0x80798179, 0x8e798279,
- 0x80ef796f, 0x866f6f6f,
- 0xbf84001a, 0x806fff6f,
- 0x00008000, 0xbefc0080,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
0xbf11087c, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
0x781d0300, 0xbf8c0f70,
- 0xd3d94000, 0x18000100,
- 0xd3d94001, 0x18000101,
- 0xd3d94002, 0x18000102,
- 0xd3d94003, 0x18000103,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
- 0xbf85ffea, 0xbf9c0000,
- 0xe0524000, 0x6e1d0000,
- 0xe0524100, 0x6e1d0100,
- 0xe0524200, 0x6e1d0200,
- 0xe0524300, 0x6e1d0300,
- 0xbf8c0f70, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe0070, 0xbeff0071,
- 0x866f7bff, 0x000003ff,
- 0xb96f4803, 0x866f7bff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2985, 0x806e816e,
- 0x8e6e8a6e, 0x8e6e816e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b79,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf810000,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b79, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index fdab64624422..96fbb16ceb21 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -33,6 +33,7 @@
* gfx11:
* cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
* sp3 gfx11.sp3 -hex gfx11.hex
+ *
*/
#define CHIP_NAVI10 26
@@ -43,22 +44,33 @@
#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
-#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO)
+#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO)
+#define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
+#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
-var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+#define S_COHERENCE glc:1
+#define V_COHERENCE slc:1 glc:1
+#define S_WAITCNT_0 s_waitcnt 0
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
var SQ_WAVE_STATUS_TRAP_EN_SHIFT = 6
+var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11
+var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1
+var SQ_WAVE_LDS_ALLOC_GRANULARITY = 8
+var S_STATUS_HWREG = HW_REG_STATUS
+var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
+var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
+var S_SAVE_PC_HI_HT_MASK = 0x01000000
+var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
-var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11
-var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1
#if ASIC_FAMILY < CHIP_PLUM_BONITO
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
@@ -73,14 +85,17 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0
-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
-var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
-var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
-var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT = 11
var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000
+#if ASIC_FAMILY >= CHIP_PLUM_BONITO
+var SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT = 16
+var SQ_WAVE_TRAPSTS_WAVE_START_MASK = 0x20000
+var SQ_WAVE_TRAPSTS_WAVE_START_SHIFT = 17
+var SQ_WAVE_TRAPSTS_WAVE_END_MASK = 0x40000
+var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK = 0x100000
+#endif
+var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000
var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12
var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19
@@ -92,6 +107,28 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
+var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
+var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
+
+#if ASIC_FAMILY < CHIP_PLUM_BONITO
+var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+var S_TRAPSTS_RESTORE_PART_2_SIZE = 32 - S_TRAPSTS_RESTORE_PART_2_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT = 0
+var S_TRAPSTS_RESTORE_PART_3_SIZE = 0
+#else
+var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK |\
+ SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK |\
+ SQ_WAVE_TRAPSTS_WAVE_START_MASK |\
+ SQ_WAVE_TRAPSTS_WAVE_END_MASK |\
+ SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK
+var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT - SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_TRAPSTS_WAVE_START_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
+#endif
+var S_TRAPSTS_HWREG = HW_REG_TRAPSTS
+var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK
+var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
+
// bits [31:24] unused by SPI debug data
var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
var TTMP11_SAVE_REPLAY_W64H_MASK = 0x80000000
@@ -104,8 +141,6 @@ var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
-var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
-var S_SAVE_PC_HI_HT_MASK = 0x01000000
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
@@ -165,6 +200,7 @@ var s_restore_buf_rsrc3 = ttmp11
var s_restore_size = ttmp6
var s_restore_ttmps_lo = s_restore_tmp
var s_restore_ttmps_hi = s_restore_alloc_size
+var s_restore_spi_init_hi_save = s_restore_exec_hi
shader main
asic(DEFAULT)
@@ -177,13 +213,13 @@ L_JUMP_TO_RESTORE:
s_branch L_RESTORE
L_SKIP_RESTORE:
- s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
+ s_getreg_b32 s_save_status, hwreg(S_STATUS_HWREG) //save STATUS since we will change SCC
// Clear SPI_PRIO: do not save with elevated priority.
// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
+ s_andn2_b32 s_save_status, s_save_status, S_STATUS_ALWAYS_CLEAR_MASK
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_getreg_b32 s_save_trapsts, hwreg(S_TRAPSTS_HWREG)
#if SW_SA_TRAP
// If ttmp1[30] is set then issue s_barrier to unblock dependent waves.
@@ -198,7 +234,7 @@ L_TRAP_NO_BARRIER:
s_cbranch_scc1 L_CHECK_SAVE
#endif
- s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_and_b32 ttmp2, s_save_status, S_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_HALTED
L_HALTED:
@@ -207,14 +243,14 @@ L_HALTED:
s_cbranch_scc1 L_FETCH_2ND_TRAP
L_CHECK_SAVE:
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+ s_and_b32 ttmp2, s_save_trapsts, S_TRAPSTS_SAVE_CONTEXT_MASK
s_cbranch_scc1 L_SAVE
// Wave is halted but neither host trap nor SAVECTX is raised.
// Caused by instruction fetch memory violation.
// Spin wait until context saved to prevent interrupt storm.
s_sleep 0x10
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_getreg_b32 s_save_trapsts, hwreg(S_TRAPSTS_HWREG)
s_branch L_CHECK_SAVE
L_NOT_HALTED:
@@ -224,7 +260,7 @@ L_NOT_HALTED:
// Check non-maskable exceptions. memory_violation, illegal_instruction
// and xnack_error exceptions always cause the wave to enter the trap
// handler.
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+ s_and_b32 ttmp2, s_save_trapsts, S_TRAPSTS_NON_MASKABLE_EXCP_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
@@ -248,15 +284,15 @@ L_CHECK_TRAP_ID:
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
-if SINGLE_STEP_MISSED_WORKAROUND
+#if SINGLE_STEP_MISSED_WORKAROUND
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
-end
+#endif
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+ s_and_b32 ttmp2, s_save_trapsts, S_TRAPSTS_SAVE_CONTEXT_MASK
s_cbranch_scc1 L_SAVE
L_FETCH_2ND_TRAP:
@@ -269,7 +305,7 @@ L_FETCH_2ND_TRAP:
// ttmp12 holds SQ_WAVE_STATUS
#if HAVE_SENDMSG_RTN
s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
#else
s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
@@ -281,16 +317,16 @@ L_FETCH_2ND_TRAP:
s_or_b32 ttmp15, ttmp15, 0xFFFF0000
L_NO_SIGN_EXTEND_TMA:
- s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
- s_waitcnt lgkmcnt(0)
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 S_COHERENCE // debug trap enabled flag
+ S_WAITCNT_0
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
s_or_b32 ttmp11, ttmp11, ttmp2
- s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
- s_waitcnt lgkmcnt(0)
- s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
- s_waitcnt lgkmcnt(0)
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 S_COHERENCE // second-level TBA
+ S_WAITCNT_0
+ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 S_COHERENCE // second-level TMA
+ S_WAITCNT_0
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
@@ -298,9 +334,13 @@ L_NO_SIGN_EXTEND_TMA:
L_NO_NEXT_TRAP:
// If not caused by trap then halt wave to prevent re-entry.
- s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_TRAP_CASE
- s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+ // Host trap will not cause trap re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+ s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK
// If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
// Rewind the PC to prevent this from occurring.
@@ -310,10 +350,6 @@ L_NO_NEXT_TRAP:
s_branch L_EXIT_TRAP
L_TRAP_CASE:
- // Host trap will not cause trap re-entry.
- s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
- s_cbranch_scc1 L_EXIT_TRAP
-
// Advance past trap instruction to prevent re-entry.
s_add_u32 ttmp0, ttmp0, 0x4
s_addc_u32 ttmp1, ttmp1, 0x0
@@ -328,14 +364,22 @@ L_EXIT_TRAP:
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
- s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status
+ s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status
s_rfe_b64 [ttmp0, ttmp1]
L_SAVE:
+ // If VGPRs have been deallocated then terminate the wavefront.
+ // It has no remaining program to run and cannot save without VGPRs.
+#if ASIC_FAMILY == CHIP_PLUM_BONITO
+ s_bitcmp1_b32 s_save_status, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
+ s_cbranch_scc0 L_HAVE_VGPRS
+ s_endpgm
+L_HAVE_VGPRS:
+#endif
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
s_mov_b32 s_save_tmp, 0
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
#if HAVE_XNACK
save_and_clear_ib_sts(s_save_tmp, s_save_trapsts)
@@ -360,7 +404,7 @@ L_SLEEP:
s_sleep 0x2
s_cbranch_execz L_SLEEP
#else
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
#endif
// Save first_wave flag so we can clear high bits of save address.
@@ -369,6 +413,12 @@ L_SLEEP:
s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
#if NO_SQC_STORE
+#if ASIC_FAMILY <= CHIP_SIENNA_CICHLID
+ // gfx10: If there was a VALU exception, the exception state must be
+ // cleared before executing the VALU instructions below.
+ v_clrexcp
+#endif
+
// Trap temporaries must be saved via VGPR but all VGPRs are in use.
// There is no ttmp space to hold the resource constant for VGPR save.
// Save v0 by itself since it requires only two SGPRs.
@@ -376,7 +426,7 @@ L_SLEEP:
s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
s_mov_b32 exec_lo, 0xFFFFFFFF
s_mov_b32 exec_hi, 0xFFFFFFFF
- global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] slc:1 glc:1
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] V_COHERENCE
v_mov_b32 v0, 0x0
s_mov_b32 exec_lo, s_save_ttmps_lo
s_mov_b32 exec_hi, s_save_ttmps_hi
@@ -384,7 +434,7 @@ L_SLEEP:
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
- get_wave_size(s_save_ttmps_hi)
+ get_wave_size2(s_save_ttmps_hi)
get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
get_svgpr_size_bytes(s_save_ttmps_hi)
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
@@ -408,15 +458,15 @@ L_SLEEP:
s_mov_b32 exec_lo, 0x3FFF
s_mov_b32 exec_hi, 0x0
- global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 slc:1 glc:1
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 V_COHERENCE
v_readlane_b32 ttmp14, v0, 0xE
v_readlane_b32 ttmp15, v0, 0xF
s_mov_b32 exec_lo, ttmp14
s_mov_b32 exec_hi, ttmp15
#else
- s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
- s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
- s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
+ s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 S_COHERENCE
+ s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 S_COHERENCE
+ s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 S_COHERENCE
#endif
/* setup Resource Contants */
@@ -430,7 +480,7 @@ L_SLEEP:
/* global mem offset */
s_mov_b32 s_save_mem_offset, 0x0
- get_wave_size(s_wave_size)
+ get_wave_size2(s_wave_size)
#if HAVE_XNACK
// Save and clear vector XNACK state late to free up SGPRs.
@@ -454,12 +504,22 @@ L_SAVE_4VGPR_WAVE32:
// VGPR Allocated in 4-GPR granularity
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_FIRST_VGPRS32_WITH_TCP
+
+ write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+ s_branch L_SAVE_HWREG
+
+L_SAVE_FIRST_VGPRS32_WITH_TCP:
+#endif
+
#if !NO_SQC_STORE
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
#endif
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*3
s_branch L_SAVE_HWREG
L_SAVE_4VGPR_WAVE64:
@@ -467,12 +527,22 @@ L_SAVE_4VGPR_WAVE64:
// VGPR Allocated in 4-GPR granularity
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_FIRST_VGPRS64_WITH_TCP
+
+ write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+ s_branch L_SAVE_HWREG
+
+L_SAVE_FIRST_VGPRS64_WITH_TCP:
+#endif
+
#if !NO_SQC_STORE
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
#endif
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*3
/* save HW registers */
@@ -500,7 +570,7 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
+ s_getreg_b32 s_save_tmp, hwreg(S_TRAPSTS_HWREG)
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
// Not used on Sienna_Cichlid but keep layout same for debugger.
@@ -519,7 +589,7 @@ L_SAVE_HWREG:
// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
s_mov_b32 exec_lo, 0xFFFF
s_mov_b32 exec_hi, 0x0
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
// Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
s_mov_b32 exec_lo, 0xFFFFFFFF
@@ -562,7 +632,7 @@ L_SAVE_SGPR_LOOP:
s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled?
s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
s_mov_b32 ttmp13, 0x0
v_mov_b32 v2, 0x0
@@ -583,7 +653,7 @@ L_SAVE_SGPR_SKIP_TCP_STORE:
write_12sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
#if NO_SQC_STORE
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
#else
// restore s_save_buf_rsrc0,1
s_mov_b32 s_save_buf_rsrc0, s_save_xnack_mask
@@ -613,8 +683,7 @@ L_SAVE_LDS_NORMAL:
// first wave do LDS save;
- s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
- s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
@@ -639,16 +708,36 @@ L_SAVE_LDS_NORMAL:
s_cbranch_scc1 L_SAVE_LDS_W64
L_SAVE_LDS_W32:
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_LDS_WITH_TCP_W32
+
+L_SAVE_LDS_LOOP_SQC_W32:
+ ds_read_b32 v1, v0
+ S_WAITCNT_0
+
+ write_vgprs_to_mem_with_sqc_w32(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 128 //every buffer_store_lds does 128 bytes
+ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_SQC_W32 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_WITH_TCP_W32:
+#endif
+
s_mov_b32 s3, 128
s_nop 0
s_nop 0
s_nop 0
L_SAVE_LDS_LOOP_W32:
ds_read_b32 v1, v0
- s_waitcnt 0
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ S_WAITCNT_0
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
- s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes
s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
@@ -657,14 +746,34 @@ L_SAVE_LDS_LOOP_W32:
s_branch L_SAVE_LDS_DONE
L_SAVE_LDS_W64:
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_LDS_WITH_TCP_W64
+
+L_SAVE_LDS_LOOP_SQC_W64:
+ ds_read_b32 v1, v0
+ S_WAITCNT_0
+
+ write_vgprs_to_mem_with_sqc_w64(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 256 //every buffer_store_lds does 256 bytes
+ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_SQC_W64 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_WITH_TCP_W64:
+#endif
+
s_mov_b32 s3, 256
s_nop 0
s_nop 0
s_nop 0
L_SAVE_LDS_LOOP_W64:
ds_read_b32 v1, v0
- s_waitcnt 0
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ S_WAITCNT_0
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
@@ -706,16 +815,35 @@ L_SAVE_VGPR_NORMAL:
s_cmp_lt_u32 m0, s_save_alloc_size
s_cbranch_scc0 L_SAVE_VGPR_END
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP
+
+L_SAVE_VGPR_LOOP_SQC_W32:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC_W32
+
+ s_branch L_SAVE_VGPR_END
+#endif
+
L_SAVE_VGPR_W32_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
v_movrels_b32 v1, v1 //v1 = v[1+m0]
v_movrels_b32 v2, v2 //v2 = v[2+m0]
v_movrels_b32 v3, v3 //v3 = v[3+m0]
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*3
s_add_u32 m0, m0, 4 //next vgpr index
s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes
@@ -732,16 +860,35 @@ L_SAVE_VGPR_WAVE64:
s_cmp_lt_u32 m0, s_save_alloc_size
s_cbranch_scc0 L_SAVE_SHARED_VGPR
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP
+
+L_SAVE_VGPR_LOOP_SQC_W64:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC_W64
+
+ s_branch L_SAVE_VGPR_END
+#endif
+
L_SAVE_VGPR_W64_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
v_movrels_b32 v1, v1 //v1 = v[1+m0]
v_movrels_b32 v2, v2 //v2 = v[2+m0]
v_movrels_b32 v3, v3 //v3 = v[3+m0]
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*3
s_add_u32 m0, m0, 4 //next vgpr index
s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
@@ -759,9 +906,26 @@ L_SAVE_SHARED_VGPR:
s_add_u32 s_save_alloc_size, s_save_alloc_size, m0
s_mov_b32 exec_lo, 0xFFFFFFFF
s_mov_b32 exec_hi, 0x00000000
+
+#if SAVE_AFTER_XNACK_ERROR
+ check_if_tcp_store_ok()
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP
+
+L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC:
+ v_movrels_b32 v0, v0
+
+ write_vgprs_to_mem_with_sqc_w64(v0, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ s_add_u32 m0, m0, 1
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC
+
+ s_branch L_SAVE_VGPR_END
+#endif
+
L_SAVE_SHARED_VGPR_WAVE64_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
s_add_u32 m0, m0, 1 //next vgpr index
s_add_u32 s_save_mem_offset, s_save_mem_offset, 128
s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
@@ -779,7 +943,7 @@ L_RESTORE:
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
//determine it is wave32 or wave64
- get_wave_size(s_restore_size)
+ get_wave_size2(s_restore_size)
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
s_cbranch_scc0 L_RESTORE_VGPR
@@ -799,8 +963,7 @@ L_RESTORE_LDS_NORMAL:
s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
- s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
- s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
@@ -824,7 +987,7 @@ L_RESTORE_LDS_LOOP_W32:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
#else
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
- s_waitcnt vmcnt(0)
+ S_WAITCNT_0
ds_store_addtid_b32 v0
#endif
s_add_u32 m0, m0, 128 // 128 DW
@@ -838,7 +1001,7 @@ L_RESTORE_LDS_LOOP_W64:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
#else
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
- s_waitcnt vmcnt(0)
+ S_WAITCNT_0
ds_store_addtid_b32 v0
#endif
s_add_u32 m0, m0, 256 // 256 DW
@@ -879,11 +1042,11 @@ L_RESTORE_VGPR_NORMAL:
s_cbranch_scc0 L_RESTORE_SGPR
L_RESTORE_VGPR_WAVE32_LOOP:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128*3
+ S_WAITCNT_0
v_movreld_b32 v0, v0 //v[0+m0] = v0
v_movreld_b32 v1, v1
v_movreld_b32 v2, v2
@@ -894,11 +1057,11 @@ L_RESTORE_VGPR_WAVE32_LOOP:
s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete?
/* VGPR restore on v0 */
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128*3
+ S_WAITCNT_0
s_branch L_RESTORE_SGPR
@@ -913,11 +1076,11 @@ L_RESTORE_VGPR_WAVE64:
s_cbranch_scc0 L_RESTORE_SHARED_VGPR
L_RESTORE_VGPR_WAVE64_LOOP:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256*3
+ S_WAITCNT_0
v_movreld_b32 v0, v0 //v[0+m0] = v0
v_movreld_b32 v1, v1
v_movreld_b32 v2, v2
@@ -939,8 +1102,8 @@ L_RESTORE_SHARED_VGPR:
s_mov_b32 exec_lo, 0xFFFFFFFF
s_mov_b32 exec_hi, 0x00000000
L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
+ S_WAITCNT_0
v_movreld_b32 v0, v0 //v[0+m0] = v0
s_add_u32 m0, m0, 1 //next vgpr index
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128
@@ -951,11 +1114,11 @@ L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
/* VGPR restore on v0 */
L_RESTORE_V0:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256*3
+ S_WAITCNT_0
/* restore SGPRs */
//will be 2+8+16*6
@@ -972,7 +1135,7 @@ L_RESTORE_SGPR:
s_mov_b32 m0, s_sgpr_save_num
read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104]
s_nop 0 // hazard SALU M0=> S_MOVREL
@@ -981,7 +1144,7 @@ L_RESTORE_SGPR:
s_movreld_b64 s2, s2
read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96]
s_nop 0 // hazard SALU M0=> S_MOVREL
@@ -993,7 +1156,7 @@ L_RESTORE_SGPR:
L_RESTORE_SGPR_LOOP:
read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
s_nop 0 // hazard SALU M0=> S_MOVREL
@@ -1035,12 +1198,12 @@ L_RESTORE_HWREG:
read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO), s_restore_flat_scratch
read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
+ S_WAITCNT_0
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
@@ -1048,16 +1211,21 @@ L_RESTORE_HWREG:
s_mov_b32 exec_lo, s_restore_exec_lo
s_mov_b32 exec_hi, s_restore_exec_hi
- s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
-
#if HAVE_XNACK
s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
#endif
- s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
- s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+ // {TRAPSTS/EXCP_FLAG_PRIV}.SAVE_CONTEXT and HOST_TRAP may have changed.
+ // Only restore the other fields to avoid clobbering them.
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, 0, S_TRAPSTS_RESTORE_PART_1_SIZE), s_restore_trapsts
+ s_lshr_b32 s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_2_SHIFT, S_TRAPSTS_RESTORE_PART_2_SIZE), s_restore_trapsts
+
+if S_TRAPSTS_RESTORE_PART_3_SIZE > 0
+ s_lshr_b32 s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_3_SHIFT - S_TRAPSTS_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_3_SHIFT, S_TRAPSTS_RESTORE_PART_3_SIZE), s_restore_trapsts
+end
+
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
@@ -1069,10 +1237,10 @@ L_RESTORE_HWREG:
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
- s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
- s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
- s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
- s_waitcnt lgkmcnt(0)
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 S_COHERENCE
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 S_COHERENCE
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 S_COHERENCE
+ S_WAITCNT_0
#if HAVE_XNACK
restore_ib_sts(s_restore_tmp, s_restore_m0)
@@ -1094,11 +1262,12 @@ L_RESTORE_HWREG:
L_RETURN_WITHOUT_PRIV:
#endif
- s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
+ s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu
+
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
L_END_PGM:
- s_endpgm
+ s_endpgm_saved
end
function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
@@ -1109,7 +1278,7 @@ function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
#else
s_mov_b32 exec_lo, m0
s_mov_b32 m0, s_mem_offset
- s_buffer_store_dword s, s_rsrc, m0 glc:1
+ s_buffer_store_dword s, s_rsrc, m0 S_COHERENCE
s_add_u32 s_mem_offset, s_mem_offset, 4
s_mov_b32 m0, exec_lo
#endif
@@ -1124,10 +1293,10 @@ function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
s_add_u32 ttmp13, ttmp13, 0x1
end
#else
- s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
- s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
- s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
- s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 S_COHERENCE
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 S_COHERENCE
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 S_COHERENCE
+ s_buffer_store_dwordx4 s[12], s_rsrc, 48 S_COHERENCE
s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
#endif
@@ -1141,40 +1310,72 @@ function write_12sgpr_to_mem(s, s_rsrc, s_mem_offset)
s_add_u32 ttmp13, ttmp13, 0x1
end
#else
- s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
- s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
- s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 S_COHERENCE
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 S_COHERENCE
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 S_COHERENCE
s_add_u32 s_rsrc[0], s_rsrc[0], 4*12
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
#endif
end
function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
- s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dword s, s_rsrc, s_mem_offset S_COHERENCE
s_add_u32 s_mem_offset, s_mem_offset, 4
end
function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
s_sub_u32 s_mem_offset, s_mem_offset, 4*16
- s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset S_COHERENCE
end
function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
s_sub_u32 s_mem_offset, s_mem_offset, 4*8
- s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset S_COHERENCE
end
function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
s_sub_u32 s_mem_offset, s_mem_offset, 4*4
- s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset S_COHERENCE
end
+#if SAVE_AFTER_XNACK_ERROR
+function check_if_tcp_store_ok
+ // If TRAPSTS.XNACK_ERROR=1 then TCP stores will fail.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
+ s_andn2_b32 s_save_tmp, SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK, s_save_tmp
-function get_lds_size_bytes(s_lds_size_byte)
- s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
- s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW
+L_TCP_STORE_CHECK_DONE:
end
+function write_vgpr_to_mem_with_sqc(vgpr, n_lanes, s_rsrc, s_mem_offset)
+ s_mov_b32 s4, 0
+
+L_WRITE_VGPR_LANE_LOOP:
+ for var lane = 0; lane < 4; ++lane
+ v_readlane_b32 s[lane], vgpr, s4
+ s_add_u32 s4, s4, 1
+ end
+
+ s_buffer_store_dwordx4 s[0:3], s_rsrc, s_mem_offset glc:1
+
+ s_add_u32 s_mem_offset, s_mem_offset, 0x10
+ s_cmp_eq_u32 s4, n_lanes
+ s_cbranch_scc0 L_WRITE_VGPR_LANE_LOOP
+end
+
+function write_vgprs_to_mem_with_sqc_w32(vgpr0, n_vgprs, s_rsrc, s_mem_offset)
+ for var vgpr = 0; vgpr < n_vgprs; ++vgpr
+ write_vgpr_to_mem_with_sqc(vgpr0[vgpr], 32, s_rsrc, s_mem_offset)
+ end
+end
+
+function write_vgprs_to_mem_with_sqc_w64(vgpr0, n_vgprs, s_rsrc, s_mem_offset)
+ for var vgpr = 0; vgpr < n_vgprs; ++vgpr
+ write_vgpr_to_mem_with_sqc(vgpr0[vgpr], 64, s_rsrc, s_mem_offset)
+ end
+end
+#endif
+
function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
@@ -1200,11 +1401,12 @@ function get_hwreg_size_bytes
return 128
end
-function get_wave_size(s_reg)
+function get_wave_size2(s_reg)
s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
end
+#if HAVE_XNACK
function save_and_clear_ib_sts(tmp1, tmp2)
// Preserve and clear scalar XNACK state before issuing scalar loads.
// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
@@ -1229,3 +1431,4 @@ function restore_ib_sts(tmp1, tmp2)
s_or_b32 tmp1, tmp1, tmp2
s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1
end
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
new file mode 100644
index 000000000000..5a1a1b1f897f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -0,0 +1,1136 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ *
+ * gfx12:
+ * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3
+ * sp3 gfx12.sp3 -hex gfx12.hex
+ */
+
+#define CHIP_GFX12 37
+
+#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
+#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
+
+var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
+var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
+var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
+var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
+var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
+var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15
+var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
+var SQ_WAVE_STATUS_WAVE64_SIZE = 1
+var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
+var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
+var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9
+
+var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
+var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
+var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5
+var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
+var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
+var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
+var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
+var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80
+var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200
+
+var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT
+var BARRIER_STATE_SIGNAL_OFFSET = 16
+var BARRIER_STATE_VALID_OFFSET = 0
+
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
+var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
+
+// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
+// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
+var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000
+var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31
+
+var s_sgpr_save_num = 108
+
+var s_save_spi_init_lo = exec_lo
+var s_save_spi_init_hi = exec_hi
+var s_save_pc_lo = ttmp0
+var s_save_pc_hi = ttmp1
+var s_save_exec_lo = ttmp2
+var s_save_exec_hi = ttmp3
+var s_save_state_priv = ttmp12
+var s_save_excp_flag_priv = ttmp15
+var s_save_xnack_mask = s_save_excp_flag_priv
+var s_wave_size = ttmp7
+var s_save_buf_rsrc0 = ttmp8
+var s_save_buf_rsrc1 = ttmp9
+var s_save_buf_rsrc2 = ttmp10
+var s_save_buf_rsrc3 = ttmp11
+var s_save_mem_offset = ttmp4
+var s_save_alloc_size = s_save_excp_flag_priv
+var s_save_tmp = ttmp14
+var s_save_m0 = ttmp5
+var s_save_ttmps_lo = s_save_tmp
+var s_save_ttmps_hi = s_save_excp_flag_priv
+
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+var S_WAVE_SIZE = 25
+
+var s_restore_spi_init_lo = exec_lo
+var s_restore_spi_init_hi = exec_hi
+var s_restore_mem_offset = ttmp12
+var s_restore_alloc_size = ttmp3
+var s_restore_tmp = ttmp2
+var s_restore_mem_offset_save = s_restore_tmp
+var s_restore_m0 = s_restore_alloc_size
+var s_restore_mode = ttmp7
+var s_restore_flat_scratch = s_restore_tmp
+var s_restore_pc_lo = ttmp0
+var s_restore_pc_hi = ttmp1
+var s_restore_exec_lo = ttmp4
+var s_restore_exec_hi = ttmp5
+var s_restore_state_priv = ttmp14
+var s_restore_excp_flag_priv = ttmp15
+var s_restore_xnack_mask = ttmp13
+var s_restore_buf_rsrc0 = ttmp8
+var s_restore_buf_rsrc1 = ttmp9
+var s_restore_buf_rsrc2 = ttmp10
+var s_restore_buf_rsrc3 = ttmp11
+var s_restore_size = ttmp6
+var s_restore_ttmps_lo = s_restore_tmp
+var s_restore_ttmps_hi = s_restore_alloc_size
+var s_restore_spi_init_hi_save = s_restore_exec_hi
+
+shader main
+ asic(DEFAULT)
+ type(CS)
+ wave_size(32)
+
+ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
+
+L_JUMP_TO_RESTORE:
+ s_branch L_RESTORE
+
+L_SKIP_RESTORE:
+ s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC
+
+ // Clear SYS_PRIO: do not save with elevated priority.
+ // Clear POISON_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK
+
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+
+ s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+ s_cbranch_scc0 L_NOT_HALTED
+
+L_HALTED:
+ // Host trap may occur while wave is halted.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_SAVE:
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+ // Wave is halted but neither host trap nor SAVECTX is raised.
+ // Caused by instruction fetch memory violation.
+ // Spin wait until context saved to prevent interrupt storm.
+ s_sleep 0x10
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_branch L_CHECK_SAVE
+
+L_NOT_HALTED:
+ // Let second-level handle non-SAVECTX exception or trap.
+ // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+ // Check non-maskable exceptions. memory_violation, illegal_instruction,
+ // host_trap, wave_start, wave_end and trap_after_inst exceptions always
+ // cause the wave to enter the trap handler.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ // Check for maskable exceptions in EXCP_FLAG_USER, plus address watch from
+ // EXCP_FLAG_PRIV. Maskable exceptions only cause the wave to enter the trap
+ // handler if their respective bit in TRAP_CTRL is set.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
+ s_cbranch_scc0 L_NOT_ADDR_WATCH
+ s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
+
+L_NOT_ADDR_WATCH:
+ s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ s_and_b32 ttmp2, ttmp3, ttmp2
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+#if SINGLE_STEP_MISSED_WORKAROUND
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ // WAVE_TRAP_CTRL is already in ttmp3.
+ s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+#endif
+
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+L_FETCH_2ND_TRAP:
+ // Read second-level TBA/TMA from first-level TMA and jump if available.
+ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+ // ttmp12 holds SQ_WAVE_STATE_PRIV
+ s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
+ s_wait_idle
+ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag
+ s_wait_idle
+ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp2
+
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA
+ s_wait_idle
+ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA
+ s_wait_idle
+
+ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
+ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+ // If not caused by trap then halt wave to prevent re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_TRAP_CASE
+
+ // Host trap will not cause trap re-entry.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+ s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set.
+ // Rewind the PC to prevent this from occurring.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+ s_branch L_EXIT_TRAP
+
+L_TRAP_CASE:
+ // Advance past trap instruction to prevent re-entry.
+ s_add_u32 ttmp0, ttmp0, 0x4
+ s_addc_u32 ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
+ s_and_b32 ttmp1, ttmp1, 0xFFFF
+
+ // Restore SQ_WAVE_STATUS.
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
+ // Only restore fields which the trap handler changes.
+ s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
+ SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
+
+ s_rfe_b64 [ttmp0, ttmp1]
+
+L_SAVE:
+ // If VGPRs have been deallocated then terminate the wavefront.
+ // It has no remaining program to run and cannot save without VGPRs.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
+ s_cbranch_scc0 L_HAVE_VGPRS
+ s_endpgm
+L_HAVE_VGPRS:
+
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_mov_b32 s_save_tmp, 0
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+ /* inform SPI the readiness and wait for SPI's go signal */
+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+ s_mov_b32 s_save_exec_hi, exec_hi
+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+ s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
+ s_wait_idle
+
+ // Save first_wave flag so we can clear high bits of save address.
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
+ // Trap temporaries must be saved via VGPR but all VGPRs are in use.
+ // There is no ttmp space to hold the resource constant for VGPR save.
+ // Save v0 by itself since it requires only two SGPRs.
+ s_mov_b32 s_save_ttmps_lo, exec_lo
+ s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS
+ v_mov_b32 v0, 0x0
+ s_mov_b32 exec_lo, s_save_ttmps_lo
+ s_mov_b32 exec_hi, s_save_ttmps_hi
+
+ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_wave_size2(s_save_ttmps_hi)
+ get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+ get_svgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+ s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
+
+ v_writelane_b32 v0, ttmp4, 0x4
+ v_writelane_b32 v0, ttmp5, 0x5
+ v_writelane_b32 v0, ttmp6, 0x6
+ v_writelane_b32 v0, ttmp7, 0x7
+ v_writelane_b32 v0, ttmp8, 0x8
+ v_writelane_b32 v0, ttmp9, 0x9
+ v_writelane_b32 v0, ttmp10, 0xA
+ v_writelane_b32 v0, ttmp11, 0xB
+ v_writelane_b32 v0, ttmp13, 0xD
+ v_writelane_b32 v0, exec_lo, 0xE
+ v_writelane_b32 v0, exec_hi, 0xF
+ valu_sgpr_hazard()
+
+ s_mov_b32 exec_lo, 0x3FFF
+ s_mov_b32 exec_hi, 0x0
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] offset:0x40 scope:SCOPE_SYS
+ v_readlane_b32 ttmp14, v0, 0xE
+ v_readlane_b32 ttmp15, v0, 0xF
+ s_mov_b32 exec_lo, ttmp14
+ s_mov_b32 exec_hi, ttmp15
+
+ /* setup Resource Constants */
+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
+ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+
+ s_mov_b32 s_save_m0, m0
+
+ /* global mem offset */
+ s_mov_b32 s_save_mem_offset, 0x0
+ get_wave_size2(s_wave_size)
+
+ /* save first 4 VGPRs, needed for SGPR save */
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_4VGPR_WAVE32
+L_ENABLE_SAVE_4VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ s_branch L_SAVE_4VGPR_WAVE64
+L_SAVE_4VGPR_WAVE32:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+ s_branch L_SAVE_HWREG
+
+L_SAVE_4VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+ /* save HW registers */
+
+L_SAVE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource
+ v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource
+ v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store
+
+ // Ensure no further changes to barrier or LDS state.
+ // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
+ s_barrier_signal -2
+ s_barrier_wait -2
+
+ // Re-read final state of BARRIER_COMPLETE field for save.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV)
+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp
+
+ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ v_writelane_b32 v2, s_save_m0, 0x0
+ v_writelane_b32 v2, s_save_pc_lo, 0x1
+ v_writelane_b32 v2, s_save_tmp, 0x2
+ v_writelane_b32 v2, s_save_exec_lo, 0x3
+ v_writelane_b32 v2, s_save_exec_hi, 0x4
+ v_writelane_b32 v2, s_save_state_priv, 0x5
+ v_writelane_b32 v2, s_save_xnack_mask, 0x7
+ valu_sgpr_hazard()
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ v_writelane_b32 v2, s_save_tmp, 0x6
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_MODE)
+ v_writelane_b32 v2, s_save_tmp, 0x8
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO)
+ v_writelane_b32 v2, s_save_tmp, 0x9
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI)
+ v_writelane_b32 v2, s_save_tmp, 0xA
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ v_writelane_b32 v2, s_save_tmp, 0xB
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ v_writelane_b32 v2, s_save_tmp, 0xC
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ v_writelane_b32 v2, s_save_tmp, 0xD
+
+ s_get_barrier_state s_save_tmp, -1
+ s_wait_kmcnt (0)
+ v_writelane_b32 v2, s_save_tmp, 0xE
+ valu_sgpr_hazard()
+
+ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+ s_mov_b32 exec_lo, 0xFFFF
+ s_mov_b32 exec_hi, 0x0
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+
+ /* save SGPRs */
+ // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
+
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ s_nop 0x0 //Manually inserted wait states
+L_SAVE_SGPR_LOOP:
+ // SGPR is allocated in 16 SGPR granularity
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
+
+ s_cmp_eq_u32 ttmp13, 0x0
+ s_cbranch_scc0 L_WRITE_V2_SECOND_HALF
+ write_16sgpr_to_v2(s0, 0x0)
+ s_branch L_SAVE_SGPR_SKIP_TCP_STORE
+L_WRITE_V2_SECOND_HALF:
+ write_16sgpr_to_v2(s0, 0x10)
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
+ s_mov_b32 ttmp13, 0x0
+ v_mov_b32 v2, 0x0
+L_SAVE_SGPR_SKIP_TCP_STORE:
+
+ s_add_u32 m0, m0, 16 //next sgpr index
+ s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
+
+ //save the remaining 12 SGPRs
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ write_12sgpr_to_v2(s0)
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ /* save LDS */
+
+L_SAVE_LDS:
+ // Change EXEC to all threads...
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_LDS_NORMAL
+L_ENABLE_SAVE_LDS_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_SAVE_LDS_NORMAL:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
+
+ s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+ // Only the first wave saves LDS.
+
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ //load 0~63*4(byte address) to vgpr v0
+ v_mbcnt_lo_u32_b32 v0, -1, 0
+ v_mbcnt_hi_u32_b32 v0, -1, v0
+ v_mul_u32_u24 v0, 4, v0
+
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_mov_b32 m0, 0x0
+ s_cbranch_scc1 L_SAVE_LDS_W64
+
+L_SAVE_LDS_W32:
+ s_mov_b32 s3, 128
+ s_nop 0
+ s_nop 0
+ s_nop 0
+L_SAVE_LDS_LOOP_W32:
+ ds_read_b32 v1, v0
+ s_wait_idle
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
+ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_W64:
+ s_mov_b32 s3, 256
+ s_nop 0
+ s_nop 0
+ s_nop 0
+L_SAVE_LDS_LOOP_W64:
+ ds_read_b32 v1, v0
+ s_wait_idle
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
+ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete?
+
+L_SAVE_LDS_DONE:
+ /* save VGPRs - save the rest of the VGPRs */
+L_SAVE_VGPR:
+ // VGPR SR memory offset: 0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI
+ s_mov_b32 s_save_mem_offset, (0+128*4) // for the rest VGPRs
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_VGPR_NORMAL
+L_ENABLE_SAVE_VGPR_EXEC_HI:
+ s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_SAVE_VGPR_NORMAL:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ //determine it is wave32 or wave64
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_SAVE_VGPR_WAVE64
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_END
+
+L_SAVE_VGPR_W32_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete?
+
+ s_branch L_SAVE_VGPR_END
+
+L_SAVE_VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_SHARED_VGPR
+
+L_SAVE_VGPR_W64_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete?
+
+L_SAVE_SHARED_VGPR:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
+ s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_VGPR_END
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+ //save shared_vgpr will start from the index of m0
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, m0
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0x00000000
+
+L_SAVE_SHARED_VGPR_WAVE64_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 m0, m0, 1 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete?
+
+L_SAVE_VGPR_END:
+ s_branch L_END_PGM
+
+L_RESTORE:
+ /* Setup Resource Constants */
+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+
+ // Save s_restore_spi_init_hi for later use.
+ s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
+
+ //determine it is wave32 or wave64
+ get_wave_size2(s_restore_size)
+
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_RESTORE_VGPR
+
+ /* restore LDS */
+L_RESTORE_LDS:
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_RESTORE_LDS_NORMAL
+L_ENABLE_RESTORE_LDS_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_RESTORE_LDS_NORMAL:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_mov_b32 m0, 0x0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64
+
+L_RESTORE_LDS_LOOP_W32:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_wait_idle
+ ds_store_addtid_b32 v0
+ s_add_u32 m0, m0, 128 // 128 bytes (32 DW)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete?
+ s_branch L_RESTORE_VGPR
+
+L_RESTORE_LDS_LOOP_W64:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_wait_idle
+ ds_store_addtid_b32 v0
+ s_add_u32 m0, m0, 256 // 256 bytes (64 DW)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete?
+
+ /* restore VGPRs */
+L_RESTORE_VGPR:
+ // VGPR SR memory offset : 0
+ s_mov_b32 s_restore_mem_offset, 0x0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_RESTORE_VGPR_NORMAL
+L_ENABLE_RESTORE_VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_RESTORE_VGPR_NORMAL:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ //determine it is wave32 or wave64
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE32_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ v_movreld_b32 v1, v1
+ v_movreld_b32 v2, v2
+ v_movreld_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete?
+
+ /* VGPR restore on v0 */
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3
+ s_wait_idle
+
+ s_branch L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE64:
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SHARED_VGPR
+
+L_RESTORE_VGPR_WAVE64_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*3
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ v_movreld_b32 v1, v1
+ v_movreld_b32 v2, v2
+ v_movreld_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+
+L_RESTORE_SHARED_VGPR:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
+ s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used?
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+ //restore shared_vgpr will start from the index of m0
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0x00000000
+L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ s_add_u32 m0, m0, 1 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+
+ s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!!
+
+ /* VGPR restore on v0 */
+L_RESTORE_V0:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3
+ s_wait_idle
+
+ /* restore SGPRs */
+ //will be 2+8+16*6
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+L_RESTORE_SGPR:
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 m0, s_sgpr_save_num
+
+ read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+
+ read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+
+ L_RESTORE_SGPR_LOOP:
+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+ s_movreld_b64 s8, s8
+ s_movreld_b64 s10, s10
+ s_movreld_b64 s12, s12
+ s_movreld_b64 s14, s14
+
+ s_cmp_eq_u32 m0, 0 //scc = (m0 == 0) ? 1 : 0
+ s_cbranch_scc0 L_RESTORE_SGPR_LOOP
+
+ // s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception.
+ // Clear DEBUG_EN before and restore MODE after the barrier.
+ s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0
+
+ /* restore HW registers */
+L_RESTORE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // Restore s_restore_spi_init_hi before the saved value gets clobbered.
+ s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
+
+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch
+
+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+ s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
+
+ // Only the first wave needs to restore the workgroup barrier.
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+
+ // Skip over WAVE_STATUS, since there is no state to restore from it
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+
+ // extract the saved signal count from s_restore_tmp
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
+
+ // We need to call s_barrier_signal repeatedly to restore the signal
+ // count of the work group barrier. The member count is already
+ // initialized with the number of waves in the work group.
+L_BARRIER_RESTORE_LOOP:
+ s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+ s_barrier_signal -1
+ s_add_i32 s_restore_tmp, s_restore_tmp, -1
+ s_branch L_BARRIER_RESTORE_LOOP
+
+L_SKIP_BARRIER_RESTORE:
+
+ s_mov_b32 m0, s_restore_m0
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+
+ // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed.
+ // Only restore the other fields to avoid clobbering them.
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv
+ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv
+ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode
+
+ // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+ get_svgpr_size_bytes(s_restore_ttmps_hi)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS
+ s_wait_idle
+
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu
+
+ // Make barrier and LDS state visible to all waves in the group.
+ // STATE_PRIV.BARRIER_COMPLETE may change after this point.
+ s_barrier_signal -2
+ s_barrier_wait -2
+
+ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+
+L_END_PGM:
+ // Make sure that no wave of the workgroup can exit the trap handler
+ // before the workgroup barrier state is saved.
+ s_barrier_signal -2
+ s_barrier_wait -2
+ s_endpgm_saved
+end
+
+function write_16sgpr_to_v2(s, lane_offset) // Stage 16 consecutive SGPRs, starting at s, into lanes lane_offset..lane_offset+15 of v2.
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], sgpr_idx + lane_offset // lane (sgpr_idx + lane_offset) of v2 = s[sgpr_idx]
+ end
+ valu_sgpr_hazard() // padding after the v_writelane sequence; expands to ds_nop only when HAVE_VALU_SGPR_HAZARD is set
+ s_add_u32 ttmp13, ttmp13, 0x10 // advance ttmp13 by 16; NOTE(review): presumably a running count of staged SGPRs - confirm against the caller
+end
+
+function write_12sgpr_to_v2(s) // Stage 12 consecutive SGPRs, starting at s, into lanes 0..11 of v2; unlike write_16sgpr_to_v2 this does not touch ttmp13.
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], sgpr_idx // lane sgpr_idx of v2 = s[sgpr_idx]
+ end
+ valu_sgpr_hazard() // padding after the v_writelane sequence; expands to ds_nop only when HAVE_VALU_SGPR_HAZARD is set
+end
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) // Issue a one-dword scalar load into s and post-increment s_mem_offset; callers follow up with s_wait_idle before using s.
+ s_buffer_load_dword s, s_rsrc, s_mem_offset scope:SCOPE_SYS // s = dword at (s_rsrc, s_mem_offset)
+ s_add_u32 s_mem_offset, s_mem_offset, 4 // step to the next dword
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset) // Pre-decrement s_mem_offset by 16 dwords, then issue a 16-dword scalar load starting at s.
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*16 // walk the save area downwards: 64 bytes per call
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset scope:SCOPE_SYS // the caller issues s_wait_idle before consuming the registers
+end
+
+function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset) // Pre-decrement s_mem_offset by 8 dwords, then issue an 8-dword scalar load starting at s.
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*8 // walk the save area downwards: 32 bytes per call
+ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset scope:SCOPE_SYS // the caller issues s_wait_idle before consuming the registers
+end
+
+function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset) // Pre-decrement s_mem_offset by 4 dwords, then issue a 4-dword scalar load starting at s.
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*4 // walk the save area downwards: 16 bytes per call
+ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset scope:SCOPE_SYS // the caller issues s_wait_idle before consuming the registers
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte, s_size) // s_vgpr_size_byte = size in bytes of the VGPR save area; bit S_WAVE_SIZE of s_size selects the wave64 layout.
+ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 // the field holds (allocation blocks - 1)
+ s_bitcmp1_b32 s_size, S_WAVE_SIZE // scc = wave64 flag
+ s_cbranch_scc1 L_ENABLE_SHIFT_W64
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //VGPR bytes = (vgpr_size + 1) * 4 VGPRs * 32 lanes * 4 bytes (non-zero value)
+ s_branch L_SHIFT_DONE
+L_ENABLE_SHIFT_W64:
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //VGPR bytes = (vgpr_size + 1) * 4 VGPRs * 64 lanes * 4 bytes (non-zero value)
+L_SHIFT_DONE:
+end
+
+function get_svgpr_size_bytes(s_svgpr_size_byte) // s_svgpr_size_byte = size in bytes of the shared-VGPR save area (0 when no shared VGPRs are allocated).
+ s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+ s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7) // bytes = shared_vgpr_size * 8 shared VGPRs * 128 bytes per shared VGPR
+end
+
+function get_sgpr_size_bytes // Constant size, in bytes, reserved for SGPRs in the save area; used when computing section offsets.
+ return 512
+end
+
+function get_hwreg_size_bytes // Constant size, in bytes, reserved for hardware registers in the save area; used when computing section offsets.
+ return 128
+end
+
+function get_wave_size2(s_reg) // s_reg = WAVE_STATUS.WAVE64 field moved up to bit S_WAVE_SIZE, the position callers test to tell wave32 from wave64.
+ s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) // read the WAVE64 field into the low bits
+ s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE // place it at bit S_WAVE_SIZE
+end
+
+function valu_sgpr_hazard // Emit 8 ds_nop instructions when HAVE_VALU_SGPR_HAZARD is set; expands to nothing otherwise.
+#if HAVE_VALU_SGPR_HAZARD
+ for var rep = 0; rep < 8; rep ++
+ ds_nop
+ end
+#endif
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index e506411ad28a..6869e07a2fff 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -37,17 +37,28 @@
* gc_9_4_3:
* cpp -DASIC_FAMILY=GC_9_4_3 cwsr_trap_handler_gfx9.asm -P -o gc_9_4_3.sp3
* sp3 gc_9_4_3.sp3 -hex gc_9_4_3.hex
+ *
+ * gc_9_5_0:
+ * cpp -DASIC_FAMILY=GC_9_5_0 cwsr_trap_handler_gfx9.asm -P -o gc_9_5_0.sp3
+ * sp3 gc_9_5_0.sp3 -hex gc_9_5_0.hex
*/
#define CHIP_VEGAM 18
#define CHIP_ARCTURUS 23
#define CHIP_ALDEBARAN 25
#define CHIP_GC_9_4_3 26
+#define CHIP_GC_9_5_0 27
var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <= CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+#if ASIC_FAMILY < CHIP_GC_9_4_3
+#define VMEM_MODIFIERS slc:1 glc:1
+#else
+#define VMEM_MODIFIERS sc0:1 nt:1
+#endif
+
/**************************************************************************/
/* variables */
/**************************************************************************/
@@ -62,7 +73,13 @@ var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 11
+var LDS_RESTORE_GRANULARITY_BYTES = 1280
+#else
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var LDS_RESTORE_GRANULARITY_BYTES = 512
+#endif
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
@@ -430,7 +447,9 @@ L_SAVE:
s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-
+ // Clear VSKIP state now that MODE.VSKIP has been saved.
+ // If user shader set it then vector instructions would be skipped.
+ s_setvskip 0,0
/* the first wave in the threadgroup */
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit
@@ -557,12 +576,21 @@ if SAVE_AFTER_XNACK_ERROR
v_lshlrev_b32 v2, 2, v3
L_SAVE_LDS_LOOP_SQC:
+#if ASIC_FAMILY < CHIP_GC_9_5_0
ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40
s_waitcnt lgkmcnt(0)
-
write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset)
v_add_u32 v2, 0x200, v2
+#else
+ // gfx950 needs to save in multiple of 256 bytes.
+ ds_read_b32 v0, v2
+ s_waitcnt lgkmcnt(0)
+ write_vgprs_to_mem_with_sqc(v0, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ v_add_u32 v2, 0x100, v2
+#endif
+
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC
@@ -581,11 +609,14 @@ end
L_SAVE_LDS_LOOP_VECTOR:
ds_read_b64 v[0:1], v2 //x =LDS[a], byte address
s_waitcnt lgkmcnt(0)
- buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset VMEM_MODIFIERS offen:1
// s_waitcnt vmcnt(0)
// v_add_u32 v2, vcc[0:1], v2, v3
v_add_u32 v2, v2, v3
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+ s_mov_b64 exec, vcc
+#endif
s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
// restore rsrc3
@@ -748,8 +779,13 @@ L_RESTORE:
L_RESTORE_LDS_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
- s_add_u32 m0, m0, 256*2 // 128 DW
- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:512 // third 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:768 // fourth 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:1024 // fifth 64DW
+#endif
+ s_add_u32 m0, m0, LDS_RESTORE_GRANULARITY_BYTES // 128/320 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, LDS_RESTORE_GRANULARITY_BYTES //mem offset increased by 128/320 DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
@@ -921,7 +957,7 @@ L_RESTORE:
/* the END */
/**************************************************************************/
L_END_PGM:
- s_endpgm
+ s_endpgm_saved
end
@@ -979,17 +1015,17 @@ L_TCP_STORE_CHECK_DONE:
end
function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
- buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
end
function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
- buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
s_waitcnt vmcnt(0)
end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index c37f1fcd2165..0f0719528bcc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -22,7 +22,6 @@
*/
#include <linux/device.h>
-#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
@@ -36,7 +35,6 @@
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
-#include <linux/fdtable.h>
#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
@@ -63,8 +61,10 @@ static const struct file_operations kfd_fops = {
};
static int kfd_char_dev_major = -1;
-static struct class *kfd_class;
struct device *kfd_device;
+static const struct class kfd_class = {
+ .name = kfd_dev_name,
+};
static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
{
@@ -94,14 +94,13 @@ int kfd_chardev_init(void)
if (err < 0)
goto err_register_chrdev;
- kfd_class = class_create(kfd_dev_name);
- err = PTR_ERR(kfd_class);
- if (IS_ERR(kfd_class))
+ err = class_register(&kfd_class);
+ if (err)
goto err_class_create;
- kfd_device = device_create(kfd_class, NULL,
- MKDEV(kfd_char_dev_major, 0),
- NULL, kfd_dev_name);
+ kfd_device = device_create(&kfd_class, NULL,
+ MKDEV(kfd_char_dev_major, 0),
+ NULL, kfd_dev_name);
err = PTR_ERR(kfd_device);
if (IS_ERR(kfd_device))
goto err_device_create;
@@ -109,7 +108,7 @@ int kfd_chardev_init(void)
return 0;
err_device_create:
- class_destroy(kfd_class);
+ class_unregister(&kfd_class);
err_class_create:
unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
@@ -118,8 +117,8 @@ err_register_chrdev:
void kfd_chardev_exit(void)
{
- device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
- class_destroy(kfd_class);
+ device_destroy(&kfd_class, MKDEV(kfd_char_dev_major, 0));
+ class_unregister(&kfd_class);
unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
kfd_device = NULL;
}
@@ -155,8 +154,8 @@ static int kfd_open(struct inode *inode, struct file *filep)
/* filep now owns the reference returned by kfd_create_process */
filep->private_data = process;
- dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
- process->pasid, process->is_32bit_user_mode);
+ dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",
+ process->lead_thread->pid, process->is_32bit_user_mode);
return 0;
}
@@ -212,6 +211,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
return -EINVAL;
}
+ if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
+ args->ring_size = KFD_MIN_QUEUE_RING_SIZE; /* undersized rings are raised to the minimum, not rejected */
+ pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE\n");
+ }
+
if (!access_ok((const void __user *) args->read_pointer_address,
sizeof(uint32_t))) {
pr_err("Can't access read pointer\n");
@@ -246,14 +250,15 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->priority = args->queue_priority;
q_properties->queue_address = args->ring_base_address;
q_properties->queue_size = args->ring_size;
- q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
- q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
+ q_properties->read_ptr = (void __user *)args->read_pointer_address;
+ q_properties->write_ptr = (void __user *)args->write_pointer_address;
q_properties->eop_ring_buffer_address = args->eop_buffer_address;
q_properties->eop_ring_buffer_size = args->eop_buffer_size;
q_properties->ctx_save_restore_area_address =
args->ctx_save_restore_address;
q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
q_properties->ctl_stack_size = args->ctl_stack_size;
+ q_properties->sdma_engine_id = args->sdma_engine_id;
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -261,6 +266,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->type = KFD_QUEUE_TYPE_SDMA;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
+ else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)
+ q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID;
else
return -ENOTSUPP;
@@ -305,7 +312,6 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
struct kfd_process_device *pdd;
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;
- struct amdgpu_bo *wptr_bo = NULL;
memset(&q_properties, 0, sizeof(struct queue_properties));
@@ -333,6 +339,18 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_bind_process;
}
+ if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
+ int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +
+ kfd_get_num_xgmi_sdma_engines(dev) - 1;
+
+ if (q_properties.sdma_engine_id > max_sdma_eng_id) {
+ err = -EINVAL;
+ pr_err("sdma_engine_id %i exceeds maximum id of %i\n",
+ q_properties.sdma_engine_id, max_sdma_eng_id);
+ goto err_sdma_engine_id;
+ }
+ }
+
if (!pdd->qpd.proc_doorbells) {
err = kfd_alloc_process_doorbells(dev->kfd, pdd);
if (err) {
@@ -341,48 +359,17 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
}
}
- /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
- * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
- */
- if (dev->kfd->shared_resources.enable_mes &&
- ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
- >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
- struct amdgpu_bo_va_mapping *wptr_mapping;
- struct amdgpu_vm *wptr_vm;
-
- wptr_vm = drm_priv_to_vm(pdd->drm_priv);
- err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
- if (err)
- goto err_wptr_map_gart;
-
- wptr_mapping = amdgpu_vm_bo_lookup_mapping(
- wptr_vm, args->write_pointer_address >> PAGE_SHIFT);
- amdgpu_bo_unreserve(wptr_vm->root.bo);
- if (!wptr_mapping) {
- pr_err("Failed to lookup wptr bo\n");
- err = -EINVAL;
- goto err_wptr_map_gart;
- }
-
- wptr_bo = wptr_mapping->bo_va->base.bo;
- if (wptr_bo->tbo.base.size > PAGE_SIZE) {
- pr_err("Requested GART mapping for wptr bo larger than one page\n");
- err = -EINVAL;
- goto err_wptr_map_gart;
- }
-
- err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
- if (err) {
- pr_err("Failed to map wptr bo to GART\n");
- goto err_wptr_map_gart;
- }
+ err = kfd_queue_acquire_buffers(pdd, &q_properties);
+ if (err) {
+ pr_debug("failed to acquire user queue buffers\n");
+ goto err_acquire_queue_buf;
}
- pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
- p->pasid,
+ pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
+ p->lead_thread->pid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
+ err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,
NULL, NULL, NULL, &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -416,9 +403,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
return 0;
err_create_queue:
- if (wptr_bo)
- amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
-err_wptr_map_gart:
+ kfd_queue_unref_bo_vas(pdd, &q_properties);
+ kfd_queue_release_buffers(pdd, &q_properties);
+err_acquire_queue_buf:
+err_sdma_engine_id:
err_bind_process:
err_pdd:
mutex_unlock(&p->mutex);
@@ -431,9 +419,9 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
int retval;
struct kfd_ioctl_destroy_queue_args *args = data;
- pr_debug("Destroying queue id %d for pasid 0x%x\n",
+ pr_debug("Destroying queue id %d for process pid %d\n",
args->queue_id,
- p->pasid);
+ p->lead_thread->pid);
mutex_lock(&p->mutex);
@@ -477,6 +465,11 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
return -EINVAL;
}
+ if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
+ args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
+ pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
+ }
+
properties.queue_address = args->ring_base_address;
properties.queue_size = args->ring_size;
properties.queue_percent = args->queue_percentage & 0xFF;
@@ -484,8 +477,8 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
properties.priority = args->queue_priority;
- pr_debug("Updating queue id %d for pasid 0x%x\n",
- args->queue_id, p->pasid);
+ pr_debug("Updating queue id %d for process pid %d\n",
+ args->queue_id, p->lead_thread->pid);
mutex_lock(&p->mutex);
@@ -528,15 +521,10 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
}
- minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
- if (!minfo.cu_mask.ptr)
- return -ENOMEM;
-
- retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
- if (retval) {
+ minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size);
+ if (IS_ERR(minfo.cu_mask.ptr)) {
pr_debug("Could not copy CU mask from userspace");
- retval = -EFAULT;
- goto out;
+ return PTR_ERR(minfo.cu_mask.ptr);
}
mutex_lock(&p->mutex);
@@ -545,7 +533,6 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
mutex_unlock(&p->mutex);
-out:
kfree(minfo.cu_mask.ptr);
return retval;
}
@@ -612,7 +599,8 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
default_policy,
alternate_policy,
(void __user *)args->alternate_aperture_base,
- args->alternate_aperture_size))
+ args->alternate_aperture_size,
+ args->misc_process_flag))
err = -EINVAL;
out:
@@ -711,7 +699,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
struct kfd_process_device_apertures *pAperture;
int i;
- dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+ dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);
args->num_of_nodes = 0;
@@ -763,7 +751,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
int ret;
int i;
- dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+ dev_dbg(kfd_device, "get apertures for process pid %d",
+ p->lead_thread->pid);
if (args->num_of_nodes == 0) {
/* Return number of nodes, so that user space can alloacate
@@ -778,8 +767,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
* nodes, but not more than args->num_of_nodes as that is
* the amount of memory allocated by user
*/
- pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
- args->num_of_nodes), GFP_KERNEL);
+ pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
+ GFP_KERNEL);
if (!pa)
return -ENOMEM;
@@ -1021,7 +1010,7 @@ err_drm_file:
bool kfd_dev_is_large_bar(struct kfd_node *dev)
{
- if (debug_largebar) {
+ if (dev->kfd->adev->debug_largebar) {
pr_debug("Simulate large-bar allocation on non large-bar machine\n");
return true;
}
@@ -1075,7 +1064,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
svm_range_list_lock_and_flush_work(&p->svms, current->mm);
mutex_lock(&p->svms.lock);
mmap_write_unlock(current->mm);
- if (interval_tree_iter_first(&p->svms.objects,
+
+ /* Skip the SVM overlap check for the special case of a VRAM
+ * allocation without a VA; va_addr is then 0, which is not a valid VA.
+ */
+ if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) &&
+ interval_tree_iter_first(&p->svms.objects,
args->va_addr >> PAGE_SHIFT,
(args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
pr_err("Address: 0x%llx already allocated by SVM\n",
@@ -1138,7 +1132,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
goto err_unlock;
}
offset = dev->adev->rmmio_remap.bus_addr;
- if (!offset) {
+ if (!offset || (PAGE_SIZE > 4096)) {
err = -ENOMEM;
goto err_unlock;
}
@@ -1164,7 +1158,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
size >>= 1;
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+ atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
}
mutex_unlock(&p->mutex);
@@ -1235,7 +1229,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
kfd_process_device_remove_obj_handle(
pdd, GET_IDR_HANDLE(args->handle));
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+ atomic64_sub(size, &pdd->vram_usage);
err_unlock:
err_pdd:
@@ -1416,8 +1410,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
if (err) {
- pr_err("Failed to unmap from gpu %d/%d\n",
- i, args->n_devices);
+ pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices);
goto unmap_memory_from_gpu_failed;
}
args->n_success = i+1;
@@ -1432,17 +1425,23 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
goto sync_memory_failed;
}
}
- mutex_unlock(&p->mutex);
- if (flush_tlb) {
- /* Flush TLBs after waiting for the page table updates to complete */
- for (i = 0; i < args->n_devices; i++) {
- peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
- if (WARN_ON_ONCE(!peer_pdd))
- continue;
+ /* Flush TLBs after waiting for the page table updates to complete */
+ for (i = 0; i < args->n_devices; i++) {
+ peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ if (flush_tlb)
kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
- }
+
+ /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
+ err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+ if (err)
+ goto sync_memory_failed;
}
+
+ mutex_unlock(&p->mutex);
+
kfree(devices_arr);
return 0;
@@ -1516,7 +1515,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
/* Find a KFD GPU device that supports the get_dmabuf_info query */
for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
- if (dev)
+ if (dev && !kfd_devcgroup_check_permission(dev))
break;
if (!dev)
return -EINVAL;
@@ -1538,7 +1537,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
if (xcp_id >= 0)
args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;
else
- args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id;
+ args->gpu_id = dev->id;
args->flags = flags;
/* Copy metadata buffer to user mode */
@@ -1560,16 +1559,11 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
{
struct kfd_ioctl_import_dmabuf_args *args = data;
struct kfd_process_device *pdd;
- struct dma_buf *dmabuf;
int idr_handle;
uint64_t size;
void *mem;
int r;
- dmabuf = dma_buf_get(args->dmabuf_fd);
- if (IS_ERR(dmabuf))
- return PTR_ERR(dmabuf);
-
mutex_lock(&p->mutex);
pdd = kfd_process_device_data_by_id(p, args->gpu_id);
if (!pdd) {
@@ -1583,10 +1577,10 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
goto err_unlock;
}
- r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
- args->va_addr, pdd->drm_priv,
- (struct kgd_mem **)&mem, &size,
- NULL);
+ r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd,
+ args->va_addr, pdd->drm_priv,
+ (struct kgd_mem **)&mem, &size,
+ NULL);
if (r)
goto err_unlock;
@@ -1597,7 +1591,6 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
}
mutex_unlock(&p->mutex);
- dma_buf_put(dmabuf);
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
@@ -1608,7 +1601,6 @@ err_free:
pdd->drm_priv, NULL);
err_unlock:
mutex_unlock(&p->mutex);
- dma_buf_put(dmabuf);
return r;
}
@@ -1851,8 +1843,9 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
return num_of_bos;
}
-static int criu_get_prime_handle(struct kgd_mem *mem, int flags,
- u32 *shared_fd)
+static int criu_get_prime_handle(struct kgd_mem *mem,
+ int flags, u32 *shared_fd,
+ struct file **file)
{
struct dma_buf *dmabuf;
int ret;
@@ -1863,13 +1856,14 @@ static int criu_get_prime_handle(struct kgd_mem *mem, int flags,
return ret;
}
- ret = dma_buf_fd(dmabuf, flags);
+ ret = get_unused_fd_flags(flags);
if (ret < 0) {
pr_err("dmabuf create fd failed, ret:%d\n", ret);
goto out_free_dmabuf;
}
*shared_fd = ret;
+ *file = dmabuf->file;
return 0;
out_free_dmabuf:
@@ -1877,6 +1871,25 @@ out_free_dmabuf:
return ret;
}
+static void commit_files(struct file **files, /* per-slot file pointers; NULL slots are skipped */
+ struct kfd_criu_bo_bucket *bo_buckets, /* per-slot buckets; only .dmabuf_fd is read here */
+ unsigned int count, /* number of leading slots of files[]/bo_buckets[] to process */
+ int err) /* zero: install the files; non-zero: undo the pending fds */
+{ /* Complete or roll back the fd hand-off for the first @count slots; returns nothing. */
+ while (count--) { /* visits indices count-1 down to 0 */
+ struct file *file = files[count];
+
+ if (!file) /* no file was recorded for this slot, nothing to do */
+ continue;
+ if (err) {
+ fput(file); /* error path: drop the file reference held in files[] */
+ put_unused_fd(bo_buckets[count].dmabuf_fd); /* and release the fd number that was never installed */
+ } else {
+ fd_install(bo_buckets[count].dmabuf_fd, file); /* success path: attach the file to its fd number */
+ }
+ }
+}
+
static int criu_checkpoint_bos(struct kfd_process *p,
uint32_t num_bos,
uint8_t __user *user_bos,
@@ -1885,6 +1898,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
{
struct kfd_criu_bo_bucket *bo_buckets;
struct kfd_criu_bo_priv_data *bo_privs;
+ struct file **files = NULL;
int ret = 0, pdd_index, bo_index = 0, id;
void *mem;
@@ -1898,6 +1912,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
goto exit;
}
+ files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
+ if (!files) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
struct kfd_process_device *pdd = p->pdds[pdd_index];
struct amdgpu_bo *dumper_bo;
@@ -1908,11 +1928,6 @@ static int criu_checkpoint_bos(struct kfd_process *p,
struct kfd_criu_bo_priv_data *bo_priv;
int i, dev_idx = 0;
- if (!mem) {
- ret = -ENOMEM;
- goto exit;
- }
-
kgd_mem = (struct kgd_mem *)mem;
dumper_bo = kgd_mem->bo;
@@ -1945,7 +1960,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
ret = criu_get_prime_handle(kgd_mem,
bo_bucket->alloc_flags &
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
- &bo_bucket->dmabuf_fd);
+ &bo_bucket->dmabuf_fd, &files[bo_index]);
if (ret)
goto exit;
} else {
@@ -1963,7 +1978,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
for (i = 0; i < p->n_pdds; i++) {
- if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
+ if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem))
bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
}
@@ -1996,12 +2011,8 @@ static int criu_checkpoint_bos(struct kfd_process *p,
*priv_offset += num_bos * sizeof(*bo_privs);
exit:
- while (ret && bo_index--) {
- if (bo_buckets[bo_index].alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
- close_fd(bo_buckets[bo_index].dmabuf_fd);
- }
-
+ commit_files(files, bo_buckets, bo_index, ret);
+ kvfree(files);
kvfree(bo_buckets);
kvfree(bo_privs);
return ret;
@@ -2026,9 +2037,7 @@ static int criu_get_process_object_info(struct kfd_process *p,
num_events = kfd_get_num_events(p);
- ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
- if (ret)
- return ret;
+ svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
*num_objects = num_queues + num_events + num_svm_ranges;
@@ -2307,7 +2316,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
return -EINVAL;
}
offset = pdd->dev->adev->rmmio_remap.bus_addr;
- if (!offset) {
+ if (!offset || (PAGE_SIZE > 4096)) {
pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
return -ENOMEM;
}
@@ -2346,14 +2355,15 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
bo_bucket->restored_offset = offset;
/* Update the VRAM usage count */
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+ atomic64_add(bo_bucket->size, &pdd->vram_usage);
}
return 0;
}
static int criu_restore_bo(struct kfd_process *p,
struct kfd_criu_bo_bucket *bo_bucket,
- struct kfd_criu_bo_priv_data *bo_priv)
+ struct kfd_criu_bo_priv_data *bo_priv,
+ struct file **file)
{
struct kfd_process_device *pdd;
struct kgd_mem *kgd_mem;
@@ -2405,7 +2415,7 @@ static int criu_restore_bo(struct kfd_process *p,
if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
- &bo_bucket->dmabuf_fd);
+ &bo_bucket->dmabuf_fd, file);
if (ret)
return ret;
} else {
@@ -2422,6 +2432,7 @@ static int criu_restore_bos(struct kfd_process *p,
{
struct kfd_criu_bo_bucket *bo_buckets = NULL;
struct kfd_criu_bo_priv_data *bo_privs = NULL;
+ struct file **files = NULL;
int ret = 0;
uint32_t i = 0;
@@ -2435,6 +2446,12 @@ static int criu_restore_bos(struct kfd_process *p,
if (!bo_buckets)
return -ENOMEM;
+ files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
+ if (!files) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
ret = copy_from_user(bo_buckets, (void __user *)args->bos,
args->num_bos * sizeof(*bo_buckets));
if (ret) {
@@ -2460,7 +2477,7 @@ static int criu_restore_bos(struct kfd_process *p,
/* Create and map new BOs */
for (; i < args->num_bos; i++) {
- ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
+ ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
if (ret) {
pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
goto exit;
@@ -2475,11 +2492,8 @@ static int criu_restore_bos(struct kfd_process *p,
ret = -EFAULT;
exit:
- while (ret && i--) {
- if (bo_buckets[i].alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
- close_fd(bo_buckets[i].dmabuf_fd);
- }
+ commit_files(files, bo_buckets, i, ret);
+ kvfree(files);
kvfree(bo_buckets);
kvfree(bo_privs);
return ret;
@@ -2551,8 +2565,8 @@ static int criu_restore(struct file *filep,
pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
- if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
- !args->num_devices || !args->num_bos)
+ if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data ||
+ !args->priv_data_size || !args->num_devices)
return -EINVAL;
mutex_lock(&p->mutex);
@@ -2936,6 +2950,7 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
if (IS_ERR_OR_NULL(target)) {
pr_debug("Cannot find process PID %i to debug\n", args->pid);
r = target ? PTR_ERR(target) : -ESRCH;
+ target = NULL;
goto out;
}
@@ -3236,8 +3251,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
int retcode = -EINVAL;
bool ptrace_attached = false;
- if (nr >= AMDKFD_CORE_IOCTL_COUNT)
+ if (nr >= AMDKFD_CORE_IOCTL_COUNT) {
+ retcode = -ENOTTY;
goto err_i1;
+ }
if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
u32 amdkfd_size;
@@ -3250,8 +3267,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
asize = amdkfd_size;
cmd = ioctl->cmd;
- } else
+ } else {
+ retcode = -ENOTTY;
goto err_i1;
+ }
dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
@@ -3348,6 +3367,9 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
+ if (PAGE_SIZE > 4096)
+ return -EINVAL;
+
address = dev->adev->rmmio_remap.bus_addr;
vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
@@ -3355,12 +3377,12 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pr_debug("pasid 0x%x mapping mmio page\n"
+ pr_debug("process pid %d mapping mmio page\n"
" target user address == 0x%08llX\n"
" physical address == 0x%08llX\n"
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
- process->pasid, (unsigned long long) vma->vm_start,
+ process->lead_thread->pid, (unsigned long long) vma->vm_start,
address, vma->vm_flags, PAGE_SIZE);
return io_remap_pfn_range(vma,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index f76b7aee5c0a..4a7180b46b71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -28,6 +28,7 @@
#include "kfd_topology.h"
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_xgmi.h"
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1.
@@ -55,6 +56,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -64,6 +66,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
/* Scalar L1 Instruction Cache (in SQC module) per bank */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -73,6 +76,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = {
/* Scalar L1 Data Cache (in SQC module) per bank */
.cache_size = 8,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -88,6 +92,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -95,8 +100,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
},
{
/* Scalar L1 Instruction Cache (in SQC module) per bank */
- .cache_size = 8,
+ .cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -104,8 +110,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
},
{
/* Scalar L1 Data Cache (in SQC module) per bank. */
- .cache_size = 4,
+ .cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -135,6 +142,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -144,6 +152,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -153,6 +162,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -162,6 +172,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 4096,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -174,6 +185,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -183,6 +195,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -192,6 +205,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -201,6 +215,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -213,6 +228,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -222,6 +238,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -231,6 +248,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -240,6 +258,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -252,6 +271,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -261,6 +281,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -270,6 +291,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -279,6 +301,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -291,6 +314,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -300,6 +324,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -309,6 +334,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -318,6 +344,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 8192,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -330,6 +357,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -339,6 +367,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -348,6 +377,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -357,6 +387,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 8192,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -369,6 +400,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -378,6 +410,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -387,6 +420,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -396,6 +430,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -405,6 +440,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 4096,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -417,6 +453,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -426,6 +463,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -435,6 +473,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -444,6 +483,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -453,6 +493,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -465,6 +506,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -474,6 +516,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -483,6 +526,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -492,6 +536,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -501,6 +546,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -513,6 +559,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -522,6 +569,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -531,6 +579,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -540,6 +589,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -549,6 +599,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 4096,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -558,6 +609,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
/* L3 Data Cache per GPU */
.cache_size = 128*1024,
.cache_level = 3,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -570,6 +622,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -579,6 +632,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -588,6 +642,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -597,6 +652,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -606,6 +662,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 3072,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -615,6 +672,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
/* L3 Data Cache per GPU */
.cache_size = 96*1024,
.cache_level = 3,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -627,6 +685,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -636,6 +695,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -645,6 +705,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -654,6 +715,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -663,6 +725,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -672,6 +735,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
/* L3 Data Cache per GPU */
.cache_size = 32*1024,
.cache_level = 3,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -684,6 +748,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -693,6 +758,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -702,6 +768,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -711,6 +778,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -720,6 +788,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 1024,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -729,6 +798,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
/* L3 Data Cache per GPU */
.cache_size = 16*1024,
.cache_level = 3,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -741,6 +811,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -750,6 +821,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -759,6 +831,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -768,6 +841,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -777,6 +851,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -789,6 +864,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -798,6 +874,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -807,6 +884,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -816,6 +894,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -825,6 +904,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 256,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -837,6 +917,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -846,6 +927,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -855,6 +937,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -864,6 +947,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -873,6 +957,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 256,
.cache_level = 2,
+ .cache_line_size = 128,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -885,6 +970,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
/* TCP L1 Cache per CU */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -894,6 +980,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
/* Scalar L1 Instruction Cache per SQC */
.cache_size = 32,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -903,6 +990,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
/* Scalar L1 Data Cache per SQC */
.cache_size = 16,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -912,6 +1000,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
/* GL1 Data Cache per SA */
.cache_size = 128,
.cache_level = 1,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -921,6 +1010,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = {
/* L2 Data Cache per GPU (Total Tex Cache) */
.cache_size = 2048,
.cache_level = 2,
+ .cache_line_size = 64,
.flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE),
@@ -1333,6 +1423,7 @@ err:
static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+ bool cache_line_size_missing,
struct kfd_gpu_cache_info *pcache_info)
{
struct amdgpu_device *adev = kdev->adev;
@@ -1345,7 +1436,10 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
- pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* Scalar L1 Instruction Cache per SQC */
@@ -1357,6 +1451,9 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* Scalar L1 Data Cache per SQC */
@@ -1367,6 +1464,9 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 64;
i++;
}
/* GL1 Data Cache per SA */
@@ -1379,6 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ if (cache_line_size_missing)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* L2 Data Cache per GPU (Total Tex Cache) */
@@ -1389,12 +1491,82 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
+ i++;
+ }
+ /* L3 Data Cache per GPU */
+ if (adev->gmc.mall_size) {
+ pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+ pcache_info[i].cache_level = 3;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = 64;
+ i++;
+ }
+ return i;
+}
+
+static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
+ struct kfd_gpu_cache_info *pcache_info)
+{
+ struct amdgpu_device *adev = kdev->adev;
+ int i = 0;
+
+ /* TCP L1 Cache per CU */
+ if (adev->gfx.config.gc_tcp_size_per_cu) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
+ pcache_info[i].cache_level = 1;
+ /* Cacheline size is not available in IP discovery for gc943, gc944 */
+ pcache_info[i].cache_line_size = 128;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = 1;
+ i++;
+ }
+ /* Scalar L1 Instruction Cache per SQC */
+ if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
+ pcache_info[i].cache_size =
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].cache_line_size = 64;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
+ i++;
+ }
+ /* Scalar L1 Data Cache per SQC */
+ if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].cache_line_size = 64;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
+ i++;
+ }
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ if (adev->gfx.config.gc_tcc_size) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
+ pcache_info[i].cache_level = 2;
+ pcache_info[i].cache_line_size = 128;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
i++;
}
/* L3 Data Cache per GPU */
if (adev->gmc.mall_size) {
pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
pcache_info[i].cache_level = 3;
+ pcache_info[i].cache_line_size = 64;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1407,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
int num_of_cache_types = 0;
+ bool cache_line_size_missing = false;
switch (kdev->adev->asic_type) {
case CHIP_KAVERI:
@@ -1461,10 +1634,16 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
break;
case IP_VERSION(9, 4, 2):
- case IP_VERSION(9, 4, 3):
*pcache_info = aldebaran_cache_info;
num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ num_of_cache_types =
+ kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
+ *pcache_info);
+ break;
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
*pcache_info = raven_cache_info;
@@ -1522,8 +1701,21 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* Cacheline size is not available in IP discovery for gc11, so
+ * kfd_fill_gpu_cache_info_from_gfx_config() hard-codes it instead.
+ */
+ cache_line_size_missing = true;
+ fallthrough;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
num_of_cache_types =
- kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
+ kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
+ cache_line_size_missing,
+ *pcache_info);
break;
default:
*pcache_info = dummy_cache_info;
@@ -1941,9 +2133,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
KFD_CRAT_INTRA_SOCKET_WEIGHT;
- uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
- kdev->adev, NULL, true) : mem_bw;
-
/*
* with host gpu xgmi link, host can access gpu memory whether
* or not pcie bar type is large, so always create bidirectional
@@ -1952,8 +2141,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->weight_xgmi = weight;
- sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
- sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
+ if (ext_cpu) {
+ amdgpu_xgmi_get_bandwidth(kdev->adev, NULL,
+ AMDGPU_XGMI_BW_MODE_PER_LINK,
+ AMDGPU_XGMI_BW_UNIT_MBYTES,
+ &sub_type_hdr->minimum_bandwidth_mbs,
+ &sub_type_hdr->maximum_bandwidth_mbs);
+ } else {
+ sub_type_hdr->minimum_bandwidth_mbs = mem_bw;
+ sub_type_hdr->maximum_bandwidth_mbs = mem_bw;
+ }
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
sub_type_hdr->minimum_bandwidth_mbs =
@@ -2006,12 +2203,12 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
if (use_ta_info) {
sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
- amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
- sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
- peer_kdev->adev, false);
- sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
+ amdgpu_xgmi_get_hops_count(kdev->adev, peer_kdev->adev);
+ amdgpu_xgmi_get_bandwidth(kdev->adev, peer_kdev->adev,
+ AMDGPU_XGMI_BW_MODE_PER_PEER,
+ AMDGPU_XGMI_BW_UNIT_MBYTES,
+ &sub_type_hdr->minimum_bandwidth_mbs,
+ &sub_type_hdr->maximum_bandwidth_mbs);
} else {
bool is_single_hop = kdev->kfd == peer_kdev->kfd;
int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
@@ -2037,11 +2234,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
uint32_t proximity_domain)
{
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+ struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
+ struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
struct crat_subtype_generic *sub_type_hdr;
struct kfd_local_mem_info local_mem_info;
struct kfd_topology_device *peer_dev;
struct crat_subtype_computeunit *cu;
- struct kfd_cu_info cu_info;
int avail_size = *size;
uint32_t total_num_of_cu;
uint32_t nid = 0;
@@ -2054,9 +2252,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* Modify length and total_entries as subunits are added.
*/
avail_size -= sizeof(struct crat_header);
- if (avail_size < 0)
- return -ENOMEM;
-
memset(crat_table, 0, sizeof(struct crat_header));
memcpy(&crat_table->signature, CRAT_SIGNATURE,
@@ -2070,9 +2265,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* First fill in the sub type header and then sub type data
*/
avail_size -= sizeof(struct crat_subtype_computeunit);
- if (avail_size < 0)
- return -ENOMEM;
-
sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
@@ -2085,21 +2277,20 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
cu->proximity_domain = proximity_domain;
- amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
- cu->num_simd_per_cu = cu_info.simd_per_cu;
- cu->num_simd_cores = cu_info.simd_per_cu *
- (cu_info.cu_active_number / kdev->kfd->num_nodes);
- cu->max_waves_simd = cu_info.max_waves_per_simd;
+ cu->num_simd_per_cu = cu_info->simd_per_cu;
+ cu->num_simd_cores = cu_info->simd_per_cu *
+ (cu_info->number / kdev->kfd->num_nodes);
+ cu->max_waves_simd = cu_info->max_waves_per_simd;
- cu->wave_front_size = cu_info.wave_front_size;
- cu->array_count = cu_info.num_shader_arrays_per_engine *
- cu_info.num_shader_engines;
- total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
+ cu->wave_front_size = cu_info->wave_front_size;
+ cu->array_count = gfx_info->max_sh_per_se *
+ gfx_info->max_shader_engines;
+ total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh);
cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
- cu->num_cu_per_array = cu_info.num_cu_per_sh;
- cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
- cu->num_banks = cu_info.num_shader_engines;
- cu->lds_size_in_kb = cu_info.lds_size;
+ cu->num_cu_per_array = gfx_info->max_cu_per_sh;
+ cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu;
+ cu->num_banks = gfx_info->max_shader_engines;
+ cu->lds_size_in_kb = cu_info->lds_size;
cu->hsa_capability = 0;
@@ -2115,7 +2306,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);
- if (debug_largebar)
+ if (kdev->adev->debug_largebar)
local_mem_info.local_mem_size_private = 0;
if (local_mem_info.local_mem_size_private == 0)
@@ -2170,6 +2361,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
continue;
if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
continue;
+ if (!amdgpu_xgmi_get_is_sharing_enabled(kdev->adev, peer_dev->gpu->adev))
+ continue;
sub_type_hdr = (typeof(sub_type_hdr))(
(char *)sub_type_hdr +
sizeof(struct crat_subtype_iolink));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 74c2d7a0d628..a8ca7ecb6d27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -42,8 +42,6 @@
#define CRAT_OEMTABLEID_LENGTH 8
#define CRAT_RESERVED_LENGTH 6
-#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
-
/* Compute Unit flags */
#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */
#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */
@@ -303,6 +301,7 @@ struct kfd_node;
struct kfd_gpu_cache_info {
uint32_t cache_size;
uint32_t cache_level;
+ uint32_t cache_line_size;
uint32_t flags;
/* Indicates how many Compute Units share this cache
* within a SA. Value = 1 indicates the cache is not shared
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 9ec750666382..ba99e0f258ae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -25,6 +25,7 @@
#include "kfd_topology.h"
#include <linux/file.h>
#include <uapi/linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_sysfs.h>
#define MAX_WATCH_ADDRESSES 4
@@ -103,7 +104,8 @@ void debug_event_write_work_handler(struct work_struct *work)
struct kfd_process,
debug_event_workarea);
- kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
+ if (process->debug_trap_enabled && process->dbg_ev_file)
+ kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
}
/* update process/device/queue exception status, write to descriptor
@@ -202,11 +204,12 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
size_t exception_data_size)
{
struct kfd_process *p;
+ struct kfd_process_device *pdd = NULL;
bool signaled_to_debugger_or_runtime = false;
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, &pdd);
- if (!p)
+ if (!pdd)
return false;
if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
@@ -236,9 +239,8 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
mutex_unlock(&p->mutex);
} else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
- kfd_dqm_evict_pasid(dev->dqm, p->pasid);
- kfd_signal_vm_fault_event(dev, p->pasid, NULL,
- exception_data);
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, exception_data);
signaled_to_debugger_or_runtime = true;
}
@@ -274,8 +276,8 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
data = (struct kfd_hsa_memory_exception_data *)
pdd->vm_fault_exc_data;
- kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
- kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data);
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, data);
error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
}
@@ -348,10 +350,27 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
{
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
uint32_t flags = pdd->process->dbg_flags;
+ struct amdgpu_device *adev = pdd->dev->adev;
+ int r;
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
return 0;
+ if (!pdd->proc_ctx_cpu_ptr) {
+ r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate process context bo\n");
+ return r;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
pdd->watch_points, flags, sq_trap_en);
}
@@ -363,47 +382,47 @@ static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_i
*watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;
- spin_lock(&pdd->dev->kfd->watch_points_lock);
+ spin_lock(&pdd->dev->watch_points_lock);
for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
/* device watchpoint in use so skip */
- if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1)
+ if ((pdd->dev->alloc_watch_ids >> i) & 0x1)
continue;
pdd->alloc_watch_ids |= 0x1 << i;
- pdd->dev->kfd->alloc_watch_ids |= 0x1 << i;
+ pdd->dev->alloc_watch_ids |= 0x1 << i;
*watch_id = i;
- spin_unlock(&pdd->dev->kfd->watch_points_lock);
+ spin_unlock(&pdd->dev->watch_points_lock);
return 0;
}
- spin_unlock(&pdd->dev->kfd->watch_points_lock);
+ spin_unlock(&pdd->dev->watch_points_lock);
return -ENOMEM;
}
static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
{
- spin_lock(&pdd->dev->kfd->watch_points_lock);
+ spin_lock(&pdd->dev->watch_points_lock);
/* process owns device watch point so safe to clear */
if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
pdd->alloc_watch_ids &= ~(0x1 << watch_id);
- pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id);
+ pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
}
- spin_unlock(&pdd->dev->kfd->watch_points_lock);
+ spin_unlock(&pdd->dev->watch_points_lock);
}
static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
{
bool owns_watch_id = false;
- spin_lock(&pdd->dev->kfd->watch_points_lock);
+ spin_lock(&pdd->dev->watch_points_lock);
owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
((pdd->alloc_watch_ids >> watch_id) & 0x1);
- spin_unlock(&pdd->dev->kfd->watch_points_lock);
+ spin_unlock(&pdd->dev->watch_points_lock);
return owns_watch_id;
}
@@ -497,14 +516,24 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
int i, r = 0, rewind_count = 0;
for (i = 0; i < target->n_pdds; i++) {
- if (!kfd_dbg_is_per_vmid_supported(target->pdds[i]->dev) &&
+ struct kfd_topology_device *topo_dev =
+ kfd_topology_device_by_id(target->pdds[i]->dev->id);
+ uint32_t caps = topo_dev->node_props.capability;
+
+ if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) &&
(*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) {
*flags = prev_flags;
return -EACCES;
}
+
+ if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED) &&
+ (*flags & KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP)) {
+ *flags = prev_flags;
+ return -EACCES;
+ }
}
- target->dbg_flags = *flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP;
+ target->dbg_flags = *flags;
*flags = prev_flags;
for (i = 0; i < target->n_pdds; i++) {
struct kfd_process_device *pdd = target->pdds[i];
@@ -645,6 +674,7 @@ int kfd_dbg_trap_disable(struct kfd_process *target)
else if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
target->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+ cancel_work_sync(&target->debug_event_workarea);
fput(target->dbg_ev_file);
target->dbg_ev_file = NULL;
@@ -1018,12 +1048,14 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
uint32_t *entry_size)
{
struct kfd_dbg_device_info_entry device_info;
- uint32_t tmp_entry_size = *entry_size, tmp_num_devices;
+ uint32_t tmp_entry_size, tmp_num_devices;
int i, r = 0;
if (!(target && user_info && number_of_device_infos && entry_size))
return -EINVAL;
+ tmp_entry_size = *entry_size;
+
tmp_num_devices = min_t(size_t, *number_of_device_infos, target->n_pdds);
*number_of_device_infos = target->n_pdds;
*entry_size = min_t(size_t, *entry_size, sizeof(device_info));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index fd0ff64d4184..27aa1a5b120f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -78,6 +78,8 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
{
return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0) ||
KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
}
@@ -134,6 +136,7 @@ static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) ||
(KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) &&
- (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70);
+ (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0));
}
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 4a5a0a4e00f2..9bde2c64540f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -27,6 +27,16 @@
#include "kfd_priv.h"
static struct dentry *debugfs_root;
+static struct dentry *debugfs_proc;
+static struct list_head procs;
+
+struct debugfs_proc_entry {
+ struct list_head list;
+ struct dentry *proc_dentry;
+ pid_t pid;
+};
+
+#define MAX_DEBUGFS_FILENAME_LEN 32
static int kfd_debugfs_open(struct inode *inode, struct file *file)
{
@@ -92,6 +102,8 @@ static const struct file_operations kfd_debugfs_hang_hws_fops = {
void kfd_debugfs_init(void)
{
debugfs_root = debugfs_create_dir("kfd", NULL);
+ debugfs_proc = debugfs_create_dir("proc", debugfs_root);
+ INIT_LIST_HEAD(&procs);
debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root,
kfd_debugfs_mqds_by_process, &kfd_debugfs_fops);
@@ -107,5 +119,69 @@ void kfd_debugfs_init(void)
void kfd_debugfs_fini(void)
{
+ debugfs_remove_recursive(debugfs_proc);
debugfs_remove_recursive(debugfs_root);
}
+
+static ssize_t kfd_debugfs_pasid_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kfd_process_device *pdd = file_inode(file)->i_private;
+ char tmp[32];
+ int len;
+
+ len = snprintf(tmp, sizeof(tmp), "%u\n", pdd->pasid);
+
+ return simple_read_from_buffer(buf, count, ppos, tmp, len);
+}
+
+static const struct file_operations kfd_debugfs_pasid_fops = {
+ .owner = THIS_MODULE,
+ .read = kfd_debugfs_pasid_read,
+};
+
+void kfd_debugfs_add_process(struct kfd_process *p)
+{
+ int i;
+ char name[MAX_DEBUGFS_FILENAME_LEN];
+ struct debugfs_proc_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return;
+
+ list_add(&entry->list, &procs);
+ entry->pid = p->lead_thread->pid;
+ snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "%d",
+ (int)entry->pid);
+ entry->proc_dentry = debugfs_create_dir(name, debugfs_proc);
+
+ /* Create debugfs files for each GPU:
+ * - proc/<pid>/pasid_<gpuid>
+ */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "pasid_%u",
+ pdd->dev->id);
+ debugfs_create_file((const char *)name, S_IFREG | 0444,
+ entry->proc_dentry, pdd,
+ &kfd_debugfs_pasid_fops);
+ }
+}
+
+void kfd_debugfs_remove_process(struct kfd_process *p)
+{
+ struct debugfs_proc_entry *entry, *next;
+
+ mutex_lock(&kfd_processes_mutex);
+ list_for_each_entry_safe(entry, next, &procs, list) {
+ if (entry->pid != p->lead_thread->pid)
+ continue;
+
+ debugfs_remove_recursive(entry->proc_dentry);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ mutex_unlock(&kfd_processes_mutex);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 93ce181eb3ba..e9cfb80bd436 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -56,6 +56,7 @@ extern const struct kfd2kgd_calls gc_9_4_3_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v11_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v12_kfd2kgd;
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
@@ -65,7 +66,7 @@ static int kfd_resume(struct kfd_node *kfd);
static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
{
- uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
+ uint32_t sdma_version = amdgpu_ip_version(kfd->adev, SDMA0_HWIP, 0);
switch (sdma_version) {
case IP_VERSION(4, 0, 0):/* VEGA10 */
@@ -83,6 +84,8 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(4, 2, 2):/* ARCTURUS */
case IP_VERSION(4, 4, 0):/* ALDEBARAN */
case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
case IP_VERSION(5, 0, 0):/* NAVI10 */
case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
case IP_VERSION(5, 0, 2):/* NAVI14 */
@@ -95,6 +98,12 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
case IP_VERSION(6, 0, 3):
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
kfd->device_info.num_sdma_queues_per_engine = 8;
break;
default:
@@ -111,6 +120,12 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
case IP_VERSION(6, 0, 3):
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
/* Reserve 1 for paging and 1 for gfx */
kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
/* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
@@ -139,6 +154,8 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
break;
case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
+ case IP_VERSION(9, 4, 4): /* GC 9.4.4 */
+ case IP_VERSION(9, 5, 0): /* GC 9.5.0 */
kfd->device_info.event_interrupt_class =
&event_interrupt_class_v9_4_3;
break;
@@ -162,6 +179,15 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ /* GFX12_TODO: Change to v12 version. */
kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
break;
default:
@@ -214,6 +240,11 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
*/
kfd->device_info.needs_pci_atomics = true;
kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
+ } else if (gc_version < IP_VERSION(13, 0, 0)) {
+ kfd->device_info.needs_pci_atomics = true;
+ kfd->device_info.no_atomic_fw_version = 2090;
+ } else {
+ kfd->device_info.needs_pci_atomics = true;
}
} else {
kfd->device_info.doorbell_size = 4;
@@ -279,7 +310,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &gfx_v8_kfd2kgd;
break;
default:
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
/* Vega 10 */
case IP_VERSION(9, 0, 1):
gfx_target_version = 90000;
@@ -321,9 +352,12 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &aldebaran_kfd2kgd;
break;
case IP_VERSION(9, 4, 3):
- gfx_target_version = adev->rev_id >= 1 ? 90402
- : adev->flags & AMD_IS_APU ? 90400
- : 90401;
+ case IP_VERSION(9, 4, 4):
+ gfx_target_version = 90402;
+ f2g = &gc_9_4_3_kfd2kgd;
+ break;
+ case IP_VERSION(9, 5, 0):
+ gfx_target_version = 90500;
f2g = &gc_9_4_3_kfd2kgd;
break;
/* Navi10 */
@@ -402,17 +436,34 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &gfx_v11_kfd2kgd;
break;
case IP_VERSION(11, 0, 3):
- if ((adev->pdev->device == 0x7460 &&
- adev->pdev->revision == 0x00) ||
- (adev->pdev->device == 0x7461 &&
- adev->pdev->revision == 0x00))
- /* Note: Compiler version is 11.0.5 while HW version is 11.0.3 */
- gfx_target_version = 110005;
- else
- /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
- gfx_target_version = 110001;
+ /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
+ gfx_target_version = 110001;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 5, 0):
+ gfx_target_version = 110500;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 5, 1):
+ gfx_target_version = 110501;
f2g = &gfx_v11_kfd2kgd;
break;
+ case IP_VERSION(11, 5, 2):
+ gfx_target_version = 110502;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 5, 3):
+ gfx_target_version = 110503;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(12, 0, 0):
+ gfx_target_version = 120000;
+ f2g = &gfx_v12_kfd2kgd;
+ break;
+ case IP_VERSION(12, 0, 1):
+ gfx_target_version = 120001;
+ f2g = &gfx_v12_kfd2kgd;
+ break;
default:
break;
}
@@ -420,11 +471,13 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
}
if (!f2g) {
- if (adev->ip_versions[GC_HWIP][0])
- dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
- adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
+ if (amdgpu_ip_version(adev, GC_HWIP, 0))
+ dev_info(kfd_device,
+ "GC IP %06x %s not supported in kfd\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0),
+ vf ? "VF" : "");
else
- dev_err(kfd_device, "%s %s not supported in kfd\n",
+ dev_info(kfd_device, "%s %s not supported in kfd\n",
amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
return NULL;
}
@@ -442,6 +495,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
mutex_init(&kfd->doorbell_mutex);
ida_init(&kfd->doorbell_ida);
+ atomic_set(&kfd->kfd_processes_count, 0);
return kfd;
}
@@ -450,37 +504,56 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
{
if (cwsr_enable && kfd->device_info.supports_cwsr) {
if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx8_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
- BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
+ BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_arcturus_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
- BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
+ BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
- } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_5_0_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx9_5_0_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_5_0_hex);
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx9_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
- BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
+ BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_nv1x_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex)
+ > KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx10_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
- } else {
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(12, 0, 0)) {
+ /* The gfx11 cwsr trap handler must fit inside a single
+ page. */
BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx11_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
+ } else {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex)
+ > KFD_CWSR_TMA_OFFSET);
+ kfd->cwsr_isa = cwsr_trap_gfx12_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx12_hex);
}
kfd->cwsr_enabled = true;
@@ -505,15 +578,21 @@ static int kfd_gws_init(struct kfd_node *node)
&& kfd->mec2_fw_version >= 0x30) ||
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2)
&& kfd->mec2_fw_version >= 0x28) ||
- (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
&& kfd->mec2_fw_version >= 0x6b) ||
(KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0)
- && mes_rev >= 68))))
+ && mes_rev >= 68) ||
+ (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0))))) {
+ if (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0))
+ node->adev->gds.gws_size = 64;
ret = amdgpu_amdkfd_alloc_gws(node->adev,
node->adev->gds.gws_size, &node->gws);
+ }
return ret;
}
@@ -577,6 +656,14 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
struct kfd_node *knode;
unsigned int i;
+ /*
+ * flush_workqueue() ensures that there are no outstanding
+ * work-queue items that will access interrupt_ring. New work items
+ * can't be created because we stopped interrupt handling above.
+ */
+ flush_workqueue(kfd->ih_wq);
+ destroy_workqueue(kfd->ih_wq);
+
for (i = 0; i < num_nodes; i++) {
knode = kfd->nodes[i];
device_queue_manager_uninit(knode->dqm);
@@ -672,14 +759,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
- /* For GFX9.4.3, we need special handling for VMIDs depending on
- * partition mode.
+ /* For multi-partition capable GPUs, we need special handling for VMIDs
+ * depending on partition mode.
* In CPX mode, the VMID range needs to be shared between XCDs.
* Additionally, there are 13 VMIDs (3-15) available for KFD. To
* divide them equally, we change starting VMID to 4 and not use
* VMID 3.
- * If the VMID range changes for GFX9.4.3, then this code MUST be
- * revisited.
+ * If the VMID range changes for multi-partition capable GPUs, then
+ * this code MUST be revisited.
*/
if (kfd->adev->xcp_mgr) {
partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
@@ -744,11 +831,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
/*
- * For GFX9.4.3, the KFD abstracts all partitions within a socket as
- * xGMI connected in the topology so assign a unique hive id per
- * device based on the pci device location if device is in PCIe mode.
+ * For multi-partition capable GPUs, the KFD abstracts all partitions
+ * within a socket as xGMI connected in the topology so assign a unique
+ * hive id per device based on the pci device location if device is in
+ * PCIe mode.
*/
- if (!kfd->hive_id && (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) && kfd->num_nodes > 1)
+ if (!kfd->hive_id && kfd->num_nodes > 1)
kfd->hive_id = pci_dev_id(kfd->adev->pdev);
kfd->noretry = kfd->adev->gmc.noretry;
@@ -786,11 +874,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20);
}
- if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
- partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+ if (partition_mode == AMDGPU_CPX_PARTITION_MODE &&
kfd->num_nodes != 1) {
- /* For GFX9.4.3 and CPX mode, first XCD gets VMID range
- * 4-9 and second XCD gets VMID range 10-15.
+ /* For multi-partition capable GPUs and CPX mode, first
+ * XCD gets VMID range 4-9 and second XCD gets VMID
+ * range 10-15.
*/
node->vm_info.first_vmid_kfd = (i%2 == 0) ?
@@ -814,7 +902,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
amdgpu_amdkfd_get_local_mem_info(kfd->adev,
&node->local_mem_info, node->xcp);
- if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3))
+ if (kfd->adev->xcp_mgr)
kfd_setup_interrupt_bitmap(node, i);
/* Initialize the KFD node */
@@ -822,13 +910,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
dev_err(kfd_device, "Error initializing KFD node\n");
goto node_init_error;
}
+
+ spin_lock_init(&node->watch_points_lock);
+
kfd->nodes[i] = node;
}
svm_range_set_max_pages(kfd->adev);
- spin_lock_init(&kfd->watch_points_lock);
-
kfd->init_complete = true;
dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
kfd->adev->pdev->device);
@@ -845,7 +934,7 @@ node_alloc_error:
kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
- amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem);
alloc_gtt_mem_failure:
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
@@ -863,13 +952,14 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_doorbell_fini(kfd);
ida_destroy(&kfd->doorbell_ida);
kfd_gtt_sa_fini(kfd);
- amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem);
}
kfree(kfd);
}
-int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context)
{
struct kfd_node *node;
int i;
@@ -879,11 +969,10 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
for (i = 0; i < kfd->num_nodes; i++) {
node = kfd->nodes[i];
- kfd_smi_event_update_gpu_reset(node, false);
- node->dqm->ops.pre_reset(node->dqm);
+ kfd_smi_event_update_gpu_reset(node, false, reset_context);
}
- kgd2kfd_suspend(kfd, false);
+ kgd2kfd_suspend(kfd, true);
for (i = 0; i < kfd->num_nodes; i++)
kfd_signal_reset_event(kfd->nodes[i]);
@@ -919,37 +1008,48 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
for (i = 0; i < kfd->num_nodes; i++) {
node = kfd->nodes[i];
atomic_set(&node->sram_ecc_flag, 0);
- kfd_smi_event_update_gpu_reset(node, true);
+ kfd_smi_event_update_gpu_reset(node, true, NULL);
}
return 0;
}
-bool kfd_is_locked(void)
+bool kfd_is_locked(struct kfd_dev *kfd)
{
+ uint8_t id = 0;
+ struct kfd_node *dev;
+
lockdep_assert_held(&kfd_processes_mutex);
- return (kfd_locked > 0);
+
+ /* check reset/suspend lock */
+ if (kfd_locked > 0)
+ return true;
+
+ if (kfd)
+ return kfd->kfd_dev_lock > 0;
+
+ /* check lock on all cgroup accessible devices */
+ while (kfd_topology_enum_kfd_devices(id++, &dev) == 0) {
+ if (!dev || kfd_devcgroup_check_permission(dev))
+ continue;
+
+ if (dev->kfd->kfd_dev_lock > 0)
+ return true;
+ }
+
+ return false;
}
-void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc)
{
struct kfd_node *node;
int i;
- int count;
if (!kfd->init_complete)
return;
- /* for runtime suspend, skip locking kfd */
- if (!run_pm) {
- mutex_lock(&kfd_processes_mutex);
- count = ++kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
- /* For first KFD device suspend all the KFD processes */
- if (count == 1)
- kfd_suspend_all_processes();
- }
+ if (suspend_proc)
+ kgd2kfd_suspend_process(kfd);
for (i = 0; i < kfd->num_nodes; i++) {
node = kfd->nodes[i];
@@ -957,9 +1057,9 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
}
}
-int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
+int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc)
{
- int ret, count, i;
+ int ret, i;
if (!kfd->init_complete)
return 0;
@@ -970,16 +1070,36 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
return ret;
}
- /* for runtime resume, skip unlocking kfd */
- if (!run_pm) {
- mutex_lock(&kfd_processes_mutex);
- count = --kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
+ if (resume_proc)
+ ret = kgd2kfd_resume_process(kfd);
- WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
- if (count == 0)
- ret = kfd_resume_all_processes();
- }
+ return ret;
+}
+
+void kgd2kfd_suspend_process(struct kfd_dev *kfd)
+{
+ if (!kfd->init_complete)
+ return;
+
+ mutex_lock(&kfd_processes_mutex);
+ /* For first KFD device suspend all the KFD processes */
+ if (++kfd_locked == 1)
+ kfd_suspend_all_processes();
+ mutex_unlock(&kfd_processes_mutex);
+}
+
+int kgd2kfd_resume_process(struct kfd_dev *kfd)
+{
+ int ret = 0;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ mutex_lock(&kfd_processes_mutex);
+ if (--kfd_locked == 0)
+ ret = kfd_resume_all_processes();
+ WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+ mutex_unlock(&kfd_processes_mutex);
return ret;
}
@@ -997,21 +1117,6 @@ static int kfd_resume(struct kfd_node *node)
return err;
}
-static inline void kfd_queue_work(struct workqueue_struct *wq,
- struct work_struct *work)
-{
- int cpu, new_cpu;
-
- cpu = new_cpu = smp_processor_id();
- do {
- new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
- if (cpu_to_node(new_cpu) == numa_node_id())
- break;
- } while (cpu != new_cpu);
-
- queue_work_on(new_cpu, wq, work);
-}
-
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
@@ -1029,7 +1134,15 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
}
for (i = 0; i < kfd->num_nodes; i++) {
- node = kfd->nodes[i];
+ /* Guard against a race with another thread that is
+ * between kfd_cleanup_nodes() and kfree(kfd), in which
+ * case kfd->nodes[i] is already NULL.
+ */
+ if (kfd->nodes[i])
+ node = kfd->nodes[i];
+ else
+ return;
+
spin_lock_irqsave(&node->interrupt_lock, flags);
if (node->interrupts_active
@@ -1037,7 +1150,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(node,
is_patched ? patched_ihre : ih_ring_entry)) {
- kfd_queue_work(node->ih_wq, &node->interrupt_work);
+ queue_work(node->kfd->ih_wq, &node->interrupt_work);
spin_unlock_irqrestore(&node->interrupt_lock, flags);
return;
}
@@ -1334,6 +1447,13 @@ void kfd_dec_compute_active(struct kfd_node *node)
WARN_ONCE(count < 0, "Compute profile ref. count error");
}
+static bool kfd_compute_active(struct kfd_node *node)
+{
+ if (atomic_read(&node->kfd->compute_profile))
+ return true;
+ return false;
+}
+
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
/*
@@ -1367,27 +1487,225 @@ unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node)
kfd_get_num_sdma_engines(node);
}
-int kgd2kfd_check_and_lock_kfd(void)
+int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
{
+ struct kfd_process *p;
+ int r = 0, temp, idx;
+
mutex_lock(&kfd_processes_mutex);
- if (!hash_empty(kfd_processes_table) || kfd_is_locked()) {
- mutex_unlock(&kfd_processes_mutex);
- return -EBUSY;
+
+ /* kfd_processes_count is per kfd_dev, return -EBUSY without
+ * further check
+ */
+ if (!!atomic_read(&kfd->kfd_processes_count)) {
+ pr_debug("process_wq_release not finished\n");
+ r = -EBUSY;
+ goto out;
+ }
+
+ if (hash_empty(kfd_processes_table) && !kfd_is_locked(kfd))
+ goto out;
+
+ /* fail under system reset/resume or kfd device is partition switching. */
+ if (kfd_is_locked(kfd)) {
+ r = -EBUSY;
+ goto out;
+ }
+
+ /*
+ * ensure all running processes are cgroup excluded from device before mode switch.
+ * i.e. no pdd was created on the process socket.
+ */
+ idx = srcu_read_lock(&kfd_processes_srcu);
+ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ if (p->pdds[i]->dev->kfd != kfd)
+ continue;
+
+ r = -EBUSY;
+ goto proc_check_unlock;
+ }
}
- ++kfd_locked;
+proc_check_unlock:
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+out:
+ if (!r)
+ ++kfd->kfd_dev_lock;
mutex_unlock(&kfd_processes_mutex);
- return 0;
+ return r;
}
-void kgd2kfd_unlock_kfd(void)
+void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
{
mutex_lock(&kfd_processes_mutex);
- --kfd_locked;
+ --kfd->kfd_dev_lock;
mutex_unlock(&kfd_processes_mutex);
}
+int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ struct kfd_node *node;
+ int ret;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ if (node_id >= kfd->num_nodes) {
+ dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
+ node_id, kfd->num_nodes - 1);
+ return -EINVAL;
+ }
+ node = kfd->nodes[node_id];
+
+ ret = node->dqm->ops.unhalt(node->dqm);
+ if (ret)
+ dev_err(kfd_device, "Error in starting scheduler\n");
+
+ return ret;
+}
+
+int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
+{
+ struct kfd_node *node;
+ int i, r;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ r = node->dqm->ops.unhalt(node->dqm);
+ if (r) {
+ dev_err(kfd_device, "Error in starting scheduler\n");
+ return r;
+ }
+ }
+ return 0;
+}
+
+int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ struct kfd_node *node;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ if (node_id >= kfd->num_nodes) {
+ dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
+ node_id, kfd->num_nodes - 1);
+ return -EINVAL;
+ }
+
+ node = kfd->nodes[node_id];
+ return node->dqm->ops.halt(node->dqm);
+}
+
+int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
+{
+ struct kfd_node *node;
+ int i, r;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ for (i = 0; i < kfd->num_nodes; i++) {
+ node = kfd->nodes[i];
+ r = node->dqm->ops.halt(node->dqm);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+{
+ struct kfd_node *node;
+
+ if (!kfd->init_complete)
+ return false;
+
+ if (node_id >= kfd->num_nodes) {
+ dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
+ node_id, kfd->num_nodes - 1);
+ return false;
+ }
+
+ node = kfd->nodes[node_id];
+
+ return kfd_compute_active(node);
+}
+
+/**
+ * kgd2kfd_vmfault_fast_path() - KFD vm page fault interrupt handling fast path for gmc v9
+ * @adev: amdgpu device
+ * @entry: vm fault interrupt vector
+ * @retry_fault: true if this is a retry fault
+ *
+ * retry fault -
+ * with CAM enabled, adev primary ring
+ * | gmc_v9_0_process_interrupt()
+ * adev soft_ring
+ * | gmc_v9_0_process_interrupt() worker failed to recover page fault
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * without CAM, adev primary ring1
+ * | gmc_v9_0_process_interrupt worker failed to recover page fault
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * no-retry fault -
+ * adev primary ring
+ * | gmc_v9_0_process_interrupt()
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * fast path - After kfd_signal_vm_fault_event, gmc_v9_0_process_interrupt drops further page
+ * faults of the same process and does not copy the interrupt to the KFD node ih_fifo.
+ * With the gdb debugger enabled, the retry fault needs to be converted to a no-retry
+ * fault for the debugger, so the fast path cannot be used.
+ *
+ * Return:
+ * true - use the fast path to handle this fault
+ * false - use normal path to handle it
+ */
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ struct kfd_process *p;
+ u32 cam_index;
+
+ if (entry->ih == &adev->irq.ih_soft || entry->ih == &adev->irq.ih1) {
+ p = kfd_lookup_process_by_pasid(entry->pasid, NULL);
+ if (!p)
+ return true;
+
+ if (p->gpu_page_fault && !p->debug_trap_enabled) {
+ if (retry_fault && adev->irq.retry_cam_enabled) {
+ cam_index = entry->src_data[2] & 0x3ff;
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+ }
+
+ kfd_unref_process(p);
+ return true;
+ }
+
+ /*
+ * This is the first page fault, set flag and then signal user space
+ */
+ p->gpu_page_fault = true;
+ kfd_unref_process(p);
+ }
+ return false;
+}
+
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS
@@ -1400,6 +1718,11 @@ int kfd_debugfs_hang_hws(struct kfd_node *dev)
return -EINVAL;
}
+ if (dev->kfd->shared_resources.enable_mes) {
+ dev_err(dev->adev->dev, "Inducing MES hang is not supported\n");
+ return -EINVAL;
+ }
+
return dqm_debugfs_hang_hws(dev->dqm);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8a6cb41444a4..6c5c7c1bf5ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -35,12 +35,16 @@
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
-#include "mes_api_def.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_sdma.h"
+#include "mes_v11_api_def.h"
#include "kfd_debug.h"
/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
+/* See unmap_queues_cpsch() */
+#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
u32 pasid, unsigned int vmid);
@@ -65,7 +69,8 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q, const uint32_t *restore_sdma_id);
-static void kfd_process_hw_exception(struct work_struct *work);
+
+static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma);
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
@@ -92,7 +97,7 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
- KGD_MAX_QUEUES);
+ AMDGPU_MAX_QUEUES);
}
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
@@ -152,17 +157,24 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
static void kfd_hws_hang(struct device_queue_manager *dqm)
{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+
+ /* Mark all device queues as reset. */
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+ pdd->has_reset_queue = true;
+ }
+ }
+
/*
* Issue a GPU reset if HWS is unresponsive
*/
- dqm->is_hws_hang = true;
-
- /* It's possible we're detecting a HWS hang in the
- * middle of a GPU reset. No need to schedule another
- * reset in this case.
- */
- if (!dqm->is_resetting)
- schedule_work(&dqm->hw_exception_work);
+ amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}
static int convert_to_mes_queue_type(int queue_type)
@@ -194,11 +206,13 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
int r, queue_type;
uint64_t wptr_addr_off;
- if (dqm->is_hws_hang)
+ if (!dqm->sched_running || dqm->sched_halt)
+ return 0;
+ if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO;
memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
- queue_input.process_id = qpd->pqm->process->pasid;
+ queue_input.process_id = pdd->pasid;
queue_input.page_table_base_addr = qpd->page_table_base;
queue_input.process_va_start = 0;
queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
@@ -214,10 +228,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.mqd_addr = q->gart_mqd_addr;
queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
- if (q->wptr_bo) {
- wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
- queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
- }
+ wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
+ queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off;
queue_input.is_kfd_process = 1;
queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
@@ -227,13 +239,16 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
- queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled ||
- kfd_dbg_has_ttmps_always_setup(q->device);
+ queue_input.skip_process_ctx_clear =
+ qpd->pqm->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED &&
+ (qpd->pqm->process->debug_trap_enabled ||
+ kfd_dbg_has_ttmps_always_setup(q->device));
queue_type = convert_to_mes_queue_type(q->properties.type);
if (queue_type < 0) {
- pr_err("Queue type not supported with MES, queue:%d\n",
- q->properties.type);
+ dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n",
+ q->properties.type);
+ up_read(&adev->reset_domain->sem);
return -EINVAL;
}
queue_input.queue_type = (uint32_t)queue_type;
@@ -243,10 +258,11 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
amdgpu_mes_unlock(&adev->mes);
+ up_read(&adev->reset_domain->sem);
if (r) {
- pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
+ dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n",
q->properties.doorbell_off);
- pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+ dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
kfd_hws_hang(dqm);
}
@@ -260,7 +276,9 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
int r;
struct mes_remove_queue_input queue_input;
- if (dqm->is_hws_hang)
+ if (!dqm->sched_running || dqm->sched_halt)
+ return 0;
+ if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO;
memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
@@ -270,20 +288,22 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
amdgpu_mes_unlock(&adev->mes);
+ up_read(&adev->reset_domain->sem);
if (r) {
- pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
+ dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n",
q->properties.doorbell_off);
- pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+ dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
kfd_hws_hang(dqm);
}
return r;
}
-static int remove_all_queues_mes(struct device_queue_manager *dqm)
+static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm)
{
struct device_process_node *cur;
+ struct device *dev = dqm->dev->adev->dev;
struct qcm_process_device *qpd;
struct queue *q;
int retval = 0;
@@ -294,7 +314,7 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm)
if (q->properties.is_active) {
retval = remove_queue_mes(dqm, q, qpd);
if (retval) {
- pr_err("%s: Failed to remove queue %d for dev %d",
+ dev_err(dev, "%s: Failed to remove queue %d for dev %d",
__func__,
q->properties.queue_id,
dqm->dev->id);
@@ -307,6 +327,73 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm)
return retval;
}
+static int add_all_kfd_queues_mes(struct device_queue_manager *dqm)
+{
+ struct device_process_node *cur;
+ struct device *dev = dqm->dev->adev->dev;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+ int retval = 0;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (!q->properties.is_active)
+ continue;
+ retval = add_queue_mes(dqm, q, qpd);
+ if (retval) {
+ dev_err(dev, "%s: Failed to add queue %d for dev %d",
+ __func__,
+ q->properties.queue_id,
+ dqm->dev->id);
+ return retval;
+ }
+ }
+ }
+
+ return retval;
+}
+
+static int suspend_all_queues_mes(struct device_queue_manager *dqm)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ int r = 0;
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EIO;
+
+ r = amdgpu_mes_suspend(adev);
+ up_read(&adev->reset_domain->sem);
+
+ if (r) {
+ dev_err(adev->dev, "failed to suspend gangs from MES\n");
+ dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+ }
+
+ return r;
+}
+
+static int resume_all_queues_mes(struct device_queue_manager *dqm)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ int r = 0;
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EIO;
+
+ r = amdgpu_mes_resume(adev);
+ up_read(&adev->reset_domain->sem);
+
+ if (r) {
+ dev_err(adev->dev, "failed to resume gangs from MES\n");
+ dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+ }
+
+ return r;
+}
+
static void increment_queue_count(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
@@ -407,7 +494,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
qpd->proc_doorbells,
- q->doorbell_id);
+ q->doorbell_id,
+ dev->kfd->device_info.doorbell_size);
return 0;
}
@@ -443,6 +531,8 @@ static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+ struct device *dev = dqm->dev->adev->dev;
int allocated_vmid = -1, i;
for (i = dqm->dev->vm_info.first_vmid_kfd;
@@ -454,15 +544,15 @@ static int allocate_vmid(struct device_queue_manager *dqm,
}
if (allocated_vmid < 0) {
- pr_err("no more vmid to allocate\n");
+ dev_err(dev, "no more vmid to allocate\n");
return -ENOSPC;
}
pr_debug("vmid allocated: %d\n", allocated_vmid);
- dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
+ dqm->vmid_pasid[allocated_vmid] = pdd->pasid;
- set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
+ set_pasid_vmid_mapping(dqm, pdd->pasid, allocated_vmid);
qpd->vmid = allocated_vmid;
q->properties.vmid = allocated_vmid;
@@ -510,10 +600,12 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
+ struct device *dev = dqm->dev->adev->dev;
+
/* On GFX v7, CP doesn't flush TC at dequeue */
if (q->device->adev->asic_type == CHIP_HAWAII)
if (flush_texture_cache_nocpsch(q->device, qpd))
- pr_err("Failed to flush TC\n");
+ dev_err(dev, "Failed to flush TC\n");
kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
@@ -708,10 +800,15 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
pr_debug("Killing all process wavefronts\n");
if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
- pr_err("no vmid pasid mapping supported \n");
+ dev_err(dev->adev->dev, "no vmid pasid mapping supported\n");
return -EOPNOTSUPP;
}
+ /* Take the VMID for that process the safe way, using the PDD */
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd)
+ return -EFAULT;
+
/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
* ATC_VMID15_PASID_MAPPING
* to check which VMID the current process is mapped to.
@@ -721,23 +818,19 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
(dev->adev, vmid, &queried_pasid);
- if (status && queried_pasid == p->pasid) {
- pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
- vmid, p->pasid);
+ if (status && queried_pasid == pdd->pasid) {
+ pr_debug("Killing wave fronts of vmid %d and process pid %d\n",
+ vmid, p->lead_thread->pid);
break;
}
}
if (vmid > last_vmid_to_scan) {
- pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
+ dev_err(dev->adev->dev, "Didn't find vmid for process pid %d\n",
+ p->lead_thread->pid);
return -EFAULT;
}
- /* taking the VMID for that process on the safe way using PDD */
- pdd = kfd_get_process_device_data(dev, p);
- if (!pdd)
- return -EFAULT;
-
reg_gfx_index.bits.sh_broadcast_writes = 1;
reg_gfx_index.bits.se_broadcast_writes = 1;
reg_gfx_index.bits.instance_broadcast_writes = 1;
@@ -821,6 +914,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
{
int retval;
uint64_t sdma_val = 0;
+ struct device *dev = dqm->dev->adev->dev;
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct mqd_manager *mqd_mgr =
dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
@@ -831,7 +925,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
&sdma_val);
if (retval)
- pr_err("Failed to read SDMA queue counter for queue: %d\n",
+ dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
q->properties.queue_id);
}
@@ -850,6 +944,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
struct mqd_update_info *minfo)
{
int retval = 0;
+ struct device *dev = dqm->dev->adev->dev;
struct mqd_manager *mqd_mgr;
struct kfd_process_device *pdd;
bool prev_active = false;
@@ -874,8 +969,14 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
else if (prev_active)
retval = remove_queue_mes(dqm, q, &pdd->qpd);
+ /* queue is reset so inaccessible */
+ if (pdd->has_reset_queue) {
+ retval = -EACCES;
+ goto out_unlock;
+ }
+
if (retval) {
- pr_err("unmap queue failed\n");
+ dev_err(dev, "unmap queue failed\n");
goto out_unlock;
}
} else if (prev_active &&
@@ -894,7 +995,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
if (retval) {
- pr_err("destroy mqd failed\n");
+ dev_err(dev, "destroy mqd failed\n");
goto out_unlock;
}
}
@@ -965,8 +1066,8 @@ static int suspend_single_queue(struct device_queue_manager *dqm,
if (q->properties.is_suspended)
return 0;
- pr_debug("Suspending PASID %u queue [%i]\n",
- pdd->process->pasid,
+ pr_debug("Suspending process pid %d queue [%i]\n",
+ pdd->process->lead_thread->pid,
q->properties.queue_id);
is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);
@@ -1013,8 +1114,8 @@ static int resume_single_queue(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
- pr_debug("Restoring from suspend PASID %u queue [%i]\n",
- pdd->process->pasid,
+ pr_debug("Restoring from suspend process pid %d queue [%i]\n",
+ pdd->process->lead_thread->pid,
q->properties.queue_id);
q->properties.is_suspended = false;
@@ -1047,8 +1148,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
pdd = qpd_to_pdd(qpd);
- pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Evicting process pid %d queues\n",
+ pdd->process->lead_thread->pid);
pdd->last_evict_timestamp = get_jiffies_64();
/* Mark all queues as evicted. Deactivate all active queues on
@@ -1088,6 +1189,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct queue *q;
+ struct device *dev = dqm->dev->adev->dev;
struct kfd_process_device *pdd;
int retval = 0;
@@ -1104,8 +1206,8 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
if (!pdd->drm_priv)
goto out;
- pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Evicting process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Mark all queues as evicted. Deactivate all active queues on
* the qpd.
@@ -1119,11 +1221,13 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
decrement_queue_count(dqm, qpd, q);
if (dqm->dev->kfd->shared_resources.enable_mes) {
- retval = remove_queue_mes(dqm, q, qpd);
- if (retval) {
- pr_err("Failed to evict queue %d\n",
+ int err;
+
+ err = remove_queue_mes(dqm, q, qpd);
+ if (err) {
+ dev_err(dev, "Failed to evict queue %d\n",
q->properties.queue_id);
- goto out;
+ retval = err;
}
}
}
@@ -1163,8 +1267,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
}
- pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Restoring process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Update PD Base in QPD */
qpd->page_table_base = pd_base;
@@ -1225,6 +1329,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct queue *q;
+ struct device *dev = dqm->dev->adev->dev;
struct kfd_process_device *pdd;
uint64_t eviction_duration;
int retval = 0;
@@ -1246,8 +1351,8 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
if (!pdd->drm_priv)
goto vm_not_acquired;
- pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Restoring process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Update PD Base in QPD */
qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
@@ -1265,7 +1370,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
if (dqm->dev->kfd->shared_resources.enable_mes) {
retval = add_queue_mes(dqm, q, qpd);
if (retval) {
- pr_err("Failed to restore queue %d\n",
+ dev_err(dev, "Failed to restore queue %d\n",
q->properties.queue_id);
goto out;
}
@@ -1457,35 +1562,30 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
}
if (dqm->dev->adev->asic_type == CHIP_HAWAII)
- pm_uninit(&dqm->packet_mgr, false);
+ pm_uninit(&dqm->packet_mgr);
dqm->sched_running = false;
dqm_unlock(dqm);
return 0;
}
-static void pre_reset(struct device_queue_manager *dqm)
-{
- dqm_lock(dqm);
- dqm->is_resetting = true;
- dqm_unlock(dqm);
-}
-
static int allocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q, const uint32_t *restore_sdma_id)
{
+ struct device *dev = dqm->dev->adev->dev;
int bit;
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
- pr_err("No more SDMA queue to allocate\n");
+ if (bitmap_empty(dqm->sdma_bitmap, get_num_sdma_queues(dqm))) {
+ dev_warn(dev, "No more SDMA queue to allocate (%d total queues)\n",
+ get_num_sdma_queues(dqm));
return -ENOMEM;
}
if (restore_sdma_id) {
/* Re-use existing sdma_id */
if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
- pr_err("SDMA queue already in use\n");
+ dev_err(dev, "SDMA queue already in use\n");
return -EBUSY;
}
clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
@@ -1503,14 +1603,15 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
q->properties.sdma_queue_id = q->sdma_id /
kfd_get_num_sdma_engines(dqm->dev);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
- pr_err("No more XGMI SDMA queue to allocate\n");
+ if (bitmap_empty(dqm->xgmi_sdma_bitmap, get_num_xgmi_sdma_queues(dqm))) {
+ dev_warn(dev, "No more XGMI SDMA queue to allocate (%d total queues)\n",
+ get_num_xgmi_sdma_queues(dqm));
return -ENOMEM;
}
if (restore_sdma_id) {
/* Re-use existing sdma_id */
if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
- pr_err("SDMA queue already in use\n");
+ dev_err(dev, "SDMA queue already in use\n");
return -EBUSY;
}
clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
@@ -1532,6 +1633,41 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
kfd_get_num_xgmi_sdma_engines(dqm->dev);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
+ int i, num_queues, num_engines, eng_offset = 0, start_engine;
+ bool free_bit_found = false, is_xgmi = false;
+
+ if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) {
+ num_queues = get_num_sdma_queues(dqm);
+ num_engines = kfd_get_num_sdma_engines(dqm->dev);
+ q->properties.type = KFD_QUEUE_TYPE_SDMA;
+ } else {
+ num_queues = get_num_xgmi_sdma_queues(dqm);
+ num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev);
+ eng_offset = kfd_get_num_sdma_engines(dqm->dev);
+ q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI;
+ is_xgmi = true;
+ }
+
+ /* Scan available bit based on target engine ID. */
+ start_engine = q->properties.sdma_engine_id - eng_offset;
+ for (i = start_engine; i < num_queues; i += num_engines) {
+
+ if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap))
+ continue;
+
+ clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap);
+ q->sdma_id = i;
+ q->properties.sdma_queue_id = q->sdma_id / num_engines;
+ free_bit_found = true;
+ break;
+ }
+
+ if (!free_bit_found) {
+ dev_warn(dev, "No more SDMA queue to allocate for target ID %i (%d total queues)\n",
+ q->properties.sdma_engine_id, num_queues);
+ return -ENOMEM;
+ }
}
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -1562,11 +1698,12 @@ static int set_sched_resources(struct device_queue_manager *dqm)
{
int i, mec;
struct scheduling_resources res;
+ struct device *dev = dqm->dev->adev->dev;
res.vmid_mask = dqm->dev->compute_vmid_bitmap;
res.queue_mask = 0;
- for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+ for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;
@@ -1582,7 +1719,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)
* definition of res.queue_mask needs updating
*/
if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
- pr_err("Invalid queue enabled by amdgpu: %d\n", i);
+ dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i);
break;
}
@@ -1611,21 +1748,75 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->active_cp_queue_count = 0;
dqm->gws_queue_count = 0;
dqm->active_runlist = false;
- INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
dqm->trap_debug_vmid = 0;
init_sdma_bitmaps(dqm);
- if (dqm->dev->kfd2kgd->get_iq_wait_times)
- dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
- &dqm->wait_times,
- ffs(dqm->dev->xcc_mask) - 1);
+ update_dqm_wait_times(dqm);
return 0;
}
+/* halt_cpsch:
+ * Unmap queues so the scheduler doesn't keep running the remaining jobs in the queue.
+ * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch
+ * is called.
+ */
+static int halt_cpsch(struct device_queue_manager *dqm)
+{
+ int ret = 0;
+
+ dqm_lock(dqm);
+ if (!dqm->sched_running) {
+ dqm_unlock(dqm);
+ return 0;
+ }
+
+ WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n");
+
+ if (!dqm->is_hws_hang) {
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ ret = unmap_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD, false);
+ else
+ ret = remove_all_kfd_queues_mes(dqm);
+ }
+ dqm->sched_halt = true;
+ dqm_unlock(dqm);
+
+ return ret;
+}
+
+/* unhalt_cpsch
+ * Unset dqm->sched_halt and map queues back to runlist
+ */
+static int unhalt_cpsch(struct device_queue_manager *dqm)
+{
+ int ret = 0;
+
+ dqm_lock(dqm);
+ if (!dqm->sched_running || !dqm->sched_halt) {
+ WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n");
+ dqm_unlock(dqm);
+ return 0;
+ }
+ dqm->sched_halt = false;
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ ret = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
+ 0, USE_DEFAULT_GRACE_PERIOD);
+ else
+ ret = add_all_kfd_queues_mes(dqm);
+
+ dqm_unlock(dqm);
+
+ return ret;
+}
+
static int start_cpsch(struct device_queue_manager *dqm)
{
- int retval;
+ struct device *dev = dqm->dev->adev->dev;
+ int retval, num_hw_queue_slots;
retval = 0;
@@ -1655,38 +1846,37 @@ static int start_cpsch(struct device_queue_manager *dqm)
init_interrupts(dqm);
/* clear hang status when driver try to start the hw scheduler */
- dqm->is_hws_hang = false;
- dqm->is_resetting = false;
dqm->sched_running = true;
- if (!dqm->dev->kfd->shared_resources.enable_mes)
+ if (!dqm->dev->kfd->shared_resources.enable_mes) {
+ if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_INIT, 0 /* unused */))
+ dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n");
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
+ }
- /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
- if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
- (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
- uint32_t reg_offset = 0;
- uint32_t grace_period = 1;
+ /* setup per-queue reset detection buffer */
+ num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe *
+ dqm->dev->kfd->shared_resources.num_pipe_per_mec *
+ NUM_XCC(dqm->dev->xcc_mask);
- retval = pm_update_grace_period(&dqm->packet_mgr,
- grace_period);
- if (retval)
- pr_err("Setting grace timeout failed\n");
- else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
- /* Update dqm->wait_times maintained in software */
- dqm->dev->kfd2kgd->build_grace_period_packet_info(
- dqm->dev->adev, dqm->wait_times,
- grace_period, &reg_offset,
- &dqm->wait_times);
+ dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info);
+ dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL);
+
+ if (!dqm->detect_hang_info) {
+ retval = -ENOMEM;
+ goto fail_detect_hang_buffer;
}
dqm_unlock(dqm);
return 0;
+fail_detect_hang_buffer:
+ kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
fail_allocate_vidmem:
fail_set_sched_resources:
if (!dqm->dev->kfd->shared_resources.enable_mes)
- pm_uninit(&dqm->packet_mgr, false);
+ pm_uninit(&dqm->packet_mgr);
fail_packet_manager_init:
dqm_unlock(dqm);
return retval;
@@ -1694,22 +1884,17 @@ fail_packet_manager_init:
static int stop_cpsch(struct device_queue_manager *dqm)
{
- bool hanging;
-
dqm_lock(dqm);
if (!dqm->sched_running) {
dqm_unlock(dqm);
return 0;
}
- if (!dqm->is_hws_hang) {
- if (!dqm->dev->kfd->shared_resources.enable_mes)
- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
- else
- remove_all_queues_mes(dqm);
- }
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
+ else
+ remove_all_kfd_queues_mes(dqm);
- hanging = dqm->is_hws_hang || dqm->is_resetting;
dqm->sched_running = false;
if (!dqm->dev->kfd->shared_resources.enable_mes)
@@ -1717,7 +1902,9 @@ static int stop_cpsch(struct device_queue_manager *dqm)
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
if (!dqm->dev->kfd->shared_resources.enable_mes)
- pm_uninit(&dqm->packet_mgr, hanging);
+ pm_uninit(&dqm->packet_mgr);
+ kfree(dqm->detect_hang_info);
+ dqm->detect_hang_info = NULL;
dqm_unlock(dqm);
return 0;
@@ -1789,7 +1976,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
}
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
- q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
dqm_lock(dqm);
retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
dqm_unlock(dqm);
@@ -1880,15 +2068,21 @@ out:
return retval;
}
-int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
- uint64_t fence_value,
- unsigned int timeout_ms)
+int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
+ uint64_t fence_value,
+ unsigned int timeout_ms)
{
unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
+ struct device *dev = dqm->dev->adev->dev;
+ uint64_t *fence_addr = dqm->fence_addr;
while (*fence_addr != fence_value) {
+ /* Fatal err detected, this response won't come */
+ if (amdgpu_amdkfd_is_fed(dqm->dev->adev))
+ return -EIO;
+
if (time_after(jiffies, end_jiffies)) {
- pr_err("qcm fence wait loop timeout expired\n");
+ dev_err(dev, "qcm fence wait loop timeout expired\n");
/* In HWS case, this is used to halt the driver thread
* in order not to mess up CP states before doing
* scandumps for FW debugging.
@@ -1907,9 +2101,10 @@ int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
+ struct device *dev = dqm->dev->adev->dev;
int retval;
- if (!dqm->sched_running)
+ if (!dqm->sched_running || dqm->sched_halt)
return 0;
if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
return 0;
@@ -1919,7 +2114,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
pr_debug("%s sent runlist\n", __func__);
if (retval) {
- pr_err("failed to execute runlist\n");
+ dev_err(dev, "failed to execute runlist\n");
return retval;
}
dqm->active_runlist = true;
@@ -1927,43 +2122,288 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
return retval;
}
-/* dqm->lock mutex has to be locked before calling this function */
+static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+ dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid %d is reset\n",
+ q->properties.queue_id, pdd->process->lead_thread->pid);
+
+ pdd->has_reset_queue = true;
+ if (q->properties.is_active) {
+ q->properties.is_active = false;
+ decrement_queue_count(dqm, qpd, q);
+ }
+}
+
+static int detect_queue_hang(struct device_queue_manager *dqm)
+{
+ int i;
+
+ /* detect should be used only in dqm locked queue reset */
+ if (WARN_ON(dqm->detect_hang_count > 0))
+ return 0;
+
+ memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size);
+
+ for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
+ uint32_t mec, pipe, queue;
+ int xcc_id;
+
+ mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
+ / dqm->dev->kfd->shared_resources.num_pipe_per_mec;
+
+ if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
+ continue;
+
+ amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue);
+
+ for_each_inst(xcc_id, dqm->dev->xcc_mask) {
+ uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr(
+ dqm->dev->adev, pipe, queue, xcc_id);
+ struct dqm_detect_hang_info hang_info;
+
+ if (!queue_addr)
+ continue;
+
+ hang_info.pipe_id = pipe;
+ hang_info.queue_id = queue;
+ hang_info.xcc_id = xcc_id;
+ hang_info.queue_address = queue_addr;
+
+ dqm->detect_hang_info[dqm->detect_hang_count] = hang_info;
+ dqm->detect_hang_count++;
+ }
+ }
+
+ return dqm->detect_hang_count;
+}
+
+static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address)
+{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (queue_address == q->properties.queue_address)
+ return q;
+ }
+ }
+
+ return NULL;
+}
+
+static int reset_hung_queues(struct device_queue_manager *dqm)
+{
+ int r = 0, reset_count = 0, i;
+
+ if (!dqm->detect_hang_info || dqm->is_hws_hang)
+ return -EIO;
+
+ /* assume dqm locked. */
+ if (!detect_queue_hang(dqm))
+ return -ENOTRECOVERABLE;
+
+ for (i = 0; i < dqm->detect_hang_count; i++) {
+ struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i];
+ struct queue *q = find_queue_by_address(dqm, hang_info.queue_address);
+ struct kfd_process_device *pdd;
+ uint64_t queue_addr = 0;
+
+ if (!q) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ pdd = kfd_get_process_device_data(dqm->dev, q->process);
+ if (!pdd) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev,
+ hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id,
+ KFD_UNMAP_LATENCY_MS);
+
+ /* either reset failed or we reset an unexpected queue. */
+ if (queue_addr != q->properties.queue_address) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ set_queue_as_reset(dqm, q, &pdd->qpd);
+ reset_count++;
+ }
+
+ if (reset_count == dqm->detect_hang_count)
+ kfd_signal_reset_event(dqm->dev);
+ else
+ r = -ENOTRECOVERABLE;
+
+reset_fail:
+ dqm->detect_hang_count = 0;
+
+ return r;
+}
+
+static bool sdma_has_hang(struct device_queue_manager *dqm)
+{
+ int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+ int engine_end = engine_start + get_num_all_sdma_engines(dqm);
+ int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+ int i, j;
+
+ for (i = engine_start; i < engine_end; i++) {
+ for (j = 0; j < num_queues_per_eng; j++) {
+ if (!dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j))
+ continue;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool set_sdma_queue_as_reset(struct device_queue_manager *dqm,
+ uint32_t doorbell_off)
+{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) &&
+ q->properties.doorbell_off == doorbell_off) {
+ set_queue_as_reset(dqm, q, qpd);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static int reset_hung_queues_sdma(struct device_queue_manager *dqm)
+{
+ int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+ int engine_end = engine_start + get_num_all_sdma_engines(dqm);
+ int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+ int r = 0, i, j;
+
+ if (dqm->is_hws_hang)
+ return -EIO;
+
+ /* Scan for hung HW queues and reset engine. */
+ dqm->detect_hang_count = 0;
+ for (i = engine_start; i < engine_end; i++) {
+ for (j = 0; j < num_queues_per_eng; j++) {
+ uint32_t doorbell_off =
+ dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j);
+
+ if (!doorbell_off)
+ continue;
+
+ /* Reset engine and check. */
+ if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) ||
+ dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) ||
+ !set_sdma_queue_as_reset(dqm, doorbell_off)) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ /* Should only expect one queue active per engine */
+ dqm->detect_hang_count++;
+ break;
+ }
+ }
+
+ /* Signal process reset */
+ if (dqm->detect_hang_count)
+ kfd_signal_reset_event(dqm->dev);
+ else
+ r = -ENOTRECOVERABLE;
+
+reset_fail:
+ dqm->detect_hang_count = 0;
+
+ return r;
+}
+
+static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma)
+{
+ struct amdgpu_device *adev = dqm->dev->adev;
+
+ while (halt_if_hws_hang)
+ schedule();
+
+ if (adev->debug_disable_gpu_ring_reset) {
+ dev_info_once(adev->dev,
+ "%s queue hung, but ring reset disabled",
+ is_sdma ? "sdma" : "compute");
+
+ return -EPERM;
+ }
+ if (!amdgpu_gpu_recovery)
+ return -ENOTRECOVERABLE;
+
+ return is_sdma ? reset_hung_queues_sdma(dqm) : reset_hung_queues(dqm);
+}
+
+/* dqm->lock mutex has to be locked before calling this function
+ *
+ * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time
+ * for context switch latency. Lower values are used by debugger
+ * since context switches are triggered at high frequency.
+ * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE
+ *
+ */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param,
uint32_t grace_period,
bool reset)
{
- int retval = 0;
+ struct device *dev = dqm->dev->adev->dev;
struct mqd_manager *mqd_mgr;
+ int retval;
if (!dqm->sched_running)
return 0;
- if (dqm->is_hws_hang || dqm->is_resetting)
- return -EIO;
if (!dqm->active_runlist)
- return retval;
+ return 0;
+ if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
+ return -EIO;
if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
- retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
+ retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period);
if (retval)
- return retval;
+ goto out;
}
retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
if (retval)
- return retval;
+ goto out;
*dqm->fence_addr = KFD_FENCE_INIT;
+ mb();
pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
KFD_FENCE_COMPLETED);
/* should be timed out */
- retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
- queue_preemption_timeout_ms);
+ retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED,
+ queue_preemption_timeout_ms);
if (retval) {
- pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
+ dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
kfd_hws_hang(dqm);
- return retval;
+ goto out;
}
/* In the current MEC firmware implementation, if compute queue
@@ -1975,29 +2415,36 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
* check those fields
*/
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
- if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
- pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
- while (halt_if_hws_hang)
- schedule();
- return -ETIME;
- }
+ if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd) &&
+ reset_queues_on_hws_hang(dqm, false))
+ goto reset_fail;
+
+ /* Check for SDMA hang and attempt SDMA reset */
+ if (sdma_has_hang(dqm) && reset_queues_on_hws_hang(dqm, true))
+ goto reset_fail;
/* We need to reset the grace period value for this device */
if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
- if (pm_update_grace_period(&dqm->packet_mgr,
- USE_DEFAULT_GRACE_PERIOD))
- pr_err("Failed to reset grace period\n");
+ if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_RESET, 0 /* unused */))
+ dev_err(dev, "Failed to reset grace period\n");
}
pm_release_ib(&dqm->packet_mgr);
dqm->active_runlist = false;
-
+out:
+ up_read(&dqm->dev->adev->reset_domain->sem);
return retval;
+
+reset_fail:
+ dqm->is_hws_hang = true;
+ kfd_hws_hang(dqm);
+ up_read(&dqm->dev->adev->reset_domain->sem);
+ return -ETIME;
}
/* only for compute queue */
-static int reset_queues_cpsch(struct device_queue_manager *dqm,
- uint16_t pasid)
+static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid)
{
int retval;
@@ -2018,13 +2465,13 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
{
int retval;
- if (dqm->is_hws_hang)
+ if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
return -EIO;
retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
- if (retval)
- return retval;
-
- return map_queues_cpsch(dqm);
+ if (!retval)
+ retval = map_queues_cpsch(dqm);
+ up_read(&dqm->dev->adev->reset_domain->sem);
+ return retval;
}
static int wait_on_destroy_queue(struct device_queue_manager *dqm,
@@ -2034,6 +2481,9 @@ static int wait_on_destroy_queue(struct device_queue_manager *dqm,
q->process);
int ret = 0;
+ if (WARN_ON(!pdd))
+ return ret;
+
if (pdd->qpd.is_debug)
return ret;
@@ -2060,6 +2510,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
uint64_t sdma_val = 0;
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+ struct device *dev = dqm->dev->adev->dev;
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
@@ -2067,7 +2518,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
&sdma_val);
if (retval)
- pr_err("Failed to read SDMA queue counter for queue: %d\n",
+ dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
q->properties.queue_id);
}
@@ -2102,10 +2553,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
pdd->sdma_past_activity_counter += sdma_val;
}
- list_del(&q->list);
- qpd->queue_count--;
if (q->properties.is_active) {
decrement_queue_count(dqm, qpd, q);
+ q->properties.is_active = false;
if (!dqm->dev->kfd->shared_resources.enable_mes) {
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
@@ -2116,6 +2566,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
retval = remove_queue_mes(dqm, q, qpd);
}
}
+ list_del(&q->list);
+ qpd->queue_count--;
/*
* Unconditionally decrement this counter, regardless of the queue's
@@ -2145,20 +2597,13 @@ failed_try_destroy_debugged_queue:
return retval;
}
-/*
- * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
- * stay in user mode.
- */
-#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
-/* APE1 limit is inclusive and 64K aligned. */
-#define APE1_LIMIT_ALIGNMENT 0xFFFF
-
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
bool retval = true;
@@ -2167,41 +2612,17 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
dqm_lock(dqm);
- if (alternate_aperture_size == 0) {
- /* base > limit disables APE1 */
- qpd->sh_mem_ape1_base = 1;
- qpd->sh_mem_ape1_limit = 0;
- } else {
- /*
- * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
- * SH_MEM_APE1_BASE[31:0], 0x0000 }
- * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
- * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
- * Verify that the base and size parameters can be
- * represented in this format and convert them.
- * Additionally restrict APE1 to user-mode addresses.
- */
-
- uint64_t base = (uintptr_t)alternate_aperture_base;
- uint64_t limit = base + alternate_aperture_size - 1;
-
- if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
- (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
- retval = false;
- goto out;
- }
-
- qpd->sh_mem_ape1_base = base >> 16;
- qpd->sh_mem_ape1_limit = limit >> 16;
- }
-
retval = dqm->asic_ops.set_cache_memory_policy(
dqm,
qpd,
default_policy,
alternate_policy,
alternate_aperture_base,
- alternate_aperture_size);
+ alternate_aperture_size,
+ misc_process_properties);
+
+ if (retval)
+ goto out;
if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
@@ -2304,7 +2725,7 @@ static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
dqm_lock(dqm);
mqd_mgr = dqm->mqd_mgrs[mqd_type];
- *mqd_size = mqd_mgr->mqd_size;
+ *mqd_size = mqd_mgr->mqd_size * NUM_XCC(mqd_mgr->dev->xcc_mask);
*ctl_stack_size = 0;
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
@@ -2348,6 +2769,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
{
int retval;
struct queue *q;
+ struct device *dev = dqm->dev->adev->dev;
struct kernel_queue *kq, *kq_next;
struct mqd_manager *mqd_mgr;
struct device_process_node *cur, *next_dpn;
@@ -2381,7 +2803,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
if (dqm->dev->kfd->shared_resources.enable_mes) {
retval = remove_queue_mes(dqm, q, qpd);
if (retval)
- pr_err("Failed to remove queue %d\n",
+ dev_err(dev, "Failed to remove queue %d\n",
q->properties.queue_id);
}
}
@@ -2403,10 +2825,12 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
if (!dqm->dev->kfd->shared_resources.enable_mes)
retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);
- if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
+ if ((retval || qpd->reset_wavefronts) &&
+ down_read_trylock(&dqm->dev->adev->reset_domain->sem)) {
pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
qpd->reset_wavefronts = false;
+ up_read(&dqm->dev->adev->reset_domain->sem);
}
/* Lastly, free mqd resources.
@@ -2436,12 +2860,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
static int init_mqd_managers(struct device_queue_manager *dqm)
{
int i, j;
+ struct device *dev = dqm->dev->adev->dev;
struct mqd_manager *mqd_mgr;
for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
if (!mqd_mgr) {
- pr_err("mqd manager [%d] initialization failed\n", i);
+ dev_err(dev, "mqd manager [%d] initialization failed\n", i);
goto out_free;
}
dqm->mqd_mgrs[i] = mqd_mgr;
@@ -2512,7 +2937,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
dqm->ops.initialize = initialize_cpsch;
dqm->ops.start = start_cpsch;
dqm->ops.stop = stop_cpsch;
- dqm->ops.pre_reset = pre_reset;
+ dqm->ops.halt = halt_cpsch;
+ dqm->ops.unhalt = unhalt_cpsch;
dqm->ops.destroy_queue = destroy_queue_cpsch;
dqm->ops.update_queue = update_queue;
dqm->ops.register_process = register_process;
@@ -2533,7 +2959,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
/* initialize dqm for no cp scheduling */
dqm->ops.start = start_nocpsch;
dqm->ops.stop = stop_nocpsch;
- dqm->ops.pre_reset = pre_reset;
dqm->ops.create_queue = create_queue_nocpsch;
dqm->ops.destroy_queue = destroy_queue_nocpsch;
dqm->ops.update_queue = update_queue;
@@ -2551,7 +2976,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
dqm->ops.checkpoint_mqd = checkpoint_mqd;
break;
default:
- pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
+ dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy);
goto out_free;
}
@@ -2572,7 +2997,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
break;
default:
- if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0))
+ device_queue_manager_init_v12(&dqm->asic_ops);
+ else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
device_queue_manager_init_v11(&dqm->asic_ops);
else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
device_queue_manager_init_v10(&dqm->asic_ops);
@@ -2589,7 +3016,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
goto out_free;
if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
- pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
+ dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n");
goto out_free;
}
@@ -2608,7 +3035,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
{
WARN(!mqd, "No hiq sdma mqd trunk to free");
- amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem);
}
void device_queue_manager_uninit(struct device_queue_manager *dqm)
@@ -2620,45 +3047,130 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
kfree(dqm);
}
-int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
+int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id)
{
- struct kfd_process_device *pdd;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process_device *pdd = NULL;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, &pdd);
+ struct device_queue_manager *dqm = knode->dqm;
+ struct device *dev = dqm->dev->adev->dev;
+ struct qcm_process_device *qpd;
+ struct queue *q = NULL;
int ret = 0;
- if (!p)
+ if (!pdd)
return -EINVAL;
- WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
- pdd = kfd_get_process_device_data(dqm->dev, p);
- if (pdd)
- ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
+
+ dqm_lock(dqm);
+
+ if (pdd) {
+ qpd = &pdd->qpd;
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->doorbell_id == doorbell_id && q->properties.is_active) {
+ ret = suspend_all_queues_mes(dqm);
+ if (ret) {
+ dev_err(dev, "Suspending all queues failed");
+ goto out;
+ }
+
+ q->properties.is_evicted = true;
+ q->properties.is_active = false;
+ decrement_queue_count(dqm, qpd, q);
+
+ ret = remove_queue_mes(dqm, q, qpd);
+ if (ret) {
+ dev_err(dev, "Removing bad queue failed");
+ goto out;
+ }
+
+ ret = resume_all_queues_mes(dqm);
+ if (ret)
+ dev_err(dev, "Resuming all queues failed");
+
+ break;
+ }
+ }
+ }
+
+out:
+ dqm_unlock(dqm);
kfd_unref_process(p);
+ return ret;
+}
+
+static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ struct device *dev = dqm->dev->adev->dev;
+ int ret = 0;
+ /* Check if process is already evicted */
+ dqm_lock(dqm);
+ if (qpd->evicted) {
+ /* Increment the evicted count to make sure the
+ * process stays evicted before it is terminated.
+ */
+ qpd->evicted++;
+ dqm_unlock(dqm);
+ goto out;
+ }
+ dqm_unlock(dqm);
+
+ ret = suspend_all_queues_mes(dqm);
+ if (ret) {
+ dev_err(dev, "Suspending all queues failed");
+ goto out;
+ }
+
+ ret = dqm->ops.evict_process_queues(dqm, qpd);
+ if (ret) {
+ dev_err(dev, "Evicting process queues failed");
+ goto out;
+ }
+
+ ret = resume_all_queues_mes(dqm);
+ if (ret)
+ dev_err(dev, "Resuming all queues failed");
+
+out:
return ret;
}
-static void kfd_process_hw_exception(struct work_struct *work)
+int kfd_evict_process_device(struct kfd_process_device *pdd)
{
- struct device_queue_manager *dqm = container_of(work,
- struct device_queue_manager, hw_exception_work);
- amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
+ struct device_queue_manager *dqm;
+ struct kfd_process *p;
+ int ret = 0;
+
+ p = pdd->process;
+ dqm = pdd->dev->dqm;
+
+ WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
+
+ if (dqm->dev->kfd->shared_resources.enable_mes)
+ ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd);
+ else
+ ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
+
+ return ret;
}
int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
int r;
+ struct device *dev = dqm->dev->adev->dev;
int updated_vmid_mask;
if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
- pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+ dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
return -EINVAL;
}
dqm_lock(dqm);
if (dqm->trap_debug_vmid != 0) {
- pr_err("Trap debug id already reserved\n");
+ dev_err(dev, "Trap debug id already reserved\n");
r = -EBUSY;
goto out_unlock;
}
@@ -2694,19 +3206,20 @@ out_unlock:
int release_debug_trap_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
+ struct device *dev = dqm->dev->adev->dev;
int r;
int updated_vmid_mask;
uint32_t trap_debug_vmid;
if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
- pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+ dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
return -EINVAL;
}
dqm_lock(dqm);
trap_debug_vmid = dqm->trap_debug_vmid;
if (dqm->trap_debug_vmid == 0) {
- pr_err("Trap debug id is not reserved\n");
+ dev_err(dev, "Trap debug id is not reserved\n");
r = -EINVAL;
goto out_unlock;
}
@@ -2765,7 +3278,7 @@ struct copy_context_work_handler_workarea {
struct kfd_process *p;
};
-static void copy_context_work_handler (struct work_struct *work)
+static void copy_context_work_handler(struct work_struct *work)
{
struct copy_context_work_handler_workarea *workarea;
struct mqd_manager *mqd_mgr;
@@ -2792,6 +3305,9 @@ static void copy_context_work_handler (struct work_struct *work)
struct qcm_process_device *qpd = &pdd->qpd;
list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE)
+ continue;
+
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
/* We ignore the return value from get_wave_state
@@ -2843,6 +3359,7 @@ int resume_queues(struct kfd_process *p,
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
struct device_queue_manager *dqm = pdd->dev->dqm;
+ struct device *dev = dqm->dev->adev->dev;
struct qcm_process_device *qpd = &pdd->qpd;
struct queue *q;
int r, per_device_resumed = 0;
@@ -2893,7 +3410,7 @@ int resume_queues(struct kfd_process *p,
0,
USE_DEFAULT_GRACE_PERIOD);
if (r) {
- pr_err("Failed to resume process queues\n");
+ dev_err(dev, "Failed to resume process queues\n");
if (queue_ids) {
list_for_each_entry(q, &qpd->queues_list, list) {
int q_idx = q_array_get_index(
@@ -2945,6 +3462,7 @@ int suspend_queues(struct kfd_process *p,
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
struct device_queue_manager *dqm = pdd->dev->dqm;
+ struct device *dev = dqm->dev->adev->dev;
struct qcm_process_device *qpd = &pdd->qpd;
struct queue *q;
int r, per_device_suspended = 0;
@@ -2993,7 +3511,7 @@ int suspend_queues(struct kfd_process *p,
grace_period);
if (r)
- pr_err("Failed to suspend process queues.\n");
+ dev_err(dev, "Failed to suspend process queues.\n");
else
total_suspended += per_device_suspended;
@@ -3080,10 +3598,11 @@ void set_queue_snapshot_entry(struct queue *q,
int debug_lock_and_unmap(struct device_queue_manager *dqm)
{
+ struct device *dev = dqm->dev->adev->dev;
int r;
if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
- pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+ dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
return -EINVAL;
}
@@ -3101,10 +3620,11 @@ int debug_lock_and_unmap(struct device_queue_manager *dqm)
int debug_map_and_unlock(struct device_queue_manager *dqm)
{
+ struct device *dev = dqm->dev->adev->dev;
int r;
if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
- pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+ dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
return -EINVAL;
}
@@ -3128,6 +3648,30 @@ int debug_refresh_runlist(struct device_queue_manager *dqm)
return debug_map_and_unlock(dqm);
}
+bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ int doorbell_off, u32 *queue_format)
+{
+ struct queue *q;
+ bool r = false;
+
+ if (!queue_format)
+ return r;
+
+ dqm_lock(dqm);
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.doorbell_off == doorbell_off) {
+ *queue_format = q->properties.format;
+ r = true;
+ goto out;
+ }
+ }
+
+out:
+ dqm_unlock(dqm);
+ return r;
+}
#if defined(CONFIG_DEBUG_FS)
static void seq_reg_dump(struct seq_file *m,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index cf7e182588f8..74a61b5b2f0b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -37,7 +37,6 @@
#define KFD_MES_PROCESS_QUANTUM 100000
#define KFD_MES_GANG_QUANTUM 10000
-#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
struct device_process_node {
struct qcm_process_device *qpd;
@@ -106,6 +105,12 @@ union GRBM_GFX_INDEX_BITS {
* @uninitialize: Destroys all the device queue manager resources allocated in
* initialize routine.
*
+ * @halt: This routine unmaps queues from runlist and sets halt status to true
+ * so no more queues will be mapped to runlist until unhalt.
+ *
+ * @unhalt: This routine resets halt status to false and maps queues back to
+ * runlist.
+ *
* @create_kernel_queue: Creates kernel queue. Used for debug queue.
*
* @destroy_kernel_queue: Destroys kernel queue. Used for debug queue.
@@ -152,8 +157,9 @@ struct device_queue_manager_ops {
int (*initialize)(struct device_queue_manager *dqm);
int (*start)(struct device_queue_manager *dqm);
int (*stop)(struct device_queue_manager *dqm);
- void (*pre_reset)(struct device_queue_manager *dqm);
void (*uninitialize)(struct device_queue_manager *dqm);
+ int (*halt)(struct device_queue_manager *dqm);
+ int (*unhalt)(struct device_queue_manager *dqm);
int (*create_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
@@ -167,7 +173,8 @@ struct device_queue_manager_ops {
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -203,7 +210,8 @@ struct device_queue_manager_asic_ops {
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void (*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
@@ -211,6 +219,13 @@ struct device_queue_manager_asic_ops {
struct kfd_node *dev);
};
+struct dqm_detect_hang_info {
+ int pipe_id;
+ int queue_id;
+ int xcc_id;
+ uint64_t queue_address;
+};
+
/**
* struct device_queue_manager
*
@@ -255,9 +270,9 @@ struct device_queue_manager {
/* hw exception */
bool is_hws_hang;
bool is_resetting;
- struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
bool sched_running;
+ bool sched_halt;
/* used for GFX 9.4.3 only */
uint32_t current_logical_xcc_start;
@@ -265,6 +280,11 @@ struct device_queue_manager {
uint32_t wait_times;
wait_queue_head_t destroy_wait;
+
+ /* for per-queue reset support */
+ struct dqm_detect_hang_info *detect_hang_info;
+ size_t detect_hang_info_size;
+ int detect_hang_count;
};
void device_queue_manager_init_cik(
@@ -277,6 +297,8 @@ void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v12(
+ struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
unsigned int get_cp_queues_num(struct device_queue_manager *dqm);
@@ -302,6 +324,9 @@ void set_queue_snapshot_entry(struct queue *q,
int debug_lock_and_unmap(struct device_queue_manager *dqm);
int debug_map_and_unlock(struct device_queue_manager *dqm);
int debug_refresh_runlist(struct device_queue_manager *dqm);
+bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ int doorbell_off, u32 *queue_format);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
@@ -334,4 +359,14 @@ static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val
/* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
return get_user(*val, q_rptr + 1);
}
+
+static inline void update_dqm_wait_times(struct device_queue_manager *dqm)
+{
+ if (dqm->dev->kfd2kgd->get_iq_wait_times)
+ dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
+ &dqm->wait_times,
+ ffs(dqm->dev->xcc_mask) - 1);
+}
+
+
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index d4d95c7f2e5d..0508ef5a41d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -27,12 +27,21 @@
#include "oss/oss_2_4_sh_mask.h"
#include "gca/gfx_7_2_sh_mask.h"
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm,
@@ -80,10 +89,41 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
+ unsigned int temp;
+ bool retval = true;
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+ * SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+ * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be
+ * represented in this format and convert them.
+ * Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
+ goto out;
+ }
+
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_NONCACHED :
@@ -97,37 +137,22 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
| DEFAULT_MTYPE(default_mtype)
| APE1_MTYPE(ape1_mtype);
-
- return true;
-}
-
-static int update_qpd_cik(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
- DEFAULT_MTYPE(MTYPE_NONCACHED) |
- APE1_MTYPE(MTYPE_NONCACHED);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
- temp = get_sh_mem_bases_nybble_64(pdd);
+ temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
+out:
+ return retval;
+}
+
+static int update_qpd_cik(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index 245a90dfc2f6..ba6e3d747ccd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -31,10 +31,18 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10;
asic_ops->update_qpd = update_qpd_v10;
asic_ops->init_sdma_vm = init_sdma_vm_v10;
asic_ops->mqd_manager_init = mqd_manager_init_v10;
@@ -49,27 +57,28 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v10(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
index 2e129da7acb4..8b447d04558f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -30,10 +30,18 @@ static int update_qpd_v11(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v11;
asic_ops->update_qpd = update_qpd_v11;
asic_ops->init_sdma_vm = init_sdma_vm_v11;
asic_ops->mqd_manager_init = mqd_manager_init_v11;
@@ -48,28 +56,29 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v11(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
new file mode 100644
index 000000000000..3550da3a46f9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+
+static int update_qpd_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+static void init_sdma_vm_v12(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
+
+void device_queue_manager_init_v12(
+ struct device_queue_manager_asic_ops *asic_ops)
+{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v12;
+ asic_ops->update_qpd = update_qpd_v12;
+ asic_ops->init_sdma_vm = init_sdma_vm_v12;
+ asic_ops->mqd_manager_init = mqd_manager_init_v12;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+ uint32_t shared_base = pdd->lds_base >> 48;
+ uint32_t private_base = pdd->scratch_base >> 48;
+
+ return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ private_base;
+}
+
+static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
+{
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
+
+ pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+
+static int update_qpd_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ return 0;
+}
+
+static void init_sdma_vm_v12(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ /* Not needed on SDMAv4 onwards any more */
+ q->properties.sdma_vm_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 54eb1bff903c..9fcc8c6e57b7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -30,10 +30,18 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v9;
asic_ops->update_qpd = update_qpd_v9;
asic_ops->init_sdma_vm = init_sdma_vm_v9;
asic_ops->mqd_manager_init = mqd_manager_init_v9;
@@ -48,10 +56,42 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
+static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
+{
+ qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ if (dqm->dev->kfd->noretry)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
+ qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
+
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) {
+ if (misc_process_properties & KFD_PROC_FLAG_MFMA_HIGH_PRECISION)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRECISION_MODE__SHIFT;
+ }
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
+
+ pr_debug("sh_mem_bases 0x%X sh_mem_config 0x%X\n", qpd->sh_mem_bases,
+ qpd->sh_mem_config);
+ return true;
+}
+
static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- struct kfd_process_device *pdd;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
pdd = qpd_to_pdd(qpd);
@@ -63,7 +103,8 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
if (dqm->dev->kfd->noretry)
qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
- if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
qpd->sh_mem_config |=
(1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index b291ee0fab94..dad83356e976 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -27,12 +27,21 @@
#include "gca/gfx_8_0_sh_mask.h"
#include "oss/oss_3_0_sh_mask.h"
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm,
@@ -81,10 +90,41 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
+ unsigned int temp;
+ bool retval = true;
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+ * SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+ * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be
+ * represented in this format and convert them.
+ * Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
+ goto out;
+ }
+
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_UC :
@@ -100,40 +140,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
- return true;
-}
-
-static int update_qpd_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
- MTYPE_UC <<
- SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
- MTYPE_UC <<
- SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
- temp = get_sh_mem_bases_nybble_64(pdd);
+ temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
temp, qpd->sh_mem_bases);
+out:
+ return retval;
+}
+static int update_qpd_vi(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 7b38537c7c99..05c74887fd6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -161,7 +161,10 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
- *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
+ *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev,
+ kfd->doorbells,
+ inx,
+ kfd->device_info.doorbell_size);
inx *= 2;
pr_debug("Get kernel queue doorbell\n"
@@ -240,7 +243,10 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
return 0;
}
- first_db_index = amdgpu_doorbell_index_on_bar(adev, pdd->qpd.proc_doorbells, 0);
+ first_db_index = amdgpu_doorbell_index_on_bar(adev,
+ pdd->qpd.proc_doorbells,
+ 0,
+ pdd->dev->kfd->device_info.doorbell_size);
return adev->doorbell.base + first_db_index * sizeof(uint32_t);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 0f58be65132f..82905f3e54dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -31,6 +31,7 @@
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
+#include "kfd_device_queue_manager.h"
#include <linux/device.h>
/*
@@ -726,7 +727,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function increments the process ref count.
*/
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return; /* Presumably process exited. */
@@ -747,6 +748,16 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
uint64_t *slots = page_slots(p->signal_page);
uint32_t id;
+ /*
+ * If the id is valid but the slot is not signaled, the GPU may have signaled the
+ * same event twice before the driver had a chance to process the first interrupt.
+ * The signal slot is auto-reset after set_event wakes up user space, so just drop
+ * the second event, as the application only needs to be woken up once.
+ */
+ if ((valid_id_bits > 31 || (1U << valid_id_bits) >= KFD_SIGNAL_EVENT_LIMIT) &&
+ partial_id < KFD_SIGNAL_EVENT_LIMIT && slots[partial_id] == UNSIGNALED_EVENT_SLOT)
+ goto out_unlock;
+
if (valid_id_bits)
pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
partial_id, valid_id_bits);
@@ -775,6 +786,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
}
}
+out_unlock:
rcu_read_unlock();
kfd_unref_process(p);
}
@@ -880,6 +892,10 @@ static int copy_signaled_event_data(uint32_t num_events,
dst = &data[i].memory_exception_data;
src = &event->memory_exception_data;
size = sizeof(struct kfd_hsa_memory_exception_data);
+ } else if (event->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ dst = &data[i].memory_exception_data;
+ src = &event->hw_exception_data;
+ size = sizeof(struct kfd_hsa_hw_exception_data);
} else if (event->type == KFD_EVENT_TYPE_SIGNAL &&
waiter->event_age_enabled) {
dst = &data[i].signal_event_data.last_event_age;
@@ -1123,8 +1139,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (type == KFD_EVENT_TYPE_MEMORY) {
dev_warn(kfd_device,
- "Sending SIGSEGV to process %d (pasid 0x%x)",
- p->lead_thread->pid, p->pasid);
+ "Sending SIGSEGV to process pid %d",
+ p->lead_thread->pid);
send_sig(SIGSEGV, p->lead_thread, 0);
}
@@ -1132,13 +1148,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (send_signal) {
if (send_sigterm) {
dev_warn(kfd_device,
- "Sending SIGTERM to process %d (pasid 0x%x)",
- p->lead_thread->pid, p->pasid);
+ "Sending SIGTERM to process pid %d",
+ p->lead_thread->pid);
send_sig(SIGTERM, p->lead_thread, 0);
} else {
dev_err(kfd_device,
- "Process %d (pasid 0x%x) got unhandled exception",
- p->lead_thread->pid, p->pasid);
+ "Process pid %d got unhandled exception",
+ p->lead_thread->pid);
}
}
@@ -1152,7 +1168,7 @@ void kfd_signal_hw_exception_event(u32 pasid)
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function increments the process ref count.
*/
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return; /* Presumably process exited. */
@@ -1161,22 +1177,39 @@ void kfd_signal_hw_exception_event(u32 pasid)
kfd_unref_process(p);
}
-void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va)
+{
+ struct kfd_process_device *pdd;
+ struct kfd_hsa_memory_exception_data exception_data;
+ int i;
+
+ memset(&exception_data, 0, sizeof(exception_data));
+ exception_data.va = gpu_va;
+ exception_data.failure.NotPresent = 1;
+
+ // Send VM seg fault to all kfd process devices
+ for (i = 0; i < p->n_pdds; i++) {
+ pdd = p->pdds[i];
+ exception_data.gpu_id = pdd->user_gpu_id;
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, &exception_data);
+ }
+}
+
+void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
struct kfd_vm_fault_info *info,
struct kfd_hsa_memory_exception_data *data)
{
struct kfd_event *ev;
uint32_t id;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = pdd->process;
struct kfd_hsa_memory_exception_data memory_exception_data;
int user_gpu_id;
- if (!p)
- return; /* Presumably process exited. */
-
- user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ user_gpu_id = kfd_process_get_user_gpu_id(p, pdd->dev->id);
if (unlikely(user_gpu_id == -EINVAL)) {
- WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n",
+ pdd->dev->id);
return;
}
@@ -1213,7 +1246,6 @@ void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
}
rcu_read_unlock();
- kfd_unref_process(p);
}
void kfd_signal_reset_event(struct kfd_node *dev)
@@ -1240,12 +1272,41 @@ void kfd_signal_reset_event(struct kfd_node *dev)
idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p);
if (unlikely(user_gpu_id == -EINVAL)) {
WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
continue;
}
+ if (unlikely(!pdd)) {
+ WARN_ONCE(1, "Could not get device data from process pid:%d\n",
+ p->lead_thread->pid);
+ continue;
+ }
+
+ if (dev->dqm->detect_hang_count && !pdd->has_reset_queue)
+ continue;
+
+ if (dev->dqm->detect_hang_count) {
+ struct amdgpu_task_info *ti;
+ struct amdgpu_fpriv *drv_priv;
+
+ if (unlikely(amdgpu_file_to_fpriv(pdd->drm_file, &drv_priv))) {
+ WARN_ONCE(1, "Could not get vm for device %x from pid:%d\n",
+ dev->id, p->lead_thread->pid);
+ continue;
+ }
+
+ ti = amdgpu_vm_get_task_info_vm(&drv_priv->vm);
+ if (ti) {
+ dev_err(dev->adev->dev,
+ "Queues reset on process %s tid %d thread %s pid %d\n",
+ ti->process_name, ti->tgid, ti->task.comm, ti->task.pid);
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+
rcu_read_lock();
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
@@ -1274,19 +1335,22 @@ void kfd_signal_reset_event(struct kfd_node *dev)
void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
{
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
struct kfd_event *ev;
uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
int user_gpu_id;
- if (!p)
+ if (!p) {
+ dev_warn(dev->adev->dev, "Not find process with pasid:%d\n", pasid);
return; /* Presumably process exited. */
+ }
user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
if (unlikely(user_gpu_id == -EINVAL)) {
WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ kfd_unref_process(p);
return;
}
@@ -1318,6 +1382,8 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
}
}
+ dev_warn(dev->adev->dev, "Send SIGBUS to process %s(pasid:%d)\n",
+ p->lead_thread->comm, pasid);
rcu_read_unlock();
/* user application will handle SIGBUS signal */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 62b205dac63a..1d170dc50df3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -23,7 +23,6 @@
*/
#include <linux/device.h>
-#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/sched.h>
@@ -36,6 +35,7 @@
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/processor.h>
+#include "amdgpu_vm.h"
/*
* The primary memory I/O features being added for revisions of gfxip
@@ -326,7 +326,7 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
* with small reserved space for kernel.
* Set them to CANONICAL addresses.
*/
- pdd->gpuvm_base = SVM_USER_BASE;
+ pdd->gpuvm_base = max(SVM_USER_BASE, AMDGPU_VA_RESERVED_BOTTOM);
pdd->gpuvm_limit =
pdd->dev->kfd->shared_resources.gpuvm_size - 1;
@@ -345,7 +345,7 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
pdd->lds_base = MAKE_LDS_APP_BASE_V9();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
- pdd->gpuvm_base = PAGE_SIZE;
+ pdd->gpuvm_base = AMDGPU_VA_RESERVED_BOTTOM;
pdd->gpuvm_limit =
pdd->dev->kfd->shared_resources.gpuvm_size - 1;
@@ -356,7 +356,7 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
* Place TBA/TMA on opposite side of VM hole to prevent
* stray faults from triggering SVM on these pages.
*/
- pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size;
+ pdd->qpd.cwsr_base = AMDGPU_VA_RESERVED_TRAP_START(pdd->dev->adev);
}
int kfd_init_apertures(struct kfd_process *process)
@@ -379,7 +379,8 @@ int kfd_init_apertures(struct kfd_process *process)
pdd = kfd_create_process_device_data(dev, process);
if (!pdd) {
- pr_err("Failed to create process device data\n");
+ dev_err(dev->adev->dev,
+ "Failed to create process device data\n");
return -ENOMEM;
}
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index c7991e07b6be..3e1ad8974797 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -129,57 +129,6 @@ enum SQ_INTERRUPT_ERROR_TYPE {
KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \
>> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT)
-static void event_interrupt_poison_consumption(struct kfd_node *dev,
- uint16_t pasid, uint16_t client_id)
-{
- int old_poison, ret = -EINVAL;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
-
- if (!p)
- return;
-
- /* all queues of a process will be unmapped in one time */
- old_poison = atomic_cmpxchg(&p->poison, 0, 1);
- kfd_unref_process(p);
- if (old_poison)
- return;
-
- switch (client_id) {
- case SOC15_IH_CLIENTID_SE0SH:
- case SOC15_IH_CLIENTID_SE1SH:
- case SOC15_IH_CLIENTID_SE2SH:
- case SOC15_IH_CLIENTID_SE3SH:
- case SOC15_IH_CLIENTID_UTCL2:
- ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
- break;
- case SOC15_IH_CLIENTID_SDMA0:
- case SOC15_IH_CLIENTID_SDMA1:
- case SOC15_IH_CLIENTID_SDMA2:
- case SOC15_IH_CLIENTID_SDMA3:
- case SOC15_IH_CLIENTID_SDMA4:
- break;
- default:
- break;
- }
-
- kfd_signal_poison_consumed_event(dev, pasid);
-
- /* resetting queue passes, do page retirement without gpu reset
- * resetting queue fails, fallback to gpu reset solution
- */
- if (!ret) {
- dev_warn(dev->adev->dev,
- "RAS poison consumption, unmap queue flow succeeded: client id %d\n",
- client_id);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
- } else {
- dev_warn(dev->adev->dev,
- "RAS poison consumption, fall back to gpu reset flow: client id %d\n",
- client_id);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
- }
-}
-
static bool event_interrupt_isr_v10(struct kfd_node *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
@@ -219,14 +168,14 @@ static bool event_interrupt_isr_v10(struct kfd_node *dev,
client_id != SOC15_IH_CLIENTID_SE3SH)
return false;
- pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
- client_id, source_id, vmid, pasid);
- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
- data[0], data[1], data[2], data[3],
- data[4], data[5], data[6], data[7]);
+ dev_dbg(dev->adev->dev,
+ "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+ data[7]);
- /* If there is no valid PASID, it's likely a bug */
- if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+ if (pasid == 0)
return 0;
/* Interrupt types we care about: various signals and faults.
@@ -268,37 +217,66 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
switch (encoding) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
- pr_debug(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n",
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1,
- SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- WLT),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE_BUF0_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE_BUF1_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE_UTC_ERROR));
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_AUTO_CTXID1,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ WLT),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_BUF0_FULL),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_BUF1_FULL),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_UTC_ERROR));
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
- pr_debug("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SA_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SIMD_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- WGP_ID));
+ dev_dbg_ratelimited(
+ dev->adev->dev,
+ "sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SA_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ WGP_ID));
if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK) {
if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
@@ -310,33 +288,45 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0,
ERR_TYPE);
- pr_warn("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SA_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SIMD_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- WGP_ID),
+ dev_warn_ratelimited(
+ dev->adev->dev,
+ "sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SA_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ WGP_ID),
sq_intr_err_type);
- if (sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
- sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
- event_interrupt_poison_consumption(dev, pasid, source_id);
- return;
- }
break;
default:
break;
}
kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
@@ -355,9 +345,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
client_id == SOC15_IH_CLIENTID_SDMA7) {
if (source_id == SOC15_INTSRC_SDMA_TRAP) {
kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
- } else if (source_id == SOC15_INTSRC_SDMA_ECC) {
- event_interrupt_poison_consumption(dev, pasid, source_id);
- return;
}
} else if (client_id == SOC15_IH_CLIENTID_VMC ||
client_id == SOC15_IH_CLIENTID_VMC1 ||
@@ -366,12 +353,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
struct kfd_hsa_memory_exception_data exception_data;
- if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
- amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
- event_interrupt_poison_consumption(dev, pasid, client_id);
- return;
- }
-
info.vmid = vmid;
info.mc_id = client_id;
info.page_addr = ih_ring_entry[4] |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
index f933bd231fb9..2788a52714d1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
@@ -148,51 +148,78 @@ enum SQ_INTERRUPT_ERROR_TYPE {
#define KFD_CTXID0_DOORBELL_ID(ctxid0) ((ctxid0) & \
KFD_CTXID0_DOORBELL_ID_MASK)
-static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1)
+static void print_sq_intr_info_auto(struct kfd_node *dev, uint32_t context_id0,
+ uint32_t context_id1)
{
- pr_debug(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE),
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, REG_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, CMD_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_CMD_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_REG_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, IMMED_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR));
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ REG_TIMESTAMP),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ CMD_TIMESTAMP),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ HOST_REG_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ IMMED_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_UTC_ERROR));
}
-static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1)
+static void print_sq_intr_info_inst(struct kfd_node *dev, uint32_t context_id0,
+ uint32_t context_id1)
{
- pr_debug(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SH_ID),
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID));
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ SIMD_ID),
+ REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ WGP_ID));
}
-static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1)
+static void print_sq_intr_info_error(struct kfd_node *dev, uint32_t context_id0,
+ uint32_t context_id1)
{
- pr_warn(
+ dev_warn_ratelimited(
+ dev->adev->dev,
"sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID));
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ DETAIL),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ TYPE),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ SH_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ PRIV),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1,
+ SIMD_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1,
+ WGP_ID));
}
static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
uint16_t pasid, uint16_t source_id)
{
+ enum amdgpu_ras_block block = 0;
int ret = -EINVAL;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ uint32_t reset = 0;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return;
@@ -210,9 +237,14 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
case SOC15_INTSRC_SQ_INTERRUPT_MSG:
if (dev->dqm->ops.reset_queues)
ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);
+ block = AMDGPU_RAS_BLOCK__GFX;
+ if (ret)
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
break;
case SOC21_INTSRC_SDMA_ECC:
default:
+ block = AMDGPU_RAS_BLOCK__GFX;
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
break;
}
@@ -220,10 +252,7 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
/* resetting queue passes, do page retirement without gpu reset
resetting queue fails, fallback to gpu reset solution */
- if (!ret)
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
- else
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
+ amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset);
}
static bool event_interrupt_isr_v11(struct kfd_node *dev,
@@ -251,14 +280,14 @@ static bool event_interrupt_isr_v11(struct kfd_node *dev,
(context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG))
return false;
- pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
- client_id, source_id, vmid, pasid);
- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
- data[0], data[1], data[2], data[3],
- data[4], data[5], data[6], data[7]);
+ dev_dbg(dev->adev->dev,
+ "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+ data[7]);
- /* If there is no valid PASID, it's likely a bug */
- if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+ if (pasid == 0)
return false;
/* Interrupt types we care about: various signals and faults.
@@ -325,11 +354,15 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
/* CP */
if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
kfd_signal_event_interrupt(pasid, context_id0, 32);
- else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
- kfd_set_dbg_ev_from_interrupt(dev, pasid,
- KFD_CTXID0_DOORBELL_ID(context_id0),
+ else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) {
+ u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0);
+
+ kfd_set_dbg_ev_from_interrupt(dev, pasid, doorbell_id,
KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
NULL, 0);
+ kfd_dqm_suspend_bad_queue_mes(dev, pasid, doorbell_id);
+ }
/* SDMA */
else if (source_id == SOC21_INTSRC_SDMA_TRAP)
@@ -345,10 +378,10 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
switch (sq_int_enc) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
- print_sq_intr_info_auto(context_id0, context_id1);
+ print_sq_intr_info_auto(dev, context_id0, context_id1);
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
- print_sq_intr_info_inst(context_id0, context_id1);
+ print_sq_intr_info_inst(dev, context_id0, context_id1);
sq_int_priv = REG_GET_FIELD(context_id0,
SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid,
@@ -358,7 +391,7 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
return;
break;
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
- print_sq_intr_info_error(context_id0, context_id1);
+ print_sq_intr_info_error(dev, context_id0, context_id1);
sq_int_errtype = REG_GET_FIELD(context_id0,
SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE);
if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 830396b1c3b1..4ceb251312a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -27,6 +27,7 @@
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
#include "kfd_smi_events.h"
+#include "amdgpu_ras.h"
/*
* GFX9 SQ Interrupts
@@ -143,8 +144,12 @@ enum SQ_INTERRUPT_ERROR_TYPE {
static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
uint16_t pasid, uint16_t client_id)
{
- int old_poison, ret = -EINVAL;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ enum amdgpu_ras_block block = 0;
+ uint32_t reset = 0;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION;
+ u64 event_id;
+ int old_poison, ret;
if (!p)
return;
@@ -161,34 +166,75 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SE2SH:
case SOC15_IH_CLIENTID_SE3SH:
case SOC15_IH_CLIENTID_UTCL2:
- ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
+ block = AMDGPU_RAS_BLOCK__GFX;
+ if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x00557300 and onwards */
+ if (dev->adev->pm.fw_version < 0x00557300)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x05550C00 and onwards */
+ if (dev->adev->pm.fw_version < 0x05550C00)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
+ amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__GFX);
+ break;
+ case SOC15_IH_CLIENTID_VMC:
+ case SOC15_IH_CLIENTID_VMC1:
+ block = AMDGPU_RAS_BLOCK__MMHUB;
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
break;
case SOC15_IH_CLIENTID_SDMA0:
case SOC15_IH_CLIENTID_SDMA1:
case SOC15_IH_CLIENTID_SDMA2:
case SOC15_IH_CLIENTID_SDMA3:
case SOC15_IH_CLIENTID_SDMA4:
+ block = AMDGPU_RAS_BLOCK__SDMA;
+ if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x00557300 and onwards */
+ if (dev->adev->pm.fw_version < 0x00557300)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x05550C00 and onwards */
+ if (dev->adev->pm.fw_version < 0x05550C00)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
+ amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__SDMA);
break;
default:
- break;
+ dev_warn(dev->adev->dev,
+ "client %d does not support poison consumption\n", client_id);
+ return;
}
+ ret = amdgpu_ras_mark_ras_event(dev->adev, type);
+ if (ret)
+ return;
+
kfd_signal_poison_consumed_event(dev, pasid);
- /* resetting queue passes, do page retirement without gpu reset
- * resetting queue fails, fallback to gpu reset solution
- */
- if (!ret) {
- dev_warn(dev->adev->dev,
- "RAS poison consumption, unmap queue flow succeeded: client id %d\n",
- client_id);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
- } else {
- dev_warn(dev->adev->dev,
- "RAS poison consumption, fall back to gpu reset flow: client id %d\n",
- client_id);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
- }
+ event_id = amdgpu_ras_acquire_event_id(dev->adev, type);
+
+ RAS_EVENT_LOG(dev->adev, event_id,
+ "poison is consumed by client %d, kick off gpu reset flow\n", client_id);
+
+ amdgpu_amdkfd_ras_pasid_poison_consumption_handler(dev->adev,
+ block, pasid, NULL, NULL, reset);
}
static bool context_id_expected(struct kfd_dev *dev)
@@ -268,11 +314,12 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev,
& ~pasid_mask) | pasid);
}
- pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
- client_id, source_id, vmid, pasid);
- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
- data[0], data[1], data[2], data[3],
- data[4], data[5], data[6], data[7]);
+ dev_dbg(dev->adev->dev,
+ "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+ data[7]);
/* If there is no valid PASID, it's likely a bug */
if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
@@ -333,28 +380,82 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING);
switch (encoding) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
- pr_debug(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, WLT),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_BUF_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, REG_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, CMD_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_CMD_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_REG_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, IMMED_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR));
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ WLT),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ REG_TIMESTAMP),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ CMD_TIMESTAMP),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ HOST_REG_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ IMMED_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE_UTC_ERROR));
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
- pr_debug("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
+ dev_dbg_ratelimited(
+ dev->adev->dev,
+ "sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n",
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SH_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ CU_ID),
sq_int_data);
if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK) {
if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
@@ -366,14 +467,37 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
break;
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE);
- pr_warn("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
+ dev_warn_ratelimited(
+ dev->adev->dev,
+ "sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n",
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SH_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ CU_ID),
sq_intr_err);
if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
@@ -385,7 +509,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
break;
}
kfd_signal_event_interrupt(pasid, sq_int_data, 24);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
@@ -412,8 +537,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
struct kfd_hsa_memory_exception_data exception_data;
- if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
- amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
+ if (source_id == SOC15_INTSRC_VMC_UTCL2_POISON) {
event_interrupt_poison_consumption_v9(dev, pasid, client_id);
return;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index dd3c43c1ad70..783c2f5a04e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -46,7 +46,7 @@
#include <linux/kfifo.h>
#include "kfd_priv.h"
-#define KFD_IH_NUM_ENTRIES 8192
+#define KFD_IH_NUM_ENTRIES 16384
static void interrupt_wq(struct work_struct *);
@@ -62,11 +62,14 @@ int kfd_interrupt_init(struct kfd_node *node)
return r;
}
- node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
- if (unlikely(!node->ih_wq)) {
- kfifo_free(&node->ih_fifo);
- dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
- return -ENOMEM;
+ if (!node->kfd->ih_wq) {
+ node->kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND,
+ node->kfd->num_nodes);
+ if (unlikely(!node->kfd->ih_wq)) {
+ kfifo_free(&node->ih_fifo);
+ dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
+ return -ENOMEM;
+ }
}
spin_lock_init(&node->interrupt_lock);
@@ -96,14 +99,6 @@ void kfd_interrupt_exit(struct kfd_node *node)
spin_lock_irqsave(&node->interrupt_lock, flags);
node->interrupts_active = false;
spin_unlock_irqrestore(&node->interrupt_lock, flags);
-
- /*
- * flush_work ensures that there are no outstanding
- * work-queue items that will access interrupt_ring. New work items
- * can't be created because we stopped interrupt handling above.
- */
- flush_workqueue(node->ih_wq);
-
kfifo_free(&node->ih_fifo);
}
@@ -112,55 +107,48 @@ void kfd_interrupt_exit(struct kfd_node *node)
*/
bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
{
- int count;
-
- count = kfifo_in(&node->ih_fifo, ih_ring_entry,
- node->kfd->device_info.ih_ring_entry_size);
- if (count != node->kfd->device_info.ih_ring_entry_size) {
- dev_dbg_ratelimited(node->adev->dev,
- "Interrupt ring overflow, dropping interrupt %d\n",
- count);
+ if (kfifo_is_full(&node->ih_fifo)) {
+ dev_warn_ratelimited(node->adev->dev, "KFD node %d ih_fifo overflow\n",
+ node->node_id);
return false;
}
+ kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size);
return true;
}
/*
* Assumption: single reader/writer. This function is not re-entrant
*/
-static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry)
+static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry)
{
int count;
- count = kfifo_out(&node->ih_fifo, ih_ring_entry,
- node->kfd->device_info.ih_ring_entry_size);
-
- WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size);
+ if (kfifo_is_empty(&node->ih_fifo))
+ return false;
+ count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry,
+ node->kfd->device_info.ih_ring_entry_size);
+ WARN_ON(count != node->kfd->device_info.ih_ring_entry_size);
return count == node->kfd->device_info.ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
{
- struct kfd_node *dev = container_of(work, struct kfd_node,
- interrupt_work);
- uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
+ struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work);
+ uint32_t *ih_ring_entry;
unsigned long start_jiffies = jiffies;
- if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
- dev_err_once(dev->adev->dev, "Ring entry too small\n");
- return;
- }
-
- while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
+ while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) {
dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
+ kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size);
+
if (time_is_before_jiffies(start_jiffies + HZ)) {
/* If we spent more than a second processing signals,
* reschedule the worker to avoid soft-lockup warnings
*/
- queue_work(dev->ih_wq, &dev->interrupt_work);
+ queue_work(dev->kfd->ih_wq, &dev->interrupt_work);
break;
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 1bea629c49ca..fb3129883a4c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -32,6 +32,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers.h"
#include "kfd_pm4_opcodes.h"
+#include "amdgpu_reset.h"
#define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
@@ -45,11 +46,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
int retval;
union PM4_MES_TYPE_3_HEADER nop;
- if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ))
- return false;
-
- pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ,
- queue_size);
+ pr_debug("Initializing queue type %d size %d\n", type, queue_size);
memset(&prop, 0, sizeof(prop));
memset(&nop, 0, sizeof(nop));
@@ -68,7 +65,8 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
break;
default:
- pr_err("Invalid queue type %d\n", type);
+ WARN(1, "Invalid queue type %d\n", type);
+ dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
return false;
}
@@ -78,13 +76,14 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
prop.doorbell_ptr = kfd_get_kernel_doorbell(dev->kfd, &prop.doorbell_off);
if (!prop.doorbell_ptr) {
- pr_err("Failed to initialize doorbell");
+ dev_err(dev->adev->dev, "Failed to initialize doorbell");
goto err_get_kernel_doorbell;
}
retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
if (retval != 0) {
- pr_err("Failed to init pq queues size %d\n", queue_size);
+ dev_err(dev->adev->dev, "Failed to init pq queues size %d\n",
+ queue_size);
goto err_pq_allocate_vidmem;
}
@@ -123,7 +122,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
memset(kq->pq_kernel_addr, 0, queue_size);
memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel));
- memset(kq->wptr_kernel, 0, sizeof(*kq->wptr_kernel));
+ memset(kq->wptr_kernel, 0, dev->kfd->device_info.doorbell_size);
prop.queue_size = queue_size;
prop.is_interop = false;
@@ -196,15 +195,17 @@ err_get_kernel_doorbell:
}
/* Uninitialize a kernel queue and free all its memory usages. */
-static void kq_uninitialize(struct kernel_queue *kq, bool hanging)
+static void kq_uninitialize(struct kernel_queue *kq)
{
- if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && !hanging)
+ if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && down_read_trylock(&kq->dev->adev->reset_domain->sem)) {
kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
kq->queue->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
KFD_UNMAP_LATENCY_MS,
kq->queue->pipe,
kq->queue->queue);
+ up_read(&kq->dev->adev->reset_domain->sem);
+ }
else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
@@ -286,7 +287,7 @@ err_no_space:
return -ENOMEM;
}
-void kq_submit_packet(struct kernel_queue *kq)
+int kq_submit_packet(struct kernel_queue *kq)
{
#ifdef DEBUG
int i;
@@ -298,15 +299,26 @@ void kq_submit_packet(struct kernel_queue *kq)
}
pr_debug("\n");
#endif
+ /* Fatal err detected, packet submission won't go through */
+ if (amdgpu_amdkfd_is_fed(kq->dev->adev))
+ return -EIO;
+
+ /* Make sure ring buffer is updated before wptr updated */
+ mb();
+
if (kq->dev->kfd->device_info.doorbell_size == 8) {
*kq->wptr64_kernel = kq->pending_wptr64;
+ mb(); /* Make sure wptr updated before ring doorbell */
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
} else {
*kq->wptr_kernel = kq->pending_wptr;
+ mb(); /* Make sure wptr updated before ring doorbell */
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
kq->pending_wptr);
}
+
+ return 0;
}
void kq_rollback_packet(struct kernel_queue *kq)
@@ -332,15 +344,15 @@ struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
return kq;
- pr_err("Failed to init kernel queue\n");
+ dev_err(dev->adev->dev, "Failed to init kernel queue\n");
kfree(kq);
return NULL;
}
-void kernel_queue_uninit(struct kernel_queue *kq, bool hanging)
+void kernel_queue_uninit(struct kernel_queue *kq)
{
- kq_uninitialize(kq, hanging);
+ kq_uninitialize(kq);
kfree(kq);
}
@@ -351,26 +363,26 @@ static __attribute__((unused)) void test_kq(struct kfd_node *dev)
uint32_t *buffer, i;
int retval;
- pr_err("Starting kernel queue test\n");
+ dev_err(dev->adev->dev, "Starting kernel queue test\n");
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
if (unlikely(!kq)) {
- pr_err(" Failed to initialize HIQ\n");
- pr_err("Kernel queue test failed\n");
+ dev_err(dev->adev->dev, " Failed to initialize HIQ\n");
+ dev_err(dev->adev->dev, "Kernel queue test failed\n");
return;
}
retval = kq_acquire_packet_buffer(kq, 5, &buffer);
if (unlikely(retval != 0)) {
- pr_err(" Failed to acquire packet buffer\n");
- pr_err("Kernel queue test failed\n");
+ dev_err(dev->adev->dev, " Failed to acquire packet buffer\n");
+ dev_err(dev->adev->dev, "Kernel queue test failed\n");
return;
}
for (i = 0; i < 5; i++)
buffer[i] = kq->nop_packet;
kq_submit_packet(kq);
- pr_err("Ending kernel queue test\n");
+ dev_err(dev->adev->dev, "Ending kernel queue test\n");
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 9a6244430845..e24ee50acdf0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -47,7 +47,7 @@
int kq_acquire_packet_buffer(struct kernel_queue *kq,
size_t packet_size_in_dwords,
unsigned int **buffer_ptr);
-void kq_submit_packet(struct kernel_queue *kq);
+int kq_submit_packet(struct kernel_queue *kq);
void kq_rollback_packet(struct kernel_queue *kq);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 7d82c7da223a..59a5a3fea65d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -39,22 +39,22 @@
#endif
#define dev_fmt(fmt) "kfd_migrate: " fmt
-static uint64_t
-svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+static u64
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, u64 addr)
{
return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
}
static int
-svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
- dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+svm_migrate_gart_map(struct amdgpu_ring *ring, u64 npages,
+ dma_addr_t *addr, u64 *gart_addr, u64 flags)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
unsigned int num_dw, num_bytes;
struct dma_fence *fence;
- uint64_t src_addr, dst_addr;
- uint64_t pte_flags;
+ u64 src_addr, dst_addr;
+ u64 pte_flags;
void *cpu_addr;
int r;
@@ -68,7 +68,8 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED,
- &job);
+ &job,
+ AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP);
if (r)
return r;
@@ -77,7 +78,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -122,15 +123,15 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
static int
svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
- uint64_t *vram, uint64_t npages,
+ u64 *vram, u64 npages,
enum MIGRATION_COPY_DIR direction,
struct dma_fence **mfence)
{
- const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+ const u64 GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- uint64_t gart_s, gart_d;
+ u64 gart_s, gart_d;
struct dma_fence *next;
- uint64_t size;
+ u64 size;
int r;
mutex_lock(&adev->mman.gtt_window_lock);
@@ -153,7 +154,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
}
r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
- NULL, &next, false, true, false);
+ NULL, &next, false, true, 0);
if (r) {
dev_err(adev->dev, "fail %d to copy memory\n", r);
goto out_unlock;
@@ -262,64 +263,54 @@ static void svm_migrate_put_sys_page(unsigned long addr)
static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
{
- unsigned long cpages = 0;
+ unsigned long mpages = 0;
unsigned long i;
for (i = 0; i < migrate->npages; i++) {
- if (migrate->src[i] & MIGRATE_PFN_VALID &&
+ if (migrate->dst[i] & MIGRATE_PFN_VALID &&
migrate->src[i] & MIGRATE_PFN_MIGRATE)
- cpages++;
- }
- return cpages;
-}
-
-static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
-{
- unsigned long upages = 0;
- unsigned long i;
-
- for (i = 0; i < migrate->npages; i++) {
- if (migrate->src[i] & MIGRATE_PFN_VALID &&
- !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
- upages++;
+ mpages++;
}
- return upages;
+ return mpages;
}
static int
svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch, uint64_t ttm_res_offset)
+ dma_addr_t *scratch, u64 ttm_res_offset)
{
- uint64_t npages = migrate->cpages;
+ u64 npages = migrate->npages;
struct amdgpu_device *adev = node->adev;
struct device *dev = adev->dev;
struct amdgpu_res_cursor cursor;
+ u64 mpages = 0;
dma_addr_t *src;
- uint64_t *dst;
- uint64_t i, j;
+ u64 *dst;
+ u64 i, j;
int r;
pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start,
prange->last, ttm_res_offset);
src = scratch;
- dst = (uint64_t *)(scratch + npages);
+ dst = (u64 *)(scratch + npages);
amdgpu_res_first(prange->ttm_res, ttm_res_offset,
npages << PAGE_SHIFT, &cursor);
- for (i = j = 0; i < npages; i++) {
+ for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) {
struct page *spage;
- dst[i] = cursor.start + (j << PAGE_SHIFT);
- migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
- svm_migrate_get_vram_page(prange, migrate->dst[i]);
- migrate->dst[i] = migrate_pfn(migrate->dst[i]);
-
+ if (migrate->src[i] & MIGRATE_PFN_MIGRATE) {
+ dst[i] = cursor.start + (j << PAGE_SHIFT);
+ migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+ svm_migrate_get_vram_page(prange, migrate->dst[i]);
+ migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+ mpages++;
+ }
spage = migrate_pfn_to_page(migrate->src[i]);
if (spage && !is_zone_device_page(spage)) {
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
- DMA_TO_DEVICE);
+ DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, src[i]);
if (r) {
dev_err(dev, "%s: fail %d dma_map_page\n",
@@ -366,9 +357,12 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
out_free_vram_pages:
if (r) {
pr_debug("failed %d to copy memory to vram\n", r);
- while (i--) {
+ for (i = 0; i < npages && mpages; i++) {
+ if (!dst[i])
+ continue;
svm_migrate_put_vram_page(adev, dst[i]);
migrate->dst[i] = 0;
+ mpages--;
}
}
@@ -392,16 +386,17 @@ out_free_vram_pages:
static long
svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
- struct vm_area_struct *vma, uint64_t start,
- uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
+ struct vm_area_struct *vma, u64 start,
+ u64 end, uint32_t trigger, u64 ttm_res_offset)
{
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
- uint64_t npages = (end - start) >> PAGE_SHIFT;
+ u64 npages = (end - start) >> PAGE_SHIFT;
struct amdgpu_device *adev = node->adev;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate = { 0 };
unsigned long cpages = 0;
+ unsigned long mpages = 0;
dma_addr_t *scratch;
void *buf;
int r = -ENOMEM;
@@ -414,7 +409,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
buf = kvcalloc(npages,
- 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+ 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t),
GFP_KERNEL);
if (!buf)
goto out;
@@ -442,35 +437,35 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
goto out_free;
}
if (cpages != npages)
- pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+ pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
cpages, npages);
else
- pr_debug("0x%lx pages migrated\n", cpages);
+ pr_debug("0x%lx pages collected\n", cpages);
r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
migrate_vma_pages(&migrate);
- pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
- svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
-
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
- kfd_smi_event_migration_end(node, p->lead_thread->pid,
- start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- 0, node->id, trigger);
+ mpages = svm_migrate_successful_pages(&migrate);
+ pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
+ mpages, cpages, migrate.npages);
- svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+ svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
+ kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, node->id, trigger, r);
out:
- if (!r && cpages) {
+ if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
- WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+ WRITE_ONCE(pdd->page_in, pdd->page_in + mpages);
- return cpages;
+ return mpages;
}
return r;
}
@@ -479,6 +474,8 @@ out:
* svm_migrate_ram_to_vram - migrate svm range from system to device
* @prange: range structure
* @best_loc: the device to migrate to
+ * @start_mgr: start page to migrate
+ * @last_mgr: last page to migrate
* @mm: the process mm structure
* @trigger: reason of migration
*
@@ -489,19 +486,20 @@ out:
*/
static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+ unsigned long start_mgr, unsigned long last_mgr,
struct mm_struct *mm, uint32_t trigger)
{
unsigned long addr, start, end;
struct vm_area_struct *vma;
- uint64_t ttm_res_offset;
+ u64 ttm_res_offset;
struct kfd_node *node;
- unsigned long cpages = 0;
+ unsigned long mpages = 0;
long r = 0;
- if (prange->actual_loc == best_loc) {
- pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
- prange->svms, prange->start, prange->last, best_loc);
- return 0;
+ if (start_mgr < prange->start || last_mgr > prange->last) {
+ pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+ start_mgr, last_mgr, prange->start, prange->last);
+ return -EFAULT;
}
node = svm_range_get_node_by_id(prange, best_loc);
@@ -510,18 +508,28 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
return -ENODEV;
}
- pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
- prange->start, prange->last, best_loc);
+ pr_debug("svms 0x%p [0x%lx 0x%lx] in [0x%lx 0x%lx] to gpu 0x%x\n",
+ prange->svms, start_mgr, last_mgr, prange->start, prange->last,
+ best_loc);
- start = prange->start << PAGE_SHIFT;
- end = (prange->last + 1) << PAGE_SHIFT;
+ start = start_mgr << PAGE_SHIFT;
+ end = (last_mgr + 1) << PAGE_SHIFT;
+
+ r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
+ prange->npages * PAGE_SIZE,
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+ node->xcp ? node->xcp->id : 0);
+ if (r) {
+ dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r);
+ return -ENOSPC;
+ }
r = svm_range_vram_node_new(node, prange, true);
if (r) {
dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
- return r;
+ goto out;
}
- ttm_res_offset = prange->offset << PAGE_SHIFT;
+ ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;
for (addr = start; addr < end;) {
unsigned long next;
@@ -536,19 +544,27 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
pr_debug("failed %ld to migrate\n", r);
break;
} else {
- cpages += r;
+ mpages += r;
}
ttm_res_offset += next - addr;
addr = next;
}
- if (cpages) {
+ if (mpages) {
prange->actual_loc = best_loc;
- svm_range_free_dma_mappings(prange, true);
- } else {
+ prange->vram_pages += mpages;
+ } else if (!prange->actual_loc) {
+ /* If no page was migrated and all pages of prange are in
+ * sys ram, drop the svm_bo obtained from svm_range_vram_node_new.
+ */
svm_range_vram_node_free(prange);
}
+out:
+ amdgpu_amdkfd_unreserve_mem_limit(node->adev,
+ prange->npages * PAGE_SIZE,
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+ node->xcp ? node->xcp->id : 0);
return r < 0 ? r : 0;
}
@@ -565,22 +581,22 @@ static void svm_migrate_page_free(struct page *page)
static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch, uint64_t npages)
+ dma_addr_t *scratch, u64 npages)
{
struct device *dev = adev->dev;
- uint64_t *src;
+ u64 *src;
dma_addr_t *dst;
struct page *dpage;
- uint64_t i = 0, j;
- uint64_t addr;
+ u64 i = 0, j;
+ u64 addr;
int r = 0;
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
prange->last);
- addr = prange->start << PAGE_SHIFT;
+ addr = migrate->start;
- src = (uint64_t *)(scratch + npages);
+ src = (u64 *)(scratch + npages);
dst = scratch;
for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
@@ -620,7 +636,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
goto out_oom;
}
- dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, dst[i]);
if (r) {
dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
@@ -663,19 +679,18 @@ out_oom:
* Context: Process context, caller hold mmap read lock, prange->migrate_mutex
*
* Return:
- * 0 - success with all pages migrated
* negative values - indicate error
- * positive values - partial migration, number of pages not migrated
+ * zero or positive values - number of pages migrated
*/
static long
svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
- struct vm_area_struct *vma, uint64_t start, uint64_t end,
+ struct vm_area_struct *vma, u64 start, u64 end,
uint32_t trigger, struct page *fault_page)
{
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
- uint64_t npages = (end - start) >> PAGE_SHIFT;
- unsigned long upages = npages;
+ u64 npages = (end - start) >> PAGE_SHIFT;
unsigned long cpages = 0;
+ unsigned long mpages = 0;
struct amdgpu_device *adev = node->adev;
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
@@ -695,7 +710,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
buf = kvcalloc(npages,
- 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+ 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t),
GFP_KERNEL);
if (!buf)
goto out;
@@ -721,47 +736,48 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
if (!cpages) {
pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
prange->start, prange->last);
- upages = svm_migrate_unsuccessful_pages(&migrate);
goto out_free;
}
if (cpages != npages)
- pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+ pr_debug("partial migration, 0x%lx/0x%llx pages collected\n",
cpages, npages);
else
- pr_debug("0x%lx pages migrated\n", cpages);
+ pr_debug("0x%lx pages collected\n", cpages);
r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
scratch, npages);
migrate_vma_pages(&migrate);
- upages = svm_migrate_unsuccessful_pages(&migrate);
- pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
- upages, cpages, migrate.npages);
+ mpages = svm_migrate_successful_pages(&migrate);
+ pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
+ mpages, cpages, migrate.npages);
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
- kfd_smi_event_migration_end(node, p->lead_thread->pid,
- start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- node->id, 0, trigger);
-
- svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+ svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
+ kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ node->id, 0, trigger, r);
out:
- if (!r && cpages) {
+ if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
if (pdd)
- WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
+ WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
}
- return r ? r : upages;
+
+ return r ? r : mpages;
}
/**
* svm_migrate_vram_to_ram - migrate svm range from device to system
* @prange: range structure
* @mm: process mm, use current->mm if NULL
+ * @start_mgr: first page to be migrated to sys ram
+ * @last_mgr: last page to be migrated to sys ram
* @trigger: reason of migration
* @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback
*
@@ -771,6 +787,7 @@ out:
* 0 - OK, otherwise error code
*/
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+ unsigned long start_mgr, unsigned long last_mgr,
uint32_t trigger, struct page *fault_page)
{
struct kfd_node *node;
@@ -778,26 +795,33 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
unsigned long addr;
unsigned long start;
unsigned long end;
- unsigned long upages = 0;
+ unsigned long mpages = 0;
long r = 0;
+ /* this prange has no vram pages to migrate to sys ram */
if (!prange->actual_loc) {
pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
prange->start, prange->last);
return 0;
}
+ if (start_mgr < prange->start || last_mgr > prange->last) {
+ pr_debug("range [0x%lx 0x%lx] out prange [0x%lx 0x%lx]\n",
+ start_mgr, last_mgr, prange->start, prange->last);
+ return -EFAULT;
+ }
+
node = svm_range_get_node_by_id(prange, prange->actual_loc);
if (!node) {
pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
return -ENODEV;
}
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
- prange->svms, prange, prange->start, prange->last,
+ prange->svms, prange, start_mgr, last_mgr,
prange->actual_loc);
- start = prange->start << PAGE_SHIFT;
- end = (prange->last + 1) << PAGE_SHIFT;
+ start = start_mgr << PAGE_SHIFT;
+ end = (last_mgr + 1) << PAGE_SHIFT;
for (addr = start; addr < end;) {
unsigned long next;
@@ -816,14 +840,24 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
pr_debug("failed %ld to migrate prange %p\n", r, prange);
break;
} else {
- upages += r;
+ mpages += r;
}
addr = next;
}
- if (r >= 0 && !upages) {
- svm_range_vram_node_free(prange);
- prange->actual_loc = 0;
+ if (r >= 0) {
+ WARN_ONCE(prange->vram_pages < mpages,
+ "Recorded vram pages(0x%llx) should not be less than migration pages(0x%lx).",
+ prange->vram_pages, mpages);
+ prange->vram_pages -= mpages;
+
+ /* If prange has no vram pages left, set its actual_loc to system
+ * and drop its svm_bo ref.
+ */
+ if (prange->vram_pages == 0 && prange->ttm_res) {
+ prange->actual_loc = 0;
+ svm_range_vram_node_free(prange);
+ }
}
return r < 0 ? r : 0;
@@ -833,17 +867,23 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
* svm_migrate_vram_to_vram - migrate svm range from device to device
* @prange: range structure
* @best_loc: the device to migrate to
+ * @start: first page to be migrated to @best_loc
+ * @last: last page to be migrated to @best_loc
* @mm: process mm, use current->mm if NULL
* @trigger: reason of migration
*
* Context: Process context, caller hold mmap read lock, svms lock, prange lock
*
+ * migrate all vram pages in prange to sys ram, then migrate
+ * [start, last] pages from sys ram to gpu node best_loc.
+ *
* Return:
* 0 - OK, otherwise error code
*/
static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
- struct mm_struct *mm, uint32_t trigger)
+ unsigned long start, unsigned long last,
+ struct mm_struct *mm, uint32_t trigger)
{
int r, retries = 3;
@@ -855,7 +895,8 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
do {
- r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL);
+ r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
+ trigger, NULL);
if (r)
return r;
} while (prange->actual_loc && --retries);
@@ -863,17 +904,21 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
if (prange->actual_loc)
return -EDEADLK;
- return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+ return svm_migrate_ram_to_vram(prange, best_loc, start, last, mm, trigger);
}
int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+ unsigned long start, unsigned long last,
struct mm_struct *mm, uint32_t trigger)
{
- if (!prange->actual_loc)
- return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+ if (!prange->actual_loc || prange->actual_loc == best_loc)
+ return svm_migrate_ram_to_vram(prange, best_loc, start, last,
+ mm, trigger);
+
else
- return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
+ return svm_migrate_vram_to_vram(prange, best_loc, start, last,
+ mm, trigger);
}
@@ -889,10 +934,9 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
*/
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
+ unsigned long start, last, size;
unsigned long addr = vmf->address;
struct svm_range_bo *svm_bo;
- enum svm_work_list_ops op;
- struct svm_range *parent;
struct svm_range *prange;
struct kfd_process *p;
struct mm_struct *mm;
@@ -929,51 +973,31 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
mutex_lock(&p->svms.lock);
- prange = svm_range_from_addr(&p->svms, addr, &parent);
+ prange = svm_range_from_addr(&p->svms, addr, NULL);
if (!prange) {
pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);
r = -EFAULT;
goto out_unlock_svms;
}
- mutex_lock(&parent->migrate_mutex);
- if (prange != parent)
- mutex_lock_nested(&prange->migrate_mutex, 1);
+ mutex_lock(&prange->migrate_mutex);
if (!prange->actual_loc)
goto out_unlock_prange;
- svm_range_lock(parent);
- if (prange != parent)
- mutex_lock_nested(&prange->lock, 1);
- r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
- if (prange != parent)
- mutex_unlock(&prange->lock);
- svm_range_unlock(parent);
- if (r) {
- pr_debug("failed %d to split range by granularity\n", r);
- goto out_unlock_prange;
- }
+ /* Align migration range start and size to granularity size */
+ size = 1UL << prange->granularity;
+ start = max(ALIGN_DOWN(addr, size), prange->start);
+ last = min(ALIGN(addr + 1, size) - 1, prange->last);
- r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
- KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
- vmf->page);
+ r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, start, last,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, vmf->page);
if (r)
pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
- r, prange->svms, prange, prange->start, prange->last);
-
- /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
- if (p->xnack_enabled && parent == prange)
- op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
- else
- op = SVM_OP_UPDATE_RANGE_NOTIFIER;
- svm_range_add_list_work(&p->svms, parent, mm, op);
- schedule_deferred_list_work(&p->svms);
+ r, prange->svms, prange, start, last);
out_unlock_prange:
- if (prange != parent)
- mutex_unlock(&prange->migrate_mutex);
- mutex_unlock(&parent->migrate_mutex);
+ mutex_unlock(&prange->migrate_mutex);
out_unlock_svms:
mutex_unlock(&p->svms.lock);
out_unref_process:
@@ -1001,10 +1025,10 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
void *r;
/* Page migration works on gfx9 or newer */
- if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1))
return -EINVAL;
- if (adev->gmc.is_app_apu)
+ if (adev->apu_prefer_gtt)
return 0;
pgmap = &kfddev->pgmap;
@@ -1021,7 +1045,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
} else {
res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
if (IS_ERR(res))
- return -ENOMEM;
+ return PTR_ERR(res);
pgmap->range.start = res->start;
pgmap->range.end = res->end;
pgmap->type = MEMORY_DEVICE_PRIVATE;
@@ -1037,10 +1061,10 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
r = devm_memremap_pages(adev->dev, pgmap);
if (IS_ERR(r)) {
pr_err("failed to register HMM device memory\n");
- /* Disable SVM support capability */
- pgmap->type = 0;
if (pgmap->type == MEMORY_DEVICE_PRIVATE)
devm_release_mem_region(adev->dev, res->start, resource_size(res));
+ /* Disable SVM support capability */
+ pgmap->type = 0;
return PTR_ERR(r);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 487f26368164..2eebf67f9c2c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -41,9 +41,13 @@ enum MIGRATION_COPY_DIR {
};
int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+ unsigned long start, unsigned long last,
struct mm_struct *mm, uint32_t trigger);
+
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+ unsigned long start, unsigned long last,
uint32_t trigger, struct page *fault_page);
+
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index aee2212e52f6..33aa23450b3f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -78,8 +78,8 @@ err_ioctl:
static void kfd_exit(void)
{
kfd_cleanup_processes();
- kfd_debugfs_fini();
kfd_process_destroy_wq();
+ kfd_debugfs_fini();
kfd_procfs_shutdown();
kfd_topology_shutdown();
kfd_chardev_exit();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 447829c22295..d9ae854b6908 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -99,7 +99,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask, uint32_t inst)
{
- struct kfd_cu_info cu_info;
+ struct amdgpu_cu_info *cu_info = &mm->dev->adev->gfx.cu_info;
+ struct amdgpu_gfx_config *gfx_info = &mm->dev->adev->gfx.config;
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
@@ -108,9 +109,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask);
int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
- amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
-
- cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes;
+ cu_active_per_node = cu_info->number / mm->dev->kfd->num_nodes;
if (cu_mask_count > cu_active_per_node)
cu_mask_count = cu_active_per_node;
@@ -118,18 +117,21 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
* Returning with no CU's enabled will hang the queue, which should be
* attention grabbing.
*/
- if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) {
- pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines);
+ if (gfx_info->max_shader_engines > KFD_MAX_NUM_SE) {
+ dev_err(mm->dev->adev->dev,
+ "Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
+ gfx_info->max_shader_engines);
return;
}
- if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) {
- pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
- cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
+ if (gfx_info->max_sh_per_se > KFD_MAX_NUM_SH_PER_SE) {
+ dev_err(mm->dev->adev->dev,
+ "Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
+ gfx_info->max_sh_per_se * gfx_info->max_shader_engines);
return;
}
cu_bitmap_sh_mul = (KFD_GC_VERSION(mm->dev) >= IP_VERSION(11, 0, 0) &&
- KFD_GC_VERSION(mm->dev) < IP_VERSION(12, 0, 0)) ? 2 : 1;
+ KFD_GC_VERSION(mm->dev) < IP_VERSION(13, 0, 0)) ? 2 : 1;
/* Count active CUs per SH.
*
@@ -142,10 +144,10 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
* See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
* See note on GFX11 cu_bitmap layout in gfx_v11_0_get_cu_info.
*/
- for (se = 0; se < cu_info.num_shader_engines; se++)
- for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
+ for (se = 0; se < gfx_info->max_shader_engines; se++)
+ for (sh = 0; sh < gfx_info->max_sh_per_se; sh++)
cu_per_sh[se][sh] = hweight32(
- cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) *
+ cu_info->bitmap[xcc_inst][se % 4][sh + (se / 4) *
cu_bitmap_sh_mul]);
/* Symmetrically map cu_mask to all SEs & SHs:
@@ -184,13 +186,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
*
* First ensure all CUs are disabled, then enable user specified CUs.
*/
- for (i = 0; i < cu_info.num_shader_engines; i++)
+ for (i = 0; i < gfx_info->max_shader_engines; i++)
se_mask[i] = 0;
i = inst;
for (cu = 0; cu < 16; cu += cu_inc) {
- for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
- for (se = 0; se < cu_info.num_shader_engines; se++) {
+ for (sh = 0; sh < gfx_info->max_sh_per_se; sh++) {
+ for (se = 0; se < gfx_info->max_shader_engines; se++) {
if (cu_per_sh[se][sh] > cu) {
if (cu_mask[i / 32] & (en_mask << (i % 32)))
se_mask[se] |= en_mask << (cu + sh * 16);
@@ -223,7 +225,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
if (mqd_mem_obj->gtt_mem) {
- amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, &mqd_mem_obj->gtt_mem);
kfree(mqd_mem_obj);
} else {
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
@@ -290,3 +292,21 @@ uint64_t kfd_mqd_stride(struct mqd_manager *mm,
{
return mm->mqd_size;
}
+
+bool kfd_check_hiq_mqd_doorbell_id(struct kfd_node *node, uint32_t doorbell_id,
+				   uint32_t inst)
+{
+	struct device *dev;
+
+	/* A zero doorbell ID is not reported as a preemption failure. */
+	if (!doorbell_id)
+		return false;
+	dev = node->adev->dev;
+	if (node->adev->xcp_mgr && node->adev->xcp_mgr->num_xcps > 0)
+		dev_err(dev, "XCC %d: Queue preemption failed for queue with doorbell_id: %x\n",
+			inst, doorbell_id);
+	else
+		dev_err(dev, "Queue preemption failed for queue with doorbell_id: %x\n",
+			doorbell_id);
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 57bf5e513f4d..17cc1f25c8d0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -119,7 +119,7 @@ struct mqd_manager {
#if defined(CONFIG_DEBUG_FS)
int (*debugfs_show_mqd)(struct seq_file *m, void *data);
#endif
- uint32_t (*read_doorbell_id)(void *mqd);
+ bool (*check_preemption_failed)(struct mqd_manager *mm, void *mqd);
uint64_t (*mqd_stride)(struct mqd_manager *mm,
struct queue_properties *p);
@@ -128,6 +128,31 @@ struct mqd_manager {
uint32_t mqd_size;
};
+struct mqd_user_context_save_area_header { /* offsets/sizes of the last saved control stack and wave state */
+ /* Byte offset from start of user context
+ * save area to the last saved top (lowest
+ * address) of control stack data. Must be
+ * 4 byte aligned.
+ */
+ uint32_t control_stack_offset;
+
+ /* Byte size of the last saved control stack
+ * data. Must be 4 byte aligned.
+ */
+ uint32_t control_stack_size;
+
+ /* Byte offset from start of user context save
+ * area to the last saved base (lowest address)
+ * of wave state data. Must be 4 byte aligned.
+ */
+ uint32_t wave_state_offset;
+
+ /* Byte size of the last saved wave state data.
+ * Must be 4 byte aligned.
+ */
+ uint32_t wave_state_size;
+};
+
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev,
struct queue_properties *q);
@@ -173,4 +198,6 @@ void kfd_get_hiq_xcc_mqd(struct kfd_node *dev,
uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev);
uint64_t kfd_mqd_stride(struct mqd_manager *mm,
struct queue_properties *q);
+bool kfd_check_hiq_mqd_doorbell_id(struct kfd_node *node, uint32_t doorbell_id,
+ uint32_t inst);
#endif /* KFD_MQD_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 1a4a69943c71..05f3ac2eaef9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -206,11 +206,11 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct cik_mqd *m = (struct cik_mqd *)mqd;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
static void update_mqd(struct mqd_manager *mm, void *mqd,
@@ -423,7 +423,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 8b7fed913526..1695dd78ede8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -107,6 +107,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -167,9 +169,10 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
@@ -223,11 +226,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct v10_compute_mqd *m = (struct v10_compute_mqd *)mqd;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
@@ -487,7 +490,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
pr_debug("%s@%i\n", __func__, __LINE__);
break;
case KFD_MQD_TYPE_DIQ:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 15277f1d5cf0..3c0ae28c5923 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -55,8 +55,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
if (has_wa_flag) {
- uint32_t wa_mask = minfo->update_flag == UPDATE_FLAG_DBG_WA_ENABLE ?
- 0xffff : 0xffffffff;
+ uint32_t wa_mask =
+ (minfo->update_flag & UPDATE_FLAG_DBG_WA_ENABLE) ? 0xffff : 0xffffffff;
m->compute_static_thread_mgmt_se0 = wa_mask;
m->compute_static_thread_mgmt_se1 = wa_mask;
@@ -154,6 +154,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -221,7 +223,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
@@ -277,11 +279,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct v11_compute_mqd *m = (struct v11_compute_mqd *)mqd;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
@@ -516,7 +518,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
pr_debug("%s@%i\n", __func__, __LINE__);
break;
case KFD_MQD_TYPE_DIQ:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
new file mode 100644
index 000000000000..565858b9044d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v12_structs.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
+
+static inline struct v12_compute_mqd *get_mqd(void *mqd)
+{
+	return mqd;	/* void * converts implicitly to the typed MQD pointer */
+}
+
+static inline struct v12_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return mqd;	/* void * converts implicitly to the typed SDMA MQD pointer */
+}
+
+/* Spread the caller's CU mask (minfo->cu_mask) over the eight per-SE mask words. */
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+			   struct mqd_update_info *minfo)
+{
+	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
+	struct v12_compute_mqd *m = get_mqd(mqd);
+
+	if (!(minfo && minfo->cu_mask.ptr))
+		return;
+
+	mqd_symmetrically_map_cu_mask(mm, minfo->cu_mask.ptr,
+				      minfo->cu_mask.count, se_mask, 0);
+
+	m->compute_static_thread_mgmt_se0 = se_mask[0];
+	m->compute_static_thread_mgmt_se1 = se_mask[1];
+	m->compute_static_thread_mgmt_se2 = se_mask[2];
+	m->compute_static_thread_mgmt_se3 = se_mask[3];
+	m->compute_static_thread_mgmt_se4 = se_mask[4];
+	m->compute_static_thread_mgmt_se5 = se_mask[5];
+	m->compute_static_thread_mgmt_se6 = se_mask[6];
+	m->compute_static_thread_mgmt_se7 = se_mask[7];
+
+	pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
+		 m->compute_static_thread_mgmt_se0,
+		 m->compute_static_thread_mgmt_se1,
+		 m->compute_static_thread_mgmt_se2,
+		 m->compute_static_thread_mgmt_se3,
+		 m->compute_static_thread_mgmt_se4,
+		 m->compute_static_thread_mgmt_se5,
+		 m->compute_static_thread_mgmt_se6,
+		 m->compute_static_thread_mgmt_se7);
+}
+
+static void set_priority(struct v12_compute_mqd *m, struct queue_properties *q)
+{
+	m->cp_hqd_queue_priority = q->priority;	/* stored as given */
+	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority]; /* table lookup */
+}
+
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mem;
+
+	/*
+	 * MES writes to areas beyond the MQD struct itself, so a full
+	 * PAGE_SIZE is reserved for every MQD. @q is not consulted.
+	 */
+	if (!kfd_gtt_sa_allocate(node, PAGE_SIZE, &mem))
+		return mem;
+
+	return NULL;	/* sub-allocation failed */
+}
+
+/* Zero the MQD page, program the initial compute state, then run update_mqd(). */
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+		     struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+		     struct queue_properties *q)
+{
+	struct v12_compute_mqd *m = get_mqd(mqd_mem_obj->cpu_ptr);
+	uint64_t addr = mqd_mem_obj->gpu_addr;
+
+	memset(m, 0, PAGE_SIZE);
+
+	m->header = 0xC0310800;
+	m->compute_pipelinestat_enable = 1;
+
+	/* All-ones mask in each of the eight per-SE thread management words. */
+	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
+
+	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+			0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
+			       CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+	m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+	m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT |
+			    1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+			    1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT;
+
+	/* Set cp_hqd_hq_status0.c_queue_debug_en to 1 to have the CP set up the
+	 * DISPATCH_PTR. This is required for the kfd debugger
+	 */
+	m->cp_hqd_hq_status0 = 1 << 14;
+
+	if (amdgpu_amdkfd_have_atomics_support(mm->dev->adev))
+		m->cp_hqd_hq_status0 |= 1 << 29;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL)
+		m->cp_hqd_aql_control = 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+
+	if (mm->dev->kfd->cwsr_enabled) {
+		m->cp_hqd_persistent_state |=
+			1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT;
+		m->cp_hqd_ctx_save_base_addr_lo =
+			lower_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_base_addr_hi =
+			upper_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+	}
+
+	/* Publish the MQD (and optionally its GPU address) before updating it. */
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+		    uint32_t pipe_id, uint32_t queue_id,
+		    struct queue_properties *p, struct mm_struct *mms)
+{
+	uint32_t wptr_shift = 0;
+
+	/* AQL write pointers count 64B packets; PM4/CP write pointers count dwords. */
+	if (p->format == KFD_QUEUE_FORMAT_AQL)
+		wptr_shift = 4;
+
+	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
+					  (uint32_t __user *)p->write_ptr, wptr_shift, 0, mms, 0);
+}
+
+/* Refresh the queue-dependent MQD fields from @q and the CU mask from @minfo. */
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+		       struct queue_properties *q,
+		       struct mqd_update_info *minfo)
+{
+	struct v12_compute_mqd *m = get_mqd(mqd);
+
+	/*
+	 * Only QUEUE_SIZE is rewritten here. ffs() yields 0 for a zero queue_size,
+	 * making the encoded value negative, so confine it to the field mask.
+	 */
+	m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
+	m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK &
+		(ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1);
+	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+	m->cp_hqd_pq_doorbell_control =
+		q->doorbell_off << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", m->cp_hqd_pq_doorbell_control);
+
+	m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;
+
+	/*
+	 * HW does not clamp this field correctly. Maximum EOP queue size
+	 * is constrained by per-SE EOP done signal count, which is 8-bit.
+	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+	 * is safe, giving a maximum field value of 0xA.
+	 */
+	m->cp_hqd_eop_control = min(0xA,
+		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+	m->cp_hqd_eop_base_addr_lo = lower_32_bits(q->eop_ring_buffer_address >> 8);
+	m->cp_hqd_eop_base_addr_hi = upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+	m->cp_hqd_iq_timer = 0;
+
+	m->cp_hqd_vmid = q->vmid;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		/* GC 10 removed WPP_CLAMP from PQ Control */
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
+		m->cp_hqd_pq_doorbell_control |=
+			1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+	}
+	if (mm->dev->kfd->cwsr_enabled)
+		m->cp_hqd_ctx_save_control = 0;
+
+	update_cu_mask(mm, mqd, minfo);
+	set_priority(m, q);
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
+{
+	/* The doorbell id recorded in the MQD is judged by the shared helper. */
+	return kfd_check_hiq_mqd_doorbell_id(mm->dev,
+					     get_mqd(mqd)->queue_doorbell_id0, 0);
+}
+
+/* Report control-stack and wave-state usage and copy a header to @ctl_stack. */
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+			  struct queue_properties *q,
+			  void __user *ctl_stack,
+			  u32 *ctl_stack_used_size,
+			  u32 *save_area_used_size)
+{
+	struct v12_compute_mqd *m = get_mqd(mqd);
+	struct mqd_user_context_save_area_header header;
+
+	/* Clear padding and any unset members before this goes to user space. */
+	memset(&header, 0, sizeof(header));
+
+	/* Control stack is written backwards, while workgroup context data
+	 * is written forwards. Both start from m->cp_hqd_cntl_stack_size.
+	 * Current position is at m->cp_hqd_cntl_stack_offset and
+	 * m->cp_hqd_wg_state_offset, respectively.
+	 */
+	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size - m->cp_hqd_cntl_stack_offset;
+	*save_area_used_size = m->cp_hqd_wg_state_offset - m->cp_hqd_cntl_stack_size;
+
+	/* Control stack is not copied to user mode for GFXv12 because
+	 * it's part of the context save area that is already
+	 * accessible to user mode
+	 */
+	header.control_stack_size = *ctl_stack_used_size;
+	header.wave_state_size = *save_area_used_size;
+
+	header.wave_state_offset = m->cp_hqd_wg_state_offset;
+	header.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+	if (copy_to_user(ctl_stack, &header, sizeof(header)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* Regular compute init, then OR in the PRIV_STATE and KMD_QUEUE control bits. */
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			 struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			 struct queue_properties *q)
+{
+	struct v12_compute_mqd *m;
+
+	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+	m = get_mqd(*mqd);
+
+	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT |
+				1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT;
+}
+
+/* Clear the SDMA MQD, publish its addresses, then apply the queue properties. */
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+			  struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			  struct queue_properties *q)
+{
+	struct v12_sdma_mqd *m = get_sdma_mqd(mqd_mem_obj->cpu_ptr);
+
+	memset(m, 0, sizeof(*m));
+
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+
+	*mqd = m;
+
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+/* Rebuild the SDMA ring, pointer, doorbell and scheduling fields from @q. */
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+			    struct queue_properties *q,
+			    struct mqd_update_info *minfo)
+{
+	struct v12_sdma_mqd *m = get_sdma_mqd(mqd);
+
+	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+		<< SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+		q->vmid << SDMA0_QUEUE0_RB_CNTL__RB_VMID__SHIFT |
+		1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+		1 << SDMA0_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+	m->sdmax_rlcx_doorbell_offset =
+		q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	m->sdmax_rlcx_sched_cntl = SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK &
+		(amdgpu_sdma_phase_quantum <<
+		 SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT);
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+
+	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, data,
+		     sizeof(struct v12_compute_mqd), false); /* whole compute MQD */
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, data,
+		     sizeof(struct v12_sdma_mqd), false); /* whole SDMA MQD */
+	return 0;
+}
+
+#endif
+
+/* Allocate an mqd_manager and wire up the v12 callbacks for @type; NULL on failure. */
+struct mqd_manager *mqd_manager_init_v12(enum KFD_MQD_TYPE type, struct kfd_node *dev)
+{
+	struct mqd_manager *mqd;
+
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;
+
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->init_mqd = init_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->get_wave_state = get_wave_state;
+		mqd->mqd_size = sizeof(struct v12_compute_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#ifdef CONFIG_DEBUG_FS
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->update_mqd = update_mqd;
+		mqd->load_mqd = kfd_hiq_load_mqd_kiq;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v12_compute_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#ifdef CONFIG_DEBUG_FS
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		mqd->check_preemption_failed = check_preemption_failed;
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->update_mqd = update_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v12_compute_mqd);
+#ifdef CONFIG_DEBUG_FS
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->load_mqd = kfd_load_mqd_sdma;
+		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+		mqd->is_occupied = kfd_is_occupied_sdma;
+		mqd->mqd_size = sizeof(struct v12_sdma_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#ifdef CONFIG_DEBUG_FS
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 42d881809dc7..f2dee320fada 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -77,7 +77,9 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se1 = se_mask[1];
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
- if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) {
+ if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
m->compute_static_thread_mgmt_se4 = se_mask[4];
m->compute_static_thread_mgmt_se5 = se_mask[5];
m->compute_static_thread_mgmt_se6 = se_mask[6];
@@ -181,6 +183,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -243,7 +248,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
@@ -299,19 +304,33 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
m->cp_hqd_ctx_save_control = 0;
- if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
+ if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) {
+ if (minfo->update_flag & UPDATE_FLAG_IS_GWS)
+ m->compute_resource_limits |=
+ COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+ else
+ m->compute_resource_limits &=
+ ~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK;
+ }
+
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct v9_mqd *m = (struct v9_mqd *)mqd;
+ uint32_t doorbell_id = m->queue_doorbell_id0;
+
+ m->queue_doorbell_id0 = 0;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, doorbell_id, 0);
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
@@ -354,7 +373,7 @@ static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stac
{
struct v9_mqd *m = get_mqd(mqd);
- *ctl_stack_size = m->cp_hqd_cntl_stack_size;
+ *ctl_stack_size = m->cp_hqd_cntl_stack_size * NUM_XCC(mm->dev->xcc_mask);
}
static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
@@ -369,6 +388,24 @@ static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi
memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
}
+/* Checkpoint each XCC's MQD and control stack into consecutive destination slots. */
+static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+				  void *mqd_dst, void *ctl_stack_dst)
+{
+	uint64_t stride = get_mqd(mqd)->cp_mqd_stride_size;
+	uint8_t *mqd_out = mqd_dst;
+	uint8_t *stack_out = ctl_stack_dst;
+	struct v9_mqd *m;
+	int xcc;
+
+	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
+		m = get_mqd(mqd + stride * xcc);
+
+		checkpoint_mqd(mm, m, mqd_out + sizeof(*m) * xcc,
+			       stack_out + m->cp_hqd_cntl_stack_size * xcc);
+	}
+}
+
static void restore_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *qp,
@@ -476,6 +513,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdma_engine_id = q->sdma_engine_id;
m->sdma_queue_id = q->sdma_queue_id;
m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+ /* Allow context switch so we don't cross-process starve with a massive
+ * command buffer of long-running SDMA commands
+ */
+ m->sdmax_rlcx_ib_cntl |= SDMA0_GFX_IB_CNTL__SWITCH_INSIDE_IB_MASK;
q->is_active = QUEUE_IS_ACTIVE(*q);
}
@@ -535,6 +576,9 @@ static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
if (xcc == 0) {
/* Set no_update_rptr = 0 in Master XCC */
@@ -598,6 +642,25 @@ static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
return err;
}
+static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd)
+{
+	uint64_t stride = kfd_hiq_mqd_stride(mm->dev);
+	uint32_t xcc_mask = mm->dev->xcc_mask;
+	struct v9_mqd *m;
+	bool failed = false;
+	int xcc_id, inst = 0;
+
+	for_each_inst(xcc_id, xcc_mask) {
+		m = get_mqd(mqd + stride * inst);
+		if (kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, inst))
+			failed = true;	/* keep going: every instance's id is reset */
+		m->queue_doorbell_id0 = 0;
+		inst++;
+	}
+
+	return failed;
+}
+
static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
struct kfd_mem_obj *xcc_mqd_mem_obj,
uint64_t offset)
@@ -626,7 +689,9 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);
init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
-
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
m->cp_mqd_stride_size = offset;
/*
@@ -686,7 +751,10 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd + size * xcc);
update_mqd(mm, m, q, minfo);
- update_cu_mask(mm, mqd, minfo, xcc);
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
+ update_cu_mask(mm, m, minfo, xcc);
if (q->format == KFD_QUEUE_FORMAT_AQL) {
switch (xcc) {
@@ -708,6 +776,43 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
}
}
+/*
+ * Restore a multi-XCC queue: the checkpointed MQDs and control stacks are laid
+ * out back to back in the source buffers, one restore_mqd() call per XCC.
+ */
+static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *qp, const void *mqd_src,
+			const void *ctl_stack_src, u32 ctl_stack_size)
+{
+	uint64_t stride = mm->mqd_stride(mm, qp);
+	struct kfd_mem_obj xcc_mqd_mem_obj;
+	u32 num_xcc, xcc_stack_size;
+	struct v9_mqd *m;
+	int xcc;
+
+	mm->dev->dqm->current_logical_xcc_start++;
+
+	num_xcc = NUM_XCC(mm->dev->xcc_mask);
+	xcc_stack_size = ctl_stack_size / num_xcc;
+
+	memset(&xcc_mqd_mem_obj, 0x0, sizeof(xcc_mqd_mem_obj));
+
+	/* Callers are handed the XCC0 MQD pointer and GART address. */
+	*mqd = mqd_mem_obj->cpu_ptr;
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+
+	for (xcc = 0; xcc < num_xcc; xcc++) {
+		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, stride * xcc);
+
+		restore_mqd(mm, (void **)&m, &xcc_mqd_mem_obj,
+			    NULL, qp,
+			    (uint8_t *)mqd_src + xcc * sizeof(*m),
+			    (uint8_t *)ctl_stack_src + xcc * xcc_stack_size,
+			    xcc_stack_size);
+	}
+}
static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
enum kfd_preempt_type type, unsigned int timeout,
uint32_t pipe_id, uint32_t queue_id)
@@ -841,25 +946,29 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->free_mqd = kfd_free_mqd_cp;
mqd->is_occupied = kfd_is_occupied_cp;
mqd->get_checkpoint_info = get_checkpoint_info;
- mqd->checkpoint_mqd = checkpoint_mqd;
- mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v9_mqd);
mqd->mqd_stride = mqd_stride_v9;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) {
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_v9_4_3;
mqd->load_mqd = load_mqd_v9_4_3;
mqd->update_mqd = update_mqd_v9_4_3;
mqd->destroy_mqd = destroy_mqd_v9_4_3;
mqd->get_wave_state = get_wave_state_v9_4_3;
+ mqd->checkpoint_mqd = checkpoint_mqd_v9_4_3;
+ mqd->restore_mqd = restore_mqd_v9_4_3;
} else {
mqd->init_mqd = init_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = kfd_destroy_mqd_cp;
mqd->get_wave_state = get_wave_state;
+ mqd->checkpoint_mqd = checkpoint_mqd;
+ mqd->restore_mqd = restore_mqd;
}
break;
case KFD_MQD_TYPE_HIQ:
@@ -872,15 +981,19 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
- if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) {
+ mqd->check_preemption_failed = check_preemption_failed;
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_hiq_v9_4_3;
mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
+ mqd->check_preemption_failed = check_preemption_failed_v9_4_3;
} else {
mqd->init_mqd = init_mqd_hiq;
mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->destroy_mqd = destroy_hiq_mqd;
+ mqd->check_preemption_failed = check_preemption_failed;
}
break;
case KFD_MQD_TYPE_DIQ:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 3e1a574d4ea6..c1fafc502515 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -237,11 +237,11 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct vi_mqd *m = (struct vi_mqd *)mqd;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
static void update_mqd(struct mqd_manager *mm, void *mqd,
@@ -482,7 +482,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 401096c103b2..b1a6eb349bb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -28,6 +28,11 @@
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
+#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
+#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
+#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
+#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)
+
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
unsigned int buffer_size_bytes)
{
@@ -40,12 +45,14 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
- bool *over_subscription)
+ int *over_subscription,
+ int xnack_conflict)
{
unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
unsigned int map_queue_size;
unsigned int max_proc_per_quantum = 1;
- struct kfd_node *dev = pm->dqm->dev;
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
process_count = pm->dqm->processes_count;
queue_count = pm->dqm->active_queue_count;
@@ -57,17 +64,22 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- *over_subscription = false;
+ *over_subscription = 0;
- if (dev->max_proc_per_quantum > 1)
- max_proc_per_quantum = dev->max_proc_per_quantum;
+ if (node->max_proc_per_quantum > 1)
+ max_proc_per_quantum = node->max_proc_per_quantum;
- if ((process_count > max_proc_per_quantum) ||
- compute_queue_count > get_cp_queues_num(pm->dqm) ||
- gws_queue_count > 1) {
- *over_subscription = true;
- pr_debug("Over subscribed runlist\n");
- }
+ if (process_count > max_proc_per_quantum)
+ *over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
+ if (compute_queue_count > get_cp_queues_num(pm->dqm))
+ *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
+ if (gws_queue_count > 1)
+ *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
+ if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
+ *over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;
+
+ if (*over_subscription)
+ dev_dbg(dev, "Over subscribed runlist\n");
map_queue_size = pm->pmf->map_queues_size;
/* calculate run list ib allocation size */
@@ -81,29 +93,32 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
if (*over_subscription)
*rlib_size += pm->pmf->runlist_size;
- pr_debug("runlist ib size %d\n", *rlib_size);
+ dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}
static int pm_allocate_runlist_ib(struct packet_manager *pm,
unsigned int **rl_buffer,
uint64_t *rl_gpu_buffer,
unsigned int *rl_buffer_size,
- bool *is_over_subscription)
+ int *is_over_subscription,
+ int xnack_conflict)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
int retval;
if (WARN_ON(pm->allocated))
return -EINVAL;
- pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+ pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
+ xnack_conflict);
mutex_lock(&pm->lock);
- retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
- &pm->ib_buffer_obj);
+ retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);
if (retval) {
- pr_err("Failed to allocate runlist IB\n");
+ dev_err(dev, "Failed to allocate runlist IB\n");
goto out;
}
@@ -125,32 +140,54 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
{
unsigned int alloc_size_bytes;
unsigned int *rl_buffer, rl_wptr, i;
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
int retval, processes_mapped;
struct device_process_node *cur;
struct qcm_process_device *qpd;
struct queue *q;
struct kernel_queue *kq;
- bool is_over_subscription;
+ int is_over_subscription;
+ int xnack_enabled = -1;
+ bool xnack_conflict = 0;
rl_wptr = retval = processes_mapped = 0;
+ /* Check if processes set different xnack modes */
+ list_for_each_entry(cur, queues, list) {
+ qpd = cur->qpd;
+ if (xnack_enabled < 0)
+ /* First process */
+ xnack_enabled = qpd->pqm->process->xnack_enabled;
+ else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
+ /* Found a process with a different xnack mode */
+ xnack_conflict = 1;
+ break;
+ }
+ }
+
retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
- &alloc_size_bytes, &is_over_subscription);
+ &alloc_size_bytes, &is_over_subscription,
+ xnack_conflict);
if (retval)
return retval;
*rl_size_bytes = alloc_size_bytes;
pm->ib_size_bytes = alloc_size_bytes;
- pr_debug("Building runlist ib process count: %d queues count %d\n",
+ dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
pm->dqm->processes_count, pm->dqm->active_queue_count);
+build_runlist_ib:
/* build the run list ib packet */
list_for_each_entry(cur, queues, list) {
qpd = cur->qpd;
+ /* group processes with the same xnack mode together */
+ if (qpd->pqm->process->xnack_enabled != xnack_enabled)
+ continue;
/* build map process packet */
if (processes_mapped >= pm->dqm->processes_count) {
- pr_debug("Not enough space left in runlist IB\n");
+ dev_dbg(dev, "Not enough space left in runlist IB\n");
pm_release_ib(pm);
return -ENOMEM;
}
@@ -167,7 +204,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!kq->queue->properties.is_active)
continue;
- pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
+ dev_dbg(dev,
+ "static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
retval = pm->pmf->map_queues(pm,
@@ -186,7 +224,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!q->properties.is_active)
continue;
- pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
+ dev_dbg(dev,
+ "static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
retval = pm->pmf->map_queues(pm,
@@ -202,18 +241,33 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
alloc_size_bytes);
}
}
+ if (xnack_conflict) {
+ /* pick up processes with the other xnack mode */
+ xnack_enabled = !xnack_enabled;
+ xnack_conflict = 0;
+ goto build_runlist_ib;
+ }
- pr_debug("Finished map process and queues to runlist\n");
+ dev_dbg(dev, "Finished map process and queues to runlist\n");
if (is_over_subscription) {
if (!pm->is_over_subscription)
- pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
+ dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
+ is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
+ " too many processes" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
+ " too many queues" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
+ " multiple processes using cooperative launch" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
+ " xnack on/off processes mixed on gfx9" : "");
+
retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
true);
}
- pm->is_over_subscription = is_over_subscription;
+ pm->is_over_subscription = !!is_over_subscription;
for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
pr_debug("0x%2X ", rl_buffer[i]);
@@ -239,7 +293,9 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
break;
default:
if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
- KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
pm->pmf = &kfd_aldebaran_pm_funcs;
else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
pm->pmf = &kfd_v9_pm_funcs;
@@ -262,16 +318,18 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
return 0;
}
-void pm_uninit(struct packet_manager *pm, bool hanging)
+void pm_uninit(struct packet_manager *pm)
{
mutex_destroy(&pm->lock);
- kernel_queue_uninit(pm->priv_queue, hanging);
+ kernel_queue_uninit(pm->priv_queue);
pm->priv_queue = NULL;
}
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -281,14 +339,14 @@ int pm_send_set_resources(struct packet_manager *pm,
size / sizeof(uint32_t),
(unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
retval = pm->pmf->set_resources(pm, buffer, res);
if (!retval)
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
else
kq_rollback_packet(pm->priv_queue);
@@ -325,7 +383,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
if (retval)
goto fail_create_runlist;
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
@@ -343,6 +401,8 @@ fail_create_runlist_ib:
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint64_t fence_value)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -354,14 +414,14 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
if (!retval)
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
else
kq_rollback_packet(pm->priv_queue);
@@ -370,12 +430,33 @@ out:
return retval;
}
-int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
+ * by writing to CP_IQ_WAIT_TIME2 registers.
+ *
+ * @cmd: See enum kfd_config_dequeue_wait_counts_cmd definition
+ * @value: Depends on the cmd. This parameter is unused for
+ * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
+ * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set
+ *
+ */
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
int retval = 0;
uint32_t *buffer, size;
- size = pm->pmf->set_grace_period_size;
+ if (!pm->pmf->config_dequeue_wait_counts ||
+ !pm->pmf->config_dequeue_wait_counts_size)
+ return 0;
+
+ if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
+ KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
+ return 0;
+
+ size = pm->pmf->config_dequeue_wait_counts_size;
mutex_lock(&pm->lock);
@@ -385,18 +466,24 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
(unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev,
+ "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
- retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
- if (!retval)
- kq_submit_packet(pm->priv_queue);
- else
+ retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
+ cmd, value);
+ if (!retval) {
+ retval = kq_submit_packet(pm->priv_queue);
+
+ /* If default value is modified, cache that in dqm->wait_times */
+ if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
+ update_dqm_wait_times(pm->dqm);
+ } else {
kq_rollback_packet(pm->priv_queue);
+ }
}
-
out:
mutex_unlock(&pm->lock);
return retval;
@@ -406,6 +493,8 @@ int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -414,14 +503,14 @@ int pm_send_unmap_queue(struct packet_manager *pm,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
if (!retval)
- kq_submit_packet(pm->priv_queue);
+ retval = kq_submit_packet(pm->priv_queue);
else
kq_rollback_packet(pm->priv_queue);
@@ -463,6 +552,8 @@ out:
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int r = 0;
@@ -474,16 +565,16 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
r = -ENOMEM;
goto out;
}
memset(buffer, 0x55, size);
kq_submit_packet(pm->priv_queue);
- pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
- buffer[0], buffer[1], buffer[2], buffer[3],
- buffer[4], buffer[5], buffer[6]);
+ dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
+ buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
+ buffer[5], buffer[6]);
out:
mutex_unlock(&pm->lock);
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 1a03173e2313..505036968a77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -37,14 +37,17 @@ static int pm_map_process_v9(struct packet_manager *pm,
struct kfd_node *kfd = pm->dqm->dev;
struct kfd_process_device *pdd =
container_of(qpd, struct kfd_process_device, qpd);
+ struct amdgpu_device *adev = kfd->adev;
packet = (struct pm4_mes_map_process *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
+ if (adev->enforce_isolation[kfd->node_id] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ packet->bitfields2.exec_cleaner_shader = 1;
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
@@ -89,17 +92,22 @@ static int pm_map_process_aldebaran(struct packet_manager *pm,
struct pm4_mes_map_process_aldebaran *packet;
uint64_t vm_page_table_base_addr = qpd->page_table_base;
struct kfd_dev *kfd = pm->dqm->dev->kfd;
+ struct kfd_node *knode = pm->dqm->dev;
struct kfd_process_device *pdd =
container_of(qpd, struct kfd_process_device, qpd);
int i;
+ struct amdgpu_device *adev = kfd->adev;
packet = (struct pm4_mes_map_process_aldebaran *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process_aldebaran));
+ if (adev->enforce_isolation[knode->node_id] ==
+ AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ packet->bitfields2.exec_cleaner_shader = 1;
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
@@ -144,18 +152,23 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
int concurrent_proc_cnt = 0;
struct kfd_node *kfd = pm->dqm->dev;
+ struct amdgpu_device *adev = kfd->adev;
/* Determine the number of processes to map together to HW:
* it can not exceed the number of VMIDs available to the
* scheduler, and it is determined by the smaller of the number
* of processes in the runlist and kfd module parameter
* hws_max_conc_proc.
+ * However, if enforce_isolation is set (toggle LDS/VGPRs/SGPRs
+ * cleaner between process switch), enable single-process mode
+ * in HWS.
* Note: the arbitration between the number of VMIDs and
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- concurrent_proc_cnt = min(pm->dqm->processes_count,
- kfd->max_proc_per_quantum);
+ concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] ==
+ AMDGPU_ENFORCE_ISOLATION_ENABLE) ?
+ 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum);
packet = (struct pm4_mes_runlist *)buffer;
@@ -190,6 +203,8 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_set_resources__hsa_interface_queue_hiq;
packet->bitfields2.vmid_mask = res->vmid_mask;
packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+ if (pm->dqm->dev->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)
+ packet->bitfields2.enb_xnack_retry_disable_check = 1;
packet->bitfields7.oac_mask = res->oac_mask;
packet->bitfields8.gds_heap_base = res->gds_heap_base;
packet->bitfields8.gds_heap_size = res->gds_heap_size;
@@ -205,14 +220,14 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
static inline bool pm_use_ext_eng(struct kfd_dev *dev)
{
- return dev->adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 2, 0);
+ return amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 2, 0);
}
static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static)
{
struct pm4_mes_map_queues *packet;
- bool use_static = is_static;
packet = (struct pm4_mes_map_queues *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
@@ -225,7 +240,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__compute_vi;
- packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
+ packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0;
packet->bitfields2.extended_engine_sel =
extended_engine_sel__mes_map_queues__legacy_engine_sel;
packet->bitfields2.queue_type =
@@ -233,7 +248,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
switch (q->properties.type) {
case KFD_QUEUE_TYPE_COMPUTE:
- if (use_static)
+ if (is_static)
packet->bitfields2.queue_type =
queue_type__mes_map_queues__normal_latency_static_queue_vi;
break;
@@ -243,7 +258,6 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
- use_static = false; /* no static queues under SDMA */
if (q->properties.sdma_engine_id < 2 &&
!pm_use_ext_eng(q->device->kfd))
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
@@ -286,23 +300,79 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
-static int pm_set_grace_period_v9(struct packet_manager *pm,
+static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm,
+ uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
+ pm->dqm->dev->adev,
+ pm->dqm->wait_times,
+ sch_value,
+ que_sleep,
+ reg_offset,
+ reg_data);
+}
+
+/* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with
+ * register/value for configuring dequeue wait counts
+ *
+ * @return: -ve for failure and 0 for success and buffer is
+ * filled in with packet
+ *
+ **/
+static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
uint32_t *buffer,
- uint32_t grace_period)
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
{
struct pm4_mec_write_data_mmio *packet;
uint32_t reg_offset = 0;
uint32_t reg_data = 0;
- pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
- pm->dqm->dev->adev,
- pm->dqm->wait_times,
- grace_period,
- &reg_offset,
- &reg_data);
+ switch (cmd) {
+ case KFD_DEQUEUE_WAIT_INIT: {
+ uint32_t sch_wave = 0, que_sleep = 1;
+
+ /* For all gfx9 ASICs > gfx941,
+ * Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
+ * On a 1GHz machine this is roughly 1 microsecond, which is
+ * about how long it takes to load data out of memory during
+ * queue connect
+ * QUE_SLEEP: Wait Count for Dequeue Retry.
+ *
+ * Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU
+ */
+ if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
+ KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))
+ return -EPERM;
+
+ if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
+ (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
+ sch_wave = 1;
- if (grace_period == USE_DEFAULT_GRACE_PERIOD)
- reg_data = pm->dqm->wait_times;
+ pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
+ &reg_offset, &reg_data);
+
+ break;
+ }
+ case KFD_DEQUEUE_WAIT_RESET:
+ /* reg_data would be set to dqm->wait_times */
+ pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset, &reg_data);
+ break;
+
+ case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
+ /* The CP cannot handle value 0 and it will result in
+ * an infinite grace period being set so set to 1 to prevent this. Also
+ * avoid debugger API breakage as it sets 0 and expects a low value.
+ */
+ if (!value)
+ value = 1;
+ pm_build_dequeue_wait_counts_packet_info(pm, value, 0, &reg_offset, &reg_data);
+ break;
+ default:
+ pr_err("Invalid dequeue wait cmd\n");
+ return -EINVAL;
+ }
packet = (struct pm4_mec_write_data_mmio *)buffer;
memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
@@ -404,7 +474,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
- .set_grace_period = pm_set_grace_period_v9,
+ .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -412,7 +482,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
+ .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
@@ -423,7 +493,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
- .set_grace_period = pm_set_grace_period_v9,
+ .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
@@ -431,7 +501,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
+ .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index c1199d06d131..a1de5d7e173a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -42,6 +42,7 @@ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
struct qcm_process_device *qpd)
{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct pm4_mes_map_process *packet;
packet = (struct pm4_mes_map_process *)buffer;
@@ -52,7 +53,7 @@ static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields3.page_table_base = qpd->page_table_base;
packet->bitfields10.gds_size = qpd->gds_size;
packet->bitfields10.num_gws = qpd->num_gws;
@@ -303,7 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources = pm_set_resources_vi,
.map_queues = pm_map_queues_vi,
.unmap_queues = pm_unmap_queues_vi,
- .set_grace_period = NULL,
+ .config_dequeue_wait_counts = NULL,
.query_status = pm_query_status_vi,
.release_mem = pm_release_mem_vi,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -311,7 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = 0,
+ .config_dequeue_wait_counts_size = 0,
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = sizeof(struct pm4_mec_release_mem)
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index 8b6b2bd5c148..e356a207d03c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -63,7 +63,8 @@ struct pm4_mes_set_resources {
struct {
uint32_t vmid_mask:16;
uint32_t unmap_latency:8;
- uint32_t reserved1:5;
+ uint32_t reserved1:4;
+ uint32_t enb_xnack_retry_disable_check:1;
enum mes_set_resources_queue_type_enum queue_type:3;
} bitfields2;
uint32_t ordinal2;
@@ -145,8 +146,9 @@ struct pm4_mes_map_process {
union {
struct {
- uint32_t pasid:16;
- uint32_t reserved1:2;
+ uint32_t pasid:16; /* 0 - 15 */
+ uint32_t reserved1:1; /* 16 */
+ uint32_t exec_cleaner_shader:1; /* 17 */
uint32_t debug_vmid:4;
uint32_t new_debug:1;
uint32_t reserved2:1;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
index 38f5cb6a222a..e0ed62c4ade0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
@@ -37,7 +37,7 @@ struct pm4_mes_map_process_aldebaran {
struct {
uint32_t pasid:16; /* 0 - 15 */
uint32_t single_memops:1; /* 16 */
- uint32_t reserved1:1; /* 17 */
+ uint32_t exec_cleaner_shader:1; /* 17 */
uint32_t debug_vmid:4; /* 18 - 21 */
uint32_t new_debug:1; /* 22 */
uint32_t tmz:1; /* 23 */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index fa24e1852493..70ef051511bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -32,7 +32,7 @@
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
-#include <linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <linux/seq_file.h>
@@ -99,11 +99,11 @@
/*
* Size of the per-process TBA+TMA buffer: 2 pages
*
- * The first page is the TBA used for the CWSR ISA code. The second
- * page is used as TMA for user-mode trap handler setup in daisy-chain mode.
+ * The first chunk is the TBA used for the CWSR ISA code. The second
+ * chunk is used as TMA for user-mode trap handler setup in daisy-chain mode.
*/
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
-#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
+#define KFD_CWSR_TMA_OFFSET (PAGE_SIZE + 2048)
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
(KFD_MAX_NUM_OF_PROCESSES * \
@@ -111,7 +111,14 @@
#define KFD_KERNEL_QUEUE_SIZE 2048
-#define KFD_UNMAP_LATENCY_MS (4000)
+/* KFD_UNMAP_LATENCY_MS is the timeout the CP waits for SDMA preemption. One XCC
+ * can be associated with 2 SDMA engines. queue_preemption_timeout_ms is the time
+ * the driver waits for the CP to return the UNMAP_QUEUE fence. Thus the math is
+ * queue_preemption_timeout_ms = sdma_preemption_time * 2 + cp workload
+ * The formula here makes the CP workload 10% of the total timeout
+ */
+#define KFD_UNMAP_LATENCY_MS \
+ ((queue_preemption_timeout_ms - queue_preemption_timeout_ms / 10) >> 1)
#define KFD_MAX_SDMA_QUEUES 128
@@ -202,11 +209,13 @@ enum cache_policy {
cache_policy_noncoherent
};
-#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0])
+#define KFD_GC_VERSION(dev) (amdgpu_ip_version((dev)->adev, GC_HWIP, 0))
#define KFD_IS_SOC15(dev) ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))
#define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \
- (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)))
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)))
struct kfd_node;
@@ -272,7 +281,6 @@ struct kfd_node {
/* Interrupts */
struct kfifo ih_fifo;
- struct workqueue_struct *ih_wq;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
@@ -288,7 +296,6 @@ struct kfd_node {
/* Global GWS resource shared between processes */
void *gws;
- bool gws_debug_workaround;
/* Clients watching SMI events */
struct list_head smi_clients;
@@ -309,6 +316,10 @@ struct kfd_node {
struct kfd_local_mem_info local_mem_info;
struct kfd_dev *kfd;
+
+ /* Track per device allocated watch points */
+ uint32_t alloc_watch_ids;
+ spinlock_t watch_points_lock;
};
struct kfd_dev {
@@ -361,15 +372,18 @@ struct kfd_dev {
struct kfd_node *nodes[MAX_KFD_NODES];
unsigned int num_nodes;
- /* Track per device allocated watch points */
- uint32_t alloc_watch_ids;
- spinlock_t watch_points_lock;
+ struct workqueue_struct *ih_wq;
/* Kernel doorbells for KFD device */
struct amdgpu_bo *doorbells;
/* bitmap for dynamic doorbell allocation from doorbell object */
unsigned long *doorbell_bitmap;
+
+ /* for dynamic partitioning */
+ int kfd_dev_lock;
+
+ atomic_t kfd_processes_count;
};
enum kfd_mempool {
@@ -413,13 +427,16 @@ enum kfd_unmap_queues_filter {
* @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
*
* @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface.
+ *
+ * @KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: SDMA user mode queue with target SDMA engine ID.
*/
enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE,
KFD_QUEUE_TYPE_SDMA,
KFD_QUEUE_TYPE_HIQ,
KFD_QUEUE_TYPE_DIQ,
- KFD_QUEUE_TYPE_SDMA_XGMI
+ KFD_QUEUE_TYPE_SDMA_XGMI,
+ KFD_QUEUE_TYPE_SDMA_BY_ENG_ID
};
enum kfd_queue_format {
@@ -493,8 +510,8 @@ struct queue_properties {
uint64_t queue_size;
uint32_t priority;
uint32_t queue_percent;
- uint32_t *read_ptr;
- uint32_t *write_ptr;
+ void __user *read_ptr;
+ void __user *write_ptr;
void __iomem *doorbell_ptr;
uint32_t doorbell_off;
bool is_interop;
@@ -521,6 +538,12 @@ struct queue_properties {
uint64_t tba_addr;
uint64_t tma_addr;
uint64_t exception_status;
+
+ struct amdgpu_bo *wptr_bo;
+ struct amdgpu_bo *rptr_bo;
+ struct amdgpu_bo *ring_bo;
+ struct amdgpu_bo *eop_buf_bo;
+ struct amdgpu_bo *cwsr_bo;
};
#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \
@@ -532,6 +555,7 @@ struct queue_properties {
enum mqd_update_flag {
UPDATE_FLAG_DBG_WA_ENABLE = 1,
UPDATE_FLAG_DBG_WA_DISABLE = 2,
+ UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */
};
struct mqd_update_info {
@@ -602,7 +626,7 @@ struct queue {
uint64_t gang_ctx_gpu_addr;
void *gang_ctx_cpu_ptr;
- struct amdgpu_bo *wptr_bo;
+ struct amdgpu_bo *wptr_bo_gart;
};
enum KFD_MQD_TYPE {
@@ -748,7 +772,6 @@ struct kfd_process_device {
/* VM context for GPUVM allocations */
struct file *drm_file;
void *drm_priv;
- atomic64_t tlb_seq;
/* GPUVM allocations storage */
struct idr alloc_idr;
@@ -765,7 +788,7 @@ struct kfd_process_device {
enum kfd_pdd_bound bound;
/* VRAM usage */
- uint64_t vram_usage;
+ atomic64_t vram_usage;
struct attribute attr_vram;
char vram_filename[MAX_SYSFS_FILENAME_LEN];
@@ -836,6 +859,11 @@ struct kfd_process_device {
void *proc_ctx_bo;
uint64_t proc_ctx_gpu_addr;
void *proc_ctx_cpu_ptr;
+
+ /* Tracks queue reset status */
+ bool has_reset_queue;
+
+ u32 pasid;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -853,6 +881,14 @@ struct svm_range_list {
struct delayed_work restore_work;
DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
struct task_struct *faulting_task;
+ /* checkpoint ts decides if page fault recovery needs to be dropped */
+ uint64_t checkpoint_ts[MAX_GPU_INSTANCE];
+
+ /* Default granularity to use in buffer migration
+ * and restoration of backing memory while handling
+ * recoverable page faults
+ */
+ uint8_t default_granularity;
};
/* Process data */
@@ -887,8 +923,6 @@ struct kfd_process {
/* We want to receive a notification when the mm_struct is destroyed */
struct mmu_notifier mmu_notifier;
- u32 pasid;
-
/*
* Array of kfd_process_device pointers,
* one for each device the process is using.
@@ -918,7 +952,7 @@ struct kfd_process {
* fence will be triggered during eviction and new one will be created
* during restore
*/
- struct dma_fence *ef;
+ struct dma_fence __rcu *ef;
/* Work items for evicting and restoring BOs */
struct delayed_work eviction_work;
@@ -971,7 +1005,7 @@ struct kfd_process {
struct work_struct debug_event_workarea;
/* Tracks debug per-vmid request for debug flags */
- bool dbg_flags;
+ u32 dbg_flags;
atomic_t poison;
/* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */
@@ -981,6 +1015,9 @@ struct kfd_process {
struct semaphore runtime_enable_sema;
bool is_runtime_retry;
struct kfd_runtime_info runtime_info;
+
+ /* if gpu page fault sent to KFD */
+ bool gpu_page_fault;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -1013,7 +1050,8 @@ void kfd_process_destroy_wq(void);
void kfd_cleanup_processes(void);
struct kfd_process *kfd_create_process(struct task_struct *thread);
struct kfd_process *kfd_get_process(const struct task_struct *task);
-struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
+ struct kfd_process_device **pdd);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
@@ -1065,8 +1103,6 @@ struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
/* PASIDs */
int kfd_pasid_init(void);
void kfd_pasid_exit(void);
-bool kfd_set_pasid_limit(unsigned int new_limit);
-unsigned int kfd_get_pasid_limit(void);
u32 kfd_pasid_alloc(void);
void kfd_pasid_free(u32 pasid);
@@ -1116,7 +1152,6 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_node *kfd_device_by_id(uint32_t gpu_id);
-struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev);
static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t node_id,
uint32_t vmid)
{
@@ -1128,7 +1163,9 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
struct kfd_dev *dev = adev->kfd.dev;
uint32_t i;
- if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0))
return dev->nodes[0];
for (i = 0; i < dev->num_nodes; i++)
@@ -1282,6 +1319,15 @@ int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
+int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo,
+ u64 expected_size);
+void kfd_queue_buffer_put(struct amdgpu_bo **bo);
+int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
+int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
+void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo);
+int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
+ struct queue_properties *properties);
+void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
@@ -1293,12 +1339,15 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
+struct mqd_manager *mqd_manager_init_v12(enum KFD_MQD_TYPE type,
+ struct kfd_node *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
enum kfd_queue_type type);
-void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
-int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid);
+void kernel_queue_uninit(struct kernel_queue *kq);
+int kfd_evict_process_device(struct kfd_process_device *pdd);
+int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id);
/* Process Queue Manager */
struct process_queue_node {
@@ -1313,10 +1362,8 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_node *dev,
- struct file *f,
struct queue_properties *properties,
unsigned int *qid,
- struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
@@ -1328,8 +1375,6 @@ int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid,
struct mqd_update_info *minfo);
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws);
-struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
- unsigned int qid);
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
unsigned int qid);
int pqm_get_wave_state(struct process_queue_manager *pqm,
@@ -1343,7 +1388,7 @@ int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
int *num_qss_entries,
uint32_t *entry_size);
-int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
+int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
uint64_t fence_value,
unsigned int timeout_ms);
@@ -1356,6 +1401,24 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT (10)
+/**
+ * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring
+ * dequeue wait counts.
+ *
+ * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for
+ * certain ASICs. For these ASICs, this is the default value used by RESET
+ * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized value
+ * for certain ASICs. For others set it to default hardware reset value
+ * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait
+ *
+ */
+enum kfd_config_dequeue_wait_counts_cmd {
+ KFD_DEQUEUE_WAIT_INIT = 1,
+ KFD_DEQUEUE_WAIT_RESET = 2,
+ KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3
+};
+
+
struct packet_manager {
struct device_queue_manager *dqm;
struct kernel_queue *priv_queue;
@@ -1381,8 +1444,8 @@ struct packet_manager_funcs {
int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
enum kfd_unmap_queues_filter mode,
uint32_t filter_param, bool reset);
- int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
- uint32_t grace_period);
+ int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t value);
int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint64_t fence_value);
int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
@@ -1393,7 +1456,7 @@ struct packet_manager_funcs {
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
- int set_grace_period_size;
+ int config_dequeue_wait_counts_size;
int query_status_size;
int release_mem_size;
};
@@ -1403,7 +1466,7 @@ extern const struct packet_manager_funcs kfd_v9_pm_funcs;
extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
-void pm_uninit(struct packet_manager *pm, bool hanging);
+void pm_uninit(struct packet_manager *pm);
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res);
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
@@ -1416,7 +1479,9 @@ int pm_send_unmap_queue(struct packet_manager *pm,
void pm_release_ib(struct packet_manager *pm);
-int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t wait_counts_config);
/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
@@ -1454,7 +1519,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
-void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va);
+
+void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
struct kfd_vm_fault_info *info,
struct kfd_hsa_memory_exception_data *data);
@@ -1462,11 +1529,18 @@ void kfd_signal_reset_event(struct kfd_node *dev);
void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
+ enum TLB_FLUSH_TYPE type)
+{
+ struct amdgpu_device *adev = pdd->dev->adev;
+ struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+ amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
+}
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
- return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) ||
+ return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) ||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
}
@@ -1474,7 +1548,7 @@ static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
int kfd_send_exception_to_runtime(struct kfd_process *p,
unsigned int queue_id,
uint64_t error_reason);
-bool kfd_is_locked(void);
+bool kfd_is_locked(struct kfd_dev *kfd);
/* Compute profile */
void kfd_inc_compute_active(struct kfd_node *dev);
@@ -1482,10 +1556,15 @@ void kfd_dec_compute_active(struct kfd_node *dev);
/* Cgroup Support */
/* Check with device cgroup if @kfd device is accessible */
-static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd)
+static inline int kfd_devcgroup_check_permission(struct kfd_node *node)
{
#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
- struct drm_device *ddev = adev_to_drm(kfd->adev);
+ struct drm_device *ddev;
+
+ if (node->xcp)
+ ddev = node->xcp->ddev;
+ else
+ ddev = adev_to_drm(node->adev);
return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
ddev->render->index,
@@ -1516,10 +1595,15 @@ int kfd_debugfs_hang_hws(struct kfd_node *dev);
int pm_debugfs_hang_hws(struct packet_manager *pm);
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm);
+void kfd_debugfs_add_process(struct kfd_process *p);
+void kfd_debugfs_remove_process(struct kfd_process *p);
+
#else
static inline void kfd_debugfs_init(void) {}
static inline void kfd_debugfs_fini(void) {}
+static inline void kfd_debugfs_add_process(struct kfd_process *p) {}
+static inline void kfd_debugfs_remove_process(struct kfd_process *p) {}
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index fbf053001af9..ddfe30c13e9d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"
+#include "amdgpu_reset.h"
struct mm_struct;
@@ -270,6 +271,9 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
struct kfd_node *dev = NULL;
struct kfd_process *proc = NULL;
struct kfd_process_device *pdd = NULL;
+ int i;
+ struct kfd_cu_occupancy *cu_occupancy;
+ u32 queue_format;
pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
dev = pdd->dev;
@@ -279,41 +283,64 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
cu_cnt = 0;
proc = pdd->process;
if (pdd->qpd.queue_count == 0) {
- pr_debug("Gpu-Id: %d has no active queues for process %d\n",
- dev->id, proc->pasid);
+ pr_debug("Gpu-Id: %d has no active queues for process pid %d\n",
+ dev->id, (int)proc->lead_thread->pid);
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
}
/* Collect wave count from device if it supports */
wave_cnt = 0;
max_waves_per_cu = 0;
- dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
- &max_waves_per_cu, 0);
+
+ cu_occupancy = kcalloc(AMDGPU_MAX_QUEUES, sizeof(*cu_occupancy), GFP_KERNEL);
+ if (!cu_occupancy)
+ return -ENOMEM;
+
+ /*
+ * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition.
+ * For AQL queues, because of cooperative dispatch we multiply the wave count
+ * by number of XCCs in the partition to get the total wave counts across all
+ * XCCs in the partition.
+ * For PM4 queues, there is no cooperative dispatch so wave_cnt stays as it is.
+ */
+ dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy,
+ &max_waves_per_cu, ffs(dev->xcc_mask) - 1);
+
+ for (i = 0; i < AMDGPU_MAX_QUEUES; i++) {
+ if (cu_occupancy[i].wave_cnt != 0 &&
+ kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd,
+ cu_occupancy[i].doorbell_off,
+ &queue_format)) {
+ if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4))
+ wave_cnt += cu_occupancy[i].wave_cnt;
+ else
+ wave_cnt += (NUM_XCC(dev->xcc_mask) *
+ cu_occupancy[i].wave_cnt);
+ }
+ }
/* Translate wave count to number of compute units */
cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
+ kfree(cu_occupancy);
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
}
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- if (strcmp(attr->name, "pasid") == 0) {
- struct kfd_process *p = container_of(attr, struct kfd_process,
- attr_pasid);
-
- return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
- } else if (strncmp(attr->name, "vram_", 5) == 0) {
+ if (strcmp(attr->name, "pasid") == 0)
+ return snprintf(buffer, PAGE_SIZE, "%d\n", 0);
+ else if (strncmp(attr->name, "vram_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_vram);
- return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
} else if (strncmp(attr->name, "sdma_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_sdma);
struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
- INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
- kfd_sdma_activity_worker);
+ INIT_WORK_ONSTACK(&sdma_activity_work_handler.sdma_activity_work,
+ kfd_sdma_activity_worker);
sdma_activity_work_handler.pdd = pdd;
sdma_activity_work_handler.sdma_activity_counter = 0;
@@ -321,6 +348,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
schedule_work(&sdma_activity_work_handler.sdma_activity_work);
flush_work(&sdma_activity_work_handler.sdma_activity_work);
+ destroy_work_on_stack(&sdma_activity_work_handler.sdma_activity_work);
return snprintf(buffer, PAGE_SIZE, "%llu\n",
(sdma_activity_work_handler.sdma_activity_counter)/
@@ -664,7 +692,8 @@ int kfd_process_create_wq(void)
if (!kfd_process_wq)
kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
if (!kfd_restore_wq)
- kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
+ kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq",
+ WQ_FREEZABLE);
if (!kfd_process_wq || !kfd_restore_wq) {
kfd_process_destroy_wq();
@@ -810,6 +839,14 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
return ERR_PTR(-EINVAL);
}
+ /* If the process just called exec(3), it is possible that the
+ * cleanup of the kfd_process (following the release of the mm
+ * of the old process image) is still in the cleanup work queue.
+ * Make sure to drain any job before trying to recreate any
+ * resource for this process.
+ */
+ flush_workqueue(kfd_process_wq);
+
/*
* take kfd processes mutex before starting of process creation
* so there won't be a case where two threads of the same process
@@ -817,14 +854,16 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
*/
mutex_lock(&kfd_processes_mutex);
- if (kfd_is_locked()) {
- mutex_unlock(&kfd_processes_mutex);
+ if (kfd_is_locked(NULL)) {
pr_debug("KFD is locked! Cannot create process");
- return ERR_PTR(-EINVAL);
+ process = ERR_PTR(-EINVAL);
+ goto out;
}
- /* A prior open of /dev/kfd could have already created the process. */
- process = find_process(thread, false);
+ /* A prior open of /dev/kfd could have already created the process.
+ * find_process will increase process kref in this case
+ */
+ process = find_process(thread, true);
if (process) {
pr_debug("Process already found\n");
} else {
@@ -861,11 +900,11 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
kfd_procfs_add_sysfs_files(process);
kfd_procfs_add_sysfs_counters(process);
+ kfd_debugfs_add_process(process);
+
init_waitqueue_head(&process->wait_irq_drain);
}
out:
- if (!IS_ERR(process))
- kref_get(&process->ref);
mutex_unlock(&kfd_processes_mutex);
mmput(thread->mm);
@@ -1017,17 +1056,15 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
- pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
- pdd->dev->id, p->pasid);
+ kfd_smi_event_process(pdd, false);
+ pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
+ pdd->dev->id, p->lead_thread->pid);
kfd_process_device_destroy_cwsr_dgpu(pdd);
kfd_process_device_destroy_ib_mem(pdd);
- if (pdd->drm_file) {
- amdgpu_amdkfd_gpuvm_release_process_vm(
- pdd->dev->adev, pdd->drm_priv);
+ if (pdd->drm_file)
fput(pdd->drm_file);
- }
if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
@@ -1037,9 +1074,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
kfd_free_process_doorbells(pdd->dev->kfd, pdd);
- if (pdd->dev->kfd->shared_resources.enable_mes)
+ if (pdd->dev->kfd->shared_resources.enable_mes &&
+ pdd->proc_ctx_cpu_ptr)
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
- pdd->proc_ctx_bo);
+ &pdd->proc_ctx_bo);
/*
* before destroying pdd, make sure to report availability
* for auto suspend
@@ -1050,6 +1088,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
pdd->runtime_inuse = false;
}
+ atomic_dec(&pdd->dev->kfd->kfd_processes_count);
+
kfree(pdd);
p->pdds[i] = NULL;
}
@@ -1100,6 +1140,17 @@ static void kfd_process_remove_sysfs(struct kfd_process *p)
p->kobj = NULL;
}
+/*
+ * If any GPU is undergoing a reset, wait for the reset to complete.
+ */
+static void kfd_process_wait_gpu_reset_complete(struct kfd_process *p)
+{
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++)
+ flush_workqueue(p->pdds[i]->dev->adev->reset_domain->wq);
+}
+
/* No process locking is needed in this function, because the process
* is not findable any more. We must assume that no other thread is
* using it any more, otherwise we couldn't safely free the process
@@ -1109,28 +1160,37 @@ static void kfd_process_wq_release(struct work_struct *work)
{
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
+ struct dma_fence *ef;
kfd_process_dequeue_from_all_devices(p);
pqm_uninit(&p->pqm);
+ /*
+ * If a GPU is in reset, user queues may still be running; wait for the reset to complete.
+ */
+ kfd_process_wait_gpu_reset_complete(p);
+
/* Signal the eviction fence after user mode queues are
* destroyed. This allows any BOs to be freed without
* triggering pointless evictions or waiting for fences.
*/
- dma_fence_signal(p->ef);
+ synchronize_rcu();
+ ef = rcu_access_pointer(p->ef);
+ if (ef)
+ dma_fence_signal(ef);
kfd_process_remove_sysfs(p);
+ kfd_debugfs_remove_process(p);
kfd_process_kunmap_signal_bo(p);
kfd_process_free_outstanding_kfd_bos(p);
svm_range_list_fini(p);
kfd_process_destroy_pdds(p);
- dma_fence_put(p->ef);
+ dma_fence_put(ef);
kfd_event_free_process(p);
- kfd_pasid_free(p->pasid);
mutex_destroy(&p->mutex);
put_task_struct(p->lead_thread);
@@ -1148,10 +1208,8 @@ static void kfd_process_ref_release(struct kref *ref)
static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
{
- int idx = srcu_read_lock(&kfd_processes_srcu);
- struct kfd_process *p = find_process_by_mm(mm);
-
- srcu_read_unlock(&kfd_processes_srcu, idx);
+ /* This increments p->ref counter if kfd process p exists */
+ struct kfd_process *p = kfd_lookup_process_by_mm(mm);
return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
}
@@ -1299,7 +1357,8 @@ int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
if (IS_ERR_VALUE(qpd->tba_addr)) {
int err = qpd->tba_addr;
- pr_err("Failure to set tba address. error %d.\n", err);
+ dev_err(dev->adev->dev,
+ "Failure to set tba address. error %d.\n", err);
qpd->tba_addr = 0;
qpd->cwsr_kaddr = NULL;
return err;
@@ -1416,8 +1475,13 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
* per-process XNACK mode selection. But let the dev->noretry
* setting still influence the default XNACK mode.
*/
- if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev))
+ if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) {
+ if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) {
+ pr_debug("SRIOV platform xnack not supported\n");
+ return false;
+ }
continue;
+ }
/* GFXv10 and later GPUs do not support shader preemption
* during page faults. This can lead to poor QoS for queue
@@ -1477,12 +1541,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
atomic_set(&process->debugged_process_count, 0);
sema_init(&process->runtime_enable_sema, 0);
- process->pasid = kfd_pasid_alloc();
- if (process->pasid == 0) {
- err = -ENOSPC;
- goto err_alloc_pasid;
- }
-
err = pqm_init(&process->pqm, process);
if (err != 0)
goto err_process_pqm_init;
@@ -1536,8 +1594,6 @@ err_init_svm_range_list:
err_init_apertures:
pqm_uninit(&process->pqm);
err_process_pqm_init:
- kfd_pasid_free(process->pasid);
-err_alloc_pasid:
kfd_event_free_process(process);
err_event_init:
mutex_destroy(&process->mutex);
@@ -1562,7 +1618,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd = NULL;
- int retval = 0;
if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
return NULL;
@@ -1581,25 +1636,11 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
pdd->bound = PDD_UNBOUND;
pdd->already_dequeued = false;
pdd->runtime_inuse = false;
- pdd->vram_usage = 0;
+ atomic64_set(&pdd->vram_usage, 0);
pdd->sdma_past_activity_counter = 0;
pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
- if (dev->kfd->shared_resources.enable_mes) {
- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
- AMDGPU_MES_PROC_CTX_SIZE,
- &pdd->proc_ctx_bo,
- &pdd->proc_ctx_gpu_addr,
- &pdd->proc_ctx_cpu_ptr,
- false);
- if (retval) {
- pr_err("failed to allocate process context bo\n");
- goto err_free_pdd;
- }
- memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
- }
-
p->pdds[p->n_pdds++] = pdd;
if (kfd_dbg_is_per_vmid_supported(pdd->dev))
pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@@ -1610,11 +1651,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
/* Init idr used for memory handle translation */
idr_init(&pdd->alloc_idr);
- return pdd;
+ atomic_inc(&dev->kfd->kfd_processes_count);
-err_free_pdd:
- kfree(pdd);
- return NULL;
+ return pdd;
}
/**
@@ -1637,6 +1676,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
struct amdgpu_fpriv *drv_priv;
struct amdgpu_vm *avm;
struct kfd_process *p;
+ struct dma_fence *ef;
struct kfd_node *dev;
int ret;
@@ -1656,13 +1696,16 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
&p->kgd_process_info,
- &p->ef);
+ p->ef ? NULL : &ef);
if (ret) {
- pr_err("Failed to create process VM object\n");
+ dev_err(dev->adev->dev, "Failed to create process VM object\n");
return ret;
}
+
+ if (!p->ef)
+ RCU_INIT_POINTER(p->ef, ef);
+
pdd->drm_priv = drm_file->private_data;
- atomic64_set(&pdd->tlb_seq, 0);
ret = kfd_process_device_reserve_ib_mem(pdd);
if (ret)
@@ -1671,15 +1714,21 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (ret)
goto err_init_cwsr;
- ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, avm, p->pasid);
- if (ret)
- goto err_set_pasid;
+ if (unlikely(!avm->pasid)) {
+ dev_warn(pdd->dev->adev->dev, "WARN: vm %p has no pasid associated",
+ avm);
+ ret = -EINVAL;
+ goto err_get_pasid;
+ }
+ pdd->pasid = avm->pasid;
pdd->drm_file = drm_file;
+ kfd_smi_event_process(pdd, true);
+
return 0;
-err_set_pasid:
+err_get_pasid:
kfd_process_device_destroy_cwsr_dgpu(pdd);
err_init_cwsr:
kfd_process_device_destroy_ib_mem(pdd);
@@ -1705,7 +1754,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
pdd = kfd_get_process_device_data(dev, p);
if (!pdd) {
- pr_err("Process device data doesn't exist\n");
+ dev_err(dev->adev->dev, "Process device data doesn't exist\n");
return ERR_PTR(-ENOMEM);
}
@@ -1765,25 +1814,50 @@ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
idr_remove(&pdd->alloc_idr, handle);
}
-/* This increments the process->ref counter. */
-struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
+static struct kfd_process_device *kfd_lookup_process_device_by_pasid(u32 pasid)
{
- struct kfd_process *p, *ret_p = NULL;
+ struct kfd_process_device *ret_p = NULL;
+ struct kfd_process *p;
unsigned int temp;
-
- int idx = srcu_read_lock(&kfd_processes_srcu);
+ int i;
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- if (p->pasid == pasid) {
- kref_get(&p->ref);
- ret_p = p;
- break;
+ for (i = 0; i < p->n_pdds; i++) {
+ if (p->pdds[i]->pasid == pasid) {
+ ret_p = p->pdds[i];
+ break;
+ }
}
+ if (ret_p)
+ break;
+ }
+ return ret_p;
+}
+
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
+ struct kfd_process_device **pdd)
+{
+ struct kfd_process_device *ret_p;
+
+ int idx = srcu_read_lock(&kfd_processes_srcu);
+
+ ret_p = kfd_lookup_process_device_by_pasid(pasid);
+ if (ret_p) {
+ if (pdd)
+ *pdd = ret_p;
+ kref_get(&ret_p->process->ref);
+
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+ return ret_p->process;
}
srcu_read_unlock(&kfd_processes_srcu, idx);
- return ret_p;
+ if (pdd)
+ *pdd = NULL;
+
+ return NULL;
}
/* This increments the process->ref counter. */
@@ -1815,6 +1889,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ struct device *dev = pdd->dev->adev->dev;
kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
trigger);
@@ -1826,10 +1901,12 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
* them been add back since they actually not be saved right now.
*/
if (r && r != -EIO) {
- pr_err("Failed to evict process queues\n");
+ dev_err(dev, "Failed to evict process queues\n");
goto fail;
}
n_evicted++;
+
+ pdd->dev->dqm->is_hws_hang = false;
}
return r;
@@ -1848,7 +1925,8 @@ fail:
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
- pr_err("Failed to restore queues\n");
+ dev_err(pdd->dev->adev->dev,
+ "Failed to restore queues\n");
n_evicted--;
}
@@ -1864,13 +1942,14 @@ int kfd_process_restore_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ struct device *dev = pdd->dev->adev->dev;
kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
if (r) {
- pr_err("Failed to restore process queues\n");
+ dev_err(dev, "Failed to restore process queues\n");
if (!ret)
ret = r;
}
@@ -1904,6 +1983,23 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
return -EINVAL;
}
+static int signal_eviction_fence(struct kfd_process *p)
+{
+ struct dma_fence *ef;
+ int ret;
+
+ rcu_read_lock();
+ ef = dma_fence_get_rcu_safe(&p->ef);
+ rcu_read_unlock();
+ if (!ef)
+ return -EINVAL;
+
+ ret = dma_fence_signal(ef);
+ dma_fence_put(ef);
+
+ return ret;
+}
+
static void evict_process_worker(struct work_struct *work)
{
int ret;
@@ -1916,29 +2012,45 @@ static void evict_process_worker(struct work_struct *work)
* lifetime of this thread, kfd_process p will be valid
*/
p = container_of(dwork, struct kfd_process, eviction_work);
- WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
- "Eviction fence mismatch\n");
-
- /* Narrow window of overlap between restore and evict work
- * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
- * unreserves KFD BOs, it is possible to evicted again. But
- * restore has few more steps of finish. So lets wait for any
- * previous restore work to complete
- */
- flush_delayed_work(&p->restore_work);
- pr_debug("Started evicting pasid 0x%x\n", p->pasid);
+ pr_debug("Started evicting process pid %d\n", p->lead_thread->pid);
ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
if (!ret) {
- dma_fence_signal(p->ef);
- dma_fence_put(p->ef);
- p->ef = NULL;
- queue_delayed_work(kfd_restore_wq, &p->restore_work,
- msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
+ /* If another thread already signaled the eviction fence,
+ * they are responsible for stopping the queues and scheduling
+ * the restore work.
+ */
+ if (signal_eviction_fence(p) ||
+ mod_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
+ kfd_process_restore_queues(p);
- pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
+ pr_debug("Finished evicting process pid %d\n", p->lead_thread->pid);
} else
- pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
+ pr_err("Failed to evict queues of process pid %d\n", p->lead_thread->pid);
+}
+
+static int restore_process_helper(struct kfd_process *p)
+{
+ int ret = 0;
+
+ /* VMs may not have been acquired yet during debugging. */
+ if (p->kgd_process_info) {
+ ret = amdgpu_amdkfd_gpuvm_restore_process_bos(
+ p->kgd_process_info, &p->ef);
+ if (ret)
+ return ret;
+ }
+
+ ret = kfd_process_restore_queues(p);
+ if (!ret)
+ pr_debug("Finished restoring process pid %d\n",
+ p->lead_thread->pid);
+ else
+ pr_err("Failed to restore queues of process pid %d\n",
+ p->lead_thread->pid);
+
+ return ret;
}
static void restore_process_worker(struct work_struct *work)
@@ -1953,7 +2065,7 @@ static void restore_process_worker(struct work_struct *work)
* lifetime of this thread, kfd_process p will be valid
*/
p = container_of(dwork, struct kfd_process, restore_work);
- pr_debug("Started restoring pasid 0x%x\n", p->pasid);
+ pr_debug("Started restoring process pasid %d\n", (int)p->lead_thread->pid);
/* Setting last_restore_timestamp before successful restoration.
* Otherwise this would have to be set by KGD (restore_process_bos)
@@ -1966,24 +2078,15 @@ static void restore_process_worker(struct work_struct *work)
*/
p->last_restore_timestamp = get_jiffies_64();
- /* VMs may not have been acquired yet during debugging. */
- if (p->kgd_process_info)
- ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
- &p->ef);
+
+ ret = restore_process_helper(p);
if (ret) {
- pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
- p->pasid, PROCESS_BACK_OFF_TIME_MS);
- ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
- msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
- WARN(!ret, "reschedule restore work failed\n");
- return;
+ pr_debug("Failed to restore BOs of process pid %d, retry after %d ms\n",
+ p->lead_thread->pid, PROCESS_BACK_OFF_TIME_MS);
+ if (mod_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
+ kfd_process_restore_queues(p);
}
-
- ret = kfd_process_restore_queues(p);
- if (!ret)
- pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
- else
- pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
}
void kfd_suspend_all_processes(void)
@@ -1994,14 +2097,9 @@ void kfd_suspend_all_processes(void)
WARN(debug_evictions, "Evicting all processes");
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- cancel_delayed_work_sync(&p->eviction_work);
- flush_delayed_work(&p->restore_work);
-
if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
- pr_err("Failed to suspend process 0x%x\n", p->pasid);
- dma_fence_signal(p->ef);
- dma_fence_put(p->ef);
- p->ef = NULL;
+ pr_err("Failed to suspend process pid %d\n", p->lead_thread->pid);
+ signal_eviction_fence(p);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
@@ -2013,9 +2111,9 @@ int kfd_resume_all_processes(void)
int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
- pr_err("Restore process %d failed during resume\n",
- p->pasid);
+ if (restore_process_helper(p)) {
+ pr_err("Restore process pid %d failed during resume\n",
+ p->lead_thread->pid);
ret = -EFAULT;
}
}
@@ -2030,7 +2128,7 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
struct qcm_process_device *qpd;
if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
- pr_err("Incorrect CWSR mapping size.\n");
+ dev_err(dev->adev->dev, "Incorrect CWSR mapping size.\n");
return -EINVAL;
}
@@ -2042,7 +2140,8 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(KFD_CWSR_TBA_TMA_SIZE));
if (!qpd->cwsr_kaddr) {
- pr_err("Error allocating per process CWSR buffer.\n");
+ dev_err(dev->adev->dev,
+ "Error allocating per process CWSR buffer.\n");
return -ENOMEM;
}
@@ -2054,36 +2153,6 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
- struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
- uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
- struct kfd_node *dev = pdd->dev;
- uint32_t xcc_mask = dev->xcc_mask;
- int xcc = 0;
-
- /*
- * It can be that we race and lose here, but that is extremely unlikely
- * and the worst thing which could happen is that we flush the changes
- * into the TLB once more which is harmless.
- */
- if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
- return;
-
- if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
- /* Nothing to flush until a VMID is assigned, which
- * only happens when the first queue is created.
- */
- if (pdd->qpd.vmid)
- amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
- pdd->qpd.vmid);
- } else {
- for_each_inst(xcc, xcc_mask)
- amdgpu_amdkfd_flush_gpu_tlb_pasid(
- dev->adev, pdd->process->pasid, type, xcc);
- }
-}
-
/* assumes caller holds process lock. */
int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
{
@@ -2099,12 +2168,14 @@ int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
memset(irq_drain_fence, 0, sizeof(irq_drain_fence));
irq_drain_fence[0] = (KFD_IRQ_FENCE_SOURCEID << 8) |
KFD_IRQ_FENCE_CLIENTID;
- irq_drain_fence[3] = pdd->process->pasid;
+ irq_drain_fence[3] = pdd->pasid;
/*
- * For GFX 9.4.3, send the NodeId also in IH cookie DW[3]
+ * For GFX 9.4.3/9.5.0, send the NodeId also in IH cookie DW[3]
*/
- if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3)) {
+ if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) {
node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
irq_drain_fence[3] |= node_id << 16;
}
@@ -2128,7 +2199,7 @@ void kfd_process_close_interrupt_drain(unsigned int pasid)
{
struct kfd_process *p;
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return;
@@ -2249,8 +2320,8 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- seq_printf(m, "Process %d PASID 0x%x:\n",
- p->lead_thread->tgid, p->pasid);
+ seq_printf(m, "Process %d PASID %d:\n",
+ p->lead_thread->tgid, p->lead_thread->pid);
mutex_lock(&p->mutex);
r = pqm_debugfs_mqds(m, &p->pqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index adb5e4bdc0b2..7fbb5c274ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -28,6 +28,7 @@
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_reset.h"
static inline struct process_queue_node *get_queue_by_qid(
struct process_queue_manager *pqm, unsigned int qid)
@@ -68,8 +69,8 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
pr_debug("The new slot id %lu\n", found);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_info("Cannot open more queues for process with pasid 0x%x\n",
- pqm->process->pasid);
+ pr_info("Cannot open more queues for process with pid %d\n",
+ pqm->process->lead_thread->pid);
return -ENOMEM;
}
@@ -85,14 +86,24 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
if (pdd->already_dequeued)
return;
-
+ /* Skip the MES context flush when the KFD process was created
+ * without setting up the MES context and queue needed for a
+ * compute queue.
+ */
dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+ if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr &&
+ down_read_trylock(&dev->adev->reset_domain->sem)) {
+ amdgpu_mes_flush_shader_debugger(dev->adev,
+ pdd->proc_ctx_gpu_addr);
+ up_read(&dev->adev->reset_domain->sem);
+ }
pdd->already_dequeued = true;
}
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws)
{
+ struct mqd_update_info minfo = {0};
struct kfd_node *dev = NULL;
struct process_queue_node *pqn;
struct kfd_process_device *pdd;
@@ -123,7 +134,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
+ if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
+ !dev->kfd->shared_resources.enable_mes) {
if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
gws, &mem);
@@ -144,9 +158,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
}
pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+ minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
- pqn->q, NULL);
+ pqn->q, &minfo);
}
void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
@@ -169,16 +184,53 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
return 0;
}
+static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
+ struct process_queue_node *pqn)
+{
+ struct kfd_node *dev;
+ struct kfd_process_device *pdd;
+
+ dev = pqn->q->device;
+
+ pdd = kfd_get_process_device_data(dev, pqm->process);
+ if (!pdd) {
+ pr_err("Process device data doesn't exist\n");
+ return;
+ }
+
+ if (pqn->q->gws) {
+ if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
+ !dev->kfd->shared_resources.enable_mes)
+ amdgpu_amdkfd_remove_gws_from_process(
+ pqm->process->kgd_process_info, pqn->q->gws);
+ pdd->qpd.num_gws = 0;
+ }
+
+ if (dev->kfd->shared_resources.enable_mes) {
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart);
+ }
+}
+
void pqm_uninit(struct process_queue_manager *pqm)
{
struct process_queue_node *pqn, *next;
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
- if (pqn->q && pqn->q->gws &&
- KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
- !pqn->q->device->kfd->shared_resources.enable_mes)
- amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
- pqn->q->gws);
+ if (pqn->q) {
+ struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
+ pqm->process);
+ if (pdd) {
+ kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ } else {
+ WARN_ON(!pdd);
+ }
+ pqm_clean_queue_resource(pqm, pqn);
+ }
+
kfd_procfs_del_queue(pqn->q);
uninit_queue(pqn->q);
list_del(&pqn->process_queue_list);
@@ -192,7 +244,6 @@ void pqm_uninit(struct process_queue_manager *pqm)
static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_node *dev, struct queue **q,
struct queue_properties *q_properties,
- struct file *f, struct amdgpu_bo *wptr_bo,
unsigned int qid)
{
int retval;
@@ -224,12 +275,29 @@ static int init_user_queue(struct process_queue_manager *pqm,
goto cleanup;
}
memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
- (*q)->wptr_bo = wptr_bo;
+
+ /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
+ * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
+ */
+ if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
+ pr_err("Queue memory allocated to wrong device\n");
+ retval = -EINVAL;
+ goto free_gang_ctx_bo;
+ }
+
+ retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
+ &(*q)->wptr_bo_gart);
+ if (retval) {
+ pr_err("Failed to map wptr bo to GART\n");
+ goto free_gang_ctx_bo;
+ }
}
pr_debug("PQM After init queue");
return 0;
+free_gang_ctx_bo:
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo);
cleanup:
uninit_queue(*q);
*q = NULL;
@@ -238,10 +306,8 @@ cleanup:
int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_node *dev,
- struct file *f,
struct queue_properties *properties,
unsigned int *qid,
- struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
@@ -256,10 +322,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
unsigned int max_queues = 127; /* HWS limit */
/*
- * On GFX 9.4.3, increase the number of queues that
- * can be created to 255. No HWS limit on GFX 9.4.3.
+ * On GFX 9.4.3/9.5.0, increase the number of queues that
+ * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0.
*/
- if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
max_queues = 255;
q = NULL;
@@ -292,10 +360,26 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (retval != 0)
return retval;
+ /* Register process if this is the first queue */
if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
+ /* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */
+ if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) {
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (retval) {
+ dev_err(dev->adev->dev, "failed to allocate process context bo\n");
+ return retval;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
if (!pqn) {
retval = -ENOMEM;
@@ -305,13 +389,14 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) {
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
+ case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID:
/* SDMA queues are always allocated statically no matter
* which scheduler mode is used. We also do not need to
* check whether a SDMA queue can be allocated here, because
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
- retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -332,7 +417,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -363,8 +448,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
- pqm->process->pasid, type, retval);
+ if ((type == KFD_QUEUE_TYPE_SDMA ||
+ type == KFD_QUEUE_TYPE_SDMA_XGMI ||
+ type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) &&
+ retval == -ENOMEM)
+ pr_warn("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
+ else
+ pr_err("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
goto err_create_queue;
}
@@ -377,7 +469,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
*/
uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
pdd->qpd.proc_doorbells,
- 0);
+ 0,
+ pdd->dev->kfd->device_info.doorbell_size);
*p_doorbell_offset_in_process = (q->properties.doorbell_off
- first_db_index) * sizeof(uint32_t);
@@ -398,7 +491,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
err_create_queue:
uninit_queue(q);
if (kq)
- kernel_queue_uninit(kq, false);
+ kernel_queue_uninit(kq);
kfree(pqn);
err_allocate_pqn:
/* check if queues list is empty unregister process from device */
@@ -445,37 +538,26 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
/* destroy kernel queue (DIQ) */
dqm = pqn->kq->dev->dqm;
dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
- kernel_queue_uninit(pqn->kq, false);
+ kernel_queue_uninit(pqn->kq);
}
if (pqn->q) {
- kfd_procfs_del_queue(pqn->q);
+ retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ if (retval)
+ goto err_destroy_queue;
+
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
- pqm->process->pasid,
+ pdd->pasid,
pqn->q->properties.queue_id, retval);
- if (retval != -ETIME)
+ if (retval != -ETIME && retval != -EIO)
goto err_destroy_queue;
}
-
- if (pqn->q->gws) {
- if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
- !dev->kfd->shared_resources.enable_mes)
- amdgpu_amdkfd_remove_gws_from_process(
- pqm->process->kgd_process_info,
- pqn->q->gws);
- pdd->qpd.num_gws = 0;
- }
-
- if (dev->kfd->shared_resources.enable_mes) {
- amdgpu_amdkfd_free_gtt_mem(dev->adev,
- pqn->q->gang_ctx_bo);
- if (pqn->q->wptr_bo)
- amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
-
- }
+ kfd_procfs_del_queue(pqn->q);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ pqm_clean_queue_resource(pqm, pqn);
uninit_queue(pqn->q);
}
@@ -498,11 +580,44 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm,
struct process_queue_node *pqn;
pqn = get_queue_by_qid(pqm, qid);
- if (!pqn) {
+ if (!pqn || !pqn->q) {
pr_debug("No queue %d exists for update operation\n", qid);
return -EFAULT;
}
+ /*
+ * Update with NULL ring address is used to disable the queue
+ */
+ if (p->queue_address && p->queue_size) {
+ struct kfd_process_device *pdd;
+ struct amdgpu_vm *vm;
+ struct queue *q = pqn->q;
+ int err;
+
+ pdd = kfd_get_process_device_data(q->device, q->process);
+ if (!pdd)
+ return -ENODEV;
+ vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(vm->root.bo, false);
+ if (err)
+ return err;
+
+ if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo,
+ p->queue_size)) {
+ pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n",
+ p->queue_address, p->queue_size);
+ /* Drop the root BO reservation taken above before bailing out */
+ amdgpu_bo_unreserve(vm->root.bo);
+ return -EFAULT;
+ }
+
+ kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo);
+ kfd_queue_buffer_put(&pqn->q->properties.ring_bo);
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ pqn->q->properties.ring_bo = p->ring_bo;
+ }
+
pqn->q->properties.queue_address = p->queue_address;
pqn->q->properties.queue_size = p->queue_size;
pqn->q->properties.queue_percent = p->queue_percent;
@@ -559,19 +672,6 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
return 0;
}
-struct kernel_queue *pqm_get_kernel_queue(
- struct process_queue_manager *pqm,
- unsigned int qid)
-{
- struct process_queue_node *pqn;
-
- pqn = get_queue_by_qid(pqm, qid);
- if (pqn && pqn->kq)
- return pqn->kq;
-
- return NULL;
-}
-
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
unsigned int qid)
{
@@ -814,7 +914,10 @@ static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
- /* data stored in this order: priv_data, mqd, ctl_stack */
+ /*
+ * data stored in this order:
+ * priv_data, mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
+ */
q_data->mqd_size = mqd_size;
q_data->ctl_stack_size = ctl_stack_size;
@@ -863,7 +966,7 @@ int kfd_criu_checkpoint_queues(struct kfd_process *p,
}
static void set_queue_properties_from_criu(struct queue_properties *qp,
- struct kfd_criu_queue_priv_data *q_data)
+ struct kfd_criu_queue_priv_data *q_data, uint32_t num_xcc)
{
qp->is_interop = false;
qp->queue_percent = q_data->q_percent;
@@ -876,7 +979,11 @@ static void set_queue_properties_from_criu(struct queue_properties *qp,
qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
- qp->ctl_stack_size = q_data->ctl_stack_size;
+ if (q_data->type == KFD_QUEUE_TYPE_COMPUTE)
+ qp->ctl_stack_size = q_data->ctl_stack_size / num_xcc;
+ else
+ qp->ctl_stack_size = q_data->ctl_stack_size;
+
qp->type = q_data->type;
qp->format = q_data->format;
}
@@ -936,17 +1043,19 @@ int kfd_criu_restore_queue(struct kfd_process *p,
goto exit;
}
- /* data stored in this order: mqd, ctl_stack */
+ /*
+ * data stored in this order:
+ * mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
+ */
mqd = q_extra_data;
ctl_stack = mqd + q_data->mqd_size;
memset(&qp, 0, sizeof(qp));
- set_queue_properties_from_criu(&qp, q_data);
+ set_queue_properties_from_criu(&qp, q_data, NUM_XCC(pdd->dev->adev->gfx.xcc_mask));
print_queue_properties(&qp);
- ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
- NULL);
+ ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);
goto exit;
@@ -962,6 +1071,7 @@ exit:
pr_debug("Queue id %d was restored successfully\n", queue_id);
kfree(q_data);
+ kfree(q_extra_data);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 0f6992b1895c..a65c67cf56ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -24,6 +24,8 @@
#include <linux/slab.h>
#include "kfd_priv.h"
+#include "kfd_topology.h"
+#include "kfd_svm.h"
void print_queue_properties(struct queue_properties *q)
{
@@ -82,3 +84,386 @@ void uninit_queue(struct queue *q)
{
kfree(q);
}
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
+static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+ struct kfd_process *p = pdd->process;
+ struct list_head update_list;
+ struct svm_range *prange;
+ int ret = -EINVAL;
+
+ INIT_LIST_HEAD(&update_list);
+ addr >>= PAGE_SHIFT;
+ size >>= PAGE_SHIFT;
+
+ mutex_lock(&p->svms.lock);
+
+ /*
+ * The range may be split into multiple svm pranges aligned to the granularity boundary.
+ */
+ while (size) {
+ uint32_t gpuid, gpuidx;
+ int r;
+
+ prange = svm_range_from_addr(&p->svms, addr, NULL);
+ if (!prange)
+ break;
+
+ if (!prange->mapped_to_gpu)
+ break;
+
+ r = kfd_process_gpuid_from_node(p, pdd->dev, &gpuid, &gpuidx);
+ if (r < 0)
+ break;
+ if (!test_bit(gpuidx, prange->bitmap_access) &&
+ !test_bit(gpuidx, prange->bitmap_aip))
+ break;
+
+ if (!(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED))
+ break;
+
+ list_add(&prange->update_list, &update_list);
+
+ if (prange->last - prange->start + 1 >= size) {
+ size = 0;
+ break;
+ }
+
+ size -= prange->last - prange->start + 1;
+ addr += prange->last - prange->start + 1;
+ }
+ if (size) {
+ pr_debug("[0x%llx 0x%llx] not registered\n", addr, addr + size - 1);
+ goto out_unlock;
+ }
+
+ list_for_each_entry(prange, &update_list, update_list)
+ atomic_inc(&prange->queue_refcount);
+ ret = 0;
+
+out_unlock:
+ mutex_unlock(&p->svms.lock);
+ return ret;
+}
+
+static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+ struct kfd_process *p = pdd->process;
+ struct svm_range *prange, *pchild;
+ struct interval_tree_node *node;
+ unsigned long last;
+
+ addr >>= PAGE_SHIFT;
+ last = addr + (size >> PAGE_SHIFT) - 1;
+
+ mutex_lock(&p->svms.lock);
+
+ node = interval_tree_iter_first(&p->svms.objects, addr, last);
+ while (node) {
+ struct interval_tree_node *next_node;
+ unsigned long next_start;
+
+ prange = container_of(node, struct svm_range, it_node);
+ next_node = interval_tree_iter_next(node, addr, last);
+ next_start = min(node->last, last) + 1;
+
+ if (atomic_add_unless(&prange->queue_refcount, -1, 0)) {
+ list_for_each_entry(pchild, &prange->child_list, child_list)
+ atomic_add_unless(&pchild->queue_refcount, -1, 0);
+ }
+
+ node = next_node;
+ addr = next_start;
+ }
+
+ mutex_unlock(&p->svms.lock);
+}
+#else
+
+static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+ return -EINVAL;
+}
+
+static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+}
+
+#endif
+
+int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo,
+ u64 expected_size)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ u64 user_addr;
+ u64 size;
+
+ user_addr = (u64)addr >> AMDGPU_GPU_PAGE_SHIFT;
+ size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+ if (!mapping)
+ goto out_err;
+
+ if (user_addr != mapping->start ||
+ (size != 0 && user_addr + size - 1 != mapping->last)) {
+ pr_debug("expected size 0x%llx not equal to mapping addr 0x%llx size 0x%llx\n",
+ expected_size, mapping->start << AMDGPU_GPU_PAGE_SHIFT,
+ (mapping->last - mapping->start + 1) << AMDGPU_GPU_PAGE_SHIFT);
+ goto out_err;
+ }
+
+ *pbo = amdgpu_bo_ref(mapping->bo_va->base.bo);
+ mapping->bo_va->queue_refcount++;
+ return 0;
+
+out_err:
+ *pbo = NULL;
+ return -EINVAL;
+}
+
+/* FIXME: remove this function, just call amdgpu_bo_unref directly */
+void kfd_queue_buffer_put(struct amdgpu_bo **bo)
+{
+ amdgpu_bo_unref(bo);
+}
+
+int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
+{
+ struct kfd_topology_device *topo_dev;
+ u64 expected_queue_size;
+ struct amdgpu_vm *vm;
+ u32 total_cwsr_size;
+ int err;
+
+ topo_dev = kfd_topology_device_by_id(pdd->dev->id);
+ if (!topo_dev)
+ return -EINVAL;
+
+ /* AQL queues on GFX7 and GFX8 appear twice their actual size */
+ if (properties->type == KFD_QUEUE_TYPE_COMPUTE &&
+ properties->format == KFD_QUEUE_FORMAT_AQL &&
+ topo_dev->node_props.gfx_target_version >= 70000 &&
+ topo_dev->node_props.gfx_target_version < 90000)
+ expected_queue_size = properties->queue_size / 2;
+ else
+ expected_queue_size = properties->queue_size;
+
+ vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(vm->root.bo, false);
+ if (err)
+ return err;
+
+ err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE);
+ if (err)
+ goto out_err_unreserve;
+
+ err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE);
+ if (err)
+ goto out_err_unreserve;
+
+ err = kfd_queue_buffer_get(vm, (void *)properties->queue_address,
+ &properties->ring_bo, expected_queue_size);
+ if (err)
+ goto out_err_unreserve;
+
+ /* only compute queue requires EOP buffer and CWSR area */
+ if (properties->type != KFD_QUEUE_TYPE_COMPUTE)
+ goto out_unreserve;
+
+ /* EOP buffer is not required for all ASICs */
+ if (properties->eop_ring_buffer_address) {
+ if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) {
+ pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n",
+ properties->eop_ring_buffer_size,
+ topo_dev->node_props.eop_buffer_size);
+ err = -EINVAL;
+ goto out_err_unreserve;
+ }
+ err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address,
+ &properties->eop_buf_bo,
+ properties->eop_ring_buffer_size);
+ if (err)
+ goto out_err_unreserve;
+ }
+
+ if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) {
+ pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n",
+ properties->ctl_stack_size,
+ topo_dev->node_props.ctl_stack_size);
+ err = -EINVAL;
+ goto out_err_unreserve;
+ }
+
+ if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) {
+ pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n",
+ properties->ctx_save_restore_area_size,
+ topo_dev->node_props.cwsr_size);
+ err = -EINVAL;
+ goto out_err_unreserve;
+ }
+
+ total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size)
+ * NUM_XCC(pdd->dev->xcc_mask);
+ total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE);
+
+ err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address,
+ &properties->cwsr_bo, total_cwsr_size);
+ if (!err)
+ goto out_unreserve;
+
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address,
+ total_cwsr_size);
+ if (err)
+ goto out_err_release;
+
+ return 0;
+
+out_unreserve:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+
+out_err_unreserve:
+ amdgpu_bo_unreserve(vm->root.bo);
+out_err_release:
+ /* FIXME: make a _locked version of this that can be called before
+ * dropping the VM reservation.
+ */
+ kfd_queue_unref_bo_vas(pdd, properties);
+ kfd_queue_release_buffers(pdd, properties);
+ return err;
+}
+
+int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
+{
+ struct kfd_topology_device *topo_dev;
+ u32 total_cwsr_size;
+
+ kfd_queue_buffer_put(&properties->wptr_bo);
+ kfd_queue_buffer_put(&properties->rptr_bo);
+ kfd_queue_buffer_put(&properties->ring_bo);
+ kfd_queue_buffer_put(&properties->eop_buf_bo);
+ kfd_queue_buffer_put(&properties->cwsr_bo);
+
+ topo_dev = kfd_topology_device_by_id(pdd->dev->id);
+ if (!topo_dev)
+ return -EINVAL;
+ total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size)
+ * NUM_XCC(pdd->dev->xcc_mask);
+ total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE);
+
+ kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size);
+ return 0;
+}
+
+void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo)
+{
+ if (*bo) {
+ struct amdgpu_bo_va *bo_va;
+
+ bo_va = amdgpu_vm_bo_find(vm, *bo);
+ if (bo_va && bo_va->queue_refcount)
+ bo_va->queue_refcount--;
+ }
+}
+
+int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
+ struct queue_properties *properties)
+{
+ struct amdgpu_vm *vm;
+ int err;
+
+ vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(vm->root.bo, false);
+ if (err)
+ return err;
+
+ kfd_queue_unref_bo_va(vm, &properties->wptr_bo);
+ kfd_queue_unref_bo_va(vm, &properties->rptr_bo);
+ kfd_queue_unref_bo_va(vm, &properties->ring_bo);
+ kfd_queue_unref_bo_va(vm, &properties->eop_buf_bo);
+ kfd_queue_unref_bo_va(vm, &properties->cwsr_bo);
+
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+}
+
+#define SGPR_SIZE_PER_CU 0x4000
+#define LDS_SIZE_PER_CU 0x10000
+#define HWREG_SIZE_PER_CU 0x1000
+#define DEBUGGER_BYTES_ALIGN 64
+#define DEBUGGER_BYTES_PER_WAVE 32
+
+static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
+{
+ u32 vgpr_size = 0x40000;
+
+ if (gfxv == 90402 || /* GFX_VERSION_AQUA_VANJARAM */
+ gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */
+ gfxv == 90008 || /* GFX_VERSION_ARCTURUS */
+ gfxv == 90500)
+ vgpr_size = 0x80000;
+ else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */
+ gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */
+ gfxv == 120000 || /* GFX_VERSION_GFX1200 */
+ gfxv == 120001) /* GFX_VERSION_GFX1201 */
+ vgpr_size = 0x60000;
+
+ return vgpr_size;
+}
+
+#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \
+ (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
+ (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
+ HWREG_SIZE_PER_CU)
+
+#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
+ ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/
+
+#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
+
+void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
+{
+ struct kfd_node_properties *props = &dev->node_props;
+ u32 gfxv = props->gfx_target_version;
+ u32 ctl_stack_size;
+ u32 wg_data_size;
+ u32 wave_num;
+ u32 cu_num;
+
+ if (gfxv < 80001) /* GFX_VERSION_CARRIZO */
+ return;
+
+ cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
+ wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */
+ min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
+ : cu_num * 32;
+
+ wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
+ ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
+ ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
+ PAGE_SIZE);
+
+ if ((gfxv / 10000 * 10000) == 100000) {
+ /* HW design limits control stack size to 0x7000.
+ * This is insufficient for theoretical PM4 cases
+ * but sufficient for AQL, limited by SPI events.
+ */
+ ctl_stack_size = min(ctl_stack_size, 0x7000);
+ }
+
+ props->ctl_stack_size = ctl_stack_size;
+ props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
+ props->cwsr_size = ctl_stack_size + wg_data_size;
+
+ if (gfxv == 80002) /* GFX_VERSION_TONGA */
+ props->eop_buffer_size = 0x8000;
+ else if (gfxv == 90402) /* GFX_VERSION_AQUA_VANJARAM */
+ props->eop_buffer_size = 4096;
+ else if (gfxv >= 80000)
+ props->eop_buffer_size = 4096;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index d9953c2b2661..a499449fcb06 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -29,6 +29,7 @@
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"
+#include "amdgpu_reset.h"
struct kfd_smi_client {
struct list_head list;
@@ -43,7 +44,7 @@ struct kfd_smi_client {
bool suser;
};
-#define MAX_KFIFO_SIZE 1024
+#define KFD_MAX_KFIFO_SIZE 8192
static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
@@ -85,7 +86,7 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
struct kfd_smi_client *client = filep->private_data;
unsigned char *buf;
- size = min_t(size_t, size, MAX_KFIFO_SIZE);
+ size = min_t(size_t, size, KFD_MAX_KFIFO_SIZE);
buf = kmalloc(size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -162,10 +163,9 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
unsigned int event)
{
- uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);
uint64_t events = READ_ONCE(client->events);
- if (pid && client->pid != pid && !(client->suser && (events & all)))
+ if (pid && client->pid != pid && !client->suser)
return false;
return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event);
@@ -215,9 +215,11 @@ static void kfd_smi_event_add(pid_t pid, struct kfd_node *dev,
add_event_to_kfifo(pid, dev, event, fifo_in, len);
}
-void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset)
+void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
+ struct amdgpu_reset_context *reset_context)
{
unsigned int event;
+ char reset_cause[64];
if (post_reset) {
event = KFD_SMI_EVENT_GPU_POST_RESET;
@@ -225,29 +227,37 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset)
event = KFD_SMI_EVENT_GPU_PRE_RESET;
++(dev->reset_seq_num);
}
- kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num);
+
+ memset(reset_cause, 0, sizeof(reset_cause));
+
+ if (reset_context)
+ amdgpu_reset_get_desc(reset_context, reset_cause,
+ sizeof(reset_cause));
+
+ kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET(
+ dev->reset_seq_num, reset_cause));
}
void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask)
{
- kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING(
throttle_bitmask,
- amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
+ amdgpu_dpm_get_thermal_throttling_counter(dev->adev)));
}
void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
{
- struct amdgpu_task_info task_info;
-
- memset(&task_info, 0, sizeof(struct amdgpu_task_info));
- amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
- /* Report VM faults from user applications, not retry from kernel */
- if (!task_info.pid)
- return;
-
- kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
- task_info.pid, task_info.task_name);
+ struct amdgpu_task_info *task_info;
+
+ task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid);
+ if (task_info) {
+ /* Report VM faults from user applications, not retry from kernel */
+ if (task_info->task.pid)
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT(
+ task_info->task.pid, task_info->task.comm));
+ amdgpu_vm_put_task_info(task_info);
+ }
}
void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
@@ -255,16 +265,16 @@ void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
ktime_t ts)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START,
- "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
- address, node->id, write_fault ? 'W' : 'R');
+ KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid,
+ address, node->id, write_fault ? 'W' : 'R'));
}
void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
unsigned long address, bool migration)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END,
- "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
- pid, address, node->id, migration ? 'M' : 'U');
+ KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(),
+ pid, address, node->id, migration ? 'M' : 'U'));
}
void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
@@ -274,34 +284,35 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START,
- "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
+ KFD_EVENT_FMT_MIGRATE_START(
ktime_get_boottime_ns(), pid, start, end - start,
- from, to, prefetch_loc, preferred_loc, trigger);
+ from, to, prefetch_loc, preferred_loc, trigger));
}
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
unsigned long start, unsigned long end,
- uint32_t from, uint32_t to, uint32_t trigger)
+ uint32_t from, uint32_t to, uint32_t trigger,
+ int error_code)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
- "%lld -%d @%lx(%lx) %x->%x %d\n",
+ KFD_EVENT_FMT_MIGRATE_END(
ktime_get_boottime_ns(), pid, start, end - start,
- from, to, trigger);
+ from, to, trigger, error_code));
}
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION,
- "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
- node->id, trigger);
+ KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid,
+ node->id, trigger));
}
void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE,
- "%lld -%d %x\n", ktime_get_boottime_ns(), pid,
- node->id);
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid,
+ node->id, 0));
}
void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
@@ -318,8 +329,8 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
KFD_SMI_EVENT_QUEUE_RESTORE,
- "%lld -%d %x %c\n", ktime_get_boottime_ns(),
- p->lead_thread->pid, pdd->dev->id, 'R');
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(),
+ p->lead_thread->pid, pdd->dev->id, 'R'));
}
kfd_unref_process(p);
}
@@ -329,8 +340,29 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
- "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
- pid, address, last - address + 1, node->id, trigger);
+ KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(),
+ pid, address, last - address + 1, node->id, trigger));
+}
+
+void kfd_smi_event_process(struct kfd_process_device *pdd, bool start)
+{
+ struct amdgpu_task_info *task_info;
+ struct amdgpu_vm *avm;
+
+ if (!pdd->drm_priv)
+ return;
+
+ avm = drm_priv_to_vm(pdd->drm_priv);
+ task_info = amdgpu_vm_get_task_info_vm(avm);
+
+ if (task_info) {
+ kfd_smi_event_add(0, pdd->dev,
+ start ? KFD_SMI_EVENT_PROCESS_START :
+ KFD_SMI_EVENT_PROCESS_END,
+ KFD_EVENT_FMT_PROCESS(task_info->task.pid,
+ task_info->task.comm));
+ amdgpu_vm_put_task_info(task_info);
+ }
}
int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
@@ -343,7 +375,7 @@ int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
return -ENOMEM;
INIT_LIST_HEAD(&client->list);
- ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
+ ret = kfifo_alloc(&client->fifo, KFD_MAX_KFIFO_SIZE, GFP_KERNEL);
if (ret) {
kfree(client);
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index fa95c2dfd587..bb4d72b57387 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -24,11 +24,14 @@
#ifndef KFD_SMI_EVENTS_H_INCLUDED
#define KFD_SMI_EVENTS_H_INCLUDED
+struct amdgpu_reset_context;
+
int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd);
void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask);
-void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset);
+void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
+ struct amdgpu_reset_context *reset_context);
void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
unsigned long address, bool write_fault,
ktime_t ts);
@@ -41,7 +44,8 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
uint32_t trigger);
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
unsigned long start, unsigned long end,
- uint32_t from, uint32_t to, uint32_t trigger);
+ uint32_t from, uint32_t to, uint32_t trigger,
+ int error_code);
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
uint32_t trigger);
void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid);
@@ -49,4 +53,5 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
unsigned long address, unsigned long last,
uint32_t trigger);
+void kfd_smi_event_process(struct kfd_process_device *pdd, bool start);
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index bb16b795d1bc..9d72411c3379 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -198,6 +198,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
addr[i] >> PAGE_SHIFT, page_to_pfn(page));
}
+
return 0;
}
@@ -231,7 +232,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
return r;
}
-void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages)
{
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -249,7 +250,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
}
}
-void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
+void svm_range_dma_unmap(struct svm_range *prange)
{
struct kfd_process_device *pdd;
dma_addr_t *dma_addr;
@@ -270,10 +271,8 @@ void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
continue;
}
dev = &pdd->dev->adev->pdev->dev;
- if (unmap_dma)
- svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
- kvfree(dma_addr);
- prange->dma_addr[gpuidx] = NULL;
+
+ svm_range_dma_unmap_dev(dev, dma_addr, 0, prange->npages);
}
}
@@ -281,30 +280,42 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
{
uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+ uint32_t gpuidx;
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
prange->start, prange->last);
svm_range_vram_node_free(prange);
- svm_range_free_dma_mappings(prange, do_unmap);
+ if (do_unmap)
+ svm_range_dma_unmap(prange);
if (do_unmap && !p->xnack_enabled) {
pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
}
+
+ /* free dma_addr array for each gpu */
+ for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
+ if (prange->dma_addr[gpuidx]) {
+ kvfree(prange->dma_addr[gpuidx]);
+ prange->dma_addr[gpuidx] = NULL;
+ }
+ }
+
mutex_destroy(&prange->lock);
mutex_destroy(&prange->migrate_mutex);
kfree(prange);
}
static void
-svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
- uint8_t *granularity, uint32_t *flags)
+svm_range_set_default_attributes(struct svm_range_list *svms, int32_t *location,
+ int32_t *prefetch_loc, uint8_t *granularity,
+ uint32_t *flags)
{
*location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
*prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
- *granularity = 9;
+ *granularity = svms->default_granularity;
*flags =
KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
}
@@ -340,6 +351,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
INIT_LIST_HEAD(&prange->child_list);
atomic_set(&prange->invalid, 0);
prange->validate_timestamp = 0;
+ prange->vram_pages = 0;
mutex_init(&prange->migrate_mutex);
mutex_init(&prange->lock);
@@ -347,7 +359,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
MAX_GPU_INSTANCE);
- svm_range_set_default_attributes(&prange->preferred_loc,
+ svm_range_set_default_attributes(svms, &prange->preferred_loc,
&prange->prefetch_loc,
&prange->granularity, &prange->flags);
@@ -386,19 +398,37 @@ static void svm_range_bo_release(struct kref *kref)
prange->start, prange->last);
mutex_lock(&prange->lock);
prange->svm_bo = NULL;
+ /* prange should not hold vram page now */
+ WARN_ONCE(prange->actual_loc, "prange should not hold vram page");
mutex_unlock(&prange->lock);
spin_lock(&svm_bo->list_lock);
}
spin_unlock(&svm_bo->list_lock);
- if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
- /* We're not in the eviction worker.
- * Signal the fence and synchronize with any
- * pending eviction work.
+
+ if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ struct kfd_process_device *pdd;
+ struct kfd_process *p;
+ struct mm_struct *mm;
+
+ mm = svm_bo->eviction_fence->mm;
+ /*
+ * The forked child process takes svm_bo device pages ref, svm_bo could be
+ * released after parent process is gone.
*/
- dma_fence_signal(&svm_bo->eviction_fence->base);
- cancel_work_sync(&svm_bo->eviction_work);
+ p = kfd_lookup_process_by_mm(mm);
+ if (p) {
+ pdd = kfd_get_process_device_data(svm_bo->node, p);
+ if (pdd)
+ atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+ kfd_unref_process(p);
+ }
+ mmput(mm);
}
+
+ if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
+ /* We're not in the eviction worker. Signal the fence. */
+ dma_fence_signal(&svm_bo->eviction_fence->base);
dma_fence_put(&svm_bo->eviction_fence->base);
amdgpu_bo_unref(&svm_bo->bo);
kfree(svm_bo);
@@ -495,11 +525,11 @@ svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange)
/* We need a new svm_bo. Spin-loop to wait for concurrent
* svm_range_bo_release to finish removing this range from
- * its range list. After this, it is safe to reuse the
- * svm_bo pointer and svm_bo_list head.
+ * its range list and set prange->svm_bo to null. After this,
+ * it is safe to reuse the svm_bo pointer and svm_bo_list head.
*/
- while (!list_empty_careful(&prange->svm_bo_list))
- ;
+ while (!list_empty_careful(&prange->svm_bo_list) || prange->svm_bo)
+ cond_resched();
return false;
}
@@ -523,6 +553,7 @@ int
svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
bool clear)
{
+ struct kfd_process_device *pdd;
struct amdgpu_bo_param bp;
struct svm_range_bo *svm_bo;
struct amdgpu_bo_user *ubo;
@@ -532,7 +563,8 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
int r;
p = container_of(prange->svms, struct kfd_process, svms);
- pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
+ pr_debug("process pid: %d svms 0x%p [0x%lx 0x%lx]\n",
+ p->lead_thread->pid, prange->svms,
prange->start, prange->last);
if (svm_range_validate_svm_bo(node, prange))
@@ -614,6 +646,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
list_add(&prange->svm_bo_list, &svm_bo->range_list);
spin_unlock(&svm_bo->list_lock);
+ pdd = svm_range_get_pdd_by_node(prange, node);
+ if (pdd)
+ atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
+
return 0;
reserve_bo_failed:
@@ -628,8 +664,15 @@ create_bo_failed:
void svm_range_vram_node_free(struct svm_range *prange)
{
- svm_range_bo_unref(prange->svm_bo);
- prange->ttm_res = NULL;
+ /* serialize prange->svm_bo unref */
+ mutex_lock(&prange->lock);
+ /* prange->svm_bo has not been unreferenced yet */
+ if (prange->ttm_res) {
+ prange->ttm_res = NULL;
+ mutex_unlock(&prange->lock);
+ svm_range_bo_unref(prange->svm_bo);
+ } else
+ mutex_unlock(&prange->lock);
}
struct kfd_node *
@@ -760,7 +803,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
prange->flags &= ~attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_GRANULARITY:
- prange->granularity = attrs[i].value;
+ prange->granularity = min_t(uint32_t, attrs[i].value, 0x3F);
break;
default:
WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
@@ -820,7 +863,7 @@ svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
}
}
- return !prange->is_error_flag;
+ return true;
}
/**
@@ -862,14 +905,29 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
static void *
svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
- uint64_t offset)
+ uint64_t offset, uint64_t *vram_pages)
{
+ unsigned char *src = (unsigned char *)psrc + offset;
unsigned char *dst;
+ uint64_t i;
dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
if (!dst)
return NULL;
- memcpy(dst, (unsigned char *)psrc + offset, num_elements * size);
+
+ if (!vram_pages) {
+ memcpy(dst, src, num_elements * size);
+ return (void *)dst;
+ }
+
+ *vram_pages = 0;
+ for (i = 0; i < num_elements; i++) {
+ dma_addr_t *temp;
+ temp = (dma_addr_t *)dst + i;
+ *temp = *((dma_addr_t *)src + i);
+ if (*temp&SVM_RANGE_VRAM_DOMAIN)
+ (*vram_pages)++;
+ }
return (void *)dst;
}
@@ -883,7 +941,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
if (!src->dma_addr[i])
continue;
dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
- sizeof(*src->dma_addr[i]), src->npages, 0);
+ sizeof(*src->dma_addr[i]), src->npages, 0, NULL);
if (!dst->dma_addr[i])
return -ENOMEM;
}
@@ -894,7 +952,7 @@ svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
static int
svm_range_split_array(void *ppnew, void *ppold, size_t size,
uint64_t old_start, uint64_t old_n,
- uint64_t new_start, uint64_t new_n)
+ uint64_t new_start, uint64_t new_n, uint64_t *new_vram_pages)
{
unsigned char *new, *old, *pold;
uint64_t d;
@@ -906,11 +964,12 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
return 0;
d = (new_start - old_start) * size;
- new = svm_range_copy_array(pold, size, new_n, d);
+ /* get dma addr array for new range and calculate its vram page number */
+ new = svm_range_copy_array(pold, size, new_n, d, new_vram_pages);
if (!new)
return -ENOMEM;
d = (new_start == old_start) ? new_n * size : 0;
- old = svm_range_copy_array(pold, size, old_n, d);
+ old = svm_range_copy_array(pold, size, old_n, d, NULL);
if (!old) {
kvfree(new);
return -ENOMEM;
@@ -932,10 +991,13 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old,
for (i = 0; i < MAX_GPU_INSTANCE; i++) {
r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],
sizeof(*old->dma_addr[i]), old->start,
- npages, new->start, new->npages);
+ npages, new->start, new->npages,
+ old->actual_loc ? &new->vram_pages : NULL);
if (r)
return r;
}
+ if (old->actual_loc)
+ old->vram_pages -= new->vram_pages;
return 0;
}
@@ -1017,6 +1079,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
new->mapped_to_gpu = old->mapped_to_gpu;
bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+ atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount));
return 0;
}
@@ -1078,101 +1141,46 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
}
static int
-svm_range_split_tail(struct svm_range *prange,
- uint64_t new_last, struct list_head *insert_list)
+svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
+ struct list_head *insert_list, struct list_head *remap_list)
{
- struct svm_range *tail;
+ struct svm_range *tail = NULL;
int r = svm_range_split(prange, prange->start, new_last, &tail);
- if (!r)
+ if (!r) {
list_add(&tail->list, insert_list);
+ if (!IS_ALIGNED(new_last + 1, 1UL << prange->granularity))
+ list_add(&tail->update_list, remap_list);
+ }
return r;
}
static int
-svm_range_split_head(struct svm_range *prange,
- uint64_t new_start, struct list_head *insert_list)
+svm_range_split_head(struct svm_range *prange, uint64_t new_start,
+ struct list_head *insert_list, struct list_head *remap_list)
{
- struct svm_range *head;
+ struct svm_range *head = NULL;
int r = svm_range_split(prange, new_start, prange->last, &head);
- if (!r)
+ if (!r) {
list_add(&head->list, insert_list);
+ if (!IS_ALIGNED(new_start, 1UL << prange->granularity))
+ list_add(&head->update_list, remap_list);
+ }
return r;
}
static void
-svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
- struct svm_range *pchild, enum svm_work_list_ops op)
+svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op)
{
pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
pchild, pchild->start, pchild->last, prange, op);
- pchild->work_item.mm = mm;
+ pchild->work_item.mm = NULL;
pchild->work_item.op = op;
list_add_tail(&pchild->child_list, &prange->child_list);
}
-/**
- * svm_range_split_by_granularity - collect ranges within granularity boundary
- *
- * @p: the process with svms list
- * @mm: mm structure
- * @addr: the vm fault address in pages, to split the prange
- * @parent: parent range if prange is from child list
- * @prange: prange to split
- *
- * Trims @prange to be a single aligned block of prange->granularity if
- * possible. The head and tail are added to the child_list in @parent.
- *
- * Context: caller must hold mmap_read_lock and prange->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-int
-svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
- unsigned long addr, struct svm_range *parent,
- struct svm_range *prange)
-{
- struct svm_range *head, *tail;
- unsigned long start, last, size;
- int r;
-
- /* Align splited range start and size to granularity size, then a single
- * PTE will be used for whole range, this reduces the number of PTE
- * updated and the L1 TLB space used for translation.
- */
- size = 1UL << prange->granularity;
- start = ALIGN_DOWN(addr, size);
- last = ALIGN(addr + 1, size) - 1;
-
- pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
- prange->svms, prange->start, prange->last, start, last, size);
-
- if (start > prange->start) {
- r = svm_range_split(prange, start, prange->last, &head);
- if (r)
- return r;
- svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
- }
-
- if (last < prange->last) {
- r = svm_range_split(prange, prange->start, last, &tail);
- if (r)
- return r;
- svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
- }
-
- /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
- if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
- prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
- pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
- prange, prange->start, prange->last,
- SVM_OP_ADD_RANGE_AND_MAP);
- }
- return 0;
-}
static bool
svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
{
@@ -1181,22 +1189,23 @@ svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
}
static uint64_t
-svm_range_get_pte_flags(struct kfd_node *node,
+svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm,
struct svm_range *prange, int domain)
{
struct kfd_node *bo_node;
uint32_t flags = prange->flags;
uint32_t mapping_flags = 0;
+ uint32_t gc_ip_version = KFD_GC_VERSION(node);
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
- bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
- bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
+ bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
+ bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
unsigned int mtype_local;
if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_node = prange->svm_bo->node;
- switch (node->adev->ip_versions[GC_HWIP][0]) {
+ switch (gc_ip_version) {
case IP_VERSION(9, 4, 1):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_node == node) {
@@ -1232,22 +1241,31 @@ svm_range_get_pte_flags(struct kfd_node *node,
}
break;
case IP_VERSION(9, 4, 3):
- mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
- (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW);
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ if (ext_coherent)
+ mtype_local = AMDGPU_VM_MTYPE_CC;
+ else
+ mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
+ amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
snoop = true;
- if (uncached) {
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
- } else if (domain == SVM_RANGE_VRAM_DOMAIN) {
+ if (domain == SVM_RANGE_VRAM_DOMAIN) {
/* local HBM region close to partition */
if (bo_node->adev == node->adev &&
(!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
mapping_flags |= mtype_local;
- /* local HBM region far from partition or remote XGMI GPU */
- else if (svm_nodes_in_same_hive(bo_node, node))
+ /* local HBM region far from partition or remote XGMI GPU
+ * with regular system scope coherence
+ */
+ else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent)
mapping_flags |= AMDGPU_VM_MTYPE_NC;
- /* PCIe P2P */
- else
+ /* PCIe P2P on GPUs pre-9.5.0 */
+ else if (gc_ip_version < IP_VERSION(9, 5, 0) &&
+ !svm_nodes_in_same_hive(bo_node, node))
mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ /* Other remote memory */
+ else
+ mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the APU */
} else if (node->adev->flags & AMD_IS_APU) {
/* On NUMA systems, locality is determined per-page
@@ -1256,29 +1274,37 @@ svm_range_get_pte_flags(struct kfd_node *node,
if (num_possible_nodes() <= 1)
mapping_flags |= mtype_local;
else
- mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the dGPU */
} else {
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent)
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
}
break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ break;
default:
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
- mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
-
- if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
- mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
pte_flags = AMDGPU_PTE_VALID;
pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+ if (gc_ip_version >= IP_VERSION(12, 0, 0))
+ pte_flags |= AMDGPU_PTE_IS_PTE;
- pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
+ amdgpu_gmc_get_vm_pte(node->adev, vm, NULL, mapping_flags, &pte_flags);
+ pte_flags |= AMDGPU_PTE_READABLE;
+ if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+ pte_flags |= AMDGPU_PTE_WRITEABLE;
return pte_flags;
}
@@ -1291,7 +1317,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("[0x%llx 0x%llx]\n", start, last);
- return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
+ return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, start,
last, init_pte_value, 0, 0, NULL, NULL,
fence);
}
@@ -1385,7 +1411,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
last_start, prange->start + i, last_domain ? "GPU" : "CPU");
- pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain);
+ pte_flags = svm_range_get_pte_flags(pdd->dev, vm, prange, last_domain);
if (readonly)
pte_flags &= ~AMDGPU_PTE_WRITEABLE;
@@ -1398,8 +1424,8 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
* different memory partition based on fpfn/lpfn, we should use
* same vm_manager.vram_base_offset regardless memory partition.
*/
- r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
- last_start, prange->start + i,
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, true,
+ NULL, last_start, prange->start + i,
pte_flags,
(last_start - prange->start) << PAGE_SHIFT,
bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
@@ -1501,7 +1527,7 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
uint32_t gpuidx;
int r;
- drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0);
+ drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0, 0);
drm_exec_until_all_locked(&ctx->exec) {
for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
@@ -1529,9 +1555,9 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
goto unreserve_out;
}
- r = amdgpu_vm_validate_pt_bos(pdd->dev->adev,
- drm_priv_to_vm(pdd->drm_priv),
- svm_range_bo_validate, NULL);
+ r = amdgpu_vm_validate(pdd->dev->adev,
+ drm_priv_to_vm(pdd->drm_priv), NULL,
+ svm_range_bo_validate, NULL);
if (r) {
pr_debug("failed %d validate pt bos\n", r);
goto unreserve_out;
@@ -1586,6 +1612,7 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
* 5. Release page table (and SVM BO) reservation
*/
static int svm_range_validate_and_map(struct mm_struct *mm,
+ unsigned long map_start, unsigned long map_last,
struct svm_range *prange, int32_t gpuidx,
bool intr, bool wait, bool flush_tlb)
{
@@ -1625,18 +1652,24 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
if (test_bit(gpuidx, prange->bitmap_access))
bitmap_set(ctx->bitmap, gpuidx, 1);
}
+
+ /*
+ * If prange is already mapped or with always mapped flag,
+ * update mapping on GPUs with ACCESS attribute
+ */
+ if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+ if (prange->mapped_to_gpu ||
+ prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)
+ bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+ }
} else {
bitmap_or(ctx->bitmap, prange->bitmap_access,
prange->bitmap_aip, MAX_GPU_INSTANCE);
}
if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
- bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
- if (!prange->mapped_to_gpu ||
- bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
- r = 0;
- goto free_ctx;
- }
+ r = 0;
+ goto free_ctx;
}
if (prange->actual_loc && !prange->ttm_res) {
@@ -1648,7 +1681,9 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
goto free_ctx;
}
- svm_range_reserve_bos(ctx, intr);
+ r = svm_range_reserve_bos(ctx, intr);
+ if (r)
+ goto free_ctx;
p = container_of(prange->svms, struct kfd_process, svms);
owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap,
@@ -1660,75 +1695,102 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
}
}
- start = prange->start << PAGE_SHIFT;
- end = (prange->last + 1) << PAGE_SHIFT;
- for (addr = start; addr < end && !r; ) {
- struct hmm_range *hmm_range;
+ start = map_start << PAGE_SHIFT;
+ end = (map_last + 1) << PAGE_SHIFT;
+ for (addr = start; !r && addr < end; ) {
+ struct hmm_range *hmm_range = NULL;
+ unsigned long map_start_vma;
+ unsigned long map_last_vma;
struct vm_area_struct *vma;
- unsigned long next;
+ unsigned long next = 0;
unsigned long offset;
unsigned long npages;
bool readonly;
vma = vma_lookup(mm, addr);
- if (!vma) {
+ if (vma) {
+ readonly = !(vma->vm_flags & VM_WRITE);
+
+ next = min(vma->vm_end, end);
+ npages = (next - addr) >> PAGE_SHIFT;
+ /* HMM requires at least READ permissions. If provided with PROT_NONE,
+ * unmap the memory. If it's not already mapped, this is a no-op.
+ * If PROT_WRITE is provided without READ, warn first then unmap
+ */
+ if (!(vma->vm_flags & VM_READ)) {
+ unsigned long e, s;
+
+ svm_range_lock(prange);
+ if (vma->vm_flags & VM_WRITE)
+ pr_debug("VM_WRITE without VM_READ is not supported");
+ s = max(start, prange->start);
+ e = min(end, prange->last);
+ if (e >= s)
+ r = svm_range_unmap_from_gpus(prange, s, e,
+ KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU);
+ svm_range_unlock(prange);
+ /* If unmap returns non-zero, we'll bail on the next for loop
+ * iteration, so just leave r and continue
+ */
+ addr = next;
+ continue;
+ }
+
+ WRITE_ONCE(p->svms.faulting_task, current);
+ r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
+ readonly, owner,
+ &hmm_range);
+ WRITE_ONCE(p->svms.faulting_task, NULL);
+ if (r)
+ pr_debug("failed %d to get svm range pages\n", r);
+ } else {
r = -EFAULT;
- goto unreserve_out;
- }
- readonly = !(vma->vm_flags & VM_WRITE);
-
- next = min(vma->vm_end, end);
- npages = (next - addr) >> PAGE_SHIFT;
- WRITE_ONCE(p->svms.faulting_task, current);
- r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
- readonly, owner, NULL,
- &hmm_range);
- WRITE_ONCE(p->svms.faulting_task, NULL);
- if (r) {
- pr_debug("failed %d to get svm range pages\n", r);
- if (r == -EBUSY)
- r = -EAGAIN;
- goto unreserve_out;
}
- offset = (addr - start) >> PAGE_SHIFT;
- r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
- hmm_range->hmm_pfns);
- if (r) {
- pr_debug("failed %d to dma map range\n", r);
- goto unreserve_out;
+ if (!r) {
+ offset = (addr >> PAGE_SHIFT) - prange->start;
+ r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
+ hmm_range->hmm_pfns);
+ if (r)
+ pr_debug("failed %d to dma map range\n", r);
}
svm_range_lock(prange);
- if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+
+ /* Free backing memory of hmm_range if it was initialized.
+ * Override return value to TRY AGAIN only if prior returns
+ * were successful
+ */
+ if (hmm_range && amdgpu_hmm_range_get_pages_done(hmm_range) && !r) {
pr_debug("hmm update the range, need validate again\n");
r = -EAGAIN;
- goto unlock_out;
}
- if (!list_empty(&prange->child_list)) {
+
+ if (!r && !list_empty(&prange->child_list)) {
pr_debug("range split by unmap in parallel, validate again\n");
r = -EAGAIN;
- goto unlock_out;
}
- r = svm_range_map_to_gpus(prange, offset, npages, readonly,
- ctx->bitmap, wait, flush_tlb);
+ if (!r) {
+ map_start_vma = max(map_start, prange->start + offset);
+ map_last_vma = min(map_last, prange->start + offset + npages - 1);
+ if (map_start_vma <= map_last_vma) {
+ offset = map_start_vma - prange->start;
+ npages = map_last_vma - map_start_vma + 1;
+ r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+ ctx->bitmap, wait, flush_tlb);
+ }
+ }
+
+ if (!r && next == end)
+ prange->mapped_to_gpu = true;
-unlock_out:
svm_range_unlock(prange);
addr = next;
}
- if (addr == end) {
- prange->validated_once = true;
- prange->mapped_to_gpu = true;
- }
-
-unreserve_out:
svm_range_unreserve_bos(ctx);
-
- prange->is_error_flag = !!r;
if (!r)
prange->validate_timestamp = ktime_get_boottime();
@@ -1811,8 +1873,8 @@ static void svm_range_restore_work(struct work_struct *work)
*/
mutex_lock(&prange->migrate_mutex);
- r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
- false, true, false);
+ r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+ MAX_GPU_INSTANCE, false, true, false);
if (r)
pr_debug("failed %d to map 0x%lx to gpus\n", r,
prange->start);
@@ -1849,7 +1911,7 @@ out_reschedule:
/* If validation failed, reschedule another attempt */
if (evicted_ranges) {
pr_debug("reschedule to restore svm range\n");
- schedule_delayed_work(&svms->restore_work,
+ queue_delayed_work(system_freezable_wq, &svms->restore_work,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
kfd_smi_event_queue_restore_rescheduled(mm);
@@ -1925,7 +1987,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
pr_debug("failed to quiesce KFD\n");
pr_debug("schedule to restore svm %p ranges\n", svms);
- schedule_delayed_work(&svms->restore_work,
+ queue_delayed_work(system_freezable_wq, &svms->restore_work,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
} else {
unsigned long s, l;
@@ -1980,8 +2042,10 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
new->actual_loc = old->actual_loc;
new->granularity = old->granularity;
new->mapped_to_gpu = old->mapped_to_gpu;
+ new->vram_pages = old->vram_pages;
bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+ atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount));
return new;
}
@@ -2045,6 +2109,7 @@ svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
* @update_list: output, the ranges need validate and update GPU mapping
* @insert_list: output, the ranges need insert to svms
* @remove_list: output, the ranges are replaced and need remove from svms
+ * @remap_list: output, remap unaligned svm ranges
*
* Check if the virtual address range has overlap with any existing ranges,
* split partly overlapping ranges and add new ranges in the gaps. All changes
@@ -2068,7 +2133,7 @@ static int
svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
struct list_head *update_list, struct list_head *insert_list,
- struct list_head *remove_list)
+ struct list_head *remove_list, struct list_head *remap_list)
{
unsigned long last = start + size - 1UL;
struct svm_range_list *svms = &p->svms;
@@ -2084,6 +2149,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
INIT_LIST_HEAD(insert_list);
INIT_LIST_HEAD(remove_list);
INIT_LIST_HEAD(&new_list);
+ INIT_LIST_HEAD(remap_list);
node = interval_tree_iter_first(&svms->objects, start, last);
while (node) {
@@ -2097,7 +2163,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
next = interval_tree_iter_next(node, start, last);
next_start = min(node->last, last) + 1;
- if (svm_range_is_same_attrs(p, prange, nattr, attrs)) {
+ if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
+ prange->mapped_to_gpu) {
/* nothing to do */
} else if (node->start < start || node->last > last) {
/* node intersects the update range and its attributes
@@ -2119,14 +2186,14 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
if (node->start < start) {
pr_debug("change old range start\n");
r = svm_range_split_head(prange, start,
- insert_list);
+ insert_list, remap_list);
if (r)
goto out;
}
if (node->last > last) {
pr_debug("change old range last\n");
r = svm_range_split_tail(prange, last,
- insert_list);
+ insert_list, remap_list);
if (r)
goto out;
}
@@ -2247,16 +2314,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
- int drain;
uint32_t i;
p = container_of(svms, struct kfd_process, svms);
-restart:
- drain = atomic_read(&svms->drain_pagefaults);
- if (!drain)
- return;
-
for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
pdd = p->pdds[i];
if (!pdd)
@@ -2276,8 +2337,6 @@ restart:
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
}
- if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
- goto restart;
}
static void svm_range_deferred_list_work(struct work_struct *work)
@@ -2299,17 +2358,8 @@ static void svm_range_deferred_list_work(struct work_struct *work)
prange->start, prange->last, prange->work_item.op);
mm = prange->work_item.mm;
-retry:
- mmap_write_lock(mm);
- /* Checking for the need to drain retry faults must be inside
- * mmap write lock to serialize with munmap notifiers.
- */
- if (unlikely(atomic_read(&svms->drain_pagefaults))) {
- mmap_write_unlock(mm);
- svm_range_drain_retry_fault(svms);
- goto retry;
- }
+ mmap_write_lock(mm);
/* Remove from deferred_list must be inside mmap write lock, for
* two race cases:
@@ -2341,8 +2391,10 @@ retry:
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
- /* Pairs with mmget in svm_range_add_list_work */
- mmput(mm);
+ /* Pairs with mmget in svm_range_add_list_work. If dropping the
+ * last mm refcount, schedule release work to avoid circular locking
+ */
+ mmput_async(mm);
spin_lock(&svms->deferred_list_lock);
}
@@ -2363,15 +2415,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
prange->work_item.op != SVM_OP_UNMAP_RANGE)
prange->work_item.op = op;
} else {
- prange->work_item.op = op;
-
- /* Pairs with mmput in deferred_list_work */
- mmget(mm);
- prange->work_item.mm = mm;
- list_add_tail(&prange->deferred_list,
- &prange->svms->deferred_range_list);
- pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
- prange, prange->start, prange->last, op);
+ /* Pairs with mmput in deferred_list_work.
+ * If process is exiting and mm is gone, don't update mmu notifier.
+ */
+ if (mmget_not_zero(mm)) {
+ prange->work_item.mm = mm;
+ prange->work_item.op = op;
+ list_add_tail(&prange->deferred_list,
+ &prange->svms->deferred_range_list);
+ pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
+ prange, prange->start, prange->last, op);
+ }
}
spin_unlock(&svms->deferred_list_lock);
}
@@ -2385,8 +2439,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms)
}
static void
-svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
- struct svm_range *prange, unsigned long start,
+svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start,
unsigned long last)
{
struct svm_range *head;
@@ -2407,12 +2460,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
svm_range_split(tail, last + 1, tail->last, &head);
if (head != prange && tail != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
- svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE);
} else if (tail != prange) {
- svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE);
} else if (head != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
} else if (parent != prange) {
prange->work_item.op = SVM_OP_UNMAP_RANGE;
}
@@ -2428,6 +2481,17 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
struct kfd_process *p;
unsigned long s, l;
bool unmap_parent;
+ uint32_t i;
+
+ if (atomic_read(&prange->queue_refcount)) {
+ int r;
+
+ pr_warn("Freeing queue vital buffer 0x%lx, queue evicted\n",
+ prange->start << PAGE_SHIFT);
+ r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
+ if (r)
+ pr_debug("failed %d to quiesce KFD queues\n", r);
+ }
p = kfd_lookup_process_by_mm(mm);
if (!p)
@@ -2437,11 +2501,38 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
prange, prange->start, prange->last, start, last);
- /* Make sure pending page faults are drained in the deferred worker
- * before the range is freed to avoid straggler interrupts on
- * unmapped memory causing "phantom faults".
+ /* calculate the time stamps used to decide which page faults need to be
+ * dropped or handled before unmapping pages from the gpu vm
*/
- atomic_inc(&svms->drain_pagefaults);
+ for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
+ struct kfd_process_device *pdd;
+ struct amdgpu_device *adev;
+ struct amdgpu_ih_ring *ih;
+ uint32_t checkpoint_wptr;
+
+ pdd = p->pdds[i];
+ if (!pdd)
+ continue;
+
+ adev = pdd->dev->adev;
+
+ /* Check and drain ih1 ring if cam not available */
+ if (adev->irq.ih1.ring_size) {
+ ih = &adev->irq.ih1;
+ checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+ if (ih->rptr != checkpoint_wptr) {
+ svms->checkpoint_ts[i] =
+ amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+ continue;
+ }
+ }
+
+ /* check if dev->irq.ih_soft is not empty */
+ ih = &adev->irq.ih_soft;
+ checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+ if (ih->rptr != checkpoint_wptr)
+ svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+ }
unmap_parent = start <= prange->start && last >= prange->last;
@@ -2451,14 +2542,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
l = min(last, pchild->last);
if (l >= s)
svm_range_unmap_from_gpus(pchild, s, l, trigger);
- svm_range_unmap_split(mm, prange, pchild, start, last);
+ svm_range_unmap_split(prange, pchild, start, last);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
svm_range_unmap_from_gpus(prange, s, l, trigger);
- svm_range_unmap_split(mm, prange, prange, start, last);
+ svm_range_unmap_split(prange, prange, start, last);
if (unmap_parent)
svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
@@ -2501,8 +2592,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
if (range->event == MMU_NOTIFY_RELEASE)
return true;
- if (!mmget_not_zero(mni->mm))
- return true;
start = mni->interval_tree.start;
last = mni->interval_tree.last;
@@ -2529,7 +2618,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
}
svm_range_unlock(prange);
- mmput(mni->mm);
return true;
}
@@ -2616,7 +2704,7 @@ svm_range_best_restore_location(struct svm_range *prange,
return -1;
}
- if (node->adev->gmc.is_app_apu)
+ if (node->adev->apu_prefer_gtt)
return 0;
if (prange->preferred_loc == gpuid ||
@@ -2653,6 +2741,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
{
struct vm_area_struct *vma;
struct interval_tree_node *node;
+ struct rb_node *rb_node;
unsigned long start_limit, end_limit;
vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
@@ -2664,24 +2753,24 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
*is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
start_limit = max(vma->vm_start >> PAGE_SHIFT,
- (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+ (unsigned long)ALIGN_DOWN(addr, 1UL << p->svms.default_granularity));
end_limit = min(vma->vm_end >> PAGE_SHIFT,
- (unsigned long)ALIGN(addr + 1, 2UL << 8));
+ (unsigned long)ALIGN(addr + 1, 1UL << p->svms.default_granularity));
+
/* First range that starts after the fault address */
node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
if (node) {
end_limit = min(end_limit, node->start);
/* Last range that ends before the fault address */
- node = container_of(rb_prev(&node->rb),
- struct interval_tree_node, rb);
+ rb_node = rb_prev(&node->rb);
} else {
/* Last range must end before addr because
* there was no range after addr
*/
- node = container_of(rb_last(&p->svms.objects.rb_root),
- struct interval_tree_node, rb);
+ rb_node = rb_last(&p->svms.objects.rb_root);
}
- if (node) {
+ if (rb_node) {
+ node = container_of(rb_node, struct interval_tree_node, rb);
if (node->last >= addr) {
WARN(1, "Overlap with prev node and page fault addr\n");
return -EFAULT;
@@ -2882,8 +2971,9 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
int
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
uint32_t vmid, uint32_t node_id,
- uint64_t addr, bool write_fault)
+ uint64_t addr, uint64_t ts, bool write_fault)
{
+ unsigned long start, last, size;
struct mm_struct *mm = NULL;
struct svm_range_list *svms;
struct svm_range *prange;
@@ -2891,7 +2981,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
ktime_t timestamp = ktime_get_boottime();
struct kfd_node *node;
int32_t best_loc;
- int32_t gpuidx = MAX_GPU_INSTANCE;
+ int32_t gpuid, gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
struct vm_area_struct *vma;
bool migration = false;
@@ -2902,7 +2992,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
return -EFAULT;
}
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p) {
pr_debug("kfd process not founded pasid 0x%x\n", pasid);
return 0;
@@ -2912,11 +3002,25 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
if (atomic_read(&svms->drain_pagefaults)) {
- pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr);
r = 0;
goto out;
}
+ node = kfd_node_by_irq_ids(adev, node_id, vmid);
+ if (!node) {
+ pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+ vmid);
+ r = -EFAULT;
+ goto out;
+ }
+
+ if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
+ pr_debug("failed to get gpuid/gpuidex for node_id: %d\n", node_id);
+ r = -EFAULT;
+ goto out;
+ }
+
if (!p->xnack_enabled) {
pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
r = -EFAULT;
@@ -2933,16 +3037,26 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
goto out;
}
- node = kfd_node_by_irq_ids(adev, node_id, vmid);
- if (!node) {
- pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
- vmid);
- r = -EFAULT;
- goto out;
- }
mmap_read_lock(mm);
retry_write_locked:
mutex_lock(&svms->lock);
+
+ /* check if this page fault time stamp is before svms->checkpoint_ts */
+ if (svms->checkpoint_ts[gpuidx] != 0) {
+ if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) {
+ pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ if (write_locked)
+ mmap_write_downgrade(mm);
+ r = -EAGAIN;
+ goto out_unlock_svms;
+ } else {
+ /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts
+ * to zero so that a later ts wrap around cannot give a wrong comparison
+ */
+ svms->checkpoint_ts[gpuidx] = 0;
+ }
+ }
+
prange = svm_range_from_addr(svms, addr, NULL);
if (!prange) {
pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
@@ -3019,41 +3133,46 @@ retry_write_locked:
kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
write_fault, timestamp);
- if (prange->actual_loc != best_loc) {
- migration = true;
+ /* Align migration range start and size to granularity size */
+ size = 1UL << prange->granularity;
+ start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
+ last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
+ if (prange->actual_loc != 0 || best_loc != 0) {
if (best_loc) {
- r = svm_migrate_to_vram(prange, best_loc, mm,
- KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
+ r = svm_migrate_to_vram(prange, best_loc, start, last,
+ mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
if (r) {
pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
r, addr);
/* Fallback to system memory if migration to
* VRAM failed
*/
- if (prange->actual_loc)
- r = svm_migrate_vram_to_ram(prange, mm,
- KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
- NULL);
+ if (prange->actual_loc && prange->actual_loc != best_loc)
+ r = svm_migrate_vram_to_ram(prange, mm, start, last,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
else
r = 0;
}
} else {
- r = svm_migrate_vram_to_ram(prange, mm,
- KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
- NULL);
+ r = svm_migrate_vram_to_ram(prange, mm, start, last,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
}
if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
- r, svms, prange->start, prange->last);
- goto out_unlock_range;
+ r, svms, start, last);
+ goto out_migrate_fail;
+ } else {
+ migration = true;
}
}
- r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+ r = svm_range_validate_and_map(mm, start, last, prange, gpuidx, false,
+ false, false);
if (r)
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
- r, svms, prange->start, prange->last);
+ r, svms, start, last);
+out_migrate_fail:
kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
migration);
@@ -3063,7 +3182,8 @@ out_unlock_svms:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
- svm_range_count_fault(node, p, gpuidx);
+ if (r != -EAGAIN)
+ svm_range_count_fault(node, p, gpuidx);
mmput(mm);
out:
@@ -3140,7 +3260,8 @@ void svm_range_list_fini(struct kfd_process *p)
struct svm_range *prange;
struct svm_range *next;
- pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+ pr_debug("process pid %d svms 0x%p\n", p->lead_thread->pid,
+ &p->svms);
cancel_delayed_work_sync(&p->svms.restore_work);
@@ -3150,8 +3271,9 @@ void svm_range_list_fini(struct kfd_process *p)
/*
* Ensure no retry fault comes in afterwards, as page fault handler will
* not find kfd process and take mm lock to recover fault.
+ * stop kfd page fault handling, then wait until pending page faults are drained
*/
- atomic_inc(&p->svms.drain_pagefaults);
+ atomic_set(&p->svms.drain_pagefaults, 1);
svm_range_drain_retry_fault(&p->svms);
list_for_each_entry_safe(prange, next, &p->svms.list, list) {
@@ -3162,7 +3284,8 @@ void svm_range_list_fini(struct kfd_process *p)
mutex_destroy(&p->svms.lock);
- pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms);
+ pr_debug("process pid %d svms 0x%p done\n",
+ p->lead_thread->pid, &p->svms);
}
int svm_range_list_init(struct kfd_process *p)
@@ -3185,6 +3308,12 @@ int svm_range_list_init(struct kfd_process *p)
if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
bitmap_set(svms->bitmap_supported, i, 1);
+ /* Value of default granularity cannot exceed 0x1B, the
+ * number of pages supported by a 4-level paging table
+ */
+ svms->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x1B);
+ pr_debug("Default SVM Granularity to use: %d\n", svms->default_granularity);
+
return 0;
}
@@ -3329,7 +3458,7 @@ svm_range_best_prefetch_location(struct svm_range *prange)
goto out;
}
- if (bo_node->adev->gmc.is_app_apu) {
+ if (bo_node->adev->apu_prefer_gtt) {
best_loc = 0;
goto out;
}
@@ -3398,32 +3527,39 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
*migrated = false;
best_loc = svm_range_best_prefetch_location(prange);
- if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
- best_loc == prange->actual_loc)
+ /* when best_loc is a gpu node and same as prange->actual_loc
+ * we still need to do the migration, as prange->actual_loc != 0 does
+ * not mean all pages in prange are in vram. hmm migrate will pick
+ * up the right pages during migration.
+ */
+ if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) ||
+ (best_loc == 0 && prange->actual_loc == 0))
return 0;
if (!best_loc) {
- r = svm_migrate_vram_to_ram(prange, mm,
+ r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
*migrated = !r;
return r;
}
- r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
+ r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last,
+ mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
- return r;
+ return 0;
}
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
{
- if (!fence)
- return -EINVAL;
-
- if (dma_fence_is_signaled(&fence->base))
- return 0;
-
- if (fence->svm_bo) {
+ /* Dereferencing fence->svm_bo is safe here because the fence hasn't
+ * signaled yet and we're under the protection of the fence->lock.
+ * After the fence is signaled in svm_range_bo_release, we cannot get
+ * here any more.
+ *
+ * Reference is dropped in svm_range_evict_svm_bo_worker.
+ */
+ if (svm_bo_ref_unless_zero(fence->svm_bo)) {
WRITE_ONCE(fence->svm_bo->evicting, 1);
schedule_work(&fence->svm_bo->eviction_work);
}
@@ -3438,8 +3574,6 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
int r = 0;
svm_bo = container_of(work, struct svm_range_bo, eviction_work);
- if (!svm_bo_ref_unless_zero(svm_bo))
- return; /* svm_bo was freed while eviction was pending */
if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
mm = svm_bo->eviction_fence->mm;
@@ -3464,7 +3598,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex);
do {
+ /* migrate all vram pages in this prange to sys ram;
+ * after that prange->actual_loc should be zero
+ */
r = svm_migrate_vram_to_ram(prange, mm,
+ prange->start, prange->last,
KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
} while (!r && prange->actual_loc && --retries);
@@ -3502,15 +3640,16 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
struct list_head update_list;
struct list_head insert_list;
struct list_head remove_list;
+ struct list_head remap_list;
struct svm_range_list *svms;
struct svm_range *prange;
struct svm_range *next;
bool update_mapping = false;
bool flush_tlb;
- int r = 0;
+ int r, ret = 0;
- pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
- p->pasid, &p->svms, start, start + size - 1, size);
+ pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
+ p->lead_thread->pid, &p->svms, start, start + size - 1, size);
r = svm_range_check_attr(p, nattr, attrs);
if (r)
@@ -3533,7 +3672,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
/* Add new range and split existing ranges as needed */
r = svm_range_add(p, start, size, nattr, attrs, &update_list,
- &insert_list, &remove_list);
+ &insert_list, &remove_list, &remap_list);
if (r) {
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
@@ -3587,15 +3726,28 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
- r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
- true, true, flush_tlb);
+ r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+ MAX_GPU_INSTANCE, true, true, flush_tlb);
if (r)
pr_debug("failed %d to map svm range\n", r);
out_unlock_range:
mutex_unlock(&prange->migrate_mutex);
if (r)
- break;
+ ret = r;
+ }
+
+ list_for_each_entry(prange, &remap_list, update_list) {
+ pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n",
+ prange, prange->start, prange->last);
+ mutex_lock(&prange->migrate_mutex);
+ r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
+ MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu);
+ if (r)
+ pr_debug("failed %d on remap svm range\n", r);
+ mutex_unlock(&prange->migrate_mutex);
+ if (r)
+ ret = r;
}
dynamic_svm_range_dump(svms);
@@ -3605,10 +3757,10 @@ out_unlock_range:
out:
mutex_unlock(&process_info->lock);
- pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
- &p->svms, start, start + size - 1, r);
+ pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] done, r=%d\n",
+ p->lead_thread->pid, &p->svms, start, start + size - 1, r);
- return r;
+ return ret ? ret : r;
}
static int
@@ -3689,7 +3841,7 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
node = interval_tree_iter_first(&svms->objects, start, last);
if (!node) {
pr_debug("range attrs not found return default values\n");
- svm_range_set_default_attributes(&location, &prefetch_loc,
+ svm_range_set_default_attributes(svms, &location, &prefetch_loc,
&granularity, &flags_and);
flags_or = flags_and;
if (p->xnack_enabled)
@@ -3944,8 +4096,8 @@ exit:
return ret;
}
-int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
- uint64_t *svm_priv_data_size)
+void svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
{
uint64_t total_size, accessibility_size, common_attr_size;
int nattr_common = 4, nattr_accessibility = 1;
@@ -3957,8 +4109,6 @@ int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
*svm_priv_data_size = 0;
svms = &p->svms;
- if (!svms)
- return -EINVAL;
mutex_lock(&svms->lock);
list_for_each_entry(prange, &svms->list, list) {
@@ -4000,7 +4150,6 @@ int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
pr_debug("num_svm_ranges %u total_priv_size %llu\n", *num_svm_ranges,
*svm_priv_data_size);
- return 0;
}
int kfd_criu_checkpoint_svm(struct kfd_process *p,
@@ -4017,8 +4166,6 @@ int kfd_criu_checkpoint_svm(struct kfd_process *p,
struct mm_struct *mm;
svms = &p->svms;
- if (!svms)
- return -EINVAL;
mm = get_task_mm(p->lead_thread);
if (!mm) {
@@ -4116,7 +4263,7 @@ svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
r = svm_range_get_attr(p, mm, start, size, nattrs, attrs);
break;
default:
- r = EINVAL;
+ r = -EINVAL;
break;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 9e668eeefb32..01c7a4877904 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -78,6 +78,7 @@ struct svm_work_list_item {
* @update_list:link list node used to add to update_list
* @mapping: bo_va mapping structure to create and update GPU page table
* @npages: number of pages
+ * @vram_pages: number of vram pages in this svm_range
* @dma_addr: dma mapping address on each GPU for system memory physical page
* @ttm_res: vram ttm resource map
* @offset: range start offset within mm_nodes
@@ -88,7 +89,9 @@ struct svm_work_list_item {
* @flags: flags defined as KFD_IOCTL_SVM_FLAG_*
* @perferred_loc: perferred location, 0 for CPU, or GPU id
* @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
- * @actual_loc: the actual location, 0 for CPU, or GPU id
+ * @actual_loc: this svm_range location. 0: all pages are from sys ram;
+ * GPU id: this svm_range may include vram pages from GPU with
+ * id actual_loc.
* @granularity:migration granularity, log2 num pages
* @invalid: not 0 means cpu page table is invalidated
* @validate_timestamp: system timestamp when range is validated
@@ -112,6 +115,7 @@ struct svm_range {
struct list_head list;
struct list_head update_list;
uint64_t npages;
+ uint64_t vram_pages;
dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
struct ttm_resource *ttm_res;
uint64_t offset;
@@ -132,9 +136,8 @@ struct svm_range {
struct list_head child_list;
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
- bool validated_once;
bool mapped_to_gpu;
- bool is_error_flag;
+ atomic_t queue_refcount;
};
static inline void svm_range_lock(struct svm_range *prange)
@@ -170,22 +173,19 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange,
int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
bool clear);
void svm_range_vram_node_free(struct svm_range *prange);
-int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
- unsigned long addr, struct svm_range *parent,
- struct svm_range *prange);
int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
- uint32_t vmid, uint32_t node_id, uint64_t addr,
+ uint32_t vmid, uint32_t node_id, uint64_t addr, uint64_t ts,
bool write_fault);
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
void svm_range_add_list_work(struct svm_range_list *svms,
struct svm_range *prange, struct mm_struct *mm,
enum svm_work_list_ops op);
void schedule_deferred_list_work(struct svm_range_list *svms);
-void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages);
-void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma);
-int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
- uint64_t *svm_priv_data_size);
+void svm_range_dma_unmap(struct svm_range *prange);
+void svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size);
int kfd_criu_checkpoint_svm(struct kfd_process *p,
uint8_t __user *user_priv_data,
uint64_t *priv_offset);
@@ -202,7 +202,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
* is initialized to not 0 when page migration register device memory.
*/
#define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\
- (adev)->gmc.is_app_apu)
+ ((adev)->apu_prefer_gtt))
void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
@@ -225,7 +225,7 @@ static inline void svm_range_list_fini(struct kfd_process *p)
static inline int svm_range_restore_pages(struct amdgpu_device *adev,
unsigned int pasid,
uint32_t client_id, uint32_t node_id,
- uint64_t addr, bool write_fault)
+ uint64_t addr, uint64_t ts, bool write_fault)
{
return -EFAULT;
}
@@ -237,13 +237,12 @@ static inline int svm_range_schedule_evict_svm_bo(
return -EINVAL;
}
-static inline int svm_range_get_info(struct kfd_process *p,
- uint32_t *num_svm_ranges,
- uint64_t *svm_priv_data_size)
+static inline void svm_range_get_info(struct kfd_process *p,
+ uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
{
*num_svm_ranges = 0;
*svm_priv_data_size = 0;
- return 0;
}
static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c8c75ff7cea8..5c98746eb72d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -31,6 +31,7 @@
#include <linux/log2.h>
#include <linux/dmi.h>
#include <linux/atomic.h>
+#include <linux/crc16.h>
#include "kfd_priv.h"
#include "kfd_crat.h"
@@ -107,24 +108,6 @@ struct kfd_node *kfd_device_by_id(uint32_t gpu_id)
return top_dev->gpu;
}
-struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev)
-{
- struct kfd_topology_device *top_dev;
- struct kfd_node *device = NULL;
-
- down_read(&topology_lock);
-
- list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) {
- device = top_dev->gpu;
- break;
- }
-
- up_read(&topology_lock);
-
- return device;
-}
-
/* Called with write topology_lock acquired */
static void kfd_release_topology_device(struct kfd_topology_device *dev)
{
@@ -291,6 +274,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
iolink->max_bandwidth);
sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size",
iolink->rec_transfer_size);
+ sysfs_show_32bit_prop(buffer, offs, "recommended_sdma_engine_id_mask",
+ iolink->rec_sdma_eng_id_mask);
sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);
return offs;
@@ -525,6 +510,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.capability |=
HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
+ if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) &&
+ (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
+
sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
dev->node_props.max_engine_clk_fcompute);
@@ -534,11 +523,15 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->gpu->kfd->mec_fw_version);
sysfs_show_32bit_prop(buffer, offs, "capability",
dev->node_props.capability);
+ sysfs_show_32bit_prop(buffer, offs, "capability2",
+ dev->node_props.capability2);
sysfs_show_64bit_prop(buffer, offs, "debug_prop",
dev->node_props.debug_prop);
sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
dev->gpu->kfd->sdma_fw_version);
sysfs_show_64bit_prop(buffer, offs, "unique_id",
+ dev->gpu->xcp ?
+ dev->gpu->xcp->unique_id :
dev->gpu->adev->unique_id);
sysfs_show_32bit_prop(buffer, offs, "num_xcc",
NUM_XCC(dev->gpu->xcc_mask));
@@ -958,32 +951,30 @@ static void kfd_update_system_properties(void)
dev = list_last_entry(&topology_device_list,
struct kfd_topology_device, list);
if (dev) {
- sys_props.platform_id =
- (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
+ sys_props.platform_id = dev->oem_id64;
sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
sys_props.platform_rev = dev->oem_revision;
}
up_read(&topology_lock);
}
-static void find_system_memory(const struct dmi_header *dm,
- void *private)
+static void find_system_memory(const struct dmi_header *dm, void *private)
{
+ struct dmi_mem_device *memdev = container_of(dm, struct dmi_mem_device, header);
struct kfd_mem_properties *mem;
- u16 mem_width, mem_clock;
struct kfd_topology_device *kdev =
(struct kfd_topology_device *)private;
- const u8 *dmi_data = (const u8 *)(dm + 1);
-
- if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
- mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
- mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
- list_for_each_entry(mem, &kdev->mem_props, list) {
- if (mem_width != 0xFFFF && mem_width != 0)
- mem->width = mem_width;
- if (mem_clock != 0)
- mem->mem_clk_max = mem_clock;
- }
+
+ if (memdev->header.type != DMI_ENTRY_MEM_DEVICE)
+ return;
+ if (memdev->header.length < sizeof(struct dmi_mem_device))
+ return;
+
+ list_for_each_entry(mem, &kdev->mem_props, list) {
+ if (memdev->total_width != 0xFFFF && memdev->total_width != 0)
+ mem->width = memdev->total_width;
+ if (memdev->speed != 0)
+ mem->mem_clk_max = memdev->speed;
}
}
@@ -1092,14 +1083,17 @@ void kfd_topology_shutdown(void)
static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
{
- uint32_t hashout;
+ uint32_t gpu_id;
uint32_t buf[8];
uint64_t local_mem_size;
- int i;
+ struct kfd_topology_device *dev;
+ bool is_unique;
+ uint8_t *crc_buf;
if (!gpu)
return 0;
+ crc_buf = (uint8_t *)&buf;
local_mem_size = gpu->local_mem_info.local_mem_size_private +
gpu->local_mem_info.local_mem_size_public;
buf[0] = gpu->adev->pdev->devfn;
@@ -1112,10 +1106,34 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
buf[6] = upper_32_bits(local_mem_size);
buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16);
- for (i = 0, hashout = 0; i < 8; i++)
- hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
+ gpu_id = crc16(0, crc_buf, sizeof(buf)) &
+ ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
+
+ /* There is a very small possibility when generating a
+ * 16 (KFD_GPU_ID_HASH_WIDTH) bit value from an 8 word buffer
+ * that the value could be 0 or non-unique. So, check if
+ * it is unique and non-zero. If not unique, increment until a
+ * unique one is found. In case of overflow, restart from 1.
+ */
+
+ down_read(&topology_lock);
+ do {
+ is_unique = true;
+ if (!gpu_id)
+ gpu_id = 1;
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (dev->gpu && dev->gpu_id == gpu_id) {
+ is_unique = false;
+ break;
+ }
+ }
+ if (unlikely(!is_unique))
+ gpu_id = (gpu_id + 1) &
+ ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
+ } while (!is_unique);
+ up_read(&topology_lock);
- return hashout;
+ return gpu_id;
}
/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
* the GPU device is not already present in the topology device
@@ -1238,6 +1256,61 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev,
}
}
+#define REC_SDMA_NUM_GPU 8
+static const int rec_sdma_eng_map[REC_SDMA_NUM_GPU][REC_SDMA_NUM_GPU] = {
+ { -1, 14, 12, 2, 4, 8, 10, 6 },
+ { 14, -1, 2, 10, 8, 4, 6, 12 },
+ { 10, 2, -1, 12, 14, 6, 4, 8 },
+ { 2, 12, 10, -1, 6, 14, 8, 4 },
+ { 4, 8, 14, 6, -1, 10, 12, 2 },
+ { 8, 4, 6, 14, 12, -1, 2, 10 },
+ { 10, 6, 4, 8, 12, 2, -1, 14 },
+ { 6, 12, 8, 4, 2, 10, 14, -1 }};
+
+static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev,
+ struct kfd_iolink_properties *outbound_link,
+ struct kfd_iolink_properties *inbound_link)
+{
+ struct kfd_node *gpu = outbound_link->gpu;
+ struct amdgpu_device *adev = gpu->adev;
+ unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
+ unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu);
+ unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu);
+ uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1;
+ uint32_t xgmi_sdma_eng_id_mask =
+ ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines;
+
+ bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu &&
+ adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 &&
+ num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) &&
+ num_xgmi_nodes == 8);
+
+ if (support_rec_eng) {
+ int src_socket_id = adev->gmc.xgmi.physical_node_id;
+ int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id;
+ unsigned int reshift = num_xgmi_sdma_engines == 6 ? 1 : 0;
+
+ outbound_link->rec_sdma_eng_id_mask =
+ 1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift);
+ inbound_link->rec_sdma_eng_id_mask =
+ 1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift);
+
+ /* If recommended engine is out of range, need to reset the mask */
+ if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+ outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
+ if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+ inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
+
+ } else {
+ uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
+ num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask :
+ sdma_eng_id_mask;
+
+ outbound_link->rec_sdma_eng_id_mask = engine_mask;
+ inbound_link->rec_sdma_eng_id_mask = engine_mask;
+ }
+}
+
static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
{
struct kfd_iolink_properties *link, *inbound_link;
@@ -1276,6 +1349,7 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
+ kfd_set_recommended_sdma_engines(peer_dev, link, inbound_link);
}
}
@@ -1342,10 +1416,11 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g
num_cpu++;
}
+ if (list_empty(&kdev->io_link_props))
+ return -ENODATA;
+
gpu_link = list_first_entry(&kdev->io_link_props,
- struct kfd_iolink_properties, list);
- if (!gpu_link)
- return -ENOMEM;
+ struct kfd_iolink_properties, list);
for (i = 0; i < num_cpu; i++) {
/* CPU <--> GPU */
@@ -1423,15 +1498,17 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
peer->gpu->adev))
return ret;
+ if (list_empty(&kdev->io_link_props))
+ return -ENODATA;
+
iolink1 = list_first_entry(&kdev->io_link_props,
- struct kfd_iolink_properties, list);
- if (!iolink1)
- return -ENOMEM;
+ struct kfd_iolink_properties, list);
+
+ if (list_empty(&peer->io_link_props))
+ return -ENODATA;
iolink2 = list_first_entry(&peer->io_link_props,
- struct kfd_iolink_properties, list);
- if (!iolink2)
- return -ENOMEM;
+ struct kfd_iolink_properties, list);
props = kfd_alloc_struct(props);
if (!props)
@@ -1449,17 +1526,19 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
/* CPU->CPU link*/
cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
if (cpu_dev) {
- list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
- if (iolink3->node_to == iolink2->node_to)
- break;
-
- props->weight += iolink3->weight;
- props->min_latency += iolink3->min_latency;
- props->max_latency += iolink3->max_latency;
- props->min_bandwidth = min(props->min_bandwidth,
- iolink3->min_bandwidth);
- props->max_bandwidth = min(props->max_bandwidth,
- iolink3->max_bandwidth);
+ list_for_each_entry(iolink3, &cpu_dev->io_link_props, list) {
+ if (iolink3->node_to != iolink2->node_to)
+ continue;
+
+ props->weight += iolink3->weight;
+ props->min_latency += iolink3->min_latency;
+ props->max_latency += iolink3->max_latency;
+ props->min_bandwidth = min(props->min_bandwidth,
+ iolink3->min_bandwidth);
+ props->max_bandwidth = min(props->max_bandwidth,
+ iolink3->max_bandwidth);
+ break;
+ }
} else {
WARN(1, "CPU node not found");
}
@@ -1510,7 +1589,8 @@ static int kfd_dev_create_p2p_links(void)
break;
if (!dev->gpu || !dev->gpu->adev ||
(dev->gpu->kfd->hive_id &&
- dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id))
+ dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id &&
+ amdgpu_xgmi_get_is_sharing_enabled(dev->gpu->adev, new_dev->gpu->adev)))
goto next;
/* check if node(s) is/are peer accessible in one direction or bi-direction */
@@ -1533,7 +1613,6 @@ out:
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
struct kfd_gpu_cache_info *pcache_info,
- struct kfd_cu_info *cu_info,
int cu_bitmask,
int cache_type, unsigned int cu_processor_id,
int cu_block)
@@ -1560,6 +1639,7 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
pcache->cache_level = pcache_info[cache_type].cache_level;
pcache->cache_size = pcache_info[cache_type].cache_size;
+ pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
pcache->cache_type |= HSA_CACHE_TYPE_DATA;
@@ -1595,18 +1675,37 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
struct kfd_gpu_cache_info *pcache_info,
- struct kfd_cu_info *cu_info,
+ struct amdgpu_cu_info *cu_info,
+ struct amdgpu_gfx_config *gfx_info,
int cache_type, unsigned int cu_processor_id,
struct kfd_node *knode)
{
- unsigned int cu_sibling_map_mask;
+ unsigned int cu_sibling_map_mask = 0;
int first_active_cu;
int i, j, k, xcc, start, end;
+ int num_xcc = NUM_XCC(knode->xcc_mask);
struct kfd_cache_properties *pcache = NULL;
+ enum amdgpu_memory_partition mode;
+ struct amdgpu_device *adev = knode->adev;
+ bool found = false;
start = ffs(knode->xcc_mask) - 1;
- end = start + NUM_XCC(knode->xcc_mask);
- cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
+ end = start + num_xcc;
+
+ /* Find the bitmap of the first active cu in the first
+ * xcc. This is based on the assumption that every xcc must
+ * have at least one active cu.
+ */
+ for (i = 0; i < gfx_info->max_shader_engines && !found; i++) {
+ for (j = 0; j < gfx_info->max_sh_per_se && !found; j++) {
+ if (cu_info->bitmap[start][i % 4][j % 4]) {
+ cu_sibling_map_mask =
+ cu_info->bitmap[start][i % 4][j % 4];
+ found = true;
+ }
+ }
+ }
+
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
first_active_cu = ffs(cu_sibling_map_mask);
@@ -1624,7 +1723,19 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
pcache->processor_id_low = cu_processor_id
+ (first_active_cu - 1);
pcache->cache_level = pcache_info[cache_type].cache_level;
+ pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
+
+ if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0))
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ else
+ mode = UNKNOWN_MEMORY_PARTITION_MODE;
+
pcache->cache_size = pcache_info[cache_type].cache_size;
+ /* Partition mode only affects L3 cache size */
+ if (mode && pcache->cache_level == 3)
+ pcache->cache_size /= mode;
if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
pcache->cache_type |= HSA_CACHE_TYPE_DATA;
@@ -1642,15 +1753,15 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
k = 0;
for (xcc = start; xcc < end; xcc++) {
- for (i = 0; i < cu_info->num_shader_engines; i++) {
- for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+ for (i = 0; i < gfx_info->max_shader_engines; i++) {
+ for (j = 0; j < gfx_info->max_sh_per_se; j++) {
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
k += 4;
- cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
+ cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4];
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
}
}
@@ -1675,19 +1786,18 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
unsigned int cu_processor_id;
int ret;
unsigned int num_cu_shared;
- struct kfd_cu_info cu_info;
- struct kfd_cu_info *pcu_info;
+ struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
+ struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
int gpu_processor_id;
- struct kfd_cache_properties *props_ext;
+ struct kfd_cache_properties *props_ext = NULL;
int num_of_entries = 0;
int num_of_cache_types = 0;
struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
- amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
- pcu_info = &cu_info;
gpu_processor_id = dev->node_props.simd_id_base;
+ memset(cache_info, 0, sizeof(cache_info));
pcache_info = cache_info;
num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info);
if (!num_of_cache_types) {
@@ -1711,12 +1821,12 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
cu_processor_id = gpu_processor_id;
if (pcache_info[ct].cache_level == 1) {
for (xcc = start; xcc < end; xcc++) {
- for (i = 0; i < pcu_info->num_shader_engines; i++) {
- for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
- for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
+ for (i = 0; i < gfx_info->max_shader_engines; i++) {
+ for (j = 0; j < gfx_info->max_sh_per_se; j++) {
+ for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
- ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
- pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
+ ret = fill_in_l1_pcache(&props_ext, pcache_info,
+ cu_info->bitmap[xcc][i % 4][j + i / 4], ct,
cu_processor_id, k);
if (ret < 0)
@@ -1729,9 +1839,9 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
/* Move to next CU block */
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
- pcu_info->num_cu_per_sh) ?
+ gfx_info->max_cu_per_sh) ?
pcache_info[ct].num_cu_shared :
- (pcu_info->num_cu_per_sh - k);
+ (gfx_info->max_cu_per_sh - k);
cu_processor_id += num_cu_shared;
}
}
@@ -1739,7 +1849,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
}
} else {
ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
- pcu_info, ct, cu_processor_id, kdev);
+ cu_info, gfx_info, ct, cu_processor_id, kdev);
if (ret < 0)
break;
@@ -1754,7 +1864,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
}
-static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
+static int kfd_topology_add_device_locked(struct kfd_node *gpu,
struct kfd_topology_device **dev)
{
int proximity_domain = ++topology_crat_proximity_domain;
@@ -1767,8 +1877,7 @@ static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
COMPUTE_UNIT_GPU, gpu,
proximity_domain);
if (res) {
- pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
- gpu_id);
+ dev_err(gpu->adev->dev, "Error creating VCRAT\n");
topology_crat_proximity_domain--;
goto err;
}
@@ -1779,8 +1888,7 @@ static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
&temp_topology_device_list,
proximity_domain);
if (res) {
- pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
- gpu_id);
+ dev_err(gpu->adev->dev, "Error parsing VCRAT\n");
topology_crat_proximity_domain--;
goto err;
}
@@ -1806,8 +1914,8 @@ static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
if (!res)
sys_props.generation_count++;
else
- pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
- gpu_id, res);
+ dev_err(gpu->adev->dev, "Failed to update GPU to sysfs topology. res=%d\n",
+ res);
err:
kfd_destroy_crat_image(crat_image);
@@ -1890,7 +1998,8 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
- if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4))
dev->node_props.debug_prop |=
HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3;
@@ -1902,13 +2011,17 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+
+ if (!amdgpu_sriov_vf(dev->gpu->adev))
+ dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
+
} else {
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
- if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0))
dev->node_props.capability |=
- HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+ HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED;
}
kfd_topology_set_dbg_firmware_support(dev);
@@ -1918,19 +2031,18 @@ int kfd_topology_add_device(struct kfd_node *gpu)
{
uint32_t gpu_id;
struct kfd_topology_device *dev;
- struct kfd_cu_info cu_info;
int res = 0;
int i;
const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
+ struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
+ struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
- gpu_id = kfd_generate_gpu_id(gpu);
if (gpu->xcp && !gpu->xcp->ddev) {
dev_warn(gpu->adev->dev,
- "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.",
- gpu_id);
+ "Won't add GPU to topology since it has no drm node assigned.");
return 0;
} else {
- pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+ dev_dbg(gpu->adev->dev, "Adding new GPU to topology\n");
}
/* Check to see if this gpu device exists in the topology_device_list.
@@ -1942,11 +2054,12 @@ int kfd_topology_add_device(struct kfd_node *gpu)
down_write(&topology_lock);
dev = kfd_assign_gpu(gpu);
if (!dev)
- res = kfd_topology_add_device_locked(gpu, gpu_id, &dev);
+ res = kfd_topology_add_device_locked(gpu, &dev);
up_write(&topology_lock);
if (res)
return res;
+ gpu_id = kfd_generate_gpu_id(gpu);
dev->gpu_id = gpu_id;
gpu->id = gpu_id;
@@ -1959,9 +2072,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
/* Fill-in additional information that is not available in CRAT but
* needed for the topology
*/
-
- amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info);
-
for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
dev->node_props.name[i] = __tolower(asic_name[i]);
if (asic_name[i] == '\0')
@@ -1970,7 +2080,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
dev->node_props.name[i] = '\0';
dev->node_props.simd_arrays_per_engine =
- cu_info.num_shader_arrays_per_engine;
+ gfx_info->max_sh_per_se;
dev->node_props.gfx_target_version =
gpu->kfd->device_info.gfx_target_version;
@@ -1981,7 +2091,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
HSA_CAP_ASIC_REVISION_MASK);
dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
- if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3))
+ if (gpu->kfd->num_nodes > 1)
dev->node_props.location_id |= dev->gpu->node_id;
dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus);
@@ -2051,7 +2161,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
*/
if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
dev->node_props.simd_count =
- cu_info.simd_per_cu * cu_info.cu_active_number;
+ cu_info->simd_per_cu * cu_info->number;
dev->node_props.max_waves_per_simd = 10;
}
@@ -2074,6 +2184,8 @@ int kfd_topology_add_device(struct kfd_node *gpu)
dev->gpu->adev->gmc.xgmi.connected_to_cpu)
dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS;
+ kfd_queue_ctx_save_restore_size(dev);
+
kfd_debug_print_topology();
kfd_notify_gpu_change(gpu_id, 1);
@@ -2218,7 +2330,7 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
if (first_cpu_of_numa_node >= nr_cpu_ids)
return -1;
#ifdef CONFIG_X86_64
- return cpu_data(first_cpu_of_numa_node).apicid;
+ return cpu_data(first_cpu_of_numa_node).topo.apicid;
#else
return first_cpu_of_numa_node;
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 27386ce9a021..3de8ec0043bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -24,6 +24,7 @@
#ifndef __KFD_TOPOLOGY_H__
#define __KFD_TOPOLOGY_H__
+#include <linux/dmi.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/kfd_sysfs.h>
@@ -50,6 +51,7 @@ struct kfd_node_properties {
uint32_t cpu_core_id_base;
uint32_t simd_id_base;
uint32_t capability;
+ uint32_t capability2;
uint64_t debug_prop;
uint32_t max_waves_per_simd;
uint32_t lds_size_in_kb;
@@ -74,6 +76,10 @@ struct kfd_node_properties {
uint32_t num_sdma_xgmi_engines;
uint32_t num_sdma_queues_per_engine;
uint32_t num_cp_queues;
+ uint32_t cwsr_size;
+ uint32_t ctl_stack_size;
+ uint32_t eop_buffer_size;
+ uint32_t debug_memory_size;
char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};
@@ -121,6 +127,7 @@ struct kfd_iolink_properties {
uint32_t min_bandwidth;
uint32_t max_bandwidth;
uint32_t rec_transfer_size;
+ uint32_t rec_sdma_eng_id_mask;
uint32_t flags;
struct kfd_node *gpu;
struct kobject *kobj;
@@ -154,7 +161,10 @@ struct kfd_topology_device {
struct attribute attr_gpuid;
struct attribute attr_name;
struct attribute attr_props;
- uint8_t oem_id[CRAT_OEMID_LENGTH];
+ union {
+ uint8_t oem_id[CRAT_OEMID_LENGTH];
+ uint64_t oem_id64;
+ };
uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH];
uint32_t oem_revision;
};
@@ -171,6 +181,22 @@ struct kfd_system_properties {
struct attribute attr_props;
};
+struct dmi_mem_device {
+ struct dmi_header header;
+ u16 physical_handle;
+ u16 error_handle;
+ u16 total_width;
+ u16 data_width;
+ u16 size;
+ u8 form_factor;
+ u8 device_set;
+ u8 device_locator;
+ u8 bank_locator;
+ u8 memory_type;
+ u16 type_detail;
+ u16 speed;
+} __packed;
+
struct kfd_topology_device *kfd_create_topology_device(
struct list_head *device_list);
void kfd_release_topology_device_list(struct list_head *device_list);
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
index 10138676f27f..e5c0205f2618 100644
--- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -29,6 +29,7 @@
#define SOC15_INTSRC_CP_BAD_OPCODE 183
#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239
#define SOC15_INTSRC_VMC_FAULT 0
+#define SOC15_INTSRC_VMC_UTCL2_POISON 1
#define SOC15_INTSRC_SDMA_TRAP 224
#define SOC15_INTSRC_SDMA_ECC 220
#define SOC21_INTSRC_SDMA_TRAP 49
diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
index 353597fc908d..44009aa8216e 100644
--- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
+++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c
@@ -21,6 +21,7 @@
*
*/
+#include <linux/export.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/platform_device.h>
@@ -43,19 +44,32 @@ static const struct drm_driver amdgpu_xcp_driver = {
.minor = 0,
};
-static int pdev_num;
+static int8_t pdev_num;
static struct xcp_device *xcp_dev[MAX_XCP_PLATFORM_DEVICE];
+static DEFINE_MUTEX(xcp_mutex);
int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev)
{
struct platform_device *pdev;
struct xcp_device *pxcp_dev;
- int ret;
+ char dev_name[20];
+ int ret, i;
+
+ guard(mutex)(&xcp_mutex);
if (pdev_num >= MAX_XCP_PLATFORM_DEVICE)
return -ENODEV;
- pdev = platform_device_register_simple("amdgpu_xcp", pdev_num, NULL, 0);
+ for (i = 0; i < MAX_XCP_PLATFORM_DEVICE; i++) {
+ if (!xcp_dev[i])
+ break;
+ }
+
+ if (i >= MAX_XCP_PLATFORM_DEVICE)
+ return -ENODEV;
+
+ snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", i);
+ pdev = platform_device_register_simple(dev_name, -1, NULL, 0);
if (IS_ERR(pdev))
return PTR_ERR(pdev);
@@ -70,8 +84,8 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev)
goto out_devres;
}
- xcp_dev[pdev_num] = pxcp_dev;
- xcp_dev[pdev_num]->pdev = pdev;
+ xcp_dev[i] = pxcp_dev;
+ xcp_dev[i]->pdev = pdev;
*ddev = &pxcp_dev->drm;
pdev_num++;
@@ -86,15 +100,43 @@ out_unregister:
}
EXPORT_SYMBOL(amdgpu_xcp_drm_dev_alloc);
+static void free_xcp_dev(int8_t index)
+{
+ if ((index < MAX_XCP_PLATFORM_DEVICE) && (xcp_dev[index])) {
+ struct platform_device *pdev = xcp_dev[index]->pdev;
+
+ devres_release_group(&pdev->dev, NULL);
+ platform_device_unregister(pdev);
+
+ xcp_dev[index] = NULL;
+ pdev_num--;
+ }
+}
+
+void amdgpu_xcp_drm_dev_free(struct drm_device *ddev)
+{
+ int8_t i;
+
+ guard(mutex)(&xcp_mutex);
+
+ for (i = 0; i < MAX_XCP_PLATFORM_DEVICE; i++) {
+ if ((xcp_dev[i]) && (&xcp_dev[i]->drm == ddev)) {
+ free_xcp_dev(i);
+ break;
+ }
+ }
+}
+EXPORT_SYMBOL(amdgpu_xcp_drm_dev_free);
+
void amdgpu_xcp_drv_release(void)
{
- for (--pdev_num; pdev_num >= 0; --pdev_num) {
- devres_release_group(&xcp_dev[pdev_num]->pdev->dev, NULL);
- platform_device_unregister(xcp_dev[pdev_num]->pdev);
- xcp_dev[pdev_num]->pdev = NULL;
- xcp_dev[pdev_num] = NULL;
+ int8_t i;
+
+ guard(mutex)(&xcp_mutex);
+
+ for (i = 0; pdev_num && i < MAX_XCP_PLATFORM_DEVICE; i++) {
+ free_xcp_dev(i);
}
- pdev_num = 0;
}
EXPORT_SYMBOL(amdgpu_xcp_drv_release);
diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
index c1c4b679bf95..580a1602c8e3 100644
--- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
+++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h
@@ -25,5 +25,6 @@
#define _AMDGPU_XCP_DRV_H_
int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev);
+void amdgpu_xcp_drm_dev_free(struct drm_device *ddev);
void amdgpu_xcp_drv_release(void);
#endif /* _AMDGPU_XCP_DRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 901d1961b739..abd3b6564373 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -1,24 +1,29 @@
# SPDX-License-Identifier: MIT
+# Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
+
menu "Display Engine Configuration"
depends on DRM && DRM_AMDGPU
config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
- depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64
+ depends on BROKEN || !CC_IS_CLANG || ARM64 || LOONGARCH || RISCV || SPARC64 || X86_64
+ select CEC_CORE
+ select CEC_NOTIFIER
select SND_HDA_COMPONENT if SND_HDA_CORE
# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
- select DRM_AMD_DC_FP if (X86 || LOONGARCH || (PPC64 && ALTIVEC) || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG))
+ select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || LOONGARCH || RISCV))
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
Raven ASICs.
- calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64)
- architectures built with Clang (all released versions), whereby the stack
- frame gets blown up to well over 5k. This would cause an immediate kernel
- panic on most architectures. We'll revert this when the following bug report
- has been resolved: https://github.com/llvm/llvm-project/issues/41896.
+ calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 ||
+ ARM64 || LOONGARCH || RISCV) architectures built with Clang (all released
+ versions), whereby the stack frame gets blown up to well over 5k. This
+ would cause an immediate kernel panic on most architectures. We'll revert
+ this when the following bug report has been resolved:
+ https://github.com/llvm/llvm-project/issues/41896.
config DRM_AMD_DC_FP
def_bool n
diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile
index 0d610cb376bb..0084a8d55254 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -29,6 +29,22 @@ AMDDALPATH = $(RELATIVE_AMD_DISPLAY_PATH)
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hwss
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/resource
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dsc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/optc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dpp
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hubbub
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dccg
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hubp
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dio
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dwb
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/hpo
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/mmhubbub
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/mpc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/opp
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/pg
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/soc_and_ip_translator
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync
subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color
diff --git a/drivers/gpu/drm/amd/display/TODO b/drivers/gpu/drm/amd/display/TODO
deleted file mode 100644
index a8a6c106e8c7..000000000000
--- a/drivers/gpu/drm/amd/display/TODO
+++ /dev/null
@@ -1,110 +0,0 @@
-===============================================================================
-TODOs
-===============================================================================
-
-1. Base this on drm-next - WIP
-
-
-2. Cleanup commit history
-
-
-3. WIP - Drop page flip helper and use DRM's version
-
-
-4. DONE - Flatten all DC objects
- * dc_stream/core_stream/stream should just be dc_stream
- * Same for other DC objects
-
- "Is there any major reason to keep all those abstractions?
-
- Could you collapse everything into struct dc_stream?
-
- I haven't looked recently but I didn't get the impression there was a
- lot of design around what was public/protected, more whatever needed
- to be used by someone else was in public."
- ~ Dave Airlie
-
-
-5. DONE - Rename DC objects to align more with DRM
- * dc_surface -> dc_plane_state
- * dc_stream -> dc_stream_state
-
-
-6. DONE - Per-plane and per-stream validation
-
-
-7. WIP - Per-plane and per-stream commit
-
-
-8. WIP - Split pipe_ctx into plane and stream resource structs
-
-
-9. Attach plane and stream reources to state object instead of validate_context
-
-
-10. Remove dc_edid_caps and drm_helpers_parse_edid_caps
- * Use drm_display_info instead
- * Remove DC's edid quirks and rely on DRM's quirks (add quirks if needed)
-
- "Making sure you use the sink-specific helper libraries and kernel
- subsystems, since there's really no good reason to have 2nd
- implementation of those in the kernel. Looks likes that's done for mst
- and edid parsing. There's still a bit a midlayer feeling to the edid
- parsing side (e.g. dc_edid_caps and dm_helpers_parse_edid_caps, I
- think it'd be much better if you convert that over to reading stuff
- from drm_display_info and if needed, push stuff into the core). Also,
- I can't come up with a good reason why DC needs all this (except to
- reimplement half of our edid quirk table, which really isn't a good
- idea). Might be good if you put this onto the list of things to fix
- long-term, but imo not a blocker. Definitely make sure new stuff
- doesn't slip in (i.e. if you start adding edid quirks to DC instead of
- the drm core, refactoring to use the core edid stuff was pointless)."
- ~ Daniel Vetter
-
-
-11. Remove dc/i2caux. This folder can be somewhat misleading. It's basically an
-overy complicated HW programming function for sendind and receiving i2c/aux
-commands. We can greatly simplify that and move it into dc/dceXYZ like other
-HW blocks.
-
-12. drm_modeset_lock in MST should no longer be needed in recent kernels
- * Adopt appropriate locking scheme
-
-13. get_modes and best_encoder callbacks look a bit funny. Can probably rip out
-a few indirections, and consider removing entirely and using the
-drm_atomic_helper_best_encoder default behaviour.
-
-14. core/dc_debug.c, consider switching to the atomic state debug helpers and
-moving all your driver state printing into the various atomic_print_state
-callbacks. There's also plans to expose this stuff in a standard way across all
-drivers, to make debugging userspace compositors easier across different hw.
-
-15. Move DP/HDMI dual mode adaptors to drm_dp_dual_mode_helper.c. See
-dal_ddc_service_i2c_query_dp_dual_mode_adaptor.
-
-16. Move to core SCDC helpers (I think those are new since initial DC review).
-
-17. There's still a pretty massive layer cake around dp aux and DPCD handling,
-with like 3 levels of abstraction and using your own structures instead of the
-stuff in drm_dp_helper.h. drm_dp_helper.h isn't really great and already has 2
-incompatible styles, just means more reasons not to add a third (or well third
-one gets to do the cleanup refactor).
-
-18. There's a pile of sink handling code, both for DP and HDMI where I didn't
-immediately recognize the standard. I think long term it'd be best for the drm
-subsystem if we try to move as much of that into helpers/core as possible, and
-share it with drivers. But that's a very long term goal, and by far not just an
-issue with DC - other drivers, especially around DP sink handling, are equally
-guilty.
-
-19. DONE - The DC logger is still a rather sore thing, but I know that the
-DRM_DEBUG stuff just isn't up to the challenges either. We need to figure out
-something that integrates better with DRM and linux debug printing, while not
-being useless with filtering output. dynamic debug printing might be an option.
-
-20. Use kernel i2c device to program HDMI retimer. Some boards have an HDMI
-retimer that we need to program to pass PHY compliance. Currently that's
-bypassing the i2c device and goes directly to HW. This should be changed.
-
-21. Remove vector.c from dc/basics. It's used in DDC code which can probably
-be simplified enough to no longer need a vector implementation.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 8bf94920d23e..7329b8cc2576 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -25,22 +25,26 @@
+ifneq ($(CONFIG_DRM_AMD_DC),)
AMDGPUDM = \
amdgpu_dm.o \
amdgpu_dm_plane.o \
amdgpu_dm_crtc.o \
amdgpu_dm_irq.o \
amdgpu_dm_mst_types.o \
- amdgpu_dm_color.o
+ amdgpu_dm_color.o \
+ amdgpu_dm_services.o \
+ amdgpu_dm_helpers.o \
+ amdgpu_dm_pp_smu.o \
+ amdgpu_dm_psr.o \
+ amdgpu_dm_replay.o \
+ amdgpu_dm_quirks.o \
+ amdgpu_dm_wb.o
ifdef CONFIG_DRM_AMD_DC_FP
AMDGPUDM += dc_fpu.o
endif
-ifneq ($(CONFIG_DRM_AMD_DC),)
-AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o amdgpu_dm_psr.o amdgpu_dm_replay.o
-endif
-
AMDGPUDM += amdgpu_dm_hdcp.o
ifneq ($(CONFIG_DEBUG_FS),)
@@ -52,3 +56,4 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc
AMDGPU_DM = $(addprefix $(AMDDALPATH)/amdgpu_dm/,$(AMDGPUDM))
AMD_DISPLAY_FILES += $(AMDGPU_DM)
+endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c6fd34bab358..0d03e324d5b9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -37,14 +38,13 @@
#include "dc/dc_dmub_srv.h"
#include "dc/dc_edid_parser.h"
#include "dc/dc_stat.h"
+#include "dc/dc_state.h"
#include "amdgpu_dm_trace.h"
-#include "dpcd_defs.h"
#include "link/protocols/link_dpcd.h"
#include "link_service_types.h"
#include "link/protocols/link_dp_capability.h"
#include "link/protocols/link_ddc.h"
-#include "vid.h"
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_ucode.h"
@@ -54,7 +54,7 @@
#include "amdgpu_dm_crtc.h"
#include "amdgpu_dm_hdcp.h"
#include <drm/display/drm_hdcp_helper.h>
-#include "amdgpu_pm.h"
+#include "amdgpu_dm_wb.h"
#include "amdgpu_atombios.h"
#include "amd_shared.h"
@@ -75,39 +75,37 @@
#include <linux/types.h>
#include <linux/pm_runtime.h>
#include <linux/pci.h>
+#include <linux/power_supply.h>
#include <linux/firmware.h>
#include <linux/component.h>
-#include <linux/dmi.h>
+#include <linux/sort.h>
+#include <drm/drm_privacy_screen_consumer.h>
#include <drm/display/drm_dp_mst_helper.h>
#include <drm/display/drm_hdmi_helper.h>
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_uapi.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_blend.h>
+#include <drm/drm_fixed.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
+#include <drm/drm_utils.h>
#include <drm/drm_vblank.h>
#include <drm/drm_audio_component.h>
#include <drm/drm_gem_atomic_helper.h>
-#include <drm/drm_plane_helper.h>
+#include <media/cec-notifier.h>
#include <acpi/video.h>
#include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
-#include "dcn/dcn_1_0_offset.h"
-#include "dcn/dcn_1_0_sh_mask.h"
-#include "soc15_hw_ip.h"
-#include "soc15_common.h"
-#include "vega10_ip_offset.h"
-
-#include "gc/gc_11_0_0_offset.h"
-#include "gc/gc_11_0_0_sh_mask.h"
-
#include "modules/inc/mod_freesync.h"
#include "modules/power/power_helpers.h"
+static_assert(AMDGPU_DMUB_NOTIFICATION_MAX == DMUB_NOTIFICATION_MAX, "AMDGPU_DMUB_NOTIFICATION_MAX mismatch");
+
#define FIRMWARE_RENOIR_DMUB "amdgpu/renoir_dmcub.bin"
MODULE_FIRMWARE(FIRMWARE_RENOIR_DMUB);
#define FIRMWARE_SIENNA_CICHLID_DMUB "amdgpu/sienna_cichlid_dmcub.bin"
@@ -142,6 +140,18 @@ MODULE_FIRMWARE(FIRMWARE_RAVEN_DMCU);
#define FIRMWARE_NAVI12_DMCU "amdgpu/navi12_dmcu.bin"
MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
+#define FIRMWARE_DCN_35_DMUB "amdgpu/dcn_3_5_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_35_DMUB);
+
+#define FIRMWARE_DCN_351_DMUB "amdgpu/dcn_3_5_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_351_DMUB);
+
+#define FIRMWARE_DCN_36_DMUB "amdgpu/dcn_3_6_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_36_DMUB);
+
+#define FIRMWARE_DCN_401_DMUB "amdgpu/dcn_4_0_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_401_DMUB);
+
/* Number of bytes in PSP header for firmware. */
#define PSP_HEADER_BYTES 0x100
@@ -162,6 +172,9 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
static int amdgpu_dm_init(struct amdgpu_device *adev);
static void amdgpu_dm_fini(struct amdgpu_device *adev);
static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector);
+static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state);
+static struct amdgpu_i2c_adapter *
+create_i2c(struct ddc_service *ddc_service, bool oem);
static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link)
{
@@ -220,6 +233,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
static int amdgpu_dm_connector_get_modes(struct drm_connector *connector);
+static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state);
static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
static int amdgpu_dm_atomic_check(struct drm_device *dev,
@@ -228,6 +242,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector);
static void handle_hpd_rx_irq(void *param);
+static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
+ int bl_idx,
+ u32 user_brightness);
+
static bool
is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state,
struct drm_crtc_state *new_crtc_state);
@@ -254,7 +272,7 @@ static u32 dm_vblank_get_counter(struct amdgpu_device *adev, int crtc)
acrtc = adev->mode_info.crtcs[crtc];
if (!acrtc->dm_irq_params.stream) {
- DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
+ drm_err(adev_to_drm(adev), "dc_stream_state is NULL for crtc '%d'!\n",
crtc);
return 0;
}
@@ -265,8 +283,9 @@ static u32 dm_vblank_get_counter(struct amdgpu_device *adev, int crtc)
static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
u32 *vbl, u32 *position)
{
- u32 v_blank_start, v_blank_end, h_position, v_position;
+ u32 v_blank_start = 0, v_blank_end = 0, h_position = 0, v_position = 0;
struct amdgpu_crtc *acrtc = NULL;
+ struct dc *dc = adev->dm.dc;
if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
return -EINVAL;
@@ -274,11 +293,14 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
acrtc = adev->mode_info.crtcs[crtc];
if (!acrtc->dm_irq_params.stream) {
- DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n",
+ drm_err(adev_to_drm(adev), "dc_stream_state is NULL for crtc '%d'!\n",
crtc);
return 0;
}
+ if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed)
+ dc_allow_idle_optimizations(dc, false);
+
/*
* TODO rework base driver to use values directly.
* for now parse it back into reg-format
@@ -295,24 +317,24 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
return 0;
}
-static bool dm_is_idle(void *handle)
+static bool dm_is_idle(struct amdgpu_ip_block *ip_block)
{
/* XXX todo */
return true;
}
-static int dm_wait_for_idle(void *handle)
+static int dm_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* XXX todo */
return 0;
}
-static bool dm_check_soft_reset(void *handle)
+static bool dm_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
return false;
}
-static int dm_soft_reset(void *handle)
+static int dm_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* XXX todo */
return 0;
@@ -342,6 +364,8 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev,
static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state,
struct dm_crtc_state *new_state)
{
+ if (new_state->stream->adjust.timing_adjust_pending)
+ return true;
if (new_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED)
return true;
else if (amdgpu_dm_crtc_vrr_active(old_state) != amdgpu_dm_crtc_vrr_active(new_state))
@@ -350,13 +374,18 @@ static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state,
return false;
}
-static inline void reverse_planes_order(struct dc_surface_update *array_of_surface_update,
- int planes_count)
+/*
+ * DC will program planes with their z-order determined by their ordering
+ * in the dc_surface_updates array. This comparator is used to sort them
+ * by descending zpos.
+ */
+static int dm_plane_layer_index_cmp(const void *a, const void *b)
{
- int i, j;
+ const struct dc_surface_update *sa = (struct dc_surface_update *)a;
+ const struct dc_surface_update *sb = (struct dc_surface_update *)b;
- for (i = 0, j = planes_count - 1; i < j; i++, j--)
- swap(array_of_surface_update[i], array_of_surface_update[j]);
+ /* Sort by descending dc_plane layer_index (i.e. normalized_zpos) */
+ return sb->surface->layer_index - sa->surface->layer_index;
}
/**
@@ -383,13 +412,13 @@ static inline bool update_planes_and_stream_adapter(struct dc *dc,
struct dc_stream_update *stream_update,
struct dc_surface_update *array_of_surface_update)
{
- reverse_planes_order(array_of_surface_update, planes_count);
+ sort(array_of_surface_update, planes_count,
+ sizeof(*array_of_surface_update), dm_plane_layer_index_cmp, NULL);
/*
* Previous frame finished and HW is ready for optimization.
*/
- if (update_type == UPDATE_TYPE_FAST)
- dc_post_update_surfaces_to_stream(dc);
+ dc_post_update_surfaces_to_stream(dc);
return dc_update_planes_and_stream(dc,
array_of_surface_update,
@@ -410,6 +439,7 @@ static void dm_pflip_high_irq(void *interrupt_params)
struct amdgpu_crtc *amdgpu_crtc;
struct common_irq_params *irq_params = interrupt_params;
struct amdgpu_device *adev = irq_params->adev;
+ struct drm_device *dev = adev_to_drm(adev);
unsigned long flags;
struct drm_pending_vblank_event *e;
u32 vpos, hpos, v_blank_start, v_blank_end;
@@ -420,18 +450,17 @@ static void dm_pflip_high_irq(void *interrupt_params)
/* IRQ could occur when in initial stage */
/* TODO work and BO cleanup */
if (amdgpu_crtc == NULL) {
- DC_LOG_PFLIP("CRTC is null, returning.\n");
+ drm_dbg_state(dev, "CRTC is null, returning.\n");
return;
}
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
- DC_LOG_PFLIP("amdgpu_crtc->pflip_status = %d !=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p]\n",
- amdgpu_crtc->pflip_status,
- AMDGPU_FLIP_SUBMITTED,
- amdgpu_crtc->crtc_id,
- amdgpu_crtc);
+ drm_dbg_state(dev,
+ "amdgpu_crtc->pflip_status = %d != AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p]\n",
+ amdgpu_crtc->pflip_status, AMDGPU_FLIP_SUBMITTED,
+ amdgpu_crtc->crtc_id, amdgpu_crtc);
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
return;
}
@@ -497,9 +526,53 @@ static void dm_pflip_high_irq(void *interrupt_params)
amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
- DC_LOG_PFLIP("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp %d\n",
- amdgpu_crtc->crtc_id, amdgpu_crtc,
- vrr_active, (int) !e);
+ drm_dbg_state(dev,
+ "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp %d\n",
+ amdgpu_crtc->crtc_id, amdgpu_crtc, vrr_active, (int)!e);
+}
+
+/*
+ * Deferred-work handler installed by schedule_dc_vmin_vmax(): applies the
+ * saved timing adjustment to the stream while holding dc_lock, then releases
+ * the stream and frees the work item together with its adjust copy.
+ */
+static void dm_handle_vmin_vmax_update(struct work_struct *offload_work)
+{
+	struct vupdate_offload_work *job =
+		container_of(offload_work, struct vupdate_offload_work, work);
+	struct amdgpu_display_manager *dm = &job->adev->dm;
+
+	mutex_lock(&dm->dc_lock);
+	dc_stream_adjust_vmin_vmax(dm->dc, job->stream, job->adjust);
+	mutex_unlock(&dm->dc_lock);
+
+	dc_stream_release(job->stream);
+	kfree(job->adjust);
+	kfree(job);
+}
+
+/*
+ * schedule_dc_vmin_vmax - queue a deferred dc_stream_adjust_vmin_vmax() call
+ * @adev:   device whose DC instance will apply the adjustment
+ * @stream: stream to adjust; retained here, released by
+ *          dm_handle_vmin_vmax_update()
+ * @adjust: timing adjustment; copied, so the caller's storage may change
+ *          after this returns
+ *
+ * The vblank/vupdate IRQ handlers call this while holding event_lock with
+ * interrupts disabled, so the allocations must not sleep: GFP_NOWAIT is used
+ * instead of GFP_KERNEL. If an allocation fails the update is dropped and
+ * only a debug message is emitted.
+ */
+static void schedule_dc_vmin_vmax(struct amdgpu_device *adev,
+				  struct dc_stream_state *stream,
+				  struct dc_crtc_timing_adjust *adjust)
+{
+	struct vupdate_offload_work *offload_work;
+	struct dc_crtc_timing_adjust *adjust_copy;
+
+	offload_work = kzalloc(sizeof(*offload_work), GFP_NOWAIT);
+	if (!offload_work) {
+		drm_dbg_driver(adev_to_drm(adev), "Failed to allocate vupdate_offload_work\n");
+		return;
+	}
+
+	adjust_copy = kmemdup(adjust, sizeof(*adjust_copy), GFP_NOWAIT);
+	if (!adjust_copy) {
+		drm_dbg_driver(adev_to_drm(adev), "Failed to allocate adjust_copy\n");
+		kfree(offload_work);
+		return;
+	}
+
+	/* The worker drops this reference once the adjustment is applied. */
+	dc_stream_retain(stream);
+
+	INIT_WORK(&offload_work->work, dm_handle_vmin_vmax_update);
+	offload_work->adev = adev;
+	offload_work->stream = stream;
+	offload_work->adjust = adjust_copy;
+
+	queue_work(system_wq, &offload_work->work);
}
static void dm_vupdate_high_irq(void *interrupt_params)
@@ -518,7 +591,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
if (acrtc) {
vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc);
drm_dev = acrtc->base.dev;
- vblank = &drm_dev->vblank[acrtc->base.index];
+ vblank = drm_crtc_vblank_crtc(&acrtc->base);
previous_timestamp = atomic64_read(&irq_params->previous_timestamp);
frame_duration_ns = vblank->time - previous_timestamp;
@@ -529,9 +602,9 @@ static void dm_vupdate_high_irq(void *interrupt_params)
atomic64_set(&irq_params->previous_timestamp, vblank->time);
}
- DC_LOG_VBLANK("crtc:%d, vupdate-vrr:%d\n",
- acrtc->crtc_id,
- vrr_active);
+ drm_dbg_vbl(drm_dev,
+ "crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id,
+ vrr_active);
/* Core vblank handling is done here after end of front-porch in
* vrr mode, as vblank timestamping will give valid results
@@ -539,22 +612,27 @@ static void dm_vupdate_high_irq(void *interrupt_params)
* page-flip completion events that have been queued to us
* if a pageflip happened inside front-porch.
*/
- if (vrr_active) {
+ if (vrr_active && acrtc->dm_irq_params.stream) {
+ bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled;
+ bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled;
+ bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state
+ == VRR_STATE_ACTIVE_VARIABLE;
+
amdgpu_dm_crtc_handle_vblank(acrtc);
/* BTR processing for pre-DCE12 ASICs */
- if (acrtc->dm_irq_params.stream &&
- adev->family < AMDGPU_FAMILY_AI) {
+ if (adev->family < AMDGPU_FAMILY_AI) {
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
mod_freesync_handle_v_update(
adev->dm.freesync_module,
acrtc->dm_irq_params.stream,
&acrtc->dm_irq_params.vrr_params);
- dc_stream_adjust_vmin_vmax(
- adev->dm.dc,
- acrtc->dm_irq_params.stream,
- &acrtc->dm_irq_params.vrr_params.adjust);
+ if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) {
+ schedule_dc_vmin_vmax(adev,
+ acrtc->dm_irq_params.stream,
+ &acrtc->dm_irq_params.vrr_params.adjust);
+ }
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
}
}
@@ -572,6 +650,7 @@ static void dm_crtc_high_irq(void *interrupt_params)
{
struct common_irq_params *irq_params = interrupt_params;
struct amdgpu_device *adev = irq_params->adev;
+ struct drm_writeback_job *job;
struct amdgpu_crtc *acrtc;
unsigned long flags;
int vrr_active;
@@ -580,10 +659,39 @@ static void dm_crtc_high_irq(void *interrupt_params)
if (!acrtc)
return;
+ if (acrtc->wb_conn) {
+ spin_lock_irqsave(&acrtc->wb_conn->job_lock, flags);
+
+ if (acrtc->wb_pending) {
+ job = list_first_entry_or_null(&acrtc->wb_conn->job_queue,
+ struct drm_writeback_job,
+ list_entry);
+ acrtc->wb_pending = false;
+ spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags);
+
+ if (job) {
+ unsigned int v_total, refresh_hz;
+ struct dc_stream_state *stream = acrtc->dm_irq_params.stream;
+
+ v_total = stream->adjust.v_total_max ?
+ stream->adjust.v_total_max : stream->timing.v_total;
+ refresh_hz = div_u64((uint64_t) stream->timing.pix_clk_100hz *
+ 100LL, (v_total * stream->timing.h_total));
+ mdelay(1000 / refresh_hz);
+
+ drm_writeback_signal_completion(acrtc->wb_conn, 0);
+ dc_stream_fc_disable_writeback(adev->dm.dc,
+ acrtc->dm_irq_params.stream, 0);
+ }
+ } else
+ spin_unlock_irqrestore(&acrtc->wb_conn->job_lock, flags);
+ }
+
vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc);
- DC_LOG_VBLANK("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
- vrr_active, acrtc->dm_irq_params.active_planes);
+ drm_dbg_vbl(adev_to_drm(adev),
+ "crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id,
+ vrr_active, acrtc->dm_irq_params.active_planes);
/**
* Core vblank handling at start of front-porch is only possible
@@ -607,15 +715,20 @@ static void dm_crtc_high_irq(void *interrupt_params)
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
if (acrtc->dm_irq_params.stream &&
- acrtc->dm_irq_params.vrr_params.supported &&
- acrtc->dm_irq_params.freesync_config.state ==
- VRR_STATE_ACTIVE_VARIABLE) {
+ acrtc->dm_irq_params.vrr_params.supported) {
+ bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled;
+ bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled;
+ bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE;
+
mod_freesync_handle_v_update(adev->dm.freesync_module,
acrtc->dm_irq_params.stream,
&acrtc->dm_irq_params.vrr_params);
- dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream,
- &acrtc->dm_irq_params.vrr_params.adjust);
+ /* update vmin_vmax only if freesync is enabled, or only if PSR and REPLAY are disabled */
+ if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) {
+ schedule_dc_vmin_vmax(adev, acrtc->dm_irq_params.stream,
+ &acrtc->dm_irq_params.vrr_params.adjust);
+ }
}
/*
@@ -683,6 +796,29 @@ static void dmub_aux_setconfig_callback(struct amdgpu_device *adev,
complete(&adev->dm.dmub_aux_transfer_done);
}
+/*
+ * DMUB fused-IO notification handler: copies the fused request carried by
+ * @notify into the fused_io slot selected by the request's AUX DDC line and
+ * completes that slot's "replied" completion. Asserts and returns without
+ * copying when an argument is NULL or the DDC line has no slot.
+ */
+static void dmub_aux_fused_io_callback(struct amdgpu_device *adev,
+				       struct dmub_notification *notify)
+{
+	const struct dmub_cmd_fused_request *req;
+	struct fused_io_sync *sync;
+	uint8_t ddc_line;
+
+	if (!adev || !notify) {
+		ASSERT(false);
+		return;
+	}
+
+	req = &notify->fused_request;
+	ddc_line = req->u.aux.ddc_line;
+	if (ddc_line >= ARRAY_SIZE(adev->dm.fused_io)) {
+		ASSERT(false);
+		return;
+	}
+
+	sync = &adev->dm.fused_io[ddc_line];
+
+	/* The reply buffer must be able to hold a whole fused request. */
+	static_assert(sizeof(*req) <= sizeof(sync->reply_data), "Size mismatch");
+	memcpy(sync->reply_data, req, sizeof(*req));
+	complete(&sync->replied);
+}
+
/**
* dmub_hpd_callback - DMUB HPD interrupt processing callback.
* @adev: amdgpu_device pointer
@@ -706,12 +842,18 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
return;
if (notify == NULL) {
- DRM_ERROR("DMUB HPD callback notification was NULL");
+ drm_err(adev_to_drm(adev), "DMUB HPD callback notification was NULL");
return;
}
if (notify->link_index > adev->dm.dc->link_count) {
- DRM_ERROR("DMUB HPD index (%u)is abnormal", notify->link_index);
+ drm_err(adev_to_drm(adev), "DMUB HPD index (%u)is abnormal", notify->link_index);
+ return;
+ }
+
+ /* Skip DMUB HPD IRQ in suspend/resume. We will probe them later. */
+ if (notify->type == DMUB_NOTIFICATION_HPD && adev->in_suspend) {
+ drm_info(adev_to_drm(adev), "Skip DMUB HPD IRQ callback in suspend/resume\n");
return;
}
@@ -721,14 +863,18 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (link && aconnector->dc_link == link) {
if (notify->type == DMUB_NOTIFICATION_HPD)
- DRM_INFO("DMUB HPD callback: link_index=%u\n", link_index);
+ drm_info(adev_to_drm(adev), "DMUB HPD IRQ callback: link_index=%u\n", link_index);
else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ)
- DRM_INFO("DMUB HPD IRQ callback: link_index=%u\n", link_index);
+ drm_info(adev_to_drm(adev), "DMUB HPD RX IRQ callback: link_index=%u\n", link_index);
else
- DRM_WARN("DMUB Unknown HPD callback type %d, link_index=%u\n",
+ drm_warn(adev_to_drm(adev), "DMUB Unknown HPD callback type %d, link_index=%u\n",
notify->type, link_index);
hpd_aconnector = aconnector;
@@ -738,14 +884,31 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
drm_connector_list_iter_end(&iter);
if (hpd_aconnector) {
- if (notify->type == DMUB_NOTIFICATION_HPD)
+ if (notify->type == DMUB_NOTIFICATION_HPD) {
+ if (hpd_aconnector->dc_link->hpd_status == (notify->hpd_status == DP_HPD_PLUG))
+ drm_warn(adev_to_drm(adev), "DMUB reported hpd status unchanged. link_index=%u\n", link_index);
handle_hpd_irq_helper(hpd_aconnector);
- else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ)
+ } else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ) {
handle_hpd_rx_irq(hpd_aconnector);
+ }
}
}
/**
+ * dmub_hpd_sense_callback - DMUB HPD sense processing callback.
+ * @adev: amdgpu_device pointer
+ * @notify: dmub notification structure
+ *
+ * HPD sense changes can occur during low power states and need to be
+ * notified from firmware to driver.
+ */
+static void dmub_hpd_sense_callback(struct amdgpu_device *adev,
+				    struct dmub_notification *notify)
+{
+	struct drm_device *ddev = adev_to_drm(adev);
+
+	/* Only logs that the notification arrived; @notify is not inspected. */
+	drm_dbg_driver(ddev, "DMUB HPD SENSE callback.\n");
+}
+
+/**
* register_dmub_notify_callback - Sets callback for DMUB notify
* @adev: amdgpu_device pointer
* @type: Type of dmub notification
@@ -778,7 +941,7 @@ static void dm_handle_hpd_work(struct work_struct *work)
dmub_hpd_wrk = container_of(work, struct dmub_hpd_work, handle_hpd_work);
if (!dmub_hpd_wrk->dmub_notify) {
- DRM_ERROR("dmub_hpd_wrk dmub_notify is NULL");
+ drm_err(adev_to_drm(dmub_hpd_wrk->adev), "dmub_hpd_wrk dmub_notify is NULL");
return;
}
@@ -792,6 +955,30 @@ static void dm_handle_hpd_work(struct work_struct *work)
}
+/*
+ * Return a short printable name for DMUB notification type @e, or
+ * "<unknown>" for any value without a case below.
+ */
+static const char *dmub_notification_type_str(enum dmub_notification_type e)
+{
+	const char *name = "<unknown>";
+
+	switch (e) {
+	case DMUB_NOTIFICATION_NO_DATA:
+		name = "NO_DATA";
+		break;
+	case DMUB_NOTIFICATION_AUX_REPLY:
+		name = "AUX_REPLY";
+		break;
+	case DMUB_NOTIFICATION_HPD:
+		name = "HPD";
+		break;
+	case DMUB_NOTIFICATION_HPD_IRQ:
+		name = "HPD_IRQ";
+		break;
+	case DMUB_NOTIFICATION_SET_CONFIG_REPLY:
+		name = "SET_CONFIG_REPLY";
+		break;
+	case DMUB_NOTIFICATION_DPIA_NOTIFICATION:
+		name = "DPIA_NOTIFICATION";
+		break;
+	case DMUB_NOTIFICATION_HPD_SENSE_NOTIFY:
+		name = "HPD_SENSE_NOTIFY";
+		break;
+	case DMUB_NOTIFICATION_FUSED_IO:
+		name = "FUSED_IO";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
#define DMUB_TRACE_MAX_READ 64
/**
* dm_dmub_outbox1_low_irq() - Handles Outbox interrupt
@@ -802,14 +989,30 @@ static void dm_handle_hpd_work(struct work_struct *work)
*/
static void dm_dmub_outbox1_low_irq(void *interrupt_params)
{
- struct dmub_notification notify;
+ struct dmub_notification notify = {0};
struct common_irq_params *irq_params = interrupt_params;
struct amdgpu_device *adev = irq_params->adev;
struct amdgpu_display_manager *dm = &adev->dm;
struct dmcub_trace_buf_entry entry = { 0 };
u32 count = 0;
struct dmub_hpd_work *dmub_hpd_wrk;
- struct dc_link *plink = NULL;
+
+ do {
+ if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
+ trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
+ entry.param0, entry.param1);
+
+ drm_dbg_driver(adev_to_drm(adev), "trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
+ entry.trace_code, entry.tick_count, entry.param0, entry.param1);
+ } else
+ break;
+
+ count++;
+
+ } while (count <= DMUB_TRACE_MAX_READ);
+
+ if (count > DMUB_TRACE_MAX_READ)
+ drm_dbg_driver(adev_to_drm(adev), "Warning : count > DMUB_TRACE_MAX_READ");
if (dc_enable_dmub_notifications(adev->dm.dc) &&
irq_params->irq_src == DC_IRQ_SOURCE_DMCUB_OUTBOX) {
@@ -817,81 +1020,56 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params)
do {
dc_stat_get_dmub_notification(adev->dm.dc, &notify);
if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) {
- DRM_ERROR("DM: notify type %d invalid!", notify.type);
+ drm_err(adev_to_drm(adev), "DM: notify type %d invalid!", notify.type);
continue;
}
if (!dm->dmub_callback[notify.type]) {
- DRM_DEBUG_DRIVER("DMUB notification skipped, no handler: type=%d\n", notify.type);
+ drm_warn(adev_to_drm(adev), "DMUB notification skipped due to no handler: type=%s\n",
+ dmub_notification_type_str(notify.type));
continue;
}
if (dm->dmub_thread_offload[notify.type] == true) {
dmub_hpd_wrk = kzalloc(sizeof(*dmub_hpd_wrk), GFP_ATOMIC);
if (!dmub_hpd_wrk) {
- DRM_ERROR("Failed to allocate dmub_hpd_wrk");
+ drm_err(adev_to_drm(adev), "Failed to allocate dmub_hpd_wrk");
return;
}
dmub_hpd_wrk->dmub_notify = kmemdup(&notify, sizeof(struct dmub_notification),
GFP_ATOMIC);
if (!dmub_hpd_wrk->dmub_notify) {
kfree(dmub_hpd_wrk);
- DRM_ERROR("Failed to allocate dmub_hpd_wrk->dmub_notify");
+ drm_err(adev_to_drm(adev), "Failed to allocate dmub_hpd_wrk->dmub_notify");
return;
}
INIT_WORK(&dmub_hpd_wrk->handle_hpd_work, dm_handle_hpd_work);
dmub_hpd_wrk->adev = adev;
- if (notify.type == DMUB_NOTIFICATION_HPD) {
- plink = adev->dm.dc->links[notify.link_index];
- if (plink) {
- plink->hpd_status =
- notify.hpd_status == DP_HPD_PLUG;
- }
- }
queue_work(adev->dm.delayed_hpd_wq, &dmub_hpd_wrk->handle_hpd_work);
} else {
dm->dmub_callback[notify.type](adev, &notify);
}
} while (notify.pending_notification);
}
-
-
- do {
- if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) {
- trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count,
- entry.param0, entry.param1);
-
- DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n",
- entry.trace_code, entry.tick_count, entry.param0, entry.param1);
- } else
- break;
-
- count++;
-
- } while (count <= DMUB_TRACE_MAX_READ);
-
- if (count > DMUB_TRACE_MAX_READ)
- DRM_DEBUG_DRIVER("Warning : count > DMUB_TRACE_MAX_READ");
}
-static int dm_set_clockgating_state(void *handle,
+static int dm_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dm_set_powergating_state(void *handle,
+static int dm_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
/* Prototypes of private functions */
-static int dm_early_init(void *handle);
+static int dm_early_init(struct amdgpu_ip_block *ip_block);
/* Allocate memory for FBC compressed data */
static void amdgpu_dm_fbc_init(struct drm_connector *connector)
{
- struct drm_device *dev = connector->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = drm_to_adev(connector->dev);
struct dm_compressor_info *compressor = &adev->dm.compressor;
struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(connector);
struct drm_display_mode *mode;
@@ -908,8 +1086,8 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector)
list_for_each_entry(mode, &connector->modes, head) {
- if (max_size < mode->htotal * mode->vtotal)
- max_size = mode->htotal * mode->vtotal;
+ if (max_size < (unsigned long) mode->htotal * mode->vtotal)
+ max_size = (unsigned long) mode->htotal * mode->vtotal;
}
if (max_size) {
@@ -918,10 +1096,10 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector)
&compressor->gpu_addr, &compressor->cpu_addr);
if (r)
- DRM_ERROR("DM: Failed to initialize FBC\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize FBC\n");
else {
adev->dm.dc->ctx->fbc_gpu_addr = compressor->gpu_addr;
- DRM_INFO("DM: FBC alloc %lu\n", max_size*4);
+ drm_info(adev_to_drm(adev), "DM: FBC alloc %lu\n", max_size*4);
}
}
@@ -945,13 +1123,19 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port,
drm_connector_list_iter_begin(dev, &conn_iter);
drm_for_each_connector_iter(connector, &conn_iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->audio_inst != port)
continue;
*enabled = true;
+ mutex_lock(&connector->eld_mutex);
ret = drm_eld_size(connector->eld);
memcpy(buf, connector->eld, min(max_bytes, ret));
+ mutex_unlock(&connector->eld_mutex);
break;
}
@@ -985,8 +1169,7 @@ static int amdgpu_dm_audio_component_bind(struct device *kdev,
static void amdgpu_dm_audio_component_unbind(struct device *kdev,
struct device *hda_kdev, void *data)
{
- struct drm_device *dev = dev_get_drvdata(kdev);
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = drm_to_adev(dev_get_drvdata(kdev));
struct drm_audio_component *acomp = data;
acomp->ops = NULL;
@@ -1069,6 +1252,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
const struct firmware *dmub_fw = adev->dm.dmub_fw;
struct dmcu *dmcu = adev->dm.dc->res_pool->dmcu;
struct abm *abm = adev->dm.dc->res_pool->abm;
+ struct dc_context *ctx = adev->dm.dc->ctx;
struct dmub_srv_hw_params hw_params;
enum dmub_status status;
const unsigned char *fw_inst_const, *fw_bss_data;
@@ -1080,31 +1264,35 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
return 0;
if (!fb_info) {
- DRM_ERROR("No framebuffer info for DMUB service.\n");
+ drm_err(adev_to_drm(adev), "No framebuffer info for DMUB service.\n");
return -EINVAL;
}
if (!dmub_fw) {
/* Firmware required for DMUB support. */
- DRM_ERROR("No firmware provided for DMUB.\n");
+ drm_err(adev_to_drm(adev), "No firmware provided for DMUB.\n");
return -EINVAL;
}
+ /* initialize register offsets for ASICs with runtime initialization available */
+ if (dmub_srv->hw_funcs.init_reg_offsets)
+ dmub_srv->hw_funcs.init_reg_offsets(dmub_srv, ctx);
+
status = dmub_srv_has_hw_support(dmub_srv, &has_hw_support);
if (status != DMUB_STATUS_OK) {
- DRM_ERROR("Error checking HW support for DMUB: %d\n", status);
+ drm_err(adev_to_drm(adev), "Error checking HW support for DMUB: %d\n", status);
return -EINVAL;
}
if (!has_hw_support) {
- DRM_INFO("DMUB unsupported on ASIC\n");
+ drm_info(adev_to_drm(adev), "DMUB unsupported on ASIC\n");
return 0;
}
/* Reset DMCUB if it was previously running - before we overwrite its memory. */
status = dmub_srv_hw_reset(dmub_srv);
if (status != DMUB_STATUS_OK)
- DRM_WARN("Error resetting DMUB HW: %d\n", status);
+ drm_warn(adev_to_drm(adev), "Error resetting DMUB HW: %d\n", status);
hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data;
@@ -1150,6 +1338,9 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
memset(fb_info->fb[DMUB_WINDOW_6_FW_STATE].cpu_addr, 0,
fb_info->fb[DMUB_WINDOW_6_FW_STATE].size);
+ memset(fb_info->fb[DMUB_WINDOW_SHARED_STATE].cpu_addr, 0,
+ fb_info->fb[DMUB_WINDOW_SHARED_STATE].size);
+
/* Initialize hardware. */
memset(&hw_params, 0, sizeof(hw_params));
hw_params.fb_base = adev->gmc.fb_start;
@@ -1165,9 +1356,13 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
for (i = 0; i < fb_info->num_fb; ++i)
hw_params.fb[i] = &fb_info->fb[i];
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(3, 1, 3):
case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
hw_params.dpia_supported = true;
hw_params.disable_dpia = adev->dm.dc->debug.dpia_debug.bits.disable_dpia;
break;
@@ -1175,16 +1370,27 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
break;
}
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ hw_params.ips_sequential_ono = adev->external_rev_id > 0x10;
+ hw_params.lower_hbr3_phy_ssc = true;
+ break;
+ default:
+ break;
+ }
+
status = dmub_srv_hw_init(dmub_srv, &hw_params);
if (status != DMUB_STATUS_OK) {
- DRM_ERROR("Error initializing DMUB HW: %d\n", status);
+ drm_err(adev_to_drm(adev), "Error initializing DMUB HW: %d\n", status);
return -EINVAL;
}
/* Wait for firmware load to finish. */
status = dmub_srv_wait_for_auto_load(dmub_srv, 100000);
if (status != DMUB_STATUS_OK)
- DRM_WARN("Wait for DMUB auto-load failed: %d\n", status);
+ drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status);
/* Init DMCU and ABM if available. */
if (dmcu && abm) {
@@ -1195,13 +1401,36 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
if (!adev->dm.dc->ctx->dmub_srv)
adev->dm.dc->ctx->dmub_srv = dc_dmub_srv_create(adev->dm.dc, dmub_srv);
if (!adev->dm.dc->ctx->dmub_srv) {
- DRM_ERROR("Couldn't allocate DC DMUB server!\n");
+ drm_err(adev_to_drm(adev), "Couldn't allocate DC DMUB server!\n");
return -ENOMEM;
}
- DRM_INFO("DMUB hardware initialized: version=0x%08X\n",
+ drm_info(adev_to_drm(adev), "DMUB hardware initialized: version=0x%08X\n",
adev->dm.dmcub_fw_version);
+ /* Keeping sanity checks off if
+ * DCN31 >= 4.0.59.0
+ * DCN314 >= 8.0.16.0
+ * Otherwise, turn on sanity checks
+ */
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 1, 2):
+ case IP_VERSION(3, 1, 3):
+ if (adev->dm.dmcub_fw_version &&
+ adev->dm.dmcub_fw_version >= DMUB_FW_VERSION(4, 0, 0) &&
+ adev->dm.dmcub_fw_version < DMUB_FW_VERSION(4, 0, 59))
+ adev->dm.dc->debug.sanity_checks = true;
+ break;
+ case IP_VERSION(3, 1, 4):
+ if (adev->dm.dmcub_fw_version &&
+ adev->dm.dmcub_fw_version >= DMUB_FW_VERSION(4, 0, 0) &&
+ adev->dm.dmcub_fw_version < DMUB_FW_VERSION(8, 0, 16))
+ adev->dm.dc->debug.sanity_checks = true;
+ break;
+ default:
+ break;
+ }
+
return 0;
}
@@ -1210,6 +1439,7 @@ static void dm_dmub_hw_resume(struct amdgpu_device *adev)
struct dmub_srv *dmub_srv = adev->dm.dmub_srv;
enum dmub_status status;
bool init;
+ int r;
if (!dmub_srv) {
/* DMUB isn't supported on the ASIC. */
@@ -1218,16 +1448,18 @@ static void dm_dmub_hw_resume(struct amdgpu_device *adev)
status = dmub_srv_is_hw_init(dmub_srv, &init);
if (status != DMUB_STATUS_OK)
- DRM_WARN("DMUB hardware init check failed: %d\n", status);
+ drm_warn(adev_to_drm(adev), "DMUB hardware init check failed: %d\n", status);
if (status == DMUB_STATUS_OK && init) {
/* Wait for firmware load to finish. */
status = dmub_srv_wait_for_auto_load(dmub_srv, 100000);
if (status != DMUB_STATUS_OK)
- DRM_WARN("Wait for DMUB auto-load failed: %d\n", status);
+ drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status);
} else {
/* Perform the full hardware initialization. */
- dm_dmub_hw_init(adev);
+ r = dm_dmub_hw_init(adev);
+ if (r)
+ drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
}
}
@@ -1246,9 +1478,11 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
agp_top = adev->gmc.agp_end >> 24;
/* AGP aperture is disabled */
- if (agp_bot == agp_top) {
+ if (agp_bot > agp_top) {
logical_addr_low = adev->gmc.fb_start >> 18;
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
/*
* Raven2 has a HW issue that it is unable to use the vram which
* is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -1260,7 +1494,9 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
logical_addr_high = adev->gmc.fb_end >> 18;
} else {
logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18;
- if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ if (adev->apu_flags & (AMD_APU_IS_RAVEN2 |
+ AMD_APU_IS_RENOIR |
+ AMD_APU_IS_GREEN_SARDINE))
/*
* Raven2 has a HW issue that it is unable to use the vram which
* is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
@@ -1333,18 +1569,18 @@ static void dm_handle_hpd_rx_offload_work(struct work_struct *work)
offload_work = container_of(work, struct hpd_rx_irq_offload_work, work);
aconnector = offload_work->offload_wq->aconnector;
+ adev = offload_work->adev;
if (!aconnector) {
- DRM_ERROR("Can't retrieve aconnector in hpd_rx_irq_offload_work");
+ drm_err(adev_to_drm(adev), "Can't retrieve aconnector in hpd_rx_irq_offload_work");
goto skip;
}
- adev = drm_to_adev(aconnector->base.dev);
dc_link = aconnector->dc_link;
mutex_lock(&aconnector->hpd_lock);
if (!dc_link_detect_connection_type(dc_link, &new_connection_type))
- DRM_ERROR("KMS: Failed to detect connector\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n");
mutex_unlock(&aconnector->hpd_lock);
if (new_connection_type == dc_connection_none)
@@ -1413,8 +1649,9 @@ skip:
}
-static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct dc *dc)
+static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct amdgpu_device *adev)
{
+ struct dc *dc = adev->dm.dc;
int max_caps = dc->caps.max_links;
int i = 0;
struct hpd_rx_irq_offload_work_queue *hpd_rx_offload_wq = NULL;
@@ -1430,7 +1667,7 @@ static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct
create_singlethread_workqueue("amdgpu_dm_hpd_rx_offload_wq");
if (hpd_rx_offload_wq[i].wq == NULL) {
- DRM_ERROR("create amdgpu_dm_hpd_rx_offload_wq fail!");
+ drm_err(adev_to_drm(adev), "create amdgpu_dm_hpd_rx_offload_wq fail!");
goto out_err;
}
@@ -1479,76 +1716,163 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev)
return false;
}
-static const struct dmi_system_id hpd_disconnect_quirk_table[] = {
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"),
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"),
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"),
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"),
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"),
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"),
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"),
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"),
- },
- },
- {
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"),
- },
- },
- {}
- /* TODO: refactor this from a fixed table to a dynamic option */
-};
-static void retrieve_dmi_info(struct amdgpu_display_manager *dm)
+void*
+dm_allocate_gpu_mem(
+ struct amdgpu_device *adev,
+ enum dc_gpu_mem_alloc_type type,
+ size_t size,
+ long long *addr)
{
- const struct dmi_system_id *dmi_id;
+ struct dal_allocation *da;
+ u32 domain = (type == DC_MEM_ALLOC_TYPE_GART) ?
+ AMDGPU_GEM_DOMAIN_GTT : AMDGPU_GEM_DOMAIN_VRAM;
+ int ret;
+
+ da = kzalloc(sizeof(struct dal_allocation), GFP_KERNEL);
+ if (!da)
+ return NULL;
+
+ ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ domain, &da->bo,
+ &da->gpu_addr, &da->cpu_ptr);
- dm->aux_hpd_discon_quirk = false;
+ *addr = da->gpu_addr;
- dmi_id = dmi_first_match(hpd_disconnect_quirk_table);
- if (dmi_id) {
- dm->aux_hpd_discon_quirk = true;
- DRM_INFO("aux_hpd_discon_quirk attached\n");
+ if (ret) {
+ kfree(da);
+ return NULL;
+ }
+
+ /* add da to list in dm */
+ list_add(&da->list, &adev->dm.da_list);
+
+ return da->cpu_ptr;
+}
+
+void
+dm_free_gpu_mem(
+ struct amdgpu_device *adev,
+ enum dc_gpu_mem_alloc_type type,
+ void *pvMem)
+{
+ struct dal_allocation *da;
+
+ /* walk the da list in DM */
+ list_for_each_entry(da, &adev->dm.da_list, list) {
+ if (pvMem == da->cpu_ptr) {
+ amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr);
+ list_del(&da->list);
+ kfree(da);
+ break;
+ }
+ }
+
+}
+
+static enum dmub_status
+dm_dmub_send_vbios_gpint_command(struct amdgpu_device *adev,
+ enum dmub_gpint_command command_code,
+ uint16_t param,
+ uint32_t timeout_us)
+{
+ union dmub_gpint_data_register reg, test;
+ uint32_t i;
+
+ /* Assume that VBIOS DMUB is ready to take commands */
+
+ reg.bits.status = 1;
+ reg.bits.command_code = command_code;
+ reg.bits.param = param;
+
+ cgs_write_register(adev->dm.cgs_device, 0x34c0 + 0x01f8, reg.all);
+
+ for (i = 0; i < timeout_us; ++i) {
+ udelay(1);
+
+ /* Check if our GPINT got acked */
+ reg.bits.status = 0;
+ test = (union dmub_gpint_data_register)
+ cgs_read_register(adev->dm.cgs_device, 0x34c0 + 0x01f8);
+
+ if (test.all == reg.all)
+ return DMUB_STATUS_OK;
+ }
+
+ return DMUB_STATUS_TIMEOUT;
+}
+
+static void *dm_dmub_get_vbios_bounding_box(struct amdgpu_device *adev)
+{
+ void *bb;
+ long long addr;
+ unsigned int bb_size;
+ int i = 0;
+ uint16_t chunk;
+ enum dmub_gpint_command send_addrs[] = {
+ DMUB_GPINT__SET_BB_ADDR_WORD0,
+ DMUB_GPINT__SET_BB_ADDR_WORD1,
+ DMUB_GPINT__SET_BB_ADDR_WORD2,
+ DMUB_GPINT__SET_BB_ADDR_WORD3,
+ };
+ enum dmub_status ret;
+
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(4, 0, 1):
+ bb_size = sizeof(struct dml2_soc_bb);
+ break;
+ default:
+ return NULL;
+ }
+
+ bb = dm_allocate_gpu_mem(adev,
+ DC_MEM_ALLOC_TYPE_GART,
+ bb_size,
+ &addr);
+ if (!bb)
+ return NULL;
+
+ for (i = 0; i < 4; i++) {
+ /* Extract 16-bit chunk */
+ chunk = ((uint64_t) addr >> (i * 16)) & 0xFFFF;
+ /* Send the chunk */
+ ret = dm_dmub_send_vbios_gpint_command(adev, send_addrs[i], chunk, 30000);
+ if (ret != DMUB_STATUS_OK)
+ goto free_bb;
}
+
+ /* Now ask DMUB to copy the bb */
+ ret = dm_dmub_send_vbios_gpint_command(adev, DMUB_GPINT__BB_COPY, 1, 200000);
+ if (ret != DMUB_STATUS_OK)
+ goto free_bb;
+
+ return bb;
+
+free_bb:
+ dm_free_gpu_mem(adev, DC_MEM_ALLOC_TYPE_GART, (void *) bb);
+ return NULL;
+
+}
+
+static enum dmub_ips_disable_type dm_get_default_ips_mode(
+ struct amdgpu_device *adev)
+{
+ enum dmub_ips_disable_type ret = DMUB_IPS_ENABLE;
+
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(3, 5, 1):
+ ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ break;
+ default:
+ /* ASICs older than DCN35 do not have IPSs */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 5, 0))
+ ret = DMUB_IPS_DISABLE_ALL;
+ break;
+ }
+
+ return ret;
}
static int amdgpu_dm_init(struct amdgpu_device *adev)
@@ -1569,7 +1893,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
mutex_init(&adev->dm.audio_lock);
if (amdgpu_dm_irq_init(adev)) {
- DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
+ drm_err(adev_to_drm(adev), "failed to initialize DM IRQ support.\n");
goto error;
}
@@ -1586,18 +1910,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
init_data.driver = adev;
- adev->dm.cgs_device = amdgpu_cgs_create_device(adev);
-
- if (!adev->dm.cgs_device) {
- DRM_ERROR("amdgpu: failed to create cgs device.\n");
- goto error;
- }
-
+ /* cgs_device was created in dm_sw_init() */
init_data.cgs_device = adev->dm.cgs_device;
init_data.dce_environment = DCE_ENV_PRODUCTION_DRV;
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
switch (adev->dm.dmcub_fw_version) {
case 0: /* development */
@@ -1616,40 +1934,27 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
break;
}
- switch (adev->asic_type) {
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- init_data.flags.gpu_vm_support = true;
- break;
- default:
- switch (adev->ip_versions[DCE_HWIP][0]) {
- case IP_VERSION(1, 0, 0):
- case IP_VERSION(1, 0, 1):
- /* enable S/G on PCO and RV2 */
- if ((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
- (adev->apu_flags & AMD_APU_IS_PICASSO))
- init_data.flags.gpu_vm_support = true;
- break;
- case IP_VERSION(2, 1, 0):
- case IP_VERSION(3, 0, 1):
- case IP_VERSION(3, 1, 2):
- case IP_VERSION(3, 1, 3):
- case IP_VERSION(3, 1, 4):
- case IP_VERSION(3, 1, 5):
- case IP_VERSION(3, 1, 6):
- init_data.flags.gpu_vm_support = true;
- break;
- default:
- break;
- }
- break;
- }
- if (init_data.flags.gpu_vm_support &&
- (amdgpu_sg_display == 0))
+ /* APUs support S/G display by default, except:
+ * ASICs before Carrizo,
+ * RAVEN1 (users reported stability issues)
+ */
+
+ if (adev->asic_type < CHIP_CARRIZO) {
init_data.flags.gpu_vm_support = false;
+ } else if (adev->asic_type == CHIP_RAVEN) {
+ if (adev->apu_flags & AMD_APU_IS_RAVEN)
+ init_data.flags.gpu_vm_support = false;
+ else
+ init_data.flags.gpu_vm_support = (amdgpu_sg_display != 0);
+ } else {
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(2, 0, 3))
+ init_data.flags.gpu_vm_support = (amdgpu_sg_display == 1);
+ else
+ init_data.flags.gpu_vm_support =
+ (amdgpu_sg_display != 0) && (adev->flags & AMD_IS_APU);
+ }
- if (init_data.flags.gpu_vm_support)
- adev->mode_info.gpu_vm_support = true;
+ adev->mode_info.gpu_vm_support = init_data.flags.gpu_vm_support;
if (amdgpu_dc_feature_mask & DC_FBC_MASK)
init_data.flags.fbc_support = true;
@@ -1670,30 +1975,56 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
init_data.flags.seamless_boot_edp_requested = false;
- if (check_seamless_boot_capability(adev)) {
+ if (amdgpu_device_seamless_boot_supported(adev)) {
init_data.flags.seamless_boot_edp_requested = true;
init_data.flags.allow_seamless_boot_optimization = true;
- DRM_INFO("Seamless boot condition check passed\n");
+ drm_dbg(adev->dm.ddev, "Seamless boot requested\n");
}
init_data.flags.enable_mipi_converter_optimization = true;
init_data.dcn_reg_offsets = adev->reg_offset[DCE_HWIP][0];
init_data.nbio_reg_offsets = adev->reg_offset[NBIO_HWIP][0];
+ init_data.clk_reg_offsets = adev->reg_offset[CLK_HWIP][0];
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_IPS)
+ init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL;
+ else if (amdgpu_dc_debug_mask & DC_DISABLE_IPS_DYNAMIC)
+ init_data.flags.disable_ips = DMUB_IPS_DISABLE_DYNAMIC;
+ else if (amdgpu_dc_debug_mask & DC_DISABLE_IPS2_DYNAMIC)
+ init_data.flags.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+ else if (amdgpu_dc_debug_mask & DC_FORCE_IPS_ENABLE)
+ init_data.flags.disable_ips = DMUB_IPS_ENABLE;
+ else
+ init_data.flags.disable_ips = dm_get_default_ips_mode(adev);
- INIT_LIST_HEAD(&adev->dm.da_list);
+ init_data.flags.disable_ips_in_vpb = 0;
+
+ /* DCN35 and above supports dynamic DTBCLK switch */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0))
+ init_data.flags.allow_0_dtb_clk = true;
+
+ /* Enable DWB for tested platforms only */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0))
+ init_data.num_virtual_links = 1;
retrieve_dmi_info(&adev->dm);
+ if (adev->dm.edp0_on_dp1_quirk)
+ init_data.flags.support_edp0_on_dp1 = true;
+
+ if (adev->dm.bb_from_dmub)
+ init_data.bb_from_dmub = adev->dm.bb_from_dmub;
+ else
+ init_data.bb_from_dmub = NULL;
/* Display Core create. */
adev->dm.dc = dc_create(&init_data);
if (adev->dm.dc) {
- DRM_INFO("Display Core v%s initialized on %s\n", DC_VER,
+ drm_info(adev_to_drm(adev), "Display Core v%s initialized on %s\n", DC_VER,
dce_version_to_string(adev->dm.dc->ctx->dce_version));
} else {
- DRM_INFO("Display Core v%s failed to initialize on %s\n", DC_VER,
- dce_version_to_string(adev->dm.dc->ctx->dce_version));
+ drm_info(adev_to_drm(adev), "Display Core failed to initialize with v%s!\n", DC_VER);
goto error;
}
@@ -1719,42 +2050,46 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH)
adev->dm.dc->debug.force_subvp_mclk_switch = true;
+ if (amdgpu_dc_debug_mask & DC_DISABLE_SUBVP_FAMS) {
+ adev->dm.dc->debug.force_disable_subvp = true;
+ adev->dm.dc->debug.fams2_config.bits.enable = false;
+ }
+
+ if (amdgpu_dc_debug_mask & DC_ENABLE_DML2) {
+ adev->dm.dc->debug.using_dml2 = true;
+ adev->dm.dc->debug.using_dml21 = true;
+ }
+
+ if (amdgpu_dc_debug_mask & DC_HDCP_LC_FORCE_FW_ENABLE)
+ adev->dm.dc->debug.hdcp_lc_force_fw_enable = true;
+
+ if (amdgpu_dc_debug_mask & DC_HDCP_LC_ENABLE_SW_FALLBACK)
+ adev->dm.dc->debug.hdcp_lc_enable_sw_fallback = true;
+
+ if (amdgpu_dc_debug_mask & DC_SKIP_DETECTION_LT)
+ adev->dm.dc->debug.skip_detection_link_training = true;
+
adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm;
/* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */
adev->dm.dc->debug.ignore_cable_id = true;
- /* TODO: There is a new drm mst change where the freedom of
- * vc_next_start_slot update is revoked/moved into drm, instead of in
- * driver. This forces us to make sure to get vc_next_start_slot updated
- * in drm function each time without considering if mst_state is active
- * or not. Otherwise, next time hotplug will give wrong start_slot
- * number. We are implementing a temporary solution to even notify drm
- * mst deallocation when link is no longer of MST type when uncommitting
- * the stream so we will have more time to work on a proper solution.
- * Ideally when dm_helpers_dp_mst_stop_top_mgr message is triggered, we
- * should notify drm to do a complete "reset" of its states and stop
- * calling further drm mst functions when link is no longer of an MST
- * type. This could happen when we unplug an MST hubs/displays. When
- * uncommit stream comes later after unplug, we should just reset
- * hardware states only.
- */
- adev->dm.dc->debug.temp_mst_deallocation_sequence = true;
-
if (adev->dm.dc->caps.dp_hdmi21_pcon_support)
- DRM_INFO("DP-HDMI FRL PCON supported\n");
+ drm_info(adev_to_drm(adev), "DP-HDMI FRL PCON supported\n");
r = dm_dmub_hw_init(adev);
if (r) {
- DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
+ drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
goto error;
}
dc_hardware_init(adev->dm.dc);
- adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev->dm.dc);
+ adev->dm.restore_backlight = true;
+
+ adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev);
if (!adev->dm.hpd_rx_offload_wq) {
- DRM_ERROR("amdgpu: failed to create hpd rx offload workqueue.\n");
+ drm_err(adev_to_drm(adev), "failed to create hpd rx offload workqueue.\n");
goto error;
}
@@ -1769,10 +2104,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
adev->dm.freesync_module = mod_freesync_create(adev->dm.dc);
if (!adev->dm.freesync_module) {
- DRM_ERROR(
- "amdgpu: failed to initialize freesync_module.\n");
+ drm_err(adev_to_drm(adev),
+ "failed to initialize freesync_module.\n");
} else
- DRM_DEBUG_DRIVER("amdgpu: freesync_module init done %p.\n",
+ drm_dbg_driver(adev_to_drm(adev), "amdgpu: freesync_module init done %p.\n",
adev->dm.freesync_module);
amdgpu_dm_init_color_mod();
@@ -1781,16 +2116,20 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
adev->dm.vblank_control_workqueue =
create_singlethread_workqueue("dm_vblank_control_workqueue");
if (!adev->dm.vblank_control_workqueue)
- DRM_ERROR("amdgpu: failed to initialize vblank_workqueue.\n");
+ drm_err(adev_to_drm(adev), "failed to initialize vblank_workqueue.\n");
}
+ if (adev->dm.dc->caps.ips_support &&
+ adev->dm.dc->config.disable_ips != DMUB_IPS_DISABLE_ALL)
+ adev->dm.idle_workqueue = idle_create_workqueue(adev);
+
if (adev->dm.dc->caps.max_links > 0 && adev->family >= AMDGPU_FAMILY_RV) {
adev->dm.hdcp_workqueue = hdcp_create_workqueue(adev, &init_params.cp_psp, adev->dm.dc);
if (!adev->dm.hdcp_workqueue)
- DRM_ERROR("amdgpu: failed to initialize hdcp_workqueue.\n");
+ drm_err(adev_to_drm(adev), "failed to initialize hdcp_workqueue.\n");
else
- DRM_DEBUG_DRIVER("amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue);
+ drm_dbg_driver(adev_to_drm(adev), "amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue);
dc_init_callbacks(adev->dm.dc, &init_params);
}
@@ -1798,37 +2137,37 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
init_completion(&adev->dm.dmub_aux_transfer_done);
adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL);
if (!adev->dm.dmub_notify) {
- DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify");
+ drm_info(adev_to_drm(adev), "fail to allocate adev->dm.dmub_notify");
goto error;
}
adev->dm.delayed_hpd_wq = create_singlethread_workqueue("amdgpu_dm_hpd_wq");
if (!adev->dm.delayed_hpd_wq) {
- DRM_ERROR("amdgpu: failed to create hpd offload workqueue.\n");
+ drm_err(adev_to_drm(adev), "failed to create hpd offload workqueue.\n");
goto error;
}
amdgpu_dm_outbox_init(adev);
if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_AUX_REPLY,
dmub_aux_setconfig_callback, false)) {
- DRM_ERROR("amdgpu: fail to register dmub aux callback");
+ drm_err(adev_to_drm(adev), "fail to register dmub aux callback");
goto error;
}
- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) {
- DRM_ERROR("amdgpu: fail to register dmub hpd callback");
- goto error;
- }
- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) {
- DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+
+ for (size_t i = 0; i < ARRAY_SIZE(adev->dm.fused_io); i++)
+ init_completion(&adev->dm.fused_io[i].replied);
+
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_FUSED_IO,
+ dmub_aux_fused_io_callback, false)) {
+ drm_err(adev_to_drm(adev), "fail to register dmub fused io callback");
goto error;
}
- }
-
- /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
- * It is expected that DMUB will resend any pending notifications at this point, for
- * example HPD from DPIA.
- */
- if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
+ * It is expected that DMUB will resend any pending notifications at this point. Note
+ * that hpd and hpd_irq handler registration is deferred to register_hpd_handlers() to
+ * align with the legacy interface initialization sequence. Connection status will be
+ * proactively detected once in amdgpu_dm_initialize_drm_device().
+ */
dc_enable_dmub_outbox(adev->dm.dc);
/* DPIA trace goes to dmesg logs only if outbox is enabled */
@@ -1837,8 +2176,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
if (amdgpu_dm_initialize_drm_device(adev)) {
- DRM_ERROR(
- "amdgpu: failed to initialize sw for display support.\n");
+ drm_err(adev_to_drm(adev),
+ "failed to initialize sw for display support.\n");
goto error;
}
@@ -1852,18 +2191,22 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
adev_to_drm(adev)->mode_config.cursor_height = adev->dm.dc->caps.max_cursor_size;
if (drm_vblank_init(adev_to_drm(adev), adev->dm.display_indexes_num)) {
- DRM_ERROR(
- "amdgpu: failed to initialize sw for display support.\n");
+ drm_err(adev_to_drm(adev),
+ "failed to initialize vblank for display support.\n");
goto error;
}
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- adev->dm.secure_display_ctxs = amdgpu_dm_crtc_secure_display_create_contexts(adev);
- if (!adev->dm.secure_display_ctxs)
- DRM_ERROR("amdgpu: failed to initialize secure display contexts.\n");
+ amdgpu_dm_crtc_secure_display_create_contexts(adev);
+ if (!adev->dm.secure_display_ctx.crtc_ctx)
+ drm_err(adev_to_drm(adev), "failed to initialize secure display contexts.\n");
+
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(4, 0, 1))
+ adev->dm.secure_display_ctx.support_mul_roi = true;
+
#endif
- DRM_DEBUG_DRIVER("KMS initialized.\n");
+ drm_dbg_driver(adev_to_drm(adev), "KMS initialized.\n");
return 0;
error:
@@ -1872,9 +2215,9 @@ error:
return -EINVAL;
}
-static int amdgpu_dm_early_fini(void *handle)
+static int amdgpu_dm_early_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_dm_audio_fini(adev);
@@ -1890,18 +2233,28 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
adev->dm.vblank_control_workqueue = NULL;
}
+ if (adev->dm.idle_workqueue) {
+ if (adev->dm.idle_workqueue->running) {
+ adev->dm.idle_workqueue->enable = false;
+ flush_work(&adev->dm.idle_workqueue->work);
+ }
+
+ kfree(adev->dm.idle_workqueue);
+ adev->dm.idle_workqueue = NULL;
+ }
+
amdgpu_dm_destroy_drm_device(&adev->dm);
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- if (adev->dm.secure_display_ctxs) {
+ if (adev->dm.secure_display_ctx.crtc_ctx) {
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->dm.secure_display_ctxs[i].crtc) {
- flush_work(&adev->dm.secure_display_ctxs[i].notify_ta_work);
- flush_work(&adev->dm.secure_display_ctxs[i].forward_roi_work);
+ if (adev->dm.secure_display_ctx.crtc_ctx[i].crtc) {
+ flush_work(&adev->dm.secure_display_ctx.crtc_ctx[i].notify_ta_work);
+ flush_work(&adev->dm.secure_display_ctx.crtc_ctx[i].forward_roi_work);
}
}
- kfree(adev->dm.secure_display_ctxs);
- adev->dm.secure_display_ctxs = NULL;
+ kfree(adev->dm.secure_display_ctx.crtc_ctx);
+ adev->dm.secure_display_ctx.crtc_ctx = NULL;
}
#endif
if (adev->dm.hdcp_workqueue) {
@@ -1909,17 +2262,15 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
adev->dm.hdcp_workqueue = NULL;
}
- if (adev->dm.dc)
+ if (adev->dm.dc) {
dc_deinit_callbacks(adev->dm.dc);
-
- if (adev->dm.dc)
dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv);
-
- if (dc_enable_dmub_notifications(adev->dm.dc)) {
- kfree(adev->dm.dmub_notify);
- adev->dm.dmub_notify = NULL;
- destroy_workqueue(adev->dm.delayed_hpd_wq);
- adev->dm.delayed_hpd_wq = NULL;
+ if (dc_enable_dmub_notifications(adev->dm.dc)) {
+ kfree(adev->dm.dmub_notify);
+ adev->dm.dmub_notify = NULL;
+ destroy_workqueue(adev->dm.delayed_hpd_wq);
+ adev->dm.delayed_hpd_wq = NULL;
+ }
}
if (adev->dm.dmub_bo)
@@ -1927,7 +2278,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
&adev->dm.dmub_bo_gpu_addr,
&adev->dm.dmub_bo_cpu_addr);
- if (adev->dm.hpd_rx_offload_wq) {
+ if (adev->dm.hpd_rx_offload_wq && adev->dm.dc) {
for (i = 0; i < adev->dm.dc->caps.max_links; i++) {
if (adev->dm.hpd_rx_offload_wq[i].wq) {
destroy_workqueue(adev->dm.hpd_rx_offload_wq[i].wq);
@@ -2004,7 +2355,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
return 0;
break;
default:
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(2, 0, 2):
case IP_VERSION(2, 0, 3):
case IP_VERSION(2, 0, 0):
@@ -2020,11 +2371,15 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
return 0;
default:
break;
}
- DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
+ drm_err(adev_to_drm(adev), "Unsupported ASIC type: 0x%X\n", adev->asic_type);
return -EINVAL;
}
@@ -2033,7 +2388,8 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
return 0;
}
- r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, fw_name_dmcu);
+ r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name_dmcu);
if (r == -ENODEV) {
/* DMCU firmware is not necessary, so don't raise a fuss if it's missing */
DRM_DEBUG_KMS("dm: DMCU firmware not found\n");
@@ -2041,7 +2397,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
return 0;
}
if (r) {
- dev_err(adev->dev, "amdgpu_dm: Can't validate firmware \"%s\"\n",
+ drm_err(adev_to_drm(adev), "amdgpu_dm: Can't validate firmware \"%s\"\n",
fw_name_dmcu);
amdgpu_ucode_release(&adev->dm.fw_dmcu);
return r;
@@ -2085,15 +2441,27 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
struct dmub_srv_create_params create_params;
struct dmub_srv_region_params region_params;
struct dmub_srv_region_info region_info;
- struct dmub_srv_fb_params fb_params;
+ struct dmub_srv_memory_params memory_params;
struct dmub_srv_fb_info *fb_info;
struct dmub_srv *dmub_srv;
const struct dmcub_firmware_header_v1_0 *hdr;
enum dmub_asic dmub_asic;
enum dmub_status status;
+ static enum dmub_window_memory_type window_memory_type[DMUB_WINDOW_TOTAL] = {
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_0_INST_CONST
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_1_STACK
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_2_BSS_DATA
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_3_VBIOS
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_4_MAILBOX
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_5_TRACEBUFF
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_6_FW_STATE
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_7_SCRATCH_MEM
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_IB_MEM
+ DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_SHARED_STATE
+ };
int r;
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
dmub_asic = DMUB_ASIC_DCN21;
break;
@@ -2128,6 +2496,17 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
case IP_VERSION(3, 2, 1):
dmub_asic = DMUB_ASIC_DCN321;
break;
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ dmub_asic = DMUB_ASIC_DCN35;
+ break;
+ case IP_VERSION(3, 6, 0):
+ dmub_asic = DMUB_ASIC_DCN36;
+ break;
+ case IP_VERSION(4, 0, 1):
+ dmub_asic = DMUB_ASIC_DCN401;
+ break;
+
default:
/* ASIC doesn't support DMUB. */
return 0;
@@ -2144,7 +2523,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->inst_const_bytes), PAGE_SIZE);
- DRM_INFO("Loading DMUB firmware via PSP: version=0x%08X\n",
+ drm_info(adev_to_drm(adev), "Loading DMUB firmware via PSP: version=0x%08X\n",
adev->dm.dmcub_fw_version);
}
@@ -2153,7 +2532,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
dmub_srv = adev->dm.dmub_srv;
if (!dmub_srv) {
- DRM_ERROR("Failed to allocate DMUB service!\n");
+ drm_err(adev_to_drm(adev), "Failed to allocate DMUB service!\n");
return -ENOMEM;
}
@@ -2166,7 +2545,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
/* Create the DMUB service. */
status = dmub_srv_create(dmub_srv, &create_params);
if (status != DMUB_STATUS_OK) {
- DRM_ERROR("Error creating DMUB service: %d\n", status);
+ drm_err(adev_to_drm(adev), "Error creating DMUB service: %d\n", status);
return -EINVAL;
}
@@ -2185,12 +2564,13 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
adev->dm.dmub_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
PSP_HEADER_BYTES;
+ region_params.window_memory_type = window_memory_type;
status = dmub_srv_calc_region_info(dmub_srv, &region_params,
&region_info);
if (status != DMUB_STATUS_OK) {
- DRM_ERROR("Error calculating DMUB region info: %d\n", status);
+ drm_err(adev_to_drm(adev), "Error calculating DMUB region info: %d\n", status);
return -EINVAL;
}
@@ -2208,35 +2588,48 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
return r;
/* Rebase the regions on the framebuffer address. */
- memset(&fb_params, 0, sizeof(fb_params));
- fb_params.cpu_addr = adev->dm.dmub_bo_cpu_addr;
- fb_params.gpu_addr = adev->dm.dmub_bo_gpu_addr;
- fb_params.region_info = &region_info;
+ memset(&memory_params, 0, sizeof(memory_params));
+ memory_params.cpu_fb_addr = adev->dm.dmub_bo_cpu_addr;
+ memory_params.gpu_fb_addr = adev->dm.dmub_bo_gpu_addr;
+ memory_params.region_info = &region_info;
+ memory_params.window_memory_type = window_memory_type;
adev->dm.dmub_fb_info =
kzalloc(sizeof(*adev->dm.dmub_fb_info), GFP_KERNEL);
fb_info = adev->dm.dmub_fb_info;
if (!fb_info) {
- DRM_ERROR(
+ drm_err(adev_to_drm(adev),
"Failed to allocate framebuffer info for DMUB service!\n");
return -ENOMEM;
}
- status = dmub_srv_calc_fb_info(dmub_srv, &fb_params, fb_info);
+ status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info);
if (status != DMUB_STATUS_OK) {
- DRM_ERROR("Error calculating DMUB FB info: %d\n", status);
+ drm_err(adev_to_drm(adev), "Error calculating DMUB FB info: %d\n", status);
return -EINVAL;
}
+ adev->dm.bb_from_dmub = dm_dmub_get_vbios_bounding_box(adev);
+
return 0;
}
-static int dm_sw_init(void *handle)
+static int dm_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
+ adev->dm.cgs_device = amdgpu_cgs_create_device(adev);
+
+ if (!adev->dm.cgs_device) {
+ drm_err(adev_to_drm(adev), "failed to create cgs device.\n");
+ return -EINVAL;
+ }
+
+ /* Moved from dm init since we need to use allocations for storing bounding box data */
+ INIT_LIST_HEAD(&adev->dm.da_list);
+
r = dm_dmub_sw_init(adev);
if (r)
return r;
@@ -2244,15 +2637,28 @@ static int dm_sw_init(void *handle)
return load_dmcu_fw(adev);
}
-static int dm_sw_fini(void *handle)
+static int dm_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct dal_allocation *da;
+
+ list_for_each_entry(da, &adev->dm.da_list, list) {
+ if (adev->dm.bb_from_dmub == (void *) da->cpu_ptr) {
+ amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr);
+ list_del(&da->list);
+ kfree(da);
+ adev->dm.bb_from_dmub = NULL;
+ break;
+ }
+ }
+
kfree(adev->dm.dmub_fb_info);
adev->dm.dmub_fb_info = NULL;
if (adev->dm.dmub_srv) {
dmub_srv_destroy(adev->dm.dmub_srv);
+ kfree(adev->dm.dmub_srv);
adev->dm.dmub_srv = NULL;
}
@@ -2271,16 +2677,20 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev)
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type == dc_connection_mst_branch &&
aconnector->mst_mgr.aux) {
- DRM_DEBUG_DRIVER("DM_MST: starting TM on aconnector: %p [id: %d]\n",
+ drm_dbg_kms(dev, "DM_MST: starting TM on aconnector: %p [id: %d]\n",
aconnector,
aconnector->base.base.id);
ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true);
if (ret < 0) {
- DRM_ERROR("DM_MST: Failed to start MST\n");
+ drm_err(dev, "DM_MST: Failed to start MST\n");
aconnector->dc_link->type =
dc_connection_single;
ret = dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
@@ -2294,9 +2704,9 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev)
return ret;
}
-static int dm_late_init(void *handle)
+static int dm_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct dmcu_iram_parameters params;
unsigned int linear_lut[16];
@@ -2342,9 +2752,9 @@ static int dm_late_init(void *handle)
static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr)
{
+ u8 buf[UUID_SIZE];
+ guid_t guid;
int ret;
- u8 guid[16];
- u64 tmp64;
mutex_lock(&mgr->lock);
if (!mgr->mst_primary)
@@ -2365,31 +2775,74 @@ static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr)
}
/* Some hubs forget their guids after they resume */
- ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
- if (ret != 16) {
+ ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, buf, sizeof(buf));
+ if (ret != sizeof(buf)) {
drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
goto out_fail;
}
- if (memchr_inv(guid, 0, 16) == NULL) {
- tmp64 = get_jiffies_64();
- memcpy(&guid[0], &tmp64, sizeof(u64));
- memcpy(&guid[8], &tmp64, sizeof(u64));
+ import_guid(&guid, buf);
+
+ if (guid_is_null(&guid)) {
+ guid_gen(&guid);
+ export_guid(buf, &guid);
- ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, guid, 16);
+ ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, buf, sizeof(buf));
- if (ret != 16) {
+ if (ret != sizeof(buf)) {
drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n");
goto out_fail;
}
}
- memcpy(mgr->mst_primary->guid, guid, 16);
+ guid_copy(&mgr->mst_primary->guid, &guid);
out_fail:
mutex_unlock(&mgr->lock);
}
+void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector)
+{
+ struct cec_notifier *n = aconnector->notifier;
+
+ if (!n)
+ return;
+
+ cec_notifier_phys_addr_invalidate(n);
+}
+
+void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector)
+{
+ struct drm_connector *connector = &aconnector->base;
+ struct cec_notifier *n = aconnector->notifier;
+
+ if (!n)
+ return;
+
+ cec_notifier_set_phys_addr(n,
+ connector->display_info.source_physical_address);
+}
+
+static void s3_handle_hdmi_cec(struct drm_device *ddev, bool suspend)
+{
+ struct amdgpu_dm_connector *aconnector;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter conn_iter;
+
+ drm_connector_list_iter_begin(ddev, &conn_iter);
+ drm_for_each_connector_iter(connector, &conn_iter) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ aconnector = to_amdgpu_dm_connector(connector);
+ if (suspend)
+ hdmi_cec_unset_edid(aconnector);
+ else
+ hdmi_cec_set_edid(aconnector);
+ }
+ drm_connector_list_iter_end(&conn_iter);
+}
+
static void s3_handle_mst(struct drm_device *dev, bool suspend)
{
struct amdgpu_dm_connector *aconnector;
@@ -2399,6 +2852,10 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type != dc_connection_mst_branch ||
aconnector->mst_root)
@@ -2462,7 +2919,7 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
* therefore, this function apply to navi10/12/14 but not Renoir
* *
*/
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(2, 0, 2):
case IP_VERSION(2, 0, 0):
break;
@@ -2472,16 +2929,43 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
ret = amdgpu_dpm_write_watermarks_table(adev);
if (ret) {
- DRM_ERROR("Failed to update WMTABLE!\n");
+ drm_err(adev_to_drm(adev), "Failed to update WMTABLE!\n");
return ret;
}
return 0;
}
+static int dm_oem_i2c_hw_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_display_manager *dm = &adev->dm;
+ struct amdgpu_i2c_adapter *oem_i2c;
+ struct ddc_service *oem_ddc_service;
+ int r;
+
+ oem_ddc_service = dc_get_oem_i2c_device(adev->dm.dc);
+ if (oem_ddc_service) {
+ oem_i2c = create_i2c(oem_ddc_service, true);
+ if (!oem_i2c) {
+ drm_info(adev_to_drm(adev), "Failed to create oem i2c adapter data\n");
+ return -ENOMEM;
+ }
+
+ r = devm_i2c_add_adapter(adev->dev, &oem_i2c->base);
+ if (r) {
+ drm_info(adev_to_drm(adev), "Failed to register oem i2c\n");
+ kfree(oem_i2c);
+ return r;
+ }
+ dm->oem_i2c = oem_i2c;
+ }
+
+ return 0;
+}
+
/**
* dm_hw_init() - Initialize DC device
- * @handle: The base driver device containing the amdgpu_dm device.
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the &struct amdgpu_display_manager device. This involves calling
* the initializers of each DM component, then populating the struct with them.
@@ -2499,27 +2983,35 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
* - Vblank support
* - Debug FS entries, if enabled
*/
-static int dm_hw_init(void *handle)
+static int dm_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
/* Create DAL display manager */
- amdgpu_dm_init(adev);
+ r = amdgpu_dm_init(adev);
+ if (r)
+ return r;
amdgpu_dm_hpd_init(adev);
+ r = dm_oem_i2c_hw_init(adev);
+ if (r)
+ drm_info(adev_to_drm(adev), "Failed to add OEM i2c bus\n");
+
return 0;
}
/**
* dm_hw_fini() - Teardown DC device
- * @handle: The base driver device containing the amdgpu_dm device.
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Teardown components within &struct amdgpu_display_manager that require
* cleanup. This involves cleaning up the DRM device, DC, and any modules that
* were loaded. Also flush IRQ workqueues and disable them.
*/
-static int dm_hw_fini(void *handle)
+static int dm_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_dm_hpd_fini(adev);
@@ -2545,44 +3037,50 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev,
irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst;
rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
if (rc)
- DRM_WARN("Failed to %s pflip interrupts\n",
+ drm_warn(adev_to_drm(adev), "Failed to %s pflip interrupts\n",
enable ? "enable" : "disable");
- if (enable) {
- if (amdgpu_dm_crtc_vrr_active(to_dm_crtc_state(acrtc->base.state)))
- rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, true);
- } else
- rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, false);
-
- if (rc)
- DRM_WARN("Failed to %sable vupdate interrupt\n", enable ? "en" : "dis");
+ if (dc_supports_vrr(adev->dm.dc->ctx->dce_version)) {
+ if (enable) {
+ if (amdgpu_dm_crtc_vrr_active(
+ to_dm_crtc_state(acrtc->base.state)))
+ rc = amdgpu_dm_crtc_set_vupdate_irq(
+ &acrtc->base, true);
+ } else
+ rc = amdgpu_dm_crtc_set_vupdate_irq(
+ &acrtc->base, false);
+
+ if (rc)
+ drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n",
+ enable ? "en" : "dis");
+ }
irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
/* During gpu-reset we disable and then enable vblank irq, so
* don't use amdgpu_irq_get/put() to avoid refcount change.
*/
if (!dc_interrupt_set(adev->dm.dc, irq_source, enable))
- DRM_WARN("Failed to %sable vblank interrupt\n", enable ? "en" : "dis");
+ drm_warn(adev_to_drm(adev), "Failed to %sable vblank interrupt\n", enable ? "en" : "dis");
}
}
}
+DEFINE_FREE(state_release, struct dc_state *, if (_T) dc_state_release(_T))
+
static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
{
- struct dc_state *context = NULL;
- enum dc_status res = DC_ERROR_UNEXPECTED;
+ struct dc_state *context __free(state_release) = NULL;
int i;
struct dc_stream_state *del_streams[MAX_PIPES];
int del_streams_count = 0;
+ struct dc_commit_streams_params params = {};
memset(del_streams, 0, sizeof(del_streams));
- context = dc_create_state(dc);
+ context = dc_state_create_current_copy(dc);
if (context == NULL)
- goto context_alloc_fail;
-
- dc_resource_state_copy_construct_current(dc, context);
+ return DC_ERROR_UNEXPECTED;
/* First remove from context all streams */
for (i = 0; i < context->stream_count; i++) {
@@ -2593,23 +3091,20 @@ static enum dc_status amdgpu_dm_commit_zero_streams(struct dc *dc)
/* Remove all planes for removed streams and then remove the streams */
for (i = 0; i < del_streams_count; i++) {
- if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
- res = DC_FAIL_DETACH_SURFACES;
- goto fail;
- }
+ enum dc_status res;
+
+ if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context))
+ return DC_FAIL_DETACH_SURFACES;
- res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
+ res = dc_state_remove_stream(dc, context, del_streams[i]);
if (res != DC_OK)
- goto fail;
+ return res;
}
- res = dc_commit_streams(dc, context->streams, context->stream_count);
-
-fail:
- dc_release_state(context);
+ params.streams = context->streams;
+ params.stream_count = context->stream_count;
-context_alloc_fail:
- return res;
+ return dc_commit_streams(dc, &params);
}
static void hpd_rx_irq_work_suspend(struct amdgpu_display_manager *dm)
@@ -2622,32 +3117,108 @@ static void hpd_rx_irq_work_suspend(struct amdgpu_display_manager *dm)
}
}
-static int dm_suspend(void *handle)
+static int dm_cache_state(struct amdgpu_device *adev)
+{
+ int r;
+
+ adev->dm.cached_state = drm_atomic_helper_suspend(adev_to_drm(adev));
+ if (IS_ERR(adev->dm.cached_state)) {
+ r = PTR_ERR(adev->dm.cached_state);
+ adev->dm.cached_state = NULL;
+ }
+
+ return adev->dm.cached_state ? 0 : r;
+}
+
+static void dm_destroy_cached_state(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = handle;
struct amdgpu_display_manager *dm = &adev->dm;
- int ret = 0;
+ struct drm_device *ddev = adev_to_drm(adev);
+ struct dm_plane_state *dm_new_plane_state;
+ struct drm_plane_state *new_plane_state;
+ struct dm_crtc_state *dm_new_crtc_state;
+ struct drm_crtc_state *new_crtc_state;
+ struct drm_plane *plane;
+ struct drm_crtc *crtc;
+ int i;
+
+ if (!dm->cached_state)
+ return;
+
+ /* Force mode set in atomic commit */
+ for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) {
+ new_crtc_state->active_changed = true;
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ reset_freesync_config_for_crtc(dm_new_crtc_state);
+ }
+
+ /*
+ * atomic_check is expected to create the dc states. We need to release
+ * them here, since they were duplicated as part of the suspend
+ * procedure.
+ */
+ for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) {
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ if (dm_new_crtc_state->stream) {
+ WARN_ON(kref_read(&dm_new_crtc_state->stream->refcount) > 1);
+ dc_stream_release(dm_new_crtc_state->stream);
+ dm_new_crtc_state->stream = NULL;
+ }
+ dm_new_crtc_state->base.color_mgmt_changed = true;
+ }
+
+ for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) {
+ dm_new_plane_state = to_dm_plane_state(new_plane_state);
+ if (dm_new_plane_state->dc_state) {
+ WARN_ON(kref_read(&dm_new_plane_state->dc_state->refcount) > 1);
+ dc_plane_state_release(dm_new_plane_state->dc_state);
+ dm_new_plane_state->dc_state = NULL;
+ }
+ }
+
+ drm_atomic_helper_resume(ddev, dm->cached_state);
+
+ dm->cached_state = NULL;
+}
+
+static int dm_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_display_manager *dm = &adev->dm;
if (amdgpu_in_reset(adev)) {
+ enum dc_status res;
+
mutex_lock(&dm->dc_lock);
dc_allow_idle_optimizations(adev->dm.dc, false);
- dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
+ dm->cached_dc_state = dc_state_create_copy(dm->dc->current_state);
- dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false);
+ if (dm->cached_dc_state)
+ dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, false);
- amdgpu_dm_commit_zero_streams(dm->dc);
+ res = amdgpu_dm_commit_zero_streams(dm->dc);
+ if (res != DC_OK) {
+ drm_err(adev_to_drm(adev), "Failed to commit zero streams: %d\n", res);
+ return -EINVAL;
+ }
amdgpu_dm_irq_suspend(adev);
hpd_rx_irq_work_suspend(dm);
- return ret;
+ return 0;
}
- WARN_ON(adev->dm.cached_state);
- adev->dm.cached_state = drm_atomic_helper_suspend(adev_to_drm(adev));
+ if (!adev->dm.cached_state) {
+ int r = dm_cache_state(adev);
+
+ if (r)
+ return r;
+ }
+
+ s3_handle_hdmi_cec(adev_to_drm(adev), true);
s3_handle_mst(adev_to_drm(adev), true);
@@ -2657,10 +3228,15 @@ static int dm_suspend(void *handle)
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
+ if (dm->dc->caps.ips_support && adev->in_s0ix)
+ dc_allow_idle_optimizations(dm->dc, true);
+
+ dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
+
return 0;
}
-struct amdgpu_dm_connector *
+struct drm_connector *
amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
struct drm_crtc *crtc)
{
@@ -2673,7 +3249,7 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
crtc_from_state = new_con_state->crtc;
if (crtc_from_state == crtc)
- return to_amdgpu_dm_connector(connector);
+ return connector;
}
return NULL;
@@ -2685,6 +3261,7 @@ static void emulated_link_detect(struct dc_link *link)
struct display_sink_capability sink_caps = { 0 };
enum dc_edid_status edid_status;
struct dc_context *dc_ctx = link->ctx;
+ struct drm_device *dev = adev_to_drm(dc_ctx->driver_context);
struct dc_sink *sink = NULL;
struct dc_sink *prev_sink = NULL;
@@ -2734,7 +3311,7 @@ static void emulated_link_detect(struct dc_link *link)
}
default:
- DC_ERROR("Invalid connector type! signal:%d\n",
+ drm_err(dev, "Invalid connector type! signal:%d\n",
link->connector_signal);
return;
}
@@ -2744,7 +3321,7 @@ static void emulated_link_detect(struct dc_link *link)
sink = dc_sink_create(&sink_init_data);
if (!sink) {
- DC_ERROR("Failed to create sink!\n");
+ drm_err(dev, "Failed to create sink!\n");
return;
}
@@ -2757,7 +3334,7 @@ static void emulated_link_detect(struct dc_link *link)
sink);
if (edid_status != EDID_OK)
- DC_ERROR("Failed to read EDID");
+ drm_err(dev, "Failed to read EDID\n");
}
@@ -2770,57 +3347,70 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state,
struct dc_scaling_info scaling_infos[MAX_SURFACES];
struct dc_flip_addrs flip_addrs[MAX_SURFACES];
struct dc_stream_update stream_update;
- } *bundle;
+ } *bundle __free(kfree);
int k, m;
bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
if (!bundle) {
- dm_error("Failed to allocate update bundle\n");
- goto cleanup;
+ drm_err(dm->ddev, "Failed to allocate update bundle\n");
+ return;
}
for (k = 0; k < dc_state->stream_count; k++) {
bundle->stream_update.stream = dc_state->streams[k];
- for (m = 0; m < dc_state->stream_status->plane_count; m++) {
+ for (m = 0; m < dc_state->stream_status[k].plane_count; m++) {
bundle->surface_updates[m].surface =
- dc_state->stream_status->plane_states[m];
+ dc_state->stream_status[k].plane_states[m];
bundle->surface_updates[m].surface->force_full_update =
true;
}
update_planes_and_stream_adapter(dm->dc,
UPDATE_TYPE_FULL,
- dc_state->stream_status->plane_count,
+ dc_state->stream_status[k].plane_count,
dc_state->streams[k],
&bundle->stream_update,
bundle->surface_updates);
}
+}
-cleanup:
- kfree(bundle);
+static void apply_delay_after_dpcd_poweroff(struct amdgpu_device *adev,
+ struct dc_sink *sink)
+{
+ struct dc_panel_patch *ppatch = NULL;
+
+ if (!sink)
+ return;
+
+ ppatch = &sink->edid_caps.panel_patch;
+ if (ppatch->wait_after_dpcd_poweroff_ms) {
+ msleep(ppatch->wait_after_dpcd_poweroff_ms);
+ drm_dbg_driver(adev_to_drm(adev),
+ "%s: adding a %ds delay as w/a for panel\n",
+ __func__,
+ ppatch->wait_after_dpcd_poweroff_ms / 1000);
+ }
}
-static int dm_resume(void *handle)
+static int dm_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct drm_device *ddev = adev_to_drm(adev);
struct amdgpu_display_manager *dm = &adev->dm;
struct amdgpu_dm_connector *aconnector;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
- struct drm_crtc *crtc;
- struct drm_crtc_state *new_crtc_state;
- struct dm_crtc_state *dm_new_crtc_state;
- struct drm_plane *plane;
- struct drm_plane_state *new_plane_state;
- struct dm_plane_state *dm_new_plane_state;
struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
enum dc_connection_type new_connection_type = dc_connection_none;
struct dc_state *dc_state;
- int i, r, j, ret;
- bool need_hotplug = false;
+ int i, r, j;
+ struct dc_commit_streams_params commit_params = {};
+
+ if (dm->dc->caps.ips_support) {
+ dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false);
+ }
if (amdgpu_in_reset(adev)) {
dc_state = dm->cached_dc_state;
@@ -2843,11 +3433,16 @@ static int dm_resume(void *handle)
link_enc_cfg_copy(adev->dm.dc->current_state, dc_state);
r = dm_dmub_hw_init(adev);
- if (r)
- DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
+ if (r) {
+ drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
+ return r;
+ }
+ dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
+
dc_resume(dm->dc);
+ adev->dm.restore_backlight = true;
amdgpu_dm_irq_resume_early(adev);
@@ -2864,26 +3459,34 @@ static int dm_resume(void *handle)
dc_enable_dmub_outbox(adev->dm.dc);
}
- WARN_ON(!dc_commit_streams(dm->dc, dc_state->streams, dc_state->stream_count));
+ commit_params.streams = dc_state->streams;
+ commit_params.stream_count = dc_state->stream_count;
+ dc_exit_ips_for_hw_access(dm->dc);
+ WARN_ON(!dc_commit_streams(dm->dc, &commit_params));
dm_gpureset_commit_state(dm->cached_dc_state, dm);
dm_gpureset_toggle_interrupts(adev, dm->cached_dc_state, true);
- dc_release_state(dm->cached_dc_state);
+ dc_state_release(dm->cached_dc_state);
dm->cached_dc_state = NULL;
amdgpu_dm_irq_resume_late(adev);
mutex_unlock(&dm->dc_lock);
+ /* set the backlight after a reset */
+ for (i = 0; i < dm->num_of_edps; i++) {
+ if (dm->backlight_dev[i])
+ amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
+ }
+
return 0;
}
/* Recreate dc_state - DC invalidates it when setting power state to S3. */
- dc_release_state(dm_state->context);
- dm_state->context = dc_create_state(dm->dc);
+ dc_state_release(dm_state->context);
+ dm_state->context = dc_state_create(dm->dc, NULL);
/* TODO: Remove dc_state->dccg, use dc->dccg directly. */
- dc_resource_state_construct(dm->dc, dm_state->context);
/* Before powering on DC we need to re-initialize DMUB. */
dm_dmub_hw_resume(adev);
@@ -2895,6 +3498,7 @@ static int dm_resume(void *handle)
}
/* power on hardware */
+ dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
/* program HPD filter */
@@ -2906,12 +3510,19 @@ static int dm_resume(void *handle)
*/
amdgpu_dm_irq_resume_early(adev);
+ s3_handle_hdmi_cec(ddev, false);
+
/* On resume we need to rewrite the MSTM control bits to enable MST*/
s3_handle_mst(ddev, false);
/* Do detection*/
drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+ bool ret;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (!aconnector->dc_link)
@@ -2921,19 +3532,23 @@ static int dm_resume(void *handle)
* this is the case when traversing through already created end sink
* MST connectors, should be skipped
*/
- if (aconnector && aconnector->mst_root)
+ if (aconnector->mst_root)
continue;
- mutex_lock(&aconnector->hpd_lock);
+ guard(mutex)(&aconnector->hpd_lock);
if (!dc_link_detect_connection_type(aconnector->dc_link, &new_connection_type))
- DRM_ERROR("KMS: Failed to detect connector\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n");
if (aconnector->base.force && new_connection_type == dc_connection_none) {
emulated_link_detect(aconnector->dc_link);
} else {
- mutex_lock(&dm->dc_lock);
- dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
- mutex_unlock(&dm->dc_lock);
+ guard(mutex)(&dm->dc_lock);
+ dc_exit_ips_for_hw_access(dm->dc);
+ ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_RESUMEFROMS3S4);
+ if (ret) {
+ /* w/a delay for certain panels */
+ apply_delay_after_dpcd_poweroff(adev, aconnector->dc_sink);
+ }
}
if (aconnector->fake_enable && aconnector->dc_link->local_sink)
@@ -2943,66 +3558,33 @@ static int dm_resume(void *handle)
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
amdgpu_dm_update_connector_after_detect(aconnector);
- mutex_unlock(&aconnector->hpd_lock);
}
drm_connector_list_iter_end(&iter);
- /* Force mode set in atomic commit */
- for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i)
- new_crtc_state->active_changed = true;
-
- /*
- * atomic_check is expected to create the dc states. We need to release
- * them here, since they were duplicated as part of the suspend
- * procedure.
- */
- for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) {
- dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
- if (dm_new_crtc_state->stream) {
- WARN_ON(kref_read(&dm_new_crtc_state->stream->refcount) > 1);
- dc_stream_release(dm_new_crtc_state->stream);
- dm_new_crtc_state->stream = NULL;
- }
- }
-
- for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) {
- dm_new_plane_state = to_dm_plane_state(new_plane_state);
- if (dm_new_plane_state->dc_state) {
- WARN_ON(kref_read(&dm_new_plane_state->dc_state->refcount) > 1);
- dc_plane_state_release(dm_new_plane_state->dc_state);
- dm_new_plane_state->dc_state = NULL;
- }
- }
-
- drm_atomic_helper_resume(ddev, dm->cached_state);
-
- dm->cached_state = NULL;
+ dm_destroy_cached_state(adev);
/* Do mst topology probing after resuming cached state*/
drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type != dc_connection_mst_branch ||
aconnector->mst_root)
continue;
- ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true);
-
- if (ret < 0) {
- dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
- aconnector->dc_link);
- need_hotplug = true;
- }
+ drm_dp_mst_topology_queue_probe(&aconnector->mst_mgr);
}
drm_connector_list_iter_end(&iter);
- if (need_hotplug)
- drm_kms_helper_hotplug_event(ddev);
-
amdgpu_dm_irq_resume_late(adev);
amdgpu_dm_smu_write_watermarks_table(adev);
+ drm_kms_helper_hotplug_event(ddev);
+
return 0;
}
@@ -3059,22 +3641,25 @@ static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = {
static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = {
.atomic_commit_tail = amdgpu_dm_atomic_commit_tail,
- .atomic_commit_setup = drm_dp_mst_atomic_setup_commit,
+ .atomic_commit_setup = amdgpu_dm_atomic_setup_commit,
};
static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
{
+ const struct drm_panel_backlight_quirk *panel_backlight_quirk;
struct amdgpu_dm_backlight_caps *caps;
struct drm_connector *conn_base;
struct amdgpu_device *adev;
struct drm_luminance_range_info *luminance_range;
+ struct drm_device *drm;
if (aconnector->bl_idx == -1 ||
aconnector->dc_link->connector_signal != SIGNAL_TYPE_EDP)
return;
conn_base = &aconnector->base;
- adev = drm_to_adev(conn_base->dev);
+ drm = conn_base->dev;
+ adev = drm_to_adev(drm);
caps = &adev->dm.backlight_caps[aconnector->bl_idx];
caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
@@ -3092,24 +3677,49 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
caps->aux_support = false;
else if (amdgpu_backlight == 1)
caps->aux_support = true;
+ if (caps->aux_support)
+ aconnector->dc_link->backlight_control_type = BACKLIGHT_CONTROL_AMD_AUX;
luminance_range = &conn_base->display_info.luminance_range;
- if (luminance_range->max_luminance) {
- caps->aux_min_input_signal = luminance_range->min_luminance;
+ if (luminance_range->max_luminance)
caps->aux_max_input_signal = luminance_range->max_luminance;
- } else {
- caps->aux_min_input_signal = 0;
+ else
caps->aux_max_input_signal = 512;
+
+ if (luminance_range->min_luminance)
+ caps->aux_min_input_signal = luminance_range->min_luminance;
+ else
+ caps->aux_min_input_signal = 1;
+
+ panel_backlight_quirk =
+ drm_get_panel_backlight_quirk(aconnector->drm_edid);
+ if (!IS_ERR_OR_NULL(panel_backlight_quirk)) {
+ if (panel_backlight_quirk->min_brightness) {
+ caps->min_input_signal =
+ panel_backlight_quirk->min_brightness - 1;
+ drm_info(drm,
+ "Applying panel backlight quirk, min_brightness: %d\n",
+ caps->min_input_signal);
+ }
+ if (panel_backlight_quirk->brightness_mask) {
+ drm_info(drm,
+ "Applying panel backlight quirk, brightness_mask: 0x%X\n",
+ panel_backlight_quirk->brightness_mask);
+ caps->brightness_mask =
+ panel_backlight_quirk->brightness_mask;
+ }
}
}
+DEFINE_FREE(sink_release, struct dc_sink *, if (_T) dc_sink_release(_T))
+
void amdgpu_dm_update_connector_after_detect(
struct amdgpu_dm_connector *aconnector)
{
struct drm_connector *connector = &aconnector->base;
+ struct dc_sink *sink __free(sink_release) = NULL;
struct drm_device *dev = connector->dev;
- struct dc_sink *sink;
/* MST handled by drm_mst framework */
if (aconnector->mst_mgr.mst_state == true)
@@ -3131,7 +3741,7 @@ void amdgpu_dm_update_connector_after_detect(
* For S3 resume with headless use eml_sink to fake stream
* because on resume connector->sink is set to NULL
*/
- mutex_lock(&dev->mode_config.mutex);
+ guard(mutex)(&dev->mode_config.mutex);
if (sink) {
if (aconnector->dc_sink) {
@@ -3147,7 +3757,7 @@ void amdgpu_dm_update_connector_after_detect(
aconnector->dc_sink = sink;
dc_sink_retain(aconnector->dc_sink);
amdgpu_dm_update_freesync_caps(connector,
- aconnector->edid);
+ aconnector->drm_edid);
} else {
amdgpu_dm_update_freesync_caps(connector, NULL);
if (!aconnector->dc_sink) {
@@ -3156,10 +3766,6 @@ void amdgpu_dm_update_connector_after_detect(
}
}
- mutex_unlock(&dev->mode_config.mutex);
-
- if (sink)
- dc_sink_release(sink);
return;
}
@@ -3167,27 +3773,23 @@ void amdgpu_dm_update_connector_after_detect(
* TODO: temporary guard to look for proper fix
* if this sink is MST sink, we should not do anything
*/
- if (sink && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- dc_sink_release(sink);
+ if (sink && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
return;
- }
if (aconnector->dc_sink == sink) {
/*
* We got a DP short pulse (Link Loss, DP CTS, etc...).
* Do nothing!!
*/
- DRM_DEBUG_DRIVER("DCHPD: connector_id=%d: dc_sink didn't change.\n",
- aconnector->connector_id);
- if (sink)
- dc_sink_release(sink);
+ drm_dbg_kms(dev, "DCHPD: connector_id=%d: dc_sink didn't change.\n",
+ aconnector->connector_id);
return;
}
- DRM_DEBUG_DRIVER("DCHPD: connector_id=%d: Old sink=%p New sink=%p\n",
- aconnector->connector_id, aconnector->dc_sink, sink);
+ drm_dbg_kms(dev, "DCHPD: connector_id=%d: Old sink=%p New sink=%p\n",
+ aconnector->connector_id, aconnector->dc_sink, sink);
- mutex_lock(&dev->mode_config.mutex);
+ guard(mutex)(&dev->mode_config.mutex);
/*
* 1. Update status of the drm connector
@@ -3206,38 +3808,42 @@ void amdgpu_dm_update_connector_after_detect(
aconnector->dc_sink = sink;
dc_sink_retain(aconnector->dc_sink);
if (sink->dc_edid.length == 0) {
- aconnector->edid = NULL;
+ aconnector->drm_edid = NULL;
+ hdmi_cec_unset_edid(aconnector);
if (aconnector->dc_link->aux_mode) {
- drm_dp_cec_unset_edid(
- &aconnector->dm_dp_aux.aux);
+ drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
}
} else {
- aconnector->edid =
- (struct edid *)sink->dc_edid.raw_edid;
+ const struct edid *edid = (const struct edid *)sink->dc_edid.raw_edid;
+ aconnector->drm_edid = drm_edid_alloc(edid, sink->dc_edid.length);
+ drm_edid_connector_update(connector, aconnector->drm_edid);
+
+ hdmi_cec_set_edid(aconnector);
if (aconnector->dc_link->aux_mode)
- drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
- aconnector->edid);
+ drm_dp_cec_attach(&aconnector->dm_dp_aux.aux,
+ connector->display_info.source_physical_address);
}
if (!aconnector->timing_requested) {
aconnector->timing_requested =
kzalloc(sizeof(struct dc_crtc_timing), GFP_KERNEL);
if (!aconnector->timing_requested)
- dm_error("failed to create aconnector->requested_timing\n");
+ drm_err(dev,
+ "failed to create aconnector->requested_timing\n");
}
- drm_connector_update_edid_property(connector, aconnector->edid);
- amdgpu_dm_update_freesync_caps(connector, aconnector->edid);
+ amdgpu_dm_update_freesync_caps(connector, aconnector->drm_edid);
update_connector_ext_caps(aconnector);
} else {
+ hdmi_cec_unset_edid(aconnector);
drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
amdgpu_dm_update_freesync_caps(connector, NULL);
- drm_connector_update_edid_property(connector, NULL);
aconnector->num_modes = 0;
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
- aconnector->edid = NULL;
+ drm_edid_free(aconnector->drm_edid);
+ aconnector->drm_edid = NULL;
kfree(aconnector->timing_requested);
aconnector->timing_requested = NULL;
/* Set CP to DESIRED if it was ENABLED, so we can re-enable it again on hotplug */
@@ -3245,12 +3851,7 @@ void amdgpu_dm_update_connector_after_detect(
connector->state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED;
}
- mutex_unlock(&dev->mode_config.mutex);
-
update_subconnector_property(aconnector);
-
- if (sink)
- dc_sink_release(sink);
}
static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
@@ -3260,6 +3861,7 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
enum dc_connection_type new_connection_type = dc_connection_none;
struct amdgpu_device *adev = drm_to_adev(dev);
struct dm_connector_state *dm_con_state = to_dm_connector_state(connector->state);
+ struct dc *dc = aconnector->dc_link->ctx->dc;
bool ret = false;
if (adev->dm.disable_hpd_irq)
@@ -3269,7 +3871,7 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
* In case of failure or MST no need to update connector status or notify the OS
* since (for MST case) MST does this in its own context.
*/
- mutex_lock(&aconnector->hpd_lock);
+ guard(mutex)(&aconnector->hpd_lock);
if (adev->dm.hdcp_workqueue) {
hdcp_reset_display(adev->dm.hdcp_workqueue, aconnector->dc_link->link_index);
@@ -3281,7 +3883,7 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
aconnector->timing_changed = false;
if (!dc_link_detect_connection_type(aconnector->dc_link, &new_connection_type))
- DRM_ERROR("KMS: Failed to detect connector\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n");
if (aconnector->base.force && new_connection_type == dc_connection_none) {
emulated_link_detect(aconnector->dc_link);
@@ -3293,10 +3895,13 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
if (aconnector->base.force == DRM_FORCE_UNSPECIFIED)
drm_kms_helper_connector_hotplug_event(connector);
} else {
- mutex_lock(&adev->dm.dc_lock);
- ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
- mutex_unlock(&adev->dm.dc_lock);
+ scoped_guard(mutex, &adev->dm.dc_lock) {
+ dc_exit_ips_for_hw_access(dc);
+ ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
+ }
if (ret) {
+ /* w/a delay for certain panels */
+ apply_delay_after_dpcd_poweroff(adev, aconnector->dc_sink);
amdgpu_dm_update_connector_after_detect(aconnector);
drm_modeset_lock_all(dev);
@@ -3307,8 +3912,6 @@ static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector)
drm_kms_helper_connector_hotplug_event(connector);
}
}
- mutex_unlock(&aconnector->hpd_lock);
-
}
static void handle_hpd_irq(void *param)
@@ -3319,20 +3922,21 @@ static void handle_hpd_irq(void *param)
}
-static void schedule_hpd_rx_offload_work(struct hpd_rx_irq_offload_work_queue *offload_wq,
+static void schedule_hpd_rx_offload_work(struct amdgpu_device *adev, struct hpd_rx_irq_offload_work_queue *offload_wq,
union hpd_irq_data hpd_irq_data)
{
struct hpd_rx_irq_offload_work *offload_work =
kzalloc(sizeof(*offload_work), GFP_KERNEL);
if (!offload_work) {
- DRM_ERROR("Failed to allocate hpd_rx_irq_offload_work.\n");
+ drm_err(adev_to_drm(adev), "Failed to allocate hpd_rx_irq_offload_work.\n");
return;
}
INIT_WORK(&offload_work->work, dm_handle_hpd_rx_offload_work);
offload_work->data = hpd_irq_data;
offload_work->offload_wq = offload_wq;
+ offload_work->adev = adev;
queue_work(offload_wq->wq, &offload_work->work);
DRM_DEBUG_KMS("queue work to handle hpd_rx offload work");
@@ -3353,6 +3957,7 @@ static void handle_hpd_rx_irq(void *param)
bool has_left_work = false;
int idx = dc_link->link_index;
struct hpd_rx_irq_offload_work_queue *offload_wq = &adev->dm.hpd_rx_offload_wq[idx];
+ struct dc *dc = aconnector->dc_link->ctx->dc;
memset(&hpd_irq_data, 0, sizeof(hpd_irq_data));
@@ -3373,7 +3978,7 @@ static void handle_hpd_rx_irq(void *param)
goto out;
if (hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
- schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data);
+ schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data);
goto out;
}
@@ -3395,7 +4000,7 @@ static void handle_hpd_rx_irq(void *param)
spin_unlock(&offload_wq->offload_lock);
if (!skip)
- schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data);
+ schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data);
goto out;
}
@@ -3412,7 +4017,7 @@ static void handle_hpd_rx_irq(void *param)
spin_unlock(&offload_wq->offload_lock);
if (!skip)
- schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data);
+ schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data);
goto out;
}
@@ -3422,7 +4027,7 @@ out:
if (result && !is_mst_root_connector) {
/* Downstream Port status changed. */
if (!dc_link_detect_connection_type(dc_link, &new_connection_type))
- DRM_ERROR("KMS: Failed to detect connector\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n");
if (aconnector->base.force && new_connection_type == dc_connection_none) {
emulated_link_detect(dc_link);
@@ -3442,6 +4047,7 @@ out:
bool ret = false;
mutex_lock(&adev->dm.dc_lock);
+ dc_exit_ips_for_hw_access(dc);
ret = dc_link_detect(dc_link, DETECT_REASON_HPDRX);
mutex_unlock(&adev->dm.dc_lock);
@@ -3470,7 +4076,7 @@ out:
mutex_unlock(&aconnector->hpd_lock);
}
-static void register_hpd_handlers(struct amdgpu_device *adev)
+static int register_hpd_handlers(struct amdgpu_device *adev)
{
struct drm_device *dev = adev_to_drm(adev);
struct drm_connector *connector;
@@ -3481,9 +4087,32 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD,
+ dmub_hpd_callback, true)) {
+ drm_err(adev_to_drm(adev), "fail to register dmub hpd callback");
+ return -EINVAL;
+ }
+
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ,
+ dmub_hpd_callback, true)) {
+ drm_err(adev_to_drm(adev), "fail to register dmub hpd callback");
+ return -EINVAL;
+ }
+
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY,
+ dmub_hpd_sense_callback, true)) {
+ drm_err(adev_to_drm(adev), "fail to register dmub hpd sense callback");
+ return -EINVAL;
+ }
+ }
+
list_for_each_entry(connector,
&dev->mode_config.connector_list, head) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
dc_link = aconnector->dc_link;
@@ -3491,9 +4120,16 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
int_params.irq_source = dc_link->irq_source_hpd;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- handle_hpd_irq,
- (void *) aconnector);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_HPD1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_HPD6) {
+ drm_err(adev_to_drm(adev), "Failed to register hpd irq!\n");
+ return -EINVAL;
+ }
+
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ handle_hpd_irq, (void *) aconnector))
+ return -ENOMEM;
}
if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) {
@@ -3502,15 +4138,19 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
int_params.irq_source = dc_link->irq_source_hpd_rx;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- handle_hpd_rx_irq,
- (void *) aconnector);
- }
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_HPD1RX ||
+ int_params.irq_source > DC_IRQ_SOURCE_HPD6RX) {
+ drm_err(adev_to_drm(adev), "Failed to register hpd rx irq!\n");
+ return -EINVAL;
+ }
- if (adev->dm.hpd_rx_offload_wq)
- adev->dm.hpd_rx_offload_wq[connector->index].aconnector =
- aconnector;
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ handle_hpd_rx_irq, (void *) aconnector))
+ return -ENOMEM;
+ }
}
+ return 0;
}
#if defined(CONFIG_DRM_AMD_DC_SI)
@@ -3543,7 +4183,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, client_id, i + 1, &adev->crtc_irq);
if (r) {
- DRM_ERROR("Failed to add crtc irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n");
return r;
}
@@ -3551,13 +4191,21 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i + 1, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) {
+ drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.vblank_params[int_params.irq_source - DC_IRQ_SOURCE_VBLANK1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_crtc_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_crtc_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* Use GRPH_PFLIP interrupt */
@@ -3565,7 +4213,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) {
r = amdgpu_irq_add_id(adev, client_id, i, &adev->pageflip_irq);
if (r) {
- DRM_ERROR("Failed to add page flip irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n");
return r;
}
@@ -3573,27 +4221,34 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST ||
+ int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) {
+ drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.pflip_params[int_params.irq_source - DC_IRQ_SOURCE_PFLIP_FIRST];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_pflip_high_irq, c_irq_params);
-
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_pflip_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* HPD */
r = amdgpu_irq_add_id(adev, client_id,
VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
if (r) {
- DRM_ERROR("Failed to add hpd irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n");
return r;
}
- register_hpd_handlers(adev);
+ r = register_hpd_handlers(adev);
- return 0;
+ return r;
}
#endif
@@ -3629,7 +4284,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
for (i = VISLANDS30_IV_SRCID_D1_VERTICAL_INTERRUPT0; i <= VISLANDS30_IV_SRCID_D6_VERTICAL_INTERRUPT0; i++) {
r = amdgpu_irq_add_id(adev, client_id, i, &adev->crtc_irq);
if (r) {
- DRM_ERROR("Failed to add crtc irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n");
return r;
}
@@ -3637,20 +4292,28 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) {
+ drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.vblank_params[int_params.irq_source - DC_IRQ_SOURCE_VBLANK1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_crtc_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_crtc_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* Use VUPDATE interrupt */
for (i = VISLANDS30_IV_SRCID_D1_V_UPDATE_INT; i <= VISLANDS30_IV_SRCID_D6_V_UPDATE_INT; i += 2) {
r = amdgpu_irq_add_id(adev, client_id, i, &adev->vupdate_irq);
if (r) {
- DRM_ERROR("Failed to add vupdate irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add vupdate irq id!\n");
return r;
}
@@ -3658,13 +4321,21 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_VUPDATE1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_VUPDATE6) {
+ drm_err(adev_to_drm(adev), "Failed to register vupdate irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_vupdate_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_vupdate_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* Use GRPH_PFLIP interrupt */
@@ -3672,7 +4343,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) {
r = amdgpu_irq_add_id(adev, client_id, i, &adev->pageflip_irq);
if (r) {
- DRM_ERROR("Failed to add page flip irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n");
return r;
}
@@ -3680,27 +4351,34 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST ||
+ int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) {
+ drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.pflip_params[int_params.irq_source - DC_IRQ_SOURCE_PFLIP_FIRST];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_pflip_high_irq, c_irq_params);
-
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_pflip_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* HPD */
r = amdgpu_irq_add_id(adev, client_id,
VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
if (r) {
- DRM_ERROR("Failed to add hpd irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n");
return r;
}
- register_hpd_handlers(adev);
+ r = register_hpd_handlers(adev);
- return 0;
+ return r;
}
/* Register IRQ sources and initialize IRQ callbacks */
@@ -3744,7 +4422,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->crtc_irq);
if (r) {
- DRM_ERROR("Failed to add crtc irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n");
return r;
}
@@ -3752,13 +4430,21 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) {
+ drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.vblank_params[int_params.irq_source - DC_IRQ_SOURCE_VBLANK1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(
- adev, &int_params, dm_crtc_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_crtc_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* Use otg vertical line interrupt */
@@ -3768,7 +4454,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
vrtl_int_srcid[i], &adev->vline0_irq);
if (r) {
- DRM_ERROR("Failed to add vline0 irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add vline0 irq id!\n");
return r;
}
@@ -3776,9 +4462,11 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, vrtl_int_srcid[i], 0);
- if (int_params.irq_source == DC_IRQ_SOURCE_INVALID) {
- DRM_ERROR("Failed to register vline0 irq %d!\n", vrtl_int_srcid[i]);
- break;
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_DC1_VLINE0 ||
+ int_params.irq_source > DC_IRQ_SOURCE_DC6_VLINE0) {
+ drm_err(adev_to_drm(adev), "Failed to register vline0 irq!\n");
+ return -EINVAL;
}
c_irq_params = &adev->dm.vline0_params[int_params.irq_source
@@ -3787,8 +4475,10 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_dcn_vertical_interrupt0_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_dcn_vertical_interrupt0_high_irq,
+ c_irq_params))
+ return -ENOMEM;
}
#endif
@@ -3803,7 +4493,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq);
if (r) {
- DRM_ERROR("Failed to add vupdate irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add vupdate irq id!\n");
return r;
}
@@ -3811,13 +4501,21 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_VUPDATE1 ||
+ int_params.irq_source > DC_IRQ_SOURCE_VUPDATE6) {
+ drm_err(adev_to_drm(adev), "Failed to register vupdate irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_vupdate_high_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_vupdate_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* Use GRPH_PFLIP interrupt */
@@ -3826,7 +4524,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
i++) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->pageflip_irq);
if (r) {
- DRM_ERROR("Failed to add page flip irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n");
return r;
}
@@ -3834,27 +4532,34 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
int_params.irq_source =
dc_interrupt_to_irq_source(dc, i, 0);
+ if (int_params.irq_source == DC_IRQ_SOURCE_INVALID ||
+ int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST ||
+ int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) {
+ drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n");
+ return -EINVAL;
+ }
+
c_irq_params = &adev->dm.pflip_params[int_params.irq_source - DC_IRQ_SOURCE_PFLIP_FIRST];
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_pflip_high_irq, c_irq_params);
-
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_pflip_high_irq, c_irq_params))
+ return -ENOMEM;
}
/* HPD */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT,
&adev->hpd_irq);
if (r) {
- DRM_ERROR("Failed to add hpd irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n");
return r;
}
- register_hpd_handlers(adev);
+ r = register_hpd_handlers(adev);
- return 0;
+ return r;
}
/* Register Outbox IRQ sources and initialize IRQ callbacks */
static int register_outbox_irq_handlers(struct amdgpu_device *adev)
@@ -3870,7 +4575,7 @@ static int register_outbox_irq_handlers(struct amdgpu_device *adev)
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT,
&adev->dmub_outbox_irq);
if (r) {
- DRM_ERROR("Failed to add outbox irq id!\n");
+ drm_err(adev_to_drm(adev), "Failed to add outbox irq id!\n");
return r;
}
@@ -3885,8 +4590,9 @@ static int register_outbox_irq_handlers(struct amdgpu_device *adev)
c_irq_params->adev = adev;
c_irq_params->irq_src = int_params.irq_source;
- amdgpu_dm_irq_register_interrupt(adev, &int_params,
- dm_dmub_outbox1_low_irq, c_irq_params);
+ if (!amdgpu_dm_irq_register_interrupt(adev, &int_params,
+ dm_dmub_outbox1_low_irq, c_irq_params))
+ return -ENOMEM;
}
return 0;
@@ -3950,7 +4656,7 @@ dm_atomic_duplicate_state(struct drm_private_obj *obj)
old_state = to_dm_atomic_state(obj->state);
if (old_state && old_state->context)
- new_state->context = dc_copy_state(old_state->context);
+ new_state->context = dc_state_create_copy(old_state->context);
if (!new_state->context) {
kfree(new_state);
@@ -3966,7 +4672,7 @@ static void dm_atomic_destroy_state(struct drm_private_obj *obj,
struct dm_atomic_state *dm_state = to_dm_atomic_state(state);
if (dm_state && dm_state->context)
- dc_release_state(dm_state->context);
+ dc_state_release(dm_state->context);
kfree(dm_state);
}
@@ -4002,14 +4708,12 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
if (!state)
return -ENOMEM;
- state->context = dc_create_state(adev->dm.dc);
+ state->context = dc_state_create_current_copy(adev->dm.dc);
if (!state->context) {
kfree(state);
return -ENOMEM;
}
- dc_resource_state_copy_construct_current(adev->dm.dc, state->context);
-
drm_atomic_private_obj_init(adev_to_drm(adev),
&adev->dm.atomic_obj,
&state->base,
@@ -4017,14 +4721,22 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
r = amdgpu_display_modeset_create_props(adev);
if (r) {
- dc_release_state(state->context);
+ dc_state_release(state->context);
kfree(state);
return r;
}
+#ifdef AMD_PRIVATE_COLOR
+ if (amdgpu_dm_create_color_properties(adev)) {
+ dc_state_release(state->context);
+ kfree(state);
+ return -ENOMEM;
+ }
+#endif
+
r = amdgpu_dm_audio_init(adev);
if (r) {
- dc_release_state(state->context);
+ dc_state_release(state->context);
kfree(state);
return r;
}
@@ -4034,38 +4746,46 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12
#define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255
+#define AMDGPU_DM_MIN_SPREAD ((AMDGPU_DM_DEFAULT_MAX_BACKLIGHT - AMDGPU_DM_DEFAULT_MIN_BACKLIGHT) / 2)
#define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50
static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm,
int bl_idx)
{
+ struct amdgpu_dm_backlight_caps *caps = &dm->backlight_caps[bl_idx];
+
+ if (caps->caps_valid)
+ return;
+
#if defined(CONFIG_ACPI)
- struct amdgpu_dm_backlight_caps caps;
+ amdgpu_acpi_get_backlight_caps(caps);
- memset(&caps, 0, sizeof(caps));
+ /* validate the firmware value is sane */
+ if (caps->caps_valid) {
+ int spread = caps->max_input_signal - caps->min_input_signal;
- if (dm->backlight_caps[bl_idx].caps_valid)
- return;
+ if (caps->max_input_signal > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT ||
+ caps->min_input_signal < 0 ||
+ spread > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT ||
+ spread < AMDGPU_DM_MIN_SPREAD) {
+ DRM_DEBUG_KMS("DM: Invalid backlight caps: min=%d, max=%d\n",
+ caps->min_input_signal, caps->max_input_signal);
+ caps->caps_valid = false;
+ }
+ }
- amdgpu_acpi_get_backlight_caps(&caps);
- if (caps.caps_valid) {
- dm->backlight_caps[bl_idx].caps_valid = true;
- if (caps.aux_support)
- return;
- dm->backlight_caps[bl_idx].min_input_signal = caps.min_input_signal;
- dm->backlight_caps[bl_idx].max_input_signal = caps.max_input_signal;
- } else {
- dm->backlight_caps[bl_idx].min_input_signal =
- AMDGPU_DM_DEFAULT_MIN_BACKLIGHT;
- dm->backlight_caps[bl_idx].max_input_signal =
- AMDGPU_DM_DEFAULT_MAX_BACKLIGHT;
+ if (!caps->caps_valid) {
+ caps->min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT;
+ caps->max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT;
+ caps->caps_valid = true;
}
#else
- if (dm->backlight_caps[bl_idx].aux_support)
+ if (caps->aux_support)
return;
- dm->backlight_caps[bl_idx].min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT;
- dm->backlight_caps[bl_idx].max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT;
+ caps->min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT;
+ caps->max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT;
+ caps->caps_valid = true;
#endif
}
@@ -4087,6 +4807,82 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps,
return 1;
}
+/* Rescale from [0..max - min] to [0..AMDGPU_MAX_BL_LEVEL]; input is not offset by min */
+static inline u32 scale_input_to_fw(int min, int max, u64 input)
+{
+ return DIV_ROUND_CLOSEST_ULL(input * AMDGPU_MAX_BL_LEVEL, max - min);
+}
+
+/* Rescale from [0..AMDGPU_MAX_BL_LEVEL] to [min..max]; the result is offset by min */
+static inline u32 scale_fw_to_input(int min, int max, u64 input)
+{
+ return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), AMDGPU_MAX_BL_LEVEL);
+}
+
+static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps,
+ unsigned int min, unsigned int max,
+ uint32_t *user_brightness)
+{
+ u32 brightness = scale_input_to_fw(min, max, *user_brightness);
+ u8 lower_signal, upper_signal, upper_lum, lower_lum, lum;
+ int left, right;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)
+ return;
+
+ if (!caps->data_points)
+ return;
+
+ /*
+ * Handle the case where brightness is below the first data point
+ * Interpolate between (0,0) and (first_signal, first_lum)
+ */
+ if (brightness < caps->luminance_data[0].input_signal) {
+ lum = DIV_ROUND_CLOSEST(caps->luminance_data[0].luminance * brightness,
+ caps->luminance_data[0].input_signal);
+ goto scale;
+ }
+
+ left = 0;
+ right = caps->data_points - 1;
+ while (left <= right) {
+ int mid = left + (right - left) / 2;
+ u8 signal = caps->luminance_data[mid].input_signal;
+
+ /* Exact match found */
+ if (signal == brightness) {
+ lum = caps->luminance_data[mid].luminance;
+ goto scale;
+ }
+
+ if (signal < brightness)
+ left = mid + 1;
+ else
+ right = mid - 1;
+ }
+
+ /* brightness is above the last data point: clamp left to the last index */
+ if (left >= caps->data_points)
+ left = caps->data_points - 1;
+
+ /* left == right + 1 here, or left == right if left was clamped above */
+ lower_signal = caps->luminance_data[right].input_signal;
+ upper_signal = caps->luminance_data[left].input_signal;
+ lower_lum = caps->luminance_data[right].luminance;
+ upper_lum = caps->luminance_data[left].luminance;
+
+ /* linear interpolation between the two points; use upper_lum when they coincide or lower_lum is 0 */
+ if (right == left || !lower_lum)
+ lum = upper_lum;
+ else
+ lum = lower_lum + DIV_ROUND_CLOSEST((upper_lum - lower_lum) *
+ (brightness - lower_signal),
+ upper_signal - lower_signal);
+scale:
+ *user_brightness = scale_fw_to_input(min, max,
+ DIV_ROUND_CLOSEST(lum * brightness, 101));
+}
+
static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps,
uint32_t brightness)
{
@@ -4095,9 +4891,10 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c
if (!get_brightness_range(caps, &min, &max))
return brightness;
- // Rescale 0..255 to min..max
- return min + DIV_ROUND_CLOSEST((max - min) * brightness,
- AMDGPU_MAX_BL_LEVEL);
+ convert_custom_brightness(caps, min, max, &brightness);
+
+ // Rescale 0..max to min..max
+ return min + DIV_ROUND_CLOSEST_ULL((u64)(max - min) * brightness, max);
}
static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *caps,
@@ -4110,8 +4907,8 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap
if (brightness < min)
return 0;
- // Rescale min..max to 0..255
- return DIV_ROUND_CLOSEST(AMDGPU_MAX_BL_LEVEL * (brightness - min),
+ // Rescale min..max to 0..max
+ return DIV_ROUND_CLOSEST_ULL((u64)max * (brightness - min),
max - min);
}
@@ -4119,33 +4916,61 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
int bl_idx,
u32 user_brightness)
{
- struct amdgpu_dm_backlight_caps caps;
+ struct amdgpu_dm_backlight_caps *caps;
struct dc_link *link;
u32 brightness;
- bool rc;
+ bool rc, reallow_idle = false;
amdgpu_dm_update_backlight_caps(dm, bl_idx);
- caps = dm->backlight_caps[bl_idx];
+ caps = &dm->backlight_caps[bl_idx];
dm->brightness[bl_idx] = user_brightness;
/* update scratch register */
if (bl_idx == 0)
amdgpu_atombios_scratch_regs_set_backlight_level(dm->adev, dm->brightness[bl_idx]);
- brightness = convert_brightness_from_user(&caps, dm->brightness[bl_idx]);
+ brightness = convert_brightness_from_user(caps, dm->brightness[bl_idx]);
link = (struct dc_link *)dm->backlight_link[bl_idx];
+ /* Apply brightness quirk */
+ if (caps->brightness_mask)
+ brightness |= caps->brightness_mask;
+
/* Change brightness based on AUX property */
- if (caps.aux_support) {
+ mutex_lock(&dm->dc_lock);
+ if (dm->dc->caps.ips_support && dm->dc->ctx->dmub_srv->idle_allowed) {
+ dc_allow_idle_optimizations(dm->dc, false);
+ reallow_idle = true;
+ }
+
+ if (trace_amdgpu_dm_brightness_enabled()) {
+ trace_amdgpu_dm_brightness(__builtin_return_address(0),
+ user_brightness,
+ brightness,
+ caps->aux_support,
+ power_supply_is_system_supplied() > 0);
+ }
+
+ if (caps->aux_support) {
rc = dc_link_set_backlight_level_nits(link, true, brightness,
AUX_BL_DEFAULT_TRANSITION_TIME_MS);
if (!rc)
DRM_DEBUG("DM: Failed to update backlight via AUX on eDP[%d]\n", bl_idx);
} else {
- rc = dc_link_set_backlight_level(link, brightness, 0);
+ struct set_backlight_level_params backlight_level_params = { 0 };
+
+ backlight_level_params.backlight_pwm_u16_16 = brightness;
+ backlight_level_params.transition_time_in_ms = 0;
+
+ rc = dc_link_set_backlight_level(link, &backlight_level_params);
if (!rc)
DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx);
}
+ if (dm->dc->caps.ips_support && reallow_idle)
+ dc_allow_idle_optimizations(dm->dc, true);
+
+ mutex_unlock(&dm->dc_lock);
+
if (rc)
dm->actual_brightness[bl_idx] = user_brightness;
}
@@ -4178,10 +5003,8 @@ static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm,
if (caps.aux_support) {
u32 avg, peak;
- bool rc;
- rc = dc_link_get_backlight_level_nits(link, &avg, &peak);
- if (!rc)
+ if (!dc_link_get_backlight_level_nits(link, &avg, &peak))
return dm->brightness[bl_idx];
return convert_brightness_to_user(&caps, avg);
}
@@ -4220,7 +5043,9 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector)
struct drm_device *drm = aconnector->base.dev;
struct amdgpu_display_manager *dm = &drm_to_adev(drm)->dm;
struct backlight_properties props = { 0 };
+ struct amdgpu_dm_backlight_caps *caps;
char bl_name[16];
+ int min, max;
if (aconnector->bl_idx == -1)
return;
@@ -4232,8 +5057,24 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector)
return;
}
- props.max_brightness = AMDGPU_MAX_BL_LEVEL;
- props.brightness = AMDGPU_MAX_BL_LEVEL;
+ caps = &dm->backlight_caps[aconnector->bl_idx];
+ if (get_brightness_range(caps, &min, &max)) {
+ if (power_supply_is_system_supplied() > 0)
+ props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->ac_level, 100);
+ else
+ props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->dc_level, 100);
+ /* min is zero, so max needs to be adjusted */
+ props.max_brightness = max - min;
+ drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max,
+ caps->ac_level, caps->dc_level);
+ } else
+ props.brightness = props.max_brightness = MAX_BACKLIGHT_LEVEL;
+
+ if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) {
+ drm_info(drm, "Using custom brightness curve\n");
+ props.scale = BACKLIGHT_SCALE_NON_LINEAR;
+ } else
+ props.scale = BACKLIGHT_SCALE_LINEAR;
props.type = BACKLIGHT_RAW;
snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d",
@@ -4242,12 +5083,13 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector)
dm->backlight_dev[aconnector->bl_idx] =
backlight_device_register(bl_name, aconnector->base.kdev, dm,
&amdgpu_dm_backlight_ops, &props);
+ dm->brightness[aconnector->bl_idx] = props.brightness;
if (IS_ERR(dm->backlight_dev[aconnector->bl_idx])) {
- DRM_ERROR("DM: Backlight registration failed!\n");
+ drm_err(drm, "DM: Backlight registration failed!\n");
dm->backlight_dev[aconnector->bl_idx] = NULL;
} else
- DRM_DEBUG_DRIVER("DM: Registered Backlight device: %s\n", bl_name);
+ drm_dbg_driver(drm, "DM: Registered Backlight device: %s\n", bl_name);
}
static int initialize_plane(struct amdgpu_display_manager *dm,
@@ -4261,7 +5103,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm,
plane = kzalloc(sizeof(struct drm_plane), GFP_KERNEL);
if (!plane) {
- DRM_ERROR("KMS: Failed to allocate plane\n");
+ drm_err(adev_to_drm(dm->adev), "KMS: Failed to allocate plane\n");
return -ENOMEM;
}
plane->type = plane_type;
@@ -4279,7 +5121,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm,
ret = amdgpu_dm_plane_init(dm, plane, possible_crtcs, plane_cap);
if (ret) {
- DRM_ERROR("KMS: Failed to initialize plane\n");
+ drm_err(adev_to_drm(dm->adev), "KMS: Failed to initialize plane\n");
kfree(plane);
return ret;
}
@@ -4309,7 +5151,6 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm,
aconnector->bl_idx = bl_idx;
amdgpu_dm_update_backlight_caps(dm, bl_idx);
- dm->brightness[bl_idx] = AMDGPU_MAX_BL_LEVEL;
dm->backlight_link[bl_idx] = link;
dm->num_of_edps++;
@@ -4349,13 +5190,16 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
link_cnt = dm->dc->caps.max_links;
if (amdgpu_dm_mode_config_init(dm->adev)) {
- DRM_ERROR("DM: Failed to initialize mode config\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize mode config\n");
return -EINVAL;
}
/* There is one primary plane per CRTC */
primary_planes = dm->dc->caps.max_streams;
- ASSERT(primary_planes <= AMDGPU_MAX_PLANES);
+ if (primary_planes > AMDGPU_MAX_PLANES) {
+ drm_err(adev_to_drm(adev), "DM: Plane nums out of 6 planes\n");
+ return -EINVAL;
+ }
/*
* Initialize primary planes, implicit planes for legacy IOCTLS.
@@ -4366,7 +5210,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
if (initialize_plane(dm, mode_info, i,
DRM_PLANE_TYPE_PRIMARY, plane)) {
- DRM_ERROR("KMS: Failed to initialize primary plane\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize primary plane\n");
goto fail;
}
}
@@ -4398,19 +5242,19 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
if (initialize_plane(dm, NULL, primary_planes + i,
DRM_PLANE_TYPE_OVERLAY, plane)) {
- DRM_ERROR("KMS: Failed to initialize overlay plane\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize overlay plane\n");
goto fail;
}
}
for (i = 0; i < dm->dc->caps.max_streams; i++)
if (amdgpu_dm_crtc_init(dm, mode_info->planes[i], i)) {
- DRM_ERROR("KMS: Failed to initialize crtc\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize crtc\n");
goto fail;
}
/* Use Outbox interrupt */
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(3, 0, 0):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
@@ -4420,19 +5264,23 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
case IP_VERSION(2, 1, 0):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
if (register_outbox_irq_handlers(dm->adev)) {
- DRM_ERROR("DM: Failed to initialize IRQ\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n");
goto fail;
}
break;
default:
DRM_DEBUG_KMS("Unsupported DCN IP version for outbox: 0x%X\n",
- adev->ip_versions[DCE_HWIP][0]);
+ amdgpu_ip_version(adev, DCE_HWIP, 0));
}
/* Determine whether to enable PSR support by default. */
if (!(amdgpu_dc_debug_mask & DC_DISABLE_PSR)) {
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
case IP_VERSION(3, 1, 4):
@@ -4440,6 +5288,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
psr_feature_enabled = true;
break;
default:
@@ -4448,28 +5300,54 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
}
}
+ /* Determine whether to enable Replay support by default. */
if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(3, 1, 4):
- case IP_VERSION(3, 1, 5):
- case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
replay_feature_enabled = true;
break;
+
default:
replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK;
break;
}
}
+
+ if (link_cnt > MAX_LINKS) {
+ drm_err(adev_to_drm(adev),
+ "KMS: Cannot support more than %d display indexes\n",
+ MAX_LINKS);
+ goto fail;
+ }
+
/* loops over all connectors on the board */
for (i = 0; i < link_cnt; i++) {
struct dc_link *link = NULL;
- if (i > AMDGPU_DM_MAX_DISPLAY_INDEX) {
- DRM_ERROR(
- "KMS: Cannot support more than %d display indexes\n",
- AMDGPU_DM_MAX_DISPLAY_INDEX);
+ link = dc_get_link_at_index(dm->dc, i);
+
+ if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) {
+ struct amdgpu_dm_wb_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL);
+
+ if (!wbcon) {
+ drm_err(adev_to_drm(adev), "KMS: Failed to allocate writeback connector\n");
+ continue;
+ }
+
+ if (amdgpu_dm_wb_connector_init(dm, wbcon, i)) {
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize writeback connector\n");
+ kfree(wbcon);
+ continue;
+ }
+
+ link->psr_settings.psr_feature_enabled = false;
+ link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
+
continue;
}
@@ -4482,19 +5360,21 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
goto fail;
if (amdgpu_dm_encoder_init(dm->ddev, aencoder, i)) {
- DRM_ERROR("KMS: Failed to initialize encoder\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize encoder\n");
goto fail;
}
if (amdgpu_dm_connector_init(dm, aconnector, i, aencoder)) {
- DRM_ERROR("KMS: Failed to initialize connector\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to initialize connector\n");
goto fail;
}
- link = dc_get_link_at_index(dm->dc, i);
+ if (dm->hpd_rx_offload_wq)
+ dm->hpd_rx_offload_wq[aconnector->base.index].aconnector =
+ aconnector;
if (!dc_link_detect_connection_type(link, &new_connection_type))
- DRM_ERROR("KMS: Failed to detect connector\n");
+ drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n");
if (aconnector->base.force && new_connection_type == dc_connection_none) {
emulated_link_detect(link);
@@ -4503,6 +5383,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
bool ret = false;
mutex_lock(&dm->dc_lock);
+ dc_exit_ips_for_hw_access(dm->dc);
ret = dc_link_detect(link, DETECT_REASON_BOOT);
mutex_unlock(&dm->dc_lock);
@@ -4510,20 +5391,20 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
amdgpu_dm_update_connector_after_detect(aconnector);
setup_backlight_device(dm, aconnector);
- /*
- * Disable psr if replay can be enabled
- */
- if (replay_feature_enabled && amdgpu_dm_setup_replay(link, aconnector))
- psr_feature_enabled = false;
+ /* Disable PSR if Replay can be enabled */
+ if (replay_feature_enabled)
+ if (amdgpu_dm_set_replay_caps(link, aconnector))
+ psr_feature_enabled = false;
- if (psr_feature_enabled)
+ if (psr_feature_enabled) {
amdgpu_dm_set_psr_caps(link);
-
- /* TODO: Fix vblank control helpers to delay PSR entry to allow this when
- * PSR is also supported.
- */
- if (link->psr_settings.psr_feature_enabled)
- adev_to_drm(adev)->vblank_disable_immediate = false;
+ drm_info(adev_to_drm(adev), "PSR support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n",
+ link->psr_settings.psr_feature_enabled,
+ link->psr_settings.psr_version,
+ link->dpcd_caps.psr_info.psr_version,
+ link->dpcd_caps.psr_info.psr_dpcd_caps.raw,
+ link->dpcd_caps.psr_info.psr2_su_y_granularity_cap);
+ }
}
}
amdgpu_set_panel_orientation(&aconnector->base);
@@ -4537,7 +5418,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case CHIP_VERDE:
case CHIP_OLAND:
if (dce60_register_irq_handlers(dm->adev)) {
- DRM_ERROR("DM: Failed to initialize IRQ\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n");
goto fail;
}
break;
@@ -4559,12 +5440,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case CHIP_VEGA12:
case CHIP_VEGA20:
if (dce110_register_irq_handlers(dm->adev)) {
- DRM_ERROR("DM: Failed to initialize IRQ\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n");
goto fail;
}
break;
default:
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(1, 0, 0):
case IP_VERSION(1, 0, 1):
case IP_VERSION(2, 0, 2):
@@ -4582,14 +5463,18 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
if (dcn10_register_irq_handlers(dm->adev)) {
- DRM_ERROR("DM: Failed to initialize IRQ\n");
+ drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n");
goto fail;
}
break;
default:
- DRM_ERROR("Unsupported DCE IP versions: 0x%X\n",
- adev->ip_versions[DCE_HWIP][0]);
+ drm_err(adev_to_drm(adev), "Unsupported DCE IP versions: 0x%X\n",
+ amdgpu_ip_version(adev, DCE_HWIP, 0));
goto fail;
}
break;
@@ -4605,7 +5490,8 @@ fail:
static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm)
{
- drm_atomic_private_obj_fini(&dm->atomic_obj);
+ if (dm->atomic_obj.state)
+ drm_atomic_private_obj_fini(&dm->atomic_obj);
}
/******************************************************************************
@@ -4649,15 +5535,20 @@ static ssize_t s3_debug_store(struct device *device,
int s3_state;
struct drm_device *drm_dev = dev_get_drvdata(device);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ struct amdgpu_ip_block *ip_block;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE);
+ if (!ip_block)
+ return -EINVAL;
ret = kstrtoint(buf, 0, &s3_state);
if (ret == 0) {
if (s3_state) {
- dm_resume(adev);
+ dm_resume(ip_block);
drm_kms_helper_hotplug_event(adev_to_drm(adev));
} else
- dm_suspend(adev);
+ dm_suspend(ip_block);
}
return ret == 0 ? count : 0;
@@ -4672,14 +5563,14 @@ static int dm_init_microcode(struct amdgpu_device *adev)
char *fw_name_dmub;
int r;
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(2, 1, 0):
fw_name_dmub = FIRMWARE_RENOIR_DMUB;
if (ASICREV_IS_GREEN_SARDINE(adev->external_rev_id))
fw_name_dmub = FIRMWARE_GREEN_SARDINE_DMUB;
break;
case IP_VERSION(3, 0, 0):
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0))
fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB;
else
fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB;
@@ -4712,19 +5603,30 @@ static int dm_init_microcode(struct amdgpu_device *adev)
case IP_VERSION(3, 2, 1):
fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
break;
+ case IP_VERSION(3, 5, 0):
+ fw_name_dmub = FIRMWARE_DCN_35_DMUB;
+ break;
+ case IP_VERSION(3, 5, 1):
+ fw_name_dmub = FIRMWARE_DCN_351_DMUB;
+ break;
+ case IP_VERSION(3, 6, 0):
+ fw_name_dmub = FIRMWARE_DCN_36_DMUB;
+ break;
+ case IP_VERSION(4, 0, 1):
+ fw_name_dmub = FIRMWARE_DCN_401_DMUB;
+ break;
default:
/* ASIC doesn't support DMUB. */
return 0;
}
- r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, fw_name_dmub);
- if (r)
- DRM_ERROR("DMUB firmware loading failed: %d\n", r);
+ r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name_dmub);
return r;
}
-static int dm_early_init(void *handle)
+static int dm_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_mode_info *mode_info = &adev->mode_info;
struct atom_context *ctx = mode_info->atom_context;
int index = GetIndexIntoMasterTable(DATA, Object_Header);
@@ -4733,7 +5635,7 @@ static int dm_early_init(void *handle)
/* if there is no object header, skip DM */
if (!amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
- dev_info(adev->dev, "No object header, skipping DM\n");
+ drm_info(adev_to_drm(adev), "No object header, skipping DM\n");
return -ENOENT;
}
@@ -4806,7 +5708,7 @@ static int dm_early_init(void *handle)
break;
default:
- switch (adev->ip_versions[DCE_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(2, 0, 2):
case IP_VERSION(3, 0, 0):
adev->mode_info.num_crtc = 6;
@@ -4836,13 +5738,17 @@ static int dm_early_init(void *handle)
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 0, 1):
adev->mode_info.num_crtc = 4;
adev->mode_info.num_hpd = 4;
adev->mode_info.num_dig = 4;
break;
default:
- DRM_ERROR("Unsupported DCE IP versions: 0x%x\n",
- adev->ip_versions[DCE_HWIP][0]);
+ drm_err(adev_to_drm(adev), "Unsupported DCE IP versions: 0x%x\n",
+ amdgpu_ip_version(adev, DCE_HWIP, 0));
return -EINVAL;
}
break;
@@ -4913,9 +5819,9 @@ fill_plane_color_attributes(const struct drm_plane_state *plane_state,
case DRM_COLOR_YCBCR_BT2020:
if (full_range)
- *color_space = COLOR_SPACE_2020_YCBCR;
+ *color_space = COLOR_SPACE_2020_YCBCR_FULL;
else
- return -EINVAL;
+ *color_space = COLOR_SPACE_2020_YCBCR_LIMITED;
break;
default:
@@ -4931,8 +5837,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
const u64 tiling_flags,
struct dc_plane_info *plane_info,
struct dc_plane_address *address,
- bool tmz_surface,
- bool force_disable_dcc)
+ bool tmz_surface)
{
const struct drm_framebuffer *fb = plane_state->fb;
const struct amdgpu_framebuffer *afb =
@@ -4991,7 +5896,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616;
break;
default:
- DRM_ERROR(
+ drm_err(adev_to_drm(adev),
"Unsupported screen format %p4cc\n",
&fb->format->format);
return -EINVAL;
@@ -5031,7 +5936,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
&plane_info->tiling_info,
&plane_info->plane_size,
&plane_info->dcc, address,
- tmz_surface, force_disable_dcc);
+ tmz_surface);
if (ret)
return ret;
@@ -5052,7 +5957,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
struct dc_scaling_info scaling_info;
struct dc_plane_info plane_info;
int ret;
- bool force_disable_dcc = false;
ret = amdgpu_dm_plane_fill_dc_scaling_info(adev, plane_state, &scaling_info);
if (ret)
@@ -5063,13 +5967,11 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
dc_plane_state->clip_rect = scaling_info.clip_rect;
dc_plane_state->scaling_quality = scaling_info.scaling_quality;
- force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend;
ret = fill_dc_plane_info_and_addr(adev, plane_state,
afb->tiling_flags,
&plane_info,
&dc_plane_state->address,
- afb->tmz_surface,
- force_disable_dcc);
+ afb->tmz_surface);
if (ret)
return ret;
@@ -5094,7 +5996,9 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
* Always set input transfer function, since plane state is refreshed
* every time.
*/
- ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state);
+ ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state,
+ plane_state,
+ dc_plane_state);
if (ret)
return ret;
@@ -5134,6 +6038,10 @@ static inline void fill_dc_dirty_rect(struct drm_plane *plane,
* @new_plane_state: New state of @plane
* @crtc_state: New state of CRTC connected to the @plane
* @flip_addrs: DC flip tracking struct, which also tracts dirty rects
+ * @is_psr_su: Flag indicating whether Panel Self Refresh Selective Update (PSR SU) is enabled.
+ * If PSR SU is enabled and damage clips are available, only the regions of the screen
+ * that have changed will be updated. If PSR SU is not enabled,
+ * or if damage clips are not available, the entire screen will be updated.
* @dirty_regions_changed: dirty regions changed
*
* For PSR SU, DC informs the DMUB uController of dirty rectangle regions
@@ -5152,6 +6060,7 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
struct drm_plane_state *new_plane_state,
struct drm_crtc_state *crtc_state,
struct dc_flip_addrs *flip_addrs,
+ bool is_psr_su,
bool *dirty_regions_changed)
{
struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
@@ -5170,9 +6079,16 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
if (plane->type == DRM_PLANE_TYPE_CURSOR)
return;
+ if (new_plane_state->rotation != DRM_MODE_ROTATE_0)
+ goto ffu;
+
num_clips = drm_plane_get_damage_clips_count(new_plane_state);
clips = drm_plane_get_damage_clips(new_plane_state);
+ if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 &&
+ is_psr_su)))
+ goto ffu;
+
if (!dm_crtc_state->mpo_requested) {
if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
goto ffu;
@@ -5401,12 +6317,14 @@ get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing,
if (dc_crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB)
color_space = COLOR_SPACE_2020_RGB_FULLRANGE;
else
- color_space = COLOR_SPACE_2020_YCBCR;
+ color_space = COLOR_SPACE_2020_YCBCR_LIMITED;
break;
case DRM_MODE_COLORIMETRY_DEFAULT: // ITU601
default:
if (dc_crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB) {
color_space = COLOR_SPACE_SRGB;
+ if (connector_state->hdmi.broadcast_rgb == DRM_HDMI_BROADCAST_RGB_LIMITED)
+ color_space = COLOR_SPACE_SRGB_LIMITED;
/*
* 27030khz is the separation point between HDTV and SDTV
* according to HDMI spec, we use YCbCr709 and YCbCr601
@@ -5431,6 +6349,24 @@ get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing,
return color_space;
}
+/* Map the DRM connector content_type property onto DC's display_content_type. */
+static enum display_content_type
+get_output_content_type(const struct drm_connector_state *connector_state)
+{
+	switch (connector_state->content_type) {
+	case DRM_MODE_CONTENT_TYPE_GRAPHICS:
+		return DISPLAY_CONTENT_TYPE_GRAPHICS;
+	case DRM_MODE_CONTENT_TYPE_PHOTO:
+		return DISPLAY_CONTENT_TYPE_PHOTO;
+	case DRM_MODE_CONTENT_TYPE_CINEMA:
+		return DISPLAY_CONTENT_TYPE_CINEMA;
+	case DRM_MODE_CONTENT_TYPE_GAME:
+		return DISPLAY_CONTENT_TYPE_GAME;
+	default: /* DRM_MODE_CONTENT_TYPE_NO_DATA and any unrecognised value */
+		return DISPLAY_CONTENT_TYPE_NO_DATA;
+	}
+}
+
static bool adjust_colour_depth_from_display_info(
struct dc_crtc_timing *timing_out,
const struct drm_display_info *info)
@@ -5478,9 +6414,13 @@ static void fill_stream_properties_from_drm_display_mode(
{
struct dc_crtc_timing *timing_out = &stream->timing;
const struct drm_display_info *info = &connector->display_info;
- struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct amdgpu_dm_connector *aconnector = NULL;
struct hdmi_vendor_infoframe hv_frame;
struct hdmi_avi_infoframe avi_frame;
+ ssize_t err;
+
+ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+ aconnector = to_amdgpu_dm_connector(connector);
memset(&hv_frame, 0, sizeof(hv_frame));
memset(&avi_frame, 0, sizeof(avi_frame));
@@ -5494,8 +6434,13 @@ static void fill_stream_properties_from_drm_display_mode(
&& stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
else if (drm_mode_is_420_also(info, mode_in)
+ && aconnector
&& aconnector->force_yuv420_output)
timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
+ else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR422)
+ && aconnector
+ && aconnector->force_yuv422_output)
+ timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR422;
else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR444)
&& stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR444;
@@ -5523,13 +6468,23 @@ static void fill_stream_properties_from_drm_display_mode(
}
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
- drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, (struct drm_connector *)connector, mode_in);
+ err = drm_hdmi_avi_infoframe_from_display_mode(&avi_frame,
+ (struct drm_connector *)connector,
+ mode_in);
+ if (err < 0)
+ drm_warn_once(connector->dev, "Failed to setup avi infoframe on connector %s: %zd\n",
+ connector->name, err);
timing_out->vic = avi_frame.video_code;
- drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, (struct drm_connector *)connector, mode_in);
+ err = drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame,
+ (struct drm_connector *)connector,
+ mode_in);
+ if (err < 0)
+ drm_warn_once(connector->dev, "Failed to setup vendor infoframe on connector %s: %zd\n",
+ connector->name, err);
timing_out->hdmi_vic = hv_frame.vic;
}
- if (is_freesync_video_mode(mode_in, aconnector)) {
+ if (aconnector && is_freesync_video_mode(mode_in, aconnector)) {
timing_out->h_addressable = mode_in->hdisplay;
timing_out->h_total = mode_in->htotal;
timing_out->h_sync_width = mode_in->hsync_end - mode_in->hsync_start;
@@ -5553,8 +6508,8 @@ static void fill_stream_properties_from_drm_display_mode(
timing_out->aspect_ratio = get_aspect_ratio(mode_in);
- stream->out_transfer_func->type = TF_TYPE_PREDEFINED;
- stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
+ stream->out_transfer_func.type = TF_TYPE_PREDEFINED;
+ stream->out_transfer_func.tf = TRANSFER_FUNCTION_SRGB;
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) {
if (!adjust_colour_depth_from_display_info(timing_out, info) &&
drm_mode_is_420_also(info, mode_in) &&
@@ -5565,6 +6520,7 @@ static void fill_stream_properties_from_drm_display_mode(
}
stream->output_color_space = get_output_color_space(timing_out, connector_state);
+ stream->content_type = get_output_content_type(connector_state);
}
static void fill_audio_info(struct audio_info *audio_info,
@@ -5637,29 +6593,29 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode,
const struct drm_display_mode *native_mode,
bool scale_enabled)
{
- if (scale_enabled) {
- copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode);
- } else if (native_mode->clock == drm_mode->clock &&
- native_mode->htotal == drm_mode->htotal &&
- native_mode->vtotal == drm_mode->vtotal) {
- copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode);
+ if (scale_enabled || (
+ native_mode->clock == drm_mode->clock &&
+ native_mode->htotal == drm_mode->htotal &&
+ native_mode->vtotal == drm_mode->vtotal)) {
+ if (native_mode->crtc_clock)
+ copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode);
} else {
/* no scaling nor amdgpu inserted, no need to patch */
}
}
static struct dc_sink *
-create_fake_sink(struct amdgpu_dm_connector *aconnector)
+create_fake_sink(struct drm_device *dev, struct dc_link *link)
{
struct dc_sink_init_data sink_init_data = { 0 };
struct dc_sink *sink = NULL;
- sink_init_data.link = aconnector->dc_link;
- sink_init_data.sink_signal = aconnector->dc_link->connector_signal;
+ sink_init_data.link = link;
+ sink_init_data.sink_signal = link->connector_signal;
sink = dc_sink_create(&sink_init_data);
if (!sink) {
- DRM_ERROR("Failed to create sink!\n");
+ drm_err(dev, "Failed to create sink!\n");
return NULL;
}
sink->sink_signal = SIGNAL_TYPE_VIRTUAL;
@@ -5773,6 +6729,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
&aconnector->base.probed_modes :
&aconnector->base.modes;
+ if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ return NULL;
+
if (aconnector->freesync_vid_base.clock != 0)
return &aconnector->freesync_vid_base;
@@ -5789,7 +6748,7 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
m_pref = list_first_entry_or_null(
&aconnector->base.modes, struct drm_display_mode, head);
if (!m_pref) {
- DRM_DEBUG_DRIVER("No preferred mode found in EDID\n");
+ drm_dbg_driver(aconnector->base.dev, "No preferred mode found in EDID\n");
return NULL;
}
}
@@ -5843,6 +6802,7 @@ static bool is_freesync_video_mode(const struct drm_display_mode *mode,
return true;
}
+#if defined(CONFIG_DRM_AMD_DC_FP)
static void update_dsc_caps(struct amdgpu_dm_connector *aconnector,
struct dc_sink *sink, struct dc_stream_state *stream,
struct dsc_dec_dpcd_caps *dsc_caps)
@@ -5861,7 +6821,6 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector,
}
}
-
static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector,
struct dc_sink *sink, struct dc_stream_state *stream,
struct dsc_dec_dpcd_caps *dsc_caps,
@@ -5925,7 +6884,6 @@ static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector,
}
}
-
static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
struct dc_sink *sink, struct dc_stream_state *stream,
struct dsc_dec_dpcd_caps *dsc_caps)
@@ -5949,13 +6907,13 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
dc_dsc_policy_set_enable_dsc_when_not_needed(
aconnector->dsc_settings.dsc_force_enable == DSC_CLK_FORCE_ENABLE);
- if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_EDP &&
+ if (sink->sink_signal == SIGNAL_TYPE_EDP &&
!aconnector->dc_link->panel_config.dsc.disable_dsc_edp &&
dc->caps.edp_dsc_support && aconnector->dsc_settings.dsc_force_enable != DSC_CLK_FORCE_DISABLE) {
apply_dsc_policy_for_edp(aconnector, sink, stream, dsc_caps, max_dsc_target_bpp_limit_override);
- } else if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) {
+ } else if (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) {
if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE) {
if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc->res_pool->dscs[0],
dsc_caps,
@@ -5965,7 +6923,8 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
dc_link_get_highest_encoding_format(aconnector->dc_link),
&stream->timing.dsc_cfg)) {
stream->timing.flags.DSC = 1;
- DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", __func__, drm_connector->name);
+ drm_dbg_driver(drm_connector->dev, "%s: SST_DSC [%s] DSC is selected from SST RX\n",
+ __func__, drm_connector->name);
}
} else if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) {
timing_bw_in_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing,
@@ -5984,7 +6943,7 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
dc_link_get_highest_encoding_format(aconnector->dc_link),
&stream->timing.dsc_cfg)) {
stream->timing.flags.DSC = 1;
- DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from DP-HDMI PCON\n",
+ drm_dbg_driver(drm_connector->dev, "%s: SST_DSC [%s] DSC is selected from DP-HDMI PCON\n",
__func__, drm_connector->name);
}
}
@@ -6003,16 +6962,18 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_bits_per_pixel)
stream->timing.dsc_cfg.bits_per_pixel = aconnector->dsc_settings.dsc_bits_per_pixel;
}
+#endif
static struct dc_stream_state *
-create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+create_stream_for_sink(struct drm_connector *connector,
const struct drm_display_mode *drm_mode,
const struct dm_connector_state *dm_state,
const struct dc_stream_state *old_stream,
int requested_bpc)
{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_dm_connector *aconnector = NULL;
struct drm_display_mode *preferred_mode = NULL;
- struct drm_connector *drm_connector;
const struct drm_connector_state *con_state = &dm_state->base;
struct dc_stream_state *stream = NULL;
struct drm_display_mode mode;
@@ -6024,24 +6985,33 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
int mode_refresh;
int preferred_refresh = 0;
enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN;
+#if defined(CONFIG_DRM_AMD_DC_FP)
struct dsc_dec_dpcd_caps dsc_caps;
-
+#endif
+ struct dc_link *link = NULL;
struct dc_sink *sink = NULL;
drm_mode_init(&mode, drm_mode);
memset(&saved_mode, 0, sizeof(saved_mode));
- if (aconnector == NULL) {
- DRM_ERROR("aconnector is NULL!\n");
- return stream;
- }
+ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) {
+ aconnector = NULL;
+ aconnector = to_amdgpu_dm_connector(connector);
+ link = aconnector->dc_link;
+ } else {
+ struct drm_writeback_connector *wbcon = NULL;
+ struct amdgpu_dm_wb_connector *dm_wbcon = NULL;
- drm_connector = &aconnector->base;
+ wbcon = drm_connector_to_writeback(connector);
+ dm_wbcon = to_amdgpu_dm_wb_connector(wbcon);
+ link = dm_wbcon->link;
+ }
- if (!aconnector->dc_sink) {
- sink = create_fake_sink(aconnector);
+ if (!aconnector || !aconnector->dc_sink) {
+ sink = create_fake_sink(dev, link);
if (!sink)
return stream;
+
} else {
sink = aconnector->dc_sink;
dc_sink_retain(sink);
@@ -6050,16 +7020,17 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
stream = dc_create_stream_for_sink(sink);
if (stream == NULL) {
- DRM_ERROR("Failed to create stream for sink!\n");
+ drm_err(dev, "Failed to create stream for sink!\n");
goto finish;
}
+ /* We leave this NULL for writeback connectors */
stream->dm_stream_context = aconnector;
stream->timing.flags.LTE_340MCSC_SCRAMBLE =
- drm_connector->display_info.hdmi.scdc.scrambling.low_rates;
+ connector->display_info.hdmi.scdc.scrambling.low_rates;
- list_for_each_entry(preferred_mode, &aconnector->base.modes, head) {
+ list_for_each_entry(preferred_mode, &connector->modes, head) {
/* Search for preferred mode */
if (preferred_mode->type & DRM_MODE_TYPE_PREFERRED) {
native_mode_found = true;
@@ -6068,7 +7039,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
}
if (!native_mode_found)
preferred_mode = list_first_entry_or_null(
- &aconnector->base.modes,
+ &connector->modes,
struct drm_display_mode,
head);
@@ -6081,13 +7052,16 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
* case, we call set mode ourselves to restore the previous mode
* and the modelist may not be filled in time.
*/
- DRM_DEBUG_DRIVER("No preferred mode found\n");
- } else {
- recalculate_timing = is_freesync_video_mode(&mode, aconnector);
+ drm_dbg_driver(dev, "No preferred mode found\n");
+ } else if (aconnector) {
+ recalculate_timing = amdgpu_freesync_vid_mode &&
+ is_freesync_video_mode(&mode, aconnector);
if (recalculate_timing) {
freesync_mode = get_highest_refresh_rate_mode(aconnector, false);
drm_mode_copy(&saved_mode, &mode);
+ saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio;
drm_mode_copy(&mode, freesync_mode);
+ mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio;
} else {
decide_crtc_timing_for_drm_display_mode(
&mode, preferred_mode, scale);
@@ -6098,8 +7072,6 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
if (recalculate_timing)
drm_mode_set_crtcinfo(&saved_mode, 0);
- else if (!old_stream)
- drm_mode_set_crtcinfo(&mode, 0);
/*
* If scaling is enabled and refresh rate didn't change
@@ -6107,31 +7079,37 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
*/
if (!scale || mode_refresh != preferred_refresh)
fill_stream_properties_from_drm_display_mode(
- stream, &mode, &aconnector->base, con_state, NULL,
+ stream, &mode, connector, con_state, NULL,
requested_bpc);
else
fill_stream_properties_from_drm_display_mode(
- stream, &mode, &aconnector->base, con_state, old_stream,
+ stream, &mode, connector, con_state, old_stream,
requested_bpc);
+ /* The rest isn't needed for writeback connectors */
+ if (!aconnector)
+ goto finish;
+
if (aconnector->timing_changed) {
- DC_LOG_DEBUG("%s: overriding timing for automated test, bpc %d, changing to %d\n",
- __func__,
- stream->timing.display_color_depth,
- aconnector->timing_requested->display_color_depth);
+ drm_dbg(aconnector->base.dev,
+ "overriding timing for automated test, bpc %d, changing to %d\n",
+ stream->timing.display_color_depth,
+ aconnector->timing_requested->display_color_depth);
stream->timing = *aconnector->timing_requested;
}
+#if defined(CONFIG_DRM_AMD_DC_FP)
/* SST DSC determination policy */
update_dsc_caps(aconnector, sink, stream, &dsc_caps);
if (aconnector->dsc_settings.dsc_force_enable != DSC_CLK_FORCE_DISABLE && dsc_caps.is_dsc_supported)
apply_dsc_policy_for_stream(aconnector, sink, stream, &dsc_caps);
+#endif
update_stream_scaling_settings(&mode, dm_state, stream);
fill_audio_info(
&stream->audio_info,
- drm_connector,
+ connector,
sink);
update_stream_signal(stream, sink);
@@ -6139,23 +7117,29 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
- if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) {
+ if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+ stream->signal == SIGNAL_TYPE_EDP) {
+ const struct dc_edid_caps *edid_caps;
+ unsigned int disable_colorimetry = 0;
+
+ if (aconnector->dc_sink) {
+ edid_caps = &aconnector->dc_sink->edid_caps;
+ disable_colorimetry = edid_caps->panel_patch.disable_colorimetry;
+ }
+
//
// should decide stream support vsc sdp colorimetry capability
// before building vsc info packet
//
- stream->use_vsc_sdp_for_colorimetry = false;
- if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- stream->use_vsc_sdp_for_colorimetry =
- aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
- } else {
- if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
- stream->use_vsc_sdp_for_colorimetry = true;
- }
- if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
+ stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+ stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED &&
+ !disable_colorimetry;
+
+ if (stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22)
tf = TRANSFER_FUNC_GAMMA_22;
mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
- aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
+ aconnector->sr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
}
finish:
@@ -6237,9 +7221,6 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,
} else if (property == adev->mode_info.underscan_property) {
dm_new_state->underscan_enable = val;
ret = 0;
- } else if (property == adev->mode_info.abm_level_property) {
- dm_new_state->abm_level = val;
- ret = 0;
}
return ret;
@@ -6282,18 +7263,110 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,
} else if (property == adev->mode_info.underscan_property) {
*val = dm_state->underscan_enable;
ret = 0;
- } else if (property == adev->mode_info.abm_level_property) {
- *val = dm_state->abm_level;
- ret = 0;
}
return ret;
}
+/**
+ * DOC: panel power savings
+ *
+ * The display manager allows you to set your desired **panel power savings**
+ * level (from 0 to 4, with 0 representing off), e.g. using the following::
+ *
+ * # echo 3 > /sys/class/drm/card0-eDP-1/amdgpu/panel_power_savings
+ *
+ * Modifying this value can have implications on color accuracy, so tread
+ * carefully.
+ */
+
+static ssize_t panel_power_savings_show(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_connector *connector = dev_get_drvdata(device);
+ struct drm_device *dev = connector->dev;
+ u8 val;
+
+ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+ val = to_dm_connector_state(connector->state)->abm_level ==
+ ABM_LEVEL_IMMEDIATE_DISABLE ? 0 :
+ to_dm_connector_state(connector->state)->abm_level;
+ drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+ return sysfs_emit(buf, "%u\n", val);
+}
+
+static ssize_t panel_power_savings_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_connector *connector = dev_get_drvdata(device);
+ struct drm_device *dev = connector->dev;
+ long val;
+ int ret;
+
+ ret = kstrtol(buf, 0, &val);
+
+ if (ret)
+ return ret;
+
+ if (val < 0 || val > 4)
+ return -EINVAL;
+
+ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+ to_dm_connector_state(connector->state)->abm_level = val ?:
+ ABM_LEVEL_IMMEDIATE_DISABLE;
+ drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+ drm_kms_helper_hotplug_event(dev);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(panel_power_savings);
+
+static struct attribute *amdgpu_attrs[] = {
+ &dev_attr_panel_power_savings.attr,
+ NULL
+};
+
+static const struct attribute_group amdgpu_group = {
+ .name = "amdgpu",
+ .attrs = amdgpu_attrs
+};
+
+static bool
+amdgpu_dm_should_create_sysfs(struct amdgpu_dm_connector *amdgpu_dm_connector)
+{
+ if (amdgpu_dm_abm_level >= 0)
+ return false;
+
+ if (amdgpu_dm_connector->base.connector_type != DRM_MODE_CONNECTOR_eDP)
+ return false;
+
+ /* skip OLED panels, identified here by AUX backlight support */
+ if (amdgpu_dm_connector->bl_idx >= 0) {
+ struct drm_device *drm = amdgpu_dm_connector->base.dev;
+ struct amdgpu_display_manager *dm = &drm_to_adev(drm)->dm;
+ struct amdgpu_dm_backlight_caps *caps;
+
+ caps = &dm->backlight_caps[amdgpu_dm_connector->bl_idx];
+ if (caps->aux_support)
+ return false;
+ }
+
+ return true;
+}
+
static void amdgpu_dm_connector_unregister(struct drm_connector *connector)
{
struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+ if (amdgpu_dm_should_create_sysfs(amdgpu_dm_connector))
+ sysfs_remove_group(&connector->kdev->kobj, &amdgpu_group);
+
+ cec_notifier_conn_unregister(amdgpu_dm_connector->notifier);
drm_dp_aux_unregister(&amdgpu_dm_connector->dm_dp_aux.aux);
}
@@ -6325,10 +7398,6 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
drm_dp_cec_unregister_connector(&aconnector->dm_dp_aux.aux);
drm_connector_unregister(connector);
drm_connector_cleanup(connector);
- if (aconnector->i2c) {
- i2c_del_adapter(&aconnector->i2c->base);
- kfree(aconnector->i2c);
- }
kfree(aconnector->dm_dp_aux.aux.name);
kfree(connector);
@@ -6355,8 +7424,12 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
state->vcpi_slots = 0;
state->pbn = 0;
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
- state->abm_level = amdgpu_dm_abm_level;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ if (amdgpu_dm_abm_level <= 0)
+ state->abm_level = ABM_LEVEL_IMMEDIATE_DISABLE;
+ else
+ state->abm_level = amdgpu_dm_abm_level;
+ }
__drm_atomic_helper_connector_reset(connector, &state->base);
}
@@ -6394,6 +7467,13 @@ amdgpu_dm_connector_late_register(struct drm_connector *connector)
to_amdgpu_dm_connector(connector);
int r;
+ if (amdgpu_dm_should_create_sysfs(amdgpu_dm_connector)) {
+ r = sysfs_create_group(&connector->kdev->kobj,
+ &amdgpu_group);
+ if (r)
+ return r;
+ }
+
amdgpu_dm_register_backlight_device(amdgpu_dm_connector);
if ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
@@ -6416,19 +7496,31 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector)
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
struct dc_link *dc_link = aconnector->dc_link;
struct dc_sink *dc_em_sink = aconnector->dc_em_sink;
- struct edid *edid;
+ const struct drm_edid *drm_edid;
+ struct i2c_adapter *ddc;
+ struct drm_device *dev = connector->dev;
- if (!connector->edid_override)
- return;
+ if (dc_link && dc_link->aux_mode)
+ ddc = &aconnector->dm_dp_aux.aux.ddc;
+ else
+ ddc = &aconnector->i2c->base;
- drm_edid_override_connector_update(&aconnector->base);
- edid = aconnector->base.edid_blob_ptr->data;
- aconnector->edid = edid;
+ drm_edid = drm_edid_read_ddc(connector, ddc);
+ drm_edid_connector_update(connector, drm_edid);
+ if (!drm_edid) {
+ drm_err(dev, "No EDID found on connector: %s.\n", connector->name);
+ return;
+ }
+ aconnector->drm_edid = drm_edid;
/* Update emulated (virtual) sink's EDID */
if (dc_em_sink && dc_link) {
+ // FIXME: Get rid of drm_edid_raw()
+ const struct edid *edid = drm_edid_raw(drm_edid);
+
memset(&dc_em_sink->edid_caps, 0, sizeof(struct dc_edid_caps));
- memmove(dc_em_sink->dc_edid.raw_edid, edid, (edid->extensions + 1) * EDID_LENGTH);
+ memmove(dc_em_sink->dc_edid.raw_edid, edid,
+ (edid->extensions + 1) * EDID_LENGTH);
dm_helpers_parse_edid_caps(
dc_link,
&dc_em_sink->dc_edid,
@@ -6457,32 +7549,34 @@ static int get_modes(struct drm_connector *connector)
static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
{
+ struct drm_connector *connector = &aconnector->base;
+ struct dc_link *dc_link = aconnector->dc_link;
struct dc_sink_init_data init_params = {
.link = aconnector->dc_link,
.sink_signal = SIGNAL_TYPE_VIRTUAL
};
- struct edid *edid;
+ const struct drm_edid *drm_edid;
+ const struct edid *edid;
+ struct i2c_adapter *ddc;
- if (!aconnector->base.edid_blob_ptr) {
- /* if connector->edid_override valid, pass
- * it to edid_override to edid_blob_ptr
- */
-
- drm_edid_override_connector_update(&aconnector->base);
-
- if (!aconnector->base.edid_blob_ptr) {
- DRM_ERROR("No EDID firmware found on connector: %s ,forcing to OFF!\n",
- aconnector->base.name);
+ if (dc_link && dc_link->aux_mode)
+ ddc = &aconnector->dm_dp_aux.aux.ddc;
+ else
+ ddc = &aconnector->i2c->base;
- aconnector->base.force = DRM_FORCE_OFF;
- return;
- }
+ drm_edid = drm_edid_read_ddc(connector, ddc);
+ drm_edid_connector_update(connector, drm_edid);
+ if (!drm_edid) {
+ drm_err(connector->dev, "No EDID found on connector: %s.\n", connector->name);
+ return;
}
- edid = (struct edid *) aconnector->base.edid_blob_ptr->data;
+ if (connector->display_info.is_hdmi)
+ init_params.sink_signal = SIGNAL_TYPE_HDMI_TYPE_A;
- aconnector->edid = edid;
+ aconnector->drm_edid = drm_edid;
+ edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw()
aconnector->dc_em_sink = dc_link_add_remote_sink(
aconnector->dc_link,
(uint8_t *)edid,
@@ -6493,7 +7587,8 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector)
aconnector->dc_sink = aconnector->dc_link->local_sink ?
aconnector->dc_link->local_sink :
aconnector->dc_em_sink;
- dc_sink_retain(aconnector->dc_sink);
+ if (aconnector->dc_sink)
+ dc_sink_retain(aconnector->dc_sink);
}
}
@@ -6527,7 +7622,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
if (!dc_plane_state)
goto cleanup;
- dc_state = dc_create_state(dc);
+ dc_state = dc_state_create(dc, NULL);
if (!dc_state)
goto cleanup;
@@ -6554,9 +7649,9 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
dc_result = dc_validate_plane(dc, dc_plane_state);
if (dc_result == DC_OK)
- dc_result = dc_add_stream_to_ctx(dc, dc_state, stream);
+ dc_result = dc_state_add_stream(dc, dc_state, stream);
- if (dc_result == DC_OK && !dc_add_plane_to_context(
+ if (dc_result == DC_OK && !dc_state_add_plane(
dc,
stream,
dc_plane_state,
@@ -6564,11 +7659,11 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc,
dc_result = DC_FAIL_ATTACH_SURFACES;
if (dc_result == DC_OK)
- dc_result = dc_validate_global_state(dc, dc_state, true);
+ dc_result = dc_validate_global_state(dc, dc_state, DC_VALIDATE_MODE_ONLY);
cleanup:
if (dc_state)
- dc_release_state(dc_state);
+ dc_state_release(dc_state);
if (dc_plane_state)
dc_plane_state_release(dc_plane_state);
@@ -6577,28 +7672,45 @@ cleanup:
}
struct dc_stream_state *
-create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+create_validate_stream_for_sink(struct drm_connector *connector,
const struct drm_display_mode *drm_mode,
const struct dm_connector_state *dm_state,
const struct dc_stream_state *old_stream)
{
- struct drm_connector *connector = &aconnector->base;
+ struct amdgpu_dm_connector *aconnector = NULL;
struct amdgpu_device *adev = drm_to_adev(connector->dev);
struct dc_stream_state *stream;
const struct drm_connector_state *drm_state = dm_state ? &dm_state->base : NULL;
int requested_bpc = drm_state ? drm_state->max_requested_bpc : 8;
enum dc_status dc_result = DC_OK;
+ uint8_t bpc_limit = 6;
+
+ if (!dm_state)
+ return NULL;
+
+ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+ aconnector = to_amdgpu_dm_connector(connector);
+
+ if (aconnector &&
+ (aconnector->dc_link->connector_signal == SIGNAL_TYPE_HDMI_TYPE_A ||
+ aconnector->dc_link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER))
+ bpc_limit = 8;
do {
- stream = create_stream_for_sink(aconnector, drm_mode,
+ drm_dbg_kms(connector->dev, "Trying with %d bpc\n", requested_bpc);
+ stream = create_stream_for_sink(connector, drm_mode,
dm_state, old_stream,
requested_bpc);
if (stream == NULL) {
- DRM_ERROR("Failed to create stream for sink!\n");
+ drm_err(adev_to_drm(adev), "Failed to create stream for sink!\n");
break;
}
dc_result = dc_validate_stream(adev->dm.dc, stream);
+
+ if (!aconnector) /* writeback connector */
+ return stream;
+
if (dc_result == DC_OK && stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream);
@@ -6606,11 +7718,12 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
dc_result = dm_validate_stream_and_context(adev->dm.dc, stream);
if (dc_result != DC_OK) {
- DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d (%s)\n",
+ DRM_DEBUG_KMS("Pruned mode %d x %d (clk %d) %s %s -- %s\n",
drm_mode->hdisplay,
drm_mode->vdisplay,
drm_mode->clock,
- dc_result,
+ dc_pixel_encoding_to_str(stream->timing.pixel_encoding),
+ dc_color_depth_to_str(stream->timing.display_color_depth),
dc_status_to_str(dc_result));
dc_stream_release(stream);
@@ -6618,27 +7731,59 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
requested_bpc -= 2; /* lower bpc to retry validation */
}
- } while (stream == NULL && requested_bpc >= 6);
-
- if (dc_result == DC_FAIL_ENC_VALIDATE && !aconnector->force_yuv420_output) {
- DRM_DEBUG_KMS("Retry forcing YCbCr420 encoding\n");
+ } while (stream == NULL && requested_bpc >= bpc_limit);
- aconnector->force_yuv420_output = true;
- stream = create_validate_stream_for_sink(aconnector, drm_mode,
- dm_state, old_stream);
+ switch (dc_result) {
+ /*
+ * If we failed to validate DP bandwidth stream with the requested RGB color depth,
+ * we try to fall back and configure in order:
+ * YUV422 (8bpc, 6bpc)
+ * YUV420 (8bpc, 6bpc)
+ */
+ case DC_FAIL_ENC_VALIDATE:
+ case DC_EXCEED_DONGLE_CAP:
+ case DC_NO_DP_LINK_BANDWIDTH:
+ /* recursively entered twice and already tried both YUV422 and YUV420 */
+ if (aconnector->force_yuv422_output && aconnector->force_yuv420_output)
+ break;
+ /* first failure; try YUV422 */
+ if (!aconnector->force_yuv422_output) {
+ drm_dbg_kms(connector->dev, "%s:%d Validation failed with %d, retrying w/ YUV422\n",
+ __func__, __LINE__, dc_result);
+ aconnector->force_yuv422_output = true;
+ /* recursively entered and YUV422 failed, try YUV420 */
+ } else if (!aconnector->force_yuv420_output) {
+ drm_dbg_kms(connector->dev, "%s:%d Validation failed with %d, retrying w/ YUV420\n",
+ __func__, __LINE__, dc_result);
+ aconnector->force_yuv420_output = true;
+ }
+ stream = create_validate_stream_for_sink(connector, drm_mode,
+ dm_state, old_stream);
+ aconnector->force_yuv422_output = false;
aconnector->force_yuv420_output = false;
+ break;
+ case DC_OK:
+ break;
+ default:
+ drm_dbg_kms(connector->dev, "%s:%d Unhandled validation failure %d\n",
+ __func__, __LINE__, dc_result);
+ break;
}
return stream;
}
enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
int result = MODE_ERROR;
struct dc_sink *dc_sink;
+ struct drm_display_mode *test_mode;
/* TODO: Unhardcode stream count */
struct dc_stream_state *stream;
+ /* we always have an amdgpu_dm_connector here since we got
+ * here via the amdgpu_dm_connector_helper_funcs
+ */
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
if ((mode->flags & DRM_MODE_FLAG_INTERLACE) ||
@@ -6657,13 +7802,20 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec
if (dc_sink == NULL && aconnector->base.force != DRM_FORCE_ON_DIGITAL &&
aconnector->base.force != DRM_FORCE_ON) {
- DRM_ERROR("dc_sink is NULL!\n");
+ drm_err(connector->dev, "dc_sink is NULL!\n");
goto fail;
}
- stream = create_validate_stream_for_sink(aconnector, mode,
+ test_mode = drm_mode_duplicate(connector->dev, mode);
+ if (!test_mode)
+ goto fail;
+
+ drm_mode_set_crtcinfo(test_mode, 0);
+
+ stream = create_validate_stream_for_sink(connector, test_mode,
to_dm_connector_state(connector->state),
NULL);
+ drm_mode_destroy(connector->dev, test_mode);
if (stream) {
dc_stream_release(stream);
result = MODE_OK;
@@ -6746,6 +7898,9 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn,
struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(conn);
int ret;
+ if (WARN_ON(unlikely(!old_con_state || !new_con_state)))
+ return -EINVAL;
+
trace_amdgpu_dm_connector_atomic_check(new_con_state);
if (conn->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
@@ -6757,6 +7912,14 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn,
if (!crtc)
return 0;
+ if (new_con_state->privacy_screen_sw_state != old_con_state->privacy_screen_sw_state) {
+ new_crtc_state = drm_atomic_get_crtc_state(state, crtc);
+ if (IS_ERR(new_crtc_state))
+ return PTR_ERR(new_crtc_state);
+
+ new_crtc_state->mode_changed = true;
+ }
+
if (new_con_state->colorspace != old_con_state->colorspace) {
new_crtc_state = drm_atomic_get_crtc_state(state, crtc);
if (IS_ERR(new_crtc_state))
@@ -6765,6 +7928,14 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn,
new_crtc_state->mode_changed = true;
}
+ if (new_con_state->content_type != old_con_state->content_type) {
+ new_crtc_state = drm_atomic_get_crtc_state(state, crtc);
+ if (IS_ERR(new_crtc_state))
+ return PTR_ERR(new_crtc_state);
+
+ new_crtc_state->mode_changed = true;
+ }
+
if (!drm_connector_atomic_hdr_metadata_equal(old_con_state, new_con_state)) {
struct dc_info_packet hdr_infopacket;
@@ -6850,6 +8021,23 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
int clock, bpp = 0;
bool is_y420 = false;
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+ enum drm_mode_status result;
+
+ result = drm_crtc_helper_mode_valid_fixed(encoder->crtc, adjusted_mode, native_mode);
+ if (result != MODE_OK && dm_new_connector_state->scaling == RMX_OFF) {
+ drm_dbg_driver(encoder->dev,
+ "mode %dx%d@%dHz is not native, enabling scaling\n",
+ adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+ drm_mode_vrefresh(adjusted_mode));
+ dm_new_connector_state->scaling = RMX_FULL;
+ }
+ return 0;
+ }
+
if (!aconnector->mst_output_port)
return 0;
@@ -6863,8 +8051,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
if (IS_ERR(mst_state))
return PTR_ERR(mst_state);
- if (!mst_state->pbn_div)
- mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link);
+ mst_state->pbn_div.full = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link);
if (!state->duplicated) {
int max_bpc = conn_state->max_requested_bpc;
@@ -6876,7 +8063,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
max_bpc);
bpp = convert_dc_color_depth_into_bpc(color_depth) * 3;
clock = adjusted_mode->clock;
- dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp, false);
+ dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp << 4);
}
dm_new_connector_state->vcpi_slots =
@@ -6904,10 +8091,13 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
struct amdgpu_dm_connector *aconnector;
struct dm_connector_state *dm_conn_state;
int i, j, ret;
- int vcpi, pbn_div, pbn, slot_num = 0;
+ int vcpi, pbn_div, pbn = 0, slot_num = 0;
for_each_new_connector_in_state(state, connector, new_con_state, i) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (!aconnector->mst_output_port)
@@ -6941,7 +8131,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
}
}
- if (j == dc_state->stream_count)
+ if (j == dc_state->stream_count || pbn_div == 0)
continue;
slot_num = DIV_ROUND_UP(pbn, pbn_div);
@@ -7085,6 +8275,10 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder,
{"1920x1200", 1920, 1200}
};
+ if ((connector->connector_type != DRM_MODE_CONNECTOR_eDP) &&
+ (connector->connector_type != DRM_MODE_CONNECTOR_LVDS))
+ return;
+
n = ARRAY_SIZE(common_modes);
for (i = 0; i < n; i++) {
@@ -7150,16 +8344,16 @@ static void amdgpu_set_panel_orientation(struct drm_connector *connector)
}
static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
- struct edid *edid)
+ const struct drm_edid *drm_edid)
{
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
- if (edid) {
+ if (drm_edid) {
/* empty probed_modes */
INIT_LIST_HEAD(&connector->probed_modes);
amdgpu_dm_connector->num_modes =
- drm_add_edid_modes(connector, edid);
+ drm_edid_connector_add_modes(connector);
/* sorting the probed modes before calling function
* amdgpu_dm_get_native_mode() since EDID can have
@@ -7173,10 +8367,10 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
amdgpu_dm_get_native_mode(connector);
/* Freesync capabilities are reset by calling
- * drm_add_edid_modes() and need to be
+ * drm_edid_connector_add_modes() and need to be
* restored here.
*/
- amdgpu_dm_update_freesync_caps(connector, edid);
+ amdgpu_dm_update_freesync_caps(connector, drm_edid);
} else {
amdgpu_dm_connector->num_modes = 0;
}
@@ -7272,12 +8466,12 @@ static uint add_fs_modes(struct amdgpu_dm_connector *aconnector)
}
static void amdgpu_dm_connector_add_freesync_modes(struct drm_connector *connector,
- struct edid *edid)
+ const struct drm_edid *drm_edid)
{
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
- if (!edid)
+ if (!(amdgpu_freesync_vid_mode && drm_edid))
return;
if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
@@ -7290,23 +8484,24 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector)
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
struct drm_encoder *encoder;
- struct edid *edid = amdgpu_dm_connector->edid;
+ const struct drm_edid *drm_edid = amdgpu_dm_connector->drm_edid;
struct dc_link_settings *verified_link_cap =
&amdgpu_dm_connector->dc_link->verified_link_cap;
const struct dc *dc = amdgpu_dm_connector->dc_link->dc;
encoder = amdgpu_dm_connector_to_encoder(connector);
- if (!drm_edid_is_valid(edid)) {
+ if (!drm_edid) {
amdgpu_dm_connector->num_modes =
drm_add_modes_noedid(connector, 640, 480);
if (dc->link_srv->dp_get_encoding_format(verified_link_cap) == DP_128b_132b_ENCODING)
amdgpu_dm_connector->num_modes +=
drm_add_modes_noedid(connector, 1920, 1080);
} else {
- amdgpu_dm_connector_ddc_get_modes(connector, edid);
- amdgpu_dm_connector_add_common_modes(encoder, connector);
- amdgpu_dm_connector_add_freesync_modes(connector, edid);
+ amdgpu_dm_connector_ddc_get_modes(connector, drm_edid);
+ if (encoder)
+ amdgpu_dm_connector_add_common_modes(encoder, connector);
+ amdgpu_dm_connector_add_freesync_modes(connector, drm_edid);
}
amdgpu_dm_fbc_init(connector);
@@ -7378,6 +8573,10 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
dm->ddev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_NONE);
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIA
+ || (connector_type == DRM_MODE_CONNECTOR_DisplayPort && !aconnector->mst_root))
+ drm_connector_attach_broadcast_rgb_property(&aconnector->base);
+
drm_object_attach_property(&aconnector->base.base,
adev->mode_info.underscan_property,
UNDERSCAN_OFF);
@@ -7394,10 +8593,9 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
aconnector->base.state->max_bpc = 16;
aconnector->base.state->max_requested_bpc = aconnector->base.state->max_bpc;
- if (connector_type == DRM_MODE_CONNECTOR_eDP &&
- (dc_is_dmcu_initialized(adev->dm.dc) || adev->dm.dc->ctx->dmub_srv)) {
- drm_object_attach_property(&aconnector->base.base,
- adev->mode_info.abm_level_property, 0);
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIA) {
+ /* Content Type is currently only implemented for HDMI. */
+ drm_connector_attach_content_type_property(&aconnector->base);
}
if (connector_type == DRM_MODE_CONNECTOR_HDMIA) {
@@ -7420,6 +8618,18 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
if (adev->dm.hdcp_workqueue)
drm_connector_attach_content_protection_property(&aconnector->base, true);
}
+
+ if (connector_type == DRM_MODE_CONNECTOR_eDP) {
+ struct drm_privacy_screen *privacy_screen;
+
+ privacy_screen = drm_privacy_screen_get(adev_to_drm(adev)->dev, NULL);
+ if (!IS_ERR(privacy_screen)) {
+ drm_connector_attach_privacy_screen_provider(&aconnector->base,
+ privacy_screen);
+ } else if (PTR_ERR(privacy_screen) != -ENODEV) {
+ drm_warn(adev_to_drm(adev), "Error getting privacy-screen\n");
+ }
+ }
}
static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap,
@@ -7431,6 +8641,9 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap,
int i;
int result = -EIO;
+ if (!ddc_service->ddc_pin)
+ return result;
+
cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL);
if (!cmd.payloads)
@@ -7447,11 +8660,18 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap,
cmd.payloads[i].data = msgs[i].buf;
}
- if (dc_submit_i2c(
- ddc_service->ctx->dc,
- ddc_service->link->link_index,
- &cmd))
- result = num;
+ if (i2c->oem) {
+ if (dc_submit_i2c_oem(
+ ddc_service->ctx->dc,
+ &cmd))
+ result = num;
+ } else {
+ if (dc_submit_i2c(
+ ddc_service->ctx->dc,
+ ddc_service->link->link_index,
+ &cmd))
+ result = num;
+ }
kfree(cmd.payloads);
return result;
@@ -7468,9 +8688,7 @@ static const struct i2c_algorithm amdgpu_dm_i2c_algo = {
};
static struct amdgpu_i2c_adapter *
-create_i2c(struct ddc_service *ddc_service,
- int link_index,
- int *res)
+create_i2c(struct ddc_service *ddc_service, bool oem)
{
struct amdgpu_device *adev = ddc_service->ctx->driver_context;
struct amdgpu_i2c_adapter *i2c;
@@ -7479,16 +8697,41 @@ create_i2c(struct ddc_service *ddc_service,
if (!i2c)
return NULL;
i2c->base.owner = THIS_MODULE;
- i2c->base.class = I2C_CLASS_DDC;
i2c->base.dev.parent = &adev->pdev->dev;
i2c->base.algo = &amdgpu_dm_i2c_algo;
- snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d", link_index);
+ if (oem)
+ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c OEM bus");
+ else
+ snprintf(i2c->base.name, sizeof(i2c->base.name), "AMDGPU DM i2c hw bus %d",
+ ddc_service->link->link_index);
i2c_set_adapdata(&i2c->base, i2c);
i2c->ddc_service = ddc_service;
+ i2c->oem = oem;
return i2c;
}
+int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector)
+{
+ struct cec_connector_info conn_info;
+ struct drm_device *ddev = aconnector->base.dev;
+ struct device *hdmi_dev = ddev->dev;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_HDMI_CEC) {
+ drm_info(ddev, "HDMI-CEC feature masked\n");
+ return -EINVAL;
+ }
+
+ cec_fill_conn_info_from_drm(&conn_info, &aconnector->base);
+ aconnector->notifier =
+ cec_notifier_conn_register(hdmi_dev, NULL, &conn_info);
+ if (!aconnector->notifier) {
+ drm_err(ddev, "Failed to create cec notifier\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
/*
* Note: this function assumes that dc_link_detect() was called for the
@@ -7505,20 +8748,21 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
struct dc_link *link = dc_get_link_at_index(dc, link_index);
struct amdgpu_i2c_adapter *i2c;
+ /* Not needed for writeback connector */
link->priv = aconnector;
- i2c = create_i2c(link->ddc, link->link_index, &res);
+ i2c = create_i2c(link->ddc, false);
if (!i2c) {
- DRM_ERROR("Failed to create i2c adapter data\n");
+ drm_err(adev_to_drm(dm->adev), "Failed to create i2c adapter data\n");
return -ENOMEM;
}
aconnector->i2c = i2c;
- res = i2c_add_adapter(&i2c->base);
+ res = devm_i2c_add_adapter(dm->adev->dev, &i2c->base);
if (res) {
- DRM_ERROR("Failed to register hw i2c %d\n", link->link_index);
+ drm_err(adev_to_drm(dm->adev), "Failed to register hw i2c %d\n", link->link_index);
goto out_free;
}
@@ -7532,7 +8776,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
&i2c->base);
if (res) {
- DRM_ERROR("connector_init failed\n");
+ drm_err(adev_to_drm(dm->adev), "connector_init failed\n");
aconnector->connector_id = -1;
goto out_free;
}
@@ -7551,6 +8795,10 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
drm_connector_attach_encoder(
&aconnector->base, &aencoder->base);
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+ connector_type == DRM_MODE_CONNECTOR_HDMIB)
+ amdgpu_dm_initialize_hdmi_connector(aconnector);
+
if (connector_type == DRM_MODE_CONNECTOR_DisplayPort
|| connector_type == DRM_MODE_CONNECTOR_eDP)
amdgpu_dm_initialize_dp_connector(dm, aconnector, link->link_index);
@@ -7608,42 +8856,98 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
static void manage_dm_interrupts(struct amdgpu_device *adev,
struct amdgpu_crtc *acrtc,
- bool enable)
-{
- /*
- * We have no guarantee that the frontend index maps to the same
+ struct dm_crtc_state *acrtc_state)
+{ /*
+ * We cannot be sure that the frontend index maps to the same
* backend index - some even map to more than one.
- *
- * TODO: Use a different interrupt or check DC itself for the mapping.
+ * So we have to go through the CRTC to find the right IRQ.
*/
- int irq_type =
- amdgpu_display_crtc_idx_to_irq_type(
+ int irq_type = amdgpu_display_crtc_idx_to_irq_type(
adev,
acrtc->crtc_id);
+ struct drm_device *dev = adev_to_drm(adev);
- if (enable) {
- drm_crtc_vblank_on(&acrtc->base);
- amdgpu_irq_get(
- adev,
- &adev->pageflip_irq,
- irq_type);
+ struct drm_vblank_crtc_config config = {0};
+ struct dc_crtc_timing *timing;
+ int offdelay;
+
+ if (acrtc_state) {
+ timing = &acrtc_state->stream->timing;
+
+ /*
+ * Depending on when the HW latching event of double-buffered
+ * registers happen relative to the PSR SDP deadline, and how
+ * bad the Panel clock has drifted since the last ALPM off
+ * event, there can be up to 3 frames of delay between sending
+ * the PSR exit cmd to DMUB fw, and when the panel starts
+ * displaying live frames.
+ *
+ * We can set:
+ *
+ * 20/100 * offdelay_ms = 3_frames_ms
+ * => offdelay_ms = 5 * 3_frames_ms
+ *
+ * This ensures that `3_frames_ms` will only be experienced as a
+ * 20% delay on top of how long the display has been static, and
+ * thus makes the delay less perceivable.
+ */
+ if (acrtc_state->stream->link->psr_settings.psr_version <
+ DC_PSR_VERSION_UNSUPPORTED) {
+ offdelay = DIV64_U64_ROUND_UP((u64)5 * 3 * 10 *
+ timing->v_total *
+ timing->h_total,
+ timing->pix_clk_100hz);
+ config.offdelay_ms = offdelay ?: 30;
+ } else if (amdgpu_ip_version(adev, DCE_HWIP, 0) <
+ IP_VERSION(3, 5, 0) ||
+ !(adev->flags & AMD_IS_APU)) {
+ /*
+ * Older HW and DGPU have issues with instant off;
+ * use a 2 frame offdelay.
+ */
+ offdelay = DIV64_U64_ROUND_UP((u64)20 *
+ timing->v_total *
+ timing->h_total,
+ timing->pix_clk_100hz);
+
+ config.offdelay_ms = offdelay ?: 30;
+ } else {
+ /* offdelay_ms = 0 will never disable vblank */
+ config.offdelay_ms = 1;
+ config.disable_immediate = true;
+ }
+
+ drm_crtc_vblank_on_config(&acrtc->base,
+ &config);
+ /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_get. */
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 0, 3):
+ case IP_VERSION(3, 2, 0):
+ if (amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot get pageflip irq!\n");
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- amdgpu_irq_get(
- adev,
- &adev->vline0_irq,
- irq_type);
+ if (amdgpu_irq_get(adev, &adev->vline0_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot get vline0 irq!\n");
#endif
+ }
+
} else {
+ /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_put. */
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 0, 3):
+ case IP_VERSION(3, 2, 0):
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- amdgpu_irq_put(
- adev,
- &adev->vline0_irq,
- irq_type);
+ if (amdgpu_irq_put(adev, &adev->vline0_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot put vline0 irq!\n");
#endif
- amdgpu_irq_put(
- adev,
- &adev->pageflip_irq,
- irq_type);
+ if (amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Cannot put pageflip irq!\n");
+ }
+
drm_crtc_vblank_off(&acrtc->base);
}
}
@@ -7823,8 +9127,9 @@ static void prepare_flip_isr(struct amdgpu_crtc *acrtc)
/* Mark this event as consumed */
acrtc->base.state->event = NULL;
- DC_LOG_PFLIP("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
- acrtc->crtc_id);
+ drm_dbg_state(acrtc->base.dev,
+ "crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
+ acrtc->crtc_id);
}
static void update_freesync_state_on_stream(
@@ -7998,7 +9303,7 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
*/
WARN_ON(amdgpu_dm_crtc_set_vupdate_irq(new_state->base.crtc, true) != 0);
WARN_ON(drm_crtc_vblank_get(new_state->base.crtc) != 0);
- DRM_DEBUG_DRIVER("%s: crtc=%u VRR off->on: Get vblank ref\n",
+ drm_dbg_driver(new_state->base.crtc->dev, "%s: crtc=%u VRR off->on: Get vblank ref\n",
__func__, new_state->base.crtc->base.id);
} else if (old_vrr_active && !new_vrr_active) {
/* Transition VRR active -> inactive:
@@ -8006,7 +9311,7 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state,
*/
WARN_ON(amdgpu_dm_crtc_set_vupdate_irq(new_state->base.crtc, false) != 0);
drm_crtc_vblank_put(new_state->base.crtc);
- DRM_DEBUG_DRIVER("%s: crtc=%u VRR on->off: Drop vblank ref\n",
+ drm_dbg_driver(new_state->base.crtc->dev, "%s: crtc=%u VRR on->off: Drop vblank ref\n",
__func__, new_state->base.crtc->base.id);
}
}
@@ -8033,6 +9338,130 @@ static inline uint32_t get_mem_type(struct drm_framebuffer *fb)
return abo->tbo.resource ? abo->tbo.resource->mem_type : 0;
}
+static void amdgpu_dm_update_cursor(struct drm_plane *plane,
+ struct drm_plane_state *old_plane_state,
+ struct dc_stream_update *update)
+{
+ struct amdgpu_device *adev = drm_to_adev(plane->dev);
+ struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
+ struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
+ struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ uint64_t address = afb ? afb->address : 0;
+ struct dc_cursor_position position = {0};
+ struct dc_cursor_attributes attributes;
+ int ret;
+
+ if (!plane->state->fb && !old_plane_state->fb)
+ return;
+
+ drm_dbg_atomic(plane->dev, "crtc_id=%d with size %d to %d\n",
+ amdgpu_crtc->crtc_id, plane->state->crtc_w,
+ plane->state->crtc_h);
+
+ ret = amdgpu_dm_plane_get_cursor_position(plane, crtc, &position);
+ if (ret)
+ return;
+
+ if (!position.enable) {
+ /* turn off cursor */
+ if (crtc_state && crtc_state->stream) {
+ dc_stream_set_cursor_position(crtc_state->stream,
+ &position);
+ update->cursor_position = &crtc_state->stream->cursor_position;
+ }
+ return;
+ }
+
+ amdgpu_crtc->cursor_width = plane->state->crtc_w;
+ amdgpu_crtc->cursor_height = plane->state->crtc_h;
+
+ memset(&attributes, 0, sizeof(attributes));
+ attributes.address.high_part = upper_32_bits(address);
+ attributes.address.low_part = lower_32_bits(address);
+ attributes.width = plane->state->crtc_w;
+ attributes.height = plane->state->crtc_h;
+ attributes.color_format = CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA;
+ attributes.rotation_angle = 0;
+ attributes.attribute_flags.value = 0;
+
+ /* Enable cursor degamma ROM on DCN3+ for implicit sRGB degamma in DRM
+ * legacy gamma setup.
+ */
+ if (crtc_state->cm_is_degamma_srgb &&
+ adev->dm.dc->caps.color.dpp.gamma_corr)
+ attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1;
+
+ if (afb)
+ attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0];
+
+ if (crtc_state->stream) {
+ if (!dc_stream_set_cursor_attributes(crtc_state->stream,
+ &attributes))
+ drm_err(adev_to_drm(adev), "DC failed to set cursor attributes\n");
+
+ update->cursor_attributes = &crtc_state->stream->cursor_attributes;
+
+ if (!dc_stream_set_cursor_position(crtc_state->stream,
+ &position))
+ drm_err(adev_to_drm(adev), "DC failed to set cursor position\n");
+
+ update->cursor_position = &crtc_state->stream->cursor_position;
+ }
+}
+
+static void amdgpu_dm_enable_self_refresh(struct amdgpu_crtc *acrtc_attach,
+ const struct dm_crtc_state *acrtc_state,
+ const u64 current_ts)
+{
+ struct psr_settings *psr = &acrtc_state->stream->link->psr_settings;
+ struct replay_settings *pr = &acrtc_state->stream->link->replay_settings;
+ struct amdgpu_dm_connector *aconn =
+ (struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context;
+ bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state);
+
+ if (acrtc_state->update_type > UPDATE_TYPE_FAST) {
+ if (pr->config.replay_supported && !pr->replay_feature_enabled)
+ amdgpu_dm_link_setup_replay(acrtc_state->stream->link, aconn);
+ else if (psr->psr_version != DC_PSR_VERSION_UNSUPPORTED &&
+ !psr->psr_feature_enabled)
+ if (!aconn->disallow_edp_enter_psr)
+ amdgpu_dm_link_setup_psr(acrtc_state->stream);
+ }
+
+ /* Decrement skip count when SR is enabled and we're doing fast updates. */
+ if (acrtc_state->update_type == UPDATE_TYPE_FAST &&
+ (psr->psr_feature_enabled || pr->config.replay_supported)) {
+ if (aconn->sr_skip_count > 0)
+ aconn->sr_skip_count--;
+
+ /* Allow SR when skip count is 0. */
+ acrtc_attach->dm_irq_params.allow_sr_entry = !aconn->sr_skip_count;
+
+ /*
+ * If sink supports PSR SU/Panel Replay, there is no need to rely on
+ * a vblank event disable request to enable PSR/RP. PSR SU/RP
+ * can be enabled immediately once OS demonstrates an
+ * adequate number of fast atomic commits to notify KMD
+ * of update events. See `vblank_control_worker()`.
+ */
+ if (!vrr_active &&
+ acrtc_attach->dm_irq_params.allow_sr_entry &&
+#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+ !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) &&
+#endif
+ (current_ts - psr->psr_dirty_rects_change_timestamp_ns) > 500000000) {
+ if (pr->replay_feature_enabled && !pr->replay_allow_active)
+ amdgpu_dm_replay_enable(acrtc_state->stream, true);
+ if (psr->psr_version == DC_PSR_VERSION_SU_1 &&
+ !psr->psr_allow_active && !aconn->disallow_edp_enter_psr)
+ amdgpu_dm_psr_enable(acrtc_state->stream);
+ }
+ } else {
+ acrtc_attach->dm_irq_params.allow_sr_entry = false;
+ }
+}
+
static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
struct drm_device *dev,
struct amdgpu_display_manager *dm,
@@ -8056,6 +9485,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bool cursor_update = false;
bool pflip_present = false;
bool dirty_rects_changed = false;
+ bool updated_planes_and_streams = false;
struct {
struct dc_surface_update surface_updates[MAX_SURFACES];
struct dc_plane_info plane_infos[MAX_SURFACES];
@@ -8067,7 +9497,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
if (!bundle) {
- dm_error("Failed to allocate update bundle\n");
+ drm_err(dev, "Failed to allocate update bundle\n");
goto cleanup;
}
@@ -8075,8 +9505,24 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* Disable the cursor first if we're disabling all the planes.
* It'll remain on the screen after the planes are re-enabled
* if we don't.
+ *
+ * If the cursor is transitioning from native to overlay mode, the
+ * native cursor needs to be disabled first.
*/
- if (acrtc_state->active_planes == 0)
+ if (acrtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE &&
+ dm_old_crtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE) {
+ struct dc_cursor_position cursor_position = {0};
+
+ if (!dc_stream_set_cursor_position(acrtc_state->stream,
+ &cursor_position))
+ drm_err(dev, "DC failed to disable native cursor\n");
+
+ bundle->stream_update.cursor_position =
+ &acrtc_state->stream->cursor_position;
+ }
+
+ if (acrtc_state->active_planes == 0 &&
+ dm_old_crtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE)
amdgpu_dm_commit_cursors(state);
/* update planes when needed */
@@ -8090,10 +9536,14 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state);
/* Cursor plane is handled after stream updates */
- if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+ if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+ acrtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE) {
if ((fb && crtc == pcrtc) ||
- (old_plane_state->fb && old_plane_state->crtc == pcrtc))
+ (old_plane_state->fb && old_plane_state->crtc == pcrtc)) {
cursor_update = true;
+ if (amdgpu_ip_version(dm->adev, DCE_HWIP, 0) != 0)
+ amdgpu_dm_update_cursor(plane, old_plane_state, &bundle->stream_update);
+ }
continue;
}
@@ -8111,9 +9561,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->surface_updates[planes_count].surface = dc_plane;
if (new_pcrtc_state->color_mgmt_changed) {
- bundle->surface_updates[planes_count].gamma = dc_plane->gamma_correction;
- bundle->surface_updates[planes_count].in_transfer_func = dc_plane->in_transfer_func;
+ bundle->surface_updates[planes_count].gamma = &dc_plane->gamma_correction;
+ bundle->surface_updates[planes_count].in_transfer_func = &dc_plane->in_transfer_func;
bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix;
+ bundle->surface_updates[planes_count].hdr_mult = dc_plane->hdr_mult;
+ bundle->surface_updates[planes_count].func_shaper = &dc_plane->in_shaper_func;
+ bundle->surface_updates[planes_count].lut3d_func = &dc_plane->lut3d_func;
+ bundle->surface_updates[planes_count].blend_tf = &dc_plane->blend_tf;
}
amdgpu_dm_plane_fill_dc_scaling_info(dm->adev, new_plane_state,
@@ -8136,7 +9590,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
afb->tiling_flags,
&bundle->plane_infos[planes_count],
&bundle->flip_addrs[planes_count].address,
- afb->tmz_surface, false);
+ afb->tmz_surface);
drm_dbg_state(state->dev, "plane: id=%d dcc_en=%d\n",
new_plane_state->plane->index,
@@ -8150,6 +9604,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
fill_dc_dirty_rects(plane, old_plane_state,
new_plane_state, new_crtc_state,
&bundle->flip_addrs[planes_count],
+ acrtc_state->stream->link->psr_settings.psr_version ==
+ DC_PSR_VERSION_SU_1,
&dirty_rects_changed);
/*
@@ -8159,7 +9615,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* during the PSR-SU was disabled.
*/
if (acrtc_state->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 &&
- acrtc_attach->dm_irq_params.allow_psr_entry &&
+ acrtc_attach->dm_irq_params.allow_sr_entry &&
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
!amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) &&
#endif
@@ -8168,7 +9624,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
acrtc_state->stream->link->psr_settings.psr_dirty_rects_change_timestamp_ns =
timestamp_ns;
if (acrtc_state->stream->link->psr_settings.psr_allow_active)
- amdgpu_dm_psr_disable(acrtc_state->stream);
+ amdgpu_dm_psr_disable(acrtc_state->stream, true);
mutex_unlock(&dm->dc_lock);
}
}
@@ -8199,7 +9655,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->surface_updates[planes_count].surface = dc_plane;
if (!bundle->surface_updates[planes_count].surface) {
- DRM_ERROR("No surface for CRTC: id=%d\n",
+ drm_err(dev, "No surface for CRTC: id=%d\n",
acrtc_attach->crtc_id);
continue;
}
@@ -8286,15 +9742,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->stream_update.vrr_infopacket =
&acrtc_state->stream->vrr_infopacket;
}
- } else if (cursor_update && acrtc_state->active_planes > 0 &&
- acrtc_attach->base.state->event) {
- drm_crtc_vblank_get(pcrtc);
-
+ } else if (cursor_update && acrtc_state->active_planes > 0) {
spin_lock_irqsave(&pcrtc->dev->event_lock, flags);
-
- acrtc_attach->event = acrtc_attach->base.state->event;
- acrtc_attach->base.state->event = NULL;
-
+ if (acrtc_attach->base.state->event) {
+ drm_crtc_vblank_get(pcrtc);
+ acrtc_attach->event = acrtc_attach->base.state->event;
+ acrtc_attach->base.state->event = NULL;
+ }
spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
}
@@ -8324,7 +9778,11 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->stream_update.output_csc_transform =
&acrtc_state->stream->csc_color_matrix;
bundle->stream_update.out_transfer_func =
- acrtc_state->stream->out_transfer_func;
+ &acrtc_state->stream->out_transfer_func;
+ bundle->stream_update.lut3d_func =
+ (struct dc_3dlut *) acrtc_state->stream->lut3d_func;
+ bundle->stream_update.func_shaper =
+ (struct dc_transfer_func *) acrtc_state->stream->func_shaper;
}
acrtc_state->stream->abm_level = acrtc_state->abm_level;
@@ -8332,9 +9790,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->stream_update.abm_level = &acrtc_state->abm_level;
mutex_lock(&dm->dc_lock);
- if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
- acrtc_state->stream->link->psr_settings.psr_allow_active)
- amdgpu_dm_psr_disable(acrtc_state->stream);
+ if ((acrtc_state->update_type > UPDATE_TYPE_FAST) || vrr_active) {
+ if (acrtc_state->stream->link->replay_settings.replay_allow_active)
+ amdgpu_dm_replay_disable(acrtc_state->stream);
+ if (acrtc_state->stream->link->psr_settings.psr_allow_active)
+ amdgpu_dm_psr_disable(acrtc_state->stream, true);
+ }
mutex_unlock(&dm->dc_lock);
/*
@@ -8356,6 +9817,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
acrtc_state->stream,
&bundle->stream_update,
bundle->surface_updates);
+ updated_planes_and_streams = true;
/**
* Enable or disable the interrupts on the backend.
@@ -8374,44 +9836,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
dm_update_pflip_irq_state(drm_to_adev(dev),
acrtc_attach);
- if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
- acrtc_state->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED &&
- !acrtc_state->stream->link->psr_settings.psr_feature_enabled)
- amdgpu_dm_link_setup_psr(acrtc_state->stream);
-
- /* Decrement skip count when PSR is enabled and we're doing fast updates. */
- if (acrtc_state->update_type == UPDATE_TYPE_FAST &&
- acrtc_state->stream->link->psr_settings.psr_feature_enabled) {
- struct amdgpu_dm_connector *aconn =
- (struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context;
-
- if (aconn->psr_skip_count > 0)
- aconn->psr_skip_count--;
-
- /* Allow PSR when skip count is 0. */
- acrtc_attach->dm_irq_params.allow_psr_entry = !aconn->psr_skip_count;
-
- /*
- * If sink supports PSR SU, there is no need to rely on
- * a vblank event disable request to enable PSR. PSR SU
- * can be enabled immediately once OS demonstrates an
- * adequate number of fast atomic commits to notify KMD
- * of update events. See `vblank_control_worker()`.
- */
- if (acrtc_state->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 &&
- acrtc_attach->dm_irq_params.allow_psr_entry &&
-#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
- !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) &&
-#endif
- !acrtc_state->stream->link->psr_settings.psr_allow_active &&
- (timestamp_ns -
- acrtc_state->stream->link->psr_settings.psr_dirty_rects_change_timestamp_ns) >
- 500000000)
- amdgpu_dm_psr_enable(acrtc_state->stream);
- } else {
- acrtc_attach->dm_irq_params.allow_psr_entry = false;
- }
-
+ amdgpu_dm_enable_self_refresh(acrtc_attach, acrtc_state, timestamp_ns);
mutex_unlock(&dm->dc_lock);
}
@@ -8420,7 +9845,9 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* This avoids redundant programming in the case where we're going
* to be disabling a single plane - those pipes are being disabled.
*/
- if (acrtc_state->active_planes)
+ if (acrtc_state->active_planes &&
+ (!updated_planes_and_streams || amdgpu_ip_version(dm->adev, DCE_HWIP, 0) == 0) &&
+ acrtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE)
amdgpu_dm_commit_cursors(state);
cleanup:
@@ -8459,6 +9886,9 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
continue;
notify:
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
mutex_lock(&adev->dm.audio_lock);
@@ -8491,6 +9921,9 @@ notify:
if (!status)
continue;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
mutex_lock(&adev->dm.audio_lock);
@@ -8516,6 +9949,12 @@ static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_stat
stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state);
}
+static void dm_clear_writeback(struct amdgpu_display_manager *dm,
+ struct dm_crtc_state *crtc_state)
+{
+ dc_stream_remove_writeback(dm->dc, crtc_state->stream, 0);
+}
+
static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
struct dc_state *dc_state)
{
@@ -8525,8 +9964,38 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
struct drm_crtc *crtc;
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
+ struct drm_connector_state *old_con_state;
+ struct drm_connector *connector;
bool mode_set_reset_required = false;
u32 i;
+ struct dc_commit_streams_params params = {dc_state->streams, dc_state->stream_count};
+
+ /* Disable writeback */
+ for_each_old_connector_in_state(state, connector, old_con_state, i) {
+ struct dm_connector_state *dm_old_con_state;
+ struct amdgpu_crtc *acrtc;
+
+ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ old_crtc_state = NULL;
+
+ dm_old_con_state = to_dm_connector_state(old_con_state);
+ if (!dm_old_con_state->base.crtc)
+ continue;
+
+ acrtc = to_amdgpu_crtc(dm_old_con_state->base.crtc);
+ if (acrtc)
+ old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
+
+ if (!acrtc || !acrtc->wb_enabled)
+ continue;
+
+ dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+
+ dm_clear_writeback(dm, dm_old_crtc_state);
+ acrtc->wb_enabled = false;
+ }
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
new_crtc_state, i) {
@@ -8537,7 +10006,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
if (old_crtc_state->active &&
(!new_crtc_state->active ||
drm_atomic_crtc_needs_modeset(new_crtc_state))) {
- manage_dm_interrupts(adev, acrtc, false);
+ manage_dm_interrupts(adev, acrtc, NULL);
dc_stream_release(dm_old_crtc_state->stream);
}
}
@@ -8567,7 +10036,8 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
memset(&position, 0, sizeof(position));
mutex_lock(&dm->dc_lock);
- dc_stream_set_cursor_position(dm_old_crtc_state->stream, &position);
+ dc_exit_ips_for_hw_access(dm->dc);
+ dc_stream_program_cursor_position(dm_old_crtc_state->stream, &position);
mutex_unlock(&dm->dc_lock);
}
@@ -8583,7 +10053,9 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
if (amdgpu_dm_crtc_modeset_required(new_crtc_state, dm_new_crtc_state->stream, dm_old_crtc_state->stream)) {
- DRM_DEBUG_ATOMIC("Atomic commit: SET crtc id %d: [%p]\n", acrtc->crtc_id, acrtc);
+ drm_dbg_atomic(dev,
+ "Atomic commit: SET crtc id %d: [%p]\n",
+ acrtc->crtc_id, acrtc);
if (!dm_new_crtc_state->stream) {
/*
@@ -8601,8 +10073,9 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
* have a sink to keep the pipe running so that
* hw state is consistent with the sw state
*/
- DRM_DEBUG_DRIVER("%s: Failed to create new stream for crtc %d\n",
- __func__, acrtc->base.base.id);
+ drm_dbg_atomic(dev,
+ "Failed to create new stream for crtc %d\n",
+ acrtc->base.base.id);
continue;
}
@@ -8616,7 +10089,9 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
crtc->hwmode = new_crtc_state->mode;
mode_set_reset_required = true;
} else if (modereset_required(new_crtc_state)) {
- DRM_DEBUG_ATOMIC("Atomic commit: RESET. crtc id %d:[%p]\n", acrtc->crtc_id, acrtc);
+ drm_dbg_atomic(dev,
+ "Atomic commit: RESET. crtc id %d:[%p]\n",
+ acrtc->crtc_id, acrtc);
/* i.e. reset mode */
if (dm_old_crtc_state->stream)
remove_stream(adev, acrtc, dm_old_crtc_state->stream);
@@ -8625,20 +10100,22 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
}
} /* for_each_crtc_in_state() */
- /* if there mode set or reset, disable eDP PSR */
+ /* If there is a mode set or reset, disable eDP PSR and Replay. */
if (mode_set_reset_required) {
if (dm->vblank_control_workqueue)
flush_workqueue(dm->vblank_control_workqueue);
+ amdgpu_dm_replay_disable_all(dm);
amdgpu_dm_psr_disable_all(dm);
}
dm_enable_per_frame_crtc_master_sync(dc_state);
mutex_lock(&dm->dc_lock);
- WARN_ON(!dc_commit_streams(dm->dc, dc_state->streams, dc_state->stream_count));
+ dc_exit_ips_for_hw_access(dm->dc);
+ WARN_ON(!dc_commit_streams(dm->dc, &params));
/* Allow idle optimization when vblank count is 0 for display off */
- if (dm->active_vblank_irq_count == 0)
+ if ((dm->active_vblank_irq_count == 0) && amdgpu_dm_is_headless(dm->adev))
dc_allow_idle_optimizations(dm->dc, true);
mutex_unlock(&dm->dc_lock);
@@ -8652,74 +10129,167 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
dc_stream_get_status(dm_new_crtc_state->stream);
if (!status)
- status = dc_stream_get_status_from_state(dc_state,
+ status = dc_state_get_stream_status(dc_state,
dm_new_crtc_state->stream);
if (!status)
- DC_ERR("got no status for stream %p on acrtc%p\n", dm_new_crtc_state->stream, acrtc);
+ drm_err(dev,
+ "got no status for stream %p on acrtc%p\n",
+ dm_new_crtc_state->stream, acrtc);
else
acrtc->otg_inst = status->primary_otg_inst;
}
}
+
+ /* During boot up and resume the DC layer will reset the panel brightness
+ * to fix a flicker issue.
+ * As a result, dm->actual_brightness no longer matches the current panel
+ * brightness level (dm->brightness holds the correct panel level).
+ * So we set the backlight level to the dm->brightness value after the
+ * initial mode set. The restore_backlight flag avoids setting the backlight
+ * level on every subsequent mode set.
+ */
+ if (dm->restore_backlight) {
+ for (i = 0; i < dm->num_of_edps; i++) {
+ if (dm->backlight_dev[i])
+ amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]);
+ }
+ dm->restore_backlight = false;
+ }
}
-/**
- * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
- * @state: The atomic state to commit
- *
- * This will tell DC to commit the constructed DC state from atomic_check,
- * programming the hardware. Any failures here implies a hardware failure, since
- * atomic check should have filtered anything non-kosher.
- */
-static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+static void dm_set_writeback(struct amdgpu_display_manager *dm,
+ struct dm_crtc_state *crtc_state,
+ struct drm_connector *connector,
+ struct drm_connector_state *new_con_state)
{
- struct drm_device *dev = state->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
- struct amdgpu_display_manager *dm = &adev->dm;
- struct dm_atomic_state *dm_state;
- struct dc_state *dc_state = NULL;
- u32 i, j;
- struct drm_crtc *crtc;
- struct drm_crtc_state *old_crtc_state, *new_crtc_state;
- unsigned long flags;
- bool wait_for_vblank = true;
- struct drm_connector *connector;
- struct drm_connector_state *old_con_state, *new_con_state;
- struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
- int crtc_disable_count = 0;
+ struct drm_writeback_connector *wb_conn = drm_connector_to_writeback(connector);
+ struct amdgpu_device *adev = dm->adev;
+ struct amdgpu_crtc *acrtc;
+ struct dc_writeback_info *wb_info;
+ struct pipe_ctx *pipe = NULL;
+ struct amdgpu_framebuffer *afb;
+ int i = 0;
- trace_amdgpu_dm_atomic_commit_tail_begin(state);
+ wb_info = kzalloc(sizeof(*wb_info), GFP_KERNEL);
+ if (!wb_info) {
+ drm_err(adev_to_drm(adev), "Failed to allocate wb_info\n");
+ return;
+ }
- drm_atomic_helper_update_legacy_modeset_state(dev, state);
- drm_dp_mst_atomic_wait_for_dependencies(state);
+ acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc);
+ if (!acrtc) {
+ drm_err(adev_to_drm(adev), "no amdgpu_crtc found\n");
+ kfree(wb_info);
+ return;
+ }
- dm_state = dm_atomic_get_new_state(state);
- if (dm_state && dm_state->context) {
- dc_state = dm_state->context;
- amdgpu_dm_commit_streams(state, dc_state);
+ afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb);
+ if (!afb) {
+ drm_err(adev_to_drm(adev), "No amdgpu_framebuffer found\n");
+ kfree(wb_info);
+ return;
+ }
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ if (dm->dc->current_state->res_ctx.pipe_ctx[i].stream == crtc_state->stream) {
+ pipe = &dm->dc->current_state->res_ctx.pipe_ctx[i];
+ break;
+ }
+ }
+
+ /* fill in wb_info */
+ wb_info->wb_enabled = true;
+
+ wb_info->dwb_pipe_inst = 0;
+ wb_info->dwb_params.dwbscl_black_color = 0;
+ wb_info->dwb_params.hdr_mult = 0x1F000;
+ wb_info->dwb_params.csc_params.gamut_adjust_type = CM_GAMUT_ADJUST_TYPE_BYPASS;
+ wb_info->dwb_params.csc_params.gamut_coef_format = CM_GAMUT_REMAP_COEF_FORMAT_S2_13;
+ wb_info->dwb_params.output_depth = DWB_OUTPUT_PIXEL_DEPTH_10BPC;
+ wb_info->dwb_params.cnv_params.cnv_out_bpc = DWB_CNV_OUT_BPC_10BPC;
+
+ /* width & height from crtc */
+ wb_info->dwb_params.cnv_params.src_width = acrtc->base.mode.crtc_hdisplay;
+ wb_info->dwb_params.cnv_params.src_height = acrtc->base.mode.crtc_vdisplay;
+ wb_info->dwb_params.dest_width = acrtc->base.mode.crtc_hdisplay;
+ wb_info->dwb_params.dest_height = acrtc->base.mode.crtc_vdisplay;
+
+ wb_info->dwb_params.cnv_params.crop_en = false;
+ wb_info->dwb_params.stereo_params.stereo_enabled = false;
+
+ wb_info->dwb_params.cnv_params.out_max_pix_val = 0x3ff; // 10 bits
+ wb_info->dwb_params.cnv_params.out_min_pix_val = 0;
+ wb_info->dwb_params.cnv_params.fc_out_format = DWB_OUT_FORMAT_32BPP_ARGB;
+ wb_info->dwb_params.cnv_params.out_denorm_mode = DWB_OUT_DENORM_BYPASS;
+
+ wb_info->dwb_params.out_format = dwb_scaler_mode_bypass444;
+
+ wb_info->dwb_params.capture_rate = dwb_capture_rate_0;
+
+ wb_info->dwb_params.scaler_taps.h_taps = 4;
+ wb_info->dwb_params.scaler_taps.v_taps = 4;
+ wb_info->dwb_params.scaler_taps.h_taps_c = 2;
+ wb_info->dwb_params.scaler_taps.v_taps_c = 2;
+ wb_info->dwb_params.subsample_position = DWB_INTERSTITIAL_SUBSAMPLING;
+
+ wb_info->mcif_buf_params.luma_pitch = afb->base.pitches[0];
+ wb_info->mcif_buf_params.chroma_pitch = afb->base.pitches[1];
+
+ for (i = 0; i < DWB_MCIF_BUF_COUNT; i++) {
+ wb_info->mcif_buf_params.luma_address[i] = afb->address;
+ wb_info->mcif_buf_params.chroma_address[i] = 0;
+ }
+
+ wb_info->mcif_buf_params.p_vmid = 1;
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) {
+ wb_info->mcif_warmup_params.start_address.quad_part = afb->address;
+ wb_info->mcif_warmup_params.region_size =
+ wb_info->mcif_buf_params.luma_pitch * wb_info->dwb_params.dest_height;
}
+ wb_info->mcif_warmup_params.p_vmid = 1;
+ wb_info->writeback_source_plane = pipe->plane_state;
+
+ dc_stream_add_writeback(dm->dc, crtc_state->stream, wb_info);
+
+ acrtc->wb_pending = true;
+ acrtc->wb_conn = wb_conn;
+ drm_writeback_queue_job(wb_conn, new_con_state);
+}
+
+static void amdgpu_dm_update_hdcp(struct drm_atomic_state *state)
+{
+ struct drm_connector_state *old_con_state, *new_con_state;
+ struct drm_device *dev = state->dev;
+ struct drm_connector *connector;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i;
+
+ if (!adev->dm.hdcp_workqueue)
+ return;
for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
- struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct dm_crtc_state *dm_new_crtc_state;
+ struct amdgpu_dm_connector *aconnector;
- if (!adev->dm.hdcp_workqueue)
+ if (!connector || connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
continue;
- pr_debug("[HDCP_DM] -------------- i : %x ----------\n", i);
+ aconnector = to_amdgpu_dm_connector(connector);
- if (!connector)
- continue;
+ drm_dbg(dev, "[HDCP_DM] -------------- i : %x ----------\n", i);
- pr_debug("[HDCP_DM] connector->index: %x connect_status: %x dpms: %x\n",
+ drm_dbg(dev, "[HDCP_DM] connector->index: %x connect_status: %x dpms: %x\n",
connector->index, connector->status, connector->dpms);
- pr_debug("[HDCP_DM] state protection old: %x new: %x\n",
+ drm_dbg(dev, "[HDCP_DM] state protection old: %x new: %x\n",
old_con_state->content_protection, new_con_state->content_protection);
if (aconnector->dc_sink) {
if (aconnector->dc_sink->sink_signal != SIGNAL_TYPE_VIRTUAL &&
aconnector->dc_sink->sink_signal != SIGNAL_TYPE_NONE) {
- pr_debug("[HDCP_DM] pipe_ctx dispname=%s\n",
+ drm_dbg(dev, "[HDCP_DM] pipe_ctx dispname=%s\n",
aconnector->dc_sink->edid_caps.display_name);
}
}
@@ -8733,7 +10303,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
}
if (old_crtc_state)
- pr_debug("old crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
+ drm_dbg(dev, "old crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
old_crtc_state->enable,
old_crtc_state->active,
old_crtc_state->mode_changed,
@@ -8741,29 +10311,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
old_crtc_state->connectors_changed);
if (new_crtc_state)
- pr_debug("NEW crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
+ drm_dbg(dev, "NEW crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n",
new_crtc_state->enable,
new_crtc_state->active,
new_crtc_state->mode_changed,
new_crtc_state->active_changed,
new_crtc_state->connectors_changed);
- }
- for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
- struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
- struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
- struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
-
- if (!adev->dm.hdcp_workqueue)
- continue;
-
- new_crtc_state = NULL;
- old_crtc_state = NULL;
-
- if (acrtc) {
- new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
- old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
- }
dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
@@ -8807,13 +10361,86 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
new_con_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED)
enable_encryption = true;
- DRM_INFO("[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption);
+ drm_info(dev, "[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption);
- hdcp_update_display(
- adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector,
- new_con_state->hdcp_content_type, enable_encryption);
+ if (aconnector->dc_link)
+ hdcp_update_display(
+ adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector,
+ new_con_state->hdcp_content_type, enable_encryption);
}
}
+}
+
+static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state)
+{
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
+ int i, ret;
+
+ ret = drm_dp_mst_atomic_setup_commit(state);
+ if (ret)
+ return ret;
+
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+ dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ /*
+ * Color management settings. We also update color properties
+ * when a modeset is needed, to ensure it gets reprogrammed.
+ */
+ if (dm_new_crtc_state->base.active && dm_new_crtc_state->stream &&
+ (dm_new_crtc_state->base.color_mgmt_changed ||
+ dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
+ drm_atomic_crtc_needs_modeset(new_crtc_state))) {
+ ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
+ if (ret) {
+ drm_dbg_atomic(state->dev, "Failed to update color state\n");
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
+ * @state: The atomic state to commit
+ *
+ * This will tell DC to commit the constructed DC state from atomic_check,
+ * programming the hardware. Any failures here implies a hardware failure, since
+ * atomic check should have filtered anything non-kosher.
+ */
+static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
+{
+ struct drm_device *dev = state->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_display_manager *dm = &adev->dm;
+ struct dm_atomic_state *dm_state;
+ struct dc_state *dc_state = NULL;
+ u32 i, j;
+ struct drm_crtc *crtc;
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ unsigned long flags;
+ bool wait_for_vblank = true;
+ struct drm_connector *connector;
+ struct drm_connector_state *old_con_state = NULL, *new_con_state = NULL;
+ struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
+ int crtc_disable_count = 0;
+
+ trace_amdgpu_dm_atomic_commit_tail_begin(state);
+
+ drm_atomic_helper_update_legacy_modeset_state(dev, state);
+ drm_dp_mst_atomic_wait_for_dependencies(state);
+
+ dm_state = dm_atomic_get_new_state(state);
+ if (dm_state && dm_state->context) {
+ dc_state = dm_state->context;
+ amdgpu_dm_commit_streams(state, dc_state);
+ }
+
+ amdgpu_dm_update_hdcp(state);
/* Handle connector state changes */
for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) {
@@ -8824,7 +10451,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
struct dc_stream_update stream_update;
struct dc_info_packet hdr_packet;
struct dc_stream_status *status = NULL;
- bool abm_changed, hdr_changed, scaling_changed;
+ bool abm_changed, hdr_changed, scaling_changed, output_color_space_changed = false;
memset(&stream_update, 0, sizeof(stream_update));
@@ -8843,13 +10470,18 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
scaling_changed = is_scaling_state_different(dm_new_con_state,
dm_old_con_state);
+ if ((new_con_state->hdmi.broadcast_rgb != old_con_state->hdmi.broadcast_rgb) &&
+ (dm_old_crtc_state->stream->output_color_space !=
+ get_output_color_space(&dm_new_crtc_state->stream->timing, new_con_state)))
+ output_color_space_changed = true;
+
abm_changed = dm_new_crtc_state->abm_level !=
dm_old_crtc_state->abm_level;
hdr_changed =
!drm_connector_atomic_hdr_metadata_equal(old_con_state, new_con_state);
- if (!scaling_changed && !abm_changed && !hdr_changed)
+ if (!scaling_changed && !abm_changed && !hdr_changed && !output_color_space_changed)
continue;
stream_update.stream = dm_new_crtc_state->stream;
@@ -8861,6 +10493,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
stream_update.dst = dm_new_crtc_state->stream->dst;
}
+ if (output_color_space_changed) {
+ dm_new_crtc_state->stream->output_color_space
+ = get_output_color_space(&dm_new_crtc_state->stream->timing, new_con_state);
+
+ stream_update.output_color_space = &dm_new_crtc_state->stream->output_color_space;
+ }
+
if (abm_changed) {
dm_new_crtc_state->stream->abm_level = dm_new_crtc_state->abm_level;
@@ -8885,11 +10524,18 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
* To fix this, DC should permit updating only stream properties.
*/
dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_ATOMIC);
+ if (!dummy_updates) {
+ drm_err(adev_to_drm(adev), "Failed to allocate memory for dummy_updates.\n");
+ continue;
+ }
for (j = 0; j < status->plane_count; j++)
dummy_updates[j].surface = status->plane_states[0];
+ sort(dummy_updates, status->plane_count,
+ sizeof(*dummy_updates), dm_plane_layer_index_cmp, NULL);
mutex_lock(&dm->dc_lock);
+ dc_exit_ips_for_hw_access(dm->dc);
dc_update_planes_and_stream(dm->dc,
dummy_updates,
status->plane_count,
@@ -8897,6 +10543,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
&stream_update);
mutex_unlock(&dm->dc_lock);
kfree(dummy_updates);
+
+ drm_connector_update_privacy_screen(new_con_state);
}
/**
@@ -8931,7 +10579,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
drm_atomic_crtc_needs_modeset(new_crtc_state))) {
dc_stream_retain(dm_new_crtc_state->stream);
acrtc->dm_irq_params.stream = dm_new_crtc_state->stream;
- manage_dm_interrupts(adev, acrtc, true);
+ manage_dm_interrupts(adev, acrtc, dm_new_crtc_state);
}
/* Handle vrr on->off / off->on transitions */
amdgpu_dm_handle_vrr_transition(dm_old_crtc_state, dm_new_crtc_state);
@@ -8947,20 +10595,26 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
if (amdgpu_dm_is_valid_crc_source(cur_crc_src)) {
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
if (amdgpu_dm_crc_window_is_activated(crtc)) {
- spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- acrtc->dm_irq_params.window_param.update_win = true;
+ uint8_t cnt;
- /**
- * It takes 2 frames for HW to stably generate CRC when
- * resuming from suspend, so we set skip_frame_cnt 2.
- */
- acrtc->dm_irq_params.window_param.skip_frame_cnt = 2;
+ spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
+ for (cnt = 0; cnt < MAX_CRC_WINDOW_NUM; cnt++) {
+ if (acrtc->dm_irq_params.window_param[cnt].enable) {
+ acrtc->dm_irq_params.window_param[cnt].update_win = true;
+
+ /**
+ * It takes 2 frames for HW to stably generate CRC when
+ * resuming from suspend, so we set skip_frame_cnt 2.
+ */
+ acrtc->dm_irq_params.window_param[cnt].skip_frame_cnt = 2;
+ }
+ }
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
}
#endif
if (amdgpu_dm_crtc_configure_crc_source(
crtc, dm_new_crtc_state, cur_crc_src))
- DRM_DEBUG_DRIVER("Failed to configure crc source");
+ drm_dbg_atomic(dev, "Failed to configure crc source");
}
}
#endif
@@ -8978,6 +10632,31 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
amdgpu_dm_commit_planes(state, dev, dm, crtc, wait_for_vblank);
}
+ /* Enable writeback */
+ for_each_new_connector_in_state(state, connector, new_con_state, i) {
+ struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
+ struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
+
+ if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ if (!new_con_state->writeback_job)
+ continue;
+
+ new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+
+ if (!new_crtc_state)
+ continue;
+
+ if (acrtc->wb_enabled)
+ continue;
+
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+ dm_set_writeback(dm, dm_new_crtc_state, connector, new_con_state);
+ acrtc->wb_enabled = true;
+ }
+
/* Update audio instances for each connector. */
amdgpu_dm_commit_audio(dev, state);
@@ -9030,6 +10709,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
for (i = 0; i < crtc_disable_count; i++)
pm_runtime_put_autosuspend(dev->dev);
pm_runtime_mark_last_busy(dev->dev);
+
+ trace_amdgpu_dm_atomic_commit_tail_finish(state);
}
static int dm_force_atomic_commit(struct drm_connector *connector)
@@ -9055,16 +10736,20 @@ static int dm_force_atomic_commit(struct drm_connector *connector)
*/
conn_state = drm_atomic_get_connector_state(state, connector);
- ret = PTR_ERR_OR_ZERO(conn_state);
- if (ret)
+ /* Check for error in getting connector state */
+ if (IS_ERR(conn_state)) {
+ ret = PTR_ERR(conn_state);
goto out;
+ }
/* Attach crtc to drm_atomic_state*/
crtc_state = drm_atomic_get_crtc_state(state, &disconnected_acrtc->base);
- ret = PTR_ERR_OR_ZERO(crtc_state);
- if (ret)
+ /* Check for error in getting crtc state */
+ if (IS_ERR(crtc_state)) {
+ ret = PTR_ERR(crtc_state);
goto out;
+ }
/* force a restore */
crtc_state->mode_changed = true;
@@ -9072,9 +10757,11 @@ static int dm_force_atomic_commit(struct drm_connector *connector)
/* Attach plane to drm_atomic_state */
plane_state = drm_atomic_get_plane_state(state, plane);
- ret = PTR_ERR_OR_ZERO(plane_state);
- if (ret)
+ /* Check for error in getting plane state */
+ if (IS_ERR(plane_state)) {
+ ret = PTR_ERR(plane_state);
goto out;
+ }
/* Call commit internally with the state we just constructed */
ret = drm_atomic_commit(state);
@@ -9082,7 +10769,7 @@ static int dm_force_atomic_commit(struct drm_connector *connector)
out:
drm_atomic_state_put(state);
if (ret)
- DRM_ERROR("Restoring old state failed with %i\n", ret);
+ drm_err(ddev, "Restoring old state failed with %i\n", ret);
return ret;
}
@@ -9095,10 +10782,15 @@ out:
void dm_restore_drm_connector_state(struct drm_device *dev,
struct drm_connector *connector)
{
- struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+ struct amdgpu_dm_connector *aconnector;
struct amdgpu_crtc *disconnected_acrtc;
struct dm_crtc_state *acrtc_state;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ return;
+
+ aconnector = to_amdgpu_dm_connector(connector);
+
if (!aconnector->dc_sink || !connector->state || !connector->encoder)
return;
@@ -9161,7 +10853,7 @@ static int do_aquire_global_lock(struct drm_device *dev,
&commit->flip_done, 10*HZ);
if (ret == 0)
- DRM_ERROR("[CRTC:%d:%s] hw_done or flip_done timed out\n",
+ drm_err(dev, "[CRTC:%d:%s] hw_done or flip_done timed out\n",
crtc->base.id, crtc->name);
drm_crtc_commit_put(commit);
@@ -9175,12 +10867,16 @@ static void get_freesync_config_for_crtc(
struct dm_connector_state *new_con_state)
{
struct mod_freesync_config config = {0};
- struct amdgpu_dm_connector *aconnector =
- to_amdgpu_dm_connector(new_con_state->base.connector);
+ struct amdgpu_dm_connector *aconnector;
struct drm_display_mode *mode = &new_crtc_state->base.mode;
int vrefresh = drm_mode_vrefresh(mode);
bool fs_vid_mode = false;
+ if (new_con_state->base.connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ return;
+
+ aconnector = to_amdgpu_dm_connector(new_con_state->base.connector);
+
new_crtc_state->vrr_supported = new_con_state->freesync_capable &&
vrefresh >= aconnector->min_vfreq &&
vrefresh <= aconnector->max_vfreq;
@@ -9203,6 +10899,8 @@ static void get_freesync_config_for_crtc(
} else {
config.state = VRR_STATE_INACTIVE;
}
+ } else {
+ config.state = VRR_STATE_UNSUPPORTED;
}
out:
new_crtc_state->freesync_config = config;
@@ -9273,6 +10971,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
struct dm_atomic_state *dm_state = NULL;
struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
struct dc_stream_state *new_stream;
+ struct amdgpu_device *adev = dm->adev;
int ret = 0;
/*
@@ -9280,6 +10979,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
* update changed items
*/
struct amdgpu_crtc *acrtc = NULL;
+ struct drm_connector *connector = NULL;
struct amdgpu_dm_connector *aconnector = NULL;
struct drm_connector_state *drm_new_conn_state = NULL, *drm_old_conn_state = NULL;
struct dm_connector_state *dm_new_conn_state = NULL, *dm_old_conn_state = NULL;
@@ -9289,18 +10989,20 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
acrtc = to_amdgpu_crtc(crtc);
- aconnector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
+ connector = amdgpu_dm_find_first_crtc_matching_connector(state, crtc);
+ if (connector)
+ aconnector = to_amdgpu_dm_connector(connector);
/* TODO This hack should go away */
- if (aconnector && enable) {
+ if (connector && enable) {
/* Make sure fake sink is created in plug-in scenario */
drm_new_conn_state = drm_atomic_get_new_connector_state(state,
- &aconnector->base);
+ connector);
drm_old_conn_state = drm_atomic_get_old_connector_state(state,
- &aconnector->base);
+ connector);
- if (IS_ERR(drm_new_conn_state)) {
- ret = PTR_ERR_OR_ZERO(drm_new_conn_state);
+ if (WARN_ON(!drm_new_conn_state)) {
+ ret = -EINVAL;
goto fail;
}
@@ -9310,7 +11012,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
goto skip_modeset;
- new_stream = create_validate_stream_for_sink(aconnector,
+ new_stream = create_validate_stream_for_sink(connector,
&new_crtc_state->mode,
dm_new_conn_state,
dm_old_crtc_state->stream);
@@ -9323,7 +11025,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
*/
if (!new_stream) {
- DRM_DEBUG_DRIVER("%s: Failed to create new stream for crtc %d\n",
+ drm_dbg_driver(adev_to_drm(adev), "%s: Failed to create new stream for crtc %d\n",
__func__, acrtc->base.base.id);
ret = -ENOMEM;
goto fail;
@@ -9352,7 +11054,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
* TODO: Refactor this function to allow this check to work
* in all conditions.
*/
- if (dm_new_crtc_state->stream &&
+ if (amdgpu_freesync_vid_mode &&
+ dm_new_crtc_state->stream &&
is_timing_unchanged_for_freesync(new_crtc_state, old_crtc_state))
goto skip_modeset;
@@ -9360,7 +11063,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) &&
dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) {
new_crtc_state->mode_changed = false;
- DRM_DEBUG_DRIVER("Mode change not required, setting mode_changed to %d",
+ drm_dbg_driver(adev_to_drm(adev), "Mode change not required, setting mode_changed to %d",
new_crtc_state->mode_changed);
}
}
@@ -9392,20 +11095,20 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
}
/* Now check if we should set freesync video mode */
- if (dm_new_crtc_state->stream &&
+ if (amdgpu_freesync_vid_mode && dm_new_crtc_state->stream &&
dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) &&
dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream) &&
is_timing_unchanged_for_freesync(new_crtc_state,
old_crtc_state)) {
new_crtc_state->mode_changed = false;
- DRM_DEBUG_DRIVER(
+ drm_dbg_driver(adev_to_drm(adev),
"Mode change not required for front porch change, setting mode_changed to %d",
new_crtc_state->mode_changed);
set_freesync_fixed_config(dm_new_crtc_state);
goto skip_modeset;
- } else if (aconnector &&
+ } else if (amdgpu_freesync_vid_mode && aconnector &&
is_freesync_video_mode(&new_crtc_state->mode,
aconnector)) {
struct drm_display_mode *high_mode;
@@ -9419,11 +11122,11 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
if (ret)
goto fail;
- DRM_DEBUG_DRIVER("Disabling DRM crtc: %d\n",
+ drm_dbg_driver(adev_to_drm(adev), "Disabling DRM crtc: %d\n",
crtc->base.id);
/* i.e. reset mode */
- if (dc_remove_stream_from_ctx(
+ if (dc_state_remove_stream(
dm->dc,
dm_state->context,
dm_old_crtc_state->stream) != DC_OK) {
@@ -9444,7 +11147,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
* added MST connectors not found in existing crtc_state in the chained mode
* TODO: need to dig out the root cause of that
*/
- if (!aconnector)
+ if (!connector)
goto skip_modeset;
if (modereset_required(new_crtc_state))
@@ -9466,7 +11169,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
DRM_DEBUG_ATOMIC("Enabling DRM crtc: %d\n",
crtc->base.id);
- if (dc_add_stream_to_ctx(
+ if (dc_state_add_stream(
dm->dc,
dm_state->context,
dm_new_crtc_state->stream) != DC_OK) {
@@ -9487,7 +11190,7 @@ skip_modeset:
* We want to do dc stream updates that do not require a
* full modeset below.
*/
- if (!(enable && aconnector && new_crtc_state->active))
+ if (!(enable && connector && new_crtc_state->active))
return 0;
/*
* Given above conditions, the dc state cannot be NULL because:
@@ -9513,8 +11216,9 @@ skip_modeset:
* when a modeset is needed, to ensure it gets reprogrammed.
*/
if (dm_new_crtc_state->base.color_mgmt_changed ||
+ dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
drm_atomic_crtc_needs_modeset(new_crtc_state)) {
- ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
+ ret = amdgpu_dm_check_crtc_color_mgmt(dm_new_crtc_state, true);
if (ret)
goto fail;
}
@@ -9538,15 +11242,20 @@ static bool should_reset_plane(struct drm_atomic_state *state,
{
struct drm_plane *other;
struct drm_plane_state *old_other_state, *new_other_state;
- struct drm_crtc_state *new_crtc_state;
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct dm_crtc_state *old_dm_crtc_state, *new_dm_crtc_state;
+ struct amdgpu_device *adev = drm_to_adev(plane->dev);
int i;
/*
- * TODO: Remove this hack once the checks below are sufficient
- * enough to determine when we need to reset all the planes on
- * the stream.
+ * TODO: Remove this hack for all ASICs once it is proven that
+ * fast updates work fine on DCN3.2+.
*/
- if (state->allow_modeset)
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 2, 0) &&
+ state->allow_modeset)
+ return true;
+
+ if (amdgpu_in_reset(adev) && state->allow_modeset)
return true;
/* Exit early if we know that we're adding or removing the plane. */
@@ -9559,14 +11268,38 @@ static bool should_reset_plane(struct drm_atomic_state *state,
new_crtc_state =
drm_atomic_get_new_crtc_state(state, new_plane_state->crtc);
+ old_crtc_state =
+ drm_atomic_get_old_crtc_state(state, old_plane_state->crtc);
if (!new_crtc_state)
return true;
+ /*
+ * A change in cursor mode means a new dc pipe needs to be acquired or
+ * released from the state
+ */
+ old_dm_crtc_state = to_dm_crtc_state(old_crtc_state);
+ new_dm_crtc_state = to_dm_crtc_state(new_crtc_state);
+ if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+ old_dm_crtc_state != NULL &&
+ old_dm_crtc_state->cursor_mode != new_dm_crtc_state->cursor_mode) {
+ return true;
+ }
+
/* CRTC Degamma changes currently require us to recreate planes. */
if (new_crtc_state->color_mgmt_changed)
return true;
+ /*
+ * On zpos change, planes need to be reordered by removing and re-adding
+ * them one by one to the dc state, in order of descending zpos.
+ *
+ * TODO: We can likely skip bandwidth validation if the only thing that
+ * changed about the plane was its z-ordering.
+ */
+ if (old_plane_state->normalized_zpos != new_plane_state->normalized_zpos)
+ return true;
+
if (drm_atomic_crtc_needs_modeset(new_crtc_state))
return true;
@@ -9580,6 +11313,10 @@ static bool should_reset_plane(struct drm_atomic_state *state,
*/
for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) {
struct amdgpu_framebuffer *old_afb, *new_afb;
+ struct dm_plane_state *dm_new_other_state, *dm_old_other_state;
+
+ dm_new_other_state = to_dm_plane_state(new_other_state);
+ dm_old_other_state = to_dm_plane_state(old_other_state);
if (other->type == DRM_PLANE_TYPE_CURSOR)
continue;
@@ -9616,6 +11353,18 @@ static bool should_reset_plane(struct drm_atomic_state *state,
old_other_state->color_encoding != new_other_state->color_encoding)
return true;
+ /* HDR/Transfer Function changes. */
+ if (dm_old_other_state->degamma_tf != dm_new_other_state->degamma_tf ||
+ dm_old_other_state->degamma_lut != dm_new_other_state->degamma_lut ||
+ dm_old_other_state->hdr_mult != dm_new_other_state->hdr_mult ||
+ dm_old_other_state->ctm != dm_new_other_state->ctm ||
+ dm_old_other_state->shaper_lut != dm_new_other_state->shaper_lut ||
+ dm_old_other_state->shaper_tf != dm_new_other_state->shaper_tf ||
+ dm_old_other_state->lut3d != dm_new_other_state->lut3d ||
+ dm_old_other_state->blend_lut != dm_new_other_state->blend_lut ||
+ dm_old_other_state->blend_tf != dm_new_other_state->blend_tf)
+ return true;
+
/* Framebuffer checks fall at the end. */
if (!old_other_state->fb || !new_other_state->fb)
continue;
@@ -9682,12 +11431,14 @@ static int dm_check_cursor_fb(struct amdgpu_crtc *new_acrtc,
* check tiling flags when the FB doesn't have a modifier.
*/
if (!(fb->flags & DRM_MODE_FB_MODIFIERS)) {
- if (adev->family < AMDGPU_FAMILY_AI) {
+ if (adev->family >= AMDGPU_FAMILY_GC_12_0_0) {
+ linear = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE) == 0;
+ } else if (adev->family >= AMDGPU_FAMILY_AI) {
+ linear = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0;
+ } else {
linear = AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_2D_TILED_THIN1 &&
AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_1D_TILED_THIN1 &&
AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE) == 0;
- } else {
- linear = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0;
}
if (!linear) {
DRM_DEBUG_ATOMIC("Cursor FB not linear");
@@ -9698,6 +11449,68 @@ static int dm_check_cursor_fb(struct amdgpu_crtc *new_acrtc,
return 0;
}
+/*
+ * Helper function for checking the cursor in native mode
+ */
+static int dm_check_native_cursor_state(struct drm_crtc *new_plane_crtc,
+ struct drm_plane *plane,
+ struct drm_plane_state *new_plane_state,
+ bool enable)
+{
+
+ struct amdgpu_crtc *new_acrtc;
+ int ret;
+
+ if (!enable || !new_plane_crtc ||
+ drm_atomic_plane_disabling(plane->state, new_plane_state))
+ return 0;
+
+ new_acrtc = to_amdgpu_crtc(new_plane_crtc);
+
+ if (new_plane_state->src_x != 0 || new_plane_state->src_y != 0) {
+ DRM_DEBUG_ATOMIC("Cropping not supported for cursor plane\n");
+ return -EINVAL;
+ }
+
+ if (new_plane_state->fb) {
+ ret = dm_check_cursor_fb(new_acrtc, new_plane_state,
+ new_plane_state->fb);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static bool dm_should_update_native_cursor(struct drm_atomic_state *state,
+ struct drm_crtc *old_plane_crtc,
+ struct drm_crtc *new_plane_crtc,
+ bool enable)
+{
+ struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
+
+ if (!enable) {
+ if (old_plane_crtc == NULL)
+ return true;
+
+ old_crtc_state = drm_atomic_get_old_crtc_state(
+ state, old_plane_crtc);
+ dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
+
+ return dm_old_crtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE;
+ } else {
+ if (new_plane_crtc == NULL)
+ return true;
+
+ new_crtc_state = drm_atomic_get_new_crtc_state(
+ state, new_plane_crtc);
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+ return dm_new_crtc_state->cursor_mode == DM_CURSOR_NATIVE_MODE;
+ }
+}
+
static int dm_update_plane_state(struct dc *dc,
struct drm_atomic_state *state,
struct drm_plane *plane,
@@ -9713,8 +11526,7 @@ static int dm_update_plane_state(struct dc *dc,
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state;
struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state;
- struct amdgpu_crtc *new_acrtc;
- bool needs_reset;
+ bool needs_reset, update_native_cursor;
int ret = 0;
@@ -9723,24 +11535,16 @@ static int dm_update_plane_state(struct dc *dc,
dm_new_plane_state = to_dm_plane_state(new_plane_state);
dm_old_plane_state = to_dm_plane_state(old_plane_state);
- if (plane->type == DRM_PLANE_TYPE_CURSOR) {
- if (!enable || !new_plane_crtc ||
- drm_atomic_plane_disabling(plane->state, new_plane_state))
- return 0;
-
- new_acrtc = to_amdgpu_crtc(new_plane_crtc);
-
- if (new_plane_state->src_x != 0 || new_plane_state->src_y != 0) {
- DRM_DEBUG_ATOMIC("Cropping not supported for cursor plane\n");
- return -EINVAL;
- }
+ update_native_cursor = dm_should_update_native_cursor(state,
+ old_plane_crtc,
+ new_plane_crtc,
+ enable);
- if (new_plane_state->fb) {
- ret = dm_check_cursor_fb(new_acrtc, new_plane_state,
- new_plane_state->fb);
- if (ret)
- return ret;
- }
+ if (plane->type == DRM_PLANE_TYPE_CURSOR && update_native_cursor) {
+ ret = dm_check_native_cursor_state(new_plane_crtc, plane,
+ new_plane_state, enable);
+ if (ret)
+ return ret;
return 0;
}
@@ -9770,7 +11574,7 @@ static int dm_update_plane_state(struct dc *dc,
if (ret)
return ret;
- if (!dc_remove_plane_from_context(
+ if (!dc_state_remove_plane(
dc,
dm_old_crtc_state->stream,
dm_old_plane_state->dc_state,
@@ -9806,20 +11610,14 @@ static int dm_update_plane_state(struct dc *dc,
ret = amdgpu_dm_plane_helper_check_state(new_plane_state, new_crtc_state);
if (ret)
- return ret;
+ goto out;
WARN_ON(dm_new_plane_state->dc_state);
dc_new_plane_state = dc_create_plane_state(dc);
- if (!dc_new_plane_state)
- return -ENOMEM;
-
- /* Block top most plane from being a video plane */
- if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
- if (is_video_format(new_plane_state->fb->format->format) && *is_top_most_overlay)
- return -EINVAL;
-
- *is_top_most_overlay = false;
+ if (!dc_new_plane_state) {
+ ret = -ENOMEM;
+ goto out;
}
DRM_DEBUG_ATOMIC("Enabling DRM plane: %d on DRM crtc %d\n",
@@ -9832,13 +11630,13 @@ static int dm_update_plane_state(struct dc *dc,
new_crtc_state);
if (ret) {
dc_plane_state_release(dc_new_plane_state);
- return ret;
+ goto out;
}
ret = dm_atomic_get_state(state, &dm_state);
if (ret) {
dc_plane_state_release(dc_new_plane_state);
- return ret;
+ goto out;
}
/*
@@ -9848,14 +11646,15 @@ static int dm_update_plane_state(struct dc *dc,
* state. It'll be released when the atomic state is
* cleaned.
*/
- if (!dc_add_plane_to_context(
+ if (!dc_state_add_plane(
dc,
dm_new_crtc_state->stream,
dc_new_plane_state,
dm_state->context)) {
dc_plane_state_release(dc_new_plane_state);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
dm_new_plane_state->dc_state = dc_new_plane_state;
@@ -9870,6 +11669,16 @@ static int dm_update_plane_state(struct dc *dc,
*lock_and_validation_needed = true;
}
+out:
+ /* If enabling cursor overlay failed, attempt fallback to native mode */
+ if (enable && ret == -EINVAL && plane->type == DRM_PLANE_TYPE_CURSOR) {
+ ret = dm_check_native_cursor_state(new_plane_crtc, plane,
+ new_plane_state, enable);
+ if (ret)
+ return ret;
+
+ dm_new_crtc_state->cursor_mode = DM_CURSOR_NATIVE_MODE;
+ }
return ret;
}
@@ -9892,64 +11701,75 @@ static void dm_get_oriented_plane_size(struct drm_plane_state *plane_state,
}
}
-static int dm_check_crtc_cursor(struct drm_atomic_state *state,
- struct drm_crtc *crtc,
- struct drm_crtc_state *new_crtc_state)
+static void
+dm_get_plane_scale(struct drm_plane_state *plane_state,
+ int *out_plane_scale_w, int *out_plane_scale_h)
{
- struct drm_plane *cursor = crtc->cursor, *underlying;
- struct drm_plane_state *new_cursor_state, *new_underlying_state;
- int i;
- int cursor_scale_w, cursor_scale_h, underlying_scale_w, underlying_scale_h;
- int cursor_src_w, cursor_src_h;
- int underlying_src_w, underlying_src_h;
-
- /* On DCE and DCN there is no dedicated hardware cursor plane. We get a
- * cursor per pipe but it's going to inherit the scaling and
- * positioning from the underlying pipe. Check the cursor plane's
- * blending properties match the underlying planes'.
- */
+ int plane_src_w, plane_src_h;
- new_cursor_state = drm_atomic_get_new_plane_state(state, cursor);
- if (!new_cursor_state || !new_cursor_state->fb)
- return 0;
+ dm_get_oriented_plane_size(plane_state, &plane_src_w, &plane_src_h);
+ *out_plane_scale_w = plane_src_w ? plane_state->crtc_w * 1000 / plane_src_w : 0;
+ *out_plane_scale_h = plane_src_h ? plane_state->crtc_h * 1000 / plane_src_h : 0;
+}
- dm_get_oriented_plane_size(new_cursor_state, &cursor_src_w, &cursor_src_h);
- cursor_scale_w = new_cursor_state->crtc_w * 1000 / cursor_src_w;
- cursor_scale_h = new_cursor_state->crtc_h * 1000 / cursor_src_h;
+/*
+ * The normalized_zpos value cannot be used by this iterator directly. It's only
+ * calculated for enabled planes, potentially causing normalized_zpos collisions
+ * between enabled/disabled planes in the atomic state. We need a unique value
+ * so that the iterator will not generate the same object twice, or loop
+ * indefinitely.
+ */
+static inline struct __drm_planes_state *__get_next_zpos(
+ struct drm_atomic_state *state,
+ struct __drm_planes_state *prev)
+{
+ unsigned int highest_zpos = 0, prev_zpos = 256;
+ uint32_t highest_id = 0, prev_id = UINT_MAX;
+ struct drm_plane_state *new_plane_state;
+ struct drm_plane *plane;
+ int i, highest_i = -1;
- for_each_new_plane_in_state_reverse(state, underlying, new_underlying_state, i) {
- /* Narrow down to non-cursor planes on the same CRTC as the cursor */
- if (new_underlying_state->crtc != crtc || underlying == crtc->cursor)
- continue;
+ if (prev != NULL) {
+ prev_zpos = prev->new_state->zpos;
+ prev_id = prev->ptr->base.id;
+ }
- /* Ignore disabled planes */
- if (!new_underlying_state->fb)
+ for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+ /* Skip planes with higher zpos than the previously returned */
+ if (new_plane_state->zpos > prev_zpos ||
+ (new_plane_state->zpos == prev_zpos &&
+ plane->base.id >= prev_id))
continue;
- dm_get_oriented_plane_size(new_underlying_state,
- &underlying_src_w, &underlying_src_h);
- underlying_scale_w = new_underlying_state->crtc_w * 1000 / underlying_src_w;
- underlying_scale_h = new_underlying_state->crtc_h * 1000 / underlying_src_h;
-
- if (cursor_scale_w != underlying_scale_w ||
- cursor_scale_h != underlying_scale_h) {
- drm_dbg_atomic(crtc->dev,
- "Cursor [PLANE:%d:%s] scaling doesn't match underlying [PLANE:%d:%s]\n",
- cursor->base.id, cursor->name, underlying->base.id, underlying->name);
- return -EINVAL;
+ /* Save the index of the plane with highest zpos */
+ if (new_plane_state->zpos > highest_zpos ||
+ (new_plane_state->zpos == highest_zpos &&
+ plane->base.id > highest_id)) {
+ highest_zpos = new_plane_state->zpos;
+ highest_id = plane->base.id;
+ highest_i = i;
}
-
- /* If this plane covers the whole CRTC, no need to check planes underneath */
- if (new_underlying_state->crtc_x <= 0 &&
- new_underlying_state->crtc_y <= 0 &&
- new_underlying_state->crtc_x + new_underlying_state->crtc_w >= new_crtc_state->mode.hdisplay &&
- new_underlying_state->crtc_y + new_underlying_state->crtc_h >= new_crtc_state->mode.vdisplay)
- break;
}
- return 0;
+ if (highest_i < 0)
+ return NULL;
+
+ return &state->planes[highest_i];
}
+/*
+ * Use the uniqueness of the plane's (zpos, drm obj ID) combination to iterate
+ * by descending zpos, as read from the new plane state. This is the same
+ * ordering as defined by drm_atomic_normalize_zpos().
+ */
+#define for_each_oldnew_plane_in_descending_zpos(__state, plane, old_plane_state, new_plane_state) \
+ for (struct __drm_planes_state *__i = __get_next_zpos((__state), NULL); \
+ __i != NULL; __i = __get_next_zpos((__state), __i)) \
+ for_each_if(((plane) = __i->ptr, \
+ (void)(plane) /* Only to avoid unused-but-set-variable warning */, \
+ (old_plane_state) = __i->old_state, \
+ (new_plane_state) = __i->new_state, 1))
+
static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm_crtc *crtc)
{
struct drm_connector *connector;
@@ -9964,6 +11784,9 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm
if (conn_state->crtc != crtc)
continue;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (!aconnector->mst_output_port || !aconnector->mst_root)
aconnector = NULL;
@@ -9978,6 +11801,193 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm
}
/**
+ * DOC: Cursor Modes - Native vs Overlay
+ *
+ * In native mode, the cursor uses an integrated cursor pipe within each DCN hw
+ * plane. It does not require a dedicated hw plane to enable, but it is
+ * subject to the same z-order and scaling as the hw plane. It also has format
+ * restrictions: an RGB cursor in native mode cannot be enabled within a non-RGB
+ * hw plane.
+ *
+ * In overlay mode, the cursor uses a separate DCN hw plane, and thus has its
+ * own scaling and z-pos. It also has no blending restrictions. This results in
+ * cursor behavior more akin to a DRM client's expectations. However, it does
+ * occupy an extra DCN plane, and therefore will only be used if a DCN plane is
+ * available.
+ */
+
+/**
+ * dm_crtc_get_cursor_mode() - Determine the required cursor mode on crtc
+ * @adev: amdgpu device
+ * @state: DRM atomic state
+ * @dm_crtc_state: amdgpu state for the CRTC containing the cursor
+ * @cursor_mode: Returns the required cursor mode on dm_crtc_state
+ *
+ * Get whether the cursor should be enabled in native mode, or overlay mode, on
+ * the dm_crtc_state.
+ *
+ * The cursor should be enabled in overlay mode if there exists an underlying
+ * plane - on which the cursor may be blended - that is either YUV formatted, or
+ * scaled differently from the cursor in either dimension. Overlay mode is also
+ * selected when the planes examined do not cover the entire CRTC.
+ *
+ * Since zpos info is required, drm_atomic_normalize_zpos must be called before
+ * calling this function.
+ *
+ * Return: 0 on success, or an error code if getting the cursor plane state
+ * failed.
+ */
+static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev,
+				   struct drm_atomic_state *state,
+				   struct dm_crtc_state *dm_crtc_state,
+				   enum amdgpu_dm_cursor_mode *cursor_mode)
+{
+	struct drm_plane_state *old_plane_state, *plane_state, *cursor_state;
+	struct drm_crtc_state *crtc_state = &dm_crtc_state->base;
+	struct drm_plane *plane;
+	bool consider_mode_change = false;
+	bool entire_crtc_covered = false;
+	bool cursor_changed = false;
+	int underlying_scale_w, underlying_scale_h;
+	int cursor_scale_w, cursor_scale_h;
+	int i;
+
+	/*
+	 * Overlay cursor is not supported on HW before DCN. DCN401 does not
+	 * have the cursor-on-scaled-plane or cursor-on-yuv-plane restrictions
+	 * of previous DCN generations, so use native mode on DCN401 in
+	 * addition to DCE.
+	 */
+	if (amdgpu_ip_version(adev, DCE_HWIP, 0) == 0 ||
+	    amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(4, 0, 1)) {
+		*cursor_mode = DM_CURSOR_NATIVE_MODE;
+		return 0;
+	}
+
+	/* Init cursor_mode to be the same as current */
+	*cursor_mode = dm_crtc_state->cursor_mode;
+
+	/*
+	 * Cursor mode can change if a plane's format changes, scale changes, is
+	 * enabled/disabled, or z-order changes.
+	 */
+	for_each_oldnew_plane_in_state(state, plane, old_plane_state, plane_state, i) {
+		int new_scale_w, new_scale_h, old_scale_w, old_scale_h;
+
+		/* Only care about planes on this CRTC */
+		if ((drm_plane_mask(plane) & crtc_state->plane_mask) == 0)
+			continue;
+
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
+			cursor_changed = true;
+
+		if (drm_atomic_plane_enabling(old_plane_state, plane_state) ||
+		    drm_atomic_plane_disabling(old_plane_state, plane_state) ||
+		    old_plane_state->fb->format != plane_state->fb->format) {
+			consider_mode_change = true;
+			break;
+		}
+
+		dm_get_plane_scale(plane_state, &new_scale_w, &new_scale_h);
+		dm_get_plane_scale(old_plane_state, &old_scale_w, &old_scale_h);
+		if (new_scale_w != old_scale_w || new_scale_h != old_scale_h) {
+			consider_mode_change = true;
+			break;
+		}
+	}
+
+	if (!consider_mode_change && !crtc_state->zpos_changed)
+		return 0;
+
+	/*
+	 * If no cursor change on this CRTC, and not enabled on this CRTC, then
+	 * no need to set cursor mode. This avoids needlessly locking the cursor
+	 * state.
+	 */
+	if (!cursor_changed &&
+	    !(drm_plane_mask(crtc_state->crtc->cursor) & crtc_state->plane_mask)) {
+		return 0;
+	}
+
+	cursor_state = drm_atomic_get_plane_state(state,
+						  crtc_state->crtc->cursor);
+	if (IS_ERR(cursor_state))
+		return PTR_ERR(cursor_state);
+
+	/* Cursor is disabled */
+	if (!cursor_state->fb)
+		return 0;
+
+	/*
+	 * For all planes in descending z-order (all of which are below cursor
+	 * as per zpos definitions), check their scaling and format.
+	 */
+	for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, plane_state) {
+
+		/* Only care about non-cursor planes on this CRTC */
+		if ((drm_plane_mask(plane) & crtc_state->plane_mask) == 0 ||
+		    plane->type == DRM_PLANE_TYPE_CURSOR)
+			continue;
+
+		/* Underlying plane is YUV format - use overlay cursor */
+		if (amdgpu_dm_plane_is_video_format(plane_state->fb->format->format)) {
+			*cursor_mode = DM_CURSOR_OVERLAY_MODE;
+			return 0;
+		}
+
+		dm_get_plane_scale(plane_state,
+				   &underlying_scale_w, &underlying_scale_h);
+		dm_get_plane_scale(cursor_state,
+				   &cursor_scale_w, &cursor_scale_h);
+
+		/*
+		 * Underlying plane has different scale - use overlay cursor.
+		 * A mismatch in either dimension is enough: a native cursor
+		 * takes on the underlying plane's scaling, so it would be
+		 * stretched along whichever axis differs.
+		 */
+		if (cursor_scale_w != underlying_scale_w ||
+		    cursor_scale_h != underlying_scale_h) {
+			*cursor_mode = DM_CURSOR_OVERLAY_MODE;
+			return 0;
+		}
+
+		/* If this plane covers the whole CRTC, no need to check planes underneath */
+		if (plane_state->crtc_x <= 0 && plane_state->crtc_y <= 0 &&
+		    plane_state->crtc_x + plane_state->crtc_w >= crtc_state->mode.hdisplay &&
+		    plane_state->crtc_y + plane_state->crtc_h >= crtc_state->mode.vdisplay) {
+			entire_crtc_covered = true;
+			break;
+		}
+	}
+
+	/*
+	 * If planes do not cover the entire CRTC, use overlay mode to enable
+	 * cursor over holes.
+	 */
+	if (entire_crtc_covered)
+		*cursor_mode = DM_CURSOR_NATIVE_MODE;
+	else
+		*cursor_mode = DM_CURSOR_OVERLAY_MODE;
+
+	return 0;
+}
+
+/**
+ * amdgpu_dm_crtc_mem_type_changed() - Check a CRTC's planes for a memory type change
+ * @dev: DRM device, used to walk the planes and for error logging
+ * @state: DRM atomic state the plane states are looked up in
+ * @crtc_state: CRTC state whose plane_mask selects the planes to inspect
+ *
+ * Return: true if any selected plane has a framebuffer in both its old and its
+ * new state and get_mem_type() differs between the two. false otherwise,
+ * including when the new plane state cannot be obtained.
+ */
+static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev,
+					    struct drm_atomic_state *state,
+					    struct drm_crtc_state *crtc_state)
+{
+	struct drm_plane *plane;
+	struct drm_plane_state *new_plane_state, *old_plane_state;
+
+	drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) {
+		new_plane_state = drm_atomic_get_plane_state(state, plane);
+		if (IS_ERR(new_plane_state)) {
+			drm_err(dev, "Failed to get plane state for plane %s\n", plane->name);
+			return false;
+		}
+
+		/*
+		 * drm_atomic_get_plane_state() hands back the new state, so
+		 * asking it twice would compare a framebuffer with itself.
+		 * The old state has to come from the old-state accessor, which
+		 * is valid once the plane has been added to @state above.
+		 */
+		old_plane_state = drm_atomic_get_old_plane_state(state, plane);
+
+		if (old_plane_state && old_plane_state->fb && new_plane_state->fb &&
+		    get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb))
+			return true;
+	}
+
+	return false;
+}
+
+/**
* amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
*
* @dev: The DRM device
@@ -10013,7 +12023,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
struct drm_crtc *crtc;
struct drm_crtc_state *old_crtc_state, *new_crtc_state;
struct drm_plane *plane;
- struct drm_plane_state *old_plane_state, *new_plane_state;
+ struct drm_plane_state *old_plane_state, *new_plane_state, *new_cursor_state;
enum dc_status status;
int ret, i;
bool lock_and_validation_needed = false;
@@ -10021,13 +12031,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state;
struct drm_dp_mst_topology_mgr *mgr;
struct drm_dp_mst_topology_state *mst_state;
- struct dsc_mst_fairness_vars vars[MAX_PIPES];
+ struct dsc_mst_fairness_vars vars[MAX_PIPES] = {0};
trace_amdgpu_dm_atomic_check_begin(state);
ret = drm_atomic_helper_check_modeset(dev, state);
if (ret) {
- DRM_DEBUG_DRIVER("drm_atomic_helper_check_modeset() failed\n");
+ drm_dbg_atomic(dev, "drm_atomic_helper_check_modeset() failed\n");
goto fail;
}
@@ -10042,7 +12052,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
new_crtc_state = drm_atomic_get_crtc_state(state, new_con_state->crtc);
if (IS_ERR(new_crtc_state)) {
- DRM_DEBUG_DRIVER("drm_atomic_get_crtc_state() failed\n");
+ drm_dbg_atomic(dev, "drm_atomic_get_crtc_state() failed\n");
ret = PTR_ERR(new_crtc_state);
goto fail;
}
@@ -10057,7 +12067,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
if (drm_atomic_crtc_needs_modeset(new_crtc_state)) {
ret = add_affected_mst_dsc_crtcs(state, crtc);
if (ret) {
- DRM_DEBUG_DRIVER("add_affected_mst_dsc_crtcs() failed\n");
+ drm_dbg_atomic(dev, "add_affected_mst_dsc_crtcs() failed\n");
goto fail;
}
}
@@ -10074,7 +12084,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
ret = amdgpu_dm_verify_lut_sizes(new_crtc_state);
if (ret) {
- DRM_DEBUG_DRIVER("amdgpu_dm_verify_lut_sizes() failed\n");
+ drm_dbg_atomic(dev, "amdgpu_dm_verify_lut_sizes() failed\n");
goto fail;
}
@@ -10083,13 +12093,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
ret = drm_atomic_add_affected_connectors(state, crtc);
if (ret) {
- DRM_DEBUG_DRIVER("drm_atomic_add_affected_connectors() failed\n");
+ drm_dbg_atomic(dev, "drm_atomic_add_affected_connectors() failed\n");
goto fail;
}
ret = drm_atomic_add_affected_planes(state, crtc);
if (ret) {
- DRM_DEBUG_DRIVER("drm_atomic_add_affected_planes() failed\n");
+ drm_dbg_atomic(dev, "drm_atomic_add_affected_planes() failed\n");
goto fail;
}
@@ -10128,7 +12138,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
if (IS_ERR(new_plane_state)) {
ret = PTR_ERR(new_plane_state);
- DRM_DEBUG_DRIVER("new_plane_state is BAD\n");
+ drm_dbg_atomic(dev, "new_plane_state is BAD\n");
goto fail;
}
}
@@ -10146,12 +12156,34 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
goto fail;
}
+ /*
+ * Determine whether cursors on each CRTC should be enabled in native or
+ * overlay mode.
+ */
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+
+ ret = dm_crtc_get_cursor_mode(adev, state, dm_new_crtc_state,
+ &dm_new_crtc_state->cursor_mode);
+ if (ret) {
+ drm_dbg(dev, "Failed to determine cursor mode\n");
+ goto fail;
+ }
+
+ /*
+ * If overlay cursor is needed, DC cannot go through the
+ * native cursor update path. All enabled planes on the CRTC
+ * need to be added for DC to not disable a plane by mistake
+ */
+ if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) {
+ ret = drm_atomic_add_affected_planes(state, crtc);
+ if (ret)
+ goto fail;
+ }
+ }
+
/* Remove exiting planes if they are modified */
- for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
- if (old_plane_state->fb && new_plane_state->fb &&
- get_mem_type(old_plane_state->fb) !=
- get_mem_type(new_plane_state->fb))
- lock_and_validation_needed = true;
+ for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) {
ret = dm_update_plane_state(dc, state, plane,
old_plane_state,
@@ -10160,7 +12192,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
&lock_and_validation_needed,
&is_top_most_overlay);
if (ret) {
- DRM_DEBUG_DRIVER("dm_update_plane_state() failed\n");
+ drm_dbg_atomic(dev, "dm_update_plane_state() failed\n");
goto fail;
}
}
@@ -10173,7 +12205,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
false,
&lock_and_validation_needed);
if (ret) {
- DRM_DEBUG_DRIVER("DISABLE: dm_update_crtc_state() failed\n");
+ drm_dbg_atomic(dev, "DISABLE: dm_update_crtc_state() failed\n");
goto fail;
}
}
@@ -10186,13 +12218,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
true,
&lock_and_validation_needed);
if (ret) {
- DRM_DEBUG_DRIVER("ENABLE: dm_update_crtc_state() failed\n");
+ drm_dbg_atomic(dev, "ENABLE: dm_update_crtc_state() failed\n");
goto fail;
}
}
/* Add new/modified planes */
- for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
+ for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) {
ret = dm_update_plane_state(dc, state, plane,
old_plane_state,
new_plane_state,
@@ -10200,35 +12232,75 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
&lock_and_validation_needed,
&is_top_most_overlay);
if (ret) {
- DRM_DEBUG_DRIVER("dm_update_plane_state() failed\n");
+ drm_dbg_atomic(dev, "dm_update_plane_state() failed\n");
goto fail;
}
}
+#if defined(CONFIG_DRM_AMD_DC_FP)
if (dc_resource_is_dsc_encoding_supported(dc)) {
ret = pre_validate_dsc(state, &dm_state, vars);
if (ret != 0)
goto fail;
}
+#endif
/* Run this here since we want to validate the streams we created */
ret = drm_atomic_helper_check_planes(dev, state);
if (ret) {
- DRM_DEBUG_DRIVER("drm_atomic_helper_check_planes() failed\n");
+ drm_dbg_atomic(dev, "drm_atomic_helper_check_planes() failed\n");
goto fail;
}
for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
if (dm_new_crtc_state->mpo_requested)
- DRM_DEBUG_DRIVER("MPO enablement requested on crtc:[%p]\n", crtc);
+ drm_dbg_atomic(dev, "MPO enablement requested on crtc:[%p]\n", crtc);
}
- /* Check cursor planes scaling */
+ /* Check cursor restrictions */
for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
- ret = dm_check_crtc_cursor(state, crtc, new_crtc_state);
+ enum amdgpu_dm_cursor_mode required_cursor_mode;
+ int is_rotated, is_scaled;
+
+ /* Overlay cursor not subject to native cursor restrictions */
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE)
+ continue;
+
+ /* Check if rotation or scaling is enabled on DCN401 */
+ if ((drm_plane_mask(crtc->cursor) & new_crtc_state->plane_mask) &&
+ amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(4, 0, 1)) {
+ new_cursor_state = drm_atomic_get_new_plane_state(state, crtc->cursor);
+
+ is_rotated = new_cursor_state &&
+ ((new_cursor_state->rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_0);
+ is_scaled = new_cursor_state && ((new_cursor_state->src_w >> 16 != new_cursor_state->crtc_w) ||
+ (new_cursor_state->src_h >> 16 != new_cursor_state->crtc_h));
+
+ if (is_rotated || is_scaled) {
+ drm_dbg_driver(
+ crtc->dev,
+ "[CRTC:%d:%s] cannot enable hardware cursor due to rotation/scaling\n",
+ crtc->base.id, crtc->name);
+ ret = -EINVAL;
+ goto fail;
+ }
+ }
+
+ /* If HW can only do native cursor, check restrictions again */
+ ret = dm_crtc_get_cursor_mode(adev, state, dm_new_crtc_state,
+ &required_cursor_mode);
if (ret) {
- DRM_DEBUG_DRIVER("dm_check_crtc_cursor() failed\n");
+ drm_dbg_driver(crtc->dev,
+ "[CRTC:%d:%s] Checking cursor mode failed\n",
+ crtc->base.id, crtc->name);
+ goto fail;
+ } else if (required_cursor_mode == DM_CURSOR_OVERLAY_MODE) {
+ drm_dbg_driver(crtc->dev,
+ "[CRTC:%d:%s] Cannot enable native cursor due to scaling or YUV restrictions\n",
+ crtc->base.id, crtc->name);
+ ret = -EINVAL;
goto fail;
}
}
@@ -10311,26 +12383,30 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
if (lock_and_validation_needed) {
ret = dm_atomic_get_state(state, &dm_state);
if (ret) {
- DRM_DEBUG_DRIVER("dm_atomic_get_state() failed\n");
+ drm_dbg_atomic(dev, "dm_atomic_get_state() failed\n");
goto fail;
}
ret = do_aquire_global_lock(dev, state);
if (ret) {
- DRM_DEBUG_DRIVER("do_aquire_global_lock() failed\n");
+ drm_dbg_atomic(dev, "do_aquire_global_lock() failed\n");
goto fail;
}
- ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
- if (ret) {
- DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
- ret = -EINVAL;
- goto fail;
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ if (dc_resource_is_dsc_encoding_supported(dc)) {
+ ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
+ if (ret) {
+ drm_dbg_atomic(dev, "MST_DSC compute_mst_dsc_configs_for_state() failed\n");
+ ret = -EINVAL;
+ goto fail;
+ }
}
+#endif
ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars);
if (ret) {
- DRM_DEBUG_DRIVER("dm_update_mst_vcpi_slots_for_dsc() failed\n");
+ drm_dbg_atomic(dev, "dm_update_mst_vcpi_slots_for_dsc() failed\n");
goto fail;
}
@@ -10342,12 +12418,12 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
*/
ret = drm_dp_mst_atomic_check(state);
if (ret) {
- DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n");
+ drm_dbg_atomic(dev, "MST drm_dp_mst_atomic_check() failed\n");
goto fail;
}
- status = dc_validate_global_state(dc, dm_state->context, true);
+ status = dc_validate_global_state(dc, dm_state->context, DC_VALIDATE_MODE_ONLY);
if (status != DC_OK) {
- DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)",
+ drm_dbg_atomic(dev, "DC global validation failure: %s (%d)",
dc_status_to_str(status), status);
ret = -EINVAL;
goto fail;
@@ -10402,9 +12478,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
/*
* Only allow async flips for fast updates that don't change
- * the FB pitch, the DCC state, rotation, etc.
+ * the FB pitch, the DCC state, rotation, mem_type, etc.
*/
- if (new_crtc_state->async_flip && lock_and_validation_needed) {
+ if (new_crtc_state->async_flip &&
+ (lock_and_validation_needed ||
+ amdgpu_dm_crtc_mem_type_changed(dev, state, new_crtc_state))) {
drm_dbg_atomic(crtc->dev,
"[CRTC:%d:%s] async flips are only supported for fast updates\n",
crtc->base.id, crtc->name);
@@ -10425,36 +12503,17 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
fail:
if (ret == -EDEADLK)
- DRM_DEBUG_DRIVER("Atomic check stopped to avoid deadlock.\n");
+ drm_dbg_atomic(dev, "Atomic check stopped to avoid deadlock.\n");
else if (ret == -EINTR || ret == -EAGAIN || ret == -ERESTARTSYS)
- DRM_DEBUG_DRIVER("Atomic check stopped due to signal.\n");
+ drm_dbg_atomic(dev, "Atomic check stopped due to signal.\n");
else
- DRM_DEBUG_DRIVER("Atomic check failed with err: %d\n", ret);
+ drm_dbg_atomic(dev, "Atomic check failed with err: %d\n", ret);
trace_amdgpu_dm_atomic_check_finish(state, ret);
return ret;
}
-static bool is_dp_capable_without_timing_msa(struct dc *dc,
- struct amdgpu_dm_connector *amdgpu_dm_connector)
-{
- u8 dpcd_data;
- bool capable = false;
-
- if (amdgpu_dm_connector->dc_link &&
- dm_helpers_dp_read_dpcd(
- NULL,
- amdgpu_dm_connector->dc_link,
- DP_DOWN_STREAM_PORT_COUNT,
- &dpcd_data,
- sizeof(dpcd_data))) {
- capable = (dpcd_data & DP_MSA_TIMING_PAR_IGNORED) ? true:false;
- }
-
- return capable;
-}
-
static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
unsigned int offset,
unsigned int total_length,
@@ -10483,9 +12542,9 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
input->cea_total_length = total_length;
memcpy(input->payload, data, length);
- res = dm_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+ res = dc_wake_and_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
if (!res) {
- DRM_ERROR("EDID CEA parser failed\n");
+ drm_err(adev_to_drm(dm->adev), "EDID CEA parser failed\n");
return false;
}
@@ -10493,7 +12552,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
if (output->type == DMUB_CMD__EDID_CEA_ACK) {
if (!output->ack.success) {
- DRM_ERROR("EDID CEA ack failed at offset %d\n",
+ drm_err(adev_to_drm(dm->adev), "EDID CEA ack failed at offset %d\n",
output->ack.offset);
}
} else if (output->type == DMUB_CMD__EDID_CEA_AMD_VSDB) {
@@ -10505,7 +12564,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm,
vsdb->min_refresh_rate_hz = output->amd_vsdb.min_frame_rate;
vsdb->max_refresh_rate_hz = output->amd_vsdb.max_frame_rate;
} else {
- DRM_WARN("Unknown EDID CEA parser results\n");
+ drm_warn(adev_to_drm(dm->adev), "Unknown EDID CEA parser results\n");
return false;
}
@@ -10585,8 +12644,51 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector,
return ret;
}
+/**
+ * parse_edid_displayid_vrr() - Read the refresh rate range from a DisplayID extension
+ * @connector: connector whose display_info.monitor_range is filled in
+ * @edid: raw EDID to scan; may be NULL
+ *
+ * Looks for the first extension block tagged DISPLAYID_EXT and scans it for a
+ * dynamic video timing range. If one is found with both limits non-zero, they
+ * are stored in @connector->display_info.monitor_range; otherwise nothing is
+ * written.
+ */
+static void parse_edid_displayid_vrr(struct drm_connector *connector,
+				     const struct edid *edid)
+{
+	u8 *edid_ext = NULL;
+	int i;
+	int j = 0;
+	u16 min_vfreq;
+	u16 max_vfreq;
+
+	if (edid == NULL || edid->extensions == 0)
+		return;
+
+	/* Find DisplayID extension */
+	for (i = 0; i < edid->extensions; i++) {
+		edid_ext = (void *)(edid + (i + 1));
+		if (edid_ext[0] == DISPLAYID_EXT)
+			break;
+	}
+
+	/*
+	 * The loop always leaves edid_ext pointing at the last block it looked
+	 * at, so a NULL test cannot tell whether a DisplayID block was found.
+	 * Running off the end of the loop is the "not found" signal; without
+	 * this check an unrelated extension block would be parsed below.
+	 */
+	if (i == edid->extensions)
+		return;
+
+	while (j < EDID_LENGTH) {
+		/* Get dynamic video timing range from DisplayID if available */
+		if (EDID_LENGTH - j > 13 && edid_ext[j] == 0x25 &&
+		    (edid_ext[j+1] & 0xFE) == 0 && (edid_ext[j+2] == 9)) {
+			min_vfreq = edid_ext[j+9];
+			/* Extra high bits of the maximum come from byte j+11 when flagged */
+			if (edid_ext[j+1] & 7)
+				max_vfreq = edid_ext[j+10] + ((edid_ext[j+11] & 3) << 8);
+			else
+				max_vfreq = edid_ext[j+10];
+
+			if (max_vfreq && min_vfreq) {
+				connector->display_info.monitor_range.max_vfreq = max_vfreq;
+				connector->display_info.monitor_range.min_vfreq = min_vfreq;
+
+				return;
+			}
+		}
+		j++;
+	}
+}
+
static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector,
- struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info)
+ const struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info)
{
u8 *edid_ext = NULL;
int i;
@@ -10602,7 +12704,7 @@ static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector,
break;
}
- while (j < EDID_LENGTH) {
+ while (j < EDID_LENGTH - sizeof(struct amd_vsdb_block)) {
struct amd_vsdb_block *amd_vsdb = (struct amd_vsdb_block *)&edid_ext[j];
unsigned int ieeeId = (amd_vsdb->ieee_id[2] << 16) | (amd_vsdb->ieee_id[1] << 8) | (amd_vsdb->ieee_id[0]);
@@ -10621,7 +12723,8 @@ static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector,
}
static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector,
- struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info)
+ const struct edid *edid,
+ struct amdgpu_hdmi_vsdb_info *vsdb_info)
{
u8 *edid_ext = NULL;
int i;
@@ -10655,7 +12758,7 @@ static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector,
* amdgpu_dm_update_freesync_caps - Update Freesync capabilities
*
* @connector: Connector to query.
- * @edid: EDID from monitor
+ * @drm_edid: DRM EDID from monitor
*
* Amdgpu supports Freesync in DP and HDMI displays, and it is required to keep
* track of some of the display information in the internal data struct used by
@@ -10663,25 +12766,21 @@ static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector,
* FreeSync parameters.
*/
void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
- struct edid *edid)
+ const struct drm_edid *drm_edid)
{
int i = 0;
- struct detailed_timing *timing;
- struct detailed_non_pixel *data;
- struct detailed_data_monitor_range *range;
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
struct dm_connector_state *dm_con_state = NULL;
struct dc_sink *sink;
-
- struct drm_device *dev = connector->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = drm_to_adev(connector->dev);
struct amdgpu_hdmi_vsdb_info vsdb_info = {0};
+ const struct edid *edid;
bool freesync_capable = false;
enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE;
if (!connector->state) {
- DRM_ERROR("%s - Connector has no state", __func__);
+ drm_err(adev_to_drm(adev), "%s - Connector has no state", __func__);
goto update;
}
@@ -10689,14 +12788,13 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
amdgpu_dm_connector->dc_sink :
amdgpu_dm_connector->dc_em_sink;
- if (!edid || !sink) {
+ drm_edid_connector_update(connector, drm_edid);
+
+ if (!drm_edid || !sink) {
dm_con_state = to_dm_connector_state(connector->state);
amdgpu_dm_connector->min_vfreq = 0;
amdgpu_dm_connector->max_vfreq = 0;
- amdgpu_dm_connector->pixel_clock_mhz = 0;
- connector->display_info.monitor_range.min_vfreq = 0;
- connector->display_info.monitor_range.max_vfreq = 0;
freesync_capable = false;
goto update;
@@ -10704,57 +12802,26 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
dm_con_state = to_dm_connector_state(connector->state);
- if (!adev->dm.freesync_module)
+ if (!adev->dm.freesync_module || !dc_supports_vrr(sink->ctx->dce_version))
goto update;
- if (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT
- || sink->sink_signal == SIGNAL_TYPE_EDP) {
- bool edid_check_required = false;
-
- if (edid) {
- edid_check_required = is_dp_capable_without_timing_msa(
- adev->dm.dc,
- amdgpu_dm_connector);
- }
+ edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw()
- if (edid_check_required == true && (edid->version > 1 ||
- (edid->version == 1 && edid->revision > 1))) {
- for (i = 0; i < 4; i++) {
-
- timing = &edid->detailed_timings[i];
- data = &timing->data.other_data;
- range = &data->data.range;
- /*
- * Check if monitor has continuous frequency mode
- */
- if (data->type != EDID_DETAIL_MONITOR_RANGE)
- continue;
- /*
- * Check for flag range limits only. If flag == 1 then
- * no additional timing information provided.
- * Default GTF, GTF Secondary curve and CVT are not
- * supported
- */
- if (range->flags != 1)
- continue;
-
- amdgpu_dm_connector->min_vfreq = range->min_vfreq;
- amdgpu_dm_connector->max_vfreq = range->max_vfreq;
- amdgpu_dm_connector->pixel_clock_mhz =
- range->pixel_clock_mhz * 10;
-
- connector->display_info.monitor_range.min_vfreq = range->min_vfreq;
- connector->display_info.monitor_range.max_vfreq = range->max_vfreq;
-
- break;
- }
-
- if (amdgpu_dm_connector->max_vfreq -
- amdgpu_dm_connector->min_vfreq > 10) {
+ /* Some eDP panels only have the refresh rate range info in DisplayID */
+ if ((connector->display_info.monitor_range.min_vfreq == 0 ||
+ connector->display_info.monitor_range.max_vfreq == 0))
+ parse_edid_displayid_vrr(connector, edid);
+ if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ sink->sink_signal == SIGNAL_TYPE_EDP)) {
+ if (amdgpu_dm_connector->dc_link &&
+ amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) {
+ amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq;
+ amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq;
+ if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
freesync_capable = true;
- }
}
+
parse_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info);
if (vsdb_info.replay_mode) {
@@ -10763,12 +12830,9 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
amdgpu_dm_connector->as_type = ADAPTIVE_SYNC_TYPE_EDP;
}
- } else if (edid && sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) {
+ } else if (drm_edid && sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) {
i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info);
if (i >= 0 && vsdb_info.freesync_supported) {
- timing = &edid->detailed_timings[i];
- data = &timing->data.other_data;
-
amdgpu_dm_connector->min_vfreq = vsdb_info.min_refresh_rate_hz;
amdgpu_dm_connector->max_vfreq = vsdb_info.max_refresh_rate_hz;
if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10)
@@ -10779,7 +12843,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
}
}
- as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link);
+ if (amdgpu_dm_connector->dc_link)
+ as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link);
if (as_type == FREESYNC_TYPE_PCON_IN_WHITELIST) {
i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info);
@@ -10803,6 +12868,12 @@ update:
if (dm_con_state)
dm_con_state->freesync_capable = freesync_capable;
+ if (connector->state && amdgpu_dm_connector->dc_link && !freesync_capable &&
+ amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported) {
+ amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported = false;
+ amdgpu_dm_connector->dc_link->replay_settings.replay_feature_enabled = false;
+ }
+
if (connector->vrr_capable_property)
drm_connector_set_vrr_capable_property(connector,
freesync_capable);
@@ -10827,15 +12898,24 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev)
mutex_unlock(&adev->dm.dc_lock);
}
+/*
+ * Call dc_exit_ips_for_hw_access() when a DMUB service is present and its
+ * idle_exit_counter is zero; do nothing otherwise.
+ */
+static inline void amdgpu_dm_exit_ips_for_hw_access(struct dc *dc)
+{
+ if (dc->ctx->dmub_srv && !dc->ctx->dmub_srv->idle_exit_counter)
+ dc_exit_ips_for_hw_access(dc);
+}
+
void dm_write_reg_func(const struct dc_context *ctx, uint32_t address,
u32 value, const char *func_name)
{
#ifdef DM_CHECK_ADDR_0
if (address == 0) {
- DC_ERR("invalid register write. address = 0");
+ drm_err(adev_to_drm(ctx->driver_context),
+ "invalid register write. address = 0");
return;
}
#endif
+
+ amdgpu_dm_exit_ips_for_hw_access(ctx->dc);
cgs_write_register(ctx->cgs_device, address, value);
trace_amdgpu_dc_wreg(&ctx->perf_trace->write_count, address, value);
}
@@ -10846,7 +12926,8 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
u32 value;
#ifdef DM_CHECK_ADDR_0
if (address == 0) {
- DC_ERR("invalid register read; address = 0\n");
+ drm_err(adev_to_drm(ctx->driver_context),
+ "invalid register read; address = 0\n");
return 0;
}
#endif
@@ -10858,6 +12939,8 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
return 0;
}
+ amdgpu_dm_exit_ips_for_hw_access(ctx->dc);
+
value = cgs_read_register(ctx->cgs_device, address);
trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value);
@@ -10882,7 +12965,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync(
}
if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
- DRM_ERROR("wait_for_completion_timeout timeout!");
+ drm_err(adev_to_drm(adev), "wait_for_completion_timeout timeout!");
*operation_result = AUX_RET_ERROR_TIMEOUT;
goto out;
}
@@ -10892,31 +12975,24 @@ int amdgpu_dm_process_dmub_aux_transfer_sync(
* Transient states before tunneling is enabled could
* lead to this error. We can ignore this for now.
*/
- if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) {
- DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n",
+ if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) {
+ drm_warn(adev_to_drm(adev), "DPIA AUX failed on 0x%x(%d), error %d\n",
payload->address, payload->length,
p_notify->result);
}
- *operation_result = AUX_RET_ERROR_INVALID_REPLY;
+ *operation_result = p_notify->result;
goto out;
}
+ payload->reply[0] = adev->dm.dmub_notify->aux_reply.command & 0xF;
+ if (adev->dm.dmub_notify->aux_reply.command & 0xF0)
+ /* The reply is stored in the top nibble of the command. */
+ payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF;
- payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
- if (!payload->write && p_notify->aux_reply.length &&
- (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) {
-
- if (payload->length != p_notify->aux_reply.length) {
- DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n",
- p_notify->aux_reply.length,
- payload->address, payload->length);
- *operation_result = AUX_RET_ERROR_INVALID_REPLY;
- goto out;
- }
-
+ /* A write request may also receive a byte indicating the partially written number */
+ if (p_notify->aux_reply.length)
memcpy(payload->data, p_notify->aux_reply.data,
p_notify->aux_reply.length);
- }
/* success */
ret = p_notify->aux_reply.length;
@@ -10927,6 +13003,79 @@ out:
return ret;
}
+/*
+ * Send a DMUB_CMD__FUSED_IO command with the DMUB_CMD__FUSED_IO_ABORT sub-type
+ * carrying a copy of @request. The command is submitted with
+ * DM_DMUB_WAIT_TYPE_NO_WAIT and the result of the submission is not checked.
+ */
+static void abort_fused_io(
+ struct dc_context *ctx,
+ const struct dmub_cmd_fused_request *request
+)
+{
+ union dmub_rb_cmd command = { 0 };
+ struct dmub_rb_cmd_fused_io *io = &command.fused_io;
+
+ io->header.type = DMUB_CMD__FUSED_IO;
+ io->header.sub_type = DMUB_CMD__FUSED_IO_ABORT;
+ /* Payload size excludes the command header itself */
+ io->header.payload_bytes = sizeof(*io) - sizeof(io->header);
+ io->request = *request;
+ dm_execute_dmub_cmd(ctx, &command, DM_DMUB_WAIT_TYPE_NO_WAIT);
+}
+
+/*
+ * Submit @count fused I/O commands and wait for the reply that matches the
+ * first one.
+ *
+ * The DDC line of commands[0] selects the per-line sync object in
+ * dev->dm.fused_io; an out-of-range line fails immediately. The list is
+ * submitted with DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY and must come back with a
+ * non-zero ret_status and FUSED_REQUEST_STATUS_SUCCESS on the first command.
+ * After that, each completion of sync->replied is compared by identifier
+ * against the first request; on a match the reply is copied into
+ * commands[0].fused_io.request.
+ *
+ * Returns true when a matching reply was received. Returns false on any
+ * failure; when a wait times out, the first request is marked
+ * FUSED_REQUEST_STATUS_TIMEOUT and an abort is sent for it.
+ */
+static bool execute_fused_io(
+ struct amdgpu_device *dev,
+ struct dc_context *ctx,
+ union dmub_rb_cmd *commands,
+ uint8_t count,
+ uint32_t timeout_us
+)
+{
+ const uint8_t ddc_line = commands[0].fused_io.request.u.aux.ddc_line;
+
+ if (ddc_line >= ARRAY_SIZE(dev->dm.fused_io))
+ return false;
+
+ struct fused_io_sync *sync = &dev->dm.fused_io[ddc_line];
+ struct dmub_rb_cmd_fused_io *first = &commands[0].fused_io;
+ const bool result = dm_execute_dmub_cmd_list(ctx, count, commands, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)
+ && first->header.ret_status
+ && first->request.status == FUSED_REQUEST_STATUS_SUCCESS;
+
+ if (!result)
+ return false;
+
+ /* Each wait gets the full timeout_us; replies with another identifier are skipped */
+ while (wait_for_completion_timeout(&sync->replied, usecs_to_jiffies(timeout_us))) {
+ reinit_completion(&sync->replied);
+
+ struct dmub_cmd_fused_request *reply = (struct dmub_cmd_fused_request *) sync->reply_data;
+
+ static_assert(sizeof(*reply) <= sizeof(sync->reply_data), "Size mismatch");
+
+ if (reply->identifier == first->request.identifier) {
+ first->request = *reply;
+ return true;
+ }
+ }
+
+ /* Timed out: re-arm the completion, record the timeout and send an abort */
+ reinit_completion(&sync->replied);
+ first->request.status = FUSED_REQUEST_STATUS_TIMEOUT;
+ abort_fused_io(ctx, &first->request);
+ return false;
+}
+
+/*
+ * Run execute_fused_io() for @link's DC context while holding
+ * dm->dpia_aux_lock, and return its result.
+ */
+bool amdgpu_dm_execute_fused_io(
+ struct amdgpu_device *dev,
+ struct dc_link *link,
+ union dmub_rb_cmd *commands,
+ uint8_t count,
+ uint32_t timeout_us)
+{
+ struct amdgpu_display_manager *dm = &dev->dm;
+
+ mutex_lock(&dm->dpia_aux_lock);
+
+ const bool result = execute_fused_io(dev, link->ctx, commands, count, timeout_us);
+
+ mutex_unlock(&dm->dpia_aux_lock);
+ return result;
+}
+
int amdgpu_dm_process_dmub_set_config_sync(
struct dc_context *ctx,
unsigned int link_index,
@@ -10945,7 +13094,7 @@ int amdgpu_dm_process_dmub_set_config_sync(
ret = 0;
*operation_result = adev->dm.dmub_notify->sc_status;
} else {
- DRM_ERROR("wait_for_completion_timeout timeout!");
+ drm_err(adev_to_drm(adev), "wait_for_completion_timeout timeout!");
ret = -1;
*operation_result = SET_CONFIG_UNKNOWN_ERROR;
}
@@ -10956,27 +13105,6 @@ int amdgpu_dm_process_dmub_set_config_sync(
return ret;
}
-/*
- * Check whether seamless boot is supported.
- *
- * So far we only support seamless boot on CHIP_VANGOGH.
- * If everything goes well, we may consider expanding
- * seamless boot to other ASICs.
- */
-bool check_seamless_boot_capability(struct amdgpu_device *adev)
-{
- switch (adev->ip_versions[DCE_HWIP][0]) {
- case IP_VERSION(3, 0, 1):
- if (!adev->mman.keep_stolen_vga_memory)
- return true;
- break;
- default:
- break;
- }
-
- return false;
-}
-
bool dm_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
{
return dc_dmub_srv_cmd_run(ctx->dmub_srv, cmd, wait_type);
@@ -10986,3 +13114,10 @@ bool dm_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
{
return dc_dmub_srv_cmd_run_list(ctx->dmub_srv, count, cmd, wait_type);
}
+
+void dm_acpi_process_phy_transition_interlock(
+ const struct dc_context *ctx,
+ struct dm_process_phy_transition_init_params process_phy_transition_init_params)
+{
+ // Not yet implemented
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 9e4cc5eeda76..009f206226f0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright (C) 2015-2020 Advanced Micro Devices, Inc. All rights reserved.
*
@@ -32,6 +33,7 @@
#include <drm/drm_crtc.h>
#include <drm/drm_plane.h>
#include "link_service_types.h"
+#include <drm/drm_writeback.h>
/*
* This file contains the definition for amdgpu_display_manager
@@ -49,11 +51,14 @@
#define AMDGPU_DM_MAX_NUM_EDP 2
-#define AMDGPU_DMUB_NOTIFICATION_MAX 5
+#define AMDGPU_DMUB_NOTIFICATION_MAX 8
#define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A
#define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40
#define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_VERSION_3 0x3
+
+#define AMDGPU_HDR_MULT_DEFAULT (0x100000000LL)
+
/*
#include "include/amdgpu_dal_power_if.h"
#include "amdgpu_dm_irq.h"
@@ -77,6 +82,7 @@ struct amdgpu_bo;
struct dmub_srv;
struct dc_plane_state;
struct dmub_notification;
+struct dmub_cmd_fused_request;
struct amd_vsdb_block {
unsigned char ieee_id[3];
@@ -134,6 +140,46 @@ struct vblank_control_work {
};
/**
+ * struct idle_workqueue - Work data for periodic action in idle
+ * @work: Kernel work data for the work event
+ * @dm: amdgpu display manager device
+ * @enable: true if idle worker is enabled
+ * @running: true if idle worker is running
+ */
+struct idle_workqueue {
+ struct work_struct work;
+ struct amdgpu_display_manager *dm;
+ bool enable;
+ bool running;
+};
+
+/**
+ * struct vupdate_offload_work - Work data for offloading task from vupdate handler
+ * @work: Kernel work data for the work event
+ * @adev: amdgpu_device back pointer
+ * @stream: DC stream associated with the crtc
+ * @adjust: DC CRTC timing adjust to be applied to the crtc
+ */
+struct vupdate_offload_work {
+ struct work_struct work;
+ struct amdgpu_device *adev;
+ struct dc_stream_state *stream;
+ struct dc_crtc_timing_adjust *adjust;
+};
+
+#define MAX_LUMINANCE_DATA_POINTS 99
+
+/**
+ * struct amdgpu_dm_luminance_data - Custom luminance data
+ * @luminance: Luminance in percent
+ * @input_signal: Input signal in range 0-255
+ */
+struct amdgpu_dm_luminance_data {
+ u8 luminance;
+ u8 input_signal;
+} __packed;
+
+/**
* struct amdgpu_dm_backlight_caps - Information about backlight
*
* Describe the backlight support for ACPI or eDP AUX.
@@ -169,6 +215,27 @@ struct amdgpu_dm_backlight_caps {
* @aux_support: Describes if the display supports AUX backlight.
*/
bool aux_support;
+ /**
+ * @brightness_mask: After deriving brightness, OR it with this mask.
+ * Workaround for panels with issues with certain brightness values.
+ */
+ u32 brightness_mask;
+ /**
+ * @ac_level: the default brightness if booted on AC
+ */
+ u8 ac_level;
+ /**
+ * @dc_level: the default brightness if booted on DC
+ */
+ u8 dc_level;
+ /**
+ * @data_points: the number of custom luminance data points
+ */
+ u8 data_points;
+ /**
+ * @luminance_data: custom luminance data
+ */
+ struct amdgpu_dm_luminance_data luminance_data[MAX_LUMINANCE_DATA_POINTS];
};
/**
@@ -230,6 +297,10 @@ struct hpd_rx_irq_offload_work {
* @offload_wq: offload work queue that this work is queued to
*/
struct hpd_rx_irq_offload_work_queue *offload_wq;
+ /**
+ * @adev: amdgpu_device pointer
+ */
+ struct amdgpu_device *adev;
};
/**
@@ -484,6 +555,13 @@ struct amdgpu_display_manager {
*/
struct workqueue_struct *vblank_control_workqueue;
+ /**
+ * @idle_workqueue:
+ *
+ * Periodic work for idle events.
+ */
+ struct idle_workqueue *idle_workqueue;
+
struct drm_atomic_state *cached_state;
struct dc_state *cached_dc_state;
@@ -508,12 +586,12 @@ struct amdgpu_display_manager {
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
/**
- * @secure_display_ctxs:
+ * @secure_display_ctx:
*
- * Store the ROI information and the work_struct to command dmub and psp for
- * all crtcs.
+ * Store secure display relevant info, e.g. the ROI information,
+ * the work_struct to command dmub, etc.
*/
- struct secure_display_context *secure_display_ctxs;
+ struct secure_display_context secure_display_ctx;
#endif
/**
* @hpd_rx_offload_wq:
@@ -553,6 +631,13 @@ struct amdgpu_display_manager {
u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP];
/**
+ * @restore_backlight:
+ *
+ * Flag to indicate whether to restore backlight after modeset.
+ */
+ bool restore_backlight;
+
+ /**
* @aux_hpd_discon_quirk:
*
* quirk for hpd discon while aux is on-going.
@@ -561,11 +646,43 @@ struct amdgpu_display_manager {
bool aux_hpd_discon_quirk;
/**
+ * @edp0_on_dp1_quirk:
+ *
+ * quirk for platforms that put edp0 on DP1.
+ */
+ bool edp0_on_dp1_quirk;
+
+ /**
* @dpia_aux_lock:
*
* Guards access to DPIA AUX
*/
struct mutex dpia_aux_lock;
+
+ /**
+ * @bb_from_dmub:
+ *
+ * Bounding box data read from dmub during early initialization for DCN4+
+ * Data is stored as a byte array that should be cast to the appropriate bb struct
+ */
+ void *bb_from_dmub;
+
+ /**
+ * @oem_i2c:
+ *
+ * OEM i2c bus
+ */
+ struct amdgpu_i2c_adapter *oem_i2c;
+
+ /**
+ * @fused_io:
+ *
+ * dmub fused io interface
+ */
+ struct fused_io_sync {
+ struct completion replied;
+ char reply_data[0x40]; // Cannot include dmub_cmd here
+ } fused_io[8];
};
enum dsc_clock_force_state {
@@ -631,9 +748,11 @@ struct amdgpu_dm_connector {
uint32_t connector_id;
int bl_idx;
+ struct cec_notifier *notifier;
+
/* we need to mind the EDID between detect
and get modes due to analog/digital/tvencoder */
- struct edid *edid;
+ const struct drm_edid *drm_edid;
/* shared with amdgpu */
struct amdgpu_hpd hpd;
@@ -657,8 +776,13 @@ struct amdgpu_dm_connector {
struct drm_dp_mst_port *mst_output_port;
struct amdgpu_dm_connector *mst_root;
struct drm_dp_aux *dsc_aux;
+ uint32_t mst_local_bw;
+ uint16_t vc_full_pbn;
struct mutex handle_mst_msg_ready;
+ /* branch device specific data */
+ uint32_t branch_ieee_oui;
+
/* TODO see if we can merge with ddc_bus or make a dm_connector */
struct amdgpu_i2c_adapter *i2c;
@@ -674,7 +798,6 @@ struct amdgpu_dm_connector {
* value is set to zero when there is no FreeSync support.
*/
int max_vfreq ;
- int pixel_clock_mhz;
/* Audio instance - protected by audio_lock. */
int audio_inst;
@@ -683,12 +806,14 @@ struct amdgpu_dm_connector {
bool fake_enable;
bool force_yuv420_output;
+ bool force_yuv422_output;
struct dsc_preferred_settings dsc_settings;
union dp_downstream_port_present mst_downstream_port_present;
/* Cached display modes */
struct drm_display_mode freesync_vid_base;
- int psr_skip_count;
+ int sr_skip_count;
+ bool disallow_edp_enter_psr;
/* Record progress status of mst*/
uint8_t mst_status;
@@ -714,11 +839,112 @@ static inline void amdgpu_dm_set_mst_status(uint8_t *status,
#define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
+struct amdgpu_dm_wb_connector {
+ struct drm_writeback_connector base;
+ struct dc_link *link;
+};
+
+#define to_amdgpu_dm_wb_connector(x) container_of(x, struct amdgpu_dm_wb_connector, base)
+
extern const struct amdgpu_ip_block_version dm_ip_block;
+/* enum amdgpu_transfer_function: pre-defined transfer functions supported by AMD.
+ *
+ * It includes standardized transfer functions and pure power functions. The
+ * transfer function coefficients are available at modules/color/color_gamma.c
+ */
+enum amdgpu_transfer_function {
+ AMDGPU_TRANSFER_FUNCTION_DEFAULT,
+ AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF,
+ AMDGPU_TRANSFER_FUNCTION_PQ_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_IDENTITY,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_BT709_OETF,
+ AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF,
+ AMDGPU_TRANSFER_FUNCTION_COUNT
+};
+
struct dm_plane_state {
struct drm_plane_state base;
struct dc_plane_state *dc_state;
+
+ /* Plane color mgmt */
+ /**
+ * @degamma_lut:
+ *
+ * 1D LUT for mapping framebuffer/plane pixel data before sampling or
+ * blending operations. It's usually applied to linearize input space.
+ * The blob (if not NULL) is an array of &struct drm_color_lut.
+ */
+ struct drm_property_blob *degamma_lut;
+ /**
+ * @degamma_tf:
+ *
+ * Predefined transfer function to tell DC driver the input space to
+ * linearize.
+ */
+ enum amdgpu_transfer_function degamma_tf;
+ /**
+ * @hdr_mult:
+ *
+ * Multiplier to 'gain' the plane. When PQ is decoded using the fixed
+ * func transfer function to the internal FP16 fb, 1.0 -> 80 nits (on
+ * AMD at least). When sRGB is decoded, 1.0 -> 1.0, obviously.
+ * Therefore, 1.0 multiplier = 80 nits for SDR content. So if you
+ * want 203 nits for SDR content, pass in (203.0 / 80.0). Format is
+ * S31.32 sign-magnitude.
+ *
+ * HDR multiplier can have a wide range beyond [0.0, 1.0]. This means that PQ
+ * TF is needed for any subsequent linear-to-non-linear transforms.
+ */
+ __u64 hdr_mult;
+ /**
+ * @ctm:
+ *
+ * Color transformation matrix. The blob (if not NULL) is a &struct
+ * drm_color_ctm_3x4.
+ */
+ struct drm_property_blob *ctm;
+ /**
+ * @shaper_lut: shaper lookup table blob. The blob (if not NULL) is an
+ * array of &struct drm_color_lut.
+ */
+ struct drm_property_blob *shaper_lut;
+ /**
+ * @shaper_tf:
+ *
+ * Predefined transfer function to delinearize color space.
+ */
+ enum amdgpu_transfer_function shaper_tf;
+ /**
+ * @lut3d: 3D lookup table blob. The blob (if not NULL) is an array of
+ * &struct drm_color_lut.
+ */
+ struct drm_property_blob *lut3d;
+ /**
+ * @blend_lut: blend lut lookup table blob. The blob (if not NULL) is an
+ * array of &struct drm_color_lut.
+ */
+ struct drm_property_blob *blend_lut;
+ /**
+ * @blend_tf:
+ *
+ * Pre-defined transfer function for converting plane pixel data before
+ * applying blend LUT.
+ */
+ enum amdgpu_transfer_function blend_tf;
+};
+
+enum amdgpu_dm_cursor_mode {
+ DM_CURSOR_NATIVE_MODE = 0,
+ DM_CURSOR_OVERLAY_MODE,
};
struct dm_crtc_state {
@@ -743,6 +969,16 @@ struct dm_crtc_state {
struct dc_info_packet vrr_infopacket;
int abm_level;
+
+ /**
+ * @regamma_tf:
+ *
+ * Pre-defined transfer function for converting internal FB -> wire
+ * encoding.
+ */
+ enum amdgpu_transfer_function regamma_tf;
+
+ enum amdgpu_dm_cursor_mode cursor_mode;
};
#define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base)
@@ -794,24 +1030,34 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
int link_index);
enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode);
+ const struct drm_display_mode *mode);
void dm_restore_drm_connector_state(struct drm_device *dev,
struct drm_connector *connector);
void amdgpu_dm_update_freesync_caps(struct drm_connector *connector,
- struct edid *edid);
+ const struct drm_edid *drm_edid);
void amdgpu_dm_trigger_timing_sync(struct drm_device *dev);
+/* 3D LUT max size is 17x17x17 (4913 entries) */
+#define MAX_COLOR_3DLUT_SIZE 17
+#define MAX_COLOR_3DLUT_BITDEPTH 12
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+ struct drm_plane_state *plane_state);
+/* 1D LUT size */
#define MAX_COLOR_LUT_ENTRIES 4096
/* Legacy gamm LUT users such as X doesn't like large LUT sizes */
#define MAX_COLOR_LEGACY_LUT_ENTRIES 256
void amdgpu_dm_init_color_mod(void);
+int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
+int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc,
+ bool check_only);
int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+ struct drm_plane_state *plane_state,
struct dc_plane_state *dc_plane_state);
void amdgpu_dm_update_connector_after_detect(
@@ -822,13 +1068,19 @@ extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index,
struct aux_payload *payload, enum aux_return_code_type *operation_result);
+bool amdgpu_dm_execute_fused_io(
+ struct amdgpu_device *dev,
+ struct dc_link *link,
+ union dmub_rb_cmd *commands,
+ uint8_t count,
+ uint32_t timeout_us
+);
+
int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index,
struct set_config_cmd_payload *payload, enum set_config_status *operation_result);
-bool check_seamless_boot_capability(struct amdgpu_device *adev);
-
struct dc_stream_state *
- create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
+ create_validate_stream_for_sink(struct drm_connector *connector,
const struct drm_display_mode *drm_mode,
const struct dm_connector_state *dm_state,
const struct dc_stream_state *old_stream);
@@ -836,9 +1088,27 @@ struct dc_stream_state *
int dm_atomic_get_state(struct drm_atomic_state *state,
struct dm_atomic_state **dm_state);
-struct amdgpu_dm_connector *
+struct drm_connector *
amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state,
struct drm_crtc *crtc);
int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth);
+struct idle_workqueue *idle_create_workqueue(struct amdgpu_device *adev);
+
+void *dm_allocate_gpu_mem(struct amdgpu_device *adev,
+ enum dc_gpu_mem_alloc_type type,
+ size_t size,
+ long long *addr);
+void dm_free_gpu_mem(struct amdgpu_device *adev,
+ enum dc_gpu_mem_alloc_type type,
+ void *addr);
+
+bool amdgpu_dm_is_headless(struct amdgpu_device *adev);
+
+void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector);
+void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector);
+int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector);
+
+void retrieve_dmi_info(struct amdgpu_display_manager *dm);
+
#endif /* __AMDGPU_DM_H__ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index a4cb23d059bd..a4ac6d442278 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
@@ -27,7 +28,6 @@
#include "amdgpu_dm.h"
#include "dc.h"
#include "modules/color/color_gamma.h"
-#include "basics/conversion.h"
/**
* DOC: overview
@@ -72,6 +72,7 @@
*/
#define MAX_DRM_LUT_VALUE 0xFFFF
+#define SDR_WHITE_LEVEL_INIT_VALUE 80
/**
* amdgpu_dm_init_color_mod - Initialize the color module.
@@ -84,6 +85,247 @@ void amdgpu_dm_init_color_mod(void)
setup_x_points_distribution();
}
+static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x)
+{
+ struct fixed31_32 val;
+
+ /* If negative, convert to 2's complement. */
+ if (x & (1ULL << 63))
+ x = -(x & ~(1ULL << 63));
+
+ val.value = x;
+ return val;
+}
+
+#ifdef AMD_PRIVATE_COLOR
+/* Pre-defined Transfer Functions (TF)
+ *
+ * AMD driver supports pre-defined mathematical functions for transferring
+ * between encoded values and optical/linear space. Depending on HW color caps,
+ * ROMs and curves built by the AMD color module support these transforms.
+ *
+ * The driver-specific color implementation exposes properties for pre-blending
+ * degamma TF, shaper TF (before 3D LUT), and blend(dpp.ogam) TF and
+ * post-blending regamma (mpc.ogam) TF. However, only pre-blending degamma
+ * supports ROM curves. AMD color module uses pre-defined coefficients to build
+ * curves for the other blocks. What can be done by each color block is
+ * described by struct dpp_color_caps and struct mpc_color_caps.
+ *
+ * AMD driver-specific color API exposes the following pre-defined transfer
+ * functions:
+ *
+ * - Identity: linear/identity relationship between pixel value and
+ * luminance value;
+ * - Gamma 2.2, Gamma 2.4, Gamma 2.6: pure power functions;
+ * - sRGB: 2.4: The piece-wise transfer function from IEC 61966-2-1:1999;
+ * - BT.709: has a linear segment in the bottom part and then a power function
+ * with a 0.45 (~1/2.22) gamma for the rest of the range; standardized by
+ * ITU-R BT.709-6;
+ * - PQ (Perceptual Quantizer): used for HDR display, allows luminance range
+ * capability of 0 to 10,000 nits; standardized by SMPTE ST 2084.
+ *
+ * The AMD color model is designed with an assumption that SDR (sRGB, BT.709,
+ * Gamma 2.2, etc.) peak white maps (normalized to 1.0 FP) to 80 nits in the PQ
+ * system. This has the implication that PQ EOTF (non-linear to linear) maps to
+ * [0.0..125.0] where 125.0 = 10,000 nits / 80 nits.
+ *
+ * Non-linear and linear forms are described in the table below:
+ *
+ * ┌───────────┬─────────────────────┬──────────────────────┐
+ * │ │ Non-linear │ Linear │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ sRGB │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ BT709 │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ Gamma 2.x │ UNORM or [0.0, 1.0] │ [0.0, 1.0] │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ PQ │ UNORM or FP16 CCCS* │ [0.0, 125.0] │
+ * ├───────────┼─────────────────────┼──────────────────────┤
+ * │ Identity │ UNORM or FP16 CCCS* │ [0.0, 1.0] or CCCS** │
+ * └───────────┴─────────────────────┴──────────────────────┘
+ * * CCCS: Windows canonical composition color space
+ * ** Respectively
+ *
+ * In the driver-specific API, color block names attached to TF properties
+ * suggest the intention regarding non-linear encoding pixel's luminance
+ * values. As some newer encodings don't use gamma curve, we make encoding and
+ * decoding explicit by defining an enum list of transfer functions supported
+ * in terms of EOTF and inverse EOTF, where:
+ *
+ * - EOTF (electro-optical transfer function): is the transfer function to go
+ * from the encoded value to an optical (linear) value. De-gamma functions
+ * traditionally do this.
+ * - Inverse EOTF (simply the inverse of the EOTF): is usually intended to go
+ * from an optical/linear space (which might have been used for blending)
+ * back to the encoded values. Gamma functions traditionally do this.
+ */
+static const char * const
+amdgpu_transfer_function_names[] = {
+ [AMDGPU_TRANSFER_FUNCTION_DEFAULT] = "Default",
+ [AMDGPU_TRANSFER_FUNCTION_IDENTITY] = "Identity",
+ [AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF] = "sRGB EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF] = "BT.709 inv_OETF",
+ [AMDGPU_TRANSFER_FUNCTION_PQ_EOTF] = "PQ EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF] = "Gamma 2.2 EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF] = "Gamma 2.4 EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF] = "Gamma 2.6 EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF] = "sRGB inv_EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_BT709_OETF] = "BT.709 OETF",
+ [AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF] = "PQ inv_EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF] = "Gamma 2.2 inv_EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF] = "Gamma 2.4 inv_EOTF",
+ [AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF] = "Gamma 2.6 inv_EOTF",
+};
+
+static const u32 amdgpu_eotf =
+ BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_PQ_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF);
+
+static const u32 amdgpu_inv_eotf =
+ BIT(AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_BT709_OETF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF);
+
+static struct drm_property *
+amdgpu_create_tf_property(struct drm_device *dev,
+ const char *name,
+ u32 supported_tf)
+{
+ u32 transfer_functions = supported_tf |
+ BIT(AMDGPU_TRANSFER_FUNCTION_DEFAULT) |
+ BIT(AMDGPU_TRANSFER_FUNCTION_IDENTITY);
+ struct drm_prop_enum_list enum_list[AMDGPU_TRANSFER_FUNCTION_COUNT];
+ int i, len;
+
+ len = 0;
+ for (i = 0; i < AMDGPU_TRANSFER_FUNCTION_COUNT; i++) {
+ if ((transfer_functions & BIT(i)) == 0)
+ continue;
+
+ enum_list[len].type = i;
+ enum_list[len].name = amdgpu_transfer_function_names[i];
+ len++;
+ }
+
+ return drm_property_create_enum(dev, DRM_MODE_PROP_ENUM,
+ name, enum_list, len);
+}
+
+int
+amdgpu_dm_create_color_properties(struct amdgpu_device *adev)
+{
+ struct drm_property *prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_DEGAMMA_LUT", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_degamma_lut_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_DEGAMMA_LUT_SIZE",
+ 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_degamma_lut_size_property = prop;
+
+ prop = amdgpu_create_tf_property(adev_to_drm(adev),
+ "AMD_PLANE_DEGAMMA_TF",
+ amdgpu_eotf);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_degamma_tf_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ 0, "AMD_PLANE_HDR_MULT", 0, U64_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_hdr_mult_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_CTM", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_ctm_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_SHAPER_LUT", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_shaper_lut_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_SHAPER_LUT_SIZE", 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_shaper_lut_size_property = prop;
+
+ prop = amdgpu_create_tf_property(adev_to_drm(adev),
+ "AMD_PLANE_SHAPER_TF",
+ amdgpu_inv_eotf);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_shaper_tf_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_LUT3D", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_lut3d_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_LUT3D_SIZE", 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_lut3d_size_property = prop;
+
+ prop = drm_property_create(adev_to_drm(adev),
+ DRM_MODE_PROP_BLOB,
+ "AMD_PLANE_BLEND_LUT", 0);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_blend_lut_property = prop;
+
+ prop = drm_property_create_range(adev_to_drm(adev),
+ DRM_MODE_PROP_IMMUTABLE,
+ "AMD_PLANE_BLEND_LUT_SIZE", 0, UINT_MAX);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_blend_lut_size_property = prop;
+
+ prop = amdgpu_create_tf_property(adev_to_drm(adev),
+ "AMD_PLANE_BLEND_TF",
+ amdgpu_eotf);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.plane_blend_tf_property = prop;
+
+ prop = amdgpu_create_tf_property(adev_to_drm(adev),
+ "AMD_CRTC_REGAMMA_TF",
+ amdgpu_inv_eotf);
+ if (!prop)
+ return -ENOMEM;
+ adev->mode_info.regamma_tf_property = prop;
+
+ return 0;
+}
+#endif
+
/**
* __extract_blob_lut - Extracts the DRM lut and lut size from a blob.
* @blob: DRM color mgmt property blob
@@ -182,7 +424,6 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut,
static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
struct fixed31_32 *matrix)
{
- int64_t val;
int i;
/*
@@ -201,12 +442,29 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm,
}
/* gamut_remap_matrix[i] = ctm[i - floor(i/4)] */
- val = ctm->matrix[i - (i / 4)];
- /* If negative, convert to 2's complement. */
- if (val & (1ULL << 63))
- val = -(val & ~(1ULL << 63));
+ matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i - (i / 4)]);
+ }
+}
+
+/**
+ * __drm_ctm_3x4_to_dc_matrix - converts a DRM CTM 3x4 to a DC CSC float matrix
+ * @ctm: DRM color transformation matrix with 3x4 dimensions
+ * @matrix: DC CSC float matrix
+ *
+ * The matrix needs to be a 3x4 (12 entry) matrix.
+ */
+static void __drm_ctm_3x4_to_dc_matrix(const struct drm_color_ctm_3x4 *ctm,
+ struct fixed31_32 *matrix)
+{
+ int i;
- matrix[i].value = val;
+ /* The format provided is S31.32, using signed-magnitude representation.
+ * Our fixed31_32 is also S31.32, but is using 2's complement. We have
+ * to convert from signed-magnitude to 2's complement.
+ */
+ for (i = 0; i < 12; i++) {
+ /* 3x4 CTM maps one-to-one: gamut_remap_matrix[i] = ctm[i] */
+ matrix[i] = amdgpu_dm_fixpt_from_s3132(ctm->matrix[i]);
}
}
@@ -268,16 +526,18 @@ static int __set_output_tf(struct dc_transfer_func *func,
struct calculate_buffer cal_buffer = {0};
bool res;
- ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
-
cal_buffer.buffer_index = -1;
- gamma = dc_create_gamma();
- if (!gamma)
- return -ENOMEM;
+ if (lut_size) {
+ ASSERT(lut && lut_size == MAX_COLOR_LUT_ENTRIES);
- gamma->num_entries = lut_size;
- __drm_lut_to_dc_gamma(lut, gamma, false);
+ gamma = dc_create_gamma();
+ if (!gamma)
+ return -ENOMEM;
+
+ gamma->num_entries = lut_size;
+ __drm_lut_to_dc_gamma(lut, gamma, false);
+ }
if (func->tf == TRANSFER_FUNCTION_LINEAR) {
/*
@@ -285,27 +545,67 @@ static int __set_output_tf(struct dc_transfer_func *func,
* on top of a linear input. But degamma params can be used
* instead to simulate this.
*/
- gamma->type = GAMMA_CUSTOM;
+ if (gamma)
+ gamma->type = GAMMA_CUSTOM;
res = mod_color_calculate_degamma_params(NULL, func,
- gamma, true);
+ gamma, gamma != NULL);
} else {
/*
* Assume sRGB. The actual mapping will depend on whether the
* input was legacy or not.
*/
- gamma->type = GAMMA_CS_TFM_1D;
- res = mod_color_calculate_regamma_params(func, gamma, false,
+ if (gamma)
+ gamma->type = GAMMA_CS_TFM_1D;
+ res = mod_color_calculate_regamma_params(func, gamma, gamma != NULL,
has_rom, NULL, &cal_buffer);
}
- dc_gamma_release(&gamma);
+ if (gamma)
+ dc_gamma_release(&gamma);
return res ? 0 : -ENOMEM;
}
+static int amdgpu_dm_set_atomic_regamma(struct dc_transfer_func *out_tf,
+ const struct drm_color_lut *regamma_lut,
+ uint32_t regamma_size, bool has_rom,
+ enum dc_transfer_func_predefined tf)
+{
+ int ret = 0;
+
+ if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) {
+ /*
+ * CRTC RGM goes into RGM LUT.
+ *
+ * Note: there is no implicit sRGB regamma here. We are using
+ * degamma calculation from color module to calculate the curve
+ * from a linear base if gamma TF is not set. However, if gamma
+ * TF (!= Linear) and LUT are set at the same time, we will use
+ * regamma calculation, and the color module will combine the
+ * pre-defined TF and the custom LUT values into the LUT that's
+ * actually programmed.
+ */
+ out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+ out_tf->tf = tf;
+ out_tf->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+ ret = __set_output_tf(out_tf, regamma_lut, regamma_size, has_rom);
+ } else {
+ /*
+ * No CRTC RGM means we can just put the block into bypass
+ * since we don't have any plane level adjustments using it.
+ */
+ out_tf->type = TF_TYPE_BYPASS;
+ out_tf->tf = TRANSFER_FUNCTION_LINEAR;
+ }
+
+ return ret;
+}
+
/**
* __set_input_tf - calculates the input transfer function based on expected
* input space.
+ * @caps: dc color capabilities
* @func: transfer function
* @lut: lookup table that defines the color space
* @lut_size: size of respective lut.
@@ -313,27 +613,240 @@ static int __set_output_tf(struct dc_transfer_func *func,
* Returns:
* 0 in case of success. -ENOMEM if fails.
*/
-static int __set_input_tf(struct dc_transfer_func *func,
+static int __set_input_tf(struct dc_color_caps *caps, struct dc_transfer_func *func,
const struct drm_color_lut *lut, uint32_t lut_size)
{
struct dc_gamma *gamma = NULL;
bool res;
- gamma = dc_create_gamma();
- if (!gamma)
- return -ENOMEM;
+ if (lut_size) {
+ gamma = dc_create_gamma();
+ if (!gamma)
+ return -ENOMEM;
- gamma->type = GAMMA_CUSTOM;
- gamma->num_entries = lut_size;
+ gamma->type = GAMMA_CUSTOM;
+ gamma->num_entries = lut_size;
- __drm_lut_to_dc_gamma(lut, gamma, false);
+ __drm_lut_to_dc_gamma(lut, gamma, false);
+ }
- res = mod_color_calculate_degamma_params(NULL, func, gamma, true);
- dc_gamma_release(&gamma);
+ res = mod_color_calculate_degamma_params(caps, func, gamma, gamma != NULL);
+
+ if (gamma)
+ dc_gamma_release(&gamma);
return res ? 0 : -ENOMEM;
}
+static enum dc_transfer_func_predefined
+amdgpu_tf_to_dc_tf(enum amdgpu_transfer_function tf)
+{
+ switch (tf) {
+ default:
+ case AMDGPU_TRANSFER_FUNCTION_DEFAULT:
+ case AMDGPU_TRANSFER_FUNCTION_IDENTITY:
+ return TRANSFER_FUNCTION_LINEAR;
+ case AMDGPU_TRANSFER_FUNCTION_SRGB_EOTF:
+ case AMDGPU_TRANSFER_FUNCTION_SRGB_INV_EOTF:
+ return TRANSFER_FUNCTION_SRGB;
+ case AMDGPU_TRANSFER_FUNCTION_BT709_OETF:
+ case AMDGPU_TRANSFER_FUNCTION_BT709_INV_OETF:
+ return TRANSFER_FUNCTION_BT709;
+ case AMDGPU_TRANSFER_FUNCTION_PQ_EOTF:
+ case AMDGPU_TRANSFER_FUNCTION_PQ_INV_EOTF:
+ return TRANSFER_FUNCTION_PQ;
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA22_EOTF:
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA22_INV_EOTF:
+ return TRANSFER_FUNCTION_GAMMA22;
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA24_EOTF:
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA24_INV_EOTF:
+ return TRANSFER_FUNCTION_GAMMA24;
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA26_EOTF:
+ case AMDGPU_TRANSFER_FUNCTION_GAMMA26_INV_EOTF:
+ return TRANSFER_FUNCTION_GAMMA26;
+ }
+}
+
+static void __to_dc_lut3d_color(struct dc_rgb *rgb,
+ const struct drm_color_lut lut,
+ int bit_precision)
+{
+ rgb->red = drm_color_lut_extract(lut.red, bit_precision);
+ rgb->green = drm_color_lut_extract(lut.green, bit_precision);
+ rgb->blue = drm_color_lut_extract(lut.blue, bit_precision);
+}
+
+static void __drm_3dlut_to_dc_3dlut(const struct drm_color_lut *lut,
+ uint32_t lut3d_size,
+ struct tetrahedral_params *params,
+ bool use_tetrahedral_9,
+ int bit_depth)
+{
+ struct dc_rgb *lut0;
+ struct dc_rgb *lut1;
+ struct dc_rgb *lut2;
+ struct dc_rgb *lut3;
+ int lut_i, i;
+
+
+ if (use_tetrahedral_9) {
+ lut0 = params->tetrahedral_9.lut0;
+ lut1 = params->tetrahedral_9.lut1;
+ lut2 = params->tetrahedral_9.lut2;
+ lut3 = params->tetrahedral_9.lut3;
+ } else {
+ lut0 = params->tetrahedral_17.lut0;
+ lut1 = params->tetrahedral_17.lut1;
+ lut2 = params->tetrahedral_17.lut2;
+ lut3 = params->tetrahedral_17.lut3;
+ }
+
+ for (lut_i = 0, i = 0; i < lut3d_size - 4; lut_i++, i += 4) {
+ /*
+ * We should consider the 3D LUT RGB values are distributed
+ * along four arrays lut0-3 where the first holds 1229 entries and
+ * the others 1228. The bit depth supported for 3dlut channel is
+ * 12-bit, but DC also supports 10-bit.
+ *
+ * TODO: improve color pipeline API to enable the userspace set
+ * bit depth and 3D LUT size/stride, as specified by VA-API.
+ */
+ __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
+ __to_dc_lut3d_color(&lut1[lut_i], lut[i + 1], bit_depth);
+ __to_dc_lut3d_color(&lut2[lut_i], lut[i + 2], bit_depth);
+ __to_dc_lut3d_color(&lut3[lut_i], lut[i + 3], bit_depth);
+ }
+ /* lut0 has 1229 points (lut_size/4 + 1) */
+ __to_dc_lut3d_color(&lut0[lut_i], lut[i], bit_depth);
+}
+
+/* amdgpu_dm_atomic_lut3d - set DRM 3D LUT to DC stream
+ * @drm_lut3d: user 3D LUT
+ * @drm_lut3d_size: size of 3D LUT
+ * @lut: DC 3D LUT
+ *
+ * Map user 3D LUT data to DC 3D LUT and all necessary bits to program it
+ * on DCN accordingly.
+ */
+static void amdgpu_dm_atomic_lut3d(const struct drm_color_lut *drm_lut3d,
+ uint32_t drm_lut3d_size,
+ struct dc_3dlut *lut)
+{
+ if (!drm_lut3d_size) {
+ lut->state.bits.initialized = 0;
+ } else {
+ /* Stride and bit depth are not programmable by API yet.
+ * Therefore, only supports 17x17x17 3D LUT (12-bit).
+ */
+ lut->lut_3d.use_tetrahedral_9 = false;
+ lut->lut_3d.use_12bits = true;
+ lut->state.bits.initialized = 1;
+ __drm_3dlut_to_dc_3dlut(drm_lut3d, drm_lut3d_size, &lut->lut_3d,
+ lut->lut_3d.use_tetrahedral_9,
+ MAX_COLOR_3DLUT_BITDEPTH);
+ }
+}
+
+static int amdgpu_dm_atomic_shaper_lut(const struct drm_color_lut *shaper_lut,
+ bool has_rom,
+ enum dc_transfer_func_predefined tf,
+ uint32_t shaper_size,
+ struct dc_transfer_func *func_shaper)
+{
+ int ret = 0;
+
+ if (shaper_size || tf != TRANSFER_FUNCTION_LINEAR) {
+ /*
+ * If user shaper LUT is set, we assume a linear color space
+ * (linearized by degamma 1D LUT or not).
+ */
+ func_shaper->type = TF_TYPE_DISTRIBUTED_POINTS;
+ func_shaper->tf = tf;
+ func_shaper->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+ ret = __set_output_tf(func_shaper, shaper_lut, shaper_size, has_rom);
+ } else {
+ func_shaper->type = TF_TYPE_BYPASS;
+ func_shaper->tf = TRANSFER_FUNCTION_LINEAR;
+ }
+
+ return ret;
+}
+
+static int amdgpu_dm_atomic_blend_lut(const struct drm_color_lut *blend_lut,
+ bool has_rom,
+ enum dc_transfer_func_predefined tf,
+ uint32_t blend_size,
+ struct dc_transfer_func *func_blend)
+{
+ int ret = 0;
+
+ if (blend_size || tf != TRANSFER_FUNCTION_LINEAR) {
+ /*
+ * DRM plane gamma LUT or TF means we are linearizing color
+ * space before blending (similar to degamma programming). As
+ * we don't have hardcoded curve support, we use the AMD color
+ * module to fill the parameters that will be translated to HW
+ * points.
+ */
+ func_blend->type = TF_TYPE_DISTRIBUTED_POINTS;
+ func_blend->tf = tf;
+ func_blend->sdr_ref_white_level = SDR_WHITE_LEVEL_INIT_VALUE;
+
+ ret = __set_input_tf(NULL, func_blend, blend_lut, blend_size);
+ } else {
+ func_blend->type = TF_TYPE_BYPASS;
+ func_blend->tf = TRANSFER_FUNCTION_LINEAR;
+ }
+
+ return ret;
+}
+
+/**
+ * amdgpu_dm_verify_lut3d_size - verifies if 3D LUT is supported and if user
+ * shaper and 3D LUTs match the hw supported size
+ * @adev: amdgpu device
+ * @plane_state: the DRM plane state
+ *
+ * Verifies if pre-blending (DPP) 3D LUT is supported by the HW (DCN 2.0 or
+ * newer) and if the user shaper and 3D LUTs match the supported size.
+ *
+ * Returns:
+ * 0 on success. -EINVAL if LUT sizes are invalid.
+ */
+int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
+ struct drm_plane_state *plane_state)
+{
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+ const struct drm_color_lut *shaper = NULL, *lut3d = NULL;
+ uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE;
+ bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend;
+
+ /* Shaper LUT is only available if the HW has 3D LUT color caps */
+ exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0;
+ shaper = __extract_blob_lut(dm_plane_state->shaper_lut, &size);
+
+ if (shaper && size != exp_size) {
+ drm_dbg(&adev->ddev,
+ "Invalid Shaper LUT size. Should be %u but got %u.\n",
+ exp_size, size);
+ return -EINVAL;
+ }
+
+ /* The number of 3D LUT entries is the dimension size cubed */
+ exp_size = has_3dlut ? dim_size * dim_size * dim_size : 0;
+ lut3d = __extract_blob_lut(dm_plane_state->lut3d, &size);
+
+ if (lut3d && size != exp_size) {
+ drm_dbg(&adev->ddev,
+ "Invalid 3D LUT size. Should be %u but got %u.\n",
+ exp_size, size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/**
* amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes
* @crtc_state: the DRM CRTC state
@@ -371,39 +884,42 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state)
}
/**
- * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream.
+ * amdgpu_dm_check_crtc_color_mgmt: Check if DRM color props are programmable by DC.
* @crtc: amdgpu_dm crtc state
+ * @check_only: only check color state without updating the DC stream
*
- * With no plane level color management properties we're free to use any
- * of the HW blocks as long as the CRTC CTM always comes before the
- * CRTC RGM and after the CRTC DGM.
- *
- * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear.
- * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear.
- * - The CRTC CTM will be placed in the gamut remap block if it is non-linear.
+ * This function just verifies CRTC LUT sizes, if there is enough space for
+ * output transfer function and if its parameters can be calculated by AMD
+ * color module. It also adjusts some settings for programming CRTC degamma at
+ * plane stage, using plane DGM block.
*
* The RGM block is typically more fully featured and accurate across
* all ASICs - DCE can't support a custom non-linear CRTC DGM.
*
* For supporting both plane level color management and CRTC level color
- * management at once we have to either restrict the usage of CRTC properties
- * or blend adjustments together.
+ * management at once we have to either restrict the usage of some CRTC
+ * properties or blend adjustments together.
*
* Returns:
- * 0 on success. Error code if setup fails.
+ * 0 on success. Error code if validation fails.
*/
-int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
+
+int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc,
+ bool check_only)
{
struct dc_stream_state *stream = crtc->stream;
struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
bool has_rom = adev->asic_type <= CHIP_RAVEN;
- struct drm_color_ctm *ctm = NULL;
+ struct dc_transfer_func *out_tf;
const struct drm_color_lut *degamma_lut, *regamma_lut;
uint32_t degamma_size, regamma_size;
bool has_regamma, has_degamma;
+ enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_LINEAR;
bool is_legacy;
int r;
+ tf = amdgpu_tf_to_dc_tf(crtc->regamma_tf);
+
r = amdgpu_dm_verify_lut_sizes(&crtc->base);
if (r)
return r;
@@ -423,6 +939,14 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
crtc->cm_has_degamma = false;
crtc->cm_is_degamma_srgb = false;
+ if (check_only) {
+ out_tf = kvzalloc(sizeof(*out_tf), GFP_KERNEL);
+ if (!out_tf)
+ return -ENOMEM;
+ } else {
+ out_tf = &stream->out_transfer_func;
+ }
+
/* Setup regamma and degamma. */
if (is_legacy) {
/*
@@ -437,29 +961,21 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
* inverse color ramp in legacy userspace.
*/
crtc->cm_is_degamma_srgb = true;
- stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
- stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
-
- r = __set_legacy_tf(stream->out_transfer_func, regamma_lut,
- regamma_size, has_rom);
- if (r)
- return r;
- } else if (has_regamma) {
- /* If atomic regamma, CRTC RGM goes into RGM LUT. */
- stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS;
- stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
-
- r = __set_output_tf(stream->out_transfer_func, regamma_lut,
- regamma_size, has_rom);
- if (r)
- return r;
- } else {
+ out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+ out_tf->tf = TRANSFER_FUNCTION_SRGB;
/*
- * No CRTC RGM means we can just put the block into bypass
- * since we don't have any plane level adjustments using it.
+ * Note: although we pass has_rom as parameter here, we never
+ * actually use ROM because the color module only takes the ROM
+ * path if transfer_func->type == PREDEFINED.
+ *
+ * See more in mod_color_calculate_regamma_params()
*/
- stream->out_transfer_func->type = TF_TYPE_BYPASS;
- stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
+ r = __set_legacy_tf(out_tf, regamma_lut,
+ regamma_size, has_rom);
+ } else {
+ regamma_size = has_regamma ? regamma_size : 0;
+ r = amdgpu_dm_set_atomic_regamma(out_tf, regamma_lut,
+ regamma_size, has_rom, tf);
}
/*
@@ -468,6 +984,43 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
* have to place the CTM in the OCSC in that case.
*/
crtc->cm_has_degamma = has_degamma;
+ if (check_only)
+ kvfree(out_tf);
+
+ return r;
+}
+
+/**
+ * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream.
+ * @crtc: amdgpu_dm crtc state
+ *
+ * With no plane level color management properties we're free to use any
+ * of the HW blocks as long as the CRTC CTM always comes before the
+ * CRTC RGM and after the CRTC DGM.
+ *
+ * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear.
+ * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear.
+ * - The CRTC CTM will be placed in the gamut remap block if it is non-linear.
+ *
+ * The RGM block is typically more fully featured and accurate across
+ * all ASICs - DCE can't support a custom non-linear CRTC DGM.
+ *
+ * For supporting both plane level color management and CRTC level color
+ * management at once we have to either restrict the usage of CRTC properties
+ * or blend adjustments together.
+ *
+ * Returns:
+ * 0 on success. Error code if setup fails.
+ */
+int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
+{
+ struct dc_stream_state *stream = crtc->stream;
+ struct drm_color_ctm *ctm = NULL;
+ int ret;
+
+ ret = amdgpu_dm_check_crtc_color_mgmt(crtc, false);
+ if (ret)
+ return ret;
/* Setup CRTC CTM. */
if (crtc->base.ctm) {
@@ -495,20 +1048,10 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
return 0;
}
-/**
- * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
- * @crtc: amdgpu_dm crtc state
- * @dc_plane_state: target DC surface
- *
- * Update the underlying dc_stream_state's input transfer function (ITF) in
- * preparation for hardware commit. The transfer function used depends on
- * the preparation done on the stream for color management.
- *
- * Returns:
- * 0 on success. -ENOMEM if mem allocation fails.
- */
-int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
- struct dc_plane_state *dc_plane_state)
+static int
+map_crtc_degamma_to_dc_plane(struct dm_crtc_state *crtc,
+ struct dc_plane_state *dc_plane_state,
+ struct dc_color_caps *caps)
{
const struct drm_color_lut *degamma_lut;
enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
@@ -531,8 +1074,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
&degamma_size);
ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
- dc_plane_state->in_transfer_func->type =
- TF_TYPE_DISTRIBUTED_POINTS;
+ dc_plane_state->in_transfer_func.type = TF_TYPE_DISTRIBUTED_POINTS;
/*
* This case isn't fully correct, but also fairly
@@ -559,32 +1101,227 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
* map these to the atomic one instead.
*/
if (crtc->cm_is_degamma_srgb)
- dc_plane_state->in_transfer_func->tf = tf;
+ dc_plane_state->in_transfer_func.tf = tf;
else
- dc_plane_state->in_transfer_func->tf =
+ dc_plane_state->in_transfer_func.tf =
TRANSFER_FUNCTION_LINEAR;
- r = __set_input_tf(dc_plane_state->in_transfer_func,
+ r = __set_input_tf(caps, &dc_plane_state->in_transfer_func,
degamma_lut, degamma_size);
if (r)
return r;
- } else if (crtc->cm_is_degamma_srgb) {
+ } else {
/*
* For legacy gamma support we need the regamma input
* in linear space. Assume that the input is sRGB.
*/
- dc_plane_state->in_transfer_func->type = TF_TYPE_PREDEFINED;
- dc_plane_state->in_transfer_func->tf = tf;
+ dc_plane_state->in_transfer_func.type = TF_TYPE_PREDEFINED;
+ dc_plane_state->in_transfer_func.tf = tf;
if (tf != TRANSFER_FUNCTION_SRGB &&
- !mod_color_calculate_degamma_params(NULL,
- dc_plane_state->in_transfer_func, NULL, false))
+ !mod_color_calculate_degamma_params(caps,
+ &dc_plane_state->in_transfer_func,
+ NULL, false))
return -ENOMEM;
- } else {
- /* ...Otherwise we can just bypass the DGM block. */
- dc_plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
- dc_plane_state->in_transfer_func->tf = TRANSFER_FUNCTION_LINEAR;
}
return 0;
}
+
+static int
+__set_dm_plane_degamma(struct drm_plane_state *plane_state,
+ struct dc_plane_state *dc_plane_state,
+ struct dc_color_caps *color_caps)
+{
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+ const struct drm_color_lut *degamma_lut;
+ enum amdgpu_transfer_function tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+ uint32_t degamma_size;
+ bool has_degamma_lut;
+ int ret;
+
+ degamma_lut = __extract_blob_lut(dm_plane_state->degamma_lut,
+ &degamma_size);
+
+ has_degamma_lut = degamma_lut &&
+ !__is_lut_linear(degamma_lut, degamma_size);
+
+ tf = dm_plane_state->degamma_tf;
+
+ /* If we don't have plane degamma LUT nor TF to set on DC, we have
+ * nothing to do here, return.
+ */
+ if (!has_degamma_lut && tf == AMDGPU_TRANSFER_FUNCTION_DEFAULT)
+ return -EINVAL;
+
+ dc_plane_state->in_transfer_func.tf = amdgpu_tf_to_dc_tf(tf);
+
+ if (has_degamma_lut) {
+ ASSERT(degamma_size == MAX_COLOR_LUT_ENTRIES);
+
+ dc_plane_state->in_transfer_func.type =
+ TF_TYPE_DISTRIBUTED_POINTS;
+
+ ret = __set_input_tf(color_caps, &dc_plane_state->in_transfer_func,
+ degamma_lut, degamma_size);
+ if (ret)
+ return ret;
+ } else {
+ dc_plane_state->in_transfer_func.type =
+ TF_TYPE_PREDEFINED;
+
+ if (!mod_color_calculate_degamma_params(color_caps,
+ &dc_plane_state->in_transfer_func, NULL, false))
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int
+amdgpu_dm_plane_set_color_properties(struct drm_plane_state *plane_state,
+ struct dc_plane_state *dc_plane_state)
+{
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+ enum amdgpu_transfer_function shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+ enum amdgpu_transfer_function blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+ const struct drm_color_lut *shaper_lut, *lut3d, *blend_lut;
+ uint32_t shaper_size, lut3d_size, blend_size;
+ int ret;
+
+ dc_plane_state->hdr_mult = amdgpu_dm_fixpt_from_s3132(dm_plane_state->hdr_mult);
+
+ shaper_lut = __extract_blob_lut(dm_plane_state->shaper_lut, &shaper_size);
+ shaper_size = shaper_lut != NULL ? shaper_size : 0;
+ shaper_tf = dm_plane_state->shaper_tf;
+ lut3d = __extract_blob_lut(dm_plane_state->lut3d, &lut3d_size);
+ lut3d_size = lut3d != NULL ? lut3d_size : 0;
+
+ amdgpu_dm_atomic_lut3d(lut3d, lut3d_size, &dc_plane_state->lut3d_func);
+ ret = amdgpu_dm_atomic_shaper_lut(shaper_lut, false,
+ amdgpu_tf_to_dc_tf(shaper_tf),
+ shaper_size,
+ &dc_plane_state->in_shaper_func);
+ if (ret) {
+ drm_dbg_kms(plane_state->plane->dev,
+ "setting plane %d shaper LUT failed.\n",
+ plane_state->plane->index);
+
+ return ret;
+ }
+
+ blend_tf = dm_plane_state->blend_tf;
+ blend_lut = __extract_blob_lut(dm_plane_state->blend_lut, &blend_size);
+ blend_size = blend_lut != NULL ? blend_size : 0;
+
+ ret = amdgpu_dm_atomic_blend_lut(blend_lut, false,
+ amdgpu_tf_to_dc_tf(blend_tf),
+ blend_size, &dc_plane_state->blend_tf);
+ if (ret) {
+ drm_dbg_kms(plane_state->plane->dev,
+ "setting plane %d gamma lut failed.\n",
+ plane_state->plane->index);
+
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_dm_update_plane_color_mgmt: Maps DRM color management to DC plane.
+ * @crtc: amdgpu_dm crtc state
+ * @plane_state: DRM plane state
+ * @dc_plane_state: target DC surface
+ *
+ * Update the underlying dc_stream_state's input transfer function (ITF) in
+ * preparation for hardware commit. The transfer function used depends on
+ * the preparation done on the stream for color management.
+ *
+ * Returns:
+ * 0 on success. -EINVAL on unsupported color state, -ENOMEM if allocation fails.
+ */
+int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
+ struct drm_plane_state *plane_state,
+ struct dc_plane_state *dc_plane_state)
+{
+ struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state);
+ struct drm_color_ctm_3x4 *ctm = NULL;
+ struct dc_color_caps *color_caps = NULL;
+ bool has_crtc_cm_degamma;
+ int ret;
+
+ ret = amdgpu_dm_verify_lut3d_size(adev, plane_state);
+ if (ret) {
+ drm_dbg_driver(&adev->ddev, "amdgpu_dm_verify_lut3d_size() failed\n");
+ return ret;
+ }
+
+ if (dc_plane_state->ctx && dc_plane_state->ctx->dc)
+ color_caps = &dc_plane_state->ctx->dc->caps.color;
+
+ /* Initially, we can just bypass the DGM block. */
+ dc_plane_state->in_transfer_func.type = TF_TYPE_BYPASS;
+ dc_plane_state->in_transfer_func.tf = TRANSFER_FUNCTION_LINEAR;
+
+ /* After, we start to update values according to color props */
+ has_crtc_cm_degamma = (crtc->cm_has_degamma || crtc->cm_is_degamma_srgb);
+
+ ret = __set_dm_plane_degamma(plane_state, dc_plane_state, color_caps);
+ if (ret == -ENOMEM)
+ return ret;
+
+ /* We only have one degamma block available (pre-blending) for the
+ * whole color correction pipeline, so that we can't actually perform
+ * plane and CRTC degamma at the same time. Explicitly reject atomic
+ * updates when userspace sets both plane and CRTC degamma properties.
+ */
+ if (has_crtc_cm_degamma && ret != -EINVAL) {
+ drm_dbg_kms(crtc->base.crtc->dev,
+ "doesn't support plane and CRTC degamma at the same time\n");
+ return -EINVAL;
+ }
+
+ /* If we are here, it means we don't have plane degamma settings, check
+ * if we have CRTC degamma waiting for mapping to pre-blending degamma
+ * block
+ */
+ if (has_crtc_cm_degamma) {
+ /*
+ * AMD HW doesn't have post-blending degamma caps. When DRM
+ * CRTC atomic degamma is set, we map it to DPP degamma block
+ * (pre-blending) or, on legacy gamma, we use DPP degamma to
+ * linearize (implicit degamma) from sRGB/BT709 according to
+ * the input space.
+ */
+ ret = map_crtc_degamma_to_dc_plane(crtc, dc_plane_state, color_caps);
+ if (ret)
+ return ret;
+ }
+
+ /* Setup plane CTM. */
+ if (dm_plane_state->ctm) {
+ ctm = (struct drm_color_ctm_3x4 *)dm_plane_state->ctm->data;
+ /*
+ * DCN2 and older don't support both pre-blending and
+ * post-blending gamut remap. For this HW family, if we have
+ * the plane and CRTC CTMs simultaneously, CRTC CTM takes
+ * priority, and we discard plane CTM, as implemented in
+ * dcn10_program_gamut_remap(). However, DCN3+ has DPP
+ * (pre-blending) and MPC (post-blending) `gamut remap` blocks;
+ * therefore, we can program plane and CRTC CTMs together by
+ * mapping CRTC CTM to MPC and keeping plane CTM setup at DPP,
+ * as it's done by dcn30_program_gamut_remap().
+ */
+ __drm_ctm_3x4_to_dc_matrix(ctm, dc_plane_state->gamut_remap_matrix.matrix);
+
+ dc_plane_state->gamut_remap_matrix.enable_remap = true;
+ dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+ } else {
+ /* Bypass CTM. */
+ dc_plane_state->gamut_remap_matrix.enable_remap = false;
+ dc_plane_state->input_csc_color_matrix.enable_adjustment = false;
+ }
+
+ return amdgpu_dm_plane_set_color_properties(plane_state, dc_plane_state);
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
index 52ecfa746b54..e20aa7438066 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -30,6 +31,7 @@
#include "amdgpu_dm.h"
#include "dc.h"
#include "amdgpu_securedisplay.h"
+#include "amdgpu_dm_psr.h"
static const char *const pipe_crc_sources[] = {
"none",
@@ -83,45 +85,274 @@ const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc,
}
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+static void update_phy_id_mapping(struct amdgpu_device *adev)
+{
+ struct drm_device *ddev = adev_to_drm(adev);
+ struct amdgpu_display_manager *dm = &adev->dm;
+ struct drm_connector *connector;
+ struct amdgpu_dm_connector *aconnector;
+ struct amdgpu_dm_connector *sort_connector[AMDGPU_DM_MAX_CRTC] = {NULL};
+ struct drm_connector_list_iter iter;
+ uint8_t idx = 0, idx_2 = 0, connector_cnt = 0;
+
+ dm->secure_display_ctx.phy_mapping_updated = false;
+
+ mutex_lock(&ddev->mode_config.mutex);
+ drm_connector_list_iter_begin(ddev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->status != connector_status_connected)
+ continue;
+
+ if (idx >= AMDGPU_DM_MAX_CRTC) {
+ DRM_WARN("%s connected connectors exceed max crtc\n", __func__);
+ mutex_unlock(&ddev->mode_config.mutex);
+ return;
+ }
+
+ aconnector = to_amdgpu_dm_connector(connector);
+
+ sort_connector[idx] = aconnector;
+ idx++;
+ connector_cnt++;
+ }
+ drm_connector_list_iter_end(&iter);
+
+ /* sort connectors by link_enc_hw_instance first */
+ for (idx = connector_cnt; idx > 1 ; idx--) {
+ for (idx_2 = 0; idx_2 < (idx - 1); idx_2++) {
+ if (sort_connector[idx_2]->dc_link->link_enc_hw_inst >
+ sort_connector[idx_2 + 1]->dc_link->link_enc_hw_inst)
+ swap(sort_connector[idx_2], sort_connector[idx_2 + 1]);
+ }
+ }
+
+ /*
+ * Sort mst connectors by RAD. mst connectors with the same enc_hw_instance are already
+ * sorted together above.
+ */
+ for (idx = 0; idx < connector_cnt; /*Do nothing*/) {
+ if (sort_connector[idx]->mst_root) {
+ uint8_t i, j, k;
+ uint8_t mst_con_cnt = 1;
+
+ for (idx_2 = (idx + 1); idx_2 < connector_cnt; idx_2++) {
+ if (sort_connector[idx_2]->mst_root == sort_connector[idx]->mst_root)
+ mst_con_cnt++;
+ else
+ break;
+ }
+
+ for (i = mst_con_cnt; i > 1; i--) {
+ for (j = idx; j < (idx + i - 2); j++) {
+ int mstb_lct = sort_connector[j]->mst_output_port->parent->lct;
+ int next_mstb_lct = sort_connector[j + 1]->mst_output_port->parent->lct;
+ u8 *rad;
+ u8 *next_rad;
+ bool swap = false;
+
+ /* Sort by mst tree depth first. Then compare RAD if depth is the same*/
+ if (mstb_lct > next_mstb_lct) {
+ swap = true;
+ } else if (mstb_lct == next_mstb_lct) {
+ if (mstb_lct == 1) {
+ if (sort_connector[j]->mst_output_port->port_num > sort_connector[j + 1]->mst_output_port->port_num)
+ swap = true;
+ } else if (mstb_lct > 1) {
+ rad = sort_connector[j]->mst_output_port->parent->rad;
+ next_rad = sort_connector[j + 1]->mst_output_port->parent->rad;
+
+ for (k = 0; k < mstb_lct - 1; k++) {
+ int shift = (k % 2) ? 0 : 4;
+ int port_num = (rad[k / 2] >> shift) & 0xf;
+ int next_port_num = (next_rad[k / 2] >> shift) & 0xf;
+
+ if (port_num > next_port_num) {
+ swap = true;
+ break;
+ }
+ }
+ } else {
+ DRM_ERROR("MST LCT shouldn't be set as < 1");
+ mutex_unlock(&ddev->mode_config.mutex);
+ return;
+ }
+ }
+
+ if (swap)
+ swap(sort_connector[j], sort_connector[j + 1]);
+ }
+ }
+
+ idx += mst_con_cnt;
+ } else {
+ idx++;
+ }
+ }
+
+ /* Complete sorting. Assign relevant result to dm->secure_display_ctx.phy_id_mapping[] */
+ memset(dm->secure_display_ctx.phy_id_mapping, 0, sizeof(dm->secure_display_ctx.phy_id_mapping));
+ for (idx = 0; idx < connector_cnt; idx++) {
+ aconnector = sort_connector[idx];
+
+ dm->secure_display_ctx.phy_id_mapping[idx].assigned = true;
+ dm->secure_display_ctx.phy_id_mapping[idx].is_mst = false;
+ dm->secure_display_ctx.phy_id_mapping[idx].enc_hw_inst = aconnector->dc_link->link_enc_hw_inst;
+
+ if (sort_connector[idx]->mst_root) {
+ dm->secure_display_ctx.phy_id_mapping[idx].is_mst = true;
+ dm->secure_display_ctx.phy_id_mapping[idx].lct = aconnector->mst_output_port->parent->lct;
+ dm->secure_display_ctx.phy_id_mapping[idx].port_num = aconnector->mst_output_port->port_num;
+ memcpy(dm->secure_display_ctx.phy_id_mapping[idx].rad,
+ aconnector->mst_output_port->parent->rad, sizeof(aconnector->mst_output_port->parent->rad));
+ }
+ }
+ mutex_unlock(&ddev->mode_config.mutex);
+
+ dm->secure_display_ctx.phy_id_mapping_cnt = connector_cnt;
+ dm->secure_display_ctx.phy_mapping_updated = true;
+}
+
+static bool get_phy_id(struct amdgpu_display_manager *dm,
+ struct amdgpu_dm_connector *aconnector, uint8_t *phy_id)
+{
+ int idx, idx_2;
+ bool found = false;
+
+ /*
+ * Assume secure display starts after all connectors are probed. The connection
+ * config is assumed to be static as well.
+ */
+ if (!dm->secure_display_ctx.phy_mapping_updated) {
+ DRM_WARN("%s Should update the phy id table before get it's value", __func__);
+ return false;
+ }
+
+ for (idx = 0; idx < dm->secure_display_ctx.phy_id_mapping_cnt; idx++) {
+ if (!dm->secure_display_ctx.phy_id_mapping[idx].assigned) {
+ DRM_ERROR("phy_id_mapping[%d] should be assigned", idx);
+ return false;
+ }
+
+ if (aconnector->dc_link->link_enc_hw_inst ==
+ dm->secure_display_ctx.phy_id_mapping[idx].enc_hw_inst) {
+ if (!dm->secure_display_ctx.phy_id_mapping[idx].is_mst) {
+ found = true;
+ goto out;
+ } else {
+ /* Could be caused by wrongly passing the mst root connector */
+ if (!aconnector->mst_output_port) {
+ DRM_ERROR("%s Check mst case but connector without a port assigned", __func__);
+ return false;
+ }
+
+ if (aconnector->mst_root &&
+ aconnector->mst_root->mst_mgr.mst_primary == NULL) {
+ DRM_WARN("%s pass in a stale mst connector", __func__);
+ }
+
+ if (aconnector->mst_output_port->parent->lct == dm->secure_display_ctx.phy_id_mapping[idx].lct &&
+ aconnector->mst_output_port->port_num == dm->secure_display_ctx.phy_id_mapping[idx].port_num) {
+ if (aconnector->mst_output_port->parent->lct == 1) {
+ found = true;
+ goto out;
+ } else if (aconnector->mst_output_port->parent->lct > 1) {
+ /* Check RAD */
+ for (idx_2 = 0; idx_2 < aconnector->mst_output_port->parent->lct - 1; idx_2++) {
+ int shift = (idx_2 % 2) ? 0 : 4;
+ int port_num = (aconnector->mst_output_port->parent->rad[idx_2 / 2] >> shift) & 0xf;
+ int port_num2 = (dm->secure_display_ctx.phy_id_mapping[idx].rad[idx_2 / 2] >> shift) & 0xf;
+
+ if (port_num != port_num2)
+ break;
+ }
+
+ if (idx_2 == aconnector->mst_output_port->parent->lct - 1) {
+ found = true;
+ goto out;
+ }
+ } else {
+ DRM_ERROR("lCT should be >= 1");
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+out:
+ if (found) {
+ DRM_DEBUG_DRIVER("Associated secure display PHY ID as %d", idx);
+ *phy_id = idx;
+ } else {
+ DRM_WARN("Can't find associated phy ID");
+ return false;
+ }
+
+ return true;
+}
+
static void amdgpu_dm_set_crc_window_default(struct drm_crtc *crtc, struct dc_stream_state *stream)
{
struct drm_device *drm_dev = crtc->dev;
struct amdgpu_display_manager *dm = &drm_to_adev(drm_dev)->dm;
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_dm_connector *aconnector;
bool was_activated;
+ uint8_t phy_id;
+ unsigned long flags;
+ int i;
- spin_lock_irq(&drm_dev->event_lock);
- was_activated = acrtc->dm_irq_params.window_param.activated;
- acrtc->dm_irq_params.window_param.x_start = 0;
- acrtc->dm_irq_params.window_param.y_start = 0;
- acrtc->dm_irq_params.window_param.x_end = 0;
- acrtc->dm_irq_params.window_param.y_end = 0;
- acrtc->dm_irq_params.window_param.activated = false;
- acrtc->dm_irq_params.window_param.update_win = false;
- acrtc->dm_irq_params.window_param.skip_frame_cnt = 0;
- spin_unlock_irq(&drm_dev->event_lock);
+ spin_lock_irqsave(&drm_dev->event_lock, flags);
+ was_activated = acrtc->dm_irq_params.crc_window_activated;
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ acrtc->dm_irq_params.window_param[i].x_start = 0;
+ acrtc->dm_irq_params.window_param[i].y_start = 0;
+ acrtc->dm_irq_params.window_param[i].x_end = 0;
+ acrtc->dm_irq_params.window_param[i].y_end = 0;
+ acrtc->dm_irq_params.window_param[i].enable = false;
+ acrtc->dm_irq_params.window_param[i].update_win = false;
+ acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 0;
+ }
+ acrtc->dm_irq_params.crc_window_activated = false;
+ spin_unlock_irqrestore(&drm_dev->event_lock, flags);
/* Disable secure_display if it was enabled */
- if (was_activated) {
+ if (was_activated && dm->secure_display_ctx.op_mode == LEGACY_MODE) {
/* stop ROI update on this crtc */
- flush_work(&dm->secure_display_ctxs[crtc->index].notify_ta_work);
- flush_work(&dm->secure_display_ctxs[crtc->index].forward_roi_work);
- dc_stream_forward_crc_window(stream, NULL, true);
+ flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].notify_ta_work);
+ flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].forward_roi_work);
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+ if (aconnector && get_phy_id(dm, aconnector, &phy_id)) {
+ if (dm->secure_display_ctx.support_mul_roi)
+ dc_stream_forward_multiple_crc_window(stream, NULL, phy_id, true);
+ else
+ dc_stream_forward_crc_window(stream, NULL, phy_id, true);
+ } else {
+ DRM_DEBUG_DRIVER("%s Can't find matching phy id", __func__);
+ }
}
}
static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work)
{
- struct secure_display_context *secure_display_ctx;
+ struct secure_display_crtc_context *crtc_ctx;
struct psp_context *psp;
struct ta_securedisplay_cmd *securedisplay_cmd;
struct drm_crtc *crtc;
struct dc_stream_state *stream;
+ struct amdgpu_dm_connector *aconnector;
uint8_t phy_inst;
+ struct amdgpu_display_manager *dm;
+ struct crc_data crc_cpy[MAX_CRC_WINDOW_NUM];
+ unsigned long flags;
+ uint8_t roi_idx = 0;
int ret;
+ int i;
- secure_display_ctx = container_of(work, struct secure_display_context, notify_ta_work);
- crtc = secure_display_ctx->crtc;
+ crtc_ctx = container_of(work, struct secure_display_crtc_context, notify_ta_work);
+ crtc = crtc_ctx->crtc;
if (!crtc)
return;
@@ -133,21 +364,50 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work)
return;
}
+ dm = &drm_to_adev(crtc->dev)->dm;
stream = to_amdgpu_crtc(crtc)->dm_irq_params.stream;
- phy_inst = stream->link->link_enc_hw_inst;
-
- /* need lock for multiple crtcs to use the command buffer */
- mutex_lock(&psp->securedisplay_context.mutex);
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+ if (!aconnector)
+ return;
- psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
- TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ mutex_lock(&crtc->dev->mode_config.mutex);
+ if (!get_phy_id(dm, aconnector, &phy_inst)) {
+ DRM_WARN("%s Can't find mapping phy id!", __func__);
+ mutex_unlock(&crtc->dev->mode_config.mutex);
+ return;
+ }
+ mutex_unlock(&crtc->dev->mode_config.mutex);
- securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst;
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ memcpy(crc_cpy, crtc_ctx->crc_info.crc, sizeof(struct crc_data) * MAX_CRC_WINDOW_NUM);
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ /* need lock for multiple crtcs to use the command buffer */
+ mutex_lock(&psp->securedisplay_context.mutex);
/* PSP TA is expected to finish data transmission over I2C within current frame,
* even there are up to 4 crtcs request to send in this frame.
*/
- ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ if (dm->secure_display_ctx.support_mul_roi) {
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2);
+
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.phy_id = phy_inst;
+
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ if (crc_cpy[i].crc_ready)
+ roi_idx |= 1 << i;
+ }
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.roi_idx = roi_idx;
+
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2);
+ } else {
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst;
+
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ }
if (!ret) {
if (securedisplay_cmd->status != TA_SECUREDISPLAY_STATUS__SUCCESS)
@@ -160,22 +420,47 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work)
static void
amdgpu_dm_forward_crc_window(struct work_struct *work)
{
- struct secure_display_context *secure_display_ctx;
+ struct secure_display_crtc_context *crtc_ctx;
struct amdgpu_display_manager *dm;
struct drm_crtc *crtc;
struct dc_stream_state *stream;
+ struct amdgpu_dm_connector *aconnector;
+ struct crc_window roi_cpy[MAX_CRC_WINDOW_NUM];
+ unsigned long flags;
+ uint8_t phy_id;
- secure_display_ctx = container_of(work, struct secure_display_context, forward_roi_work);
- crtc = secure_display_ctx->crtc;
+ crtc_ctx = container_of(work, struct secure_display_crtc_context, forward_roi_work);
+ crtc = crtc_ctx->crtc;
if (!crtc)
return;
dm = &drm_to_adev(crtc->dev)->dm;
stream = to_amdgpu_crtc(crtc)->dm_irq_params.stream;
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+ if (!aconnector)
+ return;
+
+ mutex_lock(&crtc->dev->mode_config.mutex);
+ if (!get_phy_id(dm, aconnector, &phy_id)) {
+ DRM_WARN("%s Can't find mapping phy id!", __func__);
+ mutex_unlock(&crtc->dev->mode_config.mutex);
+ return;
+ }
+ mutex_unlock(&crtc->dev->mode_config.mutex);
+
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ memcpy(roi_cpy, crtc_ctx->roi, sizeof(struct crc_window) * MAX_CRC_WINDOW_NUM);
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
mutex_lock(&dm->dc_lock);
- dc_stream_forward_crc_window(stream, &secure_display_ctx->rect, false);
+ if (dm->secure_display_ctx.support_mul_roi)
+ dc_stream_forward_multiple_crc_window(stream, roi_cpy,
+ phy_id, false);
+ else
+ dc_stream_forward_crc_window(stream, &roi_cpy[0].rect,
+ phy_id, false);
mutex_unlock(&dm->dc_lock);
}
@@ -186,7 +471,7 @@ bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc)
bool ret = false;
spin_lock_irq(&drm_dev->event_lock);
- ret = acrtc->dm_irq_params.window_param.activated;
+ ret = acrtc->dm_irq_params.crc_window_activated;
spin_unlock_irq(&drm_dev->event_lock);
return ret;
@@ -224,10 +509,14 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
mutex_lock(&adev->dm.dc_lock);
+ /* For PSR1, check that the panel has exited PSR */
+ if (stream_state->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1)
+ amdgpu_dm_psr_wait_disable(stream_state);
+
/* Enable or disable CRTC CRC generation */
if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) {
if (!dc_stream_configure_crc(stream_state->ctx->dc,
- stream_state, NULL, enable, enable)) {
+ stream_state, NULL, enable, enable, 0, true)) {
ret = -EINVAL;
goto unlock;
}
@@ -258,6 +547,10 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
struct drm_crtc_commit *commit;
struct dm_crtc_state *crtc_state;
struct drm_device *drm_dev = crtc->dev;
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ struct amdgpu_display_manager *dm = &adev->dm;
+#endif
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct drm_dp_aux *aux = NULL;
bool enable = false;
@@ -326,6 +619,9 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
if (!connector->state || connector->state->crtc != crtc)
continue;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconn = to_amdgpu_dm_connector(connector);
break;
}
@@ -354,6 +650,17 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
}
+ /*
+ * Reading the CRC requires the vblank interrupt handler to be
+ * enabled. Keep a reference until CRC capture stops.
+ */
+ enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src);
+ if (!enabled && enable) {
+ ret = drm_crtc_vblank_get(crtc);
+ if (ret)
+ goto cleanup;
+ }
+
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
/* Reset secure_display when we change crc source from debugfs */
amdgpu_dm_set_crc_window_default(crtc, crtc_state->stream);
@@ -364,16 +671,7 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
goto cleanup;
}
- /*
- * Reading the CRC requires the vblank interrupt handler to be
- * enabled. Keep a reference until CRC capture stops.
- */
- enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src);
if (!enabled && enable) {
- ret = drm_crtc_vblank_get(crtc);
- if (ret)
- goto cleanup;
-
if (dm_is_crc_source_dprx(source)) {
if (drm_dp_start_crc(aux, crtc)) {
DRM_DEBUG_DRIVER("dp start crc failed\n");
@@ -399,6 +697,13 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
/* Reset crc_skipped on dm state */
crtc_state->crc_skip_count = 0;
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ /* Initialize phy id mapping table for secure display */
+ if (dm->secure_display_ctx.op_mode == LEGACY_MODE &&
+ !dm->secure_display_ctx.phy_mapping_updated)
+ update_phy_id_mapping(adev);
+#endif
+
cleanup:
if (commit)
drm_crtc_commit_put(commit);
@@ -453,7 +758,7 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc)
}
if (dm_is_crc_source_crtc(cur_crc_src)) {
- if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state,
+ if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, 0,
&crcs[0], &crcs[1], &crcs[2]))
return;
@@ -469,8 +774,17 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc)
enum amdgpu_dm_pipe_crc_source cur_crc_src;
struct amdgpu_crtc *acrtc = NULL;
struct amdgpu_device *adev = NULL;
- struct secure_display_context *secure_display_ctx = NULL;
+ struct secure_display_crtc_context *crtc_ctx = NULL;
+ bool reset_crc_frame_count[MAX_CRC_WINDOW_NUM] = {false};
+ uint32_t crc_r[MAX_CRC_WINDOW_NUM] = {0};
+ uint32_t crc_g[MAX_CRC_WINDOW_NUM] = {0};
+ uint32_t crc_b[MAX_CRC_WINDOW_NUM] = {0};
unsigned long flags1;
+ bool forward_roi_change = false;
+ bool notify_ta = false;
+ bool all_crc_ready = true;
+ struct dc_stream_state *stream_state;
+ int i;
if (crtc == NULL)
return;
@@ -478,78 +792,160 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc)
acrtc = to_amdgpu_crtc(crtc);
adev = drm_to_adev(crtc->dev);
drm_dev = crtc->dev;
+ stream_state = to_dm_crtc_state(crtc->state)->stream;
spin_lock_irqsave(&drm_dev->event_lock, flags1);
cur_crc_src = acrtc->dm_irq_params.crc_src;
/* Early return if CRC capture is not enabled. */
if (!amdgpu_dm_is_valid_crc_source(cur_crc_src) ||
- !dm_is_crc_source_crtc(cur_crc_src))
- goto cleanup;
-
- if (!acrtc->dm_irq_params.window_param.activated)
- goto cleanup;
+ !dm_is_crc_source_crtc(cur_crc_src)) {
+ spin_unlock_irqrestore(&drm_dev->event_lock, flags1);
+ return;
+ }
- if (acrtc->dm_irq_params.window_param.skip_frame_cnt) {
- acrtc->dm_irq_params.window_param.skip_frame_cnt -= 1;
- goto cleanup;
+ if (!acrtc->dm_irq_params.crc_window_activated) {
+ spin_unlock_irqrestore(&drm_dev->event_lock, flags1);
+ return;
}
- secure_display_ctx = &adev->dm.secure_display_ctxs[acrtc->crtc_id];
- if (WARN_ON(secure_display_ctx->crtc != crtc)) {
- /* We have set the crtc when creating secure_display_context,
+ crtc_ctx = &adev->dm.secure_display_ctx.crtc_ctx[acrtc->crtc_id];
+ if (WARN_ON(crtc_ctx->crtc != crtc)) {
+ /* We have set the crtc when creating secure_display_crtc_context,
* don't expect it to be changed here.
*/
- secure_display_ctx->crtc = crtc;
+ crtc_ctx->crtc = crtc;
}
- if (acrtc->dm_irq_params.window_param.update_win) {
- /* prepare work for dmub to update ROI */
- secure_display_ctx->rect.x = acrtc->dm_irq_params.window_param.x_start;
- secure_display_ctx->rect.y = acrtc->dm_irq_params.window_param.y_start;
- secure_display_ctx->rect.width = acrtc->dm_irq_params.window_param.x_end -
- acrtc->dm_irq_params.window_param.x_start;
- secure_display_ctx->rect.height = acrtc->dm_irq_params.window_param.y_end -
- acrtc->dm_irq_params.window_param.y_start;
- schedule_work(&secure_display_ctx->forward_roi_work);
-
- acrtc->dm_irq_params.window_param.update_win = false;
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ struct crc_params crc_window = {
+ .windowa_x_start = acrtc->dm_irq_params.window_param[i].x_start,
+ .windowa_y_start = acrtc->dm_irq_params.window_param[i].y_start,
+ .windowa_x_end = acrtc->dm_irq_params.window_param[i].x_end,
+ .windowa_y_end = acrtc->dm_irq_params.window_param[i].y_end,
+ .windowb_x_start = acrtc->dm_irq_params.window_param[i].x_start,
+ .windowb_y_start = acrtc->dm_irq_params.window_param[i].y_start,
+ .windowb_x_end = acrtc->dm_irq_params.window_param[i].x_end,
+ .windowb_y_end = acrtc->dm_irq_params.window_param[i].y_end,
+ };
+
+ crtc_ctx->roi[i].enable = acrtc->dm_irq_params.window_param[i].enable;
+
+ if (!acrtc->dm_irq_params.window_param[i].enable) {
+ crtc_ctx->crc_info.crc[i].crc_ready = false;
+ continue;
+ }
- /* Statically skip 1 frame, because we may need to wait below things
- * before sending ROI to dmub:
- * 1. We defer the work by using system workqueue.
- * 2. We may need to wait for dc_lock before accessing dmub.
- */
- acrtc->dm_irq_params.window_param.skip_frame_cnt = 1;
+ if (acrtc->dm_irq_params.window_param[i].skip_frame_cnt) {
+ acrtc->dm_irq_params.window_param[i].skip_frame_cnt -= 1;
+ crtc_ctx->crc_info.crc[i].crc_ready = false;
+ continue;
+ }
- } else {
- /* prepare work for psp to read ROI/CRC and send to I2C */
- schedule_work(&secure_display_ctx->notify_ta_work);
+ if (acrtc->dm_irq_params.window_param[i].update_win) {
+ crtc_ctx->roi[i].rect.x = crc_window.windowa_x_start;
+ crtc_ctx->roi[i].rect.y = crc_window.windowa_y_start;
+ crtc_ctx->roi[i].rect.width = crc_window.windowa_x_end -
+ crc_window.windowa_x_start;
+ crtc_ctx->roi[i].rect.height = crc_window.windowa_y_end -
+ crc_window.windowa_y_start;
+
+ if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE)
+ /* forward task to dmub to update ROI */
+ forward_roi_change = true;
+ else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE)
+ /* update ROI via dm */
+ dc_stream_configure_crc(stream_state->ctx->dc, stream_state,
+ &crc_window, true, true, i, false);
+
+ reset_crc_frame_count[i] = true;
+
+ acrtc->dm_irq_params.window_param[i].update_win = false;
+
+ /* Statically skip 1 frame, because we may need to wait below things
+ * before sending ROI to dmub:
+ * 1. We defer the work by using system workqueue.
+ * 2. We may need to wait for dc_lock before accessing dmub.
+ */
+ acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 1;
+ crtc_ctx->crc_info.crc[i].crc_ready = false;
+ } else {
+ if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, i,
+ &crc_r[i], &crc_g[i], &crc_b[i]))
+ DRM_ERROR("Secure Display: fail to get crc from engine %d\n", i);
+
+ if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE)
+ /* forward task to psp to read ROI/CRC and output via I2C */
+ notify_ta = true;
+ else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE)
+ /* Keep overwriting the ROI window so that it does not get changed. */
+ dc_stream_configure_crc(stream_state->ctx->dc, stream_state,
+ &crc_window, true, true, i, false);
+
+ /* crc ready for psp to read out */
+ crtc_ctx->crc_info.crc[i].crc_ready = true;
+ }
}
-cleanup:
spin_unlock_irqrestore(&drm_dev->event_lock, flags1);
+
+ if (forward_roi_change)
+ schedule_work(&crtc_ctx->forward_roi_work);
+
+ if (notify_ta)
+ schedule_work(&crtc_ctx->notify_ta_work);
+
+ spin_lock_irqsave(&crtc_ctx->crc_info.lock, flags1);
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ crtc_ctx->crc_info.crc[i].crc_R = crc_r[i];
+ crtc_ctx->crc_info.crc[i].crc_G = crc_g[i];
+ crtc_ctx->crc_info.crc[i].crc_B = crc_b[i];
+
+ if (!crtc_ctx->roi[i].enable) {
+ crtc_ctx->crc_info.crc[i].frame_count = 0;
+ continue;
+ }
+
+ if (!crtc_ctx->crc_info.crc[i].crc_ready)
+ all_crc_ready = false;
+
+ if (reset_crc_frame_count[i] || crtc_ctx->crc_info.crc[i].frame_count == UINT_MAX)
+ /* Reset the reference frame count after the user updates the ROI
+ * or when it reaches the maximum value.
+ */
+ crtc_ctx->crc_info.crc[i].frame_count = 0;
+ else
+ crtc_ctx->crc_info.crc[i].frame_count += 1;
+ }
+ spin_unlock_irqrestore(&crtc_ctx->crc_info.lock, flags1);
+
+ if (all_crc_ready)
+ complete_all(&crtc_ctx->crc_info.completion);
}
-struct secure_display_context *
-amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev)
+void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev)
{
- struct secure_display_context *secure_display_ctxs = NULL;
+ struct secure_display_crtc_context *crtc_ctx = NULL;
int i;
- secure_display_ctxs = kcalloc(adev->mode_info.num_crtc,
- sizeof(struct secure_display_context),
+ crtc_ctx = kcalloc(adev->mode_info.num_crtc,
+ sizeof(struct secure_display_crtc_context),
GFP_KERNEL);
- if (!secure_display_ctxs)
- return NULL;
+ if (!crtc_ctx) {
+ adev->dm.secure_display_ctx.crtc_ctx = NULL;
+ return;
+ }
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- INIT_WORK(&secure_display_ctxs[i].forward_roi_work, amdgpu_dm_forward_crc_window);
- INIT_WORK(&secure_display_ctxs[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read);
- secure_display_ctxs[i].crtc = &adev->mode_info.crtcs[i]->base;
+ INIT_WORK(&crtc_ctx[i].forward_roi_work, amdgpu_dm_forward_crc_window);
+ INIT_WORK(&crtc_ctx[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read);
+ crtc_ctx[i].crtc = &adev->mode_info.crtcs[i]->base;
+ spin_lock_init(&crtc_ctx[i].crc_info.lock);
}
- return secure_display_ctxs;
+ adev->dm.secure_display_ctx.crtc_ctx = crtc_ctx;
+
+ adev->dm.secure_display_ctx.op_mode = DISPLAY_CRC_MODE;
}
#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
index 748e80ef40d0..95bdb8699d7f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
@@ -40,20 +41,53 @@ enum amdgpu_dm_pipe_crc_source {
};
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+#define MAX_CRTC 6
+
+enum secure_display_mode {
+ /* via dmub + psp */
+ LEGACY_MODE = 0,
+ /* driver directly */
+ DISPLAY_CRC_MODE,
+ SECURE_DISPLAY_MODE_MAX,
+};
+
+struct phy_id_mapping {
+ bool assigned;
+ bool is_mst;
+ uint8_t enc_hw_inst;
+ u8 lct;
+ u8 port_num;
+ u8 rad[8];
+};
+
+struct crc_data {
+ uint32_t crc_R;
+ uint32_t crc_G;
+ uint32_t crc_B;
+ uint32_t frame_count;
+ bool crc_ready;
+};
+
+struct crc_info {
+ struct crc_data crc[MAX_CRC_WINDOW_NUM];
+ struct completion completion;
+ spinlock_t lock;
+};
+
struct crc_window_param {
uint16_t x_start;
uint16_t y_start;
uint16_t x_end;
uint16_t y_end;
/* CRC window is activated or not*/
- bool activated;
+ bool enable;
/* Update crc window during vertical blank or not */
bool update_win;
/* skip reading/writing for few frames */
int skip_frame_cnt;
};
-struct secure_display_context {
+struct secure_display_crtc_context {
/* work to notify PSP TA*/
struct work_struct notify_ta_work;
@@ -63,7 +97,20 @@ struct secure_display_context {
struct drm_crtc *crtc;
/* Region of Interest (ROI) */
- struct rect rect;
+ struct crc_window roi[MAX_CRC_WINDOW_NUM];
+
+ struct crc_info crc_info;
+};
+
+struct secure_display_context {
+
+ struct secure_display_crtc_context *crtc_ctx;
+ /* Whether dmub supports multiple ROI settings */
+ bool support_mul_roi;
+ enum secure_display_mode op_mode;
+ bool phy_mapping_updated;
+ int phy_id_mapping_cnt;
+ struct phy_id_mapping phy_id_mapping[MAX_CRTC];
};
#endif
@@ -95,8 +142,7 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc);
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc);
void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc);
-struct secure_display_context *amdgpu_dm_crtc_secure_display_create_contexts(
- struct amdgpu_device *adev);
+void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev);
#else
#define amdgpu_dm_crc_window_is_activated(x)
#define amdgpu_dm_crtc_handle_crc_window_irq(x)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 97b7a0b8a1c2..1ec9d03ad747 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -35,6 +35,9 @@
#include "amdgpu_dm_trace.h"
#include "amdgpu_dm_debugfs.h"
+#define HPD_DETECTION_PERIOD_uS 2000000
+#define HPD_DETECTION_TIME_uS 100000
+
void amdgpu_dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc)
{
struct drm_crtc *crtc = &acrtc->base;
@@ -90,13 +93,157 @@ int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
return rc;
}
-bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state)
+bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state)
{
return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE ||
dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED;
}
-static void vblank_control_worker(struct work_struct *work)
+/**
+ * amdgpu_dm_crtc_set_panel_sr_feature() - Manage panel self-refresh features.
+ *
+ * @vblank_work: is a pointer to a struct vblank_control_work object.
+ * @vblank_enabled: indicates whether the DRM vblank counter is currently
+ * enabled (true) or disabled (false).
+ * @allow_sr_entry: represents whether entry into the self-refresh mode is
+ * allowed (true) or not allowed (false).
+ *
+ * The DRM vblank counter enable/disable action is used as the trigger to enable
+ * or disable various panel self-refresh features:
+ *
+ * Panel Replay and PSR SU
+ * - Enable when:
+ * - VRR is disabled
+ * - vblank counter is disabled
+ * - entry is allowed (usermode demonstrates an adequate number of fast
+ * commits)
+ * - CRC capture window isn't active
+ * - Keep enabled even when vblank counter gets enabled
+ *
+ * PSR1
+ * - Enable condition same as above
+ * - Disable when vblank counter is enabled
+ */
+static void amdgpu_dm_crtc_set_panel_sr_feature(
+ struct vblank_control_work *vblank_work,
+ bool vblank_enabled, bool allow_sr_entry)
+{
+ struct dc_link *link = vblank_work->stream->link;
+ bool is_sr_active = (link->replay_settings.replay_allow_active ||
+ link->psr_settings.psr_allow_active);
+ bool is_crc_window_active = false;
+ bool vrr_active = amdgpu_dm_crtc_vrr_active_irq(vblank_work->acrtc);
+
+#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+ is_crc_window_active =
+ amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base);
+#endif
+
+ if (link->replay_settings.replay_feature_enabled && !vrr_active &&
+ allow_sr_entry && !is_sr_active && !is_crc_window_active) {
+ amdgpu_dm_replay_enable(vblank_work->stream, true);
+ } else if (vblank_enabled) {
+ if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active)
+ amdgpu_dm_psr_disable(vblank_work->stream, false);
+ } else if (link->psr_settings.psr_feature_enabled && !vrr_active &&
+ allow_sr_entry && !is_sr_active && !is_crc_window_active) {
+
+ struct amdgpu_dm_connector *aconn =
+ (struct amdgpu_dm_connector *) vblank_work->stream->dm_stream_context;
+
+ if (!aconn->disallow_edp_enter_psr) {
+ struct amdgpu_display_manager *dm = vblank_work->dm;
+
+ amdgpu_dm_psr_enable(vblank_work->stream);
+ if (dm->idle_workqueue &&
+ (dm->dc->config.disable_ips == DMUB_IPS_ENABLE) &&
+ dm->dc->idle_optimizations_allowed &&
+ dm->idle_workqueue->enable &&
+ !dm->idle_workqueue->running)
+ schedule_work(&dm->idle_workqueue->work);
+ }
+ }
+}
+
+bool amdgpu_dm_is_headless(struct amdgpu_device *adev)
+{
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct drm_device *dev;
+ bool is_headless = true;
+
+ if (adev == NULL)
+ return true;
+
+ dev = adev->dm.ddev;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ if (connector->status == connector_status_connected) {
+ is_headless = false;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ return is_headless;
+}
+
+static void amdgpu_dm_idle_worker(struct work_struct *work)
+{
+ struct idle_workqueue *idle_work;
+
+ idle_work = container_of(work, struct idle_workqueue, work);
+ idle_work->dm->idle_workqueue->running = true;
+
+ while (idle_work->enable) {
+ fsleep(HPD_DETECTION_PERIOD_uS);
+ mutex_lock(&idle_work->dm->dc_lock);
+ if (!idle_work->dm->dc->idle_optimizations_allowed) {
+ mutex_unlock(&idle_work->dm->dc_lock);
+ break;
+ }
+ dc_allow_idle_optimizations(idle_work->dm->dc, false);
+
+ mutex_unlock(&idle_work->dm->dc_lock);
+ fsleep(HPD_DETECTION_TIME_uS);
+ mutex_lock(&idle_work->dm->dc_lock);
+
+ if (!amdgpu_dm_is_headless(idle_work->dm->adev) &&
+ !amdgpu_dm_psr_is_active_allowed(idle_work->dm)) {
+ mutex_unlock(&idle_work->dm->dc_lock);
+ break;
+ }
+
+ if (idle_work->enable) {
+ dc_post_update_surfaces_to_stream(idle_work->dm->dc);
+ dc_allow_idle_optimizations(idle_work->dm->dc, true);
+ }
+ mutex_unlock(&idle_work->dm->dc_lock);
+ }
+ idle_work->dm->idle_workqueue->running = false;
+}
+
+struct idle_workqueue *idle_create_workqueue(struct amdgpu_device *adev)
+{
+ struct idle_workqueue *idle_work;
+
+ idle_work = kzalloc(sizeof(*idle_work), GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(idle_work))
+ return NULL;
+
+ idle_work->dm = &adev->dm;
+ idle_work->enable = false;
+ idle_work->running = false;
+ INIT_WORK(&idle_work->work, amdgpu_dm_idle_worker);
+
+ return idle_work;
+}
+
+static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)
{
struct vblank_control_work *vblank_work =
container_of(work, struct vblank_control_work, work);
@@ -109,9 +256,8 @@ static void vblank_control_worker(struct work_struct *work)
else if (dm->active_vblank_irq_count)
dm->active_vblank_irq_count--;
- dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0);
-
- DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0);
+ if (dm->active_vblank_irq_count > 0)
+ dc_allow_idle_optimizations(dm->dc, false);
/*
* Control PSR based on vblank requirements from OS
@@ -123,25 +269,15 @@ static void vblank_control_worker(struct work_struct *work)
* where the SU region is the full hactive*vactive region. See
* fill_dc_dirty_rects().
*/
- if (vblank_work->stream && vblank_work->stream->link) {
- /*
- * Prioritize replay, instead of psr
- */
- if (vblank_work->stream->link->replay_settings.replay_feature_enabled)
- amdgpu_dm_replay_enable(vblank_work->stream, false);
- else if (vblank_work->enable) {
- if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 &&
- vblank_work->stream->link->psr_settings.psr_allow_active)
- amdgpu_dm_psr_disable(vblank_work->stream);
- } else if (vblank_work->stream->link->psr_settings.psr_feature_enabled &&
- !vblank_work->stream->link->psr_settings.psr_allow_active &&
-#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
- !amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) &&
-#endif
- vblank_work->stream->link->panel_config.psr.disallow_replay &&
- vblank_work->acrtc->dm_irq_params.allow_psr_entry) {
- amdgpu_dm_psr_enable(vblank_work->stream);
- }
+ if (vblank_work->stream && vblank_work->stream->link && vblank_work->acrtc) {
+ amdgpu_dm_crtc_set_panel_sr_feature(
+ vblank_work, vblank_work->enable,
+ vblank_work->acrtc->dm_irq_params.allow_sr_entry);
+ }
+
+ if (dm->active_vblank_irq_count == 0) {
+ dc_post_update_surfaces_to_stream(dm->dc);
+ dc_allow_idle_optimizations(dm->dc, true);
}
mutex_unlock(&dm->dc_lock);
@@ -151,37 +287,102 @@ static void vblank_control_worker(struct work_struct *work)
kfree(vblank_work);
}
-static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)
+static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable)
{
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct amdgpu_device *adev = drm_to_adev(crtc->dev);
struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state);
struct amdgpu_display_manager *dm = &adev->dm;
struct vblank_control_work *work;
+ int irq_type;
int rc = 0;
if (acrtc->otg_inst == -1)
goto skip;
+ irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id);
+
if (enable) {
- /* vblank irq on -> Only need vupdate irq in vrr mode */
- if (amdgpu_dm_crtc_vrr_active(acrtc_state))
- rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true);
+ struct dc *dc = adev->dm.dc;
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);
+ struct psr_settings *psr = &acrtc_state->stream->link->psr_settings;
+ struct replay_settings *pr = &acrtc_state->stream->link->replay_settings;
+ bool sr_supported = (psr->psr_version != DC_PSR_VERSION_UNSUPPORTED) ||
+ pr->config.replay_supported;
+
+ /*
+ * IPS and self-refresh features can cause the vblank counter to reset
+ * between vblank disable and enable.
+ * This may cause the system to get stuck waiting for the vblank counter.
+ * Call drm_crtc_vblank_restore() to estimate missed vblanks by using
+ * timestamps and update the vblank counter in DRM.
+ */
+ if (dc->caps.ips_support &&
+ dc->config.disable_ips != DMUB_IPS_DISABLE_ALL &&
+ sr_supported && vblank->config.disable_immediate)
+ drm_crtc_vblank_restore(crtc);
+ }
+
+ if (dc_supports_vrr(dm->dc->ctx->dce_version)) {
+ if (enable) {
+ /* vblank irq on -> Only need vupdate irq in vrr mode */
+ if (amdgpu_dm_crtc_vrr_active(acrtc_state))
+ rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true);
+ } else {
+ /* vblank irq off -> vupdate irq off */
+ rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, false);
+ }
+ }
+
+ if (rc)
+ return rc;
+
+ /* crtc vblank or vstartup interrupt */
+ if (enable) {
+ rc = amdgpu_irq_get(adev, &adev->crtc_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Get crtc_irq ret=%d\n", rc);
} else {
- /* vblank irq off -> vupdate irq off */
- rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, false);
+ rc = amdgpu_irq_put(adev, &adev->crtc_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Put crtc_irq ret=%d\n", rc);
}
if (rc)
return rc;
- rc = (enable)
- ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id)
- : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id);
+ /*
+ * hubp surface flip interrupt
+ *
+ * We have no guarantee that the frontend index maps to the same
+ * backend index - some even map to more than one.
+ *
+ * TODO: Use a different interrupt or check DC itself for the mapping.
+ */
+ if (enable) {
+ rc = amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Get pageflip_irq ret=%d\n", rc);
+ } else {
+ rc = amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Put pageflip_irq ret=%d\n", rc);
+ }
if (rc)
return rc;
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ /* crtc vline0 interrupt, only available on DCN+ */
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) != 0) {
+ if (enable) {
+ rc = amdgpu_irq_get(adev, &adev->vline0_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Get vline0_irq ret=%d\n", rc);
+ } else {
+ rc = amdgpu_irq_put(adev, &adev->vline0_irq, irq_type);
+ drm_dbg_vbl(crtc->dev, "Put vline0_irq ret=%d\n", rc);
+ }
+
+ if (rc)
+ return rc;
+ }
+#endif
skip:
if (amdgpu_in_reset(adev))
return 0;
@@ -191,7 +392,7 @@ skip:
if (!work)
return -ENOMEM;
- INIT_WORK(&work->work, vblank_control_worker);
+ INIT_WORK(&work->work, amdgpu_dm_crtc_vblank_control_worker);
work->dm = dm;
work->acrtc = acrtc;
work->enable = enable;
@@ -209,15 +410,15 @@ skip:
int amdgpu_dm_crtc_enable_vblank(struct drm_crtc *crtc)
{
- return dm_set_vblank(crtc, true);
+ return amdgpu_dm_crtc_set_vblank(crtc, true);
}
void amdgpu_dm_crtc_disable_vblank(struct drm_crtc *crtc)
{
- dm_set_vblank(crtc, false);
+ amdgpu_dm_crtc_set_vblank(crtc, false);
}
-static void dm_crtc_destroy_state(struct drm_crtc *crtc,
+static void amdgpu_dm_crtc_destroy_state(struct drm_crtc *crtc,
struct drm_crtc_state *state)
{
struct dm_crtc_state *cur = to_dm_crtc_state(state);
@@ -233,7 +434,7 @@ static void dm_crtc_destroy_state(struct drm_crtc *crtc,
kfree(state);
}
-static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc)
+static struct drm_crtc_state *amdgpu_dm_crtc_duplicate_state(struct drm_crtc *crtc)
{
struct dm_crtc_state *state, *cur;
@@ -260,8 +461,10 @@ static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc)
state->freesync_config = cur->freesync_config;
state->cm_has_degamma = cur->cm_has_degamma;
state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
+ state->regamma_tf = cur->regamma_tf;
state->crc_skip_count = cur->crc_skip_count;
state->mpo_requested = cur->mpo_requested;
+ state->cursor_mode = cur->cursor_mode;
/* TODO Duplicate dc_stream after objects are stream object is flattened */
return &state->base;
@@ -273,12 +476,12 @@ static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc)
kfree(crtc);
}
-static void dm_crtc_reset_state(struct drm_crtc *crtc)
+static void amdgpu_dm_crtc_reset_state(struct drm_crtc *crtc)
{
struct dm_crtc_state *state;
if (crtc->state)
- dm_crtc_destroy_state(crtc, crtc->state);
+ amdgpu_dm_crtc_destroy_state(crtc, crtc->state);
state = kzalloc(sizeof(*state), GFP_KERNEL);
if (WARN_ON(!state))
@@ -296,14 +499,78 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
}
#endif
+#ifdef AMD_PRIVATE_COLOR
+/**
+ * dm_crtc_additional_color_mgmt - attach driver-private CRTC color properties
+ * @crtc: DRM CRTC to extend
+ *
+ * Exposes the post-blending CRTC regamma transfer function property next to
+ * the DRM CRTC gamma LUT. The property is attached with
+ * AMDGPU_TRANSFER_FUNCTION_DEFAULT, which means a linear transfer function,
+ * i.e. the behaviour the CRTC gamma LUT has without this property. Nothing is
+ * attached unless the DC color caps report mpc.ogam_ram.
+ */
+static void
+dm_crtc_additional_color_mgmt(struct drm_crtc *crtc)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+	/* Without the mpc.ogam_ram capability there is nothing to expose. */
+	if (!adev->dm.dc->caps.color.mpc.ogam_ram)
+		return;
+
+	drm_object_attach_property(&crtc->base,
+				   adev->mode_info.regamma_tf_property,
+				   AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+}
+
+/**
+ * amdgpu_dm_atomic_crtc_set_property - set a driver-private CRTC property
+ * @crtc: CRTC the property is being set on
+ * @state: CRTC state to update (converted with to_dm_crtc_state())
+ * @property: property being set
+ * @val: new value for @property
+ *
+ * Only the regamma transfer function property is handled here. When the value
+ * actually changes it is stored in the DM CRTC state and color_mgmt_changed is
+ * raised on the base state.
+ *
+ * Return: 0 on success, -EINVAL if @property is not handled by this driver.
+ */
+static int
+amdgpu_dm_atomic_crtc_set_property(struct drm_crtc *crtc,
+				   struct drm_crtc_state *state,
+				   struct drm_property *property,
+				   uint64_t val)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(state);
+
+	if (property == adev->mode_info.regamma_tf_property) {
+		/* Only flag a color management change on a real transition. */
+		if (acrtc_state->regamma_tf != val) {
+			acrtc_state->regamma_tf = val;
+			acrtc_state->base.color_mgmt_changed |= 1;
+		}
+	} else {
+		/* Format string fixed: the original ended in an unbalanced "]]". */
+		drm_dbg_atomic(crtc->dev,
+			       "[CRTC:%d:%s] unknown property [PROP:%d:%s]\n",
+			       crtc->base.id, crtc->name,
+			       property->base.id, property->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_dm_atomic_crtc_get_property - read a driver-private CRTC property
+ * @crtc: CRTC the property is being read from
+ * @state: CRTC state to read (converted with to_dm_crtc_state())
+ * @property: property being queried
+ * @val: output, receives the current value of @property
+ *
+ * Return: 0 on success, -EINVAL if @property is not the regamma transfer
+ * function property (in which case @val is left untouched).
+ */
+static int
+amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc,
+				   const struct drm_crtc_state *state,
+				   struct drm_property *property,
+				   uint64_t *val)
+{
+	struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+	struct dm_crtc_state *dm_state = to_dm_crtc_state(state);
+
+	if (property != adev->mode_info.regamma_tf_property)
+		return -EINVAL;
+
+	*val = dm_state->regamma_tf;
+	return 0;
+}
+#endif
+
/* Implemented only the options currently available for the driver */
static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
- .reset = dm_crtc_reset_state,
+ .reset = amdgpu_dm_crtc_reset_state,
.destroy = amdgpu_dm_crtc_destroy,
.set_config = drm_atomic_helper_set_config,
.page_flip = drm_atomic_helper_page_flip,
- .atomic_duplicate_state = dm_crtc_duplicate_state,
- .atomic_destroy_state = dm_crtc_destroy_state,
+ .atomic_duplicate_state = amdgpu_dm_crtc_duplicate_state,
+ .atomic_destroy_state = amdgpu_dm_crtc_destroy_state,
.set_crc_source = amdgpu_dm_crtc_set_crc_source,
.verify_crc_source = amdgpu_dm_crtc_verify_crc_source,
.get_crc_sources = amdgpu_dm_crtc_get_crc_sources,
@@ -314,13 +581,17 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
#if defined(CONFIG_DEBUG_FS)
.late_register = amdgpu_dm_crtc_late_register,
#endif
+#ifdef AMD_PRIVATE_COLOR
+ .atomic_set_property = amdgpu_dm_atomic_crtc_set_property,
+ .atomic_get_property = amdgpu_dm_atomic_crtc_get_property,
+#endif
};
-static void dm_crtc_helper_disable(struct drm_crtc *crtc)
+static void amdgpu_dm_crtc_helper_disable(struct drm_crtc *crtc)
{
}
-static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state)
+static int amdgpu_dm_crtc_count_crtc_active_planes(struct drm_crtc_state *new_crtc_state)
{
struct drm_atomic_state *state = new_crtc_state->state;
struct drm_plane *plane;
@@ -352,8 +623,8 @@ static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state)
return num_active;
}
-static void dm_update_crtc_active_planes(struct drm_crtc *crtc,
- struct drm_crtc_state *new_crtc_state)
+static void amdgpu_dm_crtc_update_crtc_active_planes(struct drm_crtc *crtc,
+ struct drm_crtc_state *new_crtc_state)
{
struct dm_crtc_state *dm_new_crtc_state =
to_dm_crtc_state(new_crtc_state);
@@ -364,18 +635,18 @@ static void dm_update_crtc_active_planes(struct drm_crtc *crtc,
return;
dm_new_crtc_state->active_planes =
- count_crtc_active_planes(new_crtc_state);
+ amdgpu_dm_crtc_count_crtc_active_planes(new_crtc_state);
}
-static bool dm_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+static bool amdgpu_dm_crtc_helper_mode_fixup(struct drm_crtc *crtc,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
return true;
}
-static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
- struct drm_atomic_state *state)
+static int amdgpu_dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
{
struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state,
crtc);
@@ -386,7 +657,7 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
trace_amdgpu_dm_crtc_atomic_check(crtc_state);
- dm_update_crtc_active_planes(crtc, crtc_state);
+ amdgpu_dm_crtc_update_crtc_active_planes(crtc, crtc_state);
if (WARN_ON(unlikely(!dm_crtc_state->stream &&
amdgpu_dm_crtc_modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) {
@@ -417,6 +688,15 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
return -EINVAL;
}
+ if (!state->legacy_cursor_update && amdgpu_dm_crtc_vrr_active(dm_crtc_state)) {
+ struct drm_plane_state *primary_state;
+
+ /* Pull in primary plane for correct VRR handling */
+ primary_state = drm_atomic_get_plane_state(state, crtc->primary);
+ if (IS_ERR(primary_state))
+ return PTR_ERR(primary_state);
+ }
+
/* In some use cases, like reset, no stream is attached */
if (!dm_crtc_state->stream)
return 0;
@@ -429,9 +709,9 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
}
static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = {
- .disable = dm_crtc_helper_disable,
- .atomic_check = dm_crtc_helper_atomic_check,
- .mode_fixup = dm_crtc_helper_mode_fixup,
+ .disable = amdgpu_dm_crtc_helper_disable,
+ .atomic_check = amdgpu_dm_crtc_helper_atomic_check,
+ .mode_fixup = amdgpu_dm_crtc_helper_mode_fixup,
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
@@ -484,11 +764,23 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
* support programmable degamma anywhere.
*/
is_dcn = dm->adev->dm.dc->caps.color.dpp.dcn_arch;
- drm_crtc_enable_color_mgmt(&acrtc->base, is_dcn ? MAX_COLOR_LUT_ENTRIES : 0,
+ /* Don't enable DRM CRTC degamma property for DCN401 since the
+ * pre-blending degamma LUT doesn't apply to cursor, and therefore
+ * can't work similar to a post-blending degamma LUT as in other hw
+ * versions.
+ * TODO: revisit it once KMS plane color API is merged.
+ */
+ drm_crtc_enable_color_mgmt(&acrtc->base,
+ (is_dcn &&
+ dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01) ?
+ MAX_COLOR_LUT_ENTRIES : 0,
true, MAX_COLOR_LUT_ENTRIES);
drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
+#ifdef AMD_PRIVATE_COLOR
+ dm_crtc_additional_color_mgmt(&acrtc->base);
+#endif
return 0;
fail:
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h
index 17e948753f59..c1212947a77b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h
@@ -37,7 +37,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable);
bool amdgpu_dm_crtc_vrr_active_irq(struct amdgpu_crtc *acrtc);
-bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state);
+bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state);
int amdgpu_dm_crtc_enable_vblank(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 7c21e21bcc51..f263e1a4537e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
@@ -25,11 +26,13 @@
#include <linux/string_helpers.h>
#include <linux/uaccess.h>
+#include <media/cec-notifier.h>
#include "dc.h"
#include "amdgpu.h"
#include "amdgpu_dm.h"
#include "amdgpu_dm_debugfs.h"
+#include "amdgpu_dm_replay.h"
#include "dm_helpers.h"
#include "dmub/dmub_srv.h"
#include "resource.h"
@@ -37,6 +40,7 @@
#include "link_hwss.h"
#include "dc/dc_dmub_srv.h"
#include "link/protocols/link_dp_capability.h"
+#include "inc/hw/dchubbub.h"
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
#include "amdgpu_dm_psr.h"
@@ -256,7 +260,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
struct dc_link *link = connector->dc_link;
struct amdgpu_device *adev = drm_to_adev(connector->base.dev);
struct dc *dc = (struct dc *)link->dc;
- struct dc_link_settings prefer_link_settings;
+ struct dc_link_settings prefer_link_settings = {0};
char *wr_buf = NULL;
const uint32_t wr_buf_size = 40;
/* 0: lane_count; 1: link_rate */
@@ -387,7 +391,7 @@ static ssize_t dp_mst_link_setting(struct file *f, const char __user *buf,
struct dc_link *link = aconnector->dc_link;
struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev);
struct dc *dc = (struct dc *)link->dc;
- struct dc_link_settings prefer_link_settings;
+ struct dc_link_settings prefer_link_settings = {0};
char *wr_buf = NULL;
const uint32_t wr_buf_size = 40;
/* 0: lane_count; 1: link_rate */
@@ -611,7 +615,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf,
uint32_t wr_buf_size = 40;
long param[3];
bool use_prefer_link_setting;
- struct link_training_settings link_lane_settings;
+ struct link_training_settings link_lane_settings = {0};
int max_param_num = 3;
uint8_t param_nums = 0;
int r = 0;
@@ -766,7 +770,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
LINK_RATE_UNKNOWN, LINK_SPREAD_DISABLED};
struct dc_link_settings cur_link_settings = {LANE_COUNT_UNKNOWN,
LINK_RATE_UNKNOWN, LINK_SPREAD_DISABLED};
- struct link_training_settings link_training_settings;
+ struct link_training_settings link_training_settings = {0};
int i;
if (size == 0)
@@ -900,9 +904,10 @@ static int dmub_tracebuffer_show(struct seq_file *m, void *data)
{
struct amdgpu_device *adev = m->private;
struct dmub_srv_fb_info *fb_info = adev->dm.dmub_fb_info;
+ struct dmub_fw_meta_info *fw_meta_info = NULL;
struct dmub_debugfs_trace_entry *entries;
uint8_t *tbuf_base;
- uint32_t tbuf_size, max_entries, num_entries, i;
+ uint32_t tbuf_size, max_entries, num_entries, first_entry, i;
if (!fb_info)
return 0;
@@ -911,20 +916,42 @@ static int dmub_tracebuffer_show(struct seq_file *m, void *data)
if (!tbuf_base)
return 0;
- tbuf_size = fb_info->fb[DMUB_WINDOW_5_TRACEBUFF].size;
+ if (adev->dm.dmub_srv)
+ fw_meta_info = &adev->dm.dmub_srv->meta_info;
+
+ tbuf_size = fw_meta_info ? fw_meta_info->trace_buffer_size :
+ DMUB_TRACE_BUFFER_SIZE;
max_entries = (tbuf_size - sizeof(struct dmub_debugfs_trace_header)) /
sizeof(struct dmub_debugfs_trace_entry);
num_entries =
((struct dmub_debugfs_trace_header *)tbuf_base)->entry_count;
+ /* DMCUB tracebuffer is a ring. If it rolled over, print a hint that
+ * entries are being overwritten.
+ */
+ if (num_entries > max_entries)
+ seq_printf(m, "...\n");
+
+ first_entry = num_entries % max_entries;
num_entries = min(num_entries, max_entries);
entries = (struct dmub_debugfs_trace_entry
*)(tbuf_base +
sizeof(struct dmub_debugfs_trace_header));
- for (i = 0; i < num_entries; ++i) {
+ /* To print entries chronologically, start from the first entry till the
+ * top of buffer, then from base of buffer to first entry.
+ */
+ for (i = first_entry; i < num_entries; ++i) {
+ struct dmub_debugfs_trace_entry *entry = &entries[i];
+
+ seq_printf(m,
+ "trace_code=%u tick_count=%u param0=%u param1=%u\n",
+ entry->trace_code, entry->tick_count, entry->param0,
+ entry->param1);
+ }
+ for (i = 0; i < first_entry; ++i) {
struct dmub_debugfs_trace_entry *entry = &entries[i];
seq_printf(m,
@@ -959,6 +986,58 @@ static int dmub_fw_state_show(struct seq_file *m, void *data)
return seq_write(m, state_base, state_size);
}
+/* replay_capability_show() - show eDP panel replay capability
+ *
+ * The read function: replay_capability_show
+ * Shows whether the sink and the driver have Replay capability.
+ *
+ * cat /sys/kernel/debug/dri/0/eDP-X/replay_capability
+ *
+ * Expected output (three lines, each ending in "yes" or "no"):
+ * "Sink support: <yes|no>\n" - whether the panel supports Replay
+ * "Driver support: <yes|no>\n" - whether the driver supports Replay
+ * "Config support: <yes|no>\n" - whether Replay is marked supported in the
+ * link's replay settings
+ */
+static int replay_capability_show(struct seq_file *m, void *data)
+{
+	struct drm_connector *connector = m->private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct dc_link *link = aconnector->dc_link;
+	bool sink_ok = false;
+	bool driver_ok = false;
+
+	/* Needs a link that is present and whose signal has the eDP bit set. */
+	if (!link || link->type == dc_connection_none ||
+	    !(link->connector_signal & SIGNAL_TYPE_EDP))
+		return -ENODEV;
+
+	if (link->replay_settings.config.replay_supported) {
+		/* If Replay is already set to support, skip the checks */
+		sink_ok = true;
+		driver_ok = true;
+	} else {
+		sink_ok = amdgpu_dm_link_supports_replay(link, aconnector);
+
+		/* Driver support stays "no" when DC_DISABLE_REPLAY is set. */
+		if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
+			struct dc *dc = link->ctx->dc;
+
+			if (dc->ctx->dmub_srv && dc->ctx->dmub_srv->dmub)
+				driver_ok =
+					(bool)dc->ctx->dmub_srv->dmub->feature_caps.replay_supported;
+		}
+	}
+
+	seq_printf(m, "Sink support: %s\n", str_yes_no(sink_ok));
+	seq_printf(m, "Driver support: %s\n", str_yes_no(driver_ok));
+	seq_printf(m, "Config support: %s\n", str_yes_no(link->replay_settings.config.replay_supported));
+
+	return 0;
+}
+
/* psr_capability_show() - show eDP panel PSR capability
*
* The read function: sink_psr_capability_show
@@ -1091,7 +1170,7 @@ static int amdgpu_current_colorspace_show(struct seq_file *m, void *data)
case COLOR_SPACE_2020_RGB_FULLRANGE:
seq_puts(m, "BT2020_RGB");
break;
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
seq_puts(m, "BT2020_YCC");
break;
default:
@@ -1201,6 +1280,35 @@ static int internal_display_show(struct seq_file *m, void *data)
return 0;
}
+/*
+ * Returns the number of segments used if ODM Combine mode is enabled.
+ * Example usage: cat /sys/kernel/debug/dri/0/DP-1/odm_combine_segments
+ *
+ * Prints the value filled in by the timing generator's
+ * get_odm_combine_segments hook for the pipe whose stream uses this
+ * connector's link. If no such pipe exists, or the pipe has no timing
+ * generator or no such hook, the numeric value of -EOPNOTSUPP is printed.
+ * Fails with -ENODEV when the connector is not connected or has no link.
+ */
+static int odm_combine_segments_show(struct seq_file *m, void *unused)
+{
+	struct drm_connector *connector = m->private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct dc_link *link = aconnector->dc_link;
+	struct pipe_ctx *pipe_ctx = NULL;
+	int i, segments = -EOPNOTSUPP;
+
+	if (connector->status != connector_status_connected)
+		return -ENODEV;
+
+	/* The pipe lookup below dereferences the link. */
+	if (!link)
+		return -ENODEV;
+
+	/*
+	 * Only remember a pipe that really matches; previously a failed search
+	 * left pipe_ctx pointing at the last pipe in the array.
+	 */
+	for (i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *cur = &link->dc->current_state->res_ctx.pipe_ctx[i];
+
+		if (cur->stream && cur->stream->link == link) {
+			pipe_ctx = cur;
+			break;
+		}
+	}
+
+	/* A pipe may have no timing generator assigned; check before use. */
+	if (pipe_ctx && pipe_ctx->stream_res.tg &&
+	    pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments)
+		pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments(pipe_ctx->stream_res.tg, &segments);
+
+	seq_printf(m, "%d\n", segments);
+	return 0;
+}
+
/* function description
*
* generic SDP message access for testing
@@ -1219,7 +1327,7 @@ static ssize_t dp_sdp_message_debugfs_write(struct file *f, const char __user *b
size_t size, loff_t *pos)
{
int r;
- uint8_t data[36];
+ uint8_t data[36] = {0};
struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
struct dm_crtc_state *acrtc_state;
uint32_t write_size = 36;
@@ -1337,7 +1445,7 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf,
uint8_t param_nums = 0;
bool ret = false;
- if (!aconnector || !aconnector->dc_link)
+ if (!aconnector->dc_link)
return -EINVAL;
if (size == 0)
@@ -1446,26 +1554,25 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
const uint32_t rd_buf_size = 10;
struct pipe_ctx *pipe_ctx;
ssize_t result = 0;
- int i, r, str_len = 30;
+ int i, r, str_len = 10;
rd_buf = kcalloc(rd_buf_size, sizeof(char), GFP_KERNEL);
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -1473,10 +1580,9 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf,
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_clock_en);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
@@ -1566,7 +1672,9 @@ static ssize_t dp_dsc_clock_en_write(struct file *f, const char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -1632,7 +1740,6 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -1646,12 +1753,12 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -1659,10 +1766,9 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf,
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_slice_width);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
@@ -1750,7 +1856,9 @@ static ssize_t dp_dsc_slice_width_write(struct file *f, const char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -1816,7 +1924,6 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -1830,12 +1937,12 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -1843,10 +1950,9 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf,
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_slice_height);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
@@ -1934,7 +2040,9 @@ static ssize_t dp_dsc_slice_height_write(struct file *f, const char __user *buf,
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -1996,7 +2104,6 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -2010,12 +2117,12 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -2023,10 +2130,9 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf,
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_bits_per_pixel);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
@@ -2111,7 +2217,9 @@ static ssize_t dp_dsc_bits_per_pixel_write(struct file *f, const char __user *bu
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -2171,7 +2279,6 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -2185,12 +2292,12 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -2198,10 +2305,9 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf,
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_pic_width);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
@@ -2227,7 +2333,6 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -2241,12 +2346,12 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -2254,10 +2359,9 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf,
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_pic_height);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
@@ -2298,7 +2402,6 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -2312,12 +2415,12 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -2325,10 +2428,9 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf,
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_chunk_size);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
@@ -2369,7 +2471,6 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
char *rd_buf = NULL;
- char *rd_buf_ptr = NULL;
struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
struct display_stream_compressor *dsc;
struct dcn_dsc_state dsc_state = {0};
@@ -2383,12 +2484,12 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf,
if (!rd_buf)
return -ENOMEM;
- rd_buf_ptr = rd_buf;
-
for (i = 0; i < MAX_PIPES; i++) {
pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
if (pipe_ctx->stream &&
- pipe_ctx->stream->link == aconnector->dc_link)
+ pipe_ctx->stream->link == aconnector->dc_link &&
+ pipe_ctx->stream->sink &&
+ pipe_ctx->stream->sink == aconnector->dc_sink)
break;
}
@@ -2396,10 +2497,9 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf,
if (dsc)
dsc->funcs->dsc_read_state(dsc, &dsc_state);
- snprintf(rd_buf_ptr, str_len,
+ snprintf(rd_buf, str_len,
"%d\n",
dsc_state.dsc_slice_bpg_offset);
- rd_buf_ptr += str_len;
while (size) {
if (*pos >= rd_buf_size)
@@ -2565,6 +2665,49 @@ unlock:
}
/*
+ * IPS status. Read only.
+ *
+ * Example usage: cat /sys/kernel/debug/dri/0/amdgpu_dm_ips_status
+ */
+/*
+ * Dumps the IPS-related driver state to the seq_file: the raw disable_ips
+ * config value, the idle_optimizations_allowed flag, the idle workqueue flags
+ * (only if the workqueue exists) and the rcg/ips1/ips2 entry and exit
+ * counters from the DMUB shared state (only if a DMUB service is present).
+ * Always returns 0.
+ */
+static int ips_status_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ struct dc *dc = adev->dm.dc;
+ struct dc_dmub_srv *dc_dmub_srv;
+
+ /* Note: this prints the raw disable_ips config value. */
+ seq_printf(m, "IPS config: %d\n", dc->config.disable_ips);
+ seq_printf(m, "Idle optimization: %d\n", dc->idle_optimizations_allowed);
+
+ /* The workqueue lines are omitted entirely when no idle workqueue exists. */
+ if (adev->dm.idle_workqueue) {
+ seq_printf(m, "Idle workqueue - enabled: %d\n", adev->dm.idle_workqueue->enable);
+ seq_printf(m, "Idle workqueue - running: %d\n", adev->dm.idle_workqueue->running);
+ }
+
+ dc_dmub_srv = dc->ctx->dmub_srv;
+ if (dc_dmub_srv && dc_dmub_srv->dmub) {
+ uint32_t rcg_count, ips1_count, ips2_count;
+ /* Accessed through a volatile pointer, so every read below is a real load. */
+ volatile const struct dmub_shared_state_ips_fw *ips_fw =
+ &dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_FW].data.ips_fw;
+ /* Copy the entry counters into locals before printing them. */
+ rcg_count = ips_fw->rcg_entry_count;
+ ips1_count = ips_fw->ips1_entry_count;
+ ips2_count = ips_fw->ips2_entry_count;
+ seq_printf(m, "entry counts: rcg=%u ips1=%u ips2=%u\n",
+ rcg_count,
+ ips1_count,
+ ips2_count);
+ /* The same locals are reused for the exit counters. */
+ rcg_count = ips_fw->rcg_exit_count;
+ ips1_count = ips_fw->ips1_exit_count;
+ ips2_count = ips_fw->ips2_exit_count;
+ seq_printf(m, "exit counts: rcg=%u ips1=%u ips2=%u",
+ rcg_count,
+ ips1_count,
+ ips2_count);
+ /* The newline for the exit-counts line is emitted separately. */
+ seq_puts(m, "\n");
+ }
+ return 0;
+}
+
+/*
* Backlight at this moment. Read only.
* As written to display, taking ABM and backlight lut into account.
* Ranges from 0x0 to 0x10000 (= 100% PWM)
@@ -2707,16 +2850,80 @@ static int is_dpia_link_show(struct seq_file *m, void *data)
return 0;
}
+/**
+ * hdmi_cec_state_show - Read out the HDMI-CEC feature status
+ * @m: sequence file; its private data is the DRM connector.
+ * @data: unused.
+ *
+ * Prints the connector name and object id, followed by the status line:
+ * 1 when the connector has a CEC notifier, 0 otherwise.
+ *
+ * Return 0 on success
+ */
+static int hdmi_cec_state_show(struct seq_file *m, void *data)
+{
+	struct drm_connector *connector = m->private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	int cec_status = !!aconnector->notifier;
+
+	seq_printf(m, "%s:%d\n", connector->name, connector->base.id);
+	seq_printf(m, "HDMI-CEC status: %d\n", cec_status);
+
+	return 0;
+}
+
+/**
+ * hdmi_cec_state_write - Enable/Disable HDMI-CEC feature from driver side
+ * @f: file structure.
+ * @buf: userspace buffer, parsed with kstrtobool_from_user(); set to '1' to
+ *       enable, '0' to disable the cec feature.
+ * @size: size of buffer from userspace.
+ * @pos: unused.
+ *
+ * Enabling fails with -EINVAL when a CEC notifier is already present, and
+ * disabling fails with -EINVAL when there is none. An empty write is rejected
+ * with -EINVAL as well.
+ *
+ * Return size on success, error code on failure
+ */
+static ssize_t hdmi_cec_state_write(struct file *f, const char __user *buf,
+				    size_t size, loff_t *pos)
+{
+	struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
+	struct drm_device *ddev = aconnector->base.dev;
+	bool enable;
+	int ret;
+
+	if (size == 0)
+		return -EINVAL;
+
+	ret = kstrtobool_from_user(buf, size, &enable);
+	if (ret) {
+		drm_dbg_driver(ddev, "invalid user data !\n");
+		return ret;
+	}
+
+	if (!enable) {
+		/* Disable: drop the notifier, which must currently exist. */
+		if (!aconnector->notifier)
+			return -EINVAL;
+
+		cec_notifier_conn_unregister(aconnector->notifier);
+		aconnector->notifier = NULL;
+		return size;
+	}
+
+	/* Enable: refuse when a notifier is already registered. */
+	if (aconnector->notifier)
+		return -EINVAL;
+
+	ret = amdgpu_dm_initialize_hdmi_connector(aconnector);
+	if (ret)
+		return ret;
+
+	hdmi_cec_set_edid(aconnector);
+	return size;
+}
+
DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
DEFINE_SHOW_ATTRIBUTE(dp_lttpr_status);
DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability);
DEFINE_SHOW_ATTRIBUTE(internal_display);
+DEFINE_SHOW_ATTRIBUTE(odm_combine_segments);
+DEFINE_SHOW_ATTRIBUTE(replay_capability);
DEFINE_SHOW_ATTRIBUTE(psr_capability);
DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector);
DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status);
DEFINE_SHOW_ATTRIBUTE(is_dpia_link);
+DEFINE_SHOW_STORE_ATTRIBUTE(hdmi_cec_state);
static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
.owner = THIS_MODULE,
@@ -2852,7 +3059,8 @@ static const struct {
char *name;
const struct file_operations *fops;
} hdmi_debugfs_entries[] = {
- {"hdcp_sink_capability", &hdcp_sink_capability_fops}
+ {"hdcp_sink_capability", &hdcp_sink_capability_fops},
+ {"hdmi_cec_state", &hdmi_cec_state_fops}
};
/*
@@ -2883,6 +3091,51 @@ DEFINE_DEBUGFS_ATTRIBUTE(force_yuv420_output_fops, force_yuv420_output_get,
force_yuv420_output_set, "%llu\n");
/*
+ * Read Replay state
+ */
+static int replay_get_state(void *data, u64 *val)
+{
+	struct amdgpu_dm_connector *aconnector = data;
+	uint64_t replay_state = REPLAY_STATE_INVALID;
+
+	/* The state starts out as REPLAY_STATE_INVALID before the query. */
+	dc_link_get_replay_state(aconnector->dc_link, &replay_state);
+	*val = replay_state;
+
+	return 0;
+}
+
+/*
+ * Start / Stop capture Replay residency
+ */
+static int replay_set_residency(void *data, u64 val)
+{
+	struct amdgpu_dm_connector *aconnector = data;
+	struct dc_link *link = aconnector->dc_link;
+	u32 ignored = 0; /* the hook takes an output pointer; it is not reported here */
+
+	/* Any non-zero value is passed as "start", zero as "stop". */
+	link->dc->link_srv->edp_replay_residency(link, &ignored, val != 0,
+						 PR_RESIDENCY_MODE_PHY);
+	return 0;
+}
+
+/*
+ * Read Replay residency
+ */
+static int replay_get_residency(void *data, u64 *val)
+{
+	struct amdgpu_dm_connector *aconnector = data;
+	struct dc_link *link = aconnector->dc_link;
+	u32 measured = 0;
+
+	/* Same hook as the setter, called with the start flag false. */
+	link->dc->link_srv->edp_replay_residency(link, &measured, false,
+						 PR_RESIDENCY_MODE_PHY);
+	*val = measured;
+
+	return 0;
+}
+
+/*
* Read PSR state
*/
static int psr_get(void *data, u64 *val)
@@ -2905,9 +3158,9 @@ static int psr_read_residency(void *data, u64 *val)
{
struct amdgpu_dm_connector *connector = data;
struct dc_link *link = connector->dc_link;
- u32 residency;
+ u32 residency = 0;
- link->dc->link_srv->edp_get_psr_residency(link, &residency);
+ link->dc->link_srv->edp_get_psr_residency(link, &residency, PSR_RESIDENCY_MODE_PHY);
*val = (u64)residency;
@@ -2940,6 +3193,132 @@ static int allow_edp_hotplug_detection_set(void *data, u64 val)
return 0;
}
+/* Check whether the kernel disallows eDP from entering the PSR state.
+ * cat /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_psr
+ * 0: eDP is allowed to enter PSR; 1: disallowed
+ */
+static int disallow_edp_enter_psr_get(void *data, u64 *val)
+{
+ struct amdgpu_dm_connector *aconnector = data;
+
+ *val = (u64) aconnector->disallow_edp_enter_psr;
+ return 0;
+}
+
+/* Set whether the kernel disallows eDP from entering the PSR state.
+ * echo 0x0 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_psr
+ * 0: eDP is allowed to enter PSR; 1: disallowed
+ *
+ * Usage: lets a test app read the crc from a PSR-capable eDP rx.
+ *
+ * During kernel boot up, the kernel writes dpcd 0x170 = 5.
+ * This notifies the eDP rx that psr is enabled and lets the rx check crc.
+ * The rx fw starts checking crc for the rx internal logic.
+ * The crc read count within dpcd 0x246 is then not updated and
+ * its value is 0. When the eDP tx driver wants to read the rx crc
+ * from dpcd 0x246, 0x270, the read count of 0 makes the tx driver
+ * time out.
+ *
+ * To avoid this, this debugfs entry lets the test app disable
+ * rx crc checking for the rx internal logic, so that the test app can
+ * read a non-zero crc read count.
+ *
+ * expected app sequence is as below:
+ * 1. disable eDP PHY and notify eDP rx with dpcd 0x600 = 2.
+ * 2. echo 0x1 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_psr
+ * 3. enable eDP PHY and notify eDP rx with dpcd 0x600 = 1 but
+ * without dpcd 0x170 = 5.
+ * 4. read crc from rx dpcd 0x270, 0x246, etc.
+ * 5. echo 0x0 > /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_psr
+ * This returns eDP to normal operation with the psr setup dpcd 0x170 = 5.
+ */
+static int disallow_edp_enter_psr_set(void *data, u64 val)
+{
+ struct amdgpu_dm_connector *aconnector = data;
+
+ aconnector->disallow_edp_enter_psr = val ? true : false;
+ return 0;
+}
+
+static int dmub_trace_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = data;
+ struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+ enum dmub_gpint_command cmd;
+ u64 mask = 0xffff;
+ u8 shift = 0;
+ u32 res;
+ int i;
+
+ if (!srv->fw_version)
+ return -EINVAL;
+
+ for (i = 0; i < 4; i++) {
+ res = (val & mask) >> shift;
+
+ switch (i) {
+ case 0:
+ cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0;
+ break;
+ case 1:
+ cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1;
+ break;
+ case 2:
+ cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2;
+ break;
+ case 3:
+ cmd = DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3;
+ break;
+ }
+
+ if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, res, NULL, DM_DMUB_WAIT_TYPE_WAIT))
+ return -EIO;
+
+ usleep_range(100, 1000);
+
+ mask <<= 16;
+ shift += 16;
+ }
+
+ return 0;
+}
+
+static int dmub_trace_mask_show(void *data, u64 *val)
+{
+ enum dmub_gpint_command cmd = DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0;
+ struct amdgpu_device *adev = data;
+ struct dmub_srv *srv = adev->dm.dc->ctx->dmub_srv->dmub;
+ u8 shift = 0;
+ u64 raw = 0;
+ u64 res = 0;
+ int i = 0;
+
+ if (!srv->fw_version)
+ return -EINVAL;
+
+ while (i < 4) {
+ uint32_t response;
+
+ if (!dc_wake_and_execute_gpint(adev->dm.dc->ctx, cmd, 0, &response, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ return -EIO;
+
+ raw = response;
+ usleep_range(100, 1000);
+
+ cmd++;
+ res |= (raw << shift);
+ shift += 16;
+ i++;
+ }
+
+ *val = res;
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(dmub_trace_mask_fops, dmub_trace_mask_show,
+ dmub_trace_mask_set, "0x%llx\n");
+
/*
* Set dmcub trace event IRQ enable or disable.
* Usage to enable dmcub trace event IRQ: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
@@ -2974,6 +3353,9 @@ static int dmcub_trace_event_state_get(void *data, u64 *val)
DEFINE_DEBUGFS_ATTRIBUTE(dmcub_trace_event_state_fops, dmcub_trace_event_state_get,
dmcub_trace_event_state_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(replay_state_fops, replay_get_state, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(replay_residency_fops, replay_get_residency, replay_set_residency,
+ "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(psr_fops, psr_get, NULL, "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(psr_residency_fops, psr_read_residency, NULL,
"%llu\n");
@@ -2982,8 +3364,13 @@ DEFINE_DEBUGFS_ATTRIBUTE(allow_edp_hotplug_detection_fops,
allow_edp_hotplug_detection_get,
allow_edp_hotplug_detection_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(disallow_edp_enter_psr_fops,
+ disallow_edp_enter_psr_get,
+ disallow_edp_enter_psr_set, "%llu\n");
+
DEFINE_SHOW_ATTRIBUTE(current_backlight);
DEFINE_SHOW_ATTRIBUTE(target_backlight);
+DEFINE_SHOW_ATTRIBUTE(ips_status);
static const struct {
char *name;
@@ -2991,7 +3378,8 @@ static const struct {
} connector_debugfs_entries[] = {
{"force_yuv420_output", &force_yuv420_output_fops},
{"trigger_hotplug", &trigger_hotplug_debugfs_fops},
- {"internal_display", &internal_display_fops}
+ {"internal_display", &internal_display_fops},
+ {"odm_combine_segments", &odm_combine_segments_fops}
};
/*
@@ -3142,6 +3530,11 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
}
}
if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) {
+ debugfs_create_file("replay_capability", 0444, dir, connector,
+ &replay_capability_fops);
+ debugfs_create_file("replay_state", 0444, dir, connector, &replay_state_fops);
+ debugfs_create_file_unsafe("replay_residency", 0444, dir,
+ connector, &replay_residency_fops);
debugfs_create_file_unsafe("psr_capability", 0444, dir, connector, &psr_capability_fops);
debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops);
debugfs_create_file_unsafe("psr_residency", 0444, dir,
@@ -3154,6 +3547,8 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
&edp_ilr_debugfs_fops);
debugfs_create_file("allow_edp_hotplug_detection", 0644, dir, connector,
&allow_edp_hotplug_detection_fops);
+ debugfs_create_file("disallow_edp_enter_psr", 0644, dir, connector,
+ &disallow_edp_enter_psr_fops);
}
for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) {
@@ -3182,8 +3577,8 @@ static int crc_win_x_start_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.window_param.x_start = (uint16_t) val;
- acrtc->dm_irq_params.window_param.update_win = false;
+ acrtc->dm_irq_params.window_param[0].x_start = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3199,7 +3594,7 @@ static int crc_win_x_start_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.window_param.x_start;
+ *val = acrtc->dm_irq_params.window_param[0].x_start;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3219,8 +3614,8 @@ static int crc_win_y_start_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.window_param.y_start = (uint16_t) val;
- acrtc->dm_irq_params.window_param.update_win = false;
+ acrtc->dm_irq_params.window_param[0].y_start = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3236,7 +3631,7 @@ static int crc_win_y_start_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.window_param.y_start;
+ *val = acrtc->dm_irq_params.window_param[0].y_start;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3255,8 +3650,8 @@ static int crc_win_x_end_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.window_param.x_end = (uint16_t) val;
- acrtc->dm_irq_params.window_param.update_win = false;
+ acrtc->dm_irq_params.window_param[0].x_end = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3272,7 +3667,7 @@ static int crc_win_x_end_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.window_param.x_end;
+ *val = acrtc->dm_irq_params.window_param[0].x_end;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3291,8 +3686,8 @@ static int crc_win_y_end_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.window_param.y_end = (uint16_t) val;
- acrtc->dm_irq_params.window_param.update_win = false;
+ acrtc->dm_irq_params.window_param[0].y_end = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3308,7 +3703,7 @@ static int crc_win_y_end_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.window_param.y_end;
+ *val = acrtc->dm_irq_params.window_param[0].y_end;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3331,13 +3726,14 @@ static int crc_win_update_set(void *data, u64 val)
/* PSR may write to OTG CRC window control register,
* so close it before starting secure_display.
*/
- amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream);
+ amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream, true);
spin_lock_irq(&adev_to_drm(adev)->event_lock);
- acrtc->dm_irq_params.window_param.activated = true;
- acrtc->dm_irq_params.window_param.update_win = true;
- acrtc->dm_irq_params.window_param.skip_frame_cnt = 0;
+ acrtc->dm_irq_params.window_param[0].enable = true;
+ acrtc->dm_irq_params.window_param[0].update_win = true;
+ acrtc->dm_irq_params.window_param[0].skip_frame_cnt = 0;
+ acrtc->dm_irq_params.crc_window_activated = true;
spin_unlock_irq(&adev_to_drm(adev)->event_lock);
mutex_unlock(&adev->dm.dc_lock);
@@ -3488,6 +3884,7 @@ static int trigger_hpd_mst_set(void *data, u64 val)
struct amdgpu_dm_connector *aconnector;
struct drm_connector *connector;
struct dc_link *link = NULL;
+ int ret;
if (val == 1) {
drm_connector_list_iter_begin(dev, &iter);
@@ -3496,10 +3893,15 @@ static int trigger_hpd_mst_set(void *data, u64 val)
if (aconnector->dc_link->type == dc_connection_mst_branch &&
aconnector->mst_mgr.aux) {
mutex_lock(&adev->dm.dc_lock);
- dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
+ ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
mutex_unlock(&adev->dm.dc_lock);
- drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true);
+ if (!ret)
+ DRM_ERROR("DM_MST: Failed to detect dc link!");
+
+ ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true);
+ if (ret < 0)
+ DRM_ERROR("DM_MST: Failed to set the device into MST mode!");
}
}
} else if (val == 0) {
@@ -3606,6 +4008,36 @@ DEFINE_DEBUGFS_ATTRIBUTE(disable_hpd_ops, disable_hpd_get,
disable_hpd_set, "%llu\n");
/*
+ * Prints hardware capabilities. These are used for IGT testing.
+ */
+static int capabilities_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct dc *dc = adev->dm.dc;
+ bool mall_supported = dc->caps.mall_size_total;
+ bool subvp_supported = dc->caps.subvp_fw_processing_delay_us;
+ unsigned int mall_in_use = false;
+ unsigned int subvp_in_use = false;
+
+ struct hubbub *hubbub = dc->res_pool->hubbub;
+
+ if (hubbub && hubbub->funcs->get_mall_en)
+ hubbub->funcs->get_mall_en(hubbub, &mall_in_use);
+
+ if (dc->cap_funcs.get_subvp_en)
+ subvp_in_use = dc->cap_funcs.get_subvp_en(dc, dc->current_state);
+
+ seq_printf(m, "mall supported: %s, enabled: %s\n",
+ mall_supported ? "yes" : "no", mall_in_use ? "yes" : "no");
+ seq_printf(m, "sub-viewport supported: %s, enabled: %s\n",
+ subvp_supported ? "yes" : "no", subvp_in_use ? "yes" : "no");
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(capabilities);
+
+/*
* Temporary w/a to force sst sequence in M42D DP2 mst receiver
* Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dp_set_mst_en_for_sst
*/
@@ -3798,6 +4230,8 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
debugfs_create_file("amdgpu_mst_topology", 0444, root,
adev, &mst_topo_fops);
+ debugfs_create_file("amdgpu_dm_capabilities", 0444, root,
+ adev, &capabilities_fops);
debugfs_create_file("amdgpu_dm_dtn_log", 0644, root, adev,
&dtn_log_fops);
debugfs_create_file("amdgpu_dm_dp_set_mst_en_for_sst", 0644, root, adev,
@@ -3820,6 +4254,9 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
debugfs_create_file_unsafe("amdgpu_dm_force_timing_sync", 0644, root,
adev, &force_timing_sync_ops);
+ debugfs_create_file_unsafe("amdgpu_dm_dmub_trace_mask", 0644, root,
+ adev, &dmub_trace_mask_fops);
+
debugfs_create_file_unsafe("amdgpu_dm_dmcub_trace_event_en", 0644, root,
adev, &dmcub_trace_event_state_fops);
@@ -3832,4 +4269,7 @@ void dtn_debugfs_init(struct amdgpu_device *adev)
debugfs_create_file_unsafe("amdgpu_dm_disable_hpd", 0644, root, adev,
&disable_hpd_ops);
+ if (adev->dm.dc->caps.ips_support)
+ debugfs_create_file_unsafe("amdgpu_dm_ips_status", 0644, root, adev,
+ &ips_status_fops);
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h
index 071200473c27..122cdc124b3b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 20cfc5be21a4..19038f336155 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
@@ -26,6 +27,7 @@
#include "amdgpu_dm_hdcp.h"
#include "amdgpu.h"
#include "amdgpu_dm.h"
+#include "dc_fused_io.h"
#include "dm_helpers.h"
#include <drm/display/drm_hdcp_helper.h>
#include "hdcp_psp.h"
@@ -76,6 +78,34 @@ lp_read_dpcd(void *handle, uint32_t address, uint8_t *data, uint32_t size)
return dm_helpers_dp_read_dpcd(link->ctx, link, address, data, size);
}
+static bool lp_atomic_write_poll_read_i2c(
+ void *handle,
+ const struct mod_hdcp_atomic_op_i2c *write,
+ const struct mod_hdcp_atomic_op_i2c *poll,
+ struct mod_hdcp_atomic_op_i2c *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+)
+{
+ struct dc_link *link = handle;
+
+ return dm_atomic_write_poll_read_i2c(link, write, poll, read, poll_timeout_us, poll_mask_msb);
+}
+
+static bool lp_atomic_write_poll_read_aux(
+ void *handle,
+ const struct mod_hdcp_atomic_op_aux *write,
+ const struct mod_hdcp_atomic_op_aux *poll,
+ struct mod_hdcp_atomic_op_aux *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+)
+{
+ struct dc_link *link = handle;
+
+ return dm_atomic_write_poll_read_aux(link, write, poll, read, poll_timeout_us, poll_mask_msb);
+}
+
static uint8_t *psp_get_srm(struct psp_context *psp, uint32_t *srm_version, uint32_t *srm_size)
{
struct ta_hdcp_shared_memory *hdcp_cmd;
@@ -172,7 +202,10 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work,
struct mod_hdcp_display_adjustment display_adjust;
unsigned int conn_index = aconnector->base.index;
- mutex_lock(&hdcp_w->mutex);
+ guard(mutex)(&hdcp_w->mutex);
+ drm_connector_get(&aconnector->base);
+ if (hdcp_w->aconnector[conn_index])
+ drm_connector_put(&hdcp_w->aconnector[conn_index]->base);
hdcp_w->aconnector[conn_index] = aconnector;
memset(&link_adjust, 0, sizeof(link_adjust));
@@ -190,6 +223,7 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work,
display_adjust.disable = MOD_HDCP_DISPLAY_NOT_DISABLE;
link_adjust.auth_delay = 2;
+ link_adjust.retry_limit = MAX_NUM_OF_ATTEMPTS;
if (content_type == DRM_MODE_HDCP_CONTENT_TYPE0) {
link_adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0;
@@ -209,7 +243,6 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work,
mod_hdcp_update_display(&hdcp_w->hdcp, conn_index, &link_adjust, &display_adjust, &hdcp_w->output);
process_output(hdcp_w);
- mutex_unlock(&hdcp_w->mutex);
}
static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work,
@@ -220,8 +253,7 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work,
struct drm_connector_state *conn_state = aconnector->base.state;
unsigned int conn_index = aconnector->base.index;
- mutex_lock(&hdcp_w->mutex);
- hdcp_w->aconnector[conn_index] = aconnector;
+ guard(mutex)(&hdcp_w->mutex);
/* the removal of display will invoke auth reset -> hdcp destroy and
* we'd expect the Content Protection (CP) property changed back to
@@ -237,9 +269,11 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work,
}
mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output);
-
+ if (hdcp_w->aconnector[conn_index]) {
+ drm_connector_put(&hdcp_w->aconnector[conn_index]->base);
+ hdcp_w->aconnector[conn_index] = NULL;
+ }
process_output(hdcp_w);
- mutex_unlock(&hdcp_w->mutex);
}
void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index)
@@ -247,7 +281,7 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde
struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
unsigned int conn_index;
- mutex_lock(&hdcp_w->mutex);
+ guard(mutex)(&hdcp_w->mutex);
mod_hdcp_reset_connection(&hdcp_w->hdcp, &hdcp_w->output);
@@ -256,11 +290,13 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde
for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) {
hdcp_w->encryption_status[conn_index] =
MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
+ if (hdcp_w->aconnector[conn_index]) {
+ drm_connector_put(&hdcp_w->aconnector[conn_index]->base);
+ hdcp_w->aconnector[conn_index] = NULL;
+ }
}
process_output(hdcp_w);
-
- mutex_unlock(&hdcp_w->mutex);
}
void hdcp_handle_cpirq(struct hdcp_workqueue *hdcp_work, unsigned int link_index)
@@ -277,7 +313,7 @@ static void event_callback(struct work_struct *work)
hdcp_work = container_of(to_delayed_work(work), struct hdcp_workqueue,
callback_dwork);
- mutex_lock(&hdcp_work->mutex);
+ guard(mutex)(&hdcp_work->mutex);
cancel_delayed_work(&hdcp_work->callback_dwork);
@@ -285,8 +321,6 @@ static void event_callback(struct work_struct *work)
&hdcp_work->output);
process_output(hdcp_work);
-
- mutex_unlock(&hdcp_work->mutex);
}
static void event_property_update(struct work_struct *work)
@@ -323,7 +357,7 @@ static void event_property_update(struct work_struct *work)
continue;
drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
- mutex_lock(&hdcp_work->mutex);
+ guard(mutex)(&hdcp_work->mutex);
if (conn_state->commit) {
ret = wait_for_completion_interruptible_timeout(&conn_state->commit->hw_done,
@@ -355,7 +389,6 @@ static void event_property_update(struct work_struct *work)
drm_hdcp_update_content_protection(connector,
DRM_MODE_CONTENT_PROTECTION_DESIRED);
}
- mutex_unlock(&hdcp_work->mutex);
drm_modeset_unlock(&dev->mode_config.connection_mutex);
}
}
@@ -368,7 +401,7 @@ static void event_property_validate(struct work_struct *work)
struct amdgpu_dm_connector *aconnector;
unsigned int conn_index;
- mutex_lock(&hdcp_work->mutex);
+ guard(mutex)(&hdcp_work->mutex);
for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX;
conn_index++) {
@@ -408,8 +441,6 @@ static void event_property_validate(struct work_struct *work)
schedule_work(&hdcp_work->property_update_work);
}
}
-
- mutex_unlock(&hdcp_work->mutex);
}
static void event_watchdog_timer(struct work_struct *work)
@@ -420,7 +451,7 @@ static void event_watchdog_timer(struct work_struct *work)
struct hdcp_workqueue,
watchdog_timer_dwork);
- mutex_lock(&hdcp_work->mutex);
+ guard(mutex)(&hdcp_work->mutex);
cancel_delayed_work(&hdcp_work->watchdog_timer_dwork);
@@ -429,8 +460,6 @@ static void event_watchdog_timer(struct work_struct *work)
&hdcp_work->output);
process_output(hdcp_work);
-
- mutex_unlock(&hdcp_work->mutex);
}
static void event_cpirq(struct work_struct *work)
@@ -439,13 +468,11 @@ static void event_cpirq(struct work_struct *work)
hdcp_work = container_of(work, struct hdcp_workqueue, cpirq_work);
- mutex_lock(&hdcp_work->mutex);
+ guard(mutex)(&hdcp_work->mutex);
mod_hdcp_process_event(&hdcp_work->hdcp, MOD_HDCP_EVENT_CPIRQ, &hdcp_work->output);
process_output(hdcp_work);
-
- mutex_unlock(&hdcp_work->mutex);
}
void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work)
@@ -455,6 +482,7 @@ void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work)
for (i = 0; i < hdcp_work->max_link; i++) {
cancel_delayed_work_sync(&hdcp_work[i].callback_dwork);
cancel_delayed_work_sync(&hdcp_work[i].watchdog_timer_dwork);
+ cancel_delayed_work_sync(&hdcp_work[i].property_validate_dwork);
}
sysfs_remove_bin_file(kobj, &hdcp_work[0].attr);
@@ -469,7 +497,6 @@ static bool enable_assr(void *handle, struct dc_link *link)
struct mod_hdcp hdcp = hdcp_work->hdcp;
struct psp_context *psp = hdcp.config.psp.handle;
struct ta_dtm_shared_memory *dtm_cmd;
- bool res = true;
if (!psp->dtm_context.context.initialized) {
DRM_INFO("Failed to enable ASSR, DTM TA is not initialized.");
@@ -478,7 +505,7 @@ static bool enable_assr(void *handle, struct dc_link *link)
dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf;
- mutex_lock(&psp->dtm_context.mutex);
+ guard(mutex)(&psp->dtm_context.mutex);
memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory));
dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_ASSR_ENABLE;
@@ -490,12 +517,10 @@ static bool enable_assr(void *handle, struct dc_link *link)
if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) {
DRM_INFO("Failed to enable ASSR");
- res = false;
+ return false;
}
- mutex_unlock(&psp->dtm_context.mutex);
-
- return res;
+ return true;
}
static void update_config(void *handle, struct cp_psp_stream_config *config)
@@ -503,6 +528,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
struct hdcp_workqueue *hdcp_work = handle;
struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx;
int link_index = aconnector->dc_link->link_index;
+ unsigned int conn_index = aconnector->base.index;
struct mod_hdcp_display *display = &hdcp_work[link_index].display;
struct mod_hdcp_link *link = &hdcp_work[link_index].link;
struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index];
@@ -547,6 +573,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
link->dp.usb4_enabled = config->usb4_enabled;
display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
link->adjust.auth_delay = 2;
+ link->adjust.retry_limit = MAX_NUM_OF_ATTEMPTS;
link->adjust.hdcp1.disable = 0;
hdcp_w->encryption_status[display->index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF;
@@ -556,13 +583,14 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
(!!aconnector->base.state) ?
aconnector->base.state->hdcp_content_type : -1);
- mutex_lock(&hdcp_w->mutex);
+ guard(mutex)(&hdcp_w->mutex);
mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output);
-
+ drm_connector_get(&aconnector->base);
+ if (hdcp_w->aconnector[conn_index])
+ drm_connector_put(&hdcp_w->aconnector[conn_index]->base);
+ hdcp_w->aconnector[conn_index] = aconnector;
process_output(hdcp_w);
- mutex_unlock(&hdcp_w->mutex);
-
}
/**
@@ -614,7 +642,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
* incorrect/corrupted and we should correct our SRM by getting it from PSP
*/
static ssize_t srm_data_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buffer,
+ const struct bin_attribute *bin_attr, char *buffer,
loff_t pos, size_t count)
{
struct hdcp_workqueue *work;
@@ -638,7 +666,7 @@ static ssize_t srm_data_write(struct file *filp, struct kobject *kobj,
}
static ssize_t srm_data_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buffer,
+ const struct bin_attribute *bin_attr, char *buffer,
loff_t pos, size_t count)
{
struct hdcp_workqueue *work;
@@ -736,17 +764,37 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev,
INIT_DELAYED_WORK(&hdcp_work[i].watchdog_timer_dwork, event_watchdog_timer);
INIT_DELAYED_WORK(&hdcp_work[i].property_validate_dwork, event_property_validate);
- hdcp_work[i].hdcp.config.psp.handle = &adev->psp;
- if (dc->ctx->dce_version == DCN_VERSION_3_1 ||
+ struct mod_hdcp_config *config = &hdcp_work[i].hdcp.config;
+ struct mod_hdcp_ddc_funcs *ddc_funcs = &config->ddc.funcs;
+
+ config->psp.handle = &adev->psp;
+ if (dc->ctx->dce_version == DCN_VERSION_3_1 ||
dc->ctx->dce_version == DCN_VERSION_3_14 ||
dc->ctx->dce_version == DCN_VERSION_3_15 ||
- dc->ctx->dce_version == DCN_VERSION_3_16)
- hdcp_work[i].hdcp.config.psp.caps.dtm_v3_supported = 1;
- hdcp_work[i].hdcp.config.ddc.handle = dc_get_link_at_index(dc, i);
- hdcp_work[i].hdcp.config.ddc.funcs.write_i2c = lp_write_i2c;
- hdcp_work[i].hdcp.config.ddc.funcs.read_i2c = lp_read_i2c;
- hdcp_work[i].hdcp.config.ddc.funcs.write_dpcd = lp_write_dpcd;
- hdcp_work[i].hdcp.config.ddc.funcs.read_dpcd = lp_read_dpcd;
+ dc->ctx->dce_version == DCN_VERSION_3_16 ||
+ dc->ctx->dce_version == DCN_VERSION_3_2 ||
+ dc->ctx->dce_version == DCN_VERSION_3_21 ||
+ dc->ctx->dce_version == DCN_VERSION_3_5 ||
+ dc->ctx->dce_version == DCN_VERSION_3_51 ||
+ dc->ctx->dce_version == DCN_VERSION_3_6 ||
+ dc->ctx->dce_version == DCN_VERSION_4_01)
+ config->psp.caps.dtm_v3_supported = 1;
+
+ config->ddc.handle = dc_get_link_at_index(dc, i);
+
+ ddc_funcs->write_i2c = lp_write_i2c;
+ ddc_funcs->read_i2c = lp_read_i2c;
+ ddc_funcs->write_dpcd = lp_write_dpcd;
+ ddc_funcs->read_dpcd = lp_read_dpcd;
+
+ config->debug.lc_enable_sw_fallback = dc->debug.hdcp_lc_enable_sw_fallback;
+ if (dc->caps.fused_io_supported || dc->debug.hdcp_lc_force_fw_enable) {
+ ddc_funcs->atomic_write_poll_read_i2c = lp_atomic_write_poll_read_i2c;
+ ddc_funcs->atomic_write_poll_read_aux = lp_atomic_write_poll_read_aux;
+ } else {
+ ddc_funcs->atomic_write_poll_read_i2c = NULL;
+ ddc_funcs->atomic_write_poll_read_aux = NULL;
+ }
memset(hdcp_work[i].aconnector, 0,
sizeof(struct amdgpu_dm_connector *) *
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
index 69b445b011c8..4faa344f196e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 4b230933b28e..fe100e4c9801 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -23,6 +24,8 @@
*
*/
+#include <acpi/video.h>
+
#include <linux/string.h>
#include <linux/acpi.h>
#include <linux/i2c.h>
@@ -31,6 +34,7 @@
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_edid.h>
+#include <drm/drm_fixed.h>
#include "dm_services.h"
#include "amdgpu.h"
@@ -43,6 +47,7 @@
#include "dm_helpers.h"
#include "ddc_service_types.h"
+#include "clk_mgr.h"
static u32 edid_extract_panel_id(struct edid *edid)
{
@@ -51,18 +56,36 @@ static u32 edid_extract_panel_id(struct edid *edid)
(u32)EDID_PRODUCT_ID(edid);
}
-static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps)
+static void apply_edid_quirks(struct drm_device *dev, struct edid *edid, struct dc_edid_caps *edid_caps)
{
uint32_t panel_id = edid_extract_panel_id(edid);
switch (panel_id) {
+ /* Workaround for monitors that need a delay after detecting the link */
+ case drm_edid_encode_panel_id('G', 'B', 'T', 0x3215):
+ drm_dbg_driver(dev, "Add 10s delay for link detection for panel id %X\n", panel_id);
+ edid_caps->panel_patch.wait_after_dpcd_poweroff_ms = 10000;
+ break;
/* Workaround for some monitors which does not work well with FAMS */
case drm_edid_encode_panel_id('S', 'A', 'M', 0x0E5E):
case drm_edid_encode_panel_id('S', 'A', 'M', 0x7053):
case drm_edid_encode_panel_id('S', 'A', 'M', 0x71AC):
- DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id);
+ drm_dbg_driver(dev, "Disabling FAMS on monitor with panel id %X\n", panel_id);
edid_caps->panel_patch.disable_fams = true;
break;
+ /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */
+ case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB):
+ case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B):
+ case drm_edid_encode_panel_id('B', 'O', 'E', 0x092A):
+ case drm_edid_encode_panel_id('L', 'G', 'D', 0x06D1):
+ case drm_edid_encode_panel_id('M', 'S', 'F', 0x1003):
+ drm_dbg_driver(dev, "Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id);
+ edid_caps->panel_patch.remove_sink_ext_caps = true;
+ break;
+ case drm_edid_encode_panel_id('S', 'D', 'C', 0x4154):
+ drm_dbg_driver(dev, "Disabling VSC on monitor with panel id %X\n", panel_id);
+ edid_caps->panel_patch.disable_colorimetry = true;
+ break;
default:
return;
}
@@ -84,6 +107,7 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
{
struct amdgpu_dm_connector *aconnector = link->priv;
struct drm_connector *connector = &aconnector->base;
+ struct drm_device *dev = connector->dev;
struct edid *edid_buf = edid ? (struct edid *) edid->raw_edid : NULL;
struct cea_sad *sads;
int sad_count = -1;
@@ -113,6 +137,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
edid_caps->edid_hdmi = connector->display_info.is_hdmi;
+ apply_edid_quirks(dev, edid_buf, edid_caps);
+
sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads);
if (sad_count <= 0)
return result;
@@ -139,8 +165,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
else
edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION;
- apply_edid_quirks(edid_buf, edid_caps);
-
kfree(sads);
kfree(sadb);
@@ -204,37 +228,35 @@ void dm_helpers_dp_update_branch_info(
{}
static void dm_helpers_construct_old_payload(
- struct dc_link *link,
- int pbn_per_slot,
+ struct drm_dp_mst_topology_mgr *mgr,
+ struct drm_dp_mst_topology_state *mst_state,
struct drm_dp_mst_atomic_payload *new_payload,
struct drm_dp_mst_atomic_payload *old_payload)
{
- struct link_mst_stream_allocation_table current_link_table =
- link->mst_stream_alloc_table;
- struct link_mst_stream_allocation *dc_alloc;
- int i;
+ struct drm_dp_mst_atomic_payload *pos;
+ int pbn_per_slot = dfixed_trunc(mst_state->pbn_div);
+ u8 next_payload_vc_start = mgr->next_start_slot;
+ u8 payload_vc_start = new_payload->vc_start_slot;
+ u8 allocated_time_slots;
*old_payload = *new_payload;
/* Set correct time_slots/PBN of old payload.
* other fields (delete & dsc_enabled) in
* struct drm_dp_mst_atomic_payload are don't care fields
- * while calling drm_dp_remove_payload()
+ * while calling drm_dp_remove_payload_part2()
*/
- for (i = 0; i < current_link_table.stream_count; i++) {
- dc_alloc =
- &current_link_table.stream_allocations[i];
-
- if (dc_alloc->vcp_id == new_payload->vcpi) {
- old_payload->time_slots = dc_alloc->slot_count;
- old_payload->pbn = dc_alloc->slot_count * pbn_per_slot;
- break;
- }
+ list_for_each_entry(pos, &mst_state->payloads, next) {
+ if (pos != new_payload &&
+ pos->vc_start_slot > payload_vc_start &&
+ pos->vc_start_slot < next_payload_vc_start)
+ next_payload_vc_start = pos->vc_start_slot;
}
- /* make sure there is an old payload*/
- ASSERT(i != current_link_table.stream_count);
+ allocated_time_slots = next_payload_vc_start - payload_vc_start;
+ old_payload->time_slots = allocated_time_slots;
+ old_payload->pbn = allocated_time_slots * pbn_per_slot;
}
/*
@@ -263,21 +285,20 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
mst_mgr = &aconnector->mst_root->mst_mgr;
mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
-
- /* It's OK for this to fail */
new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
if (enable) {
target_payload = new_payload;
+ /* It's OK for this to fail */
drm_dp_add_payload_part1(mst_mgr, mst_state, new_payload);
} else {
/* construct old payload by VCPI*/
- dm_helpers_construct_old_payload(stream->link, mst_state->pbn_div,
- new_payload, &old_payload);
+ dm_helpers_construct_old_payload(mst_mgr, mst_state,
+ new_payload, &old_payload);
target_payload = &old_payload;
- drm_dp_remove_payload(mst_mgr, mst_state, &old_payload, new_payload);
+ drm_dp_remove_payload_part1(mst_mgr, mst_state, new_payload);
}
/* mst_mgr->->payloads are VC payload notify MST branch using DPCD or
@@ -336,15 +357,14 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
return ACT_SUCCESS;
}
-bool dm_helpers_dp_mst_send_payload_allocation(
+void dm_helpers_dp_mst_send_payload_allocation(
struct dc_context *ctx,
- const struct dc_stream_state *stream,
- bool enable)
+ const struct dc_stream_state *stream)
{
struct amdgpu_dm_connector *aconnector;
struct drm_dp_mst_topology_state *mst_state;
struct drm_dp_mst_topology_mgr *mst_mgr;
- struct drm_dp_mst_atomic_payload *payload;
+ struct drm_dp_mst_atomic_payload *new_payload;
enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD;
enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
int ret = 0;
@@ -352,20 +372,13 @@ bool dm_helpers_dp_mst_send_payload_allocation(
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
if (!aconnector || !aconnector->mst_root)
- return false;
+ return;
mst_mgr = &aconnector->mst_root->mst_mgr;
mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
+ new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
- payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
-
- if (!enable) {
- set_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
- clr_flag = MST_ALLOCATE_NEW_PAYLOAD;
- }
-
- if (enable)
- ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, payload);
+ ret = drm_dp_add_payload_part2(mst_mgr, new_payload);
if (ret) {
amdgpu_dm_set_mst_status(&aconnector->mst_status,
@@ -376,10 +389,36 @@ bool dm_helpers_dp_mst_send_payload_allocation(
amdgpu_dm_set_mst_status(&aconnector->mst_status,
clr_flag, false);
}
-
- return true;
}
+void dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+ struct dc_context *ctx,
+ const struct dc_stream_state *stream)
+{
+ struct amdgpu_dm_connector *aconnector;
+ struct drm_dp_mst_topology_state *mst_state;
+ struct drm_dp_mst_topology_mgr *mst_mgr;
+ struct drm_dp_mst_atomic_payload *new_payload, old_payload;
+ enum mst_progress_status set_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
+ enum mst_progress_status clr_flag = MST_ALLOCATE_NEW_PAYLOAD;
+
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+ if (!aconnector || !aconnector->mst_root)
+ return;
+
+ mst_mgr = &aconnector->mst_root->mst_mgr;
+ mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
+ new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port);
+ dm_helpers_construct_old_payload(mst_mgr, mst_state,
+ new_payload, &old_payload);
+
+ drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload);
+
+ amdgpu_dm_set_mst_status(&aconnector->mst_status, set_flag, true);
+ amdgpu_dm_set_mst_status(&aconnector->mst_status, clr_flag, false);
+ }
+
void dm_dtn_log_begin(struct dc_context *ctx,
struct dc_log_buffer_ctx *log_ctx)
{
@@ -534,10 +573,8 @@ bool dm_helpers_dp_read_dpcd(
struct amdgpu_dm_connector *aconnector = link->priv;
- if (!aconnector) {
- DC_LOG_DC("Failed to find connector for link!\n");
+ if (!aconnector)
return false;
- }
return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address, data,
size) == size;
@@ -552,10 +589,8 @@ bool dm_helpers_dp_write_dpcd(
{
struct amdgpu_dm_connector *aconnector = link->priv;
- if (!aconnector) {
- DRM_ERROR("Failed to find connector for link!");
+ if (!aconnector)
return false;
- }
return drm_dp_dpcd_write(&aconnector->dm_dp_aux.aux,
address, (uint8_t *)data, size) > 0;
@@ -596,6 +631,19 @@ bool dm_helpers_submit_i2c(
return result;
}
+bool dm_helpers_execute_fused_io(
+ struct dc_context *ctx,
+ struct dc_link *link,
+ union dmub_rb_cmd *commands,
+ uint8_t count,
+ uint32_t timeout_us
+)
+{
+ struct amdgpu_device *dev = ctx->driver_context;
+
+ return amdgpu_dm_execute_fused_io(dev, link, commands, count, timeout_us);
+}
+
static bool execute_synaptics_rc_command(struct drm_dp_aux *aux,
bool is_write_cmd,
unsigned char cmd,
@@ -616,6 +664,8 @@ static bool execute_synaptics_rc_command(struct drm_dp_aux *aux,
// write rc data
memmove(rc_data, data, length);
ret = drm_dp_dpcd_write(aux, SYNAPTICS_RC_DATA, rc_data, sizeof(rc_data));
+ if (ret < 0)
+ goto err;
}
// write rc offset
@@ -624,20 +674,21 @@ static bool execute_synaptics_rc_command(struct drm_dp_aux *aux,
rc_offset[2] = (unsigned char) (offset >> 16) & 0xFF;
rc_offset[3] = (unsigned char) (offset >> 24) & 0xFF;
ret = drm_dp_dpcd_write(aux, SYNAPTICS_RC_OFFSET, rc_offset, sizeof(rc_offset));
+ if (ret < 0)
+ goto err;
// write rc length
rc_length[0] = (unsigned char) length & 0xFF;
rc_length[1] = (unsigned char) (length >> 8) & 0xFF;
ret = drm_dp_dpcd_write(aux, SYNAPTICS_RC_LENGTH, rc_length, sizeof(rc_length));
+ if (ret < 0)
+ goto err;
// write rc cmd
rc_cmd = cmd | 0x80;
ret = drm_dp_dpcd_write(aux, SYNAPTICS_RC_COMMAND, &rc_cmd, sizeof(rc_cmd));
-
- if (ret < 0) {
- DRM_ERROR("%s: write cmd ..., err = %d\n", __func__, ret);
- return false;
- }
+ if (ret < 0)
+ goto err;
// poll until active is 0
for (i = 0; i < 10; i++) {
@@ -657,16 +708,20 @@ static bool execute_synaptics_rc_command(struct drm_dp_aux *aux,
drm_dp_dpcd_read(aux, SYNAPTICS_RC_DATA, data, length);
}
- DC_LOG_DC("%s: success = %d\n", __func__, success);
+ drm_dbg_dp(aux->drm_dev, "success = %d\n", success);
return success;
+
+err:
+ DRM_ERROR("%s: write cmd ..., err = %d\n", __func__, ret);
+ return false;
}
static void apply_synaptics_fifo_reset_wa(struct drm_dp_aux *aux)
{
unsigned char data[16] = {0};
- DC_LOG_DC("Start %s\n", __func__);
+ drm_dbg_dp(aux->drm_dev, "Start\n");
// Step 2
data[0] = 'P';
@@ -724,7 +779,7 @@ static void apply_synaptics_fifo_reset_wa(struct drm_dp_aux *aux)
if (!execute_synaptics_rc_command(aux, true, 0x02, 0, 0, NULL))
return;
- DC_LOG_DC("Done %s\n", __func__);
+ drm_dbg_dp(aux->drm_dev, "Done\n");
}
/* MST Dock */
@@ -737,7 +792,8 @@ static uint8_t write_dsc_enable_synaptics_non_virtual_dpcd_mst(
{
uint8_t ret = 0;
- DC_LOG_DC("Configure DSC to non-virtual dpcd synaptics\n");
+ drm_dbg_dp(aux->drm_dev,
+ "MST_DSC Configure DSC to non-virtual dpcd synaptics\n");
if (enable) {
/* When DSC is enabled on previous boot and reboot with the hub,
@@ -750,7 +806,7 @@ static uint8_t write_dsc_enable_synaptics_non_virtual_dpcd_mst(
apply_synaptics_fifo_reset_wa(aux);
ret = drm_dp_dpcd_write(aux, DP_DSC_ENABLE, &enable, 1);
- DRM_INFO("Send DSC enable to synaptics\n");
+ DRM_INFO("MST_DSC Send DSC enable to synaptics\n");
} else {
/* Synaptics hub not support virtual dpcd,
@@ -759,7 +815,7 @@ static uint8_t write_dsc_enable_synaptics_non_virtual_dpcd_mst(
*/
if (!stream->link->link_status.link_active) {
ret = drm_dp_dpcd_write(aux, DP_DSC_ENABLE, &enable, 1);
- DRM_INFO("Send DSC disable to synaptics\n");
+ DRM_INFO("MST_DSC Send DSC disable to synaptics\n");
}
}
@@ -775,18 +831,15 @@ bool dm_helpers_dp_write_dsc_enable(
static const uint8_t DSC_DECODING = 0x01;
static const uint8_t DSC_PASSTHROUGH = 0x02;
- struct amdgpu_dm_connector *aconnector;
+ struct amdgpu_dm_connector *aconnector =
+ (struct amdgpu_dm_connector *)stream->dm_stream_context;
+ struct drm_device *dev = aconnector->base.dev;
struct drm_dp_mst_port *port;
uint8_t enable_dsc = enable ? DSC_DECODING : DSC_DISABLE;
uint8_t enable_passthrough = enable ? DSC_PASSTHROUGH : DSC_DISABLE;
uint8_t ret = 0;
- if (!stream)
- return false;
-
if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
-
if (!aconnector->dsc_aux)
return false;
@@ -803,30 +856,34 @@ bool dm_helpers_dp_write_dsc_enable(
ret = drm_dp_dpcd_write(port->passthrough_aux,
DP_DSC_ENABLE,
&enable_passthrough, 1);
- DC_LOG_DC("Sent DSC pass-through enable to virtual dpcd port, ret = %u\n",
- ret);
+ drm_dbg_dp(dev,
+ "MST_DSC Sent DSC pass-through enable to virtual dpcd port, ret = %u\n",
+ ret);
}
ret = drm_dp_dpcd_write(aconnector->dsc_aux,
DP_DSC_ENABLE, &enable_dsc, 1);
- DC_LOG_DC("Sent DSC decoding enable to %s port, ret = %u\n",
- (port->passthrough_aux) ? "remote RX" :
- "virtual dpcd",
- ret);
+ drm_dbg_dp(dev,
+ "MST_DSC Sent DSC decoding enable to %s port, ret = %u\n",
+ (port->passthrough_aux) ? "remote RX" :
+ "virtual dpcd",
+ ret);
} else {
ret = drm_dp_dpcd_write(aconnector->dsc_aux,
DP_DSC_ENABLE, &enable_dsc, 1);
- DC_LOG_DC("Sent DSC decoding disable to %s port, ret = %u\n",
- (port->passthrough_aux) ? "remote RX" :
- "virtual dpcd",
- ret);
+ drm_dbg_dp(dev,
+ "MST_DSC Sent DSC decoding disable to %s port, ret = %u\n",
+ (port->passthrough_aux) ? "remote RX" :
+ "virtual dpcd",
+ ret);
if (port->passthrough_aux) {
ret = drm_dp_dpcd_write(port->passthrough_aux,
DP_DSC_ENABLE,
&enable_passthrough, 1);
- DC_LOG_DC("Sent DSC pass-through disable to virtual dpcd port, ret = %u\n",
- ret);
+ drm_dbg_dp(dev,
+ "MST_DSC Sent DSC pass-through disable to virtual dpcd port, ret = %u\n",
+ ret);
}
}
}
@@ -834,16 +891,26 @@ bool dm_helpers_dp_write_dsc_enable(
if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_EDP) {
if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE) {
ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
- DC_LOG_DC("Send DSC %s to SST RX\n", enable_dsc ? "enable" : "disable");
+ drm_dbg_dp(dev,
+ "SST_DSC Send DSC %s to SST RX\n",
+ enable_dsc ? "enable" : "disable");
} else if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) {
ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1);
- DC_LOG_DC("Send DSC %s to DP-HDMI PCON\n", enable_dsc ? "enable" : "disable");
+ drm_dbg_dp(dev,
+ "SST_DSC Send DSC %s to DP-HDMI PCON\n",
+ enable_dsc ? "enable" : "disable");
}
}
return ret;
}
+bool dm_helpers_dp_write_hblank_reduction(struct dc_context *ctx, const struct dc_stream_state *stream)
+{
+ // TODO
+ return false;
+}
+
bool dm_helpers_is_dp_sink_present(struct dc_link *link)
{
bool dp_sink_present;
@@ -860,6 +927,67 @@ bool dm_helpers_is_dp_sink_present(struct dc_link *link)
return dp_sink_present;
}
+static int
+dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len)
+{
+ struct drm_connector *connector = data;
+ struct acpi_device *acpidev = ACPI_COMPANION(connector->dev->dev);
+ unsigned short start = block * EDID_LENGTH;
+ struct edid *edid;
+ int r;
+
+ if (!acpidev)
+ return -ENODEV;
+
+ /* fetch the entire edid from BIOS */
+ r = acpi_video_get_edid(acpidev, ACPI_VIDEO_DISPLAY_LCD, -1, (void *)&edid);
+ if (r < 0) {
+ drm_dbg(connector->dev, "Failed to get EDID from ACPI: %d\n", r);
+ return r;
+ }
+ if (len > r || start > r || start + len > r) {
+ r = -EINVAL;
+ goto cleanup;
+ }
+
+ /* sanity check */
+ if (edid->revision < 4 || !(edid->input & DRM_EDID_INPUT_DIGITAL) ||
+ (edid->input & DRM_EDID_DIGITAL_TYPE_MASK) == DRM_EDID_DIGITAL_TYPE_UNDEF) {
+ r = -EINVAL;
+ goto cleanup;
+ }
+
+ memcpy(buf, (void *)edid + start, len);
+ r = 0;
+
+cleanup:
+ kfree(edid);
+
+ return r;
+}
+
+static const struct drm_edid *
+dm_helpers_read_acpi_edid(struct amdgpu_dm_connector *aconnector)
+{
+ struct drm_connector *connector = &aconnector->base;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_ACPI_EDID)
+ return NULL;
+
+ switch (connector->connector_type) {
+ case DRM_MODE_CONNECTOR_LVDS:
+ case DRM_MODE_CONNECTOR_eDP:
+ break;
+ default:
+ return NULL;
+ }
+
+ if (connector->force == DRM_FORCE_OFF)
+ return NULL;
+
+ return drm_edid_read_custom(connector, dm_helpers_probe_acpi_edid, connector);
+}
+
enum dc_edid_status dm_helpers_read_local_edid(
struct dc_context *ctx,
struct dc_link *link,
@@ -870,7 +998,8 @@ enum dc_edid_status dm_helpers_read_local_edid(
struct i2c_adapter *ddc;
int retry = 3;
enum dc_edid_status edid_status;
- struct edid *edid;
+ const struct drm_edid *drm_edid;
+ const struct edid *edid;
if (link->aux_mode)
ddc = &aconnector->dm_dp_aux.aux.ddc;
@@ -881,26 +1010,35 @@ enum dc_edid_status dm_helpers_read_local_edid(
* do check sum and retry to make sure read correct edid.
*/
do {
-
- edid = drm_get_edid(&aconnector->base, ddc);
+ drm_edid = dm_helpers_read_acpi_edid(aconnector);
+ if (drm_edid)
+ drm_info(connector->dev, "Using ACPI provided EDID for %s\n", connector->name);
+ else
+ drm_edid = drm_edid_read_ddc(connector, ddc);
+ drm_edid_connector_update(connector, drm_edid);
/* DP Compliance Test 4.2.2.6 */
if (link->aux_mode && connector->edid_corrupt)
drm_dp_send_real_edid_checksum(&aconnector->dm_dp_aux.aux, connector->real_edid_checksum);
- if (!edid && connector->edid_corrupt) {
+ if (!drm_edid && connector->edid_corrupt) {
connector->edid_corrupt = false;
return EDID_BAD_CHECKSUM;
}
- if (!edid)
+ if (!drm_edid)
return EDID_NO_RESPONSE;
+ edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw()
+ if (!edid ||
+ edid->extensions >= sizeof(sink->dc_edid.raw_edid) / EDID_LENGTH)
+ return EDID_BAD_INPUT;
+
sink->dc_edid.length = EDID_LENGTH * (edid->extensions + 1);
memmove(sink->dc_edid.raw_edid, (uint8_t *)edid, sink->dc_edid.length);
/* We don't need the original edid anymore */
- kfree(edid);
+ drm_edid_free(drm_edid);
edid_status = dm_helpers_parse_edid_caps(
link,
@@ -950,6 +1088,11 @@ int dm_helper_dmub_aux_transfer_sync(
struct aux_payload *payload,
enum aux_return_code_type *operation_result)
{
+ if (!link->hpd_status) {
+ *operation_result = AUX_RET_ERROR_HPD_DISCON;
+ return -1;
+ }
+
return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
operation_result);
}
@@ -1008,30 +1151,8 @@ void *dm_helpers_allocate_gpu_mem(
long long *addr)
{
struct amdgpu_device *adev = ctx->driver_context;
- struct dal_allocation *da;
- u32 domain = (type == DC_MEM_ALLOC_TYPE_GART) ?
- AMDGPU_GEM_DOMAIN_GTT : AMDGPU_GEM_DOMAIN_VRAM;
- int ret;
-
- da = kzalloc(sizeof(struct dal_allocation), GFP_KERNEL);
- if (!da)
- return NULL;
-
- ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
- domain, &da->bo,
- &da->gpu_addr, &da->cpu_ptr);
-
- *addr = da->gpu_addr;
- if (ret) {
- kfree(da);
- return NULL;
- }
-
- /* add da to list in dm */
- list_add(&da->list, &adev->dm.da_list);
-
- return da->cpu_ptr;
+ return dm_allocate_gpu_mem(adev, type, size, addr);
}
void dm_helpers_free_gpu_mem(
@@ -1040,17 +1161,8 @@ void dm_helpers_free_gpu_mem(
void *pvMem)
{
struct amdgpu_device *adev = ctx->driver_context;
- struct dal_allocation *da;
-
- /* walk the da list in DM */
- list_for_each_entry(da, &adev->dm.da_list, list) {
- if (pvMem == da->cpu_ptr) {
- amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr);
- list_del(&da->list);
- kfree(da);
- break;
- }
- }
+
+ dm_free_gpu_mem(adev, type, pvMem);
}
bool dm_helpers_dmub_outbox_interrupt_control(struct dc_context *ctx, bool enable)
@@ -1106,6 +1218,9 @@ bool dm_helpers_dp_handle_test_pattern_request(
struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx;
struct pipe_ctx *pipe_ctx = NULL;
struct amdgpu_dm_connector *aconnector = link->priv;
+ struct drm_device *dev = aconnector->base.dev;
+ struct dc_state *dc_state = ctx->dc->current_state;
+ struct clk_mgr *clk_mgr = ctx->dc->clk_mgr;
int i;
for (i = 0; i < MAX_PIPES; i++) {
@@ -1183,12 +1298,12 @@ bool dm_helpers_dp_handle_test_pattern_request(
&& pipe_ctx->stream->timing.display_color_depth != requestColorDepth)
|| (requestPixelEncoding != PIXEL_ENCODING_UNDEFINED
&& pipe_ctx->stream->timing.pixel_encoding != requestPixelEncoding)) {
- DC_LOG_DEBUG("%s: original bpc %d pix encoding %d, changing to %d %d\n",
- __func__,
- pipe_ctx->stream->timing.display_color_depth,
- pipe_ctx->stream->timing.pixel_encoding,
- requestColorDepth,
- requestPixelEncoding);
+ drm_dbg(dev,
+ "original bpc %d pix encoding %d, changing to %d %d\n",
+ pipe_ctx->stream->timing.display_color_depth,
+ pipe_ctx->stream->timing.pixel_encoding,
+ requestColorDepth,
+ requestPixelEncoding);
pipe_ctx->stream->timing.display_color_depth = requestColorDepth;
pipe_ctx->stream->timing.pixel_encoding = requestPixelEncoding;
@@ -1199,10 +1314,23 @@ bool dm_helpers_dp_handle_test_pattern_request(
if (aconnector->timing_requested)
*aconnector->timing_requested = pipe_ctx->stream->timing;
else
- DC_LOG_ERROR("%s: timing storage failed\n", __func__);
+ drm_err(dev, "timing storage failed\n");
}
+ pipe_ctx->stream->test_pattern.type = test_pattern;
+ pipe_ctx->stream->test_pattern.color_space = test_pattern_color_space;
+
+ /* Temp W/A for compliance test failure */
+ dc_state->bw_ctx.bw.dcn.clk.p_state_change_support = false;
+ dc_state->bw_ctx.bw.dcn.clk.dramclk_khz = clk_mgr->dc_mode_softmax_enabled ?
+ clk_mgr->bw_params->dc_mode_softmax_memclk : clk_mgr->bw_params->max_memclk_mhz;
+ dc_state->bw_ctx.bw.dcn.clk.idle_dramclk_khz = dc_state->bw_ctx.bw.dcn.clk.dramclk_khz;
+ ctx->dc->clk_mgr->funcs->update_clocks(
+ ctx->dc->clk_mgr,
+ dc_state,
+ false);
+
dc_link_dp_set_test_pattern(
(struct dc_link *) link,
test_pattern,
@@ -1221,7 +1349,13 @@ void dm_set_phyd32clk(struct dc_context *ctx, int freq_khz)
void dm_helpers_enable_periodic_detection(struct dc_context *ctx, bool enable)
{
- /* TODO: add periodic detection implementation */
+ struct amdgpu_device *adev = ctx->driver_context;
+
+ if (adev->dm.idle_workqueue) {
+ adev->dm.idle_workqueue->enable = enable;
+ if (enable && !adev->dm.idle_workqueue->running && amdgpu_dm_is_headless(adev))
+ schedule_work(&adev->dm.idle_workqueue->work);
+ }
}
void dm_helpers_dp_mst_update_branch_bandwidth(
@@ -1266,3 +1400,15 @@ enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link)
return as_type;
}
+
+bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream)
+{
+ // TODO
+ return false;
+}
+
+bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream)
+{
+ // TODO
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index 51467f132c26..a1c722112c22 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -473,7 +474,7 @@ void amdgpu_dm_irq_fini(struct amdgpu_device *adev)
unregister_all_irq_handlers(adev);
}
-int amdgpu_dm_irq_suspend(struct amdgpu_device *adev)
+void amdgpu_dm_irq_suspend(struct amdgpu_device *adev)
{
int src;
struct list_head *hnd_list_h;
@@ -511,10 +512,9 @@ int amdgpu_dm_irq_suspend(struct amdgpu_device *adev)
}
DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags);
- return 0;
}
-int amdgpu_dm_irq_resume_early(struct amdgpu_device *adev)
+void amdgpu_dm_irq_resume_early(struct amdgpu_device *adev)
{
int src;
struct list_head *hnd_list_h, *hnd_list_l;
@@ -522,7 +522,7 @@ int amdgpu_dm_irq_resume_early(struct amdgpu_device *adev)
DM_IRQ_TABLE_LOCK(adev, irq_table_flags);
- DRM_DEBUG_KMS("DM_IRQ: early resume\n");
+ drm_dbg(adev_to_drm(adev), "DM_IRQ: early resume\n");
/* re-enable short pulse interrupts HW interrupt */
for (src = DC_IRQ_SOURCE_HPD1RX; src <= DC_IRQ_SOURCE_HPD6RX; src++) {
@@ -533,11 +533,9 @@ int amdgpu_dm_irq_resume_early(struct amdgpu_device *adev)
}
DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags);
-
- return 0;
}
-int amdgpu_dm_irq_resume_late(struct amdgpu_device *adev)
+void amdgpu_dm_irq_resume_late(struct amdgpu_device *adev)
{
int src;
struct list_head *hnd_list_h, *hnd_list_l;
@@ -545,7 +543,7 @@ int amdgpu_dm_irq_resume_late(struct amdgpu_device *adev)
DM_IRQ_TABLE_LOCK(adev, irq_table_flags);
- DRM_DEBUG_KMS("DM_IRQ: resume\n");
+ drm_dbg(adev_to_drm(adev), "DM_IRQ: resume\n");
/**
* Renable HW interrupt for HPD and only since FLIP and VBLANK
@@ -559,7 +557,6 @@ int amdgpu_dm_irq_resume_late(struct amdgpu_device *adev)
}
DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags);
- return 0;
}
/*
@@ -711,7 +708,7 @@ static inline int dm_irq_state(struct amdgpu_device *adev,
{
bool st;
enum dc_irq_source irq_source;
-
+ struct dc *dc = adev->dm.dc;
struct amdgpu_crtc *acrtc = adev->mode_info.crtcs[crtc_id];
if (!acrtc) {
@@ -729,6 +726,9 @@ static inline int dm_irq_state(struct amdgpu_device *adev,
st = (state == AMDGPU_IRQ_STATE_ENABLE);
+ if (dc && dc->caps.ips_support && dc->idle_optimizations_allowed)
+ dc_allow_idle_optimizations(dc, false);
+
dc_interrupt_set(adev->dm.dc, irq_source, st);
return 0;
}
@@ -891,18 +891,53 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev)
struct drm_device *dev = adev_to_drm(adev);
struct drm_connector *connector;
struct drm_connector_list_iter iter;
+ int irq_type;
+ int i;
+
+ /* First, clear all hpd and hpdrx interrupts */
+ for (i = DC_IRQ_SOURCE_HPD1; i <= DC_IRQ_SOURCE_HPD6RX; i++) {
+ if (!dc_interrupt_set(adev->dm.dc, i, false))
+ drm_err(dev, "Failed to clear hpd(rx) source=%d on init\n",
+ i);
+ }
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_dm_connector *amdgpu_dm_connector =
- to_amdgpu_dm_connector(connector);
+ struct amdgpu_dm_connector *amdgpu_dm_connector;
+ const struct dc_link *dc_link;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
- const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
+ amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+ dc_link = amdgpu_dm_connector->dc_link;
+
+ /*
+ * Get a base driver irq reference for hpd ints for the lifetime
+ * of dm. Note that only hpd interrupt types are registered with
+ * base driver; hpd_rx types aren't. IOW, amdgpu_irq_get/put on
+ * hpd_rx isn't available. DM currently controls hpd_rx
+ * explicitly with dc_interrupt_set()
+ */
if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
- dc_interrupt_set(adev->dm.dc,
- dc_link->irq_source_hpd,
- true);
+ irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1;
+ /*
+ * TODO: There's a mismatch between mode_info.num_hpd
+ * and what bios reports as the # of connectors with hpd
+ * sources. Since the # of hpd source types registered
+ * with base driver == mode_info.num_hpd, we have to
+ * fallback to dc_interrupt_set for the remaining types.
+ */
+ if (irq_type < adev->mode_info.num_hpd) {
+ if (amdgpu_irq_get(adev, &adev->hpd_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n",
+ dc_link->irq_source_hpd);
+ } else {
+ dc_interrupt_set(adev->dm.dc,
+ dc_link->irq_source_hpd,
+ true);
+ }
}
if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) {
@@ -927,17 +962,32 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev)
struct drm_device *dev = adev_to_drm(adev);
struct drm_connector *connector;
struct drm_connector_list_iter iter;
+ int irq_type;
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
- struct amdgpu_dm_connector *amdgpu_dm_connector =
- to_amdgpu_dm_connector(connector);
- const struct dc_link *dc_link = amdgpu_dm_connector->dc_link;
+ struct amdgpu_dm_connector *amdgpu_dm_connector;
+ const struct dc_link *dc_link;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+ dc_link = amdgpu_dm_connector->dc_link;
if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
- dc_interrupt_set(adev->dm.dc,
- dc_link->irq_source_hpd,
- false);
+ irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1;
+
+ /* TODO: See same TODO in amdgpu_dm_hpd_init() */
+ if (irq_type < adev->mode_info.num_hpd) {
+ if (amdgpu_irq_put(adev, &adev->hpd_irq, irq_type))
+ drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n",
+ dc_link->irq_source_hpd);
+ } else {
+ dc_interrupt_set(adev->dm.dc,
+ dc_link->irq_source_hpd,
+ false);
+ }
}
if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h
index 2349238a626b..4f6b58f4f90d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -90,14 +91,14 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev);
* amdgpu_dm_irq_suspend - disable ASIC interrupt during suspend.
*
*/
-int amdgpu_dm_irq_suspend(struct amdgpu_device *adev);
+void amdgpu_dm_irq_suspend(struct amdgpu_device *adev);
/**
* amdgpu_dm_irq_resume_early - enable HPDRX ASIC interrupts during resume.
* amdgpu_dm_irq_resume - enable ASIC interrupt during resume.
*
*/
-int amdgpu_dm_irq_resume_early(struct amdgpu_device *adev);
-int amdgpu_dm_irq_resume_late(struct amdgpu_device *adev);
+void amdgpu_dm_irq_resume_early(struct amdgpu_device *adev);
+void amdgpu_dm_irq_resume_late(struct amdgpu_device *adev);
#endif /* __AMDGPU_DM_IRQ_H__ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
index 5c9303241aeb..3c9995275cbd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
@@ -33,13 +34,15 @@ struct dm_irq_params {
struct mod_vrr_params vrr_params;
struct dc_stream_state *stream;
int active_planes;
- bool allow_psr_entry;
+ bool allow_sr_entry;
struct mod_freesync_config freesync_config;
#ifdef CONFIG_DEBUG_FS
enum amdgpu_dm_pipe_crc_source crc_src;
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
- struct crc_window_param window_param;
+ struct crc_window_param window_param[MAX_CRC_WINDOW_NUM];
+ /* At least one CRC window is activated or not*/
+ bool crc_window_activated;
#endif
#endif
};
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 57230661132b..5e92eaa67aa3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2012-15 Advanced Micro Devices, Inc.
*
@@ -23,10 +24,13 @@
*
*/
+#include <linux/vmalloc.h>
#include <drm/display/drm_dp_helper.h>
#include <drm/display/drm_dp_mst_helper.h>
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
+#include <drm/drm_fixed.h>
+#include <drm/drm_edid.h>
#include "dm_services.h"
#include "amdgpu.h"
#include "amdgpu_dm.h"
@@ -44,10 +48,13 @@
#include "amdgpu_dm_debugfs.h"
#endif
-#include "dc/dcn20/dcn20_resource.h"
+#include "dc/resource/dcn20/dcn20_resource.h"
#define PEAK_FACTOR_X1000 1006
+/*
+ * This function handles both native AUX and I2C-Over-AUX transactions.
+ */
static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
struct drm_dp_aux_msg *msg)
{
@@ -56,6 +63,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
enum aux_return_code_type operation_result;
struct amdgpu_device *adev;
struct ddc_service *ddc;
+ uint8_t copy[16];
if (WARN_ON(msg->size > 16))
return -E2BIG;
@@ -71,6 +79,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
(msg->request & DP_AUX_I2C_WRITE_STATUS_UPDATE) != 0;
payload.defer_delay = 0;
+ if (payload.write) {
+ memcpy(copy, msg->buffer, msg->size);
+ payload.data = copy;
+ }
+
result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload,
&operation_result);
@@ -84,15 +97,25 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
if (adev->dm.aux_hpd_discon_quirk) {
if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE &&
operation_result == AUX_RET_ERROR_HPD_DISCON) {
- result = 0;
+ result = msg->size;
operation_result = AUX_RET_SUCCESS;
}
}
- if (payload.write && result >= 0)
- result = msg->size;
+ /*
+ * result equals to 0 includes the cases of AUX_DEFER/I2C_DEFER
+ */
+ if (payload.write && result >= 0) {
+ if (result) {
+ /*one byte indicating partially written bytes*/
+ drm_dbg_dp(adev_to_drm(adev), "AUX partially written\n");
+ result = payload.data[0];
+ } else if (!payload.reply[0])
+ /*I2C_ACK|AUX_ACK*/
+ result = msg->size;
+ }
- if (result < 0)
+ if (result < 0) {
switch (operation_result) {
case AUX_RET_SUCCESS:
break;
@@ -111,6 +134,13 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
break;
}
+ drm_dbg_dp(adev_to_drm(adev), "DP AUX transfer fail:%d\n", operation_result);
+ }
+
+ if (payload.reply[0])
+ drm_dbg_dp(adev_to_drm(adev), "AUX reply command not ACK: 0x%02x.",
+ payload.reply[0]);
+
return result;
}
@@ -126,7 +156,7 @@ dm_dp_mst_connector_destroy(struct drm_connector *connector)
dc_sink_release(aconnector->dc_sink);
}
- kfree(aconnector->edid);
+ drm_edid_free(aconnector->drm_edid);
drm_connector_cleanup(connector);
drm_dp_mst_put_port_malloc(aconnector->mst_output_port);
@@ -152,6 +182,17 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector)
return 0;
}
+
+static inline void
+amdgpu_dm_mst_reset_mst_connector_setting(struct amdgpu_dm_connector *aconnector)
+{
+ aconnector->drm_edid = NULL;
+ aconnector->dsc_aux = NULL;
+ aconnector->mst_output_port->passthrough_aux = NULL;
+ aconnector->mst_local_bw = 0;
+ aconnector->vc_full_pbn = 0;
+}
+
static void
amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector)
{
@@ -173,12 +214,13 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector)
if (dc_link->sink_count)
dc_link_remove_remote_sink(dc_link, dc_sink);
- DC_LOG_MST("DM_MST: remove remote sink 0x%p, %d remaining\n",
- dc_sink, dc_link->sink_count);
+ drm_dbg_dp(connector->dev,
+ "DM_MST: remove remote sink 0x%p, %d remaining\n",
+ dc_sink, dc_link->sink_count);
dc_sink_release(dc_sink);
aconnector->dc_sink = NULL;
- aconnector->edid = NULL;
+ amdgpu_dm_mst_reset_mst_connector_setting(aconnector);
}
aconnector->mst_status = MST_STATUS_DEFAULT;
@@ -207,6 +249,7 @@ bool needs_dsc_aux_workaround(struct dc_link *link)
return false;
}
+#if defined(CONFIG_DRM_AMD_DC_FP)
static bool is_synaptics_cascaded_panamera(struct dc_link *link, struct drm_dp_mst_port *port)
{
u8 branch_vendor_data[4] = { 0 }; // Vendor data 0x50C ~ 0x50F
@@ -246,7 +289,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto
aconnector->dsc_aux = &aconnector->mst_root->dm_dp_aux.aux;
/* synaptics cascaded MST hub case */
- if (!aconnector->dsc_aux && is_synaptics_cascaded_panamera(aconnector->dc_link, port))
+ if (is_synaptics_cascaded_panamera(aconnector->dc_link, port))
aconnector->dsc_aux = port->mgr->aux;
if (!aconnector->dsc_aux)
@@ -266,6 +309,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto
return true;
}
+#endif
static bool retrieve_downstream_port_device(struct amdgpu_dm_connector *aconnector)
{
@@ -286,6 +330,34 @@ static bool retrieve_downstream_port_device(struct amdgpu_dm_connector *aconnect
return true;
}
+static bool retrieve_branch_specific_data(struct amdgpu_dm_connector *aconnector)
+{
+ struct drm_connector *connector = &aconnector->base;
+ struct drm_dp_mst_port *port = aconnector->mst_output_port;
+ struct drm_dp_mst_port *port_parent;
+ struct drm_dp_aux *immediate_upstream_aux;
+ struct drm_dp_desc branch_desc;
+
+ if (!port->parent)
+ return false;
+
+ port_parent = port->parent->port_parent;
+
+ immediate_upstream_aux = port_parent ? &port_parent->aux : port->mgr->aux;
+
+ if (drm_dp_read_desc(immediate_upstream_aux, &branch_desc, true))
+ return false;
+
+ aconnector->branch_ieee_oui = (branch_desc.ident.oui[0] << 16) +
+ (branch_desc.ident.oui[1] << 8) +
+ (branch_desc.ident.oui[2]);
+
+ drm_dbg_dp(port->aux.drm_dev, "MST branch oui 0x%x detected at %s\n",
+ aconnector->branch_ieee_oui, connector->name);
+
+ return true;
+}
+
static int dm_dp_mst_get_modes(struct drm_connector *connector)
{
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -294,16 +366,18 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
if (!aconnector)
return drm_add_edid_modes(connector, NULL);
- if (!aconnector->edid) {
- struct edid *edid;
+ if (!aconnector->drm_edid) {
+ const struct drm_edid *drm_edid;
- edid = drm_dp_mst_get_edid(connector, &aconnector->mst_root->mst_mgr, aconnector->mst_output_port);
+ drm_edid = drm_dp_mst_edid_read(connector,
+ &aconnector->mst_root->mst_mgr,
+ aconnector->mst_output_port);
- if (!edid) {
+ if (!drm_edid) {
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_REMOTE_EDID, false);
- drm_connector_update_edid_property(
+ drm_edid_connector_update(
&aconnector->base,
NULL);
@@ -325,8 +399,10 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
return 0;
}
- DC_LOG_MST("DM_MST: add remote sink 0x%p, %d remaining\n",
- dc_sink, aconnector->dc_link->sink_count);
+ drm_dbg_dp(connector->dev,
+ "DM_MST: add remote sink 0x%p, %d remaining\n",
+ dc_sink,
+ aconnector->dc_link->sink_count);
dc_sink->priv = aconnector;
aconnector->dc_sink = dc_sink;
@@ -335,7 +411,7 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
return ret;
}
- aconnector->edid = edid;
+ aconnector->drm_edid = drm_edid;
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_REMOTE_EDID, true);
}
@@ -350,10 +426,13 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
struct dc_sink_init_data init_params = {
.link = aconnector->dc_link,
.sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
+ const struct edid *edid;
+
+ edid = drm_edid_raw(aconnector->drm_edid); // FIXME: Get rid of drm_edid_raw()
dc_sink = dc_link_add_remote_sink(
aconnector->dc_link,
- (uint8_t *)aconnector->edid,
- (aconnector->edid->extensions + 1) * EDID_LENGTH,
+ (uint8_t *)edid,
+ (edid->extensions + 1) * EDID_LENGTH,
&init_params);
if (!dc_sink) {
@@ -361,8 +440,9 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
return 0;
}
- DC_LOG_MST("DM_MST: add remote sink 0x%p, %d remaining\n",
- dc_sink, aconnector->dc_link->sink_count);
+ drm_dbg_dp(connector->dev,
+ "DM_MST: add remote sink 0x%p, %d remaining\n",
+ dc_sink, aconnector->dc_link->sink_count);
dc_sink->priv = aconnector;
/* dc_link_add_remote_sink returns a new reference */
@@ -394,11 +474,13 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
if (aconnector->dc_sink) {
amdgpu_dm_update_freesync_caps(
- connector, aconnector->edid);
+ connector, aconnector->drm_edid);
+#if defined(CONFIG_DRM_AMD_DC_FP)
if (!validate_dsc_caps_on_connector(aconnector))
memset(&aconnector->dc_sink->dsc_caps,
0, sizeof(aconnector->dc_sink->dsc_caps));
+#endif
if (!retrieve_downstream_port_device(aconnector))
memset(&aconnector->mst_downstream_port_present,
@@ -406,10 +488,9 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
}
}
- drm_connector_update_edid_property(
- &aconnector->base, aconnector->edid);
+ drm_edid_connector_update(&aconnector->base, aconnector->drm_edid);
- ret = drm_add_edid_modes(connector, aconnector->edid);
+ ret = drm_edid_connector_add_modes(connector);
return ret;
}
@@ -420,8 +501,7 @@ dm_mst_atomic_best_encoder(struct drm_connector *connector,
{
struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state,
connector);
- struct drm_device *dev = connector->dev;
- struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_device *adev = drm_to_adev(connector->dev);
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc);
return &adev->dm.mst_encoders[acrtc->crtc_id].base;
@@ -481,12 +561,14 @@ dm_dp_mst_detect(struct drm_connector *connector,
if (aconnector->dc_link->sink_count)
dc_link_remove_remote_sink(aconnector->dc_link, aconnector->dc_sink);
- DC_LOG_MST("DM_MST: remove remote sink 0x%p, %d remaining\n",
- aconnector->dc_link, aconnector->dc_link->sink_count);
+ drm_dbg_dp(connector->dev,
+ "DM_MST: remove remote sink 0x%p, %d remaining\n",
+ aconnector->dc_link,
+ aconnector->dc_link->sink_count);
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
- aconnector->edid = NULL;
+ amdgpu_dm_mst_reset_mst_connector_setting(aconnector);
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD,
@@ -562,17 +644,20 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
if (!aconnector)
return NULL;
+ DRM_DEBUG_DRIVER("%s: Create aconnector 0x%p for port 0x%p\n", __func__, aconnector, port);
+
connector = &aconnector->base;
aconnector->mst_output_port = port;
aconnector->mst_root = master;
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_PROBE, true);
- if (drm_connector_init(
+ if (drm_connector_dynamic_init(
dev,
connector,
&dm_dp_mst_connector_funcs,
- DRM_MODE_CONNECTOR_DisplayPort)) {
+ DRM_MODE_CONNECTOR_DisplayPort,
+ NULL)) {
kfree(aconnector);
return NULL;
}
@@ -606,9 +691,15 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
&connector->base,
dev->mode_config.tile_property,
0);
+ connector->colorspace_property = master->base.colorspace_property;
+ if (connector->colorspace_property)
+ drm_connector_attach_colorspace_property(connector);
drm_connector_set_path_property(connector, pathprop);
+ if (!retrieve_branch_specific_data(aconnector))
+ aconnector->branch_ieee_oui = 0;
+
/*
* Initialize connector state before adding the connectror to drm and
* framebuffer lists
@@ -750,6 +841,7 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
drm_dp_aux_init(&aconnector->dm_dp_aux.aux);
drm_dp_cec_register_connector(&aconnector->dm_dp_aux.aux,
&aconnector->base);
+ drm_dp_dpcd_set_probe(&aconnector->dm_dp_aux.aux, false);
if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
return;
@@ -762,13 +854,20 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
drm_connector_attach_dp_subconnector_property(&aconnector->base);
}
-int dm_mst_get_pbn_divider(struct dc_link *link)
+uint32_t dm_mst_get_pbn_divider(struct dc_link *link)
{
+ uint32_t pbn_div_x100;
+ uint64_t dividend, divisor;
+
if (!link)
return 0;
- return dc_link_bandwidth_kbps(link,
- dc_link_get_link_cap(link)) / (8 * 1000 * 54);
+ dividend = (uint64_t)dc_link_bandwidth_kbps(link, dc_link_get_link_cap(link)) * 100;
+ divisor = 8 * 1000 * 54;
+
+ pbn_div_x100 = div64_u64(dividend, divisor);
+
+ return dfixed_const(pbn_div_x100) / 100;
}
struct dsc_mst_fairness_params {
@@ -784,6 +883,7 @@ struct dsc_mst_fairness_params {
struct amdgpu_dm_connector *aconnector;
};
+#if defined(CONFIG_DRM_AMD_DC_FP)
static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link)
{
u8 link_coding_cap;
@@ -852,11 +952,11 @@ static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *p
if (params[i].sink) {
if (params[i].sink->sink_signal != SIGNAL_TYPE_VIRTUAL &&
params[i].sink->sink_signal != SIGNAL_TYPE_NONE)
- DRM_DEBUG_DRIVER("%s i=%d dispname=%s\n", __func__, i,
+ DRM_DEBUG_DRIVER("MST_DSC %s i=%d dispname=%s\n", __func__, i,
params[i].sink->edid_caps.display_name);
}
- DRM_DEBUG_DRIVER("dsc=%d bits_per_pixel=%d pbn=%d\n",
+ DRM_DEBUG_DRIVER("MST_DSC dsc=%d bits_per_pixel=%d pbn=%d\n",
params[i].timing->flags.DSC,
params[i].timing->dsc_cfg.bits_per_pixel,
vars[i + k].pbn);
@@ -935,10 +1035,10 @@ static int increase_dsc_bpp(struct drm_atomic_state *state,
link_timeslots_used = 0;
for (i = 0; i < count; i++)
- link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, mst_state->pbn_div);
+ link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, dfixed_trunc(mst_state->pbn_div));
fair_pbn_alloc =
- (63 - link_timeslots_used) / remaining_to_increase * mst_state->pbn_div;
+ (63 - link_timeslots_used) / remaining_to_increase * dfixed_trunc(mst_state->pbn_div);
if (initial_slack[next_index] > fair_pbn_alloc) {
vars[next_index].pbn += fair_pbn_alloc;
@@ -1005,6 +1105,7 @@ static int try_disable_dsc(struct drm_atomic_state *state,
int remaining_to_try = 0;
int ret;
uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
+ int var_pbn;
for (i = 0; i < count; i++) {
if (vars[i + k].dsc_enabled
@@ -1034,26 +1135,37 @@ static int try_disable_dsc(struct drm_atomic_state *state,
if (next_index == -1)
break;
+ DRM_DEBUG_DRIVER("MST_DSC index #%d, try no compression\n", next_index);
+ var_pbn = vars[next_index].pbn;
vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
ret = drm_dp_atomic_find_time_slots(state,
params[next_index].port->mgr,
params[next_index].port,
vars[next_index].pbn);
- if (ret < 0)
+ if (ret < 0) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC index #%d, failed to set pbn to the state, %d\n",
+ __func__, __LINE__, next_index, ret);
+ vars[next_index].pbn = var_pbn;
return ret;
+ }
ret = drm_dp_mst_atomic_check(state);
if (ret == 0) {
+ DRM_DEBUG_DRIVER("MST_DSC index #%d, greedily disable dsc\n", next_index);
vars[next_index].dsc_enabled = false;
vars[next_index].bpp_x16 = 0;
} else {
- vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps, fec_overhead_multiplier_x1000);
+ DRM_DEBUG_DRIVER("MST_DSC index #%d, restore optimized pbn value\n", next_index);
+ vars[next_index].pbn = var_pbn;
ret = drm_dp_atomic_find_time_slots(state,
params[next_index].port->mgr,
params[next_index].port,
vars[next_index].pbn);
- if (ret < 0)
+ if (ret < 0) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC index #%d, failed to set pbn to the state, %d\n",
+ __func__, __LINE__, next_index, ret);
return ret;
+ }
}
tried[next_index] = true;
@@ -1062,6 +1174,15 @@ static int try_disable_dsc(struct drm_atomic_state *state,
return 0;
}
+static void log_dsc_params(int count, struct dsc_mst_fairness_vars *vars, int k)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ DRM_DEBUG_DRIVER("MST_DSC DSC params: stream #%d --- dsc_enabled = %d, bpp_x16 = %d, pbn = %d\n",
+ i, vars[i + k].dsc_enabled, vars[i + k].bpp_x16, vars[i + k].pbn);
+}
+
static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
struct dc_state *dc_state,
struct dc_link *dc_link,
@@ -1077,6 +1198,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
int i, k, ret;
bool debugfs_overwrite = false;
uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link);
+ struct drm_connector_state *new_conn_state;
memset(params, 0, sizeof(params));
@@ -1084,6 +1206,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
return PTR_ERR(mst_state);
/* Set up params */
+ DRM_DEBUG_DRIVER("%s: MST_DSC Try to set up params from %d streams\n", __func__, dc_state->stream_count);
for (i = 0; i < dc_state->stream_count; i++) {
struct dc_dsc_policy dsc_policy = {0};
@@ -1099,6 +1222,14 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
if (!aconnector->mst_output_port)
continue;
+ new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base);
+
+ if (!new_conn_state) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC Skip the stream 0x%p with invalid new_conn_state\n",
+ __func__, __LINE__, stream);
+ continue;
+ }
+
stream->timing.flags.DSC = 0;
params[count].timing = &stream->timing;
@@ -1112,7 +1243,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
params[count].num_slices_v = aconnector->dsc_settings.dsc_num_slices_v;
params[count].bpp_overwrite = aconnector->dsc_settings.dsc_bits_per_pixel;
params[count].compression_possible = stream->sink->dsc_caps.dsc_dec_caps.is_dsc_supported;
- dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy);
+ dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link));
if (!dc_dsc_compute_bandwidth_range(
stream->sink->ctx->dc->res_pool->dscs[0],
stream->sink->ctx->dc->debug.dsc_min_slice_height_override,
@@ -1125,9 +1256,14 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
params[count].bw_range.stream_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing,
dc_link_get_highest_encoding_format(dc_link));
+ DRM_DEBUG_DRIVER("MST_DSC #%d stream 0x%p - max_kbps = %u, min_kbps = %u, uncompressed_kbps = %u\n",
+ count, stream, params[count].bw_range.max_kbps, params[count].bw_range.min_kbps,
+ params[count].bw_range.stream_kbps);
count++;
}
+ DRM_DEBUG_DRIVER("%s: MST_DSC Params set up for %d streams\n", __func__, count);
+
if (count == 0) {
ASSERT(0);
return 0;
@@ -1139,6 +1275,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
*link_vars_start_index += count;
/* Try no compression */
+ DRM_DEBUG_DRIVER("MST_DSC Try no compression\n");
for (i = 0; i < count; i++) {
vars[i + k].aconnector = params[i].aconnector;
vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000);
@@ -1157,7 +1294,10 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
return ret;
}
+ log_dsc_params(count, vars, k);
+
/* Try max compression */
+ DRM_DEBUG_DRIVER("MST_DSC Try max compression\n");
for (i = 0; i < count; i++) {
if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) {
vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000);
@@ -1181,14 +1321,26 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
if (ret != 0)
return ret;
+ log_dsc_params(count, vars, k);
+
/* Optimize degree of compression */
+ DRM_DEBUG_DRIVER("MST_DSC Try optimize compression\n");
ret = increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k);
- if (ret < 0)
+ if (ret < 0) {
+ DRM_DEBUG_DRIVER("MST_DSC Failed to optimize compression\n");
return ret;
+ }
+ log_dsc_params(count, vars, k);
+
+ DRM_DEBUG_DRIVER("MST_DSC Try disable compression\n");
ret = try_disable_dsc(state, dc_link, params, vars, count, k);
- if (ret < 0)
+ if (ret < 0) {
+ DRM_DEBUG_DRIVER("MST_DSC Failed to disable compression\n");
return ret;
+ }
+
+ log_dsc_params(count, vars, k);
set_dsc_configs_from_fairness_vars(params, vars, count, k);
@@ -1210,15 +1362,19 @@ static bool is_dsc_need_re_compute(
/* only check phy used by dsc mst branch */
if (dc_link->type != dc_connection_mst_branch)
- return false;
+ goto out;
- if (!(dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT ||
- dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_PASSTHROUGH_SUPPORT))
- return false;
+ /* add a check for older MST DSC with no virtual DPCDs */
+ if (needs_dsc_aux_workaround(dc_link) &&
+ (!(dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT ||
+ dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_PASSTHROUGH_SUPPORT)))
+ goto out;
for (i = 0; i < MAX_PIPES; i++)
stream_on_link[i] = NULL;
+ DRM_DEBUG_DRIVER("%s: MST_DSC check on %d streams in new dc_state\n", __func__, dc_state->stream_count);
+
/* check if there is mode change in new request */
for (i = 0; i < dc_state->stream_count; i++) {
struct drm_crtc_state *new_crtc_state;
@@ -1228,6 +1384,8 @@ static bool is_dsc_need_re_compute(
if (!stream)
continue;
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC checking #%d stream 0x%p\n", __func__, __LINE__, i, stream);
+
/* check if stream using the same link for mst */
if (stream->link != dc_link)
continue;
@@ -1240,8 +1398,11 @@ static bool is_dsc_need_re_compute(
new_stream_on_link_num++;
new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base);
- if (!new_conn_state)
+ if (!new_conn_state) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC no new_conn_state for stream 0x%p, aconnector 0x%p\n",
+ __func__, __LINE__, stream, aconnector);
continue;
+ }
if (IS_ERR(new_conn_state))
continue;
@@ -1250,19 +1411,37 @@ static bool is_dsc_need_re_compute(
continue;
new_crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
- if (!new_crtc_state)
+ if (!new_crtc_state) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC no new_crtc_state for crtc of stream 0x%p, aconnector 0x%p\n",
+ __func__, __LINE__, stream, aconnector);
continue;
+ }
if (IS_ERR(new_crtc_state))
continue;
if (new_crtc_state->enable && new_crtc_state->active) {
if (new_crtc_state->mode_changed || new_crtc_state->active_changed ||
- new_crtc_state->connectors_changed)
- return true;
+ new_crtc_state->connectors_changed) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompute required."
+ "stream 0x%p in new dc_state\n",
+ __func__, __LINE__, stream);
+ is_dsc_need_re_compute = true;
+ goto out;
+ }
}
}
+ if (new_stream_on_link_num == 0) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC no mode change request for streams in new dc_state\n",
+ __func__, __LINE__);
+ is_dsc_need_re_compute = false;
+ goto out;
+ }
+
+ DRM_DEBUG_DRIVER("%s: MST_DSC check on %d streams in current dc_state\n",
+ __func__, dc->current_state->stream_count);
+
/* check current_state if there stream on link but it is not in
* new request state
*/
@@ -1285,11 +1464,18 @@ static bool is_dsc_need_re_compute(
if (j == new_stream_on_link_num) {
/* not in new state */
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompute required."
+ "stream 0x%p in current dc_state but not in new dc_state\n",
+ __func__, __LINE__, stream);
is_dsc_need_re_compute = true;
break;
}
}
+out:
+ DRM_DEBUG_DRIVER("%s: MST_DSC dsc recompute %s\n",
+ __func__, is_dsc_need_re_compute ? "required" : "not required");
+
return is_dsc_need_re_compute;
}
@@ -1318,6 +1504,9 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+ DRM_DEBUG_DRIVER("%s: MST_DSC compute mst dsc configs for stream 0x%p, aconnector 0x%p\n",
+ __func__, stream, aconnector);
+
if (!aconnector || !aconnector->dc_sink || !aconnector->mst_output_port)
continue;
@@ -1350,8 +1539,11 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
stream = dc_state->streams[i];
if (stream->timing.flags.DSC == 1)
- if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK)
+ if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) {
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC Failed to request dsc hw resource for stream 0x%p\n",
+ __func__, __LINE__, stream);
return -EINVAL;
+ }
}
return ret;
@@ -1380,6 +1572,9 @@ static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+ DRM_DEBUG_DRIVER("MST_DSC pre compute mst dsc configs for #%d stream 0x%p, aconnector 0x%p\n",
+ i, stream, aconnector);
+
if (!aconnector || !aconnector->dc_sink || !aconnector->mst_output_port)
continue;
@@ -1469,12 +1664,12 @@ int pre_validate_dsc(struct drm_atomic_state *state,
int ret = 0;
if (!is_dsc_precompute_needed(state)) {
- DRM_INFO_ONCE("DSC precompute is not needed.\n");
+ DRM_INFO_ONCE("%s:%d MST_DSC dsc precompute is not needed\n", __func__, __LINE__);
return 0;
}
ret = dm_atomic_get_state(state, dm_state_ptr);
if (ret != 0) {
- DRM_INFO_ONCE("dm_atomic_get_state() failed\n");
+ DRM_INFO_ONCE("%s:%d MST_DSC dm_atomic_get_state() failed\n", __func__, __LINE__);
return ret;
}
dm_state = *dm_state_ptr;
@@ -1485,31 +1680,35 @@ int pre_validate_dsc(struct drm_atomic_state *state,
* from dm_state->context.
*/
- local_dc_state = kmemdup(dm_state->context, sizeof(struct dc_state), GFP_KERNEL);
+ local_dc_state = vmalloc(sizeof(struct dc_state));
if (!local_dc_state)
return -ENOMEM;
+ memcpy(local_dc_state, dm_state->context, sizeof(struct dc_state));
for (i = 0; i < local_dc_state->stream_count; i++) {
struct dc_stream_state *stream = dm_state->context->streams[i];
int ind = find_crtc_index_in_state_by_stream(state, stream);
if (ind >= 0) {
- struct amdgpu_dm_connector *aconnector;
+ struct drm_connector *connector;
struct drm_connector_state *drm_new_conn_state;
struct dm_connector_state *dm_new_conn_state;
struct dm_crtc_state *dm_old_crtc_state;
- aconnector =
+ connector =
amdgpu_dm_find_first_crtc_matching_connector(state,
state->crtcs[ind].ptr);
+ if (!connector)
+ continue;
+
drm_new_conn_state =
drm_atomic_get_new_connector_state(state,
- &aconnector->base);
+ connector);
dm_new_conn_state = to_dm_connector_state(drm_new_conn_state);
dm_old_crtc_state = to_dm_crtc_state(state->crtcs[ind].old_state);
local_dc_state->streams[i] =
- create_validate_stream_for_sink(aconnector,
+ create_validate_stream_for_sink(connector,
&state->crtcs[ind].new_state->mode,
dm_new_conn_state,
dm_old_crtc_state->stream);
@@ -1525,7 +1724,8 @@ int pre_validate_dsc(struct drm_atomic_state *state,
ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars);
if (ret != 0) {
- DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n");
+ DRM_INFO_ONCE("%s:%d MST_DSC dsc pre_compute_mst_dsc_configs_for_state() failed\n",
+ __func__, __LINE__);
ret = -EINVAL;
goto clean_exit;
}
@@ -1539,12 +1739,15 @@ int pre_validate_dsc(struct drm_atomic_state *state,
if (local_dc_state->streams[i] &&
dc_is_timing_changed(stream, local_dc_state->streams[i])) {
- DRM_INFO_ONCE("crtc[%d] needs mode_changed\n", i);
+ DRM_INFO_ONCE("%s:%d MST_DSC crtc[%d] needs mode_change\n", __func__, __LINE__, i);
} else {
int ind = find_crtc_index_in_state_by_stream(state, stream);
- if (ind >= 0)
+ if (ind >= 0) {
+ DRM_INFO_ONCE("%s:%d MST_DSC no mode changed for stream 0x%p\n",
+ __func__, __LINE__, stream);
state->crtcs[ind].new_state->mode_changed = 0;
+ }
}
}
clean_exit:
@@ -1555,112 +1758,230 @@ clean_exit:
dc_stream_release(local_dc_state->streams[i]);
}
- kfree(local_dc_state);
+ vfree(local_dc_state);
return ret;
}
-static unsigned int kbps_from_pbn(unsigned int pbn)
+static uint32_t kbps_from_pbn(unsigned int pbn)
{
- unsigned int kbps = pbn;
+ uint64_t kbps = (uint64_t)pbn;
kbps *= (1000000 / PEAK_FACTOR_X1000);
kbps *= 8;
kbps *= 54;
kbps /= 64;
- return kbps;
+ return (uint32_t)kbps;
}
static bool is_dsc_common_config_possible(struct dc_stream_state *stream,
struct dc_dsc_bw_range *bw_range)
{
struct dc_dsc_policy dsc_policy = {0};
+ bool is_dsc_possible;
- dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy);
- dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0],
- stream->sink->ctx->dc->debug.dsc_min_slice_height_override,
- dsc_policy.min_target_bpp * 16,
- dsc_policy.max_target_bpp * 16,
- &stream->sink->dsc_caps.dsc_dec_caps,
- &stream->timing, dc_link_get_highest_encoding_format(stream->link), bw_range);
+ dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link));
+ is_dsc_possible = dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0],
+ stream->sink->ctx->dc->debug.dsc_min_slice_height_override,
+ dsc_policy.min_target_bpp * 16,
+ dsc_policy.max_target_bpp * 16,
+ &stream->sink->dsc_caps.dsc_dec_caps,
+ &stream->timing, dc_link_get_highest_encoding_format(stream->link), bw_range);
- return bw_range->max_target_bpp_x16 && bw_range->min_target_bpp_x16;
+ return is_dsc_possible;
}
+#endif
+
+#if defined(CONFIG_DRM_AMD_DC_FP)
+static bool dp_get_link_current_set_bw(struct drm_dp_aux *aux, uint32_t *cur_link_bw)
+{
+ uint32_t total_data_bw_efficiency_x10000 = 0;
+ uint32_t link_rate_per_lane_kbps = 0;
+ enum dc_link_rate link_rate;
+ union lane_count_set lane_count;
+ u8 dp_link_encoding;
+ u8 link_bw_set = 0;
+ u8 data[16] = {0};
+
+ *cur_link_bw = 0;
+
+ if (drm_dp_dpcd_read(aux, DP_LINK_BW_SET, data, 16) != 16)
+ return false;
+
+ dp_link_encoding = data[DP_MAIN_LINK_CHANNEL_CODING_SET - DP_LINK_BW_SET];
+ link_bw_set = data[DP_LINK_BW_SET - DP_LINK_BW_SET];
+ lane_count.raw = data[DP_LANE_COUNT_SET - DP_LINK_BW_SET];
+
+ drm_dbg_dp(aux->drm_dev, "MST_DSC downlink setting: %d, 0x%x x %d\n",
+ dp_link_encoding, link_bw_set, lane_count.bits.LANE_COUNT_SET);
+
+ switch (dp_link_encoding) {
+ case DP_8b_10b_ENCODING:
+ link_rate = link_bw_set;
+ link_rate_per_lane_kbps = link_rate * LINK_RATE_REF_FREQ_IN_KHZ * BITS_PER_DP_BYTE;
+ total_data_bw_efficiency_x10000 = DATA_EFFICIENCY_8b_10b_x10000;
+ total_data_bw_efficiency_x10000 /= 100;
+ total_data_bw_efficiency_x10000 *= DATA_EFFICIENCY_8b_10b_FEC_EFFICIENCY_x100;
+ break;
+ case DP_128b_132b_ENCODING:
+ switch (link_bw_set) {
+ case DP_LINK_BW_10:
+ link_rate = LINK_RATE_UHBR10;
+ break;
+ case DP_LINK_BW_13_5:
+ link_rate = LINK_RATE_UHBR13_5;
+ break;
+ case DP_LINK_BW_20:
+ link_rate = LINK_RATE_UHBR20;
+ break;
+ default:
+ return false;
+ }
+
+ link_rate_per_lane_kbps = link_rate * 10000;
+ total_data_bw_efficiency_x10000 = DATA_EFFICIENCY_128b_132b_x10000;
+ break;
+ default:
+ return false;
+ }
+
+ *cur_link_bw = link_rate_per_lane_kbps * lane_count.bits.LANE_COUNT_SET / 10000 * total_data_bw_efficiency_x10000;
+ return true;
+}
+#endif
enum dc_status dm_dp_mst_is_port_support_mode(
struct amdgpu_dm_connector *aconnector,
struct dc_stream_state *stream)
{
- int bpp, pbn, branch_max_throughput_mps = 0;
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ int branch_max_throughput_mps = 0;
struct dc_link_settings cur_link_settings;
- unsigned int end_to_end_bw_in_kbps = 0;
- unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0;
- unsigned int max_compressed_bw_in_kbps = 0;
+ uint32_t end_to_end_bw_in_kbps = 0;
+ uint32_t root_link_bw_in_kbps = 0;
+ uint32_t virtual_channel_bw_in_kbps = 0;
struct dc_dsc_bw_range bw_range = {0};
- struct drm_dp_mst_topology_mgr *mst_mgr;
+ struct dc_dsc_config_options dsc_options = {0};
+ uint32_t stream_kbps;
- /*
- * check if the mode could be supported if DSC pass-through is supported
- * AND check if there enough bandwidth available to support the mode
- * with DSC enabled.
+ /* DSC unnecessary case
+ * Check if timing could be supported within end-to-end BW
*/
- if (is_dsc_common_config_possible(stream, &bw_range) &&
- aconnector->mst_output_port->passthrough_aux) {
- mst_mgr = aconnector->mst_output_port->mgr;
- mutex_lock(&mst_mgr->lock);
+ stream_kbps =
+ dc_bandwidth_in_kbps_from_timing(&stream->timing,
+ dc_link_get_highest_encoding_format(stream->link));
+ cur_link_settings = stream->link->verified_link_cap;
+ root_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, &cur_link_settings);
+ virtual_channel_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn);
+
+ /* pick the end to end bw bottleneck */
+ end_to_end_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps);
+
+ if (stream_kbps <= end_to_end_bw_in_kbps) {
+ DRM_DEBUG_DRIVER("MST_DSC no dsc required. End-to-end bw sufficient\n");
+ return DC_OK;
+ }
+
+ /*DSC necessary case*/
+ if (!aconnector->dsc_aux)
+ return DC_FAIL_BANDWIDTH_VALIDATE;
- cur_link_settings = stream->link->verified_link_cap;
+ if (is_dsc_common_config_possible(stream, &bw_range)) {
- upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
- &cur_link_settings
- );
- down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn);
+ /*capable of dsc passthough. dsc bitstream along the entire path*/
+ if (aconnector->mst_output_port->passthrough_aux) {
+ if (bw_range.min_kbps > end_to_end_bw_in_kbps) {
+ DRM_DEBUG_DRIVER("MST_DSC dsc passthrough and decode at endpoint"
+ "Max dsc compression bw can't fit into end-to-end bw\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
+ } else {
+ /*dsc bitstream decoded at the dp last link*/
+ struct drm_dp_mst_port *immediate_upstream_port = NULL;
+ uint32_t end_link_bw = 0;
+
+ /*Get last DP link BW capability. Mode shall be supported by Legacy peer*/
+ if (aconnector->mst_output_port->pdt != DP_PEER_DEVICE_DP_LEGACY_CONV &&
+ aconnector->mst_output_port->pdt != DP_PEER_DEVICE_NONE) {
+ if (aconnector->vc_full_pbn != aconnector->mst_output_port->full_pbn) {
+ dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw);
+ aconnector->vc_full_pbn = aconnector->mst_output_port->full_pbn;
+ aconnector->mst_local_bw = end_link_bw;
+ } else {
+ end_link_bw = aconnector->mst_local_bw;
+ }
- /* pick the bottleneck */
- end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps,
- down_link_bw_in_kbps);
+ if (end_link_bw > 0 &&
+ stream_kbps > end_link_bw &&
+ aconnector->branch_ieee_oui != DP_BRANCH_DEVICE_ID_90CC24) {
+ DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link. "
+ "Mode required bw can't fit into last link\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
+ }
- mutex_unlock(&mst_mgr->lock);
+ /*Get virtual channel bandwidth between source and the link before the last link*/
+ if (aconnector->mst_output_port->parent->port_parent)
+ immediate_upstream_port = aconnector->mst_output_port->parent->port_parent;
- /*
- * use the maximum dsc compression bandwidth as the required
- * bandwidth for the mode
- */
- max_compressed_bw_in_kbps = bw_range.min_kbps;
+ if (immediate_upstream_port) {
+ virtual_channel_bw_in_kbps = kbps_from_pbn(immediate_upstream_port->full_pbn);
+ virtual_channel_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps);
+ } else {
+ /* For topology LCT 1 case - only one mstb*/
+ virtual_channel_bw_in_kbps = root_link_bw_in_kbps;
+ }
- if (end_to_end_bw_in_kbps < max_compressed_bw_in_kbps) {
- DRM_DEBUG_DRIVER("Mode does not fit into DSC pass-through bandwidth validation\n");
- return DC_FAIL_BANDWIDTH_VALIDATE;
+ if (bw_range.min_kbps > virtual_channel_bw_in_kbps) {
+ DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link."
+ "Max dsc compression can't fit into MST available bw\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
}
- } else {
- /* check if mode could be supported within full_pbn */
- bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
- pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);
- if (pbn > aconnector->mst_output_port->full_pbn)
+ /*Confirm if we can obtain dsc config*/
+ dc_dsc_get_default_config_option(stream->link->dc, &dsc_options);
+ dsc_options.max_target_bpp_limit_override_x16 = aconnector->base.display_info.max_dsc_bpp * 16;
+ if (dc_dsc_compute_config(stream->sink->ctx->dc->res_pool->dscs[0],
+ &stream->sink->dsc_caps.dsc_dec_caps,
+ &dsc_options,
+ end_to_end_bw_in_kbps,
+ &stream->timing,
+ dc_link_get_highest_encoding_format(stream->link),
+ &stream->timing.dsc_cfg)) {
+ stream->timing.flags.DSC = 1;
+ DRM_DEBUG_DRIVER("MST_DSC require dsc and dsc config found\n");
+ } else {
+ DRM_DEBUG_DRIVER("MST_DSC require dsc but can't find appropriate dsc config\n");
return DC_FAIL_BANDWIDTH_VALIDATE;
- }
+ }
- /* check is mst dsc output bandwidth branch_overall_throughput_0_mps */
- switch (stream->timing.pixel_encoding) {
- case PIXEL_ENCODING_RGB:
- case PIXEL_ENCODING_YCBCR444:
- branch_max_throughput_mps =
- aconnector->dc_sink->dsc_caps.dsc_dec_caps.branch_overall_throughput_0_mps;
- break;
- case PIXEL_ENCODING_YCBCR422:
- case PIXEL_ENCODING_YCBCR420:
- branch_max_throughput_mps =
- aconnector->dc_sink->dsc_caps.dsc_dec_caps.branch_overall_throughput_1_mps;
- break;
- default:
- break;
- }
+ /* check is mst dsc output bandwidth branch_overall_throughput_0_mps */
+ switch (stream->timing.pixel_encoding) {
+ case PIXEL_ENCODING_RGB:
+ case PIXEL_ENCODING_YCBCR444:
+ branch_max_throughput_mps =
+ aconnector->dc_sink->dsc_caps.dsc_dec_caps.branch_overall_throughput_0_mps;
+ break;
+ case PIXEL_ENCODING_YCBCR422:
+ case PIXEL_ENCODING_YCBCR420:
+ branch_max_throughput_mps =
+ aconnector->dc_sink->dsc_caps.dsc_dec_caps.branch_overall_throughput_1_mps;
+ break;
+ default:
+ break;
+ }
- if (branch_max_throughput_mps != 0 &&
- ((stream->timing.pix_clk_100hz / 10) > branch_max_throughput_mps * 1000))
+ if (branch_max_throughput_mps != 0 &&
+ ((stream->timing.pix_clk_100hz / 10) > branch_max_throughput_mps * 1000)) {
+ DRM_DEBUG_DRIVER("MST_DSC require dsc but max throughput mps fails\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
+ } else {
+ DRM_DEBUG_DRIVER("MST_DSC require dsc but can't find common dsc config\n");
return DC_FAIL_BANDWIDTH_VALIDATE;
-
+ }
+#endif
return DC_OK;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index 37c820ab0fdb..6f7ea684b555 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2012-15 Advanced Micro Devices, Inc.
*
@@ -46,8 +47,8 @@
#define SYNAPTICS_CASCADED_HUB_ID 0x5A
#define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0)
-#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031
-#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000
+#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031
+#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000
enum mst_msg_ready_type {
NONE_MSG_RDY_EVENT = 0,
@@ -59,7 +60,7 @@ enum mst_msg_ready_type {
struct amdgpu_display_manager;
struct amdgpu_dm_connector;
-int dm_mst_get_pbn_divider(struct dc_link *link);
+uint32_t dm_mst_get_pbn_divider(struct dc_link *link);
void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
struct amdgpu_dm_connector *aconnector,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index cc74dd69acf2..e027798ece03 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -26,8 +26,10 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_blend.h>
+#include "drm/drm_framebuffer.h"
#include <drm/drm_gem_atomic_helper.h>
#include <drm/drm_plane_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fourcc.h>
#include "amdgpu.h"
@@ -90,9 +92,9 @@ enum dm_micro_swizzle {
MICRO_SWIZZLE_R = 3
};
-const struct drm_format_info *amdgpu_dm_plane_get_format_info(const struct drm_mode_fb_cmd2 *cmd)
+const struct drm_format_info *amdgpu_dm_plane_get_format_info(u32 pixel_format, u64 modifier)
{
- return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]);
+ return amdgpu_lookup_format_info(pixel_format, modifier);
}
void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
@@ -104,8 +106,6 @@ void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state
*global_alpha = false;
*global_alpha_value = 0xff;
- if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY)
- return;
if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ||
plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) {
@@ -139,14 +139,14 @@ void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state
}
}
-static void add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_t mod)
+static void amdgpu_dm_plane_add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_t mod)
{
if (!*mods)
return;
if (*cap - *size < 1) {
uint64_t new_cap = *cap * 2;
- uint64_t *new_mods = kmalloc(new_cap * sizeof(uint64_t), GFP_KERNEL);
+ uint64_t *new_mods = kmalloc_array(new_cap, sizeof(uint64_t), GFP_KERNEL);
if (!new_mods) {
kfree(*mods);
@@ -164,12 +164,12 @@ static void add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_
*size += 1;
}
-static bool modifier_has_dcc(uint64_t modifier)
+static bool amdgpu_dm_plane_modifier_has_dcc(uint64_t modifier)
{
return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
}
-static unsigned int modifier_gfx9_swizzle_mode(uint64_t modifier)
+static unsigned int amdgpu_dm_plane_modifier_gfx9_swizzle_mode(uint64_t modifier)
{
if (modifier == DRM_FORMAT_MOD_LINEAR)
return 0;
@@ -177,8 +177,8 @@ static unsigned int modifier_gfx9_swizzle_mode(uint64_t modifier)
return AMD_FMT_MOD_GET(TILE, modifier);
}
-static void fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info,
- uint64_t tiling_flags)
+static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(struct dc_tiling_info *tiling_info,
+ uint64_t tiling_flags)
{
/* Fill GFX8 params */
if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_2D_TILED_THIN1) {
@@ -190,6 +190,7 @@ static void fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info,
tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+ tiling_info->gfxversion = DcGfxVersion8;
/* XXX fix me for VI */
tiling_info->gfx8.num_banks = num_banks;
tiling_info->gfx8.array_mode =
@@ -209,8 +210,8 @@ static void fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info,
AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
}
-static void fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev,
- union dc_tiling_info *tiling_info)
+static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev,
+ struct dc_tiling_info *tiling_info)
{
/* Fill GFX9 params */
tiling_info->gfx9.num_pipes =
@@ -226,13 +227,13 @@ static void fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev,
tiling_info->gfx9.num_rb_per_se =
adev->gfx.config.gb_addr_config_fields.num_rb_per_se;
tiling_info->gfx9.shaderEnable = 1;
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
}
-static void fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev,
- union dc_tiling_info *tiling_info,
- uint64_t modifier)
+static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev,
+ struct dc_tiling_info *tiling_info,
+ uint64_t modifier)
{
unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier);
unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
@@ -241,7 +242,7 @@ static void fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev
pipes_log2 = min(5u, mod_pipe_xor_bits);
- fill_gfx9_tiling_info_from_device(adev, tiling_info);
+ amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(adev, tiling_info);
if (!IS_AMD_FMT_MOD(modifier))
return;
@@ -258,13 +259,13 @@ static void fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev
}
}
-static int validate_dcc(struct amdgpu_device *adev,
- const enum surface_pixel_format format,
- const enum dc_rotation_angle rotation,
- const union dc_tiling_info *tiling_info,
- const struct dc_plane_dcc_param *dcc,
- const struct dc_plane_address *address,
- const struct plane_size *plane_size)
+static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev,
+ const enum surface_pixel_format format,
+ const enum dc_rotation_angle rotation,
+ const struct dc_tiling_info *tiling_info,
+ const struct dc_plane_dcc_param *dcc,
+ const struct dc_plane_address *address,
+ const struct plane_size *plane_size)
{
struct dc *dc = adev->dm.dc;
struct dc_dcc_surface_param input;
@@ -276,8 +277,11 @@ static int validate_dcc(struct amdgpu_device *adev,
if (!dcc->enable)
return 0;
- if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN ||
- !dc->cap_funcs.get_dcc_compression_cap)
+ if (adev->family < AMDGPU_FAMILY_GC_12_0_0 &&
+ format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
+ return -EINVAL;
+
+ if (!dc->cap_funcs.get_dcc_compression_cap)
return -EINVAL;
input.format = format;
@@ -303,23 +307,23 @@ static int validate_dcc(struct amdgpu_device *adev,
return 0;
}
-static int fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev,
- const struct amdgpu_framebuffer *afb,
- const enum surface_pixel_format format,
- const enum dc_rotation_angle rotation,
- const struct plane_size *plane_size,
- union dc_tiling_info *tiling_info,
- struct dc_plane_dcc_param *dcc,
- struct dc_plane_address *address,
- const bool force_disable_dcc)
+static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev,
+ const struct amdgpu_framebuffer *afb,
+ const enum surface_pixel_format format,
+ const enum dc_rotation_angle rotation,
+ const struct plane_size *plane_size,
+ struct dc_tiling_info *tiling_info,
+ struct dc_plane_dcc_param *dcc,
+ struct dc_plane_address *address)
{
const uint64_t modifier = afb->base.modifier;
int ret = 0;
- fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier);
- tiling_info->gfx9.swizzle = modifier_gfx9_swizzle_mode(modifier);
+ amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier);
+ tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier);
+ tiling_info->gfxversion = DcGfxVersion9;
- if (modifier_has_dcc(modifier) && !force_disable_dcc) {
+ if (amdgpu_dm_plane_modifier_has_dcc(modifier)) {
uint64_t dcc_address = afb->address + afb->base.offsets[1];
bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);
@@ -347,60 +351,104 @@ static int fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev,
address->grph.meta_addr.high_part = upper_32_bits(dcc_address);
}
- ret = validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size);
+ ret = amdgpu_dm_plane_validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size);
if (ret)
- drm_dbg_kms(adev_to_drm(adev), "validate_dcc: returned error: %d\n", ret);
+ drm_dbg_kms(adev_to_drm(adev), "amdgpu_dm_plane_validate_dcc: returned error: %d\n", ret);
return ret;
}
-static void add_gfx10_1_modifiers(const struct amdgpu_device *adev,
- uint64_t **mods, uint64_t *size, uint64_t *capacity)
+static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amdgpu_device *adev,
+ const struct amdgpu_framebuffer *afb,
+ const enum surface_pixel_format format,
+ const enum dc_rotation_angle rotation,
+ const struct plane_size *plane_size,
+ struct dc_tiling_info *tiling_info,
+ struct dc_plane_dcc_param *dcc,
+ struct dc_plane_address *address)
+{
+ const uint64_t modifier = afb->base.modifier;
+ int ret = 0;
+
+ /* TODO: Most of this function shouldn't be needed on GFX12. */
+ amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(adev, tiling_info);
+
+ tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier);
+ tiling_info->gfxversion = DcGfxAddr3;
+
+ if (amdgpu_dm_plane_modifier_has_dcc(modifier)) {
+ int max_compressed_block = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);
+
+ dcc->enable = 1;
+ dcc->independent_64b_blks = max_compressed_block == 0;
+
+ if (max_compressed_block == 0)
+ dcc->dcc_ind_blk = hubp_ind_block_64b;
+ else if (max_compressed_block == 1)
+ dcc->dcc_ind_blk = hubp_ind_block_128b;
+ else
+ dcc->dcc_ind_blk = hubp_ind_block_unconstrained;
+ }
+
+ /* TODO: This seems wrong because there is no DCC plane on GFX12. */
+ ret = amdgpu_dm_plane_validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size);
+ if (ret)
+ drm_dbg_kms(adev_to_drm(adev), "amdgpu_dm_plane_validate_dcc: returned error: %d\n", ret);
+
+ return ret;
+}
+
+static void amdgpu_dm_plane_add_gfx10_1_modifiers(const struct amdgpu_device *adev,
+ uint64_t **mods,
+ uint64_t *size,
+ uint64_t *capacity)
{
int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_RETILE, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits));
-
-
- /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
+
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_RETILE, 1) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
+
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits));
+
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits));
+
+
+ /* Only supported for 64bpp, will be filtered in amdgpu_dm_plane_format_mod_supported */
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
+
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
}
-static void add_gfx9_modifiers(const struct amdgpu_device *adev,
- uint64_t **mods, uint64_t *size, uint64_t *capacity)
+static void amdgpu_dm_plane_add_gfx9_modifiers(const struct amdgpu_device *adev,
+ uint64_t **mods,
+ uint64_t *size,
+ uint64_t *capacity)
{
int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
int pipe_xor_bits = min(8, pipes +
@@ -421,163 +469,164 @@ static void add_gfx9_modifiers(const struct amdgpu_device *adev,
*/
if (has_constant_encode) {
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
+ AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1));
}
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
+ AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0));
if (has_constant_encode) {
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_RETILE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
-
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(RB, rb) |
- AMD_FMT_MOD_SET(PIPE, pipes));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
+ AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_RETILE, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
+ AMD_FMT_MOD_SET(RB, rb) |
+ AMD_FMT_MOD_SET(PIPE, pipes));
}
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_RETILE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0) |
- AMD_FMT_MOD_SET(RB, rb) |
- AMD_FMT_MOD_SET(PIPE, pipes));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
+ AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_RETILE, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0) |
+ AMD_FMT_MOD_SET(RB, rb) |
+ AMD_FMT_MOD_SET(PIPE, pipes));
}
/*
* Only supported for 64bpp on Raven, will be filtered on format in
- * dm_plane_format_mod_supported.
+ * amdgpu_dm_plane_format_mod_supported.
*/
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
if (adev->family == AMDGPU_FAMILY_RV) {
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
}
/*
* Only supported for 64bpp on Raven, will be filtered on format in
- * dm_plane_format_mod_supported.
+ * amdgpu_dm_plane_format_mod_supported.
*/
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
if (adev->family == AMDGPU_FAMILY_RV) {
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
}
}
-static void add_gfx10_3_modifiers(const struct amdgpu_device *adev,
- uint64_t **mods, uint64_t *size, uint64_t *capacity)
+static void amdgpu_dm_plane_add_gfx10_3_modifiers(const struct amdgpu_device *adev,
+ uint64_t **mods,
+ uint64_t *size,
+ uint64_t *capacity)
{
int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
int pkrs = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs);
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_RETILE, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs) |
- AMD_FMT_MOD_SET(DCC, 1) |
- AMD_FMT_MOD_SET(DCC_RETILE, 1) |
- AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
- AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
- AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
- AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
- AMD_FMT_MOD_SET(PACKERS, pkrs));
-
- /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
-
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(PACKERS, pkrs) |
+ AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
+
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(PACKERS, pkrs) |
+ AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B));
+
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(PACKERS, pkrs) |
+ AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_RETILE, 1) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B));
+
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(PACKERS, pkrs) |
+ AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_RETILE, 1) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B));
+
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(PACKERS, pkrs));
+
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(PACKERS, pkrs));
+
+ /* Only supported for 64bpp, will be filtered in amdgpu_dm_plane_format_mod_supported */
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
+
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
}
-static void add_gfx11_modifiers(struct amdgpu_device *adev,
+static void amdgpu_dm_plane_add_gfx11_modifiers(struct amdgpu_device *adev,
uint64_t **mods, uint64_t *size, uint64_t *capacity)
{
int num_pipes = 0;
@@ -628,21 +677,53 @@ static void add_gfx11_modifiers(struct amdgpu_device *adev,
AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);
- add_modifier(mods, size, capacity, modifier_dcc_best);
- add_modifier(mods, size, capacity, modifier_dcc_4k);
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, modifier_dcc_best);
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, modifier_dcc_4k);
- add_modifier(mods, size, capacity, modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1));
- add_modifier(mods, size, capacity, modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1));
- add_modifier(mods, size, capacity, modifier_r_x);
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, modifier_r_x);
}
- add_modifier(mods, size, capacity, AMD_FMT_MOD |
- AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
- AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D));
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
+ AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D));
}
-static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods)
+static void amdgpu_dm_plane_add_gfx12_modifiers(struct amdgpu_device *adev,
+ uint64_t **mods, uint64_t *size, uint64_t *capacity)
+{
+ uint64_t ver = AMD_FMT_MOD | AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12);
+ uint64_t mod_256k = ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_256K_2D);
+ uint64_t mod_64k = ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_64K_2D);
+ uint64_t mod_4k = ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_4K_2D);
+ uint64_t mod_256b = ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_256B_2D);
+ uint64_t dcc = ver | AMD_FMT_MOD_SET(DCC, 1);
+ uint8_t max_comp_block[] = {2, 1, 0};
+ uint64_t max_comp_block_mod[ARRAY_SIZE(max_comp_block)] = {0};
+ uint8_t i = 0, j = 0;
+ uint64_t gfx12_modifiers[] = {mod_256k, mod_64k, mod_4k, mod_256b, DRM_FORMAT_MOD_LINEAR};
+
+ for (i = 0; i < ARRAY_SIZE(max_comp_block); i++)
+ max_comp_block_mod[i] = AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_comp_block[i]);
+
+ /* With DCC: the best choice must come first, so add every max-compressed-block
+ * variant of the 256K layout first, then do the same for each smaller layout in turn.
+ * The linear modifier is not combined with DCC here, hence the loop bound of size - 1.
+ */
+ for (j = 0; j < ARRAY_SIZE(gfx12_modifiers) - 1; j++)
+ for (i = 0; i < ARRAY_SIZE(max_comp_block); i++)
+ amdgpu_dm_plane_add_modifier(mods, size, capacity,
+ ver | dcc | max_comp_block_mod[i] | gfx12_modifiers[j]);
+
+ /* Without DCC. Add all modifiers including linear at the end */
+ for (i = 0; i < ARRAY_SIZE(gfx12_modifiers); i++)
+ amdgpu_dm_plane_add_modifier(mods, size, capacity, gfx12_modifiers[i]);
+
+}
+
+static int amdgpu_dm_plane_get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods)
{
uint64_t size = 0, capacity = 128;
*mods = NULL;
@@ -651,39 +732,43 @@ static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_ty
if (adev->family < AMDGPU_FAMILY_AI)
return 0;
- *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL);
+ *mods = kmalloc_array(capacity, sizeof(uint64_t), GFP_KERNEL);
if (plane_type == DRM_PLANE_TYPE_CURSOR) {
- add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
- add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID);
+ amdgpu_dm_plane_add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
+ amdgpu_dm_plane_add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID);
return *mods ? 0 : -ENOMEM;
}
switch (adev->family) {
case AMDGPU_FAMILY_AI:
case AMDGPU_FAMILY_RV:
- add_gfx9_modifiers(adev, mods, &size, &capacity);
+ amdgpu_dm_plane_add_gfx9_modifiers(adev, mods, &size, &capacity);
break;
case AMDGPU_FAMILY_NV:
case AMDGPU_FAMILY_VGH:
case AMDGPU_FAMILY_YC:
case AMDGPU_FAMILY_GC_10_3_6:
case AMDGPU_FAMILY_GC_10_3_7:
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
- add_gfx10_3_modifiers(adev, mods, &size, &capacity);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
+ amdgpu_dm_plane_add_gfx10_3_modifiers(adev, mods, &size, &capacity);
else
- add_gfx10_1_modifiers(adev, mods, &size, &capacity);
+ amdgpu_dm_plane_add_gfx10_1_modifiers(adev, mods, &size, &capacity);
break;
case AMDGPU_FAMILY_GC_11_0_0:
case AMDGPU_FAMILY_GC_11_0_1:
- add_gfx11_modifiers(adev, mods, &size, &capacity);
+ case AMDGPU_FAMILY_GC_11_5_0:
+ amdgpu_dm_plane_add_gfx11_modifiers(adev, mods, &size, &capacity);
+ break;
+ case AMDGPU_FAMILY_GC_12_0_0:
+ amdgpu_dm_plane_add_gfx12_modifiers(adev, mods, &size, &capacity);
break;
}
- add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
+ amdgpu_dm_plane_add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR);
/* INVALID marks the end of the list. */
- add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID);
+ amdgpu_dm_plane_add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID);
if (!*mods)
return -ENOMEM;
@@ -691,9 +776,9 @@ static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_ty
return 0;
}
-static int get_plane_formats(const struct drm_plane *plane,
- const struct dc_plane_cap *plane_cap,
- uint32_t *formats, int max_formats)
+static int amdgpu_dm_plane_get_plane_formats(const struct drm_plane *plane,
+ const struct dc_plane_cap *plane_cap,
+ uint32_t *formats, int max_formats)
{
int i, num_formats = 0;
@@ -755,12 +840,11 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev,
const enum surface_pixel_format format,
const enum dc_rotation_angle rotation,
const uint64_t tiling_flags,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
struct dc_plane_dcc_param *dcc,
struct dc_plane_address *address,
- bool tmz_surface,
- bool force_disable_dcc)
+ bool tmz_surface)
{
const struct drm_framebuffer *fb = &afb->base;
int ret;
@@ -816,23 +900,29 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev,
upper_32_bits(chroma_addr);
}
- if (adev->family >= AMDGPU_FAMILY_AI) {
- ret = fill_gfx9_plane_attributes_from_modifiers(adev, afb, format,
- rotation, plane_size,
- tiling_info, dcc,
- address,
- force_disable_dcc);
+ if (adev->family >= AMDGPU_FAMILY_GC_12_0_0) {
+ ret = amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(adev, afb, format,
+ rotation, plane_size,
+ tiling_info, dcc,
+ address);
+ if (ret)
+ return ret;
+ } else if (adev->family >= AMDGPU_FAMILY_AI) {
+ ret = amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(adev, afb, format,
+ rotation, plane_size,
+ tiling_info, dcc,
+ address);
if (ret)
return ret;
} else {
- fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags);
+ amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags);
}
return 0;
}
-static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
- struct drm_plane_state *new_state)
+static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane,
+ struct drm_plane_state *new_state)
{
struct amdgpu_framebuffer *afb;
struct drm_gem_object *obj;
@@ -848,19 +938,23 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
}
afb = to_amdgpu_framebuffer(new_state->fb);
- obj = new_state->fb->obj[0];
+ obj = drm_gem_fb_get_obj(new_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return -EINVAL;
+ }
+
rbo = gem_to_amdgpu_bo(obj);
adev = amdgpu_ttm_adev(rbo->tbo.bdev);
-
r = amdgpu_bo_reserve(rbo, true);
if (r) {
- dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
+ drm_err(adev_to_drm(adev), "fail to reserve bo (%d)\n", r);
return r;
}
r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
if (r) {
- dev_err(adev->dev, "reserving fence slot failed (%d)\n", r);
+ drm_err(adev_to_drm(adev), "reserving fence slot failed (%d)\n", r);
goto error_unlock;
}
@@ -869,6 +963,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
else
domain = AMDGPU_GEM_DOMAIN_VRAM;
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(rbo, domain);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
@@ -907,14 +1002,13 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) {
struct dc_plane_state *plane_state =
dm_plane_state_new->dc_state;
- bool force_disable_dcc = !plane_state->dcc.enable;
amdgpu_dm_plane_fill_plane_buffer_attributes(
adev, afb, plane_state->format, plane_state->rotation,
afb->tiling_flags,
&plane_state->tiling_info, &plane_state->plane_size,
&plane_state->dcc, &plane_state->address,
- afb->tmz_surface, force_disable_dcc);
+ afb->tmz_surface);
}
return 0;
@@ -927,8 +1021,8 @@ error_unlock:
return r;
}
-static void dm_plane_helper_cleanup_fb(struct drm_plane *plane,
- struct drm_plane_state *old_state)
+static void amdgpu_dm_plane_helper_cleanup_fb(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
{
struct amdgpu_bo *rbo;
int r;
@@ -948,7 +1042,7 @@ static void dm_plane_helper_cleanup_fb(struct drm_plane *plane,
amdgpu_bo_unref(&rbo);
}
-static void get_min_max_dc_plane_scaling(struct drm_device *dev,
+static void amdgpu_dm_plane_get_min_max_dc_plane_scaling(struct drm_device *dev,
struct drm_framebuffer *fb,
int *min_downscale, int *max_upscale)
{
@@ -1029,8 +1123,8 @@ int amdgpu_dm_plane_helper_check_state(struct drm_plane_state *state,
}
/* Get min/max allowed scaling factors from plane caps. */
- get_min_max_dc_plane_scaling(state->crtc->dev, fb,
- &min_downscale, &max_upscale);
+ amdgpu_dm_plane_get_min_max_dc_plane_scaling(state->crtc->dev, fb,
+ &min_downscale, &max_upscale);
/*
* Convert to drm convention: 16.16 fixed point, instead of dc's
* 1.0 == 1000. Also drm scaling is src/dst instead of dc's
@@ -1068,8 +1162,8 @@ int amdgpu_dm_plane_fill_dc_scaling_info(struct amdgpu_device *adev,
* is to gesture the YouTube Android app into full screen
* on ChromeOS.
*/
- if (((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
- (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) &&
+ if (((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+ (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1))) &&
(state->fb && state->fb->format->format == DRM_FORMAT_NV12 &&
(scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0)))
return -EINVAL;
@@ -1100,8 +1194,8 @@ int amdgpu_dm_plane_fill_dc_scaling_info(struct amdgpu_device *adev,
/* Validate scaling per-format with DC plane caps */
if (state->plane && state->plane->dev && state->fb) {
- get_min_max_dc_plane_scaling(state->plane->dev, state->fb,
- &min_downscale, &max_upscale);
+ amdgpu_dm_plane_get_min_max_dc_plane_scaling(state->plane->dev, state->fb,
+ &min_downscale, &max_upscale);
} else {
min_downscale = 250;
max_upscale = 16000;
@@ -1127,8 +1221,8 @@ int amdgpu_dm_plane_fill_dc_scaling_info(struct amdgpu_device *adev,
return 0;
}
-static int dm_plane_atomic_check(struct drm_plane *plane,
- struct drm_atomic_state *state)
+static int amdgpu_dm_plane_atomic_check(struct drm_plane *plane,
+ struct drm_atomic_state *state)
{
struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
plane);
@@ -1166,20 +1260,35 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
return -EINVAL;
}
-static int dm_plane_atomic_async_check(struct drm_plane *plane,
- struct drm_atomic_state *state)
+static int amdgpu_dm_plane_atomic_async_check(struct drm_plane *plane,
+ struct drm_atomic_state *state, bool flip)
{
- /* Only support async updates on cursor planes. */
- if (plane->type != DRM_PLANE_TYPE_CURSOR)
+ struct drm_crtc_state *new_crtc_state;
+ struct drm_plane_state *new_plane_state;
+ struct dm_crtc_state *dm_new_crtc_state;
+
+ if (flip) {
+ if (plane->type != DRM_PLANE_TYPE_OVERLAY)
+ return -EINVAL;
+ } else if (plane->type != DRM_PLANE_TYPE_CURSOR) {
+ return -EINVAL;
+ }
+
+ new_plane_state = drm_atomic_get_new_plane_state(state, plane);
+ new_crtc_state = drm_atomic_get_new_crtc_state(state, new_plane_state->crtc);
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
+ /* Reject overlay cursors for now */
+ if (!flip && dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE)
return -EINVAL;
return 0;
}
-static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
- struct dc_cursor_position *position)
+int amdgpu_dm_plane_get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
+ struct dc_cursor_position *position)
{
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(plane->dev);
int x, y;
int xorigin = 0, yorigin = 0;
@@ -1211,12 +1320,14 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
y = 0;
}
position->enable = true;
- position->translate_by_source = true;
position->x = x;
position->y = y;
position->x_hotspot = xorigin;
position->y_hotspot = yorigin;
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(4, 0, 1))
+ position->translate_by_source = true;
+
return 0;
}
@@ -1236,13 +1347,11 @@ void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane,
if (!plane->state->fb && !old_plane_state->fb)
return;
- DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n",
- __func__,
- amdgpu_crtc->crtc_id,
- plane->state->crtc_w,
- plane->state->crtc_h);
+ drm_dbg_atomic(plane->dev, "crtc_id=%d with size %d to %d\n",
+ amdgpu_crtc->crtc_id, plane->state->crtc_w,
+ plane->state->crtc_h);
- ret = get_cursor_position(plane, crtc, &position);
+ ret = amdgpu_dm_plane_get_cursor_position(plane, crtc, &position);
if (ret)
return;
@@ -1250,7 +1359,7 @@ void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane,
/* turn off cursor */
if (crtc_state && crtc_state->stream) {
mutex_lock(&adev->dm.dc_lock);
- dc_stream_set_cursor_position(crtc_state->stream,
+ dc_stream_program_cursor_position(crtc_state->stream,
&position);
mutex_unlock(&adev->dm.dc_lock);
}
@@ -1276,23 +1385,24 @@ void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane,
adev->dm.dc->caps.color.dpp.gamma_corr)
attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1;
- attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0];
+ if (afb)
+ attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0];
if (crtc_state->stream) {
mutex_lock(&adev->dm.dc_lock);
- if (!dc_stream_set_cursor_attributes(crtc_state->stream,
+ if (!dc_stream_program_cursor_attributes(crtc_state->stream,
&attributes))
DRM_ERROR("DC failed to set cursor attributes\n");
- if (!dc_stream_set_cursor_position(crtc_state->stream,
+ if (!dc_stream_program_cursor_position(crtc_state->stream,
&position))
DRM_ERROR("DC failed to set cursor position\n");
mutex_unlock(&adev->dm.dc_lock);
}
}
-static void dm_plane_atomic_async_update(struct drm_plane *plane,
- struct drm_atomic_state *state)
+static void amdgpu_dm_plane_atomic_async_update(struct drm_plane *plane,
+ struct drm_atomic_state *state)
{
struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
plane);
@@ -1315,15 +1425,39 @@ static void dm_plane_atomic_async_update(struct drm_plane *plane,
amdgpu_dm_plane_handle_cursor_update(plane, old_state);
}
+static void amdgpu_dm_plane_panic_flush(struct drm_plane *plane)
+{
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane->state);
+ struct drm_framebuffer *fb = plane->state->fb;
+ struct dc_plane_state *dc_plane_state;
+
+ if (!dm_plane_state || !dm_plane_state->dc_state)
+ return;
+
+ dc_plane_state = dm_plane_state->dc_state;
+
+ dc_plane_force_dcc_and_tiling_disable(dc_plane_state, fb->modifier ? true : false);
+}
+
static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
- .prepare_fb = dm_plane_helper_prepare_fb,
- .cleanup_fb = dm_plane_helper_cleanup_fb,
- .atomic_check = dm_plane_atomic_check,
- .atomic_async_check = dm_plane_atomic_async_check,
- .atomic_async_update = dm_plane_atomic_async_update
+ .prepare_fb = amdgpu_dm_plane_helper_prepare_fb,
+ .cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb,
+ .atomic_check = amdgpu_dm_plane_atomic_check,
+ .atomic_async_check = amdgpu_dm_plane_atomic_async_check,
+ .atomic_async_update = amdgpu_dm_plane_atomic_async_update
+};
+
+static const struct drm_plane_helper_funcs dm_primary_plane_helper_funcs = {
+ .prepare_fb = amdgpu_dm_plane_helper_prepare_fb,
+ .cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb,
+ .atomic_check = amdgpu_dm_plane_atomic_check,
+ .atomic_async_check = amdgpu_dm_plane_atomic_async_check,
+ .atomic_async_update = amdgpu_dm_plane_atomic_async_update,
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = amdgpu_dm_plane_panic_flush,
};
-static void dm_drm_plane_reset(struct drm_plane *plane)
+static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane)
{
struct dm_plane_state *amdgpu_state = NULL;
@@ -1333,12 +1467,17 @@ static void dm_drm_plane_reset(struct drm_plane *plane)
amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL);
WARN_ON(amdgpu_state == NULL);
- if (amdgpu_state)
- __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+ if (!amdgpu_state)
+ return;
+
+ __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base);
+ amdgpu_state->degamma_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+ amdgpu_state->hdr_mult = AMDGPU_HDR_MULT_DEFAULT;
+ amdgpu_state->shaper_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
+ amdgpu_state->blend_tf = AMDGPU_TRANSFER_FUNCTION_DEFAULT;
}
-static struct drm_plane_state *
-dm_drm_plane_duplicate_state(struct drm_plane *plane)
+static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct drm_plane *plane)
{
struct dm_plane_state *dm_plane_state, *old_dm_plane_state;
@@ -1354,19 +1493,38 @@ dm_drm_plane_duplicate_state(struct drm_plane *plane)
dc_plane_state_retain(dm_plane_state->dc_state);
}
+ if (old_dm_plane_state->degamma_lut)
+ dm_plane_state->degamma_lut =
+ drm_property_blob_get(old_dm_plane_state->degamma_lut);
+ if (old_dm_plane_state->ctm)
+ dm_plane_state->ctm =
+ drm_property_blob_get(old_dm_plane_state->ctm);
+ if (old_dm_plane_state->shaper_lut)
+ dm_plane_state->shaper_lut =
+ drm_property_blob_get(old_dm_plane_state->shaper_lut);
+ if (old_dm_plane_state->lut3d)
+ dm_plane_state->lut3d =
+ drm_property_blob_get(old_dm_plane_state->lut3d);
+ if (old_dm_plane_state->blend_lut)
+ dm_plane_state->blend_lut =
+ drm_property_blob_get(old_dm_plane_state->blend_lut);
+
+ dm_plane_state->degamma_tf = old_dm_plane_state->degamma_tf;
+ dm_plane_state->hdr_mult = old_dm_plane_state->hdr_mult;
+ dm_plane_state->shaper_tf = old_dm_plane_state->shaper_tf;
+ dm_plane_state->blend_tf = old_dm_plane_state->blend_tf;
+
return &dm_plane_state->base;
}
-static bool dm_plane_format_mod_supported(struct drm_plane *plane,
- uint32_t format,
- uint64_t modifier)
+static bool amdgpu_dm_plane_format_mod_supported(struct drm_plane *plane,
+ uint32_t format,
+ uint64_t modifier)
{
struct amdgpu_device *adev = drm_to_adev(plane->dev);
const struct drm_format_info *info = drm_format_info(format);
int i;
- enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) & 3;
-
if (!info)
return false;
@@ -1388,53 +1546,256 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane,
if (i == plane->modifier_count)
return false;
- /*
- * For D swizzle the canonical modifier depends on the bpp, so check
- * it here.
- */
- if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
- adev->family >= AMDGPU_FAMILY_NV) {
- if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4)
- return false;
- }
-
- if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D &&
- info->cpp[0] < 8)
- return false;
+ /* GFX12 doesn't have these limitations. */
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11) {
+ enum dm_micro_swizzle microtile = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier) & 3;
- if (modifier_has_dcc(modifier)) {
- /* Per radeonsi comments 16/64 bpp are more complicated. */
- if (info->cpp[0] != 4)
- return false;
- /* We support multi-planar formats, but not when combined with
- * additional DCC metadata planes.
+ /*
+ * For D swizzle the canonical modifier depends on the bpp, so check
+ * it here.
*/
- if (info->num_planes > 1)
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
+ adev->family >= AMDGPU_FAMILY_NV) {
+ if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4)
+ return false;
+ }
+
+ if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D &&
+ info->cpp[0] < 8)
return false;
+
+ if (amdgpu_dm_plane_modifier_has_dcc(modifier)) {
+ /* Per radeonsi comments 16/64 bpp are more complicated. */
+ if (info->cpp[0] != 4)
+ return false;
+ /* We support multi-planar formats, but not when combined with
+ * additional DCC metadata planes.
+ */
+ if (info->num_planes > 1)
+ return false;
+ }
}
return true;
}
-static void dm_drm_plane_destroy_state(struct drm_plane *plane,
- struct drm_plane_state *state)
+static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane,
+ struct drm_plane_state *state)
{
struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+ if (dm_plane_state->degamma_lut)
+ drm_property_blob_put(dm_plane_state->degamma_lut);
+ if (dm_plane_state->ctm)
+ drm_property_blob_put(dm_plane_state->ctm);
+ if (dm_plane_state->lut3d)
+ drm_property_blob_put(dm_plane_state->lut3d);
+ if (dm_plane_state->shaper_lut)
+ drm_property_blob_put(dm_plane_state->shaper_lut);
+ if (dm_plane_state->blend_lut)
+ drm_property_blob_put(dm_plane_state->blend_lut);
+
if (dm_plane_state->dc_state)
dc_plane_state_release(dm_plane_state->dc_state);
drm_atomic_helper_plane_destroy_state(plane, state);
}
+#ifdef AMD_PRIVATE_COLOR
+static void
+dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm,
+ struct drm_plane *plane)
+{
+ struct amdgpu_mode_info mode_info = dm->adev->mode_info;
+ struct dpp_color_caps dpp_color_caps = dm->dc->caps.color.dpp;
+
+ /* Check HW color pipeline capabilities on DPP block (pre-blending)
+ * before exposing related properties.
+ */
+ if (dpp_color_caps.dgam_ram || dpp_color_caps.gamma_corr) {
+ drm_object_attach_property(&plane->base,
+ mode_info.plane_degamma_lut_property,
+ 0);
+ drm_object_attach_property(&plane->base,
+ mode_info.plane_degamma_lut_size_property,
+ MAX_COLOR_LUT_ENTRIES);
+ drm_object_attach_property(&plane->base,
+ dm->adev->mode_info.plane_degamma_tf_property,
+ AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+ }
+ /* HDR MULT is always available */
+ drm_object_attach_property(&plane->base,
+ dm->adev->mode_info.plane_hdr_mult_property,
+ AMDGPU_HDR_MULT_DEFAULT);
+
+ /* Only enable plane CTM if both DPP and MPC gamut remap are available. */
+ if (dm->dc->caps.color.mpc.gamut_remap)
+ drm_object_attach_property(&plane->base,
+ dm->adev->mode_info.plane_ctm_property, 0);
+
+ if (dpp_color_caps.hw_3d_lut || dm->dc->caps.color.mpc.preblend) {
+ drm_object_attach_property(&plane->base,
+ mode_info.plane_shaper_lut_property, 0);
+ drm_object_attach_property(&plane->base,
+ mode_info.plane_shaper_lut_size_property,
+ MAX_COLOR_LUT_ENTRIES);
+ drm_object_attach_property(&plane->base,
+ mode_info.plane_shaper_tf_property,
+ AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+ drm_object_attach_property(&plane->base,
+ mode_info.plane_lut3d_property, 0);
+ drm_object_attach_property(&plane->base,
+ mode_info.plane_lut3d_size_property,
+ MAX_COLOR_3DLUT_SIZE);
+ }
+
+ if (dpp_color_caps.ogam_ram) {
+ drm_object_attach_property(&plane->base,
+ mode_info.plane_blend_lut_property, 0);
+ drm_object_attach_property(&plane->base,
+ mode_info.plane_blend_lut_size_property,
+ MAX_COLOR_LUT_ENTRIES);
+ drm_object_attach_property(&plane->base,
+ mode_info.plane_blend_tf_property,
+ AMDGPU_TRANSFER_FUNCTION_DEFAULT);
+ }
+}
+
+static int
+dm_atomic_plane_set_property(struct drm_plane *plane,
+ struct drm_plane_state *state,
+ struct drm_property *property,
+ uint64_t val)
+{
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+ struct amdgpu_device *adev = drm_to_adev(plane->dev);
+ bool replaced = false;
+ int ret;
+
+ if (property == adev->mode_info.plane_degamma_lut_property) {
+ ret = drm_property_replace_blob_from_id(plane->dev,
+ &dm_plane_state->degamma_lut,
+ val, -1,
+ sizeof(struct drm_color_lut),
+ &replaced);
+ dm_plane_state->base.color_mgmt_changed |= replaced;
+ return ret;
+ } else if (property == adev->mode_info.plane_degamma_tf_property) {
+ if (dm_plane_state->degamma_tf != val) {
+ dm_plane_state->degamma_tf = val;
+ dm_plane_state->base.color_mgmt_changed = 1;
+ }
+ } else if (property == adev->mode_info.plane_hdr_mult_property) {
+ if (dm_plane_state->hdr_mult != val) {
+ dm_plane_state->hdr_mult = val;
+ dm_plane_state->base.color_mgmt_changed = 1;
+ }
+ } else if (property == adev->mode_info.plane_ctm_property) {
+ ret = drm_property_replace_blob_from_id(plane->dev,
+ &dm_plane_state->ctm,
+ val,
+ sizeof(struct drm_color_ctm_3x4), -1,
+ &replaced);
+ dm_plane_state->base.color_mgmt_changed |= replaced;
+ return ret;
+ } else if (property == adev->mode_info.plane_shaper_lut_property) {
+ ret = drm_property_replace_blob_from_id(plane->dev,
+ &dm_plane_state->shaper_lut,
+ val, -1,
+ sizeof(struct drm_color_lut),
+ &replaced);
+ dm_plane_state->base.color_mgmt_changed |= replaced;
+ return ret;
+ } else if (property == adev->mode_info.plane_shaper_tf_property) {
+ if (dm_plane_state->shaper_tf != val) {
+ dm_plane_state->shaper_tf = val;
+ dm_plane_state->base.color_mgmt_changed = 1;
+ }
+ } else if (property == adev->mode_info.plane_lut3d_property) {
+ ret = drm_property_replace_blob_from_id(plane->dev,
+ &dm_plane_state->lut3d,
+ val, -1,
+ sizeof(struct drm_color_lut),
+ &replaced);
+ dm_plane_state->base.color_mgmt_changed |= replaced;
+ return ret;
+ } else if (property == adev->mode_info.plane_blend_lut_property) {
+ ret = drm_property_replace_blob_from_id(plane->dev,
+ &dm_plane_state->blend_lut,
+ val, -1,
+ sizeof(struct drm_color_lut),
+ &replaced);
+ dm_plane_state->base.color_mgmt_changed |= replaced;
+ return ret;
+ } else if (property == adev->mode_info.plane_blend_tf_property) {
+ if (dm_plane_state->blend_tf != val) {
+ dm_plane_state->blend_tf = val;
+ dm_plane_state->base.color_mgmt_changed = 1;
+ }
+ } else {
+ drm_dbg_atomic(plane->dev,
+ "[PLANE:%d:%s] unknown property [PROP:%d:%s]]\n",
+ plane->base.id, plane->name,
+ property->base.id, property->name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+dm_atomic_plane_get_property(struct drm_plane *plane,
+ const struct drm_plane_state *state,
+ struct drm_property *property,
+ uint64_t *val)
+{
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(state);
+ struct amdgpu_device *adev = drm_to_adev(plane->dev);
+
+ if (property == adev->mode_info.plane_degamma_lut_property) {
+ *val = (dm_plane_state->degamma_lut) ?
+ dm_plane_state->degamma_lut->base.id : 0;
+ } else if (property == adev->mode_info.plane_degamma_tf_property) {
+ *val = dm_plane_state->degamma_tf;
+ } else if (property == adev->mode_info.plane_hdr_mult_property) {
+ *val = dm_plane_state->hdr_mult;
+ } else if (property == adev->mode_info.plane_ctm_property) {
+ *val = (dm_plane_state->ctm) ?
+ dm_plane_state->ctm->base.id : 0;
+ } else if (property == adev->mode_info.plane_shaper_lut_property) {
+ *val = (dm_plane_state->shaper_lut) ?
+ dm_plane_state->shaper_lut->base.id : 0;
+ } else if (property == adev->mode_info.plane_shaper_tf_property) {
+ *val = dm_plane_state->shaper_tf;
+ } else if (property == adev->mode_info.plane_lut3d_property) {
+ *val = (dm_plane_state->lut3d) ?
+ dm_plane_state->lut3d->base.id : 0;
+ } else if (property == adev->mode_info.plane_blend_lut_property) {
+ *val = (dm_plane_state->blend_lut) ?
+ dm_plane_state->blend_lut->base.id : 0;
+ } else if (property == adev->mode_info.plane_blend_tf_property) {
+ *val = dm_plane_state->blend_tf;
+
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
+
static const struct drm_plane_funcs dm_plane_funcs = {
.update_plane = drm_atomic_helper_update_plane,
.disable_plane = drm_atomic_helper_disable_plane,
.destroy = drm_plane_helper_destroy,
- .reset = dm_drm_plane_reset,
- .atomic_duplicate_state = dm_drm_plane_duplicate_state,
- .atomic_destroy_state = dm_drm_plane_destroy_state,
- .format_mod_supported = dm_plane_format_mod_supported,
+ .reset = amdgpu_dm_plane_drm_plane_reset,
+ .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state,
+ .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state,
+ .format_mod_supported = amdgpu_dm_plane_format_mod_supported,
+#ifdef AMD_PRIVATE_COLOR
+ .atomic_set_property = dm_atomic_plane_set_property,
+ .atomic_get_property = dm_atomic_plane_get_property,
+#endif
};
int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
@@ -1447,11 +1808,12 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
int res = -EPERM;
unsigned int supported_rotations;
uint64_t *modifiers = NULL;
+ unsigned int primary_zpos = dm->dc->caps.max_slave_planes;
- num_formats = get_plane_formats(plane, plane_cap, formats,
- ARRAY_SIZE(formats));
+ num_formats = amdgpu_dm_plane_get_plane_formats(plane, plane_cap, formats,
+ ARRAY_SIZE(formats));
- res = get_plane_modifiers(dm->adev, plane->type, &modifiers);
+ res = amdgpu_dm_plane_get_plane_modifiers(dm->adev, plane->type, &modifiers);
if (res)
return res;
@@ -1476,10 +1838,19 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
}
if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
- drm_plane_create_zpos_immutable_property(plane, 0);
+ /*
+ * Allow OVERLAY planes to be used as underlays by assigning an
+ * immutable zpos = # of OVERLAY planes to the PRIMARY plane.
+ */
+ drm_plane_create_zpos_immutable_property(plane, primary_zpos);
} else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
- unsigned int zpos = 1 + drm_plane_index(plane);
- drm_plane_create_zpos_property(plane, zpos, 1, 254);
+ /*
+ * OVERLAY planes can be below or above the PRIMARY, but cannot
+ * be above the CURSOR plane.
+ */
+ unsigned int zpos = primary_zpos + 1 + drm_plane_index(plane);
+
+ drm_plane_create_zpos_property(plane, zpos, 0, 254);
} else if (plane->type == DRM_PLANE_TYPE_CURSOR) {
drm_plane_create_zpos_immutable_property(plane, 255);
}
@@ -1508,12 +1879,18 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
supported_rotations);
- if (dm->adev->ip_versions[DCE_HWIP][0] > IP_VERSION(3, 0, 1) &&
+ if (amdgpu_ip_version(dm->adev, DCE_HWIP, 0) > IP_VERSION(3, 0, 1) &&
plane->type != DRM_PLANE_TYPE_CURSOR)
drm_plane_enable_fb_damage_clips(plane);
- drm_plane_helper_add(plane, &dm_plane_helper_funcs);
+ if (plane->type == DRM_PLANE_TYPE_PRIMARY)
+ drm_plane_helper_add(plane, &dm_primary_plane_helper_funcs);
+ else
+ drm_plane_helper_add(plane, &dm_plane_helper_funcs);
+#ifdef AMD_PRIVATE_COLOR
+ dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
+#endif
/* Create (reset) the plane state */
if (plane->funcs->reset)
plane->funcs->reset(plane);
@@ -1521,7 +1898,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
return 0;
}
-bool is_video_format(uint32_t format)
+bool amdgpu_dm_plane_is_video_format(uint32_t format)
{
int i;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
index 930f1572f898..ea2619b507db 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
@@ -29,6 +29,9 @@
#include "dc.h"
+int amdgpu_dm_plane_get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
+ struct dc_cursor_position *position);
+
void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane,
struct drm_plane_state *old_plane_state);
@@ -44,23 +47,22 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev,
const enum surface_pixel_format format,
const enum dc_rotation_angle rotation,
const uint64_t tiling_flags,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
struct dc_plane_dcc_param *dcc,
struct dc_plane_address *address,
- bool tmz_surface,
- bool force_disable_dcc);
+ bool tmz_surface);
int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
struct drm_plane *plane,
unsigned long possible_crtcs,
const struct dc_plane_cap *plane_cap);
-const struct drm_format_info *amdgpu_dm_plane_get_format_info(const struct drm_mode_fb_cmd2 *cmd);
+const struct drm_format_info *amdgpu_dm_plane_get_format_info(u32 pixel_format, u64 modifier);
void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
bool *per_pixel_alpha, bool *pre_multiplied_alpha,
bool *global_alpha, int *global_alpha_value);
-bool is_video_format(uint32_t format);
+bool amdgpu_dm_plane_is_video_format(uint32_t format);
#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index 848c5b4bb301..11b2ea6edf95 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
@@ -97,6 +98,7 @@ bool dm_pp_apply_display_requirements(
const struct dm_pp_single_disp_config *dc_cfg =
&pp_display_cfg->disp_configs[i];
adev->pm.pm_display_cfg.displays[i].controller_id = dc_cfg->pipe_idx + 1;
+ adev->pm.pm_display_cfg.displays[i].pixel_clock = dc_cfg->pixel_clock;
}
amdgpu_dpm_display_configuration_change(adev, &adev->pm.pm_display_cfg);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
index 08ce3bb8f640..fd491b7a3cd7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
@@ -26,7 +27,6 @@
#include "amdgpu_dm_psr.h"
#include "dc_dmub_srv.h"
#include "dc.h"
-#include "dm_helpers.h"
#include "amdgpu_dm.h"
#include "modules/power/power_helpers.h"
@@ -51,7 +51,11 @@ static bool link_supports_psrsu(struct dc_link *link)
!link->dpcd_caps.psr_info.psr2_su_y_granularity_cap)
return false;
- return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub);
+ if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU)
+ return false;
+
+ /* Temporarily disable PSR-SU to avoid glitches */
+ return false;
}
/*
@@ -83,14 +87,6 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link)
link->psr_settings.psr_feature_enabled = true;
}
-
- DRM_INFO("PSR support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n",
- link->psr_settings.psr_feature_enabled,
- link->psr_settings.psr_version,
- link->dpcd_caps.psr_info.psr_version,
- link->dpcd_caps.psr_info.psr_dpcd_caps.raw,
- link->dpcd_caps.psr_info.psr2_su_y_granularity_cap);
-
}
/*
@@ -123,8 +119,10 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
psr_config.allow_multi_disp_optimizations =
(amdgpu_dc_feature_mask & DC_PSR_ALLOW_MULTI_DISP_OPT);
- if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config))
- return false;
+ if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) {
+ if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config))
+ return false;
+ }
ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context);
@@ -138,9 +136,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
* amdgpu_dm_psr_enable() - enable psr f/w
* @stream: stream state
*
- * Return: true if success
*/
-bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
+void amdgpu_dm_psr_enable(struct dc_stream_state *stream)
{
struct dc_link *link = stream->link;
unsigned int vsync_rate_hz = 0;
@@ -156,7 +153,7 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
DRM_DEBUG_DRIVER("Enabling psr...\n");
vsync_rate_hz = div64_u64(div64_u64((
- stream->timing.pix_clk_100hz * 100),
+ stream->timing.pix_clk_100hz * (uint64_t)100),
stream->timing.v_total),
stream->timing.h_total);
@@ -187,7 +184,10 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1)
power_opt |= psr_power_opt_z10_static_screen;
- return dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt);
+ dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt);
+
+ if (link->ctx->dc->caps.ips_support)
+ dc_allow_idle_optimizations(link->ctx->dc, true);
}
/*
@@ -196,18 +196,17 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
*
* Return: true if success
*/
-bool amdgpu_dm_psr_disable(struct dc_stream_state *stream)
+bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait)
{
- unsigned int power_opt = 0;
bool psr_enable = false;
DRM_DEBUG_DRIVER("Disabling psr...\n");
- return dc_link_set_psr_allow_active(stream->link, &psr_enable, true, false, &power_opt);
+ return dc_link_set_psr_allow_active(stream->link, &psr_enable, wait, false, NULL);
}
/*
- * amdgpu_dm_psr_disable() - disable psr f/w
+ * amdgpu_dm_psr_disable_all() - disable psr f/w for all streams
* if psr is enabled on any stream
*
* Return: true if success
@@ -218,3 +217,61 @@ bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm)
return dc_set_psr_allow_active(dm->dc, false);
}
+/*
+ * amdgpu_dm_psr_is_active_allowed() - tell whether any current stream may use PSR
+ * @dm: pointer to amdgpu_display_manager
+ *
+ * Return: true if a stream's link has PSR both feature-enabled and allow-active
+ */
+
+bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < dm->dc->current_state->stream_count; idx++) {
+		struct dc_link *link = dm->dc->current_state->streams[idx]->link;
+
+		/* A stream without a link carries no PSR settings to inspect. */
+		if (!link)
+			continue;
+
+		if (!link->psr_settings.psr_feature_enabled)
+			continue;
+
+		/* One qualifying link is enough; no need to scan the rest. */
+		if (link->psr_settings.psr_allow_active)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * amdgpu_dm_psr_wait_disable() - Wait for eDP panel to exit PSR
+ * @stream: stream state attached to the eDP link
+ *
+ * Polls the PSR state up to 1000 times, 500us apart (a max of 500ms).
+ *
+ * Return: true if panel exited PSR, false on timeout or if @stream has no link.
+ */
+bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream)
+{
+	enum dc_psr_state psr_state = PSR_STATE0;
+	struct dc_link *link = stream->link;
+	int retry_count;
+
+	if (link == NULL)
+		return false;
+
+	for (retry_count = 0; retry_count < 1000; retry_count++) {
+		dc_link_get_psr_state(link, &psr_state);
+		if (psr_state == PSR_STATE0)
+			break;
+		udelay(500);
+	}
+
+	/*
+	 * retry_count reaches the loop bound only if no poll saw PSR_STATE0.
+	 */
+	return retry_count < 1000;
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
index 6806b3c9c84b..4fb8626913cf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
@@ -32,9 +33,11 @@
#define AMDGPU_DM_PSR_ENTRY_DELAY 5
void amdgpu_dm_set_psr_caps(struct dc_link *link);
-bool amdgpu_dm_psr_enable(struct dc_stream_state *stream);
+void amdgpu_dm_psr_enable(struct dc_stream_state *stream);
bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream);
-bool amdgpu_dm_psr_disable(struct dc_stream_state *stream);
+bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait);
bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm);
+bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm);
+bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream);
#endif /* AMDGPU_DM_AMDGPU_DM_PSR_H_ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c
new file mode 100644
index 000000000000..1da07ebf9217
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/dmi.h>
+
+#include "amdgpu.h"
+#include "amdgpu_dm.h"
+
+struct amdgpu_dm_quirks { /* DMI quirk hits recorded by the table callbacks below */
+ bool aux_hpd_discon; /* set by aux_hpd_discon_callback() */
+ bool support_edp0_on_dp1; /* set by edp0_on_dp1_callback() */
+};
+
+static struct amdgpu_dm_quirks quirk_entries = { /* file-wide state; read by retrieve_dmi_info() */
+ .aux_hpd_discon = false,
+ .support_edp0_on_dp1 = false
+};
+
+static int edp0_on_dp1_callback(const struct dmi_system_id *id)
+{
+ quirk_entries.support_edp0_on_dp1 = true; /* record the match; @id itself is unused */
+ return 0; /* NOTE(review): presumably 0 lets dmi_check_system() keep walking the table — confirm */
+}
+
+static int aux_hpd_discon_callback(const struct dmi_system_id *id)
+{
+ quirk_entries.aux_hpd_discon = true; /* record the match; @id itself is unused */
+ return 0; /* NOTE(review): presumably 0 lets dmi_check_system() keep walking the table — confirm */
+}
+
+static const struct dmi_system_id dmi_quirk_table[] = { /* vendor/product matches and the quirk callback each one triggers */
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"),
+ },
+ },
+ {
+ .callback = aux_hpd_discon_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"),
+ },
+ },
+ {
+ .callback = edp0_on_dp1_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"),
+ },
+ },
+ {
+ .callback = edp0_on_dp1_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 645 14 inch G11 Notebook PC"),
+ },
+ },
+ {
+ .callback = edp0_on_dp1_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"),
+ },
+ },
+ {
+ .callback = edp0_on_dp1_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 445 14 inch G11 Notebook PC"),
+ },
+ },
+ {
+ .callback = edp0_on_dp1_callback,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 465 16 inch G11 Notebook PC"),
+ },
+ },
+ {} /* empty terminating entry */
+ /* TODO: refactor this from a fixed table to a dynamic option */
+};
+
+void retrieve_dmi_info(struct amdgpu_display_manager *dm)
+{
+	struct drm_device *drm = dm->ddev;
+
+	/* Start from "no quirk"; only a table match below may switch one on. */
+	dm->aux_hpd_discon_quirk = false;
+	dm->edp0_on_dp1_quirk = false;
+
+	/* The table callbacks record their hits in quirk_entries. */
+	if (dmi_check_system(dmi_quirk_table) == 0)
+		return;
+
+	if (quirk_entries.aux_hpd_discon) {
+		dm->aux_hpd_discon_quirk = true;
+		drm_info(drm, "aux_hpd_discon_quirk attached\n");
+	}
+
+	if (quirk_entries.support_edp0_on_dp1) {
+		dm->edp0_on_dp1_quirk = true;
+		drm_info(drm, "support_edp0_on_dp1 attached\n");
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c
index 32d3086c4cb7..80704d709e44 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
@@ -24,20 +25,21 @@
*/
#include "amdgpu_dm_replay.h"
+#include "dc_dmub_srv.h"
#include "dc.h"
#include "dm_helpers.h"
#include "amdgpu_dm.h"
#include "modules/power/power_helpers.h"
#include "dmub/inc/dmub_cmd.h"
-#include "dc/inc/link.h"
+#include "dc/inc/link_service.h"
/*
- * link_supports_replay() - check if the link supports replay
+ * amdgpu_dm_link_supports_replay() - check if the link supports replay
* @link: link
* @aconnector: aconnector
*
*/
-static bool link_supports_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector)
+bool amdgpu_dm_link_supports_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector)
{
struct dm_connector_state *state = to_dm_connector_state(aconnector->base.state);
struct dpcd_caps *dpcd_caps = &link->dpcd_caps;
@@ -60,21 +62,27 @@ static bool link_supports_replay(struct dc_link *link, struct amdgpu_dm_connecto
if (!as_caps->dp_adap_sync_caps.bits.ADAPTIVE_SYNC_SDP_SUPPORT)
return false;
+ // Sink shall populate line deviation information
+ if (dpcd_caps->pr_info.pixel_deviation_per_line == 0 ||
+ dpcd_caps->pr_info.max_deviation_line == 0)
+ return false;
+
return true;
}
/*
- * amdgpu_dm_setup_replay() - setup replay configuration
+ * amdgpu_dm_set_replay_caps() - setup Replay capabilities
* @link: link
* @aconnector: aconnector
*
*/
-bool amdgpu_dm_setup_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector)
+bool amdgpu_dm_set_replay_caps(struct dc_link *link, struct amdgpu_dm_connector *aconnector)
{
- struct replay_config pr_config;
+ struct replay_config pr_config = { 0 };
union replay_debug_flags *debug_flags = NULL;
+ struct dc *dc = link->ctx->dc;
- // For eDP, if Replay is supported, return true to skip checks
+ // If Replay is already set to support, return true to skip checks
if (link->replay_settings.config.replay_supported)
return true;
@@ -84,30 +92,57 @@ bool amdgpu_dm_setup_replay(struct dc_link *link, struct amdgpu_dm_connector *ac
if (link->panel_config.psr.disallow_replay)
return false;
- if (!link_supports_replay(link, aconnector))
+ if (!amdgpu_dm_link_supports_replay(link, aconnector))
return false;
- // Mark Replay is supported in link and update related attributes
- pr_config.replay_supported = true;
- pr_config.replay_power_opt_supported = 0;
- pr_config.replay_enable_option |= pr_enable_option_static_screen;
- pr_config.replay_timing_sync_supported = aconnector->max_vfreq >= 2 * aconnector->min_vfreq ? true : false;
+ if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub ||
+ !dc->ctx->dmub_srv->dmub->feature_caps.replay_supported)
+ return false;
- if (!pr_config.replay_timing_sync_supported)
- pr_config.replay_enable_option &= ~pr_enable_option_general_ui;
+ // Mark Replay is supported in pr_config
+ pr_config.replay_supported = true;
debug_flags = (union replay_debug_flags *)&pr_config.debug_flags;
debug_flags->u32All = 0;
debug_flags->bitfields.visual_confirm =
- link->ctx->dc->debug.visual_confirm == VISUAL_CONFIRM_REPLAY ? true : false;
-
- link->replay_settings.replay_feature_enabled = true;
+ link->ctx->dc->debug.visual_confirm == VISUAL_CONFIRM_REPLAY;
init_replay_config(link, &pr_config);
return true;
}
+/*
+ * amdgpu_dm_link_setup_replay() - fill in the link's Replay config and enable the feature
+ * @link: link whose replay_settings are updated
+ * @aconnector: connector supplying min_vfreq and max_vfreq
+ * Return: true once configured; false on a NULL argument or if Replay is unsupported
+ */
+bool amdgpu_dm_link_setup_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector)
+{
+	struct replay_config *cfg;
+	bool wide_vrr_range;
+
+	if (!link || !aconnector)
+		return false;
+
+	cfg = &link->replay_settings.config;
+	if (!cfg->replay_supported)
+		return false;
+
+	/* Fast resync is flagged only when max_vfreq is at least twice min_vfreq. */
+	wide_vrr_range = aconnector->max_vfreq >= 2 * aconnector->min_vfreq;
+
+	cfg->replay_power_opt_supported = 0x11;
+	cfg->replay_smu_opt_supported = false;
+	cfg->replay_enable_option |= pr_enable_option_static_screen;
+	cfg->replay_support_fast_resync_in_ultra_sleep_mode = wide_vrr_range;
+	cfg->replay_timing_sync_supported = false;
+	if (!cfg->replay_timing_sync_supported)
+		cfg->replay_enable_option &= ~pr_enable_option_general_ui;
+	link->replay_settings.replay_feature_enabled = true;
+	return true;
+}
/*
* amdgpu_dm_replay_enable() - enable replay f/w
@@ -117,51 +152,23 @@ bool amdgpu_dm_setup_replay(struct dc_link *link, struct amdgpu_dm_connector *ac
*/
bool amdgpu_dm_replay_enable(struct dc_stream_state *stream, bool wait)
{
- uint64_t state;
- unsigned int retry_count;
bool replay_active = true;
- const unsigned int max_retry = 1000;
- bool force_static = true;
struct dc_link *link = NULL;
-
if (stream == NULL)
return false;
link = stream->link;
- if (link == NULL)
- return false;
-
- link->dc->link_srv->edp_setup_replay(link, stream);
-
- link->dc->link_srv->edp_set_replay_allow_active(link, NULL, false, false, NULL);
-
- link->dc->link_srv->edp_set_replay_allow_active(link, &replay_active, false, true, NULL);
-
- if (wait == true) {
-
- for (retry_count = 0; retry_count <= max_retry; retry_count++) {
- dc_link_get_replay_state(link, &state);
- if (replay_active) {
- if (state != REPLAY_STATE_0 &&
- (!force_static || state == REPLAY_STATE_3))
- break;
- } else {
- if (state == REPLAY_STATE_0)
- break;
- }
- udelay(500);
- }
-
- /* assert if max retry hit */
- if (retry_count >= max_retry)
- ASSERT(0);
- } else {
- /* To-do: Add trace log */
+ if (link) {
+ link->dc->link_srv->edp_setup_replay(link, stream);
+ link->dc->link_srv->edp_set_coasting_vtotal(link, stream->timing.v_total);
+ DRM_DEBUG_DRIVER("Enabling replay...\n");
+ link->dc->link_srv->edp_set_replay_allow_active(link, &replay_active, wait, false, NULL);
+ return true;
}
- return true;
+ return false;
}
/*
@@ -172,12 +179,31 @@ bool amdgpu_dm_replay_enable(struct dc_stream_state *stream, bool wait)
*/
bool amdgpu_dm_replay_disable(struct dc_stream_state *stream)
{
+ bool replay_active = false;
+ struct dc_link *link = NULL;
- if (stream->link) {
+ if (stream == NULL)
+ return false;
+
+ link = stream->link;
+
+ if (link) {
DRM_DEBUG_DRIVER("Disabling replay...\n");
- stream->link->dc->link_srv->edp_set_replay_allow_active(stream->link, NULL, false, false, NULL);
+ link->dc->link_srv->edp_set_replay_allow_active(stream->link, &replay_active, true, false, NULL);
return true;
}
return false;
}
+
+/*
+ * amdgpu_dm_replay_disable_all() - disable replay f/w on any stream that has it enabled
+ * @dm: display manager whose dc instance is handed to dc_set_replay_allow_active()
+ *
+ * Return: the result of dc_set_replay_allow_active(); true if success
+ */
+bool amdgpu_dm_replay_disable_all(struct amdgpu_display_manager *dm)
+{
+ DRM_DEBUG_DRIVER("Disabling replay if replay is enabled on any stream\n");
+ return dc_set_replay_allow_active(dm->dc, false);
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h
index 01cba3cd6246..73b6c67ae5e7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
@@ -38,9 +39,11 @@ enum replay_enable_option {
pr_enable_option_full_screen_video_coasting = 0x40000,
};
-
+bool amdgpu_dm_link_supports_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector);
bool amdgpu_dm_replay_enable(struct dc_stream_state *stream, bool enable);
-bool amdgpu_dm_setup_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector);
+bool amdgpu_dm_set_replay_caps(struct dc_link *link, struct amdgpu_dm_connector *aconnector);
+bool amdgpu_dm_link_setup_replay(struct dc_link *link, struct amdgpu_dm_connector *aconnector);
bool amdgpu_dm_replay_disable(struct dc_stream_state *stream);
+bool amdgpu_dm_replay_disable_all(struct amdgpu_display_manager *dm);
#endif /* AMDGPU_DM_AMDGPU_DM_REPLAY_H_ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
index d9e33c6bccd9..8550d5e8b753 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
@@ -52,4 +53,12 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc
func_name, line);
}
+void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx)
+{ /* Intentionally empty stub: no argument is used. NOTE(review): presumably an SMU-message trace hook — confirm against callers */
+}
+
+void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx)
+{ /* Intentionally empty stub: no argument is used. NOTE(review): presumably an SMU-response trace hook — confirm against callers */
+}
+
/**** power component interfaces ****/
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
index 0f580ea37576..aa56fd6d56c3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
@@ -37,7 +38,7 @@
#include <drm/drm_framebuffer.h>
#include <drm/drm_encoder.h>
#include <drm/drm_atomic.h>
-#include "dcn10/dcn10_optc.h"
+#include "dc/inc/hw/optc.h"
#include "dc/inc/core_types.h"
@@ -87,7 +88,7 @@ TRACE_EVENT(amdgpu_dc_performance,
__entry->writes = write_count;
__entry->read_delta = read_count - *last_read;
__entry->write_delta = write_count - *last_write;
- __assign_str(func, func);
+ __assign_str(func);
__entry->line = line;
*last_read = read_count;
*last_write = write_count;
@@ -726,6 +727,32 @@ TRACE_EVENT(dcn_optc_lock_unlock_state,
)
);
+TRACE_EVENT(amdgpu_dm_brightness, /* records a caller, requested vs. converted brightness, and the aux/ac flags */
+ TP_PROTO(void *function, u32 user_brightness, u32 converted_brightness, bool aux, bool ac),
+ TP_ARGS(function, user_brightness, converted_brightness, aux, ac),
+ TP_STRUCT__entry(
+ __field(void *, function)
+ __field(u32, user_brightness)
+ __field(u32, converted_brightness)
+ __field(bool, aux)
+ __field(bool, ac)
+ ),
+ TP_fast_assign(
+ __entry->function = function;
+ __entry->user_brightness = user_brightness;
+ __entry->converted_brightness = converted_brightness;
+ __entry->aux = aux;
+ __entry->ac = ac;
+ ),
+ TP_printk("%ps: brightness requested=%u converted=%u aux=%s power=%s",
+ (void *)__entry->function, /* printed with %ps, i.e. as a symbol name */
+ (u32)__entry->user_brightness,
+ (u32)__entry->converted_brightness,
+ (__entry->aux) ? "true" : "false",
+ (__entry->ac) ? "AC" : "DC" /* ac == true prints "AC", otherwise "DC" */
+ )
+);
+
#endif /* _AMDGPU_DM_TRACE_H_ */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
new file mode 100644
index 000000000000..d9527c05fc87
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services_types.h"
+
+#include "amdgpu.h"
+#include "amdgpu_dm.h"
+#include "amdgpu_dm_wb.h"
+#include "amdgpu_display.h"
+#include "dc.h"
+
+#include <drm/drm_edid.h>
+#include <drm/drm_atomic_state_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+
+static const u32 amdgpu_dm_wb_formats[] = { /* formats accepted by the atomic check and passed to drm_writeback_connector_init() */
+ DRM_FORMAT_XRGB2101010,
+};
+
+/*
+ * Validate the queued writeback framebuffer against the CRTC mode size and
+ * amdgpu_dm_wb_formats. Returns 0 if there is no job/FB or it fits, else -EINVAL.
+ */
+static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder,
+					     struct drm_crtc_state *crtc_state,
+					     struct drm_connector_state *conn_state)
+{
+	const struct drm_display_mode *mode = &crtc_state->mode;
+	struct drm_framebuffer *fb;
+	bool found = false;
+	unsigned int i;
+
+	if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
+		return 0;
+
+	fb = conn_state->writeback_job->fb;
+	if (fb->width != mode->hdisplay || fb->height != mode->vdisplay) {
+		DRM_DEBUG_KMS("Invalid framebuffer size %ux%u\n", fb->width, fb->height);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(amdgpu_dm_wb_formats) && !found; i++)
+		found = fb->format->format == amdgpu_dm_wb_formats[i];
+
+	if (!found) {
+		DRM_DEBUG_KMS("Invalid pixel format %p4cc\n", &fb->format->format);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
+{
+ /* Add no-EDID modes capped at 3840x2160, the maximum resolution supported by DWB; the helper's result is returned as-is */
+ return drm_add_modes_noedid(connector, 3840, 2160);
+}
+
+static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
+ struct drm_writeback_job *job)
+{
+ struct amdgpu_framebuffer *afb;
+ struct drm_gem_object *obj;
+ struct amdgpu_device *adev;
+ struct amdgpu_bo *rbo;
+ uint32_t domain;
+ int r;
+ /* Reserve, pin and GART-bind the BO behind job->fb; returns 0 or the failing call's error code. */
+ if (!job->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0; /* a job without a framebuffer is not treated as an error */
+ }
+ /* Only obj[0] of the framebuffer is used as the backing buffer object. */
+ afb = to_amdgpu_framebuffer(job->fb);
+ obj = job->fb->obj[0];
+ rbo = gem_to_amdgpu_bo(obj);
+ adev = amdgpu_ttm_adev(rbo->tbo.bdev);
+
+ r = amdgpu_bo_reserve(rbo, true);
+ if (r) {
+ drm_err(adev_to_drm(adev), "fail to reserve bo (%d)\n", r);
+ return r;
+ }
+ /* From here on the BO is reserved, so every failure must leave through error_unlock. */
+ r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+ if (r) {
+ drm_err(adev_to_drm(adev), "reserving fence slot failed (%d)\n", r);
+ goto error_unlock;
+ }
+
+ domain = amdgpu_display_supported_domains(adev, rbo->flags);
+ /* The contiguous flag is set after the domain lookup and stays set even if the pin below fails. */
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(rbo, domain);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS) /* -ERESTARTSYS is returned without logging */
+ DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
+ goto error_unlock;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&rbo->tbo);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("%p bind failed\n", rbo);
+ goto error_unpin; /* the pin succeeded, so it has to be dropped as well */
+ }
+
+ amdgpu_bo_unreserve(rbo);
+ /* The GPU offset is read and the extra reference taken after the reservation is released. */
+ afb->address = amdgpu_bo_gpu_offset(rbo);
+
+ amdgpu_bo_ref(rbo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(rbo);
+ /* error_unpin falls through: the reservation is released on both error paths. */
+error_unlock:
+ amdgpu_bo_unreserve(rbo);
+ return r;
+}
+
+static void amdgpu_dm_wb_cleanup_job(struct drm_writeback_connector *connector,
+ struct drm_writeback_job *job)
+{
+ struct amdgpu_bo *rbo;
+ int r;
+ /* Counterpart of amdgpu_dm_wb_prepare_job(): unpin the FB's BO and drop the reference taken there. */
+ if (!job->fb)
+ return;
+
+ rbo = gem_to_amdgpu_bo(job->fb->obj[0]);
+ r = amdgpu_bo_reserve(rbo, false);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve rbo before unpin\n");
+ return; /* NOTE(review): the pin and the extra reference are left in place on this path — confirm intended */
+ }
+
+ amdgpu_bo_unpin(rbo);
+ amdgpu_bo_unreserve(rbo);
+ amdgpu_bo_unref(&rbo);
+}
+
+static const struct drm_encoder_helper_funcs amdgpu_dm_wb_encoder_helper_funcs = { /* passed to drm_writeback_connector_init() */
+ .atomic_check = amdgpu_dm_wb_encoder_atomic_check,
+};
+
+static const struct drm_connector_funcs amdgpu_dm_wb_connector_funcs = { /* passed to drm_writeback_connector_init() */
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = drm_connector_cleanup,
+ .reset = amdgpu_dm_connector_funcs_reset, /* invoked once by amdgpu_dm_wb_connector_init() */
+ .atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static const struct drm_connector_helper_funcs amdgpu_dm_wb_conn_helper_funcs = { /* attached via drm_connector_helper_add() */
+ .get_modes = amdgpu_dm_wb_connector_get_modes,
+ .prepare_writeback_job = amdgpu_dm_wb_prepare_job, /* pins the job's framebuffer BO */
+ .cleanup_writeback_job = amdgpu_dm_wb_cleanup_job, /* unpins it again */
+};
+
+/* Set up @wbcon as a DRM writeback connector and record the DC link at @link_index in it. */
+int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
+				struct amdgpu_dm_wb_connector *wbcon,
+				uint32_t link_index)
+{
+	struct drm_connector *conn = &wbcon->base.base;
+	int ret;
+
+	wbcon->link = dc_get_link_at_index(dm->dc, link_index);
+
+	drm_connector_helper_add(conn, &amdgpu_dm_wb_conn_helper_funcs);
+
+	ret = drm_writeback_connector_init(&dm->adev->ddev, &wbcon->base,
+					   &amdgpu_dm_wb_connector_funcs,
+					   &amdgpu_dm_wb_encoder_helper_funcs,
+					   amdgpu_dm_wb_formats,
+					   ARRAY_SIZE(amdgpu_dm_wb_formats),
+					   amdgpu_dm_get_encoder_crtc_mask(dm->adev));
+	if (ret)
+		return ret;
+
+	/*
+	 * Some properties need connector state (bpc, for one), so let the
+	 * reset hook allocate a default initial state when one is provided.
+	 */
+	if (conn->funcs->reset)
+		conn->funcs->reset(conn);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
new file mode 100644
index 000000000000..13d31c857dee
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_DM_WB_H__
+#define __AMDGPU_DM_WB_H__
+
+#include <drm/drm_writeback.h>
+
+int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm,
+ struct amdgpu_dm_wb_connector *dm_wbcon,
+ uint32_t link_index);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 172aa10a8800..e46f8ce41d87 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -26,16 +26,7 @@
#include "dc_trace.h"
-#if defined(CONFIG_X86)
-#include <asm/fpu/api.h>
-#elif defined(CONFIG_PPC64)
-#include <asm/switch_to.h>
-#include <asm/cputable.h>
-#elif defined(CONFIG_ARM64)
-#include <asm/neon.h>
-#elif defined(CONFIG_LOONGARCH)
-#include <asm/fpu.h>
-#endif
+#include <linux/fpu.h>
/**
* DOC: DC FPU manipulation overview
@@ -60,11 +51,9 @@ static DEFINE_PER_CPU(int, fpu_recursion_depth);
*/
inline void dc_assert_fp_enabled(void)
{
- int *pcpu, depth = 0;
+ int depth;
- pcpu = get_cpu_ptr(&fpu_recursion_depth);
- depth = *pcpu;
- put_cpu_ptr(&fpu_recursion_depth);
+ depth = __this_cpu_read(fpu_recursion_depth);
ASSERT(depth >= 1);
}
@@ -84,33 +73,17 @@ inline void dc_assert_fp_enabled(void)
*/
void dc_fpu_begin(const char *function_name, const int line)
{
- int *pcpu;
+ int depth;
- pcpu = get_cpu_ptr(&fpu_recursion_depth);
- *pcpu += 1;
-
- if (*pcpu == 1) {
-#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
- migrate_disable();
+ WARN_ON_ONCE(!in_task());
+ preempt_disable();
+ depth = __this_cpu_inc_return(fpu_recursion_depth);
+ if (depth == 1) {
+ BUG_ON(!kernel_fpu_available());
kernel_fpu_begin();
-#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP)) {
- preempt_disable();
- enable_kernel_vsx();
- } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) {
- preempt_disable();
- enable_kernel_altivec();
- } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) {
- preempt_disable();
- enable_kernel_fp();
- }
-#elif defined(CONFIG_ARM64)
- kernel_neon_begin();
-#endif
}
- TRACE_DCN_FPU(true, function_name, line, *pcpu);
- put_cpu_ptr(&fpu_recursion_depth);
+ TRACE_DCN_FPU(true, function_name, line, depth);
}
/**
@@ -125,30 +98,15 @@ void dc_fpu_begin(const char *function_name, const int line)
*/
void dc_fpu_end(const char *function_name, const int line)
{
- int *pcpu;
+ int depth;
- pcpu = get_cpu_ptr(&fpu_recursion_depth);
- *pcpu -= 1;
- if (*pcpu <= 0) {
-#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
+ depth = __this_cpu_dec_return(fpu_recursion_depth);
+ if (depth == 0) {
kernel_fpu_end();
- migrate_enable();
-#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP)) {
- disable_kernel_vsx();
- preempt_enable();
- } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) {
- disable_kernel_altivec();
- preempt_enable();
- } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) {
- disable_kernel_fp();
- preempt_enable();
- }
-#elif defined(CONFIG_ARM64)
- kernel_neon_end();
-#endif
+ } else {
+ WARN_ON_ONCE(depth < 0);
}
- TRACE_DCN_FPU(false, function_name, line, *pcpu);
- put_cpu_ptr(&fpu_recursion_depth);
+ TRACE_DCN_FPU(false, function_name, line, depth);
+ preempt_enable();
}
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index 1b8c2aef4633..dc943abd6dba 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -22,33 +22,28 @@
#
# Makefile for Display Core (dc) component.
-DC_LIBS = basics bios dml clk_mgr dce gpio irq link virtual dsc
+DC_LIBS = basics bios dml clk_mgr dce gpio hwss irq link virtual dsc resource optc dpp hubbub dccg hubp dio dwb hpo mmhubbub mpc opp pg
ifdef CONFIG_DRM_AMD_DC_FP
KCOV_INSTRUMENT := n
-DC_LIBS += dcn20
DC_LIBS += dcn10
+DC_LIBS += dcn20
DC_LIBS += dcn21
DC_LIBS += dcn201
DC_LIBS += dcn30
DC_LIBS += dcn301
-DC_LIBS += dcn302
-DC_LIBS += dcn303
DC_LIBS += dcn31
-DC_LIBS += dcn314
-DC_LIBS += dcn315
-DC_LIBS += dcn316
-DC_LIBS += dcn32
-DC_LIBS += dcn321
+DC_LIBS += dml
+DC_LIBS += dml2
+DC_LIBS += soc_and_ip_translator
endif
DC_LIBS += dce120
DC_LIBS += dce112
DC_LIBS += dce110
-DC_LIBS += dce100
DC_LIBS += dce80
ifdef CONFIG_DRM_AMD_DC_SI
@@ -57,25 +52,32 @@ endif
DC_LIBS += hdcp
+ifdef CONFIG_DRM_AMD_DC_FP
+DC_LIBS += sspl
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/, dc_spl_translate.o)
+endif
+
AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LIBS)))
include $(AMD_DC)
-DISPLAY_CORE = dc.o dc_stat.o dc_resource.o dc_hw_sequencer.o dc_sink.o \
-dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o
-
-DISPLAY_CORE += dc_vm_helper.o
-
-AMD_DISPLAY_CORE = $(addprefix $(AMDDALPATH)/dc/core/,$(DISPLAY_CORE))
-
-AMD_DM_REG_UPDATE = $(addprefix $(AMDDALPATH)/dc/,dc_helper.o)
-
-AMD_DISPLAY_FILES += $(AMD_DISPLAY_CORE)
-AMD_DISPLAY_FILES += $(AMD_DM_REG_UPDATE)
-
-DC_DMUB += dc_dmub_srv.o
-DC_EDID += dc_edid_parser.o
-AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB))
-AMD_DISPLAY_EDID = $(addprefix $(AMDDALPATH)/dc/,$(DC_EDID))
-AMD_DISPLAY_FILES += $(AMD_DISPLAY_DMUB) $(AMD_DISPLAY_EDID)
+FILES =
+FILES += dc_dmub_srv.o
+FILES += dc_edid_parser.o
+FILES += dc_fused_io.o
+FILES += dc_helper.o
+FILES += core/dc.o
+FILES += core/dc_debug.o
+FILES += core/dc_hw_sequencer.o
+FILES += core/dc_link_enc_cfg.o
+FILES += core/dc_link_exports.o
+FILES += core/dc_resource.o
+FILES += core/dc_sink.o
+FILES += core/dc_stat.o
+FILES += core/dc_state.o
+FILES += core/dc_stream.o
+FILES += core/dc_surface.o
+FILES += core/dc_vm_helper.o
+
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/, $(FILES))
diff --git a/drivers/gpu/drm/amd/display/dc/basics/Makefile b/drivers/gpu/drm/amd/display/dc/basics/Makefile
index 01b99e0d788e..aabcebf69049 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/basics/Makefile
@@ -24,7 +24,14 @@
# It provides the general basic services required by other DAL
# subcomponents.
-BASICS = conversion.o fixpt31_32.o vector.o dc_common.o
+BASICS := \
+ conversion.o \
+ fixpt31_32.o \
+ vector.o \
+ dc_common.o \
+ dce_calcs.o \
+ custom_float.o \
+ bw_fixed.o
AMD_DAL_BASICS = $(addprefix $(AMDDALPATH)/dc/basics/,$(BASICS))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c b/drivers/gpu/drm/amd/display/dc/basics/bw_fixed.c
index 3aa8dd0acd5e..c8cb89e0d4d0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/bw_fixed.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: MIT
/*
- * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -106,9 +107,8 @@ struct bw_fixed bw_frc_to_fixed(int64_t numerator, int64_t denominator)
return res;
}
-struct bw_fixed bw_floor2(
- const struct bw_fixed arg,
- const struct bw_fixed significance)
+struct bw_fixed bw_floor2(const struct bw_fixed arg,
+ const struct bw_fixed significance)
{
struct bw_fixed result;
int64_t multiplicand;
@@ -119,9 +119,8 @@ struct bw_fixed bw_floor2(
return result;
}
-struct bw_fixed bw_ceil2(
- const struct bw_fixed arg,
- const struct bw_fixed significance)
+struct bw_fixed bw_ceil2(const struct bw_fixed arg,
+ const struct bw_fixed significance)
{
struct bw_fixed result;
int64_t multiplicand;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/calcs_logger.h b/drivers/gpu/drm/amd/display/dc/basics/calcs_logger.h
index 62435bfc274d..62435bfc274d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/calcs_logger.h
+++ b/drivers/gpu/drm/amd/display/dc/basics/calcs_logger.h
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index e295a839ab47..bd1f60ecaba4 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -101,9 +101,44 @@ void convert_float_matrix(
}
}
+static struct fixed31_32 int_frac_to_fixed_point(uint16_t arg,
+						 uint8_t integer_bits,
+						 uint8_t fractional_bits)
+{
+	/* @arg is sign-magnitude: one sign bit directly above the magnitude */
+	const uint16_t sign_bit = 1 << (integer_bits + fractional_bits);
+	const uint16_t magnitude = arg & (sign_bit - 1);
+	struct fixed31_32 fixed;
+
+	fixed.value = (long long)magnitude <<
+		      (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
+
+	if (!(arg & sign_bit))
+		return fixed;
+	return dc_fixpt_neg(fixed);
+}
+
+/**
+ * convert_hw_matrix - converts HW values into fixed31_32 matrix.
+ * @matrix: output array of fixed point 31.32 values, one per entry of @reg
+ * @reg: array of register values in S2D13 format
+ * @buffer_size: number of entries to convert from @reg into @matrix
+ *
+ * Converts HW register spec defined format S2D13 (sign bit, 2 integer bits,
+ * 13 fractional bits) into a fixed-point 31.32 matrix, entry by entry.
+ */
+void convert_hw_matrix(struct fixed31_32 *matrix,
+		       uint16_t *reg,
+		       uint32_t buffer_size)
+{
+	for (uint32_t i = 0; i < buffer_size; ++i)
+		matrix[i] = int_frac_to_fixed_point(reg[i], 2, 13);
+}
+
static uint32_t find_gcd(uint32_t a, uint32_t b)
{
- uint32_t remainder = 0;
+ uint32_t remainder;
+
while (b != 0) {
remainder = a % b;
a = b;
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.h b/drivers/gpu/drm/amd/display/dc/basics/conversion.h
index 81da4e6f7a1a..a433cef78496 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.h
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.h
@@ -41,6 +41,10 @@ void convert_float_matrix(
void reduce_fraction(uint32_t num, uint32_t den,
uint32_t *out_num, uint32_t *out_den);
+void convert_hw_matrix(struct fixed31_32 *matrix,
+ uint16_t *reg,
+ uint32_t buffer_size);
+
static inline unsigned int log_2(unsigned int num)
{
return ilog2(num);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/custom_float.c b/drivers/gpu/drm/amd/display/dc/basics/custom_float.c
index 31d167bc548f..ae05ded9a7f3 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/custom_float.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/custom_float.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: MIT
/*
- * Copyright 2017 Advanced Micro Devices, Inc.
+ * Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,52 +26,41 @@
#include "dm_services.h"
#include "custom_float.h"
-
-static bool build_custom_float(
- struct fixed31_32 value,
- const struct custom_float_format *format,
- bool *negative,
- uint32_t *mantissa,
- uint32_t *exponenta)
+static bool build_custom_float(struct fixed31_32 value,
+ const struct custom_float_format *format,
+ bool *negative,
+ uint32_t *mantissa,
+ uint32_t *exponenta)
{
uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1;
const struct fixed31_32 mantissa_constant_plus_max_fraction =
- dc_fixpt_from_fraction(
- (1LL << (format->mantissa_bits + 1)) - 1,
- 1LL << format->mantissa_bits);
+ dc_fixpt_from_fraction((1LL << (format->mantissa_bits + 1)) - 1,
+ 1LL << format->mantissa_bits);
struct fixed31_32 mantiss;
- if (dc_fixpt_eq(
- value,
- dc_fixpt_zero)) {
+ if (dc_fixpt_eq(value, dc_fixpt_zero)) {
*negative = false;
*mantissa = 0;
*exponenta = 0;
return true;
}
- if (dc_fixpt_lt(
- value,
- dc_fixpt_zero)) {
+ if (dc_fixpt_lt(value, dc_fixpt_zero)) {
*negative = format->sign;
value = dc_fixpt_neg(value);
} else {
*negative = false;
}
- if (dc_fixpt_lt(
- value,
- dc_fixpt_one)) {
+ if (dc_fixpt_lt(value, dc_fixpt_one)) {
uint32_t i = 1;
do {
value = dc_fixpt_shl(value, 1);
++i;
- } while (dc_fixpt_lt(
- value,
- dc_fixpt_one));
+ } while (dc_fixpt_lt(value, dc_fixpt_one));
--i;
@@ -81,54 +71,40 @@ static bool build_custom_float(
}
*exponenta = exp_offset - i;
- } else if (dc_fixpt_le(
- mantissa_constant_plus_max_fraction,
- value)) {
+ } else if (dc_fixpt_le(mantissa_constant_plus_max_fraction, value)) {
uint32_t i = 1;
do {
value = dc_fixpt_shr(value, 1);
++i;
- } while (dc_fixpt_lt(
- mantissa_constant_plus_max_fraction,
- value));
+ } while (dc_fixpt_lt(mantissa_constant_plus_max_fraction, value));
*exponenta = exp_offset + i - 1;
} else {
*exponenta = exp_offset;
}
- mantiss = dc_fixpt_sub(
- value,
- dc_fixpt_one);
+ mantiss = dc_fixpt_sub(value, dc_fixpt_one);
- if (dc_fixpt_lt(
- mantiss,
- dc_fixpt_zero) ||
- dc_fixpt_lt(
- dc_fixpt_one,
- mantiss))
+ if (dc_fixpt_lt(mantiss, dc_fixpt_zero) ||
+ dc_fixpt_lt(dc_fixpt_one, mantiss))
mantiss = dc_fixpt_zero;
else
- mantiss = dc_fixpt_shl(
- mantiss,
- format->mantissa_bits);
+ mantiss = dc_fixpt_shl(mantiss, format->mantissa_bits);
*mantissa = dc_fixpt_floor(mantiss);
return true;
}
-static bool setup_custom_float(
- const struct custom_float_format *format,
- bool negative,
- uint32_t mantissa,
- uint32_t exponenta,
- uint32_t *result)
+static bool setup_custom_float(const struct custom_float_format *format,
+ bool negative,
+ uint32_t mantissa,
+ uint32_t exponenta,
+ uint32_t *result)
{
uint32_t i = 0;
uint32_t j = 0;
-
uint32_t value = 0;
/* verification code:
@@ -179,19 +155,19 @@ static bool setup_custom_float(
return true;
}
-bool convert_to_custom_float_format(
- struct fixed31_32 value,
- const struct custom_float_format *format,
- uint32_t *result)
+bool convert_to_custom_float_format(struct fixed31_32 value,
+ const struct custom_float_format *format,
+ uint32_t *result)
{
uint32_t mantissa;
uint32_t exponenta;
bool negative;
- return build_custom_float(
- value, format, &negative, &mantissa, &exponenta) &&
- setup_custom_float(
- format, negative, mantissa, exponenta, result);
+ return build_custom_float(value, format, &negative, &mantissa, &exponenta) &&
+ setup_custom_float(format,
+ negative,
+ mantissa,
+ exponenta,
+ result);
}
-
diff --git a/drivers/gpu/drm/amd/display/dc/basics/dc_common.c b/drivers/gpu/drm/amd/display/dc/basics/dc_common.c
index b2fc4f8e6482..a51c2701da24 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/dc_common.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/dc_common.c
@@ -40,7 +40,8 @@ bool is_rgb_cspace(enum dc_color_space output_color_space)
case COLOR_SPACE_YCBCR709:
case COLOR_SPACE_YCBCR601_LIMITED:
case COLOR_SPACE_YCBCR709_LIMITED:
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
+ case COLOR_SPACE_2020_YCBCR_FULL:
return false;
default:
/* Add a case to switch */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
index f2dfa96f9ef5..4da5adab799c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "resource.h"
#include "dm_services.h"
#include "dce_calcs.h"
@@ -94,7 +92,7 @@ static void calculate_bandwidth(
const uint32_t s_high = 7;
const uint32_t dmif_chunk_buff_margin = 1;
- uint32_t max_chunks_fbc_mode;
+ uint32_t max_chunks_fbc_mode = 0;
int32_t num_cursor_lines;
int32_t i, j, k;
@@ -571,7 +569,7 @@ static void calculate_bandwidth(
break;
}
data->lb_partitions[i] = bw_floor2(bw_div(data->lb_size_per_component[i], data->lb_line_pitch), bw_int_to_fixed(1));
- /*clamp the partitions to the maxium number supported by the lb*/
+ /* clamp the partitions to the maximum number supported by the lb */
if ((surface_type[i] != bw_def_graphics || dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1)) {
data->lb_partitions_max[i] = bw_int_to_fixed(10);
}
@@ -1138,7 +1136,7 @@ static void calculate_bandwidth(
}
}
}
- data->total_dmifmc_urgent_trips = bw_ceil2(bw_div(data->total_requests_for_adjusted_dmif_size, (bw_add(dceip->dmif_request_buffer_size, bw_int_to_fixed(vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel * data->number_of_dram_channels)))), bw_int_to_fixed(1));
+ data->total_dmifmc_urgent_trips = bw_ceil2(bw_div(data->total_requests_for_adjusted_dmif_size, (bw_add(dceip->dmif_request_buffer_size, bw_int_to_fixed((uint64_t)vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel * data->number_of_dram_channels)))), bw_int_to_fixed(1));
data->total_dmifmc_urgent_latency = bw_mul(vbios->dmifmc_urgent_latency, data->total_dmifmc_urgent_trips);
data->total_display_reads_required_data = bw_int_to_fixed(0);
data->total_display_reads_required_dram_access_data = bw_int_to_fixed(0);
@@ -1395,7 +1393,7 @@ static void calculate_bandwidth(
if ((bw_mtn(data->dram_speed_change_margin, bw_int_to_fixed(0)) && bw_ltn(data->dram_speed_change_margin, bw_int_to_fixed(9999)))) {
/*determine the minimum dram clock change margin for each set of clock frequencies*/
data->min_dram_speed_change_margin[i][j] = bw_min2(data->min_dram_speed_change_margin[i][j], data->dram_speed_change_margin);
- /*compute the maximum clock frequuency required for the dram clock change at each set of clock frequencies*/
+ /*compute the maximum clock frequency required for the dram clock change at each set of clock frequencies*/
data->dispclk_required_for_dram_speed_change_pipe[i][j] = bw_max2(bw_div(bw_div(bw_mul(data->src_pixels_for_first_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]))), bw_div(bw_div(bw_mul(data->src_pixels_for_last_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_add(bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]), data->active_time[k]))));
if ((bw_ltn(data->dispclk_required_for_dram_speed_change_pipe[i][j], vbios->high_voltage_max_dispclk))) {
data->display_pstate_change_enable[k] = 1;
@@ -1409,7 +1407,7 @@ static void calculate_bandwidth(
if ((bw_mtn(data->dram_speed_change_margin, bw_int_to_fixed(0)) && bw_ltn(data->dram_speed_change_margin, bw_int_to_fixed(9999)))) {
/*determine the minimum dram clock change margin for each display pipe*/
data->min_dram_speed_change_margin[i][j] = bw_min2(data->min_dram_speed_change_margin[i][j], data->dram_speed_change_margin);
- /*compute the maximum clock frequuency required for the dram clock change at each set of clock frequencies*/
+ /*compute the maximum clock frequency required for the dram clock change at each set of clock frequencies*/
data->dispclk_required_for_dram_speed_change_pipe[i][j] = bw_max2(bw_div(bw_div(bw_mul(data->src_pixels_for_first_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_sub(bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]), data->mcifwr_burst_time[i][j]))), bw_div(bw_div(bw_mul(data->src_pixels_for_last_output_pixel[k], dceip->display_pipe_throughput_factor), dceip->lb_write_pixels_per_dispclk), (bw_add(bw_sub(bw_sub(bw_sub(bw_sub(data->maximum_latency_hiding_with_cursor[k], vbios->nbp_state_change_latency), data->dmif_burst_time[i][j]), data->dram_speed_change_line_source_transfer_time[k][i][j]), data->mcifwr_burst_time[i][j]), data->active_time[k]))));
if ((bw_ltn(data->dispclk_required_for_dram_speed_change_pipe[i][j], vbios->high_voltage_max_dispclk))) {
data->display_pstate_change_enable[k] = 1;
@@ -1855,7 +1853,7 @@ static void calculate_bandwidth(
/*compute total time to request one chunk from each active display pipe*/
for (i = 0; i <= maximum_number_of_surfaces - 1; i++) {
if (data->enable[i]) {
- data->chunk_request_time = bw_add(data->chunk_request_time, (bw_div((bw_div(bw_int_to_fixed(pixels_per_chunk * data->bytes_per_pixel[i]), data->useful_bytes_per_request[i])), bw_min2(sclk[data->sclk_level], bw_div(data->dispclk, bw_int_to_fixed(2))))));
+ data->chunk_request_time = bw_add(data->chunk_request_time, (bw_div((bw_div(bw_int_to_fixed(pixels_per_chunk * (int64_t)data->bytes_per_pixel[i]), data->useful_bytes_per_request[i])), bw_min2(sclk[data->sclk_level], bw_div(data->dispclk, bw_int_to_fixed(2))))));
}
}
/*compute total time to request cursor data*/
diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
index 1726bdf89bae..6073cadde76c 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
@@ -51,8 +51,6 @@ static inline unsigned long long complete_integer_division_u64(
{
unsigned long long result;
- ASSERT(divisor);
-
result = div64_u64_rem(dividend, divisor, remainder);
return result;
@@ -140,8 +138,6 @@ struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2)
res.value = arg1_int * arg2_int;
- ASSERT(res.value <= LONG_MAX);
-
res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
tmp = arg1_int * arg2_fra;
@@ -185,8 +181,6 @@ struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg)
res.value = arg_int * arg_int;
- ASSERT(res.value <= LONG_MAX);
-
res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
tmp = arg_int * arg_fra;
@@ -217,9 +211,6 @@ struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg)
* @note
* Good idea to use Newton's method
*/
-
- ASSERT(arg.value);
-
return dc_fixpt_from_fraction(
dc_fixpt_one.value,
arg.value);
@@ -293,7 +284,7 @@ struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg)
dc_fixpt_mul(
square,
res),
- n * (n - 1)));
+ (long long)n * (n - 1)));
n -= 2;
} while (n != 0);
@@ -490,3 +481,30 @@ int dc_fixpt_s4d19(struct fixed31_32 arg)
else
return ux_dy(arg.value, 4, 19);
}
+
+struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value,
+				      unsigned int integer_bits,
+				      unsigned int fractional_bits)
+{
+	const unsigned int shift = FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
+	/* selects the integer_bits-wide field sitting above the fraction */
+	const long long int_mask = (((long long)1 << (long long)integer_bits) - 1) << fractional_bits;
+	struct fixed31_32 whole = dc_fixpt_zero;
+	struct fixed31_32 int_part = dc_fixpt_zero;
+
+	whole.value = (long long)value << shift;
+	int_part.value = (value & int_mask) << shift;
+	whole.value |= int_part.value;
+	return whole;
+}
+
+struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value, unsigned int frac_value,
+				       unsigned int integer_bits, unsigned int fractional_bits)
+{
+	/* @integer_bits is not referenced; only the fraction needs aligning. */
+	const unsigned int shift = FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
+	struct fixed31_32 result = dc_fixpt_from_int(int_value);
+
+	result.value |= (long long)frac_value << shift;
+	return result;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/basics/vector.c b/drivers/gpu/drm/amd/display/dc/basics/vector.c
index 6d2924114a3e..b413a672c2c0 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/vector.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/vector.c
@@ -170,7 +170,7 @@ bool dal_vector_remove_at_index(
memmove(
vector->container + (index * vector->struct_size),
vector->container + ((index + 1) * vector->struct_size),
- (vector->count - index - 1) * vector->struct_size);
+ (size_t)(vector->count - index - 1) * vector->struct_size);
vector->count -= 1;
return true;
@@ -219,7 +219,7 @@ bool dal_vector_insert_at(
memmove(
insert_address + vector->struct_size,
insert_address,
- vector->struct_size * (vector->count - position));
+ (size_t)vector->struct_size * (vector->count - position));
memmove(
insert_address,
@@ -271,7 +271,7 @@ struct vector *dal_vector_clone(
/* copy vector's data */
memmove(vec_cloned->container, vector->container,
- vec_cloned->struct_size * vec_cloned->capacity);
+ (size_t)vec_cloned->struct_size * vec_cloned->capacity);
return vec_cloned;
}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index 6b3190447581..154fd2c18e88 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -44,8 +44,6 @@
#include "bios_parser_common.h"
-#include "dc.h"
-
#define THREE_PERCENT_OF_10000 300
#define LAST_RECORD_TYPE 0xff
@@ -176,11 +174,8 @@ static struct graphics_object_id bios_parser_get_connector_id(
return object_id;
}
- if (tbl->ucNumberOfObjects <= i) {
- dm_error("Can't find connector id %d in connector table of size %d.\n",
- i, tbl->ucNumberOfObjects);
+ if (tbl->ucNumberOfObjects <= i)
return object_id;
- }
id = le16_to_cpu(tbl->asObjects[i].usObjectID);
object_id = object_id_from_bios_object_id(id);
@@ -667,6 +662,9 @@ static enum bp_result get_ss_info_v3_1(
ss_table_header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V3 *) bios_get_image(&bp->base,
DATA_TABLES(ASIC_InternalSS_Info),
struct_size(ss_table_header_include, asSpreadSpectrum, 1)));
+ if (!ss_table_header_include)
+ return BP_RESULT_UNSUPPORTED;
+
table_size =
(le16_to_cpu(ss_table_header_include->sHeader.usStructureSize)
- sizeof(ATOM_COMMON_TABLE_HEADER))
@@ -1036,6 +1034,8 @@ static enum bp_result get_ss_info_from_internal_ss_info_tbl_V2_1(
&bp->base,
DATA_TABLES(ASIC_InternalSS_Info),
struct_size(header, asSpreadSpectrum, 1)));
+ if (!header)
+ return result;
memset(info, 0, sizeof(struct spread_spectrum_info));
@@ -1109,6 +1109,8 @@ static enum bp_result get_ss_info_from_ss_info_table(
get_atom_data_table_revision(header, &revision);
tbl = GET_IMAGE(ATOM_SPREAD_SPECTRUM_INFO, DATA_TABLES(SS_Info));
+ if (!tbl)
+ return result;
if (1 != revision.major || 2 > revision.minor)
return result;
@@ -1636,6 +1638,8 @@ static uint32_t get_ss_entry_number_from_ss_info_tbl(
tbl = GET_IMAGE(ATOM_SPREAD_SPECTRUM_INFO,
DATA_TABLES(SS_Info));
+ if (!tbl)
+ return number;
if (1 != revision.major || 2 > revision.minor)
return number;
@@ -1718,6 +1722,8 @@ static uint32_t get_ss_entry_number_from_internal_ss_info_tbl_v2_1(
&bp->base,
DATA_TABLES(ASIC_InternalSS_Info),
struct_size(header_include, asSpreadSpectrum, 1)));
+ if (!header_include)
+ return 0;
size = (le16_to_cpu(header_include->sHeader.usStructureSize)
- sizeof(ATOM_COMMON_TABLE_HEADER))
@@ -1731,6 +1737,7 @@ static uint32_t get_ss_entry_number_from_internal_ss_info_tbl_v2_1(
return 0;
}
+
/**
* get_ss_entry_number_from_internal_ss_info_tbl_V3_1
* Get Number of SpreadSpectrum Entry from the ASIC_InternalSS_Info table of
@@ -1756,6 +1763,9 @@ static uint32_t get_ss_entry_number_from_internal_ss_info_tbl_V3_1(
header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V3 *) bios_get_image(&bp->base,
DATA_TABLES(ASIC_InternalSS_Info),
struct_size(header_include, asSpreadSpectrum, 1)));
+ if (!header_include)
+ return number;
+
size = (le16_to_cpu(header_include->sHeader.usStructureSize) -
sizeof(ATOM_COMMON_TABLE_HEADER)) /
sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3);
@@ -2371,10 +2381,10 @@ static enum bp_result get_integrated_info_v8(
}
/*
- * get_integrated_info_v8
+ * get_integrated_info_v9
*
* @brief
- * Get V8 integrated BIOS information
+ * Get V9 integrated BIOS information
*
* @param
* bios_parser *bp - [in]BIOS parser handler to get master data table
@@ -2552,8 +2562,8 @@ static enum bp_result construct_integrated_info(
/* Sort voltage table from low to high*/
if (result == BP_RESULT_OK) {
- uint32_t i;
- uint32_t j;
+ int32_t i;
+ int32_t j;
for (i = 1; i < NUMBER_OF_DISP_CLK_VOLTAGE; ++i) {
for (j = i; j > 0; --j) {
@@ -2746,6 +2756,7 @@ static enum bp_result bios_get_board_layout_info(
struct board_layout_info *board_layout_info)
{
unsigned int i;
+ struct bios_parser *bp;
enum bp_result record_result;
const unsigned int slot_index_to_vbios_id[MAX_BOARD_SLOTS] = {
@@ -2754,6 +2765,8 @@ static enum bp_result bios_get_board_layout_info(
0, 0
};
+ bp = BP_FROM_DCB(dcb);
+
if (board_layout_info == NULL) {
DC_LOG_DETECTION_EDID_PARSER("Invalid board_layout_info\n");
return BP_RESULT_BADINPUT;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 484d62bcf2c2..04eb647acc4e 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -75,6 +75,10 @@ static enum bp_result get_firmware_info_v3_4(
struct bios_parser *bp,
struct dc_firmware_info *info);
+static enum bp_result get_firmware_info_v3_5(
+ struct bios_parser *bp,
+ struct dc_firmware_info *info);
+
static struct atom_hpd_int_record *get_hpd_record(struct bios_parser *bp,
struct atom_display_object_path_v2 *object);
@@ -355,7 +359,7 @@ static struct atom_display_object_path_v3 *get_bios_object_from_path_v3(struct b
&& id.enum_id == obj_id.enum_id)
return &bp->object_info_tbl.v1_5->display_path[i];
}
- break;
+ break;
case OBJECT_TYPE_CONNECTOR:
case OBJECT_TYPE_GENERIC:
@@ -369,7 +373,7 @@ static struct atom_display_object_path_v3 *get_bios_object_from_path_v3(struct b
&& id.enum_id == obj_id.enum_id)
return &bp->object_info_tbl.v1_5->display_path[i];
}
- break;
+ break;
default:
return NULL;
@@ -405,16 +409,16 @@ static enum bp_result bios_parser_get_i2c_info(struct dc_bios *dcb,
}
switch (bp->object_info_tbl.revision.minor) {
- case 4:
- default:
- object = get_bios_object(bp, id);
+ case 4:
+ default:
+ object = get_bios_object(bp, id);
- if (!object)
- return BP_RESULT_BADINPUT;
+ if (!object)
+ return BP_RESULT_BADINPUT;
- offset = object->disp_recordoffset + bp->object_info_tbl_offset;
- break;
- case 5:
+ offset = object->disp_recordoffset + bp->object_info_tbl_offset;
+ break;
+ case 5:
object_path_v3 = get_bios_object_from_path_v3(bp, id);
if (!object_path_v3)
@@ -568,17 +572,16 @@ static enum bp_result bios_parser_get_hpd_info(
return BP_RESULT_BADINPUT;
switch (bp->object_info_tbl.revision.minor) {
- case 4:
- default:
- object = get_bios_object(bp, id);
+ case 4:
+ default:
+ object = get_bios_object(bp, id);
if (!object)
return BP_RESULT_BADINPUT;
- record = get_hpd_record(bp, object);
-
- break;
- case 5:
+ record = get_hpd_record(bp, object);
+ break;
+ case 5:
object_path_v3 = get_bios_object_from_path_v3(bp, id);
if (!object_path_v3)
@@ -1015,13 +1018,20 @@ static enum bp_result get_ss_info_v4_5(
DC_LOG_BIOS("AS_SIGNAL_TYPE_HDMI ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
break;
case AS_SIGNAL_TYPE_DISPLAY_PORT:
- ss_info->spread_spectrum_percentage =
+ if (bp->base.integrated_info) {
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", bp->base.integrated_info->gpuclk_ss_percentage);
+ ss_info->spread_spectrum_percentage =
+ bp->base.integrated_info->gpuclk_ss_percentage;
+ ss_info->type.CENTER_MODE =
+ bp->base.integrated_info->gpuclk_ss_type;
+ } else {
+ ss_info->spread_spectrum_percentage =
disp_cntl_tbl->dp_ss_percentage;
- ss_info->spread_spectrum_range =
+ ss_info->spread_spectrum_range =
disp_cntl_tbl->dp_ss_rate_10hz * 10;
- if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
- ss_info->type.CENTER_MODE = true;
-
+ if (disp_cntl_tbl->dp_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
+ ss_info->type.CENTER_MODE = true;
+ }
DC_LOG_BIOS("AS_SIGNAL_TYPE_DISPLAY_PORT ss_percentage: %d\n", ss_info->spread_spectrum_percentage);
break;
case AS_SIGNAL_TYPE_GPU_PLL:
@@ -1588,8 +1598,6 @@ static bool bios_parser_is_device_id_supported(
return (le16_to_cpu(bp->object_info_tbl.v1_5->supporteddevices) & mask) != 0;
break;
}
-
- return false;
}
static uint32_t bios_parser_get_ss_entry_number(
@@ -1692,7 +1700,7 @@ static enum bp_result bios_parser_enable_disp_power_gating(
static enum bp_result bios_parser_enable_lvtma_control(
struct dc_bios *dcb,
uint8_t uc_pwr_on,
- uint8_t panel_instance,
+ uint8_t pwrseq_instance,
uint8_t bypass_panel_control_wait)
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
@@ -1700,7 +1708,7 @@ static enum bp_result bios_parser_enable_lvtma_control(
if (!bp->cmd_tbl.enable_lvtma_control)
return BP_RESULT_FAILURE;
- return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, panel_instance, bypass_panel_control_wait);
+ return bp->cmd_tbl.enable_lvtma_control(bp, uc_pwr_on, pwrseq_instance, bypass_panel_control_wait);
}
static bool bios_parser_is_accelerated_mode(
@@ -1723,15 +1731,6 @@ static void bios_parser_set_scratch_critical_state(
bios_set_scratch_critical_state(dcb, state);
}
-struct atom_dig_transmitter_info_header_v5_3 {
- struct atom_common_table_header table_header;
- uint16_t dpphy_hdmi_settings_offset;
- uint16_t dpphy_dvi_settings_offset;
- uint16_t dpphy_dp_setting_table_offset;
- uint16_t uniphy_xbar_settings_v2_table_offset;
- uint16_t dpphy_internal_reg_overide_offset;
-};
-
static enum bp_result bios_parser_get_firmware_info(
struct dc_bios *dcb,
struct dc_firmware_info *info)
@@ -1755,10 +1754,13 @@ static enum bp_result bios_parser_get_firmware_info(
case 2:
case 3:
result = get_firmware_info_v3_2(bp, info);
- break;
+ break;
case 4:
result = get_firmware_info_v3_4(bp, info);
break;
+ case 5:
+ result = get_firmware_info_v3_5(bp, info);
+ break;
default:
break;
}
@@ -1776,6 +1778,7 @@ static enum bp_result get_firmware_info_v3_1(
struct dc_firmware_info *info)
{
struct atom_firmware_info_v3_1 *firmware_info;
+ struct atom_firmware_info_v3_2 *firmware_info32;
struct atom_display_controller_info_v4_1 *dce_info = NULL;
if (!info)
@@ -1783,11 +1786,13 @@ static enum bp_result get_firmware_info_v3_1(
firmware_info = GET_IMAGE(struct atom_firmware_info_v3_1,
DATA_TABLES(firmwareinfo));
+ firmware_info32 = GET_IMAGE(struct atom_firmware_info_v3_2,
+ DATA_TABLES(firmwareinfo));
dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1,
DATA_TABLES(dce_info));
- if (!firmware_info || !dce_info)
+ if (!firmware_info || !firmware_info32 || !dce_info)
return BP_RESULT_BADBIOSTABLE;
memset(info, 0, sizeof(*info));
@@ -1815,7 +1820,15 @@ static enum bp_result get_firmware_info_v3_1(
bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10;
}
- info->oem_i2c_present = false;
+ /* These fields are marked as reserved in v3_1, but they appear to be populated
+ * properly.
+ */
+ if (firmware_info32 && firmware_info32->board_i2c_feature_id == 0x2) {
+ info->oem_i2c_present = true;
+ info->oem_i2c_obj_id = firmware_info32->board_i2c_feature_gpio_id;
+ } else {
+ info->oem_i2c_present = false;
+ }
return BP_RESULT_OK;
}
@@ -1853,19 +1866,21 @@ static enum bp_result get_firmware_info_v3_2(
/* Vega12 */
smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2,
DATA_TABLES(smu_info));
- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
if (!smu_info_v3_2)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage);
+
info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10;
} else if (revision.minor == 3) {
/* Vega20 */
smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3,
DATA_TABLES(smu_info));
- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
if (!smu_info_v3_3)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage);
+
info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10;
}
@@ -2047,6 +2062,63 @@ static enum bp_result get_firmware_info_v3_4(
return BP_RESULT_OK;
}
+static enum bp_result get_firmware_info_v3_5(
+ struct bios_parser *bp,
+ struct dc_firmware_info *info)
+{
+ struct atom_firmware_info_v3_5 *firmware_info;
+ struct atom_common_table_header *header;
+ struct atom_data_revision revision;
+ struct atom_display_controller_info_v4_5 *dce_info_v4_5 = NULL;
+
+ if (!info)
+ return BP_RESULT_BADINPUT;
+
+ firmware_info = GET_IMAGE(struct atom_firmware_info_v3_5,
+ DATA_TABLES(firmwareinfo));
+
+ if (!firmware_info)
+ return BP_RESULT_BADBIOSTABLE;
+
+ memset(info, 0, sizeof(*info));
+
+ if (firmware_info->board_i2c_feature_id == 0x2) {
+ info->oem_i2c_present = true;
+ info->oem_i2c_obj_id = firmware_info->board_i2c_feature_gpio_id;
+ } else {
+ info->oem_i2c_present = false;
+ }
+
+ header = GET_IMAGE(struct atom_common_table_header,
+ DATA_TABLES(dce_info));
+
+ get_atom_data_table_revision(header, &revision);
+
+ switch (revision.major) {
+ case 4:
+ switch (revision.minor) {
+ case 5:
+ dce_info_v4_5 = GET_IMAGE(struct atom_display_controller_info_v4_5,
+ DATA_TABLES(dce_info));
+
+ if (!dce_info_v4_5)
+ return BP_RESULT_BADBIOSTABLE;
+
+ /* 100MHz expected */
+ info->pll_info.crystal_frequency = dce_info_v4_5->dce_refclk_10khz * 10;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+
+ return BP_RESULT_OK;
+}
+
static enum bp_result bios_parser_get_encoder_cap_info(
struct dc_bios *dcb,
struct graphics_object_id object_id,
@@ -2215,10 +2287,8 @@ static enum bp_result bios_parser_get_disp_connector_caps_info(
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
struct atom_display_object_path_v2 *object;
-
struct atom_display_object_path_v3 *object_path_v3;
struct atom_connector_caps_record *record_path_v3;
-
struct atom_disp_connector_caps_record *record = NULL;
if (!info)
@@ -2226,22 +2296,22 @@ static enum bp_result bios_parser_get_disp_connector_caps_info(
switch (bp->object_info_tbl.revision.minor) {
case 4:
- default:
- object = get_bios_object(bp, object_id);
-
- if (!object)
- return BP_RESULT_BADINPUT;
-
- record = get_disp_connector_caps_record(bp, object);
- if (!record)
- return BP_RESULT_NORECORD;
-
- info->INTERNAL_DISPLAY =
- (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0;
- info->INTERNAL_DISPLAY_BL =
- (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0;
- break;
- case 5:
+ default:
+ object = get_bios_object(bp, object_id);
+
+ if (!object)
+ return BP_RESULT_BADINPUT;
+
+ record = get_disp_connector_caps_record(bp, object);
+ if (!record)
+ return BP_RESULT_NORECORD;
+
+ info->INTERNAL_DISPLAY =
+ (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY) ? 1 : 0;
+ info->INTERNAL_DISPLAY_BL =
+ (record->connectcaps & ATOM_CONNECTOR_CAP_INTERNAL_DISPLAY_BL) ? 1 : 0;
+ break;
+ case 5:
object_path_v3 = get_bios_object_from_path_v3(bp, object_id);
if (!object_path_v3)
@@ -2403,6 +2473,24 @@ static enum bp_result get_vram_info_v30(
return result;
}
+static enum bp_result get_vram_info_from_umc_info_v40(
+ struct bios_parser *bp,
+ struct dc_vram_info *info)
+{
+ struct atom_umc_info_v4_0 *info_v40;
+ enum bp_result result = BP_RESULT_OK;
+
+ info_v40 = GET_IMAGE(struct atom_umc_info_v4_0,
+ DATA_TABLES(umc_info));
+
+ if (info_v40 == NULL)
+ return BP_RESULT_BADBIOSTABLE;
+
+ info->num_chans = info_v40->channel_num;
+ info->dram_channel_width_bytes = (1 << info_v40->channel_width) / 8;
+
+ return result;
+}
/*
* get_integrated_info_v11
@@ -2428,10 +2516,11 @@ static enum bp_result get_integrated_info_v11(
info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11,
DATA_TABLES(integratedsysteminfo));
- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
if (info_v11 == NULL)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v11->gpucapinfo);
/*
@@ -2643,11 +2732,12 @@ static enum bp_result get_integrated_info_v2_1(
info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1,
DATA_TABLES(integratedsysteminfo));
- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
if (info_v2_1 == NULL)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v2_1->gpucapinfo);
/*
@@ -2805,11 +2895,11 @@ static enum bp_result get_integrated_info_v2_2(
info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2,
DATA_TABLES(integratedsysteminfo));
- DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
-
if (info_v2_2 == NULL)
return BP_RESULT_BADBIOSTABLE;
+ DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage);
+
info->gpu_cap_info =
le32_to_cpu(info_v2_2->gpucapinfo);
/*
@@ -2826,6 +2916,8 @@ static enum bp_result get_integrated_info_v2_2(
info->ma_channel_number = info_v2_2->umachannelnumber;
info->dp_ss_control =
le16_to_cpu(info_v2_2->reserved1);
+ info->gpuclk_ss_percentage = info_v2_2->gpuclk_ss_percentage;
+ info->gpuclk_ss_type = info_v2_2->gpuclk_ss_type;
for (i = 0; i < NUMBER_OF_UCHAR_FOR_GUID; ++i) {
info->ext_disp_conn_info.gu_id[i] =
@@ -2922,8 +3014,11 @@ static enum bp_result construct_integrated_info(
struct atom_common_table_header *header;
struct atom_data_revision revision;
- uint32_t i;
- uint32_t j;
+ int32_t i;
+ int32_t j;
+
+ if (!info)
+ return result;
if (info && DATA_TABLES(integratedsysteminfo)) {
header = GET_IMAGE(struct atom_common_table_header,
@@ -2948,6 +3043,7 @@ static enum bp_result construct_integrated_info(
result = get_integrated_info_v2_1(bp, info);
break;
case 2:
+ case 3:
result = get_integrated_info_v2_2(bp, info);
break;
default:
@@ -3003,11 +3099,12 @@ static enum bp_result construct_integrated_info(
info->ext_disp_conn_info.path[i].ext_encoder_obj_id.id,
info->ext_disp_conn_info.path[i].caps
);
- if (info->ext_disp_conn_info.path[i].caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN)
- DC_LOG_BIOS("BIOS EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i);
+ if ((info->ext_disp_conn_info.path[i].caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN)
+ DC_LOG_BIOS("BIOS AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i);
else if (bp->base.ctx->dc->config.force_bios_fixed_vs) {
- info->ext_disp_conn_info.path[i].caps |= EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN;
- DC_LOG_BIOS("driver forced EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i);
+ info->ext_disp_conn_info.path[i].caps &= ~AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
+ info->ext_disp_conn_info.path[i].caps |= AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN;
+ DC_LOG_BIOS("driver forced AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i);
}
}
// Log the Checksum and Voltage Swing
@@ -3037,11 +3134,33 @@ static enum bp_result bios_parser_get_vram_info(
struct dc_vram_info *info)
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
- static enum bp_result result = BP_RESULT_BADBIOSTABLE;
+ enum bp_result result = BP_RESULT_BADBIOSTABLE;
struct atom_common_table_header *header;
struct atom_data_revision revision;
- if (info && DATA_TABLES(vram_info)) {
+ // vram info moved to umc_info for DCN4x
+ if (info && DATA_TABLES(umc_info)) {
+ header = GET_IMAGE(struct atom_common_table_header,
+ DATA_TABLES(umc_info));
+
+ get_atom_data_table_revision(header, &revision);
+
+ switch (revision.major) {
+ case 4:
+ switch (revision.minor) {
+ case 0:
+ result = get_vram_info_from_umc_info_v40(bp, info);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (result != BP_RESULT_OK && info && DATA_TABLES(vram_info)) {
header = GET_IMAGE(struct atom_common_table_header,
DATA_TABLES(vram_info));
@@ -3335,27 +3454,28 @@ static enum bp_result get_bracket_layout_record(
DC_LOG_DETECTION_EDID_PARSER("Invalid slot_layout_info\n");
return BP_RESULT_BADINPUT;
}
+
tbl = &bp->object_info_tbl;
v1_4 = tbl->v1_4;
v1_5 = tbl->v1_5;
result = BP_RESULT_NORECORD;
switch (bp->object_info_tbl.revision.minor) {
- case 4:
- default:
- for (i = 0; i < v1_4->number_of_path; ++i) {
- if (bracket_layout_id ==
- v1_4->display_path[i].display_objid) {
- result = update_slot_layout_info(dcb, i, slot_layout_info);
- break;
- }
+ case 4:
+ default:
+ for (i = 0; i < v1_4->number_of_path; ++i) {
+ if (bracket_layout_id == v1_4->display_path[i].display_objid) {
+ result = update_slot_layout_info(dcb, i, slot_layout_info);
+ break;
}
- break;
- case 5:
- for (i = 0; i < v1_5->number_of_path; ++i)
- result = update_slot_layout_info_v2(dcb, i, slot_layout_info);
- break;
+ }
+ break;
+ case 5:
+ for (i = 0; i < v1_5->number_of_path; ++i)
+ result = update_slot_layout_info_v2(dcb, i, slot_layout_info);
+ break;
}
+
return result;
}
@@ -3364,9 +3484,7 @@ static enum bp_result bios_get_board_layout_info(
struct board_layout_info *board_layout_info)
{
unsigned int i;
-
struct bios_parser *bp;
-
static enum bp_result record_result;
unsigned int max_slots;
@@ -3376,7 +3494,6 @@ static enum bp_result bios_get_board_layout_info(
0, 0
};
-
bp = BP_FROM_DCB(dcb);
if (board_layout_info == NULL) {
@@ -3557,7 +3674,6 @@ static const struct dc_vbios_funcs vbios_funcs = {
.bios_parser_destroy = firmware_parser_destroy,
.get_board_layout_info = bios_get_board_layout_info,
- /* TODO: use this fn in hw init?*/
.pack_data_tables = bios_parser_pack_data_tables,
.get_atom_dc_golden_table = bios_get_atom_dc_golden_table,
@@ -3667,7 +3783,7 @@ static bool bios_parser2_construct(
bp->base.integrated_info = bios_parser_create_integrated_info(&bp->base);
bp->base.fw_info_valid = bios_parser_get_firmware_info(&bp->base, &bp->base.fw_info) == BP_RESULT_OK;
bios_parser_get_vram_info(&bp->base, &bp->base.vram_info);
-
+ bios_parser_get_soc_bb_info(&bp->base, &bp->base.bb_info);
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c
index adc710fe4a45..8d2cf95ae739 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c
@@ -78,10 +78,3 @@ void bios_set_scratch_critical_state(
uint32_t critial_state = state ? 1 : 0;
REG_UPDATE(BIOS_SCRATCH_6, S6_CRITICAL_STATE, critial_state);
}
-
-uint32_t bios_get_vga_enabled_displays(
- struct dc_bios *bios)
-{
- return REG_READ(BIOS_SCRATCH_3) & 0XFFFF;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h
index e1b4a40a353d..ab162f2fe577 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h
@@ -34,7 +34,6 @@ uint8_t *bios_get_image(struct dc_bios *bp, uint32_t offset,
bool bios_is_accelerated_mode(struct dc_bios *bios);
void bios_set_scratch_acc_mode_change(struct dc_bios *bios, uint32_t state);
void bios_set_scratch_critical_state(struct dc_bios *bios, bool state);
-uint32_t bios_get_vga_enabled_displays(struct dc_bios *bios);
#define GET_IMAGE(type, offset) ((type *) bios_get_image(&bp->base, offset, sizeof(type)))
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
index 818a529cacc3..58e88778da7f 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
@@ -37,7 +37,7 @@
#define EXEC_BIOS_CMD_TABLE(command, params)\
(amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
GetIndexIntoMasterTable(COMMAND, command), \
- (uint32_t *)&params) == 0)
+ (uint32_t *)&params, sizeof(params)) == 0)
#define BIOS_CMD_TABLE_REVISION(command, frev, crev)\
amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
@@ -399,7 +399,7 @@ static enum bp_result transmitter_control_v1_6(
static void init_transmitter_control(struct bios_parser *bp)
{
uint8_t frev;
- uint8_t crev;
+ uint8_t crev = 0;
if (BIOS_CMD_TABLE_REVISION(UNIPHYTransmitterControl,
frev, crev) == false)
@@ -993,7 +993,7 @@ static enum bp_result set_pixel_clock_v3(
allocation.sPCLKInput.usFbDiv =
cpu_to_le16((uint16_t)bp_params->feedback_divider);
allocation.sPCLKInput.ucFracFbDiv =
- (uint8_t)bp_params->fractional_feedback_divider;
+ (uint8_t)(bp_params->fractional_feedback_divider / 100000);
allocation.sPCLKInput.ucPostDiv =
(uint8_t)bp_params->pixel_clock_post_divider;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 90a02d7bd3da..f2b1720a6a66 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -49,7 +49,7 @@
#define EXEC_BIOS_CMD_TABLE(fname, params)\
(amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
GET_INDEX_INTO_MASTER_TABLE(command, fname), \
- (uint32_t *)&params) == 0)
+ (uint32_t *)&params, sizeof(params)) == 0)
#define BIOS_CMD_TABLE_REVISION(fname, frev, crev)\
amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
@@ -101,7 +101,6 @@ static void init_dig_encoder_control(struct bios_parser *bp)
bp->cmd_tbl.dig_encoder_control = encoder_control_digx_v1_5;
break;
default:
- dm_output_to_console("Don't have dig_encoder_control for v%d\n", version);
bp->cmd_tbl.dig_encoder_control = encoder_control_fallback;
break;
}
@@ -123,7 +122,7 @@ static void encoder_control_dmcub(
sizeof(cmd.digx_encoder_control.header);
cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig;
- dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static enum bp_result encoder_control_digx_v1_5(
@@ -210,6 +209,7 @@ static enum bp_result encoder_control_fallback(
******************************************************************************
*****************************************************************************/
+
static enum bp_result transmitter_control_v1_6(
struct bios_parser *bp,
struct bp_transmitter_control *cntl);
@@ -225,9 +225,10 @@ static enum bp_result transmitter_control_fallback(
static void init_transmitter_control(struct bios_parser *bp)
{
uint8_t frev;
- uint8_t crev;
+ uint8_t crev = 0;
- BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev);
+ if (!BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) && (bp->base.ctx->dc->ctx->dce_version <= DCN_VERSION_2_0))
+ BREAK_TO_DEBUGGER();
switch (crev) {
case 6:
@@ -237,7 +238,6 @@ static void init_transmitter_control(struct bios_parser *bp)
bp->cmd_tbl.transmitter_control = transmitter_control_v1_7;
break;
default:
- dm_output_to_console("Don't have transmitter_control for v%d\n", crev);
bp->cmd_tbl.transmitter_control = transmitter_control_fallback;
break;
}
@@ -259,7 +259,7 @@ static void transmitter_control_dmcub(
sizeof(cmd.dig1_transmitter_control.header);
cmd.dig1_transmitter_control.transmitter_control.dig = *dig;
- dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static enum bp_result transmitter_control_v1_6(
@@ -321,7 +321,22 @@ static void transmitter_control_dmcub_v1_7(
sizeof(cmd.dig1_transmitter_control.header);
cmd.dig1_transmitter_control.transmitter_control.dig_v1_7 = *dig;
- dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+static struct dc_link *get_link_by_phy_id(struct dc *p_dc, uint32_t phy_id)
+{
+ struct dc_link *link = NULL;
+
+ // Get Transition Bitmask from dc_link structure associated with PHY
+ for (uint8_t link_id = 0; link_id < MAX_LINKS; link_id++) {
+ if (phy_id == p_dc->links[link_id]->link_enc->transmitter) {
+ link = p_dc->links[link_id];
+ break;
+ }
+ }
+
+ return link;
}
static enum bp_result transmitter_control_v1_7(
@@ -362,7 +377,38 @@ static enum bp_result transmitter_control_v1_7(
if (bp->base.ctx->dc->ctx->dmub_srv &&
bp->base.ctx->dc->debug.dmub_command_table) {
+ struct dm_process_phy_transition_init_params process_phy_transition_init_params = {0};
+ struct dc_link *link = get_link_by_phy_id(bp->base.ctx->dc, dig_v1_7.phyid);
+ bool is_phy_transition_interlock_allowed = false;
+ uint8_t action = dig_v1_7.action;
+
+ if (link) {
+ if (link->phy_transition_bitmask &&
+ (action == TRANSMITTER_CONTROL_ENABLE || action == TRANSMITTER_CONTROL_DISABLE)) {
+ is_phy_transition_interlock_allowed = true;
+
+ // Prepare input parameters for processing ACPI retimers
+ process_phy_transition_init_params.action = action;
+ process_phy_transition_init_params.display_port_lanes_count = cntl->lanes_number;
+ process_phy_transition_init_params.phy_id = dig_v1_7.phyid;
+ process_phy_transition_init_params.signal = cntl->signal;
+ process_phy_transition_init_params.sym_clock_10khz = dig_v1_7.symclk_units.symclk_10khz;
+ process_phy_transition_init_params.display_port_link_rate = link->cur_link_settings.link_rate;
+ process_phy_transition_init_params.transition_bitmask = link->phy_transition_bitmask;
+ }
+ dig_v1_7.skip_phy_ssc_reduction = link->wa_flags.skip_phy_ssc_reduction;
+ }
+
+ // Handle PRE_OFF_TO_ON: Process ACPI PHY Transition Interlock
+ if (is_phy_transition_interlock_allowed && action == TRANSMITTER_CONTROL_ENABLE)
+ dm_acpi_process_phy_transition_interlock(bp->base.ctx, process_phy_transition_init_params);
+
transmitter_control_dmcub_v1_7(bp->base.ctx->dmub_srv, &dig_v1_7);
+
+ // Handle POST_ON_TO_OFF: Process ACPI PHY Transition Interlock
+ if (is_phy_transition_interlock_allowed && action == TRANSMITTER_CONTROL_DISABLE)
+ dm_acpi_process_phy_transition_interlock(bp->base.ctx, process_phy_transition_init_params);
+
return BP_RESULT_OK;
}
@@ -407,8 +453,6 @@ static void init_set_pixel_clock(struct bios_parser *bp)
bp->cmd_tbl.set_pixel_clock = set_pixel_clock_v7;
break;
default:
- dm_output_to_console("Don't have set_pixel_clock for v%d\n",
- BIOS_CMD_TABLE_PARA_REVISION(setpixelclock));
bp->cmd_tbl.set_pixel_clock = set_pixel_clock_fallback;
break;
}
@@ -429,7 +473,7 @@ static void set_pixel_clock_dmcub(
sizeof(cmd.set_pixel_clock.header);
cmd.set_pixel_clock.pixel_clock.clk = *clk;
- dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static enum bp_result set_pixel_clock_v7(
@@ -553,7 +597,6 @@ static void init_set_crtc_timing(struct bios_parser *bp)
set_crtc_using_dtd_timing_v3;
break;
default:
- dm_output_to_console("Don't have set_crtc_timing for v%d\n", dtd_version);
bp->cmd_tbl.set_crtc_timing = NULL;
break;
}
@@ -670,8 +713,6 @@ static void init_enable_crtc(struct bios_parser *bp)
bp->cmd_tbl.enable_crtc = enable_crtc_v1;
break;
default:
- dm_output_to_console("Don't have enable_crtc for v%d\n",
- BIOS_CMD_TABLE_PARA_REVISION(enablecrtc));
bp->cmd_tbl.enable_crtc = NULL;
break;
}
@@ -796,7 +837,7 @@ static void enable_disp_power_gating_dmcub(
sizeof(cmd.enable_disp_power_gating.header);
cmd.enable_disp_power_gating.power_gating.pwr = *pwr;
- dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static enum bp_result enable_disp_power_gating_v2_1(
@@ -863,8 +904,6 @@ static void init_set_dce_clock(struct bios_parser *bp)
bp->cmd_tbl.set_dce_clock = set_dce_clock_v2_1;
break;
default:
- dm_output_to_console("Don't have set_dce_clock for v%d\n",
- BIOS_CMD_TABLE_PARA_REVISION(setdceclock));
bp->cmd_tbl.set_dce_clock = NULL;
break;
}
@@ -976,7 +1015,7 @@ static unsigned int get_smu_clock_info_v3_1(struct bios_parser *bp, uint8_t id)
static enum bp_result enable_lvtma_control(
struct bios_parser *bp,
uint8_t uc_pwr_on,
- uint8_t panel_instance,
+ uint8_t pwrseq_instance,
uint8_t bypass_panel_control_wait);
static void init_enable_lvtma_control(struct bios_parser *bp)
@@ -989,7 +1028,7 @@ static void init_enable_lvtma_control(struct bios_parser *bp)
static void enable_lvtma_control_dmcub(
struct dc_dmub_srv *dmcub,
uint8_t uc_pwr_on,
- uint8_t panel_instance,
+ uint8_t pwrseq_instance,
uint8_t bypass_panel_control_wait)
{
@@ -1002,17 +1041,17 @@ static void enable_lvtma_control_dmcub(
DMUB_CMD__VBIOS_LVTMA_CONTROL;
cmd.lvtma_control.data.uc_pwr_action =
uc_pwr_on;
- cmd.lvtma_control.data.panel_inst =
- panel_instance;
+ cmd.lvtma_control.data.pwrseq_inst =
+ pwrseq_instance;
cmd.lvtma_control.data.bypass_panel_control_wait =
bypass_panel_control_wait;
- dm_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dmcub->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static enum bp_result enable_lvtma_control(
struct bios_parser *bp,
uint8_t uc_pwr_on,
- uint8_t panel_instance,
+ uint8_t pwrseq_instance,
uint8_t bypass_panel_control_wait)
{
enum bp_result result = BP_RESULT_FAILURE;
@@ -1021,7 +1060,7 @@ static enum bp_result enable_lvtma_control(
bp->base.ctx->dc->debug.dmub_command_table) {
enable_lvtma_control_dmcub(bp->base.ctx->dmub_srv,
uc_pwr_on,
- panel_instance,
+ pwrseq_instance,
bypass_panel_control_wait);
return BP_RESULT_OK;
}
@@ -1045,3 +1084,4 @@ void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp)
init_enable_lvtma_control(bp);
}
+
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
index b6d09bf6cf72..41c8c014397f 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.h
@@ -96,7 +96,7 @@ struct cmd_tbl {
struct bios_parser *bp, uint8_t id);
enum bp_result (*enable_lvtma_control)(struct bios_parser *bp,
uint8_t uc_pwr_on,
- uint8_t panel_instance,
+ uint8_t pwrseq_instance,
uint8_t bypass_panel_control_wait);
};
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
index e317a3615147..91bc8a06e2cf 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
@@ -293,3 +293,107 @@ uint8_t dal_cmd_table_helper_encoder_id_to_atom(
return ENCODER_OBJECT_ID_NONE;
}
}
+
+uint8_t phy_id_to_atom(enum transmitter t)
+{
+ uint8_t atom_phy_id;
+
+ switch (t) {
+ case TRANSMITTER_UNIPHY_A:
+ atom_phy_id = ATOM_PHY_ID_UNIPHYA;
+ break;
+ case TRANSMITTER_UNIPHY_B:
+ atom_phy_id = ATOM_PHY_ID_UNIPHYB;
+ break;
+ case TRANSMITTER_UNIPHY_C:
+ atom_phy_id = ATOM_PHY_ID_UNIPHYC;
+ break;
+ case TRANSMITTER_UNIPHY_D:
+ atom_phy_id = ATOM_PHY_ID_UNIPHYD;
+ break;
+ case TRANSMITTER_UNIPHY_E:
+ atom_phy_id = ATOM_PHY_ID_UNIPHYE;
+ break;
+ case TRANSMITTER_UNIPHY_F:
+ atom_phy_id = ATOM_PHY_ID_UNIPHYF;
+ break;
+ case TRANSMITTER_UNIPHY_G:
+ atom_phy_id = ATOM_PHY_ID_UNIPHYG;
+ break;
+ default:
+ atom_phy_id = ATOM_PHY_ID_UNIPHYA;
+ break;
+ }
+ return atom_phy_id;
+}
+
+uint8_t clock_source_id_to_atom_phy_clk_src_id(
+ enum clock_source_id id)
+{
+ uint8_t atom_phy_clk_src_id = 0;
+
+ switch (id) {
+ case CLOCK_SOURCE_ID_PLL0:
+ atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
+ break;
+ case CLOCK_SOURCE_ID_PLL1:
+ atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
+ break;
+ case CLOCK_SOURCE_ID_PLL2:
+ atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
+ break;
+ case CLOCK_SOURCE_ID_EXTERNAL:
+ atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
+ break;
+ default:
+ atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
+ break;
+ }
+
+ return atom_phy_clk_src_id >> 2;
+}
+
+bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
+{
+ bool result = false;
+
+ if (atom_engine_id != NULL)
+ switch (id) {
+ case ENGINE_ID_DIGA:
+ *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
+ result = true;
+ break;
+ case ENGINE_ID_DIGB:
+ *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
+ result = true;
+ break;
+ case ENGINE_ID_DIGC:
+ *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
+ result = true;
+ break;
+ case ENGINE_ID_DIGD:
+ *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
+ result = true;
+ break;
+ case ENGINE_ID_DIGE:
+ *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
+ result = true;
+ break;
+ case ENGINE_ID_DIGF:
+ *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
+ result = true;
+ break;
+ case ENGINE_ID_DIGG:
+ *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
+ result = true;
+ break;
+ case ENGINE_ID_DACA:
+ *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
+ result = true;
+ break;
+ default:
+ break;
+ }
+
+ return result;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h
index dfd30aaf4032..547700e119a6 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.h
@@ -59,4 +59,12 @@ uint8_t dal_cmd_table_helper_transmitter_bp_to_atom(
uint8_t dal_cmd_table_helper_encoder_id_to_atom(
enum encoder_id id);
+
+uint8_t phy_id_to_atom(enum transmitter t);
+
+uint8_t clock_source_id_to_atom_phy_clk_src_id(
+ enum clock_source_id id);
+
+bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
index 8538f13e01bf..268e2414b34f 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
@@ -80,12 +80,15 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
case DCN_VERSION_3_16:
case DCN_VERSION_3_2:
case DCN_VERSION_3_21:
+ case DCN_VERSION_3_5:
+ case DCN_VERSION_3_51:
+ case DCN_VERSION_3_6:
+ case DCN_VERSION_4_01:
*h = dal_cmd_tbl_helper_dce112_get_table2();
return true;
default:
- /* Unsupported DCE */
- BREAK_TO_DEBUGGER();
+ *h = dal_cmd_tbl_helper_dce112_get_table2();
return false;
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c b/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c
index 11bf247bb180..3099128223df 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c
@@ -31,39 +31,6 @@
#include "../command_table_helper.h"
-static uint8_t phy_id_to_atom(enum transmitter t)
-{
- uint8_t atom_phy_id;
-
- switch (t) {
- case TRANSMITTER_UNIPHY_A:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- case TRANSMITTER_UNIPHY_B:
- atom_phy_id = ATOM_PHY_ID_UNIPHYB;
- break;
- case TRANSMITTER_UNIPHY_C:
- atom_phy_id = ATOM_PHY_ID_UNIPHYC;
- break;
- case TRANSMITTER_UNIPHY_D:
- atom_phy_id = ATOM_PHY_ID_UNIPHYD;
- break;
- case TRANSMITTER_UNIPHY_E:
- atom_phy_id = ATOM_PHY_ID_UNIPHYE;
- break;
- case TRANSMITTER_UNIPHY_F:
- atom_phy_id = ATOM_PHY_ID_UNIPHYF;
- break;
- case TRANSMITTER_UNIPHY_G:
- atom_phy_id = ATOM_PHY_ID_UNIPHYG;
- break;
- default:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- }
- return atom_phy_id;
-}
-
static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
{
uint8_t atom_dig_mode = ATOM_TRANSMITTER_DIGMODE_V5_DP;
@@ -94,32 +61,6 @@ static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
return atom_dig_mode;
}
-static uint8_t clock_source_id_to_atom_phy_clk_src_id(
- enum clock_source_id id)
-{
- uint8_t atom_phy_clk_src_id = 0;
-
- switch (id) {
- case CLOCK_SOURCE_ID_PLL0:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
- break;
- case CLOCK_SOURCE_ID_PLL1:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- case CLOCK_SOURCE_ID_PLL2:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
- break;
- case CLOCK_SOURCE_ID_EXTERNAL:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
- break;
- default:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- }
-
- return atom_phy_clk_src_id >> 2;
-}
-
static uint8_t hpd_sel_to_atom(enum hpd_source_id id)
{
uint8_t atom_hpd_sel = 0;
@@ -207,51 +148,6 @@ static bool clock_source_id_to_atom(
return result;
}
-static bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
-{
- bool result = false;
-
- if (atom_engine_id != NULL)
- switch (id) {
- case ENGINE_ID_DIGA:
- *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGB:
- *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGC:
- *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGD:
- *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGE:
- *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGF:
- *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGG:
- *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DACA:
- *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
- result = true;
- break;
- default:
- break;
- }
-
- return result;
-}
-
static uint8_t encoder_action_to_atom(enum bp_encoder_control_action action)
{
uint8_t atom_action = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c b/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c
index 755b6e33140a..349f0e5d5856 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c
@@ -29,40 +29,9 @@
#include "include/bios_parser_types.h"
-#include "../command_table_helper2.h"
-
-static uint8_t phy_id_to_atom(enum transmitter t)
-{
- uint8_t atom_phy_id;
+#include "../command_table_helper.h"
- switch (t) {
- case TRANSMITTER_UNIPHY_A:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- case TRANSMITTER_UNIPHY_B:
- atom_phy_id = ATOM_PHY_ID_UNIPHYB;
- break;
- case TRANSMITTER_UNIPHY_C:
- atom_phy_id = ATOM_PHY_ID_UNIPHYC;
- break;
- case TRANSMITTER_UNIPHY_D:
- atom_phy_id = ATOM_PHY_ID_UNIPHYD;
- break;
- case TRANSMITTER_UNIPHY_E:
- atom_phy_id = ATOM_PHY_ID_UNIPHYE;
- break;
- case TRANSMITTER_UNIPHY_F:
- atom_phy_id = ATOM_PHY_ID_UNIPHYF;
- break;
- case TRANSMITTER_UNIPHY_G:
- atom_phy_id = ATOM_PHY_ID_UNIPHYG;
- break;
- default:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- }
- return atom_phy_id;
-}
+#include "../command_table_helper2.h"
static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
{
@@ -91,32 +60,6 @@ static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
return atom_dig_mode;
}
-static uint8_t clock_source_id_to_atom_phy_clk_src_id(
- enum clock_source_id id)
-{
- uint8_t atom_phy_clk_src_id = 0;
-
- switch (id) {
- case CLOCK_SOURCE_ID_PLL0:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
- break;
- case CLOCK_SOURCE_ID_PLL1:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- case CLOCK_SOURCE_ID_PLL2:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
- break;
- case CLOCK_SOURCE_ID_EXTERNAL:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
- break;
- default:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- }
-
- return atom_phy_clk_src_id >> 2;
-}
-
static uint8_t hpd_sel_to_atom(enum hpd_source_id id)
{
uint8_t atom_hpd_sel = 0;
@@ -209,51 +152,6 @@ static bool clock_source_id_to_atom(
return result;
}
-static bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
-{
- bool result = false;
-
- if (atom_engine_id != NULL)
- switch (id) {
- case ENGINE_ID_DIGA:
- *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGB:
- *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGC:
- *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGD:
- *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGE:
- *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGF:
- *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGG:
- *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DACA:
- *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
- result = true;
- break;
- default:
- break;
- }
-
- return result;
-}
-
static uint8_t encoder_action_to_atom(enum bp_encoder_control_action action)
{
uint8_t atom_action = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c b/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c
index 06b4f7fa4a50..1a5fefcde8af 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c
@@ -31,39 +31,6 @@
#include "../command_table_helper.h"
-static uint8_t phy_id_to_atom(enum transmitter t)
-{
- uint8_t atom_phy_id;
-
- switch (t) {
- case TRANSMITTER_UNIPHY_A:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- case TRANSMITTER_UNIPHY_B:
- atom_phy_id = ATOM_PHY_ID_UNIPHYB;
- break;
- case TRANSMITTER_UNIPHY_C:
- atom_phy_id = ATOM_PHY_ID_UNIPHYC;
- break;
- case TRANSMITTER_UNIPHY_D:
- atom_phy_id = ATOM_PHY_ID_UNIPHYD;
- break;
- case TRANSMITTER_UNIPHY_E:
- atom_phy_id = ATOM_PHY_ID_UNIPHYE;
- break;
- case TRANSMITTER_UNIPHY_F:
- atom_phy_id = ATOM_PHY_ID_UNIPHYF;
- break;
- case TRANSMITTER_UNIPHY_G:
- atom_phy_id = ATOM_PHY_ID_UNIPHYG;
- break;
- default:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- }
- return atom_phy_id;
-}
-
static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
{
uint8_t atom_dig_mode = ATOM_TRANSMITTER_DIGMODE_V6_DP;
@@ -91,32 +58,6 @@ static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
return atom_dig_mode;
}
-static uint8_t clock_source_id_to_atom_phy_clk_src_id(
- enum clock_source_id id)
-{
- uint8_t atom_phy_clk_src_id = 0;
-
- switch (id) {
- case CLOCK_SOURCE_ID_PLL0:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
- break;
- case CLOCK_SOURCE_ID_PLL1:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- case CLOCK_SOURCE_ID_PLL2:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
- break;
- case CLOCK_SOURCE_ID_EXTERNAL:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
- break;
- default:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- }
-
- return atom_phy_clk_src_id >> 2;
-}
-
static uint8_t hpd_sel_to_atom(enum hpd_source_id id)
{
uint8_t atom_hpd_sel = 0;
@@ -209,51 +150,6 @@ static bool clock_source_id_to_atom(
return result;
}
-static bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
-{
- bool result = false;
-
- if (atom_engine_id != NULL)
- switch (id) {
- case ENGINE_ID_DIGA:
- *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGB:
- *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGC:
- *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGD:
- *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGE:
- *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGF:
- *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGG:
- *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DACA:
- *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
- result = true;
- break;
- default:
- break;
- }
-
- return result;
-}
-
static uint8_t encoder_action_to_atom(enum bp_encoder_control_action action)
{
uint8_t atom_action = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c b/drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c
index 710221b4f5c5..01ccc803040c 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/dce60/command_table_helper_dce60.c
@@ -58,51 +58,6 @@ static uint8_t encoder_action_to_atom(enum bp_encoder_control_action action)
return atom_action;
}
-static bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
-{
- bool result = false;
-
- if (atom_engine_id != NULL)
- switch (id) {
- case ENGINE_ID_DIGA:
- *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGB:
- *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGC:
- *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGD:
- *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGE:
- *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGF:
- *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGG:
- *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DACA:
- *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
- result = true;
- break;
- default:
- break;
- }
-
- return result;
-}
-
static bool clock_source_id_to_atom(
enum clock_source_id id,
uint32_t *atom_pll_id)
@@ -149,32 +104,6 @@ static bool clock_source_id_to_atom(
return result;
}
-static uint8_t clock_source_id_to_atom_phy_clk_src_id(
- enum clock_source_id id)
-{
- uint8_t atom_phy_clk_src_id = 0;
-
- switch (id) {
- case CLOCK_SOURCE_ID_PLL0:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
- break;
- case CLOCK_SOURCE_ID_PLL1:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- case CLOCK_SOURCE_ID_PLL2:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
- break;
- case CLOCK_SOURCE_ID_EXTERNAL:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
- break;
- default:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- }
-
- return atom_phy_clk_src_id >> 2;
-}
-
static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
{
uint8_t atom_dig_mode = ATOM_TRANSMITTER_DIGMODE_V5_DP;
@@ -270,39 +199,6 @@ static uint8_t dig_encoder_sel_to_atom(enum engine_id id)
return atom_dig_encoder_sel;
}
-static uint8_t phy_id_to_atom(enum transmitter t)
-{
- uint8_t atom_phy_id;
-
- switch (t) {
- case TRANSMITTER_UNIPHY_A:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- case TRANSMITTER_UNIPHY_B:
- atom_phy_id = ATOM_PHY_ID_UNIPHYB;
- break;
- case TRANSMITTER_UNIPHY_C:
- atom_phy_id = ATOM_PHY_ID_UNIPHYC;
- break;
- case TRANSMITTER_UNIPHY_D:
- atom_phy_id = ATOM_PHY_ID_UNIPHYD;
- break;
- case TRANSMITTER_UNIPHY_E:
- atom_phy_id = ATOM_PHY_ID_UNIPHYE;
- break;
- case TRANSMITTER_UNIPHY_F:
- atom_phy_id = ATOM_PHY_ID_UNIPHYF;
- break;
- case TRANSMITTER_UNIPHY_G:
- atom_phy_id = ATOM_PHY_ID_UNIPHYG;
- break;
- default:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- }
- return atom_phy_id;
-}
-
static uint8_t disp_power_gating_action_to_atom(
enum bp_pipe_control_action action)
{
diff --git a/drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c b/drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c
index 8b30b558cf1f..2ec5264536c7 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/dce80/command_table_helper_dce80.c
@@ -58,51 +58,6 @@ static uint8_t encoder_action_to_atom(enum bp_encoder_control_action action)
return atom_action;
}
-static bool engine_bp_to_atom(enum engine_id id, uint32_t *atom_engine_id)
-{
- bool result = false;
-
- if (atom_engine_id != NULL)
- switch (id) {
- case ENGINE_ID_DIGA:
- *atom_engine_id = ASIC_INT_DIG1_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGB:
- *atom_engine_id = ASIC_INT_DIG2_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGC:
- *atom_engine_id = ASIC_INT_DIG3_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGD:
- *atom_engine_id = ASIC_INT_DIG4_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGE:
- *atom_engine_id = ASIC_INT_DIG5_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGF:
- *atom_engine_id = ASIC_INT_DIG6_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DIGG:
- *atom_engine_id = ASIC_INT_DIG7_ENCODER_ID;
- result = true;
- break;
- case ENGINE_ID_DACA:
- *atom_engine_id = ASIC_INT_DAC1_ENCODER_ID;
- result = true;
- break;
- default:
- break;
- }
-
- return result;
-}
-
static bool clock_source_id_to_atom(
enum clock_source_id id,
uint32_t *atom_pll_id)
@@ -149,32 +104,6 @@ static bool clock_source_id_to_atom(
return result;
}
-static uint8_t clock_source_id_to_atom_phy_clk_src_id(
- enum clock_source_id id)
-{
- uint8_t atom_phy_clk_src_id = 0;
-
- switch (id) {
- case CLOCK_SOURCE_ID_PLL0:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P0PLL;
- break;
- case CLOCK_SOURCE_ID_PLL1:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- case CLOCK_SOURCE_ID_PLL2:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P2PLL;
- break;
- case CLOCK_SOURCE_ID_EXTERNAL:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_REFCLK_SRC_EXT;
- break;
- default:
- atom_phy_clk_src_id = ATOM_TRANSMITTER_CONFIG_V5_P1PLL;
- break;
- }
-
- return atom_phy_clk_src_id >> 2;
-}
-
static uint8_t signal_type_to_atom_dig_mode(enum signal_type s)
{
uint8_t atom_dig_mode = ATOM_TRANSMITTER_DIGMODE_V5_DP;
@@ -270,39 +199,6 @@ static uint8_t dig_encoder_sel_to_atom(enum engine_id id)
return atom_dig_encoder_sel;
}
-static uint8_t phy_id_to_atom(enum transmitter t)
-{
- uint8_t atom_phy_id;
-
- switch (t) {
- case TRANSMITTER_UNIPHY_A:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- case TRANSMITTER_UNIPHY_B:
- atom_phy_id = ATOM_PHY_ID_UNIPHYB;
- break;
- case TRANSMITTER_UNIPHY_C:
- atom_phy_id = ATOM_PHY_ID_UNIPHYC;
- break;
- case TRANSMITTER_UNIPHY_D:
- atom_phy_id = ATOM_PHY_ID_UNIPHYD;
- break;
- case TRANSMITTER_UNIPHY_E:
- atom_phy_id = ATOM_PHY_ID_UNIPHYE;
- break;
- case TRANSMITTER_UNIPHY_F:
- atom_phy_id = ATOM_PHY_ID_UNIPHYF;
- break;
- case TRANSMITTER_UNIPHY_G:
- atom_phy_id = ATOM_PHY_ID_UNIPHYG;
- break;
- default:
- atom_phy_id = ATOM_PHY_ID_UNIPHYA;
- break;
- }
- return atom_phy_id;
-}
-
static uint8_t disp_power_gating_action_to_atom(
enum bp_pipe_control_action action)
{
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index ad390e4cd0a9..60021671b386 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -83,7 +83,6 @@ CLK_MGR_DCN10 = rv1_clk_mgr.o rv1_clk_mgr_vbios_smu.o rv2_clk_mgr.o
AMD_DAL_CLK_MGR_DCN10 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn10/,$(CLK_MGR_DCN10))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN10)
-
###############################################################################
# DCN20
###############################################################################
@@ -113,7 +112,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
###############################################################################
# DCN30
###############################################################################
-CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o
+CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o dcn30m_clk_mgr.o dcn30m_clk_mgr_smu_msg.o
AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30))
@@ -172,4 +171,21 @@ AMD_DAL_CLK_MGR_DCN32 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DC
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32)
+###############################################################################
+# DCN35
+###############################################################################
+CLK_MGR_DCN35 = dcn35_smu.o dcn351_clk_mgr.o dcn35_clk_mgr.o
+
+AMD_DAL_CLK_MGR_DCN35 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn35/,$(CLK_MGR_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN35)
+
+###############################################################################
+# DCN401
+###############################################################################
+CLK_MGR_DCN401 = dcn401_clk_mgr.o dcn401_clk_mgr_smu_msg.o
+
+AMD_DAL_CLK_MGR_DCN401 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn401/,$(CLK_MGR_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN401)
endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index dcedf9645161..15cf13ec5302 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -23,13 +23,12 @@
*
*/
-#include <linux/slab.h>
-
#include "dal_asic_id.h"
#include "dc_types.h"
#include "dccg.h"
#include "clk_mgr_internal.h"
-#include "link.h"
+#include "dc_state_priv.h"
+#include "link_service.h"
#include "dce100/dce_clk_mgr.h"
#include "dce110/dce110_clk_mgr.h"
@@ -48,6 +47,8 @@
#include "dcn315/dcn315_clk_mgr.h"
#include "dcn316/dcn316_clk_mgr.h"
#include "dcn32/dcn32_clk_mgr.h"
+#include "dcn35/dcn35_clk_mgr.h"
+#include "dcn401/dcn401_clk_mgr.h"
int clk_mgr_helper_get_active_display_cnt(
struct dc *dc,
@@ -58,20 +59,15 @@ int clk_mgr_helper_get_active_display_cnt(
display_count = 0;
for (i = 0; i < context->stream_count; i++) {
const struct dc_stream_state *stream = context->streams[i];
+ const struct dc_stream_status *stream_status = &context->stream_status[i];
/* Don't count SubVP phantom pipes as part of active
* display count
*/
- if (stream->mall_stream_config.type == SUBVP_PHANTOM)
+ if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM)
continue;
- /*
- * Only notify active stream or virtual stream.
- * Need to notify virtual stream to work around
- * headless case. HPD does not fire when system is in
- * S0i2.
- */
- if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL)
+ if (!stream->dpms_off || dc->is_switch_in_progress_dest || (stream_status && stream_status->plane_count))
display_count++;
}
@@ -162,7 +158,6 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
return NULL;
}
dce60_clk_mgr_construct(ctx, clk_mgr);
- dce_clk_mgr_construct(ctx, clk_mgr);
return &clk_mgr->base;
}
#endif
@@ -272,7 +267,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
return &clk_mgr->base;
}
- if (asic_id.chip_id == DEVICE_ID_NV_13FE) {
+ if (ctx->dce_version == DCN_VERSION_2_01) {
dcn201_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
return &clk_mgr->base;
}
@@ -329,16 +324,14 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
}
break;
case AMDGPU_FAMILY_GC_11_0_0: {
- struct clk_mgr_internal *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
-
- if (clk_mgr == NULL) {
- BREAK_TO_DEBUGGER();
- return NULL;
- }
+ struct clk_mgr_internal *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
- dcn32_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
- return &clk_mgr->base;
- break;
+ if (clk_mgr == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+ dcn32_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+ return &clk_mgr->base;
}
case AMDGPU_FAMILY_GC_11_0_1: {
@@ -354,7 +347,34 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
}
break;
-#endif /* CONFIG_DRM_AMD_DC_FP - Family RV */
+ case AMDGPU_FAMILY_GC_11_5_0: {
+ struct clk_mgr_dcn35 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
+
+ if (clk_mgr == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+ if (ctx->dce_version == DCN_VERSION_3_51)
+ dcn351_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+ else
+ dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+
+ return &clk_mgr->base.base;
+ }
+ break;
+
+ case AMDGPU_FAMILY_GC_12_0_0: {
+ struct clk_mgr_internal *clk_mgr = dcn401_clk_mgr_construct(ctx, dccg);
+
+ if (clk_mgr == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+ return &clk_mgr->base;
+ }
+ break;
+#endif /* CONFIG_DRM_AMD_DC_FP */
default:
ASSERT(0); /* Unknown Asic */
break;
@@ -405,6 +425,13 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
dcn314_clk_mgr_destroy(clk_mgr);
break;
+ case AMDGPU_FAMILY_GC_11_5_0:
+ dcn35_clk_mgr_destroy(clk_mgr);
+ break;
+ case AMDGPU_FAMILY_GC_12_0_0:
+ dcn401_clk_mgr_destroy(clk_mgr);
+ break;
+
default:
break;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
index 26feefbb8990..6131ede2db7a 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
@@ -72,9 +72,9 @@ static const struct state_dependent_clocks dce80_max_clks_by_state[] = {
/* ClocksStateLow */
{ .display_clk_khz = 352000, .pixel_clk_khz = 330000},
/* ClocksStateNominal */
-{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 },
+{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 },
/* ClocksStatePerformance */
-{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 } };
+{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 } };
int dentist_get_divider_from_did(int did)
{
@@ -245,6 +245,11 @@ int dce_set_clock(
pxl_clk_params.target_pixel_clock_100hz = requested_clk_khz * 10;
pxl_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
+ /* DCE 6.0, DCE 6.4: engine clock is the same as PLL0 */
+ if (clk_mgr_base->ctx->dce_version == DCE_VERSION_6_0 ||
+ clk_mgr_base->ctx->dce_version == DCE_VERSION_6_4)
+ pxl_clk_params.pll_id = CLOCK_SOURCE_ID_PLL0;
+
if (clk_mgr_dce->dfs_bypass_active)
pxl_clk_params.flags.SET_DISPCLK_DFS_BYPASS = true;
@@ -386,8 +391,6 @@ static void dce_pplib_apply_display_requirements(
{
struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg;
- pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context);
-
dce110_fill_display_configs(context, pp_display_cfg);
if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0)
@@ -400,11 +403,9 @@ static void dce_update_clocks(struct clk_mgr *clk_mgr_base,
{
struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct dm_pp_power_level_change_request level_change_req;
- int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz;
-
- /*TODO: W/A for dal3 linux, investigate why this works */
- if (!clk_mgr_dce->dfs_bypass_active)
- patched_disp_clk = patched_disp_clk * 115 / 100;
+ const int max_disp_clk =
+ clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz;
+ int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz);
level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context);
/* get max clock state from PPLIB */
@@ -462,6 +463,9 @@ void dce_clk_mgr_construct(
clk_mgr->max_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
clk_mgr->cur_min_clks_state = DM_PP_CLOCKS_STATE_INVALID;
+ base->clks.max_supported_dispclk_khz =
+ clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz;
+
dce_clock_read_integrated_info(clk_mgr);
dce_clock_read_ss_info(clk_mgr);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
index 78df96882d6e..d50b9440210e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
@@ -120,9 +120,15 @@ void dce110_fill_display_configs(
const struct dc_state *context,
struct dm_pp_display_configuration *pp_display_cfg)
{
+ struct dc *dc = context->clk_mgr->ctx->dc;
int j;
int num_cfgs = 0;
+ pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context);
+ pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz;
+ pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0;
+ pp_display_cfg->crtc_index = dc->res_pool->res_cap->num_timing_generator;
+
for (j = 0; j < context->stream_count; j++) {
int k;
@@ -158,12 +164,29 @@ void dce110_fill_display_configs(
stream->link->cur_link_settings.link_rate;
cfg->link_settings.link_spread =
stream->link->cur_link_settings.link_spread;
- cfg->sym_clock = stream->phy_pix_clk;
+ cfg->pixel_clock = stream->phy_pix_clk;
/* Round v_refresh*/
cfg->v_refresh = stream->timing.pix_clk_100hz * 100;
cfg->v_refresh /= stream->timing.h_total;
cfg->v_refresh = (cfg->v_refresh + stream->timing.v_total / 2)
/ stream->timing.v_total;
+
+ /* Find first CRTC index and calculate its line time.
+ * This is necessary for DPM on SI GPUs.
+ */
+ if (cfg->pipe_idx < pp_display_cfg->crtc_index) {
+ const struct dc_crtc_timing *timing =
+ &context->streams[0]->timing;
+
+ pp_display_cfg->crtc_index = cfg->pipe_idx;
+ pp_display_cfg->line_time_in_us =
+ timing->h_total * 10000 / timing->pix_clk_100hz;
+ }
+ }
+
+ if (!num_cfgs) {
+ pp_display_cfg->crtc_index = 0;
+ pp_display_cfg->line_time_in_us = 0;
}
pp_display_cfg->display_count = num_cfgs;
@@ -195,7 +218,7 @@ void dce11_pplib_apply_display_requirements(
* , then change minimum memory clock based on real-time bandwidth
* limitation.
*/
- if ((dc->ctx->asic_id.chip_family == FAMILY_AI) &&
+ if (dc->bw_vbios && (dc->ctx->asic_id.chip_family == FAMILY_AI) &&
ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) {
pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz,
(uint32_t) div64_s64(
@@ -223,25 +246,8 @@ void dce11_pplib_apply_display_requirements(
pp_display_cfg->min_engine_clock_deep_sleep_khz
= context->bw_ctx.bw.dce.sclk_deep_sleep_khz;
- pp_display_cfg->avail_mclk_switch_time_us =
- dce110_get_min_vblank_time_us(context);
- /* TODO: dce11.2*/
- pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0;
-
- pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz;
-
dce110_fill_display_configs(context, pp_display_cfg);
- /* TODO: is this still applicable?*/
- if (pp_display_cfg->display_count == 1) {
- const struct dc_crtc_timing *timing =
- &context->streams[0]->timing;
-
- pp_display_cfg->crtc_index =
- pp_display_cfg->disp_configs[0].pipe_idx;
- pp_display_cfg->line_time_in_us = timing->h_total * 10000 / timing->pix_clk_100hz;
- }
-
if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0)
dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
index 5399b8cf6b75..c9ba7b3fd2c3 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
@@ -30,7 +30,7 @@
#include "dce110/dce110_clk_mgr.h"
#include "dce120_clk_mgr.h"
#include "dce100/dce_clk_mgr.h"
-#include "dce120/dce120_hw_sequencer.h"
+#include "dce120/dce120_hwseq.h"
static const struct state_dependent_clocks dce120_max_clks_by_state[] = {
/*ClocksStateInvalid - should not be used*/
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c
index 0267644717b2..69dd80d9f738 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c
@@ -83,22 +83,13 @@ static const struct state_dependent_clocks dce60_max_clks_by_state[] = {
static int dce60_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
- int dprefclk_wdivider;
- int dp_ref_clk_khz;
- int target_div;
+ struct dc_context *ctx = clk_mgr_base->ctx;
+ int dp_ref_clk_khz = 0;
- /* DCE6 has no DPREFCLK_CNTL to read DP Reference Clock source */
-
- /* Read the mmDENTIST_DISPCLK_CNTL to get the currently
- * programmed DID DENTIST_DPREFCLK_WDIVIDER*/
- REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DPREFCLK_WDIVIDER, &dprefclk_wdivider);
-
- /* Convert DENTIST_DPREFCLK_WDIVIDERto actual divider*/
- target_div = dentist_get_divider_from_did(dprefclk_wdivider);
-
- /* Calculate the current DFS clock, in kHz.*/
- dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
- * clk_mgr->base.dentist_vco_freq_khz) / target_div;
+ if (ASIC_REV_IS_TAHITI_P(ctx->asic_id.hw_internal_rev))
+ dp_ref_clk_khz = ctx->dc_bios->fw_info.default_display_engine_pll_frequency;
+ else
+ dp_ref_clk_khz = clk_mgr_base->clks.dispclk_khz;
return dce_adjust_dp_ref_freq_for_ss(clk_mgr, dp_ref_clk_khz);
}
@@ -109,8 +100,6 @@ static void dce60_pplib_apply_display_requirements(
{
struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg;
- pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context);
-
dce110_fill_display_configs(context, pp_display_cfg);
if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0)
@@ -123,11 +112,9 @@ static void dce60_update_clocks(struct clk_mgr *clk_mgr_base,
{
struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct dm_pp_power_level_change_request level_change_req;
- int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz;
-
- /*TODO: W/A for dal3 linux, investigate why this works */
- if (!clk_mgr_dce->dfs_bypass_active)
- patched_disp_clk = patched_disp_clk * 115 / 100;
+ const int max_disp_clk =
+ clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz;
+ int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz);
level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context);
/* get max clock state from PPLIB */
@@ -160,6 +147,8 @@ void dce60_clk_mgr_construct(
struct dc_context *ctx,
struct clk_mgr_internal *clk_mgr)
{
+ struct clk_mgr *base = &clk_mgr->base;
+
dce_clk_mgr_construct(ctx, clk_mgr);
memcpy(clk_mgr->max_clks_by_state,
@@ -170,5 +159,8 @@ void dce60_clk_mgr_construct(
clk_mgr->clk_mgr_shift = &disp_clk_shift;
clk_mgr->clk_mgr_mask = &disp_clk_mask;
clk_mgr->base.funcs = &dce60_funcs;
+
+ base->clks.max_supported_dispclk_khz =
+ clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
index 60761ff3cbf1..369421e46c52 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
@@ -23,9 +23,6 @@
*
*/
-#include <linux/slab.h>
-
-#include "reg_helper.h"
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "rv1_clk_mgr.h"
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c
deleted file mode 100644
index 61dd12198a3c..000000000000
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright 2012-16 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "reg_helper.h"
-#include "clk_mgr_internal.h"
-#include "rv1_clk_mgr_clk.h"
-
-#include "ip/Discovery/hwid.h"
-#include "ip/Discovery/v1/ip_offset_1.h"
-#include "ip/CLK/clk_10_0_default.h"
-#include "ip/CLK/clk_10_0_offset.h"
-#include "ip/CLK/clk_10_0_reg.h"
-#include "ip/CLK/clk_10_0_sh_mask.h"
-
-#include "dce100/dce_clk_mgr.h"
-
-#define CLK_BASE_INNER(inst) \
- CLK_BASE__INST ## inst ## _SEG0
-
-
-#define CLK_REG(reg_name, block, inst)\
- CLK_BASE(mm ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \
- mm ## block ## _ ## inst ## _ ## reg_name
-
-#define REG(reg_name) \
- CLK_REG(reg_name, CLK0, 0)
-
-
-/* Only used by testing framework*/
-void rv1_dump_clk_registers(struct clk_state_registers *regs, struct clk_bypass *bypass, struct clk_mgr *clk_mgr_base)
-{
- struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
-
- regs->CLK0_CLK8_CURRENT_CNT = REG_READ(CLK0_CLK8_CURRENT_CNT) / 10; //dcf clk
-
- bypass->dcfclk_bypass = REG_READ(CLK0_CLK8_BYPASS_CNTL) & 0x0007;
- if (bypass->dcfclk_bypass < 0 || bypass->dcfclk_bypass > 4)
- bypass->dcfclk_bypass = 0;
-
-
- regs->CLK0_CLK8_DS_CNTL = REG_READ(CLK0_CLK8_DS_CNTL) / 10; //dcf deep sleep divider
-
- regs->CLK0_CLK8_ALLOW_DS = REG_READ(CLK0_CLK8_ALLOW_DS); //dcf deep sleep allow
-
- regs->CLK0_CLK10_CURRENT_CNT = REG_READ(CLK0_CLK10_CURRENT_CNT) / 10; //dpref clk
-
- bypass->dispclk_pypass = REG_READ(CLK0_CLK10_BYPASS_CNTL) & 0x0007;
- if (bypass->dispclk_pypass < 0 || bypass->dispclk_pypass > 4)
- bypass->dispclk_pypass = 0;
-
- regs->CLK0_CLK11_CURRENT_CNT = REG_READ(CLK0_CLK11_CURRENT_CNT) / 10; //disp clk
-
- bypass->dprefclk_bypass = REG_READ(CLK0_CLK11_BYPASS_CNTL) & 0x0007;
- if (bypass->dprefclk_bypass < 0 || bypass->dprefclk_bypass > 4)
- bypass->dprefclk_bypass = 0;
-
-}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
index 89b79dd39628..d82a52319088 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
@@ -26,7 +26,6 @@
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "reg_helper.h"
-#include <linux/delay.h>
#include "rv1_clk_mgr_vbios_smu.h"
@@ -143,17 +142,3 @@ int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_di
return actual_dispclk_set_mhz * 1000;
}
-
-int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr)
-{
- int actual_dprefclk_set_mhz = -1;
-
- actual_dprefclk_set_mhz = rv1_vbios_smu_send_msg_with_param(
- clk_mgr,
- VBIOSSMC_MSG_SetDprefclkFreq,
- khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz));
-
- /* TODO: add code for programing DP DTO, currently this is down by command table */
-
- return actual_dprefclk_set_mhz * 1000;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h
index 083cb3158859..81d7c912549c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h
@@ -27,6 +27,5 @@
#define DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_
int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
-int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr);
#endif /* DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index c435f7632e8e..bb4f3bd7532e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -157,7 +157,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
int32_t N;
int32_t j;
- if (!pipe_ctx->stream)
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
continue;
/* Virtual encoders don't have this function */
if (!stream_enc->funcs->get_fifo_cal_average_level)
@@ -188,7 +188,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
int32_t N;
int32_t j;
- if (!pipe_ctx->stream)
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
continue;
/* Virtual encoders don't have this function */
if (!stream_enc->funcs->get_fifo_cal_average_level)
@@ -503,7 +503,7 @@ static void dcn2_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc
clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
- for (i = 0; i < MAX_PIPES * 2; i++) {
+ for (i = 0; i < MAX_LINKS; i++) {
if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req)
max_phyclk_req = clk_mgr->cur_phyclk_req_table[i];
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c
index 694fe4271b4d..76c612ecfe3c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c
@@ -34,8 +34,8 @@
#include "dm_services.h"
#include "cyan_skillfish_ip_offset.h"
-#include "dcn/dcn_2_0_3_offset.h"
-#include "dcn/dcn_2_0_3_sh_mask.h"
+#include "dcn/dcn_2_0_1_offset.h"
+#include "dcn/dcn_2_0_1_sh_mask.h"
#include "clk/clk_11_0_1_offset.h"
#include "clk/clk_11_0_1_sh_mask.h"
@@ -59,8 +59,6 @@
#define CTX \
clk_mgr->base.ctx
-#define DC_LOGGER \
- clk_mgr->base.ctx->logger
static const struct clk_mgr_registers clk_mgr_regs = {
CLK_COMMON_REG_LIST_DCN_201()
@@ -102,13 +100,19 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base,
if (clk_mgr_base->clks.dispclk_khz == 0 ||
dc->debug.force_clock_mode & 0x1) {
+ /* This path is taken on resume or boot up. If the forced_clock cfg
+ * option is used, we bypass programming dispclk and DPPCLK, but they
+ * still need to be set for S3.
+ */
+
force_reset = true;
+ /* force_clock_mode 0x1: force a reset of the clock even if it is
+ * the same clock, as long as it is in Passive level.
+ */
dcn2_read_clocks_from_hw_dentist(clk_mgr_base);
}
- clk_mgr_helper_get_active_display_cnt(dc, context);
-
if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz))
clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz;
@@ -152,11 +156,14 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base,
if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) {
if (dpp_clock_lowered) {
+ // if clock is being lowered, increase DTO before lowering refclk
dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
dcn20_update_clocks_update_dentist(clk_mgr, context);
} else {
+ // if clock is being raised, increase refclk before lowering DTO
if (update_dppclk || update_dispclk)
dcn20_update_clocks_update_dentist(clk_mgr, context);
+ // always update dtos unless clock is lowered and not safe to lower
dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index 0c6a4ab72b1d..e18097f82091 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -320,16 +320,16 @@ static void rn_dump_clk_registers(struct clk_state_registers_and_bypass *regs_an
regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4)
+ if (regs_and_bypass->dppclk_bypass > 4)
regs_and_bypass->dppclk_bypass = 0;
regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4)
+ if (regs_and_bypass->dcfclk_bypass > 4)
regs_and_bypass->dcfclk_bypass = 0;
regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4)
+ if (regs_and_bypass->dispclk_bypass > 4)
regs_and_bypass->dispclk_bypass = 0;
regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4)
+ if (regs_and_bypass->dprefclk_bypass > 4)
regs_and_bypass->dprefclk_bypass = 0;
if (log_info->enabled) {
@@ -484,7 +484,8 @@ static void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_sm
ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
/* Modify previous watermark range to cover up to max */
- ranges->reader_wm_sets[num_valid_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
+ if (num_valid_sets > 0)
+ ranges->reader_wm_sets[num_valid_sets - 1].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
}
num_valid_sets++;
}
@@ -548,7 +549,7 @@ static void rn_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc_l
clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
- for (i = 0; i < MAX_PIPES * 2; i++) {
+ for (i = 0; i < MAX_LINKS; i++) {
if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req)
max_phyclk_req = clk_mgr->cur_phyclk_req_table[i];
}
@@ -642,7 +643,8 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params
j = -1;
- ASSERT(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL);
+ static_assert(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL,
+ "number of reported FCLK DPM levels exceed maximum");
/* Find lowest DPM, FCLK is filled in reverse order*/
@@ -707,9 +709,7 @@ void rn_clk_mgr_construct(
int is_green_sardine = 0;
struct clk_log_info log_info = {0};
-#if defined(CONFIG_DRM_AMD_DC_FP)
is_green_sardine = ASICREV_IS_GREEN_SARDINE(ctx->asic_id.hw_internal_rev);
-#endif
clk_mgr->base.ctx = ctx;
clk_mgr->base.funcs = &dcn21_funcs;
@@ -772,7 +772,7 @@ void rn_clk_mgr_construct(
status = pp_smu->rn_funcs.get_dpm_clock_table(&pp_smu->rn_funcs.pp_smu, &clock_table);
if (status == PP_SMU_RESULT_OK &&
- ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ ctx->dc_bios->integrated_info) {
rn_clk_mgr_helper_populate_bw_params (clk_mgr->base.bw_params, &clock_table, ctx->dc_bios->integrated_info);
/* treat memory config as single channel if memory is asymmetrics. */
if (ctx->dc->config.is_asymmetric_memory)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
index 8c9d45e5b13b..5a633333dbb5 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c
@@ -26,6 +26,10 @@
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "reg_helper.h"
+#include "dm_helpers.h"
+
+#include "rn_clk_mgr_vbios_smu.h"
+
#include <linux/delay.h>
#include "renoir_ip_offset.h"
@@ -33,8 +37,6 @@
#include "mp/mp_12_0_0_offset.h"
#include "mp/mp_12_0_0_sh_mask.h"
-#include "rn_clk_mgr_vbios_smu.h"
-
#define REG(reg_name) \
(MP0_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name)
@@ -120,7 +122,10 @@ static int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
result = rn_smu_wait_for_response(clk_mgr, 10, 200000);
- ASSERT(result == VBIOSSMC_Result_OK || result == VBIOSSMC_Result_UnknownCmd);
+ if (IS_SMU_TIMEOUT(result)) {
+ ASSERT(0);
+ dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
+ }
/* Actual dispclk set is returned in the parameter register */
return REG_READ(MP1_SMN_C2PMSG_83);
@@ -159,20 +164,6 @@ int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dis
return actual_dispclk_set_mhz * 1000;
}
-int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr)
-{
- int actual_dprefclk_set_mhz = -1;
-
- actual_dprefclk_set_mhz = rn_vbios_smu_send_msg_with_param(
- clk_mgr,
- VBIOSSMC_MSG_SetDprefclkFreq,
- khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz));
-
- /* TODO: add code for programing DP DTO, currently this is down by command table */
-
- return actual_dprefclk_set_mhz * 1000;
-}
-
int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz)
{
int actual_dcfclk_set_mhz = -1;
@@ -185,10 +176,6 @@ int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int reque
VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
khz_to_mhz_ceil(requested_dcfclk_khz));
-#ifdef DBG
- smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000);
-#endif
-
return actual_dcfclk_set_mhz * 1000;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
index 1ce19d875358..f76fad87f0e1 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
@@ -30,7 +30,6 @@ enum dcn_pwr_state;
int rn_vbios_smu_get_smu_version(struct clk_mgr_internal *clk_mgr);
int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
-int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr);
int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz);
int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz);
void rn_vbios_smu_set_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
index fa09c594fd36..06da34676965 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
@@ -56,6 +56,7 @@
#define DALSMC_MSG_SetDisplayRefreshFromMall 0xF
#define DALSMC_MSG_SetExternalClientDfCstateAllow 0x10
#define DALSMC_MSG_BacoAudioD3PME 0x11
-#define DALSMC_Message_Count 0x12
+#define DALSMC_MSG_SmartAccess 0x12
+#define DALSMC_Message_Count 0x13
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
index 3271c8c7905d..ef77fcd164ed 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
@@ -30,6 +30,7 @@
#include "dce100/dce_clk_mgr.h"
#include "dcn30/dcn30_clk_mgr.h"
#include "dml/dcn30/dcn30_fpu.h"
+#include "dcn30/dcn30m_clk_mgr.h"
#include "reg_helper.h"
#include "core_types.h"
#include "dm_helpers.h"
@@ -474,7 +475,7 @@ static void dcn30_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct d
clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ;
- for (i = 0; i < MAX_PIPES * 2; i++) {
+ for (i = 0; i < MAX_LINKS; i++) {
if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req)
max_phyclk_req = clk_mgr->cur_phyclk_req_table[i];
}
@@ -498,7 +499,8 @@ static struct clk_mgr_funcs dcn3_funcs = {
.are_clock_states_equal = dcn3_are_clock_states_equal,
.enable_pme_wa = dcn3_enable_pme_wa,
.notify_link_rate_change = dcn30_notify_link_rate_change,
- .is_smu_present = dcn3_is_smu_present
+ .is_smu_present = dcn3_is_smu_present,
+ .set_smartmux_switch = dcn30m_set_smartmux_switch
};
static void dcn3_init_clocks_fpga(struct clk_mgr *clk_mgr)
@@ -560,11 +562,19 @@ void dcn3_clk_mgr_construct(
dce_clock_read_ss_info(clk_mgr);
clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL);
+ if (!clk_mgr->base.bw_params) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
/* need physical address of table to give to PMFW */
clk_mgr->wm_range_table = dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx,
DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t),
&clk_mgr->wm_range_table_addr);
+ if (!clk_mgr->wm_range_table) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
}
void dcn3_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
index bdbf18306698..827bc2431d5d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
@@ -23,7 +23,6 @@
*
*/
-#include <linux/delay.h>
#include "dcn30_clk_mgr_smu_msg.h"
#include "clk_mgr_internal.h"
@@ -54,6 +53,7 @@
*/
static uint32_t dcn30_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
{
+ const uint32_t initial_max_retries = max_retries;
uint32_t reg = 0;
do {
@@ -69,7 +69,7 @@ static uint32_t dcn30_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un
/* handle DALSMC_Result_CmdRejectedBusy? */
- /* Log? */
+ TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx);
return reg;
}
@@ -89,6 +89,8 @@ static bool dcn30_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint
/* Trigger the message transaction by writing the message ID */
REG_WRITE(DAL_MSG_REG, msg_id);
+ TRACE_SMU_MSG(msg_id, param_in, clk_mgr->base.ctx);
+
result = dcn30_smu_wait_for_response(clk_mgr, 10, 200000);
if (IS_SMU_TIMEOUT(result)) {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
index 61bb1d86182e..1bfd6f66f035 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
-// This is a stripped-down version of the smu11_driver_if.h file for the relevant DAL interfaces.
+/* Copyright © 2022-2024 Advanced Micro Devices, Inc. All rights reserved. */
#define SMU11_DRIVER_IF_VERSION 0x40
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c
new file mode 100644
index 000000000000..8e8a11c7437e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "clk_mgr_internal.h"
+#include "dcn30/dcn30m_clk_mgr.h"
+#include "dcn30m_clk_mgr_smu_msg.h"
+
+
+uint32_t dcn30m_set_smartmux_switch(struct clk_mgr *clk_mgr_base, uint32_t pins_to_set)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ return dcn30m_smu_set_smart_mux_switch(clk_mgr, pins_to_set);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h
new file mode 100644
index 000000000000..757985b2eadc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN30M_CLK_MGR_H__
+#define __DCN30M_CLK_MGR_H__
+
+uint32_t dcn30m_set_smartmux_switch(struct clk_mgr *clk_mgr_base, uint32_t pins_to_set);
+
+#endif //__DCN30M_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c
new file mode 100644
index 000000000000..0dd0583ff21e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dcn30m_clk_mgr_smu_msg.h"
+
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+#include "dm_helpers.h"
+
+#include "dalsmc.h"
+
+#define mmDAL_MSG_REG 0x1628A
+#define mmDAL_ARG_REG 0x16273
+#define mmDAL_RESP_REG 0x16274
+
+#define REG(reg_name) \
+ mm ## reg_name
+
+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ CTX->logger
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
+
+/*
+ * Function to be used instead of REG_WAIT macro because the wait ends when
+ * the register is NOT EQUAL to zero, and because the translation in msg_if.h
+ * won't work with REG_WAIT.
+ */
+static uint32_t dcn30m_smu_wait_for_response(struct clk_mgr_internal *clk_mgr,
+ unsigned int delay_us, unsigned int max_retries)
+{
+ uint32_t reg = 0;
+
+ do {
+ reg = REG_READ(DAL_RESP_REG);
+ if (reg)
+ break;
+
+ if (delay_us >= 1000)
+ msleep(delay_us/1000);
+ else if (delay_us > 0)
+ udelay(delay_us);
+ } while (max_retries--);
+
+ /* handle DALSMC_Result_CmdRejectedBusy? */
+
+ /* Log? */
+
+ return reg;
+}
+
+static bool dcn30m_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
+ uint32_t msg_id, uint32_t param_in, uint32_t *param_out)
+{
+ uint32_t result;
+ /* Wait for response register to be ready */
+ dcn30m_smu_wait_for_response(clk_mgr, 10, 200000);
+
+ /* Clear response register */
+ REG_WRITE(DAL_RESP_REG, 0);
+
+ /* Set the parameter register for the SMU message */
+ REG_WRITE(DAL_ARG_REG, param_in);
+
+ /* Trigger the message transaction by writing the message ID */
+ REG_WRITE(DAL_MSG_REG, msg_id);
+
+ result = dcn30m_smu_wait_for_response(clk_mgr, 10, 200000);
+
+ if (IS_SMU_TIMEOUT(result))
+ dm_helpers_smu_timeout(CTX, msg_id, param_in, 10 * 200000);
+
+ /* On success, read back the value left in the argument register */
+ if (result == DALSMC_Result_OK) {
+ if (param_out)
+ *param_out = REG_READ(DAL_ARG_REG);
+
+ return true;
+ }
+
+ return false;
+}
+
+uint32_t dcn30m_smu_set_smart_mux_switch(struct clk_mgr_internal *clk_mgr, uint32_t pins_to_set)
+{
+ uint32_t response = 0;
+
+ smu_print("SMU Set SmartMux Switch: switch_dgpu = %d\n", pins_to_set);
+
+ dcn30m_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_SmartAccess, pins_to_set, &response);
+
+ return response;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h
new file mode 100644
index 000000000000..8a59a473fc5e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_
+#define DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_
+
+#include "core_types.h"
+
+struct clk_mgr_internal;
+
+uint32_t dcn30m_smu_set_smart_mux_switch(struct clk_mgr_internal *clk_mgr, uint32_t pins_to_set);
+#endif /* DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c
index e4f96b6fd79d..b4fb17b7a096 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c
@@ -29,6 +29,7 @@
#include <linux/delay.h>
#include "dcn301_smu.h"
+#include "dm_helpers.h"
#include "vangogh_ip_offset.h"
@@ -120,7 +121,10 @@ static int dcn301_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
result = dcn301_smu_wait_for_response(clk_mgr, 10, 200000);
- ASSERT(result == VBIOSSMC_Result_OK);
+ if (IS_SMU_TIMEOUT(result)) {
+ ASSERT(0);
+ dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
+ }
/* Actual dispclk set is returned in the parameter register */
return REG_READ(MP1_SMN_C2PMSG_83);
@@ -180,10 +184,6 @@ int dcn301_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request
VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
khz_to_mhz_ceil(requested_dcfclk_khz));
-#ifdef DBG
- smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000);
-#endif
-
return actual_dcfclk_set_mhz * 1000;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
index a5489fe6875f..7aee02d56292 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c
@@ -252,16 +252,16 @@ static void vg_dump_clk_registers(struct clk_state_registers_and_bypass *regs_an
regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4)
+ if (regs_and_bypass->dppclk_bypass > 4)
regs_and_bypass->dppclk_bypass = 0;
regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4)
+ if (regs_and_bypass->dcfclk_bypass > 4)
regs_and_bypass->dcfclk_bypass = 0;
regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4)
+ if (regs_and_bypass->dispclk_bypass > 4)
regs_and_bypass->dispclk_bypass = 0;
regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007;
- if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4)
+ if (regs_and_bypass->dprefclk_bypass > 4)
regs_and_bypass->dprefclk_bypass = 0;
if (log_info->enabled) {
@@ -546,6 +546,8 @@ static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_ta
int i;
for (i = 0; i < VG_NUM_SOC_VOLTAGE_LEVELS; i++) {
+ if (i >= VG_NUM_DCFCLK_DPM_LEVELS)
+ break;
if (clock_table->SocVoltage[i] == voltage)
return clock_table->DcfClocks[i];
}
@@ -561,10 +563,12 @@ static void vg_clk_mgr_helper_populate_bw_params(
{
int i, j;
struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
+ uint32_t max_dispclk = 0, max_dppclk = 0;
j = -1;
- ASSERT(VG_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL);
+ static_assert(VG_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL,
+ "number of reported FCLK DPM levels exceeds maximum");
/* Find lowest DPM, FCLK is filled in reverse order*/
@@ -581,6 +585,15 @@ static void vg_clk_mgr_helper_populate_bw_params(
return;
}
+ /* dispclk and dppclk can be max at any voltage, same number of levels for both */
+ if (clock_table->NumDispClkLevelsEnabled <= VG_NUM_DISPCLK_DPM_LEVELS &&
+ clock_table->NumDispClkLevelsEnabled <= VG_NUM_DPPCLK_DPM_LEVELS) {
+ max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
+ max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
+ } else {
+ ASSERT(0);
+ }
+
bw_params->clk_table.num_entries = j + 1;
for (i = 0; i < bw_params->clk_table.num_entries - 1; i++, j--) {
@@ -588,11 +601,17 @@ static void vg_clk_mgr_helper_populate_bw_params(
bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].memclk;
bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].voltage;
bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->DfPstateTable[j].voltage);
+
+ /* Now update clocks we do read */
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
}
bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].fclk;
bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].memclk;
bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].voltage;
bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, VG_NUM_DCFCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, VG_NUM_DISPCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, VG_NUM_DPPCLK_DPM_LEVELS);
bw_params->vram_type = bios_info->memory_type;
bw_params->num_channels = bios_info->ma_channel_number;
@@ -728,7 +747,7 @@ void vg_clk_mgr_construct(
clk_mgr->base.base.bw_params = &vg_bw_params;
vg_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
- if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ if (ctx->dc_bios->integrated_info) {
vg_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index 3db4ef564b99..051052bd10c9 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -47,15 +47,16 @@
#include "dcn30/dcn30_clk_mgr.h"
#include "dc_dmub_srv.h"
-#include "link.h"
+#include "link_service.h"
#include "logger_types.h"
+
+
+#include "yellow_carp_offset.h"
#undef DC_LOGGER
#define DC_LOGGER \
clk_mgr->base.base.ctx->logger
-#include "yellow_carp_offset.h"
-
#define regCLK1_CLK_PLL_REQ 0x0237
#define regCLK1_CLK_PLL_REQ_BASE_IDX 0
@@ -253,7 +254,7 @@ void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
@@ -562,7 +563,8 @@ static void dcn31_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
j = -1;
- ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL);
+ static_assert(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL,
+ "number of reported pstate levels exceeds maximum");
/* Find lowest DPM, FCLK is filled in reverse order*/
@@ -784,7 +786,7 @@ void dcn31_clk_mgr_construct(
i, smu_dpm_clks.dpm_clks->DfPstateTable[i].MemClk,
i, smu_dpm_clks.dpm_clks->DfPstateTable[i].Voltage);
}
- if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ if (ctx->dc_bios->integrated_info) {
dcn31_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
index 32279c5db724..f201628e4e98 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
@@ -23,7 +23,6 @@
*
*/
-#include <linux/delay.h>
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "reg_helper.h"
@@ -202,10 +201,6 @@ int dcn31_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requeste
VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
khz_to_mhz_ceil(requested_dcfclk_khz));
-#ifdef DBG
- smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000);
-#endif
-
return actual_dcfclk_set_mhz * 1000;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index 7326b7565846..9e63fa72101c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -48,14 +48,11 @@
#include "dcn31/dcn31_clk_mgr.h"
#include "dc_dmub_srv.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn314_smu.h"
#include "logger_types.h"
-#undef DC_LOGGER
-#define DC_LOGGER \
- clk_mgr->base.base.ctx->logger
#define MAX_INSTANCE 7
@@ -77,6 +74,10 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0,
{ { 0x0001B200, 0x0242DC00, 0, 0, 0, 0, 0, 0 } },
{ { 0x0001B400, 0x0242E000, 0, 0, 0, 0, 0, 0 } } } };
+#undef DC_LOGGER
+#define DC_LOGGER \
+ clk_mgr->base.base.ctx->logger
+
#define regCLK1_CLK_PLL_REQ 0x0237
#define regCLK1_CLK_PLL_REQ_BASE_IDX 0
@@ -87,6 +88,82 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0,
#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+#define regCLK1_CLK0_DFS_CNTL 0x0269
+#define regCLK1_CLK0_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK1_DFS_CNTL 0x026c
+#define regCLK1_CLK1_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK2_DFS_CNTL 0x026f
+#define regCLK1_CLK2_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK3_DFS_CNTL 0x0272
+#define regCLK1_CLK3_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK4_DFS_CNTL 0x0275
+#define regCLK1_CLK4_DFS_CNTL_BASE_IDX 0
+#define regCLK1_CLK5_DFS_CNTL 0x0278
+#define regCLK1_CLK5_DFS_CNTL_BASE_IDX 0
+
+#define regCLK1_CLK0_CURRENT_CNT 0x02fb
+#define regCLK1_CLK0_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK1_CURRENT_CNT 0x02fc
+#define regCLK1_CLK1_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK2_CURRENT_CNT 0x02fd
+#define regCLK1_CLK2_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK3_CURRENT_CNT 0x02fe
+#define regCLK1_CLK3_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK4_CURRENT_CNT 0x02ff
+#define regCLK1_CLK4_CURRENT_CNT_BASE_IDX 0
+#define regCLK1_CLK5_CURRENT_CNT 0x0300
+#define regCLK1_CLK5_CURRENT_CNT_BASE_IDX 0
+
+#define regCLK1_CLK0_BYPASS_CNTL 0x028a
+#define regCLK1_CLK0_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK1_BYPASS_CNTL 0x0293
+#define regCLK1_CLK1_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK2_BYPASS_CNTL 0x029c
+#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK3_BYPASS_CNTL 0x02a5
+#define regCLK1_CLK3_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK4_BYPASS_CNTL 0x02ae
+#define regCLK1_CLK4_BYPASS_CNTL_BASE_IDX 0
+#define regCLK1_CLK5_BYPASS_CNTL 0x02b7
+#define regCLK1_CLK5_BYPASS_CNTL_BASE_IDX 0
+
+#define regCLK1_CLK0_DS_CNTL 0x0283
+#define regCLK1_CLK0_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK1_DS_CNTL 0x028c
+#define regCLK1_CLK1_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK2_DS_CNTL 0x0295
+#define regCLK1_CLK2_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK3_DS_CNTL 0x029e
+#define regCLK1_CLK3_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK4_DS_CNTL 0x02a7
+#define regCLK1_CLK4_DS_CNTL_BASE_IDX 0
+#define regCLK1_CLK5_DS_CNTL 0x02b0
+#define regCLK1_CLK5_DS_CNTL_BASE_IDX 0
+
+#define regCLK1_CLK0_ALLOW_DS 0x0284
+#define regCLK1_CLK0_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK1_ALLOW_DS 0x028d
+#define regCLK1_CLK1_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK2_ALLOW_DS 0x0296
+#define regCLK1_CLK2_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK3_ALLOW_DS 0x029f
+#define regCLK1_CLK3_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK4_ALLOW_DS 0x02a8
+#define regCLK1_CLK4_ALLOW_DS_BASE_IDX 0
+#define regCLK1_CLK5_ALLOW_DS 0x02b1
+#define regCLK1_CLK5_ALLOW_DS_BASE_IDX 0
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
+
+#define regCLK6_0_CLK6_spll_field_8 0x464b
+#define regCLK6_0_CLK6_spll_field_8_BASE_IDX 0
+
+#define CLK6_0_CLK6_spll_field_8__spll_ssc_en__SHIFT 0xd
+#define CLK6_0_CLK6_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
#define REG(reg_name) \
(CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
@@ -131,35 +208,68 @@ static int dcn314_get_active_display_cnt_wa(
return display_count;
}
+/*
+ * dcn314_disable_otg_wa - stop or restart the timing generator of selected pipes
+ *
+ * Walks every pipe of the resource pool and skips pipes that have a top_pipe
+ * or a prev_odm_pipe. The pipe array is taken from @context when
+ * @safe_to_lower is true and from dc->current_state otherwise. Only pipes
+ * whose stream is dpms_off or uses a virtual signal are touched:
+ * when @disable is true, immediate_disable_crtc() is called if both the tg
+ * and the hook exist, followed by reset_sync_context_for_pipe(); when
+ * @disable is false, enable_crtc() is called.
+ */
-static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+ bool safe_to_lower, bool disable)
{
struct dc *dc = clk_mgr_base->ctx->dc;
int i;
for (i = 0; i < dc->res_pool->pipe_count; ++i) {
- struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe = safe_to_lower
+ ? &context->res_ctx.pipe_ctx[i]
+ : &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
- struct stream_encoder *stream_enc = pipe->stream_res.stream_enc;
-
if (disable) {
- if (stream_enc && stream_enc->funcs->disable_fifo)
- pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc);
+ if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+ pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
- pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
reset_sync_context_for_pipe(dc, context, i);
} else {
+ /* NOTE(review): unlike the disable path, tg and enable_crtc are not NULL-checked here - confirm this is intended. */
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
-
- if (stream_enc && stream_enc->funcs->enable_fifo)
- pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc);
}
}
}
}
+/*
+ * dcn314_is_spll_ssc_enabled - report whether the spll_ssc_en field is set
+ * @clk_mgr_base: clock manager handle, converted with TO_CLK_MGR_INTERNAL()
+ *
+ * Return: true when the spll_ssc_en field of CLK6_0_CLK6_spll_field_8 reads
+ * back as 1, false otherwise.
+ */
+bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+ /*
+ * NOTE(review): clk_mgr is never referenced by name below; presumably the
+ * REG_GET() macro expands to code that uses it - confirm before renaming.
+ */
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ uint32_t ssc_enable;
+
+ REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_enable);
+
+ return ssc_enable == 1;
+}
+
+/*
+ * dcn314_init_clocks - reset the cached clock state and seed it from hardware
+ * @clk_mgr: clock manager whose clks, dp_dto_source_clock_in_khz and
+ * boot_snapshot members are updated
+ *
+ * Clears clk_mgr->clks (keeping ref_dtbclk_khz), marks p-state change as
+ * supported, sets the power and z-state fields to their UNKNOWN values,
+ * selects the DP DTO source clock, and finally takes a register snapshot from
+ * which dispclk_khz is initialised.
+ */
+void dcn314_init_clocks(struct clk_mgr *clk_mgr)
+{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+ uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
+ struct clk_mgr_dcn314 *clk_mgr_dcn314 = TO_CLK_MGR_DCN314(clk_mgr_int);
+ struct clk_log_info log_info = {0};
+
+ /* The memset also wipes ref_dtbclk_khz, which was saved above and is restored next. */
+ memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+ clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk
+ // Assumption is that the boot state always supports p-state change
+ clk_mgr->clks.p_state_change_support = true;
+ clk_mgr->clks.prev_p_state_change_support = true;
+ clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
+ clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
+
+ // If SSC is enabled, use the SS-adjusted dprefclk as the DP DTO source clock; otherwise use dprefclk as is
+ if (dcn314_is_spll_ssc_enabled(clk_mgr))
+ clk_mgr->dp_dto_source_clock_in_khz =
+ dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+ else
+ clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+
+ /* Snapshot the clock registers and use the snapshot's dispclk (scaled by 1000) as the initial dispclk_khz. */
+ dcn314_dump_clk_registers(&clk_mgr->boot_snapshot, &clk_mgr_dcn314->base.base, &log_info);
+ clk_mgr->clks.dispclk_khz = clk_mgr->boot_snapshot.dispclk * 1000;
+}
+
void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_state *context,
bool safe_to_lower)
@@ -176,6 +286,8 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
if (dc->work_arounds.skip_clock_update)
return;
+ display_count = dcn314_get_active_display_cnt_wa(dc, context);
+
/*
* if it is safe to lower, but we are already in the lower state, we don't have to do anything
* also if safe to lower is false, we just go in the higher state
@@ -194,7 +306,6 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
}
/* check that we're not already in lower */
if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
- display_count = dcn314_get_active_display_cnt_wa(dc, context);
/* if we can go lower, go lower */
if (display_count == 0) {
union display_idle_optimization_u idle_info = { 0 };
@@ -251,12 +362,20 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
update_dppclk = true;
}
- if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
- dcn314_disable_otg_wa(clk_mgr_base, context, true);
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) &&
+ (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) {
+ int requested_dispclk_khz = new_clocks->dispclk_khz;
+
+ dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
+ /* Clamp the requested clock to PMFW based on their limit. */
+ if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz)
+ requested_dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ dcn314_smu_set_dispclk(clk_mgr, requested_dispclk_khz);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
- dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
- dcn314_disable_otg_wa(clk_mgr_base, context, false);
+
+ dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
update_dispclk = true;
}
@@ -284,7 +403,7 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
@@ -343,10 +462,65 @@ bool dcn314_are_clock_states_equal(struct dc_clocks *a,
return true;
}
-static void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+
+/*
+ * dcn314_dump_clk_registers_internal - capture raw CLK1 register values
+ * @internal: output; each member written here holds the unmodified value
+ * returned by REG_READ() for the register of the same name
+ * @clk_mgr_base: clock manager handle, converted with TO_CLK_MGR_INTERNAL()
+ *
+ * NOTE(review): the output type is struct dcn35_clk_internal although this is
+ * DCN314 code - confirm the structure is meant to be shared.
+ */
+static void dcn314_dump_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ // read dtbclk (CLK4)
+ internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT);
+ internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL);
+
+ // read dcfclk (CLK3)
+ internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT);
+ internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL);
+
+ // read dcf deep sleep divider and its allow flag (CLK3)
+ internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL);
+ internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS);
+
+ // read dppclk (CLK1)
+ internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT);
+ internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL);
+
+ // read dprefclk (CLK2)
+ internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT);
+ internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL);
+
+ // read dispclk (CLK0)
+ internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT);
+ internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL);
+}
+
+/*
+ * dcn314_dump_clk_registers - decode the current CLK1 register state
+ * @regs_and_bypass: output; receives the decoded clock and bypass values
+ * @clk_mgr_base: clock manager handle used for the register reads
+ * @log_info: not used by this implementation
+ *
+ * Every *_CURRENT_CNT and the DS_CNTL register value is divided by 10 before
+ * it is stored (NOTE(review): presumably a conversion to MHz, since
+ * dcn314_init_clocks() multiplies the dispclk result by 1000 to obtain kHz -
+ * confirm the register unit). Each bypass selector is the low three bits of
+ * the matching *_BYPASS_CNTL register; a selector above 4 is replaced by 0.
+ */
+void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
{
- return;
+ struct dcn35_clk_internal internal = {0};
+
+ dcn314_dump_clk_registers_internal(&internal, clk_mgr_base);
+
+ regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10;
+ regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10;
+ regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS;
+ regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10;
+ regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10;
+ regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
+ regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10;
+
+ /*
+ * The "& 0x0007" mask already limits every selector to 0..7, so a
+ * "< 0" test can never be true; only the upper bound is checked.
+ */
+ regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dppclk_bypass > 4)
+ regs_and_bypass->dppclk_bypass = 0;
+ regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dcfclk_bypass > 4)
+ regs_and_bypass->dcfclk_bypass = 0;
+ regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dispclk_bypass > 4)
+ regs_and_bypass->dispclk_bypass = 0;
+ regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dprefclk_bypass > 4)
+ regs_and_bypass->dprefclk_bypass = 0;
}
static struct clk_bw_params dcn314_bw_params = {
@@ -436,6 +610,11 @@ static DpmClocks314_t dummy_clocks;
static struct dcn314_watermarks dummy_wms = { 0 };
+/*
+ * Lookup table read by dcn314_read_ss_info_from_lut(): ss_percentage[] is
+ * indexed by the CLK2_BYPASS_SEL clock-source value, and ss_divider is copied
+ * to dprefclk_ss_divider when the selected percentage is non-zero.
+ * NOTE(review): the table is only read in the code visible here; it could
+ * probably be declared const - confirm there are no writers elsewhere.
+ */
+static struct dcn314_ss_info_table ss_info_table = {
+ .ss_divider = 1000,
+ .ss_percentage = {0, 0, 375, 375, 375}
+};
+
static void dcn314_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn314_watermarks *table)
{
int i, num_valid_sets;
@@ -708,13 +887,31 @@ static struct clk_mgr_funcs dcn314_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
.update_clocks = dcn314_update_clocks,
- .init_clocks = dcn31_init_clocks,
+ .init_clocks = dcn314_init_clocks,
.enable_pme_wa = dcn314_enable_pme_wa,
.are_clock_states_equal = dcn314_are_clock_states_equal,
.notify_wm_ranges = dcn314_notify_wm_ranges
};
extern struct clk_mgr_funcs dcn3_fpga_funcs;
+/*
+ * dcn314_read_ss_info_from_lut - set dprefclk spread-spectrum info from ss_info_table
+ * @clk_mgr: internal clock manager whose dprefclk_ss_percentage,
+ * ss_on_dprefclk and dprefclk_ss_divider members may be updated
+ *
+ * Nothing is changed unless SSC is reported enabled and the clock-source
+ * selector is a valid index into ss_info_table.ss_percentage[].
+ */
+static void dcn314_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+ uint32_t clock_source;
+
+ /* The CLK2_BYPASS_SEL field is the index into the percentage table. */
+ REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
+
+ /* The SSC enable bit itself is read by dcn314_is_spll_ssc_enabled(). */
+ if (dcn314_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
+ clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
+
+ /* A zero percentage leaves ss_on_dprefclk and dprefclk_ss_divider untouched. */
+ if (clk_mgr->dprefclk_ss_percentage != 0) {
+ clk_mgr->ss_on_dprefclk = true;
+ clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+ }
+ }
+}
+
void dcn314_clk_mgr_construct(
struct dc_context *ctx,
struct clk_mgr_dcn314 *clk_mgr,
@@ -782,6 +979,7 @@ void dcn314_clk_mgr_construct(
clk_mgr->base.base.dprefclk_khz = 600000;
clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
dce_clock_read_ss_info(&clk_mgr->base);
+ dcn314_read_ss_info_from_lut(&clk_mgr->base);
/*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/
clk_mgr->base.base.bw_params = &dcn314_bw_params;
@@ -830,7 +1028,7 @@ void dcn314_clk_mgr_construct(
i, smu_dpm_clks.dpm_clks->DfPstateTable[i].Voltage);
}
- if (ctx->dc_bios && ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) {
+ if (ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) {
dcn314_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
index 171f84340eb2..0577eb527bc3 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
@@ -28,6 +28,8 @@
#define __DCN314_CLK_MGR_H__
#include "clk_mgr_internal.h"
+#define DCN314_NUM_CLOCK_SOURCES 5
+
struct dcn314_watermarks;
struct dcn314_smu_watermark_set {
@@ -40,9 +42,18 @@ struct clk_mgr_dcn314 {
struct dcn314_smu_watermark_set smu_wm_set;
};
+/**
+ * struct dcn314_ss_info_table - spread-spectrum lookup table for DCN314
+ * @ss_divider: value stored into dprefclk_ss_divider by
+ * dcn314_read_ss_info_from_lut() when the selected percentage is non-zero
+ * @ss_percentage: one entry per clock source (DCN314_NUM_CLOCK_SOURCES
+ * entries); the selected entry is stored into dprefclk_ss_percentage
+ */
+struct dcn314_ss_info_table {
+ uint32_t ss_divider;
+ uint32_t ss_percentage[DCN314_NUM_CLOCK_SOURCES];
+};
+
bool dcn314_are_clock_states_equal(struct dc_clocks *a,
struct dc_clocks *b);
+bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base);
+
+void dcn314_init_clocks(struct clk_mgr *clk_mgr);
+
void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_state *context,
bool safe_to_lower);
@@ -54,4 +65,9 @@ void dcn314_clk_mgr_construct(struct dc_context *ctx,
void dcn314_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
+
+void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+ struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info);
+
+
#endif //__DCN314_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
index 07baa10a8647..c4af406146b7 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
@@ -220,12 +220,6 @@ int dcn314_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request
VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
khz_to_mhz_ceil(requested_dcfclk_khz));
-#ifdef DBG
- smu_print("actual_dcfclk_set_mhz %d is set to : %d\n",
- actual_dcfclk_set_mhz,
- actual_dcfclk_set_mhz * 1000);
-#endif
-
return actual_dcfclk_set_mhz * 1000;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
index 047d19ea919c..78ca1e5c5e9e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h
@@ -37,34 +37,34 @@ typedef enum {
} WCK_RATIO_e;
typedef struct {
- uint32_t FClk;
- uint32_t MemClk;
- uint32_t Voltage;
- uint8_t WckRatio;
- uint8_t Spare[3];
+ uint32_t FClk;
+ uint32_t MemClk;
+ uint32_t Voltage;
+ uint8_t WckRatio;
+ uint8_t Spare[3];
} DfPstateTable314_t;
//Freq in MHz
//Voltage in milli volts with 2 fractional bits
typedef struct {
- uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
- uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
- uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
- uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
- uint32_t VClocks[NUM_VCN_DPM_LEVELS];
- uint32_t DClocks[NUM_VCN_DPM_LEVELS];
- uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
- DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS];
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS];
- uint8_t NumDcfClkLevelsEnabled;
- uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
- uint8_t NumSocClkLevelsEnabled;
- uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk
- uint8_t NumDfPstatesEnabled;
- uint8_t spare[3];
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk
+ uint8_t NumDfPstatesEnabled;
+ uint8_t spare[3];
- uint32_t MinGfxClk;
- uint32_t MaxGfxClk;
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
} DpmClocks314_t;
struct dcn314_watermarks {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
index b2c4f97afc8b..b315ed91e010 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
@@ -46,7 +46,7 @@
#define DC_LOGGER \
clk_mgr->base.base.ctx->logger
-#include "link.h"
+#include "link_service.h"
#define TO_CLK_MGR_DCN315(clk_mgr)\
container_of(clk_mgr, struct clk_mgr_dcn315, base)
@@ -130,7 +130,7 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
struct dc *dc = clk_mgr_base->ctx->dc;
- int display_count;
+ int display_count = 0;
bool update_dppclk = false;
bool update_dispclk = false;
bool dpp_clock_lowered = false;
@@ -145,6 +145,10 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
*/
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
if (safe_to_lower) {
+ if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
+ dcn315_smu_set_dtbclk(clk_mgr, false);
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
/* check that we're not already in lower */
if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
display_count = dcn315_get_active_display_cnt_wa(dc, context);
@@ -160,6 +164,10 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
}
}
} else {
+ if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
+ dcn315_smu_set_dtbclk(clk_mgr, true);
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
/* check that we're not already in D0 */
if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) {
union display_idle_optimization_u idle_info = { 0 };
@@ -186,8 +194,6 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
// workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
if (new_clocks->dppclk_khz < MIN_DPP_DISP_CLK)
new_clocks->dppclk_khz = MIN_DPP_DISP_CLK;
- if (new_clocks->dispclk_khz < MIN_DPP_DISP_CLK)
- new_clocks->dispclk_khz = MIN_DPP_DISP_CLK;
if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
@@ -196,15 +202,19 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
update_dppclk = true;
}
- if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
- /* No need to apply the w/a if we haven't taken over from bios yet */
- if (clk_mgr_base->clks.dispclk_khz)
- dcn315_disable_otg_wa(clk_mgr_base, context, true);
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) &&
+ (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) {
+ int requested_dispclk_khz = new_clocks->dispclk_khz;
+
+ dcn315_disable_otg_wa(clk_mgr_base, context, true);
+
+ /* Clamp the requested clock to PMFW based on their limit. */
+ if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz)
+ requested_dispclk_khz = dc->debug.min_disp_clk_khz;
+ dcn315_smu_set_dispclk(clk_mgr, requested_dispclk_khz);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
- dcn315_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
- if (clk_mgr_base->clks.dispclk_khz)
- dcn315_disable_otg_wa(clk_mgr_base, context, false);
+ dcn315_disable_otg_wa(clk_mgr_base, context, false);
update_dispclk = true;
}
@@ -232,7 +242,7 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static void dcn315_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
@@ -334,7 +344,7 @@ static struct wm_table lpddr5_wm_table = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
+ .pstate_latency_us = 129.0,
.sr_exit_time_us = 11.5,
.sr_enter_plus_exit_time_us = 14.5,
.valid = true,
@@ -342,7 +352,7 @@ static struct wm_table lpddr5_wm_table = {
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
+ .pstate_latency_us = 129.0,
.sr_exit_time_us = 11.5,
.sr_enter_plus_exit_time_us = 14.5,
.valid = true,
@@ -350,7 +360,7 @@ static struct wm_table lpddr5_wm_table = {
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
+ .pstate_latency_us = 129.0,
.sr_exit_time_us = 11.5,
.sr_enter_plus_exit_time_us = 14.5,
.valid = true,
@@ -358,7 +368,7 @@ static struct wm_table lpddr5_wm_table = {
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 11.65333,
+ .pstate_latency_us = 129.0,
.sr_exit_time_us = 11.5,
.sr_enter_plus_exit_time_us = 14.5,
.valid = true,
@@ -704,7 +714,7 @@ void dcn315_clk_mgr_construct(
i, smu_dpm_clks.dpm_clks->DfPstateTable[i].Voltage);
}
- if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ if (ctx->dc_bios->integrated_info) {
dcn315_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
index 1042cf1a3ab0..478b4d6a3544 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
@@ -29,6 +29,7 @@
#include "dm_helpers.h"
#include "dcn315_smu.h"
#include "mp/mp_13_0_5_offset.h"
+#include "logger_types.h"
#define MAX_INSTANCE 6
#define MAX_SEGMENT 6
@@ -48,12 +49,9 @@ static const struct IP_BASE MP0_BASE = { { { { 0x00016000, 0x00DC0000, 0x00E0000
{ { 0, 0, 0, 0, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0 } },
{ { 0, 0, 0, 0, 0, 0 } } } };
-static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D20, 0x00010400, 0x0241B000, 0x04040000 } },
- { { 0, 0, 0, 0, 0, 0 } },
- { { 0, 0, 0, 0, 0, 0 } },
- { { 0, 0, 0, 0, 0, 0 } },
- { { 0, 0, 0, 0, 0, 0 } },
- { { 0, 0, 0, 0, 0, 0 } } } };
+
+#define CTX clk_mgr->base.ctx
+#define IND_REG(offset) offset
#define regBIF_BX_PF2_RSMU_INDEX 0x0000
#define regBIF_BX_PF2_RSMU_INDEX_BASE_IDX 1
@@ -66,10 +64,6 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D
#define FN(reg_name, field) \
FD(reg_name##__##field)
-#define REG_NBIO(reg_name) \
- (NBIO_BASE.instance[0].segment[regBIF_BX_PF2_ ## reg_name ## _BASE_IDX] + regBIF_BX_PF2_ ## reg_name)
-
-#include "logger_types.h"
#undef DC_LOGGER
#define DC_LOGGER \
CTX->logger
@@ -77,6 +71,13 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D
#define mmMP1_C2PMSG_3 0x3B1050C
+#define reg__MP1_C2PMSG_3_MASK (0xFFFFFFFF)
+#define reg__MP1_C2PMSG_3__SHIFT (0)
+
+
+#define data_reg_name__MP1_C2PMSG_3_MASK (0xFFFFFFFF)
+#define data_reg_name__MP1_C2PMSG_3__SHIFT (0)
+
#define VBIOSSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team
#define VBIOSSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version
#define VBIOSSMC_MSG_Spare0 0x03 ///< Spare0
@@ -153,12 +154,10 @@ static int dcn315_smu_send_msg_with_param(
for (i = 0; i < SMU_REGISTER_WRITE_RETRY_COUNT; i++) {
/* Trigger the message transaction by writing the message ID */
- generic_write_indirect_reg(CTX,
- REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
- mmMP1_C2PMSG_3, msg_id);
- read_back_data = generic_read_indirect_reg(CTX,
- REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
- mmMP1_C2PMSG_3);
+ IX_REG_SET_SYNC(mmMP1_C2PMSG_3, 0,
+ MP1_C2PMSG_3, msg_id);
+ IX_REG_GET_SYNC(mmMP1_C2PMSG_3,
+ MP1_C2PMSG_3, &read_back_data);
if (read_back_data == msg_id)
break;
udelay(2);
@@ -215,10 +214,6 @@ int dcn315_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request
VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
khz_to_mhz_ceil(requested_dcfclk_khz));
-#ifdef DBG
- smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000);
-#endif
-
return actual_dcfclk_set_mhz * 1000;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 09151cc56ce4..1769b1f26e75 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -39,7 +39,7 @@
#include "dcn316_smu.h"
#include "dm_helpers.h"
#include "dc_dmub_srv.h"
-#include "link.h"
+#include "link_service.h"
// DCN316 this is CLK1 instance
#define MAX_INSTANCE 7
@@ -99,20 +99,25 @@ static int dcn316_get_active_display_cnt_wa(
return display_count;
}
-static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+ bool safe_to_lower, bool disable)
{
struct dc *dc = clk_mgr_base->ctx->dc;
int i;
for (i = 0; i < dc->res_pool->pipe_count; ++i) {
- struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe = safe_to_lower
+ ? &context->res_ctx.pipe_ctx[i]
+ : &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
- if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
- dc_is_virtual_signal(pipe->stream->signal))) {
+ if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
+ !pipe->stream->link_enc)) {
if (disable) {
- pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+ if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+ pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
reset_sync_context_for_pipe(dc, context, i);
} else
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
@@ -135,7 +140,7 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
struct dc *dc = clk_mgr_base->ctx->dc;
- int display_count;
+ int display_count = 0;
bool update_dppclk = false;
bool update_dispclk = false;
bool dpp_clock_lowered = false;
@@ -196,8 +201,6 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
// workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow.
if (new_clocks->dppclk_khz < 100000)
new_clocks->dppclk_khz = 100000;
- if (new_clocks->dispclk_khz < 100000)
- new_clocks->dispclk_khz = 100000;
if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
@@ -206,12 +209,19 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
update_dppclk = true;
}
- if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
- dcn316_disable_otg_wa(clk_mgr_base, context, true);
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) &&
+ (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) {
+ int requested_dispclk_khz = new_clocks->dispclk_khz;
+
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
+
+ /* Clamp the requested clock to PMFW based on their limit. */
+ if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz)
+ requested_dispclk_khz = dc->debug.min_disp_clk_khz;
+ dcn316_smu_set_dispclk(clk_mgr, requested_dispclk_khz);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
- dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
- dcn316_disable_otg_wa(clk_mgr_base, context, false);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
update_dispclk = true;
}
@@ -239,7 +249,7 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static void dcn316_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
@@ -480,7 +490,8 @@ static void dcn316_clk_mgr_helper_populate_bw_params(
j = -1;
- ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL);
+ static_assert(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL,
+ "number of reported pstate levels exceeds maximum");
/* Find lowest DPM, FCLK is filled in reverse order*/
@@ -646,7 +657,7 @@ void dcn316_clk_mgr_construct(
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
dcn316_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
- if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ if (ctx->dc_bios->integrated_info) {
dcn316_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c
index 3ed19197a755..8b82092b91cd 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c
@@ -189,10 +189,6 @@ int dcn316_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request
VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
khz_to_mhz_ceil(requested_dcfclk_khz));
-#ifdef DBG
- smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000);
-#endif
-
return actual_dcfclk_set_mhz * 1000;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dalsmc.h
index c427be6add8a..724a508b0adb 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dalsmc.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dalsmc.h
@@ -55,7 +55,16 @@
#define DALSMC_MSG_SetFclkSwitchAllow 0x11
#define DALSMC_MSG_SetCabForUclkPstate 0x12
#define DALSMC_MSG_SetWorstCaseUclkLatency 0x13
-#define DALSMC_Message_Count 0x14
+#define DALSMC_MSG_SetAlwaysWaitDmcubResp 0x14
+#define DALSMC_MSG_ReturnHardMinStatus 0x15
+#define DALSMC_Message_Count 0x16
+
+#define CHECK_HARD_MIN_CLK_DISPCLK 0x1
+#define CHECK_HARD_MIN_CLK_DPPCLK 0x2
+#define CHECK_HARD_MIN_CLK_DPREFCLK 0x4
+#define CHECK_HARD_MIN_CLK_DCFCLK 0x8
+#define CHECK_HARD_MIN_CLK_DTBCLK 0x10
+#define CHECK_HARD_MIN_CLK_UCLK 0x20
typedef enum {
FCLK_SWITCH_DISALLOW,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 984b52923534..7da7b41bd092 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -25,23 +25,22 @@
#include "dccg.h"
#include "clk_mgr_internal.h"
-
#include "dcn32/dcn32_clk_mgr_smu_msg.h"
#include "dcn20/dcn20_clk_mgr.h"
#include "dce100/dce_clk_mgr.h"
#include "dcn31/dcn31_clk_mgr.h"
+#include "dcn32/dcn32_clk_mgr.h"
#include "reg_helper.h"
#include "core_types.h"
#include "dm_helpers.h"
-#include "link.h"
-
+#include "link_service.h"
+#include "dc_state_priv.h"
#include "atomfirmware.h"
-#include "smu13_driver_if.h"
+#include "dcn32_smu13_driver_if.h"
#include "dcn/dcn_3_2_0_offset.h"
#include "dcn/dcn_3_2_0_sh_mask.h"
-#include "dcn32/dcn32_clk_mgr.h"
#include "dml/dcn32/dcn32_fpu.h"
#define DCN_BASE__INST0_SEG1 0x000000C0
@@ -53,6 +52,14 @@
#define mmCLK1_CLK3_DFS_CNTL 0x16E72
#define mmCLK1_CLK4_DFS_CNTL 0x16E75
+#define mmCLK1_CLK0_CURRENT_CNT 0x16EE7
+#define mmCLK1_CLK1_CURRENT_CNT 0x16EE8
+#define mmCLK1_CLK2_CURRENT_CNT 0x16EE9
+#define mmCLK1_CLK3_CURRENT_CNT 0x16EEA
+#define mmCLK1_CLK4_CURRENT_CNT 0x16EEB
+
+#define mmCLK4_CLK0_CURRENT_CNT 0x1B0C9
+
#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001ffUL
#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000f000UL
#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xffff0000UL
@@ -156,9 +163,14 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
unsigned int num_levels;
- struct clk_limit_num_entries *num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk;
+ struct clk_limit_num_entries *num_entries_per_clk;
unsigned int i;
+ if (!clk_mgr_base->bw_params)
+ return;
+
+ num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk;
+
memset(&(clk_mgr_base->clks), 0, sizeof(struct dc_clocks));
clk_mgr_base->clks.p_state_change_support = true;
clk_mgr_base->clks.prev_p_state_change_support = true;
@@ -166,9 +178,6 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base)
clk_mgr->smu_present = false;
clk_mgr->dpm_present = false;
- if (!clk_mgr_base->bw_params)
- return;
-
if (!clk_mgr_base->force_smu_not_present && dcn30_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver))
clk_mgr->smu_present = true;
@@ -209,6 +218,16 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base)
if (clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz > 1950)
clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = 1950;
+ /* DPPCLK */
+ dcn32_init_single_clock(clk_mgr, PPCLK_DPPCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].dppclk_mhz,
+ &num_entries_per_clk->num_dppclk_levels);
+ num_levels = num_entries_per_clk->num_dppclk_levels;
+ clk_mgr_base->bw_params->dc_mode_limit.dppclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DPPCLK);
+ //HW recommends limit of 1950 MHz in display clock for all DCN3.2.x
+ if (clk_mgr_base->bw_params->dc_mode_limit.dppclk_mhz > 1950)
+ clk_mgr_base->bw_params->dc_mode_limit.dppclk_mhz = 1950;
+
if (num_entries_per_clk->num_dcfclk_levels &&
num_entries_per_clk->num_dtbclk_levels &&
num_entries_per_clk->num_dispclk_levels)
@@ -233,13 +252,15 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base)
= khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz);
}
+ for (i = 0; i < num_levels; i++)
+ if (clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz > 1950)
+ clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz = 1950;
+
/* Get UCLK, update bounding box */
clk_mgr_base->funcs->get_memclk_states_from_smu(clk_mgr_base);
- DC_FP_START();
/* WM range table */
dcn32_build_wm_range_table(clk_mgr);
- DC_FP_END();
}
static void dcn32_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr,
@@ -355,7 +376,7 @@ static void dcn32_update_clocks_update_dentist(
int32_t N;
int32_t j;
- if (!pipe_ctx->stream)
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
continue;
/* Virtual encoders don't have this function */
if (!stream_enc->funcs->get_fifo_cal_average_level)
@@ -380,7 +401,15 @@ static void dcn32_update_clocks_update_dentist(
uint32_t temp_dispclk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz) / temp_disp_divider;
if (clk_mgr->smu_present)
- dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DISPCLK, khz_to_mhz_ceil(temp_dispclk_khz));
+ /*
+ * SMU uses discrete dispclk presets. We applied
+ * the same formula to increase our dppclk_khz
+ * to the next matching discrete value. By
+ * contract, we should use the preset dispclk
+ * floored in Mhz to describe the intended clock.
+ */
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DISPCLK,
+ khz_to_mhz_floor(temp_dispclk_khz));
if (dc->debug.override_dispclk_programming) {
REG_GET(DENTIST_DISPCLK_CNTL,
@@ -401,7 +430,7 @@ static void dcn32_update_clocks_update_dentist(
int32_t N;
int32_t j;
- if (!pipe_ctx->stream)
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
continue;
/* Virtual encoders don't have this function */
if (!stream_enc->funcs->get_fifo_cal_average_level)
@@ -419,7 +448,15 @@ static void dcn32_update_clocks_update_dentist(
/* do requested DISPCLK updates*/
if (clk_mgr->smu_present)
- dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DISPCLK, khz_to_mhz_ceil(clk_mgr->base.clks.dispclk_khz));
+ /*
+ * SMU uses discrete dispclk presets. We applied
+ * the same formula to increase our dppclk_khz
+ * to the next matching discrete value. By
+ * contract, we should use the preset dispclk
+ * floored in Mhz to describe the intended clock.
+ */
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DISPCLK,
+ khz_to_mhz_floor(clk_mgr->base.clks.dispclk_khz));
if (dc->debug.override_dispclk_programming) {
REG_GET(DENTIST_DISPCLK_CNTL,
@@ -450,6 +487,136 @@ static int dcn32_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base)
return 0;
}
+static bool dcn32_check_native_scaling(struct pipe_ctx *pipe)
+{
+ bool is_native_scaling = false;
+ int width = pipe->plane_state->src_rect.width;
+ int height = pipe->plane_state->src_rect.height;
+
+ if (pipe->stream->timing.h_addressable == width &&
+ pipe->stream->timing.v_addressable == height &&
+ pipe->plane_state->dst_rect.width == width &&
+ pipe->plane_state->dst_rect.height == height)
+ is_native_scaling = true;
+
+ return is_native_scaling;
+}
+
+static void dcn32_auto_dpm_test_log(
+ struct dc_clocks *new_clocks,
+ struct clk_mgr_internal *clk_mgr,
+ struct dc_state *context)
+{
+ unsigned int dispclk_khz_reg, dppclk_khz_reg, dprefclk_khz_reg, dcfclk_khz_reg, dtbclk_khz_reg,
+ fclk_khz_reg, mall_ss_size_bytes;
+ int dramclk_khz_override, fclk_khz_override, num_fclk_levels;
+
+ struct pipe_ctx *pipe_ctx_list[MAX_PIPES];
+ int active_pipe_count = 0;
+
+ for (int i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+ pipe_ctx_list[active_pipe_count] = pipe_ctx;
+ active_pipe_count++;
+ }
+ }
+
+ msleep(5);
+
+ mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes;
+
+ dispclk_khz_reg = REG_READ(CLK1_CLK0_CURRENT_CNT); // DISPCLK
+ dppclk_khz_reg = REG_READ(CLK1_CLK1_CURRENT_CNT); // DPPCLK
+ dprefclk_khz_reg = REG_READ(CLK1_CLK2_CURRENT_CNT); // DPREFCLK
+ dcfclk_khz_reg = REG_READ(CLK1_CLK3_CURRENT_CNT); // DCFCLK
+ dtbclk_khz_reg = REG_READ(CLK1_CLK4_CURRENT_CNT); // DTBCLK
+ fclk_khz_reg = REG_READ(CLK4_CLK0_CURRENT_CNT); // FCLK
+
+ // Overrides for these clocks in case there is no p_state change support
+ dramclk_khz_override = new_clocks->dramclk_khz;
+ fclk_khz_override = new_clocks->fclk_khz;
+
+ num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1;
+
+ if (!new_clocks->p_state_change_support) {
+ dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000;
+ }
+ if (!new_clocks->fclk_p_state_change_support) {
+ fclk_khz_override = clk_mgr->base.bw_params->clk_table.entries[num_fclk_levels].fclk_mhz * 1000;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////
+ // IMPORTANT: When adding more clocks to these logs, do NOT put a newline
+ // anywhere other than at the very end of the string.
+ // Arguments must be passed in the same order as the fields in the string.
+ // Formatting example (make sure to have " - " between each entry):
+ //
+ // AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n"
+ ////////////////////////////////////////////////////////////////////////////
+ if (active_pipe_count > 0 &&
+ new_clocks->dramclk_khz > 0 &&
+ new_clocks->fclk_khz > 0 &&
+ new_clocks->dcfclk_khz > 0 &&
+ new_clocks->dppclk_khz > 0) {
+
+ uint32_t pix_clk_list[MAX_PIPES] = {0};
+ int p_state_list[MAX_PIPES] = {0};
+ int disp_src_width_list[MAX_PIPES] = {0};
+ int disp_src_height_list[MAX_PIPES] = {0};
+ uint64_t disp_src_refresh_list[MAX_PIPES] = {0};
+ bool is_scaled_list[MAX_PIPES] = {0};
+
+ for (int i = 0; i < active_pipe_count; i++) {
+ struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i];
+ uint64_t refresh_rate;
+
+ pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz;
+ p_state_list[i] = curr_pipe_ctx->p_state_type;
+
+ refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 +
+ curr_pipe_ctx->stream->timing.v_total * (uint64_t)curr_pipe_ctx->stream->timing.h_total - (uint64_t)1);
+ refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total);
+ refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total);
+ disp_src_refresh_list[i] = refresh_rate;
+
+ if (curr_pipe_ctx->plane_state) {
+ is_scaled_list[i] = !(dcn32_check_native_scaling(curr_pipe_ctx));
+ disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width;
+ disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height;
+ }
+ }
+
+ DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - "
+ "dcfclk:%d - dppclk:%d - dispclk_hw:%d - "
+ "dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - "
+ "dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - "
+ "pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - "
+ "p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - "
+ "pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - "
+ "pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - "
+ "pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - "
+ "pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n",
+ dramclk_khz_override,
+ fclk_khz_override,
+ new_clocks->dcfclk_khz,
+ new_clocks->dppclk_khz,
+ dispclk_khz_reg,
+ dppclk_khz_reg,
+ dprefclk_khz_reg,
+ dcfclk_khz_reg,
+ dtbclk_khz_reg,
+ fclk_khz_reg,
+ pix_clk_list[0], pix_clk_list[1], pix_clk_list[2], pix_clk_list[3],
+ mall_ss_size_bytes,
+ p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3],
+ disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0],
+ disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1],
+ disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2],
+ disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]);
+ }
+}
static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_state *context,
@@ -547,8 +714,12 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
* since we calculate mode support based on softmax being the max UCLK
* frequency.
*/
- dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK,
- dc->clk_mgr->bw_params->dc_mode_softmax_memclk);
+ if (dc->debug.disable_dc_mode_overwrite) {
+ dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz);
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz);
+ } else
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK,
+ dc->clk_mgr->bw_params->dc_mode_softmax_memclk);
} else {
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz);
}
@@ -581,8 +752,13 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
/* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
if (clk_mgr_base->clks.p_state_change_support &&
(update_uclk || !clk_mgr_base->clks.prev_p_state_change_support) &&
- !dc->work_arounds.clock_update_disable_mask.uclk)
+ !dc->work_arounds.clock_update_disable_mask.uclk) {
+ if (dc->clk_mgr->dc_mode_softmax_enabled && dc->debug.disable_dc_mode_overwrite)
+ dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK,
+ max((int)dc->clk_mgr->bw_params->dc_mode_softmax_memclk, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)));
+
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz));
+ }
if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
clk_mgr_base->clks.num_ways > new_clocks->num_ways) {
@@ -599,7 +775,15 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
if (clk_mgr->smu_present && !dpp_clock_lowered)
- dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DPPCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dppclk_khz));
+ /*
+ * SMU uses discrete dppclk presets. We applied
+ * the same formula to increase our dppclk_khz
+ * to the next matching discrete value. By
+ * contract, we should use the preset dppclk
+ * floored in Mhz to describe the intended clock.
+ */
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DPPCLK,
+ khz_to_mhz_floor(clk_mgr_base->clks.dppclk_khz));
update_dppclk = true;
}
@@ -620,6 +804,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
/* DCCG requires KHz precision for DTBCLK */
clk_mgr_base->clks.ref_dtbclk_khz =
dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz));
+
dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
}
@@ -629,7 +814,15 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
dcn32_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
dcn32_update_clocks_update_dentist(clk_mgr, context);
if (clk_mgr->smu_present)
- dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DPPCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dppclk_khz));
+ /*
+ * SMU uses discrete dppclk presets. We applied
+ * the same formula to increase our dppclk_khz
+ * to the next matching discrete value. By
+ * contract, we should use the preset dppclk
+ * floored in Mhz to describe the intended clock.
+ */
+ dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DPPCLK,
+ khz_to_mhz_floor(clk_mgr_base->clks.dppclk_khz));
} else {
/* if clock is being raised, increase refclk before lowering DTO */
if (update_dppclk || update_dispclk)
@@ -646,6 +839,10 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
/*update dmcu for wait_loop count*/
dmcu->funcs->set_psr_wait_loop(dmcu,
clk_mgr_base->clks.dispclk_khz / 1000 / 7);
+
+ if (dc->config.enable_auto_dpm_test_logs) {
+ dcn32_auto_dpm_test_log(new_clocks, clk_mgr, context);
+ }
}
static uint32_t dcn32_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
@@ -850,11 +1047,8 @@ static void dcn32_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
&num_entries_per_clk->num_fclk_levels);
clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK);
- if (num_entries_per_clk->num_memclk_levels >= num_entries_per_clk->num_fclk_levels) {
- num_levels = num_entries_per_clk->num_memclk_levels;
- } else {
- num_levels = num_entries_per_clk->num_fclk_levels;
- }
+ num_levels = max(num_entries_per_clk->num_memclk_levels, num_entries_per_clk->num_fclk_levels);
+
clk_mgr_base->bw_params->max_memclk_mhz =
clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz;
clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1;
@@ -1013,11 +1207,19 @@ void dcn32_clk_mgr_construct(
clk_mgr->smu_present = false;
clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL);
+ if (!clk_mgr->base.bw_params) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
/* need physical address of table to give to PMFW */
clk_mgr->wm_range_table = dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx,
DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t),
&clk_mgr->wm_range_table_addr);
+ if (!clk_mgr->wm_range_table) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
}
void dcn32_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c
index 700ce42036d7..5d80fdf63ffc 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c
@@ -28,7 +28,7 @@
#include "clk_mgr_internal.h"
#include "reg_helper.h"
#include "dalsmc.h"
-#include "smu13_driver_if.h"
+#include "dcn32_smu13_driver_if.h"
#define mmDAL_MSG_REG 0x1628A
#define mmDAL_ARG_REG 0x16273
@@ -49,6 +49,7 @@
*/
static uint32_t dcn32_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
{
+ const uint32_t initial_max_retries = max_retries;
uint32_t reg = 0;
do {
@@ -62,6 +63,9 @@ static uint32_t dcn32_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un
udelay(delay_us);
} while (max_retries--);
+ TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx);
+
+
return reg;
}
@@ -79,6 +83,8 @@ static bool dcn32_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint
/* Trigger the message transaction by writing the message ID */
REG_WRITE(DAL_MSG_REG, msg_id);
+ TRACE_SMU_MSG(msg_id, param_in, clk_mgr->base.ctx);
+
/* Wait for response */
if (dcn32_smu_wait_for_response(clk_mgr, 10, 200000) == DALSMC_Result_OK) {
if (param_out)
@@ -90,6 +96,68 @@ static bool dcn32_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint
return false;
}
+/*
+ * Use these functions to return back delay information so we can aggregate the total
+ * delay when requesting hardmin clk
+ *
+ * dcn32_smu_wait_for_response_delay
+ * dcn32_smu_send_msg_with_param_delay
+ *
+ */
+static uint32_t dcn32_smu_wait_for_response_delay(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries, unsigned int *total_delay_us)
+{
+ uint32_t reg = 0;
+ *total_delay_us = 0;
+
+ do {
+ reg = REG_READ(DAL_RESP_REG);
+ if (reg)
+ break;
+
+ if (delay_us >= 1000)
+ msleep(delay_us/1000);
+ else if (delay_us > 0)
+ udelay(delay_us);
+ *total_delay_us += delay_us;
+ } while (max_retries--);
+
+ TRACE_SMU_MSG_DELAY(0, 0, *total_delay_us, clk_mgr->base.ctx);
+
+ return reg;
+}
+
+static bool dcn32_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out, unsigned int *total_delay_us)
+{
+ unsigned int delay1_us, delay2_us;
+ *total_delay_us = 0;
+
+ /* Wait for response register to be ready */
+ dcn32_smu_wait_for_response_delay(clk_mgr, 10, 200000, &delay1_us);
+
+ /* Clear response register */
+ REG_WRITE(DAL_RESP_REG, 0);
+
+ /* Set the parameter register for the SMU message */
+ REG_WRITE(DAL_ARG_REG, param_in);
+
+ /* Trigger the message transaction by writing the message ID */
+ REG_WRITE(DAL_MSG_REG, msg_id);
+
+ TRACE_SMU_MSG(msg_id, param_in, clk_mgr->base.ctx);
+
+ /* Wait for response */
+ if (dcn32_smu_wait_for_response_delay(clk_mgr, 10, 200000, &delay2_us) == DALSMC_Result_OK) {
+ if (param_out)
+ *param_out = REG_READ(DAL_ARG_REG);
+
+ *total_delay_us = delay1_us + delay2_us;
+ return true;
+ }
+
+ *total_delay_us = delay1_us + 2000000;
+ return false;
+}
+
void dcn32_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool enable)
{
smu_print("FCLK P-state support value is : %d\n", enable);
@@ -122,10 +190,98 @@ void dcn32_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr)
DALSMC_MSG_BacoAudioD3PME, 0, NULL);
}
+/* Check PMFW version if it supports ReturnHardMinStatus message */
+static bool dcn32_get_hard_min_status_supported(struct clk_mgr_internal *clk_mgr)
+{
+ if (ASICREV_IS_GC_11_0_0(clk_mgr->base.ctx->asic_id.hw_internal_rev)) {
+ if (clk_mgr->smu_ver >= 0x4e6a00)
+ return true;
+ } else if (ASICREV_IS_GC_11_0_2(clk_mgr->base.ctx->asic_id.hw_internal_rev)) {
+ if (clk_mgr->smu_ver >= 0x524e00)
+ return true;
+ } else { /* ASICREV_IS_GC_11_0_3 */
+ if (clk_mgr->smu_ver >= 0x503900)
+ return true;
+ }
+ return false;
+}
+
+/* Returns the clocks which were fulfilled by the DAL hard min arbiter in PMFW */
+static unsigned int dcn32_smu_get_hard_min_status(struct clk_mgr_internal *clk_mgr, bool *no_timeout, unsigned int *total_delay_us)
+{
+ uint32_t response = 0;
+
+ /* bits 23:16 for clock type, lower 16 bits for frequency in MHz */
+ uint32_t param = 0;
+
+ *no_timeout = dcn32_smu_send_msg_with_param_delay(clk_mgr,
+ DALSMC_MSG_ReturnHardMinStatus, param, &response, total_delay_us);
+
+ smu_print("SMU Get hard min status: no_timeout %d delay %d us clk bits %x\n",
+ *no_timeout, *total_delay_us, response);
+
+ return response;
+}
+
+static bool dcn32_smu_wait_get_hard_min_status(struct clk_mgr_internal *clk_mgr,
+ uint32_t clk)
+{
+ int readDalHardMinClkBits, checkDalHardMinClkBits;
+ unsigned int total_delay_us, read_total_delay_us;
+ bool no_timeout, hard_min_done;
+
+ static unsigned int cur_wait_get_hard_min_max_us;
+ static unsigned int cur_wait_get_hard_min_max_timeouts;
+
+ checkDalHardMinClkBits = CHECK_HARD_MIN_CLK_DPREFCLK;
+ if (clk == PPCLK_DISPCLK)
+ checkDalHardMinClkBits |= CHECK_HARD_MIN_CLK_DISPCLK;
+ if (clk == PPCLK_DPPCLK)
+ checkDalHardMinClkBits |= CHECK_HARD_MIN_CLK_DPPCLK;
+ if (clk == PPCLK_DCFCLK)
+ checkDalHardMinClkBits |= CHECK_HARD_MIN_CLK_DCFCLK;
+ if (clk == PPCLK_DTBCLK)
+ checkDalHardMinClkBits |= CHECK_HARD_MIN_CLK_DTBCLK;
+ if (clk == PPCLK_UCLK)
+ checkDalHardMinClkBits |= CHECK_HARD_MIN_CLK_UCLK;
+
+ if (checkDalHardMinClkBits == CHECK_HARD_MIN_CLK_DPREFCLK)
+ return 0;
+
+ total_delay_us = 0;
+ hard_min_done = false;
+ while (1) {
+ readDalHardMinClkBits = dcn32_smu_get_hard_min_status(clk_mgr, &no_timeout, &read_total_delay_us);
+ total_delay_us += read_total_delay_us;
+ if (checkDalHardMinClkBits == (readDalHardMinClkBits & checkDalHardMinClkBits)) {
+ hard_min_done = true;
+ break;
+ }
+
+
+ if (total_delay_us >= 2000000) {
+ cur_wait_get_hard_min_max_timeouts++;
+ smu_print("SMU Wait get hard min status: %d timeouts\n", cur_wait_get_hard_min_max_timeouts);
+ break;
+ }
+ msleep(1);
+ total_delay_us += 1000;
+ }
+
+ if (total_delay_us > cur_wait_get_hard_min_max_us)
+ cur_wait_get_hard_min_max_us = total_delay_us;
+
+ smu_print("SMU Wait get hard min status: no_timeout %d, delay %d us, max %d us, read %x, check %x\n",
+ no_timeout, total_delay_us, cur_wait_get_hard_min_max_us, readDalHardMinClkBits, checkDalHardMinClkBits);
+
+ return hard_min_done;
+}
+
/* Returns the actual frequency that was set in MHz, 0 on failure */
unsigned int dcn32_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz)
{
uint32_t response = 0;
+ bool hard_min_done = false;
/* bits 23:16 for clock type, lower 16 bits for frequency in MHz */
uint32_t param = (clk << 16) | freq_mhz;
@@ -133,9 +289,13 @@ unsigned int dcn32_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, ui
smu_print("SMU Set hard min by freq: clk = %d, freq_mhz = %d MHz\n", clk, freq_mhz);
dcn32_smu_send_msg_with_param(clk_mgr,
- DALSMC_MSG_SetHardMinByFreq, param, &response);
+ DALSMC_MSG_SetHardMinByFreq, param, &response);
- smu_print("SMU Frequency set = %d KHz\n", response);
+ if (dcn32_get_hard_min_status_supported(clk_mgr)) {
+ hard_min_done = dcn32_smu_wait_get_hard_min_status(clk_mgr, clk);
+ smu_print("SMU Frequency set = %d KHz hard_min_done %d\n", response, hard_min_done);
+ } else
+ smu_print("SMU Frequency set = %d KHz\n", response);
return response;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.h
index a34c258c19dc..5c44ab0e8667 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.h
@@ -36,12 +36,10 @@
#define DALSMC_MSG_SetCabForUclkPstate 0x12
#define DALSMC_Result_OK 0x1
-void
-dcn32_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool enable);
-void dcn32_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
-void dcn32_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr);
+void dcn32_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool enable);
void dcn32_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways);
void dcn32_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
+void dcn32_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr);
unsigned int dcn32_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz);
void dcn32_smu_wait_for_dmub_ack_mclk(struct clk_mgr_internal *clk_mgr, bool enable);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
index d3d5a8caccf8..8d54865bbd5d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
-// This is a stripped-down version of the smu13_driver_if.h file for the relevant DAL interfaces.
+/* Copyright © 2022-2024 Advanced Micro Devices, Inc. All rights reserved. */
#define SMU13_DRIVER_IF_VERSION 0x18
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/smu13_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/smu13_driver_if.h
deleted file mode 100644
index deeb85047e7b..000000000000
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/smu13_driver_if.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright 2021 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-#ifndef SMU13_DRIVER_IF_DCN32_H
-#define SMU13_DRIVER_IF_DCN32_H
-
-// *** IMPORTANT ***
-// PMFW TEAM: Always increment the interface version on any change to this file
-#define SMU13_DRIVER_IF_VERSION 0x18
-
-//Only Clks that have DPM descriptors are listed here
-typedef enum {
- PPCLK_GFXCLK = 0,
- PPCLK_SOCCLK,
- PPCLK_UCLK,
- PPCLK_FCLK,
- PPCLK_DCLK_0,
- PPCLK_VCLK_0,
- PPCLK_DCLK_1,
- PPCLK_VCLK_1,
- PPCLK_DISPCLK,
- PPCLK_DPPCLK,
- PPCLK_DPREFCLK,
- PPCLK_DCFCLK,
- PPCLK_DTBCLK,
- PPCLK_COUNT,
-} PPCLK_e;
-
-typedef enum {
- UCLK_DIV_BY_1 = 0,
- UCLK_DIV_BY_2,
- UCLK_DIV_BY_4,
- UCLK_DIV_BY_8,
-} UCLK_DIV_e;
-
-typedef struct {
- uint8_t WmSetting;
- uint8_t Flags;
- uint8_t Padding[2];
-
-} WatermarkRowGeneric_t;
-
-#define NUM_WM_RANGES 4
-
-typedef enum {
- WATERMARKS_CLOCK_RANGE = 0,
- WATERMARKS_DUMMY_PSTATE,
- WATERMARKS_MALL,
- WATERMARKS_COUNT,
-} WATERMARKS_FLAGS_e;
-
-typedef struct {
- // Watermarks
- WatermarkRowGeneric_t WatermarkRow[NUM_WM_RANGES];
-} Watermarks_t;
-
-typedef struct {
- Watermarks_t Watermarks;
- uint32_t Spare[16];
-
- uint32_t MmHubPadding[8]; // SMU internal use
-} WatermarksExternal_t;
-
-// These defines are used with the following messages:
-// SMC_MSG_TransferTableDram2Smu
-// SMC_MSG_TransferTableSmu2Dram
-
-// Table transfer status
-#define TABLE_TRANSFER_OK 0x0
-#define TABLE_TRANSFER_FAILED 0xFF
-#define TABLE_TRANSFER_PENDING 0xAB
-
-// Table types
-#define TABLE_PMFW_PPTABLE 0
-#define TABLE_COMBO_PPTABLE 1
-#define TABLE_WATERMARKS 2
-#define TABLE_AVFS_PSM_DEBUG 3
-#define TABLE_PMSTATUSLOG 4
-#define TABLE_SMU_METRICS 5
-#define TABLE_DRIVER_SMU_CONFIG 6
-#define TABLE_ACTIVITY_MONITOR_COEFF 7
-#define TABLE_OVERDRIVE 8
-#define TABLE_I2C_COMMANDS 9
-#define TABLE_DRIVER_INFO 10
-#define TABLE_COUNT 11
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c
new file mode 100644
index 000000000000..4607eff07253
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "dcn35_clk_mgr.h"
+
+#define DCN_BASE__INST0_SEG1 0x000000C0
+#define mmCLK1_CLK_PLL_REQ 0x16E37
+
+#define mmCLK1_CLK0_DFS_CNTL 0x16E69
+#define mmCLK1_CLK1_DFS_CNTL 0x16E6C
+#define mmCLK1_CLK2_DFS_CNTL 0x16E6F
+#define mmCLK1_CLK3_DFS_CNTL 0x16E72
+#define mmCLK1_CLK4_DFS_CNTL 0x16E75
+#define mmCLK1_CLK5_DFS_CNTL 0x16E78
+
+#define mmCLK1_CLK0_CURRENT_CNT 0x16EFC
+#define mmCLK1_CLK1_CURRENT_CNT 0x16EFD
+#define mmCLK1_CLK2_CURRENT_CNT 0x16EFE
+#define mmCLK1_CLK3_CURRENT_CNT 0x16EFF
+#define mmCLK1_CLK4_CURRENT_CNT 0x16F00
+#define mmCLK1_CLK5_CURRENT_CNT 0x16F01
+
+#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A
+#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93
+#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C
+#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5
+#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE
+#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7
+
+#define mmCLK1_CLK0_DS_CNTL 0x16E83
+#define mmCLK1_CLK1_DS_CNTL 0x16E8C
+#define mmCLK1_CLK2_DS_CNTL 0x16E95
+#define mmCLK1_CLK3_DS_CNTL 0x16E9E
+#define mmCLK1_CLK4_DS_CNTL 0x16EA7
+#define mmCLK1_CLK5_DS_CNTL 0x16EB0
+
+#define mmCLK1_CLK0_ALLOW_DS 0x16E84
+#define mmCLK1_CLK1_ALLOW_DS 0x16E8D
+#define mmCLK1_CLK2_ALLOW_DS 0x16E96
+#define mmCLK1_CLK3_ALLOW_DS 0x16E9F
+#define mmCLK1_CLK4_ALLOW_DS 0x16EA8
+#define mmCLK1_CLK5_ALLOW_DS 0x16EB1
+
+#define mmCLK5_spll_field_8 0x1B04B
+#define mmCLK6_spll_field_8 0x1B24B
+#define mmDENTIST_DISPCLK_CNTL 0x0124
+#define regDENTIST_DISPCLK_CNTL 0x0064
+#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1
+
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
+
+// DENTIST_DISPCLK_CNTL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L
+
+#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
+#define REG(reg) \
+ (clk_mgr->regs->reg)
+
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define SR(reg_name)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define CLK_SR_DCN35(reg_name)\
+ .reg_name = mm ## reg_name
+
+static const struct clk_mgr_registers clk_mgr_regs_dcn351 = {
+ CLK_REG_LIST_DCN35()
+};
+
+static const struct clk_mgr_shift clk_mgr_shift_dcn351 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
+
+static const struct clk_mgr_mask clk_mgr_mask_dcn351 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
+
+#define TO_CLK_MGR_DCN35(clk_mgr)\
+ container_of(clk_mgr, struct clk_mgr_dcn35, base)
+
+/* Build a DCN3.5.1 clock manager: install this file's register/shift/mask tables, then call the DCN3.5 constructor. */
+void dcn351_clk_mgr_construct(
+ struct dc_context *ctx,
+ struct clk_mgr_dcn35 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg)
+{
+ /* Register offsets changed for DCN3.5.1, so use the tables built from this file's defines. */
+ clk_mgr->base.regs = &clk_mgr_regs_dcn351;
+ clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn351;
+ clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn351;
+
+ dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+
+}
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
new file mode 100644
index 000000000000..b11383fba35f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -0,0 +1,1556 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dcn35_clk_mgr.h"
+
+#include "dccg.h"
+#include "clk_mgr_internal.h"
+
+// For dce12_get_dp_ref_freq_khz
+#include "dce100/dce_clk_mgr.h"
+
+// For dcn20_update_clocks_update_dpp_dto
+#include "dcn20/dcn20_clk_mgr.h"
+
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dcn35_smu.h"
+#include "dm_helpers.h"
+
+#include "dcn31/dcn31_clk_mgr.h"
+
+#include "dc_dmub_srv.h"
+#include "link_service.h"
+#include "logger_types.h"
+
+#undef DC_LOGGER
+#define DC_LOGGER \
+ clk_mgr->base.base.ctx->logger
+
+#define DCN_BASE__INST0_SEG1 0x000000C0
+#define mmCLK1_CLK_PLL_REQ 0x16E37
+
+#define mmCLK1_CLK0_DFS_CNTL 0x16E69
+#define mmCLK1_CLK1_DFS_CNTL 0x16E6C
+#define mmCLK1_CLK2_DFS_CNTL 0x16E6F
+#define mmCLK1_CLK3_DFS_CNTL 0x16E72
+#define mmCLK1_CLK4_DFS_CNTL 0x16E75
+#define mmCLK1_CLK5_DFS_CNTL 0x16E78
+
+#define mmCLK1_CLK0_CURRENT_CNT 0x16EFB
+#define mmCLK1_CLK1_CURRENT_CNT 0x16EFC
+#define mmCLK1_CLK2_CURRENT_CNT 0x16EFD
+#define mmCLK1_CLK3_CURRENT_CNT 0x16EFE
+#define mmCLK1_CLK4_CURRENT_CNT 0x16EFF
+#define mmCLK1_CLK5_CURRENT_CNT 0x16F00
+
+#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A
+#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93
+#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C
+#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5
+#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE
+#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7
+
+#define mmCLK1_CLK0_DS_CNTL 0x16E83
+#define mmCLK1_CLK1_DS_CNTL 0x16E8C
+#define mmCLK1_CLK2_DS_CNTL 0x16E95
+#define mmCLK1_CLK3_DS_CNTL 0x16E9E
+#define mmCLK1_CLK4_DS_CNTL 0x16EA7
+#define mmCLK1_CLK5_DS_CNTL 0x16EB0
+
+#define mmCLK1_CLK0_ALLOW_DS 0x16E84
+#define mmCLK1_CLK1_ALLOW_DS 0x16E8D
+#define mmCLK1_CLK2_ALLOW_DS 0x16E96
+#define mmCLK1_CLK3_ALLOW_DS 0x16E9F
+#define mmCLK1_CLK4_ALLOW_DS 0x16EA8
+#define mmCLK1_CLK5_ALLOW_DS 0x16EB1
+
+#define mmCLK5_spll_field_8 0x1B24B /* NOTE(review): same offset as mmCLK6_spll_field_8 below, while dcn351_clk_mgr.c in this patch uses 0x1B04B for CLK5 - confirm */
+#define mmCLK6_spll_field_8 0x1B24B
+#define mmDENTIST_DISPCLK_CNTL 0x0124
+#define regDENTIST_DISPCLK_CNTL 0x0064
+#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1
+
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
+// DENTIST_DISPCLK_CNTL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L
+
+#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
+#define CLK6_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
+#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
+#undef FN
+#define FN(reg_name, field_name) \
+ clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name
+
+#define REG(reg) \
+ (clk_mgr->regs->reg)
+
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define SR(reg_name)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define CLK_SR_DCN35(reg_name)\
+ .reg_name = mm ## reg_name
+
+static const struct clk_mgr_registers clk_mgr_regs_dcn35 = {
+ CLK_REG_LIST_DCN35()
+};
+
+static const struct clk_mgr_shift clk_mgr_shift_dcn35 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
+
+static const struct clk_mgr_mask clk_mgr_mask_dcn35 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
+
+#define TO_CLK_MGR_DCN35(clk_mgr)\
+ container_of(clk_mgr, struct clk_mgr_dcn35, base)
+
+/* Count links whose DIG reports enabled. The raw count is stored in *all_active_disps when non-NULL;
+ * the returned count is forced to 1 if it is 0 while a TMDS (HDMI/DVI) stream is present. */
+static int dcn35_get_active_display_cnt_wa(
+ struct dc *dc,
+ struct dc_state *context,
+ int *all_active_disps)
+{
+ int i, display_count = 0;
+ bool tmds_present = false;
+
+ for (i = 0; i < context->stream_count; i++) {
+ const struct dc_stream_state *stream = context->streams[i];
+
+ if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A ||
+ stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
+ stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+ tmds_present = true;
+ }
+ for (i = 0; i < dc->link_count; i++) {
+ const struct dc_link *link = dc->links[i];
+
+ /* abusing the fact that the dig and phy are coupled to see if the phy is enabled */
+ if (link->link_enc && link->link_enc->funcs->is_dig_enabled &&
+ link->link_enc->funcs->is_dig_enabled(link->link_enc))
+ display_count++;
+ }
+ if (all_active_disps != NULL)
+ *all_active_disps = display_count;
+ /* WA for hang on HDMI after display off then back on: report one display while TMDS is present */
+ if (display_count == 0 && tmds_present)
+ display_count = 1;
+ return display_count;
+}
+/*
+ * OTG w/a around a DISPCLK change: each head pipe whose stream is dpms_off, virtual or without a link
+ * encoder (and not 128b/132b) gets its tg disabled (@disable) or re-enabled; a NULL tg is skipped.
+ */
+static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+		bool safe_to_lower, bool disable)
+{
+	struct dc *dc = clk_mgr_base->ctx->dc;
+	int i;
+
+	if (dc->ctx->dce_environment == DCE_ENV_DIAG)
+		return;
+
+	for (i = 0; i < dc->res_pool->pipe_count; ++i) {
+		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+		struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
+		struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+		struct dccg *dccg = clk_mgr_internal->dccg;
+		struct pipe_ctx *pipe = safe_to_lower ? new_pipe : old_pipe;
+		struct link_encoder *new_pipe_link_enc = new_pipe->link_res.dio_link_enc;
+		struct link_encoder *pipe_link_enc = pipe->link_res.dio_link_enc;
+		bool stream_changed_otg_dig_on = false;
+		bool has_active_hpo = false;
+
+		if (pipe->top_pipe || pipe->prev_odm_pipe)
+			continue;
+
+		if (!dc->config.unify_link_enc_assignment) {
+			if (new_pipe->stream)
+				new_pipe_link_enc = new_pipe->stream->link_enc;
+			if (pipe->stream)
+				pipe_link_enc = pipe->stream->link_enc;
+		}
+
+		stream_changed_otg_dig_on = old_pipe->stream && new_pipe->stream &&
+			old_pipe->stream != new_pipe->stream &&
+			old_pipe->stream_res.tg == new_pipe->stream_res.tg &&
+			new_pipe_link_enc && !new_pipe->stream->dpms_off &&
+			new_pipe_link_enc->funcs->is_dig_enabled &&
+			new_pipe_link_enc->funcs->is_dig_enabled(new_pipe_link_enc) &&
+			new_pipe->stream_res.stream_enc &&
+			new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled &&
+			new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc);
+
+		if (old_pipe->stream && new_pipe->stream && old_pipe->stream == new_pipe->stream) {
+			has_active_hpo = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(old_pipe) &&
+				dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(new_pipe);
+		}
+
+		if (!has_active_hpo && !stream_changed_otg_dig_on && pipe->stream &&
+		    (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || !pipe_link_enc) &&
+		    !dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe)) {
+			/* This w/a should not trigger when we have a dig active */
+			if (disable) {
+				if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+					pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
+				reset_sync_context_for_pipe(dc, context, i);
+			} else if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->enable_crtc) {
+				pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
+			}
+		}
+	}
+}
+
+/* Program the DTBCLK DTO with ref_dtbclk_khz once per distinct timing generator used by context's pipes. */
+static void dcn35_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr,
+ struct dc_state *context,
+ int ref_dtbclk_khz)
+{
+ struct dccg *dccg = clk_mgr->dccg;
+ uint32_t tg_mask = 0;
+ int i;
+
+ for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+ struct dtbclk_dto_params dto_params = {0};
+
+ /* tg_mask holds one bit per tg instance already handled, so each tg's DTO is programmed once */
+ if (pipe_ctx->stream_res.tg &&
+ !(tg_mask & (1 << pipe_ctx->stream_res.tg->inst))) {
+ tg_mask |= (1 << pipe_ctx->stream_res.tg->inst);
+
+ dto_params.otg_inst = pipe_ctx->stream_res.tg->inst;
+ dto_params.ref_dtbclk_khz = ref_dtbclk_khz;
+ dccg->funcs->set_dtbclk_dto(clk_mgr->dccg, &dto_params);
+ //dccg->funcs->set_audio_dtbclk_dto(clk_mgr->dccg, &dto_params);
+ }
+ }
+}
+
+/* Update per-DPP DTOs from context; when safe_to_lower, also zero the DTO of current-state DPPs not marked active by the first loop. */
+static void dcn35_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
+ struct dc_state *context, bool safe_to_lower)
+{
+ int i;
+ bool dppclk_active[MAX_PIPES] = {0};
+
+ clk_mgr->dccg->ref_dppclk = clk_mgr->base.clks.dppclk_khz;
+ for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
+ int dpp_inst = 0, dppclk_khz, prev_dppclk_khz;
+
+ dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz;
+
+ if (context->res_ctx.pipe_ctx[i].plane_res.dpp)
+ dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst;
+ else if (!context->res_ctx.pipe_ctx[i].plane_res.dpp && dppclk_khz == 0) {
+ /* dpp == NULL && dppclk_khz == 0 is valid because of pipe harvesting.
+ * In this case just continue in loop
+ */
+ continue;
+ } else if (!context->res_ctx.pipe_ctx[i].plane_res.dpp && dppclk_khz > 0) {
+ /* The software state is not valid if dpp resource is NULL and
+ * dppclk_khz > 0.
+ */
+ ASSERT(false);
+ continue;
+ }
+ /* NOTE(review): previous value is looked up by pipe index i while the DTO below is programmed by dpp_inst - confirm they always match */
+ prev_dppclk_khz = clk_mgr->dccg->pipe_dppclk_khz[i];
+ /* program the DTO when lowering is allowed or when the requested value is higher than the previous one */
+ if (safe_to_lower || prev_dppclk_khz < dppclk_khz)
+ clk_mgr->dccg->funcs->update_dpp_dto(
+ clk_mgr->dccg, dpp_inst, dppclk_khz);
+ dppclk_active[dpp_inst] = true;
+ }
+ if (safe_to_lower)
+ for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
+ struct dpp *old_dpp = clk_mgr->base.ctx->dc->current_state->res_ctx.pipe_ctx[i].plane_res.dpp;
+
+ if (old_dpp && !dppclk_active[old_dpp->inst])
+ clk_mgr->dccg->funcs->update_dpp_dto(clk_mgr->dccg, old_dpp->inst, 0);
+ }
+}
+
+/* Return the lowest link_index among the USB4 DPIA links of link->dc, or 0xFF if there is none. */
+static uint8_t get_lowest_dpia_index(const struct dc_link *link)
+{
+	const struct dc *dc = link->dc;
+	uint8_t lowest_index = 0xFF;
+	int slot;
+
+	for (slot = 0; slot < MAX_PIPES * 2; ++slot) {
+		const struct dc_link *cand = dc->links[slot];
+
+		if (cand && cand->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && cand->link_index < lowest_index)
+			lowest_index = cand->link_index;
+	}
+
+	return lowest_index;
+}
+
+/* Sum the timing bandwidth of context's streams per host router and send each router's total to the SMU when should_set_clock() allows it. */
+static void dcn35_notify_host_router_bw(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+ bool safe_to_lower)
+{
+ struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ uint32_t host_router_bw_kbps[MAX_HOST_ROUTERS_NUM] = { 0 };
+ int i;
+ for (i = 0; i < context->stream_count; ++i) {
+ const struct dc_stream_state *stream = context->streams[i];
+ const struct dc_link *link = stream->link;
+ uint8_t lowest_dpia_index = 0;
+ unsigned int hr_index = 0;
+
+ if (!link)
+ continue;
+ lowest_dpia_index = get_lowest_dpia_index(link);
+ if (link->link_index < lowest_dpia_index)
+ continue;
+ /* presumably two consecutive DPIA links share one host router, hence the divide by 2 - TODO confirm */
+ hr_index = (link->link_index - lowest_dpia_index) / 2;
+ if (hr_index >= MAX_HOST_ROUTERS_NUM)
+ continue;
+ host_router_bw_kbps[hr_index] += dc_bandwidth_in_kbps_from_timing(
+ &stream->timing, dc_link_get_highest_encoding_format(link));
+ }
+
+ for (i = 0; i < MAX_HOST_ROUTERS_NUM; ++i) {
+ new_clocks->host_router_bw_kbps[i] = host_router_bw_kbps[i];
+ if (should_set_clock(safe_to_lower, new_clocks->host_router_bw_kbps[i], clk_mgr_base->clks.host_router_bw_kbps[i])) {
+ clk_mgr_base->clks.host_router_bw_kbps[i] = new_clocks->host_router_bw_kbps[i];
+ dcn35_smu_notify_host_router_bw(clk_mgr, i, new_clocks->host_router_bw_kbps[i]);
+ }
+ }
+}
+
+/* Bring the clock manager's tracked clocks in line with context's request, messaging the SMU, programming DCCG DTOs and notifying DMCUB. */
+void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower)
+{
+ union dmub_rb_cmd cmd;
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ int display_count = 0;
+ bool update_dppclk = false;
+ bool update_dispclk = false;
+ bool dpp_clock_lowered = false;
+ int all_active_disps = 0;
+
+ if (dc->work_arounds.skip_clock_update)
+ return;
+
+ display_count = dcn35_get_active_display_cnt_wa(dc, context, &all_active_disps);
+ if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz)
+ new_clocks->ref_dtbclk_khz = 600000; /* DTBCLK requested without a reference value: use 600000 kHz */
+ /*
+ * if it is safe to lower, but we are already in the lower state, we don't have to do anything
+ * also if safe to lower is false, we just go in the higher state
+ */
+ if (safe_to_lower) {
+ if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW &&
+ new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
+ dcn35_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support);
+ dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true);
+ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
+ }
+
+ if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
+ if (clk_mgr->base.ctx->dc->config.allow_0_dtb_clk)
+ dcn35_smu_set_dtbclk(clk_mgr, false);
+
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
+ /* check that we're not already in lower */
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
+ /* if we can go lower, go lower */
+ if (display_count == 0)
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+ }
+ } else {
+ if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
+ new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
+ dcn35_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW);
+ dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false);
+ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
+ }
+
+ if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
+ int actual_dtbclk = 0;
+
+ dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
+ dcn35_smu_set_dtbclk(clk_mgr, true);
+ /* read back the CLK4 counter; DTBCLK is only recorded as enabled when it reads non-zero */
+ actual_dtbclk = REG_READ(CLK1_CLK4_CURRENT_CNT);
+
+ if (actual_dtbclk) {
+ clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
+ }
+
+ /* check that we're not already in D0 */
+ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) {
+ union display_idle_optimization_u idle_info = { 0 };
+
+ dcn35_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
+ /* update power state */
+ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_MISSION_MODE;
+ }
+ }
+ if (dc->debug.force_min_dcfclk_mhz > 0)
+ new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ?
+ new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000);
+
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
+ clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
+ dcn35_smu_set_hard_min_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_khz);
+ }
+
+ if (should_set_clock(safe_to_lower,
+ new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) {
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
+ dcn35_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz);
+ }
+
+ // workaround: never request less than 100 MHz dppclk, to avoid underflow on a lower eDP panel switch to plus 4K monitor.
+ if (new_clocks->dppclk_khz < 100000)
+ new_clocks->dppclk_khz = 100000;
+
+ if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
+ if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
+ dpp_clock_lowered = true;
+ clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
+ update_dppclk = true;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) &&
+ (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) {
+ int requested_dispclk_khz = new_clocks->dispclk_khz;
+
+ dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
+
+ /* Raise the DISPCLK request sent to PMFW to dc->debug.min_disp_clk_khz when that floor is set. */
+ if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz)
+ requested_dispclk_khz = dc->debug.min_disp_clk_khz;
+
+ dcn35_smu_set_dispclk(clk_mgr, requested_dispclk_khz);
+ clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
+
+ dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
+
+ update_dispclk = true;
+ }
+
+ /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
+ if (!dc->debug.disable_dtb_ref_clk_switch &&
+ should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000,
+ clk_mgr_base->clks.ref_dtbclk_khz / 1000)) {
+ dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
+ clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
+ }
+
+ if (dpp_clock_lowered) {
+ // increase per DPP DTO before lowering global dppclk
+ dcn35_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ dcn35_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
+ } else {
+ // increase global DPPCLK before lowering per DPP DTO
+ if (update_dppclk || update_dispclk)
+ dcn35_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
+ dcn35_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
+ }
+
+ // notify PMFW of bandwidth per DPIA tunnel
+ if (dc->debug.notify_dpia_hr_bw)
+ dcn35_notify_host_router_bw(clk_mgr_base, context, safe_to_lower);
+
+ // notify DMCUB of latest clocks
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.notify_clocks.header.type = DMUB_CMD__CLK_MGR;
+ cmd.notify_clocks.header.sub_type = DMUB_CMD__CLK_MGR_NOTIFY_CLOCKS;
+ cmd.notify_clocks.clocks.dcfclk_khz = clk_mgr_base->clks.dcfclk_khz;
+ cmd.notify_clocks.clocks.dcfclk_deep_sleep_khz =
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz;
+ cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
+ cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
+
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/* Compute the VCO frequency in kHz from the FbMult fields of CLK1_CLK_PLL_REQ and clk_mgr->dfs_ref_freq_khz. */
+static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
+{
+ /* get FbMult value */
+ struct fixed31_32 pll_req;
+ unsigned int fbmult_frac_val = 0;
+ unsigned int fbmult_int_val = 0;
+
+ /*
+ * Register value of fbmult is in 8.16 format, we are converting to 31.32
+ * to leverage the fixed point operations available in driver
+ */
+ REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part*/
+ REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */
+
+ pll_req = dc_fixpt_from_int(fbmult_int_val);
+
+ /*
+ * since fractional part is only 16 bit in register definition but is 32 bit
+ * in our fixed point definition, need to shift left by 16 to obtain correct value
+ */
+ pll_req.value |= fbmult_frac_val << 16;
+
+ /* multiply by the DFS reference clock frequency in kHz */
+ pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz);
+
+ /* integer part is now VCO frequency in kHz */
+ return dc_fixpt_floor(pll_req);
+}
+
+/* Forward the PME workaround request for this clock manager to the SMU helper. */
+static void dcn35_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+{
+	/* the SMU helper takes the internal clk_mgr wrapper rather than the base object */
+	dcn35_smu_enable_pme_wa(TO_CLK_MGR_INTERNAL(clk_mgr_base));
+}
+
+/*
+ * Compare two clock states on the fields checked for DCN3.5: dispclk,
+ * dppclk, dcfclk, dcfclk deep sleep, zstate support and the DTBCLK enable
+ * flag.
+ * Returns true only when every one of them matches between @a and @b.
+ */
+bool dcn35_are_clock_states_equal(struct dc_clocks *a,
+		struct dc_clocks *b)
+{
+	/* any differing clock frequency means the states differ */
+	if (a->dispclk_khz != b->dispclk_khz || a->dppclk_khz != b->dppclk_khz ||
+	    a->dcfclk_khz != b->dcfclk_khz ||
+	    a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz)
+		return false;
+
+	/* otherwise equality hinges on the two non-frequency fields */
+	return a->zstate_support == b->zstate_support &&
+	       a->dtbclk_en == b->dtbclk_en;
+}
+
+/* Snapshot CURRENT_CNT and BYPASS_CNTL of CLK1 clocks 0-4, plus the CLK3 deep-sleep registers, into *internal. */
+static void dcn35_save_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ // read dtbclk (CLK4)
+ internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT);
+ internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL);
+
+ // read dcfclk (CLK3)
+ internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT);
+ internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL);
+ // read dcf deep sleep divider and allow registers (also CLK3)
+ internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL);
+ internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS);
+
+ // read dppclk (CLK1)
+ internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT);
+ internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL);
+
+ // read dprefclk (CLK2)
+ internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT);
+ internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL);
+
+ // read dispclk (CLK0)
+ internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT);
+ internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL);
+}
+
+/* Read the clock registers into *regs_and_bypass (counter values divided by 10) and, when debug.pstate_enabled is set, log them via DC_LOG_SMU. */
+static void dcn35_save_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+ struct clk_mgr_dcn35 *clk_mgr)
+{
+ struct dcn35_clk_internal internal = {0};
+ char *bypass_clks[5] = {"0x0 DFS", "0x1 REFCLK", "0x2 ERROR", "0x3 400 FCH", "0x4 600 FCH"};
+
+ dcn35_save_clk_registers_internal(&internal, &clk_mgr->base.base);
+
+ regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10;
+ regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10;
+ regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS;
+ regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10;
+ regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10;
+ regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10;
+ regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10;
+ /* keep the low three bits of each BYPASS_CNTL and fold values above 4 to 0 so they can index bypass_clks[]; the < 0 tests only matter if the fields are signed */
+ regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4)
+ regs_and_bypass->dppclk_bypass = 0;
+ regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4)
+ regs_and_bypass->dcfclk_bypass = 0;
+ regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4)
+ regs_and_bypass->dispclk_bypass = 0;
+ regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007;
+ if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4)
+ regs_and_bypass->dprefclk_bypass = 0;
+
+ if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
+ DC_LOG_SMU("clk_type,clk_value,deepsleep_cntl,deepsleep_allow,bypass\n");
+
+ DC_LOG_SMU("dcfclk,%d,%d,%d,%s\n",
+ regs_and_bypass->dcfclk,
+ regs_and_bypass->dcf_deep_sleep_divider,
+ regs_and_bypass->dcf_deep_sleep_allow,
+ bypass_clks[(int) regs_and_bypass->dcfclk_bypass]);
+
+ DC_LOG_SMU("dprefclk,%d,N/A,N/A,%s\n",
+ regs_and_bypass->dprefclk,
+ bypass_clks[(int) regs_and_bypass->dprefclk_bypass]);
+
+ DC_LOG_SMU("dispclk,%d,N/A,N/A,%s\n",
+ regs_and_bypass->dispclk,
+ bypass_clks[(int) regs_and_bypass->dispclk_bypass]);
+
+ // REGISTER VALUES
+ DC_LOG_SMU("reg_name,value,clk_type");
+
+ DC_LOG_SMU("CLK1_CLK3_CURRENT_CNT,%d,dcfclk",
+ internal.CLK1_CLK3_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK4_CURRENT_CNT,%d,dtbclk",
+ internal.CLK1_CLK4_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK3_DS_CNTL,%d,dcf_deep_sleep_divider",
+ internal.CLK1_CLK3_DS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK3_ALLOW_DS,%d,dcf_deep_sleep_allow",
+ internal.CLK1_CLK3_ALLOW_DS);
+
+ DC_LOG_SMU("CLK1_CLK2_CURRENT_CNT,%d,dprefclk",
+ internal.CLK1_CLK2_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK0_CURRENT_CNT,%d,dispclk",
+ internal.CLK1_CLK0_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK1_CURRENT_CNT,%d,dppclk",
+ internal.CLK1_CLK1_CURRENT_CNT);
+
+ DC_LOG_SMU("CLK1_CLK3_BYPASS_CNTL,%d,dcfclk_bypass",
+ internal.CLK1_CLK3_BYPASS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK2_BYPASS_CNTL,%d,dprefclk_bypass",
+ internal.CLK1_CLK2_BYPASS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK0_BYPASS_CNTL,%d,dispclk_bypass",
+ internal.CLK1_CLK0_BYPASS_CNTL);
+
+ DC_LOG_SMU("CLK1_CLK1_BYPASS_CNTL,%d,dppclk_bypass",
+ internal.CLK1_CLK1_BYPASS_CNTL);
+ }
+}
+
+/* Report whether the spll_ssc_en bit is set. DCN3.5.1 is checked through
+ * CLK6_spll_field_8, every other version through CLK5_spll_field_8. */
+static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+	/* the REG() macro in this file dereferences a local named clk_mgr */
+	if (clk_mgr_base->ctx->dce_version == DCN_VERSION_3_51)
+		return (REG_READ(CLK6_spll_field_8) &
+			CLK6_spll_field_8__spll_ssc_en_MASK) != 0;
+
+	return (REG_READ(CLK5_spll_field_8) &
+		CLK5_spll_field_8__spll_ssc_en_MASK) != 0;
+}
+
+/* Zero clk_mgr->clks while keeping ref_dtbclk_khz, then set the initial p-state, power-state and z-state values. */
+static void init_clk_states(struct clk_mgr *clk_mgr)
+{
+	const uint32_t saved_ref_dtbclk_khz = clk_mgr->clks.ref_dtbclk_khz;
+
+	memset(&clk_mgr->clks, 0, sizeof(struct dc_clocks));
+	clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
+	clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
+	clk_mgr->clks.p_state_change_support = true;
+	clk_mgr->clks.prev_p_state_change_support = true;
+	clk_mgr->clks.ref_dtbclk_khz = saved_ref_dtbclk_khz; /* carried across the wipe */
+}
+
+/* Reset the tracked clock state, pick the DP DTO source clock, snapshot the clock registers and seed the DTBCLK state from that snapshot. */
+void dcn35_init_clocks(struct clk_mgr *clk_mgr)
+{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+ struct clk_mgr_dcn35 *clk_mgr_dcn35 = TO_CLK_MGR_DCN35(clk_mgr_int);
+
+ init_clk_states(clk_mgr);
+
+ // use the spread-spectrum-adjusted DP reference clock as DP DTO source when SPLL SSC is enabled, plain dprefclk otherwise
+ if (dcn35_is_spll_ssc_enabled(clk_mgr))
+ clk_mgr->dp_dto_source_clock_in_khz =
+ dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+ else
+ clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+
+ dcn35_save_clk_registers(&clk_mgr->boot_snapshot, clk_mgr_dcn35);
+ clk_mgr->clks.ref_dtbclk_khz = clk_mgr->boot_snapshot.dtbclk * 10;
+ if (clk_mgr->boot_snapshot.dtbclk > 59000) {
+ /* a snapshot above 59000 (the snapshot stores the CLK4 counter divided by 10) marks DTBCLK as enabled */
+ clk_mgr->clks.dtbclk_en = true;
+ }
+}
+/* Default bandwidth parameters; members not listed here start out zeroed. */
+static struct clk_bw_params dcn35_bw_params = {
+	.clk_table = {
+		.num_entries = 4,
+	},
+	.num_channels = 1,
+	.vram_type = Ddr4MemType,
+};
+
+/*
+ * Default DDR5 watermark sets A-D: all of the P-state change type and all
+ * sharing the same P-state latency and self-refresh timings.
+ */
+static struct wm_table ddr5_wm_table = {
+	.entries = {
+		{
+			.wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG,
+			.valid = true,
+			.pstate_latency_us = 11.72,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
+		},
+		{
+			.wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG,
+			.valid = true,
+			.pstate_latency_us = 11.72,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
+		},
+		{
+			.wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG,
+			.valid = true,
+			.pstate_latency_us = 11.72,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
+		},
+		{
+			.wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG,
+			.valid = true,
+			.pstate_latency_us = 11.72,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
+		},
+	}
+};
+
+/*
+ * Default LPDDR5 watermark sets A-D: all of the P-state change type and all
+ * sharing the same P-state latency and self-refresh timings.
+ */
+static struct wm_table lpddr5_wm_table = {
+	.entries = {
+		{
+			.wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG,
+			.valid = true,
+			.pstate_latency_us = 11.65333,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
+		},
+		{
+			.wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG,
+			.valid = true,
+			.pstate_latency_us = 11.65333,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
+		},
+		{
+			.wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG,
+			.valid = true,
+			.pstate_latency_us = 11.65333,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
+		},
+		{
+			.wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG,
+			.valid = true,
+			.pstate_latency_us = 11.65333,
+			.sr_exit_time_us = 28.0,
+			.sr_enter_plus_exit_time_us = 30.0,
+		},
+	}
+};
+
+static DpmClocks_t_dcn35 dummy_clocks; /* stand-in when the DCN35 DPM table buffer cannot be allocated */
+static DpmClocks_t_dcn351 dummy_clocks_dcn351; /* same, for the DCN351 table layout */
+/* Stand-in watermark table used when the GPU buffer allocation fails. */
+static struct dcn35_watermarks dummy_wms = { 0 };
+/* Spread percentage looked up by the CLK2 bypass select value (see dcn35_read_ss_info_from_lut). */
+static struct dcn35_ss_info_table ss_info_table = {
+ .ss_divider = 1000,
+ .ss_percentage = {0, 0, 375, 375, 375}
+};
+
+/* Look up the DPREFCLK spread-spectrum settings for the current CLK2 bypass source. */
+static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+	/* If it's DFS mode, the selector reads back as 0. */
+	uint32_t bypass_sel = REG_READ(CLK1_CLK2_BYPASS_CNTL) & CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK;
+
+	if (!dcn35_is_spll_ssc_enabled(&clk_mgr->base) ||
+	    bypass_sel >= ARRAY_SIZE(ss_info_table.ss_percentage))
+		return;
+
+	clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[bypass_sel];
+	if (clk_mgr->dprefclk_ss_percentage == 0)
+		return;
+	clk_mgr->ss_on_dprefclk = true;
+	clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+}
+
+/* Build the SMU watermark rows from the valid entries of bw_params->wm_table. */
+static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
+{
+	int i, num_valid_sets = 0;
+
+	for (i = 0; i < WM_SET_COUNT; i++) {
+		/* skip empty entries, the smu array has no holes */
+		if (!bw_params->wm_table.entries[i].valid)
+			continue;
+
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmSetting = bw_params->wm_table.entries[i].wm_inst;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType = bw_params->wm_table.entries[i].wm_type;
+		/* We will not select WM based on fclk, so leave it as unconstrained */
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinClock = 0;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF;
+
+		if (table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType == WM_TYPE_PSTATE_CHG) {
+			if (i == 0) {
+				table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk = 0;
+			} else {
+				/* add 1 to make it non-overlapping with next lvl */
+				table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk =
+						bw_params->clk_table.entries[i - 1].dcfclk_mhz + 1;
+			}
+			table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxMclk =
+					bw_params->clk_table.entries[i].dcfclk_mhz;
+		} else if (num_valid_sets > 0) {
+			/*
+			 * Other types stay unconstrained (MinClock/MaxClock set above).
+			 * Stretch the previous row up to max; this is skipped for the
+			 * first row, which has no predecessor to modify.
+			 */
+			table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
+		}
+		num_valid_sets++;
+	}
+
+	ASSERT(num_valid_sets != 0); /* Must have at least one set of valid watermarks */
+
+	/* modify the min and max to make sure we cover the whole range */
+	table->WatermarkRow[WM_DCFCLK][0].MinMclk = 0;
+	table->WatermarkRow[WM_DCFCLK][0].MinClock = 0;
+	if (num_valid_sets > 0) {
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxMclk = 0xFFFF;
+		table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
+	}
+
+	/* This is for writeback only, does not matter currently as no writeback support */
+	table->WatermarkRow[WM_SOCCLK][0].WmSetting = WM_A;
+	table->WatermarkRow[WM_SOCCLK][0].MinClock = 0;
+	table->WatermarkRow[WM_SOCCLK][0].MaxClock = 0xFFFF;
+	table->WatermarkRow[WM_SOCCLK][0].MinMclk = 0;
+	table->WatermarkRow[WM_SOCCLK][0].MaxMclk = 0xFFFF;
+}
+
+/* Rebuild the watermark table in its shared buffer and hand its address to the SMU. */
+static void dcn35_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct clk_mgr_dcn35 *clk_mgr_dcn35 = TO_CLK_MGR_DCN35(clk_mgr);
+	struct dcn35_watermarks *wm_table = clk_mgr_dcn35->smu_wm_set.wm_set;
+
+	/*
+	 * Bail out when no SMU version was read, or when there is no table
+	 * with a usable (non-zero) GPU address to pass along.
+	 */
+	if (!clk_mgr->smu_ver || !wm_table ||
+	    clk_mgr_dcn35->smu_wm_set.mc_address.quad_part == 0)
+		return;
+
+	memset(wm_table, 0, sizeof(*wm_table));
+	dcn35_build_watermark_ranges(clk_mgr_base->bw_params, wm_table);
+
+	dcn35_smu_set_dram_addr_high(clk_mgr, clk_mgr_dcn35->smu_wm_set.mc_address.high_part);
+	dcn35_smu_set_dram_addr_low(clk_mgr, clk_mgr_dcn35->smu_wm_set.mc_address.low_part);
+	dcn35_smu_transfer_wm_table_dram_2_smu(clk_mgr);
+}
+
+/* Request the SMU DPM table transfer into the buffer behind smu_dpm_clks->dpm_clks. */
+static void dcn35_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
+		struct dcn35_smu_dpm_clks *smu_dpm_clks)
+{
+	DpmClocks_t_dcn35 *dpm_table = smu_dpm_clks->dpm_clks;
+
+	/* Needs a non-zero SMU version and a buffer with a non-zero GPU address. */
+	if (!clk_mgr->smu_ver || !dpm_table ||
+	    smu_dpm_clks->mc_address.quad_part == 0)
+		return;
+
+	/* Clear the buffer before the transfer is requested. */
+	memset(dpm_table, 0, sizeof(*dpm_table));
+
+	/* Pass the buffer address (high half, then low half), then trigger the transfer. */
+	dcn35_smu_set_dram_addr_high(clk_mgr, smu_dpm_clks->mc_address.high_part);
+	dcn35_smu_set_dram_addr_low(clk_mgr, smu_dpm_clks->mc_address.low_part);
+	dcn35_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
+}
+
+/* DCN351 variant of dcn35_get_dpm_table_from_smu(): same sequence, DCN351 table type. */
+static void dcn351_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
+		struct dcn351_smu_dpm_clks *smu_dpm_clks)
+{
+	DpmClocks_t_dcn351 *dpm_table = smu_dpm_clks->dpm_clks;
+
+	if (!clk_mgr->smu_ver || !dpm_table ||
+	    smu_dpm_clks->mc_address.quad_part == 0)
+		return;
+
+	memset(dpm_table, 0, sizeof(*dpm_table));
+
+	dcn35_smu_set_dram_addr_high(clk_mgr, smu_dpm_clks->mc_address.high_part);
+	dcn35_smu_set_dram_addr_low(clk_mgr, smu_dpm_clks->mc_address.low_part);
+	dcn35_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
+}
+/* Return the largest of the first num_clocks values in clocks[], or 0 if none. */
+static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
+{
+	uint32_t max = 0;
+	uint32_t i;	/* unsigned to match num_clocks: no signed/unsigned compare */
+
+	for (i = 0; i < num_clocks; ++i) {
+		if (clocks[i] > max)
+			max = clocks[i];
+	}
+	return max;
+}
+
+static inline bool is_valid_clock_value(uint32_t clock_value)
+{
+	return !(clock_value <= 1 || clock_value >= 100000); /* valid range is the open interval (1, 100000) */
+}
+
+/*
+ * Translate a WCK_RATIO_* code from the memory P-state table into the
+ * multiplier used for the DRAM speed calculation.
+ */
+static unsigned int convert_wck_ratio(uint8_t wck_ratio)
+{
+	switch (wck_ratio) {
+	case WCK_RATIO_1_2:
+		return 2;
+	case WCK_RATIO_1_4:
+		return 4;
+	default:
+		/* every other code maps to a multiplier of 1 */
+		return 1;
+	}
+}
+
+static inline uint32_t calc_dram_speed_mts(const MemPstateTable_t *entry)
+{
+	return 2 * convert_wck_ratio(entry->WckRatio) * entry->UClk; /* UClk x WCK multiplier x 2 */
+}
+
+static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
+ struct integrated_info *bios_info,
+ DpmClocks_t_dcn35 *clock_table)
+{
+ struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
+ struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
+ uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0;
+ uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0;
+ uint32_t num_memps, num_fclk, num_dcfclk;
+ int i;
+ /* Rebuild bw_params->clk_table and the wm_table validity flags from the PMFW DPM table. */
+ /* Determine min/max p-state values. */
+ num_memps = (clock_table->NumMemPstatesEnabled > NUM_MEM_PSTATE_LEVELS) ? NUM_MEM_PSTATE_LEVELS :
+ clock_table->NumMemPstatesEnabled;
+ for (i = 0; i < num_memps; i++) {
+ uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
+
+ if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) {
+ max_dram_speed_mts = dram_speed_mts;
+ max_pstate = i;
+ }
+ }
+ /* Seed the minimum search with the maximum found above. */
+ min_dram_speed_mts = max_dram_speed_mts;
+ min_pstate = max_pstate;
+
+ for (i = 0; i < num_memps; i++) {
+ uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]);
+
+ if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) {
+ min_dram_speed_mts = dram_speed_mts;
+ min_pstate = i;
+ }
+ }
+
+ /* We expect the table to contain at least one valid P-state entry. */
+ ASSERT(clock_table->NumMemPstatesEnabled &&
+ is_valid_clock_value(max_dram_speed_mts) &&
+ is_valid_clock_value(min_dram_speed_mts));
+
+ /* dispclk and dppclk can be max at any voltage, same number of levels for both */
+ if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
+ clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) {
+ max_dispclk = find_max_clk_value(clock_table->DispClocks,
+ clock_table->NumDispClkLevelsEnabled);
+ max_dppclk = find_max_clk_value(clock_table->DppClocks,
+ clock_table->NumDispClkLevelsEnabled);
+ } else {
+ /* Invalid number of entries in the table from PMFW. */
+ ASSERT(0);
+ }
+
+ /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */
+ ASSERT(clock_table->NumDcfClkLevelsEnabled > 0);
+
+ num_fclk = (clock_table->NumFclkLevelsEnabled > NUM_FCLK_DPM_LEVELS) ? NUM_FCLK_DPM_LEVELS :
+ clock_table->NumFclkLevelsEnabled;
+ max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk);
+
+ num_dcfclk = (clock_table->NumDcfClkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
+ clock_table->NumDcfClkLevelsEnabled;
+ for (i = 0; i < num_dcfclk; i++) {
+ int j;
+
+ /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
+ for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
+ if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i])
+ break;
+
+ bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
+
+ /* Now update clocks we do read */
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[min_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[min_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
+ bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio =
+ convert_wck_ratio(clock_table->MemPstateTable[min_pstate].WckRatio);
+
+ /* Dcfclk and Fclk are tied, but at a different ratio */
+ bw_params->clk_table.entries[i].fclk_mhz = min(max_fclk, 2 * clock_table->DcfClocks[i]);
+ }
+
+ /* Make sure to include at least one entry at highest pstate */
+ if (max_pstate != min_pstate || i == 0) {
+ if (i > MAX_NUM_DPM_LVL - 1)
+ i = MAX_NUM_DPM_LVL - 1;
+
+ bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
+ bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[max_pstate].MemClk;
+ bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[max_pstate].Voltage;
+ bw_params->clk_table.entries[i].dcfclk_mhz =
+ find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].socclk_mhz =
+ find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
+ bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
+ bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio(
+ clock_table->MemPstateTable[max_pstate].WckRatio);
+ i++;
+ }
+ bw_params->clk_table.num_entries = i--;
+ /* num_entries is final; i now indexes the last populated entry. */
+ /* Make sure all highest clocks are included*/
+ bw_params->clk_table.entries[i].socclk_mhz =
+ find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dispclk_mhz =
+ find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].dppclk_mhz =
+ find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS);
+ bw_params->clk_table.entries[i].fclk_mhz =
+ find_max_clk_value(clock_table->FclkClocks_Freq, NUM_FCLK_DPM_LEVELS);
+ ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS));
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+ bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels = clock_table->NumDcfClkLevelsEnabled;
+ bw_params->clk_table.num_entries_per_clk.num_dispclk_levels = clock_table->NumDispClkLevelsEnabled;
+ bw_params->clk_table.num_entries_per_clk.num_dppclk_levels = clock_table->NumDispClkLevelsEnabled;
+ bw_params->clk_table.num_entries_per_clk.num_fclk_levels = clock_table->NumFclkLevelsEnabled;
+ bw_params->clk_table.num_entries_per_clk.num_memclk_levels = clock_table->NumMemPstatesEnabled;
+ bw_params->clk_table.num_entries_per_clk.num_socclk_levels = clock_table->NumSocClkLevelsEnabled;
+
+ /*
+ * Set any 0 clocks to max default setting. Not an issue for
+ * power since we aren't doing switching in such case anyway
+ */
+ for (i = 0; i < bw_params->clk_table.num_entries; i++) {
+ if (!bw_params->clk_table.entries[i].fclk_mhz) {
+ bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+ bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz;
+ bw_params->clk_table.entries[i].voltage = def_max.voltage;
+ }
+ if (!bw_params->clk_table.entries[i].dcfclk_mhz)
+ bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz)
+ bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz;
+ if (!bw_params->clk_table.entries[i].dispclk_mhz)
+ bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz;
+ if (!bw_params->clk_table.entries[i].dppclk_mhz)
+ bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz;
+ if (!bw_params->clk_table.entries[i].fclk_mhz) /* NOTE(review): fclk_mhz was already defaulted above; looks redundant */
+ bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_mhz)
+ bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
+ if (!bw_params->clk_table.entries[i].phyclk_d18_mhz)
+ bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz)
+ bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+ }
+ ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); /* i == num_entries here, so this checks the last entry */
+ bw_params->vram_type = bios_info->memory_type;
+ bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4; /* NOTE(review): 0x22 is presumably the LPDDR5 memory type - confirm */
+ bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4;
+
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ bw_params->wm_table.entries[i].wm_inst = i;
+
+ if (i >= bw_params->clk_table.num_entries) {
+ bw_params->wm_table.entries[i].valid = false;
+ continue;
+ }
+
+ bw_params->wm_table.entries[i].wm_type = WM_TYPE_PSTATE_CHG;
+ bw_params->wm_table.entries[i].valid = true;
+ }
+}
+
+/* Record the low power state once the active display count reaches zero. */
+static void dcn35_set_low_power_state(struct clk_mgr *clk_mgr_base)
+{
+	struct dc *dc = clk_mgr_base->ctx->dc;
+	struct dc_state *context = dc->current_state;
+
+	/* Already recorded as low power: nothing to re-evaluate. */
+	if (clk_mgr_base->clks.pwr_state == DCN_PWR_STATE_LOW_POWER)
+		return;
+	/* if we can go lower, go lower */
+	if (dcn35_get_active_display_cnt_wa(dc, context, NULL) == 0)
+		clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
+}
+
+/*
+ * Leave the low power state. The SMU optimization is performed as part of
+ * the low power state exit.
+ */
+static void dcn35_exit_low_power_state(struct clk_mgr *clk_mgr_base)
+{
+	dcn35_smu_exit_low_power_state(TO_CLK_MGR_INTERNAL(clk_mgr_base));
+}
+
+static bool dcn35_is_ips_supported(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+	return dcn35_smu_get_ips_supported(internal) != 0; /* SMU answer collapsed to a bool */
+}
+
+static void dcn35_init_clocks_fpga(struct clk_mgr *clk_mgr)
+{
+ init_clk_states(clk_mgr);
+ /* Only resets the cached clock state. NOTE(review): no ifndef guard is visible here - the TODO below may be stale. */
+/* TODO: Implement the functions and remove the ifndef guard */
+}
+
+/*
+ * FPGA variant of the update_clocks hook: refresh the cached clocks in
+ * clk_mgr->clks, then update the DPP and DTB DTOs.
+ */
+static void dcn35_update_clocks_fpga(struct clk_mgr *clk_mgr,
+		struct dc_state *context,
+		bool safe_to_lower)
+{
+	struct clk_mgr_internal *internal = TO_CLK_MGR_INTERNAL(clk_mgr);
+	struct dc_clocks *requested = &context->bw_ctx.bw.dcn.clk;
+	/* Requested fclk as it was before the fixed value is written below. */
+	int fclk_adj = requested->fclk_khz;
+
+	/* TODO: remove this after correctly set by DML */
+	requested->dcfclk_khz = 400000;
+	requested->socclk_khz = 400000;
+
+	/* Min fclk = 1.2GHz since all the extra scemi logic seems to run off of it */
+	requested->fclk_khz = 4320000;
+
+	/*
+	 * Copy each requested clock into the cached state when
+	 * should_set_clock() accepts the change for this safe_to_lower pass.
+	 */
+	if (should_set_clock(safe_to_lower, requested->phyclk_khz, clk_mgr->clks.phyclk_khz))
+		clk_mgr->clks.phyclk_khz = requested->phyclk_khz;
+
+	if (should_set_clock(safe_to_lower, requested->dcfclk_khz, clk_mgr->clks.dcfclk_khz))
+		clk_mgr->clks.dcfclk_khz = requested->dcfclk_khz;
+
+	if (should_set_clock(safe_to_lower, requested->dcfclk_deep_sleep_khz,
+			     clk_mgr->clks.dcfclk_deep_sleep_khz))
+		clk_mgr->clks.dcfclk_deep_sleep_khz = requested->dcfclk_deep_sleep_khz;
+
+	if (should_set_clock(safe_to_lower, requested->socclk_khz, clk_mgr->clks.socclk_khz))
+		clk_mgr->clks.socclk_khz = requested->socclk_khz;
+
+	if (should_set_clock(safe_to_lower, requested->dramclk_khz, clk_mgr->clks.dramclk_khz))
+		clk_mgr->clks.dramclk_khz = requested->dramclk_khz;
+
+	if (should_set_clock(safe_to_lower, requested->dppclk_khz, clk_mgr->clks.dppclk_khz))
+		clk_mgr->clks.dppclk_khz = requested->dppclk_khz;
+
+	if (should_set_clock(safe_to_lower, fclk_adj, clk_mgr->clks.fclk_khz))
+		clk_mgr->clks.fclk_khz = fclk_adj;
+
+	if (should_set_clock(safe_to_lower, requested->dispclk_khz, clk_mgr->clks.dispclk_khz))
+		clk_mgr->clks.dispclk_khz = requested->dispclk_khz;
+
+	/*
+	 * Both fclk and ref_dppclk run on the same scemi clock.
+	 * So take the higher value since the DPP DTO is typically programmed
+	 * such that max dppclk is 1:1 with ref_dppclk.
+	 */
+	if (clk_mgr->clks.fclk_khz > clk_mgr->clks.dppclk_khz)
+		clk_mgr->clks.dppclk_khz = clk_mgr->clks.fclk_khz;
+	else if (clk_mgr->clks.dppclk_khz > clk_mgr->clks.fclk_khz)
+		clk_mgr->clks.fclk_khz = clk_mgr->clks.dppclk_khz;
+
+	/* Both fclk and ref_dppclk run on the same scemi clock. */
+	internal->dccg->ref_dppclk = clk_mgr->clks.fclk_khz;
+
+	/* TODO: set dtbclk in correct place */
+	clk_mgr->clks.dtbclk_en = true;
+	dm_set_dcn_clocks(clk_mgr->ctx, &clk_mgr->clks);
+	dcn35_update_clocks_update_dpp_dto(internal, context, safe_to_lower);
+	dcn35_update_clocks_update_dtb_dto(internal, context, clk_mgr->clks.ref_dtbclk_khz);
+}
+
+static struct clk_mgr_funcs dcn35_funcs = {	/* installed by dcn35_clk_mgr_construct() */
+	.init_clocks = dcn35_init_clocks,
+	.update_clocks = dcn35_update_clocks,
+	.are_clock_states_equal = dcn35_are_clock_states_equal,
+	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+	.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
+	.notify_wm_ranges = dcn35_notify_wm_ranges,
+	.enable_pme_wa = dcn35_enable_pme_wa,
+	.set_low_power_state = dcn35_set_low_power_state,
+	.exit_low_power_state = dcn35_exit_low_power_state,
+	.is_ips_supported = dcn35_is_ips_supported,
+};
+
+struct clk_mgr_funcs dcn35_fpga_funcs = {	/* uses the _fpga init/update variants */
+	.init_clocks = dcn35_init_clocks_fpga,
+	.update_clocks = dcn35_update_clocks_fpga,
+	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+	.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
+};
+
+/* Copy the clock tables this driver needs from the DCN351 DPM table into the DCN35 one. */
+static void translate_to_DpmClocks_t_dcn35(struct dcn351_smu_dpm_clks *smu_dpm_clks_a,
+		struct dcn35_smu_dpm_clks *smu_dpm_clks_b)
+{
+	const DpmClocks_t_dcn351 *src;
+	DpmClocks_t_dcn35 *dst;
+	uint8_t i;
+
+	/* Nothing to translate unless both wrappers and both tables exist. */
+	if (!smu_dpm_clks_a || !smu_dpm_clks_b)
+		return;
+	src = smu_dpm_clks_a->dpm_clks;
+	dst = smu_dpm_clks_b->dpm_clks;
+	if (!src || !dst)
+		return;
+
+	for (i = 0; i < NUM_DCFCLK_DPM_LEVELS; i++)
+		dst->DcfClocks[i] = src->DcfClocks[i];
+
+	for (i = 0; i < NUM_DISPCLK_DPM_LEVELS; i++)
+		dst->DispClocks[i] = src->DispClocks[i];
+
+	for (i = 0; i < NUM_DPPCLK_DPM_LEVELS; i++)
+		dst->DppClocks[i] = src->DppClocks[i];
+
+	for (i = 0; i < NUM_FCLK_DPM_LEVELS; i++) {
+		dst->FclkClocks_Freq[i] = src->FclkClocks_Freq[i];
+		dst->FclkClocks_Voltage[i] = src->FclkClocks_Voltage[i];
+	}
+
+	for (i = 0; i < NUM_MEM_PSTATE_LEVELS; i++) {
+		dst->MemPstateTable[i].MemClk = src->MemPstateTable[i].MemClk;
+		dst->MemPstateTable[i].UClk = src->MemPstateTable[i].UClk;
+		dst->MemPstateTable[i].Voltage = src->MemPstateTable[i].Voltage;
+		dst->MemPstateTable[i].WckRatio = src->MemPstateTable[i].WckRatio;
+	}
+
+	for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) {
+		dst->SocClocks[i] = src->SocClocks[i];
+		dst->SocVoltage[i] = src->SocVoltage[i];
+	}
+
+	/* Scalar GFX clock limits and enabled-level counts. */
+	dst->MaxGfxClk = src->MaxGfxClk;
+	dst->MinGfxClk = src->MinGfxClk;
+	dst->NumDcfClkLevelsEnabled = src->NumDcfClkLevelsEnabled;
+	dst->NumDispClkLevelsEnabled = src->NumDispClkLevelsEnabled;
+	dst->NumFclkLevelsEnabled = src->NumFclkLevelsEnabled;
+	dst->NumMemPstatesEnabled = src->NumMemPstatesEnabled;
+	dst->NumSocClkLevelsEnabled = src->NumSocClkLevelsEnabled;
+}
+/*
+ * Set up a DCN35 clock manager: install the function table, allocate the
+ * buffers shared with the SMU (static dummies stand in if that fails), read
+ * boot-time clock state and, if enabled, build bw_params from the DPM table.
+ */
+void dcn35_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_dcn35 *clk_mgr,
+		struct pp_smu_funcs *pp_smu,
+		struct dccg *dccg)
+{
+	struct dcn35_smu_dpm_clks smu_dpm_clks = { 0 };
+	struct dcn351_smu_dpm_clks smu_dpm_clks_dcn351 = { 0 };
+
+	clk_mgr->base.base.ctx = ctx;
+	clk_mgr->base.base.funcs = &dcn35_funcs;
+	clk_mgr->base.pp_smu = pp_smu;
+	clk_mgr->base.dccg = dccg;
+	clk_mgr->base.dfs_bypass_disp_clk = 0;
+	clk_mgr->base.dprefclk_ss_percentage = 0;
+	clk_mgr->base.dprefclk_ss_divider = 1000;
+	clk_mgr->base.ss_on_dprefclk = false;
+	clk_mgr->base.dfs_ref_freq_khz = 48000;
+	if (ctx->dce_version != DCN_VERSION_3_51) {
+		clk_mgr->base.regs = &clk_mgr_regs_dcn35;
+		clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn35;
+		clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn35;
+	}
+
+	clk_mgr->smu_wm_set.wm_set = (struct dcn35_watermarks *)dm_helpers_allocate_gpu_mem(
+				clk_mgr->base.base.ctx,
+				DC_MEM_ALLOC_TYPE_GART,
+				sizeof(struct dcn35_watermarks),
+				&clk_mgr->smu_wm_set.mc_address.quad_part);
+
+	if (!clk_mgr->smu_wm_set.wm_set) {
+		clk_mgr->smu_wm_set.wm_set = &dummy_wms;
+		clk_mgr->smu_wm_set.mc_address.quad_part = 0;
+	}
+	ASSERT(clk_mgr->smu_wm_set.wm_set);
+
+	smu_dpm_clks.dpm_clks = (DpmClocks_t_dcn35 *)dm_helpers_allocate_gpu_mem(
+				clk_mgr->base.base.ctx,
+				DC_MEM_ALLOC_TYPE_GART,
+				sizeof(DpmClocks_t_dcn35),
+				&smu_dpm_clks.mc_address.quad_part);
+	if (smu_dpm_clks.dpm_clks == NULL) {
+		smu_dpm_clks.dpm_clks = &dummy_clocks;
+		smu_dpm_clks.mc_address.quad_part = 0;
+	}
+	ASSERT(smu_dpm_clks.dpm_clks);
+
+	if (ctx->dce_version == DCN_VERSION_3_51) {
+		smu_dpm_clks_dcn351.dpm_clks = (DpmClocks_t_dcn351 *)dm_helpers_allocate_gpu_mem(
+					clk_mgr->base.base.ctx,
+					DC_MEM_ALLOC_TYPE_GART,
+					sizeof(DpmClocks_t_dcn351),
+					&smu_dpm_clks_dcn351.mc_address.quad_part);
+		if (smu_dpm_clks_dcn351.dpm_clks == NULL) {
+			smu_dpm_clks_dcn351.dpm_clks = &dummy_clocks_dcn351;
+			smu_dpm_clks_dcn351.mc_address.quad_part = 0;
+		}
+	}
+
+	clk_mgr->base.smu_ver = dcn35_smu_get_smu_version(&clk_mgr->base);
+	if (clk_mgr->base.smu_ver)
+		clk_mgr->base.smu_present = true;
+
+	/* TODO: Check we get what we expect during bringup */
+	clk_mgr->base.base.dentist_vco_freq_khz = get_vco_frequency_from_reg(&clk_mgr->base);
+
+	/* integrated_info may be NULL (it is checked further down): use the DDR5 table then. */
+	if (ctx->dc_bios->integrated_info && ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType)
+		dcn35_bw_params.wm_table = lpddr5_wm_table;
+	else
+		dcn35_bw_params.wm_table = ddr5_wm_table;
+	/* Saved clocks configured at boot for debug purposes */
+	dcn35_save_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr);
+
+	clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base);
+	clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
+
+	dce_clock_read_ss_info(&clk_mgr->base);
+	/*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
+	dcn35_read_ss_info_from_lut(&clk_mgr->base);
+
+	clk_mgr->base.base.bw_params = &dcn35_bw_params;
+
+	if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
+		int i;
+		if (ctx->dce_version == DCN_VERSION_3_51) {
+			dcn351_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks_dcn351);
+			translate_to_DpmClocks_t_dcn35(&smu_dpm_clks_dcn351, &smu_dpm_clks);
+		} else
+			dcn35_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
+		DC_LOG_SMU("NumDcfClkLevelsEnabled: %d\n"
+			   "NumDispClkLevelsEnabled: %d\n"
+			   "NumSocClkLevelsEnabled: %d\n"
+			   "VcnClkLevelsEnabled: %d\n"
+			   "FClkLevelsEnabled: %d\n"
+			   "NumMemPstatesEnabled: %d\n"
+			   "MinGfxClk: %d\n"
+			   "MaxGfxClk: %d\n",
+			   smu_dpm_clks.dpm_clks->NumDcfClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumDispClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->VcnClkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumFclkLevelsEnabled,
+			   smu_dpm_clks.dpm_clks->NumMemPstatesEnabled,
+			   smu_dpm_clks.dpm_clks->MinGfxClk,
+			   smu_dpm_clks.dpm_clks->MaxGfxClk);
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumDcfClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->DcfClocks[%d] = %d\n",
+				   i,
+				   smu_dpm_clks.dpm_clks->DcfClocks[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumDispClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->DispClocks[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->DispClocks[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->SocClocks[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->SocClocks[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumFclkLevelsEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->FclkClocks_Freq[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->FclkClocks_Freq[i]);
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->FclkClocks_Voltage[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->FclkClocks_Voltage[i]);
+		}
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumSocClkLevelsEnabled; i++)
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks->SocVoltage[%d] = %d\n",
+				   i, smu_dpm_clks.dpm_clks->SocVoltage[i]);
+
+		for (i = 0; i < smu_dpm_clks.dpm_clks->NumMemPstatesEnabled; i++) {
+			DC_LOG_SMU("smu_dpm_clks.dpm_clks.MemPstateTable[%d].UClk = %d\n"
+				   "smu_dpm_clks.dpm_clks->MemPstateTable[%d].MemClk= %d\n"
+				   "smu_dpm_clks.dpm_clks->MemPstateTable[%d].Voltage = %d\n",
+				   i, smu_dpm_clks.dpm_clks->MemPstateTable[i].UClk,
+				   i, smu_dpm_clks.dpm_clks->MemPstateTable[i].MemClk,
+				   i, smu_dpm_clks.dpm_clks->MemPstateTable[i].Voltage);
+		}
+
+		if (ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) {
+			dcn35_clk_mgr_helper_populate_bw_params(
+					&clk_mgr->base,
+					ctx->dc_bios->integrated_info,
+					smu_dpm_clks.dpm_clks);
+		}
+	}
+
+	if (smu_dpm_clks.dpm_clks && smu_dpm_clks.mc_address.quad_part != 0)
+		dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART,
+				smu_dpm_clks.dpm_clks);
+
+	if (smu_dpm_clks_dcn351.dpm_clks && smu_dpm_clks_dcn351.mc_address.quad_part != 0)
+		dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART,
+				smu_dpm_clks_dcn351.dpm_clks);
+
+	if (ctx->dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) {
+		bool ips_support = false;
+
+		/*avoid call pmfw at init*/
+		ips_support = dcn35_smu_get_ips_supported(&clk_mgr->base);
+		if (ips_support) {
+			ctx->dc->debug.ignore_pg = false;
+			ctx->dc->debug.disable_dpp_power_gate = false;
+			ctx->dc->debug.disable_hubp_power_gate = false;
+			ctx->dc->debug.disable_dsc_power_gate = false;
+
+			/* Disable dynamic IPS2 in older PMFW (93.12) for Z8 interop. */
+			if (ctx->dc->config.disable_ips == DMUB_IPS_ENABLE &&
+			    ctx->dce_version != DCN_VERSION_3_51 &&
+			    ((clk_mgr->base.smu_ver & 0x00FFFFFF) <= 0x005d0c00))
+				ctx->dc->config.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
+		} else {
+			/*let's reset the config control flag*/
+			ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/
+		}
+	}
+}
+
+void dcn35_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int)
+{
+	struct clk_mgr_dcn35 *clk_mgr = TO_CLK_MGR_DCN35(clk_mgr_int);
+
+	/* Free with the GART type used at allocation; the static fallback has mc_address 0. */
+	if (clk_mgr->smu_wm_set.wm_set && clk_mgr->smu_wm_set.mc_address.quad_part != 0)
+		dm_helpers_free_gpu_mem(clk_mgr_int->base.ctx, DC_MEM_ALLOC_TYPE_GART, clk_mgr->smu_wm_set.wm_set);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h
new file mode 100644
index 000000000000..a12a9bf90806
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN35_CLK_MGR_H__
+#define __DCN35_CLK_MGR_H__
+#include "clk_mgr_internal.h"
+
+#define NUM_CLOCK_SOURCES 5 /* length of dcn35_ss_info_table.ss_percentage */
+
+struct dcn35_watermarks;
+/* Watermark table pointer together with the GPU (mc) address of the same buffer. */
+struct dcn35_smu_watermark_set {
+ struct dcn35_watermarks *wm_set;
+ union large_integer mc_address;
+};
+/* Spread-spectrum lookup: one percentage per clock source, sharing one divider. */
+struct dcn35_ss_info_table {
+ uint32_t ss_divider;
+ uint32_t ss_percentage[NUM_CLOCK_SOURCES];
+};
+/* DCN35 clock manager: the generic internal manager plus the SMU watermark buffer. */
+struct clk_mgr_dcn35 {
+ struct clk_mgr_internal base;
+ struct dcn35_smu_watermark_set smu_wm_set;
+};
+
+bool dcn35_are_clock_states_equal(struct dc_clocks *a,
+ struct dc_clocks *b);
+void dcn35_init_clocks(struct clk_mgr *clk_mgr);
+void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ bool safe_to_lower);
+/* Initialise a caller-provided clk_mgr_dcn35 for the given DC context. */
+void dcn35_clk_mgr_construct(struct dc_context *ctx,
+ struct clk_mgr_dcn35 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg);
+/* Free the GPU watermark buffer held in the clock manager's smu_wm_set. */
+void dcn35_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
+/* DCN351 counterpart of dcn35_clk_mgr_construct(); takes the same parameters. */
+void dcn351_clk_mgr_construct(struct dc_context *ctx,
+ struct clk_mgr_dcn35 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg);
+#endif //__DCN35_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
new file mode 100644
index 000000000000..604d256cb47a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c
@@ -0,0 +1,508 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+
+#include "core_types.h"
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+#include "dm_helpers.h"
+#include "dcn35_smu.h"
+
+#include "mp/mp_14_0_0_offset.h"
+#include "mp/mp_14_0_0_sh_mask.h"
+
+/* TODO: Use the real headers when they're correct */
+#define MP1_BASE__INST0_SEG0 0x00016000
+#define MP1_BASE__INST0_SEG1 0x0243FC00
+#define MP1_BASE__INST0_SEG2 0x00DC0000
+#define MP1_BASE__INST0_SEG3 0x00E00000
+#define MP1_BASE__INST0_SEG4 0x00E40000
+#define MP1_BASE__INST0_SEG5 0
+
+#ifdef BASE_INNER
+#undef BASE_INNER
+#endif
+
+#define BASE_INNER(seg) MP1_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define REG(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name)
+
+#define FN(reg_name, field) \
+ FD(reg_name##__##field)
+
+#include "logger_types.h"
+#undef DC_LOGGER
+#define DC_LOGGER \
+ CTX->logger
+#define smu_print(str, ...) do { DC_LOG_SMU(str, ##__VA_ARGS__); } while (0)
+
+#define VBIOSSMC_MSG_TestMessage 0x1
+#define VBIOSSMC_MSG_GetSmuVersion 0x2
+#define VBIOSSMC_MSG_PowerUpGfx 0x3
+#define VBIOSSMC_MSG_SetDispclkFreq 0x4
+#define VBIOSSMC_MSG_SetDprefclkFreq 0x5 //Not used. DPRef is constant
+#define VBIOSSMC_MSG_SetDppclkFreq 0x6
+#define VBIOSSMC_MSG_SetHardMinDcfclkByFreq 0x7
+#define VBIOSSMC_MSG_SetMinDeepSleepDcfclk 0x8
+#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq 0x9 //Keep it in case VMIN does not support phy clk
+#define VBIOSSMC_MSG_GetFclkFrequency 0xA
+#define VBIOSSMC_MSG_SetDisplayCount 0xB //Not used anymore
+#define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0xC //To ask PMFW turn off TMDP 48MHz refclk during display off to save power
+#define VBIOSSMC_MSG_UpdatePmeRestore 0xD
+#define VBIOSSMC_MSG_SetVbiosDramAddrHigh 0xE //Used for WM table txfr
+#define VBIOSSMC_MSG_SetVbiosDramAddrLow 0xF
+#define VBIOSSMC_MSG_TransferTableSmu2Dram 0x10
+#define VBIOSSMC_MSG_TransferTableDram2Smu 0x11
+#define VBIOSSMC_MSG_SetDisplayIdleOptimizations 0x12
+#define VBIOSSMC_MSG_GetDprefclkFreq 0x13
+#define VBIOSSMC_MSG_GetDtbclkFreq 0x14
+#define VBIOSSMC_MSG_AllowZstatesEntry 0x15
+#define VBIOSSMC_MSG_DisallowZstatesEntry 0x16
+#define VBIOSSMC_MSG_SetDtbClk 0x17
+#define VBIOSSMC_MSG_DispIPS2Entry 0x18 ///< Display IPS2 entry, DMU
+#define VBIOSSMC_MSG_DispIPS2Exit 0x19 ///< Display IPS2 exit, DMU
+#define VBIOSSMC_MSG_DisableLSdma 0x1A ///< Disable LSDMA; only sent by VBIOS
+#define VBIOSSMC_MSG_DpControllerPhyStatus 0x1B ///< Inform PMFW about the pre conditions for turning SLDO2 on/off . bit[0]==1 precondition is met, bit[1-2] are for DPPHY number
+#define VBIOSSMC_MSG_QueryIPS2Support 0x1C ///< Return 1: support; else not supported
+#define VBIOSSMC_MSG_NotifyHostRouterBW 0x1D
+#define VBIOSSMC_Message_Count 0x1E
+
+#define VBIOSSMC_Status_BUSY 0x0
+#define VBIOSSMC_Result_OK 0x1
+#define VBIOSSMC_Result_Failed 0xFF
+#define VBIOSSMC_Result_UnknownCmd 0xFE
+#define VBIOSSMC_Result_CmdRejectedPrereq 0xFD
+#define VBIOSSMC_Result_CmdRejectedBusy 0xFC
+
+union dcn35_dpia_host_router_bw {
+ struct {
+ uint32_t hr_id : 16;
+ uint32_t bw_mbps : 16;
+ } bits;
+ uint32_t all;
+};
+
+/*
+ * Function to be used instead of REG_WAIT macro because the wait ends when
+ * the register is NOT EQUAL to zero, and because the translation in msg_if.h
+ * won't work with REG_WAIT.
+ */
+static uint32_t dcn35_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
+{
+ uint32_t res_val = VBIOSSMC_Status_BUSY;
+
+ do {
+ res_val = REG_READ(MP1_SMN_C2PMSG_91);
+ if (res_val != VBIOSSMC_Status_BUSY)
+ break;
+
+ if (delay_us >= 1000)
+ msleep(delay_us/1000);
+ else if (delay_us > 0)
+ udelay(delay_us);
+
+ if (clk_mgr->base.ctx->dc->debug.disable_timeout)
+ max_retries++;
+ } while (max_retries--);
+
+ return res_val;
+}
+
+static int dcn35_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
+ unsigned int msg_id,
+ unsigned int param)
+{
+ uint32_t result;
+
+ result = dcn35_smu_wait_for_response(clk_mgr, 10, 2000000);
+ ASSERT(result == VBIOSSMC_Result_OK);
+
+ if (result != VBIOSSMC_Result_OK) {
+ DC_LOG_WARNING("SMU response after wait: %d, msg id = %d\n", result, msg_id);
+
+ if (result == VBIOSSMC_Status_BUSY)
+ return -1;
+ }
+
+ /* First clear response register */
+ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
+
+ /* Set the parameter register for the SMU message; clock requests pass MHz */
+ REG_WRITE(MP1_SMN_C2PMSG_83, param);
+
+ /* Trigger the message transaction by writing the message ID */
+ REG_WRITE(MP1_SMN_C2PMSG_67, msg_id);
+
+ result = dcn35_smu_wait_for_response(clk_mgr, 10, 2000000);
+
+ if (result == VBIOSSMC_Result_Failed) {
+ if (msg_id == VBIOSSMC_MSG_TransferTableDram2Smu &&
+ param == TABLE_WATERMARKS)
+ DC_LOG_WARNING("Watermarks table not configured properly by SMU");
+ else
+ ASSERT(0);
+ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
+ DC_LOG_WARNING("SMU response after wait: %d, msg id = %d\n", result, msg_id);
+ return -1;
+ }
+
+ if (IS_SMU_TIMEOUT(result)) {
+ ASSERT(0);
+ result = dcn35_smu_wait_for_response(clk_mgr, 10, 2000000);
+ //dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
+ DC_LOG_WARNING("SMU response after wait: %d, msg id = %d\n", result, msg_id);
+ }
+
+ return REG_READ(MP1_SMN_C2PMSG_83);
+}
+
+int dcn35_smu_get_smu_version(struct clk_mgr_internal *clk_mgr)
+{
+ return dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_GetSmuVersion,
+ 0);
+}
+
+
+int dcn35_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz)
+{
+ int actual_dispclk_set_mhz = -1;
+
+ if (!clk_mgr->smu_present)
+ return requested_dispclk_khz;
+
+ /* Unit of SMU msg parameter is Mhz */
+ actual_dispclk_set_mhz = dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDispclkFreq,
+ khz_to_mhz_ceil(requested_dispclk_khz));
+
+ smu_print("requested_dispclk_khz = %d, actual_dispclk_set_mhz: %d\n", requested_dispclk_khz, actual_dispclk_set_mhz);
+ return actual_dispclk_set_mhz * 1000;
+}
+
+int dcn35_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr)
+{
+ int actual_dprefclk_set_mhz = -1;
+
+ if (!clk_mgr->smu_present)
+ return clk_mgr->base.dprefclk_khz;
+
+ actual_dprefclk_set_mhz = dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDprefclkFreq,
+ khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz));
+
+ /* TODO: add code for programming DP DTO, currently this is done by command table */
+
+ return actual_dprefclk_set_mhz * 1000;
+}
+
+int dcn35_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz)
+{
+ int actual_dcfclk_set_mhz = -1;
+
+ if (!clk_mgr->smu_present)
+ return requested_dcfclk_khz;
+
+ actual_dcfclk_set_mhz = dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
+ khz_to_mhz_ceil(requested_dcfclk_khz));
+
+ smu_print("requested_dcfclk_khz = %d, actual_dcfclk_set_mhz: %d\n", requested_dcfclk_khz, actual_dcfclk_set_mhz);
+
+ return actual_dcfclk_set_mhz * 1000;
+}
+
+int dcn35_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz)
+{
+ int actual_min_ds_dcfclk_mhz = -1;
+
+ if (!clk_mgr->smu_present)
+ return requested_min_ds_dcfclk_khz;
+
+ actual_min_ds_dcfclk_mhz = dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetMinDeepSleepDcfclk,
+ khz_to_mhz_ceil(requested_min_ds_dcfclk_khz));
+
+ smu_print("requested_min_ds_dcfclk_khz = %d, actual_min_ds_dcfclk_mhz: %d\n", requested_min_ds_dcfclk_khz, actual_min_ds_dcfclk_mhz);
+
+ return actual_min_ds_dcfclk_mhz * 1000;
+}
+
+int dcn35_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz)
+{
+ int actual_dppclk_set_mhz = -1;
+
+ if (!clk_mgr->smu_present)
+ return requested_dpp_khz;
+
+ actual_dppclk_set_mhz = dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDppclkFreq,
+ khz_to_mhz_ceil(requested_dpp_khz));
+
+ smu_print("requested_dpp_khz = %d, actual_dppclk_set_mhz: %d\n", requested_dpp_khz, actual_dppclk_set_mhz);
+
+ return actual_dppclk_set_mhz * 1000;
+}
+
+void dcn35_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info)
+{
+ if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
+ return;
+
+ if (!clk_mgr->smu_present)
+ return;
+
+ //TODO: Work with smu team to define optimization options.
+ dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDisplayIdleOptimizations,
+ idle_info);
+ smu_print("%s: VBIOSSMC_MSG_SetDisplayIdleOptimizations idle_info = %x\n", __func__, idle_info);
+}
+
+void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+ union display_idle_optimization_u idle_info = { 0 };
+
+ if (!clk_mgr->smu_present)
+ return;
+
+ if (enable) {
+ idle_info.idle_info.df_request_disabled = 1;
+ idle_info.idle_info.phy_ref_clk_off = 1;
+ }
+
+ dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDisplayIdleOptimizations,
+ idle_info.data);
+ smu_print("%s smu_enable_phy_refclk_pwrdwn = %d\n", __func__, enable ? 1 : 0);
+}
+
+void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_UpdatePmeRestore,
+ 0);
+ smu_print("%s: SMC_MSG_UpdatePmeRestore\n", __func__);
+}
+
+void dcn35_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn35_smu_send_msg_with_param(clk_mgr,
+ VBIOSSMC_MSG_SetVbiosDramAddrHigh, addr_high);
+}
+
+void dcn35_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn35_smu_send_msg_with_param(clk_mgr,
+ VBIOSSMC_MSG_SetVbiosDramAddrLow, addr_low);
+}
+
+void dcn35_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn35_smu_send_msg_with_param(clk_mgr,
+ VBIOSSMC_MSG_TransferTableSmu2Dram, TABLE_DPMCLOCKS);
+}
+
+void dcn35_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn35_smu_send_msg_with_param(clk_mgr,
+ VBIOSSMC_MSG_TransferTableDram2Smu, TABLE_WATERMARKS);
+}
+
+void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support)
+{
+ unsigned int msg_id, param, retv;
+
+ if (!clk_mgr->smu_present)
+ return;
+
+ switch (support) {
+
+ case DCN_ZSTATE_SUPPORT_ALLOW:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = (1 << 10) | (1 << 9) | (1 << 8);
+ smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW, param = 0x%x\n", __func__, param);
+ break;
+
+ case DCN_ZSTATE_SUPPORT_DISALLOW:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = 0;
+ smu_print("%s: SMC_MSG_AllowZstatesEntry msg_id = DISALLOW, param = 0x%x\n", __func__, param);
+ break;
+
+
+ case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = (1 << 10);
+ smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z10_ONLY, param = 0x%x\n", __func__, param);
+ break;
+
+ case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = (1 << 10) | (1 << 8);
+ smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_Z10_ONLY, param = 0x%x\n", __func__, param);
+ break;
+
+ case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY:
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = (1 << 8);
+ smu_print("%s: SMC_MSG_AllowZstatesEntry msg = ALLOW_Z8_ONLY, param = 0x%x\n", __func__, param);
+ break;
+
+ default: //DCN_ZSTATE_SUPPORT_UNKNOWN
+ msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+ param = 0;
+ break;
+ }
+
+
+ retv = dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ msg_id,
+ param);
+ smu_print("%s: msg_id = %d, param = 0x%x, return = 0x%x\n", __func__, msg_id, param, retv);
+}
+
+int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr)
+{
+ int dprefclk;
+
+ if (!clk_mgr->smu_present)
+ return 0;
+
+ dprefclk = dcn35_smu_send_msg_with_param(clk_mgr,
+ VBIOSSMC_MSG_GetDprefclkFreq,
+ 0);
+
+ smu_print("%s: SMU DPREF clk = %d mhz\n", __func__, dprefclk);
+ return dprefclk * 1000;
+}
+
+int dcn35_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr)
+{
+ int dtbclk;
+
+ if (!clk_mgr->smu_present)
+ return 0;
+
+ dtbclk = dcn35_smu_send_msg_with_param(clk_mgr,
+ VBIOSSMC_MSG_GetDtbclkFreq,
+ 0);
+
+ smu_print("%s: get_dtbclk = %dmhz\n", __func__, dtbclk);
+ return dtbclk * 1000;
+}
+/* Arg = 1: turn DTB CLK on; 0: turn DTB CLK off. When it is on, it runs at 600 MHz */
+void dcn35_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_SetDtbClk,
+ enable);
+ smu_print("%s: smu_set_dtbclk = %d\n", __func__, enable ? 1 : 0);
+}
+
+void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+ if (!clk_mgr->smu_present)
+ return;
+
+ dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown,
+ enable);
+ smu_print("%s: smu_enable_48mhz_tmdp_refclk_pwrdwn = %d\n", __func__, enable ? 1 : 0);
+}
+
+int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr)
+{
+ int retv;
+
+ if (!clk_mgr->smu_present)
+ return 0;
+
+ retv = dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_DispIPS2Exit,
+ 0);
+ smu_print("%s: smu_exit_low_power_state return = %d\n", __func__, retv);
+ return retv;
+}
+
+int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr)
+{
+ int retv;
+
+ if (!clk_mgr->smu_present)
+ return 0;
+
+ retv = dcn35_smu_send_msg_with_param(
+ clk_mgr,
+ VBIOSSMC_MSG_QueryIPS2Support,
+ 0);
+
+ //smu_print("%s: VBIOSSMC_MSG_QueryIPS2Support return = %x\n", __func__, retv);
+ return retv;
+}
+
+void dcn35_smu_notify_host_router_bw(struct clk_mgr_internal *clk_mgr, uint32_t hr_id, uint32_t bw_kbps)
+{
+ union dcn35_dpia_host_router_bw msg_data = { 0 };
+
+ msg_data.bits.hr_id = hr_id;
+ msg_data.bits.bw_mbps = bw_kbps / 1000;
+
+ dcn35_smu_send_msg_with_param(clk_mgr, VBIOSSMC_MSG_NotifyHostRouterBW, msg_data.all);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h
new file mode 100644
index 000000000000..ab9d21ba0c43
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_35_SMU_H_
+#define DAL_DC_35_SMU_H_
+
+#include "os_types.h"
+
+#ifndef PMFW_DRIVER_IF_H
+#define PMFW_DRIVER_IF_H
+#define PMFW_DRIVER_IF_VERSION 4
+
+typedef enum {
+ DSPCLK_DCFCLK = 0,
+ DSPCLK_DISPCLK,
+ DSPCLK_PIXCLK,
+ DSPCLK_PHYCLK,
+ DSPCLK_COUNT,
+} DSPCLK_e;
+
+typedef struct {
+ uint16_t Freq; // in MHz
+ uint16_t Vid; // min voltage in SVI3 VID
+} DisplayClockTable_t;
+
+typedef struct {
+ uint16_t MinClock; // This is either DCFCLK or SOCCLK (in MHz)
+ uint16_t MaxClock; // This is either DCFCLK or SOCCLK (in MHz)
+ uint16_t MinMclk;
+ uint16_t MaxMclk;
+
+ uint8_t WmSetting;
+ uint8_t WmType; // Used for normal pstate change or memory retraining
+ uint8_t Padding[2];
+} WatermarkRowGeneric_t;
+
+#define NUM_WM_RANGES 4
+#define WM_PSTATE_CHG 0
+#define WM_RETRAINING 1
+
+typedef enum {
+ WM_SOCCLK = 0,
+ WM_DCFCLK,
+ WM_COUNT,
+} WM_CLOCK_e;
+
+typedef struct {
+ // Watermarks
+ WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
+
+ uint32_t MmHubPadding[7]; // SMU internal use
+} Watermarks_t;
+
+#define NUM_DCFCLK_DPM_LEVELS 8
+#define NUM_DISPCLK_DPM_LEVELS 8
+#define NUM_DPPCLK_DPM_LEVELS 8
+#define NUM_SOCCLK_DPM_LEVELS 8
+#define NUM_VCN_DPM_LEVELS 8
+#define NUM_SOC_VOLTAGE_LEVELS 8
+#define NUM_VPE_DPM_LEVELS 8
+#define NUM_FCLK_DPM_LEVELS 8
+#define NUM_MEM_PSTATE_LEVELS 4
+
+typedef enum{
+ WCK_RATIO_1_1 = 0, // DDR5, Wck:ck is always 1:1;
+ WCK_RATIO_1_2,
+ WCK_RATIO_1_4,
+ WCK_RATIO_MAX
+} WCK_RATIO_e;
+
+typedef struct {
+ uint32_t UClk;
+ uint32_t MemClk;
+ uint32_t Voltage;
+ uint8_t WckRatio;
+ uint8_t Spare[3];
+} MemPstateTable_t;
+
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks[NUM_VCN_DPM_LEVELS];
+ uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+ uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+ uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk
+ uint8_t VpeClkLevelsEnabled;
+ uint8_t NumMemPstatesEnabled;
+ uint8_t NumFclkLevelsEnabled;
+ uint8_t spare[2];
+
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_t_dcn35;
+
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t VClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+ uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+ uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; // Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t Vcn0ClkLevelsEnabled; // Applies to both Vclk0 and Dclk0
+ uint8_t Vcn1ClkLevelsEnabled; // Applies to both Vclk1 and Dclk1
+ uint8_t VpeClkLevelsEnabled;
+ uint8_t NumMemPstatesEnabled;
+ uint8_t NumFclkLevelsEnabled;
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_t_dcn351;
+
+#define TABLE_BIOS_IF 0 // Called by BIOS
+#define TABLE_WATERMARKS 1 // Called by DAL through VBIOS
+#define TABLE_CUSTOM_DPM 2 // Called by Driver
+#define TABLE_SPARE1 3
+#define TABLE_DPMCLOCKS 4 // Called by Driver
+#define TABLE_MOMENTARY_PM 5 // Called by Tools
+#define TABLE_MODERN_STDBY 6 // Called by Tools for Modern Standby Log
+#define TABLE_SMU_METRICS 7 // Called by Driver
+#define TABLE_COUNT 8
+
+#endif
+
+struct dcn35_watermarks {
+ // Watermarks
+ WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
+
+ uint32_t MmHubPadding[7]; // SMU internal use
+};
+
+struct dcn35_smu_dpm_clks {
+ DpmClocks_t_dcn35 *dpm_clks;
+ union large_integer mc_address;
+};
+
+struct dcn351_smu_dpm_clks {
+ DpmClocks_t_dcn351 *dpm_clks;
+ union large_integer mc_address;
+};
+/* TODO: taken from vgh, may not be correct */
+struct display_idle_optimization {
+ unsigned int df_request_disabled : 1;
+ unsigned int phy_ref_clk_off : 1;
+ unsigned int s0i2_rdy : 1;
+ unsigned int reserved : 29;
+};
+
+union display_idle_optimization_u {
+ struct display_idle_optimization idle_info;
+ uint32_t data;
+};
+
+int dcn35_smu_get_smu_version(struct clk_mgr_internal *clk_mgr);
+int dcn35_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
+int dcn35_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr);
+int dcn35_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz);
+int dcn35_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz);
+int dcn35_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz);
+void dcn35_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info);
+void dcn35_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
+void dcn35_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
+void dcn35_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high);
+void dcn35_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low);
+void dcn35_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr);
+void dcn35_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
+
+void dcn35_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support);
+void dcn35_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable);
+void dcn35_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
+
+int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr);
+int dcn35_smu_get_ips_supported(struct clk_mgr_internal *clk_mgr);
+int dcn35_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr);
+int dcn35_smu_get_dprefclk(struct clk_mgr_internal *clk_mgr);
+void dcn35_smu_notify_host_router_bw(struct clk_mgr_internal *clk_mgr, uint32_t hr_id, uint32_t bw_kbps);
+
+#endif /* DAL_DC_35_SMU_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h
new file mode 100644
index 000000000000..2e0d34fd7512
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef DALSMC_H
+#define DALSMC_H
+
+#define DALSMC_VERSION 0x1
+
+// SMU Response Codes:
+#define DALSMC_Result_OK 0x1
+#define DALSMC_Result_Failed 0xFF
+#define DALSMC_Result_UnknownCmd 0xFE
+#define DALSMC_Result_CmdRejectedPrereq 0xFD
+#define DALSMC_Result_CmdRejectedBusy 0xFC
+
+
+
+// Message Definitions:
+#define DALSMC_MSG_TestMessage 0x1
+#define DALSMC_MSG_GetSmuVersion 0x2
+#define DALSMC_MSG_GetDriverIfVersion 0x3
+#define DALSMC_MSG_GetMsgHeaderVersion 0x4
+#define DALSMC_MSG_SetDalDramAddrHigh 0x5
+#define DALSMC_MSG_SetDalDramAddrLow 0x6
+#define DALSMC_MSG_TransferTableSmu2Dram 0x7
+#define DALSMC_MSG_TransferTableDram2Smu 0x8
+#define DALSMC_MSG_SetHardMinByFreq 0x9
+#define DALSMC_MSG_SetHardMaxByFreq 0xA
+#define DALSMC_MSG_GetDpmFreqByIndex 0xB
+#define DALSMC_MSG_GetDcModeMaxDpmFreq 0xC
+#define DALSMC_MSG_SetMinDeepSleepDcfclk 0xD
+#define DALSMC_MSG_NumOfDisplays 0xE
+#define DALSMC_MSG_SetExternalClientDfCstateAllow 0xF
+#define DALSMC_MSG_BacoAudioD3PME 0x10
+#define DALSMC_MSG_SetFclkSwitchAllow 0x11
+#define DALSMC_MSG_SetCabForUclkPstate 0x12
+#define DALSMC_MSG_SetWorstCaseUclkLatency 0x13
+#define DALSMC_MSG_DcnExitReset 0x14
+#define DALSMC_MSG_ReturnHardMinStatus 0x15
+#define DALSMC_MSG_SetAlwaysWaitDmcubResp 0x16
+#define DALSMC_MSG_IndicateDrrStatus 0x17 // PMFW 15811
+#define DALSMC_MSG_ActiveUclkFclk 0x18
+#define DALSMC_MSG_IdleUclkFclk 0x19
+#define DALSMC_MSG_SetUclkPstateAllow 0x1A
+#define DALSMC_MSG_SubvpUclkFclk 0x1B
+#define DALSMC_MSG_GetNumUmcChannels 0x1C
+#define DALSMC_Message_Count 0x1D
+
+typedef enum {
+ FCLK_SWITCH_DISALLOW,
+ FCLK_SWITCH_ALLOW,
+} FclkSwitchAllow_e;
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
new file mode 100644
index 000000000000..306016c1f109
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
@@ -0,0 +1,1631 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dccg.h"
+#include "clk_mgr_internal.h"
+#include "dcn401/dcn401_clk_mgr_smu_msg.h"
+#include "dcn20/dcn20_clk_mgr.h"
+#include "dce100/dce_clk_mgr.h"
+#include "dcn31/dcn31_clk_mgr.h"
+#include "dcn32/dcn32_clk_mgr.h"
+#include "dcn401/dcn401_clk_mgr.h"
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dm_helpers.h"
+#include "link_service.h"
+#include "dc_state_priv.h"
+#include "atomfirmware.h"
+
+#include "dcn401_smu14_driver_if.h"
+
+#include "dcn/dcn_4_1_0_offset.h"
+#include "dcn/dcn_4_1_0_sh_mask.h"
+
+#define DCN_BASE__INST0_SEG1 0x000000C0
+
+#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E37
+#define mmCLK01_CLK0_CLK0_DFS_CNTL 0x16E69
+#define mmCLK01_CLK0_CLK1_DFS_CNTL 0x16E6C
+#define mmCLK01_CLK0_CLK2_DFS_CNTL 0x16E6F
+#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E72
+#define mmCLK01_CLK0_CLK4_DFS_CNTL 0x16E75
+#define mmCLK20_CLK2_CLK2_DFS_CNTL 0x1B051
+
+#define CLK0_CLK_PLL_REQ__FbMult_int_MASK 0x000001ffUL
+#define CLK0_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000f000UL
+#define CLK0_CLK_PLL_REQ__FbMult_frac_MASK 0xffff0000UL
+#define CLK0_CLK_PLL_REQ__FbMult_int__SHIFT 0x00000000
+#define CLK0_CLK_PLL_REQ__PllSpineDiv__SHIFT 0x0000000c
+#define CLK0_CLK_PLL_REQ__FbMult_frac__SHIFT 0x00000010
+
+#undef FN
+#define FN(reg_name, field_name) \
+ clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name
+
+#define REG(reg) \
+ (clk_mgr->regs->reg)
+
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define SR(reg_name)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define CLK_SR_DCN401(reg_name, block, inst)\
+ .reg_name = mm ## block ## _ ## reg_name
+
+static const struct clk_mgr_registers clk_mgr_regs_dcn401 = {
+ CLK_REG_LIST_DCN401()
+};
+
+static const struct clk_mgr_shift clk_mgr_shift_dcn401 = {
+ CLK_COMMON_MASK_SH_LIST_DCN401(__SHIFT)
+};
+
+static const struct clk_mgr_mask clk_mgr_mask_dcn401 = {
+ CLK_COMMON_MASK_SH_LIST_DCN401(_MASK)
+};
+
+#define TO_DCN401_CLK_MGR(clk_mgr)\
+ container_of(clk_mgr, struct dcn401_clk_mgr, base)
+
+static bool dcn401_is_ppclk_dpm_enabled(struct clk_mgr_internal *clk_mgr, PPCLK_e clk)
+{
+ bool ppclk_dpm_enabled = false;
+
+ switch (clk) {
+ case PPCLK_SOCCLK:
+ ppclk_dpm_enabled =
+ clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_socclk_levels > 1;
+ break;
+ case PPCLK_UCLK:
+ ppclk_dpm_enabled =
+ clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_memclk_levels > 1;
+ break;
+ case PPCLK_FCLK:
+ ppclk_dpm_enabled =
+ clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels > 1;
+ break;
+ case PPCLK_DISPCLK:
+ ppclk_dpm_enabled =
+ clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels > 1;
+ break;
+ case PPCLK_DPPCLK:
+ ppclk_dpm_enabled =
+ clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dppclk_levels > 1;
+ break;
+ case PPCLK_DPREFCLK:
+ ppclk_dpm_enabled = false;
+ break;
+ case PPCLK_DCFCLK:
+ ppclk_dpm_enabled =
+ clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels > 1;
+ break;
+ case PPCLK_DTBCLK:
+ ppclk_dpm_enabled =
+ clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels > 1;
+ break;
+ default:
+ ppclk_dpm_enabled = false;
+ }
+
+ ppclk_dpm_enabled &= clk_mgr->smu_present;
+
+ return ppclk_dpm_enabled;
+}
+
+static bool dcn401_is_ppclk_idle_dpm_enabled(struct clk_mgr_internal *clk_mgr, PPCLK_e clk)
+{
+ bool ppclk_idle_dpm_enabled = false;
+
+ switch (clk) {
+ case PPCLK_UCLK:
+ case PPCLK_FCLK:
+ if (ASICREV_IS_GC_12_0_0_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) &&
+ clk_mgr->smu_ver >= 0x681800) {
+ ppclk_idle_dpm_enabled = true;
+ } else if (ASICREV_IS_GC_12_0_1_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) &&
+ clk_mgr->smu_ver >= 0x661300) {
+ ppclk_idle_dpm_enabled = true;
+ }
+ break;
+ default:
+ ppclk_idle_dpm_enabled = false;
+ }
+
+ ppclk_idle_dpm_enabled &= clk_mgr->smu_present;
+
+ return ppclk_idle_dpm_enabled;
+}
+
+static bool dcn401_is_df_throttle_opt_enabled(struct clk_mgr_internal *clk_mgr)
+{
+ bool is_df_throttle_opt_enabled = false;
+
+ if (ASICREV_IS_GC_12_0_1_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) &&
+ clk_mgr->smu_ver >= 0x663500) {
+ is_df_throttle_opt_enabled = !clk_mgr->base.ctx->dc->debug.force_subvp_df_throttle;
+ }
+
+ is_df_throttle_opt_enabled &= clk_mgr->smu_present;
+
+ return is_df_throttle_opt_enabled;
+}
+
+/* Query SMU for all clock states of one clock; entry_0 is its field in clk_table.entries[0] */
+static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, unsigned int *entry_0,
+ unsigned int *num_levels)
+{
+ unsigned int i;
+ char *entry_i = (char *)entry_0;
+
+ uint32_t ret = dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF);
+
+ if (ret & (1U << 31))
+ /* bit 31 set: fine-grained, only min and max */
+ *num_levels = 2;
+ else
+ /* discrete, a number of fixed states */
+ /* will set num_levels to 0 on failure */
+ *num_levels = ret & 0xFF;
+
+ /* if the initial message failed, num_levels will be 0 */
+ for (i = 0; i < *num_levels && i < ARRAY_SIZE(clk_mgr->base.bw_params->clk_table.entries); i++) {
+ *((unsigned int *)entry_i) = (dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF);
+ entry_i += sizeof(clk_mgr->base.bw_params->clk_table.entries[0]);
+ }
+}
+
+static void dcn401_build_wm_range_table(struct clk_mgr *clk_mgr)
+{
+ /* For min clocks use as reported by PM FW and report those as min */
+ uint16_t min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz;
+ uint16_t min_dcfclk_mhz = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz;
+
+ /* Set A - Normal - default values */
+ clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid = true;
+ clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
+ clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz;
+ clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
+
+ /* Set B - Unused on dcn4 */
+ clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid = false;
+
+ /* Set 1A - Dummy P-State - P-State latency set to "dummy p-state" value */
+ /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set 1A for dummy p-state */
+ if (clk_mgr->ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) {
+ clk_mgr->bw_params->wm_table.nv_entries[WM_1A].valid = true;
+ clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
+ clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz;
+ clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.max_dcfclk = 0xFFFF;
+ clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.min_uclk = min_uclk_mhz;
+ clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.max_uclk = 0xFFFF;
+ } else {
+ clk_mgr->bw_params->wm_table.nv_entries[WM_1A].valid = false;
+ }
+
+ /* Set 1B - Unused on dcn4 */
+ clk_mgr->bw_params->wm_table.nv_entries[WM_1B].valid = false;
+}
+
+/*
+ * Reset the cached clock state and, when an SMU answers, read the DPM tables
+ * for DCFCLK, SOCCLK, DTBCLK, DISPCLK and DPPCLK into bw_params.
+ *
+ * Does nothing when bw_params is NULL. Returns early, with smu_present and
+ * dpm_present both false, when the SMU is forced absent or its version query
+ * fails. A DC mode limit that equals the highest table entry of the same clock
+ * is stored as 0. Finishes by fetching the memory clock states through the
+ * funcs table and building the watermark range table.
+ */
+void dcn401_init_clocks(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct clk_bw_params *bw_params = clk_mgr_base->bw_params;
+	struct clk_limit_num_entries *levels;
+	struct dc *dc;
+	unsigned int lvl;
+
+	if (!bw_params)
+		return;
+
+	levels = &bw_params->clk_table.num_entries_per_clk;
+
+	/* Start from a clean software state with p-state switching assumed */
+	memset(&(clk_mgr_base->clks), 0, sizeof(struct dc_clocks));
+	clk_mgr_base->clks.p_state_change_support = true;
+	clk_mgr_base->clks.prev_p_state_change_support = true;
+	clk_mgr_base->clks.fclk_prev_p_state_change_support = true;
+	clk_mgr->smu_present = false;
+	clk_mgr->dpm_present = false;
+
+	if (!clk_mgr_base->force_smu_not_present && dcn401_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver))
+		clk_mgr->smu_present = true;
+
+	/* Without an SMU there are no tables to read */
+	if (!clk_mgr->smu_present)
+		return;
+
+	dcn401_smu_check_driver_if_version(clk_mgr);
+	dcn401_smu_check_msg_header_version(clk_mgr);
+
+	/* DCFCLK */
+	dcn401_init_single_clock(clk_mgr, PPCLK_DCFCLK,
+			&bw_params->clk_table.entries[0].dcfclk_mhz,
+			&levels->num_dcfclk_levels);
+	bw_params->dc_mode_limit.dcfclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK);
+	if (levels->num_dcfclk_levels &&
+			bw_params->dc_mode_limit.dcfclk_mhz ==
+			bw_params->clk_table.entries[levels->num_dcfclk_levels - 1].dcfclk_mhz)
+		bw_params->dc_mode_limit.dcfclk_mhz = 0;
+
+	/* SOCCLK */
+	dcn401_init_single_clock(clk_mgr, PPCLK_SOCCLK,
+			&bw_params->clk_table.entries[0].socclk_mhz,
+			&levels->num_socclk_levels);
+	bw_params->dc_mode_limit.socclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK);
+	if (levels->num_socclk_levels &&
+			bw_params->dc_mode_limit.socclk_mhz ==
+			bw_params->clk_table.entries[levels->num_socclk_levels - 1].socclk_mhz)
+		bw_params->dc_mode_limit.socclk_mhz = 0;
+
+	/* DTBCLK - skipped entirely when the debug option disables it */
+	if (!clk_mgr->base.ctx->dc->debug.disable_dtb_ref_clk_switch) {
+		dcn401_init_single_clock(clk_mgr, PPCLK_DTBCLK,
+				&bw_params->clk_table.entries[0].dtbclk_mhz,
+				&levels->num_dtbclk_levels);
+		bw_params->dc_mode_limit.dtbclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK);
+		if (levels->num_dtbclk_levels &&
+				bw_params->dc_mode_limit.dtbclk_mhz ==
+				bw_params->clk_table.entries[levels->num_dtbclk_levels - 1].dtbclk_mhz)
+			bw_params->dc_mode_limit.dtbclk_mhz = 0;
+	}
+
+	/* DISPCLK */
+	dcn401_init_single_clock(clk_mgr, PPCLK_DISPCLK,
+			&bw_params->clk_table.entries[0].dispclk_mhz,
+			&levels->num_dispclk_levels);
+	bw_params->dc_mode_limit.dispclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK);
+	if (levels->num_dispclk_levels &&
+			bw_params->dc_mode_limit.dispclk_mhz ==
+			bw_params->clk_table.entries[levels->num_dispclk_levels - 1].dispclk_mhz)
+		bw_params->dc_mode_limit.dispclk_mhz = 0;
+
+	/* DPPCLK - no DC mode limit is queried for this clock */
+	dcn401_init_single_clock(clk_mgr, PPCLK_DPPCLK,
+			&bw_params->clk_table.entries[0].dppclk_mhz,
+			&levels->num_dppclk_levels);
+
+	/* DPM counts as present only if DCFCLK, DTBCLK and DISPCLK all reported levels */
+	if (levels->num_dcfclk_levels &&
+			levels->num_dtbclk_levels &&
+			levels->num_dispclk_levels)
+		clk_mgr->dpm_present = true;
+
+	dc = clk_mgr_base->ctx->dc;
+
+	/* Raise DISPCLK levels that sit below the debug minimum */
+	if (dc->debug.min_disp_clk_khz) {
+		for (lvl = 0; lvl < levels->num_dispclk_levels; lvl++) {
+			if (bw_params->clk_table.entries[lvl].dispclk_mhz
+					< khz_to_mhz_ceil(dc->debug.min_disp_clk_khz))
+				bw_params->clk_table.entries[lvl].dispclk_mhz
+					= khz_to_mhz_ceil(dc->debug.min_disp_clk_khz);
+		}
+	}
+
+	/* Raise DPPCLK levels that sit below the debug minimum */
+	if (dc->debug.min_dpp_clk_khz) {
+		for (lvl = 0; lvl < levels->num_dppclk_levels; lvl++) {
+			if (bw_params->clk_table.entries[lvl].dppclk_mhz
+					< khz_to_mhz_ceil(dc->debug.min_dpp_clk_khz))
+				bw_params->clk_table.entries[lvl].dppclk_mhz
+					= khz_to_mhz_ceil(dc->debug.min_dpp_clk_khz);
+		}
+	}
+
+	/* Get UCLK, update bounding box */
+	clk_mgr_base->funcs->get_memclk_states_from_smu(clk_mgr_base);
+
+	/* WM range table */
+	dcn401_build_wm_range_table(clk_mgr_base);
+}
+
+/*
+ * Report whether a DC mode limit is in effect for at least one clock.
+ *
+ * Returns true only when both the SMU and DPM are present and some clock
+ * (DCFCLK, DISPCLK, DTBCLK, FCLK, MEMCLK or SOCCLK) has a non-zero level
+ * count together with a non-zero dc_mode_limit value.
+ */
+bool dcn401_is_dc_mode_present(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	const struct clk_bw_params *bw_params;
+	const struct clk_limit_num_entries *levels;
+
+	/* bw_params is only looked at once SMU and DPM are known to be present */
+	if (!clk_mgr->smu_present || !clk_mgr->dpm_present)
+		return false;
+
+	bw_params = clk_mgr_base->bw_params;
+	levels = &bw_params->clk_table.num_entries_per_clk;
+
+	if (levels->num_dcfclk_levels && bw_params->dc_mode_limit.dcfclk_mhz)
+		return true;
+	if (levels->num_dispclk_levels && bw_params->dc_mode_limit.dispclk_mhz)
+		return true;
+	if (levels->num_dtbclk_levels && bw_params->dc_mode_limit.dtbclk_mhz)
+		return true;
+	if (levels->num_fclk_levels && bw_params->dc_mode_limit.fclk_mhz)
+		return true;
+	if (levels->num_memclk_levels && bw_params->dc_mode_limit.memclk_mhz)
+		return true;
+	if (levels->num_socclk_levels && bw_params->dc_mode_limit.socclk_mhz)
+		return true;
+
+	return false;
+}
+
+/*
+ * Read the DFS control registers and convert each value into a clock
+ * frequency, stored in @regs_and_bypass.
+ *
+ * Each register value is turned into a divider with
+ * dentist_get_divider_from_did() and the frequency is computed as
+ * DENTIST_DIVIDER_RANGE_SCALE_FACTOR * dentist_vco_freq_khz / divider.
+ * @log_info is not referenced by this function.
+ */
+static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
+		struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	/* DFS Slice 0 is used for DISPCLK */
+	uint32_t dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL);
+	/* DFS Slice 1 is used for DPPCLK */
+	uint32_t dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL);
+	/* DFS Slice 2 is used for DPREFCLK */
+	uint32_t dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL);
+	/* DFS Slice 3 is used for DCFCLK */
+	uint32_t dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL);
+	/* DFS Slice 4 is used for DTBCLK */
+	uint32_t dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL);
+	/* FCLK is read from CLK2_CLK2_DFS_CNTL */
+	uint32_t fclk_did = REG_READ(CLK2_CLK2_DFS_CNTL);
+	uint32_t divider;
+
+	/* DISPCLK: DID -> divider -> kHz */
+	divider = dentist_get_divider_from_did(dispclk_did);
+	regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+
+	/* DPPCLK: DID -> divider -> kHz */
+	divider = dentist_get_divider_from_did(dppclk_did);
+	regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+
+	/* DPREFCLK: DID -> divider -> kHz */
+	divider = dentist_get_divider_from_did(dprefclk_did);
+	regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+
+	/* DCFCLK: DID -> divider -> kHz */
+	divider = dentist_get_divider_from_did(dcfclk_did);
+	regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+
+	/* DTBCLK: DID -> divider -> kHz */
+	divider = dentist_get_divider_from_did(dtbclk_did);
+	regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+
+	/* FCLK: DID -> divider -> kHz */
+	divider = dentist_get_divider_from_did(fclk_did);
+	regs_and_bypass->fclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz) / divider;
+}
+
+/*
+ * Return true when the plane's source rectangle, its destination rectangle and
+ * the stream's addressable area all share the same width and height.
+ *
+ * Dereferences pipe->plane_state and pipe->stream without checking them.
+ */
+static bool dcn401_check_native_scaling(struct pipe_ctx *pipe)
+{
+	int width = pipe->plane_state->src_rect.width;
+	int height = pipe->plane_state->src_rect.height;
+
+	return pipe->stream->timing.h_addressable == width &&
+			pipe->stream->timing.v_addressable == height &&
+			pipe->plane_state->dst_rect.width == width &&
+			pipe->plane_state->dst_rect.height == height;
+}
+
+/*
+ * Emit one "AutoDPMTest" log line describing the requested clocks, the clocks
+ * read back from hardware and per-pipe display information.
+ *
+ * @new_clocks: clocks being requested
+ * @clk_mgr:    clock manager whose registers and bw_params are read
+ * @context:    state whose pipes are inspected
+ *
+ * Pipes that have a stream and are not SubVP phantom pipes are collected;
+ * data for the first four list slots is logged. Nothing is logged unless at
+ * least one such pipe exists and dramclk, fclk, dcfclk and dppclk in
+ * @new_clocks are all positive. The function sleeps for 5 ms before sampling
+ * the clock registers.
+ */
+static void dcn401_auto_dpm_test_log(
+		struct dc_clocks *new_clocks,
+		struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context)
+{
+	unsigned int mall_ss_size_bytes;
+	unsigned int num_fclk_levels;
+	int dramclk_khz_override, fclk_khz_override;
+	struct clk_log_info log_info = {0};
+	struct clk_state_registers_and_bypass clk_register_dump;
+	struct pipe_ctx *pipe_ctx_list[MAX_PIPES];
+	int active_pipe_count = 0;
+
+	for (int i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+			pipe_ctx_list[active_pipe_count] = pipe_ctx;
+			active_pipe_count++;
+		}
+	}
+
+	msleep(5);
+
+	mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes;
+
+	dcn401_dump_clk_registers(&clk_register_dump, &clk_mgr->base, &log_info);
+
+	// Overrides for these clocks in case there is no p_state change support
+	dramclk_khz_override = new_clocks->dramclk_khz;
+	fclk_khz_override = new_clocks->fclk_khz;
+
+	num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels;
+
+	if (!new_clocks->p_state_change_support)
+		dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000;
+
+	/*
+	 * Report the highest FCLK level when FCLK p-state is unsupported. The
+	 * level count is validated first: with zero levels "count - 1" would
+	 * index before the start of the table, and a count beyond the table size
+	 * would index past its end. In either case the requested value is kept.
+	 */
+	if (!new_clocks->fclk_p_state_change_support &&
+			num_fclk_levels > 0 &&
+			num_fclk_levels <= ARRAY_SIZE(clk_mgr->base.bw_params->clk_table.entries))
+		fclk_khz_override = clk_mgr->base.bw_params->clk_table.entries[num_fclk_levels - 1].fclk_mhz * 1000;
+
+	////////////////////////////////////////////////////////////////////////////
+	//	IMPORTANT:	When adding more clocks to these logs, do NOT put a newline
+	//				anywhere other than at the very end of the string.
+	//
+	//	Formatting example (make sure to have " - " between each entry):
+	//
+	//				AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n"
+	////////////////////////////////////////////////////////////////////////////
+	if (active_pipe_count > 0 &&
+			new_clocks->dramclk_khz > 0 &&
+			new_clocks->fclk_khz > 0 &&
+			new_clocks->dcfclk_khz > 0 &&
+			new_clocks->dppclk_khz > 0) {
+
+		uint32_t pix_clk_list[MAX_PIPES] = {0};
+		int p_state_list[MAX_PIPES] = {0};
+		int disp_src_width_list[MAX_PIPES] = {0};
+		int disp_src_height_list[MAX_PIPES] = {0};
+		uint64_t disp_src_refresh_list[MAX_PIPES] = {0};
+		bool is_scaled_list[MAX_PIPES] = {0};
+
+		for (int i = 0; i < active_pipe_count; i++) {
+			struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i];
+			uint64_t refresh_rate;
+
+			pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz;
+			p_state_list[i] = curr_pipe_ctx->p_state_type;
+
+			/*
+			 * Refresh rate = pixel clock in Hz / (v_total * h_total), rounded
+			 * up. A zero total would be a division by zero, so such a pipe
+			 * keeps the 0 the list was initialized with.
+			 */
+			if (curr_pipe_ctx->stream->timing.v_total && curr_pipe_ctx->stream->timing.h_total) {
+				refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 +
+					curr_pipe_ctx->stream->timing.v_total
+					* (uint64_t) curr_pipe_ctx->stream->timing.h_total - (uint64_t)1);
+				refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total);
+				refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total);
+				disp_src_refresh_list[i] = refresh_rate;
+			}
+
+			/* Plane-derived fields stay 0 for pipes without a plane */
+			if (curr_pipe_ctx->plane_state) {
+				is_scaled_list[i] = !(dcn401_check_native_scaling(curr_pipe_ctx));
+				disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width;
+				disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height;
+			}
+		}
+
+		/* Arguments must follow the label order in the format string exactly */
+		DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - "
+			"dcfclk:%d - dppclk:%d - dispclk_hw:%d - "
+			"dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - "
+			"dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - "
+			"pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - "
+			"p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - "
+			"pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - "
+			"pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - "
+			"pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - "
+			"pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n",
+			dramclk_khz_override,
+			fclk_khz_override,
+			new_clocks->dcfclk_khz,
+			new_clocks->dppclk_khz,
+			clk_register_dump.dispclk,
+			clk_register_dump.dppclk,
+			clk_register_dump.dprefclk,
+			clk_register_dump.dcfclk,
+			clk_register_dump.dtbclk,
+			clk_register_dump.fclk,
+			pix_clk_list[0], pix_clk_list[1], pix_clk_list[2], pix_clk_list[3],
+			mall_ss_size_bytes,
+			p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3],
+			disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0],
+			disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1],
+			disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2],
+			disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]);
+	}
+}
+
+/*
+ * Reprogram the pixel clock of every stream that uses 128b/132b encoding.
+ *
+ * @clk_mgr:        clock manager; only its dccg is used
+ * @context:        state whose streams are walked
+ * @ref_dtbclk_khz: not referenced by this implementation
+ *
+ * For each stream the OTG master pipe is looked up. Streams for which
+ * dp_is_128b_132b_signal() reports false are skipped. A stream whose OTG
+ * master or clock source is missing trips the assertion and is skipped
+ * instead of being dereferenced.
+ */
+static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr,
+			struct dc_state *context,
+			int ref_dtbclk_khz)
+{
+	int i;
+	struct dccg *dccg = clk_mgr->dccg;
+	struct pipe_ctx *otg_master;
+	bool use_hpo_encoder;
+
+
+	for (i = 0; i < context->stream_count; i++) {
+		otg_master = resource_get_otg_master_for_stream(
+				&context->res_ctx, context->streams[i]);
+		ASSERT(otg_master);
+		/* The assertion only reports; bail out explicitly to avoid a NULL dereference */
+		if (!otg_master)
+			continue;
+
+		ASSERT(otg_master->clock_source);
+		if (!otg_master->clock_source)
+			continue;
+
+		ASSERT(otg_master->clock_source->funcs->program_pix_clk);
+		ASSERT(otg_master->stream_res.pix_clk_params.controller_id >= CONTROLLER_ID_D0);
+
+		use_hpo_encoder = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(otg_master);
+		if (!use_hpo_encoder)
+			continue;
+
+		/* Program only with a defined controller and an available callback */
+		if (otg_master->stream_res.pix_clk_params.controller_id > CONTROLLER_ID_UNDEFINED &&
+				otg_master->clock_source->funcs->program_pix_clk)
+			otg_master->clock_source->funcs->program_pix_clk(
+					otg_master->clock_source,
+					&otg_master->stream_res.pix_clk_params,
+					dccg->ctx->dc->link_srv->dp_get_encoding_format(
+							&otg_master->link_config.dp_link_settings),
+					&otg_master->pll_settings);
+	}
+}
+
+/*
+ * Record the reference DPPCLK in the DCCG and update the per-pipe DPP DTOs.
+ *
+ * @clk_mgr:        clock manager owning the DCCG
+ * @context:        state providing each pipe's requested dppclk_khz
+ * @safe_to_lower:  when false, a DTO is only updated if the request is higher
+ *                  than the value stored in the DCCG for that pipe index
+ * @ref_dppclk_khz: value written to dccg->ref_dppclk
+ */
+static void dcn401_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context, bool safe_to_lower, int ref_dppclk_khz)
+{
+	int pipe_idx;
+
+	clk_mgr->dccg->ref_dppclk = ref_dppclk_khz;
+
+	for (pipe_idx = 0; pipe_idx < clk_mgr->base.ctx->dc->res_pool->pipe_count; pipe_idx++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[pipe_idx];
+		int requested_khz = pipe->plane_res.bw.dppclk_khz;
+		int current_khz;
+		int dpp_inst = 0;
+
+		if (pipe->plane_res.dpp) {
+			dpp_inst = pipe->plane_res.dpp->inst;
+		} else if (requested_khz == 0) {
+			/* dpp == NULL && dppclk_khz == 0 is valid because of pipe harvesting.
+			 * In this case just continue in loop
+			 */
+			continue;
+		} else if (requested_khz > 0) {
+			/* The software state is not valid if dpp resource is NULL and
+			 * dppclk_khz > 0.
+			 */
+			ASSERT(false);
+			continue;
+		}
+
+		current_khz = clk_mgr->dccg->pipe_dppclk_khz[pipe_idx];
+
+		/* Lowering (or re-sending an equal value) waits until it is safe */
+		if (!safe_to_lower && current_khz >= requested_khz)
+			continue;
+
+		clk_mgr->dccg->funcs->update_dpp_dto(
+				clk_mgr->dccg, dpp_inst, requested_khz);
+	}
+}
+
+/*
+ * Set a hard minimum for @clk while avoiding an unnecessary jump to a higher
+ * DPM level.
+ *
+ * Returns 0 when no SMU is present or DPM is not enabled for @clk; otherwise
+ * returns the value handed back by the last SMU call.
+ */
+static int dcn401_set_hard_min_by_freq_optimized(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, int requested_clk_khz)
+{
+	int actual_clk_khz;
+
+	if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, clk))
+		return 0;
+
+	/*
+	 * The SMU hard min interface takes the request in MHz and reports the
+	 * configured clock in kHz. Flooring the request to MHz can leave the
+	 * configured clock below what was asked for, while ceiling it can push
+	 * the clock up to a higher DPM level than needed and burn more power.
+	 * So try the floored value first and accept it if it is high enough.
+	 */
+	actual_clk_khz = dcn401_smu_set_hard_min_by_freq(clk_mgr, clk, khz_to_mhz_floor(requested_clk_khz));
+	if (actual_clk_khz >= requested_clk_khz)
+		return actual_clk_khz;
+
+	/* Floored request fell short: ask again with the ceiled value */
+	return dcn401_smu_set_hard_min_by_freq(clk_mgr, clk, khz_to_mhz_ceil(requested_clk_khz));
+}
+
+/*
+ * Optionally reprogram the DENTIST DISPCLK write divider.
+ *
+ * Does nothing when the cached dispclk_khz is 0 or when the
+ * override_dispclk_programming debug option is off. Otherwise the divider ID
+ * derived from the cached DISPCLK is written only if the value currently in
+ * the register is greater than it, and the function then waits for the
+ * change-done field. @context is not referenced.
+ */
+static void dcn401_update_clocks_update_dentist(
+		struct clk_mgr_internal *clk_mgr,
+		struct dc_state *context)
+{
+	struct dc *dc = clk_mgr->base.ctx->dc;
+	uint32_t disp_divider;
+	uint32_t target_wdivider;
+	uint32_t current_wdivider = 0;
+
+	if (clk_mgr->base.clks.dispclk_khz == 0)
+		return;
+
+	disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+			* clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz;
+	target_wdivider = dentist_get_did_from_divider(disp_divider);
+
+	if (!dc->debug.override_dispclk_programming)
+		return;
+
+	REG_GET(DENTIST_DISPCLK_CNTL,
+			DENTIST_DISPCLK_WDIVIDER, &current_wdivider);
+
+	/* Leave the register alone unless its divider ID is above the target */
+	if (current_wdivider <= target_wdivider)
+		return;
+
+	REG_UPDATE(DENTIST_DISPCLK_CNTL,
+			DENTIST_DISPCLK_WDIVIDER, target_wdivider);
+	REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000);
+}
+
+/*
+ * Run the first @num_steps entries of the clock manager's block sequence.
+ *
+ * Each entry names an operation (func) and carries its arguments (params).
+ * Entries are executed in array order. For the two hard-min operations the
+ * result is stored through the entry's response pointer when that pointer is
+ * non-NULL. An unrecognized func value breaks to the debugger.
+ */
+static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned int num_steps)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr);
+	struct dcn401_clk_mgr_block_sequence *step = clk_mgr401->block_sequence;
+	union dcn401_clk_mgr_block_sequence_params *args;
+	unsigned int remaining;
+
+	/* execute sequence */
+	for (remaining = num_steps; remaining > 0; remaining--, step++) {
+		args = &step->params;
+
+		switch (step->func) {
+		case CLK_MGR401_READ_CLOCKS_FROM_DENTIST:
+			dcn2_read_clocks_from_hw_dentist(clk_mgr_base);
+			break;
+		case CLK_MGR401_UPDATE_NUM_DISPLAYS:
+			dcn401_smu_set_num_of_displays(clk_mgr,
+					args->update_num_displays_params.num_displays);
+			break;
+		case CLK_MGR401_UPDATE_HARDMIN_PPCLK:
+			/* the SMU call is made either way; only the store is optional */
+			if (args->update_hardmin_params.response)
+				*args->update_hardmin_params.response = dcn401_smu_set_hard_min_by_freq(
+						clk_mgr,
+						args->update_hardmin_params.ppclk,
+						args->update_hardmin_params.freq_mhz);
+			else
+				dcn401_smu_set_hard_min_by_freq(clk_mgr,
+						args->update_hardmin_params.ppclk,
+						args->update_hardmin_params.freq_mhz);
+			break;
+		case CLK_MGR401_UPDATE_HARDMIN_PPCLK_OPTIMIZED:
+			if (args->update_hardmin_optimized_params.response)
+				*args->update_hardmin_optimized_params.response = dcn401_set_hard_min_by_freq_optimized(
+						clk_mgr,
+						args->update_hardmin_optimized_params.ppclk,
+						args->update_hardmin_optimized_params.freq_khz);
+			else
+				dcn401_set_hard_min_by_freq_optimized(clk_mgr,
+						args->update_hardmin_optimized_params.ppclk,
+						args->update_hardmin_optimized_params.freq_khz);
+			break;
+		case CLK_MGR401_UPDATE_ACTIVE_HARDMINS:
+			/* active, idle and subvp hardmins all read update_idle_hardmin_params */
+			dcn401_smu_set_active_uclk_fclk_hardmin(
+					clk_mgr,
+					args->update_idle_hardmin_params.uclk_mhz,
+					args->update_idle_hardmin_params.fclk_mhz);
+			break;
+		case CLK_MGR401_UPDATE_IDLE_HARDMINS:
+			dcn401_smu_set_idle_uclk_fclk_hardmin(
+					clk_mgr,
+					args->update_idle_hardmin_params.uclk_mhz,
+					args->update_idle_hardmin_params.fclk_mhz);
+			break;
+		case CLK_MGR401_UPDATE_SUBVP_HARDMINS:
+			dcn401_smu_set_subvp_uclk_fclk_hardmin(
+					clk_mgr,
+					args->update_idle_hardmin_params.uclk_mhz,
+					args->update_idle_hardmin_params.fclk_mhz);
+			break;
+		case CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK:
+			dcn401_smu_set_min_deep_sleep_dcef_clk(
+					clk_mgr,
+					args->update_deep_sleep_dcfclk_params.freq_mhz);
+			break;
+		case CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT:
+			dcn401_smu_send_fclk_pstate_message(
+					clk_mgr,
+					args->update_pstate_support_params.support);
+			break;
+		case CLK_MGR401_UPDATE_UCLK_PSTATE_SUPPORT:
+			dcn401_smu_send_uclk_pstate_message(
+					clk_mgr,
+					args->update_pstate_support_params.support);
+			break;
+		case CLK_MGR401_UPDATE_CAB_FOR_UCLK:
+			dcn401_smu_send_cab_for_uclk_message(
+					clk_mgr,
+					args->update_cab_for_uclk_params.num_ways);
+			break;
+		case CLK_MGR401_UPDATE_WAIT_FOR_DMUB_ACK:
+			dcn401_smu_wait_for_dmub_ack_mclk(
+					clk_mgr,
+					args->update_wait_for_dmub_ack_params.enable);
+			break;
+		case CLK_MGR401_INDICATE_DRR_STATUS:
+			dcn401_smu_indicate_drr_status(
+					clk_mgr,
+					args->indicate_drr_status_params.mod_drr_for_pstate);
+			break;
+		case CLK_MGR401_UPDATE_DPPCLK_DTO:
+			/* the reference clock is read through its pointer at execution time */
+			dcn401_update_clocks_update_dpp_dto(
+					clk_mgr,
+					args->update_dppclk_dto_params.context,
+					args->update_dppclk_dto_params.safe_to_lower,
+					*args->update_dppclk_dto_params.ref_dppclk_khz);
+			break;
+		case CLK_MGR401_UPDATE_DTBCLK_DTO:
+			dcn401_update_clocks_update_dtb_dto(
+					clk_mgr,
+					args->update_dtbclk_dto_params.context,
+					*args->update_dtbclk_dto_params.ref_dtbclk_khz);
+			break;
+		case CLK_MGR401_UPDATE_DENTIST:
+			dcn401_update_clocks_update_dentist(
+					clk_mgr,
+					args->update_dentist_params.context);
+			break;
+		case CLK_MGR401_UPDATE_PSR_WAIT_LOOP:
+			args->update_psr_wait_loop_params.dmcu->funcs->set_psr_wait_loop(
+					args->update_psr_wait_loop_params.dmcu,
+					args->update_psr_wait_loop_params.wait);
+			break;
+		default:
+			/* this should never happen */
+			BREAK_TO_DEBUGGER();
+			break;
+		}
+	}
+}
+
+static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
+ struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ struct dc_clocks *new_clocks,
+ bool safe_to_lower)
+{
+ struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal);
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence;
+ bool enter_display_off = false;
+ bool update_active_fclk = false;
+ bool update_active_uclk = false;
+ bool update_idle_fclk = false;
+ bool update_idle_uclk = false;
+ bool update_subvp_prefetch_dramclk = false;
+ bool update_subvp_prefetch_fclk = false;
+ bool is_idle_dpm_enabled = dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) &&
+ dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK) &&
+ dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) &&
+ dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_FCLK);
+ bool is_df_throttle_opt_enabled = is_idle_dpm_enabled &&
+ dcn401_is_df_throttle_opt_enabled(clk_mgr_internal);
+ int total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context);
+ int active_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz);
+ int active_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.fclk_khz);
+ int idle_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_dramclk_khz);
+ int idle_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_fclk_khz);
+ int subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz);
+ int subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz);
+
+ unsigned int num_steps = 0;
+
+ int display_count;
+ bool fclk_p_state_change_support, uclk_p_state_change_support;
+
+ /* CLK_MGR401_UPDATE_NUM_DISPLAYS */
+ if (clk_mgr_internal->smu_present) {
+ display_count = clk_mgr_helper_get_active_display_cnt(dc, context);
+
+ if (display_count == 0)
+ enter_display_off = true;
+
+ if (enter_display_off == safe_to_lower) {
+ block_sequence[num_steps].params.update_num_displays_params.num_displays = display_count;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_NUM_DISPLAYS;
+ num_steps++;
+ }
+ }
+
+ /* CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT */
+ clk_mgr_base->clks.fclk_prev_p_state_change_support = clk_mgr_base->clks.fclk_p_state_change_support;
+ fclk_p_state_change_support = new_clocks->fclk_p_state_change_support || (total_plane_count == 0);
+ if (should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support)) {
+ clk_mgr_base->clks.fclk_p_state_change_support = fclk_p_state_change_support;
+ update_active_fclk = true;
+ update_idle_fclk = true;
+
+ /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW (message not supported on DCN401)*/
+ // if (clk_mgr_base->clks.fclk_p_state_change_support) {
+ // /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */
+ // if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
+ // block_sequence[num_steps].params.update_pstate_support_params.support = true;
+ // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT;
+ // num_steps++;
+ // }
+ // }
+ }
+
+ if (!clk_mgr_base->clks.fclk_p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
+ /* when FCLK P-State switching disabled, set FCLK min = max */
+ idle_fclk_mhz =
+ clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1].fclk_mhz;
+ active_fclk_mhz = idle_fclk_mhz;
+ }
+
+ /* UPDATE DCFCLK */
+ if (dc->debug.force_min_dcfclk_mhz > 0)
+ new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ?
+ new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000);
+
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
+ clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DCFCLK)) {
+ block_sequence[num_steps].params.update_hardmin_params.ppclk = PPCLK_DCFCLK;
+ block_sequence[num_steps].params.update_hardmin_params.freq_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_khz);
+ block_sequence[num_steps].params.update_hardmin_params.response = NULL;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK;
+ num_steps++;
+ }
+ }
+
+ /* CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK */
+ if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) {
+ clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DCFCLK)) {
+ block_sequence[num_steps].params.update_deep_sleep_dcfclk_params.freq_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_deep_sleep_khz);
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK;
+ num_steps++;
+ }
+ }
+
+ /* SOCCLK */
+ if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr_base->clks.socclk_khz))
+ /* We don't actually care about socclk, don't notify SMU of hard min */
+ clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz;
+
+ /* UCLK */
+ if (new_clocks->fw_based_mclk_switching != clk_mgr_base->clks.fw_based_mclk_switching &&
+ new_clocks->fw_based_mclk_switching) {
+ /* enable FAMS features */
+ clk_mgr_base->clks.fw_based_mclk_switching = new_clocks->fw_based_mclk_switching;
+
+ block_sequence[num_steps].params.update_wait_for_dmub_ack_params.enable = clk_mgr_base->clks.fw_based_mclk_switching;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_WAIT_FOR_DMUB_ACK;
+ num_steps++;
+
+ block_sequence[num_steps].params.indicate_drr_status_params.mod_drr_for_pstate = clk_mgr_base->clks.fw_based_mclk_switching;
+ block_sequence[num_steps].func = CLK_MGR401_INDICATE_DRR_STATUS;
+ num_steps++;
+ }
+
+ /* CLK_MGR401_UPDATE_CAB_FOR_UCLK */
+ clk_mgr_base->clks.prev_num_ways = clk_mgr_base->clks.num_ways;
+ if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
+ clk_mgr_base->clks.num_ways < new_clocks->num_ways) {
+ /* increase num ways for subvp */
+ clk_mgr_base->clks.num_ways = new_clocks->num_ways;
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK)) {
+ block_sequence[num_steps].params.update_cab_for_uclk_params.num_ways = clk_mgr_base->clks.num_ways;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_CAB_FOR_UCLK;
+ num_steps++;
+ }
+ }
+
+ clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support;
+ uclk_p_state_change_support = new_clocks->p_state_change_support || (total_plane_count == 0);
+ if (should_update_pstate_support(safe_to_lower, uclk_p_state_change_support, clk_mgr_base->clks.prev_p_state_change_support)) {
+ clk_mgr_base->clks.p_state_change_support = uclk_p_state_change_support;
+ update_active_uclk = true;
+ update_idle_uclk = true;
+
+ if (clk_mgr_base->clks.p_state_change_support) {
+ /* enable UCLK switching */
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK)) {
+ block_sequence[num_steps].params.update_pstate_support_params.support = true;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_UCLK_PSTATE_SUPPORT;
+ num_steps++;
+ }
+ }
+ }
+
+ if (!clk_mgr_base->clks.p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK)) {
+ /* when P-State switching disabled, set UCLK min = max */
+ if (dc->clk_mgr->dc_mode_softmax_enabled) {
+ /* will never have the functional UCLK min above the softmax
+ * since we calculate mode support based on softmax being the max UCLK
+ * frequency.
+ */
+ active_uclk_mhz = clk_mgr_base->bw_params->dc_mode_softmax_memclk;
+ } else {
+ active_uclk_mhz = clk_mgr_base->bw_params->max_memclk_mhz;
+ }
+ idle_uclk_mhz = active_uclk_mhz;
+ }
+
+ /* Always update saved value, even if new value not set due to P-State switching unsupported */
+ if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) {
+ clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz;
+
+ if (clk_mgr_base->clks.p_state_change_support) {
+ update_active_uclk = true;
+ active_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz);
+ }
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->idle_dramclk_khz, clk_mgr_base->clks.idle_dramclk_khz)) {
+ clk_mgr_base->clks.idle_dramclk_khz = new_clocks->idle_dramclk_khz;
+
+ if (clk_mgr_base->clks.p_state_change_support) {
+ update_idle_uclk = true;
+ idle_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_dramclk_khz);
+ }
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_dramclk_khz, clk_mgr_base->clks.subvp_prefetch_dramclk_khz)) {
+ clk_mgr_base->clks.subvp_prefetch_dramclk_khz = new_clocks->subvp_prefetch_dramclk_khz;
+ update_subvp_prefetch_dramclk = true;
+ subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz);
+ }
+
+ /* FCLK */
+ /* Always update saved value, even if new value not set due to P-State switching unsupported */
+ if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr_base->clks.fclk_khz)) {
+ clk_mgr_base->clks.fclk_khz = new_clocks->fclk_khz;
+
+ if (clk_mgr_base->clks.fclk_p_state_change_support) {
+ update_active_fclk = true;
+ active_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.fclk_khz);
+ }
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->idle_fclk_khz, clk_mgr_base->clks.idle_fclk_khz)) {
+ clk_mgr_base->clks.idle_fclk_khz = new_clocks->idle_fclk_khz;
+
+ if (clk_mgr_base->clks.fclk_p_state_change_support) {
+ update_idle_fclk = true;
+ idle_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_fclk_khz);
+ }
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_fclk_khz, clk_mgr_base->clks.subvp_prefetch_fclk_khz)) {
+ clk_mgr_base->clks.subvp_prefetch_fclk_khz = new_clocks->subvp_prefetch_fclk_khz;
+ update_subvp_prefetch_fclk = true;
+ subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz);
+ }
+
+ /* When idle DPM is enabled, need to send active and idle hardmins separately */
+ /* CLK_MGR401_UPDATE_ACTIVE_HARDMINS */
+ if ((update_active_uclk || update_active_fclk) && is_idle_dpm_enabled) {
+ block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = active_uclk_mhz;
+ block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = active_fclk_mhz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_ACTIVE_HARDMINS;
+ num_steps++;
+ }
+
+ /* CLK_MGR401_UPDATE_IDLE_HARDMINS */
+ if ((update_idle_uclk || update_idle_fclk) && is_idle_dpm_enabled) {
+ block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = idle_uclk_mhz;
+ block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = idle_fclk_mhz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_IDLE_HARDMINS;
+ num_steps++;
+ }
+
+ /* CLK_MGR401_UPDATE_SUBVP_HARDMINS */
+ if ((update_subvp_prefetch_dramclk || update_subvp_prefetch_fclk) && is_df_throttle_opt_enabled) {
+ block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = subvp_prefetch_dramclk_mhz;
+ block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = subvp_prefetch_fclk_mhz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_SUBVP_HARDMINS;
+ num_steps++;
+ }
+
+ /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
+ if (update_active_uclk || update_idle_uclk) {
+ if (!is_idle_dpm_enabled) {
+ block_sequence[num_steps].params.update_hardmin_params.ppclk = PPCLK_UCLK;
+ block_sequence[num_steps].params.update_hardmin_params.freq_mhz = active_uclk_mhz;
+ block_sequence[num_steps].params.update_hardmin_params.response = NULL;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK;
+ num_steps++;
+ }
+
+ /* disable UCLK P-State support if needed */
+ if (!uclk_p_state_change_support &&
+ should_update_pstate_support(safe_to_lower, uclk_p_state_change_support, clk_mgr_base->clks.prev_p_state_change_support) &&
+ dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK)) {
+ block_sequence[num_steps].params.update_pstate_support_params.support = false;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_UCLK_PSTATE_SUPPORT;
+ num_steps++;
+ }
+ }
+
+ /* set FCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
+ if (update_active_fclk || update_idle_fclk) {
+ /* No need to send active FCLK hardmin, automatically set based on DCFCLK */
+ // if (!is_idle_dpm_enabled) {
+ // block_sequence[*num_steps].update_hardmin_params.clk_mgr = clk_mgr;
+ // block_sequence[*num_steps].update_hardmin_params.ppclk = PPCLK_FCLK;
+ // block_sequence[*num_steps].update_hardmin_params.freq_mhz = active_fclk_mhz;
+ // block_sequence[*num_steps].update_hardmin_params.response = NULL;
+ // block_sequence[*num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK;
+ // (*num_steps)++;
+ // }
+
+ /* disable FCLK P-State support if needed (message not supported on DCN401)*/
+ // if (!fclk_p_state_change_support &&
+ // should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support) &&
+ // dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
+ // block_sequence[num_steps].params.update_pstate_support_params.support = false;
+ // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT;
+ // num_steps++;
+ // }
+ }
+
+ if (new_clocks->fw_based_mclk_switching != clk_mgr_base->clks.fw_based_mclk_switching &&
+ safe_to_lower && !new_clocks->fw_based_mclk_switching) {
+ /* disable FAMS features */
+ clk_mgr_base->clks.fw_based_mclk_switching = new_clocks->fw_based_mclk_switching;
+
+ block_sequence[num_steps].params.update_wait_for_dmub_ack_params.enable = clk_mgr_base->clks.fw_based_mclk_switching;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_WAIT_FOR_DMUB_ACK;
+ num_steps++;
+
+ block_sequence[num_steps].params.indicate_drr_status_params.mod_drr_for_pstate = clk_mgr_base->clks.fw_based_mclk_switching;
+ block_sequence[num_steps].func = CLK_MGR401_INDICATE_DRR_STATUS;
+ num_steps++;
+ }
+
+ /* CLK_MGR401_UPDATE_CAB_FOR_UCLK */
+ if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
+ safe_to_lower && clk_mgr_base->clks.num_ways > new_clocks->num_ways) {
+ /* decrease num ways for subvp */
+ clk_mgr_base->clks.num_ways = new_clocks->num_ways;
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK)) {
+ block_sequence[num_steps].params.update_cab_for_uclk_params.num_ways = clk_mgr_base->clks.num_ways;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_CAB_FOR_UCLK;
+ num_steps++;
+ }
+ }
+
+ return num_steps;
+}
+
+static unsigned int dcn401_build_update_display_clocks_sequence(
+ struct clk_mgr *clk_mgr_base,
+ struct dc_state *context,
+ struct dc_clocks *new_clocks,
+ bool safe_to_lower)
+{
+ struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal);
+ struct dc *dc = clk_mgr_base->ctx->dc;
+ struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
+ struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence;
+ bool force_reset = false;
+ bool update_dispclk = false;
+ bool update_dppclk = false;
+ bool dppclk_lowered = false;
+ /* number of block_sequence entries filled in so far; returned to the caller */
+ unsigned int num_steps = 0;
+
+ /* seed dispclk/dppclk state from the boot snapshot (no sequence step is queued for this) */
+ if (clk_mgr_base->clks.dispclk_khz == 0 ||
+ (dc->debug.force_clock_mode & 0x1)) {
+ /* This path is taken on resume or boot up. If the forced_clock cfg
+ * option is used, programming of DISPCLK and DPPCLK is bypassed, but
+ * they still need to be set for S3. force_clock_mode 0x1: force a
+ * clock reset even if the clock is unchanged, while in Passive level.
+ */
+ force_reset = true;
+
+ clk_mgr_base->clks.dispclk_khz = clk_mgr_base->boot_snapshot.dispclk;
+ clk_mgr_base->clks.actual_dispclk_khz = clk_mgr_base->clks.dispclk_khz;
+
+ clk_mgr_base->clks.dppclk_khz = clk_mgr_base->boot_snapshot.dppclk;
+ clk_mgr_base->clks.actual_dppclk_khz = clk_mgr_base->clks.dppclk_khz;
+ }
+
+ /* DTBCLK: when it is not enabled and DTBCLK DPM is active, request the first clock table entry */
+ if (!new_clocks->dtbclk_en && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DTBCLK)) {
+ new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
+ }
+
+ /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
+ if (!dc->debug.disable_dtb_ref_clk_switch &&
+ should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000) && //TODO these should be ceiled
+ dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DTBCLK)) {
+ /* DCCG requires KHz precision for DTBCLK */
+ block_sequence[num_steps].params.update_hardmin_params.ppclk = PPCLK_DTBCLK;
+ block_sequence[num_steps].params.update_hardmin_params.freq_mhz = khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz);
+ block_sequence[num_steps].params.update_hardmin_params.response = &clk_mgr_base->clks.ref_dtbclk_khz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK;
+ num_steps++;
+
+ /* Update DTO in DCCG */
+ block_sequence[num_steps].params.update_dtbclk_dto_params.context = context;
+ block_sequence[num_steps].params.update_dtbclk_dto_params.ref_dtbclk_khz = &clk_mgr_base->clks.ref_dtbclk_khz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DTBCLK_DTO;
+ num_steps++;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) {
+ if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz)
+ dppclk_lowered = true;
+
+ clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
+ clk_mgr_base->clks.actual_dppclk_khz = new_clocks->dppclk_khz;
+
+ update_dppclk = true;
+ }
+
+ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
+ clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
+
+ block_sequence[num_steps].params.update_hardmin_optimized_params.ppclk = PPCLK_DISPCLK;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.freq_khz = clk_mgr_base->clks.dispclk_khz;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.response = &clk_mgr_base->clks.actual_dispclk_khz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK_OPTIMIZED;
+ num_steps++;
+
+ update_dispclk = true;
+ }
+
+ if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) {
+ if (dppclk_lowered) {
+ /* if clock is being lowered, increase DTO before lowering refclk */
+ block_sequence[num_steps].params.update_dppclk_dto_params.context = context;
+ block_sequence[num_steps].params.update_dppclk_dto_params.ref_dppclk_khz = &clk_mgr_base->clks.dppclk_khz;
+ block_sequence[num_steps].params.update_dppclk_dto_params.safe_to_lower = safe_to_lower;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DPPCLK_DTO;
+ num_steps++;
+
+ block_sequence[num_steps].params.update_dentist_params.context = context;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DENTIST;
+ num_steps++;
+
+ if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DPPCLK)) {
+ block_sequence[num_steps].params.update_hardmin_optimized_params.ppclk = PPCLK_DPPCLK;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.freq_khz = clk_mgr_base->clks.dppclk_khz;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.response = &clk_mgr_base->clks.actual_dppclk_khz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK_OPTIMIZED;
+ num_steps++;
+
+ block_sequence[num_steps].params.update_dppclk_dto_params.context = context;
+ block_sequence[num_steps].params.update_dppclk_dto_params.ref_dppclk_khz = &clk_mgr_base->clks.actual_dppclk_khz;
+ block_sequence[num_steps].params.update_dppclk_dto_params.safe_to_lower = safe_to_lower;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DPPCLK_DTO;
+ num_steps++;
+ }
+ } else {
+ /* if clock is being raised, increase refclk before lowering DTO */
+ if (update_dppclk && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_DPPCLK)) {
+ block_sequence[num_steps].params.update_hardmin_optimized_params.ppclk = PPCLK_DPPCLK;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.freq_khz = clk_mgr_base->clks.dppclk_khz;
+ block_sequence[num_steps].params.update_hardmin_optimized_params.response = &clk_mgr_base->clks.actual_dppclk_khz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_HARDMIN_PPCLK_OPTIMIZED;
+ num_steps++;
+ }
+
+ if (update_dppclk || update_dispclk) {
+ block_sequence[num_steps].params.update_dentist_params.context = context;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DENTIST;
+ num_steps++;
+ }
+
+ block_sequence[num_steps].params.update_dppclk_dto_params.context = context;
+ block_sequence[num_steps].params.update_dppclk_dto_params.ref_dppclk_khz = &clk_mgr_base->clks.actual_dppclk_khz;
+ block_sequence[num_steps].params.update_dppclk_dto_params.safe_to_lower = safe_to_lower;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_DPPCLK_DTO;
+ num_steps++;
+ }
+ }
+
+ if (update_dispclk && dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
+ /* update the dmcu PSR wait loop count, derived from the new dispclk */
+ block_sequence[num_steps].params.update_psr_wait_loop_params.dmcu = dmcu;
+ block_sequence[num_steps].params.update_psr_wait_loop_params.wait = clk_mgr_base->clks.dispclk_khz / 1000 / 7;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_PSR_WAIT_LOOP;
+ num_steps++;
+ }
+
+ return num_steps;
+}
+
+/*
+ * Apply the clocks in @context in two passes: bandwidth related clocks
+ * first, then display related clocks. Each pass builds a block sequence
+ * and executes it before the next one is built.
+ * @safe_to_lower is forwarded unchanged to both sequence builders.
+ */
+static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base,
+		struct dc_state *context,
+		bool safe_to_lower)
+{
+	struct dc *dc = clk_mgr_base->ctx->dc;
+	struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
+	unsigned int step_count;
+
+	/* pass 1: bandwidth related clocks */
+	step_count = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, context,
+			new_clocks, safe_to_lower);
+	dcn401_execute_block_sequence(clk_mgr_base, step_count);
+
+	/* pass 2: display related clocks */
+	step_count = dcn401_build_update_display_clocks_sequence(clk_mgr_base, context,
+			new_clocks, safe_to_lower);
+	dcn401_execute_block_sequence(clk_mgr_base, step_count);
+
+	/* optional auto-DPM test logging, gated by the DC config */
+	if (!dc->config.enable_auto_dpm_test_logs)
+		return;
+
+	dcn401_auto_dpm_test_log(new_clocks, TO_CLK_MGR_INTERNAL(clk_mgr_base), context);
+}
+
+/* Compute a frequency in kHz from the FbMult fields of CLK0_CLK_PLL_REQ and dfs_ref_freq_khz. */
+static uint32_t dcn401_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
+{
+ struct fixed31_32 pll_req;
+ uint32_t pll_req_reg = 0;
+
+ /* get FbMult value */
+ pll_req_reg = REG_READ(CLK0_CLK_PLL_REQ);
+
+ /* set up a fixed-point number
+ * this works because the int part is on the right edge of the register
+ * and the frac part is on the left edge
+ */
+ pll_req = dc_fixpt_from_int(pll_req_reg & clk_mgr->clk_mgr_mask->FbMult_int);
+ pll_req.value |= pll_req_reg & clk_mgr->clk_mgr_mask->FbMult_frac;
+
+ /* multiply by the reference clock frequency (dfs_ref_freq_khz, in kHz) */
+ pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz);
+
+ return dc_fixpt_floor(pll_req);
+}
+
+/*
+ * Read the GPU PLL spread-spectrum entry through the dc_bios callbacks.
+ *
+ * When an entry exists, is read successfully and reports a non-zero
+ * percentage, spread spectrum is flagged on DPREFCLK and its divider is
+ * recorded. The percentage itself is only recorded when CENTER_MODE is 0;
+ * otherwise dprefclk_ss_percentage is left untouched.
+ */
+static void dcn401_clock_read_ss_info(struct clk_mgr_internal *clk_mgr)
+{
+	struct dc_bios *bp = clk_mgr->base.ctx->dc_bios;
+	struct spread_spectrum_info info = { { 0 } };
+	enum bp_result result;
+
+	if (!bp->funcs->get_ss_entry_number(bp, AS_SIGNAL_TYPE_GPU_PLL))
+		return;
+
+	result = bp->funcs->get_spread_spectrum_info(bp, AS_SIGNAL_TYPE_GPU_PLL, 0, &info);
+
+	/* SSInfo.spreadSpectrumPercentage != 0 would be a sign that SS is enabled */
+	if (result != BP_RESULT_OK || info.spread_spectrum_percentage == 0)
+		return;
+
+	clk_mgr->ss_on_dprefclk = true;
+	clk_mgr->dprefclk_ss_divider = info.spread_percentage_divider;
+
+	/* Currently for DP Reference clock we need only SS percentage for downspread */
+	if (info.type.CENTER_MODE == 0)
+		clk_mgr->dprefclk_ss_percentage = info.spread_spectrum_percentage;
+}
+/* Rebuild the watermark table from the valid nv_entries and hand it to the SMU. */
+static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	WatermarksExternal_t *table = (WatermarksExternal_t *) clk_mgr->wm_range_table;
+	unsigned int set;
+
+	if (!clk_mgr->smu_present || !table)
+		return;
+
+	memset(table, 0, sizeof(*table));
+
+	/* collect valid ranges, place in pmfw table */
+	for (set = 0; set < WM_SET_COUNT; set++) {
+		if (!clk_mgr->base.bw_params->wm_table.nv_entries[set].valid)
+			continue;
+		table->Watermarks.WatermarkRow[set].WmSetting = set;
+		table->Watermarks.WatermarkRow[set].Flags = clk_mgr->base.bw_params->wm_table.nv_entries[set].pmfw_breakdown.wm_type;
+	}
+
+	dcn401_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32);
+	dcn401_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF);
+	dcn401_smu_transfer_wm_table_dram_2_smu(clk_mgr);
+}
+
+/* Set the hard-min memclk: either what the current mode requires or DPM0 */
+static void dcn401_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool current_mode)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	const struct dc *dc = clk_mgr->base.ctx->dc;
+	struct dc_state *context = dc->current_state;
+	struct dc_clocks new_clocks;
+	unsigned int num_steps; /* same type dcn401_update_clocks() uses for the step count */
+
+	if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK))
+		return;
+
+	/* start from the clocks tracked in clks, then override only the memclk fields */
+	memcpy(&new_clocks, &clk_mgr_base->clks, sizeof(struct dc_clocks));
+
+	if (current_mode) {
+		new_clocks.dramclk_khz = context->bw_ctx.bw.dcn.clk.dramclk_khz;
+		new_clocks.idle_dramclk_khz = context->bw_ctx.bw.dcn.clk.idle_dramclk_khz;
+		new_clocks.p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support;
+	} else {
+		new_clocks.dramclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz * 1000;
+		new_clocks.idle_dramclk_khz = new_clocks.dramclk_khz;
+		new_clocks.p_state_change_support = true;
+	}
+
+	num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base,
+			context,
+			&new_clocks,
+			true);
+
+	/* execute sequence */
+	dcn401_execute_block_sequence(clk_mgr_base, num_steps);
+}
+
+/* Return dramclk_khz as recorded in the current DC state. */
+static int dcn401_get_hard_min_memclk(struct clk_mgr *clk_mgr_base)
+{
+	/* clk_mgr_base is the 'base' member of the clk_mgr_internal wrapper */
+	return clk_mgr_base->ctx->dc->current_state->bw_ctx.bw.dcn.clk.dramclk_khz;
+}
+
+/* Return fclk_khz as recorded in the current DC state. */
+static int dcn401_get_hard_min_fclk(struct clk_mgr *clk_mgr_base)
+{
+	/* clk_mgr_base is the 'base' member of the clk_mgr_internal wrapper */
+	return clk_mgr_base->ctx->dc->current_state->bw_ctx.bw.dcn.clk.fclk_khz;
+}
+
+/* Query the SMU for the current memclk and fclk DPM states, then refresh the bounding box */
+static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct clk_limit_num_entries *num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk;
+ unsigned int num_levels;
+
+ if (!clk_mgr->smu_present)
+ return;
+
+ /* Refresh memclk levels; max_memclk_mhz is taken from the last reported level */
+ dcn401_init_single_clock(clk_mgr, PPCLK_UCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz,
+ &num_entries_per_clk->num_memclk_levels);
+ if (num_entries_per_clk->num_memclk_levels) {
+ clk_mgr_base->bw_params->max_memclk_mhz =
+ clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz;
+ }
+ /* a DC-mode memclk limit equal to the last reported level is stored as 0 */
+ clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK);
+ if (num_entries_per_clk->num_memclk_levels && clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz ==
+ clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz)
+ clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = 0;
+ clk_mgr_base->bw_params->dc_mode_softmax_memclk = clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz;
+ /* Refresh fclk levels and the fclk DC-mode limit the same way */
+ dcn401_init_single_clock(clk_mgr, PPCLK_FCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz,
+ &num_entries_per_clk->num_fclk_levels);
+ clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK);
+ if (num_entries_per_clk->num_fclk_levels && clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz ==
+ clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_fclk_levels - 1].fclk_mhz)
+ clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = 0;
+
+ num_levels = max(num_entries_per_clk->num_memclk_levels, num_entries_per_clk->num_fclk_levels);
+
+ clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1;
+ /* clear dpm_present when neither clock reported any levels */
+ if (clk_mgr->dpm_present && !num_levels)
+ clk_mgr->dpm_present = false;
+
+ clk_mgr_base->bw_params->num_channels = dcn401_smu_get_num_of_umc_channels(clk_mgr);
+ if (clk_mgr_base->ctx->dc_bios) {
+ /* use BIOS values if none provided by PMFW */
+ if (clk_mgr_base->bw_params->num_channels == 0) {
+ clk_mgr_base->bw_params->num_channels = clk_mgr_base->ctx->dc_bios->vram_info.num_chans;
+ }
+ clk_mgr_base->bw_params->dram_channel_width_bytes = clk_mgr_base->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+ }
+
+ /* Refresh bounding box */
+ clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box(
+ clk_mgr->base.ctx->dc, clk_mgr_base->bw_params);
+}
+
+/*
+ * Report whether two clock states are equivalent for DCN401.
+ *
+ * Only the display, DPP, DCF, deep-sleep DCF and DRAM clocks plus the
+ * p_state_change_support and fclk_p_state_change_support flags are
+ * compared; all other dc_clocks fields are ignored.
+ */
+static bool dcn401_are_clock_states_equal(struct dc_clocks *a,
+		struct dc_clocks *b)
+{
+	bool clocks_match = a->dispclk_khz == b->dispclk_khz &&
+			    a->dppclk_khz == b->dppclk_khz &&
+			    a->dcfclk_khz == b->dcfclk_khz &&
+			    a->dcfclk_deep_sleep_khz == b->dcfclk_deep_sleep_khz &&
+			    a->dramclk_khz == b->dramclk_khz;
+	bool pstate_match = a->p_state_change_support == b->p_state_change_support &&
+			    a->fclk_p_state_change_support == b->fclk_p_state_change_support;
+
+	return clocks_match && pstate_match;
+}
+
+/* Forward the PME workaround request to the SMU. */
+static void dcn401_enable_pme_wa(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+	/* nothing to message when no SMU is present */
+	if (clk_mgr->smu_present)
+		dcn401_smu_set_pme_workaround(clk_mgr);
+}
+
+/* Report the smu_present flag of this clock manager. */
+static bool dcn401_is_smu_present(struct clk_mgr *clk_mgr_base)
+{
+	return TO_CLK_MGR_INTERNAL(clk_mgr_base)->smu_present;
+}
+
+
+/* Return the DTB reference clock: the value tracked in clks when an SMU is
+ * present and DTBCLK DPM is enabled, otherwise the boot snapshot value.
+ */
+static int dcn401_get_dtb_ref_freq_khz(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	bool dtbclk_dpm_active = clk_mgr->smu_present &&
+				 dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK);
+
+	/* DPM disabled, so use boot snapshot */
+	if (!dtbclk_dpm_active)
+		return clk_mgr_base->boot_snapshot.dtbclk;
+
+	/* DPM enabled, use currently set value */
+	return clk_mgr_base->clks.ref_dtbclk_khz;
+}
+
+/* Derive DISPCLK from the DENTIST_DISPCLK_WDIVIDER field; 0 when the divider resolves to 0. */
+static int dcn401_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	uint32_t wdivider_did;
+	int divider;
+
+	REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, &wdivider_did);
+	divider = dentist_get_divider_from_did(wdivider_did);
+	if (!divider)
+		return 0;
+
+	/* Return DISPCLK freq in Khz */
+	return (DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz) / divider;
+}
+
+/* kHz for @clk_type: last DPM table level if DPM is enabled, else boot snapshot; DSCCLK is DISPCLK / 3. */
+unsigned int dcn401_get_max_clock_khz(struct clk_mgr *clk_mgr_base, enum clk_type clk_type)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	unsigned int last_level;
+
+	switch (clk_type) {
+	case CLK_TYPE_DISPCLK:
+		if (!dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK))
+			return clk_mgr->base.boot_snapshot.dispclk;
+		last_level = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels - 1;
+		return clk_mgr->base.bw_params->clk_table.entries[last_level].dispclk_mhz * 1000;
+	case CLK_TYPE_DPPCLK:
+		if (!dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK))
+			return clk_mgr->base.boot_snapshot.dppclk;
+		last_level = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dppclk_levels - 1;
+		return clk_mgr->base.bw_params->clk_table.entries[last_level].dppclk_mhz * 1000;
+	case CLK_TYPE_DSCCLK:
+		if (!dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK))
+			return clk_mgr->base.boot_snapshot.dispclk / 3;
+		last_level = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels - 1;
+		return clk_mgr->base.bw_params->clk_table.entries[last_level].dispclk_mhz * 1000 / 3;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static struct clk_mgr_funcs dcn401_funcs = { /* installed as base.funcs by dcn401_clk_mgr_construct() */
+ .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, /* dce12_ helper, not a dcn401_ function */
+ .get_dtb_ref_clk_frequency = dcn401_get_dtb_ref_freq_khz,
+ .update_clocks = dcn401_update_clocks,
+ .dump_clk_registers = dcn401_dump_clk_registers,
+ .init_clocks = dcn401_init_clocks,
+ .notify_wm_ranges = dcn401_notify_wm_ranges,
+ .set_hard_min_memclk = dcn401_set_hard_min_memclk,
+ .get_memclk_states_from_smu = dcn401_get_memclk_states_from_smu,
+ .are_clock_states_equal = dcn401_are_clock_states_equal,
+ .enable_pme_wa = dcn401_enable_pme_wa,
+ .is_smu_present = dcn401_is_smu_present,
+ .get_dispclk_from_dentist = dcn401_get_dispclk_from_dentist,
+ .get_hard_min_memclk = dcn401_get_hard_min_memclk,
+ .get_hard_min_fclk = dcn401_get_hard_min_fclk,
+ .is_dc_mode_present = dcn401_is_dc_mode_present,
+ .get_max_clock_khz = dcn401_get_max_clock_khz,
+};
+
+struct clk_mgr_internal *dcn401_clk_mgr_construct(
+ struct dc_context *ctx,
+ struct dccg *dccg)
+{
+ struct clk_log_info log_info = {0};
+ struct dcn401_clk_mgr *clk_mgr401 = kzalloc(sizeof(struct dcn401_clk_mgr), GFP_KERNEL);
+ struct clk_mgr_internal *clk_mgr;
+
+ if (!clk_mgr401)
+ return NULL;
+
+ clk_mgr = &clk_mgr401->base;
+ clk_mgr->base.ctx = ctx;
+ clk_mgr->base.funcs = &dcn401_funcs;
+ clk_mgr->regs = &clk_mgr_regs_dcn401;
+ clk_mgr->clk_mgr_shift = &clk_mgr_shift_dcn401;
+ clk_mgr->clk_mgr_mask = &clk_mgr_mask_dcn401;
+
+ clk_mgr->dccg = dccg;
+ clk_mgr->dfs_bypass_disp_clk = 0;
+
+ clk_mgr->dprefclk_ss_percentage = 0;
+ clk_mgr->dprefclk_ss_divider = 1000;
+ clk_mgr->ss_on_dprefclk = false;
+ clk_mgr->dfs_ref_freq_khz = 100000;
+
+ /* Changed from DCN3.2_clock_frequency doc to match
+ * dcn401_dump_clk_registers from 4 * dentist_vco_freq_khz /
+ * dprefclk DID divider
+ */
+ clk_mgr->base.dprefclk_khz = 720000; //TODO update from VBIOS
+
+ /* integer part is now VCO frequency in kHz */
+ clk_mgr->base.dentist_vco_freq_khz = dcn401_get_vco_frequency_from_reg(clk_mgr);
+
+ /* in case we don't get a value from the register, use default */
+ if (clk_mgr->base.dentist_vco_freq_khz == 0)
+ clk_mgr->base.dentist_vco_freq_khz = 4500000; //TODO Update from VBIOS
+
+ dcn401_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
+ /* with DTB ref clock switching disabled, track the boot snapshot DTBCLK */
+ if (ctx->dc->debug.disable_dtb_ref_clk_switch &&
+ clk_mgr->base.clks.ref_dtbclk_khz != clk_mgr->base.boot_snapshot.dtbclk) {
+ clk_mgr->base.clks.ref_dtbclk_khz = clk_mgr->base.boot_snapshot.dtbclk;
+ }
+ /* a non-zero DPREFCLK in the boot snapshot replaces the default set above */
+ if (clk_mgr->base.boot_snapshot.dprefclk != 0) {
+ clk_mgr->base.dprefclk_khz = clk_mgr->base.boot_snapshot.dprefclk;
+ }
+ dcn401_clock_read_ss_info(clk_mgr);
+
+ clk_mgr->dfs_bypass_enabled = false;
+
+ clk_mgr->smu_present = false;
+ /* bw_params and wm_range_table are released again in dcn401_clk_mgr_destroy() */
+ clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL);
+ if (!clk_mgr->base.bw_params) {
+ BREAK_TO_DEBUGGER();
+ kfree(clk_mgr401);
+ return NULL;
+ }
+
+ /* need physical address of table to give to PMFW */
+ clk_mgr->wm_range_table = dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx,
+ DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t),
+ &clk_mgr->wm_range_table_addr);
+ if (!clk_mgr->wm_range_table) {
+ BREAK_TO_DEBUGGER();
+ kfree(clk_mgr->base.bw_params);
+ kfree(clk_mgr401);
+ return NULL;
+ }
+
+ return &clk_mgr401->base;
+}
+
+void dcn401_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr)
+{
+ kfree(clk_mgr->base.bw_params);
+ /* wm_range_table came from dm_helpers_allocate_gpu_mem() with the GART type; free it the same way */
+ if (clk_mgr->wm_range_table)
+ dm_helpers_free_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART,
+ clk_mgr->wm_range_table);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
new file mode 100644
index 000000000000..97a1ce1e8a9e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DCN401_CLK_MGR_H_
+#define __DCN401_CLK_MGR_H_
+
+#define DCN401_CLK_MGR_MAX_SEQUENCE_SIZE 30
+
+union dcn401_clk_mgr_block_sequence_params { /* one member per kind of step; paired with a func in each sequence entry */
+ struct {
+ /* inputs */
+ uint32_t num_displays;
+ } update_num_displays_params;
+ struct {
+ /* inputs */
+ uint32_t ppclk;
+ uint16_t freq_mhz;
+ /* outputs (the sequence builders set this to NULL for some steps) */
+ uint32_t *response;
+ } update_hardmin_params;
+ struct {
+ /* inputs */
+ uint32_t ppclk;
+ int freq_khz;
+ /* outputs */
+ uint32_t *response;
+ } update_hardmin_optimized_params;
+ struct {
+ /* inputs (the builders also fill this member for the ACTIVE and SUBVP hardmin steps) */
+ uint16_t uclk_mhz;
+ uint16_t fclk_mhz;
+ } update_idle_hardmin_params;
+ struct {
+ /* inputs */
+ uint16_t freq_mhz;
+ } update_deep_sleep_dcfclk_params;
+ struct {
+ /* inputs */
+ bool support;
+ } update_pstate_support_params;
+ struct {
+ /* inputs */
+ unsigned int num_ways;
+ } update_cab_for_uclk_params;
+ struct {
+ /* inputs */
+ bool enable;
+ } update_wait_for_dmub_ack_params;
+ struct {
+ /* inputs */
+ bool mod_drr_for_pstate;
+ } indicate_drr_status_params;
+ struct {
+ /* inputs */
+ struct dc_state *context;
+ int *ref_dppclk_khz;
+ bool safe_to_lower;
+ } update_dppclk_dto_params;
+ struct {
+ /* inputs */
+ struct dc_state *context;
+ int *ref_dtbclk_khz;
+ } update_dtbclk_dto_params;
+ struct {
+ /* inputs */
+ struct dc_state *context;
+ } update_dentist_params;
+ struct {
+ /* inputs */
+ struct dmcu *dmcu;
+ unsigned int wait;
+ } update_psr_wait_loop_params;
+};
+
+enum dcn401_clk_mgr_block_sequence_func { /* trailing notes name the params member the builders fill */
+ CLK_MGR401_READ_CLOCKS_FROM_DENTIST,
+ CLK_MGR401_UPDATE_NUM_DISPLAYS,
+ CLK_MGR401_UPDATE_HARDMIN_PPCLK, /* update_hardmin_params */
+ CLK_MGR401_UPDATE_HARDMIN_PPCLK_OPTIMIZED, /* update_hardmin_optimized_params */
+ CLK_MGR401_UPDATE_ACTIVE_HARDMINS, /* update_idle_hardmin_params */
+ CLK_MGR401_UPDATE_IDLE_HARDMINS, /* update_idle_hardmin_params */
+ CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK,
+ CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT, /* queueing is commented out in the builder: message not supported on DCN401 */
+ CLK_MGR401_UPDATE_UCLK_PSTATE_SUPPORT, /* update_pstate_support_params */
+ CLK_MGR401_UPDATE_CAB_FOR_UCLK, /* update_cab_for_uclk_params */
+ CLK_MGR401_UPDATE_WAIT_FOR_DMUB_ACK, /* update_wait_for_dmub_ack_params */
+ CLK_MGR401_INDICATE_DRR_STATUS, /* indicate_drr_status_params */
+ CLK_MGR401_UPDATE_DPPCLK_DTO, /* update_dppclk_dto_params */
+ CLK_MGR401_UPDATE_DTBCLK_DTO, /* update_dtbclk_dto_params */
+ CLK_MGR401_UPDATE_DENTIST, /* update_dentist_params */
+ CLK_MGR401_UPDATE_PSR_WAIT_LOOP, /* update_psr_wait_loop_params */
+ CLK_MGR401_UPDATE_SUBVP_HARDMINS, /* update_idle_hardmin_params */
+};
+
+struct dcn401_clk_mgr_block_sequence { /* one queued step */
+ union dcn401_clk_mgr_block_sequence_params params; /* arguments filled in for func */
+ enum dcn401_clk_mgr_block_sequence_func func; /* which step this entry represents */
+};
+
+struct dcn401_clk_mgr {
+ struct clk_mgr_internal base;
+ /* step list filled by the sequence builders, which return how many entries they wrote */
+ struct dcn401_clk_mgr_block_sequence block_sequence[DCN401_CLK_MGR_MAX_SEQUENCE_SIZE];
+};
+
+void dcn401_init_clocks(struct clk_mgr *clk_mgr_base);
+bool dcn401_is_dc_mode_present(struct clk_mgr *clk_mgr_base);
+
+struct clk_mgr_internal *dcn401_clk_mgr_construct(struct dc_context *ctx,
+ struct dccg *dccg);
+
+void dcn401_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr);
+
+unsigned int dcn401_get_max_clock_khz(struct clk_mgr *clk_mgr_base, enum clk_type clk_type);
+
+#endif /* __DCN401_CLK_MGR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
new file mode 100644
index 000000000000..3a263840893e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
@@ -0,0 +1,472 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dcn401_clk_mgr_smu_msg.h"
+
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+
+#include "dalsmc.h"
+#include "dcn401_smu14_driver_if.h"
+
+#define mmDAL_MSG_REG 0x1628A
+#define mmDAL_ARG_REG 0x16273
+#define mmDAL_RESP_REG 0x16274
+
+#define REG(reg_name) \
+ mm ## reg_name
+
+#include "logger_types.h"
+
+#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+
+/* temporary define */
+#ifndef DALSMC_MSG_SubvpUclkFclk
+#define DALSMC_MSG_SubvpUclkFclk 0x1B
+#endif
+#ifndef DALSMC_MSG_GetNumUmcChannels
+#define DALSMC_MSG_GetNumUmcChannels 0x1C
+#endif
+
+/*
+ * Poll DAL_RESP_REG until it reads non-zero or the retries are used up.
+ * Used instead of the REG_WAIT macro because the wait ends when the register is
+ * NOT EQUAL to zero, and because the translation in msg_if.h won't work with it.
+ */
+static uint32_t dcn401_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
+{
+	uint32_t resp;
+
+	for (;;) {
+		resp = REG_READ(DAL_RESP_REG);
+		if (resp != 0)
+			return resp;
+
+		if (delay_us >= 1000)
+			msleep(delay_us / 1000);
+		else if (delay_us > 0)
+			udelay(delay_us);
+		if (max_retries-- == 0)
+			return resp;
+	}
+}
+
+static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out)
+{
+	uint32_t result;
+
+	/* Wait for the response register to read non-zero (ready) before starting */
+	dcn401_smu_wait_for_response(clk_mgr, 10, 200000);
+
+	TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx);
+
+	/* Handshake order: clear the response, load the argument, then post the ID */
+	REG_WRITE(DAL_RESP_REG, 0);
+	REG_WRITE(DAL_ARG_REG, param_in);
+	REG_WRITE(DAL_MSG_REG, msg_id);
+
+	/* Only a DALSMC_Result_OK response counts as success */
+	result = dcn401_smu_wait_for_response(clk_mgr, 10, 200000);
+	if (result != DALSMC_Result_OK) {
+		TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx);
+		return false;
+	}
+
+	/* Read the output value back from the argument register, if requested */
+	if (param_out)
+		*param_out = REG_READ(DAL_ARG_REG);
+
+	TRACE_SMU_MSG_EXIT(true, param_out ? *param_out : 0, clk_mgr->base.ctx);
+	return true;
+}
+
+/*
+ * Variants that also report how long they waited, so callers can aggregate
+ * the total delay when requesting hardmin clk (nominal delay, not measured):
+ *
+ * dcn401_smu_wait_for_response_delay
+ * dcn401_smu_send_msg_with_param_delay
+ *
+ */
+static uint32_t dcn401_smu_wait_for_response_delay(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries, unsigned int *total_delay_us)
+{
+	uint32_t resp;
+
+	*total_delay_us = 0;
+	for (;;) {
+		resp = REG_READ(DAL_RESP_REG);
+		if (resp != 0)
+			return resp;
+
+		if (delay_us >= 1000)
+			msleep(delay_us / 1000);
+		else if (delay_us > 0)
+			udelay(delay_us);
+		*total_delay_us += delay_us;
+		if (max_retries-- == 0)
+			return resp;
+	}
+}
+
+static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mgr, uint32_t msg_id, uint32_t param_in, uint32_t *param_out, unsigned int *total_delay_us)
+{
+	unsigned int ready_wait_us, reply_wait_us;
+	uint32_t result;
+
+	*total_delay_us = 0;
+
+	/* Wait for the response register to be ready, noting how long that took */
+	dcn401_smu_wait_for_response_delay(clk_mgr, 10, 200000, &ready_wait_us);
+
+	TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx);
+
+	/* Handshake order: clear the response, load the argument, then post the ID */
+	REG_WRITE(DAL_RESP_REG, 0);
+	REG_WRITE(DAL_ARG_REG, param_in);
+	REG_WRITE(DAL_MSG_REG, msg_id);
+
+	result = dcn401_smu_wait_for_response_delay(clk_mgr, 10, 200000, &reply_wait_us);
+	if (result != DALSMC_Result_OK) {
+		/* a failed reply is charged a fixed 2000000 us, not the counted wait */
+		*total_delay_us = ready_wait_us + 2000000;
+		TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx);
+		return false;
+	}
+
+	/* Read the output value back from the argument register, if requested */
+	if (param_out)
+		*param_out = REG_READ(DAL_ARG_REG);
+
+	*total_delay_us = ready_wait_us + reply_wait_us;
+	TRACE_SMU_MSG_EXIT(true, param_out ? *param_out : 0, clk_mgr->base.ctx);
+	return true;
+}
+
+bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version)
+{
+	bool ok;
+
+	smu_print("SMU Get SMU version\n");
+
+	ok = dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_GetSmuVersion, 0, version);
+	if (!ok)
+		return false;
+
+	/* on success the reply was stored through @version */
+	smu_print("SMU version: %d\n", *version);
+	return true;
+}
+
+/* Message output should match SMU14_DRIVER_IF_VERSION in dcn401_smu14_driver_if.h */
+bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr)
+{
+ uint32_t response = 0;
+
+ smu_print("SMU Check driver if version\n");
+
+ if (dcn401_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_GetDriverIfVersion, 0, &response)) {
+
+ smu_print("SMU driver if version: %d\n", response);
+
+ if (response == SMU14_DRIVER_IF_VERSION)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Returns true only when the SMU reports a message header version equal to
+ * DALSMC_VERSION in dalsmc.h.
+ */
+bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr)
+{
+	uint32_t hdr_version = 0;
+
+	smu_print("SMU Check msg header version\n");
+
+	/* a failed query is reported the same way as a version mismatch */
+	if (!dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_GetMsgHeaderVersion, 0, &hdr_version))
+		return false;
+
+	smu_print("SMU msg header version: %d\n", hdr_version);
+
+	return hdr_version == DALSMC_VERSION;
+}
+
+void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support)
+{
+	const uint32_t allow = support;	/* bool widened to the 32-bit message argument */
+
+	smu_print("FCLK P-state support value is : %d\n", support);
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetFclkSwitchAllow, allow, NULL);
+}
+
+void dcn401_smu_send_uclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support)
+{
+	const uint32_t allow = support;	/* bool widened to the 32-bit message argument */
+
+	smu_print("UCLK P-state support value is : %d\n", support);
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetUclkPstateAllow, allow, NULL);
+}
+
+void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways)
+{
+	/* bit 0 is set whenever num_ways is non-zero; the count itself starts at bit 1 */
+	const uint32_t nonzero_bit = num_ways ? 1 : 0;
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetCabForUclkPstate, (num_ways << 1) | nonzero_bit, NULL);
+	smu_print("Numways for SubVP : %d\n", num_ways);
+}
+
+void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
+{
+	/* log the value, then hand it to the SMU; no reply value is read back */
+	smu_print("SMU Set DRAM addr high: %d\n", addr_high);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetDalDramAddrHigh, addr_high, NULL);
+}
+
+void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low)
+{
+	/* log the value, then hand it to the SMU; no reply value is read back */
+	smu_print("SMU Set DRAM addr low: %d\n", addr_low);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetDalDramAddrLow, addr_low, NULL);
+}
+
+void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
+{
+	const uint32_t table_id = TABLE_WATERMARKS;	/* table selector sent as the argument */
+
+	smu_print("SMU Transfer WM table DRAM 2 SMU\n");
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_TransferTableDram2Smu, table_id, NULL);
+}
+
+void dcn401_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr)
+{
+	/* DALSMC_MSG_BacoAudioD3PME is sent with a zero argument; the result is ignored */
+	smu_print("SMU Set PME workaround\n");
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_BacoAudioD3PME, 0, NULL);
+}
+
+static unsigned int dcn401_smu_get_hard_min_status(struct clk_mgr_internal *clk_mgr, bool *no_timeout, unsigned int *total_delay_us)
+{
+	uint32_t status_bits = 0;	/* stays 0 if the message fails */
+
+	/* 0 is sent as the message argument for the status query */
+	*no_timeout = dcn401_smu_send_msg_with_param_delay(clk_mgr,
+			DALSMC_MSG_ReturnHardMinStatus, 0, &status_bits,
+			total_delay_us);
+
+	smu_print("SMU Get hard min status: no_timeout %d delay %d us clk bits %x\n",
+		*no_timeout, *total_delay_us, status_bits);
+
+	/* bit mask; dcn401_smu_wait_hard_min_status tests bit (1 << ppclk) of it */
+	return status_bits;
+}
+
+/* Poll the SMU until the hardmin status bit for @ppclk is set or 1000000 us of delay has accrued */
+static bool dcn401_smu_wait_hard_min_status(struct clk_mgr_internal *clk_mgr, uint32_t ppclk)
+{
+	const unsigned int max_delay_us = 1000000;
+	const unsigned int hardmin_status_mask = 1U << ppclk; /* 1U: avoid a signed shift */
+	unsigned int total_delay_us = 0;
+	bool first_attempt = true;
+	bool hardmin_done = false;
+
+	while (!hardmin_done && total_delay_us < max_delay_us) {
+		unsigned int read_total_delay_us;
+		bool no_timeout;
+
+		/* always back off between polls so the delay budget is sure to run out */
+		if (!first_attempt) {
+			udelay(500);
+			total_delay_us += 500;
+			smu_print("SMU Wait hard min status for %d us\n", total_delay_us);
+		}
+		first_attempt = false;
+
+		hardmin_done = dcn401_smu_get_hard_min_status(clk_mgr, &no_timeout,
+				&read_total_delay_us) & hardmin_status_mask;
+		total_delay_us += read_total_delay_us;
+	}
+
+	return hardmin_done;
+}
+
+/*
+ * Request a hard minimum of freq_mhz for clock clk and wait for it to be
+ * acknowledged. Returns the actual frequency that was set in MHz, 0 on failure.
+ */
+unsigned int dcn401_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz)
+{
+	/* bits 23:16 for clock type, lower 16 bits for frequency in MHz */
+	const uint32_t param = (clk << 16) | freq_mhz;
+	uint32_t actual_freq = 0;
+	bool acked;
+
+	smu_print("SMU Set hard min by freq: clk = %d, freq_mhz = %d MHz\n", clk, freq_mhz);
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetHardMinByFreq, param, &actual_freq);
+
+	/* the acknowledgement result is only logged; it does not change the return value */
+	acked = dcn401_smu_wait_hard_min_status(clk_mgr, clk);
+	smu_print("SMU Frequency set = %d KHz hard_min_done %d\n", actual_freq, acked);
+
+	return actual_freq;
+}
+
+void dcn401_smu_wait_for_dmub_ack_mclk(struct clk_mgr_internal *clk_mgr, bool enable)
+{
+	/* the flag is sent as an explicit 1 or 0; no reply value is read back */
+	smu_print("SMU to wait for DMCUB ack for MCLK : %d\n", enable);
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetAlwaysWaitDmcubResp, enable ? 1 : 0, NULL);
+}
+
+void dcn401_smu_indicate_drr_status(struct clk_mgr_internal *clk_mgr, bool mod_drr_for_pstate)
+{
+	const uint32_t drr_flag = mod_drr_for_pstate ? 1 : 0;
+
+	smu_print("SMU Set indicate drr status = %d\n", mod_drr_for_pstate);
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_IndicateDrrStatus, drr_flag, NULL);
+}
+
+/* Send the idle UCLK/FCLK request; true only if the message and the UCLK hardmin wait both succeed */
+bool dcn401_smu_set_idle_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+		uint16_t uclk_freq_mhz,
+		uint16_t fclk_freq_mhz)
+{
+	uint32_t response = 0;
+	bool success;
+
+	/* 15:0 for uclk, 31:16 for fclk; fclk is widened first so the shift is unsigned */
+	uint32_t param = ((uint32_t)fclk_freq_mhz << 16) | uclk_freq_mhz;
+
+	smu_print("SMU Set idle hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n", uclk_freq_mhz, fclk_freq_mhz);
+
+	success = dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_IdleUclkFclk, param, &response);
+
+	/* wait until hardmin acknowledged; runs even when the send failed (&= does not short-circuit) */
+	success &= dcn401_smu_wait_hard_min_status(clk_mgr, PPCLK_UCLK);
+	smu_print("SMU hard_min_done %d\n", success);
+
+	return success;
+}
+
+/* Send the active UCLK/FCLK request; true only if the message and the UCLK hardmin wait both succeed */
+bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+		uint16_t uclk_freq_mhz,
+		uint16_t fclk_freq_mhz)
+{
+	uint32_t response = 0;
+	bool success;
+
+	/* 15:0 for uclk, 31:16 for fclk; fclk is widened first so the shift is unsigned */
+	uint32_t param = ((uint32_t)fclk_freq_mhz << 16) | uclk_freq_mhz;
+
+	smu_print("SMU Set active hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n", uclk_freq_mhz, fclk_freq_mhz);
+
+	success = dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_ActiveUclkFclk, param, &response);
+
+	/* wait until hardmin acknowledged; runs even when the send failed (&= does not short-circuit) */
+	success &= dcn401_smu_wait_hard_min_status(clk_mgr, PPCLK_UCLK);
+	smu_print("SMU hard_min_done %d\n", success);
+
+	return success;
+}
+
+/* Send the SubVP UCLK/FCLK request; returns whether the SMU accepted the message */
+bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+		uint16_t uclk_freq_mhz,
+		uint16_t fclk_freq_mhz)
+{
+	uint32_t response = 0;
+
+	/* 15:0 for uclk, 31:16 for fclk; fclk is widened first so the shift is unsigned */
+	uint32_t param = ((uint32_t)fclk_freq_mhz << 16) | uclk_freq_mhz;
+
+	smu_print("SMU Set subvp hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n",
+			uclk_freq_mhz, fclk_freq_mhz);
+
+	/* unlike the idle/active variants, no hardmin acknowledgement is awaited here */
+	return dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_SubvpUclkFclk, param, &response);
+}
+
+void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz)
+{
+	/* freq_mhz is passed through unchanged as the message argument */
+	smu_print("SMU Set min deep sleep dcef clk: freq_mhz = %d MHz\n", freq_mhz);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetMinDeepSleepDcfclk, freq_mhz, NULL);
+}
+
+void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays)
+{
+	/* num_displays is passed through unchanged as the message argument */
+	smu_print("SMU Set num of displays: num_displays = %d\n", num_displays);
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_NumOfDisplays, num_displays, NULL);
+}
+
+/* Returns the value the SMU reports for GetNumUmcChannels, or 0 if the query fails */
+unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr)
+{
+	unsigned int num_umc_channels = 0;
+
+	dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_GetNumUmcChannels, 0, &num_umc_channels);
+	smu_print("SMU Get Num UMC Channels: num_umc_channels = %d\n", num_umc_channels);
+
+	return num_umc_channels;
+}
+
+/*
+ * For a valid DPM level, the frequency in MHz is returned in the lower 16 bits.
+ *
+ * Call with dpm_level = 0xFF to query features instead; the return value is then:
+ * Bits 7:0 - number of DPM levels
+ * Bit 28 - 1 = auto DPM on
+ * Bit 29 - 1 = sweep DPM on
+ * Bit 30 - 1 = forced DPM on
+ * Bit 31 - 0 = discrete, 1 = fine-grained
+ *
+ * With fine-grained DPM, only min and max frequencies will be reported.
+ *
+ * Returns 0 on failure.
+ */
+unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level)
+{
+	/* bits 23:16 for clock type, lower 8 bits for DPM level */
+	const uint32_t param = (clk << 16) | dpm_level;
+	uint32_t dpm_freq = 0;
+
+	smu_print("SMU Get dpm freq by index: clk = %d, dpm_level = %d\n", clk, dpm_level);
+
+	/* dpm_freq keeps its 0 initialiser when the message fails */
+	dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_GetDpmFreqByIndex, param, &dpm_freq);
+
+	smu_print("SMU dpm freq: %d MHz\n", dpm_freq);
+
+	return dpm_freq;
+}
+
+/* Returns the max DPM frequency in DC mode in MHz for clock clk, 0 on failure */
+unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk)
+{
+	/* bits 23:16 for clock type */
+	const uint32_t param = clk << 16;
+	uint32_t max_freq = 0;
+
+	smu_print("SMU Get DC mode max DPM freq: clk = %d\n", clk);
+
+	/* max_freq keeps its 0 initialiser when the message fails */
+	dcn401_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_GetDcModeMaxDpmFreq, param, &max_freq);
+
+	smu_print("SMU DC mode max DMP freq: %d MHz\n", max_freq);
+
+	return max_freq;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
new file mode 100644
index 000000000000..4f5ac603e822
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DCN401_CLK_MGR_SMU_MSG_H_
+#define __DCN401_CLK_MGR_SMU_MSG_H_
+
+#include "os_types.h"
+#include "core_types.h"
+
+struct clk_mgr_internal;
+
+bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version);
+bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr);
+bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr);
+void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support);
+void dcn401_smu_send_uclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support);
+void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways);
+void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high);
+void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low);
+void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
+void dcn401_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr);
+unsigned int dcn401_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz);
+void dcn401_smu_wait_for_dmub_ack_mclk(struct clk_mgr_internal *clk_mgr, bool enable);
+void dcn401_smu_indicate_drr_status(struct clk_mgr_internal *clk_mgr, bool mod_drr_for_pstate);
+bool dcn401_smu_set_idle_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+ uint16_t uclk_freq_mhz,
+ uint16_t fclk_freq_mhz);
+bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+ uint16_t uclk_freq_mhz,
+ uint16_t fclk_freq_mhz);
+bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+ uint16_t uclk_freq_mhz,
+ uint16_t fclk_freq_mhz);
+void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
+void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
+unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr);
+unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk);
+unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level);
+
+#endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h
new file mode 100644
index 000000000000..36034b32870c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_smu14_driver_if.h
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+//
+// This is a stripped-down version of the smu13_driver_if.h file for the relevant DAL interfaces.
+
+#define SMU14_DRIVER_IF_VERSION 0x1
+
+//Only Clks that have DPM descriptors are listed here
+typedef enum {
+ PPCLK_GFXCLK = 0,
+ PPCLK_SOCCLK,
+ PPCLK_UCLK,
+ PPCLK_FCLK,
+ PPCLK_DCLK_0,
+ PPCLK_VCLK_0,
+ PPCLK_DISPCLK,
+ PPCLK_DPPCLK,
+ PPCLK_DPREFCLK,
+ PPCLK_DCFCLK,
+ PPCLK_DTBCLK,
+ PPCLK_COUNT,
+} PPCLK_e;
+
+typedef struct {
+ uint8_t WmSetting;
+ uint8_t Flags;
+ uint8_t Padding[2];
+
+} WatermarkRowGeneric_t;
+
+#define NUM_WM_RANGES 4
+
+typedef enum {
+ WATERMARKS_CLOCK_RANGE = 0,
+ WATERMARKS_DUMMY_PSTATE,
+ WATERMARKS_MALL,
+ WATERMARKS_COUNT,
+} WATERMARKS_FLAGS_e;
+
+typedef struct {
+ // Watermarks
+ WatermarkRowGeneric_t WatermarkRow[NUM_WM_RANGES];
+} Watermarks_t;
+
+typedef struct {
+ Watermarks_t Watermarks;
+ uint32_t Spare[16];
+
+ uint32_t MmHubPadding[8]; // SMU internal use
+} WatermarksExternal_t;
+
+// Table types
+#define TABLE_PMFW_PPTABLE 0
+#define TABLE_COMBO_PPTABLE 1
+#define TABLE_WATERMARKS 2
+#define TABLE_AVFS_PSM_DEBUG 3
+#define TABLE_PMSTATUSLOG 4
+#define TABLE_SMU_METRICS 5
+#define TABLE_DRIVER_SMU_CONFIG 6
+#define TABLE_ACTIVITY_MONITOR_COEFF 7
+#define TABLE_OVERDRIVE 8
+#define TABLE_I2C_COMMANDS 9
+#define TABLE_DRIVER_INFO 10
+#define TABLE_ECCINFO 11
+#define TABLE_COUNT 12
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 3a9077b60029..5f2d5638c819 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -24,6 +24,8 @@
#include "dm_services.h"
+#include "amdgpu.h"
+
#include "dc.h"
#include "core_status.h"
@@ -32,6 +34,11 @@
#include "dce/dce_hwseq.h"
#include "resource.h"
+#include "dc_state.h"
+#include "dc_state_priv.h"
+#include "dc_plane.h"
+#include "dc_plane_priv.h"
+#include "dc_stream_priv.h"
#include "gpio_service_interface.h"
#include "clk_mgr.h"
@@ -53,7 +60,7 @@
#include "link_encoder.h"
#include "link_enc_cfg.h"
-#include "link.h"
+#include "link_service.h"
#include "dm_helpers.h"
#include "mem_input.h"
@@ -75,6 +82,11 @@
#include "hw_sequencer_private.h"
+#if defined(CONFIG_DRM_AMD_DC_FP)
+#include "dml2/dml2_internal_types.h"
+#include "soc_and_ip_translator.h"
+#endif
+
#include "dce/dmub_outbox.h"
#define CTX \
@@ -206,10 +218,24 @@ static bool create_links(
connectors_num,
num_virtual_links);
- for (i = 0; i < connectors_num; i++) {
+ /* When getting the number of connectors, the VBIOS reports the number of valid indices,
+ * but it doesn't say which indices are valid, and not every index has an actual connector.
+ * So, if we don't find a connector on an index, that is not an error.
+ *
+ * - There is no guarantee that the first N indices will be valid
+ * - VBIOS may report a higher amount of valid indices than there are actual connectors
+ * - Some VBIOS have valid configurations for more connectors than there actually are
+ * on the card. This may be because the manufacturer used the same VBIOS for different
+ * variants of the same card.
+ */
+ for (i = 0; dc->link_count < connectors_num && i < MAX_LINKS; i++) {
+ struct graphics_object_id connector_id = bios->funcs->get_connector_id(bios, i);
struct link_init_data link_init_params = {0};
struct dc_link *link;
+ if (connector_id.id == CONNECTOR_ID_UNKNOWN)
+ continue;
+
DC_LOG_DC("BIOS object table - printing link object info for connector number: %d, link_index: %d", i, dc->link_count);
link_init_params.ctx = dc->ctx;
@@ -229,6 +255,7 @@ static bool create_links(
DC_LOG_DC("BIOS object table - end");
/* Create a link for each usb4 dpia port */
+ dc->lowest_dpia_link_index = MAX_LINKS;
for (i = 0; i < dc->res_pool->usb4_dpia_count; i++) {
struct link_init_data link_init_params = {0};
struct dc_link *link;
@@ -241,6 +268,9 @@ static bool create_links(
link = dc->link_srv->create_link(&link_init_params);
if (link) {
+ if (dc->lowest_dpia_link_index > dc->link_count)
+ dc->lowest_dpia_link_index = dc->link_count;
+
dc->links[dc->link_count] = link;
link->dc = dc;
++dc->link_count;
@@ -266,6 +296,7 @@ static bool create_links(
link->link_id.type = OBJECT_TYPE_CONNECTOR;
link->link_id.id = CONNECTOR_ID_VIRTUAL;
link->link_id.enum_id = ENUM_ID_1;
+ link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL);
if (!link->link_enc) {
@@ -380,6 +411,30 @@ static void dc_perf_trace_destroy(struct dc_perf_trace **perf_trace)
*perf_trace = NULL;
}
+/* Call hwss.set_long_vtotal on the first pipe that drives @stream and has a timing generator */
+static bool set_long_vtotal(struct dc *dc, struct dc_stream_state *stream, struct dc_crtc_timing_adjust *adjust)
+{
+	int i;
+
+	if (!dc || !stream || !adjust || !dc->current_state)
+		return false;
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+		if (pipe->stream != stream || !pipe->stream_res.tg)
+			continue;
+
+		/* the hook is optional; finding a matching pipe still returns true */
+		if (dc->hwss.set_long_vtotal)
+			dc->hwss.set_long_vtotal(&pipe, 1, adjust->v_total_min, adjust->v_total_max);
+
+		return true;
+	}
+
+	return false;
+}
+
/**
* dc_stream_adjust_vmin_vmax - look up pipe context & update parts of DRR
* @dc: dc reference
@@ -404,14 +459,31 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
* Don't adjust DRR while there's bandwidth optimizations pending to
* avoid conflicting with firmware updates.
*/
- if (dc->ctx->dce_version > DCE_VERSION_MAX)
- if (dc->optimized_required || dc->wm_optimized_required)
+ if (dc->ctx->dce_version > DCE_VERSION_MAX) {
+ if (dc->optimized_required &&
+ (stream->adjust.v_total_max != adjust->v_total_max ||
+ stream->adjust.v_total_min != adjust->v_total_min)) {
+ stream->adjust.timing_adjust_pending = true;
return false;
+ }
+ }
+
+ dc_exit_ips_for_hw_access(dc);
stream->adjust.v_total_max = adjust->v_total_max;
stream->adjust.v_total_mid = adjust->v_total_mid;
stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num;
stream->adjust.v_total_min = adjust->v_total_min;
+ stream->adjust.allow_otg_v_count_halt = adjust->allow_otg_v_count_halt;
+
+ if (dc->caps.max_v_total != 0 &&
+ (adjust->v_total_max > dc->caps.max_v_total || adjust->v_total_min > dc->caps.max_v_total)) {
+ stream->adjust.timing_adjust_pending = false;
+ if (adjust->allow_otg_v_count_halt)
+ return set_long_vtotal(dc, stream, adjust);
+ else
+ return false;
+ }
for (i = 0; i < MAX_PIPES; i++) {
struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -420,7 +492,7 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
dc->hwss.set_drr(&pipe,
1,
*adjust);
-
+ stream->adjust.timing_adjust_pending = false;
return true;
}
}
@@ -448,6 +520,8 @@ bool dc_stream_get_last_used_drr_vtotal(struct dc *dc,
int i = 0;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -468,31 +542,6 @@ bool dc_stream_get_last_used_drr_vtotal(struct dc *dc,
return status;
}
-bool dc_stream_get_crtc_position(struct dc *dc,
- struct dc_stream_state **streams, int num_streams,
- unsigned int *v_pos, unsigned int *nom_v_pos)
-{
- /* TODO: Support multiple streams */
- const struct dc_stream_state *stream = streams[0];
- int i;
- bool ret = false;
- struct crtc_position position;
-
- for (i = 0; i < MAX_PIPES; i++) {
- struct pipe_ctx *pipe =
- &dc->current_state->res_ctx.pipe_ctx[i];
-
- if (pipe->stream == stream && pipe->stream_res.stream_enc) {
- dc->hwss.get_position(&pipe, 1, &position);
-
- *v_pos = position.vertical_count;
- *nom_v_pos = position.nominal_vcount;
- ret = true;
- }
- }
- return ret;
-}
-
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
static inline void
dc_stream_forward_dmub_crc_window(struct dc_dmub_srv *dmub_srv,
@@ -515,7 +564,7 @@ dc_stream_forward_dmub_crc_window(struct dc_dmub_srv *dmub_srv,
cmd.secure_display.roi_info.y_end = rect->y + rect->height;
}
- dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
}
static inline void
@@ -530,7 +579,7 @@ dc_stream_forward_dmcu_crc_window(struct dmcu *dmcu,
bool
dc_stream_forward_crc_window(struct dc_stream_state *stream,
- struct rect *rect, bool is_stop)
+ struct rect *rect, uint8_t phy_id, bool is_stop)
{
struct dmcu *dmcu;
struct dc_dmub_srv *dmub_srv;
@@ -549,7 +598,7 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream,
if (i == MAX_PIPES)
return false;
- mux_mapping.phy_output_num = stream->link->link_enc_hw_inst;
+ mux_mapping.phy_output_num = phy_id;
mux_mapping.otg_output_num = pipe->stream_res.tg->inst;
dmcu = dc->res_pool->dmcu;
@@ -566,25 +615,89 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream,
return true;
}
+
+static void
+dc_stream_forward_dmub_multiple_crc_window(struct dc_dmub_srv *dmub_srv,
+		struct crc_window *window, struct otg_phy_mux *mux_mapping, bool stop)
+{
+	union dmub_rb_cmd cmd = {0};
+	int idx;
+
+	cmd.secure_display.mul_roi_ctl.phy_id = mux_mapping->phy_output_num;
+	cmd.secure_display.mul_roi_ctl.otg_id = mux_mapping->otg_output_num;
+	cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY;
+
+	if (stop) {
+		cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_STOP_UPDATE;
+	} else {
+		/* a notify carries the bounds and enable flag of every window slot */
+		cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_WIN_NOTIFY;
+		for (idx = 0; idx < MAX_CRC_WINDOW_NUM; idx++) {
+			const struct crc_window *win = &window[idx];
+			cmd.secure_display.mul_roi_ctl.roi_ctl[idx].x_start = win->rect.x;
+			cmd.secure_display.mul_roi_ctl.roi_ctl[idx].y_start = win->rect.y;
+			cmd.secure_display.mul_roi_ctl.roi_ctl[idx].x_end = win->rect.x + win->rect.width;
+			cmd.secure_display.mul_roi_ctl.roi_ctl[idx].y_end = win->rect.y + win->rect.height;
+			cmd.secure_display.mul_roi_ctl.roi_ctl[idx].enable = win->enable;
+		}
+	}
+	dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+}
+
+bool
+dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream,
+		struct crc_window *window, uint8_t phy_id, bool stop)
+{
+	struct dc *dc = stream->ctx->dc;
+	struct pipe_ctx *match = NULL;
+	struct otg_phy_mux mux_mapping;
+	struct dc_dmub_srv *dmub_srv;
+	int i;
+
+	/* first pipe on this stream with neither a top pipe nor a previous ODM pipe */
+	for (i = 0; i < MAX_PIPES && !match; i++) {
+		struct pipe_ctx *cand = &dc->current_state->res_ctx.pipe_ctx[i];
+
+		if (cand->stream == stream && !cand->top_pipe && !cand->prev_odm_pipe)
+			match = cand;
+	}
+
+	/* Stream not found */
+	if (!match)
+		return false;
+
+	mux_mapping.phy_output_num = phy_id;
+	mux_mapping.otg_output_num = match->stream_res.tg->inst;
+
+	/* forward to dmub only. no dmcu support */
+	dmub_srv = dc->ctx->dmub_srv;
+	if (!dmub_srv)
+		return false;
+
+	dc_stream_forward_dmub_multiple_crc_window(dmub_srv, window, &mux_mapping, stop);
+	return true;
+}
#endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */
/**
* dc_stream_configure_crc() - Configure CRC capture for the given stream.
* @dc: DC Object
* @stream: The stream to configure CRC on.
- * @enable: Enable CRC if true, disable otherwise.
* @crc_window: CRC window (x/y start/end) information
+ * @enable: Enable CRC if true, disable otherwise.
* @continuous: Capture CRC on every frame if true. Otherwise, only capture
* once.
+ * @idx: Capture CRC on which CRC engine instance
+ * @reset: Reset CRC engine before the configuration
*
- * By default, only CRC0 is configured, and the entire frame is used to
- * calculate the CRC.
+ * By default, the entire frame is used to calculate the CRC.
*
* Return: %false if the stream is not found or CRC capture is not supported;
* %true if the stream has been configured.
*/
bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
- struct crc_params *crc_window, bool enable, bool continuous)
+ struct crc_params *crc_window, bool enable, bool continuous,
+ uint8_t idx, bool reset)
{
struct pipe_ctx *pipe;
struct crc_params param;
@@ -597,6 +710,8 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
if (pipe == NULL)
return false;
+ dc_exit_ips_for_hw_access(dc);
+
/* By default, capture the full frame */
param.windowa_x_start = 0;
param.windowa_y_start = 0;
@@ -626,6 +741,9 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
param.continuous_mode = continuous;
param.enable = enable;
+ param.crc_eng_inst = idx;
+ param.reset = reset;
+
tg = pipe->stream_res.tg;
/* Only call if supported */
@@ -640,6 +758,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
*
* @dc: DC object.
* @stream: The DC stream state of the stream to get CRCs from.
+ * @idx: index of crc engine to get CRC from
* @r_cr: CRC value for the red component.
* @g_y: CRC value for the green component.
* @b_cb: CRC value for the blue component.
@@ -649,13 +768,15 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
* Return:
* %false if stream is not found, or if CRCs are not enabled.
*/
-bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream,
+bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
int i;
struct pipe_ctx *pipe;
struct timing_generator *tg;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
pipe = &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe->stream == stream)
@@ -668,7 +789,7 @@ bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream,
tg = pipe->stream_res.tg;
if (tg->funcs->get_crc)
- return tg->funcs->get_crc(tg, r_cr, g_y, b_cb);
+ return tg->funcs->get_crc(tg, idx, r_cr, g_y, b_cb);
DC_LOG_WARNING("CRC capture not supported.");
return false;
}
@@ -680,6 +801,8 @@ void dc_stream_set_dyn_expansion(struct dc *dc, struct dc_stream_state *stream,
int i;
struct pipe_ctx *pipe_ctx;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
if (dc->current_state->res_ctx.pipe_ctx[i].stream
== stream) {
@@ -715,6 +838,8 @@ void dc_stream_set_dither_option(struct dc_stream_state *stream,
if (option > DITHER_OPTION_MAX)
return;
+ dc_exit_ips_for_hw_access(stream->ctx->dc);
+
stream->dither_option = option;
memset(&params, 0, sizeof(params));
@@ -739,6 +864,8 @@ bool dc_stream_set_gamut_remap(struct dc *dc, const struct dc_stream_state *stre
bool ret = false;
struct pipe_ctx *pipes;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream) {
pipes = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -756,6 +883,8 @@ bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream)
bool ret = false;
struct pipe_ctx *pipes;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
if (dc->current_state->res_ctx.pipe_ctx[i].stream
== stream) {
@@ -782,6 +911,8 @@ void dc_stream_set_static_screen_params(struct dc *dc,
struct pipe_ctx *pipes_affected[MAX_PIPES];
int num_pipes_affected = 0;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < num_streams; i++) {
struct dc_stream_state *stream = streams[i];
@@ -800,11 +931,12 @@ void dc_stream_set_static_screen_params(struct dc *dc,
static void dc_destruct(struct dc *dc)
{
// reset link encoder assignment table on destruct
- if (dc->res_pool && dc->res_pool->funcs->link_encs_assign)
+ if (dc->res_pool && dc->res_pool->funcs->link_encs_assign &&
+ !dc->config.unify_link_enc_assignment)
link_enc_cfg_init(dc, dc->current_state);
if (dc->current_state) {
- dc_release_state(dc->current_state);
+ dc_state_release(dc->current_state);
dc->current_state = NULL;
}
@@ -818,20 +950,24 @@ static void dc_destruct(struct dc *dc)
}
dc_destroy_resource_pool(dc);
-
+#ifdef CONFIG_DRM_AMD_DC_FP
+ dc_destroy_soc_and_ip_translator(&dc->soc_and_ip_translator);
+#endif
if (dc->link_srv)
link_destroy_link_service(&dc->link_srv);
- if (dc->ctx->gpio_service)
- dal_gpio_service_destroy(&dc->ctx->gpio_service);
+ if (dc->ctx) {
+ if (dc->ctx->gpio_service)
+ dal_gpio_service_destroy(&dc->ctx->gpio_service);
- if (dc->ctx->created_bios)
- dal_bios_parser_destroy(&dc->ctx->dc_bios);
+ if (dc->ctx->created_bios)
+ dal_bios_parser_destroy(&dc->ctx->dc_bios);
+ kfree(dc->ctx->logger);
+ dc_perf_trace_destroy(&dc->ctx->perf_trace);
- dc_perf_trace_destroy(&dc->ctx->perf_trace);
-
- kfree(dc->ctx);
- dc->ctx = NULL;
+ kfree(dc->ctx);
+ dc->ctx = NULL;
+ }
kfree(dc->bw_vbios);
dc->bw_vbios = NULL;
@@ -859,6 +995,8 @@ static bool dc_construct_ctx(struct dc *dc,
if (!dc_ctx)
return false;
+ dc_stream_init_rmcm_3dlut(dc);
+
dc_ctx->cgs_device = init_params->cgs_device;
dc_ctx->driver_context = init_params->driver;
dc_ctx->dc = dc;
@@ -868,8 +1006,18 @@ static bool dc_construct_ctx(struct dc *dc,
dc_ctx->dce_environment = init_params->dce_environment;
dc_ctx->dcn_reg_offsets = init_params->dcn_reg_offsets;
dc_ctx->nbio_reg_offsets = init_params->nbio_reg_offsets;
+ dc_ctx->clk_reg_offsets = init_params->clk_reg_offsets;
/* Create logger */
+ dc_ctx->logger = kmalloc(sizeof(*dc_ctx->logger), GFP_KERNEL);
+
+ if (!dc_ctx->logger) {
+ kfree(dc_ctx);
+ return false;
+ }
+
+ dc_ctx->logger->dev = adev_to_drm(init_params->driver);
+ dc->dml.logger = dc_ctx->logger;
dc_ctx->dce_version = resource_parse_asic_id(init_params->asic_id);
@@ -940,12 +1088,17 @@ static bool dc_construct(struct dc *dc,
dc->dcn_ip = dcn_ip;
+ if (init_params->bb_from_dmub)
+ dc->dml2_options.bb_from_dmub = init_params->bb_from_dmub;
+ else
+ dc->dml2_options.bb_from_dmub = NULL;
+
if (!dc_construct_ctx(dc, init_params)) {
dm_error("%s: failed to create ctx\n", __func__);
goto fail;
}
- dc_ctx = dc->ctx;
+ dc_ctx = dc->ctx;
/* Resource should construct all asic specific resources.
* This should be the only place where we need to parse the asic id
@@ -990,7 +1143,8 @@ static bool dc_construct(struct dc *dc,
/* set i2c speed if not done by the respective dcnxxx__resource.c */
if (dc->caps.i2c_speed_in_khz_hdcp == 0)
dc->caps.i2c_speed_in_khz_hdcp = dc->caps.i2c_speed_in_khz;
-
+ if (dc->caps.max_optimizable_video_width == 0)
+ dc->caps.max_optimizable_video_width = 5120;
dc->clk_mgr = dc_clk_mgr_create(dc->ctx, dc->res_pool->pp_smu, dc->res_pool->dccg);
if (!dc->clk_mgr)
goto fail;
@@ -1002,19 +1156,10 @@ static bool dc_construct(struct dc *dc,
dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params);
DC_FP_END();
}
-#endif
-
- /* Creation of current_state must occur after dc->dml
- * is initialized in dc_create_resource_pool because
- * on creation it copies the contents of dc->dml
- */
-
- dc->current_state = dc_create_state(dc);
-
- if (!dc->current_state) {
- dm_error("%s: failed to create validate ctx\n", __func__);
+ dc->soc_and_ip_translator = dc_create_soc_and_ip_translator(dc_ctx->dce_version);
+ if (!dc->soc_and_ip_translator)
goto fail;
- }
+#endif
if (!create_links(dc, init_params->num_virtual_links))
goto fail;
@@ -1025,7 +1170,16 @@ static bool dc_construct(struct dc *dc,
if (!create_link_encoders(dc))
goto fail;
- dc_resource_state_construct(dc, dc->current_state);
+ /* Creation of current_state must occur after dc->dml
+ * is initialized in dc_create_resource_pool because
+ * on creation it copies the contents of dc->dml
+ */
+ dc->current_state = dc_state_create(dc, NULL);
+
+ if (!dc->current_state) {
+ dm_error("%s: failed to create validate ctx\n", __func__);
+ goto fail;
+ }
return true;
@@ -1069,55 +1223,14 @@ static void apply_ctx_interdependent_lock(struct dc *dc,
}
}
-static void phantom_pipe_blank(
- struct dc *dc,
- struct timing_generator *tg,
- int width,
- int height)
-{
- struct dce_hwseq *hws = dc->hwseq;
- enum dc_color_space color_space;
- struct tg_color black_color = {0};
- struct output_pixel_processor *opp = NULL;
- uint32_t num_opps, opp_id_src0, opp_id_src1;
- uint32_t otg_active_width, otg_active_height;
- uint32_t i;
-
- /* program opp dpg blank color */
- color_space = COLOR_SPACE_SRGB;
- color_space_to_black_color(dc, color_space, &black_color);
-
- otg_active_width = width;
- otg_active_height = height;
-
- /* get the OPTC source */
- tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1);
- ASSERT(opp_id_src0 < dc->res_pool->res_cap->num_opp);
-
- for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) {
- if (dc->res_pool->opps[i] != NULL && dc->res_pool->opps[i]->inst == opp_id_src0) {
- opp = dc->res_pool->opps[i];
- break;
- }
+static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
+{
+ if (dc->debug.visual_confirm & VISUAL_CONFIRM_EXPLICIT) {
+ memcpy(&pipe_ctx->visual_confirm_color, &pipe_ctx->plane_state->visual_confirm_color,
+ sizeof(pipe_ctx->visual_confirm_color));
+ return;
}
- if (opp && opp->funcs->opp_set_disp_pattern_generator)
- opp->funcs->opp_set_disp_pattern_generator(
- opp,
- CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR,
- CONTROLLER_DP_COLOR_SPACE_UDEFINED,
- COLOR_DEPTH_UNDEFINED,
- &black_color,
- otg_active_width,
- otg_active_height,
- 0);
-
- if (tg->funcs->is_tg_enabled(tg))
- hws->funcs.wait_for_blank_complete(opp);
-}
-
-static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
-{
if (dc->ctx->dce_version >= DCN_VERSION_1_0) {
memset(&pipe_ctx->visual_confirm_color, 0, sizeof(struct tg_color));
@@ -1127,6 +1240,10 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte
get_surface_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SWIZZLE)
get_surface_tile_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_HW_CURSOR)
+ get_cursor_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_DCC)
+ get_dcc_visual_confirm_color(dc, pipe_ctx, &(pipe_ctx->visual_confirm_color));
else {
if (dc->ctx->dce_version < DCN_VERSION_2_0)
color_space_to_black_color(
@@ -1136,9 +1253,58 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte
if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE)
get_mpctree_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SUBVP)
- get_subvp_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ get_subvp_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MCLK_SWITCH)
- get_mclk_switch_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2)
+ get_fams2_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_VABC)
+ get_vabc_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ }
+ }
+}
+
+void dc_get_visual_confirm_for_stream(
+ struct dc *dc,
+ struct dc_stream_state *stream_state,
+ struct tg_color *color)
+{
+ struct dc_stream_status *stream_status = dc_stream_get_status(stream_state);
+ struct pipe_ctx *pipe_ctx;
+ int i;
+ struct dc_plane_state *plane_state = NULL;
+
+ if (!stream_status)
+ return;
+
+ switch (dc->debug.visual_confirm) {
+ case VISUAL_CONFIRM_DISABLE:
+ return;
+ case VISUAL_CONFIRM_PSR:
+ case VISUAL_CONFIRM_FAMS:
+ pipe_ctx = dc_stream_get_pipe_ctx(stream_state);
+ if (!pipe_ctx)
+ return;
+ dc_dmub_srv_get_visual_confirm_color_cmd(dc, pipe_ctx);
+ memcpy(color, &dc->ctx->dmub_srv->dmub->visual_confirm_color, sizeof(struct tg_color));
+ return;
+
+ default:
+ /* find plane with highest layer_index */
+ for (i = 0; i < stream_status->plane_count; i++) {
+ if (stream_status->plane_states[i]->visible)
+ plane_state = stream_status->plane_states[i];
+ }
+ if (!plane_state)
+ return;
+ /* find pipe that contains plane with highest layer index */
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state == plane_state) {
+ memcpy(color, &pipe->visual_confirm_color, sizeof(struct tg_color));
+ return;
+ }
}
}
}
@@ -1146,7 +1312,7 @@ static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *conte
static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
{
int i, j;
- struct dc_state *dangling_context = dc_create_state(dc);
+ struct dc_state *dangling_context = dc_state_create_current_copy(dc);
struct dc_state *current_ctx;
struct pipe_ctx *pipe;
struct timing_generator *tg;
@@ -1154,8 +1320,6 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
if (dangling_context == NULL)
return;
- dc_resource_state_copy_construct(dc->current_state, dangling_context);
-
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct dc_stream_state *old_stream =
dc->current_state->res_ctx.pipe_ctx[i].stream;
@@ -1192,6 +1356,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
}
if (should_disable && old_stream) {
+ bool is_phantom = dc_state_get_stream_subvp_type(dc->current_state, old_stream) == SUBVP_PHANTOM;
pipe = &dc->current_state->res_ctx.pipe_ctx[i];
tg = pipe->stream_res.tg;
/* When disabling plane for a phantom pipe, we must turn on the
@@ -1200,21 +1365,25 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
* state that can result in underflow or hang when enabling it
* again for different use.
*/
- if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ if (is_phantom) {
if (tg->funcs->enable_crtc) {
- int main_pipe_width, main_pipe_height;
-
- main_pipe_width = old_stream->mall_stream_config.paired_stream->dst.width;
- main_pipe_height = old_stream->mall_stream_config.paired_stream->dst.height;
- phantom_pipe_blank(dc, tg, main_pipe_width, main_pipe_height);
+ if (dc->hwseq->funcs.blank_pixel_data)
+ dc->hwseq->funcs.blank_pixel_data(dc, pipe, true);
tg->funcs->enable_crtc(tg);
}
}
- dc_rem_all_planes_for_stream(dc, old_stream, dangling_context);
+
+ if (is_phantom)
+ dc_state_rem_all_phantom_planes_for_stream(dc, old_stream, dangling_context, true);
+ else
+ dc_state_rem_all_planes_for_stream(dc, old_stream, dangling_context);
disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context);
- if (pipe->stream && pipe->plane_state)
- dc_update_viusal_confirm_color(dc, context, pipe);
+ if (pipe->stream && pipe->plane_state) {
+ if (!dc->debug.using_dml2)
+ set_p_state_switch_method(dc, context, pipe);
+ dc_update_visual_confirm_color(dc, context, pipe);
+ }
if (dc->hwss.apply_ctx_for_surface) {
apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, true);
@@ -1222,6 +1391,9 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, false);
dc->hwss.post_unlock_program_front_end(dc, dangling_context);
}
+
+ if (dc->res_pool->funcs->prepare_mcache_programming)
+ dc->res_pool->funcs->prepare_mcache_programming(dc, dangling_context);
if (dc->hwss.program_front_end_for_ctx) {
dc->hwss.interdependent_update_lock(dc, dc->current_state, true);
dc->hwss.program_front_end_for_ctx(dc, dangling_context);
@@ -1233,7 +1405,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
* The OTG is set to disable on falling edge of VUPDATE so the plane disable
* will still get it's double buffer update.
*/
- if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ if (is_phantom) {
if (tg->funcs->disable_phantom_crtc)
tg->funcs->disable_phantom_crtc(tg);
}
@@ -1242,7 +1414,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
current_ctx = dc->current_state;
dc->current_state = dangling_context;
- dc_release_state(current_ctx);
+ dc_state_release(current_ctx);
}
static void disable_vbios_mode_if_required(
@@ -1262,6 +1434,9 @@ static void disable_vbios_mode_if_required(
if (stream == NULL)
continue;
+ if (stream->apply_seamless_boot_optimization)
+ continue;
+
// only looking for first odm pipe
if (pipe->prev_odm_pipe)
continue;
@@ -1273,7 +1448,7 @@ static void disable_vbios_mode_if_required(
if (link != NULL && link->link_enc->funcs->is_dig_enabled(link->link_enc)) {
unsigned int enc_inst, tg_inst = 0;
- unsigned int pix_clk_100hz;
+ unsigned int pix_clk_100hz = 0;
enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc);
if (enc_inst != ENGINE_ID_UNKNOWN) {
@@ -1303,32 +1478,6 @@ static void disable_vbios_mode_if_required(
}
}
-static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
-{
- int i;
- PERF_TRACE();
- for (i = 0; i < MAX_PIPES; i++) {
- int count = 0;
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (!pipe->plane_state || pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
- continue;
-
- /* Timeout 100 ms */
- while (count < 100000) {
- /* Must set to false to start with, due to OR in update function */
- pipe->plane_state->status.is_flip_pending = false;
- dc->hwss.update_pending_status(pipe);
- if (!pipe->plane_state->status.is_flip_pending)
- break;
- udelay(1);
- count++;
- }
- ASSERT(!pipe->plane_state->status.is_flip_pending);
- }
- PERF_TRACE();
-}
-
/* Public functions */
struct dc *dc_create(const struct dc_init_data *init_params)
@@ -1340,6 +1489,7 @@ struct dc *dc_create(const struct dc_init_data *init_params)
return NULL;
if (init_params->dce_environment == DCE_ENV_VIRTUAL_HW) {
+ dc->caps.linear_pitch_alignment = 64;
if (!dc_construct_ctx(dc, init_params))
goto destruct_dc;
} else {
@@ -1367,6 +1517,7 @@ struct dc *dc_create(const struct dc_init_data *init_params)
dc->dcn_reg_offsets = init_params->dcn_reg_offsets;
dc->nbio_reg_offsets = init_params->nbio_reg_offsets;
+ dc->clk_reg_offsets = init_params->clk_reg_offsets;
/* Populate versioning information */
dc->versions.dc_ver = DC_VER;
@@ -1375,8 +1526,6 @@ struct dc *dc_create(const struct dc_init_data *init_params)
DC_LOG_DC("Display Core initialized\n");
-
-
return dc;
destruct_dc:
@@ -1414,6 +1563,7 @@ void dc_hardware_init(struct dc *dc)
detect_edp_presence(dc);
if (dc->ctx->dce_environment != DCE_ENV_VIRTUAL_HW)
dc->hwss.init_hw(dc);
+ dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0);
}
void dc_init_callbacks(struct dc *dc,
@@ -1536,7 +1686,10 @@ static void program_timing_sync(
}
for (k = 0; k < group_size; k++) {
- struct dc_stream_status *status = dc_stream_get_status_from_state(ctx, pipe_set[k]->stream);
+ struct dc_stream_status *status = dc_state_get_stream_status(ctx, pipe_set[k]->stream);
+
+ if (!status)
+ continue;
status->timing_sync_info.group_id = num_group;
status->timing_sync_info.group_size = group_size;
@@ -1547,7 +1700,7 @@ static void program_timing_sync(
}
- /* remove any other pipes that are already been synced */
+ /* remove any other unblanked pipes as they have already been synced */
if (dc->config.use_pipe_ctx_sync_logic) {
/* check pipe's syncd to decide which pipe to be removed */
for (j = 1; j < group_size; j++) {
@@ -1560,6 +1713,7 @@ static void program_timing_sync(
pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd;
}
} else {
+ /* remove any other pipes by checking valid plane */
for (j = j + 1; j < group_size; j++) {
bool is_blanked;
@@ -1580,7 +1734,7 @@ static void program_timing_sync(
if (group_size > 1) {
if (sync_type == TIMING_SYNCHRONIZABLE) {
dc->hwss.enable_timing_synchronization(
- dc, group_index, group_size, pipe_set);
+ dc, ctx, group_index, group_size, pipe_set);
} else
if (sync_type == VBLANK_SYNCHRONIZABLE) {
dc->hwss.enable_vblanks_synchronization(
@@ -1628,17 +1782,23 @@ bool dc_validate_boot_timing(const struct dc *dc,
return false;
}
- if (dc->debug.force_odm_combine)
+ if (dc->debug.force_odm_combine) {
+ DC_LOG_DEBUG("boot timing validation failed due to force_odm_combine\n");
return false;
+ }
/* Check for enabled DIG to identify enabled display */
- if (!link->link_enc->funcs->is_dig_enabled(link->link_enc))
+ if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) {
+ DC_LOG_DEBUG("boot timing validation failed due to disabled DIG\n");
return false;
+ }
enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc);
- if (enc_inst == ENGINE_ID_UNKNOWN)
+ if (enc_inst == ENGINE_ID_UNKNOWN) {
+ DC_LOG_DEBUG("boot timing validation failed due to unknown DIG engine ID\n");
return false;
+ }
for (i = 0; i < dc->res_pool->stream_enc_count; i++) {
if (dc->res_pool->stream_enc[i]->id == enc_inst) {
@@ -1652,65 +1812,101 @@ bool dc_validate_boot_timing(const struct dc *dc,
}
// tg_inst not found
- if (i == dc->res_pool->stream_enc_count)
+ if (i == dc->res_pool->stream_enc_count) {
+ DC_LOG_DEBUG("boot timing validation failed due to timing generator instance not found\n");
return false;
+ }
- if (tg_inst >= dc->res_pool->timing_generator_count)
+ if (tg_inst >= dc->res_pool->timing_generator_count) {
+ DC_LOG_DEBUG("boot timing validation failed due to invalid timing generator count\n");
return false;
+ }
- if (tg_inst != link->link_enc->preferred_engine)
+ if (tg_inst != link->link_enc->preferred_engine) {
+ DC_LOG_DEBUG("boot timing validation failed due to non-preferred timing generator\n");
return false;
+ }
tg = dc->res_pool->timing_generators[tg_inst];
- if (!tg->funcs->get_hw_timing)
+ if (!tg->funcs->get_hw_timing) {
+ DC_LOG_DEBUG("boot timing validation failed due to missing get_hw_timing callback\n");
return false;
+ }
- if (!tg->funcs->get_hw_timing(tg, &hw_crtc_timing))
+ if (!tg->funcs->get_hw_timing(tg, &hw_crtc_timing)) {
+ DC_LOG_DEBUG("boot timing validation failed due to failed get_hw_timing return\n");
return false;
+ }
- if (crtc_timing->h_total != hw_crtc_timing.h_total)
+ if (crtc_timing->h_total != hw_crtc_timing.h_total) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_total mismatch\n");
return false;
+ }
- if (crtc_timing->h_border_left != hw_crtc_timing.h_border_left)
+ if (crtc_timing->h_border_left != hw_crtc_timing.h_border_left) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_border_left mismatch\n");
return false;
+ }
- if (crtc_timing->h_addressable != hw_crtc_timing.h_addressable)
+ if (crtc_timing->h_addressable != hw_crtc_timing.h_addressable) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_addressable mismatch\n");
return false;
+ }
- if (crtc_timing->h_border_right != hw_crtc_timing.h_border_right)
+ if (crtc_timing->h_border_right != hw_crtc_timing.h_border_right) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_border_right mismatch\n");
return false;
+ }
- if (crtc_timing->h_front_porch != hw_crtc_timing.h_front_porch)
+ if (crtc_timing->h_front_porch != hw_crtc_timing.h_front_porch) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_front_porch mismatch\n");
return false;
+ }
- if (crtc_timing->h_sync_width != hw_crtc_timing.h_sync_width)
+ if (crtc_timing->h_sync_width != hw_crtc_timing.h_sync_width) {
+ DC_LOG_DEBUG("boot timing validation failed due to h_sync_width mismatch\n");
return false;
+ }
- if (crtc_timing->v_total != hw_crtc_timing.v_total)
+ if (crtc_timing->v_total != hw_crtc_timing.v_total) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_total mismatch\n");
return false;
+ }
- if (crtc_timing->v_border_top != hw_crtc_timing.v_border_top)
+ if (crtc_timing->v_border_top != hw_crtc_timing.v_border_top) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_border_top mismatch\n");
return false;
+ }
- if (crtc_timing->v_addressable != hw_crtc_timing.v_addressable)
+ if (crtc_timing->v_addressable != hw_crtc_timing.v_addressable) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_addressable mismatch\n");
return false;
+ }
- if (crtc_timing->v_border_bottom != hw_crtc_timing.v_border_bottom)
+ if (crtc_timing->v_border_bottom != hw_crtc_timing.v_border_bottom) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_border_bottom mismatch\n");
return false;
+ }
- if (crtc_timing->v_front_porch != hw_crtc_timing.v_front_porch)
+ if (crtc_timing->v_front_porch != hw_crtc_timing.v_front_porch) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_front_porch mismatch\n");
return false;
+ }
- if (crtc_timing->v_sync_width != hw_crtc_timing.v_sync_width)
+ if (crtc_timing->v_sync_width != hw_crtc_timing.v_sync_width) {
+ DC_LOG_DEBUG("boot timing validation failed due to v_sync_width mismatch\n");
return false;
+ }
/* block DSC for now, as VBIOS does not currently support DSC timings */
- if (crtc_timing->flags.DSC)
+ if (crtc_timing->flags.DSC) {
+ DC_LOG_DEBUG("boot timing validation failed due to DSC\n");
return false;
+ }
if (dc_is_dp_signal(link->connector_signal)) {
- unsigned int pix_clk_100hz;
+ unsigned int pix_clk_100hz = 0;
uint32_t numOdmPipes = 1;
uint32_t id_src[4] = {0};
@@ -1722,33 +1918,60 @@ bool dc_validate_boot_timing(const struct dc *dc,
tg->funcs->get_optc_source(tg,
&numOdmPipes, &id_src[0], &id_src[1]);
- if (numOdmPipes == 2)
+ if (numOdmPipes == 2) {
pix_clk_100hz *= 2;
- if (numOdmPipes == 4)
+ } else if (numOdmPipes == 4) {
pix_clk_100hz *= 4;
+ } else if (se && se->funcs->get_pixels_per_cycle) {
+ uint32_t pixels_per_cycle = se->funcs->get_pixels_per_cycle(se);
+
+ if (pixels_per_cycle != 1 && !dc->debug.enable_dp_dig_pixel_rate_div_policy) {
+ DC_LOG_DEBUG("boot timing validation failed due to pixels_per_cycle\n");
+ return false;
+ }
+
+ pix_clk_100hz *= pixels_per_cycle;
+ }
// Note: In rare cases, HW pixclk may differ from crtc's pixclk
// slightly due to rounding issues in 10 kHz units.
- if (crtc_timing->pix_clk_100hz != pix_clk_100hz)
+ if (crtc_timing->pix_clk_100hz != pix_clk_100hz) {
+ DC_LOG_DEBUG("boot timing validation failed due to pix_clk_100hz mismatch\n");
return false;
+ }
- if (!se->funcs->dp_get_pixel_format)
+ if (!se || !se->funcs->dp_get_pixel_format) {
+ DC_LOG_DEBUG("boot timing validation failed due to missing dp_get_pixel_format\n");
return false;
+ }
if (!se->funcs->dp_get_pixel_format(
se,
&hw_crtc_timing.pixel_encoding,
- &hw_crtc_timing.display_color_depth))
+ &hw_crtc_timing.display_color_depth)) {
+ DC_LOG_DEBUG("boot timing validation failed due to dp_get_pixel_format failure\n");
return false;
+ }
- if (hw_crtc_timing.display_color_depth != crtc_timing->display_color_depth)
+ if (hw_crtc_timing.display_color_depth != crtc_timing->display_color_depth) {
+ DC_LOG_DEBUG("boot timing validation failed due to display_color_depth mismatch\n");
return false;
+ }
- if (hw_crtc_timing.pixel_encoding != crtc_timing->pixel_encoding)
+ if (hw_crtc_timing.pixel_encoding != crtc_timing->pixel_encoding) {
+ DC_LOG_DEBUG("boot timing validation failed due to pixel_encoding mismatch\n");
return false;
+ }
}
+
if (link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) {
+ DC_LOG_DEBUG("boot timing validation failed due to VSC SDP colorimetry\n");
+ return false;
+ }
+
+ if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
+ DC_LOG_DEBUG("boot timing validation failed due to DP 128b/132b\n");
return false;
}
@@ -1785,6 +2008,8 @@ void dc_enable_stereo(
int i, j;
struct pipe_ctx *pipe;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
if (context != NULL) {
pipe = &context->res_ctx.pipe_ctx[i];
@@ -1804,6 +2029,8 @@ void dc_enable_stereo(
void dc_trigger_sync(struct dc *dc, struct dc_state *context)
{
if (context->stream_count > 1 && !dc->debug.disable_timing_sync) {
+ dc_exit_ips_for_hw_access(dc);
+
enable_timing_multisync(dc, context);
program_timing_sync(dc, context);
}
@@ -1834,6 +2061,41 @@ void dc_z10_save_init(struct dc *dc)
dc->hwss.z10_save_init(dc);
}
+/* Set a pipe unlock order based on the change in DET allocation and stores it in dc scratch memory
+ * Prevents over allocation of DET during unlock process
+ * e.g. 2 pipe config with different streams with a max of 20 DET segments
+ * Before: After:
+ * - Pipe0: 10 DET segments - Pipe0: 12 DET segments
+ * - Pipe1: 10 DET segments - Pipe1: 8 DET segments
+ * If Pipe0 gets updated first, 22 DET segments will be allocated
+ */
+static void determine_pipe_unlock_order(struct dc *dc, struct dc_state *context)
+{
+ unsigned int i = 0;
+ struct pipe_ctx *pipe = NULL;
+ struct timing_generator *tg = NULL;
+
+ if (!dc->config.set_pipe_unlock_order)
+ return;
+
+ memset(dc->scratch.pipes_to_unlock_first, 0, sizeof(dc->scratch.pipes_to_unlock_first));
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ tg = pipe->stream_res.tg;
+
+ if (!resource_is_pipe_type(pipe, OTG_MASTER) ||
+ !tg->funcs->is_tg_enabled(tg) ||
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
+ continue;
+ }
+
+ if (resource_calculate_det_for_stream(context, pipe) <
+ resource_calculate_det_for_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i])) {
+ dc->scratch.pipes_to_unlock_first[i] = true;
+ }
+ }
+}
+
/**
* dc_commit_state_no_check - Apply context to the hardware
*
@@ -1862,7 +2124,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
/* Check old context for SubVP */
- subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM);
+ subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM);
if (subvp_prev_use)
break;
}
@@ -1875,6 +2137,18 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
dc->hwss.enable_accelerated_mode(dc, context);
}
+ if (dc->hwseq->funcs.wait_for_pipe_update_if_needed) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ //Only delay otg master for a given config
+ if (resource_is_pipe_type(pipe, OTG_MASTER)) {
+ //dc_commit_state_no_check is always a full update
+ dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, pipe, false);
+ break;
+ }
+ }
+ }
+
if (context->stream_count > get_seamless_boot_stream_count(context) ||
context->stream_count == 0)
dc->hwss.prepare_bandwidth(dc, context);
@@ -1884,8 +2158,10 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
*/
if (dc->hwss.subvp_pipe_control_lock)
dc->hwss.subvp_pipe_control_lock(dc, context, true, true, NULL, subvp_prev_use);
+ if (dc->hwss.fams2_global_control_lock)
+ dc->hwss.fams2_global_control_lock(dc, context, true);
- if (dc->debug.enable_double_buffered_dsc_pg_support)
+ if (dc->hwss.update_dsc_pg)
dc->hwss.update_dsc_pg(dc, context, false);
disable_dangling_plane(dc, context);
@@ -1930,10 +2206,21 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
context->streams[i]->update_flags.bits.dsc_changed = prev_dsc_changed;
}
+ determine_pipe_unlock_order(dc, context);
/* Program all planes within new context*/
+ if (dc->res_pool->funcs->prepare_mcache_programming)
+ dc->res_pool->funcs->prepare_mcache_programming(dc, context);
if (dc->hwss.program_front_end_for_ctx) {
dc->hwss.interdependent_update_lock(dc, context, true);
dc->hwss.program_front_end_for_ctx(dc, context);
+
+ if (dc->hwseq->funcs.set_wait_for_update_needed_for_pipe) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ dc->hwseq->funcs.set_wait_for_update_needed_for_pipe(dc, pipe);
+ }
+ }
+
dc->hwss.interdependent_update_lock(dc, context, false);
dc->hwss.post_unlock_program_front_end(dc, context);
}
@@ -1942,6 +2229,8 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
dc->hwss.commit_subvp_config(dc, context);
if (dc->hwss.subvp_pipe_control_lock)
dc->hwss.subvp_pipe_control_lock(dc, context, false, true, NULL, subvp_prev_use);
+ if (dc->hwss.fams2_global_control_lock)
+ dc->hwss.fams2_global_control_lock(dc, context, false);
for (i = 0; i < context->stream_count; i++) {
const struct dc_link *link = context->streams[i]->link;
@@ -1984,15 +2273,24 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
dc_enable_stereo(dc, context, dc_streams, context->stream_count);
- if (context->stream_count > get_seamless_boot_stream_count(context) ||
+ if (get_seamless_boot_stream_count(context) == 0 ||
context->stream_count == 0) {
/* Must wait for no flips to be pending before doing optimize bw */
- wait_for_no_pipes_pending(dc, context);
+ hwss_wait_for_no_pipes_pending(dc, context);
+ /*
+ * optimized dispclk depends on ODM setup. Need to wait for ODM
+ * update pending complete before optimizing bandwidth.
+ */
+ hwss_wait_for_odm_update_pending_complete(dc, context);
/* pplib is notified if disp_num changed */
dc->hwss.optimize_bandwidth(dc, context);
+ /* Need to do otg sync again as otg could be out of sync due to otg
+ * workaround applied during clock update
+ */
+ dc_trigger_sync(dc, context);
}
- if (dc->debug.enable_double_buffered_dsc_pg_support)
+ if (dc->hwss.update_dsc_pg)
dc->hwss.update_dsc_pg(dc, context, true);
if (dc->ctx->dce_version >= DCE_VERSION_MAX)
@@ -2016,9 +2314,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
old_state = dc->current_state;
dc->current_state = context;
- dc_release_state(old_state);
+ dc_state_release(old_state);
- dc_retain_state(dc->current_state);
+ dc_state_retain(dc->current_state);
return result;
}
@@ -2030,8 +2328,7 @@ static bool commit_minimal_transition_state(struct dc *dc,
* dc_commit_streams - Commit current stream state
*
* @dc: DC object with the commit state to be configured in the hardware
- * @streams: Array with a list of stream state
- * @stream_count: Total of streams
+ * @params: Parameters for the commit, including the streams to be committed
*
* Function responsible for commit streams change to the hardware.
*
@@ -2039,9 +2336,7 @@ static bool commit_minimal_transition_state(struct dc *dc,
* Return DC_OK if everything work as expected, otherwise, return a dc_status
* code.
*/
-enum dc_status dc_commit_streams(struct dc *dc,
- struct dc_stream_state *streams[],
- uint8_t stream_count)
+enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params *params)
{
int i, j;
struct dc_state *context;
@@ -2050,17 +2345,32 @@ enum dc_status dc_commit_streams(struct dc *dc,
struct pipe_ctx *pipe;
bool handle_exit_odm2to1 = false;
+ if (!params)
+ return DC_ERROR_UNEXPECTED;
+
if (dc->ctx->dce_environment == DCE_ENV_VIRTUAL_HW)
return res;
- if (!streams_changed(dc, streams, stream_count))
+ if (!streams_changed(dc, params->streams, params->stream_count) &&
+ dc->current_state->power_source == params->power_source)
return res;
- DC_LOG_DC("%s: %d streams\n", __func__, stream_count);
+ dc_exit_ips_for_hw_access(dc);
- for (i = 0; i < stream_count; i++) {
- struct dc_stream_state *stream = streams[i];
+ DC_LOG_DC("%s: %d streams\n", __func__, params->stream_count);
+
+ for (i = 0; i < params->stream_count; i++) {
+ struct dc_stream_state *stream = params->streams[i];
struct dc_stream_status *status = dc_stream_get_status(stream);
+ struct dc_sink *sink = stream->sink;
+
+ /* revalidate streams */
+ if (!dc_is_virtual_signal(sink->sink_signal)) {
+ res = dc_validate_stream(dc, stream);
+ if (res != DC_OK)
+ return res;
+ }
+
dc_stream_log(dc, stream);
@@ -2077,7 +2387,7 @@ enum dc_status dc_commit_streams(struct dc *dc,
* scenario, it uses extra pipes than needed to reduce power consumption
* We need to switch off this feature to make room for new streams.
*/
- if (stream_count > dc->current_state->stream_count &&
+ if (params->stream_count > dc->current_state->stream_count &&
dc->current_state->stream_count == 1) {
for (i = 0; i < dc->res_pool->pipe_count; i++) {
pipe = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -2089,30 +2399,53 @@ enum dc_status dc_commit_streams(struct dc *dc,
if (handle_exit_odm2to1)
res = commit_minimal_transition_state(dc, dc->current_state);
- context = dc_create_state(dc);
+ context = dc_state_create_current_copy(dc);
if (!context)
goto context_alloc_fail;
- dc_resource_state_copy_construct_current(dc, context);
+ context->power_source = params->power_source;
+
+ res = dc_validate_with_context(dc, set, params->stream_count, context, DC_VALIDATE_MODE_AND_PROGRAMMING);
+
+ /*
+ * Only update link encoder to stream assignment after bandwidth validation passed.
+ */
+ if (res == DC_OK && dc->res_pool->funcs->link_encs_assign && !dc->config.unify_link_enc_assignment)
+ dc->res_pool->funcs->link_encs_assign(
+ dc, context, context->streams, context->stream_count);
- res = dc_validate_with_context(dc, set, stream_count, context, false);
if (res != DC_OK) {
BREAK_TO_DEBUGGER();
goto fail;
}
+ /*
+ * If not already seamless, make transition seamless by inserting intermediate minimal transition
+ */
+ if (dc->hwss.is_pipe_topology_transition_seamless &&
+ !dc->hwss.is_pipe_topology_transition_seamless(dc, dc->current_state, context)) {
+ res = commit_minimal_transition_state(dc, context);
+ if (res != DC_OK) {
+ BREAK_TO_DEBUGGER();
+ goto fail;
+ }
+ }
+
res = dc_commit_state_no_check(dc, context);
- for (i = 0; i < stream_count; i++) {
+ for (i = 0; i < params->stream_count; i++) {
for (j = 0; j < context->stream_count; j++) {
- if (streams[i]->stream_id == context->streams[j]->stream_id)
- streams[i]->out.otg_offset = context->stream_status[j].primary_otg_inst;
+ if (params->streams[i]->stream_id == context->streams[j]->stream_id)
+ params->streams[i]->out.otg_offset = context->stream_status[j].primary_otg_inst;
- if (dc_is_embedded_signal(streams[i]->signal)) {
- struct dc_stream_status *status = dc_stream_get_status_from_state(context, streams[i]);
+ if (dc_is_embedded_signal(params->streams[i]->signal)) {
+ struct dc_stream_status *status = dc_state_get_stream_status(context, params->streams[i]);
+
+ if (!status)
+ continue;
if (dc->hwss.is_abm_supported)
- status->is_abm_supported = dc->hwss.is_abm_supported(dc, context, streams[i]);
+ status->is_abm_supported = dc->hwss.is_abm_supported(dc, context, params->streams[i]);
else
status->is_abm_supported = true;
}
@@ -2120,7 +2453,7 @@ enum dc_status dc_commit_streams(struct dc *dc,
}
fail:
- dc_release_state(context);
+ dc_state_release(context);
context_alloc_fail:
@@ -2174,7 +2507,7 @@ static bool is_flip_pending_in_pipes(struct dc *dc, struct dc_state *context)
pipe = &context->res_ctx.pipe_ctx[i];
// Don't check flip pending on phantom pipes
- if (!pipe->plane_state || (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM))
+ if (!pipe->plane_state || (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM))
continue;
/* Must set to false to start with, due to OR in update function */
@@ -2232,98 +2565,18 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
if (context->res_ctx.pipe_ctx[i].stream == NULL ||
context->res_ctx.pipe_ctx[i].plane_state == NULL) {
context->res_ctx.pipe_ctx[i].pipe_idx = i;
- dc->hwss.disable_plane(dc, &context->res_ctx.pipe_ctx[i]);
+ dc->hwss.disable_plane(dc, context, &context->res_ctx.pipe_ctx[i]);
}
process_deferred_updates(dc);
dc->hwss.optimize_bandwidth(dc, context);
- if (dc->debug.enable_double_buffered_dsc_pg_support)
+ if (dc->hwss.update_dsc_pg)
dc->hwss.update_dsc_pg(dc, context, true);
}
dc->optimized_required = false;
- dc->wm_optimized_required = false;
-}
-
-static void init_state(struct dc *dc, struct dc_state *context)
-{
- /* Each context must have their own instance of VBA and in order to
- * initialize and obtain IP and SOC the base DML instance from DC is
- * initially copied into every context
- */
- memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
-}
-
-struct dc_state *dc_create_state(struct dc *dc)
-{
- struct dc_state *context = kvzalloc(sizeof(struct dc_state),
- GFP_KERNEL);
-
- if (!context)
- return NULL;
-
- init_state(dc, context);
-
- kref_init(&context->refcount);
-
- return context;
-}
-
-struct dc_state *dc_copy_state(struct dc_state *src_ctx)
-{
- int i, j;
- struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL);
-
- if (!new_ctx)
- return NULL;
- memcpy(new_ctx, src_ctx, sizeof(struct dc_state));
-
- for (i = 0; i < MAX_PIPES; i++) {
- struct pipe_ctx *cur_pipe = &new_ctx->res_ctx.pipe_ctx[i];
-
- if (cur_pipe->top_pipe)
- cur_pipe->top_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
-
- if (cur_pipe->bottom_pipe)
- cur_pipe->bottom_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
-
- if (cur_pipe->prev_odm_pipe)
- cur_pipe->prev_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
-
- if (cur_pipe->next_odm_pipe)
- cur_pipe->next_odm_pipe = &new_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
-
- }
-
- for (i = 0; i < new_ctx->stream_count; i++) {
- dc_stream_retain(new_ctx->streams[i]);
- for (j = 0; j < new_ctx->stream_status[i].plane_count; j++)
- dc_plane_state_retain(
- new_ctx->stream_status[i].plane_states[j]);
- }
-
- kref_init(&new_ctx->refcount);
-
- return new_ctx;
-}
-
-void dc_retain_state(struct dc_state *context)
-{
- kref_get(&context->refcount);
-}
-
-static void dc_state_free(struct kref *kref)
-{
- struct dc_state *context = container_of(kref, struct dc_state, refcount);
- dc_resource_state_destruct(context);
- kvfree(context);
-}
-
-void dc_release_state(struct dc_state *context)
-{
- kref_put(&context->refcount, dc_state_free);
}
bool dc_set_generic_gpio_for_stereo(bool enable,
@@ -2392,7 +2645,7 @@ static bool is_surface_in_context(
return false;
}
-static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u)
+static enum surface_update_type get_plane_info_update_type(const struct dc *dc, const struct dc_surface_update *u)
{
union surface_update_flags *update_flags = &u->surface->update_flags;
enum surface_update_type update_type = UPDATE_TYPE_FAST;
@@ -2464,14 +2717,14 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info,
- sizeof(union dc_tiling_info)) != 0) {
+ sizeof(struct dc_tiling_info)) != 0) {
update_flags->bits.swizzle_change = 1;
elevate_update_type(&update_type, UPDATE_TYPE_MED);
/* todo: below are HW dependent, we should add a hook to
* DCE/N resource and validated there.
*/
- if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) {
+ if (!dc->debug.skip_full_updated_if_possible) {
/* swizzled mode requires RQ to be setup properly,
* thus need to run DML to calculate RQ settings
*/
@@ -2485,6 +2738,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
}
static enum surface_update_type get_scaling_info_update_type(
+ const struct dc *dc,
const struct dc_surface_update *u)
{
union surface_update_flags *update_flags = &u->surface->update_flags;
@@ -2492,29 +2746,33 @@ static enum surface_update_type get_scaling_info_update_type(
if (!u->scaling_info)
return UPDATE_TYPE_FAST;
- if (u->scaling_info->dst_rect.width != u->surface->dst_rect.width
+ if (u->scaling_info->src_rect.width != u->surface->src_rect.width
+ || u->scaling_info->src_rect.height != u->surface->src_rect.height
+ || u->scaling_info->dst_rect.width != u->surface->dst_rect.width
|| u->scaling_info->dst_rect.height != u->surface->dst_rect.height
+ || u->scaling_info->clip_rect.width != u->surface->clip_rect.width
+ || u->scaling_info->clip_rect.height != u->surface->clip_rect.height
|| u->scaling_info->scaling_quality.integer_scaling !=
- u->surface->scaling_quality.integer_scaling
- ) {
+ u->surface->scaling_quality.integer_scaling) {
update_flags->bits.scaling_change = 1;
+ if (u->scaling_info->src_rect.width > u->surface->src_rect.width
+ || u->scaling_info->src_rect.height > u->surface->src_rect.height)
+ /* Making src rect bigger requires a bandwidth change */
+ update_flags->bits.clock_change = 1;
+
if ((u->scaling_info->dst_rect.width < u->surface->dst_rect.width
|| u->scaling_info->dst_rect.height < u->surface->dst_rect.height)
&& (u->scaling_info->dst_rect.width < u->surface->src_rect.width
|| u->scaling_info->dst_rect.height < u->surface->src_rect.height))
/* Making dst rect smaller requires a bandwidth change */
update_flags->bits.bandwidth_change = 1;
- }
- if (u->scaling_info->src_rect.width != u->surface->src_rect.width
- || u->scaling_info->src_rect.height != u->surface->src_rect.height) {
-
- update_flags->bits.scaling_change = 1;
- if (u->scaling_info->src_rect.width > u->surface->src_rect.width
- || u->scaling_info->src_rect.height > u->surface->src_rect.height)
- /* Making src rect bigger requires a bandwidth change */
- update_flags->bits.clock_change = 1;
+ if (u->scaling_info->src_rect.width > dc->caps.max_optimizable_video_width &&
+ (u->scaling_info->clip_rect.width > u->surface->clip_rect.width ||
+ u->scaling_info->clip_rect.height > u->surface->clip_rect.height))
+ /* Changing clip size of a large surface may result in MPC slice count change */
+ update_flags->bits.bandwidth_change = 1;
}
if (u->scaling_info->src_rect.x != u->surface->src_rect.x
@@ -2525,6 +2783,7 @@ static enum surface_update_type get_scaling_info_update_type(
|| u->scaling_info->dst_rect.y != u->surface->dst_rect.y)
update_flags->bits.position_change = 1;
+ /* process every update flag before returning */
if (update_flags->bits.clock_change
|| update_flags->bits.bandwidth_change
|| update_flags->bits.scaling_change)
@@ -2551,10 +2810,10 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
update_flags->raw = 0; // Reset all flags
- type = get_plane_info_update_type(u);
+ type = get_plane_info_update_type(dc, u);
elevate_update_type(&overall_type, type);
- type = get_scaling_info_update_type(u);
+ type = get_scaling_info_update_type(dc, u);
elevate_update_type(&overall_type, type);
if (u->flip_addr) {
@@ -2576,12 +2835,15 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
if (u->gamut_remap_matrix)
update_flags->bits.gamut_remap_change = 1;
+ if (u->blend_tf)
+ update_flags->bits.gamma_change = 1;
+
if (u->gamma) {
enum surface_pixel_format format = SURFACE_PIXEL_FORMAT_GRPH_BEGIN;
if (u->plane_info)
format = u->plane_info->format;
- else if (u->surface)
+ else
format = u->surface->format;
if (dce_use_lut(format))
@@ -2597,12 +2859,33 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
elevate_update_type(&overall_type, UPDATE_TYPE_MED);
}
+ if (u->sdr_white_level_nits)
+ if (u->sdr_white_level_nits != u->surface->sdr_white_level_nits) {
+ update_flags->bits.sdr_white_level_nits = 1;
+ elevate_update_type(&overall_type, UPDATE_TYPE_FULL);
+ }
+
+ if (u->cm2_params) {
+ if ((u->cm2_params->component_settings.shaper_3dlut_setting
+ != u->surface->mcm_shaper_3dlut_setting)
+ || (u->cm2_params->component_settings.lut1d_enable
+ != u->surface->mcm_lut1d_enable))
+ update_flags->bits.mcm_transfer_function_enable_change = 1;
+ if (u->cm2_params->cm2_luts.lut3d_data.lut3d_src
+ != u->surface->mcm_luts.lut3d_data.lut3d_src)
+ update_flags->bits.mcm_transfer_function_enable_change = 1;
+ }
if (update_flags->bits.in_transfer_func_change) {
type = UPDATE_TYPE_MED;
elevate_update_type(&overall_type, type);
}
- if (update_flags->bits.lut_3d) {
+ if (update_flags->bits.lut_3d &&
+ u->surface->mcm_luts.lut3d_data.lut3d_src != DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) {
+ type = UPDATE_TYPE_FULL;
+ elevate_update_type(&overall_type, type);
+ }
+ if (update_flags->bits.mcm_transfer_function_enable_change) {
type = UPDATE_TYPE_FULL;
elevate_update_type(&overall_type, type);
}
@@ -2618,6 +2901,29 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
return overall_type;
}
+/* May need to flip the desktop plane in cases where MPO plane receives a flip but desktop plane doesn't
+ * while both planes are flip_immediate
+ */
+static void force_immediate_gsl_plane_flip(struct dc *dc, struct dc_surface_update *updates, int surface_count)
+{
+ bool has_flip_immediate_plane = false;
+ int i;
+
+ for (i = 0; i < surface_count; i++) {
+ if (updates[i].surface->flip_immediate) {
+ has_flip_immediate_plane = true;
+ break;
+ }
+ }
+
+ if (has_flip_immediate_plane && surface_count > 1) {
+ for (i = 0; i < surface_count; i++) {
+ if (updates[i].surface->flip_immediate)
+ updates[i].surface->update_flags.bits.addr_update = 1;
+ }
+ }
+}
+
static enum surface_update_type check_update_surfaces_for_stream(
struct dc *dc,
struct dc_surface_update *updates,
@@ -2628,7 +2934,7 @@ static enum surface_update_type check_update_surfaces_for_stream(
int i;
enum surface_update_type overall_type = UPDATE_TYPE_FAST;
- if (dc->idle_optimizations_allowed)
+ if (dc->idle_optimizations_allowed || dc_can_clear_cursor_limit(dc))
overall_type = UPDATE_TYPE_FULL;
if (stream_status == NULL || stream_status->plane_count != surface_count)
@@ -2638,6 +2944,10 @@ static enum surface_update_type check_update_surfaces_for_stream(
overall_type = UPDATE_TYPE_FULL;
}
+ if (stream_update && stream_update->hw_cursor_req) {
+ overall_type = UPDATE_TYPE_FULL;
+ }
+
/* some stream updates require passive update */
if (stream_update) {
union stream_update_flags *su_flags = &stream_update->stream->update_flags;
@@ -2668,15 +2978,24 @@ static enum surface_update_type check_update_surfaces_for_stream(
if (stream_update->mst_bw_update)
su_flags->bits.mst_bw = 1;
- if (stream_update->stream && stream_update->stream->freesync_on_desktop &&
+ if (stream_update->stream->freesync_on_desktop &&
(stream_update->vrr_infopacket || stream_update->allow_freesync ||
stream_update->vrr_active_variable || stream_update->vrr_active_fixed))
su_flags->bits.fams_changed = 1;
+ if (stream_update->scaler_sharpener_update)
+ su_flags->bits.scaler_sharpener = 1;
+
+ if (stream_update->sharpening_required)
+ su_flags->bits.sharpening_required = 1;
+
+ if (stream_update->output_color_space)
+ su_flags->bits.out_csc = 1;
+
if (su_flags->raw != 0)
overall_type = UPDATE_TYPE_FULL;
- if (stream_update->output_csc_transform || stream_update->output_color_space)
+ if (stream_update->output_csc_transform)
su_flags->bits.out_csc = 1;
/* Output transfer function changes do not require bandwidth recalculation,
@@ -2736,8 +3055,6 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
} else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) {
dc->optimized_required = true;
}
-
- dc->optimized_required |= dc->wm_optimized_required;
}
return type;
@@ -2820,55 +3137,66 @@ static void copy_surface_update_to_plane(
srf_update->plane_info->layer_index;
}
- if (srf_update->gamma &&
- (surface->gamma_correction !=
- srf_update->gamma)) {
- memcpy(&surface->gamma_correction->entries,
+ if (srf_update->gamma) {
+ memcpy(&surface->gamma_correction.entries,
&srf_update->gamma->entries,
sizeof(struct dc_gamma_entries));
- surface->gamma_correction->is_identity =
+ surface->gamma_correction.is_identity =
srf_update->gamma->is_identity;
- surface->gamma_correction->num_entries =
+ surface->gamma_correction.num_entries =
srf_update->gamma->num_entries;
- surface->gamma_correction->type =
+ surface->gamma_correction.type =
srf_update->gamma->type;
}
- if (srf_update->in_transfer_func &&
- (surface->in_transfer_func !=
- srf_update->in_transfer_func)) {
- surface->in_transfer_func->sdr_ref_white_level =
+ if (srf_update->in_transfer_func) {
+ surface->in_transfer_func.sdr_ref_white_level =
srf_update->in_transfer_func->sdr_ref_white_level;
- surface->in_transfer_func->tf =
+ surface->in_transfer_func.tf =
srf_update->in_transfer_func->tf;
- surface->in_transfer_func->type =
+ surface->in_transfer_func.type =
srf_update->in_transfer_func->type;
- memcpy(&surface->in_transfer_func->tf_pts,
+ memcpy(&surface->in_transfer_func.tf_pts,
&srf_update->in_transfer_func->tf_pts,
sizeof(struct dc_transfer_func_distributed_points));
}
- if (srf_update->func_shaper &&
- (surface->in_shaper_func !=
- srf_update->func_shaper))
- memcpy(surface->in_shaper_func, srf_update->func_shaper,
- sizeof(*surface->in_shaper_func));
+ if (srf_update->cm2_params) {
+ surface->mcm_shaper_3dlut_setting = srf_update->cm2_params->component_settings.shaper_3dlut_setting;
+ surface->mcm_lut1d_enable = srf_update->cm2_params->component_settings.lut1d_enable;
+ surface->mcm_luts = srf_update->cm2_params->cm2_luts;
+ }
+
+ if (srf_update->func_shaper) {
+ memcpy(&surface->in_shaper_func, srf_update->func_shaper,
+ sizeof(surface->in_shaper_func));
- if (srf_update->lut3d_func &&
- (surface->lut3d_func !=
- srf_update->lut3d_func))
- memcpy(surface->lut3d_func, srf_update->lut3d_func,
- sizeof(*surface->lut3d_func));
+ if (surface->mcm_shaper_3dlut_setting >= DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER)
+ surface->mcm_luts.shaper = &surface->in_shaper_func;
+ }
+
+ if (srf_update->lut3d_func)
+ memcpy(&surface->lut3d_func, srf_update->lut3d_func,
+ sizeof(surface->lut3d_func));
if (srf_update->hdr_mult.value)
surface->hdr_mult =
srf_update->hdr_mult;
- if (srf_update->blend_tf &&
- (surface->blend_tf !=
- srf_update->blend_tf))
- memcpy(surface->blend_tf, srf_update->blend_tf,
- sizeof(*surface->blend_tf));
+ if (srf_update->sdr_white_level_nits)
+ surface->sdr_white_level_nits =
+ srf_update->sdr_white_level_nits;
+
+ if (srf_update->blend_tf) {
+ memcpy(&surface->blend_tf, srf_update->blend_tf,
+ sizeof(surface->blend_tf));
+
+ if (surface->mcm_lut1d_enable)
+ surface->mcm_luts.lut1d_func = &surface->blend_tf;
+ }
+
+ if (srf_update->cm2_params || srf_update->blend_tf)
+ surface->lut_bank_a = !surface->lut_bank_a;
if (srf_update->input_csc_color_matrix)
surface->input_csc_color_matrix =
@@ -2881,6 +3209,14 @@ static void copy_surface_update_to_plane(
if (srf_update->gamut_remap_matrix)
surface->gamut_remap_matrix =
*srf_update->gamut_remap_matrix;
+
+ if (srf_update->cursor_csc_color_matrix)
+ surface->cursor_csc_color_matrix =
+ *srf_update->cursor_csc_color_matrix;
+
+ if (srf_update->bias_and_scale.bias_and_scale_valid)
+ surface->bias_and_scale =
+ srf_update->bias_and_scale;
}
static void copy_stream_update_to_stream(struct dc *dc,
@@ -2899,14 +3235,13 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->dst.height && update->dst.width)
stream->dst = update->dst;
- if (update->out_transfer_func &&
- stream->out_transfer_func != update->out_transfer_func) {
- stream->out_transfer_func->sdr_ref_white_level =
+ if (update->out_transfer_func) {
+ stream->out_transfer_func.sdr_ref_white_level =
update->out_transfer_func->sdr_ref_white_level;
- stream->out_transfer_func->tf = update->out_transfer_func->tf;
- stream->out_transfer_func->type =
+ stream->out_transfer_func.tf = update->out_transfer_func->tf;
+ stream->out_transfer_func.type =
update->out_transfer_func->type;
- memcpy(&stream->out_transfer_func->tf_pts,
+ memcpy(&stream->out_transfer_func.tf_pts,
&update->out_transfer_func->tf_pts,
sizeof(struct dc_transfer_func_distributed_points));
}
@@ -2936,6 +3271,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->vrr_infopacket)
stream->vrr_infopacket = *update->vrr_infopacket;
+ if (update->hw_cursor_req)
+ stream->hw_cursor_req = *update->hw_cursor_req;
+
if (update->allow_freesync)
stream->allow_freesync = *update->allow_freesync;
@@ -2945,8 +3283,14 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->vrr_active_fixed)
stream->vrr_active_fixed = *update->vrr_active_fixed;
- if (update->crtc_timing_adjust)
+ if (update->crtc_timing_adjust) {
+ if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min ||
+ stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max ||
+ stream->adjust.timing_adjust_pending)
+ update->crtc_timing_adjust->timing_adjust_pending = true;
stream->adjust = *update->crtc_timing_adjust;
+ update->crtc_timing_adjust->timing_adjust_pending = false;
+ }
if (update->dpms_off)
stream->dpms_off = *update->dpms_off;
@@ -2966,6 +3310,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->adaptive_sync_infopacket)
stream->adaptive_sync_infopacket = *update->adaptive_sync_infopacket;
+ if (update->avi_infopacket)
+ stream->avi_infopacket = *update->avi_infopacket;
+
if (update->dither_option)
stream->dither_option = *update->dither_option;
@@ -2988,27 +3335,119 @@ static void copy_stream_update_to_stream(struct dc *dc,
update->dsc_config->num_slices_v != 0);
/* Use temporarry context for validating new DSC config */
- struct dc_state *dsc_validate_context = dc_create_state(dc);
+ struct dc_state *dsc_validate_context = dc_state_create_copy(dc->current_state);
if (dsc_validate_context) {
- dc_resource_state_copy_construct(dc->current_state, dsc_validate_context);
-
stream->timing.dsc_cfg = *update->dsc_config;
stream->timing.flags.DSC = enable_dsc;
- if (!dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true)) {
+ if (dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context,
+ DC_VALIDATE_MODE_ONLY) != DC_OK) {
stream->timing.dsc_cfg = old_dsc_cfg;
stream->timing.flags.DSC = old_dsc_enabled;
update->dsc_config = NULL;
}
- dc_release_state(dsc_validate_context);
+ dc_state_release(dsc_validate_context);
} else {
DC_ERROR("Failed to allocate new validate context for DSC change\n");
update->dsc_config = NULL;
}
}
+ if (update->scaler_sharpener_update)
+ stream->scaler_sharpener_update = *update->scaler_sharpener_update;
+ if (update->sharpening_required)
+ stream->sharpening_required = *update->sharpening_required;
+}
+
+static void backup_planes_and_stream_state(
+ struct dc_scratch_space *scratch,
+ struct dc_stream_state *stream)
+{
+ int i;
+ struct dc_stream_status *status = dc_stream_get_status(stream);
+
+ if (!status)
+ return;
+
+ for (i = 0; i < status->plane_count; i++) {
+ dc_plane_copy_config(&scratch->plane_states[i], status->plane_states[i]);
+ }
+ scratch->stream_state = *stream;
+}
+
+static void restore_planes_and_stream_state(
+ struct dc_scratch_space *scratch,
+ struct dc_stream_state *stream)
+{
+ int i;
+ struct dc_stream_status *status = dc_stream_get_status(stream);
+
+ if (!status)
+ return;
+
+ for (i = 0; i < status->plane_count; i++) {
+ dc_plane_copy_config(status->plane_states[i], &scratch->plane_states[i]);
+ }
+ *stream = scratch->stream_state;
+}
+
+/**
+ * update_seamless_boot_flags() - Helper function for updating seamless boot flags
+ *
+ * @dc: Current DC state
+ * @context: New DC state to be programmed
+ * @surface_count: Number of surfaces that have an update
+ * @stream: Corresponding stream to be updated in the current flip
+ *
+ * Updating seamless boot flags does not need to be part of the commit sequence. This
+ * helper function will update the seamless boot flags on each flip (if required)
+ * outside of the HW commit sequence (fast or slow).
+ *
+ * Return: void
+ */
+static void update_seamless_boot_flags(struct dc *dc,
+ struct dc_state *context,
+ int surface_count,
+ struct dc_stream_state *stream)
+{
+ if (get_seamless_boot_stream_count(context) > 0 && (surface_count > 0 || stream->dpms_off)) {
+ /* Optimize seamless boot flag keeps clocks and watermarks high until
+ * first flip. After first flip, optimization is required to lower
+ * bandwidth. Important to note that it is expected UEFI will
+ * only light up a single display on POST, therefore we only expect
+ * one stream with seamless boot flag set.
+ */
+ if (stream->apply_seamless_boot_optimization) {
+ stream->apply_seamless_boot_optimization = false;
+
+ if (get_seamless_boot_stream_count(context) == 0)
+ dc->optimized_required = true;
+ }
+ }
}
+/**
+ * update_planes_and_stream_state() - The function takes planes and stream
+ * updates as inputs and determines the appropriate update type. If update type
+ * is FULL, the function allocates a new context, populates and validates it.
+ * Otherwise, it updates current dc context. The function will return both
+ * new_context and new_update_type back to the caller. The function also backs
+ * up both current and new contexts into corresponding dc state scratch memory.
+ * TODO: The function does too many things, and even conditionally allocates dc
+ * context memory implicitly. We should consider breaking it down.
+ *
+ * @dc: Current DC state
+ * @srf_updates: an array of surface updates
+ * @surface_count: surface update count
+ * @stream: Corresponding stream to be updated
+ * @stream_update: stream update
+ * @new_update_type: [out] determined update type by the function
+ * @new_context: [out] new context allocated and validated if update type is
+ * FULL, reference to current context if update type is less than FULL.
+ *
+ * Return: true if a valid update is populated into new_context, false
+ * otherwise.
+ */
static bool update_planes_and_stream_state(struct dc *dc,
struct dc_surface_update *srf_updates, int surface_count,
struct dc_stream_state *stream,
@@ -3032,9 +3471,15 @@ static bool update_planes_and_stream_state(struct dc *dc,
}
context = dc->current_state;
-
update_type = dc_check_update_surfaces_for_stream(
dc, srf_updates, surface_count, stream_update, stream_status);
+ /* It is possible to receive a flip for one plane while there are multiple flip_immediate planes in the same stream.
+ * E.g. Desktop and MPO plane are flip_immediate but only the MPO plane received a flip
+ * Force the other flip_immediate planes to flip so GSL doesn't wait for a flip that won't come.
+ */
+ force_immediate_gsl_plane_flip(dc, srf_updates, surface_count);
+ if (update_type == UPDATE_TYPE_FULL)
+ backup_planes_and_stream_state(&dc->scratch.current_state, stream);
/* update current stream with the new updates */
copy_stream_update_to_stream(dc, context, stream, stream_update);
@@ -3062,6 +3507,9 @@ static bool update_planes_and_stream_state(struct dc *dc,
if (update_type >= update_surface_trace_level)
update_surface_trace(dc, srf_updates, surface_count);
+ for (i = 0; i < surface_count; i++)
+ copy_surface_update_to_plane(srf_updates[i].surface, &srf_updates[i]);
+
if (update_type >= UPDATE_TYPE_FULL) {
struct dc_plane_state *new_planes[MAX_SURFACES] = {0};
@@ -3069,30 +3517,27 @@ static bool update_planes_and_stream_state(struct dc *dc,
new_planes[i] = srf_updates[i].surface;
/* initialize scratch memory for building context */
- context = dc_create_state(dc);
+ context = dc_state_create_copy(dc->current_state);
if (context == NULL) {
DC_ERROR("Failed to allocate new validate context!\n");
return false;
}
- dc_resource_state_copy_construct(
- dc->current_state, context);
-
/* For each full update, remove all existing phantom pipes first.
* Ensures that we have enough pipes for newly added MPO planes
*/
- if (dc->res_pool->funcs->remove_phantom_pipes)
- dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
+ dc_state_remove_phantom_streams_and_planes(dc, context);
+ dc_state_release_phantom_streams_and_planes(dc, context);
/*remove old surfaces from context */
- if (!dc_rem_all_planes_for_stream(dc, stream, context)) {
+ if (!dc_state_rem_all_planes_for_stream(dc, stream, context)) {
BREAK_TO_DEBUGGER();
goto fail;
}
/* add surface to context */
- if (!dc_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
+ if (!dc_state_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
BREAK_TO_DEBUGGER();
goto fail;
@@ -3103,9 +3548,9 @@ static bool update_planes_and_stream_state(struct dc *dc,
for (i = 0; i < surface_count; i++) {
struct dc_plane_state *surface = srf_updates[i].surface;
- copy_surface_update_to_plane(surface, &srf_updates[i]);
-
- if (update_type >= UPDATE_TYPE_MED) {
+ if (update_type != UPDATE_TYPE_MED)
+ continue;
+ if (surface->update_flags.bits.position_change) {
for (j = 0; j < dc->res_pool->pipe_count; j++) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
@@ -3118,32 +3563,22 @@ static bool update_planes_and_stream_state(struct dc *dc,
}
if (update_type == UPDATE_TYPE_FULL) {
- if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
- /* For phantom pipes we remove and create a new set of phantom pipes
- * for each full update (because we don't know if we'll need phantom
- * pipes until after the first round of validation). However, if validation
- * fails we need to keep the existing phantom pipes (because we don't update
- * the dc->current_state).
- *
- * The phantom stream/plane refcount is decremented for validation because
- * we assume it'll be removed (the free comes when the dc_state is freed),
- * but if validation fails we have to increment back the refcount so it's
- * consistent.
- */
- if (dc->res_pool->funcs->retain_phantom_pipes)
- dc->res_pool->funcs->retain_phantom_pipes(dc, dc->current_state);
+ if (dc->res_pool->funcs->validate_bandwidth(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING) != DC_OK) {
BREAK_TO_DEBUGGER();
goto fail;
}
}
+ update_seamless_boot_flags(dc, context, surface_count, stream);
*new_context = context;
*new_update_type = update_type;
+ if (update_type == UPDATE_TYPE_FULL)
+ backup_planes_and_stream_state(&dc->scratch.new_state, stream);
return true;
fail:
- dc_release_state(context);
+ dc_state_release(context);
return false;
@@ -3172,7 +3607,8 @@ static void commit_planes_do_stream_update(struct dc *dc,
stream_update->vsp_infopacket ||
stream_update->hfvsif_infopacket ||
stream_update->adaptive_sync_infopacket ||
- stream_update->vtem_infopacket) {
+ stream_update->vtem_infopacket ||
+ stream_update->avi_infopacket) {
resource_build_info_frame(pipe_ctx);
dc->hwss.update_info_frame(pipe_ctx);
@@ -3209,6 +3645,11 @@ static void commit_planes_do_stream_update(struct dc *dc,
}
}
+ if (stream_update->cursor_attributes)
+ program_cursor_attributes(dc, stream);
+
+ if (stream_update->cursor_position)
+ program_cursor_position(dc, stream);
/* Full fe update*/
if (update_type == UPDATE_TYPE_FAST)
@@ -3221,18 +3662,32 @@ static void commit_planes_do_stream_update(struct dc *dc,
if (stream_update->mst_bw_update->is_increase)
dc->link_srv->increase_mst_payload(pipe_ctx,
stream_update->mst_bw_update->mst_stream_bw);
- else
+ else
dc->link_srv->reduce_mst_payload(pipe_ctx,
stream_update->mst_bw_update->mst_stream_bw);
- }
+ }
if (stream_update->pending_test_pattern) {
- dc_link_dp_set_test_pattern(stream->link,
+ /*
+ * Test pattern params depend on ODM topology
+ * changes that we could be applying to front
+ * end. Since at the current stage front end
+ * changes are not yet applied, we can only
+ * apply test pattern in hw based on current
+ * state and populate the final test pattern
+ * params in new state. If current and new test
+ * pattern params are different as a result of
+ * different ODM topology being used, it will be
+ * detected and handled during front end
+ * programming update.
+ */
+ dc->link_srv->dp_set_test_pattern(stream->link,
stream->test_pattern.type,
stream->test_pattern.color_space,
stream->test_pattern.p_link_settings,
stream->test_pattern.p_custom_pattern,
stream->test_pattern.cust_pattern_size);
+ resource_build_test_pattern_params(&context->res_ctx, pipe_ctx);
}
if (stream_update->dpms_off) {
@@ -3289,6 +3744,9 @@ static bool dc_dmub_should_send_dirty_rect_cmd(struct dc *dc, struct dc_stream_s
if (stream->link->replay_settings.config.replay_supported)
return true;
+ if (stream->ctx->dce_version >= DCN_VERSION_3_5 && stream->abm_level)
+ return true;
+
return false;
}
@@ -3326,6 +3784,7 @@ void dc_dmub_update_dirty_rect(struct dc *dc,
if (srf_updates[i].surface->flip_immediate)
continue;
+ update_dirty_rect->cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
update_dirty_rect->dirty_rect_count = flip_addr->dirty_rect_count;
memcpy(update_dirty_rect->src_dirty_rects, flip_addr->dirty_rects,
sizeof(flip_addr->dirty_rects));
@@ -3339,7 +3798,7 @@ void dc_dmub_update_dirty_rect(struct dc *dc,
update_dirty_rect->panel_inst = panel_inst;
update_dirty_rect->pipe_idx = j;
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
}
}
}
@@ -3400,6 +3859,15 @@ static void build_dmub_update_dirty_rect(
}
}
+static bool check_address_only_update(union surface_update_flags update_flags)
+{
+ union surface_update_flags addr_only_update_flags;
+ addr_only_update_flags.raw = 0;
+ addr_only_update_flags.bits.addr_update = 1;
+
+ return update_flags.bits.addr_update &&
+ !(update_flags.raw & ~addr_only_update_flags.raw);
+}
/**
* build_dmub_cmd_list() - Build an array of DMCUB commands to be sent to DMCUB
@@ -3431,6 +3899,45 @@ static void build_dmub_cmd_list(struct dc *dc,
build_dmub_update_dirty_rect(dc, surface_count, stream, srf_updates, context, dc_dmub_cmd, dmub_cmd_count);
}
+static void commit_plane_for_stream_offload_fams2_flip(struct dc *dc,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_state *context)
+{
+ int i, j;
+
+ /* update dirty rect for PSR */
+ dc_dmub_update_dirty_rect(dc, surface_count, stream,
+ srf_updates, context);
+
+ /* Perform requested Updates */
+ for (i = 0; i < surface_count; i++) {
+ struct dc_plane_state *plane_state = srf_updates[i].surface;
+
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ if (!should_update_pipe_for_stream(context, pipe_ctx, stream))
+ continue;
+
+ if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state))
+ continue;
+
+ /* update pipe context for plane */
+ if (pipe_ctx->plane_state->update_flags.bits.addr_update)
+ dc->hwss.update_plane_addr(dc, pipe_ctx);
+ }
+ }
+
+ /* Send commands to DMCUB */
+ dc_dmub_srv_fams2_passthrough_flip(dc,
+ context,
+ stream,
+ srf_updates,
+ surface_count);
+}
+
static void commit_planes_for_stream_fast(struct dc *dc,
struct dc_surface_update *srf_updates,
int surface_count,
@@ -3441,18 +3948,53 @@ static void commit_planes_for_stream_fast(struct dc *dc,
{
int i, j;
struct pipe_ctx *top_pipe_to_program = NULL;
+ struct dc_stream_status *stream_status = NULL;
+ bool should_offload_fams2_flip = false;
+ bool should_lock_all_pipes = (update_type != UPDATE_TYPE_FAST);
+
+ if (should_lock_all_pipes)
+ determine_pipe_unlock_order(dc, context);
+
+ if (dc->debug.fams2_config.bits.enable &&
+ dc->debug.fams2_config.bits.enable_offload_flip &&
+ dc_state_is_fams2_in_use(dc, context)) {
+ /* if not offloading to HWFQ, offload to FAMS2 if needed */
+ should_offload_fams2_flip = true;
+ for (i = 0; i < surface_count; i++) {
+ if (srf_updates[i].surface &&
+ srf_updates[i].surface->update_flags.raw &&
+ !check_address_only_update(srf_updates[i].surface->update_flags)) {
+ /* more than address update, need to acquire FAMS2 lock */
+ should_offload_fams2_flip = false;
+ break;
+ }
+ }
+ if (stream_update) {
+ /* more than address update, need to acquire FAMS2 lock */
+ should_offload_fams2_flip = false;
+ }
+ }
+
+ dc_exit_ips_for_hw_access(dc);
+
dc_z10_restore(dc);
top_pipe_to_program = resource_get_otg_master_for_stream(
&context->res_ctx,
stream);
- if (dc->debug.visual_confirm) {
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ if (!top_pipe_to_program)
+ return;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && pipe->plane_state) {
+ if (!dc->debug.using_dml2)
+ set_p_state_switch_method(dc, context, pipe);
- if (pipe->stream && pipe->plane_state)
- dc_update_viusal_confirm_color(dc, context, pipe);
+ if (dc->debug.visual_confirm)
+ dc_update_visual_confirm_color(dc, context, pipe);
}
}
@@ -3464,34 +4006,48 @@ static void commit_planes_for_stream_fast(struct dc *dc,
if (!pipe_ctx->plane_state)
continue;
- if (should_update_pipe_for_plane(context, pipe_ctx, plane_state))
+ if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state))
continue;
+
pipe_ctx->plane_state->triplebuffer_flips = false;
if (update_type == UPDATE_TYPE_FAST &&
- dc->hwss.program_triplebuffer &&
- !pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) {
- /*triple buffer for VUpdate only*/
+ dc->hwss.program_triplebuffer != NULL &&
+ !pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) {
+ /*triple buffer for VUpdate only*/
pipe_ctx->plane_state->triplebuffer_flips = true;
}
}
}
- build_dmub_cmd_list(dc,
- srf_updates,
- surface_count,
- stream,
- context,
- context->dc_dmub_cmd,
- &(context->dmub_cmd_count));
- hwss_build_fast_sequence(dc,
- context->dc_dmub_cmd,
- context->dmub_cmd_count,
- context->block_sequence,
- &(context->block_sequence_steps),
- top_pipe_to_program);
- hwss_execute_sequence(dc,
- context->block_sequence,
- context->block_sequence_steps);
+ stream_status = dc_state_get_stream_status(context, stream);
+
+ if (should_offload_fams2_flip) {
+ commit_plane_for_stream_offload_fams2_flip(dc,
+ srf_updates,
+ surface_count,
+ stream,
+ context);
+ } else if (stream_status) {
+ build_dmub_cmd_list(dc,
+ srf_updates,
+ surface_count,
+ stream,
+ context,
+ context->dc_dmub_cmd,
+ &(context->dmub_cmd_count));
+ hwss_build_fast_sequence(dc,
+ context->dc_dmub_cmd,
+ context->dmub_cmd_count,
+ context->block_sequence,
+ &(context->block_sequence_steps),
+ top_pipe_to_program,
+ stream_status,
+ context);
+ hwss_execute_sequence(dc,
+ context->block_sequence,
+ context->block_sequence_steps);
+ }
+
/* Clear update flags so next flip doesn't have redundant programming
* (if there's no stream update, the update flags are not cleared).
* Surface updates are cleared unconditionally at the beginning of each flip,
@@ -3501,45 +4057,6 @@ static void commit_planes_for_stream_fast(struct dc *dc,
top_pipe_to_program->stream->update_flags.raw = 0;
}
-static void wait_for_outstanding_hw_updates(struct dc *dc, const struct dc_state *dc_context)
-{
-/*
- * This function calls HWSS to wait for any potentially double buffered
- * operations to complete. It should be invoked as a pre-amble prior
- * to full update programming before asserting any HW locks.
- */
- int pipe_idx;
- int opp_inst;
- int opp_count = dc->res_pool->pipe_count;
- struct hubp *hubp;
- int mpcc_inst;
- const struct pipe_ctx *pipe_ctx;
-
- for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
- pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx];
-
- if (!pipe_ctx->stream)
- continue;
-
- if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear)
- pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg);
-
- hubp = pipe_ctx->plane_res.hubp;
- if (!hubp)
- continue;
-
- mpcc_inst = hubp->inst;
- // MPCC inst is equal to pipe index in practice
- for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
- if (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst]) {
- dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
- dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
- break;
- }
- }
- }
-}
-
static void commit_planes_for_stream(struct dc *dc,
struct dc_surface_update *srf_updates,
int surface_count,
@@ -3553,14 +4070,34 @@ static void commit_planes_for_stream(struct dc *dc,
bool should_lock_all_pipes = (update_type != UPDATE_TYPE_FAST);
bool subvp_prev_use = false;
bool subvp_curr_use = false;
+ uint8_t current_stream_mask = 0;
+ if (should_lock_all_pipes)
+ determine_pipe_unlock_order(dc, context);
// Once we apply the new subvp context to hardware it won't be in the
// dc->current_state anymore, so we have to cache it before we apply
// the new SubVP context
subvp_prev_use = false;
+ dc_exit_ips_for_hw_access(dc);
+
dc_z10_restore(dc);
- if (update_type == UPDATE_TYPE_FULL)
- wait_for_outstanding_hw_updates(dc, context);
+ if (update_type == UPDATE_TYPE_FULL && dc->optimized_required)
+ hwss_process_outstanding_hw_updates(dc, dc->current_state);
+
+ if (update_type != UPDATE_TYPE_FAST && dc->res_pool->funcs->prepare_mcache_programming)
+ dc->res_pool->funcs->prepare_mcache_programming(dc, context);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && pipe->plane_state) {
+ if (!dc->debug.using_dml2)
+ set_p_state_switch_method(dc, context, pipe);
+
+ if (dc->debug.visual_confirm)
+ dc_update_visual_confirm_color(dc, context, pipe);
+ }
+ }
if (update_type == UPDATE_TYPE_FULL) {
dc_allow_idle_optimizations(dc, false);
@@ -3568,21 +4105,25 @@ static void commit_planes_for_stream(struct dc *dc,
if (get_seamless_boot_stream_count(context) == 0)
dc->hwss.prepare_bandwidth(dc, context);
- if (dc->debug.enable_double_buffered_dsc_pg_support)
+ if (dc->hwss.update_dsc_pg)
dc->hwss.update_dsc_pg(dc, context, false);
context_clock_trace(dc, context);
}
+ if (update_type == UPDATE_TYPE_FULL)
+ hwss_wait_for_outstanding_hw_updates(dc, dc->current_state);
+
top_pipe_to_program = resource_get_otg_master_for_stream(
&context->res_ctx,
stream);
+ ASSERT(top_pipe_to_program != NULL);
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
// Check old context for SubVP
- subvp_prev_use |= (old_pipe->stream && old_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM);
+ subvp_prev_use |= (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) == SUBVP_PHANTOM);
if (subvp_prev_use)
break;
}
@@ -3590,20 +4131,12 @@ static void commit_planes_for_stream(struct dc *dc,
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
subvp_curr_use = true;
break;
}
}
- if (dc->debug.visual_confirm)
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->stream && pipe->plane_state)
- dc_update_viusal_confirm_color(dc, context, pipe);
- }
-
if (stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE) {
struct pipe_ctx *mpcc_pipe;
struct pipe_ctx *odm_pipe;
@@ -3632,14 +4165,28 @@ static void commit_planes_for_stream(struct dc *dc,
top_pipe_to_program->stream_res.tg);
}
+ if (dc->hwss.wait_for_dcc_meta_propagation) {
+ dc->hwss.wait_for_dcc_meta_propagation(dc, top_pipe_to_program);
+ }
+
+ if (dc->hwseq->funcs.wait_for_pipe_update_if_needed)
+ dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, top_pipe_to_program, update_type < UPDATE_TYPE_FULL);
+
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
if (dc->hwss.subvp_pipe_control_lock)
- dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, NULL, subvp_prev_use);
- dc->hwss.interdependent_update_lock(dc, context, true);
+ dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, NULL, subvp_prev_use);
+ if (dc->hwss.fams2_global_control_lock)
+ dc->hwss.fams2_global_control_lock(dc, context, true);
+
+ dc->hwss.interdependent_update_lock(dc, context, true);
} else {
if (dc->hwss.subvp_pipe_control_lock)
dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
+
+ if (dc->hwss.fams2_global_control_lock)
+ dc->hwss.fams2_global_control_lock(dc, context, true);
+
/* Lock the top pipe while updating plane addrs, since freesync requires
* plane addr update event triggers to be synchronized.
* top_pipe_to_program is expected to never be NULL
@@ -3680,6 +4227,10 @@ static void commit_planes_for_stream(struct dc *dc,
if (dc->hwss.subvp_pipe_control_lock)
dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes,
NULL, subvp_prev_use);
+
+ if (dc->hwss.fams2_global_control_lock)
+ dc->hwss.fams2_global_control_lock(dc, context, false);
+
return;
}
@@ -3702,24 +4253,26 @@ static void commit_planes_for_stream(struct dc *dc,
for (i = 0; i < surface_count; i++) {
struct dc_plane_state *plane_state = srf_updates[i].surface;
+
/*set logical flag for lock/unlock use*/
for (j = 0; j < dc->res_pool->pipe_count; j++) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
if (!pipe_ctx->plane_state)
continue;
- if (should_update_pipe_for_plane(context, pipe_ctx, plane_state))
+ if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state))
continue;
pipe_ctx->plane_state->triplebuffer_flips = false;
if (update_type == UPDATE_TYPE_FAST &&
- dc->hwss.program_triplebuffer != NULL &&
- !pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) {
- /*triple buffer for VUpdate only*/
- pipe_ctx->plane_state->triplebuffer_flips = true;
+ dc->hwss.program_triplebuffer != NULL &&
+ !pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) {
+ /*triple buffer for VUpdate only*/
+ pipe_ctx->plane_state->triplebuffer_flips = true;
}
}
if (update_type == UPDATE_TYPE_FULL) {
/* force vsync flip when reconfiguring pipes to prevent underflow */
plane_state->flip_immediate = false;
+ plane_state->triplebuffer_flips = false;
}
}
@@ -3739,23 +4292,45 @@ static void commit_planes_for_stream(struct dc *dc,
if (update_type == UPDATE_TYPE_FAST)
continue;
- ASSERT(!pipe_ctx->plane_state->triplebuffer_flips);
-
- if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
- /*turn off triple buffer for full update*/
- dc->hwss.program_triplebuffer(
- dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips);
- }
stream_status =
stream_get_status(context, pipe_ctx->stream);
- if (dc->hwss.apply_ctx_for_surface)
+ if (dc->hwss.apply_ctx_for_surface && stream_status)
dc->hwss.apply_ctx_for_surface(
dc, pipe_ctx->stream, stream_status->plane_count, context);
}
}
+
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ if (!pipe_ctx->plane_state)
+ continue;
+
+ /* Full fe update*/
+ if (update_type == UPDATE_TYPE_FAST)
+ continue;
+
+ ASSERT(!pipe_ctx->plane_state->triplebuffer_flips);
+ if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
+ /*turn off triple buffer for full update*/
+ dc->hwss.program_triplebuffer(
+ dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips);
+ }
+ }
+
if (dc->hwss.program_front_end_for_ctx && update_type != UPDATE_TYPE_FAST) {
dc->hwss.program_front_end_for_ctx(dc, context);
+
+ //Pipe busy until some frame and line #
+ if (dc->hwseq->funcs.set_wait_for_update_needed_for_pipe && update_type == UPDATE_TYPE_FULL) {
+ for (j = 0; j < dc->res_pool->pipe_count; j++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+ dc->hwseq->funcs.set_wait_for_update_needed_for_pipe(dc, pipe_ctx);
+ }
+ }
+
if (dc->debug.validate_dml_output) {
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *cur_pipe = &context->res_ctx.pipe_ctx[i];
@@ -3805,9 +4380,17 @@ static void commit_planes_for_stream(struct dc *dc,
if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state))
continue;
+ if (srf_updates[i].cm2_params &&
+ srf_updates[i].cm2_params->cm2_luts.lut3d_data.lut3d_src ==
+ DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM &&
+ srf_updates[i].cm2_params->component_settings.shaper_3dlut_setting ==
+ DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER_3DLUT &&
+ dc->hwss.trigger_3dlut_dma_load)
+ dc->hwss.trigger_3dlut_dma_load(dc, pipe_ctx);
+
/*program triple buffer after lock based on flip type*/
if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
- /*only enable triplebuffer for fast_update*/
+ /*only enable triplebuffer for fast_update*/
dc->hwss.program_triplebuffer(
dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips);
}
@@ -3824,7 +4407,8 @@ static void commit_planes_for_stream(struct dc *dc,
}
if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed)
- if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
+ if (top_pipe_to_program &&
+ top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) {
top_pipe_to_program->stream_res.tg->funcs->wait_for_state(
top_pipe_to_program->stream_res.tg,
CRTC_STATE_VACTIVE);
@@ -3869,7 +4453,9 @@ static void commit_planes_for_stream(struct dc *dc,
* programming has completed (we turn on phantom OTG in order
* to complete the plane disable for phantom pipes).
*/
- dc->hwss.apply_ctx_to_hw(dc, context);
+
+ if (dc->hwss.disable_phantom_streams)
+ dc->hwss.disable_phantom_streams(dc, context);
}
if (update_type != UPDATE_TYPE_FAST)
@@ -3881,9 +4467,13 @@ static void commit_planes_for_stream(struct dc *dc,
if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
if (dc->hwss.subvp_pipe_control_lock)
dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use);
+ if (dc->hwss.fams2_global_control_lock)
+ dc->hwss.fams2_global_control_lock(dc, context, false);
} else {
if (dc->hwss.subvp_pipe_control_lock)
dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use);
+ if (dc->hwss.fams2_global_control_lock)
+ dc->hwss.fams2_global_control_lock(dc, context, false);
}
// Fire manual trigger only when bottom plane is flipped
@@ -3902,6 +4492,12 @@ static void commit_planes_for_stream(struct dc *dc,
if (pipe_ctx->stream_res.tg->funcs->program_manual_trigger)
pipe_ctx->stream_res.tg->funcs->program_manual_trigger(pipe_ctx->stream_res.tg);
}
+
+ current_stream_mask = get_stream_mask(dc, context);
+ if (current_stream_mask != context->stream_mask) {
+ context->stream_mask = current_stream_mask;
+ dc_dmub_srv_notify_stream_mask(dc->ctx->dmub_srv, current_stream_mask);
+ }
}
/**
@@ -3909,6 +4505,7 @@ static void commit_planes_for_stream(struct dc *dc,
*
* @dc: Used to get the current state status
* @stream: Target stream, which we want to remove the attached planes
+ * @srf_updates: Array of surface updates
* @surface_count: Number of surface update
* @is_plane_addition: [in] Fill out with true if it is a plane addition case
*
@@ -3925,6 +4522,7 @@ static void commit_planes_for_stream(struct dc *dc,
*/
static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates,
int surface_count,
bool *is_plane_addition)
{
@@ -3967,7 +4565,7 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+ if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_NONE) {
subvp_active = true;
break;
}
@@ -3995,6 +4593,278 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
return force_minimal_pipe_splitting;
}
+struct pipe_split_policy_backup {
+ bool dynamic_odm_policy;
+ bool subvp_policy;
+ enum pipe_split_policy mpc_policy;
+ char force_odm[MAX_PIPES];
+};
+
+static void backup_and_set_minimal_pipe_split_policy(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_split_policy_backup *policy)
+{
+ int i;
+
+ if (!dc->config.is_vmin_only_asic) {
+ policy->mpc_policy = dc->debug.pipe_split_policy;
+ dc->debug.pipe_split_policy = MPC_SPLIT_AVOID;
+ }
+ policy->dynamic_odm_policy = dc->debug.enable_single_display_2to1_odm_policy;
+ dc->debug.enable_single_display_2to1_odm_policy = false;
+ policy->subvp_policy = dc->debug.force_disable_subvp;
+ dc->debug.force_disable_subvp = true;
+ for (i = 0; i < context->stream_count; i++) {
+ policy->force_odm[i] = context->streams[i]->debug.force_odm_combine_segments;
+ if (context->streams[i]->debug.allow_transition_for_forced_odm)
+ context->streams[i]->debug.force_odm_combine_segments = 0;
+ }
+}
+
+static void restore_minimal_pipe_split_policy(struct dc *dc,
+ struct dc_state *context,
+ struct pipe_split_policy_backup *policy)
+{
+ uint8_t i;
+
+ if (!dc->config.is_vmin_only_asic)
+ dc->debug.pipe_split_policy = policy->mpc_policy;
+ dc->debug.enable_single_display_2to1_odm_policy =
+ policy->dynamic_odm_policy;
+ dc->debug.force_disable_subvp = policy->subvp_policy;
+ for (i = 0; i < context->stream_count; i++)
+ context->streams[i]->debug.force_odm_combine_segments = policy->force_odm[i];
+}
+
+static void release_minimal_transition_state(struct dc *dc,
+ struct dc_state *minimal_transition_context,
+ struct dc_state *base_context,
+ struct pipe_split_policy_backup *policy)
+{
+ restore_minimal_pipe_split_policy(dc, base_context, policy);
+ dc_state_release(minimal_transition_context);
+}
+
+static void force_vsync_flip_in_minimal_transition_context(struct dc_state *context)
+{
+ uint8_t i;
+ int j;
+ struct dc_stream_status *stream_status;
+
+ for (i = 0; i < context->stream_count; i++) {
+ stream_status = &context->stream_status[i];
+
+ for (j = 0; j < stream_status->plane_count; j++)
+ stream_status->plane_states[j]->flip_immediate = false;
+ }
+}
+
+static struct dc_state *create_minimal_transition_state(struct dc *dc,
+ struct dc_state *base_context, struct pipe_split_policy_backup *policy)
+{
+ struct dc_state *minimal_transition_context = NULL;
+
+ minimal_transition_context = dc_state_create_copy(base_context);
+ if (!minimal_transition_context)
+ return NULL;
+
+ backup_and_set_minimal_pipe_split_policy(dc, base_context, policy);
+ /* commit minimal state */
+ if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context,
+ DC_VALIDATE_MODE_AND_PROGRAMMING) == DC_OK) {
+ /* prevent underflow and corruption when reconfiguring pipes */
+ force_vsync_flip_in_minimal_transition_context(minimal_transition_context);
+ } else {
+ /*
+ * This should never happen, minimal transition state should
+ * always be validated first before adding pipe split features.
+ */
+ release_minimal_transition_state(dc, minimal_transition_context, base_context, policy);
+ BREAK_TO_DEBUGGER();
+ minimal_transition_context = NULL;
+ }
+ return minimal_transition_context;
+}
+
+static bool is_pipe_topology_transition_seamless_with_intermediate_step(
+ struct dc *dc,
+ struct dc_state *initial_state,
+ struct dc_state *intermediate_state,
+ struct dc_state *final_state)
+{
+ return dc->hwss.is_pipe_topology_transition_seamless(dc, initial_state,
+ intermediate_state) &&
+ dc->hwss.is_pipe_topology_transition_seamless(dc,
+ intermediate_state, final_state);
+}
+
+static void swap_and_release_current_context(struct dc *dc,
+ struct dc_state *new_context, struct dc_stream_state *stream)
+{
+
+ int i;
+ struct dc_state *old = dc->current_state;
+ struct pipe_ctx *pipe_ctx;
+
+ /* Since memory free requires elevated IRQ, an interrupt
+ * request is generated by mem free. If this happens
+ * between freeing and reassigning the context, our vsync
+ * interrupt will call into dc and cause a memory
+ * corruption. Hence, we first reassign the context,
+ * then free the old context.
+ */
+ dc->current_state = new_context;
+ dc_state_release(old);
+
+ // clear any forced full updates
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe_ctx = &new_context->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->plane_state && pipe_ctx->stream == stream)
+ pipe_ctx->plane_state->force_full_update = false;
+ }
+}
+
+static int initialize_empty_surface_updates(
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates)
+{
+ struct dc_stream_status *status = dc_stream_get_status(stream);
+ int i;
+
+ if (!status)
+ return 0;
+
+ for (i = 0; i < status->plane_count; i++)
+ srf_updates[i].surface = status->plane_states[i];
+
+ return status->plane_count;
+}
+
+static bool commit_minimal_transition_based_on_new_context(struct dc *dc,
+ struct dc_state *new_context,
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates,
+ int surface_count)
+{
+ bool success = false;
+ struct pipe_split_policy_backup policy;
+ struct dc_state *intermediate_context =
+ create_minimal_transition_state(dc, new_context,
+ &policy);
+
+ if (intermediate_context) {
+ if (is_pipe_topology_transition_seamless_with_intermediate_step(
+ dc,
+ dc->current_state,
+ intermediate_context,
+ new_context)) {
+ DC_LOG_DC("commit minimal transition state: base = new state\n");
+ commit_planes_for_stream(dc, srf_updates,
+ surface_count, stream, NULL,
+ UPDATE_TYPE_FULL, intermediate_context);
+ swap_and_release_current_context(
+ dc, intermediate_context, stream);
+ dc_state_retain(dc->current_state);
+ success = true;
+ }
+ release_minimal_transition_state(
+ dc, intermediate_context, new_context, &policy);
+ }
+ return success;
+}
+
+static bool commit_minimal_transition_based_on_current_context(struct dc *dc,
+ struct dc_state *new_context, struct dc_stream_state *stream)
+{
+ bool success = false;
+ struct pipe_split_policy_backup policy;
+ struct dc_state *intermediate_context;
+ struct dc_state *old_current_state = dc->current_state;
+ struct dc_surface_update srf_updates[MAX_SURFACES] = {0};
+ int surface_count;
+
+ /*
+ * Both current and new contexts share the same stream and plane state
+ * pointers. When new context is validated, stream and planes get
+ * populated with new updates such as new plane addresses. This makes
+ * the current context no longer valid because stream and planes are
+ * modified from the original. We backup current stream and plane states
+ * into scratch space whenever we are populating new context. So we can
+ * restore the original values back by calling the restore function now.
+ * This restores back the original stream and plane states associated
+ * with the current state.
+ */
+ restore_planes_and_stream_state(&dc->scratch.current_state, stream);
+ dc_state_retain(old_current_state);
+ intermediate_context = create_minimal_transition_state(dc,
+ old_current_state, &policy);
+
+ if (intermediate_context) {
+ if (is_pipe_topology_transition_seamless_with_intermediate_step(
+ dc,
+ dc->current_state,
+ intermediate_context,
+ new_context)) {
+ DC_LOG_DC("commit minimal transition state: base = current state\n");
+ surface_count = initialize_empty_surface_updates(
+ stream, srf_updates);
+ commit_planes_for_stream(dc, srf_updates,
+ surface_count, stream, NULL,
+ UPDATE_TYPE_FULL, intermediate_context);
+ swap_and_release_current_context(
+ dc, intermediate_context, stream);
+ dc_state_retain(dc->current_state);
+ success = true;
+ }
+ release_minimal_transition_state(dc, intermediate_context,
+ old_current_state, &policy);
+ }
+ dc_state_release(old_current_state);
+ /*
+ * Restore stream and plane states back to the values associated with
+ * new context.
+ */
+ restore_planes_and_stream_state(&dc->scratch.new_state, stream);
+ return success;
+}
+
+/**
+ * commit_minimal_transition_state_in_dc_update - Commit a minimal state based
+ * on current or new context
+ *
+ * @dc: DC structure, used to get the current state
+ * @new_context: New context
+ * @stream: Stream getting the update for the flip
+ * @srf_updates: Surface updates
+ * @surface_count: Number of surfaces
+ *
+ * The function takes in current state and new state and determines a minimal
+ * transition state as the intermediate step which could make the transition
+ * between current and new states seamless. If found, it will commit the minimal
+ * transition state, update current state to this minimal transition state
+ * and return true; if not, it will return false.
+ *
+ * Return:
+ * Return True if the minimal transition succeeded, false otherwise
+ */
+static bool commit_minimal_transition_state_in_dc_update(struct dc *dc,
+ struct dc_state *new_context,
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates,
+ int surface_count)
+{
+ bool success = commit_minimal_transition_based_on_new_context(
+ dc, new_context, stream, srf_updates,
+ surface_count);
+ if (!success)
+ success = commit_minimal_transition_based_on_current_context(dc,
+ new_context, stream);
+ if (!success)
+ DC_LOG_ERROR("Fail to commit a seamless minimal transition state between current and new states.\nThis pipe topology update is non-seamless!\n");
+ return success;
+}
+
/**
* commit_minimal_transition_state - Create a transition pipe split state
*
@@ -4016,23 +4886,14 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
static bool commit_minimal_transition_state(struct dc *dc,
struct dc_state *transition_base_context)
{
- struct dc_state *transition_context = dc_create_state(dc);
- enum pipe_split_policy tmp_mpc_policy = 0;
- bool temp_dynamic_odm_policy = 0;
- bool temp_subvp_policy = 0;
+ struct dc_state *transition_context;
+ struct pipe_split_policy_backup policy;
enum dc_status ret = DC_ERROR_UNEXPECTED;
unsigned int i, j;
unsigned int pipe_in_use = 0;
bool subvp_in_use = false;
bool odm_in_use = false;
- if (!transition_context)
- return false;
- /* Setup:
- * Store the current ODM and MPC config in some temp variables to be
- * restored after we commit the transition state.
- */
-
/* check current pipes in use*/
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &transition_base_context->res_ctx.pipe_ctx[i];
@@ -4047,7 +4908,7 @@ static bool commit_minimal_transition_state(struct dc *dc,
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ if (pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) {
subvp_in_use = true;
break;
}
@@ -4057,10 +4918,10 @@ static bool commit_minimal_transition_state(struct dc *dc,
* pipe, we must use the minimal transition.
*/
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe = &transition_base_context->res_ctx.pipe_ctx[i];
- if (pipe->stream && pipe->next_odm_pipe) {
- odm_in_use = true;
+ if (resource_is_pipe_type(pipe, OTG_MASTER)) {
+ odm_in_use = resource_get_odm_slice_count(pipe) > 1;
break;
}
}
@@ -4073,53 +4934,24 @@ static bool commit_minimal_transition_state(struct dc *dc,
* Reduce the scenarios to use dc_commit_state_no_check in the stage of flip. Especially
* enter/exit MPO when DCN still have enough resources.
*/
- if (pipe_in_use != dc->res_pool->pipe_count && !subvp_in_use && !odm_in_use) {
- dc_release_state(transition_context);
+ if (pipe_in_use != dc->res_pool->pipe_count && !subvp_in_use && !odm_in_use)
return true;
- }
-
- if (!dc->config.is_vmin_only_asic) {
- tmp_mpc_policy = dc->debug.pipe_split_policy;
- dc->debug.pipe_split_policy = MPC_SPLIT_AVOID;
- }
-
- temp_dynamic_odm_policy = dc->debug.enable_single_display_2to1_odm_policy;
- dc->debug.enable_single_display_2to1_odm_policy = false;
-
- temp_subvp_policy = dc->debug.force_disable_subvp;
- dc->debug.force_disable_subvp = true;
-
- dc_resource_state_copy_construct(transition_base_context, transition_context);
-
- /* commit minimal state */
- if (dc->res_pool->funcs->validate_bandwidth(dc, transition_context, false)) {
- for (i = 0; i < transition_context->stream_count; i++) {
- struct dc_stream_status *stream_status = &transition_context->stream_status[i];
-
- for (j = 0; j < stream_status->plane_count; j++) {
- struct dc_plane_state *plane_state = stream_status->plane_states[j];
-
- /* force vsync flip when reconfiguring pipes to prevent underflow
- * and corruption
- */
- plane_state->flip_immediate = false;
- }
- }
+ DC_LOG_DC("%s base = %s state, reason = %s\n", __func__,
+ dc->current_state == transition_base_context ? "current" : "new",
+ subvp_in_use ? "Subvp In Use" :
+ odm_in_use ? "ODM in Use" :
+ dc->debug.pipe_split_policy != MPC_SPLIT_AVOID ? "MPC in Use" :
+ "Unknown");
+
+ dc_state_retain(transition_base_context);
+ transition_context = create_minimal_transition_state(dc,
+ transition_base_context, &policy);
+ if (transition_context) {
ret = dc_commit_state_no_check(dc, transition_context);
+ release_minimal_transition_state(dc, transition_context, transition_base_context, &policy);
}
-
- /* always release as dc_commit_state_no_check retains in good case */
- dc_release_state(transition_context);
-
- /* TearDown:
- * Restore original configuration for ODM and MPO.
- */
- if (!dc->config.is_vmin_only_asic)
- dc->debug.pipe_split_policy = tmp_mpc_policy;
-
- dc->debug.enable_single_display_2to1_odm_policy = temp_dynamic_odm_policy;
- dc->debug.force_disable_subvp = temp_subvp_policy;
+ dc_state_release(transition_base_context);
if (ret != DC_OK) {
/* this should never happen */
@@ -4137,42 +4969,7 @@ static bool commit_minimal_transition_state(struct dc *dc,
return true;
}
-/**
- * update_seamless_boot_flags() - Helper function for updating seamless boot flags
- *
- * @dc: Current DC state
- * @context: New DC state to be programmed
- * @surface_count: Number of surfaces that have an updated
- * @stream: Corresponding stream to be updated in the current flip
- *
- * Updating seamless boot flags do not need to be part of the commit sequence. This
- * helper function will update the seamless boot flags on each flip (if required)
- * outside of the HW commit sequence (fast or slow).
- *
- * Return: void
- */
-static void update_seamless_boot_flags(struct dc *dc,
- struct dc_state *context,
- int surface_count,
- struct dc_stream_state *stream)
-{
- if (get_seamless_boot_stream_count(context) > 0 && surface_count > 0) {
- /* Optimize seamless boot flag keeps clocks and watermarks high until
- * first flip. After first flip, optimization is required to lower
- * bandwidth. Important to note that it is expected UEFI will
- * only light up a single display on POST, therefore we only expect
- * one stream with seamless boot flag set.
- */
- if (stream->apply_seamless_boot_optimization) {
- stream->apply_seamless_boot_optimization = false;
-
- if (get_seamless_boot_stream_count(context) == 0)
- dc->optimized_required = true;
- }
- }
-}
-
-static void populate_fast_updates(struct dc_fast_update *fast_update,
+void populate_fast_updates(struct dc_fast_update *fast_update,
struct dc_surface_update *srf_updates,
int surface_count,
struct dc_stream_update *stream_update)
@@ -4182,6 +4979,9 @@ static void populate_fast_updates(struct dc_fast_update *fast_update,
if (stream_update) {
fast_update[0].out_transfer_func = stream_update->out_transfer_func;
fast_update[0].output_csc_transform = stream_update->output_csc_transform;
+ } else {
+ fast_update[0].out_transfer_func = NULL;
+ fast_update[0].output_csc_transform = NULL;
}
for (i = 0; i < surface_count; i++) {
@@ -4190,6 +4990,7 @@ static void populate_fast_updates(struct dc_fast_update *fast_update,
fast_update[i].gamut_remap_matrix = srf_updates[i].gamut_remap_matrix;
fast_update[i].input_csc_color_matrix = srf_updates[i].input_csc_color_matrix;
fast_update[i].coeff_reduction_factor = srf_updates[i].coeff_reduction_factor;
+ fast_update[i].cursor_csc_color_matrix = srf_updates[i].cursor_csc_color_matrix;
}
}
@@ -4206,6 +5007,7 @@ static bool fast_updates_exist(struct dc_fast_update *fast_update, int surface_c
fast_update[i].gamma ||
fast_update[i].gamut_remap_matrix ||
fast_update[i].input_csc_color_matrix ||
+ fast_update[i].cursor_csc_color_matrix ||
fast_update[i].coeff_reduction_factor)
return true;
}
@@ -4213,6 +5015,26 @@ static bool fast_updates_exist(struct dc_fast_update *fast_update, int surface_c
return false;
}
+bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count)
+{
+ int i;
+
+ if (fast_update[0].out_transfer_func ||
+ fast_update[0].output_csc_transform)
+ return true;
+
+ for (i = 0; i < surface_count; i++) {
+ if (fast_update[i].input_csc_color_matrix ||
+ fast_update[i].gamma ||
+ fast_update[i].gamut_remap_matrix ||
+ fast_update[i].coeff_reduction_factor ||
+ fast_update[i].cursor_csc_color_matrix)
+ return true;
+ }
+
+ return false;
+}
+
static bool full_update_required(struct dc *dc,
struct dc_surface_update *srf_updates,
int surface_count,
@@ -4230,13 +5052,17 @@ static bool full_update_required(struct dc *dc,
srf_updates[i].scaling_info ||
(srf_updates[i].hdr_mult.value &&
srf_updates[i].hdr_mult.value != srf_updates->surface->hdr_mult.value) ||
+ (srf_updates[i].sdr_white_level_nits &&
+ srf_updates[i].sdr_white_level_nits != srf_updates->surface->sdr_white_level_nits) ||
srf_updates[i].in_transfer_func ||
srf_updates[i].func_shaper ||
srf_updates[i].lut3d_func ||
- srf_updates[i].blend_tf ||
srf_updates[i].surface->force_full_update ||
(srf_updates[i].flip_addr &&
srf_updates[i].flip_addr->address.tmz_surface != srf_updates[i].surface->address.tmz_surface) ||
+ (srf_updates[i].cm2_params &&
+ (srf_updates[i].cm2_params->component_settings.shaper_3dlut_setting != srf_updates[i].surface->mcm_shaper_3dlut_setting ||
+ srf_updates[i].cm2_params->component_settings.lut1d_enable != srf_updates[i].surface->mcm_lut1d_enable)) ||
!is_surface_in_context(context, srf_updates[i].surface)))
return true;
}
@@ -4254,6 +5080,7 @@ static bool full_update_required(struct dc *dc,
stream_update->hfvsif_infopacket ||
stream_update->vtem_infopacket ||
stream_update->adaptive_sync_infopacket ||
+ stream_update->avi_infopacket ||
stream_update->dpms_off ||
stream_update->allow_freesync ||
stream_update->vrr_active_variable ||
@@ -4267,7 +5094,9 @@ static bool full_update_required(struct dc *dc,
stream_update->func_shaper ||
stream_update->lut3d_func ||
stream_update->pending_test_pattern ||
- stream_update->crtc_timing_adjust))
+ stream_update->crtc_timing_adjust ||
+ stream_update->scaler_sharpener_update ||
+ stream_update->hw_cursor_req))
return true;
if (stream) {
@@ -4278,6 +5107,9 @@ static bool full_update_required(struct dc *dc,
if (dc->idle_optimizations_allowed)
return true;
+ if (dc_can_clear_cursor_limit(dc))
+ return true;
+
return false;
}
@@ -4292,15 +5124,13 @@ static bool fast_update_only(struct dc *dc,
&& !full_update_required(dc, srf_updates, surface_count, stream_update, stream);
}
-bool dc_update_planes_and_stream(struct dc *dc,
+static bool update_planes_and_stream_v2(struct dc *dc,
struct dc_surface_update *srf_updates, int surface_count,
struct dc_stream_state *stream,
struct dc_stream_update *stream_update)
{
struct dc_state *context;
enum surface_update_type update_type;
- int i;
- struct mall_temp_config mall_temp_config;
struct dc_fast_update fast_update[MAX_SURFACES] = {0};
/* In cases where MPO and split or ODM are used transitions can
@@ -4309,18 +5139,22 @@ bool dc_update_planes_and_stream(struct dc *dc,
*/
bool force_minimal_pipe_splitting = 0;
bool is_plane_addition = 0;
+ bool is_fast_update_only;
populate_fast_updates(fast_update, srf_updates, surface_count, stream_update);
+ is_fast_update_only = fast_update_only(dc, fast_update, srf_updates,
+ surface_count, stream_update, stream);
force_minimal_pipe_splitting = could_mpcc_tree_change_for_active_pipes(
dc,
stream,
+ srf_updates,
surface_count,
&is_plane_addition);
/* on plane addition, minimal state is the current one */
if (force_minimal_pipe_splitting && is_plane_addition &&
!commit_minimal_transition_state(dc, dc->current_state))
- return false;
+ return false;
if (!update_planes_and_stream_state(
dc,
@@ -4334,35 +5168,20 @@ bool dc_update_planes_and_stream(struct dc *dc,
/* on plane removal, minimal state is the new one */
if (force_minimal_pipe_splitting && !is_plane_addition) {
- /* Since all phantom pipes are removed in full validation,
- * we have to save and restore the subvp/mall config when
- * we do a minimal transition since the flags marking the
- * pipe as subvp/phantom will be cleared (dc copy constructor
- * creates a shallow copy).
- */
- if (dc->res_pool->funcs->save_mall_state)
- dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
if (!commit_minimal_transition_state(dc, context)) {
- dc_release_state(context);
+ dc_state_release(context);
return false;
}
- if (dc->res_pool->funcs->restore_mall_state)
- dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);
-
- /* If we do a minimal transition with plane removal and the context
- * has subvp we also have to retain back the phantom stream / planes
- * since the refcount is decremented as part of the min transition
- * (we commit a state with no subvp, so the phantom streams / planes
- * had to be removed).
- */
- if (dc->res_pool->funcs->retain_phantom_pipes)
- dc->res_pool->funcs->retain_phantom_pipes(dc, context);
update_type = UPDATE_TYPE_FULL;
}
- update_seamless_boot_flags(dc, context, surface_count, stream);
- if (fast_update_only(dc, fast_update, srf_updates, surface_count, stream_update, stream) &&
- !dc->debug.enable_legacy_fast_update) {
+ if (dc->hwss.is_pipe_topology_transition_seamless &&
+ !dc->hwss.is_pipe_topology_transition_seamless(
+ dc, dc->current_state, context))
+ commit_minimal_transition_state_in_dc_update(dc, context, stream,
+ srf_updates, surface_count);
+
+ if (is_fast_update_only && !dc->debug.enable_legacy_fast_update) {
commit_planes_for_stream_fast(dc,
srf_updates,
surface_count,
@@ -4371,6 +5190,13 @@ bool dc_update_planes_and_stream(struct dc *dc,
update_type,
context);
} else {
+ if (!stream_update &&
+ dc->hwss.is_pipe_topology_transition_seamless &&
+ !dc->hwss.is_pipe_topology_transition_seamless(
+ dc, dc->current_state, context)) {
+ DC_LOG_ERROR("performing non-seamless pipe topology transition with surface only update!\n");
+ BREAK_TO_DEBUGGER();
+ }
commit_planes_for_stream(
dc,
srf_updates,
@@ -4380,143 +5206,33 @@ bool dc_update_planes_and_stream(struct dc *dc,
update_type,
context);
}
-
- if (dc->current_state != context) {
-
- /* Since memory free requires elevated IRQL, an interrupt
- * request is generated by mem free. If this happens
- * between freeing and reassigning the context, our vsync
- * interrupt will call into dc and cause a memory
- * corruption BSOD. Hence, we first reassign the context,
- * then free the old context.
- */
-
- struct dc_state *old = dc->current_state;
-
- dc->current_state = context;
- dc_release_state(old);
-
- // clear any forced full updates
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (pipe_ctx->plane_state && pipe_ctx->stream == stream)
- pipe_ctx->plane_state->force_full_update = false;
- }
- }
+ if (dc->current_state != context)
+ swap_and_release_current_context(dc, context, stream);
return true;
}
-void dc_commit_updates_for_stream(struct dc *dc,
- struct dc_surface_update *srf_updates,
- int surface_count,
+static void commit_planes_and_stream_update_on_current_context(struct dc *dc,
+ struct dc_surface_update *srf_updates, int surface_count,
struct dc_stream_state *stream,
struct dc_stream_update *stream_update,
- struct dc_state *state)
+ enum surface_update_type update_type)
{
- const struct dc_stream_status *stream_status;
- enum surface_update_type update_type;
- struct dc_state *context;
- struct dc_context *dc_ctx = dc->ctx;
- int i, j;
struct dc_fast_update fast_update[MAX_SURFACES] = {0};
- populate_fast_updates(fast_update, srf_updates, surface_count, stream_update);
- stream_status = dc_stream_get_status(stream);
- context = dc->current_state;
-
- update_type = dc_check_update_surfaces_for_stream(
- dc, srf_updates, surface_count, stream_update, stream_status);
-
- /* TODO: Since change commit sequence can have a huge impact,
- * we decided to only enable it for DCN3x. However, as soon as
- * we get more confident about this change we'll need to enable
- * the new sequence for all ASICs.
- */
- if (dc->ctx->dce_version >= DCN_VERSION_3_2) {
- /*
- * Previous frame finished and HW is ready for optimization.
- */
- if (update_type == UPDATE_TYPE_FAST)
- dc_post_update_surfaces_to_stream(dc);
-
- dc_update_planes_and_stream(dc, srf_updates,
- surface_count, stream,
- stream_update);
- return;
- }
-
- if (update_type >= update_surface_trace_level)
- update_surface_trace(dc, srf_updates, surface_count);
-
-
- if (update_type >= UPDATE_TYPE_FULL) {
-
- /* initialize scratch memory for building context */
- context = dc_create_state(dc);
- if (context == NULL) {
- DC_ERROR("Failed to allocate new validate context!\n");
- return;
- }
-
- dc_resource_state_copy_construct(state, context);
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
-
- if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state)
- new_pipe->plane_state->force_full_update = true;
- }
- } else if (update_type == UPDATE_TYPE_FAST) {
- /*
- * Previous frame finished and HW is ready for optimization.
- */
- dc_post_update_surfaces_to_stream(dc);
- }
-
-
- for (i = 0; i < surface_count; i++) {
- struct dc_plane_state *surface = srf_updates[i].surface;
-
- copy_surface_update_to_plane(surface, &srf_updates[i]);
-
- if (update_type >= UPDATE_TYPE_MED) {
- for (j = 0; j < dc->res_pool->pipe_count; j++) {
- struct pipe_ctx *pipe_ctx =
- &context->res_ctx.pipe_ctx[j];
-
- if (pipe_ctx->plane_state != surface)
- continue;
-
- resource_build_scaling_params(pipe_ctx);
- }
- }
- }
-
- copy_stream_update_to_stream(dc, context, stream, stream_update);
-
- if (update_type >= UPDATE_TYPE_FULL) {
- if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
- DC_ERROR("Mode validation failed for stream update!\n");
- dc_release_state(context);
- return;
- }
- }
-
- TRACE_DC_PIPE_STATE(pipe_ctx, i, MAX_PIPES);
-
- update_seamless_boot_flags(dc, context, surface_count, stream);
- if (fast_update_only(dc, fast_update, srf_updates, surface_count, stream_update, stream) &&
- !dc->debug.enable_legacy_fast_update) {
+ ASSERT(update_type < UPDATE_TYPE_FULL);
+ populate_fast_updates(fast_update, srf_updates, surface_count,
+ stream_update);
+ if (fast_update_only(dc, fast_update, srf_updates, surface_count,
+ stream_update, stream) &&
+ !dc->debug.enable_legacy_fast_update)
commit_planes_for_stream_fast(dc,
srf_updates,
surface_count,
stream,
stream_update,
update_type,
- context);
- } else {
+ dc->current_state);
+ else
commit_planes_for_stream(
dc,
srf_updates,
@@ -4524,32 +5240,154 @@ void dc_commit_updates_for_stream(struct dc *dc,
stream,
stream_update,
update_type,
- context);
- }
- /*update current_State*/
- if (dc->current_state != context) {
+ dc->current_state);
+}
- struct dc_state *old = dc->current_state;
+static void commit_planes_and_stream_update_with_new_context(struct dc *dc,
+ struct dc_surface_update *srf_updates, int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update,
+ enum surface_update_type update_type,
+ struct dc_state *new_context)
+{
+ ASSERT(update_type >= UPDATE_TYPE_FULL);
+ if (!dc->hwss.is_pipe_topology_transition_seamless(dc,
+ dc->current_state, new_context))
+ /*
+ * It is required by the feature design that all pipe topologies
+ * using extra free pipes for power saving purposes such as
+ * dynamic ODM or SubVp shall only be enabled when it can be
+ * transitioned seamlessly to AND from its minimal transition
+ * state. A minimal transition state is defined as the same dc
+ * state but with all power saving features disabled. So it uses
+ * the minimum pipe topology. When we can't seamlessly
+ * transition from state A to state B, we will insert the
+ * minimal transition state A' or B' in between so seamless
+ * transition between A and B can be made possible.
+ */
+ commit_minimal_transition_state_in_dc_update(dc, new_context,
+ stream, srf_updates, surface_count);
- dc->current_state = context;
- dc_release_state(old);
+ commit_planes_for_stream(
+ dc,
+ srf_updates,
+ surface_count,
+ stream,
+ stream_update,
+ update_type,
+ new_context);
+}
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+static bool update_planes_and_stream_v3(struct dc *dc,
+ struct dc_surface_update *srf_updates, int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update)
+{
+ struct dc_state *new_context;
+ enum surface_update_type update_type;
- if (pipe_ctx->plane_state && pipe_ctx->stream == stream)
- pipe_ctx->plane_state->force_full_update = false;
- }
- }
+ /*
+ * When this function returns true and new_context is not equal to
+ * current state, the function allocates and validates a new dc state
+ * and assigns it to new_context. The function expects that the caller
+ * is responsible to free this memory when new_context is no longer
+ * used. We swap current with new context and free current instead. So
+ * new_context's memory will live until the next full update after it is
+ * replaced by a newer context. Refer to the use of
+ * swap_and_release_current_context below.
+ */
+ if (!update_planes_and_stream_state(dc, srf_updates, surface_count,
+ stream, stream_update, &update_type,
+ &new_context))
+ return false;
- /* Legacy optimization path for DCE. */
- if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < DCE_VERSION_MAX) {
- dc_post_update_surfaces_to_stream(dc);
- TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce);
+ if (new_context == dc->current_state) {
+ commit_planes_and_stream_update_on_current_context(dc,
+ srf_updates, surface_count, stream,
+ stream_update, update_type);
+ } else {
+ commit_planes_and_stream_update_with_new_context(dc,
+ srf_updates, surface_count, stream,
+ stream_update, update_type, new_context);
+ swap_and_release_current_context(dc, new_context, stream);
}
- return;
+ return true;
+}
+
+static void clear_update_flags(struct dc_surface_update *srf_updates,
+ int surface_count, struct dc_stream_state *stream)
+{
+ int i;
+
+ if (stream)
+ stream->update_flags.raw = 0;
+
+ for (i = 0; i < surface_count; i++)
+ if (srf_updates[i].surface)
+ srf_updates[i].surface->update_flags.raw = 0;
+}
+
+bool dc_update_planes_and_stream(struct dc *dc,
+ struct dc_surface_update *srf_updates, int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update)
+{
+ bool ret = false;
+
+ dc_exit_ips_for_hw_access(dc);
+ /*
+ * update planes and stream version 3 separates FULL and FAST updates
+ * to their own sequences. It aims to clean up frequent checks for
+ * update type, resulting in unnecessary branching in logic flow. It also
+ * adds a new commit minimal transition sequence, which detects the need
+ * for minimal transition based on the actual comparison of current and
+ * new states instead of "predicting" it based on per feature software
+ * policy, i.e. could_mpcc_tree_change_for_active_pipes. The new commit
+ * minimal transition sequence is made universal to any power saving
+ * features that would use extra free pipes such as Dynamic ODM/MPC
+ * Combine, MPO or SubVp. Therefore there is no longer a need to
+ * specially handle compatibility problems with transitions among those
+ * features as they are now transparent to the new sequence.
+ */
+ if (dc->ctx->dce_version >= DCN_VERSION_4_01)
+ ret = update_planes_and_stream_v3(dc, srf_updates,
+ surface_count, stream, stream_update);
+ else
+ ret = update_planes_and_stream_v2(dc, srf_updates,
+ surface_count, stream, stream_update);
+ if (ret && (dc->ctx->dce_version >= DCN_VERSION_3_2 ||
+ dc->ctx->dce_version == DCN_VERSION_3_01))
+ clear_update_flags(srf_updates, surface_count, stream);
+
+ return ret;
+}
+
+void dc_commit_updates_for_stream(struct dc *dc,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+ struct dc_stream_state *stream,
+ struct dc_stream_update *stream_update,
+ struct dc_state *state)
+{
+ bool ret = false;
+
+ dc_exit_ips_for_hw_access(dc);
+ /* TODO: Since changing the commit sequence can have a huge impact,
+ * we decided to only enable it for DCN3x. However, as soon as
+ * we get more confident about this change we'll need to enable
+ * the new sequence for all ASICs.
+ */
+ if (dc->ctx->dce_version >= DCN_VERSION_4_01) {
+ ret = update_planes_and_stream_v3(dc, srf_updates, surface_count,
+ stream, stream_update);
+ } else {
+ ret = update_planes_and_stream_v2(dc, srf_updates, surface_count,
+ stream, stream_update);
+ }
+ if (ret && dc->ctx->dce_version >= DCN_VERSION_3_2)
+ clear_update_flags(srf_updates, surface_count, stream);
}
uint8_t dc_get_current_stream_count(struct dc *dc)
@@ -4592,60 +5430,49 @@ void dc_interrupt_ack(struct dc *dc, enum dc_irq_source src)
void dc_power_down_on_boot(struct dc *dc)
{
if (dc->ctx->dce_environment != DCE_ENV_VIRTUAL_HW &&
- dc->hwss.power_down_on_boot)
+ dc->hwss.power_down_on_boot) {
+ if (dc->caps.ips_support)
+ dc_exit_ips_for_hw_access(dc);
dc->hwss.power_down_on_boot(dc);
+ }
}
-void dc_set_power_state(
- struct dc *dc,
- enum dc_acpi_cm_power_state power_state)
+void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state)
{
- struct kref refcount;
- struct display_mode_lib *dml;
-
if (!dc->current_state)
return;
switch (power_state) {
case DC_ACPI_CM_POWER_STATE_D0:
- dc_resource_state_construct(dc, dc->current_state);
+ dc_state_construct(dc, dc->current_state);
+
+ dc_exit_ips_for_hw_access(dc);
dc_z10_restore(dc);
+ dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state);
+
dc->hwss.init_hw(dc);
if (dc->hwss.init_sys_ctx != NULL &&
dc->vm_pa_config.valid) {
dc->hwss.init_sys_ctx(dc->hwseq, dc, &dc->vm_pa_config);
}
+ break;
+ case DC_ACPI_CM_POWER_STATE_D3:
+ if (dc->caps.ips_support)
+ dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
+ if (dc->caps.ips_v2_support) {
+ if (dc->clk_mgr->funcs->set_low_power_state)
+ dc->clk_mgr->funcs->set_low_power_state(dc->clk_mgr);
+ }
break;
default:
ASSERT(dc->current_state->stream_count == 0);
- /* Zero out the current context so that on resume we start with
- * clean state, and dc hw programming optimizations will not
- * cause any trouble.
- */
- dml = kzalloc(sizeof(struct display_mode_lib),
- GFP_KERNEL);
-
- ASSERT(dml);
- if (!dml)
- return;
-
- /* Preserve refcount */
- refcount = dc->current_state->refcount;
- /* Preserve display mode lib */
- memcpy(dml, &dc->current_state->bw_ctx.dml, sizeof(struct display_mode_lib));
-
- dc_resource_state_destruct(dc->current_state);
- memset(dc->current_state, 0,
- sizeof(*dc->current_state));
+ dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state);
- dc->current_state->refcount = refcount;
- dc->current_state->bw_ctx.dml = *dml;
-
- kfree(dml);
+ dc_state_destruct(dc->current_state);
break;
}
@@ -4668,18 +5495,6 @@ bool dc_is_dmcu_initialized(struct dc *dc)
return false;
}
-void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info)
-{
- info->displayClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dispclk_khz;
- info->engineClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dcfclk_khz;
- info->memoryClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dramclk_khz;
- info->maxSupportedDppClock = (unsigned int)state->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz;
- info->dppClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dppclk_khz;
- info->socClock = (unsigned int)state->bw_ctx.bw.dcn.clk.socclk_khz;
- info->dcfClockDeepSleep = (unsigned int)state->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz;
- info->fClock = (unsigned int)state->bw_ctx.bw.dcn.clk.fclk_khz;
- info->phyClock = (unsigned int)state->bw_ctx.bw.dcn.clk.phyclk_khz;
-}
enum dc_status dc_set_clock(struct dc *dc, enum dc_clock_type clock_type, uint32_t clk_khz, uint32_t stepping)
{
if (dc->hwss.set_clock)
@@ -4722,9 +5537,65 @@ bool dc_set_psr_allow_active(struct dc *dc, bool enable)
return true;
}
-void dc_allow_idle_optimizations(struct dc *dc, bool allow)
+/* enable/disable eDP Replay without specifying a stream for eDP */
+bool dc_set_replay_allow_active(struct dc *dc, bool active)
{
- if (dc->debug.disable_idle_power_optimizations)
+ int i;
+ bool allow_active;
+
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ struct dc_link *link;
+ struct dc_stream_state *stream = dc->current_state->streams[i];
+
+ link = stream->link;
+ if (!link)
+ continue;
+
+ if (link->replay_settings.replay_feature_enabled) {
+ if (active && !link->replay_settings.replay_allow_active) {
+ allow_active = true;
+ if (!dc_link_set_replay_allow_active(link, &allow_active,
+ false, false, NULL))
+ return false;
+ } else if (!active && link->replay_settings.replay_allow_active) {
+ allow_active = false;
+ if (!dc_link_set_replay_allow_active(link, &allow_active,
+ true, false, NULL))
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/* set IPS disable state */
+bool dc_set_ips_disable(struct dc *dc, unsigned int disable_ips)
+{
+ dc_exit_ips_for_hw_access(dc);
+
+ dc->config.disable_ips = disable_ips;
+
+ return true;
+}
+
+void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const *caller_name)
+{
+ int idle_fclk_khz = 0, idle_dramclk_khz = 0, i = 0;
+ enum mall_stream_type subvp_pipe_type[MAX_PIPES] = {0};
+ struct pipe_ctx *pipe = NULL;
+ struct dc_state *context = dc->current_state;
+
+ if (dc->debug.disable_idle_power_optimizations) {
+ DC_LOG_DEBUG("%s: disabled\n", __func__);
+ return;
+ }
+
+ if (allow != dc->idle_optimizations_allowed)
+ DC_LOG_IPS("%s: allow_idle old=%d new=%d (caller=%s)\n", __func__,
+ dc->idle_optimizations_allowed, allow, caller_name);
+
+ if (dc->caps.ips_support && (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL))
return;
if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present)
@@ -4734,8 +5605,50 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow)
if (allow == dc->idle_optimizations_allowed)
return;
- if (dc->hwss.apply_idle_power_optimizations && dc->hwss.apply_idle_power_optimizations(dc, allow))
+ if (dc->hwss.apply_idle_power_optimizations && dc->clk_mgr != NULL &&
+ dc->hwss.apply_idle_power_optimizations(dc, allow)) {
dc->idle_optimizations_allowed = allow;
+ DC_LOG_DEBUG("%s: %s\n", __func__, allow ? "enabled" : "disabled");
+ }
+
+ // log idle clocks and sub vp pipe types at idle optimization time
+ if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_fclk)
+ idle_fclk_khz = dc->clk_mgr->funcs->get_hard_min_fclk(dc->clk_mgr);
+
+ if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_memclk)
+ idle_dramclk_khz = dc->clk_mgr->funcs->get_hard_min_memclk(dc->clk_mgr);
+
+ if (dc->res_pool && context) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe);
+ }
+ }
+ if (!dc->caps.is_apu)
+ DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n",
+ __func__, allow, idle_fclk_khz, idle_dramclk_khz, subvp_pipe_type[0], subvp_pipe_type[1], subvp_pipe_type[2],
+ subvp_pipe_type[3], subvp_pipe_type[4], subvp_pipe_type[5], caller_name);
+
+}
+
+void dc_exit_ips_for_hw_access_internal(struct dc *dc, const char *caller_name)
+{
+ if (dc->caps.ips_support)
+ dc_allow_idle_optimizations_internal(dc, false, caller_name);
+}
+
+bool dc_dmub_is_ips_idle_state(struct dc *dc)
+{
+ if (dc->debug.disable_idle_power_optimizations)
+ return false;
+
+ if (!dc->caps.ips_support || (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL))
+ return false;
+
+ if (!dc->ctx->dmub_srv)
+ return false;
+
+ return dc->ctx->dmub_srv->idle_allowed;
}
/* set min and max memory clock to lowest and highest DPM level, respectively */
@@ -4783,9 +5696,10 @@ static void blank_and_force_memclk(struct dc *dc, bool apply, unsigned int memcl
hubp->funcs->set_blank_regs(hubp, true);
}
}
-
- dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz);
- dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz);
+ if (dc->clk_mgr->funcs->set_max_memclk)
+ dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz);
+ if (dc->clk_mgr->funcs->set_min_memclk)
+ dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz);
for (i = 0; i < dc->res_pool->pipe_count; i++) {
pipe = &context->res_ctx.pipe_ctx[i];
@@ -4834,7 +5748,7 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable)
if (enable && !dc->clk_mgr->dc_mode_softmax_enabled) {
if (p_state_change_support) {
- if (funcMin <= softMax)
+ if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk)
dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, softMax);
// else: No-Op
} else {
@@ -4844,7 +5758,7 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable)
}
} else if (!enable && dc->clk_mgr->dc_mode_softmax_enabled) {
if (p_state_change_support) {
- if (funcMin <= softMax)
+ if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk)
dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, maxDPM);
// else: No-Op
} else {
@@ -4855,10 +5769,13 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable)
}
dc->clk_mgr->dc_mode_softmax_enabled = enable;
}
-bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc, struct dc_plane_state *plane,
+bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc,
+ unsigned int pitch,
+ unsigned int height,
+ enum surface_pixel_format format,
struct dc_cursor_attributes *cursor_attr)
{
- if (dc->hwss.does_plane_fit_in_mall && dc->hwss.does_plane_fit_in_mall(dc, plane, cursor_attr))
+ if (dc->hwss.does_plane_fit_in_mall && dc->hwss.does_plane_fit_in_mall(dc, pitch, height, format, cursor_attr))
return true;
return false;
}
@@ -4892,18 +5809,31 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc)
*/
bool dc_is_dmub_outbox_supported(struct dc *dc)
{
- /* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */
- if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
- dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
- !dc->debug.dpia_debug.bits.disable_dpia)
- return true;
+ if (!dc->caps.dmcub_support)
+ return false;
- if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 &&
- !dc->debug.dpia_debug.bits.disable_dpia)
- return true;
+ switch (dc->ctx->asic_id.chip_family) {
+
+ case FAMILY_YELLOW_CARP:
+ /* DCN31 B0 USB4 DPIA needs dmub notifications for interrupts */
+ if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
+ !dc->debug.dpia_debug.bits.disable_dpia)
+ return true;
+ break;
+
+ case AMDGPU_FAMILY_GC_11_0_1:
+ case AMDGPU_FAMILY_GC_11_5_0:
+ if (!dc->debug.dpia_debug.bits.disable_dpia)
+ return true;
+ break;
+
+ default:
+ break;
+ }
/* dmub aux needs dmub notifications to be enabled */
return dc->debug.enable_dmub_aux_for_legacy_ddc;
+
}
/**
@@ -5000,7 +5930,7 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc,
);
}
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -5054,7 +5984,7 @@ bool dc_process_dmub_set_config_async(struct dc *dc,
cmd.set_config_access.set_config_control.cmd_pkt.msg_type = payload->msg_type;
cmd.set_config_access.set_config_control.cmd_pkt.msg_data = payload->msg_data;
- if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
+ if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
/* command is not processed by dmub */
notify->sc_status = SET_CONFIG_UNKNOWN_ERROR;
return is_cmd_complete;
@@ -5097,7 +6027,7 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
cmd.set_mst_alloc_slots.mst_slots_control.instance = dc->links[link_index]->ddc_hw_inst;
cmd.set_mst_alloc_slots.mst_slots_control.mst_alloc_slots = mst_alloc_slots;
- if (!dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ if (!dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
/* command is not processed by dmub */
return DC_ERROR_UNEXPECTED;
@@ -5120,6 +6050,27 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
}
/**
+ * dc_process_dmub_dpia_set_tps_notification - Submits tps notification
+ *
+ * @dc: [in] dc structure
+ * @link_index: [in] link index
+ * @tps: [in] request tps
+ *
+ * Submits set_tps_notification command to dmub via inbox message
+ */
+void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps)
+{
+ union dmub_rb_cmd cmd = {0};
+
+ cmd.set_tps_notification.header.type = DMUB_CMD__DPIA;
+ cmd.set_tps_notification.header.sub_type = DMUB_CMD__DPIA_SET_TPS_NOTIFICATION;
+ cmd.set_tps_notification.tps_notification.instance = dc->links[link_index]->ddc_hw_inst;
+ cmd.set_tps_notification.tps_notification.tps = tps;
+
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/**
* dc_process_dmub_dpia_hpd_int_enable - Submits DPIA DPD interruption
*
* @dc: [in] dc structure
@@ -5135,7 +6086,7 @@ void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
cmd.dpia_hpd_int_enable.header.type = DMUB_CMD__DPIA_HPD_INT_ENABLE;
cmd.dpia_hpd_int_enable.enable = hpd_int_enable;
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
DC_LOG_DEBUG("%s: hpd_int_enable(%d)\n", __func__, hpd_int_enable);
}
@@ -5234,6 +6185,8 @@ bool dc_abm_save_restore(
struct dc_link *link = stream->sink->link;
struct dc_link *edp_links[MAX_NUM_EDP];
+ if (link->replay_settings.replay_feature_enabled)
+ return false;
/*find primary pipe associated with stream*/
for (i = 0; i < MAX_PIPES; i++) {
@@ -5270,14 +6223,158 @@ bool dc_abm_save_restore(
void dc_query_current_properties(struct dc *dc, struct dc_current_properties *properties)
{
unsigned int i;
- bool subvp_in_use = false;
+ unsigned int max_cursor_size = dc->caps.max_cursor_size;
+ unsigned int stream_cursor_size;
- for (i = 0; i < dc->current_state->stream_count; i++) {
- if (dc->current_state->streams[i]->mall_stream_config.type != SUBVP_NONE) {
- subvp_in_use = true;
- break;
+ if (dc->debug.allow_sw_cursor_fallback && dc->res_pool->funcs->get_max_hw_cursor_size) {
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ stream_cursor_size = dc->res_pool->funcs->get_max_hw_cursor_size(dc,
+ dc->current_state,
+ dc->current_state->streams[i]);
+
+ if (stream_cursor_size < max_cursor_size) {
+ max_cursor_size = stream_cursor_size;
+ }
}
}
- properties->cursor_size_limit = subvp_in_use ? 64 : dc->caps.max_cursor_size;
+
+ properties->cursor_size_limit = max_cursor_size;
+}
+
+/**
+ * dc_set_edp_power() - DM controls eDP power to be ON/OFF
+ *
+ * Called when DM wants to power on/off eDP.
+ * Only works on links that have the skip_implict_edp_power_control flag set.
+ *
+ * @dc: dc structure
+ * @edp_link: a link with eDP connector signal type
+ * @powerOn: power on/off eDP
+ *
+ * Return: void
+ */
+void dc_set_edp_power(const struct dc *dc, struct dc_link *edp_link,
+ bool powerOn)
+{
+ if (edp_link->connector_signal != SIGNAL_TYPE_EDP)
+ return;
+
+ if (edp_link->skip_implict_edp_power_control == false)
+ return;
+
+ edp_link->dc->link_srv->edp_set_panel_power(edp_link, powerOn);
+}
+
+/**
+ * dc_get_power_profile_for_dc_state() - extracts power profile from dc state
+ *
+ * Called when DM wants to make power policy decisions based on dc_state
+ *
+ * @context: Pointer to the dc_state from which the power profile is extracted.
+ *
+ * Return: The power profile structure containing the power level information.
+ */
+struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state *context)
+{
+ struct dc_power_profile profile = { 0 };
+
+ profile.power_level = !context->bw_ctx.bw.dcn.clk.p_state_change_support;
+ if (!context->clk_mgr || !context->clk_mgr->ctx || !context->clk_mgr->ctx->dc)
+ return profile;
+ struct dc *dc = context->clk_mgr->ctx->dc;
+
+ if (dc->res_pool->funcs->get_power_profile)
+ profile.power_level = dc->res_pool->funcs->get_power_profile(context);
+ return profile;
+}
+
+/**
+ * dc_get_det_buffer_size_from_state() - extracts detile buffer size from dc state
+ *
+ * This function is called to log the detile buffer size from the dc_state.
+ *
+ * @context: a pointer to the dc_state from which the detile buffer size is extracted.
+ *
+ * Return: the size of the detile buffer, or 0 if not available.
+ */
+unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context)
+{
+ struct dc *dc = context->clk_mgr->ctx->dc;
+
+ if (dc->res_pool->funcs->get_det_buffer_size)
+ return dc->res_pool->funcs->get_det_buffer_size(context);
+ else
+ return 0;
+}
+
+/**
+ * dc_get_host_router_index() - Get index of host router from a DPIA link
+ *
+ * This function returns the host router index of the target link if the target link is a DPIA link.
+ *
+ * @link: Pointer to the target link (input)
+ * @host_router_index: Pointer to store the host router index of the target link (output).
+ *
+ * Return: true if the host router index is found and valid.
+ *
+ */
+bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index)
+{
+ struct dc *dc;
+
+ if (!link || !host_router_index || link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
+ return false;
+
+ dc = link->ctx->dc;
+
+ if (link->link_index < dc->lowest_dpia_link_index)
+ return false;
+
+ *host_router_index = (link->link_index - dc->lowest_dpia_link_index) / dc->caps.num_of_dpias_per_host_router;
+ if (*host_router_index < dc->caps.num_of_host_routers)
+ return true;
+ else
+ return false;
+}
+
+bool dc_is_cursor_limit_pending(struct dc *dc)
+{
+ uint32_t i;
+
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ if (dc_stream_is_cursor_limit_pending(dc, dc->current_state->streams[i]))
+ return true;
+ }
+
+ return false;
}
+bool dc_can_clear_cursor_limit(struct dc *dc)
+{
+ uint32_t i;
+
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ if (dc_state_can_clear_stream_cursor_subvp_limit(dc->current_state->streams[i], dc->current_state))
+ return true;
+ }
+
+ return false;
+}
+
+void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst,
+ struct dc_underflow_debug_data *out_data)
+{
+ struct timing_generator *tg = NULL;
+
+ for (int i = 0; i < MAX_PIPES; i++) {
+ if (dc->res_pool->timing_generators[i] &&
+ dc->res_pool->timing_generators[i]->inst == primary_otg_inst) {
+ tg = dc->res_pool->timing_generators[i];
+ break;
+ }
+ }
+
+ dc_exit_ips_for_hw_access(dc);
+ if (dc->hwss.get_underflow_debug_data)
+ dc->hwss.get_underflow_debug_data(dc, tg, out_data);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
index 69f1c2b89a57..bbce751b485f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
@@ -36,6 +36,8 @@
#include "resource.h"
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
@@ -44,136 +46,11 @@
DC_LOG_IF_TRACE(__VA_ARGS__); \
} while (0)
-#define TIMING_TRACE(...) do {\
- if (dc->debug.timing_trace) \
- DC_LOG_SYNC(__VA_ARGS__); \
-} while (0)
-
#define CLOCK_TRACE(...) do {\
if (dc->debug.clock_trace) \
DC_LOG_BANDWIDTH_CALCS(__VA_ARGS__); \
} while (0)
-void pre_surface_trace(
- struct dc *dc,
- const struct dc_plane_state *const *plane_states,
- int surface_count)
-{
- int i;
- DC_LOGGER_INIT(dc->ctx->logger);
-
- for (i = 0; i < surface_count; i++) {
- const struct dc_plane_state *plane_state = plane_states[i];
-
- SURFACE_TRACE("Planes %d:\n", i);
-
- SURFACE_TRACE(
- "plane_state->visible = %d;\n"
- "plane_state->flip_immediate = %d;\n"
- "plane_state->address.type = %d;\n"
- "plane_state->address.grph.addr.quad_part = 0x%llX;\n"
- "plane_state->address.grph.meta_addr.quad_part = 0x%llX;\n"
- "plane_state->scaling_quality.h_taps = %d;\n"
- "plane_state->scaling_quality.v_taps = %d;\n"
- "plane_state->scaling_quality.h_taps_c = %d;\n"
- "plane_state->scaling_quality.v_taps_c = %d;\n",
- plane_state->visible,
- plane_state->flip_immediate,
- plane_state->address.type,
- plane_state->address.grph.addr.quad_part,
- plane_state->address.grph.meta_addr.quad_part,
- plane_state->scaling_quality.h_taps,
- plane_state->scaling_quality.v_taps,
- plane_state->scaling_quality.h_taps_c,
- plane_state->scaling_quality.v_taps_c);
-
- SURFACE_TRACE(
- "plane_state->src_rect.x = %d;\n"
- "plane_state->src_rect.y = %d;\n"
- "plane_state->src_rect.width = %d;\n"
- "plane_state->src_rect.height = %d;\n"
- "plane_state->dst_rect.x = %d;\n"
- "plane_state->dst_rect.y = %d;\n"
- "plane_state->dst_rect.width = %d;\n"
- "plane_state->dst_rect.height = %d;\n"
- "plane_state->clip_rect.x = %d;\n"
- "plane_state->clip_rect.y = %d;\n"
- "plane_state->clip_rect.width = %d;\n"
- "plane_state->clip_rect.height = %d;\n",
- plane_state->src_rect.x,
- plane_state->src_rect.y,
- plane_state->src_rect.width,
- plane_state->src_rect.height,
- plane_state->dst_rect.x,
- plane_state->dst_rect.y,
- plane_state->dst_rect.width,
- plane_state->dst_rect.height,
- plane_state->clip_rect.x,
- plane_state->clip_rect.y,
- plane_state->clip_rect.width,
- plane_state->clip_rect.height);
-
- SURFACE_TRACE(
- "plane_state->plane_size.surface_size.x = %d;\n"
- "plane_state->plane_size.surface_size.y = %d;\n"
- "plane_state->plane_size.surface_size.width = %d;\n"
- "plane_state->plane_size.surface_size.height = %d;\n"
- "plane_state->plane_size.surface_pitch = %d;\n",
- plane_state->plane_size.surface_size.x,
- plane_state->plane_size.surface_size.y,
- plane_state->plane_size.surface_size.width,
- plane_state->plane_size.surface_size.height,
- plane_state->plane_size.surface_pitch);
-
-
- SURFACE_TRACE(
- "plane_state->tiling_info.gfx8.num_banks = %d;\n"
- "plane_state->tiling_info.gfx8.bank_width = %d;\n"
- "plane_state->tiling_info.gfx8.bank_width_c = %d;\n"
- "plane_state->tiling_info.gfx8.bank_height = %d;\n"
- "plane_state->tiling_info.gfx8.bank_height_c = %d;\n"
- "plane_state->tiling_info.gfx8.tile_aspect = %d;\n"
- "plane_state->tiling_info.gfx8.tile_aspect_c = %d;\n"
- "plane_state->tiling_info.gfx8.tile_split = %d;\n"
- "plane_state->tiling_info.gfx8.tile_split_c = %d;\n"
- "plane_state->tiling_info.gfx8.tile_mode = %d;\n"
- "plane_state->tiling_info.gfx8.tile_mode_c = %d;\n",
- plane_state->tiling_info.gfx8.num_banks,
- plane_state->tiling_info.gfx8.bank_width,
- plane_state->tiling_info.gfx8.bank_width_c,
- plane_state->tiling_info.gfx8.bank_height,
- plane_state->tiling_info.gfx8.bank_height_c,
- plane_state->tiling_info.gfx8.tile_aspect,
- plane_state->tiling_info.gfx8.tile_aspect_c,
- plane_state->tiling_info.gfx8.tile_split,
- plane_state->tiling_info.gfx8.tile_split_c,
- plane_state->tiling_info.gfx8.tile_mode,
- plane_state->tiling_info.gfx8.tile_mode_c);
-
- SURFACE_TRACE(
- "plane_state->tiling_info.gfx8.pipe_config = %d;\n"
- "plane_state->tiling_info.gfx8.array_mode = %d;\n"
- "plane_state->color_space = %d;\n"
- "plane_state->dcc.enable = %d;\n"
- "plane_state->format = %d;\n"
- "plane_state->rotation = %d;\n"
- "plane_state->stereo_format = %d;\n",
- plane_state->tiling_info.gfx8.pipe_config,
- plane_state->tiling_info.gfx8.array_mode,
- plane_state->color_space,
- plane_state->dcc.enable,
- plane_state->format,
- plane_state->rotation,
- plane_state->stereo_format);
-
- SURFACE_TRACE("plane_state->tiling_info.gfx9.swizzle = %d;\n",
- plane_state->tiling_info.gfx9.swizzle);
-
- SURFACE_TRACE("\n");
- }
- SURFACE_TRACE("\n");
-}
-
void update_surface_trace(
struct dc *dc,
const struct dc_surface_update *updates,
@@ -304,43 +181,6 @@ void post_surface_trace(struct dc *dc)
}
-void context_timing_trace(
- struct dc *dc,
- struct resource_context *res_ctx)
-{
- int i;
- int h_pos[MAX_PIPES] = {0}, v_pos[MAX_PIPES] = {0};
- struct crtc_position position;
- unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
- DC_LOGGER_INIT(dc->ctx->logger);
-
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
- /* get_position() returns CRTC vertical/horizontal counter
- * hence not applicable for underlay pipe
- */
- if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx)
- continue;
-
- pipe_ctx->stream_res.tg->funcs->get_position(pipe_ctx->stream_res.tg, &position);
- h_pos[i] = position.horizontal_count;
- v_pos[i] = position.vertical_count;
- }
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
-
- if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx)
- continue;
-
- TIMING_TRACE("OTG_%d H_tot:%d V_tot:%d H_pos:%d V_pos:%d\n",
- pipe_ctx->stream_res.tg->inst,
- pipe_ctx->stream->timing.h_total,
- pipe_ctx->stream->timing.v_total,
- h_pos[i], v_pos[i]);
- }
-}
-
void context_clock_trace(
struct dc *dc,
struct dc_state *context)
@@ -426,9 +266,53 @@ char *dc_status_to_str(enum dc_status status)
return "Fail dp payload allocation";
case DC_FAIL_DP_LINK_BANDWIDTH:
return "Insufficient DP link bandwidth";
+ case DC_FAIL_HW_CURSOR_SUPPORT:
+ return "HW Cursor not supported";
+ case DC_FAIL_DP_TUNNEL_BW_VALIDATE:
+ return "Fail DP Tunnel BW validation";
case DC_ERROR_UNEXPECTED:
return "Unexpected error";
}
return "Unexpected status error";
}
+
+char *dc_pixel_encoding_to_str(enum dc_pixel_encoding pixel_encoding)
+{
+ switch (pixel_encoding) {
+ case PIXEL_ENCODING_RGB:
+ return "RGB";
+ case PIXEL_ENCODING_YCBCR422:
+ return "YUV422";
+ case PIXEL_ENCODING_YCBCR444:
+ return "YUV444";
+ case PIXEL_ENCODING_YCBCR420:
+ return "YUV420";
+ default:
+ return "Unknown";
+ }
+}
+
+char *dc_color_depth_to_str(enum dc_color_depth color_depth)
+{
+ switch (color_depth) {
+ case COLOR_DEPTH_666:
+ return "6-bpc";
+ case COLOR_DEPTH_888:
+ return "8-bpc";
+ case COLOR_DEPTH_101010:
+ return "10-bpc";
+ case COLOR_DEPTH_121212:
+ return "12-bpc";
+ case COLOR_DEPTH_141414:
+ return "14-bpc";
+ case COLOR_DEPTH_161616:
+ return "16-bpc";
+ case COLOR_DEPTH_999:
+ return "9-bpc";
+ case COLOR_DEPTH_111111:
+ return "11-bpc";
+ default:
+ return "Unknown";
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index f99ec1b0efaf..d82b1cb467f4 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -29,8 +29,12 @@
#include "hw_sequencer.h"
#include "hw_sequencer_private.h"
#include "basics/dc_common.h"
+#include "resource.h"
+#include "dc_dmub_srv.h"
+#include "dc_state_priv.h"
#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
+#define MAX_NUM_MCACHE 8
/* used as index in array of black_color_format */
enum black_color_format {
@@ -173,7 +177,7 @@ static bool is_ycbcr2020_type(
{
bool ret = false;
- if (color_space == COLOR_SPACE_2020_YCBCR)
+ if (color_space == COLOR_SPACE_2020_YCBCR_LIMITED || color_space == COLOR_SPACE_2020_YCBCR_FULL)
ret = true;
return ret;
}
@@ -244,7 +248,8 @@ void color_space_to_black_color(
case COLOR_SPACE_YCBCR709_BLACK:
case COLOR_SPACE_YCBCR601_LIMITED:
case COLOR_SPACE_YCBCR709_LIMITED:
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
+ case COLOR_SPACE_2020_YCBCR_FULL:
*black_color = black_color_format[BLACK_COLOR_FORMAT_YUV_CV];
break;
@@ -309,11 +314,11 @@ void get_mpctree_visual_confirm_color(
{
const struct tg_color pipe_colors[6] = {
{MAX_TG_COLOR_VALUE, 0, 0}, /* red */
- {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE / 4, 0}, /* orange */
{MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, 0}, /* yellow */
{0, MAX_TG_COLOR_VALUE, 0}, /* green */
+ {0, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE}, /* cyan */
{0, 0, MAX_TG_COLOR_VALUE}, /* blue */
- {MAX_TG_COLOR_VALUE / 2, 0, MAX_TG_COLOR_VALUE / 2}, /* purple */
+ {MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE}, /* magenta */
};
struct pipe_ctx *top_pipe = pipe_ctx;
@@ -389,10 +394,10 @@ void get_hdr_visual_confirm_color(
switch (top_pipe_ctx->plane_res.scl_data.format) {
case PIXEL_FORMAT_ARGB2101010:
- if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_PQ) {
+ if (top_pipe_ctx->stream->out_transfer_func.tf == TRANSFER_FUNCTION_PQ) {
/* HDR10, ARGB2101010 - set border color to red */
color->color_r_cr = color_value;
- } else if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
+ } else if (top_pipe_ctx->stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22) {
/* FreeSync 2 ARGB2101010 - set border color to pink */
color->color_r_cr = color_value;
color->color_b_cb = color_value;
@@ -400,10 +405,10 @@ void get_hdr_visual_confirm_color(
is_sdr = true;
break;
case PIXEL_FORMAT_FP16:
- if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_PQ) {
+ if (top_pipe_ctx->stream->out_transfer_func.tf == TRANSFER_FUNCTION_PQ) {
/* HDR10, FP16 - set border color to blue */
color->color_b_cb = color_value;
- } else if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
+ } else if (top_pipe_ctx->stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22) {
/* FreeSync 2 HDR - set border color to green */
color->color_g_y = color_value;
} else
@@ -422,56 +427,307 @@ void get_hdr_visual_confirm_color(
}
}
-void get_subvp_visual_confirm_color(
- struct dc *dc,
- struct dc_state *context,
- struct pipe_ctx *pipe_ctx,
- struct tg_color *color)
+/* Visual Confirm color definition for Smart Mux */
+void get_smartmux_visual_confirm_color(
+ struct dc *dc,
+ struct tg_color *color)
{
uint32_t color_value = MAX_TG_COLOR_VALUE;
- bool enable_subvp = false;
- int i;
- if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !context)
- return;
+ const struct tg_color sm_ver_colors[5] = {
+ {0, 0, 0}, /* SMUX_MUXCONTROL_UNSUPPORTED - Black */
+ {0, MAX_TG_COLOR_VALUE, 0}, /* SMUX_MUXCONTROL_v10 - Green */
+ {0, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE}, /* SMUX_MUXCONTROL_v15 - Cyan */
+ {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, 0}, /* SMUX_MUXCONTROL_MDM - Yellow */
+ {MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE}, /* SMUX_MUXCONTROL_vUNKNOWN - Magenta*/
+ };
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ if (dc->caps.is_apu) {
+ /* APU driving the eDP */
+ *color = sm_ver_colors[dc->config.smart_mux_version];
+ } else {
+ /* dGPU driving the eDP - red */
+ color->color_r_cr = color_value;
+ color->color_g_y = 0;
+ color->color_b_cb = 0;
+ }
+}
+
+/* Visual Confirm color definition for VABC */
+void get_vabc_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color)
+{
+ uint32_t color_value = MAX_TG_COLOR_VALUE;
+ struct dc_link *edp_link = NULL;
+
+ if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link) {
+ if (pipe_ctx->stream->link->connector_signal == SIGNAL_TYPE_EDP)
+ edp_link = pipe_ctx->stream->link;
+ }
- if (pipe->stream && pipe->stream->mall_stream_config.paired_stream &&
- pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
- /* SubVP enable - red */
+ if (edp_link) {
+ switch (edp_link->backlight_control_type) {
+ case BACKLIGHT_CONTROL_PWM:
+ color->color_r_cr = color_value;
color->color_g_y = 0;
color->color_b_cb = 0;
+ break;
+ case BACKLIGHT_CONTROL_AMD_AUX:
+ color->color_r_cr = 0;
+ color->color_g_y = color_value;
+ color->color_b_cb = 0;
+ break;
+ case BACKLIGHT_CONTROL_VESA_AUX:
+ color->color_r_cr = 0;
+ color->color_g_y = 0;
+ color->color_b_cb = color_value;
+ break;
+ }
+ } else {
+ color->color_r_cr = 0;
+ color->color_g_y = 0;
+ color->color_b_cb = 0;
+ }
+}
+
+void get_subvp_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color)
+{
+ uint32_t color_value = MAX_TG_COLOR_VALUE;
+ if (pipe_ctx) {
+ switch (pipe_ctx->p_state_type) {
+ case P_STATE_SUB_VP:
color->color_r_cr = color_value;
- enable_subvp = true;
+ color->color_g_y = 0;
+ color->color_b_cb = 0;
+ break;
+ case P_STATE_DRR_SUB_VP:
+ color->color_r_cr = 0;
+ color->color_g_y = color_value;
+ color->color_b_cb = 0;
+ break;
+ case P_STATE_V_BLANK_SUB_VP:
+ color->color_r_cr = 0;
+ color->color_g_y = 0;
+ color->color_b_cb = color_value;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+void get_mclk_switch_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color)
+{
+ uint32_t color_value = MAX_TG_COLOR_VALUE;
- if (pipe_ctx->stream == pipe->stream)
- return;
+ if (pipe_ctx) {
+ switch (pipe_ctx->p_state_type) {
+ case P_STATE_V_BLANK:
+ color->color_r_cr = color_value;
+ color->color_g_y = color_value;
+ color->color_b_cb = 0;
+ break;
+ case P_STATE_FPO:
+ color->color_r_cr = 0;
+ color->color_g_y = color_value;
+ color->color_b_cb = color_value;
+ break;
+ case P_STATE_V_ACTIVE:
+ color->color_r_cr = color_value;
+ color->color_g_y = 0;
+ color->color_b_cb = color_value;
+ break;
+ case P_STATE_SUB_VP:
+ color->color_r_cr = color_value;
+ color->color_g_y = 0;
+ color->color_b_cb = 0;
+ break;
+ case P_STATE_DRR_SUB_VP:
+ color->color_r_cr = 0;
+ color->color_g_y = color_value;
+ color->color_b_cb = 0;
+ break;
+ case P_STATE_V_BLANK_SUB_VP:
+ color->color_r_cr = 0;
+ color->color_g_y = 0;
+ color->color_b_cb = color_value;
+ break;
+ default:
break;
}
}
+}
+
+void get_cursor_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color)
+{
+ uint32_t color_value = MAX_TG_COLOR_VALUE;
- if (enable_subvp && pipe_ctx->stream->mall_stream_config.type == SUBVP_NONE) {
+ if (pipe_ctx->stream && pipe_ctx->stream->cursor_position.enable) {
+ color->color_r_cr = color_value;
+ color->color_g_y = 0;
+ color->color_b_cb = 0;
+ } else {
color->color_r_cr = 0;
- if (pipe_ctx->stream->allow_freesync == 1) {
- /* SubVP enable and DRR on - green */
- color->color_b_cb = 0;
- color->color_g_y = color_value;
+ color->color_g_y = 0;
+ color->color_b_cb = color_value;
+ }
+}
+
+void get_dcc_visual_confirm_color(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color)
+{
+ const uint32_t MCACHE_ID_UNASSIGNED = 0xF;
+
+ if (!pipe_ctx->plane_state->dcc.enable) {
+ color->color_r_cr = 0; /* black - DCC disabled */
+ color->color_g_y = 0;
+ color->color_b_cb = 0;
+ return;
+ }
+
+ if (dc->ctx->dce_version < DCN_VERSION_4_01) {
+ color->color_r_cr = MAX_TG_COLOR_VALUE; /* red - DCC enabled */
+ color->color_g_y = 0;
+ color->color_b_cb = 0;
+ return;
+ }
+
+ uint32_t first_id = pipe_ctx->mcache_regs.main.p0.mcache_id_first;
+ uint32_t second_id = pipe_ctx->mcache_regs.main.p0.mcache_id_second;
+
+ if (first_id != MCACHE_ID_UNASSIGNED && second_id != MCACHE_ID_UNASSIGNED && first_id != second_id) {
+ color->color_r_cr = MAX_TG_COLOR_VALUE/2; /* grey - 2 mcache */
+ color->color_g_y = MAX_TG_COLOR_VALUE/2;
+ color->color_b_cb = MAX_TG_COLOR_VALUE/2;
+ }
+
+ else if (first_id != MCACHE_ID_UNASSIGNED || second_id != MCACHE_ID_UNASSIGNED) {
+ const struct tg_color id_colors[MAX_NUM_MCACHE] = {
+ {0, MAX_TG_COLOR_VALUE, 0}, /* green */
+ {0, 0, MAX_TG_COLOR_VALUE}, /* blue */
+ {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, 0}, /* yellow */
+ {MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE}, /* magenta */
+ {0, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE}, /* cyan */
+ {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE}, /* white */
+ {MAX_TG_COLOR_VALUE/2, 0, 0}, /* dark red */
+ {0, MAX_TG_COLOR_VALUE/2, 0}, /* dark green */
+ };
+
+ uint32_t assigned_id = (first_id != MCACHE_ID_UNASSIGNED) ? first_id : second_id;
+ *color = id_colors[assigned_id];
+ }
+}
+
+void set_p_state_switch_method(
+ struct dc *dc,
+ struct dc_state *context,
+ struct pipe_ctx *pipe_ctx)
+{
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ bool enable_subvp;
+
+ if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba)
+ return;
+
+ pipe_ctx->p_state_type = P_STATE_UNKNOWN;
+ if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] !=
+ dm_dram_clock_change_unsupported) {
+ /* MCLK switching is supported */
+ if (!pipe_ctx->has_vactive_margin) {
+ /* In Vblank - yellow */
+ pipe_ctx->p_state_type = P_STATE_V_BLANK;
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+ /* FPO + Vblank - cyan */
+ pipe_ctx->p_state_type = P_STATE_FPO;
+ }
} else {
- /* SubVP enable and No DRR - blue */
- color->color_g_y = 0;
- color->color_b_cb = color_value;
+ /* In Vactive - pink */
+ pipe_ctx->p_state_type = P_STATE_V_ACTIVE;
+ }
+
+ /* SubVP */
+ enable_subvp = false;
+
+ for (int i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && dc_state_get_paired_subvp_stream(context, pipe->stream) &&
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+ /* SubVP enable - red */
+ pipe_ctx->p_state_type = P_STATE_SUB_VP;
+ enable_subvp = true;
+
+ if (pipe_ctx->stream == pipe->stream)
+ return;
+ break;
+ }
+ }
+
+ if (enable_subvp && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_NONE) {
+ if (pipe_ctx->stream->allow_freesync == 1) {
+ /* SubVP enable and DRR on - green */
+ pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP;
+ } else {
+ /* SubVP enable and No DRR - blue */
+ pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP;
+ }
}
}
}
+void set_drr_and_clear_adjust_pending(
+ struct pipe_ctx *pipe_ctx,
+ struct dc_stream_state *stream,
+ struct drr_params *params)
+{
+ /* params can be null.*/
+ if (pipe_ctx && pipe_ctx->stream_res.tg &&
+ pipe_ctx->stream_res.tg->funcs->set_drr)
+ pipe_ctx->stream_res.tg->funcs->set_drr(
+ pipe_ctx->stream_res.tg, params);
+
+ if (stream)
+ stream->adjust.timing_adjust_pending = false;
+}
+
+void get_fams2_visual_confirm_color(
+ struct dc *dc,
+ struct dc_state *context,
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color)
+{
+ uint32_t color_value = MAX_TG_COLOR_VALUE;
+
+ if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !context || !dc->debug.fams2_config.bits.enable)
+ return;
+
+ /* driver only handles visual confirm when FAMS2 is disabled */
+ if (!dc_state_is_fams2_in_use(dc, context)) {
+ /* when FAMS2 is disabled, all pipes are grey */
+ color->color_g_y = color_value / 2;
+ color->color_b_cb = color_value / 2;
+ color->color_r_cr = color_value / 2;
+ }
+}
+
void hwss_build_fast_sequence(struct dc *dc,
struct dc_dmub_cmd *dc_dmub_cmd,
unsigned int dmub_cmd_count,
- struct block_sequence block_sequence[],
- int *num_steps,
- struct pipe_ctx *pipe_ctx)
+ struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE],
+ unsigned int *num_steps,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_stream_status *stream_status,
+ struct dc_state *context)
{
struct dc_plane_state *plane = pipe_ctx->plane_state;
struct dc_stream_state *stream = pipe_ctx->stream;
@@ -485,13 +741,27 @@ void hwss_build_fast_sequence(struct dc *dc,
if (!plane || !stream)
return;
+ if (dc->hwss.wait_for_dcc_meta_propagation) {
+ block_sequence[*num_steps].params.wait_for_dcc_meta_propagation_params.dc = dc;
+ block_sequence[*num_steps].params.wait_for_dcc_meta_propagation_params.top_pipe_to_program = pipe_ctx;
+ block_sequence[*num_steps].func = HUBP_WAIT_FOR_DCC_META_PROP;
+ (*num_steps)++;
+ }
if (dc->hwss.subvp_pipe_control_lock_fast) {
block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = true;
- block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx;
+ block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+ plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
(*num_steps)++;
}
+ if (dc->hwss.fams2_global_control_lock_fast) {
+ block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.dc = dc;
+ block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.lock = true;
+ block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.is_required = dc_state_is_fams2_in_use(dc, context);
+ block_sequence[*num_steps].func = DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST;
+ (*num_steps)++;
+ }
if (dc->hwss.pipe_control_lock) {
block_sequence[*num_steps].params.pipe_control_lock_params.dc = dc;
block_sequence[*num_steps].params.pipe_control_lock_params.lock = true;
@@ -512,48 +782,59 @@ void hwss_build_fast_sequence(struct dc *dc,
while (current_pipe) {
current_mpc_pipe = current_pipe;
while (current_mpc_pipe) {
- if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state && current_mpc_pipe->plane_state->update_flags.raw) {
- block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe;
- block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate;
- block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL;
- (*num_steps)++;
- }
- if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) {
- block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc;
- block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe;
- block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips;
- block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER;
- (*num_steps)++;
- }
- if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) {
- block_sequence[*num_steps].params.update_plane_addr_params.dc = dc;
- block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe;
- block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR;
- (*num_steps)++;
- }
+ if (current_mpc_pipe->plane_state) {
+ if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state->update_flags.raw) {
+ block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe;
+ block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate;
+ block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL;
+ (*num_steps)++;
+ }
+ if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) {
+ block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc;
+ block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe;
+ block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips;
+ block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER;
+ (*num_steps)++;
+ }
+ if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) {
+ if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) &&
+ stream_status->mall_stream_config.type == SUBVP_MAIN) {
+ block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
+ block_sequence[*num_steps].params.subvp_save_surf_addr.addr = &current_mpc_pipe->plane_state->address;
+ block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index;
+ block_sequence[*num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR;
+ (*num_steps)++;
+ }
+
+ block_sequence[*num_steps].params.update_plane_addr_params.dc = dc;
+ block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe;
+ block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR;
+ (*num_steps)++;
+ }
- if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) {
- block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc;
- block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe;
- block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state;
- block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC;
- (*num_steps)++;
- }
+ if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) {
+ block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc;
+ block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe;
+ block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state;
+ block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC;
+ (*num_steps)++;
+ }
- if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) {
- block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe;
- block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP;
- (*num_steps)++;
- }
- if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) {
- block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe;
- block_sequence[*num_steps].func = DPP_SETUP_DPP;
- (*num_steps)++;
- }
- if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) {
- block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe;
- block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE;
- (*num_steps)++;
+ if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) {
+ block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe;
+ block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP;
+ (*num_steps)++;
+ }
+ if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) {
+ block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe;
+ block_sequence[*num_steps].func = DPP_SETUP_DPP;
+ (*num_steps)++;
+ }
+ if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) {
+ block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe;
+ block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE;
+ (*num_steps)++;
+ }
}
if (hws->funcs.set_output_transfer_func && current_mpc_pipe->stream->update_flags.bits.out_tf) {
block_sequence[*num_steps].params.set_output_transfer_func_params.dc = dc;
@@ -562,7 +843,14 @@ void hwss_build_fast_sequence(struct dc *dc,
block_sequence[*num_steps].func = DPP_SET_OUTPUT_TRANSFER_FUNC;
(*num_steps)++;
}
-
+ if (dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE &&
+ dc->hwss.update_visual_confirm_color) {
+ block_sequence[*num_steps].params.update_visual_confirm_params.dc = dc;
+ block_sequence[*num_steps].params.update_visual_confirm_params.pipe_ctx = current_mpc_pipe;
+ block_sequence[*num_steps].params.update_visual_confirm_params.mpcc_id = current_mpc_pipe->plane_res.hubp->inst;
+ block_sequence[*num_steps].func = MPC_UPDATE_VISUAL_CONFIRM;
+ (*num_steps)++;
+ }
if (current_mpc_pipe->stream->update_flags.bits.out_csc) {
block_sequence[*num_steps].params.power_on_mpc_mem_pwr_params.mpc = dc->res_pool->mpc;
block_sequence[*num_steps].params.power_on_mpc_mem_pwr_params.mpcc_id = current_mpc_pipe->plane_res.hubp->inst;
@@ -601,10 +889,18 @@ void hwss_build_fast_sequence(struct dc *dc,
if (dc->hwss.subvp_pipe_control_lock_fast) {
block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc;
block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = false;
- block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.pipe_ctx = pipe_ctx;
+ block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.subvp_immediate_flip =
+ plane->flip_immediate && stream_status->mall_stream_config.type == SUBVP_MAIN;
block_sequence[*num_steps].func = DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST;
(*num_steps)++;
}
+ if (dc->hwss.fams2_global_control_lock_fast) {
+ block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.dc = dc;
+ block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.lock = false;
+ block_sequence[*num_steps].params.fams2_global_control_lock_fast_params.is_required = dc_state_is_fams2_in_use(dc, context);
+ block_sequence[*num_steps].func = DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST;
+ (*num_steps)++;
+ }
current_pipe = pipe_ctx;
while (current_pipe) {
@@ -626,7 +922,7 @@ void hwss_build_fast_sequence(struct dc *dc,
}
void hwss_execute_sequence(struct dc *dc,
- struct block_sequence block_sequence[],
+ struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE],
int num_steps)
{
unsigned int i;
@@ -697,6 +993,17 @@ void hwss_execute_sequence(struct dc *dc,
case DMUB_SEND_DMCUB_CMD:
hwss_send_dmcub_cmd(params);
break;
+ case DMUB_SUBVP_SAVE_SURF_ADDR:
+ hwss_subvp_save_surf_addr(params);
+ break;
+ case HUBP_WAIT_FOR_DCC_META_PROP:
+ dc->hwss.wait_for_dcc_meta_propagation(
+ params->wait_for_dcc_meta_propagation_params.dc,
+ params->wait_for_dcc_meta_propagation_params.top_pipe_to_program);
+ break;
+ case DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST:
+ dc->hwss.fams2_global_control_lock_fast(params);
+ break;
default:
ASSERT(false);
break;
@@ -710,7 +1017,7 @@ void hwss_send_dmcub_cmd(union block_sequence_params *params)
union dmub_rb_cmd *cmd = params->send_dmcub_cmd_params.cmd;
enum dm_dmub_wait_type wait_type = params->send_dmcub_cmd_params.wait_type;
- dm_execute_dmub_cmd(ctx, cmd, wait_type);
+ dc_wake_and_execute_dmub_cmd(ctx, cmd, wait_type);
}
void hwss_program_manual_trigger(union block_sequence_params *params)
@@ -727,6 +1034,9 @@ void hwss_setup_dpp(union block_sequence_params *params)
struct dpp *dpp = pipe_ctx->plane_res.dpp;
struct dc_plane_state *plane_state = pipe_ctx->plane_state;
+ if (!plane_state)
+ return;
+
if (dpp && dpp->funcs->dpp_setup) {
// program the input csc
dpp->funcs->dpp_setup(dpp,
@@ -736,6 +1046,12 @@ void hwss_setup_dpp(union block_sequence_params *params)
plane_state->color_space,
NULL);
}
+
+ if (dpp && dpp->funcs->set_cursor_matrix) {
+ dpp->funcs->set_cursor_matrix(dpp,
+ plane_state->color_space,
+ plane_state->cursor_csc_color_matrix);
+ }
}
void hwss_program_bias_and_scale(union block_sequence_params *params)
@@ -743,12 +1059,12 @@ void hwss_program_bias_and_scale(union block_sequence_params *params)
struct pipe_ctx *pipe_ctx = params->program_bias_and_scale_params.pipe_ctx;
struct dpp *dpp = pipe_ctx->plane_res.dpp;
struct dc_plane_state *plane_state = pipe_ctx->plane_state;
- struct dc_bias_and_scale bns_params = {0};
+ struct dc_bias_and_scale bns_params = plane_state->bias_and_scale;
//TODO :for CNVC set scale and bias registers if necessary
- build_prescale_params(&bns_params, plane_state);
- if (dpp->funcs->dpp_program_bias_and_scale)
+ if (dpp->funcs->dpp_program_bias_and_scale) {
dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);
+ }
}
void hwss_power_on_mpc_mem_pwr(union block_sequence_params *params)
@@ -789,40 +1105,13 @@ void hwss_set_ocsc_default(union block_sequence_params *params)
ocsc_mode);
}
-void get_mclk_switch_visual_confirm_color(
- struct dc *dc,
- struct dc_state *context,
- struct pipe_ctx *pipe_ctx,
- struct tg_color *color)
+void hwss_subvp_save_surf_addr(union block_sequence_params *params)
{
- uint32_t color_value = MAX_TG_COLOR_VALUE;
- struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
-
- if (!dc->ctx || !dc->ctx->dmub_srv || !pipe_ctx || !vba || !context)
- return;
-
- if (vba->DRAMClockChangeSupport[vba->VoltageLevel][vba->maxMpcComb] !=
- dm_dram_clock_change_unsupported) {
- /* MCLK switching is supported */
- if (!pipe_ctx->has_vactive_margin) {
- /* In Vblank - yellow */
- color->color_r_cr = color_value;
- color->color_g_y = color_value;
+ struct dc_dmub_srv *dc_dmub_srv = params->subvp_save_surf_addr.dc_dmub_srv;
+ const struct dc_plane_address *addr = params->subvp_save_surf_addr.addr;
+ uint8_t subvp_index = params->subvp_save_surf_addr.subvp_index;
- if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
- /* FPO + Vblank - cyan */
- color->color_r_cr = 0;
- color->color_g_y = color_value;
- color->color_b_cb = color_value;
- }
- } else {
- /* In Vactive - pink */
- color->color_r_cr = color_value;
- color->color_b_cb = color_value;
- }
- /* SubVP */
- get_subvp_visual_confirm_color(dc, context, pipe_ctx, color);
- }
+ dc_dmub_srv_subvp_save_surf_addr(dc_dmub_srv, addr, subvp_index);
}
void get_surface_tile_visual_confirm_color(
@@ -845,3 +1134,133 @@ void get_surface_tile_visual_confirm_color(
break;
}
}
+
+/**
+ * hwss_wait_for_all_blank_complete - wait for all active OPPs to finish pending blank
+ * pattern updates
+ *
+ * @dc: [in] dc reference
+ * @context: [in] hardware context in use
+ */
+void hwss_wait_for_all_blank_complete(struct dc *dc,
+ struct dc_state *context)
+{
+ struct pipe_ctx *opp_head;
+ struct dce_hwseq *hws = dc->hwseq;
+ int i;
+
+ if (!hws->funcs.wait_for_blank_complete)
+ return;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ opp_head = &context->res_ctx.pipe_ctx[i];
+
+ if (!resource_is_pipe_type(opp_head, OPP_HEAD) ||
+ dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM)
+ continue;
+
+ hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp);
+ }
+}
+
+void hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context)
+{
+ struct pipe_ctx *otg_master;
+ struct timing_generator *tg;
+ int i;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ otg_master = &context->res_ctx.pipe_ctx[i];
+ if (!resource_is_pipe_type(otg_master, OTG_MASTER) ||
+ dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM)
+ continue;
+ tg = otg_master->stream_res.tg;
+ if (tg->funcs->wait_odm_doublebuffer_pending_clear)
+ tg->funcs->wait_odm_doublebuffer_pending_clear(tg);
+ if (tg->funcs->wait_otg_disable)
+ tg->funcs->wait_otg_disable(tg);
+ }
+
+ /* An ODM update may require reprogramming the blank pattern for each OPP */
+ hwss_wait_for_all_blank_complete(dc, context);
+}
+
+void hwss_wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
+{
+ int i;
+ for (i = 0; i < MAX_PIPES; i++) {
+ int count = 0;
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM)
+ continue;
+
+ /* Timeout 100 ms */
+ while (count < 100000) {
+ /* Must set to false to start with, due to OR in update function */
+ pipe->plane_state->status.is_flip_pending = false;
+ dc->hwss.update_pending_status(pipe);
+ if (!pipe->plane_state->status.is_flip_pending)
+ break;
+ udelay(1);
+ count++;
+ }
+ ASSERT(!pipe->plane_state->status.is_flip_pending);
+ }
+}
+
+void hwss_wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context)
+{
+/*
+ * This function calls HWSS to wait for any potentially double buffered
+ * operations to complete. It should be invoked as a pre-amble prior
+ * to full update programming before asserting any HW locks.
+ */
+ int pipe_idx;
+ int opp_inst;
+ int opp_count = dc->res_pool->res_cap->num_opp;
+ struct hubp *hubp;
+ int mpcc_inst;
+ const struct pipe_ctx *pipe_ctx;
+
+ for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
+ pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx];
+
+ if (!pipe_ctx->stream)
+ continue;
+
+ /* For full update we must wait for all double buffer updates, not just DRR updates. This
+ * is particularly important for minimal transitions. Only check for OTG_MASTER pipes,
+ * as non-OTG Master pipes share the same OTG as their OTG_MASTER pipe.
+ */
+ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && dc->hwss.wait_for_all_pending_updates) {
+ dc->hwss.wait_for_all_pending_updates(pipe_ctx);
+ }
+
+ hubp = pipe_ctx->plane_res.hubp;
+ if (!hubp)
+ continue;
+
+ mpcc_inst = hubp->inst;
+ // MPCC inst is equal to pipe index in practice
+ for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
+ if ((dc->res_pool->opps[opp_inst] != NULL) &&
+ (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) {
+ dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
+ dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
+ break;
+ }
+ }
+ }
+ hwss_wait_for_odm_update_pending_complete(dc, dc_context);
+}
+
+void hwss_process_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context)
+{
+ /* wait for outstanding updates */
+ hwss_wait_for_outstanding_hw_updates(dc, dc_context);
+
+ /* perform outstanding post update programming */
+ if (dc->hwss.program_outstanding_updates)
+ dc->hwss.program_outstanding_updates(dc, dc_context);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
index be5a6d008b29..a180f68f711c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
@@ -24,7 +24,7 @@
#include "link_enc_cfg.h"
#include "resource.h"
-#include "link.h"
+#include "link_service.h"
#define DC_LOGGER dc->ctx->logger
@@ -44,20 +44,8 @@ static bool is_dig_link_enc_stream(struct dc_stream_state *stream)
* yet match.
*/
if (link_enc && ((uint32_t)stream->link->connector_signal & link_enc->output_signals)) {
- if (dc_is_dp_signal(stream->signal)) {
- /* DIGs do not support DP2.0 streams with 128b/132b encoding. */
- struct dc_link_settings link_settings = {0};
-
- stream->ctx->dc->link_srv->dp_decide_link_settings(stream, &link_settings);
- if ((link_settings.link_rate >= LINK_RATE_LOW) &&
- link_settings.link_rate <= LINK_RATE_HIGH3) {
- is_dig_stream = true;
- break;
- }
- } else {
- is_dig_stream = true;
- break;
- }
+ is_dig_stream = true;
+ break;
}
}
}
@@ -248,6 +236,8 @@ static struct link_encoder *get_link_enc_used_by_link(
for (i = 0; i < MAX_PIPES; i++) {
struct link_enc_assignment assignment = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i];
+ if (assignment.eng_id == ENGINE_ID_UNKNOWN)
+ continue;
if (assignment.valid == true && are_ep_ids_equal(&assignment.ep_id, &ep_id))
link_enc = link->dc->res_pool->link_encoders[assignment.eng_id - ENGINE_ID_DIGA];
@@ -517,6 +507,8 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
for (i = 0; i < MAX_PIPES; i++) {
struct link_enc_assignment assignment = get_assignment(dc, i);
+ if (assignment.eng_id == ENGINE_ID_UNKNOWN)
+ continue;
if (assignment.valid == true && are_ep_ids_equal(&assignment.ep_id, &ep_id)) {
link_enc = link->dc->res_pool->link_encoders[assignment.eng_id - ENGINE_ID_DIGA];
@@ -540,7 +532,7 @@ struct link_encoder *link_enc_cfg_get_next_avail_link_enc(struct dc *dc)
for (i = 0; i < MAX_PIPES; i++) {
struct link_enc_assignment assignment = get_assignment(dc, i);
- if (assignment.valid)
+ if (assignment.valid && assignment.eng_id != ENGINE_ID_UNKNOWN)
encs_assigned[assignment.eng_id - ENGINE_ID_DIGA] = assignment.eng_id;
}
@@ -555,17 +547,6 @@ struct link_encoder *link_enc_cfg_get_next_avail_link_enc(struct dc *dc)
return link_enc;
}
-struct link_encoder *link_enc_cfg_get_link_enc_used_by_stream(
- struct dc *dc,
- const struct dc_stream_state *stream)
-{
- struct link_encoder *link_enc;
-
- link_enc = link_enc_cfg_get_link_enc_used_by_link(dc, stream->link);
-
- return link_enc;
-}
-
struct link_encoder *link_enc_cfg_get_link_enc(
const struct dc_link *link)
{
@@ -602,6 +583,9 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_stream_current(
struct link_enc_assignment assignment =
dc->current_state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i];
+ if (assignment.eng_id == ENGINE_ID_UNKNOWN)
+ continue;
+
if (assignment.valid == true && are_ep_ids_equal(&assignment.ep_id, &ep_id)) {
link_enc = stream->link->dc->res_pool->link_encoders[assignment.eng_id - ENGINE_ID_DIGA];
break;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
index ed94187c2afa..9acd30019717 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
@@ -33,10 +33,14 @@
* dc.h with detail interface documentation, then add function implementation
* in this file which calls link functions.
*/
-#include "link.h"
+#include "link_service.h"
#include "dce/dce_i2c.h"
+
struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_index)
{
+ if (link_index >= MAX_LINKS)
+ return NULL;
+
return dc->links[link_index];
}
@@ -122,6 +126,14 @@ uint32_t dc_link_bandwidth_kbps(
return link->dc->link_srv->dp_link_bandwidth_kbps(link, link_settings);
}
+uint32_t dc_link_required_hblank_size_bytes(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params)
+{
+ return link->dc->link_srv->dp_required_hblank_size_bytes(link,
+ audio_params);
+}
+
void dc_get_cur_link_res_map(const struct dc *dc, uint32_t *map)
{
dc->link_srv->get_cur_res_map(dc, map);
@@ -139,6 +151,12 @@ bool dc_link_update_dsc_config(struct pipe_ctx *pipe_ctx)
return link->dc->link_srv->update_dsc_config(pipe_ctx);
}
+struct ddc_service *
+dc_get_oem_i2c_device(struct dc *dc)
+{
+ return dc->res_pool->oem_device;
+}
+
bool dc_is_oem_i2c_device_present(
struct dc *dc,
size_t slave_address)
@@ -353,15 +371,10 @@ bool dc_link_should_enable_fec(const struct dc_link *link)
return link->dc->link_srv->dp_should_enable_fec(link);
}
-int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
+void dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
struct dc_link *link, int peak_bw)
{
- return link->dc->link_srv->dpia_handle_usb4_bandwidth_allocation_for_link(link, peak_bw);
-}
-
-void dc_link_handle_usb4_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result)
-{
- link->dc->link_srv->dpia_handle_bw_alloc_response(link, bw, result);
+ link->dc->link_srv->dpia_handle_usb4_bandwidth_allocation_for_link(link, peak_bw);
}
bool dc_link_check_link_loss_status(
@@ -427,11 +440,10 @@ bool dc_link_get_backlight_level_nits(struct dc_link *link,
}
bool dc_link_set_backlight_level(const struct dc_link *link,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp)
+ struct set_backlight_level_params *backlight_level_params)
{
return link->dc->link_srv->edp_set_backlight_level(link,
- backlight_pwm_u16_16, frame_ramp);
+ backlight_level_params);
}
bool dc_link_set_backlight_level_nits(struct dc_link *link,
@@ -467,6 +479,13 @@ bool dc_link_setup_psr(struct dc_link *link,
return link->dc->link_srv->edp_setup_psr(link, stream, psr_config, psr_context);
}
+bool dc_link_set_replay_allow_active(struct dc_link *link, const bool *allow_active,
+ bool wait, bool force_static, const unsigned int *power_opts)
+{
+ return link->dc->link_srv->edp_set_replay_allow_active(link, allow_active, wait,
+ force_static, power_opts);
+}
+
bool dc_link_get_replay_state(const struct dc_link *link, uint64_t *state)
{
return link->dc->link_srv->edp_get_replay_state(link, state);
@@ -497,7 +516,15 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable)
link->dc->link_srv->enable_hpd_filter(link, enable);
}
-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count)
+enum dc_status dc_link_validate_dp_tunneling_bandwidth(const struct dc *dc, const struct dc_state *new_ctx)
{
- return dc->link_srv->validate_dpia_bandwidth(streams, count);
+ return dc->link_srv->validate_dp_tunnel_bandwidth(dc, new_ctx);
}
+
+void dc_link_get_alpm_support(struct dc_link *link,
+ bool *auxless_support,
+ bool *auxwake_support)
+{
+ link->dc->link_srv->edp_get_alpm_support(link, auxless_support, auxwake_support);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index f7b51aca6020..bc5dedf5f60c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -40,7 +40,11 @@
#include "virtual/virtual_stream_encoder.h"
#include "dpcd_defs.h"
#include "link_enc_cfg.h"
-#include "link.h"
+#include "link_service.h"
+#include "clk_mgr.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
+
#include "virtual/virtual_link_hwss.h"
#include "link/hwss/link_hwss_dio.h"
#include "link/hwss/link_hwss_dpia.h"
@@ -68,8 +72,15 @@
#include "dcn314/dcn314_resource.h"
#include "dcn315/dcn315_resource.h"
#include "dcn316/dcn316_resource.h"
-#include "../dcn32/dcn32_resource.h"
-#include "../dcn321/dcn321_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn321/dcn321_resource.h"
+#include "dcn35/dcn35_resource.h"
+#include "dcn351/dcn351_resource.h"
+#include "dcn36/dcn36_resource.h"
+#include "dcn401/dcn401_resource.h"
+#if defined(CONFIG_DRM_AMD_DC_FP)
+#include "dc_spl_translate.h"
+#endif
#define VISUAL_CONFIRM_BASE_DEFAULT 3
#define VISUAL_CONFIRM_BASE_MIN 1
@@ -81,7 +92,10 @@
*/
#define VISUAL_CONFIRM_DPP_OFFSET_DENO 240
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
+#include "dml2/dml2_wrapper.h"
#define UNABLE_TO_SPLIT -1
@@ -150,7 +164,13 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
case FAMILY_NV:
dc_version = DCN_VERSION_2_0;
- if (asic_id.chip_id == DEVICE_ID_NV_13FE || asic_id.chip_id == DEVICE_ID_NV_143F) {
+ if (asic_id.chip_id == DEVICE_ID_NV_13FE ||
+ asic_id.chip_id == DEVICE_ID_NV_143F ||
+ asic_id.chip_id == DEVICE_ID_NV_13F9 ||
+ asic_id.chip_id == DEVICE_ID_NV_13FA ||
+ asic_id.chip_id == DEVICE_ID_NV_13FB ||
+ asic_id.chip_id == DEVICE_ID_NV_13FC ||
+ asic_id.chip_id == DEVICE_ID_NV_13DB) {
dc_version = DCN_VERSION_2_01;
break;
}
@@ -186,6 +206,18 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
case AMDGPU_FAMILY_GC_11_0_1:
dc_version = DCN_VERSION_3_14;
break;
+ case AMDGPU_FAMILY_GC_11_5_0:
+ dc_version = DCN_VERSION_3_5;
+ if (ASICREV_IS_GC_11_0_4(asic_id.hw_internal_rev))
+ dc_version = DCN_VERSION_3_51;
+ if (ASICREV_IS_DCN36(asic_id.hw_internal_rev))
+ dc_version = DCN_VERSION_3_6;
+ break;
+ case AMDGPU_FAMILY_GC_12_0_0:
+ if (ASICREV_IS_GC_12_0_1_A0(asic_id.hw_internal_rev) ||
+ ASICREV_IS_GC_12_0_0_A0(asic_id.hw_internal_rev))
+ dc_version = DCN_VERSION_4_01;
+ break;
default:
dc_version = DCE_VERSION_UNKNOWN;
break;
@@ -290,6 +322,18 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc,
case DCN_VERSION_3_21:
res_pool = dcn321_create_resource_pool(init_data, dc);
break;
+ case DCN_VERSION_3_5:
+ res_pool = dcn35_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_3_51:
+ res_pool = dcn351_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_3_6:
+ res_pool = dcn36_create_resource_pool(init_data, dc);
+ break;
+ case DCN_VERSION_4_01:
+ res_pool = dcn401_create_resource_pool(init_data, dc);
+ break;
#endif /* CONFIG_DRM_AMD_DC_FP */
default:
break;
@@ -316,7 +360,7 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc,
return res_pool;
}
-void dc_destroy_resource_pool(struct dc *dc)
+void dc_destroy_resource_pool(struct dc *dc)
{
if (dc) {
if (dc->res_pool)
@@ -732,85 +776,6 @@ static inline void get_vp_scan_direction(
*flip_horz_scan_dir = !*flip_horz_scan_dir;
}
-int resource_get_num_mpc_splits(const struct pipe_ctx *pipe)
-{
- int mpc_split_count = 0;
- const struct pipe_ctx *other_pipe = pipe->bottom_pipe;
-
- while (other_pipe && other_pipe->plane_state == pipe->plane_state) {
- mpc_split_count++;
- other_pipe = other_pipe->bottom_pipe;
- }
- other_pipe = pipe->top_pipe;
- while (other_pipe && other_pipe->plane_state == pipe->plane_state) {
- mpc_split_count++;
- other_pipe = other_pipe->top_pipe;
- }
-
- return mpc_split_count;
-}
-
-int resource_get_num_odm_splits(const struct pipe_ctx *pipe)
-{
- int odm_split_count = 0;
-
- pipe = resource_get_otg_master(pipe);
-
- while (pipe->next_odm_pipe) {
- odm_split_count++;
- pipe = pipe->next_odm_pipe;
- }
- return odm_split_count;
-}
-
-static int get_odm_split_index(struct pipe_ctx *pipe_ctx)
-{
- int index = 0;
-
- pipe_ctx = resource_get_opp_head(pipe_ctx);
- if (!pipe_ctx)
- return 0;
-
- while (pipe_ctx->prev_odm_pipe) {
- index++;
- pipe_ctx = pipe_ctx->prev_odm_pipe;
- }
-
- return index;
-}
-
-static int get_mpc_split_index(struct pipe_ctx *pipe_ctx)
-{
- struct pipe_ctx *split_pipe = pipe_ctx->top_pipe;
- int index = 0;
-
- while (split_pipe && split_pipe->plane_state == pipe_ctx->plane_state) {
- index++;
- split_pipe = split_pipe->top_pipe;
- }
-
- return index;
-}
-
-/*
- * This is a preliminary vp size calculation to allow us to check taps support.
- * The result is completely overridden afterwards.
- */
-static void calculate_viewport_size(struct pipe_ctx *pipe_ctx)
-{
- struct scaler_data *data = &pipe_ctx->plane_res.scl_data;
-
- data->viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.horz, data->recout.width));
- data->viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.vert, data->recout.height));
- data->viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.horz_c, data->recout.width));
- data->viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.vert_c, data->recout.height));
- if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 ||
- pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) {
- swap(data->viewport.width, data->viewport.height);
- swap(data->viewport_c.width, data->viewport_c.height);
- }
-}
-
static struct rect intersect_rec(const struct rect *r0, const struct rect *r1)
{
struct rect rec;
@@ -841,32 +806,6 @@ static struct rect shift_rec(const struct rect *rec_in, int x, int y)
return rec_out;
}
-static struct rect calculate_odm_slice_in_timing_active(struct pipe_ctx *pipe_ctx)
-{
- const struct dc_stream_state *stream = pipe_ctx->stream;
- int odm_slice_count = resource_get_num_odm_splits(pipe_ctx) + 1;
- int odm_slice_idx = get_odm_split_index(pipe_ctx);
- bool is_last_odm_slice = (odm_slice_idx + 1) == odm_slice_count;
- int h_active = stream->timing.h_addressable +
- stream->timing.h_border_left +
- stream->timing.h_border_right;
- int odm_slice_width = h_active / odm_slice_count;
- struct rect odm_rec;
-
- odm_rec.x = odm_slice_width * odm_slice_idx;
- odm_rec.width = is_last_odm_slice ?
- /* last slice width is the reminder of h_active */
- h_active - odm_slice_width * (odm_slice_count - 1) :
- /* odm slice width is the floor of h_active / count */
- odm_slice_width;
- odm_rec.y = 0;
- odm_rec.height = stream->timing.v_addressable +
- stream->timing.v_border_bottom +
- stream->timing.v_border_top;
-
- return odm_rec;
-}
-
static struct rect calculate_plane_rec_in_timing_active(
struct pipe_ctx *pipe_ctx,
const struct rect *rec_in)
@@ -936,21 +875,21 @@ static struct rect calculate_plane_rec_in_timing_active(
struct rect rec_out = {0};
struct fixed31_32 temp;
- temp = dc_fixpt_from_fraction(rec_in->x * stream->dst.width,
+ temp = dc_fixpt_from_fraction(rec_in->x * (long long)stream->dst.width,
stream->src.width);
rec_out.x = stream->dst.x + dc_fixpt_round(temp);
temp = dc_fixpt_from_fraction(
- (rec_in->x + rec_in->width) * stream->dst.width,
+ (rec_in->x + rec_in->width) * (long long)stream->dst.width,
stream->src.width);
rec_out.width = stream->dst.x + dc_fixpt_round(temp) - rec_out.x;
- temp = dc_fixpt_from_fraction(rec_in->y * stream->dst.height,
+ temp = dc_fixpt_from_fraction(rec_in->y * (long long)stream->dst.height,
stream->src.height);
rec_out.y = stream->dst.y + dc_fixpt_round(temp);
temp = dc_fixpt_from_fraction(
- (rec_in->y + rec_in->height) * stream->dst.height,
+ (rec_in->y + rec_in->height) * (long long)stream->dst.height,
stream->src.height);
rec_out.height = stream->dst.y + dc_fixpt_round(temp) - rec_out.y;
@@ -962,8 +901,8 @@ static struct rect calculate_mpc_slice_in_timing_active(
struct rect *plane_clip_rec)
{
const struct dc_stream_state *stream = pipe_ctx->stream;
- int mpc_slice_count = resource_get_num_mpc_splits(pipe_ctx) + 1;
- int mpc_slice_idx = get_mpc_split_index(pipe_ctx);
+ int mpc_slice_count = resource_get_mpc_slice_count(pipe_ctx);
+ int mpc_slice_idx = resource_get_mpc_slice_index(pipe_ctx);
int epimo = mpc_slice_count - plane_clip_rec->width % mpc_slice_count - 1;
struct rect mpc_rec;
@@ -975,6 +914,9 @@ static struct rect calculate_mpc_slice_in_timing_active(
stream->view_format != VIEW_3D_FORMAT_SIDE_BY_SIDE ||
mpc_rec.width % 2 == 0);
+ if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE)
+ mpc_rec.x -= (mpc_rec.width * mpc_slice_idx);
+
/* extra pixels in the division remainder need to go to pipes after
* the extra pixel index minus one(epimo) defined here as:
*/
@@ -990,24 +932,44 @@ static struct rect calculate_mpc_slice_in_timing_active(
return mpc_rec;
}
-static void adjust_recout_for_visual_confirm(struct rect *recout,
- struct pipe_ctx *pipe_ctx)
+static void calculate_adjust_recout_for_visual_confirm(struct pipe_ctx *pipe_ctx,
+ int *base_offset, int *dpp_offset)
{
struct dc *dc = pipe_ctx->stream->ctx->dc;
- int dpp_offset, base_offset;
+ *base_offset = 0;
+ *dpp_offset = 0;
- if (dc->debug.visual_confirm == VISUAL_CONFIRM_DISABLE)
+ if (dc->debug.visual_confirm == VISUAL_CONFIRM_DISABLE || !pipe_ctx->plane_res.dpp)
return;
- dpp_offset = pipe_ctx->stream->timing.v_addressable / VISUAL_CONFIRM_DPP_OFFSET_DENO;
- dpp_offset *= pipe_ctx->plane_res.dpp->inst;
+ *dpp_offset = pipe_ctx->stream->timing.v_addressable / VISUAL_CONFIRM_DPP_OFFSET_DENO;
+ *dpp_offset *= pipe_ctx->plane_res.dpp->inst;
if ((dc->debug.visual_confirm_rect_height >= VISUAL_CONFIRM_BASE_MIN) &&
dc->debug.visual_confirm_rect_height <= VISUAL_CONFIRM_BASE_MAX)
- base_offset = dc->debug.visual_confirm_rect_height;
+ *base_offset = dc->debug.visual_confirm_rect_height;
else
- base_offset = VISUAL_CONFIRM_BASE_DEFAULT;
+ *base_offset = VISUAL_CONFIRM_BASE_DEFAULT;
+}
+
+static void reverse_adjust_recout_for_visual_confirm(struct rect *recout,
+ struct pipe_ctx *pipe_ctx)
+{
+ int dpp_offset, base_offset;
+
+ calculate_adjust_recout_for_visual_confirm(pipe_ctx, &base_offset,
+ &dpp_offset);
+ recout->height += base_offset;
+ recout->height += dpp_offset;
+}
+
+static void adjust_recout_for_visual_confirm(struct rect *recout,
+ struct pipe_ctx *pipe_ctx)
+{
+ int dpp_offset, base_offset;
+ calculate_adjust_recout_for_visual_confirm(pipe_ctx, &base_offset,
+ &dpp_offset);
recout->height -= base_offset;
recout->height -= dpp_offset;
}
@@ -1142,7 +1104,7 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx)
*/
struct rect plane_clip;
struct rect mpc_slice_of_plane_clip;
- struct rect odm_slice;
+ struct rect odm_slice_src;
struct rect overlapping_area;
plane_clip = calculate_plane_rec_in_timing_active(pipe_ctx,
@@ -1152,16 +1114,16 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx)
&pipe_ctx->stream->dst);
mpc_slice_of_plane_clip = calculate_mpc_slice_in_timing_active(
pipe_ctx, &plane_clip);
- odm_slice = calculate_odm_slice_in_timing_active(pipe_ctx);
- overlapping_area = intersect_rec(&mpc_slice_of_plane_clip, &odm_slice);
+ odm_slice_src = resource_get_odm_slice_src_rect(pipe_ctx);
+ overlapping_area = intersect_rec(&mpc_slice_of_plane_clip, &odm_slice_src);
if (overlapping_area.height > 0 &&
overlapping_area.width > 0) {
/* shift the overlapping area so it is with respect to current
- * ODM slice's position
+ * ODM slice source's position
*/
pipe_ctx->plane_res.scl_data.recout = shift_rec(
&overlapping_area,
- -odm_slice.x, -odm_slice.y);
+ -odm_slice_src.x, -odm_slice_src.y);
adjust_recout_for_visual_confirm(
&pipe_ctx->plane_res.scl_data.recout,
pipe_ctx);
@@ -1298,13 +1260,13 @@ static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx)
struct rect recout_clip_in_active_timing;
struct rect recout_clip_in_recout_dst;
struct rect overlap_in_active_timing;
- struct rect odm_slice = calculate_odm_slice_in_timing_active(pipe_ctx);
+ struct rect odm_slice_src = resource_get_odm_slice_src_rect(pipe_ctx);
int vpc_div = (data->format == PIXEL_FORMAT_420BPP8
|| data->format == PIXEL_FORMAT_420BPP10) ? 2 : 1;
bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir;
recout_clip_in_active_timing = shift_rec(
- &data->recout, odm_slice.x, odm_slice.y);
+ &data->recout, odm_slice_src.x, odm_slice_src.y);
recout_dst_in_active_timing = calculate_plane_rec_in_timing_active(
pipe_ctx, &plane_state->dst_rect);
overlap_in_active_timing = intersect_rec(&recout_clip_in_active_timing,
@@ -1385,16 +1347,112 @@ static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx)
data->viewport_c.y += src.y / vpc_div;
}
+static enum controller_dp_test_pattern convert_dp_to_controller_test_pattern(
+ enum dp_test_pattern test_pattern)
+{
+ enum controller_dp_test_pattern controller_test_pattern;
+
+ switch (test_pattern) {
+ case DP_TEST_PATTERN_COLOR_SQUARES:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_COLORSQUARES;
+ break;
+ case DP_TEST_PATTERN_COLOR_SQUARES_CEA:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA;
+ break;
+ case DP_TEST_PATTERN_VERTICAL_BARS:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_VERTICALBARS;
+ break;
+ case DP_TEST_PATTERN_HORIZONTAL_BARS:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_HORIZONTALBARS;
+ break;
+ case DP_TEST_PATTERN_COLOR_RAMP:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_COLORRAMP;
+ break;
+ default:
+ controller_test_pattern =
+ CONTROLLER_DP_TEST_PATTERN_VIDEOMODE;
+ break;
+ }
+
+ return controller_test_pattern;
+}
+
+static enum controller_dp_color_space convert_dp_to_controller_color_space(
+ enum dp_test_pattern_color_space color_space)
+{
+ enum controller_dp_color_space controller_color_space;
+
+ switch (color_space) {
+ case DP_TEST_PATTERN_COLOR_SPACE_RGB:
+ controller_color_space = CONTROLLER_DP_COLOR_SPACE_RGB;
+ break;
+ case DP_TEST_PATTERN_COLOR_SPACE_YCBCR601:
+ controller_color_space = CONTROLLER_DP_COLOR_SPACE_YCBCR601;
+ break;
+ case DP_TEST_PATTERN_COLOR_SPACE_YCBCR709:
+ controller_color_space = CONTROLLER_DP_COLOR_SPACE_YCBCR709;
+ break;
+ case DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED:
+ default:
+ controller_color_space = CONTROLLER_DP_COLOR_SPACE_UDEFINED;
+ break;
+ }
+
+ return controller_color_space;
+}
+
+void resource_build_test_pattern_params(struct resource_context *res_ctx,
+ struct pipe_ctx *otg_master)
+{
+ struct pipe_ctx *opp_heads[MAX_PIPES];
+ struct test_pattern_params *params;
+ int odm_cnt;
+ enum controller_dp_test_pattern controller_test_pattern;
+ enum controller_dp_color_space controller_color_space;
+ enum dc_color_depth color_depth = otg_master->stream->timing.display_color_depth;
+ struct rect odm_slice_src;
+ int i;
+
+ controller_test_pattern = convert_dp_to_controller_test_pattern(
+ otg_master->stream->test_pattern.type);
+ controller_color_space = convert_dp_to_controller_color_space(
+ otg_master->stream->test_pattern.color_space);
+
+ if (controller_test_pattern == CONTROLLER_DP_TEST_PATTERN_VIDEOMODE)
+ return;
+
+ odm_cnt = resource_get_opp_heads_for_otg_master(otg_master, res_ctx, opp_heads);
+
+ for (i = 0; i < odm_cnt; i++) {
+ odm_slice_src = resource_get_odm_slice_src_rect(opp_heads[i]);
+ params = &opp_heads[i]->stream_res.test_pattern_params;
+ params->test_pattern = controller_test_pattern;
+ params->color_space = controller_color_space;
+ params->color_depth = color_depth;
+ params->height = odm_slice_src.height;
+ params->offset = odm_slice_src.x;
+ params->width = odm_slice_src.width;
+ }
+}
+
bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
{
const struct dc_plane_state *plane_state = pipe_ctx->plane_state;
struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
- const struct rect odm_slice_rec = calculate_odm_slice_in_timing_active(pipe_ctx);
+ const struct rect odm_slice_src = resource_get_odm_slice_src_rect(pipe_ctx);
+ struct scaling_taps temp = {0};
bool res = false;
+
DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
/* Invalid input */
- if (!plane_state->dst_rect.width ||
+ if (!plane_state ||
+ !plane_state->dst_rect.width ||
!plane_state->dst_rect.height ||
!plane_state->src_rect.width ||
!plane_state->src_rect.height) {
@@ -1402,9 +1460,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
return false;
}
- pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface(
- pipe_ctx->plane_state->format);
-
/* Timing borders are part of vactive that we are also supposed to skip in addition
* to any stream dst offset. Since dm logic assumes dst is in addressable
* space we need to add the left and top borders to dst offsets temporarily.
@@ -1414,22 +1469,45 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
pipe_ctx->stream->dst.y += timing->v_border_top;
/* Calculate H and V active size */
- pipe_ctx->plane_res.scl_data.h_active = odm_slice_rec.width;
- pipe_ctx->plane_res.scl_data.v_active = odm_slice_rec.height;
+ pipe_ctx->plane_res.scl_data.h_active = odm_slice_src.width;
+ pipe_ctx->plane_res.scl_data.v_active = odm_slice_src.height;
+ pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface(
+ pipe_ctx->plane_state->format);
+
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ if ((pipe_ctx->stream->ctx->dc->config.use_spl) && (!pipe_ctx->stream->ctx->dc->debug.disable_spl)) {
+ struct spl_in *spl_in = &pipe_ctx->plane_res.spl_in;
+ struct spl_out *spl_out = &pipe_ctx->plane_res.spl_out;
+ if (plane_state->ctx->dce_version > DCE_VERSION_MAX)
+ pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
+ else
+ pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
+
+ pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha;
+
+ // Convert pipe_ctx to respective input params for SPL
+ translate_SPL_in_params_from_pipe_ctx(pipe_ctx, spl_in);
+ /* Pass visual confirm debug information */
+ calculate_adjust_recout_for_visual_confirm(pipe_ctx,
+ &spl_in->debug.visual_confirm_base_offset,
+ &spl_in->debug.visual_confirm_dpp_offset);
+ // Set SPL output parameters to dscl_prog_data to be used for hw registers
+ spl_out->dscl_prog_data = resource_get_dscl_prog_data(pipe_ctx);
+ // Calculate scaler parameters from SPL
+ res = spl_calculate_scaler_params(spl_in, spl_out);
+ // Convert respective out params from SPL to scaler data
+ translate_SPL_out_params_to_pipe_ctx(pipe_ctx, spl_out);
+
+ /* Ignore scaler failure if pipe context plane is phantom plane */
+ if (!res && plane_state->is_phantom)
+ res = true;
+ } else {
+#endif
/* depends on h_active */
calculate_recout(pipe_ctx);
/* depends on pixel format */
calculate_scaling_ratios(pipe_ctx);
- /* depends on scaling ratios and recout, does not calculate offset yet */
- calculate_viewport_size(pipe_ctx);
-
- if (!pipe_ctx->stream->ctx->dc->config.enable_windowed_mpo_odm) {
- /* Stopgap for validation of ODM + MPO on one side of screen case */
- if (pipe_ctx->plane_res.scl_data.viewport.height < 1 ||
- pipe_ctx->plane_res.scl_data.viewport.width < 1)
- return false;
- }
/*
* LB calculations depend on vp size, h/v_active and scaling ratios
@@ -1450,6 +1528,24 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha;
+ // get TAP value with 100x100 dummy data for max scaling qualify, override
+ // if a new scaling quality required
+ pipe_ctx->plane_res.scl_data.viewport.width = 100;
+ pipe_ctx->plane_res.scl_data.viewport.height = 100;
+ pipe_ctx->plane_res.scl_data.viewport_c.width = 100;
+ pipe_ctx->plane_res.scl_data.viewport_c.height = 100;
+ if (pipe_ctx->plane_res.xfm != NULL)
+ res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps(
+ pipe_ctx->plane_res.xfm, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality);
+
+ if (pipe_ctx->plane_res.dpp != NULL)
+ res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps(
+ pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality);
+
+ temp = pipe_ctx->plane_res.scl_data.taps;
+
+ calculate_inits_and_viewports(pipe_ctx);
+
if (pipe_ctx->plane_res.xfm != NULL)
res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps(
pipe_ctx->plane_res.xfm, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality);
@@ -1476,11 +1572,14 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
&plane_state->scaling_quality);
}
- /*
- * Depends on recout, scaling ratios, h_active and taps
- * May need to re-check lb size after this in some obscure scenario
- */
- if (res)
+ /* Ignore scaler failure if pipe context plane is phantom plane */
+ if (!res && plane_state->is_phantom)
+ res = true;
+
+ if (res && (pipe_ctx->plane_res.scl_data.taps.v_taps != temp.v_taps ||
+ pipe_ctx->plane_res.scl_data.taps.h_taps != temp.h_taps ||
+ pipe_ctx->plane_res.scl_data.taps.v_taps_c != temp.v_taps_c ||
+ pipe_ctx->plane_res.scl_data.taps.h_taps_c != temp.h_taps_c))
calculate_inits_and_viewports(pipe_ctx);
/*
@@ -1503,8 +1602,9 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_res.scl_data.viewport.height = MIN_VIEWPORT_SIZE;
if (pipe_ctx->plane_res.scl_data.viewport.width < MIN_VIEWPORT_SIZE)
pipe_ctx->plane_res.scl_data.viewport.width = MIN_VIEWPORT_SIZE;
-
-
+#ifdef CONFIG_DRM_AMD_DC_FP
+ }
+#endif
DC_LOG_SCALER("%s pipe %d:\nViewport: height:%d width:%d x:%d y:%d Recout: height:%d width:%d x:%d y:%d HACTIVE:%d VACTIVE:%d\n"
"src_rect: height:%d width:%d x:%d y:%d dst_rect: height:%d width:%d x:%d y:%d clip_rect: height:%d width:%d x:%d y:%d\n",
__func__,
@@ -1538,6 +1638,62 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
return res;
}
+/*
+ * resource_can_pipe_disable_cursor - tell whether the cursor can be turned
+ * off on @pipe_ctx because a plane above it covers this pipe's recout
+ * @pipe_ctx: pipe to test; its plane_state is dereferenced unconditionally
+ *
+ * The recout of @pipe_ctx (with the visual confirm adjustment reversed) is
+ * compared against the recout of every pipe reachable through ->top_pipe that
+ * has a visible plane on a different layer. When such a plane is pipe-split,
+ * the rectangle of the matching pipe on the same layer is merged in first.
+ *
+ * Return: true if one of those rectangles fully contains this pipe's recout,
+ * false otherwise.
+ */
+bool resource_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx)
+{
+	struct pipe_ctx *test_pipe, *split_pipe;
+	struct rect r1 = pipe_ctx->plane_res.scl_data.recout;
+	int r1_right, r1_bottom;
+	int cur_layer = pipe_ctx->plane_state->layer_index;
+
+	reverse_adjust_recout_for_visual_confirm(&r1, pipe_ctx);
+	r1_right = r1.x + r1.width;
+	r1_bottom = r1.y + r1.height;
+
+	/**
+	 * Disable the cursor if there's another pipe above this with a
+	 * plane that contains this pipe's viewport to prevent double cursor
+	 * and incorrect scaling artifacts.
+	 */
+	for (test_pipe = pipe_ctx->top_pipe; test_pipe;
+	     test_pipe = test_pipe->top_pipe) {
+		struct rect r2;
+		int r2_right, r2_bottom;
+		// Skip invisible layer and pipe-split plane on same layer
+		if (!test_pipe->plane_state ||
+		    !test_pipe->plane_state->visible ||
+		    test_pipe->plane_state->layer_index == cur_layer)
+			continue;
+
+		r2 = test_pipe->plane_res.scl_data.recout;
+		reverse_adjust_recout_for_visual_confirm(&r2, test_pipe);
+		r2_right = r2.x + r2.width;
+		r2_bottom = r2.y + r2.height;
+
+		/**
+		 * There is another half plane on same layer because of
+		 * pipe-split, merge together per same height.
+		 *
+		 * NOTE(review): the scan restarts at pipe_ctx->top_pipe, so
+		 * the first match can be test_pipe itself, in which case
+		 * r2.width is doubled - confirm this is intended.
+		 */
+		for (split_pipe = pipe_ctx->top_pipe; split_pipe;
+		     split_pipe = split_pipe->top_pipe) {
+			struct rect r2_half;
+
+			/*
+			 * This walks the same chain as the outer loop, which
+			 * tolerates pipes without a plane; do the same here
+			 * instead of dereferencing a NULL plane_state.
+			 */
+			if (!split_pipe->plane_state ||
+			    split_pipe->plane_state->layer_index !=
+			    test_pipe->plane_state->layer_index)
+				continue;
+
+			r2_half = split_pipe->plane_res.scl_data.recout;
+			reverse_adjust_recout_for_visual_confirm(&r2_half, split_pipe);
+			r2.x = min(r2_half.x, r2.x);
+			r2.width = r2.width + r2_half.width;
+			r2_right = r2.x + r2.width;
+			r2_bottom = min(r2_bottom, r2_half.y + r2_half.height);
+			break;
+		}
+
+		if (r1.x >= r2.x && r1.y >= r2.y && r1_right <= r2_right && r1_bottom <= r2_bottom)
+			return true;
+	}
+
+	return false;
+}
+
enum dc_status resource_build_scaling_params_for_context(
const struct dc *dc,
@@ -1615,6 +1771,27 @@ struct pipe_ctx *resource_find_free_secondary_pipe_legacy(
return secondary_pipe;
}
+/*
+ * Find a pipe that is a secondary OPP head of @cur_otg_master in the current
+ * state and is free in the new state.
+ *
+ * Walks the ODM chain starting at cur_otg_master->next_odm_pipe and returns
+ * the pipe_idx of the first entry whose same-index pipe in @new_res_ctx is a
+ * FREE_PIPE, or FREE_PIPE_INDEX_NOT_FOUND when none qualifies.
+ * @cur_res_ctx is not read by this function.
+ */
+int resource_find_free_pipe_used_as_sec_opp_head_by_cur_otg_master(
+ const struct resource_context *cur_res_ctx,
+ struct resource_context *new_res_ctx,
+ const struct pipe_ctx *cur_otg_master)
+{
+ const struct pipe_ctx *cur_sec_opp_head = cur_otg_master->next_odm_pipe;
+ struct pipe_ctx *new_pipe;
+ int free_pipe_idx = FREE_PIPE_INDEX_NOT_FOUND;
+
+ while (cur_sec_opp_head) {
+ /* look up the pipe with the same index in the new context */
+ new_pipe = &new_res_ctx->pipe_ctx[cur_sec_opp_head->pipe_idx];
+ if (resource_is_pipe_type(new_pipe, FREE_PIPE)) {
+ free_pipe_idx = cur_sec_opp_head->pipe_idx;
+ break;
+ }
+ cur_sec_opp_head = cur_sec_opp_head->next_odm_pipe;
+ }
+
+ return free_pipe_idx;
+}
+
int resource_find_free_pipe_used_in_cur_mpc_blending_tree(
const struct resource_context *cur_res_ctx,
struct resource_context *new_res_ctx,
@@ -1663,6 +1840,53 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx(
return free_pipe_idx;
}
+/*
+ * Find the lowest pipe index (below pool->pipe_count) whose pipe is an
+ * OTG_MASTER in @cur_res_ctx and a FREE_PIPE in @new_res_ctx.
+ *
+ * Returns that index, or FREE_PIPE_INDEX_NOT_FOUND when no pipe matches.
+ */
+int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+ const struct resource_context *cur_res_ctx,
+ struct resource_context *new_res_ctx,
+ const struct resource_pool *pool)
+{
+ int free_pipe_idx = FREE_PIPE_INDEX_NOT_FOUND;
+ const struct pipe_ctx *new_pipe, *cur_pipe;
+ int i;
+
+ for (i = 0; i < pool->pipe_count; i++) {
+ /* compare the same pipe slot in both contexts */
+ cur_pipe = &cur_res_ctx->pipe_ctx[i];
+ new_pipe = &new_res_ctx->pipe_ctx[i];
+
+ if (resource_is_pipe_type(cur_pipe, OTG_MASTER) &&
+ resource_is_pipe_type(new_pipe, FREE_PIPE)) {
+ free_pipe_idx = i;
+ break;
+ }
+ }
+
+ return free_pipe_idx;
+}
+
+/*
+ * Find the lowest pipe index (below pool->pipe_count) whose pipe is a
+ * DPP_PIPE but not an OPP_HEAD in @cur_res_ctx and a FREE_PIPE in
+ * @new_res_ctx.
+ *
+ * Returns that index, or FREE_PIPE_INDEX_NOT_FOUND when no pipe matches.
+ */
+int resource_find_free_pipe_used_as_cur_sec_dpp(
+ const struct resource_context *cur_res_ctx,
+ struct resource_context *new_res_ctx,
+ const struct resource_pool *pool)
+{
+ int free_pipe_idx = FREE_PIPE_INDEX_NOT_FOUND;
+ const struct pipe_ctx *new_pipe, *cur_pipe;
+ int i;
+
+ for (i = 0; i < pool->pipe_count; i++) {
+ /* compare the same pipe slot in both contexts */
+ cur_pipe = &cur_res_ctx->pipe_ctx[i];
+ new_pipe = &new_res_ctx->pipe_ctx[i];
+
+ if (resource_is_pipe_type(cur_pipe, DPP_PIPE) &&
+ !resource_is_pipe_type(cur_pipe, OPP_HEAD) &&
+ resource_is_pipe_type(new_pipe, FREE_PIPE)) {
+ free_pipe_idx = i;
+ break;
+ }
+ }
+
+ return free_pipe_idx;
+}
+
int resource_find_free_pipe_used_as_cur_sec_dpp_in_mpcc_combine(
const struct resource_context *cur_res_ctx,
struct resource_context *new_res_ctx,
@@ -1678,7 +1902,7 @@ int resource_find_free_pipe_used_as_cur_sec_dpp_in_mpcc_combine(
if (resource_is_pipe_type(cur_pipe, DPP_PIPE) &&
!resource_is_pipe_type(cur_pipe, OPP_HEAD) &&
- resource_is_for_mpcc_combine(cur_pipe) &&
+ resource_get_mpc_slice_index(cur_pipe) > 0 &&
resource_is_pipe_type(new_pipe, FREE_PIPE)) {
free_pipe_idx = i;
break;
@@ -1709,23 +1933,6 @@ int resource_find_any_free_pipe(struct resource_context *new_res_ctx,
bool resource_is_pipe_type(const struct pipe_ctx *pipe_ctx, enum pipe_type type)
{
-#ifdef DBG
- if (pipe_ctx->stream == NULL) {
- /* a free pipe with dangling states */
- ASSERT(!pipe_ctx->plane_state);
- ASSERT(!pipe_ctx->prev_odm_pipe);
- ASSERT(!pipe_ctx->next_odm_pipe);
- ASSERT(!pipe_ctx->top_pipe);
- ASSERT(!pipe_ctx->bottom_pipe);
- } else if (pipe_ctx->top_pipe) {
- /* a secondary DPP pipe must be signed to a plane */
- ASSERT(pipe_ctx->plane_state)
- }
- /* Add more checks here to prevent corrupted pipe ctx. It is very hard
- * to debug this issue afterwards because we can't pinpoint the code
- * location causing inconsistent pipe context states.
- */
-#endif
switch (type) {
case OTG_MASTER:
return !pipe_ctx->prev_odm_pipe &&
@@ -1742,14 +1949,9 @@ bool resource_is_pipe_type(const struct pipe_ctx *pipe_ctx, enum pipe_type type)
}
}
-bool resource_is_for_mpcc_combine(const struct pipe_ctx *pipe_ctx)
-{
- return resource_get_num_mpc_splits(pipe_ctx) > 0;
-}
-
struct pipe_ctx *resource_get_otg_master_for_stream(
struct resource_context *res_ctx,
- struct dc_stream_state *stream)
+ const struct dc_stream_state *stream)
{
int i;
@@ -1761,6 +1963,81 @@ struct pipe_ctx *resource_get_otg_master_for_stream(
return NULL;
}
+/*
+ * resource_get_opp_heads_for_otg_master - collect the OPP heads of an OTG
+ * master in ODM order
+ * @otg_master: pipe expected to be of type OTG_MASTER; its stream is
+ *              dereferenced to reach the logger
+ * @res_ctx: resource context the returned pipe pointers belong to
+ * @opp_heads: output array with room for MAX_PIPES entries
+ *
+ * Starts at the pipe of @res_ctx that has @otg_master's pipe_idx and follows
+ * ->next_odm_pipe. If @otg_master is not an OTG master a warning is logged,
+ * an assertion fires and nothing is written.
+ *
+ * Return: number of entries stored in @opp_heads, never more than MAX_PIPES.
+ */
+int resource_get_opp_heads_for_otg_master(const struct pipe_ctx *otg_master,
+		struct resource_context *res_ctx,
+		struct pipe_ctx *opp_heads[MAX_PIPES])
+{
+	struct pipe_ctx *opp_head = &res_ctx->pipe_ctx[otg_master->pipe_idx];
+	struct dc *dc = otg_master->stream->ctx->dc;
+	int i = 0;
+
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (!resource_is_pipe_type(otg_master, OTG_MASTER)) {
+		DC_LOG_WARNING("%s called from a non OTG master, something "
+				"is wrong in the pipe configuration",
+				__func__);
+		ASSERT(0);
+		return 0;
+	}
+	while (opp_head) {
+		/*
+		 * An assertion alone does not stop the store below; bail out
+		 * so a malformed chain cannot overrun the caller's array.
+		 */
+		if (i >= MAX_PIPES) {
+			ASSERT(0);
+			break;
+		}
+		opp_heads[i++] = opp_head;
+		opp_head = opp_head->next_odm_pipe;
+	}
+	return i;
+}
+
+/*
+ * resource_get_dpp_pipes_for_opp_head - collect the DPP pipes stacked under
+ * an OPP head
+ * @opp_head: pipe expected to be of type OPP_HEAD
+ * @res_ctx: resource context the returned pipe pointers belong to
+ * @dpp_pipes: output array with room for MAX_PIPES entries
+ *
+ * Starts at the pipe of @res_ctx that has @opp_head's pipe_idx and follows
+ * ->bottom_pipe for as long as the pipe is of type DPP_PIPE. If @opp_head is
+ * not an OPP head an assertion fires and nothing is written.
+ *
+ * Return: number of entries stored in @dpp_pipes, never more than MAX_PIPES.
+ */
+int resource_get_dpp_pipes_for_opp_head(const struct pipe_ctx *opp_head,
+		struct resource_context *res_ctx,
+		struct pipe_ctx *dpp_pipes[MAX_PIPES])
+{
+	struct pipe_ctx *pipe = &res_ctx->pipe_ctx[opp_head->pipe_idx];
+	int i = 0;
+
+	if (!resource_is_pipe_type(opp_head, OPP_HEAD)) {
+		ASSERT(0);
+		return 0;
+	}
+	while (pipe && resource_is_pipe_type(pipe, DPP_PIPE)) {
+		/*
+		 * An assertion alone does not stop the store below; bail out
+		 * so a malformed chain cannot overrun the caller's array.
+		 */
+		if (i >= MAX_PIPES) {
+			ASSERT(0);
+			break;
+		}
+		dpp_pipes[i++] = pipe;
+		pipe = pipe->bottom_pipe;
+	}
+	return i;
+}
+
+/*
+ * resource_get_dpp_pipes_for_plane - collect the pipes that carry @plane
+ * @plane: plane state to look for
+ * @res_ctx: resource context to search
+ * @dpp_pipes: output array with room for MAX_PIPES entries
+ *
+ * Picks the first pipe that references @plane, has no prev_odm_pipe, and is
+ * either an OPP head or sits below a pipe carrying a different plane. From
+ * there the ODM chain (->next_odm_pipe) is collected when one exists;
+ * otherwise ->bottom_pipe is followed while the pipes still carry @plane.
+ *
+ * Return: number of entries stored in @dpp_pipes (0 when no pipe matches),
+ * never more than MAX_PIPES.
+ */
+int resource_get_dpp_pipes_for_plane(const struct dc_plane_state *plane,
+		struct resource_context *res_ctx,
+		struct pipe_ctx *dpp_pipes[MAX_PIPES])
+{
+	int i = 0, j;
+	struct pipe_ctx *pipe;
+
+	for (j = 0; j < MAX_PIPES; j++) {
+		pipe = &res_ctx->pipe_ctx[j];
+		if (pipe->plane_state == plane && pipe->prev_odm_pipe == NULL) {
+			/*
+			 * Failing the OPP_HEAD test is not shown here to imply
+			 * that top_pipe is set, so check it before the
+			 * dereference.
+			 */
+			if (resource_is_pipe_type(pipe, OPP_HEAD) ||
+					(pipe->top_pipe &&
+					 pipe->top_pipe->plane_state != plane))
+				break;
+		}
+	}
+
+	if (j < MAX_PIPES) {
+		/* i < MAX_PIPES keeps the stores inside the caller's array */
+		if (pipe->next_odm_pipe)
+			while (pipe && i < MAX_PIPES) {
+				dpp_pipes[i++] = pipe;
+				pipe = pipe->next_odm_pipe;
+			}
+		else
+			while (pipe && pipe->plane_state == plane &&
+					i < MAX_PIPES) {
+				dpp_pipes[i++] = pipe;
+				pipe = pipe->bottom_pipe;
+			}
+	}
+	return i;
+}
+
struct pipe_ctx *resource_get_otg_master(const struct pipe_ctx *pipe_ctx)
{
struct pipe_ctx *otg_master = resource_get_opp_head(pipe_ctx);
@@ -1780,6 +2057,395 @@ struct pipe_ctx *resource_get_opp_head(const struct pipe_ctx *pipe_ctx)
return opp_head;
}
+/*
+ * Return the primary DPP pipe for the plane carried by @dpp_pipe.
+ *
+ * First follows ->prev_odm_pipe back to the start of the ODM chain, then
+ * follows ->top_pipe for as long as the pipe above carries the same
+ * plane_state. Asserts that @dpp_pipe is of type DPP_PIPE. The const
+ * qualifier of the argument is cast away for the returned pointer.
+ */
+struct pipe_ctx *resource_get_primary_dpp_pipe(const struct pipe_ctx *dpp_pipe)
+{
+ struct pipe_ctx *pri_dpp_pipe = (struct pipe_ctx *) dpp_pipe;
+
+ ASSERT(resource_is_pipe_type(dpp_pipe, DPP_PIPE));
+ while (pri_dpp_pipe->prev_odm_pipe)
+ pri_dpp_pipe = pri_dpp_pipe->prev_odm_pipe;
+ while (pri_dpp_pipe->top_pipe &&
+ pri_dpp_pipe->top_pipe->plane_state == pri_dpp_pipe->plane_state)
+ pri_dpp_pipe = pri_dpp_pipe->top_pipe;
+ return pri_dpp_pipe;
+}
+
+
+/*
+ * Return the position of @pipe_ctx among the pipes sharing its plane:
+ * the number of consecutive pipes reached through ->top_pipe that carry the
+ * same plane_state. 0 means no such pipe sits directly above.
+ */
+int resource_get_mpc_slice_index(const struct pipe_ctx *pipe_ctx)
+{
+ struct pipe_ctx *split_pipe = pipe_ctx->top_pipe;
+ int index = 0;
+
+ while (split_pipe && split_pipe->plane_state == pipe_ctx->plane_state) {
+ index++;
+ split_pipe = split_pipe->top_pipe;
+ }
+
+ return index;
+}
+
+/*
+ * Return how many pipes share @pipe's plane_state: @pipe itself plus the
+ * consecutive pipes with the same plane_state reached through ->bottom_pipe
+ * and through ->top_pipe. The result is at least 1.
+ */
+int resource_get_mpc_slice_count(const struct pipe_ctx *pipe)
+{
+ int mpc_split_count = 1;
+ const struct pipe_ctx *other_pipe = pipe->bottom_pipe;
+
+ /* count matching pipes below */
+ while (other_pipe && other_pipe->plane_state == pipe->plane_state) {
+ mpc_split_count++;
+ other_pipe = other_pipe->bottom_pipe;
+ }
+ /* count matching pipes above */
+ other_pipe = pipe->top_pipe;
+ while (other_pipe && other_pipe->plane_state == pipe->plane_state) {
+ mpc_split_count++;
+ other_pipe = other_pipe->top_pipe;
+ }
+
+ return mpc_split_count;
+}
+
+/*
+ * Return the number of pipes in the ODM chain of @pipe's OTG master: the
+ * master itself plus every pipe reached through ->next_odm_pipe.
+ *
+ * NOTE(review): the result of resource_get_otg_master() is dereferenced
+ * without a NULL check - confirm it cannot return NULL here.
+ */
+int resource_get_odm_slice_count(const struct pipe_ctx *pipe)
+{
+ int odm_split_count = 1;
+
+ pipe = resource_get_otg_master(pipe);
+
+ while (pipe->next_odm_pipe) {
+ odm_split_count++;
+ pipe = pipe->next_odm_pipe;
+ }
+ return odm_split_count;
+}
+
+/*
+ * Return the position of @pipe_ctx's OPP head within its ODM chain: the
+ * number of ->prev_odm_pipe links between that OPP head and the start of the
+ * chain. Returns 0 when resource_get_opp_head() yields NULL.
+ */
+int resource_get_odm_slice_index(const struct pipe_ctx *pipe_ctx)
+{
+ int index = 0;
+
+ pipe_ctx = resource_get_opp_head(pipe_ctx);
+ if (!pipe_ctx)
+ return 0;
+
+ while (pipe_ctx->prev_odm_pipe) {
+ index++;
+ pipe_ctx = pipe_ctx->prev_odm_pipe;
+ }
+
+ return index;
+}
+
+/*
+ * resource_get_odm_slice_dst_width - destination width of one ODM slice
+ * @otg_master: OTG master of the stream; may be NULL or have no stream
+ * @is_last_segment: true to get the width of the last slice
+ *
+ * The total width is h_addressable plus both horizontal borders plus the DSC
+ * hactive padding. It is divided evenly by the ODM slice count; an odd result
+ * is rounded up by one when the timing generator reports two pixels per
+ * container or the pixel encoding is YCbCr 4:2:2. The last slice gets
+ * whatever remains after the other slices.
+ *
+ * Return: the slice width, or 0 when @otg_master or its stream is NULL.
+ */
+int resource_get_odm_slice_dst_width(struct pipe_ctx *otg_master,
+ bool is_last_segment)
+{
+ const struct dc_crtc_timing *timing;
+ int count;
+ int h_active;
+ int width;
+ bool two_pixel_alignment_required = false;
+
+ if (!otg_master || !otg_master->stream)
+ return 0;
+
+ timing = &otg_master->stream->timing;
+ count = resource_get_odm_slice_count(otg_master);
+ h_active = timing->h_addressable +
+ timing->h_border_left +
+ timing->h_border_right +
+ otg_master->dsc_padding_params.dsc_hactive_padding;
+ width = h_active / count;
+
+ /* without a timing generator no alignment is applied */
+ if (otg_master->stream_res.tg)
+ two_pixel_alignment_required =
+ otg_master->stream_res.tg->funcs->is_two_pixels_per_container(timing) ||
+ /*
+ * 422 is sub-sampled horizontally. 1 set of chromas
+ * (Cb/Cr) is shared for 2 lumas (i.e 2 Y values).
+ * Therefore even if 422 is still 1 pixel per container,
+ * ODM segment width still needs to be 2 pixel aligned.
+ */
+ timing->pixel_encoding == PIXEL_ENCODING_YCBCR422;
+ if ((width % 2) && two_pixel_alignment_required)
+ width++;
+
+ /* the last slice absorbs the remainder left by the other slices */
+ return is_last_segment ?
+ h_active - width * (count - 1) :
+ width;
+}
+
+/*
+ * Return the destination rectangle of the ODM slice @pipe_ctx belongs to.
+ *
+ * x is the slice index times the width of a non-last slice; width is the
+ * width of this slice (the last-slice width when @pipe_ctx has no
+ * next_odm_pipe); y is 0; height is v_addressable plus the top and bottom
+ * borders of the stream timing.
+ */
+struct rect resource_get_odm_slice_dst_rect(struct pipe_ctx *pipe_ctx)
+{
+ const struct dc_stream_state *stream = pipe_ctx->stream;
+ bool is_last_odm_slice = pipe_ctx->next_odm_pipe == NULL;
+ struct pipe_ctx *otg_master = resource_get_otg_master(pipe_ctx);
+ int odm_slice_idx = resource_get_odm_slice_index(pipe_ctx);
+ int odm_segment_offset = resource_get_odm_slice_dst_width(otg_master, false);
+ struct rect odm_slice_dst;
+
+ odm_slice_dst.x = odm_segment_offset * odm_slice_idx;
+ odm_slice_dst.width = resource_get_odm_slice_dst_width(otg_master, is_last_odm_slice);
+ odm_slice_dst.y = 0;
+ odm_slice_dst.height = stream->timing.v_addressable +
+ stream->timing.v_border_bottom +
+ stream->timing.v_border_top;
+
+ return odm_slice_dst;
+}
+
+/*
+ * Return the source rectangle of the ODM slice @pipe_ctx belongs to.
+ *
+ * Starts from the destination rectangle of the slice's OPP head and extends
+ * it to the left (x decreased, width increased) by the count reported by the
+ * OPP's optional opp_get_left_edge_extra_pixel_count hook. When the OPP or
+ * the hook is absent the source equals the destination rectangle.
+ */
+struct rect resource_get_odm_slice_src_rect(struct pipe_ctx *pipe_ctx)
+{
+ struct rect odm_slice_dst;
+ struct rect odm_slice_src;
+ struct pipe_ctx *opp_head = resource_get_opp_head(pipe_ctx);
+ struct output_pixel_processor *opp = opp_head->stream_res.opp;
+ uint32_t left_edge_extra_pixel_count;
+
+ odm_slice_dst = resource_get_odm_slice_dst_rect(opp_head);
+ odm_slice_src = odm_slice_dst;
+
+ /* the hook is told whether this OPP head is also the OTG master */
+ if (opp && opp->funcs->opp_get_left_edge_extra_pixel_count)
+ left_edge_extra_pixel_count =
+ opp->funcs->opp_get_left_edge_extra_pixel_count(
+ opp, pipe_ctx->stream->timing.pixel_encoding,
+ resource_is_pipe_type(opp_head, OTG_MASTER));
+ else
+ left_edge_extra_pixel_count = 0;
+
+ odm_slice_src.x -= left_edge_extra_pixel_count;
+ odm_slice_src.width += left_edge_extra_pixel_count;
+
+ return odm_slice_src;
+}
+
+/*
+ * Compare the pipe topology of two states.
+ *
+ * Returns true when the stream counts differ, or when for any pipe index
+ * below MAX_PIPES one of the following differs between the two states:
+ * presence of a stream, presence of a plane, presence or pipe_idx of the
+ * bottom pipe, whether the bottom pipe shares the pipe's plane_state, or
+ * presence or pipe_idx of the next ODM pipe. Returns false otherwise.
+ */
+bool resource_is_pipe_topology_changed(const struct dc_state *state_a,
+ const struct dc_state *state_b)
+{
+ int i;
+ const struct pipe_ctx *pipe_a, *pipe_b;
+
+ if (state_a->stream_count != state_b->stream_count)
+ return true;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ pipe_a = &state_a->res_ctx.pipe_ctx[i];
+ pipe_b = &state_b->res_ctx.pipe_ctx[i];
+
+ /* stream attached on one side only */
+ if (pipe_a->stream && !pipe_b->stream)
+ return true;
+ else if (!pipe_a->stream && pipe_b->stream)
+ return true;
+
+ /* plane attached on one side only */
+ if (pipe_a->plane_state && !pipe_b->plane_state)
+ return true;
+ else if (!pipe_a->plane_state && pipe_b->plane_state)
+ return true;
+
+ if (pipe_a->bottom_pipe && pipe_b->bottom_pipe) {
+ if (pipe_a->bottom_pipe->pipe_idx != pipe_b->bottom_pipe->pipe_idx)
+ return true;
+ /* bottom pipe shares the plane on one side but not the other */
+ if ((pipe_a->bottom_pipe->plane_state == pipe_a->plane_state) &&
+ (pipe_b->bottom_pipe->plane_state != pipe_b->plane_state))
+ return true;
+ else if ((pipe_a->bottom_pipe->plane_state != pipe_a->plane_state) &&
+ (pipe_b->bottom_pipe->plane_state == pipe_b->plane_state))
+ return true;
+ } else if (pipe_a->bottom_pipe || pipe_b->bottom_pipe) {
+ return true;
+ }
+
+ if (pipe_a->next_odm_pipe && pipe_b->next_odm_pipe) {
+ if (pipe_a->next_odm_pipe->pipe_idx != pipe_b->next_odm_pipe->pipe_idx)
+ return true;
+ } else if (pipe_a->next_odm_pipe || pipe_b->next_odm_pipe) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ * Compare the ODM chains of two OTG masters.
+ *
+ * Returns true when either argument is not of type OTG_MASTER, when the
+ * chains differ in length, or when OPP heads at the same position use a
+ * different stream_res.opp. Returns false when the chains match.
+ */
+bool resource_is_odm_topology_changed(const struct pipe_ctx *otg_master_a,
+ const struct pipe_ctx *otg_master_b)
+{
+ const struct pipe_ctx *opp_head_a = otg_master_a;
+ const struct pipe_ctx *opp_head_b = otg_master_b;
+
+ if (!resource_is_pipe_type(otg_master_a, OTG_MASTER) ||
+ !resource_is_pipe_type(otg_master_b, OTG_MASTER))
+ return true;
+
+ /* walk both chains in lock step */
+ while (opp_head_a && opp_head_b) {
+ if (opp_head_a->stream_res.opp != opp_head_b->stream_res.opp)
+ return true;
+ /* one chain ends here while the other continues */
+ if ((opp_head_a->next_odm_pipe && !opp_head_b->next_odm_pipe) ||
+ (!opp_head_a->next_odm_pipe && opp_head_b->next_odm_pipe))
+ return true;
+ opp_head_a = opp_head_a->next_odm_pipe;
+ opp_head_b = opp_head_b->next_odm_pipe;
+ }
+
+ return false;
+}
+
+/*
+ * Sample log:
+ * pipe topology update
+ * ________________________
+ * | plane0 slice0 stream0|
+ * |DPP0----OPP0----OTG0----| <--- case 0 (OTG master pipe with plane)
+ * | plane1 | | |
+ * |DPP1----| | | <--- case 5 (DPP pipe not in last slice)
+ * | plane0 slice1 | |
+ * |DPP2----OPP2----| | <--- case 2 (OPP head pipe with plane)
+ * | plane1 | |
+ * |DPP3----| | <--- case 4 (DPP pipe in last slice)
+ * | slice0 stream1|
+ * |DPG4----OPP4----OTG4----| <--- case 1 (OTG master pipe without plane)
+ * | slice1 | |
+ * |DPG5----OPP5----| | <--- case 3 (OPP head pipe without plane)
+ * |________________________|
+ */
+
+/*
+ * Emit the two log lines that describe one pipe in the topology dump.
+ *
+ * @dc: display core instance, used to reach the logger
+ * @pipe: pipe to describe
+ * @stream_idx: stream number printed for OTG master rows
+ * @slice_idx: ODM slice number of the pipe
+ * @plane_idx: plane number within the slice, or -1 for a pipe without plane
+ * @slice_count: number of ODM slices of the stream
+ * @is_primary: true when the pipe is the first pipe of its plane
+ *
+ * The row layout is chosen from slice_idx, plane_idx and is_primary; each
+ * branch is labelled with its case number.
+ */
+static void resource_log_pipe(struct dc *dc, struct pipe_ctx *pipe,
+		int stream_idx, int slice_idx, int plane_idx, int slice_count,
+		bool is_primary)
+{
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (slice_idx == 0 && plane_idx == 0 && is_primary) {
+		/* case 0 (OTG master pipe with plane) */
+		DC_LOG_DC(" | plane%d slice%d stream%d|",
+				plane_idx, slice_idx, stream_idx);
+		DC_LOG_DC(" |DPP%d----OPP%d----OTG%d----|",
+				pipe->plane_res.dpp->inst,
+				pipe->stream_res.opp->inst,
+				pipe->stream_res.tg->inst);
+	} else if (slice_idx == 0 && plane_idx == -1) {
+		/* case 1 (OTG master pipe without plane) */
+		DC_LOG_DC(" | slice%d stream%d|",
+				slice_idx, stream_idx);
+		DC_LOG_DC(" |DPG%d----OPP%d----OTG%d----|",
+				pipe->stream_res.opp->inst,
+				pipe->stream_res.opp->inst,
+				pipe->stream_res.tg->inst);
+	} else if (slice_idx != 0 && plane_idx == 0 && is_primary) {
+		/* case 2 (OPP head pipe with plane) */
+		DC_LOG_DC(" | plane%d slice%d | |",
+				plane_idx, slice_idx);
+		DC_LOG_DC(" |DPP%d----OPP%d----| |",
+				pipe->plane_res.dpp->inst,
+				pipe->stream_res.opp->inst);
+	} else if (slice_idx != 0 && plane_idx == -1) {
+		/*
+		 * case 3 (OPP head pipe without plane)
+		 *
+		 * Like case 1, the DPG number is the OPP instance; a pipe
+		 * without plane should not be read through plane_res.dpp.
+		 */
+		DC_LOG_DC(" | slice%d | |", slice_idx);
+		DC_LOG_DC(" |DPG%d----OPP%d----| |",
+				pipe->stream_res.opp->inst,
+				pipe->stream_res.opp->inst);
+	} else if (slice_idx == slice_count - 1) {
+		/* case 4 (DPP pipe in last slice) */
+		DC_LOG_DC(" | plane%d | |", plane_idx);
+		DC_LOG_DC(" |DPP%d----| |",
+				pipe->plane_res.dpp->inst);
+	} else {
+		/* case 5 (DPP pipe not in last slice) */
+		DC_LOG_DC(" | plane%d | | |", plane_idx);
+		DC_LOG_DC(" |DPP%d----| | |",
+				pipe->plane_res.dpp->inst);
+	}
+}
+
+/*
+ * Log every pipe that belongs to the stream driven by @otg_master.
+ *
+ * For each OPP head (ODM slice) of @otg_master: if the head carries a plane,
+ * each DPP pipe under it is logged, with plane_idx advanced whenever a pipe
+ * is the first pipe of its plane; otherwise the head itself is logged with
+ * plane_idx -1. @stream_idx is passed through as the printed stream number.
+ */
+static void resource_log_pipe_for_stream(struct dc *dc, struct dc_state *state,
+ struct pipe_ctx *otg_master, int stream_idx)
+{
+ struct pipe_ctx *opp_heads[MAX_PIPES];
+ struct pipe_ctx *dpp_pipes[MAX_PIPES];
+
+ int slice_idx, dpp_idx, plane_idx, slice_count, dpp_count;
+ bool is_primary;
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ slice_count = resource_get_opp_heads_for_otg_master(otg_master,
+ &state->res_ctx, opp_heads);
+ for (slice_idx = 0; slice_idx < slice_count; slice_idx++) {
+ /* -1 marks "no plane" until the first primary pipe is seen */
+ plane_idx = -1;
+ if (opp_heads[slice_idx]->plane_state) {
+ dpp_count = resource_get_dpp_pipes_for_opp_head(
+ opp_heads[slice_idx],
+ &state->res_ctx,
+ dpp_pipes);
+ for (dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) {
+ /* primary: nothing above, or the pipe above carries another plane */
+ is_primary = !dpp_pipes[dpp_idx]->top_pipe ||
+ dpp_pipes[dpp_idx]->top_pipe->plane_state != dpp_pipes[dpp_idx]->plane_state;
+ if (is_primary)
+ plane_idx++;
+ resource_log_pipe(dc, dpp_pipes[dpp_idx],
+ stream_idx, slice_idx,
+ plane_idx, slice_count,
+ is_primary);
+ }
+ } else {
+ resource_log_pipe(dc, opp_heads[slice_idx],
+ stream_idx, slice_idx, plane_idx,
+ slice_count, true);
+ }
+
+ }
+}
+
+/*
+ * Return the index of @stream in state->streams.
+ *
+ * When the stream is not found an assertion fires and 0 is returned, so the
+ * result is always usable as an array index but may then refer to a
+ * different stream.
+ */
+static int resource_stream_to_stream_idx(struct dc_state *state,
+ struct dc_stream_state *stream)
+{
+ int i, stream_idx = -1;
+
+ for (i = 0; i < state->stream_count; i++)
+ if (state->streams[i] == stream) {
+ stream_idx = i;
+ break;
+ }
+
+ /* never return negative array index */
+ if (stream_idx == -1) {
+ ASSERT(0);
+ return 0;
+ }
+
+ return stream_idx;
+}
+
+/*
+ * Log the pipe topology of @state as an ASCII diagram.
+ *
+ * The first pass logs the pipes of every non-phantom stream that has an OTG
+ * master. When the state has phantom streams, a second pass visits each
+ * stream whose mall_stream_config.type is SUBVP_MAIN, looks up its paired
+ * stream and logs that stream's pipes, labelled with the index of the main
+ * stream.
+ */
+void resource_log_pipe_topology_update(struct dc *dc, struct dc_state *state)
+{
+ struct pipe_ctx *otg_master;
+ int stream_idx, phantom_stream_idx;
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ DC_LOG_DC(" pipe topology update");
+ DC_LOG_DC(" ________________________");
+ for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) {
+ /* phantom streams are handled in the second pass below */
+ if (state->streams[stream_idx]->is_phantom)
+ continue;
+
+ otg_master = resource_get_otg_master_for_stream(
+ &state->res_ctx, state->streams[stream_idx]);
+
+ if (!otg_master)
+ continue;
+
+ resource_log_pipe_for_stream(dc, state, otg_master, stream_idx);
+ }
+ if (state->phantom_stream_count > 0) {
+ DC_LOG_DC(" | (phantom pipes) |");
+ for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) {
+ if (state->stream_status[stream_idx].mall_stream_config.type != SUBVP_MAIN)
+ continue;
+
+ phantom_stream_idx = resource_stream_to_stream_idx(state,
+ state->stream_status[stream_idx].mall_stream_config.paired_stream);
+ otg_master = resource_get_otg_master_for_stream(
+ &state->res_ctx, state->streams[phantom_stream_idx]);
+ if (!otg_master)
+ continue;
+
+ /* the row is labelled with the main stream's index */
+ resource_log_pipe_for_stream(dc, state, otg_master, stream_idx);
+ }
+ }
+ DC_LOG_DC(" |________________________|\n");
+}
+
static struct pipe_ctx *get_tail_pipe(
struct pipe_ctx *head_pipe)
{
@@ -1793,6 +2459,65 @@ static struct pipe_ctx *get_tail_pipe(
return head_pipe;
}
+/*
+ * Return the last pipe of the ODM chain that starts at @opp_head, reached by
+ * following ->next_odm_pipe. Asserts that @opp_head is of type OPP_HEAD.
+ */
+static struct pipe_ctx *get_last_opp_head(
+ struct pipe_ctx *opp_head)
+{
+ ASSERT(resource_is_pipe_type(opp_head, OPP_HEAD));
+ while (opp_head->next_odm_pipe)
+ opp_head = opp_head->next_odm_pipe;
+ return opp_head;
+}
+
+/*
+ * Return the lowest pipe that still carries the same plane_state as
+ * @dpp_pipe, reached by following ->bottom_pipe. Asserts that @dpp_pipe is
+ * of type DPP_PIPE.
+ */
+static struct pipe_ctx *get_last_dpp_pipe_in_mpcc_combine(
+ struct pipe_ctx *dpp_pipe)
+{
+ ASSERT(resource_is_pipe_type(dpp_pipe, DPP_PIPE));
+ while (dpp_pipe->bottom_pipe &&
+ dpp_pipe->plane_state == dpp_pipe->bottom_pipe->plane_state)
+ dpp_pipe = dpp_pipe->bottom_pipe;
+ return dpp_pipe;
+}
+
+/*
+ * Recompute per-pipe parameters for the stream of @otg_master.
+ *
+ * Rebuilds scaling parameters for every pipe of @context that belongs to the
+ * stream and carries a plane, stopping at the first failure. Afterwards, and
+ * regardless of that result, the pool's optional build_pipe_pix_clk_params
+ * hook is called and the test pattern parameters are rebuilt.
+ *
+ * Returns false if any resource_build_scaling_params() call failed.
+ */
+static bool update_pipe_params_after_odm_slice_count_change(
+ struct pipe_ctx *otg_master,
+ struct dc_state *context,
+ const struct resource_pool *pool)
+{
+ int i;
+ struct pipe_ctx *pipe;
+ bool result = true;
+
+ /* "&& result" ends the loop after the first failing pipe */
+ for (i = 0; i < pool->pipe_count && result; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (pipe->stream == otg_master->stream && pipe->plane_state)
+ result = resource_build_scaling_params(pipe);
+ }
+
+ if (pool->funcs->build_pipe_pix_clk_params)
+ pool->funcs->build_pipe_pix_clk_params(otg_master);
+
+ resource_build_test_pattern_params(&context->res_ctx, otg_master);
+
+ return result;
+}
+
+/*
+ * Rebuild scaling parameters for every pipe of @context that carries @plane,
+ * stopping at the first failure.
+ *
+ * Returns false if any resource_build_scaling_params() call failed.
+ */
+static bool update_pipe_params_after_mpc_slice_count_change(
+ const struct dc_plane_state *plane,
+ struct dc_state *context,
+ const struct resource_pool *pool)
+{
+ int i;
+ struct pipe_ctx *pipe;
+ bool result = true;
+
+ /* "&& result" ends the loop after the first failing pipe */
+ for (i = 0; i < pool->pipe_count && result; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (pipe->plane_state == plane)
+ result = resource_build_scaling_params(pipe);
+ }
+ return result;
+}
+
static int acquire_first_split_pipe(
struct resource_context *res_ctx,
const struct resource_pool *pool,
@@ -1825,9 +2550,374 @@ static int acquire_first_split_pipe(
return i;
}
}
- return UNABLE_TO_SPLIT;
+ return FREE_PIPE_INDEX_NOT_FOUND;
+}
+
+/*
+ * Record @acquired in res_ctx->is_stream_enc_acquired[] for every pool slot
+ * that holds @stream_enc. Slots holding a different encoder are untouched.
+ */
+static void update_stream_engine_usage(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct stream_encoder *stream_enc,
+		bool acquired)
+{
+	int slot;
+
+	for (slot = 0; slot < pool->stream_enc_count; slot++) {
+		if (pool->stream_enc[slot] != stream_enc)
+			continue;
+		res_ctx->is_stream_enc_acquired[slot] = acquired;
+	}
+}
+
+/*
+ * Record @acquired in res_ctx->is_hpo_dp_stream_enc_acquired[] for every pool
+ * slot that holds @hpo_dp_stream_enc. Other slots are untouched.
+ */
+static void update_hpo_dp_stream_engine_usage(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct hpo_dp_stream_encoder *hpo_dp_stream_enc,
+		bool acquired)
+{
+	int slot;
+
+	for (slot = 0; slot < pool->hpo_dp_stream_enc_count; slot++) {
+		if (pool->hpo_dp_stream_enc[slot] != hpo_dp_stream_enc)
+			continue;
+		res_ctx->is_hpo_dp_stream_enc_acquired[slot] = acquired;
+	}
+}
+
+/*
+ * Return the index of the HPO DP link encoder slot in @res_ctx that has a
+ * positive reference count and is mapped to @link's link_index, or -1 when no
+ * such slot exists.
+ */
+static inline int find_acquired_hpo_dp_link_enc_for_link(
+		const struct resource_context *res_ctx,
+		const struct dc_link *link)
+{
+	int enc;
+
+	for (enc = 0; enc < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_to_link_idx); enc++) {
+		/* unreferenced slots may hold a stale link index; ignore them */
+		if (!(res_ctx->hpo_dp_link_enc_ref_cnts[enc] > 0))
+			continue;
+		if (res_ctx->hpo_dp_link_enc_to_link_idx[enc] == link->link_index)
+			return enc;
+	}
+
+	return -1;
+}
+
+/*
+ * Return the index of the first HPO DP link encoder slot in @res_ctx whose
+ * reference count is zero, provided that index is below
+ * pool->hpo_dp_link_enc_count. Return -1 when every slot is referenced or the
+ * first unreferenced slot lies at or beyond the pool's encoder count.
+ */
+static inline int find_free_hpo_dp_link_enc(const struct resource_context *res_ctx,
+		const struct resource_pool *pool)
+{
+	int enc;
+
+	for (enc = 0; enc < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_ref_cnts); enc++) {
+		if (res_ctx->hpo_dp_link_enc_ref_cnts[enc] != 0)
+			continue;
+		/* only the first unreferenced slot is ever considered */
+		return enc < pool->hpo_dp_link_enc_count ? enc : -1;
+	}
+
+	return -1;
+}
+
+/*
+ * Bind HPO DP link encoder slot @enc_index to @link_index and set its
+ * reference count to one. No bounds check is done on @enc_index.
+ */
+static inline void acquire_hpo_dp_link_enc(
+		struct resource_context *res_ctx,
+		unsigned int link_index,
+		int enc_index)
+{
+	res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] = 1;
+	res_ctx->hpo_dp_link_enc_to_link_idx[enc_index] = link_index;
+}
+
+/* Take one more reference on HPO DP link encoder slot @enc_index. */
+static inline void retain_hpo_dp_link_enc(
+		struct resource_context *res_ctx,
+		int enc_index)
+{
+	res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] += 1;
+}
+
+/*
+ * Drop one reference on HPO DP link encoder slot @enc_index. The count is
+ * asserted to be positive beforehand, but the decrement is performed either
+ * way.
+ */
+static inline void release_hpo_dp_link_enc(
+		struct resource_context *res_ctx,
+		int enc_index)
+{
+	ASSERT(res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] > 0);
+	res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] -= 1;
+}
+
+/*
+ * Give @pipe_ctx an HPO DP link encoder for @stream's link.
+ *
+ * If a slot is already acquired for the link, one more reference is taken on
+ * it; otherwise a free slot is looked up and acquired for the link. When a
+ * slot was obtained, the matching pool encoder is stored in
+ * pipe_ctx->link_res.hpo_dp_link_enc; when none was obtained that field is
+ * left as it was.
+ *
+ * return - true if pipe_ctx->link_res.hpo_dp_link_enc is non-NULL on exit.
+ */
+static bool add_hpo_dp_link_enc_to_ctx(struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_state *stream)
+{
+	int enc = find_acquired_hpo_dp_link_enc_for_link(res_ctx, stream->link);
+	bool already_acquired = enc >= 0;
+
+	if (!already_acquired)
+		enc = find_free_hpo_dp_link_enc(res_ctx, pool);
+
+	if (enc >= 0) {
+		if (already_acquired)
+			retain_hpo_dp_link_enc(res_ctx, enc);
+		else
+			acquire_hpo_dp_link_enc(res_ctx, stream->link->link_index, enc);
+		pipe_ctx->link_res.hpo_dp_link_enc = pool->hpo_dp_link_enc[enc];
+	}
+
+	return pipe_ctx->link_res.hpo_dp_link_enc != NULL;
+}
+
+/*
+ * Drop @stream's reference on the HPO DP link encoder acquired for its link
+ * and clear pipe_ctx->link_res.hpo_dp_link_enc.
+ *
+ * Nothing is changed when no slot is currently acquired for the link. A stream
+ * without a link is treated the same way, mirroring the guard in
+ * remove_dio_link_enc_from_ctx(); the lookup helper dereferences the link, so
+ * it must not be called with a NULL one.
+ */
+static void remove_hpo_dp_link_enc_from_ctx(struct resource_context *res_ctx,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_state *stream)
+{
+	int enc_index = -1;
+
+	if (stream->link)
+		enc_index = find_acquired_hpo_dp_link_enc_for_link(res_ctx, stream->link);
+
+	if (enc_index >= 0) {
+		release_hpo_dp_link_enc(res_ctx, enc_index);
+		pipe_ctx->link_res.hpo_dp_link_enc = NULL;
+	}
+}
+
+/*
+ * Return the index of the DIO link encoder slot in @res_ctx that has a
+ * positive reference count and is mapped to @link's link_index, or -1 when no
+ * such slot exists.
+ */
+static inline int find_acquired_dio_link_enc_for_link(
+		const struct resource_context *res_ctx,
+		const struct dc_link *link)
+{
+	int enc;
+
+	for (enc = 0; enc < ARRAY_SIZE(res_ctx->dio_link_enc_ref_cnts); enc++) {
+		/* unreferenced slots may hold a stale link index; ignore them */
+		if (!(res_ctx->dio_link_enc_ref_cnts[enc] > 0))
+			continue;
+		if (res_ctx->dio_link_enc_to_link_idx[enc] == link->link_index)
+			return enc;
+	}
+
+	return -1;
+}
+
+/*
+ * Return the link encoder index a fixed-mapping @link has to use, which is the
+ * link's own eng_id: the 8b10b DP PHY can only use its fixed link encoder.
+ */
+static inline int find_fixed_dio_link_enc(const struct dc_link *link)
+{
+	int fixed_enc = link->eng_id;
+
+	return fixed_enc;
+}
+
+/*
+ * Find an unreferenced DIO link encoder slot for @link.
+ *
+ * The search covers the first pool->dig_link_enc_count slots. It starts at the
+ * link's dpia_preferred_eng_id and wraps around, so for DPIA the preferred
+ * encoder is checked first and then the following ones.
+ *
+ * A negative preferred id is clamped to 0 before it is used: C's % operator
+ * keeps the sign of the dividend, so a negative start would otherwise produce
+ * a negative array index.
+ * NOTE(review): whether a link can reach this point with a negative
+ * dpia_preferred_eng_id depends on callers not visible here - confirm.
+ *
+ * return - index of the first free slot in search order, or -1 if the pool
+ * has no DIG link encoders or all of them are referenced.
+ */
+static inline int find_free_dio_link_enc(const struct resource_context *res_ctx,
+		const struct dc_link *link, const struct resource_pool *pool)
+{
+	int enc_count = pool->dig_link_enc_count;
+	int first = link->dpia_preferred_eng_id;
+	int i;
+
+	if (enc_count <= 0)
+		return -1;
+
+	if (first < 0)
+		first = 0;
+
+	for (i = 0; i < enc_count; i++) {
+		int enc = (first + i) % enc_count;
+
+		if (res_ctx->dio_link_enc_ref_cnts[enc] == 0)
+			return enc;
+	}
+
+	return -1;
+}
+
+/*
+ * Bind DIO link encoder slot @enc_index to @link_index and set its reference
+ * count to one. No bounds check is done on @enc_index.
+ */
+static inline void acquire_dio_link_enc(
+		struct resource_context *res_ctx,
+		unsigned int link_index,
+		int enc_index)
+{
+	res_ctx->dio_link_enc_ref_cnts[enc_index] = 1;
+	res_ctx->dio_link_enc_to_link_idx[enc_index] = link_index;
+}
+
+/* Take one more reference on DIO link encoder slot @enc_index. */
+static inline void retain_dio_link_enc(
+		struct resource_context *res_ctx,
+		int enc_index)
+{
+	res_ctx->dio_link_enc_ref_cnts[enc_index] += 1;
+}
+
+/*
+ * Drop one reference on DIO link encoder slot @enc_index. The count is
+ * asserted to be positive beforehand, but the decrement is performed either
+ * way.
+ */
+static inline void release_dio_link_enc(
+		struct resource_context *res_ctx,
+		int enc_index)
+{
+	ASSERT(res_ctx->dio_link_enc_ref_cnts[enc_index] > 0);
+	res_ctx->dio_link_enc_ref_cnts[enc_index] -= 1;
+}
+
+/*
+ * Check, against the res_ctx of link->dc->current_state, whether DIO link
+ * encoder slot @enc_index is referenced by a link other than @link.
+ *
+ * return - true if so, with the owning link's index stored in *@link_index;
+ * false otherwise, in which case *@link_index is not written.
+ */
+static bool is_dio_enc_acquired_by_other_link(const struct dc_link *link,
+		int enc_index,
+		int *link_index)
+{
+	const struct resource_context *cur_res_ctx =
+			&link->dc->current_state->res_ctx;
+
+	if (!(cur_res_ctx->dio_link_enc_ref_cnts[enc_index] > 0))
+		return false;
+
+	if (cur_res_ctx->dio_link_enc_to_link_idx[enc_index] == link->link_index)
+		return false;
+
+	/* pass the link_index that acquired the enc_index */
+	*link_index = cur_res_ctx->dio_link_enc_to_link_idx[enc_index];
+	return true;
+}
+
+/*
+ * Move the acquisition of DIO link encoder slot @old_encoder over to slot
+ * @new_encoder inside @context.
+ *
+ * The reference count and link index of the old slot are copied to the new
+ * slot and the old slot's reference count is reset to zero. Every stream's OTG
+ * master pipe that currently points at the old pool encoder is then
+ * re-pointed at the new one.
+ */
+static void swap_dio_link_enc_to_muxable_ctx(struct dc_state *context,
+		const struct resource_pool *pool,
+		int new_encoder,
+		int old_encoder)
+{
+	struct resource_context *res_ctx = &context->res_ctx;
+	int n_streams = context->stream_count;
+	int idx;
+
+	res_ctx->dio_link_enc_to_link_idx[new_encoder] = res_ctx->dio_link_enc_to_link_idx[old_encoder];
+	res_ctx->dio_link_enc_ref_cnts[new_encoder] = res_ctx->dio_link_enc_ref_cnts[old_encoder];
+	res_ctx->dio_link_enc_ref_cnts[old_encoder] = 0;
+
+	for (idx = 0; idx < n_streams; idx++) {
+		struct pipe_ctx *master = resource_get_otg_master_for_stream(
+				res_ctx, context->streams[idx]);
+
+		if (!master)
+			continue;
+		if (master->link_res.dio_link_enc != pool->link_encoders[old_encoder])
+			continue;
+		master->link_res.dio_link_enc = pool->link_encoders[new_encoder];
+	}
+}
+
+/*
+ * Give @pipe_ctx a DIO link encoder for @stream's link inside @context.
+ *
+ * - If a slot is already acquired for the link, one more reference is taken.
+ * - Otherwise a link with flexible DIG mapping gets any free slot, while a
+ *   fixed-mapping link must use the slot named by its eng_id. If that slot is
+ *   held by another link, the other link is moved to a free slot first; if no
+ *   free slot exists the function fails immediately.
+ *
+ * When a slot was obtained, the matching pool encoder is stored in
+ * pipe_ctx->link_res.dio_link_enc; otherwise that field is left as it was.
+ *
+ * return - false if a needed swap could not find a free slot, otherwise
+ * whether pipe_ctx->link_res.dio_link_enc is non-NULL on exit.
+ */
+static bool add_dio_link_enc_to_ctx(const struct dc *dc,
+		struct dc_state *context,
+		const struct resource_pool *pool,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_state *stream)
+{
+	struct resource_context *res_ctx = &context->res_ctx;
+	int enc = find_acquired_dio_link_enc_for_link(res_ctx, stream->link);
+
+	if (enc >= 0) {
+		/* the link already owns an encoder: share it */
+		retain_dio_link_enc(res_ctx, enc);
+		pipe_ctx->link_res.dio_link_enc = pool->link_encoders[enc];
+		return pipe_ctx->link_res.dio_link_enc != NULL;
+	}
+
+	if (stream->link->is_dig_mapping_flexible) {
+		enc = find_free_dio_link_enc(res_ctx, stream->link, pool);
+	} else {
+		int owner_link_index = 0;
+
+		enc = find_fixed_dio_link_enc(stream->link);
+		/* Fixed mapping link can only use its fixed link encoder.
+		 * If the encoder is acquired by other link then get a new free
+		 * encoder and swap the new one into the acquiring link.
+		 */
+		if (enc >= 0 &&
+				is_dio_enc_acquired_by_other_link(stream->link, enc, &owner_link_index)) {
+			int spare = find_free_dio_link_enc(res_ctx, dc->links[owner_link_index], pool);
+
+			if (spare < 0)
+				return false;
+			swap_dio_link_enc_to_muxable_ctx(context, pool, spare, enc);
+		}
+	}
+
+	if (enc >= 0) {
+		acquire_dio_link_enc(res_ctx, stream->link->link_index, enc);
+		pipe_ctx->link_res.dio_link_enc = pool->link_encoders[enc];
+	}
+
+	return pipe_ctx->link_res.dio_link_enc != NULL;
+}
+
+/*
+ * Drop @stream's reference on the DIO link encoder acquired for its link and
+ * clear pipe_ctx->link_res.dio_link_enc. Nothing is changed when the stream
+ * has no link or no slot is currently acquired for that link.
+ */
+static void remove_dio_link_enc_from_ctx(struct resource_context *res_ctx,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_state *stream)
+{
+	int enc;
+
+	if (!stream->link)
+		return;
+
+	enc = find_acquired_dio_link_enc_for_link(res_ctx, stream->link);
+	if (enc < 0)
+		return;
+
+	release_dio_link_enc(res_ctx, enc);
+	pipe_ctx->link_res.dio_link_enc = NULL;
+}
+
+/*
+ * Count the pipes among the first pool->pipe_count entries of @context that
+ * resource_is_pipe_type() reports as FREE_PIPE.
+ */
+static int get_num_of_free_pipes(const struct resource_pool *pool, const struct dc_state *context)
+{
+	int free_pipes = 0;
+	int idx;
+
+	for (idx = 0; idx < pool->pipe_count; idx++) {
+		if (!resource_is_pipe_type(&context->res_ctx.pipe_ctx[idx], FREE_PIPE))
+			continue;
+		free_pipes++;
+	}
+	return free_pipes;
}
+/*
+ * Add @stream to @new_ctx by calling the add_stream_to_ctx hook of the
+ * resource pool owned by the stream's dc, and return that hook's status.
+ *
+ * Note that @pool is not consulted: the hook is always taken from
+ * stream->ctx->dc->res_pool.
+ */
+enum dc_status resource_add_otg_master_for_stream_output(struct dc_state *new_ctx,
+		const struct resource_pool *pool,
+		struct dc_stream_state *stream)
+{
+	struct dc *owner = stream->ctx->dc;
+
+	return owner->res_pool->funcs->add_stream_to_ctx(owner, new_ctx, stream);
+}
+
+/*
+ * Release everything held by the OTG master pipe of @stream in @context and
+ * wipe that pipe.
+ *
+ * Does nothing if the stream has no OTG master in @context. The pipe is
+ * asserted to have a single ODM slice, no plane and a stream encoder. In
+ * order, the function: marks the stream encoder as not acquired; for a
+ * 128b/132b signal marks the HPO DP stream encoder as not acquired and drops
+ * the HPO DP link encoder reference; drops the DIO link encoder reference when
+ * config.unify_link_enc_assignment is set; marks the audio resource, if any,
+ * as not acquired; unreferences the clock source; calls the optional pool hook
+ * remove_stream_from_ctx; and finally zeroes the whole pipe_ctx.
+ */
+void resource_remove_otg_master_for_stream_output(struct dc_state *context,
+		const struct resource_pool *pool,
+		struct dc_stream_state *stream)
+{
+	struct resource_context *res_ctx = &context->res_ctx;
+	struct pipe_ctx *master = resource_get_otg_master_for_stream(res_ctx, stream);
+	struct dc *dc;
+
+	if (!master)
+		return;
+
+	ASSERT(resource_get_odm_slice_count(master) == 1);
+	ASSERT(master->plane_state == NULL);
+	ASSERT(master->stream_res.stream_enc);
+	update_stream_engine_usage(res_ctx, pool,
+			master->stream_res.stream_enc, false);
+
+	/* only look at the stream's dc once an OTG master is known to exist */
+	dc = stream->ctx->dc;
+
+	if (dc->link_srv->dp_is_128b_132b_signal(master)) {
+		update_hpo_dp_stream_engine_usage(res_ctx, pool,
+				master->stream_res.hpo_dp_stream_enc, false);
+		remove_hpo_dp_link_enc_from_ctx(res_ctx, master, stream);
+	}
+
+	if (dc->config.unify_link_enc_assignment)
+		remove_dio_link_enc_from_ctx(res_ctx, master, stream);
+
+	if (master->stream_res.audio)
+		update_audio_usage(res_ctx, pool,
+				master->stream_res.audio, false);
+
+	resource_unreference_clock_source(res_ctx, pool, master->clock_source);
+
+	if (pool->funcs->remove_stream_from_ctx)
+		pool->funcs->remove_stream_from_ctx(dc, context, stream);
+
+	memset(master, 0, sizeof(*master));
+}
+
+/* For each OPP head of an OTG master, add top plane at plane index 0.
+ *
+ * In the following example, the stream has 2 ODM slices without a top plane.
+ * By adding a plane 0 to OPP heads, we are configuring our hardware to render
+ * plane 0 by using each OPP head's DPP.
+ *
+ * Inter-pipe Relation (Before Adding Plane)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | | slice 0 | |
+ * | 0 | |blank ----ODM----------- |
+ * | | | slice 1 | | |
+ * | 1 | |blank ---- | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Adding Plane)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------------------ODM----------- |
+ * | | plane 0 | slice 1 | | |
+ * | 1 | ------------------------- | |
+ * |________|_______________|___________|_____________|
+ */
static bool add_plane_to_opp_head_pipes(struct pipe_ctx *otg_master_pipe,
struct dc_plane_state *plane_state,
struct dc_state *context)
@@ -1846,24 +2936,36 @@ static bool add_plane_to_opp_head_pipes(struct pipe_ctx *otg_master_pipe,
return true;
}
-static void insert_secondary_dpp_pipe_with_plane(struct pipe_ctx *opp_head_pipe,
- struct pipe_ctx *sec_pipe, struct dc_plane_state *plane_state)
-{
- struct pipe_ctx *tail_pipe = get_tail_pipe(opp_head_pipe);
-
- tail_pipe->bottom_pipe = sec_pipe;
- sec_pipe->top_pipe = tail_pipe;
- if (tail_pipe->prev_odm_pipe) {
- ASSERT(tail_pipe->prev_odm_pipe->bottom_pipe);
- sec_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe;
- tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = sec_pipe;
- }
- sec_pipe->plane_state = plane_state;
-}
-
-/* for each opp head pipe of an otg master pipe, acquire a secondary dpp pipe
- * and add the plane. So the plane is added to all MPC blend trees associated
- * with the otg master pipe.
+/* For each OPP head of an OTG master, acquire a secondary DPP pipe and add
+ * the plane. So the plane is added to all ODM slices associated with the OTG
+ * master pipe in the bottom layer.
+ *
+ * In the following example, the stream has 2 ODM slices and a top plane 0.
+ * By acquiring secondary DPP pipes and adding a plane 1, we are configuring our
+ * hardware to render plane 1 by acquiring a new pipe for each ODM slice and
+ * rendering plane 1 with the new pipes' DPPs, below plane 0 on the Z axis.
+ *
+ * Inter-pipe Relation (Before Adding Plane)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------------------ODM----------- |
+ * | | plane 0 | slice 1 | | |
+ * | 1 | ------------------------- | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Acquiring and Adding Plane)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------MPC---------ODM----------- |
+ * | | plane 1 | | | | |
+ * | 2 | ------------- | | | |
+ * | | plane 0 | slice 1 | | |
+ * | 1 | -------------MPC--------- | |
+ * | | plane 1 | | | |
+ * | 3 | ------------- | | |
+ * |________|_______________|___________|_____________|
*/
static bool acquire_secondary_dpp_pipes_and_add_plane(
struct pipe_ctx *otg_master_pipe,
@@ -1872,104 +2974,82 @@ static bool acquire_secondary_dpp_pipes_and_add_plane(
struct dc_state *cur_ctx,
struct resource_pool *pool)
{
- struct pipe_ctx *opp_head_pipe, *sec_pipe;
+ struct pipe_ctx *sec_pipe, *tail_pipe;
+ struct pipe_ctx *opp_heads[MAX_PIPES];
+ int opp_head_count;
+ int i;
- if (!pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe)
+ if (!pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe) {
+ ASSERT(0);
return false;
+ }
- opp_head_pipe = otg_master_pipe;
- while (opp_head_pipe) {
+ opp_head_count = resource_get_opp_heads_for_otg_master(otg_master_pipe,
+ &new_ctx->res_ctx, opp_heads);
+ if (get_num_of_free_pipes(pool, new_ctx) < opp_head_count)
+ /* not enough free pipes */
+ return false;
+
+ for (i = 0; i < opp_head_count; i++) {
sec_pipe = pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe(
cur_ctx,
new_ctx,
pool,
- opp_head_pipe);
- if (!sec_pipe) {
- /* try tearing down MPCC combine */
- int pipe_idx = acquire_first_split_pipe(
- &new_ctx->res_ctx, pool,
- otg_master_pipe->stream);
-
- if (pipe_idx >= 0)
- sec_pipe = &new_ctx->res_ctx.pipe_ctx[pipe_idx];
+ opp_heads[i]);
+ ASSERT(sec_pipe);
+ sec_pipe->plane_state = plane_state;
+
+ /* establish pipe relationship */
+ tail_pipe = get_tail_pipe(opp_heads[i]);
+ tail_pipe->bottom_pipe = sec_pipe;
+ sec_pipe->top_pipe = tail_pipe;
+ sec_pipe->bottom_pipe = NULL;
+ if (tail_pipe->prev_odm_pipe) {
+ ASSERT(tail_pipe->prev_odm_pipe->bottom_pipe);
+ sec_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe;
+ tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = sec_pipe;
+ } else {
+ sec_pipe->prev_odm_pipe = NULL;
}
-
- if (!sec_pipe)
- return false;
-
- insert_secondary_dpp_pipe_with_plane(opp_head_pipe, sec_pipe,
- plane_state);
- opp_head_pipe = opp_head_pipe->next_odm_pipe;
}
return true;
}
-bool dc_add_plane_to_context(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state *plane_state,
- struct dc_state *context)
+bool resource_append_dpp_pipes_for_plane_composition(
+ struct dc_state *new_ctx,
+ struct dc_state *cur_ctx,
+ struct resource_pool *pool,
+ struct pipe_ctx *otg_master_pipe,
+ struct dc_plane_state *plane_state)
{
- struct resource_pool *pool = dc->res_pool;
- struct pipe_ctx *otg_master_pipe;
- struct dc_stream_status *stream_status = NULL;
- bool added = false;
-
- stream_status = dc_stream_get_status_from_state(context, stream);
- if (stream_status == NULL) {
- dm_error("Existing stream not found; failed to attach surface!\n");
- goto out;
- } else if (stream_status->plane_count == MAX_SURFACE_NUM) {
- dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
- plane_state, MAX_SURFACE_NUM);
- goto out;
- }
+ bool success;
- otg_master_pipe = resource_get_otg_master_for_stream(
- &context->res_ctx, stream);
if (otg_master_pipe->plane_state == NULL)
- added = add_plane_to_opp_head_pipes(otg_master_pipe,
- plane_state, context);
+ success = add_plane_to_opp_head_pipes(otg_master_pipe,
+ plane_state, new_ctx);
else
- added = acquire_secondary_dpp_pipes_and_add_plane(
- otg_master_pipe, plane_state, context,
- dc->current_state, pool);
- if (added) {
- stream_status->plane_states[stream_status->plane_count] =
- plane_state;
- stream_status->plane_count++;
- dc_plane_state_retain(plane_state);
- }
-
-out:
- return added;
+ success = acquire_secondary_dpp_pipes_and_add_plane(
+ otg_master_pipe, plane_state, new_ctx,
+ cur_ctx, pool);
+ if (success) {
+ /* when appending a plane mpc slice count changes from 0 to 1 */
+ success = update_pipe_params_after_mpc_slice_count_change(
+ plane_state, new_ctx, pool);
+ if (!success)
+ resource_remove_dpp_pipes_for_plane_composition(new_ctx,
+ pool, plane_state);
+ }
+
+ return success;
}
-bool dc_remove_plane_from_context(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state *plane_state,
- struct dc_state *context)
+void resource_remove_dpp_pipes_for_plane_composition(
+ struct dc_state *context,
+ const struct resource_pool *pool,
+ const struct dc_plane_state *plane_state)
{
int i;
- struct dc_stream_status *stream_status = NULL;
- struct resource_pool *pool = dc->res_pool;
- if (!plane_state)
- return true;
-
- for (i = 0; i < context->stream_count; i++)
- if (context->streams[i] == stream) {
- stream_status = &context->stream_status[i];
- break;
- }
-
- if (stream_status == NULL) {
- dm_error("Existing stream not found; failed to remove plane.\n");
- return false;
- }
-
- /* release pipe for plane*/
for (i = pool->pipe_count - 1; i >= 0; i--) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
@@ -1994,116 +3074,336 @@ bool dc_remove_plane_from_context(
memset(pipe_ctx, 0, sizeof(*pipe_ctx));
}
}
+}
+/*
+ * Increase ODM slice count by 1 by acquiring pipes and adding a new ODM slice
+ * at the last index.
+ * return - true if a new ODM slice is added and required pipes are acquired.
+ * false if new_ctx is no longer a valid state after new ODM slice is added.
+ *
+ * This is achieved by duplicating MPC blending tree from previous ODM slice.
+ * In the following example, we have a single MPC tree and 1 ODM slice 0. We
+ * want to add a new ODM slice by duplicating the MPC blending tree and adding
+ * it as ODM slice 1.
+ *
+ * Inter-pipe Relation (Before Acquiring and Adding ODM Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------MPC---------ODM----------- |
+ * | | plane 1 | | | |
+ * | 1 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Acquiring and Adding ODM Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------MPC---------ODM----------- |
+ * | | plane 1 | | | | |
+ * | 1 | ------------- | | | |
+ * | | plane 0 | slice 1 | | |
+ * | 2 | -------------MPC--------- | |
+ * | | plane 1 | | | |
+ * | 3 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ */
+static bool acquire_pipes_and_add_odm_slice(
+ struct pipe_ctx *otg_master_pipe,
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ const struct resource_pool *pool)
+{
+ struct pipe_ctx *last_opp_head = get_last_opp_head(otg_master_pipe);
+ struct pipe_ctx *new_opp_head;
+ struct pipe_ctx *last_top_dpp_pipe, *last_bottom_dpp_pipe,
+ *new_top_dpp_pipe, *new_bottom_dpp_pipe;
- for (i = 0; i < stream_status->plane_count; i++) {
- if (stream_status->plane_states[i] == plane_state) {
- dc_plane_state_release(stream_status->plane_states[i]);
- break;
- }
- }
-
- if (i == stream_status->plane_count) {
- dm_error("Existing plane_state not found; failed to detach it!\n");
+ if (!pool->funcs->acquire_free_pipe_as_secondary_opp_head) {
+ ASSERT(0);
return false;
}
+ new_opp_head = pool->funcs->acquire_free_pipe_as_secondary_opp_head(
+ cur_ctx, new_ctx, pool,
+ otg_master_pipe);
+ if (!new_opp_head)
+ return false;
- stream_status->plane_count--;
-
- /* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */
- for (; i < stream_status->plane_count; i++)
- stream_status->plane_states[i] = stream_status->plane_states[i + 1];
+ last_opp_head->next_odm_pipe = new_opp_head;
+ new_opp_head->prev_odm_pipe = last_opp_head;
+ new_opp_head->next_odm_pipe = NULL;
+ new_opp_head->plane_state = last_opp_head->plane_state;
+ last_top_dpp_pipe = last_opp_head;
+ new_top_dpp_pipe = new_opp_head;
+
+ while (last_top_dpp_pipe->bottom_pipe) {
+ last_bottom_dpp_pipe = last_top_dpp_pipe->bottom_pipe;
+ new_bottom_dpp_pipe = pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe(
+ cur_ctx, new_ctx, pool,
+ new_opp_head);
+ if (!new_bottom_dpp_pipe)
+ return false;
- stream_status->plane_states[stream_status->plane_count] = NULL;
+ new_bottom_dpp_pipe->plane_state = last_bottom_dpp_pipe->plane_state;
+ new_top_dpp_pipe->bottom_pipe = new_bottom_dpp_pipe;
+ new_bottom_dpp_pipe->top_pipe = new_top_dpp_pipe;
+ last_bottom_dpp_pipe->next_odm_pipe = new_bottom_dpp_pipe;
+ new_bottom_dpp_pipe->prev_odm_pipe = last_bottom_dpp_pipe;
+ new_bottom_dpp_pipe->next_odm_pipe = NULL;
+ last_top_dpp_pipe = last_bottom_dpp_pipe;
+ }
return true;
}
-/**
- * dc_rem_all_planes_for_stream - Remove planes attached to the target stream.
+/*
+ * Decrease ODM slice count by 1 by releasing pipes and removing the ODM slice
+ * at the last index.
+ * return - true if the last ODM slice is removed and related pipes are
+ * released. false if there is no removable ODM slice.
*
- * @dc: Current dc state.
- * @stream: Target stream, which we want to remove the attached plans.
- * @context: New context.
+ * In the following example, we have 2 MPC trees and ODM slice 0 and slice 1.
+ * We want to remove the last ODM slice, i.e. slice 1. To do so we release
+ * secondary DPP pipe 3 and OPP head pipe 2.
*
- * Return:
- * Return true if DC was able to remove all planes from the target
- * stream, otherwise, return false.
+ * Inter-pipe Relation (Before Releasing and Removing ODM Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------MPC---------ODM----------- |
+ * | | plane 1 | | | | |
+ * | 1 | ------------- | | | |
+ * | | plane 0 | slice 1 | | |
+ * | 2 | -------------MPC--------- | |
+ * | | plane 1 | | | |
+ * | 3 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Releasing and Removing ODM Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | slice 0 | |
+ * | 0 | -------------MPC---------ODM----------- |
+ * | | plane 1 | | | |
+ * | 1 | ------------- | | |
+ * |________|_______________|___________|_____________|
*/
-bool dc_rem_all_planes_for_stream(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_state *context)
+static bool release_pipes_and_remove_odm_slice(
+ struct pipe_ctx *otg_master_pipe,
+ struct dc_state *context,
+ const struct resource_pool *pool)
{
- int i, old_plane_count;
- struct dc_stream_status *stream_status = NULL;
- struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+ struct pipe_ctx *last_opp_head = get_last_opp_head(otg_master_pipe);
+ struct pipe_ctx *tail_pipe = get_tail_pipe(last_opp_head);
- for (i = 0; i < context->stream_count; i++)
- if (context->streams[i] == stream) {
- stream_status = &context->stream_status[i];
- break;
- }
+ if (!pool->funcs->release_pipe) {
+ ASSERT(0);
+ return false;
+ }
- if (stream_status == NULL) {
- dm_error("Existing stream %p not found!\n", stream);
+ if (resource_is_pipe_type(last_opp_head, OTG_MASTER))
return false;
+
+ while (tail_pipe->top_pipe) {
+ tail_pipe->prev_odm_pipe->next_odm_pipe = NULL;
+ tail_pipe = tail_pipe->top_pipe;
+ pool->funcs->release_pipe(context, tail_pipe->bottom_pipe, pool);
+ tail_pipe->bottom_pipe = NULL;
}
+ last_opp_head->prev_odm_pipe->next_odm_pipe = NULL;
+ pool->funcs->release_pipe(context, last_opp_head, pool);
- old_plane_count = stream_status->plane_count;
+ return true;
+}
+
+/*
+ * Increase MPC slice count by 1 by acquiring a new DPP pipe and adding it as
+ * the last MPC slice of the plane associated with dpp_pipe.
+ *
+ * return - true if a new MPC slice is added and required pipes are acquired.
+ * false if new_ctx is no longer a valid state after new MPC slice is added.
+ *
+ * In the following example, we add a new MPC slice for plane 0 into the
+ * new_ctx. To do so we pass pipe 0 as dpp_pipe. The function acquires a new DPP
+ * pipe 2 for plane 0 as the bottom most pipe for plane 0.
+ *
+ * Inter-pipe Relation (Before Acquiring and Adding MPC Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | | |
+ * | 0 | -------------MPC----------------------- |
+ * | | plane 1 | | | |
+ * | 1 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Acquiring and Adding MPC Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | | |
+ * | 0 | -------------MPC----------------------- |
+ * | | plane 0 | | | |
+ * | 2 | ------------- | | |
+ * | | plane 1 | | | |
+ * | 1 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ */
+static bool acquire_dpp_pipe_and_add_mpc_slice(
+ struct pipe_ctx *dpp_pipe,
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ const struct resource_pool *pool)
+{
+ struct pipe_ctx *last_dpp_pipe =
+ get_last_dpp_pipe_in_mpcc_combine(dpp_pipe);
+ struct pipe_ctx *opp_head = resource_get_opp_head(dpp_pipe);
+ struct pipe_ctx *new_dpp_pipe;
- for (i = 0; i < old_plane_count; i++)
- del_planes[i] = stream_status->plane_states[i];
+ if (!pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe) {
+ ASSERT(0);
+ return false;
+ }
+ new_dpp_pipe = pool->funcs->acquire_free_pipe_as_secondary_dpp_pipe(
+ cur_ctx, new_ctx, pool, opp_head);
+ if (!new_dpp_pipe || resource_get_odm_slice_count(dpp_pipe) > 1)
+ return false;
- for (i = 0; i < old_plane_count; i++)
- if (!dc_remove_plane_from_context(dc, stream, del_planes[i], context))
- return false;
+ new_dpp_pipe->bottom_pipe = last_dpp_pipe->bottom_pipe;
+ if (new_dpp_pipe->bottom_pipe)
+ new_dpp_pipe->bottom_pipe->top_pipe = new_dpp_pipe;
+ new_dpp_pipe->top_pipe = last_dpp_pipe;
+ last_dpp_pipe->bottom_pipe = new_dpp_pipe;
+ new_dpp_pipe->plane_state = last_dpp_pipe->plane_state;
return true;
}
-static bool add_all_planes_for_stream(
- const struct dc *dc,
- struct dc_stream_state *stream,
- const struct dc_validation_set set[],
- int set_count,
- struct dc_state *context)
+/*
+ * Reduce MPC slice count by 1 by releasing the bottom DPP pipe in MPCC combine
+ * with dpp_pipe and removing last MPC slice of the plane associated with
+ * dpp_pipe.
+ *
+ * return - true if the last MPC slice of the plane associated with dpp_pipe is
+ * removed and last DPP pipe in MPCC combine with dpp_pipe is released.
+ * false if there is no removable MPC slice.
+ *
+ * In the following example, we remove an MPC slice for plane 0 from the
+ * context. To do so we pass pipe 0 as dpp_pipe. The function releases pipe 1 as
+ * it is the last pipe for plane 0.
+ *
+ * Inter-pipe Relation (Before Releasing and Removing MPC Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | | |
+ * | 0 | -------------MPC----------------------- |
+ * | | plane 0 | | | |
+ * | 1 | ------------- | | |
+ * | | plane 1 | | | |
+ * | 2 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ *
+ * Inter-pipe Relation (After Releasing and Removing MPC Slice)
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | plane 0 | | |
+ * | 0 | -------------MPC----------------------- |
+ * | | plane 1 | | | |
+ * | 2 | ------------- | | |
+ * |________|_______________|___________|_____________|
+ */
+static bool release_dpp_pipe_and_remove_mpc_slice(
+ struct pipe_ctx *dpp_pipe,
+ struct dc_state *context,
+ const struct resource_pool *pool)
{
- int i, j;
-
- for (i = 0; i < set_count; i++)
- if (set[i].stream == stream)
- break;
+ struct pipe_ctx *last_dpp_pipe =
+ get_last_dpp_pipe_in_mpcc_combine(dpp_pipe);
- if (i == set_count) {
- dm_error("Stream %p not found in set!\n", stream);
+ if (!pool->funcs->release_pipe) {
+ ASSERT(0);
return false;
}
- for (j = 0; j < set[i].plane_count; j++)
- if (!dc_add_plane_to_context(dc, stream, set[i].plane_states[j], context))
- return false;
+ if (resource_is_pipe_type(last_dpp_pipe, OPP_HEAD) ||
+ resource_get_odm_slice_count(dpp_pipe) > 1)
+ return false;
+
+ last_dpp_pipe->top_pipe->bottom_pipe = last_dpp_pipe->bottom_pipe;
+ if (last_dpp_pipe->bottom_pipe)
+ last_dpp_pipe->bottom_pipe->top_pipe = last_dpp_pipe->top_pipe;
+ pool->funcs->release_pipe(context, last_dpp_pipe, pool);
return true;
}
-bool dc_add_all_planes_for_stream(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state * const *plane_states,
- int plane_count,
- struct dc_state *context)
+bool resource_update_pipes_for_stream_with_slice_count(
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ const struct resource_pool *pool,
+ const struct dc_stream_state *stream,
+ int new_slice_count)
{
- struct dc_validation_set set;
int i;
+ struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(
+ &new_ctx->res_ctx, stream);
+ int cur_slice_count;
+ bool result = true;
+
+ if (!otg_master)
+ return false;
- set.stream = stream;
- set.plane_count = plane_count;
+ cur_slice_count = resource_get_odm_slice_count(otg_master);
- for (i = 0; i < plane_count; i++)
- set.plane_states[i] = plane_states[i];
+ if (new_slice_count == cur_slice_count)
+ return result;
- return add_all_planes_for_stream(dc, stream, &set, 1, context);
+ if (new_slice_count > cur_slice_count)
+ for (i = 0; i < new_slice_count - cur_slice_count && result; i++)
+ result = acquire_pipes_and_add_odm_slice(
+ otg_master, new_ctx, cur_ctx, pool);
+ else
+ for (i = 0; i < cur_slice_count - new_slice_count && result; i++)
+ result = release_pipes_and_remove_odm_slice(
+ otg_master, new_ctx, pool);
+ if (result)
+ result = update_pipe_params_after_odm_slice_count_change(
+ otg_master, new_ctx, pool);
+ return result;
+}
+
+/*
+ * Change the MPC slice count of @plane in @new_ctx to @new_slice_count.
+ *
+ * The plane's DPP pipes are looked up in @new_ctx and the current MPC slice
+ * count is read from the first of them. If the count already matches, nothing
+ * is done. Otherwise MPC slices are added or removed one at a time until the
+ * target is reached or a step fails, and on success the scaling parameters of
+ * the plane's pipes are rebuilt.
+ *
+ * A plane with no DPP pipe in @new_ctx is unexpected; it is asserted and
+ * reported as failure instead of going on to use a NULL pipe pointer.
+ *
+ * return - true if the slice count already matched, or if it was changed and
+ * the pipe parameters were updated; false otherwise.
+ */
+bool resource_update_pipes_for_plane_with_slice_count(
+		struct dc_state *new_ctx,
+		const struct dc_state *cur_ctx,
+		const struct resource_pool *pool,
+		const struct dc_plane_state *plane,
+		int new_slice_count)
+{
+	int i;
+	int dpp_pipe_count;
+	int cur_slice_count;
+	struct pipe_ctx *dpp_pipes[MAX_PIPES] = {0};
+	bool result = true;
+
+	dpp_pipe_count = resource_get_dpp_pipes_for_plane(plane,
+			&new_ctx->res_ctx, dpp_pipes);
+	if (dpp_pipe_count <= 0) {
+		/* dpp_pipes[0] is still NULL here and must not be used */
+		ASSERT(0);
+		return false;
+	}
+	cur_slice_count = resource_get_mpc_slice_count(dpp_pipes[0]);
+
+	if (new_slice_count == cur_slice_count)
+		return result;
+
+	if (new_slice_count > cur_slice_count)
+		for (i = 0; i < new_slice_count - cur_slice_count && result; i++)
+			result = acquire_dpp_pipe_and_add_mpc_slice(
+					dpp_pipes[0], new_ctx, cur_ctx, pool);
+	else
+		for (i = 0; i < cur_slice_count - new_slice_count && result; i++)
+			result = release_dpp_pipe_and_remove_mpc_slice(
+					dpp_pipes[0], new_ctx, pool);
+	if (result)
+		result = update_pipe_params_after_mpc_slice_count_change(
+				dpp_pipes[0]->plane_state, new_ctx, pool);
+	return result;
}
bool dc_is_timing_changed(struct dc_stream_state *cur_stream,
@@ -2154,6 +3454,8 @@ static bool are_stream_backends_same(
bool dc_is_stream_unchanged(
struct dc_stream_state *old_stream, struct dc_stream_state *stream)
{
+ if (!old_stream || !stream)
+ return false;
if (!are_stream_backends_same(old_stream, stream))
return false;
@@ -2193,122 +3495,6 @@ bool dc_is_stream_scaling_unchanged(struct dc_stream_state *old_stream,
return true;
}
-static void update_stream_engine_usage(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct stream_encoder *stream_enc,
- bool acquired)
-{
- int i;
-
- for (i = 0; i < pool->stream_enc_count; i++) {
- if (pool->stream_enc[i] == stream_enc)
- res_ctx->is_stream_enc_acquired[i] = acquired;
- }
-}
-
-static void update_hpo_dp_stream_engine_usage(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct hpo_dp_stream_encoder *hpo_dp_stream_enc,
- bool acquired)
-{
- int i;
-
- for (i = 0; i < pool->hpo_dp_stream_enc_count; i++) {
- if (pool->hpo_dp_stream_enc[i] == hpo_dp_stream_enc)
- res_ctx->is_hpo_dp_stream_enc_acquired[i] = acquired;
- }
-}
-
-static inline int find_acquired_hpo_dp_link_enc_for_link(
- const struct resource_context *res_ctx,
- const struct dc_link *link)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_to_link_idx); i++)
- if (res_ctx->hpo_dp_link_enc_ref_cnts[i] > 0 &&
- res_ctx->hpo_dp_link_enc_to_link_idx[i] == link->link_index)
- return i;
-
- return -1;
-}
-
-static inline int find_free_hpo_dp_link_enc(const struct resource_context *res_ctx,
- const struct resource_pool *pool)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_ref_cnts); i++)
- if (res_ctx->hpo_dp_link_enc_ref_cnts[i] == 0)
- break;
-
- return (i < ARRAY_SIZE(res_ctx->hpo_dp_link_enc_ref_cnts) &&
- i < pool->hpo_dp_link_enc_count) ? i : -1;
-}
-
-static inline void acquire_hpo_dp_link_enc(
- struct resource_context *res_ctx,
- unsigned int link_index,
- int enc_index)
-{
- res_ctx->hpo_dp_link_enc_to_link_idx[enc_index] = link_index;
- res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] = 1;
-}
-
-static inline void retain_hpo_dp_link_enc(
- struct resource_context *res_ctx,
- int enc_index)
-{
- res_ctx->hpo_dp_link_enc_ref_cnts[enc_index]++;
-}
-
-static inline void release_hpo_dp_link_enc(
- struct resource_context *res_ctx,
- int enc_index)
-{
- ASSERT(res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] > 0);
- res_ctx->hpo_dp_link_enc_ref_cnts[enc_index]--;
-}
-
-static bool add_hpo_dp_link_enc_to_ctx(struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct pipe_ctx *pipe_ctx,
- struct dc_stream_state *stream)
-{
- int enc_index;
-
- enc_index = find_acquired_hpo_dp_link_enc_for_link(res_ctx, stream->link);
-
- if (enc_index >= 0) {
- retain_hpo_dp_link_enc(res_ctx, enc_index);
- } else {
- enc_index = find_free_hpo_dp_link_enc(res_ctx, pool);
- if (enc_index >= 0)
- acquire_hpo_dp_link_enc(res_ctx, stream->link->link_index, enc_index);
- }
-
- if (enc_index >= 0)
- pipe_ctx->link_res.hpo_dp_link_enc = pool->hpo_dp_link_enc[enc_index];
-
- return pipe_ctx->link_res.hpo_dp_link_enc != NULL;
-}
-
-static void remove_hpo_dp_link_enc_from_ctx(struct resource_context *res_ctx,
- struct pipe_ctx *pipe_ctx,
- struct dc_stream_state *stream)
-{
- int enc_index;
-
- enc_index = find_acquired_hpo_dp_link_enc_for_link(res_ctx, stream->link);
-
- if (enc_index >= 0) {
- release_hpo_dp_link_enc(res_ctx, enc_index);
- pipe_ctx->link_res.hpo_dp_link_enc = NULL;
- }
-}
-
/* TODO: release audio object */
void update_audio_usage(
struct resource_context *res_ctx,
@@ -2323,42 +3509,6 @@ void update_audio_usage(
}
}
-static int acquire_first_free_pipe(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct dc_stream_state *stream)
-{
- int i;
-
- for (i = 0; i < pool->pipe_count; i++) {
- if (!res_ctx->pipe_ctx[i].stream) {
- struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
-
- pipe_ctx->stream_res.tg = pool->timing_generators[i];
- pipe_ctx->plane_res.mi = pool->mis[i];
- pipe_ctx->plane_res.hubp = pool->hubps[i];
- pipe_ctx->plane_res.ipp = pool->ipps[i];
- pipe_ctx->plane_res.xfm = pool->transforms[i];
- pipe_ctx->plane_res.dpp = pool->dpps[i];
- pipe_ctx->stream_res.opp = pool->opps[i];
- if (pool->dpps[i])
- pipe_ctx->plane_res.mpcc_inst = pool->dpps[i]->inst;
- pipe_ctx->pipe_idx = i;
-
- if (i >= pool->timing_generator_count) {
- int tg_inst = pool->timing_generator_count - 1;
-
- pipe_ctx->stream_res.tg = pool->timing_generators[tg_inst];
- pipe_ctx->stream_res.opp = pool->opps[tg_inst];
- }
-
- pipe_ctx->stream = stream;
- return i;
- }
- }
- return -1;
-}
-
static struct hpo_dp_stream_encoder *find_first_free_match_hpo_dp_stream_enc_for_link(
struct resource_context *res_ctx,
const struct resource_pool *pool,
@@ -2385,6 +3535,9 @@ static struct audio *find_first_free_audio(
{
int i, available_audio_count;
+ if (id == ENGINE_ID_UNKNOWN)
+ return NULL;
+
available_audio_count = pool->audio_count;
for (i = 0; i < available_audio_count; i++) {
@@ -2409,118 +3562,6 @@ static struct audio *find_first_free_audio(
return NULL;
}
-/*
- * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state.
- */
-enum dc_status dc_add_stream_to_ctx(
- struct dc *dc,
- struct dc_state *new_ctx,
- struct dc_stream_state *stream)
-{
- enum dc_status res;
- DC_LOGGER_INIT(dc->ctx->logger);
-
- if (new_ctx->stream_count >= dc->res_pool->timing_generator_count) {
- DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream);
- return DC_ERROR_UNEXPECTED;
- }
-
- new_ctx->streams[new_ctx->stream_count] = stream;
- dc_stream_retain(stream);
- new_ctx->stream_count++;
-
- res = dc->res_pool->funcs->add_stream_to_ctx(dc, new_ctx, stream);
- if (res != DC_OK)
- DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res);
-
- return res;
-}
-
-/*
- * dc_remove_stream_from_ctx() - Remove a stream from a dc_state.
- */
-enum dc_status dc_remove_stream_from_ctx(
- struct dc *dc,
- struct dc_state *new_ctx,
- struct dc_stream_state *stream)
-{
- int i;
- struct dc_context *dc_ctx = dc->ctx;
- struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream(&new_ctx->res_ctx, stream);
- struct pipe_ctx *odm_pipe;
-
- if (!del_pipe) {
- DC_ERROR("Pipe not found for stream %p !\n", stream);
- return DC_ERROR_UNEXPECTED;
- }
-
- odm_pipe = del_pipe->next_odm_pipe;
-
- /* Release primary pipe */
- ASSERT(del_pipe->stream_res.stream_enc);
- update_stream_engine_usage(
- &new_ctx->res_ctx,
- dc->res_pool,
- del_pipe->stream_res.stream_enc,
- false);
-
- if (dc->link_srv->dp_is_128b_132b_signal(del_pipe)) {
- update_hpo_dp_stream_engine_usage(
- &new_ctx->res_ctx, dc->res_pool,
- del_pipe->stream_res.hpo_dp_stream_enc,
- false);
- remove_hpo_dp_link_enc_from_ctx(&new_ctx->res_ctx, del_pipe, del_pipe->stream);
- }
-
- if (del_pipe->stream_res.audio)
- update_audio_usage(
- &new_ctx->res_ctx,
- dc->res_pool,
- del_pipe->stream_res.audio,
- false);
-
- resource_unreference_clock_source(&new_ctx->res_ctx,
- dc->res_pool,
- del_pipe->clock_source);
-
- if (dc->res_pool->funcs->remove_stream_from_ctx)
- dc->res_pool->funcs->remove_stream_from_ctx(dc, new_ctx, stream);
-
- while (odm_pipe) {
- struct pipe_ctx *next_odm_pipe = odm_pipe->next_odm_pipe;
-
- memset(odm_pipe, 0, sizeof(*odm_pipe));
- odm_pipe = next_odm_pipe;
- }
- memset(del_pipe, 0, sizeof(*del_pipe));
-
- for (i = 0; i < new_ctx->stream_count; i++)
- if (new_ctx->streams[i] == stream)
- break;
-
- if (new_ctx->streams[i] != stream) {
- DC_ERROR("Context doesn't have stream %p !\n", stream);
- return DC_ERROR_UNEXPECTED;
- }
-
- dc_stream_release(new_ctx->streams[i]);
- new_ctx->stream_count--;
-
- /* Trim back arrays */
- for (; i < new_ctx->stream_count; i++) {
- new_ctx->streams[i] = new_ctx->streams[i + 1];
- new_ctx->stream_status[i] = new_ctx->stream_status[i + 1];
- }
-
- new_ctx->streams[new_ctx->stream_count] = NULL;
- memset(
- &new_ctx->stream_status[new_ctx->stream_count],
- 0,
- sizeof(new_ctx->stream_status[0]));
-
- return DC_OK;
-}
-
static struct dc_stream_state *find_pll_sharable_stream(
struct dc_stream_state *stream_needs_pll,
struct dc_state *context)
@@ -2561,10 +3602,13 @@ static int get_norm_pix_clk(const struct dc_crtc_timing *timing)
break;
case COLOR_DEPTH_121212:
normalized_pix_clk = (pix_clk * 36) / 24;
- break;
+ break;
+ case COLOR_DEPTH_141414:
+ normalized_pix_clk = (pix_clk * 42) / 24;
+ break;
case COLOR_DEPTH_161616:
normalized_pix_clk = (pix_clk * 48) / 24;
- break;
+ break;
default:
ASSERT(0);
break;
@@ -2698,17 +3742,119 @@ static int acquire_resource_from_hw_enabled_state(
return -1;
}
-static void mark_seamless_boot_stream(
- const struct dc *dc,
- struct dc_stream_state *stream)
+static void mark_seamless_boot_stream(const struct dc *dc,
+ struct dc_stream_state *stream)
{
struct dc_bios *dcb = dc->ctx->dc_bios;
- if (dc->config.allow_seamless_boot_optimization &&
- !dcb->funcs->is_accelerated_mode(dcb)) {
- if (dc_validate_boot_timing(dc, stream->sink, &stream->timing))
- stream->apply_seamless_boot_optimization = true;
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (stream->apply_seamless_boot_optimization)
+ return;
+ if (!dc->config.allow_seamless_boot_optimization)
+ return;
+ if (dcb->funcs->is_accelerated_mode(dcb))
+ return;
+ if (dc_validate_boot_timing(dc, stream->sink, &stream->timing)) {
+ stream->apply_seamless_boot_optimization = true;
+ DC_LOG_DC("Marked stream for seamless boot optimization\n");
+ }
+}
+
+/*
+ * Acquire a pipe as OTG master and assign to the stream in new dc context.
+ * return - true if OTG master pipe is acquired and new dc context is updated.
+ * false if it fails to acquire an OTG master pipe for this stream.
+ *
+ * In the example below, we acquired pipe 0 as OTG master pipe for the stream.
+ * After the function its Inter-pipe Relation is represented by the diagram
+ * below.
+ *
+ * Inter-pipe Relation
+ * __________________________________________________
+ * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
+ * | | | | |
+ * | 0 | |blank ------------------ |
+ * |________|_______________|___________|_____________|
+ */
+static bool acquire_otg_master_pipe_for_stream(
+ const struct dc_state *cur_ctx,
+ struct dc_state *new_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream)
+{
+ /* TODO: Move this function to DCN specific resource file and acquire
+ * DSC resource here. The reason is that the function should have the
+ * same level of responsibility as when we acquire secondary OPP head.
+ * We acquire DSC when we acquire secondary OPP head, so we should
+ * acquire DSC when we acquire OTG master.
+ */
+ int pipe_idx;
+ struct pipe_ctx *pipe_ctx = NULL;
+
+ /*
+ * Upper level code is responsible to optimize unnecessary addition and
+ * removal for unchanged streams. So unchanged stream will keep the same
+ * OTG master instance allocated. When current stream is removed and a
+ * new stream is added, we want to reuse the OTG instance made available
+ * by the removed stream first. If not found, we try to avoid using
+ * any free pipes already used in current context as this could tear
+ * down existing ODM/MPC/MPO configuration unnecessarily.
+ */
+
+ /*
+ * Try to acquire the same OTG master already in use. This is not
+ * optimal because resetting an enabled OTG master pipe for a new stream
+ * requires an extra frame of wait. However there are test automation
+ * and eDP assumptions that rely on reusing the same OTG master pipe
+ * during mode change. We have to keep this logic as is for now.
+ */
+ pipe_idx = recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+ &cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+ /*
+ * Try to acquire a pipe not used in current resource context to avoid
+ * pipe swapping.
+ */
+ if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+ pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx(
+ &cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+ /*
+ * If pipe swapping is unavoidable, try to acquire pipe used as
+ * secondary DPP pipe in current state as we prioritize to support more
+ * streams over supporting MPO planes.
+ */
+ if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+ pipe_idx = resource_find_free_pipe_used_as_cur_sec_dpp(
+ &cur_ctx->res_ctx, &new_ctx->res_ctx, pool);
+ if (pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+ pipe_idx = resource_find_any_free_pipe(&new_ctx->res_ctx, pool);
+ if (pipe_idx != FREE_PIPE_INDEX_NOT_FOUND) {
+ pipe_ctx = &new_ctx->res_ctx.pipe_ctx[pipe_idx];
+ memset(pipe_ctx, 0, sizeof(*pipe_ctx));
+ pipe_ctx->pipe_idx = pipe_idx;
+ pipe_ctx->stream_res.tg = pool->timing_generators[pipe_idx];
+ pipe_ctx->plane_res.mi = pool->mis[pipe_idx];
+ pipe_ctx->plane_res.hubp = pool->hubps[pipe_idx];
+ pipe_ctx->plane_res.ipp = pool->ipps[pipe_idx];
+ pipe_ctx->plane_res.xfm = pool->transforms[pipe_idx];
+ pipe_ctx->plane_res.dpp = pool->dpps[pipe_idx];
+ pipe_ctx->stream_res.opp = pool->opps[pipe_idx];
+ if (pool->dpps[pipe_idx])
+ pipe_ctx->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst;
+
+ if (pipe_idx >= pool->timing_generator_count && pool->timing_generator_count != 0) {
+ int tg_inst = pool->timing_generator_count - 1;
+
+ pipe_ctx->stream_res.tg = pool->timing_generators[tg_inst];
+ pipe_ctx->stream_res.opp = pool->opps[tg_inst];
+ }
+
+ pipe_ctx->stream = stream;
+ } else {
+ pipe_idx = acquire_first_split_pipe(&new_ctx->res_ctx, pool, stream);
}
+
+ return pipe_idx != FREE_PIPE_INDEX_NOT_FOUND;
}
enum dc_status resource_map_pool_resources(
@@ -2721,6 +3867,8 @@ enum dc_status resource_map_pool_resources(
struct dc_context *dc_ctx = dc->ctx;
struct pipe_ctx *pipe_ctx = NULL;
int pipe_idx = -1;
+ bool acquired = false;
+ bool is_dio_encoder = true;
calculate_phy_pix_clks(stream);
@@ -2734,20 +3882,20 @@ enum dc_status resource_map_pool_resources(
if (pipe_idx < 0)
/* hw resource was assigned to other stream */
stream->apply_seamless_boot_optimization = false;
+ else
+ acquired = true;
}
- if (pipe_idx < 0)
+ if (!acquired)
/* acquire new resources */
- pipe_idx = acquire_first_free_pipe(&context->res_ctx, pool, stream);
+ acquired = acquire_otg_master_pipe_for_stream(dc->current_state,
+ context, pool, stream);
- if (pipe_idx < 0)
- pipe_idx = acquire_first_split_pipe(&context->res_ctx, pool, stream);
+ pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream);
- if (pipe_idx < 0 || context->res_ctx.pipe_ctx[pipe_idx].stream_res.tg == NULL)
+ if (!pipe_ctx || pipe_ctx->stream_res.tg == NULL)
return DC_NO_CONTROLLER_RESOURCE;
- pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
-
pipe_ctx->stream_res.stream_enc =
dc->res_pool->funcs->find_first_free_match_stream_enc_for_link(
&context->res_ctx, pool, stream);
@@ -2763,9 +3911,15 @@ enum dc_status resource_map_pool_resources(
/* Allocate DP HPO Stream Encoder based on signal, hw capabilities
* and link settings
*/
- if (dc_is_dp_signal(stream->signal)) {
- if (!dc->link_srv->dp_decide_link_settings(stream, &pipe_ctx->link_config.dp_link_settings))
+ if (dc_is_dp_signal(stream->signal) ||
+ dc_is_virtual_signal(stream->signal)) {
+ if (!dc->link_srv->dp_decide_link_settings(stream,
+ &pipe_ctx->link_config.dp_link_settings))
return DC_FAIL_DP_LINK_BANDWIDTH;
+
+ dc->link_srv->dp_decide_tunnel_settings(stream,
+ &pipe_ctx->link_config.dp_tunnel_settings);
+
if (dc->link_srv->dp_get_encoding_format(
&pipe_ctx->link_config.dp_link_settings) == DP_128b_132b_ENCODING) {
pipe_ctx->stream_res.hpo_dp_stream_enc =
@@ -2784,10 +3938,16 @@ enum dc_status resource_map_pool_resources(
}
}
+ if (dc->config.unify_link_enc_assignment && is_dio_encoder)
+ if (!add_dio_link_enc_to_ctx(dc, context, pool, pipe_ctx, stream))
+ return DC_NO_LINK_ENC_RESOURCE;
+
/* TODO: Add check if ASIC support and EDID audio */
if (!stream->converter_disable_audio &&
dc_is_audio_capable_signal(pipe_ctx->stream->signal) &&
- stream->audio_info.mode_count && stream->audio_info.flags.all) {
+ stream->audio_info.mode_count &&
+ (stream->audio_info.flags.all ||
+ (stream->sink && stream->sink->edid_caps.panel_patch.skip_audio_sab_check))) {
pipe_ctx->stream_res.audio = find_first_free_audio(
&context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id, dc_ctx->dce_version);
@@ -2823,34 +3983,6 @@ enum dc_status resource_map_pool_resources(
return DC_ERROR_UNEXPECTED;
}
-/**
- * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state
- *
- * @dc: copy out of dc->current_state
- * @dst_ctx: copy into this
- *
- * This function makes a shallow copy of the current DC state and increments
- * refcounts on existing streams and planes.
- */
-void dc_resource_state_copy_construct_current(
- const struct dc *dc,
- struct dc_state *dst_ctx)
-{
- dc_resource_state_copy_construct(dc->current_state, dst_ctx);
-}
-
-
-void dc_resource_state_construct(
- const struct dc *dc,
- struct dc_state *dst_ctx)
-{
- dst_ctx->clk_mgr = dc->clk_mgr;
-
- /* Initialise DIG link encoder resource tracking variables. */
- link_enc_cfg_init(dc, dst_ctx);
-}
-
-
bool dc_resource_is_dsc_encoding_supported(const struct dc *dc)
{
if (dc->res_pool == NULL)
@@ -2874,8 +4006,10 @@ static bool planes_changed_for_existing_stream(struct dc_state *context,
}
}
- if (!stream_status)
+ if (!stream_status) {
ASSERT(0);
+ return false;
+ }
for (i = 0; i < set_count; i++)
if (set[i].stream == stream)
@@ -2894,6 +4028,31 @@ static bool planes_changed_for_existing_stream(struct dc_state *context,
return false;
}
+static bool add_all_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ const struct dc_validation_set set[],
+ int set_count,
+ struct dc_state *state)
+{
+ int i, j;
+
+ for (i = 0; i < set_count; i++)
+ if (set[i].stream == stream)
+ break;
+
+ if (i == set_count) {
+ dm_error("Stream %p not found in set!\n", stream);
+ return false;
+ }
+
+ for (j = 0; j < set[i].plane_count; j++)
+ if (!dc_state_add_plane(dc, stream, set[i].plane_states[j], state))
+ return false;
+
+ return true;
+}
+
/**
* dc_validate_with_context - Validate and update the potential new stream in the context object
*
@@ -2901,7 +4060,7 @@ static bool planes_changed_for_existing_stream(struct dc_state *context,
* @set: An array of dc_validation_set with all the current streams reference
* @set_count: Total of streams
* @context: New context
- * @fast_validate: Enable or disable fast validation
+ * @validate_mode: identify the validation mode
*
* This function updates the potential new stream in the context object. It
* creates multiple lists for the add, remove, and unchanged streams. In
@@ -2916,7 +4075,7 @@ enum dc_status dc_validate_with_context(struct dc *dc,
const struct dc_validation_set set[],
int set_count,
struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
struct dc_stream_state *unchanged_streams[MAX_PIPES] = { 0 };
struct dc_stream_state *del_streams[MAX_PIPES] = { 0 };
@@ -2999,7 +4158,8 @@ enum dc_status dc_validate_with_context(struct dc *dc,
unchanged_streams[i],
set,
set_count)) {
- if (!dc_rem_all_planes_for_stream(dc,
+
+ if (!dc_state_rem_all_planes_for_stream(dc,
unchanged_streams[i],
context)) {
res = DC_FAIL_DETACH_SURFACES;
@@ -3021,12 +4181,24 @@ enum dc_status dc_validate_with_context(struct dc *dc,
}
}
- if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
- res = DC_FAIL_DETACH_SURFACES;
- goto fail;
+ if (dc_state_get_stream_subvp_type(context, del_streams[i]) == SUBVP_PHANTOM) {
+ /* remove phantoms specifically */
+ if (!dc_state_rem_all_phantom_planes_for_stream(dc, del_streams[i], context, true)) {
+ res = DC_FAIL_DETACH_SURFACES;
+ goto fail;
+ }
+
+ res = dc_state_remove_phantom_stream(dc, context, del_streams[i]);
+ dc_state_release_phantom_stream(dc, context, del_streams[i]);
+ } else {
+ if (!dc_state_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+ res = DC_FAIL_DETACH_SURFACES;
+ goto fail;
+ }
+
+ res = dc_state_remove_stream(dc, context, del_streams[i]);
}
- res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
if (res != DC_OK)
goto fail;
}
@@ -3049,7 +4221,7 @@ enum dc_status dc_validate_with_context(struct dc *dc,
/* Add new streams and then add all planes for the new stream */
for (i = 0; i < add_streams_count; i++) {
calculate_phy_pix_clks(add_streams[i]);
- res = dc_add_stream_to_ctx(dc, context, add_streams[i]);
+ res = dc_state_add_stream(dc, context, add_streams[i]);
if (res != DC_OK)
goto fail;
@@ -3072,7 +4244,18 @@ enum dc_status dc_validate_with_context(struct dc *dc,
}
}
- res = dc_validate_global_state(dc, context, fast_validate);
+ /* clear subvp cursor limitations */
+ for (i = 0; i < context->stream_count; i++) {
+ dc_state_set_stream_subvp_cursor_limit(context->streams[i], context, false);
+ }
+
+ res = dc_validate_global_state(dc, context, validate_mode);
+
+ /* calculate pixel rate divider after deciding pixel clock & odm combine */
+ if ((dc->hwss.calculate_pix_rate_divider) && (res == DC_OK)) {
+ for (i = 0; i < add_streams_count; i++)
+ dc->hwss.calculate_pix_rate_divider(dc, context, add_streams[i]);
+ }
fail:
if (res != DC_OK)
@@ -3083,12 +4266,41 @@ fail:
return res;
}
+#if defined(CONFIG_DRM_AMD_DC_FP)
+#endif /* CONFIG_DRM_AMD_DC_FP */
+
+/**
+ * calculate_timing_params_for_dsc_with_padding - Calculates timing parameters for DSC with padding.
+ * @pipe_ctx: Pointer to the pipe context structure.
+ *
+ * As written here, this function only resets the DSC padding state of the given pipe context:
+ * dsc_hactive_padding and dsc_htotal_padding are set to 0 and, when a stream is attached,
+ * dsc_pix_clk_100hz is set to the stream's timing pix_clk_100hz. The padding computation comparing
+ * hactive against the total DSC slice width (and the reset to 0 when htotal minus the padding is
+ * less than 32) is not performed by this body. It returns immediately when @pipe_ctx is NULL.
+ */
+static void calculate_timing_params_for_dsc_with_padding(struct pipe_ctx *pipe_ctx)
+{
+ struct dc_stream_state *stream = NULL;
+
+ if (!pipe_ctx)
+ return;
+
+ stream = pipe_ctx->stream;
+ pipe_ctx->dsc_padding_params.dsc_hactive_padding = 0;
+ pipe_ctx->dsc_padding_params.dsc_htotal_padding = 0;
+
+ if (stream)
+ pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz = stream->timing.pix_clk_100hz;
+
+}
+
/**
* dc_validate_global_state() - Determine if hardware can support a given state
*
* @dc: dc struct for this driver
* @new_ctx: state to be validated
- * @fast_validate: set to true if only yes/no to support matters
+ * @validate_mode: identify the validation mode
*
* Checks hardware resource availability and bandwidth requirement.
*
@@ -3098,7 +4310,7 @@ fail:
enum dc_status dc_validate_global_state(
struct dc *dc,
struct dc_state *new_ctx,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
enum dc_status result = DC_ERROR_UNEXPECTED;
int i, j;
@@ -3121,6 +4333,10 @@ enum dc_status dc_validate_global_state(
if (pipe_ctx->stream != stream)
continue;
+ /* Decide whether hblank borrow is needed and save it in pipe_ctx */
+ if (dc->debug.enable_hblank_borrow)
+ calculate_timing_params_for_dsc_with_padding(pipe_ctx);
+
if (dc->res_pool->funcs->patch_unknown_plane_state &&
pipe_ctx->plane_state &&
pipe_ctx->plane_state->tiling_info.gfx9.swizzle == DC_SW_UNKNOWN) {
@@ -3153,16 +4369,7 @@ enum dc_status dc_validate_global_state(
result = resource_build_scaling_params_for_context(dc, new_ctx);
if (result == DC_OK)
- if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate))
- result = DC_FAIL_BANDWIDTH_VALIDATE;
-
- /*
- * Only update link encoder to stream assignment after bandwidth validation passed.
- * TODO: Split out assignment and validation.
- */
- if (result == DC_OK && dc->res_pool->funcs->link_encs_assign && fast_validate == false)
- dc->res_pool->funcs->link_encs_assign(
- dc, new_ctx, new_ctx->streams, new_ctx->stream_count);
+ result = dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, validate_mode);
return result;
}
@@ -3195,21 +4402,22 @@ static void set_avi_info_frame(
uint32_t pixel_encoding = 0;
enum scanning_type scan_type = SCANNING_TYPE_NODATA;
enum dc_aspect_ratio aspect = ASPECT_RATIO_NO_DATA;
- bool itc = false;
- uint8_t itc_value = 0;
- uint8_t cn0_cn1 = 0;
- unsigned int cn0_cn1_value = 0;
uint8_t *check_sum = NULL;
uint8_t byte_index = 0;
union hdmi_info_packet hdmi_info;
- union display_content_support support = {0};
unsigned int vic = pipe_ctx->stream->timing.vic;
unsigned int rid = pipe_ctx->stream->timing.rid;
unsigned int fr_ind = pipe_ctx->stream->timing.fr_index;
enum dc_timing_3d_format format;
+ if (stream->avi_infopacket.valid) {
+ *info_packet = stream->avi_infopacket;
+ return;
+ }
+
memset(&hdmi_info, 0, sizeof(union hdmi_info_packet));
+
color_space = pipe_ctx->stream->output_color_space;
if (color_space == COLOR_SPACE_UNKNOWN)
color_space = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ?
@@ -3273,7 +4481,7 @@ static void set_avi_info_frame(
break;
case COLOR_SPACE_2020_RGB_FULLRANGE:
case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR;
hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED;
break;
@@ -3287,8 +4495,8 @@ static void set_avi_info_frame(
break;
}
- if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR &&
- stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
+ if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR_LIMITED &&
+ stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22) {
hdmi_info.bits.EC0_EC2 = 0;
hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709;
}
@@ -3312,49 +4520,27 @@ static void set_avi_info_frame(
/* Active Format Aspect ratio - same as Picture Aspect Ratio. */
hdmi_info.bits.R0_R3 = ACTIVE_FORMAT_ASPECT_RATIO_SAME_AS_PICTURE;
- /* TODO: un-hardcode cn0_cn1 and itc */
-
- cn0_cn1 = 0;
- cn0_cn1_value = 0;
-
- itc = true;
- itc_value = 1;
-
- support = stream->content_support;
-
- if (itc) {
- if (!support.bits.valid_content_type) {
- cn0_cn1_value = 0;
- } else {
- if (cn0_cn1 == DISPLAY_CONTENT_TYPE_GRAPHICS) {
- if (support.bits.graphics_content == 1) {
- cn0_cn1_value = 0;
- }
- } else if (cn0_cn1 == DISPLAY_CONTENT_TYPE_PHOTO) {
- if (support.bits.photo_content == 1) {
- cn0_cn1_value = 1;
- } else {
- cn0_cn1_value = 0;
- itc_value = 0;
- }
- } else if (cn0_cn1 == DISPLAY_CONTENT_TYPE_CINEMA) {
- if (support.bits.cinema_content == 1) {
- cn0_cn1_value = 2;
- } else {
- cn0_cn1_value = 0;
- itc_value = 0;
- }
- } else if (cn0_cn1 == DISPLAY_CONTENT_TYPE_GAME) {
- if (support.bits.game_content == 1) {
- cn0_cn1_value = 3;
- } else {
- cn0_cn1_value = 0;
- itc_value = 0;
- }
- }
- }
- hdmi_info.bits.CN0_CN1 = cn0_cn1_value;
- hdmi_info.bits.ITC = itc_value;
+ switch (stream->content_type) {
+ case DISPLAY_CONTENT_TYPE_NO_DATA:
+ hdmi_info.bits.CN0_CN1 = 0;
+ hdmi_info.bits.ITC = 1;
+ break;
+ case DISPLAY_CONTENT_TYPE_GRAPHICS:
+ hdmi_info.bits.CN0_CN1 = 0;
+ hdmi_info.bits.ITC = 1;
+ break;
+ case DISPLAY_CONTENT_TYPE_PHOTO:
+ hdmi_info.bits.CN0_CN1 = 1;
+ hdmi_info.bits.ITC = 1;
+ break;
+ case DISPLAY_CONTENT_TYPE_CINEMA:
+ hdmi_info.bits.CN0_CN1 = 2;
+ hdmi_info.bits.ITC = 1;
+ break;
+ case DISPLAY_CONTENT_TYPE_GAME:
+ hdmi_info.bits.CN0_CN1 = 3;
+ hdmi_info.bits.ITC = 1;
+ break;
}
if (stream->qs_bit == 1) {
@@ -3411,7 +4597,7 @@ static void set_avi_info_frame(
}
if (rid != 0 && fr_ind != 0) {
- hdmi_info.bits.header.version = 5;
+ hdmi_info.bits.header.version = 4;
hdmi_info.bits.header.length = 15;
hdmi_info.bits.FR0_FR3 = fr_ind & 0xF;
@@ -3530,7 +4716,7 @@ static void set_hfvs_info_packet(
static void adaptive_sync_override_dp_info_packets_sdp_line_num(
const struct dc_crtc_timing *timing,
struct enc_sdp_line_num *sdp_line_num,
- struct _vcs_dpi_display_pipe_dest_params_st *pipe_dlg_param)
+ unsigned int vstartup_start)
{
uint32_t asic_blank_start = 0;
uint32_t asic_blank_end = 0;
@@ -3545,8 +4731,8 @@ static void adaptive_sync_override_dp_info_packets_sdp_line_num(
asic_blank_end = (asic_blank_start - tg->v_border_bottom -
tg->v_addressable - tg->v_border_top);
- if (pipe_dlg_param->vstartup_start > asic_blank_end) {
- v_update = (tg->v_total - (pipe_dlg_param->vstartup_start - asic_blank_end));
+ if (vstartup_start > asic_blank_end) {
+ v_update = (tg->v_total - (vstartup_start - asic_blank_end));
sdp_line_num->adaptive_sync_line_num_valid = true;
sdp_line_num->adaptive_sync_line_num = (tg->v_total - v_update - 1);
} else {
@@ -3559,7 +4745,7 @@ static void set_adaptive_sync_info_packet(
struct dc_info_packet *info_packet,
const struct dc_stream_state *stream,
struct encoder_info_frame *info_frame,
- struct _vcs_dpi_display_pipe_dest_params_st *pipe_dlg_param)
+ unsigned int vstartup_start)
{
if (!stream->adaptive_sync_infopacket.valid)
return;
@@ -3567,7 +4753,7 @@ static void set_adaptive_sync_info_packet(
adaptive_sync_override_dp_info_packets_sdp_line_num(
&stream->timing,
&info_frame->sdp_line_num,
- pipe_dlg_param);
+ vstartup_start);
*info_packet = stream->adaptive_sync_infopacket;
}
@@ -3582,59 +4768,6 @@ static void set_vtem_info_packet(
*info_packet = stream->vtem_infopacket;
}
-void dc_resource_state_destruct(struct dc_state *context)
-{
- int i, j;
-
- for (i = 0; i < context->stream_count; i++) {
- for (j = 0; j < context->stream_status[i].plane_count; j++)
- dc_plane_state_release(
- context->stream_status[i].plane_states[j]);
-
- context->stream_status[i].plane_count = 0;
- dc_stream_release(context->streams[i]);
- context->streams[i] = NULL;
- }
- context->stream_count = 0;
-}
-
-void dc_resource_state_copy_construct(
- const struct dc_state *src_ctx,
- struct dc_state *dst_ctx)
-{
- int i, j;
- struct kref refcount = dst_ctx->refcount;
-
- *dst_ctx = *src_ctx;
-
- for (i = 0; i < MAX_PIPES; i++) {
- struct pipe_ctx *cur_pipe = &dst_ctx->res_ctx.pipe_ctx[i];
-
- if (cur_pipe->top_pipe)
- cur_pipe->top_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
-
- if (cur_pipe->bottom_pipe)
- cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
-
- if (cur_pipe->next_odm_pipe)
- cur_pipe->next_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
-
- if (cur_pipe->prev_odm_pipe)
- cur_pipe->prev_odm_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
- }
-
- for (i = 0; i < dst_ctx->stream_count; i++) {
- dc_stream_retain(dst_ctx->streams[i]);
- for (j = 0; j < dst_ctx->stream_status[i].plane_count; j++)
- dc_plane_state_retain(
- dst_ctx->stream_status[i].plane_states[j]);
- }
-
- /* context refcount should not be overridden */
- dst_ctx->refcount = refcount;
-
-}
-
struct clock_source *dc_resource_find_first_free_pll(
struct resource_context *res_ctx,
const struct resource_pool *pool)
@@ -3653,6 +4786,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
{
enum signal_type signal = SIGNAL_TYPE_NONE;
struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame;
+ unsigned int vstartup_start = 0;
/* default all packets to invalid */
info->avi.valid = false;
@@ -3666,6 +4800,9 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
info->adaptive_sync.valid = false;
signal = pipe_ctx->stream->signal;
+ if (pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe)
+ vstartup_start = pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe(pipe_ctx);
+
/* HDMi and DP have different info packets*/
if (dc_is_hdmi_signal(signal)) {
set_avi_info_frame(&info->avi, pipe_ctx);
@@ -3687,7 +4824,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
set_adaptive_sync_info_packet(&info->adaptive_sync,
pipe_ctx->stream,
info,
- &pipe_ctx->pipe_dlg_param);
+ vstartup_start);
}
patch_gamut_packet_checksum(&info->gamut);
@@ -3781,7 +4918,10 @@ bool pipe_need_reprogram(
return true;
/* DIG link encoder resource assignment for stream changed. */
- if (pipe_ctx_old->stream->ctx->dc->res_pool->funcs->link_encs_assign) {
+ if (pipe_ctx_old->stream->ctx->dc->config.unify_link_enc_assignment) {
+ if (pipe_ctx_old->link_res.dio_link_enc != pipe_ctx->link_res.dio_link_enc)
+ return true;
+ } else if (pipe_ctx_old->stream->ctx->dc->res_pool->funcs->link_encs_assign) {
bool need_reprogram = false;
struct dc *dc = pipe_ctx_old->stream->ctx->dc;
struct link_encoder *link_enc_prev =
@@ -3814,7 +4954,7 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
option = DITHER_OPTION_SPATIAL8;
break;
case COLOR_DEPTH_101010:
- option = DITHER_OPTION_SPATIAL10;
+ option = DITHER_OPTION_TRUN10;
break;
default:
option = DITHER_OPTION_DISABLE;
@@ -3840,6 +4980,8 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) {
fmt_bit_depth->flags.TRUNCATE_ENABLED = 1;
fmt_bit_depth->flags.TRUNCATE_DEPTH = 2;
+ if (option == DITHER_OPTION_TRUN10)
+ fmt_bit_depth->flags.TRUNCATE_MODE = 1;
}
/* special case - Formatter can only reduce by 4 bits at most.
@@ -3924,6 +5066,9 @@ void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream,
enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream)
{
+ if (dc == NULL || stream == NULL)
+ return DC_ERROR_UNEXPECTED;
+
struct dc_link *link = stream->link;
struct timing_generator *tg = dc->res_pool->timing_generators[0];
enum dc_status res = DC_OK;
@@ -4042,6 +5187,28 @@ void get_audio_check(struct audio_info *aud_modes,
}
}
+struct link_encoder *get_temp_dio_link_enc(
+ const struct resource_context *res_ctx,
+ const struct resource_pool *const pool,
+ const struct dc_link *link)
+{
+ struct link_encoder *link_enc = NULL;
+ int enc_index;
+
+ if (link->is_dig_mapping_flexible)
+ enc_index = find_acquired_dio_link_enc_for_link(res_ctx, link);
+ else
+ enc_index = link->eng_id;
+
+ if (enc_index < 0)
+ enc_index = find_free_dio_link_enc(res_ctx, link, pool);
+
+ if (enc_index >= 0)
+ link_enc = pool->link_encoders[enc_index];
+
+ return link_enc;
+}
+
static struct hpo_dp_link_encoder *get_temp_hpo_dp_link_enc(
const struct resource_context *res_ctx,
const struct resource_pool *const pool,
@@ -4071,11 +5238,17 @@ bool get_temp_dp_link_res(struct dc_link *link,
memset(link_res, 0, sizeof(*link_res));
if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_128b_132b_ENCODING) {
- link_res->hpo_dp_link_enc = get_temp_hpo_dp_link_enc(res_ctx,
- dc->res_pool, link);
+ link_res->hpo_dp_link_enc = get_temp_hpo_dp_link_enc(res_ctx, dc->res_pool, link);
if (!link_res->hpo_dp_link_enc)
return false;
+ } else if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING &&
+ dc->config.unify_link_enc_assignment) {
+ link_res->dio_link_enc = get_temp_dio_link_enc(res_ctx,
+ dc->res_pool, link);
+ if (!link_res->dio_link_enc)
+ return false;
}
+
return true;
}
@@ -4247,7 +5420,17 @@ bool is_h_timing_divisible_by_2(struct dc_stream_state *stream)
return divisible;
}
-bool dc_resource_acquire_secondary_pipe_for_mpc_odm(
+/* This interface is deprecated for new DCNs. It is replaced by the following
+ * new interfaces. These two interfaces encapsulate pipe selection priority
+ * with DCN specific minimum hardware transition optimization algorithm. With
+ * the new interfaces caller no longer needs to know the implementation detail
+ * of a pipe topology.
+ *
+ * resource_update_pipes_for_stream_with_slice_count
+ * resource_update_pipes_for_plane_with_slice_count
+ *
+ */
+bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
const struct dc *dc,
struct dc_state *state,
struct pipe_ctx *pri_pipe,
@@ -4263,6 +5446,9 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm(
sec_next = sec_pipe->next_odm_pipe;
sec_prev = sec_pipe->prev_odm_pipe;
+ if (pri_pipe == NULL)
+ return false;
+
*sec_pipe = *pri_pipe;
sec_pipe->top_pipe = sec_top;
@@ -4285,7 +5471,7 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm(
sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp;
if (sec_pipe->stream->timing.flags.DSC == 1) {
#if defined(CONFIG_DRM_AMD_DC_FP)
- dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, pipe_idx);
+ dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, sec_pipe->stream_res.opp->inst);
#endif
ASSERT(sec_pipe->stream_res.dsc);
if (sec_pipe->stream_res.dsc == NULL)
@@ -4334,6 +5520,101 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
remove_hpo_dp_link_enc_from_ctx(&context->res_ctx, pipe_ctx, pipe_ctx->stream);
}
+ if (pipe_ctx->link_res.dio_link_enc == NULL && dc->config.unify_link_enc_assignment)
+ if (!add_dio_link_enc_to_ctx(dc, context, dc->res_pool, pipe_ctx, pipe_ctx->stream))
+ return DC_NO_LINK_ENC_RESOURCE;
+
return DC_OK;
}
+struct dscl_prog_data *resource_get_dscl_prog_data(struct pipe_ctx *pipe_ctx)
+{
+ return &pipe_ctx->plane_res.scl_data.dscl_prog_data;
+}
+
+static bool resource_allocate_mcache(struct dc_state *context, const struct dc_mcache_params *mcache_params)
+{
+ if (context->clk_mgr->ctx->dc->res_pool->funcs->program_mcache_pipe_config)
+ context->clk_mgr->ctx->dc->res_pool->funcs->program_mcache_pipe_config(context, mcache_params);
+
+ return true;
+}
+
+void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuration_options *dml2_options)
+{
+ dml2_options->callbacks.dc = dc;
+ dml2_options->callbacks.build_scaling_params = &resource_build_scaling_params;
+ dml2_options->callbacks.build_test_pattern_params = &resource_build_test_pattern_params;
+ dml2_options->callbacks.acquire_secondary_pipe_for_mpc_odm = &dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy;
+ dml2_options->callbacks.update_pipes_for_stream_with_slice_count = &resource_update_pipes_for_stream_with_slice_count;
+ dml2_options->callbacks.update_pipes_for_plane_with_slice_count = &resource_update_pipes_for_plane_with_slice_count;
+ dml2_options->callbacks.get_mpc_slice_index = &resource_get_mpc_slice_index;
+ dml2_options->callbacks.get_mpc_slice_count = &resource_get_mpc_slice_count;
+ dml2_options->callbacks.get_odm_slice_index = &resource_get_odm_slice_index;
+ dml2_options->callbacks.get_odm_slice_count = &resource_get_odm_slice_count;
+ dml2_options->callbacks.get_opp_head = &resource_get_opp_head;
+ dml2_options->callbacks.get_otg_master_for_stream = &resource_get_otg_master_for_stream;
+ dml2_options->callbacks.get_opp_heads_for_otg_master = &resource_get_opp_heads_for_otg_master;
+ dml2_options->callbacks.get_dpp_pipes_for_plane = &resource_get_dpp_pipes_for_plane;
+ dml2_options->callbacks.get_stream_status = &dc_state_get_stream_status;
+ dml2_options->callbacks.get_stream_from_id = &dc_state_get_stream_from_id;
+ dml2_options->callbacks.get_max_flickerless_instant_vtotal_increase = &dc_stream_get_max_flickerless_instant_vtotal_increase;
+ dml2_options->callbacks.allocate_mcache = &resource_allocate_mcache;
+
+ dml2_options->svp_pstate.callbacks.dc = dc;
+ dml2_options->svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane;
+ dml2_options->svp_pstate.callbacks.add_phantom_stream = &dc_state_add_phantom_stream;
+ dml2_options->svp_pstate.callbacks.build_scaling_params = &resource_build_scaling_params;
+ dml2_options->svp_pstate.callbacks.create_phantom_plane = &dc_state_create_phantom_plane;
+ dml2_options->svp_pstate.callbacks.remove_phantom_plane = &dc_state_remove_phantom_plane;
+ dml2_options->svp_pstate.callbacks.remove_phantom_stream = &dc_state_remove_phantom_stream;
+ dml2_options->svp_pstate.callbacks.create_phantom_stream = &dc_state_create_phantom_stream;
+ dml2_options->svp_pstate.callbacks.release_phantom_plane = &dc_state_release_phantom_plane;
+ dml2_options->svp_pstate.callbacks.release_phantom_stream = &dc_state_release_phantom_stream;
+ dml2_options->svp_pstate.callbacks.get_pipe_subvp_type = &dc_state_get_pipe_subvp_type;
+ dml2_options->svp_pstate.callbacks.get_stream_subvp_type = &dc_state_get_stream_subvp_type;
+ dml2_options->svp_pstate.callbacks.get_paired_subvp_stream = &dc_state_get_paired_subvp_stream;
+ dml2_options->svp_pstate.callbacks.remove_phantom_streams_and_planes = &dc_state_remove_phantom_streams_and_planes;
+ dml2_options->svp_pstate.callbacks.release_phantom_streams_and_planes = &dc_state_release_phantom_streams_and_planes;
+}
+
+/* Returns number of DET segments allocated for a given OTG_MASTER pipe */
+int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master)
+{
+ struct pipe_ctx *opp_heads[MAX_PIPES];
+ struct pipe_ctx *dpp_pipes[MAX_PIPES];
+
+ int dpp_count = 0;
+ int det_segments = 0;
+
+ if (!otg_master->stream)
+ return 0;
+
+ int slice_count = resource_get_opp_heads_for_otg_master(otg_master,
+ &state->res_ctx, opp_heads);
+
+ for (int slice_idx = 0; slice_idx < slice_count; slice_idx++) {
+ if (opp_heads[slice_idx]->plane_state) {
+ dpp_count = resource_get_dpp_pipes_for_opp_head(
+ opp_heads[slice_idx],
+ &state->res_ctx,
+ dpp_pipes);
+ for (int dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++)
+ det_segments += dpp_pipes[dpp_idx]->hubp_regs.det_size;
+ }
+ }
+ return det_segments;
+}
+
+bool resource_is_hpo_acquired(struct dc_state *context)
+{
+ int i;
+
+ for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) {
+ if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) {
+ return true;
+ }
+ }
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
index 5f6392ae31a6..f976ffd6d466 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
@@ -61,10 +61,11 @@ void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification
/* For HPD/HPD RX, convert dpia port index into link index */
if (notify->type == DMUB_NOTIFICATION_HPD ||
notify->type == DMUB_NOTIFICATION_HPD_IRQ ||
- notify->type == DMUB_NOTIFICATION_DPIA_NOTIFICATION ||
+ notify->type == DMUB_NOTIFICATION_AUX_REPLY ||
+ notify->type == DMUB_NOTIFICATION_DPIA_NOTIFICATION ||
notify->type == DMUB_NOTIFICATION_SET_CONFIG_REPLY) {
notify->link_index =
- get_link_index_from_dpia_port_index(dc, notify->link_index);
+ get_link_index_from_dpia_port_index(dc, notify->instance);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
new file mode 100644
index 000000000000..c61300a7cb1c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -0,0 +1,1077 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "dc_types.h"
+#include "core_types.h"
+#include "core_status.h"
+#include "dc_state.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
+#include "dc_plane_priv.h"
+
+#include "dm_services.h"
+#include "resource.h"
+#include "link_enc_cfg.h"
+
+#if defined(CONFIG_DRM_AMD_DC_FP)
+#include "dml2/dml2_wrapper.h"
+#include "dml2/dml2_internal_types.h"
+#endif
+
+#define DC_LOGGER \
+ dc->ctx->logger
+#define DC_LOGGER_INIT(logger)
+
+/* Private dc_state helper functions */
+static bool dc_state_track_phantom_stream(struct dc_state *state,
+ struct dc_stream_state *phantom_stream)
+{
+ if (state->phantom_stream_count >= MAX_PHANTOM_PIPES)
+ return false;
+
+ state->phantom_streams[state->phantom_stream_count++] = phantom_stream;
+
+ return true;
+}
+
+static bool dc_state_untrack_phantom_stream(struct dc_state *state, struct dc_stream_state *phantom_stream)
+{
+ bool res = false;
+ int i;
+
+ /* first find phantom stream in the dc_state */
+ for (i = 0; i < state->phantom_stream_count; i++) {
+ if (state->phantom_streams[i] == phantom_stream) {
+ state->phantom_streams[i] = NULL;
+ res = true;
+ break;
+ }
+ }
+
+ /* failed to find stream in state */
+ if (!res)
+ return res;
+
+ /* trim back phantom streams */
+ state->phantom_stream_count--;
+ for (; i < state->phantom_stream_count; i++)
+ state->phantom_streams[i] = state->phantom_streams[i + 1];
+
+ return res;
+}
+
+static bool dc_state_is_phantom_stream_tracked(struct dc_state *state, struct dc_stream_state *phantom_stream)
+{
+ int i;
+
+ for (i = 0; i < state->phantom_stream_count; i++) {
+ if (state->phantom_streams[i] == phantom_stream)
+ return true;
+ }
+
+ return false;
+}
+
+static bool dc_state_track_phantom_plane(struct dc_state *state,
+ struct dc_plane_state *phantom_plane)
+{
+ if (state->phantom_plane_count >= MAX_PHANTOM_PIPES)
+ return false;
+
+ state->phantom_planes[state->phantom_plane_count++] = phantom_plane;
+
+ return true;
+}
+
+static bool dc_state_untrack_phantom_plane(struct dc_state *state, struct dc_plane_state *phantom_plane)
+{
+ bool res = false;
+ int i;
+
+ /* first find phantom plane in the dc_state */
+ for (i = 0; i < state->phantom_plane_count; i++) {
+ if (state->phantom_planes[i] == phantom_plane) {
+ state->phantom_planes[i] = NULL;
+ res = true;
+ break;
+ }
+ }
+
+ /* failed to find plane in state */
+ if (!res)
+ return res;
+
+ /* trim back phantom planes */
+ state->phantom_plane_count--;
+ for (; i < state->phantom_plane_count; i++)
+ state->phantom_planes[i] = state->phantom_planes[i + 1];
+
+ return res;
+}
+
+static bool dc_state_is_phantom_plane_tracked(struct dc_state *state, struct dc_plane_state *phantom_plane)
+{
+ int i;
+
+ for (i = 0; i < state->phantom_plane_count; i++) {
+ if (state->phantom_planes[i] == phantom_plane)
+ return true;
+ }
+
+ return false;
+}
+
+static void dc_state_copy_internal(struct dc_state *dst_state, struct dc_state *src_state)
+{
+ int i, j;
+
+ memcpy(dst_state, src_state, sizeof(struct dc_state));
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ struct pipe_ctx *cur_pipe = &dst_state->res_ctx.pipe_ctx[i];
+
+ if (cur_pipe->top_pipe)
+ cur_pipe->top_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->top_pipe->pipe_idx];
+
+ if (cur_pipe->bottom_pipe)
+ cur_pipe->bottom_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
+
+ if (cur_pipe->prev_odm_pipe)
+ cur_pipe->prev_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->prev_odm_pipe->pipe_idx];
+
+ if (cur_pipe->next_odm_pipe)
+ cur_pipe->next_odm_pipe = &dst_state->res_ctx.pipe_ctx[cur_pipe->next_odm_pipe->pipe_idx];
+ }
+
+ /* retain phantoms */
+ for (i = 0; i < dst_state->phantom_stream_count; i++)
+ dc_stream_retain(dst_state->phantom_streams[i]);
+
+ for (i = 0; i < dst_state->phantom_plane_count; i++)
+ dc_plane_state_retain(dst_state->phantom_planes[i]);
+
+ /* retain streams and planes */
+ for (i = 0; i < dst_state->stream_count; i++) {
+ dc_stream_retain(dst_state->streams[i]);
+ for (j = 0; j < dst_state->stream_status[i].plane_count; j++)
+ dc_plane_state_retain(
+ dst_state->stream_status[i].plane_states[j]);
+ }
+
+}
+
+static void init_state(struct dc *dc, struct dc_state *state)
+{
+ /* Each context must have their own instance of VBA and in order to
+ * initialize and obtain IP and SOC the base DML instance from DC is
+ * initially copied into every context
+ */
+ memcpy(&state->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib));
+}
+
+/* Public dc_state functions */
+/* Allocate and initialise a dc_state holding one reference; returns NULL on failure. */
+struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *params)
+{
+	struct dc_state *state = kvzalloc(sizeof(struct dc_state), GFP_KERNEL);
+
+	if (!state)
+		return NULL;
+
+	/* Take the initial reference now: the error paths below free via dc_state_release() */
+	kref_init(&state->refcount);
+
+	init_state(dc, state);
+	dc_state_construct(dc, state);
+	state->power_source = params ? params->power_source : DC_POWER_SOURCE_AC;
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	if (dc->debug.using_dml2) {
+		if (!dml2_create(dc, &dc->dml2_options, &state->bw_ctx.dml2)) {
+			dc_state_release(state);
+			return NULL;
+		}
+
+		if (dc->caps.dcmode_power_limits_present && !dml2_create(dc, &dc->dml2_dc_power_options, &state->bw_ctx.dml2_dc_power_source)) {
+			dc_state_release(state);
+			return NULL;
+		}
+	}
+#endif
+
+	return state;
+}
+
+void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state)
+{
+ struct kref refcount = dst_state->refcount;
+#ifdef CONFIG_DRM_AMD_DC_FP
+ struct dml2_context *dst_dml2 = dst_state->bw_ctx.dml2;
+ struct dml2_context *dst_dml2_dc_power_source = dst_state->bw_ctx.dml2_dc_power_source;
+#endif
+
+ dc_state_copy_internal(dst_state, src_state);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+ dst_state->bw_ctx.dml2 = dst_dml2;
+ if (src_state->bw_ctx.dml2)
+ dml2_copy(dst_state->bw_ctx.dml2, src_state->bw_ctx.dml2);
+
+ dst_state->bw_ctx.dml2_dc_power_source = dst_dml2_dc_power_source;
+ if (src_state->bw_ctx.dml2_dc_power_source)
+ dml2_copy(dst_state->bw_ctx.dml2_dc_power_source, src_state->bw_ctx.dml2_dc_power_source);
+#endif
+
+ /* context refcount should not be overridden */
+ dst_state->refcount = refcount;
+}
+
+/* Allocate a copy of @src_state with its own reference count; returns NULL on failure. */
+struct dc_state *dc_state_create_copy(struct dc_state *src_state)
+{
+	struct dc_state *new_state;
+
+	new_state = kvmalloc(sizeof(struct dc_state), GFP_KERNEL);
+	if (!new_state)
+		return NULL;
+
+	dc_state_copy_internal(new_state, src_state);
+	/* The copy memcpy'd src's refcount; reset it before any dc_state_release() below */
+	kref_init(&new_state->refcount);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+	new_state->bw_ctx.dml2 = NULL;
+	new_state->bw_ctx.dml2_dc_power_source = NULL;
+
+	if (src_state->bw_ctx.dml2 &&
+			!dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) {
+		dc_state_release(new_state);
+		return NULL;
+	}
+
+	if (src_state->bw_ctx.dml2_dc_power_source &&
+			!dml2_create_copy(&new_state->bw_ctx.dml2_dc_power_source, src_state->bw_ctx.dml2_dc_power_source)) {
+		dc_state_release(new_state);
+		return NULL;
+	}
+#endif
+
+	return new_state;
+}
+
+void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state)
+{
+ dc_state_copy(dst_state, dc->current_state);
+}
+
+struct dc_state *dc_state_create_current_copy(struct dc *dc)
+{
+ return dc_state_create_copy(dc->current_state);
+}
+
+void dc_state_construct(struct dc *dc, struct dc_state *state)
+{
+ state->clk_mgr = dc->clk_mgr;
+
+ /* Initialise DIG link encoder resource tracking variables. */
+ if (dc->res_pool)
+ link_enc_cfg_init(dc, state);
+}
+
+void dc_state_destruct(struct dc_state *state)
+{
+ int i, j;
+
+ for (i = 0; i < state->stream_count; i++) {
+ for (j = 0; j < state->stream_status[i].plane_count; j++)
+ dc_plane_state_release(
+ state->stream_status[i].plane_states[j]);
+
+ state->stream_status[i].plane_count = 0;
+ dc_stream_release(state->streams[i]);
+ state->streams[i] = NULL;
+ }
+ state->stream_count = 0;
+
+ /* release tracked phantoms */
+ for (i = 0; i < state->phantom_stream_count; i++) {
+ dc_stream_release(state->phantom_streams[i]);
+ state->phantom_streams[i] = NULL;
+ }
+ state->phantom_stream_count = 0;
+
+ for (i = 0; i < state->phantom_plane_count; i++) {
+ dc_plane_state_release(state->phantom_planes[i]);
+ state->phantom_planes[i] = NULL;
+ }
+ state->phantom_plane_count = 0;
+
+ state->stream_mask = 0;
+ memset(&state->res_ctx, 0, sizeof(state->res_ctx));
+ memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg));
+ memset(&state->dcn_bw_vars, 0, sizeof(state->dcn_bw_vars));
+ state->clk_mgr = NULL;
+ memset(&state->bw_ctx.bw, 0, sizeof(state->bw_ctx.bw));
+ memset(state->block_sequence, 0, sizeof(state->block_sequence));
+ state->block_sequence_steps = 0;
+ memset(state->dc_dmub_cmd, 0, sizeof(state->dc_dmub_cmd));
+ state->dmub_cmd_count = 0;
+ memset(&state->perf_params, 0, sizeof(state->perf_params));
+}
+
+void dc_state_retain(struct dc_state *state)
+{
+ kref_get(&state->refcount);
+}
+
+/* kref release callback: destruct the state, destroy its DML2 contexts and free it. */
+static void dc_state_free(struct kref *kref)
+{
+	struct dc_state *state = container_of(kref, struct dc_state, refcount);
+
+	dc_state_destruct(state);
+#ifdef CONFIG_DRM_AMD_DC_FP
+	dml2_destroy(state->bw_ctx.dml2);
+	state->bw_ctx.dml2 = NULL;
+
+	dml2_destroy(state->bw_ctx.dml2_dc_power_source);
+	state->bw_ctx.dml2_dc_power_source = NULL;
+#endif
+
+	kvfree(state);
+}
+
+void dc_state_release(struct dc_state *state)
+{
+ if (state != NULL)
+ kref_put(&state->refcount, dc_state_free);
+}
+/*
+ * dc_state_add_stream() - Add a new dc_stream_state to a dc_state.
+ */
+enum dc_status dc_state_add_stream(
+ const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream)
+{
+ enum dc_status res;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (state->stream_count >= dc->res_pool->timing_generator_count) {
+ DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream);
+ return DC_ERROR_UNEXPECTED;
+ }
+
+ state->streams[state->stream_count] = stream;
+ dc_stream_retain(stream);
+ state->stream_count++;
+
+ res = resource_add_otg_master_for_stream_output(
+ state, dc->res_pool, stream);
+ if (res != DC_OK)
+ DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res);
+
+ return res;
+}
+
+/*
+ * dc_state_remove_stream() - Remove a stream from a dc_state.
+ * Returns DC_OK, or DC_ERROR_UNEXPECTED if the stream has no pipe or is not in @state.
+ */
+enum dc_status dc_state_remove_stream(
+		const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream)
+{
+	int i;
+	struct pipe_ctx *del_pipe = resource_get_otg_master_for_stream(
+			&state->res_ctx, stream);
+
+	if (!del_pipe) {
+		dm_error("Pipe not found for stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	resource_update_pipes_for_stream_with_slice_count(state,
+			dc->current_state, dc->res_pool, stream, 1);
+	resource_remove_otg_master_for_stream_output(
+			state, dc->res_pool, stream);
+
+	for (i = 0; i < state->stream_count; i++)
+		if (state->streams[i] == stream)
+			break;
+
+	/* Test the index, not streams[i]: i == stream_count is past the valid entries */
+	if (i == state->stream_count) {
+		dm_error("Context doesn't have stream %p !\n", stream);
+		return DC_ERROR_UNEXPECTED;
+	}
+
+	dc_stream_release_3dlut_for_stream(dc, stream);
+
+	dc_stream_release(state->streams[i]);
+	state->stream_count--;
+
+	/* Trim back arrays */
+	for (; i < state->stream_count; i++) {
+		state->streams[i] = state->streams[i + 1];
+		state->stream_status[i] = state->stream_status[i + 1];
+	}
+
+	state->streams[state->stream_count] = NULL;
+	memset(&state->stream_status[state->stream_count], 0,
+			sizeof(state->stream_status[0]));
+
+	return DC_OK;
+}
+
+static void remove_mpc_combine_for_stream(const struct dc *dc,
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ struct dc_stream_status *status)
+{
+ int i;
+
+ for (i = 0; i < status->plane_count; i++)
+ resource_update_pipes_for_plane_with_slice_count(
+ new_ctx, cur_ctx, dc->res_pool,
+ status->plane_states[i], 1);
+}
+
+bool dc_state_add_plane(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state *plane_state,
+ struct dc_state *state)
+{
+ struct resource_pool *pool = dc->res_pool;
+ struct pipe_ctx *otg_master_pipe;
+ struct dc_stream_status *stream_status = NULL;
+ bool added = false;
+ int odm_slice_count;
+ int i;
+
+ stream_status = dc_state_get_stream_status(state, stream);
+ otg_master_pipe = resource_get_otg_master_for_stream(
+ &state->res_ctx, stream);
+ if (stream_status == NULL) {
+ dm_error("Existing stream not found; failed to attach surface!\n");
+ goto out;
+ } else if (stream_status->plane_count == MAX_SURFACES) {
+ dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
+ plane_state, MAX_SURFACES);
+ goto out;
+ } else if (!otg_master_pipe) {
+ goto out;
+ }
+
+ added = resource_append_dpp_pipes_for_plane_composition(state,
+ dc->current_state, pool, otg_master_pipe, plane_state);
+
+ if (!added) {
+ /* try to remove MPC combine to free up pipes */
+ for (i = 0; i < state->stream_count; i++)
+ remove_mpc_combine_for_stream(dc, state,
+ dc->current_state,
+ &state->stream_status[i]);
+ added = resource_append_dpp_pipes_for_plane_composition(state,
+ dc->current_state, pool,
+ otg_master_pipe, plane_state);
+ }
+
+ if (!added) {
+ /* try to decrease ODM slice count gradually to free up pipes */
+ odm_slice_count = resource_get_odm_slice_count(otg_master_pipe);
+ for (i = odm_slice_count - 1; i > 0; i--) {
+ resource_update_pipes_for_stream_with_slice_count(state,
+ dc->current_state, dc->res_pool, stream,
+ i);
+ added = resource_append_dpp_pipes_for_plane_composition(
+ state,
+ dc->current_state, pool,
+ otg_master_pipe, plane_state);
+ if (added)
+ break;
+ }
+ }
+
+ if (added) {
+ stream_status->plane_states[stream_status->plane_count] =
+ plane_state;
+ stream_status->plane_count++;
+ dc_plane_state_retain(plane_state);
+ }
+
+out:
+ return added;
+}
+
+bool dc_state_remove_plane(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state *plane_state,
+ struct dc_state *state)
+{
+ int i;
+ struct dc_stream_status *stream_status = NULL;
+ struct resource_pool *pool = dc->res_pool;
+
+ if (!plane_state)
+ return true;
+
+ for (i = 0; i < state->stream_count; i++)
+ if (state->streams[i] == stream) {
+ stream_status = &state->stream_status[i];
+ break;
+ }
+
+ if (stream_status == NULL) {
+ dm_error("Existing stream not found; failed to remove plane.\n");
+ return false;
+ }
+
+ resource_remove_dpp_pipes_for_plane_composition(
+ state, pool, plane_state);
+
+ for (i = 0; i < stream_status->plane_count; i++) {
+ if (stream_status->plane_states[i] == plane_state) {
+ dc_plane_state_release(stream_status->plane_states[i]);
+ break;
+ }
+ }
+
+ if (i == stream_status->plane_count) {
+ dm_error("Existing plane_state not found; failed to detach it!\n");
+ return false;
+ }
+
+ stream_status->plane_count--;
+
+ /* Start at the plane we've just released, and move all the planes one index forward to "trim" the array */
+ for (; i < stream_status->plane_count; i++)
+ stream_status->plane_states[i] = stream_status->plane_states[i + 1];
+
+ stream_status->plane_states[stream_status->plane_count] = NULL;
+
+ return true;
+}
+
+/**
+ * dc_state_rem_all_planes_for_stream - Remove planes attached to the target stream.
+ *
+ * @dc: Display core (struct dc) instance.
+ * @stream: Target stream whose attached planes are to be removed.
+ * @state: context from which the planes are to be removed.
+ *
+ * Return:
+ * Return true if DC was able to remove all planes from the target
+ * stream, otherwise, return false.
+ */
+bool dc_state_rem_all_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_state *state)
+{
+ int i, old_plane_count;
+ struct dc_stream_status *stream_status = NULL;
+ struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 };
+
+ for (i = 0; i < state->stream_count; i++)
+ if (state->streams[i] == stream) {
+ stream_status = &state->stream_status[i];
+ break;
+ }
+
+ if (stream_status == NULL) {
+ dm_error("Existing stream %p not found!\n", stream);
+ return false;
+ }
+
+ old_plane_count = stream_status->plane_count;
+
+ for (i = 0; i < old_plane_count; i++)
+ del_planes[i] = stream_status->plane_states[i];
+
+ for (i = 0; i < old_plane_count; i++)
+ if (!dc_state_remove_plane(dc, stream, del_planes[i], state))
+ return false;
+
+ return true;
+}
+
+bool dc_state_add_all_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state * const *plane_states,
+ int plane_count,
+ struct dc_state *state)
+{
+ int i;
+ bool result = true;
+
+ for (i = 0; i < plane_count; i++)
+ if (!dc_state_add_plane(dc, stream, plane_states[i], state)) {
+ result = false;
+ break;
+ }
+
+ return result;
+}
+
+/* Private dc_state functions */
+
+/**
+ * dc_state_get_stream_status - Get stream status from given dc state
+ * @state: DC state to find the stream status in
+ * @stream: The stream to get the stream status for
+ *
+ * The given stream is expected to exist in the given dc state. Otherwise, NULL
+ * will be returned.
+ */
+struct dc_stream_status *dc_state_get_stream_status(
+ struct dc_state *state,
+ const struct dc_stream_state *stream)
+{
+ uint8_t i;
+
+ if (state == NULL)
+ return NULL;
+
+ for (i = 0; i < state->stream_count; i++) {
+ if (stream == state->streams[i])
+ return &state->stream_status[i];
+ }
+
+ return NULL;
+}
+
+enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
+ const struct pipe_ctx *pipe_ctx)
+{
+ return dc_state_get_stream_subvp_type(state, pipe_ctx->stream);
+}
+
+enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
+ const struct dc_stream_state *stream)
+{
+ int i;
+
+ enum mall_stream_type type = SUBVP_NONE;
+
+ for (i = 0; i < state->stream_count; i++) {
+ if (state->streams[i] == stream) {
+ type = state->stream_status[i].mall_stream_config.type;
+ break;
+ }
+ }
+
+ return type;
+}
+
+struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
+ const struct dc_stream_state *stream)
+{
+ int i;
+
+ struct dc_stream_state *paired_stream = NULL;
+
+ for (i = 0; i < state->stream_count; i++) {
+ if (state->streams[i] == stream) {
+ paired_stream = state->stream_status[i].mall_stream_config.paired_stream;
+ break;
+ }
+ }
+
+ return paired_stream;
+}
+
+struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *main_stream)
+{
+ struct dc_stream_state *phantom_stream;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ phantom_stream = dc_create_stream_for_sink(main_stream->sink);
+
+ if (!phantom_stream) {
+ DC_LOG_ERROR("Failed to allocate phantom stream.\n");
+ return NULL;
+ }
+
+ /* track phantom stream in dc_state */
+ dc_state_track_phantom_stream(state, phantom_stream);
+
+ phantom_stream->is_phantom = true;
+ phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
+ phantom_stream->dpms_off = true;
+
+ return phantom_stream;
+}
+
+void dc_state_release_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream)
+{
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (!dc_state_untrack_phantom_stream(state, phantom_stream)) {
+ DC_LOG_ERROR("Failed to free phantom stream %p in dc state %p.\n", phantom_stream, state);
+ return;
+ }
+
+ dc_stream_release(phantom_stream);
+}
+
+struct dc_plane_state *dc_state_create_phantom_plane(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_plane_state *main_plane)
+{
+ struct dc_plane_state *phantom_plane = dc_create_plane_state(dc);
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (!phantom_plane) {
+ DC_LOG_ERROR("Failed to allocate phantom plane.\n");
+ return NULL;
+ }
+
+ /* track phantom inside dc_state */
+ dc_state_track_phantom_plane(state, phantom_plane);
+
+ phantom_plane->is_phantom = true;
+
+ return phantom_plane;
+}
+
+void dc_state_release_phantom_plane(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_plane_state *phantom_plane)
+{
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (!dc_state_untrack_phantom_plane(state, phantom_plane)) {
+ DC_LOG_ERROR("Failed to free phantom plane %p in dc state %p.\n", phantom_plane, state);
+ return;
+ }
+
+ dc_plane_state_release(phantom_plane);
+}
+
+/*
+ * Add @phantom_stream to @state and record the SubVP pairing between
+ * @main_stream and @phantom_stream in their stream status entries.
+ *
+ * The returned status is the one produced by dc_state_add_stream(). The
+ * pairing metadata is written for every status entry that can be looked up in
+ * @state, independently of that status.
+ * NOTE(review): when the add fails, the main stream is still marked
+ * SUBVP_MAIN if it is present in @state - confirm callers expect this.
+ */
+enum dc_status dc_state_add_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream,
+		struct dc_stream_state *main_stream)
+{
+	struct dc_stream_status *main_status;
+	struct dc_stream_status *phantom_status;
+	enum dc_status add_result;
+
+	add_result = dc_state_add_stream(dc, state, phantom_stream);
+
+	/* a phantom stream that made it into the state must also be tracked */
+	if (add_result == DC_OK &&
+			!dc_state_is_phantom_stream_tracked(state, phantom_stream))
+		dc_state_track_phantom_stream(state, phantom_stream);
+
+	/* main side of the SubVP pairing */
+	main_status = dc_state_get_stream_status(state, main_stream);
+	if (main_status) {
+		main_status->mall_stream_config.type = SUBVP_MAIN;
+		main_status->mall_stream_config.paired_stream = phantom_stream;
+	}
+
+	/* phantom side of the SubVP pairing; cursor limits start cleared */
+	phantom_status = dc_state_get_stream_status(state, phantom_stream);
+	if (phantom_status) {
+		phantom_status->mall_stream_config.type = SUBVP_PHANTOM;
+		phantom_status->mall_stream_config.paired_stream = main_stream;
+		phantom_status->mall_stream_config.subvp_limit_cursor_size = false;
+		phantom_status->mall_stream_config.cursor_size_limit_subvp = false;
+	}
+
+	/* flag the main stream as cursor-size limited by SubVP */
+	dc_state_set_stream_subvp_cursor_limit(main_stream, state, true);
+
+	return add_result;
+}
+
+/*
+ * Clear the SubVP pairing recorded for @phantom_stream (and for the stream it
+ * was paired with, when that stream is found in @state), then remove
+ * @phantom_stream from @state.
+ *
+ * Returns the status of dc_state_remove_stream().
+ */
+enum dc_status dc_state_remove_phantom_stream(const struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *phantom_stream)
+{
+	struct dc_stream_status *phantom_status;
+	struct dc_stream_status *paired_status;
+
+	phantom_status = dc_state_get_stream_status(state, phantom_stream);
+	if (phantom_status) {
+		/* look the partner up before the link to it is erased */
+		paired_status = dc_state_get_stream_status(state,
+				phantom_status->mall_stream_config.paired_stream);
+
+		phantom_status->mall_stream_config.type = SUBVP_NONE;
+		phantom_status->mall_stream_config.paired_stream = NULL;
+
+		if (paired_status) {
+			paired_status->mall_stream_config.type = SUBVP_NONE;
+			paired_status->mall_stream_config.paired_stream = NULL;
+		}
+	}
+
+	/* finally drop the stream itself from the state */
+	return dc_state_remove_stream(dc, state, phantom_stream);
+}
+
+/*
+ * Attach @phantom_plane to @phantom_stream in @state via dc_state_add_plane()
+ * and make sure the plane is tracked as a phantom in @state.
+ *
+ * Returns the result of dc_state_add_plane().
+ */
+bool dc_state_add_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state)
+{
+	bool added = dc_state_add_plane(dc, phantom_stream, phantom_plane, state);
+
+	if (!added)
+		return false;
+
+	/* a plane that was added to the state must also be tracked */
+	if (!dc_state_is_phantom_plane_tracked(state, phantom_plane))
+		dc_state_track_phantom_plane(state, phantom_plane);
+
+	return true;
+}
+
+/*
+ * Detach @phantom_plane from @phantom_stream in @state. This forwards to
+ * dc_state_remove_plane() and returns its result; tracking of the phantom
+ * plane is not changed here.
+ */
+bool dc_state_remove_phantom_plane(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *phantom_plane,
+		struct dc_state *state)
+{
+	bool removed = dc_state_remove_plane(dc, phantom_stream, phantom_plane, state);
+
+	return removed;
+}
+
+/*
+ * Remove every plane currently attached to @phantom_stream in @state and,
+ * when @should_release_planes is set, release each one as a phantom plane.
+ *
+ * Returns false if @phantom_stream is not part of @state or if removing any
+ * plane fails (planes after the failing one are left attached); true
+ * otherwise.
+ */
+bool dc_state_rem_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_state *state,
+		bool should_release_planes)
+{
+	struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 };
+	struct dc_stream_status *stream_status = NULL;
+	int idx, plane_total;
+
+	/* locate the status entry that belongs to the phantom stream */
+	for (idx = 0; idx < state->stream_count; idx++) {
+		if (state->streams[idx] != phantom_stream)
+			continue;
+
+		stream_status = &state->stream_status[idx];
+		break;
+	}
+
+	if (!stream_status) {
+		dm_error("Existing stream %p not found!\n", phantom_stream);
+		return false;
+	}
+
+	/* snapshot the plane list first; it is edited while planes are removed */
+	plane_total = stream_status->plane_count;
+	for (idx = 0; idx < plane_total; idx++)
+		del_planes[idx] = stream_status->plane_states[idx];
+
+	for (idx = 0; idx < plane_total; idx++) {
+		struct dc_plane_state *plane = del_planes[idx];
+
+		if (!dc_state_remove_plane(dc, phantom_stream, plane, state))
+			return false;
+
+		if (should_release_planes)
+			dc_state_release_phantom_plane(dc, state, plane);
+	}
+
+	return true;
+}
+
+/*
+ * Attach @plane_count planes from @phantom_planes to @phantom_stream in
+ * @state. This forwards to dc_state_add_all_planes_for_stream() and returns
+ * its result.
+ */
+bool dc_state_add_all_phantom_planes_for_stream(
+		const struct dc *dc,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state * const *phantom_planes,
+		int plane_count,
+		struct dc_state *state)
+{
+	bool added = dc_state_add_all_planes_for_stream(dc, phantom_stream, phantom_planes, plane_count, state);
+
+	return added;
+}
+
+/*
+ * Walk the pipes of @state and, for every pipe that carries both a plane and
+ * a stream and whose SubVP type is SUBVP_PHANTOM, remove the stream's planes
+ * (without releasing them) and then the phantom stream itself.
+ *
+ * Returns true if at least one phantom stream was processed.
+ */
+bool dc_state_remove_phantom_streams_and_planes(
+		const struct dc *dc,
+		struct dc_state *state)
+{
+	bool removed_phantom = false;
+	int pipe_idx;
+
+	for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
+		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[pipe_idx];
+		struct dc_stream_state *phantom_stream;
+
+		/* only fully populated phantom pipes are of interest */
+		if (!pipe->plane_state || !pipe->stream ||
+				dc_state_get_pipe_subvp_type(state, pipe) != SUBVP_PHANTOM)
+			continue;
+
+		phantom_stream = pipe->stream;
+
+		dc_state_rem_all_phantom_planes_for_stream(dc, phantom_stream, state, false);
+		dc_state_remove_phantom_stream(dc, state, phantom_stream);
+		removed_phantom = true;
+	}
+
+	return removed_phantom;
+}
+
+/*
+ * Release every phantom stream and then every phantom plane tracked in
+ * @state.
+ *
+ * Each tracked array is copied to the stack before iterating, presumably
+ * because releasing an entry untracks it and so edits the array in @state
+ * while the loop runs - verify against dc_state_untrack_phantom_*().
+ */
+void dc_state_release_phantom_streams_and_planes(
+		const struct dc *dc,
+		struct dc_state *state)
+{
+	struct dc_stream_state *stream_snapshot[MAX_PHANTOM_PIPES];
+	struct dc_plane_state *plane_snapshot[MAX_PHANTOM_PIPES];
+	unsigned int stream_total = state->phantom_stream_count;
+	unsigned int plane_total;
+	unsigned int idx;
+
+	memcpy(stream_snapshot, state->phantom_streams, sizeof(stream_snapshot));
+	for (idx = 0; idx < stream_total; idx++)
+		dc_state_release_phantom_stream(dc, state, stream_snapshot[idx]);
+
+	/* the plane count is sampled only after the streams have been released */
+	plane_total = state->phantom_plane_count;
+	memcpy(plane_snapshot, state->phantom_planes, sizeof(plane_snapshot));
+	for (idx = 0; idx < plane_total; idx++)
+		dc_state_release_phantom_plane(dc, state, plane_snapshot[idx]);
+}
+
+/*
+ * Return the first stream in @state whose stream_id equals @id, or NULL when
+ * no stream in @state carries that id. NULL slots in the stream array are
+ * skipped.
+ */
+struct dc_stream_state *dc_state_get_stream_from_id(const struct dc_state *state, unsigned int id)
+{
+	int idx;
+
+	for (idx = 0; idx < state->stream_count; idx++) {
+		struct dc_stream_state *candidate = state->streams[idx];
+
+		if (candidate && candidate->stream_id == id)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Report whether the FAMS2 global config "enable" bit is set in @state (when
+ * @state is non-NULL) or in dc->current_state (when that is non-NULL).
+ */
+bool dc_state_is_fams2_in_use(
+		const struct dc *dc,
+		const struct dc_state *state)
+{
+	bool enabled_in_state = false;
+	bool enabled_in_current = false;
+
+	if (state)
+		enabled_in_state = state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable != 0;
+
+	if (dc->current_state)
+		enabled_in_current =
+			dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable != 0;
+
+	return enabled_in_state || enabled_in_current;
+}
+
+/*
+ * Store @limit in the subvp_limit_cursor_size field of @stream's status entry
+ * in @state. Does nothing when @stream has no status entry in @state.
+ */
+void dc_state_set_stream_subvp_cursor_limit(const struct dc_stream_state *stream,
+		struct dc_state *state,
+		bool limit)
+{
+	struct dc_stream_status *status = dc_state_get_stream_status(state, stream);
+
+	if (!status)
+		return;
+
+	status->mall_stream_config.subvp_limit_cursor_size = limit;
+}
+
+/*
+ * Read the subvp_limit_cursor_size field of @stream's status entry in @state.
+ * Returns false when @stream has no status entry in @state.
+ */
+bool dc_state_get_stream_subvp_cursor_limit(const struct dc_stream_state *stream,
+		struct dc_state *state)
+{
+	struct dc_stream_status *status = dc_state_get_stream_status(state, stream);
+
+	if (!status)
+		return false;
+
+	return status->mall_stream_config.subvp_limit_cursor_size;
+}
+
+/*
+ * Store @limit in the cursor_size_limit_subvp field of @stream's status entry
+ * in @state. Does nothing when @stream has no status entry in @state.
+ */
+void dc_state_set_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+		struct dc_state *state,
+		bool limit)
+{
+	struct dc_stream_status *status = dc_state_get_stream_status(state, stream);
+
+	if (!status)
+		return;
+
+	status->mall_stream_config.cursor_size_limit_subvp = limit;
+}
+
+/*
+ * Read the cursor_size_limit_subvp field of @stream's status entry in @state.
+ * Returns false when @stream has no status entry in @state.
+ */
+bool dc_state_get_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+		struct dc_state *state)
+{
+	struct dc_stream_status *status = dc_state_get_stream_status(state, stream);
+
+	if (!status)
+		return false;
+
+	return status->mall_stream_config.cursor_size_limit_subvp;
+}
+
+/*
+ * Decide whether the cursor_size_limit_subvp flag of @stream in @state may be
+ * cleared.
+ *
+ * Returns false when @stream has no status entry in @state or when the flag
+ * is not currently set. Otherwise returns true if any of the following holds,
+ * checked in this order:
+ *  - the stream is a SUBVP_PHANTOM stream,
+ *  - the stream has hw_cursor_req set,
+ *  - subvp_limit_cursor_size is not set for the stream,
+ *  - the cursor is not enabled,
+ *  - the current cursor attributes pass dc_stream_check_cursor_attributes().
+ */
+bool dc_state_can_clear_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+		struct dc_state *state)
+{
+	struct dc_stream_status *status = dc_state_get_stream_status(state, stream);
+
+	if (!status)
+		return false;
+
+	/* nothing to clear unless the limit is currently applied */
+	if (!dc_state_get_stream_cursor_subvp_limit(stream, state))
+		return false;
+
+	return status->mall_stream_config.type == SUBVP_PHANTOM ||
+		stream->hw_cursor_req ||
+		!status->mall_stream_config.subvp_limit_cursor_size ||
+		!stream->cursor_position.enable ||
+		dc_stream_check_cursor_attributes(stream, state, &stream->cursor_attributes);
+}
+
+/*
+ * Return true if any stream in @state has a SubVP type other than SUBVP_NONE.
+ */
+bool dc_state_is_subvp_in_use(struct dc_state *state)
+{
+	uint32_t stream_idx;
+
+	for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) {
+		struct dc_stream_state *stream = state->streams[stream_idx];
+
+		if (dc_state_get_stream_subvp_type(state, stream) == SUBVP_NONE)
+			continue;
+
+		return true;
+	}
+
+	return false;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 01fe2d2fd241..9ac2d41f8fca 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -31,8 +31,16 @@
#include "ipp.h"
#include "timing_generator.h"
#include "dc_dmub_srv.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
#define DC_LOGGER dc->ctx->logger
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
+#ifndef MAX
+/* Each argument is parenthesized so compound expressions keep their intended precedence. */
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#endif
/*******************************************************************************
* Private functions
@@ -54,7 +62,7 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink)
}
}
-static bool dc_stream_construct(struct dc_stream_state *stream,
+bool dc_stream_construct(struct dc_stream_state *stream,
struct dc_sink *dc_sink_data)
{
uint32_t i = 0;
@@ -114,26 +122,23 @@ static bool dc_stream_construct(struct dc_stream_state *stream,
update_stream_signal(stream, dc_sink_data);
- stream->out_transfer_func = dc_create_transfer_func();
- if (stream->out_transfer_func == NULL) {
- dc_sink_release(dc_sink_data);
- return false;
- }
- stream->out_transfer_func->type = TF_TYPE_BYPASS;
+ stream->out_transfer_func.type = TF_TYPE_BYPASS;
- stream->stream_id = stream->ctx->dc_stream_id_count;
- stream->ctx->dc_stream_id_count++;
+ dc_stream_assign_stream_id(stream);
return true;
}
-static void dc_stream_destruct(struct dc_stream_state *stream)
+void dc_stream_destruct(struct dc_stream_state *stream)
{
dc_sink_release(stream->sink);
- if (stream->out_transfer_func != NULL) {
- dc_transfer_func_release(stream->out_transfer_func);
- stream->out_transfer_func = NULL;
- }
+}
+
+void dc_stream_assign_stream_id(struct dc_stream_state *stream)
+{
+ /* MSB is reserved to indicate phantoms */
+ stream->stream_id = stream->ctx->dc_stream_id_count;
+ stream->ctx->dc_stream_id_count++;
}
void dc_stream_retain(struct dc_stream_state *stream)
@@ -193,14 +198,11 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
if (new_stream->sink)
dc_sink_retain(new_stream->sink);
- if (new_stream->out_transfer_func)
- dc_transfer_func_retain(new_stream->out_transfer_func);
-
- new_stream->stream_id = new_stream->ctx->dc_stream_id_count;
- new_stream->ctx->dc_stream_id_count++;
+ dc_stream_assign_stream_id(new_stream);
/* If using dynamic encoder assignment, wait till stream committed to assign encoder. */
- if (new_stream->ctx->dc->res_pool->funcs->link_encs_assign)
+ if (new_stream->ctx->dc->res_pool->funcs->link_encs_assign &&
+ !new_stream->ctx->dc->config.unify_link_enc_assignment)
new_stream->link_enc = NULL;
kref_init(&new_stream->refcount);
@@ -209,31 +211,6 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
}
/**
- * dc_stream_get_status_from_state - Get stream status from given dc state
- * @state: DC state to find the stream status in
- * @stream: The stream to get the stream status for
- *
- * The given stream is expected to exist in the given dc state. Otherwise, NULL
- * will be returned.
- */
-struct dc_stream_status *dc_stream_get_status_from_state(
- struct dc_state *state,
- struct dc_stream_state *stream)
-{
- uint8_t i;
-
- if (state == NULL)
- return NULL;
-
- for (i = 0; i < state->stream_count; i++) {
- if (stream == state->streams[i])
- return &state->stream_status[i];
- }
-
- return NULL;
-}
-
-/**
* dc_stream_get_status() - Get current stream status of the given stream state
* @stream: The stream to get the stream status for.
*
@@ -244,13 +221,12 @@ struct dc_stream_status *dc_stream_get_status(
struct dc_stream_state *stream)
{
struct dc *dc = stream->ctx->dc;
- return dc_stream_get_status_from_state(dc->current_state, stream);
+ return dc_state_get_stream_status(dc->current_state, stream);
}
-static void program_cursor_attributes(
+void program_cursor_attributes(
struct dc *dc,
- struct dc_stream_state *stream,
- const struct dc_cursor_attributes *attributes)
+ struct dc_stream_state *stream)
{
int i;
struct resource_context *res_ctx;
@@ -288,47 +264,47 @@ static void program_cursor_attributes(
}
}
-#ifndef TRIM_FSFT
/*
- * dc_optimize_timing_for_fsft() - dc to optimize timing
+ * dc_stream_check_cursor_attributes() - Check validity of cursor attributes and surface address
*/
-bool dc_optimize_timing_for_fsft(
- struct dc_stream_state *pStream,
- unsigned int max_input_rate_in_khz)
+bool dc_stream_check_cursor_attributes(
+ const struct dc_stream_state *stream,
+ struct dc_state *state,
+ const struct dc_cursor_attributes *attributes)
{
- struct dc *dc;
+ const struct dc *dc;
- dc = pStream->ctx->dc;
+ unsigned int max_cursor_size;
- return (dc->hwss.optimize_timing_for_fsft &&
- dc->hwss.optimize_timing_for_fsft(dc, &pStream->timing, max_input_rate_in_khz));
-}
-#endif
+ if (NULL == stream) {
+ dm_error("DC: dc_stream is NULL!\n");
+ return false;
+ }
+ if (NULL == attributes) {
+ dm_error("DC: attributes is NULL!\n");
+ return false;
+ }
-static bool is_subvp_high_refresh_candidate(struct dc_stream_state *stream)
-{
- uint32_t refresh_rate;
- struct dc *dc = stream->ctx->dc;
+ if (attributes->address.quad_part == 0) {
+ dm_output_to_console("DC: Cursor address is 0!\n");
+ return false;
+ }
- refresh_rate = (stream->timing.pix_clk_100hz * (uint64_t)100 +
- stream->timing.v_total * stream->timing.h_total - (uint64_t)1);
- refresh_rate = div_u64(refresh_rate, stream->timing.v_total);
- refresh_rate = div_u64(refresh_rate, stream->timing.h_total);
+ dc = stream->ctx->dc;
- /* If there's any stream that fits the SubVP high refresh criteria,
- * we must return true. This is because cursor updates are asynchronous
- * with full updates, so we could transition into a SubVP config and
- * remain in HW cursor mode if there's no cursor update which will
- * then cause corruption.
+ /* SubVP is not compatible with HW cursor larger than what can fit in cursor SRAM.
+ * Therefore, if cursor is greater than this, fallback to SW cursor.
*/
- if ((refresh_rate >= 120 && refresh_rate <= 175 &&
- stream->timing.v_addressable >= 1440 &&
- stream->timing.v_addressable <= 2160) &&
- (dc->current_state->stream_count > 1 ||
- (dc->current_state->stream_count == 1 && !stream->allow_freesync)))
- return true;
+ if (dc->debug.allow_sw_cursor_fallback && dc->res_pool->funcs->get_max_hw_cursor_size) {
+ max_cursor_size = dc->res_pool->funcs->get_max_hw_cursor_size(dc, state, stream);
+ max_cursor_size = max_cursor_size * max_cursor_size * 4;
- return false;
+ if (attributes->height * attributes->width * 4 > max_cursor_size) {
+ return false;
+ }
+ }
+
+ return true;
}
/*
@@ -338,64 +314,54 @@ bool dc_stream_set_cursor_attributes(
struct dc_stream_state *stream,
const struct dc_cursor_attributes *attributes)
{
- struct dc *dc;
- bool reset_idle_optimizations = false;
+ bool result = false;
- if (NULL == stream) {
- dm_error("DC: dc_stream is NULL!\n");
- return false;
- }
- if (NULL == attributes) {
- dm_error("DC: attributes is NULL!\n");
+ if (!stream)
return false;
+
+ if (dc_stream_check_cursor_attributes(stream, stream->ctx->dc->current_state, attributes)) {
+ stream->cursor_attributes = *attributes;
+ result = true;
}
- if (attributes->address.quad_part == 0) {
- dm_output_to_console("DC: Cursor address is 0!\n");
+ return result;
+}
+
+bool dc_stream_program_cursor_attributes(
+ struct dc_stream_state *stream,
+ const struct dc_cursor_attributes *attributes)
+{
+ struct dc *dc;
+ bool reset_idle_optimizations = false;
+
+ if (!stream)
return false;
- }
dc = stream->ctx->dc;
- /* SubVP is not compatible with HW cursor larger than 64 x 64 x 4.
- * Therefore, if cursor is greater than 64 x 64 x 4, fallback to SW cursor in the following case:
- * 1. If the config is a candidate for SubVP high refresh (both single an dual display configs)
- * 2. If not subvp high refresh, for single display cases, if resolution is >= 5K and refresh rate < 120hz
- * 3. If not subvp high refresh, for multi display cases, if resolution is >= 4K and refresh rate < 120hz
- */
- if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) {
- if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream))
- return false;
- if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 &&
- ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
- return false;
- else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 2160 &&
- ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
- return false;
- }
-
- stream->cursor_attributes = *attributes;
+ if (dc_stream_set_cursor_attributes(stream, attributes)) {
+ dc_z10_restore(dc);
+ /* disable idle optimizations while updating cursor */
+ if (dc->idle_optimizations_allowed) {
+ dc_allow_idle_optimizations(dc, false);
+ reset_idle_optimizations = true;
+ }
- dc_z10_restore(dc);
- /* disable idle optimizations while updating cursor */
- if (dc->idle_optimizations_allowed) {
- dc_allow_idle_optimizations(dc, false);
- reset_idle_optimizations = true;
- }
+ program_cursor_attributes(dc, stream);
- program_cursor_attributes(dc, stream, attributes);
+ /* re-enable idle optimizations if necessary */
+ if (reset_idle_optimizations && !dc->debug.disable_dmub_reallow_idle)
+ dc_allow_idle_optimizations(dc, true);
- /* re-enable idle optimizations if necessary */
- if (reset_idle_optimizations)
- dc_allow_idle_optimizations(dc, true);
+ return true;
+ }
- return true;
+ return false;
}
-static void program_cursor_position(
+void program_cursor_position(
struct dc *dc,
- struct dc_stream_state *stream,
- const struct dc_cursor_position *position)
+ struct dc_stream_state *stream)
{
int i;
struct resource_context *res_ctx;
@@ -434,9 +400,6 @@ bool dc_stream_set_cursor_position(
struct dc_stream_state *stream,
const struct dc_cursor_position *position)
{
- struct dc *dc;
- bool reset_idle_optimizations = false;
-
if (NULL == stream) {
dm_error("DC: dc_stream is NULL!\n");
return false;
@@ -447,24 +410,66 @@ bool dc_stream_set_cursor_position(
return false;
}
+ stream->cursor_position = *position;
+
+
+ return true;
+}
+
+bool dc_stream_program_cursor_position(
+ struct dc_stream_state *stream,
+ const struct dc_cursor_position *position)
+{
+ struct dc *dc;
+ bool reset_idle_optimizations = false;
+ const struct dc_cursor_position *old_position;
+
+ if (!stream)
+ return false;
+
+ old_position = &stream->cursor_position;
dc = stream->ctx->dc;
- dc_z10_restore(dc);
- /* disable idle optimizations if enabling cursor */
- if (dc->idle_optimizations_allowed && (!stream->cursor_position.enable || dc->debug.exit_idle_opt_for_cursor_updates)
- && position->enable) {
- dc_allow_idle_optimizations(dc, false);
- reset_idle_optimizations = true;
- }
+ if (dc_stream_set_cursor_position(stream, position)) {
+ dc_z10_restore(dc);
- stream->cursor_position = *position;
+ /* disable idle optimizations if enabling cursor */
+ if (dc->idle_optimizations_allowed &&
+ (!old_position->enable || dc->debug.exit_idle_opt_for_cursor_updates) &&
+ position->enable) {
+ dc_allow_idle_optimizations(dc, false);
+ reset_idle_optimizations = true;
+ }
- program_cursor_position(dc, stream, position);
- /* re-enable idle optimizations if necessary */
- if (reset_idle_optimizations)
- dc_allow_idle_optimizations(dc, true);
+ program_cursor_position(dc, stream);
+ /* re-enable idle optimizations if necessary */
+ if (reset_idle_optimizations && !dc->debug.disable_dmub_reallow_idle)
+ dc_allow_idle_optimizations(dc, true);
+
+ /* apply/update visual confirm */
+ if (dc->debug.visual_confirm == VISUAL_CONFIRM_HW_CURSOR) {
+ /* update software state */
+ int i;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ /* adjust visual confirm color for all pipes with current stream */
+ if (stream == pipe_ctx->stream) {
+ get_cursor_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
+
+ /* programming hardware */
+ if (pipe_ctx->plane_state)
+ dc->hwss.update_visual_confirm_color(dc, pipe_ctx,
+ pipe_ctx->plane_res.hubp->mpcc_id);
+ }
+ }
+ }
- return true;
+ return true;
+ }
+
+ return false;
}
bool dc_stream_add_writeback(struct dc *dc,
@@ -490,7 +495,9 @@ bool dc_stream_add_writeback(struct dc *dc,
return false;
}
- wb_info->dwb_params.out_transfer_func = stream->out_transfer_func;
+ dc_exit_ips_for_hw_access(dc);
+
+ wb_info->dwb_params.out_transfer_func = &stream->out_transfer_func;
dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
dwb->dwb_is_drc = false;
@@ -514,16 +521,37 @@ bool dc_stream_add_writeback(struct dc *dc,
if (dc->hwss.enable_writeback) {
struct dc_stream_status *stream_status = dc_stream_get_status(stream);
struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
- dwb->otg_inst = stream_status->primary_otg_inst;
+ if (stream_status)
+ dwb->otg_inst = stream_status->primary_otg_inst;
+ }
+
+ if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+ dm_error("DC: update_bandwidth failed!\n");
+ return false;
+ }
+
+ /* enable writeback */
+ if (dc->hwss.enable_writeback) {
+ struct dwbc *dwb = dc->res_pool->dwbc[wb_info->dwb_pipe_inst];
+
+ if (dwb->funcs->is_enabled(dwb)) {
+ /* writeback pipe already enabled, only need to update */
+ dc->hwss.update_writeback(dc, wb_info, dc->current_state);
+ } else {
+ /* Enable writeback pipe from scratch*/
+ dc->hwss.enable_writeback(dc, wb_info, dc->current_state);
+ }
}
+
return true;
}
-bool dc_stream_remove_writeback(struct dc *dc,
+bool dc_stream_fc_disable_writeback(struct dc *dc,
struct dc_stream_state *stream,
uint32_t dwb_pipe_inst)
{
- int i = 0, j = 0;
+ struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst];
+
if (stream == NULL) {
dm_error("DC: dc_stream is NULL!\n");
return false;
@@ -539,39 +567,78 @@ bool dc_stream_remove_writeback(struct dc *dc,
return false;
}
-// stream->writeback_info[dwb_pipe_inst].wb_enabled = false;
- for (i = 0; i < stream->num_wb_info; i++) {
- /*dynamic update*/
- if (stream->writeback_info[i].wb_enabled &&
- stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst) {
- stream->writeback_info[i].wb_enabled = false;
- }
+ dc_exit_ips_for_hw_access(dc);
+
+ if (dwb->funcs->set_fc_enable)
+ dwb->funcs->set_fc_enable(dwb, DWB_FRAME_CAPTURE_DISABLE);
+
+ return true;
+}
+
+/**
+ * dc_stream_remove_writeback() - Disables writeback and removes writeback info.
+ * @dc: Display core control structure.
+ * @stream: Display core stream state.
+ * @dwb_pipe_inst: Display writeback pipe.
+ *
+ * Return: returns true on success, false otherwise.
+ */
+bool dc_stream_remove_writeback(struct dc *dc,
+ struct dc_stream_state *stream,
+ uint32_t dwb_pipe_inst)
+{
+ unsigned int i, j;
+ if (stream == NULL) {
+ dm_error("DC: dc_stream is NULL!\n");
+ return false;
+ }
+
+ if (dwb_pipe_inst >= MAX_DWB_PIPES) {
+ dm_error("DC: writeback pipe is invalid!\n");
+ return false;
+ }
+
+ if (stream->num_wb_info > MAX_DWB_PIPES) {
+ dm_error("DC: num_wb_info is invalid!\n");
+ return false;
}
/* remove writeback info for disabled writeback pipes from stream */
for (i = 0, j = 0; i < stream->num_wb_info; i++) {
if (stream->writeback_info[i].wb_enabled) {
- if (j < i)
- /* trim the array */
+
+ if (stream->writeback_info[i].dwb_pipe_inst == dwb_pipe_inst)
+ stream->writeback_info[i].wb_enabled = false;
+
+ /* trim the array */
+ if (j < i) {
memcpy(&stream->writeback_info[j], &stream->writeback_info[i],
sizeof(struct dc_writeback_info));
- j++;
+ j++;
+ }
}
}
stream->num_wb_info = j;
+ /* recalculate and apply DML parameters */
+ if (!dc->hwss.update_bandwidth(dc, dc->current_state)) {
+ dm_error("DC: update_bandwidth failed!\n");
+ return false;
+ }
+
+ dc_exit_ips_for_hw_access(dc);
+
+ /* disable writeback */
+ if (dc->hwss.disable_writeback) {
+ struct dwbc *dwb = dc->res_pool->dwbc[dwb_pipe_inst];
+
+ if (dwb->funcs->is_enabled(dwb))
+ dc->hwss.disable_writeback(dc, dwb_pipe_inst);
+ }
+
return true;
}
-bool dc_stream_warmup_writeback(struct dc *dc,
- int num_dwb,
- struct dc_writeback_info *wb_info)
-{
- if (dc->hwss.mmhubbub_warmup)
- return dc->hwss.mmhubbub_warmup(dc, num_dwb, wb_info);
- else
- return false;
-}
uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream)
{
uint8_t i;
@@ -579,10 +646,12 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream)
struct resource_context *res_ctx =
&dc->current_state->res_ctx;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg;
- if (res_ctx->pipe_ctx[i].stream != stream)
+ if (res_ctx->pipe_ctx[i].stream != stream || !tg)
continue;
return tg->funcs->get_frame_count(tg);
@@ -607,6 +676,8 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
dc = stream->ctx->dc;
res_ctx = &dc->current_state->res_ctx;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
@@ -638,10 +709,12 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream,
struct resource_context *res_ctx =
&dc->current_state->res_ctx;
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < MAX_PIPES; i++) {
struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg;
- if (res_ctx->pipe_ctx[i].stream != stream)
+ if (res_ctx->pipe_ctx[i].stream != stream || !tg)
continue;
tg->funcs->get_scanoutpos(tg,
@@ -674,6 +747,8 @@ bool dc_stream_dmdata_status_done(struct dc *dc, struct dc_stream_state *stream)
if (i == MAX_PIPES)
return true;
+ dc_exit_ips_for_hw_access(dc);
+
return dc->hwss.dmdata_status_done(pipe);
}
@@ -708,6 +783,8 @@ bool dc_stream_set_dynamic_metadata(struct dc *dc,
pipe_ctx->stream->dmdata_address = attr->address;
+ dc_exit_ips_for_hw_access(dc);
+
dc->hwss.program_dmdata_engine(pipe_ctx);
if (hubp->funcs->dmdata_set_attributes != NULL &&
@@ -758,12 +835,12 @@ void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream)
stream->dst.height,
stream->output_color_space);
DC_LOG_DC(
- "\tpix_clk_khz: %d, h_total: %d, v_total: %d, pixelencoder:%d, displaycolorDepth:%d\n",
+ "\tpix_clk_khz: %d, h_total: %d, v_total: %d, pixel_encoding:%s, color_depth:%s\n",
stream->timing.pix_clk_100hz / 10,
stream->timing.h_total,
stream->timing.v_total,
- stream->timing.pixel_encoding,
- stream->timing.display_color_depth);
+ dc_pixel_encoding_to_str(stream->timing.pixel_encoding),
+ dc_color_depth_to_str(stream->timing.display_color_depth));
DC_LOG_DC(
"\tlink: %d\n",
stream->link->link_index);
@@ -785,3 +862,371 @@ void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream)
}
}
+/*
+* dc_stream_get_3dlut_for_stream()
+* Requirements:
+* 1. If stream already owns an RMCM instance, return it.
+* 2. If it doesn't and we don't need to allocate, return NULL.
+* 3. If there's a free RMCM instance, assign to stream and return it.
+* 4. If no free RMCM instances, return NULL.
+*/
+
+struct dc_rmcm_3dlut *dc_stream_get_3dlut_for_stream(
+ const struct dc *dc,
+ const struct dc_stream_state *stream,
+ bool allocate_one)
+{
+ unsigned int num_rmcm = dc->caps.color.mpc.num_rmcm_3dluts;
+
+ // see if one is allocated for this stream
+ for (int i = 0; i < num_rmcm; i++) {
+ if (dc->res_pool->rmcm_3dlut[i].isInUse &&
+ dc->res_pool->rmcm_3dlut[i].stream == stream)
+ return &dc->res_pool->rmcm_3dlut[i];
+ }
+
+ //case: not found one, and dont need to allocate
+ if (!allocate_one)
+ return NULL;
+
+ //see if there is an unused 3dlut, allocate
+ for (int i = 0; i < num_rmcm; i++) {
+ if (!dc->res_pool->rmcm_3dlut[i].isInUse) {
+ dc->res_pool->rmcm_3dlut[i].isInUse = true;
+ dc->res_pool->rmcm_3dlut[i].stream = stream;
+ return &dc->res_pool->rmcm_3dlut[i];
+ }
+ }
+
+ //dont have a 3dlut
+ return NULL;
+}
+
+
+void dc_stream_release_3dlut_for_stream(
+ const struct dc *dc,
+ const struct dc_stream_state *stream)
+{
+ struct dc_rmcm_3dlut *rmcm_3dlut =
+ dc_stream_get_3dlut_for_stream(dc, stream, false);
+
+ if (rmcm_3dlut) {
+ rmcm_3dlut->isInUse = false;
+ rmcm_3dlut->stream = NULL;
+ rmcm_3dlut->protection_bits = 0;
+ }
+}
+
+
+void dc_stream_init_rmcm_3dlut(struct dc *dc)
+{
+ unsigned int num_rmcm = dc->caps.color.mpc.num_rmcm_3dluts;
+
+ for (int i = 0; i < num_rmcm; i++) {
+ dc->res_pool->rmcm_3dlut[i].isInUse = false;
+ dc->res_pool->rmcm_3dlut[i].stream = NULL;
+ dc->res_pool->rmcm_3dlut[i].protection_bits = 0;
+ }
+}
+
+/*
+ * Finds the greatest index in refresh_rate_hz that contains a value <= refresh.
+ * Falls back to the last table index when no [i, i + 1) pair brackets refresh.
+ */
+static int dc_stream_get_nearest_smallest_index(struct dc_stream_state *stream, int refresh)
+{
+	for (int i = 0; i < (LUMINANCE_DATA_TABLE_SIZE - 1); ++i) {
+		if ((stream->lumin_data.refresh_rate_hz[i] <= refresh) && (refresh < stream->lumin_data.refresh_rate_hz[i + 1]))
+			return i;
+	}
+	return LUMINANCE_DATA_TABLE_SIZE - 1;
+}
+
+/*
+ * Finds a corresponding brightness for a given refresh rate between 2 given indices, where index1 < index2
+ */
+static int dc_stream_get_brightness_millinits_linear_interpolation (struct dc_stream_state *stream,
+ int index1,
+ int index2,
+ int refresh_hz)
+{
+ long long slope = 0;
+ if (stream->lumin_data.refresh_rate_hz[index2] != stream->lumin_data.refresh_rate_hz[index1]) {
+ slope = (stream->lumin_data.luminance_millinits[index2] - stream->lumin_data.luminance_millinits[index1]) /
+ (stream->lumin_data.refresh_rate_hz[index2] - stream->lumin_data.refresh_rate_hz[index1]);
+ }
+
+ int y_intercept = stream->lumin_data.luminance_millinits[index2] - slope * stream->lumin_data.refresh_rate_hz[index2];
+
+ return (y_intercept + refresh_hz * slope);
+}
+
+/*
+ * Finds a corresponding refresh rate for a given brightness between 2 given indices, where index1 < index2
+ */
+static int dc_stream_get_refresh_hz_linear_interpolation (struct dc_stream_state *stream,
+		int index1,
+		int index2,
+		int brightness_millinits)
+{
+	long long slope = 1;
+	if (stream->lumin_data.refresh_rate_hz[index2] != stream->lumin_data.refresh_rate_hz[index1]) {
+		slope = (stream->lumin_data.luminance_millinits[index2] - stream->lumin_data.luminance_millinits[index1]) /
+			(stream->lumin_data.refresh_rate_hz[index2] - stream->lumin_data.refresh_rate_hz[index1]);
+	}
+	/* NOTE(review): the quotient above can be 0 (equal or, for integer tables, truncated luminance delta) and is the divisor below - confirm callers rule this out */
+	int y_intercept = stream->lumin_data.luminance_millinits[index2] - slope * stream->lumin_data.refresh_rate_hz[index2];
+	/* invert brightness = slope * refresh + y_intercept to solve for refresh */
+	return ((int)div64_s64((brightness_millinits - y_intercept), slope));
+}
+
+/*
+ * Finds the current brightness in millinits given a refresh rate
+ */
+static int dc_stream_get_brightness_millinits_from_refresh (struct dc_stream_state *stream, int refresh_hz)
+{
+	int nearest_smallest_index = dc_stream_get_nearest_smallest_index(stream, refresh_hz);
+	int nearest_smallest_value = stream->lumin_data.refresh_rate_hz[nearest_smallest_index];
+	/* exact table hit: no interpolation needed */
+	if (nearest_smallest_value == refresh_hz)
+		return stream->lumin_data.luminance_millinits[nearest_smallest_index];
+	/* last slot has no upper neighbour: use the line through the final two entries */
+	if (nearest_smallest_index >= (LUMINANCE_DATA_TABLE_SIZE - 1))
+		return dc_stream_get_brightness_millinits_linear_interpolation(stream, nearest_smallest_index - 1, nearest_smallest_index, refresh_hz);
+	/* same refresh rate on both sides: take the lower entry's luminance as is */
+	if (nearest_smallest_value == stream->lumin_data.refresh_rate_hz[nearest_smallest_index + 1])
+		return stream->lumin_data.luminance_millinits[nearest_smallest_index];
+
+	return dc_stream_get_brightness_millinits_linear_interpolation(stream, nearest_smallest_index, nearest_smallest_index + 1, refresh_hz);
+}
+
+/*
+ * Finds the lowest/highest refresh rate (depending on search_for_max_increase)
+ * that can be achieved from starting_refresh_hz while staying
+ * within flicker criteria
+ */
+static int dc_stream_calculate_flickerless_refresh_rate(struct dc_stream_state *stream,
+ int current_brightness,
+ int starting_refresh_hz,
+ bool is_gaming,
+ bool search_for_max_increase)
+{
+ int nearest_smallest_index = dc_stream_get_nearest_smallest_index(stream, starting_refresh_hz);
+
+ int flicker_criteria_millinits = is_gaming ?
+ stream->lumin_data.flicker_criteria_milli_nits_GAMING :
+ stream->lumin_data.flicker_criteria_milli_nits_STATIC;
+
+ int safe_upper_bound = current_brightness + flicker_criteria_millinits;
+ int safe_lower_bound = current_brightness - flicker_criteria_millinits;
+ int lumin_millinits_temp = 0;
+
+ int offset = -1;
+ if (search_for_max_increase) {
+ offset = 1;
+ }
+
+ /*
+ * Increments up or down by 1 depending on search_for_max_increase
+ */
+ for (int i = nearest_smallest_index; (i > 0 && !search_for_max_increase) || (i < (LUMINANCE_DATA_TABLE_SIZE - 1) && search_for_max_increase); i += offset) {
+
+ lumin_millinits_temp = stream->lumin_data.luminance_millinits[i + offset];
+
+ if ((lumin_millinits_temp >= safe_upper_bound) || (lumin_millinits_temp <= safe_lower_bound)) {
+
+ if (stream->lumin_data.refresh_rate_hz[i + offset] == stream->lumin_data.refresh_rate_hz[i])
+ return stream->lumin_data.refresh_rate_hz[i];
+
+ int target_brightness = (stream->lumin_data.luminance_millinits[i + offset] >= (current_brightness + flicker_criteria_millinits)) ?
+ current_brightness + flicker_criteria_millinits :
+ current_brightness - flicker_criteria_millinits;
+
+ int refresh = 0;
+
+ /*
+ * Need the second input to be < third input for dc_stream_get_refresh_hz_linear_interpolation
+ */
+ if (search_for_max_increase)
+ refresh = dc_stream_get_refresh_hz_linear_interpolation(stream, i, i + offset, target_brightness);
+ else
+ refresh = dc_stream_get_refresh_hz_linear_interpolation(stream, i + offset, i, target_brightness);
+
+ if (refresh == stream->lumin_data.refresh_rate_hz[i + offset])
+ return stream->lumin_data.refresh_rate_hz[i + offset];
+
+ return refresh;
+ }
+ }
+
+ if (search_for_max_increase)
+ return (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, stream->timing.v_total*(long long)stream->timing.h_total);
+ else
+ return stream->lumin_data.refresh_rate_hz[0];
+}
+
+/*
+ * Gets the max delta luminance within a specified refresh range
+ */
+static int dc_stream_get_max_delta_lumin_millinits(struct dc_stream_state *stream, int hz1, int hz2, bool isGaming)
+{
+ int lower_refresh_brightness = dc_stream_get_brightness_millinits_from_refresh (stream, hz1);
+ int higher_refresh_brightness = dc_stream_get_brightness_millinits_from_refresh (stream, hz2);
+
+ int min = lower_refresh_brightness;
+ int max = higher_refresh_brightness;
+
+ /*
+ * Static screen, therefore no need to scan through array
+ */
+ if (!isGaming) {
+ if (lower_refresh_brightness >= higher_refresh_brightness) {
+ return lower_refresh_brightness - higher_refresh_brightness;
+ }
+ return higher_refresh_brightness - lower_refresh_brightness;
+ }
+
+ min = MIN(lower_refresh_brightness, higher_refresh_brightness);
+ max = MAX(lower_refresh_brightness, higher_refresh_brightness);
+
+ int nearest_smallest_index = dc_stream_get_nearest_smallest_index(stream, hz1);
+
+ for (; nearest_smallest_index < (LUMINANCE_DATA_TABLE_SIZE - 1) &&
+ stream->lumin_data.refresh_rate_hz[nearest_smallest_index + 1] <= hz2 ; nearest_smallest_index++) {
+ min = MIN(min, stream->lumin_data.luminance_millinits[nearest_smallest_index + 1]);
+ max = MAX(max, stream->lumin_data.luminance_millinits[nearest_smallest_index + 1]);
+ }
+
+ return (max - min);
+}
+
+/*
+ * Determines the max flickerless instant vtotal delta for a stream.
+ * increase == true targets a higher refresh rate (vtotal decrease), false a lower one
+ * (vtotal increase). Returns 0 if the timing or the safe refresh rate is unusable.
+ */
+static unsigned int dc_stream_get_max_flickerless_instant_vtotal_delta(struct dc_stream_state *stream, bool is_gaming, bool increase)
+{
+	if (stream->timing.v_total * stream->timing.h_total == 0)
+		return 0;
+
+	int current_refresh_hz = (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, stream->timing.v_total*(long long)stream->timing.h_total);
+	int current_brightness = dc_stream_get_brightness_millinits_from_refresh(stream, current_refresh_hz);
+	int safe_refresh_hz = dc_stream_calculate_flickerless_refresh_rate(stream, current_brightness, current_refresh_hz, is_gaming, increase);
+
+	/* safe_refresh_hz is a factor of the divisor below; never divide by zero */
+	if (safe_refresh_hz <= 0)
+		return 0;
+
+	int safe_refresh_v_total = (int)div64_s64((long long)stream->timing.pix_clk_100hz*100, safe_refresh_hz*(long long)stream->timing.h_total);
+
+	if (increase)
+		return (((int) stream->timing.v_total - safe_refresh_v_total) >= 0) ? (stream->timing.v_total - safe_refresh_v_total) : 0;
+	return ((safe_refresh_v_total - (int) stream->timing.v_total) >= 0) ? (safe_refresh_v_total - stream->timing.v_total) : 0;
+}
+
+/*
+ * Finds the highest refresh rate that can be achieved
+ * from starting_refresh_hz while staying within flicker criteria
+ */
+int dc_stream_calculate_max_flickerless_refresh_rate(struct dc_stream_state *stream, int starting_refresh_hz, bool is_gaming)
+{
+ if (!stream->lumin_data.is_valid)
+ return 0;
+
+ int current_brightness = dc_stream_get_brightness_millinits_from_refresh(stream, starting_refresh_hz);
+
+ return dc_stream_calculate_flickerless_refresh_rate(stream,
+ current_brightness,
+ starting_refresh_hz,
+ is_gaming,
+ true);
+}
+
+/*
+ * Finds the lowest refresh rate that can be achieved
+ * from starting_refresh_hz while staying within flicker criteria
+ */
+int dc_stream_calculate_min_flickerless_refresh_rate(struct dc_stream_state *stream, int starting_refresh_hz, bool is_gaming)
+{
+ if (!stream->lumin_data.is_valid)
+ return 0;
+
+ int current_brightness = dc_stream_get_brightness_millinits_from_refresh(stream, starting_refresh_hz);
+
+ return dc_stream_calculate_flickerless_refresh_rate(stream,
+ current_brightness,
+ starting_refresh_hz,
+ is_gaming,
+ false);
+}
+
+/*
+ * Determines if there will be a flicker when moving between 2 refresh rates
+ */
+bool dc_stream_is_refresh_rate_range_flickerless(struct dc_stream_state *stream, int hz1, int hz2, bool is_gaming)
+{
+
+ /*
+ * Invalid data: report "not flickerless" (NOTE(review): this comment used to assume the opposite - confirm intent)
+ */
+ if (!stream->lumin_data.is_valid)
+ return false;
+
+ int dl = dc_stream_get_max_delta_lumin_millinits(stream, hz1, hz2, is_gaming);
+
+ int flicker_criteria_millinits = (is_gaming) ?
+ stream->lumin_data.flicker_criteria_milli_nits_GAMING :
+ stream->lumin_data.flicker_criteria_milli_nits_STATIC;
+
+ return (dl <= flicker_criteria_millinits);
+}
+
+/*
+ * Determines the max instant vtotal delta decrease that can be applied without
+ * flickering for a given stream
+ */
+unsigned int dc_stream_get_max_flickerless_instant_vtotal_decrease(struct dc_stream_state *stream,
+ bool is_gaming)
+{
+ if (!stream->lumin_data.is_valid)
+ return 0;
+
+ return dc_stream_get_max_flickerless_instant_vtotal_delta(stream, is_gaming, true);
+}
+
+/*
+ * Determines the max instant vtotal delta increase that can be applied without
+ * flickering for a given stream
+ */
+unsigned int dc_stream_get_max_flickerless_instant_vtotal_increase(struct dc_stream_state *stream,
+ bool is_gaming)
+{
+ if (!stream->lumin_data.is_valid)
+ return 0;
+
+ return dc_stream_get_max_flickerless_instant_vtotal_delta(stream, is_gaming, false);
+}
+
+bool dc_stream_is_cursor_limit_pending(struct dc *dc, struct dc_stream_state *stream)
+{
+ bool is_limit_pending = false;
+
+ if (dc->current_state)
+ is_limit_pending = dc_state_get_stream_cursor_subvp_limit(stream, dc->current_state);
+
+ return is_limit_pending;
+}
+
+bool dc_stream_can_clear_cursor_limit(struct dc *dc, struct dc_stream_state *stream)
+{
+ bool can_clear_limit = false;
+
+ if (dc->current_state)
+ can_clear_limit = dc_state_get_stream_cursor_subvp_limit(stream, dc->current_state) &&
+ (stream->hw_cursor_req ||
+ !stream->cursor_position.enable ||
+ dc_stream_check_cursor_attributes(stream, dc->current_state, &stream->cursor_attributes));
+
+ return can_clear_limit;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index a80e45300783..922f23557f5d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -32,76 +32,58 @@
#include "transform.h"
#include "dpp.h"
+#include "dc_plane_priv.h"
+
/*******************************************************************************
* Private functions
******************************************************************************/
-static void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
+void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
{
plane_state->ctx = ctx;
- plane_state->gamma_correction = dc_create_gamma();
- if (plane_state->gamma_correction != NULL)
- plane_state->gamma_correction->is_identity = true;
+ plane_state->gamma_correction.is_identity = true;
- plane_state->in_transfer_func = dc_create_transfer_func();
- if (plane_state->in_transfer_func != NULL) {
- plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
- }
- plane_state->in_shaper_func = dc_create_transfer_func();
- if (plane_state->in_shaper_func != NULL) {
- plane_state->in_shaper_func->type = TF_TYPE_BYPASS;
- }
+ plane_state->in_transfer_func.type = TF_TYPE_BYPASS;
- plane_state->lut3d_func = dc_create_3dlut_func();
+ plane_state->in_shaper_func.type = TF_TYPE_BYPASS;
- plane_state->blend_tf = dc_create_transfer_func();
- if (plane_state->blend_tf != NULL) {
- plane_state->blend_tf->type = TF_TYPE_BYPASS;
- }
+ plane_state->lut3d_func.state.raw = 0;
+
+ plane_state->blend_tf.type = TF_TYPE_BYPASS;
plane_state->pre_multiplied_alpha = true;
}
-static void dc_plane_destruct(struct dc_plane_state *plane_state)
+void dc_plane_destruct(struct dc_plane_state *plane_state)
{
- if (plane_state->gamma_correction != NULL) {
- dc_gamma_release(&plane_state->gamma_correction);
- }
- if (plane_state->in_transfer_func != NULL) {
- dc_transfer_func_release(
- plane_state->in_transfer_func);
- plane_state->in_transfer_func = NULL;
- }
- if (plane_state->in_shaper_func != NULL) {
- dc_transfer_func_release(
- plane_state->in_shaper_func);
- plane_state->in_shaper_func = NULL;
- }
- if (plane_state->lut3d_func != NULL) {
- dc_3dlut_func_release(
- plane_state->lut3d_func);
- plane_state->lut3d_func = NULL;
- }
- if (plane_state->blend_tf != NULL) {
- dc_transfer_func_release(
- plane_state->blend_tf);
- plane_state->blend_tf = NULL;
+ // no more pointers to free within dc_plane_state
+}
+
+
+/* dc_state is passed in separately since it may differ from the current dc state accessible from plane_state e.g.
+ * if the driver is doing an update from an old context to a new one and the caller wants the pipe mask for the new
+ * context rather than the existing one
+ */
+uint8_t dc_plane_get_pipe_mask(struct dc_state *dc_state, const struct dc_plane_state *plane_state)
+{
+ uint8_t pipe_mask = 0;
+ int i;
+
+ for (i = 0; i < plane_state->ctx->dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &dc_state->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->plane_state == plane_state && pipe_ctx->plane_res.hubp)
+ pipe_mask |= 1 << pipe_ctx->plane_res.hubp->inst;
}
+ return pipe_mask;
}
/*******************************************************************************
* Public functions
******************************************************************************/
-void enable_surface_flip_reporting(struct dc_plane_state *plane_state,
- uint32_t controller_id)
-{
- plane_state->irq_source = controller_id + DC_IRQ_SOURCE_PFLIP1 - 1;
- /*register_flip_interrupt(surface);*/
-}
-
-struct dc_plane_state *dc_create_plane_state(struct dc *dc)
+struct dc_plane_state *dc_create_plane_state(const struct dc *dc)
{
struct dc_plane_state *plane_state = kvzalloc(sizeof(*plane_state),
GFP_KERNEL);
@@ -127,7 +109,8 @@ struct dc_plane_state *dc_create_plane_state(struct dc *dc)
*****************************************************************************
*/
const struct dc_plane_status *dc_plane_get_status(
- const struct dc_plane_state *plane_state)
+ const struct dc_plane_state *plane_state,
+ union dc_plane_status_update_flags flags)
{
const struct dc_plane_status *plane_status;
struct dc *dc;
@@ -154,11 +137,14 @@ const struct dc_plane_status *dc_plane_get_status(
if (pipe_ctx->plane_state != plane_state)
continue;
- pipe_ctx->plane_state->status.is_flip_pending = false;
+ if (pipe_ctx->plane_state && flags.bits.address)
+ pipe_ctx->plane_state->status.is_flip_pending = false;
break;
}
+ dc_exit_ips_for_hw_access(dc);
+
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe_ctx =
&dc->current_state->res_ctx.pipe_ctx[i];
@@ -166,7 +152,8 @@ const struct dc_plane_status *dc_plane_get_status(
if (pipe_ctx->plane_state != plane_state)
continue;
- dc->hwss.update_pending_status(pipe_ctx);
+ if (flags.bits.address)
+ dc->hwss.update_pending_status(pipe_ctx);
}
return plane_status;
@@ -285,4 +272,41 @@ void dc_3dlut_func_retain(struct dc_3dlut *lut)
kref_get(&lut->refcount);
}
+void dc_plane_force_dcc_and_tiling_disable(struct dc_plane_state *plane_state,
+ bool clear_tiling)
+{
+ struct dc *dc;
+ int i;
+
+ if (!plane_state)
+ return;
+
+ dc = plane_state->ctx->dc;
+
+ if (!dc || !dc->current_state)
+ return;
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx)
+ continue;
+
+ if (dc->hwss.clear_surface_dcc_and_tiling)
+ dc->hwss.clear_surface_dcc_and_tiling(pipe_ctx, plane_state, clear_tiling);
+ }
+}
+
+void dc_plane_copy_config(struct dc_plane_state *dst, const struct dc_plane_state *src)
+{
+ struct kref temp_refcount;
+
+ /* backup persistent info */
+ memcpy(&temp_refcount, &dst->refcount, sizeof(struct kref));
+
+ /* copy all configuration information */
+ memcpy(dst, src, sizeof(struct dc_plane_state));
+
+ /* restore persistent info */
+ memcpy(&dst->refcount, &temp_refcount, sizeof(struct kref));
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
index eda2152dcd1f..d1e68dc57a2a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c
@@ -47,6 +47,7 @@ int dc_setup_system_context(struct dc *dc, struct dc_phy_addr_space_config *pa_c
*/
memcpy(&dc->vm_pa_config, pa_config, sizeof(struct dc_phy_addr_space_config));
dc->vm_pa_config.valid = true;
+ dc->dml2_options.gpuvm_enable = true;
dc_z10_save_init(dc);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 31e3183497a7..98f0b6b3c213 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -27,6 +27,8 @@
#define DC_INTERFACE_H_
#include "dc_types.h"
+#include "dc_state.h"
+#include "dc_plane.h"
#include "grph_object_defs.h"
#include "logger_types.h"
#include "hdcp_msg_types.h"
@@ -35,11 +37,17 @@
#include "grph_object_ctrl_defs.h"
#include <inc/hw/opp.h>
-#include "inc/hw_sequencer.h"
+#include "hwss/hw_sequencer.h"
#include "inc/compressor.h"
#include "inc/hw/dmcu.h"
#include "dml/display_mode_lib.h"
+#include "dml2/dml2_wrapper.h"
+
+#include "dmub/inc/dmub_cmd.h"
+
+#include "sspl/dc_spl_types.h"
+
struct abm_save_restore;
/* forward declaration */
@@ -47,13 +55,24 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
-#define DC_VER "3.2.247"
+#define DC_VER "3.2.351"
-#define MAX_SURFACES 3
+/**
+ * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC
+ */
+#define MAX_SURFACES 4
+/**
+ * MAX_PLANES - representative of the upper bound of planes that are supported by the HW
+ */
#define MAX_PLANES 6
#define MAX_STREAMS 6
#define MIN_VIEWPORT_SIZE 12
#define MAX_NUM_EDP 2
+#define MAX_SUPPORTED_FORMATS 7
+
+#define MAX_HOST_ROUTERS_NUM 3
+#define MAX_DPIA_PER_HOST_ROUTER 3
+#define MAX_DPIA_NUM (MAX_HOST_ROUTERS_NUM * MAX_DPIA_PER_HOST_ROUTER)
/* Display Core Interfaces */
struct dc_versions {
@@ -179,6 +198,34 @@ struct dpp_color_caps {
struct rom_curve_caps ogam_rom_caps;
};
+/* Describes the HW support for 3D LUT memory layouts, formats, pixel orders and
+ sizes; the supported range is extended to match what the OS could handle on the roadmap */
+struct lut3d_caps {
+ uint32_t dma_3d_lut : 1; /*< DMA mode support for 3D LUT */
+ struct {
+ uint32_t swizzle_3d_rgb : 1;
+ uint32_t swizzle_3d_bgr : 1;
+ uint32_t linear_1d : 1;
+ } mem_layout_support;
+ struct {
+ uint32_t unorm_12msb : 1;
+ uint32_t unorm_12lsb : 1;
+ uint32_t float_fp1_5_10 : 1;
+ } mem_format_support;
+ struct {
+ uint32_t order_rgba : 1;
+ uint32_t order_bgra : 1;
+ } mem_pixel_order_support;
+ /*< size options are 9, 17, 33, 45, 65 */
+ struct {
+ uint32_t dim_9 : 1; /* 3D LUT support for 9x9x9 */
+ uint32_t dim_17 : 1; /* 3D LUT support for 17x17x17 */
+ uint32_t dim_33 : 1; /* 3D LUT support for 33x33x33 */
+ uint32_t dim_45 : 1; /* 3D LUT support for 45x45x45 */
+ uint32_t dim_65 : 1; /* 3D LUT support for 65x65x65 */
+ } lut_dim_caps;
+};
+
/**
* struct mpc_color_caps - color pipeline capabilities for multiple pipe and
* plane combined blocks
@@ -187,17 +234,25 @@ struct dpp_color_caps {
* @ogam_ram: programmable out gamma LUT
* @ocsc: output color space conversion matrix
* @num_3dluts: MPC 3D LUT; always assumes a preceding shaper LUT
+ * @num_rmcm_3dluts: number of RMCM 3D LUTS; always assumes a preceding shaper LUT
* @shared_3d_lut: shared 3D LUT flag. Can be either DPP or MPC, but single
* instance
* @ogam_rom_caps: pre-definied curve caps for regamma 1D LUT
+ * @mcm_3d_lut_caps: HW support cap for MCM LUT memory
+ * @rmcm_3d_lut_caps: HW support cap for RMCM LUT memory
+ * @preblend: whether color manager supports preblend with MPC
*/
struct mpc_color_caps {
uint16_t gamut_remap : 1;
uint16_t ogam_ram : 1;
uint16_t ocsc : 1;
uint16_t num_3dluts : 3;
+ uint16_t num_rmcm_3dluts : 3;
uint16_t shared_3d_lut:1;
struct rom_curve_caps ogam_rom_caps;
+ struct lut3d_caps mcm_3d_lut_caps;
+ struct lut3d_caps rmcm_3d_lut_caps;
+ bool preblend;
};
/**
@@ -215,6 +270,12 @@ struct dc_dmub_caps {
bool mclk_sw;
bool subvp_psr;
bool gecc_enable;
+ uint8_t fams_ver;
+ bool aux_backlight_support;
+};
+
+struct dc_scl_caps {
+ bool sharpener_support;
};
struct dc_caps {
@@ -230,7 +291,13 @@ struct dc_caps {
uint32_t i2c_speed_in_khz_hdcp;
uint32_t dmdata_alloc_size;
unsigned int max_cursor_size;
+ unsigned int max_buffered_cursor_size;
unsigned int max_video_width;
+ /*
+ * max video plane width that can be safely assumed to be always
+ * supported by single DPP pipe.
+ */
+ unsigned int max_optimizable_video_width;
unsigned int min_horizontal_blanking_period;
int linear_pitch_alignment;
bool dcc_const_color;
@@ -244,6 +311,8 @@ struct dc_caps {
bool extended_aux_timeout_support;
bool dmcub_support;
bool zstate_support;
+ bool ips_support;
+ bool ips_v2_support;
uint32_t num_of_internal_disp;
enum dp_protocol_version max_dp_protocol_version;
unsigned int mall_size_per_mem_channel;
@@ -257,6 +326,7 @@ struct dc_caps {
bool edp_dsc_support;
bool vbios_lttpr_aware;
bool vbios_lttpr_enable;
+ bool fused_io_supported;
uint32_t max_otg_num;
uint32_t max_cab_allocation_bytes;
uint32_t cache_line_size;
@@ -269,7 +339,19 @@ struct dc_caps {
uint16_t subvp_vertical_int_margin_us;
bool seamless_odm;
uint32_t max_v_total;
+ bool vtotal_limited_by_fp2;
+ uint32_t max_disp_clock_khz_at_vmin;
uint8_t subvp_drr_vblank_start_margin_us;
+ bool cursor_not_scaled;
+ bool dcmode_power_limits_present;
+ bool sequential_ono;
+ /* Conservative limit for DCC cases which require ODM4:1 to support*/
+ uint32_t dcc_plane_width_limit;
+ struct dc_scl_caps scl_caps;
+ uint8_t num_of_host_routers;
+ uint8_t num_of_dpias_per_host_router;
+ /* limit of the ODM only, could be limited by other factors (like pipe count)*/
+ uint8_t max_odm_combine_factor;
};
struct dc_bug_wa {
@@ -283,11 +365,18 @@ struct dc_bug_wa {
uint8_t dcfclk : 1;
uint8_t dcfclk_ds: 1;
} clock_update_disable_mask;
+ bool skip_psr_ips_crtc_disable;
};
struct dc_dcc_surface_param {
struct dc_size surface_size;
enum surface_pixel_format format;
- enum swizzle_mode_values swizzle_mode;
+ unsigned int plane0_pitch;
+ struct dc_size plane1_size;
+ unsigned int plane1_pitch;
+ union {
+ enum swizzle_mode_values swizzle_mode;
+ enum swizzle_mode_addr3_values swizzle_mode_addr3;
+ };
enum dc_scan_direction scan;
};
@@ -295,12 +384,15 @@ struct dc_dcc_setting {
unsigned int max_compressed_blk_size;
unsigned int max_uncompressed_blk_size;
bool independent_64b_blks;
- //These bitfields to be used starting with DCN
+ //These bitfields to be used starting with DCN 3.0
struct {
- uint32_t dcc_256_64_64 : 1;//available in ASICs before DCN (the worst compression case)
- uint32_t dcc_128_128_uncontrained : 1; //available in ASICs before DCN
- uint32_t dcc_256_128_128 : 1; //available starting with DCN
- uint32_t dcc_256_256_unconstrained : 1; //available in ASICs before DCN (the best compression case)
+ uint32_t dcc_256_64_64 : 1;//available in ASICs before DCN 3.0 (the worst compression case)
+ uint32_t dcc_128_128_uncontrained : 1; //available in ASICs before DCN 3.0
+ uint32_t dcc_256_128_128 : 1; //available starting with DCN 3.0
+ uint32_t dcc_256_256_unconstrained : 1; //available in ASICs before DCN 3.0 (the best compression case)
+ uint32_t dcc_256_256 : 1; //available in ASICs starting with DCN 4.0x (the best compression case)
+ uint32_t dcc_256_128 : 1; //available in ASICs starting with DCN 4.0x
+ uint32_t dcc_256_64 : 1; //available in ASICs starting with DCN 4.0x (the worst compression case)
} dcc_controls;
};
@@ -368,11 +460,11 @@ struct dc;
struct dc_plane_state;
struct dc_state;
-
struct dc_cap_funcs {
bool (*get_dcc_compression_cap)(const struct dc *dc,
const struct dc_dcc_surface_param *input,
struct dc_surface_dcc_cap *output);
+ bool (*get_subvp_en)(struct dc *dc, struct dc_state *context);
};
struct link_training_settings;
@@ -404,21 +496,42 @@ struct dc_config {
bool enable_windowed_mpo_odm;
bool forceHBR2CP2520; // Used for switching between test patterns TPS4 and CP2520
uint32_t allow_edp_hotplug_detection;
+ bool skip_riommu_prefetch_wa;
bool clamp_min_dcfclk;
uint64_t vblank_alignment_dto_params;
uint8_t vblank_alignment_max_frame_time_diff;
bool is_asymmetric_memory;
bool is_single_rank_dimm;
bool is_vmin_only_asic;
+ bool use_spl;
+ bool prefer_easf;
bool use_pipe_ctx_sync_logic;
+ int smart_mux_version;
bool ignore_dpref_ss;
bool enable_mipi_converter_optimization;
bool use_default_clock_table;
bool force_bios_enable_lttpr;
uint8_t force_bios_fixed_vs;
int sdpif_request_limit_words_per_umc;
- bool use_old_fixed_vs_sequence;
bool dc_mode_clk_limit_support;
+ bool EnableMinDispClkODM;
+ bool enable_auto_dpm_test_logs;
+ unsigned int disable_ips;
+ unsigned int disable_ips_rcg;
+ unsigned int disable_ips_in_vpb;
+ bool disable_ips_in_dpms_off;
+ bool usb4_bw_alloc_support;
+ bool allow_0_dtb_clk;
+ bool use_assr_psp_message;
+ bool support_edp0_on_dp1;
+ unsigned int enable_fpo_flicker_detection;
+ bool disable_hbr_audio_dp2;
+ bool consolidated_dpia_dp_lt;
+ bool set_pipe_unlock_order;
+ bool enable_dpia_pre_training;
+ bool unify_link_enc_assignment;
+ struct spl_sharpness_range dcn_sharpness_range;
+ struct spl_sharpness_range dcn_override_sharpness_range;
};
enum visual_confirm {
@@ -430,9 +543,15 @@ enum visual_confirm {
VISUAL_CONFIRM_SWAPCHAIN = 6,
VISUAL_CONFIRM_FAMS = 7,
VISUAL_CONFIRM_SWIZZLE = 9,
+ VISUAL_CONFIRM_SMARTMUX_DGPU = 10,
VISUAL_CONFIRM_REPLAY = 12,
VISUAL_CONFIRM_SUBVP = 14,
VISUAL_CONFIRM_MCLK_SWITCH = 16,
+ VISUAL_CONFIRM_FAMS2 = 19,
+ VISUAL_CONFIRM_HW_CURSOR = 20,
+ VISUAL_CONFIRM_VABC = 21,
+ VISUAL_CONFIRM_DCC = 22,
+ VISUAL_CONFIRM_EXPLICIT = 0x80000000,
};
enum dc_psr_power_opts {
@@ -448,12 +567,25 @@ enum dml_hostvm_override_opts {
DML_HOSTVM_OVERRIDE_TRUE = 0x2,
};
+enum dc_replay_power_opts {
+ replay_power_opt_invalid = 0x0,
+ replay_power_opt_smu_opt_static_screen = 0x1,
+ replay_power_opt_z10_static_screen = 0x10,
+};
+
enum dcc_option {
DCC_ENABLE = 0,
DCC_DISABLE = 1,
DCC_HALF_REQ_DISALBE = 2,
};
+enum in_game_fams_config {
+ INGAME_FAMS_SINGLE_DISP_ENABLE, // enable in-game fams
+ INGAME_FAMS_DISABLE, // disable in-game fams
+ INGAME_FAMS_MULTI_DISP_ENABLE, //enable in-game fams for multi-display
+ INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY, //enable in-game fams for multi-display only for clamped RR strategies
+};
+
/**
* enum pipe_split_policy - Pipe split strategy supported by DCN
*
@@ -540,6 +672,7 @@ struct dc_clocks {
bool prev_p_state_change_support;
bool fclk_prev_p_state_change_support;
int num_ways;
+ int host_router_bw_kbps[MAX_HOST_ROUTERS_NUM];
/*
* @fw_based_mclk_switching
@@ -558,6 +691,19 @@ struct dc_clocks {
int max_supported_dispclk_khz;
int bw_dppclk_khz; /*a copy of dppclk_khz*/
int bw_dispclk_khz;
+ int idle_dramclk_khz;
+ int idle_fclk_khz;
+ int subvp_prefetch_dramclk_khz;
+ int subvp_prefetch_fclk_khz;
+
+ /* Stutter efficiency is technically not clock values
+ * but stored here so the values are part of the update_clocks call similar to num_ways
+ * Efficiencies are stored as percentage (0-100)
+ */
+ struct {
+ uint8_t base_efficiency; //LP1
+ uint8_t low_power_efficiency; //LP2
+ } stutter_efficiency;
};
struct dc_bw_validation_profile {
@@ -643,6 +789,57 @@ union root_clock_optimization_options {
uint32_t u32All;
};
+union fine_grain_clock_gating_enable_options {
+ struct {
+ bool dccg_global_fgcg_rep : 1; /* Global fine grain clock gating of repeaters */
+ bool dchub : 1; /* Display controller hub */
+ bool dchubbub : 1;
+ bool dpp : 1; /* Display pipes and planes */
+ bool opp : 1; /* Output pixel processing */
+ bool optc : 1; /* Output pipe timing combiner */
+ bool dio : 1; /* Display output */
+ bool dwb : 1; /* Display writeback */
+ bool mmhubbub : 1; /* Multimedia hub */
+ bool dmu : 1; /* Display core management unit */
+ bool az : 1; /* Azalia */
+ bool dchvm : 1;
+ bool dsc : 1; /* Display stream compression */
+
+ uint32_t reserved : 19;
+ } bits;
+ uint32_t u32All;
+};
+
+enum pg_hw_pipe_resources {
+ PG_HUBP = 0,
+ PG_DPP,
+ PG_DSC,
+ PG_MPCC,
+ PG_OPP,
+ PG_OPTC,
+ PG_DPSTREAM,
+ PG_HDMISTREAM,
+ PG_PHYSYMCLK,
+ PG_HW_PIPE_RESOURCES_NUM_ELEMENT
+};
+
+enum pg_hw_resources {
+ PG_DCCG = 0,
+ PG_DCIO,
+ PG_DIO,
+ PG_DCHUBBUB,
+ PG_DCHVM,
+ PG_DWB,
+ PG_HPO,
+ PG_DCOH,
+ PG_HW_RESOURCES_NUM_ELEMENT
+};
+
+struct pg_block_update {
+ bool pg_pipe_res_update[PG_HW_PIPE_RESOURCES_NUM_ELEMENT][MAX_PIPES];
+ bool pg_res_update[PG_HW_RESOURCES_NUM_ELEMENT];
+};
+
union dpia_debug_options {
struct {
uint32_t disable_dpia:1; /* bit 0 */
@@ -650,7 +847,10 @@ union dpia_debug_options {
uint32_t extend_aux_rd_interval:1; /* bit 2 */
uint32_t disable_mst_dsc_work_around:1; /* bit 3 */
uint32_t enable_force_tbt3_work_around:1; /* bit 4 */
- uint32_t reserved:27;
+ uint32_t disable_usb4_pm_support:1; /* bit 5 */
+ uint32_t enable_usb4_bw_zero_alloc_patch:1; /* bit 6 */
+ uint32_t enable_bw_allocation_mode:1; /* bit 7 */
+ uint32_t reserved:24;
} bits;
uint32_t raw;
};
@@ -748,7 +948,6 @@ struct dc_debug_options {
bool sanity_checks;
bool max_disp_clk;
bool surface_trace;
- bool timing_trace;
bool clock_trace;
bool validation_trace;
bool bandwidth_calcs_trace;
@@ -768,10 +967,18 @@ struct dc_debug_options {
bool voltage_align_fclk;
bool disable_min_fclk;
+ bool hdcp_lc_force_fw_enable;
+ bool hdcp_lc_enable_sw_fallback;
+
bool disable_dfs_bypass;
bool disable_dpp_power_gate;
bool disable_hubp_power_gate;
bool disable_dsc_power_gate;
+ bool disable_optc_power_gate;
+ bool disable_hpo_power_gate;
+ bool disable_io_clk_power_gate;
+ bool disable_mem_power_gate;
+ bool disable_dio_power_gate;
int dsc_min_slice_height_override;
int dsc_bpp_increment_div;
bool disable_pplib_wm_range;
@@ -812,9 +1019,11 @@ struct dc_debug_options {
unsigned int seamless_boot_odm_combine;
unsigned int force_odm_combine_4to1; //bit vector based on otg inst
int minimum_z8_residency_time;
+ int minimum_z10_residency_time;
bool disable_z9_mpc;
unsigned int force_fclk_khz;
bool enable_tri_buf;
+ bool ips_disallow_entry;
bool dmub_offload_enabled;
bool dmcub_emulation;
bool disable_idle_power_optimizations;
@@ -847,13 +1056,14 @@ struct dc_debug_options {
bool ignore_cable_id;
union mem_low_power_enable_options enable_mem_low_power;
union root_clock_optimization_options root_clock_optimization;
+ union fine_grain_clock_gating_enable_options enable_fine_grain_clock_gating;
bool hpo_optimization;
bool force_vblank_alignment;
/* Enable dmub aux for legacy ddc */
bool enable_dmub_aux_for_legacy_ddc;
bool disable_fams;
- bool disable_fams_gaming;
+ enum in_game_fams_config disable_fams_gaming;
/* FEC/PSR1 sequence enable delay in 100us */
uint8_t fec_enable_delay_in100us;
bool enable_driver_sequence_debug;
@@ -862,6 +1072,7 @@ struct dc_debug_options {
bool disable_z10;
bool enable_z9_disable_interface;
bool psr_skip_crtc_disable;
+ uint32_t ips_skip_crtc_disable_mask;
union dpia_debug_options dpia_debug;
bool disable_fixed_vs_aux_timeout_wa;
uint32_t fixed_vs_aux_delay_config_wa;
@@ -872,6 +1083,7 @@ struct dc_debug_options {
unsigned int force_mall_ss_num_ways;
bool alloc_extra_way_for_cursor;
uint32_t subvp_extra_lines;
+ bool disable_force_pstate_allow_on_hw_release;
bool force_usr_allow;
/* uses value at boot and disables switch */
bool disable_dtb_ref_clk_switch;
@@ -883,16 +1095,19 @@ struct dc_debug_options {
bool dml_disallow_alternate_prefetch_modes;
bool use_legacy_soc_bb_mechanism;
bool exit_idle_opt_for_cursor_updates;
+ bool using_dml2;
bool enable_single_display_2to1_odm_policy;
bool enable_double_buffered_dsc_pg_support;
bool enable_dp_dig_pixel_rate_div_policy;
+ bool using_dml21;
enum lttpr_mode lttpr_mode_override;
unsigned int dsc_delay_factor_wa_x1000;
unsigned int min_prefetch_in_strobe_ns;
bool disable_unbounded_requesting;
bool dig_fifo_off_in_blank;
- bool temp_mst_deallocation_sequence;
bool override_dispclk_programming;
+ bool otg_crc_db;
+ bool disallow_dispclk_dppclk_ds;
bool disable_fpo_optimizations;
bool support_eDP1_5;
uint32_t fpo_vactive_margin_us;
@@ -904,12 +1119,53 @@ struct dc_debug_options {
bool disable_dp_plus_plus_wa;
uint32_t fpo_vactive_min_active_margin_us;
uint32_t fpo_vactive_max_blank_us;
+ bool enable_hpo_pg_support;
bool enable_legacy_fast_update;
bool disable_dc_mode_overwrite;
bool replay_skip_crtc_disabled;
+ bool ignore_pg;/*do nothing, let pmfw control it*/
+ bool psp_disabled_wa;
+ unsigned int ips2_eval_delay_us;
+ unsigned int ips2_entry_delay_us;
+ bool optimize_ips_handshake;
+ bool disable_dmub_reallow_idle;
+ bool disable_timeout;
+ bool disable_extblankadj;
+ bool enable_idle_reg_checks;
+ unsigned int static_screen_wait_frames;
+ uint32_t pwm_freq;
+ bool force_chroma_subsampling_1tap;
+ unsigned int dcc_meta_propagation_delay_us;
+ bool disable_422_left_edge_pixel;
+ bool dml21_force_pstate_method;
+ uint32_t dml21_force_pstate_method_values[MAX_PIPES];
+ uint32_t dml21_disable_pstate_method_mask;
+ union fw_assisted_mclk_switch_version fams_version;
+ union dmub_fams2_global_feature_config fams2_config;
+ unsigned int force_cositing;
+ unsigned int disable_spl;
+ unsigned int force_easf;
+ unsigned int force_sharpness;
+ unsigned int force_sharpness_level;
+ unsigned int force_lls;
+ bool notify_dpia_hr_bw;
+ bool enable_ips_visual_confirm;
+ unsigned int sharpen_policy;
+ unsigned int scale_to_sharpness_policy;
+ bool skip_full_updated_if_possible;
+ unsigned int enable_oled_edp_power_up_opt;
+ bool enable_hblank_borrow;
+ bool force_subvp_df_throttle;
+ uint32_t acpi_transition_bitmasks[MAX_PIPES];
+ bool enable_pg_cntl_debug_logs;
+ unsigned int auxless_alpm_lfps_setup_ns;
+ unsigned int auxless_alpm_lfps_period_ns;
+ unsigned int auxless_alpm_lfps_silence_ns;
+ unsigned int auxless_alpm_lfps_t1t2_us;
+ short auxless_alpm_lfps_t1t2_offset_us;
+ bool disable_stutter_for_wm_program;
};
-struct gpu_info_soc_bounding_box_v1_0;
/* Generic structure that can be used to query properties of DC. More fields
* can be added as required.
@@ -918,72 +1174,6 @@ struct dc_current_properties {
unsigned int cursor_size_limit;
};
-struct dc {
- struct dc_debug_options debug;
- struct dc_versions versions;
- struct dc_caps caps;
- struct dc_cap_funcs cap_funcs;
- struct dc_config config;
- struct dc_bounding_box_overrides bb_overrides;
- struct dc_bug_wa work_arounds;
- struct dc_context *ctx;
- struct dc_phy_addr_space_config vm_pa_config;
-
- uint8_t link_count;
- struct dc_link *links[MAX_PIPES * 2];
- struct link_service *link_srv;
-
- struct dc_state *current_state;
- struct resource_pool *res_pool;
-
- struct clk_mgr *clk_mgr;
-
- /* Display Engine Clock levels */
- struct dm_pp_clock_levels sclk_lvls;
-
- /* Inputs into BW and WM calculations. */
- struct bw_calcs_dceip *bw_dceip;
- struct bw_calcs_vbios *bw_vbios;
- struct dcn_soc_bounding_box *dcn_soc;
- struct dcn_ip_params *dcn_ip;
- struct display_mode_lib dml;
-
- /* HW functions */
- struct hw_sequencer_funcs hwss;
- struct dce_hwseq *hwseq;
-
- /* Require to optimize clocks and bandwidth for added/removed planes */
- bool optimized_required;
- bool wm_optimized_required;
- bool idle_optimizations_allowed;
- bool enable_c20_dtm_b0;
-
- /* Require to maintain clocks and bandwidth for UEFI enabled HW */
-
- /* FBC compressor */
- struct compressor *fbc_compressor;
-
- struct dc_debug_data debug_data;
- struct dpcd_vendor_signature vendor_signature;
-
- const char *build_id;
- struct vm_helper *vm_helper;
-
- uint32_t *dcn_reg_offsets;
- uint32_t *nbio_reg_offsets;
-
- /* Scratch memory */
- struct {
- struct {
- /*
- * For matching clock_limits table in driver with table
- * from PMFW.
- */
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
- } update_bw_bounding_box;
- } scratch;
-};
-
enum frame_buffer_mode {
FRAME_BUFFER_MODE_LOCAL_ONLY = 0,
FRAME_BUFFER_MODE_ZFB_ONLY,
@@ -999,6 +1189,8 @@ struct dchub_init_data {
bool dchub_info_valid;
};
+struct dml2_soc_bb;
+
struct dc_init_data {
struct hw_asic_id asic_id;
void *driver; /* ctx */
@@ -1030,6 +1222,8 @@ struct dc_init_data {
*/
uint32_t *dcn_reg_offsets;
uint32_t *nbio_reg_offsets;
+ uint32_t *clk_reg_offsets;
+ void *bb_from_dmub;
};
struct dc_callback_init {
@@ -1130,6 +1324,38 @@ union dc_3dlut_state {
};
+#define MATRIX_9C__DIM_128_ALIGNED_LEN 16 // 9+7 : 9 * 8 + 7 * 8 = 72 + 56 = 128 % 128 = 0
+#define MATRIX_17C__DIM_128_ALIGNED_LEN 32 //17+15: 17 * 8 + 15 * 8 = 136 + 120 = 256 % 128 = 0
+#define MATRIX_33C__DIM_128_ALIGNED_LEN 64 //17+47: 17 * 8 + 47 * 8 = 136 + 376 = 512 % 128 = 0
+
+struct lut_rgb {
+ uint16_t b;
+ uint16_t g;
+ uint16_t r;
+ uint16_t padding;
+};
+
+//this structure maps directly to how the lut will read it from memory
+struct lut_mem_mapping {
+ union {
+ //NATIVE MODE 1, 2
+ //RGB layout [b][g][r] //red is 128 byte aligned
+ //BGR layout [r][g][b] //blue is 128 byte aligned
+ struct lut_rgb rgb_17c[17][17][MATRIX_17C__DIM_128_ALIGNED_LEN];
+ struct lut_rgb rgb_33c[33][33][MATRIX_33C__DIM_128_ALIGNED_LEN];
+
+ //TRANSFORMED
+ uint16_t linear_rgb[(33*33*33*4/128+1)*128];
+ };
+ uint16_t size;
+};
+
+struct dc_rmcm_3dlut {
+ bool isInUse;
+ const struct dc_stream_state *stream;
+ uint8_t protection_bits;
+};
+
struct dc_3dlut {
struct kref refcount;
struct tetrahedral_params lut_3d;
@@ -1166,7 +1392,6 @@ union surface_update_flags {
uint32_t in_transfer_func_change:1;
uint32_t input_csc_change:1;
uint32_t coeff_reduction_change:1;
- uint32_t output_tf_change:1;
uint32_t pixel_format_change:1;
uint32_t plane_size_change:1;
uint32_t gamut_remap_change:1;
@@ -1180,12 +1405,16 @@ union surface_update_flags {
uint32_t stereo_format_change:1;
uint32_t lut_3d:1;
uint32_t tmz_changed:1;
+ uint32_t mcm_transfer_function_enable_change:1; /* disable or enable MCM transfer func */
uint32_t full_update:1;
+ uint32_t sdr_white_level_nits:1;
} bits;
uint32_t raw;
};
+#define DC_REMOVE_PLANE_POINTERS 1
+
struct dc_plane_state {
struct dc_plane_address address;
struct dc_plane_flip_time time;
@@ -1196,13 +1425,13 @@ struct dc_plane_state {
struct rect clip_rect;
struct plane_size plane_size;
- union dc_tiling_info tiling_info;
+ struct dc_tiling_info tiling_info;
struct dc_plane_dcc_param dcc;
- struct dc_gamma *gamma_correction;
- struct dc_transfer_func *in_transfer_func;
- struct dc_bias_and_scale *bias_and_scale;
+ struct dc_gamma gamma_correction;
+ struct dc_transfer_func in_transfer_func;
+ struct dc_bias_and_scale bias_and_scale;
struct dc_csc_transform input_csc_color_matrix;
struct fixed31_32 coeff_reduction_factor;
struct fixed31_32 hdr_mult;
@@ -1213,9 +1442,9 @@ struct dc_plane_state {
enum dc_color_space color_space;
- struct dc_3dlut *lut3d_func;
- struct dc_transfer_func *in_shaper_func;
- struct dc_transfer_func *blend_tf;
+ struct dc_3dlut lut3d_func;
+ struct dc_transfer_func in_shaper_func;
+ struct dc_transfer_func blend_tf;
struct dc_transfer_func *gamcor_tf;
enum surface_pixel_format format;
@@ -1251,11 +1480,25 @@ struct dc_plane_state {
struct tg_color visual_confirm_color;
bool is_statically_allocated;
+ enum chroma_cositing cositing;
+ enum dc_cm2_shaper_3dlut_setting mcm_shaper_3dlut_setting;
+ bool mcm_lut1d_enable;
+ struct dc_cm2_func_luts mcm_luts;
+ bool lut_bank_a;
+ enum mpcc_movable_cm_location mcm_location;
+ struct dc_csc_transform cursor_csc_color_matrix;
+ bool adaptive_sharpness_en;
+ int adaptive_sharpness_policy;
+ int sharpness_level;
+ enum linear_light_scaling linear_light_scaling;
+ unsigned int sdr_white_level_nits;
+ struct spl_sharpness_range sharpness_range;
+ enum sharpness_range_source sharpness_source;
};
struct dc_plane_info {
struct plane_size plane_size;
- union dc_tiling_info tiling_info;
+ struct dc_tiling_info tiling_info;
struct dc_plane_dcc_param dcc;
enum surface_pixel_format format;
enum dc_rotation_angle rotation;
@@ -1269,174 +1512,29 @@ struct dc_plane_info {
int global_alpha_value;
bool input_csc_enabled;
int layer_index;
+ enum chroma_cositing cositing;
};
-struct dc_scaling_info {
- struct rect src_rect;
- struct rect dst_rect;
- struct rect clip_rect;
- struct scaling_taps scaling_quality;
-};
-
-struct dc_fast_update {
- const struct dc_flip_addrs *flip_addr;
- const struct dc_gamma *gamma;
- const struct colorspace_transform *gamut_remap_matrix;
- const struct dc_csc_transform *input_csc_color_matrix;
- const struct fixed31_32 *coeff_reduction_factor;
- struct dc_transfer_func *out_transfer_func;
- struct dc_csc_transform *output_csc_transform;
-};
-
-struct dc_surface_update {
- struct dc_plane_state *surface;
-
- /* isr safe update parameters. null means no updates */
- const struct dc_flip_addrs *flip_addr;
- const struct dc_plane_info *plane_info;
- const struct dc_scaling_info *scaling_info;
- struct fixed31_32 hdr_mult;
- /* following updates require alloc/sleep/spin that is not isr safe,
- * null means no updates
- */
- const struct dc_gamma *gamma;
- const struct dc_transfer_func *in_transfer_func;
-
- const struct dc_csc_transform *input_csc_color_matrix;
- const struct fixed31_32 *coeff_reduction_factor;
- const struct dc_transfer_func *func_shaper;
- const struct dc_3dlut *lut3d_func;
- const struct dc_transfer_func *blend_tf;
- const struct colorspace_transform *gamut_remap_matrix;
-};
-
-/*
- * Create a new surface with default parameters;
- */
-struct dc_plane_state *dc_create_plane_state(struct dc *dc);
-const struct dc_plane_status *dc_plane_get_status(
- const struct dc_plane_state *plane_state);
-
-void dc_plane_state_retain(struct dc_plane_state *plane_state);
-void dc_plane_state_release(struct dc_plane_state *plane_state);
-
-void dc_gamma_retain(struct dc_gamma *dc_gamma);
-void dc_gamma_release(struct dc_gamma **dc_gamma);
-struct dc_gamma *dc_create_gamma(void);
-
-void dc_transfer_func_retain(struct dc_transfer_func *dc_tf);
-void dc_transfer_func_release(struct dc_transfer_func *dc_tf);
-struct dc_transfer_func *dc_create_transfer_func(void);
-
-struct dc_3dlut *dc_create_3dlut_func(void);
-void dc_3dlut_func_release(struct dc_3dlut *lut);
-void dc_3dlut_func_retain(struct dc_3dlut *lut);
-
-void dc_post_update_surfaces_to_stream(
- struct dc *dc);
-
#include "dc_stream.h"
-/**
- * struct dc_validation_set - Struct to store surface/stream associations for validation
- */
-struct dc_validation_set {
- /**
- * @stream: Stream state properties
+struct dc_scratch_space {
+ /* used to temporarily backup plane states of a stream during
+ * dc update. The reason is that plane states are overwritten
+ * with surface updates in dc update. Once they are overwritten
+ * current state is no longer valid. We want to temporarily
+ * store current value in plane states so we can still recover
+ * a valid current state during dc update.
*/
- struct dc_stream_state *stream;
+ struct dc_plane_state plane_states[MAX_SURFACES];
- /**
- * @plane_states: Surface state
- */
- struct dc_plane_state *plane_states[MAX_SURFACES];
-
- /**
- * @plane_count: Total of active planes
- */
- uint8_t plane_count;
+ struct dc_stream_state stream_state;
};
-bool dc_validate_boot_timing(const struct dc *dc,
- const struct dc_sink *sink,
- struct dc_crtc_timing *crtc_timing);
-
-enum dc_status dc_validate_plane(struct dc *dc, const struct dc_plane_state *plane_state);
-
-void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info);
-
-enum dc_status dc_validate_with_context(struct dc *dc,
- const struct dc_validation_set set[],
- int set_count,
- struct dc_state *context,
- bool fast_validate);
-
-bool dc_set_generic_gpio_for_stereo(bool enable,
- struct gpio_service *gpio_service);
-
-/*
- * fast_validate: we return after determining if we can support the new state,
- * but before we populate the programming info
- */
-enum dc_status dc_validate_global_state(
- struct dc *dc,
- struct dc_state *new_ctx,
- bool fast_validate);
-
-
-void dc_resource_state_construct(
- const struct dc *dc,
- struct dc_state *dst_ctx);
-
-bool dc_acquire_release_mpc_3dlut(
- struct dc *dc, bool acquire,
- struct dc_stream_state *stream,
- struct dc_3dlut **lut,
- struct dc_transfer_func **shaper);
-
-void dc_resource_state_copy_construct(
- const struct dc_state *src_ctx,
- struct dc_state *dst_ctx);
-
-void dc_resource_state_copy_construct_current(
- const struct dc *dc,
- struct dc_state *dst_ctx);
-
-void dc_resource_state_destruct(struct dc_state *context);
-
-bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
-
-enum dc_status dc_commit_streams(struct dc *dc,
- struct dc_stream_state *streams[],
- uint8_t stream_count);
-
-struct dc_state *dc_create_state(struct dc *dc);
-struct dc_state *dc_copy_state(struct dc_state *src_ctx);
-void dc_retain_state(struct dc_state *context);
-void dc_release_state(struct dc_state *context);
-
-struct dc_plane_state *dc_get_surface_for_mpcc(struct dc *dc,
- struct dc_stream_state *stream,
- int mpcc_inst);
-
-
-uint32_t dc_get_opp_for_plane(struct dc *dc, struct dc_plane_state *plane);
-
-void dc_set_disable_128b_132b_stream_overhead(bool disable);
-
-/* The function returns minimum bandwidth required to drive a given timing
- * return - minimum required timing bandwidth in kbps.
- */
-uint32_t dc_bandwidth_in_kbps_from_timing(
- const struct dc_crtc_timing *timing,
- const enum dc_link_encoding_format link_encoding);
-
-/* Link Interfaces */
/*
* A link contains one or more sinks and their connected status.
* The currently active signal type (HDMI, DP-SST, DP-MST) is also reported.
*/
-struct dc_link {
+ struct dc_link {
struct dc_sink *remote_sinks[MAX_SINKS_PER_LINK];
unsigned int sink_count;
struct dc_sink *local_sink;
@@ -1445,6 +1543,7 @@ struct dc_link {
enum signal_type connector_signal;
enum dc_irq_source irq_source_hpd;
enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */
+ enum dc_irq_source irq_source_read_request;/* Read Request */
bool is_hpd_filter_disabled;
bool dp_ss_off;
@@ -1465,7 +1564,13 @@ struct dc_link {
bool is_dig_mapping_flexible;
bool hpd_status; /* HPD status of link without physical HPD pin. */
bool is_hpd_pending; /* Indicates a new received hpd */
- bool is_automated; /* Indicates automated testing */
+
+ /* USB4 DPIA links skip verifying link cap, instead performing the fallback method
+ * for every link training. This is incompatible with DP LL compliance automation,
+ * which expects the same link settings to be used every retry on a link loss.
+ * This flag is used to skip the fallback when link loss occurs during automation.
+ */
+ bool skip_fallback_on_link_loss;
bool edp_sink_present;
@@ -1499,7 +1604,19 @@ struct dc_link {
enum engine_id dpia_preferred_eng_id;
bool test_pattern_enabled;
+ /* Pending/Current test pattern are only used to perform and track
+ * FIXED_VS retimer test pattern/lane adjustment override state.
+ * Pending allows link HWSS to differentiate PHY vs non-PHY pattern,
+ * to perform specific lane adjust overrides before setting certain
+ * PHY test patterns. In cases when lane adjust and set test pattern
+ * calls are not performed atomically (i.e. performing link training),
+ * pending_test_pattern will be invalid or contain a non-PHY test pattern
+ * and current_test_pattern will contain required context for any future
+ * set pattern/set lane adjust to transition between override state(s).
+ * */
enum dp_test_pattern current_test_pattern;
+ enum dp_test_pattern pending_test_pattern;
+
union compliance_test_state compliance_test_state;
void *priv;
@@ -1533,9 +1650,7 @@ struct dc_link {
enum edp_revision edp_revision;
union dpcd_sink_ext_caps dpcd_sink_ext_caps;
- struct backlight_settings backlight_settings;
struct psr_settings psr_settings;
-
struct replay_settings replay_settings;
/* Drive settings read from integrated info table */
@@ -1559,7 +1674,10 @@ struct dc_link {
bool dongle_mode_timing_override;
bool blank_stream_on_ocs_change;
bool read_dpcd204h_on_irq_hpd;
+ bool force_dp_ffe_preset;
+ bool skip_phy_ssc_reduction;
} wa_flags;
+ union dc_dp_ffe_preset forced_dp_ffe_preset;
struct link_mst_stream_allocation_table mst_stream_alloc_table;
struct dc_link_status link_status;
@@ -1567,14 +1685,268 @@ struct dc_link {
struct gpio *hpd_gpio;
enum dc_link_fec_state fec_state;
+ bool is_dds;
+ bool is_display_mux_present;
bool link_powered_externally; // Used to bypass hardware sequencing delays when panel is powered down forcibly
struct dc_panel_config panel_config;
struct phy_state phy_state;
+ uint32_t phy_transition_bitmask;
// BW ALLOCATON USB4 ONLY
struct dc_dpia_bw_alloc dpia_bw_alloc_config;
+ bool skip_implict_edp_power_control;
+ enum backlight_control_type backlight_control_type;
+};
+
+struct dc {
+ struct dc_debug_options debug;
+ struct dc_versions versions;
+ struct dc_caps caps;
+ struct dc_cap_funcs cap_funcs;
+ struct dc_config config;
+ struct dc_bounding_box_overrides bb_overrides;
+ struct dc_bug_wa work_arounds;
+ struct dc_context *ctx;
+ struct dc_phy_addr_space_config vm_pa_config;
+
+ uint8_t link_count;
+ struct dc_link *links[MAX_LINKS];
+ uint8_t lowest_dpia_link_index;
+ struct link_service *link_srv;
+
+ struct dc_state *current_state;
+ struct resource_pool *res_pool;
+
+ struct clk_mgr *clk_mgr;
+
+ /* Display Engine Clock levels */
+ struct dm_pp_clock_levels sclk_lvls;
+
+ /* Inputs into BW and WM calculations. */
+ struct bw_calcs_dceip *bw_dceip;
+ struct bw_calcs_vbios *bw_vbios;
+ struct dcn_soc_bounding_box *dcn_soc;
+ struct dcn_ip_params *dcn_ip;
+ struct display_mode_lib dml;
+
+ /* HW functions */
+ struct hw_sequencer_funcs hwss;
+ struct dce_hwseq *hwseq;
+
+ /* Require to optimize clocks and bandwidth for added/removed planes */
+ bool optimized_required;
+ bool idle_optimizations_allowed;
+ bool enable_c20_dtm_b0;
+
+ /* Require to maintain clocks and bandwidth for UEFI enabled HW */
+
+ /* For eDP to know the switching state of SmartMux */
+ bool is_switch_in_progress_orig;
+ bool is_switch_in_progress_dest;
+
+ /* FBC compressor */
+ struct compressor *fbc_compressor;
+
+ struct dc_debug_data debug_data;
+ struct dpcd_vendor_signature vendor_signature;
+
+ const char *build_id;
+ struct vm_helper *vm_helper;
+
+ uint32_t *dcn_reg_offsets;
+ uint32_t *nbio_reg_offsets;
+ uint32_t *clk_reg_offsets;
+
+ /* Scratch memory */
+ struct {
+ struct {
+ /*
+ * For matching clock_limits table in driver with table
+ * from PMFW.
+ */
+ struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
+ } update_bw_bounding_box;
+ struct dc_scratch_space current_state;
+ struct dc_scratch_space new_state;
+ struct dc_stream_state temp_stream; // Used so we don't need to allocate stream on the stack
+ struct dc_link temp_link;
+ bool pipes_to_unlock_first[MAX_PIPES]; /* Any of the pipes indicated here should be unlocked first */
+ } scratch;
+
+ struct dml2_configuration_options dml2_options;
+ struct dml2_configuration_options dml2_dc_power_options;
+ enum dc_acpi_cm_power_state power_state;
+ struct soc_and_ip_translator *soc_and_ip_translator;
+};
+
+struct dc_scaling_info {
+ struct rect src_rect;
+ struct rect dst_rect;
+ struct rect clip_rect;
+ struct scaling_taps scaling_quality;
+};
+
+struct dc_fast_update {
+ const struct dc_flip_addrs *flip_addr;
+ const struct dc_gamma *gamma;
+ const struct colorspace_transform *gamut_remap_matrix;
+ const struct dc_csc_transform *input_csc_color_matrix;
+ const struct fixed31_32 *coeff_reduction_factor;
+ struct dc_transfer_func *out_transfer_func;
+ struct dc_csc_transform *output_csc_transform;
+ const struct dc_csc_transform *cursor_csc_color_matrix;
+};
+
+struct dc_surface_update {
+ struct dc_plane_state *surface;
+
+ /* isr safe update parameters. null means no updates */
+ const struct dc_flip_addrs *flip_addr;
+ const struct dc_plane_info *plane_info;
+ const struct dc_scaling_info *scaling_info;
+ struct fixed31_32 hdr_mult;
+ /* following updates require alloc/sleep/spin that is not isr safe,
+ * null means no updates
+ */
+ const struct dc_gamma *gamma;
+ const struct dc_transfer_func *in_transfer_func;
+
+ const struct dc_csc_transform *input_csc_color_matrix;
+ const struct fixed31_32 *coeff_reduction_factor;
+ const struct dc_transfer_func *func_shaper;
+ const struct dc_3dlut *lut3d_func;
+ const struct dc_transfer_func *blend_tf;
+ const struct colorspace_transform *gamut_remap_matrix;
+ /*
+ * Color Transformations for pre-blend MCM (Shaper, 3DLUT, 1DLUT)
+ *
+ * change cm2_params.component_settings: Full update
+ * change cm2_params.cm2_luts: Fast update
+ */
+ const struct dc_cm2_parameters *cm2_params;
+ const struct dc_csc_transform *cursor_csc_color_matrix;
+ unsigned int sdr_white_level_nits;
+ struct dc_bias_and_scale bias_and_scale;
+};
+
+struct dc_underflow_debug_data {
+ uint32_t otg_inst;
+ uint32_t otg_underflow;
+ uint32_t h_position;
+ uint32_t v_position;
+ uint32_t otg_frame_count;
+ struct dc_underflow_per_hubp_debug_data {
+ uint32_t hubp_underflow;
+ uint32_t hubp_in_blank;
+ uint32_t hubp_readline;
+ uint32_t det_config_error;
+ } hubps[MAX_PIPES];
+ uint32_t curr_det_sizes[MAX_PIPES];
+ uint32_t target_det_sizes[MAX_PIPES];
+ uint32_t compbuf_config_error;
+};
+
+/*
+ * Retain, release and create helpers for gamma, transfer function and 3D LUT objects.
+ */
+void dc_gamma_retain(struct dc_gamma *dc_gamma);
+void dc_gamma_release(struct dc_gamma **dc_gamma);
+struct dc_gamma *dc_create_gamma(void);
+
+void dc_transfer_func_retain(struct dc_transfer_func *dc_tf);
+void dc_transfer_func_release(struct dc_transfer_func *dc_tf);
+struct dc_transfer_func *dc_create_transfer_func(void);
+
+struct dc_3dlut *dc_create_3dlut_func(void);
+void dc_3dlut_func_release(struct dc_3dlut *lut);
+void dc_3dlut_func_retain(struct dc_3dlut *lut);
+
+void dc_post_update_surfaces_to_stream(
+ struct dc *dc);
+
+/**
+ * struct dc_validation_set - Struct to store surface/stream associations for validation
+ */
+struct dc_validation_set {
+ /**
+ * @stream: Stream state properties
+ */
+ struct dc_stream_state *stream;
+
+ /**
+ * @plane_states: Surface state
+ */
+ struct dc_plane_state *plane_states[MAX_SURFACES];
+
+ /**
+ * @plane_count: Total of active planes
+ */
+ uint8_t plane_count;
};
+bool dc_validate_boot_timing(const struct dc *dc,
+ const struct dc_sink *sink,
+ struct dc_crtc_timing *crtc_timing);
+
+enum dc_status dc_validate_plane(struct dc *dc, const struct dc_plane_state *plane_state);
+
+enum dc_status dc_validate_with_context(struct dc *dc,
+ const struct dc_validation_set set[],
+ int set_count,
+ struct dc_state *context,
+ enum dc_validate_mode validate_mode);
+
+bool dc_set_generic_gpio_for_stereo(bool enable,
+ struct gpio_service *gpio_service);
+
+enum dc_status dc_validate_global_state(
+ struct dc *dc,
+ struct dc_state *new_ctx,
+ enum dc_validate_mode validate_mode);
+
+bool dc_acquire_release_mpc_3dlut(
+ struct dc *dc, bool acquire,
+ struct dc_stream_state *stream,
+ struct dc_3dlut **lut,
+ struct dc_transfer_func **shaper);
+
+bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
+void get_audio_check(struct audio_info *aud_modes,
+ struct audio_check *aud_chk);
+
+bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count);
+void populate_fast_updates(struct dc_fast_update *fast_update,
+ struct dc_surface_update *srf_updates,
+ int surface_count,
+ struct dc_stream_update *stream_update);
+/*
+ * Set up streams and links associated to drive sinks
+ * The streams parameter is an absolute set of all active streams.
+ *
+ * After this call:
+ * Phy, Encoder, Timing Generator are programmed and enabled.
+ * New streams are enabled with blank stream; no memory read.
+ */
+enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params *params);
+
+
+struct dc_plane_state *dc_get_surface_for_mpcc(struct dc *dc,
+ struct dc_stream_state *stream,
+ int mpcc_inst);
+
+
+uint32_t dc_get_opp_for_plane(struct dc *dc, struct dc_plane_state *plane);
+
+void dc_set_disable_128b_132b_stream_overhead(bool disable);
+
+/* The function returns minimum bandwidth required to drive a given timing
+ * return - minimum required timing bandwidth in kbps.
+ */
+uint32_t dc_bandwidth_in_kbps_from_timing(
+ const struct dc_crtc_timing *timing,
+ const enum dc_link_encoding_format link_encoding);
+
+/* Link Interfaces */
/* Return an enumerated dc_link.
* dc_link order is constant and determined at
* boot time. They cannot be created or destroyed.
@@ -1592,6 +1964,9 @@ void dc_get_edp_links(const struct dc *dc,
struct dc_link **edp_links,
int *edp_num);
+void dc_set_edp_power(const struct dc *dc, struct dc_link *edp_link,
+ bool powerOn);
+
/* The function initiates detection handshake over the given link. It first
* determines if there are display connections over the link. If so it initiates
* detection protocols supported by the connected receiver device. The function
@@ -1713,6 +2088,9 @@ int dc_link_aux_transfer_raw(struct ddc_service *ddc,
struct aux_payload *payload,
enum aux_return_code_type *operation_result);
+struct ddc_service *
+dc_get_oem_i2c_device(struct dc *dc);
+
bool dc_is_oem_i2c_device_present(
struct dc *dc,
size_t slave_address
@@ -1793,6 +2171,24 @@ uint32_t dc_link_bandwidth_kbps(
const struct dc_link *link,
const struct dc_link_settings *link_setting);
+struct dp_audio_bandwidth_params {
+ const struct dc_crtc_timing *crtc_timing;
+ enum dp_link_encoding link_encoding;
+ uint32_t channel_count;
+ uint32_t sample_rate_hz;
+};
+
+/* The function calculates the minimum size of hblank (in bytes) needed to
+ * support the specified channel count and sample rate combination, given the
+ * link encoding and timing to be used. This calculation is not supported
+ * for 8b/10b SST.
+ *
+ * return - min hblank size in bytes, 0 if 8b/10b SST.
+ */
+uint32_t dc_link_required_hblank_size_bytes(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params);
+
/* The function takes a snapshot of current link resource allocation state
* @dc: pointer to dc of the dm calling this
* @map: a dc link resource snapshot defined internally to dc.
@@ -1987,8 +2383,7 @@ void dc_link_edp_panel_backlight_power_on(struct dc_link *link,
* and 16 bit fractional, where 1.0 is max backlight value.
*/
bool dc_link_set_backlight_level(const struct dc_link *dc_link,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp);
+ struct set_backlight_level_params *backlight_level_params);
/* Set/get nits-based backlight level of an embedded panel (eDP, LVDS). */
bool dc_link_set_backlight_level_nits(struct dc_link *link,
@@ -2013,6 +2408,20 @@ bool dc_link_setup_psr(struct dc_link *dc_link,
const struct dc_stream_state *stream, struct psr_config *psr_config,
struct psr_context *psr_context);
+/*
+ * Communicate with DMUB to allow or disallow Panel Replay on the specified link:
+ *
+ * @link: pointer to the dc_link struct instance
+ * @enable: enable(active) or disable(inactive) replay
+ * @wait: state transition needs to wait until the active set is completed.
+ * @force_static: force disable(inactive) the replay
+ * @power_opts: set power optimization parameters to DMUB.
+ *
+ * return: allow Replay active will return true, else will return false.
+ */
+bool dc_link_set_replay_allow_active(struct dc_link *dc_link, const bool *enable,
+ bool wait, bool force_static, const unsigned int *power_opts);
+
bool dc_link_get_replay_state(const struct dc_link *dc_link, uint64_t *state);
/* On eDP links this function call will stall until T12 has elapsed.
@@ -2077,19 +2486,6 @@ unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link);
void dc_link_set_usb4_req_bw_req(struct dc_link *link, int req_bw);
/*
- * Handle function for when the status of the Request above is complete.
- * We will find out the result of allocating on CM and update structs.
- *
- * @link: pointer to the dc_link struct instance
- * @bw: Allocated or Estimated BW depending on the result
- * @result: Response type
- *
- * return: none
- */
-void dc_link_handle_usb4_bw_alloc_response(struct dc_link *link,
- uint8_t bw, uint8_t result);
-
-/*
* Handle the USB4 BW Allocation related functionality here:
* Plug => Try to allocate max bw from timing parameters supported by the sink
* Unplug => de-allocate bw
@@ -2097,23 +2493,23 @@ void dc_link_handle_usb4_bw_alloc_response(struct dc_link *link,
* @link: pointer to the dc_link struct instance
* @peak_bw: Peak bw used by the link/sink
*
- * return: allocated bw else return 0
*/
-int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
+void dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link(
struct dc_link *link, int peak_bw);
/*
- * Validate the BW of all the valid DPIA links to make sure it doesn't exceed
- * available BW for each host router
+ * Calculates the DP tunneling bandwidth required for the stream timing
+ * and aggregates the stream bandwidth for the respective DP tunneling link
*
- * @dc: pointer to dc struct
- * @stream: pointer to all possible streams
- * @num_streams: number of valid DPIA streams
- *
- * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE
+ * return: dc_status
*/
-bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams,
- const unsigned int count);
+enum dc_status dc_link_validate_dp_tunneling_bandwidth(const struct dc *dc, const struct dc_state *new_ctx);
+
+/*
+ * Query whether ALPM is supported by the link
+ */
+void dc_link_get_alpm_support(struct dc_link *link, bool *auxless_support,
+ bool *auxwake_support);
/* Sink Interfaces - A sink corresponds to a display output device */
@@ -2133,14 +2529,19 @@ struct dc_sink_dsc_caps {
// 'true' if these are virtual DPCD's DSC caps (immediately upstream of sink in MST topology),
// 'false' if they are sink's DSC caps
bool is_virtual_dpcd_dsc;
-#if defined(CONFIG_DRM_AMD_DC_FP)
// 'true' if MST topology supports DSC passthrough for sink
// 'false' if MST topology does not support DSC passthrough
bool is_dsc_passthrough_supported;
-#endif
struct dsc_dec_dpcd_caps dsc_dec_caps;
};
+struct dc_sink_hblank_expansion_caps {
+ // 'true' if these are virtual DPCD's HBlank expansion caps (immediately upstream of sink in MST topology),
+ // 'false' if they are sink's HBlank expansion caps
+ bool is_virtual_dpcd_hblank_expansion;
+ struct hblank_expansion_dpcd_caps dpcd_caps;
+};
+
struct dc_sink_fec_caps {
bool is_rx_fec_supported;
bool is_topology_fec_supported;
@@ -2167,6 +2568,7 @@ struct dc_sink {
struct scdc_caps scdc_caps;
struct dc_sink_dsc_caps dsc_caps;
struct dc_sink_fec_caps fec_caps;
+ struct dc_sink_hblank_expansion_caps hblank_expansion_caps;
bool is_vsc_sdp_colorimetry_supported;
@@ -2235,10 +2637,18 @@ bool dc_is_dmcu_initialized(struct dc *dc);
enum dc_status dc_set_clock(struct dc *dc, enum dc_clock_type clock_type, uint32_t clk_khz, uint32_t stepping);
void dc_get_clock(struct dc *dc, enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg);
-bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc, struct dc_plane_state *plane,
- struct dc_cursor_attributes *cursor_attr);
+bool dc_is_plane_eligible_for_idle_optimizations(struct dc *dc,
+ unsigned int pitch,
+ unsigned int height,
+ enum surface_pixel_format format,
+ struct dc_cursor_attributes *cursor_attr);
-void dc_allow_idle_optimizations(struct dc *dc, bool allow);
+#define dc_allow_idle_optimizations(dc, allow) dc_allow_idle_optimizations_internal(dc, allow, __func__)
+#define dc_exit_ips_for_hw_access(dc) dc_exit_ips_for_hw_access_internal(dc, __func__)
+
+void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, const char *caller_name);
+void dc_exit_ips_for_hw_access_internal(struct dc *dc, const char *caller_name);
+bool dc_dmub_is_ips_idle_state(struct dc *dc);
/* set min and max memory clock to lowest and highest DPM level, respectively */
void dc_unlock_memory_clock_frequency(struct dc *dc);
@@ -2256,6 +2666,11 @@ void dc_hardware_release(struct dc *dc);
void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc);
bool dc_set_psr_allow_active(struct dc *dc, bool enable);
+
+bool dc_set_replay_allow_active(struct dc *dc, bool active);
+
+bool dc_set_ips_disable(struct dc *dc, unsigned int disable_ips);
+
void dc_z10_restore(const struct dc *dc);
void dc_z10_save_init(struct dc *dc);
@@ -2287,6 +2702,8 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
uint8_t mst_alloc_slots,
uint8_t *mst_slots_in_use);
+void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps);
+
void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
uint32_t hpd_int_enable);
@@ -2294,13 +2711,44 @@ void dc_print_dmub_diagnostic_data(const struct dc *dc);
void dc_query_current_properties(struct dc *dc, struct dc_current_properties *properties);
+struct dc_power_profile {
+ int power_level; /* Lower is better */
+};
+
+struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state *context);
+
+unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context);
+
+bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index);
+
/* DSC Interfaces */
#include "dc_dsc.h"
+void dc_get_visual_confirm_for_stream(
+ struct dc *dc,
+ struct dc_stream_state *stream_state,
+ struct tg_color *color);
+
/* Disable acc mode Interfaces */
void dc_disable_accelerated_mode(struct dc *dc);
bool dc_is_timing_changed(struct dc_stream_state *cur_stream,
struct dc_stream_state *new_stream);
+bool dc_is_cursor_limit_pending(struct dc *dc);
+bool dc_can_clear_cursor_limit(struct dc *dc);
+
+/**
+ * dc_get_underflow_debug_data_for_otg() - Retrieve underflow debug data.
+ *
+ * @dc: Pointer to the display core context.
+ * @primary_otg_inst: Instance index of the primary OTG that underflowed.
+ * @out_data: Pointer to a dc_underflow_debug_data struct to be filled with debug information.
+ *
+ * This function collects and logs underflow-related HW states when underflow happens,
+ * including OTG underflow status, current read positions, frame count, and per-HUBP debug data.
+ * The results are stored in the provided out_data structure for further analysis or logging.
+ */
+void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst, struct dc_underflow_debug_data *out_data);
+
#endif /* DC_INTERFACE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
index be9aa1a71847..5fa5e2b63fb7 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
@@ -140,7 +140,7 @@ struct dc_vbios_funcs {
enum bp_result (*enable_lvtma_control)(
struct dc_bios *bios,
uint8_t uc_pwr_on,
- uint8_t panel_instance,
+ uint8_t pwrseq_instance,
uint8_t bypass_panel_control_wait);
enum bp_result (*get_soc_bb_info)(
@@ -183,6 +183,7 @@ struct dc_bios {
struct dc_firmware_info fw_info;
bool fw_info_valid;
struct dc_vram_info vram_info;
+ struct bp_soc_bb_info bb_info;
struct dc_golden_table golden_table;
};
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 4c5ef3ef8dbd..53a088ebddef 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -23,6 +23,7 @@
*
*/
+#include "dm_services.h"
#include "dc.h"
#include "dc_dmub_srv.h"
#include "../dmub/dmub_srv.h"
@@ -32,9 +33,13 @@
#include "../basics/conversion.h"
#include "cursor_reg_cache.h"
#include "resource.h"
+#include "clk_mgr.h"
+#include "dc_state_priv.h"
+#include "dc_plane_priv.h"
#define CTX dc_dmub_srv->ctx
#define DC_LOGGER CTX->logger
+#define GPINT_RETRY_NUM 20
static void dc_dmub_srv_construct(struct dc_dmub_srv *dc_srv, struct dc *dc,
struct dmub_srv *dmub)
@@ -66,65 +71,115 @@ void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv)
}
}
-void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv)
+bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv)
{
- struct dmub_srv *dmub = dc_dmub_srv->dmub;
- struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+ struct dmub_srv *dmub;
+ struct dc_context *dc_ctx;
enum dmub_status status;
- status = dmub_srv_wait_for_idle(dmub, 100000);
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
+
+ dc_ctx = dc_dmub_srv->ctx;
+ dmub = dc_dmub_srv->dmub;
+
+ do {
+ status = dmub_srv_wait_for_pending(dmub, 100000);
+ } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK);
+
if (status != DMUB_STATUS_OK) {
DC_ERROR("Error waiting for DMUB idle: status=%d\n", status);
dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
}
+
+ return status == DMUB_STATUS_OK;
}
-void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dmub_srv)
+void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dc_dmub_srv)
{
- struct dmub_srv *dmub = dmub_srv->dmub;
- struct dc_context *dc_ctx = dmub_srv->ctx;
+ struct dmub_srv *dmub = dc_dmub_srv->dmub;
+ struct dc_context *dc_ctx = dc_dmub_srv->ctx;
enum dmub_status status = DMUB_STATUS_OK;
status = dmub_srv_clear_inbox0_ack(dmub);
if (status != DMUB_STATUS_OK) {
DC_ERROR("Error clearing INBOX0 ack: status=%d\n", status);
- dc_dmub_srv_log_diagnostic_data(dmub_srv);
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
}
}
-void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dmub_srv)
+void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dc_dmub_srv)
{
- struct dmub_srv *dmub = dmub_srv->dmub;
- struct dc_context *dc_ctx = dmub_srv->ctx;
+ struct dmub_srv *dmub = dc_dmub_srv->dmub;
+ struct dc_context *dc_ctx = dc_dmub_srv->ctx;
enum dmub_status status = DMUB_STATUS_OK;
status = dmub_srv_wait_for_inbox0_ack(dmub, 100000);
if (status != DMUB_STATUS_OK) {
DC_ERROR("Error waiting for INBOX0 HW Lock Ack\n");
- dc_dmub_srv_log_diagnostic_data(dmub_srv);
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
}
}
-void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
- union dmub_inbox0_data_register data)
+void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dc_dmub_srv,
+ union dmub_inbox0_data_register data)
{
- struct dmub_srv *dmub = dmub_srv->dmub;
- struct dc_context *dc_ctx = dmub_srv->ctx;
+ struct dmub_srv *dmub = dc_dmub_srv->dmub;
+ struct dc_context *dc_ctx = dc_dmub_srv->ctx;
enum dmub_status status = DMUB_STATUS_OK;
status = dmub_srv_send_inbox0_cmd(dmub, data);
if (status != DMUB_STATUS_OK) {
DC_ERROR("Error sending INBOX0 cmd\n");
- dc_dmub_srv_log_diagnostic_data(dmub_srv);
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
}
}
-bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
+static bool dc_dmub_srv_reg_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
+ unsigned int count,
+ union dmub_rb_cmd *cmd_list)
{
- return dc_dmub_srv_cmd_run_list(dc_dmub_srv, 1, cmd, wait_type);
+ struct dc_context *dc_ctx;
+ struct dmub_srv *dmub;
+ enum dmub_status status = DMUB_STATUS_OK;
+ int i;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
+
+ dc_ctx = dc_dmub_srv->ctx;
+ dmub = dc_dmub_srv->dmub;
+
+ for (i = 0 ; i < count; i++) {
+ /* confirm no messages pending */
+ do {
+ status = dmub_srv_wait_for_idle(dmub, 100000);
+ } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK);
+
+ /* queue command */
+ if (status == DMUB_STATUS_OK)
+ status = dmub_srv_reg_cmd_execute(dmub, &cmd_list[i]);
+
+ /* check for errors */
+ if (status != DMUB_STATUS_OK) {
+ break;
+ }
+ }
+
+ if (status != DMUB_STATUS_OK) {
+ if (status != DMUB_STATUS_POWER_STATE_D3) {
+ DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ }
+ return false;
+ }
+
+ return true;
}
-bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type)
+static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
+ unsigned int count,
+ union dmub_rb_cmd *cmd_list)
{
struct dc_context *dc_ctx;
struct dmub_srv *dmub;
@@ -139,49 +194,120 @@ bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int coun
for (i = 0 ; i < count; i++) {
// Queue command
- status = dmub_srv_cmd_queue(dmub, &cmd_list[i]);
+ if (!cmd_list[i].cmd_common.header.multi_cmd_pending ||
+ dmub_rb_num_free(&dmub->inbox1.rb) >= count - i) {
+ status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]);
+ } else {
+ status = DMUB_STATUS_QUEUE_FULL;
+ }
if (status == DMUB_STATUS_QUEUE_FULL) {
/* Execute and wait for queue to become empty again. */
- dmub_srv_cmd_execute(dmub);
- dmub_srv_wait_for_idle(dmub, 100000);
+ status = dmub_srv_fb_cmd_execute(dmub);
+ if (status == DMUB_STATUS_POWER_STATE_D3)
+ return false;
+
+ do {
+ status = dmub_srv_wait_for_inbox_free(dmub, 100000, count - i);
+ } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK);
/* Requeue the command. */
- status = dmub_srv_cmd_queue(dmub, &cmd_list[i]);
+ status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]);
}
if (status != DMUB_STATUS_OK) {
- DC_ERROR("Error queueing DMUB command: status=%d\n", status);
- dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ if (status != DMUB_STATUS_POWER_STATE_D3) {
+ DC_ERROR("Error queueing DMUB command: status=%d\n", status);
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ }
return false;
}
}
- status = dmub_srv_cmd_execute(dmub);
+ status = dmub_srv_fb_cmd_execute(dmub);
if (status != DMUB_STATUS_OK) {
- DC_ERROR("Error starting DMUB execution: status=%d\n", status);
- dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ if (status != DMUB_STATUS_POWER_STATE_D3) {
+ DC_ERROR("Error starting DMUB execution: status=%d\n", status);
+ dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
+ }
return false;
}
+ return true;
+}
+
+bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
+ unsigned int count,
+ union dmub_rb_cmd *cmd_list)
+{
+ bool res = false;
+
+ if (dc_dmub_srv && dc_dmub_srv->dmub) {
+ if (dc_dmub_srv->dmub->inbox_type == DMUB_CMD_INTERFACE_REG) {
+ res = dc_dmub_srv_reg_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list);
+ } else {
+ res = dc_dmub_srv_fb_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list);
+ }
+
+ if (res)
+ res = dmub_srv_update_inbox_status(dc_dmub_srv->dmub) == DMUB_STATUS_OK;
+ }
+
+ return res;
+}
+
+bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv,
+ enum dm_dmub_wait_type wait_type,
+ union dmub_rb_cmd *cmd_list)
+{
+ struct dmub_srv *dmub;
+ enum dmub_status status;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
+
+ dmub = dc_dmub_srv->dmub;
+
// Wait for DMUB to process command
if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) {
- status = dmub_srv_wait_for_idle(dmub, 100000);
+ do {
+ status = dmub_srv_wait_for_idle(dmub, 100000);
+ } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK);
if (status != DMUB_STATUS_OK) {
DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status);
+ if (!dmub->debug.timeout_info.timeout_occured) {
+ dmub->debug.timeout_info.timeout_occured = true;
+ if (cmd_list)
+ dmub->debug.timeout_info.timeout_cmd = *cmd_list;
+ dmub->debug.timeout_info.timestamp = dm_get_timestamp(dc_dmub_srv->ctx);
+ }
dc_dmub_srv_log_diagnostic_data(dc_dmub_srv);
return false;
}
// Copy data back from ring buffer into command
- if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)
- dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list);
+ if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY && cmd_list) {
+ dmub_srv_cmd_get_response(dc_dmub_srv->dmub, cmd_list);
+ }
}
return true;
}
+bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
+{
+ return dc_dmub_srv_cmd_run_list(dc_dmub_srv, 1, cmd, wait_type);
+}
+
+bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type)
+{
+ if (!dc_dmub_srv_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list))
+ return false;
+
+ return dc_dmub_srv_wait_for_idle(dc_dmub_srv, wait_type, cmd_list);
+}
+
bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv)
{
struct dmub_srv *dmub;
@@ -207,17 +333,11 @@ bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv)
bool dc_dmub_srv_notify_stream_mask(struct dc_dmub_srv *dc_dmub_srv,
unsigned int stream_mask)
{
- struct dmub_srv *dmub;
- const uint32_t timeout = 30;
-
if (!dc_dmub_srv || !dc_dmub_srv->dmub)
return false;
- dmub = dc_dmub_srv->dmub;
-
- return dmub_srv_send_gpint_command(
- dmub, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK,
- stream_mask, timeout) == DMUB_STATUS_OK;
+ return dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK,
+ stream_mask, NULL, DM_DMUB_WAIT_TYPE_WAIT);
}
bool dc_dmub_srv_is_restore_required(struct dc_dmub_srv *dc_dmub_srv)
@@ -266,7 +386,7 @@ void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal
cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
// Send the command to the DMCUB.
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
@@ -280,7 +400,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header);
// Send the command to the DMCUB.
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state *stream)
@@ -323,6 +443,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru
int ramp_up_num_steps = 1; // TODO: Ramp is currently disabled. Reenable it.
uint8_t visual_confirm_enabled;
int pipe_idx = 0;
+ struct dc_stream_status *stream_status = NULL;
if (dc == NULL)
return false;
@@ -346,7 +467,8 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru
* that does not use FAMS, we are in an FPO + VActive scenario.
* Assign vactive stretch margin in this case.
*/
- if (!pipe->stream->fpo_in_use) {
+ stream_status = dc_state_get_stream_status(context, pipe->stream);
+ if (stream_status && !stream_status->fpo_in_use) {
cmd.fw_assisted_mclk_switch.config_data.vactive_stretch_margin_us = dc->debug.fpo_vactive_margin_us;
break;
}
@@ -357,7 +479,11 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru
for (i = 0, k = 0; context && i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- if (resource_is_pipe_type(pipe, OTG_MASTER) && pipe->stream->fpo_in_use) {
+ if (!resource_is_pipe_type(pipe, OTG_MASTER))
+ continue;
+
+ stream_status = dc_state_get_stream_status(context, pipe->stream);
+ if (stream_status && stream_status->fpo_in_use) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
uint8_t min_refresh_in_hz = (pipe->stream->timing.min_refresh_in_uhz + 999999) / 1000000;
@@ -373,7 +499,7 @@ bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool should_manage_pstate, stru
sizeof(cmd.fw_assisted_mclk_switch) - sizeof(cmd.fw_assisted_mclk_switch.header);
// Send the command to the DMCUB.
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -394,7 +520,7 @@ void dc_dmub_srv_query_caps_cmd(struct dc_dmub_srv *dc_dmub_srv)
cmd.query_feature_caps.header.payload_bytes = sizeof(struct dmub_cmd_query_feature_caps_data);
/* If command was processed, copy feature caps to dmub srv */
- if (dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+ if (dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
cmd.query_feature_caps.header.ret_status == 0) {
memcpy(&dc_dmub_srv->dmub->feature_caps,
&cmd.query_feature_caps.query_feature_caps_data,
@@ -407,7 +533,9 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
union dmub_rb_cmd cmd = { 0 };
unsigned int panel_inst = 0;
- dc_get_edp_link_panel_inst(dc, pipe_ctx->stream->link, &panel_inst);
+ if (!dc_get_edp_link_panel_inst(dc, pipe_ctx->stream->link, &panel_inst) &&
+ dc->debug.visual_confirm == VISUAL_CONFIRM_DISABLE)
+ return;
memset(&cmd, 0, sizeof(cmd));
@@ -419,7 +547,7 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
cmd.visual_confirm_color.visual_confirm_color_data.visual_confirm_color.panel_inst = panel_inst;
// If command was processed, copy feature caps to dmub srv
- if (dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+ if (dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
cmd.visual_confirm_color.header.ret_status == 0) {
memcpy(&dc->ctx->dmub_srv->dmub->visual_confirm_color,
&cmd.visual_confirm_color.visual_confirm_color_data,
@@ -430,10 +558,11 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
/**
* populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the DMCUB subvp command
*
- * @dc: [in] current dc state
+ * @dc: [in] pointer to dc object
* @subvp_pipe: [in] pipe_ctx for the SubVP pipe
* @vblank_pipe: [in] pipe_ctx for the DRR pipe
* @pipe_data: [in] Pipe data which stores the VBLANK/DRR info
+ * @context: [in] DC state for access to phantom stream
*
* Populate the DMCUB SubVP command with DRR pipe info. All the information
* required for calculating the SubVP + DRR microschedule is populated here.
@@ -444,12 +573,14 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
* 3. Populate the drr_info with the min and max supported vtotal values
*/
static void populate_subvp_cmd_drr_info(struct dc *dc,
+ struct dc_state *context,
struct pipe_ctx *subvp_pipe,
struct pipe_ctx *vblank_pipe,
struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data)
{
+ struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
- struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+ struct dc_crtc_timing *phantom_timing;
struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing;
uint16_t drr_frame_us = 0;
uint16_t min_drr_supported_us = 0;
@@ -463,6 +594,11 @@ static void populate_subvp_cmd_drr_info(struct dc *dc,
uint16_t min_vtotal_supported = 0;
uint16_t max_vtotal_supported = 0;
+ if (!phantom_stream)
+ return;
+
+ phantom_timing = &phantom_stream->timing;
+
pipe_data->pipe_config.vblank_data.drr_info.drr_in_use = true;
pipe_data->pipe_config.vblank_data.drr_info.use_ramping = false; // for now don't use ramping
pipe_data->pipe_config.vblank_data.drr_info.drr_window_size_ms = 4; // hardcode 4ms DRR window for now
@@ -537,7 +673,7 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
continue;
// Find the SubVP pipe
- if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
break;
}
@@ -552,8 +688,9 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
pipe_data->pipe_config.vblank_data.vblank_end =
vblank_pipe->stream->timing.v_total - vblank_pipe->stream->timing.v_front_porch - vblank_pipe->stream->timing.v_addressable;
- if (vblank_pipe->stream->ignore_msa_timing_param)
- populate_subvp_cmd_drr_info(dc, pipe, vblank_pipe, pipe_data);
+ if (vblank_pipe->stream->ignore_msa_timing_param &&
+ (vblank_pipe->stream->allow_freesync || vblank_pipe->stream->vrr_active_variable || vblank_pipe->stream->vrr_active_fixed))
+ populate_subvp_cmd_drr_info(dc, context, pipe, vblank_pipe, pipe_data);
}
/**
@@ -578,10 +715,23 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
uint32_t subvp0_prefetch_us = 0;
uint32_t subvp1_prefetch_us = 0;
uint32_t prefetch_delta_us = 0;
- struct dc_crtc_timing *phantom_timing0 = &subvp_pipes[0]->stream->mall_stream_config.paired_stream->timing;
- struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing;
+ struct dc_stream_state *phantom_stream0 = NULL;
+ struct dc_stream_state *phantom_stream1 = NULL;
+ struct dc_crtc_timing *phantom_timing0 = NULL;
+ struct dc_crtc_timing *phantom_timing1 = NULL;
struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL;
+ phantom_stream0 = dc_state_get_paired_subvp_stream(context, subvp_pipes[0]->stream);
+ if (!phantom_stream0)
+ return;
+
+ phantom_stream1 = dc_state_get_paired_subvp_stream(context, subvp_pipes[1]->stream);
+ if (!phantom_stream1)
+ return;
+
+ phantom_timing0 = &phantom_stream0->timing;
+ phantom_timing1 = &phantom_stream1->timing;
+
subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) *
(uint64_t)phantom_timing0->h_total * 1000000),
(((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
@@ -631,10 +781,16 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
uint32_t j;
struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data =
&cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index];
+ struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
- struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+ struct dc_crtc_timing *phantom_timing;
uint32_t out_num_stream, out_den_stream, out_num_plane, out_den_plane, out_num, out_den;
+ if (!phantom_stream)
+ return;
+
+ phantom_timing = &phantom_stream->timing;
+
pipe_data->mode = SUBVP;
pipe_data->pipe_config.subvp_data.pix_clk_100hz = subvp_pipe->stream->timing.pix_clk_100hz;
pipe_data->pipe_config.subvp_data.htotal = subvp_pipe->stream->timing.h_total;
@@ -645,7 +801,8 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
main_timing->v_total - main_timing->v_front_porch - main_timing->v_addressable;
pipe_data->pipe_config.subvp_data.mall_region_lines = phantom_timing->v_addressable;
pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->stream_res.tg->inst;
- pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param;
+ pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param &&
+ (subvp_pipe->stream->allow_freesync || subvp_pipe->stream->vrr_active_variable || subvp_pipe->stream->vrr_active_fixed);
/* Calculate the scaling factor from the src and dst height.
* e.g. If 3840x2160 being downscaled to 1920x1080, the scaling factor is 1/2.
@@ -678,21 +835,22 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
} else if (subvp_pipe->next_odm_pipe) {
pipe_data->pipe_config.subvp_data.main_split_pipe_index = subvp_pipe->next_odm_pipe->pipe_idx;
} else {
- pipe_data->pipe_config.subvp_data.main_split_pipe_index = 0;
+ pipe_data->pipe_config.subvp_data.main_split_pipe_index = 0xF;
}
// Find phantom pipe index based on phantom stream
for (j = 0; j < dc->res_pool->pipe_count; j++) {
struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j];
- if (phantom_pipe->stream == subvp_pipe->stream->mall_stream_config.paired_stream) {
+ if (resource_is_pipe_type(phantom_pipe, OTG_MASTER) &&
+ phantom_pipe->stream == dc_state_get_paired_subvp_stream(context, subvp_pipe->stream)) {
pipe_data->pipe_config.subvp_data.phantom_pipe_index = phantom_pipe->stream_res.tg->inst;
if (phantom_pipe->bottom_pipe) {
pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = phantom_pipe->bottom_pipe->plane_res.hubp->inst;
} else if (phantom_pipe->next_odm_pipe) {
pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = phantom_pipe->next_odm_pipe->plane_res.hubp->inst;
} else {
- pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = 0;
+ pipe_data->pipe_config.subvp_data.phantom_split_pipe_index = 0xF;
}
break;
}
@@ -719,6 +877,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
union dmub_rb_cmd cmd;
struct pipe_ctx *subvp_pipes[2];
uint32_t wm_val_refclk = 0;
+ enum mall_stream_type pipe_mall_type;
memset(&cmd, 0, sizeof(cmd));
// FW command for SUBVP
@@ -734,7 +893,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
*/
if (resource_is_pipe_type(pipe, OTG_MASTER) &&
resource_is_pipe_type(pipe, DPP_PIPE) &&
- pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
subvp_pipes[subvp_count++] = pipe;
}
@@ -742,6 +901,7 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
// For each pipe that is a "main" SUBVP pipe, fill in pipe data for DMUB SUBVP cmd
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
if (!pipe->stream)
continue;
@@ -752,12 +912,11 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
*/
if (resource_is_pipe_type(pipe, OTG_MASTER) &&
resource_is_pipe_type(pipe, DPP_PIPE) &&
- pipe->stream->mall_stream_config.paired_stream &&
- pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+ pipe_mall_type == SUBVP_MAIN) {
populate_subvp_cmd_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
} else if (resource_is_pipe_type(pipe, OTG_MASTER) &&
resource_is_pipe_type(pipe, DPP_PIPE) &&
- pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ pipe_mall_type == SUBVP_NONE) {
// Don't need to check for ActiveDRAMClockChangeMargin < 0, not valid in cases where
// we run through DML without calculating "natural" P-state support
populate_subvp_cmd_vblank_pipe_info(dc, context, &cmd, pipe, cmd_pipe_index++);
@@ -779,118 +938,77 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
cmd.fw_assisted_mclk_switch_v2.config_data.watermark_a_cache = wm_val_refclk < 0xFFFF ? wm_val_refclk : 0xFFFF;
}
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
-bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *diag_data)
+bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv)
{
- if (!dc_dmub_srv || !dc_dmub_srv->dmub || !diag_data)
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
return false;
- return dmub_srv_get_diagnostic_data(dc_dmub_srv->dmub, diag_data);
+ return dmub_srv_get_diagnostic_data(dc_dmub_srv->dmub);
}
void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv)
{
- struct dmub_diagnostic_data diag_data = {0};
+ uint32_t i;
if (!dc_dmub_srv || !dc_dmub_srv->dmub) {
DC_LOG_ERROR("%s: invalid parameters.", __func__);
return;
}
- if (!dc_dmub_srv_get_diagnostic_data(dc_dmub_srv, &diag_data)) {
+ DC_LOG_ERROR("%s: DMCUB error - collecting diagnostic data\n", __func__);
+
+ if (!dc_dmub_srv_get_diagnostic_data(dc_dmub_srv)) {
DC_LOG_ERROR("%s: dc_dmub_srv_get_diagnostic_data failed.", __func__);
return;
}
DC_LOG_DEBUG("DMCUB STATE:");
- DC_LOG_DEBUG(" dmcub_version : %08x", diag_data.dmcub_version);
- DC_LOG_DEBUG(" scratch [0] : %08x", diag_data.scratch[0]);
- DC_LOG_DEBUG(" scratch [1] : %08x", diag_data.scratch[1]);
- DC_LOG_DEBUG(" scratch [2] : %08x", diag_data.scratch[2]);
- DC_LOG_DEBUG(" scratch [3] : %08x", diag_data.scratch[3]);
- DC_LOG_DEBUG(" scratch [4] : %08x", diag_data.scratch[4]);
- DC_LOG_DEBUG(" scratch [5] : %08x", diag_data.scratch[5]);
- DC_LOG_DEBUG(" scratch [6] : %08x", diag_data.scratch[6]);
- DC_LOG_DEBUG(" scratch [7] : %08x", diag_data.scratch[7]);
- DC_LOG_DEBUG(" scratch [8] : %08x", diag_data.scratch[8]);
- DC_LOG_DEBUG(" scratch [9] : %08x", diag_data.scratch[9]);
- DC_LOG_DEBUG(" scratch [10] : %08x", diag_data.scratch[10]);
- DC_LOG_DEBUG(" scratch [11] : %08x", diag_data.scratch[11]);
- DC_LOG_DEBUG(" scratch [12] : %08x", diag_data.scratch[12]);
- DC_LOG_DEBUG(" scratch [13] : %08x", diag_data.scratch[13]);
- DC_LOG_DEBUG(" scratch [14] : %08x", diag_data.scratch[14]);
- DC_LOG_DEBUG(" scratch [15] : %08x", diag_data.scratch[15]);
- DC_LOG_DEBUG(" pc : %08x", diag_data.pc);
- DC_LOG_DEBUG(" unk_fault_addr : %08x", diag_data.undefined_address_fault_addr);
- DC_LOG_DEBUG(" inst_fault_addr : %08x", diag_data.inst_fetch_fault_addr);
- DC_LOG_DEBUG(" data_fault_addr : %08x", diag_data.data_write_fault_addr);
- DC_LOG_DEBUG(" inbox1_rptr : %08x", diag_data.inbox1_rptr);
- DC_LOG_DEBUG(" inbox1_wptr : %08x", diag_data.inbox1_wptr);
- DC_LOG_DEBUG(" inbox1_size : %08x", diag_data.inbox1_size);
- DC_LOG_DEBUG(" inbox0_rptr : %08x", diag_data.inbox0_rptr);
- DC_LOG_DEBUG(" inbox0_wptr : %08x", diag_data.inbox0_wptr);
- DC_LOG_DEBUG(" inbox0_size : %08x", diag_data.inbox0_size);
- DC_LOG_DEBUG(" is_enabled : %d", diag_data.is_dmcub_enabled);
- DC_LOG_DEBUG(" is_soft_reset : %d", diag_data.is_dmcub_soft_reset);
- DC_LOG_DEBUG(" is_secure_reset : %d", diag_data.is_dmcub_secure_reset);
- DC_LOG_DEBUG(" is_traceport_en : %d", diag_data.is_traceport_en);
- DC_LOG_DEBUG(" is_cw0_en : %d", diag_data.is_cw0_enabled);
- DC_LOG_DEBUG(" is_cw6_en : %d", diag_data.is_cw6_enabled);
-}
-
-static bool dc_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx)
-{
- struct pipe_ctx *test_pipe, *split_pipe;
- const struct scaler_data *scl_data = &pipe_ctx->plane_res.scl_data;
- struct rect r1 = scl_data->recout, r2, r2_half;
- int r1_r = r1.x + r1.width, r1_b = r1.y + r1.height, r2_r, r2_b;
- int cur_layer = pipe_ctx->plane_state->layer_index;
-
- /**
- * Disable the cursor if there's another pipe above this with a
- * plane that contains this pipe's viewport to prevent double cursor
- * and incorrect scaling artifacts.
- */
- for (test_pipe = pipe_ctx->top_pipe; test_pipe;
- test_pipe = test_pipe->top_pipe) {
- // Skip invisible layer and pipe-split plane on same layer
- if (!test_pipe->plane_state->visible || test_pipe->plane_state->layer_index == cur_layer)
- continue;
-
- r2 = test_pipe->plane_res.scl_data.recout;
- r2_r = r2.x + r2.width;
- r2_b = r2.y + r2.height;
- split_pipe = test_pipe;
-
- /**
- * There is another half plane on same layer because of
- * pipe-split, merge together per same height.
- */
- for (split_pipe = pipe_ctx->top_pipe; split_pipe;
- split_pipe = split_pipe->top_pipe)
- if (split_pipe->plane_state->layer_index == test_pipe->plane_state->layer_index) {
- r2_half = split_pipe->plane_res.scl_data.recout;
- r2.x = (r2_half.x < r2.x) ? r2_half.x : r2.x;
- r2.width = r2.width + r2_half.width;
- r2_r = r2.x + r2.width;
- break;
- }
-
- if (r1.x >= r2.x && r1.y >= r2.y && r1_r <= r2_r && r1_b <= r2_b)
- return true;
- }
-
- return false;
+ DC_LOG_DEBUG(" dmcub_version : %08x", dc_dmub_srv->dmub->debug.dmcub_version);
+ DC_LOG_DEBUG(" scratch [0] : %08x", dc_dmub_srv->dmub->debug.scratch[0]);
+ DC_LOG_DEBUG(" scratch [1] : %08x", dc_dmub_srv->dmub->debug.scratch[1]);
+ DC_LOG_DEBUG(" scratch [2] : %08x", dc_dmub_srv->dmub->debug.scratch[2]);
+ DC_LOG_DEBUG(" scratch [3] : %08x", dc_dmub_srv->dmub->debug.scratch[3]);
+ DC_LOG_DEBUG(" scratch [4] : %08x", dc_dmub_srv->dmub->debug.scratch[4]);
+ DC_LOG_DEBUG(" scratch [5] : %08x", dc_dmub_srv->dmub->debug.scratch[5]);
+ DC_LOG_DEBUG(" scratch [6] : %08x", dc_dmub_srv->dmub->debug.scratch[6]);
+ DC_LOG_DEBUG(" scratch [7] : %08x", dc_dmub_srv->dmub->debug.scratch[7]);
+ DC_LOG_DEBUG(" scratch [8] : %08x", dc_dmub_srv->dmub->debug.scratch[8]);
+ DC_LOG_DEBUG(" scratch [9] : %08x", dc_dmub_srv->dmub->debug.scratch[9]);
+ DC_LOG_DEBUG(" scratch [10] : %08x", dc_dmub_srv->dmub->debug.scratch[10]);
+ DC_LOG_DEBUG(" scratch [11] : %08x", dc_dmub_srv->dmub->debug.scratch[11]);
+ DC_LOG_DEBUG(" scratch [12] : %08x", dc_dmub_srv->dmub->debug.scratch[12]);
+ DC_LOG_DEBUG(" scratch [13] : %08x", dc_dmub_srv->dmub->debug.scratch[13]);
+ DC_LOG_DEBUG(" scratch [14] : %08x", dc_dmub_srv->dmub->debug.scratch[14]);
+ DC_LOG_DEBUG(" scratch [15] : %08x", dc_dmub_srv->dmub->debug.scratch[15]);
+ for (i = 0; i < DMUB_PC_SNAPSHOT_COUNT; i++)
+ DC_LOG_DEBUG(" pc[%d] : %08x", i, dc_dmub_srv->dmub->debug.pc[i]);
+ DC_LOG_DEBUG(" unk_fault_addr : %08x", dc_dmub_srv->dmub->debug.undefined_address_fault_addr);
+ DC_LOG_DEBUG(" inst_fault_addr : %08x", dc_dmub_srv->dmub->debug.inst_fetch_fault_addr);
+ DC_LOG_DEBUG(" data_fault_addr : %08x", dc_dmub_srv->dmub->debug.data_write_fault_addr);
+ DC_LOG_DEBUG(" inbox1_rptr : %08x", dc_dmub_srv->dmub->debug.inbox1_rptr);
+ DC_LOG_DEBUG(" inbox1_wptr : %08x", dc_dmub_srv->dmub->debug.inbox1_wptr);
+ DC_LOG_DEBUG(" inbox1_size : %08x", dc_dmub_srv->dmub->debug.inbox1_size);
+ DC_LOG_DEBUG(" inbox0_rptr : %08x", dc_dmub_srv->dmub->debug.inbox0_rptr);
+ DC_LOG_DEBUG(" inbox0_wptr : %08x", dc_dmub_srv->dmub->debug.inbox0_wptr);
+ DC_LOG_DEBUG(" inbox0_size : %08x", dc_dmub_srv->dmub->debug.inbox0_size);
+ DC_LOG_DEBUG(" outbox1_rptr : %08x", dc_dmub_srv->dmub->debug.outbox1_rptr);
+ DC_LOG_DEBUG(" outbox1_wptr : %08x", dc_dmub_srv->dmub->debug.outbox1_wptr);
+ DC_LOG_DEBUG(" outbox1_size : %08x", dc_dmub_srv->dmub->debug.outbox1_size);
+ DC_LOG_DEBUG(" is_enabled : %d", dc_dmub_srv->dmub->debug.is_dmcub_enabled);
+ DC_LOG_DEBUG(" is_soft_reset : %d", dc_dmub_srv->dmub->debug.is_dmcub_soft_reset);
+ DC_LOG_DEBUG(" is_secure_reset : %d", dc_dmub_srv->dmub->debug.is_dmcub_secure_reset);
+ DC_LOG_DEBUG(" is_traceport_en : %d", dc_dmub_srv->dmub->debug.is_traceport_en);
+ DC_LOG_DEBUG(" is_cw0_en : %d", dc_dmub_srv->dmub->debug.is_cw0_enabled);
+ DC_LOG_DEBUG(" is_cw6_en : %d", dc_dmub_srv->dmub->debug.is_cw6_enabled);
}
static bool dc_dmub_should_update_cursor_data(struct pipe_ctx *pipe_ctx)
{
if (pipe_ctx->plane_state != NULL) {
- if (pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
- return false;
-
- if (dc_can_pipe_disable_cursor(pipe_ctx))
+ if (pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
+ resource_can_pipe_disable_cursor(pipe_ctx))
return false;
}
@@ -1016,7 +1134,7 @@ void dc_send_update_cursor_info_to_dmu(
pipe_idx, pCtx->plane_res.hubp, pCtx->plane_res.dpp);
/* Combine 2nd cmds update_curosr_info to DMU */
- dm_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd_list(pCtx->stream->ctx, 2, cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
}
@@ -1030,28 +1148,1103 @@ bool dc_dmub_check_min_version(struct dmub_srv *srv)
void dc_dmub_srv_enable_dpia_trace(const struct dc *dc)
{
struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
- struct dmub_srv *dmub;
- enum dmub_status status;
- static const uint32_t timeout_us = 30;
if (!dc_dmub_srv || !dc_dmub_srv->dmub) {
DC_LOG_ERROR("%s: invalid parameters.", __func__);
return;
}
- dmub = dc_dmub_srv->dmub;
-
- status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1, 0x0010, timeout_us);
- if (status != DMUB_STATUS_OK) {
+ if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1,
+ 0x0010, NULL, DM_DMUB_WAIT_TYPE_WAIT)) {
DC_LOG_ERROR("timeout updating trace buffer mask word\n");
return;
}
- status = dmub_srv_send_gpint_command(dmub, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK, 0x0000, timeout_us);
- if (status != DMUB_STATUS_OK) {
+ if (!dc_wake_and_execute_gpint(dc->ctx, DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK,
+ 0x0000, NULL, DM_DMUB_WAIT_TYPE_WAIT)) {
DC_LOG_ERROR("timeout updating trace buffer mask word\n");
return;
}
DC_LOG_DEBUG("Enabled DPIA trace\n");
-} \ No newline at end of file
+}
+
+void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, const struct dc_plane_address *addr, uint8_t subvp_index)
+{
+ dmub_srv_subvp_save_surf_addr(dc_dmub_srv->dmub, addr, subvp_index);
+}
+
+bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
+{
+ struct dc_context *dc_ctx;
+ enum dmub_status status;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return true;
+
+ if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation)
+ return true;
+
+ dc_ctx = dc_dmub_srv->ctx;
+
+ if (wait) {
+ if (dc_dmub_srv->ctx->dc->debug.disable_timeout) {
+ do {
+ status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
+ } while (status != DMUB_STATUS_OK);
+ } else {
+ status = dmub_srv_wait_for_hw_pwr_up(dc_dmub_srv->dmub, 500000);
+ if (status != DMUB_STATUS_OK) {
+ DC_ERROR("Error querying DMUB hw power up status: error=%d\n", status);
+ return false;
+ }
+ }
+ } else
+ return dmub_srv_is_hw_pwr_up(dc_dmub_srv->dmub);
+
+ return true;
+}
+
+static int count_active_streams(const struct dc *dc)
+{
+ int i, count = 0;
+
+ for (i = 0; i < dc->current_state->stream_count; ++i) {
+ struct dc_stream_state *stream = dc->current_state->streams[i];
+
+ if (stream && (!stream->dpms_off || dc->config.disable_ips_in_dpms_off))
+ count += 1;
+ }
+
+ return count;
+}
+
+static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
+{
+ volatile const struct dmub_shared_state_ips_fw *ips_fw;
+ struct dc_dmub_srv *dc_dmub_srv;
+ union dmub_rb_cmd cmd = {0};
+
+ if (dc->debug.dmcub_emulation)
+ return;
+
+ if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
+ return;
+
+ dc_dmub_srv = dc->ctx->dmub_srv;
+ ips_fw = &dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_FW].data.ips_fw;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.idle_opt_notify_idle.header.type = DMUB_CMD__IDLE_OPT;
+ cmd.idle_opt_notify_idle.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE;
+ cmd.idle_opt_notify_idle.header.payload_bytes =
+ sizeof(cmd.idle_opt_notify_idle) -
+ sizeof(cmd.idle_opt_notify_idle.header);
+
+ cmd.idle_opt_notify_idle.cntl_data.driver_idle = allow_idle;
+
+ if (dc->work_arounds.skip_psr_ips_crtc_disable)
+ cmd.idle_opt_notify_idle.cntl_data.skip_otg_disable = true;
+
+ if (allow_idle) {
+ volatile struct dmub_shared_state_ips_driver *ips_driver =
+ &dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_DRIVER].data.ips_driver;
+ union dmub_shared_state_ips_driver_signals new_signals;
+
+ DC_LOG_IPS(
+ "%s wait idle (ips1_commit=%u ips2_commit=%u)",
+ __func__,
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ dc_dmub_srv_wait_for_idle(dc->ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL);
+
+ memset(&new_signals, 0, sizeof(new_signals));
+
+ new_signals.bits.allow_idle = 1; /* always set */
+
+ if (dc->config.disable_ips == DMUB_IPS_ENABLE ||
+ dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) {
+ new_signals.bits.allow_pg = 1;
+ new_signals.bits.allow_ips1 = 1;
+ new_signals.bits.allow_ips2 = 1;
+ new_signals.bits.allow_z10 = 1;
+ // New in IPSv2.0
+ new_signals.bits.allow_ips1z8 = 1;
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
+ new_signals.bits.allow_ips1 = 1;
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
+ // IPSv1.0 only
+ new_signals.bits.allow_pg = 1;
+ new_signals.bits.allow_ips1 = 1;
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+ // IPSv1.0 only
+ new_signals.bits.allow_pg = 1;
+ new_signals.bits.allow_ips1 = 1;
+ new_signals.bits.allow_ips2 = 1;
+ } else if (dc->config.disable_ips == DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF) {
+ /* TODO: Move this logic out to hwseq */
+ if (count_active_streams(dc) == 0) {
+ /* IPS2 - Display off */
+ new_signals.bits.allow_pg = 1;
+ new_signals.bits.allow_ips1 = 1;
+ new_signals.bits.allow_ips2 = 1;
+ new_signals.bits.allow_z10 = 1;
+ // New in IPSv2.0
+ new_signals.bits.allow_ips1z8 = 1;
+ } else {
+ /* RCG only */
+ new_signals.bits.allow_pg = 0;
+ new_signals.bits.allow_ips1 = 1;
+ new_signals.bits.allow_ips2 = 0;
+ new_signals.bits.allow_z10 = 0;
+ }
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_Z8_RETENTION) {
+ new_signals.bits.allow_pg = 1;
+ new_signals.bits.allow_ips1 = 1;
+ new_signals.bits.allow_ips2 = 1;
+ new_signals.bits.allow_z10 = 1;
+ }
+ // Setting RCG allow bits (IPSv2.0)
+ if (dc->config.disable_ips_rcg == DMUB_IPS_RCG_ENABLE) {
+ new_signals.bits.allow_ips0_rcg = 1;
+ new_signals.bits.allow_ips1_rcg = 1;
+ } else if (dc->config.disable_ips_rcg == DMUB_IPS0_RCG_DISABLE) {
+ new_signals.bits.allow_ips1_rcg = 1;
+ } else if (dc->config.disable_ips_rcg == DMUB_IPS1_RCG_DISABLE) {
+ new_signals.bits.allow_ips0_rcg = 1;
+ }
+ // IPS dynamic allow bits (IPSv2 change, vpb use case)
+ if (dc->config.disable_ips_in_vpb == DMUB_IPS_VPB_ENABLE_IPS1_AND_RCG) {
+ new_signals.bits.allow_dynamic_ips1 = 1;
+ } else if (dc->config.disable_ips_in_vpb == DMUB_IPS_VPB_ENABLE_ALL) {
+ new_signals.bits.allow_dynamic_ips1 = 1;
+ new_signals.bits.allow_dynamic_ips1_z8 = 1;
+ }
+ ips_driver->signals = new_signals;
+ dc_dmub_srv->driver_signals = ips_driver->signals;
+ }
+
+ DC_LOG_IPS(
+ "%s send allow_idle=%d (ips1_commit=%u ips2_commit=%u)",
+ __func__,
+ allow_idle,
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ /* NOTE: This does not use the "wake" interface since this is part of the wake path. */
+ /* When allowing idle we also do not wait, since DMCUB could enter idle after the notification. */
+ dm_execute_dmub_cmd(dc->ctx, &cmd, allow_idle ? DM_DMUB_WAIT_TYPE_NO_WAIT : DM_DMUB_WAIT_TYPE_WAIT);
+
+ /* Register access should stop at this point. */
+ if (allow_idle)
+ dc_dmub_srv->needs_idle_wake = true;
+}
+
+static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
+{
+ struct dc_dmub_srv *dc_dmub_srv;
+ uint32_t rcg_exit_count = 0, ips1_exit_count = 0, ips2_exit_count = 0, ips1z8_exit_count = 0;
+
+ if (dc->debug.dmcub_emulation)
+ return;
+
+ if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
+ return;
+
+ dc_dmub_srv = dc->ctx->dmub_srv;
+
+ if (dc->clk_mgr->funcs->exit_low_power_state) {
+ volatile const struct dmub_shared_state_ips_fw *ips_fw =
+ &dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_FW].data.ips_fw;
+ volatile struct dmub_shared_state_ips_driver *ips_driver =
+ &dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_DRIVER].data.ips_driver;
+ union dmub_shared_state_ips_driver_signals prev_driver_signals = ips_driver->signals;
+
+ rcg_exit_count = ips_fw->rcg_exit_count;
+ ips1_exit_count = ips_fw->ips1_exit_count;
+ ips2_exit_count = ips_fw->ips2_exit_count;
+ ips1z8_exit_count = ips_fw->ips1_z8ret_exit_count;
+
+ ips_driver->signals.all = 0;
+ dc_dmub_srv->driver_signals = ips_driver->signals;
+
+ DC_LOG_IPS(
+ "%s (allow ips1=%u ips2=%u) (commit ips1=%u ips2=%u ips1z8=%u) (count rcg=%u ips1=%u ips2=%u ips1_z8=%u)",
+ __func__,
+ ips_driver->signals.bits.allow_ips1,
+ ips_driver->signals.bits.allow_ips2,
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit,
+ ips_fw->signals.bits.ips1z8_commit,
+ ips_fw->rcg_entry_count,
+ ips_fw->ips1_entry_count,
+ ips_fw->ips2_entry_count,
+ ips_fw->ips1_z8ret_entry_count);
+
+ /* Note: register access has technically not resumed for DCN here, but we
+ * need to be able to message PMFW through our standard register interface.
+ */
+ dc_dmub_srv->needs_idle_wake = false;
+
+ if (!dc->caps.ips_v2_support && ((prev_driver_signals.bits.allow_ips2 || prev_driver_signals.all == 0) &&
+ (!dc->debug.optimize_ips_handshake ||
+ ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle))) {
+ DC_LOG_IPS(
+ "wait IPS2 eval (ips1_commit=%u ips2_commit=%u )",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ if (!dc->debug.optimize_ips_handshake || !ips_fw->signals.bits.ips2_commit)
+ udelay(dc->debug.ips2_eval_delay_us);
+
+ DC_LOG_IPS(
+ "exit IPS2 #1 (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ // Tell PMFW to exit low power state
+ dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
+
+ if (ips_fw->signals.bits.ips2_commit) {
+
+ DC_LOG_IPS(
+ "wait IPS2 entry delay (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ // Wait for IPS2 entry upper bound
+ udelay(dc->debug.ips2_entry_delay_us);
+
+ DC_LOG_IPS(
+ "exit IPS2 #2 (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
+
+ DC_LOG_IPS(
+ "wait IPS2 commit clear (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ while (ips_fw->signals.bits.ips2_commit)
+ udelay(1);
+
+ DC_LOG_IPS(
+ "wait hw_pwr_up (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true))
+ ASSERT(0);
+
+ DC_LOG_IPS(
+ "resync inbox1 (ips1_commit=%u ips2_commit=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit);
+
+ dmub_srv_sync_inboxes(dc->ctx->dmub_srv->dmub);
+ }
+ }
+
+ dc_dmub_srv_notify_idle(dc, false);
+ if (prev_driver_signals.bits.allow_ips1 || prev_driver_signals.all == 0) {
+ DC_LOG_IPS(
+ "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u ips1z8=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit,
+ ips_fw->signals.bits.ips1z8_commit);
+
+ while (ips_fw->signals.bits.ips1_commit)
+ udelay(1);
+
+ DC_LOG_IPS(
+ "wait for IPS1 commit clear done (ips1_commit=%u ips2_commit=%u ips1z8=%u)",
+ ips_fw->signals.bits.ips1_commit,
+ ips_fw->signals.bits.ips2_commit,
+ ips_fw->signals.bits.ips1z8_commit);
+ }
+ }
+
+ if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true))
+ ASSERT(0);
+
+ DC_LOG_IPS("%s exit (count rcg=%u ips1=%u ips2=%u ips1z8=%u)",
+ __func__,
+ rcg_exit_count,
+ ips1_exit_count,
+ ips2_exit_count,
+ ips1z8_exit_count);
+}
+
+void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state power_state)
+{
+ struct dmub_srv *dmub;
+
+ if (!dc_dmub_srv)
+ return;
+
+ dmub = dc_dmub_srv->dmub;
+
+ if (power_state == DC_ACPI_CM_POWER_STATE_D0)
+ dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D0);
+ else
+ dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3);
+}
+
+void dc_dmub_srv_notify_fw_dc_power_state(struct dc_dmub_srv *dc_dmub_srv,
+ enum dc_acpi_cm_power_state power_state)
+{
+ union dmub_rb_cmd cmd;
+
+ if (!dc_dmub_srv)
+ return;
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ cmd.idle_opt_set_dc_power_state.header.type = DMUB_CMD__IDLE_OPT;
+ cmd.idle_opt_set_dc_power_state.header.sub_type = DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE;
+ cmd.idle_opt_set_dc_power_state.header.payload_bytes =
+ sizeof(cmd.idle_opt_set_dc_power_state) - sizeof(cmd.idle_opt_set_dc_power_state.header);
+
+ if (power_state == DC_ACPI_CM_POWER_STATE_D0) {
+ cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_D0;
+ } else if (power_state == DC_ACPI_CM_POWER_STATE_D3) {
+ cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_D3;
+ } else {
+ cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_UNKNOWN;
+ }
+
+ dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+bool dc_dmub_srv_should_detect(struct dc_dmub_srv *dc_dmub_srv)
+{
+ volatile const struct dmub_shared_state_ips_fw *ips_fw;
+ bool reallow_idle = false, should_detect = false;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
+
+ if (dc_dmub_srv->dmub->shared_state &&
+ dc_dmub_srv->dmub->meta_info.feature_bits.bits.shared_state_link_detection) {
+ ips_fw = &dc_dmub_srv->dmub->shared_state[DMUB_SHARED_SHARE_FEATURE__IPS_FW].data.ips_fw;
+ return ips_fw->signals.bits.detection_required;
+ }
+
+ /* Detection may require reading scratch 0 - exit out of idle prior to the read. */
+ if (dc_dmub_srv->idle_allowed) {
+ dc_dmub_srv_apply_idle_power_optimizations(dc_dmub_srv->ctx->dc, false);
+ reallow_idle = true;
+ }
+
+ should_detect = dmub_srv_should_detect(dc_dmub_srv->dmub);
+
+ /* Re-enter idle if we're not about to immediately redetect links. */
+ if (!should_detect && reallow_idle && dc_dmub_srv->idle_exit_counter == 0 &&
+ !dc_dmub_srv->ctx->dc->debug.disable_dmub_reallow_idle)
+ dc_dmub_srv_apply_idle_power_optimizations(dc_dmub_srv->ctx->dc, true);
+
+ return should_detect;
+}
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle)
+{
+ struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return;
+
+ allow_idle &= (!dc->debug.ips_disallow_entry);
+
+ if (dc_dmub_srv->idle_allowed == allow_idle)
+ return;
+
+ DC_LOG_IPS("%s state change: old=%d new=%d", __func__, dc_dmub_srv->idle_allowed, allow_idle);
+
+ /*
+ * Entering a low power state requires a driver notification.
+ * Powering up the hardware requires notifying PMFW and DMCUB.
+ * Clearing the driver idle allow requires a DMCUB command.
+ * DMCUB commands require the DMCUB to be powered up and restored.
+ */
+
+ if (!allow_idle) {
+ dc_dmub_srv->idle_exit_counter += 1;
+
+ dc_dmub_srv_exit_low_power_state(dc);
+ /*
+ * Idle is considered fully exited only after the sequence above
+ * fully completes. If we have a race of two threads exiting
+ * at the same time then it's safe to perform the sequence
+ * twice as long as we're not re-entering.
+ *
+ * Infinite command submission is avoided by using the
+ * dm_execute_dmub_cmd submission instead of the "wake" helpers.
+ */
+ dc_dmub_srv->idle_allowed = false;
+
+ dc_dmub_srv->idle_exit_counter -= 1;
+ if (dc_dmub_srv->idle_exit_counter < 0) {
+ ASSERT(0);
+ dc_dmub_srv->idle_exit_counter = 0;
+ }
+ } else {
+ /* Consider idle as notified prior to the actual submission to
+ * prevent multiple entries. */
+ dc_dmub_srv->idle_allowed = true;
+
+ dc_dmub_srv_notify_idle(dc, allow_idle);
+ }
+}
+
+bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd,
+ enum dm_dmub_wait_type wait_type)
+{
+ return dc_wake_and_execute_dmub_cmd_list(ctx, 1, cmd, wait_type);
+}
+
+bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
+ union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type)
+{
+ struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv;
+ bool result = false, reallow_idle = false;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
+
+ if (count == 0)
+ return true;
+
+ if (dc_dmub_srv->idle_allowed) {
+ dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
+ reallow_idle = true;
+ }
+
+ /*
+ * These may have different implementations in DM, so ensure
+ * that we guide it to the expected helper.
+ */
+ if (count > 1)
+ result = dm_execute_dmub_cmd_list(ctx, count, cmd, wait_type);
+ else
+ result = dm_execute_dmub_cmd(ctx, cmd, wait_type);
+
+ if (result && reallow_idle && dc_dmub_srv->idle_exit_counter == 0 &&
+ !ctx->dc->debug.disable_dmub_reallow_idle)
+ dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);
+
+ return result;
+}
+
+static bool dc_dmub_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+ uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type)
+{
+ struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv;
+ const uint32_t wait_us = wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT ? 0 : 30;
+ enum dmub_status status;
+
+ if (response)
+ *response = 0;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
+
+ status = dmub_srv_send_gpint_command(dc_dmub_srv->dmub, command_code, param, wait_us);
+ if (status != DMUB_STATUS_OK) {
+ if (status == DMUB_STATUS_TIMEOUT && wait_type == DM_DMUB_WAIT_TYPE_NO_WAIT)
+ return true;
+
+ return false;
+ }
+
+ if (response && wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)
+ dmub_srv_get_gpint_response(dc_dmub_srv->dmub, response);
+
+ return true;
+}
+
+bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+ uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type)
+{
+ struct dc_dmub_srv *dc_dmub_srv = ctx->dmub_srv;
+ bool result = false, reallow_idle = false;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return false;
+
+ if (dc_dmub_srv->idle_allowed) {
+ dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, false);
+ reallow_idle = true;
+ }
+
+ result = dc_dmub_execute_gpint(ctx, command_code, param, response, wait_type);
+
+ if (result && reallow_idle && dc_dmub_srv->idle_exit_counter == 0 &&
+ !ctx->dc->debug.disable_dmub_reallow_idle)
+ dc_dmub_srv_apply_idle_power_optimizations(ctx->dc, true);
+
+ return result;
+}
+
+static void dc_dmub_srv_rb_based_fams2_update_config(struct dc *dc,
+ struct dc_state *context,
+ bool enable)
+{
+ uint8_t num_cmds = 1;
+ uint32_t i;
+ union dmub_rb_cmd cmd[2 * MAX_STREAMS + 1];
+ struct dmub_rb_cmd_fams2 *global_cmd = &cmd[0].fams2_config;
+
+ memset(cmd, 0, sizeof(union dmub_rb_cmd) * (2 * MAX_STREAMS + 1));
+ /* fill in generic command header */
+ global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG;
+ global_cmd->header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header);
+
+ if (enable) {
+ /* send global configuration parameters */
+ memcpy(&global_cmd->config.global, &context->bw_ctx.bw.dcn.fams2_global_config, sizeof(struct dmub_cmd_fams2_global_config));
+
+ /* copy static feature configuration overrides */
+ global_cmd->config.global.features.bits.enable_stall_recovery = dc->debug.fams2_config.bits.enable_stall_recovery;
+ global_cmd->config.global.features.bits.enable_debug = dc->debug.fams2_config.bits.enable_debug;
+ global_cmd->config.global.features.bits.enable_offload_flip = dc->debug.fams2_config.bits.enable_offload_flip;
+
+ /* construct per-stream configs */
+ for (i = 0; i < context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) {
+ struct dmub_rb_cmd_fams2 *stream_base_cmd = &cmd[i+1].fams2_config;
+ struct dmub_rb_cmd_fams2 *stream_sub_state_cmd = &cmd[i+1+context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config;
+
+ /* configure command header */
+ stream_base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ stream_base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG;
+ stream_base_cmd->header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header);
+ stream_base_cmd->header.multi_cmd_pending = 1;
+ stream_sub_state_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ stream_sub_state_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG;
+ stream_sub_state_cmd->header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header);
+ stream_sub_state_cmd->header.multi_cmd_pending = 1;
+ /* copy stream static base state */
+ memcpy(&stream_base_cmd->config,
+ &context->bw_ctx.bw.dcn.fams2_stream_base_params[i],
+ sizeof(union dmub_cmd_fams2_config));
+ /* copy stream static sub state */
+ memcpy(&stream_sub_state_cmd->config,
+ &context->bw_ctx.bw.dcn.fams2_stream_sub_params[i],
+ sizeof(union dmub_cmd_fams2_config));
+ }
+ }
+
+ /* apply feature configuration based on current driver state */
+ global_cmd->config.global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2;
+ global_cmd->config.global.features.bits.enable = enable;
+
+ if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) {
+ /* set multi pending for global, and unset for last stream cmd */
+ global_cmd->header.multi_cmd_pending = 1;
+ cmd[2 * context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0;
+ num_cmds += 2 * context->bw_ctx.bw.dcn.fams2_global_config.num_streams;
+ }
+
+ dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+static void dc_dmub_srv_ib_based_fams2_update_config(struct dc *dc,
+ struct dc_state *context,
+ bool enable)
+{
+ struct dmub_fams2_config_v2 *config = (struct dmub_fams2_config_v2 *)dc->ctx->dmub_srv->dmub->ib_mem_gart.cpu_addr;
+ union dmub_rb_cmd cmd;
+ uint32_t i;
+
+ memset(config, 0, sizeof(*config));
+ memset(&cmd, 0, sizeof(cmd));
+
+ cmd.ib_fams2_config.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ cmd.ib_fams2_config.header.sub_type = DMUB_CMD__FAMS2_IB_CONFIG;
+
+ cmd.ib_fams2_config.ib_data.src.quad_part = dc->ctx->dmub_srv->dmub->ib_mem_gart.gpu_addr;
+ cmd.ib_fams2_config.ib_data.size = sizeof(*config);
+
+ if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) {
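+ /* copy static feature configuration overrides - NOTE(review): the memcpy into config->global below appears to overwrite these bits; confirm the intended order */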
+ config->global.features.bits.enable_stall_recovery = dc->debug.fams2_config.bits.enable_stall_recovery;
+ config->global.features.bits.enable_offload_flip = dc->debug.fams2_config.bits.enable_offload_flip;
+ config->global.features.bits.enable_debug = dc->debug.fams2_config.bits.enable_debug;
+
+ /* send global configuration parameters */
+ memcpy(&config->global, &context->bw_ctx.bw.dcn.fams2_global_config,
+ sizeof(struct dmub_cmd_fams2_global_config));
+
+ /* construct per-stream configs */
+ for (i = 0; i < context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) {
+ /* copy stream static base state */
+ memcpy(&config->stream_v1[i].base,
+ &context->bw_ctx.bw.dcn.fams2_stream_base_params[i],
+ sizeof(config->stream_v1[i].base));
+
+ /* copy stream static sub-state */
+ memcpy(&config->stream_v1[i].sub_state,
+ &context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2[i],
+ sizeof(config->stream_v1[i].sub_state));
+ }
+ }
+
+ config->global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2;
+ config->global.features.bits.enable = enable;
+
+ dm_execute_dmub_cmd_list(dc->ctx, 1, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+void dc_dmub_srv_fams2_update_config(struct dc *dc,
+ struct dc_state *context,
+ bool enable)
+{
+ if (dc->debug.fams_version.major == 2)
+ dc_dmub_srv_rb_based_fams2_update_config(dc, context, enable);
+ if (dc->debug.fams_version.major == 3)
+ dc_dmub_srv_ib_based_fams2_update_config(dc, context, enable);
+}
+
+/*
+ * Build a DMUB_CMD__FW_ASSISTED_MCLK_SWITCH / DMUB_CMD__FAMS2_DRR_UPDATE
+ * command from the given arguments and submit it through
+ * dm_execute_dmub_cmd() with DM_DMUB_WAIT_TYPE_WAIT.
+ *
+ * tg_inst, vtotal_min, vtotal_max, vtotal_mid, vtotal_mid_frame_num and
+ * program_manual_trigger are copied unchanged into the command's
+ * dmub_optc_state_req fields of the same (or v_total_*) name.
+ */
+void dc_dmub_srv_fams2_drr_update(struct dc *dc,
+ uint32_t tg_inst,
+ uint32_t vtotal_min,
+ uint32_t vtotal_max,
+ uint32_t vtotal_mid,
+ uint32_t vtotal_mid_frame_num,
+ bool program_manual_trigger)
+{
+ union dmub_rb_cmd cmd = { 0 };
+
+ cmd.fams2_drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ cmd.fams2_drr_update.header.sub_type = DMUB_CMD__FAMS2_DRR_UPDATE;
+ cmd.fams2_drr_update.dmub_optc_state_req.tg_inst = tg_inst;
+ cmd.fams2_drr_update.dmub_optc_state_req.v_total_max = vtotal_max;
+ cmd.fams2_drr_update.dmub_optc_state_req.v_total_min = vtotal_min;
+ cmd.fams2_drr_update.dmub_optc_state_req.v_total_mid = vtotal_mid;
+ cmd.fams2_drr_update.dmub_optc_state_req.v_total_mid_frame_num = vtotal_mid_frame_num;
+ cmd.fams2_drr_update.dmub_optc_state_req.program_manual_trigger = program_manual_trigger;
+
+ /* payload size is the command size without its header */
+ cmd.fams2_drr_update.header.payload_bytes =
+ sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header);
+
+ dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Send one FAMS2 flip command to DMUB for every surface update that carries
+ * an address update (flip_addr set).
+ *
+ * @dc: DC instance; dc->ctx is used to submit the commands
+ * @state: DC state used to look up the pipe mask of each plane
+ * @stream: stream whose status supplies primary_otg_inst
+ * @srf_updates: array of @surface_count surface updates
+ * @surface_count: number of entries in @srf_updates
+ *
+ * The commands are chained: every command but the last has
+ * multi_cmd_pending set, and the list is submitted in a single call with
+ * DM_DMUB_WAIT_TYPE_WAIT. Nothing is sent when @surface_count is not
+ * positive, when the stream has no status, or when no update has flip_addr.
+ * At most MAX_PLANES commands are built; further updates are not queued.
+ */
+void dc_dmub_srv_fams2_passthrough_flip(
+		struct dc *dc,
+		struct dc_state *state,
+		struct dc_stream_state *stream,
+		struct dc_surface_update *srf_updates,
+		int surface_count)
+{
+	int plane_index;
+	union dmub_rb_cmd cmds[MAX_PLANES];
+	struct dc_plane_address *address;
+	struct dc_plane_state *plane_state;
+	int num_cmds = 0;
+	struct dc_stream_status *stream_status = dc_stream_get_status(stream);
+
+	if (surface_count <= 0 || stream_status == NULL)
+		return;
+
+	memset(cmds, 0, sizeof(cmds));
+
+	/*
+	 * build command for each surface update; cmds[] only holds MAX_PLANES
+	 * entries, so stop once it is full rather than writing past its end
+	 */
+	for (plane_index = 0;
+	     plane_index < surface_count && num_cmds < MAX_PLANES;
+	     plane_index++) {
+		/* skip if there is no address update for plane */
+		if (!srf_updates[plane_index].flip_addr)
+			continue;
+
+		plane_state = srf_updates[plane_index].surface;
+		address = &plane_state->address;
+
+		/* build command header */
+		cmds[num_cmds].fams2_flip.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+		cmds[num_cmds].fams2_flip.header.sub_type = DMUB_CMD__FAMS2_FLIP;
+		cmds[num_cmds].fams2_flip.header.payload_bytes =
+				sizeof(struct dmub_rb_cmd_fams2_flip) - sizeof(struct dmub_cmd_header);
+
+		/* for chaining multiple commands, all but last command should set to 1 */
+		cmds[num_cmds].fams2_flip.header.multi_cmd_pending = 1;
+
+		/* set topology info; stream_status was checked for NULL above */
+		cmds[num_cmds].fams2_flip.flip_info.pipe_mask = dc_plane_get_pipe_mask(state, plane_state);
+		cmds[num_cmds].fams2_flip.flip_info.otg_inst = stream_status->primary_otg_inst;
+
+		cmds[num_cmds].fams2_flip.flip_info.config.bits.is_immediate = plane_state->flip_immediate;
+
+		/*
+		 * build address info for command
+		 * NOTE(review): on a zero or unknown address the command is still
+		 * queued with its address fields left zeroed - confirm intended.
+		 */
+		switch (address->type) {
+		case PLN_ADDR_TYPE_GRAPHICS:
+			if (address->grph.addr.quad_part == 0) {
+				BREAK_TO_DEBUGGER();
+				break;
+			}
+
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.meta_addr_lo =
+					address->grph.meta_addr.low_part;
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.meta_addr_hi =
+					(uint16_t)address->grph.meta_addr.high_part;
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.surf_addr_lo =
+					address->grph.addr.low_part;
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.surf_addr_hi =
+					(uint16_t)address->grph.addr.high_part;
+			break;
+		case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE:
+			if (address->video_progressive.luma_addr.quad_part == 0 ||
+				address->video_progressive.chroma_addr.quad_part == 0) {
+				BREAK_TO_DEBUGGER();
+				break;
+			}
+
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.meta_addr_lo =
+					address->video_progressive.luma_meta_addr.low_part;
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.meta_addr_hi =
+					(uint16_t)address->video_progressive.luma_meta_addr.high_part;
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.meta_addr_c_lo =
+					address->video_progressive.chroma_meta_addr.low_part;
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.meta_addr_c_hi =
+					(uint16_t)address->video_progressive.chroma_meta_addr.high_part;
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.surf_addr_lo =
+					address->video_progressive.luma_addr.low_part;
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.surf_addr_hi =
+					(uint16_t)address->video_progressive.luma_addr.high_part;
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.surf_addr_c_lo =
+					address->video_progressive.chroma_addr.low_part;
+			cmds[num_cmds].fams2_flip.flip_info.addr_info.surf_addr_c_hi =
+					(uint16_t)address->video_progressive.chroma_addr.high_part;
+			break;
+		default:
+			// Should never be hit
+			BREAK_TO_DEBUGGER();
+			break;
+		}
+
+		num_cmds++;
+	}
+
+	if (num_cmds > 0) {
+		/* last command in the chain must not advertise a follow-up */
+		cmds[num_cmds - 1].fams2_flip.header.multi_cmd_pending = 0;
+		dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmds, DM_DMUB_WAIT_TYPE_WAIT);
+	}
+}
+
+
+/*
+ * Send a DMUB_CMD__IPS / DMUB_CMD__IPS_RESIDENCY_CNTL command carrying
+ * @panel_inst and @start_measurement, waiting for the reply.
+ *
+ * Return: the result of dc_wake_and_execute_dmub_cmd().
+ */
+bool dc_dmub_srv_ips_residency_cntl(const struct dc_context *ctx, uint8_t panel_inst, bool start_measurement)
+{
+	union dmub_rb_cmd cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	// only panel_inst=0 is supported at the moment
+	cmd.ips_residency_cntl.cntl_data.panel_inst = panel_inst;
+	cmd.ips_residency_cntl.cntl_data.start_measurement = start_measurement;
+
+	cmd.ips_residency_cntl.header.payload_bytes = sizeof(struct dmub_cmd_ips_residency_cntl_data);
+	cmd.ips_residency_cntl.header.sub_type = DMUB_CMD__IPS_RESIDENCY_CNTL;
+	cmd.ips_residency_cntl.header.type = DMUB_CMD__IPS;
+
+	return dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+}
+
+/*
+ * Ask DMUB for IPS residency info for @panel_inst / @ips_mode and copy the
+ * answer into @driver_info.
+ *
+ * DMUB is given the GPU address of scratch_mem_fb as destination; on success
+ * sizeof(struct dmub_ips_residency_info) bytes are copied from the CPU
+ * mapping of that buffer into @driver_info.
+ *
+ * Return: true when the command executed and header.ret_status is non-zero,
+ * false otherwise (@driver_info is left untouched in that case).
+ */
+bool dc_dmub_srv_ips_query_residency_info(const struct dc_context *ctx, uint8_t panel_inst, struct dmub_ips_residency_info *driver_info,
+		enum ips_residency_mode ips_mode)
+{
+	union dmub_rb_cmd cmd;
+	uint32_t info_size = sizeof(struct dmub_ips_residency_info);
+
+	dmub_flush_buffer_mem(&ctx->dmub_srv->dmub->scratch_mem_fb);
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.ips_query_residency_info.header.type = DMUB_CMD__IPS;
+	cmd.ips_query_residency_info.header.sub_type = DMUB_CMD__IPS_QUERY_RESIDENCY_INFO;
+	cmd.ips_query_residency_info.header.payload_bytes = sizeof(struct dmub_cmd_ips_query_residency_info_data);
+
+	cmd.ips_query_residency_info.info_data.ips_mode = (uint32_t)ips_mode;
+	cmd.ips_query_residency_info.info_data.panel_inst = panel_inst;
+	cmd.ips_query_residency_info.info_data.size = info_size;
+	cmd.ips_query_residency_info.info_data.dest.quad_part = ctx->dmub_srv->dmub->scratch_mem_fb.gpu_addr;
+
+	if (!dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+		return false;
+
+	/* ret_status == 0 means the command returned no data */
+	if (cmd.ips_query_residency_info.header.ret_status == 0)
+		return false;
+
+	// copy the result to the output since ret_status != 0 means the command returned data
+	memcpy(driver_info, ctx->dmub_srv->dmub->scratch_mem_fb.cpu_addr, info_size);
+
+	return true;
+}
+
+/*
+ * Send the DMUB_CMD__LSDMA_INIT_CONFIG command, passing the GPU address and
+ * size of lsdma_rb_fb as gpu_addr_base and ring_size. The command is
+ * submitted with DM_DMUB_WAIT_TYPE_NO_WAIT.
+ *
+ * Return: result of dc_wake_and_execute_dmub_cmd(); a failure is also
+ * reported through DC_ERROR().
+ */
+bool dmub_lsdma_init(struct dc_dmub_srv *dc_dmub_srv)
+{
+	/* NOTE(review): DC_ERROR() may reference dc_ctx by name - confirm before renaming */
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd cmd;
+	struct dmub_cmd_lsdma_data *payload = &cmd.lsdma.lsdma_data;
+	bool ok;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_INIT_CONFIG;
+	cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+
+	payload->u.init_data.ring_size = dc_ctx->dmub_srv->dmub->lsdma_rb_fb.size;
+	payload->u.init_data.gpu_addr_base.quad_part = dc_ctx->dmub_srv->dmub->lsdma_rb_fb.gpu_addr;
+
+	ok = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	if (!ok)
+		DC_ERROR("LSDMA Init failed in DMUB");
+
+	return ok;
+}
+
+/*
+ * Queue an LSDMA linear copy of @count bytes from @src_addr to @dst_addr.
+ *
+ * The 64-bit addresses are split into 32-bit lo/hi halves. The command
+ * carries @count - 1, so a zero @count cannot be encoded: it would wrap to
+ * the largest value and request a huge copy. Such a request is rejected.
+ * The command is submitted with DM_DMUB_WAIT_TYPE_NO_WAIT.
+ *
+ * Return: false when @count is zero or when dc_wake_and_execute_dmub_cmd()
+ * fails (both are reported through DC_ERROR()), true otherwise.
+ */
+bool dmub_lsdma_send_linear_copy_command(
+	struct dc_dmub_srv *dc_dmub_srv,
+	uint64_t src_addr,
+	uint64_t dst_addr,
+	uint32_t count
+)
+{
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd cmd;
+	enum dm_dmub_wait_type wait_type;
+	struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data;
+	bool result;
+
+	/* count - 1 below would underflow for an empty copy */
+	if (count == 0) {
+		DC_ERROR("LSDMA Linear Copy rejected: zero byte count");
+		return false;
+	}
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+	cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_LINEAR_COPY;
+	wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT;
+
+	lsdma_data->u.linear_copy_data.count = count - 1; // LSDMA controller expects bytes to copy -1
+	lsdma_data->u.linear_copy_data.src_lo = src_addr & 0xFFFFFFFF;
+	lsdma_data->u.linear_copy_data.src_hi = (src_addr >> 32) & 0xFFFFFFFF;
+	lsdma_data->u.linear_copy_data.dst_lo = dst_addr & 0xFFFFFFFF;
+	lsdma_data->u.linear_copy_data.dst_hi = (dst_addr >> 32) & 0xFFFFFFFF;
+
+	result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type);
+
+	if (!result)
+		DC_ERROR("LSDMA Linear Copy failed in DMUB");
+
+	return result;
+}
+
+/*
+ * Queue a DMUB_CMD__LSDMA_LINEAR_SUB_WINDOW_COPY command. Every field of
+ * @copy_data is forwarded unchanged into the same-named field of the
+ * command payload; the command is submitted with DM_DMUB_WAIT_TYPE_NO_WAIT.
+ *
+ * Return: result of dc_wake_and_execute_dmub_cmd(); a failure is also
+ * reported through DC_ERROR().
+ */
+bool dmub_lsdma_send_linear_sub_window_copy_command(
+	struct dc_dmub_srv *dc_dmub_srv,
+	struct lsdma_linear_sub_window_copy_params copy_data
+)
+{
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd cmd;
+	struct dmub_cmd_lsdma_data *payload = &cmd.lsdma.lsdma_data;
+	bool ok;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_LINEAR_SUB_WINDOW_COPY;
+	cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+
+	/* source side */
+	payload->u.linear_sub_window_copy_data.src_lo = copy_data.src_lo;
+	payload->u.linear_sub_window_copy_data.src_hi = copy_data.src_hi;
+	payload->u.linear_sub_window_copy_data.src_x = copy_data.src_x;
+	payload->u.linear_sub_window_copy_data.src_y = copy_data.src_y;
+	payload->u.linear_sub_window_copy_data.src_pitch = copy_data.src_pitch;
+	payload->u.linear_sub_window_copy_data.src_slice_pitch = copy_data.src_slice_pitch;
+	payload->u.linear_sub_window_copy_data.src_cache_policy = copy_data.src_cache_policy;
+
+	/* destination side */
+	payload->u.linear_sub_window_copy_data.dst_lo = copy_data.dst_lo;
+	payload->u.linear_sub_window_copy_data.dst_hi = copy_data.dst_hi;
+	payload->u.linear_sub_window_copy_data.dst_x = copy_data.dst_x;
+	payload->u.linear_sub_window_copy_data.dst_y = copy_data.dst_y;
+	payload->u.linear_sub_window_copy_data.dst_pitch = copy_data.dst_pitch;
+	payload->u.linear_sub_window_copy_data.dst_slice_pitch = copy_data.dst_slice_pitch;
+	payload->u.linear_sub_window_copy_data.dst_cache_policy = copy_data.dst_cache_policy;
+
+	/* rectangle and remaining attributes */
+	payload->u.linear_sub_window_copy_data.rect_x = copy_data.rect_x;
+	payload->u.linear_sub_window_copy_data.rect_y = copy_data.rect_y;
+	payload->u.linear_sub_window_copy_data.element_size = copy_data.element_size;
+	payload->u.linear_sub_window_copy_data.tmz = copy_data.tmz;
+
+	ok = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	if (!ok)
+		DC_ERROR("LSDMA Linear Sub Window Copy failed in DMUB");
+
+	return ok;
+}
+
+/*
+ * Queue a DMUB_CMD__LSDMA_TILED_TO_TILED_COPY command built from @params.
+ *
+ * The 64-bit source/destination addresses are split into 32-bit lo/hi
+ * halves. params.swizzle_mode and params.element_size are each written to
+ * both the src_* and dst_* fields of the command. All other fields are
+ * copied one-to-one. Submitted with DM_DMUB_WAIT_TYPE_NO_WAIT.
+ *
+ * Return: result of dc_wake_and_execute_dmub_cmd(); a failure is also
+ * reported through DC_ERROR().
+ */
+bool dmub_lsdma_send_tiled_to_tiled_copy_command(
+	struct dc_dmub_srv *dc_dmub_srv,
+	struct lsdma_send_tiled_to_tiled_copy_command_params params
+)
+{
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd cmd;
+	struct dmub_cmd_lsdma_data *payload = &cmd.lsdma.lsdma_data;
+	bool ok;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_TILED_TO_TILED_COPY;
+	cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+
+	/* source side */
+	payload->u.tiled_copy_data.src_addr_lo = params.src_addr & 0xFFFFFFFF;
+	payload->u.tiled_copy_data.src_addr_hi = (params.src_addr >> 32) & 0xFFFFFFFF;
+	payload->u.tiled_copy_data.src_x = params.src_x;
+	payload->u.tiled_copy_data.src_y = params.src_y;
+	payload->u.tiled_copy_data.src_width = params.src_width;
+	payload->u.tiled_copy_data.src_height = params.src_height;
+	payload->u.tiled_copy_data.src_swizzle_mode = params.swizzle_mode;
+	payload->u.tiled_copy_data.src_element_size = params.element_size;
+
+	/* destination side */
+	payload->u.tiled_copy_data.dst_addr_lo = params.dst_addr & 0xFFFFFFFF;
+	payload->u.tiled_copy_data.dst_addr_hi = (params.dst_addr >> 32) & 0xFFFFFFFF;
+	payload->u.tiled_copy_data.dst_x = params.dst_x;
+	payload->u.tiled_copy_data.dst_y = params.dst_y;
+	payload->u.tiled_copy_data.dst_width = params.dst_width;
+	payload->u.tiled_copy_data.dst_height = params.dst_height;
+	payload->u.tiled_copy_data.dst_swizzle_mode = params.swizzle_mode;
+	payload->u.tiled_copy_data.dst_element_size = params.element_size;
+
+	/* rectangle */
+	payload->u.tiled_copy_data.rect_x = params.rect_x;
+	payload->u.tiled_copy_data.rect_y = params.rect_y;
+
+	/* remaining attributes */
+	payload->u.tiled_copy_data.data_format = params.data_format;
+	payload->u.tiled_copy_data.dcc = params.dcc;
+	payload->u.tiled_copy_data.tmz = params.tmz;
+	payload->u.tiled_copy_data.read_compress = params.read_compress;
+	payload->u.tiled_copy_data.write_compress = params.write_compress;
+	payload->u.tiled_copy_data.max_com = params.max_com;
+	payload->u.tiled_copy_data.max_uncom = params.max_uncom;
+
+	ok = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	if (!ok)
+		DC_ERROR("LSDMA Tiled to Tiled Copy failed in DMUB");
+
+	return ok;
+}
+
+/*
+ * Queue a DMUB_CMD__LSDMA_PIO_COPY command: @byte_count and
+ * @overlap_disable go into the packet fields, and the 64-bit @src_addr /
+ * @dst_addr are split into 32-bit lo/hi halves. Submitted with
+ * DM_DMUB_WAIT_TYPE_NO_WAIT.
+ *
+ * Return: result of dc_wake_and_execute_dmub_cmd(); a failure is also
+ * reported through DC_ERROR().
+ */
+bool dmub_lsdma_send_pio_copy_command(
+	struct dc_dmub_srv *dc_dmub_srv,
+	uint64_t src_addr,
+	uint64_t dst_addr,
+	uint32_t byte_count,
+	uint32_t overlap_disable
+)
+{
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd cmd;
+	struct dmub_cmd_lsdma_data *payload = &cmd.lsdma.lsdma_data;
+	bool ok;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_PIO_COPY;
+	cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+
+	payload->u.pio_copy_data.src_lo = src_addr & 0xFFFFFFFF;
+	payload->u.pio_copy_data.src_hi = (src_addr >> 32) & 0xFFFFFFFF;
+	payload->u.pio_copy_data.dst_lo = dst_addr & 0xFFFFFFFF;
+	payload->u.pio_copy_data.dst_hi = (dst_addr >> 32) & 0xFFFFFFFF;
+	payload->u.pio_copy_data.packet.fields.overlap_disable = overlap_disable;
+	payload->u.pio_copy_data.packet.fields.byte_count = byte_count;
+
+	ok = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	if (!ok)
+		DC_ERROR("LSDMA PIO Copy failed in DMUB");
+
+	return ok;
+}
+
+/*
+ * Queue a DMUB_CMD__LSDMA_PIO_CONSTFILL command: the packet's
+ * constant_fill field is set to 1, @byte_count and @data are copied in, and
+ * the 64-bit @dst_addr is split into 32-bit lo/hi halves. Submitted with
+ * DM_DMUB_WAIT_TYPE_NO_WAIT.
+ *
+ * Return: result of dc_wake_and_execute_dmub_cmd(); a failure is also
+ * reported through DC_ERROR().
+ */
+bool dmub_lsdma_send_pio_constfill_command(
+	struct dc_dmub_srv *dc_dmub_srv,
+	uint64_t dst_addr,
+	uint32_t byte_count,
+	uint32_t data
+)
+{
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd cmd;
+	struct dmub_cmd_lsdma_data *payload = &cmd.lsdma.lsdma_data;
+	bool ok;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_PIO_CONSTFILL;
+	cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+
+	payload->u.pio_constfill_data.data = data;
+	payload->u.pio_constfill_data.dst_lo = dst_addr & 0xFFFFFFFF;
+	payload->u.pio_constfill_data.dst_hi = (dst_addr >> 32) & 0xFFFFFFFF;
+	payload->u.pio_constfill_data.packet.fields.byte_count = byte_count;
+	payload->u.pio_constfill_data.packet.fields.constant_fill = 1;
+
+	ok = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	if (!ok)
+		DC_ERROR("LSDMA PIO Constfill failed in DMUB");
+
+	return ok;
+}
+
+/*
+ * Queue a DMUB_CMD__LSDMA_POLL_REG_WRITE command carrying @reg_addr and
+ * @reg_data. Submitted with DM_DMUB_WAIT_TYPE_NO_WAIT.
+ *
+ * Return: result of dc_wake_and_execute_dmub_cmd(); a failure is also
+ * reported through DC_ERROR().
+ */
+bool dmub_lsdma_send_poll_reg_write_command(struct dc_dmub_srv *dc_dmub_srv, uint32_t reg_addr, uint32_t reg_data)
+{
+	struct dc_context *dc_ctx = dc_dmub_srv->ctx;
+	union dmub_rb_cmd cmd;
+	struct dmub_cmd_lsdma_data *payload = &cmd.lsdma.lsdma_data;
+	bool ok;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_POLL_REG_WRITE;
+	cmd.cmd_common.header.type = DMUB_CMD__LSDMA;
+
+	payload->u.reg_write_data.reg_data = reg_data;
+	payload->u.reg_write_data.reg_addr = reg_addr;
+
+	ok = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+	if (!ok)
+		DC_ERROR("LSDMA Poll Reg failed in DMUB");
+
+	return ok;
+}
+
+/*
+ * Send a DMUB_CMD__IDLE_OPT / DMUB_CMD__IDLE_OPT_RELEASE_HW command and wait
+ * for it (DM_DMUB_WAIT_TYPE_WAIT). Does nothing when the DMUB service or
+ * its dmub instance is missing.
+ */
+void dc_dmub_srv_release_hw(const struct dc *dc)
+{
+	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+	union dmub_rb_cmd cmd;
+
+	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+		return;
+
+	/* single zeroing of the whole union, as the other senders in this file do */
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.idle_opt_notify_idle.header.type = DMUB_CMD__IDLE_OPT;
+	cmd.idle_opt_notify_idle.header.sub_type = DMUB_CMD__IDLE_OPT_RELEASE_HW;
+	/* payload size is the command size without its header */
+	cmd.idle_opt_notify_idle.header.payload_bytes =
+			sizeof(cmd.idle_opt_notify_idle) -
+			sizeof(cmd.idle_opt_notify_idle.header);
+
+	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index bb3fe162dd93..7ef93444ef3c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -35,6 +35,7 @@ struct pipe_ctx;
struct dc_crtc_timing_adjust;
struct dc_crtc_timing;
struct dc_state;
+struct dc_surface_update;
struct dc_reg_helper_state {
bool gather_in_progress;
@@ -50,18 +51,31 @@ struct dc_dmub_srv {
struct dc_context *ctx;
void *dm;
+
+ int32_t idle_exit_counter;
+ union dmub_shared_state_ips_driver_signals driver_signals;
+ bool idle_allowed;
+ bool needs_idle_wake;
};
-void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv);
+bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv);
bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv);
+bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv,
+ unsigned int count,
+ union dmub_rb_cmd *cmd_list);
+
+bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv,
+ enum dm_dmub_wait_type wait_type,
+ union dmub_rb_cmd *cmd_list);
+
bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type);
bool dc_dmub_srv_notify_stream_mask(struct dc_dmub_srv *dc_dmub_srv,
- unsigned int stream_mask);
+ unsigned int stream_mask);
bool dc_dmub_srv_is_restore_required(struct dc_dmub_srv *dc_dmub_srv);
@@ -80,7 +94,7 @@ void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dmub_srv);
void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dmub_srv);
void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_data_register data);
-bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *dmub_oca);
+bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv);
void dc_dmub_setup_subvp_dmub_command(struct dc *dc, struct dc_state *context, bool enable);
void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv);
@@ -89,5 +103,232 @@ void dc_send_update_cursor_info_to_dmu(struct pipe_ctx *pCtx, uint8_t pipe_idx);
bool dc_dmub_check_min_version(struct dmub_srv *srv);
void dc_dmub_srv_enable_dpia_trace(const struct dc *dc);
+void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, const struct dc_plane_address *addr, uint8_t subvp_index);
+
+bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait);
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle);
+
+/**
+ * dc_dmub_srv_set_power_state() - Sets the power state for DMUB service.
+ *
+ * Controls whether messaging the DMCUB or interfacing with it via HW register
+ * interaction is permitted.
+ *
+ * @dc_dmub_srv: The DC DMUB service pointer
+ * @power_state: the DC power state
+ */
+void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state power_state);
+
+/**
+ * dc_dmub_srv_notify_fw_dc_power_state() - Notifies firmware of the DC power state.
+ *
+ * Differs from dc_dmub_srv_set_power_state in that it needs to access HW in order
+ * to message DMCUB of the state transition. Should come after the D0 exit and
+ * before D3 set power state.
+ *
+ * @dc_dmub_srv: The DC DMUB service pointer
+ * @power_state: the DC power state
+ */
+void dc_dmub_srv_notify_fw_dc_power_state(struct dc_dmub_srv *dc_dmub_srv,
+ enum dc_acpi_cm_power_state power_state);
+/**
+ * dc_dmub_srv_should_detect() - Checks if link detection is required.
+ *
+ * While in idle power states we may need driver to manually redetect in
+ * the case of a missing hotplug. Should be called from a polling timer.
+ *
+ * @dc_dmub_srv: The DC DMUB service pointer
+ *
+ * Return: true if redetection is required.
+ */
+bool dc_dmub_srv_should_detect(struct dc_dmub_srv *dc_dmub_srv);
+
+/**
+ * dc_wake_and_execute_dmub_cmd() - Wrapper for DMUB command execution.
+ *
+ * Refer to dc_wake_and_execute_dmub_cmd_list() for usage and limitations,
+ * This function is a convenience wrapper for a single command execution.
+ *
+ * @ctx: DC context
+ * @cmd: The command to send/receive
+ * @wait_type: The wait behavior for the execution
+ *
+ * Return: true on command submission success, false otherwise
+ */
+bool dc_wake_and_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd,
+ enum dm_dmub_wait_type wait_type);
+
+/**
+ * dc_wake_and_execute_dmub_cmd_list() - Wrapper for DMUB command list execution.
+ *
+ * If the DMCUB hardware was asleep then it wakes the DMUB before
+ * executing the command and attempts to re-enter if the command
+ * submission was successful.
+ *
+ * This should be the preferred command submission interface provided
+ * the DC lock is acquired.
+ *
+ * Entry/exit out of idle power optimizations would need to be
+ * manually performed otherwise through dc_allow_idle_optimizations().
+ *
+ * @ctx: DC context
+ * @count: Number of commands to send/receive
+ * @cmd: Array of commands to send
+ * @wait_type: The wait behavior for the execution
+ *
+ * Return: true on command submission success, false otherwise
+ */
+bool dc_wake_and_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count,
+ union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
+
+/**
+ * dc_wake_and_execute_gpint()
+ *
+ * @ctx: DC context
+ * @command_code: The command ID to send to DMCUB
+ * @param: The parameter to message DMCUB
+ * @response: Optional response out value - may be NULL.
+ * @wait_type: The wait behavior for the execution
+ */
+bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_command command_code,
+ uint16_t param, uint32_t *response, enum dm_dmub_wait_type wait_type);
+
+void dc_dmub_srv_fams2_update_config(struct dc *dc,
+ struct dc_state *context,
+ bool enable);
+void dc_dmub_srv_fams2_drr_update(struct dc *dc,
+ uint32_t tg_inst,
+ uint32_t vtotal_min,
+ uint32_t vtotal_max,
+ uint32_t vtotal_mid,
+ uint32_t vtotal_mid_frame_num,
+ bool program_manual_trigger);
+void dc_dmub_srv_fams2_passthrough_flip(
+ struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream,
+ struct dc_surface_update *srf_updates,
+ int surface_count);
+
+bool dmub_lsdma_init(struct dc_dmub_srv *dc_dmub_srv);
+bool dmub_lsdma_send_linear_copy_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint32_t count);
+
+/*
+ * Arguments for dmub_lsdma_send_linear_sub_window_copy_command(), which
+ * takes this struct by value. The layout (field order and bit-field
+ * widths) should be kept as-is.
+ */
+struct lsdma_linear_sub_window_copy_params {
+ /* presumably low/high 32 bits of the source address - confirm */
+ uint32_t src_lo;
+ uint32_t src_hi;
+
+ /* presumably low/high 32 bits of the destination address - confirm */
+ uint32_t dst_lo;
+ uint32_t dst_hi;
+
+ /* each pair below shares one 32-bit unit (16 + 16 bits) */
+ uint32_t src_x : 16;
+ uint32_t src_y : 16;
+
+ uint32_t dst_x : 16;
+ uint32_t dst_y : 16;
+
+ uint32_t rect_x : 16;
+ uint32_t rect_y : 16;
+
+ uint32_t src_pitch : 16;
+ uint32_t dst_pitch : 16;
+
+ uint32_t src_slice_pitch;
+ uint32_t dst_slice_pitch;
+
+ /* 1 + 3 + 3 + 3 + 22 = 32 bits */
+ uint32_t tmz : 1;
+ uint32_t element_size : 3;
+ uint32_t src_cache_policy : 3;
+ uint32_t dst_cache_policy : 3;
+ uint32_t padding : 22;
+};
+
+bool dmub_lsdma_send_linear_sub_window_copy_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ struct lsdma_linear_sub_window_copy_params copy_data
+);
+bool dmub_lsdma_send_pio_copy_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint32_t byte_count,
+ uint32_t overlap_disable);
+bool dmub_lsdma_send_pio_constfill_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ uint64_t dst_addr,
+ uint32_t byte_count,
+ uint32_t data);
+
+/*
+ * Arguments for dmub_lsdma_send_tiled_to_tiled_copy_command(), which takes
+ * this struct by value. The layout (field order and bit-field widths)
+ * should be kept as-is.
+ */
+struct lsdma_send_tiled_to_tiled_copy_command_params {
+ /* full 64-bit addresses */
+ uint64_t src_addr;
+ uint64_t dst_addr;
+
+ /* each pair below shares one 32-bit unit (16 + 16 bits) */
+ uint32_t src_x : 16;
+ uint32_t src_y : 16;
+
+ uint32_t dst_x : 16;
+ uint32_t dst_y : 16;
+
+ uint32_t src_width : 16;
+ uint32_t dst_width : 16;
+
+ uint32_t rect_x : 16;
+ uint32_t rect_y : 16;
+
+ uint32_t src_height : 16;
+ uint32_t dst_height : 16;
+
+ /* 6 + 5 + 3 + 1 + 1 + 2 + 2 + 2 + 1 + 9 = 32 bits */
+ uint32_t data_format : 6;
+ uint32_t swizzle_mode : 5; /* single value for both source and destination */
+ uint32_t element_size : 3; /* single value for both source and destination */
+ uint32_t dcc : 1;
+ uint32_t tmz : 1;
+ uint32_t read_compress : 2;
+ uint32_t write_compress : 2;
+ uint32_t max_com : 2;
+ uint32_t max_uncom : 1;
+ uint32_t padding : 9;
+};
+
+bool dmub_lsdma_send_tiled_to_tiled_copy_command(
+ struct dc_dmub_srv *dc_dmub_srv,
+ struct lsdma_send_tiled_to_tiled_copy_command_params params);
+bool dmub_lsdma_send_poll_reg_write_command(struct dc_dmub_srv *dc_dmub_srv, uint32_t reg_addr, uint32_t reg_data);
+
+/**
+ * struct ips_residency_info - struct containing info from dmub_ips_residency_stats
+ *
+ * @ips_mode: The mode of IPS that the following stats pertain to
+ * @residency_percent: The percentage of time spent in given IPS mode in millipercent
+ * @entry_counter: The number of entries made into this IPS state
+ * @total_active_time_us: uint32_t array of length 2 representing time in the given IPS mode
+ * in microseconds. Index 0 is lower 32 bits, index 1 is upper 32 bits.
+ * @total_inactive_time_us: uint32_t array of length 2 representing time outside the given IPS mode
+ * in microseconds. Index 0 is lower 32 bits, index 1 is upper 32 bits.
+ * @histogram: Histogram of given IPS state durations - bucket definitions in dmub_ips.c
+ */
+struct ips_residency_info {
+ enum ips_residency_mode ips_mode;
+ unsigned int residency_percent;
+ unsigned int entry_counter;
+ unsigned int total_active_time_us[2];
+ unsigned int total_inactive_time_us[2];
+ unsigned int histogram[16];
+};
+
+bool dc_dmub_srv_ips_residency_cntl(const struct dc_context *ctx, uint8_t panel_inst, bool start_measurement);
+
+bool dc_dmub_srv_ips_query_residency_info(const struct dc_context *ctx, uint8_t panel_inst,
+ struct dmub_ips_residency_info *driver_info,
+ enum ips_residency_mode ips_mode);
+
+/**
+ * dc_dmub_srv_release_hw() - Notifies DMUB service that HW access is no longer required.
+ *
+ * @dc: pointer to DC object
+ */
+void dc_dmub_srv_release_hw(const struct dc *dc);
#endif /* _DMUB_DC_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index cfaa39c5dd16..db669ccb1d58 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -137,12 +137,18 @@ enum dp_link_encoding {
enum dp_test_link_rate {
DP_TEST_LINK_RATE_RBR = 0x06,
+ DP_TEST_LINK_RATE_RATE_2 = 0x08, // Rate_2 - 2.16 Gbps/Lane
+ DP_TEST_LINK_RATE_RATE_3 = 0x09, // Rate_3 - 2.43 Gbps/Lane
DP_TEST_LINK_RATE_HBR = 0x0A,
+ DP_TEST_LINK_RATE_RBR2 = 0x0C, // Rate_5 (RBR2) - 3.24 Gbps/Lane
+ DP_TEST_LINK_RATE_RATE_6 = 0x10, // Rate_6 - 4.32 Gbps/Lane
DP_TEST_LINK_RATE_HBR2 = 0x14,
+ DP_TEST_LINK_RATE_RATE_8 = 0x19, // Rate_8 - 6.75 Gbps/Lane
DP_TEST_LINK_RATE_HBR3 = 0x1E,
DP_TEST_LINK_RATE_UHBR10 = 0x01,
DP_TEST_LINK_RATE_UHBR20 = 0x02,
- DP_TEST_LINK_RATE_UHBR13_5 = 0x03,
+ DP_TEST_LINK_RATE_UHBR13_5_LEGACY = 0x03, /* For backward compatibility*/
+ DP_TEST_LINK_RATE_UHBR13_5 = 0x04,
};
struct dc_link_settings {
@@ -153,6 +159,16 @@ struct dc_link_settings {
uint8_t link_rate_set;
};
+struct dc_tunnel_settings {
+ bool should_enable_dp_tunneling;
+ bool should_use_dp_bw_allocation;
+ uint8_t cm_id;
+ uint8_t group_id;
+ uint32_t bw_granularity;
+ uint32_t estimated_bw;
+ uint32_t allocated_bw;
+};
+
union dc_dp_ffe_preset {
struct {
uint8_t level : 4;
@@ -294,6 +310,19 @@ union lane_align_status_updated {
uint8_t raw;
};
+/*
+ * One-byte IRQ vector, accessible either as individual flag bits or as the
+ * raw byte. Bit 5 (DP_LINK_TUNNELING_IRQ) is the tunneling interrupt flag;
+ * the top two bits are reserved.
+ */
+union link_service_irq_vector_esi0 {
+ struct {
+ uint8_t DP_LINK_RX_CAP_CHANGED:1;
+ uint8_t DP_LINK_STATUS_CHANGED:1;
+ uint8_t DP_LINK_STREAM_STATUS_CHANGED:1;
+ uint8_t DP_LINK_HDMI_LINK_STATUS_CHANGED:1;
+ uint8_t DP_LINK_CONNECTED_OFF_ENTRY_REQUESTED:1;
+ uint8_t DP_LINK_TUNNELING_IRQ:1;
+ uint8_t reserved:2;
+ } bits;
+ uint8_t raw;
+};
+
union lane_adjust {
struct {
uint8_t VOLTAGE_SWING_LANE:2;
@@ -404,14 +433,6 @@ union dwnstream_port_caps_byte3_hdmi {
uint8_t raw;
};
-union hdmi_sink_encoded_link_bw_support {
- struct {
- uint8_t HDMI_SINK_ENCODED_LINK_BW_SUPPORT:3;
- uint8_t RESERVED:5;
- } bits;
- uint8_t raw;
-};
-
union hdmi_encoded_link_bw {
struct {
uint8_t FRL_MODE:1; // Bit 0
@@ -421,7 +442,28 @@ union hdmi_encoded_link_bw {
uint8_t BW_32Gbps:1;
uint8_t BW_40Gbps:1;
uint8_t BW_48Gbps:1;
- uint8_t RESERVED:1; // Bit 7
+ uint8_t FRL_LINK_TRAINING_FINISHED:1; // Bit 7
+ } bits;
+ uint8_t raw;
+};
+
+union hdmi_tx_link_status {
+ struct {
+ uint8_t HDMI_TX_LINK_ACTIVE_STATUS:1;
+ uint8_t HDMI_TX_READY_STATUS:1;
+ uint8_t RESERVED:6;
+ } bits;
+ uint8_t raw;
+};
+
+union autonomous_mode_and_frl_link_status {
+ struct {
+ uint8_t FRL_LT_IN_PROGRESS_STATUS:1;
+ uint8_t FRL_LT_LINK_CONFIG_IN_PROGRESS:3;
+ uint8_t RESERVED:1;
+ uint8_t FALLBACK_POLICY:1;
+ uint8_t FALLBACK_POLICY_VALID:1;
+ uint8_t REGULATED_AUTONOMOUS_MODE_SUPPORTED:1;
} bits;
uint8_t raw;
};
@@ -464,8 +506,10 @@ union sink_status {
uint8_t raw;
};
-/*6-byte structure corresponding to 6 registers (200h-205h)
-read during handling of HPD-IRQ*/
+/* 7-byte structure corresponding to 6 registers (200h-205h)
+ * and LINK_SERVICE_IRQ_ESI0 (2005h) for tunneling IRQ
+ * read during handling of HPD-IRQ
+ */
union hpd_irq_data {
struct {
union sink_count sink_cnt;/* 200h */
@@ -473,9 +517,10 @@ union hpd_irq_data {
union lane_status lane01_status;/* 202h */
union lane_status lane23_status;/* 203h */
union lane_align_status_updated lane_status_updated;/* 204h */
- union sink_status sink_status;
+ union sink_status sink_status;/* 205h */
+ union link_service_irq_vector_esi0 link_service_irq_esi0;/* 2005h */
} bytes;
- uint8_t raw[6];
+ uint8_t raw[7];
};
union down_stream_port_count {
@@ -908,81 +953,34 @@ union dpia_info {
uint8_t raw;
};
+/* DPCD[0xE0020] USB4_DRIVER_BW_CAPABILITY register. */
+union usb4_driver_bw_cap {
+ struct {
+ uint8_t rsvd :7;
+ uint8_t driver_bw_alloc_support :1;
+ } bits;
+ uint8_t raw;
+};
+
+/* DPCD[0xE0021] DP_IN_ADAPTER_TUNNEL_INFORMATION register. */
+union dpia_tunnel_info {
+ struct {
+ uint8_t group_id :3;
+ uint8_t rsvd :5;
+ } bits;
+ uint8_t raw;
+};
+
/* DP Tunneling over USB4 */
struct dpcd_usb4_dp_tunneling_info {
union dp_tun_cap_support dp_tun_cap;
union dpia_info dpia_info;
+ union usb4_driver_bw_cap driver_bw_cap;
+ union dpia_tunnel_info dpia_tunnel_info;
uint8_t usb4_driver_id;
uint8_t usb4_topology_id[DPCD_USB4_TOPOLOGY_ID_LEN];
};
-#ifndef DP_MAIN_LINK_CHANNEL_CODING_CAP
-#define DP_MAIN_LINK_CHANNEL_CODING_CAP 0x006
-#endif
-#ifndef DP_SINK_VIDEO_FALLBACK_FORMATS
-#define DP_SINK_VIDEO_FALLBACK_FORMATS 0x020
-#endif
-#ifndef DP_FEC_CAPABILITY_1
-#define DP_FEC_CAPABILITY_1 0x091
-#endif
-#ifndef DP_DFP_CAPABILITY_EXTENSION_SUPPORT
-#define DP_DFP_CAPABILITY_EXTENSION_SUPPORT 0x0A3
-#endif
-#ifndef DP_DSC_CONFIGURATION
-#define DP_DSC_CONFIGURATION 0x161
-#endif
-#ifndef DP_PHY_SQUARE_PATTERN
-#define DP_PHY_SQUARE_PATTERN 0x249
-#endif
-#ifndef DP_128b_132b_SUPPORTED_LINK_RATES
-#define DP_128b_132b_SUPPORTED_LINK_RATES 0x2215
-#endif
-#ifndef DP_128b_132b_TRAINING_AUX_RD_INTERVAL
-#define DP_128b_132b_TRAINING_AUX_RD_INTERVAL 0x2216
-#endif
-#ifndef DP_TEST_264BIT_CUSTOM_PATTERN_7_0
-#define DP_TEST_264BIT_CUSTOM_PATTERN_7_0 0X2230
-#endif
-#ifndef DP_TEST_264BIT_CUSTOM_PATTERN_263_256
-#define DP_TEST_264BIT_CUSTOM_PATTERN_263_256 0X2250
-#endif
-#ifndef DP_DSC_SUPPORT_AND_DECODER_COUNT
-#define DP_DSC_SUPPORT_AND_DECODER_COUNT 0x2260
-#endif
-#ifndef DP_DSC_MAX_SLICE_COUNT_AND_AGGREGATION_0
-#define DP_DSC_MAX_SLICE_COUNT_AND_AGGREGATION_0 0x2270
-#endif
-#ifndef DP_DSC_DECODER_0_MAXIMUM_SLICE_COUNT_MASK
-#define DP_DSC_DECODER_0_MAXIMUM_SLICE_COUNT_MASK (1 << 0)
-#endif
-#ifndef DP_DSC_DECODER_0_AGGREGATION_SUPPORT_MASK
-#define DP_DSC_DECODER_0_AGGREGATION_SUPPORT_MASK (0b111 << 1)
-#endif
-#ifndef DP_DSC_DECODER_0_AGGREGATION_SUPPORT_SHIFT
-#define DP_DSC_DECODER_0_AGGREGATION_SUPPORT_SHIFT 1
-#endif
-#ifndef DP_DSC_DECODER_COUNT_MASK
-#define DP_DSC_DECODER_COUNT_MASK (0b111 << 5)
-#endif
-#ifndef DP_DSC_DECODER_COUNT_SHIFT
-#define DP_DSC_DECODER_COUNT_SHIFT 5
-#endif
-#ifndef DP_MAIN_LINK_CHANNEL_CODING_SET
-#define DP_MAIN_LINK_CHANNEL_CODING_SET 0x108
-#endif
-#ifndef DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER
-#define DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER 0xF0006
-#endif
-#ifndef DP_PHY_REPEATER_128b_132b_RATES
-#define DP_PHY_REPEATER_128b_132b_RATES 0xF0007
-#endif
-#ifndef DP_128b_132b_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1
-#define DP_128b_132b_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 0xF0022
-#endif
-#ifndef DP_INTRA_HOP_AUX_REPLY_INDICATION
-#define DP_INTRA_HOP_AUX_REPLY_INDICATION (1 << 3)
-/* TODO - Use DRM header to replace above once available */
-#endif // DP_INTRA_HOP_AUX_REPLY_INDICATION
union dp_main_line_channel_coding_cap {
struct {
uint8_t DP_8b_10b_SUPPORTED :1;
@@ -1020,6 +1018,15 @@ union dp_128b_132b_supported_lttpr_link_rates {
uint8_t raw;
};
+union dp_alpm_lttpr_cap {
+ struct {
+ uint8_t AUX_LESS_ALPM_SUPPORTED :1;
+ uint8_t ASSR_SUPPORTED :1;
+ uint8_t RESERVED :6;
+ } bits;
+ uint8_t raw;
+};
+
union dp_sink_video_fallback_formats {
struct {
uint8_t dp_1024x768_60Hz_24bpp_support :1;
@@ -1030,6 +1037,29 @@ union dp_sink_video_fallback_formats {
uint8_t raw;
};
+union dp_receive_port0_cap {
+ struct {
+ uint8_t RESERVED :1;
+ uint8_t LOCAL_EDID_PRESENT :1;
+ uint8_t ASSOCIATED_TO_PRECEDING_PORT:1;
+ uint8_t HBLANK_EXPANSION_CAPABLE :1;
+ uint8_t BUFFER_SIZE_UNIT :1;
+ uint8_t BUFFER_SIZE_PER_PORT :1;
+ uint8_t HBLANK_REDUCTION_CAPABLE :1;
+ uint8_t RESERVED2:1;
+ uint8_t BUFFER_SIZE:8;
+ } bits;
+ uint8_t raw[2];
+};
+
+union dpcd_max_uncompressed_pixel_rate_cap {
+ struct {
+ uint16_t max_uncompressed_pixel_rate_cap :15;
+ uint16_t valid :1;
+ } bits;
+ uint8_t raw[2];
+};
+
union dp_fec_capability1 {
struct {
uint8_t AGGREGATED_ERROR_COUNTERS_CAPABLE :1;
@@ -1090,10 +1120,11 @@ union dp_128b_132b_training_aux_rd_interval {
union edp_alpm_caps {
struct {
- uint8_t AUX_WAKE_ALPM_CAP :1;
- uint8_t PM_STATE_2A_SUPPORT :1;
- uint8_t AUX_LESS_ALPM_CAP :1;
- uint8_t RESERVED :5;
+ uint8_t AUX_WAKE_ALPM_CAP :1;
+ uint8_t PM_STATE_2A_SUPPORT :1;
+ uint8_t AUX_LESS_ALPM_CAP :1;
+ uint8_t AUX_LESS_ALPM_ML_PHY_SLEEP_STATUS_SUPPORTED :1;
+ uint8_t RESERVED :4;
} bits;
uint8_t raw;
};
@@ -1156,7 +1187,10 @@ struct dc_lttpr_caps {
uint8_t max_ext_timeout;
union dp_main_link_channel_coding_lttpr_cap main_link_channel_coding;
union dp_128b_132b_supported_lttpr_link_rates supported_128b_132b_rates;
+ union dp_alpm_lttpr_cap alpm;
uint8_t aux_rd_interval[MAX_REPEATER_CNT - 1];
+ uint8_t lttpr_ieee_oui[3]; // Always read from closest LTTPR to host
+ uint8_t lttpr_device_id[6]; // Always read from closest LTTPR to host
};
struct dc_dongle_dfp_cap_ext {
@@ -1185,6 +1219,7 @@ struct dc_dongle_caps {
uint32_t dp_hdmi_max_bpc;
uint32_t dp_hdmi_max_pixel_clk_in_khz;
uint32_t dp_hdmi_frl_max_link_bw_in_kbps;
+ uint32_t dp_hdmi_regulated_autonomous_mode_support;
struct dc_dongle_dfp_cap_ext dfp_cap_ext;
};
@@ -1219,6 +1254,7 @@ struct dpcd_caps {
int8_t branch_dev_name[6];
int8_t branch_hw_revision;
int8_t branch_fw_revision[2];
+ int8_t branch_vendor_specific_data[4];
bool allow_invalid_MSA_timing_param;
bool panel_mode_edp;
@@ -1231,6 +1267,7 @@ struct dpcd_caps {
struct dc_lttpr_caps lttpr_caps;
struct adaptive_sync_caps adaptive_sync_caps;
struct dpcd_usb4_dp_tunneling_info usb4_dp_tun_info;
+ union dpcd_max_uncompressed_pixel_rate_cap max_uncompressed_pixel_rate_cap;
union dp_128b_132b_supported_link_rates dp_128b_132b_supported_link_rates;
union dp_main_line_channel_coding_cap channel_coding_cap;
@@ -1243,6 +1280,11 @@ struct dpcd_caps {
struct edp_psr_info psr_info;
struct replay_info pr_info;
+ uint16_t edp_oled_emission_rate;
+ union dp_receive_port0_cap receive_port0_cap;
+ /* Indicates the number of SST links supported by MSO (Multi-Stream Output) */
+ uint8_t mso_cap_sst_links_supported;
+ uint8_t dp_edp_general_cap_2;
};
union dpcd_sink_ext_caps {
@@ -1256,7 +1298,7 @@ union dpcd_sink_ext_caps {
uint8_t oled : 1;
uint8_t reserved_2 : 1;
uint8_t miniled : 1;
- uint8_t reserved : 1;
+ uint8_t emission_output : 1;
} bits;
uint8_t raw;
};
@@ -1288,8 +1330,7 @@ union replay_enable_and_configuration {
unsigned char FREESYNC_PANEL_REPLAY_MODE :1;
unsigned char TIMING_DESYNC_ERROR_VERIFICATION :1;
unsigned char STATE_TRANSITION_ERROR_DETECTION :1;
- unsigned char RESERVED0 :1;
- unsigned char RESERVED1 :4;
+ unsigned char RESERVED :5;
} bits;
unsigned char raw;
};
@@ -1309,7 +1350,9 @@ union dpcd_alpm_configuration {
struct {
unsigned char ENABLE : 1;
unsigned char IRQ_HPD_ENABLE : 1;
- unsigned char RESERVED : 6;
+ unsigned char ALPM_MODE_SEL : 1;
+ unsigned char ACDS_PERIOD_DURATION : 1;
+ unsigned char RESERVED : 4;
} bits;
unsigned char raw;
};
@@ -1402,12 +1445,30 @@ struct dp_trace {
#ifndef DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX
#define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX 0x110
#endif
+#ifndef DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP
+#define DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP 0x221c
+#endif
+#ifndef DP_LTTPR_ALPM_CAPABILITIES
+#define DP_LTTPR_ALPM_CAPABILITIES 0xF0009
+#endif
+#ifndef DP_REGULATED_AUTONOMOUS_MODE_SUPPORTED_AND_HDMI_LINK_TRAINING_STATUS
+#define DP_REGULATED_AUTONOMOUS_MODE_SUPPORTED_AND_HDMI_LINK_TRAINING_STATUS 0x303C
+#endif
#ifndef DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE
#define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50
#endif
#ifndef DP_TUNNELING_IRQ
#define DP_TUNNELING_IRQ (1 << 5)
#endif
+#ifndef DP_BRANCH_VENDOR_SPECIFIC_START
+#define DP_BRANCH_VENDOR_SPECIFIC_START 0x50C
+#endif
+#ifndef DP_LTTPR_IEEE_OUI
+#define DP_LTTPR_IEEE_OUI 0xF003D
+#endif
+#ifndef DP_LTTPR_DEVICE_ID
+#define DP_LTTPR_DEVICE_ID 0xF0040
+#endif
/** USB4 DPCD BW Allocation Registers Chapter 10.7 **/
#ifndef DP_TUNNELING_CAPABILITIES
#define DP_TUNNELING_CAPABILITIES 0xE000D /* 1.4a */
@@ -1433,10 +1494,38 @@ struct dp_trace {
#ifndef DP_TUNNELING_STATUS
#define DP_TUNNELING_STATUS 0xE0025 /* 1.4a */
#endif
+#ifndef DP_TUNNELING_MAX_LINK_RATE
+#define DP_TUNNELING_MAX_LINK_RATE 0xE0028 /* 1.4a */
+#endif
+#ifndef DP_TUNNELING_MAX_LANE_COUNT
+#define DP_TUNNELING_MAX_LANE_COUNT 0xE0029 /* 1.4a */
+#endif
#ifndef DPTX_BW_ALLOCATION_MODE_CONTROL
#define DPTX_BW_ALLOCATION_MODE_CONTROL 0xE0030 /* 1.4a */
#endif
#ifndef REQUESTED_BW
#define REQUESTED_BW 0xE0031 /* 1.4a */
#endif
+# ifndef DP_TUNNELING_BW_ALLOC_BITS_MASK
+# define DP_TUNNELING_BW_ALLOC_BITS_MASK (0x0F << 0)
+# endif
+# ifndef DP_TUNNELING_BW_REQUEST_FAILED
+# define DP_TUNNELING_BW_REQUEST_FAILED (1 << 0)
+# endif
+# ifndef DP_TUNNELING_BW_REQUEST_SUCCEEDED
+# define DP_TUNNELING_BW_REQUEST_SUCCEEDED (1 << 1)
+# endif
+# ifndef DP_TUNNELING_ESTIMATED_BW_CHANGED
+# define DP_TUNNELING_ESTIMATED_BW_CHANGED (1 << 2)
+# endif
+# ifndef DP_TUNNELING_BW_ALLOC_CAP_CHANGED
+# define DP_TUNNELING_BW_ALLOC_CAP_CHANGED (1 << 3)
+# endif
+# ifndef DPTX_BW_ALLOC_UNMASK_IRQ
+# define DPTX_BW_ALLOC_UNMASK_IRQ (1 << 6)
+# endif
+# ifndef DPTX_BW_ALLOC_MODE_ENABLE
+# define DPTX_BW_ALLOC_MODE_ENABLE (1 << 7)
+# endif
+
#endif /* DC_DP_TYPES_H */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h
index fe3078b8789e..9d18f1c08079 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h
@@ -59,6 +59,7 @@ struct dc_dsc_config_options {
uint32_t max_target_bpp_limit_override_x16;
uint32_t slice_height_granularity;
uint32_t dsc_force_odm_hslice_override;
+ bool force_dsc_when_not_needed;
};
bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
@@ -93,6 +94,11 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps(
const int num_slices_h,
const bool is_dp);
+void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc,
+ const struct dsc_dec_dpcd_caps *dsc_sink_caps);
+void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc,
+ const struct dc_crtc_timing *timing);
+
/* TODO - Hardware/specs limitation should be owned by dc dsc and returned to DM,
* and DM can choose to OVERRIDE the limitation on CASE BY CASE basis.
* Hardware/specs limitation should not be writable by DM.
@@ -100,7 +106,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps(
*/
void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
uint32_t max_target_bpp_limit_override_x16,
- struct dc_dsc_policy *policy);
+ struct dc_dsc_policy *policy,
+ const enum dc_link_encoding_format link_encoding);
void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_fused_io.c b/drivers/gpu/drm/amd/display/dc/dc_fused_io.c
new file mode 100644
index 000000000000..fee69642fb93
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_fused_io.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2025 Advanced Micro Devices, Inc.
+
+#include "dc_fused_io.h"
+
+#include "dm_helpers.h"
+#include "gpio.h"
+/* Build a fused I2C request in @cmd from @op; false if @op is NULL or exceeds req->buffer. */
+static bool op_i2c_convert(
+ union dmub_rb_cmd *cmd,
+ const struct mod_hdcp_atomic_op_i2c *op,
+ enum dmub_cmd_fused_request_type type,
+ uint32_t ddc_line,
+ bool over_aux
+)
+{
+ struct dmub_cmd_fused_request *req = &cmd->fused_io.request;
+ struct dmub_cmd_fused_request_location_i2c *loc = &req->u.i2c;
+
+ if (!op || op->size > sizeof(req->buffer)) /* size check guards the memcpy below */
+ return false;
+ /* Describe the I2C location, then stage op->data in the request buffer. */
+ req->type = type;
+ loc->is_aux = false;
+ loc->ddc_line = ddc_line;
+ loc->over_aux = over_aux;
+ loc->address = op->address;
+ loc->offset = op->offset;
+ loc->length = op->size;
+ memcpy(req->buffer, op->data, op->size);
+
+ return true;
+}
+/* Build a fused AUX request in @cmd from @op; false if @op is NULL or exceeds req->buffer. */
+static bool op_aux_convert(
+ union dmub_rb_cmd *cmd,
+ const struct mod_hdcp_atomic_op_aux *op,
+ enum dmub_cmd_fused_request_type type,
+ uint32_t ddc_line
+)
+{
+ struct dmub_cmd_fused_request *req = &cmd->fused_io.request;
+ struct dmub_cmd_fused_request_location_aux *loc = &req->u.aux;
+
+ if (!op || op->size > sizeof(req->buffer)) /* size check guards the memcpy below */
+ return false;
+ /* For AUX only ddc_line, address and length are written; no offset or over_aux is set. */
+ req->type = type;
+ loc->is_aux = true;
+ loc->ddc_line = ddc_line;
+ loc->address = op->address;
+ loc->length = op->size;
+ memcpy(req->buffer, op->data, op->size);
+
+ return true;
+}
+/* Submit the three prepared requests as one command list; true only if commands[0] ends in SUCCESS. */
+static bool atomic_write_poll_read(
+ struct dc_link *link,
+ union dmub_rb_cmd commands[3],
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+)
+{
+ const uint8_t count = 3;
+ const uint32_t timeout_per_request_us = 10000;
+ const uint32_t timeout_per_aux_transaction_us = 10000;
+ uint64_t timeout_us = 0;
+
+ commands[1].fused_io.request.poll_mask_msb = poll_mask_msb; /* callers put the poll op in [1] */
+ commands[1].fused_io.request.timeout_us = poll_timeout_us;
+ /* Stamp every command header and add up the timeout budget for the whole list. */
+ for (uint8_t i = 0; i < count; i++) {
+ struct dmub_rb_cmd_fused_io *io = &commands[i].fused_io;
+
+ io->header.type = DMUB_CMD__FUSED_IO;
+ io->header.sub_type = DMUB_CMD__FUSED_IO_EXECUTE;
+ io->header.multi_cmd_pending = i != count - 1; /* set on all but the last command */
+ io->header.payload_bytes = sizeof(commands[i].fused_io) - sizeof(io->header);
+ /* Each request adds 10000 us plus its own timeout; AUX without one adds 10000 us per full 16 bytes. */
+ timeout_us += timeout_per_request_us + io->request.timeout_us;
+ if (!io->request.timeout_us && io->request.u.aux.is_aux)
+ timeout_us += timeout_per_aux_transaction_us * (io->request.u.aux.length / 16);
+ }
+
+ if (!dm_helpers_execute_fused_io(link->ctx, link, commands, count, timeout_us))
+ return false;
+ /* NOTE(review): only commands[0] is inspected; presumably the helper stores the final reply there - confirm. */
+ return commands[0].fused_io.request.status == FUSED_REQUEST_STATUS_SUCCESS;
+}
+/* Run @write, @poll and @read over I2C as one fused command list and copy the reply into @read->data. */
+bool dm_atomic_write_poll_read_i2c(
+ struct dc_link *link,
+ const struct mod_hdcp_atomic_op_i2c *write,
+ const struct mod_hdcp_atomic_op_i2c *poll,
+ struct mod_hdcp_atomic_op_i2c *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+)
+{
+ if (!link)
+ return false;
+ /* NOTE(review): link->ddc, ddc_pin and pin_data are dereferenced unchecked - confirm callers guarantee them. */
+ const bool over_aux = false;
+ const uint32_t ddc_line = link->ddc->ddc_pin->pin_data->en;
+ /* Slot order is fixed: [0] write, [1] poll, [2] read; a failed conversion aborts before submission. */
+ union dmub_rb_cmd commands[3] = { 0 };
+ const bool converted = op_i2c_convert(&commands[0], write, FUSED_REQUEST_WRITE, ddc_line, over_aux)
+ && op_i2c_convert(&commands[1], poll, FUSED_REQUEST_POLL, ddc_line, over_aux)
+ && op_i2c_convert(&commands[2], read, FUSED_REQUEST_READ, ddc_line, over_aux);
+
+ if (!converted)
+ return false;
+
+ const bool result = atomic_write_poll_read(link, commands, poll_timeout_us, poll_mask_msb);
+ /* NOTE(review): copies from commands[0] (not [2]) and also when result is false - confirm both are intended. */
+ memcpy(read->data, commands[0].fused_io.request.buffer, read->size);
+ return result;
+}
+/* Run @write, @poll and @read over AUX as one fused command list and copy the reply into @read->data. */
+bool dm_atomic_write_poll_read_aux(
+ struct dc_link *link,
+ const struct mod_hdcp_atomic_op_aux *write,
+ const struct mod_hdcp_atomic_op_aux *poll,
+ struct mod_hdcp_atomic_op_aux *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+)
+{
+ if (!link)
+ return false;
+ /* NOTE(review): link->ddc, ddc_pin and pin_data are dereferenced unchecked - confirm callers guarantee them. */
+ const uint32_t ddc_line = link->ddc->ddc_pin->pin_data->en;
+ union dmub_rb_cmd commands[3] = { 0 }; /* [0] write, [1] poll, [2] read */
+ const bool converted = op_aux_convert(&commands[0], write, FUSED_REQUEST_WRITE, ddc_line)
+ && op_aux_convert(&commands[1], poll, FUSED_REQUEST_POLL, ddc_line)
+ && op_aux_convert(&commands[2], read, FUSED_REQUEST_READ, ddc_line);
+
+ if (!converted)
+ return false;
+
+ const bool result = atomic_write_poll_read(link, commands, poll_timeout_us, poll_mask_msb);
+ /* NOTE(review): copies from commands[0] (not [2]) and also when result is false - confirm both are intended. */
+ memcpy(read->data, commands[0].fused_io.request.buffer, read->size);
+ return result;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dc_fused_io.h b/drivers/gpu/drm/amd/display/dc/dc_fused_io.h
new file mode 100644
index 000000000000..c74917240985
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_fused_io.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __DC_FUSED_IO_H__
+#define __DC_FUSED_IO_H__
+
+#include "dc.h"
+#include "mod_hdcp.h"
+/* Fused write, poll, read over I2C; false if @link is NULL, an op is NULL or oversized, or the request fails. */
+bool dm_atomic_write_poll_read_i2c(
+ struct dc_link *link,
+ const struct mod_hdcp_atomic_op_i2c *write,
+ const struct mod_hdcp_atomic_op_i2c *poll,
+ struct mod_hdcp_atomic_op_i2c *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+);
+/* Fused write, poll, read over AUX; false if @link is NULL, an op is NULL or oversized, or the request fails. */
+bool dm_atomic_write_poll_read_aux(
+ struct dc_link *link,
+ const struct mod_hdcp_atomic_op_aux *write,
+ const struct mod_hdcp_atomic_op_aux *poll,
+ struct mod_hdcp_atomic_op_aux *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+);
+
+#endif // __DC_FUSED_IO_H__
+
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index 3907eeff560c..5a365bd19933 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -36,6 +36,9 @@
#include "dc_dmub_srv.h"
#include "reg_helper.h"
+#define DC_LOGGER \
+ ctx->logger
+
static inline void submit_dmub_read_modify_write(
struct dc_reg_helper_state *offload,
const struct dc_context *ctx)
@@ -47,7 +50,7 @@ static inline void submit_dmub_read_modify_write(
cmd_buf->header.payload_bytes =
sizeof(struct dmub_cmd_read_modify_write_sequence) * offload->reg_seq_count;
- dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
memset(cmd_buf, 0, sizeof(*cmd_buf));
@@ -64,7 +67,7 @@ static inline void submit_dmub_burst_write(
cmd_buf->header.payload_bytes =
sizeof(uint32_t) * offload->reg_seq_count;
- dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
memset(cmd_buf, 0, sizeof(*cmd_buf));
@@ -77,7 +80,7 @@ static inline void submit_dmub_reg_wait(
{
struct dmub_rb_cmd_reg_wait *cmd_buf = &offload->cmd_data.reg_wait;
- dm_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ dc_wake_and_execute_dmub_cmd(ctx, &offload->cmd_data, DM_DMUB_WAIT_TYPE_NO_WAIT);
memset(cmd_buf, 0, sizeof(*cmd_buf));
offload->reg_seq_count = 0;
@@ -88,11 +91,6 @@ struct dc_reg_value_masks {
uint32_t mask;
};
-struct dc_reg_sequence {
- uint32_t addr;
- struct dc_reg_value_masks value_masks;
-};
-
static inline void set_reg_field_value_masks(
struct dc_reg_value_masks *field_value_mask,
uint32_t value,
@@ -263,7 +261,6 @@ uint32_t generic_reg_set_ex(const struct dc_context *ctx,
va_end(ap);
-
/* mmio write directly */
reg_val = (reg_val & ~field_value_mask.mask) | field_value_mask.value;
@@ -685,13 +682,19 @@ void reg_sequence_wait_done(const struct dc_context *ctx)
if (offload &&
ctx->dc->debug.dmub_offload_enabled &&
!ctx->dc->debug.dmcub_emulation) {
- dc_dmub_srv_wait_idle(ctx->dmub_srv);
+ dc_dmub_srv_wait_for_idle(ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL);
}
}
char *dce_version_to_string(const int version)
{
switch (version) {
+ case DCE_VERSION_6_0:
+ return "DCE 6.0";
+ case DCE_VERSION_6_1:
+ return "DCE 6.1";
+ case DCE_VERSION_6_4:
+ return "DCE 6.4";
case DCE_VERSION_8_0:
return "DCE 8.0";
case DCE_VERSION_8_1:
@@ -729,7 +732,7 @@ char *dce_version_to_string(const int version)
case DCN_VERSION_3_03:
return "DCN 3.0.3";
case DCN_VERSION_3_1:
- return "DCN 3.1";
+ return "DCN 3.1.2";
case DCN_VERSION_3_14:
return "DCN 3.1.4";
case DCN_VERSION_3_15:
@@ -740,7 +743,20 @@ char *dce_version_to_string(const int version)
return "DCN 3.2";
case DCN_VERSION_3_21:
return "DCN 3.2.1";
+ case DCN_VERSION_3_5:
+ return "DCN 3.5";
+ case DCN_VERSION_3_51:
+ return "DCN 3.5.1";
+ case DCN_VERSION_3_6:
+ return "DCN 3.6";
+ case DCN_VERSION_4_01:
+ return "DCN 4.0.1";
default:
return "Unknown";
}
}
+/* Reports VRR support as a plain version compare: true iff v >= DCE_VERSION_8_0. */
+bool dc_supports_vrr(const enum dce_version v)
+{
+ return v >= DCE_VERSION_8_0; /* presumably excludes the DCE 6.x entries - confirm enum order */
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index 100d62162b71..667852517246 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -60,6 +60,7 @@ union large_integer {
enum dc_plane_addr_type {
PLN_ADDR_TYPE_GRAPHICS = 0,
+ PLN_ADDR_TYPE_3DLUT,
PLN_ADDR_TYPE_GRPH_STEREO,
PLN_ADDR_TYPE_VIDEO_PROGRESSIVE,
PLN_ADDR_TYPE_RGBEA
@@ -67,7 +68,7 @@ enum dc_plane_addr_type {
struct dc_plane_address {
enum dc_plane_addr_type type;
- bool tmz_surface;
+ uint8_t tmz_surface;
union {
struct{
PHYSICAL_ADDRESS_LOC addr;
@@ -76,6 +77,10 @@ struct dc_plane_address {
union large_integer dcc_const_color;
} grph;
+ struct {
+ PHYSICAL_ADDRESS_LOC addr;
+ } lut3d;
+
/*stereo*/
struct {
PHYSICAL_ADDRESS_LOC left_addr;
@@ -93,7 +98,6 @@ struct dc_plane_address {
PHYSICAL_ADDRESS_LOC right_alpha_addr;
PHYSICAL_ADDRESS_LOC right_alpha_meta_addr;
union large_integer right_alpha_dcc_const_color;
-
} grph_stereo;
/*video progressive*/
@@ -244,7 +248,7 @@ enum pixel_format {
#define DC_MAX_DIRTY_RECTS 3
struct dc_flip_addrs {
struct dc_plane_address address;
- unsigned int flip_timestamp_in_us;
+ unsigned long long flip_timestamp_in_us;
bool flip_immediate;
/* TODO: add flip duration for FreeSync */
bool triplebuffer_flips;
@@ -263,6 +267,9 @@ enum tripleBuffer_enable {
DC_TRIPLEBUFFER_DISABLE = 0x0,
DC_TRIPLEBUFFER_ENABLE = 0x1,
};
+enum tile_split_values_new {
+ DC_SURF_TILE_SPLIT_1KB = 0x4,
+};
/* TODO: These values come from hardware spec. We need to readdress this
* if they ever change.
@@ -320,86 +327,115 @@ enum swizzle_mode_values {
DC_SW_UNKNOWN = DC_SW_MAX
};
-union dc_tiling_info {
-
- struct {
- /* Specifies the number of memory banks for tiling
- * purposes.
- * Only applies to 2D and 3D tiling modes.
- * POSSIBLE VALUES: 2,4,8,16
- */
- unsigned int num_banks;
- /* Specifies the number of tiles in the x direction
- * to be incorporated into the same bank.
- * Only applies to 2D and 3D tiling modes.
- * POSSIBLE VALUES: 1,2,4,8
- */
- unsigned int bank_width;
- unsigned int bank_width_c;
- /* Specifies the number of tiles in the y direction to
- * be incorporated into the same bank.
- * Only applies to 2D and 3D tiling modes.
- * POSSIBLE VALUES: 1,2,4,8
- */
- unsigned int bank_height;
- unsigned int bank_height_c;
- /* Specifies the macro tile aspect ratio. Only applies
- * to 2D and 3D tiling modes.
- */
- unsigned int tile_aspect;
- unsigned int tile_aspect_c;
- /* Specifies the number of bytes that will be stored
- * contiguously for each tile.
- * If the tile data requires more storage than this
- * amount, it is split into multiple slices.
- * This field must not be larger than
- * GB_ADDR_CONFIG.DRAM_ROW_SIZE.
- * Only applies to 2D and 3D tiling modes.
- * For color render targets, TILE_SPLIT >= 256B.
- */
- enum tile_split_values tile_split;
- enum tile_split_values tile_split_c;
- /* Specifies the addressing within a tile.
- * 0x0 - DISPLAY_MICRO_TILING
- * 0x1 - THIN_MICRO_TILING
- * 0x2 - DEPTH_MICRO_TILING
- * 0x3 - ROTATED_MICRO_TILING
- */
- enum tile_mode_values tile_mode;
- enum tile_mode_values tile_mode_c;
- /* Specifies the number of pipes and how they are
- * interleaved in the surface.
- * Refer to memory addressing document for complete
- * details and constraints.
- */
- unsigned int pipe_config;
- /* Specifies the tiling mode of the surface.
- * THIN tiles use an 8x8x1 tile size.
- * THICK tiles use an 8x8x4 tile size.
- * 2D tiling modes rotate banks for successive Z slices
- * 3D tiling modes rotate pipes and banks for Z slices
- * Refer to memory addressing document for complete
- * details and constraints.
- */
- enum array_mode_values array_mode;
- } gfx8;
-
- struct {
- enum swizzle_mode_values swizzle;
- unsigned int num_pipes;
- unsigned int max_compressed_frags;
- unsigned int pipe_interleave;
-
- unsigned int num_banks;
- unsigned int num_shader_engines;
- unsigned int num_rb_per_se;
- bool shaderEnable;
+// Definition of swizzle modes with addr3 ASICs
+enum swizzle_mode_addr3_values {
+ DC_ADDR3_SW_LINEAR = 0,
+ DC_ADDR3_SW_256B_2D = 1,
+ DC_ADDR3_SW_4KB_2D = 2,
+ DC_ADDR3_SW_64KB_2D = 3,
+ DC_ADDR3_SW_256KB_2D = 4,
+ DC_ADDR3_SW_4KB_3D = 5,
+ DC_ADDR3_SW_64KB_3D = 6,
+ DC_ADDR3_SW_256KB_3D = 7,
+ DC_ADDR3_SW_MAX = 8,
+ DC_ADDR3_SW_UNKNOWN = DC_ADDR3_SW_MAX
+};
+
+enum dc_gfxversion {
+ DcGfxVersion7 = 0,
+ DcGfxVersion8,
+ DcGfxVersion9,
+ DcGfxVersion10,
+ DcGfxVersion11,
+ DcGfxAddr3,
+ DcGfxVersionUnknown
+};
+
+ struct dc_tiling_info {
+ unsigned int gfxversion; // Specifies which part of the union to use. Must be an enum dc_gfxversion value
+ union {
+ struct {
+ /* Specifies the number of memory banks for tiling
+ * purposes.
+ * Only applies to 2D and 3D tiling modes.
+ * POSSIBLE VALUES: 2,4,8,16
+ */
+ unsigned int num_banks;
+ /* Specifies the number of tiles in the x direction
+ * to be incorporated into the same bank.
+ * Only applies to 2D and 3D tiling modes.
+ * POSSIBLE VALUES: 1,2,4,8
+ */
+ unsigned int bank_width;
+ unsigned int bank_width_c;
+ /* Specifies the number of tiles in the y direction to
+ * be incorporated into the same bank.
+ * Only applies to 2D and 3D tiling modes.
+ * POSSIBLE VALUES: 1,2,4,8
+ */
+ unsigned int bank_height;
+ unsigned int bank_height_c;
+ /* Specifies the macro tile aspect ratio. Only applies
+ * to 2D and 3D tiling modes.
+ */
+ unsigned int tile_aspect;
+ unsigned int tile_aspect_c;
+ /* Specifies the number of bytes that will be stored
+ * contiguously for each tile.
+ * If the tile data requires more storage than this
+ * amount, it is split into multiple slices.
+ * This field must not be larger than
+ * GB_ADDR_CONFIG.DRAM_ROW_SIZE.
+ * Only applies to 2D and 3D tiling modes.
+ * For color render targets, TILE_SPLIT >= 256B.
+ */
+ enum tile_split_values tile_split;
+ enum tile_split_values tile_split_c;
+ /* Specifies the addressing within a tile.
+ * 0x0 - DISPLAY_MICRO_TILING
+ * 0x1 - THIN_MICRO_TILING
+ * 0x2 - DEPTH_MICRO_TILING
+ * 0x3 - ROTATED_MICRO_TILING
+ */
+ enum tile_mode_values tile_mode;
+ enum tile_mode_values tile_mode_c;
+ /* Specifies the number of pipes and how they are
+ * interleaved in the surface.
+ * Refer to memory addressing document for complete
+ * details and constraints.
+ */
+ unsigned int pipe_config;
+ /* Specifies the tiling mode of the surface.
+ * THIN tiles use an 8x8x1 tile size.
+ * THICK tiles use an 8x8x4 tile size.
+ * 2D tiling modes rotate banks for successive Z slices
+ * 3D tiling modes rotate pipes and banks for Z slices
+ * Refer to memory addressing document for complete
+ * details and constraints.
+ */
+ enum array_mode_values array_mode;
+ } gfx8;
- bool meta_linear;
- bool rb_aligned;
- bool pipe_aligned;
- unsigned int num_pkrs;
- } gfx9;
+ struct {
+ enum swizzle_mode_values swizzle;
+ unsigned int num_pipes;
+ unsigned int max_compressed_frags;
+ unsigned int pipe_interleave;
+
+ unsigned int num_banks;
+ unsigned int num_shader_engines;
+ unsigned int num_rb_per_se;
+ bool shaderEnable;
+
+ bool meta_linear;
+ bool rb_aligned;
+ bool pipe_aligned;
+ unsigned int num_pkrs;
+ } gfx9;/*gfx9, gfx10 and above*/
+ struct {
+ enum swizzle_mode_addr3_values swizzle;
+ } gfx_addr3;/*gfx with addr3 and above*/
+ };
 };
/* Rotation angle */
@@ -461,10 +497,12 @@ struct dc_cursor_mi_param {
unsigned int pixel_clk_khz;
unsigned int ref_clk_khz;
struct rect viewport;
+ struct rect recout;
struct fixed31_32 h_scale_ratio;
struct fixed31_32 v_scale_ratio;
enum dc_rotation_angle rotation;
bool mirror;
+ struct dc_stream_state *stream;
};
/* IPP related types */
@@ -615,7 +653,8 @@ enum dc_color_space {
COLOR_SPACE_YCBCR709_LIMITED,
COLOR_SPACE_2020_RGB_FULLRANGE,
COLOR_SPACE_2020_RGB_LIMITEDRANGE,
- COLOR_SPACE_2020_YCBCR,
+ COLOR_SPACE_2020_YCBCR_LIMITED,
+ COLOR_SPACE_2020_YCBCR_FULL,
COLOR_SPACE_ADOBERGB,
COLOR_SPACE_DCIP3,
COLOR_SPACE_DISPLAYNATIVE,
@@ -623,6 +662,7 @@ enum dc_color_space {
COLOR_SPACE_APPCTRL,
COLOR_SPACE_CUSTOMPOINTS,
COLOR_SPACE_YCBCR709_BLACK,
+ COLOR_SPACE_2020_YCBCR = COLOR_SPACE_2020_YCBCR_LIMITED,
};
enum dc_dither_option {
@@ -769,9 +809,6 @@ struct dc_crtc_timing_flags {
uint32_t LTE_340MCSC_SCRAMBLE:1;
uint32_t DSC : 1; /* Use DSC with this timing */
-#ifndef TRIM_FSFT
- uint32_t FAST_TRANSPORT: 1;
-#endif
uint32_t VBLANK_SYNCHRONIZABLE: 1;
};
@@ -829,9 +866,7 @@ struct dc_dsc_config {
uint32_t version_minor; /* DSC minor version. Full version is formed as 1.version_minor. */
bool ycbcr422_simple; /* Tell DSC engine to convert YCbCr 4:2:2 to 'YCbCr 4:2:2 simple'. */
int32_t rc_buffer_size; /* DSC RC buffer block size in bytes */
-#if defined(CONFIG_DRM_AMD_DC_FP)
bool is_frl; /* indicate if DSC is applied based on HDMI FRL sink's capability */
-#endif
bool is_dp; /* indicate if DSC is applied based on DP's capability */
uint32_t mst_pbn; /* pbn of display on dsc mst hub */
const struct dc_dsc_rc_params_override *rc_params_ovrd; /* DM owned memory. If not NULL, apply custom dsc rc params */
@@ -939,24 +974,25 @@ struct dc_crtc_timing {
uint32_t pix_clk_100hz;
uint32_t min_refresh_in_uhz;
+ uint32_t max_refresh_in_uhz;
uint32_t vic;
uint32_t hdmi_vic;
uint32_t rid;
uint32_t fr_index;
+ uint32_t frl_uncompressed_video_bandwidth_in_kbps;
enum dc_timing_3d_format timing_3d_format;
enum dc_color_depth display_color_depth;
enum dc_pixel_encoding pixel_encoding;
enum dc_aspect_ratio aspect_ratio;
enum scanning_type scan_type;
-#ifndef TRIM_FSFT
- uint32_t fast_transport_output_rate_100hz;
-#endif
-
struct dc_crtc_timing_flags flags;
uint32_t dsc_fixed_bits_per_pixel_x16; /* DSC target bitrate in 1/16 of bpp (e.g. 128 -> 8bpp) */
struct dc_dsc_config dsc_cfg;
+
+ /* The number of pixels that HBlank has been expanded by from the original EDID timing. */
+ uint32_t expanded_hblank;
};
enum trigger_delay {
@@ -981,6 +1017,8 @@ struct dc_crtc_timing_adjust {
uint32_t v_total_max;
uint32_t v_total_mid;
uint32_t v_total_mid_frame_num;
+ uint32_t allow_otg_v_count_halt;
+ uint8_t timing_adjust_pending;
};
@@ -1057,6 +1095,25 @@ enum cm_gamut_coef_format {
CM_GAMUT_REMAP_COEF_FORMAT_S3_12 = 1
};
+enum mpcc_gamut_remap_mode_select {
+ MPCC_GAMUT_REMAP_MODE_SELECT_0 = 0,
+ MPCC_GAMUT_REMAP_MODE_SELECT_1,
+ MPCC_GAMUT_REMAP_MODE_SELECT_2
+};
+
+enum mpcc_gamut_remap_id {
+ MPCC_OGAM_GAMUT_REMAP,
+ MPCC_MCM_FIRST_GAMUT_REMAP,
+ MPCC_MCM_SECOND_GAMUT_REMAP,
+ MPCC_RMCM_GAMUT_REMAP,
+};
+
+enum cursor_matrix_mode {
+ CUR_MATRIX_BYPASS = 0,
+ CUR_MATRIX_SET_A,
+ CUR_MATRIX_SET_B
+};
+
struct mcif_warmup_params {
union large_integer start_address;
unsigned int address_increment;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h
new file mode 100644
index 000000000000..14feb843e694
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_PLANE_H_
+#define _DC_PLANE_H_
+
+#include "dc_hw_types.h"
+
+union dc_plane_status_update_flags {
+ struct {
+ uint32_t address : 1;
+ } bits;
+ uint32_t raw;
+};
+
+struct dc_plane_state *dc_create_plane_state(const struct dc *dc);
+const struct dc_plane_status *dc_plane_get_status(
+ const struct dc_plane_state *plane_state,
+ union dc_plane_status_update_flags flags);
+void dc_plane_state_retain(struct dc_plane_state *plane_state);
+void dc_plane_state_release(struct dc_plane_state *plane_state);
+
+void dc_plane_force_dcc_and_tiling_disable(struct dc_plane_state *plane_state,
+ bool clear_tiling);
+
+
+void dc_plane_copy_config(struct dc_plane_state *dst, const struct dc_plane_state *src);
+
+#endif /* _DC_PLANE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
new file mode 100644
index 000000000000..ab13335f1d01
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane_priv.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_PLANE_PRIV_H_
+#define _DC_PLANE_PRIV_H_
+
+#include "dc_plane.h"
+
+void dc_plane_construct(struct dc_context *ctx, struct dc_plane_state *plane_state);
+void dc_plane_destruct(struct dc_plane_state *plane_state);
+uint8_t dc_plane_get_pipe_mask(struct dc_state *dc_state, const struct dc_plane_state *plane_state);
+
+#endif /* _DC_PLANE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
new file mode 100644
index 000000000000..55704d4457ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dc_spl_translate.h"
+#include "dcn20/dcn20_dpp.h"
+#include "dcn32/dcn32_dpp.h"
+#include "dcn401/dcn401_dpp.h"
+
+static struct spl_callbacks dcn2_spl_callbacks = {
+ .spl_calc_lb_num_partitions = dscl2_spl_calc_lb_num_partitions,
+};
+static struct spl_callbacks dcn32_spl_callbacks = {
+ .spl_calc_lb_num_partitions = dscl32_spl_calc_lb_num_partitions,
+};
+static struct spl_callbacks dcn401_spl_callbacks = {
+ .spl_calc_lb_num_partitions = dscl401_spl_calc_lb_num_partitions,
+};
+static void populate_splrect_from_rect(struct spl_rect *spl_rect, const struct rect *rect)
+{
+ spl_rect->x = rect->x;
+ spl_rect->y = rect->y;
+ spl_rect->width = rect->width;
+ spl_rect->height = rect->height;
+}
+static void populate_rect_from_splrect(struct rect *rect, const struct spl_rect *spl_rect)
+{
+ rect->x = spl_rect->x;
+ rect->y = spl_rect->y;
+ rect->width = spl_rect->width;
+ rect->height = spl_rect->height;
+}
+static void populate_spltaps_from_taps(struct spl_taps *spl_scaling_quality,
+ const struct scaling_taps *scaling_quality)
+{
+ spl_scaling_quality->h_taps_c = scaling_quality->h_taps_c;
+ spl_scaling_quality->h_taps = scaling_quality->h_taps;
+ spl_scaling_quality->v_taps_c = scaling_quality->v_taps_c;
+ spl_scaling_quality->v_taps = scaling_quality->v_taps;
+ spl_scaling_quality->integer_scaling = scaling_quality->integer_scaling;
+}
+static void populate_taps_from_spltaps(struct scaling_taps *scaling_quality,
+ const struct spl_taps *spl_scaling_quality)
+{
+ scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c + 1;
+ scaling_quality->h_taps = spl_scaling_quality->h_taps + 1;
+ scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c + 1;
+ scaling_quality->v_taps = spl_scaling_quality->v_taps + 1;
+}
+static void populate_ratios_from_splratios(struct scaling_ratios *ratios,
+ const struct ratio *spl_ratios)
+{
+ ratios->horz = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio >> 5, 3, 19);
+ ratios->vert = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio >> 5, 3, 19);
+ ratios->horz_c = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio_c >> 5, 3, 19);
+ ratios->vert_c = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio_c >> 5, 3, 19);
+}
+static void populate_inits_from_splinits(struct scl_inits *inits,
+ const struct init *spl_inits)
+{
+ inits->h = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int, spl_inits->h_filter_init_frac >> 5, 0, 19);
+ inits->v = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int, spl_inits->v_filter_init_frac >> 5, 0, 19);
+ inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19);
+ inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19);
+}
+static void populate_splformat_from_format(enum spl_pixel_format *spl_pixel_format, const enum pixel_format pixel_format)
+{
+ if (pixel_format < PIXEL_FORMAT_INVALID)
+ *spl_pixel_format = (enum spl_pixel_format)pixel_format;
+ else
+ *spl_pixel_format = SPL_PIXEL_FORMAT_INVALID;
+}
+/// @brief Translate SPL input parameters from pipe context
+/// @param pipe_ctx
+/// @param spl_in
+void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_in *spl_in)
+{
+ const struct dc_plane_state *plane_state = pipe_ctx->plane_state;
+ const struct dc_stream_state *stream = pipe_ctx->stream;
+ struct rect odm_slice_src = resource_get_odm_slice_src_rect(pipe_ctx);
+
+ // Assign the function to calculate the number of partitions in the line buffer
+ // This is used to determine the vtap support
+ switch (plane_state->ctx->dce_version) {
+ case DCN_VERSION_2_0:
+ spl_in->callbacks = dcn2_spl_callbacks;
+ break;
+ case DCN_VERSION_3_2:
+ spl_in->callbacks = dcn32_spl_callbacks;
+ break;
+ case DCN_VERSION_4_01:
+ spl_in->callbacks = dcn401_spl_callbacks;
+ break;
+ default:
+ spl_in->callbacks = dcn2_spl_callbacks;
+ }
+ // Translate the plane_res scl_data pixel format into the spl_in basic_in format field
+ populate_splformat_from_format(&spl_in->basic_in.format, pipe_ctx->plane_res.scl_data.format);
+ // Copy the stream view_format into the spl_in basic_out view_format field
+ spl_in->basic_out.view_format = (enum spl_view_3d)stream->view_format;
+ // Populate spl input basic input clip rect from plane state clip rect
+ populate_splrect_from_rect(&spl_in->basic_in.clip_rect, &plane_state->clip_rect);
+ // Populate spl input basic out src rect from stream src rect
+ populate_splrect_from_rect(&spl_in->basic_out.src_rect, &stream->src);
+ // Populate spl input basic out dst rect from stream dst rect
+ populate_splrect_from_rect(&spl_in->basic_out.dst_rect, &stream->dst);
+ // Copy the plane state rotation into the spl input basic_in rotation field
+ spl_in->basic_in.rotation = (enum spl_rotation_angle)plane_state->rotation;
+ // Populate spl input basic input src rect from plane state src rect
+ populate_splrect_from_rect(&spl_in->basic_in.src_rect, &plane_state->src_rect);
+ // Populate spl input basic input dst rect from plane state dst rect
+ populate_splrect_from_rect(&spl_in->basic_in.dst_rect, &plane_state->dst_rect);
+ // Copy the plane state horizontal mirror flag into the spl input basic_in field
+ spl_in->basic_in.horizontal_mirror = plane_state->horizontal_mirror;
+
+ // Calculate horizontal splits and split index
+ spl_in->basic_in.num_h_slices_recout_width_align.use_recout_width_aligned = false;
+ spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_num_h_slices =
+ resource_get_mpc_slice_count(pipe_ctx);
+
+ if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE)
+ spl_in->basic_in.mpc_h_slice_index = 0;
+ else
+ spl_in->basic_in.mpc_h_slice_index = resource_get_mpc_slice_index(pipe_ctx);
+
+ populate_splrect_from_rect(&spl_in->basic_out.odm_slice_rect, &odm_slice_src);
+ spl_in->basic_out.odm_combine_factor = 0;
+ spl_in->odm_slice_index = resource_get_odm_slice_index(pipe_ctx);
+ // Set spl input basic_out output_size width to the stream h active (addressable + borders + DSC padding)
+ spl_in->basic_out.output_size.width =
+ stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding;
+ // Set spl input basic_out output_size height to the stream v active (addressable + borders)
+ spl_in->basic_out.output_size.height =
+ stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
+ spl_in->basic_out.max_downscale_src_width =
+ pipe_ctx->stream->ctx->dc->debug.max_downscale_src_width;
+ spl_in->basic_out.always_scale = pipe_ctx->stream->ctx->dc->debug.always_scale;
+ // Copy plane_res scl_data lb_params alpha_en into the spl input basic_out alpha_en field
+ spl_in->basic_out.alpha_en = pipe_ctx->plane_res.scl_data.lb_params.alpha_en;
+ spl_in->basic_out.use_two_pixels_per_container = pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing);
+ // Copy the plane state scaling_quality taps into the spl input scaling_quality field
+ populate_spltaps_from_taps(&spl_in->scaling_quality, &plane_state->scaling_quality);
+ // Translate edge adaptive scaler preference
+ spl_in->prefer_easf = pipe_ctx->stream->ctx->dc->config.prefer_easf;
+ spl_in->disable_easf = false;
+ if (pipe_ctx->stream->ctx->dc->debug.force_easf == 1)
+ spl_in->prefer_easf = false;
+ else if (pipe_ctx->stream->ctx->dc->debug.force_easf == 2)
+ spl_in->disable_easf = true;
+ /* Translate adaptive sharpening preference */
+ unsigned int sharpness_setting = pipe_ctx->stream->ctx->dc->debug.force_sharpness;
+ unsigned int force_sharpness_level = pipe_ctx->stream->ctx->dc->debug.force_sharpness_level;
+ if (sharpness_setting == SHARPNESS_HW_OFF)
+ spl_in->adaptive_sharpness.enable = false;
+ else if (sharpness_setting == SHARPNESS_ZERO) {
+ spl_in->adaptive_sharpness.enable = true;
+ spl_in->adaptive_sharpness.sharpness_level = 0;
+ } else if (sharpness_setting == SHARPNESS_CUSTOM) {
+ /* SAT: read sharpness_range from dc_plane_state */
+ spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_min = plane_state->sharpness_range.sdr_rgb_min;
+ spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_max = plane_state->sharpness_range.sdr_rgb_max;
+ spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_mid = plane_state->sharpness_range.sdr_rgb_mid;
+ spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_min = plane_state->sharpness_range.sdr_yuv_min;
+ spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_max = plane_state->sharpness_range.sdr_yuv_max;
+ spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_mid = plane_state->sharpness_range.sdr_yuv_mid;
+ spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_min = plane_state->sharpness_range.hdr_rgb_min;
+ spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_max = plane_state->sharpness_range.hdr_rgb_max;
+ spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_mid = plane_state->sharpness_range.hdr_rgb_mid;
+
+ if (force_sharpness_level > 0) {
+ if (force_sharpness_level > 10)
+ force_sharpness_level = 10;
+ spl_in->adaptive_sharpness.enable = true;
+ spl_in->adaptive_sharpness.sharpness_level = force_sharpness_level;
+ } else if (!plane_state->adaptive_sharpness_en) {
+ spl_in->adaptive_sharpness.enable = false;
+ spl_in->adaptive_sharpness.sharpness_level = 0;
+ } else {
+ spl_in->adaptive_sharpness.enable = true;
+ spl_in->adaptive_sharpness.sharpness_level = plane_state->sharpness_level;
+ }
+ }
+ // Translate linear light scaling preference
+ if (pipe_ctx->stream->ctx->dc->debug.force_lls > 0)
+ spl_in->lls_pref = pipe_ctx->stream->ctx->dc->debug.force_lls;
+ else
+ spl_in->lls_pref = plane_state->linear_light_scaling;
+ /* Translate chroma subsampling offset ( cositing ) */
+ if (pipe_ctx->stream->ctx->dc->debug.force_cositing)
+ spl_in->basic_in.cositing = pipe_ctx->stream->ctx->dc->debug.force_cositing - 1;
+ else
+ spl_in->basic_in.cositing = plane_state->cositing;
+ /* Translate transfer function */
+ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type;
+ spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf;
+
+ spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active;
+ spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active;
+
+ spl_in->sharpen_policy = (enum sharpen_policy)plane_state->adaptive_sharpness_policy;
+ spl_in->debug.scale_to_sharpness_policy =
+ (enum scale_to_sharpness_policy)pipe_ctx->stream->ctx->dc->debug.scale_to_sharpness_policy;
+
+ /* Check whether the stream is in fullscreen and whether it is HDR.
+ * Use this to determine sharpness levels
+ */
+ spl_in->is_fullscreen = pipe_ctx->stream->sharpening_required;
+ spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream);
+ spl_in->sdr_white_level_nits = plane_state->sdr_white_level_nits;
+}
+
+/// @brief Translate SPL output parameters to pipe context
+/// @param pipe_ctx
+/// @param spl_out
+void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out)
+{
+ // Copy the spl output recout into scaler data recout
+ populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->dscl_prog_data->recout);
+ // Convert the spl output ratios to fixed point and store them in scaler data ratios
+ populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->dscl_prog_data->ratios);
+ // Copy the spl output viewport into scaler data viewport
+ populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->dscl_prog_data->viewport);
+ // Copy the spl output viewport_c into scaler data viewport_c
+ populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->dscl_prog_data->viewport_c);
+ // Store the spl output scaling taps, each incremented by one, in scaler data taps
+ populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->dscl_prog_data->taps);
+ // Convert the spl output filter inits to fixed point and store them in scaler data inits
+ populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->dscl_prog_data->init);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h
new file mode 100644
index 000000000000..eaa5c5373b28
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DC_SPL_TRANSLATE_H__
+#define __DC_SPL_TRANSLATE_H__
+#include "dc.h"
+#include "resource.h"
+#include "dm_helpers.h"
+
+/* Populate SPL input parameters from pipe context
+ * @pipe_ctx: pipe context
+ * @spl_in: spl input structure
+ */
+void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_in *spl_in);
+
+/* Map SPL output parameters to pipe context
+ * @pipe_ctx: pipe context
+ * @spl_out: spl output structure
+ */
+void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out);
+
+#endif /* __DC_SPL_TRANSLATE_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state.h b/drivers/gpu/drm/amd/display/dc/dc_state.h
new file mode 100644
index 000000000000..db1e63a7d460
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_state.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STATE_H_
+#define _DC_STATE_H_
+
+#include "inc/core_status.h"
+
+struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *params);
+void dc_state_copy(struct dc_state *dst_state, struct dc_state *src_state);
+struct dc_state *dc_state_create_copy(struct dc_state *src_state);
+void dc_state_copy_current(struct dc *dc, struct dc_state *dst_state);
+struct dc_state *dc_state_create_current_copy(struct dc *dc);
+void dc_state_construct(struct dc *dc, struct dc_state *state);
+void dc_state_destruct(struct dc_state *state);
+void dc_state_retain(struct dc_state *state);
+void dc_state_release(struct dc_state *state);
+
+enum dc_status dc_state_add_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream);
+
+enum dc_status dc_state_remove_stream(
+ const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream);
+
+bool dc_state_add_plane(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state *plane_state,
+ struct dc_state *state);
+
+bool dc_state_remove_plane(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state *plane_state,
+ struct dc_state *state);
+
+bool dc_state_rem_all_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_state *state);
+
+bool dc_state_add_all_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state * const *plane_states,
+ int plane_count,
+ struct dc_state *state);
+
+struct dc_stream_status *dc_state_get_stream_status(
+ struct dc_state *state,
+ const struct dc_stream_state *stream);
+#endif /* _DC_STATE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
new file mode 100644
index 000000000000..1d9bae56ff6a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STATE_PRIV_H_
+#define _DC_STATE_PRIV_H_
+
+#include "dc_state.h"
+#include "dc_stream.h"
+
+struct dc_stream_state *dc_state_get_stream_from_id(const struct dc_state *state, unsigned int id);
+
+/* Get the type of the provided resource (none, phantom, main) based on the provided
+ * context. If the context is unavailable, determine only if phantom or not.
+ */
+enum mall_stream_type dc_state_get_pipe_subvp_type(const struct dc_state *state,
+ const struct pipe_ctx *pipe_ctx);
+enum mall_stream_type dc_state_get_stream_subvp_type(const struct dc_state *state,
+ const struct dc_stream_state *stream);
+
+/* Gets the phantom stream if main is provided, gets the main if phantom is provided.*/
+struct dc_stream_state *dc_state_get_paired_subvp_stream(const struct dc_state *state,
+ const struct dc_stream_state *stream);
+
+/* allocates a phantom stream or plane and returns a pointer to the object */
+struct dc_stream_state *dc_state_create_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *main_stream);
+struct dc_plane_state *dc_state_create_phantom_plane(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_plane_state *main_plane);
+
+/* deallocates a phantom stream or plane */
+void dc_state_release_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream);
+void dc_state_release_phantom_plane(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_plane_state *phantom_plane);
+
+/* add/remove phantom stream to context and generate subvp meta data */
+enum dc_status dc_state_add_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream,
+ struct dc_stream_state *main_stream);
+enum dc_status dc_state_remove_phantom_stream(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream);
+
+bool dc_state_add_phantom_plane(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_plane_state *phantom_plane,
+ struct dc_state *state);
+
+bool dc_state_remove_phantom_plane(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_plane_state *phantom_plane,
+ struct dc_state *state);
+
+bool dc_state_rem_all_phantom_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_state *state,
+ bool should_release_planes);
+
+bool dc_state_add_all_phantom_planes_for_stream(
+ const struct dc *dc,
+ struct dc_stream_state *phantom_stream,
+ struct dc_plane_state * const *phantom_planes,
+ int plane_count,
+ struct dc_state *state);
+
+bool dc_state_remove_phantom_streams_and_planes(
+ const struct dc *dc,
+ struct dc_state *state);
+
+void dc_state_release_phantom_streams_and_planes(
+ const struct dc *dc,
+ struct dc_state *state);
+
+bool dc_state_is_fams2_in_use(
+ const struct dc *dc,
+ const struct dc_state *state);
+
+
+void dc_state_set_stream_subvp_cursor_limit(const struct dc_stream_state *stream,
+ struct dc_state *state,
+ bool limit);
+
+bool dc_state_get_stream_subvp_cursor_limit(const struct dc_stream_state *stream,
+ struct dc_state *state);
+
+void dc_state_set_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+ struct dc_state *state,
+ bool limit);
+
+bool dc_state_get_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+ struct dc_state *state);
+
+bool dc_state_can_clear_stream_cursor_subvp_limit(const struct dc_stream_state *stream,
+ struct dc_state *state);
+
+bool dc_state_is_subvp_in_use(struct dc_state *state);
+
+#endif /* _DC_STATE_PRIV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 3697ea1d14c1..76cf9fdedab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -38,6 +38,16 @@ struct timing_sync_info {
bool master;
};
+struct mall_stream_config {
+ /* MALL stream config to indicate if the stream is phantom or not.
+ * We will use a phantom stream to indicate that the pipe is phantom.
+ */
+ enum mall_stream_type type;
+ struct dc_stream_state *paired_stream; // master / slave stream
+ bool subvp_limit_cursor_size; /* stream has/is using subvp limiting hw cursor support */
+ bool cursor_size_limit_subvp; /* stream is using hw cursor config preventing subvp */
+};
+
struct dc_stream_status {
int primary_otg_inst;
int stream_enc_inst;
@@ -48,8 +58,10 @@ struct dc_stream_status {
int plane_count;
int audio_inst;
struct timing_sync_info timing_sync_info;
- struct dc_plane_state *plane_states[MAX_SURFACE_NUM];
+ struct dc_plane_state *plane_states[MAX_SURFACES];
bool is_abm_supported;
+ struct mall_stream_config mall_stream_config;
+ bool fpo_in_use;
};
enum hubp_dmdata_mode {
@@ -132,6 +144,8 @@ union stream_update_flags {
uint32_t mst_bw : 1;
uint32_t crtc_timing_adjust : 1;
uint32_t fams_changed : 1;
+ uint32_t scaler_sharpener : 1;
+ uint32_t sharpening_required : 1;
} bits;
uint32_t raw;
@@ -147,33 +161,26 @@ struct test_pattern {
#define SUBVP_DRR_MARGIN_US 100 // 100us for DRR margin (SubVP + DRR)
-enum mall_stream_type {
- SUBVP_NONE, // subvp not in use
- SUBVP_MAIN, // subvp in use, this stream is main stream
- SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream
-};
-
-struct mall_stream_config {
- /* MALL stream config to indicate if the stream is phantom or not.
- * We will use a phantom stream to indicate that the pipe is phantom.
+struct dc_stream_debug_options {
+ char force_odm_combine_segments;
+ /*
+ * When force_odm_combine_segments is non zero, allow dc to
+ * temporarily transition to ODM bypass when minimal transition state
+ * is required to prevent visual glitches showing on the screen
*/
- enum mall_stream_type type;
- struct dc_stream_state *paired_stream; // master / slave stream
+ char allow_transition_for_forced_odm;
};
-/* Temp struct used to save and restore MALL config
- * during validation.
- *
- * TODO: Move MALL config into dc_state instead of stream struct
- * to avoid needing to save/restore.
- */
-struct mall_temp_config {
- struct mall_stream_config mall_stream_config[MAX_PIPES];
- bool is_phantom_plane[MAX_PIPES];
-};
+#define LUMINANCE_DATA_TABLE_SIZE 10
-struct dc_stream_debug_options {
- char force_odm_combine_segments;
+struct luminance_data {
+ bool is_valid;
+ int refresh_rate_hz[LUMINANCE_DATA_TABLE_SIZE];
+ int luminance_millinits[LUMINANCE_DATA_TABLE_SIZE];
+ int flicker_criteria_milli_nits_GAMING;
+ int flicker_criteria_milli_nits_STATIC;
+ int nominal_refresh_rate;
+ int dm_max_decrease_from_nominal;
};
struct dc_stream_state {
@@ -188,7 +195,6 @@ struct dc_stream_state {
struct link_encoder *link_enc;
struct dc_stream_debug_options debug;
struct dc_panel_patch sink_patches;
- union display_content_support content_support;
struct dc_crtc_timing timing;
struct dc_crtc_timing_adjust adjust;
struct dc_info_packet vrr_infopacket;
@@ -197,6 +203,7 @@ struct dc_stream_state {
struct dc_info_packet hfvsif_infopacket;
struct dc_info_packet vtem_infopacket;
struct dc_info_packet adaptive_sync_infopacket;
+ struct dc_info_packet avi_infopacket;
uint8_t dsc_packed_pps[128];
struct rect src; /* composition area */
struct rect dst; /* stream addressable area */
@@ -207,11 +214,12 @@ struct dc_stream_state {
PHYSICAL_ADDRESS_LOC dmdata_address;
bool use_dynamic_meta;
- struct dc_transfer_func *out_transfer_func;
+ struct dc_transfer_func out_transfer_func;
struct colorspace_transform gamut_remap_matrix;
struct dc_csc_transform csc_color_matrix;
enum dc_color_space output_color_space;
+ enum display_content_type content_type;
enum dc_dither_option dither_option;
enum view_3d_format view_format;
@@ -263,6 +271,8 @@ struct dc_stream_state {
struct dc_cursor_attributes cursor_attributes;
struct dc_cursor_position cursor_position;
+ bool hw_cursor_req;
+
uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode
/* from stream struct */
@@ -300,9 +310,11 @@ struct dc_stream_state {
bool has_non_synchronizable_pclk;
bool vblank_synchronized;
- bool fpo_in_use;
- struct mall_stream_config mall_stream_config;
- bool skip_edp_power_down;
+ bool is_phantom;
+
+ struct luminance_data lumin_data;
+ bool scaler_sharpener_update;
+ bool sharpening_required;
};
#define ABM_LEVEL_IMMEDIATE_DISABLE 255
@@ -324,6 +336,8 @@ struct dc_stream_update {
struct dc_info_packet *hfvsif_infopacket;
struct dc_info_packet *vtem_infopacket;
struct dc_info_packet *adaptive_sync_infopacket;
+ struct dc_info_packet *avi_infopacket;
+
bool *dpms_off;
bool integer_scaling_update;
bool *allow_freesync;
@@ -344,6 +358,12 @@ struct dc_stream_update {
struct test_pattern *pending_test_pattern;
struct dc_crtc_timing_adjust *crtc_timing_adjust;
+
+ struct dc_cursor_attributes *cursor_attributes;
+ struct dc_cursor_position *cursor_position;
+ bool *hw_cursor_req;
+ bool *scaler_sharpener_update;
+ bool *sharpening_required;
};
bool dc_is_stream_unchanged(
@@ -416,45 +436,14 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream,
uint32_t *h_position,
uint32_t *v_position);
-enum dc_status dc_add_stream_to_ctx(
- struct dc *dc,
- struct dc_state *new_ctx,
- struct dc_stream_state *stream);
-
-enum dc_status dc_remove_stream_from_ctx(
- struct dc *dc,
- struct dc_state *new_ctx,
- struct dc_stream_state *stream);
-
-
-bool dc_add_plane_to_context(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state *plane_state,
- struct dc_state *context);
-
-bool dc_remove_plane_from_context(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state *plane_state,
- struct dc_state *context);
-
-bool dc_rem_all_planes_for_stream(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_state *context);
-
-bool dc_add_all_planes_for_stream(
- const struct dc *dc,
- struct dc_stream_state *stream,
- struct dc_plane_state * const *plane_states,
- int plane_count,
- struct dc_state *context);
-
bool dc_stream_add_writeback(struct dc *dc,
struct dc_stream_state *stream,
struct dc_writeback_info *wb_info);
+bool dc_stream_fc_disable_writeback(struct dc *dc,
+ struct dc_stream_state *stream,
+ uint32_t dwb_pipe_inst);
+
bool dc_stream_remove_writeback(struct dc *dc,
struct dc_stream_state *stream,
uint32_t dwb_pipe_inst);
@@ -463,10 +452,6 @@ enum dc_status dc_stream_add_dsc_to_resource(struct dc *dc,
struct dc_state *state,
struct dc_stream_state *stream);
-bool dc_stream_warmup_writeback(struct dc *dc,
- int num_dwb,
- struct dc_writeback_info *wb_info);
-
bool dc_stream_dmdata_status_done(struct dc *dc, struct dc_stream_state *stream);
bool dc_stream_set_dynamic_metadata(struct dc *dc,
@@ -476,14 +461,6 @@ bool dc_stream_set_dynamic_metadata(struct dc *dc,
enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream);
/*
- * Set up streams and links associated to drive sinks
- * The streams parameter is an absolute set of all active streams.
- *
- * After this call:
- * Phy, Encoder, Timing Generator are programmed and enabled.
- * New streams are enabled with blank stream; no memory read.
- */
-/*
* Enable stereo when commit_streams is not required,
* for example, frame alternate.
*/
@@ -515,30 +492,43 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink);
void dc_stream_retain(struct dc_stream_state *dc_stream);
void dc_stream_release(struct dc_stream_state *dc_stream);
-struct dc_stream_status *dc_stream_get_status_from_state(
- struct dc_state *state,
- struct dc_stream_state *stream);
struct dc_stream_status *dc_stream_get_status(
struct dc_stream_state *dc_stream);
-#ifndef TRIM_FSFT
-bool dc_optimize_timing_for_fsft(
- struct dc_stream_state *pStream,
- unsigned int max_input_rate_in_khz);
-#endif
-
/*******************************************************************************
* Cursor interfaces - To manages the cursor within a stream
******************************************************************************/
/* TODO: Deprecated once we switch to dc_set_cursor_position */
+
+void program_cursor_attributes(
+ struct dc *dc,
+ struct dc_stream_state *stream);
+
+void program_cursor_position(
+ struct dc *dc,
+ struct dc_stream_state *stream);
+
+bool dc_stream_check_cursor_attributes(
+ const struct dc_stream_state *stream,
+ struct dc_state *state,
+ const struct dc_cursor_attributes *attributes);
+
bool dc_stream_set_cursor_attributes(
struct dc_stream_state *stream,
const struct dc_cursor_attributes *attributes);
+bool dc_stream_program_cursor_attributes(
+ struct dc_stream_state *stream,
+ const struct dc_cursor_attributes *attributes);
+
bool dc_stream_set_cursor_position(
struct dc_stream_state *stream,
const struct dc_cursor_position *position);
+bool dc_stream_program_cursor_position(
+ struct dc_stream_state *stream,
+ const struct dc_cursor_position *position);
+
bool dc_stream_adjust_vmin_vmax(struct dc *dc,
struct dc_stream_state *stream,
@@ -548,26 +538,29 @@ bool dc_stream_get_last_used_drr_vtotal(struct dc *dc,
struct dc_stream_state *stream,
uint32_t *refresh_rate);
-bool dc_stream_get_crtc_position(struct dc *dc,
- struct dc_stream_state **stream,
- int num_streams,
- unsigned int *v_pos,
- unsigned int *nom_v_pos);
-
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
bool dc_stream_forward_crc_window(struct dc_stream_state *stream,
struct rect *rect,
+ uint8_t phy_id,
bool is_stop);
+
+bool dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream,
+ struct crc_window *window,
+ uint8_t phy_id,
+ bool stop);
#endif
bool dc_stream_configure_crc(struct dc *dc,
struct dc_stream_state *stream,
struct crc_params *crc_window,
bool enable,
- bool continuous);
+ bool continuous,
+ uint8_t idx,
+ bool reset);
bool dc_stream_get_crc(struct dc *dc,
struct dc_stream_state *stream,
+ uint8_t idx,
uint32_t *r_cr,
uint32_t *g_y,
uint32_t *b_cb);
@@ -589,11 +582,16 @@ bool dc_stream_set_gamut_remap(struct dc *dc,
bool dc_stream_program_csc_matrix(struct dc *dc,
struct dc_stream_state *stream);
-bool dc_stream_get_crtc_position(struct dc *dc,
- struct dc_stream_state **stream,
- int num_streams,
- unsigned int *v_pos,
- unsigned int *nom_v_pos);
+struct dc_rmcm_3dlut *dc_stream_get_3dlut_for_stream(
+ const struct dc *dc,
+ const struct dc_stream_state *stream,
+ bool allocate_one);
+
+void dc_stream_release_3dlut_for_stream(
+ const struct dc *dc,
+ const struct dc_stream_state *stream);
+
+void dc_stream_init_rmcm_3dlut(struct dc *dc);
struct pipe_ctx *dc_stream_get_pipe_ctx(struct dc_stream_state *stream);
@@ -602,4 +600,8 @@ void dc_dmub_update_dirty_rect(struct dc *dc,
struct dc_stream_state *stream,
struct dc_surface_update *srf_updates,
struct dc_state *context);
+
+bool dc_stream_is_cursor_limit_pending(struct dc *dc, struct dc_stream_state *stream);
+bool dc_stream_can_clear_cursor_limit(struct dc *dc, struct dc_stream_state *stream);
+
#endif /* DC_STREAM_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
new file mode 100644
index 000000000000..ca37eac20986
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream_priv.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DC_STREAM_PRIV_H_
+#define _DC_STREAM_PRIV_H_
+
+#include "dc_stream.h"
+
+bool dc_stream_construct(struct dc_stream_state *stream,
+ struct dc_sink *dc_sink_data);
+void dc_stream_destruct(struct dc_stream_state *stream);
+
+void dc_stream_assign_stream_id(struct dc_stream_state *stream);
+
+/*
+ * Finds the highest refresh rate that can be achieved
+ * from starting_refresh_hz while staying within flicker criteria
+ */
+int dc_stream_calculate_max_flickerless_refresh_rate(struct dc_stream_state *stream,
+ int starting_refresh_hz,
+ bool is_gaming);
+
+/*
+ * Finds the lowest refresh rate that can be achieved
+ * from starting_refresh_hz while staying within flicker criteria
+ */
+int dc_stream_calculate_min_flickerless_refresh_rate(struct dc_stream_state *stream,
+ int starting_refresh_hz,
+ bool is_gaming);
+
+/*
+ * Determines if there will be a flicker when moving between 2 refresh rates
+ */
+bool dc_stream_is_refresh_rate_range_flickerless(struct dc_stream_state *stream,
+ int hz1,
+ int hz2,
+ bool is_gaming);
+
+/*
+ * Determines the max instant vtotal delta decrease that can be applied without
+ * flickering for a given stream
+ */
+unsigned int dc_stream_get_max_flickerless_instant_vtotal_decrease(struct dc_stream_state *stream,
+ bool is_gaming);
+
+/*
+ * Determines the max instant vtotal delta increase that can be applied without
+ * flickering for a given stream
+ */
+unsigned int dc_stream_get_max_flickerless_instant_vtotal_increase(struct dc_stream_state *stream,
+ bool is_gaming);
+
+#endif // _DC_STREAM_PRIV_H_
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 445ad79001ce..b5aa03a3e39c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -76,7 +76,6 @@ struct dc_perf_trace {
unsigned long last_entry_write;
};
-#define MAX_SURFACE_NUM 6
#define NUM_PIXEL_FORMATS 10
enum tiling_mode {
@@ -163,18 +162,6 @@ struct dc_edid {
#define AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS 20
-union display_content_support {
- unsigned int raw;
- struct {
- unsigned int valid_content_type :1;
- unsigned int game_content :1;
- unsigned int cinema_content :1;
- unsigned int photo_content :1;
- unsigned int graphics_content :1;
- unsigned int reserved :27;
- } bits;
-};
-
struct dc_panel_patch {
unsigned int dppowerup_delay;
unsigned int extra_t12_ms;
@@ -188,7 +175,14 @@ struct dc_panel_patch {
unsigned int embedded_tiled_slave;
unsigned int disable_fams;
unsigned int skip_avmute;
+ unsigned int skip_audio_sab_check;
unsigned int mst_start_top_delay;
+ unsigned int remove_sink_ext_caps;
+ unsigned int disable_colorimetry;
+ uint8_t blankstream_before_otg_off;
+ bool oled_optimize_display_on;
+ unsigned int force_mst_blocked_discovery;
+ unsigned int wait_after_dpcd_poweroff_ms;
};
struct dc_edid_caps {
@@ -207,8 +201,6 @@ struct dc_edid_caps {
uint32_t audio_latency;
uint32_t video_latency;
- union display_content_support content_support;
-
uint8_t qs_bit;
uint8_t qy_bit;
@@ -219,6 +211,7 @@ struct dc_edid_caps {
bool edid_hdmi;
bool hdr_supported;
+ bool rr_capable;
struct dc_panel_patch panel_patch;
};
@@ -271,6 +264,7 @@ enum dc_timing_source {
TIMING_SOURCE_EDID_4BYTE,
TIMING_SOURCE_EDID_CEA_DISPLAYID_VTDB,
TIMING_SOURCE_EDID_CEA_RID,
+ TIMING_SOURCE_EDID_DISPLAYID_TYPE5,
TIMING_SOURCE_VBIOS,
TIMING_SOURCE_CV,
TIMING_SOURCE_TV,
@@ -435,7 +429,7 @@ struct dc_dwb_params {
enum dwb_capture_rate capture_rate; /* controls the frame capture rate */
struct scaling_taps scaler_taps; /* Scaling taps */
enum dwb_subsample_position subsample_position;
- struct dc_transfer_func *out_transfer_func;
+ const struct dc_transfer_func *out_transfer_func;
};
/* audio*/
@@ -569,6 +563,12 @@ struct dc_info_packet_128 {
uint8_t sb[128];
};
+struct dc_edid_read_policy {
+ uint32_t max_retry_count;
+ uint32_t delay_time_ms;
+ uint32_t ignore_checksum;
+};
+
#define DC_PLANE_UPDATE_TIMES_MAX 10
struct dc_plane_flip_time {
@@ -577,6 +577,12 @@ struct dc_plane_flip_time {
unsigned int prev_update_time_in_us;
};
+enum dc_alpm_mode {
+ DC_ALPM_AUXWAKE = 0,
+ DC_ALPM_AUXLESS = 1,
+ DC_ALPM_UNSUPPORTED = 0xF,
+};
+
enum dc_psr_state {
PSR_STATE0 = 0x0,
PSR_STATE1,
@@ -603,6 +609,7 @@ enum dc_psr_state {
PSR_STATE5c,
PSR_STATE_HWLOCK_MGR,
PSR_STATE_POLLVUPDATE,
+ PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME,
PSR_STATE_INVALID = 0xFF
};
@@ -621,6 +628,7 @@ struct psr_config {
unsigned int line_time_in_us;
uint8_t rate_control_caps;
uint16_t dsc_slice_height;
+ bool os_request_force_ffu;
};
union dmcu_psr_level {
@@ -733,6 +741,7 @@ struct psr_context {
unsigned int line_time_in_us;
uint8_t rate_control_caps;
uint16_t dsc_slice_height;
+ bool os_request_force_ffu;
};
struct colorspace_transform {
@@ -787,6 +796,7 @@ struct dc_context {
struct dc *dc;
void *driver_context; /* e.g. amdgpu_device */
+ struct dal_logger *logger;
struct dc_perf_trace *perf_trace;
void *cgs_device;
@@ -808,6 +818,7 @@ struct dc_context {
struct cp_psp cp_psp;
uint32_t *dcn_reg_offsets;
uint32_t *nbio_reg_offsets;
+ uint32_t *clk_reg_offsets;
};
/* DSC DPCD capabilities */
@@ -881,6 +892,14 @@ struct dsc_dec_dpcd_caps {
bool is_dp; /* Decoded format */
};
+struct hblank_expansion_dpcd_caps {
+ bool expansion_supported;
+ bool reduction_supported;
+ bool buffer_unit_bytes; /* True: buffer size in bytes. False: buffer size in pixels*/
+ bool buffer_per_port; /* True: buffer size per port. False: buffer size per lane*/
+ uint32_t buffer_size; /* Add 1 to value and multiply by 32 */
+};
+
struct dc_golden_table {
uint16_t dc_golden_table_ver;
uint32_t aux_dphy_rx_control0_val;
@@ -931,11 +950,24 @@ struct display_endpoint_id {
enum display_endpoint_type ep_type;
};
+enum backlight_control_type {
+ BACKLIGHT_CONTROL_PWM = 0,
+ BACKLIGHT_CONTROL_VESA_AUX = 1,
+ BACKLIGHT_CONTROL_AMD_AUX = 2,
+};
+
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+#define MAX_CRC_WINDOW_NUM 2
+
struct otg_phy_mux {
uint8_t phy_output_num;
uint8_t otg_output_num;
};
+
+struct crc_window {
+ struct rect rect;
+ bool enable;
+};
#endif
enum dc_detect_reason {
@@ -1002,10 +1034,6 @@ struct link_mst_stream_allocation_table {
struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM];
};
-struct backlight_settings {
- uint32_t backlight_millinits;
-};
-
/* PSR feature flags */
struct psr_settings {
bool psr_feature_enabled; // PSR is supported by sink
@@ -1023,6 +1051,13 @@ struct psr_settings {
unsigned int psr_sdp_transmit_line_num_deadline;
uint8_t force_ffu_mode;
unsigned int psr_power_opt;
+
+ /**
+ * Some panels cannot handle the idle pattern during PSR entry.
+ * Power down the PHY before disabling the stream to avoid
+ * sending the idle pattern.
+ */
+ uint8_t power_down_phy_before_disable_stream;
};
enum replay_coasting_vtotal_type {
@@ -1033,35 +1068,123 @@ enum replay_coasting_vtotal_type {
PR_COASTING_TYPE_NUM,
};
+enum replay_link_off_frame_count_level {
+ PR_LINK_OFF_FRAME_COUNT_FAIL = 0x0,
+ PR_LINK_OFF_FRAME_COUNT_GOOD = 0x2,
+ PR_LINK_OFF_FRAME_COUNT_BEST = 0x6,
+};
+
+/*
+ * This is a general interface for Replay to
+ * pass a 32-bit variable to DMUB.
+ * The message type indicates which variable
+ * is passed to DMUB.
+ */
+enum replay_FW_Message_type {
+ Replay_Msg_Not_Support = -1,
+ Replay_Set_Timing_Sync_Supported,
+ Replay_Set_Residency_Frameupdate_Timer,
+ Replay_Set_Pseudo_VTotal,
+ Replay_Disabled_Adaptive_Sync_SDP,
+ Replay_Set_General_Cmd,
+};
+
union replay_error_status {
struct {
- unsigned char STATE_TRANSITION_ERROR :1;
- unsigned char LINK_CRC_ERROR :1;
- unsigned char DESYNC_ERROR :1;
- unsigned char RESERVED :5;
+ unsigned int STATE_TRANSITION_ERROR :1;
+ unsigned int LINK_CRC_ERROR :1;
+ unsigned int DESYNC_ERROR :1;
+ unsigned int RESERVED_3 :1;
+ unsigned int LOW_RR_INCORRECT_VTOTAL :1;
+ unsigned int NO_DOUBLED_RR :1;
+ unsigned int RESERVED_6_7 :2;
} bits;
unsigned char raw;
};
-struct replay_config {
- bool replay_supported; // Replay feature is supported
- unsigned int replay_power_opt_supported; // Power opt flags that are supported
- bool replay_smu_opt_supported; // SMU optimization is supported
- unsigned int replay_enable_option; // Replay enablement option
- uint32_t debug_flags; // Replay debug flags
- bool replay_timing_sync_supported; // Replay desync is supported
- union replay_error_status replay_error_status; // Replay error status
+union replay_low_refresh_rate_enable_options {
+ struct {
+ //BIT[0-3]: Replay Low Hz Support control
+ unsigned int ENABLE_LOW_RR_SUPPORT :1;
+ unsigned int SKIP_ASIC_CHECK :1;
+ unsigned int RESERVED_2_3 :2;
+ //BIT[4-15]: Replay Low Hz Enable Scenarios
+ unsigned int ENABLE_STATIC_SCREEN :1;
+ unsigned int ENABLE_FULL_SCREEN_VIDEO :1;
+ unsigned int ENABLE_GENERAL_UI :1;
+ unsigned int RESERVED_7_15 :9;
+ //BIT[16-31]: Replay Low Hz Enable Check
+ unsigned int ENABLE_STATIC_FLICKER_CHECK :1;
+ unsigned int RESERVED_17_31 :15;
+ } bits;
+ unsigned int raw;
};
-/* Replay feature flags */
+struct replay_config {
+ /* Replay feature is supported */
+ bool replay_supported;
+ /* Replay caps support DPCD & EDID caps*/
+ bool replay_cap_support;
+ /* Power opt flags that are supported */
+ unsigned int replay_power_opt_supported;
+ /* SMU optimization is supported */
+ bool replay_smu_opt_supported;
+ /* Replay enablement option */
+ unsigned int replay_enable_option;
+ /* Replay debug flags */
+ uint32_t debug_flags;
+ /* Replay sync is supported */
+ bool replay_timing_sync_supported;
+ /* Replay Disable desync error check. */
+ bool force_disable_desync_error_check;
+ /* Replay Received Desync Error HPD. */
+ bool received_desync_error_hpd;
+ /* Replay feature is supported long vblank */
+ bool replay_support_fast_resync_in_ultra_sleep_mode;
+ /* Replay error status */
+ union replay_error_status replay_error_status;
+ /* Replay Low Hz enable Options */
+ union replay_low_refresh_rate_enable_options low_rr_enable_options;
+ /* Replay coasting vtotal is within low refresh rate range. */
+ bool low_rr_activated;
+ /* Replay low refresh rate supported*/
+ bool low_rr_supported;
+ /* Replay Video Conferencing Optimization Enabled */
+ bool replay_video_conferencing_optimization_enabled;
+ /* Replay alpm mode */
+ enum dc_alpm_mode alpm_mode;
+ /* Replay full screen only */
+ bool os_request_force_ffu;
+};
+
+/* Replay feature flags*/
struct replay_settings {
- struct replay_config config; // Replay configuration
- bool replay_feature_enabled; // Replay feature is ready for activating
- bool replay_allow_active; // Replay is currently active
- unsigned int replay_power_opt_active; // Power opt flags that are activated currently
- bool replay_smu_opt_enable; // SMU optimization is enabled
- uint16_t coasting_vtotal; // Current Coasting vtotal
- uint16_t coasting_vtotal_table[PR_COASTING_TYPE_NUM]; // Coasting vtotal table
+ /* Replay configuration */
+ struct replay_config config;
+ /* Replay feature is ready for activating */
+ bool replay_feature_enabled;
+ /* Replay is currently active */
+ bool replay_allow_active;
+ /* Replay is allowed to use long vblank (inferred from field name; TODO confirm) */
+ bool replay_allow_long_vblank;
+ /* Power opt flags that are activated currently */
+ unsigned int replay_power_opt_active;
+ /* SMU optimization is enabled */
+ bool replay_smu_opt_enable;
+ /* Current Coasting vtotal */
+ uint32_t coasting_vtotal;
+ /* Coasting vtotal table */
+ uint32_t coasting_vtotal_table[PR_COASTING_TYPE_NUM];
+ /* Defer Update Coasting vtotal table */
+ uint32_t defer_update_coasting_vtotal_table[PR_COASTING_TYPE_NUM];
+ /* Maximum link off frame count */
+ uint32_t link_off_frame_count;
+ /* Replay pseudo vtotal for low refresh rate*/
+ uint16_t low_rr_full_screen_video_pseudo_vtotal;
+ /* Replay last pseudo vtotal set to DMUB */
+ uint16_t last_pseudo_vtotal;
+ /* Replay desync error */
+ uint32_t replay_desync_error_fail_count;
};
/* To split out "global" and "per-panel" config settings.
@@ -1094,6 +1217,7 @@ struct dc_panel_config {
bool rc_disable;
bool rc_allow_static_screen;
bool rc_allow_fullscreen_VPB;
+ bool read_psrcap_again;
unsigned int replay_enable_option;
} psr;
/* ABM */
@@ -1113,25 +1237,174 @@ struct dc_panel_config {
} ilr;
};
+#define MAX_SINKS_PER_LINK 4
+
/*
* USB4 DPIA BW ALLOCATION STRUCTS
*/
struct dc_dpia_bw_alloc {
- int sink_verified_bw; // The Verified BW that sink can allocated and use that has been verified already
- int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated
- int sink_max_bw; // The Max BW that sink can require/support
+ int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks
+ int link_verified_bw; // The BW that has already been verified and that the link can allocate and use
+ int link_max_bw; // The Max BW that link can require/support
+ int allocated_bw; // The Actual Allocated BW for this DPIA
int estimated_bw; // The estimated available BW for this DPIA
int bw_granularity; // BW Granularity
+ int dp_overhead; // DP overhead in dp tunneling
bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM
- bool response_ready; // Response ready from the CM side
+ uint8_t nrd_max_lane_count; // Non-reduced max lane count
+ uint8_t nrd_max_link_rate; // Non-reduced max link rate
};
-#define MAX_SINKS_PER_LINK 4
-
enum dc_hpd_enable_select {
HPD_EN_FOR_ALL_EDP = 0,
HPD_EN_FOR_PRIMARY_EDP_ONLY,
HPD_EN_FOR_SECONDARY_EDP_ONLY,
};
+enum dc_cm2_shaper_3dlut_setting {
+ DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL,
+ DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER,
+ /* Bypassing Shaper will always bypass 3DLUT */
+ DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER_3DLUT
+};
+
+enum dc_cm2_gpu_mem_layout {
+ DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB,
+ DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR,
+ DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR
+};
+
+enum dc_cm2_gpu_mem_pixel_component_order {
+ DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA,
+};
+
+enum dc_cm2_gpu_mem_format {
+ DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB,
+ DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB,
+ DC_CM2_GPU_MEM_FORMAT_16161616_FLOAT_FP1_5_10
+};
+
+struct dc_cm2_gpu_mem_format_parameters {
+ enum dc_cm2_gpu_mem_format format;
+ union {
+ struct {
+ /* bias & scale for float only */
+ uint16_t bias;
+ uint16_t scale;
+ } float_params;
+ };
+};
+
+enum dc_cm2_gpu_mem_size {
+ DC_CM2_GPU_MEM_SIZE_171717,
+ DC_CM2_GPU_MEM_SIZE_TRANSFORMED,
+};
+
+struct dc_cm2_gpu_mem_parameters {
+ struct dc_plane_address addr;
+ enum dc_cm2_gpu_mem_layout layout;
+ struct dc_cm2_gpu_mem_format_parameters format_params;
+ enum dc_cm2_gpu_mem_pixel_component_order component_order;
+ enum dc_cm2_gpu_mem_size size;
+ uint16_t bit_depth;
+};
+
+enum dc_cm2_transfer_func_source {
+ DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM,
+ DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM
+};
+
+struct dc_cm2_component_settings {
+ enum dc_cm2_shaper_3dlut_setting shaper_3dlut_setting;
+ bool lut1d_enable;
+};
+
+/*
+ * All pointers in this struct must remain valid for as long as the 3DLUTs are used
+ */
+struct dc_cm2_func_luts {
+ const struct dc_transfer_func *shaper;
+ struct {
+ enum dc_cm2_transfer_func_source lut3d_src;
+ union {
+ const struct dc_3dlut *lut3d_func;
+ struct dc_cm2_gpu_mem_parameters gpu_mem_params;
+ };
+ bool rmcm_3dlut_shaper_select;
+ bool mpc_3dlut_enable;
+ bool rmcm_3dlut_enable;
+ bool mpc_mcm_post_blend;
+ uint8_t rmcm_tmz;
+ } lut3d_data;
+ const struct dc_transfer_func *lut1d_func;
+};
+
+struct dc_cm2_parameters {
+ struct dc_cm2_component_settings component_settings;
+ struct dc_cm2_func_luts cm2_luts;
+};
+
+enum mall_stream_type {
+ SUBVP_NONE, // subvp not in use
+ SUBVP_MAIN, // subvp in use, this stream is main stream
+ SUBVP_PHANTOM, // subvp in use, this stream is a phantom stream
+};
+
+enum dc_power_source_type {
+ DC_POWER_SOURCE_AC, // wall power
+ DC_POWER_SOURCE_DC, // battery power
+};
+
+struct dc_state_create_params {
+ enum dc_power_source_type power_source;
+};
+
+struct dc_commit_streams_params {
+ struct dc_stream_state **streams;
+ uint8_t stream_count;
+ enum dc_power_source_type power_source;
+};
+
+struct set_backlight_level_params {
+ /* backlight in pwm */
+ uint32_t backlight_pwm_u16_16;
+ /* brightness ramping */
+ uint32_t frame_ramp;
+ /* backlight control type
+ * 0: PWM backlight control
+ * 1: VESA AUX backlight control
+ * 2: AMD AUX backlight control
+ */
+ enum backlight_control_type control_type;
+ /* backlight in millinits */
+ uint32_t backlight_millinits;
+ /* transition time in ms */
+ uint32_t transition_time_in_ms;
+ /* minimum luminance in nits */
+ uint32_t min_luminance;
+ /* maximum luminance in nits */
+ uint32_t max_luminance;
+ /* minimum backlight in pwm */
+ uint32_t min_backlight_pwm;
+ /* maximum backlight in pwm */
+ uint32_t max_backlight_pwm;
+ /* AUX HW instance */
+ uint8_t aux_inst;
+};
+
+enum dc_validate_mode {
+ /* validate the mode and program HW */
+ DC_VALIDATE_MODE_AND_PROGRAMMING = 0,
+ /* only validate the mode */
+ DC_VALIDATE_MODE_ONLY = 1,
+ /* validate the mode and get the max state (voltage level) */
+ DC_VALIDATE_MODE_AND_STATE_INDEX = 2,
+};
+
+struct dc_validation_dpia_set {
+ const struct dc_link *link;
+ const struct dc_tunnel_settings *tunnel_settings;
+ uint32_t required_bw;
+};
+
#endif /* DC_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/Makefile b/drivers/gpu/drm/amd/display/dc/dccg/Makefile
new file mode 100644
index 000000000000..1d5cf0f8e79d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/Makefile
@@ -0,0 +1,103 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'dccg' sub-component of DAL.
+#
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+DCCG_DCN20 = dcn20_dccg.o
+
+AMD_DAL_DCCG_DCN20 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn20/,$(DCCG_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN20)
+
+###############################################################################
+
+DCCG_DCN201 = dcn201_dccg.o
+
+AMD_DAL_DCCG_DCN201 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn201/,$(DCCG_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN201)
+
+###############################################################################
+
+DCCG_DCN21 = dcn21_dccg.o
+
+AMD_DAL_DCCG_DCN21 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn21/,$(DCCG_DCN21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN21)
+
+###############################################################################
+DCCG_DCN30 = dcn30_dccg.o
+
+AMD_DAL_DCCG_DCN30 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn30/,$(DCCG_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN30)
+
+###############################################################################
+DCCG_DCN301 = dcn301_dccg.o
+
+AMD_DAL_DCCG_DCN301 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn301/,$(DCCG_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN301)
+
+###############################################################################
+
+DCCG_DCN31 = dcn31_dccg.o
+
+AMD_DAL_DCCG_DCN31 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn31/,$(DCCG_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN31)
+
+###############################################################################
+
+DCCG_DCN314 = dcn314_dccg.o
+
+AMD_DAL_DCCG_DCN314 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn314/,$(DCCG_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN314)
+
+###############################################################################
+
+DCCG_DCN32 = dcn32_dccg.o
+
+AMD_DAL_DCCG_DCN32 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn32/,$(DCCG_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN32)
+
+###############################################################################
+
+DCCG_DCN35 = dcn35_dccg.o
+
+AMD_DAL_DCCG_DCN35 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn35/,$(DCCG_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN35)
+
+###############################################################################
+DCCG_DCN401 = dcn401_dccg.o
+
+AMD_DAL_DCCG_DCN401 = $(addprefix $(AMDDALPATH)/dc/dccg/dcn401/,$(DCCG_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DCCG_DCN401)
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c
index 5999b2da3a01..5999b2da3a01 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h
index c8602bcfa393..a9b88f5e0c04 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h
@@ -240,12 +240,133 @@
type DTBCLK_P3_EN;\
type DENTIST_DISPCLK_CHG_DONE;
+#define DCCG35_REG_FIELD_LIST(type) \
+ type DPPCLK0_EN;\
+ type DPPCLK1_EN;\
+ type DPPCLK2_EN;\
+ type DPPCLK3_EN;\
+ type DSCCLK0_EN;\
+ type DSCCLK1_EN;\
+ type DSCCLK2_EN;\
+ type DSCCLK3_EN;\
+ type DISPCLK_DCCG_GATE_DISABLE;\
+ type DCCG_GLOBAL_FGCG_REP_DIS; \
+ type PHYASYMCLK_EN;\
+ type PHYASYMCLK_SRC_SEL;\
+ type PHYBSYMCLK_EN;\
+ type PHYBSYMCLK_SRC_SEL;\
+ type PHYCSYMCLK_EN;\
+ type PHYCSYMCLK_SRC_SEL;\
+ type PHYDSYMCLK_EN;\
+ type PHYDSYMCLK_SRC_SEL;\
+ type PHYESYMCLK_EN;\
+ type PHYESYMCLK_SRC_SEL;\
+ type PHYASYMCLK_ROOT_GATE_DISABLE;\
+ type PHYBSYMCLK_ROOT_GATE_DISABLE;\
+ type PHYCSYMCLK_ROOT_GATE_DISABLE;\
+ type PHYDSYMCLK_ROOT_GATE_DISABLE;\
+ type PHYESYMCLK_ROOT_GATE_DISABLE;\
+ type HDMISTREAMCLK0_GATE_DISABLE;\
+ type HDMISTREAMCLK1_GATE_DISABLE;\
+ type HDMISTREAMCLK2_GATE_DISABLE;\
+ type HDMISTREAMCLK3_GATE_DISABLE;\
+ type HDMISTREAMCLK4_GATE_DISABLE;\
+ type HDMISTREAMCLK5_GATE_DISABLE;\
+ type SYMCLKA_CLOCK_ENABLE;\
+ type SYMCLKB_CLOCK_ENABLE;\
+ type SYMCLKC_CLOCK_ENABLE;\
+ type SYMCLKD_CLOCK_ENABLE;\
+ type SYMCLKE_CLOCK_ENABLE;\
+ type SYMCLKA_FE_EN;\
+ type SYMCLKB_FE_EN;\
+ type SYMCLKC_FE_EN;\
+ type SYMCLKD_FE_EN;\
+ type SYMCLKE_FE_EN;\
+ type SYMCLKA_SRC_SEL;\
+ type SYMCLKB_SRC_SEL;\
+ type SYMCLKC_SRC_SEL;\
+ type SYMCLKD_SRC_SEL;\
+ type SYMCLKE_SRC_SEL;\
+ type SYMCLKA_FE_SRC_SEL;\
+ type SYMCLKB_FE_SRC_SEL;\
+ type SYMCLKC_FE_SRC_SEL;\
+ type SYMCLKD_FE_SRC_SEL;\
+ type SYMCLKE_FE_SRC_SEL;\
+ type DTBCLK_P0_GATE_DISABLE;\
+ type DTBCLK_P1_GATE_DISABLE;\
+ type DTBCLK_P2_GATE_DISABLE;\
+ type DTBCLK_P3_GATE_DISABLE;\
+ type DSCCLK0_ROOT_GATE_DISABLE;\
+ type DSCCLK1_ROOT_GATE_DISABLE;\
+ type DSCCLK2_ROOT_GATE_DISABLE;\
+ type DSCCLK3_ROOT_GATE_DISABLE;\
+ type SYMCLKA_FE_ROOT_GATE_DISABLE;\
+ type SYMCLKB_FE_ROOT_GATE_DISABLE;\
+ type SYMCLKC_FE_ROOT_GATE_DISABLE;\
+ type SYMCLKD_FE_ROOT_GATE_DISABLE;\
+ type SYMCLKE_FE_ROOT_GATE_DISABLE;\
+ type DPPCLK0_ROOT_GATE_DISABLE;\
+ type DPPCLK1_ROOT_GATE_DISABLE;\
+ type DPPCLK2_ROOT_GATE_DISABLE;\
+ type DPPCLK3_ROOT_GATE_DISABLE;\
+ type HDMISTREAMCLK0_ROOT_GATE_DISABLE;\
+ type SYMCLKA_ROOT_GATE_DISABLE;\
+ type SYMCLKB_ROOT_GATE_DISABLE;\
+ type SYMCLKC_ROOT_GATE_DISABLE;\
+ type SYMCLKD_ROOT_GATE_DISABLE;\
+ type SYMCLKE_ROOT_GATE_DISABLE;\
+ type PHYA_REFCLK_ROOT_GATE_DISABLE;\
+ type PHYB_REFCLK_ROOT_GATE_DISABLE;\
+ type PHYC_REFCLK_ROOT_GATE_DISABLE;\
+ type PHYD_REFCLK_ROOT_GATE_DISABLE;\
+ type PHYE_REFCLK_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK0_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK1_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK2_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK3_ROOT_GATE_DISABLE;\
+ type DPSTREAMCLK0_GATE_DISABLE;\
+ type DPSTREAMCLK1_GATE_DISABLE;\
+ type DPSTREAMCLK2_GATE_DISABLE;\
+ type DPSTREAMCLK3_GATE_DISABLE;\
+ type SYMCLKA_FE_GATE_DISABLE;\
+ type SYMCLKB_FE_GATE_DISABLE;\
+ type SYMCLKC_FE_GATE_DISABLE;\
+ type SYMCLKD_FE_GATE_DISABLE;\
+ type SYMCLKE_FE_GATE_DISABLE;\
+ type SYMCLKA_GATE_DISABLE;\
+ type SYMCLKB_GATE_DISABLE;\
+ type SYMCLKC_GATE_DISABLE;\
+ type SYMCLKD_GATE_DISABLE;\
+ type SYMCLKE_GATE_DISABLE;\
+
+
+#define DCCG401_REG_FIELD_LIST(type) \
+ type OTG0_TMDS_PIXEL_RATE_DIV;\
+ type DPDTO0_INT;\
+ type OTG1_TMDS_PIXEL_RATE_DIV;\
+ type DPDTO1_INT;\
+ type OTG2_TMDS_PIXEL_RATE_DIV;\
+ type DPDTO2_INT;\
+ type OTG3_TMDS_PIXEL_RATE_DIV;\
+ type DPDTO3_INT;\
+ type SYMCLK32_ROOT_LE2_GATE_DISABLE;\
+ type SYMCLK32_ROOT_LE3_GATE_DISABLE;\
+ type SYMCLK32_LE2_GATE_DISABLE;\
+ type SYMCLK32_LE3_GATE_DISABLE;\
+ type SYMCLK32_LE2_SRC_SEL;\
+ type SYMCLK32_LE3_SRC_SEL;\
+ type SYMCLK32_LE2_EN;\
+ type SYMCLK32_LE3_EN;\
+ type DP_DTO_ENABLE[MAX_PIPES];
+
struct dccg_shift {
DCCG_REG_FIELD_LIST(uint8_t)
DCCG3_REG_FIELD_LIST(uint8_t)
DCCG31_REG_FIELD_LIST(uint8_t)
DCCG314_REG_FIELD_LIST(uint8_t)
DCCG32_REG_FIELD_LIST(uint8_t)
+ DCCG35_REG_FIELD_LIST(uint8_t)
+ DCCG401_REG_FIELD_LIST(uint8_t)
};
struct dccg_mask {
@@ -254,44 +375,59 @@ struct dccg_mask {
DCCG31_REG_FIELD_LIST(uint32_t)
DCCG314_REG_FIELD_LIST(uint32_t)
DCCG32_REG_FIELD_LIST(uint32_t)
+ DCCG35_REG_FIELD_LIST(uint32_t)
+ DCCG401_REG_FIELD_LIST(uint32_t)
};
+#define DCCG_REG_VARIABLE_LIST \
+ uint32_t DPPCLK_DTO_CTRL; \
+ uint32_t DPPCLK_DTO_PARAM[6]; \
+ uint32_t REFCLK_CNTL; \
+ uint32_t DISPCLK_FREQ_CHANGE_CNTL; \
+ uint32_t OTG_PIXEL_RATE_CNTL[MAX_PIPES]; \
+ uint32_t HDMICHARCLK_CLOCK_CNTL[6]; \
+ uint32_t PHYASYMCLK_CLOCK_CNTL; \
+ uint32_t PHYBSYMCLK_CLOCK_CNTL; \
+ uint32_t PHYCSYMCLK_CLOCK_CNTL; \
+ uint32_t PHYDSYMCLK_CLOCK_CNTL; \
+ uint32_t PHYESYMCLK_CLOCK_CNTL; \
+ uint32_t DTBCLK_DTO_MODULO[MAX_PIPES]; \
+ uint32_t DTBCLK_DTO_PHASE[MAX_PIPES]; \
+ uint32_t DCCG_AUDIO_DTBCLK_DTO_MODULO; \
+ uint32_t DCCG_AUDIO_DTBCLK_DTO_PHASE; \
+ uint32_t DCCG_AUDIO_DTO_SOURCE; \
+ uint32_t DPSTREAMCLK_CNTL; \
+ uint32_t HDMISTREAMCLK_CNTL; \
+ uint32_t SYMCLK32_SE_CNTL; \
+ uint32_t SYMCLK32_LE_CNTL; \
+ uint32_t DENTIST_DISPCLK_CNTL; \
+ uint32_t DSCCLK_DTO_CTRL; \
+ uint32_t DSCCLK0_DTO_PARAM; \
+ uint32_t DSCCLK1_DTO_PARAM; \
+ uint32_t DSCCLK2_DTO_PARAM; \
+ uint32_t DSCCLK3_DTO_PARAM; \
+ uint32_t DPSTREAMCLK_ROOT_GATE_DISABLE; \
+ uint32_t DPSTREAMCLK_GATE_DISABLE; \
+ uint32_t DCCG_GATE_DISABLE_CNTL; \
+ uint32_t DCCG_GATE_DISABLE_CNTL2; \
+ uint32_t DCCG_GATE_DISABLE_CNTL3; \
+ uint32_t HDMISTREAMCLK0_DTO_PARAM; \
+ uint32_t DCCG_GATE_DISABLE_CNTL4; \
+ uint32_t OTG_PIXEL_RATE_DIV; \
+ uint32_t DTBCLK_P_CNTL; \
+ uint32_t DPPCLK_CTRL; \
+ uint32_t DCCG_GATE_DISABLE_CNTL5; \
+ uint32_t DCCG_GATE_DISABLE_CNTL6; \
+ uint32_t DCCG_GLOBAL_FGCG_REP_CNTL; \
+ uint32_t SYMCLKA_CLOCK_ENABLE; \
+ uint32_t SYMCLKB_CLOCK_ENABLE; \
+ uint32_t SYMCLKC_CLOCK_ENABLE; \
+ uint32_t SYMCLKD_CLOCK_ENABLE; \
+ uint32_t SYMCLKE_CLOCK_ENABLE; \
+ uint32_t DP_DTO_MODULO[MAX_PIPES]; \
+ uint32_t DP_DTO_PHASE[MAX_PIPES]
struct dccg_registers {
- uint32_t DPPCLK_DTO_CTRL;
- uint32_t DPPCLK_DTO_PARAM[6];
- uint32_t REFCLK_CNTL;
- uint32_t DISPCLK_FREQ_CHANGE_CNTL;
- uint32_t OTG_PIXEL_RATE_CNTL[MAX_PIPES];
- uint32_t HDMICHARCLK_CLOCK_CNTL[6];
- uint32_t PHYASYMCLK_CLOCK_CNTL;
- uint32_t PHYBSYMCLK_CLOCK_CNTL;
- uint32_t PHYCSYMCLK_CLOCK_CNTL;
- uint32_t PHYDSYMCLK_CLOCK_CNTL;
- uint32_t PHYESYMCLK_CLOCK_CNTL;
- uint32_t DTBCLK_DTO_MODULO[MAX_PIPES];
- uint32_t DTBCLK_DTO_PHASE[MAX_PIPES];
- uint32_t DCCG_AUDIO_DTBCLK_DTO_MODULO;
- uint32_t DCCG_AUDIO_DTBCLK_DTO_PHASE;
- uint32_t DCCG_AUDIO_DTO_SOURCE;
- uint32_t DPSTREAMCLK_CNTL;
- uint32_t HDMISTREAMCLK_CNTL;
- uint32_t SYMCLK32_SE_CNTL;
- uint32_t SYMCLK32_LE_CNTL;
- uint32_t DENTIST_DISPCLK_CNTL;
- uint32_t DSCCLK_DTO_CTRL;
- uint32_t DSCCLK0_DTO_PARAM;
- uint32_t DSCCLK1_DTO_PARAM;
- uint32_t DSCCLK2_DTO_PARAM;
- uint32_t DSCCLK3_DTO_PARAM;
- uint32_t DPSTREAMCLK_ROOT_GATE_DISABLE;
- uint32_t DPSTREAMCLK_GATE_DISABLE;
- uint32_t DCCG_GATE_DISABLE_CNTL;
- uint32_t DCCG_GATE_DISABLE_CNTL2;
- uint32_t DCCG_GATE_DISABLE_CNTL3;
- uint32_t HDMISTREAMCLK0_DTO_PARAM;
- uint32_t DCCG_GATE_DISABLE_CNTL4;
- uint32_t OTG_PIXEL_RATE_DIV;
- uint32_t DTBCLK_P_CNTL;
+ DCCG_REG_VARIABLE_LIST;
};
struct dcn_dccg {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.c
index 9a3402148fde..9a3402148fde 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.h
index 80888b0484fb..80888b0484fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn201/dcn201_dccg.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c
index d07c04458d31..d07c04458d31 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.h
index b7efa777ec73..b7efa777ec73 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn21/dcn21_dccg.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.c
index d445dfefc047..d445dfefc047 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.h
index 35a613bb08bf..3f1da7f3a91c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn30/dcn30_dccg.h
@@ -29,15 +29,9 @@
#include "dcn20/dcn20_dccg.h"
-#define DCCG_REG_LIST_DCN3AG() \
- DCCG_COMMON_REG_LIST_DCN_BASE(),\
- SR(PHYASYMCLK_CLOCK_CNTL),\
- SR(PHYBSYMCLK_CLOCK_CNTL),\
- SR(PHYCSYMCLK_CLOCK_CNTL)
-
-
#define DCCG_REG_LIST_DCN30() \
DCCG_REG_LIST_DCN2(),\
+ DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0),\
DCCG_SRII(PIXEL_RATE_CNTL, OTG, 2),\
DCCG_SRII(PIXEL_RATE_CNTL, OTG, 3),\
DCCG_SRII(PIXEL_RATE_CNTL, OTG, 4),\
@@ -46,19 +40,10 @@
SR(PHYBSYMCLK_CLOCK_CNTL),\
SR(PHYCSYMCLK_CLOCK_CNTL)
-#define DCCG_MASK_SH_LIST_DCN3AG(mask_sh) \
- DCCG_MASK_SH_LIST_DCN2_1(mask_sh),\
- DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
- DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
- DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_EN, mask_sh),\
- DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_SRC_SEL, mask_sh),\
- DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_EN, mask_sh),\
- DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_SRC_SEL, mask_sh),\
- DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_FORCE_EN, mask_sh),\
- DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_FORCE_SRC_SEL, mask_sh)
-
#define DCCG_MASK_SH_LIST_DCN3(mask_sh) \
DCCG_MASK_SH_LIST_DCN2(mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_EN, mask_sh),\
DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_SRC_SEL, mask_sh),\
DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_EN, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.c
index 97e9be87afd9..97e9be87afd9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h
index 73db962dbc03..067e49cb238e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn301/dcn301_dccg.h
@@ -56,10 +56,4 @@ struct dccg *dccg301_create(
const struct dccg_shift *dccg_shift,
const struct dccg_mask *dccg_mask);
-struct dccg *dccg301_create(
- struct dc_context *ctx,
- const struct dccg_registers *regs,
- const struct dccg_shift *dccg_shift,
- const struct dccg_mask *dccg_mask);
-
#endif //__DCN301_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn302/dcn302_dccg.h
index c884dde1bb25..c884dde1bb25 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn302/dcn302_dccg.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn303/dcn303_dccg.h
index 294bd757bcb5..2e12fb643005 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn303/dcn303_dccg.h
@@ -2,6 +2,24 @@
/*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
* Authors: AMD
*/
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c
index 8664f0c4c9b7..8664f0c4c9b7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h
index e3caaacf7493..cd261051dc2c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn31/dcn31_dccg.h
@@ -34,12 +34,14 @@
DCCG_SRII(DTO_PARAM, DPPCLK, 1),\
DCCG_SRII(DTO_PARAM, DPPCLK, 2),\
DCCG_SRII(DTO_PARAM, DPPCLK, 3),\
+ DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0),\
SR(PHYASYMCLK_CLOCK_CNTL),\
SR(PHYBSYMCLK_CLOCK_CNTL),\
SR(PHYCSYMCLK_CLOCK_CNTL),\
SR(PHYDSYMCLK_CLOCK_CNTL),\
SR(PHYESYMCLK_CLOCK_CNTL),\
SR(DPSTREAMCLK_CNTL),\
+ SR(HDMISTREAMCLK_CNTL),\
SR(SYMCLK32_SE_CNTL),\
SR(SYMCLK32_LE_CNTL),\
DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0),\
@@ -78,6 +80,8 @@
DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\
DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_EN, mask_sh),\
DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_FORCE_SRC_SEL, mask_sh),\
DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_FORCE_EN, mask_sh),\
@@ -92,6 +96,8 @@
DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK_PIPE1_EN, mask_sh),\
DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK_PIPE2_EN, mask_sh),\
DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK_PIPE3_EN, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_DTO_FORCE_DIS, mask_sh),\
DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, mask_sh),\
DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, mask_sh),\
DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, mask_sh),\
@@ -126,7 +132,7 @@
DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh),\
DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\
- DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_MODE, mask_sh), \
+ DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_MODE, mask_sh),\
DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\
DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\
DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c
index ad3f019a784f..8f6edd8e9beb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.c
@@ -58,8 +58,8 @@ static void dccg314_trigger_dio_fifo_resync(
static void dccg314_get_pixel_rate_div(
struct dccg *dccg,
uint32_t otg_inst,
- enum pixel_rate_div *k1,
- enum pixel_rate_div *k2)
+ uint32_t *k1,
+ uint32_t *k2)
{
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
uint32_t val_k1 = PIXEL_RATE_DIV_NA, val_k2 = PIXEL_RATE_DIV_NA;
@@ -93,8 +93,8 @@ static void dccg314_get_pixel_rate_div(
return;
}
- *k1 = (enum pixel_rate_div)val_k1;
- *k2 = (enum pixel_rate_div)val_k2;
+ *k1 = val_k1;
+ *k2 = val_k2;
}
static void dccg314_set_pixel_rate_div(
@@ -104,7 +104,8 @@ static void dccg314_set_pixel_rate_div(
enum pixel_rate_div k2)
{
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
- enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA;
+ uint32_t cur_k1 = PIXEL_RATE_DIV_NA;
+ uint32_t cur_k2 = PIXEL_RATE_DIV_NA;
// Don't program 0xF into the register field. Not valid since
// K1 / K2 field is only 1 / 2 bits wide
@@ -245,7 +246,7 @@ static void dccg314_set_dtbclk_dto(
}
}
-static void dccg314_set_dpstreamclk(
+void dccg314_set_dpstreamclk(
struct dccg *dccg,
enum streamclk_source src,
int otg_inst,
@@ -373,8 +374,10 @@ static const struct dccg_funcs dccg314_funcs = {
.disable_dsc = dccg31_disable_dscclk,
.enable_dsc = dccg31_enable_dscclk,
.set_pixel_rate_div = dccg314_set_pixel_rate_div,
+ .get_pixel_rate_div = dccg314_get_pixel_rate_div,
.trigger_dio_fifo_resync = dccg314_trigger_dio_fifo_resync,
.set_valid_pixel_rate = dccg314_set_valid_pixel_rate,
+ .set_dtbclk_p_src = dccg314_set_dtbclk_p_src
};
struct dccg *dccg314_create(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h
index 8e07d3151f91..60ea1d248deb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn314/dcn314_dccg.h
@@ -203,4 +203,10 @@ struct dccg *dccg314_create(
const struct dccg_shift *dccg_shift,
const struct dccg_mask *dccg_mask);
+void dccg314_set_dpstreamclk(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ int otg_inst,
+ int dp_hpo_inst);
+
#endif //__DCN314_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.c
index 921f58c0c729..21a6ca5ca192 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.c
@@ -58,8 +58,8 @@ static void dccg32_trigger_dio_fifo_resync(
static void dccg32_get_pixel_rate_div(
struct dccg *dccg,
uint32_t otg_inst,
- enum pixel_rate_div *k1,
- enum pixel_rate_div *k2)
+ uint32_t *k1,
+ uint32_t *k2)
{
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
uint32_t val_k1 = PIXEL_RATE_DIV_NA, val_k2 = PIXEL_RATE_DIV_NA;
@@ -93,8 +93,8 @@ static void dccg32_get_pixel_rate_div(
return;
}
- *k1 = (enum pixel_rate_div)val_k1;
- *k2 = (enum pixel_rate_div)val_k2;
+ *k1 = val_k1;
+ *k2 = val_k2;
}
static void dccg32_set_pixel_rate_div(
@@ -104,8 +104,8 @@ static void dccg32_set_pixel_rate_div(
enum pixel_rate_div k2)
{
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA;
+ uint32_t cur_k1 = PIXEL_RATE_DIV_NA;
+ uint32_t cur_k2 = PIXEL_RATE_DIV_NA;
// Don't program 0xF into the register field. Not valid since
// K1 / K2 field is only 1 / 2 bits wide
@@ -344,7 +344,9 @@ static const struct dccg_funcs dccg32_funcs = {
.otg_add_pixel = dccg32_otg_add_pixel,
.otg_drop_pixel = dccg32_otg_drop_pixel,
.set_pixel_rate_div = dccg32_set_pixel_rate_div,
+ .get_pixel_rate_div = dccg32_get_pixel_rate_div,
.trigger_dio_fifo_resync = dccg32_trigger_dio_fifo_resync,
+ .set_dtbclk_p_src = dccg32_set_dtbclk_p_src,
};
struct dccg *dccg32_create(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.h
index cf5508718122..cf5508718122 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn32/dcn32_dccg.h
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
new file mode 100644
index 000000000000..de6d62401362
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -0,0 +1,2470 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "resource.h"
+#include "dcn35_dccg.h"
+
+#define TO_DCN_DCCG(dccg)\
+ container_of(dccg, struct dcn_dccg, base) /* recover the struct dcn_dccg that embeds 'dccg' as its 'base' member */
+
+#define REG(reg) \
+ (dccg_dcn->regs->reg) /* looks 'reg' up in dccg_dcn->regs; needs a local 'dccg_dcn' in scope */
+
+#undef FN /* drop any earlier FN definition before redefining it below */
+#define FN(reg_name, field_name) \
+ dccg_dcn->dccg_shift->field_name, dccg_dcn->dccg_mask->field_name /* expands to two arguments: the field's shift, then its mask; reg_name is unused */
+
+#define CTX \
+ dccg_dcn->base.ctx /* context taken from the local 'dccg_dcn' */
+#include "logger_types.h"
+#define DC_LOGGER \
+ dccg->ctx->logger /* logger taken from a local 'dccg' pointer */
+
+enum symclk_fe_source { // Values passed to dccg35_set_symclk_fe_src_new() for SYMCLKx_FE_SRC_SEL
+ SYMCLK_FE_SYMCLK_A = 0, // Select functional clock from backend symclk A
+ SYMCLK_FE_SYMCLK_B, // = 1; presumably backend symclk B, by analogy with A
+ SYMCLK_FE_SYMCLK_C, // = 2; presumably backend symclk C
+ SYMCLK_FE_SYMCLK_D, // = 3; presumably backend symclk D
+ SYMCLK_FE_SYMCLK_E, // = 4; presumably backend symclk E
+ SYMCLK_FE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software; the setter programs FE_EN = 0 and FE_SRC_SEL = 0 for it
+};
+
+enum symclk_be_source { // Values passed to dccg35_set_symclk_be_src_new() for SYMCLKx_SRC_SEL; 1-3 are not defined here
+ SYMCLK_BE_PHYCLK = 0, // Select phy clk when sym_clk_enable = 1
+ SYMCLK_BE_DPIACLK_810 = 4,
+ SYMCLK_BE_DPIACLK_162 = 5,
+ SYMCLK_BE_DPIACLK_540 = 6,
+ SYMCLK_BE_DPIACLK_270 = 7,
+ SYMCLK_BE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software; the setter programs CLOCK_ENABLE = 0 and SRC_SEL = 0 for it
+};
+
+enum physymclk_source { // Values passed to dccg35_set_physymclk_src_new() for PHYxSYMCLK_SRC_SEL
+ PHYSYMCLK_PHYCLK = 0, // Select symclk as source of clock which is output to PHY through DCIO.
+ PHYSYMCLK_PHYD18CLK, // = 1; select phyd18clk as the source of clock which is output to PHY through DCIO.
+ PHYSYMCLK_PHYD32CLK, // = 2; select phyd32clk as the source of clock which is output to PHY through DCIO.
+ PHYSYMCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software; the setter programs EN = 0 and SRC_SEL = 0 for it
+};
+
+enum dtbclk_source { // Values passed to dccg35_set_dtbclk_p_src_new() for DTBCLK_Px_SRC_SEL
+ DTBCLK_DPREFCLK = 0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same)
+ DTBCLK_DPREFCLK_0, // = 1; selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same)
+ DTBCLK_DTBCLK0, // = 2; selects source for DTBCLK_P# as DTBCLK0
+ DTBCLK_DTBCLK1, // = 3; selects source for DTBCLK_P# as DTBCLK1
+ DTBCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software; the setter programs EN = 0 and SRC_SEL = 0 for it
+};
+
+enum dppclk_clock_source { // Written as-is to DPPCLKx_EN by dcn35_set_dppclk_src_new()
+ DPP_REFCLK = 0, // refclk is selected
+ DPP_DCCG_DTO, // = 1; functional clock selected is DTO tuned DPPCLK
+};
+
+enum dp_stream_clk_source { // Values passed to dccg35_set_dpstreamclk_src_new() for DPSTREAMCLKx_SRC_SEL
+ DP_STREAM_DTBCLK_P0 = 0, // Selects DTBCLK_P0 as the functional clock for DP_STREAM_CLK
+ DP_STREAM_DTBCLK_P1, // = 1
+ DP_STREAM_DTBCLK_P2, // = 2
+ DP_STREAM_DTBCLK_P3, // = 3
+ DP_STREAM_DTBCLK_P4, // = 4
+ DP_STREAM_DTBCLK_P5, // = 5
+ DP_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software; the setter programs EN = 0 and SRC_SEL = 0 for it
+};
+
+enum hdmi_char_clk { // Source selection values for hdmi_char_clk
+ HDMI_CHAR_PHYAD18CLK = 0, // Selects UNIPHYA PHYD18CLK as the functional clock for hdmi_char_clk
+ HDMI_CHAR_PHYBD18CLK, // = 1
+ HDMI_CHAR_PHYCD18CLK, // = 2
+ HDMI_CHAR_PHYDD18CLK, // = 3
+ HDMI_CHAR_PHYED18CLK, // = 4
+ HDMI_CHAR_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+enum hdmi_stream_clk_source { // Source selection values for HDMI_STREAM_CLK
+ HDMI_STREAM_DTBCLK_P0 = 0, // Selects DTBCLK_P0 as the functional clock for HDMI_STREAM_CLK
+ HDMI_STREAM_DTBCLK_P1, // = 1
+ HDMI_STREAM_DTBCLK_P2, // = 2
+ HDMI_STREAM_DTBCLK_P3, // = 3
+ HDMI_STREAM_DTBCLK_P4, // = 4
+ HDMI_STREAM_DTBCLK_P5, // = 5
+ HDMI_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software
+};
+
+enum symclk32_se_clk_source { // Values passed to dccg35_set_symclk32_se_src_new() for SYMCLK32_SEx_SRC_SEL
+ SYMCLK32_SE_PHYAD32CLK = 0, // Selects UNIPHYA PHYD32CLK as the functional clock for SYMCLK32
+ SYMCLK32_SE_PHYBD32CLK, // = 1
+ SYMCLK32_SE_PHYCD32CLK, // = 2
+ SYMCLK32_SE_PHYDD32CLK, // = 3
+ SYMCLK32_SE_PHYED32CLK, // = 4
+ SYMCLK32_SE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software; the setter programs SRC_SEL = 0 and EN = 0 for it
+};
+
+enum symclk32_le_clk_source { // Values passed to dccg35_set_symclk32_le_src_new() for SYMCLK32_LEx_SRC_SEL
+ SYMCLK32_LE_PHYAD32CLK = 0, // Selects UNIPHYA PHYD32CLK as the functional clock for SYMCLK32
+ SYMCLK32_LE_PHYBD32CLK, // = 1
+ SYMCLK32_LE_PHYCD32CLK, // = 2
+ SYMCLK32_LE_PHYDD32CLK, // = 3
+ SYMCLK32_LE_PHYED32CLK, // = 4
+ SYMCLK32_LE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software; the setter programs SRC_SEL = 0 and EN = 0 for it
+};
+
+enum dsc_clk_source { // Written as-is to DSCCLKx_EN by dccg35_set_dsc_clk_src_new()
+ DSC_CLK_REF_CLK = 0, // Ref clock selected for DSC_CLK
+ DSC_DTO_TUNED_CK_GPU_DISCLK_3, // = 1; DTO divided clock selected as functional clock
+};
+
+
+/*
+ * Allow or block root clock gating of one DSCCLK instance.
+ *
+ * @dccg:      DCCG instance
+ * @inst:      DSC clock instance, 0..3
+ * @allow_rcg: true clears DSCCLK<inst>_ROOT_GATE_DISABLE, false sets it
+ *
+ * A request to allow gating is dropped when the dsc bit of
+ * debug.root_clock_optimization is clear. An out-of-range @inst only hits
+ * BREAK_TO_DEBUGGER() and returns without the trailing delay.
+ */
+static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool allow_rcg)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool dsc_rcg_supported = dccg->ctx->dc->debug.root_clock_optimization.bits.dsc;
+	const uint32_t gate_disable = allow_rcg ? 0 : 1;
+
+	if (allow_rcg && !dsc_rcg_supported)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	/* Gating was just blocked: wait for the clock to ramp */
+	if (gate_disable)
+		udelay(10);
+}
+
+/*
+ * Allow or block clock gating of one SYMCLK32 SE clock.
+ *
+ * @dccg:   DCCG instance
+ * @inst:   SYMCLK32 SE instance, 0..3
+ * @enable: true clears both gate-disable fields, false sets both
+ *
+ * Two fields of DCCG_GATE_DISABLE_CNTL3 are written together:
+ *   SYMCLK32_ROOT_SE#_GATE_DISABLE will clock gate in DCCG,
+ *   SYMCLK32_SE#_GATE_DISABLE will clock gate in HPO only.
+ *
+ * Enabling is dropped when the symclk32_se bit of
+ * debug.root_clock_optimization is clear.
+ */
+static void dccg35_set_symclk32_se_rcg(struct dccg *dccg, int inst, bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool rcg_supported = dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se;
+	const uint32_t gate_disable = enable ? 0 : 1;
+
+	if (enable && !rcg_supported)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+			     SYMCLK32_SE0_GATE_DISABLE, gate_disable,
+			     SYMCLK32_ROOT_SE0_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+			     SYMCLK32_SE1_GATE_DISABLE, gate_disable,
+			     SYMCLK32_ROOT_SE1_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+			     SYMCLK32_SE2_GATE_DISABLE, gate_disable,
+			     SYMCLK32_ROOT_SE2_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+			     SYMCLK32_SE3_GATE_DISABLE, gate_disable,
+			     SYMCLK32_ROOT_SE3_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or block clock gating of one SYMCLK32 LE clock.
+ *
+ * @dccg:   DCCG instance
+ * @inst:   SYMCLK32 LE instance, 0..1
+ * @enable: true clears SYMCLK32_LE<inst>_GATE_DISABLE and
+ *          SYMCLK32_ROOT_LE<inst>_GATE_DISABLE, false sets both
+ *
+ * Enabling is dropped when the symclk32_le bit of
+ * debug.root_clock_optimization is clear.
+ */
+static void dccg35_set_symclk32_le_rcg(struct dccg *dccg, int inst, bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool rcg_supported = dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le;
+	const uint32_t gate_disable = enable ? 0 : 1;
+
+	if (enable && !rcg_supported)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+			     SYMCLK32_LE0_GATE_DISABLE, gate_disable,
+			     SYMCLK32_ROOT_LE0_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+			     SYMCLK32_LE1_GATE_DISABLE, gate_disable,
+			     SYMCLK32_ROOT_LE1_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or block root clock gating of one PHY symclk (PHYA..PHYE).
+ *
+ * @dccg:   DCCG instance
+ * @inst:   PHY index, 0 (A) .. 4 (E)
+ * @enable: true clears PHY<x>SYMCLK_ROOT_GATE_DISABLE in
+ *          DCCG_GATE_DISABLE_CNTL2, false sets it
+ *
+ * Enabling is dropped when the physymclk bit of
+ * debug.root_clock_optimization is clear.
+ */
+static void dccg35_set_physymclk_rcg(struct dccg *dccg, int inst, bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool rcg_supported = dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk;
+	const uint32_t gate_disable = enable ? 0 : 1;
+
+	if (enable && !rcg_supported)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 4:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or block clock gating of one SYMCLK front-end clock (A..E).
+ *
+ * @dccg:   DCCG instance
+ * @inst:   front-end index, 0 (A) .. 4 (E)
+ * @enable: true clears, false sets, both SYMCLK<x>_FE_GATE_DISABLE
+ *          (DCCG_GATE_DISABLE_CNTL2) and SYMCLK<x>_FE_ROOT_GATE_DISABLE
+ *          (DCCG_GATE_DISABLE_CNTL5), written in that order
+ *
+ * Enabling is dropped when the symclk_fe bit of
+ * debug.root_clock_optimization is clear.
+ */
+static void dccg35_set_symclk_fe_rcg(struct dccg *dccg, int inst, bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool rcg_supported = dccg->ctx->dc->debug.root_clock_optimization.bits.symclk_fe;
+	const uint32_t gate_disable = enable ? 0 : 1;
+
+	if (enable && !rcg_supported)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, SYMCLKB_FE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, SYMCLKC_FE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 4:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, SYMCLKE_FE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or block clock gating of one SYMCLK back-end clock (A..E).
+ *
+ * @dccg:   DCCG instance
+ * @inst:   back-end index, 0 (A) .. 4 (E)
+ * @enable: true clears, false sets, both SYMCLK<x>_GATE_DISABLE
+ *          (DCCG_GATE_DISABLE_CNTL2) and SYMCLK<x>_ROOT_GATE_DISABLE
+ *          (DCCG_GATE_DISABLE_CNTL5), written in that order
+ *
+ * There is no symclk_be control bit yet (TBD), so enabling is gated on the
+ * symclk_fe bit of debug.root_clock_optimization instead.
+ */
+static void dccg35_set_symclk_be_rcg(struct dccg *dccg, int inst, bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	/* TBD add symclk_be in rcg control bits */
+	const bool rcg_supported = dccg->ctx->dc->debug.root_clock_optimization.bits.symclk_fe;
+	const uint32_t gate_disable = enable ? 0 : 1;
+
+	if (enable && !rcg_supported)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, SYMCLKB_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, SYMCLKC_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 4:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, SYMCLKE_GATE_DISABLE, gate_disable);
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or block clock gating of one DTBCLK_P instance.
+ *
+ * @dccg:   DCCG instance
+ * @inst:   DTBCLK_P instance, 0..3
+ * @enable: true clears DTBCLK_P<inst>_GATE_DISABLE in
+ *          DCCG_GATE_DISABLE_CNTL5, false sets it
+ *
+ * Enabling is gated on the dpp bit of debug.root_clock_optimization.
+ * NOTE(review): no dtbclk-specific bit is consulted here - confirm that
+ * sharing the dpp bit is intended.
+ */
+static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool rcg_supported = dccg->ctx->dc->debug.root_clock_optimization.bits.dpp;
+	const uint32_t gate_disable = enable ? 0 : 1;
+
+	if (enable && !rcg_supported)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+}
+
+/*
+ * Allow or block root clock gating of one DPPCLK instance.
+ *
+ * @dccg:      DCCG instance
+ * @inst:      DPP clock instance, 0..3
+ * @allow_rcg: true clears DPPCLK<inst>_ROOT_GATE_DISABLE in
+ *             DCCG_GATE_DISABLE_CNTL6, false sets it
+ *
+ * A request to allow gating is dropped when the dpp bit of
+ * debug.root_clock_optimization is clear. Unlike the DSC variant, an
+ * out-of-range @inst falls through to the trailing delay.
+ */
+static void dccg35_set_dppclk_rcg(struct dccg *dccg, int inst, bool allow_rcg)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool dpp_rcg_supported = dccg->ctx->dc->debug.root_clock_optimization.bits.dpp;
+	const uint32_t gate_disable = allow_rcg ? 0 : 1;
+
+	if (allow_rcg && !dpp_rcg_supported)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		break;
+	}
+
+	/* Gating was just blocked: wait for the clock to ramp */
+	if (gate_disable)
+		udelay(10);
+}
+
+/*
+ * Allow or block clock gating of one DPSTREAMCLK instance.
+ *
+ * @dccg:   DCCG instance
+ * @inst:   DP stream clock instance, 0..3
+ * @enable: true clears, false sets, both DPSTREAMCLK<inst>_GATE_DISABLE and
+ *          DPSTREAMCLK<inst>_ROOT_GATE_DISABLE in DCCG_GATE_DISABLE_CNTL5
+ *
+ * Enabling is dropped when the dpstream bit of
+ * debug.root_clock_optimization is clear.
+ */
+static void dccg35_set_dpstreamclk_rcg(struct dccg *dccg, int inst, bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool rcg_supported = dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream;
+	const uint32_t gate_disable = enable ? 0 : 1;
+
+	if (enable && !rcg_supported)
+		return;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+			     DPSTREAMCLK0_GATE_DISABLE, gate_disable,
+			     DPSTREAMCLK0_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 1:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+			     DPSTREAMCLK1_GATE_DISABLE, gate_disable,
+			     DPSTREAMCLK1_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 2:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+			     DPSTREAMCLK2_GATE_DISABLE, gate_disable,
+			     DPSTREAMCLK2_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	case 3:
+		REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+			     DPSTREAMCLK3_GATE_DISABLE, gate_disable,
+			     DPSTREAMCLK3_ROOT_GATE_DISABLE, gate_disable);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Allow or block clock gating of one SYMCLK32 SE clock.
+ *
+ * @dccg:   DCCG instance
+ * @inst:   SYMCLK32 SE instance, 0..3
+ * @enable: true clears, false sets, SYMCLK32_SE<inst>_GATE_DISABLE and
+ *          SYMCLK32_ROOT_SE<inst>_GATE_DISABLE in DCCG_GATE_DISABLE_CNTL3
+ *
+ * The programming sequence is exactly the one implemented by
+ * dccg35_set_symclk32_se_rcg() (same symclk32_se debug-bit check, same
+ * fields, same handling of an out-of-range @inst), so this entry point
+ * forwards to it rather than carrying a second copy that could drift.
+ */
+static void dccg35_set_smclk32_se_rcg(
+	struct dccg *dccg,
+	int inst,
+	bool enable)
+{
+	dccg35_set_symclk32_se_rcg(dccg, inst, enable);
+}
+
+/*
+ * Select the clock source of one DSCCLK instance.
+ *
+ * @dccg: DCCG instance
+ * @inst: DSC clock instance, 0..3
+ * @src:  value written unchanged to DSCCLK<inst>_EN in DSCCLK_DTO_CTRL
+ *
+ * DSCCLK#_EN=0 switches to refclock from functional clock.
+ */
+static void dccg35_set_dsc_clk_src_new(struct dccg *dccg, int inst, enum dsc_clk_source src)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (inst == 0) {
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, src);
+	} else if (inst == 1) {
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, src);
+	} else if (inst == 2) {
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, src);
+	} else if (inst == 3) {
+		REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, src);
+	} else {
+		/* Unknown instance: nothing is programmed */
+		BREAK_TO_DEBUGGER();
+	}
+}
+
+/*
+ * Select the clock source of one SYMCLK32 SE instance.
+ *
+ * @dccg: DCCG instance
+ * @inst: SYMCLK32 SE instance, 0..3
+ * @src:  requested source; SYMCLK32_SE_REFCLK programs SRC_SEL = 0 and
+ *        EN = 0, any other value programs SRC_SEL = @src and EN = 1
+ *
+ * Both fields live in SYMCLK32_SE_CNTL and are written in one update.
+ */
+static void dccg35_set_symclk32_se_src_new(
+	struct dccg *dccg,
+	int inst,
+	enum symclk32_se_clk_source src
+	)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool use_refclk = (src == SYMCLK32_SE_REFCLK);
+	const uint32_t src_sel = use_refclk ? 0 : src;
+	const uint32_t clk_en = use_refclk ? 0 : 1;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+			     SYMCLK32_SE0_SRC_SEL, src_sel,
+			     SYMCLK32_SE0_EN, clk_en);
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+			     SYMCLK32_SE1_SRC_SEL, src_sel,
+			     SYMCLK32_SE1_EN, clk_en);
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+			     SYMCLK32_SE2_SRC_SEL, src_sel,
+			     SYMCLK32_SE2_EN, clk_en);
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLK32_SE_CNTL,
+			     SYMCLK32_SE3_SRC_SEL, src_sel,
+			     SYMCLK32_SE3_EN, clk_en);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Report whether a SYMCLK32 SE instance is enabled with a given source.
+ *
+ * @dccg:              DCCG instance
+ * @symclk_32_se_inst: SYMCLK32 SE instance to inspect, 0..3
+ * @symclk_32_le_inst: value the instance's SRC_SEL field is compared with
+ *
+ * Reads SYMCLK32_SE<inst>_SRC_SEL and SYMCLK32_SE<inst>_EN from
+ * SYMCLK32_SE_CNTL. The fields of the instance named by @symclk_32_se_inst
+ * are read (previously the SE3 fields were read whatever instance was
+ * asked for), mirroring dccg35_is_symclk_fe_src_functional_be().
+ *
+ * Return: 1 when EN is 1 and SRC_SEL equals @symclk_32_le_inst, 0 otherwise
+ * (including for an out-of-range @symclk_32_se_inst).
+ */
+static int
+dccg35_is_symclk32_se_src_functional_le_new(struct dccg *dccg, int symclk_32_se_inst, int symclk_32_le_inst)
+{
+	/* Zeroed so an unknown instance can never look "enabled" */
+	uint32_t en = 0;
+	uint32_t src_sel = 0;
+
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (symclk_32_se_inst) {
+	case 0:
+		REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, &src_sel, SYMCLK32_SE0_EN, &en);
+		break;
+	case 1:
+		REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, &src_sel, SYMCLK32_SE1_EN, &en);
+		break;
+	case 2:
+		REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, &src_sel, SYMCLK32_SE2_EN, &en);
+		break;
+	case 3:
+		REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, &src_sel, SYMCLK32_SE3_EN, &en);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return 0;
+	}
+
+	if (en == 1 && src_sel == symclk_32_le_inst)
+		return 1;
+
+	return 0;
+}
+
+
+/*
+ * Select the clock source of one SYMCLK32 LE instance.
+ *
+ * @dccg: DCCG instance
+ * @inst: SYMCLK32 LE instance, 0..1
+ * @src:  requested source; SYMCLK32_LE_REFCLK programs SRC_SEL = 0 and
+ *        EN = 0, any other value programs SRC_SEL = @src and EN = 1
+ *
+ * Both fields live in SYMCLK32_LE_CNTL and are written in one update.
+ */
+static void dccg35_set_symclk32_le_src_new(
+	struct dccg *dccg,
+	int inst,
+	enum symclk32_le_clk_source src)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool use_refclk = (src == SYMCLK32_LE_REFCLK);
+	const uint32_t src_sel = use_refclk ? 0 : src;
+	const uint32_t clk_en = use_refclk ? 0 : 1;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLK32_LE_CNTL,
+			     SYMCLK32_LE0_SRC_SEL, src_sel,
+			     SYMCLK32_LE0_EN, clk_en);
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLK32_LE_CNTL,
+			     SYMCLK32_LE1_SRC_SEL, src_sel,
+			     SYMCLK32_LE1_EN, clk_en);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Select the clock source of one DPPCLK instance.
+ *
+ * @dccg: DCCG instance
+ * @inst: DPP clock instance, 0..3
+ * @src:  value written unchanged to DPPCLK<inst>_EN in DPPCLK_CTRL
+ *        (DPP_REFCLK = 0, DPP_DCCG_DTO = 1)
+ */
+static void dcn35_set_dppclk_src_new(struct dccg *dccg,
+	int inst, enum dppclk_clock_source src)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (inst == 0) {
+		REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, src);
+	} else if (inst == 1) {
+		REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, src);
+	} else if (inst == 2) {
+		REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, src);
+	} else if (inst == 3) {
+		REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, src);
+	} else {
+		/* Unknown instance: nothing is programmed */
+		BREAK_TO_DEBUGGER();
+	}
+}
+
+/*
+ * Select the clock source of one DTBCLK_P instance.
+ *
+ * @dccg: DCCG instance
+ * @src:  requested source; DTBCLK_REFCLK programs SRC_SEL = 0 and EN = 0,
+ *        any other value programs SRC_SEL = @src and EN = 1
+ * @inst: DTBCLK_P instance, 0..3
+ *
+ * If DTBCLK_P#_EN is 0 refclock is selected as functional clock.
+ * If DTBCLK_P#_EN is 1 functional clock is selected as DTBCLK_P#_SRC_SEL.
+ * Both fields live in DTBCLK_P_CNTL and are written in one update.
+ */
+static void dccg35_set_dtbclk_p_src_new(
+	struct dccg *dccg,
+	enum dtbclk_source src,
+	int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool use_refclk = (src == DTBCLK_REFCLK);
+	const uint32_t src_sel = use_refclk ? 0 : src;
+	const uint32_t clk_en = use_refclk ? 0 : 1;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DTBCLK_P_CNTL,
+			     DTBCLK_P0_SRC_SEL, src_sel,
+			     DTBCLK_P0_EN, clk_en);
+		break;
+	case 1:
+		REG_UPDATE_2(DTBCLK_P_CNTL,
+			     DTBCLK_P1_SRC_SEL, src_sel,
+			     DTBCLK_P1_EN, clk_en);
+		break;
+	case 2:
+		REG_UPDATE_2(DTBCLK_P_CNTL,
+			     DTBCLK_P2_SRC_SEL, src_sel,
+			     DTBCLK_P2_EN, clk_en);
+		break;
+	case 3:
+		REG_UPDATE_2(DTBCLK_P_CNTL,
+			     DTBCLK_P3_SRC_SEL, src_sel,
+			     DTBCLK_P3_EN, clk_en);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Select the clock source of one DPSTREAMCLK instance.
+ *
+ * @dccg: DCCG instance
+ * @src:  requested source; DP_STREAM_REFCLK programs EN = 0 and
+ *        SRC_SEL = 0, any other value programs EN = 1 and SRC_SEL = @src
+ * @inst: DP stream clock instance, 0..3
+ *
+ * Both fields live in DPSTREAMCLK_CNTL and are written in one update.
+ */
+static void dccg35_set_dpstreamclk_src_new(
+	struct dccg *dccg,
+	enum dp_stream_clk_source src,
+	int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool use_refclk = (src == DP_STREAM_REFCLK);
+	const uint32_t clk_en = use_refclk ? 0 : 1;
+	const uint32_t src_sel = use_refclk ? 0 : src;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+			     DPSTREAMCLK0_EN, clk_en,
+			     DPSTREAMCLK0_SRC_SEL, src_sel);
+		break;
+	case 1:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+			     DPSTREAMCLK1_EN, clk_en,
+			     DPSTREAMCLK1_SRC_SEL, src_sel);
+		break;
+	case 2:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+			     DPSTREAMCLK2_EN, clk_en,
+			     DPSTREAMCLK2_SRC_SEL, src_sel);
+		break;
+	case 3:
+		REG_UPDATE_2(DPSTREAMCLK_CNTL,
+			     DPSTREAMCLK3_EN, clk_en,
+			     DPSTREAMCLK3_SRC_SEL, src_sel);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Select the clock source of one PHY symclk (PHYA..PHYE).
+ *
+ * @dccg: DCCG instance
+ * @src:  requested source; PHYSYMCLK_REFCLK programs EN = 0 and
+ *        SRC_SEL = 0, any other value programs EN = 1 and SRC_SEL = @src
+ * @inst: PHY index, 0 (A) .. 4 (E); each PHY has its own
+ *        PHY<x>SYMCLK_CLOCK_CNTL register
+ */
+static void dccg35_set_physymclk_src_new(
+	struct dccg *dccg,
+	enum physymclk_source src,
+	int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	const bool use_refclk = (src == PHYSYMCLK_REFCLK);
+	const uint32_t clk_en = use_refclk ? 0 : 1;
+	const uint32_t src_sel = use_refclk ? 0 : src;
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
+			     PHYASYMCLK_EN, clk_en,
+			     PHYASYMCLK_SRC_SEL, src_sel);
+		break;
+	case 1:
+		REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
+			     PHYBSYMCLK_EN, clk_en,
+			     PHYBSYMCLK_SRC_SEL, src_sel);
+		break;
+	case 2:
+		REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
+			     PHYCSYMCLK_EN, clk_en,
+			     PHYCSYMCLK_SRC_SEL, src_sel);
+		break;
+	case 3:
+		REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
+			     PHYDSYMCLK_EN, clk_en,
+			     PHYDSYMCLK_SRC_SEL, src_sel);
+		break;
+	case 4:
+		REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL,
+			     PHYESYMCLK_EN, clk_en,
+			     PHYESYMCLK_SRC_SEL, src_sel);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * Select the clock source of one SYMCLK back-end clock (A..E).
+ *
+ * @dccg: DCCG instance
+ * @src:  requested source; SYMCLK_BE_REFCLK programs CLOCK_ENABLE = 0 and
+ *        SRC_SEL = 0, any other value programs CLOCK_ENABLE = 1 and
+ *        SRC_SEL = @src
+ * @inst: back-end index, 0 (A) .. 4 (E); each back end has its own
+ *        SYMCLK<x>_CLOCK_ENABLE register
+ *
+ * An out-of-range @inst programs nothing and is reported through
+ * BREAK_TO_DEBUGGER(), like the other source setters in this file, instead
+ * of being ignored silently.
+ */
+static void dccg35_set_symclk_be_src_new(
+	struct dccg *dccg,
+	enum symclk_be_source src,
+	int inst)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	switch (inst) {
+	case 0:
+		REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+			     SYMCLKA_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1,
+			     SYMCLKA_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src);
+		break;
+	case 1:
+		REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+			     SYMCLKB_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1,
+			     SYMCLKB_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src);
+		break;
+	case 2:
+		REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+			     SYMCLKC_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1,
+			     SYMCLKC_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src);
+		break;
+	case 3:
+		REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+			     SYMCLKD_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1,
+			     SYMCLKD_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src);
+		break;
+	case 4:
+		REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE,
+			     SYMCLKE_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1,
+			     SYMCLKE_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+}
+
+/*
+ * dccg35_is_symclk_fe_src_functional_be - test whether a SYMCLK front end is
+ * enabled and sourced from a given back end.
+ * @dccg: DCCG instance whose registers are read
+ * @symclk_fe_inst: front-end index, 0..4 selecting SYMCLKA..SYMCLKE
+ * @symclk_be_inst: back-end index compared against the FE_SRC_SEL field
+ *
+ * Return: 1 when SYMCLKx_FE_EN reads 1 and SYMCLKx_FE_SRC_SEL equals
+ * @symclk_be_inst, otherwise 0. For a @symclk_fe_inst outside 0..4 nothing is
+ * read, both locals keep their 0 initialiser and the result is 0.
+ */
+static int dccg35_is_symclk_fe_src_functional_be(struct dccg *dccg,
+ int symclk_fe_inst,
+ int symclk_be_inst)
+{
+
+ uint32_t en = 0;
+ uint32_t src_sel = 0;
+
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (symclk_fe_inst) {
+ case 0:
+ REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_SRC_SEL, &src_sel, SYMCLKA_FE_EN, &en);
+ break;
+ case 1:
+ REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, &src_sel, SYMCLKB_FE_EN, &en);
+ break;
+ case 2:
+ REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, &src_sel, SYMCLKC_FE_EN, &en);
+ break;
+ case 3:
+ REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, &src_sel, SYMCLKD_FE_EN, &en);
+ break;
+ case 4:
+ REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, &src_sel, SYMCLKE_FE_EN, &en);
+ break;
+ }
+
+ /* src_sel is unsigned while symclk_be_inst is a signed int. */
+ if (en == 1 && src_sel == symclk_be_inst)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * dccg35_set_symclk_fe_src_new - program the front-end enable and source-select
+ * fields of one SYMCLKx_CLOCK_ENABLE register.
+ * @dccg: DCCG instance whose registers are written
+ * @src: requested source; SYMCLK_FE_REFCLK writes 0 to both SYMCLKx_FE_EN and
+ *       SYMCLKx_FE_SRC_SEL, any other value writes FE_EN = 1 and
+ *       FE_SRC_SEL = @src
+ * @inst: front-end index, 0..4 selecting SYMCLKA..SYMCLKE
+ *
+ * The switch has no default case, so an @inst outside 0..4 is silently
+ * ignored.
+ */
+static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum symclk_fe_source src, int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+ SYMCLKA_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1,
+ SYMCLKA_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1,
+ SYMCLKB_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1,
+ SYMCLKC_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1,
+ SYMCLKD_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src);
+ break;
+ case 4:
+ REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE,
+ SYMCLKE_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1,
+ SYMCLKE_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src);
+ break;
+ }
+}
+
+/*
+ * dccg35_is_fe_rcg - read the root-gate-disable bit of a SYMCLK front end.
+ * @dccg: DCCG instance whose registers are read
+ * @inst: front-end index, 0..4 selecting SYMCLKA..SYMCLKE
+ *
+ * Return: the raw SYMCLKx_FE_ROOT_GATE_DISABLE field of
+ * DCCG_GATE_DISABLE_CNTL5. An @inst outside 0..4 hits BREAK_TO_DEBUGGER() and
+ * returns 0.
+ *
+ * NOTE(review): the name reads as "is root clock gated", but the value
+ * returned is the gate-*disable* bit - confirm the polarity callers expect.
+ */
+static uint32_t dccg35_is_fe_rcg(struct dccg *dccg, int inst)
+{
+ uint32_t enable = 0;
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) {
+ case 0:
+ REG_GET(DCCG_GATE_DISABLE_CNTL5,
+ SYMCLKA_FE_ROOT_GATE_DISABLE, &enable);
+ break;
+ case 1:
+ REG_GET(DCCG_GATE_DISABLE_CNTL5,
+ SYMCLKB_FE_ROOT_GATE_DISABLE, &enable);
+ break;
+ case 2:
+ REG_GET(DCCG_GATE_DISABLE_CNTL5,
+ SYMCLKC_FE_ROOT_GATE_DISABLE, &enable);
+ break;
+ case 3:
+ REG_GET(DCCG_GATE_DISABLE_CNTL5,
+ SYMCLKD_FE_ROOT_GATE_DISABLE, &enable);
+ break;
+ case 4:
+ REG_GET(DCCG_GATE_DISABLE_CNTL5,
+ SYMCLKE_FE_ROOT_GATE_DISABLE, &enable);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ break;
+ }
+ return enable;
+}
+
+/*
+ * dccg35_is_symclk32_se_rcg - read both gate-disable bits of a SYMCLK32 stream
+ * encoder clock.
+ * @dccg: DCCG instance whose registers are read
+ * @inst: stream-encoder index, 0..3
+ *
+ * Return: bitwise OR of the SYMCLK32_SEx_GATE_DISABLE and
+ * SYMCLK32_ROOT_SEx_GATE_DISABLE fields of DCCG_GATE_DISABLE_CNTL3. An @inst
+ * outside 0..3 hits BREAK_TO_DEBUGGER() and returns 0.
+ */
+static uint32_t dccg35_is_symclk32_se_rcg(struct dccg *dccg, int inst)
+{
+ uint32_t disable_l1 = 0;
+ uint32_t disable_l2 = 0;
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) {
+ case 0:
+ REG_GET_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE0_GATE_DISABLE, &disable_l1,
+ SYMCLK32_ROOT_SE0_GATE_DISABLE, &disable_l2);
+ break;
+ case 1:
+ REG_GET_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE1_GATE_DISABLE, &disable_l1,
+ SYMCLK32_ROOT_SE1_GATE_DISABLE, &disable_l2);
+ break;
+ case 2:
+ REG_GET_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE2_GATE_DISABLE, &disable_l1,
+ SYMCLK32_ROOT_SE2_GATE_DISABLE, &disable_l2);
+ break;
+ case 3:
+ REG_GET_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE3_GATE_DISABLE, &disable_l1,
+ SYMCLK32_ROOT_SE3_GATE_DISABLE, &disable_l2);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return 0;
+ }
+
+ /*
+ * Non-zero when either the block-level or the root-level GATE_DISABLE
+ * field is set.
+ * NOTE(review): these are gate-*disable* bits, so non-zero presumably
+ * means gating is off, not active - confirm against the function name.
+ */
+ return (disable_l1 | disable_l2);
+}
+
+/*
+ * dccg35_enable_symclk_fe_new - enable a SYMCLK front end.
+ * @dccg: DCCG instance
+ * @inst: front-end index
+ * @src: source passed on to dccg35_set_symclk_fe_src_new()
+ *
+ * Calls dccg35_set_symclk_fe_rcg(..., false) first, then selects the source.
+ */
+static void dccg35_enable_symclk_fe_new(
+ struct dccg *dccg,
+ int inst,
+ enum symclk_fe_source src)
+{
+ dccg35_set_symclk_fe_rcg(dccg, inst, false);
+ dccg35_set_symclk_fe_src_new(dccg, src, inst);
+}
+
+/*
+ * dccg35_disable_symclk_fe_new - disable a SYMCLK front end.
+ * @dccg: DCCG instance
+ * @inst: front-end index
+ *
+ * Mirror of the enable path: the source is switched to SYMCLK_FE_REFCLK
+ * first, then dccg35_set_symclk_fe_rcg(..., true) is called.
+ */
+static void dccg35_disable_symclk_fe_new(
+ struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_symclk_fe_src_new(dccg, SYMCLK_FE_REFCLK, inst);
+ dccg35_set_symclk_fe_rcg(dccg, inst, true);
+}
+
+/*
+ * dccg35_enable_symclk_be_new - enable a SYMCLK back end.
+ * @dccg: DCCG instance
+ * @inst: back-end index
+ * @src: source passed on to dccg35_set_symclk_be_src_new()
+ *
+ * Calls dccg35_set_symclk_be_rcg(..., false) first, then selects the source.
+ */
+static void dccg35_enable_symclk_be_new(
+	struct dccg *dccg,
+	int inst,
+	enum symclk_be_source src)
+{
+	dccg35_set_symclk_be_rcg(dccg, inst, false);
+	/*
+	 * dccg35_set_symclk_be_src_new() takes (dccg, src, inst). Passing
+	 * (inst, src) compiles silently because enum and int convert
+	 * implicitly, but programs the wrong instance with the wrong source.
+	 */
+	dccg35_set_symclk_be_src_new(dccg, src, inst);
+}
+
+/*
+ * dccg35_disable_symclk_be_new - disable a SYMCLK back end and every front end
+ * still sourced from it.
+ * @dccg: DCCG instance
+ * @inst: back-end index
+ *
+ * Sequence: switch the back end to SYMCLK_BE_REFCLK, disable each front end
+ * that dccg35_is_fe_rcg() reports as 0 and that is enabled with this back end
+ * as its source, then call dccg35_set_symclk_be_rcg(..., true).
+ */
+static void dccg35_disable_symclk_be_new(
+	struct dccg *dccg,
+	int inst)
+{
+	int i;
+
+	/*
+	 * Switch from functional clock to refclock.
+	 * dccg35_set_symclk_be_src_new() takes (dccg, src, inst), in that order.
+	 */
+	dccg35_set_symclk_be_src_new(dccg, SYMCLK_BE_REFCLK, inst);
+
+	/*
+	 * Check whether any FE is still connected to this BE and disable it.
+	 * NOTE(review): only FE 0..3 are visited although the FE helpers
+	 * accept instance 4 as well - confirm whether FE 4 must be covered.
+	 */
+	for (i = 0; i < 4; i++) {
+		/* Make sure FE is not already in RCG */
+		if (dccg35_is_fe_rcg(dccg, i) == 0) {
+			if (dccg35_is_symclk_fe_src_functional_be(dccg, i, inst))
+				dccg35_disable_symclk_fe_new(dccg, i);
+		}
+	}
+	/* Safe to RCG SYMCLK */
+	dccg35_set_symclk_be_rcg(dccg, inst, true);
+}
+
+/*
+ * dccg35_enable_symclk32_se_new - enable a SYMCLK32 stream-encoder clock.
+ * @dccg: DCCG instance
+ * @inst: stream-encoder index
+ * @src: source passed on to dccg35_set_symclk32_se_src_new()
+ *
+ * Calls dccg35_set_symclk32_se_rcg(..., false) first, then selects the source.
+ */
+static void dccg35_enable_symclk32_se_new(
+ struct dccg *dccg,
+ int inst,
+ enum symclk32_se_clk_source src)
+{
+ dccg35_set_symclk32_se_rcg(dccg, inst, false);
+ dccg35_set_symclk32_se_src_new(dccg, inst, src);
+}
+
+/*
+ * dccg35_disable_symclk32_se_new - disable a SYMCLK32 stream-encoder clock.
+ * @dccg: DCCG instance
+ * @inst: stream-encoder index
+ *
+ * Switches the source to SYMCLK32_SE_REFCLK, then calls
+ * dccg35_set_symclk32_se_rcg(..., true).
+ */
+static void dccg35_disable_symclk32_se_new(
+ struct dccg *dccg,
+ int inst)
+{
+ /*
+ * NOTE(review): arguments are passed here as (src, inst), whereas
+ * dccg35_enable_symclk32_se_new() passes (inst, src) to the same helper.
+ * enum/int convert silently, so one of the two call sites is presumably
+ * swapped - confirm against the helper's prototype.
+ */
+ dccg35_set_symclk32_se_src_new(dccg, SYMCLK32_SE_REFCLK, inst);
+ dccg35_set_symclk32_se_rcg(dccg, inst, true);
+}
+
+/*
+ * dccg35_enable_symclk32_le_new - enable a SYMCLK32 link-encoder clock.
+ * @dccg: DCCG instance
+ * @inst: link-encoder index
+ * @src: source passed on to dccg35_set_symclk32_le_src_new()
+ *
+ * Calls dccg35_set_symclk32_le_rcg(..., false) first, then selects the source.
+ */
+static void dccg35_enable_symclk32_le_new(
+ struct dccg *dccg,
+ int inst,
+ enum symclk32_le_clk_source src)
+{
+ dccg35_set_symclk32_le_rcg(dccg, inst, false);
+ dccg35_set_symclk32_le_src_new(dccg, inst, src);
+}
+
+/*
+ * dccg35_disable_symclk32_le_new - disable a SYMCLK32 link-encoder clock and
+ * every stream encoder still sourced from it.
+ * @dccg: DCCG instance
+ * @inst: link-encoder index
+ *
+ * Sequence: switch the LE to SYMCLK32_LE_REFCLK, disable each SE 0..3 for
+ * which dccg35_is_symclk32_se_rcg() returns 0 and which
+ * dccg35_is_symclk32_se_src_functional_le_new() reports as sourced from this
+ * LE, then call dccg35_set_symclk32_le_rcg(..., true).
+ */
+static void dccg35_disable_symclk32_le_new(
+ struct dccg *dccg,
+ int inst)
+{
+ int i;
+
+ /* Switch from functional clock to refclock */
+ dccg35_set_symclk32_le_src_new(dccg, inst, SYMCLK32_LE_REFCLK);
+
+ /* Check if any SE are connected and disable SE as well */
+ for (i = 0; i < 4; i++) {
+ /* Make sure SE is not already in RCG */
+ if (dccg35_is_symclk32_se_rcg(dccg, i) == 0) {
+ /* Disable any SE connected to this LE before RCG */
+ if (dccg35_is_symclk32_se_src_functional_le_new(dccg, i, inst))
+ dccg35_disable_symclk32_se_new(dccg, i);
+ }
+ }
+ /* Safe to RCG SYM32_LE */
+ dccg35_set_symclk32_le_rcg(dccg, inst, true);
+}
+
+/*
+ * dccg35_enable_physymclk_new - enable a PHYSYMCLK.
+ * @dccg: DCCG instance
+ * @inst: PHY index
+ * @src: source passed on to dccg35_set_physymclk_src_new()
+ *
+ * Calls dccg35_set_physymclk_rcg(..., false) first, then selects the source.
+ */
+static void dccg35_enable_physymclk_new(struct dccg *dccg,
+ int inst,
+ enum physymclk_source src)
+{
+ dccg35_set_physymclk_rcg(dccg, inst, false);
+ dccg35_set_physymclk_src_new(dccg, src, inst);
+}
+
+/*
+ * dccg35_disable_physymclk_new - disable a PHYSYMCLK.
+ * @dccg: DCCG instance
+ * @inst: PHY index
+ *
+ * Switches the source to PHYSYMCLK_REFCLK, then calls
+ * dccg35_set_physymclk_rcg(..., true).
+ */
+static void dccg35_disable_physymclk_new(struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_physymclk_src_new(dccg, PHYSYMCLK_REFCLK, inst);
+ dccg35_set_physymclk_rcg(dccg, inst, true);
+}
+
+/*
+ * dccg35_enable_dpp_clk_new - enable a DPP clock and set its DTO to 0xFF/0xFF.
+ * @dccg: DCCG instance
+ * @inst: DPP index, used to index DPPCLK_DTO_PARAM[]
+ * @src: source passed on to dcn35_set_dppclk_src_new()
+ *
+ * A negative @inst hits BREAK_TO_DEBUGGER() and returns before any register
+ * access.
+ */
+static void dccg35_enable_dpp_clk_new(
+ struct dccg *dccg,
+ int inst,
+ enum dppclk_clock_source src)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ /* Sanitize inst before use in array de-ref (only the lower bound is checked here) */
+ if (inst < 0) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ dccg35_set_dppclk_rcg(dccg, inst, false);
+ dcn35_set_dppclk_src_new(dccg, inst, src);
+ /* Switch DPP clock to DTO */
+ REG_SET_2(DPPCLK_DTO_PARAM[inst], 0,
+ DPPCLK0_DTO_PHASE, 0xFF,
+ DPPCLK0_DTO_MODULO, 0xFF);
+}
+
+
+/*
+ * dccg35_disable_dpp_clk_new - disable a DPP clock.
+ * @dccg: DCCG instance
+ * @inst: DPP index, used to index DPPCLK_DTO_PARAM[]
+ *
+ * Switches the source to DPP_REFCLK, writes phase 0 / modulo 1 to the DTO and
+ * then calls dccg35_set_dppclk_rcg(..., true). A negative @inst hits
+ * BREAK_TO_DEBUGGER() and returns before any register access.
+ */
+static void dccg35_disable_dpp_clk_new(
+ struct dccg *dccg,
+ int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ /* Sanitize inst before use in array de-ref (only the lower bound is checked here) */
+ if (inst < 0) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ dcn35_set_dppclk_src_new(dccg, inst, DPP_REFCLK);
+ REG_SET_2(DPPCLK_DTO_PARAM[inst], 0,
+ DPPCLK0_DTO_PHASE, 0,
+ DPPCLK0_DTO_MODULO, 1);
+ dccg35_set_dppclk_rcg(dccg, inst, true);
+}
+
+/*
+ * dccg35_disable_dscclk_new - disable a DSC clock.
+ * @dccg: DCCG instance
+ * @inst: DSC index
+ *
+ * Switches the source to DSC_CLK_REF_CLK, then calls
+ * dccg35_set_dsc_clk_rcg(..., true).
+ */
+static void dccg35_disable_dscclk_new(struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_dsc_clk_src_new(dccg, inst, DSC_CLK_REF_CLK);
+ dccg35_set_dsc_clk_rcg(dccg, inst, true);
+}
+
+/*
+ * dccg35_enable_dscclk_new - enable a DSC clock.
+ * @dccg: DCCG instance
+ * @inst: DSC index
+ * @src: source passed on to dccg35_set_dsc_clk_src_new()
+ *
+ * Calls dccg35_set_dsc_clk_rcg(..., false) first, then selects the source.
+ */
+static void dccg35_enable_dscclk_new(struct dccg *dccg,
+ int inst,
+ enum dsc_clk_source src)
+{
+ dccg35_set_dsc_clk_rcg(dccg, inst, false);
+ dccg35_set_dsc_clk_src_new(dccg, inst, src);
+}
+
+/*
+ * dccg35_enable_dtbclk_p_new - enable a DTBCLK_P instance.
+ * @dccg: DCCG instance
+ * @src: source passed on to dccg35_set_dtbclk_p_src_new()
+ * @inst: DTBCLK_P index
+ *
+ * Unlike most sibling enable helpers, @src precedes @inst in this signature.
+ * Calls dccg35_set_dtbclk_p_rcg(..., false) first, then selects the source.
+ */
+static void dccg35_enable_dtbclk_p_new(struct dccg *dccg,
+ enum dtbclk_source src,
+ int inst)
+{
+ dccg35_set_dtbclk_p_rcg(dccg, inst, false);
+ dccg35_set_dtbclk_p_src_new(dccg, src, inst);
+}
+
+/*
+ * dccg35_disable_dtbclk_p_new - disable a DTBCLK_P instance.
+ * @dccg: DCCG instance
+ * @inst: DTBCLK_P index
+ *
+ * Switches the source to DTBCLK_REFCLK, then calls
+ * dccg35_set_dtbclk_p_rcg(..., true).
+ */
+static void dccg35_disable_dtbclk_p_new(struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_dtbclk_p_src_new(dccg, DTBCLK_REFCLK, inst);
+ dccg35_set_dtbclk_p_rcg(dccg, inst, true);
+}
+
+/*
+ * dccg35_disable_dpstreamclk_new - disable a DPSTREAMCLK instance.
+ * @dccg: DCCG instance
+ * @inst: DPSTREAMCLK index
+ *
+ * Switches the source to DP_STREAM_REFCLK, then calls
+ * dccg35_set_dpstreamclk_rcg(..., true).
+ */
+static void dccg35_disable_dpstreamclk_new(struct dccg *dccg,
+ int inst)
+{
+ dccg35_set_dpstreamclk_src_new(dccg, DP_STREAM_REFCLK, inst);
+ dccg35_set_dpstreamclk_rcg(dccg, inst, true);
+}
+
+/*
+ * dccg35_enable_dpstreamclk_new - enable a DPSTREAMCLK instance.
+ * @dccg: DCCG instance
+ * @src: source passed on to dccg35_set_dpstreamclk_src_new()
+ * @inst: DPSTREAMCLK index
+ *
+ * Unlike most sibling enable helpers, @src precedes @inst in this signature.
+ * Calls dccg35_set_dpstreamclk_rcg(..., false) first, then selects the source.
+ */
+static void dccg35_enable_dpstreamclk_new(struct dccg *dccg,
+ enum dp_stream_clk_source src,
+ int inst)
+{
+ dccg35_set_dpstreamclk_rcg(dccg, inst, false);
+ dccg35_set_dpstreamclk_src_new(dccg, src, inst);
+}
+
+/*
+ * dccg35_trigger_dio_fifo_resync - write the current DISPCLK read divider back
+ * into the write-divider field.
+ * @dccg: DCCG instance
+ *
+ * Reads DENTIST_DISPCLK_RDIVIDER and, when it is non-zero, writes that value
+ * into DENTIST_DISPCLK_WDIVIDER. A zero read-back leaves the register
+ * untouched.
+ */
+static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint32_t rdivider = 0;
+
+	REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, &rdivider);
+
+	/* Nothing to write back when the divider reads as zero. */
+	if (rdivider == 0)
+		return;
+
+	REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, rdivider);
+}
+
+/*
+ * dcn35_set_dppclk_enable - write the DPPCLKx_EN field of DPPCLK_CTRL.
+ * @dccg: DCCG instance whose registers are written
+ * @dpp_inst: DPP index, 0..3; other values write nothing
+ * @enable: value written to the enable field
+ *
+ * The debug log line is emitted for every call, including an out-of-range
+ * @dpp_inst.
+ */
+static void dcn35_set_dppclk_enable(struct dccg *dccg,
+ uint32_t dpp_inst, uint32_t enable)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+
+ switch (dpp_inst) {
+ case 0:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, enable);
+ break;
+ case 1:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, enable);
+ break;
+ case 2:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, enable);
+ break;
+ case 3:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, enable);
+ break;
+ default:
+ break;
+ }
+ DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable);
+
+}
+
+/*
+ * dccg35_update_dpp_dto - program the DPPCLK DTO of one pipe for a requested
+ * clock.
+ * @dccg: DCCG instance; ref_dppclk, dpp_clock_gated[] and pipe_dppclk_khz[]
+ *        are read or written here
+ * @dpp_inst: DPP index, used unchecked as an array index
+ * @req_dppclk: requested DPP clock; 0 disables the DPPCLK enable bit
+ *
+ * When both dccg->ref_dppclk and @req_dppclk are non-zero the DTO is set to
+ * phase / 0xff, where phase is 0xff * req / ref rounded up and clamped to
+ * 0xff, and the clock is enabled. Otherwise only the enable bit is cleared.
+ * The requested value is recorded in pipe_dppclk_khz[] in both cases, but not
+ * when the function returns early because the clock is marked gated.
+ */
+static void dccg35_update_dpp_dto(struct dccg *dccg, int dpp_inst,
+ int req_dppclk)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ if (dccg->dpp_clock_gated[dpp_inst]) {
+ /*
+ * Do not update the DPPCLK DTO if the clock is stopped.
+ */
+ return;
+ }
+
+ if (dccg->ref_dppclk && req_dppclk) {
+ int ref_dppclk = dccg->ref_dppclk;
+ int modulo, phase;
+
+ // phase / modulo = dpp pipe clk / dpp global clk
+ modulo = 0xff; // use FF at the end
+ /* ceiling division so the programmed ratio never undershoots the request */
+ phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk;
+
+ /* a request above the reference clock cannot be represented; clamp to 1:1 */
+ if (phase > 0xff) {
+ ASSERT(false);
+ phase = 0xff;
+ }
+ dccg35_set_dppclk_rcg(dccg, dpp_inst, false);
+
+ REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+ DPPCLK0_DTO_PHASE, phase,
+ DPPCLK0_DTO_MODULO, modulo);
+
+ dcn35_set_dppclk_enable(dccg, dpp_inst, true);
+ } else {
+ dcn35_set_dppclk_enable(dccg, dpp_inst, false);
+ /*we have this in hwss: disable_plane*/
+ //dccg35_set_dppclk_rcg(dccg, dpp_inst, true);
+ }
+ dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk;
+}
+
+/*
+ * dccg35_set_dppclk_root_clock_gating - write DPPCLKx_ROOT_GATE_DISABLE.
+ * @dccg: DCCG instance whose registers are written
+ * @dpp_inst: DPP index, 0..3; other values write nothing
+ * @disallow_rcg: value written to the ROOT_GATE_DISABLE field
+ *
+ * A write of 0 is skipped unless the root_clock_optimization.bits.dpp debug
+ * option is set; a non-zero @disallow_rcg is always written and is followed
+ * by a 10 us delay.
+ */
+static void dccg35_set_dppclk_root_clock_gating(struct dccg *dccg,
+ uint32_t dpp_inst, uint32_t disallow_rcg)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* with the dpp optimization off, only "disallow" requests are honoured */
+ if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && !disallow_rcg)
+ return;
+
+
+ switch (dpp_inst) {
+ case 0:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, disallow_rcg);
+ break;
+ case 1:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, disallow_rcg);
+ break;
+ case 2:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, disallow_rcg);
+ break;
+ case 3:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, disallow_rcg);
+ break;
+ default:
+ break;
+ }
+
+ /* Wait for clock to ramp */
+ if (disallow_rcg)
+ udelay(10);
+}
+
+/*
+ * dccg35_get_pixel_rate_div - read the K1/K2 pixel-rate dividers of one OTG.
+ * @dccg: DCCG instance whose registers are read
+ * @otg_inst: OTG index, 0..3
+ * @k1: out, receives OTGx_PIXEL_RATE_DIVK1
+ * @k2: out, receives OTGx_PIXEL_RATE_DIVK2
+ *
+ * Both outputs are preset to PIXEL_RATE_DIV_NA, which is what the caller sees
+ * when @otg_inst is out of range (after BREAK_TO_DEBUGGER()).
+ */
+static void dccg35_get_pixel_rate_div(
+ struct dccg *dccg,
+ uint32_t otg_inst,
+ uint32_t *k1,
+ uint32_t *k2)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ uint32_t val_k1 = PIXEL_RATE_DIV_NA, val_k2 = PIXEL_RATE_DIV_NA;
+
+ *k1 = PIXEL_RATE_DIV_NA;
+ *k2 = PIXEL_RATE_DIV_NA;
+
+ switch (otg_inst) {
+ case 0:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG0_PIXEL_RATE_DIVK1, &val_k1,
+ OTG0_PIXEL_RATE_DIVK2, &val_k2);
+ break;
+ case 1:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG1_PIXEL_RATE_DIVK1, &val_k1,
+ OTG1_PIXEL_RATE_DIVK2, &val_k2);
+ break;
+ case 2:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG2_PIXEL_RATE_DIVK1, &val_k1,
+ OTG2_PIXEL_RATE_DIVK2, &val_k2);
+ break;
+ case 3:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG3_PIXEL_RATE_DIVK1, &val_k1,
+ OTG3_PIXEL_RATE_DIVK2, &val_k2);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ *k1 = val_k1;
+ *k2 = val_k2;
+}
+
+/*
+ * dccg35_set_pixel_rate_div - program the K1/K2 pixel-rate dividers of one OTG.
+ * @dccg: DCCG instance whose registers are written
+ * @otg_inst: OTG index, 0..3
+ * @k1: new K1 divider; PIXEL_RATE_DIV_NA is rejected
+ * @k2: new K2 divider; PIXEL_RATE_DIV_NA is rejected
+ *
+ * The current dividers are read back first and the register write is skipped
+ * when both already match. An out-of-range @otg_inst hits BREAK_TO_DEBUGGER()
+ * and writes nothing.
+ */
+static void dccg35_set_pixel_rate_div(
+ struct dccg *dccg,
+ uint32_t otg_inst,
+ enum pixel_rate_div k1,
+ enum pixel_rate_div k2)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ uint32_t cur_k1 = PIXEL_RATE_DIV_NA;
+ uint32_t cur_k2 = PIXEL_RATE_DIV_NA;
+
+
+ // Don't program 0xF into the register field. Not valid since
+ // K1 / K2 field is only 1 / 2 bits wide
+ if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ /* skip the register write when the dividers are already as requested */
+ dccg35_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2);
+ if (k1 == cur_k1 && k2 == cur_k2)
+ return;
+
+ switch (otg_inst) {
+ case 0:
+ REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+ OTG0_PIXEL_RATE_DIVK1, k1,
+ OTG0_PIXEL_RATE_DIVK2, k2);
+ break;
+ case 1:
+ REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+ OTG1_PIXEL_RATE_DIVK1, k1,
+ OTG1_PIXEL_RATE_DIVK2, k2);
+ break;
+ case 2:
+ REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+ OTG2_PIXEL_RATE_DIVK1, k1,
+ OTG2_PIXEL_RATE_DIVK2, k2);
+ break;
+ case 3:
+ REG_UPDATE_2(OTG_PIXEL_RATE_DIV,
+ OTG3_PIXEL_RATE_DIVK1, k1,
+ OTG3_PIXEL_RATE_DIVK2, k2);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+/*
+ * dccg35_set_dtbclk_p_src - select the source of one DTBCLK_P instance.
+ * @dccg: DCCG instance whose registers are written
+ * @src: REFCLK clears DTBCLK_Px_EN only (SRC_SEL is left untouched); any other
+ *       value sets EN = 1 with SRC_SEL = 2 for DTBCLK0 and 0 otherwise
+ * @otg_inst: DTBCLK_P index, 0..3
+ *
+ * An out-of-range @otg_inst hits BREAK_TO_DEBUGGER() and writes nothing.
+ */
+static void dccg35_set_dtbclk_p_src(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ uint32_t p_src_sel = 0; /* selects dprefclk */
+ if (src == DTBCLK0)
+ p_src_sel = 2; /* selects dtbclk0 */
+
+ switch (otg_inst) {
+ case 0:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P0_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P0_SRC_SEL, p_src_sel,
+ DTBCLK_P0_EN, 1);
+ break;
+ case 1:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P1_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P1_SRC_SEL, p_src_sel,
+ DTBCLK_P1_EN, 1);
+ break;
+ case 2:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P2_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P2_SRC_SEL, p_src_sel,
+ DTBCLK_P2_EN, 1);
+ break;
+ case 3:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P3_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P3_SRC_SEL, p_src_sel,
+ DTBCLK_P3_EN, 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+}
+
+/*
+ * dccg35_set_dtbclk_dto - enable or disable the DTBCLK DTO of one OTG.
+ * Controls the generation of pixel valid for the OTG (OTG -> HPO case).
+ * @dccg: DCCG instance whose registers are written
+ * @params: otg_inst selects the OTG, pixclk_khz and ref_dtbclk_khz define the
+ *          DTO ratio; the requested DTBCLK is pixclk_khz / 4
+ *
+ * Enable path (both ref_dtbclk_khz and pixclk_khz / 4 non-zero): set
+ * DTBCLK_Px_GATE_DISABLE, program modulo/phase in Hz, enable the DTO, wait
+ * for the enable status, then set both pixel-rate dividers to
+ * PIXEL_RATE_DIV_BY_1. Disable path: clear DTBCLK_Px_GATE_DISABLE, disable
+ * the DTO and zero modulo/phase.
+ *
+ * params->otg_inst indexes the register arrays without a range check, and the
+ * gate-disable switches have no default case.
+ */
+static void dccg35_set_dtbclk_dto(
+ struct dccg *dccg,
+ const struct dtbclk_dto_params *params)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ /* DTO Output Rate / Pixel Rate = 1/4 */
+ int req_dtbclk_khz = params->pixclk_khz / 4;
+
+ if (params->ref_dtbclk_khz && req_dtbclk_khz) {
+ uint32_t modulo, phase;
+
+ switch (params->otg_inst) {
+ case 0:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 1);
+ break;
+ case 1:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 1);
+ break;
+ case 2:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 1);
+ break;
+ case 3:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 1);
+ break;
+ }
+
+ // phase / modulo = dtbclk / dtbclk ref
+ /* both values are converted from kHz to Hz */
+ modulo = params->ref_dtbclk_khz * 1000;
+ phase = req_dtbclk_khz * 1000;
+
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 1);
+
+ REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1,
+ 1, 100);
+
+ /* program OTG_PIXEL_RATE_DIV for DIVK1 and DIVK2 fields */
+ dccg35_set_pixel_rate_div(dccg, params->otg_inst, PIXEL_RATE_DIV_BY_1, PIXEL_RATE_DIV_BY_1);
+
+ /* The recommended programming sequence to enable DTBCLK DTO to generate
+ * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should
+ * be set only after DTO is enabled.
+ * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the
+ * programming is handled in program_pix_clk() regardless, so it can be removed from here.
+ */
+ DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO enabled: pixclk_khz=%d, ref_dtbclk_khz=%d, req_dtbclk_khz=%d, phase=%d, modulo=%d\n",
+ __func__, params->otg_inst, params->pixclk_khz,
+ params->ref_dtbclk_khz, req_dtbclk_khz, phase, modulo);
+
+ } else {
+ switch (params->otg_inst) {
+ case 0:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 0);
+ break;
+ }
+
+ /*
+ * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the
+ * programming is handled in program_pix_clk() regardless, so it can be removed from here.
+ */
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 0);
+
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
+
+ DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO disabled\n", __func__, params->otg_inst);
+ }
+}
+
+/*
+ * dccg35_set_dpstreamclk - route a DTBCLK_P to one DP HPO stream clock.
+ * @dccg: DCCG instance whose registers are written
+ * @src: REFCLK disables the stream clock, any other value enables it
+ * @otg_inst: OTG index; programmed as DTBCLK_P source owner and written to
+ *            DPSTREAMCLKx_SRC_SEL
+ * @dp_hpo_inst: stream clock index, 0..3
+ *
+ * The DTBCLK_P source is always programmed first. The root gate-disable bit
+ * is only updated when the root_clock_optimization.bits.dpstream debug option
+ * is set. An out-of-range @dp_hpo_inst hits BREAK_TO_DEBUGGER() after the
+ * DTBCLK_P source has already been programmed.
+ */
+static void dccg35_set_dpstreamclk(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ int otg_inst,
+ int dp_hpo_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* set the dtbclk_p source */
+ dccg35_set_dtbclk_p_src(dccg, src, otg_inst);
+
+ /* enabled to select one of the DTBCLKs for pipe */
+ switch (dp_hpo_inst) {
+ case 0:
+ REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN,
+ (src == REFCLK) ? 0 : 1, DPSTREAMCLK0_SRC_SEL, otg_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, (src == REFCLK) ? 0 : 1);
+ break;
+ case 1:
+ REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN,
+ (src == REFCLK) ? 0 : 1, DPSTREAMCLK1_SRC_SEL, otg_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, (src == REFCLK) ? 0 : 1);
+ break;
+ case 2:
+ REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN,
+ (src == REFCLK) ? 0 : 1, DPSTREAMCLK2_SRC_SEL, otg_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, (src == REFCLK) ? 0 : 1);
+ break;
+ case 3:
+ REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN,
+ (src == REFCLK) ? 0 : 1, DPSTREAMCLK3_SRC_SEL, otg_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, (src == REFCLK) ? 0 : 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_EN = %d, DPSTREAMCLK_SRC_SEL = %d\n",
+ __func__, dp_hpo_inst, (src == REFCLK) ? 0 : 1, otg_inst);
+}
+
+
+/*
+ * dccg35_set_dpstreamclk_root_clock_gating - write both gate-disable bits of
+ * one DPSTREAMCLK.
+ * @dccg: DCCG instance whose registers are written
+ * @dp_hpo_inst: stream clock index, 0..3
+ * @enable: true writes 1, false writes 0, to both
+ *          DPSTREAMCLKx_ROOT_GATE_DISABLE and DPSTREAMCLKx_GATE_DISABLE
+ *
+ * Registers are only touched when the root_clock_optimization.bits.dpstream
+ * debug option is set; the debug log is emitted for every valid
+ * @dp_hpo_inst regardless. An out-of-range @dp_hpo_inst hits
+ * BREAK_TO_DEBUGGER().
+ *
+ * Note that @enable maps directly onto the gate-*disable* fields.
+ */
+static void dccg35_set_dpstreamclk_root_clock_gating(
+ struct dccg *dccg,
+ int dp_hpo_inst,
+ bool enable)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (dp_hpo_inst) {
+ case 0:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) {
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, enable ? 1 : 0);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, enable ? 1 : 0);
+ }
+ break;
+ case 1:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) {
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, enable ? 1 : 0);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, enable ? 1 : 0);
+ }
+ break;
+ case 2:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) {
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, enable ? 1 : 0);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, enable ? 1 : 0);
+ }
+ break;
+ case 3:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) {
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, enable ? 1 : 0);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, enable ? 1 : 0);
+ }
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_ROOT_GATE_DISABLE = %d\n",
+ __func__, dp_hpo_inst, enable ? 1 : 0);
+}
+
+
+
+/*
+ * dccg35_set_physymclk_root_clock_gating - write PHYxSYMCLK_ROOT_GATE_DISABLE.
+ * @dccg: DCCG instance whose registers are written
+ * @phy_inst: PHY index, 0..4 selecting PHYA..PHYE
+ * @enable: true writes 0 to the gate-disable field, false writes 1
+ *
+ * Does nothing unless the root_clock_optimization.bits.physymclk debug option
+ * is set. An out-of-range @phy_inst hits BREAK_TO_DEBUGGER() and writes
+ * nothing.
+ */
+static void dccg35_set_physymclk_root_clock_gating(
+	struct dccg *dccg,
+	int phy_inst,
+	bool enable)
+{
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+	if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+		return;
+
+	switch (phy_inst) {
+	case 0:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+			PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 1:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+			PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 2:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+			PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 3:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+			PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	case 4:
+		REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+			PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1);
+		break;
+	default:
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+	/* The index is a PHY instance and the field differs per PHY, so log it as such. */
+	DC_LOG_DEBUG("%s: phy_inst(%d) PHYxSYMCLK_ROOT_GATE_DISABLE: %d\n", __func__, phy_inst, enable ? 0 : 1);
+
+}
+
+/*
+ * dccg35_set_physymclk - enable or disable one PHYSYMCLK.
+ * @dccg: DCCG instance whose registers are written
+ * @phy_inst: PHY index, 0..4 selecting PHYA..PHYE
+ * @clk_src: value written to PHYxSYMCLK_SRC_SEL when @force_enable is true
+ * @force_enable: true writes EN = 1 and SRC_SEL = @clk_src; false writes 0 to
+ *                both fields
+ *
+ * An out-of-range @phy_inst hits BREAK_TO_DEBUGGER() and writes nothing. The
+ * debug log prints @clk_src even on the disable path, where 0 is what was
+ * written to SRC_SEL.
+ */
+static void dccg35_set_physymclk(
+ struct dccg *dccg,
+ int phy_inst,
+ enum physymclk_clock_source clk_src,
+ bool force_enable)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* Force PHYSYMCLK on and Select phyd32clk as the source of clock which is output to PHY through DCIO */
+ switch (phy_inst) {
+ case 0:
+ if (force_enable) {
+ REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
+ PHYASYMCLK_EN, 1,
+ PHYASYMCLK_SRC_SEL, clk_src);
+ } else {
+ REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
+ PHYASYMCLK_EN, 0,
+ PHYASYMCLK_SRC_SEL, 0);
+ }
+ break;
+ case 1:
+ if (force_enable) {
+ REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
+ PHYBSYMCLK_EN, 1,
+ PHYBSYMCLK_SRC_SEL, clk_src);
+ } else {
+ REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
+ PHYBSYMCLK_EN, 0,
+ PHYBSYMCLK_SRC_SEL, 0);
+ }
+ break;
+ case 2:
+ if (force_enable) {
+ REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
+ PHYCSYMCLK_EN, 1,
+ PHYCSYMCLK_SRC_SEL, clk_src);
+ } else {
+ REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
+ PHYCSYMCLK_EN, 0,
+ PHYCSYMCLK_SRC_SEL, 0);
+ }
+ break;
+ case 3:
+ if (force_enable) {
+ REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
+ PHYDSYMCLK_EN, 1,
+ PHYDSYMCLK_SRC_SEL, clk_src);
+ } else {
+ REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
+ PHYDSYMCLK_EN, 0,
+ PHYDSYMCLK_SRC_SEL, 0);
+ }
+ break;
+ case 4:
+ if (force_enable) {
+ REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL,
+ PHYESYMCLK_EN, 1,
+ PHYESYMCLK_SRC_SEL, clk_src);
+ } else {
+ REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL,
+ PHYESYMCLK_EN, 0,
+ PHYESYMCLK_SRC_SEL, 0);
+ }
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ DC_LOG_DEBUG("%s: phy_inst(%d) PHYxSYMCLK_EN = %d, PHYxSYMCLK_SRC_SEL = %d\n",
+ __func__, phy_inst, force_enable ? 1 : 0, clk_src);
+}
+
+/*
+ * dccg35_set_valid_pixel_rate - program the DTBCLK DTO for an HDMI stream.
+ * @dccg: DCCG instance
+ * @ref_dtbclk_khz: DTBCLK reference, in kHz
+ * @otg_inst: OTG whose DTO is programmed
+ * @pixclk_khz: pixel clock, in kHz
+ *
+ * Thin wrapper that packs its arguments into a dtbclk_dto_params with
+ * is_hdmi set and hands it to dccg35_set_dtbclk_dto(). All members not named
+ * here stay zero.
+ */
+static void dccg35_set_valid_pixel_rate(
+	struct dccg *dccg,
+	int ref_dtbclk_khz,
+	int otg_inst,
+	int pixclk_khz)
+{
+	struct dtbclk_dto_params hdmi_dto = {
+		.ref_dtbclk_khz = ref_dtbclk_khz,
+		.otg_inst = otg_inst,
+		.pixclk_khz = pixclk_khz,
+		.is_hdmi = true,
+	};
+
+	dccg35_set_dtbclk_dto(dccg, &hdmi_dto);
+}
+
+/*
+ * dccg35_dpp_root_clock_control - turn the root clock of one DPP on or off.
+ * @dccg: DCCG instance; dpp_clock_gated[] is read and updated here
+ * @dpp_inst: DPP index, used unchecked as an array index
+ * @clock_on: true enables DPPCLK and sets the DTO to 0xFF/0xFF; false
+ *            disables DPPCLK and sets the DTO to phase 0 / modulo 1
+ *
+ * On completion dpp_clock_gated[@dpp_inst] is set to !@clock_on.
+ */
+static void dccg35_dpp_root_clock_control(
+ struct dccg *dccg,
+ unsigned int dpp_inst,
+ bool clock_on)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /*
+ * NOTE(review): the recorded state is stored as !clock_on below, so this
+ * test returns early exactly when the recorded state differs from the
+ * request (gated and asked on, or ungated and asked off) and proceeds
+ * when they already agree. Confirm whether "!=" was intended.
+ */
+ if (dccg->dpp_clock_gated[dpp_inst] == clock_on)
+ return;
+
+ if (clock_on) {
+ dccg35_set_dppclk_rcg(dccg, dpp_inst, false);
+
+ /* turn off the DTO and leave phase/modulo at max */
+ dcn35_set_dppclk_enable(dccg, dpp_inst, 1);
+ REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+ DPPCLK0_DTO_PHASE, 0xFF,
+ DPPCLK0_DTO_MODULO, 0xFF);
+ } else {
+ dcn35_set_dppclk_enable(dccg, dpp_inst, 0);
+ /* turn on the DTO to generate a 0hz clock */
+ REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+ DPPCLK0_DTO_PHASE, 0,
+ DPPCLK0_DTO_MODULO, 1);
+ /*we have this in hwss: disable_plane*/
+ //dccg35_set_dppclk_rcg(dccg, dpp_inst, true);
+ }
+
+ dccg->dpp_clock_gated[dpp_inst] = !clock_on;
+ DC_LOG_DEBUG("%s: dpp_inst(%d) clock_on = %d\n", __func__, dpp_inst, clock_on);
+}
+
+/*
+ * dccg35_disable_symclk32_se - disable the SYMCLK32 of one HPO stream encoder.
+ * @dccg: DCCG instance whose registers are written
+ * @hpo_se_inst: stream-encoder index, 0..3
+ *
+ * Writes 0 to SYMCLK32_SEx_SRC_SEL and SYMCLK32_SEx_EN. When the
+ * root_clock_optimization.bits.symclk32_se debug option is set,
+ * SYMCLK32_SEx_GATE_DISABLE is cleared as well; the matching ROOT gate write
+ * is commented out. An out-of-range @hpo_se_inst hits BREAK_TO_DEBUGGER().
+ */
+static void dccg35_disable_symclk32_se(
+ struct dccg *dccg,
+ int hpo_se_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* set refclk as the source for symclk32_se */
+ switch (hpo_se_inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE0_SRC_SEL, 0,
+ SYMCLK32_SE0_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE0_GATE_DISABLE, 0);
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+// SYMCLK32_ROOT_SE0_GATE_DISABLE, 0);
+ }
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE1_SRC_SEL, 0,
+ SYMCLK32_SE1_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE1_GATE_DISABLE, 0);
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+// SYMCLK32_ROOT_SE1_GATE_DISABLE, 0);
+ }
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE2_SRC_SEL, 0,
+ SYMCLK32_SE2_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE2_GATE_DISABLE, 0);
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+// SYMCLK32_ROOT_SE2_GATE_DISABLE, 0);
+ }
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLK32_SE_CNTL,
+ SYMCLK32_SE3_SRC_SEL, 0,
+ SYMCLK32_SE3_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) {
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE3_GATE_DISABLE, 0);
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL3,
+// SYMCLK32_ROOT_SE3_GATE_DISABLE, 0);
+ }
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+}
+
+/*
+ * dccg35_init_cb - init hook that intentionally does nothing.
+ * @dccg: unused
+ */
+static void dccg35_init_cb(struct dccg *dccg)
+{
+ (void)dccg;
+ /* Any RCG should be done when driver enter low power mode*/
+}
+/*
+ * dccg35_init - put the HPO-related clocks into a known disabled state.
+ * @dccg: DCCG instance
+ *
+ * Always disables SYMCLK32 for stream encoders 0..3. Depending on the
+ * root_clock_optimization debug bits it additionally disables SYMCLK32_LE
+ * 0..1 and switches DPSTREAMCLK 0..3 to REFCLK, in each case calling the
+ * matching root-clock-gating helper with false afterwards.
+ *
+ * otg_inst is reused as a plain loop counter for SE, LE and DP stream
+ * indices.
+ */
+void dccg35_init(struct dccg *dccg)
+{
+ int otg_inst;
+ /* Set HPO stream encoder to use refclk to avoid case where PHY is
+ * disabled and SYMCLK32 for HPO SE is sourced from PHYD32CLK which
+ * will cause DCN to hang.
+ */
+ for (otg_inst = 0; otg_inst < 4; otg_inst++)
+ dccg35_disable_symclk32_se(dccg, otg_inst);
+
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ for (otg_inst = 0; otg_inst < 2; otg_inst++) {
+ dccg31_disable_symclk32_le(dccg, otg_inst);
+ dccg31_set_symclk32_le_root_clock_gating(dccg, otg_inst, false);
+ DC_LOG_DEBUG("%s: OTG%d SYMCLK32_LE disabled and root clock gating disabled\n",
+ __func__, otg_inst);
+ }
+
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// for (otg_inst = 0; otg_inst < 4; otg_inst++)
+// dccg35_disable_symclk_se(dccg, otg_inst, otg_inst);
+
+
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ for (otg_inst = 0; otg_inst < 4; otg_inst++) {
+ /* the same index is used as both OTG and DP HPO instance */
+ dccg35_set_dpstreamclk(dccg, REFCLK, otg_inst,
+ otg_inst);
+ dccg35_set_dpstreamclk_root_clock_gating(dccg, otg_inst, false);
+ DC_LOG_DEBUG("%s: OTG%d DPSTREAMCLK disabled and root clock gating disabled\n",
+ __func__, otg_inst);
+ }
+
+/*
+ dccg35_enable_global_fgcg_rep(
+ dccg, dccg->ctx->dc->debug.enable_fine_grain_clock_gating.bits
+ .dccg_global_fgcg_rep);*/
+}
+
+/*
+ * dccg35_enable_global_fgcg_rep - write the DCCG_GLOBAL_FGCG_REP_DIS field.
+ * @dccg: DCCG instance whose registers are written
+ * @value: true writes 0 to the disable field, false writes 1
+ */
+void dccg35_enable_global_fgcg_rep(struct dccg *dccg, bool value)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* the register field is a *disable* bit, hence the inversion */
+ REG_UPDATE(DCCG_GLOBAL_FGCG_REP_CNTL, DCCG_GLOBAL_FGCG_REP_DIS, !value);
+}
+
+/*
+ * dccg35_enable_dscclk - enable one DSC clock.
+ * @dccg: DCCG instance whose registers are written
+ * @inst: DSC index, 0..3
+ *
+ * Sets DSCCLKx_ROOT_GATE_DISABLE, zeroes the DTO phase and modulo, sets
+ * DSCCLKx_EN and then delays 10 us. An out-of-range @inst hits
+ * BREAK_TO_DEBUGGER() and returns without the delay.
+ */
+static void dccg35_enable_dscclk(struct dccg *dccg, int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ //Disable DTO
+ switch (inst) {
+ case 0:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1);
+
+ REG_UPDATE_2(DSCCLK0_DTO_PARAM,
+ DSCCLK0_DTO_PHASE, 0,
+ DSCCLK0_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1);
+ break;
+ case 1:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1);
+
+ REG_UPDATE_2(DSCCLK1_DTO_PARAM,
+ DSCCLK1_DTO_PHASE, 0,
+ DSCCLK1_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1);
+ break;
+ case 2:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1);
+
+ REG_UPDATE_2(DSCCLK2_DTO_PARAM,
+ DSCCLK2_DTO_PHASE, 0,
+ DSCCLK2_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1);
+ break;
+ case 3:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1);
+
+ REG_UPDATE_2(DSCCLK3_DTO_PARAM,
+ DSCCLK3_DTO_PHASE, 0,
+ DSCCLK3_DTO_MODULO, 0);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ /* Wait for clock to ramp */
+ udelay(10);
+}
+
+/*
+ * dccg35_disable_dscclk - disable one DSC clock.
+ * @dccg: DCCG instance whose registers are written
+ * @inst: DSC index, 0..3
+ *
+ * Clears DSCCLKx_EN, sets the DTO to phase 0 / modulo 1 and, when the
+ * root_clock_optimization.bits.dsc debug option is set, clears
+ * DSCCLKx_ROOT_GATE_DISABLE; then delays 10 us. An out-of-range @inst returns
+ * silently (no BREAK_TO_DEBUGGER(), unlike the enable path).
+ */
+static void dccg35_disable_dscclk(struct dccg *dccg,
+ int inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) {
+ case 0:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0);
+ REG_UPDATE_2(DSCCLK0_DTO_PARAM,
+ DSCCLK0_DTO_PHASE, 0,
+ DSCCLK0_DTO_MODULO, 1);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 0);
+ REG_UPDATE_2(DSCCLK1_DTO_PARAM,
+ DSCCLK1_DTO_PHASE, 0,
+ DSCCLK1_DTO_MODULO, 1);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 0);
+ REG_UPDATE_2(DSCCLK2_DTO_PARAM,
+ DSCCLK2_DTO_PHASE, 0,
+ DSCCLK2_DTO_MODULO, 1);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 0);
+ REG_UPDATE_2(DSCCLK3_DTO_PARAM,
+ DSCCLK3_DTO_PHASE, 0,
+ DSCCLK3_DTO_MODULO, 1);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 0);
+ break;
+ default:
+ return;
+ }
+
+ /* Wait for clock ramp */
+ udelay(10);
+}
+
+/*
+ * dccg35_enable_symclk_se - enable a SYMCLK back end and attach a front end
+ * to it.
+ * @dccg: DCCG instance whose registers are written
+ * @stream_enc_inst: front-end index, 0..4 selecting SYMCLKA..SYMCLKE
+ * @link_enc_inst: back-end index, 0..4; also written to the front end's
+ *                 FE_SRC_SEL field
+ *
+ * The back end's CLOCK_ENABLE bit is set first, then the front end's FE_EN
+ * and FE_SRC_SEL. The matching ROOT_GATE_DISABLE bits are set only when the
+ * root_clock_optimization.bits.symclk32_se debug option is set. Neither
+ * switch has a default case, so out-of-range indices are silently ignored.
+ */
+static void dccg35_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* back end: enable the link encoder's SYMCLK */
+ switch (link_enc_inst) {
+ case 0:
+ REG_UPDATE(SYMCLKA_CLOCK_ENABLE,
+ SYMCLKA_CLOCK_ENABLE, 1);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, 1);
+ break;
+ case 1:
+ REG_UPDATE(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_CLOCK_ENABLE, 1);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, 1);
+ break;
+ case 2:
+ REG_UPDATE(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_CLOCK_ENABLE, 1);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, 1);
+ break;
+ case 3:
+ REG_UPDATE(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_CLOCK_ENABLE, 1);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, 1);
+ break;
+ case 4:
+ REG_UPDATE(SYMCLKE_CLOCK_ENABLE,
+ SYMCLKE_CLOCK_ENABLE, 1);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, 1);
+ break;
+ }
+
+ /* front end: enable the stream encoder's SYMCLK and source it from the back end */
+ switch (stream_enc_inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+ SYMCLKA_FE_EN, 1,
+ SYMCLKA_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_FE_EN, 1,
+ SYMCLKB_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_FE_EN, 1,
+ SYMCLKC_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_FE_EN, 1,
+ SYMCLKD_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ case 4:
+ REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE,
+ SYMCLKE_FE_EN, 1,
+ SYMCLKE_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ }
+}
+
+/*
+ * Count the enabled SYMCLK front ends (A-E) whose FE_SRC_SEL field equals
+ * @link_enc_inst, i.e. the front ends still connected to that back end.
+ * Reads the five SYMCLKx_CLOCK_ENABLE registers; returns a value in 0..5.
+ */
+static uint8_t dccg35_get_number_enabled_symclk_fe_connected_to_be(struct dccg *dccg, uint32_t link_enc_inst)
+{
+	uint8_t num_enabled_symclk_fe = 0;
+	uint32_t fe_clk_en[5] = {0}, be_clk_sel[5] = {0};
+	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+	uint8_t i;
+
+	REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, &fe_clk_en[0],
+			SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]);
+	REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, &fe_clk_en[1],
+			SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]);
+	REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, &fe_clk_en[2],
+			SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]);
+	REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, &fe_clk_en[3],
+			SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]);
+	REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_EN, &fe_clk_en[4],
+			SYMCLKE_FE_SRC_SEL, &be_clk_sel[4]);
+
+	/* A front end counts only if it is enabled and sourced from this back end. */
+	for (i = 0; i < ARRAY_SIZE(fe_clk_en); i++) {
+		if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst)
+			num_enabled_symclk_fe++;
+	}
+	return num_enabled_symclk_fe;
+}
+
+static void dccg35_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+ uint8_t num_enabled_symclk_fe = 0;
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (stream_enc_inst) { /* front end: clear FE_EN and reset FE_SRC_SEL to 0 for this instance */
+ case 0:
+ REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+ SYMCLKA_FE_EN, 0,
+ SYMCLKA_FE_SRC_SEL, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_FE_EN, 0,
+ SYMCLKB_FE_SRC_SEL, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_FE_EN, 0,
+ SYMCLKC_FE_SRC_SEL, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_FE_EN, 0,
+ SYMCLKD_FE_SRC_SEL, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, 0);
+ break;
+ case 4:
+ REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE,
+ SYMCLKE_FE_EN, 0,
+ SYMCLKE_FE_SRC_SEL, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, 0);
+ break;
+ } /* no default: any other stream_enc_inst leaves the front-end fields untouched */
+
+ /* Count the front ends still enabled on this back end (the one above is already off). */
+ num_enabled_symclk_fe = dccg35_get_number_enabled_symclk_fe_connected_to_be(dccg, link_enc_inst);
+ /* Only turn off the back-end clock once every front end attached to it is off;
+ * for MST, the back end is therefore disabled only with the last front end. */
+ if (num_enabled_symclk_fe == 0) {
+ switch (link_enc_inst) { /* NOTE(review): disabled root-gate code below keys on symclk32_le; the enable path uses symclk32_se */
+ case 0:
+ REG_UPDATE(SYMCLKA_CLOCK_ENABLE,
+ SYMCLKA_CLOCK_ENABLE, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_CLOCK_ENABLE, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_CLOCK_ENABLE, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_CLOCK_ENABLE, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, 0);
+ break;
+ case 4:
+ REG_UPDATE(SYMCLKE_CLOCK_ENABLE,
+ SYMCLKE_CLOCK_ENABLE, 0);
+// if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+// REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, 0);
+ break;
+ }
+ }
+}
+
+static void dccg35_set_dpstreamclk_cb(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ int otg_inst,
+ int dp_hpo_inst)
+{
+
+ enum dtbclk_source dtb_clk_src;
+ enum dp_stream_clk_source dp_stream_clk_src;
+
+ switch (src) { /* translate src into a DTBCLK_P source and a DPSTREAMCLK source */
+ case REFCLK:
+ dtb_clk_src = DTBCLK_REFCLK;
+ dp_stream_clk_src = DP_STREAM_REFCLK;
+ break;
+ case DPREFCLK:
+ dtb_clk_src = DTBCLK_DPREFCLK;
+ dp_stream_clk_src = (enum dp_stream_clk_source)otg_inst; /* stream clock source is the OTG index, recast */
+ break;
+ case DTBCLK0:
+ dtb_clk_src = DTBCLK_DTBCLK0;
+ dp_stream_clk_src = (enum dp_stream_clk_source)otg_inst; /* stream clock source is the OTG index, recast */
+ break;
+ default: /* unsupported src: BREAK_TO_DEBUGGER() and return without programming anything */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ if (dtb_clk_src == DTBCLK_REFCLK &&
+ dp_stream_clk_src == DP_STREAM_REFCLK) { /* this pair is assigned together by the REFCLK case above */
+ dccg35_disable_dtbclk_p_new(dccg, otg_inst);
+ dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst);
+ } else { /* functional source: DTBCLK_P for the OTG first, then DPSTREAMCLK for the HPO instance */
+ dccg35_enable_dtbclk_p_new(dccg, dtb_clk_src, otg_inst);
+ dccg35_enable_dpstreamclk_new(dccg,
+ dp_stream_clk_src,
+ dp_hpo_inst);
+ }
+}
+
+/*
+ * DPSTREAMCLK root clock gating hook.
+ * @power_on set means ungate: no clock source is passed in, so the stream
+ * clock is re-enabled on the reference clock; otherwise it is disabled.
+ * Instance 0 is implied here since there is only one streamclock resource.
+ * Currently called from optimize_bandwidth and prepare_bandwidth; redundant
+ * when gating is already achieved through set_dpstreamclk.
+ */
+static void dccg35_set_dpstreamclk_root_clock_gating_cb(struct dccg *dccg,
+		int dp_hpo_inst, bool power_on)
+{
+	if (!power_on) {
+		dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst);
+		return;
+	}
+
+	dccg35_enable_dpstreamclk_new(dccg, DP_STREAM_REFCLK, dp_hpo_inst);
+}
+
+static void dccg35_update_dpp_dto_cb(struct dccg *dccg, int dpp_inst,
+ int req_dppclk)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ if (dccg->dpp_clock_gated[dpp_inst]) {
+ /*
+ * Do not update the DPPCLK DTO if the clock is stopped.
+ */
+ return; /* pipe_dppclk_khz[] is left unchanged on this path */
+ }
+
+ if (dccg->ref_dppclk && req_dppclk) {
+ int ref_dppclk = dccg->ref_dppclk;
+ int modulo, phase;
+
+ // phase / modulo = dpp pipe clk / dpp global clk
+ modulo = 0xff; // use FF at the end
+ phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk; /* division rounds up */
+
+ if (phase > 0xff) { /* happens only when req_dppclk exceeds ref_dppclk: assert, then clamp to modulo */
+ ASSERT(false);
+ phase = 0xff;
+ }
+
+ /* Enable DPP CLK DTO output */
+ dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_DCCG_DTO);
+
+ /* Program DTO */
+ REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0,
+ DPPCLK0_DTO_PHASE, phase,
+ DPPCLK0_DTO_MODULO, modulo);
+ } else /* no reference clock or nothing requested: turn the DPP clock off */
+ dccg35_disable_dpp_clk_new(dccg, dpp_inst);
+
+ dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; /* recorded for both the enable and the disable path */
+}
+
+/*
+ * DPP root clock gating hook: @power_on ungates DPPCLK for @dpp_inst, !@power_on
+ * gates it. dpp_clock_gated[] tracks the state that was last programmed here.
+ */
+static void dccg35_dpp_root_clock_control_cb(struct dccg *dccg,
+		unsigned int dpp_inst, bool power_on)
+{
+	/* gated is the inverse of power_on, so "!=" means the state already matches. */
+	if (dccg->dpp_clock_gated[dpp_inst] != power_on)
+		return;
+
+	/* Redundant when gating is already achieved through update_dpp_dto. */
+	dccg35_set_dppclk_rcg(dccg, dpp_inst, !power_on);
+
+	dccg->dpp_clock_gated[dpp_inst] = !power_on;
+}
+
+/* .enable_symclk32_se hook: recast the PHYD32CLK source and forward it. */
+static void dccg35_enable_symclk32_se_cb(struct dccg *dccg, int inst, enum phyd32clk_clock_source phyd32clk)
+{
+	enum symclk32_se_clk_source se_src = (enum symclk32_se_clk_source)phyd32clk;
+
+	dccg35_enable_symclk32_se_new(dccg, inst, se_src);
+}
+
+static void dccg35_disable_symclk32_se_cb(struct dccg *dccg, int inst)
+{
+ dccg35_disable_symclk32_se_new(dccg, inst); /* plain pass-through used as .disable_symclk32_se */
+}
+
+/* .enable_symclk32_le hook: recast the PHYD32CLK source and forward it. */
+static void dccg35_enable_symclk32_le_cb(struct dccg *dccg, int inst, enum phyd32clk_clock_source src)
+{
+	enum symclk32_le_clk_source le_src = (enum symclk32_le_clk_source)src;
+
+	dccg35_enable_symclk32_le_new(dccg, inst, le_src);
+}
+
+static void dccg35_disable_symclk32_le_cb(struct dccg *dccg, int inst)
+{
+ dccg35_disable_symclk32_le_new(dccg, inst); /* plain pass-through used as .disable_symclk32_le */
+}
+
+/*
+ * SYMCLK32_LE root clock gating hook, currently called from optimize_bandwidth
+ * and prepare_bandwidth. @power_on means ungate: no clock source is passed in,
+ * so restore SYMCLK32_LE_REFCLK. Redundant with disable_symclk32_le gating.
+ */
+static void dccg35_set_symclk32_le_root_clock_gating_cb(struct dccg *dccg,
+		int inst, bool power_on)
+{
+	if (!power_on) {
+		dccg35_disable_symclk32_le_new(dccg, inst);
+		return;
+	}
+
+	dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK);
+}
+
+/* PHYSYMCLK hook: enable @clk_src on @inst when @force_enable is set. */
+/* force_enable = 0 indicates we can switch to ref clock, so disable instead. */
+static void dccg35_set_physymclk_cb(struct dccg *dccg, int inst,
+		enum physymclk_clock_source clk_src, bool force_enable)
+{
+	if (!force_enable) {
+		dccg35_disable_physymclk_new(dccg, inst);
+		return;
+	}
+
+	dccg35_enable_physymclk_new(dccg, inst, (enum physymclk_source)clk_src);
+}
+
+/* PHYSYMCLK root clock gating hook. Redundant: RCG is already done in
+ * disable_physymclk. power_on = 1 indicates we need to ungate (on PHYSYMCLK_REFCLK).
+ */
+static void dccg35_set_physymclk_root_clock_gating_cb(struct dccg *dccg,
+		int inst, bool power_on)
+{
+	if (!power_on) {
+		dccg35_disable_physymclk_new(dccg, inst);
+		return;
+	}
+
+	dccg35_enable_physymclk_new(dccg, inst, PHYSYMCLK_REFCLK);
+}
+
+/*
+ * Same behaviour as dccg35_set_symclk32_le_root_clock_gating_cb: @power_on
+ * re-enables SYMCLK32_LE on SYMCLK32_LE_REFCLK (no clock source is passed in),
+ * otherwise the clock is disabled. Redundant with disable_symclk32_le gating.
+ */
+static void dccg35_set_symclk32_le_root_clock_gating(struct dccg *dccg,
+		int inst, bool power_on)
+{
+	if (!power_on) {
+		dccg35_disable_symclk32_le_new(dccg, inst);
+		return;
+	}
+
+	dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK);
+}
+
+/* Only DTBCLK0 enables DTBCLK_P for @inst; any other @src disables it. */
+static void dccg35_set_dtbclk_p_src_cb(struct dccg *dccg, enum streamclk_source src, uint32_t inst)
+{
+	if (src != DTBCLK0) {
+		dccg35_disable_dtbclk_p_new(dccg, inst);
+		return;
+	}
+
+	dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, inst);
+}
+
+static void dccg35_set_dtbclk_dto_cb(
+ struct dccg *dccg,
+ const struct dtbclk_dto_params *params)
+{
+ /* set_dtbclk_p_src is typically called earlier to switch to DTBCLK;
+ * if params->ref_dtbclk_khz or req_dtbclk_khz is 0, switch to ref-clock
+ */
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ /* DTO Output Rate / Pixel Rate = 1/4 */
+ int req_dtbclk_khz = params->pixclk_khz / 4;
+
+ if (params->ref_dtbclk_khz && req_dtbclk_khz) {
+ uint32_t modulo, phase;
+
+ dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, params->otg_inst);
+
+ // phase / modulo = dtbclk / dtbclk ref
+ modulo = params->ref_dtbclk_khz * 1000; /* kHz -> Hz */
+ phase = req_dtbclk_khz * 1000; /* kHz -> Hz */
+
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 1);
+
+ REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1,
+ 1, 100); /* presumably waits for ENABLE_STATUS == 1 with a 1 us delay, up to 100 tries - confirm against REG_WAIT */
+
+ /* program OTG_PIXEL_RATE_DIV for DIVK1 and DIVK2 fields */
+ dccg35_set_pixel_rate_div(dccg, params->otg_inst, PIXEL_RATE_DIV_BY_1, PIXEL_RATE_DIV_BY_1);
+
+ /* The recommended programming sequence to enable DTBCLK DTO to generate
+ * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should
+ * be set only after DTO is enabled
+ */
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ PIPE_DTO_SRC_SEL[params->otg_inst], 2);
+ } else { /* teardown: disable DTBCLK_P, clear DTO enable and pick the source by is_hdmi, then zero the DTO */
+ dccg35_disable_dtbclk_p_new(dccg, params->otg_inst);
+
+ REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 0,
+ PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
+
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
+ }
+}
+
+/* .disable_dsc hook: forwards straight to dccg35_disable_dscclk_new(). */
+static void dccg35_disable_dscclk_cb(struct dccg *dccg, int inst)
+{
+	dccg35_disable_dscclk_new(dccg, inst);
+}
+
+static void dccg35_enable_dscclk_cb(struct dccg *dccg, int inst)
+{
+ dccg35_enable_dscclk_new(dccg, inst, DSC_DTO_TUNED_CK_GPU_DISCLK_3); /* fixed source: the hook has no source argument */
+}
+
+/* Switch the back end to the functional clock if not already selected, then enable the front end on it. */
+static void dccg35_enable_symclk_se_cb(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+	enum symclk_fe_source fe_src = (enum symclk_fe_source)link_enc_inst;
+
+	dccg35_enable_symclk_be_new(dccg, SYMCLK_BE_PHYCLK, link_enc_inst);
+	dccg35_enable_symclk_fe_new(dccg, stream_enc_inst, fe_src);
+}
+
+/*
+ * .disable_symclk_se hook: turns off only the front end for @stream_enc_inst.
+ * @link_enc_inst is unused: DMU PHY sequence switches SYMCLK_BE to ref clock once PHY is turned off.
+ */
+static void dccg35_disable_symclk_se_cb(struct dccg *dccg,
+		uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+	dccg35_disable_symclk_fe_new(dccg, stream_enc_inst);
+}
+
+void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating)
+{
+ dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating); /* value passed through unchanged, for the DPPCLK of pipe_idx only */
+}
+
+static const struct dccg_funcs dccg35_funcs_new = { /* alternate table built on the _cb wrappers; dccg35_create() does not install it */
+ .update_dpp_dto = dccg35_update_dpp_dto_cb,
+ .dpp_root_clock_control = dccg35_dpp_root_clock_control_cb,
+ .get_dccg_ref_freq = dccg31_get_dccg_ref_freq,
+ .dccg_init = dccg35_init_cb,
+ .set_dpstreamclk = dccg35_set_dpstreamclk_cb,
+ .set_dpstreamclk_root_clock_gating = dccg35_set_dpstreamclk_root_clock_gating_cb,
+ .enable_symclk32_se = dccg35_enable_symclk32_se_cb,
+ .disable_symclk32_se = dccg35_disable_symclk32_se_cb,
+ .enable_symclk32_le = dccg35_enable_symclk32_le_cb,
+ .disable_symclk32_le = dccg35_disable_symclk32_le_cb,
+ .set_symclk32_le_root_clock_gating = dccg35_set_symclk32_le_root_clock_gating_cb,
+ .set_physymclk = dccg35_set_physymclk_cb,
+ .set_physymclk_root_clock_gating = dccg35_set_physymclk_root_clock_gating_cb,
+ .set_dtbclk_dto = dccg35_set_dtbclk_dto_cb,
+ .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto,
+ .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en,
+ .otg_add_pixel = dccg31_otg_add_pixel,
+ .otg_drop_pixel = dccg31_otg_drop_pixel,
+ .set_dispclk_change_mode = dccg31_set_dispclk_change_mode,
+ .disable_dsc = dccg35_disable_dscclk_cb,
+ .enable_dsc = dccg35_enable_dscclk_cb,
+ .set_pixel_rate_div = dccg35_set_pixel_rate_div,
+ .get_pixel_rate_div = dccg35_get_pixel_rate_div,
+ .trigger_dio_fifo_resync = dccg35_trigger_dio_fifo_resync,
+ .set_valid_pixel_rate = dccg35_set_valid_pixel_rate,
+ .enable_symclk_se = dccg35_enable_symclk_se_cb,
+ .disable_symclk_se = dccg35_disable_symclk_se_cb,
+ .set_dtbclk_p_src = dccg35_set_dtbclk_p_src_cb,
+}; /* NOTE(review): unlike dccg35_funcs, this table has no .dccg_root_gate_disable_control entry */
+
+static const struct dccg_funcs dccg35_funcs = { /* table installed by dccg35_create() as base->funcs */
+ .update_dpp_dto = dccg35_update_dpp_dto,
+ .dpp_root_clock_control = dccg35_dpp_root_clock_control,
+ .get_dccg_ref_freq = dccg31_get_dccg_ref_freq,
+ .dccg_init = dccg35_init,
+ .set_dpstreamclk = dccg35_set_dpstreamclk,
+ .set_dpstreamclk_root_clock_gating = dccg35_set_dpstreamclk_root_clock_gating,
+ .enable_symclk32_se = dccg31_enable_symclk32_se,
+ .disable_symclk32_se = dccg35_disable_symclk32_se,
+ .enable_symclk32_le = dccg31_enable_symclk32_le,
+ .disable_symclk32_le = dccg31_disable_symclk32_le,
+ .set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating,
+ .set_physymclk = dccg35_set_physymclk,
+ .set_physymclk_root_clock_gating = dccg35_set_physymclk_root_clock_gating,
+ .set_dtbclk_dto = dccg35_set_dtbclk_dto,
+ .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto,
+ .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en,
+ .otg_add_pixel = dccg31_otg_add_pixel,
+ .otg_drop_pixel = dccg31_otg_drop_pixel,
+ .set_dispclk_change_mode = dccg31_set_dispclk_change_mode,
+ .disable_dsc = dccg35_disable_dscclk,
+ .enable_dsc = dccg35_enable_dscclk,
+ .set_pixel_rate_div = dccg35_set_pixel_rate_div,
+ .get_pixel_rate_div = dccg35_get_pixel_rate_div,
+ .trigger_dio_fifo_resync = dccg35_trigger_dio_fifo_resync,
+ .set_valid_pixel_rate = dccg35_set_valid_pixel_rate,
+ .enable_symclk_se = dccg35_enable_symclk_se,
+ .disable_symclk_se = dccg35_disable_symclk_se,
+ .set_dtbclk_p_src = dccg35_set_dtbclk_p_src,
+ .dccg_root_gate_disable_control = dccg35_root_gate_disable_control, /* only this table provides the hook */
+};
+
+/* Allocate a zeroed DCN3.5 DCCG object, attach the register tables and install dccg35_funcs. */
+struct dccg *dccg35_create(struct dc_context *ctx,
+		const struct dccg_registers *regs,
+		const struct dccg_shift *dccg_shift,
+		const struct dccg_mask *dccg_mask)
+{
+	struct dcn_dccg *dccg_dcn = kzalloc(sizeof(*dccg_dcn), GFP_KERNEL);
+	struct dccg *base;
+
+	if (!dccg_dcn) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+	/* Address-of with the result discarded: no runtime effect, presumably silences unused-symbol warnings. */
+	(void)&dccg35_funcs_new;
+	(void)&dccg35_set_smclk32_se_rcg;
+	(void)&dccg35_set_symclk32_le_root_clock_gating;
+	(void)&dccg35_disable_symclk_be_new;
+
+	dccg_dcn->regs = regs;
+	dccg_dcn->dccg_shift = dccg_shift;
+	dccg_dcn->dccg_mask = dccg_mask;
+	base = &dccg_dcn->base;
+	base->ctx = ctx;
+	base->funcs = &dccg35_funcs;
+
+	return base;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h
new file mode 100644
index 000000000000..51f98c5c51c4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DCN35_DCCG_H__
+#define __DCN35_DCCG_H__
+
+#include "dcn314/dcn314_dccg.h"
+
+#define DCCG_SFII(block, reg_name, field_prefix, field_name, inst, post_fix)\
+ .field_prefix ## _ ## field_name[inst] = block ## inst ## _ ## reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
+
+
+#define DCCG_REG_LIST_DCN35() \
+ DCCG_REG_LIST_DCN314(),\
+ SR(DPPCLK_CTRL),\
+ SR(DCCG_GATE_DISABLE_CNTL4),\
+ SR(DCCG_GATE_DISABLE_CNTL5),\
+ SR(DCCG_GATE_DISABLE_CNTL6),\
+ SR(DCCG_GLOBAL_FGCG_REP_CNTL),\
+ SR(SYMCLKA_CLOCK_ENABLE),\
+ SR(SYMCLKB_CLOCK_ENABLE),\
+ SR(SYMCLKC_CLOCK_ENABLE),\
+ SR(SYMCLKD_CLOCK_ENABLE),\
+ SR(SYMCLKE_CLOCK_ENABLE)
+
+#define DCCG_MASK_SH_LIST_DCN35(mask_sh) \
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 2, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK0_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK1_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK2_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK3_EN, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_SRC_SEL, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_EN, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh),\
+ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_MODULO, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_EN, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLK_DTO, ENABLE, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DTBCLKDTO, ENABLE_STATUS, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_PIXEL_RATE_DIVK1, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_PIXEL_RATE_DIVK2, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_PIXEL_RATE_DIVK1, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_PIXEL_RATE_DIVK2, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_PIXEL_RATE_DIVK1, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_PIXEL_RATE_DIVK2, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_PIXEL_RATE_DIVK1, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_PIXEL_RATE_DIVK2, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_PIXEL_RATE_DIVK2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, DROP_PIXEL, 3, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_EN, mask_sh),\
+ DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh),\
+ DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\
+ DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh),\
+ DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, mask_sh),\
+ DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GLOBAL_FGCG_REP_CNTL, DCCG_GLOBAL_FGCG_REP_DIS, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, HDMISTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYE_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK0_DTO_PARAM, HDMISTREAMCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL, DISPCLK_DCCG_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, HDMISTREAMCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
+
+struct dccg *dccg35_create(
+ struct dc_context *ctx,
+ const struct dccg_registers *regs,
+ const struct dccg_shift *dccg_shift,
+ const struct dccg_mask *dccg_mask);
+
+void dccg35_init(struct dccg *dccg);
+
+void dccg35_enable_global_fgcg_rep(struct dccg *dccg, bool value);
+void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating);
+
+
+#endif //__DCN35_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c
new file mode 100644
index 000000000000..0b8ed9b94d3c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c
@@ -0,0 +1,914 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dcn401_dccg.h"
+#include "dcn31/dcn31_dccg.h"
+
+/*
+#include "dmub_common.h"
+#include "dmcub_reg_access_helper.h"
+
+#include "dmub401_common.h"
+#include "dmub401_regs.h"
+#include "dmub401_dccg.h"
+*/
+
+#define TO_DCN_DCCG(dccg)\
+ container_of(dccg, struct dcn_dccg, base)
+/* REG() resolves a register through a local variable named dccg_dcn in the calling function */
+#define REG(reg) \
+ (dccg_dcn->regs->reg)
+/* FN() expands to the shift, mask pair of field_name; reg_name is not used in the expansion */
+#undef FN
+#define FN(reg_name, field_name) \
+ dccg_dcn->dccg_shift->field_name, dccg_dcn->dccg_mask->field_name
+/* CTX goes through the local dccg_dcn, DC_LOGGER through a local named dccg */
+#define CTX \
+ dccg_dcn->base.ctx
+#define DC_LOGGER \
+ dccg->ctx->logger
+
+static void dcn401_set_dppclk_enable(struct dccg *dccg,
+ uint32_t dpp_inst, uint32_t enable)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); /* local name required by the REG()/FN() macros */
+
+ switch (dpp_inst) { /* each DPP instance has its own DPPCLKn_EN field in DPPCLK_CTRL */
+ case 0:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, enable);
+ break;
+ case 1:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, enable);
+ break;
+ case 2:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, enable);
+ break;
+ case 3:
+ REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, enable);
+ break;
+ default: /* instances other than 0-3 are ignored without any register access */
+ break;
+ }
+}
+void dccg401_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ if (dccg->ref_dppclk && req_dppclk) { /* the DTO is programmed only when both clocks are non-zero */
+ int ref_dppclk = dccg->ref_dppclk;
+ int modulo, phase;
+
+ // phase / modulo = dpp pipe clk / dpp global clk
+ modulo = 0xff; // use FF at the end
+ phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk; /* rounds up for positive values */
+
+ if (phase > 0xff) { /* assert, then clamp phase to the modulo value */
+ ASSERT(false);
+ phase = 0xff;
+ }
+
+ REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, /* DPPCLK0_* field layout is used for every instance */
+ DPPCLK0_DTO_PHASE, phase,
+ DPPCLK0_DTO_MODULO, modulo);
+ dcn401_set_dppclk_enable(dccg, dpp_inst, true);
+ } else { /* otherwise only this DPP's DPPCLKn_EN field is cleared */
+ dcn401_set_dppclk_enable(dccg, dpp_inst, false);
+ }
+
+ dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; /* NOTE(review): dpp_inst is not range-checked here */
+}
+
+/* Workaround used after writing OTG_PIXEL_RATE_DIV to avoid a possible DIG FIFO error:
+ * DENTIST_DISPCLK_CNTL is rewritten with its current value, then DENTIST_DISPCLK_CHG_DONE is waited on.
+ */
+static void dccg401_wait_for_dentist_change_done(
+ struct dccg *dccg)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ uint32_t dentist_dispclk_value = REG_READ(DENTIST_DISPCLK_CNTL);
+
+ REG_WRITE(DENTIST_DISPCLK_CNTL, dentist_dispclk_value); /* write back exactly the value just read */
+ REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 2000); /* presumably waits for the field to read 1; 50/2000 are REG_WAIT poll parameters - confirm */
+}
+
+void dccg401_get_pixel_rate_div(
+ struct dccg *dccg,
+ uint32_t otg_inst,
+ uint32_t *tmds_div,
+ uint32_t *dp_dto_int)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ uint32_t val_tmds_div = PIXEL_RATE_DIV_NA; /* overwritten by the register read for otg_inst 0-3 */
+
+ switch (otg_inst) { /* one TMDS divider field and one DPDTOn_INT field per OTG, all in OTG_PIXEL_RATE_DIV */
+ case 0:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG0_TMDS_PIXEL_RATE_DIV, &val_tmds_div,
+ DPDTO0_INT, dp_dto_int);
+ break;
+ case 1:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG1_TMDS_PIXEL_RATE_DIV, &val_tmds_div,
+ DPDTO1_INT, dp_dto_int);
+ break;
+ case 2:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG2_TMDS_PIXEL_RATE_DIV, &val_tmds_div,
+ DPDTO2_INT, dp_dto_int);
+ break;
+ case 3:
+ REG_GET_2(OTG_PIXEL_RATE_DIV,
+ OTG3_TMDS_PIXEL_RATE_DIV, &val_tmds_div,
+ DPDTO3_INT, dp_dto_int);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return; /* both output parameters are left unwritten for an unknown otg_inst */
+ }
+
+ *tmds_div = val_tmds_div == 0 ? PIXEL_RATE_DIV_BY_2 : PIXEL_RATE_DIV_BY_4; /* 0 -> by 2, else by 4 */
+}
+
+/*
+ * Program the TMDS pixel rate divider of one OTG.
+ * Only PIXEL_RATE_DIV_BY_2 and PIXEL_RATE_DIV_BY_4 are accepted. No register is
+ * written for any other value, for a divider equal to the one already programmed,
+ * or for an otg_inst outside 0-3. The last parameter is not used.
+ */
+void dccg401_set_pixel_rate_div(
+ struct dccg *dccg,
+ uint32_t otg_inst,
+ enum pixel_rate_div tmds_div,
+ enum pixel_rate_div unused)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ uint32_t cur_tmds_div = PIXEL_RATE_DIV_NA;
+ uint32_t dp_dto_int;
+ uint32_t reg_val;
+
+ // only 2 and 4 are valid on dcn401
+ if (tmds_div != PIXEL_RATE_DIV_BY_2 && tmds_div != PIXEL_RATE_DIV_BY_4)
+ return;
+
+ dccg401_get_pixel_rate_div(dccg, otg_inst, &cur_tmds_div, &dp_dto_int);
+ if (tmds_div == cur_tmds_div)
+ return;
+
+ // encode enum to register value
+ reg_val = tmds_div == PIXEL_RATE_DIV_BY_4 ? 1 : 0;
+
+ switch (otg_inst) {
+ case 0:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ OTG0_TMDS_PIXEL_RATE_DIV, reg_val);
+ break;
+ case 1:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ OTG1_TMDS_PIXEL_RATE_DIV, reg_val);
+ break;
+ case 2:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ OTG2_TMDS_PIXEL_RATE_DIV, reg_val);
+ break;
+ case 3:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ OTG3_TMDS_PIXEL_RATE_DIV, reg_val);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ /* every divider write is followed by the DENTIST change-done workaround */
+ dccg401_wait_for_dentist_change_done(dccg);
+}
+
+
+void dccg401_set_dtbclk_p_src(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ uint32_t p_src_sel = 0; /* selects dprefclk */
+ if (src == DTBCLK0)
+ p_src_sel = 2; /* selects dtbclk0 */
+
+ switch (otg_inst) { /* DTBCLK_P_CNTL holds one SRC_SEL/EN field pair per OTG */
+ case 0:
+ if (src == REFCLK) /* REFCLK: only the enable field is cleared, SRC_SEL is not written */
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P0_EN, 0);
+ else /* any other source: program SRC_SEL and set the enable field */
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P0_SRC_SEL, p_src_sel,
+ DTBCLK_P0_EN, 1);
+ break;
+ case 1:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P1_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P1_SRC_SEL, p_src_sel,
+ DTBCLK_P1_EN, 1);
+ break;
+ case 2:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P2_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P2_SRC_SEL, p_src_sel,
+ DTBCLK_P2_EN, 1);
+ break;
+ case 3:
+ if (src == REFCLK)
+ REG_UPDATE(DTBCLK_P_CNTL,
+ DTBCLK_P3_EN, 0);
+ else
+ REG_UPDATE_2(DTBCLK_P_CNTL,
+ DTBCLK_P3_SRC_SEL, p_src_sel,
+ DTBCLK_P3_EN, 1);
+ break;
+ default: /* otg_inst outside 0-3: no register is written */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+}
+
+void dccg401_set_physymclk(
+ struct dccg *dccg,
+ int phy_inst,
+ enum physymclk_clock_source clk_src,
+ bool force_enable)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* Force PHYSYMCLK on with clk_src as the source of the clock output to PHY through DCIO, or turn it off */
+ switch (phy_inst) {
+ case 0:
+ if (force_enable) { /* enable with the requested source */
+ REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
+ PHYASYMCLK_EN, 1,
+ PHYASYMCLK_SRC_SEL, clk_src);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) /* root gate bit is touched only with this debug bit set */
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYASYMCLK_ROOT_GATE_DISABLE, 1);
+ } else { /* disable: clk_src is ignored, SRC_SEL is written as 0 */
+ REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL,
+ PHYASYMCLK_EN, 0,
+ PHYASYMCLK_SRC_SEL, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYASYMCLK_ROOT_GATE_DISABLE, 0);
+ }
+ break;
+ case 1:
+ if (force_enable) {
+ REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
+ PHYBSYMCLK_EN, 1,
+ PHYBSYMCLK_SRC_SEL, clk_src);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYBSYMCLK_ROOT_GATE_DISABLE, 1);
+ } else {
+ REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL,
+ PHYBSYMCLK_EN, 0,
+ PHYBSYMCLK_SRC_SEL, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYBSYMCLK_ROOT_GATE_DISABLE, 0);
+ }
+ break;
+ case 2:
+ if (force_enable) {
+ REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
+ PHYCSYMCLK_EN, 1,
+ PHYCSYMCLK_SRC_SEL, clk_src);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYCSYMCLK_ROOT_GATE_DISABLE, 1);
+ } else {
+ REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL,
+ PHYCSYMCLK_EN, 0,
+ PHYCSYMCLK_SRC_SEL, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYCSYMCLK_ROOT_GATE_DISABLE, 0);
+ }
+ break;
+ case 3:
+ if (force_enable) {
+ REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
+ PHYDSYMCLK_EN, 1,
+ PHYDSYMCLK_SRC_SEL, clk_src);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYDSYMCLK_ROOT_GATE_DISABLE, 1);
+ } else {
+ REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL,
+ PHYDSYMCLK_EN, 0,
+ PHYDSYMCLK_SRC_SEL, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL2,
+ PHYDSYMCLK_ROOT_GATE_DISABLE, 0);
+ }
+ break;
+ default: /* only PHY instances 0-3 (A-D) are handled */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+void dccg401_get_dccg_ref_freq(struct dccg *dccg,
+ unsigned int xtalin_freq_inKhz,
+ unsigned int *dccg_ref_freq_inKhz)
+{
+ /*
+ * Assume refclk is sourced from xtalin (expect 100MHz), so the DCCG
+ * reference frequency is reported as the xtalin frequency unchanged.
+ * The dccg argument is not used.
+ */
+ *dccg_ref_freq_inKhz = xtalin_freq_inKhz;
+}
+
+static void dccg401_otg_add_pixel(struct dccg *dccg,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[otg_inst], /* NOTE(review): otg_inst indexes both arrays without a range check */
+ OTG_ADD_PIXEL[otg_inst], 1); /* writes 1 to this OTG's ADD_PIXEL field */
+}
+
+static void dccg401_otg_drop_pixel(struct dccg *dccg,
+ uint32_t otg_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[otg_inst], /* NOTE(review): otg_inst indexes both arrays without a range check */
+ OTG_DROP_PIXEL[otg_inst], 1); /* writes 1 to this OTG's DROP_PIXEL field */
+}
+
+void dccg401_enable_symclk32_le(
+ struct dccg *dccg,
+ int hpo_le_inst,
+ enum phyd32clk_clock_source phyd32clk)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* select one of the PHYD32CLKs as the source for symclk32_le */
+ switch (hpo_le_inst) {
+ case 0:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) /* with this debug bit: set both gate-disable bits first */
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE0_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE0_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_LE_CNTL, /* always executed: program the source and set the enable field */
+ SYMCLK32_LE0_SRC_SEL, phyd32clk,
+ SYMCLK32_LE0_EN, 1);
+ break;
+ case 1:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE1_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE1_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE1_SRC_SEL, phyd32clk,
+ SYMCLK32_LE1_EN, 1);
+ break;
+ case 2: /* NOTE(review): LE2/LE3 SRC_SEL/EN are not in the visible part of DCCG_MASK_SH_LIST_DCN401 - confirm they are listed */
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE2_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE2_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE2_SRC_SEL, phyd32clk,
+ SYMCLK32_LE2_EN, 1);
+ break;
+ case 3:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE3_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE3_GATE_DISABLE, 1);
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE3_SRC_SEL, phyd32clk,
+ SYMCLK32_LE3_EN, 1);
+ break;
+ default: /* hpo_le_inst outside 0-3: no register is written */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+void dccg401_disable_symclk32_le(
+ struct dccg *dccg,
+ int hpo_le_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* set refclk as the source for symclk32_le */
+ switch (hpo_le_inst) {
+ case 0:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL, /* always executed: SRC_SEL and EN are both written as 0 */
+ SYMCLK32_LE0_SRC_SEL, 0,
+ SYMCLK32_LE0_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) /* with this debug bit: clear both gate-disable bits afterwards */
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE0_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_LE0_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE1_SRC_SEL, 0,
+ SYMCLK32_LE1_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE1_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_LE1_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE2_SRC_SEL, 0,
+ SYMCLK32_LE2_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE2_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_LE2_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLK32_LE_CNTL,
+ SYMCLK32_LE3_SRC_SEL, 0,
+ SYMCLK32_LE3_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_LE3_GATE_DISABLE, 0,
+ SYMCLK32_ROOT_LE3_GATE_DISABLE, 0);
+ break;
+ default: /* hpo_le_inst outside 0-3: no register is written */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+static void dccg401_enable_dpstreamclk(struct dccg *dccg, int otg_inst, int dp_hpo_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ /* enable the stream clock and select one of the DTBCLKs (by otg_inst) as its source */
+ switch (dp_hpo_inst) {
+ case 0:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) /* with this debug bit: set both per-instance gate-disable bits first */
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK0_ROOT_GATE_DISABLE, 1,
+ DPSTREAMCLK0_GATE_DISABLE, 1);
+ REG_UPDATE_2(DPSTREAMCLK_CNTL, /* always executed: otg_inst is written to SRC_SEL as is */
+ DPSTREAMCLK0_SRC_SEL, otg_inst,
+ DPSTREAMCLK0_EN, 1);
+ break;
+ case 1:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK1_ROOT_GATE_DISABLE, 1,
+ DPSTREAMCLK1_GATE_DISABLE, 1);
+ REG_UPDATE_2(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK1_SRC_SEL, otg_inst,
+ DPSTREAMCLK1_EN, 1);
+ break;
+ case 2:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK2_ROOT_GATE_DISABLE, 1,
+ DPSTREAMCLK2_GATE_DISABLE, 1);
+ REG_UPDATE_2(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK2_SRC_SEL, otg_inst,
+ DPSTREAMCLK2_EN, 1);
+ break;
+ case 3:
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK3_ROOT_GATE_DISABLE, 1,
+ DPSTREAMCLK3_GATE_DISABLE, 1);
+ REG_UPDATE_2(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK3_SRC_SEL, otg_inst,
+ DPSTREAMCLK3_EN, 1);
+ break;
+ default: /* dp_hpo_inst outside 0-3: return before the CNTL3 update below */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) /* also set the instance-independent DPSTREAMCLK bits in CNTL3 */
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3,
+ DPSTREAMCLK_GATE_DISABLE, 1,
+ DPSTREAMCLK_ROOT_GATE_DISABLE, 1);
+}
+
+void dccg401_disable_dpstreamclk(struct dccg *dccg, int dp_hpo_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (dp_hpo_inst) { /* per instance: clear DPSTREAMCLKn_EN, then optionally the gate-disable bits */
+ case 0:
+ REG_UPDATE(DPSTREAMCLK_CNTL, /* only the enable field is cleared; SRC_SEL is not written */
+ DPSTREAMCLK0_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) /* with this debug bit: clear both per-instance gate-disable bits */
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK0_ROOT_GATE_DISABLE, 0,
+ DPSTREAMCLK0_GATE_DISABLE, 0);
+ break;
+ case 1:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK1_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK1_ROOT_GATE_DISABLE, 0,
+ DPSTREAMCLK1_GATE_DISABLE, 0);
+ break;
+ case 2:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK2_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK2_ROOT_GATE_DISABLE, 0,
+ DPSTREAMCLK2_GATE_DISABLE, 0);
+ break;
+ case 3:
+ REG_UPDATE(DPSTREAMCLK_CNTL,
+ DPSTREAMCLK3_EN, 0);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5,
+ DPSTREAMCLK3_ROOT_GATE_DISABLE, 0,
+ DPSTREAMCLK3_GATE_DISABLE, 0);
+ break;
+ default: /* dp_hpo_inst outside 0-3: no register is written */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+void dccg401_set_dpstreamclk(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ int otg_inst,
+ int dp_hpo_inst)
+{
+ /* any source other than REFCLK enables the stream clock; REFCLK turns it off */
+ if (src != REFCLK)
+ dccg401_enable_dpstreamclk(dccg, otg_inst, dp_hpo_inst);
+ else
+ dccg401_disable_dpstreamclk(dccg, dp_hpo_inst);
+}
+
+void dccg401_set_dp_dto(
+ struct dccg *dccg,
+ const struct dp_dto_params *params)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ bool enable = false; /* stays false for TMDS signals, which switches the DTO off below */
+
+ if (params->otg_inst > 3) {
+ /* dcn401 only has 4 instances */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ if (!params->refclk_hz) { /* refclk_hz is the divisor used below */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ if (!dc_is_tmds_signal(params->signal)) {
+ uint64_t dto_integer;
+ uint64_t dto_phase_hz;
+ uint64_t dto_modulo_hz = params->refclk_hz;
+
+ enable = true;
+
+ /* Set DTO values:
+ * int = target_pix_rate / reference_clock
+ * phase = target_pix_rate - int * reference_clock,
+ * modulo = reference_clock */
+ dto_integer = div_u64(params->pixclk_hz, dto_modulo_hz); /* NOTE(review): div_u64 takes a 32-bit divisor - confirm */
+ dto_phase_hz = params->pixclk_hz - dto_integer * dto_modulo_hz;
+
+ if (dto_phase_hz == 0 && dto_integer == 0) {
+ /* both values are unsigned, so this is a zero pixel rate; should never happen */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ switch (params->otg_inst) { /* otg_inst is known to be 0-3 here: set this instance's gate-disable bits first */
+ case 0:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, 1);
+ REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE0_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE0_GATE_DISABLE, 1,
+ SYMCLK32_LE0_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE0_GATE_DISABLE, 1);
+ break;
+ case 1:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, 1);
+ REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE1_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE1_GATE_DISABLE, 1,
+ SYMCLK32_LE1_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE1_GATE_DISABLE, 1);
+ break;
+ case 2:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, 1);
+ REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE2_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE2_GATE_DISABLE, 1,
+ SYMCLK32_LE2_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE2_GATE_DISABLE, 1);
+ break;
+ case 3:
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, 1);
+ REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL3,
+ SYMCLK32_SE3_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_SE3_GATE_DISABLE, 1,
+ SYMCLK32_LE3_GATE_DISABLE, 1,
+ SYMCLK32_ROOT_LE3_GATE_DISABLE, 1);
+ break;
+ }
+
+ dccg401_set_dtbclk_p_src(dccg, params->clk_src, params->otg_inst);
+
+ REG_WRITE(DP_DTO_PHASE[params->otg_inst], dto_phase_hz);
+ REG_WRITE(DP_DTO_MODULO[params->otg_inst], dto_modulo_hz);
+
+ switch (params->otg_inst) { /* the integer part lives in OTG_PIXEL_RATE_DIV, one DPDTOn_INT field per OTG */
+ case 0:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ DPDTO0_INT, dto_integer);
+ break;
+ case 1:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ DPDTO1_INT, dto_integer);
+ break;
+ case 2:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ DPDTO2_INT, dto_integer);
+ break;
+ case 3:
+ REG_UPDATE(OTG_PIXEL_RATE_DIV,
+ DPDTO3_INT, dto_integer);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+ }
+
+ /* Toggle DTO: both fields are written as 0 for TMDS signals and as 1 otherwise */
+ REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DP_DTO_ENABLE[params->otg_inst], enable,
+ PIPE_DTO_SRC_SEL[params->otg_inst], enable);
+}
+
+void dccg401_init(struct dccg *dccg)
+{
+ /* Set HPO stream encoder to use refclk to avoid case where PHY is
+ * disabled and SYMCLK32 for HPO SE is sourced from PHYD32CLK which
+ * will cause DCN to hang.
+ */
+ dccg31_disable_symclk32_se(dccg, 0); /* done unconditionally, unlike the three groups below */
+ dccg31_disable_symclk32_se(dccg, 1);
+ dccg31_disable_symclk32_se(dccg, 2);
+ dccg31_disable_symclk32_se(dccg, 3);
+
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) { /* each remaining group runs only when its debug bit is set */
+ dccg401_disable_symclk32_le(dccg, 0);
+ dccg401_disable_symclk32_le(dccg, 1);
+ dccg401_disable_symclk32_le(dccg, 2);
+ dccg401_disable_symclk32_le(dccg, 3);
+ }
+
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) {
+ dccg401_disable_dpstreamclk(dccg, 0);
+ dccg401_disable_dpstreamclk(dccg, 1);
+ dccg401_disable_dpstreamclk(dccg, 2);
+ dccg401_disable_dpstreamclk(dccg, 3);
+ }
+
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) {
+ dccg401_set_physymclk(dccg, 0, PHYSYMCLK_FORCE_SRC_SYMCLK, false); /* force_enable is false, so the clk_src argument is ignored */
+ dccg401_set_physymclk(dccg, 1, PHYSYMCLK_FORCE_SRC_SYMCLK, false);
+ dccg401_set_physymclk(dccg, 2, PHYSYMCLK_FORCE_SRC_SYMCLK, false);
+ dccg401_set_physymclk(dccg, 3, PHYSYMCLK_FORCE_SRC_SYMCLK, false);
+ }
+}
+
+void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst, uint32_t num_slices_h)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (inst) { /* num_slices_h is not used: PHASE and MODULO are always written as 1 */
+ case 0:
+ REG_UPDATE_2(DSCCLK0_DTO_PARAM, /* DTO parameters are written before the enable field is set */
+ DSCCLK0_DTO_PHASE, 1,
+ DSCCLK0_DTO_MODULO, 1);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1);
+
+ break;
+ case 1:
+ REG_UPDATE_2(DSCCLK1_DTO_PARAM,
+ DSCCLK1_DTO_PHASE, 1,
+ DSCCLK1_DTO_MODULO, 1);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1);
+ break;
+ case 2:
+ REG_UPDATE_2(DSCCLK2_DTO_PARAM,
+ DSCCLK2_DTO_PHASE, 1,
+ DSCCLK2_DTO_MODULO, 1);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1);
+ break;
+ case 3:
+ REG_UPDATE_2(DSCCLK3_DTO_PARAM,
+ DSCCLK3_DTO_PHASE, 1,
+ DSCCLK3_DTO_MODULO, 1);
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1);
+ break;
+ default: /* inst outside 0-3: no register is written */
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+}
+
+void dccg401_set_ref_dscclk(struct dccg *dccg,
+ uint32_t dsc_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (dsc_inst) { /* reverse order of dccg401_set_dto_dscclk: clear the enable field, then zero the DTO parameters */
+ case 0:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0);
+ REG_UPDATE_2(DSCCLK0_DTO_PARAM,
+ DSCCLK0_DTO_PHASE, 0,
+ DSCCLK0_DTO_MODULO, 0);
+ break;
+ case 1:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 0);
+ REG_UPDATE_2(DSCCLK1_DTO_PARAM,
+ DSCCLK1_DTO_PHASE, 0,
+ DSCCLK1_DTO_MODULO, 0);
+ break;
+ case 2:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 0);
+ REG_UPDATE_2(DSCCLK2_DTO_PARAM,
+ DSCCLK2_DTO_PHASE, 0,
+ DSCCLK2_DTO_MODULO, 0);
+ break;
+ case 3:
+ REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 0);
+ REG_UPDATE_2(DSCCLK3_DTO_PARAM,
+ DSCCLK3_DTO_PHASE, 0,
+ DSCCLK3_DTO_MODULO, 0);
+ break;
+ default: /* unknown instance is ignored silently: no BREAK_TO_DEBUGGER(), unlike dccg401_set_dto_dscclk */
+ return;
+ }
+}
+
+void dccg401_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (stream_enc_inst) { /* stream encoders 0-3 use SYMCLKA-D; no default case, other values write nothing */
+ case 0:
+ REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, /* link_enc_inst is written to FE_SRC_SEL as is */
+ SYMCLKA_FE_EN, 1,
+ SYMCLKA_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) /* NOTE(review): symclk32_se bit guards a SYMCLKx_FE field - confirm intended */
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_FE_EN, 1,
+ SYMCLKB_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_FE_EN, 1,
+ SYMCLKC_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_FE_EN, 1,
+ SYMCLKD_FE_SRC_SEL, link_enc_inst);
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se)
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, 1);
+ break;
+ }
+}
+
+void dccg401_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+{
+ struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+
+ switch (stream_enc_inst) { /* link_enc_inst is not used; no default case, values other than 0-3 write nothing */
+ case 0: /* NOTE(review): the FE_ROOT_GATE_DISABLE bit set by dccg401_enable_symclk_se is not cleared here - confirm intended */
+ REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE,
+ SYMCLKA_FE_EN, 0,
+ SYMCLKA_FE_SRC_SEL, 0);
+ break;
+ case 1:
+ REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE,
+ SYMCLKB_FE_EN, 0,
+ SYMCLKB_FE_SRC_SEL, 0);
+ break;
+ case 2:
+ REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE,
+ SYMCLKC_FE_EN, 0,
+ SYMCLKC_FE_SRC_SEL, 0);
+ break;
+ case 3:
+ REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE,
+ SYMCLKD_FE_EN, 0,
+ SYMCLKD_FE_SRC_SEL, 0);
+ break;
+ }
+}
+
+static const struct dccg_funcs dccg401_funcs = { /* callback table installed by dccg401_create() */
+ .update_dpp_dto = dccg401_update_dpp_dto,
+ .get_dccg_ref_freq = dccg401_get_dccg_ref_freq,
+ .dccg_init = dccg401_init,
+ .set_dpstreamclk = dccg401_set_dpstreamclk,
+ .enable_symclk32_se = dccg31_enable_symclk32_se, /* dccg31_ implementation, not defined in this file */
+ .disable_symclk32_se = dccg31_disable_symclk32_se, /* dccg31_ implementation, not defined in this file */
+ .enable_symclk32_le = dccg401_enable_symclk32_le,
+ .disable_symclk32_le = dccg401_disable_symclk32_le,
+ .set_physymclk = dccg401_set_physymclk,
+ .set_dtbclk_dto = NULL, /* no implementation is installed for this hook */
+ .set_dto_dscclk = dccg401_set_dto_dscclk,
+ .set_ref_dscclk = dccg401_set_ref_dscclk,
+ .set_valid_pixel_rate = NULL, /* no implementation is installed for this hook */
+ .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en, /* dccg2_ implementation, not defined in this file */
+ .set_audio_dtbclk_dto = NULL, /* no implementation is installed for this hook */
+ .otg_add_pixel = dccg401_otg_add_pixel,
+ .otg_drop_pixel = dccg401_otg_drop_pixel,
+ .set_pixel_rate_div = dccg401_set_pixel_rate_div,
+ .get_pixel_rate_div = dccg401_get_pixel_rate_div,
+ .set_dp_dto = dccg401_set_dp_dto,
+ .enable_symclk_se = dccg401_enable_symclk_se,
+ .disable_symclk_se = dccg401_disable_symclk_se,
+ .set_dtbclk_p_src = dccg401_set_dtbclk_p_src,
+};
+
+struct dccg *dccg401_create(
+ struct dc_context *ctx,
+ const struct dccg_registers *regs,
+ const struct dccg_shift *dccg_shift,
+ const struct dccg_mask *dccg_mask)
+{
+ struct dcn_dccg *dccg401;
+
+ /* zero-initialised allocation; NULL is returned to the caller on failure */
+ dccg401 = kzalloc(sizeof(*dccg401), GFP_KERNEL);
+ if (!dccg401) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+ dccg401->base.ctx = ctx;
+ dccg401->base.funcs = &dccg401_funcs;
+ /* the register, shift and mask tables are stored by pointer, not copied */
+ dccg401->regs = regs;
+ dccg401->dccg_shift = dccg_shift;
+ dccg401->dccg_mask = dccg_mask;
+
+ return &dccg401->base;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h
new file mode 100644
index 000000000000..5947a35363aa
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN401_DCCG_H__
+#define __DCN401_DCCG_H__
+
+#include "dcn32/dcn32_dccg.h"
+/* Initialises element [inst] of the field array <field_prefix>_<field_name> from the <block><inst>_<reg_name> definition */
+#define DCCG_SFII(block, reg_name, field_prefix, field_name, inst, post_fix)\
+ .field_prefix ## _ ## field_name[inst] = block ## inst ## _ ## reg_name ## __ ## field_prefix ## inst ## _ ## field_name ## post_fix
+
+#define DCCG_MASK_SH_LIST_DCN401(mask_sh) \
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 0, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 1, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 2, mask_sh),\
+ DCCG_SFI(DPPCLK_DTO_CTRL, DTO_DB_EN, DPPCLK, 3, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK0_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK1_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK2_EN, mask_sh),\
+ DCCG_SF(DPPCLK_CTRL, DPPCLK3_EN, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(DPPCLK0_DTO_PARAM, DPPCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_EN, mask_sh),\
+ DCCG_SF(HDMICHARCLK0_CLOCK_CNTL, HDMICHARCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, mask_sh),\
+ DCCG_SF(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK1_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK2_SRC_SEL, mask_sh),\
+ DCCG_SF(DPSTREAMCLK_CNTL, DPSTREAMCLK3_SRC_SEL, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_EN, mask_sh),\
+ DCCG_SF(HDMISTREAMCLK_CNTL, HDMISTREAMCLK0_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE0_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE1_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE2_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_SE_CNTL, SYMCLK32_SE3_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE0_EN, mask_sh),\
+ DCCG_SF(SYMCLK32_LE_CNTL, SYMCLK32_LE1_EN, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, OTG, ADD_PIXEL, 3, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG0_TMDS_PIXEL_RATE_DIV, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO0_INT, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG1_TMDS_PIXEL_RATE_DIV, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO1_INT, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG2_TMDS_PIXEL_RATE_DIV, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO2_INT, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, OTG3_TMDS_PIXEL_RATE_DIV, mask_sh),\
+ DCCG_SF(OTG_PIXEL_RATE_DIV, DPDTO3_INT, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P0_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P1_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_EN, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_SRC_SEL, mask_sh),\
+ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_EN, mask_sh),\
+ DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh),\
+ DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\
+ DCCG_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, DP_DTO, ENABLE, 3, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 0, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 1, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 2, mask_sh),\
+ DCCG_SFII(OTG, PIXEL_RATE_CNTL, PIPE, DTO_SRC_SEL, 3, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_EN, mask_sh),\
+ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh),\
+ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_MODULO, mask_sh),\
+ DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, mask_sh),\
+ DCCG_SF(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_MODULO, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, HDMISTREAMCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_SE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_ROOT_LE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_SE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL3, SYMCLK32_LE3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, HDMICHARCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYA_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYB_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYC_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL4, PHYD_REFCLK_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_FE_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(DCCG_GATE_DISABLE_CNTL6, HDMISTREAMCLK0_ROOT_GATE_DISABLE, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, mask_sh),\
+ DCCG_SF(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, mask_sh),\
+ DCCG_SF(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, mask_sh),\
+
+void dccg401_init(struct dccg *dccg);
+
+void dccg401_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk);
+void dccg401_get_dccg_ref_freq(struct dccg *dccg,
+ unsigned int xtalin_freq_inKhz,
+ unsigned int *dccg_ref_freq_inKhz);
+void dccg401_set_dpstreamclk(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ int otg_inst,
+ int dp_hpo_inst);
+void dccg401_enable_symclk32_le(
+ struct dccg *dccg,
+ int hpo_le_inst,
+ enum phyd32clk_clock_source phyd32clk);
+void dccg401_disable_symclk32_le(
+ struct dccg *dccg,
+ int hpo_le_inst);
+void dccg401_disable_dpstreamclk(struct dccg *dccg, int dp_hpo_inst);
+void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst, uint32_t num_slices_h);
+void dccg401_set_ref_dscclk(struct dccg *dccg,
+ uint32_t dsc_inst);
+void dccg401_set_src_sel(
+ struct dccg *dccg,
+ const struct dtbclk_dto_params *params);
+void dccg401_set_pixel_rate_div(
+ struct dccg *dccg,
+ uint32_t otg_inst,
+ enum pixel_rate_div tmds_div,
+ enum pixel_rate_div unused);
+void dccg401_get_pixel_rate_div(
+ struct dccg *dccg,
+ uint32_t otg_inst,
+ uint32_t *tmds_div,
+ uint32_t *dp_dto_int);
+void dccg401_set_dp_dto(
+ struct dccg *dccg,
+ const struct dp_dto_params *params);
+void dccg401_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst);
+void dccg401_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst);
+void dccg401_set_dtbclk_p_src(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ uint32_t otg_inst);
+struct dccg *dccg401_create(
+ struct dc_context *ctx,
+ const struct dccg_registers *regs,
+ const struct dccg_shift *dccg_shift,
+ const struct dccg_mask *dccg_mask);
+
+void dccg401_set_physymclk(
+ struct dccg *dccg,
+ int phy_inst,
+ enum physymclk_clock_source clk_src,
+ bool force_enable);
+
+#endif //__DCN401_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dce/Makefile b/drivers/gpu/drm/amd/display/dc/dce/Makefile
index 15b64c26d5a2..986e0e7abbc2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce/Makefile
@@ -26,7 +26,7 @@
# - register programming through common macros that look up register
# offset/shift/mask stored in dce_hw struct
-DCE = dce_audio.o dce_stream_encoder.o dce_link_encoder.o dce_hwseq.o \
+DCE = dce_audio.o dce_stream_encoder.o dce_link_encoder.o \
dce_mem_input.o dce_clock_source.o dce_scl_filters.o dce_transform.o \
dce_opp.o dce_dmcu.o dce_abm.o dce_ipp.o dce_aux.o \
dce_i2c.o dce_i2c_hw.o dce_i2c_sw.o dmub_psr.o dmub_abm.o dmub_abm_lcd.o dce_panel_cntl.o \
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index 874b132fe1d7..a6006776333d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -135,7 +135,7 @@ static void dmcu_set_backlight_level(
0, 1, 80000);
}
-static void dce_abm_init(struct abm *abm, uint32_t backlight)
+static void dce_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level)
{
struct dce_abm *abm_dce = TO_DCE_ABM(abm);
@@ -162,7 +162,7 @@ static void dce_abm_init(struct abm *abm, uint32_t backlight)
BL1_PWM_TARGET_ABM_LEVEL, backlight);
REG_UPDATE(BL1_PWM_USER_LEVEL,
- BL1_PWM_USER_LEVEL, backlight);
+ BL1_PWM_USER_LEVEL, user_level);
REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
index 168cb7094c95..3d819fc5654c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h
@@ -128,21 +128,6 @@
SRI(DC_ABM1_ACE_THRES_12, ABM, id), \
NBIO_SR(BIOS_SCRATCH_2)
-#define ABM_DCN32_REG_LIST(id)\
- SRI(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \
- SRI(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \
- SRI(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \
- SRI(DC_ABM1_HG_MISC_CTRL, ABM, id), \
- SRI(DC_ABM1_IPCSC_COEFF_SEL, ABM, id), \
- SRI(BL1_PWM_CURRENT_ABM_LEVEL, ABM, id), \
- SRI(BL1_PWM_TARGET_ABM_LEVEL, ABM, id), \
- SRI(BL1_PWM_USER_LEVEL, ABM, id), \
- SRI(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \
- SRI(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \
- SRI(DC_ABM1_ACE_OFFSET_SLOPE_0, ABM, id), \
- SRI(DC_ABM1_ACE_THRES_12, ABM, id), \
- NBIO_SR(BIOS_SCRATCH_2)
-
#define ABM_SF(reg_name, field_name, post_fix)\
.field_name = reg_name ## __ ## field_name ## post_fix
@@ -183,8 +168,7 @@
ABM_SF(DC_ABM1_HGLS_REG_READ_PROGRESS, \
ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, mask_sh)
-#define ABM_MASK_SH_LIST_DCN10(mask_sh) \
- ABM_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh), \
+#define ABM_MASK_SH_LIST_DCN10_COMMON(mask_sh) \
ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
ABM1_HG_NUM_OF_BINS_SEL, mask_sh), \
ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
@@ -214,9 +198,13 @@
ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, mask_sh)
-#define ABM_MASK_SH_LIST_DCN20(mask_sh) ABM_MASK_SH_LIST_DCE110(mask_sh)
+#define ABM_MASK_SH_LIST_DCN10(mask_sh) \
+ ABM_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh), \
+ ABM_MASK_SH_LIST_DCN10_COMMON(mask_sh)
+#define ABM_MASK_SH_LIST_DCN20(mask_sh) ABM_MASK_SH_LIST_DCE110(mask_sh)
#define ABM_MASK_SH_LIST_DCN30(mask_sh) ABM_MASK_SH_LIST_DCN10(mask_sh)
+#define ABM_MASK_SH_LIST_DCN35(mask_sh) ABM_MASK_SH_LIST_DCN10_COMMON(mask_sh)
#define ABM_MASK_SH_LIST_DCN32(mask_sh) \
ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
@@ -248,6 +236,70 @@
ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, mask_sh)
+#define ABM_MASK_SH_LIST_DCN401(mask_sh) \
+ ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
+ ABM1_HG_NUM_OF_BINS_SEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
+ ABM1_HG_VMAX_SEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_MISC_CTRL, \
+ ABM1_HG_BIN_BITWIDTH_SIZE_SEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_IPCSC_COEFF_SEL, \
+ ABM1_IPCSC_COEFF_SEL_R, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_IPCSC_COEFF_SEL, \
+ ABM1_IPCSC_COEFF_SEL_G, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_IPCSC_COEFF_SEL, \
+ ABM1_IPCSC_COEFF_SEL_B, mask_sh), \
+ ABM_SF(ABM0_BL1_PWM_CURRENT_ABM_LEVEL, \
+ BL1_PWM_CURRENT_ABM_LEVEL, mask_sh), \
+ ABM_SF(ABM0_BL1_PWM_TARGET_ABM_LEVEL, \
+ BL1_PWM_TARGET_ABM_LEVEL, mask_sh), \
+ ABM_SF(ABM0_BL1_PWM_USER_LEVEL, \
+ BL1_PWM_USER_LEVEL, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, \
+ ABM1_LS_MIN_PIXEL_VALUE_THRES, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, \
+ ABM1_LS_MAX_PIXEL_VALUE_THRES, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
+ ABM1_HG_REG_READ_MISSED_FRAME_CLEAR, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
+ ABM1_LS_REG_READ_MISSED_FRAME_CLEAR, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HGLS_REG_READ_PROGRESS, \
+ ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_OFFSET_SLOPE_DATA, \
+ ABM1_ACE_SLOPE_DATA, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_OFFSET_SLOPE_DATA, \
+ ABM1_ACE_OFFSET_DATA, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_OFFSET_SLOPE_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_THRES_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_IGNORE_MASTER_LOCK_EN, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_READBACK_DB_REG_VALUE_EN, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_DBUF_REG_UPDATE_PENDING, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_PWL_CNTL, \
+ ABM1_ACE_LOCK, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_THRES_DATA, \
+ ABM1_ACE_THRES_DATA_1, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_ACE_THRES_DATA, \
+ ABM1_ACE_THRES_DATA_2, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_RESULT_DATA, \
+ ABM1_HG_RESULT_DATA, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_RESULT_INDEX, \
+ ABM1_HG_RESULT_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_BIN_33_40_SHIFT_INDEX, \
+ ABM1_HG_BIN_33_40_SHIFT_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_BIN_33_64_SHIFT_FLAG, \
+ ABM1_HG_BIN_33_64_SHIFT_FLAG, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_BIN_41_48_SHIFT_INDEX, \
+ ABM1_HG_BIN_41_48_SHIFT_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_BIN_49_56_SHIFT_INDEX, \
+ ABM1_HG_BIN_49_56_SHIFT_INDEX, mask_sh), \
+ ABM_SF(ABM0_DC_ABM1_HG_BIN_57_64_SHIFT_INDEX, \
+ ABM1_HG_BIN_57_64_SHIFT_INDEX, mask_sh)
+
#define ABM_REG_FIELD_LIST(type) \
type ABM1_HG_NUM_OF_BINS_SEL; \
type ABM1_HG_VMAX_SEL; \
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
index c94a966c6612..eeed840073fe 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
@@ -239,27 +239,295 @@ static void check_audio_bandwidth_hdmi(
}
}
}
+static struct fixed31_32 get_link_symbol_clk_freq_mhz(enum dc_link_rate link_rate)
+{
+ switch (link_rate) { /* map the DP link rate to its link symbol clock, in MHz */
+ case LINK_RATE_LOW:
+ return dc_fixpt_from_int(162); /* 162 MHz */
+ case LINK_RATE_HIGH:
+ return dc_fixpt_from_int(270); /* 270 MHz */
+ case LINK_RATE_HIGH2:
+ return dc_fixpt_from_int(540); /* 540 MHz */
+ case LINK_RATE_HIGH3:
+ return dc_fixpt_from_int(810); /* 810 MHz */
+ case LINK_RATE_UHBR10:
+ return dc_fixpt_from_fraction(3125, 10); /* 312.5 MHz (non-integer, hence from_fraction) */
+ case LINK_RATE_UHBR13_5:
+ return dc_fixpt_from_fraction(421875, 1000); /* 421.875 MHz */
+ case LINK_RATE_UHBR20:
+ return dc_fixpt_from_int(625); /* 625 MHz */
+ default:
+ /* Unexpected link rate: assert so it gets debugged, then report 0 MHz. */
+ ASSERT(0);
+ return dc_fixpt_from_int(0);
+ }
+}
+
+struct dp_audio_layout_config { /* audio SDP layout sizing; a zero field means "not filled in" */
+ uint8_t layouts_per_sample_denom; /* samples per line are divided by this to get layouts per line */
+ uint8_t symbols_per_layout; /* symbols occupied by one layout */
+ uint8_t max_layouts_per_audio_sdp; /* most layouts carried by a single audio SDP */
+};
+
+static void get_audio_layout_config(
+		uint32_t channel_count,
+		enum dp_link_encoding encoding,
+		struct dp_audio_layout_config *output)
+{
+	uint8_t layout_symbols;
+	uint8_t sample_denom;
+
+	/* L-PCM is assumed. Each SDP carries at most one layout and every
+	 * layout has the same (8ch) size, so only the symbol count depends on
+	 * the encoding. Unsupported inputs leave *output untouched.
+	 */
+	if (encoding == DP_8b_10b_ENCODING)
+		layout_symbols = 40;
+	else if (encoding == DP_128b_132b_ENCODING)
+		layout_symbols = 10;
+	else
+		return;
+
+	if (channel_count == 2)
+		sample_denom = 4;
+	else if (channel_count == 8 || channel_count == 6)
+		sample_denom = 1;
+	else
+		return;
+
+	output->layouts_per_sample_denom = sample_denom;
+	output->symbols_per_layout = layout_symbols;
+	output->max_layouts_per_audio_sdp = 1;
+}
-/*For DP SST, calculate if specified sample rates can fit into a given timing */
-static void check_audio_bandwidth_dpsst(
+static uint32_t get_av_stream_map_lane_count(
+		enum dp_link_encoding encoding,
+		enum dc_lane_count lane_count,
+		bool is_mst)
+{
+	uint32_t mapped_lanes = 0;
+
+	/* Only 8b/10b SST follows the link's lane count; 8b/10b MST and
+	 * 128b/132b always use 4. Any other encoding leaves 0 and asserts.
+	 */
+	if (encoding == DP_128b_132b_ENCODING)
+		mapped_lanes = 4;
+	else if (encoding == DP_8b_10b_ENCODING)
+		mapped_lanes = is_mst ? 4 : lane_count;
+
+	/* Also fires for 8b/10b SST when lane_count itself is 0. */
+	ASSERT(mapped_lanes != 0);
+
+	return mapped_lanes;
+}
+
+static uint32_t get_audio_sdp_overhead(
+		enum dp_link_encoding encoding,
+		enum dc_lane_count lane_count,
+		bool is_mst)
+{
+	uint32_t sdp_overhead = 0;
+
+	/* 8b/10b costs 2 per lane plus 8, with MST always counted as 4 lanes
+	 * (4 * 2 + 8 = 16); 128b/132b is a flat 10 (4 x 2.5). Any other
+	 * encoding leaves 0, which the assert below reports.
+	 */
+	if (encoding == DP_128b_132b_ENCODING)
+		sdp_overhead = 10;
+	else if (encoding == DP_8b_10b_ENCODING)
+		sdp_overhead = (is_mst ? 4 : lane_count) * 2 + 8;
+
+	ASSERT(sdp_overhead != 0);
+
+	return sdp_overhead;
+}
+
+static uint32_t calculate_required_audio_bw_in_symbols(
const struct audio_crtc_info *crtc_info,
+ const struct dp_audio_layout_config *layout_config,
uint32_t channel_count,
- union audio_sample_rates *sample_rates)
+ uint32_t sample_rate_hz,
+ uint32_t av_stream_map_lane_count,
+ uint32_t audio_sdp_overhead)
+{
+ /* DP spec recommends a safety margin between 1.05 and 1.1 to prevent sample under-run; 1.1 is used */
+ struct fixed31_32 audio_sdp_margin = dc_fixpt_from_fraction(110, 100);
+ struct fixed31_32 horizontal_line_freq_khz = dc_fixpt_from_fraction(
+ crtc_info->requested_pixel_clock_100Hz, (long long)crtc_info->h_total * 10); /* 100Hz units / (h_total * 10) = lines per ms */
+ struct fixed31_32 samples_per_line;
+ struct fixed31_32 layouts_per_line;
+ struct fixed31_32 symbols_per_sdp_max_layout;
+ struct fixed31_32 remainder;
+ uint32_t num_sdp_with_max_layouts;
+ uint32_t required_symbols_per_hblank; /* return value; channel_count is not referenced in this body */
+
+ samples_per_line = dc_fixpt_from_fraction(sample_rate_hz, 1000); /* sample rate in kHz */
+ samples_per_line = dc_fixpt_div(samples_per_line, horizontal_line_freq_khz); /* kHz / kHz = samples per video line */
+ layouts_per_line = dc_fixpt_div_int(samples_per_line, layout_config->layouts_per_sample_denom);
+
+ num_sdp_with_max_layouts = dc_fixpt_floor(
+ dc_fixpt_div_int(layouts_per_line, layout_config->max_layouts_per_audio_sdp));
+ symbols_per_sdp_max_layout = dc_fixpt_from_int(
+ layout_config->max_layouts_per_audio_sdp * layout_config->symbols_per_layout);
+ symbols_per_sdp_max_layout = dc_fixpt_add_int(symbols_per_sdp_max_layout, audio_sdp_overhead);
+ symbols_per_sdp_max_layout = dc_fixpt_mul(symbols_per_sdp_max_layout, audio_sdp_margin);
+ required_symbols_per_hblank = num_sdp_with_max_layouts; /* count of completely filled SDPs, scaled by their cost below */
+ required_symbols_per_hblank *= ((dc_fixpt_ceil(symbols_per_sdp_max_layout) + av_stream_map_lane_count) /
+ av_stream_map_lane_count) * av_stream_map_lane_count; /* next lane-count multiple strictly above the ceil value */
+
+ if (num_sdp_with_max_layouts != dc_fixpt_ceil(
+ dc_fixpt_div_int(layouts_per_line, layout_config->max_layouts_per_audio_sdp))) { /* floor != ceil: a fractional SDP remains */
+ remainder = dc_fixpt_sub_int(layouts_per_line,
+ num_sdp_with_max_layouts * layout_config->max_layouts_per_audio_sdp);
+ remainder = dc_fixpt_mul_int(remainder, layout_config->symbols_per_layout);
+ remainder = dc_fixpt_add_int(remainder, audio_sdp_overhead);
+ remainder = dc_fixpt_mul(remainder, audio_sdp_margin);
+ required_symbols_per_hblank += ((dc_fixpt_ceil(remainder) + av_stream_map_lane_count) /
+ av_stream_map_lane_count) * av_stream_map_lane_count; /* same lane-count padding as above */
+ }
+
+ return required_symbols_per_hblank;
+}
+
+/* Stream symbols available for audio in one hblank; only applicable for 8b/10b
+ * MST and 128b/132b SST/MST. Overheads are subtracted with saturation at 0. */
+static uint32_t calculate_available_hblank_bw_in_symbols(
+		const struct audio_crtc_info *crtc_info,
+		const struct audio_dp_link_info *dp_link_info)
{
- /* do nothing */
+	uint64_t hblank = crtc_info->h_total - crtc_info->h_active;
+	struct fixed31_32 hblank_time_msec =
+			dc_fixpt_from_fraction(hblank * 10, crtc_info->requested_pixel_clock_100Hz);
+	struct fixed31_32 lsclkfreq_mhz =
+			get_link_symbol_clk_freq_mhz(dp_link_info->link_rate);
+	struct fixed31_32 average_stream_sym_bw_frac;
+	struct fixed31_32 peak_stream_bw_kbps;
+	struct fixed31_32 bits_per_pixel;
+	struct fixed31_32 link_bw_kbps;
+	struct fixed31_32 available_stream_sym_count;
+	uint32_t available_hblank_bw = 0; /* in stream symbols */
+
+	if (crtc_info->dsc_bits_per_pixel) {
+		bits_per_pixel = dc_fixpt_from_fraction(crtc_info->dsc_bits_per_pixel, 16); /* stored in 1/16 units */
+	} else {
+		switch (crtc_info->color_depth) {
+		case COLOR_DEPTH_666:
+			bits_per_pixel = dc_fixpt_from_int(6);
+			break;
+		case COLOR_DEPTH_888:
+			bits_per_pixel = dc_fixpt_from_int(8);
+			break;
+		case COLOR_DEPTH_101010:
+			bits_per_pixel = dc_fixpt_from_int(10);
+			break;
+		case COLOR_DEPTH_121212:
+			bits_per_pixel = dc_fixpt_from_int(12);
+			break;
+		default:
+			/* Default to commonly supported color depth. */
+			bits_per_pixel = dc_fixpt_from_int(8);
+			break;
+		}
+
+		bits_per_pixel = dc_fixpt_mul_int(bits_per_pixel, 3); /* per-component depth -> per pixel */
+
+		if (crtc_info->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+			bits_per_pixel = dc_fixpt_div_int(bits_per_pixel, 3);
+			bits_per_pixel = dc_fixpt_mul_int(bits_per_pixel, 2);
+		} else if (crtc_info->pixel_encoding == PIXEL_ENCODING_YCBCR420) {
+			bits_per_pixel = dc_fixpt_div_int(bits_per_pixel, 2);
+		}
+	}
+
+	/* Use simple stream BW calculation because mainlink overhead is
+	 * accounted for separately in the audio BW calculations.
+	 */
+	peak_stream_bw_kbps = dc_fixpt_from_fraction(crtc_info->requested_pixel_clock_100Hz, 10);
+	peak_stream_bw_kbps = dc_fixpt_mul(peak_stream_bw_kbps, bits_per_pixel);
+	link_bw_kbps = dc_fixpt_from_int(dp_link_info->link_bandwidth_kbps);
+	average_stream_sym_bw_frac = dc_fixpt_div(peak_stream_bw_kbps, link_bw_kbps);
+
+	available_stream_sym_count = dc_fixpt_mul_int(hblank_time_msec, 1000); /* msec -> usec */
+	available_stream_sym_count = dc_fixpt_mul(available_stream_sym_count, lsclkfreq_mhz);
+	available_stream_sym_count = dc_fixpt_mul(available_stream_sym_count, average_stream_sym_bw_frac);
+	available_hblank_bw = dc_fixpt_floor(available_stream_sym_count);
+	available_hblank_bw *= dp_link_info->lane_count;
+	available_hblank_bw = available_hblank_bw > crtc_info->dsc_num_slices * 4 ?
+			available_hblank_bw - crtc_info->dsc_num_slices * 4 : 0; /* EOC overhead, must not wrap */
+	if (available_hblank_bw < dp_link_info->hblank_min_symbol_width)
+		/* Each symbol takes 4 frames */
+		available_hblank_bw = 4 * dp_link_info->hblank_min_symbol_width;
+
+	if (available_hblank_bw < 12)
+		available_hblank_bw = 0;
+	else
+		available_hblank_bw -= 12; /* Main link overhead */
+
+	return available_hblank_bw;
}
-/*For DP MST, calculate if specified sample rates can fit into a given timing */
-static void check_audio_bandwidth_dpmst(
+static void check_audio_bandwidth_dp(
const struct audio_crtc_info *crtc_info,
+ const struct audio_dp_link_info *dp_link_info,
uint32_t channel_count,
union audio_sample_rates *sample_rates)
{
- /* do nothing */
+ struct dp_audio_layout_config layout_config = {0}; /* stays all-zero when no layout is filled in */
+ uint32_t available_hblank_bw; /* symbols usable for audio in one hblank */
+ uint32_t av_stream_map_lane_count;
+ uint32_t audio_sdp_overhead;
+
+ /* TODO: Add validation for SST 8b/10b case; until then its sample rates are left unfiltered */
+ if (!dp_link_info->is_mst && dp_link_info->encoding == DP_8b_10b_ENCODING)
+ return;
+
+ available_hblank_bw = calculate_available_hblank_bw_in_symbols(
+ crtc_info, dp_link_info);
+ av_stream_map_lane_count = get_av_stream_map_lane_count(
+ dp_link_info->encoding, dp_link_info->lane_count, dp_link_info->is_mst);
+ audio_sdp_overhead = get_audio_sdp_overhead(
+ dp_link_info->encoding, dp_link_info->lane_count, dp_link_info->is_mst);
+ get_audio_layout_config(
+ channel_count, dp_link_info->encoding, &layout_config);
+
+ if (layout_config.max_layouts_per_audio_sdp == 0 ||
+ layout_config.symbols_per_layout == 0 ||
+ layout_config.layouts_per_sample_denom == 0) {
+ return; /* no layout for this channel count/encoding: leave sample_rates untouched */
+ }
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 192000,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_192 = 0; /* each rate needing more symbols than hblank offers is cleared */
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 176400,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_176_4 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 96000,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_96 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 88200,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_88_2 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 48000,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_48 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 44100,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_44_1 = 0;
+ if (available_hblank_bw < calculate_required_audio_bw_in_symbols(
+ crtc_info, &layout_config, channel_count, 32000,
+ av_stream_map_lane_count, audio_sdp_overhead))
+ sample_rates->rate.RATE_32 = 0;
}
static void check_audio_bandwidth(
const struct audio_crtc_info *crtc_info,
+ const struct audio_dp_link_info *dp_link_info,
uint32_t channel_count,
enum signal_type signal,
union audio_sample_rates *sample_rates)
@@ -271,12 +539,9 @@ static void check_audio_bandwidth(
break;
case SIGNAL_TYPE_EDP:
case SIGNAL_TYPE_DISPLAY_PORT:
- check_audio_bandwidth_dpsst(
- crtc_info, channel_count, sample_rates);
- break;
case SIGNAL_TYPE_DISPLAY_PORT_MST:
- check_audio_bandwidth_dpmst(
- crtc_info, channel_count, sample_rates);
+ check_audio_bandwidth_dp(
+ crtc_info, dp_link_info, channel_count, sample_rates);
break;
default:
break;
@@ -365,6 +630,11 @@ void dce_aud_az_enable(struct audio *audio)
audio->inst, value);
}
+void dce_aud_az_disable_hbr_audio(struct audio *audio)
+{
+ set_high_bit_rate_capable(audio, false); /* presumably clears the HBR-capable setting for this audio instance; helper not shown here */
+}
+
void dce_aud_az_disable(struct audio *audio)
{
uint32_t value;
@@ -394,7 +664,8 @@ void dce_aud_az_configure(
struct audio *audio,
enum signal_type signal,
const struct audio_crtc_info *crtc_info,
- const struct audio_info *audio_info)
+ const struct audio_info *audio_info,
+ const struct audio_dp_link_info *dp_link_info)
{
struct dce_audio *aud = DCE_AUD(audio);
@@ -407,6 +678,10 @@ void dce_aud_az_configure(
bool is_ac3_supported = false;
union audio_sample_rates sample_rate;
uint32_t strlen = 0;
+
+ if (signal == SIGNAL_TYPE_VIRTUAL)
+ return;
+
value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL);
set_reg_field_value(value, 1,
AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
@@ -525,6 +800,7 @@ void dce_aud_az_configure(
check_audio_bandwidth(
crtc_info,
+ dp_link_info,
channel_count,
signal,
&sample_rates);
@@ -584,6 +860,7 @@ void dce_aud_az_configure(
check_audio_bandwidth(
crtc_info,
+ dp_link_info,
8,
signal,
&sample_rate);
@@ -778,7 +1055,7 @@ static void get_azalia_clock_info_dp(
/*audio_dto_module = dpDtoSourceClockInkhz * 10,000;
* [khz] ->[100Hz] */
azalia_clock_info->audio_dto_module =
- pll_info->dp_dto_source_clock_in_khz * 10;
+ pll_info->audio_dto_source_clock_in_khz * 10;
}
void dce_aud_wall_dto_setup(
@@ -1021,6 +1298,7 @@ static const struct audio_funcs funcs = {
.az_enable = dce_aud_az_enable,
.az_disable = dce_aud_az_disable,
.az_configure = dce_aud_az_configure,
+ .az_disable_hbr_audio = dce_aud_az_disable_hbr_audio,
.destroy = dce_aud_destroy,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h
index dbd2cfed0603..1b7b8b079af4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h
@@ -166,11 +166,13 @@ void dce_aud_hw_init(struct audio *audio);
void dce_aud_az_enable(struct audio *audio);
void dce_aud_az_disable(struct audio *audio);
+void dce_aud_az_disable_hbr_audio(struct audio *audio);
void dce_aud_az_configure(struct audio *audio,
enum signal_type signal,
const struct audio_crtc_info *crtc_info,
- const struct audio_info *audio_info);
+ const struct audio_info *audio_info,
+ const struct audio_dp_link_info *dp_link_info);
void dce_aud_wall_dto_setup(struct audio *audio,
enum signal_type signal,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
index 739298d2dff3..673bb87d2c17 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
@@ -725,20 +725,34 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
for (i = 0; i < AUX_MAX_RETRIES; i++) {
DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION,
LOG_FLAG_I2cAux_DceAux,
- "dce_aux_transfer_with_retries: link_index=%u: START: retry %d of %d: address=0x%04x length=%u write=%d mot=%d",
+ "dce_aux_transfer_with_retries: link_index=%u: START: retry %d of %d: "
+ "address=0x%04x length=%u write=%d mot=%d is_i2c=%d is_dpia=%d ddc_hw_inst=%d",
ddc && ddc->link ? ddc->link->link_index : UINT_MAX,
i + 1,
(int)AUX_MAX_RETRIES,
payload->address,
payload->length,
(unsigned int) payload->write,
- (unsigned int) payload->mot);
+ (unsigned int) payload->mot,
+ payload->i2c_over_aux,
+ (ddc->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? true : false,
+ ddc->link->ddc_hw_inst);
if (payload->write)
dce_aux_log_payload(" write", payload->data, payload->length, 16);
- ret = dce_aux_transfer_raw(ddc, payload, &operation_result);
+
+ /* Check whether aux to be processed via dmub or dcn directly */
+ if (ddc->ctx->dc->debug.enable_dmub_aux_for_legacy_ddc
+ || ddc->ddc_pin == NULL) {
+ ret = dce_aux_transfer_dmub_raw(ddc, payload, &operation_result);
+ } else {
+ ret = dce_aux_transfer_raw(ddc, payload, &operation_result);
+ }
+
DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION,
LOG_FLAG_I2cAux_DceAux,
- "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d payload->reply=%u",
+ "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: "
+ "address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d "
+ "payload->reply=%u is_i2c=%d is_dpia=%d ddc_hw_inst=%d",
ddc && ddc->link ? ddc->link->link_index : UINT_MAX,
i + 1,
(int)AUX_MAX_RETRIES,
@@ -748,7 +762,10 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
(unsigned int) payload->mot,
ret,
(int)operation_result,
- (unsigned int) *payload->reply);
+ (unsigned int) *payload->reply,
+ payload->i2c_over_aux,
+ (ddc->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? true : false,
+ ddc->link->ddc_hw_inst);
if (!payload->write)
dce_aux_log_payload(" read", payload->data, ret > 0 ? ret : 0, 16);
@@ -770,7 +787,7 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
aux_defer_retries,
AUX_MAX_RETRIES);
goto fail;
- } else
+ } else
udelay(300);
} else if (payload->write && ret > 0) {
/* sink requested more time to complete the write via AUX_ACKM */
@@ -790,7 +807,6 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
payload->write_status_update = true;
payload->length = 0;
udelay(300);
-
} else
return true;
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index ed8936405dfa..b4f5b4a6331a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -34,6 +34,7 @@
#include "dce_clock_source.h"
#include "clk_mgr.h"
+#include "dccg.h"
#include "reg_helper.h"
@@ -43,7 +44,10 @@
#define CTX \
clk_src->base.ctx
-#define DC_LOGGER_INIT()
+#define DC_LOGGER \
+ calc_pll_cs->ctx->logger
+#define DC_LOGGER_INIT() \
+ struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll
#undef FN
#define FN(reg_name, field_name) \
@@ -213,7 +217,7 @@ static bool calc_fb_divider_checking_tolerance(
actual_calc_clk_100hz = (uint64_t)feedback_divider *
calc_pll_cs->fract_fb_divider_factor +
fract_feedback_divider;
- actual_calc_clk_100hz *= calc_pll_cs->ref_freq_khz * 10;
+ actual_calc_clk_100hz *= (uint64_t)calc_pll_cs->ref_freq_khz * 10;
actual_calc_clk_100hz =
div_u64(actual_calc_clk_100hz,
ref_divider * post_divider *
@@ -676,7 +680,7 @@ static bool calculate_ss(
* so have to divided by 100 * 100*/
ss_amount = dc_fixpt_mul(
fb_div, dc_fixpt_from_fraction(ss_data->percentage,
- 100 * ss_data->percentage_divider));
+ 100 * (long long)ss_data->percentage_divider));
ds_data->feedback_amount = dc_fixpt_floor(ss_amount);
ss_nslip_amount = dc_fixpt_sub(ss_amount,
@@ -691,8 +695,8 @@ static bool calculate_ss(
/* compute SS_STEP_SIZE_DSFRAC */
modulation_time = dc_fixpt_from_fraction(
- pll_settings->reference_freq * 1000,
- pll_settings->reference_divider * ss_data->modulation_freq_hz);
+ pll_settings->reference_freq * (uint64_t)1000,
+ pll_settings->reference_divider * (uint64_t)ss_data->modulation_freq_hz);
if (ss_data->flags.CENTER_SPREAD)
modulation_time = dc_fixpt_div_int(modulation_time, 4);
@@ -971,6 +975,13 @@ static bool dcn31_program_pix_clk(
look_up_in_video_optimized_rate_tlb(pix_clk_params->requested_pix_clk_100hz / 10);
struct bp_pixel_clock_parameters bp_pc_params = {0};
enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+
+ // Apply ssed(spread spectrum) dpref clock for edp and dp
+ if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0 &&
+ dc_is_dp_signal(pix_clk_params->signal_type) &&
+ encoding == DP_8b_10b_ENCODING)
+ dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
+
// For these signal types Driver to program DP_DTO without calling VBIOS Command table
if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
if (e) {
@@ -1053,6 +1064,108 @@ static bool dcn31_program_pix_clk(
return true;
}
+static bool dcn401_program_pix_clk(
+ struct clock_source *clock_source,
+ struct pixel_clk_params *pix_clk_params,
+ enum dp_link_encoding encoding,
+ struct pll_settings *pll_settings)
+{
+ struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
+ unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0;
+ const struct pixel_rate_range_table_entry *e =
+ look_up_in_video_optimized_rate_tlb(pix_clk_params->requested_pix_clk_100hz / 10);
+ struct bp_pixel_clock_parameters bp_pc_params = {0};
+ enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+ struct dp_dto_params dto_params = { 0 };
+
+ dto_params.otg_inst = inst;
+ dto_params.signal = pix_clk_params->signal_type;
+
+ // all but TMDS gets Driver to program DP_DTO without calling VBIOS Command table
+ if (!dc_is_tmds_signal(pix_clk_params->signal_type)) {
+ long long dtbclk_p_src_clk_khz;
+
+ dtbclk_p_src_clk_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
+ dto_params.clk_src = DPREFCLK;
+
+ if (e) {
+ dto_params.pixclk_hz = e->target_pixel_rate_khz;
+ dto_params.pixclk_hz *= e->mult_factor;
+ dto_params.refclk_hz = dtbclk_p_src_clk_khz;
+ dto_params.refclk_hz *= e->div_factor;
+ } else {
+ dto_params.pixclk_hz = pix_clk_params->requested_pix_clk_100hz;
+ dto_params.pixclk_hz *= 100;
+ dto_params.refclk_hz = dtbclk_p_src_clk_khz;
+ dto_params.refclk_hz *= 1000;
+ }
+
+ /* enable DP DTO */
+ clock_source->ctx->dc->res_pool->dccg->funcs->set_dp_dto(
+ clock_source->ctx->dc->res_pool->dccg,
+ &dto_params);
+
+ } else {
+ if (pll_settings->actual_pix_clk_100hz > 6000000UL)
+ return false;
+
+ /* disables DP DTO when provided with TMDS signal type */
+ clock_source->ctx->dc->res_pool->dccg->funcs->set_dp_dto(
+ clock_source->ctx->dc->res_pool->dccg,
+ &dto_params);
+
+ /*ATOMBIOS expects pixel rate adjusted by deep color ratio)*/
+ bp_pc_params.controller_id = pix_clk_params->controller_id;
+ bp_pc_params.pll_id = clock_source->id;
+ bp_pc_params.target_pixel_clock_100hz = pll_settings->actual_pix_clk_100hz;
+ bp_pc_params.encoder_object_id = pix_clk_params->encoder_object_id;
+ bp_pc_params.signal_type = pix_clk_params->signal_type;
+
+ // Make sure we send the correct color depth to DMUB for HDMI
+ if (pix_clk_params->signal_type == SIGNAL_TYPE_HDMI_TYPE_A) {
+ switch (pix_clk_params->color_depth) {
+ case COLOR_DEPTH_888:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+ break;
+ case COLOR_DEPTH_101010:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_30;
+ break;
+ case COLOR_DEPTH_121212:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_36;
+ break;
+ case COLOR_DEPTH_161616:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_48;
+ break;
+ default:
+ bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+ break;
+ }
+ bp_pc_params.color_depth = bp_pc_colour_depth;
+ }
+
+ if (clock_source->id != CLOCK_SOURCE_ID_DP_DTO) {
+ bp_pc_params.flags.SET_GENLOCK_REF_DIV_SRC =
+ pll_settings->use_external_clk;
+ bp_pc_params.flags.SET_XTALIN_REF_SRC =
+ !pll_settings->use_external_clk;
+ if (pix_clk_params->flags.SUPPORT_YCBCR420) {
+ bp_pc_params.flags.SUPPORT_YUV_420 = 1;
+ }
+ }
+ if (clk_src->bios->funcs->set_pixel_clock(
+ clk_src->bios, &bp_pc_params) != BP_RESULT_OK)
+ return false;
+ /* Resync deep color DTO */
+ if (clock_source->id != CLOCK_SOURCE_ID_DP_DTO)
+ dce112_program_pixel_clk_resync(clk_src,
+ pix_clk_params->signal_type,
+ pix_clk_params->color_depth,
+ pix_clk_params->flags.SUPPORT_YCBCR420);
+ }
+
+ return true;
+}
+
static bool dce110_clock_source_power_down(
struct clock_source *clk_src)
{
@@ -1084,6 +1197,7 @@ static bool get_pixel_clk_frequency_100hz(
struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
unsigned int clock_hz = 0;
unsigned int modulo_hz = 0;
+ unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
clock_hz = REG_READ(PHASE[inst]);
@@ -1096,7 +1210,7 @@ static bool get_pixel_clk_frequency_100hz(
modulo_hz = REG_READ(MODULO[inst]);
if (modulo_hz)
*pixel_clk_khz = div_u64((uint64_t)clock_hz*
- clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
+ dp_dto_ref_khz*10,
modulo_hz);
else
*pixel_clk_khz = 0;
@@ -1254,6 +1368,7 @@ static uint32_t dcn3_get_pix_clk_dividers(
struct pll_settings *pll_settings)
{
unsigned long long actual_pix_clk_100Hz = pix_clk_params ? pix_clk_params->requested_pix_clk_100hz : 0;
+ struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(cs);
DC_LOGGER_INIT();
@@ -1302,6 +1417,13 @@ static const struct clock_source_funcs dcn31_clk_src_funcs = {
.get_pixel_clk_frequency_100hz = get_pixel_clk_frequency_100hz
};
+static const struct clock_source_funcs dcn401_clk_src_funcs = {
+ .cs_power_down = dce110_clock_source_power_down,
+ .program_pix_clk = dcn401_program_pix_clk,
+ .get_pix_clk_dividers = dcn3_get_pix_clk_dividers,
+ .get_pixel_clk_frequency_100hz = get_pixel_clk_frequency_100hz
+};
+
/*****************************************/
/* Constructor */
/*****************************************/
@@ -1719,6 +1841,21 @@ bool dcn31_clk_src_construct(
return ret;
}
+bool dcn401_clk_src_construct(
+ struct dce110_clk_src *clk_src,
+ struct dc_context *ctx,
+ struct dc_bios *bios,
+ enum clock_source_id id,
+ const struct dce110_clk_src_regs *regs,
+ const struct dce110_clk_src_shift *cs_shift,
+ const struct dce110_clk_src_mask *cs_mask)
+{
+ bool ret = dce112_clk_src_construct(clk_src, ctx, bios, id, regs, cs_shift, cs_mask);
+
+ clk_src->base.funcs = &dcn401_clk_src_funcs;
+
+ return ret;
+}
bool dcn301_clk_src_construct(
struct dce110_clk_src *clk_src,
struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h
index f600b7431e23..94128f7a18b1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h
@@ -257,7 +257,7 @@ bool dce110_clk_src_construct(
struct dce110_clk_src *clk_src,
struct dc_context *ctx,
struct dc_bios *bios,
- enum clock_source_id,
+ enum clock_source_id id,
const struct dce110_clk_src_regs *regs,
const struct dce110_clk_src_shift *cs_shift,
const struct dce110_clk_src_mask *cs_mask);
@@ -307,6 +307,14 @@ bool dcn31_clk_src_construct(
const struct dce110_clk_src_shift *cs_shift,
const struct dce110_clk_src_mask *cs_mask);
+bool dcn401_clk_src_construct(
+ struct dce110_clk_src *clk_src,
+ struct dc_context *ctx,
+ struct dc_bios *bios,
+ enum clock_source_id id,
+ const struct dce110_clk_src_regs *regs,
+ const struct dce110_clk_src_shift *cs_shift,
+ const struct dce110_clk_src_mask *cs_mask);
/* this table is use to find *1.001 and /1.001 pixel rates from non-precise pixel rate */
struct pixel_rate_range_table_entry {
unsigned int range_min_khz;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index b87bfecb7755..a8e79104b684 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -586,7 +586,8 @@ static void dcn10_dmcu_set_psr_enable(struct dmcu *dmcu, bool enable, bool wait)
if (state == PSR_STATE0)
break;
}
- fsleep(500);
+ /* must *not* be fsleep - this can be called from high irq levels */
+ udelay(500);
}
/* assert if max retry hit */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
index 4f552c3e7663..365dd2e37aea 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
@@ -22,9 +22,6 @@
* Authors: AMD
*
*/
-
-#include <linux/delay.h>
-
#include "resource.h"
#include "dce_i2c.h"
#include "dce_i2c_hw.h"
@@ -295,24 +292,51 @@ static void set_speed(
FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_THRESHOLD), 2);
}
+static bool acquire_engine(struct dce_i2c_hw *dce_i2c_hw)
+{
+ uint32_t arbitrate = 0;
+
+ REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate);
+ switch (arbitrate) {
+ case DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW:
+ return true;
+ case DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_HW:
+ return false;
+ case DC_I2C_STATUS__DC_I2C_STATUS_IDLE:
+ default:
+ break;
+ }
+
+ REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, true);
+ REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate);
+ if (arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW)
+ return false;
+
+ return true;
+}
+
static bool setup_engine(
struct dce_i2c_hw *dce_i2c_hw)
{
+ // Deassert soft reset to unblock I2C engine registers
+ REG_UPDATE(DC_I2C_CONTROL, DC_I2C_SOFT_RESET, false);
+
uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE;
uint32_t reset_length = 0;
- if (dce_i2c_hw->ctx->dc->debug.enable_mem_low_power.bits.i2c) {
- if (dce_i2c_hw->regs->DIO_MEM_PWR_CTRL) {
- REG_UPDATE(DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, 0);
- REG_WAIT(DIO_MEM_PWR_STATUS, I2C_MEM_PWR_STATE, 0, 0, 5);
- }
- }
+ if (dce_i2c_hw->ctx->dc->debug.enable_mem_low_power.bits.i2c) {
+ if (dce_i2c_hw->regs->DIO_MEM_PWR_CTRL) {
+ REG_UPDATE(DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, 0);
+ REG_WAIT(DIO_MEM_PWR_STATUS, I2C_MEM_PWR_STATE, 0, 0, 5);
+ }
+ }
- /* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/
- REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1);
+ if (dce_i2c_hw->masks->DC_I2C_DDC1_CLK_EN)
+ REG_UPDATE_N(SETUP, 1,
+ FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_CLK_EN), 1);
- /* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/
- REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1);
+ if (!acquire_engine(dce_i2c_hw))
+ return false;
/*set SW requested I2c speed to default, if API calls in it will be override later*/
set_speed(dce_i2c_hw, dce_i2c_hw->ctx->dc->caps.i2c_speed_in_khz);
@@ -321,9 +345,8 @@ static bool setup_engine(
i2c_setup_limit = dce_i2c_hw->setup_limit;
/* Program pin select */
- REG_UPDATE_6(DC_I2C_CONTROL,
+ REG_UPDATE_5(DC_I2C_CONTROL,
DC_I2C_GO, 0,
- DC_I2C_SOFT_RESET, 0,
DC_I2C_SEND_RESET, 0,
DC_I2C_SW_STATUS_RESET, 1,
DC_I2C_TRANSACTION_COUNT, 0,
@@ -353,6 +376,32 @@ static bool setup_engine(
return true;
}
+/**
+ * cntl_stuck_hw_workaround - Workaround for I2C engine stuck state
+ * @dce_i2c_hw: Pointer to dce_i2c_hw structure
+ *
+ * If we boot without an HDMI display, the I2C engine does not get initialized
+ * correctly. One of its symptoms is that SW_USE_I2C does not get cleared after
+ * acquire. After setting SW_DONE_USING_I2C on release, the engine gets
+ * immediately reacquired by SW, preventing DMUB from using it.
+ *
+ * This function checks the I2C arbitration status and applies a release
+ * workaround if necessary.
+ */
+static void cntl_stuck_hw_workaround(struct dce_i2c_hw *dce_i2c_hw)
+{
+ uint32_t arbitrate = 0;
+
+ REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate);
+ if (arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW)
+ return;
+
+ // Still acquired after release, release again as a workaround
+ REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, true);
+ REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate);
+ ASSERT(arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW);
+}
+
static void release_engine(
struct dce_i2c_hw *dce_i2c_hw)
{
@@ -380,9 +429,9 @@ static void release_engine(
/*for HW HDCP Ri polling failure w/a test*/
set_speed(dce_i2c_hw, dce_i2c_hw->ctx->dc->caps.i2c_speed_in_khz_hdcp);
- /* Release I2C after reset, so HW or DMCU could use it */
- REG_UPDATE_2(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1,
- DC_I2C_SW_USE_I2C_REG_REQ, 0);
+ // Release I2C engine so it can be used by HW or DMCU, automatically clears SW_USE_I2C
+ REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, true);
+ cntl_stuck_hw_workaround(dce_i2c_hw);
if (dce_i2c_hw->ctx->dc->debug.enable_mem_low_power.bits.i2c) {
if (dce_i2c_hw->regs->DIO_MEM_PWR_CTRL)
@@ -542,7 +591,7 @@ static bool dce_i2c_hw_engine_submit_payload(struct dce_i2c_hw *dce_i2c_hw,
DCE_I2C_TRANSACTION_ACTION_I2C_WRITE;
- request.address = (uint8_t) ((payload->address << 1) | !payload->write);
+ request.address = (uint8_t) ((payload->address << 1) | (payload->write ? 0 : 1));
request.length = payload->length;
request.data = payload->data;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
index 3f45ecd189a2..a9a16f645994 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
@@ -188,6 +188,7 @@ struct dce_i2c_shift {
uint8_t DC_I2C_REG_RW_CNTL_STATUS;
uint8_t I2C_LIGHT_SLEEP_FORCE;
uint8_t I2C_MEM_PWR_STATE;
+ uint8_t DC_I2C_DDC1_CLK_EN;
};
struct dce_i2c_mask {
@@ -232,6 +233,7 @@ struct dce_i2c_mask {
uint32_t DC_I2C_REG_RW_CNTL_STATUS;
uint32_t I2C_LIGHT_SLEEP_FORCE;
uint32_t I2C_MEM_PWR_STATE;
+ uint32_t DC_I2C_DDC1_CLK_EN;
};
#define I2C_COMMON_MASK_SH_LIST_DCN2(mask_sh)\
@@ -243,6 +245,14 @@ struct dce_i2c_mask {
I2C_SF(DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh),\
I2C_SF(DIO_MEM_PWR_STATUS, I2C_MEM_PWR_STATE, mask_sh)
+#define I2C_COMMON_MASK_SH_LIST_DCN35(mask_sh)\
+ I2C_COMMON_MASK_SH_LIST_DCN30(mask_sh),\
+ I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_CLK_EN, mask_sh)
+
+#define I2C_COMMON_MASK_SH_LIST_DCN401(mask_sh)\
+ I2C_COMMON_MASK_SH_LIST_DCN30(mask_sh),\
+ I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_CLK_EN, mask_sh)
+
struct dce_i2c_registers {
uint32_t SETUP;
uint32_t SPEED;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
index e188447c8156..2d73b94c515c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
@@ -451,7 +451,7 @@ static bool dce_i2c_sw_engine_submit_payload(struct dce_i2c_sw *engine,
DCE_I2C_TRANSACTION_ACTION_I2C_WRITE_MOT :
DCE_I2C_TRANSACTION_ACTION_I2C_WRITE;
- request.address = (uint8_t) ((payload->address << 1) | !payload->write);
+ request.address = (uint8_t) ((payload->address << 1) | (payload->write ? 0 : 1));
request.length = payload->length;
request.data = payload->data;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
index 136bd93c3b65..0c50fe266c8a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
@@ -896,13 +896,13 @@ void dce110_link_encoder_construct(
enc110->base.id, &bp_cap_info);
/* Override features with DCE-specific values */
- if (BP_RESULT_OK == result) {
+ if (result == BP_RESULT_OK) {
enc110->base.features.flags.bits.IS_HBR2_CAPABLE =
bp_cap_info.DP_HBR2_EN;
enc110->base.features.flags.bits.IS_HBR3_CAPABLE =
bp_cap_info.DP_HBR3_EN;
enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
- } else {
+ } else if (result != BP_RESULT_NORECORD) {
DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
__func__,
result);
@@ -1361,7 +1361,10 @@ void dce110_link_encoder_dp_set_lane_settings(
cntl.lane_settings = training_lane_set.raw;
/* call VBIOS table to set voltage swing and pre-emphasis */
- link_transmitter_control(enc110, &cntl);
+ if (link_transmitter_control(enc110, &cntl) != BP_RESULT_OK) {
+ DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n", __func__);
+ BREAK_TO_DEBUGGER();
+ }
}
}
@@ -1795,13 +1798,13 @@ void dce60_link_encoder_construct(
enc110->base.id, &bp_cap_info);
/* Override features with DCE-specific values */
- if (BP_RESULT_OK == result) {
+ if (result == BP_RESULT_OK) {
enc110->base.features.flags.bits.IS_HBR2_CAPABLE =
bp_cap_info.DP_HBR2_EN;
enc110->base.features.flags.bits.IS_HBR3_CAPABLE =
bp_cap_info.DP_HBR3_EN;
enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
- } else {
+ } else if (result != BP_RESULT_NORECORD) {
DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
__func__,
result);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
index 4cdd4dacb761..1c2009e38aa1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
@@ -98,7 +98,7 @@ static enum mi_bits_per_pixel get_mi_bpp(
}
static enum mi_tiling_format get_mi_tiling(
- union dc_tiling_info *tiling_info)
+ struct dc_tiling_info *tiling_info)
{
switch (tiling_info->gfx8.array_mode) {
case DC_ARRAY_1D_TILED_THIN1:
@@ -133,7 +133,7 @@ static bool is_vert_scan(enum dc_rotation_angle rotation)
static void dce_mi_program_pte_vm(
struct mem_input *mi,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum dc_rotation_angle rotation)
{
struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi);
@@ -430,7 +430,7 @@ static void dce120_mi_program_display_marks(struct mem_input *mi,
}
static void program_tiling(
- struct dce_mem_input *dce_mi, const union dc_tiling_info *info)
+ struct dce_mem_input *dce_mi, const struct dc_tiling_info *info)
{
if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */
REG_UPDATE_6(GRPH_CONTROL,
@@ -481,7 +481,6 @@ static void program_tiling(
}
}
-
static void program_size_and_rotation(
struct dce_mem_input *dce_mi,
enum dc_rotation_angle rotation,
@@ -627,10 +626,31 @@ static void program_grph_pixel_format(
GRPH_PRESCALE_B_SIGN, sign);
}
+static void dce_mi_clear_tiling(
+ struct mem_input *mi)
+{
+ struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi);
+
+ if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */
+ REG_UPDATE(GRPH_CONTROL,
+ GRPH_SW_MODE, DC_SW_LINEAR);
+ }
+
+ if (dce_mi->masks->GRPH_MICRO_TILE_MODE) { /* GFX8 */
+ REG_UPDATE(GRPH_CONTROL,
+ GRPH_ARRAY_MODE, DC_SW_LINEAR);
+ }
+
+ if (dce_mi->masks->GRPH_ARRAY_MODE) { /* GFX6 but reuses gfx8 struct */
+ REG_UPDATE(GRPH_CONTROL,
+ GRPH_ARRAY_MODE, DC_SW_LINEAR);
+ }
+}
+
static void dce_mi_program_surface_config(
struct mem_input *mi,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -642,8 +662,7 @@ static void dce_mi_program_surface_config(
program_tiling(dce_mi, tiling_info);
program_size_and_rotation(dce_mi, rotation, plane_size);
- if (format >= SURFACE_PIXEL_FORMAT_GRPH_BEGIN &&
- format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
+ if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
program_grph_pixel_format(dce_mi, format);
}
@@ -651,7 +670,7 @@ static void dce_mi_program_surface_config(
static void dce60_mi_program_surface_config(
struct mem_input *mi,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation, /* not used in DCE6 */
struct dc_plane_dcc_param *dcc,
@@ -663,8 +682,7 @@ static void dce60_mi_program_surface_config(
program_tiling(dce_mi, tiling_info);
dce60_program_size(dce_mi, rotation, plane_size);
- if (format >= SURFACE_PIXEL_FORMAT_GRPH_BEGIN &&
- format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
+ if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
program_grph_pixel_format(dce_mi, format);
}
#endif
@@ -886,7 +904,8 @@ static const struct mem_input_funcs dce_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
#if defined(CONFIG_DRM_AMD_DC_SI)
@@ -899,7 +918,8 @@ static const struct mem_input_funcs dce60_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce60_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
#endif
@@ -912,7 +932,8 @@ static const struct mem_input_funcs dce112_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
static const struct mem_input_funcs dce120_mi_funcs = {
@@ -924,7 +945,8 @@ static const struct mem_input_funcs dce120_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
void dce_mem_input_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
index f98400efdd9b..e34e445a4013 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
@@ -181,6 +181,7 @@ struct dce_mem_input_registers {
SFB(blk, GRPH_ENABLE, GRPH_ENABLE, mask_sh),\
SFB(blk, GRPH_CONTROL, GRPH_DEPTH, mask_sh),\
SFB(blk, GRPH_CONTROL, GRPH_FORMAT, mask_sh),\
+ SFB(blk, GRPH_CONTROL, GRPH_NUM_BANKS, mask_sh),\
SFB(blk, GRPH_X_START, GRPH_X_START, mask_sh),\
SFB(blk, GRPH_Y_START, GRPH_Y_START, mask_sh),\
SFB(blk, GRPH_X_END, GRPH_X_END, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
index 63ae4bc2a2e5..f342da5a5e50 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
@@ -575,7 +575,6 @@ static void dce60_opp_program_clamping_and_pixel_encoding(
}
#endif
-
static void program_formatter_420_memory(struct output_pixel_processor *opp)
{
struct dce110_opp *opp110 = TO_DCE110_OPP(opp);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.h b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.h
index bf1ffc3629c7..3d9be87aae45 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.h
@@ -111,6 +111,7 @@ enum dce110_opp_reg_type {
OPP_SF(FMT_DITHER_RAND_R_SEED, FMT_RAND_R_SEED, mask_sh),\
OPP_SF(FMT_DITHER_RAND_G_SEED, FMT_RAND_G_SEED, mask_sh),\
OPP_SF(FMT_DITHER_RAND_B_SEED, FMT_RAND_B_SEED, mask_sh),\
+ OPP_SF(FMT_BIT_DEPTH_CONTROL, FMT_TEMPORAL_DITHER_EN, mask_sh),\
OPP_SF(FMT_BIT_DEPTH_CONTROL, FMT_TEMPORAL_DITHER_RESET, mask_sh),\
OPP_SF(FMT_BIT_DEPTH_CONTROL, FMT_TEMPORAL_DITHER_OFFSET, mask_sh),\
OPP_SF(FMT_BIT_DEPTH_CONTROL, FMT_TEMPORAL_DITHER_DEPTH, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
index e8570060d007..de31fb1b6819 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c
@@ -218,7 +218,7 @@ static void dce_driver_set_backlight(struct panel_cntl *panel_cntl,
* contain integer component, lower 16 bits contain fractional component
* of active duty cycle e.g. 0x21BDC0 = 0xEFF0 * 0x24
*/
- active_duty_cycle = backlight_pwm_u16_16 * masked_pwm_period;
+ active_duty_cycle = backlight_pwm_u16_16 * (uint64_t)masked_pwm_period;
/* 1.3 Calculate 16 bit active duty cycle from integer and fractional
* components shift by bitCount then mask 16 bits and add rounding bit
@@ -290,4 +290,5 @@ void dce_panel_cntl_construct(
dce_panel_cntl->base.funcs = &dce_link_panel_cntl_funcs;
dce_panel_cntl->base.ctx = init_data->ctx;
dce_panel_cntl->base.inst = init_data->inst;
+ dce_panel_cntl->base.pwrseq_inst = 0;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index f810825322ba..1130d7619b26 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -277,7 +277,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
uint32_t misc1 = 0;
uint32_t h_blank;
uint32_t h_back_porch;
- uint8_t synchronous_clock = 0; /* asynchronous mode */
uint8_t colorimetry_bpc;
uint8_t dynamic_range_rgb = 0; /*full range*/
uint8_t dynamic_range_ycbcr = 1; /*bt709*/
@@ -380,7 +379,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
break;
}
- misc0 = misc0 | synchronous_clock;
misc0 = colorimetry_bpc << 5;
if (REG(DP_MSA_TIMING_PARAM1)) {
@@ -420,7 +418,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
dynamic_range_rgb = 1; /*limited range*/
break;
case COLOR_SPACE_2020_RGB_FULLRANGE:
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
case COLOR_SPACE_XR_RGB:
case COLOR_SPACE_MSREF_SCRGB:
case COLOR_SPACE_ADOBERGB:
@@ -432,6 +430,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
case COLOR_SPACE_APPCTRL:
case COLOR_SPACE_CUSTOMPOINTS:
case COLOR_SPACE_UNKNOWN:
+ default:
/* do nothing */
break;
}
@@ -1025,6 +1024,7 @@ static void dce110_reset_hdmi_stream_attribute(
struct stream_encoder *enc)
{
struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
+
if (enc110->se_mask->HDMI_DATA_SCRAMBLE_EN)
REG_UPDATE_5(HDMI_CONTROL,
HDMI_PACKET_GEN_VERSION, 1,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
index 670d5ab9d998..1ab5ae9b5ea5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
@@ -154,10 +154,13 @@ static bool dce60_setup_scaling_configuration(
REG_SET(SCL_BYPASS_CONTROL, 0, SCL_BYPASS_MODE, 0);
if (data->taps.h_taps + data->taps.v_taps <= 2) {
- /* Set bypass */
-
- /* DCE6 has no SCL_MODE register, skip scale mode programming */
+ /* Disable scaler functionality */
+ REG_WRITE(SCL_SCALER_ENABLE, 0);
+ /* Clear registers that can cause glitches even when the scaler is off */
+ REG_WRITE(SCL_TAP_CONTROL, 0);
+ REG_WRITE(SCL_AUTOMATIC_MODE_CONTROL, 0);
+ REG_WRITE(SCL_F_SHARP_CONTROL, 0);
return false;
}
@@ -165,7 +168,7 @@ static bool dce60_setup_scaling_configuration(
SCL_H_NUM_OF_TAPS, data->taps.h_taps - 1,
SCL_V_NUM_OF_TAPS, data->taps.v_taps - 1);
- /* DCE6 has no SCL_MODE register, skip scale mode programming */
+ REG_WRITE(SCL_SCALER_ENABLE, 1);
/* DCE6 has no SCL_BOUNDARY_MODE bit, skip replace out of bound pixels */
@@ -502,6 +505,8 @@ static void dce60_transform_set_scaler(
REG_SET(DC_LB_MEM_SIZE, 0,
DC_LB_MEM_SIZE, xfm_dce->lb_memory_size);
+ REG_WRITE(SCL_UPDATE, 0x00010000);
+
/* Clear SCL_F_SHARP_CONTROL value to 0 */
REG_WRITE(SCL_F_SHARP_CONTROL, 0);
@@ -527,8 +532,7 @@ static void dce60_transform_set_scaler(
if (coeffs_v != xfm_dce->filter_v || coeffs_h != xfm_dce->filter_h) {
/* 4. Program vertical filters */
if (xfm_dce->filter_v == NULL)
- REG_SET(SCL_VERT_FILTER_CONTROL, 0,
- SCL_V_2TAP_HARDCODE_COEF_EN, 0);
+ REG_WRITE(SCL_VERT_FILTER_CONTROL, 0);
program_multi_taps_filter(
xfm_dce,
data->taps.v_taps,
@@ -542,8 +546,7 @@ static void dce60_transform_set_scaler(
/* 5. Program horizontal filters */
if (xfm_dce->filter_h == NULL)
- REG_SET(SCL_HORZ_FILTER_CONTROL, 0,
- SCL_H_2TAP_HARDCODE_COEF_EN, 0);
+ REG_WRITE(SCL_HORZ_FILTER_CONTROL, 0);
program_multi_taps_filter(
xfm_dce,
data->taps.h_taps,
@@ -566,6 +569,8 @@ static void dce60_transform_set_scaler(
/* DCE6 has no SCL_COEF_UPDATE_COMPLETE bit to flip to new coefficient memory */
/* DCE6 DATA_FORMAT register does not support ALPHA_EN */
+
+ REG_WRITE(SCL_UPDATE, 0);
}
#endif
@@ -1408,7 +1413,7 @@ void dce110_opp_set_csc_default(
static void program_pwl(struct dce_transform *xfm_dce,
const struct pwl_params *params)
{
- int retval;
+ uint32_t retval;
uint8_t max_tries = 10;
uint8_t counter = 0;
uint32_t i = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h
index cbce194ec7b8..eb716e8337e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h
@@ -155,6 +155,9 @@
SRI(SCL_COEF_RAM_TAP_DATA, SCL, id), \
SRI(VIEWPORT_START, SCL, id), \
SRI(VIEWPORT_SIZE, SCL, id), \
+ SRI(SCL_SCALER_ENABLE, SCL, id), \
+ SRI(SCL_HORZ_FILTER_INIT_RGB_LUMA, SCL, id), \
+ SRI(SCL_HORZ_FILTER_INIT_CHROMA, SCL, id), \
SRI(SCL_HORZ_FILTER_SCALE_RATIO, SCL, id), \
SRI(SCL_VERT_FILTER_SCALE_RATIO, SCL, id), \
SRI(SCL_VERT_FILTER_INIT, SCL, id), \
@@ -590,6 +593,7 @@ struct dce_transform_registers {
uint32_t SCL_VERT_FILTER_SCALE_RATIO;
uint32_t SCL_HORZ_FILTER_INIT;
#if defined(CONFIG_DRM_AMD_DC_SI)
+ uint32_t SCL_SCALER_ENABLE;
uint32_t SCL_HORZ_FILTER_INIT_RGB_LUMA;
uint32_t SCL_HORZ_FILTER_INIT_CHROMA;
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
index d3e6544022b7..3b9011ef9b68 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c
@@ -28,6 +28,8 @@
#include "dc.h"
#include "core_types.h"
#include "dmub_cmd.h"
+#include "dc_dmub_srv.h"
+#include "dmub/dmub_srv.h"
#define TO_DMUB_ABM(abm)\
container_of(abm, struct dce_abm, base)
@@ -57,18 +59,22 @@ static unsigned int abm_feature_support(struct abm *abm, unsigned int panel_inst
return ret;
}
-static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight)
+static void dmub_abm_init_ex(struct abm *abm, uint32_t backlight, uint32_t user_level)
{
- dmub_abm_init(abm, backlight);
+ dmub_abm_init(abm, backlight, user_level);
}
static unsigned int dmub_abm_get_current_backlight_ex(struct abm *abm)
{
+ dc_allow_idle_optimizations(abm->ctx->dc, false);
+
return dmub_abm_get_current_backlight(abm);
}
static unsigned int dmub_abm_get_target_backlight_ex(struct abm *abm)
{
+ dc_allow_idle_optimizations(abm->ctx->dc, false);
+
return dmub_abm_get_target_backlight(abm);
}
@@ -145,7 +151,11 @@ static bool dmub_abm_save_restore_ex(
return ret;
}
-static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+static bool dmub_abm_set_pipe_ex(struct abm *abm,
+ uint32_t otg_inst,
+ uint32_t option,
+ uint32_t panel_inst,
+ uint32_t pwrseq_inst)
{
bool ret = false;
unsigned int feature_support;
@@ -153,7 +163,7 @@ static bool dmub_abm_set_pipe_ex(struct abm *abm, uint32_t otg_inst, uint32_t op
feature_support = abm_feature_support(abm, panel_inst);
if (feature_support == ABM_LCD_SUPPORT)
- ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst);
+ ret = dmub_abm_set_pipe(abm, otg_inst, option, panel_inst, pwrseq_inst);
return ret;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
index 592a8f7a1c6d..a641ae04450c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c
@@ -34,11 +34,7 @@
#include "reg_helper.h"
#include "fixed31_32.h"
-#ifdef _WIN32
-#include "atombios.h"
-#else
#include "atom.h"
-#endif
#define TO_DMUB_ABM(abm)\
container_of(abm, struct dce_abm, base)
@@ -76,10 +72,10 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc)
cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.panel_mask = panel_mask;
cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
-void dmub_abm_init(struct abm *abm, uint32_t backlight)
+void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level)
{
struct dce_abm *dce_abm = TO_DMUB_ABM(abm);
@@ -106,7 +102,7 @@ void dmub_abm_init(struct abm *abm, uint32_t backlight)
BL1_PWM_TARGET_ABM_LEVEL, backlight);
REG_UPDATE(BL1_PWM_USER_LEVEL,
- BL1_PWM_USER_LEVEL, backlight);
+ BL1_PWM_USER_LEVEL, user_level);
REG_UPDATE_2(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES,
ABM1_LS_MIN_PIXEL_VALUE_THRES, 0,
@@ -155,7 +151,7 @@ bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask)
cmd.abm_set_level.abm_set_level_data.panel_mask = panel_mask;
cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -186,7 +182,7 @@ void dmub_abm_init_config(struct abm *abm,
cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
@@ -203,7 +199,7 @@ bool dmub_abm_set_pause(struct abm *abm, bool pause, unsigned int panel_inst, un
cmd.abm_pause.abm_pause_data.panel_mask = panel_mask;
cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_pause_data);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -244,9 +240,10 @@ bool dmub_abm_save_restore(
cmd.abm_save_restore.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
cmd.abm_save_restore.abm_init_config_data.panel_mask = panel_mask;
- cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore);
+ cmd.abm_save_restore.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_abm_save_restore) - sizeof(struct dmub_cmd_header);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
// Copy iramtable data into local structure
memcpy((void *)pData, dc->dmub_srv->dmub->scratch_mem_fb.cpu_addr, bytes);
@@ -254,22 +251,27 @@ bool dmub_abm_save_restore(
return true;
}
-bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+bool dmub_abm_set_pipe(struct abm *abm,
+ uint32_t otg_inst,
+ uint32_t option,
+ uint32_t panel_inst,
+ uint32_t pwrseq_inst)
{
union dmub_rb_cmd cmd;
struct dc_context *dc = abm->ctx;
- uint32_t ramping_boundary = 0xFFFF;
+ uint8_t ramping_boundary = 0xFF;
memset(&cmd, 0, sizeof(cmd));
cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
+ cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst;
cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option;
cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -291,8 +293,26 @@ bool dmub_abm_set_backlight_level(struct abm *abm,
cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst);
cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
+bool dmub_abm_set_event(struct abm *abm, unsigned int scaling_enable, unsigned int scaling_strength_map,
+ unsigned int panel_inst)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = abm->ctx;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.abm_set_event.header.type = DMUB_CMD__ABM;
+ cmd.abm_set_event.header.sub_type = DMUB_CMD__ABM_SET_EVENT;
+ cmd.abm_set_event.abm_set_event_data.vb_scaling_enable = scaling_enable;
+ cmd.abm_set_event.abm_set_event_data.vb_scaling_strength_mapping = scaling_strength_map;
+ cmd.abm_set_event.abm_set_event_data.panel_mask = (1<<panel_inst);
+ cmd.abm_set_event.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_event_data);
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
index 853564d7f471..13f54f1df780 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.h
@@ -30,7 +30,7 @@
struct abm_save_restore;
-void dmub_abm_init(struct abm *abm, uint32_t backlight);
+void dmub_abm_init(struct abm *abm, uint32_t backlight, uint32_t user_level);
bool dmub_abm_set_level(struct abm *abm, uint32_t level, uint8_t panel_mask);
unsigned int dmub_abm_get_current_backlight(struct abm *abm);
unsigned int dmub_abm_get_target_backlight(struct abm *abm);
@@ -44,9 +44,11 @@ bool dmub_abm_save_restore(
struct dc_context *dc,
unsigned int panel_inst,
struct abm_save_restore *pData);
-bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst);
+bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst);
bool dmub_abm_set_backlight_level(struct abm *abm,
unsigned int backlight_pwm_u16_16,
unsigned int frame_ramp,
unsigned int panel_inst);
+bool dmub_abm_set_event(struct abm *abm, unsigned int scaling_enable, unsigned int scaling_strength_map,
+ unsigned int panel_inst);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
index 2aa0e01a6891..d37ecfdde4f1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
@@ -47,7 +47,7 @@ void dmub_hw_lock_mgr_cmd(struct dc_dmub_srv *dmub_srv,
if (!lock)
cmd.lock_hw.lock_hw_data.should_release = 1;
- dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
@@ -63,7 +63,26 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
bool should_use_dmub_lock(struct dc_link *link)
{
+ /* ASIC doesn't support DMUB */
+ if (!link->ctx->dmub_srv)
+ return false;
+
if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
return true;
+
+ if (link->replay_settings.replay_feature_enabled)
+ return true;
+
+ /* only use HW lock for PSR1 on single eDP */
+ if (link->psr_settings.psr_version == DC_PSR_VERSION_1) {
+ struct dc_link *edp_links[MAX_NUM_EDP];
+ int edp_num;
+
+ dc_get_edp_links(link->dc, edp_links, &edp_num);
+
+ if (edp_num == 1)
+ return true;
+ }
+
return false;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
index d8009b2dc56a..98a778996e1a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_outbox.c
@@ -48,5 +48,5 @@ void dmub_enable_outbox_notification(struct dc_dmub_srv *dmub_srv)
sizeof(cmd.outbox1_enable.header);
cmd.outbox1_enable.enable = true;
- dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
index 0f24b6fbd220..87af4fdc04a6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -35,6 +35,7 @@
static const uint8_t DP_SINK_DEVICE_STR_ID_1[] = {7, 1, 8, 7, 3};
static const uint8_t DP_SINK_DEVICE_STR_ID_2[] = {7, 1, 8, 7, 5};
+static const uint8_t DP_SINK_DEVICE_STR_ID_3[] = {0x42, 0x61, 0x6c, 0x73, 0x61};
/*
* Convert dmcub psr state to dmcu psr state.
@@ -93,6 +94,8 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state)
state = PSR_STATE_HWLOCK_MGR;
else if (raw_state == 0x61)
state = PSR_STATE_POLLVUPDATE;
+ else if (raw_state == 0x62)
+ state = PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME;
else
state = PSR_STATE_INVALID;
@@ -104,23 +107,18 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state)
*/
static void dmub_psr_get_state(struct dmub_psr *dmub, enum dc_psr_state *state, uint8_t panel_inst)
{
- struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
uint32_t raw_state = 0;
uint32_t retry_count = 0;
- enum dmub_status status;
do {
// Send gpint command and wait for ack
- status = dmub_srv_send_gpint_command(srv, DMUB_GPINT__GET_PSR_STATE, panel_inst, 30);
-
- if (status == DMUB_STATUS_OK) {
- // GPINT was executed, get response
- dmub_srv_get_gpint_response(srv, &raw_state);
+ if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__GET_PSR_STATE, panel_inst, &raw_state,
+ DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
*state = convert_psr_state(raw_state);
- } else
+ } else {
// Return invalid state when GPINT times out
*state = PSR_STATE_INVALID;
-
+ }
} while (++retry_count <= 1000 && *state == PSR_STATE_INVALID);
// Assert if max retry hit
@@ -170,7 +168,7 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state *
cmd.psr_set_version.psr_set_version_data.panel_inst = panel_inst;
cmd.psr_set_version.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -198,7 +196,7 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable, bool wait, uint8
cmd.psr_enable.header.payload_bytes = 0; // Send header only
- dm_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
/* Below loops 1000 x 500us = 500 ms.
* Exit PSR may need to wait 1-2 frames to power up. Timeout after at
@@ -216,7 +214,8 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable, bool wait, uint8
break;
}
- fsleep(500);
+ /* must *not* be fsleep - this can be called from high irq levels */
+ udelay(500);
}
/* assert if max retry hit */
@@ -246,7 +245,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level, uint8_
cmd.psr_set_level.psr_set_level_data.psr_level = psr_level;
cmd.psr_set_level.psr_set_level_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1;
cmd.psr_set_level.psr_set_level_data.panel_inst = panel_inst;
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
/*
@@ -265,7 +264,7 @@ static void dmub_psr_set_sink_vtotal_in_psr_active(struct dmub_psr *dmub,
cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_idle = psr_vtotal_idle;
cmd.psr_set_vtotal.psr_set_vtotal_data.psr_vtotal_su = psr_vtotal_su;
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
/*
@@ -284,7 +283,7 @@ static void dmub_psr_set_power_opt(struct dmub_psr *dmub, unsigned int power_opt
cmd.psr_set_power_opt.psr_set_power_opt_data.power_opt = power_opt;
cmd.psr_set_power_opt.psr_set_power_opt_data.panel_inst = panel_inst;
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
/*
@@ -295,7 +294,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
struct psr_context *psr_context,
uint8_t panel_inst)
{
- union dmub_rb_cmd cmd;
+ union dmub_rb_cmd cmd = { 0 };
struct dc_context *dc = dmub->ctx;
struct dmub_cmd_psr_copy_settings_data *copy_settings_data
= &cmd.psr_copy_settings.psr_copy_settings_data;
@@ -366,6 +365,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR;
copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1;
copy_settings_data->debug.bitfields.force_full_frame_update = 0;
+ copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm;
if (psr_context->su_granularity_required == 0)
copy_settings_data->su_y_granularity = 0;
@@ -390,9 +390,8 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
!memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1,
sizeof(DP_SINK_DEVICE_STR_ID_1)))
link->psr_settings.force_ffu_mode = 1;
- else
- link->psr_settings.force_ffu_mode = 0;
- copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode;
+
+ copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode || psr_context->os_request_force_ffu;
if (((link->dpcd_caps.fec_cap.bits.FEC_CAPABLE &&
!link->dc->debug.disable_fec) &&
@@ -408,13 +407,24 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
else
copy_settings_data->debug.bitfields.force_wakeup_by_tps3 = 0;
+ if (link->psr_settings.psr_version == DC_PSR_VERSION_1 &&
+ link->dpcd_caps.sink_dev_id == DP_DEVICE_ID_0022B9 &&
+ !memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_3,
+ sizeof(DP_SINK_DEVICE_STR_ID_3))) {
+ copy_settings_data->poweroff_before_vertical_line = 16;
+ }
+
//WA for PSR1 on specific TCON, require frame delay for frame re-lock
copy_settings_data->relock_delay_frame_cnt = 0;
if (link->dpcd_caps.sink_dev_id == DP_BRANCH_DEVICE_ID_001CF8)
copy_settings_data->relock_delay_frame_cnt = 2;
+
+ copy_settings_data->power_down_phy_before_disable_stream =
+ link->psr_settings.power_down_phy_before_disable_stream;
+
copy_settings_data->dsc_slice_height = psr_context->dsc_slice_height;
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -435,21 +445,22 @@ static void dmub_psr_force_static(struct dmub_psr *dmub, uint8_t panel_inst)
cmd.psr_force_static.header.sub_type = DMUB_CMD__PSR_FORCE_STATIC;
cmd.psr_enable.header.payload_bytes = 0;
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
/*
* Get PSR residency from firmware.
*/
-static void dmub_psr_get_residency(struct dmub_psr *dmub, uint32_t *residency, uint8_t panel_inst)
+static void dmub_psr_get_residency(struct dmub_psr *dmub, uint32_t *residency,
+ uint8_t panel_inst, enum psr_residency_mode mode)
{
- struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
uint16_t param = (uint16_t)(panel_inst << 8);
- /* Send gpint command and wait for ack */
- dmub_srv_send_gpint_command(srv, DMUB_GPINT__PSR_RESIDENCY, param, 30);
+ param |= mode;
- dmub_srv_get_gpint_response(srv, residency);
+ /* Send gpint command and wait for ack */
+ dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__PSR_RESIDENCY, param, residency,
+ DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
}
static const struct dmub_psr_funcs psr_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h
index 289e42070ece..a6e282d950c3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h
@@ -27,6 +27,7 @@
#define _DMUB_PSR_H_
#include "dc_types.h"
+#include "dmub_cmd.h"
struct dc_link;
struct dmub_psr_funcs;
@@ -46,7 +47,7 @@ struct dmub_psr_funcs {
uint8_t panel_inst);
void (*psr_force_static)(struct dmub_psr *dmub, uint8_t panel_inst);
void (*psr_get_residency)(struct dmub_psr *dmub, uint32_t *residency,
- uint8_t panel_inst);
+ uint8_t panel_inst, enum psr_residency_mode mode);
void (*psr_set_sink_vtotal_in_psr_active)(struct dmub_psr *dmub,
uint16_t psr_vtotal_idle, uint16_t psr_vtotal_su);
void (*psr_set_power_opt)(struct dmub_psr *dmub, unsigned int power_opt, uint8_t panel_inst);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
index 28149e53c2a6..f9542edff14b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c
@@ -1,29 +1,9 @@
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
#include "dc.h"
+#include "link_service.h"
#include "dc_dmub_srv.h"
#include "dmub/dmub_srv.h"
#include "core_types.h"
@@ -33,26 +13,25 @@
#define MAX_PIPES 6
+#define GPINT_RETRY_NUM 20
+
+static const uint8_t DP_SINK_DEVICE_STR_ID_1[] = {7, 1, 8, 7, 3};
+static const uint8_t DP_SINK_DEVICE_STR_ID_2[] = {7, 1, 8, 7, 5};
+
/*
* Get Replay state from firmware.
*/
static void dmub_replay_get_state(struct dmub_replay *dmub, enum replay_state *state, uint8_t panel_inst)
{
- struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
- /* uint32_t raw_state = 0; */
uint32_t retry_count = 0;
- enum dmub_status status;
do {
// Send gpint command and wait for ack
- status = dmub_srv_send_gpint_command(srv, DMUB_GPINT__GET_REPLAY_STATE, panel_inst, 30);
-
- if (status == DMUB_STATUS_OK) {
- // GPINT was executed, get response
- dmub_srv_get_gpint_response(srv, (uint32_t *)state);
- } else
+ if (!dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__GET_REPLAY_STATE, panel_inst,
+ (uint32_t *)state, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) {
// Return invalid state when GPINT times out
*state = REPLAY_STATE_INVALID;
+ }
} while (++retry_count <= 1000 && *state == REPLAY_STATE_INVALID);
// Assert if max retry hit
@@ -84,7 +63,7 @@ static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait,
cmd.replay_enable.header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_enable_data);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
/* Below loops 1000 x 500us = 500 ms.
* Exit REPLAY may need to wait 1-2 frames to power up. Timeout after at
@@ -102,14 +81,14 @@ static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait,
break;
}
- fsleep(500);
+ /* must *not* be fsleep - this can be called from high irq levels */
+ udelay(500);
}
/* assert if max retry hit */
if (retry_count >= 1000)
ASSERT(0);
}
-
}
/*
@@ -127,7 +106,7 @@ static void dmub_replay_set_power_opt(struct dmub_replay *dmub, unsigned int pow
cmd.replay_set_power_opt.replay_set_power_opt_data.power_opt = power_opt;
cmd.replay_set_power_opt.replay_set_power_opt_data.panel_inst = panel_inst;
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
/*
@@ -190,6 +169,9 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub,
copy_settings_data->max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line;
copy_settings_data->smu_optimizations_en = link->replay_settings.replay_smu_opt_enable;
copy_settings_data->replay_timing_sync_supported = link->replay_settings.config.replay_timing_sync_supported;
+ copy_settings_data->replay_support_fast_resync_in_ultra_sleep_mode = link->replay_settings.config.replay_support_fast_resync_in_ultra_sleep_mode;
+
+ copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm;
copy_settings_data->flags.u32All = 0;
copy_settings_data->flags.bitfields.fec_enable_status = (link->fec_state == dc_link_fec_enabled);
@@ -200,17 +182,28 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub,
(link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT &&
!link->panel_config.dsc.disable_dsc_edp &&
link->dc->caps.edp_dsc_support)) &&
- link->dpcd_caps.sink_dev_id == DP_DEVICE_ID_38EC11 /*&&
+ link->dpcd_caps.sink_dev_id == DP_DEVICE_ID_38EC11 &&
(!memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1,
sizeof(DP_SINK_DEVICE_STR_ID_1)) ||
!memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_2,
- sizeof(DP_SINK_DEVICE_STR_ID_2)))*/)
+ sizeof(DP_SINK_DEVICE_STR_ID_2))))
copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 1;
else
copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0;
+ copy_settings_data->flags.bitfields.alpm_mode = (enum dmub_alpm_mode)link->replay_settings.config.alpm_mode;
+ if (link->replay_settings.config.alpm_mode == DC_ALPM_AUXLESS) {
+ copy_settings_data->auxless_alpm_data.lfps_setup_ns = dc->dc->debug.auxless_alpm_lfps_setup_ns;
+ copy_settings_data->auxless_alpm_data.lfps_period_ns = dc->dc->debug.auxless_alpm_lfps_period_ns;
+ copy_settings_data->auxless_alpm_data.lfps_silence_ns = dc->dc->debug.auxless_alpm_lfps_silence_ns;
+ copy_settings_data->auxless_alpm_data.lfps_t1_t2_override_us =
+ dc->dc->debug.auxless_alpm_lfps_t1t2_us;
+ copy_settings_data->auxless_alpm_data.lfps_t1_t2_offset_us =
+ dc->dc->debug.auxless_alpm_lfps_t1t2_offset_us;
+ copy_settings_data->auxless_alpm_data.lttpr_count = link->dc->link_srv->dp_get_lttpr_count(link);
+ }
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -219,52 +212,213 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub,
* Set coasting vtotal.
*/
static void dmub_replay_set_coasting_vtotal(struct dmub_replay *dmub,
- uint16_t coasting_vtotal,
+ uint32_t coasting_vtotal,
uint8_t panel_inst)
{
union dmub_rb_cmd cmd;
struct dc_context *dc = dmub->ctx;
+ struct dmub_rb_cmd_replay_set_coasting_vtotal *pCmd = NULL;
+
+ pCmd = &(cmd.replay_set_coasting_vtotal);
memset(&cmd, 0, sizeof(cmd));
- cmd.replay_set_coasting_vtotal.header.type = DMUB_CMD__REPLAY;
- cmd.replay_set_coasting_vtotal.header.sub_type = DMUB_CMD__REPLAY_SET_COASTING_VTOTAL;
- cmd.replay_set_coasting_vtotal.header.payload_bytes = sizeof(struct dmub_cmd_replay_set_coasting_vtotal_data);
- cmd.replay_set_coasting_vtotal.replay_set_coasting_vtotal_data.coasting_vtotal = coasting_vtotal;
+ pCmd->header.type = DMUB_CMD__REPLAY;
+ pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_COASTING_VTOTAL;
+ pCmd->header.payload_bytes = sizeof(struct dmub_cmd_replay_set_coasting_vtotal_data);
+ pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF);
+ pCmd->replay_set_coasting_vtotal_data.coasting_vtotal_high = (coasting_vtotal & 0xFFFF0000) >> 16;
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
/*
* Get Replay residency from firmware.
*/
static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst,
- uint32_t *residency, const bool is_start, const bool is_alpm)
+ uint32_t *residency, const bool is_start, enum pr_residency_mode mode)
{
- struct dmub_srv *srv = dmub->ctx->dmub_srv->dmub;
uint16_t param = (uint16_t)(panel_inst << 8);
-
- if (is_alpm)
- param |= REPLAY_RESIDENCY_MODE_ALPM;
+ uint32_t i = 0;
+
+ switch (mode) {
+ case PR_RESIDENCY_MODE_PHY:
+ param |= REPLAY_RESIDENCY_FIELD_MODE_PHY;
+ break;
+ case PR_RESIDENCY_MODE_ALPM:
+ param |= REPLAY_RESIDENCY_FIELD_MODE_ALPM;
+ break;
+ case PR_RESIDENCY_MODE_IPS2:
+ param |= REPLAY_RESIDENCY_REVISION_1;
+ param |= REPLAY_RESIDENCY_FIELD_MODE2_IPS;
+ break;
+ case PR_RESIDENCY_MODE_FRAME_CNT:
+ param |= REPLAY_RESIDENCY_REVISION_1;
+ param |= REPLAY_RESIDENCY_FIELD_MODE2_FRAME_CNT;
+ break;
+ case PR_RESIDENCY_MODE_ENABLEMENT_PERIOD:
+ param |= REPLAY_RESIDENCY_REVISION_1;
+ param |= REPLAY_RESIDENCY_FIELD_MODE2_EN_PERIOD;
+ break;
+ default:
+ break;
+ }
if (is_start)
param |= REPLAY_RESIDENCY_ENABLE;
- // Send gpint command and wait for ack
- dmub_srv_send_gpint_command(srv, DMUB_GPINT__REPLAY_RESIDENCY, param, 30);
+ for (i = 0; i < GPINT_RETRY_NUM; i++) {
+ // Send gpint command and wait for ack
+ if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__REPLAY_RESIDENCY, param,
+ residency, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ return;
- if (!is_start)
- dmub_srv_get_gpint_response(srv, residency);
- else
- *residency = 0;
+ udelay(100);
+ }
+
+ // All GPINT retries failed; report zero residency
+ *residency = 0;
+}
+
+/*
+ * Set REPLAY power optimization flags and coasting vtotal.
+ */
+static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dmub,
+ unsigned int power_opt, uint8_t panel_inst, uint32_t coasting_vtotal)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *dc = dmub->ctx;
+ struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal *pCmd = NULL;
+
+ pCmd = &(cmd.replay_set_power_opt_and_coasting_vtotal);
+
+ memset(&cmd, 0, sizeof(cmd));
+ pCmd->header.type = DMUB_CMD__REPLAY;
+ pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL;
+ pCmd->header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal) -
+ sizeof(struct dmub_cmd_header);
+ pCmd->replay_set_power_opt_data.power_opt = power_opt;
+ pCmd->replay_set_power_opt_data.panel_inst = panel_inst;
+ pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF);
+ pCmd->replay_set_coasting_vtotal_data.coasting_vtotal_high = (coasting_vtotal & 0xFFFF0000) >> 16;
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/*
+ * Send a Replay command to DMUB, selected by the message type.
+ */
+static void dmub_replay_send_cmd(struct dmub_replay *dmub,
+ enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element)
+{
+ union dmub_rb_cmd cmd;
+ struct dc_context *ctx = NULL;
+
+ if (dmub == NULL || cmd_element == NULL)
+ return;
+
+ ctx = dmub->ctx;
+ if (ctx != NULL) {
+
+ if (msg != Replay_Msg_Not_Support) {
+ memset(&cmd, 0, sizeof(cmd));
+ //Header
+ cmd.replay_set_timing_sync.header.type = DMUB_CMD__REPLAY;
+ } else
+ return;
+ } else
+ return;
+
+ switch (msg) {
+ case Replay_Set_Timing_Sync_Supported:
+ //Header
+ cmd.replay_set_timing_sync.header.sub_type =
+ DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED;
+ cmd.replay_set_timing_sync.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_timing_sync) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body
+ cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst =
+ cmd_element->sync_data.panel_inst;
+ cmd.replay_set_timing_sync.replay_set_timing_sync_data.timing_sync_supported =
+ cmd_element->sync_data.timing_sync_supported;
+ break;
+ case Replay_Set_Residency_Frameupdate_Timer:
+ //Header
+ cmd.replay_set_frameupdate_timer.header.sub_type =
+ DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER;
+ cmd.replay_set_frameupdate_timer.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body
+ cmd.replay_set_frameupdate_timer.data.panel_inst =
+ cmd_element->panel_inst;
+ cmd.replay_set_frameupdate_timer.data.enable =
+ cmd_element->timer_data.enable;
+ cmd.replay_set_frameupdate_timer.data.frameupdate_count =
+ cmd_element->timer_data.frameupdate_count;
+ break;
+ case Replay_Set_Pseudo_VTotal:
+ //Header
+ cmd.replay_set_pseudo_vtotal.header.sub_type =
+ DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL;
+ cmd.replay_set_pseudo_vtotal.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body
+ cmd.replay_set_pseudo_vtotal.data.panel_inst =
+ cmd_element->pseudo_vtotal_data.panel_inst;
+ cmd.replay_set_pseudo_vtotal.data.vtotal =
+ cmd_element->pseudo_vtotal_data.vtotal;
+ break;
+ case Replay_Disabled_Adaptive_Sync_SDP:
+ //Header
+ cmd.replay_disabled_adaptive_sync_sdp.header.sub_type =
+ DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP;
+ cmd.replay_disabled_adaptive_sync_sdp.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body
+ cmd.replay_disabled_adaptive_sync_sdp.data.panel_inst =
+ cmd_element->disabled_adaptive_sync_sdp_data.panel_inst;
+ cmd.replay_disabled_adaptive_sync_sdp.data.force_disabled =
+ cmd_element->disabled_adaptive_sync_sdp_data.force_disabled;
+ break;
+ case Replay_Set_General_Cmd:
+ //Header
+ cmd.replay_set_general_cmd.header.sub_type =
+ DMUB_CMD__REPLAY_SET_GENERAL_CMD;
+ cmd.replay_set_general_cmd.header.payload_bytes =
+ sizeof(struct dmub_rb_cmd_replay_set_general_cmd) -
+ sizeof(struct dmub_cmd_header);
+ //Cmd Body
+ cmd.replay_set_general_cmd.data.panel_inst =
+ cmd_element->set_general_cmd_data.panel_inst;
+ cmd.replay_set_general_cmd.data.subtype =
+ cmd_element->set_general_cmd_data.subtype;
+ cmd.replay_set_general_cmd.data.param1 =
+ cmd_element->set_general_cmd_data.param1;
+ cmd.replay_set_general_cmd.data.param2 =
+ cmd_element->set_general_cmd_data.param2;
+ break;
+ case Replay_Msg_Not_Support:
+ default:
+ return;
+ break;
+ }
+
+ dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
static const struct dmub_replay_funcs replay_funcs = {
- .replay_copy_settings = dmub_replay_copy_settings,
- .replay_enable = dmub_replay_enable,
- .replay_get_state = dmub_replay_get_state,
- .replay_set_power_opt = dmub_replay_set_power_opt,
- .replay_set_coasting_vtotal = dmub_replay_set_coasting_vtotal,
- .replay_residency = dmub_replay_residency,
+ .replay_copy_settings = dmub_replay_copy_settings,
+ .replay_enable = dmub_replay_enable,
+ .replay_get_state = dmub_replay_get_state,
+ .replay_set_power_opt = dmub_replay_set_power_opt,
+ .replay_set_coasting_vtotal = dmub_replay_set_coasting_vtotal,
+ .replay_residency = dmub_replay_residency,
+ .replay_set_power_opt_and_coasting_vtotal = dmub_replay_set_power_opt_and_coasting_vtotal,
+ .replay_send_cmd = dmub_replay_send_cmd,
};
/*
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
index e8385bbf51fc..e6346c0ffc0e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h
@@ -1,27 +1,6 @@
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
#ifndef _DMUB_REPLAY_H_
#define _DMUB_REPLAY_H_
@@ -45,10 +24,14 @@ struct dmub_replay_funcs {
struct replay_context *replay_context, uint8_t panel_inst);
void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt,
uint8_t panel_inst);
- void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint16_t coasting_vtotal,
+ void (*replay_send_cmd)(struct dmub_replay *dmub,
+ enum replay_FW_Message_type msg, union dmub_replay_cmd_set *cmd_element);
+ void (*replay_set_coasting_vtotal)(struct dmub_replay *dmub, uint32_t coasting_vtotal,
uint8_t panel_inst);
void (*replay_residency)(struct dmub_replay *dmub,
- uint8_t panel_inst, uint32_t *residency, const bool is_start, const bool is_alpm);
+ uint8_t panel_inst, uint32_t *residency, const bool is_start, const enum pr_residency_mode mode);
+ void (*replay_set_power_opt_and_coasting_vtotal)(struct dmub_replay *dmub,
+ unsigned int power_opt, uint8_t panel_inst, uint32_t coasting_vtotal);
};
struct dmub_replay *dmub_replay_create(struct dc_context *ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
index 84ab48df0c26..c307f040e48f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
@@ -23,11 +23,11 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = -Wno-override-init
DCE110 = dce110_timing_generator.o \
-dce110_compressor.o dce110_hw_sequencer.o dce110_resource.o \
-dce110_opp_regamma_v.o dce110_opp_csc_v.o dce110_timing_generator_v.o \
+dce110_compressor.o dce110_opp_regamma_v.o \
+dce110_opp_csc_v.o dce110_timing_generator_v.o \
dce110_mem_input_v.o dce110_opp_v.o dce110_transform_v.o
AMD_DAL_DCE110 = $(addprefix $(AMDDALPATH)/dc/dce110/,$(DCE110))
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
index d241ee13b293..59a0961b49da 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
@@ -409,19 +409,6 @@ void dce110_compressor_destroy(struct compressor **compressor)
*compressor = NULL;
}
-void get_max_support_fbc_buffersize(unsigned int *max_x, unsigned int *max_y)
-{
- *max_x = FBC_MAX_X;
- *max_y = FBC_MAX_Y;
-
- /* if (m_smallLocalFrameBufferMemory == 1)
- * {
- * *max_x = FBC_MAX_X_SG;
- * *max_y = FBC_MAX_Y_SG;
- * }
- */
-}
-
static const struct compressor_funcs dce110_compressor_funcs = {
.power_up_fbc = dce110_compressor_power_up_fbc,
.enable_fbc = dce110_compressor_enable_fbc,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h
index 26c7335a1cbf..223c57941e92 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.h
@@ -75,7 +75,5 @@ void dce110_compressor_program_lpt_control(struct compressor *cp,
bool dce110_compressor_is_lpt_enabled_in_hw(struct compressor *cp);
-void get_max_support_fbc_buffersize(unsigned int *max_x, unsigned int *max_y);
-
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
index db7557a1c613..2c43c2422638 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
@@ -76,7 +76,6 @@ UNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_C__GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_C_MAS
mmUNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_C,
value);
- temp = 0;
value = 0;
temp = address.low_part >>
UNP_GRPH_PRIMARY_SURFACE_ADDRESS_C__GRPH_PRIMARY_SURFACE_ADDRESS_C__SHIFT;
@@ -112,7 +111,6 @@ UNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_L__GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_L_MAS
mmUNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_L,
value);
- temp = 0;
value = 0;
temp = address.low_part >>
UNP_GRPH_PRIMARY_SURFACE_ADDRESS_L__GRPH_PRIMARY_SURFACE_ADDRESS_L__SHIFT;
@@ -164,7 +162,7 @@ static void enable(struct dce_mem_input *mem_input110)
static void program_tiling(
struct dce_mem_input *mem_input110,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format)
{
uint32_t value = 0;
@@ -525,7 +523,7 @@ static const unsigned int dvmm_Hw_Setting_Linear[4][9] = {
/* Helper to get table entry from surface info */
static const unsigned int *get_dvmm_hw_setting(
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum surface_pixel_format format,
bool chroma)
{
@@ -565,7 +563,7 @@ static const unsigned int *get_dvmm_hw_setting(
static void dce_mem_input_v_program_pte_vm(
struct mem_input *mem_input,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum dc_rotation_angle rotation)
{
struct dce_mem_input *mem_input110 = TO_DCE_MEM_INPUT(mem_input);
@@ -638,7 +636,7 @@ static void dce_mem_input_v_program_pte_vm(
static void dce_mem_input_v_program_surface_config(
struct mem_input *mem_input,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index 6424e7f279dc..61b0807693fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -1955,6 +1955,7 @@ void dce110_tg_program_timing(struct timing_generator *tg,
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
@@ -2015,6 +2016,23 @@ bool dce110_tg_validate_timing(struct timing_generator *tg,
return dce110_timing_generator_validate_timing(tg, timing, SIGNAL_TYPE_NONE);
}
+/* "Container" vs. "pixel" is a concept within HW blocks, mostly those closer to the back-end. It works like this:
+ *
+ * - In most of the formats (RGB or YCbCr 4:4:4, 4:2:2 uncompressed and DSC 4:2:2 Simple) pixel rate is the same as
+ * container rate.
+ *
+ * - In 4:2:0 (DSC or uncompressed) there are two pixels per container, hence the target container rate has to be
+ * halved to maintain the correct pixel rate.
+ *
+ * - Unlike 4:2:2 uncompressed, DSC 4:2:2 Native also has two pixels per container (this happens when DSC is applied
+ * to it) and has to be treated the same as 4:2:0, i.e. target container rate has to be halved in this case as well.
+ *
+ */
+bool dce110_is_two_pixels_per_container(const struct dc_crtc_timing *timing)
+{
+ return timing->pixel_encoding == PIXEL_ENCODING_YCBCR420;
+}
+
void dce110_tg_wait_for_state(struct timing_generator *tg,
enum crtc_state state)
{
@@ -2109,70 +2127,131 @@ bool dce110_configure_crc(struct timing_generator *tg,
cntl_addr = CRTC_REG(mmCRTC_CRC_CNTL);
- /* First, disable CRC before we configure it. */
- dm_write_reg(tg->ctx, cntl_addr, 0);
+ if (!params->enable || params->reset)
+ /* First, disable CRC before we configure it. */
+ dm_write_reg(tg->ctx, cntl_addr, 0);
if (!params->enable)
return true;
/* Program frame boundaries */
- /* Window A x axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL);
- set_reg_field_value(value, params->windowa_x_start,
- CRTC_CRC0_WINDOWA_X_CONTROL,
- CRTC_CRC0_WINDOWA_X_START);
- set_reg_field_value(value, params->windowa_x_end,
- CRTC_CRC0_WINDOWA_X_CONTROL,
- CRTC_CRC0_WINDOWA_X_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Window A y axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL);
- set_reg_field_value(value, params->windowa_y_start,
- CRTC_CRC0_WINDOWA_Y_CONTROL,
- CRTC_CRC0_WINDOWA_Y_START);
- set_reg_field_value(value, params->windowa_y_end,
- CRTC_CRC0_WINDOWA_Y_CONTROL,
- CRTC_CRC0_WINDOWA_Y_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Window B x axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL);
- set_reg_field_value(value, params->windowb_x_start,
- CRTC_CRC0_WINDOWB_X_CONTROL,
- CRTC_CRC0_WINDOWB_X_START);
- set_reg_field_value(value, params->windowb_x_end,
- CRTC_CRC0_WINDOWB_X_CONTROL,
- CRTC_CRC0_WINDOWB_X_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Window B y axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL);
- set_reg_field_value(value, params->windowb_y_start,
- CRTC_CRC0_WINDOWB_Y_CONTROL,
- CRTC_CRC0_WINDOWB_Y_START);
- set_reg_field_value(value, params->windowb_y_end,
- CRTC_CRC0_WINDOWB_Y_CONTROL,
- CRTC_CRC0_WINDOWB_Y_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Set crc mode and selection, and enable. Only using CRC0*/
- value = 0;
- set_reg_field_value(value, params->continuous_mode ? 1 : 0,
- CRTC_CRC_CNTL, CRTC_CRC_CONT_EN);
- set_reg_field_value(value, params->selection,
- CRTC_CRC_CNTL, CRTC_CRC0_SELECT);
- set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN);
- dm_write_reg(tg->ctx, cntl_addr, value);
+ switch (params->crc_eng_inst) {
+ case 0:
+ /* Window A x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL);
+ set_reg_field_value(value, params->windowa_x_start,
+ CRTC_CRC0_WINDOWA_X_CONTROL,
+ CRTC_CRC0_WINDOWA_X_START);
+ set_reg_field_value(value, params->windowa_x_end,
+ CRTC_CRC0_WINDOWA_X_CONTROL,
+ CRTC_CRC0_WINDOWA_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window A y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL);
+ set_reg_field_value(value, params->windowa_y_start,
+ CRTC_CRC0_WINDOWA_Y_CONTROL,
+ CRTC_CRC0_WINDOWA_Y_START);
+ set_reg_field_value(value, params->windowa_y_end,
+ CRTC_CRC0_WINDOWA_Y_CONTROL,
+ CRTC_CRC0_WINDOWA_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL);
+ set_reg_field_value(value, params->windowb_x_start,
+ CRTC_CRC0_WINDOWB_X_CONTROL,
+ CRTC_CRC0_WINDOWB_X_START);
+ set_reg_field_value(value, params->windowb_x_end,
+ CRTC_CRC0_WINDOWB_X_CONTROL,
+ CRTC_CRC0_WINDOWB_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL);
+ set_reg_field_value(value, params->windowb_y_start,
+ CRTC_CRC0_WINDOWB_Y_CONTROL,
+ CRTC_CRC0_WINDOWB_Y_START);
+ set_reg_field_value(value, params->windowb_y_end,
+ CRTC_CRC0_WINDOWB_Y_CONTROL,
+ CRTC_CRC0_WINDOWB_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Set crc mode and selection, and enable.*/
+ value = 0;
+ set_reg_field_value(value, params->continuous_mode ? 1 : 0,
+ CRTC_CRC_CNTL, CRTC_CRC_CONT_EN);
+ set_reg_field_value(value, params->selection,
+ CRTC_CRC_CNTL, CRTC_CRC0_SELECT);
+ set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN);
+ dm_write_reg(tg->ctx, cntl_addr, value);
+ break;
+ case 1:
+ /* Window A x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_X_CONTROL);
+ set_reg_field_value(value, params->windowa_x_start,
+ CRTC_CRC1_WINDOWA_X_CONTROL,
+ CRTC_CRC1_WINDOWA_X_START);
+ set_reg_field_value(value, params->windowa_x_end,
+ CRTC_CRC1_WINDOWA_X_CONTROL,
+ CRTC_CRC1_WINDOWA_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window A y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_Y_CONTROL);
+ set_reg_field_value(value, params->windowa_y_start,
+ CRTC_CRC1_WINDOWA_Y_CONTROL,
+ CRTC_CRC1_WINDOWA_Y_START);
+ set_reg_field_value(value, params->windowa_y_end,
+ CRTC_CRC1_WINDOWA_Y_CONTROL,
+ CRTC_CRC1_WINDOWA_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_X_CONTROL);
+ set_reg_field_value(value, params->windowb_x_start,
+ CRTC_CRC1_WINDOWB_X_CONTROL,
+ CRTC_CRC1_WINDOWB_X_START);
+ set_reg_field_value(value, params->windowb_x_end,
+ CRTC_CRC1_WINDOWB_X_CONTROL,
+ CRTC_CRC1_WINDOWB_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_Y_CONTROL);
+ set_reg_field_value(value, params->windowb_y_start,
+ CRTC_CRC1_WINDOWB_Y_CONTROL,
+ CRTC_CRC1_WINDOWB_Y_START);
+ set_reg_field_value(value, params->windowb_y_end,
+ CRTC_CRC1_WINDOWB_Y_CONTROL,
+ CRTC_CRC1_WINDOWB_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Set crc mode and selection, and enable.*/
+ value = 0;
+ set_reg_field_value(value, params->continuous_mode ? 1 : 0,
+ CRTC_CRC_CNTL, CRTC_CRC_CONT_EN);
+ set_reg_field_value(value, params->selection,
+ CRTC_CRC_CNTL, CRTC_CRC1_SELECT);
+ set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN);
+ dm_write_reg(tg->ctx, cntl_addr, value);
+ break;
+ default:
+ return false;
+ }
return true;
}
-bool dce110_get_crc(struct timing_generator *tg,
+bool dce110_get_crc(struct timing_generator *tg, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
uint32_t addr = 0;
@@ -2188,14 +2267,30 @@ bool dce110_get_crc(struct timing_generator *tg,
if (!field)
return false;
- addr = CRTC_REG(mmCRTC_CRC0_DATA_RG);
- value = dm_read_reg(tg->ctx, addr);
- *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR);
- *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y);
+ switch (idx) {
+ case 0:
+ addr = CRTC_REG(mmCRTC_CRC0_DATA_RG);
+ value = dm_read_reg(tg->ctx, addr);
+ *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR);
+ *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y);
- addr = CRTC_REG(mmCRTC_CRC0_DATA_B);
- value = dm_read_reg(tg->ctx, addr);
- *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB);
+ addr = CRTC_REG(mmCRTC_CRC0_DATA_B);
+ value = dm_read_reg(tg->ctx, addr);
+ *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB);
+ break;
+ case 1:
+ addr = CRTC_REG(mmCRTC_CRC1_DATA_RG);
+ value = dm_read_reg(tg->ctx, addr);
+ *r_cr = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_R_CR);
+ *g_y = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_G_Y);
+
+ addr = CRTC_REG(mmCRTC_CRC1_DATA_B);
+ value = dm_read_reg(tg->ctx, addr);
+ *b_cb = get_reg_field_value(value, CRTC_CRC1_DATA_B, CRC1_B_CB);
+ break;
+ default:
+ return false;
+ }
return true;
}
@@ -2239,6 +2334,7 @@ static const struct timing_generator_funcs dce110_tg_funcs = {
.is_tg_enabled = dce110_is_tg_enabled,
.configure_crc = dce110_configure_crc,
.get_crc = dce110_get_crc,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
void dce110_timing_generator_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
index d8a5ed7b485d..e4f5cad64f32 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
@@ -261,6 +261,7 @@ void dce110_tg_program_timing(struct timing_generator *tg,
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios);
@@ -285,7 +286,9 @@ bool dce110_arm_vert_intr(
bool dce110_configure_crc(struct timing_generator *tg,
const struct crc_params *params);
-bool dce110_get_crc(struct timing_generator *tg,
+bool dce110_get_crc(struct timing_generator *tg, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb);
+bool dce110_is_two_pixels_per_container(const struct dc_crtc_timing *timing);
+
#endif /* __DC_TIMING_GENERATOR_DCE110_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
index c509384fff54..9837dec837ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
@@ -438,6 +438,7 @@ static void dce110_timing_generator_v_program_timing(struct timing_generator *tg
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
@@ -682,7 +683,8 @@ static const struct timing_generator_funcs dce110_tg_v_funcs = {
.tear_down_global_swap_lock =
dce110_timing_generator_v_tear_down_global_swap_lock,
.enable_advanced_request =
- dce110_timing_generator_v_enable_advanced_request
+ dce110_timing_generator_v_enable_advanced_request,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
void dce110_timing_generator_v_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
index 9de6501702d2..683866797709 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
@@ -23,10 +23,9 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = -Wno-override-init
-DCE112 = dce112_compressor.o dce112_hw_sequencer.o \
-dce112_resource.o
+DCE112 = dce112_compressor.o
AMD_DAL_DCE112 = $(addprefix $(AMDDALPATH)/dc/dce112/,$(DCE112))
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
index a9cc4b73270b..8f508e662748 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
@@ -24,10 +24,9 @@
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = -Wno-override-init
-DCE120 = dce120_resource.o dce120_timing_generator.o \
-dce120_hw_sequencer.o
+DCE120 = dce120_timing_generator.o
AMD_DAL_DCE120 = $(addprefix $(AMDDALPATH)/dc/dce120/,$(DCE120))
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
index 4af0c70098c4..31c4f44ceaac 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
@@ -697,6 +697,7 @@ static void dce120_tg_program_timing(struct timing_generator *tg,
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
@@ -1099,45 +1100,79 @@ static bool dce120_configure_crc(struct timing_generator *tg,
if (!dce120_is_tg_enabled(tg))
return false;
- /* First, disable CRC before we configure it. */
- dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
- tg110->offsets.crtc, 0);
+ if (!params->enable || params->reset)
+ /* First, disable CRC before we configure it. */
+ dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
+ tg110->offsets.crtc, 0);
if (!params->enable)
return true;
/* Program frame boundaries */
- /* Window A x axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL,
- CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start,
- CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end);
-
- /* Window A y axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL,
- CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start,
- CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end);
-
- /* Window B x axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL,
- CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start,
- CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end);
-
- /* Window B y axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL,
- CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start,
- CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end);
-
- /* Set crc mode and selection, and enable. Only using CRC0*/
- CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
- CRTC_CRC_EN, params->continuous_mode ? 1 : 0,
- CRTC_CRC0_SELECT, params->selection,
- CRTC_CRC_EN, 1);
+ switch (params->crc_eng_inst) {
+ case 0:
+ /* Window A x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL,
+ CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start,
+ CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL,
+ CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+ CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL,
+ CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start,
+ CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL,
+ CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+ CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+ /* Set crc mode and selection, and enable.*/
+ CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
+ CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ CRTC_CRC0_SELECT, params->selection,
+ CRTC_CRC_EN, 1);
+ break;
+ case 1:
+ /* Window A x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_X_CONTROL,
+ CRTC_CRC1_WINDOWA_X_START, params->windowa_x_start,
+ CRTC_CRC1_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_Y_CONTROL,
+ CRTC_CRC1_WINDOWA_Y_START, params->windowa_y_start,
+ CRTC_CRC1_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_X_CONTROL,
+ CRTC_CRC1_WINDOWB_X_START, params->windowb_x_start,
+ CRTC_CRC1_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_Y_CONTROL,
+ CRTC_CRC1_WINDOWB_Y_START, params->windowb_y_start,
+ CRTC_CRC1_WINDOWB_Y_END, params->windowb_y_end);
+
+ /* Set crc mode and selection, and enable */
+ CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
+ CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ CRTC_CRC1_SELECT, params->selection,
+ CRTC_CRC_EN, 1);
+ break;
+ default:
+ return false;
+ }
return true;
}
-static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr,
- uint32_t *g_y, uint32_t *b_cb)
+static bool dce120_get_crc(struct timing_generator *tg, uint8_t idx,
+ uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
uint32_t value, field;
@@ -1150,14 +1185,30 @@ static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr,
if (!field)
return false;
- value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG,
- tg110->offsets.crtc);
- *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR);
- *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y);
+ switch (idx) {
+ case 0:
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG,
+ tg110->offsets.crtc);
+ *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR);
+ *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y);
- value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B,
- tg110->offsets.crtc);
- *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB);
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B,
+ tg110->offsets.crtc);
+ *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB);
+ break;
+ case 1:
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_RG,
+ tg110->offsets.crtc);
+ *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_R_CR);
+ *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_G_Y);
+
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_B,
+ tg110->offsets.crtc);
+ *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_B, CRC1_B_CB);
+ break;
+ default:
+ return false;
+ }
return true;
}
@@ -1197,6 +1248,7 @@ static const struct timing_generator_funcs dce120_tg_funcs = {
.is_tg_enabled = dce120_is_tg_enabled,
.configure_crc = dce120_configure_crc,
.get_crc = dce120_get_crc,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/Makefile b/drivers/gpu/drm/amd/display/dc/dce60/Makefile
index fee331accc0e..824f73eb3326 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce60/Makefile
@@ -23,10 +23,9 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = -Wno-override-init
-DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \
- dce60_resource.o
+DCE60 = dce60_timing_generator.o
AMD_DAL_DCE60 = $(addprefix $(AMDDALPATH)/dc/dce60/,$(DCE60))
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
index c1a85ee374d9..e691a1cf3356 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c
@@ -111,13 +111,14 @@ static void program_timing(struct timing_generator *tg,
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
if (!use_vbios)
program_pix_dur(tg, timing->pix_clk_100hz);
- dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios);
+ dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios);
}
static void dce60_timing_generator_enable_advanced_request(
@@ -238,6 +239,7 @@ static const struct timing_generator_funcs dce60_tg_funcs = {
dce60_timing_generator_enable_advanced_request,
.configure_crc = dce60_configure_crc,
.get_crc = dce110_get_crc,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
void dce60_timing_generator_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
index 0a9d1a350d8b..fba189d26652 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
@@ -23,10 +23,9 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = -Wno-override-init
-DCE80 = dce80_timing_generator.o dce80_hw_sequencer.o \
- dce80_resource.o
+DCE80 = dce80_timing_generator.o
AMD_DAL_DCE80 = $(addprefix $(AMDDALPATH)/dc/dce80/,$(DCE80))
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
index b8fd43dc010b..88e7a1fc9a30 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
@@ -105,19 +105,20 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz)
dm_write_reg(tg->ctx, addr, value);
}
-static void program_timing(struct timing_generator *tg,
+static void dce80_timing_generator_program_timing(struct timing_generator *tg,
const struct dc_crtc_timing *timing,
int vready_offset,
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
if (!use_vbios)
program_pix_dur(tg, timing->pix_clk_100hz);
- dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios);
+ dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios);
}
static void dce80_timing_generator_enable_advanced_request(
@@ -184,7 +185,7 @@ static void dce80_timing_generator_enable_advanced_request(
static const struct timing_generator_funcs dce80_tg_funcs = {
.validate_timing = dce110_tg_validate_timing,
- .program_timing = program_timing,
+ .program_timing = dce80_timing_generator_program_timing,
.enable_crtc = dce110_timing_generator_enable_crtc,
.disable_crtc = dce110_timing_generator_disable_crtc,
.is_counter_moving = dce110_timing_generator_is_counter_moving,
@@ -220,6 +221,7 @@ static const struct timing_generator_funcs dce80_tg_funcs = {
dce80_timing_generator_enable_advanced_request,
.configure_crc = dce110_configure_crc,
.get_crc = dce110_get_crc,
+ .is_two_pixels_per_container = dce110_is_two_pixels_per_container,
};
void dce80_timing_generator_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 62ad1a11bff9..e1f6623d4936 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -22,12 +22,9 @@
#
# Makefile for DCN.
-DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \
+DCN10 = dcn10_ipp.o \
dcn10_hw_sequencer_debug.o \
- dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
- dcn10_hubp.o dcn10_mpc.o \
- dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \
- dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o
+ dcn10_cm_common.o \
AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
index 3538973bd0c6..dcd2cdfe91eb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
@@ -24,7 +24,7 @@
*/
#include "dc.h"
#include "reg_helper.h"
-#include "dcn10_dpp.h"
+#include "dcn10/dcn10_dpp.h"
#include "dcn10_cm_common.h"
#include "custom_float.h"
@@ -62,6 +62,26 @@ void cm_helper_program_color_matrices(
}
+void cm_helper_read_color_matrices(struct dc_context *ctx,
+ uint16_t *regval,
+ const struct color_matrices_reg *reg)
+{
+ uint32_t cur_csc_reg, regval0, regval1;
+ unsigned int i = 0;
+
+ for (cur_csc_reg = reg->csc_c11_c12;
+ cur_csc_reg <= reg->csc_c33_c34; cur_csc_reg++) {
+ REG_GET_2(cur_csc_reg,
+ csc_c11, &regval0,
+ csc_c12, &regval1);
+
+ regval[2 * i] = regval0;
+ regval[(2 * i) + 1] = regval1;
+
+ i++;
+ }
+}
+
void cm_helper_program_xfer_func(
struct dc_context *ctx,
const struct pwl_params *params,
@@ -345,23 +365,18 @@ bool cm_helper_translate_curve_to_hw_format(struct dc_context *ctx,
region_start = -MAX_LOW_POINT;
region_end = NUMBER_REGIONS - MAX_LOW_POINT;
} else {
- /* 11 segments
- * segment is from 2^-10 to 2^1
+ /* 13 segments
+ * segment is from 2^-12 to 2^0
* There are less than 256 points, for optimization
*/
- seg_distr[0] = 3;
- seg_distr[1] = 4;
- seg_distr[2] = 4;
- seg_distr[3] = 4;
- seg_distr[4] = 4;
- seg_distr[5] = 4;
- seg_distr[6] = 4;
- seg_distr[7] = 4;
- seg_distr[8] = 4;
- seg_distr[9] = 4;
- seg_distr[10] = 1;
-
- region_start = -10;
+ const uint8_t SEG_COUNT = 12;
+
+ for (i = 0; i < SEG_COUNT; i++)
+ seg_distr[i] = 4;
+
+ seg_distr[SEG_COUNT] = 1;
+
+ region_start = -SEG_COUNT;
region_end = 1;
}
@@ -382,6 +397,11 @@ bool cm_helper_translate_curve_to_hw_format(struct dc_context *ctx,
i += increment) {
if (j == hw_points - 1)
break;
+ if (i >= TRANSFER_FUNC_POINTS) {
+ DC_LOG_ERROR("Index out of bounds: i=%d, TRANSFER_FUNC_POINTS=%d\n",
+ i, TRANSFER_FUNC_POINTS);
+ return false;
+ }
rgb_resulted[j].red = output_tf->tf_pts.red[i];
rgb_resulted[j].green = output_tf->tf_pts.green[i];
rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
@@ -566,6 +586,8 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
i += increment) {
if (j == hw_points - 1)
break;
+ if (i >= TRANSFER_FUNC_POINTS)
+ return false;
rgb_resulted[j].red = output_tf->tf_pts.red[i];
rgb_resulted[j].green = output_tf->tf_pts.green[i];
rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
index 0a68b63d6126..decc50b1ac53 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
@@ -114,5 +114,7 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
const struct dc_transfer_func *output_tf,
struct pwl_params *lut_params);
-
+void cm_helper_read_color_matrices(struct dc_context *ctx,
+ uint16_t *regval,
+ const struct color_matrices_reg *reg);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index 46a2ebcabd1a..baf663b661c8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -27,12 +27,12 @@
#include "core_types.h"
#include "resource.h"
#include "custom_float.h"
-#include "dcn10_hw_sequencer.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dce110/dce110_hwseq.h"
#include "dce/dce_hwseq.h"
#include "abm.h"
#include "dmcu.h"
-#include "dcn10_optc.h"
+#include "dcn10/dcn10_optc.h"
#include "dcn10/dcn10_dpp.h"
#include "dcn10/dcn10_mpc.h"
#include "timing_generator.h"
@@ -40,13 +40,13 @@
#include "ipp.h"
#include "mpc.h"
#include "reg_helper.h"
-#include "dcn10_hubp.h"
-#include "dcn10_hubbub.h"
+#include "dcn10/dcn10_hubp.h"
+#include "dcn10/dcn10_hubbub.h"
#include "dcn10_cm_common.h"
#include "clk_mgr.h"
__printf(3, 4)
-unsigned int snprintf_count(char *pbuf, unsigned int bufsize, char *fmt, ...)
+unsigned int snprintf_count(char *pbuf, unsigned int bufsize, const char *fmt, ...)
{
int ret_vsnprintf;
unsigned int chars_printed;
@@ -392,7 +392,7 @@ static unsigned int dcn10_get_mpcc_states(struct dc *dc, char *pBuf, unsigned in
remaining_buffer -= chars_printed;
pBuf += chars_printed;
- for (i = 0; i < pool->pipe_count; i++) {
+ for (i = 0; i < pool->mpcc_count; i++) {
struct mpcc_state s = {0};
pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s);
@@ -429,7 +429,9 @@ static unsigned int dcn10_get_otg_states(struct dc *dc, char *pBuf, unsigned int
struct dcn_otg_state s = {0};
int pix_clk = 0;
- optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s);
+ if (tg->funcs->read_otg_state)
+ tg->funcs->read_otg_state(tg, &s);
+
pix_clk = dc->current_state->res_ctx.pipe_ctx[i].stream_res.pix_clk_params.requested_pix_clk_100hz / 10;
//only print if OTG master is enabled
@@ -495,7 +497,8 @@ static void dcn10_clear_otpc_underflow(struct dc *dc)
struct timing_generator *tg = pool->timing_generators[i];
struct dcn_otg_state s = {0};
- optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s);
+ if (tg->funcs->read_otg_state)
+ tg->funcs->read_otg_state(tg, &s);
if (s.otg_enabled & 1)
tg->funcs->clear_optc_underflow(tg);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index abaed2121feb..25ba0d310d46 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -1,13 +1,7 @@
# SPDX-License-Identifier: MIT
-#
-# Makefile for DCN.
+# Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
-DCN20 = dcn20_resource.o dcn20_init.o dcn20_hwseq.o dcn20_dpp.o dcn20_dpp_cm.o dcn20_hubp.o \
- dcn20_mpc.o dcn20_opp.o dcn20_hubbub.o dcn20_optc.o dcn20_mmhubbub.o \
- dcn20_stream_encoder.o dcn20_link_encoder.o dcn20_dccg.o \
- dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
-
-DCN20 += dcn20_dsc.o
+DCN20 = dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o
AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c
index f8667be57046..80779e85e2c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.c
@@ -299,6 +299,17 @@ void dwb2_set_scaler(struct dwbc *dwbc, struct dc_dwb_params *params)
}
}
+
+ if (dwbc20->dwbc_mask->WBSCL_COEF_RAM_SEL) {
+ /* Swap double buffered coefficient set */
+ uint32_t wbscl_mode = REG_READ(WBSCL_MODE);
+ bool coef_ram_current = get_reg_field_value_ex(
+ wbscl_mode, dwbc20->dwbc_mask->WBSCL_COEF_RAM_SEL_CURRENT,
+ dwbc20->dwbc_shift->WBSCL_COEF_RAM_SEL_CURRENT);
+
+ REG_UPDATE(WBSCL_MODE, WBSCL_COEF_RAM_SEL, !coef_ram_current);
+ }
+
}
static const struct dwbc_funcs dcn20_dwbc_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c
index 994fb732a7cb..a0d437f0ce2b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb_scl.c
@@ -690,6 +690,9 @@ static void wbscl_set_scaler_filter(
int pair;
uint16_t odd_coef, even_coef;
+ if (!filter)
+ return;
+
for (phase = 0; phase < (NUM_PHASES / 2 + 1); phase++) {
for (pair = 0; pair < tap_pairs; pair++) {
even_coef = filter[phase * taps + 2 * pair];
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c
index 96c263223315..5bc3bc60a2ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c
@@ -38,6 +38,9 @@
#define FN(reg_name, field_name) \
vmid->shifts->field_name, vmid->masks->field_name
+#define DC_LOGGER \
+ CTX->logger
+
static void dcn20_wait_for_vmid_ready(struct dcn20_vmid *vmid)
{
/* According the hardware spec, we need to poll for the lowest
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
index 5c9ce2cebb0f..c9f4a5a9f522 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile
@@ -1,10 +1,8 @@
# SPDX-License-Identifier: MIT
-#
-# Makefile for DCN.
-DCN201 = dcn201_init.o dcn201_resource.o dcn201_hwseq.o \
- dcn201_hubbub.o\
- dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \
- dcn201_dccg.o dcn201_link_encoder.o
+# Copyright © 2021-2024 Advanced Micro Devices, Inc. All rights reserved.
+
+DCN201 = dcn201_mpc.o dcn201_opp.o \
+ dcn201_link_encoder.o
AMD_DAL_DCN201 = $(addprefix $(AMDDALPATH)/dc/dcn201/,$(DCN201))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h
index 8b95ef251332..be25e8dc0636 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h
@@ -30,6 +30,10 @@
#define DPCS_DCN201_MASK_SH_LIST(mask_sh)\
DPCS_MASK_SH_LIST(mask_sh),\
+ LE_SF(DPCSSYS_CR0_RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_2, VCO_LD_VAL_OVRD, mask_sh),\
+ LE_SF(DPCSSYS_CR0_RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_2, VCO_LD_VAL_OVRD_EN, mask_sh),\
+ LE_SF(DPCSSYS_CR0_RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_3, REF_LD_VAL_OVRD, mask_sh),\
+ LE_SF(DPCSSYS_CR0_RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_3, REF_LD_VAL_OVRD_EN, mask_sh),\
LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL2, RDPCS_PHY_DPALT_DISABLE_ACK, mask_sh),\
LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL2, RDPCS_PHY_DPALT_DISABLE, mask_sh),\
LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL2, RDPCS_PHY_DPALT_DP4, mask_sh),\
@@ -44,7 +48,15 @@
LE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL11, RDPCS_PHY_DP_REF_CLK_EN, mask_sh)
#define DPCS_DCN201_REG_LIST(id) \
- DPCS_DCN2_CMN_REG_LIST(id)
+ DPCS_DCN2_CMN_REG_LIST(id), \
+ SRI_IX(RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_2, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE0_DIG_PCS_XF_RX_OVRD_IN_3, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE1_DIG_PCS_XF_RX_OVRD_IN_2, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE1_DIG_PCS_XF_RX_OVRD_IN_3, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE2_DIG_PCS_XF_RX_OVRD_IN_2, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE2_DIG_PCS_XF_RX_OVRD_IN_3, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE3_DIG_PCS_XF_RX_OVRD_IN_2, DPCSSYS_CR, id), \
+ SRI_IX(RAWLANE3_DIG_PCS_XF_RX_OVRD_IN_3, DPCSSYS_CR, id)
void dcn201_link_encoder_construct(
struct dcn20_link_encoder *enc20,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c
index 8e77db46a409..e83367a9b6b3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c
@@ -50,9 +50,11 @@ static struct opp_funcs dcn201_opp_funcs = {
.opp_set_disp_pattern_generator = opp2_set_disp_pattern_generator,
.opp_program_dpg_dimensions = opp2_program_dpg_dimensions,
.dpg_is_blanked = opp2_dpg_is_blanked,
+ .dpg_is_pending = opp2_dpg_is_pending,
.opp_dpg_set_blank_color = opp2_dpg_set_blank_color,
.opp_destroy = opp1_destroy,
.opp_program_left_edge_extra_pixel = opp2_program_left_edge_extra_pixel,
+ .opp_get_left_edge_extra_pixel_count = opp2_get_left_edge_extra_pixel_count,
};
void dcn201_opp_construct(struct dcn201_opp *oppn201,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h
index aca389ec1779..edb7f9653cb6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h
@@ -42,8 +42,7 @@
OPP_MASK_SH_LIST_DCN20(mask_sh)
#define OPP_DCN201_REG_FIELD_LIST(type) \
- OPP_DCN20_REG_FIELD_LIST(type);
-
+ OPP_DCN20_REG_FIELD_LIST(type)
struct dcn201_opp_shift {
OPP_DCN201_REG_FIELD_LIST(uint8_t);
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index 0dc06e428999..c215f3cc6e44 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -1,9 +1,7 @@
# SPDX-License-Identifier: MIT
-#
-# Makefile for DCN21.
+# Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
-DCN21 = dcn21_init.o dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o \
- dcn21_hwseq.o dcn21_link_encoder.o dcn21_dccg.o
+DCN21 = dcn21_link_encoder.o
AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index 4a3e9e47b6b6..b17277de0340 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -23,26 +23,10 @@
#
#
-DCN30 := \
- dcn30_init.o \
- dcn30_hubbub.o \
- dcn30_hubp.o \
- dcn30_dpp.o \
- dcn30_optc.o \
- dcn30_dccg.o \
- dcn30_hwseq.o \
- dcn30_mpc.o dcn30_vpg.o \
+DCN30 := dcn30_vpg.o \
dcn30_afmt.o \
- dcn30_dio_stream_encoder.o \
- dcn30_dwb.o \
- dcn30_dpp_cm.o \
- dcn30_dwb_cm.o \
dcn30_cm_common.o \
dcn30_mmhubbub.o \
- dcn30_resource.o \
- dcn30_dio_link_encoder.o
-
-
AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
index e0df9b0065f9..0690c346f2c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
@@ -26,9 +26,9 @@
#include "dm_services.h"
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn30_dpp.h"
+#include "dcn30/dcn30_dpp.h"
#include "basics/conversion.h"
-#include "dcn30_cm_common.h"
+#include "dcn30/dcn30_cm_common.h"
#include "custom_float.h"
#define REG(reg) reg
@@ -114,7 +114,6 @@ bool cm3_helper_translate_curve_to_hw_format(
struct pwl_result_data *rgb;
struct pwl_result_data *rgb_plus_1;
struct pwl_result_data *rgb_minus_1;
- struct fixed31_32 end_value;
int32_t region_start, region_end;
int32_t i;
@@ -141,23 +140,18 @@ bool cm3_helper_translate_curve_to_hw_format(
region_start = -MAX_LOW_POINT;
region_end = NUMBER_REGIONS - MAX_LOW_POINT;
} else {
- /* 11 segments
- * segment is from 2^-10 to 2^0
+ /* 13 segments
+ * segment is from 2^-12 to 2^0
* There are less than 256 points, for optimization
*/
- seg_distr[0] = 3;
- seg_distr[1] = 4;
- seg_distr[2] = 4;
- seg_distr[3] = 4;
- seg_distr[4] = 4;
- seg_distr[5] = 4;
- seg_distr[6] = 4;
- seg_distr[7] = 4;
- seg_distr[8] = 4;
- seg_distr[9] = 4;
- seg_distr[10] = 1;
-
- region_start = -10;
+ const uint8_t SEG_COUNT = 12;
+
+ for (i = 0; i < SEG_COUNT; i++)
+ seg_distr[i] = 4;
+
+ seg_distr[SEG_COUNT] = 1;
+
+ region_start = -SEG_COUNT;
region_end = 1;
}
@@ -176,8 +170,10 @@ bool cm3_helper_translate_curve_to_hw_format(
NUMBER_SW_SEGMENTS;
for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
i += increment) {
- if (j == hw_points - 1)
+ if (j == hw_points)
break;
+ if (i >= TRANSFER_FUNC_POINTS)
+ return false;
rgb_resulted[j].red = output_tf->tf_pts.red[i];
rgb_resulted[j].green = output_tf->tf_pts.green[i];
rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
@@ -187,13 +183,13 @@ bool cm3_helper_translate_curve_to_hw_format(
/* last point */
start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
- rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
- rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
- rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
+ rgb_resulted[hw_points].red = output_tf->tf_pts.red[start_index];
+ rgb_resulted[hw_points].green = output_tf->tf_pts.green[start_index];
+ rgb_resulted[hw_points].blue = output_tf->tf_pts.blue[start_index];
- rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red;
- rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green;
- rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue;
+ rgb_resulted[hw_points+1].red = rgb_resulted[hw_points].red;
+ rgb_resulted[hw_points+1].green = rgb_resulted[hw_points].green;
+ rgb_resulted[hw_points+1].blue = rgb_resulted[hw_points].blue;
// All 3 color channels have same x
corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
@@ -220,34 +216,16 @@ bool cm3_helper_translate_curve_to_hw_format(
/* see comment above, m_arrPoints[1].y should be the Y value for the
* region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
*/
- corner_points[1].red.y = rgb_resulted[hw_points - 1].red;
- corner_points[1].green.y = rgb_resulted[hw_points - 1].green;
- corner_points[1].blue.y = rgb_resulted[hw_points - 1].blue;
+ corner_points[1].red.y = rgb_resulted[hw_points].red;
+ corner_points[1].green.y = rgb_resulted[hw_points].green;
+ corner_points[1].blue.y = rgb_resulted[hw_points].blue;
corner_points[1].red.slope = dc_fixpt_zero;
corner_points[1].green.slope = dc_fixpt_zero;
corner_points[1].blue.slope = dc_fixpt_zero;
- if (output_tf->tf == TRANSFER_FUNCTION_PQ || output_tf->tf == TRANSFER_FUNCTION_HLG) {
- /* for PQ/HLG, we want to have a straight line from last HW X point,
- * and the slope to be such that we hit 1.0 at 10000/1000 nits.
- */
-
- if (output_tf->tf == TRANSFER_FUNCTION_PQ)
- end_value = dc_fixpt_from_int(125);
- else
- end_value = dc_fixpt_from_fraction(125, 10);
-
- corner_points[1].red.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].red.y),
- dc_fixpt_sub(end_value, corner_points[1].red.x));
- corner_points[1].green.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].green.y),
- dc_fixpt_sub(end_value, corner_points[1].green.x));
- corner_points[1].blue.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].blue.y),
- dc_fixpt_sub(end_value, corner_points[1].blue.x));
- }
- lut_params->hw_points_num = hw_points;
+ // DCN3+ have 257 pts in lieu of no separate slope registers
+ // Prior HW had 256 base+slope pairs
+ lut_params->hw_points_num = hw_points + 1;
k = 0;
for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
@@ -267,187 +245,37 @@ bool cm3_helper_translate_curve_to_hw_format(
rgb_plus_1 = rgb_resulted + 1;
rgb_minus_1 = rgb;
- i = 1;
- while (i != hw_points + 1) {
- if (i >= hw_points - 1) {
- if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
- rgb_plus_1->red = dc_fixpt_add(rgb->red, rgb_minus_1->delta_red);
- if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
- rgb_plus_1->green = dc_fixpt_add(rgb->green, rgb_minus_1->delta_green);
- if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
- rgb_plus_1->blue = dc_fixpt_add(rgb->blue, rgb_minus_1->delta_blue);
- }
-
- rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
- rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
- rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
+ if (fixpoint == true) {
+ i = 1;
+ while (i != hw_points + 2) {
+ if (i >= hw_points) {
+ if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
+ rgb_plus_1->red = dc_fixpt_add(rgb->red,
+ rgb_minus_1->delta_red);
+ if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
+ rgb_plus_1->green = dc_fixpt_add(rgb->green,
+ rgb_minus_1->delta_green);
+ if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
+ rgb_plus_1->blue = dc_fixpt_add(rgb->blue,
+ rgb_minus_1->delta_blue);
+ }
- if (fixpoint == true) {
rgb->delta_red_reg = dc_fixpt_clamp_u0d10(rgb->delta_red);
rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green);
rgb->delta_blue_reg = dc_fixpt_clamp_u0d10(rgb->delta_blue);
rgb->red_reg = dc_fixpt_clamp_u0d14(rgb->red);
rgb->green_reg = dc_fixpt_clamp_u0d14(rgb->green);
rgb->blue_reg = dc_fixpt_clamp_u0d14(rgb->blue);
- }
-
- ++rgb_plus_1;
- rgb_minus_1 = rgb;
- ++rgb;
- ++i;
- }
- cm3_helper_convert_to_custom_float(rgb_resulted,
- lut_params->corner_points,
- hw_points, fixpoint);
-
- return true;
-}
-
-#define NUM_DEGAMMA_REGIONS 12
-
-
-bool cm3_helper_translate_curve_to_degamma_hw_format(
- const struct dc_transfer_func *output_tf,
- struct pwl_params *lut_params)
-{
- struct curve_points3 *corner_points;
- struct pwl_result_data *rgb_resulted;
- struct pwl_result_data *rgb;
- struct pwl_result_data *rgb_plus_1;
-
- int32_t region_start, region_end;
- int32_t i;
- uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points;
- if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS)
- return false;
-
- corner_points = lut_params->corner_points;
- rgb_resulted = lut_params->rgb_resulted;
- hw_points = 0;
-
- memset(lut_params, 0, sizeof(struct pwl_params));
- memset(seg_distr, 0, sizeof(seg_distr));
-
- region_start = -NUM_DEGAMMA_REGIONS;
- region_end = 0;
-
-
- for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++)
- seg_distr[i] = -1;
- /* 12 segments
- * segments are from 2^-12 to 0
- */
- for (i = 0; i < NUM_DEGAMMA_REGIONS ; i++)
- seg_distr[i] = 4;
-
- for (k = 0; k < MAX_REGIONS_NUMBER; k++) {
- if (seg_distr[k] != -1)
- hw_points += (1 << seg_distr[k]);
- }
-
- j = 0;
- for (k = 0; k < (region_end - region_start); k++) {
- increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
- start_index = (region_start + k + MAX_LOW_POINT) *
- NUMBER_SW_SEGMENTS;
- for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS;
- i += increment) {
- if (j == hw_points - 1)
- break;
- rgb_resulted[j].red = output_tf->tf_pts.red[i];
- rgb_resulted[j].green = output_tf->tf_pts.green[i];
- rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
- j++;
+ ++rgb_plus_1;
+ rgb_minus_1 = rgb;
+ ++rgb;
+ ++i;
}
}
-
- /* last point */
- start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
- rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
- rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
- rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
-
- corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
- dc_fixpt_from_int(region_start));
- corner_points[0].green.x = corner_points[0].red.x;
- corner_points[0].blue.x = corner_points[0].red.x;
- corner_points[1].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
- dc_fixpt_from_int(region_end));
- corner_points[1].green.x = corner_points[1].red.x;
- corner_points[1].blue.x = corner_points[1].red.x;
-
- corner_points[0].red.y = rgb_resulted[0].red;
- corner_points[0].green.y = rgb_resulted[0].green;
- corner_points[0].blue.y = rgb_resulted[0].blue;
-
- /* see comment above, m_arrPoints[1].y should be the Y value for the
- * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
- */
- corner_points[1].red.y = rgb_resulted[hw_points - 1].red;
- corner_points[1].green.y = rgb_resulted[hw_points - 1].green;
- corner_points[1].blue.y = rgb_resulted[hw_points - 1].blue;
- corner_points[1].red.slope = dc_fixpt_zero;
- corner_points[1].green.slope = dc_fixpt_zero;
- corner_points[1].blue.slope = dc_fixpt_zero;
-
- if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
- /* for PQ, we want to have a straight line from last HW X point,
- * and the slope to be such that we hit 1.0 at 10000 nits.
- */
- const struct fixed31_32 end_value =
- dc_fixpt_from_int(125);
-
- corner_points[1].red.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].red.y),
- dc_fixpt_sub(end_value, corner_points[1].red.x));
- corner_points[1].green.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].green.y),
- dc_fixpt_sub(end_value, corner_points[1].green.x));
- corner_points[1].blue.slope = dc_fixpt_div(
- dc_fixpt_sub(dc_fixpt_one, corner_points[1].blue.y),
- dc_fixpt_sub(end_value, corner_points[1].blue.x));
- }
-
- lut_params->hw_points_num = hw_points;
-
- k = 0;
- for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
- if (seg_distr[k] != -1) {
- lut_params->arr_curve_points[k].segments_num =
- seg_distr[k];
- lut_params->arr_curve_points[i].offset =
- lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]);
- }
- k++;
- }
-
- if (seg_distr[k] != -1)
- lut_params->arr_curve_points[k].segments_num = seg_distr[k];
-
- rgb = rgb_resulted;
- rgb_plus_1 = rgb_resulted + 1;
-
- i = 1;
- while (i != hw_points + 1) {
- if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
- rgb_plus_1->red = rgb->red;
- if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
- rgb_plus_1->green = rgb->green;
- if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
- rgb_plus_1->blue = rgb->blue;
-
- rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
- rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
- rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
-
- ++rgb_plus_1;
- ++rgb;
- ++i;
- }
cm3_helper_convert_to_custom_float(rgb_resulted,
lut_params->corner_points,
- hw_points, false);
+ hw_points+1, fixpoint);
return true;
}
@@ -603,24 +431,6 @@ bool cm3_helper_convert_to_custom_float(
return false;
}
- if (!convert_to_custom_float_format(rgb->delta_red, &fmt,
- &rgb->delta_red_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(rgb->delta_green, &fmt,
- &rgb->delta_green_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
- if (!convert_to_custom_float_format(rgb->delta_blue, &fmt,
- &rgb->delta_blue_reg)) {
- BREAK_TO_DEBUGGER();
- return false;
- }
-
++rgb;
++i;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h
index ed9a5549c389..466ba20b9c61 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h
@@ -26,6 +26,7 @@
#ifndef __DAL_DCN30_VPG_H__
#define __DAL_DCN30_VPG_H__
+#include "vpg.h"
#define DCN30_VPG_FROM_VPG(vpg)\
container_of(vpg, struct dcn30_vpg, base)
@@ -132,28 +133,6 @@ struct dcn30_vpg_mask {
VPG_DCN3_REG_FIELD_LIST(uint32_t);
};
-struct vpg;
-
-struct vpg_funcs {
- void (*update_generic_info_packet)(
- struct vpg *vpg,
- uint32_t packet_index,
- const struct dc_info_packet *info_packet,
- bool immediate_update);
-
- void (*vpg_poweron)(
- struct vpg *vpg);
-
- void (*vpg_powerdown)(
- struct vpg *vpg);
-};
-
-struct vpg {
- const struct vpg_funcs *funcs;
- struct dc_context *ctx;
- int inst;
-};
-
struct dcn30_vpg {
struct vpg base;
const struct dcn30_vpg_registers *regs;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
index 9002cb10a6ae..fb4814ab3f05 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
@@ -1,18 +1,9 @@
-#
-# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved
-#
-# All rights reserved. This notice is intended as a precaution against
-# inadvertent publication and does not imply publication or any waiver
-# of confidentiality. The year included in the foregoing notice is the
-# year of creation of the work.
-#
-# Authors: AMD
+# SPDX-License-Identifier: MIT
+# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Makefile for dcn30.
-DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \
- dcn301_dio_link_encoder.o dcn301_hwseq.o dcn301_panel_cntl.o dcn301_hubbub.o \
- dcn301_optc.o
+DCN301 = dcn301_panel_cntl.o
AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
index ad0df1a72a90..9e96a3ace207 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c
@@ -215,4 +215,5 @@ void dcn301_panel_cntl_construct(
dcn301_panel_cntl->base.funcs = &dcn301_link_panel_cntl_funcs;
dcn301_panel_cntl->base.ctx = init_data->ctx;
dcn301_panel_cntl->base.inst = init_data->inst;
+ dcn301_panel_cntl->base.pwrseq_inst = 0;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
deleted file mode 100644
index ebd01cb467b7..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved
-#
-# Authors: AMD
-#
-# Makefile for dcn302.
-
-DCN3_02 = dcn302_init.o dcn302_hwseq.o dcn302_resource.o
-
-AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_02)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
deleted file mode 100644
index 8702e0b7fda3..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: MIT
-#
-# Copyright (C) 2021 Advanced Micro Devices, Inc. All the rights reserved
-#
-# Authors: AMD
-#
-# Makefile for dcn303.
-
-DCN3_03 = dcn303_init.o dcn303_hwseq.o dcn303_resource.o
-
-AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_03)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.c
deleted file mode 100644
index b48b732aa647..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.c
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#include "dcn303_hwseq.h"
-
-#include "dce/dce_hwseq.h"
-
-#include "reg_helper.h"
-#include "dc.h"
-
-#define DC_LOGGER_INIT(logger)
-
-#define CTX \
- hws->ctx
-#define REG(reg)\
- hws->regs->reg
-
-#undef FN
-#define FN(reg_name, field_name) \
- hws->shifts->field_name, hws->masks->field_name
-
-
-void dcn303_dpp_pg_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool power_on)
-{
- /*DCN303 removes PG registers*/
-}
-
-void dcn303_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on)
-{
- /*DCN303 removes PG registers*/
-}
-
-void dcn303_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool power_on)
-{
- /*DCN303 removes PG registers*/
-}
-
-void dcn303_enable_power_gating_plane(struct dce_hwseq *hws, bool enable)
-{
- /*DCN303 removes PG registers*/
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.h
deleted file mode 100644
index 8b69a3b76c11..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_hwseq.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#ifndef __DC_HWSS_DCN303_H__
-#define __DC_HWSS_DCN303_H__
-
-#include "hw_sequencer_private.h"
-
-void dcn303_dpp_pg_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool power_on);
-void dcn303_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on);
-void dcn303_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool power_on);
-void dcn303_enable_power_gating_plane(struct dce_hwseq *hws, bool enable);
-
-#endif /* __DC_HWSS_DCN303_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c
deleted file mode 100644
index f499f8ab5e47..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#include "dcn303_hwseq.h"
-#include "dcn30/dcn30_init.h"
-#include "dc.h"
-
-#include "dcn303_init.h"
-
-void dcn303_hw_sequencer_construct(struct dc *dc)
-{
- dcn30_hw_sequencer_construct(dc);
-
- dc->hwseq->funcs.dpp_pg_control = dcn303_dpp_pg_control;
- dc->hwseq->funcs.hubp_pg_control = dcn303_hubp_pg_control;
- dc->hwseq->funcs.dsc_pg_control = dcn303_dsc_pg_control;
- dc->hwseq->funcs.enable_power_gating_plane = dcn303_enable_power_gating_plane;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h
deleted file mode 100644
index 66b1e3604f07..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_init.h
+++ /dev/null
@@ -1,15 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#ifndef __DC_DCN303_INIT_H__
-#define __DC_DCN303_INIT_H__
-
-struct dc;
-
-void dcn303_hw_sequencer_construct(struct dc *dc);
-
-#endif /* __DC_DCN303_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h
deleted file mode 100644
index 9c7d79540900..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright (C) 2021 Advanced Micro Devices, Inc.
- *
- * Authors: AMD
- */
-
-#ifndef _DCN303_RESOURCE_H_
-#define _DCN303_RESOURCE_H_
-
-#include "core_types.h"
-
-extern struct _vcs_dpi_ip_params_st dcn3_03_ip;
-extern struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc;
-
-struct resource_pool *dcn303_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc);
-
-void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
-
-#endif /* _DCN303_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index ec041e3cda30..d510e4652c18 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -1,18 +1,11 @@
+# SPDX-License-Identifier: MIT
+# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved.
#
-# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved
-#
-# All rights reserved. This notice is intended as a precaution against
-# inadvertent publication and does not imply publication or any waiver
-# of confidentiality. The year included in the foregoing notice is the
-# year of creation of the work.
-#
-# Authors: AMD
#
# Makefile for dcn31.
-DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_hwseq.o dcn31_init.o dcn31_hubp.o \
- dcn31_dccg.o dcn31_optc.o dcn31_dio_link_encoder.o dcn31_panel_cntl.o \
- dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \
+DCN31 = dcn31_panel_cntl.o \
+ dcn31_apg.o \
dcn31_afmt.o dcn31_vpg.o
AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
index 217acd4e292a..f9961a6446f3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
@@ -50,9 +50,9 @@ static bool dcn31_query_backlight_info(struct panel_cntl *panel_cntl, union dmub
cmd->panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
cmd->panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_QUERY_BACKLIGHT_INFO;
cmd->panel_cntl.header.payload_bytes = sizeof(cmd->panel_cntl.data);
- cmd->panel_cntl.data.inst = dcn31_panel_cntl->base.inst;
+ cmd->panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
- return dm_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
+ return dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY);
}
static uint32_t dcn31_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cntl)
@@ -70,6 +70,7 @@ static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
struct dcn31_panel_cntl *dcn31_panel_cntl = TO_DCN31_PANEL_CNTL(panel_cntl);
struct dc_dmub_srv *dc_dmub_srv = panel_cntl->ctx->dmub_srv;
union dmub_rb_cmd cmd;
+ uint32_t freq_to_set = panel_cntl->ctx->dc->debug.pwm_freq;
if (!dc_dmub_srv)
return 0;
@@ -78,14 +79,14 @@ static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
cmd.panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
cmd.panel_cntl.header.sub_type = DMUB_CMD__PANEL_CNTL_HW_INIT;
cmd.panel_cntl.header.payload_bytes = sizeof(cmd.panel_cntl.data);
- cmd.panel_cntl.data.inst = dcn31_panel_cntl->base.inst;
+ cmd.panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
cmd.panel_cntl.data.bl_pwm_cntl = panel_cntl->stored_backlight_registers.BL_PWM_CNTL;
cmd.panel_cntl.data.bl_pwm_period_cntl = panel_cntl->stored_backlight_registers.BL_PWM_PERIOD_CNTL;
cmd.panel_cntl.data.bl_pwm_ref_div1 =
panel_cntl->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
cmd.panel_cntl.data.bl_pwm_ref_div2 =
panel_cntl->stored_backlight_registers.PANEL_PWRSEQ_REF_DIV2;
- if (!dm_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ if (!dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
return 0;
panel_cntl->stored_backlight_registers.BL_PWM_CNTL = cmd.panel_cntl.data.bl_pwm_cntl;
@@ -96,6 +97,19 @@ static uint32_t dcn31_panel_cntl_hw_init(struct panel_cntl *panel_cntl)
panel_cntl->stored_backlight_registers.PANEL_PWRSEQ_REF_DIV2 =
cmd.panel_cntl.data.bl_pwm_ref_div2;
+ if (freq_to_set >= MIN_DEBUG_FREQ_HZ && freq_to_set <= MAX_DEBUG_FREQ_HZ) {
+ uint32_t xtal = panel_cntl->ctx->dc->res_pool->ref_clocks.dccg_ref_clock_inKhz;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.panel_cntl.header.type = DMUB_CMD__PANEL_CNTL;
+ cmd.panel_cntl.header.sub_type = DMUB_CMD__PANEL_DEBUG_PWM_FREQ;
+ cmd.panel_cntl.header.payload_bytes = sizeof(cmd.panel_cntl.data);
+ cmd.panel_cntl.data.pwrseq_inst = dcn31_panel_cntl->base.pwrseq_inst;
+ cmd.panel_cntl.data.bl_pwm_cntl = xtal;
+ cmd.panel_cntl.data.bl_pwm_period_cntl = freq_to_set;
+ if (!dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ return 0;
+ }
return cmd.panel_cntl.data.current_backlight;
}
@@ -154,7 +168,33 @@ void dcn31_panel_cntl_construct(
struct dcn31_panel_cntl *dcn31_panel_cntl,
const struct panel_cntl_init_data *init_data)
{
+
dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs;
dcn31_panel_cntl->base.ctx = init_data->ctx;
dcn31_panel_cntl->base.inst = init_data->inst;
+
+ if (dcn31_panel_cntl->base.ctx->dc->config.support_edp0_on_dp1) {
+ /* If supported, the power sequencer mapping follows the DIG instance. */
+ uint8_t pwrseq_inst = 0xF;
+
+ switch (init_data->eng_id) {
+ case ENGINE_ID_DIGA:
+ pwrseq_inst = 0;
+ break;
+ case ENGINE_ID_DIGB:
+ pwrseq_inst = 1;
+ break;
+ default:
+ DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id);
+ ASSERT(false);
+ break;
+ }
+
+ dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst;
+ } else {
+ /* If not supported, pwrseq will be assigned in order,
+ * so first pwrseq will be assigned to first panel instance (legacy behavior)
+ */
+ dcn31_panel_cntl->base.pwrseq_inst = dcn31_panel_cntl->base.inst;
+ }
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h
index d33ccd6ef8c3..8cf0259e211e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.h
@@ -29,6 +29,9 @@
#include "panel_cntl.h"
#include "dce/dce_panel_cntl.h"
+#define MIN_DEBUG_FREQ_HZ 200
+#define MAX_DEBUG_FREQ_HZ 6250
+
struct dcn31_panel_cntl {
struct panel_cntl base;
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c
index f1deb1c3c363..cfb923d85630 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c
@@ -63,7 +63,12 @@ void vpg31_poweron(struct vpg *vpg)
{
struct dcn31_vpg *vpg31 = DCN31_VPG_FROM_VPG(vpg);
- if (vpg->ctx->dc->debug.enable_mem_low_power.bits.vpg == false)
+ uint32_t vpg_gsp_mem_pwr_state;
+
+ REG_GET(VPG_MEM_PWR, VPG_GSP_MEM_PWR_STATE, &vpg_gsp_mem_pwr_state);
+
+ if (vpg->ctx->dc->debug.enable_mem_low_power.bits.vpg == false &&
+ vpg_gsp_mem_pwr_state == 0)
return;
REG_UPDATE_2(VPG_MEM_PWR, VPG_GSP_MEM_LIGHT_SLEEP_DIS, 1, VPG_GSP_LIGHT_SLEEP_FORCE, 0);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h
index 0e76eabce441..609e58dbd056 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h
@@ -26,6 +26,7 @@
#ifndef __DAL_DCN31_VPG_H__
#define __DAL_DCN31_VPG_H__
+#include "vpg.h"
#define DCN31_VPG_FROM_VPG(vpg)\
container_of(vpg, struct dcn31_vpg, base)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
deleted file mode 100644
index 702c28c2560e..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-# (c) Copyright 2022 Advanced Micro Devices, Inc. All the rights reserved
-#
-# All rights reserved. This notice is intended as a precaution against
-# inadvertent publication and does not imply publication or any waiver
-# of confidentiality. The year included in the foregoing notice is the
-# year of creation of the work.
-#
-# Authors: AMD
-#
-# Makefile for dcn314.
-
-DCN314 = dcn314_resource.o dcn314_hwseq.o dcn314_init.o \
- dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o
-
-AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN314)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
deleted file mode 100644
index e943b643ab6b..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-#
-# (c) Copyright 2022 Advanced Micro Devices, Inc. All the rights reserved
-#
-# All rights reserved. This notice is intended as a precaution against
-# inadvertent publication and does not imply publication or any waiver
-# of confidentiality. The year included in the foregoing notice is the
-# year of creation of the work.
-#
-# Authors: AMD
-#
-# Makefile for dcn32.
-
-DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_hwseq.o dcn32_init.o \
- dcn32_dccg.o dcn32_optc.o dcn32_mmhubbub.o dcn32_hubp.o dcn32_dpp.o \
- dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \
- dcn32_resource_helpers.o dcn32_mpc.o
-
-AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN32)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
deleted file mode 100644
index 0a199c83bb5b..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# (c) Copyright 2020 Advanced Micro Devices, Inc. All the rights reserved
-#
-# All rights reserved. This notice is intended as a precaution against
-# inadvertent publication and does not imply publication or any waiver
-# of confidentiality. The year included in the foregoing notice is the
-# year of creation of the work.
-#
-# Authors: AMD
-#
-# Makefile for dcn321.
-
-DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o
-
-AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321))
-
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN321)
diff --git a/drivers/gpu/drm/amd/display/dc/dio/Makefile b/drivers/gpu/drm/amd/display/dc/dio/Makefile
new file mode 100644
index 000000000000..0dfd480976f7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/Makefile
@@ -0,0 +1,117 @@
+#
+# Copyright 2020 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+#
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN10
+###############################################################################
+DIO_DCN10 = dcn10_link_encoder.o dcn10_stream_encoder.o
+
+AMD_DAL_DIO_DCN10 = $(addprefix $(AMDDALPATH)/dc/dio/dcn10/,$(DIO_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN10)
+
+###############################################################################
+# DCN20
+###############################################################################
+DIO_DCN20 = dcn20_link_encoder.o dcn20_stream_encoder.o
+
+AMD_DAL_DIO_DCN20 = $(addprefix $(AMDDALPATH)/dc/dio/dcn20/,$(DIO_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN20)
+
+###############################################################################
+# DCN30
+###############################################################################
+DIO_DCN30 = dcn30_dio_link_encoder.o dcn30_dio_stream_encoder.o
+
+AMD_DAL_DIO_DCN30 = $(addprefix $(AMDDALPATH)/dc/dio/dcn30/,$(DIO_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN30)
+
+###############################################################################
+# DCN301
+###############################################################################
+DIO_DCN301 = dcn301_dio_link_encoder.o
+
+AMD_DAL_DIO_DCN301 = $(addprefix $(AMDDALPATH)/dc/dio/dcn301/,$(DIO_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN301)
+
+###############################################################################
+# DCN31
+###############################################################################
+DIO_DCN31 = dcn31_dio_link_encoder.o
+
+AMD_DAL_DIO_DCN31 = $(addprefix $(AMDDALPATH)/dc/dio/dcn31/,$(DIO_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN31)
+
+###############################################################################
+# DCN314
+###############################################################################
+DIO_DCN314 = dcn314_dio_stream_encoder.o
+
+AMD_DAL_DIO_DCN314 = $(addprefix $(AMDDALPATH)/dc/dio/dcn314/,$(DIO_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN314)
+
+###############################################################################
+# DCN32
+###############################################################################
+DIO_DCN32 = dcn32_dio_link_encoder.o dcn32_dio_stream_encoder.o
+
+AMD_DAL_DIO_DCN32 = $(addprefix $(AMDDALPATH)/dc/dio/dcn32/,$(DIO_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN32)
+
+###############################################################################
+# DCN35
+###############################################################################
+DIO_DCN35 = dcn35_dio_link_encoder.o dcn35_dio_stream_encoder.o
+
+AMD_DAL_DIO_DCN35 = $(addprefix $(AMDDALPATH)/dc/dio/dcn35/,$(DIO_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN35)
+
+###############################################################################
+# DCN321
+###############################################################################
+DIO_DCN321 = dcn321_dio_link_encoder.o
+
+AMD_DAL_DIO_DCN321 = $(addprefix $(AMDDALPATH)/dc/dio/dcn321/,$(DIO_DCN321))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN321)
+
+
+###############################################################################
+# DCN401
+###############################################################################
+DIO_DCN401 = dcn401_dio_link_encoder.o dcn401_dio_stream_encoder.o
+
+AMD_DAL_DIO_DCN401 = $(addprefix $(AMDDALPATH)/dc/dio/dcn401/,$(DIO_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN401)
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c
index 377f1ba1a81b..1c1228116487 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c
@@ -812,7 +812,7 @@ bool dcn10_link_encoder_validate_output_with_stream(
enc10, &stream->timing);
break;
case SIGNAL_TYPE_EDP:
- is_valid = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? true : false;
+ is_valid = stream->timing.pixel_encoding == PIXEL_ENCODING_RGB;
break;
case SIGNAL_TYPE_VIRTUAL:
is_valid = true;
@@ -1104,6 +1104,7 @@ void dcn10_link_encoder_dp_set_lane_settings(
union dpcd_training_lane_set training_lane_set = { { 0 } };
int32_t lane = 0;
struct bp_transmitter_control cntl = { 0 };
+ enum bp_result result;
if (!link_settings) {
BREAK_TO_DEBUGGER();
@@ -1138,7 +1139,12 @@ void dcn10_link_encoder_dp_set_lane_settings(
cntl.lane_settings = training_lane_set.raw;
/* call VBIOS table to set voltage swing and pre-emphasis */
- link_transmitter_control(enc10, &cntl);
+ result = link_transmitter_control(enc10, &cntl);
+
+ if (result != BP_RESULT_OK) {
+ DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n", __func__);
+ BREAK_TO_DEBUGGER();
+ }
}
}
@@ -1439,7 +1445,6 @@ enum signal_type dcn10_get_dig_mode(
default:
return SIGNAL_TYPE_NONE;
}
- return SIGNAL_TYPE_NONE;
}
void dcn10_link_encoder_get_max_link_cap(struct link_encoder *enc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.h
index 773380ef4997..b7a89c39f445 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.h
@@ -167,7 +167,8 @@ struct dcn10_link_enc_registers {
uint32_t DIO_LINKD_CNTL;
uint32_t DIO_LINKE_CNTL;
uint32_t DIO_LINKF_CNTL;
- uint32_t DIG_FIFO_CTRL0;
+ uint32_t DIO_CLK_CNTL;
+ uint32_t DIG_BE_CLK_CNTL;
};
#define LE_SF(reg_name, field_name, post_fix)\
@@ -473,15 +474,41 @@ struct dcn10_link_enc_registers {
type HPO_DP_ENC_SEL;\
type HPO_HDMI_ENC_SEL
-#define DCN32_LINK_ENCODER_REG_FIELD_LIST(type) \
- type DIG_FIFO_OUTPUT_PIXEL_MODE
+#define DCN35_LINK_ENCODER_REG_FIELD_LIST(type) \
+ type DIG_BE_ENABLE;\
+ type DIG_RB_SWITCH_EN;\
+ type DIG_BE_MODE;\
+ type DIG_BE_CLK_EN;\
+ type DIG_BE_SOFT_RESET;\
+ type HDCP_SOFT_RESET;\
+ type DIG_BE_SYMCLK_G_CLOCK_ON;\
+ type DIG_BE_SYMCLK_G_HDCP_CLOCK_ON;\
+ type DIG_BE_SYMCLK_G_TMDS_CLOCK_ON;\
+ type DISPCLK_R_GATE_DIS;\
+ type DISPCLK_G_GATE_DIS;\
+ type REFCLK_R_GATE_DIS;\
+ type REFCLK_G_GATE_DIS;\
+ type SOCCLK_G_GATE_DIS;\
+ type SYMCLK_FE_R_GATE_DIS;\
+ type SYMCLK_FE_G_GATE_DIS;\
+ type SYMCLK_R_GATE_DIS;\
+ type SYMCLK_G_GATE_DIS;\
+ type DIO_FGCG_REP_DIS;\
+ type DISPCLK_G_HDCP_GATE_DIS;\
+ type SYMCLKA_G_HDCP_GATE_DIS;\
+ type SYMCLKB_G_HDCP_GATE_DIS;\
+ type SYMCLKC_G_HDCP_GATE_DIS;\
+ type SYMCLKD_G_HDCP_GATE_DIS;\
+ type SYMCLKE_G_HDCP_GATE_DIS;\
+ type SYMCLKF_G_HDCP_GATE_DIS;\
+ type SYMCLKG_G_HDCP_GATE_DIS
struct dcn10_link_enc_shift {
DCN_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
DCN20_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
DCN30_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
DCN31_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
- DCN32_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
+ DCN35_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
};
struct dcn10_link_enc_mask {
@@ -489,7 +516,7 @@ struct dcn10_link_enc_mask {
DCN20_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
DCN30_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
DCN31_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
- DCN32_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
+ DCN35_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
};
struct dcn10_link_encoder {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c
index f496e952ceec..d928b4dcf6b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c
@@ -28,7 +28,7 @@
#include "dcn10_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#include "dcn30/dcn30_afmt.h"
@@ -255,7 +255,6 @@ void enc1_stream_encoder_dp_set_stream_attribute(
uint32_t misc1 = 0;
uint32_t h_blank;
uint32_t h_back_porch;
- uint8_t synchronous_clock = 0; /* asynchronous mode */
uint8_t colorimetry_bpc;
uint8_t dp_pixel_encoding = 0;
uint8_t dp_component_depth = 0;
@@ -362,7 +361,6 @@ void enc1_stream_encoder_dp_set_stream_attribute(
break;
}
- misc0 = misc0 | synchronous_clock;
misc0 = colorimetry_bpc << 5;
switch (output_color_space) {
@@ -393,7 +391,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
break;
case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
case COLOR_SPACE_2020_RGB_FULLRANGE:
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
case COLOR_SPACE_XR_RGB:
case COLOR_SPACE_MSREF_SCRGB:
case COLOR_SPACE_ADOBERGB:
@@ -406,6 +404,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
case COLOR_SPACE_CUSTOMPOINTS:
case COLOR_SPACE_UNKNOWN:
case COLOR_SPACE_YCBCR709_BLACK:
+ default:
/* do nothing */
break;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.h
index 9d5e2a7848dd..54a6a4ebd636 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.h
@@ -127,7 +127,6 @@ struct dcn10_stream_enc_registers {
uint32_t AFMT_60958_1;
uint32_t AFMT_60958_2;
uint32_t DIG_FE_CNTL;
- uint32_t DIG_FE_CNTL2;
uint32_t DIG_FIFO_STATUS;
uint32_t DP_MSE_RATE_CNTL;
uint32_t DP_MSE_RATE_UPDATE;
@@ -188,6 +187,9 @@ struct dcn10_stream_enc_registers {
uint32_t HDMI_GENERIC_PACKET_CONTROL10;
uint32_t DIG_CLOCK_PATTERN;
uint32_t DIG_FIFO_CTRL0;
+ uint32_t DIG_FE_CLK_CNTL;
+ uint32_t DIG_FE_EN_CNTL;
+ uint32_t STREAM_MAPPER_CONTROL;
};
@@ -567,22 +569,43 @@ struct dcn10_stream_enc_registers {
type DP_SEC_GSP11_ENABLE;\
type DP_SEC_GSP11_LINE_NUM
-#define SE_REG_FIELD_LIST_DCN3_2(type) \
+#define SE_REG_FIELD_LIST_DCN3_1_COMMON(type) \
type DIG_FIFO_OUTPUT_PIXEL_MODE;\
type DP_PIXEL_PER_CYCLE_PROCESSING_MODE;\
type DIG_SYMCLK_FE_ON;\
type DIG_FIFO_READ_START_LEVEL;\
type DIG_FIFO_ENABLE;\
type DIG_FIFO_RESET;\
- type DIG_FIFO_RESET_DONE
-
+ type DIG_FIFO_RESET_DONE;\
+ type PIXEL_ENCODING_TYPE;\
+ type UNCOMPRESSED_PIXEL_FORMAT;\
+ type UNCOMPRESSED_COMPONENT_DEPTH
+
+#define SE_REG_FIELD_LIST_DCN3_5_COMMON(type) \
+ type DIG_FE_CLK_EN;\
+ type DIG_FE_MODE;\
+ type DIG_FE_SOFT_RESET;\
+ type DIG_FE_ENABLE;\
+ type DIG_FE_SYMCLK_FE_G_CLOCK_ON;\
+ type DIG_FE_DISPCLK_G_CLOCK_ON;\
+ type DIG_FE_SYMCLK_FE_G_AFMT_CLOCK_ON;\
+ type DIG_FE_SYMCLK_FE_G_TMDS_CLOCK_ON;\
+ type DIG_FE_SOCCLK_G_AFMT_CLOCK_ON;\
+ type DIG_STREAM_LINK_TARGET
+
+#define SE_REG_FIELD_LIST_DCN4_01_COMMON(type) \
+ type COMPRESSED_PIXEL_FORMAT;\
+ type DP_VID_N_INTERVAL;\
+ type DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE;\
+ type DP_STEER_FIFO_ENABLE
struct dcn10_stream_encoder_shift {
SE_REG_FIELD_LIST_DCN1_0(uint8_t);
uint8_t HDMI_ACP_SEND;
SE_REG_FIELD_LIST_DCN2_0(uint8_t);
SE_REG_FIELD_LIST_DCN3_0(uint8_t);
- SE_REG_FIELD_LIST_DCN3_2(uint8_t);
-
+ SE_REG_FIELD_LIST_DCN3_1_COMMON(uint8_t);
+ SE_REG_FIELD_LIST_DCN3_5_COMMON(uint8_t);
+ SE_REG_FIELD_LIST_DCN4_01_COMMON(uint8_t); /* shift table: bit positions, same width as the lists above */
};
struct dcn10_stream_encoder_mask {
@@ -590,8 +613,9 @@ struct dcn10_stream_encoder_mask {
uint32_t HDMI_ACP_SEND;
SE_REG_FIELD_LIST_DCN2_0(uint32_t);
SE_REG_FIELD_LIST_DCN3_0(uint32_t);
- SE_REG_FIELD_LIST_DCN3_2(uint32_t);
-
+ SE_REG_FIELD_LIST_DCN3_1_COMMON(uint32_t);
+ SE_REG_FIELD_LIST_DCN3_5_COMMON(uint32_t);
+ SE_REG_FIELD_LIST_DCN4_01_COMMON(uint32_t);
};
struct dcn10_stream_encoder {
@@ -649,9 +673,6 @@ void enc1_stream_encoder_send_immediate_sdp_message(
void enc1_stream_encoder_stop_dp_info_packets(
struct stream_encoder *enc);
-void enc1_stream_encoder_reset_fifo(
- struct stream_encoder *enc);
-
void enc1_stream_encoder_dp_blank(
struct dc_link *link,
struct stream_encoder *enc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c
index 51a57dae1811..51a57dae1811 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.h
index b2b266953d18..762c579fcb44 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.h
@@ -147,7 +147,8 @@
LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_DATA_SWAP, mask_sh),\
LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_DATA_ORDER_INVERT, mask_sh),\
LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_FIFO_EN, mask_sh),\
- LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_FIFO_RD_START_DELAY, mask_sh)
+ LE_SF(DPCSTX0_DPCSTX_TX_CNTL, DPCS_TX_FIFO_RD_START_DELAY, mask_sh),\
+ LE_SF(DPCSTX0_DPCSTX_DEBUG_CONFIG, DPCS_DBG_CBUS_DIS, mask_sh)
#define DPCS_DCN2_MASK_SH_LIST(mask_sh)\
DPCS_MASK_SH_LIST(mask_sh),\
@@ -231,6 +232,8 @@
SRI(RDPCSTX_PHY_FUSE3, RDPCSTX, id), \
SRI(DPCSTX_TX_CLOCK_CNTL, DPCSTX, id), \
SRI(DPCSTX_TX_CNTL, DPCSTX, id), \
+ SRI(DPCSTX_DEBUG_CONFIG, DPCSTX, id), \
+ SRI(RDPCSTX_DEBUG_CONFIG, RDPCSTX, id), \
SR(RDPCSTX0_RDPCSTX_SCRATCH)
@@ -279,7 +282,6 @@ struct mpll_cfg {
uint32_t tx_peaking_lvl;
uint32_t ctr_reqs_pll;
-
};
struct dpcssys_phy_seq_cfg {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c
index 0b47aeb60e79..bec0b4aaeb2b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c
@@ -29,7 +29,7 @@
#include "dcn20_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#define DC_LOGGER \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.h
index baa1e539f341..baa1e539f341 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.c
index 1fb8fd7afc95..b8e31b5ea114 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.c
@@ -30,8 +30,6 @@
#include "dcn30_dio_link_encoder.h"
#include "stream_encoder.h"
#include "dc_bios_types.h"
-/* #include "dcn3ag/dcn3ag_phy_fw.h" */
-
#include "gpio_service_interface.h"
#define CTX \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.h
index f2d90f2b8bf1..5b6177c2ae98 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.h
@@ -55,7 +55,8 @@
SRI(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id)
#define LINK_ENCODER_MASK_SH_LIST_DCN30(mask_sh) \
- LINK_ENCODER_MASK_SH_LIST_DCN20(mask_sh)
+ LINK_ENCODER_MASK_SH_LIST_DCN20(mask_sh),\
+ LE_SF(DIG0_TMDS_DCBALANCER_CONTROL, TMDS_SYNC_DCBAL_EN, mask_sh)
#define DPCS_DCN3_MASK_SH_LIST(mask_sh)\
DPCS_DCN2_MASK_SH_LIST(mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.c
index 005dbe099a7a..e93be7b6d9b0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.c
@@ -29,9 +29,6 @@
#include "reg_helper.h"
#include "hw_shared.h"
#include "dc.h"
-#include "core_types.h"
-#include <linux/delay.h>
-
#define DC_LOGGER \
enc1->base.ctx->logger
@@ -50,7 +47,7 @@
enc1->base.ctx
-static void enc3_update_hdmi_info_packet(
+void enc3_update_hdmi_info_packet(
struct dcn10_stream_encoder *enc1,
uint32_t packet_index,
const struct dc_info_packet *info_packet)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.h
index 06310973ded2..830ce7e47035 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.h
@@ -322,6 +322,10 @@ void enc3_dp_set_dsc_pps_info_packet(
struct stream_encoder *enc,
bool enable,
uint8_t *dsc_packed_pps,
- bool immediate_update);
+ bool immediate_update);
+void enc3_update_hdmi_info_packet(
+ struct dcn10_stream_encoder *enc1,
+ uint32_t packet_index,
+ const struct dc_info_packet *info_packet);
#endif /* __DC_DIO_STREAM_ENCODER_DCN30_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c
index 1b39a6e8a1ac..1b39a6e8a1ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h
index 49f8d91d4951..49f8d91d4951 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c
index 4596f3bac1b4..84cc2ddc52fe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c
@@ -37,7 +37,7 @@
#include "link_enc_cfg.h"
#include "dc_dmub_srv.h"
#include "dal_asic_id.h"
-#include "link.h"
+#include "link_service.h"
#define CTX \
enc10->base.ctx
@@ -125,7 +125,7 @@ static bool query_dp_alt_from_dmub(struct link_encoder *enc,
cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data);
cmd->query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter);
- if (!dm_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ if (!dc_wake_and_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
return false;
return true;
@@ -205,7 +205,7 @@ void dcn31_link_encoder_set_dio_phy_mux(
}
}
-static void enc31_hw_init(struct link_encoder *enc)
+void enc31_hw_init(struct link_encoder *enc)
{
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
@@ -436,7 +436,7 @@ static bool link_dpia_control(struct dc_context *dc_ctx,
cmd.dig1_dpia_control.dpia_control = *dpia_control;
- dm_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -653,8 +653,9 @@ void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, struct dc_lin
if (!query_dp_alt_from_dmub(enc, &cmd))
return;
- if (cmd.query_dp_alt.data.is_usb &&
- cmd.query_dp_alt.data.is_dp4 == 0)
+ if (cmd.query_dp_alt.data.is_dp_alt_disable == 0 &&
+ cmd.query_dp_alt.data.is_usb &&
+ cmd.query_dp_alt.data.is_dp4 == 0)
link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count);
return;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.h
index 221671563a0b..ee78ba80797c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.h
@@ -89,6 +89,7 @@
SRI(RDPCSTX_PHY_FUSE1, RDPCSTX, id), \
SRI(RDPCSTX_PHY_FUSE2, RDPCSTX, id), \
SRI(RDPCSTX_PHY_FUSE3, RDPCSTX, id), \
+ SRI(RDPCSTX_DEBUG_CONFIG, RDPCSTX, id), \
SR(RDPCSTX0_RDPCSTX_SCRATCH), \
SRI(RDPCSTX_PHY_RX_LD_VAL, RDPCSTX, id),\
SRI(RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG, RDPCSTX, id)
@@ -222,6 +223,7 @@
SRI(RDPCSTX_PHY_FUSE1, RDPCSTX, id), \
SRI(RDPCSTX_PHY_FUSE2, RDPCSTX, id), \
SRI(RDPCSTX_PHY_FUSE3, RDPCSTX, id), \
+ SRI(RDPCSTX_DEBUG_CONFIG, RDPCSTX, id), \
SR(RDPCSTX0_RDPCSTX_SCRATCH), \
SRI(RDPCSTX_PHY_RX_LD_VAL, RDPCSTX, id),\
SRI(RDPCSTX_DMCU_DPALT_DIS_BLOCK_REG, RDPCSTX, id)
@@ -283,4 +285,6 @@ bool dcn31_link_encoder_is_in_alt_mode(
void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc,
struct dc_link_settings *link_settings);
+void enc31_hw_init(struct link_encoder *enc);
+
#endif /* __DC_LINK_ENCODER__DCN31_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c
index 467509a65fa7..3e85e9c3d2cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c
@@ -30,7 +30,7 @@
#include "dcn314_dio_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#define DC_LOGGER \
@@ -49,7 +49,7 @@
#define CTX \
enc1->base.ctx
-static void enc314_reset_fifo(struct stream_encoder *enc, bool reset)
+void enc314_reset_fifo(struct stream_encoder *enc, bool reset)
{
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
uint32_t reset_val = reset ? 1 : 0;
@@ -64,7 +64,7 @@ static void enc314_reset_fifo(struct stream_encoder *enc, bool reset)
udelay(10);
}
-static void enc314_enable_fifo(struct stream_encoder *enc)
+void enc314_enable_fifo(struct stream_encoder *enc)
{
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
@@ -76,14 +76,23 @@ static void enc314_enable_fifo(struct stream_encoder *enc)
REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1);
}
-static void enc314_disable_fifo(struct stream_encoder *enc)
+void enc314_disable_fifo(struct stream_encoder *enc)
{
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0);
}
-static void enc314_dp_set_odm_combine(
+static bool enc314_is_fifo_enabled(struct stream_encoder *enc)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t fifo_enable; /* DIG_FIFO_ENABLE field read back from DIG_FIFO_CTRL0 */
+
+ REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &fifo_enable);
+ return fifo_enable != 0;
+}
+
+void enc314_dp_set_odm_combine(
struct stream_encoder *enc,
bool odm_combine)
{
@@ -93,7 +102,7 @@ static void enc314_dp_set_odm_combine(
}
/* setup stream encoder in dvi mode */
-static void enc314_stream_encoder_dvi_set_stream_attribute(
+void enc314_stream_encoder_dvi_set_stream_attribute(
struct stream_encoder *enc,
struct dc_crtc_timing *crtc_timing,
bool is_dual_link)
@@ -133,7 +142,7 @@ static void enc314_stream_encoder_dvi_set_stream_attribute(
}
/* setup stream encoder in hdmi mode */
-static void enc314_stream_encoder_hdmi_set_stream_attribute(
+void enc314_stream_encoder_hdmi_set_stream_attribute(
struct stream_encoder *enc,
struct dc_crtc_timing *crtc_timing,
int actual_pix_clk_khz,
@@ -274,7 +283,7 @@ static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
return two_pix;
}
-static void enc314_stream_encoder_dp_blank(
+void enc314_stream_encoder_dp_blank(
struct dc_link *link,
struct stream_encoder *enc)
{
@@ -285,7 +294,7 @@ static void enc314_stream_encoder_dp_blank(
enc314_disable_fifo(enc);
}
-static void enc314_stream_encoder_dp_unblank(
+void enc314_stream_encoder_dp_unblank(
struct dc_link *link,
struct stream_encoder *enc,
const struct encoder_unblank_param *param)
@@ -380,7 +389,7 @@ static void enc314_stream_encoder_dp_unblank(
* sc_bytes_per_pixel: DP_DSC_BYTES_PER_PIXEL removed in DCN32
* dsc_slice_width: DP_DSC_SLICE_WIDTH removed in DCN32
*/
-static void enc314_dp_set_dsc_config(struct stream_encoder *enc,
+void enc314_dp_set_dsc_config(struct stream_encoder *enc,
enum optc_dsc_mode dsc_mode,
uint32_t dsc_bytes_per_pixel,
uint32_t dsc_slice_width)
@@ -393,7 +402,7 @@ static void enc314_dp_set_dsc_config(struct stream_encoder *enc,
/* this function read dsc related register fields to be logged later in dcn10_log_hw_state
* into a dcn_dsc_state struct.
*/
-static void enc314_read_state(struct stream_encoder *enc, struct enc_state *s)
+void enc314_read_state(struct stream_encoder *enc, struct enc_state *s)
{
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
@@ -410,7 +419,7 @@ static void enc314_read_state(struct stream_encoder *enc, struct enc_state *s)
}
}
-static void enc314_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container)
+void enc314_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container)
{
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
@@ -468,6 +477,7 @@ static const struct stream_encoder_funcs dcn314_str_enc_funcs = {
.enable_fifo = enc314_enable_fifo,
.disable_fifo = enc314_disable_fifo,
+ .is_fifo_enabled = enc314_is_fifo_enabled,
.set_input_mode = enc314_set_dig_input_mode,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h
index ed0772387903..86548be591be 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h
@@ -312,4 +312,44 @@ void enc3_dp_set_dsc_pps_info_packet(
uint8_t *dsc_packed_pps,
bool immediate_update);
+void enc314_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link);
+
+void enc314_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio);
+
+void enc314_stream_encoder_dp_blank(
+ struct dc_link *link,
+ struct stream_encoder *enc);
+
+void enc314_stream_encoder_dp_unblank(
+ struct dc_link *link,
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param);
+
+void enc314_reset_fifo(struct stream_encoder *enc, bool reset);
+
+void enc314_enable_fifo(struct stream_encoder *enc);
+
+void enc314_disable_fifo(struct stream_encoder *enc);
+
+void enc314_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container);
+
+void enc314_read_state(struct stream_encoder *enc, struct enc_state *s);
+
+void enc314_dp_set_odm_combine(
+ struct stream_encoder *enc,
+ bool odm_combine);
+
+void enc314_dp_set_dsc_config(
+ struct stream_encoder *enc,
+ enum optc_dsc_mode dsc_mode,
+ uint32_t dsc_bytes_per_pixel,
+ uint32_t dsc_slice_width);
+
#endif /* __DC_DIO_STREAM_ENCODER_DCN314_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.c
index 501388014855..06907e8a4eda 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.c
@@ -34,6 +34,7 @@
#include "dc_bios_types.h"
#include "link_enc_cfg.h"
+#include "dc_dmub_srv.h"
#include "gpio_service_interface.h"
#ifndef MIN
@@ -61,6 +62,38 @@
#define AUX_REG_WRITE(reg_name, val) \
dm_write_reg(CTX, AUX_REG(reg_name), val)
+/*
+ * Translate a UNIPHY transmitter into a PHY index: UNIPHY_A..UNIPHY_G map to
+ * 0..6. Any other transmitter value maps to 0.
+ */
+static uint8_t phy_id_from_transmitter(enum transmitter t)
+{
+	switch (t) {
+	case TRANSMITTER_UNIPHY_A:
+		return 0;
+	case TRANSMITTER_UNIPHY_B:
+		return 1;
+	case TRANSMITTER_UNIPHY_C:
+		return 2;
+	case TRANSMITTER_UNIPHY_D:
+		return 3;
+	case TRANSMITTER_UNIPHY_E:
+		return 4;
+	case TRANSMITTER_UNIPHY_F:
+		return 5;
+	case TRANSMITTER_UNIPHY_G:
+		return 6;
+	default:
+		return 0;
+	}
+}
void enc32_hw_init(struct link_encoder *enc)
{
@@ -117,38 +150,50 @@ void dcn32_link_encoder_enable_dp_output(
}
}
-static bool dcn32_link_encoder_is_in_alt_mode(struct link_encoder *enc)
+static bool query_dp_alt_from_dmub(struct link_encoder *enc,
+ union dmub_rb_cmd *cmd)
{
struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
- uint32_t dp_alt_mode_disable = 0;
- bool is_usb_c_alt_mode = false;
- if (enc->features.flags.bits.DP_IS_USB_C) {
- /* if value == 1 alt mode is disabled, otherwise it is enabled */
- REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, &dp_alt_mode_disable);
- is_usb_c_alt_mode = (dp_alt_mode_disable == 0);
- }
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->query_dp_alt.header.type = DMUB_CMD__VBIOS;
+ cmd->query_dp_alt.header.sub_type =
+ DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT;
+ cmd->query_dp_alt.header.payload_bytes = sizeof(cmd->query_dp_alt.data);
+ cmd->query_dp_alt.data.phy_id = phy_id_from_transmitter(enc10->base.transmitter);
- return is_usb_c_alt_mode;
+ if (!dc_wake_and_execute_dmub_cmd(enc->ctx, cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY))
+ return false;
+
+ return true;
+}
+
+bool dcn32_link_encoder_is_in_alt_mode(struct link_encoder *enc)
+{
+ union dmub_rb_cmd cmd;
+
+ if (!query_dp_alt_from_dmub(enc, &cmd))
+ return false;
+
+ return (cmd.query_dp_alt.data.is_dp_alt_disable == 0);
}
-static void dcn32_link_encoder_get_max_link_cap(struct link_encoder *enc,
+void dcn32_link_encoder_get_max_link_cap(struct link_encoder *enc,
struct dc_link_settings *link_settings)
{
- struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
- uint32_t is_in_usb_c_dp4_mode = 0;
+ union dmub_rb_cmd cmd;
dcn10_link_encoder_get_max_link_cap(enc, link_settings);
- /* in usb c dp2 mode, max lane count is 2 */
- if (enc->funcs->is_in_alt_mode && enc->funcs->is_in_alt_mode(enc)) {
- REG_GET(RDPCSPIPE_PHY_CNTL6, RDPCS_PHY_DPALT_DP4, &is_in_usb_c_dp4_mode);
- if (!is_in_usb_c_dp4_mode)
- link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count);
- }
+ if (!query_dp_alt_from_dmub(enc, &cmd))
+ return;
+ if (cmd.query_dp_alt.data.is_usb &&
+ cmd.query_dp_alt.data.is_dp4 == 0)
+ link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count);
}
+
static const struct link_encoder_funcs dcn32_link_enc_funcs = {
.read_state = link_enc2_read_state,
.validate_output_with_stream =
@@ -203,9 +248,6 @@ void dcn32_link_encoder_construct(
enc10->base.hpd_source = init_data->hpd_source;
enc10->base.connector = init_data->connector;
- if (enc10->base.connector.id == CONNECTOR_ID_USBC)
- enc10->base.features.flags.bits.DP_IS_USB_C = 1;
-
enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
enc10->base.features = *enc_features;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.h
index bbcfce06bec0..35d23d9db45e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.h
@@ -26,15 +26,7 @@
#ifndef __DC_LINK_ENCODER__DCN32_H__
#define __DC_LINK_ENCODER__DCN32_H__
-#include "dcn31/dcn31_dio_link_encoder.h"
-
-#define LE_DCN32_REG_LIST(id)\
- LE_DCN31_REG_LIST(id),\
- SRI(DIG_FIFO_CTRL0, DIG, id)
-
-#define LINK_ENCODER_MASK_SH_LIST_DCN32(mask_sh) \
- LINK_ENCODER_MASK_SH_LIST_DCN31(mask_sh),\
- LE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, mask_sh)
+#include "dcn30/dcn30_dio_link_encoder.h"
void dcn32_link_encoder_construct(
struct dcn20_link_encoder *enc20,
@@ -53,4 +45,9 @@ void dcn32_link_encoder_enable_dp_output(
const struct dc_link_settings *link_settings,
enum clock_source_id clock_source);
+bool dcn32_link_encoder_is_in_alt_mode(struct link_encoder *enc);
+
+void dcn32_link_encoder_get_max_link_cap(struct link_encoder *enc,
+ struct dc_link_settings *link_settings);
+
#endif /* __DC_LINK_ENCODER__DCN32_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c
index 2fef1419ae91..3523d1cdc1a3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c
@@ -29,7 +29,7 @@
#include "dcn32_dio_stream_encoder.h"
#include "reg_helper.h"
#include "hw_shared.h"
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
#define DC_LOGGER \
@@ -52,11 +52,11 @@
static void enc32_dp_set_odm_combine(
struct stream_encoder *enc,
- bool odm_combine)
+ bool two_pixel_per_cyle)
{
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
- REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_PER_CYCLE_PROCESSING_MODE, odm_combine ? 1 : 0);
+ REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_PER_CYCLE_PROCESSING_MODE, two_pixel_per_cyle ? 1 : 0);
}
/* setup stream encoder in dvi mode */
@@ -241,46 +241,12 @@ static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
return two_pix;
}
-static bool is_h_timing_divisible_by_2(const struct dc_crtc_timing *timing)
-{
- /* math borrowed from function of same name in inc/resource
- * checks if h_timing is divisible by 2
- */
-
- bool divisible = false;
- uint16_t h_blank_start = 0;
- uint16_t h_blank_end = 0;
-
- if (timing) {
- h_blank_start = timing->h_total - timing->h_front_porch;
- h_blank_end = h_blank_start - timing->h_addressable;
-
- /* HTOTAL, Hblank start/end, and Hsync start/end all must be
- * divisible by 2 in order for the horizontal timing params
- * to be considered divisible by 2. Hsync start is always 0.
- */
- divisible = (timing->h_total % 2 == 0) &&
- (h_blank_start % 2 == 0) &&
- (h_blank_end % 2 == 0) &&
- (timing->h_sync_width % 2 == 0);
- }
- return divisible;
-}
-
-static bool is_dp_dig_pixel_rate_div_policy(struct dc *dc, const struct dc_crtc_timing *timing)
-{
- /* should be functionally the same as dcn32_is_dp_dig_pixel_rate_div_policy for DP encoders*/
- return is_h_timing_divisible_by_2(timing) &&
- dc->debug.enable_dp_dig_pixel_rate_div_policy;
-}
-
void enc32_stream_encoder_dp_unblank(
struct dc_link *link,
struct stream_encoder *enc,
const struct encoder_unblank_param *param)
{
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
- struct dc *dc = enc->ctx->dc;
if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
uint32_t n_vid = 0x8000;
@@ -291,7 +257,7 @@ void enc32_stream_encoder_dp_unblank(
/* YCbCr 4:2:0 : Computed VID_M will be 2X the input rate */
if (is_two_pixels_per_containter(&param->timing) || param->opp_cnt > 1
- || is_dp_dig_pixel_rate_div_policy(dc, &param->timing)) {
+ || param->pix_per_cycle > 1) {
/*this logic should be the same in get_pixel_clock_parameters() */
n_multiply = 1;
pix_per_cycle = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.h
index 1be5410cce97..ca53d39561d2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.h
@@ -177,11 +177,12 @@
SE_SF(DIG0_DIG_FE_CNTL, DIG_SYMCLK_FE_ON, mask_sh),\
SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh),\
SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, mask_sh),\
SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, mask_sh),\
SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, mask_sh),\
SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET, mask_sh),\
- SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, mask_sh),\
- SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, mask_sh)
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, mask_sh)
+
void dcn32_dio_stream_encoder_construct(
struct dcn10_stream_encoder *enc1,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c
index 13be5f06d987..2ed382a8e79c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c
@@ -23,7 +23,6 @@
*
*/
-
#include "reg_helper.h"
#include "core_types.h"
@@ -127,11 +126,6 @@ void dcn321_link_encoder_construct(
* while doing the DP sink detect
*/
-/* if (dal_adapter_service_is_feature_supported(as,
- FEATURE_DP_SINK_DETECT_POLL_DATA_PIN))
- enc10->base.features.flags.bits.
- DP_SINK_DETECT_POLL_DATA_PIN = true;*/
-
enc10->base.output_signals =
SIGNAL_TYPE_DVI_SINGLE_LINK |
SIGNAL_TYPE_DVI_DUAL_LINK |
@@ -191,7 +185,6 @@ void dcn321_link_encoder_construct(
__func__,
result);
}
- if (enc10->base.ctx->dc->debug.hdmi20_disable) {
+ if (enc10->base.ctx->dc->debug.hdmi20_disable)
enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
- }
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.h
index 2205f39b0a24..2205f39b0a24 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_dio_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.h
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c
new file mode 100644
index 000000000000..9972911330b6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c
@@ -0,0 +1,391 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "reg_helper.h"
+
+#include "core_types.h"
+#include "link_encoder.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+#include "dcn35_dio_link_encoder.h"
+#include "dc_dmub_srv.h"
+#define CTX \
+ enc10->base.ctx
+#define DC_LOGGER \
+ enc10->base.ctx->logger
+
+#define REG(reg)\
+ (enc10->link_regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc10->link_shift->field_name, enc10->link_mask->field_name
+/*
+ * @brief
+ * Trigger Source Select
+ * ASIC-dependent, actual values for register programming
+ */
+#define DCN35_DIG_FE_SOURCE_SELECT_INVALID 0x0
+#define DCN35_DIG_FE_SOURCE_SELECT_DIGA 0x1
+#define DCN35_DIG_FE_SOURCE_SELECT_DIGB 0x2
+#define DCN35_DIG_FE_SOURCE_SELECT_DIGC 0x4
+#define DCN35_DIG_FE_SOURCE_SELECT_DIGD 0x08
+#define DCN35_DIG_FE_SOURCE_SELECT_DIGE 0x10
+
+
+/* Return true when DIG_BE_CLK_CNTL.DIG_BE_CLK_EN reads back as 1. */
+bool dcn35_is_dig_enabled(struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t clk_en;
+
+	REG_GET(DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, &clk_en);
+
+	return clk_en == 1;
+}
+
+/*
+ * Read DIG_BE_CLK_CNTL.DIG_BE_MODE and translate it into a signal type.
+ * Mode values not listed in the switch yield SIGNAL_TYPE_NONE.
+ */
+enum signal_type dcn35_get_dig_mode(
+	struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	enum signal_type signal = SIGNAL_TYPE_NONE;
+	uint32_t be_mode;
+
+	REG_GET(DIG_BE_CLK_CNTL, DIG_BE_MODE, &be_mode);
+
+	switch (be_mode) {
+	case 0:
+		signal = SIGNAL_TYPE_DISPLAY_PORT;
+		break;
+	case 2:
+		signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
+		break;
+	case 3:
+		signal = SIGNAL_TYPE_HDMI_TYPE_A;
+		break;
+	case 5:
+		signal = SIGNAL_TYPE_DISPLAY_PORT_MST;
+		break;
+	default:
+		break;
+	}
+
+	return signal;
+}
+
+/*
+ * Program DIG_BE_CLK_CNTL.DIG_BE_MODE for @signal, then set DIG_BE_CLK_EN.
+ * An unhandled signal trips ASSERT_CRITICAL and leaves DIG_BE_MODE untouched;
+ * DIG_BE_CLK_EN is set in every case.
+ */
+void dcn35_link_encoder_setup(
+	struct link_encoder *enc,
+	enum signal_type signal)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	bool mode_known = true;
+	uint32_t be_mode = 0;
+
+	switch (signal) {
+	case SIGNAL_TYPE_EDP:
+	case SIGNAL_TYPE_DISPLAY_PORT:
+		/* DP SST */
+		be_mode = 0;
+		break;
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		/* TMDS-DVI */
+		be_mode = 2;
+		break;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+		/* TMDS-HDMI */
+		be_mode = 3;
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		/* DP MST */
+		be_mode = 5;
+		break;
+	default:
+		/* invalid mode ! */
+		ASSERT_CRITICAL(false);
+		mode_known = false;
+		break;
+	}
+
+	if (mode_known)
+		REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, be_mode);
+
+	REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, 1);
+}
+
+/*
+ * Run enc31_hw_init(), then apply the DIO fine-grain clock gating debug
+ * option through dcn35_link_encoder_set_fgcg().
+ */
+void dcn35_link_encoder_init(struct link_encoder *enc)
+{
+	bool fgcg_enable;
+
+	enc31_hw_init(enc);
+
+	fgcg_enable = enc->ctx->dc->debug.enable_fine_grain_clock_gating.bits.dio;
+	dcn35_link_encoder_set_fgcg(enc, fgcg_enable);
+}
+
+/*
+ * Write DIO_CLK_CNTL.DIO_FGCG_REP_DIS with the inverse of @enable:
+ * 0 when @enable is true, 1 when it is false.
+ */
+void dcn35_link_encoder_set_fgcg(struct link_encoder *enc, bool enable)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t rep_dis = enable ? 0 : 1;
+
+	REG_UPDATE(DIO_CLK_CNTL, DIO_FGCG_REP_DIS, rep_dis);
+}
+
+/*
+ * Link encoder hook table installed by dcn35_link_encoder_construct().
+ * Entries named dcn35_* are implemented in this file; the remaining hooks
+ * point at existing dcn10/enc2/link_enc2/dcn30/dcn31 implementations.
+ */
+static const struct link_encoder_funcs dcn35_link_enc_funcs = {
+ .read_state = link_enc2_read_state,
+ .validate_output_with_stream =
+ dcn30_link_encoder_validate_output_with_stream,
+ .hw_init = dcn35_link_encoder_init,
+ .setup = dcn35_link_encoder_setup,
+ .enable_tmds_output = dcn10_link_encoder_enable_tmds_output,
+ .enable_dp_output = dcn35_link_encoder_enable_dp_output,
+ .enable_dp_mst_output = dcn35_link_encoder_enable_dp_mst_output,
+ .disable_output = dcn35_link_encoder_disable_output,
+ .dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings,
+ .dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern,
+ .update_mst_stream_allocation_table =
+ dcn10_link_encoder_update_mst_stream_allocation_table,
+ .psr_program_dp_dphy_fast_training =
+ dcn10_psr_program_dp_dphy_fast_training,
+ .psr_program_secondary_packet = dcn10_psr_program_secondary_packet,
+ .connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe,
+ .enable_hpd = dcn10_link_encoder_enable_hpd,
+ .disable_hpd = dcn10_link_encoder_disable_hpd,
+ .is_dig_enabled = dcn35_is_dig_enabled,
+ .destroy = dcn10_link_encoder_destroy,
+ .fec_set_enable = enc2_fec_set_enable,
+ .fec_set_ready = enc2_fec_set_ready,
+ .fec_is_active = enc2_fec_is_active,
+ .get_dig_frontend = dcn10_get_dig_frontend,
+ .get_dig_mode = dcn35_get_dig_mode,
+ .is_in_alt_mode = dcn31_link_encoder_is_in_alt_mode,
+ .get_max_link_cap = dcn31_link_encoder_get_max_link_cap,
+ .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux,
+ .enable_dpia_output = dcn35_link_encoder_enable_dpia_output,
+ .disable_dpia_output = dcn35_link_encoder_disable_dpia_output,
+};
+
+/*
+ * Initialize the dcn10 link encoder embedded in @enc20.
+ *
+ * Installs dcn35_link_enc_funcs, copies identity/connector data from
+ * @init_data, copies @enc_features, stores the register/shift/mask tables,
+ * picks the preferred DIG engine from the transmitter, and finally adjusts
+ * the feature flags from the VBIOS connector speed cap info and the
+ * hdmi20_disable debug option.
+ */
+void dcn35_link_encoder_construct(
+ struct dcn20_link_encoder *enc20,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask)
+{
+ struct bp_connector_speed_cap_info bp_cap_info = {0};
+ const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs;
+ enum bp_result result = BP_RESULT_OK;
+ struct dcn10_link_encoder *enc10 = &enc20->enc10;
+
+ enc10->base.funcs = &dcn35_link_enc_funcs;
+ enc10->base.ctx = init_data->ctx;
+ enc10->base.id = init_data->encoder;
+
+ enc10->base.hpd_source = init_data->hpd_source;
+ enc10->base.connector = init_data->connector;
+
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+ enc10->base.features = *enc_features;
+
+ /* Must stay after the features copy above, which overwrites all flag bits. */
+ if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+ enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+
+ enc10->base.transmitter = init_data->transmitter;
+
+ /* Set the flag to indicate whether the driver polls the I2C data pin
+ * while doing DP sink detect (the code for it is commented out below).
+ */
+
+/* if (dal_adapter_service_is_feature_supported(as,
+ * FEATURE_DP_SINK_DETECT_POLL_DATA_PIN))
+ * enc10->base.features.flags.bits.
+ * DP_SINK_DETECT_POLL_DATA_PIN = true;
+ */
+
+ enc10->base.output_signals =
+ SIGNAL_TYPE_DVI_SINGLE_LINK |
+ SIGNAL_TYPE_DVI_DUAL_LINK |
+ SIGNAL_TYPE_LVDS |
+ SIGNAL_TYPE_DISPLAY_PORT |
+ SIGNAL_TYPE_DISPLAY_PORT_MST |
+ SIGNAL_TYPE_EDP |
+ SIGNAL_TYPE_HDMI_TYPE_A;
+
+ enc10->link_regs = link_regs;
+ enc10->aux_regs = aux_regs;
+ enc10->hpd_regs = hpd_regs;
+ enc10->link_shift = link_shift;
+ enc10->link_mask = link_mask;
+
+ /* UNIPHY A..E select DIGA..DIGE; any other transmitter asserts. */
+ switch (enc10->base.transmitter) {
+ case TRANSMITTER_UNIPHY_A:
+ enc10->base.preferred_engine = ENGINE_ID_DIGA;
+ break;
+ case TRANSMITTER_UNIPHY_B:
+ enc10->base.preferred_engine = ENGINE_ID_DIGB;
+ break;
+ case TRANSMITTER_UNIPHY_C:
+ enc10->base.preferred_engine = ENGINE_ID_DIGC;
+ break;
+ case TRANSMITTER_UNIPHY_D:
+ enc10->base.preferred_engine = ENGINE_ID_DIGD;
+ break;
+ case TRANSMITTER_UNIPHY_E:
+ enc10->base.preferred_engine = ENGINE_ID_DIGE;
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+ }
+
+ /* Default to HDMI 6G enabled; the VBIOS data and debug option below may clear it. */
+ enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+
+ /* NOTE(review): result starts as BP_RESULT_OK, so when the
+ * get_connector_speed_cap_info hook is absent the override branch below
+ * still runs against the zero-initialized bp_cap_info - confirm intended.
+ */
+ if (bp_funcs->get_connector_speed_cap_info)
+ result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
+ enc10->base.connector, &bp_cap_info);
+
+ /* Override features with DCE-specific values */
+ if (result == BP_RESULT_OK) {
+ enc10->base.features.flags.bits.IS_HBR2_CAPABLE =
+ bp_cap_info.DP_HBR2_EN;
+ enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
+ bp_cap_info.DP_HBR3_EN;
+ enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+ enc10->base.features.flags.bits.IS_DP2_CAPABLE = 1;
+ enc10->base.features.flags.bits.IS_UHBR10_CAPABLE = bp_cap_info.DP_UHBR10_EN;
+ enc10->base.features.flags.bits.IS_UHBR13_5_CAPABLE = bp_cap_info.DP_UHBR13_5_EN;
+ enc10->base.features.flags.bits.IS_UHBR20_CAPABLE = bp_cap_info.DP_UHBR20_EN;
+
+ } else {
+ DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+ __func__,
+ result);
+ }
+ /* The debug option wins over whatever the VBIOS reported. */
+ if (enc10->base.ctx->dc->debug.hdmi20_disable)
+ enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
+
+}
+
+/*
+ * DPIA equivalent of link_transmitter_control.
+ *
+ * Wraps @dpia_control in a DMUB_CMD__DPIA / DMUB_CMD__DPIA_DIG1_DPIA_CONTROL
+ * command and executes it through dc_wake_and_execute_dmub_cmd() with
+ * DM_DMUB_WAIT_TYPE_WAIT.
+ *
+ * Returns the result of dc_wake_and_execute_dmub_cmd() instead of an
+ * unconditional true, so a failed command is visible to callers that check.
+ */
+static bool link_dpia_control(struct dc_context *dc_ctx,
+	struct dmub_cmd_dig_dpia_control_data *dpia_control)
+{
+	union dmub_rb_cmd cmd;
+
+	/* Clear the whole union, not only the member filled in below. */
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.dig1_dpia_control.header.type = DMUB_CMD__DPIA;
+	cmd.dig1_dpia_control.header.sub_type =
+		DMUB_CMD__DPIA_DIG1_DPIA_CONTROL;
+	/* The payload is everything in the command that follows the header. */
+	cmd.dig1_dpia_control.header.payload_bytes =
+		sizeof(cmd.dig1_dpia_control) -
+		sizeof(cmd.dig1_dpia_control.header);
+
+	cmd.dig1_dpia_control.dpia_control = *dpia_control;
+
+	return dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+/* Clear DP_LINK_CNTL.DP_LINK_TRAINING_COMPLETE, i.e. reset "training complete". */
+static void link_encoder_disable(struct dcn10_link_encoder *enc10)
+{
+	REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, 0);
+}
+
+/*
+ * Enable DP output. When dc->config.unify_link_enc_assignment is clear this
+ * forwards to the dcn31 implementation; when set it logs the preferred
+ * engine and forwards to the dcn20 implementation.
+ */
+void dcn35_link_encoder_enable_dp_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	if (!enc->ctx->dc->config.unify_link_enc_assignment) {
+		dcn31_link_encoder_enable_dp_output(enc, link_settings, clock_source);
+		return;
+	}
+
+	DC_LOG_DEBUG("%s: enc_id(%d)\n", __func__, enc->preferred_engine);
+	dcn20_link_encoder_enable_dp_output(enc, link_settings, clock_source);
+}
+
+/*
+ * Enable DP MST output. When dc->config.unify_link_enc_assignment is clear
+ * this forwards to the dcn31 implementation; when set it logs the preferred
+ * engine and forwards to the dcn10 implementation.
+ */
+void dcn35_link_encoder_enable_dp_mst_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	if (!enc->ctx->dc->config.unify_link_enc_assignment) {
+		dcn31_link_encoder_enable_dp_mst_output(enc, link_settings, clock_source);
+		return;
+	}
+
+	DC_LOG_DEBUG("%s: enc_id(%d)\n", __func__, enc->preferred_engine);
+	dcn10_link_encoder_enable_dp_mst_output(enc, link_settings, clock_source);
+}
+
+/*
+ * Disable output for @signal. When dc->config.unify_link_enc_assignment is
+ * clear this forwards to the dcn31 implementation; when set it logs the
+ * preferred engine and forwards to the dcn10 implementation.
+ */
+void dcn35_link_encoder_disable_output(
+	struct link_encoder *enc,
+	enum signal_type signal)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	if (!enc->ctx->dc->config.unify_link_enc_assignment) {
+		dcn31_link_encoder_disable_output(enc, signal);
+		return;
+	}
+
+	DC_LOG_DEBUG("%s: enc_id(%d)\n", __func__, enc->preferred_engine);
+	dcn10_link_encoder_disable_output(enc, signal);
+}
+
+/*
+ * Enable DPIA output: configure the encoder for @link_settings via
+ * enc1_configure_encoder(), then send a TRANSMITTER_CONTROL_ENABLE DPIA
+ * control command built from the arguments.
+ */
+void dcn35_link_encoder_enable_dpia_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	uint8_t dpia_id,
+	uint8_t digmode,
+	uint8_t fec_rdy)
+{
+	struct dmub_cmd_dig_dpia_control_data ctrl = { 0 };
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	enc1_configure_encoder(enc10, link_settings);
+
+	ctrl.action = (uint8_t)TRANSMITTER_CONTROL_ENABLE;
+	ctrl.dpia_id = dpia_id;
+	ctrl.enc_id = enc->preferred_engine;
+	ctrl.fec_rdy = fec_rdy;
+	ctrl.mode_laneset.digmode = digmode;
+	ctrl.lanenum = (uint8_t)link_settings->lane_count;
+	ctrl.symclk_10khz = link_settings->link_rate *
+				LINK_RATE_REF_FREQ_IN_KHZ / 10;
+	/* hpdsel 6 gives DIG_BE_CNTL.DIG_HPD_SELECT = 5 (hpdsel - 1): HPD pin unused by DPIA. */
+	ctrl.hpdsel = 6;
+
+	DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, ctrl.dpia_id, ctrl.enc_id);
+	link_dpia_control(enc->ctx, &ctrl);
+}
+
+/*
+ * Disable DPIA output. Returns early when the is_dig_enabled hook exists and
+ * reports the DIG as not enabled; otherwise sends a
+ * TRANSMITTER_CONTROL_DISABLE DPIA control command and then calls
+ * link_encoder_disable().
+ */
+void dcn35_link_encoder_disable_dpia_output(
+	struct link_encoder *enc,
+	uint8_t dpia_id,
+	uint8_t digmode)
+{
+	struct dmub_cmd_dig_dpia_control_data ctrl = { 0 };
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	/* Nothing to tear down if the hook says the DIG is already off. */
+	if (enc->funcs->is_dig_enabled && !enc->funcs->is_dig_enabled(enc))
+		return;
+
+	ctrl.action = (uint8_t)TRANSMITTER_CONTROL_DISABLE;
+	ctrl.dpia_id = dpia_id;
+	ctrl.enc_id = enc->preferred_engine;
+	ctrl.mode_laneset.digmode = digmode;
+
+	DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, ctrl.dpia_id, ctrl.enc_id);
+	link_dpia_control(enc->ctx, &ctrl);
+
+	link_encoder_disable(enc10);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h
new file mode 100644
index 000000000000..5712e6553fab
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DC_LINK_ENCODER__DCN35_H__
+#define __DC_LINK_ENCODER__DCN35_H__
+
+#include "dcn32/dcn32_dio_link_encoder.h"
+#include "dcn30/dcn30_dio_link_encoder.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+
+#define LINK_ENCODER_MASK_SH_LIST_DCN35(mask_sh) \
+	LE_SF(DIG0_DIG_BE_EN_CNTL, DIG_BE_ENABLE, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CNTL, DIG_RB_SWITCH_EN, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CNTL, DIG_HPD_SELECT, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_MODE, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SOFT_RESET, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CLK_CNTL, HDCP_SOFT_RESET, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_CLOCK_ON, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_HDCP_CLOCK_ON, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_TMDS_CLOCK_ON, mask_sh),\
+	LE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+	LE_SF(DIG0_TMDS_CTL_BITS, TMDS_CTL0, mask_sh), \
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_BYPASS, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE0, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE1, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE2, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE3, mask_sh),\
+	LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, mask_sh),\
+	LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_SEL, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM1, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM2, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM3, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM4, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM5, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM6, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM7, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM8, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, mask_sh),\
+	LE_SF(DP0_DP_DPHY_FAST_TRAINING, DPHY_RX_FAST_TRAINING_CAPABLE, mask_sh),\
+	LE_SF(DP0_DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, mask_sh),\
+	LE_SF(DP0_DP_DPHY_TRAINING_PATTERN_SEL, DPHY_TRAINING_PATTERN_SEL, mask_sh),\
+	LE_SF(DP0_DP_DPHY_HBR2_PATTERN_CONTROL, DP_DPHY_HBR2_PATTERN_CONTROL, mask_sh),\
+	LE_SF(DP0_DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, mask_sh),\
+	LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_IDLE_BS_INTERVAL, mask_sh),\
+	LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VBID_DISABLE, mask_sh),\
+	LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VID_ENHANCED_FRAME_MODE, mask_sh),\
+	LE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+	LE_SF(DP0_DP_CONFIG, DP_UDI_LANES, mask_sh),\
+	LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_LINE_NUM, mask_sh),\
+	LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_PRIORITY, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC0, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC1, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT0, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT1, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC2, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC3, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT2, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT3, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_SAT_UPDATE, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_16_MTP_KEEPOUT, mask_sh),\
+	LE_SF(DP_AUX0_AUX_CONTROL, AUX_HPD_SEL, mask_sh),\
+	LE_SF(DP_AUX0_AUX_CONTROL, AUX_LS_READ_EN, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_RECEIVE_WINDOW, mask_sh),\
+	LE_SF(HPD0_DC_HPD_CONTROL, DC_HPD_EN, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_EN, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_READY_SHADOW, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_ACTIVE_STATUS, mask_sh),\
+	/* DIG0_TMDS_CTL_BITS.TMDS_CTL0 is already listed above; the duplicate entry was dropped */\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_START_WINDOW, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_HALF_SYM_DETECT_LEN, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_TRANSITION_FILTER_EN, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_START, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_STOP, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_PHASE_DETECT_LEN, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_DETECTION_THRESHOLD, mask_sh), \
+	LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_LEN, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_SYMBOLS, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_MODE_DET_CHECK_DELAY, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_PRECHARGE_SKIP, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN_MUL, mask_sh),\
+	LE_SF(DIO_LINKA_CNTL, ENC_TYPE_SEL, mask_sh),\
+	LE_SF(DIO_LINKA_CNTL, HPO_DP_ENC_SEL, mask_sh),\
+	LE_SF(DIO_LINKA_CNTL, HPO_HDMI_ENC_SEL, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, DISPCLK_R_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, DISPCLK_G_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, REFCLK_R_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, REFCLK_G_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SOCCLK_G_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLK_FE_R_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLK_FE_G_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLK_R_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLK_G_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, DIO_FGCG_REP_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, DISPCLK_G_HDCP_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLKA_G_HDCP_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLKB_G_HDCP_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLKC_G_HDCP_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLKD_G_HDCP_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLKE_G_HDCP_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLKF_G_HDCP_GATE_DIS, mask_sh),\
+	LE_SF(DIO_CLK_CNTL, SYMCLKG_G_HDCP_GATE_DIS, mask_sh)
+
+
+void dcn35_link_encoder_construct(
+ struct dcn20_link_encoder *enc20,
+ const struct encoder_init_data *init_data,
+ const struct encoder_feature_support *enc_features,
+ const struct dcn10_link_enc_registers *link_regs,
+ const struct dcn10_link_enc_aux_registers *aux_regs,
+ const struct dcn10_link_enc_hpd_registers *hpd_regs,
+ const struct dcn10_link_enc_shift *link_shift,
+ const struct dcn10_link_enc_mask *link_mask);
+
+void dcn35_link_encoder_init(struct link_encoder *enc);
+void dcn35_link_encoder_set_fgcg(struct link_encoder *enc, bool enabled);
+bool dcn35_is_dig_enabled(struct link_encoder *enc);
+
+enum signal_type dcn35_get_dig_mode(struct link_encoder *enc);
+void dcn35_link_encoder_setup(struct link_encoder *enc, enum signal_type signal);
+
+/*
+ * Enable DP transmitter and its encoder.
+ */
+void dcn35_link_encoder_enable_dp_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source);
+
+/*
+ * Enable DP transmitter and its encoder in MST mode.
+ */
+void dcn35_link_encoder_enable_dp_mst_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ enum clock_source_id clock_source);
+
+/*
+ * Disable transmitter and its encoder.
+ */
+void dcn35_link_encoder_disable_output(
+ struct link_encoder *enc,
+ enum signal_type signal);
+
+/*
+ * Enable DP transmitter and its encoder for dpia port.
+ */
+void dcn35_link_encoder_enable_dpia_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ uint8_t dpia_id,
+ uint8_t digmode,
+ uint8_t fec_rdy);
+
+/*
+ * Disable transmitter and its encoder for dpia port.
+ */
+void dcn35_link_encoder_disable_dpia_output(
+ struct link_encoder *enc,
+ uint8_t dpia_id,
+ uint8_t digmode);
+
+#endif /* __DC_LINK_ENCODER__DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c
new file mode 100644
index 000000000000..fd5d1dbf9dc6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c
@@ -0,0 +1,520 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "dc_bios_types.h"
+#include "dcn30/dcn30_dio_stream_encoder.h"
+#include "dcn314/dcn314_dio_stream_encoder.h"
+#include "dcn32/dcn32_dio_stream_encoder.h"
+#include "dcn35_dio_stream_encoder.h"
+#include "reg_helper.h"
+#include "hw_shared.h"
+#include "link_service.h"
+#include "dpcd_defs.h"
+
+#define DC_LOGGER \
+ enc1->base.ctx->logger
+
+#define REG(reg)\
+ (enc1->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc1->se_shift->field_name, enc1->se_mask->field_name
+
+#define VBI_LINE_0 0
+#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000
+
+#define CTX \
+ enc1->base.ctx
+/* Set up the stream encoder for DVI: VBIOS encoder_control (or direct DIG setup), then the common attribute helper */
+static void enc35_stream_encoder_dvi_set_stream_attribute(
+	struct stream_encoder *enc,
+	struct dc_crtc_timing *crtc_timing,
+	bool is_dual_link)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+		struct bp_encoder_control cntl = {0};
+
+		cntl.action = ENCODER_CONTROL_SETUP;
+		cntl.engine_id = enc1->base.id;
+		cntl.signal = is_dual_link ?
+			SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK;
+		cntl.enable_dp_audio = false;
+		cntl.pixel_clock = crtc_timing->pix_clk_100hz / 10; /* 100 Hz units -> kHz */
+		cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR;
+		/* a failed VBIOS call returns early and skips the attribute helper below */
+		if (enc1->base.bp->funcs->encoder_control(
+				enc1->base.bp, &cntl) != BP_RESULT_OK)
+			return;
+
+	} else {
+
+		//Set pattern for clock channel, default value 0x63 does not work
+		REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+		//DIG_BE_TMDS_DVI_MODE : TMDS-DVI mode is already set in link_encoder_setup
+
+		//DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+		/* DIG_START is removed from the register spec */
+	}
+	/* only RGB timing with COLOR_DEPTH_888 is expected on this path (asserted) */
+	ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB);
+	ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888);
+	enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+}
+/* Set up the stream encoder for HDMI: VBIOS/DIG setup, HDMI_CONTROL, scrambling, GC and audio InfoFrame packets */
+static void enc35_stream_encoder_hdmi_set_stream_attribute(
+	struct stream_encoder *enc,
+	struct dc_crtc_timing *crtc_timing,
+	int actual_pix_clk_khz,
+	bool enable_audio)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+		struct bp_encoder_control cntl = {0};
+
+		cntl.action = ENCODER_CONTROL_SETUP;
+		cntl.engine_id = enc1->base.id;
+		cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A;
+		cntl.enable_dp_audio = enable_audio;
+		cntl.pixel_clock = actual_pix_clk_khz;
+		cntl.lanes_number = LANE_COUNT_FOUR;
+		/* a failed VBIOS call returns early: none of the programming below happens */
+		if (enc1->base.bp->funcs->encoder_control(
+				enc1->base.bp, &cntl) != BP_RESULT_OK)
+			return;
+
+	} else {
+
+		//Set pattern for clock channel, default value 0x63 does not work
+		REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+		//DIG_BE_TMDS_HDMI_MODE : TMDS-HDMI mode is already set in link_encoder_setup
+
+		//DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+		/* DIG_START is removed from the register spec */
+		enc314_enable_fifo(enc);
+	}
+
+	/* Configure pixel encoding */
+	enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+
+	/* setup HDMI engine; deep color and scrambling start disabled and are adjusted below */
+	REG_UPDATE_6(HDMI_CONTROL,
+		HDMI_PACKET_GEN_VERSION, 1,
+		HDMI_KEEPOUT_MODE, 1,
+		HDMI_DEEP_COLOR_ENABLE, 0,
+		HDMI_DATA_SCRAMBLE_EN, 0,
+		HDMI_NO_EXTRA_NULL_PACKET_FILLED, 1,
+		HDMI_CLOCK_CHANNEL_RATE, 0);
+
+	/* Configure color depth; at 10/12 bpc, YCbCr 4:2:2 keeps HDMI_DEEP_COLOR_ENABLE at 0 */
+	switch (crtc_timing->display_color_depth) {
+	case COLOR_DEPTH_888:
+		REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+		break;
+	case COLOR_DEPTH_101010:
+		if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+			REG_UPDATE_2(HDMI_CONTROL,
+					HDMI_DEEP_COLOR_DEPTH, 1,
+					HDMI_DEEP_COLOR_ENABLE, 0);
+		} else {
+			REG_UPDATE_2(HDMI_CONTROL,
+					HDMI_DEEP_COLOR_DEPTH, 1,
+					HDMI_DEEP_COLOR_ENABLE, 1);
+		}
+		break;
+	case COLOR_DEPTH_121212:
+		if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+			REG_UPDATE_2(HDMI_CONTROL,
+					HDMI_DEEP_COLOR_DEPTH, 2,
+					HDMI_DEEP_COLOR_ENABLE, 0);
+		} else {
+			REG_UPDATE_2(HDMI_CONTROL,
+					HDMI_DEEP_COLOR_DEPTH, 2,
+					HDMI_DEEP_COLOR_ENABLE, 1);
+		}
+		break;
+	case COLOR_DEPTH_161616:
+		REG_UPDATE_2(HDMI_CONTROL,
+				HDMI_DEEP_COLOR_DEPTH, 3,
+				HDMI_DEEP_COLOR_ENABLE, 1);
+		break;
+	default:
+		break;
+	}
+
+	if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) {
+		/* enable HDMI data scrambler
+		 * HDMI_CLOCK_CHANNEL_RATE_MORE_340M
+		 * Clock channel frequency is 1/4 of character rate.
+		 */
+		REG_UPDATE_2(HDMI_CONTROL,
+			HDMI_DATA_SCRAMBLE_EN, 1,
+			HDMI_CLOCK_CHANNEL_RATE, 1);
+	} else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) {
+
+		/* TODO: New feature for DCE11, still need to implement */
+
+		/* enable HDMI data scrambler
+		 * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE
+		 * Clock channel frequency is the same
+		 * as character rate
+		 */
+		REG_UPDATE_2(HDMI_CONTROL,
+			HDMI_DATA_SCRAMBLE_EN, 1,
+			HDMI_CLOCK_CHANNEL_RATE, 0);
+	}
+
+
+	/* Enable transmission of General Control packet on every frame */
+	REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL,
+		HDMI_GC_CONT, 1,
+		HDMI_GC_SEND, 1,
+		HDMI_NULL_SEND, 1);
+
+	/* Disable Audio Content Protection packet transmission */
+	REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0);
+
+	/* following belongs to audio */
+	/* Enable Audio InfoFrame packet transmission. */
+	REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+
+	/* update double-buffered AUDIO_INFO registers immediately */
+	ASSERT(enc->afmt);
+	enc->afmt->funcs->audio_info_immediate_update(enc->afmt);
+
+	/* Select line number on which to send Audio InfoFrame packets */
+	REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE,
+				VBI_LINE_0 + 2);
+
+	/* clear HDMI GC AVMUTE */
+	REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0);
+	switch (crtc_timing->pixel_encoding) { /* TMDS_PIXEL_ENCODING is 1 only for YCbCr 4:2:2 */
+	case PIXEL_ENCODING_YCBCR422:
+		REG_UPDATE(HDMI_CONTROL, TMDS_PIXEL_ENCODING, 1);
+		break;
+	default:
+		REG_UPDATE(HDMI_CONTROL, TMDS_PIXEL_ENCODING, 0);
+		break;
+	}
+	REG_UPDATE(HDMI_CONTROL, TMDS_COLOR_FORMAT, 0);
+}
+
+
+/* Select the DIG_FE_MODE value matching @signal; no register is written when @enable is false */
+static void enc35_stream_encoder_enable(
+	struct stream_encoder *enc,
+	enum signal_type signal,
+	bool enable)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	if (!enable)
+		return;
+	switch (signal) {
+	case SIGNAL_TYPE_EDP:
+	case SIGNAL_TYPE_DISPLAY_PORT:
+	case SIGNAL_TYPE_VIRTUAL:
+		/* DP SST -> mode 0 */
+		REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 0);
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		/* DP MST -> mode 5 */
+		REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 5);
+		break;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+		/* TMDS-HDMI -> mode 3 */
+		REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 3);
+		break;
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		/* TMDS-DVI -> mode 2 */
+		REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 2);
+		break;
+	default:
+		/* unsupported signal type: flag it, leave the mode untouched */
+		ASSERT_CRITICAL(false);
+	}
+}
+
+static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
+{
+	/* YCbCr 4:2:0 qualifies on its own; YCbCr 4:2:2 only with DSC and without ycbcr422_simple */
+	if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
+		return true;
+	return timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
+			&& !timing->dsc_cfg.ycbcr422_simple;
+}
+
+static void enc35_stream_encoder_dp_unblank(
+	struct dc_link *link,
+	struct stream_encoder *enc,
+	const struct encoder_unblank_param *param)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
+		uint32_t n_vid = 0x8000;
+		uint32_t m_vid;
+		uint32_t n_multiply = 0;
+		uint32_t pix_per_cycle = 0;
+		uint64_t m_vid_l = n_vid;
+
+		/* two-pixel containers (e.g. YCbCr 4:2:0), opp_cnt > 1 or pix_per_cycle > 1: computed VID_M will be 2X the input rate */
+		if (is_two_pixels_per_containter(&param->timing) || param->opp_cnt > 1
+			|| param->pix_per_cycle > 1) {
+			/* this logic should be the same in get_pixel_clock_parameters() */
+			n_multiply = 1;
+			pix_per_cycle = 1;
+		}
+		/* M / N = Fstream / Flink
+		 * m_vid / n_vid = pixel rate / link rate
+		 */
+		/* m_vid = n_vid * pixel clock (kHz) / (link_rate * LINK_RATE_REF_FREQ_IN_KHZ) */
+		m_vid_l *= param->timing.pix_clk_100hz / 10;
+		m_vid_l = div_u64(m_vid_l,
+			param->link_settings.link_rate
+				* LINK_RATE_REF_FREQ_IN_KHZ);
+
+		m_vid = (uint32_t) m_vid_l;
+
+		/* enable auto measurement: generation is switched off here and back on once M/N are programmed */
+
+		REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0);
+
+		/* auto measurement needs 1 full 0x8000 symbol cycle to kick in,
+		 * therefore program initial value for Mvid and Nvid
+		 */
+
+		REG_UPDATE(DP_VID_N, DP_VID_N, n_vid);
+
+		REG_UPDATE(DP_VID_M, DP_VID_M, m_vid);
+
+		REG_UPDATE_2(DP_VID_TIMING,
+				DP_VID_M_N_GEN_EN, 1,
+				DP_VID_N_MUL, n_multiply);
+
+		REG_UPDATE(DP_PIXEL_FORMAT,
+				DP_PIXEL_PER_CYCLE_PROCESSING_MODE,
+				pix_per_cycle);
+	}
+
+	/* make sure stream is disabled before resetting steer fifo */
+	REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false);
+	REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000);
+
+	/* DIG_START is removed from the register spec */
+
+	/* switch DP encoder to CRTC data, but reset the steer fifo first. It may happen
+	 * that it overflows during mode transition, and sometimes doesn't recover.
+	 */
+	REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1);
+	udelay(10);
+
+	REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
+
+	/* wait 100us for DIG/DP logic to prime
+	 * (i.e. a few video lines)
+	 */
+	udelay(100);
+
+	/* the hardware would start sending video at the start of the next DP
+	 * frame (i.e. rising edge of the vblank).
+	 * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this
+	 * register has no effect on enable transition! HW always makes sure
+	 * VID_STREAM enable at start of next frame, and this is not
+	 * programmable
+	 */
+
+	REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true);
+
+	/*
+	 * DIG Resync FIFO now needs to be explicitly enabled.
+	 * This should come after DP_VID_STREAM_ENABLE per HW docs.
+	 */
+	enc314_enable_fifo(enc);
+
+	link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM);
+}
+
+static void enc35_stream_encoder_map_to_link(
+	struct stream_encoder *enc,
+	uint32_t stream_enc_inst,
+	uint32_t link_enc_inst)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	/* both instance indices are asserted to be below 5; only link_enc_inst is written to hardware */
+	ASSERT(stream_enc_inst < 5 && link_enc_inst < 5);
+	REG_UPDATE(STREAM_MAPPER_CONTROL, DIG_STREAM_LINK_TARGET, link_enc_inst);
+}
+
+static void enc35_reset_fifo(struct stream_encoder *enc, bool reset)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t fifo_reset = reset ? 1 : 0;
+	uint32_t symclk_fe_on;
+
+	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, fifo_reset);
+	REG_GET(DIG_FE_CLK_CNTL, DIG_FE_SYMCLK_FE_G_CLOCK_ON, &symclk_fe_on);
+
+	if (!symclk_fe_on)
+		udelay(10); /* clock-on bit reads 0: fixed delay instead of polling RESET_DONE */
+	else
+		REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, fifo_reset, 10, 5000);
+}
+
+static bool enc35_is_fifo_enabled(struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t fifo_enable;
+
+	REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &fifo_enable); /* read back the enable field */
+	return !!fifo_enable;
+}
+void enc35_disable_fifo(struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0); /* reverse of the order used by enc35_enable_fifo() */
+	REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
+	REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
+}
+
+void enc35_enable_fifo(struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+	REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
+	REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
+	/* pulse the FIFO reset (assert, then release) before DIG_FIFO_ENABLE is set */
+	enc35_reset_fifo(enc, true);
+	enc35_reset_fifo(enc, false);
+
+	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1);
+}
+
+static uint32_t enc35_get_pixels_per_cycle(struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t pixel_mode;
+
+	/* DIG_FIFO_OUTPUT_PIXEL_MODE: 0 -> 1 pixel per cycle, 1 -> 2 pixels per cycle */
+	REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, &pixel_mode);
+
+	if (pixel_mode == 1)
+		return 2;
+
+	/* any other non-zero value is unexpected: flag it, then fall back to 1 */
+	if (pixel_mode != 0)
+		ASSERT_CRITICAL(false);
+
+	return 1;
+}
+
+static const struct stream_encoder_funcs dcn35_str_enc_funcs = { /* enc35_* hooks are defined in this file */
+	.dp_set_odm_combine =
+		enc314_dp_set_odm_combine,
+	.dp_set_stream_attribute =
+		enc2_stream_encoder_dp_set_stream_attribute,
+	.hdmi_set_stream_attribute =
+		enc35_stream_encoder_hdmi_set_stream_attribute,
+	.dvi_set_stream_attribute =
+		enc35_stream_encoder_dvi_set_stream_attribute,
+	.set_throttled_vcp_size =
+		enc1_stream_encoder_set_throttled_vcp_size,
+	.update_hdmi_info_packets =
+		enc3_stream_encoder_update_hdmi_info_packets,
+	.stop_hdmi_info_packets =
+		enc3_stream_encoder_stop_hdmi_info_packets,
+	.update_dp_info_packets_sdp_line_num =
+		enc3_stream_encoder_update_dp_info_packets_sdp_line_num,
+	.update_dp_info_packets =
+		enc3_stream_encoder_update_dp_info_packets,
+	.stop_dp_info_packets =
+		enc1_stream_encoder_stop_dp_info_packets,
+	.dp_blank =
+		enc314_stream_encoder_dp_blank,
+	.dp_unblank =
+		enc35_stream_encoder_dp_unblank,
+	.audio_mute_control = enc3_audio_mute_control,
+	/* DP audio */
+	.dp_audio_setup = enc3_se_dp_audio_setup,
+	.dp_audio_enable = enc3_se_dp_audio_enable,
+	.dp_audio_disable = enc1_se_dp_audio_disable,
+
+	.hdmi_audio_setup = enc3_se_hdmi_audio_setup,
+	.hdmi_audio_disable = enc1_se_hdmi_audio_disable,
+	.setup_stereo_sync = enc1_setup_stereo_sync,
+	.set_avmute = enc1_stream_encoder_set_avmute,
+	.dig_connect_to_otg = enc1_dig_connect_to_otg,
+	.dig_source_otg = enc1_dig_source_otg,
+
+	.dp_get_pixel_format = enc1_stream_encoder_dp_get_pixel_format,
+
+	.enc_read_state = enc314_read_state,
+	.dp_set_dsc_config = enc314_dp_set_dsc_config,
+	.dp_set_dsc_pps_info_packet = enc3_dp_set_dsc_pps_info_packet,
+	.set_dynamic_metadata = enc2_set_dynamic_metadata,
+	.hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute,
+	.enable_stream = enc35_stream_encoder_enable,
+	/* DIG input mode, FIFO control, stream-to-link mapping and pixels-per-cycle query */
+	.set_input_mode = enc314_set_dig_input_mode,
+	.enable_fifo = enc35_enable_fifo,
+	.disable_fifo = enc35_disable_fifo,
+	.is_fifo_enabled = enc35_is_fifo_enabled,
+	.map_stream_to_link = enc35_stream_encoder_map_to_link,
+	.get_pixels_per_cycle = enc35_get_pixels_per_cycle,
+};
+
+void dcn35_dio_stream_encoder_construct(
+	struct dcn10_stream_encoder *enc1,
+	struct dc_context *ctx,
+	struct dc_bios *bp,
+	enum engine_id eng_id,
+	struct vpg *vpg,
+	struct afmt *afmt,
+	const struct dcn10_stream_enc_registers *regs,
+	const struct dcn10_stream_encoder_shift *se_shift,
+	const struct dcn10_stream_encoder_mask *se_mask)
+{
+	enc1->regs = regs; /* register description: offsets, field shifts, field masks */
+	enc1->se_shift = se_shift;
+	enc1->se_mask = se_mask;
+	enc1->base.ctx = ctx; /* generic stream_encoder state */
+	enc1->base.bp = bp;
+	enc1->base.id = eng_id;
+	enc1->base.vpg = vpg;
+	enc1->base.afmt = afmt;
+	enc1->base.stream_enc_inst = vpg->inst; /* instance index is taken from the VPG */
+	enc1->base.funcs = &dcn35_str_enc_funcs; /* hook table defined in this file */
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.h
new file mode 100644
index 000000000000..ddb33fdfb4ee
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DC_DIO_STREAM_ENCODER_DCN35_H__
+#define __DC_DIO_STREAM_ENCODER_DCN35_H__
+
+#include "dcn30/dcn30_vpg.h"
+#include "dcn30/dcn30_afmt.h"
+#include "stream_encoder.h"
+#include "dcn20/dcn20_stream_encoder.h"
+
+/* Register bit field name change: SHIFT/MASK pairs for the renamed RDPCS/DPCS fields */
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS__SHIFT 0x8
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN__SHIFT 0x9
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON__SHIFT 0xa
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP__SHIFT 0xe
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT__SHIFT 0xf
+
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_GATE_DIS_MASK 0x00000100L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_EN_MASK 0x00000200L
+#define RDPCSTX0_RDPCSTX_CLOCK_CNTL__RDPCS_SYMCLK_DIV2_CLOCK_ON_MASK 0x00000400L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_SWAP_MASK 0x00004000L
+#define DPCSTX0_DPCSTX_TX_CNTL__DPCS_TX_DATA_ORDER_INVERT_MASK 0x00008000L
+
+/* Stream encoder registers for @id; NOTE(review): DIG_FE_CNTL and both METADATA regs are listed twice */
+#define SE_DCN35_REG_LIST(id)\
+ SRI(AFMT_CNTL, DIG, id), \
+ SRI(DIG_FE_CNTL, DIG, id), \
+ SRI(HDMI_CONTROL, DIG, id), \
+ SRI(HDMI_DB_CONTROL, DIG, id), \
+ SRI(HDMI_GC, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL4, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL5, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL6, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL7, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL8, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL9, DIG, id), \
+ SRI(HDMI_GENERIC_PACKET_CONTROL10, DIG, id), \
+ SRI(HDMI_INFOFRAME_CONTROL0, DIG, id), \
+ SRI(HDMI_INFOFRAME_CONTROL1, DIG, id), \
+ SRI(HDMI_VBI_PACKET_CONTROL, DIG, id), \
+ SRI(HDMI_AUDIO_PACKET_CONTROL, DIG, id),\
+ SRI(HDMI_ACR_PACKET_CONTROL, DIG, id),\
+ SRI(HDMI_ACR_32_0, DIG, id),\
+ SRI(HDMI_ACR_32_1, DIG, id),\
+ SRI(HDMI_ACR_44_0, DIG, id),\
+ SRI(HDMI_ACR_44_1, DIG, id),\
+ SRI(HDMI_ACR_48_0, DIG, id),\
+ SRI(HDMI_ACR_48_1, DIG, id),\
+ SRI(DP_DB_CNTL, DP, id), \
+ SRI(DP_MSA_MISC, DP, id), \
+ SRI(DP_MSA_VBID_MISC, DP, id), \
+ SRI(DP_MSA_COLORIMETRY, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM1, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM2, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM3, DP, id), \
+ SRI(DP_MSA_TIMING_PARAM4, DP, id), \
+ SRI(DP_MSE_RATE_CNTL, DP, id), \
+ SRI(DP_MSE_RATE_UPDATE, DP, id), \
+ SRI(DP_PIXEL_FORMAT, DP, id), \
+ SRI(DP_SEC_CNTL, DP, id), \
+ SRI(DP_SEC_CNTL1, DP, id), \
+ SRI(DP_SEC_CNTL2, DP, id), \
+ SRI(DP_SEC_CNTL5, DP, id), \
+ SRI(DP_SEC_CNTL6, DP, id), \
+ SRI(DP_STEER_FIFO, DP, id), \
+ SRI(DP_VID_M, DP, id), \
+ SRI(DP_VID_N, DP, id), \
+ SRI(DP_VID_STREAM_CNTL, DP, id), \
+ SRI(DP_VID_TIMING, DP, id), \
+ SRI(DP_SEC_AUD_N, DP, id), \
+ SRI(DP_SEC_TIMESTAMP, DP, id), \
+ SRI(DP_DSC_CNTL, DP, id), \
+ SRI(DP_SEC_METADATA_TRANSMISSION, DP, id), \
+ SRI(HDMI_METADATA_PACKET_CONTROL, DIG, id), \
+ SRI(DP_SEC_FRAMING4, DP, id), \
+ SRI(DP_GSP11_CNTL, DP, id), \
+ SRI(DME_CONTROL, DME, id),\
+ SRI(DP_SEC_METADATA_TRANSMISSION, DP, id), \
+ SRI(HDMI_METADATA_PACKET_CONTROL, DIG, id), \
+ SRI(DIG_FE_CNTL, DIG, id), \
+ SRI(DIG_FE_EN_CNTL, DIG, id), \
+ SRI(DIG_FE_CLK_CNTL, DIG, id), \
+ SRI(DIG_CLOCK_PATTERN, DIG, id), \
+ SRI(DIG_FIFO_CTRL0, DIG, id),\
+ SRI(STREAM_MAPPER_CONTROL, DIG, id)
+
+/* Stream encoder field shift/mask list; note the expansion ends with a trailing comma. */
+#define SE_COMMON_MASK_SH_LIST_DCN35(mask_sh)\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, mask_sh),\
+ SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_PER_CYCLE_PROCESSING_MODE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_NO_EXTRA_NULL_PACKET_FILLED, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\
+ SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
+ SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\
+ SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\
+ SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\
+ SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\
+ SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\
+ SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\
+ SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\
+ SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, TMDS_PIXEL_ENCODING, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, TMDS_COLOR_FORMAT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL6, DP_SEC_GSP7_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP11_PPS, mask_sh),\
+ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\
+ SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\
+ SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\
+ SE_SF(DP0_DP_VID_TIMING, DP_VID_N_MUL, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh), \
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_CONT, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_SEND, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC0_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC1_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC2_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC3_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC4_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC5_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC6_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC7_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC8_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC9_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC10_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC11_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC12_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC13_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL10, HDMI_GENERIC14_LINE, mask_sh),\
+ SE_SF(DP0_DP_DSC_CNTL, DP_DSC_MODE, mask_sh),\
+ SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_ENGINE_EN, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_HUBP_REQUESTOR_ID, mask_sh),\
+ SE_SF(DME0_DME_CONTROL, METADATA_STREAM_TYPE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+ SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_ENABLE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE, mask_sh),\
+ SE_SF(DIG0_HDMI_CONTROL, DOLBY_VISION_EN, mask_sh),\
+ SE_SF(DIG0_DIG_FE_EN_CNTL, DIG_FE_ENABLE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_MODE, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SOFT_RESET, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_DISPCLK_G_CLOCK_ON, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SYMCLK_FE_G_CLOCK_ON, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SYMCLK_FE_G_AFMT_CLOCK_ON, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SYMCLK_FE_G_TMDS_CLOCK_ON, mask_sh),\
+ SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SOCCLK_G_AFMT_CLOCK_ON, mask_sh),\
+ SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh),\
+ SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET, mask_sh),\
+ SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, mask_sh),\
+ SE_SF(DIG0_STREAM_MAPPER_CONTROL, DIG_STREAM_LINK_TARGET, mask_sh),
+
+/* Constructor for the DCN3.5 DIO stream encoder, followed by prototypes of exported encoder helpers. */
+void dcn35_dio_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ struct vpg *vpg,
+ struct afmt *afmt,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask);
+
+void enc3_stream_encoder_update_hdmi_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame);
+
+void enc3_stream_encoder_stop_hdmi_info_packets(
+ struct stream_encoder *enc);
+
+void enc3_stream_encoder_update_dp_info_packets_sdp_line_num(
+ struct stream_encoder *enc,
+ struct encoder_info_frame *info_frame);
+
+void enc3_stream_encoder_update_dp_info_packets(
+ struct stream_encoder *enc,
+ const struct encoder_info_frame *info_frame);
+
+void enc3_audio_mute_control(
+ struct stream_encoder *enc,
+ bool mute);
+
+void enc3_se_dp_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info);
+
+void enc3_se_dp_audio_enable(
+ struct stream_encoder *enc);
+
+void enc3_se_hdmi_audio_setup(
+ struct stream_encoder *enc,
+ unsigned int az_inst,
+ struct audio_info *info,
+ struct audio_crtc_info *audio_crtc_info);
+
+void enc3_dp_set_dsc_pps_info_packet(
+ struct stream_encoder *enc,
+ bool enable,
+ uint8_t *dsc_packed_pps,
+ bool immediate_update);
+
+void enc35_disable_fifo(
+ struct stream_encoder *enc);
+
+void enc35_enable_fifo(
+ struct stream_encoder *enc);
+
+#endif /* __DC_DIO_STREAM_ENCODER_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.c
new file mode 100644
index 000000000000..7e558ca195ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "reg_helper.h"
+
+#include "core_types.h"
+#include "link_encoder.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+#include "dcn32/dcn32_dio_link_encoder.h"
+#include "dcn401_dio_link_encoder.h"
+#include "stream_encoder.h"
+#include "dc_bios_types.h"
+
+#include "gpio_service_interface.h"
+
+#ifndef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
+
+/*
+ * Register-access helper macros. They all expand against a local
+ * "enc10" (struct dcn10_link_encoder *) that the caller must have in scope.
+ */
+#define CTX \
+	enc10->base.ctx
+#define DC_LOGGER \
+	enc10->base.ctx->logger
+
+#define REG(reg)\
+	(enc10->link_regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+	enc10->link_shift->field_name, enc10->link_mask->field_name
+
+#define AUX_REG(reg)\
+	(enc10->aux_regs->reg)
+
+#define AUX_REG_READ(reg_name) \
+	dm_read_reg(CTX, AUX_REG(reg_name))
+
+#define AUX_REG_WRITE(reg_name, val) \
+	dm_write_reg(CTX, AUX_REG(reg_name), val)
+
+void enc401_hw_init(struct link_encoder *enc)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+/*
+ Writes fixed AUX DPHY RX/TX control values, sets TMDS_CTL0 and then calls
+ dcn10_aux_initialize(). AUX_RX_DETECTION_THRESHOLD encodings, for reference:
+ 00 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__1to2 : 1/2
+ 01 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__3to4 : 3/4
+ 02 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__7to8 : 7/8
+ 03 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__15to16 : 15/16
+ 04 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__31to32 : 31/32
+ 05 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__63to64 : 63/64
+ 06 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__127to128 : 127/128
+ 07 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__255to256 : 255/256
+ Fields packed into the full AUX_DPHY_RX_CONTROL0 value written below:
+ AUX_REG_UPDATE_5(AUX_DPHY_RX_CONTROL0,
+ AUX_RX_START_WINDOW = 1 [6:4]
+ AUX_RX_RECEIVE_WINDOW = 1 default is 2 [10:8]
+ AUX_RX_HALF_SYM_DETECT_LEN = 1 [13:12] default is 1
+ AUX_RX_TRANSITION_FILTER_EN = 1 [16] default is 1
+ AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT [17] is 0 default is 0
+ AUX_RX_ALLOW_BELOW_THRESHOLD_START [18] is 1 default is 1
+ AUX_RX_ALLOW_BELOW_THRESHOLD_STOP [19] is 1 default is 1
+ AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3
+ AUX_RX_DETECTION_THRESHOLD [30:28] = 1
+*/
+ AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110);
+
+ AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a);
+
+ // AUX_DPHY_TX_REF_CONTROL.AUX_TX_REF_DIV HW default is 0x32 (not written in this function);
+ // AUX_TX_REF_DIV divider values that generate a 2 MHz reference from refclk:
+ // 27MHz -> 0xd
+ // 100MHz -> 0x32
+ // 48MHz -> 0x18
+
+ // Set TMDS_CTL0 to 1. This is a legacy setting.
+ REG_UPDATE(TMDS_CTL_BITS, TMDS_CTL0, 1);
+
+ dcn10_aux_initialize(enc10);
+}
+
+/*
+ * Defer to the DCN10 DP enable path unless debug.avoid_vbios_exec_table is
+ * set, in which case this function does nothing.
+ */
+void dcn401_link_encoder_enable_dp_output(struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source)
+{
+	if (!enc->ctx->dc->debug.avoid_vbios_exec_table)
+		dcn10_link_encoder_enable_dp_output(enc, link_settings, clock_source);
+}
+
+/* Program DIG_BE_MODE for @signal, then enable the DIG backend clock and the backend itself. */
+void dcn401_link_encoder_setup(struct link_encoder *enc, enum signal_type signal)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	bool known_signal = true;
+	uint32_t be_mode = 0;
+
+	switch (signal) {
+	case SIGNAL_TYPE_EDP:
+	case SIGNAL_TYPE_DISPLAY_PORT:
+		be_mode = 0;	/* DP SST */
+		break;
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		be_mode = 2;	/* TMDS-DVI */
+		break;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+		be_mode = 3;	/* TMDS-HDMI */
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		be_mode = 5;	/* DP MST */
+		break;
+	default:
+		/* invalid mode: DIG_BE_MODE is left untouched */
+		ASSERT_CRITICAL(false);
+		known_signal = false;
+		break;
+	}
+	if (known_signal)
+		REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_MODE, be_mode);
+	REG_UPDATE(DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, 1);
+	REG_UPDATE(DIG_BE_EN_CNTL, DIG_BE_ENABLE, 1);
+}
+
+/* True only when both DIG_BE_CLK_EN and DIG_BE_ENABLE read back as 1. */
+bool dcn401_is_dig_enabled(struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t be_clk_en, be_en;
+
+	REG_GET(DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, &be_clk_en);
+	REG_GET(DIG_BE_EN_CNTL, DIG_BE_ENABLE, &be_en);
+	return be_clk_en == 1 && be_en == 1;
+}
+
+/* Translate the DIG_BE_MODE register field back into a signal type. */
+enum signal_type dcn401_get_dig_mode(struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t be_mode;
+
+	REG_GET(DIG_BE_CLK_CNTL, DIG_BE_MODE, &be_mode);
+	if (be_mode == 0)
+		return SIGNAL_TYPE_DISPLAY_PORT;
+	if (be_mode == 2)
+		return SIGNAL_TYPE_DVI_SINGLE_LINK;
+	if (be_mode == 3)
+		return SIGNAL_TYPE_HDMI_TYPE_A;
+	if (be_mode == 5)
+		return SIGNAL_TYPE_DISPLAY_PORT_MST;
+
+	/* Any other encoding is not mapped to a signal type. */
+	return SIGNAL_TYPE_NONE;
+}
+
+static const struct link_encoder_funcs dcn401_link_enc_funcs = {
+ .read_state = link_enc2_read_state,
+ .validate_output_with_stream =
+ dcn30_link_encoder_validate_output_with_stream,
+ .hw_init = enc401_hw_init, /* defined in this file */
+ .setup = dcn401_link_encoder_setup, /* defined in this file */
+ .enable_tmds_output = dcn10_link_encoder_enable_tmds_output,
+ .enable_dp_output = dcn401_link_encoder_enable_dp_output, /* defined in this file */
+ .enable_dp_mst_output = dcn10_link_encoder_enable_dp_mst_output,
+ .disable_output = dcn10_link_encoder_disable_output,
+ .dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings,
+ .dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern,
+ .update_mst_stream_allocation_table =
+ dcn10_link_encoder_update_mst_stream_allocation_table,
+ .psr_program_dp_dphy_fast_training =
+ dcn10_psr_program_dp_dphy_fast_training,
+ .psr_program_secondary_packet = dcn10_psr_program_secondary_packet,
+ .connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe,
+ .enable_hpd = dcn10_link_encoder_enable_hpd,
+ .disable_hpd = dcn10_link_encoder_disable_hpd,
+ .is_dig_enabled = dcn401_is_dig_enabled, /* defined in this file */
+ .destroy = dcn10_link_encoder_destroy,
+ .fec_set_enable = enc2_fec_set_enable,
+ .fec_set_ready = enc2_fec_set_ready,
+ .fec_is_active = enc2_fec_is_active,
+ .get_dig_frontend = dcn10_get_dig_frontend,
+ .get_dig_mode = dcn401_get_dig_mode, /* defined in this file */
+ .is_in_alt_mode = dcn32_link_encoder_is_in_alt_mode,
+ .get_max_link_cap = dcn32_link_encoder_get_max_link_cap,
+ .set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux,
+};
+
+/*
+ * Set up a DCN4.01 link encoder inside @enc20: install the DCN4.01 function
+ * table, copy identity/feature data from @init_data and @enc_features, attach
+ * the register, shift and mask tables, derive the preferred DIG engine from
+ * the transmitter and refine link capabilities from the VBIOS speed-cap info.
+ */
+void dcn401_link_encoder_construct(
+	struct dcn20_link_encoder *enc20,
+	const struct encoder_init_data *init_data,
+	const struct encoder_feature_support *enc_features,
+	const struct dcn10_link_enc_registers *link_regs,
+	const struct dcn10_link_enc_aux_registers *aux_regs,
+	const struct dcn10_link_enc_hpd_registers *hpd_regs,
+	const struct dcn10_link_enc_shift *link_shift,
+	const struct dcn10_link_enc_mask *link_mask)
+{
+	struct dcn10_link_encoder *enc10 = &enc20->enc10;
+	struct link_encoder *base = &enc10->base;
+	const struct dc_vbios_funcs *vbios = init_data->ctx->dc_bios->funcs;
+	struct bp_connector_speed_cap_info speed_caps = {0};
+	enum bp_result rc = BP_RESULT_OK;
+
+	/* Register, shift and mask tables supplied by the caller. */
+	enc10->link_regs = link_regs;
+	enc10->aux_regs = aux_regs;
+	enc10->hpd_regs = hpd_regs;
+	enc10->link_shift = link_shift;
+	enc10->link_mask = link_mask;
+
+	/* Identity and topology copied from the caller's init data. */
+	base->funcs = &dcn401_link_enc_funcs;
+	base->ctx = init_data->ctx;
+	base->id = init_data->encoder;
+	base->hpd_source = init_data->hpd_source;
+	base->connector = init_data->connector;
+	base->transmitter = init_data->transmitter;
+
+	base->features = *enc_features;
+	if (base->connector.id == CONNECTOR_ID_USBC)
+		base->features.flags.bits.DP_IS_USB_C = 1;
+
+	/*
+	 * DP_SINK_DETECT_POLL_DATA_PIN is left as supplied by @enc_features; no
+	 * adapter-service feature query is performed here.
+	 */
+
+	base->output_signals =
+		SIGNAL_TYPE_DISPLAY_PORT |
+		SIGNAL_TYPE_DISPLAY_PORT_MST |
+		SIGNAL_TYPE_EDP |
+		SIGNAL_TYPE_HDMI_TYPE_A |
+		SIGNAL_TYPE_DVI_SINGLE_LINK |
+		SIGNAL_TYPE_DVI_DUAL_LINK |
+		SIGNAL_TYPE_LVDS;
+
+	/* Map the transmitter to its preferred DIG engine; anything else asserts. */
+	switch (base->transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		base->preferred_engine = ENGINE_ID_DIGA;
+		break;
+	case TRANSMITTER_UNIPHY_B:
+		base->preferred_engine = ENGINE_ID_DIGB;
+		break;
+	case TRANSMITTER_UNIPHY_C:
+		base->preferred_engine = ENGINE_ID_DIGC;
+		break;
+	case TRANSMITTER_UNIPHY_D:
+		base->preferred_engine = ENGINE_ID_DIGD;
+		break;
+	case TRANSMITTER_UNIPHY_E:
+		base->preferred_engine = ENGINE_ID_DIGE;
+		break;
+	default:
+		ASSERT_CRITICAL(false);
+		base->preferred_engine = ENGINE_ID_UNKNOWN;
+		break;
+	}
+
+	/* default to one to mirror Windows behavior */
+	base->features.flags.bits.HDMI_6GB_EN = 1;
+
+	if (vbios->get_connector_speed_cap_info)
+		rc = vbios->get_connector_speed_cap_info(base->ctx->dc_bios,
+				base->connector, &speed_caps);
+
+	/* Apply VBIOS caps; note rc stays OK (zeroed caps applied) if the callback is absent */
+	if (rc != BP_RESULT_OK) {
+		DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+				__func__,
+				rc);
+	} else {
+		base->features.flags.bits.IS_HBR2_CAPABLE = speed_caps.DP_HBR2_EN;
+		base->features.flags.bits.IS_HBR3_CAPABLE = speed_caps.DP_HBR3_EN;
+		base->features.flags.bits.HDMI_6GB_EN = speed_caps.HDMI_6GB_EN;
+		base->features.flags.bits.IS_DP2_CAPABLE = 1;
+		base->features.flags.bits.IS_UHBR10_CAPABLE = speed_caps.DP_UHBR10_EN;
+		base->features.flags.bits.IS_UHBR13_5_CAPABLE = speed_caps.DP_UHBR13_5_EN;
+		base->features.flags.bits.IS_UHBR20_CAPABLE = speed_caps.DP_UHBR20_EN;
+	}
+
+	if (base->ctx->dc->debug.hdmi20_disable)
+		base->features.flags.bits.HDMI_6GB_EN = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.h
new file mode 100644
index 000000000000..6baab8302b81
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_LINK_ENCODER__DCN401_H__
+#define __DC_LINK_ENCODER__DCN401_H__
+
+#include "dcn30/dcn30_dio_link_encoder.h"
+/* Link encoder field shift/mask list for DCN4.01 (DIG, DP, DP_AUX and HPD fields). */
+#define LINK_ENCODER_MASK_SH_LIST_DCN401(mask_sh) \
+ LE_SF(DIG0_DIG_BE_EN_CNTL, DIG_BE_ENABLE, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_RB_SWITCH_EN, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_HPD_SELECT, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_MODE, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_CLK_EN, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SOFT_RESET, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, HDCP_SOFT_RESET, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_CLOCK_ON, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_HDCP_CLOCK_ON, mask_sh),\
+ LE_SF(DIG0_DIG_BE_CLK_CNTL, DIG_BE_SYMCLK_G_TMDS_CLOCK_ON, mask_sh),\
+ LE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+ LE_SF(DIG0_TMDS_CTL_BITS, TMDS_CTL0, mask_sh), \
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_BYPASS, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE0, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE1, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE2, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE3, mask_sh),\
+ LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, mask_sh),\
+ LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_SEL, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM1, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM2, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM3, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM4, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM5, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM6, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM7, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM8, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, mask_sh),\
+ LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, mask_sh),\
+ LE_SF(DP0_DP_DPHY_FAST_TRAINING, DPHY_RX_FAST_TRAINING_CAPABLE, mask_sh),\
+ LE_SF(DP0_DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, mask_sh),\
+ LE_SF(DP0_DP_DPHY_TRAINING_PATTERN_SEL, DPHY_TRAINING_PATTERN_SEL, mask_sh),\
+ LE_SF(DP0_DP_DPHY_HBR2_PATTERN_CONTROL, DP_DPHY_HBR2_PATTERN_CONTROL, mask_sh),\
+ LE_SF(DP0_DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_IDLE_BS_INTERVAL, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VBID_DISABLE, mask_sh),\
+ LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VID_ENHANCED_FRAME_MODE, mask_sh),\
+ LE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+ LE_SF(DP0_DP_CONFIG, DP_UDI_LANES, mask_sh),\
+ LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_LINE_NUM, mask_sh),\
+ LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_PRIORITY, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC0, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC1, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT0, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT1, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC2, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC3, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT2, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT3, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_SAT_UPDATE, mask_sh),\
+ LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_16_MTP_KEEPOUT, mask_sh),\
+ LE_SF(DP_AUX0_AUX_CONTROL, AUX_HPD_SEL, mask_sh),\
+ LE_SF(DP_AUX0_AUX_CONTROL, AUX_LS_READ_EN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_RECEIVE_WINDOW, mask_sh),\
+ LE_SF(HPD0_DC_HPD_CONTROL, DC_HPD_EN, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_EN, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_READY_SHADOW, mask_sh),\
+ LE_SF(DP0_DP_DPHY_CNTL, DPHY_FEC_ACTIVE_STATUS, mask_sh),\
+ LE_SF(DIG0_TMDS_CTL_BITS, TMDS_CTL0, mask_sh), /* NOTE(review): repeats the TMDS_CTL0 entry listed earlier */ \
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_START_WINDOW, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_HALF_SYM_DETECT_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_TRANSITION_FILTER_EN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_START, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_ALLOW_BELOW_THRESHOLD_STOP, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_PHASE_DETECT_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_DETECTION_THRESHOLD, mask_sh), \
+ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_SYMBOLS, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_MODE_DET_CHECK_DELAY, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_PRECHARGE_SKIP, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, mask_sh),\
+ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN_MUL, mask_sh)
+
+/* Constructor: initialises the DCN10 link encoder embedded in @enc20. */
+void dcn401_link_encoder_construct(
+	struct dcn20_link_encoder *enc20,
+	const struct encoder_init_data *init_data,
+	const struct encoder_feature_support *enc_features,
+	const struct dcn10_link_enc_registers *link_regs,
+	const struct dcn10_link_enc_aux_registers *aux_regs,
+	const struct dcn10_link_enc_hpd_registers *hpd_regs,
+	const struct dcn10_link_enc_shift *link_shift,
+	const struct dcn10_link_enc_mask *link_mask);
+
+/* Link encoder ops implemented in dcn401_dio_link_encoder.c. */
+void enc401_hw_init(struct link_encoder *enc);
+
+void dcn401_link_encoder_enable_dp_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source);
+
+void dcn401_link_encoder_setup(
+	struct link_encoder *enc,
+	enum signal_type signal);
+
+enum signal_type dcn401_get_dig_mode(
+	struct link_encoder *enc);
+
+bool dcn401_is_dig_enabled(struct link_encoder *enc);
+#endif /* __DC_LINK_ENCODER__DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c
new file mode 100644
index 000000000000..99aab70ef3e1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c
@@ -0,0 +1,856 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dc_bios_types.h"
+#include "dcn30/dcn30_dio_stream_encoder.h"
+#include "dcn32/dcn32_dio_stream_encoder.h"
+#include "dcn35/dcn35_dio_stream_encoder.h"
+
+#include "dcn401_dio_stream_encoder.h"
+#include "reg_helper.h"
+#include "hw_shared.h"
+#include "link_service.h"
+#include "dpcd_defs.h"
+
+#define DC_LOGGER \
+ enc1->base.ctx->logger
+
+#define REG(reg)\
+ (enc1->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ enc1->se_shift->field_name, enc1->se_mask->field_name
+
+#define VBI_LINE_0 0
+#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000
+
+#define CTX \
+ enc1->base.ctx
+
+
+
+static void enc401_dp_set_odm_combine(
+ struct stream_encoder *enc,
+ bool odm_combine)
+{
+}
+
+/* setup stream encoder in dvi mode */
+void enc401_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = is_dual_link ?
+ SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK;
+ cntl.enable_dp_audio = false;
+ cntl.pixel_clock = crtc_timing->pix_clk_100hz / 10;
+ cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return;
+
+ } else {
+
+		//Set pattern for clock channel, default value 0x63 does not work
+ REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+ //DIG_BE_TMDS_DVI_MODE : TMDS-DVI mode is already set in link_encoder_setup
+
+ //DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+ /* DIG_START is removed from the register spec */
+ }
+
+ ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB);
+ ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888);
+ enc401_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+}
+
+/* setup stream encoder in hdmi mode */
+void enc401_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (!enc->ctx->dc->debug.avoid_vbios_exec_table) {
+ struct bp_encoder_control cntl = {0};
+
+ cntl.action = ENCODER_CONTROL_SETUP;
+ cntl.engine_id = enc1->base.id;
+ cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A;
+ cntl.enable_dp_audio = enable_audio;
+ cntl.pixel_clock = actual_pix_clk_khz;
+ cntl.lanes_number = LANE_COUNT_FOUR;
+
+ if (enc1->base.bp->funcs->encoder_control(
+ enc1->base.bp, &cntl) != BP_RESULT_OK)
+ return;
+
+ } else {
+
+		//Set pattern for clock channel, default value 0x63 does not work
+ REG_UPDATE(DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, 0x1F);
+
+ //DIG_BE_TMDS_HDMI_MODE : TMDS-HDMI mode is already set in link_encoder_setup
+
+ //DIG_SOURCE_SELECT is already set in dig_connect_to_otg
+
+ /* DIG_START is removed from the register spec */
+ }
+
+ /* Configure pixel encoding */
+ enc401_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+
+ /* setup HDMI engine */
+ REG_UPDATE_6(HDMI_CONTROL,
+ HDMI_PACKET_GEN_VERSION, 1,
+ HDMI_KEEPOUT_MODE, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0,
+ HDMI_DATA_SCRAMBLE_EN, 0,
+ HDMI_NO_EXTRA_NULL_PACKET_FILLED, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+
+ /* Configure color depth */
+ switch (crtc_timing->display_color_depth) {
+ case COLOR_DEPTH_888:
+ REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+ break;
+ case COLOR_DEPTH_101010:
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 1,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_121212:
+ if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 0);
+ } else {
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 2,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ }
+ break;
+ case COLOR_DEPTH_161616:
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DEEP_COLOR_DEPTH, 3,
+ HDMI_DEEP_COLOR_ENABLE, 1);
+ break;
+ default:
+ break;
+ }
+
+ if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) {
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_RATE_MORE_340M
+ * Clock channel frequency is 1/4 of character rate.
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 1);
+ } else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) {
+
+ /* TODO: New feature for DCE11, still need to implement */
+
+ /* enable HDMI data scrambler
+ * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE
+ * Clock channel frequency is the same
+ * as character rate
+ */
+ REG_UPDATE_2(HDMI_CONTROL,
+ HDMI_DATA_SCRAMBLE_EN, 1,
+ HDMI_CLOCK_CHANNEL_RATE, 0);
+ }
+
+
+ /* Enable transmission of General Control packet on every frame */
+ REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL,
+ HDMI_GC_CONT, 1,
+ HDMI_GC_SEND, 1,
+ HDMI_NULL_SEND, 1);
+
+ /* Disable Audio Content Protection packet transmission */
+ REG_UPDATE(HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, 0);
+ /* following belongs to audio */
+ /* Enable Audio InfoFrame packet transmission. */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+
+ /* update double-buffered AUDIO_INFO registers immediately */
+ ASSERT(enc->afmt);
+ enc->afmt->funcs->audio_info_immediate_update(enc->afmt);
+
+ /* Select line number on which to send Audio InfoFrame packets */
+ REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE,
+ VBI_LINE_0 + 2);
+
+ /* set HDMI GC AVMUTE */
+ REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0);
+}
+
+void enc401_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ // The naming of this field is confusing, what it means is the output mode of otg, which
+ // is the input mode of the dig
+ switch (pix_per_container) {
+ case 2:
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, 0x1);
+ break;
+ case 4:
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, 0x2);
+ break;
+ case 8:
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, 0x3);
+ break;
+ default:
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, 0x0);
+ break;
+ }
+}
+
+static bool is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
+{
+ bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420;
+
+ two_pix = two_pix || (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
+ && !timing->dsc_cfg.ycbcr422_simple);
+ return two_pix;
+}
+
+void enc401_stream_encoder_dp_unblank(
+ struct dc_link *link,
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
+ uint32_t n_vid = 0x8000;
+ uint32_t m_vid;
+ uint32_t pix_per_container = 1;
+ uint64_t m_vid_l = n_vid;
+
+		/* YCbCr 4:2:0, or non-simple YCbCr 4:2:2 with DSC: Computed VID_M will be 2X the input rate */
+ if (is_two_pixels_per_containter(&param->timing)) {
+ pix_per_container = 2;
+ }
+
+ /* M / N = Fstream / Flink
+ * m_vid / n_vid = pixel rate / link rate
+ */
+ m_vid_l *= param->timing.pix_clk_100hz / pix_per_container / 10;
+ m_vid_l = div_u64(m_vid_l,
+ param->link_settings.link_rate
+ * LINK_RATE_REF_FREQ_IN_KHZ);
+
+ m_vid = (uint32_t) m_vid_l;
+
+		/* clear DP_VID_M_N_GEN_EN while the initial Mvid/Nvid are programmed; it is set again below */
+
+ REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0);
+
+ /* auto measurement need 1 full 0x8000 symbol cycle to kick in,
+ * therefore program initial value for Mvid and Nvid
+ */
+
+ REG_UPDATE(DP_VID_N, DP_VID_N, n_vid);
+
+ REG_UPDATE(DP_VID_M, DP_VID_M, m_vid);
+
+ /* reduce jitter based on read rate */
+ switch (param->pix_per_cycle) {
+ case 2:
+ REG_UPDATE(DP_VID_TIMING, DP_VID_N_INTERVAL, 0x1);
+ break;
+ case 4:
+ REG_UPDATE(DP_VID_TIMING, DP_VID_N_INTERVAL, 0x2);
+ break;
+ case 8:
+ REG_UPDATE(DP_VID_TIMING, DP_VID_N_INTERVAL, 0x3);
+ break;
+ default:
+ REG_UPDATE(DP_VID_TIMING, DP_VID_N_INTERVAL, 0x0);
+ break;
+ }
+
+ REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 1);
+ }
+
+ /* make sure stream is disabled before resetting steer fifo */
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false);
+ REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000);
+
+ /* DIG_START is removed from the register spec */
+
+	/* switch DP encoder to CRTC data, but reset the steer fifo first. It may happen
+	 * that it overflows during mode transition, and sometimes doesn't recover.
+	 */
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1);
+ udelay(10);
+
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
+
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_ENABLE, 1);
+
+ REG_UPDATE_2(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 1, DP_VID_STREAM_DIS_DEFER, 2);
+ udelay(200);
+
+ /* DIG Resync FIFO now needs to be explicitly enabled
+ */
+ /* read start level = 0 will bring underflow / overflow and DIG_FIFO_ERROR = 1
+ * so set it to 1/2 full = 7 before reset as suggested by hardware team.
+ */
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1);
+
+ REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0);
+
+ REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000);
+
+ REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1);
+
+ /* wait 100us for DIG/DP logic to prime
+ * (i.e. a few video lines)
+ */
+ udelay(100);
+
+ /* the hardware would start sending video at the start of the next DP
+ * frame (i.e. rising edge of the vblank).
+ * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this
+ * register has no effect on enable transition! HW always guarantees
+ * VID_STREAM enable at start of next frame, and this is not
+ * programmable
+ */
+
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true);
+
+ link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_ENABLE_DP_VID_STREAM);
+}
+
+/* this function reads dsc related register fields into an enc_state struct
+ * so they can be logged later in dcn10_log_hw_state.
+ */
+void enc401_read_state(struct stream_encoder *enc, struct enc_state *s)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ //if dsc is enabled, continue to read
+ REG_GET(DP_PIXEL_FORMAT, PIXEL_ENCODING_TYPE, &s->dsc_mode);
+
+ if (s->dsc_mode) {
+ REG_GET(DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, &s->sec_gsp_pps_line_num);
+
+ REG_GET(DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, &s->vbid6_line_reference);
+ REG_GET(DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, &s->vbid6_line_num);
+
+ REG_GET(DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, &s->sec_gsp_pps_enable);
+ REG_GET(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, &s->sec_stream_enable);
+ }
+}
+
+void enc401_stream_encoder_enable(
+ struct stream_encoder *enc,
+ enum signal_type signal,
+ bool enable)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ if (enable) {
+ switch (signal) {
+ case SIGNAL_TYPE_DVI_SINGLE_LINK:
+ case SIGNAL_TYPE_DVI_DUAL_LINK:
+ /* TMDS-DVI */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 2);
+ break;
+ case SIGNAL_TYPE_HDMI_TYPE_A:
+ /* TMDS-HDMI */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 3);
+ break;
+ case SIGNAL_TYPE_DISPLAY_PORT_MST:
+ /* DP MST */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 5);
+ break;
+ case SIGNAL_TYPE_EDP:
+ case SIGNAL_TYPE_DISPLAY_PORT:
+ case SIGNAL_TYPE_VIRTUAL:
+ /* DP SST */
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_MODE, 0);
+ break;
+ default:
+ /* invalid mode ! */
+ ASSERT_CRITICAL(false);
+ }
+
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
+ REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
+ } else {
+ REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
+ }
+}
+
+void enc401_stream_encoder_dp_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ enum dc_color_space output_color_space,
+ bool use_vsc_sdp_for_colorimetry,
+ uint32_t enable_sdp_splitting)
+{
+ uint32_t h_active_start;
+ uint32_t v_active_start;
+ uint32_t misc0 = 0;
+ uint32_t misc1 = 0;
+ uint32_t h_blank;
+ uint32_t h_back_porch;
+ uint8_t colorimetry_bpc;
+ uint8_t dp_pixel_encoding = 0;
+ uint8_t dp_component_depth = 0;
+ uint8_t dp_translate_pixel_enc = 0;
+ // Fix set but not used warnings
+ //uint8_t dp_pixel_encoding_type = 0;
+ uint8_t dp_compressed_pixel_format = 0;
+
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ struct dc_crtc_timing hw_crtc_timing = *crtc_timing;
+
+ if (hw_crtc_timing.flags.INTERLACE) {
+ /*the input timing is in VESA spec format with Interlace flag =1*/
+ hw_crtc_timing.v_total /= 2;
+ hw_crtc_timing.v_border_top /= 2;
+ hw_crtc_timing.v_addressable /= 2;
+ hw_crtc_timing.v_border_bottom /= 2;
+ hw_crtc_timing.v_front_porch /= 2;
+ hw_crtc_timing.v_sync_width /= 2;
+ }
+
+
+ /* set pixel encoding */
+ switch (hw_crtc_timing.pixel_encoding) {
+ case PIXEL_ENCODING_YCBCR422:
+ dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR422;
+ break;
+ case PIXEL_ENCODING_YCBCR444:
+ dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR444;
+
+ if (hw_crtc_timing.flags.Y_ONLY)
+ if (hw_crtc_timing.display_color_depth != COLOR_DEPTH_666)
+ /* HW testing only, no use case yet.
+ * Color depth of Y-only could be
+ * 8, 10, 12, 16 bits
+ */
+ dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_Y_ONLY;
+
+ /* Note: DP_MSA_MISC1 bit 7 is the indicator
+ * of Y-only mode.
+ * This bit is set in HW if register
+ * DP_PIXEL_ENCODING is programmed to 0x4
+ */
+ break;
+ case PIXEL_ENCODING_YCBCR420:
+ dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR420;
+ break;
+ default:
+ dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_RGB444;
+ break;
+ }
+
+ misc1 = REG_READ(DP_MSA_MISC);
+ /* For YCbCr420 and BT2020 Colorimetry Formats, VSC SDP shall be used.
+ * When MISC1, bit 6, is Set to 1, a Source device uses a VSC SDP to indicate the
+ * Pixel Encoding/Colorimetry Format and that a Sink device shall ignore MISC1, bit 7,
+ * and MISC0, bits 7:1 (MISC1, bit 7, and MISC0, bits 7:1, become "don't care").
+ */
+ if (use_vsc_sdp_for_colorimetry)
+ misc1 = misc1 | 0x40;
+ else
+ misc1 = misc1 & ~0x40;
+
+ /* set color depth */
+ switch (hw_crtc_timing.display_color_depth) {
+ case COLOR_DEPTH_666:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_6BPC;
+ break;
+ case COLOR_DEPTH_888:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_8BPC;
+ break;
+ case COLOR_DEPTH_101010:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_10BPC;
+ break;
+ case COLOR_DEPTH_121212:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_12BPC;
+ break;
+ case COLOR_DEPTH_161616:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_16BPC;
+ break;
+ default:
+ dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_6BPC;
+ break;
+ }
+
+ if (hw_crtc_timing.flags.DSC) {
+ // Fix set but not used error
+ //dp_pixel_encoding_type = 1;
+ switch (hw_crtc_timing.pixel_encoding) {
+ case PIXEL_ENCODING_YCBCR444:
+ dp_compressed_pixel_format = 0;
+ break;
+ case PIXEL_ENCODING_YCBCR422:
+ dp_compressed_pixel_format = 1;
+ if (hw_crtc_timing.dsc_cfg.ycbcr422_simple)
+ dp_compressed_pixel_format = 0;
+ break;
+ case PIXEL_ENCODING_YCBCR420:
+ dp_compressed_pixel_format = 1;
+ break;
+ default:
+ dp_compressed_pixel_format = 0;
+ break;
+ }
+ } else {
+ // Fix set but not used error
+ //dp_pixel_encoding_type = 0;
+ switch (dp_pixel_encoding) {
+ case DP_PIXEL_ENCODING_TYPE_RGB444:
+ dp_translate_pixel_enc = 0;
+ break;
+ case DP_PIXEL_ENCODING_TYPE_YCBCR422:
+ dp_translate_pixel_enc = 1;
+ break;
+ case DP_PIXEL_ENCODING_TYPE_YCBCR444:
+ dp_translate_pixel_enc = 0;
+ break;
+ case DP_PIXEL_ENCODING_TYPE_Y_ONLY:
+ dp_translate_pixel_enc = 3;
+ break;
+ case DP_PIXEL_ENCODING_TYPE_YCBCR420:
+ dp_translate_pixel_enc = 2;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ }
+ /* Set DP pixel encoding and component depth */
+ REG_UPDATE_4(DP_PIXEL_FORMAT,
+ PIXEL_ENCODING_TYPE, hw_crtc_timing.flags.DSC ? 1 : 0,
+ UNCOMPRESSED_PIXEL_FORMAT, dp_translate_pixel_enc,
+ UNCOMPRESSED_COMPONENT_DEPTH, dp_component_depth,
+ COMPRESSED_PIXEL_FORMAT, dp_compressed_pixel_format);
+
+ /* set dynamic range and YCbCr range */
+
+ switch (hw_crtc_timing.display_color_depth) {
+ case COLOR_DEPTH_666:
+ colorimetry_bpc = 0;
+ break;
+ case COLOR_DEPTH_888:
+ colorimetry_bpc = 1;
+ break;
+ case COLOR_DEPTH_101010:
+ colorimetry_bpc = 2;
+ break;
+ case COLOR_DEPTH_121212:
+ colorimetry_bpc = 3;
+ break;
+ default:
+ colorimetry_bpc = 0;
+ break;
+ }
+
+ misc0 = colorimetry_bpc << 5;
+
+ switch (output_color_space) {
+ case COLOR_SPACE_SRGB:
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ break;
+ case COLOR_SPACE_SRGB_LIMITED:
+ misc0 = misc0 | 0x8; /* bit3=1 */
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ break;
+ case COLOR_SPACE_YCBCR601:
+ case COLOR_SPACE_YCBCR601_LIMITED:
+ misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422)
+ misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */
+ else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444)
+ misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */
+ break;
+ case COLOR_SPACE_YCBCR709:
+ case COLOR_SPACE_YCBCR709_LIMITED:
+ misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */
+ misc1 = misc1 & ~0x80; /* bit7 = 0*/
+ if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422)
+ misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */
+ else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444)
+ misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */
+ break;
+ case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
+ case COLOR_SPACE_2020_RGB_FULLRANGE:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
+ case COLOR_SPACE_XR_RGB:
+ case COLOR_SPACE_MSREF_SCRGB:
+ case COLOR_SPACE_ADOBERGB:
+ case COLOR_SPACE_DCIP3:
+ case COLOR_SPACE_XV_YCC_709:
+ case COLOR_SPACE_XV_YCC_601:
+ case COLOR_SPACE_DISPLAYNATIVE:
+ case COLOR_SPACE_DOLBYVISION:
+ case COLOR_SPACE_APPCTRL:
+ case COLOR_SPACE_CUSTOMPOINTS:
+ case COLOR_SPACE_UNKNOWN:
+ case COLOR_SPACE_YCBCR709_BLACK:
+ default:
+ /* do nothing */
+ break;
+ }
+
+ REG_SET(DP_MSA_COLORIMETRY, 0, DP_MSA_MISC0, misc0);
+ REG_WRITE(DP_MSA_MISC, misc1); /* MSA_MISC1 */
+
+ /* dcn new register
+ * dc_crtc_timing is vesa dmt struct. data from edid
+ */
+ REG_SET_2(DP_MSA_TIMING_PARAM1, 0,
+ DP_MSA_HTOTAL, hw_crtc_timing.h_total,
+ DP_MSA_VTOTAL, hw_crtc_timing.v_total);
+
+ /* calculate from vesa timing parameters
+ * h_active_start related to leading edge of sync
+ */
+
+ h_blank = hw_crtc_timing.h_total - hw_crtc_timing.h_border_left -
+ hw_crtc_timing.h_addressable - hw_crtc_timing.h_border_right;
+
+ h_back_porch = h_blank - hw_crtc_timing.h_front_porch -
+ hw_crtc_timing.h_sync_width;
+
+ /* start at beginning of left border */
+ h_active_start = hw_crtc_timing.h_sync_width + h_back_porch;
+
+
+ v_active_start = hw_crtc_timing.v_total - hw_crtc_timing.v_border_top -
+ hw_crtc_timing.v_addressable - hw_crtc_timing.v_border_bottom -
+ hw_crtc_timing.v_front_porch;
+
+
+ /* start at beginning of left border */
+ REG_SET_2(DP_MSA_TIMING_PARAM2, 0,
+ DP_MSA_HSTART, h_active_start,
+ DP_MSA_VSTART, v_active_start);
+
+ REG_SET_4(DP_MSA_TIMING_PARAM3, 0,
+ DP_MSA_HSYNCWIDTH,
+ hw_crtc_timing.h_sync_width,
+ DP_MSA_HSYNCPOLARITY,
+ !hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY,
+ DP_MSA_VSYNCWIDTH,
+ hw_crtc_timing.v_sync_width,
+ DP_MSA_VSYNCPOLARITY,
+ !hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY);
+
+	/* HWIDTH includes border or overscan */
+ REG_SET_2(DP_MSA_TIMING_PARAM4, 0,
+ DP_MSA_HWIDTH, hw_crtc_timing.h_border_left +
+ hw_crtc_timing.h_addressable + hw_crtc_timing.h_border_right,
+ DP_MSA_VHEIGHT, hw_crtc_timing.v_border_top +
+ hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom);
+
+ REG_UPDATE(DP_SEC_FRAMING4,
+ DP_SST_SDP_SPLITTING, enable_sdp_splitting);
+}
+
+void enc401_stream_encoder_map_to_link(
+ struct stream_encoder *enc,
+ uint32_t stream_enc_inst,
+ uint32_t link_enc_inst)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ REG_UPDATE(STREAM_MAPPER_CONTROL,
+ DIG_STREAM_LINK_TARGET, link_enc_inst);
+}
+
+static const struct stream_encoder_funcs dcn401_str_enc_funcs = {
+ .dp_set_odm_combine =
+ enc401_dp_set_odm_combine,
+ .dp_set_stream_attribute =
+ enc401_stream_encoder_dp_set_stream_attribute,
+ .hdmi_set_stream_attribute =
+ enc401_stream_encoder_hdmi_set_stream_attribute,
+ .dvi_set_stream_attribute =
+ enc401_stream_encoder_dvi_set_stream_attribute,
+ .set_throttled_vcp_size =
+ enc1_stream_encoder_set_throttled_vcp_size,
+ .update_hdmi_info_packets =
+ enc3_stream_encoder_update_hdmi_info_packets,
+ .stop_hdmi_info_packets =
+ enc3_stream_encoder_stop_hdmi_info_packets,
+ .update_dp_info_packets_sdp_line_num =
+ enc3_stream_encoder_update_dp_info_packets_sdp_line_num,
+ .update_dp_info_packets =
+ enc3_stream_encoder_update_dp_info_packets,
+ .stop_dp_info_packets =
+ enc1_stream_encoder_stop_dp_info_packets,
+ .dp_blank =
+ enc1_stream_encoder_dp_blank,
+ .dp_unblank =
+ enc401_stream_encoder_dp_unblank,
+ .audio_mute_control = enc3_audio_mute_control,
+
+ .dp_audio_setup = enc3_se_dp_audio_setup,
+ .dp_audio_enable = enc3_se_dp_audio_enable,
+ .dp_audio_disable = enc1_se_dp_audio_disable,
+
+ .hdmi_audio_setup = enc3_se_hdmi_audio_setup,
+ .hdmi_audio_disable = enc1_se_hdmi_audio_disable,
+ .setup_stereo_sync = enc1_setup_stereo_sync,
+ .set_avmute = enc1_stream_encoder_set_avmute,
+ .dig_connect_to_otg = enc1_dig_connect_to_otg,
+ .dig_source_otg = enc1_dig_source_otg,
+
+ .dp_get_pixel_format = enc1_stream_encoder_dp_get_pixel_format,
+
+ .enc_read_state = enc401_read_state,
+ .dp_set_dsc_config = NULL,
+ .dp_set_dsc_pps_info_packet = enc3_dp_set_dsc_pps_info_packet,
+ .set_dynamic_metadata = enc401_set_dynamic_metadata,
+ .hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute,
+ .enable_stream = enc401_stream_encoder_enable,
+
+ .set_input_mode = enc401_set_dig_input_mode,
+ .enable_fifo = enc35_enable_fifo,
+ .disable_fifo = enc35_disable_fifo,
+ .map_stream_to_link = enc401_stream_encoder_map_to_link,
+};
+
+void dcn401_dio_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ struct vpg *vpg,
+ struct afmt *afmt,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask)
+{
+ enc1->base.funcs = &dcn401_str_enc_funcs;
+ enc1->base.ctx = ctx;
+ enc1->base.id = eng_id;
+ enc1->base.bp = bp;
+ enc1->base.vpg = vpg;
+ enc1->base.afmt = afmt;
+ enc1->regs = regs;
+ enc1->se_shift = se_shift;
+ enc1->se_mask = se_mask;
+ enc1->base.stream_enc_inst = vpg->inst;
+}
+
+void enc401_set_dynamic_metadata(struct stream_encoder *enc,
+ bool enable_dme,
+ uint32_t hubp_requestor_id,
+ enum dynamic_metadata_mode dmdata_mode)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+ if (enable_dme) {
+ REG_UPDATE_2(DME_CONTROL,
+ METADATA_HUBP_REQUESTOR_ID, hubp_requestor_id,
+ METADATA_STREAM_TYPE, (dmdata_mode == dmdata_dolby_vision) ? 1 : 0);
+
+ /* Use default line reference DP_SOF for bringup.
+ * Should use OTG_SOF for DRR cases
+ */
+ if (dmdata_mode == dmdata_dp)
+ REG_UPDATE_3(DP_SEC_METADATA_TRANSMISSION,
+ DP_SEC_METADATA_PACKET_ENABLE, 1,
+ DP_SEC_METADATA_PACKET_LINE_REFERENCE, 0,
+ DP_SEC_METADATA_PACKET_LINE, 20);
+ else {
+ REG_UPDATE_3(HDMI_METADATA_PACKET_CONTROL,
+ HDMI_METADATA_PACKET_ENABLE, 1,
+ HDMI_METADATA_PACKET_LINE_REFERENCE, 0,
+ HDMI_METADATA_PACKET_LINE, 2);
+
+ if (dmdata_mode == dmdata_dolby_vision)
+ REG_UPDATE(HDMI_CONTROL,
+ DOLBY_VISION_EN, 1);
+ }
+
+ REG_UPDATE(DME_CONTROL,
+ METADATA_ENGINE_EN, 1);
+ } else {
+ REG_UPDATE(DME_CONTROL,
+ METADATA_ENGINE_EN, 0);
+
+ if (dmdata_mode == dmdata_dp)
+ REG_UPDATE(DP_SEC_METADATA_TRANSMISSION,
+ DP_SEC_METADATA_PACKET_ENABLE, 0);
+ else {
+ REG_UPDATE(HDMI_METADATA_PACKET_CONTROL,
+ HDMI_METADATA_PACKET_ENABLE, 0);
+ REG_UPDATE(HDMI_CONTROL,
+ DOLBY_VISION_EN, 0);
+ }
+ }
+}
+void enc401_stream_encoder_set_stream_attribute_helper(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_crtc_timing *crtc_timing)
+{
+ switch (crtc_timing->pixel_encoding) {
+ case PIXEL_ENCODING_YCBCR422:
+ REG_UPDATE(HDMI_CONTROL, TMDS_PIXEL_ENCODING, 1);
+ break;
+ default:
+ REG_UPDATE(HDMI_CONTROL, TMDS_PIXEL_ENCODING, 0);
+ break;
+ }
+ REG_UPDATE(HDMI_CONTROL, TMDS_COLOR_FORMAT, 0);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.h
new file mode 100644
index 000000000000..d6b00cd246b1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.h
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2021 - Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DIO_STREAM_ENCODER_DCN401_H__
+#define __DC_DIO_STREAM_ENCODER_DCN401_H__
+
+#include "dcn30/dcn30_vpg.h"
+#include "dcn30/dcn30_afmt.h"
+#include "stream_encoder.h"
+#include "dcn20/dcn20_stream_encoder.h"
+
+/* Register field list for the DCN401 stream encoder: one SE_SF(register, field, mask_sh) entry per field. */
+#define SE_COMMON_MASK_SH_LIST_DCN401(mask_sh)\
+	SE_SF(DP0_DP_PIXEL_FORMAT, PIXEL_ENCODING_TYPE, mask_sh),\
+	SE_SF(DP0_DP_PIXEL_FORMAT, UNCOMPRESSED_PIXEL_FORMAT, mask_sh),\
+	SE_SF(DP0_DP_PIXEL_FORMAT, UNCOMPRESSED_COMPONENT_DEPTH, mask_sh),\
+	SE_SF(DP0_DP_PIXEL_FORMAT, COMPRESSED_PIXEL_FORMAT, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_NO_EXTRA_NULL_PACKET_FILLED, mask_sh),\
+	SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_ACP_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\
+	SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\
+	SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\
+	SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\
+	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
+	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
+	SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\
+	SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\
+	SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\
+	SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\
+	SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\
+	SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\
+	SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\
+	SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\
+	SE_SF(DIG1_HDMI_CONTROL, TMDS_PIXEL_ENCODING, mask_sh), /* NOTE(review): DIG1_ prefix, unlike the DIG0_HDMI_CONTROL entries elsewhere in this list - confirm */\
+	SE_SF(DIG1_HDMI_CONTROL, TMDS_COLOR_FORMAT, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL6, DP_SEC_GSP7_LINE_NUM, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP11_PPS, mask_sh),\
+	SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_LINE_NUM, mask_sh),\
+	SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
+	SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\
+	SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\
+	SE_SF(DP0_DP_VID_TIMING, DP_VID_N_INTERVAL, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CNTL, DIG_SOURCE_SELECT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC2_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC3_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC4_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC5_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC6_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC7_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC8_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC9_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC10_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC11_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC12_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC13_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL6, HDMI_GENERIC14_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC0_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL1, HDMI_GENERIC1_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC2_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL2, HDMI_GENERIC3_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC4_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL3, HDMI_GENERIC5_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC6_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL4, HDMI_GENERIC7_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC8_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL7, HDMI_GENERIC9_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC10_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL8, HDMI_GENERIC11_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC12_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL9, HDMI_GENERIC13_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL10, HDMI_GENERIC14_LINE, mask_sh),\
+	SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_REFERENCE, mask_sh),\
+	SE_SF(DP0_DP_MSA_VBID_MISC, DP_VBID6_LINE_NUM, mask_sh),\
+	SE_SF(DME0_DME_CONTROL, METADATA_ENGINE_EN, mask_sh),\
+	SE_SF(DME0_DME_CONTROL, METADATA_HUBP_REQUESTOR_ID, mask_sh),\
+	SE_SF(DME0_DME_CONTROL, METADATA_STREAM_TYPE, mask_sh),\
+	SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+	SE_SF(DP0_DP_SEC_METADATA_TRANSMISSION, DP_SEC_METADATA_PACKET_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_ENABLE, mask_sh),\
+	SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE_REFERENCE, mask_sh),\
+	SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, DOLBY_VISION_EN, mask_sh),\
+	SE_SF(DIG0_DIG_FE_EN_CNTL, DIG_FE_ENABLE, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_MODE, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CLK_CNTL, DIG_FE_SOFT_RESET, mask_sh),\
+	SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh),\
+	SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh),\
+	SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_PER_CYCLE, mask_sh),\
+	SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, mask_sh),\
+	SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, mask_sh),\
+	SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET, mask_sh),\
+	SE_SF(DIG0_DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, mask_sh),\
+	SE_SF(DIG0_STREAM_MAPPER_CONTROL, DIG_STREAM_LINK_TARGET, mask_sh),
+
+
+void dcn401_dio_stream_encoder_construct(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_context *ctx,
+ struct dc_bios *bp,
+ enum engine_id eng_id,
+ struct vpg *vpg,
+ struct afmt *afmt,
+ const struct dcn10_stream_enc_registers *regs,
+ const struct dcn10_stream_encoder_shift *se_shift,
+ const struct dcn10_stream_encoder_mask *se_mask);
+
+void enc401_set_dynamic_metadata(struct stream_encoder *enc,
+ bool enable_dme,
+ uint32_t hubp_requestor_id,
+ enum dynamic_metadata_mode dmdata_mode);
+void enc401_stream_encoder_set_stream_attribute_helper(
+ struct dcn10_stream_encoder *enc1,
+ struct dc_crtc_timing *crtc_timing);
+void enc401_stream_encoder_dp_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ enum dc_color_space output_color_space,
+ bool use_vsc_sdp_for_colorimetry,
+ uint32_t enable_sdp_splitting);
+void enc401_stream_encoder_dvi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ bool is_dual_link);
+void enc401_stream_encoder_dp_unblank(
+ struct dc_link *link,
+ struct stream_encoder *enc,
+ const struct encoder_unblank_param *param);
+void enc401_stream_encoder_enable(
+ struct stream_encoder *enc,
+ enum signal_type signal,
+ bool enable);
+void enc401_set_dig_input_mode(struct stream_encoder *enc, unsigned int pix_per_container);
+void enc401_stream_encoder_map_to_link(
+ struct stream_encoder *enc,
+ uint32_t stream_enc_inst,
+ uint32_t link_enc_inst);
+void enc401_read_state(struct stream_encoder *enc, struct enc_state *s);
+void enc401_stream_encoder_hdmi_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing,
+ int actual_pix_clk_khz,
+ bool enable_audio);
+#endif /* __DC_DIO_STREAM_ENCODER_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h b/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
index 4229369c57f4..f4d3f04ec857 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_cp_psp.h
@@ -26,6 +26,9 @@
#ifndef DM_CP_PSP_IF__H
#define DM_CP_PSP_IF__H
+/*
+ * Interface to CPLIB/PSP to enable ASSR
+ */
struct dc_link;
struct cp_psp_stream_config {
diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
index 7ce9a5b6c33b..9d160b39e8c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
@@ -36,6 +36,7 @@
struct dc_dp_mst_stream_allocation_table;
struct aux_payload;
enum aux_return_code_type;
+enum set_config_status;
/*
* Allocate memory accessible by the GPU
@@ -103,10 +104,16 @@ enum act_return_status dm_helpers_dp_mst_poll_for_allocation_change_trigger(
/*
* Sends ALLOCATE_PAYLOAD message.
*/
-bool dm_helpers_dp_mst_send_payload_allocation(
+void dm_helpers_dp_mst_send_payload_allocation(
struct dc_context *ctx,
- const struct dc_stream_state *stream,
- bool enable);
+ const struct dc_stream_state *stream);
+
+/*
+ * Update the relevant MST manager variables
+ */
+void dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+ struct dc_context *ctx,
+ const struct dc_stream_state *stream);
bool dm_helpers_dp_mst_start_top_mgr(
struct dc_context *ctx,
@@ -146,11 +153,24 @@ bool dm_helpers_submit_i2c(
const struct dc_link *link,
struct i2c_command *cmd);
+bool dm_helpers_execute_fused_io(
+ struct dc_context *ctx,
+ struct dc_link *link,
+ union dmub_rb_cmd *commands,
+ uint8_t count,
+ uint32_t timeout_us
+);
+
bool dm_helpers_dp_write_dsc_enable(
struct dc_context *ctx,
const struct dc_stream_state *stream,
bool enable
);
+
+bool dm_helpers_dp_write_hblank_reduction(
+ struct dc_context *ctx,
+ const struct dc_stream_state *stream);
+
bool dm_helpers_is_dp_sink_present(
struct dc_link *link);
@@ -194,7 +214,7 @@ int dm_helper_dmub_aux_transfer_sync(
const struct dc_link *link,
struct aux_payload *payload,
enum aux_return_code_type *operation_result);
-enum set_config_status;
+
int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
const struct dc_link *link,
struct set_config_cmd_payload *payload,
@@ -203,4 +223,7 @@ enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link);
enum dc_edid_status dm_helpers_get_sbios_edid(struct dc_link *link, struct dc_edid *edid);
+bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream);
+bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream);
+
#endif /* __DM_HELPERS__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index 4440d08743aa..b0e17a594ec3 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -40,8 +40,9 @@ enum pp_smu_ver {
PP_SMU_UNSUPPORTED,
PP_SMU_VER_RV,
PP_SMU_VER_NV,
- PP_SMU_VER_RN,
+ PP_SMU_VER_RN,
+ PP_SMU_VER_VG,
PP_SMU_VER_MAX
};
@@ -247,6 +248,7 @@ struct pp_smu_funcs_nv {
#define PP_SMU_NUM_MEMCLK_DPM_LEVELS 4
#define PP_SMU_NUM_DCLK_DPM_LEVELS 8
#define PP_SMU_NUM_VCLK_DPM_LEVELS 8
+#define PP_SMU_NUM_VPECLK_DPM_LEVELS 8
struct dpm_clock {
uint32_t Freq; // In MHz
@@ -262,6 +264,7 @@ struct dpm_clocks {
struct dpm_clock MemClocks[PP_SMU_NUM_MEMCLK_DPM_LEVELS];
struct dpm_clock VClocks[PP_SMU_NUM_VCLK_DPM_LEVELS];
struct dpm_clock DClocks[PP_SMU_NUM_DCLK_DPM_LEVELS];
+ struct dpm_clock VPEClocks[PP_SMU_NUM_VPECLK_DPM_LEVELS];
};
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h
index d0eed3b4771e..fbbf9c757b3c 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services.h
@@ -143,7 +143,7 @@ void generic_reg_wait(const struct dc_context *ctx,
unsigned int delay_between_poll_us, unsigned int time_out_num_tries,
const char *func_name, int line);
-unsigned int snprintf_count(char *pBuf, unsigned int bufSize, char *fmt, ...);
+unsigned int snprintf_count(char *pBuf, unsigned int bufSize, const char *fmt, ...);
/* These macros need to be used with soc15 registers in order to retrieve
* the actual offset.
@@ -275,12 +275,30 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc
#define PERF_TRACE_CTX(__CTX) dm_perf_trace_timestamp(__func__, __LINE__, __CTX)
/*
+ * SMU message tracing
+ */
+void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx);
+void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx);
+
+#define TRACE_SMU_MSG_DELAY(msg_id, param_in, delay, ctx) dm_trace_smu_enter(msg_id, param_in, delay, ctx)
+#define TRACE_SMU_MSG(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx)
+#define TRACE_SMU_MSG_ENTER(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx)
+#define TRACE_SMU_MSG_EXIT(success, response, ctx) dm_trace_smu_exit(success, response, ctx)
+
+/*
* DMUB Interfaces
*/
bool dm_execute_dmub_cmd(const struct dc_context *ctx, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
bool dm_execute_dmub_cmd_list(const struct dc_context *ctx, unsigned int count, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type);
/*
+ * ACPI Interfaces
+ */
+void dm_acpi_process_phy_transition_interlock(
+ const struct dc_context *ctx,
+ struct dm_process_phy_transition_init_params process_phy_transition_init_params);
+
+/*
* Debug and verification hooks
*/
@@ -294,4 +312,6 @@ void dm_dtn_log_end(struct dc_context *ctx,
char *dce_version_to_string(const int version);
+bool dc_supports_vrr(const enum dce_version v);
+
#endif /* __DM_SERVICES_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services_types.h b/drivers/gpu/drm/amd/display/dc/dm_services_types.h
index facf269c4326..3b093b8699ab 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services_types.h
@@ -127,7 +127,7 @@ struct dm_pp_single_disp_config {
uint32_t src_height;
uint32_t src_width;
uint32_t v_refresh;
- uint32_t sym_clock; /* HDMI only */
+ uint32_t pixel_clock; /* Pixel clock in kHz (for HDMI only: normalized) */
struct dc_link_settings link_settings; /* DP only */
};
@@ -275,4 +275,30 @@ enum dm_dmub_wait_type {
DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY,
};
+enum dm_acpi_transition_link_type {
+ hdmi_tmds,
+ hdmi_frl,
+ dp_8b_10b,
+ dp_128b_132b,
+ none /* NOTE(review): these enumerators are unprefixed ordinary identifiers (e.g. "none"); consider a common prefix to avoid clashes */
+};
+
+struct dm_process_phy_transition_init_params { /* argument of dm_acpi_process_phy_transition_interlock(), which takes it by value */
+ uint32_t phy_id;
+ uint8_t action; /* NOTE(review): the set of valid action codes is not defined here - document or use an enum */
+ uint32_t sym_clock_10khz; /* presumably the symbol clock in units of 10 kHz, per the field name - confirm */
+ enum signal_type signal;
+ enum dc_lane_count display_port_lanes_count;
+ enum dc_link_rate display_port_link_rate;
+ uint32_t transition_bitmask; /* NOTE(review): bit meanings are not defined here - document them */
+ uint8_t hdmi_frl_num_lanes;
+};
+
+struct dm_process_phy_transition_input_params {
+ uint32_t phy_id;
+ uint32_t transition_id;
+ uint32_t phy_configuration;
+ uint32_t data_rate; /* NOTE(review): unit is not stated here - document it */
+};
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 77cf5545c94c..b357683b4255 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -25,54 +25,30 @@
# It provides the general basic services required by other DAL
# subcomponents.
-ifdef CONFIG_X86
-dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
-dml_ccflags := $(dml_ccflags-y) -msse
-endif
-
-ifdef CONFIG_PPC64
-dml_ccflags := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_ARM64
-dml_rcflags := -mgeneral-regs-only
-endif
-
-ifdef CONFIG_LOONGARCH
-dml_ccflags := -mfpu=64
-dml_rcflags := -msoft-float
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifneq ($(call gcc-min-version, 70100),y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-dml_ccflags += -mpreferred-stack-boundary=4
-else
-dml_ccflags += -msse2
-endif
-endif
+dml_ccflags := $(CC_FLAGS_FPU)
+dml_rcflags := $(CC_FLAGS_NO_FPU)
ifneq ($(CONFIG_FRAME_WARN),0)
-frame_warn_flag := -Wframe-larger-than=2048
+ ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+ frame_warn_limit := 3072
+ else
+ frame_warn_limit := 2048
+ endif
+
+ ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y)
+ frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit)
+ endif
endif
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
@@ -87,6 +63,8 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(fram
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn35/dcn35_fpu.o := $(dml_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn351/dcn351_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags)
@@ -109,6 +87,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcfla
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags)
@@ -120,6 +100,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn35/dcn35_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn351/dcn351_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_rcflags)
@@ -132,8 +114,6 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags)
-DML = calcs/dce_calcs.o calcs/custom_float.o calcs/bw_fixed.o
-
ifdef CONFIG_DRM_AMD_DC_FP
DML += display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o
DML += dcn10/dcn10_fpu.o
@@ -152,6 +132,8 @@ DML += dcn301/dcn301_fpu.o
DML += dcn302/dcn302_fpu.o
DML += dcn303/dcn303_fpu.o
DML += dcn314/dcn314_fpu.o
+DML += dcn35/dcn35_fpu.o
+DML += dcn351/dcn351_fpu.o
DML += dsc/rc_calc_fpu.o
DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
index 50b0434354f8..74962791302f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
@@ -30,7 +30,7 @@
#include "dcn_calc_auto.h"
#include "dal_asic_id.h"
#include "resource.h"
-#include "dcn10/dcn10_resource.h"
+#include "resource/dcn10/dcn10_resource.h"
#include "dcn10/dcn10_hubbub.h"
#include "dml/dml1_display_rq_dlg_calc.h"
@@ -690,7 +690,7 @@ static void hack_disable_optional_pipe_split(struct dcn_bw_internal_vars *v)
static void hack_force_pipe_split(struct dcn_bw_internal_vars *v,
unsigned int pixel_rate_100hz)
{
- float pixel_rate_mhz = pixel_rate_100hz / 10000;
+ float pixel_rate_mhz = pixel_rate_100hz / 10000.0;
/*
* force enabling pipe split by lower dpp clock for DPM0 to just
@@ -748,7 +748,7 @@ static unsigned int get_highest_allowed_voltage_level(bool is_vmin_only_asic)
bool dcn_validate_bandwidth(
struct dc *dc,
struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
/*
* we want a breakdown of the various stages of validation, which the
@@ -1119,7 +1119,7 @@ bool dcn_validate_bandwidth(
BW_VAL_TRACE_END_VOLTAGE_LEVEL();
- if (v->voltage_level != number_of_states_plus_one && !fast_validate) {
+ if (v->voltage_level != number_of_states_plus_one && validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING) {
float bw_consumed = v->total_bandwidth_consumed_gbyte_per_second;
if (bw_consumed < v->fabric_and_dram_bandwidth_vmin0p65)
@@ -1286,7 +1286,7 @@ bool dcn_validate_bandwidth(
}
} else if (v->voltage_level == number_of_states_plus_one) {
BW_VAL_TRACE_SKIP(fail);
- } else if (fast_validate) {
+ } else if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) {
BW_VAL_TRACE_SKIP(fast);
}
@@ -1312,138 +1312,6 @@ bool dcn_validate_bandwidth(
return false;
}
-static unsigned int dcn_find_normalized_clock_vdd_Level(
- const struct dc *dc,
- enum dm_pp_clock_type clocks_type,
- int clocks_in_khz)
-{
- int vdd_level = dcn_bw_v_min0p65;
-
- if (clocks_in_khz == 0)/*todo some clock not in the considerations*/
- return vdd_level;
-
- switch (clocks_type) {
- case DM_PP_CLOCK_TYPE_DISPLAY_CLK:
- if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmax0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vnom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmin0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
- case DM_PP_CLOCK_TYPE_DISPLAYPHYCLK:
- if (clocks_in_khz > dc->dcn_soc->phyclkv_max0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->phyclkv_nom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->phyclkv_mid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->phyclkv_min0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
-
- case DM_PP_CLOCK_TYPE_DPPCLK:
- if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmax0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vnom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmin0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
-
- case DM_PP_CLOCK_TYPE_MEMORY_CLK:
- {
- unsigned factor = (ddr4_dram_factor_single_Channel * dc->dcn_soc->number_of_channels);
-
- if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9*1000000/factor) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8*1000000/factor) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72*1000000/factor) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65*1000000/factor) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- }
- break;
-
- case DM_PP_CLOCK_TYPE_DCFCLK:
- if (clocks_in_khz > dc->dcn_soc->dcfclkv_max0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_nom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_mid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_min0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
-
- default:
- break;
- }
- return vdd_level;
-}
-
-unsigned int dcn_find_dcfclk_suits_all(
- const struct dc *dc,
- struct dc_clocks *clocks)
-{
- unsigned vdd_level, vdd_level_temp;
- unsigned dcf_clk;
-
- /*find a common supported voltage level*/
- vdd_level = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DISPLAY_CLK, clocks->dispclk_khz);
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DISPLAYPHYCLK, clocks->phyclk_khz);
-
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DPPCLK, clocks->dppclk_khz);
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
-
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_MEMORY_CLK, clocks->fclk_khz);
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DCFCLK, clocks->dcfclk_khz);
-
- /*find that level conresponding dcfclk*/
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
- if (vdd_level == dcn_bw_v_max0p91) {
- BREAK_TO_DEBUGGER();
- dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000;
- } else if (vdd_level == dcn_bw_v_max0p9)
- dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000;
- else if (vdd_level == dcn_bw_v_nom0p8)
- dcf_clk = dc->dcn_soc->dcfclkv_nom0p8*1000;
- else if (vdd_level == dcn_bw_v_mid0p72)
- dcf_clk = dc->dcn_soc->dcfclkv_mid0p72*1000;
- else
- dcf_clk = dc->dcn_soc->dcfclkv_min0p65*1000;
-
- DC_LOG_BANDWIDTH_CALCS("\tdcf_clk for voltage = %d\n", dcf_clk);
- return dcf_clk;
-}
-
void dcn_bw_update_from_pplib_fclks(
struct dc *dc,
struct dm_pp_clock_levels_with_voltage *fclks)
@@ -1453,10 +1321,9 @@ void dcn_bw_update_from_pplib_fclks(
ASSERT(fclks->num_levels);
vmin0p65_idx = 0;
- vmid0p72_idx = fclks->num_levels -
- (fclks->num_levels > 2 ? 3 : (fclks->num_levels > 1 ? 2 : 1));
- vnom0p8_idx = fclks->num_levels - (fclks->num_levels > 1 ? 2 : 1);
- vmax0p9_idx = fclks->num_levels - 1;
+ vmid0p72_idx = fclks->num_levels > 2 ? fclks->num_levels - 3 : 0;
+ vnom0p8_idx = fclks->num_levels > 1 ? fclks->num_levels - 2 : 0;
+ vmax0p9_idx = fclks->num_levels > 0 ? fclks->num_levels - 1 : 0;
dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 =
32 * (fclks->data[vmin0p65_idx].clocks_in_khz / 1000.0) / 1000.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
index 2cbdd75429ff..6e669a2c5b2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
@@ -36,7 +36,7 @@
* Define the maximum amount of states supported by the ASIC. Every ASIC has a
* specific number of states; this macro defines the maximum number of states.
*/
-#define DC__VOLTAGE_STATES 20
+#define DC__VOLTAGE_STATES 40
#define DC__NUM_DPP__4 1
#define DC__NUM_DPP__0_PRESENT 1
#define DC__NUM_DPP__1_PRESENT 1
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index 63219ecd8478..1bf6b12f5663 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -29,4 +29,4 @@
void dcn10_resource_construct_fp(struct dc *dc);
-#endif /* __DCN20_FPU_H__ */
+#endif /* __DCN10_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index 5805fb02af14..7aaf13bbd4e4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -30,10 +30,12 @@
#include "dcn20/dcn20_resource.h"
#include "dcn21/dcn21_resource.h"
#include "clk_mgr/dcn21/rn_clk_mgr.h"
-
-#include "link.h"
+#include "link_service.h"
#include "dcn20_fpu.h"
+#include "dc_state_priv.h"
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
#ifndef MAX
@@ -438,7 +440,115 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = {
.use_urgent_burst_bw = 0
};
-struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = { 0 };
+struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv12_soc = {
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 560.0,
+ .fabricclk_mhz = 560.0,
+ .dispclk_mhz = 513.0,
+ .dppclk_mhz = 513.0,
+ .phyclk_mhz = 540.0,
+ .socclk_mhz = 560.0,
+ .dscclk_mhz = 171.0,
+ .dram_speed_mts = 1069.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 694.0,
+ .fabricclk_mhz = 694.0,
+ .dispclk_mhz = 642.0,
+ .dppclk_mhz = 642.0,
+ .phyclk_mhz = 600.0,
+ .socclk_mhz = 694.0,
+ .dscclk_mhz = 214.0,
+ .dram_speed_mts = 1324.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 875.0,
+ .fabricclk_mhz = 875.0,
+ .dispclk_mhz = 734.0,
+ .dppclk_mhz = 734.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 875.0,
+ .dscclk_mhz = 245.0,
+ .dram_speed_mts = 1670.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 1000.0,
+ .fabricclk_mhz = 1000.0,
+ .dispclk_mhz = 1100.0,
+ .dppclk_mhz = 1100.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1000.0,
+ .dscclk_mhz = 367.0,
+ .dram_speed_mts = 2000.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 2000.0,
+ },
+ {
+ .state = 5,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 1200.0,
+ .dispclk_mhz = 1284.0,
+ .dppclk_mhz = 1284.0,
+ .phyclk_mhz = 810.0,
+ .socclk_mhz = 1200.0,
+ .dscclk_mhz = 428.0,
+ .dram_speed_mts = 2000.0,
+ },
+ },
+
+ .num_states = 5,
+ .sr_exit_time_us = 1.9,
+ .sr_enter_plus_exit_time_us = 4.4,
+ .urgent_latency_us = 3.0,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 40.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 40.0,
+ .max_avg_sdp_bw_use_normal_percent = 40.0,
+ .max_avg_dram_bw_use_normal_percent = 40.0,
+ .writeback_latency_us = 12.0,
+ .ideal_dram_bw_after_urgent_percent = 40.0,
+ .max_request_size_bytes = 256,
+ .dram_channel_width_bytes = 16,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .dcn_downspread_percent = 0.5,
+ .downspread_percent = 0.5,
+ .dram_page_open_time_ns = 50.0,
+ .dram_rw_turnaround_time_ns = 17.5,
+ .dram_return_buffer_per_channel_bytes = 8192,
+ .round_trip_ping_latency_dcfclk_cycles = 131,
+ .urgent_out_of_order_return_per_channel_bytes = 4096,
+ .channel_interleave_bytes = 256,
+ .num_banks = 8,
+ .num_chans = 16,
+ .vmm_page_size_bytes = 4096,
+ .dram_clock_change_latency_us = 45.0,
+ .writeback_dram_clock_change_latency_us = 23.0,
+ .return_bus_width_bytes = 64,
+ .dispclk_dppclk_vco_speed_mhz = 3850,
+ .xfc_bus_transport_time_us = 20,
+ .xfc_xbuf_latency_tolerance_us = 50,
+ .use_urgent_burst_bw = 0,
+};
struct _vcs_dpi_ip_params_st dcn2_1_ip = {
.odm_capable = 1,
@@ -948,10 +1058,8 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc
{
int plane_count;
int i;
- unsigned int min_dst_y_next_start_us;
plane_count = 0;
- min_dst_y_next_start_us = 0;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
if (context->res_ctx.pipe_ctx[i].plane_state)
plane_count++;
@@ -973,29 +1081,21 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc
else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
struct dc_link *link = context->streams[0]->sink->link;
struct dc_stream_status *stream_status = &context->stream_status[0];
- struct dc_stream_state *current_stream = context->streams[0];
int minmum_z8_residency = dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
- bool is_pwrseq0 = link->link_index == 0;
- bool isFreesyncVideo;
-
- isFreesyncVideo = current_stream->adjust.v_total_min == current_stream->adjust.v_total_max;
- isFreesyncVideo = isFreesyncVideo && current_stream->timing.v_total < current_stream->adjust.v_total_min;
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (context->res_ctx.pipe_ctx[i].stream == current_stream && isFreesyncVideo) {
- min_dst_y_next_start_us = context->res_ctx.pipe_ctx[i].dlg_regs.min_dst_y_next_start_us;
- break;
- }
- }
+ bool is_pwrseq0 = (link && link->link_index == 0);
+ bool is_psr = (link && (link->psr_settings.psr_version == DC_PSR_VERSION_1 ||
+ link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) && !link->panel_config.psr.disable_psr);
+ bool is_replay = link && link->replay_settings.replay_feature_enabled;
/* Don't support multi-plane configurations */
if (stream_status->plane_count > 1)
return DCN_ZSTATE_SUPPORT_DISALLOW;
- if (is_pwrseq0 && (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || min_dst_y_next_start_us > 5000))
+ if (is_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0)
return DCN_ZSTATE_SUPPORT_ALLOW;
- else if (is_pwrseq0 && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr)
- return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
+ else if (is_pwrseq0 && (is_psr || is_replay))
+ return DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY;
else
return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY : DCN_ZSTATE_SUPPORT_DISALLOW;
} else {
@@ -1031,7 +1131,8 @@ static void dcn20_adjust_freesync_v_startup(
patched_crtc_timing.v_addressable -
patched_crtc_timing.v_border_top;
- newVstartup = asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start);
+ /* The newVStartUp is 1 line before vsync point */
+ newVstartup = asic_blank_end + 1;
*vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start);
}
@@ -1085,7 +1186,7 @@ void dcn20_calculate_dlg_params(struct dc *dc,
pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
- if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
context->res_ctx.pipe_ctx[i].unbounded_req = false;
@@ -1213,7 +1314,7 @@ static void swizzle_to_dml_params(
int dcn20_populate_dml_pipes_from_context(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int pipe_cnt, i;
bool synchronized_vblank = true;
@@ -1310,11 +1411,11 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc,
pipes[pipe_cnt].dout.is_virtual = 0;
pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min;
pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max;
- switch (resource_get_num_odm_splits(&res_ctx->pipe_ctx[i])) {
- case 1:
+ switch (resource_get_odm_slice_count(&res_ctx->pipe_ctx[i])) {
+ case 2:
pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_2to1;
break;
- case 3:
+ case 4:
pipes[pipe_cnt].pipe.dest.odm_combine = dm_odm_combine_mode_4to1;
break;
default:
@@ -1435,7 +1536,7 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc,
*/
if (res_ctx->pipe_ctx[i].plane_state &&
(res_ctx->pipe_ctx[i].plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE ||
- res_ctx->pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM))
+ dc_state_get_pipe_subvp_type(context, &res_ctx->pipe_ctx[i]) == SUBVP_PHANTOM))
pipes[pipe_cnt].pipe.src.num_cursors = 0;
else
pipes[pipe_cnt].pipe.src.num_cursors = dc->dml.ip.number_of_cursors;
@@ -1461,6 +1562,8 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc,
pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width;
pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256;
pipes[pipe_cnt].pipe.src.source_format = dm_444_32;
+ pipes[pipe_cnt].pipe.src.cur0_src_width = 0;
+ pipes[pipe_cnt].pipe.src.cur1_src_width = 0;
pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/
pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/
pipes[pipe_cnt].pipe.dest.full_recout_width = pipes[pipe_cnt].pipe.dest.recout_width; /*when is_hsplit != 1*/
@@ -1629,7 +1732,7 @@ void dcn20_calculate_wm(struct dc *dc, struct dc_state *context,
int *out_pipe_cnt,
int *pipe_split_from,
int vlevel,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int pipe_cnt, i, pipe_idx;
@@ -1676,10 +1779,10 @@ void dcn20_calculate_wm(struct dc *dc, struct dc_state *context,
if (pipe_cnt != pipe_idx) {
if (dc->res_pool->funcs->populate_dml_pipes)
pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
- context, pipes, fast_validate);
+ context, pipes, validate_mode);
else
pipe_cnt = dcn20_populate_dml_pipes_from_context(dc,
- context, pipes, fast_validate);
+ context, pipes, validate_mode);
}
*out_pipe_cnt = pipe_cnt;
@@ -1781,10 +1884,10 @@ void dcn20_update_bounding_box(struct dc *dc,
bb->clock_limits[i].fabricclk_mhz = (min_fclk_required_by_uclk < min_dcfclk) ?
min_dcfclk : min_fclk_required_by_uclk;
- bb->clock_limits[i].socclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->socClockInKhz / 1000) ?
+ bb->clock_limits[i].socclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->socClockInKhz / 1000.0) ?
max_clocks->socClockInKhz / 1000 : bb->clock_limits[i].fabricclk_mhz;
- bb->clock_limits[i].dcfclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->dcfClockInKhz / 1000) ?
+ bb->clock_limits[i].dcfclk_mhz = (bb->clock_limits[i].fabricclk_mhz > max_clocks->dcfClockInKhz / 1000.0) ?
max_clocks->dcfClockInKhz / 1000 : bb->clock_limits[i].fabricclk_mhz;
bb->clock_limits[i].dispclk_mhz = max_clocks->displayClockInKhz / 1000;
@@ -1816,35 +1919,35 @@ void dcn20_cap_soc_clocks(struct _vcs_dpi_soc_bounding_box_st *bb,
// First pass - cap all clocks higher than the reported max
for (i = 0; i < bb->num_states; i++) {
- if ((bb->clock_limits[i].dcfclk_mhz > (max_clocks.dcfClockInKhz / 1000))
+ if ((bb->clock_limits[i].dcfclk_mhz > (max_clocks.dcfClockInKhz / 1000.0))
&& max_clocks.dcfClockInKhz != 0)
bb->clock_limits[i].dcfclk_mhz = (max_clocks.dcfClockInKhz / 1000);
- if ((bb->clock_limits[i].dram_speed_mts > (max_clocks.uClockInKhz / 1000) * 16)
+ if ((bb->clock_limits[i].dram_speed_mts > (max_clocks.uClockInKhz / 1000.0) * 16)
&& max_clocks.uClockInKhz != 0)
bb->clock_limits[i].dram_speed_mts = (max_clocks.uClockInKhz / 1000) * 16;
- if ((bb->clock_limits[i].fabricclk_mhz > (max_clocks.fabricClockInKhz / 1000))
+ if ((bb->clock_limits[i].fabricclk_mhz > (max_clocks.fabricClockInKhz / 1000.0))
&& max_clocks.fabricClockInKhz != 0)
bb->clock_limits[i].fabricclk_mhz = (max_clocks.fabricClockInKhz / 1000);
- if ((bb->clock_limits[i].dispclk_mhz > (max_clocks.displayClockInKhz / 1000))
+ if ((bb->clock_limits[i].dispclk_mhz > (max_clocks.displayClockInKhz / 1000.0))
&& max_clocks.displayClockInKhz != 0)
bb->clock_limits[i].dispclk_mhz = (max_clocks.displayClockInKhz / 1000);
- if ((bb->clock_limits[i].dppclk_mhz > (max_clocks.dppClockInKhz / 1000))
+ if ((bb->clock_limits[i].dppclk_mhz > (max_clocks.dppClockInKhz / 1000.0))
&& max_clocks.dppClockInKhz != 0)
bb->clock_limits[i].dppclk_mhz = (max_clocks.dppClockInKhz / 1000);
- if ((bb->clock_limits[i].phyclk_mhz > (max_clocks.phyClockInKhz / 1000))
+ if ((bb->clock_limits[i].phyclk_mhz > (max_clocks.phyClockInKhz / 1000.0))
&& max_clocks.phyClockInKhz != 0)
bb->clock_limits[i].phyclk_mhz = (max_clocks.phyClockInKhz / 1000);
- if ((bb->clock_limits[i].socclk_mhz > (max_clocks.socClockInKhz / 1000))
+ if ((bb->clock_limits[i].socclk_mhz > (max_clocks.socClockInKhz / 1000.0))
&& max_clocks.socClockInKhz != 0)
bb->clock_limits[i].socclk_mhz = (max_clocks.socClockInKhz / 1000);
- if ((bb->clock_limits[i].dscclk_mhz > (max_clocks.dscClockInKhz / 1000))
+ if ((bb->clock_limits[i].dscclk_mhz > (max_clocks.dscClockInKhz / 1000.0))
&& max_clocks.dscClockInKhz != 0)
bb->clock_limits[i].dscclk_mhz = (max_clocks.dscClockInKhz / 1000);
}
@@ -1923,7 +2026,7 @@ void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st
}
static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes)
{
bool out = false;
@@ -1932,12 +2035,11 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co
int vlevel = 0;
int pipe_split_from[MAX_PIPES];
int pipe_cnt = 0;
- display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
DC_LOGGER_INIT(dc->ctx->logger);
BW_VAL_TRACE_COUNT();
- out = dcn20_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, fast_validate);
+ out = dcn20_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, validate_mode);
if (pipe_cnt == 0)
goto validate_out;
@@ -1947,12 +2049,12 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co
BW_VAL_TRACE_END_VOLTAGE_LEVEL();
- if (fast_validate) {
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) {
BW_VAL_TRACE_SKIP(fast);
goto validate_out;
}
- dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
+ dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, validate_mode);
dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
BW_VAL_TRACE_END_WATERMARKS();
@@ -1967,16 +2069,14 @@ validate_fail:
out = false;
validate_out:
- kfree(pipes);
BW_VAL_TRACE_FINISH();
return out;
}
-bool dcn20_validate_bandwidth_fp(struct dc *dc,
- struct dc_state *context,
- bool fast_validate)
+bool dcn20_validate_bandwidth_fp(struct dc *dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes)
{
bool voltage_supported = false;
bool full_pstate_supported = false;
@@ -1994,12 +2094,11 @@ bool dcn20_validate_bandwidth_fp(struct dc *dc,
/*Unsafe due to current pipe merge and split logic*/
ASSERT(context != dc->current_state);
- if (fast_validate) {
- return dcn20_validate_bandwidth_internal(dc, context, true);
- }
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING)
+ return dcn20_validate_bandwidth_internal(dc, context, validate_mode, pipes);
// Best case, we support full UCLK switch latency
- voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false);
+ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING, pipes);
full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support;
if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 ||
@@ -2011,7 +2110,8 @@ bool dcn20_validate_bandwidth_fp(struct dc *dc,
// Fallback: Try to only support G6 temperature read latency
context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us;
- voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false);
+ memset(pipes, 0, dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st));
+ voltage_supported = dcn20_validate_bandwidth_internal(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING, pipes);
dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support;
if (voltage_supported && (dummy_pstate_supported || !(context->stream_count))) {
@@ -2054,14 +2154,14 @@ void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v,
int dcn21_populate_dml_pipes_from_context(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
uint32_t pipe_cnt;
int i;
dc_assert_fp_enabled();
- pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+ pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
for (i = 0; i < pipe_cnt; i++) {
@@ -2137,7 +2237,7 @@ static void dcn21_calculate_wm(struct dc *dc, struct dc_state *context,
int *out_pipe_cnt,
int *pipe_split_from,
int vlevel_req,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int pipe_cnt, i, pipe_idx;
int vlevel, vlevel_max;
@@ -2179,10 +2279,10 @@ static void dcn21_calculate_wm(struct dc *dc, struct dc_state *context,
if (pipe_cnt != pipe_idx) {
if (dc->res_pool->funcs->populate_dml_pipes)
pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc,
- context, pipes, fast_validate);
+ context, pipes, validate_mode);
else
pipe_cnt = dcn21_populate_dml_pipes_from_context(dc,
- context, pipes, fast_validate);
+ context, pipes, validate_mode);
}
*out_pipe_cnt = pipe_cnt;
@@ -2216,9 +2316,8 @@ static void dcn21_calculate_wm(struct dc *dc, struct dc_state *context,
&context->bw_ctx.dml, pipes, pipe_cnt);
}
-bool dcn21_validate_bandwidth_fp(struct dc *dc,
- struct dc_state *context,
- bool fast_validate)
+bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes)
{
bool out = false;
@@ -2227,7 +2326,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
int vlevel = 0;
int pipe_split_from[MAX_PIPES];
int pipe_cnt = 0;
- display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
DC_LOGGER_INIT(dc->ctx->logger);
BW_VAL_TRACE_COUNT();
@@ -2237,7 +2335,7 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
/*Unsafe due to current pipe merge and split logic*/
ASSERT(context != dc->current_state);
- out = dcn21_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, fast_validate);
+ out = dcn21_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, validate_mode);
if (pipe_cnt == 0)
goto validate_out;
@@ -2247,12 +2345,12 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
BW_VAL_TRACE_END_VOLTAGE_LEVEL();
- if (fast_validate) {
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) {
BW_VAL_TRACE_SKIP(fast);
goto validate_out;
}
- dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
+ dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, validate_mode);
dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
BW_VAL_TRACE_END_WATERMARKS();
@@ -2267,7 +2365,6 @@ validate_fail:
out = false;
validate_out:
- kfree(pipes);
BW_VAL_TRACE_FINISH();
@@ -2276,7 +2373,7 @@ validate_out:
static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_limit_table *clk_table, unsigned int high_voltage_lvl)
{
- struct _vcs_dpi_voltage_scaling_st low_pstate_lvl;
+ struct _vcs_dpi_voltage_scaling_st low_pstate_lvl = {0};
int i;
low_pstate_lvl.state = 1;
@@ -2381,7 +2478,7 @@ void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc,
int pipe_cnt, i, j;
double max_calc_writeback_dispclk;
double writeback_dispclk;
- struct writeback_st dout_wb;
+ struct writeback_st dout_wb = {0};
dc_assert_fp_enabled();
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
index c51badf7b68a..aed00039ca62 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
@@ -44,14 +44,14 @@ void dcn20_calculate_dlg_params(struct dc *dc,
int dcn20_populate_dml_pipes_from_context(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
void dcn20_calculate_wm(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int *out_pipe_cnt,
int *pipe_split_from,
int vlevel,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
void dcn20_cap_soc_clocks(struct _vcs_dpi_soc_bounding_box_st *bb,
struct pp_smu_nv_clock_table max_clocks);
void dcn20_update_bounding_box(struct dc *dc,
@@ -61,9 +61,8 @@ void dcn20_update_bounding_box(struct dc *dc,
unsigned int num_states);
void dcn20_patch_bounding_box(struct dc *dc,
struct _vcs_dpi_soc_bounding_box_st *bb);
-bool dcn20_validate_bandwidth_fp(struct dc *dc,
- struct dc_state *context,
- bool fast_validate);
+bool dcn20_validate_bandwidth_fp(struct dc *dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes);
void dcn20_fpu_set_wm_ranges(int i,
struct pp_smu_wm_range_sets *ranges,
struct _vcs_dpi_soc_bounding_box_st *loaded_bb);
@@ -76,10 +75,9 @@ void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v,
int dcn21_populate_dml_pipes_from_context(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate);
-bool dcn21_validate_bandwidth_fp(struct dc *dc,
- struct dc_state *context,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
+bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, enum
+ dc_validate_mode, display_e2e_pipe_params_st *pipes);
void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
index 7bf4bb7ad044..0c8c4a080c50 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
@@ -785,12 +785,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBW = 0;
@@ -1017,7 +1014,7 @@ static unsigned int CalculateVMAndRowBytes(
if (ScanDirection == dm_horz)
FractionOfPTEReturnDrop = 0;
else
- FractionOfPTEReturnDrop = 7 / 8;
+ FractionOfPTEReturnDrop = 7.0 / 8;
} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeight = 16 * BlockHeight256Bytes;
PixelPTEReqWidth = 16 * BlockWidth256Bytes;
@@ -3231,22 +3228,22 @@ static unsigned int TruncToValidBPP(
if (Format == dm_420) {
if (DecimalBPP < 6)
return BPP_INVALID;
- else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1 / 16)
- return 1.5 * DSCInputBitPerComponent - 1 / 16;
+ else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1.0 / 16)
+ return 1.5 * DSCInputBitPerComponent - 1.0 / 16;
else
return dml_floor(16 * DecimalBPP, 1) / 16;
} else if (Format == dm_n422) {
if (DecimalBPP < 7)
return BPP_INVALID;
- else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1 / 16)
- return 2 * DSCInputBitPerComponent - 1 / 16;
+ else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1.0 / 16)
+ return 2 * DSCInputBitPerComponent - 1.0 / 16;
else
return dml_floor(16 * DecimalBPP, 1) / 16;
} else {
if (DecimalBPP < 8)
return BPP_INVALID;
- else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1 / 16)
- return 3 * DSCInputBitPerComponent - 1 / 16;
+ else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1.0 / 16)
+ return 3 * DSCInputBitPerComponent - 1.0 / 16;
else
return dml_floor(16 * DecimalBPP, 1) / 16;
}
@@ -4322,7 +4319,7 @@ void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
locals->RoundedUpMaxSwathSizeBytesC = 0;
}
- if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024 / 2) {
+ if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024.0 / 2) {
locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k];
locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k];
} else {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
index 989d83ee3842..c935903b68e1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
@@ -845,12 +845,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBW = 0;
@@ -1077,7 +1074,7 @@ static unsigned int CalculateVMAndRowBytes(
if (ScanDirection == dm_horz)
FractionOfPTEReturnDrop = 0;
else
- FractionOfPTEReturnDrop = 7 / 8;
+ FractionOfPTEReturnDrop = 7.0 / 8;
} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeight = 16 * BlockHeight256Bytes;
PixelPTEReqWidth = 16 * BlockWidth256Bytes;
@@ -4443,7 +4440,7 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
locals->RoundedUpMaxSwathSizeBytesC = 0;
}
- if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024 / 2) {
+ if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte[0] * 1024.0 / 2) {
locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k];
locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k];
} else {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
index 548cdef8a8ad..9c58ff1069d6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c
@@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
{
- unsigned int ret_val = 0;
+ unsigned int ret_val = 1;
if (source_format == dm_444_16) {
if (!is_chroma)
@@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
if (swath_height_c > 0)
log2_swath_height_c = dml_log2(swath_height_c);
-
- if (req128_c && log2_swath_height_c > 0)
- log2_swath_height_c -= 1;
}
rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
@@ -446,8 +443,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
blk_bytes = surf_linear ?
256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double) blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -494,8 +489,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
- log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -653,7 +646,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
// the dpte_group_bytes is reduced for the specific case of vertical
// access of a tile surface that has dpte request of 8x1 ptes.
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
//full size
@@ -688,12 +681,11 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
const display_pipe_source_params_st *pipe_src_param,
bool is_chroma)
{
- bool mode_422 = false;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// TODO check if ppe apply for both luma and chroma in 422 case
if (is_chroma) {
@@ -825,7 +817,6 @@ static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
double min_dst_y_ttu_vblank;
unsigned int dlg_vblank_start;
bool dual_plane;
- bool mode_422;
unsigned int access_dir;
unsigned int vp_height_l;
unsigned int vp_width_l;
@@ -971,7 +962,6 @@ static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
// Source
// dcc_en = src.dcc;
dual_plane = is_dual_plane((enum source_format_class)(src->source_format));
- mode_422 = false; // TODO
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
@@ -1148,18 +1138,8 @@ static void dml20_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (htaps_l <= 1)
min_hratio_fact_l = 2.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
index 0fc9f3e3ffae..570e6e39eb45 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c
@@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib,
static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
{
- unsigned int ret_val = 0;
+ unsigned int ret_val = 1;
if (source_format == dm_444_16) {
if (!is_chroma)
@@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
if (swath_height_c > 0)
log2_swath_height_c = dml_log2(swath_height_c);
-
- if (req128_c && log2_swath_height_c > 0)
- log2_swath_height_c -= 1;
}
rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
@@ -446,8 +443,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
blk_bytes = surf_linear ?
256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double) blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -494,8 +489,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
- log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -653,7 +646,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
// the dpte_group_bytes is reduced for the specific case of vertical
// access of a tile surface that has dpte request of 8x1 ptes.
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
//full size
@@ -688,12 +681,11 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
const display_pipe_source_params_st *pipe_src_param,
bool is_chroma)
{
- bool mode_422 = false;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// TODO check if ppe apply for both luma and chroma in 422 case
if (is_chroma) {
@@ -825,7 +817,6 @@ static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
double min_dst_y_ttu_vblank;
unsigned int dlg_vblank_start;
bool dual_plane;
- bool mode_422;
unsigned int access_dir;
unsigned int vp_height_l;
unsigned int vp_width_l;
@@ -972,7 +963,6 @@ static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
// Source
// dcc_en = src.dcc;
dual_plane = is_dual_plane((enum source_format_class)(src->source_format));
- mode_422 = false; // TODO
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
@@ -1149,18 +1139,8 @@ static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (htaps_l <= 1)
min_hratio_fact_l = 2.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index 57cf0358cc43..cd8cca651419 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -1049,12 +1049,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -1399,7 +1396,7 @@ static unsigned int CalculateVMAndRowBytes(
if (ScanDirection == dm_horz)
FractionOfPTEReturnDrop = 0;
else
- FractionOfPTEReturnDrop = 7 / 8;
+ FractionOfPTEReturnDrop = 7.0 / 8;
} else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeightPTEs = 16;
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
index 618f4b682ab1..f549da082c01 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
@@ -53,7 +53,7 @@ static void calculate_ttu_cursor(
static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
{
- unsigned int ret_val = 0;
+ unsigned int ret_val = 1;
if (source_format == dm_444_16) {
if (!is_chroma)
@@ -297,9 +297,6 @@ static void handle_det_buf_split(
if (swath_height_c > 0)
log2_swath_height_c = dml_log2(swath_height_c);
-
- if (req128_c && log2_swath_height_c > 0)
- log2_swath_height_c -= 1;
}
rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;
@@ -438,8 +435,6 @@ static void get_meta_and_pte_attr(
blk_bytes = surf_linear ?
256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double) blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -488,8 +483,6 @@ static void get_meta_and_pte_attr(
- log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -659,7 +652,7 @@ static void get_meta_and_pte_attr(
if (hostvm_enable)
rq_sizing_param->dpte_group_bytes = 512;
else {
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
//full size
@@ -697,12 +690,11 @@ static void get_surf_rq_param(
const display_pipe_params_st *pipe_param,
bool is_chroma)
{
- bool mode_422 = false;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// FIXME check if ppe apply for both luma and chroma in 422 case
if (is_chroma) {
@@ -871,7 +863,6 @@ static void dml_rq_dlg_get_dlg_params(
double min_dst_y_ttu_vblank;
unsigned int dlg_vblank_start;
bool dual_plane;
- bool mode_422;
unsigned int access_dir;
unsigned int vp_height_l;
unsigned int vp_width_l;
@@ -1023,7 +1014,6 @@ static void dml_rq_dlg_get_dlg_params(
// Source
// dcc_en = src.dcc;
dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
- mode_422 = false; // FIXME
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0);
// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1);
@@ -1200,18 +1190,8 @@ static void dml_rq_dlg_get_dlg_params(
dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (hratio_l <= 1)
min_hratio_fact_l = 2.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
index ccb4ad78f667..e5f5c0663750 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
@@ -178,89 +178,13 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_0_soc = {
};
-void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
- double vtotal_avg)
-{
- struct optc *optc1 = DCN10TG_FROM_TG(optc);
- double vtotal_min, vtotal_max;
- double ratio, modulo, phase;
- uint32_t vblank_start;
- uint32_t v_total_mask_value = 0;
-
- dc_assert_fp_enabled();
-
- /* Compute VTOTAL_MIN and VTOTAL_MAX, so that
- * VOTAL_MAX - VTOTAL_MIN = 1
- */
- v_total_mask_value = 16;
- vtotal_min = dcn_bw_floor(vtotal_avg);
- vtotal_max = dcn_bw_ceil(vtotal_avg);
-
- /* Check that bottom VBLANK is at least 2 lines tall when running with
- * VTOTAL_MIN. Note that VTOTAL registers are defined as 'total number
- * of lines in a frame - 1'.
- */
- REG_GET(OTG_V_BLANK_START_END, OTG_V_BLANK_START,
- &vblank_start);
- ASSERT(vtotal_min >= vblank_start + 1);
-
- /* Special case where the average frame rate can be achieved
- * without using the DTO
- */
- if (vtotal_min == vtotal_max) {
- REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min);
-
- optc->funcs->set_vtotal_min_max(optc, 0, 0);
- REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0);
- REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0);
- REG_UPDATE_3(OTG_V_TOTAL_CONTROL,
- OTG_V_TOTAL_MIN_SEL, 0,
- OTG_V_TOTAL_MAX_SEL, 0,
- OTG_SET_V_TOTAL_MIN_MASK_EN, 0);
- return;
- }
-
- ratio = vtotal_max - vtotal_avg;
- modulo = 65536.0 * 65536.0 - 1.0; /* 2^32 - 1 */
- phase = ratio * modulo;
-
- /* Special cases where the DTO phase gets rounded to 0 or
- * to DTO modulo
- */
- if (phase <= 0 || phase >= modulo) {
- REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL,
- phase <= 0 ?
- (uint32_t)vtotal_max : (uint32_t)vtotal_min);
- REG_SET(OTG_V_TOTAL_MIN, 0, OTG_V_TOTAL_MIN, 0);
- REG_SET(OTG_V_TOTAL_MAX, 0, OTG_V_TOTAL_MAX, 0);
- REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, 0);
- REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, 0);
- REG_UPDATE_3(OTG_V_TOTAL_CONTROL,
- OTG_V_TOTAL_MIN_SEL, 0,
- OTG_V_TOTAL_MAX_SEL, 0,
- OTG_SET_V_TOTAL_MIN_MASK_EN, 0);
- return;
- }
- REG_UPDATE_6(OTG_V_TOTAL_CONTROL,
- OTG_V_TOTAL_MIN_SEL, 1,
- OTG_V_TOTAL_MAX_SEL, 1,
- OTG_SET_V_TOTAL_MIN_MASK_EN, 1,
- OTG_SET_V_TOTAL_MIN_MASK, v_total_mask_value,
- OTG_VTOTAL_MID_REPLACING_MIN_EN, 0,
- OTG_VTOTAL_MID_REPLACING_MAX_EN, 0);
- REG_SET(OTG_V_TOTAL, 0, OTG_V_TOTAL, (uint32_t)vtotal_min);
- optc->funcs->set_vtotal_min_max(optc, vtotal_min, vtotal_max);
- REG_SET(OTG_M_CONST_DTO0, 0, OTG_M_CONST_DTO_PHASE, (uint32_t)phase);
- REG_SET(OTG_M_CONST_DTO1, 0, OTG_M_CONST_DTO_MODULO, (uint32_t)modulo);
-}
-
void dcn30_fpu_populate_dml_writeback_from_context(
struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
{
int pipe_cnt, i, j;
double max_calc_writeback_dispclk;
double writeback_dispclk;
- struct writeback_st dout_wb;
+ struct writeback_st dout_wb = {0};
dc_assert_fp_enabled();
@@ -387,13 +311,17 @@ void dcn30_fpu_calculate_wm_and_dlg(
double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
unsigned int dummy_latency_index = 0;
+ struct dc_stream_status *stream_status = NULL;
dc_assert_fp_enabled();
context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
- for (i = 0; i < context->stream_count; i++) {
+ for (i = 0; i < context->stream_count; i++) {
+ stream_status = NULL;
if (context->streams[i])
- context->streams[i]->fpo_in_use = false;
+ stream_status = dc_state_get_stream_status(context, context->streams[i]);
+ if (stream_status)
+ stream_status->fpo_in_use = false;
}
if (!pstate_en) {
@@ -411,7 +339,8 @@ void dcn30_fpu_calculate_wm_and_dlg(
* newly found dummy_latency_index
*/
context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
- dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false, true);
+ dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel,
+ DC_VALIDATE_MODE_AND_PROGRAMMING, true);
maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
@@ -702,7 +631,8 @@ int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
while (dummy_latency_index < max_latency_table_entries) {
context->bw_ctx.dml.soc.dram_clock_change_latency_us =
dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
- dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false, true);
+ dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel,
+ DC_VALIDATE_MODE_AND_PROGRAMMING, true);
if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank ==
dm_allow_self_refresh_and_mclk_switch)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
index cab864095ce7..e3b6ad6a8784 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
@@ -29,9 +29,6 @@
#include "core_types.h"
#include "dcn20/dcn20_optc.h"
-void optc3_fpu_set_vrr_m_const(struct timing_generator *optc,
- double vtotal_avg);
-
void dcn30_fpu_populate_dml_writeback_from_context(
struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index ad741a723c0e..8d24763938ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -281,10 +281,10 @@ static void CalculateDynamicMetadataParameters(
double DISPCLK,
double DCFClkDeepSleep,
double PixelClock,
- long HTotal,
- long VBlank,
- long DynamicMetadataTransmittedBytes,
- long DynamicMetadataLinesBeforeActiveRequired,
+ unsigned int HTotal,
+ unsigned int VBlank,
+ unsigned int DynamicMetadataTransmittedBytes,
+ int DynamicMetadataLinesBeforeActiveRequired,
int InterlaceEnable,
bool ProgressiveToInterlaceUnitInOPP,
double *Tsetup,
@@ -1002,6 +1002,7 @@ static bool CalculatePrefetchSchedule(
dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
- (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
+ dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
Tsw_oto = Lsw_oto * LineTime;
@@ -1280,12 +1281,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -1775,15 +1773,6 @@ static unsigned int CalculateVMAndRowBytes(
*PixelPTEReqWidth = 32768.0 / BytePerPixel;
*PTERequestSize = 64;
FractionOfPTEReturnDrop = 0;
- } else if (MacroTileSizeBytes == 4096) {
- PixelPTEReqHeightPTEs = 1;
- *PixelPTEReqHeight = MacroTileHeight;
- *PixelPTEReqWidth = 8 * *MacroTileWidth;
- *PTERequestSize = 64;
- if (ScanDirection != dm_vert)
- FractionOfPTEReturnDrop = 0;
- else
- FractionOfPTEReturnDrop = 7 / 8;
} else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeightPTEs = 16;
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
@@ -3277,8 +3266,8 @@ static double CalculateWriteBackDelay(
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
- double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
- long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
+ double DCFClkDeepSleep, double PixelClock, unsigned int HTotal, unsigned int VBlank, unsigned int DynamicMetadataTransmittedBytes,
+ int DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
double TotalRepeaterDelayTime = 0;
@@ -3535,14 +3524,13 @@ static double TruncToValidBPP(
return DesiredBPP;
}
}
- return BPP_INVALID;
}
void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
{
struct vba_vars_st *v = &mode_lib->vba;
int MinPrefetchMode, MaxPrefetchMode;
- int i;
+ int i, start_state;
unsigned int j, k, m;
bool EnoughWritebackUnits = true;
bool WritebackModeSupport = true;
@@ -3553,6 +3541,11 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+ if (mode_lib->validate_max_state)
+ start_state = v->soc.num_states - 1;
+ else
+ start_state = 0;
+
CalculateMinAndMaxPrefetchMode(
mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
&MinPrefetchMode, &MaxPrefetchMode);
@@ -3851,7 +3844,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->SingleDPPViewportSizeSupportPerPlane,
&v->ViewportSizeSupport[0][0]);
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (j = 0; j < 2; j++) {
v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
@@ -4007,7 +4000,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*Total Available Pipes Support Check*/
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (j = 0; j < 2; j++) {
if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
v->TotalAvailablePipesSupport[i][j] = true;
@@ -4046,7 +4039,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
}
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
v->RequiresDSC[i][k] = false;
v->RequiresFEC[i][k] = false;
@@ -4174,7 +4167,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
}
}
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
v->DIOSupport[i] = true;
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
@@ -4185,7 +4178,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
}
- for (i = 0; i < v->soc.num_states; ++i) {
+ for (i = start_state; i < v->soc.num_states; ++i) {
v->ODMCombine4To1SupportCheckOK[i] = true;
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
@@ -4197,7 +4190,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
v->NotEnoughDSCUnits[i] = false;
v->TotalDSCUnitsRequired = 0.0;
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -4217,7 +4210,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
/*DSC Delay per state*/
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
if (v->OutputBppPerState[i][k] == BPP_INVALID) {
v->BPP = 0.0;
@@ -4268,7 +4261,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Calculate Swath, DET Configuration, DCFCLKDeepSleep
//
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
for (k = 0; k < v->NumberOfActivePlanes; ++k) {
v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
@@ -4333,7 +4326,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
}
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (j = 0; j < 2; j++) {
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
@@ -4571,7 +4564,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Calculate Return BW
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
if (v->BlendingAndTiming[k] == k) {
@@ -4630,7 +4623,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
}
@@ -4641,7 +4634,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
if (v->ClampMinDCFCLK) {
/* Clamp calculated values to actual minimum */
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
@@ -4651,7 +4644,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
}
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
v->ReturnBusWidth * v->DCFCLKState[i][j],
@@ -4669,7 +4662,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Re-ordering Buffer Support Check
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
> (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
@@ -4687,7 +4680,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
}
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
@@ -4703,7 +4696,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
//Prefetch Check
- for (i = 0; i < mode_lib->soc.num_states; ++i) {
+ for (i = start_state; i < mode_lib->soc.num_states; ++i) {
for (j = 0; j <= 1; ++j) {
int NextPrefetchModeState = MinPrefetchMode;
@@ -5075,7 +5068,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
/*PTE Buffer Size Check*/
- for (i = 0; i < v->soc.num_states; i++) {
+ for (i = start_state; i < v->soc.num_states; i++) {
for (j = 0; j < 2; j++) {
v->PTEBufferSizeNotExceeded[i][j] = true;
for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
@@ -5128,7 +5121,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
ViewportExceedsSurface = true;
if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
- && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
+ && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
ViewportExceedsSurface = true;
}
@@ -5136,7 +5129,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
/*Mode Support, Voltage State and SOC Configuration*/
- for (i = v->soc.num_states - 1; i >= 0; i--) {
+ for (i = v->soc.num_states - 1; i >= start_state; i--) {
for (j = 0; j < 2; j++) {
if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
&& v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
@@ -5158,7 +5151,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
}
{
unsigned int MaximumMPCCombine = 0;
- for (i = v->soc.num_states; i >= 0; i--) {
+ for (i = v->soc.num_states; i >= start_state; i--) {
if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
v->VoltageLevel = i;
v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
index 0497a5d74a62..4fb37df54d59 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
@@ -392,8 +392,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
blk_bytes = surf_linear ?
256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double)blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -464,8 +462,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
- log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -624,7 +620,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib,
if (hostvm_enable)
rq_sizing_param->dpte_group_bytes = 512;
else {
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
rq_sizing_param->dpte_group_bytes = 2048;
@@ -660,13 +656,12 @@ static void get_surf_rq_param(struct display_mode_lib *mode_lib,
bool is_chroma,
bool is_alpha)
{
- bool mode_422 = 0;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
unsigned int surface_height = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// FIXME check if ppe apply for both luma and chroma in 422 case
if (is_chroma | is_alpha) {
@@ -934,7 +929,6 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
double min_dst_y_ttu_vblank = 0;
unsigned int dlg_vblank_start = 0;
bool dual_plane = false;
- bool mode_422 = false;
unsigned int access_dir = 0;
unsigned int vp_height_l = 0;
unsigned int vp_width_l = 0;
@@ -1083,7 +1077,6 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
// Source
// dcc_en = src.dcc;
dual_plane = is_dual_plane((enum source_format_class)(src->source_format));
- mode_422 = false; // TODO
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
vp_height_l = src->viewport_height;
vp_width_l = src->viewport_width;
@@ -1301,18 +1294,8 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (hratio_l <= 1)
min_hratio_fact_l = 2.0;
@@ -1579,6 +1562,7 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
+
disp_dlg_regs->refcyc_per_pte_group_vblank_l =
(unsigned int)(dst_y_per_row_vblank * (double)htotal
* ref_freq_to_pix_freq / (double)dpte_groups_per_row_ub_l);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 6ce90678b33c..1aaa77265eed 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -320,13 +320,13 @@ static void calculate_wm_set_for_vlevel(int vlevel,
}
-void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits;
struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool);
struct clk_limit_table *clk_table = &bw_params->clk_table;
unsigned int i, closest_clk_lvl;
- int j;
+ int j = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0;
dc_assert_fp_enabled();
@@ -338,6 +338,15 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
dcn3_01_soc.num_chans = bw_params->num_channels;
ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
for (i = 0; i < clk_table->num_entries; i++) {
/* loop backwards*/
for (closest_clk_lvl = 0, j = dcn3_01_soc.num_states - 1; j >= 0; j--) {
@@ -353,8 +362,13 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2;
- s[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
- s[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+ /* Clocks independent of voltage level. */
+ s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
s[i].dram_bw_per_chan_gbps =
dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
s[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
@@ -409,7 +423,7 @@ void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info)
dcn3_01_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
}
-void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
+void dcn301_fpu_calculate_wm_and_dlg(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
@@ -435,12 +449,12 @@ void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
&context->bw_ctx.dml, pipes, pipe_cnt);
/* WM Set C */
table_entry = &bw_params->wm_table.entries[WM_C];
- vlevel = min(max(vlevel_req, 2), vlevel_max);
+ vlevel = clamp(vlevel_req, 2, vlevel_max);
calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c,
&context->bw_ctx.dml, pipes, pipe_cnt);
/* WM Set B */
table_entry = &bw_params->wm_table.entries[WM_B];
- vlevel = min(max(vlevel_req, 1), vlevel_max);
+ vlevel = clamp(vlevel_req, 1, vlevel_max);
calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b,
&context->bw_ctx.dml, pipes, pipe_cnt);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
index 774b0fdfc80b..3e103e23dc6f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h
@@ -26,15 +26,14 @@
#ifndef __DCN301_FPU_H__
#define __DCN301_FPU_H__
-void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
+void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
void dcn301_fpu_set_wm_ranges(int i,
struct pp_smu_wm_range_sets *ranges,
struct _vcs_dpi_soc_bounding_box_st *loaded_bb);
-void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info);
-
-void dcn301_calculate_wm_and_dlg_fp(struct dc *dc,
+void dcn301_fpu_calculate_wm_and_dlg(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c
index e2bcd205aa93..8d7c59ec701d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c
@@ -280,7 +280,7 @@ void dcn302_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p
j = 0;
/* create the final dcfclk and uclk table */
while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
} else {
@@ -304,6 +304,16 @@ void dcn302_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p
dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
}
+ /* bw_params->clk_table.entries[MAX_NUM_DPM_LVL].
+ * MAX_NUM_DPM_LVL is 8.
+ * dcn3_02_soc.clock_limits[DC__VOLTAGE_STATES].
+ * DC__VOLTAGE_STATES is 40.
+ */
+ if (num_states > MAX_NUM_DPM_LVL) {
+ ASSERT(0);
+ return;
+ }
+
dcn3_02_soc.num_states = num_states;
for (i = 0; i < dcn3_02_soc.num_states; i++) {
dcn3_02_soc.clock_limits[i].state = i;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
index 3eb3a021ab7d..b5d3fd4c3694 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c
@@ -266,6 +266,17 @@ void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p
optimal_uclk_for_dcfclk_sta_targets[i] =
bw_params->clk_table.entries[j].memclk_mhz * 16;
break;
+ } else {
+ /* condition where (dcfclk_sta_targets[i] >= optimal_dcfclk_for_uclk[j]):
+ * This is required for dcn303 because it just so happens that the memory
+ * bandwidth is low enough such that all the optimal DCFCLK for each UCLK
+ * is lower than the smallest DCFCLK STA target. In this case we need to
+ * populate the optimal UCLK for each DCFCLK STA target to be the max UCLK.
+ */
+ if (j == num_uclk_states - 1) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ }
}
}
}
@@ -274,7 +285,7 @@ void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p
j = 0;
/* create the final dcfclk and uclk table */
while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
} else {
@@ -299,6 +310,16 @@ void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p
dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
}
+ /* bw_params->clk_table.entries[MAX_NUM_DPM_LVL].
+ * MAX_NUM_DPM_LVL is 8.
+ * dcn3_03_soc.clock_limits[DC__VOLTAGE_STATES].
+ * DC__VOLTAGE_STATES is 40.
+ */
+ if (num_states > MAX_NUM_DPM_LVL) {
+ ASSERT(0);
+ return;
+ }
+
dcn3_03_soc.num_states = num_states;
for (i = 0; i < dcn3_03_soc.num_states; i++) {
dcn3_03_soc.clock_limits[i].state = i;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index deb6d162a2d5..1a28061bb9ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -291,6 +291,7 @@ static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
.do_urgent_latency_adjustment = false,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .dispclk_dppclk_vco_speed_mhz = 2400.0,
.num_chans = 4,
.dummy_pstate_latency_us = 10.0
};
@@ -438,6 +439,7 @@ static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
.do_urgent_latency_adjustment = false,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .dispclk_dppclk_vco_speed_mhz = 2500.0,
};
void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
@@ -485,6 +487,7 @@ void dcn31_calculate_wm_and_dlg_fp(
{
int i, pipe_idx, total_det = 0, active_hubp_count = 0;
double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ uint32_t cstate_enter_plus_exit_z8_ns;
dc_assert_fp_enabled();
@@ -504,6 +507,13 @@ void dcn31_calculate_wm_and_dlg_fp(
pipes[0].clks_cfg.dcfclk_mhz = dcfclk;
pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz;
+ cstate_enter_plus_exit_z8_ns =
+ get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+
+ if (get_stutter_period(&context->bw_ctx.dml, pipes, pipe_cnt) < dc->debug.minimum_z8_residency_time &&
+ cstate_enter_plus_exit_z8_ns < dc->debug.minimum_z8_residency_time * 1000)
+ cstate_enter_plus_exit_z8_ns = dc->debug.minimum_z8_residency_time * 1000;
+
/* Set A:
* All clocks min required
*
@@ -514,7 +524,7 @@ void dcn31_calculate_wm_and_dlg_fp(
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
- context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = cstate_enter_plus_exit_z8_ns;
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
@@ -637,9 +647,9 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
s[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
}
- if (clk_table->num_entries) {
+
+ if (clk_table->num_entries)
dcn3_1_soc.num_states = clk_table->num_entries;
- }
memcpy(dcn3_1_soc.clock_limits, s, sizeof(dcn3_1_soc.clock_limits));
@@ -752,23 +762,11 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
break;
}
}
- // Ported from DCN315
- if (clk_table->num_entries == 1) {
- /*smu gives one DPM level, let's take the highest one*/
- closest_clk_lvl = dcn3_16_soc.num_states - 1;
- }
s[i].state = i;
/* Clocks dependent on voltage level. */
s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- if (clk_table->num_entries == 1 &&
- s[i].dcfclk_mhz <
- dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
- /*SMU fix not released yet*/
- s[i].dcfclk_mhz =
- dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
- }
s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
s[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz *
@@ -789,9 +787,9 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
s[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
}
- if (clk_table->num_entries) {
+
+ if (clk_table->num_entries)
dcn3_16_soc.num_states = clk_table->num_entries;
- }
memcpy(dcn3_16_soc.clock_limits, s, sizeof(dcn3_16_soc.clock_limits));
@@ -810,6 +808,8 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc)
{
+ dc_assert_fp_enabled();
+
return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0);
}
@@ -817,6 +817,8 @@ int dcn_get_approx_det_segs_required_for_pstate(
struct _vcs_dpi_soc_bounding_box_st *soc,
int pix_clk_100hz, int bpp, int seg_size_kb)
{
+ dc_assert_fp_enabled();
+
/* Roughly calculate required crb to hide latency. In practice there is slightly
* more buffer available for latency hiding
*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
index 8f9c8faed260..dfcc5d50071e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
@@ -30,6 +30,7 @@
#define DCN3_15_DEFAULT_DET_SIZE 192
#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
#define DCN3_16_DEFAULT_DET_SIZE 192
+#define DCN3_16_MIN_COMPBUF_SIZE_KB 128
void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
int pipe_cnt);
@@ -54,5 +55,5 @@ int dcn_get_approx_det_segs_required_for_pstate(
int dcn31x_populate_dml_pipes_from_context(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
#endif /* __DCN31_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index adea459e7d36..ed59c77bc6f6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -1105,6 +1105,7 @@ static bool CalculatePrefetchSchedule(
Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
+ dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
Tpre_rounded = dst_y_prefetch_equ * LineTime;
@@ -1444,12 +1445,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -1924,15 +1922,6 @@ static unsigned int CalculateVMAndRowBytes(
*PixelPTEReqWidth = 32768.0 / BytePerPixel;
*PTERequestSize = 64;
FractionOfPTEReturnDrop = 0;
- } else if (MacroTileSizeBytes == 4096) {
- PixelPTEReqHeightPTEs = 1;
- *PixelPTEReqHeight = MacroTileHeight;
- *PixelPTEReqWidth = 8 * *MacroTileWidth;
- *PTERequestSize = 64;
- if (ScanDirection != dm_vert)
- FractionOfPTEReturnDrop = 0;
- else
- FractionOfPTEReturnDrop = 7 / 8;
} else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeightPTEs = 16;
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
@@ -3617,7 +3606,7 @@ static double TruncToValidBPP(
NonDSCBPP1 = 15;
NonDSCBPP2 = 18;
MinDSCBPP = 6;
- MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
+ MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
} else if (Format == dm_444) {
NonDSCBPP0 = 24;
NonDSCBPP1 = 30;
@@ -3679,7 +3668,6 @@ static double TruncToValidBPP(
return DesiredBPP;
}
}
- return BPP_INVALID;
}
static noinline void CalculatePrefetchSchedulePerPlane(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
index 4113ce79c4af..bfeb01477f0c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c
@@ -413,8 +413,6 @@ static void get_meta_and_pte_attr(
log2_blk256_height = dml_log2((double) blk256_height);
blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double) blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -481,8 +479,6 @@ static void get_meta_and_pte_attr(
log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -619,7 +615,7 @@ static void get_meta_and_pte_attr(
if (hostvm_enable)
rq_sizing_param->dpte_group_bytes = 512;
else {
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
rq_sizing_param->dpte_group_bytes = 2048;
@@ -655,13 +651,12 @@ static void get_surf_rq_param(
bool is_chroma,
bool is_alpha)
{
- bool mode_422 = 0;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
unsigned int surface_height = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// FIXME check if ppe apply for both luma and chroma in 422 case
if (is_chroma | is_alpha) {
@@ -888,7 +883,6 @@ static void dml_rq_dlg_get_dlg_params(
double min_ttu_vblank;
unsigned int dlg_vblank_start;
bool dual_plane;
- bool mode_422;
unsigned int access_dir;
unsigned int vp_height_l;
unsigned int vp_width_l;
@@ -1004,7 +998,6 @@ static void dml_rq_dlg_get_dlg_params(
// Prefetch Calc
// Source
dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
- mode_422 = 0;
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
vp_height_l = src->viewport_height;
vp_width_l = src->viewport_width;
@@ -1142,18 +1135,8 @@ static void dml_rq_dlg_get_dlg_params(
dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (hratio_l <= 1)
min_hratio_fact_l = 2.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
index fb21572750e8..df9d50b9b57c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
@@ -306,17 +306,17 @@ static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing)
int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int i, pipe_cnt;
struct resource_context *res_ctx = &context->res_ctx;
- struct pipe_ctx *pipe;
+ struct pipe_ctx *pipe = 0;
bool upscaled = false;
const unsigned int max_allowed_vblank_nom = 1023;
dc_assert_fp_enabled();
- dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+ dcn31x_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
struct dc_crtc_timing *timing;
@@ -409,6 +409,9 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c
context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
}
+ if (dc->debug.force_odm_combine_4to1)
+ context->bw_ctx.dml.ip.odm_combine_4to1_supported = true;
+
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
index d32c5bb99f4c..362ac79184ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h
@@ -35,6 +35,6 @@
void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index a94aa0f21a7f..9f3938a50240 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -24,10 +24,7 @@
*
*/
-#define UNIT_TEST 0
-#if !UNIT_TEST
#include "dc.h"
-#endif
#include "../display_mode_lib.h"
#include "display_mode_vba_314.h"
#include "../dml_inline_defs.h"
@@ -1126,6 +1123,7 @@ static bool CalculatePrefetchSchedule(
Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
+ dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
Tpre_rounded = dst_y_prefetch_equ * LineTime;
@@ -1464,12 +1462,9 @@ static bool CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -1944,15 +1939,6 @@ static unsigned int CalculateVMAndRowBytes(
*PixelPTEReqWidth = 32768.0 / BytePerPixel;
*PTERequestSize = 64;
FractionOfPTEReturnDrop = 0;
- } else if (MacroTileSizeBytes == 4096) {
- PixelPTEReqHeightPTEs = 1;
- *PixelPTEReqHeight = MacroTileHeight;
- *PixelPTEReqWidth = 8 * *MacroTileWidth;
- *PTERequestSize = 64;
- if (ScanDirection != dm_vert)
- FractionOfPTEReturnDrop = 0;
- else
- FractionOfPTEReturnDrop = 7 / 8;
} else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeightPTEs = 16;
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
@@ -2311,6 +2297,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
v->OutputFormat[k],
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
}
+ v->DSCDelay[k] = v->DSCDelay[k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelay[k] / v->HActive[k], 1);
v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
} else {
v->DSCDelay[k] = 0;
@@ -3725,7 +3712,7 @@ static double TruncToValidBPP(
NonDSCBPP1 = 15;
NonDSCBPP2 = 18;
MinDSCBPP = 6;
- MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
+ MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
} else if (Format == dm_444) {
NonDSCBPP0 = 24;
NonDSCBPP1 = 30;
@@ -3787,7 +3774,6 @@ static double TruncToValidBPP(
return DesiredBPP;
}
}
- return BPP_INVALID;
}
static noinline void CalculatePrefetchSchedulePerPlane(
@@ -4719,6 +4705,7 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_
v->OutputFormat[k],
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
}
+ v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelayPerState[i][k] / v->HActive[k], 1.0);
v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
} else {
v->DSCDelayPerState[i][k] = 0.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
index b3e8dc08030c..04df263ff65e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c
@@ -501,8 +501,6 @@ static void get_meta_and_pte_attr(
log2_blk256_height = dml_log2((double) blk256_height);
blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size);
log2_blk_bytes = dml_log2((double) blk_bytes);
- log2_blk_height = 0;
- log2_blk_width = 0;
// remember log rule
// "+" in log is multiply
@@ -569,8 +567,6 @@ static void get_meta_and_pte_attr(
log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height;
meta_req_width = 1 << log2_meta_req_width;
meta_req_height = 1 << log2_meta_req_height;
- log2_meta_row_height = 0;
- meta_row_width_ub = 0;
// the dimensions of a meta row are meta_row_width x meta_row_height in elements.
// calculate upper bound of the meta_row_width
@@ -707,7 +703,7 @@ static void get_meta_and_pte_attr(
if (hostvm_enable)
rq_sizing_param->dpte_group_bytes = 512;
else {
- if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group
+ if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group
rq_sizing_param->dpte_group_bytes = 512;
else
rq_sizing_param->dpte_group_bytes = 2048;
@@ -743,13 +739,12 @@ static void get_surf_rq_param(
bool is_chroma,
bool is_alpha)
{
- bool mode_422 = 0;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
unsigned int surface_height = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
// FIXME check if ppe apply for both luma and chroma in 422 case
if (is_chroma | is_alpha) {
@@ -973,7 +968,6 @@ static void dml_rq_dlg_get_dlg_params(
double min_ttu_vblank;
unsigned int dlg_vblank_start;
bool dual_plane;
- bool mode_422;
unsigned int access_dir;
unsigned int vp_height_l;
unsigned int vp_width_l;
@@ -1091,7 +1085,6 @@ static void dml_rq_dlg_get_dlg_params(
// Prefetch Calc
// Source
dual_plane = is_dual_plane((enum source_format_class) (src->source_format));
- mode_422 = 0;
access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed
vp_height_l = src->viewport_height;
vp_width_l = src->viewport_width;
@@ -1230,18 +1223,8 @@ static void dml_rq_dlg_get_dlg_params(
dpte_row_height_l = rq_dlg_param->rq_l.dpte_row_height;
dpte_row_height_c = rq_dlg_param->rq_c.dpte_row_height;
- if (mode_422) {
- swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element
- swath_width_pixels_ub_c = swath_width_ub_c * 2;
- } else {
- swath_width_pixels_ub_l = swath_width_ub_l * 1;
- swath_width_pixels_ub_c = swath_width_ub_c * 1;
- }
-
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
+ swath_width_pixels_ub_l = swath_width_ub_l;
+ swath_width_pixels_ub_c = swath_width_ub_c;
if (hratio_l <= 1)
min_hratio_fact_l = 2.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 711d4085b33b..8a0f128722b0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -31,7 +31,8 @@
// We need this includes for WATERMARKS_* defines
#include "clk_mgr/dcn32/dcn32_smu13_driver_if.h"
#include "dcn30/dcn30_resource.h"
-#include "link.h"
+#include "link_service.h"
+#include "dc_state_priv.h"
#define DC_LOGGER_INIT(logger)
@@ -41,7 +42,16 @@ static const struct subvp_high_refresh_list subvp_high_refresh_list = {
.res = {
{.width = 3840, .height = 2160, },
{.width = 3440, .height = 1440, },
- {.width = 2560, .height = 1440, }},
+ {.width = 2560, .height = 1440, },
+ {.width = 1920, .height = 1080, }},
+};
+
+static const struct subvp_active_margin_list subvp_active_margin_list = {
+ .min_refresh = 55,
+ .max_refresh = 65,
+ .res = {
+ {.width = 2560, .height = 1440, },
+ {.width = 1920, .height = 1080, }},
};
struct _vcs_dpi_ip_params_st dcn3_2_ip = {
@@ -150,8 +160,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
.pct_ideal_sdp_bw_after_urgent = 90.0,
.pct_ideal_fabric_bw_after_urgent = 67.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
.pct_ideal_dram_bw_after_urgent_strobe = 67.0,
.max_avg_sdp_bw_use_normal_percent = 80.0,
.max_avg_fabric_bw_use_normal_percent = 60.0,
@@ -170,6 +180,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
.urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
};
+static bool dcn32_apply_merge_split_flags_helper(struct dc *dc, struct dc_state *context,
+ bool *repopulate_pipes, int *split, bool *merge);
+
void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
{
/* defaults */
@@ -277,7 +290,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support;
context->bw_ctx.dml.soc.dram_clock_change_latency_us =
dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
- dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, DC_VALIDATE_MODE_AND_PROGRAMMING);
/* for subvp + DRR case, if subvp pipes are still present we support pstate */
if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported &&
@@ -332,7 +345,7 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
if (!pipe->stream)
continue;
- if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ if (pipe->plane_state && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
pipes[pipe_idx].pipe.dest.vstartup_start =
get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
pipes[pipe_idx].pipe.dest.vupdate_offset =
@@ -347,90 +360,6 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
}
}
-/**
- * dcn32_predict_pipe_split - Predict if pipe split will occur for a given DML pipe
- * @context: [in] New DC state to be programmed
- * @pipe_e2e: [in] DML pipe end to end context
- *
- * This function takes in a DML pipe (pipe_e2e) and predicts if pipe split is required (both
- * ODM and MPC). For pipe split, ODM combine is determined by the ODM mode, and MPC combine is
- * determined by DPPClk requirements
- *
- * This function follows the same policy as DML:
- * - Check for ODM combine requirements / policy first
- * - MPC combine is only chosen if there is no ODM combine requirements / policy in place, and
- * MPC is required
- *
- * Return: Number of splits expected (1 for 2:1 split, 3 for 4:1 split, 0 for no splits).
- */
-uint8_t dcn32_predict_pipe_split(struct dc_state *context,
- display_e2e_pipe_params_st *pipe_e2e)
-{
- double pscl_throughput;
- double pscl_throughput_chroma;
- double dpp_clk_single_dpp, clock;
- double clk_frequency = 0.0;
- double vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz;
- bool total_available_pipes_support = false;
- uint32_t number_of_dpp = 0;
- enum odm_combine_mode odm_mode = dm_odm_combine_mode_disabled;
- double req_dispclk_per_surface = 0;
- uint8_t num_splits = 0;
-
- dc_assert_fp_enabled();
-
- dml32_CalculateODMMode(context->bw_ctx.dml.ip.maximum_pixels_per_line_per_dsc_unit,
- pipe_e2e->pipe.dest.hactive,
- pipe_e2e->dout.output_format,
- pipe_e2e->dout.output_type,
- pipe_e2e->pipe.dest.odm_combine_policy,
- context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dispclk_mhz,
- context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dispclk_mhz,
- pipe_e2e->dout.dsc_enable != 0,
- 0, /* TotalNumberOfActiveDPP can be 0 since we're predicting pipe split requirement */
- context->bw_ctx.dml.ip.max_num_dpp,
- pipe_e2e->pipe.dest.pixel_rate_mhz,
- context->bw_ctx.dml.soc.dcn_downspread_percent,
- context->bw_ctx.dml.ip.dispclk_ramp_margin_percent,
- context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz,
- pipe_e2e->dout.dsc_slices,
- /* Output */
- &total_available_pipes_support,
- &number_of_dpp,
- &odm_mode,
- &req_dispclk_per_surface);
-
- dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe_e2e->pipe.scale_ratio_depth.hscl_ratio,
- pipe_e2e->pipe.scale_ratio_depth.hscl_ratio_c,
- pipe_e2e->pipe.scale_ratio_depth.vscl_ratio,
- pipe_e2e->pipe.scale_ratio_depth.vscl_ratio_c,
- context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk,
- context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk,
- pipe_e2e->pipe.dest.pixel_rate_mhz,
- pipe_e2e->pipe.src.source_format,
- pipe_e2e->pipe.scale_taps.htaps,
- pipe_e2e->pipe.scale_taps.htaps_c,
- pipe_e2e->pipe.scale_taps.vtaps,
- pipe_e2e->pipe.scale_taps.vtaps_c,
- /* Output */
- &pscl_throughput, &pscl_throughput_chroma,
- &dpp_clk_single_dpp);
-
- clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100);
-
- if (clock > 0)
- clk_frequency = vco_speed * 4.0 / ((int)(vco_speed * 4.0) / clock);
-
- if (odm_mode == dm_odm_combine_mode_2to1)
- num_splits = 1;
- else if (odm_mode == dm_odm_combine_mode_4to1)
- num_splits = 3;
- else if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[context->bw_ctx.dml.soc.num_states - 1].dppclk_mhz)
- num_splits = 1;
-
- return num_splits;
-}
-
static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry)
{
float memory_bw_kbytes_sec;
@@ -696,10 +625,12 @@ static bool dcn32_assign_subvp_pipe(struct dc *dc,
* to combine this with SubVP can cause issues with the scheduling).
* - Not TMZ surface
*/
- if (pipe->plane_state && !pipe->top_pipe && !dcn32_is_center_timing(pipe) &&
+ if (pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && !dcn32_is_center_timing(pipe) &&
+ !pipe->stream->hw_cursor_req &&
+ !dc_state_get_stream_cursor_subvp_limit(pipe->stream, context) &&
!(pipe->stream->timing.pix_clk_100hz / 10000 > DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ) &&
(!dcn32_is_psr_capable(pipe) || (context->stream_count == 1 && dc->caps.dmub_caps.subvp_psr)) &&
- pipe->stream->mall_stream_config.type == SUBVP_NONE &&
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE &&
(refresh_rate < 120 || dcn32_allow_subvp_high_refresh_rate(dc, context, pipe)) &&
!pipe->plane_state->address.tmz_surface &&
(vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0 ||
@@ -757,7 +688,7 @@ static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context
// Find the minimum pipe split count for non SubVP pipes
if (resource_is_pipe_type(pipe, OPP_HEAD) &&
- pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE) {
split_cnt = 0;
while (pipe) {
split_cnt++;
@@ -794,7 +725,7 @@ static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context
*/
static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context)
{
- struct pipe_ctx *subvp_pipes[2];
+ struct pipe_ctx *subvp_pipes[2] = {0};
struct dc_stream_state *phantom = NULL;
uint32_t microschedule_lines = 0;
uint32_t index = 0;
@@ -809,9 +740,9 @@ static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context)
/* Loop to calculate the maximum microschedule time between the two SubVP pipes,
* and also to store the two main SubVP pipe pointers in subvp_pipes[2].
*/
- if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
- pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
- phantom = pipe->stream->mall_stream_config.paired_stream;
+ phantom = dc_state_get_paired_subvp_stream(context, pipe->stream);
+ if (phantom && pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
phantom->timing.v_addressable;
@@ -879,6 +810,9 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
int16_t stretched_drr_us = 0;
int16_t drr_stretched_vblank_us = 0;
int16_t max_vblank_mallregion = 0;
+ struct dc_stream_state *phantom_stream;
+ bool subvp_found = false;
+ bool drr_found = false;
// Find SubVP pipe
for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -891,8 +825,10 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
continue;
// Find the SubVP pipe
- if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+ subvp_found = true;
break;
+ }
}
// Find the DRR pipe
@@ -900,40 +836,46 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context)
drr_pipe = &context->res_ctx.pipe_ctx[i];
// We check for master pipe only
- if (!resource_is_pipe_type(pipe, OTG_MASTER) ||
- !resource_is_pipe_type(pipe, DPP_PIPE))
+ if (!resource_is_pipe_type(drr_pipe, OTG_MASTER) ||
+ !resource_is_pipe_type(drr_pipe, DPP_PIPE))
continue;
- if (drr_pipe->stream->mall_stream_config.type == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param &&
- (drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable))
+ if (dc_state_get_pipe_subvp_type(context, drr_pipe) == SUBVP_NONE && drr_pipe->stream->ignore_msa_timing_param &&
+ (drr_pipe->stream->allow_freesync || drr_pipe->stream->vrr_active_variable || drr_pipe->stream->vrr_active_fixed)) {
+ drr_found = true;
break;
+ }
}
- main_timing = &pipe->stream->timing;
- phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
- drr_timing = &drr_pipe->stream->timing;
- prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
- (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
- dc->caps.subvp_prefetch_end_to_mall_start_us;
- subvp_active_us = main_timing->v_addressable * main_timing->h_total /
- (double)(main_timing->pix_clk_100hz * 100) * 1000000;
- drr_frame_us = drr_timing->v_total * drr_timing->h_total /
- (double)(drr_timing->pix_clk_100hz * 100) * 1000000;
- // P-State allow width and FW delays already included phantom_timing->v_addressable
- mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
- (double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
- stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
- drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
- (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
- max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+ phantom_stream = dc_state_get_paired_subvp_stream(context, pipe->stream);
+ if (phantom_stream && subvp_found && drr_found) {
+ main_timing = &pipe->stream->timing;
+ phantom_timing = &phantom_stream->timing;
+ drr_timing = &drr_pipe->stream->timing;
+ prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+ dc->caps.subvp_prefetch_end_to_mall_start_us;
+ subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+ (double)(main_timing->pix_clk_100hz * 100) * 1000000;
+ drr_frame_us = drr_timing->v_total * drr_timing->h_total /
+ (double)(drr_timing->pix_clk_100hz * 100) * 1000000;
+ // P-State allow width and FW delays already included phantom_timing->v_addressable
+ mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+ stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+ drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
+ (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
+ max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+ }
/* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the
* highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
* for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
* and the max of (VBLANK blanking time, MALL region)).
*/
- if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 &&
- subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0)
+ if (drr_timing &&
+ stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 &&
+ subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0)
schedulable = true;
return schedulable;
@@ -970,6 +912,8 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
struct dc_crtc_timing *main_timing = NULL;
struct dc_crtc_timing *phantom_timing = NULL;
struct dc_crtc_timing *vblank_timing = NULL;
+ struct dc_stream_state *phantom_stream;
+ enum mall_stream_type pipe_mall_type;
/* For SubVP + VBLANK/DRR cases, we assume there can only be
* a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
@@ -979,6 +923,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
*/
for (i = 0; i < dc->res_pool->pipe_count; i++) {
pipe = &context->res_ctx.pipe_ctx[i];
+ pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
// We check for master pipe, but it shouldn't matter since we only need
// the pipe for timing info (stream should be same for any pipe splits)
@@ -986,18 +931,19 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
!resource_is_pipe_type(pipe, DPP_PIPE))
continue;
- if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ if (!found && pipe_mall_type == SUBVP_NONE) {
// Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
vblank_index = i;
found = true;
}
- if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN)
subvp_pipe = pipe;
}
- if (found) {
+ if (found && subvp_pipe) {
+ phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream);
main_timing = &subvp_pipe->stream->timing;
- phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+ phantom_timing = &phantom_stream->timing;
vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
// Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
// Also include the prefetch end to mallstart delay time
@@ -1052,9 +998,9 @@ static bool subvp_subvp_admissable(struct dc *dc,
continue;
if (pipe->plane_state && !pipe->top_pipe &&
- pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
- pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
+ pipe->stream->timing.v_total * (uint64_t)pipe->stream->timing.h_total - (uint64_t)1);
refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
@@ -1101,23 +1047,23 @@ static bool subvp_validate_static_schedulability(struct dc *dc,
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
if (!pipe->stream)
continue;
if (pipe->plane_state && !pipe->top_pipe) {
- if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+ if (pipe_mall_type == SUBVP_MAIN)
subvp_count++;
- if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ if (pipe_mall_type == SUBVP_NONE)
non_subvp_pipes++;
- }
}
// Count how many planes that aren't SubVP/phantom are capable of VACTIVE
// switching (SubVP + VACTIVE unsupported). In situations where we force
// SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vlevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] > 0 &&
- pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ pipe_mall_type == SUBVP_NONE) {
vactive_count++;
}
pipe_idx++;
@@ -1144,18 +1090,364 @@ static bool subvp_validate_static_schedulability(struct dc *dc,
return schedulable;
}
-static void dcn32_full_validate_bw_helper(struct dc *dc,
+static void assign_subvp_index(struct dc *dc, struct dc_state *context)
+{
+ int i;
+ int index = 0;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) &&
+ dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
+ pipe_ctx->subvp_index = index++;
+ } else {
+ pipe_ctx->subvp_index = 0;
+ }
+ }
+}
+
+struct pipe_slice_table {
+ struct {
+ struct dc_stream_state *stream;
+ int slice_count;
+ } odm_combines[MAX_STREAMS];
+ int odm_combine_count;
+
+ struct {
+ struct pipe_ctx *pri_pipe;
+ struct dc_plane_state *plane;
+ int slice_count;
+ } mpc_combines[MAX_PLANES];
+ int mpc_combine_count;
+};
+
+
+static void update_slice_table_for_stream(struct pipe_slice_table *table,
+ struct dc_stream_state *stream, int diff)
+{
+ int i;
+
+ for (i = 0; i < table->odm_combine_count; i++) {
+ if (table->odm_combines[i].stream == stream) {
+ table->odm_combines[i].slice_count += diff;
+ break;
+ }
+ }
+
+ if (i == table->odm_combine_count) {
+ table->odm_combine_count++;
+ table->odm_combines[i].stream = stream;
+ table->odm_combines[i].slice_count = diff;
+ }
+}
+
+static void update_slice_table_for_plane(struct pipe_slice_table *table,
+ struct pipe_ctx *dpp_pipe, struct dc_plane_state *plane, int diff)
+{
+ int i;
+ struct pipe_ctx *pri_dpp_pipe = resource_get_primary_dpp_pipe(dpp_pipe);
+
+ for (i = 0; i < table->mpc_combine_count; i++) {
+ if (table->mpc_combines[i].plane == plane &&
+ table->mpc_combines[i].pri_pipe == pri_dpp_pipe) {
+ table->mpc_combines[i].slice_count += diff;
+ break;
+ }
+ }
+
+ if (i == table->mpc_combine_count) {
+ table->mpc_combine_count++;
+ table->mpc_combines[i].plane = plane;
+ table->mpc_combines[i].pri_pipe = pri_dpp_pipe;
+ table->mpc_combines[i].slice_count = diff;
+ }
+}
+
+static void init_pipe_slice_table_from_context(
+ struct pipe_slice_table *table,
+ struct dc_state *context)
+{
+ int i, j;
+ struct pipe_ctx *otg_master;
+ struct pipe_ctx *dpp_pipes[MAX_PIPES];
+ struct dc_stream_state *stream;
+ int count;
+
+ memset(table, 0, sizeof(*table));
+
+ for (i = 0; i < context->stream_count; i++) {
+ stream = context->streams[i];
+ otg_master = resource_get_otg_master_for_stream(
+ &context->res_ctx, stream);
+ if (!otg_master)
+ continue;
+
+ count = resource_get_odm_slice_count(otg_master);
+ update_slice_table_for_stream(table, stream, count);
+
+ count = resource_get_dpp_pipes_for_opp_head(otg_master,
+ &context->res_ctx, dpp_pipes);
+ for (j = 0; j < count; j++)
+ if (dpp_pipes[j]->plane_state)
+ update_slice_table_for_plane(table, dpp_pipes[j],
+ dpp_pipes[j]->plane_state, 1);
+ }
+}
+
+static bool update_pipe_slice_table_with_split_flags(
+ struct pipe_slice_table *table,
+ struct dc *dc,
+ struct dc_state *context,
+ struct vba_vars_st *vba,
+ int split[MAX_PIPES],
+ bool merge[MAX_PIPES])
+{
+ /* NOTE: we are deprecating the support for the concept of pipe splitting
+ * or pipe merging. Instead we append slices to the end and remove
+ * slices from the end. The following code converts a pipe split or
+ * merge to an append or remove operation.
+ *
+ * For example:
+ * When split flags describe the following pipe connection transition
+ *
+ * from:
+ * pipe 0 (split=2) -> pipe 1 (split=2)
+ * to: (old behavior)
+ * pipe 0 -> pipe 2 -> pipe 1 -> pipe 3
+ *
+ * the code below actually does:
+ * pipe 0 -> pipe 1 -> pipe 2 -> pipe 3
+ *
+ * This is the new intended behavior and for future DCNs we will retire
+ * the old concept completely.
+ */
+ struct pipe_ctx *pipe;
+ bool odm;
+ int dc_pipe_idx, dml_pipe_idx = 0;
+ bool updated = false;
+
+ for (dc_pipe_idx = 0;
+ dc_pipe_idx < dc->res_pool->pipe_count; dc_pipe_idx++) {
+ pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx];
+ if (resource_is_pipe_type(pipe, FREE_PIPE))
+ continue;
+
+ if (merge[dc_pipe_idx]) {
+ if (resource_is_pipe_type(pipe, OPP_HEAD))
+ /* merging OPP head means reducing ODM slice
+ * count by 1
+ */
+ update_slice_table_for_stream(table, pipe->stream, -1);
+ else if (resource_is_pipe_type(pipe, DPP_PIPE) &&
+ resource_get_odm_slice_index(resource_get_opp_head(pipe)) == 0)
+ /* merging DPP pipe of the first ODM slice means
+ * reducing MPC slice count by 1
+ */
+ update_slice_table_for_plane(table, pipe, pipe->plane_state, -1);
+ updated = true;
+ }
+
+ if (split[dc_pipe_idx]) {
+ odm = vba->ODMCombineEnabled[vba->pipe_plane[dml_pipe_idx]] !=
+ dm_odm_combine_mode_disabled;
+ if (odm && resource_is_pipe_type(pipe, OPP_HEAD))
+ update_slice_table_for_stream(
+ table, pipe->stream, split[dc_pipe_idx] - 1);
+ else if (!odm && resource_is_pipe_type(pipe, DPP_PIPE))
+ update_slice_table_for_plane(table, pipe,
+ pipe->plane_state, split[dc_pipe_idx] - 1);
+ updated = true;
+ }
+ dml_pipe_idx++;
+ }
+ return updated;
+}
+
+static void update_pipes_with_slice_table(struct dc *dc, struct dc_state *context,
+ struct pipe_slice_table *table)
+{
+ int i;
+
+ for (i = 0; i < table->odm_combine_count; i++)
+ resource_update_pipes_for_stream_with_slice_count(context,
+ dc->current_state, dc->res_pool,
+ table->odm_combines[i].stream,
+ table->odm_combines[i].slice_count);
+
+ for (i = 0; i < table->mpc_combine_count; i++)
+ resource_update_pipes_for_plane_with_slice_count(context,
+ dc->current_state, dc->res_pool,
+ table->mpc_combines[i].plane,
+ table->mpc_combines[i].slice_count);
+}
+
+static bool update_pipes_with_split_flags(struct dc *dc, struct dc_state *context,
+ struct vba_vars_st *vba, int split[MAX_PIPES],
+ bool merge[MAX_PIPES])
+{
+ struct pipe_slice_table slice_table;
+ bool updated;
+
+ init_pipe_slice_table_from_context(&slice_table, context);
+ updated = update_pipe_slice_table_with_split_flags(
+ &slice_table, dc, context, vba,
+ split, merge);
+ update_pipes_with_slice_table(dc, context, &slice_table);
+ return updated;
+}
+
+static bool should_apply_odm_power_optimization(struct dc *dc,
+ struct dc_state *context, struct vba_vars_st *v, int *split,
+ bool *merge)
+{
+ struct dc_stream_state *stream = context->streams[0];
+ struct pipe_slice_table slice_table;
+ int i;
+
+ /*
+ * this debug flag allows us to disable ODM power optimization feature
+ * unconditionally. we force the feature off if this is set to false.
+ */
+ if (!dc->debug.enable_single_display_2to1_odm_policy)
+ return false;
+
+ /* current design and test coverage is only limited to allow ODM power
+ * optimization for single stream. Supporting it for multiple streams
+ * use case would require additional algorithm to decide how to
+ * optimize power consumption when there are not enough free pipes to
+ * allocate for all the streams. This level of optimization would
+ * require multiple attempts of revalidation to make an optimized
+ * decision. Unfortunately We do not support revalidation flow in
+ * current version of DML.
+ */
+ if (context->stream_count != 1)
+ return false;
+
+ /*
+ * Our hardware doesn't support ODM for HDMI TMDS
+ */
+ if (dc_is_hdmi_signal(stream->signal))
+ return false;
+
+ /*
+ * ODM Combine 2:1 requires horizontal timing divisible by 2 so each
+ * ODM segment has the same size.
+ */
+ if (!is_h_timing_divisible_by_2(stream))
+ return false;
+
+ /*
+ * No power benefits if the timing's pixel clock is not high enough to
+ * raise display clock from minimum power state.
+ */
+ if (stream->timing.pix_clk_100hz * 100 <= DCN3_2_VMIN_DISPCLK_HZ)
+ return false;
+
+ if (dc->config.enable_windowed_mpo_odm) {
+ /*
+ * ODM power optimization should only be allowed if the feature
+ * can be seamlessly toggled off within an update. This would
+ * require that the feature is applied on top of a minimal
+ * state. A minimal state is defined as a state validated
+ * without the need of pipe split. Therefore, when transition to
+ * toggle the feature off, the same stream and plane
+ * configuration can be supported by the pipe resource in the
+ * first ODM slice alone without the need to acquire extra
+ * resources.
+ */
+ init_pipe_slice_table_from_context(&slice_table, context);
+ update_pipe_slice_table_with_split_flags(
+ &slice_table, dc, context, v,
+ split, merge);
+ for (i = 0; i < slice_table.mpc_combine_count; i++)
+ if (slice_table.mpc_combines[i].slice_count > 1)
+ return false;
+
+ for (i = 0; i < slice_table.odm_combine_count; i++)
+ if (slice_table.odm_combines[i].slice_count > 1)
+ return false;
+ } else {
+ /*
+ * the new ODM power optimization feature reduces software
+ * design limitation and allows ODM power optimization to be
+ * supported even with presence of overlay planes. The new
+ * feature is enabled based on enable_windowed_mpo_odm flag. If
+ * the flag is not set, we limit our feature scope due to
+ * previous software design limitation
+ */
+ if (context->stream_status[0].plane_count != 1)
+ return false;
+
+ if (memcmp(&context->stream_status[0].plane_states[0]->clip_rect,
+ &stream->src, sizeof(struct rect)) != 0)
+ return false;
+
+ if (stream->src.width >= 5120 &&
+ stream->src.width > stream->dst.width)
+ return false;
+ }
+ return true;
+}
+
+static void try_odm_power_optimization_and_revalidate(
+ struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int *split,
+ bool *merge,
+ unsigned int *vlevel,
+ int pipe_cnt)
+{
+ int i;
+ unsigned int new_vlevel;
+ unsigned int cur_policy[MAX_PIPES];
+
+ for (i = 0; i < pipe_cnt; i++) {
+ cur_policy[i] = pipes[i].pipe.dest.odm_combine_policy;
+ pipes[i].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+ }
+
+ new_vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
+ if (new_vlevel < context->bw_ctx.dml.soc.num_states) {
+ memset(split, 0, MAX_PIPES * sizeof(int));
+ memset(merge, 0, MAX_PIPES * sizeof(bool));
+ *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, new_vlevel, split, merge);
+ context->bw_ctx.dml.vba.VoltageLevel = *vlevel;
+ } else {
+ for (i = 0; i < pipe_cnt; i++)
+ pipes[i].pipe.dest.odm_combine_policy = cur_policy[i];
+ }
+}
+
+static bool is_test_pattern_enabled(
+ struct dc_state *context)
+{
+ int i;
+
+ for (i = 0; i < context->stream_count; i++) {
+ if (context->streams[i]->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
+ return true;
+ }
+
+ return false;
+}
+
+static bool dcn32_full_validate_bw_helper(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int *vlevel,
int *split,
bool *merge,
- int *pipe_cnt)
+ int *pipe_cnt,
+ bool *repopulate_pipes)
{
struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
unsigned int dc_pipe_idx = 0;
int i = 0;
bool found_supported_config = false;
+ int vlevel_temp = 0;
dc_assert_fp_enabled();
@@ -1179,31 +1471,27 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
vba->VoltageLevel = *vlevel;
}
+ /* Apply split and merge flags before checking for subvp */
+ if (!dcn32_apply_merge_split_flags_helper(dc, context, repopulate_pipes, split, merge))
+ return false;
+ memset(split, 0, MAX_PIPES * sizeof(int));
+ memset(merge, 0, MAX_PIPES * sizeof(bool));
+
/* Conditions for setting up phantom pipes for SubVP:
* 1. Not force disable SubVP
- * 2. Full update (i.e. !fast_validate)
+ * 2. Full update (i.e. DC_VALIDATE_MODE_AND_PROGRAMMING)
* 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?)
* 4. Display configuration passes validation
* 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch)
*/
if (!dc->debug.force_disable_subvp && !dc->caps.dmub_caps.gecc_enable && dcn32_all_pipes_have_stream_and_plane(dc, context) &&
- !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) &&
- (*vlevel == context->bw_ctx.dml.soc.num_states ||
+ !dcn32_mpo_in_use(context) && !dcn32_any_surfaces_rotated(dc, context) && !is_test_pattern_enabled(context) &&
+ (*vlevel == context->bw_ctx.dml.soc.num_states || (vba->DRAMSpeedPerState[*vlevel] != vba->DRAMSpeedPerState[0] &&
+ vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported) ||
vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported ||
dc->debug.force_subvp_mclk_switch)) {
- dcn32_merge_pipes_for_subvp(dc, context);
- memset(merge, 0, MAX_PIPES * sizeof(bool));
-
- /* to re-initialize viewport after the pipe merge */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
- if (!pipe_ctx->plane_state || !pipe_ctx->stream)
- continue;
-
- resource_build_scaling_params(pipe_ctx);
- }
+ vlevel_temp = *vlevel;
while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) &&
dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) {
@@ -1229,7 +1517,8 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx);
- *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
+ *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes,
+ DC_VALIDATE_MODE_AND_PROGRAMMING);
// Populate dppclk to trigger a recalculate in dml_get_voltage_level
// so the phantom pipe DLG params can be assigned correctly.
pipes[0].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, *pipe_cnt, 0);
@@ -1263,12 +1552,17 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
}
}
+ if (vba->DRAMSpeedPerState[*vlevel] >= vba->DRAMSpeedPerState[vlevel_temp])
+ found_supported_config = false;
+
// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
// remove phantom pipes and repopulate dml pipes
if (!found_supported_config) {
- dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
+ dc_state_remove_phantom_streams_and_planes(dc, context);
+ dc_state_release_phantom_streams_and_planes(dc, context);
vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
- *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
+ *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes,
+ DC_VALIDATE_MODE_AND_PROGRAMMING);
*vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
/* This may adjust vlevel and maxMpcComb */
@@ -1288,14 +1582,19 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
* add phantom pipes. If pipe split (ODM / MPC) is required, both the main
* and phantom pipes will be split in the regular pipe splitting sequence.
*/
- memset(split, 0, MAX_PIPES * sizeof(int));
- memset(merge, 0, MAX_PIPES * sizeof(bool));
*vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
vba->VoltageLevel = *vlevel;
// Note: We can't apply the phantom pipes to hardware at this time. We have to wait
// until driver has acquired the DMCUB lock to do it safely.
+ assign_subvp_index(dc, context);
}
}
+
+ if (should_apply_odm_power_optimization(dc, context, vba, split, merge))
+ try_odm_power_optimization_and_revalidate(
+ dc, context, pipes, split, merge, vlevel, *pipe_cnt);
+
+ return true;
}
static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
@@ -1412,7 +1711,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
pipe_idx);
- if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) {
// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0;
context->res_ctx.pipe_ctx[i].unbounded_req = false;
@@ -1444,7 +1743,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
context->res_ctx.pipe_ctx[i].plane_state != context->res_ctx.pipe_ctx[i].top_pipe->plane_state) &&
context->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) {
/* SS: all active surfaces stored in MALL */
- if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type != SUBVP_PHANTOM) {
+ if (dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) != SUBVP_PHANTOM) {
context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[i].surface_size_in_mall_bytes;
if (context->res_ctx.pipe_ctx[i].stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) {
@@ -1635,18 +1934,218 @@ static bool dcn32_split_stream_for_mpc_or_odm(
return true;
}
+static bool dcn32_apply_merge_split_flags_helper(
+ struct dc *dc,
+ struct dc_state *context,
+ bool *repopulate_pipes,
+ int *split,
+ bool *merge)
+{
+ int i, pipe_idx;
+ bool newly_split[MAX_PIPES] = { false };
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ /* Applies merge[]/split[] to the pipes in context; sets *repopulate_pipes on any change, returns false if a split or scaling step fails */
+ if (dc->config.enable_windowed_mpo_odm) {
+ if (update_pipes_with_split_flags(
+ dc, context, vba, split, merge))
+ *repopulate_pipes = true;
+ } else {
+
+ /* The code below will be removed once windowed MPO ODM is fully
+ * enabled.
+ */
+ /* Pass 1: merge every pipe whose merge[] flag is set */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ /* skip pipes that don't need merging */
+ if (!merge[i])
+ continue;
+
+ /* For an ODM merge the MPC tree is ignored; MPO pipes will have their own flags */
+ if (pipe->prev_odm_pipe) {
+ /* unlink this pipe from its ODM chain */
+ pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe;
+ if (pipe->next_odm_pipe)
+ pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe;
+
+ /* 2:1 ODM + MPC split MPO -> single pipe + MPC split MPO */
+ if (pipe->bottom_pipe) {
+ if (pipe->bottom_pipe->prev_odm_pipe || pipe->bottom_pipe->next_odm_pipe) {
+ /* MPC split rules will handle this case */
+ pipe->bottom_pipe->top_pipe = NULL;
+ } else {
+ /* When merging an ODM pipe, the bottom MPC pipe must now point to
+ * the previous ODM pipe and its associated stream assets.
+ */
+ if (pipe->prev_odm_pipe->bottom_pipe) {
+ /* 3 plane MPO */
+ pipe->bottom_pipe->top_pipe = pipe->prev_odm_pipe->bottom_pipe;
+ pipe->prev_odm_pipe->bottom_pipe->bottom_pipe = pipe->bottom_pipe;
+ } else {
+ /* 2 plane MPO */
+ pipe->bottom_pipe->top_pipe = pipe->prev_odm_pipe;
+ pipe->prev_odm_pipe->bottom_pipe = pipe->bottom_pipe;
+ }
+
+ memcpy(&pipe->bottom_pipe->stream_res, &pipe->bottom_pipe->top_pipe->stream_res, sizeof(struct stream_resource));
+ }
+ }
+
+ if (pipe->top_pipe) {
+ pipe->top_pipe->bottom_pipe = NULL;
+ }
+
+ pipe->bottom_pipe = NULL;
+ pipe->next_odm_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ pipe->top_pipe = NULL;
+ pipe->prev_odm_pipe = NULL;
+ if (pipe->stream_res.dsc)
+ dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc);
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ memset(&pipe->link_res, 0, sizeof(pipe->link_res));
+ *repopulate_pipes = true;
+ } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
+ struct pipe_ctx *top_pipe = pipe->top_pipe;
+ struct pipe_ctx *bottom_pipe = pipe->bottom_pipe;
+
+ top_pipe->bottom_pipe = bottom_pipe;
+ if (bottom_pipe)
+ bottom_pipe->top_pipe = top_pipe;
+
+ pipe->top_pipe = NULL;
+ pipe->bottom_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ memset(&pipe->link_res, 0, sizeof(pipe->link_res));
+ *repopulate_pipes = true;
+ } else
+ ASSERT(0); /* Should never try to merge master pipe */
+
+ }
+ /* Pass 2: split pipes per split[]; old_index is read from dc->current_state (presumably a pipe-reuse hint - confirm) */
+ for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *hsplit_pipe = NULL;
+ bool odm;
+ int old_index = -1;
+
+ if (!pipe->stream || newly_split[i])
+ continue;
+
+ pipe_idx++;
+ odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled;
+
+ if (!pipe->plane_state && !odm)
+ continue;
+
+ if (split[i]) {
+ if (odm) {
+ if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
+ else if (old_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->pipe_idx;
+ } else {
+ if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx;
+ else if (old_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->pipe_idx;
+ }
+ hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(hsplit_pipe);
+ if (!hsplit_pipe)
+ return false;
+
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ pipe, hsplit_pipe, odm))
+ return false;
+
+ newly_split[hsplit_pipe->pipe_idx] = true;
+ *repopulate_pipes = true;
+ }
+ if (split[i] == 4) {
+ struct pipe_ctx *pipe_4to1;
+
+ if (odm && old_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->pipe_idx;
+ else if (!odm && old_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->pipe_idx;
+ else
+ old_index = -1;
+ pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(pipe_4to1);
+ if (!pipe_4to1)
+ return false;
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ pipe, pipe_4to1, odm))
+ return false;
+ newly_split[pipe_4to1->pipe_idx] = true;
+
+ if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe
+ && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe)
+ old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
+ else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->bottom_pipe &&
+ old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
+ old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx;
+ else
+ old_index = -1;
+ pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index);
+ ASSERT(pipe_4to1);
+ if (!pipe_4to1)
+ return false;
+ if (!dcn32_split_stream_for_mpc_or_odm(
+ dc, &context->res_ctx,
+ hsplit_pipe, pipe_4to1, odm))
+ return false;
+ newly_split[pipe_4to1->pipe_idx] = true;
+ }
+ if (odm)
+ dcn20_build_mapped_resource(dc, context, pipe->stream);
+ }
+ /* Rebuild scaling params for every pipe that still has a plane; any failure fails the helper */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state) {
+ if (!resource_build_scaling_params(pipe))
+ return false;
+ }
+ }
+ /* Rebuild test pattern params on each stream's OTG master, when one is found */
+ for (i = 0; i < context->stream_count; i++) {
+ struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(&context->res_ctx,
+ context->streams[i]);
+
+ if (otg_master)
+ resource_build_test_pattern_params(&context->res_ctx, otg_master);
+ }
+ }
+ return true;
+}
+
bool dcn32_internal_validate_bw(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int *pipe_cnt_out,
int *vlevel_out,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
bool out = false;
bool repopulate_pipes = false;
int split[MAX_PIPES] = { 0 };
bool merge[MAX_PIPES] = { false };
- bool newly_split[MAX_PIPES] = { false };
int pipe_cnt, i, pipe_idx;
int vlevel = context->bw_ctx.dml.soc.num_states;
struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
@@ -1657,12 +2156,15 @@ bool dcn32_internal_validate_bw(struct dc *dc,
if (!pipes)
return false;
- // For each full update, remove all existing phantom pipes first
- dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate);
+ /* For each full update, remove all existing phantom pipes first */
+ dc_state_remove_phantom_streams_and_planes(dc, context);
+ dc_state_release_phantom_streams_and_planes(dc, context);
dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+ for (i = 0; i < context->stream_count; i++)
+ resource_update_pipes_for_stream_with_slice_count(context, dc->current_state, dc->res_pool, context->streams[i], 1);
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode);
if (!pipe_cnt) {
out = true;
@@ -1672,10 +2174,13 @@ bool dcn32_internal_validate_bw(struct dc *dc,
dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt);
context->bw_ctx.dml.soc.max_vratio_pre = dcn32_determine_max_vratio_prefetch(dc, context);
- if (!fast_validate)
- dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt);
+ if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING) {
+ if (!dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge,
+ &pipe_cnt, &repopulate_pipes))
+ goto validate_fail;
+ }
- if (fast_validate ||
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING ||
(dc->debug.dml_disallow_alternate_prefetch_modes &&
(vlevel == context->bw_ctx.dml.soc.num_states ||
vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) {
@@ -1692,7 +2197,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
dm_prefetch_support_none;
- context->bw_ctx.dml.validate_max_state = fast_validate;
+ context->bw_ctx.dml.validate_max_state = (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING);
vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
context->bw_ctx.dml.validate_max_state = false;
@@ -1701,7 +2206,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
memset(split, 0, sizeof(split));
memset(merge, 0, sizeof(merge));
vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
- // dcn20_validate_apply_pipe_split_flags can modify voltage level outside of DML
+ /* dcn20_validate_apply_pipe_split_flags can modify voltage level outside of DML */
vba->VoltageLevel = vlevel;
}
}
@@ -1730,175 +2235,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
pipe_idx++;
}
- /* merge pipes if necessary */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- /*skip pipes that don't need merging*/
- if (!merge[i])
- continue;
-
- /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */
- if (pipe->prev_odm_pipe) {
- /*split off odm pipe*/
- pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe;
- if (pipe->next_odm_pipe)
- pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe;
-
- /*2:1ODM+MPC Split MPO to Single Pipe + MPC Split MPO*/
- if (pipe->bottom_pipe) {
- if (pipe->bottom_pipe->prev_odm_pipe || pipe->bottom_pipe->next_odm_pipe) {
- /*MPC split rules will handle this case*/
- pipe->bottom_pipe->top_pipe = NULL;
- } else {
- /* when merging an ODM pipes, the bottom MPC pipe must now point to
- * the previous ODM pipe and its associated stream assets
- */
- if (pipe->prev_odm_pipe->bottom_pipe) {
- /* 3 plane MPO*/
- pipe->bottom_pipe->top_pipe = pipe->prev_odm_pipe->bottom_pipe;
- pipe->prev_odm_pipe->bottom_pipe->bottom_pipe = pipe->bottom_pipe;
- } else {
- /* 2 plane MPO*/
- pipe->bottom_pipe->top_pipe = pipe->prev_odm_pipe;
- pipe->prev_odm_pipe->bottom_pipe = pipe->bottom_pipe;
- }
-
- memcpy(&pipe->bottom_pipe->stream_res, &pipe->bottom_pipe->top_pipe->stream_res, sizeof(struct stream_resource));
- }
- }
-
- if (pipe->top_pipe) {
- pipe->top_pipe->bottom_pipe = NULL;
- }
-
- pipe->bottom_pipe = NULL;
- pipe->next_odm_pipe = NULL;
- pipe->plane_state = NULL;
- pipe->stream = NULL;
- pipe->top_pipe = NULL;
- pipe->prev_odm_pipe = NULL;
- if (pipe->stream_res.dsc)
- dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc);
- memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
- memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
- memset(&pipe->link_res, 0, sizeof(pipe->link_res));
- repopulate_pipes = true;
- } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
- struct pipe_ctx *top_pipe = pipe->top_pipe;
- struct pipe_ctx *bottom_pipe = pipe->bottom_pipe;
-
- top_pipe->bottom_pipe = bottom_pipe;
- if (bottom_pipe)
- bottom_pipe->top_pipe = top_pipe;
-
- pipe->top_pipe = NULL;
- pipe->bottom_pipe = NULL;
- pipe->plane_state = NULL;
- pipe->stream = NULL;
- memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
- memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
- memset(&pipe->link_res, 0, sizeof(pipe->link_res));
- repopulate_pipes = true;
- } else
- ASSERT(0); /* Should never try to merge master pipe */
-
- }
-
- for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
- struct pipe_ctx *hsplit_pipe = NULL;
- bool odm;
- int old_index = -1;
-
- if (!pipe->stream || newly_split[i])
- continue;
-
- pipe_idx++;
- odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled;
-
- if (!pipe->plane_state && !odm)
- continue;
-
- if (split[i]) {
- if (odm) {
- if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
- else if (old_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->pipe_idx;
- } else {
- if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx;
- else if (old_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->pipe_idx;
- }
- hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index);
- ASSERT(hsplit_pipe);
- if (!hsplit_pipe)
- goto validate_fail;
-
- if (!dcn32_split_stream_for_mpc_or_odm(
- dc, &context->res_ctx,
- pipe, hsplit_pipe, odm))
- goto validate_fail;
-
- newly_split[hsplit_pipe->pipe_idx] = true;
- repopulate_pipes = true;
- }
- if (split[i] == 4) {
- struct pipe_ctx *pipe_4to1;
-
- if (odm && old_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->pipe_idx;
- else if (!odm && old_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->pipe_idx;
- else
- old_index = -1;
- pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index);
- ASSERT(pipe_4to1);
- if (!pipe_4to1)
- goto validate_fail;
- if (!dcn32_split_stream_for_mpc_or_odm(
- dc, &context->res_ctx,
- pipe, pipe_4to1, odm))
- goto validate_fail;
- newly_split[pipe_4to1->pipe_idx] = true;
-
- if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe
- && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe)
- old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx;
- else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->bottom_pipe->bottom_pipe &&
- old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state)
- old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx;
- else
- old_index = -1;
- pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index);
- ASSERT(pipe_4to1);
- if (!pipe_4to1)
- goto validate_fail;
- if (!dcn32_split_stream_for_mpc_or_odm(
- dc, &context->res_ctx,
- hsplit_pipe, pipe_4to1, odm))
- goto validate_fail;
- newly_split[pipe_4to1->pipe_idx] = true;
- }
- if (odm)
- dcn20_build_mapped_resource(dc, context, pipe->stream);
- }
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->plane_state) {
- if (!resource_build_scaling_params(pipe))
- goto validate_fail;
- }
- }
+ if (!dcn32_apply_merge_split_flags_helper(dc, context, &repopulate_pipes, split, merge))
+ goto validate_fail;
/* Actual dsc count per stream dsc validation*/
if (!dcn20_validate_dsc(dc, context)) {
@@ -1911,7 +2249,9 @@ bool dcn32_internal_validate_bw(struct dc *dc,
int flag_vlevel = vlevel;
int i;
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode);
+ if (!dc->config.enable_windowed_mpo_odm)
+ dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes);
/* repopulate_pipes = 1 means the pipes were either split or merged. In this case
* we have to re-calculate the DET allocation and run through DML once more to
@@ -1920,7 +2260,9 @@ bool dcn32_internal_validate_bw(struct dc *dc,
* */
context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+
vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+
if (vlevel == context->bw_ctx.dml.soc.num_states) {
/* failed after DET size changes */
goto validate_fail;
@@ -1934,6 +2276,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
vba->VoltageLevel = i;
vlevel = i;
flags_valid = true;
+ break;
}
}
@@ -1964,6 +2307,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
int i, pipe_idx, vlevel_temp = 0;
double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz;
double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+ double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed;
double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation;
bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] !=
dm_dram_clock_change_unsupported;
@@ -1975,6 +2319,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
bool need_fclk_lat_as_dummy = false;
bool is_subvp_p_drr = false;
struct dc_stream_state *fpo_candidate_stream = NULL;
+ struct dc_stream_status *stream_status = NULL;
dc_assert_fp_enabled();
@@ -2000,7 +2345,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
}
context->bw_ctx.dml.soc.dram_clock_change_latency_us =
dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
- dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, DC_VALIDATE_MODE_AND_PROGRAMMING);
maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
if (is_subvp_p_drr) {
context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
@@ -2009,8 +2354,11 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
for (i = 0; i < context->stream_count; i++) {
+ stream_status = NULL;
if (context->streams[i])
- context->streams[i]->fpo_in_use = false;
+ stream_status = dc_state_get_stream_status(context, context->streams[i]);
+ if (stream_status)
+ stream_status->fpo_in_use = false;
}
if (!pstate_en || (!dc->debug.disable_fpo_optimizations &&
@@ -2018,7 +2366,9 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
/* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */
fpo_candidate_stream = dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context);
if (fpo_candidate_stream) {
- fpo_candidate_stream->fpo_in_use = true;
+ stream_status = dc_state_get_stream_status(context, fpo_candidate_stream);
+ if (stream_status)
+ stream_status->fpo_in_use = true;
context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = true;
}
@@ -2041,7 +2391,8 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
context->bw_ctx.dml.soc.fclk_change_latency_us =
dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
}
- dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel_temp, false);
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel_temp,
+ DC_VALIDATE_MODE_AND_PROGRAMMING);
if (vlevel_temp < vlevel) {
vlevel = vlevel_temp;
maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
@@ -2055,11 +2406,15 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
*/
context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
for (i = 0; i < context->stream_count; i++) {
+ stream_status = NULL;
if (context->streams[i])
- context->streams[i]->fpo_in_use = false;
+ stream_status = dc_state_get_stream_status(context, context->streams[i]);
+ if (stream_status)
+ stream_status->fpo_in_use = false;
}
context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us;
- dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+ dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel,
+ DC_VALIDATE_MODE_AND_PROGRAMMING);
}
}
}
@@ -2151,7 +2506,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
}
if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) {
- min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+ min_dram_speed_mts = dram_speed_from_validation;
min_dram_speed_mts_margin = 160;
context->bw_ctx.dml.soc.dram_clock_change_latency_us =
@@ -2703,12 +3058,14 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
/* Override from passed dc->bb_overrides if available*/
if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
&& dc->bb_overrides.sr_exit_time_ns) {
+ dc->dml2_options.bbox_overrides.sr_exit_latency_us =
dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
}
if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000)
!= dc->bb_overrides.sr_enter_plus_exit_time_ns
&& dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us =
dcn3_2_soc.sr_enter_plus_exit_time_us =
dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
}
@@ -2716,12 +3073,14 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
if ((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
&& dc->bb_overrides.urgent_latency_ns) {
dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ dc->dml2_options.bbox_overrides.urgent_latency_us =
dcn3_2_soc.urgent_latency_pixel_data_only_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
}
if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000)
!= dc->bb_overrides.dram_clock_change_latency_ns
&& dc->bb_overrides.dram_clock_change_latency_ns) {
+ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us =
dcn3_2_soc.dram_clock_change_latency_us =
dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
}
@@ -2729,6 +3088,7 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000)
!= dc->bb_overrides.fclk_clock_change_latency_ns
&& dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dc->dml2_options.bbox_overrides.fclk_change_latency_us =
dcn3_2_soc.fclk_change_latency_us =
dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
}
@@ -2746,14 +3106,17 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
if (bb_info.dram_clock_change_latency_100ns > 0)
+ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us =
dcn3_2_soc.dram_clock_change_latency_us =
bb_info.dram_clock_change_latency_100ns * 10;
if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us =
dcn3_2_soc.sr_enter_plus_exit_time_us =
bb_info.dram_sr_enter_exit_latency_100ns * 10;
if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dc->dml2_options.bbox_overrides.sr_exit_latency_us =
dcn3_2_soc.sr_exit_time_us =
bb_info.dram_sr_exit_latency_100ns * 10;
}
@@ -2761,12 +3124,14 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
/* Override from VBIOS for num_chan */
if (dc->ctx->dc_bios->vram_info.num_chans) {
+ dc->dml2_options.bbox_overrides.dram_num_chan =
dcn3_2_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
dcn3_2_soc.mall_allocated_for_dcn_mbytes = (double)(dcn32_calc_num_avail_chans_for_mall(dc,
dc->ctx->dc_bios->vram_info.num_chans) * dc->caps.mall_size_per_mem_channel);
}
if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dc->dml2_options.bbox_overrides.dram_chanel_width_bytes =
dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
/* DML DSC delay factor workaround */
@@ -2777,6 +3142,10 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
/* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml2_options.bbox_overrides.disp_pll_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml2_options.bbox_overrides.xtalclk_mhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000.0;
+ dc->dml2_options.bbox_overrides.dchub_refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+ dc->dml2_options.bbox_overrides.dprefclk_mhz = dc->clk_mgr->dprefclk_khz / 1000.0;
/* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */
if (bw_params->clk_table.entries[0].memclk_mhz) {
@@ -2860,7 +3229,7 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
j = 0;
// create the final dcfclk and uclk table
while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
} else {
@@ -2884,6 +3253,16 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
}
+ /* bw_params->clk_table.entries[] has MAX_NUM_DPM_LVL elements
+ * (MAX_NUM_DPM_LVL is 8), whereas
+ * dcn3_2_soc.clock_limits[] has DC__VOLTAGE_STATES elements
+ * (DC__VOLTAGE_STATES is 40).
+ */
+ if (num_states > MAX_NUM_DPM_LVL) {
+ ASSERT(0);
+ return;
+ }
+
dcn3_2_soc.num_states = num_states;
for (i = 0; i < dcn3_2_soc.num_states; i++) {
dcn3_2_soc.clock_limits[i].state = i;
@@ -2932,6 +3311,72 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
if (dc->current_state)
dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32);
}
+
+ if (dc->clk_mgr->bw_params->clk_table.num_entries > 1) {
+ unsigned int i = 0;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_states = dc->clk_mgr->bw_params->clk_table.num_entries;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dppclk_levels;
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz) {
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz;
+ }
+ }
+ }
}
void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
@@ -2947,25 +3392,24 @@ bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe)
{
bool allow = false;
uint32_t refresh_rate = 0;
+ uint32_t min_refresh = subvp_active_margin_list.min_refresh;
+ uint32_t max_refresh = subvp_active_margin_list.max_refresh;
+ uint32_t i;
- /* Allow subvp on displays that have active margin for 2560x1440@60hz displays
- * only for now. There must be no scaling as well.
- *
- * For now we only enable on 2560x1440@60hz displays to enable 4K60 + 1440p60 configs
- * for p-state switching.
- */
- if (pipe->stream && pipe->plane_state) {
- refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 +
- pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1)
- / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total);
- if (pipe->stream->timing.v_addressable == 1440 &&
- pipe->stream->timing.h_addressable == 2560 &&
- refresh_rate >= 55 && refresh_rate <= 65 &&
- pipe->plane_state->src_rect.height == 1440 &&
- pipe->plane_state->src_rect.width == 2560 &&
- pipe->plane_state->dst_rect.height == 1440 &&
- pipe->plane_state->dst_rect.width == 2560)
+ for (i = 0; i < SUBVP_ACTIVE_MARGIN_LIST_LEN; i++) {
+ uint32_t width = subvp_active_margin_list.res[i].width;
+ uint32_t height = subvp_active_margin_list.res[i].height;
+
+ refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
+ (uint64_t)pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
+ refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
+ refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
+
+ if (refresh_rate >= min_refresh && refresh_rate <= max_refresh &&
+ dcn32_check_native_scaling_for_res(pipe, width, height)) {
allow = true;
+ break;
+ }
}
return allow;
}
@@ -3084,7 +3528,15 @@ void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *co
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- if (!pipe->stream)
+ /* In DCN32/321, FPO uses per-pipe P-State force.
+ * If there are no planes, HUBP is power gated and
+ * therefore programming UCLK_PSTATE_FORCE does
+ * nothing (P-State will always be asserted naturally
+ * on a pipe that has HUBP power gated). Therefore we
+ * only want to enable FPO if the FPO pipe has both
+ * a stream and a plane.
+ */
+ if (!pipe->stream || !pipe->plane_state)
continue;
if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) {
@@ -3100,15 +3552,16 @@ void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *co
*
* @dc: current dc state
* @context: new dc state
+ * @fpo_candidate_stream: candidate stream to be chosen for FPO
* @vactive_margin_req_us: The vactive marign required for a vactive pipe to be considered "found"
*
* Return: True if VACTIVE display is found, false otherwise
*/
-bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, uint32_t vactive_margin_req_us)
+bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, struct dc_stream_state *fpo_candidate_stream, uint32_t vactive_margin_req_us)
{
unsigned int i, pipe_idx;
const struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
- bool vactive_found = false;
+ bool vactive_found = true;
unsigned int blank_us = 0;
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
@@ -3117,11 +3570,20 @@ bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, uint
if (!pipe->stream)
continue;
+ /* Don't need to check for vactive margin on the FPO candidate stream */
+ if (fpo_candidate_stream && pipe->stream == fpo_candidate_stream) {
+ pipe_idx++;
+ continue;
+ }
+
+ /* Every plane (apart from the ones driven by the FPO pipes) needs to have active margin
+ * in order for us to have found a valid "vactive" config for FPO + Vactive
+ */
blank_us = ((pipe->stream->timing.v_total - pipe->stream->timing.v_addressable) * pipe->stream->timing.h_total /
(double)(pipe->stream->timing.pix_clk_100hz * 100)) * 1000000;
- if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] >= vactive_margin_req_us &&
- !(pipe->stream->vrr_active_variable || pipe->stream->vrr_active_fixed) && blank_us < dc->debug.fpo_vactive_max_blank_us) {
- vactive_found = true;
+ if (vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] < vactive_margin_req_us ||
+ pipe->stream->vrr_active_variable || pipe->stream->vrr_active_fixed || blank_us >= dc->debug.fpo_vactive_max_blank_us) {
+ vactive_found = false;
break;
}
pipe_idx++;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index defbee866be6..273d2bd79d85 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -36,9 +36,6 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
display_e2e_pipe_params_st *pipes,
int pipe_cnt);
-uint8_t dcn32_predict_pipe_split(struct dc_state *context,
- display_e2e_pipe_params_st *pipe_e2e);
-
void dcn32_set_phantom_stream_timing(struct dc *dc,
struct dc_state *context,
struct pipe_ctx *ref_pipe,
@@ -52,7 +49,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
display_e2e_pipe_params_st *pipes,
int *pipe_cnt_out,
int *vlevel_out,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
@@ -74,7 +71,7 @@ void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
void dcn32_assign_fpo_vactive_candidate(struct dc *dc, const struct dc_state *context, struct dc_stream_state **fpo_candidate_stream);
-bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, uint32_t vactive_margin_req);
+bool dcn32_find_vactive_pipe(struct dc *dc, const struct dc_state *context, struct dc_stream_state *fpo_candidate_stream, uint32_t vactive_margin_req);
void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index cbdfb762c10c..0782a34689a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -813,6 +813,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
(v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ ||
v->DCFCLKPerState[mode_lib->vba.VoltageLevel] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+ mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false,
+
/* Output */
&v->DSTXAfterScaler[k],
&v->DSTYAfterScaler[k],
@@ -3317,6 +3319,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
v->SwathHeightCThisState[k], v->TWait,
(v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ || v->DCFCLKState[i][j] <= DCFCLK_FREQ_EXTRA_PREFETCH_REQ_MHZ) ?
mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
+ mode_lib->vba.PrefetchModePerState[i][j] > 0 || mode_lib->vba.DRAMClockChangeRequirementFinal == false,
/* Output */
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
@@ -3361,6 +3364,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
&mode_lib->vba.UrgentBurstFactorLumaPre[k],
&mode_lib->vba.UrgentBurstFactorChromaPre[k],
&mode_lib->vba.NotUrgentLatencyHidingPre[k]);
+
+ v->cursor_bw_pre[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] /
+ 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * v->VRatioPreY[i][j][k];
}
{
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index ecea008f19d3..0748ef36a16a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -1401,13 +1401,13 @@ void dml32_CalculateOutputLink(
if (Output == dm_dp2p0) {
*OutBpp = 0;
if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
- PHYCLKD32PerState >= 10000 / 32) {
+ PHYCLKD32PerState >= 10000.0 / 32) {
*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
- if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
+ if (*OutBpp == 0 && PHYCLKD32PerState < 13500.0 / 32 && DSCEnable == true &&
ForcedOutputLinkBPP == 0) {
*RequiresDSC = true;
LinkDSCEnable = true;
@@ -1423,7 +1423,7 @@ void dml32_CalculateOutputLink(
*OutputRate = dm_output_rate_dp_rate_uhbr10;
}
if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
- *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
+ *OutBpp == 0 && PHYCLKD32PerState >= 13500.0 / 32) {
*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
@@ -1595,14 +1595,16 @@ double dml32_TruncToValidBPP(
unsigned int NonDSCBPP0;
unsigned int NonDSCBPP1;
unsigned int NonDSCBPP2;
+ unsigned int NonDSCBPP3 = BPP_INVALID;
if (Format == dm_420) {
NonDSCBPP0 = 12;
NonDSCBPP1 = 15;
NonDSCBPP2 = 18;
MinDSCBPP = 6;
- MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
+ MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
} else if (Format == dm_444) {
+ NonDSCBPP3 = 18;
NonDSCBPP0 = 24;
NonDSCBPP1 = 30;
NonDSCBPP2 = 36;
@@ -1650,6 +1652,8 @@ double dml32_TruncToValidBPP(
MaxLinkBPP = 2 * MaxLinkBPP;
}
+ *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
+
if (DesiredBPP == 0) {
if (DSCEnable) {
if (MaxLinkBPP < MinDSCBPP)
@@ -1665,6 +1669,8 @@ double dml32_TruncToValidBPP(
return NonDSCBPP1;
else if (MaxLinkBPP >= NonDSCBPP0)
return 16.0;
+ else if ((Output == dm_dp2p0 || Output == dm_dp) && NonDSCBPP3 != BPP_INVALID && MaxLinkBPP >= NonDSCBPP3)
+ return NonDSCBPP3; // Special case to allow 6bpc RGB for DP connections.
else
return BPP_INVALID;
}
@@ -1676,10 +1682,6 @@ double dml32_TruncToValidBPP(
else
return DesiredBPP;
}
-
- *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
-
- return BPP_INVALID;
} // TruncToValidBPP
double dml32_RequiredDTBCLK(
@@ -1975,8 +1977,8 @@ void dml32_CalculateVMRowAndSwath(
unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
unsigned int PDEAndMetaPTEBytesFrameY;
unsigned int PDEAndMetaPTEBytesFrameC;
- unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
- unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
+ unsigned int MetaRowByteY[DC__NUM_DPP__MAX] = {0};
+ unsigned int MetaRowByteC[DC__NUM_DPP__MAX] = {0};
unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
@@ -3423,6 +3425,7 @@ bool dml32_CalculatePrefetchSchedule(
unsigned int SwathHeightC,
double TWait,
double TPreReq,
+ bool ExtendPrefetchIfPossible,
/* Output */
double *DSTXAfterScaler,
double *DSTYAfterScaler,
@@ -3892,12 +3895,32 @@ bool dml32_CalculatePrefetchSchedule(
/* Clamp to oto for bandwidth calculation */
LinesForPrefetchBandwidth = dst_y_prefetch_oto;
} else {
- *DestinationLinesForPrefetch = dst_y_prefetch_equ;
- TimeForFetchingMetaPTE = Tvm_equ;
- TimeForFetchingRowInVBlank = Tr0_equ;
- *PrefetchBandwidth = prefetch_bw_equ;
- /* Clamp to equ for bandwidth calculation */
- LinesForPrefetchBandwidth = dst_y_prefetch_equ;
+ /* For mode programming we want to extend the prefetch as much as possible
+ * (up to oto, or as long as we can for equ) if we're not already applying
+ * the 60us prefetch requirement. This is to avoid intermittent underflow
+ * issues during prefetch.
+ *
+ * The prefetch extension is applied under the following scenarios:
+ * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
+ * 2. We're using subvp or drr methods of p-state switch, in which case we
+ * we don't care if prefetch takes up more of the blanking time
+ *
+ * Mode programming typically chooses the smallest prefetch time possible
+ * (i.e. highest bandwidth during prefetch) presumably to create margin between
+ * p-states / c-states that happen in vblank and prefetch. Therefore we only
+ * apply this prefetch extension when p-state in vblank is not required (UCLK
+ * p-states take up the most vblank time).
+ */
+ if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
+ MyError = true;
+ } else {
+ *DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ TimeForFetchingMetaPTE = Tvm_equ;
+ TimeForFetchingRowInVBlank = Tr0_equ;
+ *PrefetchBandwidth = prefetch_bw_equ;
+ /* Clamp to equ for bandwidth calculation */
+ LinesForPrefetchBandwidth = dst_y_prefetch_equ;
+ }
}
*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
@@ -4078,12 +4101,9 @@ bool dml32_CalculatePrefetchSchedule(
if (MyError) {
*PrefetchBandwidth = 0;
- TimeForFetchingMetaPTE = 0;
- TimeForFetchingRowInVBlank = 0;
*DestinationLinesToRequestVMInVBlank = 0;
*DestinationLinesToRequestRowInVBlank = 0;
*DestinationLinesForPrefetch = 0;
- LinesToRequestPrefetchPixelData = 0;
*VRatioPrefetchY = 0;
*VRatioPrefetchC = 0;
*RequiredPrefetchPixDataBWLuma = 0;
@@ -4270,7 +4290,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
unsigned int i, j, k;
unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
unsigned int DRAMClockChangeSupportNumber = 0;
- unsigned int LastSurfaceWithoutMargin;
+ unsigned int LastSurfaceWithoutMargin = 0;
unsigned int DRAMClockChangeMethod = 0;
bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
double MinActiveFCLKChangeMargin = 0.;
@@ -4661,10 +4681,6 @@ void dml32_CalculateMinAndMaxPrefetchMode(
} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
*MinPrefetchMode = 0;
*MaxPrefetchMode = 0;
- } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
- dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
- *MinPrefetchMode = 0;
- *MaxPrefetchMode = 3;
} else {
*MinPrefetchMode = 0;
*MaxPrefetchMode = 3;
@@ -5639,9 +5655,9 @@ void dml32_CalculateStutterEfficiency(
double LastZ8StutterPeriod = 0.0;
double LastStutterPeriod = 0.0;
unsigned int TotalNumberOfActiveOTG = 0;
- double doublePixelClock;
- unsigned int doubleHTotal;
- unsigned int doubleVTotal;
+ double doublePixelClock = 0;
+ unsigned int doubleHTotal = 0;
+ unsigned int doubleVTotal = 0;
bool SameTiming = true;
double DETBufferingTimeY;
double SwathWidthYCriticalSurface = 0.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 592d174df6c6..5d34735df83d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -747,6 +747,7 @@ bool dml32_CalculatePrefetchSchedule(
unsigned int SwathHeightC,
double TWait,
double TPreReq,
+ bool ExtendPrefetchIfPossible,
/* Output */
double *DSTXAfterScaler,
double *DSTYAfterScaler,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
index 9ba6cb67655f..6c75aa82327a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
@@ -139,7 +139,6 @@ void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs,
if (dual_plane) {
unsigned int p1_pte_row_height_linear = get_dpte_row_height_linear_c(mode_lib, e2e_pipe_param,
num_pipes, pipe_idx);
- ;
if (src->sw_mode == dm_sw_linear)
ASSERT(p1_pte_row_height_linear >= 8);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
index b26fcf86014c..e0a1dc89ce43 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -139,8 +139,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = {
.pct_ideal_sdp_bw_after_urgent = 90.0,
.pct_ideal_fabric_bw_after_urgent = 67.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
.pct_ideal_dram_bw_after_urgent_strobe = 67.0,
.max_avg_sdp_bw_use_normal_percent = 80.0,
.max_avg_fabric_bw_use_normal_percent = 60.0,
@@ -616,12 +616,14 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
/* Override from passed dc->bb_overrides if available*/
if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
&& dc->bb_overrides.sr_exit_time_ns) {
+ dc->dml2_options.bbox_overrides.sr_exit_latency_us =
dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
}
if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000)
!= dc->bb_overrides.sr_enter_plus_exit_time_ns
&& dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us =
dcn3_21_soc.sr_enter_plus_exit_time_us =
dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
}
@@ -629,12 +631,14 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
if ((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
&& dc->bb_overrides.urgent_latency_ns) {
dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+ dc->dml2_options.bbox_overrides.urgent_latency_us =
dcn3_21_soc.urgent_latency_pixel_data_only_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
}
if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000)
!= dc->bb_overrides.dram_clock_change_latency_ns
&& dc->bb_overrides.dram_clock_change_latency_ns) {
+ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us =
dcn3_21_soc.dram_clock_change_latency_us =
dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
}
@@ -642,6 +646,7 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000)
!= dc->bb_overrides.fclk_clock_change_latency_ns
&& dc->bb_overrides.fclk_clock_change_latency_ns) {
+ dc->dml2_options.bbox_overrides.fclk_change_latency_us =
dcn3_21_soc.fclk_change_latency_us =
dc->bb_overrides.fclk_clock_change_latency_ns / 1000;
}
@@ -659,14 +664,17 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
if (bb_info.dram_clock_change_latency_100ns > 0)
+ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us =
dcn3_21_soc.dram_clock_change_latency_us =
bb_info.dram_clock_change_latency_100ns * 10;
if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us =
dcn3_21_soc.sr_enter_plus_exit_time_us =
bb_info.dram_sr_enter_exit_latency_100ns * 10;
if (bb_info.dram_sr_exit_latency_100ns > 0)
+ dc->dml2_options.bbox_overrides.sr_exit_latency_us =
dcn3_21_soc.sr_exit_time_us =
bb_info.dram_sr_exit_latency_100ns * 10;
}
@@ -674,12 +682,14 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
/* Override from VBIOS for num_chan */
if (dc->ctx->dc_bios->vram_info.num_chans) {
+ dc->dml2_options.bbox_overrides.dram_num_chan =
dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
dcn3_21_soc.mall_allocated_for_dcn_mbytes = (double)(dcn32_calc_num_avail_chans_for_mall(dc,
dc->ctx->dc_bios->vram_info.num_chans) * dc->caps.mall_size_per_mem_channel);
}
if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+ dc->dml2_options.bbox_overrides.dram_chanel_width_bytes =
dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
/* DML DSC delay factor workaround */
@@ -690,6 +700,10 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
/* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml2_options.bbox_overrides.disp_pll_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ dc->dml2_options.bbox_overrides.xtalclk_mhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000.0;
+ dc->dml2_options.bbox_overrides.dchub_refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0;
+ dc->dml2_options.bbox_overrides.dprefclk_mhz = dc->clk_mgr->dprefclk_khz / 1000.0;
/* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */
if (dc->debug.use_legacy_soc_bb_mechanism) {
@@ -765,7 +779,7 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
j = 0;
// create the final dcfclk and uclk table
while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
} else {
@@ -789,6 +803,16 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
}
+ /* bw_params->clk_table.entries[MAX_NUM_DPM_LVL].
+ * MAX_NUM_DPM_LVL is 8.
+ * dcn3_21_soc.clock_limits[DC__VOLTAGE_STATES].
+ * DC__VOLTAGE_STATES is 40.
+ */
+ if (num_states > MAX_NUM_DPM_LVL) {
+ ASSERT(0);
+ return;
+ }
+
dcn3_21_soc.num_states = num_states;
for (i = 0; i < dcn3_21_soc.num_states; i++) {
dcn3_21_soc.clock_limits[i].state = i;
@@ -836,5 +860,72 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32);
if (dc->current_state)
dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32);
+
+ if (dc->clk_mgr->bw_params->clk_table.num_entries > 1) {
+ unsigned int i = 0;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_states = dc->clk_mgr->bw_params->clk_table.num_entries;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels;
+
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+ dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dppclk_levels;
+
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz)
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz;
+ }
+
+ for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; i++) {
+ if (dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz) {
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+ dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz;
+ }
+ }
+ }
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
new file mode 100644
index 000000000000..817a370e80a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#include "resource.h"
+#include "dcn35_fpu.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn35/dcn35_resource.h"
+#include "dml/dcn31/dcn31_fpu.h"
+#include "dml/dml_inline_defs.h"
+
+#include "link_service.h"
+
+#define DC_LOGGER_INIT(logger)
+
+struct _vcs_dpi_ip_params_st dcn3_5_ip = { /* DCN3.5 IP parameter defaults; some fields are overwritten in dcn35_update_bw_bounding_box_fpu() */
+ .VBlankNomDefaultUS = 668,
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = 1536,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,/*not used*/
+ .opp_output_buffer_lines = 1,/*not used*/
+ .pixel_chunk_size_kbytes = 8,
+ //.alpha_pixel_chunk_size_kbytes = 4;/*new*/
+ //.min_pixel_chunk_size_bytes = 1024;/*new*/
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,/*delta from 10*/
+ .dsc422_native_support = true,/*delta from false*/
+ .is_line_buffer_bpp_fixed = true,/*new*/
+ .line_buffer_fixed_bpp = 32,/*delta from 48*/
+ .line_buffer_size_bits = 986880,/*delta from 789504*/
+ .max_line_buffer_lines = 32,/*delta from 12*/
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4, /* default; replaced by dc->res_pool->pipe_count at runtime */
+ .max_num_otg = 4, /* default; replaced by res_cap->num_timing_generator at runtime */
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ /*.max_num_hdmi_frl_outputs = 1; new in dml2*/
+ /*.max_num_dp2p0_outputs = 2; new in dml2*/
+ /*.max_num_dp2p0_streams = 4; new in dml2*/
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 68,/*changed from 64,*/
+ .dpte_buffer_size_in_pte_reqs_chroma = 36,/*changed from 34*/
+ /*.dcc_meta_buffer_size_bytes = 6272; new to dml2*/
+ .dispclk_ramp_margin_percent = 1.11,/*delta from 1*/
+ /*.dppclk_delay_subtotal = 47;
+ .dppclk_delay_scl = 50;
+ .dppclk_delay_scl_lb_only = 16;
+ .dppclk_delay_cnvc_formatter = 28;
+ .dppclk_delay_cnvc_cursor = 6;
+ .dispclk_delay_subtotal = 125;*/ /*new to dml2*/
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47, /* changed from 46,*/
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,/*changed from 27,*/
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125, /*changed from 119,*/
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+// .config_return_buffer_segment_size_in_kbytes = 64;/*required, hard coded in dml2_translate_ip_params*/
+
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { /* DCN3.5 SoC bounding-box defaults; num_chans is not set here, it is assigned from bw_params in dcn35_update_bw_bounding_box_fpu() */
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = { /* dcfclk_mhz is not listed for any state below, so it is zero-initialized */
+ {
+ .state = 0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 400.0,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 5, /* matches the five clock_limits entries above */
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .sr_exit_z8_time_us = 250.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
+ .fclk_change_latency_us = 24.0,
+ .usr_retraining_latency_us = 2,
+ .writeback_latency_us = 12.0,
+
+ .dram_channel_width_bytes = 4,/*not exist in dml2*/
+ .round_trip_ping_latency_dcfclk_cycles = 106,/*not exist in dml2*/
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .dram_clock_change_latency_us = 34.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_fabric_bw_after_urgent = 80.0, /*new to dml2*/
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = 0,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
+void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr)
+{
+ // TODO: watermark range table is not built here yet; the function is currently a no-op and clk_mgr is unused.
+}
+
+
+/*
+ * dcn35_update_bw_bounding_box_fpu
+ *
+ * This would override some dcn3_5 ip_or_soc initial parameters hardcoded from
+ * spreadsheet with actual values as per dGPU SKU:
+ * - with a few options passed from dc->config
+ * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might
+ * need to get it from PM FW)
+ * - with latency values (passed in ns units) in dc->bb_overrides for
+ * debugging purposes
+ * - with passed latencies from VBIOS (in 100_ns units) if available for
+ * certain dGPU SKU
+ * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU
+ * of the same ASIC)
+ * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM
+ * FW for different clocks (which might differ for certain dGPU SKU of the
+ * same ASIC)
+ */
+void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
+ struct clk_bw_params *bw_params)
+{
+ unsigned int i, closest_clk_lvl;
+ int j;
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ struct _vcs_dpi_voltage_scaling_st *clock_limits =
+ dc->scratch.update_bw_bounding_box.clock_limits;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+
+ dc_assert_fp_enabled();
+
+ dcn3_5_ip.max_num_otg =
+ dc->res_pool->res_cap->num_timing_generator;
+ dcn3_5_ip.max_num_dpp = dc->res_pool->pipe_count;
+ dcn3_5_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_5_soc.num_states - 1;
+ j >= 0; j--) {
+ if (dcn3_5_soc.clock_limits[j].dcfclk_mhz <=
+ clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ if (clk_table->num_entries == 1) {
+ /*smu gives one DPM level, let's take the highest one*/
+ closest_clk_lvl = dcn3_5_soc.num_states - 1;
+ }
+
+ clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ clock_limits[i].dcfclk_mhz <
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /*SMU fix not released yet*/
+ clock_limits[i].dcfclk_mhz =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+
+ clock_limits[i].fabricclk_mhz =
+ clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz =
+ clk_table->entries[i].socclk_mhz;
+
+ if (clk_table->entries[i].memclk_mhz &&
+ clk_table->entries[i].wck_ratio)
+ clock_limits[i].dram_speed_mts =
+ clk_table->entries[i].memclk_mhz * 2 *
+ clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ clock_limits[i].dispclk_mhz = max_dispclk_mhz ?
+ max_dispclk_mhz :
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ clock_limits[i].dppclk_mhz = max_dppclk_mhz ?
+ max_dppclk_mhz :
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ clock_limits[i].dram_bw_per_chan_gbps =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz =
+ dcn3_5_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+
+ memcpy(dcn3_5_soc.clock_limits, clock_limits,
+ sizeof(dcn3_5_soc.clock_limits));
+
+ if (clk_table->num_entries)
+ dcn3_5_soc.num_states = clk_table->num_entries;
+
+ if (max_dispclk_mhz) {
+ dcn3_5_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+ if ((int)(dcn3_5_soc.dram_clock_change_latency_us * 1000)
+ != dc->debug.dram_clock_change_latency_ns
+ && dc->debug.dram_clock_change_latency_ns) {
+ dcn3_5_soc.dram_clock_change_latency_us =
+ dc->debug.dram_clock_change_latency_ns / 1000.0;
+ }
+
+ if (dc->bb_overrides.dram_clock_change_latency_ns > 0)
+ dcn3_5_soc.dram_clock_change_latency_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_exit_time_ns > 0)
+ dcn3_5_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0)
+ dcn3_5_soc.sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_exit_z8_time_ns > 0)
+ dcn3_5_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0)
+ dcn3_5_soc.sr_enter_plus_exit_z8_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0;
+
+ /*temp till dml2 fully work without dml1*/
+ dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip,
+ DML_PROJECT_DCN31);
+
+ /*copy to dml2, before dml2_create*/
+ if (clk_table->num_entries > 2) {
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ dc->dml2_options.bbox_overrides.clks_table.num_states =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+ clock_limits[i].dcfclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+ clock_limits[i].fabricclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+ clock_limits[i].dispclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+ clock_limits[i].dppclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+ clock_limits[i].socclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+ clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dram_speed_mts = clock_limits[i].dram_speed_mts;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+ clock_limits[i].dtbclk_mhz;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+ clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+ clk_table->num_entries;
+ }
+ }
+
+ /* Update latency values */
+ dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us;
+
+ dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_5_soc.sr_exit_time_us;
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_5_soc.sr_enter_plus_exit_time_us;
+
+ dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_5_soc.sr_exit_z8_time_us;
+ dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_5_soc.sr_enter_plus_exit_z8_time_us;
+}
+
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+	/* True for GRPH_RGBE_ALPHA and for any format at or above VIDEO_BEGIN. */
+	return format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA || format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN;
+}
+
+/*
+ * micro_sec_to_vert_lines () - converts time to number of vertical lines for a given timing
+ *
+ * @num_us: number of microseconds; @timing: CRTC timing supplying h_total and pix_clk_100hz
+ * @return: number of vertical lines, rounded up to the next line so that num_us is always
+ * covered; 0 if h_total or pix_clk_100hz is zero (no line duration can be derived)
+ */
+static unsigned int micro_sec_to_vert_lines(unsigned int num_us, struct dc_crtc_timing *timing)
+{
+	unsigned int lines_time_in_ns;
+
+	if (!timing->h_total || !timing->pix_clk_100hz)
+		return 0; /* degenerate timing: the divisions below would be by zero */
+
+	lines_time_in_ns = 1000.0 * (((float)timing->h_total * 1000.0) /
+				     ((float)timing->pix_clk_100hz / 10.0));
+	return dml_ceil(1000.0 * num_us / lines_time_in_ns, 1.0);
+}
+
+static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing)
+{
+	/* Addressable lines plus the top and bottom borders. */
+	unsigned int active_lines = timing->v_border_top + timing->v_addressable +
+				    timing->v_border_bottom;
+	/* The rest of v_total; the back porch is what front porch and sync leave of it. */
+	unsigned int blank_lines = timing->v_total - active_lines;
+
+	return blank_lines - timing->v_front_porch - timing->v_sync_width;
+}
+
+/*
+ * Fill @pipes from @context with the DCN3.1 helper, then apply the DCN3.5 tweaks:
+ * vblank_nom clamping, immediate flip, DSC input bpc, DET size, 4to1 MPC and ODM policy.
+ * Returns the number of pipes in @context that have a stream.
+ */
+int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
+					      struct dc_state *context,
+					      display_e2e_pipe_params_st *pipes,
+					      enum dc_validate_mode validate_mode)
+{
+	int i, pipe_cnt;
+	struct resource_context *res_ctx = &context->res_ctx;
+	struct pipe_ctx *pipe = NULL;
+	bool upscaled = false;
+	const unsigned int max_allowed_vblank_nom = 1023;
+
+	dc_assert_fp_enabled();
+
+	dcn31_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
+
+	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+		struct dc_crtc_timing *timing;
+		unsigned int num_lines = 0;
+		unsigned int v_back_porch = 0;
+
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+
+		pipe = &res_ctx->pipe_ctx[i];
+		timing = &pipe->stream->timing;
+
+		num_lines = micro_sec_to_vert_lines(dcn3_5_ip.VBlankNomDefaultUS, timing);
+		v_back_porch = get_vertical_back_porch(timing);
+
+		/* A fixed longer v_total only replaces vtotal; vblank_nom still uses timing->v_total. */
+		if (pipe->stream->adjust.v_total_max ==
+		    pipe->stream->adjust.v_total_min &&
+		    pipe->stream->adjust.v_total_min > timing->v_total) {
+			pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+		}
+
+		pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
+		pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, num_lines);
+		// vblank_nom should not be smaller than (VSync (timing->v_sync_width + v_back_porch) + 2)
+		// + 2 is because
+		// 1 -> VStartup_start should be 1 line before VSync
+		// 1 -> always reserve 1 line between start of vblank to vstartup signal
+		pipes[pipe_cnt].pipe.dest.vblank_nom =
+			max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width + v_back_porch + 2);
+		pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, max_allowed_vblank_nom);
+
+		if (pipe->plane_state &&
+		    (pipe->plane_state->src_rect.height <
+		     pipe->plane_state->dst_rect.height ||
+		     pipe->plane_state->src_rect.width <
+		     pipe->plane_state->dst_rect.width))
+			upscaled = true;
+
+		/*
+		 * Immediate flip can be set dynamically after enabling the
+		 * plane. We need to require support for immediate flip or
+		 * underflow can be intermittently experienced depending on peak
+		 * b/w requirements.
+		 */
+		pipes[pipe_cnt].pipe.src.immediate_flip = true;
+		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+
+		dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+
+		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+		pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+		pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256;
+
+		if (pipes[pipe_cnt].dout.dsc_enable) {
+			switch (timing->display_color_depth) {
+			case COLOR_DEPTH_888:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+				break;
+			case COLOR_DEPTH_101010:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+				break;
+			case COLOR_DEPTH_121212:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+				break;
+			default:
+				ASSERT(0);
+				break;
+			}
+		}
+
+		pipe_cnt++;
+	}
+
+	context->bw_ctx.dml.ip.det_buffer_size_kbytes = 384;/*per guide*/
+	dc->config.enable_4to1MPC = false;
+
+	if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
+		if (is_dual_plane(pipe->plane_state->format)
+				&& pipe->plane_state->src_rect.width <= 1920 &&
+				pipe->plane_state->src_rect.height <= 1080) {
+			dc->config.enable_4to1MPC = true;
+		} else if (!is_dual_plane(pipe->plane_state->format) &&
+				pipe->plane_state->src_rect.width <= 5120) {
+			/*
+			 * Limit to 5k max to avoid forced pipe split when there
+			 * is not enough detile for swath
+			 */
+			context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+			pipes[0].pipe.src.unbounded_req_mode = true;
+		}
+	} else if (context->stream_count >=
+			dc->debug.crb_alloc_policy_min_disp_count &&
+			dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+		context->bw_ctx.dml.ip.det_buffer_size_kbytes =
+			dc->debug.crb_alloc_policy * 64;
+	} else if (context->stream_count >= 3 && upscaled) {
+		context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+	}
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *cur = &context->res_ctx.pipe_ctx[i];
+
+		if (!cur->stream)
+			continue;
+
+		if (cur->stream->signal == SIGNAL_TYPE_EDP &&
+				dc->debug.seamless_boot_odm_combine &&
+				cur->stream->apply_seamless_boot_optimization) {
+
+			if (cur->stream->apply_boot_odm_mode ==
+					dm_odm_combine_policy_2to1) {
+				context->bw_ctx.dml.vba.ODMCombinePolicy =
+					dm_odm_combine_policy_2to1;
+				break;
+			}
+		}
+	}
+
+	return pipe_cnt;
+}
+
+/* Decide which Z-state support level @context gets and record it in its dcn clock state. */
+void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+	enum dcn_zstate_support_state zstate = DCN_ZSTATE_SUPPORT_DISALLOW;
+	unsigned int idx, num_planes = 0;
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	dc_assert_fp_enabled();
+
+	/* Count the pipes that currently carry a plane. */
+	for (idx = 0; idx < dc->res_pool->pipe_count; idx++)
+		num_planes += context->res_ctx.pipe_ctx[idx].plane_state ? 1 : 0;
+
+	if (!context->stream_count || !num_planes) {
+		/* No stream or no plane: plain DCN_ZSTATE_SUPPORT_ALLOW. */
+		zstate = DCN_ZSTATE_SUPPORT_ALLOW;
+	} else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+		struct dc_link *edp_link = context->streams[0]->sink->link;
+		bool on_pwrseq0 = edp_link && edp_link->link_index == 0;
+		bool uses_psr = edp_link && !edp_link->panel_config.psr.disable_psr &&
+			(edp_link->psr_settings.psr_version == DC_PSR_VERSION_1 ||
+			 edp_link->psr_settings.psr_version == DC_PSR_VERSION_SU_1);
+		bool uses_replay = edp_link && edp_link->replay_settings.replay_feature_enabled;
+		/* StutterPeriod must exceed these; positive debug values replace the defaults. */
+		int z8_floor = dc->debug.minimum_z8_residency_time > 0 ?
+			dc->debug.minimum_z8_residency_time : 1000;
+		int z10_floor = dc->debug.minimum_z10_residency_time > 0 ?
+			dc->debug.minimum_z10_residency_time : 5000;
+
+		/* PSR1/PSR-SU: Z8 and Z10 are allowed based on latency; replay with IPS enabled enters IPS2. */
+		if (on_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > (double)z10_floor)
+			zstate = DCN_ZSTATE_SUPPORT_ALLOW;
+		else if (on_pwrseq0 && (uses_psr || uses_replay))
+			zstate = DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY;
+		else if (context->bw_ctx.dml.vba.StutterPeriod > (double)z8_floor)
+			zstate = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+	}
+
+	DC_LOG_SMU("zstate_support: %d, StutterPeriod: %d\n", zstate,
+		   (int)context->bw_ctx.dml.vba.StutterPeriod);
+
+	context->bw_ctx.bw.dcn.clk.zstate_support = zstate;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
new file mode 100644
index 000000000000..d121c5afce71
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN35_FPU_H__
+#define __DCN35_FPU_H__
+
+#include "clk_mgr.h"
+
+void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr); /* currently an empty stub */
+/* Rebuilds dcn3_5_ip / dcn3_5_soc from bw_params and re-initializes dc->dml with them. */
+void dcn35_update_bw_bounding_box_fpu(struct dc *dc,
+ struct clk_bw_params *bw_params);
+/* Fills pipes[] from context; returns the number of pipes that have a stream. */
+int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode);
+/* Stores the chosen Z-state support level in context->bw_ctx.bw.dcn.clk.zstate_support. */
+void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context);
+
+#endif /* __DCN35_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
new file mode 100644
index 000000000000..77023b619f1e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
@@ -0,0 +1,639 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+#include "resource.h"
+#include "dcn351_fpu.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn35/dcn35_resource.h"
+#include "dcn351/dcn351_resource.h"
+#include "dml/dcn31/dcn31_fpu.h"
+#include "dml/dcn35/dcn35_fpu.h"
+#include "dml/dml_inline_defs.h"
+
+#include "link_service.h"
+
+#define DC_LOGGER_INIT(logger)
+
+struct _vcs_dpi_ip_params_st dcn3_51_ip = { /* hardcoded defaults; see dcn351_update_bw_bounding_box_fpu() */
+ .VBlankNomDefaultUS = 668,
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = 1536,
+ .config_return_buffer_size_in_kbytes = 1792,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,/*not used*/
+ .opp_output_buffer_lines = 1,/*not used*/
+ .pixel_chunk_size_kbytes = 8,
+ //.alpha_pixel_chunk_size_kbytes = 4;/*new*/
+ //.min_pixel_chunk_size_bytes = 1024;/*new*/
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,/*delta from 10*/
+ .dsc422_native_support = true,/*delta from false*/
+ .is_line_buffer_bpp_fixed = true,/*new*/
+ .line_buffer_fixed_bpp = 32,/*delta from 48*/
+ .line_buffer_size_bits = 986880,/*delta from 789504*/
+ .max_line_buffer_lines = 32,/*delta from 12*/
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4, /* overwritten at runtime with dc->res_pool->pipe_count */
+ .max_num_otg = 4, /* overwritten at runtime with res_cap->num_timing_generator */
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ /*.max_num_hdmi_frl_outputs = 1; new in dml2 (this struct already sets it two lines up)*/
+ /*.max_num_dp2p0_outputs = 2; new in dml2*/
+ /*.max_num_dp2p0_streams = 4; new in dml2*/
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 68,/*changed from 64,*/
+ .dpte_buffer_size_in_pte_reqs_chroma = 36,/*changed from 34*/
+ /*.dcc_meta_buffer_size_bytes = 6272; new to dml2*/
+ .dispclk_ramp_margin_percent = 1.11,/*delta from 1*/
+ /*.dppclk_delay_subtotal = 47;
+ .dppclk_delay_scl = 50;
+ .dppclk_delay_scl_lb_only = 16;
+ .dppclk_delay_cnvc_formatter = 28;
+ .dppclk_delay_cnvc_cursor = 6;
+ .dispclk_delay_subtotal = 125;*/ /*new to dml2; the same six values are set for this struct below*/
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 47, /* changed from 46,*/
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,/*changed from 27,*/
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 125, /*changed from 119,*/
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+// .config_return_buffer_segment_size_in_kbytes = 64;/*required, hard coded in dml2_translate_ip_params*/
+
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = {
+ /* Defaults only: dcn351_update_bw_bounding_box_fpu() overwrites clock_limits at runtime. TODO: correct dispclk/dppclk voltage level determination */
+ .clock_limits = {
+ {
+ .state = 0,
+ .dcfclk_mhz = 400.0,
+ .fabricclk_mhz = 400.0,
+ .socclk_mhz = 600.0,
+ .dram_speed_mts = 3200.0,
+ .dispclk_mhz = 600.0,
+ .dppclk_mhz = 600.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 200.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dcfclk_mhz = 600.0,
+ .fabricclk_mhz = 1000.0,
+ .socclk_mhz = 733.0,
+ .dram_speed_mts = 6400.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 266.7,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dcfclk_mhz = 738.0,
+ .fabricclk_mhz = 1200.0,
+ .socclk_mhz = 880.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 266.7,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dcfclk_mhz = 800.0,
+ .fabricclk_mhz = 1400.0,
+ .socclk_mhz = 978.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 960.0,
+ .dppclk_mhz = 960.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 320.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dcfclk_mhz = 873.0,
+ .fabricclk_mhz = 1600.0,
+ .socclk_mhz = 1100.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1066.7,
+ .dppclk_mhz = 1066.7,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 355.6,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 5,
+ .dcfclk_mhz = 960.0,
+ .fabricclk_mhz = 1700.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1200.0,
+ .dppclk_mhz = 1200.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 400.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 6,
+ .dcfclk_mhz = 1067.0,
+ .fabricclk_mhz = 1850.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1371.4,
+ .dppclk_mhz = 1371.4,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 457.1,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 7,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 2000.0,
+ .socclk_mhz = 1467.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1600.0,
+ .dppclk_mhz = 1600.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 533.3,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 8, /* replaced at runtime by a non-zero clk_table->num_entries */
+ .sr_exit_time_us = 28.0,
+ .sr_enter_plus_exit_time_us = 30.0,
+ .sr_exit_z8_time_us = 263.0,
+ .sr_enter_plus_exit_z8_time_us = 363.0,
+ .fclk_change_latency_us = 24.0,
+ .usr_retraining_latency_us = 2,
+ .writeback_latency_us = 12.0,
+
+ .dram_channel_width_bytes = 4,/*does not exist in dml2*/
+ .round_trip_ping_latency_dcfclk_cycles = 106,/*does not exist in dml2*/
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .dram_clock_change_latency_us = 34, /* dc->debug / dc->bb_overrides values replace this at runtime */
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_fabric_bw_after_urgent = 80.0, /*new to dml2*/
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = 0,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .num_chans = 4, /* overwritten at runtime with bw_params->num_channels */
+ .dispclk_dppclk_vco_speed_mhz = 2400.0, /* becomes 2 * max dispclk when the clock table reports one */
+};
+
+/*
+ * dcn351_update_bw_bounding_box_fpu - refresh the dcn3_51 bounding box at runtime
+ *
+ * @dc: display core; provides the resource pool, debug/bb_overrides settings and
+ *	the clock_limits scratch buffer, and receives the dml / dml2 results
+ * @bw_params: clock table and DRAM channel count to apply
+ *
+ * Overrides the hardcoded dcn3_51_ip / dcn3_51_soc defaults:
+ * - max_num_otg / max_num_dpp from the resource pool, num_chans from @bw_params
+ * - one clock_limits state per clk_table entry: dcfclk, fclk, socclk (and DRAM
+ *   speed, when memclk and wck_ratio are set) come from the entry, dispclk and
+ *   dppclk use the table-wide maximum if any, the rest from a hardcoded state
+ * - dispclk_dppclk_vco_speed_mhz becomes twice the maximum dispclk, if non-zero
+ * - DRAM clock change and self-refresh (incl. Z8) latencies from dc->debug and
+ *   dc->bb_overrides; those are given in ns and stored in us
+ * dc->dml is then re-initialized and the latencies (plus the clock table, when
+ * it has more than two entries) are copied to dc->dml2_options.bbox_overrides.
+ */
+void dcn351_update_bw_bounding_box_fpu(struct dc *dc,
+		struct clk_bw_params *bw_params)
+{
+	unsigned int i, closest_clk_lvl;
+	int j;
+	struct clk_limit_table *clk_table = &bw_params->clk_table;
+	struct _vcs_dpi_voltage_scaling_st *clock_limits =
+		dc->scratch.update_bw_bounding_box.clock_limits;
+	int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+
+	dc_assert_fp_enabled();
+
+	dcn3_51_ip.max_num_otg =
+		dc->res_pool->res_cap->num_timing_generator;
+	dcn3_51_ip.max_num_dpp = dc->res_pool->pipe_count;
+	dcn3_51_soc.num_chans = bw_params->num_channels;
+
+	ASSERT(clk_table->num_entries);
+
+	/* Prepass to find max clocks independent of voltage level. */
+	for (i = 0; i < clk_table->num_entries; ++i) {
+		if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+			max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+		if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+			max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+	}
+
+	for (i = 0; i < clk_table->num_entries; i++) {
+		/* Scan backwards for the highest hardcoded state this dcfclk reaches. */
+		for (closest_clk_lvl = 0, j = dcn3_51_soc.num_states - 1;
+			j >= 0; j--) {
+			if (dcn3_51_soc.clock_limits[j].dcfclk_mhz <=
+				clk_table->entries[i].dcfclk_mhz) {
+				closest_clk_lvl = j;
+				break;
+			}
+		}
+		if (clk_table->num_entries == 1) {
+			/*smu gives one DPM level, let's take the highest one*/
+			closest_clk_lvl = dcn3_51_soc.num_states - 1;
+		}
+
+		clock_limits[i].state = i;
+
+		/* Clocks dependent on voltage level. */
+		clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+		if (clk_table->num_entries == 1 &&
+			clock_limits[i].dcfclk_mhz <
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+			/*SMU fix not released yet*/
+			clock_limits[i].dcfclk_mhz =
+				dcn3_51_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+		}
+
+		clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+		clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+
+		/* NOTE(review): if memclk or wck_ratio is zero, dram_speed_mts keeps
+		 * whatever this scratch entry already held - confirm that is intended. */
+		if (clk_table->entries[i].memclk_mhz &&
+			clk_table->entries[i].wck_ratio)
+			clock_limits[i].dram_speed_mts =
+				clk_table->entries[i].memclk_mhz * 2 *
+				clk_table->entries[i].wck_ratio;
+
+		/* Clocks independent of voltage level. */
+		clock_limits[i].dispclk_mhz = max_dispclk_mhz ?
+			max_dispclk_mhz :
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+		clock_limits[i].dppclk_mhz = max_dppclk_mhz ?
+			max_dppclk_mhz :
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+		clock_limits[i].dram_bw_per_chan_gbps =
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+		clock_limits[i].dscclk_mhz =
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+		clock_limits[i].dtbclk_mhz =
+			dcn3_51_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+		clock_limits[i].phyclk_d18_mhz =
+			dcn3_51_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+		clock_limits[i].phyclk_mhz =
+			dcn3_51_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+	}
+
+	memcpy(dcn3_51_soc.clock_limits, clock_limits,
+		sizeof(dcn3_51_soc.clock_limits));
+
+	if (clk_table->num_entries)
+		dcn3_51_soc.num_states = clk_table->num_entries;
+
+	if (max_dispclk_mhz) {
+		dcn3_51_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+		dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+	}
+	if ((int)(dcn3_51_soc.dram_clock_change_latency_us * 1000)
+				!= dc->debug.dram_clock_change_latency_ns
+			&& dc->debug.dram_clock_change_latency_ns) {
+		dcn3_51_soc.dram_clock_change_latency_us =
+			dc->debug.dram_clock_change_latency_ns / 1000.0;
+	}
+
+	if (dc->bb_overrides.dram_clock_change_latency_ns > 0)
+		dcn3_51_soc.dram_clock_change_latency_us =
+			dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_exit_time_ns > 0)
+		dcn3_51_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_enter_plus_exit_time_ns > 0)
+		dcn3_51_soc.sr_enter_plus_exit_time_us =
+			dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_exit_z8_time_ns > 0)
+		dcn3_51_soc.sr_exit_z8_time_us = dc->bb_overrides.sr_exit_z8_time_ns / 1000.0;
+
+	if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns > 0)
+		dcn3_51_soc.sr_enter_plus_exit_z8_time_us =
+			dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0;
+
+	/*temp till dml2 fully work without dml1*/
+	dml_init_instance(&dc->dml, &dcn3_51_soc, &dcn3_51_ip,
+		DML_PROJECT_DCN31);
+
+	/*copy to dml2, before dml2_create*/
+	if (clk_table->num_entries > 2) {
+		/* Every clock domain gets one level per clk_table entry; set the counts once. */
+		dc->dml2_options.bbox_overrides.clks_table.num_states =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
+			clk_table->num_entries;
+		dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+			clk_table->num_entries;
+		for (i = 0; i < clk_table->num_entries; i++) {
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
+				clock_limits[i].dcfclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
+				clock_limits[i].fabricclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
+				clock_limits[i].dispclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
+				clock_limits[i].dppclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
+				clock_limits[i].socclk_mhz;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
+				clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dram_speed_mts = clock_limits[i].dram_speed_mts;
+			dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+				clock_limits[i].dtbclk_mhz;
+		}
+	}
+
+	/* Update latency values */
+	dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_51_soc.dram_clock_change_latency_us;
+
+	dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_51_soc.sr_exit_time_us;
+	dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_51_soc.sr_enter_plus_exit_time_us;
+
+	dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_51_soc.sr_exit_z8_time_us;
+	dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_51_soc.sr_enter_plus_exit_z8_time_us;
+}
+
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+	/* True for GRPH_RGBE_ALPHA and for any format at or above VIDEO_BEGIN. */
+	return format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA || format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN;
+}
+
+/*
+ * micro_sec_to_vert_lines () - converts time to number of vertical lines for a given timing
+ *
+ * @num_us: number of microseconds; @timing: CRTC timing supplying h_total and pix_clk_100hz
+ * @return: number of vertical lines, rounded up to the next line so that num_us is always
+ * covered; 0 if h_total or pix_clk_100hz is zero (no line duration can be derived)
+ */
+static unsigned int micro_sec_to_vert_lines(unsigned int num_us, struct dc_crtc_timing *timing)
+{
+	unsigned int lines_time_in_ns;
+
+	if (!timing->h_total || !timing->pix_clk_100hz)
+		return 0; /* degenerate timing: the divisions below would be by zero */
+
+	lines_time_in_ns = 1000.0 * (((float)timing->h_total * 1000.0) /
+				     ((float)timing->pix_clk_100hz / 10.0));
+	return dml_ceil(1000.0 * num_us / lines_time_in_ns, 1.0);
+}
+
+static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing)
+{
+	/* Addressable lines plus the top and bottom borders. */
+	unsigned int active_lines = timing->v_border_top + timing->v_addressable +
+				    timing->v_border_bottom;
+	/* The rest of v_total; the back porch is what front porch and sync leave of it. */
+	unsigned int blank_lines = timing->v_total - active_lines;
+
+	return blank_lines - timing->v_front_porch - timing->v_sync_width;
+}
+
+/*
+ * dcn351_populate_dml_pipes_from_context_fpu() - DCN3.51 adjustments on top of
+ * dcn31_populate_dml_pipes_from_context().
+ *
+ * Calls dc_assert_fp_enabled(), runs the DCN3.1 helper on @pipes, then walks
+ * every pipe that has a stream to clamp vblank_nom and set the per-pipe source
+ * and DSC fields. Afterwards it selects the DET buffer size, the 4:1 MPC flag
+ * and, for seamless-boot eDP, the 2:1 ODM combine policy.
+ *
+ * Return: number of pipes that have a stream.
+ */
+int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc,
+					      struct dc_state *context,
+					      display_e2e_pipe_params_st *pipes,
+					      enum dc_validate_mode validate_mode)
+{
+	int i, pipe_cnt;
+	struct resource_context *res_ctx = &context->res_ctx;
+	struct pipe_ctx *pipe = NULL;
+	bool upscaled = false;
+	const unsigned int max_allowed_vblank_nom = 1023;
+
+	dc_assert_fp_enabled();
+
+	dcn31_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
+
+	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+		struct dc_crtc_timing *timing;
+		unsigned int num_lines = 0;
+		unsigned int v_back_porch = 0;
+
+		if (!res_ctx->pipe_ctx[i].stream)
+			continue;
+
+		pipe = &res_ctx->pipe_ctx[i];
+		timing = &pipe->stream->timing;
+
+		num_lines = micro_sec_to_vert_lines(dcn3_51_ip.VBlankNomDefaultUS, timing);
+		v_back_porch = get_vertical_back_porch(timing);
+
+		/* v_total_min == v_total_max above the base v_total: use it as vtotal. */
+		if (pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min &&
+		    pipe->stream->adjust.v_total_min > timing->v_total)
+			pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+
+		/* vblank_nom is always derived from timing->v_total, then clamped below. */
+		pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
+		pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, num_lines);
+		// vblank_nom should not be smaller than (VSync (timing->v_sync_width + v_back_porch) + 2)
+		// + 2 is because
+		// 1 -> VStartup_start should be 1 line before VSync
+		// 1 -> always reserve 1 line between start of vblank to vstartup signal
+		pipes[pipe_cnt].pipe.dest.vblank_nom =
+			max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width + v_back_porch + 2);
+		pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, max_allowed_vblank_nom);
+
+		if (pipe->plane_state &&
+		    (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height ||
+		     pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width))
+			upscaled = true;
+
+		/*
+		 * Immediate flip can be set dynamically after enabling the
+		 * plane. We need to require support for immediate flip or
+		 * underflow can be intermittently experienced depending on peak
+		 * b/w requirements.
+		 */
+		pipes[pipe_cnt].pipe.src.immediate_flip = true;
+
+		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+
+		dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+
+		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+		pipes[pipe_cnt].pipe.src.dcc_rate = 3;
+		pipes[pipe_cnt].dout.dsc_input_bpc = 0;
+		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256;
+
+		if (pipes[pipe_cnt].dout.dsc_enable) {
+			switch (timing->display_color_depth) {
+			case COLOR_DEPTH_888:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 8;
+				break;
+			case COLOR_DEPTH_101010:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 10;
+				break;
+			case COLOR_DEPTH_121212:
+				pipes[pipe_cnt].dout.dsc_input_bpc = 12;
+				break;
+			default:
+				ASSERT(0);
+				break;
+			}
+		}
+
+		pipe_cnt++;
+	}
+
+	context->bw_ctx.dml.ip.det_buffer_size_kbytes = 384;/*per guide*/
+	dc->config.enable_4to1MPC = false;
+
+	if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
+		if (is_dual_plane(pipe->plane_state->format)
+				&& pipe->plane_state->src_rect.width <= 1920 &&
+				pipe->plane_state->src_rect.height <= 1080) {
+			dc->config.enable_4to1MPC = true;
+		} else if (!is_dual_plane(pipe->plane_state->format) &&
+				pipe->plane_state->src_rect.width <= 5120) {
+			/*
+			 * Limit to 5k max to avoid forced pipe split when there
+			 * is not enough detile for swath
+			 */
+			context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+			pipes[0].pipe.src.unbounded_req_mode = true;
+		}
+	} else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count &&
+		   dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) {
+		context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64;
+	} else if (context->stream_count >= 3 && upscaled) {
+		context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
+	}
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *boot_pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (!boot_pipe->stream)
+			continue;
+
+		if (boot_pipe->stream->signal == SIGNAL_TYPE_EDP &&
+		    dc->debug.seamless_boot_odm_combine &&
+		    boot_pipe->stream->apply_seamless_boot_optimization) {
+			if (boot_pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) {
+				context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1;
+				break;
+			}
+		}
+	}
+
+	return pipe_cnt;
+}
+
+/* Decides Z-state support for @context; result goes to bw_ctx.bw.dcn.clk.zstate_support. */
+void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+	enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW;
+	unsigned int i, plane_count = 0;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++)
+		if (context->res_ctx.pipe_ctx[i].plane_state)
+			plane_count++;
+
+	/*dcn351 does not support z9/z10*/
+	if (context->stream_count == 0 || plane_count == 0) {
+		support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+	} else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+		struct dc_link *link = context->streams[0]->sink->link;
+		bool is_pwrseq0 = link && link->link_index == 0;
+		bool is_psr = (link && (link->psr_settings.psr_version == DC_PSR_VERSION_1 ||
+				link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) && !link->panel_config.psr.disable_psr);
+		bool is_replay = link && link->replay_settings.replay_feature_enabled;
+		int minimum_z8_residency =
+			dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
+		bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minimum_z8_residency;
+
+		/* psr1/psr-su: z8 based on latency (allow_z8 is a bool, not an enum value); replay with IPS enabled enters ips2 */
+		if (is_pwrseq0 && (is_psr || is_replay))
+			support = allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY : DCN_ZSTATE_SUPPORT_DISALLOW;
+	}
+	context->bw_ctx.bw.dcn.clk.zstate_support = support;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h
new file mode 100644
index 000000000000..f71d9d8d0759
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __DCN351_FPU_H__
+#define __DCN351_FPU_H__
+
+#include "clk_mgr.h"
+
+void dcn351_update_bw_bounding_box_fpu(struct dc *dc,
+ struct clk_bw_params *bw_params);
+/* Returns the number of pipes in @context that have a stream. */
+int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode);
+/* Stores its decision in context->bw_ctx.bw.dcn.clk.zstate_support. */
+void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context);
+
+#endif /* __DCN351_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index fb17f8868cb4..641a8cd019cd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -523,6 +523,7 @@ struct _vcs_dpi_display_pipe_dest_params_st {
unsigned int vupdate_offset;
unsigned int vupdate_width;
unsigned int vready_offset;
+ unsigned int pstate_keepout;
unsigned char interlaced;
double pixel_rate_mhz;
unsigned char synchronized_vblank_all_planes;
@@ -632,6 +633,7 @@ struct _vcs_dpi_display_dlg_regs_st {
unsigned int ref_freq_to_pix_freq;
unsigned int vratio_prefetch;
unsigned int vratio_prefetch_c;
+ unsigned int refcyc_per_tdlut_group;
unsigned int refcyc_per_pte_group_vblank_l;
unsigned int refcyc_per_pte_group_vblank_c;
unsigned int refcyc_per_meta_chunk_vblank_l;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 9a3ded311195..85453bbb4f9b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -1099,8 +1099,13 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib)
// Total Available Pipes Support Check
for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
- total_pipes += mode_lib->vba.DPPPerPlane[k];
pipe_idx = get_pipe_idx(mode_lib, k);
+ if (pipe_idx == -1) {
+ ASSERT(0);
+ continue; // skip inactive planes
+ }
+ total_pipes += mode_lib->vba.DPPPerPlane[k];
+
if (mode_lib->vba.cache_pipes[pipe_idx].clks_cfg.dppclk_mhz > 0.0)
mode_lib->vba.DPPCLK[k] = mode_lib->vba.cache_pipes[pipe_idx].clks_cfg.dppclk_mhz;
else
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
index 412e75eb4704..12ff65b6a7e5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c
@@ -122,17 +122,6 @@ void print__data_rq_misc_params_st(struct display_mode_lib *mode_lib, const stru
dml_print("DML_RQ_DLG_CALC: =====================================\n");
}
-void print__rq_dlg_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_rq_dlg_params_st *rq_dlg_param)
-{
- dml_print("DML_RQ_DLG_CALC: =====================================\n");
- dml_print("DML_RQ_DLG_CALC: DISPLAY_RQ_DLG_PARAM_ST\n");
- dml_print("DML_RQ_DLG_CALC: <LUMA>\n");
- print__data_rq_dlg_params_st(mode_lib, &rq_dlg_param->rq_l);
- dml_print("DML_RQ_DLG_CALC: <CHROMA>\n");
- print__data_rq_dlg_params_st(mode_lib, &rq_dlg_param->rq_c);
- dml_print("DML_RQ_DLG_CALC: =====================================\n");
-}
-
void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_dlg_sys_params_st *dlg_sys_param)
{
dml_print("DML_RQ_DLG_CALC: =====================================\n");
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
index ebcd717744e5..2bc64c4081dc 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h
@@ -35,7 +35,6 @@ void print__rq_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dp
void print__data_rq_sizing_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_data_rq_sizing_params_st *rq_sizing);
void print__data_rq_dlg_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_data_rq_dlg_params_st *rq_dlg_param);
void print__data_rq_misc_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_data_rq_misc_params_st *rq_misc_param);
-void print__rq_dlg_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_rq_dlg_params_st *rq_dlg_param);
void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_dlg_sys_params_st *dlg_sys_param);
void print__data_rq_regs_st(struct display_mode_lib *mode_lib, const struct _vcs_dpi_display_data_rq_regs_st *rq_regs);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
index 3df559c591f8..88dc2b97e7bf 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c
@@ -39,7 +39,7 @@
static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma)
{
- unsigned int ret_val = 0;
+ unsigned int ret_val = 1;
if (source_format == dm_444_16) {
if (!is_chroma)
@@ -559,12 +559,11 @@ static void get_surf_rq_param(
const struct _vcs_dpi_display_pipe_source_params_st *pipe_src_param,
bool is_chroma)
{
- bool mode_422 = 0;
unsigned int vp_width = 0;
unsigned int vp_height = 0;
unsigned int data_pitch = 0;
unsigned int meta_pitch = 0;
- unsigned int ppe = mode_422 ? 2 : 1;
+ unsigned int ppe = 1;
bool surf_linear;
bool surf_vert;
unsigned int bytes_per_element;
@@ -1596,11 +1595,6 @@ void dml1_rq_dlg_get_dlg_params(
swath_width_pixels_ub_c = swath_width_ub_c * 1;
}
- hscale_pixel_rate_l = 0.;
- hscale_pixel_rate_c = 0.;
- min_hratio_fact_l = 1.0;
- min_hratio_fact_c = 1.0;
-
if (htaps_l <= 1)
min_hratio_fact_l = 2.0;
else if (htaps_l <= 6) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
index 072bd0539605..6b2ab4ec2b5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
@@ -66,11 +66,15 @@ static inline double dml_max5(double a, double b, double c, double d, double e)
static inline double dml_ceil(double a, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_ceil2(a, granularity);
}
static inline double dml_floor(double a, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_floor2(a, granularity);
}
@@ -114,11 +118,15 @@ static inline double dml_ceil_2(double f)
static inline double dml_ceil_ex(double x, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_ceil2(x, granularity);
}
static inline double dml_floor_ex(double x, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_floor2(x, granularity);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
new file mode 100644
index 000000000000..4c21ce42054c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -0,0 +1,141 @@
+# SPDX-License-Identifier: MIT
+#
+# Copyright 2023 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+# Makefile for dml2.
+
+dml2_ccflags := $(CC_FLAGS_FPU)
+dml2_rcflags := $(CC_FLAGS_NO_FPU)
+
+ifneq ($(CONFIG_FRAME_WARN),0)
+ ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+ ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy)
+ frame_warn_limit := 4096
+ else
+ frame_warn_limit := 3072
+ endif
+ else
+ frame_warn_limit := 2048
+ endif
+
+ ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y)
+ frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit)
+ endif
+endif
+
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_core
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_mcg/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_dpmm/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_pmo/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_standalone_libraries/
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/inc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/inc
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/
+
+CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_wrapper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_utils.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_policy.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_translation_helper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_mall_phantom.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml_display_rq_dlg_calc.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml2_dc_resource_mgmt.o := $(dml2_ccflags)
+
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/display_mode_core.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/display_mode_util.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_wrapper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_utils.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_policy.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_translation_helper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_mall_phantom.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml_display_rq_dlg_calc.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml2_dc_resource_mgmt.o := $(dml2_rcflags)
+
+DML2 = display_mode_core.o display_mode_util.o dml2_wrapper.o \
+ dml2_utils.o dml2_policy.o dml2_translation_helper.o dml2_dc_resource_mgmt.o dml2_mall_phantom.o \
+ dml_display_rq_dlg_calc.o
+
+AMD_DAL_DML2 = $(addprefix $(AMDDALPATH)/dc/dml2/,$(DML2))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DML2)
+
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_ccflags) $(frame_warn_flag)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_ccflags)
+
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_rcflags)
+
+DML21 := src/dml2_top/dml2_top_interfaces.o
+DML21 += src/dml2_top/dml2_top_soc15.o
+DML21 += src/dml2_core/dml2_core_dcn4.o
+DML21 += src/dml2_core/dml2_core_utils.o
+DML21 += src/dml2_core/dml2_core_factory.o
+DML21 += src/dml2_core/dml2_core_dcn4_calcs.o
+DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o
+DML21 += src/dml2_dpmm/dml2_dpmm_factory.o
+DML21 += src/dml2_mcg/dml2_mcg_dcn4.o
+DML21 += src/dml2_mcg/dml2_mcg_factory.o
+DML21 += src/dml2_pmo/dml2_pmo_dcn3.o
+DML21 += src/dml2_pmo/dml2_pmo_factory.o
+DML21 += src/dml2_pmo/dml2_pmo_dcn4_fams2.o
+DML21 += src/dml2_standalone_libraries/lib_float_math.o
+DML21 += dml21_translation_helper.o
+DML21 += dml21_wrapper.o
+DML21 += dml21_utils.o
+
+AMD_DAL_DML21 = $(addprefix $(AMDDALPATH)/dc/dml2/dml21/,$(DML21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DML21)
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/cmntypes.h b/drivers/gpu/drm/amd/display/dc/dml2/cmntypes.h
new file mode 100644
index 000000000000..e450445bc05d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/cmntypes.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __CMNTYPES_H__
+#define __CMNTYPES_H__
+
+#ifdef __GNUC__
+#if __GNUC__ == 4 && __GNUC_MINOR__ > 7
+typedef unsigned int uint;
+#endif
+#endif
+
+typedef signed char int8, *pint8;
+typedef signed short int16, *pint16;
+typedef signed int int32, *pint32;
+typedef signed long long int64, *pint64; /* bare "signed" would be plain int */
+
+typedef unsigned char uint8, *puint8;
+typedef unsigned short uint16, *puint16;
+typedef unsigned int uint32, *puint32;
+typedef unsigned long long uint64, *puint64; /* bare "unsigned" would be unsigned int */
+
+typedef unsigned long int ulong;
+typedef unsigned char uchar;
+typedef unsigned int uint;
+
+typedef void *pvoid;
+typedef char *pchar;
+typedef const void *const_pvoid;
+typedef const char *const_pchar;
+
+typedef struct rgba_struct { /* members declared in a, r, g, b order; presumably alpha/red/green/blue */
+ uint8 a;
+ uint8 r;
+ uint8 g;
+ uint8 b;
+} rgba_t;
+
+typedef struct { /* members declared in blue, green, red, alpha order */
+ uint8 blue;
+ uint8 green;
+ uint8 red;
+ uint8 alpha;
+} gen_color_t;
+
+typedef union { /* same storage viewed either as one uint32 or as a gen_color_t */
+ uint32 val; /* whole-value view */
+ gen_color_t f; /* per-member view */
+} gen_color_u;
+
+//
+// Types to make it easy to get or set the bits of a float/double.
+// Avoids automatic casting from int to float and back.
+//
+#if 0
+typedef union {
+ uint32 i;
+ float f;
+} uintfloat32;
+
+typedef union {
+ uint64 i;
+ double f;
+} uintfloat64;
+
+#ifndef UNREFERENCED_PARAMETER
+#define UNREFERENCED_PARAMETER(x) x = x
+#endif
+#endif
+
+#endif //__CMNTYPES_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
new file mode 100644
index 000000000000..4b9b2e84d381
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -0,0 +1,10335 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "display_mode_core.h"
+#include "display_mode_util.h"
+#include "display_mode_lib_defines.h"
+
+#include "dml_assert.h"
+
+#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
+#define TB_BORROWED_MAX 400
+#define DML_MAX_VSTARTUP_START 1023
+
+// ---------------------------
+// Declaration Begins
+// ---------------------------
+static void CalculateBytePerPixelAndBlockSizes(
+ enum dml_source_format_class SourcePixelFormat,
+ enum dml_swizzle_mode SurfaceTiling,
+ // Output
+ dml_uint_t *BytePerPixelY,
+ dml_uint_t *BytePerPixelC,
+ dml_float_t *BytePerPixelDETY,
+ dml_float_t *BytePerPixelDETC,
+ dml_uint_t *BlockHeight256BytesY,
+ dml_uint_t *BlockHeight256BytesC,
+ dml_uint_t *BlockWidth256BytesY,
+ dml_uint_t *BlockWidth256BytesC,
+ dml_uint_t *MacroTileHeightY,
+ dml_uint_t *MacroTileHeightC,
+ dml_uint_t *MacroTileWidthY,
+ dml_uint_t *MacroTileWidthC);
+
+static dml_float_t CalculateWriteBackDISPCLK(
+ enum dml_source_format_class WritebackPixelFormat,
+ dml_float_t PixelClock,
+ dml_float_t WritebackHRatio,
+ dml_float_t WritebackVRatio,
+ dml_uint_t WritebackHTaps,
+ dml_uint_t WritebackVTaps,
+ dml_uint_t WritebackSourceWidth,
+ dml_uint_t WritebackDestinationWidth,
+ dml_uint_t HTotal,
+ dml_uint_t WritebackLineBufferSize,
+ dml_float_t DISPCLKDPPCLKVCOSpeed);
+
+static void CalculateVMRowAndSwath(
+ struct display_mode_lib_scratch_st *s,
+ struct CalculateVMRowAndSwath_params_st *p);
+
+static void CalculateOutputLink(
+ dml_float_t PHYCLKPerState,
+ dml_float_t PHYCLKD18PerState,
+ dml_float_t PHYCLKD32PerState,
+ dml_float_t Downspreading,
+ dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class OutputFormat,
+ dml_uint_t HTotal,
+ dml_uint_t HActive,
+ dml_float_t PixelClockBackEnd,
+ dml_float_t ForcedOutputLinkBPP,
+ dml_uint_t DSCInputBitPerComponent,
+ dml_uint_t NumberOfDSCSlices,
+ dml_float_t AudioSampleRate,
+ dml_uint_t AudioSampleLayout,
+ enum dml_odm_mode ODMModeNoDSC,
+ enum dml_odm_mode ODMModeDSC,
+ enum dml_dsc_enable DSCEnable,
+ dml_uint_t OutputLinkDPLanes,
+ enum dml_output_link_dp_rate OutputLinkDPRate,
+
+ // Output
+ dml_bool_t *RequiresDSC,
+ dml_bool_t *RequiresFEC,
+ dml_float_t *OutBpp,
+ enum dml_output_type_and_rate__type *OutputType,
+ enum dml_output_type_and_rate__rate *OutputRate,
+ dml_uint_t *RequiredSlots);
+
+static void CalculateODMMode(
+ dml_uint_t MaximumPixelsPerLinePerDSCUnit,
+ dml_uint_t HActive,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class OutputFormat,
+ enum dml_odm_use_policy ODMUse,
+ dml_float_t StateDispclk,
+ dml_float_t MaxDispclk,
+ dml_bool_t DSCEnable,
+ dml_uint_t TotalNumberOfActiveDPP,
+ dml_uint_t MaxNumDPP,
+ dml_float_t PixelClock,
+ dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+ dml_float_t DISPCLKRampingMargin,
+ dml_float_t DISPCLKDPPCLKVCOSpeed,
+ dml_uint_t NumberOfDSCSlices,
+
+ // Output
+ dml_bool_t *TotalAvailablePipesSupport,
+ dml_uint_t *NumberOfDPP,
+ enum dml_odm_mode *ODMMode,
+ dml_float_t *RequiredDISPCLKPerSurface);
+
+static dml_float_t CalculateRequiredDispclk(
+ enum dml_odm_mode ODMMode,
+ dml_float_t PixelClock,
+ dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+ dml_float_t DISPCLKRampingMargin,
+ dml_float_t DISPCLKDPPCLKVCOSpeed,
+ dml_float_t MaxDispclkSingle);
+
+static void CalculateSinglePipeDPPCLKAndSCLThroughput(
+ dml_float_t HRatio,
+ dml_float_t HRatioChroma,
+ dml_float_t VRatio,
+ dml_float_t VRatioChroma,
+ dml_float_t MaxDCHUBToPSCLThroughput,
+ dml_float_t MaxPSCLToLBThroughput,
+ dml_float_t PixelClock,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_uint_t HTaps,
+ dml_uint_t HTapsChroma,
+ dml_uint_t VTaps,
+ dml_uint_t VTapsChroma,
+
+ // Output
+ dml_float_t *PSCL_THROUGHPUT,
+ dml_float_t *PSCL_THROUGHPUT_CHROMA,
+ dml_float_t *DPPCLKUsingSingleDPP);
+
+static void CalculateDPPCLK(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+ dml_float_t DISPCLKDPPCLKVCOSpeed,
+ dml_float_t DPPCLKUsingSingleDPP[],
+ dml_uint_t DPPPerSurface[],
+
+ // Output
+ dml_float_t *GlobalDPPCLK,
+ dml_float_t Dppclk[]);
+
+static void CalculateMALLUseForStaticScreen(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t MALLAllocatedForDCNFinal,
+ enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+ dml_uint_t SurfaceSizeInMALL[],
+ dml_bool_t one_row_per_frame_fits_in_buffer[],
+
+ // Output
+ dml_bool_t UsesMALLForStaticScreen[]);
+
+static dml_uint_t dscceComputeDelay(
+ dml_uint_t bpc,
+ dml_float_t BPP,
+ dml_uint_t sliceWidth,
+ dml_uint_t numSlices,
+ enum dml_output_format_class pixelFormat,
+ enum dml_output_encoder_class Output);
+
+static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat,
+ enum dml_output_encoder_class Output);
+
+static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
+ struct CalculatePrefetchSchedule_params_st *p);
+
+static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed);
+
+static void CalculateDCCConfiguration(
+ dml_bool_t DCCEnabled,
+ dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_uint_t SurfaceWidthLuma,
+ dml_uint_t SurfaceWidthChroma,
+ dml_uint_t SurfaceHeightLuma,
+ dml_uint_t SurfaceHeightChroma,
+ dml_uint_t nomDETInKByte,
+ dml_uint_t RequestHeight256ByteLuma,
+ dml_uint_t RequestHeight256ByteChroma,
+ enum dml_swizzle_mode TilingFormat,
+ dml_uint_t BytePerPixelY,
+ dml_uint_t BytePerPixelC,
+ dml_float_t BytePerPixelDETY,
+ dml_float_t BytePerPixelDETC,
+ enum dml_rotation_angle SourceScan,
+ // Output
+ dml_uint_t *MaxUncompressedBlockLuma,
+ dml_uint_t *MaxUncompressedBlockChroma,
+ dml_uint_t *MaxCompressedBlockLuma,
+ dml_uint_t *MaxCompressedBlockChroma,
+ dml_uint_t *IndependentBlockLuma,
+ dml_uint_t *IndependentBlockChroma);
+
+static dml_uint_t CalculatePrefetchSourceLines(
+ dml_float_t VRatio,
+ dml_uint_t VTaps,
+ dml_bool_t Interlace,
+ dml_bool_t ProgressiveToInterlaceUnitInOPP,
+ dml_uint_t SwathHeight,
+ enum dml_rotation_angle SourceScan,
+ dml_bool_t ViewportStationary,
+ dml_uint_t SwathWidth,
+ dml_uint_t ViewportHeight,
+ dml_uint_t ViewportXStart,
+ dml_uint_t ViewportYStart,
+
+ // Output
+ dml_uint_t *VInitPreFill,
+ dml_uint_t *MaxNumSwath);
+
+static dml_uint_t CalculateVMAndRowBytes(
+ dml_bool_t ViewportStationary,
+ dml_bool_t DCCEnable,
+ dml_uint_t NumberOfDPPs,
+ dml_uint_t BlockHeight256Bytes,
+ dml_uint_t BlockWidth256Bytes,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_uint_t SurfaceTiling,
+ dml_uint_t BytePerPixel,
+ enum dml_rotation_angle SourceScan,
+ dml_uint_t SwathWidth,
+ dml_uint_t ViewportHeight,
+ dml_uint_t ViewportXStart,
+ dml_uint_t ViewportYStart,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t GPUVMMaxPageTableLevels,
+ dml_uint_t GPUVMMinPageSizeKBytes,
+ dml_uint_t PTEBufferSizeInRequests,
+ dml_uint_t Pitch,
+ dml_uint_t DCCMetaPitch,
+ dml_uint_t MacroTileWidth,
+ dml_uint_t MacroTileHeight,
+
+ // Output
+ dml_uint_t *MetaRowByte,
+ dml_uint_t *PixelPTEBytesPerRow,
+ dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
+ dml_uint_t *dpte_row_width_ub,
+ dml_uint_t *dpte_row_height,
+ dml_uint_t *dpte_row_height_linear,
+ dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
+ dml_uint_t *dpte_row_width_ub_one_row_per_frame,
+ dml_uint_t *dpte_row_height_one_row_per_frame,
+ dml_uint_t *MetaRequestWidth,
+ dml_uint_t *MetaRequestHeight,
+ dml_uint_t *meta_row_width,
+ dml_uint_t *meta_row_height,
+ dml_uint_t *PixelPTEReqWidth,
+ dml_uint_t *PixelPTEReqHeight,
+ dml_uint_t *PTERequestSize,
+ dml_uint_t *DPDE0BytesFrame,
+ dml_uint_t *MetaPTEBytesFrame);
+
+static dml_float_t CalculateTWait(
+ dml_uint_t PrefetchMode,
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ dml_bool_t DRRDisplay,
+ dml_float_t DRAMClockChangeLatency,
+ dml_float_t FCLKChangeLatency,
+ dml_float_t UrgentLatency,
+ dml_float_t SREnterPlusExitTime);
+
+static void CalculatePrefetchMode(
+ enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
+ dml_uint_t *MinPrefetchMode,
+ dml_uint_t *MaxPrefetchMode);
+
+static void CalculateRowBandwidth(
+ dml_bool_t GPUVMEnable,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_float_t VRatio,
+ dml_float_t VRatioChroma,
+ dml_bool_t DCCEnable,
+ dml_float_t LineTime,
+ dml_uint_t MetaRowByteLuma,
+ dml_uint_t MetaRowByteChroma,
+ dml_uint_t meta_row_height_luma,
+ dml_uint_t meta_row_height_chroma,
+ dml_uint_t PixelPTEBytesPerRowLuma,
+ dml_uint_t PixelPTEBytesPerRowChroma,
+ dml_uint_t dpte_row_height_luma,
+ dml_uint_t dpte_row_height_chroma,
+ // Output
+ dml_float_t *meta_row_bw,
+ dml_float_t *dpte_row_bw);
+
+static void CalculateFlipSchedule(
+ dml_float_t HostVMInefficiencyFactor,
+ dml_float_t UrgentExtraLatency,
+ dml_float_t UrgentLatency,
+ dml_uint_t GPUVMMaxPageTableLevels,
+ dml_bool_t HostVMEnable,
+ dml_uint_t HostVMMaxNonCachedPageTableLevels,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t HostVMMinPageSize,
+ dml_float_t PDEAndMetaPTEBytesPerFrame,
+ dml_float_t MetaRowBytes,
+ dml_float_t DPTEBytesPerRow,
+ dml_float_t BandwidthAvailableForImmediateFlip,
+ dml_uint_t TotImmediateFlipBytes,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_float_t LineTime,
+ dml_float_t VRatio,
+ dml_float_t VRatioChroma,
+ dml_float_t Tno_bw,
+ dml_bool_t DCCEnable,
+ dml_uint_t dpte_row_height,
+ dml_uint_t meta_row_height,
+ dml_uint_t dpte_row_height_chroma,
+ dml_uint_t meta_row_height_chroma,
+ dml_bool_t use_one_row_for_frame_flip,
+
+ // Output
+ dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
+ dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
+ dml_float_t *final_flip_bw,
+ dml_bool_t *ImmediateFlipSupportedForPipe);
+
+static dml_float_t CalculateWriteBackDelay(
+ enum dml_source_format_class WritebackPixelFormat,
+ dml_float_t WritebackHRatio,
+ dml_float_t WritebackVRatio,
+ dml_uint_t WritebackVTaps,
+ dml_uint_t WritebackDestinationWidth,
+ dml_uint_t WritebackDestinationHeight,
+ dml_uint_t WritebackSourceHeight,
+ dml_uint_t HTotal);
+
+static void CalculateVUpdateAndDynamicMetadataParameters(
+ dml_uint_t MaxInterDCNTileRepeaters,
+ dml_float_t Dppclk,
+ dml_float_t DISPCLK,
+ dml_float_t DCFClkDeepSleep,
+ dml_float_t PixelClock,
+ dml_uint_t HTotal,
+ dml_uint_t VBlank,
+ dml_uint_t DynamicMetadataTransmittedBytes,
+ dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
+ dml_uint_t InterlaceEnable,
+ dml_bool_t ProgressiveToInterlaceUnitInOPP,
+ dml_float_t *TSetup,
+ dml_float_t *Tdmbf,
+ dml_float_t *Tdmec,
+ dml_float_t *Tdmsks,
+ dml_uint_t *VUpdateOffsetPix,
+ dml_uint_t *VUpdateWidthPix,
+ dml_uint_t *VReadyOffsetPix);
+
+static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported);
+
+static dml_float_t TruncToValidBPP(
+ dml_float_t LinkBitRate,
+ dml_uint_t Lanes,
+ dml_uint_t HTotal,
+ dml_uint_t HActive,
+ dml_float_t PixelClock,
+ dml_float_t DesiredBPP,
+ dml_bool_t DSCEnable,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class Format,
+ dml_uint_t DSCInputBitPerComponent,
+ dml_uint_t DSCSlices,
+ dml_uint_t AudioRate,
+ dml_uint_t AudioLayout,
+ enum dml_odm_mode ODMModeNoDSC,
+ enum dml_odm_mode ODMModeDSC,
+ // Output
+ dml_uint_t *RequiredSlotsSingle);
+
+static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ struct display_mode_lib_scratch_st *s,
+ struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p);
+
+static void CalculateDCFCLKDeepSleep(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t BytePerPixelY[],
+ dml_uint_t BytePerPixelC[],
+ dml_float_t VRatio[],
+ dml_float_t VRatioChroma[],
+ dml_uint_t SwathWidthY[],
+ dml_uint_t SwathWidthC[],
+ dml_uint_t DPPPerSurface[],
+ dml_float_t HRatio[],
+ dml_float_t HRatioChroma[],
+ dml_float_t PixelClock[],
+ dml_float_t PSCL_THROUGHPUT[],
+ dml_float_t PSCL_THROUGHPUT_CHROMA[],
+ dml_float_t Dppclk[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_uint_t ReturnBusWidth,
+
+ // Output
+ dml_float_t *DCFCLKDeepSleep);
+
+static void CalculateUrgentBurstFactor(
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+ dml_uint_t swath_width_luma_ub,
+ dml_uint_t swath_width_chroma_ub,
+ dml_uint_t SwathHeightY,
+ dml_uint_t SwathHeightC,
+ dml_float_t LineTime,
+ dml_float_t UrgentLatency,
+ dml_float_t CursorBufferSize,
+ dml_uint_t CursorWidth,
+ dml_uint_t CursorBPP,
+ dml_float_t VRatio,
+ dml_float_t VRatioC,
+ dml_float_t BytePerPixelInDETY,
+ dml_float_t BytePerPixelInDETC,
+ dml_uint_t DETBufferSizeY,
+ dml_uint_t DETBufferSizeC,
+ // Output
+ dml_float_t *UrgentBurstFactorCursor,
+ dml_float_t *UrgentBurstFactorLuma,
+ dml_float_t *UrgentBurstFactorChroma,
+ dml_bool_t *NotEnoughUrgentLatencyHiding);
+
+static dml_float_t RequiredDTBCLK(
+ dml_bool_t DSCEnable,
+ dml_float_t PixelClock,
+ enum dml_output_format_class OutputFormat,
+ dml_float_t OutputBpp,
+ dml_uint_t DSCSlices,
+ dml_uint_t HTotal,
+ dml_uint_t HActive,
+ dml_uint_t AudioRate,
+ dml_uint_t AudioLayoutSingle);
+
+static void UseMinimumDCFCLK(
+ struct display_mode_lib_scratch_st *scratch,
+ struct UseMinimumDCFCLK_params_st *p);
+
+static void CalculatePixelDeliveryTimes(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t VRatio[],
+ dml_float_t VRatioChroma[],
+ dml_float_t VRatioPrefetchY[],
+ dml_float_t VRatioPrefetchC[],
+ dml_uint_t swath_width_luma_ub[],
+ dml_uint_t swath_width_chroma_ub[],
+ dml_uint_t DPPPerSurface[],
+ dml_float_t HRatio[],
+ dml_float_t HRatioChroma[],
+ dml_float_t PixelClock[],
+ dml_float_t PSCL_THROUGHPUT[],
+ dml_float_t PSCL_THROUGHPUT_CHROMA[],
+ dml_float_t Dppclk[],
+ dml_uint_t BytePerPixelC[],
+ enum dml_rotation_angle SourceScan[],
+ dml_uint_t NumberOfCursors[],
+ dml_uint_t CursorWidth[],
+ dml_uint_t CursorBPP[],
+ dml_uint_t BlockWidth256BytesY[],
+ dml_uint_t BlockHeight256BytesY[],
+ dml_uint_t BlockWidth256BytesC[],
+ dml_uint_t BlockHeight256BytesC[],
+
+ // Output
+ dml_float_t DisplayPipeLineDeliveryTimeLuma[],
+ dml_float_t DisplayPipeLineDeliveryTimeChroma[],
+ dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
+ dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
+ dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
+ dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
+ dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+ dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+ dml_float_t CursorRequestDeliveryTime[],
+ dml_float_t CursorRequestDeliveryTimePrefetch[]);
+
+static void CalculateMetaAndPTETimes(
+ dml_bool_t use_one_row_for_frame[],
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t MetaChunkSize,
+ dml_uint_t MinMetaChunkSizeBytes,
+ dml_uint_t HTotal[],
+ dml_float_t VRatio[],
+ dml_float_t VRatioChroma[],
+ dml_float_t DestinationLinesToRequestRowInVBlank[],
+ dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
+ dml_bool_t DCCEnable[],
+ dml_float_t PixelClock[],
+ dml_uint_t BytePerPixelY[],
+ dml_uint_t BytePerPixelC[],
+ enum dml_rotation_angle SourceScan[],
+ dml_uint_t dpte_row_height[],
+ dml_uint_t dpte_row_height_chroma[],
+ dml_uint_t meta_row_width[],
+ dml_uint_t meta_row_width_chroma[],
+ dml_uint_t meta_row_height[],
+ dml_uint_t meta_row_height_chroma[],
+ dml_uint_t meta_req_width[],
+ dml_uint_t meta_req_width_chroma[],
+ dml_uint_t meta_req_height[],
+ dml_uint_t meta_req_height_chroma[],
+ dml_uint_t dpte_group_bytes[],
+ dml_uint_t PTERequestSizeY[],
+ dml_uint_t PTERequestSizeC[],
+ dml_uint_t PixelPTEReqWidthY[],
+ dml_uint_t PixelPTEReqHeightY[],
+ dml_uint_t PixelPTEReqWidthC[],
+ dml_uint_t PixelPTEReqHeightC[],
+ dml_uint_t dpte_row_width_luma_ub[],
+ dml_uint_t dpte_row_width_chroma_ub[],
+
+ // Output
+ dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
+ dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
+ dml_float_t DST_Y_PER_META_ROW_NOM_L[],
+ dml_float_t DST_Y_PER_META_ROW_NOM_C[],
+ dml_float_t TimePerMetaChunkNominal[],
+ dml_float_t TimePerChromaMetaChunkNominal[],
+ dml_float_t TimePerMetaChunkVBlank[],
+ dml_float_t TimePerChromaMetaChunkVBlank[],
+ dml_float_t TimePerMetaChunkFlip[],
+ dml_float_t TimePerChromaMetaChunkFlip[],
+ dml_float_t time_per_pte_group_nom_luma[],
+ dml_float_t time_per_pte_group_vblank_luma[],
+ dml_float_t time_per_pte_group_flip_luma[],
+ dml_float_t time_per_pte_group_nom_chroma[],
+ dml_float_t time_per_pte_group_vblank_chroma[],
+ dml_float_t time_per_pte_group_flip_chroma[]);
+
+static void CalculateVMGroupAndRequestTimes(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t GPUVMMaxPageTableLevels,
+ dml_uint_t HTotal[],
+ dml_uint_t BytePerPixelC[],
+ dml_float_t DestinationLinesToRequestVMInVBlank[],
+ dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
+ dml_bool_t DCCEnable[],
+ dml_float_t PixelClock[],
+ dml_uint_t dpte_row_width_luma_ub[],
+ dml_uint_t dpte_row_width_chroma_ub[],
+ dml_uint_t vm_group_bytes[],
+ dml_uint_t dpde0_bytes_per_frame_ub_l[],
+ dml_uint_t dpde0_bytes_per_frame_ub_c[],
+ dml_uint_t meta_pte_bytes_per_frame_ub_l[],
+ dml_uint_t meta_pte_bytes_per_frame_ub_c[],
+
+ // Output
+ dml_float_t TimePerVMGroupVBlank[],
+ dml_float_t TimePerVMGroupFlip[],
+ dml_float_t TimePerVMRequestVBlank[],
+ dml_float_t TimePerVMRequestFlip[]);
+
+static void CalculateStutterEfficiency(
+ struct display_mode_lib_scratch_st *scratch,
+ struct CalculateStutterEfficiency_params_st *p);
+
+static void CalculateSwathAndDETConfiguration(
+ struct display_mode_lib_scratch_st *scratch,
+ struct CalculateSwathAndDETConfiguration_params_st *p);
+
+static void CalculateSwathWidth(
+ dml_bool_t ForceSingleDPP,
+ dml_uint_t NumberOfActiveSurfaces,
+ enum dml_source_format_class SourcePixelFormat[],
+ enum dml_rotation_angle SourceScan[],
+ dml_bool_t ViewportStationary[],
+ dml_uint_t ViewportWidth[],
+ dml_uint_t ViewportHeight[],
+ dml_uint_t ViewportXStart[],
+ dml_uint_t ViewportYStart[],
+ dml_uint_t ViewportXStartC[],
+ dml_uint_t ViewportYStartC[],
+ dml_uint_t SurfaceWidthY[],
+ dml_uint_t SurfaceWidthC[],
+ dml_uint_t SurfaceHeightY[],
+ dml_uint_t SurfaceHeightC[],
+ enum dml_odm_mode ODMMode[],
+ dml_uint_t BytePerPixY[],
+ dml_uint_t BytePerPixC[],
+ dml_uint_t Read256BytesBlockHeightY[],
+ dml_uint_t Read256BytesBlockHeightC[],
+ dml_uint_t Read256BytesBlockWidthY[],
+ dml_uint_t Read256BytesBlockWidthC[],
+ dml_uint_t BlendingAndTiming[],
+ dml_uint_t HActive[],
+ dml_float_t HRatio[],
+ dml_uint_t DPPPerSurface[],
+
+ // Output
+ dml_uint_t SwathWidthSingleDPPY[],
+ dml_uint_t SwathWidthSingleDPPC[],
+ dml_uint_t SwathWidthY[],
+ dml_uint_t SwathWidthC[],
+ dml_uint_t MaximumSwathHeightY[],
+ dml_uint_t MaximumSwathHeightC[],
+ dml_uint_t swath_width_luma_ub[],
+ dml_uint_t swath_width_chroma_ub[]);
+
+static dml_float_t CalculateExtraLatency(
+ dml_uint_t RoundTripPingLatencyCycles,
+ dml_uint_t ReorderingBytes,
+ dml_float_t DCFCLK,
+ dml_uint_t TotalNumberOfActiveDPP,
+ dml_uint_t PixelChunkSizeInKByte,
+ dml_uint_t TotalNumberOfDCCActiveDPP,
+ dml_uint_t MetaChunkSize,
+ dml_float_t ReturnBW,
+ dml_bool_t GPUVMEnable,
+ dml_bool_t HostVMEnable,
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t NumberOfDPP[],
+ dml_uint_t dpte_group_bytes[],
+ dml_float_t HostVMInefficiencyFactor,
+ dml_uint_t HostVMMinPageSize,
+ dml_uint_t HostVMMaxNonCachedPageTableLevels);
+
+static dml_uint_t CalculateExtraLatencyBytes(
+ dml_uint_t ReorderingBytes,
+ dml_uint_t TotalNumberOfActiveDPP,
+ dml_uint_t PixelChunkSizeInKByte,
+ dml_uint_t TotalNumberOfDCCActiveDPP,
+ dml_uint_t MetaChunkSize,
+ dml_bool_t GPUVMEnable,
+ dml_bool_t HostVMEnable,
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t NumberOfDPP[],
+ dml_uint_t dpte_group_bytes[],
+ dml_float_t HostVMInefficiencyFactor,
+ dml_uint_t HostVMMinPageSize,
+ dml_uint_t HostVMMaxNonCachedPageTableLevels);
+
+static dml_float_t CalculateUrgentLatency(
+ dml_float_t UrgentLatencyPixelDataOnly,
+ dml_float_t UrgentLatencyPixelMixedWithVMData,
+ dml_float_t UrgentLatencyVMDataOnly,
+ dml_bool_t DoUrgentLatencyAdjustment,
+ dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
+ dml_float_t UrgentLatencyAdjustmentFabricClockReference,
+ dml_float_t FabricClockSingle);
+
+static dml_bool_t UnboundedRequest(
+ enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
+ dml_uint_t TotalNumberOfActiveDPP,
+ dml_bool_t NoChromaOrLinear,
+ enum dml_output_encoder_class Output);
+
+static void CalculateSurfaceSizeInMall(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t MALLAllocatedForDCN,
+ enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ dml_bool_t DCCEnable[],
+ dml_bool_t ViewportStationary[],
+ dml_uint_t ViewportXStartY[],
+ dml_uint_t ViewportYStartY[],
+ dml_uint_t ViewportXStartC[],
+ dml_uint_t ViewportYStartC[],
+ dml_uint_t ViewportWidthY[],
+ dml_uint_t ViewportHeightY[],
+ dml_uint_t BytesPerPixelY[],
+ dml_uint_t ViewportWidthC[],
+ dml_uint_t ViewportHeightC[],
+ dml_uint_t BytesPerPixelC[],
+ dml_uint_t SurfaceWidthY[],
+ dml_uint_t SurfaceWidthC[],
+ dml_uint_t SurfaceHeightY[],
+ dml_uint_t SurfaceHeightC[],
+ dml_uint_t Read256BytesBlockWidthY[],
+ dml_uint_t Read256BytesBlockWidthC[],
+ dml_uint_t Read256BytesBlockHeightY[],
+ dml_uint_t Read256BytesBlockHeightC[],
+ dml_uint_t ReadBlockWidthY[],
+ dml_uint_t ReadBlockWidthC[],
+ dml_uint_t ReadBlockHeightY[],
+ dml_uint_t ReadBlockHeightC[],
+
+ // Output
+ dml_uint_t SurfaceSizeInMALL[],
+ dml_bool_t *ExceededMALLSize);
+
+static void CalculateDETBufferSize( // forward declaration only; parameter names here have no effect on callers
+		dml_uint_t DETSizeOverride[],
+		enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+		dml_bool_t ForceSingleDPP,
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_bool_t UnboundedRequestEnabled,
+		dml_uint_t nomDETInKByte,
+		dml_uint_t MaxTotalDETInKByte,
+		dml_uint_t ConfigReturnBufferSizeInKByte,
+		dml_uint_t MinCompressedBufferSizeInKByte,
+		dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
+		dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
+		enum dml_source_format_class SourcePixelFormat[],
+		dml_float_t ReadBandwidthLuma[],
+		dml_float_t ReadBandwidthChroma[],
+		dml_uint_t RoundedUpMaxSwathSizeBytesY[], // Y counterpart of RoundedUpMaxSwathSizeBytesC
+		dml_uint_t RoundedUpMaxSwathSizeBytesC[],
+		dml_uint_t DPPPerSurface[],
+		// Output
+		dml_uint_t DETBufferSizeInKByte[],
+		dml_uint_t *CompressedBufferSizeInkByte);
+
+static void CalculateMaxDETAndMinCompressedBufferSize(
+ dml_uint_t ConfigReturnBufferSizeInKByte,
+ dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
+ dml_uint_t ROBBufferSizeInKByte,
+ dml_uint_t MaxNumDPP,
+ dml_bool_t nomDETInKByteOverrideEnable,
+ dml_uint_t nomDETInKByteOverrideValue,
+
+ // Output
+ dml_uint_t *MaxTotalDETInKByte,
+ dml_uint_t *nomDETInKByte,
+ dml_uint_t *MinCompressedBufferSizeInKByte);
+
+static dml_uint_t DSCDelayRequirement(
+ dml_bool_t DSCEnabled,
+ enum dml_odm_mode ODMMode,
+ dml_uint_t DSCInputBitPerComponent,
+ dml_float_t OutputBpp,
+ dml_uint_t HActive,
+ dml_uint_t HTotal,
+ dml_uint_t NumberOfDSCSlices,
+ enum dml_output_format_class OutputFormat,
+ enum dml_output_encoder_class Output,
+ dml_float_t PixelClock,
+ dml_float_t PixelClockBackEnd);
+
+static dml_bool_t CalculateVActiveBandwithSupport(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ dml_bool_t NotUrgentLatencyHiding[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t meta_row_bandwidth[],
+ dml_float_t dpte_row_bandwidth[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[]);
+
+static void CalculatePrefetchBandwithSupport(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ dml_bool_t NotUrgentLatencyHiding[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t PrefetchBandwidthLuma[],
+ dml_float_t PrefetchBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t meta_row_bandwidth[],
+ dml_float_t dpte_row_bandwidth[],
+ dml_float_t cursor_bw_pre[],
+ dml_float_t prefetch_vmrow_bw[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[],
+ dml_float_t UrgentBurstFactorLumaPre[],
+ dml_float_t UrgentBurstFactorChromaPre[],
+ dml_float_t UrgentBurstFactorCursorPre[],
+
+ // Output
+ dml_float_t *PrefetchBandwidth,
+ dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
+ dml_float_t *FractionOfUrgentBandwidth,
+ dml_bool_t *PrefetchBandwidthSupport);
+
+static dml_float_t CalculateBandwidthAvailableForImmediateFlip(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t PrefetchBandwidthLuma[],
+ dml_float_t PrefetchBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t cursor_bw_pre[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[],
+ dml_float_t UrgentBurstFactorLumaPre[],
+ dml_float_t UrgentBurstFactorChromaPre[],
+ dml_float_t UrgentBurstFactorCursorPre[]);
+
+static void CalculateImmediateFlipBandwithSupport(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_float_t ReturnBW,
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
+ dml_float_t final_flip_bw[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_float_t PrefetchBandwidthLuma[],
+ dml_float_t PrefetchBandwidthChroma[],
+ dml_float_t cursor_bw[],
+ dml_float_t meta_row_bandwidth[],
+ dml_float_t dpte_row_bandwidth[],
+ dml_float_t cursor_bw_pre[],
+ dml_float_t prefetch_vmrow_bw[],
+ dml_uint_t NumberOfDPP[],
+ dml_float_t UrgentBurstFactorLuma[],
+ dml_float_t UrgentBurstFactorChroma[],
+ dml_float_t UrgentBurstFactorCursor[],
+ dml_float_t UrgentBurstFactorLumaPre[],
+ dml_float_t UrgentBurstFactorChromaPre[],
+ dml_float_t UrgentBurstFactorCursorPre[],
+
+ // Output
+ dml_float_t *TotalBandwidth,
+ dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
+ dml_float_t *FractionOfUrgentBandwidth,
+ dml_bool_t *ImmediateFlipBandwidthSupport);
+
+// ---------------------------
+// Declaration Ends
+// ---------------------------
+
+static dml_uint_t dscceComputeDelay(
+		dml_uint_t bpc,
+		dml_float_t BPP,
+		dml_uint_t sliceWidth,
+		dml_uint_t numSlices,
+		enum dml_output_format_class pixelFormat,
+		enum dml_output_encoder_class Output)
+{
+	/*
+	 * Delay through the DSC compression engine (dscce), returned in pixels
+	 * (delay cycles * 3 containers per cycle * pixels per container).
+	 *
+	 * Expected inputs (documented only, nothing is checked here):
+	 *   bpc         - source bits per component, one of {8, 10, 12}
+	 *   BPP         - in increments of 1/16 of a bit; min 6/7/8 for
+	 *                 N420/N422/444, max such that compression is 1:1
+	 *   sliceWidth  - pixels per slice line, at most 5184/numSlices
+	 *                 (4096/numSlices in 420 mode)
+	 *   numSlices   - horizontal slices per DSC engine, one of {1, 2, 3, 4}
+	 *   pixelFormat - one of {N444_RGB, S422, N422, N420}
+	 *   Output      - only echoed in the debug trace
+	 *
+	 * A sliceWidth below the pixels-per-clock value makes w zero, and w is
+	 * used as a divisor further down.
+	 */
+
+	// fixed value
+	const dml_uint_t rc_model_size = 8192;
+
+	// N420 and N422 operate at 2 pixels per clock, all other modes at 1
+	const dml_uint_t px_per_clk =
+		(pixelFormat == dml_420 || pixelFormat == dml_n422) ? 2 : 1;
+
+	// initial transmit delay as per PPS
+	const dml_uint_t xmit_delay =
+		(dml_uint_t)(dml_round(rc_model_size / 2.0 / BPP / px_per_clk, 1));
+
+	// ssm delay: 81 at 8 bpc, 89 at 10 bpc, 113 for any other bpc
+	const dml_uint_t ssm_delay = (bpc == 8) ? 81 : ((bpc == 10) ? 89 : 113);
+
+	// divide by pixels per clock to get the slice width as seen by DSC
+	const dml_uint_t w = sliceWidth / px_per_clk;
+
+	// formats other than N420/444/N422 (S422 in the set above) have an additional cycle of delay
+	const dml_uint_t s =
+		(pixelFormat == dml_420 || pixelFormat == dml_444 || pixelFormat == dml_n422) ? 0 : 1;
+
+	// main calculation for the dscce
+	const dml_uint_t ix = xmit_delay + 45;
+	// wx = ceil(w / 3), p = amount by which 3 * wx exceeds w
+	const dml_uint_t wx = (w + 2) / 3;
+	const dml_uint_t p = 3 * wx - w;
+	// l0 = whole multiples of w contained in ix; each one adds p to a
+	const dml_uint_t l0 = ix / w;
+	const dml_uint_t a = ix + p * l0;
+	// ax = ceil(a / 3) plus the ssm delay plus a constant 7
+	const dml_uint_t ax = (a + 2) / 3 + ssm_delay + 6 + 1;
+	// L = ceil(ax / wx)
+	const dml_uint_t L = (ax + wx - 1) / wx;
+	// one extra cycle when ix is an exact multiple of w and p is non-zero
+	const dml_uint_t lstall =
+		((ix % w) == 0 && p != 0) ? 1 : 0;
+	// each slice beyond the first adds L * wx on top of ax
+	const dml_uint_t delay_cycles =
+		L * wx * (numSlices - 1) + ax + s + lstall + 22;
+
+	// dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
+	const dml_uint_t pixels = delay_cycles * 3 * px_per_clk;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: bpc: %u\n", __func__, bpc);
+	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
+	dml_print("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
+	dml_print("DML::%s: numSlices: %u\n", __func__, numSlices);
+	dml_print("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
+	dml_print("DML::%s: Output: %u\n", __func__, Output);
+	dml_print("DML::%s: pixels: %u\n", __func__, pixels);
+#endif
+
+	return pixels;
+}
+
+static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output)
+{
+	/*
+	 * Fixed delay of the DSC path, chosen by output pixel format and built
+	 * as the sum of per-stage contributions. Each table below lists those
+	 * contributions in order, with the stage named next to its value.
+	 *
+	 * Totals: dml_420 -> 48, dml_n422 -> 49, any other format -> 30.
+	 * Output does not take part in the calculation.
+	 */
+	static const dml_uint_t stages_420[] = {
+		2,	// sfr
+		0,	// dsccif
+		3,	// dscc - input deserializer
+		2,	// dscc gets pixels every other cycle
+		12,	// dscc - input cdc fifo
+		13,	// dscc gets pixels every other cycle
+		2,	// dscc - cdc uncertainty
+		7,	// dscc - output cdc fifo
+		3,	// dscc gets pixels every other cycle
+		2,	// dscc - cdc uncertainty
+		1,	// dscc - output serializer
+		1,	// sft
+	};
+
+	static const dml_uint_t stages_n422[] = {
+		2,	// sfr
+		1,	// dsccif
+		5,	// dscc - input deserializer
+		25,	// dscc - input cdc fifo
+		2,	// dscc - cdc uncertainty
+		10,	// dscc - output cdc fifo
+		2,	// dscc - cdc uncertainty
+		1,	// dscc - output serializer
+		1,	// sft
+	};
+
+	static const dml_uint_t stages_other[] = {
+		2,	// sfr
+		0,	// dsccif
+		3,	// dscc - input deserializer
+		12,	// dscc - input cdc fifo
+		2,	// dscc - cdc uncertainty
+		7,	// dscc - output cdc fifo
+		1,	// dscc - output serializer
+		2,	// dscc - cdc uncertainty
+		1,	// sft
+	};
+
+	const dml_uint_t *stages;
+	dml_uint_t num_stages;
+	dml_uint_t Delay = 0;
+	dml_uint_t i;
+
+	// pick the stage table for this pixel format
+	if (pixelFormat == dml_420) {
+		stages = stages_420;
+		num_stages = (dml_uint_t)(sizeof(stages_420) / sizeof(stages_420[0]));
+	} else if (pixelFormat == dml_n422) {
+		stages = stages_n422;
+		num_stages = (dml_uint_t)(sizeof(stages_n422) / sizeof(stages_n422[0]));
+	} else {
+		stages = stages_other;
+		num_stages = (dml_uint_t)(sizeof(stages_other) / sizeof(stages_other[0]));
+	}
+
+	for (i = 0; i < num_stages; i++)
+		Delay += stages[i];
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
+	dml_print("DML::%s: Delay = %u\n", __func__, Delay);
+#endif
+	return Delay;
+}
+
+static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
+ struct CalculatePrefetchSchedule_params_st *p)
+{
+ struct CalculatePrefetchSchedule_locals_st *s = &scratch->CalculatePrefetchSchedule_locals;
+
+ s->MyError = false;
+ s->DPPCycles = 0;
+ s->DISPCLKCycles = 0;
+ s->DSTTotalPixelsAfterScaler = 0.0;
+ s->LineTime = 0.0;
+ s->dst_y_prefetch_equ = 0.0;
+ s->prefetch_bw_oto = 0.0;
+ s->Tvm_oto = 0.0;
+ s->Tr0_oto = 0.0;
+ s->Tvm_oto_lines = 0.0;
+ s->Tr0_oto_lines = 0.0;
+ s->dst_y_prefetch_oto = 0.0;
+ s->TimeForFetchingMetaPTE = 0.0;
+ s->TimeForFetchingRowInVBlank = 0.0;
+ s->LinesToRequestPrefetchPixelData = 0.0;
+ s->HostVMDynamicLevelsTrips = 0;
+ s->trip_to_mem = 0.0;
+ s->Tvm_trips = 0.0;
+ s->Tr0_trips = 0.0;
+ s->Tvm_trips_rounded = 0.0;
+ s->Tr0_trips_rounded = 0.0;
+ s->max_Tsw = 0.0;
+ s->Lsw_oto = 0.0;
+ s->Tpre_rounded = 0.0;
+ s->prefetch_bw_equ = 0.0;
+ s->Tvm_equ = 0.0;
+ s->Tr0_equ = 0.0;
+ s->Tdmbf = 0.0;
+ s->Tdmec = 0.0;
+ s->Tdmsks = 0.0;
+ s->prefetch_sw_bytes = 0.0;
+ s->prefetch_bw_pr = 0.0;
+ s->bytes_pp = 0.0;
+ s->dep_bytes = 0.0;
+ s->min_Lsw_oto = 0.0;
+ s->Tsw_est1 = 0.0;
+ s->Tsw_est3 = 0.0;
+
+ if (p->GPUVMEnable == true && p->HostVMEnable == true) {
+ s->HostVMDynamicLevelsTrips = p->HostVMMaxNonCachedPageTableLevels;
+ } else {
+ s->HostVMDynamicLevelsTrips = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
+ dml_print("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->GPUVMPageTableLevels);
+ dml_print("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
+ dml_print("DML::%s: VStartup = %u\n", __func__, p->VStartup);
+ dml_print("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup);
+ dml_print("DML::%s: HostVMEnable = %u\n", __func__, p->HostVMEnable);
+ dml_print("DML::%s: HostVMInefficiencyFactor= %f\n", __func__, p->HostVMInefficiencyFactor);
+ dml_print("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
+#endif
+ CalculateVUpdateAndDynamicMetadataParameters(
+ p->MaxInterDCNTileRepeaters,
+ p->myPipe->Dppclk,
+ p->myPipe->Dispclk,
+ p->myPipe->DCFClkDeepSleep,
+ p->myPipe->PixelClock,
+ p->myPipe->HTotal,
+ p->myPipe->VBlank,
+ p->DynamicMetadataTransmittedBytes,
+ p->DynamicMetadataLinesBeforeActiveRequired,
+ p->myPipe->InterlaceEnable,
+ p->myPipe->ProgressiveToInterlaceUnitInOPP,
+ p->TSetup,
+
+ // Output
+ &s->Tdmbf,
+ &s->Tdmec,
+ &s->Tdmsks,
+ p->VUpdateOffsetPix,
+ p->VUpdateWidthPix,
+ p->VReadyOffsetPix);
+
+ s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
+ s->trip_to_mem = p->UrgentLatency;
+ s->Tvm_trips = p->UrgentExtraLatency + s->trip_to_mem * (p->GPUVMPageTableLevels * (s->HostVMDynamicLevelsTrips + 1) - 1);
+
+ if (p->DynamicMetadataVMEnabled == true) {
+ *p->Tdmdl = p->TWait + s->Tvm_trips + s->trip_to_mem;
+ } else {
+ *p->Tdmdl = p->TWait + p->UrgentExtraLatency;
+ }
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ if (DynamicMetadataEnable == false) {
+ *Tdmdl = 0.0;
+ }
+#endif
+
+ if (p->DynamicMetadataEnable == true) {
+ if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
+ *p->NotEnoughTimeForDynamicMetadata = true;
+ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
+ dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
+ } else {
+ *p->NotEnoughTimeForDynamicMetadata = false;
+ }
+ } else {
+ *p->NotEnoughTimeForDynamicMetadata = false;
+ }
+
+ *p->Tdmdl_vm = (p->DynamicMetadataEnable == true && p->DynamicMetadataVMEnabled == true && p->GPUVMEnable == true ? p->TWait + s->Tvm_trips : 0);
+
+ if (p->myPipe->ScalerEnabled)
+ s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
+ else
+ s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
+
+ s->DPPCycles = (dml_uint_t)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
+
+ s->DISPCLKCycles = (dml_uint_t)p->DISPCLKDelaySubtotal;
+
+ if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
+ return true;
+
+ *p->DSTXAfterScaler = (dml_uint_t) dml_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay, 1.0);
+ *p->DSTXAfterScaler = (dml_uint_t) dml_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
+ ((p->myPipe->ODMMode == dml_odm_mode_split_1to2 || p->myPipe->ODMMode == dml_odm_mode_mso_1to2) ? (dml_float_t)p->myPipe->HActive / 2.0 : 0) +
+ ((p->myPipe->ODMMode == dml_odm_mode_mso_1to4) ? (dml_float_t)p->myPipe->HActive * 3.0 / 4.0 : 0), 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
+ dml_print("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
+ dml_print("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
+ dml_print("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
+ dml_print("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
+ dml_print("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
+ dml_print("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
+ dml_print("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
+ dml_print("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
+#endif
+
+ if (p->OutputFormat == dml_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
+ *p->DSTYAfterScaler = 1;
+ else
+ *p->DSTYAfterScaler = 0;
+
+ s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
+ *p->DSTYAfterScaler = (dml_uint_t)(dml_floor(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
+ *p->DSTXAfterScaler = (dml_uint_t)(s->DSTTotalPixelsAfterScaler - ((dml_float_t) (*p->DSTYAfterScaler * p->myPipe->HTotal)));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
+ dml_print("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
+#endif
+
+ s->MyError = false;
+
+ s->Tr0_trips = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
+
+ if (p->GPUVMEnable == true) {
+ s->Tvm_trips_rounded = dml_ceil(4.0 * s->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ if (p->GPUVMPageTableLevels >= 3) {
+ *p->Tno_bw = p->UrgentExtraLatency + s->trip_to_mem * (dml_float_t) ((p->GPUVMPageTableLevels - 2) * (s->HostVMDynamicLevelsTrips + 1) - 1);
+ } else if (p->GPUVMPageTableLevels == 1 && p->myPipe->DCCEnable != true) {
+ s->Tr0_trips_rounded = dml_ceil(4.0 * p->UrgentExtraLatency / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ *p->Tno_bw = p->UrgentExtraLatency;
+ } else {
+ *p->Tno_bw = 0;
+ }
+ } else if (p->myPipe->DCCEnable == true) {
+ s->Tvm_trips_rounded = s->LineTime / 4.0;
+ s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ *p->Tno_bw = 0;
+ } else {
+ s->Tvm_trips_rounded = s->LineTime / 4.0;
+ s->Tr0_trips_rounded = s->LineTime / 2.0;
+ *p->Tno_bw = 0;
+ }
+ s->Tvm_trips_rounded = dml_max(s->Tvm_trips_rounded, s->LineTime / 4.0);
+ s->Tr0_trips_rounded = dml_max(s->Tr0_trips_rounded, s->LineTime / 4.0);
+
+ if (p->myPipe->SourcePixelFormat == dml_420_8 || p->myPipe->SourcePixelFormat == dml_420_10 || p->myPipe->SourcePixelFormat == dml_420_12) {
+ s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4;
+ } else {
+ s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
+ }
+
+ s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (dml_float_t)p->myPipe->DPPPerSurface;
+ if (p->myPipe->VRatio < 1.0)
+ s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;
+
+ s->max_Tsw = (dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime);
+
+ s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
+ s->prefetch_bw_oto = dml_max(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw);
+
+ s->min_Lsw_oto = dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML_MAX_VRATIO_PRE_OTO__;
+ s->min_Lsw_oto = dml_max(s->min_Lsw_oto, 1.0);
+ s->Lsw_oto = dml_ceil(4.0 * dml_max(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0;
+
+ if (p->GPUVMEnable == true) {
+ s->Tvm_oto = dml_max3(
+ s->Tvm_trips,
+ *p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
+ s->LineTime / 4.0);
+ } else
+ s->Tvm_oto = s->LineTime / 4.0;
+
+ if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
+ s->Tr0_oto = dml_max4(
+ s->Tr0_trips,
+ (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto,
+ (s->LineTime - s->Tvm_oto)/2.0,
+ s->LineTime / 4.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, s->Tr0_trips);
+ dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime - s->Tvm_oto);
+ dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, s->LineTime / 4);
+#endif
+ } else
+ s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 2.0;
+
+ s->Tvm_oto_lines = dml_ceil(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
+ s->Tr0_oto_lines = dml_ceil(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
+ s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
+
+ s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
+ s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
+ dml_print("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
+ dml_print("DML::%s: *Tno_bw = %f\n", __func__, *p->Tno_bw);
+ dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, p->UrgentExtraLatency);
+ dml_print("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
+ dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
+ dml_print("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
+ dml_print("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
+ dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
+ dml_print("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ dml_print("DML::%s: Tvm_trips = %f\n", __func__, s->Tvm_trips);
+ dml_print("DML::%s: Tr0_trips = %f\n", __func__, s->Tr0_trips);
+ dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
+ dml_print("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
+ dml_print("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
+ dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
+ dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
+ dml_print("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
+ dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
+ dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
+#endif
+
+ s->dst_y_prefetch_equ = dml_floor(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
+
+ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
+
+ dml_print("DML::%s: LineTime: %f\n", __func__, s->LineTime);
+ dml_print("DML::%s: VStartup: %u\n", __func__, p->VStartup);
+ dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
+ dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
+ dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
+ dml_print("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
+ dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
+ dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
+ dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
+ dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
+ dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
+ dml_print("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
+ dml_print("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
+
+ s->dep_bytes = dml_max(p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor, p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor);
+
+ if (s->prefetch_sw_bytes < s->dep_bytes) {
+ s->prefetch_sw_bytes = 2 * s->dep_bytes;
+ }
+
+ *p->DestinationLinesToRequestVMInVBlank = 0;
+ *p->DestinationLinesToRequestRowInVBlank = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixDataBWLuma = 0;
+ if (s->dst_y_prefetch_equ > 1) {
+
+ if (s->Tpre_rounded - *p->Tno_bw > 0) {
+ s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte
+ + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor
+ + s->prefetch_sw_bytes)
+ / (s->Tpre_rounded - *p->Tno_bw);
+ s->Tsw_est1 = s->prefetch_sw_bytes / s->PrefetchBandwidth1;
+ } else
+ s->PrefetchBandwidth1 = 0;
+
+ if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) {
+ s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) /
+ (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
+ }
+
+ if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0)
+ s->PrefetchBandwidth2 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
+ (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded);
+ else
+ s->PrefetchBandwidth2 = 0;
+
+ if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
+ s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
+ (s->Tpre_rounded - s->Tvm_trips_rounded);
+ s->Tsw_est3 = s->prefetch_sw_bytes / s->PrefetchBandwidth3;
+ }
+ else
+ s->PrefetchBandwidth3 = 0;
+
+
+ if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) {
+ s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
+ }
+
+ if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
+ s->PrefetchBandwidth4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
+ else
+ s->PrefetchBandwidth4 = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded);
+ dml_print("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
+ dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded);
+ dml_print("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
+ dml_print("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
+ dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, s->PrefetchBandwidth1);
+ dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, s->PrefetchBandwidth2);
+ dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, s->PrefetchBandwidth3);
+ dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, s->PrefetchBandwidth4);
+#endif
+ {
+ dml_bool_t Case1OK;
+ dml_bool_t Case2OK;
+ dml_bool_t Case3OK;
+
+ if (s->PrefetchBandwidth1 > 0) {
+ if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth1 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth1 >= s->Tr0_trips_rounded) {
+ Case1OK = true;
+ } else {
+ Case1OK = false;
+ }
+ } else {
+ Case1OK = false;
+ }
+
+ if (s->PrefetchBandwidth2 > 0) {
+ if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth2 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth2 < s->Tr0_trips_rounded) {
+ Case2OK = true;
+ } else {
+ Case2OK = false;
+ }
+ } else {
+ Case2OK = false;
+ }
+
+ if (s->PrefetchBandwidth3 > 0) {
+ if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth3 < s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth3 >= s->Tr0_trips_rounded) {
+ Case3OK = true;
+ } else {
+ Case3OK = false;
+ }
+ } else {
+ Case3OK = false;
+ }
+
+ if (Case1OK) {
+ s->prefetch_bw_equ = s->PrefetchBandwidth1;
+ } else if (Case2OK) {
+ s->prefetch_bw_equ = s->PrefetchBandwidth2;
+ } else if (Case3OK) {
+ s->prefetch_bw_equ = s->PrefetchBandwidth3;
+ } else {
+ s->prefetch_bw_equ = s->PrefetchBandwidth4;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Case1OK: %u\n", __func__, Case1OK);
+ dml_print("DML::%s: Case2OK: %u\n", __func__, Case2OK);
+ dml_print("DML::%s: Case3OK: %u\n", __func__, Case3OK);
+ dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
+#endif
+
+ if (s->prefetch_bw_equ > 0) {
+ if (p->GPUVMEnable == true) {
+ s->Tvm_equ = dml_max3(*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, s->Tvm_trips, s->LineTime / 4);
+ } else {
+ s->Tvm_equ = s->LineTime / 4;
+ }
+
+ if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
+ s->Tr0_equ = dml_max4((p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_equ, s->Tr0_trips, (s->LineTime - s->Tvm_equ) / 2, s->LineTime / 4);
+ } else {
+ s->Tr0_equ = (s->LineTime - s->Tvm_equ) / 2;
+ }
+ } else {
+ s->Tvm_equ = 0;
+ s->Tr0_equ = 0;
+ dml_print("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
+ }
+ }
+
+
+ if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
+ *p->DestinationLinesForPrefetch = s->dst_y_prefetch_oto;
+ s->TimeForFetchingMetaPTE = s->Tvm_oto;
+ s->TimeForFetchingRowInVBlank = s->Tr0_oto;
+
+ *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
+ *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
+ } else {
+ *p->DestinationLinesForPrefetch = s->dst_y_prefetch_equ;
+ s->TimeForFetchingMetaPTE = s->Tvm_equ;
+ s->TimeForFetchingRowInVBlank = s->Tr0_equ;
+
+ if (p->VStartup == p->MaxVStartup && p->EnhancedPrefetchScheduleAccelerationFinal != 0) {
+ *p->DestinationLinesToRequestVMInVBlank = dml_floor(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
+ *p->DestinationLinesToRequestRowInVBlank = dml_floor(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
+ } else {
+ *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
+ *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
+ }
+ }
+
+ s->LinesToRequestPrefetchPixelData = *p->DestinationLinesForPrefetch - *p->DestinationLinesToRequestVMInVBlank - 2 * *p->DestinationLinesToRequestRowInVBlank;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *p->DestinationLinesForPrefetch);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
+#endif
+
+ if (s->LinesToRequestPrefetchPixelData >= 1 && s->prefetch_bw_equ > 0) {
+ *p->VRatioPrefetchY = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
+ *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
+ dml_print("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
+ dml_print("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
+#endif
+ if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
+ if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
+ *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY,
+ (dml_float_t)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
+ } else {
+ s->MyError = true;
+ dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
+ *p->VRatioPrefetchY = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
+ dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ dml_print("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
+#endif
+ }
+
+ *p->VRatioPrefetchC = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
+ *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
+ dml_print("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
+ dml_print("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
+#endif
+ if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
+ if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
+ *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, (dml_float_t)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
+ } else {
+ s->MyError = true;
+ dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
+ *p->VRatioPrefetchC = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
+ dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
+ dml_print("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
+#endif
+ }
+
+ *p->RequiredPrefetchPixDataBWLuma = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData
+ * p->myPipe->BytePerPixelY
+ * p->swath_width_luma_ub / s->LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
+ dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
+ dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixDataBWLuma);
+#endif
+ *p->RequiredPrefetchPixDataBWChroma = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData
+ *p->myPipe->BytePerPixelC
+ *p->swath_width_chroma_ub / s->LineTime;
+ } else {
+ s->MyError = true;
+ dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", __func__, s->LinesToRequestPrefetchPixelData);
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixDataBWLuma = 0;
+ *p->RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingMetaPTE);
+ dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE);
+ dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank);
+ dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime);
+ dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime);
+ dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
+ dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
+ dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
+
+ } else {
+ s->MyError = true;
+ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
+ s->TimeForFetchingMetaPTE = 0;
+ s->TimeForFetchingRowInVBlank = 0;
+ *p->DestinationLinesToRequestVMInVBlank = 0;
+ *p->DestinationLinesToRequestRowInVBlank = 0;
+ s->LinesToRequestPrefetchPixelData = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixDataBWLuma = 0;
+ *p->RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ {
+ dml_float_t prefetch_vm_bw;
+ dml_float_t prefetch_row_bw;
+
+ if (p->PDEAndMetaPTEBytesFrame == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*p->DestinationLinesToRequestVMInVBlank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
+ dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+#endif
+ prefetch_vm_bw = p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / (*p->DestinationLinesToRequestVMInVBlank * s->LineTime);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ s->MyError = true;
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
+ }
+
+ if (p->MetaRowByte + p->PixelPTEBytesPerRow == 0) {
+ prefetch_row_bw = 0;
+ } else if (*p->DestinationLinesToRequestRowInVBlank > 0) {
+ prefetch_row_bw = (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (*p->DestinationLinesToRequestRowInVBlank * s->LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
+ dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
+ dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ s->MyError = true;
+ dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
+ }
+
+ *p->prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
+ }
+
+ if (s->MyError) {
+ s->TimeForFetchingMetaPTE = 0;
+ s->TimeForFetchingRowInVBlank = 0;
+ *p->DestinationLinesToRequestVMInVBlank = 0;
+ *p->DestinationLinesToRequestRowInVBlank = 0;
+ *p->DestinationLinesForPrefetch = 0;
+ s->LinesToRequestPrefetchPixelData = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixDataBWLuma = 0;
+ *p->RequiredPrefetchPixDataBWChroma = 0;
+ }
+
+ return s->MyError;
+} // CalculatePrefetchSchedule
+
+/// @brief Derive bytes per pixel, 256-byte block dimensions and macro tile
+/// dimensions for the luma (Y) and chroma (C) planes of a surface.
+///
+/// Formats whose chroma byte count is 0 report 0 for every chroma block and macro
+/// tile dimension. A format without an explicit case in the switch below is given
+/// 2 (Y) / 4 (C) bytes per pixel and DET byte counts of 4/3 and 8/3.
+///
+/// @param SourcePixelFormat pixel format of the surface
+/// @param SurfaceTiling swizzle mode of the surface
+/// @param BytePerPixelY, BytePerPixelC [out] integer bytes per pixel per plane
+/// @param BytePerPixelDETY, BytePerPixelDETC [out] bytes per pixel as a float; equal
+///        to the integer values except for the fall-through formats
+/// @param BlockHeight256BytesY/C, BlockWidth256BytesY/C [out] block dimensions such
+///        that width * height * bytes per pixel covers 256 bytes
+/// @param MacroTileHeightY/C, MacroTileWidthY/C [out] macro tile dimensions
+static void CalculateBytePerPixelAndBlockSizes(
+		enum dml_source_format_class SourcePixelFormat,
+		enum dml_swizzle_mode SurfaceTiling,
+
+		// Output
+		dml_uint_t *BytePerPixelY,
+		dml_uint_t *BytePerPixelC,
+		dml_float_t *BytePerPixelDETY,
+		dml_float_t *BytePerPixelDETC,
+		dml_uint_t *BlockHeight256BytesY,
+		dml_uint_t *BlockHeight256BytesC,
+		dml_uint_t *BlockWidth256BytesY,
+		dml_uint_t *BlockWidth256BytesC,
+		dml_uint_t *MacroTileHeightY,
+		dml_uint_t *MacroTileHeightC,
+		dml_uint_t *MacroTileWidthY,
+		dml_uint_t *MacroTileWidthC)
+{
+	dml_uint_t luma_bpp;
+	dml_uint_t chroma_bpp;
+	dml_uint_t blk_h_y;
+	dml_uint_t blk_h_c;
+	dml_uint_t tile_bytes;
+	dml_uint_t tile_h_factor;
+	int fractional_det = 0;
+
+	// Step 1: bytes per pixel for each plane.
+	switch (SourcePixelFormat) {
+	case dml_444_64:
+		luma_bpp = 8;
+		chroma_bpp = 0;
+		break;
+	case dml_444_32:
+	case dml_rgbe:
+		luma_bpp = 4;
+		chroma_bpp = 0;
+		break;
+	case dml_444_16:
+	case dml_mono_16:
+		luma_bpp = 2;
+		chroma_bpp = 0;
+		break;
+	case dml_444_8:
+	case dml_mono_8:
+		luma_bpp = 1;
+		chroma_bpp = 0;
+		break;
+	case dml_rgbe_alpha:
+		luma_bpp = 4;
+		chroma_bpp = 1;
+		break;
+	case dml_420_8:
+		luma_bpp = 1;
+		chroma_bpp = 2;
+		break;
+	case dml_420_12:
+		luma_bpp = 2;
+		chroma_bpp = 4;
+		break;
+	default:
+		// Only here do the DET byte counts differ from the integer ones.
+		luma_bpp = 2;
+		chroma_bpp = 4;
+		fractional_det = 1;
+		break;
+	}
+
+	if (fractional_det) {
+		*BytePerPixelDETY = (dml_float_t) (4.0 / 3);
+		*BytePerPixelDETC = (dml_float_t) (8.0 / 3);
+	} else {
+		*BytePerPixelDETY = luma_bpp;
+		*BytePerPixelDETC = chroma_bpp;
+	}
+	*BytePerPixelY = luma_bpp;
+	*BytePerPixelC = chroma_bpp;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
+	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
+	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
+	dml_print("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
+	dml_print("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
+#endif
+
+	// Step 2: height of a 256-byte block. The formats with no chroma bytes are
+	// exactly the 444, mono and rgbe formats, which have no chroma block at all.
+	if (chroma_bpp == 0) {
+		if (SurfaceTiling == dml_sw_linear)
+			blk_h_y = 1;
+		else if (SourcePixelFormat == dml_444_64)
+			blk_h_y = 4;
+		else if (SourcePixelFormat == dml_444_8)
+			blk_h_y = 16;
+		else
+			blk_h_y = 8;
+		blk_h_c = 0;
+	} else if (SurfaceTiling == dml_sw_linear) {
+		blk_h_y = 1;
+		blk_h_c = 1;
+	} else if (SourcePixelFormat == dml_rgbe_alpha) {
+		blk_h_y = 8;
+		blk_h_c = 16;
+	} else if (SourcePixelFormat == dml_420_8) {
+		blk_h_y = 16;
+		blk_h_c = 8;
+	} else {
+		blk_h_y = 8;
+		blk_h_c = 8;
+	}
+
+	// The width follows from the 256-byte budget; a missing chroma block has width 0.
+	*BlockHeight256BytesY = blk_h_y;
+	*BlockHeight256BytesC = blk_h_c;
+	*BlockWidth256BytesY = 256U / luma_bpp / blk_h_y;
+	*BlockWidth256BytesC = (blk_h_c == 0) ? 0 : 256U / chroma_bpp / blk_h_c;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
+	dml_print("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
+	dml_print("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
+	dml_print("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
+#endif
+
+	// Step 3: macro tile. Its height is a multiple of the block height and its
+	// width follows from the byte size selected by the swizzle mode.
+	if (SurfaceTiling == dml_sw_linear) {
+		tile_bytes = 256;
+		tile_h_factor = 1;
+	} else if (SurfaceTiling == dml_sw_64kb_d || SurfaceTiling == dml_sw_64kb_d_t || SurfaceTiling == dml_sw_64kb_d_x || SurfaceTiling == dml_sw_64kb_r_x) {
+		tile_bytes = 65536;
+		tile_h_factor = 16;
+	} else {
+		tile_bytes = 65536 * 4;
+		tile_h_factor = 32;
+	}
+
+	*MacroTileHeightY = tile_h_factor * blk_h_y;
+	*MacroTileWidthY = tile_bytes / luma_bpp / *MacroTileHeightY;
+	*MacroTileHeightC = tile_h_factor * blk_h_c;
+	if (*MacroTileHeightC == 0)
+		*MacroTileWidthC = 0; // no chroma plane; also avoids dividing by zero
+	else
+		*MacroTileWidthC = tile_bytes / chroma_bpp / *MacroTileHeightC;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
+	dml_print("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
+	dml_print("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
+	dml_print("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
+#endif
+} // CalculateBytePerPixelAndBlockSizes
+
+/// @brief Pick the wait time (TWait) that matches the requested prefetch mode.
+///
+/// The result is UrgentLatency alone when the phantom-pipe MALL mode is in use or
+/// PrefetchMode is above 2. Otherwise it is the largest of UrgentLatency and the
+/// latencies the mode has to cover:
+///   - mode 0, with no full-frame/sub-viewport MALL use and no synchronized DRR
+///     display: DRAM clock change latency (plus urgent) and SR enter+exit time;
+///   - remaining mode 0 cases and mode 1: FCLK change latency (plus urgent) and
+///     SR enter+exit time;
+///   - mode 2: SR enter+exit time.
+///
+/// @return the selected wait time
+static noinline_for_stack dml_float_t CalculateTWait(
+		dml_uint_t PrefetchMode,
+		enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+		dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+		dml_bool_t DRRDisplay,
+		dml_float_t DRAMClockChangeLatency,
+		dml_float_t FCLKChangeLatency,
+		dml_float_t UrgentLatency,
+		dml_float_t SREnterPlusExitTime)
+{
+	const dml_bool_t phantom_pipe = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe);
+	const dml_bool_t mall_frame_or_subvp = (UseMALLForPStateChange == dml_use_mall_pstate_change_full_frame) ||
+			(UseMALLForPStateChange == dml_use_mall_pstate_change_sub_viewport);
+	const dml_bool_t drr_synchronized = SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay;
+	dml_float_t wait_time;
+
+	if (phantom_pipe || PrefetchMode > 2) {
+		// None of the mode-specific branches below can apply.
+		wait_time = UrgentLatency;
+	} else if (PrefetchMode == 0 && !mall_frame_or_subvp && !drr_synchronized) {
+		wait_time = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+	} else if (PrefetchMode <= 1) {
+		wait_time = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
+	} else {
+		wait_time = dml_max(SREnterPlusExitTime, UrgentLatency);
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: PrefetchMode = %u\n", __func__, PrefetchMode);
+	dml_print("DML::%s: TWait = %f\n", __func__, wait_time);
+#endif
+	return wait_time;
+} // CalculateTWait
+
+
+/// @brief Select the range of prefetch modes to evaluate.
+///
+/// Prefetch modes run from 0 (all power saving features considered) to 3 (only
+/// urgent latency considered). When AllowForPStateChangeOrStutterInVBlank is the
+/// "*_if_possible" setting there is no specific requirement, so the whole range
+/// 0..3 is returned and the modes can be tried in decreasing order of difficulty.
+/// Any other valid setting is a specific requirement, so mode evaluation is done
+/// only at the single matching mode:
+///   dml_prefetch_support_uclk_fclk_and_stutter -> 0
+///   dml_prefetch_support_fclk_and_stutter      -> 1
+///   dml_prefetch_support_stutter               -> 2
+///   dml_prefetch_support_none                  -> 3
+///
+/// An unrecognized setting is reported and asserted on; both outputs are then set
+/// to 3 so that they are never left unwritten.
+///
+/// @param AllowForPStateChangeOrStutterInVBlank requested prefetch support
+/// @param MinPrefetchMode [out] first prefetch mode to evaluate
+/// @param MaxPrefetchMode [out] last prefetch mode to evaluate
+static void CalculatePrefetchMode(
+		enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
+		dml_uint_t *MinPrefetchMode,
+		dml_uint_t *MaxPrefetchMode)
+{
+	if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter_if_possible) {
+		*MinPrefetchMode = 0; // consider all pwr saving features
+		*MaxPrefetchMode = 3; // consider just urgent latency
+		return;
+	}
+
+	if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_none) {
+		*MinPrefetchMode = 3;
+	} else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_stutter) {
+		*MinPrefetchMode = 2;
+	} else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_fclk_and_stutter) {
+		*MinPrefetchMode = 1;
+	} else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter) {
+		*MinPrefetchMode = 0;
+	} else {
+		dml_print("ERROR: Invalid AllowForPStateChangeOrStutterInVBlank setting! val=%u\n", AllowForPStateChangeOrStutterInVBlank);
+		ASSERT(0);
+		// If ASSERT() does not stop execution, the copy below would otherwise read
+		// a value this function never wrote. Fall back to the mode used for
+		// dml_prefetch_support_none.
+		// NOTE(review): confirm 3 is the preferred fallback policy.
+		*MinPrefetchMode = 3;
+	}
+	*MaxPrefetchMode = *MinPrefetchMode;
+} // CalculatePrefetchMode
+
+/// @brief Compute the DISPCLK needed for writeback.
+///
+/// Three candidate clocks are derived from the pixel clock, the scaler taps and
+/// ratio, the source/destination widths, HTotal and the writeback line buffer size.
+/// They are combined with dml_max3() and the result is passed through
+/// RoundToDFSGranularity() together with DISPCLKDPPCLKVCOSpeed.
+///
+/// WritebackPixelFormat and WritebackVRatio are accepted but not used.
+///
+/// @return the value returned by RoundToDFSGranularity()
+static dml_float_t CalculateWriteBackDISPCLK(
+		enum dml_source_format_class WritebackPixelFormat,
+		dml_float_t PixelClock,
+		dml_float_t WritebackHRatio,
+		dml_float_t WritebackVRatio,
+		dml_uint_t WritebackHTaps,
+		dml_uint_t WritebackVTaps,
+		dml_uint_t WritebackSourceWidth,
+		dml_uint_t WritebackDestinationWidth,
+		dml_uint_t HTotal,
+		dml_uint_t WritebackLineBufferSize,
+		dml_float_t DISPCLKDPPCLKVCOSpeed)
+{
+	// Horizontal taps are counted in groups of 8, destination pixels in groups of 6.
+	const dml_float_t h_tap_groups = dml_ceil(WritebackHTaps / 8.0, 1);
+	const dml_float_t dst_width_groups = dml_ceil(WritebackDestinationWidth / 6.0, 1);
+
+	const dml_float_t dispclk_h = PixelClock * h_tap_groups / WritebackHRatio;
+	const dml_float_t dispclk_v = PixelClock * (WritebackVTaps * dst_width_groups + 8.0) / (dml_float_t) HTotal;
+	const dml_float_t dispclk_hb = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (dml_float_t) WritebackSourceWidth;
+
+	return RoundToDFSGranularity(dml_max3(dispclk_h, dispclk_v, dispclk_hb), 1, DISPCLKDPPCLKVCOSpeed);
+}
+
+/// @brief Compute the writeback delay from the scaler settings and frame geometry.
+///
+/// The unclamped index of the last output line is derived from the destination
+/// height, the source height, the vertical ratio and the vertical init value.
+/// A negative index yields a delay of 0. Otherwise the delay is that index times
+/// the line length, plus (HTotal - WritebackDestinationWidth), plus 80.
+///
+/// WritebackPixelFormat and WritebackHRatio are accepted but not used.
+///
+/// @return the computed delay, or 0 when the last output line index is negative
+static dml_float_t CalculateWriteBackDelay(
+		enum dml_source_format_class WritebackPixelFormat,
+		dml_float_t WritebackHRatio,
+		dml_float_t WritebackVRatio,
+		dml_uint_t WritebackVTaps,
+		dml_uint_t WritebackDestinationWidth,
+		dml_uint_t WritebackDestinationHeight,
+		dml_uint_t WritebackSourceHeight,
+		dml_uint_t HTotal)
+{
+	const dml_float_t v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
+	// The line length is at least the destination width.
+	const dml_float_t line_length = dml_max((dml_float_t) WritebackDestinationWidth, dml_ceil((dml_float_t)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
+	const dml_float_t last_output_line = WritebackDestinationHeight - 1 - dml_ceil(((dml_float_t)WritebackSourceHeight - v_init) / (dml_float_t)WritebackVRatio, 1.0);
+
+	if (last_output_line < 0)
+		return 0;
+
+	return last_output_line * line_length + (HTotal - WritebackDestinationWidth) + 80;
+}
+
+/// @brief Derive the VUpdate/VReady pixel counts, TSetup and the dynamic metadata timing terms
+///
+/// Every result is written through the output pointers; nothing is returned by value.
+/// The three pixel counts are written first because TSetup is computed from them.
+static void CalculateVUpdateAndDynamicMetadataParameters(
+		dml_uint_t MaxInterDCNTileRepeaters,
+		dml_float_t Dppclk,
+		dml_float_t Dispclk,
+		dml_float_t DCFClkDeepSleep,
+		dml_float_t PixelClock,
+		dml_uint_t HTotal,
+		dml_uint_t VBlank,
+		dml_uint_t DynamicMetadataTransmittedBytes,
+		dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
+		dml_uint_t InterlaceEnable,
+		dml_bool_t ProgressiveToInterlaceUnitInOPP,
+
+		// Output
+		dml_float_t *TSetup,
+		dml_float_t *Tdmbf,
+		dml_float_t *Tdmec,
+		dml_float_t *Tdmsks,
+		dml_uint_t *VUpdateOffsetPix,
+		dml_uint_t *VUpdateWidthPix,
+		dml_uint_t *VReadyOffsetPix)
+{
+	// Repeater contribution shared by the VUpdate width and VReady offset below
+	const dml_float_t repeater_delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
+
+	*VUpdateWidthPix = (dml_uint_t)(dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + repeater_delay) * PixelClock, 1.0));
+	*VReadyOffsetPix = (dml_uint_t)(dml_ceil(dml_max(150.0 / Dppclk, repeater_delay + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
+	*VUpdateOffsetPix = (dml_uint_t)(dml_ceil(HTotal / 4.0, 1.0));
+	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
+	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
+	*Tdmec = HTotal / PixelClock;
+
+	// Without an explicit line requirement, half of VBlank * HTotal / PixelClock is used
+	*Tdmsks = (DynamicMetadataLinesBeforeActiveRequired == 0) ?
+			VBlank * HTotal / PixelClock / 2.0 :
+			DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+
+	// Interlaced timing without the progressive-to-interlace unit in OPP halves the value
+	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
+		*Tdmsks = *Tdmsks / 2;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
+	dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
+	dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
+	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+	dml_print("DML::%s: Dppclk = %f\n", __func__, Dppclk);
+	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
+	dml_print("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
+	dml_print("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, repeater_delay);
+
+	dml_print("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
+	dml_print("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
+	dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);
+
+	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
+#endif
+}
+
+/// @brief Bandwidth needed to fetch the DCC meta rows and the DPTE rows of one surface
+///
+/// *meta_row_bw is 0 unless DCCEnable is true and *dpte_row_bw is 0 unless GPUVMEnable is true.
+/// For the 4:2:0 formats and dml_rgbe_alpha a chroma term is added to the luma term; every
+/// other format uses the luma term alone.
+static void CalculateRowBandwidth(
+		dml_bool_t GPUVMEnable,
+		enum dml_source_format_class SourcePixelFormat,
+		dml_float_t VRatio,
+		dml_float_t VRatioChroma,
+		dml_bool_t DCCEnable,
+		dml_float_t LineTime,
+		dml_uint_t MetaRowByteLuma,
+		dml_uint_t MetaRowByteChroma,
+		dml_uint_t meta_row_height_luma,
+		dml_uint_t meta_row_height_chroma,
+		dml_uint_t PixelPTEBytesPerRowLuma,
+		dml_uint_t PixelPTEBytesPerRowChroma,
+		dml_uint_t dpte_row_height_luma,
+		dml_uint_t dpte_row_height_chroma,
+		// Output
+		dml_float_t *meta_row_bw,
+		dml_float_t *dpte_row_bw)
+{
+	// Formats for which the second (chroma) plane contributes to the row bandwidth
+	const dml_bool_t has_second_plane = (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha);
+
+	if (DCCEnable == true) {
+		if (has_second_plane) {
+			*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
+					+ VRatioChroma * MetaRowByteChroma
+						/ (meta_row_height_chroma * LineTime);
+		} else {
+			*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
+		}
+	} else {
+		*meta_row_bw = 0;
+	}
+
+	if (GPUVMEnable == true) {
+		if (has_second_plane) {
+			*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
+					+ VRatioChroma * PixelPTEBytesPerRowChroma
+						/ (dpte_row_height_chroma * LineTime);
+		} else {
+			*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+		}
+	} else {
+		*dpte_row_bw = 0;
+	}
+}
+
+/// @brief Work out the immediate flip schedule of one pipe from the bandwidth left after the prefetch schedule
+///
+/// The pipe receives a share of BandwidthAvailableForImmediateFlip proportional to its own flip
+/// bytes over TotImmediateFlipBytes. From that share the VM and row fetch times, the matching
+/// destination line counts (in quarter-line steps) and the final flip bandwidth are derived.
+/// Immediate flip is reported as unsupported when the VM line count is >= 32, the row line count
+/// is >= 16, or the VM fetch time plus twice the row fetch time exceeds the minimum row time.
+/// @param BandwidthAvailableForImmediateFlip Bandwidth available for iflip for all planes
+static void CalculateFlipSchedule(
+		dml_float_t HostVMInefficiencyFactor,
+		dml_float_t UrgentExtraLatency,
+		dml_float_t UrgentLatency,
+		dml_uint_t GPUVMMaxPageTableLevels,
+		dml_bool_t HostVMEnable,
+		dml_uint_t HostVMMaxNonCachedPageTableLevels,
+		dml_bool_t GPUVMEnable,
+		dml_uint_t HostVMMinPageSize,
+		dml_float_t PDEAndMetaPTEBytesPerFrame,
+		dml_float_t MetaRowBytes,
+		dml_float_t DPTEBytesPerRow,
+		dml_float_t BandwidthAvailableForImmediateFlip,
+		dml_uint_t TotImmediateFlipBytes,
+		enum dml_source_format_class SourcePixelFormat,
+		dml_float_t LineTime,
+		dml_float_t VRatio,
+		dml_float_t VRatioChroma,
+		dml_float_t Tno_bw,
+		dml_bool_t DCCEnable,
+		dml_uint_t dpte_row_height,
+		dml_uint_t meta_row_height,
+		dml_uint_t dpte_row_height_chroma,
+		dml_uint_t meta_row_height_chroma,
+		dml_bool_t use_one_row_for_frame_flip,
+
+		// Output
+		dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
+		dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
+		dml_float_t *final_flip_bw,
+		dml_bool_t *ImmediateFlipSupportedForPipe)
+{
+	dml_float_t min_row_time = 0.0;
+	dml_float_t vm_fetch_time = 0; // time to fetch the PDEs and meta PTEs during the flip
+	dml_float_t row_fetch_time = 0; // time to fetch one meta row plus one DPTE row during the flip
+	dml_float_t ImmediateFlipBW = 0; // @brief The immediate flip bandwidth for this pipe
+	// Host VM page walks only count when both GPUVM and HostVM are enabled
+	const dml_uint_t HostVMDynamicLevelsTrips = (GPUVMEnable == true && HostVMEnable == true) ? HostVMMaxNonCachedPageTableLevels : 0;
+	// Formats whose chroma plane row heights take part in the minimum row time
+	const dml_bool_t chroma_rows_count = (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_rgbe_alpha);
+	const dml_bool_t dpte_only = (GPUVMEnable == true && DCCEnable != true);
+	const dml_bool_t meta_only = (GPUVMEnable != true && DCCEnable == true);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
+	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+	dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
+	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+#endif
+
+	if (TotImmediateFlipBytes > 0) {
+		// One-row-per-frame flips count the DPTE row bytes twice
+		const dml_float_t dpte_flip_bytes = use_one_row_for_frame_flip ? 2.0 * DPTEBytesPerRow : DPTEBytesPerRow;
+
+		ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + dpte_flip_bytes) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
+
+		if (GPUVMEnable == true) {
+			vm_fetch_time = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
+					UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
+					LineTime / 4.0);
+		}
+
+		if (GPUVMEnable == true || DCCEnable == true) {
+			row_fetch_time = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
+		}
+
+		// Both line counts are rounded up to the next quarter line
+		*DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (vm_fetch_time / LineTime), 1.0) / 4.0;
+		*DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (row_fetch_time / LineTime), 1.0) / 4.0;
+
+		if (GPUVMEnable == true) {
+			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
+					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
+		} else if (DCCEnable == true) {
+			// GPUVM is known to be off on this path, so only the row fetch contributes
+			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
+		} else {
+			*final_flip_bw = 0;
+		}
+	} else {
+		// Nothing to fetch: both fetch times keep their initial value of 0
+		*DestinationLinesToRequestVMInImmediateFlip = 0;
+		*DestinationLinesToRequestRowInImmediateFlip = 0;
+		*final_flip_bw = 0;
+	}
+
+	// Shortest time a row stays on screen, over the row kinds (and planes) that are in use
+	if (dpte_only) {
+		if (chroma_rows_count)
+			min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
+		else
+			min_row_time = dpte_row_height * LineTime / VRatio;
+	} else if (meta_only) {
+		if (chroma_rows_count)
+			min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
+		else
+			min_row_time = meta_row_height * LineTime / VRatio;
+	} else {
+		if (chroma_rows_count)
+			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
+		else
+			min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
+	}
+
+	// Written as a negated "too slow" test so comparisons behave the same for every input
+	*ImmediateFlipSupportedForPipe = !(*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 || vm_fetch_time + 2 * row_fetch_time > min_row_time);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+	dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
+
+	dml_print("DML::%s: MetaRowBytes = %f\n", __func__, MetaRowBytes);
+	dml_print("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
+	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+	dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
+	dml_print("DML::%s: ImmediateFlipBW = %f\n", __func__, ImmediateFlipBW);
+	dml_print("DML::%s: PDEAndMetaPTEBytesPerFrame = %f\n", __func__, PDEAndMetaPTEBytesPerFrame);
+	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+	dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
+	dml_print("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
+
+	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
+	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
+	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, vm_fetch_time);
+	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, row_fetch_time);
+	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
+	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
+#endif
+} // CalculateFlipSchedule
+
+/// @brief Snap a clock to a value of the form VCOSpeed * 4.0 / divider
+///
+/// The divider is VCOSpeed * 4.0 / Clock passed through dml_floor() when round_up is set
+/// and through dml_ceil() otherwise, both with a granularity argument of 1.0.
+/// A Clock that is <= 0.0 yields 0.0.
+static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed)
+{
+	dml_float_t divider;
+
+	if (Clock <= 0.0)
+		return 0.0;
+
+	// The floored divider is used for round_up, the ceiled divider otherwise
+	divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0) : dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);
+
+	return VCOSpeed * 4.0 / divider;
+}
+
+/// @brief Choose the DCC block sizes for the luma and chroma planes of a surface
+///
+/// The function estimates the full-swath byte footprint for horizontal and vertical scan,
+/// decides per plane whether 128-byte requests are needed to fit the DET budget
+/// (nomDETInKByte * 1024 bytes), and converts that decision into the max uncompressed,
+/// max compressed and independent block sizes.
+/// All six outputs are forced to 0 when DCCEnabled is not true; the chroma outputs are also
+/// forced to 0 when BytePerPixelC is 0.
+/// SurfaceWidthChroma, SurfaceHeightChroma, TilingFormat, BytePerPixelDETY and
+/// BytePerPixelDETC are accepted but not read here.
+static void CalculateDCCConfiguration(
+		dml_bool_t DCCEnabled,
+		dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
+		enum dml_source_format_class SourcePixelFormat,
+		dml_uint_t SurfaceWidthLuma,
+		dml_uint_t SurfaceWidthChroma,
+		dml_uint_t SurfaceHeightLuma,
+		dml_uint_t SurfaceHeightChroma,
+		dml_uint_t nomDETInKByte,
+		dml_uint_t RequestHeight256ByteLuma,
+		dml_uint_t RequestHeight256ByteChroma,
+		enum dml_swizzle_mode TilingFormat,
+		dml_uint_t BytePerPixelY,
+		dml_uint_t BytePerPixelC,
+		dml_float_t BytePerPixelDETY,
+		dml_float_t BytePerPixelDETC,
+		enum dml_rotation_angle SourceScan,
+		// Output
+		dml_uint_t *MaxUncompressedBlockLuma,
+		dml_uint_t *MaxUncompressedBlockChroma,
+		dml_uint_t *MaxCompressedBlockLuma,
+		dml_uint_t *MaxCompressedBlockChroma,
+		dml_uint_t *IndependentBlockLuma,
+		dml_uint_t *IndependentBlockChroma)
+{
+	typedef enum{
+		REQ_256Bytes,
+		REQ_128BytesNonContiguous,
+		REQ_128BytesContiguous,
+		REQ_NA
+	} RequestType;
+
+	const dml_uint_t DETBufferSizeForDCC = nomDETInKByte * 1024;
+
+	const dml_uint_t yuv420 = ((SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12) ? 1 : 0);
+	const dml_uint_t horz_div_l = 1;
+	const dml_uint_t horz_div_c = 1;
+	// The vertical divisor drops to 0 for planes with one byte per pixel
+	const dml_uint_t vert_div_l = (BytePerPixelY == 1) ? 0 : 1;
+	const dml_uint_t vert_div_c = (BytePerPixelC == 1) ? 0 : 1;
+
+	dml_uint_t swath_buf_size;
+	dml_float_t detile_buf_vp_horz_limit;
+	dml_float_t detile_buf_vp_vert_limit;
+
+	dml_uint_t MAS_vp_horz_limit;
+	dml_uint_t MAS_vp_vert_limit;
+	dml_uint_t max_vp_horz_width;
+	dml_uint_t max_vp_vert_height;
+	dml_uint_t eff_surf_width_l;
+	dml_uint_t eff_surf_width_c;
+	dml_uint_t eff_surf_height_l;
+	dml_uint_t eff_surf_height_c;
+
+	dml_uint_t full_swath_bytes_horz_wc_l;
+	dml_uint_t full_swath_bytes_horz_wc_c = 0;
+	dml_uint_t full_swath_bytes_vert_wc_l;
+	dml_uint_t full_swath_bytes_vert_wc_c = 0;
+
+	// 1 = the plane needs 128-byte requests for that scan direction
+	dml_uint_t req128_horz_wc_l = 1;
+	dml_uint_t req128_horz_wc_c = 1;
+	dml_uint_t req128_vert_wc_l = 1;
+	dml_uint_t req128_vert_wc_c = 1;
+
+	// Segment order per plane and scan direction; flips when the plane has two bytes per pixel
+	const dml_uint_t segment_order_horz_contiguous_luma = (BytePerPixelY == 2) ? 0 : 1;
+	const dml_uint_t segment_order_vert_contiguous_luma = (BytePerPixelY == 2) ? 1 : 0;
+	const dml_uint_t segment_order_horz_contiguous_chroma = (BytePerPixelC == 2) ? 0 : 1;
+	const dml_uint_t segment_order_vert_contiguous_chroma = (BytePerPixelC == 2) ? 1 : 0;
+
+	// Which scan directions have to be honoured when picking the request type
+	dml_uint_t check_horz;
+	dml_uint_t check_vert;
+	dml_uint_t luma_needs_128;
+	dml_uint_t luma_non_contiguous;
+	dml_uint_t chroma_needs_128;
+	dml_uint_t chroma_non_contiguous;
+
+	RequestType RequestLuma;
+	RequestType RequestChroma;
+
+	// Viewport limits imposed by the detile buffer; the two-plane case reserves more headroom
+	if (BytePerPixelC == 0) {
+		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
+		detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
+		detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
+	} else {
+		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
+		detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (dml_float_t) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
+		detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
+	}
+
+	if (SourcePixelFormat == dml_420_10) {
+		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
+		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
+	}
+
+	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
+	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
+
+	MAS_vp_horz_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : 6144;
+	MAS_vp_vert_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
+	max_vp_horz_width = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_horz_limit, detile_buf_vp_horz_limit));
+	max_vp_vert_height = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_vert_limit, detile_buf_vp_vert_limit));
+
+	// Clamp the luma surface to the viewport limits; chroma follows luma
+	eff_surf_width_l = SurfaceWidthLuma;
+	if (eff_surf_width_l > max_vp_horz_width)
+		eff_surf_width_l = max_vp_horz_width;
+	eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
+
+	eff_surf_height_l = SurfaceHeightLuma;
+	if (eff_surf_height_l > max_vp_vert_height)
+		eff_surf_height_l = max_vp_vert_height;
+	eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
+
+	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
+	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
+	if (BytePerPixelC > 0) {
+		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
+		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
+	}
+
+	if (SourcePixelFormat == dml_420_10) {
+		full_swath_bytes_horz_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
+		full_swath_bytes_horz_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
+		full_swath_bytes_vert_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
+		full_swath_bytes_vert_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
+	}
+
+	// Horizontal scan: both planes start at 128-byte requests and are relaxed where the swaths fit
+	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+		req128_horz_wc_l = 0;
+		req128_horz_wc_c = 0;
+	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c) {
+		if (2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC)
+			req128_horz_wc_l = 0;
+	} else if (full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+		req128_horz_wc_c = 0;
+	}
+
+	// Vertical scan: same decision on the vertical swath footprints
+	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+		req128_vert_wc_l = 0;
+		req128_vert_wc_c = 0;
+	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c) {
+		if (2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC)
+			req128_vert_wc_l = 0;
+	} else if (full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+		req128_vert_wc_c = 0;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
+	dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
+	dml_print("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
+	dml_print("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
+	dml_print("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
+	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
+	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
+	dml_print("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
+	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
+#endif
+
+	// Unknown scan direction honours both directions; otherwise only the actual one
+	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
+		check_horz = 1;
+		check_vert = 1;
+	} else if (!dml_is_vertical_rotation(SourceScan)) {
+		check_horz = 1;
+		check_vert = 0;
+	} else {
+		check_horz = 0;
+		check_vert = 1;
+	}
+
+	// A plane keeps 256-byte requests only if no honoured direction needs 128-byte requests;
+	// it becomes non-contiguous if any honoured direction needs them with segment order 0
+	luma_needs_128 = (check_horz && req128_horz_wc_l != 0) || (check_vert && req128_vert_wc_l != 0);
+	luma_non_contiguous = (check_horz && req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
+			(check_vert && req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0);
+	chroma_needs_128 = (check_horz && req128_horz_wc_c != 0) || (check_vert && req128_vert_wc_c != 0);
+	chroma_non_contiguous = (check_horz && req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
+			(check_vert && req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0);
+
+	if (!luma_needs_128)
+		RequestLuma = REQ_256Bytes;
+	else if (luma_non_contiguous)
+		RequestLuma = REQ_128BytesNonContiguous;
+	else
+		RequestLuma = REQ_128BytesContiguous;
+
+	if (!chroma_needs_128)
+		RequestChroma = REQ_256Bytes;
+	else if (chroma_non_contiguous)
+		RequestChroma = REQ_128BytesNonContiguous;
+	else
+		RequestChroma = REQ_128BytesContiguous;
+
+	// Translate the request type into block sizes; the uncompressed block is always 256
+	*MaxUncompressedBlockLuma = 256;
+	switch (RequestLuma) {
+	case REQ_256Bytes:
+		*MaxCompressedBlockLuma = 256;
+		*IndependentBlockLuma = 0;
+		break;
+	case REQ_128BytesContiguous:
+		*MaxCompressedBlockLuma = 128;
+		*IndependentBlockLuma = 128;
+		break;
+	default:
+		*MaxCompressedBlockLuma = 64;
+		*IndependentBlockLuma = 64;
+		break;
+	}
+
+	*MaxUncompressedBlockChroma = 256;
+	switch (RequestChroma) {
+	case REQ_256Bytes:
+		*MaxCompressedBlockChroma = 256;
+		*IndependentBlockChroma = 0;
+		break;
+	case REQ_128BytesContiguous:
+		*MaxCompressedBlockChroma = 128;
+		*IndependentBlockChroma = 128;
+		break;
+	default:
+		*MaxCompressedBlockChroma = 64;
+		*IndependentBlockChroma = 64;
+		break;
+	}
+
+	// No DCC, or no chroma plane: the chroma outputs are meaningless and reported as 0
+	if (DCCEnabled != true || BytePerPixelC == 0) {
+		*MaxUncompressedBlockChroma = 0;
+		*MaxCompressedBlockChroma = 0;
+		*IndependentBlockChroma = 0;
+	}
+
+	if (DCCEnabled != true) {
+		*MaxUncompressedBlockLuma = 0;
+		*MaxCompressedBlockLuma = 0;
+		*IndependentBlockLuma = 0;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
+	dml_print("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
+	dml_print("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
+	dml_print("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
+	dml_print("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
+	dml_print("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
+#endif
+
+} // CalculateDCCConfiguration
+
+/// @brief Number of source lines that have to be prefetched for one plane
+///
+/// Writes the initial vertical prefill line count to *VInitPreFill and the number of swaths to
+/// *MaxNumSwath, and returns *MaxNumSwath * SwathHeight plus a partial-swath line count.
+/// With a stationary viewport the rotated viewport start is taken into account; otherwise the
+/// result depends on *VInitPreFill and SwathHeight only.
+static dml_uint_t CalculatePrefetchSourceLines(
+		dml_float_t VRatio,
+		dml_uint_t VTaps,
+		dml_bool_t Interlace,
+		dml_bool_t ProgressiveToInterlaceUnitInOPP,
+		dml_uint_t SwathHeight,
+		enum dml_rotation_angle SourceScan,
+		dml_bool_t ViewportStationary,
+		dml_uint_t SwathWidth,
+		dml_uint_t ViewportHeight,
+		dml_uint_t ViewportXStart,
+		dml_uint_t ViewportYStart,
+
+		// Output
+		dml_uint_t *VInitPreFill,
+		dml_uint_t *MaxNumSwath)
+{
+	dml_uint_t rotated_start = 0; // viewport start along the scan direction after rotation
+	dml_uint_t first_swath_lines = 0; // lines left in the swath that contains rotated_start
+	dml_uint_t partial_swath = 0;
+	dml_float_t line_count = 0;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
+	dml_print("DML::%s: VTaps = %u\n", __func__, VTaps);
+	dml_print("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
+	dml_print("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
+	dml_print("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
+	dml_print("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
+#endif
+	// The interlace term only applies when the OPP does not do progressive-to-interlace
+	if (!ProgressiveToInterlaceUnitInOPP)
+		*VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1));
+	else
+		*VInitPreFill = (dml_uint_t)(dml_floor((VRatio + (dml_float_t) VTaps + 1) / 2.0, 1));
+
+	if (!ViewportStationary) {
+		*MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - 1.0) / (dml_float_t) SwathHeight, 1) + 1);
+		if (*VInitPreFill > 1)
+			partial_swath = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill - 2) % SwathHeight));
+		else
+			partial_swath = (dml_uint_t)(dml_max(1, (dml_uint_t) (*VInitPreFill + SwathHeight - 2) % SwathHeight));
+	} else {
+		switch (SourceScan) {
+		case dml_rotation_180:
+		case dml_rotation_180m:
+			rotated_start = SwathHeight - (((dml_uint_t) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
+			break;
+		case dml_rotation_270:
+		case dml_rotation_90m:
+			rotated_start = ViewportXStart;
+			break;
+		case dml_rotation_90:
+		case dml_rotation_270m:
+			rotated_start = SwathHeight - (((dml_uint_t)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
+			break;
+		default:
+			rotated_start = ViewportYStart;
+			break;
+		}
+
+		first_swath_lines = SwathHeight - (rotated_start % SwathHeight);
+		// Extra swaths are only needed when the prefill reaches past the first swath
+		if (first_swath_lines < *VInitPreFill)
+			*MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - first_swath_lines) / (dml_float_t) SwathHeight, 1) + 1);
+		else
+			*MaxNumSwath = 1;
+
+		partial_swath = (dml_uint_t)(dml_max(1, (dml_uint_t) (rotated_start + *VInitPreFill - 1) % SwathHeight));
+	}
+	line_count = *MaxNumSwath * SwathHeight + partial_swath;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: vp_start_rot = %u\n", __func__, rotated_start);
+	dml_print("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
+	dml_print("DML::%s: MaxPartialSwath = %u\n", __func__, partial_swath);
+	dml_print("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
+	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, line_count);
+#endif
+	return (dml_uint_t)(line_count);
+
+} // CalculatePrefetchSourceLines
+
+static dml_uint_t CalculateVMAndRowBytes(
+ dml_bool_t ViewportStationary,
+ dml_bool_t DCCEnable,
+ dml_uint_t NumberOfDPPs,
+ dml_uint_t BlockHeight256Bytes,
+ dml_uint_t BlockWidth256Bytes,
+ enum dml_source_format_class SourcePixelFormat,
+ dml_uint_t SurfaceTiling,
+ dml_uint_t BytePerPixel,
+ enum dml_rotation_angle SourceScan,
+ dml_uint_t SwathWidth,
+ dml_uint_t ViewportHeight,
+ dml_uint_t ViewportXStart,
+ dml_uint_t ViewportYStart,
+ dml_bool_t GPUVMEnable,
+ dml_uint_t GPUVMMaxPageTableLevels,
+ dml_uint_t GPUVMMinPageSizeKBytes,
+ dml_uint_t PTEBufferSizeInRequests,
+ dml_uint_t Pitch,
+ dml_uint_t DCCMetaPitch,
+ dml_uint_t MacroTileWidth,
+ dml_uint_t MacroTileHeight,
+
+ // Output
+ dml_uint_t *MetaRowByte,
+ dml_uint_t *PixelPTEBytesPerRow, // for bandwidth calculation
+ dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
+ dml_uint_t *dpte_row_width_ub,
+ dml_uint_t *dpte_row_height,
+ dml_uint_t *dpte_row_height_linear,
+ dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
+ dml_uint_t *dpte_row_width_ub_one_row_per_frame,
+ dml_uint_t *dpte_row_height_one_row_per_frame,
+ dml_uint_t *MetaRequestWidth,
+ dml_uint_t *MetaRequestHeight,
+ dml_uint_t *meta_row_width,
+ dml_uint_t *meta_row_height,
+ dml_uint_t *PixelPTEReqWidth,
+ dml_uint_t *PixelPTEReqHeight,
+ dml_uint_t *PTERequestSize,
+ dml_uint_t *DPDE0BytesFrame,
+ dml_uint_t *MetaPTEBytesFrame)
+{
+ dml_uint_t MPDEBytesFrame;
+ dml_uint_t DCCMetaSurfaceBytes;
+ dml_uint_t ExtraDPDEBytesFrame;
+ dml_uint_t PDEAndMetaPTEBytesFrame;
+ dml_uint_t MacroTileSizeBytes;
+ dml_uint_t vp_height_meta_ub;
+ dml_uint_t vp_height_dpte_ub;
+
+ dml_uint_t PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
+
+ *MetaRequestHeight = 8 * BlockHeight256Bytes;
+ *MetaRequestWidth = 8 * BlockWidth256Bytes;
+ if (SurfaceTiling == dml_sw_linear) {
+ *meta_row_height = 32;
+ *meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth));
+ } else if (!dml_is_vertical_rotation(SourceScan)) {
+ *meta_row_height = *MetaRequestHeight;
+ if (ViewportStationary && NumberOfDPPs == 1) {
+ *meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth));
+ } else {
+ *meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth);
+ }
+ *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0);
+ } else {
+ *meta_row_height = *MetaRequestWidth;
+ if (ViewportStationary && NumberOfDPPs == 1) {
+ *meta_row_width = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight));
+ } else {
+ *meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight);
+ }
+ *MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0);
+ }
+
+ if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) {
+ vp_height_meta_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes));
+ } else if (!dml_is_vertical_rotation(SourceScan)) {
+ vp_height_meta_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
+ } else {
+ vp_height_meta_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
+ }
+
+ DCCMetaSurfaceBytes = (dml_uint_t)(DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0);
+
+ if (GPUVMEnable == true) {
+ *MetaPTEBytesFrame = (dml_uint_t)((dml_ceil((dml_float_t) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64);
+ MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
+ } else {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ }
+
+ if (DCCEnable != true) {
+ *MetaPTEBytesFrame = 0;
+ MPDEBytesFrame = 0;
+ *MetaRowByte = 0;
+ }
+
+ MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
+
+ if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) {
+ vp_height_dpte_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + MacroTileHeight - 1, MacroTileHeight) - dml_floor(ViewportYStart, MacroTileHeight));
+ } else if (!dml_is_vertical_rotation(SourceScan)) {
+ vp_height_dpte_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight);
+ } else {
+ vp_height_dpte_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight);
+ }
+
+ if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
+ *DPDE0BytesFrame = (dml_uint_t)(64 * (dml_ceil((dml_float_t) (Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / (dml_float_t) (8 * 2097152), 1) + 1));
+ ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
+ } else {
+ *DPDE0BytesFrame = 0;
+ ExtraDPDEBytesFrame = 0;
+ }
+
+ PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
+ dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: SwModeLinear = %u\n", __func__, SurfaceTiling == dml_sw_linear);
+ dml_print("DML::%s: BytePerPixel = %u\n", __func__, BytePerPixel);
+ dml_print("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, GPUVMMaxPageTableLevels);
+ dml_print("DML::%s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
+ dml_print("DML::%s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
+ dml_print("DML::%s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
+ dml_print("DML::%s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
+ dml_print("DML::%s: MetaPTEBytesFrame = %u\n", __func__, *MetaPTEBytesFrame);
+ dml_print("DML::%s: MPDEBytesFrame = %u\n", __func__, MPDEBytesFrame);
+ dml_print("DML::%s: DPDE0BytesFrame = %u\n", __func__, *DPDE0BytesFrame);
+ dml_print("DML::%s: ExtraDPDEBytesFrame= %u\n", __func__, ExtraDPDEBytesFrame);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: ViewportHeight = %u\n", __func__, ViewportHeight);
+ dml_print("DML::%s: SwathWidth = %u\n", __func__, SwathWidth);
+ dml_print("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
+#endif
+
+ if (SurfaceTiling == dml_sw_linear) {
+ *PixelPTEReqHeight = 1;
+ *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+ PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
+ *PTERequestSize = 64;
+ } else if (GPUVMMinPageSizeKBytes == 4) {
+ *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
+ *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
+ *PTERequestSize = 128;
+ } else {
+ *PixelPTEReqHeight = MacroTileHeight;
+ *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
+ *PTERequestSize = 64;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
+ dml_print("DML::%s: PixelPTEReqHeight = %u\n", __func__, *PixelPTEReqHeight);
+ dml_print("DML::%s: PixelPTEReqWidth = %u\n", __func__, *PixelPTEReqWidth);
+ dml_print("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
+ dml_print("DML::%s: PTERequestSize = %u\n", __func__, *PTERequestSize);
+ dml_print("DML::%s: Pitch = %u\n", __func__, Pitch);
+#endif
+
+ *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
+ *dpte_row_width_ub_one_row_per_frame = (dml_uint_t)((dml_ceil(((dml_float_t)Pitch * (dml_float_t) *dpte_row_height_one_row_per_frame / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * (dml_float_t) *PixelPTEReqWidth);
+ *PixelPTEBytesPerRow_one_row_per_frame = (dml_uint_t)((dml_float_t) *dpte_row_width_ub_one_row_per_frame / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);
+
+ if (SurfaceTiling == dml_sw_linear) {
+ *dpte_row_height = (dml_uint_t)(dml_min(128, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)));
+ dml_print("DML::%s: dpte_row_height term 1 = %u\n", __func__, PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
+ dml_print("DML::%s: dpte_row_height term 2 = %f\n", __func__, dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
+ dml_print("DML::%s: dpte_row_height term 3 = %f\n", __func__, dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ dml_print("DML::%s: dpte_row_height term 4 = %u\n", __func__, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
+ dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);
+
+ *dpte_row_width_ub = (dml_uint_t)(dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height - 1), (dml_float_t) *PixelPTEReqWidth) + *PixelPTEReqWidth);
+ *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);
+
+ // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
+ *dpte_row_height_linear = 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * PixelPTEReqWidth_linear / Pitch), 1);
+ if (*dpte_row_height_linear > 128)
+ *dpte_row_height_linear = 128;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *dpte_row_width_ub);
+#endif
+
+ } else if (!dml_is_vertical_rotation(SourceScan)) {
+ *dpte_row_height = *PixelPTEReqHeight;
+
+ if (GPUVMMinPageSizeKBytes > 64) {
+ *dpte_row_width_ub = (dml_uint_t)((dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth);
+ } else if (ViewportStationary && (NumberOfDPPs == 1)) {
+ *dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - dml_floor(ViewportXStart, *PixelPTEReqWidth));
+ } else {
+ *dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t)*PixelPTEReqWidth, 1) + 1.0) * *PixelPTEReqWidth);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *dpte_row_width_ub);
+#endif
+
+ ASSERT(*PixelPTEReqWidth);
+ if (*PixelPTEReqWidth != 0)
+ *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
+ } else {
+ *dpte_row_height = (dml_uint_t)(dml_min(*PixelPTEReqWidth, MacroTileWidth));
+
+ if (ViewportStationary && (NumberOfDPPs == 1)) {
+ *dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight));
+ } else {
+ *dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t) *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight);
+ }
+
+ *PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqHeight * *PTERequestSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *dpte_row_width_ub);
+#endif
+ }
+
+ if (GPUVMEnable != true)
+ *PixelPTEBytesPerRow = 0;
+
+ *PixelPTEBytesPerRowStorage = *PixelPTEBytesPerRow;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
+ dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+ dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);
+ dml_print("DML::%s: dpte_row_height_linear = %u\n", __func__, *dpte_row_height_linear);
+ dml_print("DML::%s: dpte_row_width_ub = %u\n", __func__, *dpte_row_width_ub);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *PixelPTEBytesPerRow);
+ dml_print("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *PixelPTEBytesPerRowStorage);
+ dml_print("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
+ dml_print("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *dpte_row_height_one_row_per_frame);
+ dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *dpte_row_width_ub_one_row_per_frame);
+ dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *PixelPTEBytesPerRow_one_row_per_frame);
+#endif
+
+ dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
+
+ return PDEAndMetaPTEBytesFrame;
+} // CalculateVMAndRowBytes
+
+/* Latch every active plane's pixel clock as its back-end pixel clock; a plane that is
+ * interlaced then has its pixel clock doubled when ptoi_supported is set. */
+static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported)
+{
+	const dml_uint_t plane_count = dml_get_num_active_planes(display_cfg);
+
+	for (dml_uint_t plane = 0; plane < plane_count; ++plane) {
+		display_cfg->output.PixelClockBackEnd[plane] = display_cfg->timing.PixelClock[plane];
+		if (ptoi_supported == true && display_cfg->timing.Interlace[plane] == 1)
+			display_cfg->timing.PixelClock[plane] *= 2;
+	}
+}
+
+/*
+ * TruncToValidBPP - pick or validate the output bits per pixel of one stream
+ *
+ * The link capacity in bpp is derived from LinkBitRate, Lanes and PixelClock with a
+ * formula chosen by Output (and DSCEnable), then adjusted for the ODM mode: 4:1 and 2:1
+ * combine cap it at 16 and 32, 1:2 split doubles it. ODMModeDSC is consulted when
+ * DSCEnable is set, ODMModeNoDSC otherwise.
+ *
+ * Format (plus Output for formats other than dml_420 and dml_444) selects three
+ * uncompressed bpp candidates and a DSC window [min, max], whose maximum scales with
+ * DSCInputBitPerComponent.
+ * *RequiredSlots receives dml_ceil(DesiredBPP / link capacity * 64, 1).
+ * HTotal, HActive, DSCSlices, AudioRate and AudioLayout are not read.
+ *
+ * Return:
+ *  - DesiredBPP == 0, DSC on: dml_floor(16 * link capacity, 1) / 16, limited to the
+ *    window maximum, or __DML_DPP_INVALID__ when capacity is below the window minimum.
+ *  - DesiredBPP == 0, DSC off: the largest candidate not above the link capacity, or
+ *    __DML_DPP_INVALID__ when even the smallest does not fit.
+ *  - DesiredBPP != 0: DesiredBPP when it equals a candidate (DSC off) or lies inside
+ *    the window (DSC on), otherwise __DML_DPP_INVALID__. Link capacity is not checked.
+ *
+ * NOTE(review): a zero link capacity makes the RequiredSlots division degenerate; confirm callers exclude it.
+ */
+static dml_float_t TruncToValidBPP(
+	dml_float_t LinkBitRate,
+	dml_uint_t Lanes,
+	dml_uint_t HTotal,
+	dml_uint_t HActive,
+	dml_float_t PixelClock,
+	dml_float_t DesiredBPP,
+	dml_bool_t DSCEnable,
+	enum dml_output_encoder_class Output,
+	enum dml_output_format_class Format,
+	dml_uint_t DSCInputBitPerComponent,
+	dml_uint_t DSCSlices,
+	dml_uint_t AudioRate,
+	dml_uint_t AudioLayout,
+	enum dml_odm_mode ODMModeNoDSC,
+	enum dml_odm_mode ODMModeDSC,
+
+	// Output
+	dml_uint_t *RequiredSlots)
+{
+	const enum dml_odm_mode odm_mode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
+	dml_uint_t bpp_lo, bpp_mid, bpp_hi; /* uncompressed candidates, smallest to largest */
+	dml_uint_t dsc_bpp_min;
+	dml_float_t dsc_bpp_max;
+	dml_float_t link_bpp;
+
+	switch (Format) {
+	case dml_420:
+		bpp_lo = 12;
+		bpp_mid = 15;
+		bpp_hi = 18;
+		dsc_bpp_min = 6;
+		dsc_bpp_max = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
+		break;
+	case dml_444:
+		bpp_lo = 24;
+		bpp_mid = 30;
+		bpp_hi = 36;
+		dsc_bpp_min = 8;
+		dsc_bpp_max = 3 * DSCInputBitPerComponent - 1.0 / 16;
+		break;
+	default:
+		/* Every other format: HDMI only offers 24 bpp; dml_n422 has its own DSC window. */
+		bpp_lo = (Output == dml_hdmi) ? 24 : 16;
+		bpp_mid = (Output == dml_hdmi) ? 24 : 20;
+		bpp_hi = 24;
+		dsc_bpp_min = (Format == dml_n422) ? 7 : 8;
+		dsc_bpp_max = ((Format == dml_n422) ? 2 : 3) * DSCInputBitPerComponent - 1.0 / 16.0;
+		break;
+	}
+
+	if (Output == dml_dp2p0)
+		link_bpp = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
+	else if (DSCEnable && Output == dml_dp)
+		link_bpp = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
+	else
+		link_bpp = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
+
+	/* The same ODM adjustment applies with or without DSC; only the mode consulted differs. */
+	if (odm_mode == dml_odm_mode_combine_4to1)
+		link_bpp = dml_min(link_bpp, 16);
+	else if (odm_mode == dml_odm_mode_combine_2to1)
+		link_bpp = dml_min(link_bpp, 32);
+	else if (odm_mode == dml_odm_mode_split_1to2)
+		link_bpp = 2 * link_bpp;
+
+	*RequiredSlots = (dml_uint_t)(dml_ceil(DesiredBPP / link_bpp * 64, 1));
+
+	if (DesiredBPP == 0) {
+		/* Nothing requested: report the largest bpp the link capacity allows. */
+		if (DSCEnable) {
+			if (link_bpp < dsc_bpp_min)
+				return __DML_DPP_INVALID__;
+			if (link_bpp >= dsc_bpp_max)
+				return dsc_bpp_max;
+			return dml_floor(16.0 * link_bpp, 1.0) / 16.0;
+		}
+		if (link_bpp >= bpp_hi)
+			return bpp_hi;
+		if (link_bpp >= bpp_mid)
+			return bpp_mid;
+		if (link_bpp >= bpp_lo)
+			return bpp_lo;
+		return __DML_DPP_INVALID__;
+	}
+
+	/* Explicit request: accepted only when it is legal for the selected compression mode. */
+	if (DSCEnable)
+		return (DesiredBPP >= dsc_bpp_min && DesiredBPP <= dsc_bpp_max) ? DesiredBPP : __DML_DPP_INVALID__;
+	return (DesiredBPP == bpp_hi || DesiredBPP == bpp_mid || DesiredBPP == bpp_lo) ? DesiredBPP : __DML_DPP_INVALID__;
+} // TruncToValidBPP
+
+/*
+ * Derive the urgent, USR retraining, clock change, stutter and writeback watermarks from
+ * the latencies in p->mmSOCParameters, then weigh each surface's latency hiding (line
+ * buffer plus DET buffering, and the writeback buffer when writeback is enabled) against
+ * them. Reports USR retraining, FCLK change and DRAM clock change support, the per-surface
+ * DRAM clock change margin and maximum supported latency, and the number of sub-viewport
+ * lines each surface needs in MALL.
+ * Results are written through the pointers held in p; s only holds intermediate values.
+ */
+static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+	struct display_mode_lib_scratch_st *scratch,
+	struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p)
+{
+	struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
+
+	p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
+	p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
+	p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
+	p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
+	p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
+	p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
+	p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
+	p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
+	dml_print("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
+	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
+	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
+	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
+	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
+	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
+	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
+	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
+	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
+	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
+#endif
+
+	s->TotalActiveWriteback = 0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (p->WritebackEnable[k] == true)
+			s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
+	}
+	/* With more than one active writeback, every writeback watermark gains a WritebackChunkSize / SOCCLK term. */
+	if (s->TotalActiveWriteback <= 1) {
+		p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
+		p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
+		p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
+	} else {
+		p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
+		p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
+		p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
+	}
+
+	if (p->USRRetrainingRequiredFinal) {
+		p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
+		p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
+		p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
+	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
+	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
+	dml_print("DML::%s: USRRetrainingRequiredFinal = %u\n", __func__, p->USRRetrainingRequiredFinal);
+	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
+#endif
+	/* Total pixel bandwidth of all surfaces; used below to apportion the compressed buffer per surface. */
+	s->TotalPixelBW = 0.0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
+			* (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k] + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * p->VRatioChroma[k]) / (p->HTotal[k] / p->PixelClock[k]);
+	}
+
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		/* Source lines the line buffer holds (capped at MaxLineBufferLines) less the VTaps - 1 lines the vertical taps need. */
+		s->LBLatencyHidingSourceLinesY[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthY[k] / dml_max(p->HRatio[k], 1.0)), 1)) - (p->VTaps[k] - 1));
+		s->LBLatencyHidingSourceLinesC[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthC[k] / dml_max(p->HRatioChroma[k], 1.0)), 1)) - (p->VTapsChroma[k] - 1));
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
+		dml_print("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
+		dml_print("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, p->LBBitPerPixel[k]);
+		dml_print("DML::%s: k=%u, HRatio = %f\n", __func__, k, p->HRatio[k]);
+		dml_print("DML::%s: k=%u, VTaps = %u\n", __func__, k, p->VTaps[k]);
+#endif
+
+		s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / p->VRatio[k] * (p->HTotal[k] / p->PixelClock[k]);
+		s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / p->VRatioChroma[k] * (p->HTotal[k] / p->PixelClock[k]);
+
+		s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
+		if (p->UnboundedRequestEnabled) {
+			s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k]) / (p->HTotal[k] / p->PixelClock[k]) / s->TotalPixelBW;
+		}
+
+		s->LinesInDETY[k] = (dml_float_t)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
+		s->LinesInDETYRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETY[k], p->SwathHeightY[k]));
+		s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
+
+		s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
+
+		if (p->NumberOfActiveSurfaces > 1) {
+			s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightY[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatio[k];
+		}
+
+		if (p->BytePerPixelDETC[k] > 0) {
+			s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
+			s->LinesInDETCRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETC[k], p->SwathHeightC[k]));
+			s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatioChroma[k];
+			s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
+			if (p->NumberOfActiveSurfaces > 1) {
+				s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightC[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatioChroma[k];
+			}
+			s->ActiveClockChangeLatencyHiding = dml_min(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
+		} else {
+			s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
+		}
+
+		s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->DRAMClockChangeWatermark;
+		s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->FCLKChangeWatermark;
+		s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
+
+		if (p->WritebackEnable[k]) {
+			s->WritebackLatencyHiding = (dml_float_t)p->WritebackInterfaceBufferSize * 1024.0 / ((dml_float_t)p->WritebackDestinationWidth[k] * (dml_float_t)p->WritebackDestinationHeight[k] / ((dml_float_t)p->WritebackSourceHeight[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k]) * 4.0);
+			if (p->WritebackPixelFormat[k] == dml_444_64) {
+				s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
+			}
+			s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
+			s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
+
+			/* Clamp each clock's margin by the writeback margin of that same clock (DRAM with DRAM, FCLK with FCLK). */
+			s->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
+			s->ActiveFCLKChangeLatencyMargin[k] = dml_min(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
+		}
+		p->MaxActiveDRAMClockChangeLatencySupported[k] = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
+		p->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
+	}
+
+	*p->USRRetrainingSupport = true;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->USRRetrainingLatencyMargin[k] < 0)) {
+			*p->USRRetrainingSupport = false;
+		}
+	}
+	/* Smallest (FCLK margin + FCLK change latency) over the non-phantom surfaces; left unwritten when there are none. */
+	s->FoundCriticalSurface = false;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && ((!s->FoundCriticalSurface)
+			|| ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
+			s->FoundCriticalSurface = true;
+			*p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
+		}
+	}
+	/* Pairwise table: surfaces that share timing (or are forced to be treated so) count as synchronized. */
+	for (dml_uint_t i = 0; i < p->NumberOfActiveSurfaces; ++i) {
+		for (dml_uint_t j = 0; j < p->NumberOfActiveSurfaces; ++j) {
+			if (i == j ||
+				(p->BlendingAndTiming[i] == i && p->BlendingAndTiming[j] == i) ||
+				(p->BlendingAndTiming[j] == j && p->BlendingAndTiming[i] == j) ||
+				(p->BlendingAndTiming[i] == p->BlendingAndTiming[j] && p->BlendingAndTiming[i] != i) ||
+				(p->SynchronizeTimingsFinal && p->PixelClock[i] == p->PixelClock[j] && p->HTotal[i] == p->HTotal[j] && p->VTotal[i] == p->VTotal[j] && p->VActive[i] == p->VActive[j]) ||
+				(p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (p->DRRDisplay[i] || p->DRRDisplay[j]))) {
+				s->SynchronizedSurfaces[i][j] = true;
+			} else {
+				s->SynchronizedSurfaces[i][j] = false;
+			}
+		}
+	}
+	/* FCLKChangeSupportNumber: 0 maps to vactive, 1 or 2 to vblank, 3 to unsupported (see the mapping below). */
+	s->FCLKChangeSupportNumber = 0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->ActiveFCLKChangeLatencyMargin[k] < 0)) {
+			if (!(p->PrefetchMode[k] <= 1)) {
+				s->FCLKChangeSupportNumber = 3;
+			} else if (s->FCLKChangeSupportNumber == 0) {
+				s->FCLKChangeSupportNumber = ((p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1);
+				s->LastSurfaceWithoutMargin = k;
+			} else if (((s->FCLKChangeSupportNumber == 1) && (p->DRRDisplay[k] || (!s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k]))) || (s->FCLKChangeSupportNumber == 2))
+				s->FCLKChangeSupportNumber = 3;
+		}
+	}
+
+	if (s->FCLKChangeSupportNumber == 0) {
+		*p->FCLKChangeSupport = dml_fclock_change_vactive;
+	} else if ((s->FCLKChangeSupportNumber == 1) || (s->FCLKChangeSupportNumber == 2)) {
+		*p->FCLKChangeSupport = dml_fclock_change_vblank;
+	} else {
+		*p->FCLKChangeSupport = dml_fclock_change_unsupported;
+	}
+	/* DRAMClockChangeMethod: 0 no MALL use, 1 MALL full frame, 2 MALL sub-viewport; the last matching surface wins. */
+	s->DRAMClockChangeMethod = 0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
+			s->DRAMClockChangeMethod = 1;
+		else if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport)
+			s->DRAMClockChangeMethod = 2;
+	}
+
+	s->DRAMClockChangeSupportNumber = 0;
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (((s->DRAMClockChangeMethod == 0) && (s->ActiveDRAMClockChangeLatencyMargin[k] < 0)) ||
+			((s->DRAMClockChangeMethod == 1) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_full_frame)) ||
+			((s->DRAMClockChangeMethod == 2) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_sub_viewport) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe))) {
+			if (p->PrefetchMode[k] != 0) { // Don't need to support DRAM clock change, PrefetchMode 0 means needs DRAM clock change support
+				s->DRAMClockChangeSupportNumber = 3;
+			} else if (s->DRAMClockChangeSupportNumber == 0) {
+				s->DRAMClockChangeSupportNumber = (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1;
+				s->LastSurfaceWithoutMargin = k;
+			} else if (((s->DRAMClockChangeSupportNumber == 1) && (p->DRRDisplay[k] || !s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k])) || (s->DRAMClockChangeSupportNumber == 2)) {
+				s->DRAMClockChangeSupportNumber = 3;
+			}
+		}
+	}
+
+	if (s->DRAMClockChangeMethod == 0) { // No MALL usage
+		if (s->DRAMClockChangeSupportNumber == 0) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vactive;
+		} else if (s->DRAMClockChangeSupportNumber == 1) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank;
+		} else if (s->DRAMClockChangeSupportNumber == 2) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr;
+		} else {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
+		}
+	} else if (s->DRAMClockChangeMethod == 1) { // Any pipe using MALL full frame
+		if (s->DRAMClockChangeSupportNumber == 0) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_full_frame;
+		} else if (s->DRAMClockChangeSupportNumber == 1) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_full_frame;
+		} else if (s->DRAMClockChangeSupportNumber == 2) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_full_frame;
+		} else {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
+		}
+	} else { // Any pipe using MALL subviewport
+		if (s->DRAMClockChangeSupportNumber == 0) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_sub_vp;
+		} else if (s->DRAMClockChangeSupportNumber == 1) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_sub_vp;
+		} else if (s->DRAMClockChangeSupportNumber == 2) {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_sub_vp;
+		} else {
+			*p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
+		}
+	}
+	/* MALL lines per surface: p-state source lines + lines already buffered (DET and line buffer) + one meta row; max of luma and chroma. */
+	for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		s->dst_y_pstate = (dml_uint_t)(dml_ceil((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (p->HTotal[k] / p->PixelClock[k]), 1));
+		s->src_y_pstate_l = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatio[k], p->SwathHeightY[k]));
+		s->src_y_ahead_l = (dml_uint_t)(dml_floor(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
+		s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height[k];
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+		dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
+		dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
+		dml_print("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
+		dml_print("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
+		dml_print("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
+		dml_print("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
+		dml_print("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
+		dml_print("DML::%s: k=%u, meta_row_height = %u\n", __func__, k, p->meta_row_height[k]);
+		dml_print("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
+#endif
+		p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
+
+		if (p->BytePerPixelDETC[k] > 0) {
+			s->src_y_pstate_c = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatioChroma[k], p->SwathHeightC[k]));
+			s->src_y_ahead_c = (dml_uint_t)(dml_floor(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
+			s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_chroma[k];
+			p->SubViewportLinesNeededInMALL[k] = (dml_uint_t)(dml_max(s->sub_vp_lines_l, s->sub_vp_lines_c));
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
+			dml_print("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
+			dml_print("DML::%s: k=%u, meta_row_height_chroma = %u\n", __func__, k, p->meta_row_height_chroma[k]);
+			dml_print("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
+#endif
+		}
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->DRAMClockChangeSupport);
+	dml_print("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->FCLKChangeSupport);
+	dml_print("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
+	dml_print("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
+#endif
+} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
+
+/*
+ * Compute the lowest DCFCLK allowed in deep sleep and store it in *DCFClkDeepSleep.
+ * Per surface, the requirement is __DML_MIN_DCFCLK_FACTOR__ * swath width * bytes per
+ * pixel / 32 / line delivery time, taking the larger of luma and chroma; a surface with
+ * BytePerPixelC == 0 uses luma only with a divisor of 64. Each surface is also held to
+ * at least PixelClock / 16. The result is the largest per-surface value, but never below
+ * 8.0 or __DML_MIN_DCFCLK_FACTOR__ * total read bandwidth / ReturnBusWidth.
+ */
+static void CalculateDCFCLKDeepSleep(
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_uint_t BytePerPixelY[],
+	dml_uint_t BytePerPixelC[],
+	dml_float_t VRatio[],
+	dml_float_t VRatioChroma[],
+	dml_uint_t SwathWidthY[],
+	dml_uint_t SwathWidthC[],
+	dml_uint_t DPPPerSurface[],
+	dml_float_t HRatio[],
+	dml_float_t HRatioChroma[],
+	dml_float_t PixelClock[],
+	dml_float_t PSCL_THROUGHPUT[],
+	dml_float_t PSCL_THROUGHPUT_CHROMA[],
+	dml_float_t Dppclk[],
+	dml_float_t ReadBandwidthLuma[],
+	dml_float_t ReadBandwidthChroma[],
+	dml_uint_t ReturnBusWidth,
+
+	// Output
+	dml_float_t *DCFClkDeepSleep)
+{
+	dml_float_t surface_min_clk[__DML_NUM_PLANES__];
+	dml_float_t total_read_bw = 0.0;
+
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		dml_float_t luma_line_time;
+
+		if (VRatio[k] <= 1)
+			luma_line_time = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
+		else
+			luma_line_time = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		if (BytePerPixelC[k] > 0) {
+			dml_float_t chroma_line_time;
+
+			if (VRatioChroma[k] <= 1)
+				chroma_line_time = SwathWidthC[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
+			else
+				chroma_line_time = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+
+			surface_min_clk[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / luma_line_time,
+				__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / chroma_line_time);
+		} else {
+			surface_min_clk[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / luma_line_time;
+		}
+		surface_min_clk[k] = dml_max(surface_min_clk[k], PixelClock[k] / 16);
+		total_read_bw = total_read_bw + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, PixelClock[k]);
+		dml_print("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, surface_min_clk[k]);
+#endif
+	}
+
+	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * total_read_bw / (dml_float_t) ReturnBusWidth);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
+	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, total_read_bw);
+	dml_print("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
+	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
+#endif
+
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k)
+		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, surface_min_clk[k]);
+	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
+} // CalculateDCFCLKDeepSleep
+
+/*
+ * CalculateUrgentBurstFactor - urgent burst factors of a single surface.
+ *
+ * For the cursor, luma and chroma streams the time covered by the buffered
+ * lines is computed as lines * LineTime / vertical ratio, and the factor is
+ * time / (time - UrgentLatency).
+ *
+ *  - When time - UrgentLatency <= 0 the factor is written as 0 and
+ *    *NotEnoughUrgentLatencyHiding is set.
+ *  - When the vertical ratio is not greater than 0 the factor is written as 1.
+ *  - *UrgentBurstFactorCursor is not written when CursorWidth is 0, and
+ *    *UrgentBurstFactorChroma is not written when BytePerPixelInDETC is not
+ *    greater than 0.
+ *
+ * *NotEnoughUrgentLatencyHiding is always cleared on entry.
+ */
+static void CalculateUrgentBurstFactor(
+		enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
+		dml_uint_t swath_width_luma_ub,
+		dml_uint_t swath_width_chroma_ub,
+		dml_uint_t SwathHeightY,
+		dml_uint_t SwathHeightC,
+		dml_float_t LineTime,
+		dml_float_t UrgentLatency,
+		dml_float_t CursorBufferSize,
+		dml_uint_t CursorWidth,
+		dml_uint_t CursorBPP,
+		dml_float_t VRatio,
+		dml_float_t VRatioC,
+		dml_float_t BytePerPixelInDETY,
+		dml_float_t BytePerPixelInDETC,
+		dml_uint_t DETBufferSizeY,
+		dml_uint_t DETBufferSizeC,
+		// Output
+		dml_float_t *UrgentBurstFactorCursor,
+		dml_float_t *UrgentBurstFactorLuma,
+		dml_float_t *UrgentBurstFactorChroma,
+		dml_bool_t *NotEnoughUrgentLatencyHiding)
+{
+	/* For a phantom pipe 1024*1024 is used in place of the DET buffer size. */
+	const dml_bool_t phantom_pipe = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe);
+	dml_float_t lines_in_det;
+	dml_float_t hiding_time;
+
+	*NotEnoughUrgentLatencyHiding = false;
+
+	/* Cursor: only evaluated when a cursor width is given. */
+	if (CursorWidth > 0) {
+		/* Power-of-two line count derived from the buffer size and the bytes per cursor line. */
+		const dml_uint_t cursor_lines = 1 << (dml_uint_t) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
+
+		if (!(VRatio > 0)) {
+			*UrgentBurstFactorCursor = 1;
+		} else {
+			hiding_time = cursor_lines * LineTime / VRatio;
+			if (hiding_time - UrgentLatency <= 0) {
+				*NotEnoughUrgentLatencyHiding = true;
+				*UrgentBurstFactorCursor = 0;
+			} else {
+				*UrgentBurstFactorCursor = hiding_time / (hiding_time - UrgentLatency);
+			}
+		}
+	}
+
+	/* Luma */
+	lines_in_det = (phantom_pipe ? 1024*1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
+
+	if (!(VRatio > 0)) {
+		*UrgentBurstFactorLuma = 1;
+	} else {
+		/* Only lines in multiples of the swath height are counted. */
+		hiding_time = dml_floor(lines_in_det, SwathHeightY) * LineTime / VRatio;
+		if (hiding_time - UrgentLatency <= 0) {
+			*NotEnoughUrgentLatencyHiding = true;
+			*UrgentBurstFactorLuma = 0;
+		} else {
+			*UrgentBurstFactorLuma = hiding_time / (hiding_time - UrgentLatency);
+		}
+	}
+
+	/* Chroma: nothing left to do when there is no chroma plane. */
+	if (!(BytePerPixelInDETC > 0))
+		return;
+
+	lines_in_det = (phantom_pipe ? 1024*1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub;
+
+	if (!(VRatioC > 0)) {
+		*UrgentBurstFactorChroma = 1;
+	} else {
+		hiding_time = dml_floor(lines_in_det, SwathHeightC) * LineTime / VRatioC;
+		if (hiding_time - UrgentLatency <= 0) {
+			*NotEnoughUrgentLatencyHiding = true;
+			*UrgentBurstFactorChroma = 0;
+		} else {
+			*UrgentBurstFactorChroma = hiding_time / (hiding_time - UrgentLatency);
+		}
+	}
+} // CalculateUrgentBurstFactor
+
+/*
+ * CalculatePixelDeliveryTimes - per-surface line, request and cursor request
+ * delivery times, each for the VRatio case and for the VRatioPrefetch case.
+ *
+ * Line delivery time:
+ *   ratio <= 1: swath_width_ub * DPPPerSurface / HRatio / PixelClock
+ *   otherwise:  swath_width_ub / PSCL_THROUGHPUT / Dppclk
+ *
+ * Request delivery time is the line delivery time divided by the requests per
+ * swath, i.e. swath_width_ub divided by BlockWidth256Bytes (or by
+ * BlockHeight256Bytes when dml_is_vertical_rotation() is true for the
+ * surface).
+ *
+ * Chroma outputs are 0 when BytePerPixelC is 0; cursor outputs are 0 when
+ * NumberOfCursors is 0.
+ */
+static void CalculatePixelDeliveryTimes(
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_float_t VRatio[],
+		dml_float_t VRatioChroma[],
+		dml_float_t VRatioPrefetchY[],
+		dml_float_t VRatioPrefetchC[],
+		dml_uint_t swath_width_luma_ub[],
+		dml_uint_t swath_width_chroma_ub[],
+		dml_uint_t DPPPerSurface[],
+		dml_float_t HRatio[],
+		dml_float_t HRatioChroma[],
+		dml_float_t PixelClock[],
+		dml_float_t PSCL_THROUGHPUT[],
+		dml_float_t PSCL_THROUGHPUT_CHROMA[],
+		dml_float_t Dppclk[],
+		dml_uint_t BytePerPixelC[],
+		enum dml_rotation_angle SourceScan[],
+		dml_uint_t NumberOfCursors[],
+		dml_uint_t CursorWidth[],
+		dml_uint_t CursorBPP[],
+		dml_uint_t BlockWidth256BytesY[],
+		dml_uint_t BlockHeight256BytesY[],
+		dml_uint_t BlockWidth256BytesC[],
+		dml_uint_t BlockHeight256BytesC[],
+
+		// Output
+		dml_float_t DisplayPipeLineDeliveryTimeLuma[],
+		dml_float_t DisplayPipeLineDeliveryTimeChroma[],
+		dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
+		dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
+		dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
+		dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
+		dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+		dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
+		dml_float_t CursorRequestDeliveryTime[],
+		dml_float_t CursorRequestDeliveryTimePrefetch[])
+{
+	dml_float_t requests_per_swath;
+
+	/* Pass 1: time to deliver one line of each plane. */
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : HRatio = %f\n", __func__, k, HRatio[k]);
+		dml_print("DML::%s: k=%u : VRatio = %f\n", __func__, k, VRatio[k]);
+		dml_print("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
+		dml_print("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
+		dml_print("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
+		dml_print("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
+		dml_print("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
+		dml_print("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
+		dml_print("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
+		dml_print("DML::%s: k=%u : PixelClock = %f\n", __func__, k, PixelClock[k]);
+		dml_print("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
+#endif
+
+		DisplayPipeLineDeliveryTimeLuma[k] = (VRatio[k] <= 1) ?
+			swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
+			swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		if (BytePerPixelC[k] != 0) {
+			DisplayPipeLineDeliveryTimeChroma[k] = (VRatioChroma[k] <= 1) ?
+				swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
+				swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+		} else {
+			DisplayPipeLineDeliveryTimeChroma[k] = 0;
+		}
+
+		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
+			swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
+			swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		if (BytePerPixelC[k] != 0) {
+			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = (VRatioPrefetchC[k] <= 1) ?
+				swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
+				swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+		} else {
+			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+#endif
+	}
+
+	/* Pass 2: split each line time evenly over the requests of one swath. */
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		/* Integer division of the swath width by the block dimension along the scan. */
+		if (dml_is_vertical_rotation(SourceScan[k])) {
+			requests_per_swath = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
+		} else {
+			requests_per_swath = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Luma)\n", __func__, k, requests_per_swath);
+#endif
+
+		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / requests_per_swath;
+		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / requests_per_swath;
+		if (BytePerPixelC[k] != 0) {
+			if (dml_is_vertical_rotation(SourceScan[k])) {
+				requests_per_swath = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
+			} else {
+				requests_per_swath = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
+			}
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Chroma)\n", __func__, k, requests_per_swath);
+#endif
+			DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / requests_per_swath;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / requests_per_swath;
+		} else {
+			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+		dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+#endif
+	}
+
+	/* Pass 3: cursor request delivery times. */
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		/* Requests per cursor line: CursorWidth * CursorBPP / 256 / 8, rounded up via dml_ceil(). */
+		const dml_uint_t cursor_req_per_width = (dml_uint_t)(dml_ceil((dml_float_t) CursorWidth[k] * (dml_float_t) CursorBPP[k] / 256.0 / 8.0, 1.0));
+
+		if (NumberOfCursors[k] == 0) {
+			CursorRequestDeliveryTime[k] = 0;
+			CursorRequestDeliveryTimePrefetch[k] = 0;
+		} else {
+			CursorRequestDeliveryTime[k] = (VRatio[k] <= 1) ?
+				(dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width :
+				(dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+
+			CursorRequestDeliveryTimePrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
+				(dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width :
+				(dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u : NumberOfCursors = %u\n", __func__, k, NumberOfCursors[k]);
+		dml_print("DML::%s: k=%u : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
+		dml_print("DML::%s: k=%u : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
+#endif
+	}
+} // CalculatePixelDeliveryTimes
+
+/*
+ * CalculateMetaAndPTETimes - per-surface meta chunk and PTE group timings.
+ *
+ * For every active surface this fills in:
+ *  - DST_Y_PER_PTE_ROW_NOM_{L,C} and DST_Y_PER_META_ROW_NOM_{L,C}: the row
+ *    height divided by the vertical ratio of the plane;
+ *  - TimePerMetaChunk* / TimePerChromaMetaChunk*: lines * HTotal / PixelClock
+ *    divided by the upper bound of meta chunks per row (all 0 when
+ *    DCCEnable[k] is not true);
+ *  - time_per_pte_group_*: lines * HTotal / PixelClock divided by the upper
+ *    bound of dpte groups per row (all 0 when GPUVMEnable is not true).
+ *
+ * "Lines" is the nominal row height over the vertical ratio for the *Nominal
+ * and *_nom_* outputs, DestinationLinesToRequestRowInVBlank for the vblank
+ * outputs and DestinationLinesToRequestRowInImmediateFlip for the flip
+ * outputs.  Chroma outputs are 0 when BytePerPixelC[k] is 0.
+ *
+ * All dml_print() tracing in this function is compiled in only when
+ * __DML_VBA_DEBUG__ is defined.
+ */
+static void CalculateMetaAndPTETimes(
+		dml_bool_t use_one_row_for_frame[],
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_bool_t GPUVMEnable,
+		dml_uint_t MetaChunkSize,
+		dml_uint_t MinMetaChunkSizeBytes,
+		dml_uint_t HTotal[],
+		dml_float_t VRatio[],
+		dml_float_t VRatioChroma[],
+		dml_float_t DestinationLinesToRequestRowInVBlank[],
+		dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
+		dml_bool_t DCCEnable[],
+		dml_float_t PixelClock[],
+		dml_uint_t BytePerPixelY[],
+		dml_uint_t BytePerPixelC[],
+		enum dml_rotation_angle SourceScan[],
+		dml_uint_t dpte_row_height[],
+		dml_uint_t dpte_row_height_chroma[],
+		dml_uint_t meta_row_width[],
+		dml_uint_t meta_row_width_chroma[],
+		dml_uint_t meta_row_height[],
+		dml_uint_t meta_row_height_chroma[],
+		dml_uint_t meta_req_width[],
+		dml_uint_t meta_req_width_chroma[],
+		dml_uint_t meta_req_height[],
+		dml_uint_t meta_req_height_chroma[],
+		dml_uint_t dpte_group_bytes[],
+		dml_uint_t PTERequestSizeY[],
+		dml_uint_t PTERequestSizeC[],
+		dml_uint_t PixelPTEReqWidthY[],
+		dml_uint_t PixelPTEReqHeightY[],
+		dml_uint_t PixelPTEReqWidthC[],
+		dml_uint_t PixelPTEReqHeightC[],
+		dml_uint_t dpte_row_width_luma_ub[],
+		dml_uint_t dpte_row_width_chroma_ub[],
+
+		// Output
+		dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
+		dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
+		dml_float_t DST_Y_PER_META_ROW_NOM_L[],
+		dml_float_t DST_Y_PER_META_ROW_NOM_C[],
+		dml_float_t TimePerMetaChunkNominal[],
+		dml_float_t TimePerChromaMetaChunkNominal[],
+		dml_float_t TimePerMetaChunkVBlank[],
+		dml_float_t TimePerChromaMetaChunkVBlank[],
+		dml_float_t TimePerMetaChunkFlip[],
+		dml_float_t TimePerChromaMetaChunkFlip[],
+		dml_float_t time_per_pte_group_nom_luma[],
+		dml_float_t time_per_pte_group_vblank_luma[],
+		dml_float_t time_per_pte_group_flip_luma[],
+		dml_float_t time_per_pte_group_nom_chroma[],
+		dml_float_t time_per_pte_group_vblank_chroma[],
+		dml_float_t time_per_pte_group_flip_chroma[])
+{
+	dml_uint_t meta_chunk_width;
+	dml_uint_t min_meta_chunk_width;
+	dml_uint_t meta_chunk_per_row_int;
+	dml_uint_t meta_row_remainder;
+	dml_uint_t meta_chunk_threshold;
+	dml_uint_t meta_chunks_per_row_ub;
+	dml_uint_t meta_chunk_width_chroma;
+	dml_uint_t min_meta_chunk_width_chroma;
+	dml_uint_t meta_chunk_per_row_int_chroma;
+	dml_uint_t meta_row_remainder_chroma;
+	dml_uint_t meta_chunk_threshold_chroma;
+	dml_uint_t meta_chunks_per_row_ub_chroma;
+	dml_uint_t dpte_group_width_luma;
+	dml_uint_t dpte_groups_per_row_luma_ub;
+	dml_uint_t dpte_group_width_chroma;
+	dml_uint_t dpte_groups_per_row_chroma_ub;
+
+	/* Destination lines covered by one PTE row / one meta row. */
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
+		if (BytePerPixelC[k] == 0) {
+			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+		} else {
+			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
+		}
+		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
+		if (BytePerPixelC[k] == 0) {
+			DST_Y_PER_META_ROW_NOM_C[k] = 0;
+		} else {
+			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
+		}
+	}
+
+	/* Meta chunk timings, only meaningful with DCC enabled. */
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		if (DCCEnable[k] == true) {
+			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
+			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
+			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
+			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
+			if (!dml_is_vertical_rotation(SourceScan[k])) {
+				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
+			} else {
+				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
+			}
+			/* One extra chunk for a small remainder, two when it exceeds the threshold. */
+			if (meta_row_remainder <= meta_chunk_threshold) {
+				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+			} else {
+				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+			}
+			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
+			if (BytePerPixelC[k] == 0) {
+				TimePerChromaMetaChunkNominal[k] = 0;
+				TimePerChromaMetaChunkVBlank[k] = 0;
+				TimePerChromaMetaChunkFlip[k] = 0;
+			} else {
+				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
+				/* Plain integer division, matching the luma computation above. */
+				meta_chunk_per_row_int_chroma = meta_row_width_chroma[k] / meta_chunk_width_chroma;
+				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
+				if (!dml_is_vertical_rotation(SourceScan[k])) {
+					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
+				} else {
+					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
+				}
+				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
+					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+				} else {
+					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+				}
+				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
+			}
+		} else {
+			TimePerMetaChunkNominal[k] = 0;
+			TimePerMetaChunkVBlank[k] = 0;
+			TimePerMetaChunkFlip[k] = 0;
+			TimePerChromaMetaChunkNominal[k] = 0;
+			TimePerChromaMetaChunkVBlank[k] = 0;
+			TimePerChromaMetaChunkFlip[k] = 0;
+		}
+	}
+
+	/* PTE group timings, only meaningful with GPUVM enabled. */
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		if (GPUVMEnable == true) {
+			if (!dml_is_vertical_rotation(SourceScan[k])) {
+				dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqWidthY[k]);
+			} else {
+				dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqHeightY[k]);
+			}
+
+			/* With one row per frame the group count per row is halved before rounding up. */
+			if (use_one_row_for_frame[k]) {
+				dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma / 2.0, 1.0));
+			} else {
+				dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma, 1.0));
+			}
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, use_one_row_for_frame[k]);
+			dml_print("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, dpte_group_bytes[k]);
+			dml_print("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, PTERequestSizeY[k]);
+			dml_print("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, PixelPTEReqWidthY[k]);
+			dml_print("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, PixelPTEReqHeightY[k]);
+			dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, dpte_row_width_luma_ub[k]);
+			dml_print("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
+			dml_print("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
+#endif
+
+			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
+			if (BytePerPixelC[k] == 0) {
+				time_per_pte_group_nom_chroma[k] = 0;
+				time_per_pte_group_vblank_chroma[k] = 0;
+				time_per_pte_group_flip_chroma[k] = 0;
+			} else {
+				if (!dml_is_vertical_rotation(SourceScan[k])) {
+					dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqWidthC[k]);
+				} else {
+					dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqHeightC[k]);
+				}
+
+				if (use_one_row_for_frame[k]) {
+					dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma / 2.0, 1.0));
+				} else {
+					dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma, 1.0));
+				}
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, dpte_row_width_chroma_ub[k]);
+				dml_print("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
+				dml_print("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
+#endif
+
+				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
+			}
+		} else {
+			time_per_pte_group_nom_luma[k] = 0;
+			time_per_pte_group_vblank_luma[k] = 0;
+			time_per_pte_group_flip_luma[k] = 0;
+			time_per_pte_group_nom_chroma[k] = 0;
+			time_per_pte_group_vblank_chroma[k] = 0;
+			time_per_pte_group_flip_chroma[k] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInVBlank = %f\n", __func__, k, DestinationLinesToRequestRowInVBlank[k]);
+		dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
+
+		dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
+		dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
+		dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
+		dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
+		dml_print("DML::%s: k=%u, TimePerMetaChunkNominal = %f\n", __func__, k, TimePerMetaChunkNominal[k]);
+		dml_print("DML::%s: k=%u, TimePerMetaChunkVBlank = %f\n", __func__, k, TimePerMetaChunkVBlank[k]);
+		dml_print("DML::%s: k=%u, TimePerMetaChunkFlip = %f\n", __func__, k, TimePerMetaChunkFlip[k]);
+		dml_print("DML::%s: k=%u, TimePerChromaMetaChunkNominal = %f\n", __func__, k, TimePerChromaMetaChunkNominal[k]);
+		dml_print("DML::%s: k=%u, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, TimePerChromaMetaChunkVBlank[k]);
+		dml_print("DML::%s: k=%u, TimePerChromaMetaChunkFlip = %f\n", __func__, k, TimePerChromaMetaChunkFlip[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, time_per_pte_group_nom_luma[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, time_per_pte_group_vblank_luma[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, time_per_pte_group_flip_luma[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, time_per_pte_group_nom_chroma[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, time_per_pte_group_vblank_chroma[k]);
+		dml_print("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, time_per_pte_group_flip_chroma[k]);
+#endif
+	}
+} // CalculateMetaAndPTETimes
+
+/*
+ * CalculateVMGroupAndRequestTimes - per-surface VM group and VM request times
+ * for the vblank and immediate-flip cases.
+ *
+ * When GPUVMEnable is true and the surface either has DCC enabled or
+ * GPUVMMaxPageTableLevels > 1, each output is
+ *   DestinationLinesToRequestVM* * HTotal / PixelClock / count
+ * where count is the number of VM groups (byte totals divided by
+ * vm_group_bytes, rounded up via dml_ceil()) or the number of requests (byte
+ * totals divided by 64).  Which byte totals are counted:
+ *   DCC off:                 dpde0 bytes
+ *   DCC on, one PT level:    meta PTE bytes
+ *   DCC on, more PT levels:  dpde0 + meta PTE bytes (plus 1 group, or 2 with
+ *                            chroma)
+ * Chroma byte totals are included only when BytePerPixelC[k] > 0.  All four
+ * results are halved when GPUVMMaxPageTableLevels > 2.
+ *
+ * In every other case the four outputs are written as 0.
+ */
+static void CalculateVMGroupAndRequestTimes(
+		dml_uint_t NumberOfActiveSurfaces,
+		dml_bool_t GPUVMEnable,
+		dml_uint_t GPUVMMaxPageTableLevels,
+		dml_uint_t HTotal[],
+		dml_uint_t BytePerPixelC[],
+		dml_float_t DestinationLinesToRequestVMInVBlank[],
+		dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
+		dml_bool_t DCCEnable[],
+		dml_float_t PixelClock[],
+		dml_uint_t dpte_row_width_luma_ub[],
+		dml_uint_t dpte_row_width_chroma_ub[],
+		dml_uint_t vm_group_bytes[],
+		dml_uint_t dpde0_bytes_per_frame_ub_l[],
+		dml_uint_t dpde0_bytes_per_frame_ub_c[],
+		dml_uint_t meta_pte_bytes_per_frame_ub_l[],
+		dml_uint_t meta_pte_bytes_per_frame_ub_c[],
+
+		// Output
+		dml_float_t TimePerVMGroupVBlank[],
+		dml_float_t TimePerVMGroupFlip[],
+		dml_float_t TimePerVMRequestVBlank[],
+		dml_float_t TimePerVMRequestFlip[])
+{
+	dml_uint_t vm_groups;
+	dml_uint_t vm_requests;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+#endif
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+		const dml_bool_t has_chroma = (BytePerPixelC[k] > 0);
+		const dml_bool_t vm_timing_applies = (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1));
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, DCCEnable = %u\n", __func__, k, DCCEnable[k]);
+		dml_print("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
+		dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
+		dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
+		dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_l = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
+		dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_c = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
+#endif
+
+		if (vm_timing_applies) {
+			if (DCCEnable[k] == false) {
+				/* Only the dpde0 bytes are counted. */
+				if (has_chroma) {
+					vm_groups = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0) +
+							dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_c[k] / (dml_float_t) vm_group_bytes[k], 1.0));
+					vm_requests = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
+				} else {
+					vm_groups = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0));
+					vm_requests = dpde0_bytes_per_frame_ub_l[k] / 64;
+				}
+			} else if (GPUVMMaxPageTableLevels == 1) {
+				/* Only the meta PTE bytes are counted. */
+				if (has_chroma) {
+					vm_groups = (dml_uint_t)(dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0) +
+							dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_c[k] / (dml_float_t) vm_group_bytes[k], 1.0));
+					vm_requests = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+				} else {
+					vm_groups = (dml_uint_t)(dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1.0));
+					vm_requests = meta_pte_bytes_per_frame_ub_l[k] / 64;
+				}
+			} else {
+				/* Both dpde0 and meta PTE bytes are counted, plus a fixed group offset. */
+				if (has_chroma) {
+					vm_groups = (dml_uint_t)(2.0 + dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1) +
+							dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_c[k] / (dml_float_t) vm_group_bytes[k], 1) +
+							dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1) +
+							dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_c[k] / (dml_float_t) vm_group_bytes[k], 1));
+					vm_requests = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
+				} else {
+					vm_groups = (dml_uint_t)(1.0 + dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1) +
+							dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_l[k] / (dml_float_t) vm_group_bytes[k], 1));
+					vm_requests = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
+				}
+			}
+
+			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / vm_groups;
+			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / vm_groups;
+			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / vm_requests;
+			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / vm_requests;
+
+			/* More than two page table levels: every budget is halved. */
+			if (GPUVMMaxPageTableLevels > 2) {
+				TimePerVMGroupVBlank[k] /= 2;
+				TimePerVMGroupFlip[k] /= 2;
+				TimePerVMRequestVBlank[k] /= 2;
+				TimePerVMRequestFlip[k] /= 2;
+			}
+
+		} else {
+			TimePerVMGroupVBlank[k] = 0;
+			TimePerVMGroupFlip[k] = 0;
+			TimePerVMRequestVBlank[k] = 0;
+			TimePerVMRequestFlip[k] = 0;
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
+		dml_print("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
+		dml_print("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
+		dml_print("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
+#endif
+	}
+} // CalculateVMGroupAndRequestTimes
+
+static void CalculateStutterEfficiency(struct display_mode_lib_scratch_st *scratch,
+ struct CalculateStutterEfficiency_params_st *p)
+{
+ dml_float_t DETBufferingTimeY = 0;
+ dml_float_t SwathWidthYCriticalSurface = 0;
+ dml_float_t SwathHeightYCriticalSurface = 0;
+ dml_float_t VActiveTimeCriticalSurface = 0;
+ dml_float_t FrameTimeCriticalSurface = 0;
+ dml_uint_t BytePerPixelYCriticalSurface = 0;
+ dml_float_t LinesToFinishSwathTransferStutterCriticalSurface = 0;
+ dml_uint_t DETBufferSizeYCriticalSurface = 0;
+ dml_float_t MinTTUVBlankCriticalSurface = 0;
+ dml_uint_t BlockWidth256BytesYCriticalSurface = 0;
+ dml_bool_t SinglePlaneCriticalSurface = 0;
+ dml_bool_t SinglePipeCriticalSurface = 0;
+ dml_float_t TotalCompressedReadBandwidth = 0;
+ dml_float_t TotalRowReadBandwidth = 0;
+ dml_float_t AverageDCCCompressionRate = 0;
+ dml_float_t EffectiveCompressedBufferSize = 0;
+ dml_float_t PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = 0;
+ dml_float_t StutterBurstTime = 0;
+ dml_uint_t TotalActiveWriteback = 0;
+ dml_float_t LinesInDETY = 0;
+ dml_float_t LinesInDETYRoundedDownToSwath = 0;
+ dml_float_t MaximumEffectiveCompressionLuma = 0;
+ dml_float_t MaximumEffectiveCompressionChroma = 0;
+ dml_float_t TotalZeroSizeRequestReadBandwidth = 0;
+ dml_float_t TotalZeroSizeCompressedReadBandwidth = 0;
+ dml_float_t AverageDCCZeroSizeFraction = 0;
+ dml_float_t AverageZeroSizeCompressionRate = 0;
+
+ dml_bool_t FoundCriticalSurface = false;
+
+ dml_uint_t TotalNumberOfActiveOTG = 0;
+ dml_float_t SinglePixelClock = 0;
+ dml_uint_t SingleHTotal = 0;
+ dml_uint_t SingleVTotal = 0;
+ dml_bool_t SameTiming = true;
+
+ dml_float_t LastStutterPeriod = 0.0;
+ dml_float_t LastZ8StutterPeriod = 0.0;
+
+ dml_uint_t SwathSizeCriticalSurface;
+ dml_uint_t LastChunkOfSwathSize;
+ dml_uint_t MissingPartOfLastSwathOfDETSize;
+
+ TotalZeroSizeRequestReadBandwidth = 0;
+ TotalZeroSizeCompressedReadBandwidth = 0;
+ TotalRowReadBandwidth = 0;
+ TotalCompressedReadBandwidth = 0;
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+ if (p->DCCEnable[k] == true) {
+ if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionLuma = 2;
+ } else {
+ MaximumEffectiveCompressionLuma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / dml_min(p->NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->NetDCCRateLuma[k]);
+ dml_print("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, MaximumEffectiveCompressionLuma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
+
+ if (p->ReadBandwidthSurfaceChroma[k] > 0) {
+ if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
+ MaximumEffectiveCompressionChroma = 2;
+ } else {
+ MaximumEffectiveCompressionChroma = 4;
+ }
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / dml_min(p->NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->NetDCCRateChroma[k]);
+ dml_print("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, MaximumEffectiveCompressionChroma);
+#endif
+ TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k];
+ TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
+ }
+ } else {
+ TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
+ }
+ TotalRowReadBandwidth = TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
+ }
+ }
+
+ AverageDCCCompressionRate = p->TotalDataReadBandwidth / TotalCompressedReadBandwidth;
+ AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
+ dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
+ dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
+ dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
+ dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
+ dml_print("DML::%s: CompbufReservedSpace64B = %u\n", __func__, p->CompbufReservedSpace64B);
+ dml_print("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, p->CompressedBufferSizeInkByte);
+#endif
+ if (AverageDCCZeroSizeFraction == 1) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * AverageZeroSizeCompressionRate;
+ } else if (AverageDCCZeroSizeFraction > 0) {
+ AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
+ EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) +
+ dml_min(((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate,
+ ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
+ dml_print("DML::%s: min 3 = %f\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
+#endif
+ } else {
+ EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
+ (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) +
+ ((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
+ dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
+#endif
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
+ dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+#endif
+
+ *p->StutterPeriod = 0;
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+ LinesInDETY = ((dml_float_t)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
+ LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, p->SwathHeightY[k]);
+ DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((dml_float_t)p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
+ dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
+ dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, LinesInDETY);
+ dml_print("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
+ dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, p->HTotal[k]);
+ dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, p->PixelClock[k]);
+ dml_print("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->VRatio[k]);
+ dml_print("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
+ dml_print("DML::%s: k=%u,PixelClock = %f\n", __func__, k, p->PixelClock[k]);
+#endif
+
+ if (!FoundCriticalSurface || DETBufferingTimeY < *p->StutterPeriod) {
+ dml_bool_t isInterlaceTiming = p->Interlace[k] && !p->ProgressiveToInterlaceUnitInOPP;
+
+ FoundCriticalSurface = true;
+ *p->StutterPeriod = DETBufferingTimeY;
+ FrameTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VTotal[k]/2.0, 1.0) : p->VTotal[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
+ VActiveTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VActive[k]/2.0, 1.0) : p->VActive[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
+ BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
+ SwathWidthYCriticalSurface = p->SwathWidthY[k];
+ SwathHeightYCriticalSurface = p->SwathHeightY[k];
+ BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
+ LinesToFinishSwathTransferStutterCriticalSurface = p->SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
+ DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
+ MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
+ SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
+ SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
+ dml_print("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
+ dml_print("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, MinTTUVBlankCriticalSurface);
+ dml_print("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, FrameTimeCriticalSurface);
+ dml_print("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, BytePerPixelYCriticalSurface);
+ dml_print("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, SwathWidthYCriticalSurface);
+ dml_print("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, SwathHeightYCriticalSurface);
+ dml_print("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, BlockWidth256BytesYCriticalSurface);
+ dml_print("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, SinglePlaneCriticalSurface);
+ dml_print("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, SinglePipeCriticalSurface);
+ dml_print("DML::%s: k=%u, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
+#endif
+ }
+ }
+ }
+
+ PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*p->StutterPeriod * p->TotalDataReadBandwidth, EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, p->ROBBufferSizeInKByte);
+ dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth);
+ dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, p->ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
+ dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
+ dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
+ dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
+ dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
+ dml_print("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
+#endif
+
+ StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW + (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64) + *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW);
+ dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth));
+ dml_print("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
+ dml_print("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+#endif
+ StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
+
+ dml_print("DML::%s: Time to finish residue swath=%f\n", __func__, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
+
+ TotalActiveWriteback = 0;
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->WritebackEnable[k]) {
+ TotalActiveWriteback = TotalActiveWriteback + 1;
+ }
+ }
+
+ if (TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
+ dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
+ dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
+#endif
+ *p->StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitTime + StutterBurstTime) / *p->StutterPeriod) * 100;
+ *p->Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitZ8Time + StutterBurstTime) / *p->StutterPeriod) * 100;
+ *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
+ *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
+ } else {
+ *p->StutterEfficiencyNotIncludingVBlank = 0.;
+ *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
+ *p->NumberOfStutterBurstsPerFrame = 0;
+ *p->Z8NumberOfStutterBurstsPerFrame = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+ if (p->BlendingAndTiming[k] == k) {
+ if (TotalNumberOfActiveOTG == 0) {
+ SinglePixelClock = p->PixelClock[k];
+ SingleHTotal = p->HTotal[k];
+ SingleVTotal = p->VTotal[k];
+ } else if (SinglePixelClock != p->PixelClock[k] || SingleHTotal != p->HTotal[k] || SingleVTotal != p->VTotal[k]) {
+ SameTiming = false;
+ }
+ TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+ }
+ }
+ }
+
+ if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
+ LastStutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
+
+ if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming &&
+ LastStutterPeriod + MinTTUVBlankCriticalSurface > p->StutterEnterPlusExitWatermark) {
+ *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *p->StutterEfficiency = 0;
+ }
+
+ if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
+ LastZ8StutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
+ if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + MinTTUVBlankCriticalSurface > p->Z8StutterEnterPlusExitWatermark) {
+ *p->Z8StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
+ } else {
+ *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
+ }
+ } else {
+ *p->Z8StutterEfficiency = 0.;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
+ dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
+ dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
+ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
+ dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
+ dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
+ dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
+ dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+ SwathSizeCriticalSurface = (dml_uint_t)(BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface));
+ LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024);
+ MissingPartOfLastSwathOfDETSize = (dml_uint_t)(dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - DETBufferSizeYCriticalSurface);
+
+ *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && SinglePlaneCriticalSurface && SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) &&
+ (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface);
+ dml_print("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, DETBufferSizeYCriticalSurface);
+ dml_print("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
+ dml_print("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize);
+ dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize);
+ dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+#endif
+} // CalculateStutterEfficiency
+
+/// \CalculateSwathAndDETConfiguration
+/// @brief Calculates swath widths and heights, the per-surface DET split between luma and chroma, the compressed buffer reserved space and the viewport-size support flags. All inputs are read from, and all results written through, the members of @p p; @p scratch is not referenced by this function.
+static void CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st *scratch,
+ struct CalculateSwathAndDETConfiguration_params_st *p)
+{
+ dml_uint_t MaximumSwathHeightY[__DML_NUM_PLANES__]; // output of CalculateSwathWidth
+ dml_uint_t MaximumSwathHeightC[__DML_NUM_PLANES__]; // output of CalculateSwathWidth
+ dml_uint_t RoundedUpMaxSwathSizeBytesY[__DML_NUM_PLANES__]; // luma swath bytes at the maximum swath height
+ dml_uint_t RoundedUpMaxSwathSizeBytesC[__DML_NUM_PLANES__]; // chroma swath bytes at the maximum swath height
+ dml_uint_t RoundedUpSwathSizeBytesY[__DML_NUM_PLANES__]; // luma swath bytes at the chosen (full or halved) height
+ dml_uint_t RoundedUpSwathSizeBytesC[__DML_NUM_PLANES__]; // chroma swath bytes at the chosen (full or halved) height
+ dml_uint_t SwathWidthSingleDPP[__DML_NUM_PLANES__]; // output of CalculateSwathWidth; not read again in this function
+ dml_uint_t SwathWidthSingleDPPChroma[__DML_NUM_PLANES__]; // output of CalculateSwathWidth; not read again in this function
+
+ dml_uint_t TotalActiveDPP = 0; // sum over surfaces of DPPPerSurface[k], or 1 per surface when ForceSingleDPP is set
+ dml_bool_t NoChromaOrLinearSurfaces = true; // cleared if any surface uses a dml_420_* / dml_rgbe_alpha format or dml_sw_linear tiling
+ dml_uint_t SurfaceDoingUnboundedRequest = 0; // index of the last surface with DPPPerSurface[k] > 0
+
+ dml_uint_t DETBufferSizeInKByteForSwathCalculation; // per-surface DET size used by the swath fit checks below
+
+ const long TTUFIFODEPTH = 8;
+ const long MAXIMUMCOMPRESSION = 4; // both constants are used only in the compbuf_reserved_space_64b formula at the end
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ dml_print("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
+ }
+#endif
+ CalculateSwathWidth(p->ForceSingleDPP,
+ p->NumberOfActiveSurfaces,
+ p->SourcePixelFormat,
+ p->SourceScan,
+ p->ViewportStationary,
+ p->ViewportWidth,
+ p->ViewportHeight,
+ p->ViewportXStart,
+ p->ViewportYStart,
+ p->ViewportXStartC,
+ p->ViewportYStartC,
+ p->SurfaceWidthY,
+ p->SurfaceWidthC,
+ p->SurfaceHeightY,
+ p->SurfaceHeightC,
+ p->ODMMode,
+ p->BytePerPixY,
+ p->BytePerPixC,
+ p->Read256BytesBlockHeightY,
+ p->Read256BytesBlockHeightC,
+ p->Read256BytesBlockWidthY,
+ p->Read256BytesBlockWidthC,
+ p->BlendingAndTiming,
+ p->HActive,
+ p->HRatio,
+ p->DPPPerSurface,
+
+ // Output
+ SwathWidthSingleDPP,
+ SwathWidthSingleDPPChroma,
+ p->SwathWidth,
+ p->SwathWidthChroma,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
+ p->swath_width_luma_ub,
+ p->swath_width_chroma_ub);
+ // Maximum swath size in bytes per plane = upper-bound swath width * DET bytes per pixel * maximum swath height.
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
+ RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
+ dml_print("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
+ dml_print("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
+ dml_print("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+ if (p->SourcePixelFormat[k] == dml_420_10) { // only dml_420_10 gets this extra rounding; presumably dml_ceil(x, 256) rounds up to a multiple of 256 - TODO confirm
+ RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesY[k], 256));
+ RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesC[k], 256));
+ }
+ }
+ // Gather the inputs for the unbounded-request decision: total DPP count and whether any surface has chroma or linear tiling.
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
+ if (p->DPPPerSurface[k] > 0)
+ SurfaceDoingUnboundedRequest = k;
+ if (p->SourcePixelFormat[k] == dml_420_8 || p->SourcePixelFormat[k] == dml_420_10 ||
+ p->SourcePixelFormat[k] == dml_420_12 || p->SourcePixelFormat[k] == dml_rgbe_alpha
+ || p->SurfaceTiling[k] == dml_sw_linear) {
+ NoChromaOrLinearSurfaces = false;
+ }
+ }
+ // Note: only p->Output[0] (the first entry) is passed to UnboundedRequest().
+ *p->UnboundedRequestEnabled = UnboundedRequest(p->UseUnboundedRequestingFinal, TotalActiveDPP,
+ NoChromaOrLinearSurfaces, p->Output[0]);
+ // CalculateDETBufferSize fills p->DETBufferSizeInKByte[] and *p->CompressedBufferSizeInkByte (its two trailing output arguments).
+ CalculateDETBufferSize(p->DETSizeOverride,
+ p->UseMALLForPStateChange,
+ p->ForceSingleDPP,
+ p->NumberOfActiveSurfaces,
+ *p->UnboundedRequestEnabled,
+ p->nomDETInKByte,
+ p->MaxTotalDETInKByte,
+ p->ConfigReturnBufferSizeInKByte,
+ p->MinCompressedBufferSizeInKByte,
+ p->ConfigReturnBufferSegmentSizeInkByte,
+ p->CompressedBufferSegmentSizeInkByteFinal,
+ p->SourcePixelFormat,
+ p->ReadBandwidthLuma,
+ p->ReadBandwidthChroma,
+ RoundedUpMaxSwathSizeBytesY,
+ RoundedUpMaxSwathSizeBytesC,
+ p->DPPPerSurface,
+
+ // Output
+ p->DETBufferSizeInKByte, // per hubp pipe
+ p->CompressedBufferSizeInkByte);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
+ dml_print("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: UseUnboundedRequestingFinal = %u\n", __func__, p->UseUnboundedRequestingFinal);
+ dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
+#endif
+ // Per surface: choose swath heights that fit the DET, record viewport support, then split the DET between the two planes.
+ *p->ViewportSizeSupport = true;
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ // Phantom pipes are checked against a fixed 1024 KByte DET instead of their allocated DETBufferSizeInKByte[k].
+ DETBufferSizeInKByteForSwathCalculation = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe ? 1024 : p->DETBufferSizeInKByte[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
+#endif
+ // Budget is half the DET in bytes. Try in order: both planes at max height; luma halved (luma >= 1.5x chroma); chroma halved (otherwise); else both halved.
+ if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k];
+ p->SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ p->SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
+ } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k];
+ p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
+ RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
+ } else {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
+ RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
+ }
+ // Unsupported if even the half-height swaths exceed half the DET, or if a swath width exceeds its maximum (chroma checked only when SwathHeightC > 0).
+ if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
+ p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
+ *p->ViewportSizeSupport = false;
+ p->ViewportSizeSupportPerSurface[k] = false;
+ } else {
+ p->ViewportSizeSupportPerSurface[k] = true;
+ }
+ // DET split uses the allocated p->DETBufferSizeInKByte[k] (not the phantom-pipe override above): all luma, half/half, or about 2/3 luma and the rest chroma.
+ if (p->SwathHeightC[k] == 0) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u All DET for plane0\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
+ p->DETBufferSizeC[k] = 0;
+ } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u Half DET for plane0, half for plane1\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
+ p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
+ } else {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = (dml_uint_t)(dml_floor(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); // presumably rounds the luma share down to a multiple of 1024 - TODO confirm dml_floor semantics
+ p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; // chroma gets whatever the luma share leaves over
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
+ dml_print("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
+ dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
+ dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+ dml_print("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
+ dml_print("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
+#endif
+
+ }
+ // Reserved compressed-buffer space: by default the byte size of two pixel chunks, divided by 64 (and by 256 for the zs value).
+ *p->compbuf_reserved_space_64b = 2 * p->PixelChunkSizeInKByte * 1024 / 64;
+ if (*p->UnboundedRequestEnabled) { // raise the reservation to at least ROB size minus (luma swath bytes * TTUFIFODEPTH / MAXIMUMCOMPRESSION), both divided by 64
+ *p->compbuf_reserved_space_64b = dml_max(*p->compbuf_reserved_space_64b,
+ (dml_float_t)(p->ROBBufferSizeInKByte * 1024/64)
+ - (dml_float_t)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / MAXIMUMCOMPRESSION/64));
+ }
+ *p->compbuf_reserved_space_zs = 2 * p->PixelChunkSizeInKByte * 1024 / 256;
+} // CalculateSwathAndDETConfiguration
+
+static void CalculateSwathWidth(
+ dml_bool_t ForceSingleDPP,
+ dml_uint_t NumberOfActiveSurfaces,
+ enum dml_source_format_class SourcePixelFormat[],
+ enum dml_rotation_angle SourceScan[],
+ dml_bool_t ViewportStationary[],
+ dml_uint_t ViewportWidth[],
+ dml_uint_t ViewportHeight[],
+ dml_uint_t ViewportXStart[],
+ dml_uint_t ViewportYStart[],
+ dml_uint_t ViewportXStartC[],
+ dml_uint_t ViewportYStartC[],
+ dml_uint_t SurfaceWidthY[],
+ dml_uint_t SurfaceWidthC[],
+ dml_uint_t SurfaceHeightY[],
+ dml_uint_t SurfaceHeightC[],
+ enum dml_odm_mode ODMMode[],
+ dml_uint_t BytePerPixY[],
+ dml_uint_t BytePerPixC[],
+ dml_uint_t Read256BytesBlockHeightY[],
+ dml_uint_t Read256BytesBlockHeightC[],
+ dml_uint_t Read256BytesBlockWidthY[],
+ dml_uint_t Read256BytesBlockWidthC[],
+ dml_uint_t BlendingAndTiming[],
+ dml_uint_t HActive[],
+ dml_float_t HRatio[],
+ dml_uint_t DPPPerSurface[],
+
+ // Output
+ dml_uint_t SwathWidthSingleDPPY[],
+ dml_uint_t SwathWidthSingleDPPC[],
+ dml_uint_t SwathWidthY[], // per-pipe
+ dml_uint_t SwathWidthC[], // per-pipe
+ dml_uint_t MaximumSwathHeightY[],
+ dml_uint_t MaximumSwathHeightC[],
+ dml_uint_t swath_width_luma_ub[], // per-pipe
+ dml_uint_t swath_width_chroma_ub[]) // per-pipe
+{
+ enum dml_odm_mode MainSurfaceODMMode;
+ dml_uint_t surface_width_ub_l;
+ dml_uint_t surface_height_ub_l;
+ dml_uint_t surface_width_ub_c = 0;
+ dml_uint_t surface_height_ub_c = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+#endif
+
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!dml_is_vertical_rotation(SourceScan[k])) {
+ SwathWidthSingleDPPY[k] = ViewportWidth[k];
+ } else {
+ SwathWidthSingleDPPY[k] = ViewportHeight[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, ViewportWidth[k]);
+ dml_print("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, ViewportHeight[k]);
+ dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
+#endif
+
+ MainSurfaceODMMode = ODMMode[k];
+ for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
+ if (BlendingAndTiming[k] == j) {
+ MainSurfaceODMMode = ODMMode[j];
+ }
+ }
+
+ if (ForceSingleDPP) {
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+ } else {
+ if (MainSurfaceODMMode == dml_odm_mode_combine_4to1) {
+ SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k], true)));
+ } else if (MainSurfaceODMMode == dml_odm_mode_combine_2to1) {
+ SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k], true)));
+ } else if (DPPPerSurface[k] == 2) {
+ SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
+ } else {
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u HActive=%u\n", __func__, k, HActive[k]);
+ dml_print("DML::%s: k=%u HRatio=%f\n", __func__, k, HRatio[k]);
+ dml_print("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
+ dml_print("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
+ dml_print("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
+#endif
+
+ if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
+ SwathWidthC[k] = SwathWidthY[k] / 2;
+ SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
+ } else {
+ SwathWidthC[k] = SwathWidthY[k];
+ SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
+ }
+
+ if (ForceSingleDPP == true) {
+ SwathWidthY[k] = SwathWidthSingleDPPY[k];
+ SwathWidthC[k] = SwathWidthSingleDPPC[k];
+ }
+
+ surface_width_ub_l = (dml_uint_t)dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
+ surface_height_ub_l = (dml_uint_t)dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
+
+ if (!dml_is_vertical_rotation(SourceScan[k])) {
+ MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_floor(ViewportXStart[k] + SwathWidthY[k] + Read256BytesBlockWidthY[k] - 1, Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStart[k], Read256BytesBlockWidthY[k])));
+ } else {
+ swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]));
+ }
+ if (BytePerPixC[k] > 0) {
+ surface_width_ub_c = (dml_uint_t)dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_floor(ViewportXStartC[k] + SwathWidthC[k] + Read256BytesBlockWidthC[k] - 1, Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], Read256BytesBlockWidthC[k])));
+ } else {
+ swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]));
+ }
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ } else {
+ MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+ MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])));
+ } else {
+ swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
+ }
+ if (BytePerPixC[k] > 0) {
+ surface_height_ub_c = (dml_uint_t)dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
+ if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
+ swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_floor(ViewportYStartC[k] + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], Read256BytesBlockHeightC[k])));
+ } else {
+ swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
+ }
+ } else {
+ swath_width_chroma_ub[k] = 0;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
+ dml_print("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
+ dml_print("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
+ dml_print("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
+ dml_print("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
+ dml_print("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
+ dml_print("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
+ dml_print("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
+ dml_print("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, ViewportStationary[k]);
+ dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
+ dml_print("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
+ dml_print("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
+#endif
+
+ }
+} // CalculateSwathWidth
+
+static noinline_for_stack dml_float_t CalculateExtraLatency(
+	dml_uint_t RoundTripPingLatencyCycles,
+	dml_uint_t ReorderingBytes,
+	dml_float_t DCFCLK,
+	dml_uint_t TotalNumberOfActiveDPP,
+	dml_uint_t PixelChunkSizeInKByte,
+	dml_uint_t TotalNumberOfDCCActiveDPP,
+	dml_uint_t MetaChunkSize,
+	dml_float_t ReturnBW,
+	dml_bool_t GPUVMEnable,
+	dml_bool_t HostVMEnable,
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_uint_t NumberOfDPP[],
+	dml_uint_t dpte_group_bytes[],
+	dml_float_t HostVMInefficiencyFactor,
+	dml_uint_t HostVMMinPageSize,
+	dml_uint_t HostVMMaxNonCachedPageTableLevels)
+{
+	// Extra latency is the sum of two terms: the round-trip ping cycles plus
+	// __DML_ARB_TO_RET_DELAY__ divided by DCFCLK, and the extra-latency byte
+	// count (from CalculateExtraLatencyBytes) divided by ReturnBW.
+	dml_float_t extra_bytes = CalculateExtraLatencyBytes(
+		ReorderingBytes,
+		TotalNumberOfActiveDPP,
+		PixelChunkSizeInKByte,
+		TotalNumberOfDCCActiveDPP,
+		MetaChunkSize,
+		GPUVMEnable,
+		HostVMEnable,
+		NumberOfActiveSurfaces,
+		NumberOfDPP,
+		dpte_group_bytes,
+		HostVMInefficiencyFactor,
+		HostVMMinPageSize,
+		HostVMMaxNonCachedPageTableLevels);
+
+	dml_float_t extra_latency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + extra_bytes / ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
+	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, extra_bytes);
+	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+	dml_print("DML::%s: ExtraLatency=%f\n", __func__, extra_latency);
+#endif
+
+	return extra_latency;
+} // CalculateExtraLatency
+
+static dml_uint_t CalculateHostVMDynamicLevels(
+	dml_bool_t GPUVMEnable,
+	dml_bool_t HostVMEnable,
+	dml_uint_t HostVMMinPageSize,
+	dml_uint_t HostVMMaxNonCachedPageTableLevels)
+{
+	// Returns 0 unless both GPUVMEnable and HostVMEnable are set; otherwise the max non-cached level count
+	// reduced by 0, 1 or 2 (never below 0) as HostVMMinPageSize reaches 2048 and then 1048576.
+	if (!GPUVMEnable || !HostVMEnable)
+		return 0;
+
+	if (HostVMMinPageSize < 2048)
+		return HostVMMaxNonCachedPageTableLevels;
+
+	// HostVMMinPageSize >= 2048 is already established at this point.
+	if (HostVMMinPageSize < 1048576)
+		return (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - 1);
+
+	return (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - 2);
+}
+
+static dml_uint_t CalculateExtraLatencyBytes(dml_uint_t ReorderingBytes,
+	dml_uint_t TotalNumberOfActiveDPP,
+	dml_uint_t PixelChunkSizeInKByte,
+	dml_uint_t TotalNumberOfDCCActiveDPP,
+	dml_uint_t MetaChunkSize,
+	dml_bool_t GPUVMEnable,
+	dml_bool_t HostVMEnable,
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_uint_t NumberOfDPP[],
+	dml_uint_t dpte_group_bytes[],
+	dml_float_t HostVMInefficiencyFactor,
+	dml_uint_t HostVMMinPageSize,
+	dml_uint_t HostVMMaxNonCachedPageTableLevels)
+{
+	// Reordering bytes plus pixel/meta chunk bytes (chunk sizes * 1024); with GPUVM enabled every surface also adds NumberOfDPP * dpte_group_bytes * (1 + 8 * dynamic host VM levels) * HostVMInefficiencyFactor.
+	dml_uint_t dyn_levels = CalculateHostVMDynamicLevels(GPUVMEnable, HostVMEnable, HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
+	dml_float_t bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
+
+	if (GPUVMEnable != true)
+		return (dml_uint_t)(bytes);
+	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k)
+		bytes += NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * dyn_levels) * HostVMInefficiencyFactor;
+	return (dml_uint_t)(bytes);
+}
+
+static dml_float_t CalculateUrgentLatency(
+	dml_float_t UrgentLatencyPixelDataOnly,
+	dml_float_t UrgentLatencyPixelMixedWithVMData,
+	dml_float_t UrgentLatencyVMDataOnly,
+	dml_bool_t DoUrgentLatencyAdjustment,
+	dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
+	dml_float_t UrgentLatencyAdjustmentFabricClockReference,
+	dml_float_t FabricClock)
+{
+	// Start from the largest of the three urgent latency inputs.
+	dml_float_t latency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+
+	if (DoUrgentLatencyAdjustment != true)
+		return latency;
+	// Adjustment term: component * (reference fabric clock / actual fabric clock - 1).
+	return latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+}
+
+// Returns the required DTBCLK: max(PixelClock / 4 * OutputBpp / 24, 25) without DSC, otherwise the largest of the pixel-word and tribyte rates (each / 4, floor 25) times 1.002.
+static dml_float_t RequiredDTBCLK(dml_bool_t DSCEnable,
+	dml_float_t PixelClock,
+	enum dml_output_format_class OutputFormat,
+	dml_float_t OutputBpp,
+	dml_uint_t DSCSlices,
+	dml_uint_t HTotal,
+	dml_uint_t HActive,
+	dml_uint_t AudioRate,
+	dml_uint_t AudioLayout)
+{
+	if (DSCEnable == true) {
+		// HActive / DSCSlices is divided in floating point so the innermost dml_ceil really rounds the per-slice width up; an integer quotient would already be truncated.
+		dml_float_t PixelWordRate = PixelClock / (OutputFormat == dml_444 ? 1 : 2);
+		dml_float_t HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * dml_ceil((dml_float_t) HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
+		dml_float_t HCBlank = 64 + 32 * dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
+		dml_float_t AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
+		dml_float_t HActiveTribyteRate = PixelWordRate * HCActive / HActive;
+		return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
+	}
+	return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
+}
+
+static void UseMinimumDCFCLK(struct display_mode_lib_scratch_st *scratch, struct UseMinimumDCFCLK_params_st *p)
+{
+	// For each of the two j-indexed entries, estimates the DCFCLK needed for average and for peak (prefetch) bandwidth
+	// and stores min(p->DCFCLKPerState, 1.05 * max(average, peak)) in p->DCFCLKState[j]. Working values live in *scratch.
+	struct UseMinimumDCFCLK_locals_st *s = &scratch->UseMinimumDCFCLK_locals;
+
+	s->NormalEfficiency = p->PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
+	for (dml_uint_t j = 0; j < 2; ++j) {
+		s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = 0;
+		for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+			s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = s->TotalMaxPrefetchFlipDPTERowBandwidth[j] + p->NoOfDPP[j][k] * p->DPTEBytesPerRow[j][k] / (15.75 * p->HTotal[k] / p->PixelClock[k]);
+		}
+
+		// Bound the loop with k < count, as the neighbouring loops do: with an unsigned count,
+		// `count - 1` wraps around when the count is 0 and the loop would run far past the arrays.
+		for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k)
+			s->NoOfDPPState[k] = p->NoOfDPP[j][k];
+
+		s->DPTEBandwidth = s->TotalMaxPrefetchFlipDPTERowBandwidth[j];
+
+		s->DCFCLKRequiredForAverageBandwidth = dml_max(p->ProjectedDCFCLKDeepSleep[j], s->DPTEBandwidth / s->NormalEfficiency / p->ReturnBusWidth);
+
+		s->ExtraLatencyBytes = CalculateExtraLatencyBytes(p->ReorderingBytes, p->TotalNumberOfActiveDPP[j], p->PixelChunkSizeInKByte, p->TotalNumberOfDCCActiveDPP[j],
+				p->MetaChunkSize, p->GPUVMEnable, p->HostVMEnable, p->NumberOfActiveSurfaces, s->NoOfDPPState, p->dpte_group_bytes,
+				1, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels);
+		s->ExtraLatencyCycles = p->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + s->ExtraLatencyBytes / s->NormalEfficiency / p->ReturnBusWidth;
+		for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+			dml_float_t DCFCLKCyclesRequiredInPrefetch;
+			dml_float_t PrefetchTime;
+
+			s->PixelDCFCLKCyclesRequiredInPrefetch[k] = (p->PrefetchLinesY[j][k] * p->swath_width_luma_ub_all_states[j][k] * p->BytePerPixelY[k] + p->PrefetchLinesC[j][k] * p->swath_width_chroma_ub_all_states[j][k] * p->BytePerPixelC[k]) / s->NormalEfficiency / p->ReturnBusWidth;
+			DCFCLKCyclesRequiredInPrefetch = 2 * s->ExtraLatencyCycles / s->NoOfDPPState[k] + p->PDEAndMetaPTEBytesPerFrame[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth * (p->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * p->DPTEBytesPerRow[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth + 2 * p->MetaRowBytes[j][k] / s->NormalEfficiency / p->ReturnBusWidth + s->PixelDCFCLKCyclesRequiredInPrefetch[k];
+			s->PrefetchPixelLinesTime[k] = dml_max(p->PrefetchLinesY[j][k], p->PrefetchLinesC[j][k]) * p->HTotal[k] / p->PixelClock[k];
+			s->DynamicMetadataVMExtraLatency[k] = (p->GPUVMEnable == true && p->DynamicMetadataEnable[k] == true && p->DynamicMetadataVMEnabled == true) ? p->UrgLatency * p->GPUVMMaxPageTableLevels * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+			// NOTE(review): s->MinimumTWait is overwritten for every surface, yet the MaximumTvmPlus2Tr0PlusTsw loop further down reads it after this loop has ended (i.e. the last surface's value) - confirm this is intended.
+			s->MinimumTWait = CalculateTWait(p->MaxPrefetchMode,
+				p->UseMALLForPStateChange[k],
+				p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+				p->DRRDisplay[k],
+				p->DRAMClockChangeLatencyFinal,
+				p->FCLKChangeLatency,
+				p->UrgLatency,
+				p->SREnterPlusExitTime);
+
+			PrefetchTime = (p->MaximumVStartup[j][k] - 1) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - p->UrgLatency * ((p->GPUVMMaxPageTableLevels <= 2 ? p->GPUVMMaxPageTableLevels : p->GPUVMMaxPageTableLevels - 2) * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - s->DynamicMetadataVMExtraLatency[k];
+
+			if (PrefetchTime > 0) {
+				dml_float_t ExpectedVRatioPrefetch;
+				ExpectedVRatioPrefetch = s->PrefetchPixelLinesTime[k] / (PrefetchTime * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
+				s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->NoOfDPPState[k] * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / s->PrefetchPixelLinesTime[k] * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4);
+				if (p->HostVMEnable == true || p->ImmediateFlipRequirement == true) {
+					s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->DCFCLKRequiredForPeakBandwidthPerSurface[k] + s->NoOfDPPState[k] * s->DPTEBandwidth / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth;
+				}
+			} else {
+				s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState;
+			}
+			if (p->DynamicMetadataEnable[k] == true) {
+				dml_float_t TSetupPipe;
+				dml_float_t TdmbfPipe;
+				dml_float_t TdmsksPipe;
+				dml_float_t TdmecPipe;
+				dml_float_t AllowedTimeForUrgentExtraLatency;
+
+				CalculateVUpdateAndDynamicMetadataParameters(
+					p->MaxInterDCNTileRepeaters,
+					p->RequiredDPPCLKPerSurface[j][k],
+					p->RequiredDISPCLK[j],
+					p->ProjectedDCFCLKDeepSleep[j],
+					p->PixelClock[k],
+					p->HTotal[k],
+					p->VTotal[k] - p->VActive[k],
+					p->DynamicMetadataTransmittedBytes[k],
+					p->DynamicMetadataLinesBeforeActiveRequired[k],
+					p->Interlace[k],
+					p->ProgressiveToInterlaceUnitInOPP,
+
+					// Output
+					&TSetupPipe,
+					&TdmbfPipe,
+					&TdmecPipe,
+					&TdmsksPipe,
+					&s->dummy1,
+					&s->dummy2,
+					&s->dummy3);
+
+				AllowedTimeForUrgentExtraLatency = p->MaximumVStartup[j][k] * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe - TdmsksPipe - s->DynamicMetadataVMExtraLatency[k];
+				if (AllowedTimeForUrgentExtraLatency > 0) {
+					s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = dml_max(s->DCFCLKRequiredForPeakBandwidthPerSurface[k], s->ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
+				} else {
+					s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState;
+				}
+			}
+		}
+		s->DCFCLKRequiredForPeakBandwidth = 0;
+		for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) // sum of the per-surface peak requirements
+			s->DCFCLKRequiredForPeakBandwidth = s->DCFCLKRequiredForPeakBandwidth + s->DCFCLKRequiredForPeakBandwidthPerSurface[k];
+		s->MinimumTvmPlus2Tr0 = p->UrgLatency * (p->GPUVMEnable == true ? (p->HostVMEnable == true ? (p->GPUVMMaxPageTableLevels + 2) * (p->HostVMMaxNonCachedPageTableLevels + 1) - 1 : p->GPUVMMaxPageTableLevels + 1) : 0);
+		for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+			dml_float_t MaximumTvmPlus2Tr0PlusTsw;
+			MaximumTvmPlus2Tr0PlusTsw = (p->MaximumVStartup[j][k] - 2) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - s->DynamicMetadataVMExtraLatency[k];
+			if (MaximumTvmPlus2Tr0PlusTsw <= s->MinimumTvmPlus2Tr0 + s->PrefetchPixelLinesTime[k] / 4) {
+				s->DCFCLKRequiredForPeakBandwidth = p->DCFCLKPerState;
+			} else {
+				s->DCFCLKRequiredForPeakBandwidth = dml_max3(s->DCFCLKRequiredForPeakBandwidth,
+					2 * s->ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0 - s->PrefetchPixelLinesTime[k] / 4),
+					(2 * s->ExtraLatencyCycles + s->PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0));
+			}
+		}
+		p->DCFCLKState[j] = dml_min(p->DCFCLKPerState, 1.05 * dml_max(s->DCFCLKRequiredForAverageBandwidth, s->DCFCLKRequiredForPeakBandwidth));
+	}
+}
+
+
+static dml_bool_t UnboundedRequest(enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
+	dml_uint_t TotalNumberOfActiveDPP,
+	dml_bool_t NoChromaOrLinear,
+	enum dml_output_encoder_class Output)
+{
+	// The eDP-only policy rules out every output that is not eDP.
+	if (UseUnboundedRequestingFinal == dml_unbounded_requesting_edp_only && Output != dml_edp)
+		return false;
+	// Otherwise the result is true only when the policy is not "disable",
+	// exactly one DPP is active and NoChromaOrLinear is set.
+	if (UseUnboundedRequestingFinal == dml_unbounded_requesting_disable)
+		return false;
+	return (TotalNumberOfActiveDPP == 1 && NoChromaOrLinear);
+}
+
+static void CalculateSurfaceSizeInMall(
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_uint_t MALLAllocatedForDCN,
+ enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
+ dml_bool_t DCCEnable[],
+ dml_bool_t ViewportStationary[],
+ dml_uint_t ViewportXStartY[],
+ dml_uint_t ViewportYStartY[],
+ dml_uint_t ViewportXStartC[],
+ dml_uint_t ViewportYStartC[],
+ dml_uint_t ViewportWidthY[],
+ dml_uint_t ViewportHeightY[],
+ dml_uint_t BytesPerPixelY[],
+ dml_uint_t ViewportWidthC[],
+ dml_uint_t ViewportHeightC[],
+ dml_uint_t BytesPerPixelC[],
+ dml_uint_t SurfaceWidthY[],
+ dml_uint_t SurfaceWidthC[],
+ dml_uint_t SurfaceHeightY[],
+ dml_uint_t SurfaceHeightC[],
+ dml_uint_t Read256BytesBlockWidthY[],
+ dml_uint_t Read256BytesBlockWidthC[],
+ dml_uint_t Read256BytesBlockHeightY[],
+ dml_uint_t Read256BytesBlockHeightC[],
+ dml_uint_t ReadBlockWidthY[],
+ dml_uint_t ReadBlockWidthC[],
+ dml_uint_t ReadBlockHeightY[],
+ dml_uint_t ReadBlockHeightC[],
+
+ // Output: per-surface size, and whether the MALL-enabled surfaces together exceed the allocation
+ dml_uint_t SurfaceSizeInMALL[],
+ dml_bool_t *ExceededMALLSize)
+{
+ dml_uint_t TotalSurfaceSizeInMALL = 0;
+ // Per surface: luma footprint (width x height x BytesPerPixelY), plus chroma when ReadBlockWidthC[k] > 0, plus DCC terms when DCCEnable[k]. dml_ceil/dml_floor presumably round to a multiple of their second argument.
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (ViewportStationary[k]) { // stationary: count the blocks spanned from the viewport start to its end, capped at the block-rounded surface size
+ SurfaceSizeInMALL[k] = (dml_uint_t)(dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], ReadBlockWidthY[k])) *
+ dml_min(dml_ceil(SurfaceHeightY[k], ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) *
+ BytesPerPixelY[k]);
+ // Same footprint formula for chroma, skipped when ReadBlockWidthC[k] is 0
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * BytesPerPixelC[k]);
+ }
+ if (DCCEnable[k] == true) { // DCC term: footprint on a grid of 8x the 256-byte block size, scaled by BytesPerPixel / 256
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) *
+ dml_min(dml_ceil(SurfaceHeightY[k], 8 * Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256);
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_min(dml_ceil(SurfaceWidthC[k], 8 * Read256BytesBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1, 8 * Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], 8 * Read256BytesBlockWidthC[k])) *
+ dml_min(dml_ceil(SurfaceHeightC[k], 8 * Read256BytesBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1, 8 * Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256);
+ }
+ }
+ } else { // not stationary: viewport size padded by (block - 1), capped at the surface size, then rounded to whole blocks; the start offsets are not used
+ SurfaceSizeInMALL[k] = (dml_uint_t)(dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
+ if (ReadBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
+ }
+ if (DCCEnable[k] == true) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1), 8 * Read256BytesBlockWidthY[k]) *
+ dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1), 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256);
+ // Chroma DCC term, skipped when Read256BytesBlockWidthC[k] is 0
+ if (Read256BytesBlockWidthC[k] > 0) {
+ SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
+ dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1), 8 * Read256BytesBlockWidthC[k]) *
+ dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1), 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256);
+ }
+ }
+ }
+ }
+ // Only surfaces whose UseMALLForStaticScreen is dml_use_mall_static_screen_enable count toward the total
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable)
+ TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
+ }
+ *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024); // MALLAllocatedForDCN is presumably in MB (scaled by 1024 * 1024) - TODO confirm
+} // CalculateSurfaceSizeInMall
+
+static void CalculateDETBufferSize(
+ dml_uint_t DETSizeOverride[], // per surface; a value > 0 is used as that surface's DET size
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+ dml_bool_t ForceSingleDPP, // when set, every surface is counted as one DPP instead of DPPPerSurface[k]
+ dml_uint_t NumberOfActiveSurfaces,
+ dml_bool_t UnboundedRequestEnabled,
+ dml_uint_t nomDETInKByte, // upper bound of the two-swath search (less one segment for the 4:2:0 formats)
+ dml_uint_t MaxTotalDETInKByte, // starting size of the DET pool shared by the surfaces
+ dml_uint_t ConfigReturnBufferSizeInKByte,
+ dml_uint_t MinCompressedBufferSizeInKByte,
+ dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
+ dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
+ enum dml_source_format_class SourcePixelFormat[],
+ dml_float_t ReadBandwidthLuma[],
+ dml_float_t ReadBandwidthChroma[],
+ dml_uint_t RoundedUpMaxSwathSizeBytesY[],
+ dml_uint_t RoundedUpMaxSwathSizeBytesC[],
+ dml_uint_t DPPPerSurface[],
+ // Output
+ dml_uint_t DETBufferSizeInKByte[],
+ dml_uint_t *CompressedBufferSizeInkByte)
+{
+ dml_uint_t DETBufferSizePoolInKByte;
+ dml_uint_t NextDETBufferPieceInKByte;
+ dml_bool_t DETPieceAssignedToThisSurfaceAlready[__DML_NUM_PLANES__];
+ dml_bool_t NextPotentialSurfaceToAssignDETPieceFound;
+ dml_uint_t NextSurfaceToAssignDETPiece;
+ dml_float_t TotalBandwidth;
+ dml_float_t BandwidthOfSurfacesNotAssignedDETPiece;
+ dml_uint_t max_minDET;
+ dml_uint_t minDET;
+ dml_uint_t minDET_pipe;
+ // Sizes the per-surface DET buffers (DETBufferSizeInKByte[]) and the compressed buffer (*CompressedBufferSizeInkByte).
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
+ dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
+ dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+ dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
+ dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
+ dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
+ dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
+ dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %u\n", __func__, CompressedBufferSegmentSizeInkByteFinal);
+#endif
+ // Unbounded request: only surface 0 is sized, and the compressed buffer takes what is left of the return buffer.
+ // Note: Will use default det size if that fits 2 swaths
+ if (UnboundedRequestEnabled) {
+ if (DETSizeOverride[0] > 0) {
+ DETBufferSizeInKByte[0] = DETSizeOverride[0];
+ } else {
+ DETBufferSizeInKByte[0] = (dml_uint_t) dml_max(128.0, dml_ceil(2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[0] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
+ }
+ *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
+ } else {
+ DETBufferSizePoolInKByte = MaxTotalDETInKByte;
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DETBufferSizeInKByte[k] = 0;
+ if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
+ max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
+ } else {
+ max_minDET = nomDETInKByte;
+ }
+ minDET = 128;
+ minDET_pipe = 0;
+
+ // Grow minDET from 128 in segment-size steps, up to max_minDET, until it can hold 2 full swaths (Y + C)
+ while (minDET <= max_minDET && minDET_pipe == 0) {
+ if (2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
+ minDET_pipe = minDET;
+ minDET = minDET + ConfigReturnBufferSegmentSizeInkByte;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u minDET = %u\n", __func__, k, minDET);
+ dml_print("DML::%s: k=%u max_minDET = %u\n", __func__, k, max_minDET);
+ dml_print("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, minDET_pipe);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
+ dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
+#endif
+ // Nothing up to max_minDET fits two swaths: fall back to one swath (Y + C) passed through dml_ceil with the segment size, at least 128
+ if (minDET_pipe == 0) {
+ minDET_pipe = (dml_uint_t)(dml_max(128, dml_ceil(((dml_float_t)RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, minDET_pipe);
+#endif
+ }
+ // Phantom pipes get no DET; an override is taken as-is; otherwise minDET_pipe is taken only if the pool still covers it for every DPP of the surface
+ if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) {
+ DETBufferSizeInKByte[k] = 0;
+ } else if (DETSizeOverride[k] > 0) {
+ DETBufferSizeInKByte[k] = DETSizeOverride[k];
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
+ } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
+ DETBufferSizeInKByte[k] = minDET_pipe;
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
+ dml_print("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, DETSizeOverride[k]);
+ dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
+ dml_print("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, DETBufferSizePoolInKByte);
+#endif
+ }
+ // Total read bandwidth (luma + chroma) of all surfaces that are not phantom pipes
+ TotalBandwidth = 0;
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
+ TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
+ }
+ dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
+#endif
+ dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); // NOTE(review): not under __DML_VBA_DEBUG__, unlike the other prints in this function - confirm intended
+ BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ // A surface needs no further piece if it is a phantom pipe, has an override, or its share of MaxTotalDETInKByte already meets its share of TotalBandwidth
+ if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ } else if (DETSizeOverride[k] > 0 || (((dml_float_t) (ForceSingleDPP ? 1 : DPPPerSurface[k]) * (dml_float_t) DETBufferSizeInKByte[k] / (dml_float_t) MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
+ } else {
+ DETPieceAssignedToThisSurfaceAlready[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
+ dml_print("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, BandwidthOfSurfacesNotAssignedDETPiece);
+#endif
+ }
+ // Hand out the rest of the pool, at most one surface per pass, picking the unassigned surface with the lowest read bandwidth first
+ for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
+ NextPotentialSurfaceToAssignDETPieceFound = false;
+ NextSurfaceToAssignDETPiece = 0;
+
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
+ dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
+ dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, NextSurfaceToAssignDETPiece);
+#endif
+ if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
+ ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
+ NextSurfaceToAssignDETPiece = k;
+ NextPotentialSurfaceToAssignDETPieceFound = true;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
+ dml_print("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
+#endif
+ }
+
+ if (NextPotentialSurfaceToAssignDETPieceFound) {
+ // Note: To show the banker's rounding behavior in VBA and also the fact that the DET buffer size varies due to precision issue
+ //
+ //dml_float_t tmp1 = ((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
+ // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+ //dml_float_t tmp2 = dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
+ // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
+ //
+ //dml_print("DML::%s: j=%u, tmp1 = %f\n", __func__, j, tmp1);
+ //dml_print("DML::%s: j=%u, tmp2 = %f\n", __func__, j, tmp2);
+ // Piece = the surface's bandwidth-proportional share of the pool, passed through dml_round in units of (DPP count * segment size), capped by the pool floored to that unit
+ NextDETBufferPieceInKByte = (dml_uint_t)(dml_min(
+ dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
+ ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte), true)
+ * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
+ dml_floor((dml_float_t) DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, DETBufferSizePoolInKByte);
+ dml_print("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, NextSurfaceToAssignDETPiece);
+ dml_print("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ dml_print("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
+ dml_print("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, NextDETBufferPieceInKByte);
+ dml_print("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+ // The piece covers the whole surface; DETBufferSizeInKByte[] grows by the per-DPP amount, while the pool shrinks by the full piece
+ DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("to %u\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
+#endif
+
+ DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
+ DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
+ BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
+ }
+ }
+ *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
+ }
+ *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / ConfigReturnBufferSegmentSizeInkByte;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
+ for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
+ dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+ }
+#endif
+} // CalculateDETBufferSize
+
+
+/// @brief Derive the return-buffer sizing bounds: total DET ceiling, nominal DET size per DPP, and the compressed-buffer floor
+static void CalculateMaxDETAndMinCompressedBufferSize(
+	dml_uint_t ConfigReturnBufferSizeInKByte,
+	dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
+	dml_uint_t ROBBufferSizeInKByte, // only echoed in the debug trace
+	dml_uint_t MaxNumDPP,
+	dml_bool_t nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+	dml_uint_t nomDETInKByteOverrideValue, // VBA_DELTA
+
+	// Output
+	dml_uint_t *MaxTotalDETInKByte,
+	dml_uint_t *nomDETInKByte,
+	dml_uint_t *MinCompressedBufferSizeInKByte)
+{
+	const dml_uint_t det_ceiling = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte; // whole return buffer minus one segment
+
+	*MaxTotalDETInKByte = det_ceiling;
+	*nomDETInKByte = (dml_uint_t)(dml_floor((dml_float_t) det_ceiling / (dml_float_t) MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
+	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - det_ceiling;
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
+	dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
+	dml_print("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
+	dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
+	dml_print("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
+	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
+#endif
+	if (!nomDETInKByteOverrideEnable)
+		return;
+	*nomDETInKByte = nomDETInKByteOverrideValue; // the override replaces the computed nominal size
+	dml_print("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
+} // CalculateMaxDETAndMinCompressedBufferSize
+
+/// @brief Calculate all the RQ request attributes, like row height and # swath. Inputs and outputs both travel through @p p; per-surface intermediates are kept in scratch->CalculateVMRowAndSwath_locals.
+static void CalculateVMRowAndSwath(struct display_mode_lib_scratch_st *scratch,
+ struct CalculateVMRowAndSwath_params_st *p)
+{
+ struct CalculateVMRowAndSwath_locals_st *s = &scratch->CalculateVMRowAndSwath_locals;
+ // Computed once; applied below as the (1 + 8 * HostVMDynamicLevels) multiplier on the PDE/meta-PTE and PTE row byte counts.
+ s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->GPUVMEnable, p->HostVMEnable, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels);
+ // Pass 1 (per surface): group byte sizes, PTE buffer split, VM/row byte counts, prefetch source lines and buffer-fit flags.
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->HostVMEnable == true) {
+ p->vm_group_bytes[k] = 512;
+ p->dpte_group_bytes[k] = 512;
+ } else if (p->GPUVMEnable == true) {
+ p->vm_group_bytes[k] = 2048;
+ if (p->GPUVMMinPageSizeKBytes[k] >= 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan)) {
+ p->dpte_group_bytes[k] = 512;
+ } else {
+ p->dpte_group_bytes[k] = 2048;
+ }
+ } else {
+ p->vm_group_bytes[k] = 0;
+ p->dpte_group_bytes[k] = 0;
+ }
+ // The three 4:2:0 formats and rgbe_alpha get a chroma (C) pass; every other format zeroes the C-side values in the else branch.
+ if (p->myPipe[k].SourcePixelFormat == dml_420_8 || p->myPipe[k].SourcePixelFormat == dml_420_10 ||
+ p->myPipe[k].SourcePixelFormat == dml_420_12 || p->myPipe[k].SourcePixelFormat == dml_rgbe_alpha) {
+ if ((p->myPipe[k].SourcePixelFormat == dml_420_10 || p->myPipe[k].SourcePixelFormat == dml_420_12) && !dml_is_vertical_rotation(p->myPipe[k].SourceScan)) {
+ s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2; // 4:2:0 10/12-bit, not vertically rotated: luma and chroma each get half of the combined budget
+ s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
+ } else {
+ s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
+ s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
+ }
+ // Scalar return value: overwritten on every iteration and consumed by the PDEAndMetaPTEBytesFrame[k] sum later in this same iteration.
+ s->PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
+ p->myPipe[k].ViewportStationary,
+ p->myPipe[k].DCCEnable,
+ p->myPipe[k].DPPPerSurface,
+ p->myPipe[k].BlockHeight256BytesC,
+ p->myPipe[k].BlockWidth256BytesC,
+ p->myPipe[k].SourcePixelFormat,
+ p->myPipe[k].SurfaceTiling,
+ p->myPipe[k].BytePerPixelC,
+ p->myPipe[k].SourceScan,
+ p->SwathWidthC[k],
+ p->myPipe[k].ViewportHeightChroma,
+ p->myPipe[k].ViewportXStartC,
+ p->myPipe[k].ViewportYStartC,
+ p->GPUVMEnable,
+ p->GPUVMMaxPageTableLevels,
+ p->GPUVMMinPageSizeKBytes[k],
+ s->PTEBufferSizeInRequestsForChroma[k],
+ p->myPipe[k].PitchC,
+ p->myPipe[k].DCCMetaPitchC,
+ p->myPipe[k].BlockWidthC,
+ p->myPipe[k].BlockHeightC,
+
+ // Output
+ &s->MetaRowByteC[k],
+ &s->PixelPTEBytesPerRowC[k],
+ &s->PixelPTEBytesPerRowStorageC[k],
+ &p->dpte_row_width_chroma_ub[k],
+ &p->dpte_row_height_chroma[k],
+ &p->dpte_row_height_linear_chroma[k],
+ &s->PixelPTEBytesPerRowC_one_row_per_frame[k],
+ &s->dpte_row_width_chroma_ub_one_row_per_frame[k],
+ &s->dpte_row_height_chroma_one_row_per_frame[k],
+ &p->meta_req_width_chroma[k],
+ &p->meta_req_height_chroma[k],
+ &p->meta_row_width_chroma[k],
+ &p->meta_row_height_chroma[k],
+ &p->PixelPTEReqWidthC[k],
+ &p->PixelPTEReqHeightC[k],
+ &p->PTERequestSizeC[k],
+ &p->dpde0_bytes_per_frame_ub_c[k],
+ &p->meta_pte_bytes_per_frame_ub_c[k]);
+
+ p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines (
+ p->myPipe[k].VRatioChroma,
+ p->myPipe[k].VTapsChroma,
+ p->myPipe[k].InterlaceEnable,
+ p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ p->myPipe[k].SwathHeightC,
+ p->myPipe[k].SourceScan,
+ p->myPipe[k].ViewportStationary,
+ p->SwathWidthC[k],
+ p->myPipe[k].ViewportHeightChroma,
+ p->myPipe[k].ViewportXStartC,
+ p->myPipe[k].ViewportYStartC,
+
+ // Output
+ &p->VInitPreFillC[k],
+ &p->MaxNumSwathC[k]);
+ } else {
+ s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma; // no chroma pass: luma gets the whole combined PTE buffer budget
+ s->PTEBufferSizeInRequestsForChroma[k] = 0;
+ s->PixelPTEBytesPerRowC[k] = 0;
+ s->PixelPTEBytesPerRowStorageC[k] = 0;
+ s->PDEAndMetaPTEBytesFrameC = 0;
+ s->MetaRowByteC[k] = 0;
+ p->MaxNumSwathC[k] = 0;
+ p->PrefetchSourceLinesC[k] = 0;
+ s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ }
+ // The luma (Y) pass runs for every surface, using the luma PTE buffer budget chosen above.
+ s->PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
+ p->myPipe[k].ViewportStationary,
+ p->myPipe[k].DCCEnable,
+ p->myPipe[k].DPPPerSurface,
+ p->myPipe[k].BlockHeight256BytesY,
+ p->myPipe[k].BlockWidth256BytesY,
+ p->myPipe[k].SourcePixelFormat,
+ p->myPipe[k].SurfaceTiling,
+ p->myPipe[k].BytePerPixelY,
+ p->myPipe[k].SourceScan,
+ p->SwathWidthY[k],
+ p->myPipe[k].ViewportHeight,
+ p->myPipe[k].ViewportXStart,
+ p->myPipe[k].ViewportYStart,
+ p->GPUVMEnable,
+ p->GPUVMMaxPageTableLevels,
+ p->GPUVMMinPageSizeKBytes[k],
+ s->PTEBufferSizeInRequestsForLuma[k],
+ p->myPipe[k].PitchY,
+ p->myPipe[k].DCCMetaPitchY,
+ p->myPipe[k].BlockWidthY,
+ p->myPipe[k].BlockHeightY,
+
+ // Output
+ &s->MetaRowByteY[k],
+ &s->PixelPTEBytesPerRowY[k],
+ &s->PixelPTEBytesPerRowStorageY[k],
+ &p->dpte_row_width_luma_ub[k],
+ &p->dpte_row_height_luma[k],
+ &p->dpte_row_height_linear_luma[k],
+ &s->PixelPTEBytesPerRowY_one_row_per_frame[k],
+ &s->dpte_row_width_luma_ub_one_row_per_frame[k],
+ &s->dpte_row_height_luma_one_row_per_frame[k],
+ &p->meta_req_width[k],
+ &p->meta_req_height[k],
+ &p->meta_row_width[k],
+ &p->meta_row_height[k],
+ &p->PixelPTEReqWidthY[k],
+ &p->PixelPTEReqHeightY[k],
+ &p->PTERequestSizeY[k],
+ &p->dpde0_bytes_per_frame_ub_l[k],
+ &p->meta_pte_bytes_per_frame_ub_l[k]);
+
+ p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
+ p->myPipe[k].VRatio,
+ p->myPipe[k].VTaps,
+ p->myPipe[k].InterlaceEnable,
+ p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ p->myPipe[k].SwathHeightY,
+ p->myPipe[k].SourceScan,
+ p->myPipe[k].ViewportStationary,
+ p->SwathWidthY[k],
+ p->myPipe[k].ViewportHeight,
+ p->myPipe[k].ViewportXStart,
+ p->myPipe[k].ViewportYStart,
+
+ // Output
+ &p->VInitPreFillY[k],
+ &p->MaxNumSwathY[k]);
+
+ p->PDEAndMetaPTEBytesFrame[k] = (s->PDEAndMetaPTEBytesFrameY + s->PDEAndMetaPTEBytesFrameC) * (1 + 8 * s->HostVMDynamicLevels);
+ p->MetaRowByte[k] = s->MetaRowByteY[k] + s->MetaRowByteC[k];
+ // Fit check: row storage bytes must not exceed 64 * (PTE buffer size in requests), for luma and chroma alike.
+ if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
+ p->PTEBufferSizeNotExceeded[k] = true;
+ } else {
+ p->PTEBufferSizeNotExceeded[k] = false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
+#endif
+ } // the one-row-per-frame variant below is checked against twice that budget (64 * 2 * requests)
+ s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
+ s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrame = %u\n", __func__, k, p->PDEAndMetaPTEBytesFrame[k]);
+ dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameY = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameY);
+ dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameC = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameC);
+ dml_print("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
+ dml_print("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
+#endif
+ }
+ // Decide per-surface MALL use for static screen; the one-row-per-frame fit flags computed in pass 1 are an input to that decision.
+ CalculateMALLUseForStaticScreen(
+ p->NumberOfActiveSurfaces,
+ p->MALLAllocatedForDCN,
+ p->UseMALLForStaticScreen, // mode
+ p->SurfaceSizeInMALL,
+ s->one_row_per_frame_fits_in_buffer,
+ // Output
+ p->UsesMALLForStaticScreen); // boolean
+
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->PTEBufferModeOverrideEn[k] == 1) {
+ p->PTE_BUFFER_MODE[k] = p->PTEBufferModeOverrideVal[k];
+ } // NOTE(review): the override value stored above is unconditionally overwritten by the next statement - confirm whether that is intended
+ p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
+ (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64);
+ p->BIGK_FRAGMENT_SIZE[k] = (dml_uint_t)(dml_log2(p->GPUVMMinPageSizeKBytes[k] * 1024) - 12);
+ }
+ // Pass 2 (per surface): apply the one-row-per-frame selection, check the DCC meta buffer, then compute the row bandwidths.
+ for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
+ dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, p->UsesMALLForStaticScreen[k]);
+#endif
+ p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
+ (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan));
+
+ p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame);
+
+ if (p->use_one_row_for_frame[k]) { // swap in the *_one_row_per_frame results gathered in pass 1
+ p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
+ p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
+ p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
+ p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
+ p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
+ }
+
+ if (p->MetaRowByte[k] <= p->DCCMetaBufferSizeBytes) {
+ p->DCCMetaBufferSizeNotExceeded[k] = true;
+ } else {
+ p->DCCMetaBufferSizeNotExceeded[k] = false;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, MetaRowByte = %u\n", __func__, k, p->MetaRowByte[k]);
+ dml_print("DML::%s: k=%u, DCCMetaBufferSizeBytes = %u\n", __func__, k, p->DCCMetaBufferSizeBytes);
+ dml_print("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
+#endif
+ }
+ s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels); // same HostVM multiplier as used for PDEAndMetaPTEBytesFrame
+ s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
+ p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
+ if (p->use_one_row_for_frame[k])
+ p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2; // NOTE(review): the reason for halving in one-row-per-frame mode is not visible here - confirm
+
+ CalculateRowBandwidth(
+ p->GPUVMEnable,
+ p->myPipe[k].SourcePixelFormat,
+ p->myPipe[k].VRatio,
+ p->myPipe[k].VRatioChroma,
+ p->myPipe[k].DCCEnable,
+ p->myPipe[k].HTotal / p->myPipe[k].PixelClock, // presumably the line time - TODO confirm against CalculateRowBandwidth's parameter list
+ s->MetaRowByteY[k],
+ s->MetaRowByteC[k],
+ p->meta_row_height[k],
+ p->meta_row_height_chroma[k],
+ s->PixelPTEBytesPerRowY[k],
+ s->PixelPTEBytesPerRowC[k],
+ p->dpte_row_height_luma[k],
+ p->dpte_row_height_chroma[k],
+
+ // Output
+ &p->meta_row_bw[k],
+ &p->dpte_row_bw[k]);
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
+ dml_print("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
+ dml_print("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->UseMALLForPStateChange[k]);
+ dml_print("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
+ dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
+ dml_print("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
+ dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
+ dml_print("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
+ dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
+ dml_print("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
+ dml_print("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
+#endif
+ }
+}
+/// @brief Pick the output link type/rate and the output bpp, and decide whether DSC and FEC are required. All outputs except RequiredSlots are first reset to false/0/unknown and only refined when IsMainSurfaceUsingTheIndicatedTiming is set.
+static void CalculateOutputLink(
+ dml_float_t PHYCLKPerState,
+ dml_float_t PHYCLKD18PerState,
+ dml_float_t PHYCLKD32PerState,
+ dml_float_t Downspreading,
+ dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class OutputFormat,
+ dml_uint_t HTotal,
+ dml_uint_t HActive,
+ dml_float_t PixelClockBackEnd,
+ dml_float_t ForcedOutputLinkBPP,
+ dml_uint_t DSCInputBitPerComponent,
+ dml_uint_t NumberOfDSCSlices,
+ dml_float_t AudioSampleRate,
+ dml_uint_t AudioSampleLayout,
+ enum dml_odm_mode ODMModeNoDSC,
+ enum dml_odm_mode ODMModeDSC,
+ enum dml_dsc_enable DSCEnable,
+ dml_uint_t OutputLinkDPLanes,
+ enum dml_output_link_dp_rate OutputLinkDPRate,
+
+ // Output
+ dml_bool_t *RequiresDSC,
+ dml_bool_t *RequiresFEC,
+ dml_float_t *OutBpp,
+ enum dml_output_type_and_rate__type *OutputType,
+ enum dml_output_type_and_rate__rate *OutputRate,
+ dml_uint_t *RequiredSlots)
+{
+ dml_bool_t LinkDSCEnable;
+ dml_uint_t dummy; // receives TruncToValidBPP's last output on the HDMI path
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = 0;
+
+ *OutputType = dml_output_type_unknown;
+ *OutputRate = dml_output_rate_unknown;
+
+ if (IsMainSurfaceUsingTheIndicatedTiming) {
+ if (Output == dml_hdmi) {
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = "HDMI";
+ *OutputType = dml_output_type_hdmi;
+ // *RequiredSlots is not written on this path: the last TruncToValidBPP output goes to the local dummy instead.
+ } else if (Output == dml_dp || Output == dml_dp2p0 || Output == dml_edp) {
+ if (DSCEnable == dml_dsc_enable) { // DSC forced on: FEC is required for dp/dp2p0 but not for edp
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml_dp || Output == dml_dp2p0) {
+ *RequiresFEC = true;
+ } else {
+ *RequiresFEC = false;
+ }
+ } else { // DSC not forced: only dp2p0 requires FEC up front
+ *RequiresDSC = false;
+ LinkDSCEnable = false;
+ if (Output == dml_dp2p0) {
+ *RequiresFEC = true;
+ } else {
+ *RequiresFEC = false;
+ }
+ }
+ if (Output == dml_dp2p0) { // UHBR10, UHBR13.5 and UHBR20 are tried in that order; a later rate is only evaluated while *OutBpp is still 0
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr10) && PHYCLKD32PerState >= 10000 / 32.0) {
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32.0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true; // no valid bpp and the next rate's PHYCLK threshold is not met: retry this rate with DSC enabled
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR10";
+ *OutputType = dml_output_type_dp2p0; // type/rate are recorded whenever this rate was evaluated, even if *OutBpp is still 0
+ *OutputRate = dml_output_rate_dp_rate_uhbr10;
+ }
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32.0) {
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) { // note: 20000 / 32 is an integer division (625), unlike the 13500 / 32.0 comparison above
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR13p5";
+ *OutputType = dml_output_type_dp2p0;
+ *OutputRate = dml_output_rate_dp_rate_uhbr13p5;
+ }
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR20";
+ *OutputType = dml_output_type_dp2p0;
+ *OutputRate = dml_output_rate_dp_rate_uhbr20;
+ }
+ } else { // output is dp or edp
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr) && PHYCLKPerState >= 270) {
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml_dp) { // on this branch Output is dp or edp; only dp gets FEC together with DSC
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR";
+ *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
+ *OutputRate = dml_output_rate_dp_rate_hbr;
+ }
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr2) && *OutBpp == 0 && PHYCLKPerState >= 540) {
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml_dp) {
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR2";
+ *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
+ *OutputRate = dml_output_rate_dp_rate_hbr2;
+ }
+ if ((OutputLinkDPRate == dml_dp_rate_na || OutputLinkDPRate == dml_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml_dp) {
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP((1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR3";
+ *OutputType = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
+ *OutputRate = dml_output_rate_dp_rate_hbr3;
+ }
+ }
+ }
+ }
+}
+
+/// @brief Determine the ODM mode and number of DPP used per plane based on dispclk, dsc usage, odm usage policy
+static void CalculateODMMode(
+ dml_uint_t MaximumPixelsPerLinePerDSCUnit,
+ dml_uint_t HActive,
+ enum dml_output_encoder_class Output,
+ enum dml_output_format_class OutputFormat,
+ enum dml_odm_use_policy ODMUse,
+ dml_float_t StateDispclk,
+ dml_float_t MaxDispclk,
+ dml_bool_t DSCEnable,
+ dml_uint_t TotalNumberOfActiveDPP,
+ dml_uint_t MaxNumDPP,
+ dml_float_t PixelClock,
+ dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+ dml_float_t DISPCLKRampingMargin,
+ dml_float_t DISPCLKDPPCLKVCOSpeed,
+ dml_uint_t NumberOfDSCSlices,
+
+ // Output
+ dml_bool_t *TotalAvailablePipesSupport,
+ dml_uint_t *NumberOfDPP,
+ enum dml_odm_mode *ODMMode,
+ dml_float_t *RequiredDISPCLKPerSurface)
+{
+ // Required DISPCLK is precomputed for bypass, 2:1 and 4:1 combine so the policy checks below can compare it against StateDispclk.
+ dml_float_t SurfaceRequiredDISPCLKWithoutODMCombine;
+ dml_float_t SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ dml_float_t SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+
+ SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml_odm_mode_bypass, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml_odm_mode_combine_2to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
+ SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml_odm_mode_combine_4to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
+ *TotalAvailablePipesSupport = true;
+ // 4:2:0 output: HActive relative to DML2_MAX_FMT_420_BUFFER_WIDTH can force the policy (ODMUse is a by-value parameter, so only the local copy changes) or fail the mode outright.
+ if (OutputFormat == dml_420) {
+ if (HActive > 4 * DML2_MAX_FMT_420_BUFFER_WIDTH)
+ *TotalAvailablePipesSupport = false;
+ else if (HActive > 2 * DML2_MAX_FMT_420_BUFFER_WIDTH)
+ ODMUse = dml_odm_use_policy_combine_4to1;
+ else if (HActive > DML2_MAX_FMT_420_BUFFER_WIDTH && ODMUse != dml_odm_use_policy_combine_4to1)
+ ODMUse = dml_odm_use_policy_combine_2to1;
+ if (Output == dml_hdmi && ODMUse == dml_odm_use_policy_combine_2to1)
+ *TotalAvailablePipesSupport = false;
+ if ((Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && ODMUse == dml_odm_use_policy_combine_4to1)
+ *TotalAvailablePipesSupport = false;
+ }
+ // Initial mode from the policy; combine_as_needed starts as bypass. There is no final else, so a policy not listed here leaves *ODMMode unwritten.
+ if (ODMUse == dml_odm_use_policy_bypass || ODMUse == dml_odm_use_policy_combine_as_needed)
+ *ODMMode = dml_odm_mode_bypass;
+ else if (ODMUse == dml_odm_use_policy_combine_2to1)
+ *ODMMode = dml_odm_mode_combine_2to1;
+ else if (ODMUse == dml_odm_use_policy_combine_4to1)
+ *ODMMode = dml_odm_mode_combine_4to1;
+ else if (ODMUse == dml_odm_use_policy_split_1to2)
+ *ODMMode = dml_odm_mode_split_1to2;
+ else if (ODMUse == dml_odm_use_policy_mso_1to2)
+ *ODMMode = dml_odm_mode_mso_1to2;
+ else if (ODMUse == dml_odm_use_policy_mso_1to4)
+ *ODMMode = dml_odm_mode_mso_1to4;
+ // Defaults: the no-combine DISPCLK and 0 DPPs; *NumberOfDPP stays 0 whenever the pipe-count check below fails.
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
+ *NumberOfDPP = 0;
+ // Try 4:1 (never for hdmi/dp/edp), then 2:1 (never for hdmi), else one pipe. Each needs TotalNumberOfActiveDPP + n <= MaxNumDPP, otherwise TotalAvailablePipesSupport is cleared.
+ if (!(Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && (ODMUse == dml_odm_use_policy_combine_4to1 || (ODMUse == dml_odm_use_policy_combine_as_needed &&
+ (SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) || NumberOfDSCSlices > 8)))) {
+ if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
+ *ODMMode = dml_odm_mode_combine_4to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+ *NumberOfDPP = 4;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else if (Output != dml_hdmi && (ODMUse == dml_odm_use_policy_combine_2to1 || (ODMUse == dml_odm_use_policy_combine_as_needed &&
+ ((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
+ (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
+ if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
+ *ODMMode = dml_odm_mode_combine_2to1;
+ *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+ *NumberOfDPP = 2;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ } else { // single pipe: *ODMMode and *RequiredDISPCLKPerSurface keep the values assigned above
+ if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) {
+ *NumberOfDPP = 1;
+ } else {
+ *TotalAvailablePipesSupport = false;
+ }
+ }
+}
+
+/// @brief Required DISPCLK for one surface: the pixel clock after ODM combine, padded for down-spread and, where MaxDispclk allows, the ramping margin
+static dml_float_t CalculateRequiredDispclk(
+	enum dml_odm_mode ODMMode,
+	dml_float_t PixelClock,
+	dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+	dml_float_t DISPCLKRampingMargin,
+	dml_float_t DISPCLKDPPCLKVCOSpeed,
+	dml_float_t MaxDispclk)
+{
+	// Both margins are divided by 100 before use, i.e. they are treated as percentages.
+	const dml_float_t down_spread_factor = 1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0;
+	const dml_float_t ramping_factor = 1 + DISPCLKRampingMargin / 100.0;
+	dml_float_t odm_divided_pixclk = PixelClock;
+	dml_float_t clk_with_ramp;
+	dml_float_t clk_without_ramp;
+	dml_float_t max_clk_on_grid;
+
+	// 4:1 and 2:1 ODM combine divide the pixel clock; every other mode uses it unchanged.
+	if (ODMMode == dml_odm_mode_combine_4to1)
+		odm_divided_pixclk = PixelClock / 4;
+	else if (ODMMode == dml_odm_mode_combine_2to1)
+		odm_divided_pixclk = PixelClock / 2;
+
+	// The two candidates go through RoundToDFSGranularity with second argument 1, the MaxDispclk
+	// ceiling with second argument 0 (presumably round-up vs round-down - confirm against the helper).
+	clk_with_ramp = RoundToDFSGranularity(odm_divided_pixclk * down_spread_factor * ramping_factor, 1, DISPCLKDPPCLKVCOSpeed);
+	clk_without_ramp = RoundToDFSGranularity(odm_divided_pixclk * down_spread_factor, 1, DISPCLKDPPCLKVCOSpeed);
+	max_clk_on_grid = RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
+
+	// Above the ceiling even without the ramping margin: report the real requirement.
+	if (clk_without_ramp > max_clk_on_grid)
+		return clk_without_ramp;
+	// Only the ramping margin pushes it above the ceiling: clamp to the ceiling.
+	if (clk_with_ramp > max_clk_on_grid)
+		return max_clk_on_grid;
+
+	return clk_with_ramp;
+}
+
+/// @brief DPPCLK needed when a plane is driven by a single DPP, together with the luma/chroma scaler throughputs it is derived from
+static void CalculateSinglePipeDPPCLKAndSCLThroughput(
+	dml_float_t HRatio,
+	dml_float_t HRatioChroma,
+	dml_float_t VRatio,
+	dml_float_t VRatioChroma,
+	dml_float_t MaxDCHUBToPSCLThroughput,
+	dml_float_t MaxPSCLToLBThroughput,
+	dml_float_t PixelClock,
+	enum dml_source_format_class SourcePixelFormat,
+	dml_uint_t HTaps,
+	dml_uint_t HTapsChroma,
+	dml_uint_t VTaps,
+	dml_uint_t VTapsChroma,
+
+	// Output
+	dml_float_t *PSCL_THROUGHPUT,
+	dml_float_t *PSCL_THROUGHPUT_CHROMA,
+	dml_float_t *DPPCLKUsingSingleDPP)
+{
+	const dml_bool_t has_chroma_pass = (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 ||
+					    SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha);
+	dml_float_t luma_dppclk;
+	dml_float_t chroma_dppclk;
+
+	// Scaler throughput: capped by the DCHUB->PSCL limit; when HRatio > 1 the PSCL->LB limit is scaled by HRatio / ceil(HTaps / 6).
+	*PSCL_THROUGHPUT = (HRatio > 1) ?
+		dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / dml_ceil((dml_float_t) HTaps / 6.0, 1.0)) :
+		dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+
+	// NOTE(review): VTaps is dml_uint_t, so VTaps / 6 looks like an integer division - kept exactly as in the original.
+	luma_dppclk = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
+	if (luma_dppclk < 2 * PixelClock && (HTaps > 6 || VTaps > 6))
+		luma_dppclk = 2 * PixelClock;
+
+	if (!has_chroma_pass) {
+		*PSCL_THROUGHPUT_CHROMA = 0;
+		*DPPCLKUsingSingleDPP = luma_dppclk;
+		return;
+	}
+
+	*PSCL_THROUGHPUT_CHROMA = (HRatioChroma > 1) ?
+		dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / dml_ceil((dml_float_t) HTapsChroma / 6.0, 1.0)) :
+		dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+	chroma_dppclk = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
+					      HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+	if (chroma_dppclk < 2 * PixelClock && (HTapsChroma > 6 || VTapsChroma > 6))
+		chroma_dppclk = 2 * PixelClock;
+	*DPPCLKUsingSingleDPP = dml_max(luma_dppclk, chroma_dppclk);
+}
+
+/// @brief Calculate the actual dppclk freq
+///
+/// Each surface's single-DPP clock is divided by its DPP count and inflated by
+/// the down-spreading percentage. The largest of those values, rounded by
+/// RoundToDFSGranularity(), becomes the global DPPCLK; every per-surface clock
+/// is then rounded up to a multiple of GlobalDPPCLK / 255.
+///
+/// @param DPPCLKUsingSingleDPP DppClk freq required if there is only 1 DPP per plane
+/// @param DPPPerSurface Number of DPP for each plane
+/// @param[out] GlobalDPPCLK Rounded maximum of the per-surface clocks
+/// @param[out] Dppclk Per-surface clock, quantised to 1/255 of GlobalDPPCLK
+static void CalculateDPPCLK(
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
+	dml_float_t DISPCLKDPPCLKVCOSpeed,
+	dml_float_t DPPCLKUsingSingleDPP[],
+	dml_uint_t DPPPerSurface[],
+
+	// Output
+	dml_float_t *GlobalDPPCLK,
+	dml_float_t Dppclk[])
+{
+	dml_float_t highest_dppclk = 0;
+	dml_uint_t surf;
+
+	// Pass 1: raw per-surface requirement and the running maximum.
+	for (surf = 0; surf < NumberOfActiveSurfaces; surf++) {
+		Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
+		highest_dppclk = dml_max(highest_dppclk, Dppclk[surf]);
+	}
+
+	*GlobalDPPCLK = RoundToDFSGranularity(highest_dppclk, 1, DISPCLKDPPCLKVCOSpeed);
+	dml_print("DML::%s: GlobalDPPCLK = %f\n", __func__, *GlobalDPPCLK);
+
+	// Pass 2: snap every surface clock up to the next 1/255 step of the global clock.
+	for (surf = 0; surf < NumberOfActiveSurfaces; surf++) {
+		Dppclk[surf] = *GlobalDPPCLK / 255.0 * dml_ceil(Dppclk[surf] * 255.0 / *GlobalDPPCLK, 1.0);
+		dml_print("DML::%s: Dppclk[%0d] = %f\n", __func__, surf, Dppclk[surf]);
+	}
+}
+
+/// @brief Decide, per surface, whether MALL is used for static screen
+///
+/// Surfaces whose mode is dml_use_mall_static_screen_enable are always marked
+/// and their sizes summed. Afterwards, surfaces that are not disabled, whose
+/// one_row_per_frame_fits_in_buffer flag is set and that still fit within
+/// MALLAllocatedForDCNFinal * 1024 * 1024 are added one at a time, always
+/// picking the smallest remaining candidate, until none fits.
+///
+/// @param[out] UsesMALLForStaticScreen Per-surface decision
+static void CalculateMALLUseForStaticScreen(
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_uint_t MALLAllocatedForDCNFinal,
+	enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
+	dml_uint_t SurfaceSizeInMALL[],
+	dml_bool_t one_row_per_frame_fits_in_buffer[],
+
+	// Output
+	dml_bool_t UsesMALLForStaticScreen[])
+{
+	const dml_uint_t mall_capacity = MALLAllocatedForDCNFinal * 1024 * 1024;
+	dml_uint_t mall_used = 0;
+	dml_uint_t best = 0;
+	dml_bool_t found;
+	dml_uint_t k;
+
+	// Mandatory users first.
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable);
+		if (UsesMALLForStaticScreen[k])
+			mall_used = mall_used + SurfaceSizeInMALL[k];
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, UsesMALLForStaticScreen[k]);
+		dml_print("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, mall_used);
+#endif
+	}
+
+	// Then greedily add the smallest eligible surface until nothing more fits.
+	do {
+		found = false;
+		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+			if (UsesMALLForStaticScreen[k] ||
+			    UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable ||
+			    !one_row_per_frame_fits_in_buffer[k])
+				continue;
+			if (mall_used + SurfaceSizeInMALL[k] > mall_capacity)
+				continue;
+			// Keep only a strictly smaller surface than the current pick.
+			if (found && SurfaceSizeInMALL[k] >= SurfaceSizeInMALL[best])
+				continue;
+
+			found = true;
+			best = k;
+			dml_print("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, UseMALLForStaticScreen[k]);
+		}
+
+		if (found) {
+			UsesMALLForStaticScreen[best] = true;
+			mall_used = mall_used + SurfaceSizeInMALL[best];
+
+#ifdef __DML_VBA_DEBUG__
+			dml_print("DML::%s: SurfaceToAddToMALL = %u\n", __func__, best);
+			dml_print("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, mall_used);
+#endif
+		}
+	} while (found);
+}
+
+// @brief Calculate return bw for VM only traffic
+//
+// Returns the smallest of three derated limits: the SDP port (return bus
+// width * DCFCLK), the fabric (FabricClock * fabric datapath bytes) and the
+// DRAM (DRAMSpeed * channels * channel width). The DRAM term uses the strobe
+// percentage when use_ideal_dram_bw_strobe is set and HostVM is off, otherwise
+// the VM-only percentage.
+//
+// NOTE(review): the fabric term is derated with pct_ideal_sdp_bw_after_urgent,
+// whereas dml_get_return_bw_mbps() uses pct_ideal_fabric_bw_after_urgent for
+// the same term - confirm this is intentional.
+dml_float_t dml_get_return_bw_mbps_vm_only(
+	const struct soc_bounding_box_st *soc,
+	dml_bool_t use_ideal_dram_bw_strobe,
+	dml_bool_t HostVMEnable,
+	dml_float_t DCFCLK,
+	dml_float_t FabricClock,
+	dml_float_t DRAMSpeed)
+{
+	const dml_float_t sdp_limit =
+		soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0;
+	const dml_float_t fabric_limit =
+		FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes * soc->pct_ideal_sdp_bw_after_urgent / 100.0;
+	const dml_float_t dram_limit =
+		DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes *
+		((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_vm_only) / 100.0;
+	const dml_float_t vm_only_return_bw = dml_min3(sdp_limit, fabric_limit, dram_limit);
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
+	dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
+	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+	dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, vm_only_return_bw);
+#endif
+	return vm_only_return_bw;
+}
+
+// Function: dml_get_return_bw_mbps
+// Megabyte per second
+//
+// The return bandwidth is the minimum of the derated SDP port, fabric and DRAM
+// bandwidths. Only the DRAM derating differs between the two variants: the
+// pixel-only percentage is used when HostVM is disabled, the pixel-and-VM
+// percentage when it is enabled. When use_ideal_dram_bw_strobe is set and
+// HostVM is off, the strobe percentage replaces the DRAM derating.
+dml_float_t dml_get_return_bw_mbps(
+	const struct soc_bounding_box_st *soc,
+	dml_bool_t use_ideal_dram_bw_strobe,
+	dml_bool_t HostVMEnable,
+	dml_float_t DCFCLK,
+	dml_float_t FabricClock,
+	dml_float_t DRAMSpeed)
+{
+	// Undegraded ("ideal") bandwidth of each path.
+	const dml_float_t ideal_sdp = soc->return_bus_width_bytes * DCFCLK;
+	const dml_float_t ideal_fabric = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
+	const dml_float_t ideal_dram = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
+
+	// SDP and fabric limits are shared by both variants.
+	const dml_float_t sdp_limit = ideal_sdp * soc->pct_ideal_sdp_bw_after_urgent / 100;
+	const dml_float_t fabric_limit = ideal_fabric * soc->pct_ideal_fabric_bw_after_urgent / 100;
+
+	const dml_float_t dram_limit_pixel_only = ideal_dram *
+		((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+							soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100;
+	const dml_float_t dram_limit_pixel_and_vm = ideal_dram *
+		((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+							soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100;
+
+	const dml_float_t pixel_only_bw = dml_min3(sdp_limit, fabric_limit, dram_limit_pixel_only);
+	const dml_float_t pixel_and_vm_bw = dml_min3(sdp_limit, fabric_limit, dram_limit_pixel_and_vm);
+
+	const dml_float_t return_bw = (HostVMEnable != true) ? pixel_only_bw : pixel_and_vm_bw;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
+	dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
+	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
+	dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+	dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, ideal_sdp);
+	dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, ideal_fabric);
+	dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, ideal_dram);
+	dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, pixel_only_bw);
+	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, pixel_and_vm_bw);
+	dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, return_bw);
+#endif
+	return return_bw;
+}
+
+// Function: dml_get_return_dram_bw_mbps
+// Megabyte per second
+//
+// DRAM-only counterpart of dml_get_return_bw_mbps(): the ideal DRAM bandwidth
+// (DRAMSpeed * channels * channel width) derated by the pixel-only percentage
+// when HostVM is disabled, or by the pixel-and-VM percentage when it is
+// enabled. The strobe percentage is used instead when use_ideal_dram_bw_strobe
+// is set and HostVM is off.
+static dml_float_t dml_get_return_dram_bw_mbps(
+	const struct soc_bounding_box_st *soc,
+	dml_bool_t use_ideal_dram_bw_strobe,
+	dml_bool_t HostVMEnable,
+	dml_float_t DRAMSpeed)
+{
+	const dml_float_t ideal_dram = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
+	const dml_float_t pixel_only_bw = ideal_dram *
+		((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+							soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100;
+	const dml_float_t pixel_and_vm_bw = ideal_dram *
+		((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
+							soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100;
+	const dml_float_t return_dram_bw = (HostVMEnable != true) ? pixel_only_bw : pixel_and_vm_bw;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
+	dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
+	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
+	dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, ideal_dram);
+	dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, pixel_only_bw);
+	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, pixel_and_vm_bw);
+	dml_print("DML::%s: ReturnDRAMBW = %f MBps\n", __func__, return_dram_bw);
+#endif
+	return return_dram_bw;
+}
+
+/// @brief BACKEND
+///
+/// Returns 0 unless DSC is enabled and OutputBpp is non-zero. Otherwise the
+/// delay is dscceComputeDelay() + dscComputeDelay() for a slice width of
+/// ceil(HActive / NumberOfDSCSlices); the slice count handed to
+/// dscceComputeDelay() is divided by 4 or 2 for the 4:1 / 2:1 ODM combine
+/// modes. The result is then extended by (HTotal - HActive) for every started
+/// HActive worth of delay and rescaled by PixelClock / PixelClockBackEnd.
+static dml_uint_t DSCDelayRequirement(
+	dml_bool_t DSCEnabled,
+	enum dml_odm_mode ODMMode,
+	dml_uint_t DSCInputBitPerComponent,
+	dml_float_t OutputBpp,
+	dml_uint_t HActive,
+	dml_uint_t HTotal,
+	dml_uint_t NumberOfDSCSlices,
+	enum dml_output_format_class OutputFormat,
+	enum dml_output_encoder_class Output,
+	dml_float_t PixelClock,
+	dml_float_t PixelClockBackEnd)
+{
+	dml_uint_t delay = 0;
+
+	if (DSCEnabled == true && OutputBpp != 0) {
+		const dml_uint_t slice_width = (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0));
+		dml_uint_t slices_per_engine;
+
+		switch (ODMMode) {
+		case dml_odm_mode_combine_4to1:
+			slices_per_engine = (dml_uint_t) (NumberOfDSCSlices / 4.0);
+			break;
+		case dml_odm_mode_combine_2to1:
+			slices_per_engine = (dml_uint_t) (NumberOfDSCSlices / 2.0);
+			break;
+		default:
+			slices_per_engine = NumberOfDSCSlices;
+			break;
+		}
+
+		delay = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, slice_width,
+				slices_per_engine, OutputFormat, Output) + dscComputeDelay(OutputFormat, Output);
+
+		// Add the blanking portion of every line the delay spans.
+		delay = (dml_uint_t)(delay + (HTotal - HActive) * dml_ceil((dml_float_t) delay / (dml_float_t) HActive, 1.0));
+		// Rescale by the ratio of the two pixel clocks.
+		delay = (dml_uint_t)(delay * PixelClock / PixelClockBackEnd);
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: DSCEnabled = %u\n", __func__, DSCEnabled);
+	dml_print("DML::%s: ODMMode = %u\n", __func__, ODMMode);
+	dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
+	dml_print("DML::%s: HActive = %u\n", __func__, HActive);
+	dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
+	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+	dml_print("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
+	dml_print("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
+	dml_print("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
+	dml_print("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
+	dml_print("DML::%s: DSCDelayRequirement_val = %u\n", __func__, delay);
+#endif
+
+	return delay;
+}
+
+/// @brief Check whether the summed active bandwidth fits in ReturnBW
+///
+/// Sums, over all active surfaces, the luma/chroma/cursor read bandwidths
+/// (each scaled by its urgent burst factor) plus NumberOfDPP times the meta-row
+/// and DPTE-row bandwidths.
+///
+/// @return true when that sum does not exceed ReturnBW and no surface has its
+///         NotUrgentLatencyHiding flag set
+static noinline_for_stack dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces,
+		dml_float_t ReturnBW,
+		dml_bool_t NotUrgentLatencyHiding[],
+		dml_float_t ReadBandwidthLuma[],
+		dml_float_t ReadBandwidthChroma[],
+		dml_float_t cursor_bw[],
+		dml_float_t meta_row_bandwidth[],
+		dml_float_t dpte_row_bandwidth[],
+		dml_uint_t NumberOfDPP[],
+		dml_float_t UrgentBurstFactorLuma[],
+		dml_float_t UrgentBurstFactorChroma[],
+		dml_float_t UrgentBurstFactorCursor[])
+{
+	dml_bool_t any_latency_not_hidden = false;
+	dml_float_t total_bw = 0;
+	dml_bool_t supported;
+	dml_uint_t k;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		if (NotUrgentLatencyHiding[k])
+			any_latency_not_hidden = true;
+
+		// Terms are added left to right onto the running total.
+		total_bw = total_bw
+			+ ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k]
+			+ ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
+			+ cursor_bw[k] * UrgentBurstFactorCursor[k]
+			+ NumberOfDPP[k] * meta_row_bandwidth[k]
+			+ NumberOfDPP[k] * dpte_row_bandwidth[k];
+	}
+
+	supported = (total_bw <= ReturnBW) && !any_latency_not_hidden;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %u\n", __func__, any_latency_not_hidden);
+	dml_print("DML::%s: VActiveBandwith = %f\n", __func__, total_bw);
+	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %u\n", __func__, supported);
+#endif
+	return supported;
+}
+
+/// @brief Sum the per-surface prefetch-time bandwidth and compare it with ReturnBW
+///
+/// For every surface the contribution is the largest of:
+///   - NumberOfDPP * prefetch_vmrow_bw,
+///   - the active bandwidth (luma + chroma + cursor reads with their burst
+///     factors, plus NumberOfDPP * (meta row + DPTE row)),
+///   - the prefetch bandwidth (NumberOfDPP * (luma + chroma prefetch with their
+///     "Pre" burst factors) plus the prefetch cursor term).
+///
+/// @param[out] PrefetchBandwidth                         Sum over all surfaces
+/// @param[out] PrefetchBandwidthNotIncludingMALLPrefetch Same sum, skipping surfaces whose mode is
+///                                                       dml_use_mall_pstate_change_phantom_pipe
+/// @param[out] FractionOfUrgentBandwidth                 PrefetchBandwidth / ReturnBW
+/// @param[out] PrefetchBandwidthSupport                  PrefetchBandwidth <= ReturnBW and no surface has
+///                                                       NotUrgentLatencyHiding set
+static void CalculatePrefetchBandwithSupport(
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_float_t ReturnBW,
+	enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+	dml_bool_t NotUrgentLatencyHiding[],
+	dml_float_t ReadBandwidthLuma[],
+	dml_float_t ReadBandwidthChroma[],
+	dml_float_t PrefetchBandwidthLuma[],
+	dml_float_t PrefetchBandwidthChroma[],
+	dml_float_t cursor_bw[],
+	dml_float_t meta_row_bandwidth[],
+	dml_float_t dpte_row_bandwidth[],
+	dml_float_t cursor_bw_pre[],
+	dml_float_t prefetch_vmrow_bw[],
+	dml_uint_t NumberOfDPP[],
+	dml_float_t UrgentBurstFactorLuma[],
+	dml_float_t UrgentBurstFactorChroma[],
+	dml_float_t UrgentBurstFactorCursor[],
+	dml_float_t UrgentBurstFactorLumaPre[],
+	dml_float_t UrgentBurstFactorChromaPre[],
+	dml_float_t UrgentBurstFactorCursorPre[],
+
+	// Output
+	dml_float_t *PrefetchBandwidth,
+	dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
+	dml_float_t *FractionOfUrgentBandwidth,
+	dml_bool_t *PrefetchBandwidthSupport)
+{
+	dml_bool_t any_latency_not_hidden = false;
+	dml_float_t total_bw = 0;
+	dml_float_t total_bw_without_phantom = 0;
+	dml_uint_t k;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		const dml_float_t vmrow_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
+		const dml_float_t active_bw =
+			ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
+			+ cursor_bw[k] * UrgentBurstFactorCursor[k]
+			+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
+		const dml_float_t prefetch_bw =
+			NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
+					+ PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
+			+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];
+		const dml_float_t surface_bw = dml_max3(vmrow_bw, active_bw, prefetch_bw);
+
+		if (NotUrgentLatencyHiding[k])
+			any_latency_not_hidden = true;
+
+		total_bw = total_bw + surface_bw;
+		if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
+			total_bw_without_phantom = total_bw_without_phantom + surface_bw;
+	}
+
+	*PrefetchBandwidth = total_bw;
+	*PrefetchBandwidthNotIncludingMALLPrefetch = total_bw_without_phantom;
+	*PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !any_latency_not_hidden;
+	*FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+	dml_print("DML::%s: PrefetchBandwidth = %f\n", __func__, *PrefetchBandwidth);
+	dml_print("DML::%s: FractionOfUrgentBandwidth = %f\n", __func__, *FractionOfUrgentBandwidth);
+	dml_print("DML::%s: PrefetchBandwidthSupport = %u\n", __func__, *PrefetchBandwidthSupport);
+#endif
+}
+
+/// @brief Bandwidth left over from ReturnBW after every surface's demand is subtracted
+///
+/// Each surface consumes the larger of its active demand (luma + chroma +
+/// cursor reads scaled by their burst factors) and its prefetch demand
+/// (NumberOfDPP * (luma + chroma prefetch scaled by the "Pre" factors) plus the
+/// prefetch cursor term).
+///
+/// @return ReturnBW minus the sum of those per-surface maxima; the result is not clamped
+static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip(
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_float_t ReturnBW,
+	dml_float_t ReadBandwidthLuma[],
+	dml_float_t ReadBandwidthChroma[],
+	dml_float_t PrefetchBandwidthLuma[],
+	dml_float_t PrefetchBandwidthChroma[],
+	dml_float_t cursor_bw[],
+	dml_float_t cursor_bw_pre[],
+	dml_uint_t NumberOfDPP[],
+	dml_float_t UrgentBurstFactorLuma[],
+	dml_float_t UrgentBurstFactorChroma[],
+	dml_float_t UrgentBurstFactorCursor[],
+	dml_float_t UrgentBurstFactorLumaPre[],
+	dml_float_t UrgentBurstFactorChromaPre[],
+	dml_float_t UrgentBurstFactorCursorPre[])
+{
+	dml_float_t remaining_bw = ReturnBW;
+	dml_uint_t k;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		const dml_float_t active_bw =
+			ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k];
+		const dml_float_t prefetch_bw =
+			NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) +
+			cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];
+
+		remaining_bw = remaining_bw - dml_max(active_bw, prefetch_bw);
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k=%u\n", __func__, k);
+		dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]);
+		dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]);
+		dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]);
+		dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]);
+		dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]);
+		dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]);
+		dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]);
+
+		dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]);
+		dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]);
+		dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]);
+		dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]);
+		dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]);
+		dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]);
+		dml_print("DML::%s: ret_val = %f\n", __func__, remaining_bw);
+#endif
+	}
+
+	return remaining_bw;
+}
+
+/// @brief Sum the per-surface bandwidth during an immediate flip and compare it with ReturnBW
+///
+/// For every surface the contribution is dml_max3() of the VM/row prefetch
+/// bandwidth, an "active" term and a "prefetch" term. When the surface's
+/// ImmediateFlipRequirement is anything other than
+/// dml_immediate_flip_not_required, final_flip_bw (per DPP) is added to both
+/// terms and the meta/DPTE row bandwidth is left out of the active term;
+/// otherwise the terms match those of the plain prefetch check.
+///
+/// @param[out] TotalBandwidth                         Sum over all surfaces
+/// @param[out] TotalBandwidthNotIncludingMALLPrefetch Same sum, skipping surfaces whose mode is
+///                                                    dml_use_mall_pstate_change_phantom_pipe
+/// @param[out] FractionOfUrgentBandwidth              TotalBandwidth / ReturnBW
+/// @param[out] ImmediateFlipBandwidthSupport          TotalBandwidth <= ReturnBW
+static void CalculateImmediateFlipBandwithSupport(
+	dml_uint_t NumberOfActiveSurfaces,
+	dml_float_t ReturnBW,
+	enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
+	enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
+	dml_float_t final_flip_bw[],
+	dml_float_t ReadBandwidthLuma[],
+	dml_float_t ReadBandwidthChroma[],
+	dml_float_t PrefetchBandwidthLuma[],
+	dml_float_t PrefetchBandwidthChroma[],
+	dml_float_t cursor_bw[],
+	dml_float_t meta_row_bandwidth[],
+	dml_float_t dpte_row_bandwidth[],
+	dml_float_t cursor_bw_pre[],
+	dml_float_t prefetch_vmrow_bw[],
+	dml_uint_t NumberOfDPP[],
+	dml_float_t UrgentBurstFactorLuma[],
+	dml_float_t UrgentBurstFactorChroma[],
+	dml_float_t UrgentBurstFactorCursor[],
+	dml_float_t UrgentBurstFactorLumaPre[],
+	dml_float_t UrgentBurstFactorChromaPre[],
+	dml_float_t UrgentBurstFactorCursorPre[],
+
+	// Output
+	dml_float_t *TotalBandwidth,
+	dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
+	dml_float_t *FractionOfUrgentBandwidth,
+	dml_bool_t *ImmediateFlipBandwidthSupport)
+{
+	dml_float_t total_bw = 0;
+	dml_float_t total_bw_without_phantom = 0;
+	dml_uint_t k;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		dml_float_t active_bw;
+		dml_float_t prefetch_bw;
+		dml_float_t surface_bw;
+
+		if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
+			active_bw = NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k];
+			prefetch_bw = NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
+				+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];
+		} else {
+			active_bw = NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k])
+				+ ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k];
+			prefetch_bw = NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
+				+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k];
+		}
+		surface_bw = dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], active_bw, prefetch_bw);
+
+		total_bw = total_bw + surface_bw;
+		if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
+			total_bw_without_phantom = total_bw_without_phantom + surface_bw;
+#ifdef __DML_VBA_DEBUG__
+		dml_print("DML::%s: k = %u\n", __func__, k);
+		dml_print("DML::%s: ImmediateFlipRequirement = %u\n", __func__, ImmediateFlipRequirement[k]);
+		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, total_bw);
+		dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]);
+		dml_print("DML::%s: prefetch_vmrow_bw = %f\n", __func__, prefetch_vmrow_bw[k]);
+		dml_print("DML::%s: final_flip_bw = %f\n", __func__, final_flip_bw[k]);
+		dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]);
+		dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]);
+		dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]);
+		dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]);
+		dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]);
+		dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]);
+		dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]);
+		dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]);
+		dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]);
+		dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]);
+		dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]);
+		dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]);
+		dml_print("DML::%s: meta_row_bandwidth = %f\n", __func__, meta_row_bandwidth[k]);
+		dml_print("DML::%s: dpte_row_bandwidth = %f\n", __func__, dpte_row_bandwidth[k]);
+#endif
+	}
+
+	*TotalBandwidth = total_bw;
+	*TotalBandwidthNotIncludingMALLPrefetch = total_bw_without_phantom;
+	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
+	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
+	dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth);
+	dml_print("DML::%s: ImmediateFlipBandwidthSupport = %u\n", __func__, *ImmediateFlipBandwidthSupport);
+#endif
+}
+
+/// @brief Convert a duration in microseconds to a count of lines, rounded up
+///
+/// The line time is derived from h_total and pixel_clock and truncated to
+/// whole nanoseconds before num_us (converted to nanoseconds) is divided by it.
+static dml_uint_t MicroSecToVertLines(dml_uint_t num_us, dml_uint_t h_total, dml_float_t pixel_clock)
+{
+	// Fractional nanoseconds are discarded by the conversion to dml_uint_t.
+	const dml_uint_t whole_ns_per_line = (dml_uint_t)(1000.0 * (h_total * 1000.0) / (pixel_clock * 1000.0));
+
+	return dml_ceil(1000.0 * num_us / whole_ns_per_line, 1.0);
+}
+
+/// @brief Calculate the maximum vstartup for mode support and mode programming consideration
+/// Bounded by min of actual vblank and input vblank_nom, don't want vstartup/ready to start too early if actual vblank is huge
+///
+/// @param plane_idx             Index into the per-plane arrays of @p timing
+/// @param ptoi_supported        When false and the plane is interlaced, only half of the vblank is used
+/// @param vblank_nom_default_us Nominal vblank in microseconds; converted to lines with MicroSecToVertLines()
+/// @param timing                Timing configuration (HTotal, PixelClock, VTotal, VActive, VFrontPorch, Interlace are read)
+/// @param write_back_delay_us   Delay in microseconds; its line count (at least 1) is subtracted from the vblank size
+/// @return Maximum vstartup in lines, never larger than DML_MAX_VSTARTUP_START
+static dml_uint_t CalculateMaxVStartup(
+ dml_uint_t plane_idx,
+ dml_bool_t ptoi_supported,
+ dml_uint_t vblank_nom_default_us,
+ struct dml_timing_cfg_st *timing,
+ dml_float_t write_back_delay_us)
+{
+ dml_uint_t vblank_size = 0;
+ dml_uint_t max_vstartup_lines = 0;
+ const dml_uint_t max_allowed_vblank_nom = 1023;
+
+ // Duration of one line: HTotal / PixelClock (microseconds if PixelClock is in MHz - presumably; confirm).
+ dml_float_t line_time_us = (dml_float_t) timing->HTotal[plane_idx] / timing->PixelClock[plane_idx];
+ // Number of blanking lines in the actual timing.
+ dml_uint_t vblank_actual = timing->VTotal[plane_idx] - timing->VActive[plane_idx];
+
+ dml_uint_t vblank_nom_default_in_line = MicroSecToVertLines(vblank_nom_default_us, timing->HTotal[plane_idx],
+ timing->PixelClock[plane_idx]);
+ // Start from the smaller of the real vblank and the nominal default.
+ dml_uint_t vblank_nom_input = (dml_uint_t)dml_min(vblank_actual, vblank_nom_default_in_line);
+
+ // vblank_nom should not be smaller than (VSync (VTotal - VActive - VFrontPorch) + 2)
+ // + 2 is because
+ // 1 -> VStartup_start should be 1 line before VSync
+ // 1 -> always reserve 1 line between start of VBlank to VStartup signal
+ dml_uint_t vblank_nom_vsync_capped = dml_max(vblank_nom_input,
+ timing->VTotal[plane_idx] - timing->VActive[plane_idx] - timing->VFrontPorch[plane_idx] + 2);
+ // Upper bound of max_allowed_vblank_nom (1023) lines.
+ dml_uint_t vblank_nom_max_allowed_capped = dml_min(vblank_nom_vsync_capped, max_allowed_vblank_nom);
+ // Fall back to the nominal default if the capped value came out as 0.
+ dml_uint_t vblank_avail = (vblank_nom_max_allowed_capped == 0) ?
+ vblank_nom_default_in_line : vblank_nom_max_allowed_capped;
+
+ vblank_size = (dml_uint_t) dml_min(vblank_actual, vblank_avail);
+
+ if (timing->Interlace[plane_idx] && !ptoi_supported)
+ max_vstartup_lines = (dml_uint_t) (dml_floor(vblank_size/2.0, 1.0));
+ else
+ // NOTE(review): if the write-back line count exceeds vblank_size and dml_uint_t is unsigned,
+ // this subtraction wraps and the clamp below then yields DML_MAX_VSTARTUP_START - confirm callers rule this out.
+ max_vstartup_lines = vblank_size - (dml_uint_t) dml_max(1.0, dml_ceil(write_back_delay_us/line_time_us, 1.0));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: plane_idx = %u\n", __func__, plane_idx);
+ dml_print("DML::%s: VBlankNom = %u\n", __func__, timing->VBlankNom[plane_idx]);
+ dml_print("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
+ dml_print("DML::%s: line_time_us = %f\n", __func__, line_time_us);
+ dml_print("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
+ dml_print("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
+ dml_print("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
+#endif
+ // Final clamp to DML_MAX_VSTARTUP_START.
+ max_vstartup_lines = (dml_uint_t) dml_min(max_vstartup_lines, DML_MAX_VSTARTUP_START);
+ return max_vstartup_lines;
+}
+
+/// @brief Fill the prefetch-schedule parameter block for state index @p j and plane @p k
+///
+/// Copies IP, SoC, policy and per-plane mode-support values from @p mode_lib
+/// into @p CalculatePrefetchSchedule_params and points its output fields at the
+/// matching mode-support result slots. Nothing is computed here apart from
+/// DPP_RECOUT_WIDTH, the summed DPPCLK delay and the HostVM page size in bytes.
+static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib,
+		struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params,
+		dml_uint_t j,
+		dml_uint_t k)
+{
+	struct CalculatePrefetchSchedule_params_st *p = CalculatePrefetchSchedule_params;
+
+	// Pipeline delays and policy.
+	p->DSCDelay = mode_lib->ms.DSCDelayPerState[k];
+	p->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
+	p->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
+	p->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
+	p->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
+	p->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
+	p->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
+
+	// Output geometry and format.
+	p->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
+	p->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
+	p->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
+
+	// GPUVM / HostVM configuration.
+	p->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+	p->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+	p->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+	p->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+	p->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+
+	// Dynamic metadata.
+	p->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
+	p->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
+	p->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
+	p->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
+
+	// Latencies.
+	p->UrgentLatency = mode_lib->ms.UrgLatency;
+	p->UrgentExtraLatency = mode_lib->ms.ExtraLatency;
+	p->TCalc = mode_lib->ms.TimeCalc;
+
+	// Per-state ([j][k]) byte counts and prefetch line counts, per-plane prefill/swath data.
+	p->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k];
+	p->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k];
+	p->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k];
+	p->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k];
+	p->VInitPreFillY = mode_lib->ms.PrefillY[k];
+	p->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k];
+	p->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k];
+	p->VInitPreFillC = mode_lib->ms.PrefillC[k];
+	p->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k];
+	p->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k];
+	p->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k];
+	p->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
+	p->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
+	p->TWait = mode_lib->ms.TWait;
+
+	// Output slots: the callee writes its results through these pointers.
+	p->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k];
+	p->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k];
+	p->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k];
+	p->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k];
+	p->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k];
+	p->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k];
+	p->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k];
+	p->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k];
+	p->Tno_bw = &mode_lib->ms.Tno_bw[k];
+}
+
+/*
+ * dml_prefetch_check - prefetch, immediate flip and watermark checks
+ * @mode_lib: mode library instance. Inputs are read from mode_lib->ms, results
+ *            are written back into mode_lib->ms (mostly mode_lib->ms.support)
+ *            and mode_lib->scratch is used for temporaries and parameter blocks.
+ *
+ * For each of the two j indices of the per-state arrays this function:
+ *  - copies the [j][k] per-plane values into the "ThisState" working arrays;
+ *  - evaluates VActive bandwidth support, the HostVM inefficiency factor and
+ *    the extra latency;
+ *  - repeats the prefetch schedule, prefetch bandwidth and immediate flip
+ *    checks while stepping MaxVStartup and the per-plane prefetch mode, until
+ *    the checks pass or every combination has been tried;
+ *  - fills the watermark parameter block and calls
+ *    CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport().
+ *
+ * All per-plane loops are bounded with "k < num_active_planes". k is unsigned,
+ * so the "k <= num_active_planes - 1" form would wrap around and never
+ * terminate when there are no active planes.
+ */
+static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
+{
+	struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
+	struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+	struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+	struct DmlPipe *myPipe;
+	dml_uint_t j, k;
+
+	for (j = 0; j < 2; ++j) {
+		mode_lib->ms.TimeCalc = 24 / mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
+
+		/* Select the j-th set of per-state values as the "this state" working set. */
+		for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+			mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
+			mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
+			mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
+			mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
+			mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
+			mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
+			mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
+			mode_lib->ms.UnboundedRequestEnabledThisState = mode_lib->ms.UnboundedRequestEnabledAllStates[j];
+			mode_lib->ms.CompressedBufferSizeInkByteThisState = mode_lib->ms.CompressedBufferSizeInkByteAllStates[j];
+			mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
+			mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
+			mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
+		}
+
+		mode_lib->ms.support.VActiveBandwithSupport[j] = CalculateVActiveBandwithSupport(
+			mode_lib->ms.num_active_planes,
+			mode_lib->ms.ReturnBWPerState[j],
+			mode_lib->ms.NotUrgentLatencyHiding,
+			mode_lib->ms.ReadBandwidthLuma,
+			mode_lib->ms.ReadBandwidthChroma,
+			mode_lib->ms.cursor_bw,
+			mode_lib->ms.meta_row_bandwidth_this_state,
+			mode_lib->ms.dpte_row_bandwidth_this_state,
+			mode_lib->ms.NoOfDPPThisState,
+			mode_lib->ms.UrgentBurstFactorLuma[j],
+			mode_lib->ms.UrgentBurstFactorChroma[j],
+			mode_lib->ms.UrgentBurstFactorCursor[j]);
+
+		s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only(
+			&mode_lib->ms.soc,
+			mode_lib->ms.state.use_ideal_dram_bw_strobe,
+			mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+			mode_lib->ms.DCFCLKState[j],
+			mode_lib->ms.state.fabricclk_mhz,
+			mode_lib->ms.state.dram_speed_mts);
+
+		/* The factor stays 1 unless both GPUVM and HostVM are enabled. */
+		s->HostVMInefficiencyFactor = 1;
+		if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
+			s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBWPerState[j] / s->VMDataOnlyReturnBWPerState;
+
+		mode_lib->ms.ExtraLatency = CalculateExtraLatency(
+			mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
+			s->ReorderingBytes,
+			mode_lib->ms.DCFCLKState[j],
+			mode_lib->ms.TotalNumberOfActiveDPP[j],
+			mode_lib->ms.ip.pixel_chunk_size_kbytes,
+			mode_lib->ms.TotalNumberOfDCCActiveDPP[j],
+			mode_lib->ms.ip.meta_chunk_size_kbytes,
+			mode_lib->ms.ReturnBWPerState[j],
+			mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+			mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+			mode_lib->ms.num_active_planes,
+			mode_lib->ms.NoOfDPPThisState,
+			mode_lib->ms.dpte_group_bytes,
+			s->HostVMInefficiencyFactor,
+			mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+			mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
+
+		/* Start the search from the largest VStartup and each plane's minimum prefetch mode. */
+		s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
+		s->MaxVStartup = 0;
+		s->AllPrefetchModeTested = true;
+		for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+			CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]);
+			s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
+		}
+
+		do {
+			s->MaxVStartup = s->NextMaxVStartup;
+			s->AllPrefetchModeTested = true;
+
+			/* Run the prefetch schedule for every plane with the current VStartup / prefetch mode. */
+			for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+				mode_lib->ms.PrefetchMode[k] = s->NextPrefetchMode[k];
+				mode_lib->ms.TWait = CalculateTWait(
+					mode_lib->ms.PrefetchMode[k],
+					mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+					mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+					mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
+					mode_lib->ms.state.dram_clock_change_latency_us,
+					mode_lib->ms.state.fclk_change_latency_us,
+					mode_lib->ms.UrgLatency,
+					mode_lib->ms.state.sr_enter_plus_exit_time_us);
+
+				myPipe = &s->myPipe;
+				myPipe->Dppclk = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
+				myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK[j];
+				myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+				myPipe->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
+				myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
+				myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
+				myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
+				myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
+				myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
+				myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
+				myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
+				myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
+				myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
+				myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
+				myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
+				myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
+				myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
+				myPipe->ODMMode = mode_lib->ms.ODMModePerState[k];
+				myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
+				myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
+				myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
+				myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML::%s: Calling CalculatePrefetchSchedule for j=%u, k=%u\n", __func__, j, k);
+				dml_print("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[j][k]);
+				dml_print("DML::%s: MaxVStartup = %u\n", __func__, s->MaxVStartup);
+				dml_print("DML::%s: NextPrefetchMode = %u\n", __func__, s->NextPrefetchMode[k]);
+				dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
+				dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]);
+#endif
+
+				CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
+				CalculatePrefetchSchedule_params->myPipe = myPipe;
+				/* The VStartup under test is capped by this plane's own maximum. */
+				CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->MaxVStartup, s->MaximumVStartup[j][k]));
+				CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k];
+				CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
+				CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
+				CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
+				/* These outputs land in scratch dummies; this function does not read them back. */
+				CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
+				CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
+				CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
+				CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
+				CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
+				CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
+
+				set_calculate_prefetch_schedule_params(mode_lib, CalculatePrefetchSchedule_params, j, k);
+
+				mode_lib->ms.support.NoTimeForPrefetch[j][k] =
+					CalculatePrefetchSchedule(&mode_lib->scratch,
+						CalculatePrefetchSchedule_params);
+			}
+
+			/* Urgent burst factors and cursor bandwidth during prefetch, per plane. */
+			for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+				CalculateUrgentBurstFactor(
+					mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+					mode_lib->ms.swath_width_luma_ub_this_state[k],
+					mode_lib->ms.swath_width_chroma_ub_this_state[k],
+					mode_lib->ms.SwathHeightYThisState[k],
+					mode_lib->ms.SwathHeightCThisState[k],
+					mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+					mode_lib->ms.UrgLatency,
+					mode_lib->ms.ip.cursor_buffer_size,
+					mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
+					mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
+					mode_lib->ms.VRatioPreY[j][k],
+					mode_lib->ms.VRatioPreC[j][k],
+					mode_lib->ms.BytePerPixelInDETY[k],
+					mode_lib->ms.BytePerPixelInDETC[k],
+					mode_lib->ms.DETBufferSizeYThisState[k],
+					mode_lib->ms.DETBufferSizeCThisState[k],
+					/* Output */
+					&mode_lib->ms.UrgentBurstFactorCursorPre[k],
+					&mode_lib->ms.UrgentBurstFactorLumaPre[k],
+					&mode_lib->ms.UrgentBurstFactorChromaPre[k],
+					&mode_lib->ms.NotUrgentLatencyHidingPre[k]);
+
+				mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] *
+					mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
+					mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.VRatioPreY[j][k];
+			}
+
+			{
+				CalculatePrefetchBandwithSupport(
+					mode_lib->ms.num_active_planes,
+					mode_lib->ms.ReturnBWPerState[j],
+					mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+					mode_lib->ms.NotUrgentLatencyHidingPre,
+					mode_lib->ms.ReadBandwidthLuma,
+					mode_lib->ms.ReadBandwidthChroma,
+					mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
+					mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
+					mode_lib->ms.cursor_bw,
+					mode_lib->ms.meta_row_bandwidth_this_state,
+					mode_lib->ms.dpte_row_bandwidth_this_state,
+					mode_lib->ms.cursor_bw_pre,
+					mode_lib->ms.prefetch_vmrow_bw,
+					mode_lib->ms.NoOfDPPThisState,
+					mode_lib->ms.UrgentBurstFactorLuma[j],
+					mode_lib->ms.UrgentBurstFactorChroma[j],
+					mode_lib->ms.UrgentBurstFactorCursor[j],
+					mode_lib->ms.UrgentBurstFactorLumaPre,
+					mode_lib->ms.UrgentBurstFactorChromaPre,
+					mode_lib->ms.UrgentBurstFactorCursorPre,
+
+					/* output */
+					&s->dummy_single[0], // dml_float_t *PrefetchBandwidth
+					&s->dummy_single[1], // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
+					&mode_lib->mp.FractionOfUrgentBandwidth, // dml_float_t *FractionOfUrgentBandwidth
+					&mode_lib->ms.support.PrefetchSupported[j]);
+			}
+
+			/* Any single plane failing its line or time limits vetoes prefetch support for this j. */
+			for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+				if (mode_lib->ms.LineTimesForPrefetch[k] < 2.0
+					|| mode_lib->ms.LinesForMetaPTE[k] >= 32.0
+					|| mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16.0
+					|| mode_lib->ms.support.NoTimeForPrefetch[j][k] == true) {
+					mode_lib->ms.support.PrefetchSupported[j] = false;
+				}
+			}
+
+			mode_lib->ms.support.DynamicMetadataSupported[j] = true;
+			for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+				if (mode_lib->ms.support.NoTimeForDynamicMetadata[j][k] == true) {
+					mode_lib->ms.support.DynamicMetadataSupported[j] = false;
+				}
+			}
+
+			/*
+			 * The prefetch VRatio must stay within the enhanced limit, and within the
+			 * regular limit when VStartup is below the plane maximum or enhanced
+			 * prefetch schedule acceleration is disabled.
+			 */
+			mode_lib->ms.support.VRatioInPrefetchSupported[j] = true;
+			for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+				if (mode_lib->ms.support.NoTimeForPrefetch[j][k] == true ||
+					mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+					mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+					((s->MaxVStartup < s->MaximumVStartup[j][k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
+						(mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE__))) {
+					mode_lib->ms.support.VRatioInPrefetchSupported[j] = false;
+				}
+			}
+
+			s->AnyLinesForVMOrRowTooLarge = false;
+			for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+				if (mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16 || mode_lib->ms.LinesForMetaPTE[k] >= 32) {
+					s->AnyLinesForVMOrRowTooLarge = true;
+				}
+			}
+
+			/* Immediate flip is only evaluated when prefetch itself is supported. */
+			if (mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true) {
+				mode_lib->ms.BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
+					mode_lib->ms.num_active_planes,
+					mode_lib->ms.ReturnBWPerState[j],
+					mode_lib->ms.ReadBandwidthLuma,
+					mode_lib->ms.ReadBandwidthChroma,
+					mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
+					mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
+					mode_lib->ms.cursor_bw,
+					mode_lib->ms.cursor_bw_pre,
+					mode_lib->ms.NoOfDPPThisState,
+					mode_lib->ms.UrgentBurstFactorLuma[j],
+					mode_lib->ms.UrgentBurstFactorChroma[j],
+					mode_lib->ms.UrgentBurstFactorCursor[j],
+					mode_lib->ms.UrgentBurstFactorLumaPre,
+					mode_lib->ms.UrgentBurstFactorChromaPre,
+					mode_lib->ms.UrgentBurstFactorCursorPre);
+
+				/* Sum the flip bytes over the planes that do not have iflip marked as "not required". */
+				mode_lib->ms.TotImmediateFlipBytes = 0;
+				for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+					if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) {
+						mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]);
+						if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) {
+							mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]);
+						} else {
+							mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.DPTEBytesPerRow[j][k];
+						}
+					}
+				}
+
+				for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+					CalculateFlipSchedule(
+						s->HostVMInefficiencyFactor,
+						mode_lib->ms.ExtraLatency,
+						mode_lib->ms.UrgLatency,
+						mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
+						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
+						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+						mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k],
+						mode_lib->ms.MetaRowBytes[j][k],
+						mode_lib->ms.DPTEBytesPerRow[j][k],
+						mode_lib->ms.BandwidthAvailableForImmediateFlip,
+						mode_lib->ms.TotImmediateFlipBytes,
+						mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+						(mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]),
+						mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+						mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+						mode_lib->ms.Tno_bw[k],
+						mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
+						mode_lib->ms.dpte_row_height[k],
+						mode_lib->ms.meta_row_height[k],
+						mode_lib->ms.dpte_row_height_chroma[k],
+						mode_lib->ms.meta_row_height_chroma[k],
+						mode_lib->ms.use_one_row_for_frame_flip[j][k], // 24
+
+						/* Output */
+						&mode_lib->ms.DestinationLinesToRequestVMInImmediateFlip[k],
+						&mode_lib->ms.DestinationLinesToRequestRowInImmediateFlip[k],
+						&mode_lib->ms.final_flip_bw[k],
+						&mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
+				}
+
+				{
+					CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
+						mode_lib->ms.ReturnBWPerState[j],
+						mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+						mode_lib->ms.policy.ImmediateFlipRequirement,
+						mode_lib->ms.final_flip_bw,
+						mode_lib->ms.ReadBandwidthLuma,
+						mode_lib->ms.ReadBandwidthChroma,
+						mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
+						mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
+						mode_lib->ms.cursor_bw,
+						mode_lib->ms.meta_row_bandwidth_this_state,
+						mode_lib->ms.dpte_row_bandwidth_this_state,
+						mode_lib->ms.cursor_bw_pre,
+						mode_lib->ms.prefetch_vmrow_bw,
+						mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here
+						mode_lib->ms.UrgentBurstFactorLuma[j],
+						mode_lib->ms.UrgentBurstFactorChroma[j],
+						mode_lib->ms.UrgentBurstFactorCursor[j],
+						mode_lib->ms.UrgentBurstFactorLumaPre,
+						mode_lib->ms.UrgentBurstFactorChromaPre,
+						mode_lib->ms.UrgentBurstFactorCursorPre,
+
+						/* output */
+						&s->dummy_single[0], // dml_float_t *TotalBandwidth
+						&s->dummy_single[1], // dml_float_t *TotalBandwidthNotIncludingMALLPrefetch
+						&s->dummy_single[2], // dml_float_t *FractionOfUrgentBandwidth
+						&mode_lib->ms.support.ImmediateFlipSupportedForState[j]); // dml_bool_t *ImmediateFlipBandwidthSupport
+				}
+
+				/* A plane that needs iflip but cannot schedule it vetoes iflip support for this j. */
+				for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+					if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required) && (mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false))
+						mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
+				}
+
+			} else { // if prefetch not support, assume iflip not supported
+				mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
+			}
+
+			/*
+			 * Pick the next combination to try: either move every plane to its next
+			 * prefetch mode (restarting from the largest VStartup), or keep the
+			 * prefetch modes and try a VStartup that is one smaller.
+			 */
+			if (s->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || s->AnyLinesForVMOrRowTooLarge == false) {
+				s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
+				for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+					s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
+
+					if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
+						s->AllPrefetchModeTested = false;
+				}
+			} else {
+				s->NextMaxVStartup = s->NextMaxVStartup - 1;
+			}
+		} while (!((mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.DynamicMetadataSupported[j] == true &&
+			mode_lib->ms.support.VRatioInPrefetchSupported[j] == true &&
+			// Flip support is considered okay when there is no hostvm and the user doesn't require an iflip, OR the flip bw is ok.
+			// If there is hostvm, DCN needs to support iflip for invalidation.
+			// NOTE(review): the test below uses s->ImmediateFlipRequiredFinal un-negated and does not look at HostVMEnable,
+			// which does not obviously match the sentence above -- confirm against where ImmediateFlipRequiredFinal is computed.
+			((s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j] == true)) ||
+			(s->NextMaxVStartup == s->MaxVStartupAllPlanes[j] && s->AllPrefetchModeTested)));
+
+		for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+			mode_lib->ms.use_one_row_for_frame_this_state[k] = mode_lib->ms.use_one_row_for_frame[j][k];
+		}
+
+		/* SOC latency parameters handed to the watermark calculation. */
+		s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
+		s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
+		s->mSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
+		s->mSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
+		s->mSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
+		s->mSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
+		s->mSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
+		s->mSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
+		s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
+		s->mSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
+		s->mSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
+
+		/* Inputs of the watermark calculation for this j. */
+		CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
+		CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+		CalculateWatermarks_params->PrefetchMode = mode_lib->ms.PrefetchMode;
+		CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+		CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
+		CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
+		CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
+		CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLKState[j];
+		CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBWPerState[j];
+		CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
+		CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+		CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
+		CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+		CalculateWatermarks_params->meta_row_height = mode_lib->ms.meta_row_height;
+		CalculateWatermarks_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
+		CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
+		CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
+		CalculateWatermarks_params->SOCCLK = mode_lib->ms.state.socclk_mhz;
+		CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
+		CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
+		CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
+		CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
+		CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
+		CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
+		CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
+		CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
+		CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+		CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+		CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
+		CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
+		CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
+		CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
+		CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
+		CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
+		CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
+		CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
+		CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+		CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
+		CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
+		CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
+		CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
+		CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
+		CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
+		CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
+		CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
+		CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
+		CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
+		CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledThisState;
+		CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteThisState;
+
+		// Output
+		CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
+		CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j];
+		CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
+		CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[]
+		CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j];
+		CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported
+		CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j];
+		CalculateWatermarks_params->ActiveDRAMClockChangeLatencyMargin = mode_lib->ms.support.ActiveDRAMClockChangeLatencyMargin;
+
+		CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch,
+			CalculateWatermarks_params);
+
+	} // for j
+}
+
+/// @brief The Mode Support function.
+dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
+{
+ struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
+ struct UseMinimumDCFCLK_params_st *UseMinimumDCFCLK_params = &mode_lib->scratch.UseMinimumDCFCLK_params;
+ struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
+ struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
+
+ dml_uint_t j, k, m;
+
+ mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg);
+ dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
+
+ CalculateMaxDETAndMinCompressedBufferSize(
+ mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
+ mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
+ mode_lib->ms.ip.rob_buffer_size_kbytes,
+ mode_lib->ms.ip.max_num_dpp,
+ mode_lib->ms.policy.NomDETInKByteOverrideEnable, // VBA_DELTA
+ mode_lib->ms.policy.NomDETInKByteOverrideValue, // VBA_DELTA
+
+ /* Output */
+ &mode_lib->ms.MaxTotalDETInKByte,
+ &mode_lib->ms.NomDETInKByte,
+ &mode_lib->ms.MinCompressedBufferSizeInKByte);
+
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported);
+
+
+ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+
+ /*Scale Ratio, taps Support Check*/
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k] == false
+ && ((mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha)
+ || mode_lib->ms.cache_display_cfg.plane.HRatio[k] != 1.0
+ || mode_lib->ms.cache_display_cfg.plane.HTaps[k] != 1.0
+ || mode_lib->ms.cache_display_cfg.plane.VRatio[k] != 1.0
+ || mode_lib->ms.cache_display_cfg.plane.VTaps[k] != 1.0)) {
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
+ } else if (mode_lib->ms.cache_display_cfg.plane.VTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] > 8.0
+ || mode_lib->ms.cache_display_cfg.plane.HTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 8.0
+ || (mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 1.0 && (mode_lib->ms.cache_display_cfg.plane.HTaps[k] % 2) == 1)
+ || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.ip.max_hscl_ratio
+ || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.ip.max_vscl_ratio
+ || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.cache_display_cfg.plane.HTaps[k]
+ || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.cache_display_cfg.plane.VTaps[k]
+ || (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
+ && (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] > 8 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 8 ||
+ (mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 1 && mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] % 2 == 1) ||
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.ip.max_hscl_ratio ||
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.ip.max_vscl_ratio ||
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] ||
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]))) {
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
+ }
+ }
+
+ /*Source Format, Pixel Format and Scan Support Check*/
+ mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear && (!(!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) || mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true)) {
+ mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculateBytePerPixelAndBlockSizes(
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
+
+ /* Output */
+ &mode_lib->ms.BytePerPixelY[k],
+ &mode_lib->ms.BytePerPixelC[k],
+ &mode_lib->ms.BytePerPixelInDETY[k],
+ &mode_lib->ms.BytePerPixelInDETC[k],
+ &mode_lib->ms.Read256BlockHeightY[k],
+ &mode_lib->ms.Read256BlockHeightC[k],
+ &mode_lib->ms.Read256BlockWidthY[k],
+ &mode_lib->ms.Read256BlockWidthC[k],
+ &mode_lib->ms.MacroTileHeightY[k],
+ &mode_lib->ms.MacroTileHeightC[k],
+ &mode_lib->ms.MacroTileWidthY[k],
+ &mode_lib->ms.MacroTileWidthC[k]);
+ }
+
+ /*Bandwidth Support Check*/
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) {
+ mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k];
+ mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k];
+ } else {
+ mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
+ mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
+ }
+ }
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.ReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * dml_ceil(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ mode_lib->ms.ReadBandwidthChroma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * dml_ceil(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k] / 2.0;
+ }
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true
+ && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_64) {
+ mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
+ * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
+ / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
+ * mode_lib->ms.cache_display_cfg.timing.HTotal[k]
+ / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8.0;
+ } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
+ * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
+ / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
+ * mode_lib->ms.cache_display_cfg.timing.HTotal[k]
+ / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4.0;
+ } else {
+ mode_lib->ms.WriteBandwidth[k] = 0.0;
+ }
+ }
+
+ /*Writeback Latency support check*/
+ mode_lib->ms.support.WritebackLatencySupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true &&
+ (mode_lib->ms.WriteBandwidth[k] > mode_lib->ms.ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->ms.state.writeback_latency_us)) {
+ mode_lib->ms.support.WritebackLatencySupport = false;
+ }
+ }
+
+ /*Writeback Mode Support Check*/
+ s->TotalNumberOfActiveWriteback = 0;
+ for (k = 0; k <= (dml_uint_t) mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
+ }
+ }
+
+ mode_lib->ms.support.EnoughWritebackUnits = 1;
+ if (s->TotalNumberOfActiveWriteback > (dml_uint_t) mode_lib->ms.ip.max_num_wb) {
+ mode_lib->ms.support.EnoughWritebackUnits = false;
+ }
+
+ /*Writeback Scale Ratio and Taps Support Check*/
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > mode_lib->ms.ip.writeback_max_hscl_ratio
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > mode_lib->ms.ip.writeback_max_vscl_ratio
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] < mode_lib->ms.ip.writeback_min_hscl_ratio
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] < mode_lib->ms.ip.writeback_min_vscl_ratio
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_hscl_taps
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_vscl_taps
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k]
+ || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k]
+ || (mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > 2.0 && ((mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] % 2) == 1))) {
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
+ }
+ if (2.0 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * (mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] - 1) * 57 > mode_lib->ms.ip.writeback_line_buffer_buffer_size) {
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculateSinglePipeDPPCLKAndSCLThroughput(
+ mode_lib->ms.cache_display_cfg.plane.HRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
+ mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.plane.HTaps[k],
+ mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
+ mode_lib->ms.cache_display_cfg.plane.VTaps[k],
+ mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
+ /* Output */
+ &mode_lib->ms.PSCL_FACTOR[k],
+ &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
+ &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
+ s->MaximumSwathWidthSupportLuma = 8192;
+ } else if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
+ s->MaximumSwathWidthSupportLuma = 7680;
+ } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
+ s->MaximumSwathWidthSupportLuma = 4320;
+ } else if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha) {
+ s->MaximumSwathWidthSupportLuma = 3840;
+ } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelY[k] == 8 && mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
+ s->MaximumSwathWidthSupportLuma = 3072;
+ } else {
+ s->MaximumSwathWidthSupportLuma = 6144;
+ }
+
+ if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12) {
+ s->MaximumSwathWidthSupportChroma = (dml_uint_t)(s->MaximumSwathWidthSupportLuma / 2.0);
+ } else {
+ s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
+ }
+ mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ms.ip.line_buffer_size_bits * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatio[k], 1.0) / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k] /
+ (mode_lib->ms.cache_display_cfg.plane.VTaps[k] + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatio[k], 1.0) - 2, 0.0));
+ if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
+ mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
+ } else {
+ mode_lib->ms.MaximumSwathWidthInLineBufferChroma =
+ mode_lib->ms.ip.line_buffer_size_bits
+ * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], 1.0)
+ / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k]
+ / (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]
+ + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], 1.0) - 2, 0.0));
+ }
+ mode_lib->ms.MaximumSwathWidthLuma[k] = dml_min(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
+ mode_lib->ms.MaximumSwathWidthChroma[k] = dml_min(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
+ }
+
+ /*Number Of DSC Slices*/
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
+ mode_lib->ms.cache_display_cfg.output.DSCEnable[k] != dml_dsc_disable) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = mode_lib->ms.cache_display_cfg.output.DSCSlices[k];
+
+ if (mode_lib->ms.support.NumberOfDSCSlices[k] == 0) {
+ if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 4800) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = (dml_uint_t)(dml_ceil(mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 600, 4));
+ } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 2400) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
+ } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 1200) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
+ } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 340) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
+ }
+ }
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
+ }
+ }
+
+ CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
+ CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
+ CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
+ CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
+ CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
+ CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
+ CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
+ CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
+ CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
+ CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
+ CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
+ CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
+ CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
+ CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
+ CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
+ CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
+ CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
+ CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
+ CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+ CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[0];
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[1];
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[2];
+ CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[3];
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[4];
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[5];
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[6];
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[7];
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
+
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch,
+ CalculateSwathAndDETConfiguration_params); /* dml_bool_t *ViewportSizeSupport */
+
+ s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = false;
+ s->MPCCombineMethodAsPossible = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_needed_for_pstate_and_voltage)
+ s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = true;
+ if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_possible)
+ s->MPCCombineMethodAsPossible = true;
+ }
+ mode_lib->ms.support.MPCCombineMethodIncompatible = s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && s->MPCCombineMethodAsPossible;
+
+ for (j = 0; j < 2; j++) {
+ mode_lib->ms.TotalNumberOfActiveDPP[j] = 0;
+ mode_lib->ms.support.TotalAvailablePipesSupport[j] = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateODMMode(
+ mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.policy.ODMUse[k],
+ mode_lib->ms.state.dispclk_mhz,
+ mode_lib->ms.max_state.dispclk_mhz,
+ false, // DSCEnable
+ mode_lib->ms.TotalNumberOfActiveDPP[j],
+ mode_lib->ms.ip.max_num_dpp,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.soc.dcn_downspread_percent,
+ mode_lib->ms.ip.dispclk_ramp_margin_percent,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+
+ /* Output */
+ &s->TotalAvailablePipesSupportNoDSC,
+ &s->NumberOfDPPNoDSC,
+ &s->ODMModeNoDSC,
+ &s->RequiredDISPCLKPerSurfaceNoDSC);
+
+ CalculateODMMode(
+ mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.policy.ODMUse[k],
+ mode_lib->ms.state.dispclk_mhz,
+ mode_lib->ms.max_state.dispclk_mhz,
+ true, // DSCEnable
+ mode_lib->ms.TotalNumberOfActiveDPP[j],
+ mode_lib->ms.ip.max_num_dpp,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.soc.dcn_downspread_percent,
+ mode_lib->ms.ip.dispclk_ramp_margin_percent,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+
+ /* Output */
+ &s->TotalAvailablePipesSupportDSC,
+ &s->NumberOfDPPDSC,
+ &s->ODMModeDSC,
+ &s->RequiredDISPCLKPerSurfaceDSC);
+
+ CalculateOutputLink(
+ mode_lib->ms.state.phyclk_mhz,
+ mode_lib->ms.state.phyclk_d18_mhz,
+ mode_lib->ms.state.phyclk_d32_mhz,
+ mode_lib->ms.soc.phy_downspread_percent,
+ (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k),
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
+ mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k],
+ mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
+ mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k],
+ s->ODMModeNoDSC,
+ s->ODMModeDSC,
+ mode_lib->ms.cache_display_cfg.output.DSCEnable[k],
+ mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k],
+ mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k],
+
+ /* Output */
+ &mode_lib->ms.RequiresDSC[k],
+ &mode_lib->ms.RequiresFEC[k],
+ &mode_lib->ms.OutputBppPerState[k],
+ &mode_lib->ms.OutputTypePerState[k], // VBA_DELTA, VBA uses a string to represent type and rate, but DML uses enum, don't want to rely on strng
+ &mode_lib->ms.OutputRatePerState[k],
+ &mode_lib->ms.RequiredSlots[k]);
+
+ if (mode_lib->ms.RequiresDSC[k] == false) {
+ mode_lib->ms.ODMModePerState[k] = s->ODMModeNoDSC;
+ mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceNoDSC;
+ if (!s->TotalAvailablePipesSupportNoDSC)
+ mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
+ mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPNoDSC;
+ } else {
+ mode_lib->ms.ODMModePerState[k] = s->ODMModeDSC;
+ mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceDSC;
+ if (!s->TotalAvailablePipesSupportDSC)
+ mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
+ mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPDSC;
+ }
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
+ mode_lib->ms.MPCCombine[j][k] = false;
+ mode_lib->ms.NoOfDPP[j][k] = 4;
+ } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
+ mode_lib->ms.MPCCombine[j][k] = false;
+ mode_lib->ms.NoOfDPP[j][k] = 2;
+ } else if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_disabled) {
+ mode_lib->ms.MPCCombine[j][k] = false;
+ mode_lib->ms.NoOfDPP[j][k] = 1;
+ } else if (RoundToDFSGranularity(mode_lib->ms.MinDPPCLKUsingSingleDPP[k] * (1 + mode_lib->ms.soc.dcn_downspread_percent / 100),
+ 1, mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz) <= mode_lib->ms.state.dppclk_mhz &&
+ mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k] == true) {
+ mode_lib->ms.MPCCombine[j][k] = false;
+ mode_lib->ms.NoOfDPP[j][k] = 1;
+ } else if (mode_lib->ms.TotalNumberOfActiveDPP[j] < (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
+ mode_lib->ms.MPCCombine[j][k] = true;
+ mode_lib->ms.NoOfDPP[j][k] = 2;
+ mode_lib->ms.TotalNumberOfActiveDPP[j] = (dml_uint_t) mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
+ } else {
+ mode_lib->ms.MPCCombine[j][k] = false;
+ mode_lib->ms.NoOfDPP[j][k] = 1;
+ mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
+ }
+ }
+
+ mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = 0;
+ s->NoChromaOrLinear = true;
+ for (k = 0; k < (dml_uint_t) mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.NoOfDPP[j][k] == 1)
+ mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] + 1;
+ if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8
+ || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10
+ || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12
+ || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha
+ || mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
+ s->NoChromaOrLinear = false;
+ }
+ }
+
+ if (j == 1 && !UnboundedRequest(mode_lib->ms.policy.UseUnboundedRequesting,
+ mode_lib->ms.TotalNumberOfActiveDPP[j], s->NoChromaOrLinear,
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[0])) {
+ while (!(mode_lib->ms.TotalNumberOfActiveDPP[j] >= (dml_uint_t) mode_lib->ms.ip.max_num_dpp || mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] == 0)) {
+ s->BWOfNonCombinedSurfaceOfMaximumBandwidth = 0;
+ s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_disabled && mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_as_needed_for_voltage &&
+ mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k] > s->BWOfNonCombinedSurfaceOfMaximumBandwidth &&
+ (mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_2to1 && mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_4to1) &&
+ mode_lib->ms.MPCCombine[j][k] == false) {
+ s->BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
+ s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = k;
+ }
+ }
+ mode_lib->ms.MPCCombine[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = true;
+ mode_lib->ms.NoOfDPP[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2;
+ mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
+ mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] - 1;
+ }
+ }
+
+ //DISPCLK/DPPCLK
+ mode_lib->ms.WritebackRequiredDISPCLK = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
+ mode_lib->ms.WritebackRequiredDISPCLK = dml_max(mode_lib->ms.WritebackRequiredDISPCLK,
+ CalculateWriteBackDISPCLK(mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.ip.writeback_line_buffer_buffer_size,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
+ }
+ }
+
+ mode_lib->ms.RequiredDISPCLK[j] = mode_lib->ms.WritebackRequiredDISPCLK;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.RequiredDISPCLK[j] = dml_max(mode_lib->ms.RequiredDISPCLK[j], mode_lib->ms.RequiredDISPCLKPerSurface[j][k]);
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
+ }
+
+ CalculateDPPCLK(mode_lib->ms.num_active_planes,
+ mode_lib->ms.soc.dcn_downspread_percent,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+ mode_lib->ms.MinDPPCLKUsingSingleDPP,
+ mode_lib->ms.NoOfDPPThisState,
+ /* Output */
+ &mode_lib->ms.GlobalDPPCLK,
+ mode_lib->ms.RequiredDPPCLKThisState);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.RequiredDPPCLKPerSurface[j][k] = mode_lib->ms.RequiredDPPCLKThisState[k];
+ }
+
+ mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] = !((mode_lib->ms.RequiredDISPCLK[j] > mode_lib->ms.state.dispclk_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.state.dppclk_mhz));
+
+ if (mode_lib->ms.TotalNumberOfActiveDPP[j] > (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
+ mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
+ }
+ } // j
+
+ /* Total Available OTG, HDMIFRL, DP Support Check */
+ s->TotalNumberOfActiveOTG = 0;
+ s->TotalNumberOfActiveHDMIFRL = 0;
+ s->TotalNumberOfActiveDP2p0 = 0;
+ s->TotalNumberOfActiveDP2p0Outputs = 0;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
+ s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0) {
+ s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k || mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == false) {
+ s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
+ }
+ }
+ }
+ }
+
+ mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (dml_uint_t) mode_lib->ms.ip.max_num_otg);
+ mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (dml_uint_t) mode_lib->ms.ip.max_num_hdmi_frl_outputs);
+ mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_outputs);
+
+ /* Display IO and DSC Support Check */
+ mode_lib->ms.support.NonsupportedDSCInputBPC = false;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
+ !(mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 12.0
+ || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 10.0
+ || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 8.0
+ || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] > (dml_uint_t) mode_lib->ms.ip.maximum_dsc_bits_per_component
+ )) {
+ mode_lib->ms.support.NonsupportedDSCInputBPC = true;
+ }
+ }
+
+ mode_lib->ms.support.ExceededMultistreamSlots = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k) {
+ s->TotalSlots = mode_lib->ms.RequiredSlots[k];
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[j] == k)
+ s->TotalSlots = s->TotalSlots + mode_lib->ms.RequiredSlots[j];
+ }
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp && s->TotalSlots > 63)
+ mode_lib->ms.support.ExceededMultistreamSlots = true;
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 && s->TotalSlots > 64)
+ mode_lib->ms.support.ExceededMultistreamSlots = true;
+ }
+ }
+ mode_lib->ms.support.LinkCapacitySupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
+ mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) && mode_lib->ms.OutputBppPerState[k] == 0) {
+ mode_lib->ms.support.LinkCapacitySupport = false;
+ }
+ }
+
+ mode_lib->ms.support.P2IWith420 = false;
+ mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
+ mode_lib->ms.support.DSC422NativeNotSupported = false;
+ mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
+ mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
+ mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
+ mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
+ mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
+ mode_lib->ms.support.NotEnoughLanesForMSO = false;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420 && mode_lib->ms.cache_display_cfg.timing.Interlace[k] == 1 && mode_lib->ms.ip.ptoi_supported == true)
+ mode_lib->ms.support.P2IWith420 = true;
+
+ if (mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] != 0)
+ mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true;
+ if ((mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable || mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary) && mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 && !mode_lib->ms.ip.dsc422_native_support)
+ mode_lib->ms.support.DSC422NativeNotSupported = true;
+
+ if (((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr2 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr3) &&
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_edp) ||
+ ((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr10 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr13p5 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr20) &&
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp2p0))
+ mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
+
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_na)
+ mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
+ mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
+ mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
+ }
+ }
+
+ if ((mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k)
+ mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j)
+ mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
+ }
+ }
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_split_1to2 ||
+ mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 || mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4))
+ mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
+
+ if ((mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 2) ||
+ (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 4))
+ mode_lib->ms.support.NotEnoughLanesForMSO = true;
+ }
+ }
+
+ mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl &&
+ RequiredDTBCLK(
+ mode_lib->ms.RequiresDSC[k],
+ mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.OutputBppPerState[k],
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
+ mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k]) > mode_lib->ms.state.dtbclk_mhz) {
+ mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
+ }
+ }
+
+ mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = true;
+ mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1 && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi) {
+ mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = false;
+ }
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1 && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi)) {
+ mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = false;
+ }
+ }
+
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
+ if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) {
+ s->DSCFormatFactor = 2;
+ } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) {
+ s->DSCFormatFactor = 1;
+ } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
+ s->DSCFormatFactor = 2;
+ } else {
+ s->DSCFormatFactor = 1;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
+#endif
+ if (mode_lib->ms.RequiresDSC[k] == true) {
+ if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
+ if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
+ dml_print("DML::%s: k=%u, DSCCLKPerState = %f\n", __func__, k, mode_lib->ms.state.dscclk_mhz);
+ dml_print("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
+#endif
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
+ }
+ } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
+ if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
+ }
+ } else {
+ if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
+ }
+ }
+ }
+ }
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: DSCCLKRequiredMoreThanSupported = %u\n", __func__, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
+#endif
+
+ /* Check DSC Unit and Slices Support */
+ mode_lib->ms.support.NotEnoughDSCUnits = false;
+ mode_lib->ms.support.NotEnoughDSCSlices = false;
+ s->TotalDSCUnitsRequired = 0;
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.RequiresDSC[k] == true) {
+ if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
+ if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 4 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
+ s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 4;
+ if (mode_lib->ms.support.NumberOfDSCSlices[k] > 16)
+ mode_lib->ms.support.NotEnoughDSCSlices = true;
+ } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
+ if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 2 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
+ s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 2;
+ if (mode_lib->ms.support.NumberOfDSCSlices[k] > 8)
+ mode_lib->ms.support.NotEnoughDSCSlices = true;
+ } else {
+ if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
+ s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 1;
+ if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4)
+ mode_lib->ms.support.NotEnoughDSCSlices = true;
+ }
+ }
+ }
+ if (s->TotalDSCUnitsRequired > (dml_uint_t) mode_lib->ms.ip.num_dsc) {
+ mode_lib->ms.support.NotEnoughDSCUnits = true;
+ }
+
+ /*DSC Delay per state*/
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.DSCDelayPerState[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
+ mode_lib->ms.ODMModePerState[k],
+ mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
+ mode_lib->ms.OutputBppPerState[k],
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
+ for (j = 0; j <= mode_lib->ms.num_active_planes - 1; j++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m && mode_lib->ms.RequiresDSC[m] == true) {
+ mode_lib->ms.DSCDelayPerState[k] = mode_lib->ms.DSCDelayPerState[m];
+ }
+ }
+ }
+ }
+
+ //Calculate Swath, DET Configuration, DCFCLKDeepSleep
+ //
+ for (j = 0; j < 2; ++j) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
+ mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
+ mode_lib->ms.ODMModeThisState[k] = mode_lib->ms.ODMModePerState[k];
+ }
+
+ CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
+ CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
+ CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+ CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
+ CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
+ CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
+ CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
+ CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
+ CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
+ CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
+ CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
+ CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
+ CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
+ CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
+ CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
+ CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMModeThisState;
+ CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
+ CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
+ CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+ CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state;
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state;
+ CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthYThisState;
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthCThisState;
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByteThisState;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabledThisState;
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByteThisState;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport[j];
+
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch,
+ CalculateSwathAndDETConfiguration_params);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.swath_width_luma_ub_all_states[j][k] = mode_lib->ms.swath_width_luma_ub_this_state[k];
+ mode_lib->ms.swath_width_chroma_ub_all_states[j][k] = mode_lib->ms.swath_width_chroma_ub_this_state[k];
+ mode_lib->ms.SwathWidthYAllStates[j][k] = mode_lib->ms.SwathWidthYThisState[k];
+ mode_lib->ms.SwathWidthCAllStates[j][k] = mode_lib->ms.SwathWidthCThisState[k];
+ mode_lib->ms.SwathHeightYAllStates[j][k] = mode_lib->ms.SwathHeightYThisState[k];
+ mode_lib->ms.SwathHeightCAllStates[j][k] = mode_lib->ms.SwathHeightCThisState[k];
+ mode_lib->ms.UnboundedRequestEnabledAllStates[j] = mode_lib->ms.UnboundedRequestEnabledThisState;
+ mode_lib->ms.CompressedBufferSizeInkByteAllStates[j] = mode_lib->ms.CompressedBufferSizeInkByteThisState;
+ mode_lib->ms.DETBufferSizeInKByteAllStates[j][k] = mode_lib->ms.DETBufferSizeInKByteThisState[k];
+ mode_lib->ms.DETBufferSizeYAllStates[j][k] = mode_lib->ms.DETBufferSizeYThisState[k];
+ mode_lib->ms.DETBufferSizeCAllStates[j][k] = mode_lib->ms.DETBufferSizeCThisState[k];
+ }
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ }
+
+ CalculateSurfaceSizeInMall(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+ mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
+ mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
+ mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
+ mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
+ mode_lib->ms.Read256BlockWidthY,
+ mode_lib->ms.Read256BlockWidthC,
+ mode_lib->ms.Read256BlockHeightY,
+ mode_lib->ms.Read256BlockHeightC,
+ mode_lib->ms.MacroTileWidthY,
+ mode_lib->ms.MacroTileWidthC,
+ mode_lib->ms.MacroTileHeightY,
+ mode_lib->ms.MacroTileHeightC,
+
+ /* Output */
+ mode_lib->ms.SurfaceSizeInMALL,
+ &mode_lib->ms.support.ExceededMALLSize);
+
+ for (j = 0; j < 2; j++) {
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
+ mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
+ mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
+ mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
+ mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
+ mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
+ mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
+ mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
+ mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
+ mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
+ mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
+ }
+
+ mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
+ mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = mode_lib->ms.TotalNumberOfDCCActiveDPP[j] + mode_lib->ms.NoOfDPP[j][k];
+ }
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->SurfParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+ s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
+ s->SurfParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
+ s->SurfParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
+ s->SurfParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
+ s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
+ s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
+ s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
+ s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
+ s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
+ s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
+ s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
+ s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
+ s->SurfParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
+ s->SurfParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
+ s->SurfParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
+ s->SurfParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
+ s->SurfParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
+ s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
+ s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
+ s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+ s->SurfParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ s->SurfParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
+ s->SurfParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
+ s->SurfParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
+ s->SurfParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
+ s->SurfParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
+ s->SurfParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
+ s->SurfParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+ s->SurfParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
+ s->SurfParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
+ s->SurfParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
+ s->SurfParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
+ s->SurfParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
+ s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
+ s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
+ s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
+ }
+
+ CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
+ CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
+ CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
+ CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
+ CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
+ CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
+ CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+ CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+ CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+ CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+ CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
+ CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+ CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
+ CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
+ CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceededPerState;
+ CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[0];
+ CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[1];
+ CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
+ CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[2]; // VBA_DELTA
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[3]; // VBA_DELTA
+ CalculateVMRowAndSwath_params->meta_req_width = s->dummy_integer_array[4];
+ CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[5];
+ CalculateVMRowAndSwath_params->meta_req_height = s->dummy_integer_array[6];
+ CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[7];
+ CalculateVMRowAndSwath_params->meta_row_width = s->dummy_integer_array[8];
+ CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[9];
+ CalculateVMRowAndSwath_params->meta_row_height = mode_lib->ms.meta_row_height;
+ CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
+ CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[10];
+ CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[11];
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[12];
+ CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[13];
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[14];
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[15];
+ CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[16];
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[17];
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[18];
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[19];
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[20];
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesYThisState;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesCThisState;
+ CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
+ CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
+ CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY;
+ CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC;
+ CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bandwidth_this_state;
+ CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bandwidth_this_state;
+ CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRowThisState;
+ CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState;
+ CalculateVMRowAndSwath_params->MetaRowByte = mode_lib->ms.MetaRowBytesThisState;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame_this_state;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip_this_state;
+ CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = s->dummy_boolean_array[0];
+ CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
+ CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[21];
+
+ CalculateVMRowAndSwath(&mode_lib->scratch,
+ CalculateVMRowAndSwath_params);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.PrefetchLinesY[j][k] = mode_lib->ms.PrefetchLinesYThisState[k];
+ mode_lib->ms.PrefetchLinesC[j][k] = mode_lib->ms.PrefetchLinesCThisState[k];
+ mode_lib->ms.meta_row_bandwidth[j][k] = mode_lib->ms.meta_row_bandwidth_this_state[k];
+ mode_lib->ms.dpte_row_bandwidth[j][k] = mode_lib->ms.dpte_row_bandwidth_this_state[k];
+ mode_lib->ms.DPTEBytesPerRow[j][k] = mode_lib->ms.DPTEBytesPerRowThisState[k];
+ mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState[k];
+ mode_lib->ms.MetaRowBytes[j][k] = mode_lib->ms.MetaRowBytesThisState[k];
+ mode_lib->ms.use_one_row_for_frame[j][k] = mode_lib->ms.use_one_row_for_frame_this_state[k];
+ mode_lib->ms.use_one_row_for_frame_flip[j][k] = mode_lib->ms.use_one_row_for_frame_flip_this_state[k];
+ }
+
+ mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.PTEBufferSizeNotExceededPerState[k] == false)
+ mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: j=%u k=%u, PTEBufferSizeNotExceededPerState[%u] = %u\n", __func__, j, k, k, mode_lib->ms.PTEBufferSizeNotExceededPerState[k]);
+#endif
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PTEBufferSizeNotExceeded[%u] = %u\n", __func__, j, mode_lib->ms.support.PTEBufferSizeNotExceeded[j]);
+#endif
+
+ mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.DCCMetaBufferSizeNotExceededPerState[k] == false)
+ mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = false;
+ }
+
+ mode_lib->ms.UrgLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
+ mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
+ mode_lib->ms.state.urgent_latency_vm_data_only_us,
+ mode_lib->ms.soc.do_urgent_latency_adjustment,
+ mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
+ mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
+ mode_lib->ms.state.fabricclk_mhz);
+
+ /* Getter functions work at mp interface so copy the urgent latency to mp*/
+ mode_lib->mp.UrgentLatency = mode_lib->ms.UrgLatency;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateUrgentBurstFactor(
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+ mode_lib->ms.swath_width_luma_ub_this_state[k],
+ mode_lib->ms.swath_width_chroma_ub_this_state[k],
+ mode_lib->ms.SwathHeightYThisState[k],
+ mode_lib->ms.SwathHeightCThisState[k],
+ (dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.ip.cursor_buffer_size,
+ mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
+ mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ mode_lib->ms.BytePerPixelInDETY[k],
+ mode_lib->ms.BytePerPixelInDETC[k],
+ mode_lib->ms.DETBufferSizeYThisState[k],
+ mode_lib->ms.DETBufferSizeCThisState[k],
+ /* Output */
+ &mode_lib->ms.UrgentBurstFactorCursor[j][k],
+ &mode_lib->ms.UrgentBurstFactorLuma[j][k],
+ &mode_lib->ms.UrgentBurstFactorChroma[j][k],
+ &mode_lib->ms.NotUrgentLatencyHiding[k]);
+ }
+
+ CalculateDCFCLKDeepSleep(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.plane.VRatio,
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
+ mode_lib->ms.SwathWidthYThisState,
+ mode_lib->ms.SwathWidthCThisState,
+ mode_lib->ms.NoOfDPPThisState,
+ mode_lib->ms.cache_display_cfg.plane.HRatio,
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ mode_lib->ms.PSCL_FACTOR,
+ mode_lib->ms.PSCL_FACTOR_CHROMA,
+ mode_lib->ms.RequiredDPPCLKThisState,
+ mode_lib->ms.ReadBandwidthLuma,
+ mode_lib->ms.ReadBandwidthChroma,
+ mode_lib->ms.soc.return_bus_width_bytes,
+
+ /* Output */
+ &mode_lib->ms.ProjectedDCFCLKDeepSleep[j]);
+ }
+
+ //Calculate Return BW
+ for (j = 0; j < 2; ++j) {
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
+ mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / mode_lib->ms.RequiredDISPCLK[j];
+ } else {
+ mode_lib->ms.WritebackDelayTime[k] = 0.0;
+ }
+ for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[m] == k && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[m] == true) {
+ mode_lib->ms.WritebackDelayTime[k] = dml_max(mode_lib->ms.WritebackDelayTime[k],
+ mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
+ mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[m],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[m],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[m]) / mode_lib->ms.RequiredDISPCLK[j]);
+ }
+ }
+ }
+ }
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m) {
+ mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m];
+ }
+ }
+ }
+ s->MaxVStartupAllPlanes[j] = 0; // max vstartup among all planes
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ s->MaximumVStartup[j][k] = CalculateMaxVStartup(k,
+ mode_lib->ms.ip.ptoi_supported,
+ mode_lib->ms.ip.vblank_nom_default_us,
+ &mode_lib->ms.cache_display_cfg.timing,
+ mode_lib->ms.WritebackDelayTime[k]);
+
+ s->MaxVStartupAllPlanes[j] = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes[j], s->MaximumVStartup[j][k]));
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, MaxVStartupAllPlanes[%u] = %u\n", __func__, k, j, s->MaxVStartupAllPlanes[j]);
+ dml_print("DML::%s: k=%u, MaximumVStartup[%u][%u] = %u\n", __func__, k, j, k, s->MaximumVStartup[j][k]);
+#endif
+ }
+ }
+
+ s->ReorderingBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+ mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+ mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+ for (j = 0; j < 2; ++j) {
+ mode_lib->ms.DCFCLKState[j] = mode_lib->ms.state.dcfclk_mhz;
+ }
+
+ /* Immediate Flip and MALL parameters */
+ s->ImmediateFlipRequiredFinal = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->ImmediateFlipRequiredFinal = s->ImmediateFlipRequiredFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
+ }
+
+ mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified ||
+ ((mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_required) &&
+ (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required));
+ }
+ mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified && s->ImmediateFlipRequiredFinal;
+
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == true || mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) &&
+ (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame || mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe));
+ }
+
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
+ ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)) ||
+ ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame));
+ }
+
+ s->FullFrameMALLPStateMethod = false;
+ s->SubViewportMALLPStateMethod = false;
+ s->PhantomPipeMALLPStateMethod = false;
+ s->SubViewportMALLRefreshGreaterThan120Hz = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
+ s->FullFrameMALLPStateMethod = true;
+ if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) {
+ s->SubViewportMALLPStateMethod = true;
+ if (mode_lib->ms.cache_display_cfg.timing.RefreshRate[k] > 120)
+ s->SubViewportMALLRefreshGreaterThan120Hz = true;
+ }
+ if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)
+ s->PhantomPipeMALLPStateMethod = true;
+ }
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod)
+ || (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
+
+ if (mode_lib->ms.policy.UseMinimumRequiredDCFCLK == true) {
+ UseMinimumDCFCLK_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ UseMinimumDCFCLK_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
+ UseMinimumDCFCLK_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ UseMinimumDCFCLK_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
+ UseMinimumDCFCLK_params->MaxPrefetchMode = dml_prefetch_support_stutter;
+ UseMinimumDCFCLK_params->DRAMClockChangeLatencyFinal = mode_lib->ms.state.dram_clock_change_latency_us;
+ UseMinimumDCFCLK_params->FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
+ UseMinimumDCFCLK_params->SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
+ UseMinimumDCFCLK_params->ReturnBusWidth = mode_lib->ms.soc.return_bus_width_bytes;
+ UseMinimumDCFCLK_params->RoundTripPingLatencyCycles = mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles;
+ UseMinimumDCFCLK_params->ReorderingBytes = s->ReorderingBytes;
+ UseMinimumDCFCLK_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+ UseMinimumDCFCLK_params->MetaChunkSize = mode_lib->ms.ip.meta_chunk_size_kbytes;
+ UseMinimumDCFCLK_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+ UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+ UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+ UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+ UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+ UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
+ UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
+ UseMinimumDCFCLK_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+ UseMinimumDCFCLK_params->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation = mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent;
+ UseMinimumDCFCLK_params->PercentOfIdealSDPPortBWReceivedAfterUrgLatency = mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent;
+ UseMinimumDCFCLK_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
+ UseMinimumDCFCLK_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
+ UseMinimumDCFCLK_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes;
+ UseMinimumDCFCLK_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired;
+ UseMinimumDCFCLK_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
+ UseMinimumDCFCLK_params->RequiredDPPCLKPerSurface = mode_lib->ms.RequiredDPPCLKPerSurface;
+ UseMinimumDCFCLK_params->RequiredDISPCLK = mode_lib->ms.RequiredDISPCLK;
+ UseMinimumDCFCLK_params->UrgLatency = mode_lib->ms.UrgLatency;
+ UseMinimumDCFCLK_params->NoOfDPP = mode_lib->ms.NoOfDPP;
+ UseMinimumDCFCLK_params->ProjectedDCFCLKDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep;
+ UseMinimumDCFCLK_params->MaximumVStartup = s->MaximumVStartup;
+ UseMinimumDCFCLK_params->TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP;
+ UseMinimumDCFCLK_params->TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP;
+ UseMinimumDCFCLK_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+ UseMinimumDCFCLK_params->PrefetchLinesY = mode_lib->ms.PrefetchLinesY;
+ UseMinimumDCFCLK_params->PrefetchLinesC = mode_lib->ms.PrefetchLinesC;
+ UseMinimumDCFCLK_params->swath_width_luma_ub_all_states = mode_lib->ms.swath_width_luma_ub_all_states;
+ UseMinimumDCFCLK_params->swath_width_chroma_ub_all_states = mode_lib->ms.swath_width_chroma_ub_all_states;
+ UseMinimumDCFCLK_params->BytePerPixelY = mode_lib->ms.BytePerPixelY;
+ UseMinimumDCFCLK_params->BytePerPixelC = mode_lib->ms.BytePerPixelC;
+ UseMinimumDCFCLK_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
+ UseMinimumDCFCLK_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
+ UseMinimumDCFCLK_params->PDEAndMetaPTEBytesPerFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame;
+ UseMinimumDCFCLK_params->DPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
+ UseMinimumDCFCLK_params->MetaRowBytes = mode_lib->ms.MetaRowBytes;
+ UseMinimumDCFCLK_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable;
+ UseMinimumDCFCLK_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
+ UseMinimumDCFCLK_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
+ UseMinimumDCFCLK_params->DCFCLKPerState = mode_lib->ms.state.dcfclk_mhz;
+ UseMinimumDCFCLK_params->DCFCLKState = mode_lib->ms.DCFCLKState;
+
+ UseMinimumDCFCLK(&mode_lib->scratch,
+ UseMinimumDCFCLK_params);
+
+ } // UseMinimumRequiredDCFCLK == true
+
+ for (j = 0; j < 2; ++j) {
+ mode_lib->ms.ReturnBWPerState[j] = dml_get_return_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.DCFCLKState[j], mode_lib->ms.state.fabricclk_mhz,
+ mode_lib->ms.state.dram_speed_mts);
+ mode_lib->ms.ReturnDRAMBWPerState[j] = dml_get_return_dram_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.state.dram_speed_mts);
+ }
+
+ //Re-ordering Buffer Support Check
+ for (j = 0; j < 2; ++j) {
+ if ((mode_lib->ms.ip.rob_buffer_size_kbytes - mode_lib->ms.ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.ReturnBWPerState[j] >
+ (mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles + 32) / mode_lib->ms.DCFCLKState[j] + s->ReorderingBytes / mode_lib->ms.ReturnBWPerState[j]) {
+ mode_lib->ms.support.ROBSupport[j] = true;
+ } else {
+ mode_lib->ms.support.ROBSupport[j] = false;
+ }
+ dml_print("DML::%s: DEBUG ROBSupport[%u] = %u (%u)\n", __func__, j, mode_lib->ms.support.ROBSupport[j], __LINE__);
+ }
+
+ //Vertical Active BW support check
+ s->MaxTotalVActiveRDBandwidth = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->MaxTotalVActiveRDBandwidth = s->MaxTotalVActiveRDBandwidth + mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
+ }
+
+ for (j = 0; j < 2; ++j) {
+ mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j] = dml_min3(mode_lib->ms.soc.return_bus_width_bytes * mode_lib->ms.DCFCLKState[j] * mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent / 100.0,
+ mode_lib->ms.state.fabricclk_mhz * mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes * mode_lib->ms.soc.max_avg_fabric_bw_use_normal_percent / 100.0,
+ mode_lib->ms.state.dram_speed_mts * mode_lib->ms.soc.num_chans * mode_lib->ms.soc.dram_channel_width_bytes *
+ ((mode_lib->ms.state.use_ideal_dram_bw_strobe && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable) ?
+ mode_lib->ms.soc.max_avg_dram_bw_use_normal_strobe_percent : mode_lib->ms.soc.max_avg_dram_bw_use_normal_percent) / 100.0);
+
+ if (s->MaxTotalVActiveRDBandwidth <= mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j]) {
+ mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = true;
+ } else {
+ mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = false;
+ }
+ }
+
+ /* Prefetch Check */
+ dml_prefetch_check(mode_lib);
+
+ // End of Prefetch Check
+ dml_print("DML::%s: Done prefetch calculation\n", __func__);
+
+ /*Cursor Support Check*/
+ mode_lib->ms.support.CursorSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] > 0.0) {
+ if (mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] == 64 && mode_lib->ms.ip.cursor_64bpp_support == false) {
+ mode_lib->ms.support.CursorSupport = false;
+ }
+ }
+ }
+
+ /*Valid Pitch Check*/
+ mode_lib->ms.support.PitchSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.support.AlignedYPitch[k] = dml_ceil(
+ dml_max(mode_lib->ms.cache_display_cfg.surface.PitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]),
+ mode_lib->ms.MacroTileWidthY[k]);
+ if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
+ mode_lib->ms.support.AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
+ } else {
+ mode_lib->ms.support.AlignedDCCMetaPitchY[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
+ }
+ if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
+ && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8) {
+ mode_lib->ms.support.AlignedCPitch[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.PitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), mode_lib->ms.MacroTileWidthC[k]);
+ if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
+ mode_lib->ms.support.AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
+ } else {
+ mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+ }
+ } else {
+ mode_lib->ms.support.AlignedCPitch[k] = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
+ mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+ }
+ if (mode_lib->ms.support.AlignedYPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchY[k] || mode_lib->ms.support.AlignedCPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchC[k] ||
+ mode_lib->ms.support.AlignedDCCMetaPitchY[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k] || mode_lib->ms.support.AlignedDCCMetaPitchC[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]) {
+ mode_lib->ms.support.PitchSupport = false;
+ }
+ }
+
+ mode_lib->ms.support.ViewportExceedsSurface = false;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k]) {
+ mode_lib->ms.support.ViewportExceedsSurface = true;
+ if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 &&
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_8 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe) {
+ if (mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k]) {
+ mode_lib->ms.support.ViewportExceedsSurface = true;
+ }
+ }
+ }
+ }
+
+ /*Mode Support, Voltage State and SOC Configuration*/
+ for (j = 0; j < 2; j++) { // j iterator is for the combine mode off or on
+ dml_print("DML::%s: checking support for j=%u\n", __func__, j);
+ dml_print("DML::%s: state_idx=%0d max_state_idx=%0d\n", __func__, mode_lib->ms.state_idx, mode_lib->ms.max_state_idx);
+
+ s->is_max_pwr_state = (mode_lib->ms.max_state_idx == mode_lib->ms.state_idx);
+ s->is_max_dram_pwr_state = (mode_lib->ms.max_state.dram_speed_mts == mode_lib->ms.state.dram_speed_mts);
+
+ s->dram_clock_change_support = (!mode_lib->ms.policy.DRAMClockChangeRequirementFinal ||
+ (s->is_max_dram_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported) ||
+ mode_lib->ms.support.DRAMClockChangeSupport[j] != dml_dram_clock_change_unsupported);
+ s->f_clock_change_support = (!mode_lib->ms.policy.FCLKChangeRequirementFinal ||
+ (s->is_max_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported) ||
+ mode_lib->ms.support.FCLKChangeSupport[j] != dml_fclock_change_unsupported);
+
+ if (mode_lib->ms.support.ScaleRatioAndTapsSupport == true
+ && mode_lib->ms.support.SourceFormatPixelAndScanSupport == true
+ && mode_lib->ms.support.ViewportSizeSupport[j] == true
+ && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
+ && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
+ && !mode_lib->ms.support.BPPForMultistreamNotIndicated
+ && !mode_lib->ms.support.MultistreamWithHDMIOreDP
+ && !mode_lib->ms.support.ExceededMultistreamSlots
+ && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
+ && !mode_lib->ms.support.NotEnoughLanesForMSO
+ && mode_lib->ms.support.LinkCapacitySupport == true
+ && !mode_lib->ms.support.P2IWith420
+ && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP
+ && !mode_lib->ms.support.DSC422NativeNotSupported
+ && !mode_lib->ms.support.MPCCombineMethodIncompatible
+ && mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true
+ && mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true
+ && mode_lib->ms.support.NotEnoughDSCUnits == false
+ && !mode_lib->ms.support.NotEnoughDSCSlices
+ && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
+ && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+ && mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false
+ && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
+ && mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false
+ && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
+ && !mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
+ && mode_lib->ms.support.ROBSupport[j] == true
+ && mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true
+ && mode_lib->ms.support.TotalAvailablePipesSupport[j] == true
+ && mode_lib->ms.support.NumberOfOTGSupport == true
+ && mode_lib->ms.support.NumberOfHDMIFRLSupport == true
+ && mode_lib->ms.support.NumberOfDP2p0Support == true
+ && mode_lib->ms.support.EnoughWritebackUnits == true
+ && mode_lib->ms.support.WritebackLatencySupport == true
+ && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true
+ && mode_lib->ms.support.CursorSupport == true
+ && mode_lib->ms.support.PitchSupport == true
+ && mode_lib->ms.support.ViewportExceedsSurface == false
+ && mode_lib->ms.support.PrefetchSupported[j] == true
+ && mode_lib->ms.support.VActiveBandwithSupport[j] == true
+ && mode_lib->ms.support.DynamicMetadataSupported[j] == true
+ && mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true
+ && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true
+ && mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true
+ && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true
+ && mode_lib->ms.support.NonsupportedDSCInputBPC == false
+ && !mode_lib->ms.support.ExceededMALLSize
+ && ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j])
+ && s->dram_clock_change_support == true
+ && s->f_clock_change_support == true
+ && (!mode_lib->ms.policy.USRRetrainingRequiredFinal || mode_lib->ms.support.USRRetrainingSupport[j])) {
+ dml_print("DML::%s: mode is supported\n", __func__);
+ mode_lib->ms.support.ModeSupport[j] = true;
+ } else {
+ dml_print("DML::%s: mode is NOT supported\n", __func__);
+ mode_lib->ms.support.ModeSupport[j] = false;
+ dml_print_mode_support(mode_lib, j);
+ }
+ }
+
+ mode_lib->ms.support.MaximumMPCCombine = 0;
+ mode_lib->ms.support.ModeIsSupported = 0;
+ if (mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true) { // if the mode is supported by either no combine or mpccombine
+ mode_lib->ms.support.ModeIsSupported = mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true;
+
+ // Determine if MPC combine is necessary, depends on if using MPC combine will help dram clock change or fclk change, etc.
+ if ((mode_lib->ms.support.ModeSupport[0] == false && mode_lib->ms.support.ModeSupport[1] == true) || s->MPCCombineMethodAsPossible ||
+ (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.DRAMClockChangeRequirementFinal &&
+ (((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_sub_vp) &&
+ !(mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_sub_vp)) ||
+ ((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr
+ || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_full_frame
+ || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_sub_vp || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_sub_vp
+ ) &&
+ mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_unsupported)))
+ || (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.FCLKChangeRequirementFinal &&
+ ((mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vactive && mode_lib->ms.support.FCLKChangeSupport[0] != dml_fclock_change_vactive) ||
+ (mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vblank && mode_lib->ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)))) {
+ mode_lib->ms.support.MaximumMPCCombine = 1;
+ } else {
+ mode_lib->ms.support.MaximumMPCCombine = 0;
+ }
+ }
+
+ // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0).
+ mode_lib->ms.support.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupportedForState[mode_lib->ms.support.MaximumMPCCombine]; // Consider flip support if max combine support imm flip
+ mode_lib->ms.support.UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
+ mode_lib->ms.support.CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
+
+ dml_print("DML::%s: ModeIsSupported = %u\n", __func__, mode_lib->ms.support.ModeIsSupported);
+ dml_print("DML::%s: MaximumMPCCombine = %u\n", __func__, mode_lib->ms.support.MaximumMPCCombine);
+ dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+ dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, mode_lib->ms.support.UnboundedRequestEnabled);
+ dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, mode_lib->ms.support.CompressedBufferSizeInkByte);
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.SwathHeightY[k] = mode_lib->ms.SwathHeightYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.SwathHeightC[k] = mode_lib->ms.SwathHeightCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.DETBufferSizeInKByte[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.DETBufferSizeY[k] = mode_lib->ms.DETBufferSizeYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+ mode_lib->ms.DETBufferSizeC[k] = mode_lib->ms.DETBufferSizeCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
+ }
+
+ mode_lib->ms.DRAMSpeed = mode_lib->ms.state.dram_speed_mts;
+ mode_lib->ms.FabricClock = mode_lib->ms.state.fabricclk_mhz;
+ mode_lib->ms.SOCCLK = mode_lib->ms.state.socclk_mhz;
+ mode_lib->ms.DCFCLK = mode_lib->ms.DCFCLKState[mode_lib->ms.support.MaximumMPCCombine];
+ mode_lib->ms.ReturnBW = mode_lib->ms.ReturnBWPerState[mode_lib->ms.support.MaximumMPCCombine];
+ mode_lib->ms.ReturnDRAMBW = mode_lib->ms.ReturnDRAMBWPerState[mode_lib->ms.support.MaximumMPCCombine];
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMModePerState[k];
+ } else {
+ mode_lib->ms.support.ODMMode[k] = dml_odm_mode_bypass;
+ }
+
+ mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
+ mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
+ mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBppPerState[k];
+ mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputTypePerState[k];
+ mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRatePerState[k];
+ mode_lib->ms.support.SubViewportLinesNeededInMALL[k] = mode_lib->ms.SubViewportLinesNeededInMALL[k];
+ }
+
+ return mode_lib->ms.support.ModeIsSupported;
+} // dml_core_mode_support
+
+/// @brief Calculates a subset of the mode support parameters that are needed ahead of the mode programming function call: the DET / compressed buffer size limits, the progressive-to-interlace pixel clock adjustment, and ms.ReturnBW.
+void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib)
+{
+ CalculateMaxDETAndMinCompressedBufferSize(
+ mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
+ mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
+ mode_lib->ms.ip.rob_buffer_size_kbytes,
+ mode_lib->ms.ip.max_num_dpp,
+ mode_lib->ms.policy.NomDETInKByteOverrideEnable,
+ mode_lib->ms.policy.NomDETInKByteOverrideValue,
+
+ /* Output: the three results are written into the mode support (ms) state */
+ &mode_lib->ms.MaxTotalDETInKByte,
+ &mode_lib->ms.NomDETInKByte,
+ &mode_lib->ms.MinCompressedBufferSizeInKByte);
+
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported); // NOTE(review): receives the cached display cfg by pointer; presumably adjusts it in place -- confirm
+
+ mode_lib->ms.ReturnBW = dml_get_return_bw_mbps(&mode_lib->ms.soc,
+ mode_lib->ms.state.use_ideal_dram_bw_strobe,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.DCFCLK, // ms.DCFCLK, ms.FabricClock and ms.DRAMSpeed are read here, not computed; dml_core_mode_support assigns them before it returns
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.DRAMSpeed);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
+
+} // dml_core_mode_support_partial
+
+/// @brief This is the mode programming function. It is assumed that the display cfg is supported at the given power state
+void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg)
+{
+ struct dml_core_mode_programming_locals_st *s = &mode_lib->scratch.dml_core_mode_programming_locals;
+ struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+ struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
+ struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
+ struct CalculateStutterEfficiency_params_st *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
+ struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+
+ struct mode_program_st *locals = &mode_lib->mp;
+ struct DmlPipe *myPipe;
+ dml_uint_t j = 0, k = 0;
+ dml_float_t TWait;
+ dml_bool_t isInterlaceTiming;
+
+ mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg);
+ mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg);
+ dml_calc_pipe_plane_mapping(&mode_lib->ms.cache_display_cfg.hw, mode_lib->mp.pipe_plane);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- START --- \n", __func__);
+ dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
+ dml_print("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
+#endif
+
+ s->DSCFormatFactor = 0;
+
+ // Unlike dppclk and dispclk which can be calculated in mode_programming
+ // DCFCLK is calculated in mode_support (which is the state bbox dcfclk or min dcfclk if min dcfclk option is used in mode support calculation)
+ if (clk_cfg->dcfclk_option != dml_use_override_freq)
+ locals->Dcfclk = mode_lib->ms.DCFCLK;
+ else
+ locals->Dcfclk = clk_cfg->dcfclk_mhz;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print_dml_policy(&mode_lib->ms.policy);
+ dml_print_soc_state_bounding_box(&mode_lib->ms.state);
+ dml_print_soc_bounding_box(&mode_lib->ms.soc);
+ dml_print_clk_cfg(clk_cfg);
+
+ dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+ dml_print("DML::%s: Using DCFCLK = %f\n", __func__, locals->Dcfclk);
+ dml_print("DML::%s: Using SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
+#endif
+
+ locals->WritebackDISPCLK = 0.0;
+ locals->GlobalDPPCLK = 0.0;
+
+ // DISPCLK and DPPCLK Calculation
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
+ locals->WritebackDISPCLK =
+ dml_max(
+ locals->WritebackDISPCLK,
+ CalculateWriteBackDISPCLK(
+ mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.ip.writeback_line_buffer_buffer_size,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
+ }
+ }
+
+ locals->Dispclk_calculated = locals->WritebackDISPCLK;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ locals->Dispclk_calculated = dml_max(locals->Dispclk_calculated, CalculateRequiredDispclk(
+ mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.soc.dcn_downspread_percent,
+ mode_lib->ms.ip.dispclk_ramp_margin_percent,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+ mode_lib->ms.max_state.dispclk_mhz));
+ }
+ }
+ if (clk_cfg->dispclk_option == dml_use_required_freq)
+ locals->Dispclk = locals->Dispclk_calculated;
+ else if (clk_cfg->dispclk_option == dml_use_override_freq)
+ locals->Dispclk = clk_cfg->dispclk_mhz;
+ else
+ locals->Dispclk = mode_lib->ms.state.dispclk_mhz;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Using Dispclk = %f\n", __func__, locals->Dispclk);
+#endif
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateSinglePipeDPPCLKAndSCLThroughput(
+ mode_lib->ms.cache_display_cfg.plane.HRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
+ mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.plane.HTaps[k],
+ mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
+ mode_lib->ms.cache_display_cfg.plane.VTaps[k],
+ mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
+
+ /* Output */
+ &locals->PSCL_THROUGHPUT[k],
+ &locals->PSCL_THROUGHPUT_CHROMA[k],
+ &locals->DPPCLKUsingSingleDPP[k]);
+ }
+
+ CalculateDPPCLK(mode_lib->ms.num_active_planes,
+ mode_lib->ms.soc.dcn_downspread_percent,
+ mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
+ locals->DPPCLKUsingSingleDPP,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ /* Output */
+ &locals->GlobalDPPCLK,
+ locals->Dppclk_calculated);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (clk_cfg->dppclk_option[k] == dml_use_required_freq)
+ locals->Dppclk[k] = locals->Dppclk_calculated[k];
+ else if (clk_cfg->dppclk_option[k] == dml_use_override_freq)
+ locals->Dppclk[k] = clk_cfg->dppclk_mhz[k];
+ else
+ locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Using Dppclk[%0d] = %f\n", __func__, k, locals->Dppclk[k]);
+#endif
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateBytePerPixelAndBlockSizes(
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
+
+ /* Output */
+ &locals->BytePerPixelY[k],
+ &locals->BytePerPixelC[k],
+ &locals->BytePerPixelDETY[k],
+ &locals->BytePerPixelDETC[k],
+ &locals->BlockHeight256BytesY[k],
+ &locals->BlockHeight256BytesC[k],
+ &locals->BlockWidth256BytesY[k],
+ &locals->BlockWidth256BytesC[k],
+ &locals->BlockHeightY[k],
+ &locals->BlockHeightC[k],
+ &locals->BlockWidthY[k],
+ &locals->BlockWidthC[k]);
+ }
+
+
+ dml_print("DML::%s: %u\n", __func__, __LINE__);
+ CalculateSwathWidth(
+ false, // ForceSingleDPP
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat,
+ mode_lib->ms.cache_display_cfg.plane.SourceScan,
+ mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
+ mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
+ mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
+ mode_lib->ms.cache_display_cfg.hw.ODMMode,
+ locals->BytePerPixelY,
+ locals->BytePerPixelC,
+ locals->BlockHeight256BytesY,
+ locals->BlockHeight256BytesC,
+ locals->BlockWidth256BytesY,
+ locals->BlockWidth256BytesC,
+ mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
+ mode_lib->ms.cache_display_cfg.timing.HActive,
+ mode_lib->ms.cache_display_cfg.plane.HRatio,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+
+ /* Output */
+ locals->SwathWidthSingleDPPY,
+ locals->SwathWidthSingleDPPC,
+ locals->SwathWidthY,
+ locals->SwathWidthC,
+ s->dummy_integer_array[0], // dml_uint_t MaximumSwathHeightY[]
+ s->dummy_integer_array[1], // dml_uint_t MaximumSwathHeightC[]
+ locals->swath_width_luma_ub,
+ locals->swath_width_chroma_ub);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ locals->ReadBandwidthSurfaceLuma[k] = locals->SwathWidthSingleDPPY[k] * locals->BytePerPixelY[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ locals->ReadBandwidthSurfaceChroma[k] = locals->SwathWidthSingleDPPC[k] * locals->BytePerPixelC[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
+ dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
+ }
+
+ CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
+ CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
+ CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+ CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
+ CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->Output = s->dummy_output_encoder_array;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = locals->ReadBandwidthSurfaceLuma;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = locals->ReadBandwidthSurfaceChroma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
+ CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
+ CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
+ CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
+ CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
+ CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
+ CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
+ CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
+ CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
+ CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
+ CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
+ CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
+ CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = locals->BlockHeight256BytesY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = locals->BlockHeight256BytesC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = locals->BlockWidth256BytesY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = locals->BlockWidth256BytesC;
+ CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode;
+ CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateSwathAndDETConfiguration_params->BytePerPixY = locals->BytePerPixelY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixC = locals->BytePerPixelC;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETY = locals->BytePerPixelDETY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETC = locals->BytePerPixelDETC;
+ CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
+ CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+ CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
+ CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = locals->SwathHeightY;
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = locals->SwathHeightC;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = locals->DETBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = locals->DETBufferSizeY;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = locals->DETBufferSizeC;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &locals->UnboundedRequestEnabled;
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &locals->compbuf_reserved_space_64b;
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &locals->compbuf_reserved_space_zs;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &locals->CompressedBufferSizeInkByte;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
+
+ // VBA_DELTA
+ // Calculate DET size, swath height here. In VBA, they are calculated in mode check stage
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch,
+ CalculateSwathAndDETConfiguration_params);
+
+ // DCFCLK Deep Sleep
+ CalculateDCFCLKDeepSleep(
+ mode_lib->ms.num_active_planes,
+ locals->BytePerPixelY,
+ locals->BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.plane.VRatio,
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
+ locals->SwathWidthY,
+ locals->SwathWidthC,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ mode_lib->ms.cache_display_cfg.plane.HRatio,
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ locals->PSCL_THROUGHPUT,
+ locals->PSCL_THROUGHPUT_CHROMA,
+ locals->Dppclk,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ mode_lib->ms.soc.return_bus_width_bytes,
+
+ /* Output */
+ &locals->DCFCLKDeepSleep);
+
+ // DSCCLK
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if ((mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] != k) || !mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k]) {
+ locals->DSCCLK_calculated[k] = 0.0;
+ } else {
+ if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420)
+ s->DSCFormatFactor = 2;
+ else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444)
+ s->DSCFormatFactor = 1;
+ else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
+ s->DSCFormatFactor = 2;
+ else
+ s->DSCFormatFactor = 1;
+ if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_4to1)
+ locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+ else if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_2to1)
+ locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+ else
+ locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
+ }
+ }
+
+ // DSC Delay
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ locals->DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k],
+ mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
+ mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
+ mode_lib->ms.cache_display_cfg.output.OutputBpp[k],
+ mode_lib->ms.cache_display_cfg.timing.HActive[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k],
+ mode_lib->ms.cache_display_cfg.hw.NumberOfDSCSlices[k],
+ mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
+ mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j) // NumberOfSurfaces
+ if (j != k && mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j && mode_lib->ms.cache_display_cfg.hw.DSCEnabled[j])
+ locals->DSCDelay[k] = locals->DSCDelay[j];
+
+ // Prefetch
+ CalculateSurfaceSizeInMall(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+ mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
+ mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
+ mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
+ mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
+ mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
+ locals->BytePerPixelY,
+ mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
+ mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
+ locals->BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
+ locals->BlockWidth256BytesY,
+ locals->BlockWidth256BytesC,
+ locals->BlockHeight256BytesY,
+ locals->BlockHeight256BytesC,
+ locals->BlockWidthY,
+ locals->BlockWidthC,
+ locals->BlockHeightY,
+ locals->BlockHeightC,
+
+ /* Output */
+ locals->SurfaceSizeInTheMALL,
+ &s->dummy_boolean[0]); /* dml_bool_t *ExceededMALLSize */
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->SurfaceParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+ s->SurfaceParameters[k].DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+ s->SurfaceParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
+ s->SurfaceParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
+ s->SurfaceParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
+ s->SurfaceParameters[k].BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
+ s->SurfaceParameters[k].BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
+ s->SurfaceParameters[k].BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
+ s->SurfaceParameters[k].BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
+ s->SurfaceParameters[k].BlockWidthY = locals->BlockWidthY[k];
+ s->SurfaceParameters[k].BlockHeightY = locals->BlockHeightY[k];
+ s->SurfaceParameters[k].BlockWidthC = locals->BlockWidthC[k];
+ s->SurfaceParameters[k].BlockHeightC = locals->BlockHeightC[k];
+ s->SurfaceParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
+ s->SurfaceParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
+ s->SurfaceParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
+ s->SurfaceParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
+ s->SurfaceParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
+ s->SurfaceParameters[k].BytePerPixelY = locals->BytePerPixelY[k];
+ s->SurfaceParameters[k].BytePerPixelC = locals->BytePerPixelC[k];
+ s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+ s->SurfaceParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ s->SurfaceParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
+ s->SurfaceParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
+ s->SurfaceParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
+ s->SurfaceParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
+ s->SurfaceParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
+ s->SurfaceParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
+ s->SurfaceParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
+ s->SurfaceParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
+ s->SurfaceParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
+ s->SurfaceParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
+ s->SurfaceParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
+ s->SurfaceParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
+ s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
+ s->SurfaceParameters[k].SwathHeightY = locals->SwathHeightY[k];
+ s->SurfaceParameters[k].SwathHeightC = locals->SwathHeightC[k];
+ }
+
+ CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
+ CalculateVMRowAndSwath_params->SurfaceSizeInMALL = locals->SurfaceSizeInTheMALL;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
+ CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
+ CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
+ CalculateVMRowAndSwath_params->SwathWidthY = locals->SwathWidthY;
+ CalculateVMRowAndSwath_params->SwathWidthC = locals->SwathWidthC;
+ CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+ CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+ CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+ CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+ CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
+ CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+ CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
+ CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
+ CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
+ CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = locals->dpte_row_width_luma_ub;
+ CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = locals->dpte_row_width_chroma_ub;
+ CalculateVMRowAndSwath_params->dpte_row_height_luma = locals->dpte_row_height;
+ CalculateVMRowAndSwath_params->dpte_row_height_chroma = locals->dpte_row_height_chroma;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = locals->dpte_row_height_linear;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = locals->dpte_row_height_linear_chroma;
+ CalculateVMRowAndSwath_params->meta_req_width = locals->meta_req_width;
+ CalculateVMRowAndSwath_params->meta_req_width_chroma = locals->meta_req_width_chroma;
+ CalculateVMRowAndSwath_params->meta_req_height = locals->meta_req_height;
+ CalculateVMRowAndSwath_params->meta_req_height_chroma = locals->meta_req_height_chroma;
+ CalculateVMRowAndSwath_params->meta_row_width = locals->meta_row_width;
+ CalculateVMRowAndSwath_params->meta_row_width_chroma = locals->meta_row_width_chroma;
+ CalculateVMRowAndSwath_params->meta_row_height = locals->meta_row_height;
+ CalculateVMRowAndSwath_params->meta_row_height_chroma = locals->meta_row_height_chroma;
+ CalculateVMRowAndSwath_params->vm_group_bytes = locals->vm_group_bytes;
+ CalculateVMRowAndSwath_params->dpte_group_bytes = locals->dpte_group_bytes;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthY = locals->PixelPTEReqWidthY;
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightY = locals->PixelPTEReqHeightY;
+ CalculateVMRowAndSwath_params->PTERequestSizeY = locals->PTERequestSizeY;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthC = locals->PixelPTEReqWidthC;
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightC = locals->PixelPTEReqHeightC;
+ CalculateVMRowAndSwath_params->PTERequestSizeC = locals->PTERequestSizeC;
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = locals->dpde0_bytes_per_frame_ub_l;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = locals->meta_pte_bytes_per_frame_ub_l;
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = locals->dpde0_bytes_per_frame_ub_c;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = locals->meta_pte_bytes_per_frame_ub_c;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC;
+ CalculateVMRowAndSwath_params->VInitPreFillY = locals->VInitPreFillY;
+ CalculateVMRowAndSwath_params->VInitPreFillC = locals->VInitPreFillC;
+ CalculateVMRowAndSwath_params->MaxNumSwathY = locals->MaxNumSwathY;
+ CalculateVMRowAndSwath_params->MaxNumSwathC = locals->MaxNumSwathC;
+ CalculateVMRowAndSwath_params->meta_row_bw = locals->meta_row_bw;
+ CalculateVMRowAndSwath_params->dpte_row_bw = locals->dpte_row_bw;
+ CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow;
+ CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame;
+ CalculateVMRowAndSwath_params->MetaRowByte = locals->MetaRowByte;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame = locals->use_one_row_for_frame;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = locals->use_one_row_for_frame_flip;
+ CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = locals->UsesMALLForStaticScreen;
+ CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = locals->PTE_BUFFER_MODE;
+ CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = locals->BIGK_FRAGMENT_SIZE;
+
+ CalculateVMRowAndSwath(&mode_lib->scratch,
+ CalculateVMRowAndSwath_params);
+
+ s->ReorderBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(
+ mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+ mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+ mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+ s->VMDataOnlyReturnBW = dml_get_return_bw_mbps_vm_only(&mode_lib->ms.soc,
+ mode_lib->ms.state.use_ideal_dram_bw_strobe,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ locals->Dcfclk,
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.DRAMSpeed);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: locals->Dcfclk = %f\n", __func__, locals->Dcfclk);
+ dml_print("DML::%s: mode_lib->ms.soc.return_bus_width_bytes = %u\n", __func__, mode_lib->ms.soc.return_bus_width_bytes);
+ dml_print("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
+ dml_print("DML::%s: mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes = %u\n", __func__, mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes);
+ dml_print("DML::%s: mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent = %f\n", __func__, mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent);
+ dml_print("DML::%s: mode_lib->ms.DRAMSpeed = %f\n", __func__, mode_lib->ms.DRAMSpeed);
+ dml_print("DML::%s: mode_lib->ms.soc.num_chans = %u\n", __func__, mode_lib->ms.soc.num_chans);
+ dml_print("DML::%s: mode_lib->ms.soc.dram_channel_width_bytes = %u\n", __func__, mode_lib->ms.soc.dram_channel_width_bytes);
+ dml_print("DML::%s: mode_lib->ms.state_idx = %u\n", __func__, mode_lib->ms.state_idx);
+ dml_print("DML::%s: mode_lib->ms.max_state_idx = %u\n", __func__, mode_lib->ms.max_state_idx);
+ dml_print("DML::%s: mode_lib->ms.state.use_ideal_dram_bw_strobe = %u\n", __func__, mode_lib->ms.state.use_ideal_dram_bw_strobe);
+ dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, s->VMDataOnlyReturnBW);
+ dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
+#endif
+
+ s->HostVMInefficiencyFactor = 1.0;
+ if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
+ s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBW / s->VMDataOnlyReturnBW;
+
+ s->TotalDCCActiveDPP = 0;
+ s->TotalActiveDPP = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+ if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k])
+ s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+ }
+
+ locals->UrgentExtraLatency = CalculateExtraLatency(
+ mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
+ s->ReorderBytes,
+ locals->Dcfclk,
+ s->TotalActiveDPP,
+ mode_lib->ms.ip.pixel_chunk_size_kbytes,
+ s->TotalDCCActiveDPP,
+ mode_lib->ms.ip.meta_chunk_size_kbytes,
+ mode_lib->ms.ReturnBW,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ locals->dpte_group_bytes,
+ s->HostVMInefficiencyFactor,
+ mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+ mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
+
+ locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ locals->WritebackDelay[k] =
+ mode_lib->ms.state.writeback_latency_us
+ + CalculateWriteBackDelay(
+ mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk;
+ } else
+ locals->WritebackDelay[k] = 0;
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[j] == k
+ && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[j] == true) {
+ locals->WritebackDelay[k] =
+ dml_max(
+ locals->WritebackDelay[k],
+ mode_lib->ms.state.writeback_latency_us
+ + CalculateWriteBackDelay(
+ mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[j],
+ mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[j],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk);
+ }
+ }
+ }
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+ for (j = 0; j < mode_lib->ms.num_active_planes; ++j)
+ if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j)
+ locals->WritebackDelay[k] = locals->WritebackDelay[j];
+
+ locals->UrgentLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
+ mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
+ mode_lib->ms.state.urgent_latency_vm_data_only_us,
+ mode_lib->ms.soc.do_urgent_latency_adjustment,
+ mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
+ mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
+ mode_lib->ms.FabricClock);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateUrgentBurstFactor(mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+ locals->swath_width_luma_ub[k],
+ locals->swath_width_chroma_ub[k],
+ locals->SwathHeightY[k],
+ locals->SwathHeightC[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ locals->UrgentLatency,
+ mode_lib->ms.ip.cursor_buffer_size,
+ mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
+ mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ locals->BytePerPixelDETY[k],
+ locals->BytePerPixelDETC[k],
+ locals->DETBufferSizeY[k],
+ locals->DETBufferSizeC[k],
+
+ /* output */
+ &locals->UrgBurstFactorCursor[k],
+ &locals->UrgBurstFactorLuma[k],
+ &locals->UrgBurstFactorChroma[k],
+ &locals->NoUrgentLatencyHiding[k]);
+
+ locals->cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 /
+ ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
+ }
+
+ s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+ s->MaxVStartupAllPlanes = 0;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->MaxVStartupLines[k] = CalculateMaxVStartup(k,
+ mode_lib->ms.ip.ptoi_supported,
+ mode_lib->ms.ip.vblank_nom_default_us,
+ &mode_lib->ms.cache_display_cfg.timing,
+ locals->WritebackDelay[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, locals->WritebackDelay[k]);
+#endif
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+ s->MaxVStartupAllPlanes = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes, s->MaxVStartupLines[k]));
+
+ s->ImmediateFlipRequirementFinal = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->ImmediateFlipRequirementFinal = s->ImmediateFlipRequirementFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: ImmediateFlipRequirementFinal = %u\n", __func__, s->ImmediateFlipRequirementFinal);
+#endif
+
+ // The prefetch scheduling should only be calculated once as per the AllowForPStateChangeOrStutterInVBlank requirement.
+ // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving features
+ // if possible), then try to program for the best power saving features in order of difficulty (dram, fclk, stutter).
+ s->iteration = 0;
+ s->MaxTotalRDBandwidth = 0;
+ s->AllPrefetchModeTested = false;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]);
+ s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
+ }
+
+ do {
+ s->MaxTotalRDBandwidthNoUrgentBurst = 0.0;
+ s->DestinationLineTimesForPrefetchLessThan2 = false;
+ s->VRatioPrefetchMoreThanMax = false;
+
+ dml_print("DML::%s: Start one iteration: VStartupLines = %u\n", __func__, s->VStartupLines);
+
+ s->AllPrefetchModeTested = true;
+ s->MaxTotalRDBandwidth = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ locals->PrefetchMode[k] = s->NextPrefetchMode[k];
+ TWait = CalculateTWait(
+ locals->PrefetchMode[k],
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+ mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
+ mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
+ mode_lib->ms.state.dram_clock_change_latency_us,
+ mode_lib->ms.state.fclk_change_latency_us,
+ locals->UrgentLatency,
+ mode_lib->ms.state.sr_enter_plus_exit_time_us);
+
+ myPipe = &s->myPipe;
+ myPipe->Dppclk = locals->Dppclk[k];
+ myPipe->Dispclk = locals->Dispclk;
+ myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+ myPipe->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
+ myPipe->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
+ myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
+ myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
+ myPipe->BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
+ myPipe->BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
+ myPipe->BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
+ myPipe->BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
+ myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
+ myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
+ myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
+ myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
+ myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
+ myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
+ myPipe->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode[k];
+ myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
+ myPipe->BytePerPixelY = locals->BytePerPixelY[k];
+ myPipe->BytePerPixelC = locals->BytePerPixelC[k];
+ myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
+ dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
+ dml_print("DML::%s: PrefetchMode[k] = %u (Min=%u Max=%u)\n", __func__, locals->PrefetchMode[k], s->MinPrefetchMode[k], s->MaxPrefetchMode[k]);
+#endif
+
+ CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
+ CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
+ CalculatePrefetchSchedule_params->myPipe = myPipe;
+ CalculatePrefetchSchedule_params->DSCDelay = locals->DSCDelay[k];
+ CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
+ CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
+ CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
+ CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
+ CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
+ CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(locals->SwathWidthY[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
+ CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
+ CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
+ CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
+ CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k];
+ CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
+ CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
+ CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
+ CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
+ CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
+ CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
+ CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
+ CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
+ CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
+ CalculatePrefetchSchedule_params->UrgentLatency = locals->UrgentLatency;
+ CalculatePrefetchSchedule_params->UrgentExtraLatency = locals->UrgentExtraLatency;
+ CalculatePrefetchSchedule_params->TCalc = locals->TCalc;
+ CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame[k];
+ CalculatePrefetchSchedule_params->MetaRowByte = locals->MetaRowByte[k];
+ CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow[k];
+ CalculatePrefetchSchedule_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY[k];
+ CalculatePrefetchSchedule_params->VInitPreFillY = locals->VInitPreFillY[k];
+ CalculatePrefetchSchedule_params->MaxNumSwathY = locals->MaxNumSwathY[k];
+ CalculatePrefetchSchedule_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC[k];
+ CalculatePrefetchSchedule_params->VInitPreFillC = locals->VInitPreFillC[k];
+ CalculatePrefetchSchedule_params->MaxNumSwathC = locals->MaxNumSwathC[k];
+ CalculatePrefetchSchedule_params->swath_width_luma_ub = locals->swath_width_luma_ub[k];
+ CalculatePrefetchSchedule_params->swath_width_chroma_ub = locals->swath_width_chroma_ub[k];
+ CalculatePrefetchSchedule_params->SwathHeightY = locals->SwathHeightY[k];
+ CalculatePrefetchSchedule_params->SwathHeightC = locals->SwathHeightC[k];
+ CalculatePrefetchSchedule_params->TWait = TWait;
+ CalculatePrefetchSchedule_params->DSTXAfterScaler = &locals->DSTXAfterScaler[k];
+ CalculatePrefetchSchedule_params->DSTYAfterScaler = &locals->DSTYAfterScaler[k];
+ CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &locals->DestinationLinesForPrefetch[k];
+ CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &locals->DestinationLinesToRequestVMInVBlank[k];
+ CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &locals->DestinationLinesToRequestRowInVBlank[k];
+ CalculatePrefetchSchedule_params->VRatioPrefetchY = &locals->VRatioPrefetchY[k];
+ CalculatePrefetchSchedule_params->VRatioPrefetchC = &locals->VRatioPrefetchC[k];
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &locals->RequiredPrefetchPixDataBWLuma[k];
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &locals->RequiredPrefetchPixDataBWChroma[k];
+ CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &locals->NotEnoughTimeForDynamicMetadata[k];
+ CalculatePrefetchSchedule_params->Tno_bw = &locals->Tno_bw[k];
+ CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &locals->prefetch_vmrow_bw[k];
+ CalculatePrefetchSchedule_params->Tdmdl_vm = &locals->Tdmdl_vm[k];
+ CalculatePrefetchSchedule_params->Tdmdl = &locals->Tdmdl[k];
+ CalculatePrefetchSchedule_params->TSetup = &locals->TSetup[k];
+ CalculatePrefetchSchedule_params->VUpdateOffsetPix = &locals->VUpdateOffsetPix[k];
+ CalculatePrefetchSchedule_params->VUpdateWidthPix = &locals->VUpdateWidthPix[k];
+ CalculatePrefetchSchedule_params->VReadyOffsetPix = &locals->VReadyOffsetPix[k];
+
+ locals->NoTimeToPrefetch[k] =
+ CalculatePrefetchSchedule(&mode_lib->scratch,
+ CalculatePrefetchSchedule_params);
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
+#endif
+ locals->VStartup[k] = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
+ locals->VStartupMin[k] = locals->VStartup[k];
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateUrgentBurstFactor(
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
+ locals->swath_width_luma_ub[k],
+ locals->swath_width_chroma_ub[k],
+ locals->SwathHeightY[k],
+ locals->SwathHeightC[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ locals->UrgentLatency,
+ mode_lib->ms.ip.cursor_buffer_size,
+ mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
+ mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
+ locals->VRatioPrefetchY[k],
+ locals->VRatioPrefetchC[k],
+ locals->BytePerPixelDETY[k],
+ locals->BytePerPixelDETC[k],
+ locals->DETBufferSizeY[k],
+ locals->DETBufferSizeC[k],
+ /* Output */
+ &locals->UrgBurstFactorCursorPre[k],
+ &locals->UrgBurstFactorLumaPre[k],
+ &locals->UrgBurstFactorChromaPre[k],
+ &locals->NoUrgentLatencyHidingPre[k]);
+
+ locals->cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * locals->VRatioPrefetchY[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
+ dml_print("DML::%s: k=%0u UrgBurstFactorLuma=%f\n", __func__, k, locals->UrgBurstFactorLuma[k]);
+ dml_print("DML::%s: k=%0u UrgBurstFactorChroma=%f\n", __func__, k, locals->UrgBurstFactorChroma[k]);
+ dml_print("DML::%s: k=%0u UrgBurstFactorLumaPre=%f\n", __func__, k, locals->UrgBurstFactorLumaPre[k]);
+ dml_print("DML::%s: k=%0u UrgBurstFactorChromaPre=%f\n", __func__, k, locals->UrgBurstFactorChromaPre[k]);
+
+ dml_print("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, locals->VRatioPrefetchY[k]);
+ dml_print("DML::%s: k=%0u VRatioY=%f\n", __func__, k, mode_lib->ms.cache_display_cfg.plane.VRatio[k]);
+
+ dml_print("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, locals->prefetch_vmrow_bw[k]);
+ dml_print("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
+ dml_print("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, locals->cursor_bw[k]);
+ dml_print("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, locals->meta_row_bw[k]);
+ dml_print("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, locals->dpte_row_bw[k]);
+ dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWLuma[k]);
+ dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWChroma[k]);
+ dml_print("DML::%s: k=%0u cursor_bw_pre=%f\n", __func__, k, locals->cursor_bw_pre[k]);
+ dml_print("DML::%s: k=%0u MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, s->MaxTotalRDBandwidthNoUrgentBurst);
+#endif
+ if (locals->DestinationLinesForPrefetch[k] < 2)
+ s->DestinationLineTimesForPrefetchLessThan2 = true;
+
+ if (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+ locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
+ ((s->VStartupLines < s->MaxVStartupLines[k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
+ (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__ || locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__)))
+ s->VRatioPrefetchMoreThanMax = true;
+
+ //dml_bool_t DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false;
+ //dml_bool_t DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false;
+ //if (locals->DestinationLinesToRequestVMInVBlank[k] >= 32) {
+ // DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true;
+ //}
+
+ //if (locals->DestinationLinesToRequestRowInVBlank[k] >= 16) {
+ // DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true;
+ //}
+ }
+
+ locals->FractionOfUrgentBandwidth = s->MaxTotalRDBandwidthNoUrgentBurst / mode_lib->ms.ReturnBW;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, s->MaxTotalRDBandwidthNoUrgentBurst);
+ dml_print("DML::%s: ReturnBW=%f \n", __func__, mode_lib->ms.ReturnBW);
+ dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, locals->FractionOfUrgentBandwidth);
+#endif
+
+ CalculatePrefetchBandwithSupport(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBW,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ locals->NoUrgentLatencyHidingPre,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->RequiredPrefetchPixDataBWLuma,
+ locals->RequiredPrefetchPixDataBWChroma,
+ locals->cursor_bw,
+ locals->meta_row_bw,
+ locals->dpte_row_bw,
+ locals->cursor_bw_pre,
+ locals->prefetch_vmrow_bw,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ locals->UrgBurstFactorLuma,
+ locals->UrgBurstFactorChroma,
+ locals->UrgBurstFactorCursor,
+ locals->UrgBurstFactorLumaPre,
+ locals->UrgBurstFactorChromaPre,
+ locals->UrgBurstFactorCursorPre,
+
+ /* output */
+ &s->MaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
+ &s->MaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
+ &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
+ &locals->PrefetchModeSupported);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+ s->dummy_unit_vector[k] = 1.0;
+
+ CalculatePrefetchBandwithSupport(mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBW,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ locals->NoUrgentLatencyHidingPre,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->RequiredPrefetchPixDataBWLuma,
+ locals->RequiredPrefetchPixDataBWChroma,
+ locals->cursor_bw,
+ locals->meta_row_bw,
+ locals->dpte_row_bw,
+ locals->cursor_bw_pre,
+ locals->prefetch_vmrow_bw,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+
+ /* output */
+ &s->NonUrgentMaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
+ &s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
+ &locals->FractionOfUrgentBandwidth,
+ &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport
+
+
+
+ if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) {
+ dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
+ dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
+ locals->PrefetchModeSupported = false;
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (locals->NoTimeToPrefetch[k] == true || locals->NotEnoughTimeForDynamicMetadata[k]) {
+ dml_print("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
+ dml_print("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, locals->NotEnoughTimeForDynamicMetadata[k]);
+ locals->PrefetchModeSupported = false;
+ }
+ }
+
+
+ if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) {
+ locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBW,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->RequiredPrefetchPixDataBWLuma,
+ locals->RequiredPrefetchPixDataBWChroma,
+ locals->cursor_bw,
+ locals->cursor_bw_pre,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ locals->UrgBurstFactorLuma,
+ locals->UrgBurstFactorChroma,
+ locals->UrgBurstFactorCursor,
+ locals->UrgBurstFactorLumaPre,
+ locals->UrgBurstFactorChromaPre,
+ locals->UrgBurstFactorCursorPre);
+
+ locals->TotImmediateFlipBytes = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
+ locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k]);
+ if (locals->use_one_row_for_frame_flip[k]) {
+ locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (2 * locals->PixelPTEBytesPerRow[k]);
+ } else {
+ locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * locals->PixelPTEBytesPerRow[k];
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k = %u\n", __func__, k);
+ dml_print("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
+ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, locals->PDEAndMetaPTEBytesFrame[k]);
+ dml_print("DML::%s: MetaRowByte = %u\n", __func__, locals->MetaRowByte[k]);
+ dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, locals->PixelPTEBytesPerRow[k]);
+ dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, locals->TotImmediateFlipBytes);
+#endif
+ }
+ }
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ CalculateFlipSchedule(
+ s->HostVMInefficiencyFactor,
+ locals->UrgentExtraLatency,
+ locals->UrgentLatency,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
+ mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
+ mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+ mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
+ locals->PDEAndMetaPTEBytesFrame[k],
+ locals->MetaRowByte[k],
+ locals->PixelPTEBytesPerRow[k],
+ locals->BandwidthAvailableForImmediateFlip,
+ locals->TotImmediateFlipBytes,
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatio[k],
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
+ locals->Tno_bw[k],
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
+ locals->dpte_row_height[k],
+ locals->meta_row_height[k],
+ locals->dpte_row_height_chroma[k],
+ locals->meta_row_height_chroma[k],
+ locals->use_one_row_for_frame_flip[k],
+
+ /* Output */
+ &locals->DestinationLinesToRequestVMInImmediateFlip[k],
+ &locals->DestinationLinesToRequestRowInImmediateFlip[k],
+ &locals->final_flip_bw[k],
+ &locals->ImmediateFlipSupportedForPipe[k]);
+ }
+
+ CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBW,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ mode_lib->ms.policy.ImmediateFlipRequirement,
+ locals->final_flip_bw,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->RequiredPrefetchPixDataBWLuma,
+ locals->RequiredPrefetchPixDataBWChroma,
+ locals->cursor_bw,
+ locals->meta_row_bw,
+ locals->dpte_row_bw,
+ locals->cursor_bw_pre,
+ locals->prefetch_vmrow_bw,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ locals->UrgBurstFactorLuma,
+ locals->UrgBurstFactorChroma,
+ locals->UrgBurstFactorCursor,
+ locals->UrgBurstFactorLumaPre,
+ locals->UrgBurstFactorChromaPre,
+ locals->UrgBurstFactorCursorPre,
+
+ /* output */
+ &locals->total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
+ &locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
+ &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
+ &locals->ImmediateFlipSupported); // dml_bool_t *ImmediateFlipBandwidthSupport
+
+ CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
+ mode_lib->ms.ReturnBW,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ mode_lib->ms.policy.ImmediateFlipRequirement,
+ locals->final_flip_bw,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->RequiredPrefetchPixDataBWLuma,
+ locals->RequiredPrefetchPixDataBWChroma,
+ locals->cursor_bw,
+ locals->meta_row_bw,
+ locals->dpte_row_bw,
+ locals->cursor_bw_pre,
+ locals->prefetch_vmrow_bw,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+ s->dummy_unit_vector,
+
+ /* output */
+ &locals->non_urgent_total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
+ &locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
+ &locals->FractionOfUrgentBandwidthImmediateFlip, // dml_float_t *FractionOfUrgentBandwidth
+ &s->dummy_boolean[0]); // dml_bool_t *ImmediateFlipBandwidthSupport
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required && locals->ImmediateFlipSupportedForPipe[k] == false) {
+ locals->ImmediateFlipSupported = false;
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
+#endif
+ }
+ }
+ } else {
+ locals->ImmediateFlipSupported = false;
+ locals->total_dcn_read_bw_with_flip = s->MaxTotalRDBandwidth;
+ locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->MaxTotalRDBandwidthNotIncludingMALLPrefetch;
+ locals->non_urgent_total_dcn_read_bw_with_flip = s->NonUrgentMaxTotalRDBandwidth;
+ locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch;
+ }
+
+ /* consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm */
+ locals->PrefetchAndImmediateFlipSupported = (locals->PrefetchModeSupported == true &&
+ ((!mode_lib->ms.support.ImmediateFlipSupport && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable && !s->ImmediateFlipRequirementFinal) ||
+ locals->ImmediateFlipSupported)) ? true : false;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: PrefetchModeSupported = %u\n", __func__, locals->PrefetchModeSupported);
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
+ dml_print("DML::%s: ImmediateFlipRequirement[%u] = %u\n", __func__, k, mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
+ dml_print("DML::%s: HostVMEnable = %u\n", __func__, mode_lib->ms.cache_display_cfg.plane.HostVMEnable);
+ dml_print("DML::%s: ImmediateFlipSupport = %u (from mode_support)\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+ dml_print("DML::%s: ImmediateFlipSupported = %u\n", __func__, locals->ImmediateFlipSupported);
+ dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, locals->PrefetchAndImmediateFlipSupported);
+#endif
+ dml_print("DML::%s: Done one iteration: VStartupLines=%u, MaxVStartupAllPlanes=%u\n", __func__, s->VStartupLines, s->MaxVStartupAllPlanes);
+
+ s->VStartupLines = s->VStartupLines + 1;
+
+ if (s->VStartupLines > s->MaxVStartupAllPlanes) {
+ s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
+
+ if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
+ s->AllPrefetchModeTested = false;
+ dml_print("DML::%s: VStartupLines=%u, reaches max vstartup, try next prefetch mode=%u\n", __func__, s->VStartupLines-1, s->AllPrefetchModeTested);
+ }
+ } else {
+ s->AllPrefetchModeTested = false;
+ }
+ s->iteration++;
+ if (s->iteration > 2500) {
+ dml_print("ERROR: DML::%s: Too many errors, exit now\n", __func__);
+ ASSERT(0);
+ }
+ } while (!(locals->PrefetchAndImmediateFlipSupported || s->AllPrefetchModeTested));
+
+ if (locals->PrefetchAndImmediateFlipSupported) {
+ dml_print("DML::%s: Good, Prefetch and flip scheduling solution found at VStartupLines=%u (MaxVStartupAllPlanes=%u)\n", __func__, s->VStartupLines-1, s->MaxVStartupAllPlanes);
+ } else {
+ dml_print("DML::%s: Bad, Prefetch and flip scheduling solution did NOT find solution! (MaxVStartupAllPlanes=%u)\n", __func__, s->MaxVStartupAllPlanes);
+ }
+
+ //Watermarks and NB P-State/DRAM Clock Change Support
+ {
+ s->mmSOCParameters.UrgentLatency = locals->UrgentLatency;
+ s->mmSOCParameters.ExtraLatency = locals->UrgentExtraLatency;
+ s->mmSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
+ s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
+ s->mmSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
+ s->mmSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
+ s->mmSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
+ s->mmSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
+ s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
+ s->mmSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
+ s->mmSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
+
+ CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
+ CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateWatermarks_params->PrefetchMode = locals->PrefetchMode;
+ CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
+ CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
+ CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
+ CalculateWatermarks_params->DCFCLK = locals->Dcfclk;
+ CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBW;
+ CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
+ CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
+ CalculateWatermarks_params->dpte_group_bytes = locals->dpte_group_bytes;
+ CalculateWatermarks_params->meta_row_height = locals->meta_row_height;
+ CalculateWatermarks_params->meta_row_height_chroma = locals->meta_row_height_chroma;
+ CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
+ CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
+ CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
+ CalculateWatermarks_params->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
+ CalculateWatermarks_params->DETBufferSizeY = locals->DETBufferSizeY;
+ CalculateWatermarks_params->DETBufferSizeC = locals->DETBufferSizeC;
+ CalculateWatermarks_params->SwathHeightY = locals->SwathHeightY;
+ CalculateWatermarks_params->SwathHeightC = locals->SwathHeightC;
+ CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
+ CalculateWatermarks_params->SwathWidthY = locals->SwathWidthY;
+ CalculateWatermarks_params->SwathWidthC = locals->SwathWidthC;
+ CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
+ CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
+ CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
+ CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
+ CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
+ CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
+ CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
+ CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
+ CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
+ CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
+ CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
+ CalculateWatermarks_params->BytePerPixelDETY = locals->BytePerPixelDETY;
+ CalculateWatermarks_params->BytePerPixelDETC = locals->BytePerPixelDETC;
+ CalculateWatermarks_params->DSTXAfterScaler = locals->DSTXAfterScaler;
+ CalculateWatermarks_params->DSTYAfterScaler = locals->DSTYAfterScaler;
+ CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
+ CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
+ CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
+ CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
+ CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
+ CalculateWatermarks_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
+ CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
+
+ // Output
+ CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark
+ CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport;
+ CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
+ CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[]
+ CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport;
+ CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported
+ CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport;
+
+ CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ &mode_lib->scratch,
+ CalculateWatermarks_params);
+
+ /* Copy the calculated watermarks to mp.Watermark as the getter functions are
+ * implemented by the DML team to copy the calculated values from the mp.Watermark interface.
+ * &mode_lib->mp.Watermark and &locals->Watermark are the same address, memcpy may lead to
+ * unexpected behavior. memmove should be used.
+ */
+ memmove(&mode_lib->mp.Watermark, CalculateWatermarks_params->Watermark, sizeof(struct Watermarks));
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ locals->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackDRAMClockChangeWatermark);
+ locals->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
+ mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackFCLKChangeWatermark);
+ } else {
+ locals->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
+ locals->WritebackAllowFCLKChangeEndPosition[k] = 0;
+ }
+ }
+ }
+
+ //Display Pipeline Delivery Time in Prefetch, Groups
+ CalculatePixelDeliveryTimes(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.cache_display_cfg.plane.VRatio,
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
+ locals->VRatioPrefetchY,
+ locals->VRatioPrefetchC,
+ locals->swath_width_luma_ub,
+ locals->swath_width_chroma_ub,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ mode_lib->ms.cache_display_cfg.plane.HRatio,
+ mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ locals->PSCL_THROUGHPUT,
+ locals->PSCL_THROUGHPUT_CHROMA,
+ locals->Dppclk,
+ locals->BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.plane.SourceScan,
+ mode_lib->ms.cache_display_cfg.plane.NumberOfCursors,
+ mode_lib->ms.cache_display_cfg.plane.CursorWidth,
+ mode_lib->ms.cache_display_cfg.plane.CursorBPP,
+ locals->BlockWidth256BytesY,
+ locals->BlockHeight256BytesY,
+ locals->BlockWidth256BytesC,
+ locals->BlockHeight256BytesC,
+
+ /* Output */
+ locals->DisplayPipeLineDeliveryTimeLuma,
+ locals->DisplayPipeLineDeliveryTimeChroma,
+ locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
+ locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
+ locals->DisplayPipeRequestDeliveryTimeLuma,
+ locals->DisplayPipeRequestDeliveryTimeChroma,
+ locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
+ locals->DisplayPipeRequestDeliveryTimeChromaPrefetch,
+ locals->CursorRequestDeliveryTime,
+ locals->CursorRequestDeliveryTimePrefetch);
+
+ CalculateMetaAndPTETimes(
+ locals->use_one_row_for_frame,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+ mode_lib->ms.ip.meta_chunk_size_kbytes,
+ mode_lib->ms.ip.min_meta_chunk_size_bytes,
+ mode_lib->ms.cache_display_cfg.timing.HTotal,
+ mode_lib->ms.cache_display_cfg.plane.VRatio,
+ mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
+ locals->DestinationLinesToRequestRowInVBlank,
+ locals->DestinationLinesToRequestRowInImmediateFlip,
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ locals->BytePerPixelY,
+ locals->BytePerPixelC,
+ mode_lib->ms.cache_display_cfg.plane.SourceScan,
+ locals->dpte_row_height,
+ locals->dpte_row_height_chroma,
+ locals->meta_row_width,
+ locals->meta_row_width_chroma,
+ locals->meta_row_height,
+ locals->meta_row_height_chroma,
+ locals->meta_req_width,
+ locals->meta_req_width_chroma,
+ locals->meta_req_height,
+ locals->meta_req_height_chroma,
+ locals->dpte_group_bytes,
+ locals->PTERequestSizeY,
+ locals->PTERequestSizeC,
+ locals->PixelPTEReqWidthY,
+ locals->PixelPTEReqHeightY,
+ locals->PixelPTEReqWidthC,
+ locals->PixelPTEReqHeightC,
+ locals->dpte_row_width_luma_ub,
+ locals->dpte_row_width_chroma_ub,
+
+ /* Output */
+ locals->DST_Y_PER_PTE_ROW_NOM_L,
+ locals->DST_Y_PER_PTE_ROW_NOM_C,
+ locals->DST_Y_PER_META_ROW_NOM_L,
+ locals->DST_Y_PER_META_ROW_NOM_C,
+ locals->TimePerMetaChunkNominal,
+ locals->TimePerChromaMetaChunkNominal,
+ locals->TimePerMetaChunkVBlank,
+ locals->TimePerChromaMetaChunkVBlank,
+ locals->TimePerMetaChunkFlip,
+ locals->TimePerChromaMetaChunkFlip,
+ locals->time_per_pte_group_nom_luma,
+ locals->time_per_pte_group_vblank_luma,
+ locals->time_per_pte_group_flip_luma,
+ locals->time_per_pte_group_nom_chroma,
+ locals->time_per_pte_group_vblank_chroma,
+ locals->time_per_pte_group_flip_chroma);
+
+ CalculateVMGroupAndRequestTimes(
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
+ mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
+ mode_lib->ms.cache_display_cfg.timing.HTotal,
+ locals->BytePerPixelC,
+ locals->DestinationLinesToRequestVMInVBlank,
+ locals->DestinationLinesToRequestVMInImmediateFlip,
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ locals->dpte_row_width_luma_ub,
+ locals->dpte_row_width_chroma_ub,
+ locals->vm_group_bytes,
+ locals->dpde0_bytes_per_frame_ub_l,
+ locals->dpde0_bytes_per_frame_ub_c,
+ locals->meta_pte_bytes_per_frame_ub_l,
+ locals->meta_pte_bytes_per_frame_ub_c,
+
+ /* Output */
+ locals->TimePerVMGroupVBlank,
+ locals->TimePerVMGroupFlip,
+ locals->TimePerVMRequestVBlank,
+ locals->TimePerVMRequestFlip);
+
+ // Min TTUVBlank
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (locals->PrefetchMode[k] == 0) {
+ locals->MinTTUVBlank[k] = dml_max4(
+ locals->Watermark.DRAMClockChangeWatermark,
+ locals->Watermark.FCLKChangeWatermark,
+ locals->Watermark.StutterEnterPlusExitWatermark,
+ locals->Watermark.UrgentWatermark);
+ } else if (locals->PrefetchMode[k] == 1) {
+ locals->MinTTUVBlank[k] = dml_max3(
+ locals->Watermark.FCLKChangeWatermark,
+ locals->Watermark.StutterEnterPlusExitWatermark,
+ locals->Watermark.UrgentWatermark);
+ } else if (locals->PrefetchMode[k] == 2) {
+ locals->MinTTUVBlank[k] = dml_max(
+ locals->Watermark.StutterEnterPlusExitWatermark,
+ locals->Watermark.UrgentWatermark);
+ } else {
+ locals->MinTTUVBlank[k] = locals->Watermark.UrgentWatermark;
+ }
+ if (!mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k])
+ locals->MinTTUVBlank[k] = locals->TCalc + locals->MinTTUVBlank[k];
+ }
+
+ // DCC Configuration
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
+#endif
+ CalculateDCCConfiguration(
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
+ mode_lib->ms.policy.DCCProgrammingAssumesScanDirectionUnknownFinal,
+ mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k],
+ mode_lib->ms.NomDETInKByte,
+ locals->BlockHeight256BytesY[k],
+ locals->BlockHeight256BytesC[k],
+ mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
+ locals->BytePerPixelY[k],
+ locals->BytePerPixelC[k],
+ locals->BytePerPixelDETY[k],
+ locals->BytePerPixelDETC[k],
+ mode_lib->ms.cache_display_cfg.plane.SourceScan[k],
+ /* Output */
+ &locals->DCCYMaxUncompressedBlock[k],
+ &locals->DCCCMaxUncompressedBlock[k],
+ &locals->DCCYMaxCompressedBlock[k],
+ &locals->DCCCMaxCompressedBlock[k],
+ &locals->DCCYIndependentBlock[k],
+ &locals->DCCCIndependentBlock[k]);
+ }
+
+ // VStartup Adjustment
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->Tvstartup_margin = (s->MaxVStartupLines[k] - locals->VStartupMin[k]) * mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, locals->MinTTUVBlank[k]);
+#endif
+
+ locals->MinTTUVBlank[k] = locals->MinTTUVBlank[k] + s->Tvstartup_margin;
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
+ dml_print("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+ dml_print("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, locals->MinTTUVBlank[k]);
+#endif
+
+ locals->Tdmdl[k] = locals->Tdmdl[k] + s->Tvstartup_margin;
+ if (mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k] && mode_lib->ms.ip.dynamic_metadata_vm_enabled) {
+ locals->Tdmdl_vm[k] = locals->Tdmdl_vm[k] + s->Tvstartup_margin;
+ }
+
+ isInterlaceTiming = (mode_lib->ms.cache_display_cfg.timing.Interlace[k] && !mode_lib->ms.ip.ptoi_supported);
+
+ // The actual positioning of the vstartup
+ locals->VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
+
+ s->dlg_vblank_start = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) :
+ mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
+ s->LSetup = dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0;
+ s->blank_lines_remaining = (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]) - locals->VStartup[k];
+
+ if (s->blank_lines_remaining < 0) {
+ dml_print("ERROR: Vstartup is larger than vblank!?\n");
+ s->blank_lines_remaining = 0;
+ ASSERT(0);
+ }
+ locals->MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
+
+ // debug only
+ s->old_MIN_DST_Y_NEXT_START = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) :
+ mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k])
+ + dml_max(1.0, dml_ceil((dml_float_t) locals->WritebackDelay[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0))
+ + dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0;
+
+ if (((locals->VUpdateOffsetPix[k] + locals->VUpdateWidthPix[k] + locals->VReadyOffsetPix[k]) / (double) mode_lib->ms.cache_display_cfg.timing.HTotal[k]) <=
+ (isInterlaceTiming ?
+ dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]) / 2.0, 1.0) :
+ (int) (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]))) {
+ locals->VREADY_AT_OR_AFTER_VSYNC[k] = true;
+ } else {
+ locals->VREADY_AT_OR_AFTER_VSYNC[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, locals->VStartup[k]);
+ dml_print("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, locals->VStartupMin[k]);
+ dml_print("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, locals->VUpdateOffsetPix[k]);
+ dml_print("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, locals->VUpdateWidthPix[k]);
+ dml_print("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, locals->VReadyOffsetPix[k]);
+ dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.HTotal[k]);
+ dml_print("DML::%s: k=%u, VTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VTotal[k]);
+ dml_print("DML::%s: k=%u, VActive = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VActive[k]);
+ dml_print("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
+ dml_print("DML::%s: k=%u, TSetup = %f\n", __func__, k, locals->TSetup[k]);
+ dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, locals->MIN_DST_Y_NEXT_START[k]);
+ dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f (old)\n", __func__, k, s->old_MIN_DST_Y_NEXT_START);
+ dml_print("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, locals->VREADY_AT_OR_AFTER_VSYNC[k]);
+#endif
+ }
+
+ //Maximum Bandwidth Used
+ s->TotalWRBandwidth = 0;
+ s->WRBandwidth = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_32) {
+ s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
+ (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4;
+ } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
+ s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
+ (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8;
+ }
+ s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
+ }
+
+ locals->TotalDataReadBandwidth = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ locals->TotalDataReadBandwidth = locals->TotalDataReadBandwidth + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, locals->TotalDataReadBandwidth);
+ dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
+ dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
+#endif
+ }
+
+ locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
+ locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = locals->TotalDataReadBandwidthNotIncludingMALLPrefetch
+ + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
+ }
+ }
+
+ CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
+ CalculateStutterEfficiency_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
+ CalculateStutterEfficiency_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
+ CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ms.ip.meta_fifo_size_in_kentries;
+ CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ms.ip.zero_size_buffer_entries;
+ CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
+ CalculateStutterEfficiency_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
+ CalculateStutterEfficiency_params->TotalDataReadBandwidth = locals->TotalDataReadBandwidth;
+ CalculateStutterEfficiency_params->DCFCLK = locals->Dcfclk;
+ CalculateStutterEfficiency_params->ReturnBW = mode_lib->ms.ReturnBW;
+ CalculateStutterEfficiency_params->CompbufReservedSpace64B = locals->compbuf_reserved_space_64b;
+ CalculateStutterEfficiency_params->CompbufReservedSpaceZs = locals->compbuf_reserved_space_zs;
+ CalculateStutterEfficiency_params->SRExitTime = mode_lib->ms.state.sr_exit_time_us;
+ CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
+ CalculateStutterEfficiency_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
+ CalculateStutterEfficiency_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
+ CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = locals->Watermark.StutterEnterPlusExitWatermark;
+ CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = locals->Watermark.Z8StutterEnterPlusExitWatermark;
+ CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
+ CalculateStutterEfficiency_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
+ CalculateStutterEfficiency_params->MinTTUVBlank = locals->MinTTUVBlank;
+ CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
+ CalculateStutterEfficiency_params->DETBufferSizeY = locals->DETBufferSizeY;
+ CalculateStutterEfficiency_params->BytePerPixelY = locals->BytePerPixelY;
+ CalculateStutterEfficiency_params->BytePerPixelDETY = locals->BytePerPixelDETY;
+ CalculateStutterEfficiency_params->SwathWidthY = locals->SwathWidthY;
+ CalculateStutterEfficiency_params->SwathHeightY = locals->SwathHeightY;
+ CalculateStutterEfficiency_params->SwathHeightC = locals->SwathHeightC;
+ CalculateStutterEfficiency_params->NetDCCRateLuma = mode_lib->ms.cache_display_cfg.surface.DCCRateLuma;
+ CalculateStutterEfficiency_params->NetDCCRateChroma = mode_lib->ms.cache_display_cfg.surface.DCCRateChroma;
+ CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsLuma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma;
+ CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsChroma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma;
+ CalculateStutterEfficiency_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
+ CalculateStutterEfficiency_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
+ CalculateStutterEfficiency_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
+ CalculateStutterEfficiency_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
+ CalculateStutterEfficiency_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
+ CalculateStutterEfficiency_params->BlockHeight256BytesY = locals->BlockHeight256BytesY;
+ CalculateStutterEfficiency_params->BlockWidth256BytesY = locals->BlockWidth256BytesY;
+ CalculateStutterEfficiency_params->BlockHeight256BytesC = locals->BlockHeight256BytesC;
+ CalculateStutterEfficiency_params->BlockWidth256BytesC = locals->BlockWidth256BytesC;
+ CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = locals->DCCYMaxUncompressedBlock;
+ CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = locals->DCCCMaxUncompressedBlock;
+ CalculateStutterEfficiency_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
+ CalculateStutterEfficiency_params->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable;
+ CalculateStutterEfficiency_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
+ CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = locals->ReadBandwidthSurfaceLuma;
+ CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = locals->ReadBandwidthSurfaceChroma;
+ CalculateStutterEfficiency_params->meta_row_bw = locals->meta_row_bw;
+ CalculateStutterEfficiency_params->dpte_row_bw = locals->dpte_row_bw;
+ CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &locals->StutterEfficiencyNotIncludingVBlank;
+ CalculateStutterEfficiency_params->StutterEfficiency = &locals->StutterEfficiency;
+ CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &locals->NumberOfStutterBurstsPerFrame;
+ CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &locals->Z8StutterEfficiencyNotIncludingVBlank;
+ CalculateStutterEfficiency_params->Z8StutterEfficiency = &locals->Z8StutterEfficiency;
+ CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &locals->Z8NumberOfStutterBurstsPerFrame;
+ CalculateStutterEfficiency_params->StutterPeriod = &locals->StutterPeriod;
+ CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &locals->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+
+ // Stutter Efficiency
+ CalculateStutterEfficiency(&mode_lib->scratch,
+ CalculateStutterEfficiency_params);
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ {
+ dml_float_t dummy_single[2];
+ dml_uint_t dummy_integer[1];
+ dml_bool_t dummy_boolean[1];
+
+ // Calculate z8 stutter eff assuming 0 reserved space
+ CalculateStutterEfficiency(
+ locals->CompressedBufferSizeInkByte,
+ mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
+ locals->UnboundedRequestEnabled,
+ mode_lib->ms.ip.meta_fifo_size_in_kentries,
+ mode_lib->ms.ip.zero_size_buffer_entries,
+ mode_lib->ms.ip.pixel_chunk_size_kbytes,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.ip.rob_buffer_size_kbytes,
+ locals->TotalDataReadBandwidth,
+ locals->Dcfclk,
+ mode_lib->ms.ReturnBW,
+ 0, //mode_lib->ms.ip.compbuf_reserved_space_64b,
+ 0, //mode_lib->ms.ip.compbuf_reserved_space_zs,
+ mode_lib->ms.state.sr_exit_time_us,
+ mode_lib->ms.state.sr_exit_z8_time_us,
+ mode_lib->ms.policy.SynchronizeTimingsFinal,
+ mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
+ locals->Watermark.StutterEnterPlusExitWatermark,
+ locals->Watermark.Z8StutterEnterPlusExitWatermark,
+ mode_lib->ms.ip.ptoi_supported,
+ mode_lib->ms.cache_display_cfg.timing.Interlace,
+ locals->MinTTUVBlank,
+ mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
+ mode_lib->ms.DETBufferSizeY,
+ locals->BytePerPixelY,
+ locals->BytePerPixelDETY,
+ locals->SwathWidthY,
+ mode_lib->ms.SwathHeightY,
+ mode_lib->ms.SwathHeightC,
+ mode_lib->ms.cache_display_cfg.surface.DCCRateLuma,
+ mode_lib->ms.cache_display_cfg.surface.DCCRateChroma,
+ mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma,
+ mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma,
+ mode_lib->ms.cache_display_cfg.timing.HTotal,
+ mode_lib->ms.cache_display_cfg.timing.VTotal,
+ mode_lib->ms.cache_display_cfg.timing.PixelClock,
+ mode_lib->ms.cache_display_cfg.plane.VRatio,
+ mode_lib->ms.cache_display_cfg.plane.SourceScan,
+ locals->BlockHeight256BytesY,
+ locals->BlockWidth256BytesY,
+ locals->BlockHeight256BytesC,
+ locals->BlockWidth256BytesC,
+ locals->DCCYMaxUncompressedBlock,
+ locals->DCCCMaxUncompressedBlock,
+ mode_lib->ms.cache_display_cfg.timing.VActive,
+ mode_lib->ms.cache_display_cfg.surface.DCCEnable,
+ mode_lib->ms.cache_display_cfg.writeback.WritebackEnable,
+ locals->ReadBandwidthSurfaceLuma,
+ locals->ReadBandwidthSurfaceChroma,
+ locals->meta_row_bw,
+ locals->dpte_row_bw,
+
+ /* Output */
+ &dummy_single[0],
+ &dummy_single[1],
+ &dummy_integer[0],
+ &locals->Z8StutterEfficiencyNotIncludingVBlankBestCase,
+ &locals->Z8StutterEfficiencyBestCase,
+ &locals->Z8NumberOfStutterBurstsPerFrameBestCase,
+ &locals->StutterPeriodBestCase,
+ &dummy_boolean[0]);
+ }
+#else
+ locals->Z8StutterEfficiencyNotIncludingVBlankBestCase = locals->Z8StutterEfficiencyNotIncludingVBlank;
+ locals->Z8StutterEfficiencyBestCase = locals->Z8StutterEfficiency;
+ locals->Z8NumberOfStutterBurstsPerFrameBestCase = locals->Z8NumberOfStutterBurstsPerFrame;
+ locals->StutterPeriodBestCase = locals->StutterPeriod;
+#endif
+
+#ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: --- END --- \n", __func__);
+#endif
+} // dml_core_mode_programming
+
+/// Function: dml_core_get_row_heights
+/// @brief Report the DPTE and META row heights of one surface plane, computed from a minimal set of inputs.
+///
+/// @param dpte_row_height [out] passed to CalculateVMAndRowBytes in its output argument list
+/// @param meta_row_height [out] passed to CalculateVMAndRowBytes in its output argument list
+/// @param mode_lib only ip.dpte_buffer_size_in_pte_reqs_luma / _chroma are read from it
+/// @param is_plane1 false selects the "Y" (luma) quantities, true the "C" (chroma) ones
+/// @param SourcePixelFormat, SurfaceTiling forwarded to both calculation helpers
+/// @param ScanDirection, pitch, GPUVMMinPageSizeKBytes forwarded to CalculateVMAndRowBytes
+/// @note Every other CalculateVMAndRowBytes input is a fixed placeholder, listed per argument in the call below.
+void dml_core_get_row_heights(
+	dml_uint_t *dpte_row_height,
+	dml_uint_t *meta_row_height,
+	const struct display_mode_lib_st *mode_lib,
+	dml_bool_t is_plane1,
+	enum dml_source_format_class SourcePixelFormat,
+	enum dml_swizzle_mode SurfaceTiling,
+	enum dml_rotation_angle ScanDirection,
+	dml_uint_t pitch,
+	dml_uint_t GPUVMMinPageSizeKBytes)
+{
+	/* Two-slot results of CalculateBytePerPixelAndBlockSizes: slot 0 is the "Y" output, slot 1 the "C" output. */
+	dml_uint_t byte_per_pixel[2];
+	dml_float_t byte_per_pixel_in_det[2]; /* filled by the helper, not consumed here */
+	dml_uint_t block_height_256b[2];
+	dml_uint_t block_width_256b[2];
+	dml_uint_t macro_tile_width[2];
+	dml_uint_t macro_tile_height[2];
+
+	const dml_uint_t sel = is_plane1 ? 1 : 0; /* slot of the plane being queried */
+	dml_uint_t pte_buffer_size_in_requests;
+	dml_uint_t discarded[16]; /* sinks for the CalculateVMAndRowBytes outputs that are not read in this function */
+
+	/* Bytes per pixel plus 256-byte block and macro tile dimensions, for both planes at once. */
+	CalculateBytePerPixelAndBlockSizes(
+		SourcePixelFormat,
+		SurfaceTiling,
+
+		/* Output */
+		&byte_per_pixel[0],
+		&byte_per_pixel[1],
+		&byte_per_pixel_in_det[0],
+		&byte_per_pixel_in_det[1],
+		&block_height_256b[0],
+		&block_height_256b[1],
+		&block_width_256b[0],
+		&block_width_256b[1],
+		&macro_tile_height[0],
+		&macro_tile_height[1],
+		&macro_tile_width[0],
+		&macro_tile_width[1]);
+
+	/* The PTE buffer size comes from the IP parameter that matches the selected plane. */
+	if (is_plane1)
+		pte_buffer_size_in_requests = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
+	else
+		pte_buffer_size_in_requests = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+	dml_print("DML_DLG: %s: is_plane1 = %u\n", __func__, is_plane1);
+	dml_print("DML_DLG: %s: BytePerPixel = %u\n", __func__, byte_per_pixel[sel]);
+	dml_print("DML_DLG: %s: BlockHeight256Bytes = %u\n", __func__, block_height_256b[sel]);
+	dml_print("DML_DLG: %s: BlockWidth256Bytes = %u\n", __func__, block_width_256b[sel]);
+	dml_print("DML_DLG: %s: MacroTileWidth = %u\n", __func__, macro_tile_width[sel]);
+	dml_print("DML_DLG: %s: MacroTileHeight = %u\n", __func__, macro_tile_height[sel]);
+	dml_print("DML_DLG: %s: PTEBufferSizeInRequests = %u\n", __func__, pte_buffer_size_in_requests);
+	dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
+	dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
+	dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
+#endif
+
+	// Supply just enough real parameters to calculate the meta and dpte rows; everything else is a constant.
+	// DCC and GPUVM are passed as enabled, with one DPP and four page table levels.
+	CalculateVMAndRowBytes(
+		0, /* dml_bool_t ViewportStationary */
+		1, /* dml_bool_t DCCEnable */
+		1, /* dml_uint_t NumberOfDPPs */
+		block_height_256b[sel],
+		block_width_256b[sel],
+		SourcePixelFormat,
+		SurfaceTiling,
+		byte_per_pixel[sel],
+		ScanDirection,
+		0, /* dml_uint_t SwathWidth */
+		0, /* dml_uint_t ViewportHeight (Note: DML calculates one_row_for_frame height regardless, would need test input if that height is useful) */
+		0, /* dml_uint_t ViewportXStart */
+		0, /* dml_uint_t ViewportYStart */
+		1, /* dml_bool_t GPUVMEnable */
+		4, /* dml_uint_t GPUVMMaxPageTableLevels */
+		GPUVMMinPageSizeKBytes,
+		pte_buffer_size_in_requests,
+		pitch,
+		0, /* dml_uint_t DCCMetaPitch */
+		macro_tile_width[sel],
+		macro_tile_height[sel],
+
+		/* Output */
+		&discarded[0], /* dml_uint_t *MetaRowByte */
+		&discarded[1], /* dml_uint_t *PixelPTEBytesPerRow */
+		&discarded[2], /* dml_uint_t *PixelPTEBytesPerRowStorage */
+		&discarded[3], /* dml_uint_t *dpte_row_width_ub */
+		dpte_row_height,
+		&discarded[4], /* dml_uint_t *dpte_row_height_linear */
+		&discarded[5], /* dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame */
+		&discarded[6], /* dml_uint_t *dpte_row_width_ub_one_row_per_frame */
+		&discarded[7], /* dml_uint_t *dpte_row_height_one_row_per_frame */
+		&discarded[8], /* dml_uint_t *MetaRequestWidth */
+		&discarded[9], /* dml_uint_t *MetaRequestHeight */
+		&discarded[10], /* dml_uint_t *meta_row_width */
+		meta_row_height,
+		&discarded[11], /* dml_uint_t *PixelPTEReqWidth */
+		&discarded[12], /* dml_uint_t *PixelPTEReqHeight */
+		&discarded[13], /* dml_uint_t *PTERequestSize */
+		&discarded[14], /* dml_uint_t *DPDE0BytesFrame */
+		&discarded[15]); /* dml_uint_t *MetaPTEBytesFrame */
+
+#ifdef __DML_RQ_DLG_CALC_DEBUG__
+	dml_print("DML_DLG: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
+	dml_print("DML_DLG: %s: meta_row_height = %u\n", __func__, *meta_row_height);
+#endif
+}
+
+/// @brief Return a copy of states->state_array[state_idx]. An out-of-range state_idx is still logged and asserted, but is
+/// then clamped to the last entry covered by num_states (entry 0 if num_states is 0) instead of indexing the array as-is.
+static struct soc_state_bounding_box_st dml_get_soc_state_bounding_box(const struct soc_states_st *states, dml_uint_t state_idx)
+{
+	dml_print("DML::%s: state_idx=%u (num_states=%u)\n", __func__, state_idx, states->num_states);
+	if (state_idx >= (dml_uint_t)states->num_states) {
+		dml_print("DML::%s: ERROR: Invalid state_idx=%u! num_states=%u\n", __func__, state_idx, states->num_states);
+		ASSERT(0);
+		state_idx = (states->num_states > 0) ? (dml_uint_t)states->num_states - 1 : 0; // in case ASSERT() does not halt
+	}
+	return states->state_array[state_idx];
+}
+
+/// @brief Snapshot soc, ip, policy and the selected/last power states from mode_lib into mode_lib->ms. Per the original
+/// note, the copy is only needed if DML must re-calculate when display cfg, bbox, or policy changed since the last run.
+static void cache_ip_soc_cfg(struct display_mode_lib_st *mode_lib, dml_uint_t state_idx)
+{
+	const struct soc_states_st *states = &mode_lib->states;
+
+	mode_lib->ms.ip = mode_lib->ip;
+	mode_lib->ms.soc = mode_lib->soc;
+	mode_lib->ms.policy = mode_lib->policy;
+	mode_lib->ms.state_idx = state_idx;
+	mode_lib->ms.max_state_idx = states->num_states - 1;
+	mode_lib->ms.state = dml_get_soc_state_bounding_box(states, state_idx);
+	mode_lib->ms.max_state = dml_get_soc_state_bounding_box(states, states->num_states - 1);
+}
+
+static void cache_display_cfg(struct display_mode_lib_st *mode_lib,
+ const struct dml_display_cfg_st *display_cfg)
+{
+ mode_lib->ms.cache_display_cfg = *display_cfg; // whole-struct copy by value into mode_lib->ms
+}
+
+static void fetch_socbb_params(struct display_mode_lib_st *mode_lib)
+{
+	const struct soc_state_bounding_box_st *bbox = &mode_lib->ms.state;
+
+	// Seed the working clocks from the cached power state. As noted originally, SOCCLK, DRAMSpeed and FabricClock are
+	// reassigned to the same state value in the mode_check step; with UseMinimumRequiredDCFCLK, DCFCLK becomes the min dcfclk.
+	mode_lib->ms.DCFCLK = (dml_float_t)bbox->dcfclk_mhz;
+	mode_lib->ms.FabricClock = (dml_float_t)bbox->fabricclk_mhz;
+	mode_lib->ms.DRAMSpeed = (dml_float_t)bbox->dram_speed_mts;
+	mode_lib->ms.SOCCLK = (dml_float_t)bbox->socclk_mhz;
+}
+
+/// @brief Run the mode support check on display_cfg directly.
+/// The display configuration is described with the pipes struct and num_pipes; this entry point is meant for
+/// the case where physical resource mapping is not finalized (for example, the number of pipes per surface
+/// is not known yet). Calculated values and informational output are stored in mode_lib.
+/// @param mode_lib Contains the bounding box and policy setting.
+/// @param state_idx Power state index
+/// @param display_cfg Display configurations; copied into mode_lib->ms before the check runs
+/// @return The result of dml_core_mode_support() for the cached configuration
+dml_bool_t dml_mode_support(
+	struct display_mode_lib_st *mode_lib,
+	dml_uint_t state_idx,
+	const struct dml_display_cfg_st *display_cfg)
+{
+	dml_bool_t supported;
+
+	dml_print("DML::%s: ------------- START ----------\n", __func__);
+
+	// Refresh the cached inputs first; the core check is only handed mode_lib.
+	cache_ip_soc_cfg(mode_lib, state_idx);
+	cache_display_cfg(mode_lib, display_cfg);
+	fetch_socbb_params(mode_lib);
+	dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
+
+	supported = dml_core_mode_support(mode_lib);
+	dml_print("DML::%s: is_mode_support = %u\n", __func__, supported);
+
+	dml_print("DML::%s: ------------- DONE ----------\n", __func__);
+	return supported;
+}
+
+/// @brief Calculate the programming values for DCN DCHUB (assumes the mode is supported)
+/// Results are stored in the mode_lib.mp (mode_program_st) data struct and can be read back through the getter functions.
+/// Calculated values include: watermarks, dlg, rq reg, different clock frequency
+/// Note: DCFCLK and SOCCLK freq are assumed to be the state values; mode_program just uses the DML calculated DPPCLK and DISPCLK
+/// @param mode_lib mode_lib data struct that houses all the input/output/bbox and calculation values.
+/// @param state_idx Power state idx chosen
+/// @param display_cfg Display Configuration
+/// @param call_standalone Calling mode_programming without calling mode support. Some of the "support" struct member will be pre-calculated before doing mode programming
+/// @return mode_lib->mp.PrefetchAndImmediateFlipSupported, read after dml_core_mode_programming() has run (originally described as 1 when there is no error)
+/// TODO: Add clk_cfg input, could be useful for standalone mode
+dml_bool_t dml_mode_programming(
+	struct display_mode_lib_st *mode_lib,
+	dml_uint_t state_idx,
+	const struct dml_display_cfg_st *display_cfg,
+	bool call_standalone)
+{
+	struct dml_clk_cfg_st clk_cfg;
+	dml_uint_t plane;
+
+	memset(&clk_cfg, 0, sizeof(clk_cfg)); // start from all-zero, then request dml_use_required_freq for every clock
+	clk_cfg.dispclk_option = dml_use_required_freq;
+	clk_cfg.dcfclk_option = dml_use_required_freq;
+	for (plane = 0; plane < __DML_NUM_PLANES__; plane++)
+		clk_cfg.dppclk_option[plane] = dml_use_required_freq;
+
+	dml_print("DML::%s: ------------- START ----------\n", __func__);
+	dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
+	dml_print("DML::%s: call_standalone = %u\n", __func__, call_standalone);
+
+	cache_ip_soc_cfg(mode_lib, state_idx);
+	cache_display_cfg(mode_lib, display_cfg);
+	fetch_socbb_params(mode_lib);
+	if (call_standalone) {
+		// assume mode support say immediate flip ok at max state/combine
+		mode_lib->ms.support.ImmediateFlipSupport = 1;
+		dml_core_mode_support_partial(mode_lib);
+	}
+	dml_core_mode_programming(mode_lib, &clk_cfg);
+
+	dml_print("DML::%s: ------------- DONE ----------\n", __func__);
+	dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %0d\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
+	return mode_lib->mp.PrefetchAndImmediateFlipSupported;
+}
+
+// Scan power states start_state_idx..end_state_idx (inclusive) for the first one where dml_mode_support() passes.
+// Returns 1 with that index in *lowest_state_idx; otherwise returns 0 and *lowest_state_idx stays end_state_idx.
+static dml_uint_t mode_support_pwr_states(
+	dml_uint_t *lowest_state_idx,
+	struct display_mode_lib_st *mode_lib,
+	const struct dml_display_cfg_st *display_cfg,
+	dml_uint_t start_state_idx,
+	dml_uint_t end_state_idx)
+{
+	dml_uint_t state_idx;
+
+	*lowest_state_idx = end_state_idx;
+
+	// Bail out on a bad range instead of only asserting, so no state at or past num_states is ever evaluated.
+	if (end_state_idx < start_state_idx || end_state_idx >= mode_lib->states.num_states) { // idx is 0-based
+		ASSERT(0);
+		return 0;
+	}
+
+	for (state_idx = start_state_idx; state_idx <= end_state_idx; state_idx++) {
+		if (dml_mode_support(mode_lib, state_idx, display_cfg)) {
+			dml_print("DML::%s: Mode is supported at power state_idx = %u\n", __func__, state_idx);
+			*lowest_state_idx = state_idx;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+dml_uint_t dml_mode_support_ex(struct dml_mode_support_ex_params_st *in_out_params)
+{
+	struct display_mode_lib_st *lib = in_out_params->mode_lib;
+	dml_uint_t supported;
+
+	// Search from the caller's start state up to the last state index (num_states - 1).
+	supported = mode_support_pwr_states(&in_out_params->out_lowest_state_idx, lib,
+		in_out_params->in_display_cfg, in_out_params->in_start_state_idx,
+		lib->states.num_states - 1);
+
+	// The evaluation info is copied out only when some state supports the mode.
+	if (supported)
+		*in_out_params->out_evaluation_info = lib->ms.support;
+	return supported;
+}
+
+dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
+{
+	dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; // pipe -> plane lookup; pipe_idx is not range-checked here
+	dml_print("DML::%s: pipe_idx=%u UseMALLForPStateChange=%0d\n", __func__, pipe_idx, mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx]); // %u: pipe_idx is dml_uint_t
+	return (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx] == dml_use_mall_pstate_change_phantom_pipe); // true only for phantom-pipe planes
+}
+
+// Getter generators: each expands to the definition of dml_get_<name>() returning the given expression cast to `type`.
+#define dml_get_per_surface_var_func(variable, type, internal_var) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) \
+{ \
+	const dml_uint_t plane_idx = mode_lib->mp.pipe_plane[surface_idx]; \
+	return (type) internal_var[plane_idx]; \
+}
+
+#define dml_get_var_func(var, type, internal_var) type dml_get_##var(struct display_mode_lib_st *mode_lib) \
+{ \
+	return (type) internal_var; \
+}
+
+dml_get_var_func(wm_urgent, dml_float_t, mode_lib->mp.Watermark.UrgentWatermark); // each line defines dml_get_<name>(mode_lib) via dml_get_var_func
+dml_get_var_func(wm_stutter_exit, dml_float_t, mode_lib->mp.Watermark.StutterExitWatermark);
+dml_get_var_func(wm_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
+dml_get_var_func(wm_memory_trip, dml_float_t, mode_lib->mp.UrgentLatency); // NOTE(review): reads mp.UrgentLatency, the same field as dml_get_urgent_latency below - confirm intended
+dml_get_var_func(wm_fclk_change, dml_float_t, mode_lib->mp.Watermark.FCLKChangeWatermark);
+dml_get_var_func(wm_usr_retraining, dml_float_t, mode_lib->mp.Watermark.USRRetrainingWatermark);
+dml_get_var_func(wm_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
+dml_get_var_func(wm_z8_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
+dml_get_var_func(wm_z8_stutter, dml_float_t, mode_lib->mp.Watermark.Z8StutterExitWatermark);
+dml_get_var_func(fraction_of_urgent_bandwidth, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidth);
+dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
+dml_get_var_func(urgent_latency, dml_float_t, mode_lib->mp.UrgentLatency);
+dml_get_var_func(clk_dcf_deepsleep, dml_float_t, mode_lib->mp.DCFCLKDeepSleep);
+dml_get_var_func(wm_writeback_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
+dml_get_var_func(wm_writeback_urgent, dml_float_t, mode_lib->mp.Watermark.WritebackUrgentWatermark);
+dml_get_var_func(stutter_efficiency, dml_float_t, mode_lib->mp.StutterEfficiency);
+dml_get_var_func(stutter_efficiency_no_vblank, dml_float_t, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
+dml_get_var_func(stutter_efficiency_z8, dml_float_t, mode_lib->mp.Z8StutterEfficiency);
+dml_get_var_func(stutter_num_bursts_z8, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
+dml_get_var_func(stutter_period, dml_float_t, mode_lib->mp.StutterPeriod);
+dml_get_var_func(stutter_efficiency_z8_bestcase, dml_float_t, mode_lib->mp.Z8StutterEfficiencyBestCase);
+dml_get_var_func(stutter_num_bursts_z8_bestcase, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
+dml_get_var_func(stutter_period_bestcase, dml_float_t, mode_lib->mp.StutterPeriodBestCase);
+dml_get_var_func(urgent_extra_latency, dml_float_t, mode_lib->mp.UrgentExtraLatency);
+dml_get_var_func(fclk_change_latency, dml_float_t, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
+dml_get_var_func(dispclk_calculated, dml_float_t, mode_lib->mp.Dispclk_calculated);
+dml_get_var_func(total_data_read_bw, dml_float_t, mode_lib->mp.TotalDataReadBandwidth);
+dml_get_var_func(return_bw, dml_float_t, mode_lib->ms.ReturnBW); // read from mode_lib->ms rather than mode_lib->mp
+dml_get_var_func(return_dram_bw, dml_float_t, mode_lib->ms.ReturnDRAMBW); // read from mode_lib->ms rather than mode_lib->mp
+dml_get_var_func(tcalc, dml_float_t, mode_lib->mp.TCalc);
+dml_get_var_func(comp_buffer_size_kbytes, dml_uint_t, mode_lib->mp.CompressedBufferSizeInkByte);
+dml_get_var_func(pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.pixel_chunk_size_kbytes); // ms.ip is the copy of mode_lib->ip taken by cache_ip_soc_cfg()
+dml_get_var_func(alpha_pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.alpha_pixel_chunk_size_kbytes);
+dml_get_var_func(meta_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.meta_chunk_size_kbytes);
+dml_get_var_func(min_pixel_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_pixel_chunk_size_bytes);
+dml_get_var_func(min_meta_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_meta_chunk_size_bytes);
+dml_get_var_func(total_immediate_flip_bytes, dml_uint_t, mode_lib->mp.TotImmediateFlipBytes);
+
+dml_get_per_surface_var_func(dsc_delay, dml_uint_t, mode_lib->mp.DSCDelay); // this is the dsc latency
+dml_get_per_surface_var_func(dppclk_calculated, dml_float_t, mode_lib->mp.Dppclk_calculated); // the macro maps surface_idx through mp.pipe_plane[] before indexing the array
+dml_get_per_surface_var_func(dscclk_calculated, dml_float_t, mode_lib->mp.DSCCLK_calculated);
+dml_get_per_surface_var_func(min_ttu_vblank_in_us, dml_float_t, mode_lib->mp.MinTTUVBlank);
+dml_get_per_surface_var_func(vratio_prefetch_l, dml_float_t, mode_lib->mp.VRatioPrefetchY);
+dml_get_per_surface_var_func(vratio_prefetch_c, dml_float_t, mode_lib->mp.VRatioPrefetchC);
+dml_get_per_surface_var_func(dst_x_after_scaler, dml_uint_t, mode_lib->mp.DSTXAfterScaler);
+dml_get_per_surface_var_func(dst_y_after_scaler, dml_uint_t, mode_lib->mp.DSTYAfterScaler);
+dml_get_per_surface_var_func(dst_y_per_vm_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInVBlank);
+dml_get_per_surface_var_func(dst_y_per_row_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInVBlank);
+dml_get_per_surface_var_func(dst_y_prefetch, dml_float_t, mode_lib->mp.DestinationLinesForPrefetch);
+dml_get_per_surface_var_func(dst_y_per_vm_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInImmediateFlip);
+dml_get_per_surface_var_func(dst_y_per_row_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInImmediateFlip);
+dml_get_per_surface_var_func(dst_y_per_pte_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L);
+dml_get_per_surface_var_func(dst_y_per_pte_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C);
+dml_get_per_surface_var_func(dst_y_per_meta_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_L);
+dml_get_per_surface_var_func(dst_y_per_meta_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_C);
+dml_get_per_surface_var_func(refcyc_per_vm_group_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupVBlank);
+dml_get_per_surface_var_func(refcyc_per_vm_group_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupFlip);
+dml_get_per_surface_var_func(refcyc_per_vm_req_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestVBlank);
+dml_get_per_surface_var_func(refcyc_per_vm_req_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestFlip);
+dml_get_per_surface_var_func(refcyc_per_vm_dmdata_in_us, dml_float_t, mode_lib->mp.Tdmdl_vm);
+dml_get_per_surface_var_func(dmdata_dl_delta_in_us, dml_float_t, mode_lib->mp.Tdmdl);
+dml_get_per_surface_var_func(refcyc_per_line_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLuma);
+dml_get_per_surface_var_func(refcyc_per_line_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChroma);
+dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch);
+dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch);
+dml_get_per_surface_var_func(refcyc_per_req_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma);
+dml_get_per_surface_var_func(refcyc_per_req_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma);
+dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch);
+dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
+dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTime);
+dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTimePrefetch);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkNominal);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkNominal);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkVBlank);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkVBlank);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkFlip);
+dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkFlip);
+dml_get_per_surface_var_func(refcyc_per_pte_group_nom_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_luma);
+dml_get_per_surface_var_func(refcyc_per_pte_group_nom_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_chroma);
+dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_luma);
+dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_chroma);
+dml_get_per_surface_var_func(refcyc_per_pte_group_flip_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_luma);
+dml_get_per_surface_var_func(refcyc_per_pte_group_flip_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_chroma);
+dml_get_per_surface_var_func(dpte_group_size_in_bytes, dml_uint_t, mode_lib->mp.dpte_group_bytes);
+dml_get_per_surface_var_func(vm_group_size_in_bytes, dml_uint_t, mode_lib->mp.vm_group_bytes);
+dml_get_per_surface_var_func(swath_height_l, dml_uint_t, mode_lib->ms.SwathHeightY); // SwathHeight* and DETBuffer* getters read mode_lib->ms; the rest of this list reads mode_lib->mp
+dml_get_per_surface_var_func(swath_height_c, dml_uint_t, mode_lib->ms.SwathHeightC);
+dml_get_per_surface_var_func(dpte_row_height_l, dml_uint_t, mode_lib->mp.dpte_row_height);
+dml_get_per_surface_var_func(dpte_row_height_c, dml_uint_t, mode_lib->mp.dpte_row_height_chroma);
+dml_get_per_surface_var_func(dpte_row_height_linear_l, dml_uint_t, mode_lib->mp.dpte_row_height_linear);
+dml_get_per_surface_var_func(dpte_row_height_linear_c, dml_uint_t, mode_lib->mp.dpte_row_height_linear_chroma);
+dml_get_per_surface_var_func(meta_row_height_l, dml_uint_t, mode_lib->mp.meta_row_height);
+dml_get_per_surface_var_func(meta_row_height_c, dml_uint_t, mode_lib->mp.meta_row_height_chroma);
+
+dml_get_per_surface_var_func(vstartup_calculated, dml_uint_t, mode_lib->mp.VStartup);
+dml_get_per_surface_var_func(vupdate_offset, dml_uint_t, mode_lib->mp.VUpdateOffsetPix);
+dml_get_per_surface_var_func(vupdate_width, dml_uint_t, mode_lib->mp.VUpdateWidthPix);
+dml_get_per_surface_var_func(vready_offset, dml_uint_t, mode_lib->mp.VReadyOffsetPix);
+dml_get_per_surface_var_func(vready_at_or_after_vsync, dml_uint_t, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC);
+dml_get_per_surface_var_func(min_dst_y_next_start, dml_uint_t, mode_lib->mp.MIN_DST_Y_NEXT_START);
+dml_get_per_surface_var_func(det_stored_buffer_size_l_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeY);
+dml_get_per_surface_var_func(det_stored_buffer_size_c_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeC);
+dml_get_per_surface_var_func(use_mall_for_static_screen, dml_uint_t, mode_lib->mp.UsesMALLForStaticScreen);
+dml_get_per_surface_var_func(surface_size_for_mall, dml_uint_t, mode_lib->mp.SurfaceSizeInTheMALL);
+dml_get_per_surface_var_func(dcc_max_uncompressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxUncompressedBlock);
+dml_get_per_surface_var_func(dcc_max_compressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxCompressedBlock);
+dml_get_per_surface_var_func(dcc_independent_block_l, dml_uint_t, mode_lib->mp.DCCYIndependentBlock);
+dml_get_per_surface_var_func(dcc_max_uncompressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxUncompressedBlock);
+dml_get_per_surface_var_func(dcc_max_compressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxCompressedBlock);
+dml_get_per_surface_var_func(dcc_independent_block_c, dml_uint_t, mode_lib->mp.DCCCIndependentBlock);
+dml_get_per_surface_var_func(max_active_dram_clock_change_latency_supported, dml_uint_t, mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported);
+dml_get_per_surface_var_func(pte_buffer_mode, dml_uint_t, mode_lib->mp.PTE_BUFFER_MODE);
+dml_get_per_surface_var_func(bigk_fragment_size, dml_uint_t, mode_lib->mp.BIGK_FRAGMENT_SIZE);
+dml_get_per_surface_var_func(dpte_bytes_per_row, dml_uint_t, mode_lib->mp.PixelPTEBytesPerRow);
+dml_get_per_surface_var_func(meta_bytes_per_row, dml_uint_t, mode_lib->mp.MetaRowByte);
+dml_get_per_surface_var_func(det_buffer_size_kbytes, dml_uint_t, mode_lib->ms.DETBufferSizeInKByte);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.h
new file mode 100644
index 000000000000..a38ed89c47a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DISPLAY_MODE_CORE_H__
+#define __DISPLAY_MODE_CORE_H__
+
+#include "display_mode_core_structs.h"
+
+struct display_mode_lib_st;
+
+dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib); /* used by dml_mode_support() once the inputs are cached in mode_lib->ms */
+void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib); /* used by dml_mode_programming() when call_standalone is set */
+void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg); /* used by dml_mode_programming() with its local clk_cfg */
+
+void dml_core_get_row_heights(
+ dml_uint_t *dpte_row_height, /* presumably an output, written through the pointer - TODO confirm against the definition */
+ dml_uint_t *meta_row_height, /* presumably an output, written through the pointer - TODO confirm against the definition */
+ const struct display_mode_lib_st *mode_lib,
+ dml_bool_t is_plane1,
+ enum dml_source_format_class SourcePixelFormat,
+ enum dml_swizzle_mode SurfaceTiling,
+ enum dml_rotation_angle ScanDirection,
+ dml_uint_t pitch,
+ dml_uint_t GPUVMMinPageSizeKBytes);
+
+dml_float_t dml_get_return_bw_mbps_vm_only(
+ const struct soc_bounding_box_st *soc,
+ dml_bool_t use_ideal_dram_bw_strobe,
+ dml_bool_t HostVMEnable,
+ dml_float_t DCFCLK,
+ dml_float_t FabricClock,
+ dml_float_t DRAMSpeed);
+
+dml_float_t dml_get_return_bw_mbps(
+ const struct soc_bounding_box_st *soc,
+ dml_bool_t use_ideal_dram_bw_strobe,
+ dml_bool_t HostVMEnable,
+ dml_float_t DCFCLK,
+ dml_float_t FabricClock,
+ dml_float_t DRAMSpeed);
+
+dml_bool_t dml_mode_support( /* caches state_idx and display_cfg in mode_lib, then returns dml_core_mode_support() */
+ struct display_mode_lib_st *mode_lib,
+ dml_uint_t state_idx,
+ const struct dml_display_cfg_st *display_cfg);
+
+dml_bool_t dml_mode_programming( /* returns mode_lib->mp.PrefetchAndImmediateFlipSupported */
+ struct display_mode_lib_st *mode_lib,
+ dml_uint_t state_idx,
+ const struct dml_display_cfg_st *display_cfg,
+ bool call_standalone);
+
+dml_uint_t dml_mode_support_ex( /* checks states from in_start_state_idx to num_states - 1; writes out_lowest_state_idx, and out_evaluation_info on success */
+ struct dml_mode_support_ex_params_st *in_out_params);
+
+dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx); /* true when the plane mapped to pipe_idx uses dml_use_mall_pstate_change_phantom_pipe */
+
+#define dml_get_per_surface_var_decl(variable, type) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx)
+#define dml_get_var_decl(var, type) type dml_get_##var(struct display_mode_lib_st *mode_lib)
+
+dml_get_var_decl(wm_urgent, dml_float_t);
+dml_get_var_decl(wm_stutter_exit, dml_float_t);
+dml_get_var_decl(wm_stutter_enter_exit, dml_float_t);
+dml_get_var_decl(wm_memory_trip, dml_float_t);
+dml_get_var_decl(wm_dram_clock_change, dml_float_t);
+dml_get_var_decl(wm_z8_stutter_enter_exit, dml_float_t);
+dml_get_var_decl(wm_z8_stutter, dml_float_t);
+dml_get_var_decl(urgent_latency, dml_float_t);
+dml_get_var_decl(clk_dcf_deepsleep, dml_float_t);
+dml_get_var_decl(wm_fclk_change, dml_float_t);
+dml_get_var_decl(wm_usr_retraining, dml_float_t);
+
+/* Remaining scalar getters; like the ones above, each takes only mode_lib. */
+dml_get_var_decl(wm_writeback_dram_clock_change, dml_float_t);
+dml_get_var_decl(wm_writeback_urgent, dml_float_t);
+dml_get_var_decl(stutter_efficiency_no_vblank, dml_float_t);
+dml_get_var_decl(stutter_efficiency, dml_float_t);
+dml_get_var_decl(stutter_efficiency_z8, dml_float_t);
+dml_get_var_decl(stutter_num_bursts_z8, dml_float_t);
+dml_get_var_decl(stutter_period, dml_float_t);
+dml_get_var_decl(stutter_efficiency_z8_bestcase, dml_float_t);
+dml_get_var_decl(stutter_num_bursts_z8_bestcase, dml_float_t);
+dml_get_var_decl(stutter_period_bestcase, dml_float_t);
+dml_get_var_decl(urgent_extra_latency, dml_float_t);
+dml_get_var_decl(fclk_change_latency, dml_float_t);
+/* NOTE(review): no dml_get_var_func(nonurgent_latency, ...) sits with the other getter definitions in display_mode_core.c - confirm one exists */
+dml_get_var_decl(nonurgent_latency, dml_float_t);
+dml_get_var_decl(dispclk_calculated, dml_float_t);
+dml_get_var_decl(total_data_read_bw, dml_float_t);
+dml_get_var_decl(return_bw, dml_float_t);
+dml_get_var_decl(return_dram_bw, dml_float_t);
+dml_get_var_decl(tcalc, dml_float_t);
+dml_get_var_decl(fraction_of_urgent_bandwidth, dml_float_t);
+dml_get_var_decl(fraction_of_urgent_bandwidth_imm_flip, dml_float_t);
+dml_get_var_decl(comp_buffer_size_kbytes, dml_uint_t);
+dml_get_var_decl(pixel_chunk_size_in_kbyte, dml_uint_t);
+dml_get_var_decl(alpha_pixel_chunk_size_in_kbyte, dml_uint_t);
+dml_get_var_decl(meta_chunk_size_in_kbyte, dml_uint_t);
+dml_get_var_decl(min_pixel_chunk_size_in_byte, dml_uint_t);
+dml_get_var_decl(min_meta_chunk_size_in_byte, dml_uint_t);
+dml_get_var_decl(total_immediate_flip_bytes, dml_uint_t);
+
+dml_get_per_surface_var_decl(dsc_delay, dml_uint_t); /* per-surface getters: each declares dml_get_<name>(mode_lib, surface_idx) */
+dml_get_per_surface_var_decl(dppclk_calculated, dml_float_t);
+dml_get_per_surface_var_decl(dscclk_calculated, dml_float_t);
+dml_get_per_surface_var_decl(min_ttu_vblank_in_us, dml_float_t);
+dml_get_per_surface_var_decl(vratio_prefetch_l, dml_float_t);
+dml_get_per_surface_var_decl(vratio_prefetch_c, dml_float_t);
+dml_get_per_surface_var_decl(dst_x_after_scaler, dml_uint_t);
+dml_get_per_surface_var_decl(dst_y_after_scaler, dml_uint_t);
+dml_get_per_surface_var_decl(dst_y_per_vm_vblank, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_row_vblank, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_prefetch, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_vm_flip, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_row_flip, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_pte_row_nom_l, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_pte_row_nom_c, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_meta_row_nom_l, dml_float_t);
+dml_get_per_surface_var_decl(dst_y_per_meta_row_nom_c, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_vm_group_vblank_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_vm_group_flip_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_vm_req_vblank_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_vm_req_flip_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_vm_dmdata_in_us, dml_float_t);
+dml_get_per_surface_var_decl(dmdata_dl_delta_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_line_delivery_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_line_delivery_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_line_delivery_pre_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_line_delivery_pre_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_req_delivery_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_req_delivery_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_req_delivery_pre_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_req_delivery_pre_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_cursor_req_delivery_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_nom_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_nom_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_vblank_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_vblank_c_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_flip_l_in_us, dml_float_t);
+dml_get_per_surface_var_decl(refcyc_per_pte_group_flip_c_in_us, dml_float_t);
+
+dml_get_per_surface_var_decl(dpte_group_size_in_bytes, dml_uint_t); /* from here on every per-surface getter returns dml_uint_t */
+dml_get_per_surface_var_decl(vm_group_size_in_bytes, dml_uint_t);
+dml_get_per_surface_var_decl(swath_height_l, dml_uint_t);
+dml_get_per_surface_var_decl(swath_height_c, dml_uint_t);
+dml_get_per_surface_var_decl(dpte_row_height_l, dml_uint_t);
+dml_get_per_surface_var_decl(dpte_row_height_c, dml_uint_t);
+dml_get_per_surface_var_decl(dpte_row_height_linear_l, dml_uint_t);
+dml_get_per_surface_var_decl(dpte_row_height_linear_c, dml_uint_t);
+dml_get_per_surface_var_decl(meta_row_height_l, dml_uint_t);
+dml_get_per_surface_var_decl(meta_row_height_c, dml_uint_t);
+dml_get_per_surface_var_decl(vstartup_calculated, dml_uint_t);
+dml_get_per_surface_var_decl(vupdate_offset, dml_uint_t);
+dml_get_per_surface_var_decl(vupdate_width, dml_uint_t);
+dml_get_per_surface_var_decl(vready_offset, dml_uint_t);
+dml_get_per_surface_var_decl(vready_at_or_after_vsync, dml_uint_t);
+dml_get_per_surface_var_decl(min_dst_y_next_start, dml_uint_t);
+dml_get_per_surface_var_decl(det_stored_buffer_size_l_bytes, dml_uint_t);
+dml_get_per_surface_var_decl(det_stored_buffer_size_c_bytes, dml_uint_t);
+dml_get_per_surface_var_decl(use_mall_for_static_screen, dml_uint_t);
+dml_get_per_surface_var_decl(surface_size_for_mall, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_max_uncompressed_block_l, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_max_uncompressed_block_c, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_max_compressed_block_l, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_max_compressed_block_c, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_independent_block_l, dml_uint_t);
+dml_get_per_surface_var_decl(dcc_independent_block_c, dml_uint_t);
+dml_get_per_surface_var_decl(max_active_dram_clock_change_latency_supported, dml_uint_t);
+dml_get_per_surface_var_decl(pte_buffer_mode, dml_uint_t);
+dml_get_per_surface_var_decl(bigk_fragment_size, dml_uint_t);
+dml_get_per_surface_var_decl(dpte_bytes_per_row, dml_uint_t);
+dml_get_per_surface_var_decl(meta_bytes_per_row, dml_uint_t);
+dml_get_per_surface_var_decl(det_buffer_size_kbytes, dml_uint_t);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h
new file mode 100644
index 000000000000..dbeb08466092
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h
@@ -0,0 +1,2033 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DISPLAY_MODE_CORE_STRUCT_H__
+#define __DISPLAY_MODE_CORE_STRUCT_H__
+
+#include "display_mode_lib_defines.h"
+#include "dml_top_display_cfg_types.h"
+
+enum dml_project_id { /* Project identifiers; a zero-initialized value is dml_project_invalid */
+ dml_project_invalid = 0,
+ dml_project_default = 1,
+ dml_project_dcn32 = dml_project_default, /* alias: compares equal to dml_project_default */
+ dml_project_dcn321 = 2,
+ dml_project_dcn35 = 3,
+ dml_project_dcn351 = 4,
+ dml_project_dcn401 = 5,
+ dml_project_dcn36 = 6, /* numbered after dcn401: do not rely on value order matching the dcn number */
+};
+enum dml_prefetch_modes { /* Type of dml_mode_eval_policy_st.AllowForPStateChangeOrStutterInVBlank and ...InVBlankFinal */
+ dml_prefetch_support_uclk_fclk_and_stutter_if_possible = 0, /* value a zero-initialized policy field takes */
+ dml_prefetch_support_uclk_fclk_and_stutter = 1,
+ dml_prefetch_support_fclk_and_stutter = 2,
+ dml_prefetch_support_stutter = 3,
+ dml_prefetch_support_none = 4
+};
+enum dml_use_mall_for_pstate_change_mode { /* Per-plane element type of dml_plane_cfg_st.UseMALLForPStateChange */
+ dml_use_mall_pstate_change_disable = 0, /* value a zero-initialized plane takes */
+ dml_use_mall_pstate_change_full_frame = 1,
+ dml_use_mall_pstate_change_sub_viewport = 2,
+ dml_use_mall_pstate_change_phantom_pipe = 3,
+ dml_use_mall_pstate_change_phantom_pipe_no_data_return = 4,
+ dml_use_mall_pstate_change_imall = 5
+};
+enum dml_use_mall_for_static_screen_mode { /* Per-plane element type of dml_plane_cfg_st.UseMALLForStaticScreen */
+ dml_use_mall_static_screen_disable = 0, /* value a zero-initialized plane takes */
+ dml_use_mall_static_screen_enable = 1,
+ dml_use_mall_static_screen_optimize = 2
+};
+enum dml_output_encoder_class { /* Element type of dml_output_cfg_st.OutputEncoder; numbering differs from dml_output_type_and_rate__type, so do not cast between them */
+ dml_dp = 0, /* note: a zero-initialized entry reads as dml_dp, not dml_none */
+ dml_edp = 1,
+ dml_dp2p0 = 2,
+ dml_hdmi = 3,
+ dml_hdmifrl = 4,
+ dml_none = 5
+};
+enum dml_output_link_dp_rate{ /* Element type of dml_output_cfg_st.OutputLinkDPRate */
+ dml_dp_rate_na = 0,
+ dml_dp_rate_hbr = 1, /* values 1..6 equal the dml_output_rate_dp_rate_* values in dml_output_type_and_rate__rate */
+ dml_dp_rate_hbr2 = 2,
+ dml_dp_rate_hbr3 = 3,
+ dml_dp_rate_uhbr10 = 4,
+ dml_dp_rate_uhbr13p5 = 5,
+ dml_dp_rate_uhbr20 = 6
+};
+enum dml_output_type_and_rate__type{ /* Element type of dml_mode_support_info_st.OutputType */
+ dml_output_type_unknown = 0, /* zero is "unknown" here, whereas dml_output_encoder_class uses 0 for dml_dp */
+ dml_output_type_dp = 1,
+ dml_output_type_edp = 2,
+ dml_output_type_dp2p0 = 3,
+ dml_output_type_hdmi = 4,
+ dml_output_type_hdmifrl = 5
+};
+enum dml_output_type_and_rate__rate { /* Element type of dml_mode_support_info_st.OutputRate; one numbering shared by the dp_rate_* and hdmi_rate_* entries */
+ dml_output_rate_unknown = 0,
+ dml_output_rate_dp_rate_hbr = 1, /* 1..6: same values as dml_dp_rate_hbr..dml_dp_rate_uhbr20 */
+ dml_output_rate_dp_rate_hbr2 = 2,
+ dml_output_rate_dp_rate_hbr3 = 3,
+ dml_output_rate_dp_rate_uhbr10 = 4,
+ dml_output_rate_dp_rate_uhbr13p5 = 5,
+ dml_output_rate_dp_rate_uhbr20 = 6,
+ dml_output_rate_hdmi_rate_3x3 = 7, /* 7..12: hdmi_rate_* entries continue the same numbering */
+ dml_output_rate_hdmi_rate_6x3 = 8,
+ dml_output_rate_hdmi_rate_6x4 = 9,
+ dml_output_rate_hdmi_rate_8x4 = 10,
+ dml_output_rate_hdmi_rate_10x4 = 11,
+ dml_output_rate_hdmi_rate_12x4 = 12
+};
+enum dml_output_format_class { /* Element type of dml_output_cfg_st.OutputFormat */
+ dml_444 = 0,
+ dml_s422 = 1, /* NOTE(review): s/n prefixes presumably mean simple vs native 4:2:2 (cf. ip_params_st.dsc422_native_support) -- confirm */
+ dml_n422 = 2,
+ dml_420 = 3
+};
+enum dml_source_format_class { /* Used for SourcePixelFormat (dml_surface_cfg_st, DmlPipe) and dml_writeback_cfg_st.WritebackPixelFormat */
+ dml_444_8 = 0, /* value a zero-initialized surface takes */
+ dml_444_16 = 1,
+ dml_444_32 = 2,
+ dml_444_64 = 3,
+ dml_420_8 = 4,
+ dml_420_10 = 5,
+ dml_420_12 = 6,
+ dml_422_8 = 7,
+ dml_422_10 = 8,
+ dml_rgbe_alpha = 9,
+ dml_rgbe = 10,
+ dml_mono_8 = 11,
+ dml_mono_16 = 12
+};
+enum dml_output_bpc_class { /* Enumerator values are indices 0..4, not the 6/8/10/12/16 in the names; same shape as dml_lb_depth */
+ dml_out_6 = 0,
+ dml_out_8 = 1,
+ dml_out_10 = 2,
+ dml_out_12 = 3,
+ dml_out_16 = 4
+};
+enum dml_output_standard_class { /* NOTE(review): presumably the timing standard of the output (CVT / CEA / CVT-R2) -- confirm against users */
+ dml_std_cvt = 0,
+ dml_std_cea = 1,
+ dml_std_cvtr2 = 2
+};
+enum dml_rotation_angle { /* Type of SourceScan in dml_plane_cfg_st and DmlPipe; values are indices, not degrees */
+ dml_rotation_0 = 0,
+ dml_rotation_90 = 1,
+ dml_rotation_180 = 2,
+ dml_rotation_270 = 3,
+ dml_rotation_0m = 4, /* 4..7 repeat 0..3 with an "m" suffix -- presumably mirrored; confirm */
+ dml_rotation_90m = 5,
+ dml_rotation_180m = 6,
+ dml_rotation_270m = 7
+};
+enum dml_swizzle_mode { /* Type of SurfaceTiling in dml_surface_cfg_st and DmlPipe; 36 values, 0..35 */
+ dml_sw_linear = 0, /* value a zero-initialized surface takes */
+ dml_sw_256b_s = 1, /* 1..3: 256b block has no _z entry, unlike the larger block sizes below */
+ dml_sw_256b_d = 2,
+ dml_sw_256b_r = 3,
+ dml_sw_4kb_z = 4, /* 4..15: block size (4kb/64kb/256kb) x variant (_z/_s/_d/_r) */
+ dml_sw_4kb_s = 5,
+ dml_sw_4kb_d = 6,
+ dml_sw_4kb_r = 7,
+ dml_sw_64kb_z = 8,
+ dml_sw_64kb_s = 9,
+ dml_sw_64kb_d = 10,
+ dml_sw_64kb_r = 11,
+ dml_sw_256kb_z = 12,
+ dml_sw_256kb_s = 13,
+ dml_sw_256kb_d = 14,
+ dml_sw_256kb_r = 15,
+ dml_sw_64kb_z_t = 16, /* 16..19: _t variants exist for the 64kb block only */
+ dml_sw_64kb_s_t = 17,
+ dml_sw_64kb_d_t = 18,
+ dml_sw_64kb_r_t = 19,
+ dml_sw_4kb_z_x = 20, /* 20..31: _x variants of the 4kb/64kb/256kb modes */
+ dml_sw_4kb_s_x = 21,
+ dml_sw_4kb_d_x = 22,
+ dml_sw_4kb_r_x = 23,
+ dml_sw_64kb_z_x = 24,
+ dml_sw_64kb_s_x = 25,
+ dml_sw_64kb_d_x = 26,
+ dml_sw_64kb_r_x = 27,
+ dml_sw_256kb_z_x = 28,
+ dml_sw_256kb_s_x = 29,
+ dml_sw_256kb_d_x = 30,
+ dml_sw_256kb_r_x = 31,
+ dml_sw_256b_2d = 32, /* 32..35: _2d modes, one per block size */
+ dml_sw_4kb_2d = 33,
+ dml_sw_64kb_2d = 34,
+ dml_sw_256kb_2d = 35
+};
+enum dml_lb_depth { /* Enumerator values are indices 0..4, not the 6/8/10/12/16 in the names; same shape as dml_output_bpc_class */
+ dml_lb_6 = 0,
+ dml_lb_8 = 1,
+ dml_lb_10 = 2,
+ dml_lb_12 = 3,
+ dml_lb_16 = 4
+};
+enum dml_voltage_state { /* Five levels numbered 0..4 in the order vmin_lv, vmin, vmid, vnom, vmax */
+ dml_vmin_lv = 0,
+ dml_vmin = 1,
+ dml_vmid = 2,
+ dml_vnom = 3,
+ dml_vmax = 4
+};
+enum dml_source_macro_tile_size { /* Values are indices 0..2, not the tile size itself */
+ dml_4k_tile = 0,
+ dml_64k_tile = 1,
+ dml_256k_tile = 2
+};
+enum dml_cursor_bpp { /* Note: dml_plane_cfg_st.CursorBPP is declared dml_uint_t, not this enum -- verify which encoding callers store there */
+ dml_cur_2bit = 0,
+ dml_cur_32bit = 1,
+ dml_cur_64bit = 2
+};
+enum dml_dram_clock_change_support { /* Element type of dml_mode_support_info_st.DRAMClockChangeSupport[2] */
+ dml_dram_clock_change_vactive = 0, /* note: a zero-initialized result reads as "vactive", not "unsupported" */
+ dml_dram_clock_change_vblank = 1,
+ dml_dram_clock_change_vblank_drr = 2,
+ dml_dram_clock_change_vactive_w_mall_full_frame = 3,
+ dml_dram_clock_change_vactive_w_mall_sub_vp = 4,
+ dml_dram_clock_change_vblank_w_mall_full_frame = 5,
+ dml_dram_clock_change_vblank_drr_w_mall_full_frame = 6,
+ dml_dram_clock_change_vblank_w_mall_sub_vp = 7,
+ dml_dram_clock_change_vblank_drr_w_mall_sub_vp = 8,
+ dml_dram_clock_change_unsupported = 9
+};
+enum dml_fclock_change_support { /* Element type of dml_mode_support_info_st.FCLKChangeSupport[2] */
+ dml_fclock_change_vactive = 0, /* note: a zero-initialized result reads as "vactive", not "unsupported" */
+ dml_fclock_change_vblank = 1,
+ dml_fclock_change_unsupported = 2
+};
+enum dml_dsc_enable { /* Element type of dml_output_cfg_st.DSCEnable (a request); the resolved on/off state is the dml_bool_t DSCEnabled arrays */
+ dml_dsc_disable = 0,
+ dml_dsc_enable = 1, /* enumerator has the same spelling as the enum tag; legal in C because tags live in a separate namespace */
+ dml_dsc_enable_if_necessary = 2
+};
+enum dml_mpc_use_policy { /* Element type of dml_mode_eval_policy_st.MPCCombineUse (user-selected, used in the mode check stage) */
+ dml_mpc_disabled = 0, /* value a zero-initialized policy takes */
+ dml_mpc_as_possible = 1,
+ dml_mpc_as_needed_for_voltage = 2,
+ dml_mpc_as_needed_for_pstate_and_voltage = 3,
+ dml_mpc_as_needed = 4,
+ dml_mpc_2to1 = 5
+};
+enum dml_odm_use_policy { /* Element type of dml_mode_eval_policy_st.ODMUse (the user's request); the chosen result is enum dml_odm_mode */
+ dml_odm_use_policy_bypass = 0,
+ dml_odm_use_policy_combine_as_needed = 1, /* has no dml_odm_mode counterpart, so every later value is dml_odm_mode's value + 1 */
+ dml_odm_use_policy_combine_2to1 = 2,
+ dml_odm_use_policy_combine_3to1 = 3,
+ dml_odm_use_policy_combine_4to1 = 4,
+ dml_odm_use_policy_split_1to2 = 5,
+ dml_odm_use_policy_mso_1to2 = 6,
+ dml_odm_use_policy_mso_1to4 = 7
+};
+enum dml_odm_mode { /* Type of ODMMode in dml_hw_resource_st, dml_mode_support_info_st and DmlPipe; not value-compatible with dml_odm_use_policy */
+ dml_odm_mode_bypass = 0,
+ dml_odm_mode_combine_2to1 = 1,
+ dml_odm_mode_combine_3to1 = 2,
+ dml_odm_mode_combine_4to1 = 3,
+ dml_odm_mode_split_1to2 = 4,
+ dml_odm_mode_mso_1to2 = 5,
+ dml_odm_mode_mso_1to4 = 6
+};
+enum dml_writeback_configuration { /* Two whole-buffer/single-stream layouts that differ only in interleaving */
+ dml_whole_buffer_for_single_stream_no_interleave = 0,
+ dml_whole_buffer_for_single_stream_interleave = 1
+};
+enum dml_immediate_flip_requirement { /* Per-plane element type of dml_mode_eval_policy_st.ImmediateFlipRequirement */
+ dml_immediate_flip_not_required = 0, /* value a zero-initialized policy takes */
+ dml_immediate_flip_required = 1,
+ dml_immediate_flip_if_possible = 2
+};
+enum dml_unbounded_requesting_policy { /* Type of dml_mode_eval_policy_st.UseUnboundedRequesting */
+ dml_unbounded_requesting_enable = 0, /* careful: zero means ENABLE, so a zero-initialized policy turns this on */
+ dml_unbounded_requesting_edp_only = 1,
+ dml_unbounded_requesting_disable = 2
+};
+enum dml_clk_cfg_policy { /* Type of the dcfclk/dispclk/dppclk *_option fields in dml_clk_cfg_st */
+ dml_use_required_freq = 0, /* value a zero-initialized dml_clk_cfg_st takes */
+ dml_use_override_freq = 1, /* NOTE(review): presumably selects the matching *_mhz field of dml_clk_cfg_st -- confirm */
+ dml_use_state_freq = 2
+};
+
+
+struct soc_state_bounding_box_st { /* Per-power-state SoC values; mode_support_st embeds one as 'state' and one as 'max_state' */
+ dml_float_t socclk_mhz; /* field names carry their unit as a suffix (_mhz, _mts, _us) */
+ dml_float_t dscclk_mhz;
+ dml_float_t phyclk_mhz;
+ dml_float_t phyclk_d18_mhz;
+ dml_float_t phyclk_d32_mhz;
+ dml_float_t dtbclk_mhz;
+ dml_float_t fabricclk_mhz;
+ dml_float_t dcfclk_mhz;
+ dml_float_t dispclk_mhz;
+ dml_float_t dppclk_mhz;
+ dml_float_t dram_speed_mts;
+ dml_float_t urgent_latency_pixel_data_only_us;
+ dml_float_t urgent_latency_pixel_mixed_with_vm_data_us;
+ dml_float_t urgent_latency_vm_data_only_us;
+ dml_float_t writeback_latency_us;
+ dml_float_t urgent_latency_adjustment_fabric_clock_component_us;
+ dml_float_t urgent_latency_adjustment_fabric_clock_reference_mhz;
+ dml_float_t sr_exit_time_us;
+ dml_float_t sr_enter_plus_exit_time_us;
+ dml_float_t sr_exit_z8_time_us;
+ dml_float_t sr_enter_plus_exit_z8_time_us;
+ dml_float_t dram_clock_change_latency_us;
+ dml_float_t fclk_change_latency_us;
+ dml_float_t usr_retraining_latency_us;
+ dml_bool_t use_ideal_dram_bw_strobe;
+ dml_float_t g6_temp_read_blackout_us;
+
+ struct { /* anonymous type: values below are counts of uclk cycles, per their suffix */
+ dml_uint_t urgent_ramp_uclk_cycles;
+ dml_uint_t trip_to_memory_uclk_cycles;
+ dml_uint_t meta_trip_to_memory_uclk_cycles;
+ dml_uint_t maximum_latency_when_urgent_uclk_cycles;
+ dml_uint_t average_latency_when_urgent_uclk_cycles;
+ dml_uint_t maximum_latency_when_non_urgent_uclk_cycles;
+ dml_uint_t average_latency_when_non_urgent_uclk_cycles;
+ } dml_dcn401_uclk_dpm_dependent_soc_qos_params; /* NOTE(review): presumably only meaningful for dml_project_dcn401 -- confirm */
+};
+
+struct soc_bounding_box_st { /* SoC values that are not per power state; mode_support_st embeds one as 'soc' */
+ dml_float_t dprefclk_mhz;
+ dml_float_t xtalclk_mhz;
+ dml_float_t pcierefclk_mhz;
+ dml_float_t refclk_mhz;
+ dml_float_t amclk_mhz;
+ dml_uint_t max_outstanding_reqs;
+ dml_float_t pct_ideal_sdp_bw_after_urgent; /* NOTE(review): pct_*/..._percent fields presumably hold 0..100 rather than 0..1 -- confirm */
+ dml_float_t pct_ideal_fabric_bw_after_urgent;
+ dml_float_t pct_ideal_dram_bw_after_urgent_pixel_only;
+ dml_float_t pct_ideal_dram_bw_after_urgent_pixel_and_vm;
+ dml_float_t pct_ideal_dram_bw_after_urgent_vm_only;
+ dml_float_t pct_ideal_dram_bw_after_urgent_strobe;
+ dml_float_t max_avg_sdp_bw_use_normal_percent;
+ dml_float_t max_avg_fabric_bw_use_normal_percent;
+ dml_float_t max_avg_dram_bw_use_normal_percent;
+ dml_float_t max_avg_dram_bw_use_normal_strobe_percent;
+
+ dml_float_t svp_prefetch_pct_ideal_sdp_bw_after_urgent; /* svp_prefetch_* mirror the fields above, except that the two *_strobe fields have no svp counterpart */
+ dml_float_t svp_prefetch_pct_ideal_fabric_bw_after_urgent;
+ dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_only;
+ dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_and_vm;
+ dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_vm_only;
+ dml_float_t svp_prefetch_max_avg_sdp_bw_use_normal_percent;
+ dml_float_t svp_prefetch_max_avg_fabric_bw_use_normal_percent;
+ dml_float_t svp_prefetch_max_avg_dram_bw_use_normal_percent;
+
+ dml_uint_t round_trip_ping_latency_dcfclk_cycles;
+ dml_uint_t urgent_out_of_order_return_per_channel_pixel_only_bytes;
+ dml_uint_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+ dml_uint_t urgent_out_of_order_return_per_channel_vm_only_bytes;
+ dml_uint_t num_chans;
+ dml_uint_t return_bus_width_bytes;
+ dml_uint_t dram_channel_width_bytes;
+ dml_uint_t fabric_datapath_to_dcn_data_return_bytes;
+ dml_uint_t hostvm_min_page_size_kbytes;
+ dml_uint_t gpuvm_min_page_size_kbytes; /* dml_plane_cfg_st also carries a per-plane GPUVMMinPageSizeKBytes[] */
+ dml_float_t phy_downspread_percent;
+ dml_float_t dcn_downspread_percent;
+ dml_float_t smn_latency_us;
+ dml_uint_t mall_allocated_for_dcn_mbytes;
+ dml_float_t dispclk_dppclk_vco_speed_mhz;
+ dml_bool_t do_urgent_latency_adjustment; /* NOTE(review): presumably gates the urgent_latency_adjustment_* fields of soc_state_bounding_box_st -- confirm */
+
+ dml_uint_t mem_word_bytes;
+ dml_uint_t num_dcc_mcaches;
+ dml_uint_t mcache_size_bytes;
+ dml_uint_t mcache_line_size_bytes;
+
+ struct { /* anonymous type: *_fclk_cycles are cycle counts, *_margin are dml_float_t factors */
+ dml_bool_t UseNewDCN401SOCParameters; /* NOTE(review): presumably selects whether this sub-struct is consulted at all -- confirm */
+ dml_uint_t df_qos_response_time_fclk_cycles;
+ dml_uint_t max_round_trip_to_furthest_cs_fclk_cycles;
+ dml_uint_t mall_overhead_fclk_cycles;
+ dml_uint_t meta_trip_adder_fclk_cycles;
+ dml_uint_t average_transport_distance_fclk_cycles;
+ dml_float_t umc_urgent_ramp_latency_margin;
+ dml_float_t umc_max_latency_margin;
+ dml_float_t umc_average_latency_margin;
+ dml_float_t fabric_max_transport_latency_margin;
+ dml_float_t fabric_average_transport_latency_margin;
+ } dml_dcn401_soc_qos_params;
+
+};
+
+struct ip_params_st { /* Display IP capabilities and sizes; mode_support_st embeds one as 'ip' */
+ dml_uint_t vblank_nom_default_us;
+ dml_uint_t rob_buffer_size_kbytes; /* sizes carry their unit in the name: _kbytes, _bytes, _bits, _kentries */
+ dml_uint_t config_return_buffer_size_in_kbytes;
+ dml_uint_t config_return_buffer_segment_size_in_kbytes;
+ dml_uint_t compressed_buffer_segment_size_in_kbytes;
+ dml_uint_t meta_fifo_size_in_kentries;
+ dml_uint_t zero_size_buffer_entries;
+ dml_uint_t dpte_buffer_size_in_pte_reqs_luma;
+ dml_uint_t dpte_buffer_size_in_pte_reqs_chroma;
+ dml_uint_t dcc_meta_buffer_size_bytes;
+ dml_bool_t gpuvm_enable; /* dml_plane_cfg_st has its own GPUVMEnable / HostVMEnable flags as well */
+ dml_bool_t hostvm_enable;
+ dml_uint_t gpuvm_max_page_table_levels;
+ dml_uint_t hostvm_max_page_table_levels;
+ dml_uint_t pixel_chunk_size_kbytes;
+ dml_uint_t alpha_pixel_chunk_size_kbytes;
+ dml_uint_t min_pixel_chunk_size_bytes; /* note the unit change: min_* chunk sizes are in bytes, the others in kbytes */
+ dml_uint_t meta_chunk_size_kbytes;
+ dml_uint_t min_meta_chunk_size_bytes;
+ dml_uint_t writeback_chunk_size_kbytes;
+ dml_uint_t line_buffer_size_bits;
+ dml_uint_t max_line_buffer_lines;
+ dml_uint_t writeback_interface_buffer_size_kbytes;
+ dml_uint_t max_num_dpp;
+ dml_uint_t max_num_otg;
+ dml_uint_t max_num_wb;
+ dml_uint_t max_dchub_pscl_bw_pix_per_clk;
+ dml_uint_t max_pscl_lb_bw_pix_per_clk;
+ dml_uint_t max_lb_vscl_bw_pix_per_clk;
+ dml_uint_t max_vscl_hscl_bw_pix_per_clk;
+ dml_float_t max_hscl_ratio;
+ dml_float_t max_vscl_ratio;
+ dml_uint_t max_hscl_taps;
+ dml_uint_t max_vscl_taps;
+ dml_uint_t num_dsc;
+ dml_uint_t maximum_dsc_bits_per_component;
+ dml_uint_t maximum_pixels_per_line_per_dsc_unit;
+ dml_bool_t dsc422_native_support;
+ dml_bool_t cursor_64bpp_support;
+ dml_float_t dispclk_ramp_margin_percent;
+ dml_uint_t dppclk_delay_subtotal;
+ dml_uint_t dppclk_delay_scl;
+ dml_uint_t dppclk_delay_scl_lb_only;
+ dml_uint_t dppclk_delay_cnvc_formatter;
+ dml_uint_t dppclk_delay_cnvc_cursor;
+ dml_uint_t cursor_buffer_size; /* unit not stated in this header */
+ dml_uint_t cursor_chunk_size; /* unit not stated in this header */
+ dml_uint_t dispclk_delay_subtotal;
+ dml_bool_t dynamic_metadata_vm_enabled;
+ dml_uint_t max_inter_dcn_tile_repeaters;
+ dml_uint_t max_num_hdmi_frl_outputs;
+ dml_uint_t max_num_dp2p0_outputs;
+ dml_uint_t max_num_dp2p0_streams;
+ dml_bool_t dcc_supported;
+ dml_bool_t ptoi_supported;
+ dml_float_t writeback_max_hscl_ratio;
+ dml_float_t writeback_max_vscl_ratio;
+ dml_float_t writeback_min_hscl_ratio;
+ dml_float_t writeback_min_vscl_ratio;
+ dml_uint_t writeback_max_hscl_taps;
+ dml_uint_t writeback_max_vscl_taps;
+ dml_uint_t writeback_line_buffer_buffer_size; /* unit not stated in this header */
+};
+
+struct DmlPipe { /* Scalar values for one pipe; many names repeat the per-plane arrays of dml_plane_cfg_st / dml_surface_cfg_st / dml_timing_cfg_st */
+ dml_float_t Dppclk;
+ dml_float_t Dispclk;
+ dml_float_t PixelClock;
+ dml_float_t DCFClkDeepSleep;
+ dml_uint_t DPPPerSurface;
+ dml_bool_t ScalerEnabled;
+ enum dml_rotation_angle SourceScan;
+ dml_uint_t ViewportHeight;
+ dml_uint_t ViewportHeightChroma;
+ dml_uint_t BlockWidth256BytesY; /* Y/C suffixes pair up throughout: the same quantity for luma and chroma */
+ dml_uint_t BlockHeight256BytesY;
+ dml_uint_t BlockWidth256BytesC;
+ dml_uint_t BlockHeight256BytesC;
+ dml_uint_t BlockWidthY;
+ dml_uint_t BlockHeightY;
+ dml_uint_t BlockWidthC;
+ dml_uint_t BlockHeightC;
+ dml_uint_t InterlaceEnable; /* declared dml_uint_t here, while dml_timing_cfg_st.Interlace is dml_bool_t */
+ dml_uint_t NumberOfCursors;
+ dml_uint_t VBlank;
+ dml_uint_t HTotal;
+ dml_uint_t HActive;
+ dml_bool_t DCCEnable;
+ enum dml_odm_mode ODMMode;
+ enum dml_source_format_class SourcePixelFormat;
+ enum dml_swizzle_mode SurfaceTiling;
+ dml_uint_t BytePerPixelY;
+ dml_uint_t BytePerPixelC;
+ dml_bool_t ProgressiveToInterlaceUnitInOPP;
+ dml_float_t VRatio;
+ dml_float_t VRatioChroma;
+ dml_uint_t VTaps;
+ dml_uint_t VTapsChroma;
+ dml_uint_t PitchY;
+ dml_uint_t DCCMetaPitchY;
+ dml_uint_t PitchC;
+ dml_uint_t DCCMetaPitchC;
+ dml_bool_t ViewportStationary;
+ dml_uint_t ViewportXStart;
+ dml_uint_t ViewportYStart;
+ dml_uint_t ViewportXStartC;
+ dml_uint_t ViewportYStartC;
+ dml_bool_t FORCE_ONE_ROW_FOR_FRAME; /* spelled ForceOneRowForFrame in dml_plane_cfg_st */
+ dml_uint_t SwathHeightY;
+ dml_uint_t SwathHeightC;
+};
+
+struct Watermarks { /* Set of watermark values, all dml_float_t; units are not stated in this header */
+ dml_float_t UrgentWatermark;
+ dml_float_t WritebackUrgentWatermark;
+ dml_float_t DRAMClockChangeWatermark;
+ dml_float_t FCLKChangeWatermark;
+ dml_float_t WritebackDRAMClockChangeWatermark;
+ dml_float_t WritebackFCLKChangeWatermark;
+ dml_float_t StutterExitWatermark;
+ dml_float_t StutterEnterPlusExitWatermark;
+ dml_float_t Z8StutterExitWatermark; /* Z8* entries parallel the two Stutter* entries above */
+ dml_float_t Z8StutterEnterPlusExitWatermark;
+ dml_float_t USRRetrainingWatermark;
+};
+
+struct SOCParametersList { /* Latency/time values; most names match *_us fields of the bounding-box structs -- presumably microseconds too, confirm */
+ dml_float_t UrgentLatency;
+ dml_float_t ExtraLatency; /* no same-named field exists in the bounding-box structs in this header */
+ dml_float_t WritebackLatency;
+ dml_float_t DRAMClockChangeLatency;
+ dml_float_t FCLKChangeLatency;
+ dml_float_t SRExitTime;
+ dml_float_t SREnterPlusExitTime;
+ dml_float_t SRExitZ8Time;
+ dml_float_t SREnterPlusExitZ8Time;
+ dml_float_t USRRetrainingLatency;
+ dml_float_t SMNLatency;
+};
+
+/// @brief Struct that represents the plane configuration of a display cfg; arrays are indexed per plane (__DML_NUM_PLANES__ entries)
+struct dml_plane_cfg_st {
+ //
+ // Pipe/Surface Parameters
+ //
+ dml_bool_t GPUVMEnable; ///< Set if any pipe has GPUVM enabled
+ dml_bool_t HostVMEnable; ///< Set if any pipe has HostVM enabled
+
+ dml_uint_t GPUVMMaxPageTableLevels; ///< GPUVM level; max of all pipes'
+ dml_uint_t HostVMMaxPageTableLevels; ///< HostVM level; max of all pipes'; that is the number of non-cache HVM levels
+
+ dml_uint_t GPUVMMinPageSizeKBytes[__DML_NUM_PLANES__];
+ dml_bool_t ForceOneRowForFrame[__DML_NUM_PLANES__];
+ dml_bool_t PTEBufferModeOverrideEn[__DML_NUM_PLANES__]; ///< When the override is enabled, DML only checks the given PTE buffer and uses the PTE buffer mode as is
+ dml_bool_t PTEBufferMode[__DML_NUM_PLANES__]; ///< The "mode" is a dml_bool_t, i.e. only two modes can be expressed
+ dml_uint_t ViewportWidth[__DML_NUM_PLANES__];
+ dml_uint_t ViewportHeight[__DML_NUM_PLANES__];
+ dml_uint_t ViewportWidthChroma[__DML_NUM_PLANES__];
+ dml_uint_t ViewportHeightChroma[__DML_NUM_PLANES__];
+ dml_uint_t ViewportXStart[__DML_NUM_PLANES__];
+ dml_uint_t ViewportXStartC[__DML_NUM_PLANES__];
+ dml_uint_t ViewportYStart[__DML_NUM_PLANES__];
+ dml_uint_t ViewportYStartC[__DML_NUM_PLANES__];
+ dml_bool_t ViewportStationary[__DML_NUM_PLANES__];
+
+ dml_bool_t ScalerEnabled[__DML_NUM_PLANES__];
+ dml_float_t HRatio[__DML_NUM_PLANES__];
+ dml_float_t VRatio[__DML_NUM_PLANES__];
+ dml_float_t HRatioChroma[__DML_NUM_PLANES__];
+ dml_float_t VRatioChroma[__DML_NUM_PLANES__];
+ dml_uint_t HTaps[__DML_NUM_PLANES__];
+ dml_uint_t VTaps[__DML_NUM_PLANES__];
+ dml_uint_t HTapsChroma[__DML_NUM_PLANES__];
+ dml_uint_t VTapsChroma[__DML_NUM_PLANES__];
+ dml_uint_t LBBitPerPixel[__DML_NUM_PLANES__];
+
+ enum dml_rotation_angle SourceScan[__DML_NUM_PLANES__];
+ dml_uint_t ScalerRecoutWidth[__DML_NUM_PLANES__];
+
+ dml_bool_t DynamicMetadataEnable[__DML_NUM_PLANES__];
+ dml_uint_t DynamicMetadataLinesBeforeActiveRequired[__DML_NUM_PLANES__];
+ dml_uint_t DynamicMetadataTransmittedBytes[__DML_NUM_PLANES__];
+ dml_uint_t DETSizeOverride[__DML_NUM_PLANES__]; ///< User can specify the desired DET buffer usage per plane
+
+ dml_uint_t NumberOfCursors[__DML_NUM_PLANES__];
+ dml_uint_t CursorWidth[__DML_NUM_PLANES__];
+ dml_uint_t CursorBPP[__DML_NUM_PLANES__]; ///< Plain dml_uint_t, not enum dml_cursor_bpp -- NOTE(review): confirm which encoding is stored
+
+ dml_bool_t setup_for_tdlut[__DML_NUM_PLANES__];
+ enum dml2_tdlut_addressing_mode tdlut_addressing_mode[__DML_NUM_PLANES__]; ///< dml2_* enums are not defined in this header; presumably from dml_top_display_cfg_types.h
+ enum dml2_tdlut_width_mode tdlut_width_mode[__DML_NUM_PLANES__];
+
+ enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[__DML_NUM_PLANES__];
+ enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[__DML_NUM_PLANES__];
+
+ dml_uint_t BlendingAndTiming[__DML_NUM_PLANES__]; ///< Which timing group (like OTG) this plane gets its timing from. Mode check also needs this info, for example to check the number of OTGs, encoders, DSCs etc.
+}; // dml_plane_cfg_st;
+
+/// @brief Surface parameters; every member is an array with one entry per plane (__DML_NUM_PLANES__)
+struct dml_surface_cfg_st {
+ enum dml_swizzle_mode SurfaceTiling[__DML_NUM_PLANES__];
+ enum dml_source_format_class SourcePixelFormat[__DML_NUM_PLANES__];
+ dml_uint_t PitchY[__DML_NUM_PLANES__]; // Y/C suffixes: same quantity for luma and chroma
+ dml_uint_t SurfaceWidthY[__DML_NUM_PLANES__];
+ dml_uint_t SurfaceHeightY[__DML_NUM_PLANES__];
+ dml_uint_t PitchC[__DML_NUM_PLANES__];
+ dml_uint_t SurfaceWidthC[__DML_NUM_PLANES__];
+ dml_uint_t SurfaceHeightC[__DML_NUM_PLANES__];
+
+ dml_bool_t DCCEnable[__DML_NUM_PLANES__];
+ dml_uint_t DCCMetaPitchY[__DML_NUM_PLANES__]; // dml_mode_support_info_st reports aligned versions (AlignedDCCMetaPitchY/C, AlignedYPitch/CPitch)
+ dml_uint_t DCCMetaPitchC[__DML_NUM_PLANES__];
+
+ dml_float_t DCCRateLuma[__DML_NUM_PLANES__];
+ dml_float_t DCCRateChroma[__DML_NUM_PLANES__];
+ dml_float_t DCCFractionOfZeroSizeRequestsLuma[__DML_NUM_PLANES__];
+ dml_float_t DCCFractionOfZeroSizeRequestsChroma[__DML_NUM_PLANES__];
+}; // dml_surface_cfg_st
+
+/// @brief Structure that represents the timing configuration; arrays are sized __DML_NUM_PLANES__
+struct dml_timing_cfg_st {
+ dml_uint_t HTotal[__DML_NUM_PLANES__]; // NOTE(review): index is presumably the plane index (cf. dml_plane_cfg_st.BlendingAndTiming) -- confirm
+ dml_uint_t VTotal[__DML_NUM_PLANES__];
+ dml_uint_t HBlankEnd[__DML_NUM_PLANES__];
+ dml_uint_t VBlankEnd[__DML_NUM_PLANES__];
+ dml_uint_t RefreshRate[__DML_NUM_PLANES__];
+ dml_uint_t VFrontPorch[__DML_NUM_PLANES__];
+ dml_float_t PixelClock[__DML_NUM_PLANES__]; // unit not stated in this header
+ dml_uint_t HActive[__DML_NUM_PLANES__];
+ dml_uint_t VActive[__DML_NUM_PLANES__];
+ dml_bool_t Interlace[__DML_NUM_PLANES__];
+ dml_bool_t DRRDisplay[__DML_NUM_PLANES__];
+ dml_uint_t VBlankNom[__DML_NUM_PLANES__];
+}; // dml_timing_cfg_st;
+
+/// @brief Structure that represents the output stream; arrays are sized __DML_NUM_PLANES__
+struct dml_output_cfg_st {
+ // Output Setting
+ dml_uint_t DSCInputBitPerComponent[__DML_NUM_PLANES__];
+ enum dml_output_format_class OutputFormat[__DML_NUM_PLANES__];
+ enum dml_output_encoder_class OutputEncoder[__DML_NUM_PLANES__];
+ dml_uint_t OutputMultistreamId[__DML_NUM_PLANES__];
+ dml_bool_t OutputMultistreamEn[__DML_NUM_PLANES__];
+ dml_float_t OutputBpp[__DML_NUM_PLANES__]; ///< Used by mode_programming to specify an output bpp; the user can use the output from mode_support (support.OutputBpp)
+ dml_float_t PixelClockBackEnd[__DML_NUM_PLANES__];
+ enum dml_dsc_enable DSCEnable[__DML_NUM_PLANES__]; ///< For mode support check; used to determine if DSC is required
+ dml_uint_t OutputLinkDPLanes[__DML_NUM_PLANES__];
+ enum dml_output_link_dp_rate OutputLinkDPRate[__DML_NUM_PLANES__];
+ dml_float_t ForcedOutputLinkBPP[__DML_NUM_PLANES__];
+ dml_uint_t AudioSampleRate[__DML_NUM_PLANES__];
+ dml_uint_t AudioSampleLayout[__DML_NUM_PLANES__];
+ dml_bool_t OutputDisabled[__DML_NUM_PLANES__];
+ dml_uint_t DSCSlices[__DML_NUM_PLANES__];
+}; // dml_output_cfg_st;
+
+/// @brief Writeback settings; every member is an array sized __DML_NUM_PLANES__
+struct dml_writeback_cfg_st {
+ enum dml_source_format_class WritebackPixelFormat[__DML_NUM_PLANES__]; // reuses the source-format enum
+ dml_bool_t WritebackEnable[__DML_NUM_PLANES__];
+ dml_uint_t ActiveWritebacksPerSurface[__DML_NUM_PLANES__];
+ dml_uint_t WritebackDestinationWidth[__DML_NUM_PLANES__];
+ dml_uint_t WritebackDestinationHeight[__DML_NUM_PLANES__];
+ dml_uint_t WritebackSourceWidth[__DML_NUM_PLANES__];
+ dml_uint_t WritebackSourceHeight[__DML_NUM_PLANES__];
+ dml_uint_t WritebackHTaps[__DML_NUM_PLANES__]; // ip_params_st has matching writeback_max_hscl_taps / writeback_max_vscl_taps
+ dml_uint_t WritebackVTaps[__DML_NUM_PLANES__];
+ dml_float_t WritebackHRatio[__DML_NUM_PLANES__]; // ip_params_st has matching writeback_{min,max}_{hscl,vscl}_ratio
+ dml_float_t WritebackVRatio[__DML_NUM_PLANES__];
+}; // dml_writeback_cfg_st;
+
+/// @brief Hardware resource specific; mainly used by mode_programming when test/sw wants to apply specific settings
+/// which are not the same as what the mode support stage derives. When mode_support is called together with mode_programming, the hw-specific
+/// resources will be set to what the mode_support layer recommends
+struct dml_hw_resource_st {
+ enum dml_odm_mode ODMMode[__DML_NUM_PLANES__]; ///< ODM mode that is chosen in the mode check stage and will be used in mode programming stage
+ dml_uint_t DPPPerSurface[__DML_NUM_PLANES__]; ///< How many DPPs are needed to drive the surface to output. With MPCC or ODMC this could be 2 or 4.
+ dml_bool_t DSCEnabled[__DML_NUM_PLANES__]; ///< Indicates if the DSC is enabled; used in mode_programming
+ dml_uint_t NumberOfDSCSlices[__DML_NUM_PLANES__]; ///< Indicates how many slices are needed to support the given mode
+ dml_float_t DLGRefClkFreqMHz; ///< DLG Global Reference timer
+};
+
+/// @brief To control the clk usage for mode programming
+struct dml_clk_cfg_st {
+ enum dml_clk_cfg_policy dcfclk_option; ///< Used for mode_program; selects between the min required clk as calculated by DML and the test-specific freq (enum dml_clk_cfg_policy also has a third, "state freq", value)
+ enum dml_clk_cfg_policy dispclk_option; ///< Used for mode_program; selects between the min required clk as calculated by DML and the test-specific freq (enum dml_clk_cfg_policy also has a third, "state freq", value)
+ enum dml_clk_cfg_policy dppclk_option[__DML_NUM_PLANES__]; ///< Same choice as above, but selectable per plane
+
+ dml_float_t dcfclk_mhz; ///< NOTE(review): presumably the frequency used when the matching *_option is dml_use_override_freq -- confirm
+ dml_float_t dispclk_mhz;
+ dml_float_t dppclk_mhz[__DML_NUM_PLANES__];
+}; // dml_clk_cfg_st
+
+/// @brief DML display configuration.
+/// Describes how to display a surface in a multi-plane setup and output it to different outputs and writeback using the specified timing
+struct dml_display_cfg_st {
+ struct dml_surface_cfg_st surface;
+ struct dml_plane_cfg_st plane;
+ struct dml_timing_cfg_st timing;
+ struct dml_output_cfg_st output;
+ struct dml_writeback_cfg_st writeback;
+ unsigned int num_surfaces; // plain unsigned int rather than dml_uint_t
+ unsigned int num_timings;
+
+ struct dml_hw_resource_st hw; ///< For mode programming
+ struct dml_clk_cfg_st clk_overrides; ///< For mode programming clk override
+}; // dml_display_cfg_st
+
+/// @brief DML mode evaluation and programming policy
+/// The knobs that affect mode support and mode programming
+struct dml_mode_eval_policy_st {
+ // -------------------
+ // Policy
+ // -------------------
+ enum dml_mpc_use_policy MPCCombineUse[__DML_NUM_PLANES__]; ///< MPC Combine mode as selected by the user; used in mode check stage
+ enum dml_odm_use_policy ODMUse[__DML_NUM_PLANES__]; ///< ODM mode as selected by the user; used in mode check stage
+ enum dml_unbounded_requesting_policy UseUnboundedRequesting; ///< Unbounded request mode preference (note: enumerator value 0 is "enable")
+ enum dml_immediate_flip_requirement ImmediateFlipRequirement[__DML_NUM_PLANES__]; ///< Whether immediate flip is a requirement for this plane. When host vm is present iflip is needed regardless
+ enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank[__DML_NUM_PLANES__]; ///< Specifies whether DML should calculate the values for supporting different power-saving features (cstate, pstate, etc.) during vblank
+
+ enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal;
+ bool UseOnlyMaxPrefetchModes; ///< Declared as plain bool, unlike the dml_bool_t flags around it
+ dml_bool_t UseMinimumRequiredDCFCLK; ///< When set, the mode_check stage works out the min DCFCLK freq that supports the given display configuration. The user can then use the output DCFCLK for mode programming.
+ dml_bool_t DRAMClockChangeRequirementFinal;
+ dml_bool_t FCLKChangeRequirementFinal;
+ dml_bool_t USRRetrainingRequiredFinal;
+ dml_bool_t EnhancedPrefetchScheduleAccelerationFinal;
+
+ dml_bool_t NomDETInKByteOverrideEnable; ///< Nominal DET buffer size for a pipe. If this size fits the required 2 swaths, DML will use this DET size
+ dml_uint_t NomDETInKByteOverrideValue;
+
+ dml_bool_t DCCProgrammingAssumesScanDirectionUnknownFinal;
+ dml_bool_t SynchronizeTimingsFinal;
+ dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ dml_bool_t AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported; ///< If set, mode support reports the mode as supported even though DRAM clock change is not supported (assuming the soc will stay in the max power state)
+ dml_bool_t AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported; ///< If set, mode support reports the mode as supported even though Fabric clock change is not supported (assuming the soc will stay in the max power state)
+};
+
+/// @brief Contains important information after the mode support steps, including why a mode is not supported.
+struct dml_mode_support_info_st {
+ //-----------------
+ // Mode Support Information
+ //-----------------
+ dml_bool_t ModeIsSupported; ///< Whether the mode is supported at any voltage and combine setting
+ dml_bool_t ImmediateFlipSupport; ///< Means the mode supports immediate flip at the max combine setting; determined in mode support and used in mode programming
+ dml_uint_t MaximumMPCCombine; ///< If using MPC combine helps the power saving support, then this will be set to 1
+ dml_bool_t UnboundedRequestEnabled;
+ dml_uint_t CompressedBufferSizeInkByte;
+
+ /* Mode Support Reason -- flag names mix polarities (e.g. "...Support" vs "NotEnough..."); read each name before testing it */
+ dml_bool_t WritebackLatencySupport;
+ dml_bool_t ScaleRatioAndTapsSupport;
+ dml_bool_t SourceFormatPixelAndScanSupport;
+ dml_bool_t MPCCombineMethodIncompatible;
+ dml_bool_t P2IWith420;
+ dml_bool_t DSCOnlyIfNecessaryWithBPP;
+ dml_bool_t DSC422NativeNotSupported;
+ dml_bool_t LinkRateDoesNotMatchDPVersion;
+ dml_bool_t LinkRateForMultistreamNotIndicated;
+ dml_bool_t BPPForMultistreamNotIndicated;
+ dml_bool_t MultistreamWithHDMIOreDP; /* sic: identifier spelled "OreDP" */
+ dml_bool_t MSOOrODMSplitWithNonDPLink;
+ dml_bool_t NotEnoughLanesForMSO;
+ dml_bool_t NumberOfOTGSupport;
+ dml_bool_t NumberOfHDMIFRLSupport;
+ dml_bool_t NumberOfDP2p0Support;
+ dml_bool_t NonsupportedDSCInputBPC;
+ dml_bool_t WritebackScaleRatioAndTapsSupport;
+ dml_bool_t CursorSupport;
+ dml_bool_t PitchSupport;
+ dml_bool_t ViewportExceedsSurface;
+ dml_bool_t ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified;
+ dml_bool_t ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
+ dml_bool_t InvalidCombinationOfMALLUseForPStateAndStaticScreen;
+ dml_bool_t InvalidCombinationOfMALLUseForPState;
+ dml_bool_t ExceededMALLSize;
+ dml_bool_t EnoughWritebackUnits;
+
+ dml_bool_t ExceededMultistreamSlots;
+ dml_bool_t ODMCombineTwoToOneSupportCheckOK;
+ dml_bool_t ODMCombineFourToOneSupportCheckOK;
+ dml_bool_t NotEnoughDSCUnits;
+ dml_bool_t NotEnoughDSCSlices;
+ dml_bool_t PixelsPerLinePerDSCUnitSupport;
+ dml_bool_t DSCCLKRequiredMoreThanSupported;
+ dml_bool_t DTBCLKRequiredMoreThanSupported;
+ dml_bool_t LinkCapacitySupport;
+
+ dml_bool_t ROBSupport[2]; /* NOTE(review): the [2] dimension is presumably the MPC-combine setting (0/1, cf. MaximumMPCCombine) -- confirm */
+ dml_bool_t PTEBufferSizeNotExceeded[2];
+ dml_bool_t DCCMetaBufferSizeNotExceeded[2];
+ dml_bool_t TotalVerticalActiveBandwidthSupport[2];
+ enum dml_dram_clock_change_support DRAMClockChangeSupport[2];
+ dml_float_t ActiveDRAMClockChangeLatencyMargin[__DML_NUM_PLANES__];
+ dml_uint_t SubViewportLinesNeededInMALL[__DML_NUM_PLANES__];
+ enum dml_fclock_change_support FCLKChangeSupport[2];
+ dml_bool_t USRRetrainingSupport[2];
+ dml_bool_t VActiveBandwithSupport[2]; /* sic: identifier spelled "Bandwith" */
+ dml_bool_t PrefetchSupported[2];
+ dml_bool_t DynamicMetadataSupported[2];
+ dml_bool_t VRatioInPrefetchSupported[2];
+ dml_bool_t DISPCLK_DPPCLK_Support[2];
+ dml_bool_t TotalAvailablePipesSupport[2];
+ dml_bool_t ModeSupport[2];
+ dml_bool_t ViewportSizeSupport[2];
+ dml_bool_t ImmediateFlipSupportedForState[2];
+
+ dml_bool_t NoTimeForPrefetch[2][__DML_NUM_PLANES__];
+ dml_bool_t NoTimeForDynamicMetadata[2][__DML_NUM_PLANES__];
+
+ dml_bool_t MPCCombineEnable[__DML_NUM_PLANES__]; ///< Indicates if MPC Combine is enabled in the given state and optimized mpc combine setting
+ enum dml_odm_mode ODMMode[__DML_NUM_PLANES__]; ///< ODM mode that is chosen in the mode check stage and will be used in mode programming stage
+ dml_uint_t DPPPerSurface[__DML_NUM_PLANES__]; ///< How many DPPs are needed to drive the surface to output. With MPCC or ODMC this could be 2 or 4.
+ dml_bool_t DSCEnabled[__DML_NUM_PLANES__]; ///< Indicates if the DSC is actually required; used in mode_programming
+ dml_bool_t FECEnabled[__DML_NUM_PLANES__]; ///< Indicates if the FEC is actually required
+ dml_uint_t NumberOfDSCSlices[__DML_NUM_PLANES__]; ///< Indicates how many slices are needed to support the given mode
+
+ dml_float_t OutputBpp[__DML_NUM_PLANES__];
+ enum dml_output_type_and_rate__type OutputType[__DML_NUM_PLANES__];
+ enum dml_output_type_and_rate__rate OutputRate[__DML_NUM_PLANES__];
+
+ dml_float_t AlignedDCCMetaPitchY[__DML_NUM_PLANES__]; ///< Pitch value that is aligned to tiling setting
+ dml_float_t AlignedDCCMetaPitchC[__DML_NUM_PLANES__];
+ dml_float_t AlignedYPitch[__DML_NUM_PLANES__];
+ dml_float_t AlignedCPitch[__DML_NUM_PLANES__];
+ dml_float_t MaxTotalVerticalActiveAvailableBandwidth[2]; ///< Nominal bw available for display
+}; // dml_mode_support_info_st
+
+/// @brief Intermediate values and outputs of the mode check function. Users can query the contents of this struct to learn more about the result of mode evaluation.
+struct mode_support_st {
+ struct ip_params_st ip;
+ struct soc_bounding_box_st soc;
+ struct soc_state_bounding_box_st state; //<brief Per-state bbox values; only 1 state per compute
+ struct dml_mode_eval_policy_st policy;
+
+ dml_uint_t state_idx; //<brief The power state idx for the power state under this computation
+ dml_uint_t max_state_idx; //<brief The MAX power state idx
+ struct soc_state_bounding_box_st max_state; //<brief The MAX power state; some algorithms need to know the max state info (NOTE(review): the original note was cut off after "to determine if")
+ struct dml_display_cfg_st cache_display_cfg; // <brief A copy of the current display cfg in consideration
+
+ // Physical info; only used for programming
+ dml_uint_t num_active_planes; // <brief As determined by either e2e_pipe_param or display_cfg
+
+ // Calculated Clocks
+ dml_float_t RequiredDISPCLK[2]; /// <brief Required DISPCLK; depends on pixel rate; odm mode etc.
+ dml_float_t RequiredDPPCLKThisState[__DML_NUM_PLANES__];
+ dml_float_t DCFCLKState[2]; /// <brief Recommended DCFCLK freq; calculated by DML. If UseMinimumRequiredDCFCLK is not set; then it will be just the state DCFCLK; else it will be the min DCFCLK for support
+ dml_float_t RequiredDISPCLKPerSurface[2][__DML_NUM_PLANES__];
+ dml_float_t RequiredDPPCLKPerSurface[2][__DML_NUM_PLANES__];
+
+ dml_float_t FabricClock; /// <brief Basically just the clock freq at the min (or given) state
+ dml_float_t DRAMSpeed; /// <brief Basically just the clock freq at the min (or given) state
+ dml_float_t SOCCLK; /// <brief Basically just the clock freq at the min (or given) state
+ dml_float_t DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting
+ dml_float_t GlobalDPPCLK; /// <brief the Max DPPCLK freq out of all pipes
+
+ // ----------------------------------
+ // Mode Support Info and fail reason
+ // ----------------------------------
+ struct dml_mode_support_info_st support;
+
+ // These are calculated before the ModeSupport and ModeProgram step
+ // They represent the bound for the return buffer sizing
+ dml_uint_t MaxTotalDETInKByte;
+ dml_uint_t NomDETInKByte;
+ dml_uint_t MinCompressedBufferSizeInKByte;
+
+ // Info obtained at the end of mode support calculations
+ // The reported info is at the "optimal" state and combine setting
+ dml_float_t ReturnBW;
+ dml_float_t ReturnDRAMBW;
+ dml_uint_t DETBufferSizeInKByte[__DML_NUM_PLANES__]; // <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value.
+ dml_uint_t DETBufferSizeY[__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeC[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightY[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightC[__DML_NUM_PLANES__];
+
+ // ----------------------------------
+ // Intermediates/Informational
+ // ----------------------------------
+ dml_uint_t TotImmediateFlipBytes;
+ dml_bool_t DCCEnabledInAnySurface;
+ dml_float_t WritebackRequiredDISPCLK;
+ dml_float_t TimeCalc;
+ dml_float_t TWait;
+
+ dml_uint_t SwathWidthYAllStates[2][__DML_NUM_PLANES__]; // NOTE(review): the meaning of the leading [2] index on the two-dimensional arrays in this struct is not shown in this chunk - confirm against the mode support code
+ dml_uint_t SwathWidthCAllStates[2][__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightYAllStates[2][__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightCAllStates[2][__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthYThisState[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthCThisState[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightYThisState[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightCThisState[__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeInKByteAllStates[2][__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeYAllStates[2][__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeCAllStates[2][__DML_NUM_PLANES__];
+ dml_bool_t UnboundedRequestEnabledAllStates[2];
+ dml_uint_t CompressedBufferSizeInkByteAllStates[2];
+ dml_bool_t UnboundedRequestEnabledThisState;
+ dml_uint_t CompressedBufferSizeInkByteThisState;
+ dml_uint_t DETBufferSizeInKByteThisState[__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeYThisState[__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeCThisState[__DML_NUM_PLANES__];
+ dml_float_t VRatioPreY[2][__DML_NUM_PLANES__];
+ dml_float_t VRatioPreC[2][__DML_NUM_PLANES__];
+ dml_uint_t swath_width_luma_ub_all_states[2][__DML_NUM_PLANES__];
+ dml_uint_t swath_width_chroma_ub_all_states[2][__DML_NUM_PLANES__];
+ dml_uint_t swath_width_luma_ub_this_state[__DML_NUM_PLANES__];
+ dml_uint_t swath_width_chroma_ub_this_state[__DML_NUM_PLANES__];
+ dml_uint_t RequiredSlots[__DML_NUM_PLANES__];
+ dml_uint_t PDEAndMetaPTEBytesPerFrame[2][__DML_NUM_PLANES__];
+ dml_uint_t MetaRowBytes[2][__DML_NUM_PLANES__];
+ dml_uint_t DPTEBytesPerRow[2][__DML_NUM_PLANES__];
+ dml_uint_t PrefetchLinesY[2][__DML_NUM_PLANES__];
+ dml_uint_t PrefetchLinesC[2][__DML_NUM_PLANES__];
+ dml_uint_t MaxNumSwY[__DML_NUM_PLANES__]; /// <brief Max number of swath for prefetch
+ dml_uint_t MaxNumSwC[__DML_NUM_PLANES__]; /// <brief Max number of swath for prefetch
+ dml_uint_t PrefillY[__DML_NUM_PLANES__];
+ dml_uint_t PrefillC[__DML_NUM_PLANES__];
+
+ dml_uint_t PrefetchLinesYThisState[__DML_NUM_PLANES__];
+ dml_uint_t PrefetchLinesCThisState[__DML_NUM_PLANES__];
+ dml_uint_t DPTEBytesPerRowThisState[__DML_NUM_PLANES__];
+ dml_uint_t PDEAndMetaPTEBytesPerFrameThisState[__DML_NUM_PLANES__];
+ dml_uint_t MetaRowBytesThisState[__DML_NUM_PLANES__];
+ dml_bool_t use_one_row_for_frame[2][__DML_NUM_PLANES__];
+ dml_bool_t use_one_row_for_frame_flip[2][__DML_NUM_PLANES__];
+ dml_bool_t use_one_row_for_frame_this_state[__DML_NUM_PLANES__];
+ dml_bool_t use_one_row_for_frame_flip_this_state[__DML_NUM_PLANES__];
+
+ dml_float_t LineTimesForPrefetch[__DML_NUM_PLANES__];
+ dml_float_t LinesForMetaPTE[__DML_NUM_PLANES__];
+ dml_float_t LinesForMetaAndDPTERow[__DML_NUM_PLANES__];
+ dml_float_t SwathWidthYSingleDPP[__DML_NUM_PLANES__];
+ dml_float_t SwathWidthCSingleDPP[__DML_NUM_PLANES__];
+ dml_uint_t BytePerPixelY[__DML_NUM_PLANES__];
+ dml_uint_t BytePerPixelC[__DML_NUM_PLANES__];
+ dml_float_t BytePerPixelInDETY[__DML_NUM_PLANES__];
+ dml_float_t BytePerPixelInDETC[__DML_NUM_PLANES__];
+
+ dml_uint_t Read256BlockHeightY[__DML_NUM_PLANES__];
+ dml_uint_t Read256BlockWidthY[__DML_NUM_PLANES__];
+ dml_uint_t Read256BlockHeightC[__DML_NUM_PLANES__];
+ dml_uint_t Read256BlockWidthC[__DML_NUM_PLANES__];
+ dml_uint_t MacroTileHeightY[__DML_NUM_PLANES__];
+ dml_uint_t MacroTileHeightC[__DML_NUM_PLANES__];
+ dml_uint_t MacroTileWidthY[__DML_NUM_PLANES__];
+ dml_uint_t MacroTileWidthC[__DML_NUM_PLANES__];
+ dml_float_t PSCL_FACTOR[__DML_NUM_PLANES__];
+ dml_float_t PSCL_FACTOR_CHROMA[__DML_NUM_PLANES__];
+ dml_float_t MaximumSwathWidthLuma[__DML_NUM_PLANES__];
+ dml_float_t MaximumSwathWidthChroma[__DML_NUM_PLANES__];
+ dml_float_t Tno_bw[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestVMInImmediateFlip[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestRowInImmediateFlip[__DML_NUM_PLANES__];
+ dml_float_t WritebackDelayTime[__DML_NUM_PLANES__];
+ dml_uint_t dpte_group_bytes[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_chroma[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_height[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_height_chroma[__DML_NUM_PLANES__];
+ dml_float_t UrgLatency;
+ dml_float_t UrgentBurstFactorCursor[2][__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorCursorPre[__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorLuma[2][__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorLumaPre[__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorChroma[2][__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorChromaPre[__DML_NUM_PLANES__];
+ dml_float_t MaximumSwathWidthInLineBufferLuma;
+ dml_float_t MaximumSwathWidthInLineBufferChroma;
+ dml_float_t ExtraLatency;
+
+ // Backend
+ dml_bool_t RequiresDSC[__DML_NUM_PLANES__];
+ dml_bool_t RequiresFEC[__DML_NUM_PLANES__];
+ dml_float_t OutputBppPerState[__DML_NUM_PLANES__];
+ dml_uint_t DSCDelayPerState[__DML_NUM_PLANES__];
+ enum dml_output_type_and_rate__type OutputTypePerState[__DML_NUM_PLANES__];
+ enum dml_output_type_and_rate__rate OutputRatePerState[__DML_NUM_PLANES__];
+
+ // Bandwidth Related Info
+ dml_float_t BandwidthAvailableForImmediateFlip;
+ dml_float_t ReadBandwidthLuma[__DML_NUM_PLANES__];
+ dml_float_t ReadBandwidthChroma[__DML_NUM_PLANES__];
+ dml_float_t WriteBandwidth[__DML_NUM_PLANES__];
+ dml_float_t RequiredPrefetchPixelDataBWLuma[__DML_NUM_PLANES__];
+ dml_float_t RequiredPrefetchPixelDataBWChroma[__DML_NUM_PLANES__];
+ dml_float_t cursor_bw[__DML_NUM_PLANES__];
+ dml_float_t cursor_bw_pre[__DML_NUM_PLANES__];
+ dml_float_t prefetch_vmrow_bw[__DML_NUM_PLANES__];
+ dml_float_t final_flip_bw[__DML_NUM_PLANES__];
+ dml_float_t meta_row_bandwidth_this_state[__DML_NUM_PLANES__];
+ dml_float_t dpte_row_bandwidth_this_state[__DML_NUM_PLANES__];
+ dml_float_t ReturnBWPerState[2];
+ dml_float_t ReturnDRAMBWPerState[2];
+ dml_float_t meta_row_bandwidth[2][__DML_NUM_PLANES__];
+ dml_float_t dpte_row_bandwidth[2][__DML_NUM_PLANES__];
+
+ // Values that should be fed back to the caller
+ enum dml_odm_mode ODMModePerState[__DML_NUM_PLANES__];
+ enum dml_odm_mode ODMModeThisState[__DML_NUM_PLANES__];
+ dml_uint_t SurfaceSizeInMALL[__DML_NUM_PLANES__];
+ dml_uint_t NoOfDPP[2][__DML_NUM_PLANES__];
+ dml_uint_t NoOfDPPThisState[__DML_NUM_PLANES__];
+ dml_bool_t MPCCombine[2][__DML_NUM_PLANES__];
+ dml_bool_t MPCCombineThisState[__DML_NUM_PLANES__];
+ dml_float_t ProjectedDCFCLKDeepSleep[2];
+ dml_float_t MinDPPCLKUsingSingleDPP[__DML_NUM_PLANES__];
+ dml_bool_t SingleDPPViewportSizeSupportPerSurface[__DML_NUM_PLANES__];
+ dml_bool_t ImmediateFlipSupportedForPipe[__DML_NUM_PLANES__];
+ dml_bool_t NotUrgentLatencyHiding[__DML_NUM_PLANES__];
+ dml_bool_t NotUrgentLatencyHidingPre[__DML_NUM_PLANES__];
+ dml_bool_t PTEBufferSizeNotExceededPerState[__DML_NUM_PLANES__];
+ dml_bool_t DCCMetaBufferSizeNotExceededPerState[__DML_NUM_PLANES__];
+ dml_uint_t PrefetchMode[__DML_NUM_PLANES__];
+ dml_uint_t TotalNumberOfActiveDPP[2];
+ dml_uint_t TotalNumberOfSingleDPPSurfaces[2];
+ dml_uint_t TotalNumberOfDCCActiveDPP[2];
+
+ dml_uint_t SubViewportLinesNeededInMALL[__DML_NUM_PLANES__];
+
+}; // mode_support_st
+
+/// @brief A mega structure that houses various info for the mode programming step.
+struct mode_program_st {
+
+ //-------------
+ // Intermediate/Informational
+ //-------------
+ dml_float_t UrgentLatency;
+ dml_float_t UrgentLatencyWithUSRRetraining;
+ dml_uint_t VInitPreFillY[__DML_NUM_PLANES__];
+ dml_uint_t VInitPreFillC[__DML_NUM_PLANES__];
+ dml_uint_t MaxNumSwathY[__DML_NUM_PLANES__];
+ dml_uint_t MaxNumSwathC[__DML_NUM_PLANES__];
+
+ dml_float_t BytePerPixelDETY[__DML_NUM_PLANES__];
+ dml_float_t BytePerPixelDETC[__DML_NUM_PLANES__];
+ dml_uint_t BytePerPixelY[__DML_NUM_PLANES__];
+ dml_uint_t BytePerPixelC[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthY[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthC[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthSingleDPPY[__DML_NUM_PLANES__];
+ dml_uint_t SwathWidthSingleDPPC[__DML_NUM_PLANES__];
+ dml_float_t ReadBandwidthSurfaceLuma[__DML_NUM_PLANES__];
+ dml_float_t ReadBandwidthSurfaceChroma[__DML_NUM_PLANES__];
+
+ dml_uint_t PixelPTEBytesPerRow[__DML_NUM_PLANES__];
+ dml_uint_t PDEAndMetaPTEBytesFrame[__DML_NUM_PLANES__];
+ dml_uint_t MetaRowByte[__DML_NUM_PLANES__];
+ dml_uint_t PrefetchSourceLinesY[__DML_NUM_PLANES__];
+ dml_float_t RequiredPrefetchPixDataBWLuma[__DML_NUM_PLANES__];
+ dml_float_t RequiredPrefetchPixDataBWChroma[__DML_NUM_PLANES__];
+ dml_uint_t PrefetchSourceLinesC[__DML_NUM_PLANES__];
+ dml_float_t PSCL_THROUGHPUT[__DML_NUM_PLANES__];
+ dml_float_t PSCL_THROUGHPUT_CHROMA[__DML_NUM_PLANES__];
+ dml_uint_t DSCDelay[__DML_NUM_PLANES__];
+ dml_float_t DPPCLKUsingSingleDPP[__DML_NUM_PLANES__];
+
+ dml_uint_t MacroTileWidthY[__DML_NUM_PLANES__];
+ dml_uint_t MacroTileWidthC[__DML_NUM_PLANES__];
+ dml_uint_t BlockHeight256BytesY[__DML_NUM_PLANES__];
+ dml_uint_t BlockHeight256BytesC[__DML_NUM_PLANES__];
+ dml_uint_t BlockWidth256BytesY[__DML_NUM_PLANES__];
+ dml_uint_t BlockWidth256BytesC[__DML_NUM_PLANES__];
+
+ dml_uint_t BlockHeightY[__DML_NUM_PLANES__];
+ dml_uint_t BlockHeightC[__DML_NUM_PLANES__];
+ dml_uint_t BlockWidthY[__DML_NUM_PLANES__];
+ dml_uint_t BlockWidthC[__DML_NUM_PLANES__];
+
+ dml_uint_t SurfaceSizeInTheMALL[__DML_NUM_PLANES__];
+ dml_float_t VRatioPrefetchY[__DML_NUM_PLANES__];
+ dml_float_t VRatioPrefetchC[__DML_NUM_PLANES__];
+ dml_float_t Tno_bw[__DML_NUM_PLANES__];
+ dml_float_t final_flip_bw[__DML_NUM_PLANES__];
+ dml_float_t prefetch_vmrow_bw[__DML_NUM_PLANES__];
+ dml_float_t cursor_bw[__DML_NUM_PLANES__];
+ dml_float_t cursor_bw_pre[__DML_NUM_PLANES__];
+ dml_float_t WritebackDelay[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_linear[__DML_NUM_PLANES__];
+ dml_uint_t meta_req_width[__DML_NUM_PLANES__];
+ dml_uint_t meta_req_height[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_width[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_height[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_width_luma_ub[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_width_chroma_ub[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_chroma[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_linear_chroma[__DML_NUM_PLANES__];
+ dml_uint_t meta_req_width_chroma[__DML_NUM_PLANES__];
+ dml_uint_t meta_req_height_chroma[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_width_chroma[__DML_NUM_PLANES__];
+ dml_uint_t meta_row_height_chroma[__DML_NUM_PLANES__];
+ dml_uint_t vm_group_bytes[__DML_NUM_PLANES__];
+ dml_uint_t dpte_group_bytes[__DML_NUM_PLANES__];
+ dml_float_t meta_row_bw[__DML_NUM_PLANES__];
+ dml_float_t dpte_row_bw[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorCursor[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorCursorPre[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorLuma[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorLumaPre[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorChroma[__DML_NUM_PLANES__];
+ dml_float_t UrgBurstFactorChromaPre[__DML_NUM_PLANES__];
+
+ dml_uint_t swath_width_luma_ub[__DML_NUM_PLANES__];
+ dml_uint_t swath_width_chroma_ub[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEReqWidthY[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEReqHeightY[__DML_NUM_PLANES__];
+ dml_uint_t PTERequestSizeY[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEReqWidthC[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEReqHeightC[__DML_NUM_PLANES__];
+ dml_uint_t PTERequestSizeC[__DML_NUM_PLANES__];
+
+ dml_float_t Tdmdl_vm[__DML_NUM_PLANES__];
+ dml_float_t Tdmdl[__DML_NUM_PLANES__];
+ dml_float_t TSetup[__DML_NUM_PLANES__];
+ dml_uint_t dpde0_bytes_per_frame_ub_l[__DML_NUM_PLANES__];
+ dml_uint_t meta_pte_bytes_per_frame_ub_l[__DML_NUM_PLANES__];
+ dml_uint_t dpde0_bytes_per_frame_ub_c[__DML_NUM_PLANES__];
+ dml_uint_t meta_pte_bytes_per_frame_ub_c[__DML_NUM_PLANES__];
+
+ dml_bool_t UnboundedRequestEnabled;
+ dml_uint_t compbuf_reserved_space_64b;
+ dml_uint_t compbuf_reserved_space_zs;
+ dml_uint_t CompressedBufferSizeInkByte;
+
+ dml_bool_t NoUrgentLatencyHiding[__DML_NUM_PLANES__];
+ dml_bool_t NoUrgentLatencyHidingPre[__DML_NUM_PLANES__];
+ dml_float_t UrgentExtraLatency;
+ dml_bool_t PrefetchAndImmediateFlipSupported;
+ dml_float_t TotalDataReadBandwidth;
+ dml_float_t BandwidthAvailableForImmediateFlip;
+ dml_bool_t NotEnoughTimeForDynamicMetadata[__DML_NUM_PLANES__];
+
+ dml_float_t ReadBandwidthLuma[__DML_NUM_PLANES__];
+ dml_float_t ReadBandwidthChroma[__DML_NUM_PLANES__];
+
+ dml_float_t total_dcn_read_bw_with_flip;
+ dml_float_t total_dcn_read_bw_with_flip_no_urgent_burst;
+ dml_float_t TotalDataReadBandwidthNotIncludingMALLPrefetch;
+ dml_float_t total_dcn_read_bw_with_flip_not_including_MALL_prefetch;
+ dml_float_t non_urgent_total_dcn_read_bw_with_flip;
+ dml_float_t non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch;
+
+ dml_bool_t use_one_row_for_frame[__DML_NUM_PLANES__];
+ dml_bool_t use_one_row_for_frame_flip[__DML_NUM_PLANES__];
+
+ dml_float_t TCalc;
+ dml_uint_t TotImmediateFlipBytes;
+
+ // -------------------
+ // Output
+ // -------------------
+ dml_uint_t pipe_plane[__DML_NUM_PLANES__]; // <brief The plane idx of a pipe; used mainly by dv to map the pipe inst to the plane index within DML core
+ dml_uint_t num_active_pipes;
+
+ dml_bool_t NoTimeToPrefetch[__DML_NUM_PLANES__]; /// <brief Prefetch schedule calculation result
+
+ // Support
+ dml_uint_t PrefetchMode[__DML_NUM_PLANES__]; /// <brief prefetch mode used for prefetch support check in mode programming step
+ dml_bool_t PrefetchModeSupported; /// <brief Is the prefetch mode (bandwidth and latency) supported
+ dml_bool_t ImmediateFlipSupported;
+ dml_bool_t ImmediateFlipSupportedForPipe[__DML_NUM_PLANES__];
+
+ // Clock
+ dml_float_t Dcfclk;
+ dml_float_t Dispclk; /// <brief dispclk being used in mode programming
+ dml_float_t Dppclk[__DML_NUM_PLANES__]; /// <brief dppclk being used in mode programming
+ dml_float_t WritebackDISPCLK;
+ dml_float_t GlobalDPPCLK;
+
+ /// @brief These "calculated" dispclk and dppclk clocks are calculated in the mode programming step.
+ // Depending on the dml_clk_cfg_st option; these calculated values may not be used in subsequent calculations.
+ // Possible DV usage: the calculated values are fetched by the test once after the mode_programming step and then possibly
+ // used as the min; with the actual freq adjusted for the 2nd pass
+ dml_float_t Dispclk_calculated;
+ dml_float_t Dppclk_calculated[__DML_NUM_PLANES__];
+
+ dml_float_t DSCCLK_calculated[__DML_NUM_PLANES__]; /// <brief Required DSCCLK freq. Backend; not used in any subsequent calculations for now
+ dml_float_t DCFCLKDeepSleep;
+
+ // ARB reg
+ dml_bool_t DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+ struct Watermarks Watermark;
+
+ // DCC compression control
+ dml_uint_t DCCYMaxUncompressedBlock[__DML_NUM_PLANES__];
+ dml_uint_t DCCYMaxCompressedBlock[__DML_NUM_PLANES__];
+ dml_uint_t DCCYIndependentBlock[__DML_NUM_PLANES__];
+ dml_uint_t DCCCMaxUncompressedBlock[__DML_NUM_PLANES__];
+ dml_uint_t DCCCMaxCompressedBlock[__DML_NUM_PLANES__];
+ dml_uint_t DCCCIndependentBlock[__DML_NUM_PLANES__];
+
+ // Stutter Efficiency
+ dml_float_t StutterEfficiency;
+ dml_float_t StutterEfficiencyNotIncludingVBlank;
+ dml_uint_t NumberOfStutterBurstsPerFrame;
+ dml_float_t Z8StutterEfficiency;
+ dml_uint_t Z8NumberOfStutterBurstsPerFrame;
+ dml_float_t Z8StutterEfficiencyNotIncludingVBlank;
+ dml_float_t StutterPeriod;
+ dml_float_t Z8StutterEfficiencyBestCase;
+ dml_uint_t Z8NumberOfStutterBurstsPerFrameBestCase;
+ dml_float_t Z8StutterEfficiencyNotIncludingVBlankBestCase;
+ dml_float_t StutterPeriodBestCase;
+
+ // DLG TTU reg
+ dml_float_t MIN_DST_Y_NEXT_START[__DML_NUM_PLANES__];
+ dml_bool_t VREADY_AT_OR_AFTER_VSYNC[__DML_NUM_PLANES__];
+ dml_uint_t DSTYAfterScaler[__DML_NUM_PLANES__];
+ dml_uint_t DSTXAfterScaler[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesForPrefetch[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestVMInVBlank[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestRowInVBlank[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestVMInImmediateFlip[__DML_NUM_PLANES__];
+ dml_float_t DestinationLinesToRequestRowInImmediateFlip[__DML_NUM_PLANES__];
+ dml_float_t MinTTUVBlank[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeLineDeliveryTimeLuma[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeLineDeliveryTimeChroma[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeRequestDeliveryTimeLuma[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeRequestDeliveryTimeChroma[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[__DML_NUM_PLANES__];
+ dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[__DML_NUM_PLANES__];
+ dml_float_t CursorRequestDeliveryTime[__DML_NUM_PLANES__];
+ dml_float_t CursorRequestDeliveryTimePrefetch[__DML_NUM_PLANES__];
+
+ dml_float_t DST_Y_PER_PTE_ROW_NOM_L[__DML_NUM_PLANES__];
+ dml_float_t DST_Y_PER_PTE_ROW_NOM_C[__DML_NUM_PLANES__];
+ dml_float_t DST_Y_PER_META_ROW_NOM_L[__DML_NUM_PLANES__];
+ dml_float_t DST_Y_PER_META_ROW_NOM_C[__DML_NUM_PLANES__];
+ dml_float_t TimePerMetaChunkNominal[__DML_NUM_PLANES__];
+ dml_float_t TimePerChromaMetaChunkNominal[__DML_NUM_PLANES__];
+ dml_float_t TimePerMetaChunkVBlank[__DML_NUM_PLANES__];
+ dml_float_t TimePerChromaMetaChunkVBlank[__DML_NUM_PLANES__];
+ dml_float_t TimePerMetaChunkFlip[__DML_NUM_PLANES__];
+ dml_float_t TimePerChromaMetaChunkFlip[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_nom_luma[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_nom_chroma[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_vblank_luma[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_vblank_chroma[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_flip_luma[__DML_NUM_PLANES__];
+ dml_float_t time_per_pte_group_flip_chroma[__DML_NUM_PLANES__];
+ dml_float_t TimePerVMGroupVBlank[__DML_NUM_PLANES__];
+ dml_float_t TimePerVMGroupFlip[__DML_NUM_PLANES__];
+ dml_float_t TimePerVMRequestVBlank[__DML_NUM_PLANES__];
+ dml_float_t TimePerVMRequestFlip[__DML_NUM_PLANES__];
+
+ dml_float_t FractionOfUrgentBandwidth;
+ dml_float_t FractionOfUrgentBandwidthImmediateFlip;
+
+ // RQ registers
+ dml_bool_t PTE_BUFFER_MODE[__DML_NUM_PLANES__];
+ dml_uint_t BIGK_FRAGMENT_SIZE[__DML_NUM_PLANES__];
+
+ dml_uint_t SubViewportLinesNeededInMALL[__DML_NUM_PLANES__];
+ dml_bool_t UsesMALLForStaticScreen[__DML_NUM_PLANES__];
+
+ // OTG
+ dml_uint_t VStartupMin[__DML_NUM_PLANES__]; /// <brief Minimum vstartup to meet the prefetch schedule (i.e. the prefetch solution can be found at this vstartup time); not the actual global sync vstartup pos.
+ dml_uint_t VStartup[__DML_NUM_PLANES__]; /// <brief The vstartup value for OTG programming (will be set to max vstartup; but now bounded by min(vblank_nom, actual vblank))
+ dml_uint_t VUpdateOffsetPix[__DML_NUM_PLANES__];
+ dml_uint_t VUpdateWidthPix[__DML_NUM_PLANES__];
+ dml_uint_t VReadyOffsetPix[__DML_NUM_PLANES__];
+
+ // Latency and Support
+ dml_float_t MaxActiveFCLKChangeLatencySupported;
+ dml_bool_t USRRetrainingSupport;
+ enum dml_fclock_change_support FCLKChangeSupport;
+ enum dml_dram_clock_change_support DRAMClockChangeSupport;
+ dml_float_t MaxActiveDRAMClockChangeLatencySupported[__DML_NUM_PLANES__];
+ dml_float_t WritebackAllowFCLKChangeEndPosition[__DML_NUM_PLANES__];
+ dml_float_t WritebackAllowDRAMClockChangeEndPosition[__DML_NUM_PLANES__];
+
+ // buffer sizing
+ dml_uint_t DETBufferSizeInKByte[__DML_NUM_PLANES__]; // <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value.
+ dml_uint_t DETBufferSizeY[__DML_NUM_PLANES__];
+ dml_uint_t DETBufferSizeC[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightY[__DML_NUM_PLANES__];
+ dml_uint_t SwathHeightC[__DML_NUM_PLANES__];
+}; // mode_program_st
+
+struct soc_states_st { /// <brief Table of soc pwr states: a count plus a fixed-size array of per-state bounding boxes
+ dml_uint_t num_states; /// <brief num of soc pwr states
+ struct soc_state_bounding_box_st state_array[__DML_MAX_STATE_ARRAY_SIZE__]; /// <brief fixed size array that holds states struct; capacity is __DML_MAX_STATE_ARRAY_SIZE__ entries
+};
+
+struct UseMinimumDCFCLK_params_st { /// <brief Parameter bundle; going by its name it is the argument pack of a UseMinimumDCFCLK routine that is not visible in this chunk (confirm). Mixes by-value scalars with pointers to caller-owned arrays.
+ enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange;
+ dml_bool_t *DRRDisplay;
+ dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ dml_uint_t MaxInterDCNTileRepeaters;
+ dml_uint_t MaxPrefetchMode;
+ dml_float_t DRAMClockChangeLatencyFinal;
+ dml_float_t FCLKChangeLatency;
+ dml_float_t SREnterPlusExitTime;
+ dml_uint_t ReturnBusWidth;
+ dml_uint_t RoundTripPingLatencyCycles;
+ dml_uint_t ReorderingBytes;
+ dml_uint_t PixelChunkSizeInKByte;
+ dml_uint_t MetaChunkSize;
+ dml_bool_t GPUVMEnable;
+ dml_uint_t GPUVMMaxPageTableLevels;
+ dml_bool_t HostVMEnable;
+ dml_uint_t NumberOfActiveSurfaces;
+ dml_uint_t HostVMMinPageSize;
+ dml_uint_t HostVMMaxNonCachedPageTableLevels;
+ dml_bool_t DynamicMetadataVMEnabled;
+ dml_bool_t ImmediateFlipRequirement;
+ dml_bool_t ProgressiveToInterlaceUnitInOPP;
+ dml_float_t MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation;
+ dml_float_t PercentOfIdealSDPPortBWReceivedAfterUrgLatency;
+ dml_uint_t *VTotal;
+ dml_uint_t *VActive;
+ dml_uint_t *DynamicMetadataTransmittedBytes;
+ dml_uint_t *DynamicMetadataLinesBeforeActiveRequired;
+ dml_bool_t *Interlace;
+ dml_float_t (*RequiredDPPCLKPerSurface)[__DML_NUM_PLANES__]; // pointer to rows of __DML_NUM_PLANES__ elements; same name and row width as the [2][__DML_NUM_PLANES__] member of mode_support_st
+ dml_float_t *RequiredDISPCLK;
+ dml_float_t UrgLatency;
+ dml_uint_t (*NoOfDPP)[__DML_NUM_PLANES__]; // pointer to rows of __DML_NUM_PLANES__ elements
+ dml_float_t *ProjectedDCFCLKDeepSleep;
+ dml_uint_t (*MaximumVStartup)[__DML_NUM_PLANES__]; // pointer to rows of __DML_NUM_PLANES__ elements
+ dml_uint_t *TotalNumberOfActiveDPP;
+ dml_uint_t *TotalNumberOfDCCActiveDPP;
+ dml_uint_t *dpte_group_bytes;
+ dml_uint_t (*PrefetchLinesY)[__DML_NUM_PLANES__]; // pointer to rows of __DML_NUM_PLANES__ elements
+ dml_uint_t (*PrefetchLinesC)[__DML_NUM_PLANES__]; // pointer to rows of __DML_NUM_PLANES__ elements
+ dml_uint_t (*swath_width_luma_ub_all_states)[__DML_NUM_PLANES__]; // pointer to rows of __DML_NUM_PLANES__ elements
+ dml_uint_t (*swath_width_chroma_ub_all_states)[__DML_NUM_PLANES__]; // pointer to rows of __DML_NUM_PLANES__ elements
+ dml_uint_t *BytePerPixelY;
+ dml_uint_t *BytePerPixelC;
+ dml_uint_t *HTotal;
+ dml_float_t *PixelClock;
+ dml_uint_t (*PDEAndMetaPTEBytesPerFrame)[__DML_NUM_PLANES__]; // pointer to rows of __DML_NUM_PLANES__ elements
+ dml_uint_t (*DPTEBytesPerRow)[__DML_NUM_PLANES__]; // pointer to rows of __DML_NUM_PLANES__ elements
+ dml_uint_t (*MetaRowBytes)[__DML_NUM_PLANES__]; // pointer to rows of __DML_NUM_PLANES__ elements
+ dml_bool_t *DynamicMetadataEnable;
+ dml_float_t *ReadBandwidthLuma;
+ dml_float_t *ReadBandwidthChroma;
+ dml_float_t DCFCLKPerState;
+ dml_float_t *DCFCLKState; // NOTE(review): presumably the result array (cf. mode_support_st::DCFCLKState[2]); this struct has no input/output marker - confirm at the call site
+};
+
+struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st { /// <brief Parameter bundle; going by its name it is the argument pack of a CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport routine that is not visible in this chunk (confirm). Inputs come first; results follow the "Output" marker.
+ dml_bool_t USRRetrainingRequiredFinal;
+ enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange;
+ dml_uint_t *PrefetchMode;
+ dml_uint_t NumberOfActiveSurfaces;
+ dml_uint_t MaxLineBufferLines;
+ dml_uint_t LineBufferSize;
+ dml_uint_t WritebackInterfaceBufferSize;
+ dml_float_t DCFCLK;
+ dml_float_t ReturnBW;
+ dml_bool_t SynchronizeTimingsFinal;
+ dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
+ dml_bool_t *DRRDisplay;
+ dml_uint_t *dpte_group_bytes;
+ dml_uint_t *meta_row_height;
+ dml_uint_t *meta_row_height_chroma;
+ struct SOCParametersList mmSOCParameters;
+ dml_uint_t WritebackChunkSize;
+ dml_float_t SOCCLK;
+ dml_float_t DCFClkDeepSleep;
+ dml_uint_t *DETBufferSizeY;
+ dml_uint_t *DETBufferSizeC;
+ dml_uint_t *SwathHeightY;
+ dml_uint_t *SwathHeightC;
+ dml_uint_t *LBBitPerPixel;
+ dml_uint_t *SwathWidthY;
+ dml_uint_t *SwathWidthC;
+ dml_float_t *HRatio;
+ dml_float_t *HRatioChroma;
+ dml_uint_t *VTaps;
+ dml_uint_t *VTapsChroma;
+ dml_float_t *VRatio;
+ dml_float_t *VRatioChroma;
+ dml_uint_t *HTotal;
+ dml_uint_t *VTotal;
+ dml_uint_t *VActive;
+ dml_float_t *PixelClock;
+ dml_uint_t *BlendingAndTiming;
+ dml_uint_t *DPPPerSurface;
+ dml_float_t *BytePerPixelDETY;
+ dml_float_t *BytePerPixelDETC;
+ dml_uint_t *DSTXAfterScaler;
+ dml_uint_t *DSTYAfterScaler;
+ dml_bool_t *WritebackEnable;
+ enum dml_source_format_class *WritebackPixelFormat;
+ dml_uint_t *WritebackDestinationWidth;
+ dml_uint_t *WritebackDestinationHeight;
+ dml_uint_t *WritebackSourceHeight;
+ dml_bool_t UnboundedRequestEnabled;
+ dml_uint_t CompressedBufferSizeInkByte;
+
+ // Output (every member from here to the end of the struct is a pointer)
+ struct Watermarks *Watermark;
+ enum dml_dram_clock_change_support *DRAMClockChangeSupport;
+ dml_float_t *MaxActiveDRAMClockChangeLatencySupported;
+ dml_uint_t *SubViewportLinesNeededInMALL;
+ enum dml_fclock_change_support *FCLKChangeSupport;
+ dml_float_t *MaxActiveFCLKChangeLatencySupported;
+ dml_bool_t *USRRetrainingSupport;
+ dml_float_t *ActiveDRAMClockChangeLatencyMargin;
+};
+
+struct CalculateVMRowAndSwath_params_st { /// <brief Parameter bundle; going by its name it is the argument pack of a CalculateVMRowAndSwath routine that is not visible in this chunk (confirm). Inputs come first; results follow the "Output" marker.
+ dml_uint_t NumberOfActiveSurfaces;
+ struct DmlPipe *myPipe;
+ dml_uint_t *SurfaceSizeInMALL;
+ dml_uint_t PTEBufferSizeInRequestsLuma;
+ dml_uint_t PTEBufferSizeInRequestsChroma;
+ dml_uint_t DCCMetaBufferSizeBytes;
+ enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen;
+ enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange;
+ dml_uint_t MALLAllocatedForDCN;
+ dml_uint_t *SwathWidthY;
+ dml_uint_t *SwathWidthC;
+ dml_bool_t GPUVMEnable;
+ dml_bool_t HostVMEnable;
+ dml_uint_t HostVMMaxNonCachedPageTableLevels;
+ dml_uint_t GPUVMMaxPageTableLevels;
+ dml_uint_t *GPUVMMinPageSizeKBytes;
+ dml_uint_t HostVMMinPageSize;
+ dml_bool_t *PTEBufferModeOverrideEn;
+ dml_bool_t *PTEBufferModeOverrideVal;
+
+ // Output (every member from here to the end of the struct is a pointer)
+ dml_bool_t *PTEBufferSizeNotExceeded;
+ dml_bool_t *DCCMetaBufferSizeNotExceeded;
+ dml_uint_t *dpte_row_width_luma_ub;
+ dml_uint_t *dpte_row_width_chroma_ub;
+ dml_uint_t *dpte_row_height_luma;
+ dml_uint_t *dpte_row_height_chroma;
+ dml_uint_t *dpte_row_height_linear_luma; // VBA_DELTA
+ dml_uint_t *dpte_row_height_linear_chroma; // VBA_DELTA
+ dml_uint_t *meta_req_width;
+ dml_uint_t *meta_req_width_chroma;
+ dml_uint_t *meta_req_height;
+ dml_uint_t *meta_req_height_chroma;
+ dml_uint_t *meta_row_width;
+ dml_uint_t *meta_row_width_chroma;
+ dml_uint_t *meta_row_height;
+ dml_uint_t *meta_row_height_chroma;
+ dml_uint_t *vm_group_bytes;
+ dml_uint_t *dpte_group_bytes;
+ dml_uint_t *PixelPTEReqWidthY;
+ dml_uint_t *PixelPTEReqHeightY;
+ dml_uint_t *PTERequestSizeY;
+ dml_uint_t *PixelPTEReqWidthC;
+ dml_uint_t *PixelPTEReqHeightC;
+ dml_uint_t *PTERequestSizeC;
+ dml_uint_t *dpde0_bytes_per_frame_ub_l;
+ dml_uint_t *meta_pte_bytes_per_frame_ub_l;
+ dml_uint_t *dpde0_bytes_per_frame_ub_c;
+ dml_uint_t *meta_pte_bytes_per_frame_ub_c;
+ dml_uint_t *PrefetchSourceLinesY;
+ dml_uint_t *PrefetchSourceLinesC;
+ dml_uint_t *VInitPreFillY;
+ dml_uint_t *VInitPreFillC;
+ dml_uint_t *MaxNumSwathY;
+ dml_uint_t *MaxNumSwathC;
+ dml_float_t *meta_row_bw;
+ dml_float_t *dpte_row_bw;
+ dml_uint_t *PixelPTEBytesPerRow;
+ dml_uint_t *PDEAndMetaPTEBytesFrame;
+ dml_uint_t *MetaRowByte;
+ dml_bool_t *use_one_row_for_frame;
+ dml_bool_t *use_one_row_for_frame_flip;
+ dml_bool_t *UsesMALLForStaticScreen;
+ dml_bool_t *PTE_BUFFER_MODE;
+ dml_uint_t *BIGK_FRAGMENT_SIZE;
+};
+
+struct CalculateSwathAndDETConfiguration_params_st { /// <brief Parameter bundle; going by its name it is the argument pack of a CalculateSwathAndDETConfiguration routine that is not visible in this chunk (confirm). Unlike some sibling *_params_st structs it carries no marker separating inputs from outputs.
+ dml_uint_t *DETSizeOverride;
+ enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange;
+ dml_uint_t ConfigReturnBufferSizeInKByte;
+ dml_uint_t ROBBufferSizeInKByte;
+ dml_uint_t MaxTotalDETInKByte;
+ dml_uint_t MinCompressedBufferSizeInKByte;
+ dml_uint_t PixelChunkSizeInKByte;
+ dml_bool_t ForceSingleDPP;
+ dml_uint_t NumberOfActiveSurfaces;
+ dml_uint_t nomDETInKByte; // note the lower-case 'n'; mode_support_st spells its member NomDETInKByte
+ enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal;
+ dml_uint_t ConfigReturnBufferSegmentSizeInkByte;
+ dml_uint_t CompressedBufferSegmentSizeInkByteFinal;
+ enum dml_output_encoder_class *Output;
+ dml_float_t *ReadBandwidthLuma;
+ dml_float_t *ReadBandwidthChroma;
+ dml_float_t *MaximumSwathWidthLuma;
+ dml_float_t *MaximumSwathWidthChroma;
+ enum dml_rotation_angle *SourceScan;
+ dml_bool_t *ViewportStationary;
+ enum dml_source_format_class *SourcePixelFormat;
+ enum dml_swizzle_mode *SurfaceTiling;
+ dml_uint_t *ViewportWidth;
+ dml_uint_t *ViewportHeight;
+ dml_uint_t *ViewportXStart;
+ dml_uint_t *ViewportYStart;
+ dml_uint_t *ViewportXStartC;
+ dml_uint_t *ViewportYStartC;
+ dml_uint_t *SurfaceWidthY;
+ dml_uint_t *SurfaceWidthC;
+ dml_uint_t *SurfaceHeightY;
+ dml_uint_t *SurfaceHeightC;
+ dml_uint_t *Read256BytesBlockHeightY;
+ dml_uint_t *Read256BytesBlockHeightC;
+ dml_uint_t *Read256BytesBlockWidthY;
+ dml_uint_t *Read256BytesBlockWidthC;
+ enum dml_odm_mode *ODMMode;
+ dml_uint_t *BlendingAndTiming;
+ dml_uint_t *BytePerPixY;
+ dml_uint_t *BytePerPixC;
+ dml_float_t *BytePerPixDETY;
+ dml_float_t *BytePerPixDETC;
+ dml_uint_t *HActive;
+ dml_float_t *HRatio;
+ dml_float_t *HRatioChroma;
+ dml_uint_t *DPPPerSurface;
+ dml_uint_t *swath_width_luma_ub;
+ dml_uint_t *swath_width_chroma_ub;
+ dml_uint_t *SwathWidth;
+ dml_uint_t *SwathWidthChroma;
+ dml_uint_t *SwathHeightY;
+ dml_uint_t *SwathHeightC;
+ dml_uint_t *DETBufferSizeInKByte;
+ dml_uint_t *DETBufferSizeY;
+ dml_uint_t *DETBufferSizeC;
+ dml_bool_t *UnboundedRequestEnabled;
+ dml_uint_t *compbuf_reserved_space_64b;
+ dml_uint_t *compbuf_reserved_space_zs;
+ dml_uint_t *CompressedBufferSizeInkByte;
+ dml_bool_t *ViewportSizeSupportPerSurface;
+ dml_bool_t *ViewportSizeSupport;
+};
+
+struct CalculateStutterEfficiency_params_st { /// <brief Parameter bundle; going by its name it is the argument pack of a CalculateStutterEfficiency routine that is not visible in this chunk (confirm). It carries no marker separating inputs from outputs.
+ dml_uint_t CompressedBufferSizeInkByte;
+ enum dml_use_mall_for_pstate_change_mode *UseMALLForPStateChange;
+ dml_bool_t UnboundedRequestEnabled;
+ dml_uint_t MetaFIFOSizeInKEntries;
+ dml_uint_t ZeroSizeBufferEntries;
+ dml_uint_t PixelChunkSizeInKByte;
+ dml_uint_t NumberOfActiveSurfaces;
+ dml_uint_t ROBBufferSizeInKByte;
+ dml_float_t TotalDataReadBandwidth;
+ dml_float_t DCFCLK;
+ dml_float_t ReturnBW;
+ dml_uint_t CompbufReservedSpace64B;
+ dml_uint_t CompbufReservedSpaceZs;
+ dml_float_t SRExitTime;
+ dml_float_t SRExitZ8Time;
+ dml_bool_t SynchronizeTimingsFinal;
+ dml_uint_t *BlendingAndTiming;
+ dml_float_t StutterEnterPlusExitWatermark;
+ dml_float_t Z8StutterEnterPlusExitWatermark;
+ dml_bool_t ProgressiveToInterlaceUnitInOPP;
+ dml_bool_t *Interlace;
+ dml_float_t *MinTTUVBlank;
+ dml_uint_t *DPPPerSurface;
+ dml_uint_t *DETBufferSizeY;
+ dml_uint_t *BytePerPixelY;
+ dml_float_t *BytePerPixelDETY;
+ dml_uint_t *SwathWidthY;
+ dml_uint_t *SwathHeightY;
+ dml_uint_t *SwathHeightC;
+ dml_float_t *NetDCCRateLuma;
+ dml_float_t *NetDCCRateChroma;
+ dml_float_t *DCCFractionOfZeroSizeRequestsLuma;
+ dml_float_t *DCCFractionOfZeroSizeRequestsChroma;
+ dml_uint_t *HTotal;
+ dml_uint_t *VTotal;
+ dml_float_t *PixelClock;
+ dml_float_t *VRatio;
+ enum dml_rotation_angle *SourceScan;
+ dml_uint_t *BlockHeight256BytesY;
+ dml_uint_t *BlockWidth256BytesY;
+ dml_uint_t *BlockHeight256BytesC;
+ dml_uint_t *BlockWidth256BytesC;
+ dml_uint_t *DCCYMaxUncompressedBlock;
+ dml_uint_t *DCCCMaxUncompressedBlock;
+ dml_uint_t *VActive;
+ dml_bool_t *DCCEnable;
+ dml_bool_t *WritebackEnable;
+ dml_float_t *ReadBandwidthSurfaceLuma;
+ dml_float_t *ReadBandwidthSurfaceChroma;
+ dml_float_t *meta_row_bw;
+ dml_float_t *dpte_row_bw;
+ dml_float_t *StutterEfficiencyNotIncludingVBlank; // NOTE(review): this and the remaining pointers share names with the "Stutter Efficiency" / "ARB reg" members of mode_program_st; presumably they are the results written by the callee - confirm
+ dml_float_t *StutterEfficiency;
+ dml_uint_t *NumberOfStutterBurstsPerFrame;
+ dml_float_t *Z8StutterEfficiencyNotIncludingVBlank;
+ dml_float_t *Z8StutterEfficiency;
+ dml_uint_t *Z8NumberOfStutterBurstsPerFrame;
+ dml_float_t *StutterPeriod;
+ dml_bool_t *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+};
+
+struct CalculatePrefetchSchedule_params_st { /* parameter bundle; display_mode_lib_scratch_st holds one as CalculatePrefetchSchedule_params */
+ dml_bool_t EnhancedPrefetchScheduleAccelerationFinal;
+ dml_float_t HostVMInefficiencyFactor;
+ struct DmlPipe *myPipe;
+ dml_uint_t DSCDelay;
+ dml_float_t DPPCLKDelaySubtotalPlusCNVCFormater;
+ dml_float_t DPPCLKDelaySCL;
+ dml_float_t DPPCLKDelaySCLLBOnly;
+ dml_float_t DPPCLKDelayCNVCCursor;
+ dml_float_t DISPCLKDelaySubtotal;
+ dml_uint_t DPP_RECOUT_WIDTH;
+ enum dml_output_format_class OutputFormat;
+ dml_uint_t MaxInterDCNTileRepeaters;
+ dml_uint_t VStartup;
+ dml_uint_t MaxVStartup;
+ dml_uint_t GPUVMPageTableLevels;
+ dml_bool_t GPUVMEnable;
+ dml_bool_t HostVMEnable;
+ dml_uint_t HostVMMaxNonCachedPageTableLevels;
+ dml_uint_t HostVMMinPageSize;
+ dml_bool_t DynamicMetadataEnable;
+ dml_bool_t DynamicMetadataVMEnabled;
+ int DynamicMetadataLinesBeforeActiveRequired; /* plain int, unlike the dml_*_t members around it */
+ dml_uint_t DynamicMetadataTransmittedBytes;
+ dml_float_t UrgentLatency;
+ dml_float_t UrgentExtraLatency;
+ dml_float_t TCalc;
+ dml_uint_t PDEAndMetaPTEBytesFrame;
+ dml_uint_t MetaRowByte;
+ dml_uint_t PixelPTEBytesPerRow;
+ dml_float_t PrefetchSourceLinesY;
+ dml_uint_t VInitPreFillY;
+ dml_uint_t MaxNumSwathY;
+ dml_float_t PrefetchSourceLinesC;
+ dml_uint_t VInitPreFillC;
+ dml_uint_t MaxNumSwathC;
+ dml_uint_t swath_width_luma_ub;
+ dml_uint_t swath_width_chroma_ub;
+ dml_uint_t SwathHeightY;
+ dml_uint_t SwathHeightC;
+ dml_float_t TWait;
+ dml_uint_t *DSTXAfterScaler; /* NOTE(review): the pointer members from here on look like result slots - confirm */
+ dml_uint_t *DSTYAfterScaler;
+ dml_float_t *DestinationLinesForPrefetch;
+ dml_float_t *DestinationLinesToRequestVMInVBlank;
+ dml_float_t *DestinationLinesToRequestRowInVBlank;
+ dml_float_t *VRatioPrefetchY;
+ dml_float_t *VRatioPrefetchC;
+ dml_float_t *RequiredPrefetchPixDataBWLuma;
+ dml_float_t *RequiredPrefetchPixDataBWChroma;
+ dml_bool_t *NotEnoughTimeForDynamicMetadata;
+ dml_float_t *Tno_bw;
+ dml_float_t *prefetch_vmrow_bw;
+ dml_float_t *Tdmdl_vm;
+ dml_float_t *Tdmdl;
+ dml_float_t *TSetup;
+ dml_uint_t *VUpdateOffsetPix;
+ dml_uint_t *VUpdateWidthPix;
+ dml_uint_t *VReadyOffsetPix;
+};
+
+struct dml_core_mode_support_locals_st { /* scratch locals; embedded in display_mode_lib_scratch_st as dml_core_mode_support_locals */
+ dml_bool_t dummy_boolean[2]; /* NOTE(review): dummy_* members are presumably throw-away storage - confirm in the code that uses this struct */
+ dml_uint_t dummy_integer[3];
+ dml_uint_t dummy_integer_array[22][__DML_NUM_PLANES__];
+ enum dml_odm_mode dummy_odm_mode[__DML_NUM_PLANES__];
+ dml_bool_t dummy_boolean_array[2][__DML_NUM_PLANES__];
+ dml_uint_t MaxVStartupAllPlanes[2];
+ dml_uint_t MaximumVStartup[2][__DML_NUM_PLANES__];
+ dml_uint_t DSTYAfterScaler[__DML_NUM_PLANES__]; /* arrays dimensioned by __DML_NUM_PLANES__ hold one slot per plane */
+ dml_uint_t DSTXAfterScaler[__DML_NUM_PLANES__];
+ dml_uint_t NextPrefetchMode[__DML_NUM_PLANES__];
+ dml_uint_t MinPrefetchMode[__DML_NUM_PLANES__];
+ dml_uint_t MaxPrefetchMode[__DML_NUM_PLANES__];
+ dml_float_t dummy_single[3];
+ dml_float_t dummy_single_array[__DML_NUM_PLANES__];
+ struct Watermarks dummy_watermark;
+ struct SOCParametersList mSOCParameters;
+ struct DmlPipe myPipe;
+ struct DmlPipe SurfParameters[__DML_NUM_PLANES__];
+ dml_uint_t TotalNumberOfActiveWriteback;
+ dml_uint_t MaximumSwathWidthSupportLuma;
+ dml_uint_t MaximumSwathWidthSupportChroma;
+ dml_bool_t MPCCombineMethodAsNeededForPStateChangeAndVoltage;
+ dml_bool_t MPCCombineMethodAsPossible;
+ dml_bool_t TotalAvailablePipesSupportNoDSC;
+ dml_uint_t NumberOfDPPNoDSC;
+ enum dml_odm_mode ODMModeNoDSC;
+ dml_float_t RequiredDISPCLKPerSurfaceNoDSC;
+ dml_bool_t TotalAvailablePipesSupportDSC;
+ dml_uint_t NumberOfDPPDSC;
+ enum dml_odm_mode ODMModeDSC;
+ dml_float_t RequiredDISPCLKPerSurfaceDSC;
+ dml_bool_t NoChromaOrLinear;
+ dml_float_t BWOfNonCombinedSurfaceOfMaximumBandwidth;
+ dml_uint_t NumberOfNonCombinedSurfaceOfMaximumBandwidth;
+ dml_uint_t TotalNumberOfActiveOTG;
+ dml_uint_t TotalNumberOfActiveHDMIFRL;
+ dml_uint_t TotalNumberOfActiveDP2p0;
+ dml_uint_t TotalNumberOfActiveDP2p0Outputs;
+ dml_uint_t TotalSlots;
+ dml_uint_t DSCFormatFactor;
+ dml_uint_t TotalDSCUnitsRequired;
+ dml_uint_t ReorderingBytes;
+ dml_bool_t ImmediateFlipRequiredFinal;
+ dml_bool_t FullFrameMALLPStateMethod;
+ dml_bool_t SubViewportMALLPStateMethod;
+ dml_bool_t PhantomPipeMALLPStateMethod;
+ dml_bool_t SubViewportMALLRefreshGreaterThan120Hz;
+ dml_float_t MaxTotalVActiveRDBandwidth;
+ dml_float_t VMDataOnlyReturnBWPerState;
+ dml_float_t HostVMInefficiencyFactor;
+ dml_uint_t NextMaxVStartup;
+ dml_uint_t MaxVStartup;
+ dml_bool_t AllPrefetchModeTested;
+ dml_bool_t AnyLinesForVMOrRowTooLarge;
+ dml_bool_t is_max_pwr_state;
+ dml_bool_t is_max_dram_pwr_state;
+ dml_bool_t dram_clock_change_support;
+ dml_bool_t f_clock_change_support;
+};
+
+struct dml_core_mode_programming_locals_st { /* scratch locals; embedded in display_mode_lib_scratch_st as dml_core_mode_programming_locals */
+ dml_uint_t DSCFormatFactor;
+ dml_uint_t dummy_integer_array[2][__DML_NUM_PLANES__]; /* NOTE(review): dummy_* members are presumably throw-away storage - confirm in the code that uses this struct */
+ enum dml_output_encoder_class dummy_output_encoder_array[__DML_NUM_PLANES__];
+ dml_float_t dummy_single_array[2][__DML_NUM_PLANES__];
+ dml_uint_t dummy_long_array[4][__DML_NUM_PLANES__];
+ dml_bool_t dummy_boolean_array[2][__DML_NUM_PLANES__];
+ dml_bool_t dummy_boolean[1];
+ struct DmlPipe SurfaceParameters[__DML_NUM_PLANES__];
+ dml_uint_t ReorderBytes;
+ dml_float_t VMDataOnlyReturnBW;
+ dml_float_t HostVMInefficiencyFactor;
+ dml_uint_t TotalDCCActiveDPP;
+ dml_uint_t TotalActiveDPP;
+ dml_uint_t VStartupLines;
+ dml_uint_t MaxVStartupLines[__DML_NUM_PLANES__]; /// <brief more like vblank for the plane's OTG
+ dml_uint_t MaxVStartupAllPlanes;
+ dml_bool_t ImmediateFlipRequirementFinal;
+ int iteration; /* plain int, unlike the dml_*_t members around it */
+ dml_float_t MaxTotalRDBandwidth;
+ dml_float_t MaxTotalRDBandwidthNoUrgentBurst;
+ dml_bool_t DestinationLineTimesForPrefetchLessThan2;
+ dml_bool_t VRatioPrefetchMoreThanMax;
+ dml_float_t MaxTotalRDBandwidthNotIncludingMALLPrefetch;
+ dml_uint_t NextPrefetchMode[__DML_NUM_PLANES__];
+ dml_uint_t MinPrefetchMode[__DML_NUM_PLANES__];
+ dml_uint_t MaxPrefetchMode[__DML_NUM_PLANES__];
+ dml_bool_t AllPrefetchModeTested;
+ dml_float_t dummy_unit_vector[__DML_NUM_PLANES__];
+ dml_float_t NonUrgentMaxTotalRDBandwidth;
+ dml_float_t NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch;
+ dml_float_t dummy_single[2];
+ struct SOCParametersList mmSOCParameters;
+ dml_float_t Tvstartup_margin;
+ dml_float_t dlg_vblank_start;
+ dml_float_t LSetup;
+ dml_float_t blank_lines_remaining;
+ dml_float_t old_MIN_DST_Y_NEXT_START;
+ dml_float_t TotalWRBandwidth;
+ dml_float_t WRBandwidth;
+ struct Watermarks dummy_watermark;
+ struct DmlPipe myPipe;
+};
+
+struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st { /* scratch locals; embedded in display_mode_lib_scratch_st under the matching *_locals member */
+ dml_float_t ActiveDRAMClockChangeLatencyMargin[__DML_NUM_PLANES__]; /* arrays dimensioned by __DML_NUM_PLANES__ hold one slot per plane */
+ dml_float_t ActiveFCLKChangeLatencyMargin[__DML_NUM_PLANES__];
+ dml_float_t USRRetrainingLatencyMargin[__DML_NUM_PLANES__];
+
+ dml_bool_t SynchronizedSurfaces[__DML_NUM_PLANES__][__DML_NUM_PLANES__]; /* square plane-by-plane table */
+ dml_float_t EffectiveLBLatencyHidingY;
+ dml_float_t EffectiveLBLatencyHidingC;
+ dml_float_t LinesInDETY[__DML_NUM_PLANES__];
+ dml_float_t LinesInDETC[__DML_NUM_PLANES__];
+ dml_uint_t LinesInDETYRoundedDownToSwath[__DML_NUM_PLANES__];
+ dml_uint_t LinesInDETCRoundedDownToSwath[__DML_NUM_PLANES__];
+ dml_float_t FullDETBufferingTimeY;
+ dml_float_t FullDETBufferingTimeC;
+ dml_float_t WritebackDRAMClockChangeLatencyMargin;
+ dml_float_t WritebackFCLKChangeLatencyMargin;
+ dml_float_t WritebackLatencyHiding;
+
+ dml_uint_t TotalActiveWriteback;
+ dml_uint_t LBLatencyHidingSourceLinesY[__DML_NUM_PLANES__];
+ dml_uint_t LBLatencyHidingSourceLinesC[__DML_NUM_PLANES__];
+ dml_float_t TotalPixelBW;
+ dml_float_t EffectiveDETBufferSizeY;
+ dml_float_t ActiveClockChangeLatencyHidingY;
+ dml_float_t ActiveClockChangeLatencyHidingC;
+ dml_float_t ActiveClockChangeLatencyHiding;
+ dml_bool_t FoundCriticalSurface;
+ dml_uint_t LastSurfaceWithoutMargin;
+ dml_uint_t FCLKChangeSupportNumber;
+ dml_uint_t DRAMClockChangeMethod;
+ dml_uint_t DRAMClockChangeSupportNumber;
+ dml_uint_t dst_y_pstate;
+ dml_uint_t src_y_pstate_l;
+ dml_uint_t src_y_pstate_c;
+ dml_uint_t src_y_ahead_l;
+ dml_uint_t src_y_ahead_c;
+ dml_uint_t sub_vp_lines_l;
+ dml_uint_t sub_vp_lines_c;
+};
+
+struct CalculateVMRowAndSwath_locals_st { /* scratch locals; embedded in display_mode_lib_scratch_st as CalculateVMRowAndSwath_locals */
+ dml_uint_t PTEBufferSizeInRequestsForLuma[__DML_NUM_PLANES__]; /* arrays dimensioned by __DML_NUM_PLANES__ hold one slot per plane */
+ dml_uint_t PTEBufferSizeInRequestsForChroma[__DML_NUM_PLANES__];
+ dml_uint_t PDEAndMetaPTEBytesFrameY; /* scalar, unlike the per-plane arrays around it */
+ dml_uint_t PDEAndMetaPTEBytesFrameC;
+ dml_uint_t MetaRowByteY[__DML_NUM_PLANES__];
+ dml_uint_t MetaRowByteC[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowY[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowC[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowStorageY[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowStorageC[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowY_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_uint_t PixelPTEBytesPerRowC_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_width_luma_ub_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_luma_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_width_chroma_ub_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_uint_t dpte_row_height_chroma_one_row_per_frame[__DML_NUM_PLANES__];
+ dml_bool_t one_row_per_frame_fits_in_buffer[__DML_NUM_PLANES__];
+
+ dml_uint_t HostVMDynamicLevels;
+};
+
+struct UseMinimumDCFCLK_locals_st { /* scratch locals; embedded in display_mode_lib_scratch_st as UseMinimumDCFCLK_locals */
+ dml_uint_t dummy1; /* NOTE(review): dummy1..dummy3 are presumably throw-away storage - confirm in the code that uses this struct */
+ dml_uint_t dummy2;
+ dml_uint_t dummy3;
+ dml_float_t NormalEfficiency;
+ dml_float_t TotalMaxPrefetchFlipDPTERowBandwidth[2];
+
+ dml_float_t PixelDCFCLKCyclesRequiredInPrefetch[__DML_NUM_PLANES__]; /* arrays dimensioned by __DML_NUM_PLANES__ hold one slot per plane */
+ dml_float_t PrefetchPixelLinesTime[__DML_NUM_PLANES__];
+ dml_float_t DCFCLKRequiredForPeakBandwidthPerSurface[__DML_NUM_PLANES__];
+ dml_float_t DynamicMetadataVMExtraLatency[__DML_NUM_PLANES__];
+ dml_float_t MinimumTWait;
+ dml_float_t DPTEBandwidth;
+ dml_float_t DCFCLKRequiredForAverageBandwidth;
+ dml_uint_t ExtraLatencyBytes;
+ dml_float_t ExtraLatencyCycles;
+ dml_float_t DCFCLKRequiredForPeakBandwidth;
+ dml_uint_t NoOfDPPState[__DML_NUM_PLANES__];
+ dml_float_t MinimumTvmPlus2Tr0;
+};
+
+struct CalculatePrefetchSchedule_locals_st { /* scratch locals; embedded in display_mode_lib_scratch_st as CalculatePrefetchSchedule_locals */
+ dml_bool_t MyError;
+ dml_uint_t DPPCycles;
+ dml_uint_t DISPCLKCycles;
+ dml_float_t DSTTotalPixelsAfterScaler;
+ dml_float_t LineTime;
+ dml_float_t dst_y_prefetch_equ; /* NOTE(review): the _equ / _oto suffixes presumably separate two scheduling variants - confirm in the function body */
+ dml_float_t prefetch_bw_oto;
+ dml_float_t Tvm_oto;
+ dml_float_t Tr0_oto;
+ dml_float_t Tvm_oto_lines;
+ dml_float_t Tr0_oto_lines;
+ dml_float_t dst_y_prefetch_oto;
+ dml_float_t TimeForFetchingMetaPTE;
+ dml_float_t TimeForFetchingRowInVBlank;
+ dml_float_t LinesToRequestPrefetchPixelData;
+ dml_uint_t HostVMDynamicLevelsTrips;
+ dml_float_t trip_to_mem;
+ dml_float_t Tvm_trips;
+ dml_float_t Tr0_trips;
+ dml_float_t Tvm_trips_rounded;
+ dml_float_t Tr0_trips_rounded;
+ dml_float_t max_Tsw;
+ dml_float_t Lsw_oto;
+ dml_float_t Tpre_rounded;
+ dml_float_t prefetch_bw_equ;
+ dml_float_t Tvm_equ;
+ dml_float_t Tr0_equ;
+ dml_float_t Tdmbf;
+ dml_float_t Tdmec;
+ dml_float_t Tdmsks;
+ dml_float_t prefetch_sw_bytes;
+ dml_float_t prefetch_bw_pr;
+ dml_float_t bytes_pp;
+ dml_float_t dep_bytes;
+ dml_float_t min_Lsw_oto;
+ dml_float_t Tsw_est1;
+ dml_float_t Tsw_est3;
+ dml_float_t PrefetchBandwidth1; /* four numbered candidates; how they are chosen is not visible from this struct */
+ dml_float_t PrefetchBandwidth2;
+ dml_float_t PrefetchBandwidth3;
+ dml_float_t PrefetchBandwidth4;
+};
+
+/// @brief To minimize stack usage; function locals are instead placed into this scratch structure which is allocated per context
+struct display_mode_lib_scratch_st { /* embedded in struct display_mode_lib_st as its member named scratch */
+ // Scratch space for function locals
+ struct dml_core_mode_support_locals_st dml_core_mode_support_locals; /* each member is named after the struct tag it holds, minus the _st suffix */
+ struct dml_core_mode_programming_locals_st dml_core_mode_programming_locals;
+ struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
+ struct CalculateVMRowAndSwath_locals_st CalculateVMRowAndSwath_locals;
+ struct UseMinimumDCFCLK_locals_st UseMinimumDCFCLK_locals;
+ struct CalculatePrefetchSchedule_locals_st CalculatePrefetchSchedule_locals;
+
+ // Scratch space for function params
+ struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; /* the *_params_st bundles are stored by value too */
+ struct CalculateVMRowAndSwath_params_st CalculateVMRowAndSwath_params;
+ struct UseMinimumDCFCLK_params_st UseMinimumDCFCLK_params;
+ struct CalculateSwathAndDETConfiguration_params_st CalculateSwathAndDETConfiguration_params;
+ struct CalculateStutterEfficiency_params_st CalculateStutterEfficiency_params;
+ struct CalculatePrefetchSchedule_params_st CalculatePrefetchSchedule_params;
+};
+
+/// @brief Represents the overall soc/ip environment. It contains data structures that represent the soc/ip characteristics and also structures that hold calculation output
+struct display_mode_lib_st {
+ dml_uint_t project;
+
+ //@brief Mode evaluation and programming policy
+ struct dml_mode_eval_policy_st policy;
+
+ //@brief IP/SOC characteristic
+ struct ip_params_st ip;
+ struct soc_bounding_box_st soc;
+ struct soc_states_st states;
+
+ //@brief Mode Support and Mode programming struct
+ // Used to hold input; intermediate and output of the calculations
+ struct mode_support_st ms; // struct for mode support
+ struct mode_program_st mp; // struct for mode programming
+
+ struct display_mode_lib_scratch_st scratch; /* by-value scratch area holding the *_locals and *_params bundles */
+};
+
+struct dml_mode_support_ex_params_st { /* NOTE(review): the in_/out_ prefixes presumably mark inputs and results - confirm against the function that consumes this */
+ struct display_mode_lib_st *mode_lib;
+ const struct dml_display_cfg_st *in_display_cfg; /* const-qualified: not modified through this pointer */
+ dml_uint_t in_start_state_idx;
+ dml_uint_t out_lowest_state_idx; /* held by value, unlike out_evaluation_info which is a pointer */
+ struct dml_mode_support_info_st *out_evaluation_info;
+};
+
+typedef struct _vcs_dpi_dml_display_rq_regs_st dml_display_rq_regs_st; /* short aliases for the _vcs_dpi_* structs; declared ahead of the struct bodies */
+typedef struct _vcs_dpi_dml_display_dlg_regs_st dml_display_dlg_regs_st;
+typedef struct _vcs_dpi_dml_display_ttu_regs_st dml_display_ttu_regs_st;
+typedef struct _vcs_dpi_dml_display_arb_params_st dml_display_arb_params_st;
+typedef struct _vcs_dpi_dml_display_plane_rq_regs_st dml_display_plane_rq_regs_st; /* used by value inside _vcs_dpi_dml_display_rq_regs_st */
+
+struct _vcs_dpi_dml_display_dlg_regs_st { /* aliased as dml_display_dlg_regs_st; every field is a dml_uint_t that dml_print_dlg_regs_st() logs in hex unless noted */
+ dml_uint_t refcyc_h_blank_end;
+ dml_uint_t dlg_vblank_end;
+ dml_uint_t min_dst_y_next_start;
+ dml_uint_t refcyc_per_htotal;
+ dml_uint_t refcyc_x_after_scaler;
+ dml_uint_t dst_y_after_scaler;
+ dml_uint_t dst_y_prefetch;
+ dml_uint_t dst_y_per_vm_vblank;
+ dml_uint_t dst_y_per_row_vblank;
+ dml_uint_t dst_y_per_vm_flip;
+ dml_uint_t dst_y_per_row_flip;
+ dml_uint_t ref_freq_to_pix_freq;
+ dml_uint_t vratio_prefetch;
+ dml_uint_t vratio_prefetch_c;
+ dml_uint_t refcyc_per_pte_group_vblank_l;
+ dml_uint_t refcyc_per_pte_group_vblank_c;
+ dml_uint_t refcyc_per_meta_chunk_vblank_l;
+ dml_uint_t refcyc_per_meta_chunk_vblank_c;
+ dml_uint_t refcyc_per_pte_group_flip_l;
+ dml_uint_t refcyc_per_pte_group_flip_c;
+ dml_uint_t refcyc_per_meta_chunk_flip_l;
+ dml_uint_t refcyc_per_meta_chunk_flip_c;
+ dml_uint_t dst_y_per_pte_row_nom_l;
+ dml_uint_t dst_y_per_pte_row_nom_c;
+ dml_uint_t refcyc_per_pte_group_nom_l;
+ dml_uint_t refcyc_per_pte_group_nom_c;
+ dml_uint_t dst_y_per_meta_row_nom_l;
+ dml_uint_t dst_y_per_meta_row_nom_c;
+ dml_uint_t refcyc_per_meta_chunk_nom_l;
+ dml_uint_t refcyc_per_meta_chunk_nom_c;
+ dml_uint_t refcyc_per_line_delivery_pre_l;
+ dml_uint_t refcyc_per_line_delivery_pre_c;
+ dml_uint_t refcyc_per_line_delivery_l;
+ dml_uint_t refcyc_per_line_delivery_c;
+ dml_uint_t refcyc_per_vm_group_vblank;
+ dml_uint_t refcyc_per_vm_group_flip;
+ dml_uint_t refcyc_per_vm_req_vblank;
+ dml_uint_t refcyc_per_vm_req_flip;
+ dml_uint_t dst_y_offset_cur0; /* NOTE(review): not logged by dml_print_dlg_regs_st() */
+ dml_uint_t chunk_hdl_adjust_cur0;
+ dml_uint_t dst_y_offset_cur1;
+ dml_uint_t chunk_hdl_adjust_cur1;
+ dml_uint_t vready_after_vcount0;
+ dml_uint_t dst_y_delta_drq_limit;
+ dml_uint_t refcyc_per_vm_dmdata;
+ dml_uint_t dmdata_dl_delta; /* NOTE(review): not logged by dml_print_dlg_regs_st() */
+};
+
+struct _vcs_dpi_dml_display_ttu_regs_st { /* aliased as dml_display_ttu_regs_st; dml_print_ttu_regs_st() takes a const pointer to one */
+ dml_uint_t qos_level_low_wm;
+ dml_uint_t qos_level_high_wm;
+ dml_uint_t min_ttu_vblank;
+ dml_uint_t qos_level_flip;
+ dml_uint_t refcyc_per_req_delivery_l; /* NOTE(review): _l/_c/_cur0/_cur1 presumably mean luma, chroma and the two cursors - confirm */
+ dml_uint_t refcyc_per_req_delivery_c;
+ dml_uint_t refcyc_per_req_delivery_cur0;
+ dml_uint_t refcyc_per_req_delivery_cur1;
+ dml_uint_t refcyc_per_req_delivery_pre_l;
+ dml_uint_t refcyc_per_req_delivery_pre_c;
+ dml_uint_t refcyc_per_req_delivery_pre_cur0;
+ dml_uint_t refcyc_per_req_delivery_pre_cur1;
+ dml_uint_t qos_level_fixed_l;
+ dml_uint_t qos_level_fixed_c;
+ dml_uint_t qos_level_fixed_cur0;
+ dml_uint_t qos_level_fixed_cur1;
+ dml_uint_t qos_ramp_disable_l;
+ dml_uint_t qos_ramp_disable_c;
+ dml_uint_t qos_ramp_disable_cur0;
+ dml_uint_t qos_ramp_disable_cur1;
+};
+
+struct _vcs_dpi_dml_display_arb_params_st { /* aliased as dml_display_arb_params_st; six plain dml_uint_t values */
+ dml_uint_t max_req_outstanding;
+ dml_uint_t min_req_outstanding;
+ dml_uint_t sat_level_us; /* NOTE(review): the _us suffix suggests microseconds - confirm */
+ dml_uint_t hvm_max_qos_commit_threshold;
+ dml_uint_t hvm_min_req_outstand_commit_threshold;
+ dml_uint_t compbuf_reserved_space_kbytes;
+};
+
+struct _vcs_dpi_dml_display_plane_rq_regs_st { /* aliased as dml_display_plane_rq_regs_st; dml_print_data_rq_regs_st() logs every field in hex */
+ dml_uint_t chunk_size;
+ dml_uint_t min_chunk_size;
+ dml_uint_t meta_chunk_size;
+ dml_uint_t min_meta_chunk_size;
+ dml_uint_t dpte_group_size;
+ dml_uint_t mpte_group_size;
+ dml_uint_t swath_height;
+ dml_uint_t pte_row_height_linear;
+};
+
+struct _vcs_dpi_dml_display_rq_regs_st { /* aliased as dml_display_rq_regs_st; logged by dml_print_rq_regs_st() */
+ dml_display_plane_rq_regs_st rq_regs_l; /* logged under the "<LUMA>" heading */
+ dml_display_plane_rq_regs_st rq_regs_c; /* logged under the "<CHROMA>" heading */
+ dml_uint_t drq_expansion_mode;
+ dml_uint_t prq_expansion_mode;
+ dml_uint_t mrq_expansion_mode;
+ dml_uint_t crq_expansion_mode;
+ dml_uint_t plane1_base_address;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_lib_defines.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_lib_defines.h
new file mode 100644
index 000000000000..14d389525296
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_lib_defines.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DISPLAY_MODE_LIB_DEFINES_H__
+#define __DISPLAY_MODE_LIB_DEFINES_H__
+
+#define DCN_DML__DML_STANDALONE 1
+#define DCN_DML__DML_STANDALONE__1 1
+#define DCN_DML__PRESENT 1
+#define DCN_DML__PRESENT__1 1
+#define DCN_DML__NUM_PLANE 8
+#define DCN_DML__NUM_PLANE__8 1
+#define DCN_DML__NUM_CURSOR 1
+#define DCN_DML__NUM_CURSOR__1 1
+#define DCN_DML__NUM_PWR_STATE 30
+#define DCN_DML__NUM_PWR_STATE__30 1
+#define DCN_DML__VM_PRESENT 1
+#define DCN_DML__VM_PRESENT__1 1
+#define DCN_DML__HOST_VM_PRESENT 1
+#define DCN_DML__HOST_VM_PRESENT__1 1
+#define DCN_DML__DWB 1
+
+#include "dml_depedencies.h"
+
+#include "dml_logging.h"
+#include "dml_assert.h"
+
+// To enable a lot of debug msg
+#define __DML_VBA_DEBUG__
+#define __DML_VBA_ENABLE_INLINE_CHECK_ 0
+#define __DML_VBA_MIN_VSTARTUP__ 9 //<brief The vstartup value at which DML starts checking whether the mode can be supported
+#define __DML_ARB_TO_RET_DELAY__ (7 + 95) //<brief Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET); parenthesized so the sum survives expansion next to other operators
+#define __DML_MIN_DCFCLK_FACTOR__ 1.15 //<brief fudge factor for min dcfclk calculation
+#define __DML_MAX_VRATIO_PRE__ 4.0 //<brief Prefetch schedule max vratio
+#define __DML_MAX_VRATIO_PRE_OTO__ 4.0 //<brief Prefetch schedule max vratio for one to one scheduling calculation for prefetch
+#define __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ 6.0 //<brief Prefetch schedule max vratio when enhance prefetch schedule acceleration is enabled and vstartup is earliest possible already
+#define __DML_NUM_PLANES__ DCN_DML__NUM_PLANE
+#define __DML_NUM_CURSORS__ DCN_DML__NUM_CURSOR
+#define __DML_DPP_INVALID__ 0
+#define __DML_NUM_DMB__ DCN_DML__DWB
+#define __DML_PIPE_NO_PLANE__ 99
+
+#define __DML_MAX_STATE_ARRAY_SIZE__ DCN_DML__NUM_PWR_STATE
+
+// Compilation define
+#define __DML_DLL_EXPORT__
+
+typedef int dml_int_t; // int is 32-bit in C/C++, but Integer datatype is 16-bit in VBA. this should map to Long in VBA
+typedef unsigned int dml_uint_t;
+typedef double dml_float_t;
+
+// Note: bool is 8-bit in C/C++, but Boolean is 16-bit in VBA, use "short" in C/C++ DLL so the struct work when vba uses DLL
+// Or the VBA side don't use Boolean, just use "Byte", then C side can use bool
+typedef bool dml_bool_t;
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c
new file mode 100644
index 000000000000..89890c88fd66
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c
@@ -0,0 +1,798 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "display_mode_util.h"
+
+/* Fast approximate log2() of a float: unbiased exponent plus a quadratic fit of the mantissa. */
+static dml_float_t _log(float in)
+{
+	union { float f; int i; } pun = { .f = in }; /* union pun instead of an int * cast: no aliasing violation */
+	const int log_2 = ((pun.i >> 23) & 255) - 128; /* biased exponent field minus 128 */
+
+	pun.i &= ~(255 << 23); /* clear the exponent field ... */
+	pun.i += 127 << 23; /* ... and set it to the bias, leaving only the mantissa scaled into [1, 2) */
+	in = pun.f;
+
+	in = ((-1.0f / 3) * in + 2) * in - 2.0f / 3; /* quadratic that maps 1 -> 1 and 2 -> 2 */
+
+	return (in + log_2);
+}
+
+/*
+ * dml_util_is_420 - tell whether a source pixel format class is 4:2:0
+ * @source_format: format class to classify
+ *
+ * Returns true for dml_420_8 and dml_420_10, false for the dml_444_16/32/64
+ * and dml_422_8/10 classes. Any other value trips ASSERT(0) and, if
+ * execution continues past it, yields false.
+ */
+dml_bool_t dml_util_is_420(enum dml_source_format_class source_format)
+{
+	dml_bool_t is_420;
+
+	switch (source_format) {
+	case dml_420_8:
+	case dml_420_10:
+		is_420 = true;
+		break;
+	case dml_444_16:
+	case dml_444_32:
+	case dml_444_64:
+	case dml_422_8:
+	case dml_422_10:
+		is_420 = false;
+		break;
+	default:
+		ASSERT(0);
+		is_420 = false;
+		break;
+	}
+
+	return is_420;
+}
+
+static inline float dcn_bw_pow(float a, float exp)
+{
+	const int n = (int)exp; /* only the integer part of @exp takes part */
+	float half;
+
+	if (n == 0)
+		return 1;
+
+	/* square-and-multiply: a^n is (a^(n/2))^2, times a (or 1/a) when n is odd */
+	half = dcn_bw_pow(a, (int)(exp / 2));
+	if ((n % 2) == 0)
+		return half * half;
+	if (n > 0)
+		return a * half * half;
+	return (half * half) / a;
+}
+
+static inline float dcn_bw_ceil2(const float arg, const float significance)
+{
+	ASSERT(significance != 0);
+	/* (int) truncates, so the 0.99999 bias is what pushes a fractional quotient up */
+	return significance * (int)(arg / significance + 0.99999);
+}
+
+static inline float dcn_bw_floor2(const float arg, const float significance)
+{
+	ASSERT(significance != 0);
+	/* (int) drops the fractional part of the quotient before scaling back */
+	return significance * (int)(arg / significance);
+}
+
+/* Round @x up to a multiple of @granularity through dcn_bw_ceil2(); a zero granularity yields 0. */
+dml_float_t dml_ceil(dml_float_t x, dml_float_t granularity)
+{
+	if (granularity != 0)
+		return (dml_float_t)dcn_bw_ceil2(x, granularity);
+	return 0; /* never hand a zero significance to dcn_bw_ceil2() */
+}
+
+/* Round @x down to a multiple of @granularity through dcn_bw_floor2(); a zero granularity yields 0. */
+dml_float_t dml_floor(dml_float_t x, dml_float_t granularity)
+{
+	if (granularity != 0)
+		return (dml_float_t)dcn_bw_floor2(x, granularity);
+	return 0; /* never hand a zero significance to dcn_bw_floor2() */
+}
+
+/*
+ * Smallest-of helpers; a NaN operand (detected as v != v) loses to the other operand.
+ */
+dml_float_t dml_min(dml_float_t x, dml_float_t y)
+{
+	if (x != x)
+		return y;
+	return (y != y || x < y) ? x : y;
+}
+
+dml_float_t dml_min3(dml_float_t x, dml_float_t y, dml_float_t z)
+{
+	const dml_float_t xy = dml_min(x, y);
+
+	return dml_min(xy, z);
+}
+
+dml_float_t dml_min4(dml_float_t x, dml_float_t y, dml_float_t z, dml_float_t w)
+{
+	return dml_min(dml_min3(x, y, z), w);
+}
+
+/* Largest-of helpers; a NaN operand (detected as v != v) loses to the other operand. */
+dml_float_t dml_max(dml_float_t x, dml_float_t y)
+{
+	if (x != x)
+		return y;
+	return (y != y || x > y) ? x : y;
+}
+
+dml_float_t dml_max3(dml_float_t x, dml_float_t y, dml_float_t z)
+{
+	return dml_max(dml_max(x, y), z);
+}
+
+dml_float_t dml_max4(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d)
+{
+	return dml_max(dml_max(a, b), dml_max(c, d));
+}
+
+dml_float_t dml_max5(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d, dml_float_t e)
+{
+	return dml_max(dml_max4(a, b, c, d), e);
+}
+
+dml_float_t dml_log(dml_float_t x, dml_float_t base)
+{
+	return _log(x) / _log(base); /* change of base: the ratio of two _log() values */
+}
+
+dml_float_t dml_log2(dml_float_t x)
+{
+	return dml_log(x, 2); /* evaluates _log(x) / _log(2) */
+}
+
+/*
+ * Pick between dml_ceil(val, 1) and dml_floor(val, 1): the ceiling when @val
+ * sits 0.5 or more above the floor, the floor otherwise.
+ * @bankers_rounding is accepted but never consulted.
+ */
+dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding)
+{
+	const double half = 0.5;
+	const double upper = dml_ceil(val, 1);
+	const double lower = dml_floor(val, 1);
+
+	if (val - lower >= half)
+		return upper;
+
+	return lower;
+}
+
+dml_float_t dml_pow(dml_float_t base, int exp)
+{
+	return dcn_bw_pow(base, exp); /* float result widens to dml_float_t on return */
+}
+
+/* Snap @num to a multiple of @multiple: upwards when @up is set, downwards otherwise. */
+dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t up)
+{
+	/* a zero step leaves @num untouched, and so does an already aligned @num */
+	const dml_uint_t excess = (multiple == 0) ? 0 : num % multiple;
+
+	if (excess == 0)
+		return num;
+
+	/* drop the remainder, then step up one multiple if requested */
+	num -= excess;
+	if (up)
+		num += multiple;
+
+	return num;
+}
+
+void dml_print_data_rq_regs_st(const dml_display_plane_rq_regs_st *rq_regs) /* log every field of *rq_regs in hex through dml_print() */
+{
+ dml_print("DML: ===================================== \n"); /* opening separator */
+ dml_print("DML: DISPLAY_PLANE_RQ_REGS_ST\n");
+ dml_print("DML: chunk_size = 0x%x\n", rq_regs->chunk_size);
+ dml_print("DML: min_chunk_size = 0x%x\n", rq_regs->min_chunk_size);
+ dml_print("DML: meta_chunk_size = 0x%x\n", rq_regs->meta_chunk_size);
+ dml_print("DML: min_meta_chunk_size = 0x%x\n", rq_regs->min_meta_chunk_size);
+ dml_print("DML: dpte_group_size = 0x%x\n", rq_regs->dpte_group_size);
+ dml_print("DML: mpte_group_size = 0x%x\n", rq_regs->mpte_group_size);
+ dml_print("DML: swath_height = 0x%x\n", rq_regs->swath_height);
+ dml_print("DML: pte_row_height_linear = 0x%x\n", rq_regs->pte_row_height_linear);
+ dml_print("DML: ===================================== \n"); /* closing separator */
+}
+
+void dml_print_rq_regs_st(const dml_display_rq_regs_st *rq_regs) /* log both per-plane sets, then the five scalar fields, through dml_print() */
+{
+ dml_print("DML: ===================================== \n");
+ dml_print("DML: DISPLAY_RQ_REGS_ST\n");
+ dml_print("DML: <LUMA> \n");
+ dml_print_data_rq_regs_st(&rq_regs->rq_regs_l); /* nested dump, framed by its own separators */
+ dml_print("DML: <CHROMA> \n");
+ dml_print_data_rq_regs_st(&rq_regs->rq_regs_c);
+ dml_print("DML: drq_expansion_mode = 0x%x\n", rq_regs->drq_expansion_mode);
+ dml_print("DML: prq_expansion_mode = 0x%x\n", rq_regs->prq_expansion_mode);
+ dml_print("DML: mrq_expansion_mode = 0x%x\n", rq_regs->mrq_expansion_mode);
+ dml_print("DML: crq_expansion_mode = 0x%x\n", rq_regs->crq_expansion_mode);
+ dml_print("DML: plane1_base_address = 0x%x\n", rq_regs->plane1_base_address);
+ dml_print("DML: ===================================== \n");
+}
+
+void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs) /* log the fields of *dlg_regs in hex through dml_print(); two fields are skipped, see the notes below */
+{
+ dml_print("DML: ===================================== \n");
+ dml_print("DML: DISPLAY_DLG_REGS_ST \n");
+ dml_print("DML: refcyc_h_blank_end = 0x%x\n", dlg_regs->refcyc_h_blank_end);
+ dml_print("DML: dlg_vblank_end = 0x%x\n", dlg_regs->dlg_vblank_end);
+ dml_print("DML: min_dst_y_next_start = 0x%x\n", dlg_regs->min_dst_y_next_start);
+ dml_print("DML: refcyc_per_htotal = 0x%x\n", dlg_regs->refcyc_per_htotal);
+ dml_print("DML: refcyc_x_after_scaler = 0x%x\n", dlg_regs->refcyc_x_after_scaler);
+ dml_print("DML: dst_y_after_scaler = 0x%x\n", dlg_regs->dst_y_after_scaler);
+ dml_print("DML: dst_y_prefetch = 0x%x\n", dlg_regs->dst_y_prefetch);
+ dml_print("DML: dst_y_per_vm_vblank = 0x%x\n", dlg_regs->dst_y_per_vm_vblank);
+ dml_print("DML: dst_y_per_row_vblank = 0x%x\n", dlg_regs->dst_y_per_row_vblank);
+ dml_print("DML: dst_y_per_vm_flip = 0x%x\n", dlg_regs->dst_y_per_vm_flip);
+ dml_print("DML: dst_y_per_row_flip = 0x%x\n", dlg_regs->dst_y_per_row_flip);
+ dml_print("DML: ref_freq_to_pix_freq = 0x%x\n", dlg_regs->ref_freq_to_pix_freq);
+ dml_print("DML: vratio_prefetch = 0x%x\n", dlg_regs->vratio_prefetch);
+ dml_print("DML: vratio_prefetch_c = 0x%x\n", dlg_regs->vratio_prefetch_c);
+ dml_print("DML: refcyc_per_pte_group_vblank_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_vblank_l);
+ dml_print("DML: refcyc_per_pte_group_vblank_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_vblank_c);
+ dml_print("DML: refcyc_per_meta_chunk_vblank_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_vblank_l);
+ dml_print("DML: refcyc_per_meta_chunk_vblank_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_vblank_c);
+ dml_print("DML: refcyc_per_pte_group_flip_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_flip_l);
+ dml_print("DML: refcyc_per_pte_group_flip_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_flip_c);
+ dml_print("DML: refcyc_per_meta_chunk_flip_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_flip_l);
+ dml_print("DML: refcyc_per_meta_chunk_flip_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_flip_c);
+ dml_print("DML: dst_y_per_pte_row_nom_l = 0x%x\n", dlg_regs->dst_y_per_pte_row_nom_l);
+ dml_print("DML: dst_y_per_pte_row_nom_c = 0x%x\n", dlg_regs->dst_y_per_pte_row_nom_c);
+ dml_print("DML: refcyc_per_pte_group_nom_l = 0x%x\n", dlg_regs->refcyc_per_pte_group_nom_l);
+ dml_print("DML: refcyc_per_pte_group_nom_c = 0x%x\n", dlg_regs->refcyc_per_pte_group_nom_c);
+ dml_print("DML: dst_y_per_meta_row_nom_l = 0x%x\n", dlg_regs->dst_y_per_meta_row_nom_l);
+ dml_print("DML: dst_y_per_meta_row_nom_c = 0x%x\n", dlg_regs->dst_y_per_meta_row_nom_c);
+ dml_print("DML: refcyc_per_meta_chunk_nom_l = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_nom_l);
+ dml_print("DML: refcyc_per_meta_chunk_nom_c = 0x%x\n", dlg_regs->refcyc_per_meta_chunk_nom_c);
+ dml_print("DML: refcyc_per_line_delivery_pre_l = 0x%x\n", dlg_regs->refcyc_per_line_delivery_pre_l);
+ dml_print("DML: refcyc_per_line_delivery_pre_c = 0x%x\n", dlg_regs->refcyc_per_line_delivery_pre_c);
+ dml_print("DML: refcyc_per_line_delivery_l = 0x%x\n", dlg_regs->refcyc_per_line_delivery_l);
+ dml_print("DML: refcyc_per_line_delivery_c = 0x%x\n", dlg_regs->refcyc_per_line_delivery_c);
+ dml_print("DML: refcyc_per_vm_group_vblank = 0x%x\n", dlg_regs->refcyc_per_vm_group_vblank);
+ dml_print("DML: refcyc_per_vm_group_flip = 0x%x\n", dlg_regs->refcyc_per_vm_group_flip);
+ dml_print("DML: refcyc_per_vm_req_vblank = 0x%x\n", dlg_regs->refcyc_per_vm_req_vblank);
+ dml_print("DML: refcyc_per_vm_req_flip = 0x%x\n", dlg_regs->refcyc_per_vm_req_flip);
+ dml_print("DML: chunk_hdl_adjust_cur0 = 0x%x\n", dlg_regs->chunk_hdl_adjust_cur0); /* NOTE(review): dst_y_offset_cur0, declared just before this field in the struct, is not logged */
+ dml_print("DML: dst_y_offset_cur1 = 0x%x\n", dlg_regs->dst_y_offset_cur1);
+ dml_print("DML: chunk_hdl_adjust_cur1 = 0x%x\n", dlg_regs->chunk_hdl_adjust_cur1);
+ dml_print("DML: vready_after_vcount0 = 0x%x\n", dlg_regs->vready_after_vcount0);
+ dml_print("DML: dst_y_delta_drq_limit = 0x%x\n", dlg_regs->dst_y_delta_drq_limit);
+ dml_print("DML: refcyc_per_vm_dmdata = 0x%x\n", dlg_regs->refcyc_per_vm_dmdata); /* NOTE(review): dmdata_dl_delta, the struct's last field, is not logged */
+ dml_print("DML: ===================================== \n");
+}
+
+/*
+ * dml_print_ttu_regs_st - dump a TTU register set through dml_print()
+ * @ttu_regs: register values to dump; dereferenced without a NULL check
+ *
+ * Emits one "DML: <field> = 0x<hex>" line per field, framed by a separator
+ * banner and a title line before and a separator banner after.
+ */
+void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs)
+{
+	dml_print("DML: ===================================== \n");
+	dml_print("DML: DISPLAY_TTU_REGS_ST \n");
+
+	/* QoS watermarks and vblank/flip levels */
+	dml_print("DML: qos_level_low_wm = 0x%x\n",
+		  ttu_regs->qos_level_low_wm);
+	dml_print("DML: qos_level_high_wm = 0x%x\n",
+		  ttu_regs->qos_level_high_wm);
+	dml_print("DML: min_ttu_vblank = 0x%x\n",
+		  ttu_regs->min_ttu_vblank);
+	dml_print("DML: qos_level_flip = 0x%x\n",
+		  ttu_regs->qos_level_flip);
+
+	/* refcyc_per_req_delivery_* for the _l, _c, cur0 and cur1 fields */
+	dml_print("DML: refcyc_per_req_delivery_pre_l = 0x%x\n",
+		  ttu_regs->refcyc_per_req_delivery_pre_l);
+	dml_print("DML: refcyc_per_req_delivery_l = 0x%x\n",
+		  ttu_regs->refcyc_per_req_delivery_l);
+	dml_print("DML: refcyc_per_req_delivery_pre_c = 0x%x\n",
+		  ttu_regs->refcyc_per_req_delivery_pre_c);
+	dml_print("DML: refcyc_per_req_delivery_c = 0x%x\n",
+		  ttu_regs->refcyc_per_req_delivery_c);
+	dml_print("DML: refcyc_per_req_delivery_cur0 = 0x%x\n",
+		  ttu_regs->refcyc_per_req_delivery_cur0);
+	dml_print("DML: refcyc_per_req_delivery_pre_cur0 = 0x%x\n",
+		  ttu_regs->refcyc_per_req_delivery_pre_cur0);
+	dml_print("DML: refcyc_per_req_delivery_cur1 = 0x%x\n",
+		  ttu_regs->refcyc_per_req_delivery_cur1);
+	dml_print("DML: refcyc_per_req_delivery_pre_cur1 = 0x%x\n",
+		  ttu_regs->refcyc_per_req_delivery_pre_cur1);
+
+	/* qos_level_fixed_* / qos_ramp_disable_* pairs */
+	dml_print("DML: qos_level_fixed_l = 0x%x\n",
+		  ttu_regs->qos_level_fixed_l);
+	dml_print("DML: qos_ramp_disable_l = 0x%x\n",
+		  ttu_regs->qos_ramp_disable_l);
+	dml_print("DML: qos_level_fixed_c = 0x%x\n",
+		  ttu_regs->qos_level_fixed_c);
+	dml_print("DML: qos_ramp_disable_c = 0x%x\n",
+		  ttu_regs->qos_ramp_disable_c);
+	dml_print("DML: qos_level_fixed_cur0 = 0x%x\n",
+		  ttu_regs->qos_level_fixed_cur0);
+	dml_print("DML: qos_ramp_disable_cur0 = 0x%x\n",
+		  ttu_regs->qos_ramp_disable_cur0);
+	dml_print("DML: qos_level_fixed_cur1 = 0x%x\n",
+		  ttu_regs->qos_level_fixed_cur1);
+	dml_print("DML: qos_ramp_disable_cur1 = 0x%x\n",
+		  ttu_regs->qos_ramp_disable_cur1);
+
+	dml_print("DML: ===================================== \n");
+}
+
+/*
+ * dml_print_dml_policy - dump a mode evaluation policy through dml_print()
+ * @policy: policy to dump; dereferenced without a NULL check
+ *
+ * The scalar policy fields are printed first, then the four per-plane arrays
+ * are printed for every index below DCN_DML__NUM_PLANE. All values are
+ * formatted as hex. The output is framed by separator banners.
+ */
+void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy)
+{
+	dml_uint_t k;
+
+	dml_print("DML: ===================================== \n");
+	dml_print("DML: DML_MODE_EVAL_POLICY_ST\n");
+
+	dml_print("DML: Policy: UseUnboundedRequesting = 0x%x\n",
+		  policy->UseUnboundedRequesting);
+	dml_print("DML: Policy: UseMinimumRequiredDCFCLK = 0x%x\n",
+		  policy->UseMinimumRequiredDCFCLK);
+	dml_print("DML: Policy: DRAMClockChangeRequirementFinal = 0x%x\n",
+		  policy->DRAMClockChangeRequirementFinal);
+	dml_print("DML: Policy: FCLKChangeRequirementFinal = 0x%x\n",
+		  policy->FCLKChangeRequirementFinal);
+	dml_print("DML: Policy: USRRetrainingRequiredFinal = 0x%x\n",
+		  policy->USRRetrainingRequiredFinal);
+	dml_print("DML: Policy: EnhancedPrefetchScheduleAccelerationFinal = 0x%x\n",
+		  policy->EnhancedPrefetchScheduleAccelerationFinal);
+	dml_print("DML: Policy: NomDETInKByteOverrideEnable = 0x%x\n",
+		  policy->NomDETInKByteOverrideEnable);
+	dml_print("DML: Policy: NomDETInKByteOverrideValue = 0x%x\n",
+		  policy->NomDETInKByteOverrideValue);
+	dml_print("DML: Policy: DCCProgrammingAssumesScanDirectionUnknownFinal = 0x%x\n",
+		  policy->DCCProgrammingAssumesScanDirectionUnknownFinal);
+	dml_print("DML: Policy: SynchronizeTimingsFinal = 0x%x\n",
+		  policy->SynchronizeTimingsFinal);
+	dml_print("DML: Policy: SynchronizeDRRDisplaysForUCLKPStateChangeFinal = 0x%x\n",
+		  policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal);
+	dml_print("DML: Policy: AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = 0x%x\n",
+		  policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported);
+	dml_print("DML: Policy: AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = 0x%x\n",
+		  policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported);
+
+	/* Per-plane policy entries; the full array is walked, not just active planes. */
+	for (k = 0; k < DCN_DML__NUM_PLANE; k++) {
+		dml_print("DML: i=%0d, Policy: MPCCombineUse = 0x%x\n",
+			  k, policy->MPCCombineUse[k]);
+		dml_print("DML: i=%0d, Policy: ODMUse = 0x%x\n",
+			  k, policy->ODMUse[k]);
+		dml_print("DML: i=%0d, Policy: ImmediateFlipRequirement = 0x%x\n",
+			  k, policy->ImmediateFlipRequirement[k]);
+		dml_print("DML: i=%0d, Policy: AllowForPStateChangeOrStutterInVBlank = 0x%x\n",
+			  k, policy->AllowForPStateChangeOrStutterInVBlank[k]);
+	}
+	dml_print("DML: ===================================== \n");
+}
+
+/*
+ * dml_print_mode_support - print a pass/fail table of the mode support checks
+ * @mode_lib: mode library instance whose ms.support, ms.policy,
+ *            ms.cache_display_cfg and scratch.dml_core_mode_support_locals
+ *            members are read; dereferenced without a NULL check
+ * @j: index used for the per-state support arrays (printed as "Voltage State")
+ *
+ * Every row prints "Supported" when the corresponding check passes and
+ * "NOT Supported" otherwise. Flags that name a failure condition (for example
+ * NotEnoughDSCUnits) pass when they are false; flags that name a capability
+ * pass when they are true.
+ */
+void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t j)
+{
+	dml_print("DML: MODE SUPPORT: ===============================================\n");
+	dml_print("DML: MODE SUPPORT: Voltage State %d\n", j);
+	dml_print("DML: MODE SUPPORT: Mode Supported : %s\n", mode_lib->ms.support.ModeSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Scale Ratio And Taps : %s\n", mode_lib->ms.support.ScaleRatioAndTapsSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Source Format Pixel And Scan : %s\n", mode_lib->ms.support.SourceFormatPixelAndScanSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Viewport Size : %s\n", mode_lib->ms.support.ViewportSizeSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Link Rate Does Not Match DP Version : %s\n", mode_lib->ms.support.LinkRateDoesNotMatchDPVersion == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Link Rate For Multistream Not Indicated : %s\n", mode_lib->ms.support.LinkRateForMultistreamNotIndicated == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: BPP For Multi stream Not Indicated : %s\n", mode_lib->ms.support.BPPForMultistreamNotIndicated == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Multistream With HDMI Or eDP : %s\n", mode_lib->ms.support.MultistreamWithHDMIOreDP == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Exceeded Multistream Slots : %s\n", mode_lib->ms.support.ExceededMultistreamSlots == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: MSO Or ODM Split With Non DP Link : %s\n", mode_lib->ms.support.MSOOrODMSplitWithNonDPLink == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Not Enough Lanes For MSO : %s\n", mode_lib->ms.support.NotEnoughLanesForMSO == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: LinkCapacitySupport : %s\n", mode_lib->ms.support.LinkCapacitySupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: P2IWith420 : %s\n", mode_lib->ms.support.P2IWith420 == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DSCOnlyIfNecessaryWithBPP : %s\n", mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DSC422NativeNotSupported : %s\n", mode_lib->ms.support.DSC422NativeNotSupported == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: MPCCombineMethodIncompatible : %s\n", mode_lib->ms.support.MPCCombineMethodIncompatible == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ODMCombineTwoToOneSupportCheckOK : %s\n", mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ODMCombineFourToOneSupportCheckOK : %s\n", mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: NotEnoughDSCUnits : %s\n", mode_lib->ms.support.NotEnoughDSCUnits == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: NotEnoughDSCSlices : %s\n", mode_lib->ms.support.NotEnoughDSCSlices == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe : %s\n", mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: InvalidCombinationOfMALLUseForPStateAndStaticScreen : %s\n", mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DSCCLKRequiredMoreThanSupported : %s\n", mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: PixelsPerLinePerDSCUnitSupport : %s\n", mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DTBCLKRequiredMoreThanSupported : %s\n", mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: InvalidCombinationOfMALLUseForPState : %s\n", mode_lib->ms.support.InvalidCombinationOfMALLUseForPState == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified : %s\n", mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ROB Support : %s\n", mode_lib->ms.support.ROBSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DISPCLK DPPCLK Support : %s\n", mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Total Available Pipes Support : %s\n", mode_lib->ms.support.TotalAvailablePipesSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Number Of OTG Support : %s\n", mode_lib->ms.support.NumberOfOTGSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Number Of DP2p0 Support : %s\n", mode_lib->ms.support.NumberOfDP2p0Support == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Writeback Latency Support : %s\n", mode_lib->ms.support.WritebackLatencySupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Writeback Scale Ratio And Taps Support : %s\n", mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Cursor Support : %s\n", mode_lib->ms.support.CursorSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Pitch Support : %s\n", mode_lib->ms.support.PitchSupport == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Viewport Exceeds Surface : %s\n", mode_lib->ms.support.ViewportExceedsSurface == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Prefetch Supported : %s\n", mode_lib->ms.support.PrefetchSupported[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: VActive Bandwith Support : %s\n", mode_lib->ms.support.VActiveBandwithSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Dynamic Metadata Supported : %s\n", mode_lib->ms.support.DynamicMetadataSupported[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Total Vertical Active Bandwidth Support : %s\n", mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: VRatio In Prefetch Supported : %s\n", mode_lib->ms.support.VRatioInPrefetchSupported[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: PTE Buffer Size Not Exceeded : %s\n", mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: DCC Meta Buffer Size Not Exceeded : %s\n", mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Non supported DSC Input BPC : %s\n", mode_lib->ms.support.NonsupportedDSCInputBPC == false ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: Exceeded MALL Size : %s\n", mode_lib->ms.support.ExceededMALLSize == false ? "Supported" : "NOT Supported");
+	/* Passes when neither HostVM nor immediate flip is requested, or when flip is supported for state j. */
+	dml_print("DML: MODE SUPPORT: Host VM or Immediate Flip Supported : %s\n", ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !mode_lib->scratch.dml_core_mode_support_locals.ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j]) ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: dram clock change support : %s\n", mode_lib->scratch.dml_core_mode_support_locals.dram_clock_change_support == true ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: f_clock change support : %s\n", mode_lib->scratch.dml_core_mode_support_locals.f_clock_change_support == true ? "Supported" : "NOT Supported");
+	/*
+	 * Test the element value, not its address: the address of an array
+	 * element is never NULL, so "&...USRRetrainingSupport[j]" would make
+	 * this row report "Supported" unconditionally.
+	 */
+	dml_print("DML: MODE SUPPORT: USR Retraining Support : %s\n", (!mode_lib->ms.policy.USRRetrainingRequiredFinal || mode_lib->ms.support.USRRetrainingSupport[j]) ? "Supported" : "NOT Supported");
+	dml_print("DML: MODE SUPPORT: ===============================================\n");
+}
+
+/*
+ * dml_print_dml_mode_support_info - dump a mode support info structure
+ * @support: structure to dump; dereferenced without a NULL check
+ * @fail_only: when zero every field is printed; when non-zero a field is
+ *             printed only if it holds the value tested for it below
+ *             (0 for most fields, 1 for the fields named after a failure
+ *             condition, and the *_unsupported enumerator for the two
+ *             clock-change entries)
+ *
+ * Scalar fields are printed first, followed by the two-entry arrays that are
+ * labelled "combine=<index>" in the output. Only an opening banner and title
+ * are printed; there is no closing banner.
+ */
+void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *support, dml_bool_t fail_only)
+{
+	/* Evaluated once: true means "print every field regardless of value". */
+	const dml_bool_t show_all = !fail_only;
+	dml_uint_t combine;
+
+	dml_print("DML: ===================================== \n");
+	dml_print("DML: DML_MODE_SUPPORT_INFO_ST\n");
+
+	if (show_all || support->ModeIsSupported == 0)
+		dml_print("DML: support: ModeIsSupported = 0x%x\n", support->ModeIsSupported);
+	if (show_all || support->ImmediateFlipSupport == 0)
+		dml_print("DML: support: ImmediateFlipSupport = 0x%x\n", support->ImmediateFlipSupport);
+	if (show_all || support->WritebackLatencySupport == 0)
+		dml_print("DML: support: WritebackLatencySupport = 0x%x\n", support->WritebackLatencySupport);
+	if (show_all || support->ScaleRatioAndTapsSupport == 0)
+		dml_print("DML: support: ScaleRatioAndTapsSupport = 0x%x\n", support->ScaleRatioAndTapsSupport);
+	if (show_all || support->SourceFormatPixelAndScanSupport == 0)
+		dml_print("DML: support: SourceFormatPixelAndScanSupport = 0x%x\n", support->SourceFormatPixelAndScanSupport);
+	if (show_all || support->MPCCombineMethodIncompatible == 1)
+		dml_print("DML: support: MPCCombineMethodIncompatible = 0x%x\n", support->MPCCombineMethodIncompatible);
+	if (show_all || support->P2IWith420 == 1)
+		dml_print("DML: support: P2IWith420 = 0x%x\n", support->P2IWith420);
+	if (show_all || support->DSCOnlyIfNecessaryWithBPP == 1)
+		dml_print("DML: support: DSCOnlyIfNecessaryWithBPP = 0x%x\n", support->DSCOnlyIfNecessaryWithBPP);
+	if (show_all || support->DSC422NativeNotSupported == 1)
+		dml_print("DML: support: DSC422NativeNotSupported = 0x%x\n", support->DSC422NativeNotSupported);
+	if (show_all || support->LinkRateDoesNotMatchDPVersion == 1)
+		dml_print("DML: support: LinkRateDoesNotMatchDPVersion = 0x%x\n", support->LinkRateDoesNotMatchDPVersion);
+	if (show_all || support->LinkRateForMultistreamNotIndicated == 1)
+		dml_print("DML: support: LinkRateForMultistreamNotIndicated = 0x%x\n", support->LinkRateForMultistreamNotIndicated);
+	if (show_all || support->BPPForMultistreamNotIndicated == 1)
+		dml_print("DML: support: BPPForMultistreamNotIndicated = 0x%x\n", support->BPPForMultistreamNotIndicated);
+	if (show_all || support->MultistreamWithHDMIOreDP == 1)
+		dml_print("DML: support: MultistreamWithHDMIOreDP = 0x%x\n", support->MultistreamWithHDMIOreDP);
+	if (show_all || support->MSOOrODMSplitWithNonDPLink == 1)
+		dml_print("DML: support: MSOOrODMSplitWithNonDPLink = 0x%x\n", support->MSOOrODMSplitWithNonDPLink);
+	if (show_all || support->NotEnoughLanesForMSO == 1)
+		dml_print("DML: support: NotEnoughLanesForMSO = 0x%x\n", support->NotEnoughLanesForMSO);
+	if (show_all || support->NumberOfOTGSupport == 0)
+		dml_print("DML: support: NumberOfOTGSupport = 0x%x\n", support->NumberOfOTGSupport);
+	if (show_all || support->NumberOfDP2p0Support == 0)
+		dml_print("DML: support: NumberOfDP2p0Support = 0x%x\n", support->NumberOfDP2p0Support);
+	if (show_all || support->NonsupportedDSCInputBPC == 1)
+		dml_print("DML: support: NonsupportedDSCInputBPC = 0x%x\n", support->NonsupportedDSCInputBPC);
+	if (show_all || support->WritebackScaleRatioAndTapsSupport == 0)
+		dml_print("DML: support: WritebackScaleRatioAndTapsSupport = 0x%x\n", support->WritebackScaleRatioAndTapsSupport);
+	if (show_all || support->CursorSupport == 0)
+		dml_print("DML: support: CursorSupport = 0x%x\n", support->CursorSupport);
+	if (show_all || support->PitchSupport == 0)
+		dml_print("DML: support: PitchSupport = 0x%x\n", support->PitchSupport);
+	if (show_all || support->ViewportExceedsSurface == 1)
+		dml_print("DML: support: ViewportExceedsSurface = 0x%x\n", support->ViewportExceedsSurface);
+	if (show_all || support->ExceededMALLSize == 1)
+		dml_print("DML: support: ExceededMALLSize = 0x%x\n", support->ExceededMALLSize);
+	if (show_all || support->EnoughWritebackUnits == 0)
+		dml_print("DML: support: EnoughWritebackUnits = 0x%x\n", support->EnoughWritebackUnits);
+	if (show_all || support->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified == 1)
+		dml_print("DML: support: ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = 0x%x\n", support->ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified);
+	if (show_all || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
+		dml_print("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = 0x%x\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
+	if (show_all || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
+		dml_print("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = 0x%x\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
+	if (show_all || support->InvalidCombinationOfMALLUseForPState == 1)
+		dml_print("DML: support: InvalidCombinationOfMALLUseForPState = 0x%x\n", support->InvalidCombinationOfMALLUseForPState);
+
+	if (show_all || support->ExceededMultistreamSlots == 1)
+		dml_print("DML: support: ExceededMultistreamSlots = 0x%x\n", support->ExceededMultistreamSlots);
+	if (show_all || support->ODMCombineTwoToOneSupportCheckOK == 0)
+		dml_print("DML: support: ODMCombineTwoToOneSupportCheckOK = 0x%x\n", support->ODMCombineTwoToOneSupportCheckOK);
+	if (show_all || support->ODMCombineFourToOneSupportCheckOK == 0)
+		dml_print("DML: support: ODMCombineFourToOneSupportCheckOK = 0x%x\n", support->ODMCombineFourToOneSupportCheckOK);
+	if (show_all || support->NotEnoughDSCUnits == 1)
+		dml_print("DML: support: NotEnoughDSCUnits = 0x%x\n", support->NotEnoughDSCUnits);
+	if (show_all || support->NotEnoughDSCSlices == 1)
+		dml_print("DML: support: NotEnoughDSCSlices = 0x%x\n", support->NotEnoughDSCSlices);
+	if (show_all || support->PixelsPerLinePerDSCUnitSupport == 0)
+		dml_print("DML: support: PixelsPerLinePerDSCUnitSupport = 0x%x\n", support->PixelsPerLinePerDSCUnitSupport);
+	if (show_all || support->DSCCLKRequiredMoreThanSupported == 1)
+		dml_print("DML: support: DSCCLKRequiredMoreThanSupported = 0x%x\n", support->DSCCLKRequiredMoreThanSupported);
+	if (show_all || support->DTBCLKRequiredMoreThanSupported == 1)
+		dml_print("DML: support: DTBCLKRequiredMoreThanSupported = 0x%x\n", support->DTBCLKRequiredMoreThanSupported);
+	if (show_all || support->LinkCapacitySupport == 0)
+		dml_print("DML: support: LinkCapacitySupport = 0x%x\n", support->LinkCapacitySupport);
+
+	/* Two-entry arrays; the index is reported as "combine" in the output. */
+	for (combine = 0; combine < 2; combine++) {
+		if (show_all || support->DRAMClockChangeSupport[combine] == dml_dram_clock_change_unsupported)
+			dml_print("DML: support: combine=%d, DRAMClockChangeSupport = %d\n", combine, support->DRAMClockChangeSupport[combine]);
+		if (show_all || support->FCLKChangeSupport[combine] == dml_fclock_change_unsupported)
+			dml_print("DML: support: combine=%d, FCLKChangeSupport = %d\n", combine, support->FCLKChangeSupport[combine]);
+		if (show_all || support->ROBSupport[combine] == 0)
+			dml_print("DML: support: combine=%d, ROBSupport = %d\n", combine, support->ROBSupport[combine]);
+		if (show_all || support->PTEBufferSizeNotExceeded[combine] == 0)
+			dml_print("DML: support: combine=%d, PTEBufferSizeNotExceeded = %d\n", combine, support->PTEBufferSizeNotExceeded[combine]);
+		if (show_all || support->DCCMetaBufferSizeNotExceeded[combine] == 0)
+			dml_print("DML: support: combine=%d, DCCMetaBufferSizeNotExceeded = %d\n", combine, support->DCCMetaBufferSizeNotExceeded[combine]);
+		if (show_all || support->TotalVerticalActiveBandwidthSupport[combine] == 0)
+			dml_print("DML: support: combine=%d, TotalVerticalActiveBandwidthSupport = %d\n", combine, support->TotalVerticalActiveBandwidthSupport[combine]);
+		if (show_all || support->USRRetrainingSupport[combine] == 0)
+			dml_print("DML: support: combine=%d, USRRetrainingSupport = %d\n", combine, support->USRRetrainingSupport[combine]);
+		if (show_all || support->VActiveBandwithSupport[combine] == 0)
+			dml_print("DML: support: combine=%d, VActiveBandwithSupport = %d\n", combine, support->VActiveBandwithSupport[combine]);
+		if (show_all || support->PrefetchSupported[combine] == 0)
+			dml_print("DML: support: combine=%d, PrefetchSupported = %d\n", combine, support->PrefetchSupported[combine]);
+		if (show_all || support->DynamicMetadataSupported[combine] == 0)
+			dml_print("DML: support: combine=%d, DynamicMetadataSupported = %d\n", combine, support->DynamicMetadataSupported[combine]);
+		if (show_all || support->VRatioInPrefetchSupported[combine] == 0)
+			dml_print("DML: support: combine=%d, VRatioInPrefetchSupported = %d\n", combine, support->VRatioInPrefetchSupported[combine]);
+		if (show_all || support->DISPCLK_DPPCLK_Support[combine] == 0)
+			dml_print("DML: support: combine=%d, DISPCLK_DPPCLK_Support = %d\n", combine, support->DISPCLK_DPPCLK_Support[combine]);
+		if (show_all || support->TotalAvailablePipesSupport[combine] == 0)
+			dml_print("DML: support: combine=%d, TotalAvailablePipesSupport = %d\n", combine, support->TotalAvailablePipesSupport[combine]);
+		if (show_all || support->ModeSupport[combine] == 0)
+			dml_print("DML: support: combine=%d, ModeSupport = %d\n", combine, support->ModeSupport[combine]);
+		if (show_all || support->ViewportSizeSupport[combine] == 0)
+			dml_print("DML: support: combine=%d, ViewportSizeSupport = %d\n", combine, support->ViewportSizeSupport[combine]);
+		if (show_all || support->ImmediateFlipSupportedForState[combine] == 0)
+			dml_print("DML: support: combine=%d, ImmediateFlipSupportedForState = %d\n", combine, support->ImmediateFlipSupportedForState[combine]);
+	}
+}
+
+/*
+ * dml_print_dml_display_cfg_timing - dump the per-plane timing configuration
+ * @timing: timing configuration whose arrays are read; dereferenced without a
+ *          NULL check
+ * @num_plane: number of leading array entries to print; nothing is printed
+ *             when it is 0
+ *
+ * PixelClock is printed with %f, every other field with %d.
+ */
+void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane)
+{
+	dml_uint_t k;
+
+	for (k = 0; k < num_plane; k++) {
+		dml_print("DML: timing_cfg: plane=%d, HTotal = %d\n", k, timing->HTotal[k]);
+		dml_print("DML: timing_cfg: plane=%d, VTotal = %d\n", k, timing->VTotal[k]);
+		dml_print("DML: timing_cfg: plane=%d, HActive = %d\n", k, timing->HActive[k]);
+		dml_print("DML: timing_cfg: plane=%d, VActive = %d\n", k, timing->VActive[k]);
+		dml_print("DML: timing_cfg: plane=%d, VFrontPorch = %d\n", k, timing->VFrontPorch[k]);
+		dml_print("DML: timing_cfg: plane=%d, VBlankNom = %d\n", k, timing->VBlankNom[k]);
+		dml_print("DML: timing_cfg: plane=%d, RefreshRate = %d\n", k, timing->RefreshRate[k]);
+		dml_print("DML: timing_cfg: plane=%d, PixelClock = %f\n", k, timing->PixelClock[k]);
+		dml_print("DML: timing_cfg: plane=%d, Interlace = %d\n", k, timing->Interlace[k]);
+		dml_print("DML: timing_cfg: plane=%d, DRRDisplay = %d\n", k, timing->DRRDisplay[k]);
+	}
+}
+
+/*
+ * dml_print_dml_display_cfg_plane - dump the plane configuration
+ * @plane: plane configuration to dump; dereferenced without a NULL check
+ * @num_plane: number of leading per-plane array entries to print
+ *
+ * The plane count and the scalar VM settings are printed once, then every
+ * per-plane array is printed for indices below @num_plane. The four scaling
+ * ratios use %3.2f; all other fields use %d.
+ */
+void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane)
+{
+	dml_uint_t k;
+
+	/* Settings that are not indexed by plane. */
+	dml_print("DML: plane_cfg: num_plane = %d\n", num_plane);
+	dml_print("DML: plane_cfg: GPUVMEnable = %d\n", plane->GPUVMEnable);
+	dml_print("DML: plane_cfg: HostVMEnable = %d\n", plane->HostVMEnable);
+	dml_print("DML: plane_cfg: GPUVMMaxPageTableLevels = %d\n", plane->GPUVMMaxPageTableLevels);
+	dml_print("DML: plane_cfg: HostVMMaxPageTableLevels = %d\n", plane->HostVMMaxPageTableLevels);
+
+	for (k = 0; k < num_plane; k++) {
+		dml_print("DML: plane_cfg: plane=%d, GPUVMMinPageSizeKBytes = %d\n", k, plane->GPUVMMinPageSizeKBytes[k]);
+		dml_print("DML: plane_cfg: plane=%d, ForceOneRowForFrame = %d\n", k, plane->ForceOneRowForFrame[k]);
+		dml_print("DML: plane_cfg: plane=%d, PTEBufferModeOverrideEn = %d\n", k, plane->PTEBufferModeOverrideEn[k]);
+		dml_print("DML: plane_cfg: plane=%d, PTEBufferMode = %d\n", k, plane->PTEBufferMode[k]);
+		dml_print("DML: plane_cfg: plane=%d, DETSizeOverride = %d\n", k, plane->DETSizeOverride[k]);
+		dml_print("DML: plane_cfg: plane=%d, UseMALLForStaticScreen = %d\n", k, plane->UseMALLForStaticScreen[k]);
+		dml_print("DML: plane_cfg: plane=%d, UseMALLForPStateChange = %d\n", k, plane->UseMALLForPStateChange[k]);
+		dml_print("DML: plane_cfg: plane=%d, BlendingAndTiming = %d\n", k, plane->BlendingAndTiming[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportWidth = %d\n", k, plane->ViewportWidth[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportHeight = %d\n", k, plane->ViewportHeight[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportWidthChroma = %d\n", k, plane->ViewportWidthChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportHeightChroma = %d\n", k, plane->ViewportHeightChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportXStart = %d\n", k, plane->ViewportXStart[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportXStartC = %d\n", k, plane->ViewportXStartC[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportYStart = %d\n", k, plane->ViewportYStart[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportYStartC = %d\n", k, plane->ViewportYStartC[k]);
+		dml_print("DML: plane_cfg: plane=%d, ViewportStationary = %d\n", k, plane->ViewportStationary[k]);
+		dml_print("DML: plane_cfg: plane=%d, ScalerEnabled = %d\n", k, plane->ScalerEnabled[k]);
+		dml_print("DML: plane_cfg: plane=%d, HRatio = %3.2f\n", k, plane->HRatio[k]);
+		dml_print("DML: plane_cfg: plane=%d, VRatio = %3.2f\n", k, plane->VRatio[k]);
+		dml_print("DML: plane_cfg: plane=%d, HRatioChroma = %3.2f\n", k, plane->HRatioChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, VRatioChroma = %3.2f\n", k, plane->VRatioChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, HTaps = %d\n", k, plane->HTaps[k]);
+		dml_print("DML: plane_cfg: plane=%d, VTaps = %d\n", k, plane->VTaps[k]);
+		dml_print("DML: plane_cfg: plane=%d, HTapsChroma = %d\n", k, plane->HTapsChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, VTapsChroma = %d\n", k, plane->VTapsChroma[k]);
+		dml_print("DML: plane_cfg: plane=%d, LBBitPerPixel = %d\n", k, plane->LBBitPerPixel[k]);
+		dml_print("DML: plane_cfg: plane=%d, SourceScan = %d\n", k, plane->SourceScan[k]);
+		dml_print("DML: plane_cfg: plane=%d, ScalerRecoutWidth = %d\n", k, plane->ScalerRecoutWidth[k]);
+		dml_print("DML: plane_cfg: plane=%d, NumberOfCursors = %d\n", k, plane->NumberOfCursors[k]);
+		dml_print("DML: plane_cfg: plane=%d, CursorWidth = %d\n", k, plane->CursorWidth[k]);
+		dml_print("DML: plane_cfg: plane=%d, CursorBPP = %d\n", k, plane->CursorBPP[k]);
+
+		dml_print("DML: plane_cfg: plane=%d, DynamicMetadataEnable = %d\n", k, plane->DynamicMetadataEnable[k]);
+		dml_print("DML: plane_cfg: plane=%d, DynamicMetadataLinesBeforeActiveRequired = %d\n", k, plane->DynamicMetadataLinesBeforeActiveRequired[k]);
+		dml_print("DML: plane_cfg: plane=%d, DynamicMetadataTransmittedBytes = %d\n", k, plane->DynamicMetadataTransmittedBytes[k]);
+	}
+}
+
+/*
+ * dml_print_dml_display_cfg_surface - dump the per-plane surface configuration
+ * @surface: surface configuration whose arrays are read; dereferenced without
+ *           a NULL check
+ * @num_plane: number of leading array entries to print; nothing is printed
+ *             when it is 0
+ *
+ * The DCC rate and zero-size-request fraction fields are printed with %f,
+ * the remaining fields with %d.
+ */
+void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane)
+{
+	dml_uint_t k;
+
+	for (k = 0; k < num_plane; k++) {
+		dml_print("DML: surface_cfg: plane=%d, PitchY = %d\n", k, surface->PitchY[k]);
+		dml_print("DML: surface_cfg: plane=%d, SurfaceWidthY = %d\n", k, surface->SurfaceWidthY[k]);
+		dml_print("DML: surface_cfg: plane=%d, SurfaceHeightY = %d\n", k, surface->SurfaceHeightY[k]);
+		dml_print("DML: surface_cfg: plane=%d, PitchC = %d\n", k, surface->PitchC[k]);
+		dml_print("DML: surface_cfg: plane=%d, SurfaceWidthC = %d\n", k, surface->SurfaceWidthC[k]);
+		dml_print("DML: surface_cfg: plane=%d, SurfaceHeightC = %d\n", k, surface->SurfaceHeightC[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCEnable = %d\n", k, surface->DCCEnable[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCMetaPitchY = %d\n", k, surface->DCCMetaPitchY[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCMetaPitchC = %d\n", k, surface->DCCMetaPitchC[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCRateLuma = %f\n", k, surface->DCCRateLuma[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCRateChroma = %f\n", k, surface->DCCRateChroma[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCFractionOfZeroSizeRequestsLuma = %f\n", k, surface->DCCFractionOfZeroSizeRequestsLuma[k]);
+		dml_print("DML: surface_cfg: plane=%d, DCCFractionOfZeroSizeRequestsChroma= %f\n", k, surface->DCCFractionOfZeroSizeRequestsChroma[k]);
+	}
+}
+
+/*
+ * dml_print_dml_display_cfg_hw_resource - dump the hardware resource assignment
+ * @hw: hardware resource configuration; dereferenced without a NULL check
+ * @num_plane: number of leading per-plane array entries to print
+ *
+ * The per-plane entries are printed first; DLGRefClkFreqMHz is printed once
+ * afterwards, even when @num_plane is 0.
+ */
+void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane)
+{
+	dml_uint_t k;
+
+	for (k = 0; k < num_plane; k++) {
+		dml_print("DML: hw_resource: plane=%d, ODMMode = %d\n", k, hw->ODMMode[k]);
+		dml_print("DML: hw_resource: plane=%d, DPPPerSurface = %d\n", k, hw->DPPPerSurface[k]);
+		dml_print("DML: hw_resource: plane=%d, DSCEnabled = %d\n", k, hw->DSCEnabled[k]);
+		dml_print("DML: hw_resource: plane=%d, NumberOfDSCSlices = %d\n", k, hw->NumberOfDSCSlices[k]);
+	}
+	dml_print("DML: hw_resource: DLGRefClkFreqMHz = %f\n", hw->DLGRefClkFreqMHz);
+}
+
+/*
+ * dml_print_soc_state_bounding_box - dump one SoC state bounding box
+ * @state: bounding box entry to dump; dereferenced without a NULL check
+ *
+ * Every field is printed on its own "DML: state_bbox: <field> = <value>" line.
+ * All fields use %f except use_ideal_dram_bw_strobe, which uses %d.
+ */
+__DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state)
+{
+	/* Clock fields (*_mhz) and DRAM speed */
+	dml_print("DML: state_bbox: socclk_mhz = %f\n",
+		  state->socclk_mhz);
+	dml_print("DML: state_bbox: dscclk_mhz = %f\n",
+		  state->dscclk_mhz);
+	dml_print("DML: state_bbox: phyclk_mhz = %f\n",
+		  state->phyclk_mhz);
+	dml_print("DML: state_bbox: phyclk_d18_mhz = %f\n",
+		  state->phyclk_d18_mhz);
+	dml_print("DML: state_bbox: phyclk_d32_mhz = %f\n",
+		  state->phyclk_d32_mhz);
+	dml_print("DML: state_bbox: dtbclk_mhz = %f\n",
+		  state->dtbclk_mhz);
+	dml_print("DML: state_bbox: dispclk_mhz = %f\n",
+		  state->dispclk_mhz);
+	dml_print("DML: state_bbox: dppclk_mhz = %f\n",
+		  state->dppclk_mhz);
+	dml_print("DML: state_bbox: fabricclk_mhz = %f\n",
+		  state->fabricclk_mhz);
+	dml_print("DML: state_bbox: dcfclk_mhz = %f\n",
+		  state->dcfclk_mhz);
+	dml_print("DML: state_bbox: dram_speed_mts = %f\n",
+		  state->dram_speed_mts);
+
+	/* Latency fields (*_us) and the urgent-latency adjustment pair */
+	dml_print("DML: state_bbox: urgent_latency_pixel_data_only_us = %f\n",
+		  state->urgent_latency_pixel_data_only_us);
+	dml_print("DML: state_bbox: urgent_latency_pixel_mixed_with_vm_data_us = %f\n",
+		  state->urgent_latency_pixel_mixed_with_vm_data_us);
+	dml_print("DML: state_bbox: urgent_latency_vm_data_only_us = %f\n",
+		  state->urgent_latency_vm_data_only_us);
+	dml_print("DML: state_bbox: writeback_latency_us = %f\n",
+		  state->writeback_latency_us);
+	dml_print("DML: state_bbox: urgent_latency_adjustment_fabric_clock_component_us = %f\n",
+		  state->urgent_latency_adjustment_fabric_clock_component_us);
+	dml_print("DML: state_bbox: urgent_latency_adjustment_fabric_clock_reference_mhz= %f\n",
+		  state->urgent_latency_adjustment_fabric_clock_reference_mhz);
+	dml_print("DML: state_bbox: sr_exit_time_us = %f\n",
+		  state->sr_exit_time_us);
+	dml_print("DML: state_bbox: sr_enter_plus_exit_time_us = %f\n",
+		  state->sr_enter_plus_exit_time_us);
+	dml_print("DML: state_bbox: sr_exit_z8_time_us = %f\n",
+		  state->sr_exit_z8_time_us);
+	dml_print("DML: state_bbox: sr_enter_plus_exit_z8_time_us = %f\n",
+		  state->sr_enter_plus_exit_z8_time_us);
+	dml_print("DML: state_bbox: dram_clock_change_latency_us = %f\n",
+		  state->dram_clock_change_latency_us);
+	dml_print("DML: state_bbox: fclk_change_latency_us = %f\n",
+		  state->fclk_change_latency_us);
+	dml_print("DML: state_bbox: usr_retraining_latency_us = %f\n",
+		  state->usr_retraining_latency_us);
+
+	dml_print("DML: state_bbox: use_ideal_dram_bw_strobe = %d\n",
+		  state->use_ideal_dram_bw_strobe);
+}
+
+__DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc)
+{ /* Log the fields of @soc, one dml_print() line per field: reference clocks first, then the rest. */
+ dml_print("DML: soc_bbox: dprefclk_mhz = %f\n", soc->dprefclk_mhz);
+ dml_print("DML: soc_bbox: xtalclk_mhz = %f\n", soc->xtalclk_mhz);
+ dml_print("DML: soc_bbox: pcierefclk_mhz = %f\n", soc->pcierefclk_mhz);
+ dml_print("DML: soc_bbox: refclk_mhz = %f\n", soc->refclk_mhz);
+ dml_print("DML: soc_bbox: amclk_mhz = %f\n", soc->amclk_mhz);
+ /* Remaining parameters: outstanding requests, bandwidth percentages, bus widths, page sizes, downspread. */
+ dml_print("DML: soc_bbox: max_outstanding_reqs = %f\n", soc->max_outstanding_reqs); /* NOTE(review): name suggests an integer count; confirm %f matches the field type */
+ dml_print("DML: soc_bbox: pct_ideal_sdp_bw_after_urgent = %f\n", soc->pct_ideal_sdp_bw_after_urgent);
+ dml_print("DML: soc_bbox: pct_ideal_fabric_bw_after_urgent = %f\n", soc->pct_ideal_fabric_bw_after_urgent);
+ dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_pixel_only = %f\n", soc->pct_ideal_dram_bw_after_urgent_pixel_only);
+ dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_pixel_and_vm = %f\n", soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm);
+ dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_vm_only = %f\n", soc->pct_ideal_dram_bw_after_urgent_vm_only);
+ dml_print("DML: soc_bbox: pct_ideal_dram_bw_after_urgent_strobe = %f\n", soc->pct_ideal_dram_bw_after_urgent_strobe);
+ dml_print("DML: soc_bbox: max_avg_sdp_bw_use_normal_percent = %f\n", soc->max_avg_sdp_bw_use_normal_percent);
+ dml_print("DML: soc_bbox: max_avg_fabric_bw_use_normal_percent = %f\n", soc->max_avg_fabric_bw_use_normal_percent);
+ dml_print("DML: soc_bbox: max_avg_dram_bw_use_normal_percent = %f\n", soc->max_avg_dram_bw_use_normal_percent);
+ dml_print("DML: soc_bbox: max_avg_dram_bw_use_normal_strobe_percent = %f\n", soc->max_avg_dram_bw_use_normal_strobe_percent);
+ dml_print("DML: soc_bbox: round_trip_ping_latency_dcfclk_cycles = %d\n", soc->round_trip_ping_latency_dcfclk_cycles);
+ dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_pixel_only_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_pixel_only_bytes);
+ dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes);
+ dml_print("DML: soc_bbox: urgent_out_of_order_return_per_channel_vm_only_bytes = %d\n", soc->urgent_out_of_order_return_per_channel_vm_only_bytes);
+ dml_print("DML: soc_bbox: num_chans = %d\n", soc->num_chans);
+ dml_print("DML: soc_bbox: return_bus_width_bytes = %d\n", soc->return_bus_width_bytes);
+ dml_print("DML: soc_bbox: dram_channel_width_bytes = %d\n", soc->dram_channel_width_bytes);
+ dml_print("DML: soc_bbox: fabric_datapath_to_dcn_data_return_bytes = %d\n", soc->fabric_datapath_to_dcn_data_return_bytes);
+ dml_print("DML: soc_bbox: hostvm_min_page_size_kbytes = %d\n", soc->hostvm_min_page_size_kbytes);
+ dml_print("DML: soc_bbox: gpuvm_min_page_size_kbytes = %d\n", soc->gpuvm_min_page_size_kbytes);
+ dml_print("DML: soc_bbox: phy_downspread_percent = %f\n", soc->phy_downspread_percent);
+ dml_print("DML: soc_bbox: dcn_downspread_percent = %f\n", soc->dcn_downspread_percent);
+ dml_print("DML: soc_bbox: smn_latency_us = %f\n", soc->smn_latency_us);
+ dml_print("DML: soc_bbox: mall_allocated_for_dcn_mbytes = %d\n", soc->mall_allocated_for_dcn_mbytes);
+ dml_print("DML: soc_bbox: dispclk_dppclk_vco_speed_mhz = %f\n", soc->dispclk_dppclk_vco_speed_mhz);
+ dml_print("DML: soc_bbox: do_urgent_latency_adjustment = %d\n", soc->do_urgent_latency_adjustment);
+}
+
+__DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg)
+{ /* Log @clk_cfg: a legend for the *_option values, the dcfclk/dispclk settings, then per-plane dppclk settings. */
+ dml_print("DML: clk_cfg: 0-use_required, 1-use pipe.clks_cfg, 2-use state bbox\n");
+ dml_print("DML: clk_cfg: dcfclk_option = %d\n", clk_cfg->dcfclk_option);
+ dml_print("DML: clk_cfg: dispclk_option = %d\n", clk_cfg->dispclk_option);
+
+ dml_print("DML: clk_cfg: dcfclk_mhz = %f\n", clk_cfg->dcfclk_mhz);
+ dml_print("DML: clk_cfg: dispclk_mhz = %f\n", clk_cfg->dispclk_mhz);
+ /* NOTE(review): the bound here is DCN_DML__NUM_PLANE, while the rest of this file loops to __DML_NUM_PLANES__ - confirm they agree. */
+ for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) {
+ dml_print("DML: clk_cfg: i=%d, dppclk_option = %d\n", i, clk_cfg->dppclk_option[i]);
+ dml_print("DML: clk_cfg: i=%d, dppclk_mhz = %f\n", i, clk_cfg->dppclk_mhz[i]);
+ }
+}
+
+/*
+ * Report whether @Scan is one of dml_rotation_90, dml_rotation_90m,
+ * dml_rotation_270 or dml_rotation_270m. Every other rotation value
+ * yields false.
+ */
+dml_bool_t dml_is_vertical_rotation(enum dml_rotation_angle Scan)
+{
+	return Scan == dml_rotation_90 || Scan == dml_rotation_90m ||
+	       Scan == dml_rotation_270 || Scan == dml_rotation_270m;
+} // dml_is_vertical_rotation
+
+/* Bits per pixel for cursor format @ebpp: 2, 32 or 64 for the three known formats, 0 for anything else. */
+dml_uint_t dml_get_cursor_bit_per_pixel(enum dml_cursor_bpp ebpp)
+{
+	dml_uint_t bits = 0;
+
+	if (ebpp == dml_cur_2bit)
+		bits = 2;
+	else if (ebpp == dml_cur_32bit)
+		bits = 32;
+	else if (ebpp == dml_cur_64bit)
+		bits = 64;
+	return bits;
+}
+
+/// @brief Count the planes in display_cfg whose ViewportWidth is greater than zero
+dml_uint_t dml_get_num_active_planes(const struct dml_display_cfg_st *display_cfg)
+{
+	dml_uint_t count = 0;
+
+	for (dml_uint_t plane = 0; plane < __DML_NUM_PLANES__; plane++) {
+		if (display_cfg->plane.ViewportWidth[plane] > 0)
+			count++;
+	}
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: num_active_planes = %d\n", __func__, count);
+#endif
+	return count;
+}
+
+/// @brief Sum DPPPerSurface over planes [0, N), where N = dml_get_num_active_planes(display_cfg)
+dml_uint_t dml_get_num_active_pipes(const struct dml_display_cfg_st *display_cfg)
+{
+	/* Evaluate once: the count is loop-invariant, and the helper rescans every plane (and logs under __DML_VBA_DEBUG__). */
+	const dml_uint_t num_active_planes = dml_get_num_active_planes(display_cfg);
+	dml_uint_t num_active_pipes = 0;
+
+	for (dml_uint_t j = 0; j < num_active_planes; j++)
+		num_active_pipes += display_cfg->hw.DPPPerSurface[j];
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes);
+#endif
+	return num_active_pipes;
+}
+
+dml_uint_t dml_get_plane_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
+{
+	ASSERT(pipe_idx < __DML_NUM_PLANES__); /* same index range dml_get_pipe_idx() scans in pipe_plane[] */
+	return mode_lib->mp.pipe_plane[pipe_idx]; /* plane index recorded for this pipe */
+}
+
+/*
+ * Return the lowest pipe index whose pipe_plane[] entry equals plane_idx.
+ * When no entry matches, the lookup asserts and 0 is returned.
+ */
+dml_uint_t dml_get_pipe_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t plane_idx)
+{
+	dml_uint_t i = 0;
+
+	ASSERT(plane_idx < __DML_NUM_PLANES__);
+
+	while (i < __DML_NUM_PLANES__ && mode_lib->mp.pipe_plane[i] != plane_idx)
+		i++;
+
+	/* i == __DML_NUM_PLANES__ here means the scan found no pipe for plane_idx. */
+	ASSERT(i < __DML_NUM_PLANES__);
+
+	return (i < __DML_NUM_PLANES__) ? i : 0;
+}
+
+/* Build pipe_plane[]: plane k claims the next DPPPerSurface[k] slots in order; unclaimed slots keep __DML_PIPE_NO_PLANE__. */
+void dml_calc_pipe_plane_mapping(const struct dml_hw_resource_st *hw, dml_uint_t *pipe_plane)
+{
+	dml_uint_t pipe_idx = 0;
+
+	for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k)
+		pipe_plane[k] = __DML_PIPE_NO_PLANE__;
+
+	for (dml_uint_t plane_idx = 0; plane_idx < __DML_NUM_PLANES__; plane_idx++) {
+		/* Only the __DML_NUM_PLANES__ slots reset above may be written: flag and clamp a DPP total beyond that. */
+		ASSERT(hw->DPPPerSurface[plane_idx] <= __DML_NUM_PLANES__ - pipe_idx);
+		for (dml_uint_t i = 0; i < hw->DPPPerSurface[plane_idx] && pipe_idx < __DML_NUM_PLANES__; i++)
+			pipe_plane[pipe_idx++] = plane_idx;
+	}
+}
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.h
new file mode 100644
index 000000000000..113b0265e1d1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DISPLAY_MODE_UTIL_H__
+#define __DISPLAY_MODE_UTIL_H__
+
+#include "display_mode_core_structs.h"
+#include "cmntypes.h"
+
+
+#include "dml_assert.h"
+#include "dml_logging.h"
+
+__DML_DLL_EXPORT__ dml_bool_t dml_util_is_420(enum dml_source_format_class source_format);
+__DML_DLL_EXPORT__ dml_float_t dml_ceil(dml_float_t x, dml_float_t granularity);
+__DML_DLL_EXPORT__ dml_float_t dml_floor(dml_float_t x, dml_float_t granularity);
+__DML_DLL_EXPORT__ dml_float_t dml_min(dml_float_t x, dml_float_t y);
+__DML_DLL_EXPORT__ dml_float_t dml_min3(dml_float_t x, dml_float_t y, dml_float_t z);
+__DML_DLL_EXPORT__ dml_float_t dml_min4(dml_float_t x, dml_float_t y, dml_float_t z, dml_float_t w);
+__DML_DLL_EXPORT__ dml_float_t dml_max(dml_float_t x, dml_float_t y);
+__DML_DLL_EXPORT__ dml_float_t dml_max3(dml_float_t x, dml_float_t y, dml_float_t z);
+__DML_DLL_EXPORT__ dml_float_t dml_max4(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d);
+__DML_DLL_EXPORT__ dml_float_t dml_max5(dml_float_t a, dml_float_t b, dml_float_t c, dml_float_t d, dml_float_t e);
+__DML_DLL_EXPORT__ dml_float_t dml_log(dml_float_t x, dml_float_t base);
+__DML_DLL_EXPORT__ dml_float_t dml_log2(dml_float_t x);
+__DML_DLL_EXPORT__ dml_float_t dml_round(dml_float_t val, dml_bool_t bankers_rounding);
+__DML_DLL_EXPORT__ dml_float_t dml_pow(dml_float_t base, int exp);
+__DML_DLL_EXPORT__ dml_uint_t dml_round_to_multiple(dml_uint_t num, dml_uint_t multiple, dml_bool_t up);
+__DML_DLL_EXPORT__ dml_bool_t dml_is_vertical_rotation(enum dml_rotation_angle scan);
+__DML_DLL_EXPORT__ dml_uint_t dml_get_cursor_bit_per_pixel(enum dml_cursor_bpp ebpp);
+__DML_DLL_EXPORT__ void dml_print_data_rq_regs_st(const dml_display_plane_rq_regs_st *data_rq_regs);
+__DML_DLL_EXPORT__ void dml_print_rq_regs_st(const dml_display_rq_regs_st *rq_regs);
+__DML_DLL_EXPORT__ void dml_print_dlg_regs_st(const dml_display_dlg_regs_st *dlg_regs);
+__DML_DLL_EXPORT__ void dml_print_ttu_regs_st(const dml_display_ttu_regs_st *ttu_regs);
+__DML_DLL_EXPORT__ void dml_print_dml_policy(const struct dml_mode_eval_policy_st *policy);
+__DML_DLL_EXPORT__ void dml_print_mode_support(struct display_mode_lib_st *mode_lib, dml_uint_t j);
+__DML_DLL_EXPORT__ void dml_print_dml_mode_support_info(const struct dml_mode_support_info_st *support, dml_bool_t fail_only);
+__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_timing(const struct dml_timing_cfg_st *timing, dml_uint_t num_plane);
+__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_plane(const struct dml_plane_cfg_st *plane, dml_uint_t num_plane);
+__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_surface(const struct dml_surface_cfg_st *surface, dml_uint_t num_plane);
+__DML_DLL_EXPORT__ void dml_print_dml_display_cfg_hw_resource(const struct dml_hw_resource_st *hw, dml_uint_t num_plane);
+__DML_DLL_EXPORT__ void dml_print_soc_state_bounding_box(const struct soc_state_bounding_box_st *state);
+__DML_DLL_EXPORT__ void dml_print_soc_bounding_box(const struct soc_bounding_box_st *soc);
+__DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg);
+
+__DML_DLL_EXPORT__ dml_uint_t dml_get_num_active_planes(const struct dml_display_cfg_st *display_cfg);
+__DML_DLL_EXPORT__ dml_uint_t dml_get_num_active_pipes(const struct dml_display_cfg_st *display_cfg);
+__DML_DLL_EXPORT__ dml_uint_t dml_get_plane_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx);
+__DML_DLL_EXPORT__ dml_uint_t dml_get_pipe_idx(const struct display_mode_lib_st *mode_lib, dml_uint_t plane_idx);
+__DML_DLL_EXPORT__ void dml_calc_pipe_plane_mapping(const struct dml_hw_resource_st *hw, dml_uint_t *pipe_plane);
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
new file mode 100644
index 000000000000..bf5e7f4e0416
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -0,0 +1,929 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml21_wrapper.h"
+#include "dml2_core_dcn4_calcs.h"
+#include "dml2_internal_shared_types.h"
+#include "dml2_internal_types.h"
+#include "dml21_utils.h"
+#include "dml21_translation_helper.h"
+#include "soc_and_ip_translator.h"
+
+static void dml21_populate_pmo_options(struct dml2_pmo_options *pmo_options,
+ const struct dc *in_dc,
+ const struct dml2_configuration_options *config)
+{
+ bool disable_fams2 = !in_dc->debug.fams2_config.bits.enable;
+ /* Derive the PMO option flags from dc debug settings and @config; every flag written here is a "disable" switch. */
+ /* ODM options */
+ pmo_options->disable_dyn_odm = !config->minimize_dispclk_using_odm;
+ pmo_options->disable_dyn_odm_for_multi_stream = true;
+ pmo_options->disable_dyn_odm_for_stream_with_svp = true;
+ /* Bits 1..4 of dml21_disable_pstate_method_mask disable vblank, SubVP, clamped DRR and variable DRR respectively. */
+ pmo_options->disable_vblank = ((in_dc->debug.dml21_disable_pstate_method_mask >> 1) & 1);
+
+ /* NOTE: DRR and SubVP Require FAMS2 */
+ pmo_options->disable_svp = ((in_dc->debug.dml21_disable_pstate_method_mask >> 2) & 1) ||
+ in_dc->debug.force_disable_subvp ||
+ disable_fams2;
+ pmo_options->disable_drr_clamped = ((in_dc->debug.dml21_disable_pstate_method_mask >> 3) & 1) ||
+ disable_fams2;
+ pmo_options->disable_drr_var = ((in_dc->debug.dml21_disable_pstate_method_mask >> 4) & 1) ||
+ disable_fams2;
+ pmo_options->disable_fams2 = disable_fams2;
+ /* disable_fams_gaming: INGAME_FAMS_DISABLE sets both flags below, INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY only the first. */
+ pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE ||
+ in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY;
+ pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE;
+}
+
+/*
+ * Map a DC hardware version onto the DML2.1 project id. Only
+ * DCN_VERSION_4_01 is recognised; anything else is reported through
+ * DC_ERR() and yields dml2_project_invalid.
+ */
+static enum dml2_project_id dml21_dcn_revision_to_dml2_project_id(enum dce_version dcn_version)
+{
+	switch (dcn_version) {
+	case DCN_VERSION_4_01:
+		return dml2_project_dcn4x_stage2_auto_drr_svp;
+	default:
+		DC_ERR("unsupported dcn version for DML21!");
+		return dml2_project_invalid;
+	}
+}
+
+void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init,
+		const struct dml2_configuration_options *config,
+		const struct dc *in_dc)
+{
+	dml_init->options.project_id = dml21_dcn_revision_to_dml2_project_id(in_dc->ctx->dce_version);
+	/* SoC bounding box and IP caps: copied from the external params block unless native construction is requested. */
+	if (!config->use_native_soc_bb_construction) {
+		dml_init->soc_bb = config->external_socbb_ip_params->soc_bb;
+		dml_init->ip_caps = config->external_socbb_ip_params->ip_params;
+	} else {
+		in_dc->soc_and_ip_translator->translator_funcs->get_soc_bb(&dml_init->soc_bb, in_dc, config);
+		in_dc->soc_and_ip_translator->translator_funcs->get_ip_caps(&dml_init->ip_caps);
+	}
+	/* PMO options are derived from the dc debug flags and @config. */
+	dml21_populate_pmo_options(&dml_init->options.pmo_options, in_dc, config);
+}
+
+/* The dc max_v_total cap for @stream, reduced by (v_front_porch + 1) when caps.vtotal_limited_by_fp2 is set. */
+static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
+{
+	unsigned int fp2_reserve = 0;
+
+	if (stream->ctx->dc->caps.vtotal_limited_by_fp2)
+		fp2_reserve = stream->timing.v_front_porch + 1;
+
+	return stream->ctx->dc->caps.max_v_total - fp2_reserve;
+}
+
+static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing,
+ struct dc_stream_state *stream,
+ struct pipe_ctx *pipe_ctx,
+ struct dml2_context *dml_ctx)
+{
+ unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz;
+ uint32_t pix_clk_100hz;
+ /* Fill @timing from @stream's timing, widened by any DSC padding recorded in @pipe_ctx. */
+ timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding;
+ timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
+ timing->h_front_porch = stream->timing.h_front_porch;
+ timing->v_front_porch = stream->timing.v_front_porch;
+ timing->pixel_clock_khz = stream->timing.pix_clk_100hz / 10; /* 100 Hz units -> kHz */
+ if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0)
+ timing->pixel_clock_khz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz / 10;
+ if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
+ timing->pixel_clock_khz *= 2;
+ timing->h_total = stream->timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding;
+ timing->v_total = stream->timing.v_total;
+ timing->h_sync_width = stream->timing.h_sync_width;
+ timing->interlaced = stream->timing.flags.INTERLACE;
+ /* Horizontal blank end: hblank start minus the addressable width, the DSC padding and both borders. */
+ hblank_start = stream->timing.h_total - stream->timing.h_front_porch;
+
+ timing->h_blank_end = hblank_start - stream->timing.h_addressable - pipe_ctx->dsc_padding_params.dsc_hactive_padding
+ - stream->timing.h_border_left - stream->timing.h_border_right;
+ /* NOTE(review): this guard compares against h_addressable only; the borders and DSC padding subtracted above are not covered. */
+ if (hblank_start < stream->timing.h_addressable)
+ timing->h_blank_end = 0;
+
+ vblank_start = stream->timing.v_total - stream->timing.v_front_porch;
+
+ timing->v_blank_end = vblank_start - stream->timing.v_addressable
+ - stream->timing.v_border_top - stream->timing.v_border_bottom;
+
+ timing->drr_config.enabled = stream->ignore_msa_timing_param;
+ timing->drr_config.drr_active_variable = stream->vrr_active_variable;
+ timing->drr_config.drr_active_fixed = stream->vrr_active_fixed;
+ timing->drr_config.disallowed = !stream->allow_freesync;
+
+ /* limit min refresh rate to DC cap */
+ min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz;
+ if (stream->ctx->dc->caps.max_v_total != 0) { /* a zero cap skips the hardware-derived limit */
+ if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) {
+ pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz;
+ } else {
+ pix_clk_100hz = stream->timing.pix_clk_100hz;
+ }
+ min_hardware_refresh_in_uhz = div64_u64((pix_clk_100hz * 100000000ULL),
+ (timing->h_total * (long long)calc_max_hardware_v_total(stream)));
+ }
+ /* Use whichever floor is higher: the stream's own minimum or the hardware-derived one. */
+ timing->drr_config.min_refresh_uhz = max(stream->timing.min_refresh_in_uhz, min_hardware_refresh_in_uhz);
+
+ if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase &&
+ stream->ctx->dc->config.enable_fpo_flicker_detection == 1)
+ timing->drr_config.max_instant_vtotal_delta = dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase(stream, false);
+ else
+ timing->drr_config.max_instant_vtotal_delta = 0;
+
+ if (stream->timing.flags.DSC) {
+ timing->dsc.enable = dml2_dsc_enable;
+ timing->dsc.overrides.num_slices = stream->timing.dsc_cfg.num_slices_h;
+ timing->dsc.dsc_compressed_bpp_x16 = stream->timing.dsc_cfg.bits_per_pixel;
+ } else
+ timing->dsc.enable = dml2_dsc_disable;
+
+ switch (stream->timing.display_color_depth) {
+ case COLOR_DEPTH_666:
+ timing->bpc = 6;
+ break;
+ case COLOR_DEPTH_888:
+ timing->bpc = 8;
+ break;
+ case COLOR_DEPTH_101010:
+ timing->bpc = 10;
+ break;
+ case COLOR_DEPTH_121212:
+ timing->bpc = 12;
+ break;
+ case COLOR_DEPTH_141414:
+ timing->bpc = 14;
+ break;
+ case COLOR_DEPTH_161616:
+ timing->bpc = 16;
+ break;
+ case COLOR_DEPTH_999:
+ timing->bpc = 9;
+ break;
+ case COLOR_DEPTH_111111:
+ timing->bpc = 11;
+ break;
+ default: /* any other depth value is treated as 8 bpc */
+ timing->bpc = 8;
+ break;
+ }
+ /* vblank_nom: the lines of v_total not covered by v_active. */
+ timing->vblank_nom = timing->v_total - timing->v_active;
+}
+
+/*
+ * Fill the DML2.1 link-output descriptor for @stream: encoder type from the
+ * signal, output format from the pixel encoding, plus the audio fields.
+ * @pipe is only passed to check_dp2p0_output_encoder().
+ */
+static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output,
+		struct dc_stream_state *stream, const struct pipe_ctx *pipe)
+{
+	output->output_dp_lane_count = 4;
+
+	/* Encoder: any signal that is not eDP, HDMI or DVI is treated as DP. */
+	switch (stream->signal) {
+	case SIGNAL_TYPE_EDP:
+		output->output_encoder = dml2_edp;
+		break;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		output->output_encoder = dml2_hdmi;
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT:
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		/* DP and MST use dml2_dp2p0 when check_dp2p0_output_encoder() says so. */
+		if (check_dp2p0_output_encoder(pipe))
+			output->output_encoder = dml2_dp2p0;
+		else
+			output->output_encoder = dml2_dp;
+		break;
+	default:
+		output->output_encoder = dml2_dp;
+		break;
+	}
+
+	/* Output format: RGB, YCbCr 4:4:4 and unrecognised encodings all use dml2_444. */
+	switch (stream->timing.pixel_encoding) {
+	case PIXEL_ENCODING_YCBCR420:
+		output->output_format = dml2_420;
+		break;
+	case PIXEL_ENCODING_YCBCR422:
+		/* dml2_n422 only with DSC on and ycbcr422_simple off; dml2_s422 otherwise. */
+		if (!stream->timing.flags.DSC || stream->timing.dsc_cfg.ycbcr422_simple)
+			output->output_format = dml2_s422;
+		else
+			output->output_format = dml2_n422;
+		break;
+	case PIXEL_ENCODING_RGB:
+	case PIXEL_ENCODING_YCBCR444:
+	default:
+		output->output_format = dml2_444;
+		break;
+	}
+
+	/*
+	 * The link rate is reported as not-applicable for every signal type
+	 * (NONE, DVI, HDMI, LVDS, RGB, DP, MST, eDP, VIRTUAL and any other).
+	 */
+	output->output_dp_link_rate = dml2_dp_rate_na;
+
+	/* modes-> reads the first entry of audio_info.modes. */
+	output->audio_sample_layout = stream->audio_info.modes->sample_size;
+	output->audio_sample_rate = stream->audio_info.modes->max_bit_rate;
+	output->output_disabled = true;
+
+	//TODO : New to DML2.1. How do we populate this ?
+	// output->validate_output
+}
+
+/* Set the stream overrides: ODM mode from force_odm_combine_segments (1 = bypass, 2..4 = N:1, else auto), dynamic ODM, SubVP. */
+static void populate_dml21_stream_overrides_from_stream_state(
+		struct dml2_stream_parameters *stream_desc,
+		struct dc_stream_state *stream,
+		struct dc_stream_status *stream_status)
+{
+	switch (stream->debug.force_odm_combine_segments) {
+	case 1:
+		stream_desc->overrides.odm_mode = dml2_odm_mode_bypass;
+		break;
+	case 2:
+		stream_desc->overrides.odm_mode = dml2_odm_mode_combine_2to1;
+		break;
+	case 3:
+		stream_desc->overrides.odm_mode = dml2_odm_mode_combine_3to1;
+		break;
+	case 4:
+		stream_desc->overrides.odm_mode = dml2_odm_mode_combine_4to1;
+		break;
+	case 0:
+	default:
+		stream_desc->overrides.odm_mode = dml2_odm_mode_auto;
+		break;
+	}
+	/* disable_dynamic_odm is only ever set here, never cleared. */
+	if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy ||
+	    stream->debug.force_odm_combine_segments > 0)
+		stream_desc->overrides.disable_dynamic_odm = true;
+	stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp ||
+					       stream->hw_cursor_req ||
+					       stream_status->mall_stream_config.cursor_size_limit_subvp;
+}
+
+/*
+ * Convert a DC "addr3" swizzle value to its DML2 swizzle mode.
+ *
+ * Each of the five known values maps to the dml2_sw_* constant with the
+ * matching name suffix. Any other value trips ASSERT(false) and is
+ * treated as linear.
+ */
+static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode)
+{
+	switch (addr3_mode) {
+	case DC_ADDR3_SW_LINEAR:
+		return dml2_sw_linear;
+	case DC_ADDR3_SW_256B_2D:
+		return dml2_sw_256b_2d;
+	case DC_ADDR3_SW_4KB_2D:
+		return dml2_sw_4kb_2d;
+	case DC_ADDR3_SW_64KB_2D:
+		return dml2_sw_64kb_2d;
+	case DC_ADDR3_SW_256KB_2D:
+		return dml2_sw_256kb_2d;
+	default:
+		break;
+	}
+
+	/* invalid swizzle mode for DML2.1 */
+	ASSERT(false);
+	return dml2_sw_linear;
+}
+
+/*
+ * Convert a gfx9-style swizzle value to a DML2 swizzle mode.
+ *
+ * Linear and the 256B/4KB/64KB D and R modes listed first map to the DML2
+ * mode of the same block size; everything else yields dml2_sw_64kb_2d.
+ */
+static enum dml2_swizzle_mode gfx9_to_dml2_swizzle_mode(enum swizzle_mode_values gfx9_mode)
+{
+	switch (gfx9_mode) {
+	/* Modes with a DML2.1 equivalent. */
+	case DC_SW_LINEAR:
+		return dml2_sw_linear;
+	case DC_SW_256_D:
+	case DC_SW_256_R:
+		return dml2_sw_256b_2d;
+	case DC_SW_4KB_D:
+	case DC_SW_4KB_R:
+	case DC_SW_4KB_R_X:
+		return dml2_sw_4kb_2d;
+	case DC_SW_64KB_D:
+	case DC_SW_64KB_D_X:
+	case DC_SW_64KB_R:
+	case DC_SW_64KB_R_X:
+		return dml2_sw_64kb_2d;
+	/* Modes without one. */
+	case DC_SW_256B_S:
+	case DC_SW_4KB_S:
+	case DC_SW_64KB_S:
+	case DC_SW_VAR_S:
+	case DC_SW_VAR_D:
+	case DC_SW_VAR_R:
+	case DC_SW_64KB_S_T:
+	case DC_SW_64KB_D_T:
+	case DC_SW_4KB_S_X:
+	case DC_SW_4KB_D_X:
+	case DC_SW_64KB_S_X:
+	case DC_SW_VAR_S_X:
+	case DC_SW_VAR_D_X:
+	case DC_SW_VAR_R_X:
+	default:
+		/*
+		 * Invalid swizzle mode for DML2.1. This can happen because
+		 * DML21 is not intended to be used by N-1 in production. To
+		 * properly filter out unsupported swizzle modes, capability
+		 * reporting would have to be fixed when DML2.1 is used for
+		 * N-1 in dc, so that DML only receives DML21-supported
+		 * swizzle modes. That is not implemented and has low value,
+		 * because the supported swizzle modes should already cover
+		 * most of the N-1 test cases.
+		 */
+		return dml2_sw_64kb_2d;
+	}
+}
+
+/* Fill @surface with a placeholder configuration sized to @stream's addressable area: DCC off, dml2_sw_64kb_2d tiling. */
+static void populate_dml21_dummy_surface_cfg(struct dml2_surface_cfg *surface, const struct dc_stream_state *stream)
+{
+	surface->plane0.width = stream->timing.h_addressable;
+	surface->plane0.height = stream->timing.v_addressable;
+	surface->plane0.pitch = ((surface->plane0.width + 127) / 128) * 128; /* width rounded up to a multiple of 128 */
+	surface->plane1.width = stream->timing.h_addressable;
+	surface->plane1.height = stream->timing.v_addressable;
+	surface->plane1.pitch = 0;
+	surface->tiling = dml2_sw_64kb_2d;
+	surface->dcc.enable = false;
+	surface->dcc.informative.dcc_rate_plane0 = 1.0;
+	surface->dcc.informative.dcc_rate_plane1 = 1.0;
+	surface->dcc.informative.fraction_of_zero_size_request_plane0 = surface->dcc.informative.fraction_of_zero_size_request_plane1 = 0;
+}
+
+/*
+ * Fill @plane with a placeholder plane for @stream: an unscaled, unrotated
+ * dml2_444_32 viewport at the origin, capped at 3840x2160, with a single
+ * cursor of width 256 and bpp 32. The scaler output width equals the
+ * viewport width.
+ */
+static void populate_dml21_dummy_plane_cfg(struct dml2_plane_parameters *plane, const struct dc_stream_state *stream)
+{
+	/* Cap the viewport at 4K (3840x2160). */
+	const unsigned int width = stream->timing.h_addressable > 3840 ? 3840 : stream->timing.h_addressable;
+	const unsigned int height = stream->timing.v_addressable > 2160 ? 2160 : stream->timing.v_addressable;
+
+	plane->pixel_format = dml2_444_32;
+	plane->dynamic_meta_data.enable = false;
+	plane->overrides.gpuvm_min_page_size_kbytes = 256;
+
+	plane->cursor.num_cursors = 1;
+	plane->cursor.cursor_width = 256;
+	plane->cursor.cursor_bpp = 32;
+
+	/* plane0 carries the whole viewport; plane1 is left empty. */
+	plane->composition.viewport.stationary = false;
+	plane->composition.viewport.plane0.x_start = 0;
+	plane->composition.viewport.plane0.y_start = 0;
+	plane->composition.viewport.plane0.width = width;
+	plane->composition.viewport.plane0.height = height;
+
+	plane->composition.viewport.plane1.x_start = 0;
+	plane->composition.viewport.plane1.y_start = 0;
+	plane->composition.viewport.plane1.width = 0;
+	plane->composition.viewport.plane1.height = 0;
+
+	/* Scaler off: 1:1 ratios and one tap on plane0, zeros on plane1. */
+	plane->composition.rotation_angle = dml2_rotation_0;
+	plane->composition.scaler_info.enabled = false;
+	plane->composition.scaler_info.rect_out_width = width;
+	plane->composition.scaler_info.plane0.h_ratio = 1.0;
+	plane->composition.scaler_info.plane0.v_ratio = 1.0;
+	plane->composition.scaler_info.plane0.h_taps = 1;
+	plane->composition.scaler_info.plane0.v_taps = 1;
+
+	plane->composition.scaler_info.plane1.h_ratio = 0;
+	plane->composition.scaler_info.plane1.v_ratio = 0;
+	plane->composition.scaler_info.plane1.h_taps = 0;
+	plane->composition.scaler_info.plane1.v_taps = 0;
+}
+
+/* Copy surface geometry, DCC settings and the tiling mode from @plane_state into @surface; @in_dc is not used here. */
+static void populate_dml21_surface_config_from_plane_state(
+		const struct dc *in_dc,
+		struct dml2_surface_cfg *surface,
+		const struct dc_plane_state *plane_state)
+{
+	surface->plane0.width = plane_state->plane_size.surface_size.width;
+	surface->plane0.height = plane_state->plane_size.surface_size.height;
+	surface->plane0.pitch = plane_state->plane_size.surface_pitch;
+	surface->plane1.width = plane_state->plane_size.chroma_size.width;
+	surface->plane1.height = plane_state->plane_size.chroma_size.height;
+	surface->plane1.pitch = plane_state->plane_size.chroma_pitch;
+	surface->dcc.enable = plane_state->dcc.enable;
+	surface->dcc.plane0.pitch = plane_state->dcc.meta_pitch;
+	surface->dcc.plane1.pitch = plane_state->dcc.meta_pitch_c;
+	surface->dcc.informative.dcc_rate_plane0 = 1.0;
+	surface->dcc.informative.dcc_rate_plane1 = 1.0;
+	surface->dcc.informative.fraction_of_zero_size_request_plane0 = plane_state->dcc.independent_64b_blks;
+	surface->dcc.informative.fraction_of_zero_size_request_plane1 = plane_state->dcc.independent_64b_blks_c;
+	switch (plane_state->tiling_info.gfxversion) {
+	case DcGfxVersion9:
+	case DcGfxVersion10:
+	case DcGfxVersion11:
+		surface->tiling = gfx9_to_dml2_swizzle_mode(plane_state->tiling_info.gfx9.swizzle);
+		break;
+	case DcGfxAddr3:
+		surface->tiling = gfx_addr3_to_dml2_swizzle_mode(plane_state->tiling_info.gfx_addr3.swizzle);
+		break;
+	case DcGfxVersion7:
+	case DcGfxVersion8:
+	default:
+		break; /* surface->tiling is left as the caller provided it */
+	}
+}
+
+/* Rebuild scaling data for plane @in inside dml_ctx's scratch pipe; the returned pointer refers to that scratch pipe. */
+static const struct scaler_data *get_scaler_data_for_plane(
+		struct dml2_context *dml_ctx,
+		const struct dc_plane_state *in,
+		const struct dc_state *context)
+{
+	struct pipe_ctx *scratch = &dml_ctx->v21.scratch.temp_pipe;
+	int i;
+
+	memset(scratch, 0, sizeof(*scratch));
+	for (i = 0; i < MAX_PIPES; i++) {
+		const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		/* Use the first pipe that carries this plane and has no prev_odm_pipe. */
+		if (pipe->plane_state != in || pipe->prev_odm_pipe)
+			continue;
+		scratch->stream = pipe->stream;
+		scratch->plane_state = pipe->plane_state;
+		scratch->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps;
+		scratch->stream_res = pipe->stream_res;
+		scratch->dsc_padding_params.dsc_hactive_padding = pipe->dsc_padding_params.dsc_hactive_padding;
+		scratch->dsc_padding_params.dsc_htotal_padding = pipe->dsc_padding_params.dsc_htotal_padding;
+		scratch->dsc_padding_params.dsc_pix_clk_100hz = pipe->dsc_padding_params.dsc_pix_clk_100hz;
+		dml_ctx->config.callbacks.build_scaling_params(scratch);
+		break;
+	}
+
+	ASSERT(i < MAX_PIPES);
+	return &scratch->plane_res.scl_data;
+}
+
+/* Fill the DML2.1 plane descriptor @plane from @plane_state and the scaler data resolved for it in @context. */
+static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dml_ctx,
+		struct dml2_plane_parameters *plane, const struct dc_plane_state *plane_state,
+		const struct dc_state *context, unsigned int stream_index)
+{
+	const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context);
+	struct dc_stream_state *stream = context->streams[stream_index];
+
+	plane->cursor.cursor_bpp = 32;
+	plane->cursor.cursor_width = 256;
+	plane->cursor.num_cursors = 1;
+	/* collapse the DC surface format onto a DML pixel format; unlisted formats are treated as 444 32bpp */
+	switch (plane_state->format) {
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+		plane->pixel_format = dml2_420_8;
+		break;
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+		plane->pixel_format = dml2_420_10;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+		plane->pixel_format = dml2_444_64;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+	case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+		plane->pixel_format = dml2_444_16;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS:
+		plane->pixel_format = dml2_444_8;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+		plane->pixel_format = dml2_rgbe_alpha;
+		break;
+	default:
+		plane->pixel_format = dml2_444_32;
+		break;
+	}
+
+	plane->composition.viewport.plane0.height = scaler_data->viewport.height;
+	plane->composition.viewport.plane0.width = scaler_data->viewport.width;
+	plane->composition.viewport.plane1.height = scaler_data->viewport_c.height;
+	plane->composition.viewport.plane1.width = scaler_data->viewport_c.width;
+	plane->composition.viewport.plane0.x_start = scaler_data->viewport.x;
+	plane->composition.viewport.plane0.y_start = scaler_data->viewport.y;
+	plane->composition.viewport.plane1.x_start = scaler_data->viewport_c.x;
+	plane->composition.viewport.plane1.y_start = scaler_data->viewport_c.y;
+	plane->composition.viewport.stationary = false;
+	plane->composition.scaler_info.enabled = scaler_data->ratios.horz.value != dc_fixpt_one.value ||
+		scaler_data->ratios.horz_c.value != dc_fixpt_one.value ||
+		scaler_data->ratios.vert.value != dc_fixpt_one.value ||
+		scaler_data->ratios.vert_c.value != dc_fixpt_one.value;
+
+	if (!scaler_data->taps.h_taps) {
+		/* Above logic determines scaling should be enabled even when there are no taps for
+		 * certain cases. Hence take corrective action and disable scaling.
+		 */
+		plane->composition.scaler_info.enabled = false;
+	} else if ((plane_state->ctx->dc->config.use_spl == true) &&
+		(plane->composition.scaler_info.enabled == false)) {
+		/* To enable sharpener for 1:1, scaler must be enabled. If use_spl is set, then
+		 * allow case where ratio is 1 but taps > 1
+		 */
+		if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) ||
+			(scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1))
+			plane->composition.scaler_info.enabled = true;
+	}
+
+	/* always_scale is only used for debug purposes not used in production but has to be
+	 * maintained for certain compliance requirements. */
+	if (plane_state->ctx->dc->debug.always_scale == true)
+		plane->composition.scaler_info.enabled = true;
+
+	if (plane->composition.scaler_info.enabled == false) {
+		plane->composition.scaler_info.plane0.h_ratio = 1.0;
+		plane->composition.scaler_info.plane0.v_ratio = 1.0;
+		plane->composition.scaler_info.plane1.h_ratio = 1.0;
+		plane->composition.scaler_info.plane1.v_ratio = 1.0;
+	} else {
+		plane->composition.scaler_info.plane0.h_ratio = (double)scaler_data->ratios.horz.value / (1ULL << 32);
+		plane->composition.scaler_info.plane0.v_ratio = (double)scaler_data->ratios.vert.value / (1ULL << 32);
+		plane->composition.scaler_info.plane1.h_ratio = (double)scaler_data->ratios.horz_c.value / (1ULL << 32);
+		plane->composition.scaler_info.plane1.v_ratio = (double)scaler_data->ratios.vert_c.value / (1ULL << 32);
+	}
+
+	if (!scaler_data->taps.h_taps) {
+		plane->composition.scaler_info.plane0.h_taps = 1;
+		plane->composition.scaler_info.plane1.h_taps = 1;
+	} else {
+		plane->composition.scaler_info.plane0.h_taps = scaler_data->taps.h_taps;
+		plane->composition.scaler_info.plane1.h_taps = scaler_data->taps.h_taps_c;
+	}
+	if (!scaler_data->taps.v_taps) {
+		plane->composition.scaler_info.plane0.v_taps = 1;
+		plane->composition.scaler_info.plane1.v_taps = 1;
+	} else {
+		plane->composition.scaler_info.plane0.v_taps = scaler_data->taps.v_taps;
+		plane->composition.scaler_info.plane1.v_taps = scaler_data->taps.v_taps_c;
+	}
+
+	if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) {
+		plane->tdlut.setup_for_tdlut = true;
+
+		switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.layout) {
+		case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB:
+		case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR:
+			plane->tdlut.tdlut_addressing_mode = dml2_tdlut_sw_linear;
+			break;
+		case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR:
+			plane->tdlut.tdlut_addressing_mode = dml2_tdlut_simple_linear;
+			break;
+		default:
+			break; /* any other layout leaves tdlut_addressing_mode as it was */
+		}
+
+		switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.size) {
+		case DC_CM2_GPU_MEM_SIZE_171717:
+			plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube;
+			break;
+		case DC_CM2_GPU_MEM_SIZE_TRANSFORMED:
+		default:
+			//plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined
+			break;
+		}
+	}
+	plane->tdlut.setup_for_tdlut |= dml_ctx->config.force_tdlut_enable;
+
+	plane->dynamic_meta_data.enable = false;
+	plane->dynamic_meta_data.lines_before_active_required = 0;
+	plane->dynamic_meta_data.transmitted_bytes = 0;
+
+	plane->composition.scaler_info.rect_out_width = plane_state->dst_rect.width;
+	plane->composition.rotation_angle = (enum dml2_rotation_angle) plane_state->rotation;
+	plane->stream_index = stream_index;
+
+	plane->overrides.gpuvm_min_page_size_kbytes = 256;
+
+	plane->immediate_flip = plane_state->flip_immediate;
+
+	plane->composition.rect_out_height_spans_vactive =
+		plane_state->dst_rect.height >= stream->src.height &&
+		stream->dst.height >= stream->timing.v_addressable;
+}
+
+/* TODO: Could possibly be moved to a common helper layer. On success *plane_id = (stream index << 16) | plane index. */
+static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id)
+{
+	int stream_idx, plane_idx;
+
+	if (!plane_id)
+		return false;
+
+	for (stream_idx = 0; stream_idx < context->stream_count; stream_idx++) {
+		if (context->streams[stream_idx]->stream_id != stream_id)
+			continue;
+		for (plane_idx = 0; plane_idx < context->stream_status[stream_idx].plane_count; plane_idx++) {
+			if (context->stream_status[stream_idx].plane_states[plane_idx] != plane)
+				continue;
+			*plane_id = (stream_idx << 16) | plane_idx;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Return the display_cfg slot already mapped to @stream's id, or -1 if none; signed so the -1 sentinel is exact. */
+static int map_stream_to_dml21_display_cfg(const struct dml2_context *dml_ctx, const struct dc_stream_state *stream)
+{
+	int i;
+
+	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+		if (dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] &&
+		    dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i] == stream->stream_id)
+			return i;
+	}
+
+	/* no valid slot holds this stream id */
+	return -1;
+}
+
+/* Return the display_cfg slot already mapped to @plane of stream @stream_id; (unsigned int)-1 when there is none. */
+unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id,
+		const struct dc_plane_state *plane, const struct dc_state *context)
+{
+	unsigned int wanted_id;
+	int slot;
+
+	if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &wanted_id)) {
+		ASSERT(false);
+		return -1;
+	}
+
+	for (slot = 0; slot < __DML2_WRAPPER_MAX_STREAMS_PLANES__; slot++) {
+		if (!dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[slot])
+			continue;
+		if (dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[slot] == wanted_id)
+			return slot;
+	}
+
+	return -1;
+}
+
+/*
+ * Translate the wrapper's forced p-state method into the matching DML uclk
+ * p-state change strategy. The auto method and any value not listed below
+ * both resolve to the auto strategy.
+ */
+static enum dml2_uclk_pstate_change_strategy dml21_force_pstate_method_to_uclk_state_change_strategy(enum dml2_force_pstate_methods force_pstate_method)
+{
+	switch (force_pstate_method) {
+	case dml2_force_pstate_method_vactive:
+		return dml2_uclk_pstate_change_strategy_force_vactive;
+	case dml2_force_pstate_method_vblank:
+		return dml2_uclk_pstate_change_strategy_force_vblank;
+	case dml2_force_pstate_method_drr:
+		return dml2_uclk_pstate_change_strategy_force_drr;
+	case dml2_force_pstate_method_subvp:
+		/* the subvp method maps onto the strategy named mall_svp */
+		return dml2_uclk_pstate_change_strategy_force_mall_svp;
+	case dml2_force_pstate_method_auto:
+	default:
+		break;
+	}
+
+	return dml2_uclk_pstate_change_strategy_auto;
+}
+
+bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+ int stream_index, plane_index;
+ int disp_cfg_stream_location, disp_cfg_plane_location;
+ struct dml2_display_cfg *dml_dispcfg = &dml_ctx->v21.display_config;
+ unsigned int plane_count = 0;
+ /* Fill dml_ctx's DML2.1 display config and slot<->id mapping from the streams and planes in @context; always returns true. */
+ memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
+ /* global settings that do not depend on any particular stream or plane */
+ dml_dispcfg->gpuvm_enable = dml_ctx->config.gpuvm_enable;
+ dml_dispcfg->gpuvm_max_page_table_levels = 4;
+ dml_dispcfg->hostvm_enable = false;
+ dml_dispcfg->minimize_det_reallocation = true;
+ dml_dispcfg->overrides.enable_subvp_implicit_pmo = true;
+
+ if (in_dc->debug.disable_unbounded_requesting) {
+ dml_dispcfg->overrides.hw.force_unbounded_requesting.enable = true;
+ dml_dispcfg->overrides.hw.force_unbounded_requesting.value = false;
+ }
+ /* one DML stream descriptor per DC stream: reuse the slot already mapped to the stream id, else append a new one */
+ for (stream_index = 0; stream_index < context->stream_count; stream_index++) {
+ disp_cfg_stream_location = map_stream_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]);
+
+ if (disp_cfg_stream_location < 0)
+ disp_cfg_stream_location = dml_dispcfg->num_streams++;
+ /* NOTE(review): res_ctx.pipe_ctx[] is indexed with the stream index below - confirm that is the intended pipe */
+ ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+ populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index], dml_ctx);
+ populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]);
+ populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index], &context->stream_status[stream_index]);
+
+ dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.fclk_pstate = dml2_twait_budgeting_setting_if_needed;
+ dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.uclk_pstate = dml2_twait_budgeting_setting_if_needed;
+ dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.stutter_enter_exit = dml2_twait_budgeting_setting_if_needed;
+ /* record which DC stream id occupies this slot */
+ dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_stream_location] = context->streams[stream_index]->stream_id;
+ dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_stream_location] = true;
+ /* a stream without planes still gets one dummy plane descriptor; no plane id is recorded for it */
+ if (context->stream_status[stream_index].plane_count == 0) {
+ disp_cfg_plane_location = dml_dispcfg->num_planes++;
+ populate_dml21_dummy_surface_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->streams[stream_index]);
+ populate_dml21_dummy_plane_cfg(&dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->streams[stream_index]);
+ dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location;
+ } else {
+ for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) {
+ disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context);
+
+ if (disp_cfg_plane_location < 0)
+ disp_cfg_plane_location = dml_dispcfg->num_planes++;
+
+ ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+
+ populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]);
+ populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index);
+ dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location;
+ /* store the plane id straight into the slot; the slot is flagged valid only when the id could be resolved */
+ if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location]))
+ dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true;
+
+ /* apply forced pstate policy */
+ if (dml_ctx->config.pmo.force_pstate_method_enable) {
+ dml_dispcfg->plane_descriptors[disp_cfg_plane_location].overrides.uclk_pstate_change_strategy =
+ dml21_force_pstate_method_to_uclk_state_change_strategy(dml_ctx->config.pmo.force_pstate_method_values[stream_index]);
+ }
+
+ plane_count++;
+ }
+ }
+ }
+ /* plane_count only counts real planes; dummy planes added above do not increment it */
+ if (plane_count == 0) {
+ dml_dispcfg->overrides.all_streams_blanked = true;
+ }
+
+ return true;
+}
+
+void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context)
+{
+ /* Copy DML's dcn4x min clocks, p-state support flags and stutter efficiency into context->bw_ctx.bw.dcn.clk. TODO these should be the max of active, svp prefetch and idle should be tracked separately */
+ context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dispclk_khz;
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.dcfclk_khz;
+ context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.uclk_khz;
+ context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.fclk_khz;
+ context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.uclk_khz;
+ context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.fclk_khz;
+ context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.deepsleep_dcfclk_khz;
+ context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = in_ctx->v21.mode_programming.programming->fclk_pstate_supported;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = in_ctx->v21.mode_programming.programming->uclk_pstate_supported;
+ context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0;
+ context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz;
+ context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz;
+ context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz;
+ context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz;
+ context->bw_ctx.bw.dcn.clk.stutter_efficiency.base_efficiency = in_ctx->v21.mode_programming.programming->stutter.base_percent_efficiency;
+ context->bw_ctx.bw.dcn.clk.stutter_efficiency.low_power_efficiency = in_ctx->v21.mode_programming.programming->stutter.low_power_percent_efficiency;
+}
+
+/*
+ * Return the DC watermark register set (A-D) inside @watermarks that
+ * corresponds to @wm_index. The pointer aliases @watermarks; nothing is
+ * copied here. NULL is returned when the index names no such set
+ * (DML2_DCHUB_WATERMARK_SET_NUM or any other value).
+ */
+static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index)
+{
+	switch (wm_index) {
+	case DML2_DCHUB_WATERMARK_SET_A:
+		return &watermarks->dcn4x.a;
+	case DML2_DCHUB_WATERMARK_SET_B:
+		return &watermarks->dcn4x.b;
+	case DML2_DCHUB_WATERMARK_SET_C:
+		return &watermarks->dcn4x.c;
+	case DML2_DCHUB_WATERMARK_SET_D:
+		return &watermarks->dcn4x.d;
+	case DML2_DCHUB_WATERMARK_SET_NUM:
+	default:
+		/* invalid wm set index */
+		break;
+	}
+
+	return NULL;
+}
+
+/* Copy each watermark register set DML produced into the DC watermark set with the same index. */
+void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx)
+{
+	const struct dml2_display_cfg_programming *prog = in_ctx->v21.mode_programming.programming;
+	unsigned int set;
+
+	for (set = 0; set < prog->global_regs.num_watermark_sets; set++) {
+		struct dml2_dchub_watermark_regs *dst = wm_set_index_to_dc_wm_set(watermarks, set);
+
+		/* indices without a DC counterpart are skipped */
+		if (!dst)
+			continue;
+
+		memcpy(dst, &prog->global_regs.wm_regs[set], sizeof(*dst));
+	}
+}
+
+/* Copy the display_cfg stream/plane id tables into the per-DML-pipe tables and flag every entry valid. */
+void dml21_map_hw_resources(struct dml2_context *dml_ctx)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < __DML2_WRAPPER_MAX_STREAMS_PLANES__; idx++) {
+		dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[idx] = dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[idx];
+		dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[idx] = dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[idx];
+		dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[idx] = true;
+		dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[idx] = true;
+	}
+}
+
+/* Fill @mcache_pipe_config with the pipe's plane0/plane1 viewport x start and width, plus whether plane1 is in use. */
+void dml21_get_pipe_mcache_config(
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx,
+		struct dml2_per_plane_programming *pln_prog,
+		struct dml2_pipe_configuration_descriptor *mcache_pipe_config)
+{
+	const struct scaler_data *scl = &pipe_ctx->plane_res.scl_data;
+
+	mcache_pipe_config->plane0.viewport_x_start = scl->viewport.x;
+	mcache_pipe_config->plane1.viewport_x_start = scl->viewport_c.x;
+	mcache_pipe_config->plane0.viewport_width = scl->viewport.width;
+	mcache_pipe_config->plane1.viewport_width = scl->viewport_c.width;
+	mcache_pipe_config->plane1_enabled = dml21_is_plane1_enabled(pln_prog->plane_descriptor->pixel_format);
+}
+
+/*
+ * Record on @pipe_ctx the DC p-state type implied by the uclk p-state method
+ * DML selected for the stream. @sub_vp_enabled refines the vblank and DRR cases.
+ */
+void dml21_set_dc_p_state_type(
+		struct pipe_ctx *pipe_ctx,
+		struct dml2_per_stream_programming *stream_programming,
+		bool sub_vp_enabled)
+{
+	switch (stream_programming->uclk_pstate_method) {
+	case dml2_pstate_method_vactive:
+	case dml2_pstate_method_fw_vactive_drr:
+		pipe_ctx->p_state_type = P_STATE_V_ACTIVE;
+		break;
+	case dml2_pstate_method_vblank:
+	case dml2_pstate_method_fw_vblank_drr:
+		pipe_ctx->p_state_type = sub_vp_enabled ? P_STATE_V_BLANK_SUB_VP : P_STATE_V_BLANK;
+		break;
+	case dml2_pstate_method_fw_svp:
+	case dml2_pstate_method_fw_svp_drr:
+		pipe_ctx->p_state_type = P_STATE_SUB_VP;
+		break;
+	case dml2_pstate_method_fw_drr:
+		/* firmware DRR without SubVP is reported as FPO */
+		pipe_ctx->p_state_type = sub_vp_enabled ? P_STATE_DRR_SUB_VP : P_STATE_FPO;
+		break;
+	default:
+		/* every other method is reported as unknown */
+		pipe_ctx->p_state_type = P_STATE_UNKNOWN;
+		break;
+	}
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
new file mode 100644
index 000000000000..9880d3e0398e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#ifndef _DML21_TRANSLATION_HELPER_H_
+#define _DML21_TRANSLATION_HELPER_H_
+
+struct dc;
+struct dc_state;
+struct dcn_watermarks;
+union dcn_watermark_set;
+struct pipe_ctx;
+struct dc_plane_state;
+
+struct dml2_context;
+struct dml2_configuration_options;
+struct dml2_initialize_instance_in_out;
+
+void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc);
+bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx);
+void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context);
+void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx);
+void dml21_map_hw_resources(struct dml2_context *dml_ctx);
+void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config);
+void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming, bool sub_vp_enabled);
+unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context);
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
new file mode 100644
index 000000000000..ee721606b883
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#include "dml2_internal_shared_types.h"
+#include "dml21_translation_helper.h"
+#include "dml2_internal_types.h"
+#include "dml21_utils.h"
+#include "dml2_dc_resource_mgmt.h"
+
+#include "dml2_core_dcn4_calcs.h"
+
+/* Return the DML pipe index whose valid mapping entry carries @stream_id, or -1 when no entry does. */
+int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id)
+{
+	int pipe_idx;
+	for (pipe_idx = 0; pipe_idx < __DML2_WRAPPER_MAX_STREAMS_PLANES__; pipe_idx++) {
+		if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[pipe_idx] && ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[pipe_idx] == stream_id)
+			return pipe_idx;
+	}
+	return -1;
+}
+
+/* Return the DML pipe index whose valid mapping entry carries @plane_id, or -1 when no entry does. */
+int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id)
+{
+	int pipe_idx;
+	for (pipe_idx = 0; pipe_idx < __DML2_WRAPPER_MAX_STREAMS_PLANES__; pipe_idx++) {
+		if (ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[pipe_idx] && ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[pipe_idx] == plane_id)
+			return pipe_idx;
+	}
+	return -1;
+}
+
+/* Find @plane among all streams of @state; on success *plane_id = (stream index << 16) | plane index. */
+bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id)
+{
+	int stream_idx, plane_idx;
+
+	if (!plane_id)
+		return false;
+	for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) {
+		for (plane_idx = 0; plane_idx < state->stream_status[stream_idx].plane_count; plane_idx++) {
+			if (state->stream_status[stream_idx].plane_states[plane_idx] != plane)
+				continue;
+			*plane_id = (stream_idx << 16) | plane_idx;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id)
+{
+	return plane_id & 0xffff; /* low 16 bits hold the plane's index within its stream (see dml21_get_plane_id) */
+}
+
+/* Store in *dml_pipe_idx the first plane slot tied to @stream_index; untouched if none. Slots with no descriptor are skipped. */
+void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index)
+{
+	unsigned int i;
+	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+		if (dml_ctx->v21.mode_programming.programming->plane_programming[i].plane_descriptor && dml_ctx->v21.mode_programming.programming->plane_programming[i].plane_descriptor->stream_index == stream_index) {
+			*dml_pipe_idx = i;
+			return;
+		}
+	}
+}
+
+/* Compute the pipe register set index: ODM slice index of the OPP head, plus the MPC slice index when a plane is attached. */
+void find_pipe_regs_idx(const struct dml2_context *dml_ctx,
+		struct pipe_ctx *pipe, unsigned int *pipe_regs_idx)
+{
+	unsigned int idx = dml_ctx->config.callbacks.get_odm_slice_index(dml_ctx->config.callbacks.get_opp_head(pipe));
+
+	if (pipe->plane_state)
+		idx += dml_ctx->config.callbacks.get_mpc_slice_index(pipe);
+	*pipe_regs_idx = idx;
+}
+
+/* Fills dc_main_pipes (and dc_phantom_pipes when a paired phantom stream holds the plane) for DML plane dml_plane_idx; returns the number of main pipes */
+int dml21_find_dc_pipes_for_plane(const struct dc *in_dc,
+ struct dc_state *context,
+ struct dml2_context *dml_ctx,
+ struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__],
+ struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__],
+ int dml_plane_idx)
+{
+ unsigned int dml_stream_index;
+ unsigned int main_stream_id;
+ unsigned int dc_plane_index;
+ struct dc_stream_state *dc_main_stream;
+ struct dc_stream_status *dc_main_stream_status;
+ struct dc_plane_state *dc_main_plane;
+ struct dc_stream_state *dc_phantom_stream;
+ struct dc_stream_status *dc_phantom_stream_status;
+ struct dc_plane_state *dc_phantom_plane;
+ int num_pipes = 0;
+
+ memset(dc_main_pipes, 0, sizeof(struct pipe_ctx *) * __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+ memset(dc_phantom_pipes, 0, sizeof(struct pipe_ctx *) * __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+ /* resolve the DC stream (and its status) that owns this DML plane */
+ dml_stream_index = dml_ctx->v21.mode_programming.programming->plane_programming[dml_plane_idx].plane_descriptor->stream_index;
+ main_stream_id = dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index];
+
+ dc_main_stream = dml_ctx->config.callbacks.get_stream_from_id(context, main_stream_id);
+ dc_main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, dc_main_stream);
+ if (!dc_main_stream_status)
+ return num_pipes;
+
+ /* find main plane based on id */
+ dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_idx]);
+ dc_main_plane = dc_main_stream_status->plane_states[dc_plane_index];
+ /* NOTE(review): dc_plane_index is not range-checked against plane_states[] - confirm the mapping guarantees it */
+ if (dc_main_plane) {
+ num_pipes = dml_ctx->config.callbacks.get_dpp_pipes_for_plane(dc_main_plane, &context->res_ctx, dc_main_pipes);
+ } else {
+ /* stream was configured with dummy plane, so get pipes from opp head */
+ struct pipe_ctx *otg_master_pipe = dml_ctx->config.callbacks.get_otg_master_for_stream(&context->res_ctx, dc_main_stream);
+ if (otg_master_pipe != NULL)
+ num_pipes = dml_ctx->config.callbacks.get_opp_heads_for_otg_master(otg_master_pipe, &context->res_ctx, dc_main_pipes);
+ }
+
+ /* if phantom exists, find associated pipes */
+ dc_phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, dc_main_stream);
+ if (dc_phantom_stream && num_pipes > 0) {
+ dc_phantom_stream_status = dml_ctx->config.callbacks.get_stream_status(context, dc_phantom_stream);
+
+ if (dc_phantom_stream_status) {
+ /* phantom plane will have same index as main */
+ dc_phantom_plane = dc_phantom_stream_status->plane_states[dc_plane_index];
+
+ if (dc_phantom_plane) {
+ /* only care about phantom pipes if they contain the phantom plane */
+ dml_ctx->config.callbacks.get_dpp_pipes_for_plane(dc_phantom_plane, &context->res_ctx, dc_phantom_pipes);
+ }
+ }
+ }
+
+ return num_pipes;
+}
+
+/* Copy DML's global sync programming for the stream into @pipe_ctx->global_sync. */
+void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx,
+		struct dml2_per_stream_programming *stream_programming)
+{
+	const union dml2_global_sync_programming *src;
+
+	/* phantom has its own global sync */
+	if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM)
+		src = &stream_programming->phantom_stream.global_sync;
+	else
+		src = &stream_programming->global_sync;
+
+	memcpy(&pipe_ctx->global_sync, src, sizeof(*src));
+}
+
+/* Record @dc_pipe's MALL surface size and add it to the matching total in @context->bw_ctx.bw.dcn. */
+/* Reuse MALL Allocation Sizes logic from dcn32_fpu.c */
+void dml21_populate_mall_allocation_size(struct dc_state *context,
+		struct dml2_context *in_ctx,
+		struct dml2_per_plane_programming *pln_prog,
+		struct pipe_ctx *dc_pipe)
+{
+	/* Count from active, top pipes per plane only. Only add mall_ss_size_bytes for each unique plane. */
+	if (!dc_pipe->stream || !dc_pipe->plane_state || dc_pipe->prev_odm_pipe != NULL)
+		return;
+	if (dc_pipe->top_pipe != NULL && dc_pipe->plane_state == dc_pipe->top_pipe->plane_state)
+		return;
+
+	if (in_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, dc_pipe) == SUBVP_PHANTOM) {
+		/* SUBVP: phantom surfaces only stored in MALL */
+		dc_pipe->surface_size_in_mall_bytes = pln_prog->svp_size_mall_bytes;
+		context->bw_ctx.bw.dcn.mall_subvp_size_bytes += dc_pipe->surface_size_in_mall_bytes;
+	} else {
+		/* SS: all active surfaces stored in MALL */
+		dc_pipe->surface_size_in_mall_bytes = pln_prog->surface_size_mall_bytes;
+		context->bw_ctx.bw.dcn.mall_ss_size_bytes += dc_pipe->surface_size_in_mall_bytes;
+	}
+}
+
+/* True when @pipe_ctx has both an HPO DP stream encoder and link encoder and its stream carries a DP signal. */
+bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
+{
+	/* If this assert is hit then we have a link encoder dynamic management issue */
+	ASSERT(!pipe_ctx->stream_res.hpo_dp_stream_enc || pipe_ctx->link_res.hpo_dp_link_enc != NULL);
+	if (!pipe_ctx->stream_res.hpo_dp_stream_enc || !pipe_ctx->link_res.hpo_dp_link_enc)
+		return false;
+	return dc_is_dp_signal(pipe_ctx->stream->signal);
+}
+
+/* Report whether any pipe in @context is a SubVP main pipe whose stream has a paired SubVP stream. */
+static bool is_sub_vp_enabled(struct dc *dc, struct dc_state *context)
+{
+	int pipe_idx;
+
+	for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
+
+		if (!pipe_ctx->stream || !dc_state_get_paired_subvp_stream(context, pipe_ctx->stream))
+			continue;
+		if (dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN)
+			return true;
+	}
+	return false;
+}
+
+/* Apply DML's per-plane and per-stream programming to @pipe_ctx: global sync, HUBP regs, DPP clock, MALL size, p-state type. */
+void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog,
+		struct dml2_per_stream_programming *stream_prog)
+{
+	unsigned int pipe_reg_index = 0;
+
+	dml21_pipe_populate_global_sync(dml_ctx, context, pipe_ctx, stream_prog);
+	find_pipe_regs_idx(dml_ctx, pipe_ctx, &pipe_reg_index);
+
+	if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+		memcpy(&pipe_ctx->hubp_regs, pln_prog->pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set));
+		pipe_ctx->unbounded_req = pln_prog->pipe_regs[pipe_reg_index]->rq_regs.unbounded_request_enabled;
+		pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64;
+	} else {
+		/* phantom pipes take the phantom plane's register set, with no unbounded request and a zero DET size */
+		memcpy(&pipe_ctx->hubp_regs, pln_prog->phantom_plane.pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set));
+		pipe_ctx->unbounded_req = false;
+		pipe_ctx->det_buffer_size_kb = 0;
+	}
+
+	pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4x.dppclk_khz;
+	if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipe_ctx->plane_res.bw.dppclk_khz)
+		context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz;
+
+	dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx);
+
+	dml21_set_dc_p_state_type(pipe_ctx, stream_prog, is_sub_vp_enabled(pipe_ctx->stream->ctx->dc, context));
+}
+
+/* Create a phantom stream for @main_stream using DML's phantom timing and add it to @context; NULL if creation fails. */
+static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx,
+		const struct dc *dc,
+		struct dc_state *context,
+		struct dc_stream_state *main_stream,
+		struct dml2_per_stream_programming *stream_programming)
+{
+	const struct dml2_stream_parameters *desc = &stream_programming->phantom_stream.descriptor;
+	struct dc_stream_state *phantom_stream;
+
+	phantom_stream = dml_ctx->config.svp_pstate.callbacks.create_phantom_stream(dc, context, main_stream);
+	if (!phantom_stream)
+		return NULL;
+
+	/* copy details of phantom stream from main */
+	memcpy(&phantom_stream->timing, &main_stream->timing, sizeof(phantom_stream->timing));
+	memcpy(&phantom_stream->src, &main_stream->src, sizeof(phantom_stream->src));
+	memcpy(&phantom_stream->dst, &main_stream->dst, sizeof(phantom_stream->dst));
+
+	/* modify timing for phantom */
+	phantom_stream->timing.v_front_porch = desc->timing.v_front_porch;
+	phantom_stream->timing.v_addressable = desc->timing.v_active;
+	phantom_stream->timing.v_total = desc->timing.v_total;
+	phantom_stream->timing.flags.DSC = 0; // phantom always has DSC disabled
+
+	/* dst spans v_active lines from row 0; src height is v_active scaled by the main stream's src/dst height ratio */
+	phantom_stream->dst.y = 0;
+	phantom_stream->dst.height = desc->timing.v_active;
+	phantom_stream->src.y = 0;
+	phantom_stream->src.height = (double)desc->timing.v_active * (double)main_stream->src.height / (double)main_stream->dst.height;
+	phantom_stream->use_dynamic_meta = false;
+
+	dml_ctx->config.svp_pstate.callbacks.add_phantom_stream(dc, context, phantom_stream, main_stream);
+
+	return phantom_stream;
+}
+
+/*
+ * Create a phantom plane modelled on @main_plane, clip it to the phantom
+ * stream's source height and add it to @phantom_stream; NULL if creation fails.
+ */
+static struct dc_plane_state *dml21_add_phantom_plane(struct dml2_context *dml_ctx,
+		const struct dc *dc,
+		struct dc_state *context,
+		struct dc_stream_state *phantom_stream,
+		struct dc_plane_state *main_plane,
+		struct dml2_per_plane_programming *plane_programming)
+{
+	struct dc_plane_state *phantom_plane;
+
+	phantom_plane = dml_ctx->config.svp_pstate.callbacks.create_phantom_plane(dc, context, main_plane);
+	if (!phantom_plane)
+		return NULL;
+
+	phantom_plane->format = main_plane->format;
+	phantom_plane->rotation = main_plane->rotation;
+	phantom_plane->visible = main_plane->visible;
+
+	memcpy(&phantom_plane->address, &main_plane->address, sizeof(phantom_plane->address));
+	memcpy(&phantom_plane->scaling_quality, &main_plane->scaling_quality,
+		sizeof(phantom_plane->scaling_quality));
+	memcpy(&phantom_plane->src_rect, &main_plane->src_rect, sizeof(phantom_plane->src_rect));
+	memcpy(&phantom_plane->dst_rect, &main_plane->dst_rect, sizeof(phantom_plane->dst_rect));
+	memcpy(&phantom_plane->clip_rect, &main_plane->clip_rect, sizeof(phantom_plane->clip_rect));
+	memcpy(&phantom_plane->plane_size, &main_plane->plane_size,
+		sizeof(phantom_plane->plane_size));
+	memcpy(&phantom_plane->tiling_info, &main_plane->tiling_info,
+		sizeof(phantom_plane->tiling_info));
+	memcpy(&phantom_plane->dcc, &main_plane->dcc, sizeof(phantom_plane->dcc));
+
+	/* Shadow pipe has small viewport. */
+	phantom_plane->clip_rect.y = 0;
+	phantom_plane->clip_rect.height = phantom_stream->src.height;
+
+	dml_ctx->config.svp_pstate.callbacks.add_phantom_plane(dc, phantom_stream, phantom_plane, context);
+
+	return phantom_plane;
+}
+
+/*
+ * Expand DML's phantom (SubVP) decisions into the dc context.
+ *
+ * For every DML stream whose phantom_stream is enabled, the matching dc
+ * stream is looked up, a phantom stream is created for it, and one phantom
+ * plane is requested per DML plane that belongs to that stream. If at least
+ * one phantom plane was requested, dml2_map_dc_pipes() is run again on the
+ * updated context.
+ *
+ * A stream is skipped when it cannot be resolved, has no status, or has no
+ * planes; a plane is skipped when its dc plane state is missing.
+ */
+void dml21_handle_phantom_streams_planes(const struct dc *dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	struct dml2_display_cfg_programming *programming = dml_ctx->v21.mode_programming.programming;
+	unsigned int dml_stream_index, dml_plane_index, dc_plane_index;
+	struct dc_stream_state *main_stream;
+	struct dc_stream_status *main_stream_status;
+	struct dc_stream_state *phantom_stream;
+	struct dc_plane_state *main_plane;
+	bool phantoms_added = false;
+
+	/* create phantom streams and planes and add to context */
+	for (dml_stream_index = 0; dml_stream_index < programming->display_config.num_streams; dml_stream_index++) {
+		/* iterate through DML streams looking for phantoms */
+		if (!programming->stream_programming[dml_stream_index].phantom_stream.enabled)
+			continue;
+
+		/* find associated dc stream */
+		main_stream = dml_ctx->config.callbacks.get_stream_from_id(context,
+				dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_stream_index]);
+
+		/* an unresolved stream id must not be handed on to the status lookup */
+		if (!main_stream)
+			continue;
+
+		main_stream_status = dml_ctx->config.callbacks.get_stream_status(context, main_stream);
+
+		if (!main_stream_status || main_stream_status->plane_count == 0)
+			continue;
+
+		/* create phantom stream for subvp enabled stream */
+		phantom_stream = dml21_add_phantom_stream(dml_ctx,
+				dc,
+				context,
+				main_stream,
+				&programming->stream_programming[dml_stream_index]);
+
+		if (!phantom_stream)
+			continue;
+
+		/* iterate through DML planes associated with this stream */
+		for (dml_plane_index = 0; dml_plane_index < programming->display_config.num_planes; dml_plane_index++) {
+			if (programming->plane_programming[dml_plane_index].plane_descriptor->stream_index != dml_stream_index)
+				continue;
+
+			/* find associated dc plane */
+			dc_plane_index = dml21_get_dc_plane_idx_from_plane_id(dml_ctx->v21.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[dml_plane_index]);
+			main_plane = main_stream_status->plane_states[dc_plane_index];
+
+			/* dml21_add_phantom_plane() dereferences the main plane */
+			if (!main_plane)
+				continue;
+
+			/* create phantom planes for subvp enabled plane */
+			dml21_add_phantom_plane(dml_ctx,
+					dc,
+					context,
+					phantom_stream,
+					main_plane,
+					&programming->plane_programming[dml_plane_index]);
+
+			phantoms_added = true;
+		}
+	}
+
+	if (phantoms_added)
+		dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, dc->current_state);
+}
+/*
+ * Rebuild the FAMS2 data held in context->bw_ctx.bw.dcn from the DML
+ * mode-programming output in dml_ctx.
+ *
+ * All FAMS2 arrays and the global config in the context are zeroed first.
+ * When DML reports fams2_required, one entry is filled per dc stream that
+ * has planes and is not a SUBVP_PHANTOM stream: the base/sub parameters are
+ * copied from DML, then plane count, OTG instance and pipe masks are taken
+ * from the dc context. SUBVP entries additionally record the paired phantom
+ * stream's OTG instance and pipe masks. The global config is copied only if
+ * at least one entry was produced, and clk.fw_based_mclk_switching always
+ * ends up mirroring the resulting global enable bit.
+ */
+void dml21_build_fams2_programming(const struct dc *dc,
+ struct dc_state *context,
+ struct dml2_context *dml_ctx)
+{
+ int i, j, k;
+ unsigned int num_fams2_streams = 0;
+
+ /* reset fams2 data
+ * NOTE(review): the sizes assume each per-stream array holds
+ * DML2_MAX_PLANES elements - confirm against the dc_state definition */
+ memset(&context->bw_ctx.bw.dcn.fams2_stream_base_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES);
+ memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES);
+ memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2, 0, sizeof(union dmub_fams2_stream_static_sub_state_v2) * DML2_MAX_PLANES);
+ memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config));
+
+ if (dml_ctx->v21.mode_programming.programming->fams2_required) {
+ for (i = 0; i < context->stream_count; i++) {
+ int dml_stream_idx;
+ struct dc_stream_state *phantom_stream;
+ struct dc_stream_status *phantom_status;
+ enum fams2_stream_type type = 0; /* replaced by the copied base params below */
+
+ union dmub_cmd_fams2_config *static_base_state = &context->bw_ctx.bw.dcn.fams2_stream_base_params[num_fams2_streams];
+ union dmub_cmd_fams2_config *static_sub_state = &context->bw_ctx.bw.dcn.fams2_stream_sub_params[num_fams2_streams]; /* both address the next free output slot */
+
+ struct dc_stream_state *stream = context->streams[i];
+
+ if (context->stream_status[i].plane_count == 0 ||
+ dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) {
+ /* can ignore blanked or phantom streams */
+ continue;
+ }
+
+ dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id);
+ if (dml_stream_idx < 0) {
+ ASSERT(dml_stream_idx >= 0); /* always trips here: flags a dc stream without a DML index */
+ continue;
+ }
+
+ /* copy static state from PMO */
+ memcpy(static_base_state,
+ &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_base_params,
+ sizeof(union dmub_cmd_fams2_config));
+
+ if (dc->debug.fams_version.major == 3) { /* major 3 stores its sub params in the _v2 array */
+ memcpy(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2[num_fams2_streams],
+ &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params_v2,
+ sizeof(union dmub_fams2_stream_static_sub_state_v2));
+ } else {
+ memcpy(static_sub_state,
+ &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params,
+ sizeof(union dmub_cmd_fams2_config));
+ }
+
+ switch (dc->debug.fams_version.minor) {
+ case 1:
+ default: /* every minor version currently takes the stream_v1 layout */
+ type = static_base_state->stream_v1.base.type;
+
+ /* get information from context */
+ static_base_state->stream_v1.base.num_planes = context->stream_status[i].plane_count;
+ static_base_state->stream_v1.base.otg_inst = context->stream_status[i].primary_otg_inst;
+
+ /* populate pipe masks for planes */
+ for (j = 0; j < context->stream_status[i].plane_count; j++) {
+ for (k = 0; k < dc->res_pool->pipe_count; k++) {
+ if (context->res_ctx.pipe_ctx[k].stream &&
+ context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id &&
+ context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) {
+ static_base_state->stream_v1.base.pipe_mask |= (1 << k);
+ static_base_state->stream_v1.base.plane_pipe_masks[j] |= (1 << k);
+ }
+ }
+ }
+ }
+
+
+ /* get per method programming */
+ switch (type) {
+ case FAMS2_STREAM_TYPE_VBLANK:
+ case FAMS2_STREAM_TYPE_VACTIVE:
+ case FAMS2_STREAM_TYPE_DRR:
+ break; /* nothing beyond the copied static state is filled in */
+ case FAMS2_STREAM_TYPE_SUBVP:
+ phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream);
+ if (!phantom_stream)
+ break; /* no paired phantom: sub state stays as copied */
+
+ phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream);
+
+ /* phantom status should always be present */
+ ASSERT(phantom_status);
+ if (!phantom_status)
+ break;
+
+ switch (dc->debug.fams_version.minor) {
+ case 1:
+ default:
+ static_sub_state->stream_v1.sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst;
+
+ /* populate pipe masks for phantom planes */
+ for (j = 0; j < phantom_status->plane_count; j++) {
+ for (k = 0; k < dc->res_pool->pipe_count; k++) {
+ if (context->res_ctx.pipe_ctx[k].stream &&
+ context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id &&
+ context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) {
+ switch (dc->debug.fams_version.minor) {
+ case 1:
+ default:
+ static_sub_state->stream_v1.sub_state.subvp.phantom_pipe_mask |= (1 << k);
+ static_sub_state->stream_v1.sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k);
+ }
+ }
+ }
+ }
+ }
+ break;
+ default:
+ ASSERT(false); /* stream type not handled above */
+ break;
+ }
+
+ num_fams2_streams++; /* slot consumed; streams skipped with continue never reach this */
+ }
+ }
+
+ if (num_fams2_streams > 0) {
+ /* copy FAMS2 configuration */
+ memcpy(&context->bw_ctx.bw.dcn.fams2_global_config,
+ &dml_ctx->v21.mode_programming.programming->fams2_global_config,
+ sizeof(struct dmub_cmd_fams2_global_config));
+
+ context->bw_ctx.bw.dcn.fams2_global_config.num_streams = num_fams2_streams;
+ }
+
+ context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; /* stays 0 from the memset when no global config was copied */
+}
+
+/*
+ * Report whether @source_format lies in the inclusive enumerator range
+ * dml2_420_8 .. dml2_rgbe_alpha.
+ */
+bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format)
+{
+	if (source_format < dml2_420_8)
+		return false;
+
+	return source_format <= dml2_rgbe_alpha;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h
new file mode 100644
index 000000000000..4bff52eaaef8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#ifndef _DML21_UTILS_H_
+#define _DML21_UTILS_H_
+
+/*
+ * Forward declarations for every struct the prototypes below use only through
+ * pointers. Without them, a struct tag first seen inside a parameter list is
+ * scoped to that prototype alone and does not match the real definition.
+ *
+ * NOTE(review): enum dml2_source_format_class and
+ * __DML2_WRAPPER_MAX_STREAMS_PLANES__ cannot be forward declared; the headers
+ * defining them still have to be included before this one.
+ */
+struct dc;
+struct dc_state;
+struct dc_plane_state;
+struct pipe_ctx;
+
+struct dml2_context;
+struct dml2_display_rq_regs;
+struct dml2_display_dlg_regs;
+struct dml2_display_ttu_regs;
+struct dml2_per_plane_programming;
+struct dml2_per_stream_programming;
+
+int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id);
+int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id);
+bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id);
+void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx,
+		struct dml2_per_stream_programming *stream_programming);
+void dml21_populate_mall_allocation_size(struct dc_state *context,
+		struct dml2_context *in_ctx,
+		struct dml2_per_plane_programming *pln_prog,
+		struct pipe_ctx *dc_pipe);
+bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx);
+void find_valid_pipe_idx_for_stream_index(const struct dml2_context *dml_ctx, unsigned int *dml_pipe_idx, unsigned int stream_index);
+void find_pipe_regs_idx(const struct dml2_context *dml_ctx,
+		struct pipe_ctx *pipe, unsigned int *pipe_regs_idx);
+int dml21_find_dc_pipes_for_plane(const struct dc *in_dc,
+		struct dc_state *context,
+		struct dml2_context *dml_ctx,
+		struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__],
+		struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__],
+		int dml_plane_idx);
+void dml21_program_dc_pipe(struct dml2_context *dml_ctx,
+		struct dc_state *context,
+		struct pipe_ctx *pipe_ctx,
+		struct dml2_per_plane_programming *pln_prog,
+		struct dml2_per_stream_programming *stream_prog);
+/* Creates phantom streams/planes for DML streams with phantom_stream enabled. */
+void dml21_handle_phantom_streams_planes(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx);
+unsigned int dml21_get_dc_plane_idx_from_plane_id(unsigned int plane_id);
+/* Rebuilds the FAMS2 data in context->bw_ctx.bw.dcn from the DML output. */
+void dml21_build_fams2_programming(const struct dc *dc,
+		struct dc_state *context,
+		struct dml2_context *dml_ctx);
+/* True when source_format lies within dml2_420_8 .. dml2_rgbe_alpha. */
+bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format);
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
new file mode 100644
index 000000000000..08f7f03b1023
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
@@ -0,0 +1,470 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_internal_types.h"
+#include "dml_top.h"
+#include "dml2_core_dcn4_calcs.h"
+#include "dml2_internal_shared_types.h"
+#include "dml21_utils.h"
+#include "dml21_translation_helper.h"
+#include "dml2_dc_resource_mgmt.h"
+
+#define INVALID -1
+
+/*
+ * Allocate a zeroed dml2_context together with its dml2_instance and
+ * dml2_display_cfg_programming objects, and wire up the internal pointers
+ * that refer to them.
+ *
+ * On success *dml_ctx points at the new context and true is returned.
+ * On failure everything allocated so far is freed again, *dml_ctx is set to
+ * NULL and false is returned, so callers never see a half-built context.
+ */
+static bool dml21_allocate_memory(struct dml2_context **dml_ctx)
+{
+	struct dml2_context *ctx;
+
+	*dml_ctx = NULL;
+
+	ctx = vzalloc(sizeof(struct dml2_context));
+	if (!ctx)
+		return false;
+
+	ctx->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance));
+	if (!ctx->v21.dml_init.dml2_instance)
+		goto err_free_ctx;
+
+	ctx->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming));
+	if (!ctx->v21.mode_programming.programming)
+		goto err_free_instance;
+
+	/* mode support and mode programming share the single instance */
+	ctx->v21.mode_support.dml2_instance = ctx->v21.dml_init.dml2_instance;
+	ctx->v21.mode_programming.dml2_instance = ctx->v21.dml_init.dml2_instance;
+
+	/* both paths operate on the display config embedded in the context */
+	ctx->v21.mode_support.display_config = &ctx->v21.display_config;
+	ctx->v21.mode_programming.display_config = ctx->v21.mode_support.display_config;
+
+	*dml_ctx = ctx;
+	return true;
+
+err_free_instance:
+	vfree(ctx->v21.dml_init.dml2_instance);
+err_free_ctx:
+	vfree(ctx);
+	return false;
+}
+
+/*
+ * Copy @config into dml_ctx->config and then apply the UCLK P-State debug
+ * override taken from in_dc->debug: when dml21_force_pstate_method is set,
+ * forcing is enabled and MAX_PIPES forced values are copied; otherwise
+ * forcing is disabled.
+ */
+static void dml21_populate_configuration_options(const struct dc *in_dc,
+		struct dml2_context *dml_ctx,
+		const struct dml2_configuration_options *config)
+{
+	int pipe;
+
+	dml_ctx->config = *config;
+
+	/* UCLK P-State options */
+	if (!in_dc->debug.dml21_force_pstate_method) {
+		dml_ctx->config.pmo.force_pstate_method_enable = false;
+		return;
+	}
+
+	dml_ctx->config.pmo.force_pstate_method_enable = true;
+	for (pipe = 0; pipe < MAX_PIPES; pipe++)
+		dml_ctx->config.pmo.force_pstate_method_values[pipe] =
+			in_dc->debug.dml21_force_pstate_method_values[pipe];
+}
+
+/*
+ * Initialise an allocated context: tag it as DML 2.1, take over @config
+ * (plus the debug overrides from @in_dc), then populate the init parameters
+ * and initialise the DML instance between DC_FP_START() and DC_FP_END().
+ */
+static void dml21_init(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config)
+{
+	struct dml2_initialize_instance_in_out *init_params = &dml_ctx->v21.dml_init;
+
+	dml_ctx->architecture = dml2_architecture_21;
+	dml21_populate_configuration_options(in_dc, dml_ctx, config);
+
+	DC_FP_START();
+	dml21_populate_dml_init_params(init_params, &dml_ctx->config, in_dc);
+	dml2_initialize_instance(init_params);
+	DC_FP_END();
+}
+
+/*
+ * Allocate a DML 2.1 context into *dml_ctx and initialise it from @config.
+ * Returns false if the allocation fails; initialisation is then skipped.
+ */
+bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config)
+{
+	/* Allocate memory for initializing DML21 instance */
+	if (dml21_allocate_memory(dml_ctx)) {
+		dml21_init(in_dc, *dml_ctx, config);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Free the heap objects owned by a DML 2.1 context: its dml2_instance and
+ * its mode-programming output. The context structure itself is not freed
+ * here. A NULL @dml2 is accepted and ignored, matching vfree().
+ */
+void dml21_destroy(struct dml2_context *dml2)
+{
+	if (!dml2)
+		return;
+
+	vfree(dml2->v21.dml_init.dml2_instance);
+	vfree(dml2->v21.mode_programming.programming);
+}
+
+/*
+ * Copy the DML mode-programming results into the dc state.
+ *
+ * Global arbiter registers and the legacy compbuf size are copied first and
+ * the MALL byte counters are cleared. Then, for every DML plane that has a
+ * descriptor and needs at least one DPP, the dc pipes assigned to it (and to
+ * its phantom, if valid) are programmed and its mcache allocation is copied;
+ * phantom mcache allocations are stored after the main planes. Finally the
+ * global clocks, the maximum supported DISPCLK/DPPCLK and the MALL way count
+ * are filled in.
+ *
+ * @out_new_hw_state and @pipe_cnt are accepted but not read.
+ */
+static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state,
+		struct dml2_context *in_ctx, unsigned int pipe_cnt)
+{
+	struct dml2_display_cfg_programming *programming = in_ctx->v21.mode_programming.programming;
+	unsigned int dml_prog_idx = 0, dc_pipe_index = 0, num_dpps_required = 0;
+	struct dml2_per_plane_programming *pln_prog = NULL;
+	struct dml2_per_stream_programming *stream_prog = NULL;
+	struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+	struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0};
+	int num_pipes;
+	unsigned int dml_phantom_prog_idx;
+	unsigned int num_clk_values, max_clk_idx;
+
+	context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+
+	/* copy global DCHUBBUB arbiter registers */
+	memcpy(&context->bw_ctx.bw.dcn.arb_regs, &programming->global_regs.arb_regs, sizeof(struct dml2_display_arb_regs));
+
+	/* legacy only */
+	context->bw_ctx.bw.dcn.compbuf_size_kb = (int)programming->global_regs.arb_regs.compbuf_size * 64;
+
+	context->bw_ctx.bw.dcn.mall_ss_size_bytes = 0;
+	context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes = 0;
+	context->bw_ctx.bw.dcn.mall_subvp_size_bytes = 0;
+
+	/* phantom's start after main planes */
+	dml_phantom_prog_idx = programming->display_config.num_planes;
+
+	for (dml_prog_idx = 0; dml_prog_idx < DML2_MAX_PLANES; dml_prog_idx++) {
+		pln_prog = &programming->plane_programming[dml_prog_idx];
+
+		if (!pln_prog->plane_descriptor)
+			continue;
+
+		stream_prog = &programming->stream_programming[pln_prog->plane_descriptor->stream_index];
+		num_dpps_required = pln_prog->num_dpps_required;
+
+		if (num_dpps_required == 0)
+			continue;
+
+		num_pipes = dml21_find_dc_pipes_for_plane(dc, context, in_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
+
+		if (num_pipes <= 0)
+			continue;
+
+		/* program each pipe */
+		for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+			dml21_program_dc_pipe(in_ctx, context, dc_main_pipes[dc_pipe_index], pln_prog, stream_prog);
+
+			if (pln_prog->phantom_plane.valid && dc_phantom_pipes[dc_pipe_index])
+				dml21_program_dc_pipe(in_ctx, context, dc_phantom_pipes[dc_pipe_index], pln_prog, stream_prog);
+		}
+
+		/* copy per plane mcache allocation */
+		memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation));
+		if (pln_prog->phantom_plane.valid) {
+			memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx],
+					&pln_prog->phantom_plane.mcache_allocation,
+					sizeof(struct dml2_mcache_surface_allocation));
+
+			dml_phantom_prog_idx++;
+		}
+	}
+
+	/* assign global clocks */
+	context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
+	context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
+
+	/*
+	 * The maximum supported clock is the last populated table entry, i.e.
+	 * index num_clk_values - 1 (index num_clk_values is one past the valid
+	 * entries). Entry 0 is used when the table holds at most one value.
+	 * NOTE(review): presumes the table is sorted ascending, and the "* 1000"
+	 * on a *_khz value stored into a *_khz field looks like a unit mismatch -
+	 * confirm against the clock table definition.
+	 */
+	num_clk_values = in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.num_clk_values;
+	max_clk_idx = num_clk_values > 1 ? num_clk_values - 1 : 0;
+	context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz =
+		in_ctx->v21.dml_init.soc_bb.clk_table.dispclk.clk_values_khz[max_clk_idx] * 1000;
+
+	num_clk_values = in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.num_clk_values;
+	max_clk_idx = num_clk_values > 1 ? num_clk_values - 1 : 0;
+	context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz =
+		in_ctx->v21.dml_init.soc_bb.clk_table.dppclk.clk_values_khz[max_clk_idx] * 1000;
+
+	/* get global mall allocation */
+	if (dc->res_pool->funcs->calculate_mall_ways_from_bytes)
+		context->bw_ctx.bw.dcn.clk.num_ways = dc->res_pool->funcs->calculate_mall_ways_from_bytes(dc, context->bw_ctx.bw.dcn.mall_subvp_size_bytes);
+	else
+		context->bw_ctx.bw.dcn.clk.num_ways = 0;
+}
+
+/*
+ * Fill @mcache_params, one entry per dc plane that has a DML plane index,
+ * from the mcache allocation DML computed for that plane. Entries are
+ * written densely in stream/plane iteration order; planes for which
+ * map_plane_to_dml21_display_cfg() returns INVALID consume no entry.
+ */
+static void dml21_prepare_mcache_params(struct dml2_context *dml_ctx, struct dc_state *context, struct dc_mcache_params *mcache_params)
+{
+	struct dc_mcache_params *out = mcache_params;
+	struct dml2_per_plane_programming *plane_prog = NULL;
+	int s, p, prog_idx;
+
+	for (s = 0; s < context->stream_count; s++) {
+		for (p = 0; p < context->stream_status[s].plane_count; p++) {
+			prog_idx = map_plane_to_dml21_display_cfg(dml_ctx,
+					context->streams[s]->stream_id,
+					context->stream_status[s].plane_states[p],
+					context);
+			if (prog_idx == INVALID)
+				continue;
+
+			plane_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[prog_idx];
+
+			out->valid = plane_prog->mcache_allocation.valid;
+			out->num_mcaches_plane0 = plane_prog->mcache_allocation.num_mcaches_plane0;
+			out->num_mcaches_plane1 = plane_prog->mcache_allocation.num_mcaches_plane1;
+			out->requires_dedicated_mall_mcache = plane_prog->mcache_allocation.requires_dedicated_mall_mcache;
+			out->last_slice_sharing.plane0_plane1 = plane_prog->mcache_allocation.last_slice_sharing.plane0_plane1;
+			memcpy(out->mcache_x_offsets_plane0,
+					plane_prog->mcache_allocation.mcache_x_offsets_plane0,
+					sizeof(int) * (DML2_MAX_MCACHES + 1));
+			memcpy(out->mcache_x_offsets_plane1,
+					plane_prog->mcache_allocation.mcache_x_offsets_plane1,
+					sizeof(int) * (DML2_MAX_MCACHES + 1));
+
+			/* advance to the next output slot */
+			out++;
+		}
+	}
+}
+
+/*
+ * Validation path that also produces hardware programming.
+ *
+ * The DML display config, pipe mapping and mode-programming scratch are
+ * cleared, phantoms are scrubbed from @context, the dc state is translated
+ * into a DML display config and dml2_build_mode_programming() is run. Unless
+ * skip_hw_state_mapping is set, the result is then mapped onto dc pipes
+ * (including phantoms and mcache allocation) and copied into the bandwidth
+ * context.
+ *
+ * Returns true for a NULL or stream-less @context and on success; false when
+ * the translation or the DML build fails.
+ */
+static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	struct dc_mcache_params mcache_params[MAX_PLANES] = {0};
+	struct dml2_build_mode_programming_in_out *build_args = &dml_ctx->v21.mode_programming;
+	bool ok;
+
+	memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg));
+	memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
+	memset(&build_args->dml2_instance->scratch.build_mode_programming_locals.mode_programming_params, 0, sizeof(struct dml2_core_mode_programming_in_out));
+
+	if (context == NULL)
+		return true;
+
+	if (context->stream_count == 0) {
+		dml21_build_fams2_programming(in_dc, context, dml_ctx);
+		return true;
+	}
+
+	/* scrub phantom's from current dc_state */
+	dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context);
+	dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
+
+	/* Populate stream, plane mappings and other fields in display config. */
+	DC_FP_START();
+	ok = dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx);
+	DC_FP_END();
+	if (!ok)
+		return false;
+
+	DC_FP_START();
+	ok = dml2_build_mode_programming(build_args);
+	DC_FP_END();
+	if (!ok)
+		return false;
+
+	/* Check and map HW resources */
+	if (!dml_ctx->config.skip_hw_state_mapping) {
+		dml21_map_hw_resources(dml_ctx);
+		dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, in_dc->current_state);
+		/* if subvp phantoms are present, expand them into dc context */
+		dml21_handle_phantom_streams_planes(in_dc, context, dml_ctx);
+
+		if (in_dc->res_pool->funcs->program_mcache_pipe_config) {
+			/* Prepare mcache params for each plane based on mcache output from DML */
+			dml21_prepare_mcache_params(dml_ctx, context, mcache_params);
+
+			/* populate mcache regs to each pipe */
+			dml_ctx->config.callbacks.allocate_mcache(context, mcache_params);
+		}
+	}
+
+	/* Copy DML CLK, WM and REG outputs to bandwidth context */
+	if (!dml_ctx->config.skip_hw_state_mapping) {
+		dml21_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml_ctx, in_dc->res_pool->pipe_count);
+		dml21_copy_clocks_to_dc_state(dml_ctx, context);
+		dml21_extract_watermark_sets(in_dc, &context->bw_ctx.bw.dcn.watermarks, dml_ctx);
+		dml21_build_fams2_programming(in_dc, context, dml_ctx);
+	}
+
+	return true;
+}
+
+/*
+ * Mode-support-only validation path.
+ *
+ * The DML display config, pipe mapping and mode-support scratch are cleared,
+ * phantoms are scrubbed from @context, the dc state is translated into a DML
+ * display config and dml2_check_mode_supported() is run on it.
+ *
+ * Returns true for a NULL or stream-less @context and when DML reports the
+ * mode as supported; false when DML rejects the mode or when the dc state
+ * could not be translated (the same failure handling as
+ * dml21_mode_check_and_programming()).
+ */
+static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	bool is_supported = false;
+	bool mapped;
+	struct dml2_initialize_instance_in_out *dml_init = &dml_ctx->v21.dml_init;
+	struct dml2_check_mode_supported_in_out *mode_support = &dml_ctx->v21.mode_support;
+
+	memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg));
+	memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
+	memset(&dml_ctx->v21.mode_programming.dml2_instance->scratch.check_mode_supported_locals.mode_support_params, 0, sizeof(struct dml2_core_mode_support_in_out));
+
+	if (!context || context->stream_count == 0)
+		return true;
+
+	/* Scrub phantom's from current dc_state */
+	dml_ctx->config.svp_pstate.callbacks.remove_phantom_streams_and_planes(in_dc, context);
+	dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context);
+
+	mode_support->dml2_instance = dml_init->dml2_instance;
+	DC_FP_START();
+	mapped = dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx);
+	DC_FP_END();
+	dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming;
+
+	/* do not evaluate a display config that could not be built */
+	if (!mapped)
+		return false;
+
+	DC_FP_START();
+	is_supported = dml2_check_mode_supported(mode_support);
+	DC_FP_END();
+
+	return is_supported;
+}
+
+/*
+ * Validate @context with DML 2.1. DC_VALIDATE_MODE_AND_PROGRAMMING takes the
+ * full check-and-programming path; every other mode (DC_VALIDATE_MODE_ONLY,
+ * DC_VALIDATE_MODE_AND_STATE_INDEX) only checks mode support.
+ */
+bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx,
+	enum dc_validate_mode validate_mode)
+{
+	if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING)
+		return dml21_mode_check_and_programming(in_dc, context, dml_ctx);
+
+	return dml21_check_mode_support(in_dc, context, dml_ctx);
+}
+
+/*
+ * Build the per-pipe mcache register programming for @context.
+ *
+ * Pass 1 creates one mcache configuration per DML plane, plus one per valid
+ * phantom plane stored after the main planes, and collects the per-pipe
+ * configuration of every dc pipe assigned to it.
+ * dml2_build_mcache_programming() then generates the register values.
+ * Pass 2 copies the generated registers into the main and phantom pipes.
+ *
+ * Nothing is done when @context has no streams.
+ */
+void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx)
+{
+	struct dml2_display_cfg_programming *programming = dml_ctx->v21.mode_programming.programming;
+	unsigned int dml_prog_idx, dml_phantom_prog_idx, dc_pipe_index;
+	int num_pipes;
+	struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+	struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0};
+
+	struct dml2_per_plane_programming *pln_prog = NULL;
+	struct dml2_plane_mcache_configuration_descriptor *mcache_config = NULL;
+	struct prepare_mcache_programming_locals *l = &dml_ctx->v21.scratch.prepare_mcache_locals;
+
+	if (context->stream_count == 0)
+		return;
+
+	memset(&l->build_mcache_programming_params, 0, sizeof(struct dml2_build_mcache_programming_in_out));
+	l->build_mcache_programming_params.dml2_instance = dml_ctx->v21.dml_init.dml2_instance;
+
+	/* phantom's start after main planes */
+	dml_phantom_prog_idx = programming->display_config.num_planes;
+
+	/* Build mcache programming parameters per plane per pipe */
+	for (dml_prog_idx = 0; dml_prog_idx < programming->display_config.num_planes; dml_prog_idx++) {
+		pln_prog = &programming->plane_programming[dml_prog_idx];
+
+		mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_prog_idx];
+		memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor));
+		mcache_config->plane_descriptor = pln_prog->plane_descriptor;
+		mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx];
+		mcache_config->num_pipes = pln_prog->num_dpps_required;
+		l->build_mcache_programming_params.num_configurations++;
+
+		if (pln_prog->num_dpps_required == 0)
+			continue;
+
+		num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
+		if (num_pipes <= 0 || dc_main_pipes[0]->stream == NULL ||
+				dc_main_pipes[0]->plane_state == NULL)
+			continue;
+
+		/* get config for each pipe */
+		for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+			ASSERT(dc_main_pipes[dc_pipe_index]);
+			dml21_get_pipe_mcache_config(context, dc_main_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]);
+		}
+
+		/* get config for each phantom pipe */
+		if (pln_prog->phantom_plane.valid &&
+				dc_phantom_pipes[0] &&
+				dc_main_pipes[0]->stream &&
+				dc_phantom_pipes[0]->plane_state) {
+			mcache_config = &l->build_mcache_programming_params.mcache_configurations[dml_phantom_prog_idx];
+			memset(mcache_config, 0, sizeof(struct dml2_plane_mcache_configuration_descriptor));
+			mcache_config->plane_descriptor = pln_prog->plane_descriptor;
+			mcache_config->mcache_allocation = &context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx];
+			mcache_config->num_pipes = pln_prog->num_dpps_required;
+			l->build_mcache_programming_params.num_configurations++;
+
+			for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+				ASSERT(dc_phantom_pipes[dc_pipe_index]);
+				dml21_get_pipe_mcache_config(context, dc_phantom_pipes[dc_pipe_index], pln_prog, &mcache_config->pipe_configurations[dc_pipe_index]);
+			}
+
+			/* increment phantom index */
+			dml_phantom_prog_idx++;
+		}
+	}
+
+	/* Call to generate mcache programming per plane per pipe for the given display configuration */
+	dml2_build_mcache_programming(&l->build_mcache_programming_params);
+
+	/*
+	 * Rewind the phantom index: pass 2 has to read the same slots that pass 1
+	 * configured, which start right after the main planes. Without the rewind
+	 * it would continue past the last configured phantom slot.
+	 */
+	dml_phantom_prog_idx = programming->display_config.num_planes;
+
+	/* get per plane per pipe mcache programming */
+	for (dml_prog_idx = 0; dml_prog_idx < programming->display_config.num_planes; dml_prog_idx++) {
+		pln_prog = &programming->plane_programming[dml_prog_idx];
+
+		num_pipes = dml21_find_dc_pipes_for_plane(in_dc, context, dml_ctx, dc_main_pipes, dc_phantom_pipes, dml_prog_idx);
+		if (num_pipes <= 0 || dc_main_pipes[0]->stream == NULL ||
+				dc_main_pipes[0]->plane_state == NULL)
+			continue;
+
+		/* get config for each pipe */
+		for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+			ASSERT(dc_main_pipes[dc_pipe_index]);
+			if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index]) {
+				memcpy(&dc_main_pipes[dc_pipe_index]->mcache_regs,
+						l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_prog_idx][dc_pipe_index],
+						sizeof(struct dml2_hubp_pipe_mcache_regs));
+			}
+		}
+
+		/* get config for each phantom pipe */
+		if (pln_prog->phantom_plane.valid &&
+				dc_phantom_pipes[0] &&
+				dc_main_pipes[0]->stream &&
+				dc_phantom_pipes[0]->plane_state) {
+			for (dc_pipe_index = 0; dc_pipe_index < num_pipes; dc_pipe_index++) {
+				ASSERT(dc_phantom_pipes[dc_pipe_index]);
+				if (l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index]) {
+					memcpy(&dc_phantom_pipes[dc_pipe_index]->mcache_regs,
+							l->build_mcache_programming_params.per_plane_pipe_mcache_regs[dml_phantom_prog_idx][dc_pipe_index],
+							sizeof(struct dml2_hubp_pipe_mcache_regs));
+				}
+			}
+			/* increment phantom index */
+			dml_phantom_prog_idx++;
+		}
+	}
+}
+
+/*
+ * Deep-copy @src_dml_ctx into @dst_dml_ctx.
+ *
+ * The destination keeps its own dml2_instance and programming objects: the
+ * context is copied shallowly, the two objects are copied by value into the
+ * destination's existing allocations, and every internal pointer is then
+ * pointed back at destination-owned storage. The instance is re-initialised
+ * at the end so its internal references are correct for the copy.
+ */
+void dml21_copy(struct dml2_context *dst_dml_ctx,
+	struct dml2_context *src_dml_ctx)
+{
+	/* remember the destination's own objects before the shallow copy overwrites the pointers */
+	struct dml2_display_cfg_programming *own_programming = dst_dml_ctx->v21.mode_programming.programming;
+	struct dml2_instance *own_instance = dst_dml_ctx->v21.dml_init.dml2_instance;
+
+	/* shallow copy of the context itself */
+	memcpy(dst_dml_ctx, src_dml_ctx, sizeof(struct dml2_context));
+
+	/* by-value copy of the heap objects into the destination's storage */
+	memcpy(own_instance, src_dml_ctx->v21.dml_init.dml2_instance, sizeof(struct dml2_instance));
+	memcpy(own_programming, src_dml_ctx->v21.mode_programming.programming, sizeof(struct dml2_display_cfg_programming));
+
+	/* re-target every internal reference at destination-owned storage */
+	dst_dml_ctx->v21.dml_init.dml2_instance = own_instance;
+	dst_dml_ctx->v21.mode_support.dml2_instance = own_instance;
+	dst_dml_ctx->v21.mode_programming.dml2_instance = own_instance;
+	dst_dml_ctx->v21.mode_programming.programming = own_programming;
+
+	dst_dml_ctx->v21.mode_support.display_config = &dst_dml_ctx->v21.display_config;
+	dst_dml_ctx->v21.mode_programming.display_config = &dst_dml_ctx->v21.display_config;
+
+	/* need to initialize copied instance for internal references to be correct */
+	DC_FP_START();
+	dml2_initialize_instance(&dst_dml_ctx->v21.dml_init);
+	DC_FP_END();
+}
+
+/*
+ * Allocate a new DML 2.1 context into *dst_dml_ctx and fill it with a deep
+ * copy of @src_dml_ctx. Returns false if the allocation fails; no copy is
+ * attempted in that case.
+ */
+bool dml21_create_copy(struct dml2_context **dst_dml_ctx,
+	struct dml2_context *src_dml_ctx)
+{
+	/* Allocate memory for initializing DML21 instance */
+	if (dml21_allocate_memory(dst_dml_ctx)) {
+		dml21_copy(*dst_dml_ctx, src_dml_ctx);
+		return true;
+	}
+
+	return false;
+}
+/*
+ * Re-run dml21_init() on @dml_ctx with @config and the debug options of
+ * @in_dc. This is a plain forwarder; nothing is allocated or freed here.
+ */
+void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config)
+{
+ dml21_init(in_dc, dml_ctx, config);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h
new file mode 100644
index 000000000000..15f92029d2e5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#ifndef _DML21_WRAPPER_H_
+#define _DML21_WRAPPER_H_
+
+#include "os_types.h"
+#include "dml_top_soc_parameter_types.h"
+#include "dml_top_display_cfg_types.h"
+
+struct dc;
+struct dc_state;
+struct dml2_configuration_options;
+struct dml2_context;
+enum dc_validate_mode;
+
+/**
+ * dml21_create - Creates dml21_context.
+ * @in_dc: dc.
+ * @dml_ctx: Created dml21 context.
+ * @config: dml21 configuration options.
+ *
+ * Creation of DML21 is done as part of dc_state creation.
+ * DML21 IP, SOC and STATES are initialized at
+ * creation time.
+ *
+ * Return: True if dml21 is successfully created, false otherwise.
+ */
+bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config);
+void dml21_destroy(struct dml2_context *dml2);
+void dml21_copy(struct dml2_context *dst_dml_ctx,
+ struct dml2_context *src_dml_ctx);
+bool dml21_create_copy(struct dml2_context **dst_dml_ctx,
+ struct dml2_context *src_dml_ctx);
+void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config);
+
+/**
+ * dml21_validate - Determines if a display configuration is supported or not.
+ * @in_dc: dc.
+ * @context: dc_state to be validated.
+ * @dml_ctx: dml21 context.
+ * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX will not populate context.res_ctx.
+ *
+ * Based on the validate_mode option internally would call:
+ *
+ * -dml21_mode_check_and_programming - for DC_VALIDATE_MODE_AND_PROGRAMMING option
+ * Calculates if dc_state can be supported on the input display
+ * configuration. If supported, generates the necessary HW
+ * programming for the new dc_state.
+ *
+ * -dml21_check_mode_support - for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX option
+ * Calculates if dc_state can be supported for the input display
+ * config.
+ *
+ * Context: Two threads may not invoke this function concurrently unless they reference
+ * separate dc_states for validation.
+ * Return: True if mode is supported, false otherwise.
+ */
+bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx,
+ enum dc_validate_mode validate_mode);
+
+/* Prepare hubp mcache_regs for hubp mcache ID and split coordinate programming */
+void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx);
+
+/* Structure for inputting external SOCBB and DCNIP values for tool based debugging. */
+struct socbb_ip_params_external {
+ struct dml2_ip_capabilities ip_params;
+ struct dml2_soc_bb soc_bb;
+};
+
+/* mcache parameters decided by dml */
+struct dc_mcache_params {
+ bool valid;
+ /*
+ * For iMALL, dedicated mall mcaches are required (sharing of last
+ * slice possible), for legacy phantom or phantom without return
+ * only the mall mcaches need to be valid.
+ */
+ bool requires_dedicated_mall_mcache;
+ unsigned int num_mcaches_plane0;
+ unsigned int num_mcaches_plane1;
+ /*
+ * Generally, plane0/1 slices must use a disjoint set of caches
+ * but in some cases the final segment of the two planes can
+ * use the same cache. If plane0_plane1 is set, then this is
+ * allowed.
+ *
+ * Similarly, the caches allocated to MALL prefetcher are generally
+ * disjoint, but if mall_prefetch (presumably mall_comb_mcache_p0/p1 below - TODO confirm) is set, then the final segment
+ * between the main and the mall pixel requestor can use the same
+ * cache.
+ *
+ * Note that both bits may be set at the same time.
+ */
+ struct {
+ bool mall_comb_mcache_p0;
+ bool mall_comb_mcache_p1;
+ bool plane0_plane1;
+ } last_slice_sharing;
+ /*
+ * A plane is divided into vertical slices of mcaches,
+ * which wrap on the surface width.
+ *
+ * For example, if the surface width is 7680, and split into
+ * three slices of equal width, the boundary array would contain
+ * [2560, 5120, 7680]
+ *
+ * The assignments are
+ * 0 = [0 .. 2559]
+ * 1 = [2560 .. 5119]
+ * 2 = [5120 .. 7679]
+ * 0 = [7680 .. INF]
+ * The final element implicitly is the same as the first, and
+ * at first seems invalid since it is never referenced (since
+ * it is outside the surface). However, it's useful when shifting
+ * (see below).
+ *
+ * For any given valid mcache assignment, a shifted version, wrapped
+ * on the surface width boundary is also assumed to be valid.
+ *
+ * For example, shifting [2560, 5120, 7680] by -50 results in
+ * [2510, 5070, 7630].
+ *
+ * The assignments are now:
+ * 0 = [0 .. 2509]
+ * 1 = [2510 .. 5069]
+ * 2 = [5070 .. 7629]
+ * 0 = [7630 .. INF]
+ */
+ int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1];
+ int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1];
+};
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
new file mode 100644
index 000000000000..793e1c038efd
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
@@ -0,0 +1,373 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#ifndef __DML_DML_DCN4_SOC_BB__
+#define __DML_DML_DCN4_SOC_BB__
+
+#include "dml_top_soc_parameter_types.h"
+
+static const struct dml2_soc_qos_parameters dml_dcn4_variant_a_soc_qos_params = {
+ .derate_table = {
+ .system_active_urgent = {
+ .dram_derate_percent_pixel = 22,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 76,
+ .dcfclk_derate_percent = 100,
+ },
+ .system_active_average = {
+ .dram_derate_percent_pixel = 17,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 57,
+ .dcfclk_derate_percent = 75,
+ },
+ .dcn_mall_prefetch_urgent = {
+ .dram_derate_percent_pixel = 40,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 83,
+ .dcfclk_derate_percent = 100,
+ },
+ .dcn_mall_prefetch_average = {
+ .dram_derate_percent_pixel = 33,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 62,
+ .dcfclk_derate_percent = 83,
+ },
+ .system_idle_average = {
+ .dram_derate_percent_pixel = 70,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 83,
+ .dcfclk_derate_percent = 100,
+ },
+ },
+ .writeback = {
+ .base_latency_us = 12,
+ .scaling_factor_us = 0,
+ .scaling_factor_mhz = 0,
+ },
+ .qos_params = {
+ .dcn4x = {
+ .df_qos_response_time_fclk_cycles = 300,
+ .max_round_trip_to_furthest_cs_fclk_cycles = 350,
+ .mall_overhead_fclk_cycles = 50,
+ .meta_trip_adder_fclk_cycles = 36,
+ .average_transport_distance_fclk_cycles = 257,
+ .umc_urgent_ramp_latency_margin = 50,
+ .umc_max_latency_margin = 30,
+ .umc_average_latency_margin = 20,
+ .fabric_max_transport_latency_margin = 20,
+ .fabric_average_transport_latency_margin = 10,
+
+ .per_uclk_dpm_params = {
+ {
+ .minimum_uclk_khz = 97 * 1000,
+ .urgent_ramp_uclk_cycles = 472,
+ .trip_to_memory_uclk_cycles = 827,
+ .meta_trip_to_memory_uclk_cycles = 827,
+ .maximum_latency_when_urgent_uclk_cycles = 72,
+ .average_latency_when_urgent_uclk_cycles = 61,
+ .maximum_latency_when_non_urgent_uclk_cycles = 827,
+ .average_latency_when_non_urgent_uclk_cycles = 118,
+ },
+ },
+ },
+ },
+ .qos_type = dml2_qos_param_type_dcn4x,
+};
+
+static const struct dml2_soc_bb dml2_socbb_dcn401 = {
+ .clk_table = {
+ .uclk = {
+ .clk_values_khz = {97000},
+ .num_clk_values = 1,
+ },
+ .fclk = {
+ .clk_values_khz = {300000, 2500000},
+ .num_clk_values = 2,
+ },
+ .dcfclk = {
+ .clk_values_khz = {200000, 1564000},
+ .num_clk_values = 2,
+ },
+ .dispclk = {
+ .clk_values_khz = {100000, 2000000},
+ .num_clk_values = 2,
+ },
+ .dppclk = {
+ .clk_values_khz = {100000, 2000000},
+ .num_clk_values = 2,
+ },
+ .dtbclk = {
+ .clk_values_khz = {100000, 1564000},
+ .num_clk_values = 2,
+ },
+ .phyclk = {
+ .clk_values_khz = {810000, 810000},
+ .num_clk_values = 2,
+ },
+ .socclk = {
+ .clk_values_khz = {300000, 1200000},
+ .num_clk_values = 2,
+ },
+ .dscclk = {
+ .clk_values_khz = {666667, 666667},
+ .num_clk_values = 2,
+ },
+ .phyclk_d18 = {
+ .clk_values_khz = {625000, 625000},
+ .num_clk_values = 2,
+ },
+ .phyclk_d32 = {
+ .clk_values_khz = {625000, 625000},
+ .num_clk_values = 2,
+ },
+ .dram_config = {
+ .channel_width_bytes = 2,
+ .channel_count = 16,
+ .transactions_per_clock = 16,
+ },
+ },
+
+ .qos_parameters = {
+ .derate_table = {
+ .system_active_urgent = {
+ .dram_derate_percent_pixel = 22,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 76,
+ .dcfclk_derate_percent = 100,
+ },
+ .system_active_average = {
+ .dram_derate_percent_pixel = 15,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 57,
+ .dcfclk_derate_percent = 75,
+ },
+ .dcn_mall_prefetch_urgent = {
+ .dram_derate_percent_pixel = 40,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 83,
+ .dcfclk_derate_percent = 100,
+ },
+ .dcn_mall_prefetch_average = {
+ .dram_derate_percent_pixel = 30,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 62,
+ .dcfclk_derate_percent = 83,
+ },
+ .system_idle_average = {
+ .dram_derate_percent_pixel = 70,
+ .dram_derate_percent_vm = 0,
+ .dram_derate_percent_pixel_and_vm = 0,
+ .fclk_derate_percent = 83,
+ .dcfclk_derate_percent = 100,
+ },
+ },
+ .writeback = {
+ .base_latency_us = 0,
+ .scaling_factor_us = 0,
+ .scaling_factor_mhz = 0,
+ },
+ .qos_params = {
+ .dcn4x = {
+ .df_qos_response_time_fclk_cycles = 300,
+ .max_round_trip_to_furthest_cs_fclk_cycles = 350,
+ .mall_overhead_fclk_cycles = 50,
+ .meta_trip_adder_fclk_cycles = 36,
+ .average_transport_distance_fclk_cycles = 260,
+ .umc_urgent_ramp_latency_margin = 50,
+ .umc_max_latency_margin = 30,
+ .umc_average_latency_margin = 20,
+ .fabric_max_transport_latency_margin = 20,
+ .fabric_average_transport_latency_margin = 10,
+
+ .per_uclk_dpm_params = {
+ {
+ // State 1
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 472,
+ .trip_to_memory_uclk_cycles = 827,
+ .meta_trip_to_memory_uclk_cycles = 827,
+ .maximum_latency_when_urgent_uclk_cycles = 72,
+ .average_latency_when_urgent_uclk_cycles = 72,
+ .maximum_latency_when_non_urgent_uclk_cycles = 827,
+ .average_latency_when_non_urgent_uclk_cycles = 117,
+ },
+ {
+ // State 2
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 546,
+ .trip_to_memory_uclk_cycles = 848,
+ .meta_trip_to_memory_uclk_cycles = 848,
+ .maximum_latency_when_urgent_uclk_cycles = 146,
+ .average_latency_when_urgent_uclk_cycles = 146,
+ .maximum_latency_when_non_urgent_uclk_cycles = 848,
+ .average_latency_when_non_urgent_uclk_cycles = 133,
+ },
+ {
+ // State 3
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 564,
+ .trip_to_memory_uclk_cycles = 853,
+ .meta_trip_to_memory_uclk_cycles = 853,
+ .maximum_latency_when_urgent_uclk_cycles = 164,
+ .average_latency_when_urgent_uclk_cycles = 164,
+ .maximum_latency_when_non_urgent_uclk_cycles = 853,
+ .average_latency_when_non_urgent_uclk_cycles = 136,
+ },
+ {
+ // State 4
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 613,
+ .trip_to_memory_uclk_cycles = 869,
+ .meta_trip_to_memory_uclk_cycles = 869,
+ .maximum_latency_when_urgent_uclk_cycles = 213,
+ .average_latency_when_urgent_uclk_cycles = 213,
+ .maximum_latency_when_non_urgent_uclk_cycles = 869,
+ .average_latency_when_non_urgent_uclk_cycles = 149,
+ },
+ {
+ // State 5
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 632,
+ .trip_to_memory_uclk_cycles = 874,
+ .meta_trip_to_memory_uclk_cycles = 874,
+ .maximum_latency_when_urgent_uclk_cycles = 232,
+ .average_latency_when_urgent_uclk_cycles = 232,
+ .maximum_latency_when_non_urgent_uclk_cycles = 874,
+ .average_latency_when_non_urgent_uclk_cycles = 153,
+ },
+ {
+ // State 6
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 665,
+ .trip_to_memory_uclk_cycles = 885,
+ .meta_trip_to_memory_uclk_cycles = 885,
+ .maximum_latency_when_urgent_uclk_cycles = 265,
+ .average_latency_when_urgent_uclk_cycles = 265,
+ .maximum_latency_when_non_urgent_uclk_cycles = 885,
+ .average_latency_when_non_urgent_uclk_cycles = 161,
+ },
+ {
+ // State 7
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 689,
+ .trip_to_memory_uclk_cycles = 895,
+ .meta_trip_to_memory_uclk_cycles = 895,
+ .maximum_latency_when_urgent_uclk_cycles = 289,
+ .average_latency_when_urgent_uclk_cycles = 289,
+ .maximum_latency_when_non_urgent_uclk_cycles = 895,
+ .average_latency_when_non_urgent_uclk_cycles = 167,
+ },
+ {
+ // State 8
+ .minimum_uclk_khz = 0,
+ .urgent_ramp_uclk_cycles = 716,
+ .trip_to_memory_uclk_cycles = 902,
+ .meta_trip_to_memory_uclk_cycles = 902,
+ .maximum_latency_when_urgent_uclk_cycles = 316,
+ .average_latency_when_urgent_uclk_cycles = 316,
+ .maximum_latency_when_non_urgent_uclk_cycles = 902,
+ .average_latency_when_non_urgent_uclk_cycles = 174,
+ },
+ },
+ },
+ },
+ .qos_type = dml2_qos_param_type_dcn4x,
+ },
+
+ .power_management_parameters = {
+ .dram_clk_change_blackout_us = 400,
+ .fclk_change_blackout_us = 0,
+ .g7_ppt_blackout_us = 0,
+ .stutter_enter_plus_exit_latency_us = 54,
+ .stutter_exit_latency_us = 41,
+ .z8_stutter_enter_plus_exit_latency_us = 0,
+ .z8_stutter_exit_latency_us = 0,
+ /*
+ .g6_temp_read_blackout_us = {
+ 23.00,
+ 10.00,
+ 10.00,
+ 8.00,
+ 8.00,
+ 5.00,
+ 5.00,
+ 5.00,
+ },
+ */
+ },
+
+ .vmin_limit = {
+ .dispclk_khz = 600 * 1000,
+ },
+
+ .dprefclk_mhz = 720,
+ .xtalclk_mhz = 100,
+ .pcie_refclk_mhz = 100,
+ .dchub_refclk_mhz = 50,
+ .mall_allocated_for_dcn_mbytes = 64,
+ .max_outstanding_reqs = 512,
+ .fabric_datapath_to_dcn_data_return_bytes = 64,
+ .return_bus_width_bytes = 64,
+ .hostvm_min_page_size_kbytes = 0,
+ .gpuvm_min_page_size_kbytes = 256,
+ .phy_downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.38,
+ .dispclk_dppclk_vco_speed_mhz = 4500,
+ .do_urgent_latency_adjustment = 0,
+ .mem_word_bytes = 32,
+ .num_dcc_mcaches = 8,
+ .mcache_size_bytes = 2048,
+ .mcache_line_size_bytes = 32,
+ .max_fclk_for_uclk_dpm_khz = 1250 * 1000,
+};
+
+static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = {
+ .pipe_count = 4,
+ .otg_count = 4,
+ .num_dsc = 4,
+ .max_num_dp2p0_streams = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_dp2p0_outputs = 4,
+ .rob_buffer_size_kbytes = 192,
+ .config_return_buffer_size_in_kbytes = 1344,
+ .config_return_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 22,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .cursor_buffer_size = 24,
+ .max_flip_time_us = 80,
+ .max_flip_time_lines = 32,
+ .hostvm_mode = 0,
+ .subvp_drr_scheduling_margin_us = 100,
+ .subvp_prefetch_end_to_mall_start_us = 15,
+ .subvp_fw_processing_delay = 15,
+ .max_vactive_det_fill_delay_us = 400,
+
+ .fams2 = {
+ .max_allow_delay_us = 100 * 1000,
+ .scheduling_delay_us = 550,
+ .vertical_interrupt_ack_delay_us = 40,
+ .allow_programming_delay_us = 18,
+ .min_allow_width_us = 20,
+ .subvp_df_throttle_delay_us = 100,
+ .subvp_programming_delay_us = 200,
+ .subvp_prefetch_to_mall_delay_us = 18,
+ .drr_programming_delay_us = 35,
+
+ .lock_timeout_us = 5000,
+ .recovery_timeout_us = 5000,
+ .flip_programming_delay_us = 300,
+ },
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h
new file mode 100644
index 000000000000..281d7ad230d8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml2_external_lib_deps.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_EXTERNAL_LIB_DEPS__
+#define __DML2_EXTERNAL_LIB_DEPS__
+
+#include "os_types.h"
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h
new file mode 100644
index 000000000000..a64ec4dcf11a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_H__
+#define __DML_TOP_H__
+
+#include "dml_top_types.h"
+
+/*
+ * Top Level Interface for DML2
+ */
+
+/*
+ * Returns the size of the DML instance for the caller to allocate
+ */
+unsigned int dml2_get_instance_size_bytes(void);
+
+/*
+ * Initializes the DML instance (i.e. with configuration, soc BB, IP params, etc...)
+ */
+bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out);
+
+/*
+ * Determines if the input mode is supported (boolean) on the SoC at all. Does not return
+ * information on how mode should be programmed.
+ */
+bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out);
+
+/*
+ * Determines the full (optimized) programming for the input mode. Returns minimum
+ * clocks as well as dchub register programming values for all pipes, additional meta
+ * such as ODM or MPCC combine factors.
+ */
+bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out);
+
+/*
+ * Determines the correct per pipe mcache register programming for a valid mode.
+ * The mcache allocation must have been calculated (successfully) in a previous
+ * call to dml2_build_mode_programming.
+ * The actual hubp viewport dimensions must be what the actual registers will be
+ * programmed to (i.e. based on scaler setup).
+ */
+bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h
new file mode 100644
index 000000000000..91955bbe24b8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __dml2_TOP_DCHUB_REGISTERS_H__
+#define __dml2_TOP_DCHUB_REGISTERS_H__
+
+#include "dml2_external_lib_deps.h"
+// These types are uint32_t as they represent actual calculated register values for HW
+
+struct dml2_display_dlg_regs {
+ uint32_t refcyc_h_blank_end;
+ uint32_t dlg_vblank_end;
+ uint32_t min_dst_y_next_start;
+ uint32_t refcyc_per_htotal;
+ uint32_t refcyc_x_after_scaler;
+ uint32_t dst_y_after_scaler;
+ uint32_t dst_y_prefetch;
+ uint32_t dst_y_per_vm_vblank;
+ uint32_t dst_y_per_row_vblank;
+ uint32_t dst_y_per_vm_flip;
+ uint32_t dst_y_per_row_flip;
+ uint32_t ref_freq_to_pix_freq;
+ uint32_t vratio_prefetch;
+ uint32_t vratio_prefetch_c;
+ uint32_t refcyc_per_tdlut_group;
+ uint32_t refcyc_per_pte_group_vblank_l;
+ uint32_t refcyc_per_pte_group_vblank_c;
+ uint32_t refcyc_per_pte_group_flip_l;
+ uint32_t refcyc_per_pte_group_flip_c;
+ uint32_t dst_y_per_pte_row_nom_l;
+ uint32_t dst_y_per_pte_row_nom_c;
+ uint32_t refcyc_per_pte_group_nom_l;
+ uint32_t refcyc_per_pte_group_nom_c;
+ uint32_t refcyc_per_line_delivery_pre_l;
+ uint32_t refcyc_per_line_delivery_pre_c;
+ uint32_t refcyc_per_line_delivery_l;
+ uint32_t refcyc_per_line_delivery_c;
+ uint32_t refcyc_per_vm_group_vblank;
+ uint32_t refcyc_per_vm_group_flip;
+ uint32_t refcyc_per_vm_req_vblank;
+ uint32_t refcyc_per_vm_req_flip;
+ uint32_t dst_y_offset_cur0;
+ uint32_t chunk_hdl_adjust_cur0;
+ uint32_t vready_after_vcount0;
+ uint32_t dst_y_delta_drq_limit;
+ uint32_t refcyc_per_vm_dmdata;
+ uint32_t dmdata_dl_delta;
+ uint32_t dst_y_svp_drq_limit;
+
+ // MRQ
+ uint32_t refcyc_per_meta_chunk_vblank_l;
+ uint32_t refcyc_per_meta_chunk_vblank_c;
+ uint32_t refcyc_per_meta_chunk_flip_l;
+ uint32_t refcyc_per_meta_chunk_flip_c;
+ uint32_t dst_y_per_meta_row_nom_l;
+ uint32_t dst_y_per_meta_row_nom_c;
+ uint32_t refcyc_per_meta_chunk_nom_l;
+ uint32_t refcyc_per_meta_chunk_nom_c;
+};
+
+struct dml2_display_ttu_regs {
+ uint32_t qos_level_low_wm;
+ uint32_t qos_level_high_wm;
+ uint32_t min_ttu_vblank;
+ uint32_t qos_level_flip;
+ uint32_t refcyc_per_req_delivery_l;
+ uint32_t refcyc_per_req_delivery_c;
+ uint32_t refcyc_per_req_delivery_cur0;
+ uint32_t refcyc_per_req_delivery_pre_l;
+ uint32_t refcyc_per_req_delivery_pre_c;
+ uint32_t refcyc_per_req_delivery_pre_cur0;
+ uint32_t qos_level_fixed_l;
+ uint32_t qos_level_fixed_c;
+ uint32_t qos_level_fixed_cur0;
+ uint32_t qos_ramp_disable_l;
+ uint32_t qos_ramp_disable_c;
+ uint32_t qos_ramp_disable_cur0;
+};
+
+struct dml2_display_arb_regs {
+ uint32_t max_req_outstanding;
+ uint32_t min_req_outstanding;
+ uint32_t sat_level_us;
+ uint32_t hvm_max_qos_commit_threshold;
+ uint32_t hvm_min_req_outstand_commit_threshold;
+ uint32_t compbuf_reserved_space_kbytes;
+ uint32_t compbuf_size;
+ uint32_t sdpif_request_rate_limit;
+ uint32_t allow_sdpif_rate_limit_when_cstate_req;
+ uint32_t dcfclk_deep_sleep_hysteresis;
+ uint32_t pstate_stall_threshold;
+};
+
+struct dml2_cursor_dlg_regs{
+ uint32_t dst_x_offset; // CURSOR0_DST_X_OFFSET
+ uint32_t dst_y_offset; // CURSOR0_DST_Y_OFFSET
+ uint32_t chunk_hdl_adjust; // CURSOR0_CHUNK_HDL_ADJUST
+
+ uint32_t qos_level_fixed;
+ uint32_t qos_ramp_disable;
+};
+
+struct dml2_display_plane_rq_regs {
+ uint32_t chunk_size;
+ uint32_t min_chunk_size;
+ uint32_t dpte_group_size;
+ uint32_t mpte_group_size;
+ uint32_t swath_height;
+ uint32_t pte_row_height_linear;
+
+ // MRQ
+ uint32_t meta_chunk_size;
+ uint32_t min_meta_chunk_size;
+};
+
+struct dml2_display_rq_regs {
+ struct dml2_display_plane_rq_regs rq_regs_l;
+ struct dml2_display_plane_rq_regs rq_regs_c;
+ uint32_t drq_expansion_mode;
+ uint32_t prq_expansion_mode;
+ uint32_t crq_expansion_mode;
+ uint32_t plane1_base_address;
+ uint32_t unbounded_request_enabled;
+
+ // MRQ
+ uint32_t mrq_expansion_mode;
+};
+
+struct dml2_display_mcache_regs {
+ uint32_t mcache_id_first;
+ uint32_t mcache_id_second;
+ uint32_t split_location;
+};
+
+struct dml2_hubp_pipe_mcache_regs {
+ struct {
+ struct dml2_display_mcache_regs p0;
+ struct dml2_display_mcache_regs p1;
+ } main;
+ struct {
+ struct dml2_display_mcache_regs p0;
+ struct dml2_display_mcache_regs p1;
+ } mall;
+};
+
+struct dml2_dchub_per_pipe_register_set {
+ struct dml2_display_rq_regs rq_regs;
+ struct dml2_display_ttu_regs ttu_regs;
+ struct dml2_display_dlg_regs dlg_regs;
+
+ uint32_t det_size;
+};
+
+struct dml2_dchub_watermark_regs {
+ /* watermarks */
+ uint32_t urgent;
+ uint32_t sr_enter;
+ uint32_t sr_exit;
+ uint32_t sr_enter_z8;
+ uint32_t sr_exit_z8;
+ uint32_t sr_enter_low_power;
+ uint32_t sr_exit_low_power;
+ uint32_t uclk_pstate;
+ uint32_t fclk_pstate;
+ uint32_t temp_read_or_ppt;
+ uint32_t usr;
+ /* qos */
+ uint32_t refcyc_per_trip_to_mem;
+ uint32_t refcyc_per_meta_trip_to_mem;
+ uint32_t frac_urg_bw_flip;
+ uint32_t frac_urg_bw_nom;
+ uint32_t frac_urg_bw_mall;
+};
+
+enum dml2_dchub_watermark_reg_set_index {
+ DML2_DCHUB_WATERMARK_SET_A = 0,
+ DML2_DCHUB_WATERMARK_SET_B = 1,
+ DML2_DCHUB_WATERMARK_SET_C = 2,
+ DML2_DCHUB_WATERMARK_SET_D = 3,
+ DML2_DCHUB_WATERMARK_SET_NUM = 4,
+};
+
+struct dml2_dchub_global_register_set { // register values that are not tied to a single pipe: arbiter settings plus watermark sets
+ struct dml2_display_arb_regs arb_regs;
+ struct dml2_dchub_watermark_regs wm_regs[DML2_DCHUB_WATERMARK_SET_NUM]; // one slot per dml2_dchub_watermark_reg_set_index value (SET_A..SET_D)
+ unsigned int num_watermark_sets; // presumably the number of wm_regs entries that are populated - TODO confirm
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h
new file mode 100644
index 000000000000..e8dc6471c0be
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h
@@ -0,0 +1,509 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_DISPLAY_CFG_TYPES_H__
+#define __DML_TOP_DISPLAY_CFG_TYPES_H__
+
+#include "dml2_external_lib_deps.h"
+
+#define DML2_MAX_PLANES 8
+#define DML2_MAX_DCN_PIPES 8
+#define DML2_MAX_MCACHES 8 // assume plane is going to be supported by a max of 8 mcaches
+#define DML2_MAX_WRITEBACK 3
+
+enum dml2_swizzle_mode { /* values written out explicitly; numbering is unchanged */
+	dml2_sw_linear = 0, /* SW_LINEAR accepts 256 byte aligned pitch and also 128 byte aligned pitch if DCC is not enabled */
+	dml2_sw_256b_2d = 1,
+	dml2_sw_4kb_2d = 2,
+	dml2_sw_64kb_2d = 3,
+	dml2_sw_256kb_2d = 4,
+
+	dml2_gfx11_sw_linear = 5,
+	dml2_gfx11_sw_64kb_d = 6,
+	dml2_gfx11_sw_64kb_d_t = 7,
+	dml2_gfx11_sw_64kb_d_x = 8,
+	dml2_gfx11_sw_64kb_r_x = 9,
+	dml2_gfx11_sw_256kb_d_x = 10,
+	dml2_gfx11_sw_256kb_r_x = 11,
+
+};
+
+enum dml2_source_format_class {
+ dml2_444_8 = 0,
+ dml2_444_16 = 1,
+ dml2_444_32 = 2,
+ dml2_444_64 = 3,
+ dml2_420_8 = 4,
+ dml2_420_10 = 5,
+ dml2_420_12 = 6,
+ dml2_rgbe_alpha = 9,
+ dml2_rgbe = 10,
+ dml2_mono_8 = 11,
+ dml2_mono_16 = 12,
+ dml2_422_planar_8 = 13,
+ dml2_422_planar_10 = 14,
+ dml2_422_planar_12 = 15,
+ dml2_422_packed_8 = 16,
+ dml2_422_packed_10 = 17,
+ dml2_422_packed_12 = 18
+};
+
+enum dml2_rotation_angle {
+ dml2_rotation_0 = 0,
+ dml2_rotation_90 = 1,
+ dml2_rotation_180 = 2,
+ dml2_rotation_270 = 3
+};
+
+enum dml2_output_format_class {
+ dml2_444 = 0,
+ dml2_s422 = 1,
+ dml2_n422 = 2,
+ dml2_420 = 3
+};
+
+enum dml2_output_encoder_class {
+ dml2_dp = 0,
+ dml2_edp = 1,
+ dml2_dp2p0 = 2,
+ dml2_hdmi = 3,
+ dml2_hdmifrl = 4,
+ dml2_none = 5
+};
+
+enum dml2_output_link_dp_rate {
+ dml2_dp_rate_na = 0,
+ dml2_dp_rate_hbr = 1,
+ dml2_dp_rate_hbr2 = 2,
+ dml2_dp_rate_hbr3 = 3,
+ dml2_dp_rate_uhbr10 = 4,
+ dml2_dp_rate_uhbr13p5 = 5,
+ dml2_dp_rate_uhbr20 = 6
+};
+
+enum dml2_uclk_pstate_change_strategy {
+ dml2_uclk_pstate_change_strategy_auto = 0,
+ dml2_uclk_pstate_change_strategy_force_vactive = 1,
+ dml2_uclk_pstate_change_strategy_force_vblank = 2,
+ dml2_uclk_pstate_change_strategy_force_drr = 3,
+ dml2_uclk_pstate_change_strategy_force_mall_svp = 4,
+ dml2_uclk_pstate_change_strategy_force_mall_full_frame = 5,
+};
+
+enum dml2_svp_mode_override {
+ dml2_svp_mode_override_auto = 0,
+ dml2_svp_mode_override_main_pipe = 1,
+ dml2_svp_mode_override_phantom_pipe = 2, //does not need to be defined explicitly, main overrides result in implicit phantom additions
+ dml2_svp_mode_override_phantom_pipe_no_data_return = 3,
+ dml2_svp_mode_override_imall = 4
+};
+
+enum dml2_refresh_from_mall_mode_override {
+ dml2_refresh_from_mall_mode_override_auto = 0,
+ dml2_refresh_from_mall_mode_override_force_disable = 1,
+ dml2_refresh_from_mall_mode_override_force_enable = 2
+};
+
+enum dml2_odm_mode { /* values written out explicitly; numbering is unchanged */
+	dml2_odm_mode_auto = 0,
+	dml2_odm_mode_bypass = 1,
+	dml2_odm_mode_combine_2to1 = 2,
+	dml2_odm_mode_combine_3to1 = 3,
+	dml2_odm_mode_combine_4to1 = 4,
+	dml2_odm_mode_split_1to2 = 5,
+	dml2_odm_mode_mso_1to2 = 6,
+	dml2_odm_mode_mso_1to4 = 7
+};
+
+enum dml2_scaling_transform { /* values written out explicitly; numbering is unchanged */
+	dml2_scaling_transform_explicit = 0,
+	dml2_scaling_transform_fullscreen = 1,
+	dml2_scaling_transform_aspect_ratio = 2,
+	dml2_scaling_transform_centered = 3
+};
+
+enum dml2_dsc_enable_option {
+ dml2_dsc_disable = 0,
+ dml2_dsc_enable = 1,
+ dml2_dsc_enable_if_necessary = 2
+};
+
+enum dml2_tdlut_addressing_mode {
+ dml2_tdlut_sw_linear = 0,
+ dml2_tdlut_simple_linear = 1
+};
+
+enum dml2_tdlut_width_mode {
+ dml2_tdlut_width_17_cube = 0,
+ dml2_tdlut_width_33_cube = 1
+};
+
+enum dml2_twait_budgeting_setting {
+	dml2_twait_budgeting_setting_ignore = 0, /* Ignore this budget in twait */
+
+	/* Budget for it only if needed (i.e. UCLK/FCLK DPM cannot be supported in active) */
+	dml2_twait_budgeting_setting_if_needed = 1,
+
+	/* Budget for it as long as there is an SoC state that can support it */
+	dml2_twait_budgeting_setting_try = 2,
+};
+
+struct dml2_get_cursor_dlg_reg{
+ unsigned int cursor_x_position;
+ unsigned int cursor_hotspot_x;
+ unsigned int cursor_primary_offset;
+ unsigned int cursor_secondary_offset;
+ bool cursor_stereo_en;
+ bool cursor_2x_magnify;
+ double hratio;
+ double pixel_rate_mhz;
+ double dlg_refclk_mhz;
+};
+
+/// @brief Surface Parameters
+struct dml2_surface_cfg {
+ enum dml2_swizzle_mode tiling;
+
+ struct {
+ unsigned long pitch; // In elements, two pixels per element in 422 packed format
+ unsigned long width;
+ unsigned long height;
+ } plane0;
+
+
+ struct {
+ unsigned long pitch;
+ unsigned long width;
+ unsigned long height;
+ } plane1;
+
+ struct {
+ bool enable;
+ struct {
+ unsigned long pitch;
+ } plane0;
+ struct {
+ unsigned long pitch;
+ } plane1;
+
+ struct {
+ double dcc_rate_plane0;
+ double dcc_rate_plane1;
+ double fraction_of_zero_size_request_plane0;
+ double fraction_of_zero_size_request_plane1;
+ } informative;
+ } dcc;
+};
+
+
+struct dml2_composition_cfg {
+ enum dml2_rotation_angle rotation_angle;
+ bool mirrored;
+ enum dml2_scaling_transform scaling_transform;
+ bool rect_out_height_spans_vactive;
+
+ struct {
+ bool stationary;
+ struct {
+ unsigned long width;
+ unsigned long height;
+ unsigned long x_start;
+ unsigned long y_start;
+ } plane0;
+
+ struct {
+ unsigned long width;
+ unsigned long height;
+ unsigned long x_start;
+ unsigned long y_start;
+ } plane1;
+ } viewport;
+
+ struct {
+ bool enabled;
+ bool upsp_enabled;
+ struct {
+ double h_ratio;
+ double v_ratio;
+ unsigned int h_taps;
+ unsigned int v_taps;
+ } plane0;
+
+ struct {
+ double h_ratio;
+ double v_ratio;
+ unsigned int h_taps;
+ unsigned int v_taps;
+ } plane1;
+
+ unsigned long rect_out_width;
+ } scaler_info;
+};
+
+struct dml2_timing_cfg {
+ unsigned long h_total;
+ unsigned long v_total;
+ unsigned long h_blank_end;
+ unsigned long v_blank_end;
+ unsigned long h_front_porch;
+ unsigned long v_front_porch;
+ unsigned long h_sync_width;
+ unsigned long pixel_clock_khz;
+ unsigned long h_active;
+ unsigned long v_active;
+ unsigned int bpc; //FIXME: review with Jun
+ struct {
+ enum dml2_dsc_enable_option enable;
+ unsigned int dsc_compressed_bpp_x16;
+ struct {
+ // for dv to specify num dsc slices to use
+ unsigned int num_slices;
+ } overrides;
+ } dsc;
+ bool interlaced;
+ struct {
+ /* static */
+ bool enabled;
+ unsigned long min_refresh_uhz;
+ unsigned int max_instant_vtotal_delta;
+ /* dynamic */
+ bool disallowed;
+ bool drr_active_variable;
+ bool drr_active_fixed;
+ } drr_config;
+ unsigned long vblank_nom;
+};
+
+struct dml2_link_output_cfg {
+ enum dml2_output_format_class output_format;
+ enum dml2_output_encoder_class output_encoder;
+ unsigned int output_dp_lane_count;
+ enum dml2_output_link_dp_rate output_dp_link_rate;
+ unsigned long audio_sample_rate;
+ unsigned long audio_sample_layout;
+ bool output_disabled; // The stream does not go to a backend for output to a physical
+ //connector (e.g. writeback only, phantom pipe) goes to writeback
+ bool validate_output; // Do not validate the link configuration for this display stream. NOTE(review): this wording reads as the opposite of the field name - confirm which polarity is intended.
+};
+
+struct dml2_writeback_info {
+ enum dml2_source_format_class pixel_format;
+ unsigned long input_width;
+ unsigned long input_height;
+ unsigned long output_width;
+ unsigned long output_height;
+ unsigned long v_taps;
+ unsigned long h_taps;
+ unsigned long v_taps_chroma;
+ unsigned long h_taps_chroma;
+ double h_ratio;
+ double v_ratio;
+};
+
+struct dml2_writeback_cfg {
+ unsigned int active_writebacks_per_stream;
+ struct dml2_writeback_info writeback_stream[DML2_MAX_WRITEBACK];
+};
+
+struct dml2_plane_parameters {
+ unsigned int stream_index; // Identifies which stream this plane will be composed on (presumably an index into stream_descriptors -- TODO confirm)
+
+ enum dml2_source_format_class pixel_format;
+ /*
+ * The surface and composition structures use
+ * the terms plane0 and plane1. These planes
+ * are expected to hold the following data based
+ * on the pixel format.
+ *
+ * RGB or YUV Non-Planar Types:
+ * dml2_444_8
+ * dml2_444_16
+ * dml2_444_32
+ * dml2_444_64
+ * dml2_rgbe
+ *
+ * plane0 = argb or rgbe
+ * plane1 = not used
+ *
+ * YUV Planar-Types:
+ * dml2_420_8
+ * dml2_420_10
+ * dml2_420_12
+ *
+ * plane0 = luma
+ * plane1 = chroma
+ *
+ * RGB Planar Types:
+ * dml2_rgbe_alpha
+ *
+ * plane0 = rgbe
+ * plane1 = alpha
+ *
+ * Mono Non-Planar Types:
+ * dml2_mono_8
+ * dml2_mono_16
+ *
+ * plane0 = luma
+ * plane1 = not used
+ */
+
+ struct dml2_surface_cfg surface;
+ struct dml2_composition_cfg composition;
+
+ struct {
+ bool enable;
+ unsigned long lines_before_active_required;
+ unsigned long transmitted_bytes;
+ } dynamic_meta_data;
+
+ struct {
+ unsigned int num_cursors;
+ unsigned long cursor_width;
+ unsigned long cursor_bpp;
+ } cursor;
+
+ // For TDLUT, SW would assume TDLUT is set up and enabled all the time and
+ // budget for the worst-case addressing/width mode
+ struct {
+ bool setup_for_tdlut;
+ enum dml2_tdlut_addressing_mode tdlut_addressing_mode;
+ enum dml2_tdlut_width_mode tdlut_width_mode;
+ bool tdlut_mpc_width_flag;
+ } tdlut;
+
+ bool immediate_flip;
+
+ struct {
+ // Logical overrides to power management policies (usually)
+ enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy;
+ enum dml2_refresh_from_mall_mode_override refresh_from_mall;
+ unsigned int det_size_override_kb;
+ unsigned int mpcc_combine_factor;
+
+ // reserved_vblank_time_ns is the minimum time to reserve in vblank for Twait
+ // The actual reserved vblank time used for the corresponding stream in mode_programming would be at least as much as this per-plane override.
+ long reserved_vblank_time_ns;
+ unsigned int max_vactive_det_fill_delay_us; // 0 = no reserved time, positive = explicit max delay
+ unsigned int gpuvm_min_page_size_kbytes;
+ unsigned int hostvm_min_page_size_kbytes;
+
+ enum dml2_svp_mode_override legacy_svp_config; //TODO remove in favor of svp_config
+
+ struct {
+ // HW specific overrides, there's almost no reason to mess with these
+ // generally used for debugging or simulation
+ bool force_one_row_for_frame;
+ struct {
+ bool enable;
+ bool value;
+ } force_pte_buffer_mode;
+ double dppclk_mhz;
+ } hw;
+ } overrides;
+};
+
+struct dml2_stream_parameters {
+ struct dml2_timing_cfg timing;
+ struct dml2_link_output_cfg output;
+ struct dml2_writeback_cfg writeback;
+
+ struct {
+ enum dml2_odm_mode odm_mode;
+ bool disable_dynamic_odm;
+ bool disable_subvp;
+ int minimum_vblank_idle_requirement_us;
+ bool minimize_active_latency_hiding;
+
+ struct {
+ struct {
+ enum dml2_twait_budgeting_setting uclk_pstate;
+ enum dml2_twait_budgeting_setting fclk_pstate;
+ enum dml2_twait_budgeting_setting stutter_enter_exit;
+ } twait_budgeting;
+ } hw;
+ } overrides;
+};
+
+struct dml2_display_cfg {
+ bool gpuvm_enable;
+ bool ffbm_enable;
+ bool hostvm_enable;
+
+ // Allocate DET proportionally between streams based on pixel rate
+ // and then allocate proportionally between planes.
+ bool minimize_det_reallocation;
+
+ unsigned int gpuvm_max_page_table_levels;
+ unsigned int hostvm_max_non_cached_page_table_levels;
+
+ struct dml2_plane_parameters plane_descriptors[DML2_MAX_PLANES];
+ struct dml2_stream_parameters stream_descriptors[DML2_MAX_PLANES];
+
+ unsigned int num_planes;
+ unsigned int num_streams;
+
+ struct {
+ struct {
+ // HW specific overrides, there's almost no reason to mess with these
+ // generally used for debugging or simulation
+ struct {
+ bool enable;
+ bool value;
+ } force_unbounded_requesting;
+
+ struct {
+ bool enable;
+ bool value;
+ } force_nom_det_size_kbytes;
+ bool mode_support_check_disable;
+ bool mcache_admissibility_check_disable;
+ bool surface_viewport_size_check_disable;
+ double dlg_ref_clk_mhz;
+ double dispclk_mhz;
+ double dcfclk_mhz;
+ bool optimize_tdlut_scheduling; // TBD: for DV, will set this to 1, to ensure tdlut schedule is calculated based on address/width mode
+ } hw;
+
+ struct {
+ bool uclk_pstate_change_disable;
+ bool fclk_pstate_change_disable;
+ bool g6_temp_read_pstate_disable;
+ bool g7_ppt_pstate_disable;
+ } power_management;
+
+ bool enhanced_prefetch_schedule_acceleration;
+ bool dcc_programming_assumes_scan_direction_unknown;
+ bool synchronize_timings;
+ bool synchronize_ddr_displays_for_uclk_pstate_change;
+ bool max_outstanding_when_urgent_expected_disable;
+ bool enable_subvp_implicit_pmo; //enables PMO to switch pipe uclk strategy to subvp, and generate phantom programming
+ unsigned int best_effort_min_active_latency_hiding_us;
+ bool all_streams_blanked;
+ } overrides;
+};
+
+struct dml2_pipe_configuration_descriptor {
+ struct {
+ unsigned int viewport_x_start;
+ unsigned int viewport_width;
+ } plane0;
+
+ struct {
+ unsigned int viewport_x_start;
+ unsigned int viewport_width;
+ } plane1;
+
+ bool plane1_enabled;
+ bool imall_enabled;
+};
+
+struct dml2_plane_mcache_configuration_descriptor {
+ const struct dml2_plane_parameters *plane_descriptor;
+ const struct dml2_mcache_surface_allocation *mcache_allocation;
+
+ struct dml2_pipe_configuration_descriptor pipe_configurations[DML2_MAX_DCN_PIPES];
+ char num_pipes;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h
new file mode 100644
index 000000000000..8f624a912e78
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_POLICY_TYPES_H__
+#define __DML_TOP_POLICY_TYPES_H__
+
+struct dml2_policy_parameters {
+ unsigned long odm_combine_dispclk_threshold_khz;
+ unsigned int max_immediate_flip_latency;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h
new file mode 100644
index 000000000000..176f55947664
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_SOC_PARAMETER_TYPES_H__
+#define __DML_TOP_SOC_PARAMETER_TYPES_H__
+
+#include "dml2_external_lib_deps.h"
+
+#define DML_MAX_CLK_TABLE_SIZE 20
+
+struct dml2_soc_derate_values {
+ unsigned int dram_derate_percent_pixel;
+ unsigned int dram_derate_percent_vm;
+ unsigned int dram_derate_percent_pixel_and_vm;
+
+ unsigned int fclk_derate_percent;
+ unsigned int dcfclk_derate_percent;
+};
+
+struct dml2_soc_derates {
+ struct dml2_soc_derate_values system_active_urgent;
+ struct dml2_soc_derate_values system_active_average;
+ struct dml2_soc_derate_values dcn_mall_prefetch_urgent;
+ struct dml2_soc_derate_values dcn_mall_prefetch_average;
+ struct dml2_soc_derate_values system_idle_average;
+};
+
+struct dml2_dcn32x_soc_qos_params {
+ struct {
+ unsigned int base_latency_us;
+ unsigned int base_latency_pixel_vm_us;
+ unsigned int base_latency_vm_us;
+ unsigned int scaling_factor_fclk_us;
+ unsigned int scaling_factor_mhz;
+ } urgent_latency_us;
+
+ unsigned int loaded_round_trip_latency_fclk_cycles;
+ unsigned int urgent_out_of_order_return_per_channel_pixel_only_bytes;
+ unsigned int urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+ unsigned int urgent_out_of_order_return_per_channel_vm_only_bytes;
+};
+
+struct dml2_dcn4_uclk_dpm_dependent_qos_params {
+ unsigned long minimum_uclk_khz;
+ unsigned int urgent_ramp_uclk_cycles;
+ unsigned int trip_to_memory_uclk_cycles;
+ unsigned int meta_trip_to_memory_uclk_cycles;
+ unsigned int maximum_latency_when_urgent_uclk_cycles;
+ unsigned int average_latency_when_urgent_uclk_cycles;
+ unsigned int maximum_latency_when_non_urgent_uclk_cycles;
+ unsigned int average_latency_when_non_urgent_uclk_cycles;
+};
+
+struct dml2_dcn4x_soc_qos_params {
+ unsigned int df_qos_response_time_fclk_cycles;
+ unsigned int max_round_trip_to_furthest_cs_fclk_cycles;
+ unsigned int mall_overhead_fclk_cycles;
+ unsigned int meta_trip_adder_fclk_cycles;
+ unsigned int average_transport_distance_fclk_cycles;
+ double umc_urgent_ramp_latency_margin;
+ double umc_max_latency_margin;
+ double umc_average_latency_margin;
+ double fabric_max_transport_latency_margin;
+ double fabric_average_transport_latency_margin;
+ struct dml2_dcn4_uclk_dpm_dependent_qos_params per_uclk_dpm_params[DML_MAX_CLK_TABLE_SIZE];
+};
+
+enum dml2_qos_param_type {
+ dml2_qos_param_type_dcn3,
+ dml2_qos_param_type_dcn4x
+};
+
+struct dml2_soc_qos_parameters {
+ struct dml2_soc_derates derate_table;
+ struct {
+ unsigned int base_latency_us;
+ unsigned int scaling_factor_us;
+ unsigned int scaling_factor_mhz;
+ } writeback;
+
+ union {
+ struct dml2_dcn32x_soc_qos_params dcn32x;
+ struct dml2_dcn4x_soc_qos_params dcn4x;
+ } qos_params;
+
+ enum dml2_qos_param_type qos_type;
+};
+
+struct dml2_soc_power_management_parameters {
+ double dram_clk_change_blackout_us;
+ double dram_clk_change_read_only_us;
+ double dram_clk_change_write_only_us;
+ double fclk_change_blackout_us;
+ double g7_ppt_blackout_us;
+ double g7_temperature_read_blackout_us;
+ double stutter_enter_plus_exit_latency_us;
+ double stutter_exit_latency_us;
+ double low_power_stutter_enter_plus_exit_latency_us;
+ double low_power_stutter_exit_latency_us;
+ double z8_stutter_enter_plus_exit_latency_us;
+ double z8_stutter_exit_latency_us;
+ double z8_min_idle_time;
+ double g6_temp_read_blackout_us[DML_MAX_CLK_TABLE_SIZE];
+ double type_b_dram_clk_change_blackout_us;
+ double type_b_ppt_blackout_us;
+};
+
+struct dml2_clk_table {
+ unsigned long clk_values_khz[DML_MAX_CLK_TABLE_SIZE];
+ unsigned char num_clk_values;
+};
+
+struct dml2_dram_params {
+ unsigned int channel_width_bytes;
+ unsigned int channel_count;
+ unsigned int transactions_per_clock;
+};
+
+struct dml2_soc_state_table {
+ struct dml2_clk_table uclk;
+ struct dml2_clk_table fclk;
+ struct dml2_clk_table dcfclk;
+ struct dml2_clk_table dispclk;
+ struct dml2_clk_table dppclk;
+ struct dml2_clk_table dtbclk;
+ struct dml2_clk_table phyclk;
+ struct dml2_clk_table socclk;
+ struct dml2_clk_table dscclk;
+ struct dml2_clk_table phyclk_d18;
+ struct dml2_clk_table phyclk_d32;
+
+ struct dml2_dram_params dram_config;
+};
+
+struct dml2_soc_vmin_clock_limits {
+ unsigned long dispclk_khz;
+ unsigned long dcfclk_khz;
+};
+
+struct dml2_soc_bb {
+ struct dml2_soc_state_table clk_table;
+ struct dml2_soc_qos_parameters qos_parameters;
+ struct dml2_soc_power_management_parameters power_management_parameters;
+ struct dml2_soc_vmin_clock_limits vmin_limit;
+
+ double lower_bound_bandwidth_dchub;
+ unsigned int dprefclk_mhz;
+ unsigned int xtalclk_mhz;
+ unsigned int pcie_refclk_mhz;
+ unsigned int dchub_refclk_mhz;
+ unsigned int mall_allocated_for_dcn_mbytes;
+ unsigned int max_outstanding_reqs;
+ unsigned long fabric_datapath_to_dcn_data_return_bytes;
+ unsigned long return_bus_width_bytes;
+ unsigned long hostvm_min_page_size_kbytes;
+ unsigned long gpuvm_min_page_size_kbytes;
+ double phy_downspread_percent;
+ double dcn_downspread_percent;
+ double dispclk_dppclk_vco_speed_mhz;
+ bool no_dfs;
+ bool do_urgent_latency_adjustment;
+ unsigned int mem_word_bytes;
+ unsigned int num_dcc_mcaches;
+ unsigned int mcache_size_bytes;
+ unsigned int mcache_line_size_bytes;
+ unsigned long max_fclk_for_uclk_dpm_khz;
+};
+
+struct dml2_ip_capabilities {
+ unsigned int pipe_count;
+ unsigned int otg_count;
+ unsigned int num_dsc;
+ unsigned int max_num_dp2p0_streams;
+ unsigned int max_num_hdmi_frl_outputs;
+ unsigned int max_num_dp2p0_outputs;
+ unsigned int max_num_wb;
+ unsigned int rob_buffer_size_kbytes;
+ unsigned int config_return_buffer_size_in_kbytes;
+ unsigned int config_return_buffer_segment_size_in_kbytes;
+ unsigned int meta_fifo_size_in_kentries;
+ unsigned int compressed_buffer_segment_size_in_kbytes;
+ unsigned int cursor_buffer_size;
+ unsigned int max_flip_time_us;
+ unsigned int max_flip_time_lines;
+ unsigned int hostvm_mode;
+ unsigned int subvp_drr_scheduling_margin_us;
+ unsigned int subvp_prefetch_end_to_mall_start_us;
+ unsigned int subvp_fw_processing_delay;
+ unsigned int max_vactive_det_fill_delay_us;
+
+ /* FAMS2 delays */
+ struct {
+ unsigned int max_allow_delay_us;
+ unsigned int scheduling_delay_us;
+ unsigned int vertical_interrupt_ack_delay_us; // delay to acknowledge vline interrupt
+ unsigned int allow_programming_delay_us; // time required to program allow
+ unsigned int min_allow_width_us;
+ unsigned int subvp_df_throttle_delay_us;
+ unsigned int subvp_programming_delay_us;
+ unsigned int subvp_prefetch_to_mall_delay_us;
+ unsigned int drr_programming_delay_us;
+
+ unsigned int lock_timeout_us;
+ unsigned int recovery_timeout_us;
+ unsigned int flip_programming_delay_us;
+ } fams2;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
new file mode 100644
index 000000000000..41adb1104d0f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
@@ -0,0 +1,737 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML_TOP_TYPES_H__
+#define __DML_TOP_TYPES_H__
+
+#include "dml_top_display_cfg_types.h"
+#include "dml_top_soc_parameter_types.h"
+#include "dml_top_policy_types.h"
+#include "dml_top_dchub_registers.h"
+
+#include "dmub_cmd.h"
+
+struct dml2_instance;
+
+enum dml2_project_id {
+ dml2_project_invalid = 0,
+ dml2_project_dcn4x_stage1,
+ dml2_project_dcn4x_stage2,
+ dml2_project_dcn4x_stage2_auto_drr_svp,
+};
+
+enum dml2_pstate_change_support {
+ dml2_pstate_change_vactive = 0,
+ dml2_pstate_change_vblank = 1,
+ dml2_pstate_change_vblank_and_vactive = 2,
+ dml2_pstate_change_drr = 3,
+ dml2_pstate_change_mall_svp = 4,
+ dml2_pstate_change_mall_full_frame = 6,
+ dml2_pstate_change_unsupported = 7
+};
+
+enum dml2_output_type_and_rate__type {
+ dml2_output_type_unknown = 0,
+ dml2_output_type_dp = 1,
+ dml2_output_type_edp = 2,
+ dml2_output_type_dp2p0 = 3,
+ dml2_output_type_hdmi = 4,
+ dml2_output_type_hdmifrl = 5
+};
+
+enum dml2_output_type_and_rate__rate {
+ dml2_output_rate_unknown = 0,
+ dml2_output_rate_dp_rate_hbr = 1,
+ dml2_output_rate_dp_rate_hbr2 = 2,
+ dml2_output_rate_dp_rate_hbr3 = 3,
+ dml2_output_rate_dp_rate_uhbr10 = 4,
+ dml2_output_rate_dp_rate_uhbr13p5 = 5,
+ dml2_output_rate_dp_rate_uhbr20 = 6,
+ dml2_output_rate_hdmi_rate_3x3 = 7,
+ dml2_output_rate_hdmi_rate_6x3 = 8,
+ dml2_output_rate_hdmi_rate_6x4 = 9,
+ dml2_output_rate_hdmi_rate_8x4 = 10,
+ dml2_output_rate_hdmi_rate_10x4 = 11,
+ dml2_output_rate_hdmi_rate_12x4 = 12,
+ dml2_output_rate_hdmi_rate_16x4 = 13,
+ dml2_output_rate_hdmi_rate_20x4 = 14
+};
+
+struct dml2_pmo_options {
+ bool disable_vblank;
+ bool disable_svp;
+ bool disable_drr_var;
+ bool disable_drr_clamped;
+ bool disable_drr_var_when_var_active;
+ bool disable_drr_clamped_when_var_active;
+ bool disable_fams2;
+ bool disable_vactive_det_fill_bw_pad; /* dml2_project_dcn4x_stage2_auto_drr_svp and above only */
+ bool disable_dyn_odm;
+ bool disable_dyn_odm_for_multi_stream;
+ bool disable_dyn_odm_for_stream_with_svp;
+};
+
+struct dml2_options {
+ enum dml2_project_id project_id;
+ struct dml2_pmo_options pmo_options;
+};
+
+struct dml2_initialize_instance_in_out {
+ struct dml2_instance *dml2_instance;
+ struct dml2_options options;
+ struct dml2_soc_bb soc_bb;
+ struct dml2_ip_capabilities ip_caps;
+
+ struct {
+ void *explicit_ip_bb;
+ unsigned int explicit_ip_bb_size;
+ } overrides;
+};
+
+struct dml2_reset_instance_in_out {
+ struct dml2_instance *dml2_instance;
+};
+
+struct dml2_check_mode_supported_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_instance *dml2_instance;
+ const struct dml2_display_cfg *display_config;
+
+ /*
+ * Outputs
+ */
+ bool is_supported;
+};
+
+struct dml2_mcache_surface_allocation {
+ bool valid;
+ /*
+ * For iMALL, dedicated mall mcaches are required (sharing of last
+ * slice possible), for legacy phantom or phantom without return
+ * only the mall mcaches need to be valid.
+ */
+ bool requires_dedicated_mall_mcache;
+
+ unsigned int num_mcaches_plane0;
+ unsigned int num_mcaches_plane1;
+ /*
+ * A plane is divided into vertical slices of mcaches,
+ * which wrap on the surface width.
+ *
+ * For example, if the surface width is 7680, and split into
+ * three slices of equal width, the boundary array would contain
+ * [2560, 5120, 7680]
+ *
+ * The assignments are
+ * 0 = [0 .. 2559]
+ * 1 = [2560 .. 5119]
+ * 2 = [5120 .. 7679]
+ * 0 = [7680 .. INF]
+ * The final element is implicitly the same as the first, and
+ * at first seems invalid since it is never referenced (since
+ * it is outside the surface). However, it's useful when shifting
+ * (see below).
+ *
+ * For any given valid mcache assignment, a shifted version, wrapped
+ * on the surface width boundary is also assumed to be valid.
+ *
+ * For example, shifting [2560, 5120, 7680] by -50 results in
+ * [2510, 5070, 7630].
+ *
+ * The assignments are now:
+ * 0 = [0 .. 2509]
+ * 1 = [2510 .. 5069]
+ * 2 = [5070 .. 7629]
+ * 0 = [7630 .. INF]
+ */
+ int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1];
+ int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1];
+
+ /*
+ * Shift granularity is not necessarily 1
+ */
+ struct {
+ int p0;
+ int p1;
+ } shift_granularity;
+
+ /*
+ * MCacheIDs have global scope in the SoC, and they are stored here.
+ * These IDs are generally not valid until all planes in a display
+ * configuration have had their mcache requirements calculated.
+ */
+ int global_mcache_ids_plane0[DML2_MAX_MCACHES + 1];
+ int global_mcache_ids_plane1[DML2_MAX_MCACHES + 1];
+ int global_mcache_ids_mall_plane0[DML2_MAX_MCACHES + 1];
+ int global_mcache_ids_mall_plane1[DML2_MAX_MCACHES + 1];
+
+ /*
+ * Generally, plane0/1 slices must use a disjoint set of caches
+ * but in some cases the final segment of the two planes can
+ * use the same cache. If plane0_plane1 is set, then this is
+ * allowed.
+ *
+ * Similarly, the caches allocated to MALL prefetcher are generally
+ * disjoint, but if mall_prefetch (presumably mall_comb_mcache_p0/p1 -- confirm) is set, then the final segment
+ * between the main and the mall pixel requestor can use the same
+ * cache.
+ *
+ * Note that both bits may be set at the same time.
+ */
+ struct {
+ bool mall_comb_mcache_p0;
+ bool mall_comb_mcache_p1;
+ bool plane0_plane1;
+ } last_slice_sharing;
+
+ struct {
+ int meta_row_bytes_plane0;
+ int meta_row_bytes_plane1;
+ } informative;
+};
+
+enum dml2_pstate_method {
+ dml2_pstate_method_na = 0,
+ /* hw exclusive modes */
+ dml2_pstate_method_vactive = 1,
+ dml2_pstate_method_vblank = 2,
+ dml2_pstate_method_reserved_hw = 5,
+ /* fw assisted exclusive modes */
+ dml2_pstate_method_fw_svp = 6,
+ dml2_pstate_method_reserved_fw = 10,
+ /* fw assisted modes requiring drr modulation */
+ dml2_pstate_method_fw_vactive_drr = 11,
+ dml2_pstate_method_fw_vblank_drr = 12,
+ dml2_pstate_method_fw_svp_drr = 13,
+ dml2_pstate_method_reserved_fw_drr_clamped = 20,
+ dml2_pstate_method_fw_drr = 21,
+ dml2_pstate_method_reserved_fw_drr_var = 22,
+ dml2_pstate_method_count
+};
+
+struct dml2_per_plane_programming {
+ const struct dml2_plane_parameters *plane_descriptor;
+
+ union {
+ struct {
+ unsigned long dppclk_khz;
+ } dcn4x;
+ } min_clocks;
+
+ struct dml2_mcache_surface_allocation mcache_allocation;
+
+ // If a stream is using automatic or forced odm combine
+ // and the stream for this plane has num_odms_required > 1
+ // num_dpps_required is always equal to num_odms_required for
+ // ALL planes of the stream
+
+ // If a stream is using odm split, then this value is always 1
+ unsigned int num_dpps_required;
+
+ enum dml2_pstate_method uclk_pstate_support_method;
+
+ // MALL size requirements for MALL SS and SubVP
+ unsigned int surface_size_mall_bytes;
+ unsigned int svp_size_mall_bytes;
+
+ struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES];
+
+ struct {
+ bool valid;
+ struct dml2_plane_parameters descriptor;
+ struct dml2_mcache_surface_allocation mcache_allocation;
+ struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES];
+ } phantom_plane;
+};
+
+union dml2_global_sync_programming {
+ struct {
+ unsigned int vstartup_lines;
+ unsigned int vupdate_offset_pixels;
+ unsigned int vupdate_vupdate_width_pixels;
+ unsigned int vready_offset_pixels;
+ unsigned int pstate_keepout_start_lines;
+ } dcn4x;
+};
+
+struct dml2_per_stream_programming {
+ const struct dml2_stream_parameters *stream_descriptor;
+
+ union {
+ struct {
+ unsigned long dscclk_khz;
+ unsigned long dtbclk_khz;
+ unsigned long phyclk_khz;
+ } dcn4x;
+ } min_clocks;
+
+ union dml2_global_sync_programming global_sync;
+
+ unsigned int num_odms_required;
+
+ enum dml2_pstate_method uclk_pstate_method;
+
+ struct {
+ bool enabled;
+ struct dml2_stream_parameters descriptor;
+ union dml2_global_sync_programming global_sync;
+ } phantom_stream;
+
+ union dmub_cmd_fams2_config fams2_base_params;
+ union {
+ union dmub_cmd_fams2_config fams2_sub_params;
+ union dmub_fams2_stream_static_sub_state_v2 fams2_sub_params_v2;
+ };
+};
+
+//-----------------
+// Mode Support Information
+//-----------------
+
+struct dml2_mode_support_info {
+ bool ModeIsSupported; //<brief Is the mode supported at any voltage and combine setting
+ bool ImmediateFlipSupport; //<brief Means the mode supports immediate flip at the max combine setting; determined in mode support and used in mode programming
+ // Mode Support Reason
+ bool WritebackLatencySupport;
+ bool ScaleRatioAndTapsSupport;
+ bool SourceFormatPixelAndScanSupport;
+ bool P2IWith420;
+ bool DSCOnlyIfNecessaryWithBPP;
+ bool DSC422NativeNotSupported;
+ bool LinkRateDoesNotMatchDPVersion;
+ bool LinkRateForMultistreamNotIndicated;
+ bool BPPForMultistreamNotIndicated;
+ bool MultistreamWithHDMIOreDP;
+ bool MSOOrODMSplitWithNonDPLink;
+ bool NotEnoughLanesForMSO;
+ bool NumberOfOTGSupport;
+ bool NumberOfHDMIFRLSupport;
+ bool NumberOfDP2p0Support;
+ bool WritebackScaleRatioAndTapsSupport;
+ bool CursorSupport;
+ bool PitchSupport;
+ bool ViewportExceedsSurface;
+ bool ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified;
+ bool ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
+ bool InvalidCombinationOfMALLUseForPStateAndStaticScreen;
+ bool InvalidCombinationOfMALLUseForPState;
+ bool ExceededMALLSize;
+ bool EnoughWritebackUnits;
+ bool ExceededMultistreamSlots;
+ bool NotEnoughDSCUnits;
+ bool NotEnoughDSCSlices;
+ bool PixelsPerLinePerDSCUnitSupport;
+ bool DSCCLKRequiredMoreThanSupported;
+ bool DTBCLKRequiredMoreThanSupported;
+ bool LinkCapacitySupport;
+ bool ROBSupport;
+ bool OutstandingRequestsSupport;
+ bool OutstandingRequestsUrgencyAvoidance;
+ bool PTEBufferSizeNotExceeded;
+ bool DCCMetaBufferSizeNotExceeded;
+ bool TotalVerticalActiveBandwidthSupport;
+ bool VActiveBandwidthSupport;
+ enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES];
+ bool USRRetrainingSupport;
+ bool PrefetchSupported;
+ bool DynamicMetadataSupported;
+ bool VRatioInPrefetchSupported;
+ bool DISPCLK_DPPCLK_Support;
+ bool TotalAvailablePipesSupport;
+ bool ViewportSizeSupport;
+ bool ImmediateFlipSupportedForState;
+ double MaxTotalVerticalActiveAvailableBandwidth;
+ bool MPCCombineEnable[DML2_MAX_PLANES]; /// <brief Indicates if MPC combine is enabled in the given state and optimized mpc combine setting
+ enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; /// <brief ODM mode that is chosen in the mode check stage and will be used in the mode programming stage
+ unsigned int DPPPerSurface[DML2_MAX_PLANES]; /// <brief How many DPPs are needed to drive the surface to output. With MPCC or ODMC this could be 2 or 4.
+ bool DSCEnabled[DML2_MAX_PLANES]; /// <brief Indicates if DSC is actually required; used in mode_programming
+ bool FECEnabled[DML2_MAX_PLANES]; /// <brief Indicates if FEC is actually required
+ unsigned int NumberOfDSCSlices[DML2_MAX_PLANES]; /// <brief Indicates how many slices are needed to support the given mode
+ double OutputBpp[DML2_MAX_PLANES];
+ enum dml2_output_type_and_rate__type OutputType[DML2_MAX_PLANES];
+ enum dml2_output_type_and_rate__rate OutputRate[DML2_MAX_PLANES];
+ unsigned int AlignedYPitch[DML2_MAX_PLANES];
+ unsigned int AlignedCPitch[DML2_MAX_PLANES];
+ bool g6_temp_read_support;
+ bool temp_read_or_ppt_support;
+}; // dml2_mode_support_info
+
+struct dml2_display_cfg_programming {
+ struct dml2_display_cfg display_config;
+
+ union {
+ struct {
+ unsigned long dcfclk_khz;
+ unsigned long fclk_khz;
+ unsigned long uclk_khz;
+ unsigned long socclk_khz;
+ unsigned long dispclk_khz;
+ unsigned long dcfclk_deepsleep_khz;
+ unsigned long dpp_ref_khz;
+ } dcn32x;
+ struct {
+ struct {
+ unsigned long uclk_khz;
+ unsigned long fclk_khz;
+ unsigned long dcfclk_khz;
+ } active;
+ struct {
+ unsigned long uclk_khz;
+ unsigned long fclk_khz;
+ unsigned long dcfclk_khz;
+ } idle;
+ struct {
+ unsigned long uclk_khz;
+ unsigned long fclk_khz;
+ unsigned long dcfclk_khz;
+ } svp_prefetch;
+ struct {
+ unsigned long uclk_khz;
+ unsigned long fclk_khz;
+ unsigned long dcfclk_khz;
+ } svp_prefetch_no_throttle;
+
+ unsigned long deepsleep_dcfclk_khz;
+ unsigned long dispclk_khz;
+ unsigned long dpprefclk_khz;
+ unsigned long dtbrefclk_khz;
+ unsigned long socclk_khz;
+
+ struct {
+ uint32_t dispclk_did;
+ uint32_t dpprefclk_did;
+ uint32_t dtbrefclk_did;
+ } divider_ids;
+ } dcn4x;
+ } min_clocks;
+
+ bool uclk_pstate_supported;
+ bool fclk_pstate_supported;
+
+ /* indicates this configuration requires FW to support */
+ bool fams2_required;
+ struct dmub_cmd_fams2_global_config fams2_global_config;
+
+ struct {
+ bool supported_in_blank; // Changing to configurations where this is false requires stutter to be disabled during the transition
+ uint8_t base_percent_efficiency; //LP1
+ uint8_t low_power_percent_efficiency; //LP2
+ } stutter;
+
+ struct {
+ bool meets_eco; // Stutter cycles will meet Z8 ECO criteria
+ bool supported_in_blank; // Changing to configurations where this is false requires Z8 to be disabled during the transition
+ } z8_stutter;
+
+ struct dml2_dchub_global_register_set global_regs;
+
+ struct dml2_per_plane_programming plane_programming[DML2_MAX_PLANES];
+ struct dml2_per_stream_programming stream_programming[DML2_MAX_PLANES];
+
+ // Don't access this structure directly, access it through plane_programming.pipe_regs
+ struct dml2_dchub_per_pipe_register_set pipe_regs[DML2_MAX_PLANES];
+
+ struct {
+ struct {
+ double urgent_us;
+ double writeback_urgent_us;
+ double writeback_pstate_us;
+ double writeback_fclk_pstate_us;
+ double cstate_exit_us;
+ double cstate_enter_plus_exit_us;
+ double z8_cstate_exit_us;
+ double z8_cstate_enter_plus_exit_us;
+ double pstate_change_us;
+ double fclk_pstate_change_us;
+ double usr_retraining_us;
+ double temp_read_or_ppt_watermark_us;
+ } watermarks;
+
+ struct {
+ unsigned int swath_width_plane0;
+ unsigned int swath_height_plane0;
+ unsigned int swath_height_plane1;
+ unsigned int dpte_row_height_plane0;
+ unsigned int dpte_row_height_plane1;
+ unsigned int meta_row_height_plane0;
+ unsigned int meta_row_height_plane1;
+ } plane_info[DML2_MAX_PLANES];
+
+ struct {
+ unsigned int total_num_dpps_required;
+ } dpp;
+
+ struct {
+ unsigned long long total_surface_size_in_mall_bytes;
+ unsigned int subviewport_lines_needed_in_mall[DML2_MAX_PLANES];
+ } mall;
+
+ struct {
+ double urgent_latency_us; // urgent ramp latency
+ double max_non_urgent_latency_us;
+ double max_urgent_latency_us;
+ double avg_non_urgent_latency_us;
+ double avg_urgent_latency_us;
+ double wm_memory_trip_us;
+ double meta_trip_memory_us;
+ double fraction_of_urgent_bandwidth; // nom
+ double fraction_of_urgent_bandwidth_immediate_flip;
+ double fraction_of_urgent_bandwidth_mall;
+ double max_active_fclk_change_latency_supported;
+ unsigned int min_return_latency_in_dcfclk;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ double dram_vm_only_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ double dram_vm_only_bw_mbps;
+ } sys_active;
+ } urg_bw_available;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+ } avg_bw_available;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+ } non_urg_bw_required;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+ } non_urg_bw_required_with_flip;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+
+ } urg_bw_required;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+ } urg_bw_required_with_flip;
+
+ struct {
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } svp_prefetch;
+
+ struct {
+ double sdp_bw_mbps;
+ double dram_bw_mbps;
+ } sys_active;
+ } avg_bw_required;
+ } qos;
+
+ struct {
+ unsigned long long det_size_in_kbytes[DML2_MAX_PLANES];
+ unsigned long long DETBufferSizeY[DML2_MAX_PLANES];
+ unsigned long long comp_buffer_size_kbytes;
+ bool UnboundedRequestEnabled;
+ unsigned int compbuf_reserved_space_64b;
+ } crb;
+
+ struct {
+ unsigned int max_uncompressed_block_plane0;
+ unsigned int max_compressed_block_plane0;
+ unsigned int independent_block_plane0;
+ unsigned int max_uncompressed_block_plane1;
+ unsigned int max_compressed_block_plane1;
+ unsigned int independent_block_plane1;
+ } dcc_control[DML2_MAX_PLANES];
+
+ struct {
+ double stutter_efficiency;
+ double stutter_efficiency_with_vblank;
+ double stutter_num_bursts;
+
+ struct {
+ double stutter_efficiency;
+ double stutter_efficiency_with_vblank;
+ double stutter_num_bursts;
+ double stutter_period;
+
+ struct {
+ double stutter_efficiency;
+ double stutter_num_bursts;
+ double stutter_period;
+ } bestcase;
+ } z8;
+ } power_management;
+
+ struct {
+ double min_ttu_vblank_us[DML2_MAX_PLANES];
+ bool vready_at_or_after_vsync[DML2_MAX_PLANES];
+ double min_dst_y_next_start[DML2_MAX_PLANES];
+ bool cstate_max_cap_mode;
+ bool hw_debug5;
+ unsigned int dcfclk_deep_sleep_hysteresis;
+ unsigned int dst_x_after_scaler[DML2_MAX_PLANES];
+ unsigned int dst_y_after_scaler[DML2_MAX_PLANES];
+ unsigned int prefetch_source_lines_plane0[DML2_MAX_PLANES];
+ unsigned int prefetch_source_lines_plane1[DML2_MAX_PLANES];
+ bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES];
+ bool UsesMALLForStaticScreen[DML2_MAX_PLANES];
+ unsigned int CursorDstXOffset[DML2_MAX_PLANES];
+ unsigned int CursorDstYOffset[DML2_MAX_PLANES];
+ unsigned int CursorChunkHDLAdjust[DML2_MAX_PLANES];
+ unsigned int dpte_group_bytes[DML2_MAX_PLANES];
+ unsigned int vm_group_bytes[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeLuma[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeChroma[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[DML2_MAX_PLANES];
+ double TimePerVMGroupVBlank[DML2_MAX_PLANES];
+ double TimePerVMGroupFlip[DML2_MAX_PLANES];
+ double TimePerVMRequestVBlank[DML2_MAX_PLANES];
+ double TimePerVMRequestFlip[DML2_MAX_PLANES];
+ double Tdmdl_vm[DML2_MAX_PLANES];
+ double Tdmdl[DML2_MAX_PLANES];
+ unsigned int VStartup[DML2_MAX_PLANES];
+ unsigned int VUpdateOffsetPix[DML2_MAX_PLANES];
+ unsigned int VUpdateWidthPix[DML2_MAX_PLANES];
+ unsigned int VReadyOffsetPix[DML2_MAX_PLANES];
+
+ double DST_Y_PER_PTE_ROW_NOM_L[DML2_MAX_PLANES];
+ double DST_Y_PER_PTE_ROW_NOM_C[DML2_MAX_PLANES];
+ double time_per_pte_group_nom_luma[DML2_MAX_PLANES];
+ double time_per_pte_group_nom_chroma[DML2_MAX_PLANES];
+ double time_per_pte_group_vblank_luma[DML2_MAX_PLANES];
+ double time_per_pte_group_vblank_chroma[DML2_MAX_PLANES];
+ double time_per_pte_group_flip_luma[DML2_MAX_PLANES];
+ double time_per_pte_group_flip_chroma[DML2_MAX_PLANES];
+ double VRatioPrefetchY[DML2_MAX_PLANES];
+ double VRatioPrefetchC[DML2_MAX_PLANES];
+ double DestinationLinesForPrefetch[DML2_MAX_PLANES];
+ double DestinationLinesToRequestVMInVBlank[DML2_MAX_PLANES];
+ double DestinationLinesToRequestRowInVBlank[DML2_MAX_PLANES];
+ double DestinationLinesToRequestVMInImmediateFlip[DML2_MAX_PLANES];
+ double DestinationLinesToRequestRowInImmediateFlip[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeLuma[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeChroma[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[DML2_MAX_PLANES];
+
+ double WritebackRequiredBandwidth;
+ double WritebackAllowDRAMClockChangeEndPosition[DML2_MAX_PLANES];
+ double WritebackAllowFCLKChangeEndPosition[DML2_MAX_PLANES];
+ double DSCCLK_calculated[DML2_MAX_PLANES];
+ unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES];
+ bool PTE_BUFFER_MODE[DML2_MAX_PLANES];
+ double DSCDelay[DML2_MAX_PLANES];
+ double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES];
+ unsigned int PrefetchMode[DML2_MAX_PLANES]; // LEGACY_ONLY
+ bool ROBUrgencyAvoidance;
+ double LowestPrefetchMargin;
+ } misc;
+
+ struct dml2_mode_support_info mode_support_info;
+ unsigned int voltage_level; // LEGACY_ONLY
+
+ // For DV only
+ // This is what dml core calculated, only on the full_vp width and assume we have
+ // unlimited # of mcache
+ struct dml2_mcache_surface_allocation non_optimized_mcache_allocation[DML2_MAX_PLANES];
+
+ bool failed_prefetch;
+ bool failed_uclk_pstate;
+ bool failed_mcache_validation;
+ bool failed_dpmm;
+ bool failed_mode_programming;
+ bool failed_mode_programming_dcfclk;
+ bool failed_mode_programming_prefetch;
+ bool failed_mode_programming_flip;
+ bool failed_map_watermarks;
+ } informative;
+};
+
+struct dml2_build_mode_programming_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_instance *dml2_instance;
+ const struct dml2_display_cfg *display_config;
+
+ /*
+ * Outputs
+ */
+ struct dml2_display_cfg_programming *programming;
+};
+
+struct dml2_build_mcache_programming_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_instance *dml2_instance;
+
+ struct dml2_plane_mcache_configuration_descriptor mcache_configurations[DML2_MAX_PLANES];
+ char num_configurations;
+
+ /*
+ * Outputs
+ */
+ // per_plane_pipe_mcache_regs[i][j] refers to the proper programming for the j-th pipe of the
+ // i-th plane (from mcache_configurations)
+ struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES];
+
+ // It's not a good idea to reference this directly, better to use the pointer structure above instead
+ struct dml2_hubp_pipe_mcache_regs mcache_regs_set[DML2_MAX_DCN_PIPES];
+};
+
+struct dml2_unit_test_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_instance *dml2_instance;
+};
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
new file mode 100644
index 000000000000..6ee37386f672
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_internal_shared_types.h"
+#include "dml2_core_shared_types.h"
+#include "dml2_core_dcn4.h"
+#include "dml2_core_dcn4_calcs.h"
+#include "dml2_debug.h"
+#include "lib_float_math.h"
+
+struct dml2_core_ip_params core_dcn4_ip_caps_base = {
+ // Hardcoded IP defaults. NOTE(review): the original comment said "DCN3x" although this table is named core_dcn4_ip_caps_base — confirm the targeted generation
+ .vblank_nom_default_us = 668,
+ .remote_iommu_outstanding_translations = 256,
+ .rob_buffer_size_kbytes = 128,
+ .config_return_buffer_size_in_kbytes = 1280,
+ .config_return_buffer_segment_size_in_kbytes = 64,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .dpte_buffer_size_in_pte_reqs_luma = 68,
+ .dpte_buffer_size_in_pte_reqs_chroma = 36,
+ .pixel_chunk_size_kbytes = 8,
+ .alpha_pixel_chunk_size_kbytes = 4,
+ .min_pixel_chunk_size_bytes = 1024,
+ .writeback_chunk_size_kbytes = 8,
+ .line_buffer_size_bits = 1171920,
+ .max_line_buffer_lines = 32,
+ .writeback_interface_buffer_size_kbytes = 90,
+ // Number of pipes after DCN pipe harvesting; max_num_dpp and max_num_otg are overwritten from ip_caps by patch_ip_params_with_ip_caps()
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .dppclk_delay_subtotal = 47,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 28,
+ .dppclk_delay_cnvc_cursor = 6,
+ .cursor_buffer_size = 24,
+ .cursor_chunk_size = 2,
+ .dispclk_delay_subtotal = 125,
+ .max_inter_dcn_tile_repeaters = 8,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .writeback_line_buffer_buffer_size = 0,
+ .num_dsc = 4,
+ .maximum_dsc_bits_per_component = 12,
+ .maximum_pixels_per_line_per_dsc_unit = 5760,
+ .dsc422_native_support = true,
+ .dcc_supported = true,
+ .ptoi_supported = false,
+
+ .cursor_64bpp_support = true,
+ .dynamic_metadata_vm_enabled = false,
+
+ .max_num_dp2p0_outputs = 4,
+ .max_num_dp2p0_streams = 4,
+ .imall_supported = 1, // core_dcn4_initialize() forces this to false when no explicit IP block is supplied
+ .max_flip_time_us = 80,
+ .max_flip_time_lines = 32,
+ .words_per_channel = 16,
+
+ .subvp_fw_processing_delay_us = 15, // SubVP defaults (this line and the two below)
+ .subvp_pstate_allow_width_us = 20,
+ .subvp_swath_height_margin_lines = 16,
+};
+
+static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *ip_caps, const struct dml2_core_ip_params *ip_params)
+{
+ /* Overwrite the ip_caps fields listed below with the values from ip_params, then set three fixed SubVP values. */
+ ip_caps->pipe_count = ip_params->max_num_dpp;
+ ip_caps->otg_count = ip_params->max_num_otg;
+ ip_caps->num_dsc = ip_params->num_dsc;
+ ip_caps->max_num_dp2p0_streams = ip_params->max_num_dp2p0_streams;
+ ip_caps->max_num_dp2p0_outputs = ip_params->max_num_dp2p0_outputs;
+ ip_caps->max_num_hdmi_frl_outputs = ip_params->max_num_hdmi_frl_outputs;
+ ip_caps->rob_buffer_size_kbytes = ip_params->rob_buffer_size_kbytes;
+ ip_caps->config_return_buffer_size_in_kbytes = ip_params->config_return_buffer_size_in_kbytes;
+ ip_caps->config_return_buffer_segment_size_in_kbytes = ip_params->config_return_buffer_segment_size_in_kbytes;
+ ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries;
+ ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes;
+ ip_caps->cursor_buffer_size = ip_params->cursor_buffer_size;
+ ip_caps->max_flip_time_us = ip_params->max_flip_time_us;
+ ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines;
+ ip_caps->hostvm_mode = ip_params->hostvm_mode;
+
+ // FIXME_STAGE2: clean up once all DV overrides have been added to ip_caps
+ ip_caps->subvp_drr_scheduling_margin_us = 100;
+ ip_caps->subvp_prefetch_end_to_mall_start_us = 15;
+ ip_caps->subvp_fw_processing_delay = 16; // NOTE(review): core_dcn4_ip_caps_base.subvp_fw_processing_delay_us is 15 — confirm the 16 vs 15 difference is intended
+}
+
+static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, const struct dml2_ip_capabilities *ip_caps)
+{ /* Overwrite the ip_params fields listed below with the corresponding ip_caps values; all other ip_params fields are left untouched. */
+ ip_params->max_num_dpp = ip_caps->pipe_count;
+ ip_params->max_num_otg = ip_caps->otg_count;
+ ip_params->num_dsc = ip_caps->num_dsc;
+ ip_params->max_num_dp2p0_streams = ip_caps->max_num_dp2p0_streams;
+ ip_params->max_num_dp2p0_outputs = ip_caps->max_num_dp2p0_outputs;
+ ip_params->max_num_hdmi_frl_outputs = ip_caps->max_num_hdmi_frl_outputs;
+ ip_params->rob_buffer_size_kbytes = ip_caps->rob_buffer_size_kbytes;
+ ip_params->config_return_buffer_size_in_kbytes = ip_caps->config_return_buffer_size_in_kbytes;
+ ip_params->config_return_buffer_segment_size_in_kbytes = ip_caps->config_return_buffer_segment_size_in_kbytes;
+ ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries;
+ ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes;
+ ip_params->cursor_buffer_size = ip_caps->cursor_buffer_size;
+ ip_params->max_flip_time_us = ip_caps->max_flip_time_us;
+ ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines;
+ ip_params->hostvm_mode = ip_caps->hostvm_mode; // same field set, in the same order, as patch_ip_caps_with_explicit_ip_params() copies in the other direction
+}
+
+bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out)
+{
+ /* Set up the core instance's minimum clock table, IP parameters, SoC bounding box and ip_caps. Returns false when no minimum clock table is supplied, true otherwise. */
+ struct dml2_core_instance *core = in_out->instance;
+
+ if (!in_out->minimum_clock_table)
+ return false;
+ core->minimum_clock_table = in_out->minimum_clock_table;
+
+ if (in_out->explicit_ip_bb && in_out->explicit_ip_bb_size > 0) {
+ memcpy(&core->clean_me_up.mode_lib.ip, in_out->explicit_ip_bb, in_out->explicit_ip_bb_size); // NOTE(review): explicit_ip_bb_size is not checked against sizeof(mode_lib.ip) here
+
+ // FIXME_STAGE2:
+ // DV still uses stage1 ip_param_st for each variant, need to patch the ip_caps with ip_param info
+ // Should move DV to use ip_caps but need move more overrides to ip_caps
+ patch_ip_caps_with_explicit_ip_params(in_out->ip_caps, in_out->explicit_ip_bb);
+ core->clean_me_up.mode_lib.ip.subvp_pstate_allow_width_us = core_dcn4_ip_caps_base.subvp_pstate_allow_width_us;
+ core->clean_me_up.mode_lib.ip.subvp_fw_processing_delay_us = core_dcn4_ip_caps_base.subvp_fw_processing_delay_us; // SubVP values come from the base table, each from its own field
+ core->clean_me_up.mode_lib.ip.subvp_swath_height_margin_lines = core_dcn4_ip_caps_base.subvp_swath_height_margin_lines;
+ } else {
+ memcpy(&core->clean_me_up.mode_lib.ip, &core_dcn4_ip_caps_base, sizeof(struct dml2_core_ip_params));
+ patch_ip_params_with_ip_caps(&core->clean_me_up.mode_lib.ip, in_out->ip_caps);
+ core->clean_me_up.mode_lib.ip.imall_supported = false;
+ }
+
+ memcpy(&core->clean_me_up.mode_lib.soc, in_out->soc_bb, sizeof(struct dml2_soc_bb));
+ memcpy(&core->clean_me_up.mode_lib.ip_caps, in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); // copied after the explicit-IP patch above, so the patched ip_caps values are what gets stored
+
+ return true;
+}
+
+static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main,
+ const struct dml2_implicit_svp_meta *meta)
+{
+ /* Start from a byte copy of the main stream, then replace the vertical timing with the SVP meta values and switch DRR off. */
+ memcpy(phantom, main, sizeof(*phantom));
+ phantom->timing.drr_config.enabled = false;
+ phantom->timing.v_front_porch = meta->v_front_porch;
+ phantom->timing.v_active = meta->v_active;
+ phantom->timing.v_total = meta->v_total;
+ phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active;
+ phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active;
+}
+
+static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main,
+ const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream)
+{
+ /* Phantom viewport rows per plane: v_ratio * phantom v_active passed through math_ceil2(.., 16.0) (presumably rounds up to a multiple of 16), capped at the main height. main_stream is unused. */
+ const double phantom_v_active = (double)phantom_stream->timing.v_active;
+ const double rows_plane0 = math_ceil2((double)main->composition.scaler_info.plane0.v_ratio * phantom_v_active, 16.0);
+ const double rows_plane1 = math_ceil2((double)main->composition.scaler_info.plane1.v_ratio * phantom_v_active, 16.0);
+
+ memcpy(phantom, main, sizeof(*phantom));
+ phantom->stream_index = phantom_stream_index;
+ phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return;
+ phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable;
+ phantom->composition.viewport.plane0.height = (unsigned long)math_min2(rows_plane0, (double)main->composition.viewport.plane0.height);
+ phantom->composition.viewport.plane1.height = (unsigned long)math_min2(rows_plane1, (double)main->composition.viewport.plane1.height);
+ phantom->tdlut.setup_for_tdlut = false;
+ phantom->cursor.cursor_width = 0;
+ phantom->cursor.num_cursors = 0;
+ phantom->dynamic_meta_data.enable = false;
+ phantom->immediate_flip = false;
+}
+
+static void expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg,
+ struct dml2_core_scratch *scratch)
+{ /* Copy display_cfg->display_config into svp_expanded_display_cfg; if enable_subvp_implicit_pmo is set, append a phantom stream/plane for each stream with valid stage3 SVP meta and record the index mappings in scratch. */
+ unsigned int stream_index, plane_index;
+ const struct dml2_plane_parameters *main_plane;
+ const struct dml2_stream_parameters *main_stream;
+ const struct dml2_stream_parameters *phantom_stream;
+
+ memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg));
+ memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES);
+ memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES);
+ memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); // NOTE(review): sizes assume DML2_MAX_PLANES ints per array; phantom_plane_index_to_main_plane_index is not cleared here — confirm
+
+ if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo)
+ return;
+
+ /* disable unbounded requesting for all planes until stage 3 has been performed */
+ if (!display_cfg->stage3.performed) {
+ svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true;
+ svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false;
+ }
+ // Create the phantom streams; every main stream first maps to itself, so a stream without a phantom keeps an identity mapping
+ for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) {
+ main_stream = &display_cfg->display_config.stream_descriptors[stream_index];
+ scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index;
+ scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index;
+
+ if (display_cfg->stage3.stream_svp_meta[stream_index].valid) {
+ // Create the phantom stream in the next free slot of the expanded config
+ create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams],
+ main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]);
+
+ // Associate this phantom stream to the main stream
+ scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index;
+ scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams;
+
+ // Increment num streams
+ svp_expanded_display_cfg->num_streams++;
+ }
+ }
+
+ // Create the phantom planes; the loop bound is the original plane count, so phantoms appended below are not revisited
+ for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) {
+ main_plane = &display_cfg->display_config.plane_descriptors[plane_index];
+
+ if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) {
+ main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index];
+ phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]];
+ create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes],
+ main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream);
+
+ // Associate this phantom plane to the main plane
+ scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index;
+ scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes;
+
+ // Increment num planes
+ svp_expanded_display_cfg->num_planes++;
+
+ // Adjust the main plane settings (in the expanded copy only; display_cfg itself is const)
+ svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe;
+ }
+ }
+}
+
+static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_instance *core, const struct display_configuation_with_meta *display_cfg,
+ const struct dml2_display_cfg *svp_expanded_display_cfg, struct dml2_display_cfg_programming *programming, struct dml2_core_scratch *scratch)
+{ /* Fill *programming from core->clean_me_up.mode_lib for an implicit-SubVP config: main streams/planes first, then each phantom plane is folded into its main plane's entry. */
+ unsigned int stream_index, plane_index, pipe_offset, stream_already_populated_mask, main_plane_index, mcache_index;
+ unsigned int total_main_mcaches_required = 0;
+ int total_pipe_regs_copied = 0;
+ int dml_internal_pipe_index = 0;
+ const struct dml2_plane_parameters *main_plane;
+ const struct dml2_plane_parameters *phantom_plane;
+ const struct dml2_stream_parameters *main_stream;
+ const struct dml2_stream_parameters *phantom_stream;
+
+ // Copy the unexpanded display config to output
+ memcpy(&programming->display_config, &display_cfg->display_config, sizeof(struct dml2_display_cfg));
+
+ // Set the global register values
+ dml2_core_calcs_get_arb_params(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.arb_regs);
+ // Get watermarks uses display config for ref clock override, so it doesn't matter whether we pass the pre or post expansion
+ // display config
+ dml2_core_calcs_get_watermarks(&display_cfg->display_config, &core->clean_me_up.mode_lib, &programming->global_regs.wm_regs[0]);
+
+ // Check if FAMS2 is required (only evaluated when stage 3 ran and succeeded)
+ if (display_cfg->stage3.performed && display_cfg->stage3.success) {
+ programming->fams2_required = display_cfg->stage3.fams2_required;
+
+ dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config);
+ }
+
+ // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream)
+ for (stream_index = 0; stream_index < programming->display_config.num_streams; stream_index++) {
+ main_stream = &svp_expanded_display_cfg->stream_descriptors[stream_index];
+ phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[stream_index]];
+
+ // Set the descriptor
+ programming->stream_programming[stream_index].stream_descriptor = &programming->display_config.stream_descriptors[stream_index];
+
+ // Set the odm combine factor
+ programming->stream_programming[stream_index].num_odms_required = display_cfg->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used;
+
+ // Check if the stream has implicit SVP enabled (the scratch mapping points at a different descriptor than the main one)
+ if (main_stream != phantom_stream) {
+ // If so, copy the phantom stream descriptor
+ programming->stream_programming[stream_index].phantom_stream.enabled = true;
+ memcpy(&programming->stream_programming[stream_index].phantom_stream.descriptor, phantom_stream, sizeof(struct dml2_stream_parameters));
+ } else {
+ programming->stream_programming[stream_index].phantom_stream.enabled = false;
+ }
+
+ // Due to the way DML indexes data internally, it's easier to populate the rest of the display
+ // stream programming in the next stage
+ }
+
+ dml_internal_pipe_index = 0; // already 0 from its initializer; reset kept explicit
+ total_pipe_regs_copied = 0; // already 0 from its initializer; reset kept explicit
+ stream_already_populated_mask = 0x0;
+
+ // Loop over all main planes
+ for (plane_index = 0; plane_index < programming->display_config.num_planes; plane_index++) {
+ main_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index];
+
+ // Set the descriptor
+ programming->plane_programming[plane_index].plane_descriptor = &programming->display_config.plane_descriptors[plane_index];
+
+ // Set the mpc combine factor
+ programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index];
+
+ // Setup the appropriate p-state strategy
+ if (display_cfg->stage3.performed && display_cfg->stage3.success) {
+ programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index];
+ } else {
+ programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na;
+ }
+
+ dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index);
+
+ memcpy(&programming->plane_programming[plane_index].mcache_allocation,
+ &display_cfg->stage2.mcache_allocations[plane_index],
+ sizeof(struct dml2_mcache_surface_allocation));
+ total_main_mcaches_required += programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane0 +
+ programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane1 -
+ (programming->plane_programming[plane_index].mcache_allocation.last_slice_sharing.plane0_plane1 ? 1 : 0);
+
+ for (pipe_offset = 0; pipe_offset < programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) {
+ // Assign storage for this pipe's register values
+ programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied];
+ memset(programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set));
+ total_pipe_regs_copied++;
+
+ // Populate the main plane regs
+ dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index);
+
+ // Multiple planes can refer to the same stream index, so it's only necessary to populate it once
+ if (!(stream_already_populated_mask & (0x1 << main_plane->stream_index))) {
+ dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index], dml_internal_pipe_index);
+
+ programming->stream_programming[main_plane->stream_index].uclk_pstate_method = programming->plane_programming[plane_index].uclk_pstate_support_method;
+
+ /* unconditionally populate fams2 params */
+ dml2_core_calcs_get_stream_fams2_programming(&core->clean_me_up.mode_lib,
+ display_cfg,
+ &programming->stream_programming[main_plane->stream_index].fams2_base_params,
+ &programming->stream_programming[main_plane->stream_index].fams2_sub_params,
+ programming->stream_programming[main_plane->stream_index].uclk_pstate_method,
+ plane_index);
+
+ stream_already_populated_mask |= (0x1 << main_plane->stream_index);
+ }
+ dml_internal_pipe_index++;
+ }
+ }
+
+ for (plane_index = programming->display_config.num_planes; plane_index < svp_expanded_display_cfg->num_planes; plane_index++) { // planes past the unexpanded count are the appended phantom planes
+ phantom_plane = &svp_expanded_display_cfg->plane_descriptors[plane_index];
+ main_plane_index = scratch->phantom_plane_index_to_main_plane_index[plane_index];
+ main_plane = &svp_expanded_display_cfg->plane_descriptors[main_plane_index];
+
+ programming->plane_programming[main_plane_index].phantom_plane.valid = true;
+ memcpy(&programming->plane_programming[main_plane_index].phantom_plane.descriptor, phantom_plane, sizeof(struct dml2_plane_parameters));
+
+ dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[main_plane_index].svp_size_mall_bytes, dml_internal_pipe_index);
+
+ /* generate mcache allocation, phantoms use identical mcache configuration, but in the MALL set and unique mcache ID's beginning after all main ID's */
+ memcpy(&programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation,
+ &programming->plane_programming[main_plane_index].mcache_allocation,
+ sizeof(struct dml2_mcache_surface_allocation));
+ for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane0; mcache_index++) {
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index] += total_main_mcaches_required;
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane0[mcache_index] =
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index];
+ }
+ for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane1; mcache_index++) {
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index] += total_main_mcaches_required;
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane1[mcache_index] =
+ programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index];
+ }
+
+ for (pipe_offset = 0; pipe_offset < programming->plane_programming[main_plane_index].num_dpps_required; pipe_offset++) {
+ // Assign storage for this pipe's register values
+ programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied];
+ memset(programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set));
+ total_pipe_regs_copied++;
+
+ // Populate the phantom plane regs
+ dml2_core_calcs_get_pipe_regs(svp_expanded_display_cfg, &core->clean_me_up.mode_lib, programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset], dml_internal_pipe_index);
+ // Populate the phantom stream specific programming (the mask is keyed by the phantom stream index; the result is stored under the main stream's entry)
+ if (!(stream_already_populated_mask & (0x1 << phantom_plane->stream_index))) {
+ dml2_core_calcs_get_global_sync_programming(&core->clean_me_up.mode_lib, &programming->stream_programming[main_plane->stream_index].phantom_stream.global_sync, dml_internal_pipe_index);
+
+ stream_already_populated_mask |= (0x1 << phantom_plane->stream_index);
+ }
+
+ dml_internal_pipe_index++;
+ }
+ }
+}
+
+bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out)
+{
+ /* Run the DML mode-support check on the SubVP-expanded config; when supported, publish clocks, bandwidths and per-plane/per-stream results into in_out->mode_support_result. Returns the support verdict. */
+ struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance;
+ struct dml2_core_mode_support_locals *l = &core->scratch.mode_support_locals;
+ bool result;
+ unsigned int i, stream_index, stream_bitmask;
+ unsigned int odm_count, num_odm_output_segments, dpp_count;
+
+ expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch);
+
+ l->mode_support_ex_params.mode_lib = &core->clean_me_up.mode_lib;
+ l->mode_support_ex_params.in_display_cfg = &l->svp_expanded_display_cfg;
+ l->mode_support_ex_params.min_clk_table = in_out->min_clk_table;
+ l->mode_support_ex_params.min_clk_index = in_out->min_clk_index;
+ l->mode_support_ex_params.out_evaluation_info = &in_out->mode_support_result.cfg_support_info.clean_me_up.support_info;
+
+ result = dml2_core_calcs_mode_support_ex(&l->mode_support_ex_params);
+
+ in_out->mode_support_result.cfg_support_info.is_supported = result;
+
+ if (result) {
+ in_out->mode_support_result.global.dispclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDISPCLK * 1000);
+ in_out->mode_support_result.global.dcfclk_deepsleep_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.dcfclk_deepsleep * 1000);
+ in_out->mode_support_result.global.socclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.SOCCLK * 1000);
+
+ in_out->mode_support_result.global.fclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_fclk_change_supported;
+ in_out->mode_support_result.global.uclk_pstate_supported = l->mode_support_ex_params.out_evaluation_info->global_dram_clock_change_supported;
+
+ in_out->mode_support_result.global.active.fclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.FabricClock * 1000);
+ in_out->mode_support_result.global.active.dcfclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.DCFCLK * 1000);
+
+ // SVP prefetch clocks: scale to kHz first and truncate afterwards, so a fractional MHz part is kept (same form as active.* above)
+ in_out->mode_support_result.global.svp_prefetch.fclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.FabricClock * 1000);
+ in_out->mode_support_result.global.svp_prefetch.dcfclk_khz = (unsigned long)(core->clean_me_up.mode_lib.ms.DCFCLK * 1000);
+
+ in_out->mode_support_result.global.active.average_bw_sdp_kbps = 0;
+ in_out->mode_support_result.global.active.urgent_bw_dram_kbps = 0;
+ in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = 0;
+ in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = 0;
+
+ in_out->mode_support_result.global.active.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0);
+ in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] * 1000), 1.0);
+ in_out->mode_support_result.global.svp_prefetch.average_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0);
+ in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] * 1000), 1.0);
+
+ in_out->mode_support_result.global.active.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0);
+ in_out->mode_support_result.global.active.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0);
+ in_out->mode_support_result.global.svp_prefetch.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0);
+ in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0);
+ DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps);
+ DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps);
+ DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps);
+ DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps);
+
+ for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) {
+ in_out->mode_support_result.per_plane[i].dppclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDPPCLK[i] * 1000);
+ }
+
+ stream_bitmask = 0; // one bit per stream whose stream-level results have already been filled in
+ for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) {
+ odm_count = 1;
+ dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i];
+ num_odm_output_segments = 1;
+
+ switch (l->mode_support_ex_params.out_evaluation_info->ODMMode[i]) {
+ case dml2_odm_mode_bypass:
+ odm_count = 1;
+ dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i];
+ break;
+ case dml2_odm_mode_combine_2to1:
+ odm_count = 2;
+ dpp_count = 2;
+ break;
+ case dml2_odm_mode_combine_3to1:
+ odm_count = 3;
+ dpp_count = 3;
+ break;
+ case dml2_odm_mode_combine_4to1:
+ odm_count = 4;
+ dpp_count = 4;
+ break;
+ case dml2_odm_mode_split_1to2:
+ case dml2_odm_mode_mso_1to2:
+ num_odm_output_segments = 2;
+ break;
+ case dml2_odm_mode_mso_1to4:
+ num_odm_output_segments = 4;
+ break;
+ case dml2_odm_mode_auto:
+ default:
+ odm_count = 1;
+ dpp_count = l->mode_support_ex_params.out_evaluation_info->DPPPerSurface[i];
+ break;
+ }
+
+ in_out->mode_support_result.cfg_support_info.plane_support_info[i].dpps_used = dpp_count;
+
+ dml2_core_calcs_get_plane_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.plane_support_info[i], i);
+
+ stream_index = l->svp_expanded_display_cfg.plane_descriptors[i].stream_index;
+
+ in_out->mode_support_result.per_stream[stream_index].dscclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.required_dscclk_freq_mhz[i] * 1000); // scale before truncating, as for dppclk/dtbclk
+ DML_LOG_VERBOSE("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz);
+
+ if (!((stream_bitmask >> stream_index) & 0x1)) {
+ in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used = odm_count;
+ in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_odm_output_segments = num_odm_output_segments;
+ in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].dsc_enable = l->mode_support_ex_params.out_evaluation_info->DSCEnabled[i];
+ in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].num_dsc_slices = l->mode_support_ex_params.out_evaluation_info->NumberOfDSCSlices[i];
+ dml2_core_calcs_get_stream_support_info(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index], i);
+ in_out->mode_support_result.per_stream[stream_index].dtbclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDTBCLK[i] * 1000);
+ stream_bitmask |= 0x1 << stream_index;
+ }
+ }
+ }
+
+ return result;
+}
+
+static int lookup_uclk_dpm_index_by_freq(unsigned long uclk_freq_khz, struct dml2_soc_bb *soc_bb)
+{
+ /* Index of the first UCLK table entry equal to uclk_freq_khz; 0 when no entry matches (indistinguishable from a match at index 0). */
+ int level = 0;
+
+ while (level < soc_bb->clk_table.uclk.num_clk_values &&
+ uclk_freq_khz != soc_bb->clk_table.uclk.clk_values_khz[level])
+ level++;
+ return level < soc_bb->clk_table.uclk.num_clk_values ? level : 0;
+}
+
+bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out)
+{
+ struct dml2_core_instance *core = (struct dml2_core_instance *)in_out->instance;
+ struct dml2_core_mode_programming_locals *l = &core->scratch.mode_programming_locals;
+
+ bool result = false;
+ unsigned int pipe_offset;
+ int dml_internal_pipe_index;
+ int total_pipe_regs_copied = 0;
+ int stream_already_populated_mask = 0;
+
+ int main_stream_index;
+ unsigned int plane_index;
+
+ expand_implict_subvp(in_out->display_cfg, &l->svp_expanded_display_cfg, &core->scratch);
+
+ l->mode_programming_ex_params.mode_lib = &core->clean_me_up.mode_lib;
+ l->mode_programming_ex_params.in_display_cfg = &l->svp_expanded_display_cfg;
+ l->mode_programming_ex_params.min_clk_table = in_out->instance->minimum_clock_table;
+ l->mode_programming_ex_params.cfg_support_info = in_out->cfg_support_info;
+ l->mode_programming_ex_params.programming = in_out->programming;
+ l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4x.active.uclk_khz,
+ &core->clean_me_up.mode_lib.soc);
+
+ result = dml2_core_calcs_mode_programming_ex(&l->mode_programming_ex_params);
+
+ if (result) {
+ // If the input display configuration contains implicit SVP, we need to use a special packer
+ if (in_out->display_cfg->display_config.overrides.enable_subvp_implicit_pmo) {
+ pack_mode_programming_params_with_implicit_subvp(core, in_out->display_cfg, &l->svp_expanded_display_cfg, in_out->programming, &core->scratch);
+ } else {
+ memcpy(&in_out->programming->display_config, in_out->display_cfg, sizeof(struct dml2_display_cfg));
+
+ dml2_core_calcs_get_arb_params(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.arb_regs);
+ dml2_core_calcs_get_watermarks(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, &in_out->programming->global_regs.wm_regs[0]);
+
+ dml_internal_pipe_index = 0;
+
+ for (plane_index = 0; plane_index < in_out->programming->display_config.num_planes; plane_index++) {
+ in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index];
+
+ if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe)
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp;
+ else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe)
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp;
+ else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp;
+ else {
+ if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us)
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vactive;
+ else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us)
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vblank;
+ else
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na;
+ }
+
+ dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index);
+
+ memcpy(&in_out->programming->plane_programming[plane_index].mcache_allocation,
+ &in_out->display_cfg->stage2.mcache_allocations[plane_index],
+ sizeof(struct dml2_mcache_surface_allocation));
+
+ for (pipe_offset = 0; pipe_offset < in_out->programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) {
+ in_out->programming->plane_programming[plane_index].plane_descriptor = &in_out->programming->display_config.plane_descriptors[plane_index];
+
+ // Assign storage for this pipe's register values
+ in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &in_out->programming->pipe_regs[total_pipe_regs_copied];
+ memset(in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], 0, sizeof(struct dml2_dchub_per_pipe_register_set));
+ total_pipe_regs_copied++;
+
+ // Populate
+ dml2_core_calcs_get_pipe_regs(&l->svp_expanded_display_cfg, &core->clean_me_up.mode_lib, in_out->programming->plane_programming[plane_index].pipe_regs[pipe_offset], dml_internal_pipe_index);
+
+ main_stream_index = in_out->programming->display_config.plane_descriptors[plane_index].stream_index;
+
+ // Multiple planes can refer to the same stream index, so it's only necessary to populate it once
+ if (!(stream_already_populated_mask & (0x1 << main_stream_index))) {
+ in_out->programming->stream_programming[main_stream_index].stream_descriptor = &in_out->programming->display_config.stream_descriptors[main_stream_index];
+ in_out->programming->stream_programming[main_stream_index].num_odms_required = in_out->cfg_support_info->stream_support_info[main_stream_index].odms_used;
+ dml2_core_calcs_get_stream_programming(&core->clean_me_up.mode_lib, &in_out->programming->stream_programming[main_stream_index], dml_internal_pipe_index);
+
+ stream_already_populated_mask |= (0x1 << main_stream_index);
+ }
+ dml_internal_pipe_index++;
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out)
+{
+	// voltage_level takes the min_clk_index kept by mode programming when the mode is supported, else the one kept by mode support
+	if (!in_out->mode_is_supported) {
+		in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_support_locals.mode_support_ex_params.min_clk_index;
+	} else {
+		in_out->programming->informative.voltage_level = in_out->instance->scratch.mode_programming_locals.mode_programming_ex_params.min_clk_index;
+	}
+
+	dml2_core_calcs_get_informative(&in_out->instance->clean_me_up.mode_lib, in_out->programming);
+	return true;
+}
+
+bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out)
+{
+	struct dml2_mcache_surface_allocation *alloc = in_out->mcache_allocation;
+
+	// Start from a zeroed allocation and let the calcs layer fill it in for the requested plane
+	memset(alloc, 0, sizeof(struct dml2_mcache_surface_allocation));
+	dml2_core_calcs_get_mcache_allocation(&in_out->instance->clean_me_up.mode_lib, alloc, in_out->plane_index);
+
+	// When a plane has at least one mcache, its last x offset is overwritten with that plane's surface width
+	if (alloc->num_mcaches_plane0 > 0)
+		alloc->mcache_x_offsets_plane0[alloc->num_mcaches_plane0 - 1] = in_out->plane_descriptor->surface.plane0.width;
+	if (alloc->num_mcaches_plane1 > 0)
+		alloc->mcache_x_offsets_plane1[alloc->num_mcaches_plane1 - 1] = in_out->plane_descriptor->surface.plane1.width;
+	alloc->requires_dedicated_mall_mcache = false;
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h
new file mode 100644
index 000000000000..a68bb001a346
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_CORE_DCN4_H__
+#define __DML2_CORE_DCN4_H__
+bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out);
+bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out);
+bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out);
+bool core_dcn4_populate_informative(struct dml2_core_populate_informative_in_out *in_out);
+bool core_dcn4_calculate_mcache_allocation(struct dml2_calculate_mcache_allocation_in_out *in_out);
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
new file mode 100644
index 000000000000..bf62d42b3f78
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
@@ -0,0 +1,13315 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#include "dml2_internal_shared_types.h"
+#include "dml2_core_dcn4_calcs.h"
+#include "dml2_debug.h"
+#include "lib_float_math.h"
+#include "dml_top_types.h"
+
+#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
+#define DML_MAX_NUM_OF_SLICES_PER_DSC 4
+#define DML_MAX_COMPRESSION_RATIO 4
+//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW
+//#define DML_GLOBAL_PREFETCH_CHECK
+#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
+#define DML_MAX_VSTARTUP_START 1023
+
+const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
+{
+	const char *name = "dml2_core_internal_bw_unknown";
+	// Each enumerator tested below maps to its own identifier as a string; any other value keeps the default above
+	if (bw_type == dml2_core_internal_bw_sdp)
+		name = "dml2_core_internal_bw_sdp";
+	else if (bw_type == dml2_core_internal_bw_dram)
+		name = "dml2_core_internal_bw_dram";
+	else if (bw_type == dml2_core_internal_bw_max)
+		name = "dml2_core_internal_bw_max";
+
+	return name;
+}
+
+const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)
+{
+	// dml2_core_internal_soc_state_max, like any value not tested below, is reported as "unknown"
+	const char *name = "dml2_core_internal_soc_state_unknown";
+
+	if (dml2_core_internal_soc_state_type == dml2_core_internal_soc_state_sys_idle)
+		name = "dml2_core_internal_soc_state_sys_idle";
+	else if (dml2_core_internal_soc_state_type == dml2_core_internal_soc_state_sys_active)
+		name = "dml2_core_internal_soc_state_sys_active";
+	else if (dml2_core_internal_soc_state_type == dml2_core_internal_soc_state_svp_prefetch)
+		name = "dml2_core_internal_soc_state_svp_prefetch";
+
+	return name;
+}
+
+static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder)
+{
+ *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); // despite the name this is a 0/1 flag, not an arithmetic remainder: 1 when the quotient is greater than its value truncated to int
+ return dividend / divisor; // the untruncated floating-point quotient; NOTE(review): divisor == 0 is not checked in this function
+}
+
+static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only)
+{ // Logs the fields of *support through DML_LOG_VERBOSE. With fail_only set, a field is logged only when it equals the value compared on its line (0 for some fields, 1 for others); otherwise every field is logged.
+ DML_LOG_VERBOSE("DML: ===================================== \n");
+ DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n");
+ if (!fail_only || support->ScaleRatioAndTapsSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport);
+ if (!fail_only || support->SourceFormatPixelAndScanSupport == 0)
+ DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport);
+ if (!fail_only || support->ViewportSizeSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport);
+ if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1)
+ DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion);
+ if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1)
+ DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated);
+ if (!fail_only || support->BPPForMultistreamNotIndicated == 1)
+ DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated);
+ if (!fail_only || support->MultistreamWithHDMIOreDP == 1)
+ DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP);
+ if (!fail_only || support->ExceededMultistreamSlots == 1)
+ DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots);
+ if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1)
+ DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink);
+ if (!fail_only || support->NotEnoughLanesForMSO == 1)
+ DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO);
+ if (!fail_only || support->P2IWith420 == 1)
+ DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420);
+ if (!fail_only || support->DSC422NativeNotSupported == 1)
+ DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported);
+ if (!fail_only || support->DSCSlicesODMModeSupported == 0)
+ DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported);
+ if (!fail_only || support->NotEnoughDSCUnits == 1)
+ DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits);
+ if (!fail_only || support->NotEnoughDSCSlices == 1)
+ DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices);
+ if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
+ DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
+ if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
+ DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
+ if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1)
+ DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported);
+ if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0)
+ DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport);
+ if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1)
+ DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported);
+ if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1)
+ DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState);
+ if (!fail_only || support->ROBSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport);
+ if (!fail_only || support->OutstandingRequestsSupport == 0)
+ DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport);
+ if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0)
+ DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance);
+ if (!fail_only || support->DISPCLK_DPPCLK_Support == 0)
+ DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support);
+ if (!fail_only || support->TotalAvailablePipesSupport == 0)
+ DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport);
+ if (!fail_only || support->NumberOfOTGSupport == 0)
+ DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport);
+ if (!fail_only || support->NumberOfHDMIFRLSupport == 0)
+ DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport);
+ if (!fail_only || support->NumberOfDP2p0Support == 0)
+ DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support);
+ if (!fail_only || support->EnoughWritebackUnits == 0)
+ DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits);
+ if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0)
+ DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport);
+ if (!fail_only || support->WritebackLatencySupport == 0)
+ DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport);
+ if (!fail_only || support->CursorSupport == 0)
+ DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport);
+ if (!fail_only || support->PitchSupport == 0)
+ DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport);
+ if (!fail_only || support->ViewportExceedsSurface == 1)
+ DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface);
+ if (!fail_only || support->PrefetchSupported == 0)
+ DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported);
+ if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0)
+ DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport);
+ if (!fail_only || support->AvgBandwidthSupport == 0)
+ DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport);
+ if (!fail_only || support->DynamicMetadataSupported == 0)
+ DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
+ if (!fail_only || support->VRatioInPrefetchSupported == 0)
+ DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
+ if (!fail_only || support->PTEBufferSizeNotExceeded == 0)
+ DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
+ if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0)
+ DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded);
+ if (!fail_only || support->ExceededMALLSize == 1)
+ DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize);
+ if (!fail_only || support->g6_temp_read_support == 0)
+ DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support);
+ if (!fail_only || support->ImmediateFlipSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport);
+ if (!fail_only || support->LinkCapacitySupport == 0)
+ DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport);
+ // ModeSupport is logged last, after all of the individual fields above
+ if (!fail_only || support->ModeSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport);
+ DML_LOG_VERBOSE("DML: ===================================== \n");
+}
+
+static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg)
+{
+	// For every plane k, out_bpp[k] is computed from the timing and output settings of the stream that plane refers to
+	for (unsigned int k = 0; k < display_cfg->num_planes; k++) {
+		double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc;
+		if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) {
+			switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) {
+			case dml2_444:
+				out_bpp[k] = bpc * 3;
+				break;
+			case dml2_s422:
+			case dml2_n422:
+				out_bpp[k] = bpc * 2;
+				break;
+			case dml2_420:
+			default:
+				out_bpp[k] = bpc * 1.5;
+				break;
+			}
+		} else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) {
+			// With DSC enabled the configured compressed rate is used: dsc_compressed_bpp_x16 divided by 16
+			out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16;
+		} else {
+			out_bpp[k] = 0;
+		}
+		DML_LOG_VERBOSE("DML::%s: k=%u bpc=%f\n", __func__, k, bpc);
+		DML_LOG_VERBOSE("DML::%s: k=%u dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable);
+		DML_LOG_VERBOSE("DML::%s: k=%u out_bpp=%f\n", __func__, k, out_bpp[k]);
+	}
+}
+
+static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up)
+{
+	// Rounds num to a multiple of 'multiple': upward when up is true, downward otherwise
+	unsigned int excess;
+
+	// Nothing to round against; returning early also keeps the modulo below from dividing by zero
+	if (multiple == 0)
+		return num;
+
+	excess = num % multiple;
+	if (excess == 0)
+		return num;
+
+	// num - excess is the multiple just below num; rounding up adds one more step on top of it
+	return up ? (num - excess) + multiple : num - excess;
+}
+
+static unsigned int dml_get_num_active_pipes(unsigned int num_planes, const struct core_display_cfg_support_info *cfg_support_info)
+{
+	unsigned int num_active_pipes = 0;
+
+	// Sum dpps_used over the first num_planes planes; the total is unsigned, so it is logged with %u
+	for (unsigned int k = 0; k < num_planes; k++)
+		num_active_pipes += (unsigned int)cfg_support_info->plane_support_info[k].dpps_used;
+
+	DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, num_active_pipes);
+	return num_active_pipes;
+}
+
+static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane)
+{
+	unsigned int pipe_idx = 0;
+
+	// Start with every one of the DML2_MAX_PLANES slots marked as carrying no plane
+	for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k)
+		pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__;
+
+	// Give each plane dpps_used consecutive slots, in plane order. Only DML2_MAX_PLANES slots were set up
+	// above, so stop there instead of writing past them if the dpps_used counts add up to more.
+	for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) {
+		for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used && pipe_idx < DML2_MAX_PLANES; i++)
+			pipe_plane[pipe_idx++] = plane_idx;
+	}
+}
+
+static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg)
+{
+	// A plane is phantom when its legacy SVP override is one of the two phantom-pipe modes
+	switch (plane_cfg->overrides.legacy_svp_config) {
+	case dml2_svp_mode_override_phantom_pipe:
+	case dml2_svp_mode_override_phantom_pipe_no_data_return:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
+{
+	// Maps the pipe to its plane through mode_lib->mp.pipe_plane[] and classifies that plane; pipe_idx is unsigned, so it is logged with %u
+	unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
+	bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]);
+	DML_LOG_VERBOSE("DML::%s: pipe_idx=%u legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom);
+	return is_phantom;
+}
+/* dml_get_per_pipe_var_func(variable, type, interval_var) defines static type dml_get_<variable>(mode_lib, pipe_idx), which looks up the plane for pipe_idx in mode_lib->mp.pipe_plane[] and returns interval_var[plane] cast to type. */
+#define dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \
+{ \
+unsigned int plane_idx; \
+plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; /* pipe index -> plane index */ \
+return (type) interval_var[plane_idx]; /* interval_var is indexed by plane, not by pipe */ \
+}
+
+dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes);
+dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes);
+dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY);
+dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC);
+dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear);
+dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma);
+
+dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup);
+dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix);
+dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix);
+dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix);
+dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines);
+dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY);
+dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC);
+dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte);
+dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL);
+/* dml_get_per_plane_var_func(variable, type, interval_var) defines static type dml_get_plane_<variable>(mode_lib, plane_idx), returning interval_var[plane_idx] cast to type; the index is used directly, with no pipe_plane lookup. */
+#define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \
+{ \
+return (type) interval_var[plane_idx]; \
+}
+
+dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l);
+dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l);
+dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l);
+dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c);
+dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c);
+dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c);
+dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l);
+dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c);
+dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache);
+dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL);
+dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines);
+/* dml_get_per_plane_array_var_func(variable, type, interval_var) defines static type dml_get_plane_array_<variable>(mode_lib, plane_idx, array_idx), returning interval_var[plane_idx][array_idx] cast to type. */
+#define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \
+{ \
+return (type) interval_var[plane_idx][array_idx]; /* neither index is range-checked here */ \
+}
+
+dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l);
+dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c);
+/* dml_get_var_func(var, type, internal_var) defines static type dml_get_<var>(mode_lib), returning the expression internal_var cast to type. */
+#define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \
+{ \
+return (type) internal_var; \
+}
+
+dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark);
+dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark);
+dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
+dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark);
+dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
+dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency); /* reads mp.UrgentLatency, the same field as dml_get_urgent_latency further down */
+dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory);
+
+dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark);
+dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark);
+dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us);
+dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
+dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth);
+dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
+dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL);
+dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
+dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
+dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency);
+dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
+dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame);
+dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency);
+dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
+dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod);
+dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase);
+dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
+dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase);
+dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
+dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0); /* GlobalDPPCLK scaled by 1000; presumably MHz to kHz going by the getter name - TODO confirm */
+
+dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); /* the avg_bw_required getters read ms.support, while the avg_bw_available ones below read mp */
+dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
+
+dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]);
+
+dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
+dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us);
+dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us);
+dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us);
+dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us);
+
+dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+
+dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte);
+
+dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled);
+dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark);
+dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b);
+dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5);
+dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
+
+/* Derive the DET size ceiling, the nominal per-DPP DET size and the minimum compressed buffer size. */
+static void CalculateMaxDETAndMinCompressedBufferSize(
+	unsigned int ConfigReturnBufferSizeInKByte,
+	unsigned int ConfigReturnBufferSegmentSizeInKByte,
+	unsigned int ROBBufferSizeInKByte,
+	unsigned int MaxNumDPP,
+	unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
+	unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
+	bool is_mrq_present,
+	// Output
+	unsigned int *MaxTotalDETInKByte,
+	unsigned int *nomDETInKByte,
+	unsigned int *MinCompressedBufferSizeInKByte)
+{
+	/* Without an MRQ everything but one segment may be DET; with one, 4/5 of return buffer + ROB goes through math_ceil2(..., 64). */
+	const unsigned int max_total_det = !is_mrq_present ? ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte :
+		(unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte)*4/5, 64);
+
+	*MaxTotalDETInKByte = max_total_det;
+	*nomDETInKByte = (unsigned int)(math_floor2((double)max_total_det / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
+	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - max_total_det;
+
+	DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present);
+	DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
+	DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
+	DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
+	DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
+	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
+	DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
+
+	if (!nomDETInKByteOverrideEnable)
+		return;
+	*nomDETInKByte = nomDETInKByteOverrideValue;
+	DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
+}
+
+/* Store each plane's stream pixel clock, pixel_clock_khz / 1000, into PixelClockBackEnd[]. */
+static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd)
+{
+	unsigned int plane;
+
+	for (plane = 0; plane < display_cfg->num_planes; plane++) {
+		PixelClockBackEnd[plane] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane].stream_index].timing.pixel_clock_khz / 1000);
+		if (ptoi_supported == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane].stream_index].timing.interlaced == 1) {
+			// FIXME_STAGE2... can sw pass the pixel rate for interlaced directly
+			// Progressive To Interlace Unit Effect: nothing is adjusted yet; doubling pixel_clock_khz here is the disabled idea.
+		}
+	}
+}
+
+static bool dml_is_420(enum dml2_source_format_class source_format)
+{
+ bool val = false;
+ /* Only dml2_420_8, dml2_420_10 and dml2_420_12 yield true; an unlisted enumerator hits DML_ASSERT(0) and returns false. */
+ switch (source_format) {
+ case dml2_444_8:
+ val = 0;
+ break;
+ case dml2_444_16:
+ val = 0;
+ break;
+ case dml2_444_32:
+ val = 0;
+ break;
+ case dml2_444_64:
+ val = 0;
+ break;
+ case dml2_420_8:
+ val = 1;
+ break;
+ case dml2_420_10:
+ val = 1;
+ break;
+ case dml2_420_12:
+ val = 1;
+ break;
+ case dml2_422_planar_8:
+ val = 0;
+ break;
+ case dml2_422_planar_10:
+ val = 0;
+ break;
+ case dml2_422_planar_12:
+ val = 0;
+ break;
+ case dml2_422_packed_8:
+ val = 0;
+ break;
+ case dml2_422_packed_10:
+ val = 0;
+ break;
+ case dml2_422_packed_12:
+ val = 0;
+ break;
+ case dml2_rgbe_alpha:
+ val = 0;
+ break;
+ case dml2_rgbe:
+ val = 0;
+ break;
+ case dml2_mono_8:
+ val = 0;
+ break;
+ case dml2_mono_16:
+ val = 0;
+ break;
+ default:
+ DML_ASSERT(0);
+ break;
+ }
+ return val;
+}
+
+/*
+ * Map a swizzle mode to the size, in bytes, of its tile block.
+ *
+ * Linear and 256B modes give 256; the 4KB, 64KB and 256KB modes give 4096,
+ * 65536 and 262144 respectively. Any other value hits DML_ASSERT(0) and
+ * falls back to 256.
+ */
+static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)
+{
+	switch (sw_mode) {
+	case dml2_sw_linear:
+	case dml2_sw_256b_2d:
+	case dml2_gfx11_sw_linear:
+		return 256;
+	case dml2_sw_4kb_2d:
+		return 4096;
+	case dml2_sw_64kb_2d:
+	case dml2_gfx11_sw_64kb_d:
+	case dml2_gfx11_sw_64kb_d_t:
+	case dml2_gfx11_sw_64kb_d_x:
+	case dml2_gfx11_sw_64kb_r_x:
+		return 65536;
+	case dml2_sw_256kb_2d:
+	case dml2_gfx11_sw_256kb_d_x:
+	case dml2_gfx11_sw_256kb_r_x:
+		return 262144;
+	default:
+		DML_ASSERT(0);
+		return 256;
+	}
+}
+
+/*
+ * Tell whether Scan is one of the two quarter-turn rotations,
+ * dml2_rotation_90 or dml2_rotation_270.
+ */
+static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan)
+{
+	const bool quarter_turn = (Scan == dml2_rotation_270) || (Scan == dml2_rotation_90);
+
+	return quarter_turn;
+}
+
+/* Return the gfx version implied by a swizzle mode: 12 for the dml2_sw_* modes, 11 for the dml2_gfx11_sw_* modes, 0 otherwise. */
+static unsigned int dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)
+{
+	switch (sw_mode) {
+	case dml2_sw_linear:
+	case dml2_sw_256b_2d:
+	case dml2_sw_4kb_2d:
+	case dml2_sw_64kb_2d:
+	case dml2_sw_256kb_2d:
+		return 12;
+	case dml2_gfx11_sw_linear:
+	case dml2_gfx11_sw_64kb_d:
+	case dml2_gfx11_sw_64kb_d_t:
+	case dml2_gfx11_sw_64kb_d_x:
+	case dml2_gfx11_sw_64kb_r_x:
+	case dml2_gfx11_sw_256kb_d_x:
+	case dml2_gfx11_sw_256kb_r_x:
+		return 11;
+	default:
+		/* Not a mode listed above: report it, assert, and hand back version 0. */
+		DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode);
+		DML_ASSERT(0);
+		return 0;
+	}
+}
+
+/*
+ * Fill in one surface's bytes-per-pixel, 256-byte block and macro tile dimensions, plus its surf_linear128 flags.
+ */
+static void CalculateBytePerPixelAndBlockSizes(
+	enum dml2_source_format_class SourcePixelFormat,
+	enum dml2_swizzle_mode SurfaceTiling,
+	unsigned int pitch_y,
+	unsigned int pitch_c,
+
+	// Output
+	unsigned int *BytePerPixelY,
+	unsigned int *BytePerPixelC,
+	double *BytePerPixelDETY,
+	double *BytePerPixelDETC,
+	unsigned int *BlockHeight256BytesY,
+	unsigned int *BlockHeight256BytesC,
+	unsigned int *BlockWidth256BytesY,
+	unsigned int *BlockWidth256BytesC,
+	unsigned int *MacroTileHeightY,
+	unsigned int *MacroTileHeightC,
+	unsigned int *MacroTileWidthY,
+	unsigned int *MacroTileWidthC,
+	bool *surf_linear128_l,
+	bool *surf_linear128_c)
+{
+	*BytePerPixelDETY = 0;
+	*BytePerPixelDETC = 0;
+	*BytePerPixelY = 1;
+	*BytePerPixelC = 1;
+
+	if (SourcePixelFormat == dml2_444_64) {
+		*BytePerPixelDETY = 8;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 8;
+		*BytePerPixelC = 0;
+	} else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) {
+		*BytePerPixelDETY = 4;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 4;
+		*BytePerPixelC = 0;
+	} else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) {
+		*BytePerPixelDETY = 2;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 2;
+		*BytePerPixelC = 0;
+	} else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) {
+		*BytePerPixelDETY = 1;
+		*BytePerPixelDETC = 0;
+		*BytePerPixelY = 1;
+		*BytePerPixelC = 0;
+	} else if (SourcePixelFormat == dml2_rgbe_alpha) {
+		*BytePerPixelDETY = 4;
+		*BytePerPixelDETC = 1;
+		*BytePerPixelY = 4;
+		*BytePerPixelC = 1;
+	} else if (SourcePixelFormat == dml2_420_8) {
+		*BytePerPixelDETY = 1;
+		*BytePerPixelDETC = 2;
+		*BytePerPixelY = 1;
+		*BytePerPixelC = 2;
+	} else if (SourcePixelFormat == dml2_420_12) {
+		*BytePerPixelDETY = 2;
+		*BytePerPixelDETC = 4;
+		*BytePerPixelY = 2;
+		*BytePerPixelC = 4;
+	} else if (SourcePixelFormat == dml2_420_10) {
+		*BytePerPixelDETY = (double)(4.0 / 3);
+		*BytePerPixelDETC = (double)(8.0 / 3);
+		*BytePerPixelY = 2;
+		*BytePerPixelC = 4;
+	} else {
+		DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat);
+		DML_ASSERT(0);
+	}
+
+	DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
+	DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y);
+	DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c);
+
+	/* Both flags default to false; only a dml2_sw_linear surface whose pitch times bytes-per-pixel is not a multiple of 256 sets them. */
+	*surf_linear128_l = false;
+	*surf_linear128_c = false;
+	if (dml_get_gfx_version(SurfaceTiling) != 11 && SurfaceTiling == dml2_sw_linear) {
+		*surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0);
+		if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
+			*surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0);
+	}
+	DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l);
+	DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c);
+
+	if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) {
+		if (SurfaceTiling == dml2_sw_linear) {
+			*BlockHeight256BytesY = 1;
+		} else if (SourcePixelFormat == dml2_444_64) {
+			*BlockHeight256BytesY = 4;
+		} else if (SourcePixelFormat == dml2_444_8) {
+			*BlockHeight256BytesY = 16;
+		} else {
+			*BlockHeight256BytesY = 8;
+		}
+		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+		*BlockHeight256BytesC = 0;
+		*BlockWidth256BytesC = 0;
+	} else { // dual plane
+		if (SurfaceTiling == dml2_sw_linear) {
+			*BlockHeight256BytesY = 1;
+			*BlockHeight256BytesC = 1;
+		} else if (SourcePixelFormat == dml2_rgbe_alpha) {
+			*BlockHeight256BytesY = 8;
+			*BlockHeight256BytesC = 16;
+		} else if (SourcePixelFormat == dml2_420_8) {
+			*BlockHeight256BytesY = 16;
+			*BlockHeight256BytesC = 8;
+		} else {
+			*BlockHeight256BytesY = 8;
+			*BlockHeight256BytesC = 8;
+		}
+		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
+		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
+	}
+	DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
+	DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
+	DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
+	DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
+
+	if (dml_get_gfx_version(SurfaceTiling) == 11) {
+		if (SurfaceTiling == dml2_gfx11_sw_linear) {
+			*MacroTileHeightY = *BlockHeight256BytesY;
+			*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
+			*MacroTileHeightC = *BlockHeight256BytesC;
+			if (*MacroTileHeightC == 0) {
+				*MacroTileWidthC = 0;
+			} else {
+				*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
+			}
+		} else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) {
+			*MacroTileHeightY = 16 * *BlockHeight256BytesY;
+			*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
+			*MacroTileHeightC = 16 * *BlockHeight256BytesC;
+			if (*MacroTileHeightC == 0) {
+				*MacroTileWidthC = 0;
+			} else {
+				*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
+			}
+		} else {
+			*MacroTileHeightY = 32 * *BlockHeight256BytesY;
+			*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
+			*MacroTileHeightC = 32 * *BlockHeight256BytesC;
+			if (*MacroTileHeightC == 0) {
+				*MacroTileWidthC = 0;
+			} else {
+				*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
+			}
+		}
+	} else {
+		unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling);
+		unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling
+
+		if (SurfaceTiling == dml2_sw_linear) {
+			macro_tile_scale = 1;
+		} else if (SurfaceTiling == dml2_sw_4kb_2d) {
+			macro_tile_scale = 4;
+		} else if (SurfaceTiling == dml2_sw_64kb_2d) {
+			macro_tile_scale = 16;
+		} else if (SurfaceTiling == dml2_sw_256kb_2d) {
+			macro_tile_scale = 32;
+		} else {
+			DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling);
+			DML_ASSERT(0);
+		}
+
+		*MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY;
+		*MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY;
+		*MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC;
+		if (*MacroTileHeightC == 0) {
+			*MacroTileWidthC = 0;
+		} else {
+			*MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC;
+		}
+	}
+
+	DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
+	DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
+	DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
+	DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
+}
+
+static void CalculateSinglePipeDPPCLKAndSCLThroughput(
+ double HRatio,
+ double HRatioChroma,
+ double VRatio,
+ double VRatioChroma,
+ double MaxDCHUBToPSCLThroughput,
+ double MaxPSCLToLBThroughput,
+ double PixelClock,
+ enum dml2_source_format_class SourcePixelFormat,
+ unsigned int HTaps,
+ unsigned int HTapsChroma,
+ unsigned int VTaps,
+ unsigned int VTapsChroma,
+
+ // Output: luma and chroma PSCL throughput, and the larger of the luma and chroma DPPCLK values computed below
+ double *PSCL_THROUGHPUT,
+ double *PSCL_THROUGHPUT_CHROMA,
+ double *DPPCLKUsingSingleDPP)
+{
+ double DPPCLKUsingSingleDPPLuma; /* clock derived from the luma ratios and taps */
+ double DPPCLKUsingSingleDPPChroma; /* clock derived from the chroma ratios and taps; only assigned for 4:2:0 and rgbe_alpha formats */
+
+ if (HRatio > 1) {
+ *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+
+ DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1); /* NOTE(review): VTaps / 6 is unsigned integer division, unlike (double)HTaps / 6.0 above - confirm the truncation is intended */
+
+ if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) /* more than 6 taps in either direction: at least 2 * PixelClock */
+ DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
+
+ if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) { /* no chroma computation: luma alone decides */
+ *PSCL_THROUGHPUT_CHROMA = 0;
+ *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
+ } else {
+ if (HRatioChroma > 1) {
+ *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0));
+ } else {
+ *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
+ }
+ DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma), /* NOTE(review): same integer division as the luma expression */
+ HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
+ if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
+ DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
+ *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); /* the more demanding plane sets the clock */
+ }
+}
+
+/* For each plane k, compute the single-DPP and per-pipe swath widths, their request-aligned upper bounds, requests per swath and maximum swath heights. */
+static void CalculateSwathWidth(
+	const struct dml2_display_cfg *display_cfg,
+	bool ForceSingleDPP,
+	unsigned int NumberOfActiveSurfaces,
+	enum dml2_odm_mode ODMMode[],
+	unsigned int BytePerPixY[],
+	unsigned int BytePerPixC[],
+	unsigned int Read256BytesBlockHeightY[],
+	unsigned int Read256BytesBlockHeightC[],
+	unsigned int Read256BytesBlockWidthY[],
+	unsigned int Read256BytesBlockWidthC[],
+	bool surf_linear128_l[],
+	bool surf_linear128_c[],
+	unsigned int DPPPerSurface[],
+
+	// Output
+	unsigned int req_per_swath_ub_l[],
+	unsigned int req_per_swath_ub_c[],
+	unsigned int SwathWidthSingleDPPY[], // post-rotated plane width
+	unsigned int SwathWidthSingleDPPC[],
+	unsigned int SwathWidthY[], // per-pipe
+	unsigned int SwathWidthC[], // per-pipe
+	unsigned int MaximumSwathHeightY[],
+	unsigned int MaximumSwathHeightC[],
+	unsigned int swath_width_luma_ub[], // per-pipe
+	unsigned int swath_width_chroma_ub[]) // per-pipe
+{
+	enum dml2_odm_mode MainSurfaceODMMode;
+	double odm_hactive_factor = 1.0;
+	unsigned int req_width_horz_y;
+	unsigned int req_width_horz_c;
+	unsigned int surface_width_ub_l;
+	unsigned int surface_height_ub_l;
+	unsigned int surface_width_ub_c;
+	unsigned int surface_height_ub_c;
+
+	DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
+	DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+			SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+		} else {
+			SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+		}
+
+		DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
+		DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
+		DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
+
+		MainSurfaceODMMode = ODMMode[k];
+
+		if (ForceSingleDPP) {
+			SwathWidthY[k] = SwathWidthSingleDPPY[k];
+		} else {
+			if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1)
+				odm_hactive_factor = 4.0;
+			else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1)
+				odm_hactive_factor = 3.0;
+			else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1)
+				odm_hactive_factor = 2.0;
+
+			if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) {
+				SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio)));
+			} else if (DPPPerSurface[k] == 2) {
+				SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
+			} else {
+				SwathWidthY[k] = SwathWidthSingleDPPY[k];
+			}
+		}
+
+		DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active);
+		DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
+		DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
+
+		if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
+			SwathWidthC[k] = SwathWidthY[k] / 2;
+			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
+		} else {
+			SwathWidthC[k] = SwathWidthY[k];
+			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
+		}
+
+		if (ForceSingleDPP == true) {
+			SwathWidthY[k] = SwathWidthSingleDPPY[k];
+			SwathWidthC[k] = SwathWidthSingleDPPC[k];
+		}
+
+		req_width_horz_y = Read256BytesBlockWidthY[k];
+		req_width_horz_c = Read256BytesBlockWidthC[k];
+
+		if (surf_linear128_l[k])
+			req_width_horz_y = req_width_horz_y / 2;
+
+		if (surf_linear128_c[k])
+			req_width_horz_c = req_width_horz_c / 2;
+
+		surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y);
+		surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]);
+		surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c);
+		surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]);
+
+		DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
+		DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
+		DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
+		DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
+		DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
+		DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
+		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary);
+		DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
+
+		req_per_swath_ub_l[k] = 0;
+		req_per_swath_ub_c[k] = 0;
+		if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
+			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
+			if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
+				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y)));
+			} else {
+				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y));
+			}
+			req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y;
+
+			/* Chroma mirrors the luma case: the swath runs along x here, so the plane1 x_start is what gets aligned to the request width. */
+			if (BytePerPixC[k] > 0) {
+				if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
+					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start, req_width_horz_c)));
+				} else {
+					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c));
+				}
+				req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c;
+			} else {
+				swath_width_chroma_ub[k] = 0;
+			}
+		} else {
+			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
+			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
+
+			if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
+				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k])));
+			} else {
+				swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
+			}
+			req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k];
+			if (BytePerPixC[k] > 0) {
+				if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
+					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k])));
+				} else {
+					swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
+				}
+				req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k];
+			} else {
+				swath_width_chroma_ub[k] = 0;
+			}
+		}
+
+		DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]);
+	}
+}
+
+/*
+ * Decide whether unbounded requesting is enabled.
+ *
+ * It requires exactly one active DPP together with NoChromaOrLinear. When
+ * unb_req_force_en is set, unb_req_force_val is ANDed in, so forcing can only disable it.
+ */
+static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear)
+{
+	const bool eligible = (TotalNumberOfActiveDPP == 1 && NoChromaOrLinear);
+	const bool enabled = unb_req_force_en ? (unb_req_force_val && eligible) : eligible;
+
+	DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en);
+	DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val);
+	DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, eligible);
+	DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, enabled);
+	return enabled;
+}
+
+static void CalculateDETBufferSize(
+ struct dml2_core_shared_CalculateDETBufferSize_locals *l,
+ const struct dml2_display_cfg *display_cfg,
+ bool ForceSingleDPP,
+ unsigned int NumberOfActiveSurfaces,
+ bool UnboundedRequestEnabled,
+ unsigned int nomDETInKByte,
+ unsigned int MaxTotalDETInKByte,
+ unsigned int ConfigReturnBufferSizeInKByte,
+ unsigned int MinCompressedBufferSizeInKByte,
+ unsigned int ConfigReturnBufferSegmentSizeInkByte,
+ unsigned int CompressedBufferSegmentSizeInkByte,
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int full_swath_bytes_l[],
+ unsigned int full_swath_bytes_c[],
+ unsigned int DPPPerSurface[],
+ // Output
+ unsigned int DETBufferSizeInKByte[],
+ unsigned int *CompressedBufferSizeInkByte)
+{
+ memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals));
+
+ bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES];
+ bool NextPotentialSurfaceToAssignDETPieceFound;
+ bool MinimizeReallocationSuccess = false;
+
+ DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
+ DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
+ DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+ DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
+ DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
+ DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
+ DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
+ DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte);
+
+ // Note: Will use default det size if that fits 2 swaths
+ if (UnboundedRequestEnabled) {
+ if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) {
+ DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb;
+ } else {
+ DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
+ }
+ *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
+ } else {
+ l->DETBufferSizePoolInKByte = MaxTotalDETInKByte;
+ for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DETBufferSizeInKByte[k] = 0;
+ if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
+ l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
+ } else {
+ l->max_minDET = nomDETInKByte;
+ }
+ l->minDET = 128;
+ l->minDET_pipe = 0;
+
+ // add DET resource until can hold 2 full swaths
+ while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) {
+ if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET)
+ l->minDET_pipe = l->minDET;
+ l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte;
+ }
+
+ DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET);
+ DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET);
+ DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]);
+
+ if (l->minDET_pipe == 0) {
+ l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
+ DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe);
+ }
+
+ if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ DETBufferSizeInKByte[k] = 0;
+ } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) {
+ DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
+ l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
+ } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) {
+ DETBufferSizeInKByte[k] = l->minDET_pipe;
+ l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe;
+ }
+
+ DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb);
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
+ DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte);
+ }
+
+ if (display_cfg->minimize_det_reallocation) {
+ MinimizeReallocationSuccess = true;
+ // To minimize det reallocation, we don't distribute based on each surfaces bandwidth proportional to the global
+ // but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on
+ // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only on a
+ // stream count and/or pixel rate change, which is must less likely then general bandwidth changes per plane.
+
+ // Calculate total pixel rate
+ for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
+ l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz;
+ }
+
+ // Calculate per stream DET budget
+ for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
+ l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate);
+ l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k];
+ }
+
+ // Calculate the per stream total bandwidth
+ for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+
+ // Check the minimum can be satisfied by budget
+ if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
+ l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+ } else {
+ MinimizeReallocationSuccess = false;
+ break;
+ }
+ }
+ }
+
+ if (MinimizeReallocationSuccess) {
+ // Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each streams
+ // budget proportionally across its planes
+ l->ResidualDETAfterRounding = MaxTotalDETInKByte;
+
+ for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index])
+ * l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]);
+
+ if (l->IdealDETBudget > DETBufferSizeInKByte[k]) {
+ l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k];
+ if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index])
+ l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index];
+
+ /* split the additional budgeted DET among the pipes per plane */
+ DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / (ForceSingleDPP ? 1 : DPPPerSurface[k]));
+ l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget;
+ }
+
+ // Round down to segment size
+ DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte;
+
+ l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
+ }
+ }
+ }
+ }
+
+ if (!MinimizeReallocationSuccess) {
+ l->TotalBandwidth = 0;
+ for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+ if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+ }
+ }
+ DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
+ for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
+ }
+ DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
+ DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth);
+ l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth;
+ for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+
+ if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) {
+ DETPieceAssignedToThisSurfaceAlready[k] = true;
+ l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
+ } else {
+ DETPieceAssignedToThisSurfaceAlready[k] = false;
+ }
+ DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece);
+ }
+
+ for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) {
+ NextPotentialSurfaceToAssignDETPieceFound = false;
+ l->NextSurfaceToAssignDETPiece = 0;
+
+ for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
+ DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
+ DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece);
+ if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
+ ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) {
+ l->NextSurfaceToAssignDETPiece = k;
+ NextPotentialSurfaceToAssignDETPieceFound = true;
+ }
+ DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
+ DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
+ }
+
+ if (NextPotentialSurfaceToAssignDETPieceFound) {
+ l->NextDETBufferPieceInKByte = (unsigned int)(math_min2(
+ math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece /
+ ((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))
+ * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
+ math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
+
+ DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte);
+ DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece);
+ DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
+ DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
+ DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece);
+ DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte);
+ DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
+
+ DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]);
+ DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
+
+ l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte;
+ DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true;
+ l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
+ }
+ }
+ }
+ *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
+ }
+ *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte;
+
+ DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__);
+ DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
+ for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+ }
+}
+
+/*
+ * CalculateRequiredDispclk - display clock needed to drive one stream.
+ *
+ * @ODMMode:    ODM mode of the stream. An N:1 combine mode (N = 2, 3, 4)
+ *              divides the pixel clock by N; every other mode leaves it as is.
+ * @PixelClock: pixel clock of the stream; the result uses the same unit.
+ * @isTMDS420:  when true, the result is never allowed below PixelClock / 2.
+ *
+ * Return: the required display clock.
+ */
+static double CalculateRequiredDispclk(
+	enum dml2_odm_mode ODMMode,
+	double PixelClock,
+	bool isTMDS420)
+{
+	double required_clk;
+
+	switch (ODMMode) {
+	case dml2_odm_mode_combine_4to1:
+		required_clk = PixelClock / 4.0;
+		break;
+	case dml2_odm_mode_combine_3to1:
+		required_clk = PixelClock / 3.0;
+		break;
+	case dml2_odm_mode_combine_2to1:
+		required_clk = PixelClock / 2.0;
+		break;
+	default:
+		/* no ODM combine: one pixel per display clock */
+		required_clk = PixelClock;
+		break;
+	}
+
+	/* TMDS 4:2:0 imposes a floor of half the pixel clock regardless of ODM */
+	if (isTMDS420)
+		required_clk = math_max2(required_clk, PixelClock / 2.0);
+
+	return required_clk;
+}
+
+/*
+ * TruncToValidBPP - resolve the output bits per pixel for a link.
+ *
+ * The link capacity in bpp is LinkBitRate * Lanes / PixelClock scaled by an
+ * encoding-efficiency factor chosen from @Output (128/132 * 383/384 *
+ * 65536/65540 for dml2_dp2p0, 8/10 otherwise, with a further 2.4% reduction
+ * for DSC on dml2_dp). It is doubled when the ODM mode selected by @DSCEnable
+ * (@ODMModeDSC or @ODMModeNoDSC) is dml2_odm_mode_split_1to2.
+ *
+ * DesiredBPP == 0 asks the function to choose:
+ *   - DSC on:  invalid if the link cannot carry the minimum DSC bpp, 16 if it
+ *              can carry 16 or more, otherwise the link capacity truncated to
+ *              a multiple of 1/16 bpp.
+ *   - DSC off: the largest of the three uncompressed depths for
+ *              @Format/@Output that fits the link, invalid if none fits.
+ *
+ * DesiredBPP != 0 is only validated (DSC on: within [min DSC bpp, 16];
+ * DSC off: equal to one of the three uncompressed depths) and returned
+ * unchanged. The link capacity is not consulted on this path.
+ *
+ * @l, @HTotal, @HActive, @DSCInputBitPerComponent, @DSCSlices, @AudioRate and
+ * @AudioLayout are accepted but not read here; @RequiredSlots is never written.
+ *
+ * Return: the selected/validated bpp, or __DML2_CALCS_DPP_INVALID__.
+ */
+static double TruncToValidBPP(
+	struct dml2_core_shared_TruncToValidBPP_locals *l,
+	double LinkBitRate,
+	unsigned int Lanes,
+	unsigned int HTotal,
+	unsigned int HActive,
+	double PixelClock,
+	double DesiredBPP,
+	bool DSCEnable,
+	enum dml2_output_encoder_class Output,
+	enum dml2_output_format_class Format,
+	unsigned int DSCInputBitPerComponent,
+	unsigned int DSCSlices,
+	unsigned int AudioRate,
+	unsigned int AudioLayout,
+	enum dml2_odm_mode ODMModeNoDSC,
+	enum dml2_odm_mode ODMModeDSC,
+
+	// Output
+	unsigned int *RequiredSlots)
+{
+	const enum dml2_odm_mode odm_mode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
+	const double dsc_bpp_max = 16;	/* same ceiling for every format/output handled here */
+	unsigned int dsc_bpp_min;
+	unsigned int bpp_low, bpp_mid, bpp_high;	/* uncompressed candidates, ascending */
+	double link_bpp;
+	bool request_ok;
+
+	/* Per-format uncompressed depths and DSC floor */
+	switch (Format) {
+	case dml2_420:
+		bpp_low = 12;
+		bpp_mid = 15;
+		bpp_high = 18;
+		dsc_bpp_min = 6;
+		break;
+	case dml2_444:
+		bpp_low = 24;
+		bpp_mid = 30;
+		bpp_high = 36;
+		dsc_bpp_min = 8;
+		break;
+	default:
+		if (Output == dml2_hdmi || Output == dml2_hdmifrl) {
+			/* only a single uncompressed depth on the HDMI outputs */
+			bpp_low = 24;
+			bpp_mid = 24;
+			bpp_high = 24;
+		} else {
+			bpp_low = 16;
+			bpp_mid = 20;
+			bpp_high = 24;
+		}
+		dsc_bpp_min = (Format == dml2_n422 || Output == dml2_hdmifrl) ? 7 : 8;
+		break;
+	}
+
+	/* Link capacity expressed in bits per pixel */
+	if (Output == dml2_dp2p0)
+		link_bpp = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
+	else if (DSCEnable && Output == dml2_dp)
+		link_bpp = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
+	else
+		link_bpp = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
+
+	if (odm_mode == dml2_odm_mode_split_1to2)
+		link_bpp = 2 * link_bpp;
+
+	if (DesiredBPP == 0) {
+		/* Caller wants the best value the link can carry */
+		if (DSCEnable) {
+			if (link_bpp < dsc_bpp_min)
+				return __DML2_CALCS_DPP_INVALID__;
+			if (link_bpp >= dsc_bpp_max)
+				return dsc_bpp_max;
+			/* truncate to 1/16 bpp granularity */
+			return math_floor2(16.0 * link_bpp, 1.0) / 16.0;
+		}
+
+		if (link_bpp >= bpp_high)
+			return bpp_high;
+		if (link_bpp >= bpp_mid)
+			return bpp_mid;
+		if (link_bpp >= bpp_low)
+			return bpp_low;
+		return __DML2_CALCS_DPP_INVALID__;
+	}
+
+	/* Caller supplied a value: accept it only if it is legal for this mode */
+	if (DSCEnable)
+		request_ok = (DesiredBPP >= dsc_bpp_min && DesiredBPP <= dsc_bpp_max);
+	else
+		request_ok = (DesiredBPP == bpp_high || DesiredBPP == bpp_mid || DesiredBPP == bpp_low);
+
+	if (!request_ok)
+		return __DML2_CALCS_DPP_INVALID__;
+
+	return DesiredBPP;
+}
+
+// updated for dcn4
+/*
+ * dscceComputeDelay - delay, in pixels, contributed by the DSC encoder's
+ * bit stream construction layer before its first output.
+ *
+ * @bpc:         source bits per component; only selects the sub-stream
+ *               multiplexer priming delay (8/10/12/14 have dedicated values,
+ *               anything else uses 128 groups).
+ * @BPP:         target bits per pixel; used as a divisor, so it must be
+ *               non-zero (not checked here).
+ * @sliceWidth:  pixels per slice line. The derived slice_width_modified is
+ *               used as a divisor and must be non-zero (not checked here).
+ * @numSlices:   number of horizontal slices per DSC engine.
+ * @pixelFormat: output pixel format.
+ * @Output:      output encoder class; dml2_hdmifrl is treated specially.
+ *
+ * All intermediate math is unsigned integer arithmetic, so every division
+ * below truncates unless written as the usual (x + d - 1) / d round-up.
+ *
+ * Return: the computed delay in pixels.
+ */
+static unsigned int dscceComputeDelay(
+ unsigned int bpc,
+ double BPP,
+ unsigned int sliceWidth,
+ unsigned int numSlices,
+ enum dml2_output_format_class pixelFormat,
+ enum dml2_output_encoder_class Output)
+{
+ // valid bpc = source bits per component in the set of {8, 10, 12}
+ // valid bpp = increments of 1/16 of a bit
+ // min = 6/7/8 in N420/N422/444, respectively
+ // max = such that compression is 1:1
+ //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
+ //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
+ //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
+
+ // fixed value
+ unsigned int rcModelSize = 8192;
+
+ // N422/N420 operate at 2 pixels per clock
+ unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified;
+
+ // NOTE(review): besides N420/N422, a non-420/non-444 format on a dml2_hdmifrl output also gets 2 here
+ if (pixelFormat == dml2_420)
+ pixelsPerClock = 2;
+ // #all other modes operate at 1 pixel per clock
+ else if (pixelFormat == dml2_444)
+ pixelsPerClock = 1;
+ else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
+ pixelsPerClock = 2;
+ else
+ pixelsPerClock = 1;
+
+ //initial transmit delay as per PPS
+ // half the rate-control model size divided by BPP and pixels per clock; assumes BPP != 0
+ initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock));
+
+ //slice width as seen by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format)
+ // NOTE(review): the width is halved for 444/420/hdmifrl but not for N422, which does not line up with
+ // the pixelsPerClock selection above (444 -> 1, N422 -> 2) — confirm this is the intended format set
+ slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth;
+
+ // pixels added to pad each slice line to a multiple of 3, times the (truncated) number of slice lines within
+ // the initial transmit delay; assumes slice_width_modified != 0
+ padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0;
+
+ // (initial_xmit_delay + 2) / 3 is an integer round-up to whole groups, evaluated before the cast to double;
+ // the right-hand factor is 4 for N422 and 3 for every other format
+ if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) {
+ if ((initial_xmit_delay + padding_pixels) % 3 == 1) {
+ initial_xmit_delay++;
+ }
+ }
+
+ //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard
+ if (bpc == 8)
+ ssm_group_priming_delay = 83;
+ else if (bpc == 10)
+ ssm_group_priming_delay = 91;
+ else if (bpc == 12)
+ ssm_group_priming_delay = 115;
+ else if (bpc == 14)
+ ssm_group_priming_delay = 123;
+ else
+ ssm_group_priming_delay = 128;
+
+ //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice
+ slice_width_groups = (slice_width_modified + 2) / 3;
+
+ //determine number of padded pixels in the last group of a slice line, computed as
+ //3 * (slice width in groups) minus the slice width
+ slice_padded_pixels = 3 * slice_width_groups - slice_width_modified;
+
+ //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered
+ number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified;
+
+ //increase initial transmit delay by the number of padded pixels added to a slice line multiplied by the integer number of complete lines to reach initial transmit delay
+ //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay
+ ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd;
+
+ //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels
+ ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3;
+
+ //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay
+ groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay;
+
+ //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice
+ //needs to be rounded up as complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice
+ lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next
+
+ //determine if there are non-zero number of pixels reached in the group where initial transmit delay is reached
+ //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay
+ additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 1 : 0;
+
+ //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block)
+ ssm_pipeline_delay = 2;
+
+ //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block)
+ obsm_pipeline_delay = 1;
+
+ //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes
+ // NOTE(review): the condition below also selects 6 for dml2_444 and for any format on dml2_hdmifrl,
+ // which is wider than the comment above states — confirm which of the two is intended
+ if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
+ cycles_per_group = 6;
+ else
+ cycles_per_group = 3;
+ //delay of the bit stream construction layer in pixels is the sum of:
+ //1. number of pixel containers in a slice line multiplied by the number of lines required to reach initial transmit delay multiplied by number of slices to the left of the last horizontal slice
+ //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice)
+ //3. additional group of delay if initial transmit delay is reached exactly in a group
+ //4. ssm and obsm pipeline delay (i.e., clock cycles of delay)
+ group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay;
+ pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay;
+
+ //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format)
+ pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay;
+
+ DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc);
+ DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP);
+ DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
+ DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices);
+ DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
+ DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output);
+ DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels);
+ return pixels;
+}
+
+//updated in dcn4
+/*
+ * dscComputeDelay - fixed DSC block delay for a pixel format / output class.
+ *
+ * The result is the sum of a handful of per-stage delays. Stages that scale
+ * with the clock ratio are multiplied by dispclk_per_dscclk, which is 6 for
+ * dml2_420, dml2_n422 and any non-444 format on dml2_hdmifrl, and 3 otherwise.
+ * Only three stage parameters vary between formats:
+ *   - compression-layer top delay: 17 ratio units for dml2_s422 at ratio 3,
+ *     16 in every other case;
+ *   - dsccif: one extra cycle at ratio 6 unless the format is dml2_420;
+ *   - input deserializer: 5 cycles at ratio 6, 3 cycles at ratio 3.
+ *
+ * @pixelFormat: output pixel format.
+ * @Output:      output encoder class.
+ *
+ * Return: the total delay (same unit the per-stage constants are expressed in).
+ */
+static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output)
+{
+	/* formats for which the dispclk:dscclk ratio is doubled */
+	const bool doubled_ratio = pixelFormat == dml2_420 || pixelFormat == dml2_n422 ||
+				   (Output == dml2_hdmifrl && pixelFormat != dml2_444);
+	const unsigned int dispclk_per_dscclk = doubled_ratio ? 3 * 2 : 3;
+
+	unsigned int top_delay_units = 16;	/* dscc top delay for pixel compression layer */
+	unsigned int dsccif_cycles = 0;		/* dsccif */
+	unsigned int deserializer_cycles = 3;	/* dscc - input deserializer */
+	unsigned int total;
+
+	if (doubled_ratio) {
+		deserializer_cycles = 5;
+		if (pixelFormat != dml2_420)
+			dsccif_cycles = 1;
+	} else if (pixelFormat == dml2_s422) {
+		top_delay_units = 17;
+	}
+
+	total = 2;						/* sfr */
+	total += top_delay_units * dispclk_per_dscclk;
+	total += dsccif_cycles;
+	total += deserializer_cycles;
+	total += 1 + 4 * dispclk_per_dscclk;			/* dscc - input cdc fifo */
+	total += 3 + 1 * dispclk_per_dscclk;			/* dscc - output cdc fifo */
+	total += 3 + 3 * dispclk_per_dscclk;			/* dscc - cdc uncertainty */
+	total += 1;						/* sft */
+
+	DML_LOG_VERBOSE("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
+	DML_LOG_VERBOSE("DML::%s: Delay = %u\n", __func__, total);
+
+	return total;
+}
+
+/*
+ * CalculateHostVMDynamicLevels - number of dynamic host VM page table levels.
+ *
+ * @GPUVMEnable:                       GPU VM translation enabled.
+ * @HostVMEnable:                      host VM translation enabled.
+ * @HostVMMinPageSize:                 minimum host VM page size; compared
+ *                                     against 2048 and 1048576 (unit not
+ *                                     visible here — presumably KBytes, TODO
+ *                                     confirm against callers).
+ * @HostVMMaxNonCachedPageTableLevels: upper bound on non-cached levels.
+ *
+ * Return: 0 unless both translations are enabled. Otherwise the maximum level
+ * count reduced by 0, 1 or 2 as the minimum page size crosses the 2048 and
+ * 1048576 thresholds, clamped so it never goes below 0.
+ */
+static unsigned int CalculateHostVMDynamicLevels(
+	bool GPUVMEnable,
+	bool HostVMEnable,
+	unsigned int HostVMMinPageSize,
+	unsigned int HostVMMaxNonCachedPageTableLevels)
+{
+	unsigned int levels_skipped;
+
+	if (!GPUVMEnable || !HostVMEnable)
+		return 0;
+
+	/* smallest pages: every non-cached level is walked */
+	if (HostVMMinPageSize < 2048)
+		return HostVMMaxNonCachedPageTableLevels;
+
+	/* larger pages drop one level, the largest drop two */
+	levels_skipped = (HostVMMinPageSize < 1048576) ? 1 : 2;
+
+	return (unsigned int)math_max2(0, (double)HostVMMaxNonCachedPageTableLevels - levels_skipped);
+}
+
+static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p)
+{
+ unsigned int extra_dpde_bytes;
+ unsigned int extra_mpde_bytes;
+ unsigned int MacroTileSizeBytes;
+ unsigned int vp_height_dpte_ub;
+
+ unsigned int meta_surface_bytes;
+ unsigned int vm_bytes;
+ unsigned int vp_height_meta_ub;
+ unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
+
+ *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes;
+ *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes;
+ if (p->SurfaceTiling == dml2_sw_linear) {
+ *p->meta_row_height = 32;
+ *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
+ *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways?
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
+ *p->meta_row_height = *p->MetaRequestHeight;
+ if (p->ViewportStationary && p->NumberOfDPPs == 1) {
+ *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
+ } else {
+ *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth);
+ }
+ *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0);
+ } else {
+ *p->meta_row_height = *p->MetaRequestWidth;
+ if (p->ViewportStationary && p->NumberOfDPPs == 1) {
+ *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight));
+ } else {
+ *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight);
+ }
+ *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0);
+ }
+
+ if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
+ vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes));
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
+ vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
+ } else {
+ vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
+ }
+
+ meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0);
+ DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch);
+ DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes);
+ if (p->GPUVMEnable == true) {
+ double meta_vmpg_bytes = 4.0 * 1024.0;
+ *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64);
+ extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1);
+ } else {
+ *p->meta_pte_bytes_per_frame_ub = 0;
+ extra_mpde_bytes = 0;
+ }
+
+ if (!p->DCCEnable || !p->mrq_present) {
+ *p->meta_pte_bytes_per_frame_ub = 0;
+ extra_mpde_bytes = 0;
+ *p->meta_row_bytes = 0;
+ }
+
+ if (!p->GPUVMEnable) {
+ *p->PixelPTEBytesPerRow = 0;
+ *p->PixelPTEBytesPerRowStorage = 0;
+ *p->dpte_row_width_ub = 0;
+ *p->dpte_row_height = 0;
+ *p->dpte_row_height_linear = 0;
+ *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
+ *p->dpte_row_width_ub_one_row_per_frame = 0;
+ *p->dpte_row_height_one_row_per_frame = 0;
+ *p->vmpg_width = 0;
+ *p->vmpg_height = 0;
+ *p->PixelPTEReqWidth = 0;
+ *p->PixelPTEReqHeight = 0;
+ *p->PTERequestSize = 0;
+ *p->dpde0_bytes_per_frame_ub = 0;
+ return 0;
+ }
+
+ MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight;
+
+ if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
+ vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight));
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
+ vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight);
+ } else {
+ vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight);
+ }
+
+ if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) {
+ *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1));
+ extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2);
+ } else {
+ *p->dpde0_bytes_per_frame_ub = 0;
+ extra_dpde_bytes = 0;
+ }
+
+ vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes;
+
+ DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable);
+ DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
+ DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear);
+ DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel);
+ DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels);
+ DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes);
+ DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes);
+ DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight);
+ DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth);
+ DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub);
+ DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub);
+ DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes);
+ DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes);
+ DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
+ DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight);
+ DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth);
+ DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
+
+ if (p->SurfaceTiling == dml2_sw_linear) {
+ *p->PixelPTEReqHeight = 1;
+ *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
+ PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
+ *p->PTERequestSize = 64;
+
+ *p->vmpg_height = 1;
+ *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel;
+ } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE
+ *p->PixelPTEReqHeight = p->MacroTileHeight;
+ *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+ *p->PTERequestSize = 64;
+
+ *p->vmpg_height = p->MacroTileHeight;
+ *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+
+ } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile
+ // one 64KB tile, is 16x16x256B req
+ *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes;
+ *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes;
+ *p->PTERequestSize = 128;
+
+ *p->vmpg_height = *p->PixelPTEReqHeight;
+ *p->vmpg_width = *p->PixelPTEReqWidth;
+ } else {
+ // default for rest of calculation to go through, when vm is disable, the calulated pte related values shouldnt be used anyways
+ *p->PixelPTEReqHeight = p->MacroTileHeight;
+ *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+ *p->PTERequestSize = 64;
+
+ *p->vmpg_height = p->MacroTileHeight;
+ *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
+
+ if (p->GPUVMEnable == true) {
+ DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n",
+ __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling));
+ DML_ASSERT(0);
+ }
+ }
+
+ DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
+ DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize);
+ DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch);
+ DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width);
+ DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height);
+
+ *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
+ *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth);
+ *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
+ *p->dpte_row_height_linear = 0;
+
+ if (p->SurfaceTiling == dml2_sw_linear) {
+ *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1))));
+ *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth);
+ *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
+
+ // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
+ *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1);
+ if (*p->dpte_row_height_linear > 128)
+ *p->dpte_row_height_linear = 128;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub);
+#endif
+
+ } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
+ *p->dpte_row_height = *p->PixelPTEReqHeight;
+
+ if (p->GPUVMMinPageSizeKBytes > 64) {
+ *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth);
+ } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
+ *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth));
+ } else {
+ *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth);
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub);
+#endif
+
+ *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize;
+ } else {
+ *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth));
+
+ if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
+ *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight));
+ } else {
+ *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight);
+ }
+
+ *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub);
+#endif
+ }
+
+ if (p->GPUVMEnable != true) {
+ *p->PixelPTEBytesPerRow = 0;
+ *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
+ }
+
+ *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
+ DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
+ DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage);
+ DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame);
+ DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame);
+#endif
+
+ return vm_bytes;
+} // CalculateVMAndRowBytes
+
+/*
+ * Compute how many source lines must be prefetched for one plane.
+ *
+ * Outputs:
+ *   *VInitPreFill - floor((VRatio + VTaps + 1 [+ 0.5 * VRatio when interlaced
+ *                   and no progressive-to-interlace unit is in OPP]) / 2)
+ *   *MaxNumSwath  - number of whole swaths covered by the prefill
+ *
+ * Returns *MaxNumSwath * SwathHeight plus the partial-swath line count.
+ *
+ * When the viewport is stationary the swath count is derived from a start
+ * coordinate chosen by the rotation/mirror combination; otherwise it depends
+ * only on the prefill and SwathHeight. SwathHeight is used as a divisor and
+ * as a modulus, so callers are expected to pass a non-zero value.
+ */
+static unsigned int CalculatePrefetchSourceLines(
+	double VRatio,
+	unsigned int VTaps,
+	bool Interlace,
+	bool ProgressiveToInterlaceUnitInOPP,
+	unsigned int SwathHeight,
+	enum dml2_rotation_angle RotationAngle,
+	bool mirrored,
+	bool ViewportStationary,
+	unsigned int SwathWidth,
+	unsigned int ViewportHeight,
+	unsigned int ViewportXStart,
+	unsigned int ViewportYStart,
+
+	// Output
+	unsigned int *VInitPreFill,
+	unsigned int *MaxNumSwath)
+{
+	unsigned int rot_vp_start = 0;
+	unsigned int first_swath_lines = 0;
+	unsigned int partial_swath_lines = 0;
+	double prefetch_lines = 0;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
+	DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps);
+	DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
+	DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
+	DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
+	DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
+#endif
+	/* The interlace term only applies when OPP has no progressive-to-interlace unit. */
+	if (!ProgressiveToInterlaceUnitInOPP)
+		*VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1));
+	else
+		*VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1));
+
+	if (!ViewportStationary) {
+		*MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1);
+
+		/* For a prefill of 0 or 1, SwathHeight is added before subtracting 2. */
+		if (*VInitPreFill <= 1)
+			partial_swath_lines = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight));
+		else
+			partial_swath_lines = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight));
+	} else {
+		/* Pick the viewport start coordinate that matches the scan direction. */
+		switch (RotationAngle) {
+		case dml2_rotation_180:
+			rot_vp_start = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
+			break;
+		case dml2_rotation_270:
+			if (mirrored)
+				rot_vp_start = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
+			else
+				rot_vp_start = ViewportXStart;
+			break;
+		case dml2_rotation_90:
+			if (mirrored)
+				rot_vp_start = ViewportXStart;
+			else
+				rot_vp_start = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
+			break;
+		default:
+			rot_vp_start = ViewportYStart;
+			break;
+		}
+
+		/* Lines left in the swath that contains the start coordinate. */
+		first_swath_lines = SwathHeight - (rot_vp_start % SwathHeight);
+
+		if (first_swath_lines >= *VInitPreFill)
+			*MaxNumSwath = 1;
+		else
+			*MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - first_swath_lines) / (double)SwathHeight, 1) + 1);
+
+		partial_swath_lines = (unsigned int)(math_max2(1, (unsigned int)(rot_vp_start + *VInitPreFill - 1) % SwathHeight));
+	}
+
+	prefetch_lines = *MaxNumSwath * SwathHeight + partial_swath_lines;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, rot_vp_start);
+	DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
+	DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, partial_swath_lines);
+	DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
+	DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, prefetch_lines);
+#endif
+	return (unsigned int)(prefetch_lines);
+}
+
+/*
+ * Compute the per-plane row bandwidth terms.
+ *
+ * *meta_row_bw is 0 unless both DCCEnable and mrq_present are set.
+ * *dpte_row_bw is 0 unless GPUVMEnable is set.
+ * Otherwise each is VRatio * bytes_per_row / (row_height * LineTime) for luma,
+ * plus the same term built from the chroma inputs when the format is 4:2:0
+ * (per dml_is_420()) or dml2_rgbe_alpha.
+ *
+ * use_one_row_for_frame is accepted but not read here.
+ */
+static void CalculateRowBandwidth(
+	bool GPUVMEnable,
+	bool use_one_row_for_frame,
+	enum dml2_source_format_class SourcePixelFormat,
+	double VRatio,
+	double VRatioChroma,
+	bool DCCEnable,
+	double LineTime,
+	unsigned int PixelPTEBytesPerRowLuma,
+	unsigned int PixelPTEBytesPerRowChroma,
+	unsigned int dpte_row_height_luma,
+	unsigned int dpte_row_height_chroma,
+
+	bool mrq_present,
+	unsigned int meta_row_bytes_per_row_ub_l,
+	unsigned int meta_row_bytes_per_row_ub_c,
+	unsigned int meta_row_height_luma,
+	unsigned int meta_row_height_chroma,
+
+	// Output
+	double *dpte_row_bw,
+	double *meta_row_bw)
+{
+	double meta_bw = 0;
+	double dpte_bw = 0;
+
+	/* Meta rows are only fetched with DCC on and a meta request queue present. */
+	if (DCCEnable && mrq_present) {
+		if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
+			meta_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime)
+				+ VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime);
+		} else {
+			meta_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime);
+		}
+	}
+
+	/* PTE rows are only fetched when GPUVM is enabled. */
+	if (GPUVMEnable) {
+		if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
+			dpte_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
+				+ VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
+		} else {
+			dpte_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
+		}
+	}
+
+	*meta_row_bw = meta_bw;
+	*dpte_row_bw = dpte_bw;
+}
+
+/*
+ * Decide which surfaces use MALL for static screen.
+ *
+ * Step 1: every surface whose refresh_from_mall override is force_enable is
+ *         selected and its size is added to the running total.
+ * Step 2: repeatedly pick the smallest not-yet-selected surface that
+ *         - still fits (total + size <= MALLAllocatedForDCN * 1024 * 1024),
+ *         - is not overridden to force_disable, and
+ *         - has one_row_per_frame_fits_in_buffer[] set,
+ *         until no such surface remains. Ties keep the lowest index because
+ *         the size comparison is strict.
+ *
+ * The result is written to is_using_mall_for_ss[0..NumberOfActiveSurfaces-1].
+ */
+static void CalculateMALLUseForStaticScreen(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int MALLAllocatedForDCN,
+	unsigned int SurfaceSizeInMALL[],
+	bool one_row_per_frame_fits_in_buffer[],
+
+	// Output
+	bool is_using_mall_for_ss[])
+{
+	/* Presumably MALLAllocatedForDCN is in MiB and this is the budget in bytes -- TODO confirm. */
+	const unsigned int mall_budget = MALLAllocatedForDCN * 1024 * 1024;
+	unsigned int mall_used = 0;
+	unsigned int k;
+
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable);
+		if (is_using_mall_for_ss[k])
+			mall_used += SurfaceSizeInMALL[k];
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, mall_used);
+#endif
+	}
+
+	for (;;) {
+		bool found = false;
+		unsigned int smallest = 0;
+
+		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+			if (mall_used + SurfaceSizeInMALL[k] > mall_budget)
+				continue;
+			if (is_using_mall_for_ss[k])
+				continue;
+			if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable)
+				continue;
+			if (!one_row_per_frame_fits_in_buffer[k])
+				continue;
+			/* Only replace the current candidate with a strictly smaller surface. */
+			if (found && SurfaceSizeInMALL[k] >= SurfaceSizeInMALL[smallest])
+				continue;
+
+			found = true;
+			smallest = k;
+			DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall);
+		}
+
+		if (!found)
+			break;
+
+		is_using_mall_for_ss[smallest] = true;
+		mall_used += SurfaceSizeInMALL[smallest];
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, smallest);
+		DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, mall_used);
+#endif
+	}
+}
+
+/*
+ * Choose the DCC request type for the luma and chroma planes and derive the
+ * matching block-size outputs.
+ *
+ * A "full swath" byte count is estimated per plane for horizontal and for
+ * vertical access, using the luma surface size clamped to a maximum viewport
+ * limit. Each plane is flagged as needing 128-byte requests (req128_*) when
+ * the swaths do not fit in the DET buffer (nomDETInKByte * 1024 bytes). Those
+ * flags, the scan direction (or both directions when
+ * DCCProgrammingAssumesScanDirectionUnknown is set) and a per-plane segment
+ * order select one of the three request types.
+ *
+ * Outputs: *RequestLuma / *RequestChroma and the MaxUncompressedBlock*,
+ * MaxCompressedBlock* and IndependentBlock* values. The block outputs are
+ * zeroed when DCCEnabled is false; the chroma block outputs are also zeroed
+ * when BytePerPixelC is 0.
+ *
+ * SurfaceWidthChroma, SurfaceHeightChroma, TilingFormat, BytePerPixelDETY and
+ * BytePerPixelDETC are accepted but not read by this function.
+ */
+static void CalculateDCCConfiguration(
+ bool DCCEnabled,
+ bool DCCProgrammingAssumesScanDirectionUnknown,
+ enum dml2_source_format_class SourcePixelFormat,
+ unsigned int SurfaceWidthLuma,
+ unsigned int SurfaceWidthChroma,
+ unsigned int SurfaceHeightLuma,
+ unsigned int SurfaceHeightChroma,
+ unsigned int nomDETInKByte,
+ unsigned int RequestHeight256ByteLuma,
+ unsigned int RequestHeight256ByteChroma,
+ enum dml2_swizzle_mode TilingFormat,
+ unsigned int BytePerPixelY,
+ unsigned int BytePerPixelC,
+ double BytePerPixelDETY,
+ double BytePerPixelDETC,
+ enum dml2_rotation_angle RotationAngle,
+
+ // Output
+ enum dml2_core_internal_request_type *RequestLuma,
+ enum dml2_core_internal_request_type *RequestChroma,
+ unsigned int *MaxUncompressedBlockLuma,
+ unsigned int *MaxUncompressedBlockChroma,
+ unsigned int *MaxCompressedBlockLuma,
+ unsigned int *MaxCompressedBlockChroma,
+ unsigned int *IndependentBlockLuma,
+ unsigned int *IndependentBlockChroma)
+{
+ // DET buffer size in bytes used as the budget for the fit checks below
+ unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
+
+ // 1 = segment order is contiguous for that access direction, 0 = it is not
+ unsigned int segment_order_horz_contiguous_luma;
+ unsigned int segment_order_horz_contiguous_chroma;
+ unsigned int segment_order_vert_contiguous_luma;
+ unsigned int segment_order_vert_contiguous_chroma;
+
+ // 1 = the plane needs 128-byte requests for that access direction
+ unsigned int req128_horz_wc_l;
+ unsigned int req128_horz_wc_c;
+ unsigned int req128_vert_wc_l;
+ unsigned int req128_vert_wc_c;
+
+ unsigned int yuv420;
+ unsigned int horz_div_l;
+ unsigned int horz_div_c;
+ unsigned int vert_div_l;
+ unsigned int vert_div_c;
+
+ unsigned int swath_buf_size;
+ double detile_buf_vp_horz_limit;
+ double detile_buf_vp_vert_limit;
+
+ unsigned int MAS_vp_horz_limit;
+ unsigned int MAS_vp_vert_limit;
+ unsigned int max_vp_horz_width;
+ unsigned int max_vp_vert_height;
+ unsigned int eff_surf_width_l;
+ unsigned int eff_surf_width_c;
+ unsigned int eff_surf_height_l;
+ unsigned int eff_surf_height_c;
+
+ unsigned int full_swath_bytes_horz_wc_l;
+ unsigned int full_swath_bytes_horz_wc_c;
+ unsigned int full_swath_bytes_vert_wc_l;
+ unsigned int full_swath_bytes_vert_wc_c;
+
+ if (dml_is_420(SourcePixelFormat))
+ yuv420 = 1;
+ else
+ yuv420 = 0;
+ horz_div_l = 1;
+ horz_div_c = 1;
+ vert_div_l = 1;
+ vert_div_c = 1;
+
+ // The vertical divisor (1 + vert_div_*) drops from 2 to 1 for 1-byte pixels
+ if (BytePerPixelY == 1)
+ vert_div_l = 0;
+ if (BytePerPixelC == 1)
+ vert_div_c = 0;
+
+ // Viewport limits from the detile buffer: half the DET buffer minus a fixed
+ // reserve, divided by the per-pixel (horz) or per-line (vert) byte cost.
+ // With BytePerPixelC == 0 only the luma terms contribute.
+ if (BytePerPixelC == 0) {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
+ detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
+ detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
+ } else {
+ swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
+ detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
+ detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
+ }
+
+ // dml2_420_10 gets limits scaled by 1.5; the matching swath byte counts are scaled by 2/3 further down
+ if (SourcePixelFormat == dml2_420_10) {
+ detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
+ detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
+ }
+
+ // NOTE(review): presumably math_floor2(x, 16) rounds down to a multiple of 16 -- confirm against the helper
+ detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16);
+ detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16);
+
+ // Effective surface size = luma surface size clamped to min(fixed format limit, detile buffer limit);
+ // chroma is derived from the clamped luma size, halved for 4:2:0
+ MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144;
+ MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
+ max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit));
+ max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit));
+ eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
+ eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
+ eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
+ eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
+
+ // Bytes in one full swath, per plane and per access direction
+ full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
+ full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
+ if (BytePerPixelC > 0) {
+ full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
+ full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
+ } else {
+ full_swath_bytes_horz_wc_c = 0;
+ full_swath_bytes_vert_wc_c = 0;
+ }
+
+ if (SourcePixelFormat == dml2_420_10) {
+ full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
+ full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
+ full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
+ full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
+ }
+
+ // Horizontal access: if two swaths of both planes fit, neither plane needs 128-byte requests.
+ // Otherwise the 1.5x luma/chroma ratio picks a single plane to flag, provided
+ // that plane counted once and the other counted twice fit; failing that, both are flagged.
+ if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 0;
+ } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 0;
+ req128_horz_wc_c = 1;
+ } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 0;
+ } else {
+ req128_horz_wc_l = 1;
+ req128_horz_wc_c = 1;
+ }
+
+ // Vertical access: same decision ladder using the vertical swath byte counts
+ if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 0;
+ } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 0;
+ req128_vert_wc_c = 1;
+ } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 0;
+ } else {
+ req128_vert_wc_l = 1;
+ req128_vert_wc_c = 1;
+ }
+
+ // Segment order per plane: 2-byte pixels are contiguous vertically only,
+ // every other pixel size is contiguous horizontally only
+ if (BytePerPixelY == 2) {
+ segment_order_horz_contiguous_luma = 0;
+ segment_order_vert_contiguous_luma = 1;
+ } else {
+ segment_order_horz_contiguous_luma = 1;
+ segment_order_vert_contiguous_luma = 0;
+ }
+
+ if (BytePerPixelC == 2) {
+ segment_order_horz_contiguous_chroma = 0;
+ segment_order_vert_contiguous_chroma = 1;
+ } else {
+ segment_order_horz_contiguous_chroma = 1;
+ segment_order_vert_contiguous_chroma = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
+ DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
+ DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
+ DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
+ DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
+ DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
+ DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
+ DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
+ DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
+#endif
+ // Request type selection. Unknown scan direction: both directions must allow
+ // 256-byte requests, and a non-contiguous segment order in either flagged
+ // direction forces the non-contiguous 128-byte type.
+ if (DCCProgrammingAssumesScanDirectionUnknown == true) {
+ if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
+ *RequestLuma = dml2_core_internal_request_type_256_bytes;
+ } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
+ *RequestChroma = dml2_core_internal_request_type_256_bytes;
+ } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ } else if (!dml_is_vertical_rotation(RotationAngle)) {
+ // Known non-vertical rotation: only the horizontal flags and segment order matter
+ if (req128_horz_wc_l == 0) {
+ *RequestLuma = dml2_core_internal_request_type_256_bytes;
+ } else if (segment_order_horz_contiguous_luma == 0) {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ if (req128_horz_wc_c == 0) {
+ *RequestChroma = dml2_core_internal_request_type_256_bytes;
+ } else if (segment_order_horz_contiguous_chroma == 0) {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ } else {
+ // Known vertical rotation: only the vertical flags and segment order matter
+ if (req128_vert_wc_l == 0) {
+ *RequestLuma = dml2_core_internal_request_type_256_bytes;
+ } else if (segment_order_vert_contiguous_luma == 0) {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ if (req128_vert_wc_c == 0) {
+ *RequestChroma = dml2_core_internal_request_type_256_bytes;
+ } else if (segment_order_vert_contiguous_chroma == 0) {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
+ } else {
+ *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
+ }
+ }
+
+ // Block sizes per request type (uncompressed / compressed / independent):
+ // 256 bytes -> 256 / 256 / 0, 128 contiguous -> 256 / 128 / 128,
+ // 128 non-contiguous -> 256 / 64 / 64
+ if (*RequestLuma == dml2_core_internal_request_type_256_bytes) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 256;
+ *IndependentBlockLuma = 0;
+ } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 128;
+ *IndependentBlockLuma = 128;
+ } else {
+ *MaxUncompressedBlockLuma = 256;
+ *MaxCompressedBlockLuma = 64;
+ *IndependentBlockLuma = 64;
+ }
+
+ if (*RequestChroma == dml2_core_internal_request_type_256_bytes) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 256;
+ *IndependentBlockChroma = 0;
+ } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 128;
+ *IndependentBlockChroma = 128;
+ } else {
+ *MaxUncompressedBlockChroma = 256;
+ *MaxCompressedBlockChroma = 64;
+ *IndependentBlockChroma = 64;
+ }
+
+ // The block outputs are cleared when DCC is off (chroma also when there is no
+ // chroma plane); *RequestLuma / *RequestChroma keep the values chosen above
+ if (DCCEnabled != true || BytePerPixelC == 0) {
+ *MaxUncompressedBlockChroma = 0;
+ *MaxCompressedBlockChroma = 0;
+ *IndependentBlockChroma = 0;
+ }
+
+ if (DCCEnabled != true) {
+ *MaxUncompressedBlockLuma = 0;
+ *MaxCompressedBlockLuma = 0;
+ *IndependentBlockLuma = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
+ DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
+ DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
+ DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
+ DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
+ DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
+#endif
+
+}
+
+/*
+ * Compute the mcache row footprint of one plane and the resulting number of
+ * mcaches.
+ *
+ * Inputs are read from *p; results are written through the pointer members of
+ * *p:
+ *   mvmpg_width / mvmpg_height            alignment unit (vm page or tile block)
+ *   full_vp_access_width_mvmpg_aligned    viewport extent along the access
+ *                                         direction, aligned to that unit
+ *   meta_row_width_ub                     upper bound of the meta row width
+ *   mcache_row_bytes(_per_channel)        row footprint, total and per channel
+ *   num_mcaches                           ceil(per-channel bytes / mcache size)
+ *   mvmpg_per_mcache_lb                   units that fit in one mcache
+ *   dcc_dram_bw_nom/pref_overhead_factor  DRAM bandwidth overhead multipliers
+ *
+ * When both full_vp_width and full_vp_height are 0, only num_mcaches and the
+ * two mcache_row_bytes outputs (all 0) plus mvmpg_width/height (0) are written.
+ *
+ * @scratch is not used by this function.
+ */
+static void calculate_mcache_row_bytes(
+	struct dml2_core_internal_scratch *scratch,
+	struct dml2_core_calcs_calculate_mcache_row_bytes_params *p)
+{
+	unsigned int vmpg_bytes = 0;
+	unsigned int blk_bytes = 0;
+	float meta_per_mvmpg_per_channel = 0;
+	unsigned int est_blk_per_vmpg = 2;
+	unsigned int mvmpg_per_row_ub = 0;
+	unsigned int full_vp_width_mvmpg_aligned = 0;
+	unsigned int full_vp_height_mvmpg_aligned = 0;
+	unsigned int meta_per_mvmpg_per_channel_ub = 0;
+	unsigned int mvmpg_per_mcache;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans);
+	DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes);
+	DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes);
+	DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes);
+	DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
+	DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes);
+	DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary);
+	DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode);
+	DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x);
+	DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y);
+	DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width);
+	DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height);
+	DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width);
+	DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height);
+	DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width);
+	DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height);
+	DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes);
+#endif
+	DML_ASSERT(p->mcache_line_size_bytes != 0);
+	DML_ASSERT(p->mcache_size_bytes != 0);
+
+	*p->mvmpg_width = 0;
+	*p->mvmpg_height = 0;
+
+	if (p->full_vp_height == 0 && p->full_vp_width == 0) {
+		*p->num_mcaches = 0;
+		*p->mcache_row_bytes = 0;
+		*p->mcache_row_bytes_per_channel = 0;
+	} else {
+		blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode);
+
+		// If gpuvm is not enabled, the alignment boundary should be in terms of the tiling block size
+		vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
+
+		// With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height.
+		// But for a 4KB page with a 64KB tile block, we need the meta for all pages in the tile block.
+		// Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end.
+		*p->mvmpg_width = p->blk_width;
+		*p->mvmpg_height = p->blk_height;
+		if (p->gpuvm_enable) {
+			if (vmpg_bytes >= blk_bytes) {
+				*p->mvmpg_width = p->vmpg_width;
+				*p->mvmpg_height = p->vmpg_height;
+			} else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) {
+				DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__);
+				DML_ASSERT(0);
+			}
+		}
+
+		// For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c
+		full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width));
+		full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height));
+
+		*p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned;
+
+		// Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes.
+		if (!p->surf_vert) { // horizontal access
+			if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes)
+				*p->meta_row_width_ub = full_vp_width_mvmpg_aligned;
+			else
+				*p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width;
+			mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width;
+		} else { // vertical access
+			if (p->vp_stationary == 1)
+				*p->meta_row_width_ub = full_vp_height_mvmpg_aligned;
+			else
+				*p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height;
+			mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height;
+		}
+
+		if (p->gpuvm_enable) {
+			meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans;
+
+			// For vertical access with pages larger than a tile block, use est_blk_per_vmpg (between 2 and 4) so as not to be too pessimistic
+			if (p->surf_vert && vmpg_bytes > blk_bytes) {
+				meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans;
+			}
+
+			*p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom
+		} else {
+			meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans;
+
+			if (!p->surf_vert)
+				*p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0;
+			else
+				*p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel));
+		}
+
+		meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes);
+
+		// For a 4KB vmpg with a 64KB tile blk, account for the 16 pages per tile block
+		if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096))
+			meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub;
+
+		// If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes,
+		// then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes.
+		if (p->gpuvm_enable || p->surf_vert) {
+			*p->mcache_row_bytes_per_channel = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
+			*p->mcache_row_bytes = *p->mcache_row_bytes_per_channel * p->num_chans;
+		} else { // horizontal and gpuvm disable
+			*p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256;
+			if (p->mcache_line_size_bytes != 0)
+				*p->mcache_row_bytes_per_channel = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
+		}
+
+		// Divide in floating point: both operands are logged with %u above, so an
+		// integer division would truncate the ratio (usually to 0) and the factor
+		// would collapse to its 1 + 1/256 floor.
+		*p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, (double)*p->mcache_row_bytes / (double)p->full_swath_bytes); // dcc_dr_oh_pref
+		if (p->mcache_size_bytes != 0)
+			*p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes_per_channel / p->mcache_size_bytes, 1);
+
+		mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub;
+		*p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
+		DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes);
+		DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes);
+		DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel);
+		DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub);
+		DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub);
+		DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width);
+		DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height);
+		DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor);
+		DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor);
+#endif
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes);
+	DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel);
+	DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
+#endif
+	// NOTE(review): the empty-viewport branch above sets *p->num_mcaches to 0, which
+	// this assertion rejects -- confirm whether that case is expected to reach here.
+	DML_ASSERT(*p->num_mcaches > 0);
+}
+
+static void calculate_mcache_setting(
+ struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_calculate_mcache_setting_params *p)
+{
+ unsigned int n;
+
+ struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals;
+ memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals));
+
+ *p->num_mcaches_l = 0;
+ *p->mcache_row_bytes_l = 0;
+ *p->mcache_row_bytes_per_channel_l = 0;
+ *p->dcc_dram_bw_nom_overhead_factor_l = 1.0;
+ *p->dcc_dram_bw_pref_overhead_factor_l = 1.0;
+
+ *p->num_mcaches_c = 0;
+ *p->mcache_row_bytes_c = 0;
+ *p->mcache_row_bytes_per_channel_c = 0;
+ *p->dcc_dram_bw_nom_overhead_factor_c = 1.0;
+ *p->dcc_dram_bw_pref_overhead_factor_c = 1.0;
+
+ *p->mall_comb_mcache_l = 0;
+ *p->mall_comb_mcache_c = 0;
+ *p->lc_comb_mcache = 0;
+
+ if (!p->dcc_enable)
+ return;
+
+ l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha;
+
+ l->l_p.num_chans = p->num_chans;
+ l->l_p.mem_word_bytes = p->mem_word_bytes;
+ l->l_p.mcache_size_bytes = p->mcache_size_bytes;
+ l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
+ l->l_p.gpuvm_enable = p->gpuvm_enable;
+ l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
+ l->l_p.surf_vert = p->surf_vert;
+ l->l_p.vp_stationary = p->vp_stationary;
+ l->l_p.tiling_mode = p->tiling_mode;
+ l->l_p.vp_start_x = p->vp_start_x_l;
+ l->l_p.vp_start_y = p->vp_start_y_l;
+ l->l_p.full_vp_width = p->full_vp_width_l;
+ l->l_p.full_vp_height = p->full_vp_height_l;
+ l->l_p.blk_width = p->blk_width_l;
+ l->l_p.blk_height = p->blk_height_l;
+ l->l_p.vmpg_width = p->vmpg_width_l;
+ l->l_p.vmpg_height = p->vmpg_height_l;
+ l->l_p.full_swath_bytes = p->full_swath_bytes_l;
+ l->l_p.bytes_per_pixel = p->bytes_per_pixel_l;
+
+ // output
+ l->l_p.num_mcaches = p->num_mcaches_l;
+ l->l_p.mcache_row_bytes = p->mcache_row_bytes_l;
+ l->l_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_l;
+ l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l;
+ l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l;
+ l->l_p.mvmpg_width = &l->mvmpg_width_l;
+ l->l_p.mvmpg_height = &l->mvmpg_height_l;
+ l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l;
+ l->l_p.meta_row_width_ub = &l->meta_row_width_l;
+ l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l;
+
+ calculate_mcache_row_bytes(scratch, &l->l_p);
+ DML_ASSERT(*p->num_mcaches_l > 0);
+
+ if (l->is_dual_plane) {
+ l->c_p.num_chans = p->num_chans;
+ l->c_p.mem_word_bytes = p->mem_word_bytes;
+ l->c_p.mcache_size_bytes = p->mcache_size_bytes;
+ l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
+ l->c_p.gpuvm_enable = p->gpuvm_enable;
+ l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
+ l->c_p.surf_vert = p->surf_vert;
+ l->c_p.vp_stationary = p->vp_stationary;
+ l->c_p.tiling_mode = p->tiling_mode;
+ l->c_p.vp_start_x = p->vp_start_x_c;
+ l->c_p.vp_start_y = p->vp_start_y_c;
+ l->c_p.full_vp_width = p->full_vp_width_c;
+ l->c_p.full_vp_height = p->full_vp_height_c;
+ l->c_p.blk_width = p->blk_width_c;
+ l->c_p.blk_height = p->blk_height_c;
+ l->c_p.vmpg_width = p->vmpg_width_c;
+ l->c_p.vmpg_height = p->vmpg_height_c;
+ l->c_p.full_swath_bytes = p->full_swath_bytes_c;
+ l->c_p.bytes_per_pixel = p->bytes_per_pixel_c;
+
+ // output
+ l->c_p.num_mcaches = p->num_mcaches_c;
+ l->c_p.mcache_row_bytes = p->mcache_row_bytes_c;
+ l->c_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_c;
+ l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c;
+ l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c;
+ l->c_p.mvmpg_width = &l->mvmpg_width_c;
+ l->c_p.mvmpg_height = &l->mvmpg_height_c;
+ l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c;
+ l->c_p.meta_row_width_ub = &l->meta_row_width_c;
+ l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c;
+
+ calculate_mcache_row_bytes(scratch, &l->c_p);
+ DML_ASSERT(*p->num_mcaches_c > 0);
+ }
+
+ // Sharing for iMALL access
+ l->mcache_remainder_l = *p->mcache_row_bytes_per_channel_l % p->mcache_size_bytes;
+ l->mcache_remainder_c = *p->mcache_row_bytes_per_channel_c % p->mcache_size_bytes;
+ l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l;
+ l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c;
+
+ if (p->imall_enable) {
+ *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes);
+
+ if (l->is_dual_plane)
+ *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes);
+ }
+
+ if (!p->surf_vert) // horizonatal access
+ l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2;
+ else // vertical access
+ l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2;
+
+ // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c:
+ if (*p->num_mcaches_l) {
+ l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
+ }
+ if (l->is_dual_plane) {
+ l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
+
+ /* if either remainder is 0, then mcache sharing is not needed or not possible due to full utilization */
+ if (l->mcache_remainder_l && l->mcache_remainder_c) {
+ if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
+ l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
+ (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
+ }
+ *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable);
+ DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane);
+ DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l);
+ DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l);
+ DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l);
+ DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l);
+ DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l);
+
+ if (l->is_dual_plane) {
+ DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c);
+ DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c);
+ DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor);
+ DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c);
+ DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c);
+ DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c);
+ DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c);
+ DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size);
+ DML_LOG_VERBOSE("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache);
+ }
+#endif
+ // calculate split_coordinate
+ l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l;
+ l->full_vp_access_width_c = p->surf_vert ? p->full_vp_height_c : p->full_vp_width_c;
+
+ for (n = 0; n < *p->num_mcaches_l - 1; n++) {
+ p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l;
+ }
+ p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
+
+ if (l->is_dual_plane) {
+ for (n = 0; n < *p->num_mcaches_c - 1; n++) {
+ p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c;
+ }
+ p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
+ }
+#ifdef __DML_VBA_DEBUG__
+ for (n = 0; n < *p->num_mcaches_l; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
+
+ if (l->is_dual_plane) {
+ for (n = 0; n < *p->num_mcaches_c; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
+ }
+#endif
+
+ // Luma/Chroma combine in the last mcache
+ // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary
+ if (*p->lc_comb_mcache && l->is_dual_plane) {
+ for (n = 0; n < *p->num_mcaches_l - 1; n++)
+ p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l;
+ p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
+
+ for (n = 0; n < *p->num_mcaches_c - 1; n++)
+ p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c;
+ p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
+
+#ifdef __DML_VBA_DEBUG__
+ for (n = 0; n < *p->num_mcaches_l; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
+
+ for (n = 0; n < *p->num_mcaches_c; n++)
+ DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
+#endif
+ }
+
+ *p->mcache_shift_granularity_l = l->mvmpg_access_width_l;
+ *p->mcache_shift_granularity_c = l->mvmpg_access_width_c;
+}
+
+/*
+ * Fill the per-plane MALL prefetch overhead factors.
+ *
+ * For each of the first @num_active_planes planes, both output entries start
+ * at 1.0 and are then adjusted from the plane's overrides.legacy_svp_config:
+ *   - dml2_svp_mode_override_imall:
+ *         SDP factor = 1.25, DRAM factor = 2.0
+ *   - dml2_svp_mode_override_phantom_pipe_no_data_return:
+ *         SDP factor = 0.25, DRAM factor unchanged
+ *   - anything else: both factors stay at 1.0
+ *
+ * @mall_prefetch_sdp_overhead_factor:  output, indexed by plane
+ * @mall_prefetch_dram_overhead_factor: output, indexed by plane
+ * @display_cfg:       display configuration holding the plane descriptors
+ * @num_active_planes: number of leading plane entries to process
+ */
+static void calculate_mall_bw_overhead_factor(
+	double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref
+	double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref
+
+	// input
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int num_active_planes)
+{
+	unsigned int plane;
+
+	for (plane = 0; plane < num_active_planes; plane++) {
+		double sdp_factor = 1.0;	// SDP - on the return side
+		double dram_factor = 1.0;	// DRAM
+
+		switch (display_cfg->plane_descriptors[plane].overrides.legacy_svp_config) {
+		case dml2_svp_mode_override_imall:
+			sdp_factor = 1.25;
+			dram_factor = 2.0;
+			break;
+		case dml2_svp_mode_override_phantom_pipe_no_data_return:
+			sdp_factor = 0.25;
+			break;
+		default:
+			break;
+		}
+
+		mall_prefetch_sdp_overhead_factor[plane] = sdp_factor;
+		mall_prefetch_dram_overhead_factor[plane] = dram_factor;
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, plane, mall_prefetch_sdp_overhead_factor[plane]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, plane, mall_prefetch_dram_overhead_factor[plane]);
+#endif
+	}
+}
+
+/*
+ * Compute the derated return bandwidth for one SoC state / bandwidth type.
+ *
+ * The ideal bandwidths are:
+ *   SDP    = soc->return_bus_width_bytes * dcfclk_mhz
+ *   fabric = soc->fabric_datapath_to_dcn_data_return_bytes * fclk_mhz
+ *   DRAM   = dram_bw_mbps (taken as-is from the caller)
+ * Each is scaled by a percentage read from soc->qos_parameters.derate_table.
+ * The table row is picked from @is_avg_bw (average vs. urgent) and from
+ * @state_type (svp_prefetch uses the dcn_mall_prefetch_* rows; every other
+ * state uses the system_active_* rows).
+ *
+ * For urgent bandwidth the DRAM derate additionally depends on HostVM:
+ *   !is_hvm_en                 -> dram_derate_percent_pixel
+ *   is_hvm_en && is_hvm_only   -> dram_derate_percent_vm
+ *   is_hvm_en && !is_hvm_only  -> dram_derate_percent_pixel_and_vm
+ * For average bandwidth the HostVM flags are not consulted.
+ *
+ * Returns min(derated SDP, derated fabric) when @bw_type is
+ * dml2_core_internal_bw_sdp, otherwise the derated DRAM bandwidth.
+ */
+static double dml_get_return_bandwidth_available(
+	const struct dml2_soc_bb *soc,
+	enum dml2_core_internal_soc_state_type state_type,
+	enum dml2_core_internal_bw_type bw_type,
+	bool is_avg_bw,
+	bool is_hvm_en,
+	bool is_hvm_only,
+	double dcfclk_mhz,
+	double fclk_mhz,
+	double dram_bw_mbps)
+{
+	const bool is_svp_prefetch = (state_type == dml2_core_internal_soc_state_svp_prefetch);
+
+	const double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz;
+	const double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes;
+	const double ideal_dram_bandwidth = dram_bw_mbps;
+
+	double sdp_derate;
+	double fabric_derate;
+	double dram_derate;
+
+	double sdp_bw;
+	double fabric_bw;
+	double dram_bw;
+	double result_mbps;
+
+	if (is_avg_bw && is_svp_prefetch) {
+		sdp_derate = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0;
+		fabric_derate = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0;
+		dram_derate = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0;
+	} else if (is_avg_bw) {
+		/* every non-svp_prefetch state is treated as sys_active */
+		sdp_derate = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0;
+		fabric_derate = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0;
+		dram_derate = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0;
+	} else if (is_svp_prefetch) {
+		/* urgent bandwidth, svp_prefetch state */
+		sdp_derate = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0;
+		fabric_derate = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0;
+
+		if (!is_hvm_en)
+			dram_derate = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
+		else if (is_hvm_only)
+			dram_derate = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0;
+		else
+			dram_derate = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0;
+	} else {
+		/* urgent bandwidth, every other state is treated as sys_active */
+		sdp_derate = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0;
+		fabric_derate = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0;
+
+		if (!is_hvm_en)
+			dram_derate = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0;
+		else if (is_hvm_only)
+			dram_derate = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0;
+		else
+			dram_derate = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0;
+	}
+
+	sdp_bw = ideal_sdp_bandwidth * sdp_derate;
+	fabric_bw = ideal_fabric_bandwidth * fabric_derate;
+	dram_bw = ideal_dram_bandwidth * dram_derate;
+
+	/* SDP return is limited by the slower of SDP and fabric; anything else is DRAM */
+	result_mbps = (bw_type == dml2_core_internal_bw_sdp) ? math_min2(sdp_bw, fabric_bw) : dram_bw;
+
+	DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw);
+	DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en);
+	DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
+	DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
+	DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
+	DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
+	DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
+	DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth);
+	DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, sdp_bw, sdp_derate);
+	DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, fabric_bw, fabric_derate);
+	DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, dram_bw, dram_derate);
+	DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, result_mbps);
+	return result_mbps;
+}
+
+/*
+ * Populate the available-bandwidth tables for every SoC state.
+ *
+ * For each SoC state m and bandwidth type n this queries
+ * dml_get_return_bandwidth_available() and stores:
+ *   avg_bandwidth_available[m][n]           - average bandwidth
+ *   urg_bandwidth_available[m][n]           - urgent bandwidth (hvm_only = 0)
+ *   urg_bandwidth_available_vm_only[m]      - urgent DRAM bandwidth, hvm_only = 1
+ *   urg_bandwidth_available_pixel_and_vm[m] - urgent DRAM bandwidth, hvm_only = 0
+ *   avg_/urg_bandwidth_available_min[m]     - min of the DRAM and SDP entries
+ *
+ * @soc:          SoC bounding box supplying bus widths and derate tables
+ * @HostVMEnable: forwarded as the is_hvm_en argument of every query
+ * @dcfclk_mhz, @fclk_mhz, @dram_bw_mbps: forwarded unchanged to every query
+ */
+static noinline_for_stack void calculate_bandwidth_available(
+	double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
+	double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
+	double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
+	double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
+
+	const struct dml2_soc_bb *soc,
+	bool HostVMEnable,
+	double dcfclk_mhz,
+	double fclk_mhz,
+	double dram_bw_mbps)
+{
+	unsigned int n, m;
+
+	DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps);
+
+	// Calculate all the available bandwidth
+	for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+		for (n = 0; n < dml2_core_internal_bw_max; n++) {
+			avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc,
+				m, // soc_state
+				n, // bw_type
+				1, // avg_bw
+				HostVMEnable,
+				0, // hvm_only
+				dcfclk_mhz,
+				fclk_mhz,
+				dram_bw_mbps);
+
+			urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]);
+			DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]);
+#endif
+
+			// The vm_only / pixel_and_vm tables are indexed by soc_state only
+			// and are filled from the DRAM bandwidth type.
+			if (n == dml2_core_internal_bw_dram) {
+				urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+				urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
+			}
+		}
+
+		avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
+		urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]);
+		DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]);
+		// Index by soc_state (m): n has already run off the end of the
+		// bandwidth-type range once the inner loop finishes.
+		DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[m]);
+#endif
+	}
+}
+
+/*
+ * Accumulate the average bandwidth required per SoC state and bandwidth type.
+ *
+ * Every entry of @avg_bandwidth_required is first cleared. Then, for each of
+ * the first @num_active_planes planes, the plane's contribution is:
+ *   SDP:  mall_prefetch_sdp_overhead_factor[k] * (luma + chroma) + cursor_bw[k]
+ *   DRAM: (dcc_p0[k] * mall_dram[k]) * luma
+ *       + (dcc_p1[k] * mall_dram[k]) * chroma + cursor_bw[k]
+ * The contribution is always added to the svp_prefetch row, and added to the
+ * sys_active row only when dml_is_phantom_pipe() reports the plane is not a
+ * phantom pipe. No other SoC-state row is accumulated here.
+ *
+ * @avg_bandwidth_required: output table, [soc_state][bw_type]
+ * @display_cfg:            display configuration holding the plane descriptors
+ * @num_active_planes:      number of leading plane entries to process
+ * @ReadBandwidthLuma, @ReadBandwidthChroma, @cursor_bw: per-plane bandwidths
+ * @dcc_dram_bw_nom_overhead_factor_p0/_p1: per-plane DCC DRAM overhead factors
+ *                          applied to luma (p0) and chroma (p1)
+ * @mall_prefetch_dram_overhead_factor, @mall_prefetch_sdp_overhead_factor:
+ *                          per-plane MALL overhead factors
+ */
+static void calculate_avg_bandwidth_required(
+	double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+
+	// input
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int num_active_planes,
+	double ReadBandwidthLuma[],
+	double ReadBandwidthChroma[],
+	double cursor_bw[],
+	double dcc_dram_bw_nom_overhead_factor_p0[],
+	double dcc_dram_bw_nom_overhead_factor_p1[],
+	double mall_prefetch_dram_overhead_factor[],
+	double mall_prefetch_sdp_overhead_factor[])
+{
+	unsigned int n, m, k;
+	double sdp_overhead_factor;
+	double dram_overhead_factor_p0;
+	double dram_overhead_factor_p1;
+	double plane_sdp_bw;
+	double plane_dram_bw;
+
+	// Average BW support check
+	for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+		for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram
+			avg_bandwidth_required[m][n] = 0;
+		}
+	}
+
+	// SysActive and SVP Prefetch AVG bandwidth Check
+	for (k = 0; k < num_active_planes; ++k) {
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: plane %u\n", __func__, k);
+		DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]);
+		DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]);
+		DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]);
+		DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]);
+#endif
+
+		sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k];
+		dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k];
+		dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k];
+
+		// This plane's contribution; the same value feeds both rows below.
+		plane_sdp_bw = sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
+		plane_dram_bw = dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
+
+		// FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation?
+		// active avg bw not include phantom, but svp_prefetch avg bw should include phantom pipes
+		if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+			avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += plane_sdp_bw;
+			avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += plane_dram_bw;
+		}
+		avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += plane_sdp_bw;
+		avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += plane_dram_bw;
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
+		DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
+		DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
+		DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
+#endif
+	}
+}
+
+static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_CalculateVMRowAndSwath_params *p)
+{
+ struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals;
+
+ s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels);
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->display_cfg->gpuvm_enable == true) {
+ p->vm_group_bytes[k] = 512;
+ p->dpte_group_bytes[k] = 512;
+ } else {
+ p->vm_group_bytes[k] = 0;
+ p->dpte_group_bytes[k] = 0;
+ }
+
+ if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) {
+ if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
+ s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
+ s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
+ } else {
+ s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
+ s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
+ }
+
+ scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
+ scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
+ scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
+ scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC;
+ scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC;
+ scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
+ scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
+ scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC;
+ scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
+ scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k];
+ scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC;
+ scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC;
+ scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+ scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k];
+ scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC;
+ scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC;
+ scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC;
+ scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
+ scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC;
+ scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
+
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k];
+ scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k];
+ scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k];
+ scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k];
+
+ scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k];
+ scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k];
+
+ s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
+
+ p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
+ p->myPipe[k].VRatioChroma,
+ p->myPipe[k].VTapsChroma,
+ p->myPipe[k].InterlaceEnable,
+ p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ p->myPipe[k].SwathHeightC,
+ p->myPipe[k].RotationAngle,
+ p->myPipe[k].mirrored,
+ p->myPipe[k].ViewportStationary,
+ p->SwathWidthC[k],
+ p->myPipe[k].ViewportHeightC,
+ p->myPipe[k].ViewportXStartC,
+ p->myPipe[k].ViewportYStartC,
+
+ // Output
+ &p->VInitPreFillC[k],
+ &p->MaxNumSwathC[k]);
+ } else {
+ s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
+ s->PTEBufferSizeInRequestsForChroma[k] = 0;
+ s->PixelPTEBytesPerRowC[k] = 0;
+ s->PixelPTEBytesPerRowStorageC[k] = 0;
+ s->vm_bytes_c = 0;
+ p->MaxNumSwathC[k] = 0;
+ p->PrefetchSourceLinesC[k] = 0;
+ s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
+ s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
+ s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
+ }
+
+ scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
+ scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
+ scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
+ scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY;
+ scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY;
+ scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
+ scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
+ scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY;
+ scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
+ scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k];
+ scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight;
+ scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart;
+ scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
+ scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+ scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k];
+ scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY;
+ scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY;
+ scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY;
+ scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
+ scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY;
+ scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
+
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k];
+ scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k];
+ scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k];
+ scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k];
+ scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k];
+ scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k];
+
+ scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k];
+ scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k];
+ scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k];
+
+ s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
+
+ p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
+ p->myPipe[k].VRatio,
+ p->myPipe[k].VTaps,
+ p->myPipe[k].InterlaceEnable,
+ p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
+ p->myPipe[k].SwathHeightY,
+ p->myPipe[k].RotationAngle,
+ p->myPipe[k].mirrored,
+ p->myPipe[k].ViewportStationary,
+ p->SwathWidthY[k],
+ p->myPipe[k].ViewportHeight,
+ p->myPipe[k].ViewportXStart,
+ p->myPipe[k].ViewportYStart,
+
+ // Output
+ &p->VInitPreFillY[k],
+ &p->MaxNumSwathY[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l);
+ DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c);
+ DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]);
+#endif
+ p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels);
+ p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k];
+ p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k];
+ p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k];
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]);
+#endif
+ if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
+ p->PTEBufferSizeNotExceeded[k] = true;
+ } else {
+ p->PTEBufferSizeNotExceeded[k] = false;
+ }
+
+ s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
+ s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
+#ifdef __DML_VBA_DEBUG__
+ if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) {
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
+
+ DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
+ }
+#endif
+ }
+
+ CalculateMALLUseForStaticScreen(
+ p->display_cfg,
+ p->NumberOfActiveSurfaces,
+ p->MALLAllocatedForDCN,
+ p->SurfaceSizeInMALL,
+ s->one_row_per_frame_fits_in_buffer,
+ // Output
+ p->is_using_mall_for_ss);
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->display_cfg->gpuvm_enable) {
+ if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) {
+ p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value;
+ }
+ p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
+ dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64);
+ p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12);
+ } else {
+ p->PTE_BUFFER_MODE[k] = 0;
+ p->BIGK_FRAGMENT_SIZE[k] = 0;
+ }
+ }
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ p->DCCMetaBufferSizeNotExceeded[k] = true;
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]);
+#endif
+ p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
+ (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle));
+
+ p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame);
+
+ if (p->use_one_row_for_frame[k]) {
+ p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
+ p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
+ p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
+ p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
+ s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
+ p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
+ }
+
+ if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) {
+ p->DCCMetaBufferSizeNotExceeded[k] = true;
+ } else {
+ p->DCCMetaBufferSizeNotExceeded[k] = false;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
+#endif
+ }
+
+ s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
+ s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
+ p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
+ p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k];
+ p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k];
+
+ // If one row of dPTEs is meant to span the entire frame, then for these calculations we pretend that the one big row is fetched in two halves
+ if (p->use_one_row_for_frame[k])
+ p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
+
+ CalculateRowBandwidth(
+ p->display_cfg->gpuvm_enable,
+ p->use_one_row_for_frame[k],
+ p->myPipe[k].SourcePixelFormat,
+ p->myPipe[k].VRatio,
+ p->myPipe[k].VRatioChroma,
+ p->myPipe[k].DCCEnable,
+ p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
+ s->PixelPTEBytesPerRowY[k],
+ s->PixelPTEBytesPerRowC[k],
+ p->dpte_row_height_luma[k],
+ p->dpte_row_height_chroma[k],
+
+ p->mrq_present,
+ p->meta_row_bytes_per_row_ub_l[k],
+ p->meta_row_bytes_per_row_ub_c[k],
+ p->meta_row_height_luma[k],
+ p->meta_row_height_chroma[k],
+
+ // Output
+ &p->dpte_row_bw[k],
+ &p->meta_row_bw[k]);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
+#endif
+ }
+}
+
+/*
+ * CalculateUrgentLatency - select the urgent latency for the active QoS model.
+ *
+ * dml2_qos_param_type_dcn4x: the latency is the sum of
+ *   - (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock
+ *   - max_round_trip_to_furthest_cs_fclk_cycles / FabricClock, padded by
+ *     fabric_max_transport_latency_margin percent
+ *   - urgent_ramp_uclk_cycles / uclk_freq_mhz, padded by
+ *     umc_urgent_ramp_latency_margin percent
+ *
+ * Any other QoS type: the largest of the three UrgentLatency* inputs, plus
+ * (when DoUrgentLatencyAdjustment is set)
+ * UrgentLatencyAdjustmentFabricClockComponent *
+ * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
+ *
+ * Returns the resulting latency (presumably in microseconds, given cycle
+ * counts divided by MHz clocks - confirm against callers).
+ */
+static double CalculateUrgentLatency(
+	double UrgentLatencyPixelDataOnly,
+	double UrgentLatencyPixelMixedWithVMData,
+	double UrgentLatencyVMDataOnly,
+	bool DoUrgentLatencyAdjustment,
+	double UrgentLatencyAdjustmentFabricClockComponent,
+	double UrgentLatencyAdjustmentFabricClockReference,
+	double FabricClock,
+	double uclk_freq_mhz,
+	enum dml2_qos_param_type qos_type,
+	unsigned int urgent_ramp_uclk_cycles,
+	unsigned int df_qos_response_time_fclk_cycles,
+	unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
+	unsigned int mall_overhead_fclk_cycles,
+	double umc_urgent_ramp_latency_margin,
+	double fabric_max_transport_latency_margin)
+{
+	double latency;
+
+	if (qos_type != dml2_qos_param_type_dcn4x) {
+		// Legacy model: worst case of the three supplied latencies.
+		latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
+		if (DoUrgentLatencyAdjustment == true)
+			latency += UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
+	} else {
+		// Margins are given in percent; turn them into multipliers.
+		const double fabric_margin = 1 + fabric_max_transport_latency_margin / 100.0;
+		const double umc_margin = 1 + umc_urgent_ramp_latency_margin / 100.0;
+
+		latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock
+			+ max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * fabric_margin
+			+ urgent_ramp_uclk_cycles / uclk_freq_mhz * umc_margin;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	if (qos_type != dml2_qos_param_type_dcn4x) {
+		DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly);
+		DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData);
+		DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly);
+		DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent);
+		DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference);
+	} else {
+		DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
+		DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
+		DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin);
+	}
+	DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, latency);
+#endif
+	return latency;
+}
+
+/*
+ * CalculateTripToMemory - trip-to-memory latency for the active QoS model.
+ *
+ * For dml2_qos_param_type_dcn4x the result is built from cycle counts:
+ * mall_overhead_fclk_cycles and max_round_trip_to_furthest_cs_fclk_cycles
+ * over FabricClock (the latter padded by fabric_max_transport_latency_margin
+ * percent) plus trip_to_memory_uclk_cycles over uclk_freq_mhz (padded by
+ * umc_max_latency_margin percent).
+ *
+ * For every other QoS type the result is simply UrgLatency.
+ */
+static double CalculateTripToMemory(
+	double UrgLatency,
+	double FabricClock,
+	double uclk_freq_mhz,
+	enum dml2_qos_param_type qos_type,
+	unsigned int trip_to_memory_uclk_cycles,
+	unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
+	unsigned int mall_overhead_fclk_cycles,
+	double umc_max_latency_margin,
+	double fabric_max_transport_latency_margin)
+{
+	const bool is_dcn4x = (qos_type == dml2_qos_param_type_dcn4x);
+	// Non-dcn4x QoS types reuse the urgent latency unchanged.
+	double trip_to_memory_us = UrgLatency;
+
+	if (is_dcn4x) {
+		trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock
+			+ max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
+			+ trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	if (!is_dcn4x) {
+		DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
+	} else {
+		DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
+		DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
+		DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock);
+		DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin);
+		DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin);
+	}
+	DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us);
+#endif
+
+	return trip_to_memory_us;
+}
+
+/*
+ * CalculateMetaTripToMemory - meta trip-to-memory latency for the active QoS
+ * model.
+ *
+ * For dml2_qos_param_type_dcn4x the result is
+ * meta_trip_to_memory_fclk_cycles / FabricClock (padded by
+ * fabric_max_transport_latency_margin percent) plus
+ * meta_trip_to_memory_uclk_cycles / uclk_freq_mhz (padded by
+ * umc_max_latency_margin percent).
+ *
+ * For every other QoS type the result is simply UrgLatency.
+ */
+static double CalculateMetaTripToMemory(
+	double UrgLatency,
+	double FabricClock,
+	double uclk_freq_mhz,
+	enum dml2_qos_param_type qos_type,
+	unsigned int meta_trip_to_memory_uclk_cycles,
+	unsigned int meta_trip_to_memory_fclk_cycles,
+	double umc_max_latency_margin,
+	double fabric_max_transport_latency_margin)
+{
+	const bool is_dcn4x = (qos_type == dml2_qos_param_type_dcn4x);
+	// Non-dcn4x QoS types reuse the urgent latency unchanged.
+	double meta_trip_to_memory_us = UrgLatency;
+
+	if (is_dcn4x) {
+		meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
+			+ meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	if (!is_dcn4x) {
+		DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
+	} else {
+		DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type);
+		DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles);
+		DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
+	}
+	DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us);
+#endif
+
+	return meta_trip_to_memory_us;
+}
+
+/*
+ * calculate_cursor_req_attributes - derive the cursor fetch geometry.
+ *
+ * @cursor_width: cursor width in pixels
+ * @cursor_bpp: bits per cursor pixel; 2, 32 and 64 are the handled values
+ * @cursor_lines_per_chunk: out, cursor lines contained in one chunk
+ * @cursor_bytes_per_line: out, bytes per cursor line after rounding the line
+ *                         up (math_ceil2) to the selected request size
+ * @cursor_bytes_per_chunk: out, cursor_bytes_per_line * cursor_lines_per_chunk
+ * @cursor_bytes: out, cursor_bytes_per_line * cursor_width (the cursor is
+ *                assumed to be square)
+ *
+ * An unsupported cursor_bpp is reported (log + DML_ASSERT) only when
+ * cursor_width is non-zero. In that case, and for a zero-width cursor with an
+ * unsupported bpp, *cursor_lines_per_chunk is set to 0 so that every output
+ * is written with a deterministic value on all paths.
+ */
+static void calculate_cursor_req_attributes(
+	unsigned int cursor_width,
+	unsigned int cursor_bpp,
+
+	// output
+	unsigned int *cursor_lines_per_chunk,
+	unsigned int *cursor_bytes_per_line,
+	unsigned int *cursor_bytes_per_chunk,
+	unsigned int *cursor_bytes)
+{
+	unsigned int cursor_bytes_per_req = 0;
+	unsigned int cursor_width_bytes = 0;
+	unsigned int cursor_height = 0;
+
+	//SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply.
+	//- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B
+	//- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width
+
+	//The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows.
+
+	cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1);
+	if (cursor_width_bytes <= 64)
+		cursor_bytes_per_req = 64;
+	else if (cursor_width_bytes <= 128)
+		cursor_bytes_per_req = 128;
+	else
+		cursor_bytes_per_req = 256;
+
+	//If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line.
+	*cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req);
+
+	//Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines.
+	if (cursor_bpp == 2) {
+		*cursor_lines_per_chunk = 16;
+	} else if (cursor_bpp == 32) {
+		if (cursor_width <= 32)
+			*cursor_lines_per_chunk = 16;
+		else if (cursor_width <= 64)
+			*cursor_lines_per_chunk = 8;
+		else if (cursor_width <= 128)
+			*cursor_lines_per_chunk = 4;
+		else
+			*cursor_lines_per_chunk = 2;
+	} else if (cursor_bpp == 64) {
+		if (cursor_width <= 16)
+			*cursor_lines_per_chunk = 16;
+		else if (cursor_width <= 32)
+			*cursor_lines_per_chunk = 8;
+		else if (cursor_width <= 64)
+			*cursor_lines_per_chunk = 4;
+		else if (cursor_width <= 128)
+			*cursor_lines_per_chunk = 2;
+		else
+			*cursor_lines_per_chunk = 1;
+	} else {
+		// No chunk geometry exists for this bpp. Write a defined value so the
+		// multiplication below never reads whatever the caller's storage held.
+		*cursor_lines_per_chunk = 0;
+		if (cursor_width > 0) {
+			DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp);
+			DML_ASSERT(0);
+		}
+	}
+
+	*cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk;
+
+	// For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize.
+	// Only cursor_width is provided for worst case sizing so assume that the cursor is square
+	cursor_height = cursor_width;
+	*cursor_bytes = *cursor_bytes_per_line * cursor_height;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp);
+	DML_LOG_VERBOSE("DML::%s: cursor_width = %d\n", __func__, cursor_width);
+	DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes);
+	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req);
+	DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk);
+	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line);
+	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk);
+	DML_LOG_VERBOSE("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes);
+	DML_LOG_VERBOSE("DML::%s: cursor_pitch = %d\n", __func__, cursor_bpp == 2 ? 256 : (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1));
+#endif
+}
+
+/*
+ * calculate_cursor_urgent_burst_factor - urgent burst factor for the cursor.
+ *
+ * Does nothing (outputs are left untouched) when CursorWidth is 0.
+ *
+ * Otherwise the number of cursor lines held by the buffer is derived from
+ * CursorBufferSize (logged as kbytes), cursor_bytes_per_chunk and
+ * cursor_lines_per_chunk, and converted to time with LineTime. If that time
+ * does not exceed UrgentLatency, *NotEnoughUrgentLatencyHiding is set and the
+ * factor is 1; otherwise the flag is cleared and the factor is
+ * buffer_time / (buffer_time - UrgentLatency).
+ */
+static void calculate_cursor_urgent_burst_factor(
+	unsigned int CursorBufferSize,
+	unsigned int CursorWidth,
+	unsigned int cursor_bytes_per_chunk,
+	unsigned int cursor_lines_per_chunk,
+	double LineTime,
+	double UrgentLatency,
+
+	double *UrgentBurstFactorCursor,
+	bool *NotEnoughUrgentLatencyHiding)
+{
+	unsigned int lines_in_buffer;
+	double buffer_time;
+
+	// No cursor on this plane: nothing to compute.
+	if (CursorWidth == 0)
+		return;
+
+	// Whole chunks that fit in the buffer, expressed in cursor lines.
+	lines_in_buffer = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk;
+	buffer_time = lines_in_buffer * LineTime;
+
+	if (buffer_time - UrgentLatency <= 0) {
+		*NotEnoughUrgentLatencyHiding = 1;
+		*UrgentBurstFactorCursor = 1;
+	} else {
+		*NotEnoughUrgentLatencyHiding = 0;
+		*UrgentBurstFactorCursor = buffer_time / (buffer_time - UrgentLatency);
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, lines_in_buffer);
+	DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, buffer_time);
+	DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize);
+	DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk);
+	DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk);
+	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor);
+	DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
+#endif
+}
+
+/*
+ * CalculateUrgentBurstFactor - urgent burst factors for luma and chroma.
+ *
+ * For each plane the number of lines held by the DET is computed from the DET
+ * size (1024 * 1024 is used instead when dml_is_phantom_pipe(plane_cfg) is
+ * true), the bytes per pixel and the swath width; it is floored to the swath
+ * height via math_floor2 and converted to time with LineTime / VRatio.
+ *
+ * If that time does not exceed UrgentLatency, *NotEnoughUrgentLatencyHiding is
+ * set and the factor is 1; otherwise the factor is
+ * det_time / (det_time - UrgentLatency).
+ *
+ * Chroma is only evaluated when BytePerPixelInDETC > 0; otherwise
+ * *UrgentBurstFactorChroma stays 0.
+ */
+static void CalculateUrgentBurstFactor(
+	const struct dml2_plane_parameters *plane_cfg,
+	unsigned int swath_width_luma_ub,
+	unsigned int swath_width_chroma_ub,
+	unsigned int SwathHeightY,
+	unsigned int SwathHeightC,
+	double LineTime,
+	double UrgentLatency,
+	double VRatio,
+	double VRatioC,
+	double BytePerPixelInDETY,
+	double BytePerPixelInDETC,
+	unsigned int DETBufferSizeY,
+	unsigned int DETBufferSizeC,
+	// Output
+	double *UrgentBurstFactorLuma,
+	double *UrgentBurstFactorChroma,
+	bool *NotEnoughUrgentLatencyHiding)
+{
+	unsigned int det_size_luma;
+	double det_lines_luma;
+	double det_time_luma;
+
+	// Defaults: enough hiding, no burst factor computed yet.
+	*UrgentBurstFactorChroma = 0;
+	*UrgentBurstFactorLuma = 0;
+	*NotEnoughUrgentLatencyHiding = 0;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
+	DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC);
+	DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY);
+	DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC);
+	DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY);
+	DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
+	DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime);
+#endif
+	DML_ASSERT(VRatio > 0);
+
+	det_size_luma = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY;
+	det_lines_luma = det_size_luma / BytePerPixelInDETY / swath_width_luma_ub;
+	det_time_luma = math_floor2(det_lines_luma, SwathHeightY) * LineTime / VRatio;
+
+	if (det_time_luma - UrgentLatency <= 0) {
+		*NotEnoughUrgentLatencyHiding = 1;
+		*UrgentBurstFactorLuma = 1;
+	} else {
+		*UrgentBurstFactorLuma = det_time_luma / (det_time_luma - UrgentLatency);
+	}
+
+	if (BytePerPixelInDETC > 0) {
+		unsigned int det_size_chroma = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC;
+		double det_lines_chroma = det_size_chroma / BytePerPixelInDETC / swath_width_chroma_ub;
+		double det_time_chroma = math_floor2(det_lines_chroma, SwathHeightC) * LineTime / VRatioC;
+
+		if (det_time_chroma - UrgentLatency <= 0) {
+			*NotEnoughUrgentLatencyHiding = 1;
+			*UrgentBurstFactorChroma = 1;
+		} else {
+			*UrgentBurstFactorChroma = det_time_chroma / (det_time_chroma - UrgentLatency);
+		}
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, det_lines_luma);
+	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
+	DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, det_time_luma);
+	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma);
+	DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma);
+	DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
+#endif
+}
+
+/*
+ * CalculateDCFCLKDeepSleepTdlut - deep-sleep DCFCLK requirement, including the
+ * 3DLUT delivery constraint.
+ *
+ * Per surface, a requirement is derived from the swath width, bytes per pixel
+ * and the display pipe line delivery time (luma, and chroma when
+ * BytePerPixelC[k] is non-zero), floored at pixel rate / 16. When the plane is
+ * set up for tdlut and tdlut_bytes_to_deliver[k] > 0, the requirement is
+ * raised to cover tdlut_bytes_to_deliver[k] / 64 / prefetch_swath_time_us[k]
+ * and, in that case, to at least dispclk / 4.
+ *
+ * *DCFClkDeepSleep is the maximum of 8.0, the total read bandwidth scaled by
+ * __DML2_CALCS_DCFCLK_FACTOR__ / ReturnBusWidth, and every per-surface
+ * requirement.
+ */
+static void CalculateDCFCLKDeepSleepTdlut(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int BytePerPixelY[],
+	unsigned int BytePerPixelC[],
+	unsigned int SwathWidthY[],
+	unsigned int SwathWidthC[],
+	unsigned int DPPPerSurface[],
+	double PSCL_THROUGHPUT[],
+	double PSCL_THROUGHPUT_CHROMA[],
+	double Dppclk[],
+	double ReadBandwidthLuma[],
+	double ReadBandwidthChroma[],
+	unsigned int ReturnBusWidth,
+
+	double dispclk,
+	unsigned int tdlut_bytes_to_deliver[],
+	double prefetch_swath_time_us[],
+
+	// Output
+	double *DCFClkDeepSleep)
+{
+	double per_surface_dcfclk[DML2_MAX_PLANES];
+	double total_read_bw = 0.0;
+
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+		double luma_delivery_time;
+
+		if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1)
+			luma_delivery_time = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz;
+		else
+			luma_delivery_time = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		if (BytePerPixelC[k] == 0) {
+			// Single-plane surface: only luma drives the requirement.
+			per_surface_dcfclk[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / luma_delivery_time;
+		} else {
+			double chroma_delivery_time;
+
+			if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1)
+				chroma_delivery_time = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz;
+			else
+				chroma_delivery_time = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+
+			per_surface_dcfclk[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / luma_delivery_time,
+				__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / chroma_delivery_time);
+		}
+		per_surface_dcfclk[k] = math_max2(per_surface_dcfclk[k], pixel_rate_mhz / 16);
+
+		// adjust for 3dlut delivery time
+		if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) {
+			double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k];
+
+			DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, per_surface_dcfclk[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk);
+
+			// increase the deepsleep dcfclk to match the original dispclk throughput rate
+			if (tdlut_required_deepsleep_dcfclk > per_surface_dcfclk[k]) {
+				per_surface_dcfclk[k] = math_max2(per_surface_dcfclk[k], tdlut_required_deepsleep_dcfclk);
+				per_surface_dcfclk[k] = math_max2(per_surface_dcfclk[k], dispclk / 4.0);
+			}
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz);
+		DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, per_surface_dcfclk[k]);
+#endif
+
+		// Accumulated in surface order, luma before chroma.
+		total_read_bw = total_read_bw + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
+	}
+
+	*DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * total_read_bw / (double)ReturnBusWidth);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__);
+	DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, total_read_bw);
+	DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
+	DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
+#endif
+
+	// The final value must also satisfy every individual surface.
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k)
+		*DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, per_surface_dcfclk[k]);
+
+	DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
+}
+
+/*
+ * CalculateDCFCLKDeepSleep - deep-sleep DCFCLK requirement without any 3DLUT
+ * contribution.
+ *
+ * Thin wrapper around CalculateDCFCLKDeepSleepTdlut() that passes a dispclk
+ * of 0 and all-zero tdlut_bytes_to_deliver / prefetch_swath_time_us arrays.
+ */
+static noinline_for_stack void CalculateDCFCLKDeepSleep(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int BytePerPixelY[],
+	unsigned int BytePerPixelC[],
+	unsigned int SwathWidthY[],
+	unsigned int SwathWidthC[],
+	unsigned int DPPPerSurface[],
+	double PSCL_THROUGHPUT[],
+	double PSCL_THROUGHPUT_CHROMA[],
+	double Dppclk[],
+	double ReadBandwidthLuma[],
+	double ReadBandwidthChroma[],
+	unsigned int ReturnBusWidth,
+
+	// Output
+	double *DCFClkDeepSleep)
+{
+	// Zero-filled stand-ins for the tdlut inputs.
+	unsigned int no_tdlut_bytes[DML2_MAX_PLANES] = { 0 };
+	double no_prefetch_swath_time[DML2_MAX_PLANES] = { 0 };
+
+	CalculateDCFCLKDeepSleepTdlut(
+		display_cfg,
+		NumberOfActiveSurfaces,
+		BytePerPixelY,
+		BytePerPixelC,
+		SwathWidthY,
+		SwathWidthC,
+		DPPPerSurface,
+		PSCL_THROUGHPUT,
+		PSCL_THROUGHPUT_CHROMA,
+		Dppclk,
+		ReadBandwidthLuma,
+		ReadBandwidthChroma,
+		ReturnBusWidth,
+		0, // dispclk
+		no_tdlut_bytes, // tdlut_bytes_to_deliver
+		no_prefetch_swath_time, // prefetch_swath_time_us
+
+		// Output
+		DCFClkDeepSleep);
+}
+
+/*
+ * CalculateWriteBackDelay - writeback delay derived from the scaler geometry.
+ *
+ * Returns 0 when the unclamped index of the last output line is negative;
+ * otherwise returns
+ * last_output_line * line_length + (HTotal - WritebackDestinationWidth) + 80.
+ *
+ * WritebackPixelFormat and WritebackHRatio are accepted but not used by the
+ * computation.
+ */
+static double CalculateWriteBackDelay(
+	enum dml2_source_format_class WritebackPixelFormat,
+	double WritebackHRatio,
+	double WritebackVRatio,
+	unsigned int WritebackVTaps,
+	unsigned int WritebackDestinationWidth,
+	unsigned int WritebackDestinationHeight,
+	unsigned int WritebackSourceHeight,
+	unsigned int HTotal)
+{
+	const double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
+	const double line_length = math_max2((double)WritebackDestinationWidth, math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
+	const double last_output_line = WritebackDestinationHeight - 1 - math_ceil2(((double)WritebackSourceHeight - (double)v_init) / (double)WritebackVRatio, 1.0);
+
+	if (last_output_line < 0)
+		return 0;
+
+	return last_output_line * line_length + (HTotal - WritebackDestinationWidth) + 80;
+}
+
+/*
+ * CalculateMaxVStartup - upper bound for VSTARTUP, in lines.
+ *
+ * @ptoi_supported: when false, an interlaced timing uses the halved formula
+ * @vblank_nom_default_us: fallback vblank duration, used (converted to lines)
+ *                         when timing->vblank_nom is 0
+ * @timing: source of h_total, v_total, v_active, pixel_clock_khz, vblank_nom
+ *          and interlaced
+ * @write_back_delay_us: writeback delay to reserve at the end of the vblank
+ *
+ * The usable vblank is min(v_total - v_active, vblank_avail). For an
+ * interlaced timing without ptoi support the result is
+ * floor((vblank_size - 1) / 2); otherwise it is vblank_size minus
+ * max(1, ceil(write_back_delay_us / line_time_us)). The result is finally
+ * capped at DML_MAX_VSTARTUP_START.
+ *
+ * Both subtractions saturate at 0: with plain unsigned arithmetic a vblank
+ * smaller than the reserved lines would wrap to a huge value and the final
+ * cap would then report DML_MAX_VSTARTUP_START instead of "no room".
+ */
+static unsigned int CalculateMaxVStartup(
+	bool ptoi_supported,
+	unsigned int vblank_nom_default_us,
+	const struct dml2_timing_cfg *timing,
+	double write_back_delay_us)
+{
+	unsigned int vblank_size = 0;
+	unsigned int max_vstartup_lines = 0;
+
+	double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000);
+	unsigned int vblank_actual = timing->v_total - timing->v_active;
+	unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0);
+	unsigned int vblank_avail = (timing->vblank_nom == 0) ? vblank_nom_default_in_line : (unsigned int)timing->vblank_nom;
+
+	vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail);
+
+	if (timing->interlaced && !ptoi_supported) {
+		// vblank_size - 1 must not wrap when there is no vblank at all.
+		if (vblank_size > 0)
+			max_vstartup_lines = (unsigned int)(math_floor2((vblank_size - 1) / 2.0, 1.0));
+		else
+			max_vstartup_lines = 0;
+	} else {
+		// At least one line is always reserved at the end of the vblank.
+		unsigned int reserved_lines = (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0));
+
+		if (vblank_size > reserved_lines)
+			max_vstartup_lines = vblank_size - reserved_lines;
+		else
+			max_vstartup_lines = 0;
+	}
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom);
+	DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
+	DML_LOG_VERBOSE("DML::%s: line_time_us = %f\n", __func__, line_time_us);
+	DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
+	DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
+	DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
+#endif
+	max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START);
+	return max_vstartup_lines;
+}
+
+ /*
+  * CalculateSwathAndDETConfiguration - choose, for every active surface, the
+  * swath heights, the luma/chroma request sizes and the luma/chroma split of
+  * the DET buffer, and report whether the viewport fits.
+  *
+  * Sequence, as implemented below:
+  *  1. CalculateSwathWidth() fills the swath widths and maximum swath heights.
+  *  2. The byte size of one full-height swath is derived per plane.
+  *  3. UnboundedRequest() and CalculateDETBufferSize() decide unbounded
+  *     requesting and the per-pipe DET / compressed buffer sizes.
+  *  4. Per surface, the swath heights are kept or halved so that the swaths
+  *     fit in half of the DET; *p->ViewportSizeSupport is cleared otherwise.
+  *  5. *p->compbuf_reserved_space_64b and *p->hw_debug5 are derived.
+  *
+  * @scratch: provides CalculateDETBufferSize_locals for the helper call.
+  * @p: bundle of inputs and output pointers; all results are written through it.
+  */
+ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p)
+ {
+ unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 };
+ unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 };
+ unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 };
+ unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 };
+ unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 };
+ unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 };
+
+ unsigned int TotalActiveDPP = 0;
+ bool NoChromaOrLinear = true;
+ unsigned int SurfaceDoingUnboundedRequest = 0;
+ unsigned int DETBufferSizeInKByteForSwathCalculation;
+
+ /*
+  * Scaling factors for the compbuf_reserved_space_64b and hw_debug5
+  * computations at the end of the function.
+  * NOTE(review): presumably the TTU FIFO depth and the maximum compression
+  * ratio - confirm against the hardware documentation.
+  */
+ const long TTUFIFODEPTH = 8;
+ const long MAXIMUMCOMPRESSION = 4;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
+ }
+#endif
+ /* Step 1: everything after the "Output" marker is filled in by the callee. */
+ CalculateSwathWidth(
+ p->display_cfg,
+ p->ForceSingleDPP,
+ p->NumberOfActiveSurfaces,
+ p->ODMMode,
+ p->BytePerPixY,
+ p->BytePerPixC,
+ p->Read256BytesBlockHeightY,
+ p->Read256BytesBlockHeightC,
+ p->Read256BytesBlockWidthY,
+ p->Read256BytesBlockWidthC,
+ p->surf_linear128_l,
+ p->surf_linear128_c,
+ p->DPPPerSurface,
+
+ // Output
+ p->req_per_swath_ub_l,
+ p->req_per_swath_ub_c,
+ SwathWidthSingleDPP,
+ SwathWidthSingleDPPChroma,
+ p->SwathWidth,
+ p->SwathWidthChroma,
+ MaximumSwathHeightY,
+ MaximumSwathHeightC,
+ p->swath_width_luma_ub,
+ p->swath_width_chroma_ub);
+
+ /*
+  * Step 2: bytes of one full-height swath per plane =
+  * swath width (ub) * bytes per pixel in the DET * maximum swath height.
+  */
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
+ p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
+#endif
+ /*
+  * dml2_420_10 only: both sizes go through math_ceil2(..., 256)
+  * (presumably rounding up to a multiple of 256 bytes - confirm helper).
+  */
+ if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) {
+ p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256));
+ p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256));
+ }
+ }
+
+ /*
+  * Gather the inputs of the unbounded-request decision:
+  *  - TotalActiveDPP: sum of DPPPerSurface (one per surface if ForceSingleDPP);
+  *  - SurfaceDoingUnboundedRequest: the last surface with at least one DPP;
+  *  - NoChromaOrLinear: cleared as soon as any surface is 4:2:0,
+  *    dml2_rgbe_alpha or linearly tiled.
+  */
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
+ if (p->DPPPerSurface[k] > 0)
+ SurfaceDoingUnboundedRequest = k;
+ if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha
+ || p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
+ NoChromaOrLinear = false;
+ }
+ }
+
+ /* Step 3a: the force_unbounded_requesting override is passed straight through. */
+ *p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear);
+
+ /* Step 3b: per-pipe DET sizes and the compressed buffer size. */
+ CalculateDETBufferSize(
+ &scratch->CalculateDETBufferSize_locals,
+ p->display_cfg,
+ p->ForceSingleDPP,
+ p->NumberOfActiveSurfaces,
+ *p->UnboundedRequestEnabled,
+ p->nomDETInKByte,
+ p->MaxTotalDETInKByte,
+ p->ConfigReturnBufferSizeInKByte,
+ p->MinCompressedBufferSizeInKByte,
+ p->ConfigReturnBufferSegmentSizeInkByte,
+ p->CompressedBufferSegmentSizeInkByte,
+ p->ReadBandwidthLuma,
+ p->ReadBandwidthChroma,
+ p->full_swath_bytes_l,
+ p->full_swath_bytes_c,
+ p->DPPPerSurface,
+
+ // Output
+ p->DETBufferSizeInKByte, // per hubp pipe
+ p->CompressedBufferSizeInkByte);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
+ DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
+ DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
+ DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
+ DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
+#endif
+
+ /* Step 4: assume support; any single failing surface clears the flag. */
+ *p->ViewportSizeSupport = true;
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+
+ /* Phantom pipes are sized against a fixed 1024 KByte instead of their own DET. */
+ DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
+#endif
+ /*
+  * The branches are tried in order; the first match wins:
+  *  a) linear surface: full heights, request size 128 or 256 per plane
+  *     depending on surf_linear128_l/c;
+  *  b) both full swaths fit in half of the DET: full heights, 256-byte requests;
+  *  c) luma is at least 1.5x chroma and halving luma makes it fit: halve luma;
+  *  d) luma is below 1.5x chroma and halving chroma makes it fit: halve chroma;
+  *  e) otherwise: halve both.
+  * A halved plane gets a 128- or 64-byte request size depending on whether
+  * "bytes per pixel == 2" agrees with "rotation is vertical".
+  */
+ if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k];
+ p->SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
+ RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
+
+ if (p->surf_linear128_l[k])
+ p->request_size_bytes_luma[k] = 128;
+ else
+ p->request_size_bytes_luma[k] = 256;
+
+ if (p->surf_linear128_c[k])
+ p->request_size_bytes_chroma[k] = 128;
+ else
+ p->request_size_bytes_chroma[k] = 256;
+
+ } else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k];
+ p->SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
+ RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
+ p->request_size_bytes_luma[k] = 256;
+ p->request_size_bytes_chroma[k] = 256;
+
+ } else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ p->SwathHeightC[k] = MaximumSwathHeightC[k];
+ RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
+ RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
+ p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
+ p->request_size_bytes_chroma[k] = 256;
+
+ } else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k];
+ p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
+ RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
+ p->request_size_bytes_luma[k] = 256;
+ p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
+
+ } else {
+ p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
+ p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
+ RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
+ RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
+ p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
+ p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
+ }
+
+ /* A zero chroma swath height means no chroma requests: clear the size set above. */
+ if (p->SwathHeightC[k] == 0)
+ p->request_size_bytes_chroma[k] = 0;
+
+ /*
+  * Unsupported when even the two half swaths exceed half of the DET, or
+  * when a swath width exceeds its maximum (chroma only if it has height).
+  * Unlike the other logging in this function, these messages are not
+  * wrapped in __DML_VBA_DEBUG__.
+  */
+ if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
+ p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
+ *p->ViewportSizeSupport = false;
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
+ DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]);
+ p->ViewportSizeSupportPerSurface[k] = false;
+ } else {
+ p->ViewportSizeSupportPerSurface[k] = true;
+ }
+
+ /*
+  * Split this surface's real DET (not the phantom-pipe substitute) between
+  * luma and chroma: everything to luma without chroma, half/half when the
+  * luma swath is at most 1.5x the chroma swath, otherwise 2/3 to luma
+  * (passed through math_floor2(..., 1024)) and the remainder to chroma.
+  */
+ if (p->SwathHeightC[k] == 0) {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
+ p->DETBufferSizeC[k] = 0;
+ } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
+ p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
+ } else {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k);
+#endif
+ p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
+ p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
+#endif
+
+ }
+
+ /*
+  * Step 5a: reserved space in 64-byte units. The baseline is two pixel
+  * chunks. With unbounded requesting it is raised to at least the ROB size
+  * minus TTUFIFODEPTH luma swaths of the unbounded surface (the swath term
+  * is divided by MAXIMUMCOMPRESSION when mrq_present).
+  */
+ *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64;
+ if (*p->UnboundedRequestEnabled) {
+ *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b,
+ (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]);
+ DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes);
+#endif
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b);
+#endif
+
+ /*
+  * Step 5b: hw_debug5. With ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE it is simply
+  * "more than one active surface". Otherwise it is set when any surface has
+  * DCC enabled while there is no MRQ, no unbounded requesting and exactly one
+  * active DPP, and the ROB plus the scaled compressed buffer exceed
+  * TTUFIFODEPTH swaths of that surface. The "mrq_present ? ... : 1" factor
+  * always evaluates to 1 there because !mrq_present is tested first.
+  * NOTE(review): MAXIMUMCOMPRESSION and TTUFIFODEPTH are declared "const long"
+  * but are logged with "%lu" below.
+  */
+ *p->hw_debug5 = false;
+#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
+ if (p->NumberOfActiveSurfaces > 1)
+ *p->hw_debug5 = true;
+#else
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1)
+ && p->display_cfg->plane_descriptors[k].surface.dcc.enable
+ && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1)
+ + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k])))
+ *p->hw_debug5 = true;
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled);
+ DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION);
+ DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH);
+ DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte);
+ DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5);
+#endif
+ }
+#endif
+ }
+
+ /*
+  * DecideODMMode - pick the ODM mode used when the caller asked for "auto".
+  *
+  * Starting from bypass, the result is raised to the minimum mode demanded by
+  * each of three limits in turn: the DISPCLK ceiling, the per-DSC-unit line
+  * width (only with DSC) and the 4:2:0 FMT buffer width (only for dml2_420).
+  * With DSC the slice count may then push the mode up further.
+  *
+  * The "<" comparisons assume the combine enumerators are ordered
+  * bypass < 2to1 < 3to1 < 4to1. The four-to-one DISPCLK argument is accepted
+  * but never read: 4:1 is simply the fallback when nothing smaller fits.
+  *
+  * Return: the selected mode.
+  */
+ static enum dml2_odm_mode DecideODMMode(unsigned int HActive,
+	double MaxDispclk,
+	unsigned int MaximumPixelsPerLinePerDSCUnit,
+	enum dml2_output_format_class OutFormat,
+	bool UseDSC,
+	unsigned int NumberOfDSCSlices,
+	double SurfaceRequiredDISPCLKWithoutODMCombine,
+	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
+	double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
+	double SurfaceRequiredDISPCLKWithODMCombineFourToOne)
+ {
+	enum dml2_odm_mode needed_for_dispclk;
+	enum dml2_odm_mode needed_for_dsc_width;
+	enum dml2_odm_mode needed_for_420_width;
+	enum dml2_odm_mode chosen = dml2_odm_mode_bypass;
+
+	/* Smallest mode whose required DISPCLK stays within the maximum. */
+	if (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk)
+		needed_for_dispclk = dml2_odm_mode_bypass;
+	else if (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk)
+		needed_for_dispclk = dml2_odm_mode_combine_2to1;
+	else if (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk)
+		needed_for_dispclk = dml2_odm_mode_combine_3to1;
+	else
+		needed_for_dispclk = dml2_odm_mode_combine_4to1;
+
+	if (needed_for_dispclk > chosen)
+		chosen = needed_for_dispclk;
+
+	/* Smallest mode whose segments fit the per-DSC-unit line width. */
+	if (UseDSC) {
+		if (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit)
+			needed_for_dsc_width = dml2_odm_mode_bypass;
+		else if (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit)
+			needed_for_dsc_width = dml2_odm_mode_combine_2to1;
+		else if (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit)
+			needed_for_dsc_width = dml2_odm_mode_combine_3to1;
+		else
+			needed_for_dsc_width = dml2_odm_mode_combine_4to1;
+
+		if (needed_for_dsc_width > chosen)
+			chosen = needed_for_dsc_width;
+	}
+
+	/* Smallest mode whose segments fit the 4:2:0 FMT buffer width. */
+	if (OutFormat == dml2_420) {
+		if (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH)
+			needed_for_420_width = dml2_odm_mode_bypass;
+		else if (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH)
+			needed_for_420_width = dml2_odm_mode_combine_2to1;
+		else if (HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH)
+			needed_for_420_width = dml2_odm_mode_combine_3to1;
+		else
+			needed_for_420_width = dml2_odm_mode_combine_4to1;
+
+		if (needed_for_420_width > chosen)
+			chosen = needed_for_420_width;
+	}
+
+	/*
+	 * Slice-count escalation. The three tests are deliberately sequential so
+	 * that one promotion can trigger the next (bypass -> 2:1 -> 3:1 -> 4:1).
+	 * 3:1 is only kept for exactly 12 slices.
+	 */
+	if (UseDSC) {
+		if (chosen == dml2_odm_mode_bypass && NumberOfDSCSlices > 4)
+			chosen = dml2_odm_mode_combine_2to1;
+		if (chosen == dml2_odm_mode_combine_2to1 && NumberOfDSCSlices > 8)
+			chosen = dml2_odm_mode_combine_3to1;
+		if (chosen == dml2_odm_mode_combine_3to1 && NumberOfDSCSlices != 12)
+			chosen = dml2_odm_mode_combine_4to1;
+	}
+
+	return chosen;
+ }
+
+ /*
+  * CalculateODMConstraints - derive the limits that go with an ODM mode.
+  *
+  * The 2:1, 3:1 and 4:1 combine modes select their own required DISPCLK and
+  * 2, 3 or 4 DPPs; every other mode (auto, split, MSO, bypass, unknown) is
+  * treated as bypass with a single DPP. The three remaining outputs are the
+  * DPP count multiplied by the per-unit limits.
+  *
+  * All results are returned through the output pointers.
+  */
+ static void CalculateODMConstraints(
+	enum dml2_odm_mode ODMUse,
+	double SurfaceRequiredDISPCLKWithoutODMCombine,
+	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
+	double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
+	double SurfaceRequiredDISPCLKWithODMCombineFourToOne,
+	unsigned int MaximumPixelsPerLinePerDSCUnit,
+	/* Output */
+	double *DISPCLKRequired,
+	unsigned int *NumberOfDPPRequired,
+	unsigned int *MaxHActiveForDSC,
+	unsigned int *MaxDSCSlices,
+	unsigned int *MaxHActiveFor420)
+ {
+	double dispclk_for_mode;
+	unsigned int dpp_for_mode;
+
+	if (ODMUse == dml2_odm_mode_combine_2to1) {
+		dispclk_for_mode = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
+		dpp_for_mode = 2;
+	} else if (ODMUse == dml2_odm_mode_combine_3to1) {
+		dispclk_for_mode = SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
+		dpp_for_mode = 3;
+	} else if (ODMUse == dml2_odm_mode_combine_4to1) {
+		dispclk_for_mode = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
+		dpp_for_mode = 4;
+	} else {
+		/* auto, split_1to2, mso_1to2, mso_1to4, bypass and anything else */
+		dispclk_for_mode = SurfaceRequiredDISPCLKWithoutODMCombine;
+		dpp_for_mode = 1;
+	}
+
+	*DISPCLKRequired = dispclk_for_mode;
+	*NumberOfDPPRequired = dpp_for_mode;
+
+	/* Per-unit limits scale linearly with the number of combined pipes. */
+	*MaxHActiveForDSC = *NumberOfDPPRequired * MaximumPixelsPerLinePerDSCUnit;
+	*MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC;
+	*MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH;
+ }
+
+ /*
+  * ValidateODMMode - check that an ODM mode is usable for one stream.
+  *
+  * Inputs are the mode, the stream properties (HActive, output format, DSC
+  * usage and slice count), the pipe budget (TotalNumberOfActiveDPP already in
+  * use against MaxNumDPP) and the limits that go with the mode
+  * (DISPCLKRequired, NumberOfDPPRequired, MaxHActiveForDSC, MaxDSCSlices,
+  * MaxHActiveFor420).
+  *
+  * Zero divisors (NumberOfDPPRequired, or NumberOfDSCSlices while UseDSC is
+  * set) make the mode invalid instead of reaching a modulo by zero.
+  *
+  * Return: true when every check passes, false on the first failure.
+  */
+ static bool ValidateODMMode(enum dml2_odm_mode ODMMode,
+	double MaxDispclk,
+	unsigned int HActive,
+	enum dml2_output_format_class OutFormat,
+	bool UseDSC,
+	unsigned int NumberOfDSCSlices,
+	unsigned int TotalNumberOfActiveDPP,
+	unsigned int MaxNumDPP,
+	double DISPCLKRequired,
+	unsigned int NumberOfDPPRequired,
+	unsigned int MaxHActiveForDSC,
+	unsigned int MaxDSCSlices,
+	unsigned int MaxHActiveFor420)
+ {
+	/* 3:1 segments are only required to be equal in width when DSC is used. */
+	bool are_odm_segments_symmetrical = (ODMMode == dml2_odm_mode_combine_3to1) ? UseDSC : true;
+	bool is_max_dsc_slice_required = (ODMMode == dml2_odm_mode_combine_3to1);
+	unsigned int pixels_per_clock_cycle = (OutFormat == dml2_420 || OutFormat == dml2_n422) ? 2 : 1;
+	unsigned int h_timing_div_mode =
+		(ODMMode == dml2_odm_mode_combine_4to1 || ODMMode == dml2_odm_mode_combine_3to1) ? 4 :
+		(ODMMode == dml2_odm_mode_combine_2to1) ? 2 : pixels_per_clock_cycle;
+
+	if (DISPCLKRequired > MaxDispclk)
+		return false;
+	if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP)
+		return false;
+	if (are_odm_segments_symmetrical) {
+		/* Guard the divisor: a mode that needs no DPP cannot be valid. */
+		if (NumberOfDPPRequired == 0)
+			return false;
+		if (HActive % (NumberOfDPPRequired * pixels_per_clock_cycle))
+			return false;
+	}
+	if (HActive % h_timing_div_mode) {
+		/*
+		 * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and
+		 * OTG_H_SYNC_A_START/END all need to be divisible by h timing div
+		 * mode. This logic only checks H active.
+		 */
+		return false;
+	}
+
+	if (UseDSC) {
+		if (HActive > MaxHActiveForDSC)
+			return false;
+		if (NumberOfDSCSlices > MaxDSCSlices)
+			return false;
+		/* Both values are used as divisors right below. */
+		if (NumberOfDSCSlices == 0 || NumberOfDPPRequired == 0)
+			return false;
+		if (HActive % NumberOfDSCSlices)
+			return false;
+		if (NumberOfDSCSlices % NumberOfDPPRequired)
+			return false;
+		if (is_max_dsc_slice_required) {
+			if (NumberOfDSCSlices != MaxDSCSlices)
+				return false;
+		}
+	}
+
+	if (OutFormat == dml2_420) {
+		if (HActive > MaxHActiveFor420)
+			return false;
+	}
+
+	return true;
+ }
+
+ /*
+  * CalculateODMMode - resolve and validate the ODM mode of one stream.
+  *
+  * The required DISPCLK is computed for bypass, 2:1, 3:1 and 4:1. A requested
+  * mode of dml2_odm_mode_auto is resolved with DecideODMMode(); any other
+  * request is taken as is. The limits of the resulting mode come from
+  * CalculateODMConstraints() and are checked by ValidateODMMode().
+  *
+  * Outputs:
+  *  *ODMMode                     - the resolved mode (written even on failure)
+  *  *TotalAvailablePipesSupport  - result of the validation
+  *  *NumberOfDPP                 - DPPs needed by the resolved mode
+  *  *RequiredDISPCLKPerSurface   - DISPCLK of the resolved mode, 0 on failure
+  */
+ static noinline_for_stack void CalculateODMMode(
+	unsigned int MaximumPixelsPerLinePerDSCUnit,
+	unsigned int HActive,
+	enum dml2_output_format_class OutFormat,
+	enum dml2_output_encoder_class Output,
+	enum dml2_odm_mode ODMUse,
+	double MaxDispclk,
+	bool DSCEnable,
+	unsigned int TotalNumberOfActiveDPP,
+	unsigned int MaxNumDPP,
+	double PixelClock,
+	unsigned int NumberOfDSCSlices,
+
+	// Output
+	bool *TotalAvailablePipesSupport,
+	unsigned int *NumberOfDPP,
+	enum dml2_odm_mode *ODMMode,
+	double *RequiredDISPCLKPerSurface)
+ {
+	/* DSC only counts as used when at least one slice is configured. */
+	bool dsc_in_use = DSCEnable && (NumberOfDSCSlices > 0);
+	bool tmds_420 = (OutFormat == dml2_420 && Output == dml2_hdmi);
+	/* Required DISPCLK per candidate: [0] bypass, [1] 2:1, [2] 3:1, [3] 4:1. */
+	double dispclk_candidate[4];
+	double dispclk_of_mode;
+	unsigned int dpp_of_mode;
+	unsigned int dsc_hactive_limit;
+	unsigned int dsc_slice_limit;
+	unsigned int fmt420_hactive_limit;
+	enum dml2_odm_mode resolved_mode;
+	bool mode_ok;
+
+	dispclk_candidate[0] = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock, tmds_420);
+	dispclk_candidate[1] = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock, tmds_420);
+	dispclk_candidate[2] = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock, tmds_420);
+	dispclk_candidate[3] = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock, tmds_420);
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse);
+	DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output);
+	DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable);
+	DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk);
+	DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit);
+	DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, dispclk_candidate[0]);
+	DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, dispclk_candidate[1]);
+	DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, dispclk_candidate[2]);
+	DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, dispclk_candidate[3]);
+#endif
+	/* An explicit request is honoured as is; only "auto" is resolved here. */
+	if (ODMUse != dml2_odm_mode_auto) {
+		resolved_mode = ODMUse;
+	} else {
+		resolved_mode = DecideODMMode(HActive,
+			MaxDispclk,
+			MaximumPixelsPerLinePerDSCUnit,
+			OutFormat,
+			dsc_in_use,
+			NumberOfDSCSlices,
+			dispclk_candidate[0],
+			dispclk_candidate[1],
+			dispclk_candidate[2],
+			dispclk_candidate[3]);
+	}
+
+	CalculateODMConstraints(resolved_mode,
+		dispclk_candidate[0],
+		dispclk_candidate[1],
+		dispclk_candidate[2],
+		dispclk_candidate[3],
+		MaximumPixelsPerLinePerDSCUnit,
+		&dispclk_of_mode,
+		&dpp_of_mode,
+		&dsc_hactive_limit,
+		&dsc_slice_limit,
+		&fmt420_hactive_limit);
+
+	mode_ok = ValidateODMMode(resolved_mode,
+		MaxDispclk,
+		HActive,
+		OutFormat,
+		dsc_in_use,
+		NumberOfDSCSlices,
+		TotalNumberOfActiveDPP,
+		MaxNumDPP,
+		dispclk_of_mode,
+		dpp_of_mode,
+		dsc_hactive_limit,
+		dsc_slice_limit,
+		fmt420_hactive_limit);
+
+	*ODMMode = resolved_mode;
+	*TotalAvailablePipesSupport = mode_ok;
+	*NumberOfDPP = dpp_of_mode;
+	/* A rejected mode reports no DISPCLK requirement. */
+	if (mode_ok)
+		*RequiredDISPCLKPerSurface = dispclk_of_mode;
+	else
+		*RequiredDISPCLKPerSurface = 0;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode);
+	DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP);
+	DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport);
+	DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface);
+#endif
+ }
+
+static noinline_for_stack void CalculateOutputLink(
+ struct dml2_core_internal_scratch *s,
+ double PHYCLK,
+ double PHYCLKD18,
+ double PHYCLKD32,
+ double Downspreading,
+ enum dml2_output_encoder_class Output,
+ enum dml2_output_format_class OutputFormat,
+ unsigned int HTotal,
+ unsigned int HActive,
+ double PixelClockBackEnd,
+ double ForcedOutputLinkBPP,
+ unsigned int DSCInputBitPerComponent,
+ unsigned int NumberOfDSCSlices,
+ double AudioSampleRate,
+ unsigned int AudioSampleLayout,
+ enum dml2_odm_mode ODMModeNoDSC,
+ enum dml2_odm_mode ODMModeDSC,
+ enum dml2_dsc_enable_option DSCEnable,
+ unsigned int OutputLinkDPLanes,
+ enum dml2_output_link_dp_rate OutputLinkDPRate,
+
+ // Output
+ bool *RequiresDSC,
+ bool *RequiresFEC,
+ double *OutBpp,
+ enum dml2_core_internal_output_type *OutputType,
+ enum dml2_core_internal_output_type_rate *OutputRate,
+ unsigned int *RequiredSlots)
+{
+ bool LinkDSCEnable;
+ unsigned int dummy;
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = 0;
+
+ *OutputType = dml2_core_internal_output_type_unknown;
+ *OutputRate = dml2_core_internal_output_rate_unknown;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable);
+ DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK);
+ DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
+ DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate);
+ DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive);
+ DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
+ DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC);
+ DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC);
+ DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP);
+ DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output);
+ DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate);
+#endif
+ {
+ if (Output == dml2_hdmi) {
+ *RequiresDSC = false;
+ *RequiresFEC = false;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = "HDMI";
+ *OutputType = dml2_core_internal_output_type_hdmi;
+ } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) {
+ if (DSCEnable == dml2_dsc_enable) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml2_dp || Output == dml2_dp2p0) {
+ *RequiresFEC = true;
+ } else {
+ *RequiresFEC = false;
+ }
+ } else {
+ *RequiresDSC = false;
+ LinkDSCEnable = false;
+ if (Output == dml2_dp2p0) {
+ *RequiresFEC = true;
+ } else {
+ *RequiresFEC = false;
+ }
+ }
+ if (Output == dml2_dp2p0) {
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR10";
+ *OutputType = dml2_core_internal_output_type_dp2p0;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10;
+ }
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR13p5";
+ *OutputType = dml2_core_internal_output_type_dp2p0;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5;
+ }
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " UHBR20";
+ *OutputType = dml2_core_internal_output_type_dp2p0;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20;
+ }
+ } else { // output is dp or edp
+ *OutBpp = 0;
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml2_dp) {
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR";
+ *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr;
+ }
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml2_dp) {
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR2";
+ *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2;
+ }
+ if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+
+ if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ if (Output == dml2_dp) {
+ *RequiresFEC = true;
+ }
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
+ OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
+ }
+ //OutputTypeAndRate = Output & " HBR3";
+ *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
+ *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3;
+ }
+ }
+ } else if (Output == dml2_hdmifrl) {
+ if (DSCEnable == dml2_dsc_enable) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *RequiresFEC = true;
+ } else {
+ *RequiresDSC = false;
+ LinkDSCEnable = false;
+ *RequiresFEC = false;
+ }
+ *OutBpp = 0;
+ if (PHYCLKD18 >= 3000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = Output & "3x3";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3;
+ }
+ if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = Output & "6x3";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3;
+ }
+ if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = Output & "6x4";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4;
+ }
+ if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ //OutputTypeAndRate = Output & "8x4";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4;
+ }
+ if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *RequiresFEC = true;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ }
+ //OutputTypeAndRate = Output & "10x4";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4;
+ }
+ if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) {
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
+ *RequiresDSC = true;
+ LinkDSCEnable = true;
+ *RequiresFEC = true;
+ *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
+ }
+ //OutputTypeAndRate = Output & "12x4";
+ *OutputType = dml2_core_internal_output_type_hdmifrl;
+ *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4;
+ }
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC);
+ DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC);
+ DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp);
+#endif
+}
+
+/*
+ * Return the DISPCLK needed by a writeback pipe.
+ *
+ * Three candidate clocks are derived from PixelClock and the largest one is
+ * returned through math_max3():
+ *  - an "H" term built from WritebackHTaps and WritebackHRatio,
+ *  - a "V" term built from WritebackVTaps, WritebackDestinationWidth and
+ *    HTotal,
+ *  - an "HB" term built from WritebackVTaps, WritebackDestinationWidth,
+ *    WritebackLineBufferSize and WritebackSourceWidth.
+ *
+ * WritebackPixelFormat and WritebackVRatio are accepted but not read here.
+ * The result is expressed in the same unit as PixelClock.
+ */
+static double CalculateWriteBackDISPCLK(
+	enum dml2_source_format_class WritebackPixelFormat,
+	double PixelClock,
+	double WritebackHRatio,
+	double WritebackVRatio,
+	unsigned int WritebackHTaps,
+	unsigned int WritebackVTaps,
+	unsigned int WritebackSourceWidth,
+	unsigned int WritebackDestinationWidth,
+	unsigned int HTotal,
+	unsigned int WritebackLineBufferSize)
+{
+	/* taps rounded up to whole groups of 8, destination width to groups of 6 */
+	const double h_tap_groups = math_ceil2((double)WritebackHTaps / 8.0, 1);
+	const double dst_width_groups = math_ceil2((double)WritebackDestinationWidth / 6.0, 1);
+	/* the width * taps product is evaluated in unsigned int before the subtraction */
+	const double line_buffer_term = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;
+
+	const double clk_h = PixelClock * h_tap_groups / WritebackHRatio;
+	const double clk_v = PixelClock * (WritebackVTaps * dst_width_groups + 8.0) / (double)HTotal;
+	const double clk_hb = PixelClock * WritebackVTaps * line_buffer_term / 6.0 / (double)WritebackSourceWidth;
+
+	return math_max3(clk_h, clk_v, clk_hb);
+}
+
+/*
+ * Compute the DTBCLK an output stream requires.
+ *
+ * Without DSC the result is max(PixelClock / 4 * OutputBpp / 24, 25).
+ *
+ * With DSC the result is the largest of the pixel-word rate, the average
+ * tribyte rate over the whole line and the tribyte rate over the active
+ * region (each divided by 4), floored at 25 and then scaled by 1.002.
+ *
+ * @DSCEnable:    selects the DSC formula when true
+ * @PixelClock:   pixel clock; the result uses the same unit
+ * @OutputFormat: dml2_444 keeps the full pixel rate, any other format halves it
+ * @OutputBpp:    output bits per pixel
+ * @DSCSlices:    number of DSC slices (only read when DSCEnable is true)
+ * @HTotal:       total line length in pixels
+ * @HActive:      active line length in pixels
+ * @AudioRate:    audio sample rate used for the blanking tribyte budget
+ * @AudioLayout:  layout 1 counts every sample, any other value a quarter
+ */
+static double RequiredDTBCLK(
+	bool DSCEnable,
+	double PixelClock,
+	enum dml2_output_format_class OutputFormat,
+	double OutputBpp,
+	unsigned int DSCSlices,
+	unsigned int HTotal,
+	unsigned int HActive,
+	unsigned int AudioRate,
+	unsigned int AudioLayout)
+{
+	if (DSCEnable != true) {
+		return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
+	} else {
+		double PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2);
+		/*
+		 * Slice width must be rounded UP. Dividing the two unsigned
+		 * operands directly truncates before math_ceil2() sees the
+		 * value, so do the division in floating point (same form as
+		 * DSCDelayRequirement()).
+		 */
+		double SliceWidth = math_ceil2((double)HActive / (double)DSCSlices, 1);
+		double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * SliceWidth / 8.0, 1) / 3.0, 1);
+		double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
+		double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
+		double HActiveTribyteRate = PixelWordRate * HCActive / HActive;
+		return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
+	}
+}
+
+/*
+ * Return the DSC delay for a stream, or 0 when DSC is disabled or OutputBpp
+ * is 0.
+ *
+ * The base delay is the sum of dscceComputeDelay() and dscComputeDelay(),
+ * multiplied by the ODM combine factor (4, 3, 2 or 1 depending on ODMMode).
+ * The slice count handed to dscceComputeDelay() is NumberOfDSCSlices divided
+ * by that factor. The base delay is then extended by (HTotal - HActive) for
+ * every started HActive-sized chunk of the delay, and finally rescaled by
+ * PixelClock / PixelClockBackEnd.
+ */
+static unsigned int DSCDelayRequirement(
+	bool DSCEnabled,
+	enum dml2_odm_mode ODMMode,
+	unsigned int DSCInputBitPerComponent,
+	double OutputBpp,
+	unsigned int HActive,
+	unsigned int HTotal,
+	unsigned int NumberOfDSCSlices,
+	enum dml2_output_format_class OutputFormat,
+	enum dml2_output_encoder_class Output,
+	double PixelClock,
+	double PixelClockBackEnd)
+{
+	unsigned int delay = 0;
+
+	if (DSCEnabled && OutputBpp != 0) {
+		unsigned int odm_factor;
+		unsigned int slice_width;
+
+		switch (ODMMode) {
+		case dml2_odm_mode_combine_4to1:
+			odm_factor = 4;
+			break;
+		case dml2_odm_mode_combine_3to1:
+			odm_factor = 3;
+			break;
+		case dml2_odm_mode_combine_2to1:
+			odm_factor = 2;
+			break;
+		default:
+			odm_factor = 1;
+			break;
+		}
+
+		/* slice width in pixels, rounded up */
+		slice_width = (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0));
+
+		delay = odm_factor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, slice_width,
+			(NumberOfDSCSlices / odm_factor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output));
+
+		/* add the blank portion once per started line of delay */
+		delay = (unsigned int)(delay + (HTotal - HActive) * math_ceil2((double)delay / (double)HActive, 1.0));
+		/* rescale by the PixelClock / PixelClockBackEnd ratio */
+		delay = (unsigned int)(delay * PixelClock / PixelClockBackEnd);
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled);
+	DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode);
+	DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
+	DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive);
+	DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
+	DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+	DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
+	DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
+	DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
+	DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
+	DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, delay);
+#endif
+
+	return delay;
+}
+
+/*
+ * Compute the MALL footprint of every active surface and report whether the
+ * MALL allocation is exceeded.
+ *
+ * For each surface k the footprint is (block-aligned width) * (block-aligned
+ * height) * bytes-per-pixel for plane 0, plus the same product for plane 1
+ * when ReadBlockWidthC[k] is non-zero.
+ *  - Stationary viewport: each dimension is the smaller of the block-aligned
+ *    surface size and the block-aligned span actually covered by the
+ *    viewport (from its start coordinate to start + size).
+ *  - Otherwise: each dimension is the smaller of the surface size and the
+ *    viewport size plus one block minus one, rounded up to a block.
+ *
+ * Phantom-pipe surfaces are summed into one total and surfaces with
+ * refresh_from_mall forced on into another; *ExceededMALLSize is set when
+ * either total is larger than MALLAllocatedForDCN (given in MiB, converted to
+ * bytes here).
+ *
+ * The Read256BytesBlock* parameters are accepted but not read.
+ */
+static void CalculateSurfaceSizeInMall(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int MALLAllocatedForDCN,
+	unsigned int BytesPerPixelY[],
+	unsigned int BytesPerPixelC[],
+	unsigned int Read256BytesBlockWidthY[],
+	unsigned int Read256BytesBlockWidthC[],
+	unsigned int Read256BytesBlockHeightY[],
+	unsigned int Read256BytesBlockHeightC[],
+	unsigned int ReadBlockWidthY[],
+	unsigned int ReadBlockWidthC[],
+	unsigned int ReadBlockHeightY[],
+	unsigned int ReadBlockHeightC[],
+
+	// Output
+	unsigned int SurfaceSizeInMALL[],
+	bool *ExceededMALLSize)
+{
+	unsigned int TotalSurfaceSizeInMALLForSS = 0;
+	unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
+	unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
+
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition;
+		const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface;
+
+		if (composition->viewport.stationary) {
+			SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]),
+				math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) -
+				math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) *
+				math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]),
+					math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
+					math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]);
+
+			if (ReadBlockWidthC[k] > 0) {
+				/*
+				 * The horizontal span of plane 1 is measured from its
+				 * x_start, mirroring the plane 0 expression above; the
+				 * vertical span below is measured from y_start.
+				 */
+				SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
+					math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]),
+						math_floor2((double)composition->viewport.plane1.x_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
+						math_floor2((double)composition->viewport.plane1.x_start, ReadBlockWidthC[k])) *
+					math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]),
+						math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
+						math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]);
+			}
+		} else {
+			SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
+				math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
+			if (ReadBlockWidthC[k] > 0) {
+				SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
+					math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
+					math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
+			}
+		}
+	}
+
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		/* SS and Subvp counted separate as they are never used at the same time */
+		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
+			TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k];
+		else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable)
+			TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k];
+	}
+
+	*ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
+		(TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024);
+	DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP);
+	DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS);
+	DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize);
+#endif
+}
+
+/*
+ * Derive the 3D LUT ("tdlut") fetch sizes and timings for one plane.
+ *
+ * When p->setup_for_tdlut is false every output is written as 0 and the
+ * function returns early. Otherwise the LUT width, pitch and footprint are
+ * chosen from the addressing mode, width mode, MPC width flag and is_gfx11,
+ * and the following outputs (all reached through pointers in @p) are filled:
+ *  - tdlut_pte_bytes_per_frame: PTE requests per frame * 64 (0 when GPUVM is
+ *    disabled),
+ *  - tdlut_bytes_per_frame / tdlut_bytes_per_group: per addressing mode,
+ *  - tdlut_groups_per_2row_ub: bytes per frame / bytes per group, rounded up,
+ *  - tdlut_opt_time: time to drain the bytes exceeding the cursor buffer,
+ *  - tdlut_drain_time: time to drain one full cursor buffer,
+ *  - tdlut_bytes_to_deliver: the cursor buffer size in bytes.
+ *
+ * @scratch is accepted but not read.
+ */
+static void calculate_tdlut_setting(
+	struct dml2_core_internal_scratch *scratch,
+	struct dml2_core_calcs_calculate_tdlut_setting_params *p)
+{
+	const unsigned int bpe = 8; /* bytes per LUT element */
+	bool sw_linear;
+	bool cube_17;
+	unsigned int mpc_width;
+	unsigned int simple_group_bytes;
+	unsigned int page_bytes;
+	unsigned int lut_width;
+	unsigned int pitch_bytes;
+	unsigned int footprint_bytes;
+	unsigned int pages_per_frame;
+	unsigned int pte_reqs_per_frame;
+	unsigned int line_bytes;
+	double drain_rate;
+
+	if (!p->setup_for_tdlut) {
+		/* no 3D LUT on this plane: every output reads as zero */
+		*p->tdlut_bytes_per_frame = 0;
+		*p->tdlut_pte_bytes_per_frame = 0;
+		*p->tdlut_bytes_per_group = 0;
+		*p->tdlut_bytes_to_deliver = 0;
+		*p->tdlut_drain_time = 0;
+		*p->tdlut_opt_time = 0;
+		*p->tdlut_groups_per_2row_ub = 0;
+		return;
+	}
+
+	sw_linear = (p->tdlut_addressing_mode == dml2_tdlut_sw_linear);
+	cube_17 = (p->tdlut_width_mode == dml2_tdlut_width_17_cube);
+
+	mpc_width = p->tdlut_mpc_width_flag ? 33 : 17;
+	simple_group_bytes = p->tdlut_mpc_width_flag ? 39*256 : 10*256;
+
+	page_bytes = p->gpuvm_page_size_kbytes * 1024;
+
+	/* simple linear stores the whole LUT as one row; otherwise width is the cube edge */
+	if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear)
+		lut_width = cube_17 ? 4916 : 35940;
+	else
+		lut_width = cube_17 ? 17 : 33;
+
+	/* pitch is aligned to 256B on gfx11 and to 128B otherwise */
+	pitch_bytes = (unsigned int)math_ceil2(lut_width * bpe, p->is_gfx11 ? 256 : 128);
+	footprint_bytes = sw_linear ? pitch_bytes * lut_width * lut_width : pitch_bytes;
+
+	if (p->gpuvm_enable) {
+		pages_per_frame = (unsigned int)math_ceil2(footprint_bytes - 1, page_bytes) / page_bytes + 1;
+		pte_reqs_per_frame = (unsigned int)math_ceil2(pages_per_frame - 1, 8) / 8 + 1;
+	} else {
+		pages_per_frame = 0;
+		pte_reqs_per_frame = 0;
+	}
+
+	line_bytes = (unsigned int)math_ceil2(lut_width * bpe, 64); //64b request
+	*p->tdlut_pte_bytes_per_frame = pte_reqs_per_frame * 64;
+
+	if (sw_linear) {
+		//the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice
+		*p->tdlut_bytes_per_group = line_bytes * mpc_width;
+		*p->tdlut_bytes_per_frame = line_bytes * mpc_width * mpc_width;
+		//the delivery cycles is DispClk cycles per line * number of lines * number of slices
+		drain_rate = line_bytes * p->dispclk_mhz / math_ceil2(mpc_width/2.0, 1);
+	} else {
+		//tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements
+		*p->tdlut_bytes_per_group = simple_group_bytes;
+		*p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(lut_width * bpe, 256);
+		drain_rate = 2 * bpe * p->dispclk_mhz;
+	}
+
+	//the tdlut is fetched during the 2 row times of prefetch.
+	/* setup_for_tdlut is known to be set here: the disabled case returned above */
+	*p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
+	*p->tdlut_opt_time = (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) ?
+		(*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / drain_rate : 0;
+	*p->tdlut_drain_time = p->cursor_buffer_size * 1024 / drain_rate;
+	*p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable);
+	DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, page_bytes);
+	DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, pages_per_frame);
+	DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, pte_reqs_per_frame);
+
+	DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz);
+	DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, lut_width);
+	DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear");
+	DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, pitch_bytes);
+	DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, footprint_bytes);
+	DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame);
+	DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, line_bytes);
+	DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group);
+	DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, drain_rate);
+	DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? (unsigned int)math_ceil2(mpc_width/2.0, 1) * mpc_width * mpc_width : (unsigned int)math_ceil2(lut_width/2.0, 1));
+	DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time);
+	DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time);
+	DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver);
+	DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub);
+#endif
+}
+
+/*
+ * Compute *Tarb and *Tarb_prefetch as "extra bytes" divided by ReturnBW.
+ *
+ * The extra bytes are accumulated over all active surfaces in two passes:
+ *  1. per surface: NumberOfDPP pixel chunks, plus one meta chunk when DCC is
+ *     enabled, plus tdlut_bytes_per_group[k] when the plane is set up for a
+ *     3D LUT. This subtotal seeds both results.
+ *  2. only when GPUVM is enabled: NumberOfDPP * dpte_group_bytes scaled by
+ *     (1 + 8 * HostVM dynamic levels) and by HostVMInefficiencyFactor for
+ *     *Tarb, or HostVMInefficiencyFactorPrefetch for *Tarb_prefetch.
+ *
+ * The two passes are kept separate so the floating-point summation order is
+ * unchanged.
+ */
+static void CalculateTarb(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int PixelChunkSizeInKByte,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int NumberOfDPP[],
+	unsigned int dpte_group_bytes[],
+	unsigned int tdlut_bytes_per_group[],
+	double HostVMInefficiencyFactor,
+	double HostVMInefficiencyFactorPrefetch,
+	unsigned int HostVMMinPageSize,
+	double ReturnBW,
+	unsigned int MetaChunkSize,
+
+	// output
+	double *Tarb,
+	double *Tarb_prefetch)
+{
+	double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels);
+	double bytes_nominal = 0;
+	double bytes_prefetch;
+	unsigned int k;
+
+	/* pass 1: data, meta and 3D LUT chunks */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		bytes_nominal += (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024);
+
+		if (display_cfg->plane_descriptors[k].surface.dcc.enable)
+			bytes_nominal += (MetaChunkSize * 1024);
+
+		if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
+			bytes_nominal += tdlut_bytes_per_group[k];
+	}
+
+	bytes_prefetch = bytes_nominal;
+
+	/* pass 2: page-table groups, only relevant with GPUVM */
+	if (display_cfg->gpuvm_enable == true) {
+		const double level_scale = 1 + 8 * HostVMDynamicLevels;
+
+		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+			bytes_nominal += NumberOfDPP[k] * dpte_group_bytes[k] * level_scale * HostVMInefficiencyFactor;
+			bytes_prefetch += NumberOfDPP[k] * dpte_group_bytes[k] * level_scale * HostVMInefficiencyFactorPrefetch;
+		}
+	}
+
+	*Tarb = bytes_nominal / ReturnBW;
+	*Tarb_prefetch = bytes_prefetch / ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte);
+	DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize);
+	DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, bytes_nominal);
+	DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, bytes_prefetch);
+#endif
+}
+
+/*
+ * Return TWait: the larger of the reserved vblank time (converted from ns by
+ * dividing by 1000) and g6_temp_read_blackout_us, plus the larger of
+ * UrgentLatency and Ttrip.
+ */
+static double CalculateTWait(
+	long reserved_vblank_time_ns,
+	double UrgentLatency,
+	double Ttrip,
+	double g6_temp_read_blackout_us)
+{
+	const double reserved_vblank_time_us = reserved_vblank_time_ns/1000.0;
+	const double blackout = math_max2(reserved_vblank_time_us, g6_temp_read_blackout_us);
+	const double urgent_or_trip = math_max2(UrgentLatency, Ttrip);
+	const double wait_time = blackout + urgent_or_trip;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns);
+	DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
+	DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip);
+	DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, wait_time);
+#endif
+	return wait_time;
+}
+
+
+/*
+ * Compute the VUpdate/VReady pixel offsets and the dynamic-metadata timings.
+ *
+ * Outputs:
+ *  - *VUpdateWidthPix, *VReadyOffsetPix: clock-cycle budgets (including the
+ *    inter-tile repeater delay) converted to pixels and rounded up,
+ *  - *VUpdateOffsetPix: HTotal / 4, rounded up,
+ *  - *TSetup: the three pixel counts above summed and divided by PixelClock,
+ *  - *Tdmbf: DynamicMetadataTransmittedBytes / 4 / Dispclk,
+ *  - *Tdmec: one line time (HTotal / PixelClock),
+ *  - *Tdmsks: half of the vblank time when no "lines before active" value is
+ *    given, otherwise that many line times; halved again when InterlaceEnable
+ *    is 1 and ProgressiveToInterlaceUnitInOPP is false.
+ */
+static void CalculateVUpdateAndDynamicMetadataParameters(
+	unsigned int MaxInterDCNTileRepeaters,
+	double Dppclk,
+	double Dispclk,
+	double DCFClkDeepSleep,
+	double PixelClock,
+	unsigned int HTotal,
+	unsigned int VBlank,
+	unsigned int DynamicMetadataTransmittedBytes,
+	unsigned int DynamicMetadataLinesBeforeActiveRequired,
+	unsigned int InterlaceEnable,
+	bool ProgressiveToInterlaceUnitInOPP,
+
+	// Output
+	double *TSetup,
+	double *Tdmbf,
+	double *Tdmec,
+	double *Tdmsks,
+	unsigned int *VUpdateOffsetPix,
+	unsigned int *VUpdateWidthPix,
+	unsigned int *VReadyOffsetPix)
+{
+	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
+	const double vupdate_width_time = 14.0 / DCFClkDeepSleep + 12.0 / Dppclk + repeater_delay;
+	const double vready_offset_time = math_max2(150.0 / Dppclk, repeater_delay + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk);
+	double dm_sks;
+
+	*VUpdateWidthPix = (unsigned int)(math_ceil2(vupdate_width_time * PixelClock, 1.0));
+	*VReadyOffsetPix = (unsigned int)(math_ceil2(vready_offset_time * PixelClock, 1.0));
+	*VUpdateOffsetPix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0));
+	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
+	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
+	*Tdmec = HTotal / PixelClock;
+
+	/* the line-count products are evaluated in unsigned int before the division */
+	dm_sks = (DynamicMetadataLinesBeforeActiveRequired == 0) ?
+		VBlank * HTotal / PixelClock / 2.0 :
+		DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
+
+	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
+		dm_sks = dm_sks / 2;
+
+	*Tdmsks = dm_sks;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
+	DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank);
+	DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal);
+	DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock);
+	DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk);
+	DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
+	DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
+	DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, repeater_delay);
+
+	DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
+	DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
+	DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);
+
+	DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
+#endif
+}
+
+/*
+ * Sum the urgent bandwidth required by all active surfaces.
+ *
+ * For every surface the required bandwidth is the maximum of five candidate
+ * combinations (VM/row prefetch, flip + active, flip + prefetch, active +
+ * excess vactive fill, flip + max prefetch). It is written to
+ * surface_required_bw[k] and folded into surface_peak_required_bw[k], which
+ * only ever grows. Phantom pipes contribute 0 unless state_type is the SVP
+ * prefetch state.
+ *
+ * @l:               scratch storage; cleared on entry
+ * @bw_type:         for DRAM bandwidth the DCC overhead factors are applied,
+ *                   for any other type they are treated as 1.0
+ * @inc_flip_bw:     false selects plain row bandwidth instead of flip_bw[]
+ *                   (only consulted when use_qual_row_bw is false)
+ * @use_qual_row_bw: true selects the "qualified" row bandwidth: 0 when HostVM
+ *                   is enabled, row bandwidth when immediate flip is not
+ *                   requested for the plane, otherwise left at 0
+ *
+ * Returns the total over all surfaces (l->required_bandwidth_mbps).
+ */
+static double get_urgent_bandwidth_required(
+	struct dml2_core_shared_get_urgent_bandwidth_required_locals *l,
+	const struct dml2_display_cfg *display_cfg,
+	enum dml2_core_internal_soc_state_type state_type,
+	enum dml2_core_internal_bw_type bw_type,
+	bool inc_flip_bw, // including flip bw
+	bool use_qual_row_bw,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int NumberOfDPP[],
+	double dcc_dram_bw_nom_overhead_factor_p0[],
+	double dcc_dram_bw_nom_overhead_factor_p1[],
+	double dcc_dram_bw_pref_overhead_factor_p0[],
+	double dcc_dram_bw_pref_overhead_factor_p1[],
+	double mall_prefetch_sdp_overhead_factor[],
+	double mall_prefetch_dram_overhead_factor[],
+	double ReadBandwidthLuma[],
+	double ReadBandwidthChroma[],
+	double PrefetchBandwidthLuma[],
+	double PrefetchBandwidthChroma[],
+	double PrefetchBandwidthMax[],
+	double excess_vactive_fill_bw_l[],
+	double excess_vactive_fill_bw_c[],
+	double cursor_bw[],
+	double dpte_row_bw[],
+	double meta_row_bw[],
+	double prefetch_cursor_bw[],
+	double prefetch_vmrow_bw[],
+	double flip_bw[],
+	double UrgentBurstFactorLuma[],
+	double UrgentBurstFactorChroma[],
+	double UrgentBurstFactorCursor[],
+	double UrgentBurstFactorLumaPre[],
+	double UrgentBurstFactorChromaPre[],
+	double UrgentBurstFactorCursorPre[],
+	/* outputs */
+	double surface_required_bw[],
+	double surface_peak_required_bw[])
+{
+	// set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS
+	// set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation
+
+	/* loop-invariant selectors */
+	const bool dram_bw = (bw_type == dml2_core_internal_bw_dram);
+	const bool svp_prefetch_state = (state_type == dml2_core_internal_soc_state_svp_prefetch);
+
+	/* clears the running total and every per_plane_flip_bw[] entry */
+	memset(l, 0, sizeof(*l));
+
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		const bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]);
+		// Exclude phantom pipe in bw calculation for non svp prefetch state
+		const bool exclude_this_plane = !svp_prefetch_state && is_phantom;
+
+		/* MALL overhead only applies in the SVP prefetch state */
+		if (!svp_prefetch_state)
+			l->mall_svp_prefetch_factor = 1.0;
+		else if (dram_bw)
+			l->mall_svp_prefetch_factor = mall_prefetch_dram_overhead_factor[k];
+		else
+			l->mall_svp_prefetch_factor = mall_prefetch_sdp_overhead_factor[k];
+
+		/* DCC overhead only applies to DRAM bandwidth */
+		l->tmp_nom_adj_factor_p0 = (dram_bw ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
+		l->tmp_nom_adj_factor_p1 = (dram_bw ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
+		l->tmp_pref_adj_factor_p0 = (dram_bw ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
+		l->tmp_pref_adj_factor_p1 = (dram_bw ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
+
+		l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0;
+		l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1;
+		l->adj_factor_cur = UrgentBurstFactorCursor[k];
+		l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0;
+		l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1;
+		l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k];
+
+		if (!use_qual_row_bw) {
+			// the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM)
+			if (!inc_flip_bw || (!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable))
+				l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
+			else
+				l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k];
+		} else if (display_cfg->hostvm_enable) {
+			// The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip.
+			// The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe.
+			l->per_plane_flip_bw[k] = 0; // qual_row_bw
+		} else if (!display_cfg->plane_descriptors[k].immediate_flip) {
+			l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
+		}
+
+		if (exclude_this_plane) {
+			surface_required_bw[k] = 0.0;
+		} else {
+			l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
+			l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur;
+			l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
+			l->flip_and_prefetch_bw_max = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthMax[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
+			l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k];
+			surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_max);
+
+			/* export peak required bandwidth for the surface */
+			surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]);
+
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw);
+			DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw);
+			DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw);
+			DML_LOG_VERBOSE("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw);
+			DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]);
+#endif
+		}
+
+		l->required_bandwidth_mbps += surface_required_bw[k];
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw);
+		DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
+		DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor);
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0);
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1);
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur);
+
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre);
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre);
+		DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre);
+
+		DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]);
+
+		DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
+		DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane);
+		DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
+#endif
+	}
+
+	return l->required_bandwidth_mbps;
+}
+
+/*
+ * Compute the extra latency outputs: *ExtraLatency (Tex), *ExtraLatency_sr (Tex_sr)
+ * and *ExtraLatencyPrefetch. The base term is selected by qos_type; 2 * Ttrip is
+ * added to Tex and the prefetch term (not Tex_sr) when display_cfg->hostvm_enable is
+ * set and hostvm_mode is 1; the Tarb outputs of CalculateTarb() are added last.
+ */
+static void CalculateExtraLatency(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int ROBBufferSizeInKByte,
+	unsigned int RoundTripPingLatencyCycles,
+	unsigned int ReorderingBytes,
+	double DCFCLK,
+	double FabricClock,
+	unsigned int PixelChunkSizeInKByte,
+	double ReturnBW,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int NumberOfDPP[],
+	unsigned int dpte_group_bytes[],
+	unsigned int tdlut_bytes_per_group[],
+	double HostVMInefficiencyFactor,
+	double HostVMInefficiencyFactorPrefetch,
+	unsigned int HostVMMinPageSize,
+	enum dml2_qos_param_type qos_type,
+	bool max_outstanding_when_urgent_expected,
+	unsigned int max_outstanding_requests,
+	unsigned int request_size_bytes_luma[],
+	unsigned int request_size_bytes_chroma[],
+	unsigned int MetaChunkSize,
+	unsigned int dchub_arb_to_ret_delay,
+	double Ttrip,
+	unsigned int hostvm_mode,
+	// output
+	double *ExtraLatency, // Tex
+	double *ExtraLatency_sr, // Tex_sr
+	double *ExtraLatencyPrefetch)
+{
+	double tarb, tarb_prefetch;
+	double tex, tex_sr;
+	double tex_trips = 0.0;
+	unsigned int largest_request_bytes = 0;
+	unsigned int k;
+
+	CalculateTarb(display_cfg, PixelChunkSizeInKByte, NumberOfActiveSurfaces, NumberOfDPP,
+		dpte_group_bytes, tdlut_bytes_per_group, HostVMInefficiencyFactor,
+		HostVMInefficiencyFactorPrefetch, HostVMMinPageSize, ReturnBW, MetaChunkSize,
+		&tarb, &tarb_prefetch);
+
+	if (display_cfg->hostvm_enable && hostvm_mode == 1)
+		tex_trips = 2.0 * Ttrip;
+
+	// Largest luma or chroma request size among the first NumberOfActiveSurfaces entries.
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		unsigned int plane_max = request_size_bytes_luma[k];
+		if (request_size_bytes_chroma[k] > plane_max)
+			plane_max = request_size_bytes_chroma[k];
+		if (plane_max > largest_request_bytes)
+			largest_request_bytes = plane_max;
+	}
+
+	if (qos_type != dml2_qos_param_type_dcn4x) {
+		tex_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW;
+		tex = tex_sr;
+	} else {
+		tex_sr = dchub_arb_to_ret_delay / DCFCLK;
+		tex = tex_sr;
+		if (max_outstanding_when_urgent_expected) {
+			// unsigned int arithmetic: the difference wraps if the product exceeds ROBBufferSizeInKByte * 1024
+			unsigned int rob_less_outstanding = ROBBufferSizeInKByte * 1024 - max_outstanding_requests * largest_request_bytes;
+			tex += rob_less_outstanding / ReturnBW;
+		}
+	}
+	tex += tex_trips;
+	*ExtraLatencyPrefetch = tex + tarb_prefetch;
+	*ExtraLatency = tex + tarb;
+	*ExtraLatency_sr = tex_sr + tarb;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type);
+	DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode);
+	DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, tex_trips);
+	DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected);
+	DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock);
+	DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
+	DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
+	DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
+	DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes);
+	DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, tarb);
+	DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency);
+	DML_LOG_VERBOSE("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr);
+	DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch);
+#endif
+}
+
+static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p)
+{
+ struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals;
+ bool dcc_mrq_enable;
+
+ unsigned int vm_bytes;
+ unsigned int extra_tdpe_bytes;
+ unsigned int tdlut_row_bytes;
+ unsigned int Lo;
+
+ s->NoTimeToPrefetch = false;
+ s->DPPCycles = 0;
+ s->DISPCLKCycles = 0;
+ s->DSTTotalPixelsAfterScaler = 0.0;
+ s->LineTime = 0.0;
+ s->dst_y_prefetch_equ = 0.0;
+ s->prefetch_bw_oto = 0.0;
+ s->Tvm_oto = 0.0;
+ s->Tr0_oto = 0.0;
+ s->Tvm_oto_lines = 0.0;
+ s->Tr0_oto_lines = 0.0;
+ s->dst_y_prefetch_oto = 0.0;
+ s->TimeForFetchingVM = 0.0;
+ s->TimeForFetchingRowInVBlank = 0.0;
+ s->LinesToRequestPrefetchPixelData = 0.0;
+ s->HostVMDynamicLevelsTrips = 0;
+ s->trip_to_mem = 0.0;
+ *p->Tvm_trips = 0.0;
+ *p->Tr0_trips = 0.0;
+ s->Tvm_trips_rounded = 0.0;
+ s->Tr0_trips_rounded = 0.0;
+ s->max_Tsw = 0.0;
+ s->Lsw_oto = 0.0;
+ *p->Tpre_rounded = 0.0;
+ s->prefetch_bw_equ = 0.0;
+ s->Tvm_equ = 0.0;
+ s->Tr0_equ = 0.0;
+ s->Tdmbf = 0.0;
+ s->Tdmec = 0.0;
+ s->Tdmsks = 0.0;
+ *p->prefetch_sw_bytes = 0.0;
+ s->prefetch_bw_pr = 0.0;
+ s->bytes_pp = 0.0;
+ s->dep_bytes = 0.0;
+ s->min_Lsw_oto = 0.0;
+ s->min_Lsw_equ = 0.0;
+ s->Tsw_est1 = 0.0;
+ s->Tsw_est2 = 0.0;
+ s->Tsw_est3 = 0.0;
+ s->cursor_prefetch_bytes = 0;
+ *p->prefetch_cursor_bw = 0;
+ *p->RequiredPrefetchBWMax = 0.0;
+
+ dcc_mrq_enable = (p->dcc_enable && p->mrq_present);
+
+ s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip)
+
+ if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) {
+ s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels;
+ } else {
+ s->HostVMDynamicLevelsTrips = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable);
+ DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present);
+ DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable);
+ DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable);
+ DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
+ DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
+ DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup);
+ DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable);
+ DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait);
+ DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
+ DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
+ DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
+ DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk);
+#endif
+ CalculateVUpdateAndDynamicMetadataParameters(
+ p->MaxInterDCNTileRepeaters,
+ p->myPipe->Dppclk,
+ p->myPipe->Dispclk,
+ p->myPipe->DCFClkDeepSleep,
+ p->myPipe->PixelClock,
+ p->myPipe->HTotal,
+ p->myPipe->VBlank,
+ p->DynamicMetadataTransmittedBytes,
+ p->DynamicMetadataLinesBeforeActiveRequired,
+ p->myPipe->InterlaceEnable,
+ p->myPipe->ProgressiveToInterlaceUnitInOPP,
+ p->TSetup,
+
+ // Output
+ &s->Tdmbf,
+ &s->Tdmec,
+ &s->Tdmsks,
+ p->VUpdateOffsetPix,
+ p->VUpdateWidthPix,
+ p->VReadyOffsetPix);
+
+ s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
+ s->trip_to_mem = p->Ttrip;
+ *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg);
+ if (dcc_mrq_enable)
+ *p->Tvm_trips_flip = *p->Tvm_trips;
+ else
+ *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem;
+
+ *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
+ *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2);
+
+ if (p->DynamicMetadataVMEnabled == true) {
+ *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips;
+ *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip;
+ } else {
+ *p->Tdmdl_vm = 0;
+ *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex
+ }
+
+ if (p->DynamicMetadataEnable == true) {
+ if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
+ *p->NotEnoughTimeForDynamicMetadata = true;
+ DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
+ DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
+ DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
+ DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
+ DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
+ } else {
+ *p->NotEnoughTimeForDynamicMetadata = false;
+ }
+ } else {
+ *p->NotEnoughTimeForDynamicMetadata = false;
+ }
+
+ if (p->myPipe->ScalerEnabled)
+ s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
+ else
+ s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
+
+ s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
+
+ s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal;
+
+ if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
+ return true;
+
+ *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay);
+ *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
+ ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) +
+ ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled);
+ DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
+ DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
+ DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
+ DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
+ DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
+ DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
+ DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
+ DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
+ DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
+
+ DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut);
+ DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time);
+ DML_LOG_VERBOSE("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame);
+ DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time);
+#endif
+
+ if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
+ *p->DSTYAfterScaler = 1;
+ else
+ *p->DSTYAfterScaler = 0;
+
+ s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
+ *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
+ *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal)));
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
+ DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
+#endif
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
+ DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
+ DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
+ DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
+ DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips);
+#endif
+ if (p->display_cfg->gpuvm_enable) {
+ s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ } else {
+ if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut)
+ s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0);
+ else
+ s->Tvm_trips_rounded = s->LineTime / 4.0;
+ *p->Tvm_trips_flip_rounded = s->LineTime / 4.0;
+ }
+
+ s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0);
+ *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0);
+
+ if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) {
+ s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
+ } else {
+ s->Tr0_trips_rounded = s->LineTime / 4.0;
+ *p->Tr0_trips_flip_rounded = s->LineTime / 4.0;
+ }
+ s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0);
+ *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0);
+
+ if (p->display_cfg->gpuvm_enable == true) {
+ if (p->display_cfg->gpuvm_max_page_table_levels >= 3) {
+ *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1));
+ } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) {
+ *p->Tno_bw = p->ExtraLatencyPrefetch;
+ } else {
+ *p->Tno_bw = 0;
+ }
+ } else {
+ *p->Tno_bw = 0;
+ }
+
+ if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3)
+ *p->Tno_bw_flip = *p->Tno_bw;
+ else
+ *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip
+
+ if (dml_is_420(p->myPipe->SourcePixelFormat)) {
+ s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0;
+ } else {
+ s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
+ }
+
+ *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
+ *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
+
+ vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes;
+ extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128);
+
+ if (p->setup_for_tdlut)
+ vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0);
+
+ tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0);
+
+ s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
+ s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
+ s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
+
+ // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto
+ // Note: in prefetch calculation, accounting is done mostly per-pipe.
+ // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time
+ s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface;
+
+ // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1)
+ s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime;
+
+ if (p->myPipe->BytePerPixelC > 0) {
+ s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface;
+ s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime;
+ }
+
+ /* oto prefetch bw should always be less than total vactive bw */
+ //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface);
+
+ s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;
+
+ s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime));
+
+ s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0;
+
+ s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto,
+ p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
+
+ /* oto bw needs to be outputted even if the oto schedule isn't being used to avoid ms/mp mismatch.
+ * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule
+ * and the required bandwidth increases when going from ms to mp
+ */
+ *p->RequiredPrefetchBWMax = s->prefetch_bw_oto;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l);
+ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c);
+ DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw);
+#endif
+
+ if (p->display_cfg->gpuvm_enable == true) {
+ s->Tvm_oto = math_max3(
+ *p->Tvm_trips,
+ *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
+ s->LineTime / 4.0);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips);
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto);
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0);
+#endif
+ } else {
+ s->Tvm_oto = s->Tvm_trips_rounded;
+ }
+
+ if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) {
+ s->Tr0_oto = math_max3(
+ *p->Tr0_trips,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto,
+ s->LineTime / 4.0);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips);
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto);
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4);
+#endif
+ } else
+ s->Tr0_oto = s->LineTime / 4.0;
+
+ s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
+ s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
+ s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
+
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre);
+ if (p->impacted_dst_y_pre > 0) {
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
+ s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre);
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto);
+ }
+#endif
+ *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime;
+
+ //To (time for delay after scaler) in line time
+ Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal);
+
+ s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__;
+ s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime);
+ s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0);
+ //Tpre_equ in line time
+ if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable)
+ s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo;
+ else
+ s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo;
+
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ);
+
+ s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
+
+ if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ)
+ s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ;
+#endif
+
+ s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
+ DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
+ DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
+ DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip);
+ DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
+ DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
+ DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor);
+ DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
+ DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
+ DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
+ DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
+ DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
+ DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes);
+ DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw);
+ DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
+ DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
+ DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip);
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
+ DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
+ DML_LOG_VERBOSE("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
+ DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
+ DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes);
+ DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes);
+#endif
+ s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
+ *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
+ DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup);
+ DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
+ DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
+ DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
+ DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
+ DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
+ DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
+ DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait);
+ DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
+ DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
+ DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch);
+ DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
+ DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
+ DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p);
+ DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip);
+ DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
+ DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
+ DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor);
+ DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes);
+ DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime)));
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
+#endif
+
+ *p->dst_y_per_vm_vblank = 0;
+ *p->dst_y_per_row_vblank = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixelDataBWLuma = 0;
+
+ // Derive bandwidth by finding how much data to move within the time constraint
+ // Tpre_rounded is Tpre rounding to 2-bit fraction
+ // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time
+ // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time
+ // So that means prefetch bw calculated can be higher since the total time available for prefetch is less
+ bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime;
+ bool tpre_gt_req_latency = true;
+#if 0
+ // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained.
+ // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages.
+ // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary.
+ tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch);
+#endif
+
+ if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) {
+ s->prefetch_bw1 = 0.;
+ s->prefetch_bw2 = 0.;
+ s->prefetch_bw3 = 0.;
+ s->prefetch_bw4 = 0.;
+
+ // prefetch_bw1: VM + 2*R0 + SW
+ if (*p->Tpre_rounded - *p->Tno_bw > 0) {
+ s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor
+ + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)
+ + *p->prefetch_sw_bytes)
+ / (*p->Tpre_rounded - *p->Tno_bw);
+ s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1;
+ } else
+ s->prefetch_bw1 = 0;
+
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1);
+ if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) {
+ s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) /
+ (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)));
+ DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded);
+ DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
+ DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw));
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
+#endif
+ }
+
+ // prefetch_bw2: VM + SW
+ if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
+ s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) /
+ (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
+ s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2;
+ } else
+ s->prefetch_bw2 = 0;
+
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2);
+ if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
+ s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2);
+ }
+
+ // prefetch_bw3: 2*R0 + SW
+ if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) /
+ (*p->Tpre_rounded - s->Tvm_trips_rounded);
+ s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3;
+ } else
+ s->prefetch_bw3 = 0;
+
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
+ if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
+ }
+
+ // prefetch_bw4: SW
+ if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
+ s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
+ else
+ s->prefetch_bw4 = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
+ DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime)));
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
+ DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
+ DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2);
+ DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
+#endif
+ {
+ bool Case1OK = false;
+ bool Case2OK = false;
+ bool Case3OK = false;
+
+ // get "equalized" bw among all stages (vm, r0, sw): the base case is that all 3 stages are just above the latency-based requirement,
+ // so that no particular stage is disproportionately favored; next is r0 being more aggressive, then vm being more aggressive, and the worst case is all being aggressive
+ // vs the latency-based number
+
+ // prefetch_bw1: VM + 2*R0 + SW
+ // so prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency-based time to fetch the data)
+ // this is to make sure the equ bw won't be more aggressive than the latency-based requirement.
+ // check vm time >= vm_trips
+ // check r0 time >= r0_trips
+
+ double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
+
+ DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded);
+ DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded);
+
+ if (s->prefetch_bw1 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw1;
+ DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
+ Case1OK = true;
+ }
+ }
+
+ // prefetch_bw2: VM + SW
+ // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - Tr0_trips_rounded - Tno_bw)
+ // check vm time >= vm_trips
+ // check r0 time < r0_trips
+ if (s->prefetch_bw2 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw2;
+ DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) {
+ Case2OK = true;
+ }
+ }
+
+ // prefetch_bw3: 2*R0 + SW
+ // check vm time < vm_trips
+ // check r0 time >= r0_trips
+ if (s->prefetch_bw3 > 0) {
+ double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3;
+ double row_transfer_time = total_row_bytes / s->prefetch_bw3;
+ DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time);
+ DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time);
+ if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
+ Case3OK = true;
+ }
+ }
+
+ if (Case1OK) {
+ s->prefetch_bw_equ = s->prefetch_bw1;
+ } else if (Case2OK) {
+ s->prefetch_bw_equ = s->prefetch_bw2;
+ } else if (Case3OK) {
+ s->prefetch_bw_equ = s->prefetch_bw3;
+ } else {
+ s->prefetch_bw_equ = s->prefetch_bw4;
+ }
+
+ s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
+ p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
+ (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK);
+ DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK);
+ DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", __func__, Case3OK);
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
+#endif
+
+ if (s->prefetch_bw_equ > 0) {
+ if (p->display_cfg->gpuvm_enable == true) {
+ s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4);
+ } else {
+ s->Tvm_equ = s->LineTime / 4;
+ }
+
+ if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) {
+ s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes
+ *p->Tr0_trips,
+ s->LineTime / 4);
+ } else {
+ s->Tr0_equ = s->LineTime / 4;
+ }
+ } else {
+ s->Tvm_equ = 0;
+ s->Tr0_equ = 0;
+ DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
+ }
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ);
+ DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ);
+#endif
+ // Use the more stressful prefetch schedule
+ if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
+ *p->dst_y_prefetch = s->dst_y_prefetch_oto;
+ s->TimeForFetchingVM = s->Tvm_oto;
+ s->TimeForFetchingRowInVBlank = s->Tr0_oto;
+
+ *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
+ *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__);
+#endif
+
+ } else {
+ *p->dst_y_prefetch = s->dst_y_prefetch_equ;
+
+ if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted)
+ *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted;
+
+ s->TimeForFetchingVM = s->Tvm_equ;
+ s->TimeForFetchingRowInVBlank = s->Tr0_equ;
+
+ *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
+ *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
+
+ /* equ bw should be propagated so a ceiling of the equ bw is accounted for prior to mode programming.
+ * Overall bandwidth may be lower when going from mode support to mode programming but final pixel data
+ * bandwidth may end up higher than what was calculated in mode support.
+ */
+ *p->RequiredPrefetchBWMax = math_max2(s->prefetch_bw_equ, *p->RequiredPrefetchBWMax);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__);
+#endif
+ }
+
+ // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank)
+ s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw
+
+ s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line);
+ *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime);
+ *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM);
+ DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
+ DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch);
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
+ DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
+ DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us);
+
+ DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk);
+ DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line);
+ DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes);
+ DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw);
+#endif
+ DML_ASSERT(*p->dst_y_prefetch < 64);
+
+ unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime);
+ if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) {
+ *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
+ *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
+ DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
+ DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
+#endif
+ if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
+ if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
+ *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY,
+ (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
+ } else {
+ s->NoTimeToPrefetch = true;
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
+ *p->VRatioPrefetchY = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
+ DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
+#endif
+ }
+
+ *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
+ *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
+ DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
+ DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
+#endif
+ if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
+ if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
+ *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
+ } else {
+ s->NoTimeToPrefetch = true;
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
+ *p->VRatioPrefetchC = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
+ DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
+ DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
+#endif
+ }
+
+ *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime;
+ *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
+ DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
+ DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+ DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
+ DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
+#endif
+ } else {
+ s->NoTimeToPrefetch = true;
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required);
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ);
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixelDataBWLuma = 0;
+ *p->RequiredPrefetchPixelDataBWChroma = 0;
+ }
+ DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM);
+ DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM);
+ DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank);
+ DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime);
+ DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime);
+ DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n");
+ DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
+ DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
+
+ } else {
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n",
+ __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime);
+ s->NoTimeToPrefetch = true;
+ s->TimeForFetchingVM = 0;
+ s->TimeForFetchingRowInVBlank = 0;
+ *p->dst_y_per_vm_vblank = 0;
+ *p->dst_y_per_row_vblank = 0;
+ s->LinesToRequestPrefetchPixelData = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixelDataBWLuma = 0;
+ *p->RequiredPrefetchPixelDataBWChroma = 0;
+ }
+
+ {
+ double prefetch_vm_bw;
+ double prefetch_row_bw;
+
+ if (vm_bytes == 0) {
+ prefetch_vm_bw = 0;
+ } else if (*p->dst_y_per_vm_vblank > 0) {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
+ DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime);
+#endif
+ prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
+#endif
+ } else {
+ prefetch_vm_bw = 0;
+ s->NoTimeToPrefetch = true;
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank);
+ }
+
+ if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) {
+ prefetch_row_bw = 0;
+ } else if (*p->dst_y_per_row_vblank > 0) {
+ prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
+ DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
+#endif
+ } else {
+ prefetch_row_bw = 0;
+ s->NoTimeToPrefetch = true;
+ DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank);
+ }
+
+ *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw);
+ }
+
+ if (s->NoTimeToPrefetch) {
+ s->TimeForFetchingVM = 0;
+ s->TimeForFetchingRowInVBlank = 0;
+ *p->dst_y_per_vm_vblank = 0;
+ *p->dst_y_per_row_vblank = 0;
+ *p->dst_y_prefetch = 0;
+ s->LinesToRequestPrefetchPixelData = 0;
+ *p->VRatioPrefetchY = 0;
+ *p->VRatioPrefetchC = 0;
+ *p->RequiredPrefetchPixelDataBWLuma = 0;
+ *p->RequiredPrefetchPixelDataBWChroma = 0;
+ *p->prefetch_vmrow_bw = 0;
+ }
+
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank);
+ DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank);
+ DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw);
+ DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
+ DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
+ DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch);
+
+ return s->NoTimeToPrefetch;
+}
+
+/*
+ * Number of source lines that fit in the line buffer for one pipe.
+ *
+ * The per-pipe input width is vp_width / num_pipes, or vp_height / num_pipes
+ * when dml_is_vertical_rotation() reports a vertical rotation. That width is
+ * divided by h_ratio when h_ratio is above 1.0 (smaller ratios are treated as
+ * 1.0). The buffer capacity in pixels (line_buffer_size_bits at 57 bits per
+ * pixel) divided by that width is passed through math_floor2(..., 1.0) -
+ * presumably rounding down to a whole line - and the result is capped at
+ * max_line_buffer_lines.
+ *
+ * num_pipes is used as an integer divisor and is not checked against zero.
+ */
+static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines,
+	unsigned int line_buffer_size_bits,
+	unsigned int num_pipes,
+	unsigned int vp_width,
+	unsigned int vp_height,
+	double h_ratio,
+	enum dml2_rotation_angle rotation_angle)
+{
+	const double bits_per_lb_pixel = 57.0;
+	unsigned int pipe_input_width;
+	double scaled_line_width;
+	double lines_fitting;
+
+	/* Vertical rotations take the per-pipe width from the viewport height. */
+	if (dml_is_vertical_rotation(rotation_angle))
+		pipe_input_width = vp_height / num_pipes;
+	else
+		pipe_input_width = vp_width / num_pipes;
+
+	scaled_line_width = pipe_input_width / math_max2(h_ratio, 1.0);
+	lines_fitting = math_floor2(line_buffer_size_bits / bits_per_lb_pixel / scaled_line_width, 1.0);
+
+	return (unsigned int) math_min2((double) max_line_buffer_lines, lines_fitting);
+}
+
+/*
+ * Pick the plane, other than this_plane_idx, with the largest entry in
+ * Trpd_dcfclk_cycles[].
+ *
+ * @this_plane_idx: plane excluded from the search
+ * @num_planes: number of entries of Trpd_dcfclk_cycles[] to examine
+ * @Trpd_dcfclk_cycles: per-plane cycle counts; each entry is compared after a
+ *                      cast to int
+ *
+ * On equal counts the lowest index wins. Returns the index found, or
+ * this_plane_idx when no other plane was selected.
+ */
+static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[])
+{
+	int max_value = -1;
+	int max_idx = -1;
+
+	for (unsigned int i = 0; i < num_planes; i++) {
+		if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) {
+			max_value = Trpd_dcfclk_cycles[i];
+			max_idx = i;
+		}
+	}
+
+	/*
+	 * Only a negative index means that nothing was selected. Index 0 is a
+	 * valid result (plane 0 holds the largest count) and must be returned
+	 * as is rather than being replaced by this_plane_idx.
+	 */
+	if (max_idx < 0) {
+		/* Always false on this path: flags the case where assertions are active. */
+		DML_ASSERT(max_idx >= 0);
+		max_idx = this_plane_idx;
+	}
+
+	return max_idx;
+}
+
+/*
+ * Sum prefetch_swath_bytes[] over every plane index below num_planes except
+ * exclude_plane_idx, and divide the total by bw_mbps.
+ *
+ * An exclude_plane_idx outside [0, num_planes) excludes nothing. bw_mbps is
+ * not checked against zero.
+ */
+static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps)
+{
+	double other_planes_bytes = 0.;
+	unsigned int plane = 0;
+
+	while (plane < num_planes) {
+		if (plane != exclude_plane_idx)
+			other_planes_bytes += prefetch_swath_bytes[plane];
+		plane++;
+	}
+
+	return other_planes_bytes / bw_mbps;
+}
+
+// A global check against the aggregate effect of the per-plane prefetch schedules.
+//
+// For each active plane this computes p->impacted_dst_y_pre[i]: the return-path cycles
+// accumulated by the most impactful other plane (converted to time at estimated_dcfclk_mhz),
+// plus the time for the remaining planes' prefetch_sw_bytes at
+// estimated_urg_bandwidth_required_mbps, expressed in lines of that plane.
+// The result is then compared with the plane's p->dst_y_prefetch[i].
+//
+// scratch: only scratch->CheckGlobalPrefetchAdmissibility_locals is used; it is zeroed on entry.
+// p: inputs, plus the outputs p->impacted_dst_y_pre[] and *p->recalc_prefetch_schedule.
+//
+// Returns s->prefetch_global_check_passed: 0 when any plane's impacted_dst_y_pre exceeds its
+// dst_y_prefetch (in which case *p->recalc_prefetch_schedule is set to 1), 1 otherwise.
+// When p->Tpre_rounded or p->Tpre_oto is NULL the comparison is skipped and the check passes.
+static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p)
+{
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals;
+ unsigned int i, k;
+
+ memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals));
+
+ // start from "passed, no recalculation needed"; only the comparison loop further down can change this
+ *p->recalc_prefetch_schedule = 0;
+ s->prefetch_global_check_passed = 1;
+ // worst case if the rob and cdb is fully hogged
+ // (rob bytes + compressed buffer bytes * DML_MAX_COMPRESSION_RATIO) / 64, rounded up via math_ceil2
+ s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes);
+ DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes);
+ DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes);
+ DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps);
+ DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles);
+#endif
+
+ // calculate the return impact from each plane, request is 256B per dcfclk
+ for (i = 0; i < p->num_active_planes; i++) {
+ // work on scratch copies of the per-plane detile buffer and full swath sizes
+ s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i];
+ s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i];
+ s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i];
+ s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i];
+
+ // dml2_420_10 surfaces: all four sizes are scaled by 1.5 (the result is truncated to unsigned int)
+ if (p->pixel_format[i] == dml2_420_10) {
+ s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5);
+ s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5);
+ s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5);
+ s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5);
+ }
+
+ // luma burst: the detile buffer size in whole chunks, plus (lb_source_lines_l / swath_height_l) swaths
+ s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l);
+ s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]);
+ DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l);
+ DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]);
+ DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]);
+ DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]);
+ DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det);
+#endif
+
+ if (s->src_swath_bytes_c[i] > 0) { // dual_plane
+ // chroma burst: the chroma detile buffer size in whole chunks
+ s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c);
+
+ // chroma swaths (lb_source_lines_c / swath_height_c of them) are only added for the 422 planar formats
+ if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) {
+ s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]);
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c);
+ DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]);
+ DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]);
+ DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]);
+#endif
+ }
+
+ s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate
+ // cycles = (DML_MAX_COMPRESSION_RATIO - 1) * 64 * estimated_dcfclk_mhz * time_to_fill_det_us / 64, rounded up via math_ceil2
+ s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det);
+ DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us);
+ DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]);
+#endif
+ // clamping to worst case delay which is one which occupy the full rob+cdb
+ if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles)
+ s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles;
+ }
+
+ // Figure out the impacted prefetch time for each plane
+ // if impacted_Tpre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw
+ for (i = 0; i < p->num_active_planes; i++) {
+ k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i
+ // the rest of planes (except for k) compete for bw
+ // return-path delay from plane k (cycles / estimated_dcfclk_mhz) ...
+ p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz;
+ // ... plus the prefetch swath bytes of every plane except k, divided by the estimated urgent bandwidth
+ p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps);
+ // convert to lines of plane i; math_ceil2(..., 0.25) presumably rounds up to a quarter line - TODO confirm
+ p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k);
+#endif
+ }
+
+ // Tpre_rounded and Tpre_oto are used here only as a NULL check and for debug logging;
+ // the pass/fail decision compares impacted_dst_y_pre[] against dst_y_prefetch[]
+ if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) {
+ for (i = 0; i < p->num_active_planes; i++) {
+ // a single plane whose impacted prefetch exceeds its schedule fails the whole check
+ if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) {
+ s->prefetch_global_check_passed = 0;
+ *p->recalc_prefetch_schedule = 1;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]);
+ DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]);
+#endif
+ }
+ } else {
+ // likely a mode programming calls, assume support, and no recalc - not used anyways
+ s->prefetch_global_check_passed = 1;
+ *p->recalc_prefetch_schedule = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed);
+ DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule);
+#endif
+
+ return s->prefetch_global_check_passed;
+}
+
+static void calculate_peak_bandwidth_required(
+ struct dml2_core_internal_scratch *s,
+ struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p)
+{
+ unsigned int n;
+ unsigned int m;
+
+ struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals;
+
+ memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw);
+ DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes);
+#endif
+
+ for (unsigned int k = 0; k < p->num_active_planes; ++k) {
+ l->unity_array[k] = 1.0;
+ l->zero_array[k] = 0.0;
+ }
+
+ for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (n = 0; n < dml2_core_internal_bw_max; n++) {
+ get_urgent_bandwidth_required(
+ &s->get_urgent_bandwidth_required_locals,
+ p->display_cfg,
+ m,
+ n,
+ 0, //inc_flip_bw,
+ 0, //use_qual_row_bw
+ p->num_active_planes,
+ p->num_of_dpp,
+ p->dcc_dram_bw_nom_overhead_factor_p0,
+ p->dcc_dram_bw_nom_overhead_factor_p1,
+ p->dcc_dram_bw_pref_overhead_factor_p0,
+ p->dcc_dram_bw_pref_overhead_factor_p1,
+ p->mall_prefetch_sdp_overhead_factor,
+ p->mall_prefetch_dram_overhead_factor,
+ p->surface_read_bandwidth_l,
+ p->surface_read_bandwidth_c,
+ l->zero_array, //PrefetchBandwidthLuma,
+ l->zero_array, //PrefetchBandwidthChroma,
+ l->zero_array, //PrefetchBWMax
+ l->zero_array,
+ l->zero_array,
+ l->zero_array,
+ p->dpte_row_bw,
+ p->meta_row_bw,
+ l->zero_array, //prefetch_cursor_bw,
+ l->zero_array, //prefetch_vmrow_bw,
+ l->zero_array, //flip_bw,
+ l->zero_array,
+ l->zero_array,
+ l->zero_array,
+ l->zero_array,
+ l->zero_array,
+ l->zero_array,
+ p->surface_avg_vactive_required_bw[m][n],
+ p->surface_peak_required_bw[m][n]);
+
+ p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required(
+ &s->get_urgent_bandwidth_required_locals,
+ p->display_cfg,
+ m,
+ n,
+ 0, //inc_flip_bw,
+ 0, //use_qual_row_bw
+ p->num_active_planes,
+ p->num_of_dpp,
+ p->dcc_dram_bw_nom_overhead_factor_p0,
+ p->dcc_dram_bw_nom_overhead_factor_p1,
+ p->dcc_dram_bw_pref_overhead_factor_p0,
+ p->dcc_dram_bw_pref_overhead_factor_p1,
+ p->mall_prefetch_sdp_overhead_factor,
+ p->mall_prefetch_dram_overhead_factor,
+ p->surface_read_bandwidth_l,
+ p->surface_read_bandwidth_c,
+ l->zero_array, //PrefetchBandwidthLuma,
+ l->zero_array, //PrefetchBandwidthChroma,
+ l->zero_array, //PrefetchBWMax
+ p->excess_vactive_fill_bw_l,
+ p->excess_vactive_fill_bw_c,
+ p->cursor_bw,
+ p->dpte_row_bw,
+ p->meta_row_bw,
+ l->zero_array, //prefetch_cursor_bw,
+ l->zero_array, //prefetch_vmrow_bw,
+ l->zero_array, //flip_bw,
+ p->urgent_burst_factor_l,
+ p->urgent_burst_factor_c,
+ p->urgent_burst_factor_cursor,
+ p->urgent_burst_factor_prefetch_l,
+ p->urgent_burst_factor_prefetch_c,
+ p->urgent_burst_factor_prefetch_cursor,
+ l->surface_dummy_bw,
+ p->surface_peak_required_bw[m][n]);
+
+ p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
+ &s->get_urgent_bandwidth_required_locals,
+ p->display_cfg,
+ m,
+ n,
+ p->inc_flip_bw,
+ 0, //use_qual_row_bw
+ p->num_active_planes,
+ p->num_of_dpp,
+ p->dcc_dram_bw_nom_overhead_factor_p0,
+ p->dcc_dram_bw_nom_overhead_factor_p1,
+ p->dcc_dram_bw_pref_overhead_factor_p0,
+ p->dcc_dram_bw_pref_overhead_factor_p1,
+ p->mall_prefetch_sdp_overhead_factor,
+ p->mall_prefetch_dram_overhead_factor,
+ p->surface_read_bandwidth_l,
+ p->surface_read_bandwidth_c,
+ p->prefetch_bandwidth_l,
+ p->prefetch_bandwidth_c,
+ p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw
+ p->excess_vactive_fill_bw_l,
+ p->excess_vactive_fill_bw_c,
+ p->cursor_bw,
+ p->dpte_row_bw,
+ p->meta_row_bw,
+ p->prefetch_cursor_bw,
+ p->prefetch_vmrow_bw,
+ p->flip_bw,
+ p->urgent_burst_factor_l,
+ p->urgent_burst_factor_c,
+ p->urgent_burst_factor_cursor,
+ p->urgent_burst_factor_prefetch_l,
+ p->urgent_burst_factor_prefetch_c,
+ p->urgent_burst_factor_prefetch_cursor,
+ l->surface_dummy_bw,
+ p->surface_peak_required_bw[m][n]);
+
+ p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required(
+ &s->get_urgent_bandwidth_required_locals,
+ p->display_cfg,
+ m,
+ n,
+ 0, //inc_flip_bw
+ 1, //use_qual_row_bw
+ p->num_active_planes,
+ p->num_of_dpp,
+ p->dcc_dram_bw_nom_overhead_factor_p0,
+ p->dcc_dram_bw_nom_overhead_factor_p1,
+ p->dcc_dram_bw_pref_overhead_factor_p0,
+ p->dcc_dram_bw_pref_overhead_factor_p1,
+ p->mall_prefetch_sdp_overhead_factor,
+ p->mall_prefetch_dram_overhead_factor,
+ p->surface_read_bandwidth_l,
+ p->surface_read_bandwidth_c,
+ p->prefetch_bandwidth_l,
+ p->prefetch_bandwidth_c,
+ p->prefetch_bandwidth_max, // to prevent ms/mp mismatch where mp prefetch bw > ms prefetch bw
+ p->excess_vactive_fill_bw_l,
+ p->excess_vactive_fill_bw_c,
+ p->cursor_bw,
+ p->dpte_row_bw,
+ p->meta_row_bw,
+ p->prefetch_cursor_bw,
+ p->prefetch_vmrow_bw,
+ p->flip_bw,
+ p->urgent_burst_factor_l,
+ p->urgent_burst_factor_c,
+ p->urgent_burst_factor_cursor,
+ p->urgent_burst_factor_prefetch_l,
+ p->urgent_burst_factor_prefetch_c,
+ p->urgent_burst_factor_prefetch_cursor,
+ l->surface_dummy_bw,
+ p->surface_peak_required_bw[m][n]);
+
+ p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
+ &s->get_urgent_bandwidth_required_locals,
+ p->display_cfg,
+ m,
+ n,
+ p->inc_flip_bw,
+ 0, //use_qual_row_bw
+ p->num_active_planes,
+ p->num_of_dpp,
+ p->dcc_dram_bw_nom_overhead_factor_p0,
+ p->dcc_dram_bw_nom_overhead_factor_p1,
+ p->dcc_dram_bw_pref_overhead_factor_p0,
+ p->dcc_dram_bw_pref_overhead_factor_p1,
+ p->mall_prefetch_sdp_overhead_factor,
+ p->mall_prefetch_dram_overhead_factor,
+ p->surface_read_bandwidth_l,
+ p->surface_read_bandwidth_c,
+ p->prefetch_bandwidth_l,
+ p->prefetch_bandwidth_c,
+ p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw
+ p->excess_vactive_fill_bw_l,
+ p->excess_vactive_fill_bw_c,
+ p->cursor_bw,
+ p->dpte_row_bw,
+ p->meta_row_bw,
+ p->prefetch_cursor_bw,
+ p->prefetch_vmrow_bw,
+ p->flip_bw,
+ l->unity_array,
+ l->unity_array,
+ l->unity_array,
+ l->unity_array,
+ l->unity_array,
+ l->unity_array,
+ l->surface_dummy_bw,
+ p->surface_peak_required_bw[m][n]);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]);
+ DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
+ DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
+ DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]);
+#endif
+ DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]);
+ }
+ }
+}
+
+/*
+ * check_urgent_bandwidth_support() - compare required urgent bandwidth with what is available.
+ *
+ * Outputs:
+ * @frac_urg_bandwidth_nom: larger of the sdp and dram ratios
+ *	non_urg_bandwidth_required / urg_bandwidth_available in the sys_active state.
+ * @frac_urg_bandwidth_mall: the same ratio for the svp_prefetch state; a bandwidth type
+ *	whose available value is not greater than zero contributes 0.0.
+ * @vactive_bandwidth_support_ok: true when urg_vactive_bandwidth_required does not exceed
+ *	urg_bandwidth_available for sdp and dram in sys_active, and also in svp_prefetch
+ *	when @mall_allocated_for_dcn_mbytes is non-zero.
+ * @bandwidth_support_ok: true when urg_bandwidth_required passes the same comparisons
+ *	and the fraction(s) above do not exceed 1.0.
+ *
+ * The sys_active ratios are computed without a zero check on the available bandwidth.
+ */
+static void check_urgent_bandwidth_support(
+	double *frac_urg_bandwidth_nom,
+	double *frac_urg_bandwidth_mall,
+	bool *vactive_bandwidth_support_ok, // vactive ok
+	bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok
+
+	unsigned int mall_allocated_for_dcn_mbytes,
+	double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
+{
+	const unsigned int sdp = dml2_core_internal_bw_sdp;
+	const unsigned int dram = dml2_core_internal_bw_dram;
+	const bool mall_in_use = mall_allocated_for_dcn_mbytes > 0;
+
+	/* One row per soc state; each row is indexed by bandwidth type. */
+	const double *avail_active = urg_bandwidth_available[dml2_core_internal_soc_state_sys_active];
+	const double *avail_svp = urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch];
+	const double *non_urg_active = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active];
+	const double *non_urg_svp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch];
+	const double *urg_active = urg_bandwidth_required[dml2_core_internal_soc_state_sys_active];
+	const double *urg_svp = urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch];
+	const double *vactive_active = urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active];
+	const double *vactive_svp = urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch];
+
+	double nom_frac_sdp = non_urg_active[sdp] / avail_active[sdp];
+	double nom_frac_dram = non_urg_active[dram] / avail_active[dram];
+	double mall_frac_sdp = 0.0;
+	double mall_frac_dram = 0.0;
+
+	/* The svp_prefetch ratios stay at 0.0 unless there is bandwidth to divide by. */
+	if (avail_svp[sdp] > 0)
+		mall_frac_sdp = non_urg_svp[sdp] / avail_svp[sdp];
+	if (avail_svp[dram] > 0)
+		mall_frac_dram = non_urg_svp[dram] / avail_svp[dram];
+
+	*bandwidth_support_ok = true;
+	*vactive_bandwidth_support_ok = true;
+
+	/*
+	 * Urgent bandwidth: required must fit within available at sdp and at dram in
+	 * sys_active, and also in svp_prefetch when MALL is allocated.
+	 * The comparisons are written as !(a <= b) so that a NaN operand fails the check.
+	 */
+	if (!(urg_active[sdp] <= avail_active[sdp]))
+		*bandwidth_support_ok = false;
+	if (!(urg_active[dram] <= avail_active[dram]))
+		*bandwidth_support_ok = false;
+
+	if (mall_in_use) {
+		if (!(urg_svp[sdp] <= avail_svp[sdp]))
+			*bandwidth_support_ok = false;
+		if (!(urg_svp[dram] <= avail_svp[dram]))
+			*bandwidth_support_ok = false;
+	}
+
+	/* FractionOfUrgentBandwidth / FractionOfUrgentBandwidthMALL: worst of sdp and dram. */
+	*frac_urg_bandwidth_nom = math_max2(nom_frac_sdp, nom_frac_dram);
+	*frac_urg_bandwidth_mall = math_max2(mall_frac_sdp, mall_frac_dram);
+
+	if (!(*frac_urg_bandwidth_nom <= 1.0))
+		*bandwidth_support_ok = false;
+
+	if (mall_in_use && !(*frac_urg_bandwidth_mall <= 1.0))
+		*bandwidth_support_ok = false;
+
+	/* Same comparisons for the vactive-only requirement. */
+	if (!(vactive_active[sdp] <= avail_active[sdp]))
+		*vactive_bandwidth_support_ok = false;
+	if (!(vactive_active[dram] <= avail_active[dram]))
+		*vactive_bandwidth_support_ok = false;
+
+	if (mall_in_use) {
+		if (!(vactive_svp[sdp] <= avail_svp[sdp]))
+			*vactive_bandwidth_support_ok = false;
+		if (!(vactive_svp[dram] <= avail_svp[dram]))
+			*vactive_bandwidth_support_ok = false;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, nom_frac_sdp);
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, nom_frac_dram);
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom);
+
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, mall_frac_sdp);
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, mall_frac_dram);
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall);
+	DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok);
+
+	for (unsigned int state = 0; state < dml2_core_internal_soc_state_max; state++) {
+		for (unsigned int bw = 0; bw < dml2_core_internal_bw_max; bw++) {
+			DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
+				__func__, dml2_core_internal_soc_state_type_str(state), dml2_core_internal_bw_type_str(bw),
+				urg_bandwidth_available[state][bw], (urg_bandwidth_available[state][bw] < urg_bandwidth_required[state][bw]) ? "<" : ">=", urg_bandwidth_required[state][bw]);
+		}
+	}
+#endif
+}
+
+/*
+ * get_bandwidth_available_for_immediate_flip() - bandwidth left over for immediate flip.
+ *
+ * For @eval_state, subtracts the required urgent bandwidth from the available urgent
+ * bandwidth on the sdp and dram entries and returns the smaller of the two differences.
+ * The result is not clamped, so it is negative when the requirement exceeds what is
+ * available.
+ */
+static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,
+	double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip
+	double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
+{
+	/* Rows for the evaluated state, indexed by bandwidth type. */
+	const double *avail = urg_bandwidth_available[eval_state];
+	const double *required = urg_bandwidth_required[eval_state];
+
+	const double sdp_headroom = avail[dml2_core_internal_bw_sdp] - required[dml2_core_internal_bw_sdp];
+	const double dram_headroom = avail[dml2_core_internal_bw_dram] - required[dml2_core_internal_bw_dram];
+	double headroom;
+
+	/* The tighter of the two paths limits the flip bandwidth. */
+	if (sdp_headroom < dram_headroom)
+		headroom = sdp_headroom;
+	else
+		headroom = dram_headroom;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, avail[dml2_core_internal_bw_sdp]);
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, avail[dml2_core_internal_bw_dram]);
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, required[dml2_core_internal_bw_sdp]);
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, required[dml2_core_internal_bw_dram]);
+	DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, sdp_headroom);
+	DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, dram_headroom);
+	DML_LOG_VERBOSE("DML::%s: flip_bw_available_mbps = %f\n", __func__, headroom);
+#endif
+
+	return headroom;
+}
+
+/*
+ * calculate_immediate_flip_bandwidth_support() - check urgent bandwidth with flip included.
+ *
+ * Outputs:
+ * @frac_urg_bandwidth_flip: larger of the sdp and dram ratios
+ *	non_urg_bandwidth_required_flip / urg_bandwidth_available for @eval_state.
+ * @flip_bandwidth_support_ok: true when, for every bandwidth type in @eval_state, the
+ *	available urgent bandwidth is at least urg_bandwidth_required_flip, and the
+ *	fraction above does not exceed 1.0.
+ *
+ * Asserts that the urgent requirement is never below the non-urgent one.
+ */
+static void calculate_immediate_flip_bandwidth_support(
+	// Output
+	double *frac_urg_bandwidth_flip,
+	bool *flip_bandwidth_support_ok,
+
+	// Input
+	enum dml2_core_internal_soc_state_type eval_state,
+	double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
+	double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
+{
+	/* Rows for the evaluated state, indexed by bandwidth type. */
+	const double *avail = urg_bandwidth_available[eval_state];
+	const double *urg_req = urg_bandwidth_required_flip[eval_state];
+	const double *non_urg_req = non_urg_bandwidth_required_flip[eval_state];
+
+	double frac_sdp = non_urg_req[dml2_core_internal_bw_sdp] / avail[dml2_core_internal_bw_sdp];
+	double frac_dram = non_urg_req[dml2_core_internal_bw_dram] / avail[dml2_core_internal_bw_dram];
+
+	*flip_bandwidth_support_ok = true;
+	for (unsigned int bw = 0; bw < dml2_core_internal_bw_max; bw++) { // check sdp and dram
+		/* Written as !(a >= b) so that a NaN operand fails the check. */
+		if (!(avail[bw] >= urg_req[bw]))
+			*flip_bandwidth_support_ok = false;
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(bw));
+		DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, avail[bw]);
+		DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_req[bw]);
+		DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_req[bw]);
+		DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
+#endif
+		DML_ASSERT(urg_req[bw] >= non_urg_req[bw]);
+	}
+
+	/* Report the worse of the two fractions and require it to stay within 1.0. */
+	if (frac_sdp > frac_dram)
+		*frac_urg_bandwidth_flip = frac_sdp;
+	else
+		*frac_urg_bandwidth_flip = frac_dram;
+
+	if (!(*frac_urg_bandwidth_flip <= 1.0))
+		*flip_bandwidth_support_ok = false;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_sdp);
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_dram);
+	DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip);
+	DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
+
+	for (unsigned int state = 0; state < dml2_core_internal_soc_state_max; state++) {
+		for (unsigned int bw = 0; bw < dml2_core_internal_bw_max; bw++) {
+			DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
+				__func__, dml2_core_internal_soc_state_type_str(state), dml2_core_internal_bw_type_str(bw),
+				urg_bandwidth_available[state][bw], (urg_bandwidth_available[state][bw] < urg_bandwidth_required_flip[state][bw]) ? "<" : ">=", urg_bandwidth_required_flip[state][bw]);
+		}
+	}
+#endif
+}
+
+/*
+ * CalculateFlipSchedule() - derive the immediate-flip schedule and bandwidth for one pipe.
+ *
+ * Flip work is only evaluated when TotImmediateFlipBytes > 0 and at least one of
+ * GPUVMEnable / dcc_mrq_enable is set. Otherwise the numeric outputs are zeroed and
+ * *ImmediateFlipSupportedForPipe mirrors iflip_enable.
+ *
+ * When flip work is evaluated there are two modes:
+ *  - use_lb_flip_bw set (mode check): *final_flip_bw is a lower bound on the flip
+ *    bandwidth computed from the worst-case flip time (0 when iflip_enable is clear).
+ *    *dst_y_per_vm_flip and *dst_y_per_row_flip are set to 1 and are not used.
+ *    Support requires min_row_time >= Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded.
+ *  - use_lb_flip_bw clear: the pipe gets the share per_pipe_flip_bytes / TotImmediateFlipBytes
+ *    of BandwidthAvailableForImmediateFlip. Tvm_flip / Tr0_flip are the max of the
+ *    latency-based trip time, the bandwidth-based time and LineTime / 4; the dst_y
+ *    outputs are those times in lines, rounded up to a quarter line. Flip is rejected
+ *    when dst_y_per_vm_flip >= 32, dst_y_per_row_flip >= 16, or
+ *    Tvm_flip + 2 * Tr0_flip > min_row_time.
+ *
+ * Intermediate values are kept in s->CalculateFlipSchedule_locals.
+ * use_one_row_for_frame_flip is accepted but not referenced by this function.
+ */
+static void CalculateFlipSchedule(
+	struct dml2_core_internal_scratch *s,
+	bool iflip_enable,
+	bool use_lb_flip_bw,
+	double HostVMInefficiencyFactor,
+	double Tvm_trips_flip,
+	double Tr0_trips_flip,
+	double Tvm_trips_flip_rounded,
+	double Tr0_trips_flip_rounded,
+	bool GPUVMEnable,
+	double vm_bytes, // vm_bytes
+	double DPTEBytesPerRow, // dpte_row_bytes
+	double BandwidthAvailableForImmediateFlip,
+	unsigned int TotImmediateFlipBytes,
+	enum dml2_source_format_class SourcePixelFormat,
+	double LineTime,
+	double VRatio,
+	double VRatioChroma,
+	double Tno_bw_flip,
+	unsigned int dpte_row_height,
+	unsigned int dpte_row_height_chroma,
+	bool use_one_row_for_frame_flip,
+	unsigned int max_flip_time_us,
+	unsigned int max_flip_time_lines,
+	unsigned int per_pipe_flip_bytes,
+	unsigned int meta_row_bytes,
+	unsigned int meta_row_height,
+	unsigned int meta_row_height_chroma,
+	bool dcc_mrq_enable,
+
+	// Output
+	double *dst_y_per_vm_flip,
+	double *dst_y_per_row_flip,
+	double *final_flip_bw,
+	bool *ImmediateFlipSupportedForPipe)
+{
+	struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals;
+
+	l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha;
+	l->dpte_row_bytes = DPTEBytesPerRow;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
+	DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us);
+	DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines);
+	DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+	DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
+	DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw);
+	DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable);
+	DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
+	DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime);
+	DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip);
+	DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip);
+	DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip);
+	DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded);
+	DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded);
+	DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes);
+	DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
+	DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes);
+	DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes);
+	DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height);
+	DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height);
+	DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio);
+#endif
+
+	if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) {
+		/*
+		 * min_row_time: time to scan out the shortest row among the row types in
+		 * use (dpte rows when GPUVM is on, meta rows when the DCC MRQ is on).
+		 */
+		if (l->dual_plane) {
+			if (dcc_mrq_enable && GPUVMEnable) {
+				l->min_row_height = math_min2(dpte_row_height, meta_row_height);
+				l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma);
+			} else if (GPUVMEnable) {
+				l->min_row_height = dpte_row_height;
+				l->min_row_height_chroma = dpte_row_height_chroma;
+			} else {
+				l->min_row_height = meta_row_height;
+				l->min_row_height_chroma = meta_row_height_chroma;
+			}
+			l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma);
+		} else {
+			if (dcc_mrq_enable && GPUVMEnable)
+				l->min_row_height = math_min2(dpte_row_height, meta_row_height);
+			else if (GPUVMEnable)
+				l->min_row_height = dpte_row_height;
+			else
+				l->min_row_height = meta_row_height;
+
+			l->min_row_time = l->min_row_height * LineTime / VRatio;
+		}
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time);
+#endif
+		DML_ASSERT(l->min_row_time > 0);
+
+		if (use_lb_flip_bw) {
+			// For mode check, calculation the flip bw requirement with worst case flip time
+			l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio),
+				math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us));
+
+			//The lower bound on flip bandwidth
+			// Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required
+			l->lb_flip_bw = 0;
+
+			if (iflip_enable) {
+				l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor;
+				l->num_rows = 2;
+				l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes);
+				l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes;
+				/* Largest bandwidth needed by vm+row, vm alone, or rows alone within max_flip_time. */
+				l->lb_flip_bw = math_max3(
+					l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip),
+					l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded),
+					l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time);
+				DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes);
+				DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes);
+				DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes);
+				DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip));
+				DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded));
+				DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
+
+				if (l->lb_flip_bw > 0) {
+					DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw);
+					DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows);
+					/* Lines = estimated Tvm_flip / LineTime; the whole time, including Tno_bw_flip, is divided. */
+					DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, (Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw) / LineTime);
+					DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows);
+					DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded));
+				}
+#endif
+				/*
+				 * Also honour the 31-line (vm) and 15-line (row) limits.
+				 * NOTE(review): the vm term subtracts Tno_bw_flip (a time) from a
+				 * bandwidth; "bytes / (31 * LineTime - Tno_bw_flip)" may have been
+				 * intended. Left unchanged pending confirmation.
+				 */
+				l->lb_flip_bw = math_max3(l->lb_flip_bw,
+					l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip,
+					(l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
+
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip);
+				DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
+#endif
+			}
+
+			*final_flip_bw = l->lb_flip_bw;
+
+			*dst_y_per_vm_flip = 1; // not used
+			*dst_y_per_row_flip = 1; // not used
+			*ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded);
+		} else {
+			if (iflip_enable) {
+				/* This pipe's share of the flip bandwidth, proportional to its flip bytes. */
+				l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i)
+
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes);
+				DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
+				DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW);
+				DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes);
+#endif
+				if (l->ImmediateFlipBW == 0) {
+					/*
+					 * NOTE(review): with zero times the dst_y outputs below are 0,
+					 * so final_flip_bw divides by zero (inf/NaN in double math).
+					 */
+					l->Tvm_flip = 0;
+					l->Tr0_flip = 0;
+				} else {
+					l->Tvm_flip = math_max3(Tvm_trips_flip,
+						Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW,
+						LineTime / 4.0);
+
+					l->Tr0_flip = math_max3(Tr0_trips_flip,
+						(l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW,
+						LineTime / 4.0);
+				}
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor);
+				DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes));
+
+				DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip);
+				DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip);
+#endif
+				/* Convert the times to lines, rounded up to a quarter line. */
+				*dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0;
+				*dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0;
+
+				*final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime),
+					(l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime));
+
+				if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) {
+					*ImmediateFlipSupportedForPipe = false;
+				} else {
+					*ImmediateFlipSupportedForPipe = iflip_enable;
+				}
+			} else {
+				l->Tvm_flip = 0;
+				l->Tr0_flip = 0;
+				*dst_y_per_vm_flip = 0;
+				*dst_y_per_row_flip = 0;
+				*final_flip_bw = 0;
+				*ImmediateFlipSupportedForPipe = iflip_enable;
+			}
+		}
+	} else {
+		/* Nothing to fetch for a flip: no flip bytes, or neither GPUVM nor the DCC MRQ is enabled. */
+		l->Tvm_flip = 0;
+		l->Tr0_flip = 0;
+		*dst_y_per_vm_flip = 0;
+		*dst_y_per_row_flip = 0;
+		*final_flip_bw = 0;
+		*ImmediateFlipSupportedForPipe = iflip_enable;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	if (!use_lb_flip_bw) {
+		DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip);
+		DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip);
+		DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip);
+		DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip);
+		/* NOTE(review): min_row_time is not assigned on the "nothing to fetch" path, so the value printed here may be left over from an earlier call. */
+		DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time);
+	}
+	DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
+	DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
+#endif
+}
+
+static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
+ struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p)
+{
+ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
+
+ enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy;
+ double reserved_vblank_time_us;
+ bool FoundCriticalSurface = false;
+
+ s->TotalActiveWriteback = 0;
+ p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
+#endif
+
+ p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
+ p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
+ p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
+ p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
+ p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
+ p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
+ p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
+ if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) {
+ p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
+ p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
+ p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
+ p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
+ }
+ p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
+ DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
+ DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
+ DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time);
+ DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime);
+ DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
+ DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
+ DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
+ DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
+ DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
+ DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
+ DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
+ DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
+ DML_LOG_VERBOSE("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us);
+#endif
+
+ s->TotalActiveWriteback = 0;
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
+ }
+ }
+
+ if (s->TotalActiveWriteback <= 1) {
+ p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
+ } else {
+ p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
+ }
+ if (p->USRRetrainingRequired)
+ p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
+
+ if (s->TotalActiveWriteback <= 1) {
+ p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
+ p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
+ } else {
+ p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
+ p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
+ }
+
+ if (p->USRRetrainingRequired)
+ p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
+
+ if (p->USRRetrainingRequired)
+ p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
+ DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
+ DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
+ DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired);
+ DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
+#endif
+
+ s->TotalPixelBW = 0.0;
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
+ double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
+ * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz);
+ }
+
+ *p->global_fclk_change_supported = true;
+ *p->global_dram_clock_change_supported = true;
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
+ double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+ double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+ double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio;
+ double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio;
+ double LBBitPerPixel = 57;
+
+ s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1));
+ s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
+ DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
+ DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel);
+ DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio);
+ DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps);
+#endif
+
+ s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz);
+ s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz);
+
+ s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
+ if (p->UnboundedRequestEnabled) {
+ s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW;
+ }
+
+ s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
+ s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k]));
+ s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio;
+
+ s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz;
+
+ if (p->NumberOfActiveSurfaces > 1) {
+ s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio;
+ }
+
+ if (p->BytePerPixelDETC[k] > 0) {
+ s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
+ s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k]));
+ s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c;
+ s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz;
+ if (p->NumberOfActiveSurfaces > 1) {
+ s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c;
+ }
+ s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
+ } else {
+ s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
+ }
+
+ s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark;
+ s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark;
+ s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
+ s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us;
+
+ if (p->VActiveLatencyHidingMargin)
+ p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
+
+ if (p->VActiveLatencyHidingUs)
+ p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding;
+
+ if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0
+ / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+ * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+ / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0);
+ if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
+ s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
+ }
+ s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
+
+ s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
+
+ s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
+ s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
+ }
+ p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
+
+ uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy;
+ reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000;
+
+ p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported;
+ if (s->ActiveFCLKChangeLatencyMargin[k] > 0)
+ p->FCLKChangeSupport[k] = dml2_pstate_change_vactive;
+ else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency)
+ p->FCLKChangeSupport[k] = dml2_pstate_change_vblank;
+
+ if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported)
+ *p->global_fclk_change_supported = false;
+
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported;
+ if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) {
+ if (p->display_cfg->overrides.all_streams_blanked ||
+ (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency))
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive;
+ else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
+ else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
+ } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
+ else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
+ else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr;
+ else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp;
+ else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame;
+
+ if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported)
+ *p->global_dram_clock_change_supported = false;
+
+ s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
+ s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k]));
+ s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
+ s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k];
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
+ DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
+ DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
+ DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
+#endif
+ p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
+
+ if (p->BytePerPixelDETC[k] > 0) {
+ s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
+ s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
+ s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];
+
+ if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
+ p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
+ else
+ p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
+ DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
+ DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
+#endif
+ }
+ }
+
+ *p->g6_temp_read_support = true;
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) &&
+ (s->g6_temp_read_latency_margin[k] < 0)) {
+ *p->g6_temp_read_support = false;
+ }
+ }
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
+ || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
+ FoundCriticalSurface = true;
+ *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
+ }
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
+ DML_LOG_VERBOSE("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
+ DML_LOG_VERBOSE("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
+ DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
+#endif
+}
+
+/*
+ * calculate_bytes_to_fetch_required_to_hide_latency - per-plane byte budget
+ * needed to cover p->latency_to_hide_us.
+ *
+ * For every non-phantom plane:
+ *  - the latency is converted to destination lines using the stream line time
+ *    (h_total / pixel_clock_khz * 1000);
+ *  - destination lines are scaled by the plane's luma/chroma v_ratio and passed
+ *    through math_ceil2() against the corresponding swath height;
+ *  - the resulting source lines are multiplied by DPP count, swath width and
+ *    bytes per pixel;
+ *  - meta-row bytes are added when DCC is enabled and p->mrq_present is set;
+ *  - dpte-row bytes are added when display_cfg->gpuvm_enable is true.
+ *
+ * Outputs are p->bytes_required_l[] and p->bytes_required_c[]. Entries that
+ * belong to phantom planes are left untouched.
+ */
+static void calculate_bytes_to_fetch_required_to_hide_latency(
+		struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p)
+{
+	unsigned int dst_lines_to_hide;
+	unsigned int src_lines_to_hide_l;
+	unsigned int src_lines_to_hide_c;
+	unsigned int plane_index;
+	unsigned int stream_index;
+
+	for (plane_index = 0; plane_index < p->num_active_planes; plane_index++) {
+		if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[plane_index]))
+			continue;
+
+		stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index;
+
+		dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us /
+			((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total /
+			(double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0));
+
+		src_lines_to_hide_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio * dst_lines_to_hide,
+			p->swath_height_l[plane_index]);
+		p->bytes_required_l[plane_index] = src_lines_to_hide_l * p->num_of_dpp[plane_index] * p->swath_width_l[plane_index] * p->byte_per_pix_l[plane_index];
+
+		src_lines_to_hide_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane1.v_ratio * dst_lines_to_hide,
+			p->swath_height_c[plane_index]);
+		p->bytes_required_c[plane_index] = src_lines_to_hide_c * p->num_of_dpp[plane_index] * p->swath_width_c[plane_index] * p->byte_per_pix_c[plane_index];
+
+		if (p->display_cfg->plane_descriptors[plane_index].surface.dcc.enable && p->mrq_present) {
+			/*
+			 * A zero row height would make the division yield inf/NaN, and
+			 * converting that to unsigned int is undefined. Guard luma the
+			 * same way chroma is guarded.
+			 */
+			if (p->meta_row_height_l[plane_index]) {
+				p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->meta_row_height_l[plane_index]) * p->meta_row_bytes_per_row_ub_l[plane_index];
+			}
+			if (p->meta_row_height_c[plane_index]) {
+				p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->meta_row_height_c[plane_index]) * p->meta_row_bytes_per_row_ub_c[plane_index];
+			}
+		}
+
+		if (p->display_cfg->gpuvm_enable == true) {
+			/* Same zero-height guard for the luma page-table rows. */
+			if (p->dpte_row_height_l[plane_index]) {
+				p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->dpte_row_height_l[plane_index]) * p->dpte_bytes_per_row_l[plane_index];
+			}
+			if (p->dpte_row_height_c[plane_index]) {
+				p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->dpte_row_height_c[plane_index]) * p->dpte_bytes_per_row_c[plane_index];
+			}
+		}
+	}
+}
+
+/*
+ * calculate_vactive_det_fill_latency - worst-case fill delay per plane.
+ *
+ * For each non-phantom plane, and for every (soc state, bandwidth type) pair,
+ * the excess bandwidth (peak required minus average vactive required) is
+ * split between luma and chroma in proportion to their surface read
+ * bandwidths. For the DRAM bandwidth type each share is additionally divided
+ * by the matching DCC overhead factor. Whenever a share is positive, the time
+ * needed to move bytes_required_{l,c} at that rate is a candidate delay; the
+ * largest candidate (or 0.0 if there is none) is written to
+ * vactive_det_fill_delay_us[plane]. Phantom-plane entries are not written.
+ */
+static noinline_for_stack void calculate_vactive_det_fill_latency(
+		const struct dml2_display_cfg *display_cfg,
+		unsigned int num_active_planes,
+		unsigned int bytes_required_l[],
+		unsigned int bytes_required_c[],
+		double dcc_dram_bw_nom_overhead_factor_p0[],
+		double dcc_dram_bw_nom_overhead_factor_p1[],
+		double surface_read_bw_l[],
+		double surface_read_bw_c[],
+		double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
+		double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
+		/* output */
+		double vactive_det_fill_delay_us[])
+{
+	for (unsigned int plane = 0; plane < num_active_planes; plane++) {
+		double total_read_bw;
+		double worst_delay_us = 0.0;
+
+		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane]))
+			continue;
+
+		total_read_bw = surface_read_bw_l[plane] + surface_read_bw_c[plane];
+
+		for (unsigned int state = 0; state < dml2_core_internal_soc_state_max; state++) {
+			for (unsigned int type = 0; type < dml2_core_internal_bw_max; type++) {
+				const double excess_bw = surface_peak_required_bw[state][type][plane] - surface_avg_vactive_required_bw[state][type][plane];
+				/* The DCC overhead factors only apply to DRAM bandwidth. */
+				const double overhead_l = type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[plane] : 1.0;
+				const double overhead_c = type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[plane] : 1.0;
+				const double excess_bw_l = excess_bw * surface_read_bw_l[plane] / total_read_bw / overhead_l;
+				const double excess_bw_c = excess_bw * surface_read_bw_c[plane] / total_read_bw / overhead_c;
+
+				if (excess_bw_l > 0.0)
+					worst_delay_us = math_max2(worst_delay_us, bytes_required_l[plane] / excess_bw_l);
+
+				if (excess_bw_c > 0.0)
+					worst_delay_us = math_max2(worst_delay_us, bytes_required_c[plane] / excess_bw_c);
+			}
+		}
+
+		vactive_det_fill_delay_us[plane] = worst_delay_us;
+	}
+}
+
+/*
+ * calculate_excess_vactive_bandwidth_required - bandwidth needed to move the
+ * required bytes within the per-plane fill-delay override.
+ *
+ * For each non-phantom plane the outputs are bytes_required_{l,c} divided by
+ * overrides.max_vactive_det_fill_delay_us when that override is positive, and
+ * 0.0 otherwise. Phantom-plane entries are not written.
+ */
+static void calculate_excess_vactive_bandwidth_required(
+		const struct dml2_display_cfg *display_cfg,
+		unsigned int num_active_planes,
+		unsigned int bytes_required_l[],
+		unsigned int bytes_required_c[],
+		/* outputs */
+		double excess_vactive_fill_bw_l[],
+		double excess_vactive_fill_bw_c[])
+{
+	for (unsigned int idx = 0; idx < num_active_planes; idx++) {
+		double fill_bw_l = 0.0;
+		double fill_bw_c = 0.0;
+
+		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[idx]))
+			continue;
+
+		if (display_cfg->plane_descriptors[idx].overrides.max_vactive_det_fill_delay_us > 0) {
+			const double max_fill_delay_us = (double)display_cfg->plane_descriptors[idx].overrides.max_vactive_det_fill_delay_us;
+
+			fill_bw_l = (double)bytes_required_l[idx] / max_fill_delay_us;
+			fill_bw_c = (double)bytes_required_c[idx] / max_fill_delay_us;
+		}
+
+		excess_vactive_fill_bw_l[idx] = fill_bw_l;
+		excess_vactive_fill_bw_c[idx] = fill_bw_c;
+	}
+}
+
+/*
+ * uclk_khz_to_dram_bw_mbps - scale a UCLK value by the DRAM configuration.
+ *
+ * Returns uclk_khz * channel_count * channel_width_bytes *
+ * transactions_per_clock / 1000, evaluated in double precision.
+ */
+static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config)
+{
+	const double clk_khz = (double)uclk_khz;
+
+	return (clk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
+}
+
+/*
+ * dram_bw_kbps_to_uclk_mhz - inverse of the UCLK-to-bandwidth scaling.
+ *
+ * Returns bw_kbps divided by (channel_count * channel_width_bytes *
+ * transactions_per_clock) and then by 1000.
+ */
+static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
+{
+	const double bandwidth_kbps = (double)bw_kbps;
+
+	return bandwidth_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
+}
+
+/*
+ * get_qos_param_index - select the per-UCLK-DPM QoS parameter entry for a UCLK.
+ *
+ * Walks per_uclk_dpm_params[] from index 0 and stops at the first entry whose
+ * minimum_uclk_khz is zero or greater than uclk_freq_khz. The result is the
+ * index of the last entry accepted before stopping, or 0 when the very first
+ * entry already stops the walk.
+ *
+ * When every one of the DML_MAX_CLK_TABLE_SIZE entries is accepted, the last
+ * entry is returned. The previous form assigned "i - 1" before testing the
+ * entry and therefore returned DML_MAX_CLK_TABLE_SIZE - 2 in that case.
+ */
+static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
+{
+	unsigned int i;
+	unsigned int index = 0;
+
+	for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
+		/* Unsigned arguments: use %u/%lu, not %d/%ld. */
+		DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%u].minimum_uclk_khz = %lu\n", __func__, i, (unsigned long)per_uclk_dpm_params[i].minimum_uclk_khz);
+
+		if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
+			per_uclk_dpm_params[i].minimum_uclk_khz == 0) {
+			break;
+		}
+
+		/* Entry i is populated and its threshold is met. */
+		index = i;
+	}
+	DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %lu\n", __func__, uclk_freq_khz);
+	DML_LOG_VERBOSE("DML::%s: index = %u\n", __func__, index);
+	return index;
+}
+
+/*
+ * get_active_min_uclk_dpm_index - find the UCLK table entry equal to a frequency.
+ *
+ * Scans clk_table->uclk.clk_values_khz[0 .. num_clk_values) for an exact
+ * match with uclk_freq_khz and returns its index.
+ *
+ * If nothing matches, DML_ASSERT() is invoked and the returned value equals
+ * clk_table->uclk.num_clk_values, i.e. one past the last scanned entry.
+ * NOTE(review): callers must not use that value as a table index; confirm
+ * that DML_ASSERT() is fatal in every build that can reach this path.
+ */
+static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
+{
+	unsigned int i;
+	bool clk_entry_found = false;
+
+	for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
+		/* Unsigned arguments: use %u/%lu, not %d/%ld. */
+		DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%u] = %lu\n", __func__, i, (unsigned long)clk_table->uclk.clk_values_khz[i]);
+
+		if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) {
+			clk_entry_found = true;
+			break;
+		}
+	}
+
+	DML_ASSERT(clk_entry_found);
+#if defined(__DML_VBA_DEBUG__)
+	DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %lu\n", __func__, uclk_freq_khz);
+	DML_LOG_VERBOSE("DML::%s: index = %u\n", __func__, i);
+#endif
+	return i;
+}
+
+/*
+ * get_pipe_flip_bytes - byte count made up of VM, meta-row and dpte-row data.
+ *
+ * Returns the sum of two separately truncated terms:
+ *   (unsigned)(vm_bytes * hostvm_inefficiency_factor + 2 * meta_row_bytes)
+ *   (unsigned)(2 * dpte_row_bytes * hostvm_inefficiency_factor)
+ * The meta-row contribution is not scaled by the inefficiency factor.
+ */
+static unsigned int get_pipe_flip_bytes(
+		double hostvm_inefficiency_factor,
+		unsigned int vm_bytes,
+		unsigned int dpte_row_bytes,
+		unsigned int meta_row_bytes)
+{
+	const unsigned int vm_and_meta_bytes = (unsigned int) ((vm_bytes * hostvm_inefficiency_factor) + 2 * meta_row_bytes);
+	const unsigned int dpte_bytes = (unsigned int) (2 * dpte_row_bytes * hostvm_inefficiency_factor);
+
+	return vm_and_meta_bytes + dpte_bytes;
+}
+
+/*
+ * calculate_hostvm_inefficiency_factor - derive the HostVM scaling factors.
+ *
+ * Both outputs are 1 unless gpuvm_enable and hostvm_enable are both set.
+ * In that case *HostVMInefficiencyFactor is the ratio
+ * urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only
+ * and *HostVMInefficiencyFactorPrefetch is that same ratio, raised to 4 when
+ * it is below 4 and remote_iommu_outstanding_translations is smaller than
+ * max_outstanding_reqs.
+ */
+static void calculate_hostvm_inefficiency_factor(
+		double *HostVMInefficiencyFactor,
+		double *HostVMInefficiencyFactorPrefetch,
+
+		bool gpuvm_enable,
+		bool hostvm_enable,
+		unsigned int remote_iommu_outstanding_translations,
+		unsigned int max_outstanding_reqs,
+		double urg_bandwidth_avail_active_pixel_and_vm,
+		double urg_bandwidth_avail_active_vm_only)
+{
+	double active_factor;
+	double prefetch_factor;
+
+	if (!gpuvm_enable || !hostvm_enable) {
+		*HostVMInefficiencyFactor = 1;
+		*HostVMInefficiencyFactorPrefetch = 1;
+		return;
+	}
+
+	active_factor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only;
+	prefetch_factor = active_factor;
+
+	if ((prefetch_factor < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs))
+		prefetch_factor = 4;
+
+	*HostVMInefficiencyFactor = active_factor;
+	*HostVMInefficiencyFactorPrefetch = prefetch_factor;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm);
+	DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only);
+	DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor);
+	DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch);
+#endif
+}
+
+/*
+ * Lookup table of (uclk_khz, blackout_us) pairs with room for
+ * DML_MAX_CLK_TABLE_SIZE entries. Units follow the field-name suffixes.
+ */
+struct dml2_core_internal_g6_temp_read_blackouts_table {
+ struct {
+ unsigned int uclk_khz; /* UCLK value for this row (kHz, per the name) */
+ unsigned int blackout_us; /* blackout value for this row (us, per the name) */
+ } entries[DML_MAX_CLK_TABLE_SIZE];
+};
+
+/*
+ * Built-in (uclk_khz, blackout_us) rows, listed in ascending uclk_khz order.
+ * Any entries[] slots beyond the eight rows below are zero-initialized by the
+ * usual C rules for partially initialized aggregates.
+ */
+struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = {
+	.entries = {
+		{ .uclk_khz = 96000, .blackout_us = 23 },
+		{ .uclk_khz = 435000, .blackout_us = 10 },
+		{ .uclk_khz = 521000, .blackout_us = 10 },
+		{ .uclk_khz = 731000, .blackout_us = 8 },
+		{ .uclk_khz = 822000, .blackout_us = 8 },
+		{ .uclk_khz = 962000, .blackout_us = 5 },
+		{ .uclk_khz = 1069000, .blackout_us = 5 },
+		{ .uclk_khz = 1187000, .blackout_us = 5 },
+	},
+};
+
+/*
+ * get_g6_temp_read_blackout_us - blackout value for a given UCLK.
+ *
+ * If soc->power_management_parameters.g6_temp_read_blackout_us[0] is
+ * positive, the SoC bounding-box array is treated as an override and the
+ * element at min_clk_index is returned.
+ *
+ * Otherwise core_dcn4_g6_temp_read_blackout_table is walked from the start
+ * while entries are populated (uclk_khz != 0) and uclk_freq_khz is at least
+ * the entry's uclk_khz; the blackout_us of the last such entry is returned.
+ * When even the first entry does not qualify, its blackout_us is returned.
+ */
+static double get_g6_temp_read_blackout_us(
+		struct dml2_soc_bb *soc,
+		unsigned int uclk_freq_khz,
+		unsigned int min_clk_index)
+{
+	const struct dml2_core_internal_g6_temp_read_blackouts_table *table = &core_dcn4_g6_temp_read_blackout_table;
+	unsigned int selected_us = table->entries[0].blackout_us;
+	unsigned int idx = 0;
+
+	/* overrides are present in the SoC BB */
+	if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0)
+		return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index];
+
+	/* use internal table */
+	while (idx < DML_MAX_CLK_TABLE_SIZE &&
+		table->entries[idx].uclk_khz != 0 &&
+		uclk_freq_khz >= table->entries[idx].uclk_khz) {
+		selected_us = table->entries[idx].blackout_us;
+		idx++;
+	}
+
+	return (double)selected_us;
+}
+
+/*
+ * get_max_urgent_latency_us - sum of three latency terms:
+ *   - maximum_latency_when_urgent_uclk_cycles of the DPM entry at
+ *     min_clk_index, divided by uclk_freq_mhz and scaled by
+ *     (1 + umc_max_latency_margin / 100);
+ *   - mall_overhead_fclk_cycles divided by FabricClock;
+ *   - max_round_trip_to_furthest_cs_fclk_cycles divided by FabricClock and
+ *     scaled by (1 + fabric_max_transport_latency_margin / 100).
+ */
+static double get_max_urgent_latency_us(
+		struct dml2_dcn4x_soc_qos_params *dcn4x,
+		double uclk_freq_mhz,
+		double FabricClock,
+		unsigned int min_clk_index)
+{
+	const double umc_latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz
+		* (1 + dcn4x->umc_max_latency_margin / 100.0);
+	const double mall_latency = dcn4x->mall_overhead_fclk_cycles / FabricClock;
+	const double fabric_latency = dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock
+		* (1 + dcn4x->fabric_max_transport_latency_margin / 100.0);
+
+	return umc_latency + mall_latency + fabric_latency;
+}
+
+/*
+ * calculate_pstate_keepout_dst_lines - express the DRAM clock change watermark
+ * in destination lines for each plane.
+ *
+ * For every non-phantom plane among display_cfg->num_planes, the watermark is
+ * divided by the stream line time (h_total * 1000 / pixel_clock_khz), passed
+ * through math_ceil(), and capped at v_total - 1 of the plane's stream.
+ * Phantom-plane entries of pstate_keepout_dst_lines[] are not written.
+ */
+static void calculate_pstate_keepout_dst_lines(
+		const struct dml2_display_cfg *display_cfg,
+		const struct dml2_core_internal_watermarks *watermarks,
+		unsigned int pstate_keepout_dst_lines[])
+{
+	for (unsigned int plane = 0; plane < display_cfg->num_planes; plane++) {
+		const struct dml2_stream_parameters *stream;
+		double line_time_us;
+		unsigned int keepout_lines;
+
+		if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane]))
+			continue;
+
+		stream = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane].stream_index];
+		line_time_us = (double)stream->timing.h_total * 1000.0 / (double)stream->timing.pixel_clock_khz;
+
+		keepout_lines = (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / line_time_us);
+
+		/* Never report more lines than the frame has, minus one. */
+		if (keepout_lines > stream->timing.v_total - 1)
+			keepout_lines = stream->timing.v_total - 1;
+
+		pstate_keepout_dst_lines[plane] = keepout_lines;
+	}
+}
+
+static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib,
+ const struct dml2_display_cfg *display_cfg)
+{
+ struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
+ struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
+ struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+ struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
+#endif
+ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+
+ double min_return_bw_for_latency;
+ unsigned int k;
+
+ mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
+
+ calculate_hostvm_inefficiency_factor(
+ &s->HostVMInefficiencyFactor,
+ &s->HostVMInefficiencyFactorPrefetch,
+
+ display_cfg->gpuvm_enable,
+ display_cfg->hostvm_enable,
+ mode_lib->ip.remote_iommu_outstanding_translations,
+ mode_lib->soc.max_outstanding_reqs,
+ mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
+ mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
+
+ mode_lib->ms.Total3dlutActive = 0;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
+ mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1;
+
+ // Calculate tdlut schedule related terms
+ calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK;
+ calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+ calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
+ calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
+ calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
+ calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+ calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+ calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag;
+ calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling);
+
+ // output
+ calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
+ calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
+ calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
+ calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
+ calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+ calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
+ calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
+
+ calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
+ }
+
+ min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
+ s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+ CalculateExtraLatency(
+ display_cfg,
+ mode_lib->ip.rob_buffer_size_kbytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
+ s->ReorderingBytes,
+ mode_lib->ms.DCFCLK,
+ mode_lib->ms.FabricClock,
+ mode_lib->ip.pixel_chunk_size_kbytes,
+ min_return_bw_for_latency,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.NoOfDPP,
+ mode_lib->ms.dpte_group_bytes,
+ s->tdlut_bytes_per_group,
+ s->HostVMInefficiencyFactor,
+ s->HostVMInefficiencyFactorPrefetch,
+ mode_lib->soc.hostvm_min_page_size_kbytes,
+ mode_lib->soc.qos_parameters.qos_type,
+ !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
+ mode_lib->soc.max_outstanding_reqs,
+ mode_lib->ms.support.request_size_bytes_luma,
+ mode_lib->ms.support.request_size_bytes_chroma,
+ mode_lib->ip.meta_chunk_size_kbytes,
+ mode_lib->ip.dchub_arb_to_ret_delay,
+ mode_lib->ms.TripToMemory,
+ mode_lib->ip.hostvm_mode,
+
+ // output
+ &mode_lib->ms.ExtraLatency,
+ &mode_lib->ms.ExtraLatency_sr,
+ &mode_lib->ms.ExtraLatencyPrefetch);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+ s->impacted_dst_y_pre[k] = 0;
+
+ s->recalc_prefetch_schedule = 0;
+ s->recalc_prefetch_done = 0;
+ do {
+ mode_lib->ms.support.PrefetchSupported = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
+
+ s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->ms.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+ s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->ms.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+ struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
+
+ mode_lib->ms.TWait[k] = CalculateTWait(
+ display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.TripToMemory,
+ !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
+ get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0);
+
+ myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k];
+ myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK;
+ myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
+ myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k];
+ myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
+ myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+ myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+ myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+ myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
+ myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
+ myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
+ myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
+ myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
+ myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+ myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
+ myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
+ myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
+ myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ myPipe->ODMMode = mode_lib->ms.ODMMode[k];
+ myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+ myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
+ myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
+ myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]);
+#endif
+ CalculatePrefetchSchedule_params->display_cfg = display_cfg;
+ CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
+ CalculatePrefetchSchedule_params->myPipe = myPipe;
+ CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k];
+ CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
+ CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
+ CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
+ CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
+ CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
+ CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
+ CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
+ CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
+ CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k];
+ CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+ CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
+ CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
+ CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
+ CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
+ CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
+ CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch;
+ CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
+ CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k];
+ CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k];
+ CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k];
+ CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
+ CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k];
+ CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k];
+ CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
+ CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k];
+ CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k];
+ CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k];
+ CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k];
+ CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k];
+ CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k];
+ CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory;
+ CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency;
+ CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+ CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
+ CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
+ CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
+ CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
+ CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
+ CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
+ CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
+ CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+ CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
+ CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
+ CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
+ CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k];
+ CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k];
+
+ // output
+ CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
+ CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
+ CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k];
+ CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k];
+ CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k];
+ CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k];
+ CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
+ CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &mode_lib->ms.RequiredPrefetchBWMax[k];
+ CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
+ CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
+ CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
+ CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
+ CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
+ CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
+ CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
+ CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
+ CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
+ CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
+ CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
+ CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
+ CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
+ CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
+ CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
+ CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
+ CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
+ CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
+ CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
+ CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
+ CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k];
+
+ mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
+
+ mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k];
+ DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank);
+ DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
+ } // for k num_planes
+
+ CalculateDCFCLKDeepSleepTdlut(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.SwathWidthY,
+ mode_lib->ms.SwathWidthC,
+ mode_lib->ms.NoOfDPP,
+ mode_lib->ms.PSCL_FACTOR,
+ mode_lib->ms.PSCL_FACTOR_CHROMA,
+ mode_lib->ms.RequiredDPPCLK,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
+ mode_lib->soc.return_bus_width_bytes,
+ mode_lib->ms.RequiredDISPCLK,
+ s->tdlut_bytes_to_deliver,
+ s->prefetch_swath_time_us,
+
+ /* Output */
+ &mode_lib->ms.dcfclk_deepsleep);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (mode_lib->ms.dst_y_prefetch[k] < 2.0
+ || mode_lib->ms.LinesForVM[k] >= 32.0
+ || mode_lib->ms.LinesForDPTERow[k] >= 16.0
+ || mode_lib->ms.NoTimeForPrefetch[k] == true
+ || s->DSTYAfterScaler[k] > 8) {
+ mode_lib->ms.support.PrefetchSupported = false;
+ DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]);
+ }
+ }
+
+ mode_lib->ms.support.DynamicMetadataSupported = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) {
+ mode_lib->ms.support.DynamicMetadataSupported = false;
+ }
+ }
+
+ mode_lib->ms.support.VRatioInPrefetchSupported = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
+ mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
+ mode_lib->ms.support.VRatioInPrefetchSupported = false;
+ DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
+ DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
+ DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported);
+ }
+ }
+
+ mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported;
+
+ // By default, do not recalc prefetch schedule
+ s->recalc_prefetch_schedule = 0;
+
+ // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
+ if (mode_lib->ms.support.PrefetchSupported) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ // Calculate Urgent burst factor for prefetch
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]);
+#endif
+ CalculateUrgentBurstFactor(
+ &display_cfg->plane_descriptors[k],
+ mode_lib->ms.swath_width_luma_ub[k],
+ mode_lib->ms.swath_width_chroma_ub[k],
+ mode_lib->ms.SwathHeightY[k],
+ mode_lib->ms.SwathHeightC[k],
+ s->line_times[k],
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.VRatioPreY[k],
+ mode_lib->ms.VRatioPreC[k],
+ mode_lib->ms.BytePerPixelInDETY[k],
+ mode_lib->ms.BytePerPixelInDETC[k],
+ mode_lib->ms.DETBufferSizeY[k],
+ mode_lib->ms.DETBufferSizeC[k],
+ /* Output */
+ &mode_lib->ms.UrgentBurstFactorLumaPre[k],
+ &mode_lib->ms.UrgentBurstFactorChromaPre[k],
+ &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
+ }
+
+ // Calculate urgent bandwidth required, both urg and non urg peak bandwidth
+ // assume flip bw is 0 at this point
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+ mode_lib->ms.final_flip_bw[k] = 0;
+
+ calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required;
+ calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required;
+ calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual;
+ calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required;
+ calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw;
+ calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
+
+ calculate_peak_bandwidth_params->display_cfg = display_cfg;
+ calculate_peak_bandwidth_params->inc_flip_bw = 0;
+ calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
+ calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
+ calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
+ calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
+
+ calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
+ calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
+ calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
+ calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
+ calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
+ calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
+ calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
+ calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
+
+ calculate_peak_bandwidth_required(
+ &mode_lib->scratch,
+ calculate_peak_bandwidth_params);
+
+ // Check urg peak bandwidth against available urg bw
+ // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
+ check_urgent_bandwidth_support(
+ &s->dummy_single[0], // double* frac_urg_bandwidth
+ &s->dummy_single[1], // double* frac_urg_bandwidth_mall
+ &mode_lib->ms.support.UrgVactiveBandwidthSupport,
+ &mode_lib->ms.support.PrefetchBandwidthSupported,
+
+ mode_lib->soc.mall_allocated_for_dcn_mbytes,
+ mode_lib->ms.support.non_urg_bandwidth_required,
+ mode_lib->ms.support.urg_vactive_bandwidth_required,
+ mode_lib->ms.support.urg_bandwidth_required,
+ mode_lib->ms.support.urg_bandwidth_available);
+
+ mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported;
+ DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) {
+ mode_lib->ms.support.PrefetchSupported = false;
+ DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
+ }
+ }
+
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) {
+ CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes;
+ CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
+ CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+ CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+ CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
+ CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
+ CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY;
+ CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC;
+ CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte;
+ CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY;
+ CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC;
+ CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
+ CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
+ CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
+ CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded;
+ CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto;
+ CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
+ CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
+ CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch;
+ if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024)
+ CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024;
+
+ CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) /
+ ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0);
+
+ // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
+ CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule;
+ CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
+ mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params);
+ s->recalc_prefetch_done = 1;
+ s->recalc_prefetch_schedule = 1;
+ }
+#endif
+ } // prefetch schedule ok, do urg bw and flip schedule
+ } while (s->recalc_prefetch_schedule);
+
+ // Flip Schedule
+ // Both prefetch schedule and BW okay
+ if (mode_lib->ms.support.PrefetchSupported == true) {
+ mode_lib->ms.BandwidthAvailableForImmediateFlip =
+ get_bandwidth_available_for_immediate_flip(
+ dml2_core_internal_soc_state_sys_active,
+ mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
+ mode_lib->ms.support.urg_bandwidth_available);
+
+ mode_lib->ms.TotImmediateFlipBytes = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->plane_descriptors[k].immediate_flip) {
+ s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
+ s->HostVMInefficiencyFactor,
+ mode_lib->ms.vm_bytes[k],
+ mode_lib->ms.DPTEBytesPerRow[k],
+ mode_lib->ms.meta_row_bytes[k]);
+ } else {
+ s->per_pipe_flip_bytes[k] = 0;
+ }
+ mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
+
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ CalculateFlipSchedule(
+ &mode_lib->scratch,
+ display_cfg->plane_descriptors[k].immediate_flip,
+ 1, // use_lb_flip_bw
+ s->HostVMInefficiencyFactor,
+ s->Tvm_trips_flip[k],
+ s->Tr0_trips_flip[k],
+ s->Tvm_trips_flip_rounded[k],
+ s->Tr0_trips_flip_rounded[k],
+ display_cfg->gpuvm_enable,
+ mode_lib->ms.vm_bytes[k],
+ mode_lib->ms.DPTEBytesPerRow[k],
+ mode_lib->ms.BandwidthAvailableForImmediateFlip,
+ mode_lib->ms.TotImmediateFlipBytes,
+ display_cfg->plane_descriptors[k].pixel_format,
+ (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->ms.Tno_bw_flip[k],
+ mode_lib->ms.dpte_row_height[k],
+ mode_lib->ms.dpte_row_height_chroma[k],
+ mode_lib->ms.use_one_row_for_frame_flip[k],
+ mode_lib->ip.max_flip_time_us,
+ mode_lib->ip.max_flip_time_lines,
+ s->per_pipe_flip_bytes[k],
+ mode_lib->ms.meta_row_bytes[k],
+ s->meta_row_height_luma[k],
+ s->meta_row_height_chroma[k],
+ mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
+
+ /* Output */
+ &mode_lib->ms.dst_y_per_vm_flip[k],
+ &mode_lib->ms.dst_y_per_row_flip[k],
+ &mode_lib->ms.final_flip_bw[k],
+ &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
+ }
+
+ calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
+ calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
+ calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
+ calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
+ calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
+ calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
+
+ calculate_peak_bandwidth_params->display_cfg = display_cfg;
+ calculate_peak_bandwidth_params->inc_flip_bw = 1;
+ calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
+ calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
+ calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
+ calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
+
+ calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
+ calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
+ calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
+ calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
+ calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
+ calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
+ calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
+ calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
+
+ calculate_peak_bandwidth_required(
+ &mode_lib->scratch,
+ calculate_peak_bandwidth_params);
+
+ calculate_immediate_flip_bandwidth_support(
+ &s->dummy_single[0], // double* frac_urg_bandwidth_flip
+ &mode_lib->ms.support.ImmediateFlipSupport,
+
+ dml2_core_internal_soc_state_sys_active,
+ mode_lib->ms.support.urg_bandwidth_required_flip,
+ mode_lib->ms.support.non_urg_bandwidth_required_flip,
+ mode_lib->ms.support.urg_bandwidth_available);
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
+ mode_lib->ms.support.ImmediateFlipSupport = false;
+ }
+
+ } else { // if prefetch not support, assume iflip is not supported too
+ mode_lib->ms.support.ImmediateFlipSupport = false;
+ }
+
+ s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
+ s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
+ s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr;
+ s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
+ s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
+ s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
+ s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
+ s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
+ s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
+ s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
+ s->mSOCParameters.USRRetrainingLatency = 0;
+ s->mSOCParameters.SMNLatency = 0;
+ s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx);
+ s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx);
+ s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
+ s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
+
+ CalculateWatermarks_params->display_cfg = display_cfg;
+ CalculateWatermarks_params->USRRetrainingRequired = false;
+ CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
+ CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
+ CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
+ CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
+ CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
+ CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
+ CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+ CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
+ CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
+ CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
+ CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
+ CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
+ CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
+ CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
+ CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
+ CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
+ CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
+ CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
+ CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
+ CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
+ CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
+ CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
+ CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
+ CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
+ CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
+ CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
+
+ // Output
+ CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark
+ CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport;
+ CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported;
+ CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[]
+ CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[]
+ CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport;
+ CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported;
+ CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported
+ CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport;
+ CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support;
+ CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin;
+ CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs;
+
+ CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
+
+ calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]);
+ DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__);
+
+}
+
+
+static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params)
+{
+ struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
+ const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
+ const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
+
+ double outstanding_latency_us = 0;
+
+ struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
+ struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
+ struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
+ struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
+ struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
+ unsigned int k, m, n;
+
+ memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
+ memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support));
+
+ mode_lib->ms.num_active_planes = display_cfg->num_planes;
+ get_stream_output_bpp(s->OutputBpp, display_cfg);
+
+ mode_lib->ms.state_idx = in_out_params->min_clk_index;
+ mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000);
+ mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000);
+ mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000);
+ mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000;
+ mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000;
+ mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000;
+ mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000;
+ mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000;
+ mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
+ mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
+ mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000);
+ mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
+ mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
+
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__);
+ DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
+ DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
+ DML_LOG_VERBOSE("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index);
+ DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
+ DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps);
+ DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
+ DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
+ DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
+ DML_LOG_VERBOSE("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK);
+ DML_LOG_VERBOSE("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz);
+ DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz);
+ DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz);
+ DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock);
+ DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes);
+ DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+ DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
+#endif
+
+ CalculateMaxDETAndMinCompressedBufferSize(
+ mode_lib->ip.config_return_buffer_size_in_kbytes,
+ mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
+ mode_lib->ip.rob_buffer_size_kbytes,
+ mode_lib->ip.max_num_dpp,
+ display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
+ display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
+ mode_lib->ip.dcn_mrq_present,
+
+ /* Output */
+ &mode_lib->ms.MaxTotalDETInKByte,
+ &mode_lib->ms.NomDETInKByte,
+ &mode_lib->ms.MinCompressedBufferSizeInKByte);
+
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
+
+ /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
+
+ /*Scale Ratio, taps Support Check*/
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
+ // Many core tests are still setting scaling parameters "incorrectly"
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false
+ && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) {
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
+ } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0
+ || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1)
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps
+ || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps
+ || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
+ && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 ||
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 ||
+ (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) ||
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio ||
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio ||
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps ||
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) {
+ mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
+ }
+ }
+
+ /*Source Format, Pixel Format and Scan Support Check*/
+ mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+ mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculateBytePerPixelAndBlockSizes(
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->plane_descriptors[k].surface.tiling,
+ display_cfg->plane_descriptors[k].surface.plane0.pitch,
+ display_cfg->plane_descriptors[k].surface.plane1.pitch,
+
+ /* Output */
+ &mode_lib->ms.BytePerPixelY[k],
+ &mode_lib->ms.BytePerPixelC[k],
+ &mode_lib->ms.BytePerPixelInDETY[k],
+ &mode_lib->ms.BytePerPixelInDETC[k],
+ &mode_lib->ms.Read256BlockHeightY[k],
+ &mode_lib->ms.Read256BlockHeightC[k],
+ &mode_lib->ms.Read256BlockWidthY[k],
+ &mode_lib->ms.Read256BlockWidthC[k],
+ &mode_lib->ms.MacroTileHeightY[k],
+ &mode_lib->ms.MacroTileHeightC[k],
+ &mode_lib->ms.MacroTileWidthY[k],
+ &mode_lib->ms.MacroTileWidthC[k],
+ &mode_lib->ms.surf_linear128_l[k],
+ &mode_lib->ms.surf_linear128_c[k]);
+ }
+
+ /*Bandwidth Support Check*/
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+ mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+ mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
+ } else {
+ mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+
+ mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width *
+ display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+ DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0);
+ DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]);
+#endif
+ }
+
+ // Writeback bandwidth
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
+ mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+ * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+ / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
+ * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
+ / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0;
+ } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+ * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+ / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
+ * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
+ / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0;
+ } else {
+ mode_lib->ms.WriteBandwidth[k][0] = 0.0;
+ }
+ }
+
+ /*Writeback Latency support check*/
+ mode_lib->ms.support.WritebackLatencySupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 &&
+ (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) {
+ mode_lib->ms.support.WritebackLatencySupport = false;
+ }
+ }
+
+
+ /* Writeback Scale Ratio and Taps Support Check */
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps
+ || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) {
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
+ }
+ if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
+ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
+ }
+ }
+ }
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ CalculateSinglePipeDPPCLKAndSCLThroughput(
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
+ mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
+ /* Output */
+ &mode_lib->ms.PSCL_FACTOR[k],
+ &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
+ &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
+ }
+
+ // Max Viewport Size support
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
+ s->MaximumSwathWidthSupportLuma = 15360;
+ } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video
+ s->MaximumSwathWidthSupportLuma = 7680 + 16;
+ } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video
+ s->MaximumSwathWidthSupportLuma = 4320 + 16;
+ } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha
+ s->MaximumSwathWidthSupportLuma = 5120 + 16;
+ } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp
+ s->MaximumSwathWidthSupportLuma = 3072 + 16;
+ } else {
+ s->MaximumSwathWidthSupportLuma = 6144 + 16;
+ }
+
+ if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
+ s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0);
+ } else {
+ s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
+ }
+
+ unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits;
+ unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits;
+
+/*
+#if defined(DV_BUILD)
+ // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming.
+ if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) {
+ lb_buffer_size_bits_luma = 34620 * 57;
+ lb_buffer_size_bits_chroma = 13560 * 57;
+ }
+#endif
+*/
+ mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /
+ (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0));
+ if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
+ mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
+ } else {
+ mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /
+ (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0));
+ }
+
+ mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
+ mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
+
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
+
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma);
+ DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
+ }
+
+ /* Cursor Support Check */
+ mode_lib->ms.support.CursorSupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
+ if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false)
+ mode_lib->ms.support.CursorSupport = false;
+ }
+ }
+
+ /* Valid Pitch Check */
+ mode_lib->ms.support.PitchSupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+
+ // data pitch
+ unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k];
+
+ if (mode_lib->ms.surf_linear128_l[k])
+ alignment_l = alignment_l / 2;
+
+ mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l);
+ if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
+ unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k];
+
+ if (mode_lib->ms.surf_linear128_c[k])
+ alignment_c = alignment_c / 2;
+ mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c);
+ } else {
+ mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch;
+ }
+
+ if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch ||
+ mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) {
+ mode_lib->ms.support.PitchSupport = false;
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch);
+ DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch);
+ DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport);
+#endif
+ }
+
+ // meta pitch
+ if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) {
+ mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch,
+ display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
+
+ if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch)
+ mode_lib->ms.support.PitchSupport = false;
+
+ if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
+ mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch,
+ display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
+
+ if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch)
+ mode_lib->ms.support.PitchSupport = false;
+ }
+ } else {
+ mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0;
+ mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0;
+ }
+ }
+
+ mode_lib->ms.support.ViewportExceedsSurface = false;
+ if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width ||
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) {
+ mode_lib->ms.support.ViewportExceedsSurface = true;
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
+ DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width);
+ DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
+ DML_LOG_VERBOSE("DML::%s: k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height);
+ DML_LOG_VERBOSE("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface);
+#endif
+ }
+ if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
+ if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width ||
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) {
+ mode_lib->ms.support.ViewportExceedsSurface = true;
+ }
+ }
+ }
+ }
+
+ CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
+ CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
+ CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
+ CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l;
+ CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c;
+ CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
+ CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2];
+ CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ // output
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3];
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4];
+ CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5];
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6];
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7];
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8];
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26];
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27];
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9];
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10];
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11];
+ CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
+ CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1];
+ CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2];
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
+
+ // This call is only to find out whether there is enough DET space to support the full viewport in 1 pipe.
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
+
+ mode_lib->ms.TotalNumberOfActiveDPP = 0;
+ mode_lib->ms.support.TotalAvailablePipesSupport = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ /*Number Of DSC Slices*/
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) {
+
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices;
+ else {
+ if (s->PixelClockBackEnd[k] > 4800) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4));
+ } else if (s->PixelClockBackEnd[k] > 2400) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
+ } else if (s->PixelClockBackEnd[k] > 1200) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
+ } else if (s->PixelClockBackEnd[k] > 340) {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
+ }
+ }
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
+ }
+
+ CalculateODMMode(
+ mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
+ mode_lib->ms.max_dispclk_freq_mhz,
+ false, // DSCEnable
+ mode_lib->ms.TotalNumberOfActiveDPP,
+ mode_lib->ip.max_num_dpp,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+
+ /* Output */
+ &s->TotalAvailablePipesSupportNoDSC,
+ &s->NumberOfDPPNoDSC,
+ &s->ODMModeNoDSC,
+ &s->RequiredDISPCLKPerSurfaceNoDSC);
+
+ CalculateODMMode(
+ mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
+ mode_lib->ms.max_dispclk_freq_mhz,
+ true, // DSCEnable
+ mode_lib->ms.TotalNumberOfActiveDPP,
+ mode_lib->ip.max_num_dpp,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+
+ /* Output */
+ &s->TotalAvailablePipesSupportDSC,
+ &s->NumberOfDPPDSC,
+ &s->ODMModeDSC,
+ &s->RequiredDISPCLKPerSurfaceDSC);
+
+ CalculateOutputLink(
+ &mode_lib->scratch,
+ ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000),
+ ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000),
+ ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000),
+ mode_lib->soc.phy_downspread_percent,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ s->PixelClockBackEnd[k],
+ s->OutputBpp[k],
+ mode_lib->ip.maximum_dsc_bits_per_component,
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout,
+ s->ODMModeNoDSC,
+ s->ODMModeDSC,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate,
+
+ /* Output */
+ &mode_lib->ms.RequiresDSC[k],
+ &mode_lib->ms.RequiresFEC[k],
+ &mode_lib->ms.OutputBpp[k],
+ &mode_lib->ms.OutputType[k],
+ &mode_lib->ms.OutputRate[k],
+ &mode_lib->ms.RequiredSlots[k]);
+
+ if (s->OutputBpp[k] == 0.0) {
+ s->OutputBpp[k] = mode_lib->ms.OutputBpp[k];
+ }
+
+ if (mode_lib->ms.RequiresDSC[k] == false) {
+ mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC;
+ mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC;
+ if (!s->TotalAvailablePipesSupportNoDSC)
+ mode_lib->ms.support.TotalAvailablePipesSupport = false;
+ mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC;
+ } else {
+ mode_lib->ms.ODMMode[k] = s->ODMModeDSC;
+ mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC;
+ if (!s->TotalAvailablePipesSupportDSC)
+ mode_lib->ms.support.TotalAvailablePipesSupport = false;
+ mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC;
+ }
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
+#endif
+
+ // ensure the number of dsc slices is an integer multiple based on ODM mode
+ mode_lib->ms.support.DSCSlicesODMModeSupported = true;
+ if (mode_lib->ms.RequiresDSC[k]) {
+ // fail a ms check if the override num_slices doesn't align with odm mode setting
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) {
+ if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+ mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0);
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+ mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12);
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+ mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0);
+#if defined(__DML_VBA_DEBUG__)
+ if (!mode_lib->ms.support.DSCSlicesODMModeSupported) {
+ DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices);
+ DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
+ }
+#endif
+ } else {
+ // safeguard to ensure the dml derived dsc slices and odm setting are compatible
+ if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0);
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 12;
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0);
+ }
+
+ } else {
+ mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
+ }
+ }
+
+ mode_lib->ms.support.incorrect_imall_usage = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
+ mode_lib->ms.support.incorrect_imall_usage = 1;
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.MPCCombine[k] = false;
+ mode_lib->ms.NoOfDPP[k] = 1;
+
+ if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) {
+ mode_lib->ms.MPCCombine[k] = false;
+ mode_lib->ms.NoOfDPP[k] = 4;
+ } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) {
+ mode_lib->ms.MPCCombine[k] = false;
+ mode_lib->ms.NoOfDPP[k] = 3;
+ } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) {
+ mode_lib->ms.MPCCombine[k] = false;
+ mode_lib->ms.NoOfDPP[k] = 2;
+ } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) {
+ mode_lib->ms.MPCCombine[k] = true;
+ mode_lib->ms.NoOfDPP[k] = 2;
+ mode_lib->ms.TotalNumberOfActiveDPP++;
+ } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) {
+ mode_lib->ms.MPCCombine[k] = false;
+ mode_lib->ms.NoOfDPP[k] = 1;
+ if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
+ DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__);
+ }
+ } else {
+ if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
+ mode_lib->ms.MPCCombine[k] = true;
+ mode_lib->ms.NoOfDPP[k] = 2;
+ mode_lib->ms.TotalNumberOfActiveDPP++;
+ }
+ }
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]);
+#endif
+ }
+
+ if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp)
+ mode_lib->ms.support.TotalAvailablePipesSupport = false;
+
+
+ mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0;
+ for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.NoOfDPP[k] == 1)
+ mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1;
+ }
+
+ //DISPCLK/DPPCLK
+ mode_lib->ms.WritebackRequiredDISPCLK = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK,
+ CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ mode_lib->ip.writeback_line_buffer_buffer_size));
+ }
+ }
+
+ mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]);
+ }
+
+ mode_lib->ms.GlobalDPPCLK = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k];
+ mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]);
+ }
+
+ mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz));
+
+ /* Total Available OTG, Writeback, HDMIFRL, DP Support Check */
+ s->TotalNumberOfActiveOTG = 0;
+ s->TotalNumberOfActiveHDMIFRL = 0;
+ s->TotalNumberOfActiveDP2p0 = 0;
+ s->TotalNumberOfActiveDP2p0Outputs = 0;
+ s->TotalNumberOfActiveWriteback = 0;
+ memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
+ s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
+
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0)
+ s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
+
+ s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)
+ s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) {
+ s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
+ // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup
+ //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) {
+ s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
+ //}
+ }
+ }
+ }
+ }
+
+ /* Writeback Mode Support Check */
+ mode_lib->ms.support.EnoughWritebackUnits = 1;
+ if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) {
+ mode_lib->ms.support.EnoughWritebackUnits = false;
+ }
+ mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg);
+ mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs);
+ mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs);
+
+
+ mode_lib->ms.support.ExceededMultistreamSlots = false;
+ mode_lib->ms.support.LinkCapacitySupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false &&
+ (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) {
+ mode_lib->ms.support.LinkCapacitySupport = false;
+ }
+ }
+
+ mode_lib->ms.support.P2IWith420 = false;
+ mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
+ mode_lib->ms.support.DSC422NativeNotSupported = false;
+ mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
+ mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
+ mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
+ mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
+ mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
+ mode_lib->ms.support.NotEnoughLanesForMSO = false;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true)
+ mode_lib->ms.support.P2IWith420 = true;
+
+ if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support)
+ mode_lib->ms.support.DSC422NativeNotSupported = true;
+
+ if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) &&
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) ||
+ ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) &&
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0))
+ mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
+
+ // FIXME_STAGE2
+ //if (display_cfg->output.OutputMultistreamEn[k] == 1) {
+ // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na)
+ // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
+ // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0)
+ // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
+ // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
+ // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0)
+ // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
+ // }
+ //}
+
+ if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) {
+ // FIXME_STAGE2
+ //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k)
+ // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
+ //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
+ // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n)
+ // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
+ //}
+ }
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4))
+ mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
+
+ if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) ||
+ (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4))
+ mode_lib->ms.support.NotEnoughLanesForMSO = true;
+ }
+ }
+
+ mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl &&
+ !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
+ mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK(
+ mode_lib->ms.RequiresDSC[k],
+ s->PixelClockBackEnd[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ mode_lib->ms.OutputBpp[k],
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout);
+
+ if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) {
+ mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
+ }
+ } else {
+ /* Phantom DTBCLK can be calculated differently from main because phantom has no DSC and thus
+ * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider
+ * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK
+ * required - by setting phantom dtbclk to 0 we ignore it.
+ */
+ mode_lib->ms.RequiredDTBCLK[k] = 0;
+ }
+ }
+
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) {
+ s->DSCFormatFactor = 2;
+ } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) {
+ s->DSCFormatFactor = 1;
+ } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
+ s->DSCFormatFactor = 2;
+ } else {
+ s->DSCFormatFactor = 1;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
+#endif
+ if (mode_lib->ms.RequiresDSC[k] == true) {
+ s->PixelClockBackEndFactor = 3.0;
+
+ if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+ s->PixelClockBackEndFactor = 12.0;
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+ s->PixelClockBackEndFactor = 9.0;
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+ s->PixelClockBackEndFactor = 6.0;
+
+ mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor;
+ if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) {
+ mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
+ DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
+#endif
+ }
+ }
+ }
+
+ /* Check DSC Unit and Slices Support */
+ mode_lib->ms.support.NotEnoughDSCSlices = false;
+ s->TotalDSCUnitsRequired = 0;
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
+ memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
+ s->NumDSCUnitRequired = 1;
+
+ if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
+ s->NumDSCUnitRequired = 4;
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
+ s->NumDSCUnitRequired = 3;
+ else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
+ s->NumDSCUnitRequired = 2;
+
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit)
+ mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
+ s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired;
+
+ if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired)
+ mode_lib->ms.support.NotEnoughDSCSlices = true;
+ }
+ s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
+ }
+
+ mode_lib->ms.support.NotEnoughDSCUnits = false;
+ if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) {
+ mode_lib->ms.support.NotEnoughDSCUnits = true;
+ }
+
+ /*DSC Delay per state*/
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
+ mode_lib->ms.ODMMode[k],
+ mode_lib->ip.maximum_dsc_bits_per_component,
+ s->OutputBpp[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ mode_lib->ms.support.NumberOfDSCSlices[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ s->PixelClockBackEnd[k]);
+ }
+
+ // Figure out the swath and DET configuration after the num dpp per plane is figured out
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+ CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
+
+ // output
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub;
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub;
+ CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY;
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC;
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY;
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC;
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma;
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled;
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3];
+ CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1];
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport;
+
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
+
+ if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+ mode_lib->ms.SurfaceSizeInMALL[k] = 0;
+ mode_lib->ms.support.ExceededMALLSize = 0;
+ } else {
+ CalculateSurfaceSizeInMall(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ mode_lib->soc.mall_allocated_for_dcn_mbytes,
+
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.Read256BlockWidthY,
+ mode_lib->ms.Read256BlockWidthC,
+ mode_lib->ms.Read256BlockHeightY,
+ mode_lib->ms.Read256BlockHeightC,
+ mode_lib->ms.MacroTileWidthY,
+ mode_lib->ms.MacroTileWidthC,
+ mode_lib->ms.MacroTileHeightY,
+ mode_lib->ms.MacroTileHeightC,
+
+ /* Output */
+ mode_lib->ms.SurfaceSizeInMALL,
+ &mode_lib->ms.support.ExceededMALLSize);
+ }
+
+ mode_lib->ms.TotalNumberOfDCCActiveDPP = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
+ mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k];
+ }
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k];
+ s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+ s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
+ s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
+ s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
+ s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
+ s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
+ s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
+ s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
+ s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
+ s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+ s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+ s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
+ s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
+ s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
+ s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+ s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+ s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+ s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
+ s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
+ s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+ s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+ s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+ s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
+ s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
+ s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
+
+ s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
+ s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
+ }
+
+ CalculateVMRowAndSwath_params->display_cfg = display_cfg;
+ CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
+ CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
+ CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
+ CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
+ CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
+ CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
+ CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
+ CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ // output
+ CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
+ CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
+ CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
+ CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
+ CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
+ CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
+ CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
+ CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
+ CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
+ CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
+ CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
+ CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
+ CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23];
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24];
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC;
+ CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
+ CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
+ CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY;
+ CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC;
+ CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
+ CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
+ CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+ CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+ CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip;
+ CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0];
+ CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
+ CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25];
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded;
+ CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw;
+ CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes;
+ CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+ CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+ CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26];
+ CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27];
+ CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28];
+ CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29];
+ CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30];
+ CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31];
+ CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32];
+ CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33];
+
+ CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
+
+ mode_lib->ms.support.PTEBufferSizeNotExceeded = true;
+ mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true;
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false)
+ mode_lib->ms.support.PTEBufferSizeNotExceeded = false;
+
+ if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false)
+ mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]);
+#endif
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded);
+ DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded);
+#endif
+
+ /* VActive bytes to fetch for UCLK P-State */
+ calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
+
+ /* outputs */
+ calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
+
+ calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
+
+ /* Excess VActive bandwidth required to fill DET */
+ calculate_excess_vactive_bandwidth_required(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ s->pstate_bytes_required_l,
+ s->pstate_bytes_required_c,
+ /* outputs */
+ mode_lib->ms.excess_vactive_fill_bw_l,
+ mode_lib->ms.excess_vactive_fill_bw_c);
+
+ mode_lib->ms.UrgLatency = CalculateUrgentLatency(
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
+ mode_lib->soc.do_urgent_latency_adjustment,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.uclk_freq_mhz,
+ mode_lib->soc.qos_parameters.qos_type,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+ mode_lib->ms.TripToMemory = CalculateTripToMemory(
+ mode_lib->ms.UrgLatency,
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.uclk_freq_mhz,
+ mode_lib->soc.qos_parameters.qos_type,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+ mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ bool cursor_not_enough_urgent_latency_hiding = false;
+
+ if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
+ calculate_cursor_req_attributes(
+ display_cfg->plane_descriptors[k].cursor.cursor_width,
+ display_cfg->plane_descriptors[k].cursor.cursor_bpp,
+
+ // output
+ &s->cursor_lines_per_chunk[k],
+ &s->cursor_bytes_per_line[k],
+ &s->cursor_bytes_per_chunk[k],
+ &s->cursor_bytes[k]);
+
+ calculate_cursor_urgent_burst_factor(
+ mode_lib->ip.cursor_buffer_size,
+ display_cfg->plane_descriptors[k].cursor.cursor_width,
+ s->cursor_bytes_per_chunk[k],
+ s->cursor_lines_per_chunk[k],
+ line_time_us,
+ mode_lib->ms.UrgLatency,
+
+ // output
+ &mode_lib->ms.UrgentBurstFactorCursor[k],
+ &cursor_not_enough_urgent_latency_hiding);
+ }
+
+ mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k];
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+ DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
+#endif
+
+ CalculateUrgentBurstFactor(
+ &display_cfg->plane_descriptors[k],
+ mode_lib->ms.swath_width_luma_ub[k],
+ mode_lib->ms.swath_width_chroma_ub[k],
+ mode_lib->ms.SwathHeightY[k],
+ mode_lib->ms.SwathHeightC[k],
+ line_time_us,
+ mode_lib->ms.UrgLatency,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->ms.BytePerPixelInDETY[k],
+ mode_lib->ms.BytePerPixelInDETC[k],
+ mode_lib->ms.DETBufferSizeY[k],
+ mode_lib->ms.DETBufferSizeC[k],
+
+ // Output
+ &mode_lib->ms.UrgentBurstFactorLuma[k],
+ &mode_lib->ms.UrgentBurstFactorChroma[k],
+ &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]);
+
+ mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
+ }
+
+ CalculateDCFCLKDeepSleep(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.SwathWidthY,
+ mode_lib->ms.SwathWidthC,
+ mode_lib->ms.NoOfDPP,
+ mode_lib->ms.PSCL_FACTOR,
+ mode_lib->ms.PSCL_FACTOR_CHROMA,
+ mode_lib->ms.RequiredDPPCLK,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
+ mode_lib->soc.return_bus_width_bytes,
+
+ /* Output */
+ &mode_lib->ms.dcfclk_deepsleep);
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
+ } else {
+ mode_lib->ms.WritebackDelayTime[k] = 0.0;
+ }
+ }
+
+ // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
+ s->MaximumVStartup[k] = CalculateMaxVStartup(
+ mode_lib->ip.ptoi_supported,
+ mode_lib->ip.vblank_nom_default_us,
+ &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
+ mode_lib->ms.WritebackDelayTime[k]);
+ mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]);
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]);
+#endif
+
+ /* Immediate Flip and MALL parameters */
+ s->ImmediateFlipRequired = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip;
+ }
+
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
+ mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
+ ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) &&
+ (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])));
+ }
+
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
+ ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) ||
+ ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame));
+ }
+
+ s->FullFrameMALLPStateMethod = false;
+ s->SubViewportMALLPStateMethod = false;
+ s->PhantomPipeMALLPStateMethod = false;
+ s->SubViewportMALLRefreshGreaterThan120Hz = false;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
+ s->FullFrameMALLPStateMethod = true;
+ if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) {
+ s->SubViewportMALLPStateMethod = true;
+ if (!display_cfg->overrides.enable_subvp_implicit_pmo) {
+ // For dv, small frame tests will have very high refresh rate
+ unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 /
+ (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
+ if (refresh_rate > 120)
+ s->SubViewportMALLRefreshGreaterThan120Hz = true;
+ }
+ }
+ if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
+ s->PhantomPipeMALLPStateMethod = true;
+ }
+ mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) ||
+ (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod);
+ DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod);
+ DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod);
+ DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz);
+ DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState);
+ DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
+ DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
+ DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
+ DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
+#endif
+
+ mode_lib->ms.support.OutstandingRequestsSupport = true;
+ mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
+
+ mode_lib->ms.support.avg_urgent_latency_us
+ = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
+ + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
+
+ mode_lib->ms.support.avg_non_urgent_latency_us
+ = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
+ + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
+
+ mode_lib->ms.support.max_non_urgent_latency_us
+ = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
+ / mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
+ + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
+ + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
+ outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
+ / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
+
+ if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
+ mode_lib->ms.support.OutstandingRequestsSupport = false;
+ }
+
+ if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
+ mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us);
+ DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us);
+ DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us);
+#endif
+ }
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
+ outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
+ / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
+
+ if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
+ mode_lib->ms.support.OutstandingRequestsSupport = false;
+ }
+
+ if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
+ mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us);
+#endif
+ }
+ }
+
+ memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
+ if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
+ mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
+ mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
+ mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
+ }
+ } else {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
+ calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
+ calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
+ calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
+ calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+ calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+
+ calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
+ calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
+ calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+ calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
+ calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
+
+ calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+ calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+ calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+ calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k];
+ calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k];
+ calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
+ calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
+ calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
+ calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k];
+
+ calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
+ calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
+ calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k];
+ calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k];
+ calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
+ calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
+ calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
+ calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k];
+
+ // output
+ calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k];
+ calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k];
+ calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k];
+ calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k];
+
+ calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
+ calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
+ calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->ms.mcache_row_bytes_per_channel_l[k];
+ calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
+ calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];
+
+ calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
+ calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
+ calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->ms.mcache_row_bytes_per_channel_c[k];
+ calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
+ calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];
+
+ calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k];
+ calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k];
+ calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k];
+
+ calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
+ }
+
+ calculate_mall_bw_overhead_factor(
+ mode_lib->ms.mall_prefetch_sdp_overhead_factor,
+ mode_lib->ms.mall_prefetch_dram_overhead_factor,
+
+ // input
+ display_cfg,
+ mode_lib->ms.num_active_planes);
+ }
+
+ // Calculate all the bandwidth available
+ // Need another bw for latency evaluation
+ calculate_bandwidth_available(
+ mode_lib->ms.support.avg_bandwidth_available_min, // not used
+ mode_lib->ms.support.avg_bandwidth_available, // not used
+ mode_lib->ms.support.urg_bandwidth_available_min_latency,
+ mode_lib->ms.support.urg_bandwidth_available, // not used
+ mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
+ mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
+
+ &mode_lib->soc,
+ display_cfg->hostvm_enable,
+ mode_lib->ms.DCFCLK,
+ mode_lib->ms.FabricClock,
+ mode_lib->ms.dram_bw_mbps);
+
+ calculate_bandwidth_available(
+ mode_lib->ms.support.avg_bandwidth_available_min,
+ mode_lib->ms.support.avg_bandwidth_available,
+ mode_lib->ms.support.urg_bandwidth_available_min,
+ mode_lib->ms.support.urg_bandwidth_available,
+ mode_lib->ms.support.urg_bandwidth_available_vm_only,
+ mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
+
+ &mode_lib->soc,
+ display_cfg->hostvm_enable,
+ mode_lib->ms.MaxDCFCLK,
+ mode_lib->ms.MaxFabricClock,
+#ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW
+ mode_lib->ms.dram_bw_mbps);
+#else
+ mode_lib->ms.max_dram_bw_mbps);
+#endif
+
+ // Average BW support check
+ calculate_avg_bandwidth_required(
+ mode_lib->ms.support.avg_bandwidth_required,
+ // input
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
+ mode_lib->ms.cursor_bw,
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
+ mode_lib->ms.mall_prefetch_dram_overhead_factor,
+ mode_lib->ms.mall_prefetch_sdp_overhead_factor);
+
+ for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
+ mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
+ mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
+ mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
+ }
+
+ mode_lib->ms.support.AvgBandwidthSupport = true;
+ mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
+ for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
+ if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
+ mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
+ DML_LOG_VERBOSE("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
+
+ }
+ }
+ for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
+ for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
+ if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
+ mode_lib->ms.support.AvgBandwidthSupport = false;
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
+#endif
+ }
+ }
+ }
+
+ dml_core_ms_prefetch_check(mode_lib, display_cfg);
+
+ mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us;
+
+ //Re-ordering Buffer Support Check
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
+ if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
+ / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) {
+ mode_lib->ms.support.ROBSupport = true;
+ } else {
+ mode_lib->ms.support.ROBSupport = false;
+ }
+ } else {
+ if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
+ mode_lib->ms.support.ROBSupport = true;
+ } else {
+ mode_lib->ms.support.ROBSupport = false;
+ }
+ }
+
+ /* VActive fill time calculations (informative) */
+ calculate_vactive_det_fill_latency(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ s->pstate_bytes_required_l,
+ s->pstate_bytes_required_c,
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
+ mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
+ mode_lib->ms.surface_avg_vactive_required_bw,
+ mode_lib->ms.surface_peak_required_bw,
+ /* outputs */
+ mode_lib->ms.dram_change_vactive_det_fill_delay_us);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us);
+ DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
+#endif
+
+ /*Mode Support, Voltage State and SOC Configuration*/
+ {
+ if (mode_lib->ms.support.ScaleRatioAndTapsSupport
+ && mode_lib->ms.support.SourceFormatPixelAndScanSupport
+ && mode_lib->ms.support.ViewportSizeSupport
+ && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
+ && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
+ && !mode_lib->ms.support.BPPForMultistreamNotIndicated
+ && !mode_lib->ms.support.MultistreamWithHDMIOreDP
+ && !mode_lib->ms.support.ExceededMultistreamSlots
+ && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
+ && !mode_lib->ms.support.NotEnoughLanesForMSO
+ && !mode_lib->ms.support.P2IWith420
+ && !mode_lib->ms.support.DSC422NativeNotSupported
+ && mode_lib->ms.support.DSCSlicesODMModeSupported
+ && !mode_lib->ms.support.NotEnoughDSCUnits
+ && !mode_lib->ms.support.NotEnoughDSCSlices
+ && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
+ && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
+ && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported
+ && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
+ && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported
+ && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
+ && mode_lib->ms.support.ROBSupport
+ && mode_lib->ms.support.OutstandingRequestsSupport
+ && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance
+ && mode_lib->ms.support.DISPCLK_DPPCLK_Support
+ && mode_lib->ms.support.TotalAvailablePipesSupport
+ && mode_lib->ms.support.NumberOfOTGSupport
+ && mode_lib->ms.support.NumberOfHDMIFRLSupport
+ && mode_lib->ms.support.NumberOfDP2p0Support
+ && mode_lib->ms.support.EnoughWritebackUnits
+ && mode_lib->ms.support.WritebackLatencySupport
+ && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport
+ && mode_lib->ms.support.CursorSupport
+ && mode_lib->ms.support.PitchSupport
+ && !mode_lib->ms.support.ViewportExceedsSurface
+ && mode_lib->ms.support.PrefetchSupported
+ && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport
+ && mode_lib->ms.support.AvgBandwidthSupport
+ && mode_lib->ms.support.DynamicMetadataSupported
+ && mode_lib->ms.support.VRatioInPrefetchSupported
+ && mode_lib->ms.support.PTEBufferSizeNotExceeded
+ && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded
+ && !mode_lib->ms.support.ExceededMALLSize
+ && mode_lib->ms.support.g6_temp_read_support
+ && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) {
+ DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__);
+ mode_lib->ms.support.ModeSupport = true;
+ } else {
+ DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__);
+ mode_lib->ms.support.ModeSupport = false;
+ }
+ }
+
+ // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0).
+ DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
+ DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
+ mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
+ }
+
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
+ mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
+ mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
+ mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k];
+ mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k];
+ mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k];
+
+#if defined(__DML_VBA_DEBUG__)
+ DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]);
+#endif
+ }
+
+#if defined(__DML_VBA_DEBUG__)
+ if (!mode_lib->ms.support.ModeSupport)
+ dml2_print_mode_support_info(&mode_lib->ms.support, true);
+
+ DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__);
+#endif
+
+ return mode_lib->ms.support.ModeSupport;
+}
+
+/*
+ * dml2_core_calcs_mode_support_ex - run the mode support check and export its result
+ *
+ * @in_out_params: request/response bundle. mode_lib and in_display_cfg are
+ *                 read; out_evaluation_info is written on success only.
+ *
+ * Calls dml_core_mode_support() and, only when it returns non-zero (mode
+ * supported), copies mode_lib->ms.support into *out_evaluation_info. When the
+ * result is zero, *out_evaluation_info is left untouched.
+ *
+ * Return: the value returned by dml_core_mode_support().
+ */
+unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params)
+{
+	unsigned int result;
+
+	DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__);
+	result = dml_core_mode_support(in_out_params);
+
+	/* Export the evaluation info only for a supported mode. */
+	if (result)
+		*in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
+
+	DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index);
+
+	/* k is unsigned, so it is printed with %u to match its type. */
+	for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++)
+		DML_LOG_VERBOSE("DML::%s: plane_%u: reserved_vblank_time_ns = %lu\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
+
+	DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__);
+
+	return result;
+}
+
+/*
+ * CalculatePixelDeliveryTimes - per-plane line and request delivery times
+ *
+ * All array arguments are indexed by plane k in [0, NumberOfActiveSurfaces).
+ *
+ * Line delivery time (luma uses the plane0/luma inputs, chroma uses the
+ * plane1/chroma inputs):
+ *   selector <= 1 : swath_width_ub * dpps_used / h_ratio / pixel_clock_mhz
+ *   otherwise     : swath_width_ub / PSCL_THROUGHPUT / Dppclk
+ * The selector is the scaler v_ratio for the nominal outputs and
+ * VRatioPrefetchY / VRatioPrefetchC for the *Prefetch outputs.
+ *
+ * Request delivery time = line delivery time / req_per_swath_ub.
+ *
+ * Every chroma output is written as 0 when BytePerPixelC[k] == 0.
+ */
+static void CalculatePixelDeliveryTimes(
+	const struct dml2_display_cfg *display_cfg,
+	const struct core_display_cfg_support_info *cfg_support_info,
+	unsigned int NumberOfActiveSurfaces,
+	double VRatioPrefetchY[],
+	double VRatioPrefetchC[],
+	unsigned int swath_width_luma_ub[],
+	unsigned int swath_width_chroma_ub[],
+	double PSCL_THROUGHPUT[],
+	double PSCL_THROUGHPUT_CHROMA[],
+	double Dppclk[],
+	unsigned int BytePerPixelC[],
+	unsigned int req_per_swath_ub_l[],
+	unsigned int req_per_swath_ub_c[],
+
+	// Output
+	double DisplayPipeLineDeliveryTimeLuma[],
+	double DisplayPipeLineDeliveryTimeChroma[],
+	double DisplayPipeLineDeliveryTimeLumaPrefetch[],
+	double DisplayPipeLineDeliveryTimeChromaPrefetch[],
+	double DisplayPipeRequestDeliveryTimeLuma[],
+	double DisplayPipeRequestDeliveryTimeChroma[],
+	double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
+	double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
+{
+	unsigned int k;
+
+	/* Pass 1: time to deliver one swath line, nominal and prefetch. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used);
+		DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz);
+		DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
+#endif
+		/* Nominal luma: selected by the plane0 vertical scale ratio. */
+		DisplayPipeLineDeliveryTimeLuma[k] =
+			(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) ?
+			swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz :
+			swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		/* Nominal chroma: selected by the plane1 vertical scale ratio. */
+		if (BytePerPixelC[k] == 0) {
+			DisplayPipeLineDeliveryTimeChroma[k] = 0;
+		} else {
+			DisplayPipeLineDeliveryTimeChroma[k] =
+				(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) ?
+				swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz :
+				swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+		}
+
+		/* Prefetch luma: same formulas, selected by VRatioPrefetchY. */
+		DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
+			(VRatioPrefetchY[k] <= 1) ?
+			swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz :
+			swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
+
+		/* Prefetch chroma: same formulas, selected by VRatioPrefetchC. */
+		if (BytePerPixelC[k] == 0) {
+			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
+		} else {
+			DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
+				(VRatioPrefetchC[k] <= 1) ?
+				swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz :
+				swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
+		}
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
+#endif
+	}
+
+	/* Pass 2: spread each line time over the requests in one swath. */
+	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
+		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k];
+		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k];
+
+		if (BytePerPixelC[k] != 0) {
+			DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k];
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k];
+		} else {
+			/* No chroma plane: report zero request times. */
+			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
+			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
+		}
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]);
+#endif
+	}
+}
+
+static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p)
+{
+ unsigned int meta_chunk_width;
+ unsigned int min_meta_chunk_width;
+ unsigned int meta_chunk_per_row_int;
+ unsigned int meta_row_remainder;
+ unsigned int meta_chunk_threshold;
+ unsigned int meta_chunks_per_row_ub;
+ unsigned int meta_chunk_width_chroma;
+ unsigned int min_meta_chunk_width_chroma;
+ unsigned int meta_chunk_per_row_int_chroma;
+ unsigned int meta_row_remainder_chroma;
+ unsigned int meta_chunk_threshold_chroma;
+ unsigned int meta_chunks_per_row_ub_chroma;
+ unsigned int dpte_group_width_luma;
+ unsigned int dpte_groups_per_row_luma_ub;
+ unsigned int dpte_group_width_chroma;
+ unsigned int dpte_groups_per_row_chroma_ub;
+ double pixel_clock_mhz;
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ if (p->BytePerPixelC[k] == 0) {
+ p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+ } else {
+ p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ }
+ p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ if (p->BytePerPixelC[k] == 0) {
+ p->DST_Y_PER_META_ROW_NOM_C[k] = 0;
+ } else {
+ p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ }
+ }
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) {
+ meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
+ min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
+ meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width;
+ meta_row_remainder = p->meta_row_width[k] % meta_chunk_width;
+ if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+ meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k];
+ } else {
+ meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k];
+ }
+ if (meta_row_remainder <= meta_chunk_threshold) {
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
+ } else {
+ meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
+ }
+ p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio *
+ p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
+ p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
+ p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
+ if (p->BytePerPixelC[k] == 0) {
+ p->TimePerChromaMetaChunkNominal[k] = 0;
+ p->TimePerChromaMetaChunkVBlank[k] = 0;
+ p->TimePerChromaMetaChunkFlip[k] = 0;
+ } else {
+ meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
+ min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
+ meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma);
+ meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma;
+ if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k];
+ } else {
+ meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k];
+ }
+ if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
+ } else {
+ meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
+ }
+ p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
+ p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
+ p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
+ }
+ } else {
+ p->TimePerMetaChunkNominal[k] = 0;
+ p->TimePerMetaChunkVBlank[k] = 0;
+ p->TimePerMetaChunkFlip[k] = 0;
+ p->TimePerChromaMetaChunkNominal[k] = 0;
+ p->TimePerChromaMetaChunkVBlank[k] = 0;
+ p->TimePerChromaMetaChunkFlip[k] = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]);
+#endif
+ }
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ if (p->BytePerPixelC[k] == 0) {
+ p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
+ } else {
+ p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ }
+ }
+
+ for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+ pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+
+ if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
+ p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k];
+ else
+ p->time_per_tdlut_group[k] = 0;
+
+ DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]);
+
+ if (p->display_cfg->gpuvm_enable == true) {
+ if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+ dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]);
+ } else {
+ dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]);
+ }
+ if (p->use_one_row_for_frame[k]) {
+ dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0));
+ } else {
+ dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0));
+ }
+ if (dpte_groups_per_row_luma_ub <= 2) {
+ dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1;
+ }
+ DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
+
+ p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
+ p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
+ p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
+ if (p->BytePerPixelC[k] == 0) {
+ p->time_per_pte_group_nom_chroma[k] = 0;
+ p->time_per_pte_group_vblank_chroma[k] = 0;
+ p->time_per_pte_group_flip_chroma[k] = 0;
+ } else {
+ if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
+ dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]);
+ } else {
+ dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]);
+ }
+
+ if (p->use_one_row_for_frame[k]) {
+ dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0));
+ } else {
+ dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0));
+ }
+ if (dpte_groups_per_row_chroma_ub <= 2) {
+ dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1;
+ }
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
+
+ p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
+ p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
+ p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
+ }
+ } else {
+ p->time_per_pte_group_nom_luma[k] = 0;
+ p->time_per_pte_group_vblank_luma[k] = 0;
+ p->time_per_pte_group_flip_luma[k] = 0;
+ p->time_per_pte_group_nom_chroma[k] = 0;
+ p->time_per_pte_group_vblank_chroma[k] = 0;
+ p->time_per_pte_group_flip_chroma[k] = 0;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]);
+
+ DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]);
+#endif
+ }
+} // CalculateMetaAndPTETimes
+
+/*
+ * CalculateVMGroupAndRequestTimes - time budget per VM group and per VM request
+ *
+ * For each active surface k, counts the lower-VM-stage groups
+ * (ceil(bytes / vm_group_bytes[k]) per contributor) and requests (bytes / 64
+ * per contributor) coming from:
+ *  - the dpde0 luma/chroma bytes, when gpuvm_max_page_table_levels >= 2;
+ *  - the meta PTE luma/chroma bytes plus one extra group per plane, when DCC
+ *    is enabled on the surface and mrq_present is set;
+ *  - the tdlut PTE bytes (plus one extra group when
+ *    gpuvm_max_page_table_levels >= 2), when the plane is set up for tdlut.
+ *    This contribution is only added to the "pref" counts used for the
+ *    VBlank outputs, not to the flip counts.
+ *
+ * Each output is dst_y_per_vm_{vblank,flip}[k] * line_time / count, or 0 when
+ * the count is 0. All four outputs are halved when
+ * gpuvm_max_page_table_levels > 2, and all four are 0 when gpuvm_enable is
+ * not set.
+ *
+ * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are not used here.
+ */
+static void CalculateVMGroupAndRequestTimes(
+	const struct dml2_display_cfg *display_cfg,
+	unsigned int NumberOfActiveSurfaces,
+	unsigned int BytePerPixelC[],
+	double dst_y_per_vm_vblank[],
+	double dst_y_per_vm_flip[],
+	unsigned int dpte_row_width_luma_ub[],
+	unsigned int dpte_row_width_chroma_ub[],
+	unsigned int vm_group_bytes[],
+	unsigned int dpde0_bytes_per_frame_ub_l[],
+	unsigned int dpde0_bytes_per_frame_ub_c[],
+	unsigned int tdlut_pte_bytes_per_frame[],
+	unsigned int meta_pte_bytes_per_frame_ub_l[],
+	unsigned int meta_pte_bytes_per_frame_ub_c[],
+	bool mrq_present,
+
+	// Output
+	double TimePerVMGroupVBlank[],
+	double TimePerVMGroupFlip[],
+	double TimePerVMRequestVBlank[],
+	double TimePerVMRequestFlip[])
+{
+	unsigned int num_group_per_lower_vm_stage_flip;
+	unsigned int num_group_per_lower_vm_stage_pref;
+	unsigned int num_req_per_lower_vm_stage_flip;
+	unsigned int num_req_per_lower_vm_stage_pref;
+	double line_time;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
+#endif
+	for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
+		/*
+		 * The counts are per surface, so they restart from zero for every k.
+		 * Keeping them outside the loop made surface k inherit the groups
+		 * and requests of surfaces 0..k-1.
+		 */
+		unsigned int num_group_per_lower_vm_stage = 0;
+		unsigned int num_req_per_lower_vm_stage = 0;
+		double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+		bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present;
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable);
+		DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_l = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_c = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
+#endif
+
+		if (display_cfg->gpuvm_enable) {
+			bool tdlut_pte_needed = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+
+			/* ---- group count ---- */
+			if (display_cfg->gpuvm_max_page_table_levels >= 2) {
+				num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
+
+				if (BytePerPixelC[k] > 0)
+					num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
+			}
+
+			if (dcc_mrq_enable) {
+				if (BytePerPixelC[k] > 0) {
+					num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) +
+						math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1));
+				} else {
+					num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1));
+				}
+			}
+
+			num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage;
+			num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage;
+
+			if (tdlut_pte_needed) {
+				/* Divide in floating point so that math_ceil2() can actually round a partial group up. */
+				num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2((double) (tdlut_pte_bytes_per_frame[k]) / (double) (vm_group_bytes[k]), 1);
+				if (display_cfg->gpuvm_max_page_table_levels >= 2)
+					num_group_per_lower_vm_stage_pref += 1; // tdpe0 group
+			}
+
+			/* ---- request count: every contributor is bytes / 64 ---- */
+			if (display_cfg->gpuvm_max_page_table_levels >= 2) {
+				num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
+				if (BytePerPixelC[k] > 0)
+					num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k] / 64;
+			}
+
+			if (dcc_mrq_enable) {
+				num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
+				if (BytePerPixelC[k] > 0)
+					num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
+			}
+
+			num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage;
+			num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage;
+
+			if (tdlut_pte_needed) {
+				num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64;
+			}
+
+			/* h_total / pixel clock in MHz: duration of one line of the surface's stream */
+			line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz;
+
+			/* A zero count yields a zero time instead of a division by zero. */
+			if (num_group_per_lower_vm_stage_pref > 0)
+				TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref;
+			else
+				TimePerVMGroupVBlank[k] = 0;
+
+			if (num_group_per_lower_vm_stage_flip > 0)
+				TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip;
+			else
+				TimePerVMGroupFlip[k] = 0;
+
+			if (num_req_per_lower_vm_stage_pref > 0)
+				TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref;
+			else
+				TimePerVMRequestVBlank[k] = 0.0;
+			if (num_req_per_lower_vm_stage_flip > 0)
+				TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip;
+			else
+				TimePerVMRequestFlip[k] = 0.0;
+
+			DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time);
+			DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %u\n", __func__, k, num_group_per_lower_vm_stage_pref);
+			DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %u\n", __func__, k, num_group_per_lower_vm_stage_flip);
+			DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %u\n", __func__, k, num_req_per_lower_vm_stage_pref);
+			DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %u\n", __func__, k, num_req_per_lower_vm_stage_flip);
+
+			/* With more than two page table levels, every time budget is halved. */
+			if (display_cfg->gpuvm_max_page_table_levels > 2) {
+				TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
+				TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
+				TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
+				TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
+			}
+
+		} else {
+			TimePerVMGroupVBlank[k] = 0;
+			TimePerVMGroupFlip[k] = 0;
+			TimePerVMRequestVBlank[k] = 0;
+			TimePerVMRequestFlip[k] = 0;
+		}
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
+		DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
+#endif
+	}
+}
+
+/*
+ * CalculateStutterEfficiency - stutter period, burst count and efficiency
+ *
+ * @scratch: provides the CalculateStutterEfficiency_locals working area, which
+ *           is zeroed on entry.
+ * @p:       inputs, plus pointers to the outputs written here: StutterPeriod,
+ *           StutterEfficiencyNotIncludingVBlank, StutterEfficiency,
+ *           NumberOfStutterBurstsPerFrame, the Z8 variants of the last three,
+ *           and DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE.
+ *
+ * Steps (phantom pipes are skipped in every per-surface loop):
+ *  1. Accumulate the compressed, zero-size-request and row read bandwidths
+ *     and derive the average DCC compression rate / zero-size fraction.
+ *  2. Derive the effective compressed buffer size from those averages.
+ *  3. Pick the surface with the smallest DET buffering time as the critical
+ *     surface; its buffering time becomes the stutter period.
+ *  4. Compute the stutter burst time.
+ *  5. Visit each referenced stream once to count streams with active
+ *     writeback and to check whether all streams share the same pixel clock,
+ *     h_total and v_total.
+ *  6. Compute the efficiencies; they are forced to 0 when any visited stream
+ *     has active writeback.
+ */
+static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch,
+	struct dml2_core_calcs_CalculateStutterEfficiency_params *p)
+{
+	struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals;
+
+	unsigned int TotalNumberOfActiveOTG = 0;
+	double SinglePixelClock = 0;
+	unsigned int SingleHTotal = 0;
+	unsigned int SingleVTotal = 0;
+	bool SameTiming = true;
+	bool FoundCriticalSurface = false;
+
+	memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals));
+
+	/* Step 1: bandwidth totals over all non-phantom surfaces. */
+	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
+			if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
+				/* The effective compression is capped at 2 when the 256B block exceeds the swath height in the scan direction or the max uncompressed block is below 256, else at 4. */
+				if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
+					l->MaximumEffectiveCompressionLuma = 2;
+				} else {
+					l->MaximumEffectiveCompressionLuma = 4;
+				}
+				l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma);
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
+				DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0);
+				DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma);
+#endif
+				l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0;
+				l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma;
+
+				if (p->ReadBandwidthSurfaceChroma[k] > 0) {
+					if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
+						l->MaximumEffectiveCompressionChroma = 2;
+					} else {
+						l->MaximumEffectiveCompressionChroma = 4;
+					}
+					l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma);
+#ifdef __DML_VBA_DEBUG__
+					DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
+					DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1);
+					DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma);
+#endif
+					l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1;
+					l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma;
+				}
+			} else {
+				/* Without DCC the surface is read uncompressed. */
+				l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
+			}
+			l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
+		}
+	}
+
+	l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth;
+	l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
+	DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth);
+	DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth);
+	DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth);
+	DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma);
+	DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma);
+	DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
+	DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction);
+
+	DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0);
+	DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
+	DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte);
+	DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte);
+#endif
+	/* Step 2: effective compressed buffer size; three cases on the zero-size fraction (all, some, none). */
+	if (l->AverageDCCZeroSizeFraction == 1) {
+		l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
+		l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate;
+
+
+	} else if (l->AverageDCCZeroSizeFraction > 0) {
+		l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
+		l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
+			(double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) +
+			(p->rob_alloc_compressed ? math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate,
+				((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate))
+				: ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
+
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
+		DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate));
+		DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
+		DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate));
+#endif
+	} else {
+		l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
+			(double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) +
+			((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0);
+
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
+		DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate);
+#endif
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
+	DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries);
+	DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate);
+	DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
+#endif
+
+	*p->StutterPeriod = 0;
+
+	/* Step 3: the surface with the smallest DET buffering time is the critical one and sets the stutter period. */
+	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
+			/* With unbounded requests, the surface's bandwidth share of the effective compressed buffer is added to its DET size. */
+			l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
+			l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]);
+			l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+#ifdef __DML_VBA_DEBUG__
+			DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024);
+			DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
+			DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
+			DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY);
+			DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath);
+			DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+			DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY);
+#endif
+
+			if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) {
+				bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP;
+
+				FoundCriticalSurface = true;
+				*p->StutterPeriod = l->DETBufferingTimeY;
+				/* Interlaced timing uses floor(v_total / 2) and floor(v_active / 2) lines. */
+				l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+				l->VActiveTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+				l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
+				l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
+				l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
+				l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
+				l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
+				l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
+				l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
+				l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
+
+#ifdef __DML_VBA_DEBUG__
+				DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
+				DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
+				DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
+#endif
+			}
+		}
+	}
+
+	/*
+	 * Step 4: stutter burst time.
+	 *
+	 * For bounded requests, the stutter period is calculated from the DET size only, but during the burst
+	 * some of the data can be returned from the ROB/compressed buffer:
+	 *   - if (cdb + rob >= det), the stutter burst is absorbed by cdb + rob, which sit before decompression;
+	 *   - otherwise the cdb + rob part is returned at the compressed rate with urgent bw (ideal bw) and
+	 *     the DET part is returned at the uncompressed rate of 64B/dcfclk.
+	 *
+	 * For unbounded requests, the stutter period is calculated from the total of CDB+ROB+DET, so
+	 * PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer should be equal to
+	 * EffectiveCompressedBufferSize, which is returned at the compressed rate; the rest of the stutter
+	 * period comes from the DET and is returned at the uncompressed rate of 64B/dcfclk.
+	 */
+
+	l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
+	DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0);
+	DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
+	DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024);
+	DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
+	DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
+	DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth);
+	DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
+#endif
+
+	/* Sum of three terms: the part fitting in ROB + compressed buffer, the remainder, and the row (meta/dpte) reads. */
+	l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer
+		/ (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
+		(*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer)
+		/ math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
+		*p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate));
+	DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
+	DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW);
+	DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
+#endif
+	l->TotalActiveWriteback = 0;
+	memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
+
+	/* Step 5: visit every stream referenced by a non-phantom plane exactly once. */
+	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
+		if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
+			if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) {
+
+				/* Look up the plane's own stream: k is a plane index, not a stream index. */
+				if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0)
+					l->TotalActiveWriteback = l->TotalActiveWriteback + 1;
+
+				if (TotalNumberOfActiveOTG == 0) { // first otg
+					SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+					SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+					SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total;
+				} else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) ||
+							SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total ||
+							SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) {
+					SameTiming = false;
+				}
+				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
+				l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index] = 1;
+			}
+		}
+	}
+
+	/* Step 6: efficiencies. Any active writeback forces them to 0. */
+	if (l->TotalActiveWriteback == 0) {
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
+		DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
+		DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
+#endif
+		*p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100;
+		*p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100;
+		*p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
+		*p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
+	} else {
+		*p->StutterEfficiencyNotIncludingVBlank = 0.;
+		*p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
+		*p->NumberOfStutterBurstsPerFrame = 0;
+		*p->Z8NumberOfStutterBurstsPerFrame = 0;
+	}
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface);
+	DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
+	DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
+	DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
+	DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+	/* The whole-frame formula is used only when (timings are synchronized or there is a single OTG) and all streams share the same timing. */
+	if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
+		if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
+			*p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
+		} else {
+			*p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
+		}
+	} else {
+		*p->StutterEfficiency = 0;
+		*p->NumberOfStutterBurstsPerFrame = 0;
+	}
+
+	if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
+		//LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
+		if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
+			*p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
+		} else {
+			*p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
+		}
+	} else {
+		*p->Z8StutterEfficiency = 0.;
+		*p->Z8NumberOfStutterBurstsPerFrame = 0;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG);
+	DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming);
+	DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings);
+	DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0);
+	DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
+	DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
+	DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
+	DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
+	DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
+	DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
+	DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
+#endif
+
+	/* Cleared only for a bounded-request configuration with exactly one active surface that is single-plane and single-pipe. */
+	*p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface);
+	DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
+	DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
+#endif
+}
+
+static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params)
+{
+ const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
+ const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
+ const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info;
+ struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
+ struct dml2_display_cfg_programming *programming = in_out_params->programming;
+
+ struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals;
+ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+ struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
+ struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
+ struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
+ struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
+ struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
+ struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
+ struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params;
+ struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
+ struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
+
+ unsigned int k;
+ bool must_support_iflip;
+ const long min_return_uclk_cycles = 83;
+ const long min_return_fclk_cycles = 75;
+ const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0;
+ double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0;
+ double max_uclk_mhz = 0;
+ double min_return_latency_in_DCFCLK_cycles = 0;
+
+ DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__);
+
+ memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
+ memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program));
+
+ s->num_active_planes = display_cfg->num_planes;
+ get_stream_output_bpp(s->OutputBpp, display_cfg);
+
+ mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info);
+ dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane);
+
+ mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0;
+ mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0;
+ mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config);
+ mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0;
+ mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0;
+ s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000;
+ mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
+ mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
+ DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4);
+ DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 ||
+ cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 ||
+ cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
+
+ if (cfg_support_info->stream_support_info[stream_index].odms_used > 1)
+ DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
+
+ switch (cfg_support_info->stream_support_info[stream_index].odms_used) {
+ case (4):
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1;
+ break;
+ case (3):
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1;
+ break;
+ case (2):
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1;
+ break;
+ default:
+ if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4)
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4;
+ else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2)
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2;
+ else
+ mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass;
+ break;
+ }
+ }
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used;
+ mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0;
+ DML_ASSERT(mode_lib->mp.Dppclk[k] > 0);
+ }
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
+ mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0;
+ DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]);
+ }
+
+ mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0;
+ mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0;
+
+ DML_ASSERT(mode_lib->mp.Dcfclk > 0);
+ DML_ASSERT(mode_lib->mp.FabricClock > 0);
+ DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0);
+ DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0);
+ DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0);
+ DML_ASSERT(mode_lib->mp.Dispclk > 0);
+ DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0);
+ DML_ASSERT(s->SOCCLK > 0);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
+ DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
+ DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk);
+ DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock);
+ DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps);
+ DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz);
+ DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk);
+ for (k = 0; k < s->num_active_planes; ++k) {
+ DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]);
+ }
+ DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK);
+ DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep);
+ DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK);
+ DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
+ DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz);
+ DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config));
+ for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) {
+ DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]);
+ DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
+ }
+
+ for (k = 0; k < s->num_active_planes; k++)
+ DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
+#endif
+
+ CalculateMaxDETAndMinCompressedBufferSize(
+ mode_lib->ip.config_return_buffer_size_in_kbytes,
+ mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
+ mode_lib->ip.rob_buffer_size_kbytes,
+ mode_lib->ip.max_num_dpp,
+ display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
+ display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
+ mode_lib->ip.dcn_mrq_present,
+
+ /* Output */
+ &s->MaxTotalDETInKByte,
+ &s->NomDETInKByte,
+ &s->MinCompressedBufferSizeInKByte);
+
+
+ PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ CalculateSinglePipeDPPCLKAndSCLThroughput(
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
+ mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
+
+ /* Output */
+ &mode_lib->mp.PSCL_THROUGHPUT[k],
+ &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k],
+ &mode_lib->mp.DPPCLKUsingSingleDPP[k]);
+ }
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ CalculateBytePerPixelAndBlockSizes(
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->plane_descriptors[k].surface.tiling,
+ display_cfg->plane_descriptors[k].surface.plane0.pitch,
+ display_cfg->plane_descriptors[k].surface.plane1.pitch,
+
+ // Output
+ &mode_lib->mp.BytePerPixelY[k],
+ &mode_lib->mp.BytePerPixelC[k],
+ &mode_lib->mp.BytePerPixelInDETY[k],
+ &mode_lib->mp.BytePerPixelInDETC[k],
+ &mode_lib->mp.Read256BlockHeightY[k],
+ &mode_lib->mp.Read256BlockHeightC[k],
+ &mode_lib->mp.Read256BlockWidthY[k],
+ &mode_lib->mp.Read256BlockWidthC[k],
+ &mode_lib->mp.MacroTileHeightY[k],
+ &mode_lib->mp.MacroTileHeightC[k],
+ &mode_lib->mp.MacroTileWidthY[k],
+ &mode_lib->mp.MacroTileWidthC[k],
+ &mode_lib->mp.surf_linear128_l[k],
+ &mode_lib->mp.surf_linear128_c[k]);
+ }
+
+ CalculateSwathWidth(
+ display_cfg,
+ false, // ForceSingleDPP
+ s->num_active_planes,
+ mode_lib->mp.ODMMode,
+ mode_lib->mp.BytePerPixelY,
+ mode_lib->mp.BytePerPixelC,
+ mode_lib->mp.Read256BlockHeightY,
+ mode_lib->mp.Read256BlockHeightC,
+ mode_lib->mp.Read256BlockWidthY,
+ mode_lib->mp.Read256BlockWidthC,
+ mode_lib->mp.surf_linear128_l,
+ mode_lib->mp.surf_linear128_c,
+ mode_lib->mp.NoOfDPP,
+
+ /* Output */
+ mode_lib->mp.req_per_swath_ub_l,
+ mode_lib->mp.req_per_swath_ub_c,
+ mode_lib->mp.SwathWidthSingleDPPY,
+ mode_lib->mp.SwathWidthSingleDPPC,
+ mode_lib->mp.SwathWidthY,
+ mode_lib->mp.SwathWidthC,
+ s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[]
+ s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[]
+ mode_lib->mp.swath_width_luma_ub,
+ mode_lib->mp.swath_width_chroma_ub);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 /
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
+ mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
+ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
+ }
+
+ CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte;
+ CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
+ CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
+ CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes;
+ CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte;
+ CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c;
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
+ CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY;
+ CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC;
+ CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l;
+ CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c;
+ CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode;
+ CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
+ CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY;
+ CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC;
+ CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ // output
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l;
+ CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c;
+ CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
+ CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
+ CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
+ CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
+ CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY;
+ CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC;
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma;
+ CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
+ CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
+ CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
+ CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
+ CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled;
+ CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b;
+ CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5;
+ CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte;
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
+ CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
+
+ // Calculate DET size, swath height here.
+ CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
+
+ // DSC Delay
+ for (k = 0; k < s->num_active_planes; ++k) {
+ mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable,
+ mode_lib->mp.ODMMode[k],
+ mode_lib->ip.maximum_dsc_bits_per_component,
+ s->OutputBpp[k],
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
+ cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ s->PixelClockBackEnd[k]);
+ }
+
+ // Prefetch
+ if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
+ for (k = 0; k < s->num_active_planes; ++k)
+ mode_lib->mp.SurfaceSizeInTheMALL[k] = 0;
+ } else {
+ CalculateSurfaceSizeInMall(
+ display_cfg,
+ s->num_active_planes,
+ mode_lib->soc.mall_allocated_for_dcn_mbytes,
+ mode_lib->mp.BytePerPixelY,
+ mode_lib->mp.BytePerPixelC,
+ mode_lib->mp.Read256BlockWidthY,
+ mode_lib->mp.Read256BlockWidthC,
+ mode_lib->mp.Read256BlockHeightY,
+ mode_lib->mp.Read256BlockHeightC,
+ mode_lib->mp.MacroTileWidthY,
+ mode_lib->mp.MacroTileWidthC,
+ mode_lib->mp.MacroTileHeightY,
+ mode_lib->mp.MacroTileHeightC,
+
+ /* Output */
+ mode_lib->mp.SurfaceSizeInTheMALL,
+ &s->dummy_boolean[0]); /* bool *ExceededMALLSize */
+ }
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k];
+ s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+ s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
+ s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
+ s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
+ s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
+ s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k];
+ s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k];
+ s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k];
+ s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k];
+ s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+ s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+ s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
+ s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
+ s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
+ s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+ s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+ s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+ s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
+ s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
+ s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+ s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+ s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+ s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
+ s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k];
+ s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k];
+ s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
+ s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
+ }
+
+ CalculateVMRowAndSwath_params->display_cfg = display_cfg;
+ CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes;
+ CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
+ CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+ CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
+ CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
+ CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY;
+ CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC;
+ CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
+ CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ // output
+ CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
+ CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
+ CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
+ CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height;
+ CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear;
+ CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma;
+ CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes;
+ CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
+ CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
+ CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
+ CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
+ CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
+ CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
+ CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
+ CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
+ CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l;
+ CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY;
+ CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC;
+ CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY;
+ CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC;
+ CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY;
+ CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC;
+ CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
+ CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow;
+ CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+ CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+ CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
+ CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip;
+ CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss;
+ CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE;
+ CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE;
+ CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
+ CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw;
+ CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes;
+ CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+ CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+ CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width;
+ CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height;
+ CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width;
+ CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l;
+ CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
+ CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
+ CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
+ CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
+ CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c;
+
+ CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
+
+ memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
+ if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
+ for (k = 0; k < s->num_active_planes; k++) {
+ mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0;
+ mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0;
+ mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
+ mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
+ mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
+ mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
+ }
+ } else {
+ for (k = 0; k < s->num_active_planes; k++) {
+ calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
+ calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
+ calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
+ calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
+ calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+ calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+
+ calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
+ calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
+ calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
+ calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
+ calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
+
+ calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
+ calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
+ calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
+ calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
+ calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k];
+ calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k];
+ calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
+ calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
+ calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
+ calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k];
+
+ calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
+ calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
+ calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
+ calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k];
+ calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k];
+ calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
+ calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
+ calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
+ calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k];
+
+ // output
+ calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k];
+ calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k];
+ calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k];
+ calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k];
+
+ calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
+ calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
+ calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->mp.mcache_row_bytes_per_channel_l[k];
+ calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
+ calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];
+
+ calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
+ calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
+ calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->mp.mcache_row_bytes_per_channel_c[k];
+ calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
+ calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];
+
+ calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k];
+ calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k];
+ calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k];
+ calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
+ }
+
+ calculate_mall_bw_overhead_factor(
+ mode_lib->mp.mall_prefetch_sdp_overhead_factor,
+ mode_lib->mp.mall_prefetch_dram_overhead_factor,
+
+ // input
+ display_cfg,
+ s->num_active_planes);
+ }
+
+ // Calculate all the bandwidth available
+ calculate_bandwidth_available(
+ mode_lib->mp.avg_bandwidth_available_min,
+ mode_lib->mp.avg_bandwidth_available,
+ mode_lib->mp.urg_bandwidth_available_min,
+ mode_lib->mp.urg_bandwidth_available,
+ mode_lib->mp.urg_bandwidth_available_vm_only,
+ mode_lib->mp.urg_bandwidth_available_pixel_and_vm,
+
+ &mode_lib->soc,
+ display_cfg->hostvm_enable,
+ mode_lib->mp.Dcfclk,
+ mode_lib->mp.FabricClock,
+ mode_lib->mp.dram_bw_mbps);
+
+
+ calculate_hostvm_inefficiency_factor(
+ &s->HostVMInefficiencyFactor,
+ &s->HostVMInefficiencyFactorPrefetch,
+
+ display_cfg->gpuvm_enable,
+ display_cfg->hostvm_enable,
+ mode_lib->ip.remote_iommu_outstanding_translations,
+ mode_lib->soc.max_outstanding_reqs,
+ mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
+ mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
+
+ s->TotalDCCActiveDPP = 0;
+ s->TotalActiveDPP = 0;
+ for (k = 0; k < s->num_active_planes; ++k) {
+ s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k];
+ if (display_cfg->plane_descriptors[k].surface.dcc.enable)
+ s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k];
+ }
+ // Calculate tdlut schedule related terms
+ for (k = 0; k <= s->num_active_planes - 1; k++) {
+ calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk;
+ calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+ calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
+ calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
+ calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
+ calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
+ calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
+
+ // output
+ calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
+ calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
+ calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
+ calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
+ calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+ calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
+ calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
+ calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
+ }
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
+ s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
+
+ CalculateExtraLatency(
+ display_cfg,
+ mode_lib->ip.rob_buffer_size_kbytes,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
+ s->ReorderingBytes,
+ mode_lib->mp.Dcfclk,
+ mode_lib->mp.FabricClock,
+ mode_lib->ip.pixel_chunk_size_kbytes,
+ mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active],
+ s->num_active_planes,
+ mode_lib->mp.NoOfDPP,
+ mode_lib->mp.dpte_group_bytes,
+ s->tdlut_bytes_per_group,
+ s->HostVMInefficiencyFactor,
+ s->HostVMInefficiencyFactorPrefetch,
+ mode_lib->soc.hostvm_min_page_size_kbytes,
+ mode_lib->soc.qos_parameters.qos_type,
+ !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
+ mode_lib->soc.max_outstanding_reqs,
+ mode_lib->mp.request_size_bytes_luma,
+ mode_lib->mp.request_size_bytes_chroma,
+ mode_lib->ip.meta_chunk_size_kbytes,
+ mode_lib->ip.dchub_arb_to_ret_delay,
+ mode_lib->mp.TripToMemory,
+ mode_lib->ip.hostvm_mode,
+
+ // output
+ &mode_lib->mp.ExtraLatency,
+ &mode_lib->mp.ExtraLatency_sr,
+ &mode_lib->mp.ExtraLatencyPrefetch);
+
+ mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep;
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ mode_lib->mp.WritebackDelay[k] =
+ mode_lib->soc.qos_parameters.writeback.base_latency_us
+ + CalculateWriteBackDelay(
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk;
+ } else
+ mode_lib->mp.WritebackDelay[k] = 0;
+ }
+
+ /* VActive bytes to fetch for UCLK P-State */
+ calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
+
+ /* outputs */
+ calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
+ calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
+
+ calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
+
+ /* Excess VActive bandwidth required to fill DET */
+ calculate_excess_vactive_bandwidth_required(
+ display_cfg,
+ s->num_active_planes,
+ s->pstate_bytes_required_l,
+ s->pstate_bytes_required_c,
+ /* outputs */
+ mode_lib->mp.excess_vactive_fill_bw_l,
+ mode_lib->mp.excess_vactive_fill_bw_c);
+
+ mode_lib->mp.UrgentLatency = CalculateUrgentLatency(
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
+ mode_lib->soc.do_urgent_latency_adjustment,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
+ mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
+ mode_lib->mp.FabricClock,
+ mode_lib->mp.uclk_freq_mhz,
+ mode_lib->soc.qos_parameters.qos_type,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+ mode_lib->mp.TripToMemory = CalculateTripToMemory(
+ mode_lib->mp.UrgentLatency,
+ mode_lib->mp.FabricClock,
+ mode_lib->mp.uclk_freq_mhz,
+ mode_lib->soc.qos_parameters.qos_type,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+ mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory);
+
+ mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory(
+ mode_lib->mp.UrgentLatency,
+ mode_lib->mp.FabricClock,
+ mode_lib->mp.uclk_freq_mhz,
+ mode_lib->soc.qos_parameters.qos_type,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ bool cursor_not_enough_urgent_latency_hiding = false;
+ s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+
+ s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
+
+ s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->mp.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+ s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->mp.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+ if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
+ calculate_cursor_req_attributes(
+ display_cfg->plane_descriptors[k].cursor.cursor_width,
+ display_cfg->plane_descriptors[k].cursor.cursor_bpp,
+
+ // output
+ &s->cursor_lines_per_chunk[k],
+ &s->cursor_bytes_per_line[k],
+ &s->cursor_bytes_per_chunk[k],
+ &s->cursor_bytes[k]);
+
+ calculate_cursor_urgent_burst_factor(
+ mode_lib->ip.cursor_buffer_size,
+ display_cfg->plane_descriptors[k].cursor.cursor_width,
+ s->cursor_bytes_per_chunk[k],
+ s->cursor_lines_per_chunk[k],
+ s->line_times[k],
+ mode_lib->mp.UrgentLatency,
+
+ // output
+ &mode_lib->mp.UrgentBurstFactorCursor[k],
+ &cursor_not_enough_urgent_latency_hiding);
+ }
+ mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k];
+
+ CalculateUrgentBurstFactor(
+ &display_cfg->plane_descriptors[k],
+ mode_lib->mp.swath_width_luma_ub[k],
+ mode_lib->mp.swath_width_chroma_ub[k],
+ mode_lib->mp.SwathHeightY[k],
+ mode_lib->mp.SwathHeightC[k],
+ s->line_times[k],
+ mode_lib->mp.UrgentLatency,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->mp.BytePerPixelInDETY[k],
+ mode_lib->mp.BytePerPixelInDETC[k],
+ mode_lib->mp.DETBufferSizeY[k],
+ mode_lib->mp.DETBufferSizeC[k],
+
+ /* output */
+ &mode_lib->mp.UrgentBurstFactorLuma[k],
+ &mode_lib->mp.UrgentBurstFactorChroma[k],
+ &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
+
+ mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
+ }
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ s->MaxVStartupLines[k] = CalculateMaxVStartup(
+ mode_lib->ip.ptoi_supported,
+ mode_lib->ip.vblank_nom_default_us,
+ &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
+ mode_lib->mp.WritebackDelay[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]);
+#endif
+ }
+
+ s->immediate_flip_required = false;
+ for (k = 0; k < s->num_active_planes; ++k) {
+ s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required);
+#endif
+
+ if (s->num_active_planes > 1) {
+ CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes;
+ CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
+ CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+ CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+ CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
+ CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
+ CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY;
+ CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC;
+ CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte;
+ CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY;
+ CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC;
+ CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
+ CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
+ CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
+ CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care
+ CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care
+ CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
+ CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk;
+ CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
+ CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch;
+
+ // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
+ CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0];
+ CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
+ CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // dont care about the check output for mode programming
+ }
+
+ {
+ s->DestinationLineTimesForPrefetchLessThan2 = false;
+ s->VRatioPrefetchMoreThanMax = false;
+
+ DML_LOG_VERBOSE("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
+
+ DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+ mode_lib->mp.TWait[k] = CalculateTWait(
+ display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
+ mode_lib->mp.UrgentLatency,
+ mode_lib->mp.TripToMemory,
+ !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
+ get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
+
+ myPipe->Dppclk = mode_lib->mp.Dppclk[k];
+ myPipe->Dispclk = mode_lib->mp.Dispclk;
+ myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
+ myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k];
+ myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
+ myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
+ myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
+ myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
+ myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
+ myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
+ myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
+ myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
+ myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
+ myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
+ myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
+ myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
+ myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
+ myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
+ myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ myPipe->ODMMode = mode_lib->mp.ODMMode[k];
+ myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
+ myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
+ myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
+ myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
+#endif
+ CalculatePrefetchSchedule_params->display_cfg = display_cfg;
+ CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
+ CalculatePrefetchSchedule_params->myPipe = myPipe;
+ CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k];
+ CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
+ CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
+ CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
+ CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
+ CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
+ CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
+ CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
+ CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
+ CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k];
+ CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
+ CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
+ CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
+ CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
+ CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
+ CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency;
+ CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch;
+ CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc;
+ CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k];
+ CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k];
+ CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k];
+ CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k];
+ CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k];
+ CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k];
+ CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k];
+ CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k];
+ CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k];
+ CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k];
+ CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k];
+ CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k];
+ CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k];
+ CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory;
+ CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency;
+ CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
+ CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
+ CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
+ CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
+ CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
+ CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
+ CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
+ CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
+ CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
+ CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+ CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k];
+ CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k];
+ CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
+ CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k];
+ CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k];
+
+ // output
+ CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k];
+ CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k];
+ CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k];
+ CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k];
+ CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k];
+ CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k];
+ CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k];
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k];
+ CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k];
+ CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &s->dummy_single_array[0][k];
+ CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k];
+ CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k];
+ CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k];
+ CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k];
+ CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k];
+ CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k];
+ CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k];
+ CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
+ CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
+ CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
+ CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
+ CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
+ CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
+ CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k];
+ CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k];
+ CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k];
+ CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k];
+ CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
+ CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
+ CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
+ CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0];
+
+ mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
+
+ if (s->impacted_dst_y_pre[k] > 0)
+ mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k];
+ else
+ mode_lib->mp.impacted_prefetch_margin_us[k] = 0;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
+#endif
+ mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k];
+ } // for k
+
+ mode_lib->mp.PrefetchModeSupported = true;
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (mode_lib->mp.NoTimeToPrefetch[k] == true ||
+ mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] ||
+ mode_lib->mp.DSTYAfterScaler[k] > 8) {
+ DML_LOG_VERBOSE("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]);
+ mode_lib->mp.PrefetchModeSupported = false;
+ }
+ if (mode_lib->mp.dst_y_prefetch[k] < 2)
+ s->DestinationLineTimesForPrefetchLessThan2 = true;
+
+ if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
+ mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
+ s->VRatioPrefetchMoreThanMax = true;
+ DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
+ DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
+ DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
+ }
+
+ if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) {
+ DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
+ mode_lib->mp.PrefetchModeSupported = false;
+ }
+ }
+
+ if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) {
+ DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
+ DML_LOG_VERBOSE("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
+ mode_lib->mp.PrefetchModeSupported = false;
+ }
+
+ DML_LOG_VERBOSE("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__,
+ mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup);
+
+ // Prefetch schedule OK, now check prefetch bw
+ if (mode_lib->mp.PrefetchModeSupported == true) {
+ for (k = 0; k < s->num_active_planes; ++k) {
+ double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ CalculateUrgentBurstFactor(
+ &display_cfg->plane_descriptors[k],
+ mode_lib->mp.swath_width_luma_ub[k],
+ mode_lib->mp.swath_width_chroma_ub[k],
+ mode_lib->mp.SwathHeightY[k],
+ mode_lib->mp.SwathHeightC[k],
+ line_time_us,
+ mode_lib->mp.UrgentLatency,
+ mode_lib->mp.VRatioPrefetchY[k],
+ mode_lib->mp.VRatioPrefetchC[k],
+ mode_lib->mp.BytePerPixelInDETY[k],
+ mode_lib->mp.BytePerPixelInDETC[k],
+ mode_lib->mp.DETBufferSizeY[k],
+ mode_lib->mp.DETBufferSizeC[k],
+ /* Output */
+ &mode_lib->mp.UrgentBurstFactorLumaPre[k],
+ &mode_lib->mp.UrgentBurstFactorChromaPre[k],
+ &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]);
+
+ DML_LOG_VERBOSE("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
+
+ DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]);
+#endif
+ }
+
+ for (k = 0; k <= s->num_active_planes - 1; k++)
+ mode_lib->mp.final_flip_bw[k] = 0;
+
+ calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->mp.urg_vactive_bandwidth_required;
+ calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required;
+ calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->mp.urg_bandwidth_required_qual;
+ calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required;
+ calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
+ calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
+
+ calculate_peak_bandwidth_params->display_cfg = display_cfg;
+ calculate_peak_bandwidth_params->inc_flip_bw = 0;
+ calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
+ calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
+ calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
+ calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
+
+ calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0];
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
+ calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
+ calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
+ calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
+ calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
+ calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
+ calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
+ calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
+
+ calculate_peak_bandwidth_required(
+ &mode_lib->scratch,
+ calculate_peak_bandwidth_params);
+
+ // Check urg peak bandwidth against available urg bw
+ // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
+ check_urgent_bandwidth_support(
+ &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth
+ &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall
+ &s->dummy_boolean[1], // vactive bw ok
+ &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok
+
+ mode_lib->soc.mall_allocated_for_dcn_mbytes,
+ mode_lib->mp.non_urg_bandwidth_required,
+ mode_lib->mp.urg_vactive_bandwidth_required,
+ mode_lib->mp.urg_bandwidth_required,
+ mode_lib->mp.urg_bandwidth_available);
+
+ if (!mode_lib->mp.PrefetchModeSupported)
+ DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) {
+ DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
+ mode_lib->mp.PrefetchModeSupported = false;
+ }
+ }
+ } // prefetch schedule ok
+
+ // Prefetch schedule and prefetch bw ok, now check flip bw
+ if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw
+
+ mode_lib->mp.BandwidthAvailableForImmediateFlip =
+ get_bandwidth_available_for_immediate_flip(
+ dml2_core_internal_soc_state_sys_active,
+ mode_lib->mp.urg_bandwidth_required_qual, // no flip
+ mode_lib->mp.urg_bandwidth_available);
+ mode_lib->mp.TotImmediateFlipBytes = 0;
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (display_cfg->plane_descriptors[k].immediate_flip) {
+ s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor,
+ mode_lib->mp.vm_bytes[k],
+ mode_lib->mp.PixelPTEBytesPerRow[k],
+ mode_lib->mp.meta_row_bytes[k]);
+ } else {
+ s->per_pipe_flip_bytes[k] = 0;
+ }
+ mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k];
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k = %u\n", __func__, k);
+ DML_LOG_VERBOSE("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]);
+ DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]);
+ DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]);
+ DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]);
+ DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes);
+#endif
+ }
+ for (k = 0; k < s->num_active_planes; ++k) {
+ CalculateFlipSchedule(
+ &mode_lib->scratch,
+ display_cfg->plane_descriptors[k].immediate_flip,
+ 0, // use_lb_flip_bw
+ s->HostVMInefficiencyFactor,
+ s->Tvm_trips_flip[k],
+ s->Tr0_trips_flip[k],
+ s->Tvm_trips_flip_rounded[k],
+ s->Tr0_trips_flip_rounded[k],
+ display_cfg->gpuvm_enable,
+ mode_lib->mp.vm_bytes[k],
+ mode_lib->mp.PixelPTEBytesPerRow[k],
+ mode_lib->mp.BandwidthAvailableForImmediateFlip,
+ mode_lib->mp.TotImmediateFlipBytes,
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->mp.Tno_bw[k],
+ mode_lib->mp.dpte_row_height[k],
+ mode_lib->mp.dpte_row_height_chroma[k],
+ mode_lib->mp.use_one_row_for_frame_flip[k],
+ mode_lib->ip.max_flip_time_us,
+ mode_lib->ip.max_flip_time_lines,
+ s->per_pipe_flip_bytes[k],
+ mode_lib->mp.meta_row_bytes[k],
+ mode_lib->mp.meta_row_height[k],
+ mode_lib->mp.meta_row_height_chroma[k],
+ mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
+
+ // Output
+ &mode_lib->mp.dst_y_per_vm_flip[k],
+ &mode_lib->mp.dst_y_per_row_flip[k],
+ &mode_lib->mp.final_flip_bw[k],
+ &mode_lib->mp.ImmediateFlipSupportedForPipe[k]);
+ }
+
+ calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
+ calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required_flip;
+ calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
+ calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required_flip;
+ calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
+ calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
+
+ calculate_peak_bandwidth_params->display_cfg = display_cfg;
+ calculate_peak_bandwidth_params->inc_flip_bw = 1;
+ calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
+ calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
+ calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
+ calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
+
+ calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
+ calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
+ calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
+ calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
+ calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
+ calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0];
+ calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
+ calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
+
+ calculate_peak_bandwidth_required(
+ &mode_lib->scratch,
+ calculate_peak_bandwidth_params);
+
+ calculate_immediate_flip_bandwidth_support(
+ &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip
+ &mode_lib->mp.ImmediateFlipSupported, // bool* flip_bandwidth_support_ok
+
+ dml2_core_internal_soc_state_sys_active,
+ mode_lib->mp.urg_bandwidth_required_flip,
+ mode_lib->mp.non_urg_bandwidth_required_flip,
+ mode_lib->mp.urg_bandwidth_available);
+
+ if (!mode_lib->mp.ImmediateFlipSupported)
+ DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for flip!", __func__);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) {
+ mode_lib->mp.ImmediateFlipSupported = false;
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k);
+#endif
+ }
+ }
+ } else { // flip or prefetch not support
+ mode_lib->mp.ImmediateFlipSupported = false;
+ }
+
+ // consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm
+ must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required;
+ mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported);
+ for (k = 0; k < s->num_active_planes; ++k)
+ DML_LOG_VERBOSE("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
+ DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable);
+ DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported);
+ DML_LOG_VERBOSE("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
+#endif
+ DML_LOG_VERBOSE("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]);
+ }
+
+ for (k = 0; k < s->num_active_planes; ++k)
+ DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+
+ if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) {
+ DML_LOG_VERBOSE("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__);
+ } else {
+ DML_LOG_VERBOSE("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__);
+
+ // DCC Configuration
+ for (k = 0; k < s->num_active_planes; ++k) {
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
+#endif
+ CalculateDCCConfiguration(
+ display_cfg->plane_descriptors[k].surface.dcc.enable,
+ display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown,
+ display_cfg->plane_descriptors[k].pixel_format,
+ display_cfg->plane_descriptors[k].surface.plane0.width,
+ display_cfg->plane_descriptors[k].surface.plane1.width,
+ display_cfg->plane_descriptors[k].surface.plane0.height,
+ display_cfg->plane_descriptors[k].surface.plane1.height,
+ s->NomDETInKByte,
+ mode_lib->mp.Read256BlockHeightY[k],
+ mode_lib->mp.Read256BlockHeightC[k],
+ display_cfg->plane_descriptors[k].surface.tiling,
+ mode_lib->mp.BytePerPixelY[k],
+ mode_lib->mp.BytePerPixelC[k],
+ mode_lib->mp.BytePerPixelInDETY[k],
+ mode_lib->mp.BytePerPixelInDETC[k],
+ display_cfg->plane_descriptors[k].composition.rotation_angle,
+
+ /* Output */
+ &mode_lib->mp.RequestLuma[k],
+ &mode_lib->mp.RequestChroma[k],
+ &mode_lib->mp.DCCYMaxUncompressedBlock[k],
+ &mode_lib->mp.DCCCMaxUncompressedBlock[k],
+ &mode_lib->mp.DCCYMaxCompressedBlock[k],
+ &mode_lib->mp.DCCCMaxCompressedBlock[k],
+ &mode_lib->mp.DCCYIndependentBlock[k],
+ &mode_lib->mp.DCCCIndependentBlock[k]);
+ }
+
+ //Watermarks and NB P-State/DRAM Clock Change Support
+ s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency;
+ s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency;
+ s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr;
+ s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
+ s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
+ s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
+ s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
+ s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
+ s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
+ s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
+ s->mmSOCParameters.USRRetrainingLatency = 0;
+ s->mmSOCParameters.SMNLatency = 0;
+ s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index);
+ s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index);
+ s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock;
+ s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
+
+ CalculateWatermarks_params->display_cfg = display_cfg;
+ CalculateWatermarks_params->USRRetrainingRequired = false;
+ CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes;
+ CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
+ CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
+ CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
+ CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk;
+ CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
+ CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
+ CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
+ CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
+ CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
+ CalculateWatermarks_params->SOCCLK = s->SOCCLK;
+ CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
+ CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
+ CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
+ CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY;
+ CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC;
+ CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY;
+ CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC;
+ CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
+ CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC;
+ CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler;
+ CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler;
+ CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
+ CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
+ CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height;
+ CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
+ CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
+
+ // Output
+ CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark;
+ CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport;
+ CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported;
+ CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported;
+ CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL;
+ CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport;
+ CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported;
+ CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported;
+ CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport;
+ CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support;
+ CalculateWatermarks_params->VActiveLatencyHidingMargin = 0;
+ CalculateWatermarks_params->VActiveLatencyHidingUs = 0;
+
+ CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
+
+ for (k = 0; k < s->num_active_planes; ++k) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
+ mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
+ } else {
+ mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0;
+ mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0;
+ }
+ }
+
+ calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines);
+
+ DML_LOG_VERBOSE("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index);
+ DML_LOG_VERBOSE("DML::%s: DEBUG PixelClock = %ld kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz));
+
+ //Display Pipeline Delivery Time in Prefetch, Groups
+ CalculatePixelDeliveryTimes(
+ display_cfg,
+ cfg_support_info,
+ s->num_active_planes,
+ mode_lib->mp.VRatioPrefetchY,
+ mode_lib->mp.VRatioPrefetchC,
+ mode_lib->mp.swath_width_luma_ub,
+ mode_lib->mp.swath_width_chroma_ub,
+ mode_lib->mp.PSCL_THROUGHPUT,
+ mode_lib->mp.PSCL_THROUGHPUT_CHROMA,
+ mode_lib->mp.Dppclk,
+ mode_lib->mp.BytePerPixelC,
+ mode_lib->mp.req_per_swath_ub_l,
+ mode_lib->mp.req_per_swath_ub_c,
+
+ /* Output */
+ mode_lib->mp.DisplayPipeLineDeliveryTimeLuma,
+ mode_lib->mp.DisplayPipeLineDeliveryTimeChroma,
+ mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch,
+ mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch,
+ mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma,
+ mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma,
+ mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch,
+ mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
+
+ CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch;
+ CalculateMetaAndPTETimes_params->display_cfg = display_cfg;
+ CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes;
+ CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
+ CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank;
+ CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip;
+ CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
+ CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC;
+ CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height;
+ CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
+ CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
+ CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
+ CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
+ CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
+ CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
+ CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
+ CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
+ CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
+ CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
+ CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub;
+ CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present;
+
+ CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes;
+ CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes;
+ CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width;
+ CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
+ CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height;
+ CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
+ CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width;
+ CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
+ CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height;
+ CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
+
+ CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group;
+ CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L;
+ CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C;
+ CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma;
+ CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma;
+ CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma;
+ CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma;
+ CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma;
+ CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma;
+ CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L;
+ CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C;
+ CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal;
+ CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal;
+ CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank;
+ CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank;
+ CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip;
+ CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip;
+
+ CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params);
+
+ CalculateVMGroupAndRequestTimes(
+ display_cfg,
+ s->num_active_planes,
+ mode_lib->mp.BytePerPixelC,
+ mode_lib->mp.dst_y_per_vm_vblank,
+ mode_lib->mp.dst_y_per_vm_flip,
+ mode_lib->mp.dpte_row_width_luma_ub,
+ mode_lib->mp.dpte_row_width_chroma_ub,
+ mode_lib->mp.vm_group_bytes,
+ mode_lib->mp.dpde0_bytes_per_frame_ub_l,
+ mode_lib->mp.dpde0_bytes_per_frame_ub_c,
+ s->tdlut_pte_bytes_per_frame,
+ mode_lib->mp.meta_pte_bytes_per_frame_ub_l,
+ mode_lib->mp.meta_pte_bytes_per_frame_ub_c,
+ mode_lib->ip.dcn_mrq_present,
+
+ /* Output */
+ mode_lib->mp.TimePerVMGroupVBlank,
+ mode_lib->mp.TimePerVMGroupFlip,
+ mode_lib->mp.TimePerVMRequestVBlank,
+ mode_lib->mp.TimePerVMRequestFlip);
+
+ // VStartup Adjustment
+ for (k = 0; k < s->num_active_planes; ++k) {
+ bool isInterlaceTiming;
+
+ mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency;
+ if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable)
+ mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k];
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
+#endif
+ s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
+ DML_LOG_VERBOSE("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
+#endif
+
+ mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin;
+ if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) {
+ mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin;
+ }
+
+ isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
+
+ // The actual positioning of the vstartup
+ mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
+
+ s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) :
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
+ s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0;
+ s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k];
+
+ if (s->blank_lines_remaining < 0) {
+ DML_LOG_VERBOSE("ERROR: Vstartup is larger than vblank!?\n");
+ s->blank_lines_remaining = 0;
+ DML_ASSERT(0);
+ }
+ mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
+
+ // debug only
+ if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / (double) display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <=
+ (isInterlaceTiming ?
+ math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) :
+ (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) {
+ mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true;
+ } else {
+ mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false;
+ }
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, HTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total);
+ DML_LOG_VERBOSE("DML::%s: k=%u, VTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
+ DML_LOG_VERBOSE("DML::%s: k=%u, VActive = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active);
+ DML_LOG_VERBOSE("DML::%s: k=%u, VFrontPorch = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
+ DML_LOG_VERBOSE("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]);
+#endif
+ }
+
+ //Maximum Bandwidth Used
+ mode_lib->mp.TotalWRBandwidth = 0;
+ for (k = 0; k < display_cfg->num_streams; ++k) {
+ s->WRBandwidth = 0;
+ if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) {
+ s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height
+ * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width /
+ (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height
+ / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000))
+ * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 4.0 : 8.0);
+ mode_lib->mp.TotalWRBandwidth = mode_lib->mp.TotalWRBandwidth + s->WRBandwidth;
+ }
+ }
+
+ mode_lib->mp.TotalDataReadBandwidth = 0;
+ for (k = 0; k < s->num_active_planes; ++k) {
+ mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k];
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth);
+ DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
+ DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
+#endif
+ }
+
+ CalculateStutterEfficiency_params->display_cfg = display_cfg;
+ CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
+ CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
+ CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries;
+ CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries;
+ CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes;
+ CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes;
+ CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes;
+ CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth;
+ CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk;
+ CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active];
+ CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b;
+ CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs;
+ CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
+ CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
+ CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
+ CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark;
+ CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark;
+ CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
+ CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank;
+ CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
+ CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
+ CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
+ CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
+ CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY;
+ CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY;
+ CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC;
+ CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY;
+ CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY;
+ CalculateStutterEfficiency_params->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC;
+ CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC;
+ CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock;
+ CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock;
+ CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l;
+ CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c;
+ CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
+ CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw;
+ CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present;
+
+ // output
+ CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank;
+ CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency;
+ CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame;
+ CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
+ CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency;
+ CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
+ CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod;
+ CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+
+ // Stutter Efficiency
+ CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
+
+#ifdef __DML_VBA_ALLOW_DELTA__
+ // Calculate z8 stutter eff assuming 0 reserved space
+ CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0;
+ CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0;
+
+ CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase;
+ CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase;
+ CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase;
+ CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase;
+
+ // Stutter Efficiency
+ CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
+#else
+ mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
+ mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency;
+ mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
+ mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod;
+#endif
+ } // PrefetchAndImmediateFlipSupported
+
+ max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0;
+ min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz;
+ mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles;
+ mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles);
+ DML_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256);
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz);
+ DML_LOG_VERBOSE("DML::%s: min_return_uclk_cycles = %ld\n", __func__, min_return_uclk_cycles);
+ DML_LOG_VERBOSE("DML::%s: min_return_fclk_cycles = %ld\n", __func__, min_return_fclk_cycles);
+ DML_LOG_VERBOSE("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles);
+ DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
+ DML_LOG_VERBOSE("DML::%s: --- END --- \n", __func__);
+#endif
+ return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported);
+}
+
+/*
+ * Logging wrapper around dml_core_mode_programming().
+ *
+ * in_out_params is forwarded unchanged; the callee's return value is logged
+ * and handed back to the caller as-is.
+ */
+bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params)
+{
+	/* Declared ahead of the first statement: no mixed declarations and code. */
+	bool result;
+
+	DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__);
+	result = dml_core_mode_programming(in_out_params);
+
+	DML_LOG_VERBOSE("DML::%s: result = %0d\n", __func__, result);
+	DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__);
+	return result;
+}
+
+/*
+ * Compute the DPTE row height for one plane of a surface.
+ *
+ * The 256-byte block and macro tile geometry is obtained from
+ * CalculateBytePerPixelAndBlockSizes() (the same pitch is passed for both of
+ * its pitch arguments).  The luma or chroma set is then selected with
+ * is_plane1 and fed to CalculateVMAndRowBytes() through mode_lib's scratch
+ * parameter block, together with fixed inputs: one DPP, DCC disabled, GPUVM
+ * enabled with 4 page table levels, a zeroed viewport/swath, no phantom pipe
+ * and no MRQ.
+ *
+ * dpte_row_height:        [out] written by CalculateVMAndRowBytes()
+ * mode_lib:               provides ip.dpte_buffer_size_in_pte_reqs_{luma,chroma};
+ *                         scratch.calculate_vm_and_row_bytes_params is overwritten
+ * is_plane1:              true selects the chroma (plane 1) values, false the luma ones
+ * SourcePixelFormat:      surface pixel format
+ * SurfaceTiling:          surface swizzle mode
+ * ScanDirection:          rotation angle, passed on as RotationAngle
+ * pitch:                  surface pitch
+ * GPUVMMinPageSizeKBytes: minimum GPUVM page size, passed through
+ *
+ * NOTE(review): on return the scratch parameter block still holds pointers
+ * into this function's stack frame (dummy_integer) as well as the caller's
+ * dpte_row_height pointer; later code must not dereference the stale outputs.
+ */
+void dml2_core_calcs_get_dpte_row_height(
+	unsigned int *dpte_row_height,
+	struct dml2_core_internal_display_mode_lib *mode_lib,
+	bool is_plane1,
+	enum dml2_source_format_class SourcePixelFormat,
+	enum dml2_swizzle_mode SurfaceTiling,
+	enum dml2_rotation_angle ScanDirection,
+	unsigned int pitch,
+	unsigned int GPUVMMinPageSizeKBytes)
+{
+	unsigned int BytePerPixelY;
+	unsigned int BytePerPixelC;
+	double BytePerPixelInDETY;
+	double BytePerPixelInDETC;
+	unsigned int BlockHeight256BytesY;
+	unsigned int BlockHeight256BytesC;
+	unsigned int BlockWidth256BytesY;
+	unsigned int BlockWidth256BytesC;
+	unsigned int MacroTileWidthY;
+	unsigned int MacroTileWidthC;
+	unsigned int MacroTileHeightY;
+	unsigned int MacroTileHeightC;
+	bool surf_linear_128_l = false;
+	bool surf_linear_128_c = false;
+
+	/* Per-plane selections, assigned once the block sizes are known. */
+	unsigned int BytePerPixel;
+	unsigned int BlockHeight256Bytes;
+	unsigned int BlockWidth256Bytes;
+	unsigned int MacroTileWidth;
+	unsigned int MacroTileHeight;
+	unsigned int PTEBufferSizeInRequests;
+
+	/* Sink for outputs this function does not report; slots 0 and 10 are unused. */
+	unsigned int dummy_integer[21];
+
+	CalculateBytePerPixelAndBlockSizes(
+		SourcePixelFormat,
+		SurfaceTiling,
+		pitch,
+		pitch,
+
+		/* Output */
+		&BytePerPixelY,
+		&BytePerPixelC,
+		&BytePerPixelInDETY,
+		&BytePerPixelInDETC,
+		&BlockHeight256BytesY,
+		&BlockHeight256BytesC,
+		&BlockWidth256BytesY,
+		&BlockWidth256BytesC,
+		&MacroTileHeightY,
+		&MacroTileHeightC,
+		&MacroTileWidthY,
+		&MacroTileWidthC,
+		&surf_linear_128_l,
+		&surf_linear_128_c);
+
+	if (is_plane1) {
+		BytePerPixel = BytePerPixelC;
+		BlockHeight256Bytes = BlockHeight256BytesC;
+		BlockWidth256Bytes = BlockWidth256BytesC;
+		MacroTileWidth = MacroTileWidthC;
+		MacroTileHeight = MacroTileHeightC;
+		PTEBufferSizeInRequests = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
+	} else {
+		BytePerPixel = BytePerPixelY;
+		BlockHeight256Bytes = BlockHeight256BytesY;
+		BlockWidth256Bytes = BlockWidth256BytesY;
+		MacroTileWidth = MacroTileWidthY;
+		MacroTileHeight = MacroTileHeightY;
+		PTEBufferSizeInRequests = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
+	}
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML: %s: is_plane1 = %u\n", __func__, is_plane1);
+	DML_LOG_VERBOSE("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
+	DML_LOG_VERBOSE("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
+	DML_LOG_VERBOSE("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
+	DML_LOG_VERBOSE("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
+	DML_LOG_VERBOSE("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
+	DML_LOG_VERBOSE("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
+	DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
+	DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
+	DML_LOG_VERBOSE("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
+#endif
+
+	/* Inputs */
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0;
+
+	/* Outputs: only dpte_row_height reaches the caller, the rest land in dummy_integer[]. */
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &dummy_integer[1];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &dummy_integer[2];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &dummy_integer[3];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height;
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &dummy_integer[4];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &dummy_integer[5];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &dummy_integer[6];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &dummy_integer[7];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &dummy_integer[8];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &dummy_integer[9];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &dummy_integer[11];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &dummy_integer[12];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &dummy_integer[13];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &dummy_integer[14];
+
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &dummy_integer[15];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &dummy_integer[16];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &dummy_integer[17];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &dummy_integer[18];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &dummy_integer[19];
+	mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &dummy_integer[20];
+
+	// just supply with enough parameters to calculate dpte
+	CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
+#endif
+}
+
+/*
+ * Tell whether a source format is one of the four formats treated as dual
+ * plane here: the three 4:2:0 formats and RGBE with alpha.
+ */
+static bool is_dual_plane(enum dml2_source_format_class source_format)
+{
+	switch (source_format) {
+	case dml2_420_12:
+	case dml2_420_8:
+	case dml2_420_10:
+	case dml2_rgbe_alpha:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Look up the plane index recorded for pipe_idx in mode_lib->mp.pipe_plane[]. */
+static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
+{
+	return mode_lib->mp.pipe_plane[pipe_idx];
+}
+
+/*
+ * Fill the DCHUB watermark register values from the mode programming results.
+ *
+ * Each watermark and latency in mode_lib->mp is multiplied by the reference
+ * clock and truncated to unsigned int; the urgent bandwidth fractions are
+ * multiplied by 1000 instead.  The reference clock is the
+ * display_cfg->overrides.hw.dlg_ref_clk_mhz override when it is greater than
+ * zero, otherwise mode_lib->soc.dchub_refclk_mhz.
+ */
+static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs)
+{
+	double refclk_mhz = mode_lib->soc.dchub_refclk_mhz;
+
+	if (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0)
+		refclk_mhz = (double)display_cfg->overrides.hw.dlg_ref_clk_mhz;
+
+	/* Watermarks, scaled by the reference clock. */
+	wm_regs->fclk_pstate = (unsigned int)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_mhz);
+	wm_regs->sr_enter = (unsigned int)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_mhz);
+	wm_regs->sr_exit = (unsigned int)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_mhz);
+	wm_regs->sr_enter_z8 = (unsigned int)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_mhz);
+	wm_regs->sr_exit_z8 = (unsigned int)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_mhz);
+	wm_regs->temp_read_or_ppt = (unsigned int)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_mhz);
+	wm_regs->uclk_pstate = (unsigned int)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_mhz);
+	wm_regs->urgent = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_mhz);
+	wm_regs->usr = (unsigned int)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_mhz);
+
+	/* Trip-to-memory latencies, scaled by the reference clock. */
+	wm_regs->refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.UrgentLatency * refclk_mhz);
+	wm_regs->refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.MetaTripToMemory * refclk_mhz);
+
+	/* Urgent bandwidth fractions, scaled by 1000. */
+	wm_regs->frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000);
+	wm_regs->frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000);
+	wm_regs->frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000);
+}
+
+/*
+ * Return math_log2_approx(a) - subtrahend, or 0 when a is 0.
+ *
+ * NOTE(review): nothing guards against the logarithm being smaller than
+ * subtrahend; callers are presumably expected to pass values for which the
+ * difference is non-negative.
+ */
+static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend)
+{
+	if (a != 0)
+		return (math_log2_approx(a) - subtrahend);
+
+	return 0;
+}
+
+/*
+ * Fill the cursor DLG registers.
+ *
+ * dst_x_offset is the cursor x position, plus the larger of the primary and
+ * secondary stereo offsets when stereo is enabled, minus the hotspot x
+ * (doubled when 2x magnify is set), scaled by
+ * dlg_refclk_mhz / pixel_rate_mhz / hratio and truncated to int; a result
+ * that is not positive is programmed as 0.  The remaining fields are set to
+ * fixed values.
+ */
+void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p)
+{
+	int raw_x_offset = (int)((p->cursor_x_position +
+		(p->cursor_stereo_en != 0 ? math_max2(p->cursor_primary_offset, p->cursor_secondary_offset) : 0) -
+		(p->cursor_hotspot_x * (p->cursor_2x_magnify != 0 ? 2 : 1))) *
+		p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio);
+
+	/* Fixed programming, independent of the cursor position. */
+	cursor_dlg_regs->chunk_hdl_adjust = 3;
+	cursor_dlg_regs->dst_y_offset = 0;
+	cursor_dlg_regs->qos_level_fixed = 8;
+	cursor_dlg_regs->qos_ramp_disable = 0;
+
+	/* The register takes no negative offset: clamp at zero. */
+	if (raw_x_offset > 0)
+		cursor_dlg_regs->dst_x_offset = (unsigned int)raw_x_offset;
+	else
+		cursor_dlg_regs->dst_x_offset = (unsigned int)0;
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position);
+	DML_LOG_VERBOSE("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz);
+	DML_LOG_VERBOSE("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz);
+	DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d\n", __func__, raw_x_offset);
+	DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset);
+#endif
+}
+
+static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs,
+ const struct dml2_display_cfg *display_cfg,
+ const struct dml2_core_internal_display_mode_lib *mode_lib,
+ unsigned int pipe_idx)
+{
+ unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
+ enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format;
+ enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling;
+ bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format));
+
+ unsigned int pixel_chunk_bytes = 0;
+ unsigned int min_pixel_chunk_bytes = 0;
+ unsigned int dpte_group_bytes = 0;
+ unsigned int mpte_group_bytes = 0;
+
+ unsigned int p1_pixel_chunk_bytes = 0;
+ unsigned int p1_min_pixel_chunk_bytes = 0;
+ unsigned int p1_dpte_group_bytes = 0;
+ unsigned int p1_mpte_group_bytes = 0;
+
+ unsigned int detile_buf_plane1_addr = 0;
+ unsigned int detile_buf_size_in_bytes;
+ double stored_swath_l_bytes;
+ double stored_swath_c_bytes;
+ bool is_phantom_pipe;
+
+ DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx);
+
+ pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024);
+ min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes);
+
+ if (pixel_chunk_bytes == 64 * 1024)
+ min_pixel_chunk_bytes = 0;
+
+ dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx));
+ mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx));
+
+ p1_pixel_chunk_bytes = pixel_chunk_bytes;
+ p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
+ p1_dpte_group_bytes = dpte_group_bytes;
+ p1_mpte_group_bytes = mpte_group_bytes;
+
+ if (source_format == dml2_rgbe_alpha)
+ p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024);
+
+ rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib);
+ rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10);
+ rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10);
+
+ if (min_pixel_chunk_bytes == 0)
+ rq_regs->rq_regs_l.min_chunk_size = 0;
+ else
+ rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1);
+
+ if (p1_min_pixel_chunk_bytes == 0)
+ rq_regs->rq_regs_c.min_chunk_size = 0;
+ else
+ rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1);
+
+ rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6);
+ rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6);
+ rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6);
+ rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6);
+
+ detile_buf_size_in_bytes = (unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024);
+
+ if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) {
+ unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx));
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear);
+#endif
+ DML_ASSERT(p0_pte_row_height_linear >= 8);
+
+ rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3;
+ if (dual_plane) {
+ unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx));
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear);
+#endif
+ if (sw_mode == dml2_sw_linear) {
+ DML_ASSERT(p1_pte_row_height_linear >= 8);
+ }
+ rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3;
+ }
+ } else {
+ rq_regs->rq_regs_l.pte_row_height_linear = 0;
+ rq_regs->rq_regs_c.pte_row_height_linear = 0;
+ }
+
+ rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0);
+ rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0);
+
+ // FIXME_DCN4, programming guide has dGPU condition
+ if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
+ rq_regs->drq_expansion_mode = 0;
+ } else {
+ rq_regs->drq_expansion_mode = 2;
+ }
+ rq_regs->prq_expansion_mode = 1;
+ rq_regs->crq_expansion_mode = 1;
+ rq_regs->mrq_expansion_mode = 1;
+
+ stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx);
+ stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx);
+ is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx);
+
+ // Note: detile_buf_plane1_addr is in unit of 1KB
+ if (dual_plane) {
+ if (is_phantom_pipe) {
+ detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma
+ } else {
+ if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
+ detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
+#endif
+ } else {
+ detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
+#endif
+ }
+ }
+ }
+ rq_regs->plane1_base_address = detile_buf_plane1_addr;
+
+#ifdef __DML_VBA_DEBUG__
+ DML_LOG_VERBOSE("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
+ DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
+ DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
+ DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
+ DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
+ DML_LOG_VERBOSE("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
+#endif
+ //DML_LOG_VERBOSE_rq_regs_st(rq_regs);
+ DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
+}
+
+/*
+ * rq_dlg_get_dlg_reg - derive the DLG and TTU register values for one pipe.
+ *
+ * @s:             core scratch area; its rq_dlg_get_dlg_reg_locals member is
+ *                 zeroed here and then used as working storage
+ * @disp_dlg_regs: DLG register set to fill in
+ * @disp_ttu_regs: TTU register set to fill in
+ * @display_cfg:   display configuration (stream timing, plane format, overrides)
+ * @mode_lib:      mode library; per-plane results are read from mode_lib->mp
+ * @pipe_idx:      index of the pipe being programmed
+ *
+ * The output structures are left untouched when the plane index resolved for
+ * @pipe_idx is not below DML2_MAX_PLANES.
+ */
+static void rq_dlg_get_dlg_reg(
+	struct dml2_core_internal_scratch *s,
+	struct dml2_display_dlg_regs *disp_dlg_regs,
+	struct dml2_display_ttu_regs *disp_ttu_regs,
+	const struct dml2_display_cfg *display_cfg,
+	const struct dml2_core_internal_display_mode_lib *mode_lib,
+	const unsigned int pipe_idx)
+{
+	struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals;
+
+	memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals));
+
+	DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);
+
+	l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
+	DML_ASSERT(l->plane_idx < DML2_MAX_PLANES);
+
+	/* Defaults; overwritten below once the plane index is known to be valid. */
+	l->source_format = dml2_444_8;
+	l->odm_mode = dml2_odm_mode_bypass;
+	l->dual_plane = false;
+	l->htotal = 0;
+	l->hactive = 0;
+	l->hblank_end = 0;
+	l->vblank_end = 0;
+	l->interlaced = false;
+	l->pclk_freq_in_mhz = 0.0;
+	/* The hw override of the DLG reference clock wins when it is set (> 0). */
+	l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
+	l->ref_freq_to_pix_freq = 0.0;
+
+	if (l->plane_idx < DML2_MAX_PLANES) {
+
+		l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing;
+		l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format;
+		l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx];
+
+		l->dual_plane = is_dual_plane(l->source_format);
+
+		l->htotal = l->timing->h_total;
+		l->hactive = l->timing->h_active;
+		l->hblank_end = l->timing->h_blank_end;
+		l->vblank_end = l->timing->v_blank_end;
+		l->interlaced = l->timing->interlaced;
+		l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000;
+		l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz;
+
+		DML_LOG_VERBOSE("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx);
+		DML_LOG_VERBOSE("DML_DLG: %s: htotal = %d\n", __func__, l->htotal);
+		DML_LOG_VERBOSE("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz);
+		DML_LOG_VERBOSE("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz);
+		DML_LOG_VERBOSE("DML_DLG: %s: soc.refclk_mhz = %d\n", __func__, mode_lib->soc.dchub_refclk_mhz);
+		DML_LOG_VERBOSE("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz);
+		DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
+		DML_LOG_VERBOSE("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced);
+
+		DML_ASSERT(l->refclk_freq_in_mhz != 0);
+		DML_ASSERT(l->pclk_freq_in_mhz != 0);
+		DML_ASSERT(l->ref_freq_to_pix_freq < 4.0);
+
+		// Need to figure out which side of odm combine we're in
+		// Assume the pipe instance under the same plane is in order
+
+		if (l->odm_mode == dml2_odm_mode_bypass) {
+			disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq);
+		} else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) {
+			// find out how many pipe are in this plane
+			l->num_active_pipes = mode_lib->mp.num_active_pipes;
+			l->first_pipe_idx_in_plane = DML2_MAX_PLANES;
+			l->pipe_idx_in_combine = 0; // pipe index within the plane
+			l->odm_combine_factor = 2;
+
+			if (l->odm_mode == dml2_odm_mode_combine_3to1)
+				l->odm_combine_factor = 3;
+			else if (l->odm_mode == dml2_odm_mode_combine_4to1)
+				l->odm_combine_factor = 4;
+
+			/* Lowest pipe index that maps to this plane. */
+			for (unsigned int i = 0; i < l->num_active_pipes; i++) {
+				if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) {
+					if (i < l->first_pipe_idx_in_plane) {
+						l->first_pipe_idx_in_plane = i;
+					}
+				}
+			}
+			l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.)
+
+			/* Each pipe of the combine starts one hactive/factor slice further right. */
+			disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq);
+			DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx);
+			DML_LOG_VERBOSE("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane);
+			DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine);
+			DML_LOG_VERBOSE("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor);
+		}
+		DML_LOG_VERBOSE("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end);
+
+		DML_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13));
+
+		disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19));
+		disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8));
+		disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits
+
+		l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]];
+		l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]);
+
+		DML_LOG_VERBOSE("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank);
+		DML_LOG_VERBOSE("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start);
+		DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
+
+		l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]);
+		disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0;
+
+		DML_LOG_VERBOSE("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
+
+		l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
+		l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
+
+		DML_LOG_VERBOSE("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler);
+		DML_LOG_VERBOSE("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler);
+
+		l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]];
+		l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
+		l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
+		l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]];
+		l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]];
+
+		DML_LOG_VERBOSE("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch);
+		DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip);
+		DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip);
+		DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank);
+		DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank);
+
+		if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) {
+			DML_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank));
+		}
+
+		l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]];
+		l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]];
+
+		DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l);
+		DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c);
+
+		// Active
+		l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+
+		DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l);
+		DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l);
+
+		l->refcyc_per_line_delivery_pre_c = 0.0;
+		l->refcyc_per_line_delivery_c = 0.0;
+
+		if (l->dual_plane) {
+			l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+			l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+
+			DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c);
+			DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c);
+		}
+
+		disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
+		disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
+
+		l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+
+		DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l);
+		DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l);
+
+		l->refcyc_per_req_delivery_pre_c = 0.0;
+		l->refcyc_per_req_delivery_c = 0.0;
+		if (l->dual_plane) {
+			l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+			l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+
+			DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c);
+			DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c);
+		}
+
+		// TTU - Cursor
+		DML_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1);
+
+		// Assign to register structures
+		disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2));
+		DML_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18));
+
+		disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line
+		disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk
+		disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2));
+		disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2));
+		disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2));
+		disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2));
+		disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2));
+
+		disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19));
+		disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19));
+
+		DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
+		DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
+		DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
+		DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
+
+		disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
+		disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
+		disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
+		disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
+
+		l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
+		l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
+		l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+
+		disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2));
+		disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2));
+
+		disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l);
+		disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c);
+		disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l);
+		disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c);
+		disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l);
+		disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c);
+		disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1);
+		disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1);
+		disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1);
+		disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1);
+
+		l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
+		l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
+		l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+		l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
+
+		disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2));
+		disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2));
+		disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l);
+		disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c);
+		disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l);
+		disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c);
+		disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l);
+		disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c);
+
+		disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group);
+		disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
+
+		disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10));
+		disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10));
+		disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10));
+		disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10));
+		disp_ttu_regs->qos_level_low_wm = 0;
+
+		disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq);
+
+		disp_ttu_regs->qos_level_flip = 14;
+		disp_ttu_regs->qos_level_fixed_l = 8;
+		disp_ttu_regs->qos_level_fixed_c = 8;
+		disp_ttu_regs->qos_ramp_disable_l = 0;
+		disp_ttu_regs->qos_ramp_disable_c = 0;
+		disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz);
+
+		// CHECK for HW registers' range, DML_ASSERT or clamp
+		DML_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13));
+		DML_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13));
+		DML_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13));
+		DML_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13));
+		if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23))
+			disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1);
+
+		if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23))
+			disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1);
+
+		if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23))
+			disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1);
+
+		if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23))
+			disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1);
+
+
+		DML_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8);
+		DML_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13));
+
+		/*
+		 * Clamp the register fields themselves. The U15.2 conversion has
+		 * already been stored in disp_dlg_regs above, so clamping only the
+		 * scratch copy in 'l' would leave the out-of-range value in place.
+		 */
+		if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) {
+			DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1);
+			disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1;
+		}
+		if (l->dual_plane) {
+			if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) {
+				DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1);
+				disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1;
+			}
+		}
+
+		if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23))
+			disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1);
+		if (l->dual_plane) {
+			if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23))
+				disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1);
+		}
+		DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13));
+		if (l->dual_plane) {
+			DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13));
+		}
+
+		DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13));
+		DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13));
+		DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13));
+		DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13));
+		DML_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14));
+		DML_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14));
+		DML_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24));
+
+		DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
+
+	}
+}
+
+/*
+ * rq_dlg_get_arb_params - fill in the arbiter register parameters.
+ *
+ * @display_cfg: display configuration; only overrides.hw.dlg_ref_clk_mhz is read
+ * @mode_lib:    mode library supplying the soc/ip/ip_caps limits and mp results
+ * @arb_param:   arbiter register set to fill in
+ */
+static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg,
+	const struct dml2_core_internal_display_mode_lib *mode_lib,
+	struct dml2_display_arb_regs *arb_param)
+{
+	/* DLG reference clock: the hw override wins when it is set (> 0). */
+	double dlg_refclk_mhz = mode_lib->soc.dchub_refclk_mhz;
+
+	if (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0)
+		dlg_refclk_mhz = (double)display_cfg->overrides.hw.dlg_ref_clk_mhz;
+
+	/* Fixed programming values. */
+	arb_param->sat_level_us = 60;
+	arb_param->hvm_max_qos_commit_threshold = 0xf;
+	arb_param->hvm_min_req_outstand_commit_threshold = 0xa;
+
+	/* min == max turns off the sat level feature. */
+	arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs;
+	arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs;
+
+	/* 3/4 of (words per channel * channel count), with a floor of 96. */
+	arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4;
+	if (arb_param->sdpif_request_rate_limit < 96)
+		arb_param->sdpif_request_rate_limit = 96;
+
+	/* Reserved space is reported in 64-byte units; convert to kbytes. */
+	arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024;
+	/* Compressed buffer size expressed in segments. */
+	arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
+
+	arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib);
+	arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
+	/* Delay in us times refclk in MHz gives reference clock cycles. */
+	arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * dlg_refclk_mhz);
+
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding);
+	DML_LOG_VERBOSE("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit);
+	DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes);
+	DML_LOG_VERBOSE("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req);
+	DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis);
+#endif
+}
+
+/*
+ * dml2_core_calcs_get_watermarks - externally visible wrapper that fills @out
+ * by forwarding all three arguments to rq_dlg_get_wm_regs().
+ */
+void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg,
+	const struct dml2_core_internal_display_mode_lib *mode_lib,
+	struct dml2_dchub_watermark_regs *out)
+{
+	rq_dlg_get_wm_regs(display_cfg, mode_lib, out);
+}
+
+/*
+ * dml2_core_calcs_get_arb_params - externally visible wrapper that fills @out
+ * by forwarding all three arguments to rq_dlg_get_arb_params().
+ */
+void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg,
+	const struct dml2_core_internal_display_mode_lib *mode_lib,
+	struct dml2_display_arb_regs *out)
+{
+	rq_dlg_get_arb_params(display_cfg, mode_lib, out);
+}
+
+/*
+ * dml2_core_calcs_get_pipe_regs - build the per-pipe DCHUB register set.
+ *
+ * @display_cfg: display configuration passed through to the RQ/DLG helpers
+ * @mode_lib:    mode library; non-const because its scratch area is handed to
+ *               rq_dlg_get_dlg_reg() as working storage
+ * @out:         per-pipe register set; rq_regs, dlg_regs, ttu_regs and
+ *               det_size are written
+ * @pipe_index:  pipe to generate the registers for
+ */
+void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg,
+	struct dml2_core_internal_display_mode_lib *mode_lib,
+	struct dml2_dchub_per_pipe_register_set *out,
+	int pipe_index)
+{
+	/* Request (RQ) registers first, then DLG and TTU registers. */
+	rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index);
+	rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index);
+
+	/* DET size is expressed in config-return-buffer segments, not kbytes. */
+	out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
+}
+
+/*
+ * dml2_core_calcs_get_global_sync_programming - copy the global sync values
+ * for @pipe_index out of @mode_lib into the dcn4x member of @out.
+ */
+void dml2_core_calcs_get_global_sync_programming(
+	const struct dml2_core_internal_display_mode_lib *mode_lib,
+	union dml2_global_sync_programming *out,
+	int pipe_index)
+{
+	/* Each field is a straight read through the matching dml_get_* accessor. */
+	out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index);
+	out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index);
+	out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index);
+	out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index);
+	out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index);
+}
+
+/*
+ * dml2_core_calcs_get_stream_programming - fill the per-stream programming.
+ * Only the global_sync member of @out is written, via
+ * dml2_core_calcs_get_global_sync_programming().
+ */
+void dml2_core_calcs_get_stream_programming(
+	const struct dml2_core_internal_display_mode_lib *mode_lib,
+	struct dml2_per_stream_programming *out,
+	int pipe_index)
+{
+	dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index);
+}
+
+/*
+ * dml2_core_calcs_get_global_fams2_programming - fill the global FAMS2 config.
+ *
+ * @mode_lib:            mode library; the ip_caps.fams2 timing limits are read
+ * @display_cfg:         display configuration with meta; stage3.fams2_required
+ *                       decides whether the feature is enabled
+ * @fams2_global_config: DMUB global FAMS2 configuration to fill in
+ *
+ * The enable bit is always written. Every other field is written only when
+ * the enable bit reads back as set.
+ */
+void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
+	const struct display_configuation_with_meta *display_cfg,
+	struct dmub_cmd_fams2_global_config *fams2_global_config)
+{
+	fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required;
+
+	/* Nothing else to program while the feature is disabled. */
+	if (!fams2_global_config->features.bits.enable)
+		return;
+
+	fams2_global_config->features.bits.enable_stall_recovery = true;
+	fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START;
+
+	/* Timing limits come straight from the IP capabilities. */
+	fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us;
+	fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us;
+	fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us;
+	fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us;
+
+	fams2_global_config->num_streams = display_cfg->display_config.num_streams;
+}
+
+void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
+ const struct display_configuation_with_meta *display_cfg,
+ union dmub_cmd_fams2_config *fams2_base_programming,
+ union dmub_cmd_fams2_config *fams2_sub_programming,
+ enum dml2_pstate_method pstate_method,
+ int plane_index)
+{
+ const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index];
+ const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index];
+ const struct dml2_fams2_meta *stream_fams2_meta = &display_cfg->stage3.stream_fams2_meta[plane_descriptor->stream_index];
+
+ struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base;
+ union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state;
+
+ unsigned int i;
+
+ if (display_cfg->display_config.overrides.all_streams_blanked) {
+ /* stream is blanked, so do nothing */
+ return;
+ }
+
+ /* from display configuration */
+ base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total;
+ base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total;
+ base_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal -
+ stream_descriptor->timing.v_front_porch);
+ base_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal -
+ stream_descriptor->timing.v_front_porch -
+ stream_descriptor->timing.v_active);
+ base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled;
+
+ /* from meta */
+ base_programming->otg_vline_time_ns =
+ (unsigned int)(stream_fams2_meta->otg_vline_time_us * 1000.0);
+ base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines;
+ base_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines;
+ base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines;
+ base_programming->drr_keepout_otg_vline = (uint16_t)(stream_fams2_meta->nom_vtotal -
+ stream_descriptor->timing.v_front_porch -
+ stream_fams2_meta->method_drr.programming_delay_otg_vlines);
+ base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines;
+ base_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal;
+
+ /* from core */
+ base_programming->config.bits.min_ttu_vblank_usable = true;
+ for (i = 0; i < display_cfg->display_config.num_planes; i++) {
+ /* check if all planes support p-state in blank */
+ if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index &&
+ mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) {
+ base_programming->config.bits.min_ttu_vblank_usable = false;
+ break;
+ }
+ }
+
+ switch (pstate_method) {
+ case dml2_pstate_method_vactive:
+ case dml2_pstate_method_fw_vactive_drr:
+ /* legacy vactive */
+ base_programming->type = FAMS2_STREAM_TYPE_VACTIVE;
+ sub_programming->legacy.vactive_det_fill_delay_otg_vlines =
+ (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines;
+ base_programming->allow_start_otg_vline =
+ (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline;
+ base_programming->allow_end_otg_vline =
+ (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline;
+ base_programming->config.bits.clamp_vtotal_min = true;
+ break;
+ case dml2_pstate_method_vblank:
+ case dml2_pstate_method_fw_vblank_drr:
+ /* legacy vblank */
+ base_programming->type = FAMS2_STREAM_TYPE_VBLANK;
+ base_programming->allow_start_otg_vline =
+ (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline;
+ base_programming->allow_end_otg_vline =
+ (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline;
+ base_programming->config.bits.clamp_vtotal_min = true;
+ break;
+ case dml2_pstate_method_fw_drr:
+ /* drr */
+ base_programming->type = FAMS2_STREAM_TYPE_DRR;
+ sub_programming->drr.programming_delay_otg_vlines =
+ (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines;
+ sub_programming->drr.nom_stretched_vtotal =
+ (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal;
+ base_programming->allow_start_otg_vline =
+ (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline;
+ base_programming->allow_end_otg_vline =
+ (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline;
+ /* drr only clamps to vtotal min for single display */
+ base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1;
+ sub_programming->drr.only_stretch_if_required = true;
+ break;
+ case dml2_pstate_method_fw_svp:
+ case dml2_pstate_method_fw_svp_drr:
+ /* subvp */
+ base_programming->type = FAMS2_STREAM_TYPE_SUBVP;
+ sub_programming->subvp.vratio_numerator =
+ (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0);
+ sub_programming->subvp.vratio_denominator = 1000;
+ sub_programming->subvp.programming_delay_otg_vlines =
+ (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines;
+ sub_programming->subvp.prefetch_to_mall_otg_vlines =
+ (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines;
+ sub_programming->subvp.phantom_vtotal =
+ (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal;
+ sub_programming->subvp.phantom_vactive =
+ (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive;
+ sub_programming->subvp.config.bits.is_multi_planar =
+ plane_descriptor->surface.plane1.height > 0;
+ sub_programming->subvp.config.bits.is_yuv420 =
+ plane_descriptor->pixel_format == dml2_420_8 ||
+ plane_descriptor->pixel_format == dml2_420_10 ||
+ plane_descriptor->pixel_format == dml2_420_12;
+
+ base_programming->allow_start_otg_vline =
+ (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline;
+ base_programming->allow_end_otg_vline =
+ (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline;
+ base_programming->config.bits.clamp_vtotal_min = true;
+ break;
+ case dml2_pstate_method_reserved_hw:
+ case dml2_pstate_method_reserved_fw:
+ case dml2_pstate_method_reserved_fw_drr_clamped:
+ case dml2_pstate_method_reserved_fw_drr_var:
+ case dml2_pstate_method_na:
+ case dml2_pstate_method_count:
+ default:
+ /* this should never happen */
+ break;
+ }
+}
+
+/* Copy the mcache allocation DML holds for plane @plane_idx into @out, then flag @out as valid. */
+void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx)
+{
+	unsigned int slice;
+
+	out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx);
+	out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx);
+	for (slice = 0; slice < out->num_mcaches_plane0; slice++)
+		out->mcache_x_offsets_plane0[slice] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, slice);
+
+	out->num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx);
+	out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx);
+	for (slice = 0; slice < out->num_mcaches_plane1; slice++)
+		out->mcache_x_offsets_plane1[slice] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, slice);
+
+	/* Last-slice sharing flags and informative row sizes come straight from their getters. */
+	out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx);
+	out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx);
+	out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx);
+	out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx);
+	out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx);
+	out->valid = true;
+}
+
+void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index)
+{
+ *out = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index); /* sole output: the getter's value for @pipe_index, written through @out */
+}
+
+/* Fill @out with the per-plane support values read from mode_lib->ms for plane @plane_idx. */
+void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx)
+{
+	const unsigned int stream_idx = display_cfg->plane_descriptors[plane_idx].stream_index;
+
+	out->mall_svp_size_requirement_ways = 0;
+	out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx];
+	out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx];
+	out->dram_change_vactive_det_fill_delay_us = (unsigned int)math_ceil(mode_lib->ms.dram_change_vactive_det_fill_delay_us[plane_idx]);
+
+	/* (h_total / pixel clock in MHz) * TWait; the int cast drops the fractional part. */
+	out->nominal_vblank_pstate_latency_hiding_us = (int)(display_cfg->stream_descriptors[stream_idx].timing.h_total /
+		((double)display_cfg->stream_descriptors[stream_idx].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]);
+}
+
+/*
+ * Compute the phantom vertical timing (startup, min active, active, total) for
+ * plane @plane_index and store it, plus the plane's reserved vblank time, in @out.
+ */
+void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index)
+{
+	const unsigned int stream_idx = display_cfg->plane_descriptors[plane_index].stream_index;
+	unsigned int fw_delay_lines, min_active_lines, active_lines;
+	unsigned int startup_lines, blank_lines, main_blank_lines;
+	unsigned int leftover;
+	double fw_delay_pix;
+
+	/* (FW processing delay + p-state allow width) in us, times the pixel clock in MHz -> pixels. */
+	fw_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) *
+		((double)display_cfg->stream_descriptors[stream_idx].timing.pixel_clock_khz / 1000));
+
+	/* Pixels -> lines of h_total; add one line when dml2_core_div_rem() leaves a non-zero remainder. */
+	fw_delay_lines = (unsigned int)(fw_delay_pix / (double)display_cfg->stream_descriptors[stream_idx].timing.h_total);
+	dml2_core_div_rem(fw_delay_pix, display_cfg->stream_descriptors[stream_idx].timing.h_total, &leftover);
+	if (leftover != 0)
+		fw_delay_lines += 1;
+
+	startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index);
+	min_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) /
+		display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio);
+	active_lines = fw_delay_lines + min_active_lines + mode_lib->ip.subvp_swath_height_margin_lines;
+
+	/* Phantom vblank: vstartup + 1 (vfp) + 1 (vsync), capped at the main timing's v_total - v_active. */
+	main_blank_lines = display_cfg->stream_descriptors[stream_idx].timing.v_total - display_cfg->stream_descriptors[stream_idx].timing.v_active;
+	blank_lines = startup_lines + 1 + 1;
+	if (blank_lines > main_blank_lines)
+		blank_lines = main_blank_lines;
+
+	out->phantom_v_startup = startup_lines;
+	out->phantom_min_v_active = min_active_lines;
+	out->phantom_v_active = active_lines;
+	out->phantom_v_total = active_lines + blank_lines;
+	/* The reserved vblank override is supplied in ns; report it in us. */
+	out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000;
+#if defined(__DML_VBA_DEBUG__)
+	DML_LOG_VERBOSE("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us);
+	DML_LOG_VERBOSE("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us);
+	DML_LOG_VERBOSE("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines);
+	DML_LOG_VERBOSE("DML::%s: vblank_reserved_time_us = %u\n", __func__, out->vblank_reserved_time_us);
+#endif
+}
+
+void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out)
+{
+ unsigned int k, n;
+
+ out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport;
+ out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport;
+ out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport;
+ out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport;
+ out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport;
+ out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420;
+ out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false;
+ out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported;
+ out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion;
+ out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated;
+ out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated;
+ out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP;
+ out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink;
+ out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO;
+ out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport;
+ out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport;
+ out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support;
+ out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport;
+ out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport;
+ out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport;
+ out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface;
+ out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
+ out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
+ out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen;
+ out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState;
+ out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize;
+ out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits;
+ out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.temp_read_or_ppt_support;
+ out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support;
+
+ out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots;
+ out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits;
+ out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices;
+ out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport;
+ out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported;
+ out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported;
+ out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport;
+
+ out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport;
+ out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport;
+ out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance;
+ out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded;
+ out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded;
+
+ out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport;
+ out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport;
+ out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport;
+
+ out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported;
+ out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported;
+ out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported;
+ out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support;
+ out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport;
+ out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport;
+
+ for (k = 0; k < out->display_config.num_planes; k++) {
+
+ out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k];
+ out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k];
+ out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k];
+ out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k];
+ out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k];
+ out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k];
+ out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k];
+ out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k];
+
+ if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown)
+ out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown;
+ else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp)
+ out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp;
+ else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp)
+ out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp;
+ else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0)
+ out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0;
+ else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi)
+ out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi;
+ else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl)
+ out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl;
+
+ if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_16x4)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_16x4;
+ else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_20x4)
+ out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_20x4;
+
+ out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k];
+ out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k];
+ }
+
+ out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib);
+ out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib);
+ out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib);
+ out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib);
+
+ out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib);
+ out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib);
+ out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib);
+ out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib);
+ out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib);
+ out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib);
+ out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib);
+ out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib);
+
+ out->informative.mall.total_surface_size_in_mall_bytes = 0;
+ out->informative.dpp.total_num_dpps_required = 0;
+ for (k = 0; k < out->display_config.num_planes; ++k) {
+ out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k];
+ out->informative.dpp.total_num_dpps_required += mode_lib->mp.NoOfDPP[k];
+ }
+
+ out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk;
+ out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib);
+
+ out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib);
+ out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib);
+ out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib);
+
+ out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib);
+ out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib);
+ out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib);
+ out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib);
+ out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib);
+
+ out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib);
+ out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib);
+ out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib);
+ out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib);
+
+ out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib);
+ out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib);
+ out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib);
+ out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib);
+
+ out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib);
+ out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib);
+ out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib);
+
+ out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib);
+ out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib);
+ out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib);
+
+ out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib);
+ out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram(mode_lib);
+ out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib);
+ out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib);
+
+ out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp(mode_lib);
+ out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib);
+ out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib);
+ out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib);
+
+ out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib);
+ out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib);
+ out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib);
+ out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib);
+
+ out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib);
+ out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib);
+ out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib);
+ out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib);
+
+ out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib);
+ out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib);
+
+ out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib);
+ out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib);
+ out->informative.misc.dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
+
+ out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib);
+ out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib);
+ out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib);
+
+ out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_z8(mode_lib);
+ out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib);
+ out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib);
+ out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib);
+
+ out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib);
+ out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib);
+ out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib);
+
+ out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib);
+
+ out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib);
+
+ out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib);
+
+ out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000;
+
+ for (k = 0; k < out->display_config.num_planes; k++) {
+
+ if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us)
+ && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
+ && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
+ out->informative.misc.PrefetchMode[k] = 0;
+ else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
+ && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
+ out->informative.misc.PrefetchMode[k] = 1;
+ else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)
+ out->informative.misc.PrefetchMode[k] = 2;
+ else
+ out->informative.misc.PrefetchMode[k] = 3;
+
+ out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k];
+ out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k];
+ out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k];
+ out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k];
+ out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k];
+ out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k];
+ out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k];
+ out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k];
+ out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k];
+ out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k];
+ out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k];
+ out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k];
+ out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k];
+ out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k];
+ out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k];
+ out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k];
+ out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k];
+ out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k];
+ out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k];
+ out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k];
+ out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k];
+ out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k];
+ out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k];
+ out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k];
+ out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k];
+ out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k];
+ out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k];
+ out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k];
+ out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k];
+ out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k];
+ out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k];
+ out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k];
+ out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k];
+ out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k];
+ out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k];
+ out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k];
+ out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k];
+ out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k];
+ out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k];
+ out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k];
+ out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k];
+ out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k];
+ out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k];
+ out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k];
+
+ out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k];
+ out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k];
+ out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k];
+ out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k];
+ out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k];
+ out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k];
+ out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k];
+ out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k];
+ out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k];
+ out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k];
+ out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k];
+ out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k];
+ out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k];
+ out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k];
+ out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k];
+ out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k];
+ out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k];
+ out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k];
+ out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k];
+
+ out->informative.misc.WritebackRequiredBandwidth = mode_lib->mp.TotalWRBandwidth / 1000.0;
+ out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k];
+ out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k];
+ out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k];
+ out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k];
+ out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k];
+ out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k];
+ out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k];
+
+ if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin)
+ out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k];
+ }
+
+ // For this DV informative layer, all pipes in the same plane just use the same id.
+ // An optimization and helper layer will be added later on.
+ // This only works when there is enough "mcache" to fit everything without thrashing the cache.
+ for (k = 0; k < out->display_config.num_planes; k++) {
+ out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k);
+ out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k);
+
+ for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) {
+ out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n);
+ out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k;
+ }
+
+ out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k);
+ out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k);
+
+ for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) {
+ out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n);
+ out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k;
+ }
+ }
+ out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib);
+
+ if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
+ if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
+ / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) {
+ out->informative.misc.ROBUrgencyAvoidance = true;
+ } else {
+ out->informative.misc.ROBUrgencyAvoidance = false;
+ }
+ } else {
+ out->informative.misc.ROBUrgencyAvoidance = true;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h
new file mode 100644
index 000000000000..27ef0e096b25
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_CORE_DCN4_CALCS_H__
+#define __DML2_CORE_DCN4_CALCS_H__
+
+#include "dml2_core_shared_types.h"
+
+ struct dml2_dchub_watermark_regs; // forward declarations: these types are only used through pointers in the prototypes below
+ struct dml2_display_arb_regs;
+ struct dml2_per_stream_programming;
+ struct dml2_dchub_per_pipe_register_set;
+ struct core_plane_support_info;
+ struct core_stream_support_info;
+ struct dml2_cursor_dlg_regs;
+ struct display_configuation_with_meta;
+
+ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params); // NOTE(review): returns unsigned int while mode_programming_ex returns bool - confirm the return-value meaning at the definition
+ bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params);
+ void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out);
+ void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out);
+ void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *dml2_display_cfg, struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_per_pipe_register_set *out, int pipe_index); // mode_lib is non-const here, unlike most getters in this header
+ void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index);
+ void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index);
+ void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_index);
+ void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_index);
+ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out);
+ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); // NOTE(review): the index parameter is named plane_index although the output is per-stream info - confirm against the definition
+ void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); // mode_lib is non-const here
+ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, union dmub_cmd_fams2_config *fams2_base_programming, union dmub_cmd_fams2_config *fams2_sub_programming, enum dml2_pstate_method pstate_method, int plane_index);
+ void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config);
+
+ void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int GPUVMMinPageSizeKBytes); // result is written through the first parameter; mode_lib is non-const here
+ void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p);
+ const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); // enum-to-string helpers
+ const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c
new file mode 100644
index 000000000000..640087e862f8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_core_factory.h"
+#include "dml2_core_dcn4.h"
+#include "dml2_external_lib_deps.h"
+
+ /*
+  * dml2_core_create() - populate @out with the core callbacks for @project_id.
+  *
+  * @out is zeroed before anything is assigned. Returns true only for the two
+  * dcn4x stage2 project ids, which get the core_dcn4_* callbacks; returns
+  * false when @out is NULL or for any other project id.
+  */
+ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance *out)
+ {
+ 	if (!out)
+ 		return false;
+
+ 	memset(out, 0, sizeof(*out));
+
+ 	switch (project_id) {
+ 	case dml2_project_dcn4x_stage2:
+ 	case dml2_project_dcn4x_stage2_auto_drr_svp:
+ 		out->initialize = core_dcn4_initialize;
+ 		out->mode_support = core_dcn4_mode_support;
+ 		out->mode_programming = core_dcn4_mode_programming;
+ 		out->populate_informative = core_dcn4_populate_informative;
+ 		out->calculate_mcache_allocation = core_dcn4_calculate_mcache_allocation;
+ 		return true;
+ 	case dml2_project_dcn4x_stage1:
+ 	case dml2_project_invalid:
+ 	default:
+ 		return false;
+ 	}
+ }
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h
new file mode 100644
index 000000000000..411c514fe65c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_CORE_FACTORY_H__
+#define __DML2_CORE_FACTORY_H__
+
+#include "dml2_internal_shared_types.h"
+#include "dml_top_types.h"
+
+ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance *out); // zeroes *out, then fills its callbacks for supported project ids; false for NULL out or an unsupported id
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
new file mode 100644
index 000000000000..ffb8c09f37a5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
@@ -0,0 +1,2326 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_CORE_SHARED_TYPES_H__
+#define __DML2_CORE_SHARED_TYPES_H__
+
+#include "dml2_external_lib_deps.h"
+#include "dml_top_display_cfg_types.h"
+#include "dml_top_types.h"
+
+ #define __DML_VBA_DEBUG__ // defined unconditionally by this header; NOTE(review): presumably gates debug-only code - confirm at the users
+ #define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 //<brief max vratio for one-to-one prefetch bw scheduling
+ #define __DML2_CALCS_MAX_VRATIO_PRE_EQU__ 6.0 //<brief max vratio for equalized prefetch bw scheduling
+ #define __DML2_CALCS_MAX_VRATIO_PRE__ 8.0 //<brief max prefetch vratio register limit
+
+ #define __DML2_CALCS_DPP_INVALID__ 0 // NOTE(review): presumably a sentinel value - confirm at the users
+ #define __DML2_CALCS_DCFCLK_FACTOR__ 1.15 //<brief fudge factor for min dcfclk calculation
+ #define __DML2_CALCS_PIPE_NO_PLANE__ 99 // NOTE(review): presumably a sentinel value - confirm at the users
+
+ struct dml2_core_ip_params { // NOTE(review): presumably the IP limits/capabilities consumed by the core; units appear to follow the field-name suffixes (_us, _kbytes, _bytes, _bits, _lines) - confirm
+ unsigned int vblank_nom_default_us;
+ unsigned int remote_iommu_outstanding_translations;
+ unsigned int rob_buffer_size_kbytes; // NOTE(review): presumably the value read as mode_lib->ip.rob_buffer_size_kbytes, which the ROB urgency check in dml2_core_calcs_get_informative() scales by 1024 - confirm
+ unsigned int config_return_buffer_size_in_kbytes;
+ unsigned int config_return_buffer_segment_size_in_kbytes;
+ unsigned int compressed_buffer_segment_size_in_kbytes;
+ unsigned int meta_fifo_size_in_kentries;
+ unsigned int dpte_buffer_size_in_pte_reqs_luma;
+ unsigned int dpte_buffer_size_in_pte_reqs_chroma;
+ unsigned int pixel_chunk_size_kbytes; // NOTE(review): presumably subtracted from rob_buffer_size_kbytes in the same ROB urgency check - confirm
+ unsigned int alpha_pixel_chunk_size_kbytes;
+ unsigned int min_pixel_chunk_size_bytes;
+ unsigned int writeback_chunk_size_kbytes;
+ unsigned int line_buffer_size_bits;
+ unsigned int max_line_buffer_lines;
+ unsigned int writeback_interface_buffer_size_kbytes;
+ unsigned int max_num_dpp;
+ unsigned int max_num_otg;
+ unsigned int max_num_wb;
+ unsigned int max_dchub_pscl_bw_pix_per_clk;
+ unsigned int max_pscl_lb_bw_pix_per_clk;
+ unsigned int max_lb_vscl_bw_pix_per_clk;
+ unsigned int max_vscl_hscl_bw_pix_per_clk;
+ double max_hscl_ratio;
+ double max_vscl_ratio;
+ unsigned int max_hscl_taps;
+ unsigned int max_vscl_taps;
+ unsigned int num_dsc;
+ unsigned int maximum_dsc_bits_per_component;
+ unsigned int maximum_pixels_per_line_per_dsc_unit;
+ bool dsc422_native_support;
+ bool cursor_64bpp_support;
+ double dispclk_ramp_margin_percent;
+ unsigned int dppclk_delay_subtotal;
+ unsigned int dppclk_delay_scl;
+ unsigned int dppclk_delay_scl_lb_only;
+ unsigned int dppclk_delay_cnvc_formatter;
+ unsigned int dppclk_delay_cnvc_cursor;
+ unsigned int cursor_buffer_size;
+ unsigned int cursor_chunk_size;
+ unsigned int dispclk_delay_subtotal;
+ bool dynamic_metadata_vm_enabled;
+ unsigned int max_inter_dcn_tile_repeaters;
+ unsigned int max_num_hdmi_frl_outputs;
+ unsigned int max_num_dp2p0_outputs;
+ unsigned int max_num_dp2p0_streams;
+ bool dcc_supported;
+ bool ptoi_supported;
+ double writeback_max_hscl_ratio;
+ double writeback_max_vscl_ratio;
+ double writeback_min_hscl_ratio;
+ double writeback_min_vscl_ratio;
+ unsigned int writeback_max_hscl_taps;
+ unsigned int writeback_max_vscl_taps;
+ unsigned int writeback_line_buffer_buffer_size;
+
+ unsigned int words_per_channel;
+ bool imall_supported;
+ unsigned int max_flip_time_us;
+ unsigned int max_flip_time_lines;
+ unsigned int subvp_swath_height_margin_lines;
+ unsigned int subvp_fw_processing_delay_us;
+ unsigned int subvp_pstate_allow_width_us;
+
+ // MRQ
+ bool dcn_mrq_present;
+ unsigned int zero_size_buffer_entries;
+ unsigned int compbuf_reserved_space_zs;
+ unsigned int dcc_meta_buffer_size_bytes;
+ unsigned int meta_chunk_size_kbytes;
+ unsigned int min_meta_chunk_size_bytes;
+
+ unsigned int dchub_arb_to_ret_delay; // number of dcfclk cycles
+ unsigned int hostvm_mode;
+ };
+
+ struct dml2_core_internal_DmlPipe { // NOTE(review): presumably a per-pipe bundle of inputs for the core calcs; its consumers are not visible here - confirm
+ double Dppclk;
+ double Dispclk;
+ double PixelClock;
+ double DCFClkDeepSleep;
+ unsigned int DPPPerSurface;
+ bool ScalerEnabled;
+ bool UPSPEnabled;
+ enum dml2_rotation_angle RotationAngle;
+ bool mirrored;
+ unsigned int ViewportHeight;
+ unsigned int ViewportHeightC;
+ unsigned int BlockWidth256BytesY;
+ unsigned int BlockHeight256BytesY;
+ unsigned int BlockWidth256BytesC;
+ unsigned int BlockHeight256BytesC;
+ unsigned int BlockWidthY;
+ unsigned int BlockHeightY;
+ unsigned int BlockWidthC;
+ unsigned int BlockHeightC;
+ unsigned int InterlaceEnable; // NOTE(review): declared unsigned int, unlike the bool flags elsewhere in this struct - confirm whether it carries more than on/off
+ unsigned int NumberOfCursors;
+ unsigned int VBlank;
+ unsigned int HTotal;
+ unsigned int HActive;
+ bool DCCEnable;
+ enum dml2_odm_mode ODMMode;
+ enum dml2_source_format_class SourcePixelFormat;
+ enum dml2_swizzle_mode SurfaceTiling;
+ unsigned int BytePerPixelY;
+ unsigned int BytePerPixelC;
+ bool ProgressiveToInterlaceUnitInOPP;
+ double VRatio;
+ double VRatioChroma;
+ unsigned int VTaps;
+ unsigned int VTapsChroma;
+ unsigned int PitchY;
+ unsigned int PitchC;
+ bool ViewportStationary;
+ unsigned int ViewportXStart;
+ unsigned int ViewportYStart;
+ unsigned int ViewportXStartC;
+ unsigned int ViewportYStartC;
+ bool FORCE_ONE_ROW_FOR_FRAME;
+ unsigned int SwathHeightY;
+ unsigned int SwathHeightC;
+
+ unsigned int DCCMetaPitchY;
+ unsigned int DCCMetaPitchC;
+ };
+
+ enum dml2_core_internal_request_type { // stored per plane in RequestLuma[] / RequestChroma[] of struct dml2_core_internal_mode_support_info
+ dml2_core_internal_request_type_256_bytes = 0,
+ dml2_core_internal_request_type_128_bytes_non_contiguous = 1,
+ dml2_core_internal_request_type_128_bytes_contiguous = 2,
+ dml2_core_internal_request_type_na = 3
+ };
+ enum dml2_core_internal_bw_type { // second index of the [soc_state][bw_type] bandwidth arrays in struct dml2_core_internal_mode_support_info
+ dml2_core_internal_bw_sdp = 0,
+ dml2_core_internal_bw_dram = 1,
+ dml2_core_internal_bw_max // number of bandwidth types; used as an array dimension, not a valid type
+ };
+
+ enum dml2_core_internal_soc_state_type { // first index of the per-SoC-state bandwidth arrays in struct dml2_core_internal_mode_support_info
+ dml2_core_internal_soc_state_sys_active = 0,
+ dml2_core_internal_soc_state_svp_prefetch = 1,
+ dml2_core_internal_soc_state_sys_idle = 2,
+ dml2_core_internal_soc_state_max // number of SoC states; used as an array dimension, not a valid state
+ };
+
+ enum dml2_core_internal_output_type { // stored per plane in the OutputType[] arrays of the mode-support structs
+ dml2_core_internal_output_type_unknown = 0,
+ dml2_core_internal_output_type_dp = 1,
+ dml2_core_internal_output_type_edp = 2,
+ dml2_core_internal_output_type_dp2p0 = 3,
+ dml2_core_internal_output_type_hdmi = 4,
+ dml2_core_internal_output_type_hdmifrl = 5
+ };
+
+ enum dml2_core_internal_output_type_rate { // stored per plane in the OutputRate[] arrays of the mode-support structs
+ dml2_core_internal_output_rate_unknown = 0,
+ dml2_core_internal_output_rate_dp_rate_hbr = 1, // values 1-6 carry the dp_rate_ prefix
+ dml2_core_internal_output_rate_dp_rate_hbr2 = 2,
+ dml2_core_internal_output_rate_dp_rate_hbr3 = 3,
+ dml2_core_internal_output_rate_dp_rate_uhbr10 = 4,
+ dml2_core_internal_output_rate_dp_rate_uhbr13p5 = 5,
+ dml2_core_internal_output_rate_dp_rate_uhbr20 = 6,
+ dml2_core_internal_output_rate_hdmi_rate_3x3 = 7, // values 7-14 carry the hdmi_rate_ prefix
+ dml2_core_internal_output_rate_hdmi_rate_6x3 = 8,
+ dml2_core_internal_output_rate_hdmi_rate_6x4 = 9,
+ dml2_core_internal_output_rate_hdmi_rate_8x4 = 10,
+ dml2_core_internal_output_rate_hdmi_rate_10x4 = 11,
+ dml2_core_internal_output_rate_hdmi_rate_12x4 = 12,
+ dml2_core_internal_output_rate_hdmi_rate_16x4 = 13,
+ dml2_core_internal_output_rate_hdmi_rate_20x4 = 14
+ };
+
+ struct dml2_core_internal_watermarks { // set of watermark values; embedded as `watermarks` in struct dml2_core_internal_mode_support_info
+ double UrgentWatermark;
+ double WritebackUrgentWatermark;
+ double DRAMClockChangeWatermark;
+ double FCLKChangeWatermark;
+ double WritebackDRAMClockChangeWatermark;
+ double WritebackFCLKChangeWatermark;
+ double StutterExitWatermark;
+ double StutterEnterPlusExitWatermark;
+ double LowPowerStutterExitWatermark;
+ double LowPowerStutterEnterPlusExitWatermark;
+ double Z8StutterExitWatermark;
+ double Z8StutterEnterPlusExitWatermark;
+ double USRRetrainingWatermark;
+ double temp_read_or_ppt_watermark_us; // NOTE(review): only this member carries a _us suffix; presumably the others share the unit - confirm
+ };
+
+ struct dml2_core_internal_mode_support_info { // embedded as the `support` member of struct dml2_core_internal_mode_support
+ //-----------------
+ // Mode Support Information
+ //-----------------
+ bool ImmediateFlipSupport; //<brief Mode supports immediate flip at the max combine setting; determined in mode support and used in mode programming
+
+ // Mode Support Reasons
+ bool WritebackLatencySupport;
+ bool ScaleRatioAndTapsSupport;
+ bool SourceFormatPixelAndScanSupport;
+ bool P2IWith420;
+ bool DSCSlicesODMModeSupported;
+ bool DSCOnlyIfNecessaryWithBPP;
+ bool DSC422NativeNotSupported;
+ bool LinkRateDoesNotMatchDPVersion;
+ bool LinkRateForMultistreamNotIndicated;
+ bool BPPForMultistreamNotIndicated;
+ bool MultistreamWithHDMIOreDP;
+ bool MSOOrODMSplitWithNonDPLink;
+ bool NotEnoughLanesForMSO;
+ bool NumberOfOTGSupport;
+ bool NumberOfHDMIFRLSupport;
+ bool NumberOfDP2p0Support;
+ bool WritebackScaleRatioAndTapsSupport;
+ bool CursorSupport;
+ bool PitchSupport;
+ bool ViewportExceedsSurface;
+ //bool ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified;
+ bool ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
+ bool InvalidCombinationOfMALLUseForPStateAndStaticScreen;
+ bool InvalidCombinationOfMALLUseForPState;
+ bool ExceededMALLSize;
+ bool EnoughWritebackUnits;
+
+ bool ExceededMultistreamSlots;
+ bool NotEnoughDSCUnits;
+ bool NotEnoughDSCSlices;
+ bool PixelsPerLinePerDSCUnitSupport;
+ bool DSCCLKRequiredMoreThanSupported;
+ bool DTBCLKRequiredMoreThanSupported;
+ bool LinkCapacitySupport;
+
+ bool ROBSupport;
+ bool OutstandingRequestsSupport;
+ bool OutstandingRequestsUrgencyAvoidance;
+
+ bool PTEBufferSizeNotExceeded;
+ bool DCCMetaBufferSizeNotExceeded;
+ enum dml2_pstate_change_support DRAMClockChangeSupport[DML2_MAX_PLANES];
+ enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES];
+ bool global_dram_clock_change_supported;
+ bool global_fclk_change_supported;
+ bool USRRetrainingSupport;
+ bool AvgBandwidthSupport;
+ bool UrgVactiveBandwidthSupport;
+ bool EnoughUrgentLatencyHidingSupport;
+ bool PrefetchScheduleSupported;
+ bool PrefetchSupported;
+ bool PrefetchBandwidthSupported;
+ bool DynamicMetadataSupported;
+ bool VRatioInPrefetchSupported;
+ bool DISPCLK_DPPCLK_Support;
+ bool TotalAvailablePipesSupport;
+ bool ODMSupport;
+ bool ModeSupport;
+ bool ViewportSizeSupport;
+
+ bool MPCCombineEnable[DML2_MAX_PLANES]; /// <brief Indicates whether MPC Combine is enabled in the given state and optimized mpc combine setting
+ enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; /// <brief ODM mode that is chosen in the mode check stage and will be used in mode programming stage
+ unsigned int DPPPerSurface[DML2_MAX_PLANES]; /// <brief How many DPPs are needed to drive the surface to output. With MPCC or ODMC this could be 2 or 4.
+ bool DSCEnabled[DML2_MAX_PLANES]; /// <brief Indicates if the DSC is actually required; used in mode_programming
+ bool FECEnabled[DML2_MAX_PLANES]; /// <brief Indicates if the FEC is actually required
+ unsigned int NumberOfDSCSlices[DML2_MAX_PLANES]; /// <brief Indicates how many slices are needed to support the given mode
+
+ double OutputBpp[DML2_MAX_PLANES];
+ enum dml2_core_internal_output_type OutputType[DML2_MAX_PLANES];
+ enum dml2_core_internal_output_type_rate OutputRate[DML2_MAX_PLANES];
+
+ unsigned int AlignedYPitch[DML2_MAX_PLANES];
+ unsigned int AlignedCPitch[DML2_MAX_PLANES];
+
+ unsigned int AlignedDCCMetaPitchY[DML2_MAX_PLANES];
+ unsigned int AlignedDCCMetaPitchC[DML2_MAX_PLANES];
+
+ unsigned int request_size_bytes_luma[DML2_MAX_PLANES];
+ unsigned int request_size_bytes_chroma[DML2_MAX_PLANES];
+ enum dml2_core_internal_request_type RequestLuma[DML2_MAX_PLANES];
+ enum dml2_core_internal_request_type RequestChroma[DML2_MAX_PLANES];
+
+ unsigned int DCCYMaxUncompressedBlock[DML2_MAX_PLANES];
+ unsigned int DCCYMaxCompressedBlock[DML2_MAX_PLANES];
+ unsigned int DCCYIndependentBlock[DML2_MAX_PLANES];
+ unsigned int DCCCMaxUncompressedBlock[DML2_MAX_PLANES];
+ unsigned int DCCCMaxCompressedBlock[DML2_MAX_PLANES];
+ unsigned int DCCCIndependentBlock[DML2_MAX_PLANES];
+
+ double avg_bandwidth_available_min[dml2_core_internal_soc_state_max];
+ double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max];
+ double urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_max]; // min between SDP and DRAM, for latency evaluation
+ double urg_bandwidth_available_min[dml2_core_internal_soc_state_max]; // min between SDP and DRAM
+ double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max];
+ double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_vm_only bw, sdp has no different derate for vm/non-vm etc.
+ double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_pixel_and_vm bw, sdp has no different derate for vm/non-vm etc.
+
+ double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max];
+ double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // active bandwidth, scaled by urg burst factor
+ double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth, scaled by urg burst factor
+ double urg_bandwidth_required_qual[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth, scaled by urg burst factor, use qual_row_bw
+ double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth + flip
+
+ double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // same as urg_bandwidth, except not scaled by urg burst factor
+ double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max];
+ bool avg_bandwidth_support_ok[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max];
+ double max_urgent_latency_us;
+ double max_non_urgent_latency_us;
+ double avg_non_urgent_latency_us;
+ double avg_urgent_latency_us;
+ double df_response_time_us;
+
+ bool incorrect_imall_usage;
+
+ bool g6_temp_read_support;
+ bool temp_read_or_ppt_support;
+
+ struct dml2_core_internal_watermarks watermarks;
+ bool dcfclk_support;
+ bool qos_bandwidth_support;
+ };
+
+struct dml2_core_internal_mode_support {
+ // Physical info; only using for programming
+ unsigned int state_idx; // <brief min clk state table index for mode support call
+ unsigned int qos_param_index; // to access the uclk dependent qos_parameters table
+ unsigned int active_min_uclk_dpm_index; // to access the min_clk table
+ unsigned int num_active_planes; // <brief As determined by either e2e_pipe_param or display_cfg
+
+ // Calculated Clocks
+ double RequiredDISPCLK; /// <brief Required DISPCLK; depends on pixel rate; odm mode etc.
+ double RequiredDPPCLK[DML2_MAX_PLANES];
+ double RequiredDISPCLKPerSurface[DML2_MAX_PLANES];
+ double RequiredDTBCLK[DML2_MAX_PLANES];
+
+ double required_dscclk_freq_mhz[DML2_MAX_PLANES];
+
+ double FabricClock; /// <brief Basically just the clock freq at the min (or given) state
+ double SOCCLK; /// <brief Basically just the clock freq at the min (or given) state
+ double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting
+ double GlobalDPPCLK; /// <brief the Max DPPCLK freq out of all pipes
+ double GlobalDTBCLK; /// <brief the Max DTBCLK freq out of all pipes
+ double uclk_freq_mhz;
+ double dram_bw_mbps;
+ double max_dram_bw_mbps;
+ double min_available_urgent_bandwidth_MBps; /// <brief Minimum guaranteed available urgent return bandwidth in MBps
+
+ double MaxFabricClock; /// <brief Basically just the clock freq at the min (or given) state
+ double MaxDCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting
+ double max_dispclk_freq_mhz;
+ double max_dppclk_freq_mhz;
+ double max_dscclk_freq_mhz;
+
+ bool NoTimeForPrefetch[DML2_MAX_PLANES];
+ bool NoTimeForDynamicMetadata[DML2_MAX_PLANES];
+
+ // ----------------------------------
+ // Mode Support Info and fail reason
+ // ----------------------------------
+ struct dml2_core_internal_mode_support_info support;
+
+ // These are calculated before the ModeSupport and ModeProgram step
+ // They represent the bound for the return buffer sizing
+ unsigned int MaxTotalDETInKByte;
+ unsigned int NomDETInKByte;
+ unsigned int MinCompressedBufferSizeInKByte;
+
+ // Info obtained at the end of mode support calculations
+ // The reported info is at the "optimal" state and combine setting
+ unsigned int DETBufferSizeInKByte[DML2_MAX_PLANES]; // <brief Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value.
+ unsigned int DETBufferSizeY[DML2_MAX_PLANES];
+ unsigned int DETBufferSizeC[DML2_MAX_PLANES];
+ unsigned int SwathHeightY[DML2_MAX_PLANES];
+ unsigned int SwathHeightC[DML2_MAX_PLANES];
+ unsigned int SwathWidthY[DML2_MAX_PLANES]; // per-pipe
+ unsigned int SwathWidthC[DML2_MAX_PLANES]; // per-pipe
+
+ // ----------------------------------
+ // Intermediates/Informational
+ // ----------------------------------
+ unsigned int TotImmediateFlipBytes;
+ bool DCCEnabledInAnySurface;
+ double WritebackRequiredDISPCLK;
+ double TimeCalc;
+ double TWait[DML2_MAX_PLANES];
+
+ bool UnboundedRequestEnabled;
+ unsigned int compbuf_reserved_space_64b;
+ bool hw_debug5;
+ unsigned int CompressedBufferSizeInkByte;
+ double VRatioPreY[DML2_MAX_PLANES];
+ double VRatioPreC[DML2_MAX_PLANES];
+ unsigned int req_per_swath_ub_l[DML2_MAX_PLANES];
+ unsigned int req_per_swath_ub_c[DML2_MAX_PLANES];
+ unsigned int swath_width_luma_ub[DML2_MAX_PLANES];
+ unsigned int swath_width_chroma_ub[DML2_MAX_PLANES];
+ unsigned int RequiredSlots[DML2_MAX_PLANES];
+ unsigned int vm_bytes[DML2_MAX_PLANES];
+ unsigned int DPTEBytesPerRow[DML2_MAX_PLANES];
+ unsigned int PrefetchLinesY[DML2_MAX_PLANES];
+ unsigned int PrefetchLinesC[DML2_MAX_PLANES];
+ unsigned int MaxNumSwathY[DML2_MAX_PLANES]; /// <brief Max number of swath for prefetch
+ unsigned int MaxNumSwathC[DML2_MAX_PLANES]; /// <brief Max number of swath for prefetch
+ unsigned int PrefillY[DML2_MAX_PLANES];
+ unsigned int PrefillC[DML2_MAX_PLANES];
+ unsigned int full_swath_bytes_l[DML2_MAX_PLANES];
+ unsigned int full_swath_bytes_c[DML2_MAX_PLANES];
+
+ bool use_one_row_for_frame[DML2_MAX_PLANES];
+ bool use_one_row_for_frame_flip[DML2_MAX_PLANES];
+
+ double dst_y_prefetch[DML2_MAX_PLANES];
+ double LinesForVM[DML2_MAX_PLANES];
+ double LinesForDPTERow[DML2_MAX_PLANES];
+ unsigned int SwathWidthYSingleDPP[DML2_MAX_PLANES];
+ unsigned int SwathWidthCSingleDPP[DML2_MAX_PLANES];
+ unsigned int BytePerPixelY[DML2_MAX_PLANES];
+ unsigned int BytePerPixelC[DML2_MAX_PLANES];
+ double BytePerPixelInDETY[DML2_MAX_PLANES];
+ double BytePerPixelInDETC[DML2_MAX_PLANES];
+
+ unsigned int Read256BlockHeightY[DML2_MAX_PLANES];
+ unsigned int Read256BlockWidthY[DML2_MAX_PLANES];
+ unsigned int Read256BlockHeightC[DML2_MAX_PLANES];
+ unsigned int Read256BlockWidthC[DML2_MAX_PLANES];
+ unsigned int MacroTileHeightY[DML2_MAX_PLANES];
+ unsigned int MacroTileHeightC[DML2_MAX_PLANES];
+ unsigned int MacroTileWidthY[DML2_MAX_PLANES];
+ unsigned int MacroTileWidthC[DML2_MAX_PLANES];
+
+ bool surf_linear128_l[DML2_MAX_PLANES];
+ bool surf_linear128_c[DML2_MAX_PLANES];
+
+ double PSCL_FACTOR[DML2_MAX_PLANES];
+ double PSCL_FACTOR_CHROMA[DML2_MAX_PLANES];
+ double MaximumSwathWidthLuma[DML2_MAX_PLANES];
+ double MaximumSwathWidthChroma[DML2_MAX_PLANES];
+ double Tno_bw[DML2_MAX_PLANES];
+ double Tno_bw_flip[DML2_MAX_PLANES];
+ double dst_y_per_vm_flip[DML2_MAX_PLANES];
+ double dst_y_per_row_flip[DML2_MAX_PLANES];
+ double WritebackDelayTime[DML2_MAX_PLANES];
+ unsigned int dpte_group_bytes[DML2_MAX_PLANES];
+ unsigned int dpte_row_height[DML2_MAX_PLANES];
+ unsigned int dpte_row_height_chroma[DML2_MAX_PLANES];
+ double UrgLatency;
+ double TripToMemory;
+ double UrgentBurstFactorCursor[DML2_MAX_PLANES];
+ double UrgentBurstFactorCursorPre[DML2_MAX_PLANES];
+ double UrgentBurstFactorLuma[DML2_MAX_PLANES];
+ double UrgentBurstFactorLumaPre[DML2_MAX_PLANES];
+ double UrgentBurstFactorChroma[DML2_MAX_PLANES];
+ double UrgentBurstFactorChromaPre[DML2_MAX_PLANES];
+ double MaximumSwathWidthInLineBufferLuma;
+ double MaximumSwathWidthInLineBufferChroma;
+ double ExtraLatency;
+ double ExtraLatency_sr;
+ double ExtraLatencyPrefetch;
+
+ double dcc_dram_bw_nom_overhead_factor_p0[DML2_MAX_PLANES]; // overhead to request meta
+ double dcc_dram_bw_nom_overhead_factor_p1[DML2_MAX_PLANES];
+ double dcc_dram_bw_pref_overhead_factor_p0[DML2_MAX_PLANES]; // overhead to request meta
+ double dcc_dram_bw_pref_overhead_factor_p1[DML2_MAX_PLANES];
+ double mall_prefetch_sdp_overhead_factor[DML2_MAX_PLANES]; // overhead to the imall or phantom pipe
+ double mall_prefetch_dram_overhead_factor[DML2_MAX_PLANES];
+
+ bool is_using_mall_for_ss[DML2_MAX_PLANES];
+ unsigned int meta_row_width_chroma[DML2_MAX_PLANES];
+ unsigned int PixelPTEReqHeightC[DML2_MAX_PLANES];
+ bool PTE_BUFFER_MODE[DML2_MAX_PLANES];
+ unsigned int meta_req_height_chroma[DML2_MAX_PLANES];
+ unsigned int meta_pte_bytes_per_frame_ub_c[DML2_MAX_PLANES];
+ unsigned int dpde0_bytes_per_frame_ub_c[DML2_MAX_PLANES];
+ unsigned int dpte_row_width_luma_ub[DML2_MAX_PLANES];
+ unsigned int meta_req_width[DML2_MAX_PLANES];
+ unsigned int meta_row_width[DML2_MAX_PLANES];
+ unsigned int PixelPTEReqWidthY[DML2_MAX_PLANES];
+ unsigned int dpte_row_height_linear[DML2_MAX_PLANES];
+ unsigned int PTERequestSizeY[DML2_MAX_PLANES];
+ unsigned int dpte_row_width_chroma_ub[DML2_MAX_PLANES];
+ unsigned int PixelPTEReqWidthC[DML2_MAX_PLANES];
+ unsigned int meta_pte_bytes_per_frame_ub_l[DML2_MAX_PLANES];
+ unsigned int dpte_row_height_linear_chroma[DML2_MAX_PLANES];
+ unsigned int PTERequestSizeC[DML2_MAX_PLANES];
+ unsigned int meta_req_height[DML2_MAX_PLANES];
+ unsigned int dpde0_bytes_per_frame_ub_l[DML2_MAX_PLANES];
+ unsigned int meta_req_width_chroma[DML2_MAX_PLANES];
+ unsigned int PixelPTEReqHeightY[DML2_MAX_PLANES];
+ unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES];
+ unsigned int vm_group_bytes[DML2_MAX_PLANES];
+ unsigned int VReadyOffsetPix[DML2_MAX_PLANES];
+ unsigned int VUpdateOffsetPix[DML2_MAX_PLANES];
+ unsigned int VUpdateWidthPix[DML2_MAX_PLANES];
+ double TSetup[DML2_MAX_PLANES];
+ double Tdmdl_vm_raw[DML2_MAX_PLANES];
+ double Tdmdl_raw[DML2_MAX_PLANES];
+ unsigned int VStartupMin[DML2_MAX_PLANES]; /// <brief Minimum vstartup to meet the prefetch schedule (i.e. the prefetch solution can be found at this vstartup time); not the actual global sync vstartup pos.
+ double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES];
+ double MaxActiveFCLKChangeLatencySupported;
+
+ // Backend
+ bool RequiresDSC[DML2_MAX_PLANES];
+ bool RequiresFEC[DML2_MAX_PLANES];
+ double OutputBpp[DML2_MAX_PLANES];
+ double DesiredOutputBpp[DML2_MAX_PLANES];
+ double PixelClockBackEnd[DML2_MAX_PLANES];
+ unsigned int DSCDelay[DML2_MAX_PLANES];
+ enum dml2_core_internal_output_type OutputType[DML2_MAX_PLANES];
+ enum dml2_core_internal_output_type_rate OutputRate[DML2_MAX_PLANES];
+ bool TotalAvailablePipesSupportNoDSC;
+ bool TotalAvailablePipesSupportDSC;
+ unsigned int NumberOfDPPNoDSC;
+ unsigned int NumberOfDPPDSC;
+ enum dml2_odm_mode ODMModeNoDSC;
+ enum dml2_odm_mode ODMModeDSC;
+ double RequiredDISPCLKPerSurfaceNoDSC;
+ double RequiredDISPCLKPerSurfaceDSC;
+ unsigned int EstimatedNumberOfDSCSlices[DML2_MAX_PLANES];
+
+ // Bandwidth Related Info
+ double BandwidthAvailableForImmediateFlip;
+ double vactive_sw_bw_l[DML2_MAX_PLANES]; // no dcc overhead, for the plane
+ double vactive_sw_bw_c[DML2_MAX_PLANES];
+ double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK];
+ double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES];
+ double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES];
+ /* Max bandwidth calculated from prefetch schedule should be considered in addition to the pixel data bw to avoid ms/mp mismatches.
+ * 1. oto bw should also be considered when calculating peak urgent bw to avoid situations oto/equ mismatches between ms and mp
+ *
+ * 2. equ bandwidth needs to be considered for calculating peak urgent bw when equ schedule is used in mode support.
+ * Some slight difference in variables may cause the pixel data bandwidth to be higher
+ * even though overall equ prefetch bandwidths can be lower going from ms to mp
+ */
+ double RequiredPrefetchBWMax[DML2_MAX_PLANES];
+ double cursor_bw[DML2_MAX_PLANES];
+ double prefetch_cursor_bw[DML2_MAX_PLANES];
+ double prefetch_vmrow_bw[DML2_MAX_PLANES];
+ double final_flip_bw[DML2_MAX_PLANES];
+ double meta_row_bw[DML2_MAX_PLANES];
+ unsigned int meta_row_bytes[DML2_MAX_PLANES];
+ double dpte_row_bw[DML2_MAX_PLANES];
+ double excess_vactive_fill_bw_l[DML2_MAX_PLANES];
+ double excess_vactive_fill_bw_c[DML2_MAX_PLANES];
+ double surface_avg_vactive_required_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES];
+ double surface_peak_required_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES];
+
+ // Something that should be feedback to caller
+ enum dml2_odm_mode ODMMode[DML2_MAX_PLANES];
+ unsigned int SurfaceSizeInMALL[DML2_MAX_PLANES];
+ unsigned int NoOfDPP[DML2_MAX_PLANES];
+ bool MPCCombine[DML2_MAX_PLANES];
+ double dcfclk_deepsleep;
+ double MinDPPCLKUsingSingleDPP[DML2_MAX_PLANES];
+ bool SingleDPPViewportSizeSupportPerSurface[DML2_MAX_PLANES];
+ bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES];
+ bool NotEnoughUrgentLatencyHiding[DML2_MAX_PLANES];
+ bool NotEnoughUrgentLatencyHidingPre[DML2_MAX_PLANES];
+ bool PTEBufferSizeNotExceeded[DML2_MAX_PLANES];
+ bool DCCMetaBufferSizeNotExceeded[DML2_MAX_PLANES];
+ unsigned int TotalNumberOfActiveDPP;
+ unsigned int TotalNumberOfSingleDPPSurfaces;
+ unsigned int TotalNumberOfDCCActiveDPP;
+ unsigned int Total3dlutActive;
+
+ unsigned int SubViewportLinesNeededInMALL[DML2_MAX_PLANES];
+ double VActiveLatencyHidingMargin[DML2_MAX_PLANES];
+ double VActiveLatencyHidingUs[DML2_MAX_PLANES];
+ unsigned int MaxVStartupLines[DML2_MAX_PLANES];
+ double dram_change_vactive_det_fill_delay_us[DML2_MAX_PLANES];
+
+ unsigned int num_mcaches_l[DML2_MAX_PLANES];
+ unsigned int mcache_row_bytes_l[DML2_MAX_PLANES];
+ unsigned int mcache_row_bytes_per_channel_l[DML2_MAX_PLANES];
+ unsigned int mcache_offsets_l[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1];
+ unsigned int mcache_shift_granularity_l[DML2_MAX_PLANES];
+
+ unsigned int num_mcaches_c[DML2_MAX_PLANES];
+ unsigned int mcache_row_bytes_c[DML2_MAX_PLANES];
+ unsigned int mcache_row_bytes_per_channel_c[DML2_MAX_PLANES];
+ unsigned int mcache_offsets_c[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1];
+ unsigned int mcache_shift_granularity_c[DML2_MAX_PLANES];
+
+ bool mall_comb_mcache_l[DML2_MAX_PLANES];
+ bool mall_comb_mcache_c[DML2_MAX_PLANES];
+ bool lc_comb_mcache[DML2_MAX_PLANES];
+
+ unsigned int vmpg_width_y[DML2_MAX_PLANES];
+ unsigned int vmpg_height_y[DML2_MAX_PLANES];
+ unsigned int vmpg_width_c[DML2_MAX_PLANES];
+ unsigned int vmpg_height_c[DML2_MAX_PLANES];
+
+ unsigned int meta_row_height_luma[DML2_MAX_PLANES];
+ unsigned int meta_row_height_chroma[DML2_MAX_PLANES];
+ unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES];
+ unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES];
+ unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES];
+ unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES];
+
+ unsigned int pstate_bytes_required_l[DML2_MAX_PLANES];
+ unsigned int pstate_bytes_required_c[DML2_MAX_PLANES];
+ unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES];
+ unsigned int cursor_bytes_per_line[DML2_MAX_PLANES];
+
+ unsigned int MaximumVStartup[DML2_MAX_PLANES];
+
+ double HostVMInefficiencyFactor;
+ double HostVMInefficiencyFactorPrefetch;
+
+ unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES];
+ unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES];
+ double tdlut_opt_time[DML2_MAX_PLANES];
+ double tdlut_drain_time[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES];
+
+ double Tvm_trips_flip[DML2_MAX_PLANES];
+ double Tr0_trips_flip[DML2_MAX_PLANES];
+ double Tvm_trips_flip_rounded[DML2_MAX_PLANES];
+ double Tr0_trips_flip_rounded[DML2_MAX_PLANES];
+
+ unsigned int DSTYAfterScaler[DML2_MAX_PLANES];
+ unsigned int DSTXAfterScaler[DML2_MAX_PLANES];
+
+ enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES];
+};
+
+/// @brief A mega structure that houses various info for the mode programming step.
+struct dml2_core_internal_mode_program {
+ unsigned int qos_param_index; // to access the uclk dependent dpm table
+ unsigned int active_min_uclk_dpm_index; // to access the min_clk table
+ double FabricClock; ///< Basically just the clock freq at the min (or given) state
+ //double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting
+ double dram_bw_mbps;
+ double min_available_urgent_bandwidth_MBps; ///< Minimum guaranteed available urgent return bandwidth in MBps
+ double uclk_freq_mhz;
+ unsigned int NoOfDPP[DML2_MAX_PLANES];
+ enum dml2_odm_mode ODMMode[DML2_MAX_PLANES];
+
+ //-------------
+ // Intermediate/Informational
+ //-------------
+ double UrgentLatency;
+ double TripToMemory;
+ double MetaTripToMemory;
+ unsigned int VInitPreFillY[DML2_MAX_PLANES];
+ unsigned int VInitPreFillC[DML2_MAX_PLANES];
+ unsigned int MaxNumSwathY[DML2_MAX_PLANES];
+ unsigned int MaxNumSwathC[DML2_MAX_PLANES];
+ unsigned int full_swath_bytes_l[DML2_MAX_PLANES];
+ unsigned int full_swath_bytes_c[DML2_MAX_PLANES];
+
+ double BytePerPixelInDETY[DML2_MAX_PLANES];
+ double BytePerPixelInDETC[DML2_MAX_PLANES];
+ unsigned int BytePerPixelY[DML2_MAX_PLANES];
+ unsigned int BytePerPixelC[DML2_MAX_PLANES];
+ unsigned int SwathWidthY[DML2_MAX_PLANES]; // per-pipe
+ unsigned int SwathWidthC[DML2_MAX_PLANES]; // per-pipe
+ unsigned int req_per_swath_ub_l[DML2_MAX_PLANES];
+ unsigned int req_per_swath_ub_c[DML2_MAX_PLANES];
+ unsigned int SwathWidthSingleDPPY[DML2_MAX_PLANES];
+ unsigned int SwathWidthSingleDPPC[DML2_MAX_PLANES];
+ double vactive_sw_bw_l[DML2_MAX_PLANES];
+ double vactive_sw_bw_c[DML2_MAX_PLANES];
+ double excess_vactive_fill_bw_l[DML2_MAX_PLANES];
+ double excess_vactive_fill_bw_c[DML2_MAX_PLANES];
+
+ unsigned int PixelPTEBytesPerRow[DML2_MAX_PLANES];
+ unsigned int vm_bytes[DML2_MAX_PLANES];
+ unsigned int PrefetchSourceLinesY[DML2_MAX_PLANES];
+ double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES];
+ double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES];
+ unsigned int PrefetchSourceLinesC[DML2_MAX_PLANES];
+ double PSCL_THROUGHPUT[DML2_MAX_PLANES];
+ double PSCL_THROUGHPUT_CHROMA[DML2_MAX_PLANES];
+ unsigned int DSCDelay[DML2_MAX_PLANES];
+ double DPPCLKUsingSingleDPP[DML2_MAX_PLANES];
+
+ unsigned int Read256BlockHeightY[DML2_MAX_PLANES];
+ unsigned int Read256BlockWidthY[DML2_MAX_PLANES];
+ unsigned int Read256BlockHeightC[DML2_MAX_PLANES];
+ unsigned int Read256BlockWidthC[DML2_MAX_PLANES];
+ unsigned int MacroTileHeightY[DML2_MAX_PLANES];
+ unsigned int MacroTileHeightC[DML2_MAX_PLANES];
+ unsigned int MacroTileWidthY[DML2_MAX_PLANES];
+ unsigned int MacroTileWidthC[DML2_MAX_PLANES];
+ double MaximumSwathWidthLuma[DML2_MAX_PLANES];
+ double MaximumSwathWidthChroma[DML2_MAX_PLANES];
+
+ bool surf_linear128_l[DML2_MAX_PLANES];
+ bool surf_linear128_c[DML2_MAX_PLANES];
+
+ unsigned int SurfaceSizeInTheMALL[DML2_MAX_PLANES];
+ double VRatioPrefetchY[DML2_MAX_PLANES];
+ double VRatioPrefetchC[DML2_MAX_PLANES];
+ double Tno_bw[DML2_MAX_PLANES];
+ double Tno_bw_flip[DML2_MAX_PLANES];
+ double final_flip_bw[DML2_MAX_PLANES];
+ double prefetch_vmrow_bw[DML2_MAX_PLANES];
+ double cursor_bw[DML2_MAX_PLANES];
+ double prefetch_cursor_bw[DML2_MAX_PLANES];
+ double WritebackDelay[DML2_MAX_PLANES];
+ unsigned int dpte_row_height[DML2_MAX_PLANES];
+ unsigned int dpte_row_height_linear[DML2_MAX_PLANES];
+ unsigned int dpte_row_width_luma_ub[DML2_MAX_PLANES];
+ unsigned int dpte_row_width_chroma_ub[DML2_MAX_PLANES];
+ unsigned int dpte_row_height_chroma[DML2_MAX_PLANES];
+ unsigned int dpte_row_height_linear_chroma[DML2_MAX_PLANES];
+ unsigned int vm_group_bytes[DML2_MAX_PLANES];
+ unsigned int dpte_group_bytes[DML2_MAX_PLANES];
+
+ double dpte_row_bw[DML2_MAX_PLANES];
+ double time_per_tdlut_group[DML2_MAX_PLANES];
+ double UrgentBurstFactorCursor[DML2_MAX_PLANES];
+ double UrgentBurstFactorCursorPre[DML2_MAX_PLANES];
+ double UrgentBurstFactorLuma[DML2_MAX_PLANES];
+ double UrgentBurstFactorLumaPre[DML2_MAX_PLANES];
+ double UrgentBurstFactorChroma[DML2_MAX_PLANES];
+ double UrgentBurstFactorChromaPre[DML2_MAX_PLANES];
+
+ double MaximumSwathWidthInLineBufferLuma;
+ double MaximumSwathWidthInLineBufferChroma;
+
+ unsigned int vmpg_width_y[DML2_MAX_PLANES];
+ unsigned int vmpg_height_y[DML2_MAX_PLANES];
+ unsigned int vmpg_width_c[DML2_MAX_PLANES];
+ unsigned int vmpg_height_c[DML2_MAX_PLANES];
+
+ double meta_row_bw[DML2_MAX_PLANES];
+ unsigned int meta_row_bytes[DML2_MAX_PLANES];
+ unsigned int meta_req_width[DML2_MAX_PLANES];
+ unsigned int meta_req_height[DML2_MAX_PLANES];
+ unsigned int meta_row_width[DML2_MAX_PLANES];
+ unsigned int meta_row_height[DML2_MAX_PLANES];
+ unsigned int meta_req_width_chroma[DML2_MAX_PLANES];
+ unsigned int meta_row_height_chroma[DML2_MAX_PLANES];
+ unsigned int meta_row_width_chroma[DML2_MAX_PLANES];
+ unsigned int meta_req_height_chroma[DML2_MAX_PLANES];
+
+ unsigned int swath_width_luma_ub[DML2_MAX_PLANES];
+ unsigned int swath_width_chroma_ub[DML2_MAX_PLANES];
+ unsigned int PixelPTEReqWidthY[DML2_MAX_PLANES];
+ unsigned int PixelPTEReqHeightY[DML2_MAX_PLANES];
+ unsigned int PTERequestSizeY[DML2_MAX_PLANES];
+ unsigned int PixelPTEReqWidthC[DML2_MAX_PLANES];
+ unsigned int PixelPTEReqHeightC[DML2_MAX_PLANES];
+ unsigned int PTERequestSizeC[DML2_MAX_PLANES];
+
+ double TWait[DML2_MAX_PLANES];
+ double Tdmdl_vm_raw[DML2_MAX_PLANES];
+ double Tdmdl_vm[DML2_MAX_PLANES];
+ double Tdmdl_raw[DML2_MAX_PLANES];
+ double Tdmdl[DML2_MAX_PLANES];
+ double TSetup[DML2_MAX_PLANES];
+ unsigned int dpde0_bytes_per_frame_ub_l[DML2_MAX_PLANES];
+ unsigned int dpde0_bytes_per_frame_ub_c[DML2_MAX_PLANES];
+
+ unsigned int meta_pte_bytes_per_frame_ub_l[DML2_MAX_PLANES];
+ unsigned int meta_pte_bytes_per_frame_ub_c[DML2_MAX_PLANES];
+
+ bool UnboundedRequestEnabled;
+ unsigned int CompressedBufferSizeInkByte;
+ unsigned int compbuf_reserved_space_64b;
+ bool hw_debug5;
+ unsigned int dcfclk_deep_sleep_hysteresis;
+ unsigned int min_return_latency_in_dcfclk;
+
+ bool NotEnoughUrgentLatencyHiding[DML2_MAX_PLANES];
+ bool NotEnoughUrgentLatencyHidingPre[DML2_MAX_PLANES];
+ double ExtraLatency;
+ double ExtraLatency_sr;
+ double ExtraLatencyPrefetch;
+ bool PrefetchAndImmediateFlipSupported;
+ double TotalDataReadBandwidth;
+ double BandwidthAvailableForImmediateFlip;
+ bool NotEnoughTimeForDynamicMetadata[DML2_MAX_PLANES];
+
+ bool use_one_row_for_frame[DML2_MAX_PLANES];
+ bool use_one_row_for_frame_flip[DML2_MAX_PLANES];
+
+ double TCalc;
+ unsigned int TotImmediateFlipBytes;
+
+ unsigned int MaxTotalDETInKByte;
+ unsigned int NomDETInKByte;
+ unsigned int MinCompressedBufferSizeInKByte;
+ double PixelClockBackEnd[DML2_MAX_PLANES];
+ double OutputBpp[DML2_MAX_PLANES];
+ bool dsc_enable[DML2_MAX_PLANES];
+ unsigned int num_dsc_slices[DML2_MAX_PLANES];
+ unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES];
+ unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES];
+ unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES];
+ unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES];
+ unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES];
+ unsigned int cursor_bytes_per_line[DML2_MAX_PLANES];
+ unsigned int MaxVStartupLines[DML2_MAX_PLANES]; ///< more like vblank for the plane's OTG
+ double HostVMInefficiencyFactor;
+ double HostVMInefficiencyFactorPrefetch;
+ unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES];
+ unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES];
+ double tdlut_opt_time[DML2_MAX_PLANES];
+ double tdlut_drain_time[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES];
+ double Tvm_trips_flip[DML2_MAX_PLANES];
+ double Tr0_trips_flip[DML2_MAX_PLANES];
+ double Tvm_trips_flip_rounded[DML2_MAX_PLANES];
+ double Tr0_trips_flip_rounded[DML2_MAX_PLANES];
+ bool immediate_flip_required; // any pipes need immediate flip
+ double SOCCLK; ///< Basically just the clock freq at the min (or given) state
+ double TotalWRBandwidth;
+ double max_urgent_latency_us;
+ double df_response_time_us;
+
+ // -------------------
+ // Output
+ // -------------------
+ unsigned int pipe_plane[DML2_MAX_PLANES]; ///< used mainly by dv to map the pipe inst to plane index within DML core; the plane idx of a pipe
+ unsigned int num_active_pipes;
+
+ bool NoTimeToPrefetch[DML2_MAX_PLANES]; ///< Prefetch schedule calculation result
+
+ // Support
+ bool UrgVactiveBandwidthSupport;
+ bool PrefetchScheduleSupported;
+ bool UrgentBandwidthSupport;
+ bool PrefetchModeSupported; ///< Is the prefetch mode (bandwidth and latency) supported
+ bool ImmediateFlipSupported;
+ bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES];
+ bool dcfclk_support;
+
+ // Clock
+ double Dcfclk;
+ double Dispclk; ///< dispclk being used in mode programming
+ double Dppclk[DML2_MAX_PLANES]; ///< dppclk being used in mode programming
+ double GlobalDPPCLK;
+
+ double DSCCLK[DML2_MAX_PLANES]; ///< Required DSCCLK freq. Backend; not used in any subsequent calculations for now
+ double DCFCLKDeepSleep;
+
+ // ARB reg
+ bool DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+ struct dml2_core_internal_watermarks Watermark;
+
+ // DCC compression control
+ unsigned int request_size_bytes_luma[DML2_MAX_PLANES];
+ unsigned int request_size_bytes_chroma[DML2_MAX_PLANES];
+ enum dml2_core_internal_request_type RequestLuma[DML2_MAX_PLANES];
+ enum dml2_core_internal_request_type RequestChroma[DML2_MAX_PLANES];
+ unsigned int DCCYMaxUncompressedBlock[DML2_MAX_PLANES];
+ unsigned int DCCYMaxCompressedBlock[DML2_MAX_PLANES];
+ unsigned int DCCYIndependentBlock[DML2_MAX_PLANES];
+ unsigned int DCCCMaxUncompressedBlock[DML2_MAX_PLANES];
+ unsigned int DCCCMaxCompressedBlock[DML2_MAX_PLANES];
+ unsigned int DCCCIndependentBlock[DML2_MAX_PLANES];
+
+ // Stutter Efficiency
+ double StutterEfficiency;
+ double StutterEfficiencyNotIncludingVBlank;
+ unsigned int NumberOfStutterBurstsPerFrame;
+ double Z8StutterEfficiency;
+ unsigned int Z8NumberOfStutterBurstsPerFrame;
+ double Z8StutterEfficiencyNotIncludingVBlank;
+ double LowPowerStutterEfficiency;
+ double LowPowerStutterEfficiencyNotIncludingVBlank;
+ unsigned int LowPowerNumberOfStutterBurstsPerFrame;
+ double StutterPeriod;
+ double Z8StutterEfficiencyBestCase;
+ unsigned int Z8NumberOfStutterBurstsPerFrameBestCase;
+ double Z8StutterEfficiencyNotIncludingVBlankBestCase;
+ double StutterPeriodBestCase;
+
+ // DLG TTU reg
+ double MIN_DST_Y_NEXT_START[DML2_MAX_PLANES];
+ bool VREADY_AT_OR_AFTER_VSYNC[DML2_MAX_PLANES];
+ unsigned int DSTYAfterScaler[DML2_MAX_PLANES];
+ unsigned int DSTXAfterScaler[DML2_MAX_PLANES];
+ double dst_y_prefetch[DML2_MAX_PLANES];
+ double dst_y_per_vm_vblank[DML2_MAX_PLANES];
+ double dst_y_per_row_vblank[DML2_MAX_PLANES];
+ double dst_y_per_vm_flip[DML2_MAX_PLANES];
+ double dst_y_per_row_flip[DML2_MAX_PLANES];
+ double MinTTUVBlank[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeLuma[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeChroma[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeLumaPrefetch[DML2_MAX_PLANES];
+ double DisplayPipeLineDeliveryTimeChromaPrefetch[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeLuma[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeChroma[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeLumaPrefetch[DML2_MAX_PLANES];
+ double DisplayPipeRequestDeliveryTimeChromaPrefetch[DML2_MAX_PLANES];
+ unsigned int CursorDstXOffset[DML2_MAX_PLANES];
+ unsigned int CursorDstYOffset[DML2_MAX_PLANES];
+ unsigned int CursorChunkHDLAdjust[DML2_MAX_PLANES];
+
+ double DST_Y_PER_PTE_ROW_NOM_L[DML2_MAX_PLANES];
+ double DST_Y_PER_PTE_ROW_NOM_C[DML2_MAX_PLANES];
+ double time_per_pte_group_nom_luma[DML2_MAX_PLANES];
+ double time_per_pte_group_nom_chroma[DML2_MAX_PLANES];
+ double time_per_pte_group_vblank_luma[DML2_MAX_PLANES];
+ double time_per_pte_group_vblank_chroma[DML2_MAX_PLANES];
+ double time_per_pte_group_flip_luma[DML2_MAX_PLANES];
+ double time_per_pte_group_flip_chroma[DML2_MAX_PLANES];
+ double TimePerVMGroupVBlank[DML2_MAX_PLANES];
+ double TimePerVMGroupFlip[DML2_MAX_PLANES];
+ double TimePerVMRequestVBlank[DML2_MAX_PLANES];
+ double TimePerVMRequestFlip[DML2_MAX_PLANES];
+
+ double DST_Y_PER_META_ROW_NOM_L[DML2_MAX_PLANES];
+ double DST_Y_PER_META_ROW_NOM_C[DML2_MAX_PLANES];
+ double TimePerMetaChunkNominal[DML2_MAX_PLANES];
+ double TimePerChromaMetaChunkNominal[DML2_MAX_PLANES];
+ double TimePerMetaChunkVBlank[DML2_MAX_PLANES];
+ double TimePerChromaMetaChunkVBlank[DML2_MAX_PLANES];
+ double TimePerMetaChunkFlip[DML2_MAX_PLANES];
+ double TimePerChromaMetaChunkFlip[DML2_MAX_PLANES];
+
+ double FractionOfUrgentBandwidth;
+ double FractionOfUrgentBandwidthImmediateFlip;
+ double FractionOfUrgentBandwidthMALL;
+
+ // RQ registers
+ bool PTE_BUFFER_MODE[DML2_MAX_PLANES];
+ unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES];
+ double VActiveLatencyHidingUs[DML2_MAX_PLANES];
+ unsigned int SubViewportLinesNeededInMALL[DML2_MAX_PLANES];
+ bool is_using_mall_for_ss[DML2_MAX_PLANES];
+
+ // OTG
+ unsigned int VStartupMin[DML2_MAX_PLANES]; ///< Minimum vstartup to meet the prefetch schedule (i.e. the prefetch solution can be found at this vstartup time); not the actual global sync vstartup pos.
+ unsigned int VStartup[DML2_MAX_PLANES]; ///< The vstartup value for OTG programming (will be set to max vstartup, but is now bounded by min(vblank_nom, actual vblank))
+ unsigned int VUpdateOffsetPix[DML2_MAX_PLANES];
+ unsigned int VUpdateWidthPix[DML2_MAX_PLANES];
+ unsigned int VReadyOffsetPix[DML2_MAX_PLANES];
+ unsigned int pstate_keepout_dst_lines[DML2_MAX_PLANES];
+
+ // Latency and Support
+ double MaxActiveFCLKChangeLatencySupported;
+ bool USRRetrainingSupport;
+ bool g6_temp_read_support;
+ bool temp_read_or_ppt_support;
+ enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES];
+ enum dml2_pstate_change_support DRAMClockChangeSupport[DML2_MAX_PLANES];
+ bool global_dram_clock_change_supported;
+ bool global_fclk_change_supported;
+ double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES];
+ double WritebackAllowFCLKChangeEndPosition[DML2_MAX_PLANES];
+ double WritebackAllowDRAMClockChangeEndPosition[DML2_MAX_PLANES];
+
+ // buffer sizing
+ unsigned int DETBufferSizeInKByte[DML2_MAX_PLANES]; ///< Recommended DET size configuration for this plane. All pipes under this plane should program the DET buffer size to the calculated value.
+ unsigned int DETBufferSizeY[DML2_MAX_PLANES];
+ unsigned int DETBufferSizeC[DML2_MAX_PLANES];
+ unsigned int SwathHeightY[DML2_MAX_PLANES];
+ unsigned int SwathHeightC[DML2_MAX_PLANES];
+
+ double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // active bandwidth, scaled by urg burst factor
+ double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth, scaled by urg burst factor
+ double urg_bandwidth_required_qual[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth, scaled by urg burst factor, use qual_row_bw
+ double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // include vm, prefetch, active bandwidth + flip
+ double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // same as urg_bandwidth, except not scaled by urg burst factor
+ double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max];
+
+ double avg_bandwidth_available_min[dml2_core_internal_soc_state_max];
+ double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max];
+ double urg_bandwidth_available_min[dml2_core_internal_soc_state_max]; // min between SDP and DRAM
+ double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max];
+ double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_vm_only bw, sdp has no different derate for vm/non-vm traffic etc.
+ double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max]; // the min of sdp bw and dram_pixel_and_vm bw, sdp has no different derate for vm/non-vm etc.
+
+ double dcc_dram_bw_nom_overhead_factor_p0[DML2_MAX_PLANES];
+ double dcc_dram_bw_nom_overhead_factor_p1[DML2_MAX_PLANES];
+ double dcc_dram_bw_pref_overhead_factor_p0[DML2_MAX_PLANES];
+ double dcc_dram_bw_pref_overhead_factor_p1[DML2_MAX_PLANES];
+ double mall_prefetch_sdp_overhead_factor[DML2_MAX_PLANES];
+ double mall_prefetch_dram_overhead_factor[DML2_MAX_PLANES];
+
+ unsigned int num_mcaches_l[DML2_MAX_PLANES];
+ unsigned int mcache_row_bytes_l[DML2_MAX_PLANES];
+ unsigned int mcache_row_bytes_per_channel_l[DML2_MAX_PLANES];
+ unsigned int mcache_offsets_l[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1];
+ unsigned int mcache_shift_granularity_l[DML2_MAX_PLANES];
+
+ unsigned int num_mcaches_c[DML2_MAX_PLANES];
+ unsigned int mcache_row_bytes_c[DML2_MAX_PLANES];
+ unsigned int mcache_row_bytes_per_channel_c[DML2_MAX_PLANES];
+ unsigned int mcache_offsets_c[DML2_MAX_PLANES][DML2_MAX_MCACHES + 1];
+ unsigned int mcache_shift_granularity_c[DML2_MAX_PLANES];
+
+ bool mall_comb_mcache_l[DML2_MAX_PLANES];
+ bool mall_comb_mcache_c[DML2_MAX_PLANES];
+ bool lc_comb_mcache[DML2_MAX_PLANES];
+
+ double impacted_prefetch_margin_us[DML2_MAX_PLANES];
+};
+
+struct dml2_core_internal_SOCParametersList { // Bundle of SoC latency/timing values plus the QoS parameter type; embedded by value as mSOCParameters / mmSOCParameters in the dml2_core_calcs_*_locals structs
+ double UrgentLatency;
+ double ExtraLatency_sr; // NOTE(review): "_sr" presumably means self-refresh - confirm
+ double ExtraLatency;
+ double WritebackLatency;
+ double DRAMClockChangeLatency;
+ double FCLKChangeLatency;
+ double SRExitTime; // NOTE(review): "SR" presumably means self-refresh; unit is not stated in this header - confirm
+ double SREnterPlusExitTime;
+ double SRExitTimeLowPower;
+ double SREnterPlusExitTimeLowPower;
+ double SRExitZ8Time;
+ double SREnterPlusExitZ8Time;
+ double USRRetrainingLatency;
+ double SMNLatency;
+ double g6_temp_read_blackout_us; // presumably microseconds, going by the _us suffix - confirm
+ double temp_read_or_ppt_blackout_us; // presumably microseconds, going by the _us suffix - confirm
+ double max_urgent_latency_us; // presumably microseconds, going by the _us suffix - confirm
+ double df_response_time_us; // presumably microseconds, going by the _us suffix - confirm
+ enum dml2_qos_param_type qos_type;
+};
+
+struct dml2_core_calcs_mode_support_locals { // Working variables for the mode-support calculation (going by the struct name); NOTE(review): presumably kept in a struct rather than on the stack - confirm against the .c users
+ double PixelClockBackEnd[DML2_MAX_PLANES];
+ double OutputBpp[DML2_MAX_PLANES];
+
+ unsigned int meta_row_height_luma[DML2_MAX_PLANES];
+ unsigned int meta_row_height_chroma[DML2_MAX_PLANES];
+ unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES];
+ unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES];
+ unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES];
+ unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES];
+
+ bool dummy_boolean[3]; // NOTE(review): the dummy_* members look like throwaway sinks for outputs the caller ignores - confirm
+ unsigned int dummy_integer[3];
+ unsigned int dummy_integer_array[36][DML2_MAX_PLANES]; // 36 separate per-plane scratch rows
+ enum dml2_odm_mode dummy_odm_mode[DML2_MAX_PLANES];
+ bool dummy_boolean_array[2][DML2_MAX_PLANES];
+ double dummy_single[3];
+ double dummy_single_array[DML2_MAX_PLANES];
+ struct dml2_core_internal_watermarks dummy_watermark;
+ double dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max];
+ double surface_dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES];
+
+ unsigned int MaximumVStartup[DML2_MAX_PLANES];
+ unsigned int DSTYAfterScaler[DML2_MAX_PLANES];
+ unsigned int DSTXAfterScaler[DML2_MAX_PLANES];
+ struct dml2_core_internal_SOCParametersList mSOCParameters;
+ struct dml2_core_internal_DmlPipe myPipe;
+ struct dml2_core_internal_DmlPipe SurfParameters[DML2_MAX_PLANES];
+ unsigned int TotalNumberOfActiveWriteback;
+ unsigned int MaximumSwathWidthSupportLuma;
+ unsigned int MaximumSwathWidthSupportChroma;
+ bool MPCCombineMethodAsNeededForPStateChangeAndVoltage;
+ bool MPCCombineMethodAsPossible;
+ bool TotalAvailablePipesSupportNoDSC;
+ unsigned int NumberOfDPPNoDSC;
+ enum dml2_odm_mode ODMModeNoDSC;
+ double RequiredDISPCLKPerSurfaceNoDSC;
+ bool TotalAvailablePipesSupportDSC;
+ unsigned int NumberOfDPPDSC;
+ enum dml2_odm_mode ODMModeDSC;
+ double RequiredDISPCLKPerSurfaceDSC;
+ double BWOfNonCombinedSurfaceOfMaximumBandwidth;
+ unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth;
+ unsigned int TotalNumberOfActiveOTG;
+ unsigned int TotalNumberOfActiveHDMIFRL;
+ unsigned int TotalNumberOfActiveDP2p0;
+ unsigned int TotalNumberOfActiveDP2p0Outputs;
+ unsigned int TotalSlots;
+ unsigned int DSCFormatFactor;
+ unsigned int TotalDSCUnitsRequired;
+ unsigned int ReorderingBytes;
+ bool ImmediateFlipRequired;
+ bool FullFrameMALLPStateMethod;
+ bool SubViewportMALLPStateMethod;
+ bool PhantomPipeMALLPStateMethod;
+ bool SubViewportMALLRefreshGreaterThan120Hz;
+
+ double HostVMInefficiencyFactor;
+ double HostVMInefficiencyFactorPrefetch;
+ unsigned int MaxVStartup;
+ double PixelClockBackEndFactor;
+ unsigned int NumDSCUnitRequired;
+
+ double Tvm_trips[DML2_MAX_PLANES];
+ double Tr0_trips[DML2_MAX_PLANES];
+ double Tvm_trips_flip[DML2_MAX_PLANES];
+ double Tr0_trips_flip[DML2_MAX_PLANES];
+ double Tvm_trips_flip_rounded[DML2_MAX_PLANES];
+ double Tr0_trips_flip_rounded[DML2_MAX_PLANES];
+ unsigned int per_pipe_flip_bytes[DML2_MAX_PLANES];
+
+ unsigned int vmpg_width_y[DML2_MAX_PLANES];
+ unsigned int vmpg_height_y[DML2_MAX_PLANES];
+ unsigned int vmpg_width_c[DML2_MAX_PLANES];
+ unsigned int vmpg_height_c[DML2_MAX_PLANES];
+ unsigned int full_swath_bytes_l[DML2_MAX_PLANES];
+ unsigned int full_swath_bytes_c[DML2_MAX_PLANES];
+
+ unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES];
+ unsigned int tdlut_row_bytes[DML2_MAX_PLANES];
+ unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES];
+ double tdlut_opt_time[DML2_MAX_PLANES];
+ double tdlut_drain_time[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES];
+
+ unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES];
+ unsigned int cursor_bytes_per_line[DML2_MAX_PLANES];
+ unsigned int cursor_lines_per_chunk[DML2_MAX_PLANES];
+ unsigned int cursor_bytes[DML2_MAX_PLANES];
+ bool stream_visited[DML2_MAX_PLANES];
+
+ unsigned int pstate_bytes_required_l[DML2_MAX_PLANES];
+ unsigned int pstate_bytes_required_c[DML2_MAX_PLANES];
+
+ double prefetch_sw_bytes[DML2_MAX_PLANES];
+ double Tpre_rounded[DML2_MAX_PLANES];
+ double Tpre_oto[DML2_MAX_PLANES];
+ bool recalc_prefetch_schedule;
+ bool recalc_prefetch_done;
+ double impacted_dst_y_pre[DML2_MAX_PLANES];
+ double line_times[DML2_MAX_PLANES];
+ enum dml2_source_format_class pixel_format[DML2_MAX_PLANES];
+ unsigned int lb_source_lines_l[DML2_MAX_PLANES];
+ unsigned int lb_source_lines_c[DML2_MAX_PLANES];
+ double prefetch_swath_time_us[DML2_MAX_PLANES];
+};
+
+struct dml2_core_calcs_mode_programming_locals { // Working variables for the mode-programming calculation (going by the struct name); NOTE(review): presumably kept in a struct rather than on the stack - confirm against the .c users
+ double PixelClockBackEnd[DML2_MAX_PLANES];
+ double OutputBpp[DML2_MAX_PLANES];
+ unsigned int num_active_planes; ///< As determined by either e2e_pipe_param or display_cfg
+ unsigned int MaxTotalDETInKByte;
+ unsigned int NomDETInKByte;
+ unsigned int MinCompressedBufferSizeInKByte;
+ double SOCCLK; ///< Basically just the clock freq at the min (or given) state
+
+ double dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // NOTE(review): the dummy_* members look like throwaway sinks for outputs the caller ignores - confirm
+ double surface_dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES];
+ double surface_dummy_bw0[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES];
+ unsigned int dummy_integer_array[4][DML2_MAX_PLANES];
+ enum dml2_output_encoder_class dummy_output_encoder_array[DML2_MAX_PLANES];
+ double dummy_single_array[2][DML2_MAX_PLANES];
+ unsigned int dummy_long_array[8][DML2_MAX_PLANES];
+ bool dummy_boolean_array[2][DML2_MAX_PLANES];
+ bool dummy_boolean[2];
+ double dummy_single[2];
+ struct dml2_core_internal_watermarks dummy_watermark;
+
+ unsigned int DSCFormatFactor;
+ struct dml2_core_internal_DmlPipe SurfaceParameters[DML2_MAX_PLANES];
+ unsigned int ReorderingBytes;
+ double HostVMInefficiencyFactor;
+ double HostVMInefficiencyFactorPrefetch;
+ unsigned int TotalDCCActiveDPP;
+ unsigned int TotalActiveDPP;
+ unsigned int Total3dlutActive;
+ unsigned int MaxVStartupLines[DML2_MAX_PLANES]; ///< more like vblank for the plane's OTG
+ bool immediate_flip_required; // any pipes need immediate flip
+ bool DestinationLineTimesForPrefetchLessThan2;
+ bool VRatioPrefetchMoreThanMax;
+ double MaxTotalRDBandwidthNotIncludingMALLPrefetch;
+ struct dml2_core_internal_SOCParametersList mmSOCParameters;
+ double Tvstartup_margin;
+ double dlg_vblank_start;
+ double LSetup;
+ double blank_lines_remaining;
+ double WRBandwidth;
+ struct dml2_core_internal_DmlPipe myPipe;
+ double PixelClockBackEndFactor;
+ unsigned int vmpg_width_y[DML2_MAX_PLANES];
+ unsigned int vmpg_height_y[DML2_MAX_PLANES];
+ unsigned int vmpg_width_c[DML2_MAX_PLANES];
+ unsigned int vmpg_height_c[DML2_MAX_PLANES];
+ unsigned int full_swath_bytes_l[DML2_MAX_PLANES];
+ unsigned int full_swath_bytes_c[DML2_MAX_PLANES];
+
+ unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES];
+ unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES];
+ unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES];
+ unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES];
+
+ unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES];
+ unsigned int tdlut_row_bytes[DML2_MAX_PLANES];
+ unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES];
+ double tdlut_opt_time[DML2_MAX_PLANES];
+ double tdlut_drain_time[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES];
+
+ unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES];
+ unsigned int cursor_bytes_per_line[DML2_MAX_PLANES];
+ unsigned int cursor_lines_per_chunk[DML2_MAX_PLANES];
+ unsigned int cursor_bytes[DML2_MAX_PLANES];
+
+ double Tvm_trips[DML2_MAX_PLANES];
+ double Tr0_trips[DML2_MAX_PLANES];
+ double Tvm_trips_flip[DML2_MAX_PLANES];
+ double Tr0_trips_flip[DML2_MAX_PLANES];
+ double Tvm_trips_flip_rounded[DML2_MAX_PLANES];
+ double Tr0_trips_flip_rounded[DML2_MAX_PLANES];
+ unsigned int per_pipe_flip_bytes[DML2_MAX_PLANES];
+
+ unsigned int pstate_bytes_required_l[DML2_MAX_PLANES];
+ unsigned int pstate_bytes_required_c[DML2_MAX_PLANES];
+
+ double prefetch_sw_bytes[DML2_MAX_PLANES];
+ double Tpre_rounded[DML2_MAX_PLANES];
+ double Tpre_oto[DML2_MAX_PLANES];
+ bool recalc_prefetch_schedule;
+ double impacted_dst_y_pre[DML2_MAX_PLANES];
+ double line_times[DML2_MAX_PLANES];
+ enum dml2_source_format_class pixel_format[DML2_MAX_PLANES];
+ unsigned int lb_source_lines_l[DML2_MAX_PLANES];
+ unsigned int lb_source_lines_c[DML2_MAX_PLANES];
+ unsigned int num_dsc_slices[DML2_MAX_PLANES];
+ bool dsc_enable[DML2_MAX_PLANES];
+};
+
+struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals { /* Working variables; presumably scratch for CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport() (inferred from the name) - confirm. */
+ double ActiveDRAMClockChangeLatencyMargin[DML2_MAX_PLANES]; /* the five margin arrays below each hold DML2_MAX_PLANES entries */
+ double ActiveFCLKChangeLatencyMargin[DML2_MAX_PLANES];
+ double USRRetrainingLatencyMargin[DML2_MAX_PLANES];
+ double g6_temp_read_latency_margin[DML2_MAX_PLANES];
+ double temp_read_or_ppt_latency_margin[DML2_MAX_PLANES];
+
+ double EffectiveLBLatencyHidingY;
+ double EffectiveLBLatencyHidingC;
+ double LinesInDETY[DML2_MAX_PLANES];
+ double LinesInDETC[DML2_MAX_PLANES];
+ unsigned int LinesInDETYRoundedDownToSwath[DML2_MAX_PLANES]; /* integer-typed, unlike LinesInDETY[] above */
+ unsigned int LinesInDETCRoundedDownToSwath[DML2_MAX_PLANES]; /* integer-typed, unlike LinesInDETC[] above */
+ double FullDETBufferingTimeY;
+ double FullDETBufferingTimeC;
+ double WritebackDRAMClockChangeLatencyMargin;
+ double WritebackFCLKChangeLatencyMargin;
+ double WritebackLatencyHiding;
+
+ unsigned int TotalActiveWriteback;
+ unsigned int LBLatencyHidingSourceLinesY[DML2_MAX_PLANES];
+ unsigned int LBLatencyHidingSourceLinesC[DML2_MAX_PLANES];
+ double TotalPixelBW;
+ double EffectiveDETBufferSizeY;
+ double ActiveClockChangeLatencyHidingY;
+ double ActiveClockChangeLatencyHidingC;
+ double ActiveClockChangeLatencyHiding;
+ unsigned int dst_y_pstate;
+ unsigned int src_y_pstate_l; /* NOTE(review): _l/_c suffixes presumably mean luma/chroma - confirm */
+ unsigned int src_y_pstate_c;
+ unsigned int src_y_ahead_l;
+ unsigned int src_y_ahead_c;
+ unsigned int sub_vp_lines_l;
+ unsigned int sub_vp_lines_c;
+
+};
+
+struct dml2_core_calcs_CalculateVMRowAndSwath_locals { /* Working variables; presumably scratch for CalculateVMRowAndSwath() (inferred from the name) - confirm. */
+ unsigned int PTEBufferSizeInRequestsForLuma[DML2_MAX_PLANES];
+ unsigned int PTEBufferSizeInRequestsForChroma[DML2_MAX_PLANES];
+ unsigned int vm_bytes_l; /* scalar, unlike the DML2_MAX_PLANES-sized arrays around it */
+ unsigned int vm_bytes_c; /* scalar, unlike the DML2_MAX_PLANES-sized arrays around it */
+ unsigned int PixelPTEBytesPerRowY[DML2_MAX_PLANES];
+ unsigned int PixelPTEBytesPerRowC[DML2_MAX_PLANES];
+ unsigned int PixelPTEBytesPerRowStorageY[DML2_MAX_PLANES];
+ unsigned int PixelPTEBytesPerRowStorageC[DML2_MAX_PLANES];
+ unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DML2_MAX_PLANES];
+ unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DML2_MAX_PLANES];
+ unsigned int dpte_row_width_luma_ub_one_row_per_frame[DML2_MAX_PLANES];
+ unsigned int dpte_row_height_luma_one_row_per_frame[DML2_MAX_PLANES];
+ unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DML2_MAX_PLANES];
+ unsigned int dpte_row_height_chroma_one_row_per_frame[DML2_MAX_PLANES];
+ bool one_row_per_frame_fits_in_buffer[DML2_MAX_PLANES];
+ unsigned int HostVMDynamicLevels;
+ unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES]; /* same name also exists as a pointer member of dml2_core_calcs_CalculateVMRowAndSwath_params */
+ unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES]; /* same name also exists as a pointer member of dml2_core_calcs_CalculateVMRowAndSwath_params */
+};
+
+struct dml2_core_calcs_CalculateVMRowAndSwath_params { /* Argument bundle; presumably passed to CalculateVMRowAndSwath() (inferred from the name) - confirm. */
+ const struct dml2_display_cfg *display_cfg; /* const-qualified: not modified through this pointer */
+ unsigned int NumberOfActiveSurfaces;
+ struct dml2_core_internal_DmlPipe *myPipe;
+ unsigned int *SurfaceSizeInMALL;
+ unsigned int PTEBufferSizeInRequestsLuma;
+ unsigned int PTEBufferSizeInRequestsChroma;
+ unsigned int MALLAllocatedForDCN;
+ unsigned int *SwathWidthY;
+ unsigned int *SwathWidthC;
+ unsigned int HostVMMinPageSize;
+ unsigned int DCCMetaBufferSizeBytes;
+ bool mrq_present;
+ enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES]; /* stored by value inside the struct, unlike the pointer members around it */
+
+ // Output: every member from here to the end of the struct is a pointer (presumably written by the callee - confirm)
+ bool *PTEBufferSizeNotExceeded;
+ bool *DCCMetaBufferSizeNotExceeded;
+
+ unsigned int *dpte_row_width_luma_ub;
+ unsigned int *dpte_row_width_chroma_ub;
+ unsigned int *dpte_row_height_luma;
+ unsigned int *dpte_row_height_chroma;
+ unsigned int *dpte_row_height_linear_luma; // VBA_DELTA
+ unsigned int *dpte_row_height_linear_chroma; // VBA_DELTA
+
+ unsigned int *vm_group_bytes;
+ unsigned int *dpte_group_bytes;
+ unsigned int *PixelPTEReqWidthY;
+ unsigned int *PixelPTEReqHeightY;
+ unsigned int *PTERequestSizeY;
+ unsigned int *vmpg_width_y;
+ unsigned int *vmpg_height_y;
+
+ unsigned int *PixelPTEReqWidthC;
+ unsigned int *PixelPTEReqHeightC;
+ unsigned int *PTERequestSizeC;
+ unsigned int *vmpg_width_c;
+ unsigned int *vmpg_height_c;
+
+ unsigned int *dpde0_bytes_per_frame_ub_l;
+ unsigned int *dpde0_bytes_per_frame_ub_c;
+
+ unsigned int *PrefetchSourceLinesY; /* unsigned int here; dml2_core_calcs_CalculatePrefetchSchedule_params declares PrefetchSourceLinesY as double */
+ unsigned int *PrefetchSourceLinesC;
+ unsigned int *VInitPreFillY;
+ unsigned int *VInitPreFillC;
+ unsigned int *MaxNumSwathY;
+ unsigned int *MaxNumSwathC;
+ double *dpte_row_bw;
+ unsigned int *PixelPTEBytesPerRow;
+ unsigned int *dpte_row_bytes_per_row_l;
+ unsigned int *dpte_row_bytes_per_row_c;
+ unsigned int *vm_bytes;
+ bool *use_one_row_for_frame;
+ bool *use_one_row_for_frame_flip;
+ bool *is_using_mall_for_ss;
+ bool *PTE_BUFFER_MODE;
+ unsigned int *BIGK_FRAGMENT_SIZE;
+
+ // MRQ
+ unsigned int *meta_req_width_luma;
+ unsigned int *meta_req_height_luma;
+ unsigned int *meta_row_width_luma;
+ unsigned int *meta_row_height_luma;
+ unsigned int *meta_pte_bytes_per_frame_ub_l;
+
+ unsigned int *meta_req_width_chroma;
+ unsigned int *meta_req_height_chroma;
+ unsigned int *meta_row_width_chroma;
+ unsigned int *meta_row_height_chroma;
+ unsigned int *meta_pte_bytes_per_frame_ub_c;
+ double *meta_row_bw;
+ unsigned int *meta_row_bytes;
+ unsigned int *meta_row_bytes_per_row_ub_l;
+ unsigned int *meta_row_bytes_per_row_ub_c;
+};
+
+struct dml2_core_calcs_CalculatePrefetchSchedule_locals { /* Working variables; presumably scratch for CalculatePrefetchSchedule() (inferred from the name) - confirm. All members are scalars. */
+ bool NoTimeToPrefetch;
+ unsigned int DPPCycles;
+ unsigned int DISPCLKCycles;
+ double DSTTotalPixelsAfterScaler;
+ double LineTime;
+ double dst_y_prefetch_equ; /* NOTE(review): _equ and _oto look like two alternative schedule variants - meaning not visible here, confirm */
+ double prefetch_bw_oto;
+ double per_pipe_vactive_sw_bw;
+ double Tvm_oto;
+ double Tr0_oto;
+ double Tvm_oto_lines;
+ double Tr0_oto_lines;
+ double dst_y_prefetch_oto;
+ double TimeForFetchingVM;
+ double TimeForFetchingRowInVBlank;
+ double LinesToRequestPrefetchPixelData;
+ unsigned int HostVMDynamicLevelsTrips;
+ double trip_to_mem;
+ double Tvm_trips_rounded;
+ double Tr0_trips_rounded;
+ double max_Tsw;
+ double Lsw_oto;
+ double prefetch_bw_equ;
+ double Tvm_equ;
+ double Tr0_equ;
+ double Tdmbf;
+ double Tdmec;
+ double Tdmsks;
+ double total_row_bytes;
+ double prefetch_bw_pr;
+ double bytes_pp;
+ double dep_bytes;
+ double min_Lsw_oto;
+ double min_Lsw_equ;
+ double Tsw_est1;
+ double Tsw_est2;
+ double Tsw_est3;
+ double prefetch_bw1;
+ double prefetch_bw2;
+ double prefetch_bw3;
+ double prefetch_bw4;
+ double dst_y_prefetch_equ_impacted;
+
+ double TWait_p; /* distinct from the TWait member of dml2_core_calcs_CalculatePrefetchSchedule_params */
+ unsigned int cursor_prefetch_bytes;
+};
+
+struct dml2_core_shared_calculate_det_buffer_size_params { /* Argument bundle; presumably for a DET buffer size calculation (inferred from the name) - confirm. Embedded by value in dml2_core_shared_CalculateSwathAndDETConfiguration_locals. */
+ const struct dml2_display_cfg *display_cfg; /* const-qualified: not modified through this pointer */
+ bool ForceSingleDPP;
+ unsigned int NumberOfActiveSurfaces;
+ bool UnboundedRequestEnabled;
+ unsigned int nomDETInKByte;
+ unsigned int MaxTotalDETInKByte;
+ unsigned int ConfigReturnBufferSizeInKByte;
+ unsigned int MinCompressedBufferSizeInKByte;
+ unsigned int ConfigReturnBufferSegmentSizeInkByte;
+ unsigned int CompressedBufferSegmentSizeInkByte;
+ double *ReadBandwidthLuma;
+ double *ReadBandwidthChroma;
+ unsigned int *full_swath_bytes_l;
+ unsigned int *full_swath_bytes_c;
+ unsigned int *swath_time_value_us;
+ unsigned int *DPPPerSurface;
+ bool TryToAllocateForWriteLatency;
+ unsigned int bestEffortMinActiveLatencyHidingUs;
+
+ // Output: both members below are pointers (presumably written by the callee - confirm)
+ unsigned int *DETBufferSizeInKByte;
+ unsigned int *CompressedBufferSizeInkByte;
+};
+
+struct dml2_core_shared_calculate_vm_and_row_bytes_params { /* Argument bundle; presumably for a VM/row byte calculation on a single surface plane (inferred from the name and the scalar inputs) - confirm. */
+ bool ViewportStationary;
+ bool DCCEnable;
+ unsigned int NumberOfDPPs;
+ unsigned int BlockHeight256Bytes;
+ unsigned int BlockWidth256Bytes;
+ enum dml2_source_format_class SourcePixelFormat;
+ unsigned int SurfaceTiling; /* plain unsigned int, not an enum type */
+ unsigned int BytePerPixel;
+ enum dml2_rotation_angle RotationAngle;
+ unsigned int SwathWidth; // per pipe
+ unsigned int ViewportHeight;
+ unsigned int ViewportXStart;
+ unsigned int ViewportYStart;
+ bool GPUVMEnable;
+ unsigned int GPUVMMaxPageTableLevels;
+ unsigned int GPUVMMinPageSizeKBytes;
+ unsigned int PTEBufferSizeInRequests;
+ unsigned int Pitch;
+ unsigned int MacroTileWidth;
+ unsigned int MacroTileHeight;
+ bool is_phantom;
+ unsigned int DCCMetaPitch;
+ bool mrq_present;
+
+ // Output: every member from here to the end of the struct is a pointer (presumably written by the callee - confirm)
+ unsigned int *PixelPTEBytesPerRow; // for bandwidth calculation
+ unsigned int *PixelPTEBytesPerRowStorage; // for PTE buffer size check
+ unsigned int *dpte_row_width_ub;
+ unsigned int *dpte_row_height;
+ unsigned int *dpte_row_height_linear;
+ unsigned int *PixelPTEBytesPerRow_one_row_per_frame;
+ unsigned int *dpte_row_width_ub_one_row_per_frame;
+ unsigned int *dpte_row_height_one_row_per_frame;
+ unsigned int *vmpg_width;
+ unsigned int *vmpg_height;
+ unsigned int *PixelPTEReqWidth;
+ unsigned int *PixelPTEReqHeight;
+ unsigned int *PTERequestSize;
+ unsigned int *dpde0_bytes_per_frame_ub;
+
+ unsigned int *meta_row_bytes;
+ unsigned int *MetaRequestWidth;
+ unsigned int *MetaRequestHeight;
+ unsigned int *meta_row_width;
+ unsigned int *meta_row_height;
+ unsigned int *meta_pte_bytes_per_frame_ub;
+};
+
+struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals { /* Working variables; presumably scratch for CalculateSwathAndDETConfiguration() (inferred from the name) - confirm. */
+ unsigned int MaximumSwathHeightY[DML2_MAX_PLANES];
+ unsigned int MaximumSwathHeightC[DML2_MAX_PLANES];
+ unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES];
+ unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES];
+ unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES];
+ unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES];
+ unsigned int SwathTimeValueUs[DML2_MAX_PLANES]; /* same element type as the swath_time_value_us pointer in the params struct embedded below */
+
+ struct dml2_core_shared_calculate_det_buffer_size_params calculate_det_buffer_size_params; /* embedded by value, so the argument bundle lives inside this scratch struct */
+};
+
+struct dml2_core_shared_TruncToValidBPP_locals { /* Declared with no members. NOTE(review): a member-less struct is a GNU C extension (size zero under GCC), not ISO C - confirm every supported compiler accepts it. */
+};
+
+struct dml2_core_shared_CalculateDETBufferSize_locals { /* Working variables; presumably scratch for CalculateDETBufferSize() (inferred from the name) - confirm. */
+ unsigned int DETBufferSizePoolInKByte;
+ unsigned int NextDETBufferPieceInKByte;
+ unsigned int NextSurfaceToAssignDETPiece;
+ double TotalBandwidth;
+ double BandwidthOfSurfacesNotAssignedDETPiece;
+ unsigned int max_minDET;
+ unsigned int minDET;
+ unsigned int minDET_pipe;
+ unsigned int TotalBandwidthPerStream[DML2_MAX_PLANES]; /* unsigned int, whereas TotalBandwidth above is double; array length is DML2_MAX_PLANES although the name says "stream" */
+ unsigned int TotalPixelRate;
+ unsigned int DETBudgetPerStream[DML2_MAX_PLANES];
+ unsigned int RemainingDETBudgetPerStream[DML2_MAX_PLANES];
+ unsigned int IdealDETBudget, DeltaDETBudget; /* two separate members declared in one statement */
+ unsigned int ResidualDETAfterRounding;
+};
+
+struct dml2_core_shared_get_urgent_bandwidth_required_locals { /* Working variables; presumably scratch for get_urgent_bandwidth_required() (inferred from the name) - confirm. */
+ double required_bandwidth_mbps;
+ double required_bandwidth_mbps_this_surface;
+ double adj_factor_p0; /* NOTE(review): p0/p1/cur presumably mean plane 0, plane 1 and cursor - confirm */
+ double adj_factor_p1;
+ double adj_factor_cur;
+ double adj_factor_p0_pre;
+ double adj_factor_p1_pre;
+ double adj_factor_cur_pre;
+ double per_plane_flip_bw[DML2_MAX_PLANES]; /* the only array member; all others are scalars */
+ double mall_svp_prefetch_factor;
+ double tmp_nom_adj_factor_p0;
+ double tmp_nom_adj_factor_p1;
+ double tmp_pref_adj_factor_p0;
+ double tmp_pref_adj_factor_p1;
+ double vm_row_bw;
+ double flip_and_active_bw;
+ double flip_and_prefetch_bw;
+ double flip_and_prefetch_bw_max;
+ double active_and_excess_bw;
+};
+
+struct dml2_core_shared_calculate_peak_bandwidth_required_locals { /* Working arrays; presumably scratch for calculate_peak_bandwidth_required() (inferred from the name) - confirm. */
+ double unity_array[DML2_MAX_PLANES]; /* NOTE(review): presumably filled with 1.0 by the user of this struct - nothing here initializes it */
+ double zero_array[DML2_MAX_PLANES]; /* NOTE(review): presumably filled with 0.0 by the user of this struct - nothing here initializes it */
+ double surface_dummy_bw[DML2_MAX_PLANES];
+};
+
+struct dml2_core_shared_CalculateFlipSchedule_locals { /* Working variables; presumably scratch for CalculateFlipSchedule() (inferred from the name) - confirm. All members are scalars. */
+ double min_row_time;
+ double Tvm_flip;
+ double Tr0_flip;
+ double ImmediateFlipBW;
+ double dpte_row_bytes; /* byte and row quantities in this struct are held as double, not as integers */
+ double min_row_height;
+ double min_row_height_chroma;
+ double max_flip_time;
+ double lb_flip_bw;
+ double hvm_scaled_vm_bytes;
+ double num_rows;
+ double hvm_scaled_row_bytes;
+ double hvm_scaled_vm_row_bytes;
+ bool dual_plane;
+};
+
+struct dml2_core_shared_rq_dlg_get_dlg_reg_locals { /* Working variables; presumably scratch for rq_dlg_get_dlg_reg() (inferred from the name) - confirm. */
+ unsigned int plane_idx;
+ unsigned int stream_idx;
+ enum dml2_source_format_class source_format;
+ const struct dml2_timing_cfg *timing; /* const-qualified: the timing config is not modified through this pointer */
+ bool dual_plane;
+ enum dml2_odm_mode odm_mode;
+
+ unsigned int htotal;
+ unsigned int hactive;
+ unsigned int hblank_end;
+ unsigned int vblank_end;
+ bool interlaced;
+ double pclk_freq_in_mhz;
+ double refclk_freq_in_mhz;
+ double ref_freq_to_pix_freq;
+
+ unsigned int num_active_pipes;
+ unsigned int first_pipe_idx_in_plane;
+ unsigned int pipe_idx_in_combine;
+ unsigned int odm_combine_factor;
+
+ double min_ttu_vblank;
+ unsigned int min_dst_y_next_start;
+
+ unsigned int vready_after_vcount0;
+
+ unsigned int dst_x_after_scaler;
+ unsigned int dst_y_after_scaler;
+
+ double dst_y_prefetch;
+ double dst_y_per_vm_vblank;
+ double dst_y_per_row_vblank;
+ double dst_y_per_vm_flip;
+ double dst_y_per_row_flip;
+
+ double max_dst_y_per_vm_vblank;
+ double max_dst_y_per_row_vblank;
+
+ double vratio_pre_l;
+ double vratio_pre_c;
+
+ double refcyc_per_line_delivery_pre_l; /* NOTE(review): "refcyc" presumably means reference-clock cycles - confirm */
+ double refcyc_per_line_delivery_l;
+
+ double refcyc_per_line_delivery_pre_c;
+ double refcyc_per_line_delivery_c;
+
+ double refcyc_per_req_delivery_pre_l;
+ double refcyc_per_req_delivery_l;
+
+ double refcyc_per_req_delivery_pre_c;
+ double refcyc_per_req_delivery_c;
+
+ double dst_y_per_pte_row_nom_l;
+ double dst_y_per_pte_row_nom_c;
+ double refcyc_per_pte_group_nom_l;
+ double refcyc_per_pte_group_nom_c;
+ double refcyc_per_pte_group_vblank_l;
+ double refcyc_per_pte_group_vblank_c;
+ double refcyc_per_pte_group_flip_l;
+ double refcyc_per_pte_group_flip_c;
+ double refcyc_per_tdlut_group;
+
+ double dst_y_per_meta_row_nom_l;
+ double dst_y_per_meta_row_nom_c;
+ double refcyc_per_meta_chunk_nom_l;
+ double refcyc_per_meta_chunk_nom_c;
+ double refcyc_per_meta_chunk_vblank_l;
+ double refcyc_per_meta_chunk_vblank_c;
+ double refcyc_per_meta_chunk_flip_l;
+ double refcyc_per_meta_chunk_flip_c;
+};
+
+struct dml2_core_shared_CalculateMetaAndPTETimes_params { /* Argument bundle; presumably passed to CalculateMetaAndPTETimes() (inferred from the name) - confirm. */
+ struct dml2_core_internal_scratch *scratch; /* non-const pointer to the scratch struct, so it may be modified through it */
+ const struct dml2_display_cfg *display_cfg; /* const-qualified: not modified through this pointer */
+ unsigned int NumberOfActiveSurfaces;
+ bool *use_one_row_for_frame;
+ double *dst_y_per_row_vblank;
+ double *dst_y_per_row_flip;
+ unsigned int *BytePerPixelY;
+ unsigned int *BytePerPixelC;
+ unsigned int *dpte_row_height;
+ unsigned int *dpte_row_height_chroma;
+ unsigned int *dpte_group_bytes;
+ unsigned int *PTERequestSizeY;
+ unsigned int *PTERequestSizeC;
+ unsigned int *PixelPTEReqWidthY;
+ unsigned int *PixelPTEReqHeightY;
+ unsigned int *PixelPTEReqWidthC;
+ unsigned int *PixelPTEReqHeightC;
+ unsigned int *dpte_row_width_luma_ub;
+ unsigned int *dpte_row_width_chroma_ub;
+ unsigned int *tdlut_groups_per_2row_ub;
+ bool mrq_present;
+ unsigned int MetaChunkSize;
+ unsigned int MinMetaChunkSizeBytes;
+ unsigned int *meta_row_width;
+ unsigned int *meta_row_width_chroma;
+ unsigned int *meta_row_height;
+ unsigned int *meta_row_height_chroma;
+ unsigned int *meta_req_width;
+ unsigned int *meta_req_width_chroma;
+ unsigned int *meta_req_height;
+ unsigned int *meta_req_height_chroma;
+
+ // Output: every member from here to the end of the struct is a double pointer target (presumably written by the callee - confirm)
+ double *time_per_tdlut_group;
+ double *DST_Y_PER_PTE_ROW_NOM_L;
+ double *DST_Y_PER_PTE_ROW_NOM_C;
+ double *time_per_pte_group_nom_luma;
+ double *time_per_pte_group_vblank_luma;
+ double *time_per_pte_group_flip_luma;
+ double *time_per_pte_group_nom_chroma;
+ double *time_per_pte_group_vblank_chroma;
+ double *time_per_pte_group_flip_chroma;
+
+ double *DST_Y_PER_META_ROW_NOM_L;
+ double *DST_Y_PER_META_ROW_NOM_C;
+
+ double *TimePerMetaChunkNominal;
+ double *TimePerChromaMetaChunkNominal;
+ double *TimePerMetaChunkVBlank;
+ double *TimePerChromaMetaChunkVBlank;
+ double *TimePerMetaChunkFlip;
+ double *TimePerChromaMetaChunkFlip;
+};
+
+struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params { /* Argument bundle; presumably passed to CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport() (inferred from the name) - confirm. */
+ const struct dml2_display_cfg *display_cfg; /* const-qualified: not modified through this pointer */
+ bool USRRetrainingRequired;
+ unsigned int NumberOfActiveSurfaces;
+ unsigned int MaxLineBufferLines;
+ unsigned int LineBufferSize;
+ unsigned int WritebackInterfaceBufferSize;
+ double DCFCLK;
+ double ReturnBW;
+ bool SynchronizeTimings;
+ bool SynchronizeDRRDisplaysForUCLKPStateChange;
+ unsigned int *dpte_group_bytes;
+ struct dml2_core_internal_SOCParametersList mmSOCParameters; /* held by value (a copy), not by pointer */
+ unsigned int WritebackChunkSize;
+ double SOCCLK;
+ double DCFClkDeepSleep;
+ unsigned int *DETBufferSizeY;
+ unsigned int *DETBufferSizeC;
+ unsigned int *SwathHeightY;
+ unsigned int *SwathHeightC;
+ unsigned int *SwathWidthY;
+ unsigned int *SwathWidthC;
+ unsigned int *DPPPerSurface;
+ double *BytePerPixelDETY;
+ double *BytePerPixelDETC;
+ unsigned int *DSTXAfterScaler;
+ unsigned int *DSTYAfterScaler;
+ bool UnboundedRequestEnabled;
+ unsigned int CompressedBufferSizeInkByte;
+ bool max_outstanding_when_urgent_expected;
+ unsigned int max_outstanding_requests;
+ unsigned int max_request_size_bytes;
+ unsigned int *meta_row_height_l;
+ unsigned int *meta_row_height_c;
+
+ // Output: every member from here to the end of the struct is a pointer (presumably written by the callee - confirm)
+ struct dml2_core_internal_watermarks *Watermark;
+ enum dml2_pstate_change_support *DRAMClockChangeSupport;
+ bool *global_dram_clock_change_supported;
+ double *MaxActiveDRAMClockChangeLatencySupported;
+ unsigned int *SubViewportLinesNeededInMALL;
+ enum dml2_pstate_change_support *FCLKChangeSupport;
+ bool *global_fclk_change_supported;
+ double *MaxActiveFCLKChangeLatencySupported;
+ bool *USRRetrainingSupport;
+ double *VActiveLatencyHidingMargin;
+ double *VActiveLatencyHidingUs;
+ bool *g6_temp_read_support;
+ bool *temp_read_or_ppt_support;
+};
+
+
+struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params { /* Argument bundle; presumably passed to CalculateSwathAndDETConfiguration() (inferred from the name) - confirm. */
+ const struct dml2_display_cfg *display_cfg; /* const-qualified: not modified through this pointer */
+ unsigned int ConfigReturnBufferSizeInKByte;
+ unsigned int MaxTotalDETInKByte;
+ unsigned int MinCompressedBufferSizeInKByte;
+ unsigned int rob_buffer_size_kbytes;
+ unsigned int pixel_chunk_size_kbytes;
+ bool ForceSingleDPP;
+ unsigned int NumberOfActiveSurfaces;
+ unsigned int nomDETInKByte;
+ unsigned int ConfigReturnBufferSegmentSizeInkByte;
+ unsigned int CompressedBufferSegmentSizeInkByte;
+ double *ReadBandwidthLuma;
+ double *ReadBandwidthChroma;
+ double *MaximumSwathWidthLuma;
+ double *MaximumSwathWidthChroma;
+ unsigned int *Read256BytesBlockHeightY;
+ unsigned int *Read256BytesBlockHeightC;
+ unsigned int *Read256BytesBlockWidthY;
+ unsigned int *Read256BytesBlockWidthC;
+ bool *surf_linear128_l;
+ bool *surf_linear128_c;
+ enum dml2_odm_mode *ODMMode;
+ unsigned int *BytePerPixY;
+ unsigned int *BytePerPixC;
+ double *BytePerPixDETY;
+ double *BytePerPixDETC;
+ unsigned int *DPPPerSurface;
+ bool mrq_present;
+ unsigned int dummy[2][DML2_MAX_PLANES]; /* storage held by value inside the params struct; NOTE(review): presumably a sink for unwanted outputs - confirm */
+ unsigned int swath_width_luma_ub_single_dpp[DML2_MAX_PLANES]; /* held by value here, unlike the swath_width_luma_ub pointer below */
+ unsigned int swath_width_chroma_ub_single_dpp[DML2_MAX_PLANES]; /* held by value here, unlike the swath_width_chroma_ub pointer below */
+
+ // output: pointer members (presumably written by the callee - confirm); the trailing funcs pointer is likely not an output
+ unsigned int *req_per_swath_ub_l;
+ unsigned int *req_per_swath_ub_c;
+ unsigned int *swath_width_luma_ub;
+ unsigned int *swath_width_chroma_ub;
+ unsigned int *SwathWidth;
+ unsigned int *SwathWidthChroma;
+ unsigned int *SwathHeightY;
+ unsigned int *SwathHeightC;
+ unsigned int *request_size_bytes_luma;
+ unsigned int *request_size_bytes_chroma;
+ unsigned int *DETBufferSizeInKByte;
+ unsigned int *DETBufferSizeY;
+ unsigned int *DETBufferSizeC;
+ unsigned int *full_swath_bytes_l;
+ unsigned int *full_swath_bytes_c;
+ unsigned int *full_swath_bytes_single_dpp_l;
+ unsigned int *full_swath_bytes_single_dpp_c;
+ bool *UnboundedRequestEnabled;
+ unsigned int *compbuf_reserved_space_64b;
+ unsigned int *CompressedBufferSizeInkByte;
+ bool *ViewportSizeSupportPerSurface;
+ bool *ViewportSizeSupport;
+ bool *hw_debug5;
+
+ struct dml2_core_shared_calculation_funcs *funcs; /* NOTE(review): presumably a table of shared calculation callbacks - type is defined outside this view, confirm */
+};
+
+struct dml2_core_calcs_CalculateStutterEfficiency_locals { /* Working variables; presumably scratch for CalculateStutterEfficiency() (inferred from the name) - confirm. */
+ double DETBufferingTimeY;
+ double SwathWidthYCriticalSurface; /* double here; the SwathWidthY input in the matching _params struct points to unsigned int */
+ double SwathHeightYCriticalSurface; /* double here; the SwathHeightY input in the matching _params struct points to unsigned int */
+ double VActiveTimeCriticalSurface;
+ double FrameTimeCriticalSurface;
+ unsigned int BytePerPixelYCriticalSurface;
+ unsigned int DETBufferSizeYCriticalSurface;
+ double MinTTUVBlankCriticalSurface;
+ unsigned int BlockWidth256BytesYCriticalSurface;
+ bool SinglePlaneCriticalSurface;
+ bool SinglePipeCriticalSurface;
+ double TotalCompressedReadBandwidth;
+ double TotalRowReadBandwidth;
+ double AverageDCCCompressionRate;
+ double EffectiveCompressedBufferSize;
+ double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
+ double StutterBurstTime;
+ unsigned int TotalActiveWriteback;
+ double LinesInDETY;
+ double LinesInDETYRoundedDownToSwath; /* double scalar here; dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals uses an unsigned int array for the same name */
+ double MaximumEffectiveCompressionLuma;
+ double MaximumEffectiveCompressionChroma;
+ double TotalZeroSizeRequestReadBandwidth;
+ double TotalZeroSizeCompressedReadBandwidth;
+ double AverageDCCZeroSizeFraction;
+ double AverageZeroSizeCompressionRate;
+ bool stream_visited[DML2_MAX_PLANES]; /* array length is DML2_MAX_PLANES although the name says "stream"; the only array member */
+};
+
+struct dml2_core_calcs_CalculateStutterEfficiency_params { /* Argument bundle; presumably passed to CalculateStutterEfficiency() (inferred from the name) - confirm. */
+ const struct dml2_display_cfg *display_cfg; /* const-qualified: not modified through this pointer */
+ unsigned int CompressedBufferSizeInkByte;
+ bool UnboundedRequestEnabled;
+ unsigned int MetaFIFOSizeInKEntries;
+ unsigned int ZeroSizeBufferEntries;
+ unsigned int PixelChunkSizeInKByte;
+ unsigned int NumberOfActiveSurfaces;
+ unsigned int ROBBufferSizeInKByte;
+ double TotalDataReadBandwidth;
+ double DCFCLK;
+ double ReturnBW;
+ unsigned int CompbufReservedSpace64B;
+ unsigned int CompbufReservedSpaceZs;
+ bool hw_debug5;
+ double SRExitTime; /* three exit times (normal, low-power, Z8) pair with the three watermarks and three groups of outputs below */
+ double SRExitTimeLowPower;
+ double SRExitZ8Time;
+ bool SynchronizeTimings;
+ double StutterEnterPlusExitWatermark;
+ double LowPowerStutterEnterPlusExitWatermark;
+ double Z8StutterEnterPlusExitWatermark;
+ bool ProgressiveToInterlaceUnitInOPP;
+ double *MinTTUVBlank;
+ unsigned int *DPPPerSurface;
+ unsigned int *DETBufferSizeY;
+ unsigned int *BytePerPixelY;
+ double *BytePerPixelDETY;
+ unsigned int *SwathWidthY;
+ unsigned int *SwathHeightY;
+ unsigned int *SwathHeightC;
+ unsigned int *BlockHeight256BytesY;
+ unsigned int *BlockWidth256BytesY;
+ unsigned int *BlockHeight256BytesC;
+ unsigned int *BlockWidth256BytesC;
+ unsigned int *DCCYMaxUncompressedBlock;
+ unsigned int *DCCCMaxUncompressedBlock;
+ double *ReadBandwidthSurfaceLuma;
+ double *ReadBandwidthSurfaceChroma;
+ double *meta_row_bw;
+ double *dpte_row_bw;
+ bool rob_alloc_compressed;
+
+ // output: every member from here to the end of the struct is a pointer (presumably written by the callee - confirm)
+ double *StutterEfficiencyNotIncludingVBlank;
+ double *StutterEfficiency;
+ double *LowPowerStutterEfficiencyNotIncludingVBlank;
+ double *LowPowerStutterEfficiency;
+ unsigned int *NumberOfStutterBurstsPerFrame;
+ unsigned int *LowPowerNumberOfStutterBurstsPerFrame;
+ double *Z8StutterEfficiencyNotIncludingVBlank;
+ double *Z8StutterEfficiency;
+ unsigned int *Z8NumberOfStutterBurstsPerFrame;
+ double *StutterPeriod;
+ bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
+};
+
+struct dml2_core_calcs_CalculatePrefetchSchedule_params { /* Argument bundle; presumably passed to CalculatePrefetchSchedule() (inferred from the name) - confirm. Inputs are scalars, so presumably one call covers one plane/pipe - confirm. */
+ const struct dml2_display_cfg *display_cfg; /* const-qualified: not modified through this pointer */
+ double HostVMInefficiencyFactor;
+ struct dml2_core_internal_DmlPipe *myPipe;
+ unsigned int DSCDelay;
+ double DPPCLKDelaySubtotalPlusCNVCFormater;
+ double DPPCLKDelaySCL;
+ double DPPCLKDelaySCLLBOnly;
+ double DPPCLKDelayCNVCCursor;
+ double DISPCLKDelaySubtotal;
+ unsigned int DPP_RECOUT_WIDTH;
+ enum dml2_output_format_class OutputFormat;
+ unsigned int MaxInterDCNTileRepeaters;
+ unsigned int VStartup;
+ unsigned int HostVMMinPageSize;
+ bool DynamicMetadataEnable;
+ bool DynamicMetadataVMEnabled;
+ unsigned int DynamicMetadataLinesBeforeActiveRequired;
+ unsigned int DynamicMetadataTransmittedBytes;
+ double UrgentLatency;
+ double ExtraLatencyPrefetch;
+ double TCalc;
+ unsigned int vm_bytes;
+ unsigned int PixelPTEBytesPerRow;
+ double PrefetchSourceLinesY; /* NOTE(review): double here, but dml2_core_calcs_CalculateVMRowAndSwath_params exposes PrefetchSourceLinesY as unsigned int * - confirm the conversion is intended */
+ unsigned int VInitPreFillY;
+ unsigned int MaxNumSwathY;
+ double PrefetchSourceLinesC; /* NOTE(review): double here, unsigned int * in dml2_core_calcs_CalculateVMRowAndSwath_params - confirm */
+ unsigned int VInitPreFillC;
+ unsigned int MaxNumSwathC;
+ unsigned int swath_width_luma_ub; // per-pipe
+ unsigned int swath_width_chroma_ub; // per-pipe
+ unsigned int SwathHeightY;
+ unsigned int SwathHeightC;
+ double TWait;
+ double Ttrip;
+ double Turg;
+ bool setup_for_tdlut;
+ unsigned int tdlut_pte_bytes_per_frame;
+ unsigned int tdlut_bytes_per_frame;
+ double tdlut_opt_time;
+ double tdlut_drain_time;
+
+ unsigned int num_cursors;
+ unsigned int cursor_bytes_per_chunk;
+ unsigned int cursor_bytes_per_line;
+
+ // MRQ
+ bool dcc_enable;
+ bool mrq_present;
+ unsigned int meta_row_bytes;
+ double mall_prefetch_sdp_overhead_factor;
+
+ double impacted_dst_y_pre;
+ double vactive_sw_bw_l; // per surface bw
+ double vactive_sw_bw_c; // per surface bw
+
+ // output: every member from here to the end of the struct is a pointer (presumably written by the callee - confirm)
+ unsigned int *DSTXAfterScaler;
+ unsigned int *DSTYAfterScaler;
+ double *dst_y_prefetch;
+ double *dst_y_per_vm_vblank;
+ double *dst_y_per_row_vblank;
+ double *VRatioPrefetchY;
+ double *VRatioPrefetchC;
+ double *RequiredPrefetchPixelDataBWLuma;
+ double *RequiredPrefetchPixelDataBWChroma;
+ double *RequiredPrefetchBWMax;
+ bool *NotEnoughTimeForDynamicMetadata;
+ double *Tno_bw;
+ double *Tno_bw_flip;
+ double *prefetch_vmrow_bw;
+ double *Tdmdl_vm;
+ double *Tdmdl;
+ double *TSetup;
+ double *Tpre_rounded;
+ double *Tpre_oto;
+ double *Tvm_trips;
+ double *Tr0_trips;
+ double *Tvm_trips_flip;
+ double *Tr0_trips_flip;
+ double *Tvm_trips_flip_rounded;
+ double *Tr0_trips_flip_rounded;
+ unsigned int *VUpdateOffsetPix;
+ unsigned int *VUpdateWidthPix;
+ unsigned int *VReadyOffsetPix;
+ double *prefetch_cursor_bw;
+ double *prefetch_sw_bytes;
+ double *prefetch_swath_time_us;
+};
+
+struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params { /* Argument bundle; presumably passed to CheckGlobalPrefetchAdmissibility() (inferred from the name) - confirm. */
+ unsigned int num_active_planes;
+ enum dml2_source_format_class *pixel_format;
+ unsigned int rob_buffer_size_kbytes;
+ unsigned int compressed_buffer_size_kbytes;
+ unsigned int chunk_bytes_l; // same for all planes
+ unsigned int chunk_bytes_c; /* scalar like chunk_bytes_l above */
+ unsigned int *detile_buffer_size_bytes_l;
+ unsigned int *detile_buffer_size_bytes_c;
+ unsigned int *full_swath_bytes_l;
+ unsigned int *full_swath_bytes_c;
+ unsigned int *lb_source_lines_l;
+ unsigned int *lb_source_lines_c;
+ unsigned int *swath_height_l;
+ unsigned int *swath_height_c;
+ double *prefetch_sw_bytes;
+ double *Tpre_rounded;
+ double *Tpre_oto;
+ double estimated_dcfclk_mhz;
+ double estimated_urg_bandwidth_required_mbps;
+ double *line_time;
+ double *dst_y_prefetch;
+
+ // output: both members below are pointers (presumably written by the callee - confirm)
+ bool *recalc_prefetch_schedule;
+ double *impacted_dst_y_pre;
+};
+
+struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals { /* Working variables; presumably scratch for CheckGlobalPrefetchAdmissibility() (inferred from the name) - confirm. */
+ unsigned int max_Trpd_dcfclk_cycles;
+ unsigned int burst_bytes_to_fill_det;
+ double time_to_fill_det_us;
+ unsigned int accumulated_return_path_dcfclk_cycles[DML2_MAX_PLANES];
+ bool prefetch_global_check_passed;
+ unsigned int src_swath_bytes_l[DML2_MAX_PLANES];
+ unsigned int src_swath_bytes_c[DML2_MAX_PLANES];
+ unsigned int src_detile_buf_size_bytes_l[DML2_MAX_PLANES];
+ unsigned int src_detile_buf_size_bytes_c[DML2_MAX_PLANES];
+};
+
+struct dml2_core_calcs_calculate_mcache_row_bytes_params { /* Argument bundle; presumably passed to calculate_mcache_row_bytes() (inferred from the name) - confirm. Two instances (l_p, c_p) are embedded in dml2_core_shared_calculate_mcache_setting_locals. */
+ unsigned int num_chans;
+ unsigned int mem_word_bytes;
+ unsigned int mcache_size_bytes;
+ unsigned int mcache_line_size_bytes;
+ unsigned int gpuvm_enable; /* NOTE(review): unsigned int here, while dml2_core_calcs_calculate_tdlut_setting_params declares gpuvm_enable as bool - confirm */
+ unsigned int gpuvm_page_size_kbytes;
+
+ //enum dml_rotation_angle rotation_angle;
+ bool surf_vert;
+ unsigned int vp_stationary; /* NOTE(review): unsigned int here, while dml2_core_shared_calculate_vm_and_row_bytes_params uses bool ViewportStationary - confirm */
+ unsigned int tiling_mode;
+ bool imall_enable;
+
+ unsigned int vp_start_x;
+ unsigned int vp_start_y;
+ unsigned int full_vp_width;
+ unsigned int full_vp_height;
+ unsigned int blk_width;
+ unsigned int blk_height;
+ unsigned int vmpg_width;
+ unsigned int vmpg_height;
+ unsigned int full_swath_bytes;
+ unsigned int bytes_per_pixel;
+
+ // output: every member from here to the end of the struct is a pointer (presumably written by the callee - confirm)
+ unsigned int *num_mcaches;
+ unsigned int *mcache_row_bytes;
+ unsigned int *mcache_row_bytes_per_channel;
+ unsigned int *meta_row_width_ub;
+ double *dcc_dram_bw_nom_overhead_factor;
+ double *dcc_dram_bw_pref_overhead_factor;
+ unsigned int *mvmpg_width;
+ unsigned int *mvmpg_height;
+ unsigned int *full_vp_access_width_mvmpg_aligned;
+ unsigned int *mvmpg_per_mcache_lb;
+};
+
+struct dml2_core_shared_calculate_mcache_setting_locals { /* Working variables; presumably scratch for calculate_mcache_setting() (inferred from the name) - confirm. */
+ struct dml2_core_calcs_calculate_mcache_row_bytes_params l_p; /* row-bytes argument bundle held by value; NOTE(review): presumably the luma instance - confirm */
+ struct dml2_core_calcs_calculate_mcache_row_bytes_params c_p; /* row-bytes argument bundle held by value; NOTE(review): presumably the chroma instance - confirm */
+
+ bool is_dual_plane;
+ unsigned int mvmpg_width_l; /* the five _l members here have names matching output pointers in the row-bytes params struct (meta_row_width_l vs meta_row_width_ub) */
+ unsigned int mvmpg_height_l;
+ unsigned int full_vp_access_width_mvmpg_aligned_l;
+ unsigned int mvmpg_per_mcache_lb_l;
+ unsigned int meta_row_width_l;
+
+ unsigned int mvmpg_width_c; /* _c counterparts of the five _l members above */
+ unsigned int mvmpg_height_c;
+ unsigned int full_vp_access_width_mvmpg_aligned_c;
+ unsigned int mvmpg_per_mcache_lb_c;
+ unsigned int meta_row_width_c;
+
+ unsigned int lc_comb_last_mcache_size;
+ double luma_time_factor;
+ double mcache_remainder_l;
+ double mcache_remainder_c;
+ unsigned int mvmpg_access_width_l;
+ unsigned int mvmpg_access_width_c;
+ unsigned int avg_mcache_element_size_l;
+ unsigned int avg_mcache_element_size_c;
+
+ unsigned int full_vp_access_width_l;
+ unsigned int full_vp_access_width_c;
+};
+
+struct dml2_core_calcs_calculate_mcache_setting_params { /* Argument bundle; presumably passed to calculate_mcache_setting() (inferred from the name) - confirm. Carries _l and _c variants of the fields found in dml2_core_calcs_calculate_mcache_row_bytes_params. */
+ bool dcc_enable;
+ unsigned int num_chans;
+ unsigned int mem_word_bytes;
+ unsigned int mcache_size_bytes;
+ unsigned int mcache_line_size_bytes;
+ unsigned int gpuvm_enable; /* NOTE(review): unsigned int here, while dml2_core_calcs_calculate_tdlut_setting_params declares gpuvm_enable as bool - confirm */
+ unsigned int gpuvm_page_size_kbytes;
+
+ enum dml2_source_format_class source_format;
+ bool surf_vert;
+ unsigned int vp_stationary;
+ unsigned int tiling_mode;
+ bool imall_enable;
+
+ unsigned int vp_start_x_l; /* first of ten _l-suffixed inputs; NOTE(review): presumably luma - confirm */
+ unsigned int vp_start_y_l;
+ unsigned int full_vp_width_l;
+ unsigned int full_vp_height_l;
+ unsigned int blk_width_l;
+ unsigned int blk_height_l;
+ unsigned int vmpg_width_l;
+ unsigned int vmpg_height_l;
+ unsigned int full_swath_bytes_l;
+ unsigned int bytes_per_pixel_l;
+
+ unsigned int vp_start_x_c; /* first of ten _c-suffixed inputs; NOTE(review): presumably chroma - confirm */
+ unsigned int vp_start_y_c;
+ unsigned int full_vp_width_c;
+ unsigned int full_vp_height_c;
+ unsigned int blk_width_c;
+ unsigned int blk_height_c;
+ unsigned int vmpg_width_c;
+ unsigned int vmpg_height_c;
+ unsigned int full_swath_bytes_c;
+ unsigned int bytes_per_pixel_c;
+
+ // output: every member from here to the end of the struct is a pointer (presumably written by the callee - confirm)
+ unsigned int *num_mcaches_l;
+ unsigned int *mcache_row_bytes_l;
+ unsigned int *mcache_row_bytes_per_channel_l;
+ unsigned int *mcache_offsets_l; /* NOTE(review): presumably points to an array of offsets; its length is not visible here - confirm */
+ unsigned int *mcache_shift_granularity_l;
+ double *dcc_dram_bw_nom_overhead_factor_l;
+ double *dcc_dram_bw_pref_overhead_factor_l;
+
+ unsigned int *num_mcaches_c;
+ unsigned int *mcache_row_bytes_c;
+ unsigned int *mcache_row_bytes_per_channel_c;
+ unsigned int *mcache_offsets_c; /* NOTE(review): presumably points to an array of offsets; its length is not visible here - confirm */
+ unsigned int *mcache_shift_granularity_c;
+ double *dcc_dram_bw_nom_overhead_factor_c;
+ double *dcc_dram_bw_pref_overhead_factor_c;
+
+ bool *mall_comb_mcache_l;
+ bool *mall_comb_mcache_c;
+ bool *lc_comb_mcache;
+};
+
+struct dml2_core_calcs_calculate_tdlut_setting_params { /* Argument bundle; presumably passed to calculate_tdlut_setting() (inferred from the name) - confirm. */
+ // input params
+ double dispclk_mhz;
+ bool setup_for_tdlut;
+ enum dml2_tdlut_width_mode tdlut_width_mode;
+ enum dml2_tdlut_addressing_mode tdlut_addressing_mode;
+ unsigned int cursor_buffer_size;
+ bool gpuvm_enable; /* bool here; the mcache params structs declare gpuvm_enable as unsigned int */
+ unsigned int gpuvm_page_size_kbytes;
+ bool is_gfx11;
+ bool tdlut_mpc_width_flag;
+
+ // output param: every member below is a pointer (presumably written by the callee - confirm)
+ unsigned int *tdlut_pte_bytes_per_frame;
+ unsigned int *tdlut_bytes_per_frame;
+ unsigned int *tdlut_groups_per_2row_ub;
+ double *tdlut_opt_time;
+ double *tdlut_drain_time;
+ unsigned int *tdlut_bytes_to_deliver;
+ unsigned int *tdlut_bytes_per_group;
+};
+
+struct dml2_core_calcs_calculate_peak_bandwidth_required_params { // argument block; outputs are listed first, inputs second
+ // output: pointers to arrays bounded by dml2_core_internal_bw_max; the last two have a second dimension bounded by DML2_MAX_PLANES
+ double (*urg_vactive_bandwidth_required)[dml2_core_internal_bw_max];
+ double (*urg_bandwidth_required)[dml2_core_internal_bw_max];
+ double (*urg_bandwidth_required_qual)[dml2_core_internal_bw_max];
+ double (*non_urg_bandwidth_required)[dml2_core_internal_bw_max];
+ double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES];
+ double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES];
+
+ // input: pointer members below are presumably per-plane arrays -- TODO confirm element counts with the callee
+ const struct dml2_display_cfg *display_cfg;
+ bool inc_flip_bw;
+ unsigned int num_active_planes;
+ unsigned int *num_of_dpp;
+ double *dcc_dram_bw_nom_overhead_factor_p0;
+ double *dcc_dram_bw_nom_overhead_factor_p1;
+ double *dcc_dram_bw_pref_overhead_factor_p0;
+ double *dcc_dram_bw_pref_overhead_factor_p1;
+ double *mall_prefetch_sdp_overhead_factor;
+ double *mall_prefetch_dram_overhead_factor;
+ double *surface_read_bandwidth_l;
+ double *surface_read_bandwidth_c;
+ double *prefetch_bandwidth_l;
+ double *prefetch_bandwidth_c;
+ double *prefetch_bandwidth_max;
+ double *excess_vactive_fill_bw_l;
+ double *excess_vactive_fill_bw_c;
+ double *cursor_bw;
+ double *dpte_row_bw;
+ double *meta_row_bw;
+ double *prefetch_cursor_bw;
+ double *prefetch_vmrow_bw;
+ double *flip_bw;
+ double *urgent_burst_factor_l;
+ double *urgent_burst_factor_c;
+ double *urgent_burst_factor_cursor;
+ double *urgent_burst_factor_prefetch_l;
+ double *urgent_burst_factor_prefetch_c;
+ double *urgent_burst_factor_prefetch_cursor;
+};
+
+struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params { /* argument block; _l / _c members come in pairs */
+ /* inputs (pointer members are presumably per-plane arrays -- TODO confirm element counts with the callee) */
+ const struct dml2_display_cfg *display_cfg;
+ bool mrq_present;
+ unsigned int num_active_planes;
+ unsigned int *num_of_dpp;
+ unsigned int *meta_row_height_l;
+ unsigned int *meta_row_height_c;
+ unsigned int *meta_row_bytes_per_row_ub_l;
+ unsigned int *meta_row_bytes_per_row_ub_c;
+ unsigned int *dpte_row_height_l;
+ unsigned int *dpte_row_height_c;
+ unsigned int *dpte_bytes_per_row_l;
+ unsigned int *dpte_bytes_per_row_c;
+ unsigned int *byte_per_pix_l;
+ unsigned int *byte_per_pix_c;
+ unsigned int *swath_width_l;
+ unsigned int *swath_width_c;
+ unsigned int *swath_height_l;
+ unsigned int *swath_height_c;
+ double latency_to_hide_us;
+
+ /* outputs (result storage reached through these pointers) */
+ unsigned int *bytes_required_l;
+ unsigned int *bytes_required_c;
+};
+
+// A list of overridable function pointers in the core
+// shared calculation library.
+struct dml2_core_shared_calculation_funcs {
+ void (*calculate_det_buffer_size)(struct dml2_core_shared_calculate_det_buffer_size_params *p); // the only hook declared here; all arguments travel in *p
+};
+
+struct dml2_core_internal_scratch { // one instance of each per-function locals / params struct, embedded by value
+ // Scratch space for function locals
+ struct dml2_core_calcs_mode_support_locals dml_core_mode_support_locals;
+ struct dml2_core_calcs_mode_programming_locals dml_core_mode_programming_locals;
+ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
+ struct dml2_core_calcs_CalculateVMRowAndSwath_locals CalculateVMRowAndSwath_locals;
+ struct dml2_core_calcs_CalculatePrefetchSchedule_locals CalculatePrefetchSchedule_locals;
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals CheckGlobalPrefetchAdmissibility_locals;
+ struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals CalculateSwathAndDETConfiguration_locals;
+ struct dml2_core_shared_TruncToValidBPP_locals TruncToValidBPP_locals;
+ struct dml2_core_shared_CalculateDETBufferSize_locals CalculateDETBufferSize_locals;
+ struct dml2_core_shared_get_urgent_bandwidth_required_locals get_urgent_bandwidth_required_locals;
+ struct dml2_core_shared_calculate_peak_bandwidth_required_locals calculate_peak_bandwidth_required_locals;
+ struct dml2_core_shared_CalculateFlipSchedule_locals CalculateFlipSchedule_locals;
+ struct dml2_core_shared_rq_dlg_get_dlg_reg_locals rq_dlg_get_dlg_reg_locals;
+ struct dml2_core_calcs_CalculateStutterEfficiency_locals CalculateStutterEfficiency_locals;
+
+ // Scratch space for function params
+ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
+ struct dml2_core_calcs_CalculateVMRowAndSwath_params CalculateVMRowAndSwath_params;
+ struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params CalculateSwathAndDETConfiguration_params;
+ struct dml2_core_calcs_CalculateStutterEfficiency_params CalculateStutterEfficiency_params;
+ struct dml2_core_calcs_CalculatePrefetchSchedule_params CalculatePrefetchSchedule_params;
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params CheckGlobalPrefetchAdmissibility_params;
+ struct dml2_core_calcs_calculate_mcache_setting_params calculate_mcache_setting_params;
+ struct dml2_core_calcs_calculate_tdlut_setting_params calculate_tdlut_setting_params;
+ struct dml2_core_shared_calculate_vm_and_row_bytes_params calculate_vm_and_row_bytes_params;
+ struct dml2_core_shared_calculate_mcache_setting_locals calculate_mcache_setting_locals; // NOTE(review): a *_locals member listed in the params section
+ struct dml2_core_shared_CalculateMetaAndPTETimes_params CalculateMetaAndPTETimes_params;
+ struct dml2_core_calcs_calculate_peak_bandwidth_required_params calculate_peak_bandwidth_params;
+ struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params calculate_bytes_to_fetch_required_to_hide_latency_params;
+};
+
+//struct dml2_svp_mode_override;
+struct dml2_core_internal_display_mode_lib { // top-level core state: IP/SoC description, mode-support and mode-programming data, hooks, scratch
+ struct dml2_core_ip_params ip;
+ struct dml2_soc_bb soc;
+ struct dml2_ip_capabilities ip_caps;
+
+ //@brief Mode Support and Mode programming struct
+ // Used to hold the inputs, intermediates and outputs of the calculations
+ struct dml2_core_internal_mode_support ms; // struct for mode support
+ struct dml2_core_internal_mode_program mp; // struct for mode programming
+ // Available overridable calculators for core_shared.
+ // if null, core_shared will use default calculators.
+ struct dml2_core_shared_calculation_funcs funcs;
+
+ struct dml2_core_internal_scratch scratch; // embedded by value, so it adds to the size of this struct
+};
+
+struct dml2_core_calcs_mode_support_ex { // argument bundle for the mode-support entry point (by name; the consumer is outside this chunk)
+ struct dml2_core_internal_display_mode_lib *mode_lib;
+ const struct dml2_display_cfg *in_display_cfg; // read-only input
+ const struct dml2_mcg_min_clock_table *min_clk_table; // read-only input
+ int min_clk_index; // presumably an index into min_clk_table -- confirm against the consumer
+ //unsigned int in_state_index;
+ struct dml2_core_internal_mode_support_info *out_evaluation_info; // non-const: receives the evaluation result
+};
+
+struct core_display_cfg_support_info;
+
+struct dml2_core_calcs_mode_programming_ex { // argument bundle for the mode-programming entry point (by name; the consumer is outside this chunk)
+ struct dml2_core_internal_display_mode_lib *mode_lib;
+ const struct dml2_display_cfg *in_display_cfg; // read-only input
+ const struct dml2_mcg_min_clock_table *min_clk_table; // read-only input
+ const struct core_display_cfg_support_info *cfg_support_info; // read-only input; only forward-declared in this header
+ int min_clk_index; // presumably an index into min_clk_table -- confirm against the consumer
+ struct dml2_display_cfg_programming *programming; // non-const: presumably the output -- confirm
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
new file mode 100644
index 000000000000..5f301befed16
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
@@ -0,0 +1,786 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_core_utils.h"
+
+double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder)
+{
+	const double quotient = dividend / divisor; /* divisor is not checked for zero */
+	*remainder = (quotient - (int)quotient > 0); /* 1 if the quotient has a positive fractional part, else 0 -- not an arithmetic remainder */
+	return quotient;
+}
+
+const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
+{
+	const char *name = "dml2_core_internal_bw_unknown"; /* used for any value not matched below */
+
+	if (bw_type == dml2_core_internal_bw_sdp)
+		name = "dml2_core_internal_bw_sdp";
+	else if (bw_type == dml2_core_internal_bw_dram)
+		name = "dml2_core_internal_bw_dram";
+	else if (bw_type == dml2_core_internal_bw_max)
+		name = "dml2_core_internal_bw_max";
+
+	return name;
+}
+
+/*
+ * True only for the dml2_420_* source formats. Every other listed format
+ * yields false; a value not listed here hits DML_ASSERT(0) and yields false.
+ */
+bool dml2_core_utils_is_420(enum dml2_source_format_class source_format)
+{
+	bool is_420 = false;
+
+	switch (source_format) {
+	case dml2_420_8:
+	case dml2_420_10:
+	case dml2_420_12:
+		is_420 = true;
+		break;
+	case dml2_444_8:
+		is_420 = false;
+		break;
+	case dml2_444_16:
+		is_420 = false;
+		break;
+	case dml2_444_32:
+		is_420 = false;
+		break;
+	case dml2_444_64:
+		is_420 = false;
+		break;
+	case dml2_rgbe_alpha:
+		is_420 = false;
+		break;
+	case dml2_rgbe:
+		is_420 = false;
+		break;
+	case dml2_mono_8:
+		is_420 = false;
+		break;
+	case dml2_mono_16:
+		is_420 = false;
+		break;
+	case dml2_422_planar_8:
+		is_420 = false;
+		break;
+	case dml2_422_planar_10:
+		is_420 = false;
+		break;
+	case dml2_422_planar_12:
+		is_420 = false;
+		break;
+	case dml2_422_packed_8:
+		is_420 = false;
+		break;
+	case dml2_422_packed_10:
+		is_420 = false;
+		break;
+	case dml2_422_packed_12:
+		is_420 = false;
+		break;
+	default:
+		DML_ASSERT(0);
+		break;
+	}
+	return is_420;
+}
+
+/*
+ * True only for the dml2_422_planar_* source formats. Every other listed
+ * format yields false; an unlisted value hits DML_ASSERT(0) and yields false.
+ */
+bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format)
+{
+	bool is_planar = false;
+
+	switch (source_format) {
+	case dml2_422_planar_8:
+	case dml2_422_planar_10:
+	case dml2_422_planar_12:
+		is_planar = true;
+		break;
+	case dml2_444_8:
+		is_planar = false;
+		break;
+	case dml2_444_16:
+		is_planar = false;
+		break;
+	case dml2_444_32:
+		is_planar = false;
+		break;
+	case dml2_444_64:
+		is_planar = false;
+		break;
+	case dml2_420_8:
+		is_planar = false;
+		break;
+	case dml2_420_10:
+		is_planar = false;
+		break;
+	case dml2_420_12:
+		is_planar = false;
+		break;
+	case dml2_rgbe_alpha:
+		is_planar = false;
+		break;
+	case dml2_rgbe:
+		is_planar = false;
+		break;
+	case dml2_mono_8:
+		is_planar = false;
+		break;
+	case dml2_mono_16:
+		is_planar = false;
+		break;
+	case dml2_422_packed_8:
+		is_planar = false;
+		break;
+	case dml2_422_packed_10:
+		is_planar = false;
+		break;
+	case dml2_422_packed_12:
+		is_planar = false;
+		break;
+	default:
+		DML_ASSERT(0);
+		break;
+	}
+	return is_planar;
+}
+
+/*
+ * True only for the dml2_422_packed_* source formats. Every other listed
+ * format yields false; an unlisted value hits DML_ASSERT(0) and yields false.
+ */
+bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format)
+{
+	bool is_packed = false;
+
+	switch (source_format) {
+	case dml2_422_packed_8:
+	case dml2_422_packed_10:
+	case dml2_422_packed_12:
+		is_packed = true;
+		break;
+	case dml2_444_8:
+		is_packed = false;
+		break;
+	case dml2_444_16:
+		is_packed = false;
+		break;
+	case dml2_444_32:
+		is_packed = false;
+		break;
+	case dml2_444_64:
+		is_packed = false;
+		break;
+	case dml2_420_8:
+		is_packed = false;
+		break;
+	case dml2_420_10:
+		is_packed = false;
+		break;
+	case dml2_420_12:
+		is_packed = false;
+		break;
+	case dml2_rgbe_alpha:
+		is_packed = false;
+		break;
+	case dml2_rgbe:
+		is_packed = false;
+		break;
+	case dml2_mono_8:
+		is_packed = false;
+		break;
+	case dml2_mono_16:
+		is_packed = false;
+		break;
+	case dml2_422_planar_8:
+		is_packed = false;
+		break;
+	case dml2_422_planar_10:
+		is_packed = false;
+		break;
+	case dml2_422_planar_12:
+		is_packed = false;
+		break;
+	default:
+		DML_ASSERT(0);
+		break;
+	}
+	return is_packed;
+}
+
+void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only)
+{ /* Logs the fields of *support via DML_LOG_VERBOSE. With fail_only set, a field is printed only when it equals the value tested in its condition. */
+ DML_LOG_VERBOSE("DML: ===================================== \n");
+ DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n");
+ if (!fail_only || support->ScaleRatioAndTapsSupport == 0) /* tested against 0 */
+ DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport);
+ if (!fail_only || support->SourceFormatPixelAndScanSupport == 0)
+ DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport);
+ if (!fail_only || support->ViewportSizeSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport);
+ if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) /* tested against 1 */
+ DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion);
+ if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1)
+ DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated);
+ if (!fail_only || support->BPPForMultistreamNotIndicated == 1)
+ DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated);
+ if (!fail_only || support->MultistreamWithHDMIOreDP == 1)
+ DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP);
+ if (!fail_only || support->ExceededMultistreamSlots == 1)
+ DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots);
+ if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1)
+ DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink);
+ if (!fail_only || support->NotEnoughLanesForMSO == 1)
+ DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO);
+ if (!fail_only || support->P2IWith420 == 1)
+ DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420);
+ if (!fail_only || support->DSC422NativeNotSupported == 1)
+ DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported);
+ if (!fail_only || support->DSCSlicesODMModeSupported == 0)
+ DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported);
+ if (!fail_only || support->NotEnoughDSCUnits == 1)
+ DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits);
+ if (!fail_only || support->NotEnoughDSCSlices == 1)
+ DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices);
+ if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
+ DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
+ if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
+ DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
+ if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1)
+ DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported);
+ if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0)
+ DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport);
+ if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1)
+ DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported);
+ if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1)
+ DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState);
+ if (!fail_only || support->ROBSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport);
+ if (!fail_only || support->OutstandingRequestsSupport == 0)
+ DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport);
+ if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0)
+ DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance);
+ if (!fail_only || support->DISPCLK_DPPCLK_Support == 0)
+ DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support);
+ if (!fail_only || support->TotalAvailablePipesSupport == 0)
+ DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport);
+ if (!fail_only || support->NumberOfOTGSupport == 0)
+ DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport);
+ if (!fail_only || support->NumberOfHDMIFRLSupport == 0)
+ DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport);
+ if (!fail_only || support->NumberOfDP2p0Support == 0)
+ DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support);
+ if (!fail_only || support->EnoughWritebackUnits == 0)
+ DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits);
+ if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0)
+ DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport);
+ if (!fail_only || support->WritebackLatencySupport == 0)
+ DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport);
+ if (!fail_only || support->CursorSupport == 0)
+ DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport);
+ if (!fail_only || support->PitchSupport == 0)
+ DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport);
+ if (!fail_only || support->ViewportExceedsSurface == 1)
+ DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface);
+ if (!fail_only || support->PrefetchSupported == 0)
+ DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported);
+ if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0)
+ DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport);
+ if (!fail_only || support->AvgBandwidthSupport == 0)
+ DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport);
+ if (!fail_only || support->DynamicMetadataSupported == 0)
+ DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
+ if (!fail_only || support->VRatioInPrefetchSupported == 0)
+ DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
+ if (!fail_only || support->PTEBufferSizeNotExceeded == 0)
+ DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
+ if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0)
+ DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded);
+ if (!fail_only || support->ExceededMALLSize == 1)
+ DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize);
+ if (!fail_only || support->g6_temp_read_support == 0)
+ DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support);
+ if (!fail_only || support->ImmediateFlipSupport == 0)
+ DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport);
+ if (!fail_only || support->LinkCapacitySupport == 0)
+ DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport);
+
+ if (!fail_only || support->ModeSupport == 0) /* printed last, after every individual flag */
+ DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport);
+ DML_LOG_VERBOSE("DML: ===================================== \n");
+}
+
+const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)
+{
+	if (dml2_core_internal_soc_state_type == dml2_core_internal_soc_state_sys_idle)
+		return "dml2_core_internal_soc_state_sys_idle";
+	if (dml2_core_internal_soc_state_type == dml2_core_internal_soc_state_sys_active)
+		return "dml2_core_internal_soc_state_sys_active";
+	if (dml2_core_internal_soc_state_type == dml2_core_internal_soc_state_svp_prefetch)
+		return "dml2_core_internal_soc_state_svp_prefetch";
+
+	/*
+	 * dml2_core_internal_soc_state_max and unlisted values get the generic name.
+	 */
+	return "dml2_core_internal_soc_state_unknown";
+}
+
+
+void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg)
+{
+	for (unsigned int k = 0; k < display_cfg->num_planes; k++) {
+		/* out_bpp[k] is derived from the stream that plane k is attached to */
+		const struct dml2_stream_parameters *stream = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index];
+		double bpc = (double)stream->timing.bpc;
+		if (stream->timing.dsc.enable == dml2_dsc_disable) {
+			switch (stream->output.output_format) {
+			case dml2_444:
+				out_bpp[k] = bpc * 3;
+				break;
+			case dml2_s422:
+			case dml2_n422:
+				out_bpp[k] = bpc * 2;
+				break;
+			case dml2_420:
+			default:
+				out_bpp[k] = bpc * 1.5;
+				break;
+			}
+		} else if (stream->timing.dsc.enable == dml2_dsc_enable) {
+			out_bpp[k] = (double)stream->timing.dsc.dsc_compressed_bpp_x16 / 16; /* field is stored scaled by 16 */
+		} else {
+			out_bpp[k] = 0; /* any other dsc.enable setting */
+		}
+#ifdef __DML_VBA_DEBUG__
+		DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc);
+		DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, stream->timing.dsc.enable);
+		DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]);
+#endif
+	}
+}
+
+/*
+ * Round num up (up == true) or down to a multiple of `multiple`; num is returned unchanged when multiple is 0 or already divides it.
+ */
+unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up)
+{
+	unsigned int rounded_down;
+
+	if (multiple == 0)
+		return num;
+
+	rounded_down = num - (num % multiple);
+	if (rounded_down == num || !up)
+		return rounded_down;
+
+	return rounded_down + multiple;
+}
+
+unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info)
+{
+	unsigned int total_dpps = 0;
+	unsigned int plane;
+
+	/* The active pipe count is the sum of dpps_used over the first num_planes planes. */
+	for (plane = 0; plane < num_planes; plane++)
+		total_dpps += (unsigned int)cfg_support_info->plane_support_info[plane].dpps_used;
+#ifdef __DML_VBA_DEBUG__
+	DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, total_dpps);
+#endif
+	return total_dpps;
+}
+
+void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane)
+{
+	unsigned int pipe_idx = 0;
+
+	/* pipe_plane[] is treated as DML2_MAX_PLANES entries; start with every pipe unmapped. */
+	for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k)
+		pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__;
+
+	/* Hand out consecutive pipe slots: each plane takes as many slots as its dpps_used. */
+	for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) {
+		/* Never write past the DML2_MAX_PLANES slots initialised above, even if the dpps_used values sum higher. */
+		for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used && pipe_idx < DML2_MAX_PLANES; i++)
+			pipe_plane[pipe_idx++] = plane_idx;
+	}
+}
+
+/*
+ * A plane is a phantom pipe when its legacy_svp_config override is either
+ * dml2_svp_mode_override_phantom_pipe or
+ * dml2_svp_mode_override_phantom_pipe_no_data_return; every other override
+ * value reports false.
+ */
+bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg)
+{
+	return plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe ||
+		plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return;
+}
+
+/*
+ * Tile block size in bytes for a swizzle mode:
+ *   256    - both linear modes and dml2_sw_256b_2d
+ *   4096   - dml2_sw_4kb_2d
+ *   65536  - every 64kb mode
+ *   262144 - every 256kb mode
+ * Unlisted modes hit DML_ASSERT(0) and fall back to 256. byte_per_pixel is not used.
+ */
+unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel)
+{
+	switch (sw_mode) {
+	case dml2_sw_linear:
+	case dml2_sw_256b_2d:
+	case dml2_gfx11_sw_linear:
+		return 256;
+	case dml2_sw_4kb_2d:
+		return 4096;
+	case dml2_sw_64kb_2d:
+	case dml2_gfx11_sw_64kb_d:
+	case dml2_gfx11_sw_64kb_d_t:
+	case dml2_gfx11_sw_64kb_d_x:
+	case dml2_gfx11_sw_64kb_r_x:
+		return 65536;
+	case dml2_sw_256kb_2d:
+	case dml2_gfx11_sw_256kb_d_x:
+	case dml2_gfx11_sw_256kb_r_x:
+		return 262144;
+	default:
+		DML_ASSERT(0);
+		return 256;
+	}
+}
+
+bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel)
+{
+	return byte_per_pixel != 2; /* decided by byte_per_pixel alone; sw_mode is accepted but not consulted */
+}
+
+bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode)
+{
+	return sw_mode == dml2_sw_linear; /* NOTE(review): dml2_gfx11_sw_linear is not matched here -- confirm that is intended */
+}
+
+
+bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan)
+{
+	switch (Scan) { /* only the 90 and 270 rotations count as vertical */
+	case dml2_rotation_90:
+	case dml2_rotation_270:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Swizzle mode to gfx version: 12 for the dml2_sw_* modes, 11 for the dml2_gfx11_sw_* modes, 0 otherwise. */
+int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode)
+{
+	switch (sw_mode) {
+	case dml2_sw_linear:
+	case dml2_sw_256b_2d:
+	case dml2_sw_4kb_2d:
+	case dml2_sw_64kb_2d:
+	case dml2_sw_256kb_2d:
+		return 12;
+	case dml2_gfx11_sw_linear:
+	case dml2_gfx11_sw_64kb_d:
+	case dml2_gfx11_sw_64kb_d_t:
+	case dml2_gfx11_sw_64kb_d_x:
+	case dml2_gfx11_sw_64kb_r_x:
+	case dml2_gfx11_sw_256kb_d_x:
+	case dml2_gfx11_sw_256kb_r_x:
+		return 11;
+	default:
+		/* neither family: log, assert, and report version 0 */
+		DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode);
+		DML_ASSERT(0);
+		return 0;
+	}
+}
+
+unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
+{
+	unsigned int i;
+	unsigned int index = 0;
+
+	/* Stop at the first entry that is unused (minimum_uclk_khz == 0) or needs more than uclk_freq_khz. */
+	for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
+		DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz);
+		if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
+			per_uclk_dpm_params[i].minimum_uclk_khz == 0)
+			break;
+	}
+
+	/*
+	 * Pick the entry before the stop point (0 if stopped at entry 0); a full pass now selects the last entry, not the one before it.
+	 */
+	if (i > 0)
+		index = i - 1;
+#if defined(__DML_VBA_DEBUG__)
+	DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
+	DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index);
+#endif
+	return index;
+}
+
+unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
+{
+	unsigned int i;
+	bool clk_entry_found;
+
+	/* Linear search for an exact frequency match; i stops on the matching slot. */
+	for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
+		DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]);
+		if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i])
+			break;
+	}
+
+	/* Without a match i ends up equal to num_clk_values, and that value is what gets returned. */
+	clk_entry_found = (i < clk_table->uclk.num_clk_values);
+	if (!clk_entry_found)
+		DML_ASSERT(clk_entry_found);
+#if defined(__DML_VBA_DEBUG__)
+	DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
+	DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i);
+#endif
+	return i;
+}
+
+/*
+ * Dual-plane formats are the 420 formats, the 422 planar formats and dml2_rgbe_alpha.
+ */
+bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format)
+{
+	return dml2_core_utils_is_420(source_format) ||
+		dml2_core_utils_is_422_planar(source_format) ||
+		source_format == dml2_rgbe_alpha;
+}
+
+unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend)
+{
+	/* A zero input is passed through as 0, so math_log2_approx() is never called with 0. */
+	if (a != 0)
+		return (math_log2_approx(a) - subtrahend); /* NOTE(review): presumably wraps if the log is below subtrahend -- confirm callers */
+	return 0;
+}
+
+static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main,
+	const struct dml2_implicit_svp_meta *meta)
+{
+	memcpy(phantom, main, sizeof(*phantom)); /* begin as an exact copy of the main stream */
+	phantom->timing.drr_config.enabled = false;
+	/* Vertical timing is taken from the SVP meta; the two derived fields are recomputed from it. */
+	phantom->timing.v_total = meta->v_total;
+	phantom->timing.v_active = meta->v_active;
+	phantom->timing.v_front_porch = meta->v_front_porch;
+	phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active;
+	phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active;
+}
+
+static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main,
+	const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream)
+{
+	const double phantom_v_active = (double)phantom_stream->timing.v_active;
+
+	memcpy(phantom, main, sizeof(*phantom)); /* start from the main plane; main_stream is not consulted */
+	phantom->stream_index = phantom_stream_index;
+	phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable;
+	phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return;
+	/* Per plane: height = math_min2(math_ceil2(v_ratio * phantom v_active, 16.0), main viewport height). */
+	phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(
+		math_ceil2((double)main->composition.scaler_info.plane0.v_ratio * phantom_v_active, 16.0), (double)main->composition.viewport.plane0.height);
+	phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(
+		math_ceil2((double)main->composition.scaler_info.plane1.v_ratio * phantom_v_active, 16.0), (double)main->composition.viewport.plane1.height);
+	phantom->immediate_flip = false;
+	phantom->dynamic_meta_data.enable = false;
+	phantom->cursor.num_cursors = 0;
+	phantom->cursor.cursor_width = 0;
+	phantom->tdlut.setup_for_tdlut = false;
+}
+
+void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg,
+ struct dml2_core_scratch *scratch)
+{ /* Copies the input config into svp_expanded_display_cfg and, when implicit SubVP PMO is enabled, appends phantom streams and planes to the copy. */
+ unsigned int stream_index, plane_index;
+ const struct dml2_plane_parameters *main_plane;
+ const struct dml2_stream_parameters *main_stream;
+ const struct dml2_stream_parameters *phantom_stream;
+
+ memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); /* output starts as a verbatim copy of the input */
+ memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); /* NOTE(review): sizes assume int elements and DML2_MAX_PLANES entries -- confirm */
+ memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES);
+ memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); /* NOTE(review): phantom_plane_index_to_main_plane_index is not cleared here -- confirm intended */
+
+ if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) /* nothing to expand: the plain copy is the result */
+ return;
+
+ /* disable unbounded requesting for all planes until stage 3 has been performed */
+ if (!display_cfg->stage3.performed) {
+ svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true;
+ svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false;
+ }
+ // Create the phantom streams
+ for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) {
+ main_stream = &display_cfg->display_config.stream_descriptors[stream_index];
+ scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; /* identity mapping unless overwritten below */
+ scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index;
+
+ if (display_cfg->stage3.stream_svp_meta[stream_index].valid) {
+ // Create the phantom stream
+ create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], /* appended at index num_streams; NOTE(review): no capacity check -- confirm array size */
+ main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]);
+
+ // Associate this phantom stream to the main stream
+ scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index;
+ scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams;
+
+ // Increment num streams
+ svp_expanded_display_cfg->num_streams++;
+ }
+ }
+
+ // Create the phantom planes
+ for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { /* iterates the input's plane count, so appended phantom planes are not revisited */
+ main_plane = &display_cfg->display_config.plane_descriptors[plane_index];
+
+ if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { /* only planes whose stream has valid SVP meta get a phantom */
+ main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index];
+ phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]];
+ create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], /* appended at index num_planes; NOTE(review): no capacity check -- confirm array size */
+ main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream);
+
+ // Associate this phantom plane to the main plane
+ scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index;
+ scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes;
+
+ // Increment num planes
+ svp_expanded_display_cfg->num_planes++;
+
+ // Adjust the main plane settings
+ svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe;
+ }
+ }
+}
+
+/* Every encoder listed below needs a stream encoder; dml2_none and unlisted values do not. */
+bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor)
+{
+	switch (stream_descriptor->output.output_encoder) {
+	case dml2_dp:
+	case dml2_dp2p0:
+	case dml2_edp:
+	case dml2_hdmi:
+	case dml2_hdmifrl:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* DSC-capable encoders are the four listed below; dml2_hdmi, dml2_none and unlisted values are not. */
+bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor)
+{
+	switch (stream_descriptor->output.output_encoder) {
+	case dml2_dp:
+	case dml2_dp2p0:
+	case dml2_edp:
+	case dml2_hdmifrl:
+		return true;
+	default:
+		return false;
+	}
+}
+
+
+/*
+ * True for dml2_dp and dml2_edp only. dml2_dp2p0, dml2_hdmi, dml2_hdmifrl,
+ * dml2_none and any unlisted encoder value report false.
+ */
+bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor)
+{
+	switch (stream_descriptor->output.output_encoder) {
+	case dml2_dp:
+	case dml2_edp:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * True for dml2_dp2p0 only.
+ * dml2_dp, dml2_edp, dml2_hdmi, dml2_hdmifrl, dml2_none and any unlisted
+ * encoder value report false.
+ */
+bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor)
+{
+	switch (stream_descriptor->output.output_encoder) {
+	case dml2_dp2p0:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* True when either the DIO or the HPO DP encoder predicate accepts the stream. */
+bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor)
+{
+	return dml2_core_utils_is_dio_dp_encoder(stream_descriptor) || dml2_core_utils_is_hpo_dp_encoder(stream_descriptor);
+}
+
+
+/*
+ * True for dml2_dp_rate_hbr, _hbr2 and _hbr3. dml2_dp_rate_na, the three
+ * dml2_dp_rate_uhbr* rates and any unlisted value report false.
+ */
+bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate)
+{
+	switch (rate) {
+	case dml2_dp_rate_hbr:
+	case dml2_dp_rate_hbr2:
+	case dml2_dp_rate_hbr3:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * True for dml2_dp_rate_uhbr10, _uhbr13p5 and _uhbr20. The three
+ * dml2_dp_rate_hbr* rates, dml2_dp_rate_na and unlisted values report false.
+ */
+bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate)
+{
+	switch (rate) {
+	case dml2_dp_rate_uhbr10:
+	case dml2_dp_rate_uhbr13p5:
+	case dml2_dp_rate_uhbr20:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * True for dml2_odm_mode_split_1to2 and the two dml2_odm_mode_mso_* modes.
+ * dml2_odm_mode_auto, dml2_odm_mode_bypass, the dml2_odm_mode_combine_*
+ * modes and any unlisted value report false.
+ */
+bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode)
+{
+	switch (odm_mode) {
+	case dml2_odm_mode_split_1to2:
+	case dml2_odm_mode_mso_1to2:
+	case dml2_odm_mode_mso_1to4:
+		return true;
+	default:
+		return false;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h
new file mode 100644
index 000000000000..95f0d017add4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_CORE_UTILS_H__
+#define __DML2_CORE_UTILS_H__
+#include "dml2_internal_shared_types.h"
+#include "dml2_debug.h"
+#include "lib_float_math.h"
+
+double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder);
+const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type);
+bool dml2_core_utils_is_420(enum dml2_source_format_class source_format);
+bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format);
+bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format);
+void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only);
+const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type);
+void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg);
+unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up);
+unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info);
+void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane);
+bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg);
+unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel);
+bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel);
+bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan);
+bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode);
+int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode);
+unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params);
+unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table);
+bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format);
+unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend);
+void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg,
+ struct dml2_core_scratch *scratch);
+bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor);
+bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor);
+bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor);
+bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor);
+bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor);
+bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate);
+bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate);
+bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode);
+
+#endif /* __DML2_CORE_UTILS_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
new file mode 100644
index 000000000000..22969a533a7b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
@@ -0,0 +1,785 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_dpmm_dcn4.h"
+#include "dml2_internal_shared_types.h"
+#include "dml_top_types.h"
+#include "lib_float_math.h"
+
+/* Convert a DRAM bandwidth into the UCLK frequency needed to carry it, given the DRAM configuration. */
+static double dram_bw_kbps_to_uclk_khz(unsigned long long bandwidth_kbps, const struct dml2_dram_params *dram_config)
+{
+	/* amount of data moved per UCLK tick: channels * width per channel * transactions per clock */
+	const unsigned long data_per_tick = dram_config->channel_count
+		* dram_config->channel_width_bytes
+		* dram_config->transactions_per_clock;
+
+	/* NOTE(review): a zero product (unpopulated dram_config) is not guarded against here - confirm callers */
+	return (double)bandwidth_kbps / data_per_tick;
+}
+
+/*
+ * Fetch the latency-driven minimum UCLK/FCLK/DCFCLK from min_clk_table->dram_bw_table.
+ * The table index comes from stage3 when stage3 succeeded, otherwise from stage1.
+ */
+static void get_minimum_clocks_for_latency(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out,
+	double *uclk,
+	double *fclk,
+	double *dcfclk)
+{
+	const int idx = in_out->display_cfg->stage3.success ?
+		in_out->display_cfg->stage3.min_clk_index_for_latency :
+		in_out->display_cfg->stage1.min_clk_index_for_latency;
+
+	*dcfclk = in_out->min_clk_table->dram_bw_table.entries[idx].min_dcfclk_khz;
+	*fclk = in_out->min_clk_table->dram_bw_table.entries[idx].min_fclk_khz;
+	*uclk = dram_bw_kbps_to_uclk_khz(in_out->min_clk_table->dram_bw_table.entries[idx].pre_derate_dram_bw_kbps, &in_out->soc_bb->clk_table.dram_config);
+}
+
+/* Round a up to the next whole number; a is converted to unsigned long, so it is expected to be non-negative and in range. */
+static unsigned long dml_round_up(double a)
+{
+	const unsigned long whole = (unsigned long)a;
+
+	return (a - whole > 0) ? whole + 1 : whole;
+}
+
+static void calculate_system_active_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+ double min_uclk_avg, min_uclk_urgent, min_uclk_bw;
+ double min_fclk_avg, min_fclk_urgent, min_fclk_bw;
+ double min_dcfclk_avg, min_dcfclk_urgent, min_dcfclk_bw;
+ double min_uclk_latency, min_fclk_latency, min_dcfclk_latency;
+ const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
+ /* Fills programming->min_clocks.dcn4x.active: per clock, the larger of the bandwidth-derived and the latency-derived minimum, rounded up. */
+ min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+ min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100); /* dividing by percent/100 raises the requirement to cover the derate */
+ /* urgent DRAM bandwidth: the pixel+VM derate is used when hostvm is enabled, the pixel-only derate otherwise */
+ min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+ if (in_out->display_cfg->display_config.hostvm_enable)
+ min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100);
+ else
+ min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100);
+
+ min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg;
+ /* FCLK: SDP bandwidth divided by fabric_datapath_to_dcn_data_return_bytes, then derated */
+ min_fclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+ min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100);
+
+ min_fclk_urgent = (double)mode_support_result->global.active.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+ min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100);
+
+ min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg;
+ /* DCFCLK: SDP bandwidth divided by return_bus_width_bytes, then derated */
+ min_dcfclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+ min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100);
+
+ min_dcfclk_urgent = (double)mode_support_result->global.active.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+ min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100);
+
+ min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg;
+ /* latency-driven floors come from the min clock table */
+ get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency);
+
+ in_out->programming->min_clocks.dcn4x.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency);
+ in_out->programming->min_clocks.dcn4x.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency);
+ in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency);
+}
+
+static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+ double min_uclk_avg, min_uclk_urgent, min_uclk_bw;
+ double min_fclk_avg, min_fclk_urgent, min_fclk_bw;
+ double min_dcfclk_avg, min_dcfclk_urgent, min_dcfclk_bw;
+ double min_fclk_latency, min_dcfclk_latency;
+ double min_uclk_latency;
+ const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
+ /* Fills min_clocks.dcn4x.svp_prefetch and .svp_prefetch_no_throttle from the same svp_prefetch bandwidths, using two different derate sets. */
+ /* Pass 1 - assumes DF throttling is enabled: dcn_mall_prefetch_* derates, result stored in svp_prefetch */
+ min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+ min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100);
+
+ min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+ min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100);
+
+ min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg;
+
+ min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+ min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100);
+
+ min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+ min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100);
+
+ min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg;
+
+ min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+ min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100);
+
+ min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+ min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100);
+
+ min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg;
+
+ get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency);
+
+ in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency);
+ in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency);
+ in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency);
+
+ /* Pass 2 - assumes DF throttling is disabled: system_active_* derates, result stored in svp_prefetch_no_throttle */
+ min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+ min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100);
+
+ min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+ min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100);
+
+ min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg;
+
+ min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+ min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100);
+
+ min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+ min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100);
+
+ min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg;
+
+ min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+ min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100);
+
+ min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+ min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100);
+
+ min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg;
+ /* same latency floors as in pass 1; the lookup is simply repeated */
+ get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency);
+
+ in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency);
+ in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency);
+ in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency);
+}
+
+static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+ double min_uclk_avg;
+ double min_fclk_avg;
+ double min_dcfclk_avg;
+ double min_uclk_latency, min_fclk_latency, min_dcfclk_latency;
+ const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
+ /* Fills min_clocks.dcn4x.idle from the active *average* bandwidths and the system_idle_average derates; no urgent term is considered. */
+ min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.active.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+ min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.dram_derate_percent_pixel / 100);
+
+ min_fclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+ min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.fclk_derate_percent / 100);
+
+ min_dcfclk_avg = (double)mode_support_result->global.active.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+ min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_idle_average.dcfclk_derate_percent / 100);
+
+ get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency);
+ /* each idle clock is the larger of its average-bandwidth and latency minimum, rounded up */
+ in_out->programming->min_clocks.dcn4x.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency);
+ in_out->programming->min_clocks.dcn4x.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency);
+ in_out->programming->min_clocks.dcn4x.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? min_dcfclk_avg : min_dcfclk_latency);
+}
+
+static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double margin, unsigned long vco_freq_khz, unsigned long *rounded_khz, uint32_t *divider_id)
+{
+ enum dentist_divider_range { /* divider values are expressed in units of 1/DFS_DIVIDER_RANGE_SCALE_FACTOR (quarters) */
+ DFS_DIVIDER_RANGE_1_START = 8, /* 2.00 */
+ DFS_DIVIDER_RANGE_1_STEP = 1, /* 0.25 */
+ DFS_DIVIDER_RANGE_2_START = 64, /* 16.00 */
+ DFS_DIVIDER_RANGE_2_STEP = 2, /* 0.50 */
+ DFS_DIVIDER_RANGE_3_START = 128, /* 32.00 */
+ DFS_DIVIDER_RANGE_3_STEP = 4, /* 1.00 */
+ DFS_DIVIDER_RANGE_4_START = 248, /* 62.00 */
+ DFS_DIVIDER_RANGE_4_STEP = 264, /* 66.00 */
+ DFS_DIVIDER_RANGE_SCALE_FACTOR = 4
+ };
+
+ enum DFS_base_divider_id { /* divider ID assigned to the first divider of each range */
+ DFS_BASE_DID_1 = 0x08,
+ DFS_BASE_DID_2 = 0x40,
+ DFS_BASE_DID_3 = 0x60,
+ DFS_BASE_DID_4 = 0x7e,
+ DFS_MAX_DID = 0x7f
+ };
+
+ unsigned int divider;
+ /* Scale clock_khz by (1 + margin), derive the VCO divider for it, and report the divider ID and resulting clock. Returns false on invalid input. */
+ if (clock_khz < 1 || vco_freq_khz < 1 || clock_khz > vco_freq_khz)
+ return false;
+ /* the range check above is done before the margin is applied, so the scaled clock may exceed vco_freq_khz */
+ clock_khz *= 1.0 + margin;
+
+ divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); /* truncates toward zero; NOTE(review): becomes 0 if the scaled clock exceeds 4 * vco_freq_khz */
+
+ /* we want to floor here to get higher clock than required rather than lower */
+ if (divider < DFS_DIVIDER_RANGE_2_START) {
+ if (divider < DFS_DIVIDER_RANGE_1_START)
+ *divider_id = DFS_BASE_DID_1; /* below the smallest divider: clamp the ID to the first entry of range 1 */
+ else
+ *divider_id = DFS_BASE_DID_1 + ((divider - DFS_DIVIDER_RANGE_1_START) / DFS_DIVIDER_RANGE_1_STEP);
+ } else if (divider < DFS_DIVIDER_RANGE_3_START) {
+ *divider_id = DFS_BASE_DID_2 + ((divider - DFS_DIVIDER_RANGE_2_START) / DFS_DIVIDER_RANGE_2_STEP);
+ } else if (divider < DFS_DIVIDER_RANGE_4_START) {
+ *divider_id = DFS_BASE_DID_3 + ((divider - DFS_DIVIDER_RANGE_3_START) / DFS_DIVIDER_RANGE_3_STEP);
+ } else {
+ *divider_id = DFS_BASE_DID_4 + ((divider - DFS_DIVIDER_RANGE_4_START) / DFS_DIVIDER_RANGE_4_STEP);
+ if (*divider_id > DFS_MAX_DID)
+ *divider_id = DFS_MAX_DID;
+ }
+ /* NOTE(review): rounded_khz uses the raw divider, not the clamped/step-quantized divider that divider_id encodes - confirm this is intended */
+ *rounded_khz = vco_freq_khz * DFS_DIVIDER_RANGE_SCALE_FACTOR / divider;
+
+ return true;
+}
+
+static bool round_to_non_dfs_granularity(unsigned long dispclk_khz, unsigned long dpprefclk_khz, unsigned long dtbrefclk_khz,
+	unsigned long *rounded_dispclk_khz, unsigned long *rounded_dpprefclk_khz, unsigned long *rounded_dtbrefclk_khz)
+{
+	unsigned long pll_frequency_khz;
+
+	/* Each output is pll / integer divider (divider capped at 32). dispclk and dpprefclk are divisors, so a 0 request fails. */
+	if (dispclk_khz == 0 || dpprefclk_khz == 0)
+		return false;
+
+	pll_frequency_khz = (unsigned long) math_max2(600000, math_ceil2(math_max3(dispclk_khz, dpprefclk_khz, dtbrefclk_khz), 1000));
+	*rounded_dispclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dispclk_khz, 32);
+	*rounded_dpprefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dpprefclk_khz, 32);
+	/* a dtbrefclk request of 0 is legal and is passed through as 0 */
+	if (dtbrefclk_khz > 0)
+		*rounded_dtbrefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dtbrefclk_khz, 32);
+	else
+		*rounded_dtbrefclk_khz = 0;
+	return true;
+}
+
+/* Map min_value onto clock_table: with more than 2 entries pick the first entry >= min_value. Writes *rounded_value and returns true only on success. */
+static bool round_up_and_copy_to_next_dpm(unsigned long min_value, unsigned long *rounded_value, const struct dml2_clk_table *clock_table)
+{
+	int index = 0;
+
+	if (clock_table->num_clk_values > 2) {
+		while (index < clock_table->num_clk_values && clock_table->clk_values_khz[index] < min_value)
+			index++;
+		if (index >= clock_table->num_clk_values)
+			return false;
+		*rounded_value = clock_table->clk_values_khz[index];
+		return true;
+	}
+	/* 2 or fewer entries: min_value is kept as-is if it fits under the last entry; an empty table has no last entry and never matches */
+	if (clock_table->num_clk_values < 1 || clock_table->clk_values_khz[clock_table->num_clk_values - 1] < min_value)
+		return false;
+	*rounded_value = min_value;
+	return true;
+}
+
+static bool round_up_to_next_dpm(unsigned long *clock_value, const struct dml2_clk_table *clock_table)
+{
+ return round_up_and_copy_to_next_dpm(*clock_value, clock_value, clock_table); /* in-place variant: *clock_value is both the requested minimum and the destination */
+}
+
+/*
+ * Round the active, svp_prefetch and idle minimums (DCFCLK, FCLK, UCLK in
+ * that order for each state) up via round_up_to_next_dpm() against the
+ * matching table in state_table. Evaluation stops at the first clock that
+ * cannot be mapped and the function then returns false.
+ *
+ * The svp_prefetch_no_throttle clocks are optional: if any of them cannot be
+ * mapped all three are set to 0 and the function still returns true.
+ */
+static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table)
+{
+	bool optional_ok;
+
+	/* mandatory clocks: && stops evaluating at the first failed mapping */
+	if (!(round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.dcfclk_khz, &state_table->dcfclk) &&
+	      round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.fclk_khz, &state_table->fclk) &&
+	      round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.uclk_khz, &state_table->uclk) &&
+	      round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz, &state_table->dcfclk) &&
+	      round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz, &state_table->fclk) &&
+	      round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz, &state_table->uclk) &&
+	      round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.dcfclk_khz, &state_table->dcfclk) &&
+	      round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.fclk_khz, &state_table->fclk) &&
+	      round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk)))
+		return false;
+
+	/* these clocks are optional, so they can fail to map, in which case map all to 0 */
+	optional_ok = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz, &state_table->dcfclk) &&
+		round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) &&
+		round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk);
+	if (!optional_ok) {
+		display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = 0;
+		display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = 0;
+		display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = 0;
+	}
+
+	return true;
+}
+
+static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table)
+{
+ bool result;
+ int index;
+ /* Pick the lowest table index whose dcfclk/fclk/uclk entries together cover the active minimums, then do the same for idle. Returns false if either search fails. */
+ result = false;
+ for (index = 0; index < state_table->uclk.num_clk_values; index++) { /* bound is the uclk count, yet dcfclk/fclk are indexed too: relies on all three tables having the same size */
+ if (display_cfg->min_clocks.dcn4x.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] &&
+ display_cfg->min_clocks.dcn4x.active.fclk_khz <= state_table->fclk.clk_values_khz[index] &&
+ display_cfg->min_clocks.dcn4x.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) {
+ display_cfg->min_clocks.dcn4x.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index];
+ display_cfg->min_clocks.dcn4x.active.fclk_khz = state_table->fclk.clk_values_khz[index];
+ display_cfg->min_clocks.dcn4x.active.uclk_khz = state_table->uclk.clk_values_khz[index];
+ result = true;
+ break;
+ }
+ }
+
+ if (result) {
+ result = false; /* idle must independently find a covering level */
+ for (index = 0; index < state_table->uclk.num_clk_values; index++) {
+ if (display_cfg->min_clocks.dcn4x.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] &&
+ display_cfg->min_clocks.dcn4x.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] &&
+ display_cfg->min_clocks.dcn4x.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) {
+ display_cfg->min_clocks.dcn4x.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index];
+ display_cfg->min_clocks.dcn4x.idle.fclk_khz = state_table->fclk.clk_values_khz[index];
+ display_cfg->min_clocks.dcn4x.idle.uclk_khz = state_table->uclk.clk_values_khz[index];
+ result = true;
+ break;
+ }
+ }
+ }
+
+ // SVP is not supported on any coarse grained SoCs, so the svp_prefetch clocks are cleared regardless of result
+ display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = 0;
+ display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz = 0;
+ display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz = 0;
+ /* NOTE(review): svp_prefetch_no_throttle is left untouched here - confirm that is intended */
+ return result;
+}
+
+static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mode_support_result, struct dml2_display_cfg_programming *display_cfg, const struct dml2_soc_state_table *state_table)
+{
+ bool result = false;
+ bool dcfclk_fine_grained = false, fclk_fine_grained = false, clock_state_count_identical = false;
+ unsigned int i;
+ /* Map every minimum clock in display_cfg onto state_table; the first mapping failure makes all later steps no-ops and false is returned. */
+ if (!state_table || !display_cfg) /* NOTE(review): mode_support_result is dereferenced below without a matching NULL check */
+ return false;
+
+ if (state_table->dcfclk.num_clk_values == 2) { /* a table with exactly two entries is treated as fine grained */
+ dcfclk_fine_grained = true;
+ }
+
+ if (state_table->fclk.num_clk_values == 2) {
+ fclk_fine_grained = true;
+ }
+
+ if (state_table->fclk.num_clk_values == state_table->dcfclk.num_clk_values &&
+ state_table->fclk.num_clk_values == state_table->uclk.num_clk_values) {
+ clock_state_count_identical = true;
+ }
+ /* the coarse mapping indexes all three tables with one index, so it is only used when their sizes match and none is fine grained */
+ if (dcfclk_fine_grained || fclk_fine_grained || !clock_state_count_identical)
+ result = map_soc_min_clocks_to_dpm_fine_grained(display_cfg, state_table);
+ else
+ result = map_soc_min_clocks_to_dpm_coarse_grained(display_cfg, state_table);
+
+ if (result)
+ result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dispclk_khz, &state_table->dispclk);
+ /* per-plane DPPCLK, rounded in place for every one of the DML2_MAX_DCN_PIPES entries */
+ for (i = 0; i < DML2_MAX_DCN_PIPES; i++) {
+ if (result)
+ result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4x.dppclk_khz, &state_table->dppclk);
+ }
+ /* per-stream DSCCLK/DTBCLK/PHYCLK: source values come from mode_support_result, rounded results go into stream_programming */
+ for (i = 0; i < display_cfg->display_config.num_streams; i++) {
+ if (result)
+ result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dscclk_khz, &state_table->dscclk);
+ if (result)
+ result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dtbclk_khz, &state_table->dtbclk);
+ if (result)
+ result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.phyclk_khz, &state_table->phyclk);
+ }
+ /* the reference clocks reuse existing tables: dpprefclk is checked against dppclk, dtbrefclk against dtbclk */
+ if (result)
+ result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dpprefclk_khz, &state_table->dppclk);
+
+ if (result)
+ result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dtbrefclk_khz, &state_table->dtbclk);
+
+ return result;
+}
+
+static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *display_config, int mask)
+{
+ unsigned int i;
+ bool identical = true;
+ bool contains_drr = false;
+ unsigned int remap_array[DML2_MAX_PLANES]; /* NOTE(review): filled with stream indices but sized by DML2_MAX_PLANES - confirm num_streams can never exceed it */
+ unsigned int remap_array_size = 0;
+ /* Returns true when the streams selected by mask (bit i == stream i) have byte-identical timing structs and none has DRR enabled. */
+ // Create a remap array to enable simple iteration through only masked stream indices
+ for (i = 0; i < display_config->num_streams; i++) {
+ if (mask & (0x1 << i)) {
+ remap_array[remap_array_size++] = i;
+ }
+ }
+
+ // 0 or 1 display is always trivially synchronizable
+ if (remap_array_size <= 1)
+ return true;
+
+ // Check that all display timings are the same (each selected stream is compared with its predecessor)
+ for (i = 1; i < remap_array_size; i++) {
+ if (memcmp(&display_config->stream_descriptors[remap_array[i - 1]].timing, &display_config->stream_descriptors[remap_array[i]].timing, sizeof(struct dml2_timing_cfg))) {
+ identical = false;
+ break;
+ }
+ }
+
+ // Check if any displays are drr
+ for (i = 0; i < remap_array_size; i++) {
+ if (display_config->stream_descriptors[remap_array[i]].timing.drr_config.enabled) {
+ contains_drr = true;
+ break;
+ }
+ }
+
+ // Trivial sync is possible if all displays are identical and none are DRR
+ return !contains_drr && identical;
+}
+
+static int find_smallest_idle_time_in_vblank_us(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int mask)
+{
+ unsigned int i;
+ int min_idle_us = 0;
+ unsigned int remap_array[DML2_MAX_PLANES];
+ unsigned int remap_array_size = 0;
+ const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
+ /* Returns the smallest vblank_reserved_time_us among the streams selected by mask (bit i == stream i), or 0 when no stream is selected. */
+ // Create a remap array to enable simple iteration through only masked stream indices
+ for (i = 0; i < in_out->programming->display_config.num_streams; i++) {
+ if (mask & (0x1 << i)) {
+ remap_array[remap_array_size++] = i;
+ }
+ }
+
+ if (remap_array_size == 0)
+ return 0;
+ /* seed the minimum with the first selected stream, then scan the rest */
+ min_idle_us = mode_support_result->cfg_support_info.stream_support_info[remap_array[0]].vblank_reserved_time_us;
+
+ for (i = 1; i < remap_array_size; i++) {
+ if (min_idle_us > mode_support_result->cfg_support_info.stream_support_info[remap_array[i]].vblank_reserved_time_us)
+ min_idle_us = mode_support_result->cfg_support_info.stream_support_info[remap_array[i]].vblank_reserved_time_us;
+ }
+
+ return min_idle_us;
+}
+
+static bool determine_power_management_features_with_vblank_only(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+ int min_idle_us;
+ /* Sets the UCLK/FCLK p-state flags when synchronizable streams reserve enough vblank time for the respective blackout. Never clears a flag; always returns true. */
+ if (are_timings_trivially_synchronizable(&in_out->programming->display_config, 0xF)) { /* 0xF selects streams 0-3; NOTE(review): confirm no more than 4 streams are possible */
+ min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, 0xF);
+
+ if (min_idle_us >= in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)
+ in_out->programming->uclk_pstate_supported = true;
+
+ if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)
+ in_out->programming->fclk_pstate_supported = true;
+ }
+
+ return true;
+}
+
+static int get_displays_without_vactive_margin_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us)
+{
+ unsigned int i;
+ int displays_without_vactive_margin_mask = 0x0;
+ const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
+ /* Returns a bitmask (bit i == plane i) of planes whose active_latency_hiding_us is below latency_hiding_requirement_us. */
+ for (i = 0; i < in_out->programming->display_config.num_planes; i++) {
+ if (mode_support_result->cfg_support_info.plane_support_info[i].active_latency_hiding_us
+ < latency_hiding_requirement_us)
+ displays_without_vactive_margin_mask |= (0x1 << i);
+ }
+
+ return displays_without_vactive_margin_mask;
+}
+
+/* Returns a bitmask (bit i == plane i) of planes whose own legacy_svp_config override is not "auto"; latency_hiding_requirement_us is unused. */
+static int get_displays_with_fams_mask(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int latency_hiding_requirement_us)
+{
+	unsigned int i;
+	int displays_with_fams_mask = 0x0;
+
+	for (i = 0; i < in_out->programming->display_config.num_planes; i++) {
+		if (in_out->programming->display_config.plane_descriptors[i].overrides.legacy_svp_config != dml2_svp_mode_override_auto)
+			displays_with_fams_mask |= (0x1 << i);
+	}
+	return displays_with_fams_mask;
+}
+
+static bool determine_power_management_features_with_vactive_and_vblank(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+ int displays_without_vactive_margin_mask = 0x0;
+ int min_idle_us = 0;
+ /* For each p-state flag still false: only the displays lacking active latency hiding must fit the blackout in vblank. Always returns true. */
+ if (in_out->programming->uclk_pstate_supported == false) {
+ displays_without_vactive_margin_mask =
+ get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us));
+ /* NOTE(review): the mask is built per plane index, but the two helpers below read it per stream index - confirm plane and stream indices coincide */
+ if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) {
+ min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask);
+
+ if (min_idle_us >= in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us)
+ in_out->programming->uclk_pstate_supported = true;
+ }
+ }
+
+ if (in_out->programming->fclk_pstate_supported == false) {
+ displays_without_vactive_margin_mask =
+ get_displays_without_vactive_margin_mask(in_out, (int)(in_out->soc_bb->power_management_parameters.fclk_change_blackout_us));
+ /* same check as for UCLK, with the FCLK change blackout as the requirement */
+ if (are_timings_trivially_synchronizable(&in_out->programming->display_config, displays_without_vactive_margin_mask)) {
+ min_idle_us = find_smallest_idle_time_in_vblank_us(in_out, displays_without_vactive_margin_mask);
+
+ if (min_idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)
+ in_out->programming->fclk_pstate_supported = true;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Mark UCLK p-state as supported when every plane that lacks enough active
+ * latency hiding for a DRAM clock change is present in the FAMS mask.
+ * The flag is never cleared here and the function always returns true.
+ */
+static bool determine_power_management_features_with_fams(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+	const int blackout_us = (int)(in_out->soc_bb->power_management_parameters.dram_clk_change_blackout_us);
+	const int no_vactive_margin_mask = get_displays_without_vactive_margin_mask(in_out, blackout_us);
+	const int fams_mask = get_displays_with_fams_mask(in_out, blackout_us);
+
+	if (no_vactive_margin_mask & ~fams_mask)
+		return true;
+	in_out->programming->uclk_pstate_supported = true;
+	return true;
+}
+
+static void clamp_uclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+ in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; /* last entry of the UCLK table */
+ in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1];
+ in_out->programming->min_clocks.dcn4x.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; /* NOTE(review): assumes num_clk_values >= 1 */
+}
+
+/*
+ * Overwrite the active and idle minimum FCLK with the last entry of the SoC
+ * FCLK table, which this code treats as the maximum. The SVP-prefetch FCLK
+ * minimum is left untouched.
+ */
+static void clamp_fclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+	const unsigned long top_fclk_khz =
+		in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1];
+
+	in_out->programming->min_clocks.dcn4x.active.fclk_khz = top_fclk_khz;
+	in_out->programming->min_clocks.dcn4x.idle.fclk_khz = top_fclk_khz;
+}
+
+/*
+ * Compute the minimum clocks for a mode and map them onto SoC DPM levels.
+ *
+ * Steps, in order:
+ *  - compute the active, SVP-prefetch and idle minimums via the calculate_*()
+ *    helpers
+ *  - raise the active uclk/fclk/dcfclk minimums to the SVP-prefetch minimums
+ *  - derive dispclk, dpprefclk and dtbrefclk with downspread applied, then
+ *    round them with the non-DFS or DFS helper depending on soc_bb->no_dfs
+ *  - derive each plane's dppclk from the rounded dpprefclk
+ *  - copy deep-sleep dcfclk and socclk, then call map_min_clocks_to_dpm()
+ *
+ * On return fclk_pstate_supported and uclk_pstate_supported are both false;
+ * the dpmm_dcn3/dcn4 entry points decide afterwards whether to enable them.
+ *
+ * Returns the result of map_min_clocks_to_dpm().
+ */
+static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+ int i;
+ bool result;
+ double dispclk_khz;
+ const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
+
+ calculate_system_active_minimums(in_out);
+ calculate_svp_prefetch_minimums(in_out);
+ calculate_idle_minimums(in_out);
+
+ // In NV4, there's no support for FCLK or DCFCLK DPM change before SVP prefetch starts, therefore
+ // active minimums must be boosted to prefetch minimums
+ // (the UCLK active minimum is boosted the same way)
+ if (in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4x.active.uclk_khz)
+ in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz;
+
+ if (in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4x.active.fclk_khz)
+ in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz;
+
+ if (in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4x.active.dcfclk_khz)
+ in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz;
+
+ // need some massaging for the dispclk ramping cases:
+ dispclk_khz = mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0) * (1.0 + in_out->ip->dispclk_ramp_margin_percent / 100.0);
+ // ramping margin should not make dispclk exceed the maximum dispclk speed:
+ dispclk_khz = math_min2(dispclk_khz, in_out->min_clk_table->max_clocks_khz.dispclk);
+ // but still the required dispclk can be more than the maximum dispclk speed:
+ dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0));
+
+ // DPP Ref is always set to max of all DPP clocks
+ // dpprefclk_khz is not reset here, so the max starts from the value already in programming->min_clocks
+ for (i = 0; i < DML2_MAX_DCN_PIPES; i++) {
+ if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz)
+ in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz;
+ }
+ // Apply downspread to the selected maximum
+ in_out->programming->min_clocks.dcn4x.dpprefclk_khz = (unsigned long) (in_out->programming->min_clocks.dcn4x.dpprefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0));
+
+ // DTB Ref is always set to max of all DTB clocks
+ // NOTE(review): per_stream[] is walked up to DML2_MAX_DCN_PIPES - confirm it has at least that many entries
+ for (i = 0; i < DML2_MAX_DCN_PIPES; i++) {
+ if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz)
+ in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz;
+ }
+ in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0));
+
+ // Round the three clocks; only the DFS path also fills in the divider ids
+ if (in_out->soc_bb->no_dfs) {
+ round_to_non_dfs_granularity((unsigned long)dispclk_khz, in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->programming->min_clocks.dcn4x.dtbrefclk_khz,
+ &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz);
+ } else {
+ add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0,
+ (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did);
+
+ add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 0.0,
+ (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did);
+
+ add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, 0.0,
+ (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did);
+ }
+
+
+ // Per-plane dppclk: the downspread-adjusted requirement expressed in steps of dpprefclk / 255
+ // (assumes math_ceil2(x, 1.0) rounds x up to a whole number - TODO confirm)
+ // NOTE(review): divides by dpprefclk_khz - confirm it cannot be 0 at this point
+ for (i = 0; i < DML2_MAX_DCN_PIPES; i++) {
+ in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0
+ * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0));
+ }
+
+ in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz;
+ in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz;
+
+ result = map_min_clocks_to_dpm(mode_support_result, in_out->programming, &in_out->soc_bb->clk_table);
+
+ // By default, all power management features are not enabled
+ in_out->programming->fclk_pstate_supported = false;
+ in_out->programming->uclk_pstate_supported = false;
+
+ return result;
+}
+
+/*
+ * Mode-to-DPM mapping entry point used for dml2_project_dcn4x_stage2.
+ *
+ * Runs map_mode_to_soc_dpm(), then the vblank-only, vactive-and-vblank and
+ * FAMS power-management checks in that order (their return values are
+ * ignored). Any clock whose p-state switching is still unsupported afterwards
+ * is clamped to its table maximum.
+ *
+ * Returns the result of map_mode_to_soc_dpm().
+ */
+bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+	const bool mapped = map_mode_to_soc_dpm(in_out);
+
+	determine_power_management_features_with_vblank_only(in_out);
+	determine_power_management_features_with_vactive_and_vblank(in_out);
+	determine_power_management_features_with_fams(in_out);
+
+	// Without p-state support the clock cannot be switched, so pin it to max
+	if (!in_out->programming->uclk_pstate_supported)
+		clamp_uclk_to_max(in_out);
+
+	if (!in_out->programming->fclk_pstate_supported)
+		clamp_fclk_to_max(in_out);
+
+	return mapped;
+}
+
+/*
+ * Mode-to-DPM mapping entry point used for
+ * dml2_project_dcn4x_stage2_auto_drr_svp.
+ *
+ * After map_mode_to_soc_dpm():
+ *  - UCLK p-state is supported when display_cfg->stage3.success is set
+ *  - FCLK p-state is supported when no display lacks vactive margin, or when
+ *    the displays that do are trivially synchronizable and their smallest
+ *    vblank idle time covers fclk_change_blackout_us
+ *  - unsupported clocks are clamped to their table maximum
+ *  - stutter / z8 stutter "supported in blank" and z8 "meets eco" flags are
+ *    derived; a threshold of 0 always yields false
+ *
+ * Returns the result of map_mode_to_soc_dpm().
+ */
+bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+	int no_margin_mask;
+	int idle_us = 0;
+	const bool mapped = map_mode_to_soc_dpm(in_out);
+
+	if (in_out->display_cfg->stage3.success)
+		in_out->programming->uclk_pstate_supported = true;
+
+	no_margin_mask = get_displays_without_vactive_margin_mask(in_out,
+			(int)(in_out->soc_bb->power_management_parameters.fclk_change_blackout_us));
+
+	if (no_margin_mask == 0) {
+		in_out->programming->fclk_pstate_supported = true;
+	} else if (are_timings_trivially_synchronizable(&in_out->programming->display_config, no_margin_mask)) {
+		idle_us = find_smallest_idle_time_in_vblank_us(in_out, no_margin_mask);
+
+		if (idle_us >= in_out->soc_bb->power_management_parameters.fclk_change_blackout_us)
+			in_out->programming->fclk_pstate_supported = true;
+	}
+
+	if (!in_out->programming->uclk_pstate_supported)
+		clamp_uclk_to_max(in_out);
+
+	if (!in_out->programming->fclk_pstate_supported)
+		clamp_fclk_to_max(in_out);
+
+	// Smallest vblank idle time for display mask 0xFF, reused by the stutter checks below
+	idle_us = find_smallest_idle_time_in_vblank_us(in_out, 0xFF);
+
+	in_out->programming->stutter.supported_in_blank =
+		in_out->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 &&
+		idle_us >= in_out->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us;
+
+	// TODO (carried over from the original author): this z8 eco check is flagged as needing a fix
+	in_out->programming->z8_stutter.meets_eco =
+		in_out->soc_bb->power_management_parameters.z8_min_idle_time > 0 &&
+		in_out->programming->informative.power_management.z8.stutter_period >= in_out->soc_bb->power_management_parameters.z8_min_idle_time;
+
+	in_out->programming->z8_stutter.supported_in_blank =
+		in_out->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 &&
+		idle_us >= in_out->soc_bb->power_management_parameters.z8_stutter_exit_latency_us;
+
+	return mapped;
+}
+
+/*
+ * Fill DCHUB watermark sets A and B from the core mode library results.
+ *
+ * Both sets receive identical values. Time-based watermarks are multiplied by
+ * the reference clock in MHz (the dlg_ref_clk_mhz override when it is > 0,
+ * otherwise soc.dchub_refclk_mhz); the urgent-bandwidth fractions are scaled
+ * by 1000. num_watermark_sets is set to 2.
+ *
+ * Always returns true.
+ */
+bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out)
+{
+	static const int wm_sets[] = { DML2_DCHUB_WATERMARK_SET_A, DML2_DCHUB_WATERMARK_SET_B };
+	const unsigned int num_wm_sets = sizeof(wm_sets) / sizeof(wm_sets[0]);
+
+	const struct dml2_display_cfg *display_cfg = &in_out->display_cfg->display_config;
+	const struct dml2_core_internal_display_mode_lib *mode_lib = &in_out->core->clean_me_up.mode_lib;
+	struct dml2_dchub_global_register_set *regs = &in_out->programming->global_regs;
+	double refclk_mhz;
+	unsigned int k;
+
+	if (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0)
+		refclk_mhz = (double)display_cfg->overrides.hw.dlg_ref_clk_mhz;
+	else
+		refclk_mhz = mode_lib->soc.dchub_refclk_mhz;
+
+	for (k = 0; k < num_wm_sets; k++) {
+		const int set = wm_sets[k];
+
+		// Watermarks converted to reference clock cycles
+		regs->wm_regs[set].fclk_pstate = (unsigned int)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_mhz);
+		regs->wm_regs[set].sr_enter = (unsigned int)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_mhz);
+		regs->wm_regs[set].sr_exit = (unsigned int)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_mhz);
+		regs->wm_regs[set].sr_enter_z8 = (unsigned int)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_mhz);
+		regs->wm_regs[set].sr_exit_z8 = (unsigned int)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_mhz);
+		regs->wm_regs[set].temp_read_or_ppt = (unsigned int)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_mhz);
+		regs->wm_regs[set].uclk_pstate = (unsigned int)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_mhz);
+		regs->wm_regs[set].urgent = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_mhz);
+		regs->wm_regs[set].usr = (unsigned int)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_mhz);
+
+		// Both trip-to-memory values are derived from the urgent watermark
+		regs->wm_regs[set].refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_mhz);
+		regs->wm_regs[set].refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.Watermark.UrgentWatermark * refclk_mhz);
+
+		// Urgent bandwidth fractions, scaled by 1000
+		regs->wm_regs[set].frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000);
+		regs->wm_regs[set].frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000);
+		regs->wm_regs[set].frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000);
+	}
+
+	regs->num_watermark_sets = num_wm_sets;
+
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h
new file mode 100644
index 000000000000..e7b58f2efda4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_DPMM_DCN4_H__
+#define __DML2_DPMM_DCN4_H__
+
+#include "dml2_internal_shared_types.h"
+
+/*
+ * Mode-to-DPM mapping: runs the vblank, vactive/vblank and FAMS power
+ * management checks, then clamps uclk/fclk to max where p-state switching is
+ * unsupported.
+ */
+bool dpmm_dcn3_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out);
+/*
+ * Mode-to-DPM mapping that takes UCLK p-state support from stage3.success and
+ * also derives the stutter / z8 stutter flags.
+ */
+bool dpmm_dcn4_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out);
+/* Fill DCHUB watermark sets A and B from the core mode library results. */
+bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c
new file mode 100644
index 000000000000..dfd01440737d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_dpmm_factory.h"
+#include "dml2_dpmm_dcn4.h"
+#include "dml2_external_lib_deps.h"
+
+/* Placeholder map_mode_to_soc_dpm callback: ignores @in_out and reports success. */
+static bool dummy_map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
+{
+ return true;
+}
+
+/* Placeholder map_watermarks callback: ignores @in_out and reports success. */
+static bool dummy_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_out)
+{
+ return true;
+}
+
+/*
+ * Initialise a DPMM instance for the given project.
+ *
+ * @out is zeroed first, then its map_mode_to_soc_dpm / map_watermarks
+ * callbacks are selected by @project_id:
+ *  - dcn4x_stage1:               both placeholders
+ *  - dcn4x_stage2:               dcn3 mode mapping, placeholder watermarks
+ *  - dcn4x_stage2_auto_drr_svp:  dcn4 mode mapping and dcn4 watermarks
+ *
+ * Returns false when @out is NULL or the project id is not handled (in which
+ * case @out, if non-NULL, is left zeroed); true otherwise.
+ */
+bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance *out)
+{
+	if (!out)
+		return false;
+
+	memset(out, 0, sizeof(*out));
+
+	switch (project_id) {
+	case dml2_project_dcn4x_stage1:
+		out->map_mode_to_soc_dpm = dummy_map_mode_to_soc_dpm;
+		out->map_watermarks = dummy_map_watermarks;
+		return true;
+	case dml2_project_dcn4x_stage2:
+		out->map_mode_to_soc_dpm = dpmm_dcn3_map_mode_to_soc_dpm;
+		out->map_watermarks = dummy_map_watermarks;
+		return true;
+	case dml2_project_dcn4x_stage2_auto_drr_svp:
+		out->map_mode_to_soc_dpm = dpmm_dcn4_map_mode_to_soc_dpm;
+		out->map_watermarks = dpmm_dcn4_map_watermarks;
+		return true;
+	case dml2_project_invalid:
+	default:
+		return false;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h
new file mode 100644
index 000000000000..20ba2e446f1d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_DPMM_FACTORY_H__
+#define __DML2_DPMM_FACTORY_H__
+
+#include "dml2_internal_shared_types.h"
+#include "dml_top_types.h"
+
+/*
+ * Zero @out and install the DPMM callbacks for @project_id.
+ * Returns false when @out is NULL or the project id is not handled.
+ */
+bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance *out);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c
new file mode 100644
index 000000000000..a265f254152c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_mcg_dcn4.h"
+#include "dml_top_soc_parameter_types.h"
+
+static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table);
+
+/*
+ * Public entry point: unpacks @in_out and forwards soc_bb and min_clk_table to
+ * build_min_clock_table(), returning its result. @in_out itself is not
+ * NULL-checked.
+ */
+bool mcg_dcn4_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out)
+{
+ return build_min_clock_table(in_out->soc_bb, in_out->min_clk_table);
+}
+
+/*
+ * Convert a UCLK frequency in kHz to a DRAM bandwidth figure:
+ * uclk_khz * channel_count * channel_width_bytes * transactions_per_clock.
+ * The multiplication starts from uclk_khz widened to unsigned long long.
+ */
+static unsigned long long uclk_to_dram_bw_kbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config)
+{
+	return (unsigned long long) uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock;
+}
+
+/*
+ * Round @value up to an entry of @quantized_values.
+ *
+ * The table is scanned from index 0 and the first entry that is greater than
+ * or equal to @value is returned, so a value that already sits on a table
+ * entry is returned unchanged. The scan order means the table is expected to
+ * be in ascending order.
+ *
+ * Returns 0 when @quantized_values is NULL, when @num_quantized_values is not
+ * positive, or when every entry is smaller than @value.
+ */
+static unsigned long round_up_to_quantized_values(unsigned long value, const unsigned long *quantized_values, int num_quantized_values)
+{
+	int i;
+
+	if (!quantized_values)
+		return 0;
+
+	for (i = 0; i < num_quantized_values; i++) {
+		// ">=" rather than ">": an exact match must not be bumped to the
+		// next entry, nor turned into 0 when it equals the last entry
+		if (quantized_values[i] >= value)
+			return quantized_values[i];
+	}
+
+	return 0;
+}
+
+/*
+ * Build min_table->dram_bw_table with one entry per UCLK value, computing the
+ * minimum FCLK and DCFCLK that go with each DRAM bandwidth.
+ *
+ * Steps:
+ *  1. per UCLK value, compute the pre-derate DRAM bandwidth and a "balanced"
+ *     FCLK from the system_active_urgent derate percentages
+ *  2. shift the FCLK requirements up one state (max of the previous state's
+ *     FCLK and half of this state's), halving entry 0
+ *  3. clamp FCLK to the table minimum and to max_fclk_for_uclk_dpm_khz (when
+ *     set), then derive DCFCLK from FCLK
+ *  4. truncate the table at the first state whose clocks exceed
+ *     min_table->max_clocks_khz
+ *  5. remove adjacent duplicate states
+ *
+ * FCLK / DCFCLK are rounded to table values unless the respective clock table
+ * has exactly two values. Expects min_table->max_clocks_khz.dcfclk/.fclk to be
+ * populated by the caller. Always returns true.
+ */
+static bool build_min_clk_table_fine_grained(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table)
+{
+	bool dcfclk_fine_grained = false, fclk_fine_grained = false;
+
+	int i;
+	unsigned int j;
+
+	unsigned long min_dcfclk_khz = 0;
+	unsigned long min_fclk_khz = 0;
+	unsigned long prev_100, cur_50;
+
+	// A clock table with exactly two values is treated as fine grained
+	if (soc_bb->clk_table.dcfclk.num_clk_values == 2) {
+		dcfclk_fine_grained = true;
+	}
+
+	if (soc_bb->clk_table.fclk.num_clk_values == 2) {
+		fclk_fine_grained = true;
+	}
+
+	min_dcfclk_khz = soc_bb->clk_table.dcfclk.clk_values_khz[0];
+	min_fclk_khz = soc_bb->clk_table.fclk.clk_values_khz[0];
+
+	// First calculate the table for "balanced" bandwidths across UCLK/FCLK
+	for (i = 0; i < soc_bb->clk_table.uclk.num_clk_values; i++) {
+		min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps = uclk_to_dram_bw_kbps(soc_bb->clk_table.uclk.clk_values_khz[i], &soc_bb->clk_table.dram_config);
+
+		min_table->dram_bw_table.entries[i].min_fclk_khz = (unsigned long)((((double)min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps * soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100) / ((double)soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100)) / soc_bb->fabric_datapath_to_dcn_data_return_bytes);
+	}
+	min_table->dram_bw_table.num_entries = soc_bb->clk_table.uclk.num_clk_values;
+
+	// To create the minimum table, effectively shift "up" all the dcfclk/fclk entries by 1, and then replace the lowest entry with min fclk/dcfclk
+	// Walk from the top down so entries[i - 1] still holds its unshifted value
+	for (i = min_table->dram_bw_table.num_entries - 1; i > 0; i--) {
+		prev_100 = min_table->dram_bw_table.entries[i - 1].min_fclk_khz;
+		cur_50 = min_table->dram_bw_table.entries[i].min_fclk_khz / 2;
+		min_table->dram_bw_table.entries[i].min_fclk_khz = prev_100 > cur_50 ? prev_100 : cur_50;
+
+		if (!fclk_fine_grained) {
+			min_table->dram_bw_table.entries[i].min_fclk_khz = round_up_to_quantized_values(min_table->dram_bw_table.entries[i].min_fclk_khz, soc_bb->clk_table.fclk.clk_values_khz, soc_bb->clk_table.fclk.num_clk_values);
+		}
+	}
+	min_table->dram_bw_table.entries[0].min_fclk_khz /= 2;
+
+	// Clamp to minimums and maximums
+	for (i = 0; i < (int)min_table->dram_bw_table.num_entries; i++) {
+		// NOTE(review): this dcfclk clamp is overwritten by the fclk-derived value computed below
+		if (min_table->dram_bw_table.entries[i].min_dcfclk_khz < min_dcfclk_khz)
+			min_table->dram_bw_table.entries[i].min_dcfclk_khz = min_dcfclk_khz;
+
+		if (min_table->dram_bw_table.entries[i].min_fclk_khz < min_fclk_khz)
+			min_table->dram_bw_table.entries[i].min_fclk_khz = min_fclk_khz;
+
+		if (soc_bb->max_fclk_for_uclk_dpm_khz > 0 &&
+			min_table->dram_bw_table.entries[i].min_fclk_khz > soc_bb->max_fclk_for_uclk_dpm_khz)
+			min_table->dram_bw_table.entries[i].min_fclk_khz = soc_bb->max_fclk_for_uclk_dpm_khz;
+
+		// Derive dcfclk from fclk: scale by the derate ratio, then by the bus width ratio
+		min_table->dram_bw_table.entries[i].min_dcfclk_khz =
+			min_table->dram_bw_table.entries[i].min_fclk_khz *
+			soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent;
+
+		min_table->dram_bw_table.entries[i].min_dcfclk_khz =
+			min_table->dram_bw_table.entries[i].min_dcfclk_khz * soc_bb->fabric_datapath_to_dcn_data_return_bytes / soc_bb->return_bus_width_bytes;
+
+		if (!dcfclk_fine_grained) {
+			min_table->dram_bw_table.entries[i].min_dcfclk_khz = round_up_to_quantized_values(min_table->dram_bw_table.entries[i].min_dcfclk_khz, soc_bb->clk_table.dcfclk.clk_values_khz, soc_bb->clk_table.dcfclk.num_clk_values);
+		}
+	}
+
+	// Prune states which are invalid (some clocks exceed maximum)
+	// The first invalid state and everything after it is dropped
+	for (i = 0; i < (int)min_table->dram_bw_table.num_entries; i++) {
+		if (min_table->dram_bw_table.entries[i].min_dcfclk_khz > min_table->max_clocks_khz.dcfclk ||
+			min_table->dram_bw_table.entries[i].min_fclk_khz > min_table->max_clocks_khz.fclk) {
+			min_table->dram_bw_table.num_entries = i;
+			break;
+		}
+	}
+
+	// Prune duplicate states
+	i = 0;
+	while (i < (int)min_table->dram_bw_table.num_entries - 1) {
+		if (min_table->dram_bw_table.entries[i].min_dcfclk_khz != min_table->dram_bw_table.entries[i + 1].min_dcfclk_khz ||
+			min_table->dram_bw_table.entries[i].min_fclk_khz != min_table->dram_bw_table.entries[i + 1].min_fclk_khz ||
+			min_table->dram_bw_table.entries[i].pre_derate_dram_bw_kbps != min_table->dram_bw_table.entries[i + 1].pre_derate_dram_bw_kbps) {
+			i++;
+			continue;
+		}
+
+		// i + 1 is the same state as i, so shift everything after it down one slot.
+		// Stop while j + 1 is still a valid entry: reading entries[num_entries]
+		// would go past the populated part of the table (and past the array
+		// itself when the table is full).
+		for (j = i + 1; j + 1 < min_table->dram_bw_table.num_entries; j++) {
+			min_table->dram_bw_table.entries[j].min_dcfclk_khz = min_table->dram_bw_table.entries[j + 1].min_dcfclk_khz;
+			min_table->dram_bw_table.entries[j].min_fclk_khz = min_table->dram_bw_table.entries[j + 1].min_fclk_khz;
+			min_table->dram_bw_table.entries[j].pre_derate_dram_bw_kbps = min_table->dram_bw_table.entries[j + 1].pre_derate_dram_bw_kbps;
+		}
+		min_table->dram_bw_table.num_entries--;
+
+		// i is deliberately not advanced: the entry that just moved into
+		// slot i + 1 may duplicate entry i as well
+	}
+
+	return true;
+}
+
+/*
+ * Build min_table->dram_bw_table by pairing the UCLK, DCFCLK and FCLK tables
+ * index by index: entry k takes the DRAM bandwidth of UCLK value k together
+ * with DCFCLK value k and FCLK value k. The number of entries equals the
+ * number of UCLK values.
+ *
+ * Always returns true.
+ */
+static bool build_min_clk_table_coarse_grained(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table)
+{
+	int state = 0;
+
+	min_table->dram_bw_table.num_entries = soc_bb->clk_table.uclk.num_clk_values;
+
+	while (state < soc_bb->clk_table.uclk.num_clk_values) {
+		min_table->dram_bw_table.entries[state].min_fclk_khz = soc_bb->clk_table.fclk.clk_values_khz[state];
+		min_table->dram_bw_table.entries[state].min_dcfclk_khz = soc_bb->clk_table.dcfclk.clk_values_khz[state];
+		min_table->dram_bw_table.entries[state].pre_derate_dram_bw_kbps = uclk_to_dram_bw_kbps(soc_bb->clk_table.uclk.clk_values_khz[state], &soc_bb->clk_table.dram_config);
+		state++;
+	}
+
+	return true;
+}
+
+/*
+ * Populate @min_table from the SoC bounding box @soc_bb.
+ *
+ * Fills the fixed clocks (MHz values converted to kHz, amclk forced to 0), the
+ * maximum clocks (last entry of each clock table) and the spread-spectrum
+ * maximums (maximum divided by 1 + dcn_downspread_percent / 100), then builds
+ * the DRAM bandwidth table.
+ *
+ * The fine-grained builder is used when the DCFCLK or FCLK table has exactly
+ * two values, or when the FCLK, DCFCLK and UCLK tables do not all have the
+ * same number of values; otherwise the coarse-grained builder is used.
+ *
+ * Returns false when either pointer is NULL, when the DCFCLK or FCLK table has
+ * fewer than two values, or when the UCLK table has more than
+ * DML_MCG_MAX_CLK_TABLE_SIZE values; otherwise the selected builder's result.
+ */
+static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_mcg_min_clock_table *min_table)
+{
+	bool any_fine_grained;
+	bool state_counts_match;
+
+	if (!soc_bb || !min_table)
+		return false;
+
+	if (soc_bb->clk_table.dcfclk.num_clk_values < 2 || soc_bb->clk_table.fclk.num_clk_values < 2)
+		return false;
+
+	if (soc_bb->clk_table.uclk.num_clk_values > DML_MCG_MAX_CLK_TABLE_SIZE)
+		return false;
+
+	any_fine_grained = soc_bb->clk_table.dcfclk.num_clk_values == 2 ||
+		soc_bb->clk_table.fclk.num_clk_values == 2;
+
+	state_counts_match = soc_bb->clk_table.fclk.num_clk_values == soc_bb->clk_table.dcfclk.num_clk_values &&
+		soc_bb->clk_table.fclk.num_clk_values == soc_bb->clk_table.uclk.num_clk_values;
+
+	// Fixed clocks, MHz -> kHz
+	min_table->fixed_clocks_khz.amclk = 0;
+	min_table->fixed_clocks_khz.dprefclk = soc_bb->dprefclk_mhz * 1000;
+	min_table->fixed_clocks_khz.pcierefclk = soc_bb->pcie_refclk_mhz * 1000;
+	min_table->fixed_clocks_khz.dchubrefclk = soc_bb->dchub_refclk_mhz * 1000;
+	min_table->fixed_clocks_khz.xtalclk = soc_bb->xtalclk_mhz * 1000;
+
+	// Maximum clocks: last entry of each table
+	min_table->max_clocks_khz.dispclk = soc_bb->clk_table.dispclk.clk_values_khz[soc_bb->clk_table.dispclk.num_clk_values - 1];
+	min_table->max_clocks_khz.dppclk = soc_bb->clk_table.dppclk.clk_values_khz[soc_bb->clk_table.dppclk.num_clk_values - 1];
+	min_table->max_clocks_khz.dscclk = soc_bb->clk_table.dscclk.clk_values_khz[soc_bb->clk_table.dscclk.num_clk_values - 1];
+	min_table->max_clocks_khz.dtbclk = soc_bb->clk_table.dtbclk.clk_values_khz[soc_bb->clk_table.dtbclk.num_clk_values - 1];
+	min_table->max_clocks_khz.phyclk = soc_bb->clk_table.phyclk.clk_values_khz[soc_bb->clk_table.phyclk.num_clk_values - 1];
+	min_table->max_clocks_khz.dcfclk = soc_bb->clk_table.dcfclk.clk_values_khz[soc_bb->clk_table.dcfclk.num_clk_values - 1];
+	min_table->max_clocks_khz.fclk = soc_bb->clk_table.fclk.clk_values_khz[soc_bb->clk_table.fclk.num_clk_values - 1];
+
+	// Maximums with downspread taken out
+	min_table->max_ss_clocks_khz.dispclk = (unsigned int)((double)min_table->max_clocks_khz.dispclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0));
+	min_table->max_ss_clocks_khz.dppclk = (unsigned int)((double)min_table->max_clocks_khz.dppclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0));
+	min_table->max_ss_clocks_khz.dtbclk = (unsigned int)((double)min_table->max_clocks_khz.dtbclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0));
+
+	if (any_fine_grained || !state_counts_match)
+		return build_min_clk_table_fine_grained(soc_bb, min_table);
+
+	return build_min_clk_table_coarse_grained(soc_bb, min_table);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h
new file mode 100644
index 000000000000..02da6f45cbf7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_MCG_DCN4_H__
+#define __DML2_MCG_DCN4_H__
+
+#include "dml2_internal_shared_types.h"
+
+/*
+ * Build in_out->min_clk_table from in_out->soc_bb.
+ * Returns false when either of those pointers is NULL or the clock tables
+ * fail validation.
+ */
+bool mcg_dcn4_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out);
+/*
+ * NOTE(review): no definition of this function appears in dml2_mcg_dcn4.c as
+ * added by this patch - confirm it is defined elsewhere or drop the
+ * declaration.
+ */
+bool mcg_dcn4_unit_test(void);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c
new file mode 100644
index 000000000000..c60b8fe90819
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_mcg_factory.h"
+#include "dml2_mcg_dcn4.h"
+#include "dml2_external_lib_deps.h"
+
+/* Placeholder build_min_clock_table callback: ignores @in_out and reports success. */
+static bool dummy_build_min_clock_table(struct dml2_mcg_build_min_clock_table_params_in_out *in_out)
+{
+ return true;
+}
+
+/*
+ * Initialise an MCG instance for the given project.
+ *
+ * @out is zeroed first, then its build_min_clock_table callback is selected by
+ * @project_id: the placeholder for dcn4x_stage1, the DCN4 implementation for
+ * dcn4x_stage2 and dcn4x_stage2_auto_drr_svp.
+ *
+ * Returns false when @out is NULL or the project id is not handled (in which
+ * case @out, if non-NULL, is left zeroed); true otherwise.
+ */
+bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *out)
+{
+	if (!out)
+		return false;
+
+	memset(out, 0, sizeof(*out));
+
+	switch (project_id) {
+	case dml2_project_dcn4x_stage1:
+		out->build_min_clock_table = dummy_build_min_clock_table;
+		return true;
+	case dml2_project_dcn4x_stage2:
+	case dml2_project_dcn4x_stage2_auto_drr_svp:
+		out->build_min_clock_table = mcg_dcn4_build_min_clock_table;
+		return true;
+	case dml2_project_invalid:
+	default:
+		return false;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h
new file mode 100644
index 000000000000..ad307deca3b0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_MCG_FACTORY_H__
+#define __DML2_MCG_FACTORY_H__
+
+#include "dml2_internal_shared_types.h"
+#include "dml_top_types.h"
+
+/*
+ * Zero @out and install the MCG build_min_clock_table callback for
+ * @project_id. Returns false when @out is NULL or the project id is not
+ * handled.
+ */
+bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance *out);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
new file mode 100644
index 000000000000..1b9579a32ff2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
@@ -0,0 +1,706 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_pmo_factory.h"
+#include "dml2_pmo_dcn3.h"
+
+static void sort(double *list_a, int list_a_size)
+{
+	// Bubble sort, ascending: pass i settles the i-th largest value at the tail.
+	for (int i = 0; i < list_a_size - 1; i++) {
+		// The last i slots already hold their final values, so stop before them.
+		for (int j = 0; j < list_a_size - 1 - i; j++) {
+			if (list_a[j] > list_a[j + 1])
+				swap(list_a[j], list_a[j + 1]);
+		}
+	}
+}
+
+static double get_max_reserved_time_on_all_planes_with_stream_index(struct display_configuation_with_meta *config, unsigned int stream_index)
+{
+	long largest_ns = 0;
+
+	// Track the largest reserved vblank time among the planes bound to stream_index.
+	for (unsigned int plane = 0; plane < config->display_config.num_planes; plane++) {
+		const struct dml2_plane_parameters *desc = &config->display_config.plane_descriptors[plane];
+
+		if (desc->stream_index == stream_index &&
+				desc->overrides.reserved_vblank_time_ns > largest_ns)
+			largest_ns = desc->overrides.reserved_vblank_time_ns;
+	}
+
+	return largest_ns / 1000.0; // stored in ns, reported in us
+}
+
+
+static void set_reserved_time_on_all_planes_with_stream_index(struct display_configuation_with_meta *config, unsigned int stream_index, double reserved_time_us)
+{
+	// Stamp the same reserved vblank time (given in us, stored in ns) on every plane of the stream.
+	for (unsigned int plane = 0; plane < config->display_config.num_planes; plane++) {
+		struct dml2_plane_parameters *desc = &config->display_config.plane_descriptors[plane];
+
+		if (desc->stream_index != stream_index)
+			continue;
+		desc->overrides.reserved_vblank_time_ns = (long int)(reserved_time_us * 1000);
+	}
+}
+
+static void remove_duplicates(double *list_a, int *list_a_size)
+{
+	const int count = *list_a_size;
+	int last_kept = 0;
+
+	if (count == 0)
+		return;
+
+	// Compact in place: keep a value only when it differs from the last kept one.
+	for (int scan = 1; scan < count; scan++) {
+		if (list_a[scan] != list_a[last_kept])
+			list_a[++last_kept] = list_a[scan];
+	}
+
+	*list_a_size = last_kept + 1;
+}
+
+static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit)
+{
+	// Already at (or beyond) the limit: leave the factor untouched.
+	if (*mpc_combine_factor >= limit)
+		return false;
+
+	*mpc_combine_factor += 1;
+	return true;
+}
+
+static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out,
+	int free_pipes)
+{
+	struct dml2_pmo_instance *pmo = in_out->instance;
+	struct dml2_plane_parameters *plane;
+	unsigned int plane_idx;
+
+	// Walk every plane. Each plane failing the dcc mcache check is given one
+	// more pipe through MPC combine; the first plane that cannot be helped
+	// fails the whole attempt.
+	for (plane_idx = 0; plane_idx < in_out->optimized_display_cfg->num_planes; plane_idx++) {
+		// Planes that already pass the check need no extra pipe.
+		if (in_out->dcc_mcache_supported[plane_idx])
+			continue;
+
+		plane = &in_out->optimized_display_cfg->plane_descriptors[plane_idx];
+
+		// For the general case of "n displays", only streams with an ODM
+		// combine factor of 1 can be optimized; anything else ends here.
+		if (in_out->cfg_support_info->stream_support_info[plane->stream_index].odms_used != 1)
+			return false;
+
+		// Seed the override with the pipe count the plane currently uses.
+		plane->overrides.mpcc_combine_factor =
+			in_out->cfg_support_info->plane_support_info[plane_idx].dpps_used;
+
+		// No free pipes to add.
+		if (free_pipes <= 0)
+			return false;
+
+		// Add one pipe; failing means the plane is already at the maximum
+		// number of pipes allocatable to a single plane.
+		if (!increase_mpc_combine_factor(&plane->overrides.mpcc_combine_factor,
+				pmo->mpc_combine_limit))
+			return false;
+
+		// Successfully added another pipe to this failing plane.
+		free_pipes--;
+	}
+
+	// Every failing plane received its extra pipe.
+	return true;
+}
+
+static bool iterate_to_next_candidiate(struct dml2_pmo_instance *pmo, int size)
+{
+	int digit, lower;
+
+	// Fast path: the lowest position can simply step down by one.
+	if (pmo->scratch.pmo_dcn3.current_candidate[0] > 0) {
+		pmo->scratch.pmo_dcn3.current_candidate[0]--;
+		return true;
+	}
+
+	// Otherwise find the first higher position still above zero; none means done.
+	digit = 1;
+	while (digit < size && pmo->scratch.pmo_dcn3.current_candidate[digit] == 0)
+		digit++;
+
+	if (digit >= size)
+		return false;
+
+	// Borrow from that position and reset every lower one to its last index.
+	pmo->scratch.pmo_dcn3.current_candidate[digit]--;
+	for (lower = 0; lower < digit; lower++)
+		pmo->scratch.pmo_dcn3.current_candidate[lower] = pmo->scratch.pmo_dcn3.reserved_time_candidates_count[lower] - 1;
+	return true;
+}
+
+static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated)
+{
+	/*
+	 * Raise *odm_mode by one ODM combine level. Returns true when the mode
+	 * was raised and false when there is no higher level to move to.
+	 */
+
+	// An "auto" mode is first resolved to the explicit mode that matches
+	// the calculated ODM count; counts outside 1..4 cannot be resolved.
+	if (*odm_mode == dml2_odm_mode_auto) {
+		if (odms_calculated == 1)
+			*odm_mode = dml2_odm_mode_bypass;
+		else if (odms_calculated == 2)
+			*odm_mode = dml2_odm_mode_combine_2to1;
+		else if (odms_calculated == 3)
+			*odm_mode = dml2_odm_mode_combine_3to1;
+		else if (odms_calculated == 4)
+			*odm_mode = dml2_odm_mode_combine_4to1;
+		else
+			return false;
+	}
+
+	// Step the explicit mode up by exactly one combine level.
+	switch (*odm_mode) {
+	case dml2_odm_mode_bypass:
+		*odm_mode = dml2_odm_mode_combine_2to1;
+		return true;
+	case dml2_odm_mode_combine_2to1:
+		*odm_mode = dml2_odm_mode_combine_3to1;
+		return true;
+	case dml2_odm_mode_combine_3to1:
+		*odm_mode = dml2_odm_mode_combine_4to1;
+		return true;
+	default:
+		// 4:1 or any other mode: nothing higher to move to.
+		return false;
+	}
+}
+
+static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index)
+{
+	unsigned int matches = 0;
+
+	// Tally the planes whose descriptor points at the requested stream.
+	for (unsigned int plane = 0; plane < display_cfg->num_planes; plane++) {
+		if (display_cfg->plane_descriptors[plane].stream_index == stream_index)
+			matches++;
+	}
+
+	return matches;
+}
+
+static bool are_timings_trivially_synchronizable(struct display_configuation_with_meta *display_config, int mask)
+{
+	const struct dml2_display_cfg *cfg = &display_config->display_config;
+	unsigned int selected[DML2_MAX_PLANES];
+	unsigned int selected_count = 0;
+	unsigned int n;
+
+	/*
+	 * The streams picked by the mask are trivially synchronizable when they
+	 * all share byte-identical timing and none of them has DRR enabled.
+	 */
+	// Gather the indices of the masked streams so only those are checked.
+	for (n = 0; n < cfg->num_streams; n++) {
+		if (mask & (0x1 << n))
+			selected[selected_count++] = n;
+	}
+
+	// 0 or 1 display is always trivially synchronizable
+	if (selected_count <= 1)
+		return true;
+
+	// Each selected stream must match its predecessor's timing exactly.
+	for (n = 1; n < selected_count; n++) {
+		if (memcmp(&cfg->stream_descriptors[selected[n - 1]].timing,
+				&cfg->stream_descriptors[selected[n]].timing,
+				sizeof(struct dml2_timing_cfg)))
+			return false;
+	}
+
+	// A single DRR-enabled stream in the selection rules out trivial sync.
+	for (n = 0; n < selected_count; n++) {
+		if (cfg->stream_descriptors[selected[n]].timing.drr_config.enabled)
+			return false;
+	}
+
+	return true;
+}
+
+bool pmo_dcn3_initialize(struct dml2_pmo_initialize_in_out *in_out)
+{
+	struct dml2_pmo_instance *instance = in_out->instance;
+
+	// Fixed combine limits for this optimizer.
+	instance->mpc_combine_limit = 2;
+	instance->odm_combine_limit = 4;
+	// Everything else is copied straight from the caller's parameters.
+	instance->soc_bb = in_out->soc_bb;
+	instance->ip_caps = in_out->ip_caps;
+	instance->options = in_out->options;
+	instance->mcg_clock_table_size = in_out->mcg_clock_table_size;
+	return true;
+}
+
+static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator)
+{
+	/*
+	 * Horizontal timing counts as divisible by the denominator only when
+	 * Htotal, Hblank start/end and Hsync width all are. Hblank start is
+	 * Htotal minus the front porch; Hsync start is always 0 (not checked).
+	 */
+	unsigned long h_blank_start = timing->h_total - timing->h_front_porch;
+
+	if (timing->h_total % denominator != 0 || h_blank_start % denominator != 0)
+		return false;
+
+	return (timing->h_blank_end % denominator == 0) && (timing->h_sync_width % denominator == 0);
+}
+
+static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type)
+{
+	/*
+	 * dml2_none is reported the same way as the DP-class encoders. HDMI,
+	 * HDMI FRL and any unrecognised value are reported as non-DP.
+	 */
+	if (encoder_type == dml2_dp || encoder_type == dml2_edp)
+		return true;
+
+	if (encoder_type == dml2_dp2p0 || encoder_type == dml2_none)
+		return true;
+
+	return false;
+}
+
+bool pmo_dcn3_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out)
+{
+	const struct dml2_display_cfg *cfg =
+			&in_out->base_display_config->display_config;
+	const struct dml2_core_mode_support_result *support =
+			&in_out->base_display_config->mode_support_result;
+	unsigned int plane, stream;
+
+	// Dynamic ODM can be disabled outright, or only for multi-stream configs.
+	if (in_out->instance->options->disable_dyn_odm)
+		return false;
+	if (in_out->instance->options->disable_dyn_odm_for_multi_stream && cfg->num_streams > 1)
+		return false;
+
+	/*
+	 * vmin optimization has to be seamlessly switchable off whenever the
+	 * new configuration is no longer supported, but going from ODM combine
+	 * to MPC combine is not always seamless: without enough free pipes the
+	 * new state must reuse the secondary OPP heads as secondary DPP pipes,
+	 * and that transition is expected to glitch. To avoid it, a stream
+	 * whose base configuration already needs MPC combine (a plane using
+	 * more than one DPP while the stream uses one ODM) is not optimized.
+	 */
+	for (plane = 0; plane < cfg->num_planes; plane++) {
+		stream = cfg->plane_descriptors[plane].stream_index;
+
+		if (support->cfg_support_info.plane_support_info[plane].dpps_used > 1 &&
+				support->cfg_support_info.stream_support_info[stream].odms_used == 1)
+			in_out->base_display_config->stage4.unoptimizable_streams[stream] = true;
+	}
+
+	for (stream = 0; stream < cfg->num_streams; stream++) {
+		bool blocked;
+
+		if (cfg->stream_descriptors[stream].overrides.disable_dynamic_odm)
+			blocked = true; // dynamic ODM overridden off for this stream
+		else if (in_out->base_display_config->stage3.stream_svp_meta[stream].valid &&
+				in_out->instance->options->disable_dyn_odm_for_stream_with_svp)
+			blocked = true; // stream has valid SVP meta and the option forbids it
+		else if (!is_h_timing_divisible_by(&cfg->stream_descriptors[stream].timing, 2))
+			blocked = true; // ODM segments must be equal in size: h timing % 2
+		else
+			blocked = !is_dp_encoder(cfg->stream_descriptors[stream].output.output_encoder); // only DP encoders transition seamlessly
+
+		// Only ever set the flag so a mark from the plane pass is kept.
+		if (blocked)
+			in_out->base_display_config->stage4.unoptimizable_streams[stream] = true;
+	}
+
+	return true;
+}
+
+bool pmo_dcn3_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out)
+{
+	// Without a positive dispclk limit there is nothing to exceed.
+	if (!(in_out->vmin_limits->dispclk_khz > 0))
+		return true;
+
+	// Still at vmin as long as the required dispclk does not exceed the limit.
+	return !(in_out->display_config->mode_support_result.global.dispclk_khz >
+			in_out->vmin_limits->dispclk_khz);
+}
+
+static int find_highest_odm_load_stream_index(
+		const struct dml2_display_cfg *display_config,
+		const struct dml2_core_mode_support_result *mode_support_result)
+{
+	int load, best_load = -1, best_index = -1;
+	unsigned int stream;
+
+	for (stream = 0; stream < display_config->num_streams; stream++) {
+		// Load is the pixel clock per ODM in use; a stream using none counts as 0.
+		load = 0;
+		if (mode_support_result->cfg_support_info.stream_support_info[stream].odms_used > 0)
+			load = display_config->stream_descriptors[stream].timing.pixel_clock_khz
+				/ mode_support_result->cfg_support_info.stream_support_info[stream].odms_used;
+		// Strict comparison: on a tie the earliest stream keeps the lead.
+		if (load > best_load) {
+			best_load = load;
+			best_index = stream;
+		}
+	}
+
+	return best_index;
+}
+
+bool pmo_dcn3_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out)
+{
+	const struct dml2_display_cfg *base_cfg =
+			&in_out->base_display_config->display_config;
+	const struct dml2_core_mode_support_result *support =
+			&in_out->base_display_config->mode_support_result;
+	struct dml2_stream_parameters *target;
+	unsigned int current_odms;
+	bool h_div_by_4, dsc_on;
+	bool found = false;
+	int target_index;
+
+	/*
+	 * dispclk is bounded by the stream with the highest ODM load, so that
+	 * is the stream to work on. Give up when there is no such stream or
+	 * when it has been marked unoptimizable.
+	 */
+	target_index = find_highest_odm_load_stream_index(base_cfg, support);
+	if (target_index < 0)
+		return false;
+	if (in_out->base_display_config->stage4.unoptimizable_streams[target_index])
+		return false;
+
+	// Nothing to raise once the stream is already at the ODM combine limit.
+	current_odms = support->cfg_support_info.stream_support_info[target_index].odms_used;
+	if ((int)current_odms >= in_out->instance->odm_combine_limit)
+		return false;
+
+	// All changes are made on a full copy of the base configuration.
+	memcpy(in_out->optimized_display_config,
+			in_out->base_display_config,
+			sizeof(struct display_configuation_with_meta));
+
+	target = &in_out->optimized_display_config->display_config.stream_descriptors[target_index];
+
+	/*
+	 * Both inputs below are read from the base configuration and stay
+	 * the same for every combine level that is tried.
+	 */
+	h_div_by_4 = is_h_timing_divisible_by(&base_cfg->stream_descriptors[target_index].timing, 4);
+	dsc_on = support->cfg_support_info.stream_support_info[target_index].dsc_enable;
+
+	/*
+	 * Raise the ODM combine factor one level at a time until a level is
+	 * found that the stream can use, or until no higher level remains.
+	 */
+	while (!found && increase_odm_combine_factor(
+			&target->overrides.odm_mode,
+			current_odms)) {
+		switch (target->overrides.odm_mode) {
+		case dml2_odm_mode_combine_2to1:
+			// 2:1 carries no extra requirement here.
+			found = true;
+			break;
+		case dml2_odm_mode_combine_3to1:
+			/*
+			 * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of
+			 * actual pixel rate. Therefore horizontal timing must
+			 * be divisible by 4. With DSC enabled the DSC h slice
+			 * count must additionally be divisible by 3.
+			 */
+			found = h_div_by_4 && (!dsc_on ||
+				support->cfg_support_info.stream_support_info[target_index].num_dsc_slices % 3 == 0);
+			break;
+		case dml2_odm_mode_combine_4to1:
+			/*
+			 * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of
+			 * actual pixel rate. Therefore horizontal timing must
+			 * be divisible by 4. With DSC enabled the DSC h slice
+			 * count must additionally be divisible by 4.
+			 */
+			found = h_div_by_4 && (!dsc_on ||
+				support->cfg_support_info.stream_support_info[target_index].num_dsc_slices % 4 == 0);
+			break;
+		case dml2_odm_mode_auto:
+		case dml2_odm_mode_bypass:
+		case dml2_odm_mode_split_1to2:
+		case dml2_odm_mode_mso_1to2:
+		case dml2_odm_mode_mso_1to4:
+		default:
+			// Not a combine level this optimization can settle on.
+			break;
+		}
+	}
+
+	// True only when a usable higher combine level was set on the copy.
+	return found;
+}
+
+bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out)
+{
+	struct dml2_pmo_instance *pmo = in_out->instance;
+
+	unsigned int i, used_pipes, free_pipes, planes_on_stream;
+	bool result;
+
+	if (in_out->display_config != in_out->optimized_display_cfg) {
+		memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg));
+	}
+
+	// Count the pipes (DPPs) already in use to find how many are still free.
+	used_pipes = 0;
+	for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) {
+		used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used;
+	}
+	free_pipes = pmo->ip_caps->pipe_count - used_pipes;
+
+	// Optimization loop
+	// The goal here is to add more pipes to any planes
+	// which are failing mcache admissibility
+	result = true;
+
+	// The optimization logic depends on whether ODM combine is enabled, and the stream count.
+	if (in_out->optimized_display_cfg->num_streams > 1) {
+		// If there are multiple streams, we are limited to only be able to optimize mcache failures on planes
+		// which are not ODM combined.
+
+		result = optimize_dcc_mcache_no_odm(in_out, free_pipes);
+	} else if (in_out->optimized_display_cfg->num_streams == 1) {
+		// In single stream cases, we still optimize mcache failures when there's ODM combine with some
+		// additional logic.
+
+		if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) {
+			// If ODM combine is enabled, then the logic is to increase ODM combine factor.
+
+			// Optimization for streams with > 1 ODM combine factor is only supported for single display.
+			planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0);
+
+			for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) {
+				// Only planes that failed the dcc mcache check need more pipes.
+				// The ODM mode is a per-stream override, so the factor is raised on
+				// stream 0 (the only stream here) rather than on a plane-indexed entry.
+				if (!in_out->dcc_mcache_supported[i]) {
+					// Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream.
+					if (free_pipes >= planes_on_stream) {
+						if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[0].overrides.odm_mode,
+							in_out->cfg_support_info->stream_support_info[0].odms_used)) {
+							result = false;
+						} else {
+							break;
+						}
+					} else {
+						result = false;
+						break;
+					}
+				}
+			}
+		} else {
+			// If ODM combine is not enabled, then we can actually use the same logic as before.
+
+			result = optimize_dcc_mcache_no_odm(in_out, free_pipes);
+		}
+	} else {
+		result = true;
+	}
+
+	return result;
+}
+
+bool pmo_dcn3_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out)
+{
+ struct dml2_pmo_instance *pmo = in_out->instance;
+ struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3;
+ const struct dml2_stream_parameters *stream_descriptor;
+ const struct dml2_plane_parameters *plane_descriptor;
+ unsigned int stream_index, plane_index, candidate_count;
+ double min_reserved_vblank_time = 0;
+ int fclk_twait_needed_mask = 0x0;
+ int uclk_twait_needed_mask = 0x0;
+
+ state->performed = true;
+ state->min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency;
+ pmo->scratch.pmo_dcn3.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency;
+ pmo->scratch.pmo_dcn3.max_latency_index = pmo->mcg_clock_table_size - 1;
+ pmo->scratch.pmo_dcn3.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency;
+
+ pmo->scratch.pmo_dcn3.stream_mask = 0xF; // start with stream bits 0-3 set; bits are cleared in the plane loop below
+ // Per plane: record which streams need uclk/fclk twait budgeting, and clear the mask bit of any stream with a non-auto legacy SVP override.
+ for (plane_index = 0; plane_index < in_out->base_display_config->display_config.num_planes; plane_index++) {
+ plane_descriptor = &in_out->base_display_config->display_config.plane_descriptors[plane_index];
+ stream_descriptor = &in_out->base_display_config->display_config.stream_descriptors[plane_descriptor->stream_index];
+
+ if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[plane_index].active_latency_hiding_us <
+ in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us &&
+ stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate == dml2_twait_budgeting_setting_if_needed)
+ uclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index);
+
+ if (stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate == dml2_twait_budgeting_setting_try)
+ uclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index);
+
+ if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[plane_index].active_latency_hiding_us <
+ in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us &&
+ stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate == dml2_twait_budgeting_setting_if_needed)
+ fclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index);
+
+ if (stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate == dml2_twait_budgeting_setting_try)
+ fclk_twait_needed_mask |= (0x1 << plane_descriptor->stream_index);
+
+ if (plane_descriptor->overrides.legacy_svp_config != dml2_svp_mode_override_auto) {
+ pmo->scratch.pmo_dcn3.stream_mask &= ~(0x1 << plane_descriptor->stream_index);
+ }
+ }
+
+ for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) {
+ stream_descriptor = &in_out->base_display_config->display_config.stream_descriptors[stream_index];
+
+ // Disabled below: deriving the absolute minimum from the "require" twait budgets (commented out, not compiled)
+ /*
+ if (stream_descriptor->overrides.hw.twait_budgeting.fclk_pstate
+ == dml2_twait_budgeting_setting_require)
+
+ if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) {
+ min_reserved_vblank_time = max_double2(min_reserved_vblank_time,
+ in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us);
+ }
+
+ if (stream_descriptor->overrides.hw.twait_budgeting.uclk_pstate
+ == dml2_twait_budgeting_setting_require) {
+
+ if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) {
+ min_reserved_vblank_time = max_double2(min_reserved_vblank_time,
+ in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us);
+ }
+ }
+
+ if (stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit
+ == dml2_twait_budgeting_setting_require)
+ min_reserved_vblank_time = max_double2(min_reserved_vblank_time,
+ in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us);
+ */
+
+ min_reserved_vblank_time = get_max_reserved_time_on_all_planes_with_stream_index(in_out->base_display_config, stream_index); // largest time already reserved on this stream's planes
+
+ // Insert the absolute minimum into the array
+ candidate_count = 1;
+ pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][0] = min_reserved_vblank_time;
+ pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] = candidate_count;
+
+ if (!(pmo->scratch.pmo_dcn3.stream_mask & (0x1 << stream_index)))
+ continue;
+
+ // For every optional feature, we create a candidate for it only if it's larger than the minimum.
+ if ((fclk_twait_needed_mask & (0x1 << stream_index)) &&
+ in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us > min_reserved_vblank_time) {
+
+ if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) {
+ pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] =
+ in_out->instance->soc_bb->power_management_parameters.fclk_change_blackout_us;
+ }
+ }
+
+ if ((uclk_twait_needed_mask & (0x1 << stream_index)) &&
+ in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us > min_reserved_vblank_time) {
+
+ if (are_timings_trivially_synchronizable(in_out->base_display_config, pmo->scratch.pmo_dcn3.stream_mask)) {
+ pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] =
+ in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us;
+ }
+ }
+
+ if ((stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit == dml2_twait_budgeting_setting_try ||
+ stream_descriptor->overrides.hw.twait_budgeting.stutter_enter_exit == dml2_twait_budgeting_setting_if_needed) &&
+ in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > min_reserved_vblank_time) {
+
+ pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index][candidate_count++] =
+ in_out->instance->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us;
+ }
+
+ pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] = candidate_count;
+
+ // Finally sort the array of candidates, then drop repeated values
+ sort(pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index],
+ pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index]);
+
+ remove_duplicates(pmo->scratch.pmo_dcn3.reserved_time_candidates[stream_index],
+ &pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index]);
+
+ pmo->scratch.pmo_dcn3.current_candidate[stream_index] =
+ pmo->scratch.pmo_dcn3.reserved_time_candidates_count[stream_index] - 1;
+ }
+
+ return true;
+}
+
+bool pmo_dcn3_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out)
+{
+	const struct dml2_display_cfg *cfg = &in_out->base_display_config->display_config;
+	struct dml2_pmo_instance *pmo = in_out->instance;
+	unsigned int plane, stream;
+
+	// Each plane must reserve at least its stream's current candidate time (candidate * 1000, compared in ns).
+	for (plane = 0; plane < cfg->num_planes; plane++) {
+		stream = cfg->plane_descriptors[plane].stream_index;
+
+		if (cfg->plane_descriptors[plane].overrides.reserved_vblank_time_ns <
+				pmo->scratch.pmo_dcn3.reserved_time_candidates[stream][pmo->scratch.pmo_dcn3.current_candidate[stream]] * 1000)
+			return false;
+	}
+
+	return true;
+}
+
+bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out)
+{
+	struct dml2_pmo_instance *pmo = in_out->instance;
+	struct display_configuation_with_meta *out_cfg = in_out->optimized_display_config;
+	unsigned int stream;
+
+	// Always start from a fresh copy of the base configuration.
+	memcpy(out_cfg, in_out->base_display_config, sizeof(struct display_configuation_with_meta));
+
+	/*
+	 * When the previous candidate failed, pick what to try next. When it
+	 * did not, the current latency index and candidates are applied as is.
+	 */
+	if (in_out->last_candidate_failed) {
+		if (pmo->scratch.pmo_dcn3.cur_latency_index < pmo->scratch.pmo_dcn3.max_latency_index) {
+			// If we haven't tried all the clock bounds to support this state, try a higher one
+			pmo->scratch.pmo_dcn3.cur_latency_index++;
+		} else if (iterate_to_next_candidiate(pmo, out_cfg->display_config.num_streams)) {
+			// Nothing higher to try: move to a smaller candidate and
+			// start over from the lowest clock bound.
+			pmo->scratch.pmo_dcn3.cur_latency_index = pmo->scratch.pmo_dcn3.min_latency_index;
+		} else {
+			// Every candidate has been tried; the output stays a plain
+			// copy of the base configuration.
+			return false;
+		}
+	}
+
+	// Publish the chosen latency index into the optimized configuration.
+	out_cfg->stage3.min_clk_index_for_latency = pmo->scratch.pmo_dcn3.cur_latency_index;
+
+	// Apply each stream's current reserved-time candidate to all of its planes.
+	for (stream = 0; stream < out_cfg->display_config.num_streams; stream++) {
+		set_reserved_time_on_all_planes_with_stream_index(out_cfg, stream,
+			pmo->scratch.pmo_dcn3.reserved_time_candidates[stream][pmo->scratch.pmo_dcn3.current_candidate[stream]]);
+	}
+
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h
new file mode 100644
index 000000000000..f00bd9e72a86
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_PMO_DCN3_H__
+#define __DML2_PMO_DCN3_H__
+
+#include "dml2_internal_shared_types.h"
+
+bool pmo_dcn3_initialize(struct dml2_pmo_initialize_in_out *in_out);
+
+bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out);
+
+bool pmo_dcn3_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out);
+bool pmo_dcn3_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out);
+bool pmo_dcn3_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out);
+
+bool pmo_dcn3_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out);
+bool pmo_dcn3_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out);
+bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
new file mode 100644
index 000000000000..d88b3e0082dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
@@ -0,0 +1,2371 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_pmo_factory.h"
+#include "dml2_debug.h"
+#include "lib_float_math.h"
+#include "dml2_pmo_dcn4_fams2.h"
+
+static const double MIN_VACTIVE_MARGIN_PCT = 0.25; // A merely non-zero margin is not enough: DET buffer granularity can alter vactive latency hiding, so a larger minimum is required
+static const double MIN_BLANK_STUTTER_FACTOR = 3.0; // NOTE(review): not referenced in the part of the file reviewed here; confirm its meaning at the use site
+
+static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { // Base p-state strategies for one stream, most preferred first
+ // VActive Preferred
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+
+ // Then SVP
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+
+ // Then VBlank
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = false,
+ },
+
+ // Then DRR
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+
+ // Disabled entry, kept for reference: finally VBlank, but allow base clocks for latency to increase
+ /*
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+ */
+};
+
+static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / sizeof(base_strategy_list_1_display[0]); // number of entries in base_strategy_list_1_display
+
+static const struct dml2_pmo_pstate_strategy base_strategy_list_2_display[] = { // Base p-state strategies for two streams, most preferred first
+ // VActive only is preferred
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+
+ // Then VActive + VBlank
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = false,
+ },
+
+ // Then VBlank only
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = false,
+ },
+
+ // Then SVP + VBlank
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = false,
+ },
+
+ // Then SVP + DRR
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+
+ // Then SVP + SVP
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+
+ // Then VActive + DRR
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+
+ // Then DRR + DRR
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+
+ // Disabled entry, kept for reference: finally VBlank, but allow base clocks for latency to increase
+ /*
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+ */
+};
+
+static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / sizeof(base_strategy_list_2_display[0]); // number of entries in base_strategy_list_2_display
+
+static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = { // Base p-state strategies for three streams (presumably most preferred first, like the 1- and 2-stream lists - confirm)
+ // All VActive
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+
+ // VActive + 1 VBlank
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na },
+ .allow_state_increase = false,
+ },
+
+ // All VBlank
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na },
+ .allow_state_increase = false,
+ },
+
+ // All DRR
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+
+ // Disabled entry, kept for reference: all VBlank, with state increase allowed
+ /*
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na },
+ .allow_state_increase = true,
+ },
+ */
+};
+
+static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / sizeof(base_strategy_list_3_display[0]); // number of entries in base_strategy_list_3_display
+
+static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = { // Base p-state strategies for four streams (presumably most preferred first, like the 1- and 2-stream lists - confirm)
+ // All VActive
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive },
+ .allow_state_increase = true,
+ },
+
+ // VActive + 1 VBlank
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank },
+ .allow_state_increase = false,
+ },
+
+ // All VBlank
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank },
+ .allow_state_increase = false,
+ },
+
+ // All DRR
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr },
+ .allow_state_increase = true,
+ },
+
+ // Disabled entry, kept for reference: all VBlank, with state increase allowed
+ /*
+ {
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank },
+ .allow_state_increase = true,
+ },
+ */
+};
+
+static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / sizeof(base_strategy_list_4_display[0]); // number of entries in base_strategy_list_4_display
+
+
+/*
+ * Raise *@odm_mode to the next higher ODM combine factor.  dml2_odm_mode_auto
+ * is first replaced by the explicit mode matching @odms_calculated (1 = bypass,
+ * 2..4 = N:1 combine); the explicit mode is then stepped bypass -> 2:1 -> 3:1
+ * -> 4:1.  Returns true if the mode was stepped up, false if auto cannot be
+ * resolved or the mode is not bypass/2:1/3:1.  Auto with @odms_calculated == 4
+ * also fails, but only after *@odm_mode has been rewritten to 4:1.
+ */
+static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated)
+{
+	if (*odm_mode == dml2_odm_mode_auto) {
+		/* make the automatically chosen factor explicit first */
+		if (odms_calculated == 1)
+			*odm_mode = dml2_odm_mode_bypass;
+		else if (odms_calculated == 2)
+			*odm_mode = dml2_odm_mode_combine_2to1;
+		else if (odms_calculated == 3)
+			*odm_mode = dml2_odm_mode_combine_3to1;
+		else if (odms_calculated == 4)
+			*odm_mode = dml2_odm_mode_combine_4to1;
+		else
+			return false;
+	}
+
+	switch (*odm_mode) {
+	case dml2_odm_mode_bypass:
+		*odm_mode = dml2_odm_mode_combine_2to1;
+		return true;
+	case dml2_odm_mode_combine_2to1:
+		*odm_mode = dml2_odm_mode_combine_3to1;
+		return true;
+	case dml2_odm_mode_combine_3to1:
+		*odm_mode = dml2_odm_mode_combine_4to1;
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Add one pipe to *@mpc_combine_factor if it is still below @limit; returns whether it was raised. */
+static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit)
+{
+	const bool can_grow = *mpc_combine_factor < limit;
+
+	if (can_grow)
+		*mpc_combine_factor += 1;
+	return can_grow;
+}
+
+/* Return how many of the first num_planes plane descriptors in @display_cfg refer to @stream_index. */
+static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index)
+{
+	unsigned int plane_idx;
+	unsigned int matches = 0;
+
+	for (plane_idx = 0; plane_idx < display_cfg->num_planes; plane_idx++)
+		matches += (display_cfg->plane_descriptors[plane_idx].stream_index == stream_index) ? 1 : 0;
+
+	/* implicit unsigned -> int conversion, as dictated by the return type */
+	return matches;
+}
+
+/*
+ * Try to fix DCC mcache failures by adding one MPC-combined pipe per failing plane.
+ *
+ * Walks the planes of in_out->optimized_display_cfg in order.  For each plane
+ * whose in_out->dcc_mcache_supported[] entry is false, the mpcc_combine_factor
+ * override is seeded with the plane's current dpps_used and then raised by
+ * one, consuming one of @free_pipes.
+ *
+ * Returns false at the first failing plane that cannot be helped: its stream
+ * uses ODM combine (odms_used != 1), no free pipe is left, or the plane is
+ * already at pmo->mpc_combine_limit.  Overrides written for earlier planes
+ * (and possibly the seed of the current one) stay in place.  Returns true otherwise.
+ */
+static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out,
+	int free_pipes)
+{
+	struct dml2_pmo_instance *pmo = in_out->instance;
+	unsigned int plane;
+
+	for (plane = 0; plane < in_out->optimized_display_cfg->num_planes; plane++) {
+		unsigned int *mpc_factor =
+			&in_out->optimized_display_cfg->plane_descriptors[plane].overrides.mpcc_combine_factor;
+
+		/* planes that pass the mcache check need no extra pipe */
+		if (in_out->dcc_mcache_supported[plane])
+			continue;
+
+		/* a stream that needs ODM combine cannot be optimized here */
+		if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[plane].stream_index].odms_used != 1)
+			return false;
+
+		/* start from the pipe count the plane already uses */
+		*mpc_factor = in_out->cfg_support_info->plane_support_info[plane].dpps_used;
+
+		/* need a spare pipe, and room below the per-plane limit */
+		if (free_pipes <= 0 || !increase_mpc_combine_factor(mpc_factor, pmo->mpc_combine_limit))
+			return false;
+
+		free_pipes--;
+	}
+
+	return true;
+}
+
+/*
+ * Add pipes to planes that failed DCC mcache admissibility.
+ *
+ * in_out->display_config is copied to in_out->optimized_display_cfg (unless
+ * both are the same object) and overrides are then adjusted in the copy:
+ *  - several streams, or dynamic ODM disabled: each failing plane on a stream
+ *    without ODM combine gets one more MPC-combined pipe;
+ *  - one stream without ODM combine: same as above;
+ *  - one stream with ODM combine: the stream's ODM combine factor is raised
+ *    one step, which needs a free pipe for every plane on the stream;
+ *  - no streams: nothing to do.
+ *
+ * Returns true if every failing plane could be given more pipes (or none
+ * failed), false if no further optimization is possible.
+ */
+bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out)
+{
+	struct dml2_pmo_instance *pmo = in_out->instance;
+	struct dml2_display_cfg *cfg = in_out->optimized_display_cfg;
+	unsigned int i, used_pipes, free_pipes, planes_on_stream, stream_index;
+
+	if (in_out->display_config != cfg)
+		memcpy(cfg, in_out->display_config, sizeof(struct dml2_display_cfg));
+
+	/* Count the pipes that are still free. */
+	used_pipes = 0;
+	for (i = 0; i < cfg->num_planes; i++)
+		used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used;
+	free_pipes = pmo->ip_caps->pipe_count - used_pipes;
+
+	/*
+	 * With multiple streams (or dynamic ODM disabled) only mcache failures on
+	 * planes that are not ODM combined can be optimized.
+	 */
+	if (cfg->num_streams > 1 || pmo->options->disable_dyn_odm)
+		return optimize_dcc_mcache_no_odm(in_out, free_pipes);
+
+	/* Nothing to optimize without a stream. */
+	if (cfg->num_streams != 1)
+		return true;
+
+	/* A single stream without ODM combine uses the same logic as above. */
+	if (in_out->cfg_support_info->stream_support_info[0].odms_used <= 1)
+		return optimize_dcc_mcache_no_odm(in_out, free_pipes);
+
+	/*
+	 * Single stream with ODM combine: raise the ODM combine factor of the
+	 * stream.  One step covers every plane on it, so the first failing
+	 * plane decides the outcome.
+	 */
+	planes_on_stream = count_planes_with_stream_index(cfg, 0);
+
+	for (i = 0; i < cfg->num_planes; i++) {
+		if (in_out->dcc_mcache_supported[i])
+			continue;
+
+		/* Raising the factor requires a free pipe for each plane on the stream. */
+		if (free_pipes < planes_on_stream)
+			return false;
+
+		/* Index by the plane's stream (not the plane) and start from the stream's ODM count. */
+		stream_index = cfg->plane_descriptors[i].stream_index;
+		return increase_odm_combine_factor(
+			&cfg->stream_descriptors[stream_index].overrides.odm_mode,
+			in_out->cfg_support_info->stream_support_info[stream_index].odms_used);
+	}
+
+	return true;
+}
+
+/*
+ * Map a base p-state method to its firmware DRR variant.
+ *
+ * vactive, vblank and fw_svp map to fw_vactive_drr, fw_vblank_drr and
+ * fw_svp_drr respectively.  Every other method has no variant and is
+ * returned unchanged.
+ */
+static enum dml2_pstate_method convert_strategy_to_drr_variant(const enum dml2_pstate_method base_strategy)
+{
+	switch (base_strategy) {
+	case dml2_pstate_method_vactive:
+		return dml2_pstate_method_fw_vactive_drr;
+	case dml2_pstate_method_vblank:
+		return dml2_pstate_method_fw_vblank_drr;
+	case dml2_pstate_method_fw_svp:
+		return dml2_pstate_method_fw_svp_drr;
+	case dml2_pstate_method_fw_vactive_drr:
+	case dml2_pstate_method_fw_vblank_drr:
+	case dml2_pstate_method_fw_svp_drr:
+	case dml2_pstate_method_fw_drr:
+	case dml2_pstate_method_reserved_hw:
+	case dml2_pstate_method_reserved_fw:
+	case dml2_pstate_method_reserved_fw_drr_clamped:
+	case dml2_pstate_method_reserved_fw_drr_var:
+	case dml2_pstate_method_count:
+	case dml2_pstate_method_na:
+	default:
+		/* no variant for this mode */
+		return base_strategy;
+	}
+}
+
+/*
+ * Look up the expanded p-state strategy list for a given number of streams.
+ *
+ * Returns the list stored in @init_data for @stream_count streams (1 to 4),
+ * or NULL for any other count.
+ */
+static struct dml2_pmo_pstate_strategy *get_expanded_strategy_list(struct dml2_pmo_init_data *init_data, int stream_count)
+{
+	if (stream_count == 1)
+		return init_data->pmo_dcn4.expanded_strategy_list_1_display;
+
+	if (stream_count == 2)
+		return init_data->pmo_dcn4.expanded_strategy_list_2_display;
+
+	if (stream_count == 3)
+		return init_data->pmo_dcn4.expanded_strategy_list_3_display;
+
+	if (stream_count == 4)
+		return init_data->pmo_dcn4.expanded_strategy_list_4_display;
+
+	/* no list exists for this number of streams */
+	return NULL;
+}
+
+/* Number of expanded strategies stored for @stream_count streams; 0 when @stream_count is outside 1..PMO_DCN4_MAX_DISPLAYS. */
+static unsigned int get_num_expanded_strategies(struct dml2_pmo_init_data *init_data, int stream_count)
+{
+	return (stream_count < 1 || stream_count > PMO_DCN4_MAX_DISPLAYS) ? 0 :
+		init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1];
+}
+
+/* Append a copy of @per_stream_pstate_strategy to @expanded_strategy_list and bump *@num_expanded_strategies; a no-op if either
+ * output pointer is NULL.  @stream_count is unused.  NOTE(review): no capacity check is made here - the caller must size the list. */
+static void insert_strategy_into_expanded_list(const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy,
+	const int stream_count,
+	struct dml2_pmo_pstate_strategy *expanded_strategy_list,
+	unsigned int *num_expanded_strategies)
+{
+	if (!expanded_strategy_list || !num_expanded_strategies)
+		return;
+	memcpy(&expanded_strategy_list[*num_expanded_strategies], per_stream_pstate_strategy, sizeof(*per_stream_pstate_strategy));
+	*num_expanded_strategies += 1;
+}
+
+static void expand_base_strategy( /* append every distinct per-stream ordering of @base_strategy's methods to @expanded_strategy_list */
+ const struct dml2_pmo_pstate_strategy *base_strategy, /* strategy whose first @stream_count methods are reordered */
+ const unsigned int stream_count, /* number of streams; also the length of each generated ordering */
+ struct dml2_pmo_pstate_strategy *expanded_strategy_list, /* destination list; its capacity is not checked here */
+ unsigned int *num_expanded_strategies) /* in/out: number of entries currently in the list */
+{
+ bool skip_to_next_stream;
+ bool expanded_strategy_added;
+ bool skip_iteration;
+ unsigned int i, j;
+ unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; /* uses left per method, stored at the index of the method's first occurrence */
+ unsigned int stream_iteration_indices[PMO_DCN4_MAX_DISPLAYS] = { 0 }; /* per stream position: index of the method currently being tried */
+ struct dml2_pmo_pstate_strategy cur_strategy_list = { 0 }; /* ordering under construction */
+
+ /* determine number of displays per method */
+ for (i = 0; i < stream_count; i++) {
+ /* increment the count of the earliest index with the same method */
+ for (j = 0; j < stream_count; j++) {
+ if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) {
+ num_streams_per_method[j] = num_streams_per_method[j] + 1;
+ break;
+ }
+ }
+ }
+
+ cur_strategy_list.allow_state_increase = base_strategy->allow_state_increase;
+
+ i = 0; /* from here on i is the stream position being filled */
+ /* uses a while loop instead of recursion to build permutations of base strategy; ends once position 0 has tried every method index */
+ while (stream_iteration_indices[0] < stream_count) {
+ skip_to_next_stream = false;
+ expanded_strategy_added = false;
+ skip_iteration = false;
+
+ /* determine what to do for this iteration */
+ if (stream_iteration_indices[i] < stream_count && num_streams_per_method[stream_iteration_indices[i]] != 0) {
+ /* decrement count and assign method */
+ cur_strategy_list.per_stream_pstate_method[i] = base_strategy->per_stream_pstate_method[stream_iteration_indices[i]];
+ num_streams_per_method[stream_iteration_indices[i]] -= 1;
+
+ if (i >= stream_count - 1) {
+ /* last position filled: insert into strategy list */
+ insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, expanded_strategy_list, num_expanded_strategies);
+ expanded_strategy_added = true;
+ } else {
+ /* skip to next stream */
+ skip_to_next_stream = true;
+ }
+ } else {
+ /* this method index has no uses left (or is past the end): try the next index */
+ skip_iteration = true;
+ }
+
+ /* prepare for next iteration */
+ if (skip_to_next_stream) {
+ i++;
+ } else {
+ /* restore count */
+ if (!skip_iteration) {
+ num_streams_per_method[stream_iteration_indices[i]] += 1;
+ }
+
+ /* increment iteration count */
+ stream_iteration_indices[i]++;
+
+ /* if iterations are complete, or last stream was reached */
+ if ((stream_iteration_indices[i] >= stream_count || expanded_strategy_added) && i > 0) {
+ /* reset per stream index, decrement i */
+ stream_iteration_indices[i] = 0;
+ i--;
+
+ /* restore previous stream's count and increment index */
+ num_streams_per_method[stream_iteration_indices[i]] += 1;
+ stream_iteration_indices[i]++;
+ }
+ }
+ }
+}
+
+
+/*
+ * Reject splits where, for an index whose base method is vblank, both vblank and its DRR variant
+ * are in use or more than one stream uses the variant.  @variant_strategy is not consulted.
+ */
+static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy,
+	const struct dml2_pmo_pstate_strategy *variant_strategy,
+	const unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS],
+	const unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS],
+	const unsigned int stream_count)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < stream_count; idx++) {
+		if (base_strategy->per_stream_pstate_method[idx] != dml2_pstate_method_vblank)
+			continue;
+		if (num_streams_per_variant_method[idx] > 1 ||
+			(num_streams_per_base_method[idx] > 0 && num_streams_per_variant_method[idx] > 0))
+			return false;
+	}
+
+	return true;
+}
+
+static void expand_variant_strategy( /* append strategies in which some streams use the DRR variant of their base method */
+ const struct dml2_pmo_pstate_strategy *base_strategy, /* strategy whose first @stream_count methods are considered */
+ const unsigned int stream_count,
+ const bool should_permute, /* true: each variant strategy is further expanded by expand_base_strategy(); false: inserted as built */
+ struct dml2_pmo_pstate_strategy *expanded_strategy_list, /* destination list */
+ unsigned int *num_expanded_strategies) /* in/out: number of entries currently in the list */
+{
+ bool variant_found;
+ unsigned int i, j;
+ unsigned int method_index;
+ unsigned int stream_index;
+ unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; /* streams per method, stored at the index of the method's first occurrence */
+ unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; /* of those, how many currently keep the base method */
+ unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; /* ... and how many currently use the variant */
+ enum dml2_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; /* NOTE(review): indexed by stream, yet sized by DML2_MAX_PLANES unlike the arrays above - confirm intent */
+ struct dml2_pmo_pstate_strategy variant_strategy = { 0 };
+
+ /* determine number of displays per method */
+ for (i = 0; i < stream_count; i++) {
+ /* increment the count of the earliest index with the same method */
+ for (j = 0; j < stream_count; j++) {
+ if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) {
+ num_streams_per_method[j] = num_streams_per_method[j] + 1;
+ break;
+ }
+ }
+
+ per_stream_variant_method[i] = convert_strategy_to_drr_variant(base_strategy->per_stream_pstate_method[i]);
+ }
+ memcpy(num_streams_per_base_method, num_streams_per_method, sizeof(unsigned int) * PMO_DCN4_MAX_DISPLAYS);
+
+ memcpy(&variant_strategy, base_strategy, sizeof(struct dml2_pmo_pstate_strategy));
+
+ method_index = 0;
+ /* uses a while loop instead of recursion; appears to walk through the ways of splitting each method's streams between base and variant */
+ while (num_streams_per_base_method[0] > 0 || method_index != 0) {
+ if (method_index == stream_count) {
+ /* construct variant strategy */
+ variant_found = false;
+ stream_index = 0;
+
+ for (i = 0; i < stream_count; i++) {
+ for (j = 0; j < num_streams_per_base_method[i]; j++) {
+ variant_strategy.per_stream_pstate_method[stream_index++] = base_strategy->per_stream_pstate_method[i];
+ }
+
+ for (j = 0; j < num_streams_per_variant_method[i]; j++) {
+ variant_strategy.per_stream_pstate_method[stream_index++] = per_stream_variant_method[i];
+ if (base_strategy->per_stream_pstate_method[i] != per_stream_variant_method[i]) {
+ variant_found = true;
+ }
+ }
+ }
+
+ if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) {
+ if (should_permute) {
+ /* permutations are permitted, proceed to expand */
+ expand_base_strategy(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies);
+ } else {
+ /* no permutations allowed, so add to list now */
+ insert_strategy_into_expanded_list(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies);
+ }
+ }
+
+ /* roll back to the last method (index > 0) that still has bases remaining, resetting the methods after it */
+ for (method_index = stream_count - 1; method_index > 0; method_index--) {
+ if (num_streams_per_base_method[method_index]) {
+ /* bases remaining */
+ break;
+ } else {
+ /* reset counters */
+ num_streams_per_base_method[method_index] = num_streams_per_method[method_index];
+ num_streams_per_variant_method[method_index] = 0;
+ }
+ }
+ }
+
+ if (num_streams_per_base_method[method_index]) { /* move one stream of this method from base to variant, then advance */
+ num_streams_per_base_method[method_index]--;
+ num_streams_per_variant_method[method_index]++;
+
+ method_index++;
+ } else if (method_index != 0) { /* nothing left to move for this method: just advance */
+ method_index++;
+ }
+ }
+}
+
+/* For each of the @num_base_strategies base strategies, append its expansions and then its DRR-variant expansions (permuted) to the list. */
+void pmo_dcn4_fams2_expand_base_pstate_strategies(
+	const struct dml2_pmo_pstate_strategy *base_strategies_list,
+	const unsigned int num_base_strategies,
+	const unsigned int stream_count,
+	struct dml2_pmo_pstate_strategy *expanded_strategy_list,
+	unsigned int *num_expanded_strategies)
+{
+	unsigned int remaining;
+
+	for (remaining = num_base_strategies; remaining > 0; remaining--, base_strategies_list++) {
+		expand_base_strategy(base_strategies_list, stream_count, expanded_strategy_list, num_expanded_strategies);
+		expand_variant_strategy(base_strategies_list, stream_count, true, expanded_strategy_list, num_expanded_strategies);
+	}
+}
+
+/*
+ * Initialize a DCN4 FAMS2 PMO instance.
+ *
+ * Stores the SoC bounding box, IP caps, options and MCG clock table size from
+ * @in_out in the instance, sets the combine limits and the FAMS2 SubVP/DRR
+ * refresh rate limits, and builds the expanded p-state strategy list for each
+ * stream count from the base strategy tables.  Each list's count is reset
+ * before the list is populated, so running this again on the same instance
+ * rebuilds the lists instead of appending to them.
+ *
+ * Always returns true.
+ */
+bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out)
+{
+	int i = 0;
+	struct dml2_pmo_instance *pmo = in_out->instance;
+	unsigned int *num_expanded;
+
+	pmo->soc_bb = in_out->soc_bb;
+	pmo->ip_caps = in_out->ip_caps;
+	pmo->options = in_out->options;
+	pmo->mcg_clock_table_size = in_out->mcg_clock_table_size;
+
+	pmo->mpc_combine_limit = 2;
+	pmo->odm_combine_limit = 4;
+
+	pmo->fams_params.v2.subvp.refresh_rate_limit_max = 175;
+	pmo->fams_params.v2.subvp.refresh_rate_limit_min = 0;
+	pmo->fams_params.v2.drr.refresh_rate_limit_max = 1000;
+	pmo->fams_params.v2.drr.refresh_rate_limit_min = 119;
+
+	/* generate permutations of p-state configs from base strategy list */
+	for (i = 1; i <= PMO_DCN4_MAX_DISPLAYS; i++) {
+		num_expanded = &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1];
+
+		/* start from an empty list so re-initialization cannot run past its end */
+		*num_expanded = 0;
+
+		switch (i) {
+		case 1:
+			DML_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES);
+			pmo_dcn4_fams2_expand_base_pstate_strategies(
+				base_strategy_list_1_display, base_strategy_list_1_display_size, i,
+				pmo->init_data.pmo_dcn4.expanded_strategy_list_1_display, num_expanded);
+			break;
+		case 2:
+			DML_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES);
+			pmo_dcn4_fams2_expand_base_pstate_strategies(
+				base_strategy_list_2_display, base_strategy_list_2_display_size, i,
+				pmo->init_data.pmo_dcn4.expanded_strategy_list_2_display, num_expanded);
+			break;
+		case 3:
+			DML_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES);
+			pmo_dcn4_fams2_expand_base_pstate_strategies(
+				base_strategy_list_3_display, base_strategy_list_3_display_size, i,
+				pmo->init_data.pmo_dcn4.expanded_strategy_list_3_display, num_expanded);
+			break;
+		case 4:
+			DML_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES);
+			pmo_dcn4_fams2_expand_base_pstate_strategies(
+				base_strategy_list_4_display, base_strategy_list_4_display_size, i,
+				pmo->init_data.pmo_dcn4.expanded_strategy_list_4_display, num_expanded);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Htotal, Hblank start/end and Hsync start/end must all be multiples of the
+ * non-zero @denominator for the horizontal timing to count as divisible by it.
+ * Hsync start is always 0, so only the sync width is checked for Hsync.
+ */
+static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator)
+{
+	const unsigned long h_blank_start = timing->h_total - timing->h_front_porch;
+
+	if (timing->h_total % denominator || h_blank_start % denominator)
+		return false;
+
+	return !(timing->h_blank_end % denominator) && !(timing->h_sync_width % denominator);
+}
+
+/*
+ * Tell whether @encoder_type is treated as a DP-class encoder.
+ *
+ * True for dml2_dp, dml2_edp, dml2_dp2p0 and also dml2_none; false for
+ * dml2_hdmi, dml2_hdmifrl and any other value.
+ */
+static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type)
+{
+	/* dml2_none shares the result of the DP encoders */
+	return encoder_type == dml2_dp ||
+		encoder_type == dml2_edp ||
+		encoder_type == dml2_dp2p0 ||
+		encoder_type == dml2_none;
+}
+
+/*
+ * Decide which streams may take part in vmin (dynamic ODM) optimization.
+ *
+ * Returns false, leaving stage4 untouched, when the instance options disable
+ * dynamic ODM (globally, or for configurations with more than one stream).
+ * Otherwise flags every stream that must not be optimized in
+ * stage4.unoptimizable_streams[], sets stage4.performed and returns true.
+ */
+bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out)
+{
+	const struct dml2_display_cfg *cfg = &in_out->base_display_config->display_config;
+	const struct dml2_core_mode_support_result *support = &in_out->base_display_config->mode_support_result;
+	struct dml2_optimization_stage4_state *stage4 = &in_out->base_display_config->stage4;
+	unsigned int plane, stream;
+
+	if (in_out->instance->options->disable_dyn_odm)
+		return false;
+	if (in_out->instance->options->disable_dyn_odm_for_multi_stream && cfg->num_streams > 1)
+		return false;
+
+	/*
+	 * vmin optimization must be able to switch off seamlessly whenever the
+	 * new configuration is no longer supported, but going from ODM combine
+	 * to MPC combine is not always seamless: without enough free pipes the
+	 * secondary OPP heads have to be reused as secondary DPP pipes, which
+	 * is expected to glitch.  To avoid that transition, a stream whose
+	 * base configuration already needs MPC combine (more than one DPP on a
+	 * plane while the stream uses a single ODM segment) is not optimized.
+	 */
+	for (plane = 0; plane < cfg->num_planes; plane++) {
+		stream = cfg->plane_descriptors[plane].stream_index;
+		if (support->cfg_support_info.plane_support_info[plane].dpps_used > 1 &&
+			support->cfg_support_info.stream_support_info[stream].odms_used == 1)
+			stage4->unoptimizable_streams[stream] = true;
+	}
+
+	/*
+	 * A stream is also left alone when any of these hold:
+	 *  - a per-stream override disables dynamic ODM;
+	 *  - it has valid SVP metadata and the options disable dynamic ODM for such streams;
+	 *  - its horizontal timing is not divisible by 2 (ODM segments must be equal in size);
+	 *  - its encoder is not DP class (hardware ODM transitions are seamless for DP only).
+	 */
+	for (stream = 0; stream < cfg->num_streams; stream++) {
+		if (cfg->stream_descriptors[stream].overrides.disable_dynamic_odm ||
+			(in_out->base_display_config->stage3.stream_svp_meta[stream].valid &&
+				in_out->instance->options->disable_dyn_odm_for_stream_with_svp) ||
+			!is_h_timing_divisible_by(&cfg->stream_descriptors[stream].timing, 2) ||
+			!is_dp_encoder(cfg->stream_descriptors[stream].output.output_encoder))
+			stage4->unoptimizable_streams[stream] = true;
+	}
+
+	stage4->performed = true;
+	return true;
+}
+
+/*
+ * Report whether the configuration's DISPCLK is within the vmin limit; a limit that is not above zero always passes.
+ */
+bool pmo_dcn4_fams2_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out)
+{
+	const bool exceeds_limit = in_out->vmin_limits->dispclk_khz > 0 &&
+		in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz;
+
+	return !exceeds_limit;
+}
+
+/*
+ * Index of the stream with the highest pixel clock per ODM segment in use (load 0 if none in use); lowest index wins ties, -1 if no streams.
+ */
+static int find_highest_odm_load_stream_index(const struct dml2_display_cfg *display_config,
+	const struct dml2_core_mode_support_result *mode_support_result)
+{
+	int best_index = -1, best_load = -1;
+	unsigned int stream;
+
+	for (stream = 0; stream < display_config->num_streams; stream++) {
+		int load = 0;
+
+		if (mode_support_result->cfg_support_info.stream_support_info[stream].odms_used > 0)
+			load = display_config->stream_descriptors[stream].timing.pixel_clock_khz
+				/ mode_support_result->cfg_support_info.stream_support_info[stream].odms_used;
+		if (load > best_load) {
+			best_load = load;
+			best_index = stream;
+		}
+	}
+	return best_index;
+}
+
+/*
+ * Check the extra requirements of ODM combine 3:1 (@odm_factor 3) and 4:1
+ * (@odm_factor 4) for the stream at @stream_index.
+ *
+ * In both modes the OTG_valid_pixel rate is 1/4 of the actual pixel rate, so
+ * the horizontal timing must be divisible by 4.  When DSC is enabled, the
+ * horizontal slice count must also be divisible by @odm_factor.
+ */
+static bool vmin_odm_factor_is_usable(
+	const struct dml2_display_cfg *display_config,
+	const struct dml2_core_mode_support_result *mode_support_result,
+	int stream_index,
+	int odm_factor)
+{
+	if (!is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4))
+		return false;
+
+	if (!mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable)
+		return true;
+
+	return mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % odm_factor == 0;
+}
+
+/*
+ * Try to lower the required DISPCLK by raising the ODM combine factor of the
+ * stream with the highest per-ODM-segment load, since DISPCLK is bounded by
+ * that stream.
+ *
+ * Returns false without writing in_out->optimized_display_config when there
+ * is no such stream, the stream is flagged in stage4.unoptimizable_streams[],
+ * or it already uses at least the instance's odm_combine_limit segments.
+ *
+ * Otherwise in_out->base_display_config is copied to
+ * in_out->optimized_display_config and the stream's ODM mode override in the
+ * copy is stepped up until a usable mode is found:
+ *  - 2:1 is always usable;
+ *  - 3:1 and 4:1 are usable if vmin_odm_factor_is_usable() accepts them.
+ *
+ * Returns true if a usable mode was found.  On a false return after the copy,
+ * the override is left at whatever mode the stepping reached.
+ */
+bool pmo_dcn4_fams2_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out)
+{
+	const struct dml2_display_cfg *cfg = &in_out->base_display_config->display_config;
+	const struct dml2_core_mode_support_result *support = &in_out->base_display_config->mode_support_result;
+	struct dml2_stream_parameters *target;
+	unsigned int odms_in_use;
+	bool usable = false;
+	int target_index;
+
+	/* the highest odm load stream must be optimizable to continue */
+	target_index = find_highest_odm_load_stream_index(cfg, support);
+	if (target_index < 0)
+		return false;
+
+	if (in_out->base_display_config->stage4.unoptimizable_streams[target_index])
+		return false;
+
+	odms_in_use = support->cfg_support_info.stream_support_info[target_index].odms_used;
+	if ((int)odms_in_use >= in_out->instance->odm_combine_limit)
+		return false;
+
+	memcpy(in_out->optimized_display_config,
+		in_out->base_display_config,
+		sizeof(struct display_configuation_with_meta));
+
+	target = &in_out->optimized_display_config->display_config.stream_descriptors[target_index];
+
+	/* step the override up one mode at a time until one is usable */
+	while (!usable && increase_odm_combine_factor(&target->overrides.odm_mode, odms_in_use)) {
+		switch (target->overrides.odm_mode) {
+		case dml2_odm_mode_combine_2to1:
+			usable = true;
+			break;
+		case dml2_odm_mode_combine_3to1:
+			usable = vmin_odm_factor_is_usable(cfg, support, target_index, 3);
+			break;
+		case dml2_odm_mode_combine_4to1:
+			usable = vmin_odm_factor_is_usable(cfg, support, target_index, 4);
+			break;
+		default:
+			/* a successful step never produces any other mode */
+			break;
+		}
+	}
+
+	return usable;
+}
+
+static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset)
+{
+	*bit_field |= 1u << bit_offset; /* sets bit @bit_offset; unsigned literal because shifting a signed 1 into bit 31 is undefined (@bit_offset must be below the width of unsigned int) */
+}
+
+/*
+ * Return true if bit @bit_offset (below the width of unsigned int) of @bit_field is set; the unsigned literal keeps bit 31 well defined.
+ */
+static bool is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset)
+{
+	return (bit_field & (1u << bit_offset)) != 0;
+}
+
+/*
+ * Partition the streams of @display_config into synchronized timing groups,
+ * storing the result in pmo->scratch.pmo_dcn4.  Streams are visited in index
+ * order; each stream not yet in a group opens a new one and sets the group's
+ * line time from its own h_total and pixel clock.  If that stream has DRR
+ * enabled no other stream is added: the group is flagged DRR enabled and, when
+ * DRR is not disallowed and is active (fixed or variable), DRR active.
+ * Otherwise every later stream with a byte-identical timing config joins.
+ */
+static void build_synchronized_timing_groups(
+	struct dml2_pmo_instance *pmo,
+	struct display_configuation_with_meta *display_config)
+{
+	struct dml2_pmo_scratch *s = &pmo->scratch;
+	struct dml2_timing_cfg *leader;
+	unsigned int grouped_streams = 0;
+	unsigned int group = 0;
+	unsigned int i, j;
+
+	/* clear all group state */
+	memset(s->pmo_dcn4.synchronized_timing_group_masks, 0, sizeof(s->pmo_dcn4.synchronized_timing_group_masks));
+	memset(s->pmo_dcn4.group_is_drr_enabled, 0, sizeof(s->pmo_dcn4.group_is_drr_enabled));
+	memset(s->pmo_dcn4.group_is_drr_active, 0, sizeof(s->pmo_dcn4.group_is_drr_active));
+	memset(s->pmo_dcn4.group_line_time_us, 0, sizeof(s->pmo_dcn4.group_line_time_us));
+	s->pmo_dcn4.num_timing_groups = 0;
+
+	for (i = 0; i < display_config->display_config.num_streams; i++) {
+		/* a stream that already belongs to a group needs no group of its own */
+		if (is_bit_set_in_bitfield(grouped_streams, i))
+			continue;
+
+		/* open a new group containing just this stream */
+		leader = &display_config->display_config.stream_descriptors[i].timing;
+		set_bit_in_bitfield(&grouped_streams, i);
+		set_bit_in_bitfield(&s->pmo_dcn4.synchronized_timing_group_masks[group], i);
+		s->pmo_dcn4.group_line_time_us[group] = (double)leader->h_total / leader->pixel_clock_khz * 1000.0;
+
+		if (leader->drr_config.enabled) {
+			/* a timing with DRR in use is not synchronizable */
+			s->pmo_dcn4.group_is_drr_enabled[group] = true;
+			s->pmo_dcn4.group_is_drr_active[group] = !leader->drr_config.disallowed &&
+				(leader->drr_config.drr_active_fixed || leader->drr_config.drr_active_variable);
+		} else {
+			for (j = i + 1; j < display_config->display_config.num_streams; j++) {
+				if (memcmp(leader, &display_config->display_config.stream_descriptors[j].timing, sizeof(struct dml2_timing_cfg)) != 0)
+					continue;
+				set_bit_in_bitfield(&s->pmo_dcn4.synchronized_timing_group_masks[group], j);
+				set_bit_in_bitfield(&grouped_streams, j);
+			}
+		}
+		group++;
+	}
+
+	s->pmo_dcn4.num_timing_groups = group;
+}
+
+/*
+ * Check that every stream selected by @mask also has its bit set in the
+ * scratch stream_vactive_capability_mask. Only stream indices below
+ * num_streams are examined. Returns true when no selected stream lacks the
+ * capability bit (including when no stream is selected).
+ */
+static bool all_timings_support_vactive(const struct dml2_pmo_instance *pmo,
+	const struct display_configuation_with_meta *display_config,
+	unsigned int mask)
+{
+	unsigned int stream;
+
+	for (stream = 0; stream < display_config->display_config.num_streams; stream++) {
+		if (!is_bit_set_in_bitfield(mask, stream))
+			continue;
+
+		/* a selected stream without enough vactive margin fails the check */
+		if (!is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.stream_vactive_capability_mask, stream))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Check that the streams selected by @mask can share a vblank p-state
+ * window, i.e. that @mask is exactly one synchronized timing group.
+ *
+ * The first selected stream (below num_streams) is located, then the timing
+ * group that contains it is looked up and its stream mask is compared with
+ * @mask. synchronized_timing_group_masks[] is indexed by timing group, not
+ * by stream, so the group has to be searched for rather than indexed with
+ * the stream number.
+ *
+ * Returns true when @mask selects no stream in range or matches the group
+ * mask; false when the masks differ or the stream is in no recorded group.
+ */
+static bool all_timings_support_vblank(const struct dml2_pmo_instance *pmo,
+	const struct display_configuation_with_meta *display_config,
+	unsigned int mask)
+{
+	const struct dml2_pmo_scratch *s = &pmo->scratch;
+	unsigned int stream;
+	unsigned int group;
+
+	/* find the first vblank stream */
+	for (stream = 0; stream < display_config->display_config.num_streams; stream++) {
+		if (is_bit_set_in_bitfield(mask, stream))
+			break;
+	}
+
+	/* nothing selected within range, so nothing can conflict */
+	if (stream >= display_config->display_config.num_streams)
+		return true;
+
+	/* compare against the mask of the group this stream belongs to */
+	for (group = 0; group < s->pmo_dcn4.num_timing_groups; group++) {
+		if (is_bit_set_in_bitfield(s->pmo_dcn4.synchronized_timing_group_masks[group], stream))
+			return mask == s->pmo_dcn4.synchronized_timing_group_masks[group];
+	}
+
+	/* stream is not part of any recorded group: not synchronizable */
+	return false;
+}
+
+/*
+ * Sum the OTG vline counts stored in @fams2_meta that make up the SubVP
+ * microschedule: contention delay, SubVP programming delay, phantom vtotal,
+ * prefetch-to-MALL delay and the DRAM clock change blackout.
+ */
+static unsigned int calc_svp_microschedule(const struct dml2_fams2_meta *fams2_meta)
+{
+	unsigned int total_vlines = fams2_meta->contention_delay_otg_vlines;
+
+	total_vlines += fams2_meta->method_subvp.programming_delay_otg_vlines;
+	total_vlines += fams2_meta->method_subvp.phantom_vtotal;
+	total_vlines += fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines;
+	total_vlines += fams2_meta->dram_clk_change_blackout_otg_vlines;
+
+	return total_vlines;
+}
+
+/*
+ * Check that every stream selected by @mask can use the DRR p-state method.
+ *
+ * All DML2_MAX_PLANES bit positions of @mask are scanned. A selected stream
+ * fails when DRR is not enabled on its timing, when the stretched vtotal
+ * exceeds max_vtotal, when the nominal refresh rate is outside the
+ * fams_params.v2.drr limits, or when a non-zero max_instant_vtotal_delta is
+ * smaller than the required stretch. Returns true if no stream fails.
+ */
+static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo,
+	const struct display_configuation_with_meta *display_config,
+	unsigned int mask)
+{
+	unsigned int stream;
+
+	for (stream = 0; stream < DML2_MAX_PLANES; stream++) {
+		const struct dml2_stream_parameters *descriptor;
+		const struct dml2_fams2_meta *meta;
+
+		if (!is_bit_set_in_bitfield(mask, stream))
+			continue;
+
+		descriptor = &display_config->display_config.stream_descriptors[stream];
+		meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream];
+
+		/* DRR has to be enabled on the timing in the first place */
+		if (!descriptor->timing.drr_config.enabled)
+			return false;
+
+		/* the required vtotal cannot be reached */
+		if (meta->method_drr.stretched_vtotal > meta->max_vtotal)
+			return false;
+
+		/* nominal refresh rate must lie inside the allowed DRR window */
+		if (meta->nom_refresh_rate_hz < pmo->fams_params.v2.drr.refresh_rate_limit_min ||
+				meta->nom_refresh_rate_hz > pmo->fams_params.v2.drr.refresh_rate_limit_max)
+			return false;
+
+		/* a non-zero instant delta limit caps the permitted stretch */
+		if (descriptor->timing.drr_config.max_instant_vtotal_delta > 0 &&
+				meta->method_drr.stretched_vtotal - meta->nom_vtotal > descriptor->timing.drr_config.max_instant_vtotal_delta)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Check that every stream selected by @mask can use the SubVP p-state
+ * method.
+ *
+ * Pass 1 walks all planes: a plane on a selected stream must be the only
+ * plane of that stream, must have a recout height spanning vactive and must
+ * not be rotated. The mcaches of DCC-enabled planes are summed, counted
+ * twice for planes on selected streams, and the total must not exceed
+ * soc_bb->num_dcc_mcaches.
+ *
+ * Pass 2 walks the selected streams: SubVP must not be disabled by override,
+ * the timing must not be interlaced, the microschedule must be shorter than
+ * v_active and the nominal refresh rate must lie inside the
+ * fams_params.v2.subvp limits.
+ */
+static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo,
+	const struct display_configuation_with_meta *display_config,
+	unsigned int mask)
+{
+	unsigned int planes_on_stream[DML2_MAX_PLANES] = { 0 };
+	unsigned int mcaches_needed = 0;
+	unsigned int idx;
+
+	/* pass 1: per-plane restrictions and mcache accounting */
+	for (idx = 0; idx < display_config->display_config.num_planes; idx++) {
+		const struct dml2_plane_parameters *plane = &display_config->display_config.plane_descriptors[idx];
+		unsigned int plane_mcaches = 0;
+
+		if (plane->surface.dcc.enable) {
+			plane_mcaches += display_config->stage2.mcache_allocations[idx].num_mcaches_plane0 +
+				display_config->stage2.mcache_allocations[idx].num_mcaches_plane1 -
+				(display_config->stage2.mcache_allocations[idx].last_slice_sharing.plane0_plane1 ? 1 : 0);
+		}
+
+		if (is_bit_set_in_bitfield(mask, (unsigned char)plane->stream_index)) {
+			planes_on_stream[plane->stream_index]++;
+
+			/* only a single plane per SubVP stream ... */
+			if (planes_on_stream[plane->stream_index] > 1)
+				return false;
+
+			/* ... whose recout height covers the entire otg vactive ... */
+			if (!plane->composition.rect_out_height_spans_vactive)
+				return false;
+
+			/* ... and which is not rotated */
+			if (plane->composition.rotation_angle != dml2_rotation_0)
+				return false;
+
+			/* phantom requires same number of mcaches as main */
+			if (plane->surface.dcc.enable)
+				plane_mcaches *= 2;
+		}
+
+		mcaches_needed += plane_mcaches;
+	}
+
+	/* too many mcaches required */
+	if (mcaches_needed > pmo->soc_bb->num_dcc_mcaches)
+		return false;
+
+	/* pass 2: per-stream restrictions */
+	for (idx = 0; idx < DML2_MAX_PLANES; idx++) {
+		const struct dml2_stream_parameters *stream;
+		const struct dml2_fams2_meta *meta;
+		unsigned int microschedule_vlines;
+
+		if (!is_bit_set_in_bitfield(mask, idx))
+			continue;
+
+		stream = &display_config->display_config.stream_descriptors[idx];
+		meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[idx];
+
+		if (stream->overrides.disable_subvp)
+			return false;
+
+		/* block if using an interlaced timing */
+		if (stream->timing.interlaced)
+			return false;
+
+		/* svp main stream's vactive must be able to fit the microschedule */
+		microschedule_vlines = calc_svp_microschedule(meta);
+		if (microschedule_vlines >= stream->timing.v_active)
+			return false;
+
+		/* refresh rate must be within the allowed bounds */
+		if (meta->nom_refresh_rate_hz < pmo->fams_params.v2.subvp.refresh_rate_limit_min ||
+				meta->nom_refresh_rate_hz > pmo->fams_params.v2.subvp.refresh_rate_limit_max)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Append a copy of @pstate_strategy to the scratch candidate list and bump
+ * num_pstate_candidates. No capacity check is made here, so the caller has to
+ * make sure the list still has room. @stream_count is currently unused.
+ */
+static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch)
+{
+	struct dml2_pmo_pstate_strategy *next_free =
+		&scratch->pmo_dcn4.pstate_strategy_candidates[scratch->pmo_dcn4.num_pstate_candidates];
+
+	*next_free = *pstate_strategy;
+	scratch->pmo_dcn4.num_pstate_candidates++;
+}
+
+/*
+ * Translate a forced uclk p-state change strategy into the p-state method it
+ * stands for. The force_mall_full_frame and auto strategies, as well as any
+ * unknown value, map to dml2_pstate_method_na.
+ */
+static enum dml2_pstate_method uclk_pstate_strategy_override_to_pstate_method(const enum dml2_uclk_pstate_change_strategy override_strategy)
+{
+	switch (override_strategy) {
+	case dml2_uclk_pstate_change_strategy_force_vactive:
+		return dml2_pstate_method_vactive;
+	case dml2_uclk_pstate_change_strategy_force_vblank:
+		return dml2_pstate_method_vblank;
+	case dml2_uclk_pstate_change_strategy_force_drr:
+		return dml2_pstate_method_fw_drr;
+	case dml2_uclk_pstate_change_strategy_force_mall_svp:
+		return dml2_pstate_method_fw_svp;
+	case dml2_uclk_pstate_change_strategy_force_mall_full_frame:
+	case dml2_uclk_pstate_change_strategy_auto:
+	default:
+		return dml2_pstate_method_na;
+	}
+}
+
+/*
+ * Translate a p-state method into the uclk p-state change strategy override
+ * that would force it. The plain and the fw *_drr flavours of vactive, vblank
+ * and svp share one override each; fw_drr maps to force_drr. Reserved
+ * methods, the count sentinel, "na" and unknown values map to auto.
+ */
+static enum dml2_uclk_pstate_change_strategy pstate_method_to_uclk_pstate_strategy_override(const enum dml2_pstate_method method)
+{
+	switch (method) {
+	case dml2_pstate_method_vactive:
+	case dml2_pstate_method_fw_vactive_drr:
+		return dml2_uclk_pstate_change_strategy_force_vactive;
+	case dml2_pstate_method_vblank:
+	case dml2_pstate_method_fw_vblank_drr:
+		return dml2_uclk_pstate_change_strategy_force_vblank;
+	case dml2_pstate_method_fw_svp:
+	case dml2_pstate_method_fw_svp_drr:
+		return dml2_uclk_pstate_change_strategy_force_mall_svp;
+	case dml2_pstate_method_fw_drr:
+		return dml2_uclk_pstate_change_strategy_force_drr;
+	case dml2_pstate_method_reserved_hw:
+	case dml2_pstate_method_reserved_fw:
+	case dml2_pstate_method_reserved_fw_drr_clamped:
+	case dml2_pstate_method_reserved_fw_drr_var:
+	case dml2_pstate_method_count:
+	case dml2_pstate_method_na:
+	default:
+		return dml2_uclk_pstate_change_strategy_auto;
+	}
+}
+
+/*
+ * Check that @method is compatible with the per-plane strategy overrides of
+ * all planes selected by @plane_mask. A plane is compatible when its override
+ * is "auto" or equals the override that corresponds to @method.
+ */
+static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pstate_method method)
+{
+	unsigned int plane;
+
+	for (plane = 0; plane < DML2_MAX_PLANES; plane++) {
+		const struct dml2_plane_parameters *descriptor;
+
+		if (!is_bit_set_in_bitfield(plane_mask, plane))
+			continue;
+
+		descriptor = &display_cfg->display_config.plane_descriptors[plane];
+
+		/* planes without a forced strategy accept any method */
+		if (descriptor->overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto)
+			continue;
+
+		/* a forced strategy has to be the one matching the method */
+		if (descriptor->overrides.uclk_pstate_change_strategy != pstate_method_to_uclk_pstate_strategy_override(method))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Derive allow_time_us and disallow_time_us of @stream_method_fams2_meta from
+ * its allow start/end OTG vlines and period_us.
+ *
+ * The allow time is the signed vline difference (end - start) multiplied by
+ * the OTG line time of @stream_fams2_meta. The disallow time is the rest of
+ * the period, or zero when the allow time covers the whole period.
+ */
+static void build_method_scheduling_params(
+	struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta,
+	struct dml2_fams2_meta *stream_fams2_meta)
+{
+	const int allow_width_vlines = (int)stream_method_fams2_meta->allow_end_otg_vline -
+		(int)stream_method_fams2_meta->allow_start_otg_vline;
+
+	stream_method_fams2_meta->allow_time_us =
+		(double)allow_width_vlines * stream_fams2_meta->otg_vline_time_us;
+
+	/* when allow wave overlaps an entire frame, it is always schedulable (DRR can do this) */
+	stream_method_fams2_meta->disallow_time_us =
+		(stream_method_fams2_meta->allow_time_us >= stream_method_fams2_meta->period_us) ?
+		0.0 :
+		stream_method_fams2_meta->period_us - stream_method_fams2_meta->allow_time_us;
+}
+
+/*
+ * Return the common scheduling meta of stream @stream_idx for the given
+ * p-state method: the vactive, vblank, subvp or drr entry of the stream's
+ * fams2 scratch meta. Returns NULL for reserved methods, the count sentinel,
+ * "na" and unknown values.
+ */
+static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta(
+	struct dml2_pmo_instance *pmo,
+	enum dml2_pstate_method stream_pstate_method,
+	int stream_idx)
+{
+	switch (stream_pstate_method) {
+	case dml2_pstate_method_vactive:
+	case dml2_pstate_method_fw_vactive_drr:
+		return &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vactive.common;
+	case dml2_pstate_method_vblank:
+	case dml2_pstate_method_fw_vblank_drr:
+		return &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vblank.common;
+	case dml2_pstate_method_fw_svp:
+	case dml2_pstate_method_fw_svp_drr:
+		return &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_subvp.common;
+	case dml2_pstate_method_fw_drr:
+		return &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_drr.common;
+	case dml2_pstate_method_reserved_hw:
+	case dml2_pstate_method_reserved_fw:
+	case dml2_pstate_method_reserved_fw_drr_clamped:
+	case dml2_pstate_method_reserved_fw_drr_var:
+	case dml2_pstate_method_count:
+	case dml2_pstate_method_na:
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * Check whether the streams of timing group @timing_group_idx share a usable
+ * allow window under @pstate_strategy and fill @group_fams2_meta with it.
+ *
+ * The window starts as the allow window of the first stream in the group
+ * (the base stream) and is narrowed by every further stream: the start moves
+ * to the largest allow start, the end to the smallest allow end. Streams
+ * whose method has no scheduling meta are skipped while narrowing. The
+ * period is taken from the base stream.
+ *
+ * Returns false when the group mask selects no stream below num_streams,
+ * when the base stream's method has no scheduling meta, when the narrowed
+ * window is empty, when the resulting allow time is not positive, or when
+ * the disallow time is not below ip_caps->fams2.max_allow_delay_us.
+ */
+static bool is_timing_group_schedulable(
+	struct dml2_pmo_instance *pmo,
+	const struct display_configuation_with_meta *display_cfg,
+	const struct dml2_pmo_pstate_strategy *pstate_strategy,
+	const unsigned int timing_group_idx,
+	struct dml2_fams2_per_method_common_meta *group_fams2_meta)
+{
+	unsigned int i;
+	struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta;
+
+	unsigned int base_stream_idx = 0;
+	struct dml2_pmo_scratch *s = &pmo->scratch;
+
+	/* find base stream idx */
+	for (base_stream_idx = 0; base_stream_idx < display_cfg->display_config.num_streams; base_stream_idx++) {
+		if (is_bit_set_in_bitfield(s->pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], base_stream_idx)) {
+			/* master stream found */
+			break;
+		}
+	}
+
+	/*
+	 * No stream belongs to this group. Bail out instead of using
+	 * base_stream_idx == num_streams as an index into the per-stream
+	 * arrays below.
+	 */
+	if (base_stream_idx >= display_cfg->display_config.num_streams)
+		return false;
+
+	/* init allow start and end lines for timing group */
+	stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx);
+	if (!stream_method_fams2_meta)
+		return false;
+
+	group_fams2_meta->allow_start_otg_vline = stream_method_fams2_meta->allow_start_otg_vline;
+	group_fams2_meta->allow_end_otg_vline = stream_method_fams2_meta->allow_end_otg_vline;
+	group_fams2_meta->period_us = stream_method_fams2_meta->period_us;
+
+	/* narrow the window with every other stream of the group */
+	for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) {
+		if (!is_bit_set_in_bitfield(s->pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i))
+			continue;
+
+		stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i);
+		if (!stream_method_fams2_meta)
+			continue;
+
+		if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) {
+			/* set group allow start to larger otg vline */
+			group_fams2_meta->allow_start_otg_vline = stream_method_fams2_meta->allow_start_otg_vline;
+		}
+
+		if (group_fams2_meta->allow_end_otg_vline > stream_method_fams2_meta->allow_end_otg_vline) {
+			/* set group allow end to smaller otg vline */
+			group_fams2_meta->allow_end_otg_vline = stream_method_fams2_meta->allow_end_otg_vline;
+		}
+
+		/* check waveform still has positive width */
+		if (group_fams2_meta->allow_start_otg_vline >= group_fams2_meta->allow_end_otg_vline) {
+			/* timing group is not schedulable */
+			return false;
+		}
+	}
+
+	/* calculate the rest of the meta, using the base stream's line time */
+	build_method_scheduling_params(group_fams2_meta, &s->pmo_dcn4.stream_fams2_meta[base_stream_idx]);
+
+	return group_fams2_meta->allow_time_us > 0.0 &&
+			group_fams2_meta->disallow_time_us < pmo->ip_caps->fams2.max_allow_delay_us;
+}
+
+/*
+ * Decide whether the p-state allow windows of all timing groups can be
+ * scheduled together for the given per-stream @pstate_strategy.
+ *
+ * The group scratch arrays are cleared first, then the check proceeds in
+ * stages and returns as soon as a stage reaches a verdict:
+ *  0) no streams: trivially schedulable
+ *  1) every timing group must be schedulable on its own
+ *     (is_timing_group_schedulable); with at most one group that result is
+ *     final
+ *  2) worst-case masking of each group's allow window by the disallow
+ *     windows of the other groups
+ *  3) allow window of each group versus the period of the group with the
+ *     next shorter period
+ *  4) exactly two groups and no FW method on streams 0 and 1: bound the
+ *     delay caused by the drift between the two periods
+ *
+ * Stages 2 and 3 only accept when the summed disallow time of all groups is
+ * below ip_caps->fams2.max_allow_delay_us.
+ */
+static bool is_config_schedulable(
+	struct dml2_pmo_instance *pmo,
+	const struct display_configuation_with_meta *display_cfg,
+	const struct dml2_pmo_pstate_strategy *pstate_strategy)
+{
+	unsigned int i, j;
+	bool schedulable;
+	struct dml2_pmo_scratch *s = &pmo->scratch;
+
+	/* sum of all group disallow times; reused as the drift delay in stage 4 */
+	double max_allow_delay_us = 0.0;
+
+	memset(s->pmo_dcn4.group_common_fams2_meta, 0, sizeof(s->pmo_dcn4.group_common_fams2_meta));
+	memset(s->pmo_dcn4.sorted_group_gtl_disallow_index, 0, sizeof(unsigned int) * DML2_MAX_PLANES);
+
+	/* search for a general solution to the schedule */
+
+	/* STAGE 0: Early return for special cases */
+	if (display_cfg->display_config.num_streams == 0) {
+		return true;
+	}
+
+	/* STAGE 1: confirm allow waves overlap for synchronizable streams */
+	schedulable = true;
+	for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) {
+		/* both sort index arrays start out as the identity mapping */
+		s->pmo_dcn4.sorted_group_gtl_disallow_index[i] = i;
+		s->pmo_dcn4.sorted_group_gtl_period_index[i] = i;
+		if (!is_timing_group_schedulable(pmo, display_cfg, pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) {
+			/* synchronized timing group was not schedulable */
+			schedulable = false;
+			break;
+		}
+		max_allow_delay_us += s->pmo_dcn4.group_common_fams2_meta[i].disallow_time_us;
+	}
+
+	if ((schedulable && s->pmo_dcn4.num_timing_groups <= 1) || !schedulable) {
+		/* 1. the only timing group was schedulable, so early pass
+		 * 2. one of the timing groups was not schedulable, so early fail */
+		return schedulable;
+	}
+
+	/* STAGE 2: Check allow can't be masked entirely by other disallows */
+	schedulable = true;
+
+	/* sort disallow times from greatest to least (bubble sort on the index array) */
+	for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) {
+		bool swapped = false;
+
+		for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) {
+			double j_disallow_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j]].disallow_time_us;
+			double jp1_disallow_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]].disallow_time_us;
+			if (j_disallow_us < jp1_disallow_us) {
+				/* swap as A < B */
+				swap(s->pmo_dcn4.sorted_group_gtl_disallow_index[j],
+						s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]);
+				swapped = true;
+			}
+		}
+
+		/* sorted, exit early */
+		if (!swapped)
+			break;
+	}
+
+	/* Check worst case disallow region occurs in the middle of allow for the
+	 * other display, or when >2 streams continue to halve the remaining allow time.
+	 */
+	for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) {
+		if (s->pmo_dcn4.group_common_fams2_meta[i].disallow_time_us <= 0.0) {
+			/* this timing group always allows */
+			continue;
+		}
+
+		/* remaining allow time of group i after the other groups' disallows are applied */
+		double max_allow_time_us = s->pmo_dcn4.group_common_fams2_meta[i].allow_time_us;
+		for (j = 0; j < s->pmo_dcn4.num_timing_groups; j++) {
+			unsigned int sorted_j = s->pmo_dcn4.sorted_group_gtl_disallow_index[j];
+			/* stream can't overlap itself */
+			if (i != sorted_j && s->pmo_dcn4.group_common_fams2_meta[sorted_j].disallow_time_us > 0.0) {
+				max_allow_time_us = math_min2(
+						s->pmo_dcn4.group_common_fams2_meta[sorted_j].allow_time_us,
+						(max_allow_time_us - s->pmo_dcn4.group_common_fams2_meta[sorted_j].disallow_time_us) / 2);
+
+				if (max_allow_time_us < 0.0) {
+					/* failed exit early */
+					break;
+				}
+			}
+		}
+
+		if (max_allow_time_us <= 0.0) {
+			/* not enough time for microschedule in the worst case */
+			schedulable = false;
+			break;
+		}
+	}
+
+	if (schedulable && max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) {
+		return true;
+	}
+
+	/* STAGE 3: check larger allow can fit period of all other streams */
+	schedulable = true;
+
+	/* sort periods from greatest to least (bubble sort on the index array) */
+	for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) {
+		bool swapped = false;
+
+		for (j = 0; j < s->pmo_dcn4.num_timing_groups - 1; j++) {
+			double j_period_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j]].period_us;
+			double jp1_period_us = s->pmo_dcn4.group_common_fams2_meta[s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]].period_us;
+			if (j_period_us < jp1_period_us) {
+				/* swap as A < B */
+				swap(s->pmo_dcn4.sorted_group_gtl_period_index[j],
+						s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]);
+				swapped = true;
+			}
+		}
+
+		/* sorted, exit early */
+		if (!swapped)
+			break;
+	}
+
+	/* check larger allow can fit period of all other streams;
+	 * a next group with DRR enabled and active also fails this stage
+	 */
+	for (i = 0; i < s->pmo_dcn4.num_timing_groups - 1; i++) {
+		unsigned int sorted_i = s->pmo_dcn4.sorted_group_gtl_period_index[i];
+		unsigned int sorted_ip1 = s->pmo_dcn4.sorted_group_gtl_period_index[i + 1];
+
+		if (s->pmo_dcn4.group_common_fams2_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_fams2_meta[sorted_ip1].period_us ||
+				(s->pmo_dcn4.group_is_drr_enabled[sorted_ip1] && s->pmo_dcn4.group_is_drr_active[sorted_ip1])) {
+			schedulable = false;
+			break;
+		}
+	}
+
+	if (schedulable && max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) {
+		return true;
+	}
+
+	/* STAGE 4: When using HW exclusive modes, check disallow alignments are within allowed threshold
+	 * NOTE(review): schedulable still holds the stage 3 verdict here. If stage 3
+	 * passed but the delay budget above was exceeded and this stage does not
+	 * apply, true is returned - confirm this is intended.
+	 * NOTE(review): per_stream_pstate_method[] is read at stream indices 0 and 1
+	 * while group_common_fams2_meta[] is read at group indices 0 and 1 - confirm
+	 * these are meant to line up when two groups hold more than two streams.
+	 */
+	if (s->pmo_dcn4.num_timing_groups == 2 &&
+			!is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[0]) &&
+			!is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[1])) {
+		double period_ratio;
+		double max_shift_us;
+		double shift_per_period;
+
+		/* default period_0 > period_1 */
+		unsigned int lrg_idx = 0;
+		unsigned int sml_idx = 1;
+		if (s->pmo_dcn4.group_common_fams2_meta[0].period_us < s->pmo_dcn4.group_common_fams2_meta[1].period_us) {
+			/* period_0 < period_1 */
+			lrg_idx = 1;
+			sml_idx = 0;
+		}
+		/* small period times the fractional part of large/small period ratio */
+		period_ratio = s->pmo_dcn4.group_common_fams2_meta[lrg_idx].period_us / s->pmo_dcn4.group_common_fams2_meta[sml_idx].period_us;
+		shift_per_period = s->pmo_dcn4.group_common_fams2_meta[sml_idx].period_us * (period_ratio - math_floor(period_ratio));
+		max_shift_us = s->pmo_dcn4.group_common_fams2_meta[lrg_idx].disallow_time_us - s->pmo_dcn4.group_common_fams2_meta[sml_idx].allow_time_us;
+		/* overwrites the stage 1 sum; NOTE(review): divides by shift_per_period before it is checked to be > 0.0 below */
+		max_allow_delay_us = max_shift_us / shift_per_period * s->pmo_dcn4.group_common_fams2_meta[lrg_idx].period_us;
+
+		if (shift_per_period > 0.0 &&
+			shift_per_period < s->pmo_dcn4.group_common_fams2_meta[lrg_idx].allow_time_us + s->pmo_dcn4.group_common_fams2_meta[sml_idx].allow_time_us &&
+			max_allow_delay_us < pmo->ip_caps->fams2.max_allow_delay_us) {
+			schedulable = true;
+		}
+	}
+
+	return schedulable;
+}
+
+/*
+ * Check whether @stream_pstate_method is compatible with the DRR state of
+ * stream @stream_index and with the DRR/FAMS2 related PMO options.
+ *
+ * The method is classified with the PMO_*_STRATEGY_MASK bit masks and then
+ * run through a list of rejection rules. The rules are tested in order and
+ * the first match rejects the method. Returns true when no rule matches.
+ */
+static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo,
+	const struct display_configuation_with_meta *display_cfg,
+	const enum dml2_pstate_method stream_pstate_method,
+	unsigned int stream_index)
+{
+	const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index];
+
+	/* classification of the requested method */
+	const bool no_drr_method = is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method);
+	const bool fw_method = is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method);
+	const bool drr_var_method = is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method);
+	const bool drr_clamped_method = is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method);
+
+	/* DRR state of the stream's timing */
+	const bool drr_enabled = stream_descriptor->timing.drr_config.enabled;
+	const bool drr_fixed_active = stream_descriptor->timing.drr_config.drr_active_fixed;
+	const bool drr_var_active = stream_descriptor->timing.drr_config.drr_active_variable;
+
+	/* DRR is active, so config may become unschedulable */
+	if (no_drr_method &&
+			display_cfg->display_config.num_streams > 1 &&
+			drr_enabled &&
+			(drr_fixed_active || drr_var_active))
+		return false;
+
+	/* DRR is variable, fw exclusive methods require DRR to be clamped */
+	if (no_drr_method && fw_method && drr_enabled && drr_var_active)
+		return false;
+
+	/* DRR variable is active, but policy blocks DRR for p-state when this happens */
+	if (drr_var_method &&
+			pmo->options->disable_drr_var_when_var_active &&
+			drr_enabled &&
+			drr_var_active)
+		return false;
+
+	/* DRR variable strategies are disallowed due to settings or policy */
+	if (drr_var_method &&
+			(pmo->options->disable_drr_var ||
+			 !drr_enabled ||
+			 stream_descriptor->timing.drr_config.disallowed))
+		return false;
+
+	/* DRR fixed strategies are disallowed due to settings or policy */
+	if (drr_clamped_method &&
+			(pmo->options->disable_drr_clamped ||
+			 (!drr_enabled || (!drr_fixed_active && !drr_var_active)) ||
+			 (pmo->options->disable_drr_clamped_when_var_active &&
+			  drr_enabled &&
+			  drr_var_active)))
+		return false;
+
+	/* FW modes require FAMS2 */
+	if (fw_method && pmo->options->disable_fams2)
+		return false;
+
+	return true;
+}
+
+/*
+ * Validate that @pstate_strategy, which assigns one p-state method to each
+ * stream, can be used as a whole for @display_cfg.
+ *
+ * Each stream's method has to agree with the forced strategies of the
+ * stream's planes and with the DRR policy. The streams are then sorted into
+ * per-method masks (svp, drr, vactive, vblank) and every non-empty mask is
+ * checked against the matching PMO disable option and all_timings_support_*
+ * helper. If all of that passes, the verdict of is_config_schedulable() is
+ * returned.
+ */
+static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo,
+	const struct display_configuation_with_meta *display_cfg,
+	const struct dml2_pmo_pstate_strategy *pstate_strategy)
+{
+	struct dml2_pmo_scratch *s = &pmo->scratch;
+	unsigned int svp_streams = 0;
+	unsigned int drr_streams = 0;
+	unsigned int vactive_streams = 0;
+	unsigned int vblank_streams = 0;
+	unsigned int stream;
+
+	/* per-stream checks and classification by method */
+	for (stream = 0; stream < display_cfg->display_config.num_streams; stream++) {
+		const enum dml2_pstate_method method = pstate_strategy->per_stream_pstate_method[stream];
+
+		/* method contradicts a strategy forced on one of the stream's planes */
+		if (!all_planes_match_method(display_cfg, s->pmo_dcn4.stream_plane_mask[stream], method))
+			return false;
+
+		/* method contradicts the DRR policy of the stream */
+		if (!stream_matches_drr_policy(pmo, display_cfg, method, stream))
+			return false;
+
+		switch (method) {
+		case dml2_pstate_method_fw_svp:
+		case dml2_pstate_method_fw_svp_drr:
+			set_bit_in_bitfield(&svp_streams, stream);
+			break;
+		case dml2_pstate_method_fw_drr:
+			set_bit_in_bitfield(&drr_streams, stream);
+			break;
+		case dml2_pstate_method_vactive:
+		case dml2_pstate_method_fw_vactive_drr:
+			set_bit_in_bitfield(&vactive_streams, stream);
+			break;
+		case dml2_pstate_method_vblank:
+		case dml2_pstate_method_fw_vblank_drr:
+			set_bit_in_bitfield(&vblank_streams, stream);
+			break;
+		default:
+			/* other methods are not tabulated */
+			break;
+		}
+	}
+
+	/* per-method checks, only for methods that are actually in use */
+	if (vactive_streams != 0 && !all_timings_support_vactive(pmo, display_cfg, vactive_streams))
+		return false;
+
+	if (vblank_streams != 0 && (pmo->options->disable_vblank || !all_timings_support_vblank(pmo, display_cfg, vblank_streams)))
+		return false;
+
+	if (drr_streams != 0 && (pmo->options->disable_drr_var || !all_timings_support_drr(pmo, display_cfg, drr_streams)))
+		return false;
+
+	if (svp_streams != 0 && (pmo->options->disable_svp || !all_timings_support_svp(pmo, display_cfg, svp_streams)))
+		return false;
+
+	return is_config_schedulable(pmo, display_cfg, pstate_strategy);
+}
+
+/*
+ * Return the smallest dram_change_latency_hiding_margin_in_active among the
+ * planes selected by @plane_mask. If no plane is selected, or no selected
+ * plane has a margin below it, the start value 0xFFFFFFF is returned.
+ */
+static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask)
+{
+	int smallest_margin_us = 0xFFFFFFF;
+	unsigned int plane;
+
+	for (plane = 0; plane < DML2_MAX_PLANES; plane++) {
+		if (!is_bit_set_in_bitfield(plane_mask, plane))
+			continue;
+
+		if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[plane].dram_change_latency_hiding_margin_in_active < smallest_margin_us)
+			smallest_margin_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[plane].dram_change_latency_hiding_margin_in_active;
+	}
+
+	return smallest_margin_us;
+}
+
+/*
+ * Return the largest dram_change_vactive_det_fill_delay_us among the planes
+ * selected by @plane_mask, or 0 when no plane is selected.
+ */
+static unsigned int get_vactive_det_fill_latency_delay_us(const struct display_configuation_with_meta *display_cfg, int plane_mask)
+{
+	unsigned int largest_fill_us = 0;
+	unsigned char plane;
+
+	for (plane = 0; plane < DML2_MAX_PLANES; plane++) {
+		if (!is_bit_set_in_bitfield(plane_mask, plane))
+			continue;
+
+		if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[plane].dram_change_vactive_det_fill_delay_us > largest_fill_us)
+			largest_fill_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[plane].dram_change_vactive_det_fill_delay_us;
+	}
+
+	return largest_fill_us;
+}
+
+static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo,
+ struct display_configuation_with_meta *display_config,
+ int stream_index)
+{
+ const struct dml2_ip_capabilities *ip_caps = pmo->ip_caps;
+ const struct dml2_stream_parameters *stream_descriptor = &display_config->display_config.stream_descriptors[stream_index];
+ const struct core_stream_support_info *stream_info = &display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index];
+ const struct dml2_timing_cfg *timing = &stream_descriptor->timing;
+ struct dml2_fams2_meta *stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_index];
+
+ /* worst case all other streams require some programming at the same time, 0 if only 1 stream */
+ unsigned int contention_delay_us = (ip_caps->fams2.vertical_interrupt_ack_delay_us +
+ (unsigned int)math_max3(ip_caps->fams2.subvp_programming_delay_us, ip_caps->fams2.drr_programming_delay_us, ip_caps->fams2.allow_programming_delay_us)) *
+ (display_config->display_config.num_streams - 1);
+
+ /* common */
+ stream_fams2_meta->valid = true;
+ stream_fams2_meta->otg_vline_time_us = (double)timing->h_total / timing->pixel_clock_khz * 1000.0;
+ stream_fams2_meta->nom_vtotal = stream_descriptor->timing.vblank_nom + stream_descriptor->timing.v_active;
+ stream_fams2_meta->nom_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 /
+ (stream_fams2_meta->nom_vtotal * timing->h_total);
+ stream_fams2_meta->nom_frame_time_us =
+ (double)stream_fams2_meta->nom_vtotal * stream_fams2_meta->otg_vline_time_us;
+ stream_fams2_meta->vblank_start = timing->v_blank_end + timing->v_active;
+
+ if (stream_descriptor->timing.drr_config.enabled == true) {
+ if (stream_descriptor->timing.drr_config.min_refresh_uhz != 0.0) {
+ stream_fams2_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz /
+ ((double)stream_descriptor->timing.drr_config.min_refresh_uhz * stream_descriptor->timing.h_total) * 1e9);
+ } else {
+ /* assume min of 48Hz */
+ stream_fams2_meta->max_vtotal = (unsigned int)math_floor((double)stream_descriptor->timing.pixel_clock_khz /
+ (48000000.0 * stream_descriptor->timing.h_total) * 1e9);
+ }
+ } else {
+ stream_fams2_meta->max_vtotal = stream_fams2_meta->nom_vtotal;
+ }
+ stream_fams2_meta->min_refresh_rate_hz = timing->pixel_clock_khz * 1000.0 /
+ (stream_fams2_meta->max_vtotal * timing->h_total);
+ stream_fams2_meta->max_frame_time_us =
+ (double)stream_fams2_meta->max_vtotal * stream_fams2_meta->otg_vline_time_us;
+
+ stream_fams2_meta->scheduling_delay_otg_vlines =
+ (unsigned int)math_ceil(ip_caps->fams2.scheduling_delay_us / stream_fams2_meta->otg_vline_time_us);
+ stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines =
+ (unsigned int)math_ceil(ip_caps->fams2.vertical_interrupt_ack_delay_us / stream_fams2_meta->otg_vline_time_us);
+ stream_fams2_meta->contention_delay_otg_vlines =
+ (unsigned int)math_ceil(contention_delay_us / stream_fams2_meta->otg_vline_time_us);
+ /* worst case allow to target needs to account for all streams' allow events overlapping, and 1 line for error */
+ stream_fams2_meta->allow_to_target_delay_otg_vlines =
+ (unsigned int)(math_ceil((ip_caps->fams2.vertical_interrupt_ack_delay_us + contention_delay_us + ip_caps->fams2.allow_programming_delay_us) / stream_fams2_meta->otg_vline_time_us)) + 1;
+ stream_fams2_meta->min_allow_width_otg_vlines =
+ (unsigned int)math_ceil(ip_caps->fams2.min_allow_width_us / stream_fams2_meta->otg_vline_time_us);
+ /* this value should account for urgent latency */
+ stream_fams2_meta->dram_clk_change_blackout_otg_vlines =
+ (unsigned int)math_ceil(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us /
+ stream_fams2_meta->otg_vline_time_us);
+
+ /* scheduling params should be built based on the worst case for allow_time:disallow_time */
+
+ /* vactive */
+ if (display_config->display_config.num_streams == 1) {
+ /* for single stream, guarantee at least an instant of allow */
+ stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = (unsigned int)math_floor(
+ math_max2(0.0,
+ timing->v_active - math_max2(1.0, stream_fams2_meta->min_allow_width_otg_vlines) - stream_fams2_meta->dram_clk_change_blackout_otg_vlines));
+ } else {
+ /* for multi stream, bound to a max fill time defined by IP caps */
+ stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines =
+ (unsigned int)math_floor((double)ip_caps->max_vactive_det_fill_delay_us / stream_fams2_meta->otg_vline_time_us);
+ }
+ stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us = stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines * stream_fams2_meta->otg_vline_time_us;
+
+ if (stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us > 0.0) {
+ stream_fams2_meta->method_vactive.common.allow_start_otg_vline =
+ timing->v_blank_end + stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines;
+ stream_fams2_meta->method_vactive.common.allow_end_otg_vline =
+ stream_fams2_meta->vblank_start -
+ stream_fams2_meta->dram_clk_change_blackout_otg_vlines;
+ } else {
+ stream_fams2_meta->method_vactive.common.allow_start_otg_vline = 0;
+ stream_fams2_meta->method_vactive.common.allow_end_otg_vline = 0;
+ }
+ stream_fams2_meta->method_vactive.common.period_us = stream_fams2_meta->nom_frame_time_us;
+ build_method_scheduling_params(&stream_fams2_meta->method_vactive.common, stream_fams2_meta);
+
+ /* vblank */
+ stream_fams2_meta->method_vblank.common.allow_start_otg_vline = stream_fams2_meta->vblank_start;
+ stream_fams2_meta->method_vblank.common.allow_end_otg_vline =
+ stream_fams2_meta->method_vblank.common.allow_start_otg_vline + 1;
+ stream_fams2_meta->method_vblank.common.period_us = stream_fams2_meta->nom_frame_time_us;
+ build_method_scheduling_params(&stream_fams2_meta->method_vblank.common, stream_fams2_meta);
+
+ /* subvp */
+ stream_fams2_meta->method_subvp.programming_delay_otg_vlines =
+ (unsigned int)math_ceil(ip_caps->fams2.subvp_programming_delay_us / stream_fams2_meta->otg_vline_time_us);
+ stream_fams2_meta->method_subvp.df_throttle_delay_otg_vlines =
+ (unsigned int)math_ceil(ip_caps->fams2.subvp_df_throttle_delay_us / stream_fams2_meta->otg_vline_time_us);
+ stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines =
+ (unsigned int)math_ceil(ip_caps->fams2.subvp_prefetch_to_mall_delay_us / stream_fams2_meta->otg_vline_time_us);
+ stream_fams2_meta->method_subvp.phantom_vactive =
+ stream_fams2_meta->allow_to_target_delay_otg_vlines +
+ stream_fams2_meta->min_allow_width_otg_vlines +
+ stream_info->phantom_min_v_active;
+ stream_fams2_meta->method_subvp.phantom_vfp =
+ stream_fams2_meta->method_subvp.df_throttle_delay_otg_vlines;
+ /* phantom vtotal = v_bp(vstartup) + v_sync(1) + v_fp(throttle_delay) + v_active(allow_to_target + min_allow + min_vactive)*/
+ stream_fams2_meta->method_subvp.phantom_vtotal =
+ stream_info->phantom_v_startup +
+ stream_fams2_meta->method_subvp.phantom_vfp +
+ 1 +
+ stream_fams2_meta->method_subvp.df_throttle_delay_otg_vlines +
+ stream_fams2_meta->method_subvp.phantom_vactive;
+ stream_fams2_meta->method_subvp.common.allow_start_otg_vline =
+ stream_descriptor->timing.v_blank_end +
+ stream_fams2_meta->contention_delay_otg_vlines +
+ stream_fams2_meta->method_subvp.programming_delay_otg_vlines +
+ stream_fams2_meta->method_subvp.phantom_vtotal +
+ stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines +
+ stream_fams2_meta->allow_to_target_delay_otg_vlines;
+ stream_fams2_meta->method_subvp.common.allow_end_otg_vline =
+ stream_fams2_meta->vblank_start -
+ stream_fams2_meta->dram_clk_change_blackout_otg_vlines;
+ stream_fams2_meta->method_subvp.common.period_us = stream_fams2_meta->nom_frame_time_us;
+ build_method_scheduling_params(&stream_fams2_meta->method_subvp.common, stream_fams2_meta);
+
+ /* drr */
+ stream_fams2_meta->method_drr.programming_delay_otg_vlines =
+ (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_fams2_meta->otg_vline_time_us);
+ stream_fams2_meta->method_drr.common.allow_start_otg_vline =
+ stream_fams2_meta->vblank_start +
+ stream_fams2_meta->allow_to_target_delay_otg_vlines;
+ stream_fams2_meta->method_drr.common.period_us = stream_fams2_meta->nom_frame_time_us;
+ if (display_config->display_config.num_streams <= 1) {
+ /* only need to stretch vblank for blackout time */
+ stream_fams2_meta->method_drr.stretched_vtotal =
+ stream_fams2_meta->nom_vtotal +
+ stream_fams2_meta->allow_to_target_delay_otg_vlines +
+ stream_fams2_meta->min_allow_width_otg_vlines +
+ stream_fams2_meta->dram_clk_change_blackout_otg_vlines;
+ } else {
+ /* multi display needs to always be schedulable */
+ stream_fams2_meta->method_drr.stretched_vtotal =
+ stream_fams2_meta->nom_vtotal * 2 +
+ stream_fams2_meta->allow_to_target_delay_otg_vlines +
+ stream_fams2_meta->min_allow_width_otg_vlines +
+ stream_fams2_meta->dram_clk_change_blackout_otg_vlines;
+ }
+ stream_fams2_meta->method_drr.common.allow_end_otg_vline =
+ stream_fams2_meta->method_drr.stretched_vtotal -
+ stream_fams2_meta->dram_clk_change_blackout_otg_vlines;
+ build_method_scheduling_params(&stream_fams2_meta->method_drr.common, stream_fams2_meta);
+}
+
+/* Publish the phantom timing that PMO FAMS2 precalculated for stream_index as that stream's
+ * implicit SVP meta in PMO scratch and mark it valid; display_config is not consulted. */
+static void build_subvp_meta_per_stream(struct dml2_pmo_instance *pmo,
+	struct display_configuation_with_meta *display_config, int stream_index)
+{
+	const struct dml2_fams2_meta *fams2 = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_index];
+	struct dml2_implicit_svp_meta *svp = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index];
+
+	/* PMO FAMS2 precalculates these values */
+	svp->v_total = fams2->method_subvp.phantom_vtotal;
+	svp->v_front_porch = fams2->method_subvp.phantom_vfp;
+	svp->v_active = fams2->method_subvp.phantom_vactive;
+	svp->valid = true;
+}
+
+bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out)
+{
+ struct dml2_pmo_instance *pmo = in_out->instance;
+ struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3;
+ struct dml2_pmo_scratch *s = &pmo->scratch;
+ /* Marks stage3 as performed, rebuilds PMO scratch and the p-state candidate list; false when no candidate is found. */
+ struct display_configuation_with_meta *display_config;
+ const struct dml2_plane_parameters *plane_descriptor;
+ const struct dml2_pmo_pstate_strategy *strategy_list = NULL;
+ struct dml2_pmo_pstate_strategy override_base_strategy = { 0 };
+ unsigned int strategy_list_size = 0;
+ unsigned int plane_index, stream_index, i;
+ bool build_override_strategy = true;
+
+ state->performed = true;
+ in_out->base_display_config->stage3.min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency;
+
+ display_config = in_out->base_display_config;
+ display_config->display_config.overrides.enable_subvp_implicit_pmo = true;
+ /* scratch is wiped in full, so nothing is carried over from an earlier call */
+ memset(s, 0, sizeof(struct dml2_pmo_scratch));
+
+ if (display_config->display_config.overrides.all_streams_blanked) {
+ return true;
+ }
+ /* latency index bounds: min and current start at the stage1 index, max is mcg_clock_table_size */
+ pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency;
+ pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size;
+ pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency;
+
+ // First build the stream plane mask (array of bitfields indexed by stream, indicating plane mapping)
+ for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
+ plane_descriptor = &display_config->display_config.plane_descriptors[plane_index];
+
+ set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index);
+
+ state->pstate_switch_modes[plane_index] = dml2_pstate_method_vactive;
+ /* an override strategy is only used when no plane is on auto; for planes sharing a stream the last plane's override wins */
+ build_override_strategy &= plane_descriptor->overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto;
+ override_base_strategy.per_stream_pstate_method[plane_descriptor->stream_index] =
+ uclk_pstate_strategy_override_to_pstate_method(plane_descriptor->overrides.uclk_pstate_change_strategy);
+ }
+
+ // Figure out which streams can do vactive, and also build up implicit SVP and FAMS2 meta
+ for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) {
+ if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= (int)(MIN_VACTIVE_MARGIN_PCT * pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us))
+ set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index);
+
+ /* FAMS2 meta */
+ build_fams2_meta_per_stream(pmo, display_config, stream_index);
+
+ /* SVP meta */
+ build_subvp_meta_per_stream(pmo, display_config, stream_index);
+ }
+
+ /* get synchronized timing groups */
+ build_synchronized_timing_groups(pmo, display_config);
+
+ if (build_override_strategy) {
+ /* build expanded override strategy list (no permutations) */
+ override_base_strategy.allow_state_increase = true;
+ s->pmo_dcn4.num_expanded_override_strategies = 0;
+ insert_strategy_into_expanded_list(&override_base_strategy,
+ display_config->display_config.num_streams,
+ s->pmo_dcn4.expanded_override_strategy_list,
+ &s->pmo_dcn4.num_expanded_override_strategies);
+ expand_variant_strategy(&override_base_strategy,
+ display_config->display_config.num_streams,
+ false,
+ s->pmo_dcn4.expanded_override_strategy_list,
+ &s->pmo_dcn4.num_expanded_override_strategies);
+
+ /* use override strategy list */
+ strategy_list = s->pmo_dcn4.expanded_override_strategy_list;
+ strategy_list_size = s->pmo_dcn4.num_expanded_override_strategies;
+ } else {
+ /* use predefined strategy list */
+ strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams);
+ strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams);
+ }
+
+ if (!strategy_list || strategy_list_size == 0)
+ return false;
+
+ s->pmo_dcn4.num_pstate_candidates = 0;
+ /* keep only strategies that pass the cofunctionality check, up to the capacity of the candidate list */
+ for (i = 0; i < strategy_list_size && s->pmo_dcn4.num_pstate_candidates < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) {
+ if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, &strategy_list[i])) {
+ insert_into_candidate_list(&strategy_list[i], display_config->display_config.num_streams, s);
+ }
+ }
+ /* the last candidate is always allowed a state increase; cur_pstate_candidate == -1 means none has been taken yet */
+ if (s->pmo_dcn4.num_pstate_candidates > 0) {
+ s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates-1].allow_state_increase = true;
+ s->pmo_dcn4.cur_pstate_candidate = -1;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+/*
+ * Return display_config to a neutral state before a p-state strategy is applied:
+ * stream SVP meta is invalidated and every per-plane p-state override is cleared.
+ */
+static void reset_display_configuration(struct display_configuation_with_meta *display_config)
+{
+	unsigned int i;
+
+	for (i = 0; i < display_config->display_config.num_streams; i++) {
+		display_config->display_config.stream_descriptors[i].overrides.minimize_active_latency_hiding = false;
+		display_config->stage3.stream_svp_meta[i].valid = false;
+
+		/* config-wide override; it is only cleared when at least one stream exists */
+		display_config->display_config.overrides.best_effort_min_active_latency_hiding_us = 0;
+	}
+
+	for (i = 0; i < display_config->display_config.num_planes; i++) {
+		struct dml2_plane_parameters *cur = &display_config->display_config.plane_descriptors[i];
+
+		/* no p-state method selected yet */
+		display_config->stage3.pstate_switch_modes[i] = dml2_pstate_method_na;
+
+		/* strategy back to auto, no reserved vblank time, SubVP unset */
+		cur->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto;
+		cur->overrides.reserved_vblank_time_ns = 0;
+		cur->overrides.legacy_svp_config = dml2_svp_mode_override_auto;
+	}
+}
+
+/* Force DRR on every plane selected by plane_mask and record fw_drr as its p-state method. */
+static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *display_config,
+	struct dml2_pmo_instance *pmo, int plane_mask)
+{
+	unsigned int i;
+
+	for (i = 0; i < display_config->display_config.num_planes; i++) {
+		struct dml2_plane_parameters *cur;
+
+		/* planes outside the mask are left untouched */
+		if (!is_bit_set_in_bitfield(plane_mask, i))
+			continue;
+
+		cur = &display_config->display_config.plane_descriptors[i];
+		cur->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr;
+		display_config->stage3.pstate_switch_modes[i] = dml2_pstate_method_fw_drr;
+	}
+}
+
+/* Mark every plane in plane_mask as FW SubVP, then copy the precomputed implicit SVP meta of the
+ * owning stream (the stream of the last masked plane) from PMO scratch into stage3. */
+static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *display_config,
+	struct dml2_pmo_instance *pmo, int plane_mask)
+{
+	struct dml2_pmo_scratch *scratch = &pmo->scratch;
+	unsigned int plane_index;
+	int stream_index = -1; /* stays negative when the mask selects no plane */
+
+	for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
+		if (!is_bit_set_in_bitfield(plane_mask, plane_index))
+			continue;
+
+		/* convert straight to int: a detour through plain char has implementation-defined sign */
+		stream_index = (int)display_config->display_config.plane_descriptors[plane_index].stream_index;
+		display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp;
+	}
+
+	if (stream_index >= 0)
+		memcpy(&display_config->stage3.stream_svp_meta[stream_index],
+			&scratch->pmo_dcn4.stream_svp_meta[stream_index], sizeof(struct dml2_implicit_svp_meta));
+}
+
+/* Mark every plane in plane_mask as FW SubVP+DRR, then copy the precomputed implicit SVP meta of
+ * the owning stream (the stream of the last masked plane) from PMO scratch into stage3. */
+static void setup_planes_for_svp_drr_by_mask(struct display_configuation_with_meta *display_config,
+	struct dml2_pmo_instance *pmo, int plane_mask)
+{
+	struct dml2_pmo_scratch *scratch = &pmo->scratch;
+	unsigned int plane_index;
+	int stream_index = -1; /* stays negative when the mask selects no plane */
+
+	for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
+		if (!is_bit_set_in_bitfield(plane_mask, plane_index))
+			continue;
+
+		/* convert straight to int: a detour through plain char has implementation-defined sign */
+		stream_index = (int)display_config->display_config.plane_descriptors[plane_index].stream_index;
+		display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp_drr;
+	}
+
+	if (stream_index >= 0)
+		memcpy(&display_config->stage3.stream_svp_meta[stream_index],
+			&scratch->pmo_dcn4.stream_svp_meta[stream_index], sizeof(struct dml2_implicit_svp_meta));
+}
+
+/*
+ * Select vblank switching for the planes in plane_mask and raise each plane's reserved vblank
+ * time to at least the DRAM clock change blackout (us scaled to ns); a larger value is kept.
+ */
+static void setup_planes_for_vblank_by_mask(struct display_configuation_with_meta *display_config,
+	struct dml2_pmo_instance *pmo, int plane_mask)
+{
+	unsigned int i;
+
+	for (i = 0; i < display_config->display_config.num_planes; i++) {
+		struct dml2_plane_parameters *cur = &display_config->display_config.plane_descriptors[i];
+
+		if (!is_bit_set_in_bitfield(plane_mask, i))
+			continue;
+		cur->overrides.reserved_vblank_time_ns = (long)math_max2(
+			pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, cur->overrides.reserved_vblank_time_ns);
+		display_config->stage3.pstate_switch_modes[i] = dml2_pstate_method_vblank;
+	}
+}
+
+/* Select FW vblank+DRR for the planes in plane_mask and set their reserved vblank time to the blackout. */
+static void setup_planes_for_vblank_drr_by_mask(struct display_configuation_with_meta *display_config,
+	struct dml2_pmo_instance *pmo, int plane_mask)
+{
+	unsigned int i;
+
+	for (i = 0; i < display_config->display_config.num_planes; i++) {
+		if (!is_bit_set_in_bitfield(plane_mask, i))
+			continue;
+
+		/* assigned unconditionally: a larger existing reservation is overwritten */
+		display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns =
+			(long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000);
+		display_config->stage3.pstate_switch_modes[i] = dml2_pstate_method_fw_vblank_drr;
+	}
+}
+
+/* Select vactive switching for the planes in plane_mask; unless the PMO options disable DET fill
+ * bandwidth padding, each plane is also given its stream's max vactive DET fill delay (floored us). */
+static void setup_planes_for_vactive_by_mask(struct display_configuation_with_meta *display_config,
+	struct dml2_pmo_instance *pmo, int plane_mask)
+{
+	unsigned int i;
+
+	for (i = 0; i < display_config->display_config.num_planes; i++) {
+		struct dml2_plane_parameters *cur = &display_config->display_config.plane_descriptors[i];
+		unsigned int stream = cur->stream_index;
+
+		if (!is_bit_set_in_bitfield(plane_mask, i))
+			continue;
+		display_config->stage3.pstate_switch_modes[i] = dml2_pstate_method_vactive;
+		if (!pmo->options->disable_vactive_det_fill_bw_pad) {
+			cur->overrides.max_vactive_det_fill_delay_us = (unsigned int)math_floor(
+				pmo->scratch.pmo_dcn4.stream_fams2_meta[stream].method_vactive.max_vactive_det_fill_delay_us);
+		}
+	}
+}
+
+/* Select FW vactive+DRR for the planes in plane_mask; unless the PMO options disable DET fill
+ * bandwidth padding, each plane is also given its stream's max vactive DET fill delay (floored us). */
+static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_with_meta *display_config,
+	struct dml2_pmo_instance *pmo, int plane_mask)
+{
+	unsigned int i;
+
+	for (i = 0; i < display_config->display_config.num_planes; i++) {
+		struct dml2_plane_parameters *cur = &display_config->display_config.plane_descriptors[i];
+		unsigned int stream = cur->stream_index;
+
+		if (!is_bit_set_in_bitfield(plane_mask, i))
+			continue;
+		display_config->stage3.pstate_switch_modes[i] = dml2_pstate_method_fw_vactive_drr;
+		if (!pmo->options->disable_vactive_det_fill_bw_pad) {
+			cur->overrides.max_vactive_det_fill_delay_us = (unsigned int)math_floor(
+				pmo->scratch.pmo_dcn4.stream_fams2_meta[stream].method_vactive.max_vactive_det_fill_delay_us);
+		}
+	}
+}
+
+/* Apply candidate strategy_index to a freshly reset display_config; false (FAMS2 meta not copied) if a stream has method na. */
+static bool setup_display_config(struct display_configuation_with_meta *display_config, struct dml2_pmo_instance *pmo, int strategy_index)
+{
+	bool fams2_needed = false;
+	unsigned int i;
+
+	reset_display_configuration(display_config);
+
+	for (i = 0; i < display_config->display_config.num_streams; i++) {
+		switch (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[i]) {
+		case dml2_pstate_method_na:
+			return false;
+		case dml2_pstate_method_vactive:
+			setup_planes_for_vactive_by_mask(display_config, pmo, pmo->scratch.pmo_dcn4.stream_plane_mask[i]);
+			break;
+		case dml2_pstate_method_vblank:
+			setup_planes_for_vblank_by_mask(display_config, pmo, pmo->scratch.pmo_dcn4.stream_plane_mask[i]);
+			break;
+		case dml2_pstate_method_fw_svp:
+			fams2_needed = true;
+			setup_planes_for_svp_by_mask(display_config, pmo, pmo->scratch.pmo_dcn4.stream_plane_mask[i]);
+			break;
+		case dml2_pstate_method_fw_vactive_drr:
+			fams2_needed = true;
+			setup_planes_for_vactive_drr_by_mask(display_config, pmo, pmo->scratch.pmo_dcn4.stream_plane_mask[i]);
+			break;
+		case dml2_pstate_method_fw_vblank_drr:
+			fams2_needed = true;
+			setup_planes_for_vblank_drr_by_mask(display_config, pmo, pmo->scratch.pmo_dcn4.stream_plane_mask[i]);
+			break;
+		case dml2_pstate_method_fw_svp_drr:
+			fams2_needed = true;
+			setup_planes_for_svp_drr_by_mask(display_config, pmo, pmo->scratch.pmo_dcn4.stream_plane_mask[i]);
+			break;
+		case dml2_pstate_method_fw_drr:
+			fams2_needed = true;
+			setup_planes_for_drr_by_mask(display_config, pmo, pmo->scratch.pmo_dcn4.stream_plane_mask[i]);
+			break;
+		default:
+			break;
+		}
+	}
+
+	display_config->stage3.fams2_required = fams2_needed;
+	memcpy(&display_config->stage3.stream_fams2_meta, &pmo->scratch.pmo_dcn4.stream_fams2_meta, sizeof(struct dml2_fams2_meta) * DML2_MAX_PLANES);
+	return true;
+}
+
+/* Smallest reserved vblank time (ns / 1000) over the planes in plane_mask, capped at 0xFFFFFF (also the result for an empty mask). */
+static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask)
+{
+	const struct dml2_plane_parameters *planes = display_config->display_config.plane_descriptors;
+	int lowest_us = 0xFFFFFF;
+	unsigned int i;
+
+	for (i = 0; i < display_config->display_config.num_planes; i++) {
+		if (is_bit_set_in_bitfield(plane_mask, i) && lowest_us > (planes[i].overrides.reserved_vblank_time_ns / 1000))
+			lowest_us = planes[i].overrides.reserved_vblank_time_ns / 1000;
+	}
+	return lowest_us;
+}
+
+/*
+ * Check the current p-state candidate against base_display_config: every stream must satisfy the
+ * requirement of the method the candidate assigns to it. Trivially true when all streams are
+ * blanked; false while no candidate has been selected (cur_pstate_candidate < 0).
+ */
+bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out)
+{
+	struct display_configuation_with_meta *cfg = in_out->base_display_config;
+	struct dml2_pmo_scratch *s = &in_out->instance->scratch;
+	/* NOTE(review): with INT_MIN floors the vblank/DRR margin comparisons look unable to fail - confirm intent */
+	const int min_margin_vblank = INT_MIN;
+	const int min_margin_drr = INT_MIN;
+	int required_reserved_time_us;
+	unsigned int i;
+
+	if (cfg->display_config.overrides.all_streams_blanked)
+		return true;
+
+	required_reserved_time_us = (int)in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us;
+	if (s->pmo_dcn4.cur_pstate_candidate < 0)
+		return false;
+
+	for (i = 0; i < cfg->display_config.num_streams; i++) {
+		const struct dml2_fams2_meta *fams2 = &s->pmo_dcn4.stream_fams2_meta[i];
+
+		switch (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[i]) {
+		/* margin must cover the scaled blackout and the DET fill latency must fit the allowed delay */
+		case dml2_pstate_method_vactive:
+		case dml2_pstate_method_fw_vactive_drr:
+			if (get_vactive_pstate_margin(cfg, s->pmo_dcn4.stream_plane_mask[i]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) ||
+				get_vactive_det_fill_latency_delay_us(cfg, s->pmo_dcn4.stream_plane_mask[i]) > fams2->method_vactive.max_vactive_det_fill_delay_us)
+				return false;
+			break;
+		/* every plane of the stream must reserve at least the blackout time in vblank */
+		case dml2_pstate_method_vblank:
+		case dml2_pstate_method_fw_vblank_drr:
+			if (get_minimum_reserved_time_us_for_planes(cfg, s->pmo_dcn4.stream_plane_mask[i]) < required_reserved_time_us ||
+				get_vactive_pstate_margin(cfg, s->pmo_dcn4.stream_plane_mask[i]) < min_margin_vblank)
+				return false;
+			break;
+		case dml2_pstate_method_fw_svp:
+		case dml2_pstate_method_fw_svp_drr:
+			if (!cfg->stage3.stream_svp_meta[i].valid)
+				return false;
+			break;
+		case dml2_pstate_method_fw_drr:
+			if (!all_planes_match_method(cfg, s->pmo_dcn4.stream_plane_mask[i], dml2_pstate_method_fw_drr) ||
+				get_vactive_pstate_margin(cfg, s->pmo_dcn4.stream_plane_mask[i]) < min_margin_drr)
+				return false;
+			break;
+		case dml2_pstate_method_na:
+			return false;
+		default:
+			break;
+		}
+	}
+
+	return true;
+}
+
+/* Step the p-state search. After a failed candidate, retry it one latency index higher when it allows a
+ * state increase; otherwise take the next candidate at the minimum latency index. The result is applied
+ * to optimized_display_config (a copy of the base config). Returns false when no candidate is left. */
+bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out)
+{
+	bool success = false;
+	struct dml2_pmo_scratch *s = &in_out->instance->scratch;
+
+	memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta));
+
+	/* cur_pstate_candidate is -1 until a candidate is taken: only index the list with a valid value */
+	if (in_out->last_candidate_failed &&
+		s->pmo_dcn4.cur_pstate_candidate >= 0 && s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates &&
+		s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].allow_state_increase &&
+		s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index - 1) {
+		s->pmo_dcn4.cur_latency_index++;
+		success = true;
+	}
+
+	if (!success) {
+		s->pmo_dcn4.cur_latency_index = s->pmo_dcn4.min_latency_index;
+		s->pmo_dcn4.cur_pstate_candidate++;
+		success = s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates;
+	}
+
+	if (success) {
+		in_out->optimized_display_config->stage3.min_clk_index_for_latency = s->pmo_dcn4.cur_latency_index;
+		setup_display_config(in_out->optimized_display_config, in_out->instance, in_out->instance->scratch.pmo_dcn4.cur_pstate_candidate);
+	}
+
+	return success;
+}
+
+/*
+ * Build the list of vblank reserved-time candidates for stutter: the z8 stutter exit latency when
+ * every plane hides enough latency for z8 and no scanned stream's nominal vblank is too short for
+ * it, then the stutter enter+exit latency. Returns false on an unexpected SoC BB setup (z8 exit
+ * latency below enter+exit latency) or when no candidate could be added.
+ */
+bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out)
+{
+	struct dml2_pmo_instance *pmo = in_out->instance;
+	struct dml2_pmo_scratch *s = &pmo->scratch;
+	bool z8_period_ok = true;
+	bool z8_too_expensive = false;
+	bool stutter_too_expensive = false;
+	unsigned int i;
+
+	if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 &&
+		pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 &&
+		pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us < pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us)
+		return false; // Unexpected SoCBB setup
+
+	/* z8 needs every plane to hide the z8 exit latency plus the minimum idle time */
+	for (i = 0; i < in_out->base_display_config->display_config.num_planes && z8_period_ok; i++) {
+		if (in_out->base_display_config->mode_support_result.cfg_support_info.plane_support_info[i].active_latency_hiding_us <
+			pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us + pmo->soc_bb->power_management_parameters.z8_min_idle_time)
+			z8_period_ok = false;
+	}
+
+	/*
+	 * NOTE(review): the first stream whose nominal vblank is too short for z8 ends the scan, so the
+	 * enter+exit check is skipped for that stream and all later ones - confirm this is intended.
+	 */
+	for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) {
+		double line_time_us = (double)in_out->base_display_config->display_config.stream_descriptors[i].timing.h_total / (in_out->base_display_config->display_config.stream_descriptors[i].timing.pixel_clock_khz * 1000) * 1000000;
+		double vblank_nom_time_us = line_time_us * in_out->base_display_config->display_config.stream_descriptors[i].timing.vblank_nom;
+
+		z8_too_expensive = vblank_nom_time_us < pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * MIN_BLANK_STUTTER_FACTOR;
+		if (z8_too_expensive)
+			break;
+
+		stutter_too_expensive = vblank_nom_time_us < pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * MIN_BLANK_STUTTER_FACTOR;
+		if (stutter_too_expensive)
+			break;
+	}
+
+	s->pmo_dcn4.num_stutter_candidates = 0;
+	s->pmo_dcn4.cur_stutter_candidate = 0;
+
+	/* candidate order: z8 exit latency first (if eligible), then enter+exit latency */
+	if (!z8_period_ok || z8_too_expensive) {
+		s->pmo_dcn4.z8_vblank_optimizable = false;
+	} else if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0) {
+		s->pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[s->pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us;
+		s->pmo_dcn4.num_stutter_candidates++;
+		s->pmo_dcn4.z8_vblank_optimizable = true;
+	}
+
+	if (!stutter_too_expensive && pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0) {
+		s->pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[s->pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us;
+		s->pmo_dcn4.num_stutter_candidates++;
+	}
+
+	return s->pmo_dcn4.num_stutter_candidates != 0;
+}
+
+/*
+ * Check that every plane already reserves enough vblank time for stutter: the z8 exit latency when
+ * z8 is configured and flagged vblank-optimizable in scratch, and the enter+exit latency when that
+ * is configured. The (int) cast is applied to each latency before it is scaled by 1000.
+ */
+bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out)
+{
+	struct dml2_pmo_instance *pmo = in_out->instance;
+	unsigned int i;
+
+	for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) {
+		if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 &&
+			pmo->scratch.pmo_dcn4.z8_vblank_optimizable &&
+			in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * 1000)
+			return false;
+
+		if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 &&
+			in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * 1000)
+			return false;
+	}
+
+	return true;
+}
+
+/* Copy the base config into optimized_display_config and raise every plane's reserved vblank time to at
+ * least the current stutter candidate (us * 1000). Returns false, leaving a plain copy, after a failed
+ * candidate or when the candidate index is past the end of the list. */
+bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out)
+{
+	struct dml2_pmo_scratch *s = &in_out->instance->scratch;
+	unsigned int i;
+
+	memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta));
+
+	if (in_out->last_candidate_failed ||
+		s->pmo_dcn4.cur_stutter_candidate >= s->pmo_dcn4.num_stutter_candidates)
+		return false;
+
+	for (i = 0; i < in_out->optimized_display_config->display_config.num_planes; i++) {
+		/* take the max of the current and the optimal reserved time */
+		in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns =
+			(long)math_max2(s->pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[s->pmo_dcn4.cur_stutter_candidate] * 1000,
+			in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns);
+	}
+
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h
new file mode 100644
index 000000000000..6baab7ad6ecc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_PMO_FAMS2_DCN4_H__
+#define __DML2_PMO_FAMS2_DCN4_H__
+
+#include "dml2_internal_shared_types.h"
+
+bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out);
+
+bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out);
+
+bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out);
+bool pmo_dcn4_fams2_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out);
+bool pmo_dcn4_fams2_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out);
+/* uclk p-state: init builds the candidate list, test checks the current candidate, optimize steps to the next setup */
+bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out);
+bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out);
+bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out);
+/* stutter: init builds the reserved-vblank-time candidates, test checks the planes, optimize raises their reservation */
+bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out);
+bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out);
+bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out);
+
+void pmo_dcn4_fams2_expand_base_pstate_strategies(
+ const struct dml2_pmo_pstate_strategy *base_strategies_list,
+ const unsigned int num_base_strategies,
+ const unsigned int stream_count,
+ struct dml2_pmo_pstate_strategy *expanded_strategy_list,
+ unsigned int *num_expanded_strategies);
+
+#endif /* __DML2_PMO_FAMS2_DCN4_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c
new file mode 100644
index 000000000000..55d2464365d0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_pmo_factory.h"
+#include "dml2_pmo_dcn4_fams2.h"
+#include "dml2_pmo_dcn3.h"
+#include "dml2_external_lib_deps.h"
+
+/* Stub init_for_stutter callback: ignores in_out and always returns false. */
+static bool dummy_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in_out)
+{
+	(void)in_out;
+
+	return false;
+}
+
+/* Stub test_for_stutter callback: ignores in_out and always returns true. */
+static bool dummy_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out)
+{
+	(void)in_out;
+
+	return true;
+}
+
+/* Stub optimize_for_stutter callback: ignores in_out and always returns false. */
+static bool dummy_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out)
+{
+	(void)in_out;
+
+	return false;
+}
+
+/*
+ * Fill @out with the PMO callback set that belongs to @project_id.
+ *
+ * @out is zeroed first, so every callback a project does not install stays
+ * zeroed. Returns true for the three handled DCN4x project ids; returns false
+ * when @out is NULL or when the project id is invalid/unknown.
+ */
+bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *out)
+{
+	if (!out)
+		return false;
+
+	memset(out, 0, sizeof(*out));
+
+	switch (project_id) {
+	case dml2_project_dcn4x_stage1:
+		/* Stage 1 installs only initialize and the DCC mcache optimizer. */
+		out->initialize = pmo_dcn4_fams2_initialize;
+		out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache;
+		return true;
+
+	case dml2_project_dcn4x_stage2:
+		/* Stage 2 uses the DCN3 PMO, with stub stutter callbacks. */
+		out->initialize = pmo_dcn3_initialize;
+
+		out->optimize_dcc_mcache = pmo_dcn3_optimize_dcc_mcache;
+
+		out->init_for_vmin = pmo_dcn3_init_for_vmin;
+		out->test_for_vmin = pmo_dcn3_test_for_vmin;
+		out->optimize_for_vmin = pmo_dcn3_optimize_for_vmin;
+
+		out->init_for_uclk_pstate = pmo_dcn3_init_for_pstate_support;
+		out->test_for_uclk_pstate = pmo_dcn3_test_for_pstate_support;
+		out->optimize_for_uclk_pstate = pmo_dcn3_optimize_for_pstate_support;
+
+		out->init_for_stutter = dummy_init_for_stutter;
+		out->test_for_stutter = dummy_test_for_stutter;
+		out->optimize_for_stutter = dummy_optimize_for_stutter;
+		return true;
+
+	case dml2_project_dcn4x_stage2_auto_drr_svp:
+		/* Full DCN4 FAMS2 callback set. */
+		out->initialize = pmo_dcn4_fams2_initialize;
+
+		out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache;
+
+		out->init_for_vmin = pmo_dcn4_fams2_init_for_vmin;
+		out->test_for_vmin = pmo_dcn4_fams2_test_for_vmin;
+		out->optimize_for_vmin = pmo_dcn4_fams2_optimize_for_vmin;
+
+		out->init_for_uclk_pstate = pmo_dcn4_fams2_init_for_pstate_support;
+		out->test_for_uclk_pstate = pmo_dcn4_fams2_test_for_pstate_support;
+		out->optimize_for_uclk_pstate = pmo_dcn4_fams2_optimize_for_pstate_support;
+
+		out->init_for_stutter = pmo_dcn4_fams2_init_for_stutter;
+		out->test_for_stutter = pmo_dcn4_fams2_test_for_stutter;
+		out->optimize_for_stutter = pmo_dcn4_fams2_optimize_for_stutter;
+		return true;
+
+	case dml2_project_invalid:
+	default:
+		return false;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h
new file mode 100644
index 000000000000..7218de1824cc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_PMO_FACTORY_H__
+#define __DML2_PMO_FACTORY_H__
+
+#include "dml2_internal_shared_types.h"
+#include "dml_top_types.h"
+
+bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *out);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c
new file mode 100644
index 000000000000..e17b5ceba447
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "lib_float_math.h"
+
+#define ASSERT(condition)
+
+#define isNaN(number) ((number) != (number))
+
+ /*
+ * NOTE:
+ * This file is gcc-parseable HW gospel, coming straight from HW engineers.
+ *
+ * It doesn't adhere to Linux kernel style and sometimes will do things in odd
+ * ways. Unless there is something clearly wrong with it the code should
+ * remain as-is as it provides us with a guarantee from HW that it is correct.
+ */
+
+/*
+ * Remainder of arg1 / arg2, computed with a quotient truncated toward zero:
+ * arg1 - arg2 * (int)(arg1 / arg2).
+ *
+ * If arg1 is NaN, arg2 is returned; otherwise if arg2 is NaN, arg1 is
+ * returned. The quotient goes through an int cast, so it must fit in an int.
+ */
+double math_mod(const double arg1, const double arg2)
+{
+	if (isNaN(arg1))
+		return arg2;
+	if (isNaN(arg2))
+		return arg1;
+	/* Remove whole multiples of the divisor (arg2), not of the dividend. */
+	return arg1 - arg2 * ((int)(arg1 / arg2));
+}
+
+/* Smaller of the two arguments; if one of them is NaN the other is returned. */
+double math_min2(const double arg1, const double arg2)
+{
+	if (isNaN(arg1))
+		return arg2;
+	if (isNaN(arg2) || arg1 < arg2)
+		return arg1;
+	return arg2;
+}
+
+/* Larger of the two arguments; if one of them is NaN the other is returned. */
+double math_max2(const double arg1, const double arg2)
+{
+	if (isNaN(arg1))
+		return arg2;
+	if (isNaN(arg2) || arg1 > arg2)
+		return arg1;
+	return arg2;
+}
+
+/*
+ * Round arg to a multiple of significance. The quotient is truncated toward
+ * zero by an int cast, so this equals a true floor only when the quotient is
+ * non-negative. significance is expected to be non-zero.
+ */
+double math_floor2(const double arg, const double significance)
+{
+	int whole_steps;
+
+	ASSERT(significance != 0);
+
+	whole_steps = (int)(arg / significance);
+	return whole_steps * significance;
+}
+
+/*
+ * Integer part of arg, obtained with an int cast. The cast truncates toward
+ * zero, so negative non-integers are rounded up rather than down.
+ */
+double math_floor(const double arg)
+{
+	const int whole_part = (int)arg;
+
+	return whole_part;
+}
+
+/*
+ * Approximate ceiling: adds 0.99999 and truncates toward zero with an int
+ * cast, so a fractional part below 0.00001 is not rounded up.
+ */
+double math_ceil(const double arg)
+{
+	const int rounded_up = (int)(arg + 0.99999);
+
+	return rounded_up;
+}
+
+/*
+ * Round arg up to a multiple of significance, using the same approximation as
+ * math_ceil(): 0.99999 is added to the quotient before it is truncated toward
+ * zero with an int cast.
+ */
+double math_ceil2(const double arg, const double significance)
+{
+	const int whole_steps = (int)(arg / significance + 0.99999);
+
+	return whole_steps * significance;
+}
+
+/* Largest of three values, built on math_max2(). */
+double math_max3(double v1, double v2, double v3)
+{
+	const double max_of_first_two = math_max2(v1, v2);
+
+	if (v3 > max_of_first_two)
+		return v3;
+	return max_of_first_two;
+}
+
+/* Largest of four values, built on math_max3(). */
+double math_max4(double v1, double v2, double v3, double v4)
+{
+	const double max_of_first_three = math_max3(v1, v2, v3);
+
+	if (v4 > max_of_first_three)
+		return v4;
+	return max_of_first_three;
+}
+
+/* Largest of five values: compares max(v1, v2, v3) against max(v4, v5). */
+double math_max5(double v1, double v2, double v3, double v4, double v5)
+{
+	const double max_of_first_three = math_max3(v1, v2, v3);
+	const double max_of_last_two = math_max2(v4, v5);
+
+	if (max_of_first_three > max_of_last_two)
+		return max_of_first_three;
+	return max_of_last_two;
+}
+
+/*
+ * Raise a to an integer power by recursive squaring.
+ *
+ * Only the integer part of exp is used (it is truncated toward zero with an
+ * int cast); an exponent whose integer part is 0 yields 1. Odd positive
+ * exponents multiply by a once more, odd negative exponents divide by a.
+ */
+float math_pow(float a, float exp)
+{
+	const int whole_exp = (int)exp;
+	double half_power;
+
+	if (whole_exp == 0)
+		return 1;
+
+	half_power = math_pow(a, (float)((int)(exp / 2)));
+
+	if (whole_exp % 2 == 0)
+		return (float)(half_power * half_power);
+	if (whole_exp > 0)
+		return (float)(a * half_power * half_power);
+	return (float)((half_power * half_power) / a);
+}
+
+/* Magnitude of a: values greater than zero are returned as-is, all others negated. */
+double math_fabs(double a)
+{
+	return (a > 0) ? a : -a;
+}
+
+/*
+ * Fast approximate logarithm of a in base b.
+ *
+ * The exponent field of a is read from its single-precision bit pattern and
+ * reduced by 128. The exponent field is then overwritten with 127 (for
+ * positive a that puts the value in [1, 2)) and a quadratic in that value,
+ * which evaluates to 1 at 1 and 2 at 2, supplies the rest of the result.
+ * For any base outside (1.99999, 2.00001) the result is divided by the
+ * same approximation applied to b.
+ *
+ * The bit pattern is accessed through a union instead of casting &a to
+ * int *, which would break C's strict-aliasing rules. The code assumes float
+ * and int are both 32 bits wide.
+ */
+float math_log(float a, float b)
+{
+	union {
+		float f;
+		int i;
+	} bits;
+	int log_2;
+
+	bits.f = a;
+
+	/* Exponent field (bits 23..30) minus 128. */
+	log_2 = ((bits.i >> 23) & 255) - 128;
+
+	/* Force the exponent field to 127, keeping sign and mantissa bits. */
+	bits.i &= ~(255 << 23);
+	bits.i += 127 << 23;
+	a = bits.f;
+
+	a = ((-1.0f / 3) * a + 2) * a - 2.0f / 3;
+
+	if (b > 2.00001 || b < 1.99999)
+		return (a + log_2) / math_log(b, 2);
+	else
+		return (a + log_2);
+}
+
+/* Approximate base-2 logarithm of a; forwards to math_log() with base 2. */
+float math_log2(float a)
+{
+	const float base = 2.0;
+
+	return math_log(a, base);
+}
+
+/*
+ * Integer log2 of a, i.e. the number of right shifts needed to bring a down
+ * to 1. Exact when a is a power of two, otherwise floor(log2(a)).
+ * Inputs of 0 and 1 both yield 0.
+ */
+unsigned int math_log2_approx(unsigned int a)
+{
+	unsigned int log2_val;
+
+	for (log2_val = 0; a > 1; log2_val++)
+		a >>= 1;
+
+	return log2_val;
+}
+
+/* Round a by adding 0.5 and passing the sum through math_floor(). */
+double math_round(double a)
+{
+	return math_floor(a + 0.5);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h
new file mode 100644
index 000000000000..e13b0c5939b0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __LIB_FLOAT_MATH_H__
+#define __LIB_FLOAT_MATH_H__
+
+double math_mod(const double arg1, const double arg2);
+double math_min2(const double arg1, const double arg2);
+double math_max2(const double arg1, const double arg2);
+double math_floor2(const double arg, const double significance);
+double math_floor(const double arg);
+double math_ceil(const double arg);
+double math_ceil2(const double arg, const double significance);
+double math_max3(double v1, double v2, double v3);
+double math_max4(double v1, double v2, double v3, double v4);
+double math_max5(double v1, double v2, double v3, double v4, double v5);
+float math_pow(float a, float exp);
+double math_fabs(double a);
+float math_log(float a, float b);
+float math_log2(float a);
+unsigned int math_log2_approx(unsigned int a);
+double math_round(double a);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
new file mode 100644
index 000000000000..5a33e2f357f4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml_top.h"
+#include "dml2_internal_shared_types.h"
+#include "dml2_top_soc15.h"
+
+/* Size of struct dml2_instance in bytes, returned as an unsigned int. */
+unsigned int dml2_get_instance_size_bytes(void)
+{
+	const unsigned int instance_size = sizeof(struct dml2_instance);
+
+	return instance_size;
+}
+
+/*
+ * Initialize a DML2 instance for the project id in in_out->options.
+ *
+ * All three DCN4x project ids are handled by
+ * dml2_top_soc15_initialize_instance(), whose result is returned. Any other
+ * project id (including dml2_project_invalid) returns false.
+ */
+bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out)
+{
+	bool initialized = false;
+
+	switch (in_out->options.project_id) {
+	case dml2_project_dcn4x_stage1:
+	case dml2_project_dcn4x_stage2:
+	case dml2_project_dcn4x_stage2_auto_drr_svp:
+		initialized = dml2_top_soc15_initialize_instance(in_out);
+		break;
+	case dml2_project_invalid:
+	default:
+		break;
+	}
+
+	return initialized;
+}
+
+/*
+ * Forward to the instance's check_mode_supported callback.
+ * Returns false when no callback is installed, otherwise the callback's result.
+ */
+bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out)
+{
+	if (in_out->dml2_instance->funcs.check_mode_supported)
+		return in_out->dml2_instance->funcs.check_mode_supported(in_out);
+
+	return false;
+}
+
+/*
+ * Forward to the instance's build_mode_programming callback.
+ * Returns false when no callback is installed, otherwise the callback's result.
+ */
+bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out)
+{
+	if (in_out->dml2_instance->funcs.build_mode_programming)
+		return in_out->dml2_instance->funcs.build_mode_programming(in_out);
+
+	return false;
+}
+
+/*
+ * Forward to the instance's build_mcache_programming callback.
+ * Returns false when no callback is installed, otherwise the callback's result.
+ */
+bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out)
+{
+	if (in_out->dml2_instance->funcs.build_mcache_programming)
+		return in_out->dml2_instance->funcs.build_mcache_programming(in_out);
+
+	return false;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c
new file mode 100644
index 000000000000..5e14d85821e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_top_legacy.h"
+#include "dml2_top_soc15.h"
+#include "dml2_core_factory.h"
+#include "dml2_pmo_factory.h"
+#include "display_mode_core_structs.h"
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h
new file mode 100644
index 000000000000..14d0ae03dce6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_TOP_LEGACY_H__
+#define __DML2_TOP_LEGACY_H__
+#include "dml2_internal_shared_types.h"
+bool dml2_top_legacy_initialize_instance(struct dml2_initialize_instance_in_out *in_out);
+#endif /* __DML2_TOP_LEGACY_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c
new file mode 100644
index 000000000000..4a7c4c62111e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c
@@ -0,0 +1,1170 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_top_soc15.h"
+#include "dml2_mcg_factory.h"
+#include "dml2_dpmm_factory.h"
+#include "dml2_core_factory.h"
+#include "dml2_pmo_factory.h"
+#include "lib_float_math.h"
+#include "dml2_debug.h"
+/*
+ * Seed @out from @display_config and set the stage 1 latency clock index to
+ * the last entry of the DRAM bandwidth table in dml->min_clk_table.
+ */
+static void setup_unoptimized_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config)
+{
+	memcpy(&out->display_config, display_config, sizeof(*display_config));
+
+	out->stage1.min_clk_index_for_latency = dml->min_clk_table.dram_bw_table.num_entries - 1;
+}
+
+/*
+ * Seed @out from @display_config and set the stage 1 latency clock index to 0.
+ * @dml is not used.
+ */
+static void setup_speculative_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config)
+{
+	memcpy(&out->display_config, display_config, sizeof(*display_config));
+
+	out->stage1.min_clk_index_for_latency = 0;
+}
+
+/* Byte-wise copy of one display_configuation_with_meta into another. */
+static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+
+/* Init hook for the min-clock-for-latency phase: marks stage 1 as performed and returns true. */
+static bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params)
+{
+	params->display_config->stage1.performed = true;
+
+	return true;
+}
+
+/* Test hook for the min-clock-for-latency phase: passes once the stage 1 clock index has reached 0. */
+static bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params)
+{
+	return params->display_config->stage1.min_clk_index_for_latency == 0;
+}
+
+/*
+ * Optimize hook for the min-clock-for-latency phase.
+ *
+ * While the stage 1 clock index is above 0, copies the current config into
+ * the optimized config, lowers the copy's index by one and returns true.
+ * Returns false, leaving the optimized config untouched, once the index is
+ * no longer above 0.
+ */
+static bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params)
+{
+	if (!(params->display_config->stage1.min_clk_index_for_latency > 0))
+		return false;
+
+	copy_display_configuration_with_meta(params->optimized_display_config, params->display_config);
+	params->optimized_display_config->stage1.min_clk_index_for_latency--;
+
+	return true;
+}
+
+/*
+ * Test hook for the mcache phase.
+ *
+ * Clears the test locals, computes the mcache counts and offsets for the
+ * current config, assigns global mcache ids and then validates
+ * admissibility. The per-plane validation status is copied into
+ * stage2.per_plane_mcache_support. Returns the admissibility result, or false
+ * if the count/offset calculation fails (nothing is copied in that case).
+ */
+static bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params)
+{
+	struct dml2_optimization_test_function_locals *l = params->locals;
+	bool mcache_success;
+
+	memset(l, 0, sizeof(*l));
+
+	l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml;
+	l->test_mcache.calc_mcache_count_params.display_config = &params->display_config->display_config;
+	l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations;
+
+	/* use core to get the basic mcache_allocations */
+	if (!dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params))
+		return false;
+
+	l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations;
+	l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes;
+
+	dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params);
+
+	l->test_mcache.validate_admissibility_params.dml2_instance = params->dml;
+	l->test_mcache.validate_admissibility_params.display_cfg = &params->display_config->display_config;
+	l->test_mcache.validate_admissibility_params.mcache_allocations = params->display_config->stage2.mcache_allocations;
+	l->test_mcache.validate_admissibility_params.cfg_support_info = &params->display_config->mode_support_result.cfg_support_info;
+
+	/* also find the shift to make mcache allocation works */
+	mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params);
+
+	memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * DML2_MAX_PLANES);
+
+	return mcache_success;
+}
+
+/*
+ * Optimize hook for the mcache phase.
+ *
+ * Gives up (returns false) if the previous candidate was not supported.
+ * Otherwise copies the current config into the optimized config and hands
+ * both to the PMO optimize_dcc_mcache callback, returning its result.
+ */
+static bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params)
+{
+	struct dml2_optimization_optimize_function_locals *l = params->locals;
+
+	if (!params->last_candidate_supported)
+		return false;
+
+	copy_display_configuration_with_meta(params->optimized_display_config, params->display_config);
+
+	l->optimize_mcache.optimize_mcache_params.instance = &params->dml->pmo_instance;
+	l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support;
+	l->optimize_mcache.optimize_mcache_params.display_config = &params->display_config->display_config;
+	l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = &params->optimized_display_config->display_config;
+	l->optimize_mcache.optimize_mcache_params.cfg_support_info = &params->optimized_display_config->mode_support_result.cfg_support_info;
+
+	return params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params);
+}
+
+/*
+ * Init hook for the vmin phase: fills the vmin init arguments in the phase
+ * locals with the PMO instance and the base display config, then returns the
+ * result of the PMO init_for_vmin callback.
+ */
+static bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params)
+{
+	struct dml2_optimization_init_function_locals *locals = params->locals;
+
+	locals->vmin.init_params.base_display_config = params->display_config;
+	locals->vmin.init_params.instance = &params->dml->pmo_instance;
+
+	return params->dml->pmo_instance.init_for_vmin(&locals->vmin.init_params);
+}
+
+/*
+ * Test hook for the vmin phase: passes the PMO instance, the candidate config
+ * and the SoC bounding box vmin limit to the PMO test_for_vmin callback and
+ * returns its result.
+ */
+static bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params)
+{
+	struct dml2_optimization_test_function_locals *locals = params->locals;
+
+	locals->test_vmin.pmo_test_vmin_params.vmin_limits = &params->dml->soc_bbox.vmin_limit;
+	locals->test_vmin.pmo_test_vmin_params.display_config = params->display_config;
+	locals->test_vmin.pmo_test_vmin_params.instance = &params->dml->pmo_instance;
+
+	return params->dml->pmo_instance.test_for_vmin(&locals->test_vmin.pmo_test_vmin_params);
+}
+
+/*
+ * Optimize hook for the vmin phase.
+ *
+ * Gives up (returns false) if the previous candidate was not supported;
+ * otherwise forwards the base and optimized configs to the PMO
+ * optimize_for_vmin callback and returns its result.
+ */
+static bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params)
+{
+	struct dml2_optimization_optimize_function_locals *locals = params->locals;
+
+	if (!params->last_candidate_supported)
+		return false;
+
+	locals->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config;
+	locals->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config;
+	locals->optimize_vmin.pmo_optimize_vmin_params.instance = &params->dml->pmo_instance;
+
+	return params->dml->pmo_instance.optimize_for_vmin(&locals->optimize_vmin.pmo_optimize_vmin_params);
+}
+
+/*
+ * Init hook for the UCLK p-state phase: passes the PMO instance and the base
+ * display config to the PMO init_for_uclk_pstate callback and returns its
+ * result.
+ */
+static bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params)
+{
+	struct dml2_optimization_init_function_locals *locals = params->locals;
+
+	locals->uclk_pstate.init_params.base_display_config = params->display_config;
+	locals->uclk_pstate.init_params.instance = &params->dml->pmo_instance;
+
+	return params->dml->pmo_instance.init_for_uclk_pstate(&locals->uclk_pstate.init_params);
+}
+
+/*
+ * Test hook for the UCLK p-state phase: passes the PMO instance and the
+ * candidate config to the PMO test_for_uclk_pstate callback and returns its
+ * result.
+ */
+static bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params)
+{
+	struct dml2_optimization_test_function_locals *locals = params->locals;
+
+	locals->uclk_pstate.test_params.base_display_config = params->display_config;
+	locals->uclk_pstate.test_params.instance = &params->dml->pmo_instance;
+
+	return params->dml->pmo_instance.test_for_uclk_pstate(&locals->uclk_pstate.test_params);
+}
+
+/*
+ * Optimize hook for the UCLK p-state phase: forwards the base and optimized
+ * configs, plus whether the previous candidate failed validation, to the PMO
+ * optimize_for_uclk_pstate callback and returns its result.
+ */
+static bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params)
+{
+	struct dml2_optimization_optimize_function_locals *locals = params->locals;
+
+	locals->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported;
+	locals->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config;
+	locals->uclk_pstate.optimize_params.base_display_config = params->display_config;
+	locals->uclk_pstate.optimize_params.instance = &params->dml->pmo_instance;
+
+	return params->dml->pmo_instance.optimize_for_uclk_pstate(&locals->uclk_pstate.optimize_params);
+}
+
+/*
+ * Init hook for the stutter phase: passes the PMO instance and the base
+ * display config to the PMO init_for_stutter callback and returns its result.
+ *
+ * The arguments are written into l->stutter.stutter_params, the same object
+ * that is handed to the callback, rather than into the unrelated
+ * l->uclk_pstate.init_params.
+ */
+static bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params)
+{
+	struct dml2_optimization_init_function_locals *l = params->locals;
+
+	l->stutter.stutter_params.instance = &params->dml->pmo_instance;
+	l->stutter.stutter_params.base_display_config = params->display_config;
+
+	return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params);
+}
+
+/*
+ * Test hook for the stutter phase: passes the PMO instance and the candidate
+ * config to the PMO test_for_stutter callback and returns its result.
+ */
+static bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params)
+{
+	struct dml2_optimization_test_function_locals *locals = params->locals;
+
+	locals->stutter.stutter_params.base_display_config = params->display_config;
+	locals->stutter.stutter_params.instance = &params->dml->pmo_instance;
+
+	return params->dml->pmo_instance.test_for_stutter(&locals->stutter.stutter_params);
+}
+
+/*
+ * Optimize hook for the stutter phase: forwards the base and optimized
+ * configs, plus whether the previous candidate failed validation, to the PMO
+ * optimize_for_stutter callback and returns its result.
+ */
+static bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params)
+{
+	struct dml2_optimization_optimize_function_locals *locals = params->locals;
+
+	locals->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported;
+	locals->stutter.stutter_params.optimized_display_config = params->optimized_display_config;
+	locals->stutter.stutter_params.base_display_config = params->display_config;
+	locals->stutter.stutter_params.instance = &params->dml->pmo_instance;
+
+	return params->dml->pmo_instance.optimize_for_stutter(&locals->stutter.stutter_params);
+}
+
+/*
+ * Run one generic optimization phase: init, test, then repeatedly
+ * optimize -> validate with core mode_support -> re-test until the test passes
+ * or the optimize callback reports it has nothing left to try.
+ *
+ * @l:      scratch storage for the current/next candidate configs and for the
+ *          arguments handed to the callbacks.
+ * @params: phase description. dml, optimize_function, test_function,
+ *          display_config and optimized_display_config must be non-NULL;
+ *          init_function is optional.
+ *
+ * Returns true if a candidate passed the phase's test function, in which case
+ * that candidate is copied to params->optimized_display_config. Returns false
+ * if a required parameter is missing, the init function fails, or the
+ * optimize function gives up before the test passes. When
+ * params->all_or_nothing is false, each validated intermediate candidate is
+ * also written to the output as partial progress.
+ */
+static bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params)
+{
+ bool test_passed = false;
+ bool optimize_succeeded = true;
+ bool candidate_validation_passed = true;
+ struct optimization_init_function_params init_params = { 0 };
+ struct optimization_test_function_params test_params = { 0 };
+ struct optimization_optimize_function_params optimize_params = { 0 };
+
+ if (!params->dml ||
+ !params->optimize_function ||
+ !params->test_function ||
+ !params->display_config ||
+ !params->optimized_display_config)
+ return false;
+
+ /* Work on a private copy so the caller's input config is never modified. */
+ copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config);
+
+ init_params.locals = &l->init_function_locals;
+ init_params.dml = params->dml;
+ init_params.display_config = &l->cur_candidate_display_cfg;
+
+ if (params->init_function && !params->init_function(&init_params))
+ return false;
+
+ test_params.locals = &l->test_function_locals;
+ test_params.dml = params->dml;
+ test_params.display_config = &l->cur_candidate_display_cfg;
+
+ /* The starting config may already satisfy the phase, in which case the loop is skipped. */
+ test_passed = params->test_function(&test_params);
+
+ while (!test_passed && optimize_succeeded) {
+ memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params));
+
+ optimize_params.locals = &l->optimize_function_locals;
+ optimize_params.dml = params->dml;
+ optimize_params.display_config = &l->cur_candidate_display_cfg;
+ optimize_params.optimized_display_config = &l->next_candidate_display_cfg;
+ /* Tells the optimizer whether its previous proposal passed mode support. */
+ optimize_params.last_candidate_supported = candidate_validation_passed;
+
+ optimize_succeeded = params->optimize_function(&optimize_params);
+
+ if (optimize_succeeded) {
+ /* Validate the proposed candidate with the core mode support check. */
+ l->mode_support_params.instance = &params->dml->core_instance;
+ l->mode_support_params.display_cfg = &l->next_candidate_display_cfg;
+ l->mode_support_params.min_clk_table = &params->dml->min_clk_table;
+
+ /* Use the stage 3 clock index once stage 3 has run, otherwise the stage 1 index. */
+ if (l->next_candidate_display_cfg.stage3.performed)
+ l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency;
+ else
+ l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency;
+ candidate_validation_passed = params->dml->core_instance.mode_support(&l->mode_support_params);
+ l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result;
+ }
+
+ /*
+ * Only a validated candidate becomes the new current candidate. If validation
+ * failed, the loop runs again from the same current candidate with
+ * last_candidate_supported set to false.
+ */
+ if (optimize_succeeded && candidate_validation_passed) {
+ memset(&test_params, 0, sizeof(struct optimization_test_function_params));
+ test_params.locals = &l->test_function_locals;
+ test_params.dml = params->dml;
+ test_params.display_config = &l->next_candidate_display_cfg;
+ test_passed = params->test_function(&test_params);
+
+ copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg);
+
+ // If optimization is not all or nothing, then store partial progress in output
+ if (!params->all_or_nothing)
+ copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg);
+ }
+ }
+
+ if (test_passed)
+ copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg);
+
+ return test_passed;
+}
+
+/*
+ * Phase 1 variant: binary-search the min clock index instead of stepping
+ * through the optimize/test callbacks.
+ *
+ * The search range is [0, stage1.min_clk_index_for_latency of the input
+ * config]. Each probe calls core mode_support at the midpoint; a supported
+ * probe lowers the upper bound, an unsupported one raises the lower bound.
+ * The final index is stored in stage1.min_clk_index_for_latency and the
+ * resulting config is copied to params->optimized_display_config.
+ *
+ * Returns false only when a required parameter is NULL, otherwise true.
+ * optimize_function and test_function are checked for NULL but never called
+ * here.
+ * NOTE(review): the search presumably relies on support being monotonic in the
+ * clock index (unsupported below some index, supported at and above it) —
+ * confirm.
+ */
+static bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params)
+{
+ int highest_state, lowest_state, cur_state;
+ bool supported = false;
+
+ if (!params->dml ||
+ !params->optimize_function ||
+ !params->test_function ||
+ !params->display_config ||
+ !params->optimized_display_config)
+ return false;
+
+ copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config);
+ highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency;
+ lowest_state = 0;
+
+ while (highest_state > lowest_state) {
+ cur_state = (highest_state + lowest_state) / 2;
+
+ l->mode_support_params.instance = &params->dml->core_instance;
+ l->mode_support_params.display_cfg = &l->cur_candidate_display_cfg;
+ l->mode_support_params.min_clk_table = &params->dml->min_clk_table;
+ l->mode_support_params.min_clk_index = cur_state;
+ supported = params->dml->core_instance.mode_support(&l->mode_support_params);
+
+ if (supported) {
+ /* Keep the result of the most recent supported probe. */
+ l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result;
+ highest_state = cur_state;
+ } else {
+ lowest_state = cur_state + 1;
+ }
+ }
+ /* The loop ends with lowest_state == highest_state whenever it ran. */
+ l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state;
+
+ copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg);
+
+ return true;
+}
+
+/*
+* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming.
+* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means
+* that the horizontal viewport does not span more than 2 cache slices.
+*
+* It optionally also can apply a constant shift to all the cache boundaries.
+*/
+static const uint32_t MCACHE_ID_UNASSIGNED = 0xF;
+static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF;
+
+/*
+ * Work out which mcache slice(s) a pipe's horizontal viewport touches.
+ *
+ * @mcache_boundaries: slice boundary x positions (num_boundaries entries).
+ * @shift:             constant subtracted from every boundary.
+ * @pipe_h_vp_start/@pipe_h_vp_end: viewport start and end x positions.
+ * @first_offset/@second_offset: optional outputs, written only when both are
+ *                     non-NULL. *second_offset is -1 when a single slice is
+ *                     enough.
+ *
+ * Returns true if the viewport lies in one slice or in two slices that are
+ * adjacent modulo num_boundaries, false otherwise. With num_boundaries <= 1
+ * the answer is always slice 0 alone.
+ */
+static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift,
+ int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset)
+{
+ const int MAX_VP = 0xFFFFFF;
+ int left_cache_id;
+ int right_cache_id;
+ int range_start;
+ int range_end;
+ bool success = false;
+
+ if (num_boundaries <= 1) {
+ if (first_offset && second_offset) {
+ *first_offset = 0;
+ *second_offset = -1;
+ }
+ success = true;
+ return success;
+ } else {
+ /* Scan left to right for the slice containing the viewport start. */
+ range_start = 0;
+ for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) {
+ range_end = mcache_boundaries[left_cache_id] - shift - 1;
+
+ if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end)
+ break;
+
+ range_start = range_end + 1;
+ }
+
+ /*
+ * Scan right to left for the range containing the viewport end. Index -1
+ * stands for the range that starts at x = 0, before the first boundary.
+ */
+ range_end = MAX_VP;
+ for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) {
+ if (right_cache_id >= 0)
+ range_start = mcache_boundaries[right_cache_id] - shift;
+ else
+ range_start = 0;
+
+ if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) {
+ break;
+ }
+ range_end = range_start - 1;
+ }
+ /* Convert the boundary index found above into a slice id, wrapping around. */
+ right_cache_id = (right_cache_id + 1) % num_boundaries;
+
+ if (right_cache_id == left_cache_id) {
+ /* Start and end fall in the same slice. */
+ if (first_offset && second_offset) {
+ *first_offset = left_cache_id;
+ *second_offset = -1;
+ }
+ success = true;
+ } else if (right_cache_id == (left_cache_id + 1) % num_boundaries) {
+ /* Start and end fall in neighbouring slices. */
+ if (first_offset && second_offset) {
+ *first_offset = left_cache_id;
+ *second_offset = right_cache_id;
+ }
+ success = true;
+ }
+ }
+
+ return success;
+}
+
+/*
+ * For a given set of pipe start/end x positions, checks whether the input
+ * mcache splitting can be supported. If the default shift of 0 does not work,
+ * larger shifts are tried in steps of shift_granularity, up to the width of
+ * the narrowest slice (capped at 0xFFFF).
+ *
+ * A shift is accepted when calculate_first_second_splitting() succeeds for
+ * every pipe. Returns true with the accepted shift in *shift, or false if no
+ * shift in range works. A non-positive shift_granularity can never advance
+ * the search, so in that case only shift 0 is tested.
+ */
+static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries,
+	int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift)
+{
+	int max_shift = 0xFFFF;
+	unsigned int pipe_index;
+	unsigned int i, slice_width;
+	bool success = false;
+
+	/* The largest useful shift is the width of the narrowest slice. */
+	for (i = 0; i < num_boundaries; i++) {
+		if (i == 0)
+			slice_width = mcache_boundaries[i];
+		else
+			slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1];
+
+		if (max_shift > (int)slice_width) {
+			max_shift = slice_width;
+		}
+	}
+
+	for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) {
+		success = true;
+		for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) {
+			if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift,
+				pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], 0, 0)) {
+				success = false;
+				break;
+			}
+		}
+		if (success)
+			break;
+
+		/* A step of zero or less never reaches max_shift; stop instead of looping forever. */
+		if (shift_granularity <= 0)
+			break;
+	}
+
+	return success;
+}
+
+/*
+* Counts the number of elements inside input array within the given span length.
+* Formally, what is the size of the largest subset of the array where the largest and smallest element
+* differ no more than the span.
+*
+* Implementation notes:
+* - An empty array and a span of 0 both return 1.
+* - The first window is measured from 0; each later window is measured from the
+*   previous start element + 1. An element belongs to a window while
+*   array[i] - span_start_value <= span, evaluated in unsigned arithmetic.
+* - NOTE(review): presumably the array is sorted in ascending order; the scan
+*   stops at the first element outside the window — confirm against callers.
+* - NOTE(review): returning 1 rather than 0 for an empty array looks
+*   inconsistent with the description above — confirm it is intended.
+*/
+static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span)
+{
+ unsigned int i;
+ unsigned int span_start_value;
+ unsigned int span_start_index;
+ unsigned int greatest_element_count;
+
+ if (array_size == 0)
+ return 1;
+
+ /* array_size is known to be non-zero here, so this always returns 1. */
+ if (span == 0)
+ return array_size > 0 ? 1 : 0;
+
+ span_start_value = 0;
+ span_start_index = 0;
+ greatest_element_count = 0;
+
+ while (span_start_index < array_size) {
+ /* Extend the window from span_start_index for as long as elements stay within span. */
+ for (i = span_start_index; i < array_size; i++) {
+ if (array[i] - span_start_value <= span) {
+ if (i - span_start_index + 1 > greatest_element_count) {
+ greatest_element_count = i - span_start_index + 1;
+ }
+ } else
+ break;
+ }
+
+ span_start_index++;
+
+ /* The next window starts just past the element that began the previous one. */
+ if (span_start_index < array_size) {
+ span_start_value = array[span_start_index - 1] + 1;
+ }
+ }
+
+ return greatest_element_count;
+}
+
+/*
+ * Split a viewport of full_vp_width pixels evenly across num_pipes pipes and
+ * write each pipe's inclusive horizontal range to pipe_vp_x_start[] and
+ * pipe_vp_x_end[]. Every range is widened by MAX_SCL_VP_OVERLAP pixels on
+ * both sides and clamped to [0, full_vp_width - 1].
+ *
+ * Only the centered, aspect-ratio and fullscreen scaling transforms are
+ * handled. Returns true when the ranges were written; returns false for any
+ * other transform or when num_pipes is not positive. h_active is currently
+ * unused.
+ */
+static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes,
+	enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end)
+{
+	int i, slice_width;
+	const char MAX_SCL_VP_OVERLAP = 3;
+	bool success = false;
+
+	/* Nothing to split without pipes, and the division below needs a non-zero divisor. */
+	if (num_pipes <= 0)
+		return false;
+
+	switch (scaling_transform) {
+	case dml2_scaling_transform_centered:
+	case dml2_scaling_transform_aspect_ratio:
+	case dml2_scaling_transform_fullscreen:
+		slice_width = full_vp_width / num_pipes;
+		for (i = 0; i < num_pipes; i++) {
+			pipe_vp_x_start[i] = i * slice_width;
+			pipe_vp_x_end[i] = (i + 1) * slice_width - 1;
+
+			/* Grow the range to the left, clamping at the viewport edge. */
+			if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP)
+				pipe_vp_x_start[i] = 0;
+			else
+				pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP;
+
+			/* Grow the range to the right, clamping at the viewport edge. */
+			if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1)
+				pipe_vp_x_end[i] = full_vp_width - 1;
+			else
+				pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP;
+		}
+		/* The ranges are valid, so report success to the caller. */
+		success = true;
+		break;
+	case dml2_scaling_transform_explicit:
+	default:
+		success = false;
+		break;
+	}
+
+	return success;
+}
+/* Validate each DCC-enabled plane's mcache boundaries against its pipe split; fills params->per_plane_status[] and returns true only if every checked plane passes. */
+bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params)
+{
+	struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance;
+	struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals;
+
+	const int MAX_PIXEL_OVERLAP = 6;
+	int max_per_pipe_vp_p0 = 0;
+	int max_per_pipe_vp_p1 = 0;
+	int temp, p0shift, p1shift;
+	unsigned int plane_index = 0;
+	unsigned int i;
+	unsigned int odm_combine_factor;
+	unsigned int mpc_combine_factor;
+	unsigned int num_dpps;
+	unsigned int num_boundaries;
+	enum dml2_scaling_transform scaling_transform;
+	const struct dml2_plane_parameters *plane;
+	const struct dml2_stream_parameters *stream;
+
+	bool p0pass = false;
+	bool p1pass = false;
+	bool all_pass = true;
+
+	for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) {
+		if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable)
+			continue;
+
+		plane = &params->display_cfg->plane_descriptors[plane_index];
+		stream = &params->display_cfg->stream_descriptors[plane->stream_index];
+
+		num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used;
+
+		if (odm_combine_factor == 1)
+			num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used;
+		else
+			mpc_combine_factor = 1;
+
+		if (odm_combine_factor > 1) {
+			max_per_pipe_vp_p0 = plane->surface.plane0.width;
+			temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor);
+
+			if (temp < max_per_pipe_vp_p0)
+				max_per_pipe_vp_p0 = temp;
+
+			max_per_pipe_vp_p1 = plane->surface.plane1.width;
+			temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor);
+
+			if (temp < max_per_pipe_vp_p1)
+				max_per_pipe_vp_p1 = temp;
+		} else {
+			max_per_pipe_vp_p0 = plane->surface.plane0.width / mpc_combine_factor;
+			max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor;
+		}
+
+		max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP;
+		max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP;
+		// Shifts and pass flags are per-plane state: reset them so an earlier plane's result cannot leak into this one
+		p0shift = p1shift = 0;
+		p0pass = p1pass = false;
+
+		// The last element in the unshifted boundary array will always be the first pixel outside the
+		// plane, which means there's no mcache associated with it, so -1
+		num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1;
+		if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0,
+			num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) {
+			p0pass = true;
+		}
+		num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1;
+		if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1,
+			num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) {
+			p1pass = true;
+		}
+
+		if (!p0pass || !p1pass) {
+			if (odm_combine_factor > 1) {
+				num_dpps = odm_combine_factor;
+				scaling_transform = plane->composition.scaling_transform;
+			} else {
+				num_dpps = mpc_combine_factor;
+				scaling_transform = dml2_scaling_transform_fullscreen;
+			}
+
+			if (!p0pass) {
+				if (plane->composition.viewport.stationary) {
+					calculate_h_split_for_scaling_transform(plane->surface.plane0.width,
+						stream->timing.h_active, num_dpps, scaling_transform,
+						&l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]);
+					p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0,
+						params->mcache_allocations[plane_index].num_mcaches_plane0,
+						&l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps,
+						params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift);
+				}
+			}
+			if (!p1pass) {
+				if (plane->composition.viewport.stationary) {
+					calculate_h_split_for_scaling_transform(plane->surface.plane1.width,
+						stream->timing.h_active, num_dpps, scaling_transform,
+						&l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index]);
+					p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1,
+						params->mcache_allocations[plane_index].num_mcaches_plane1,
+						&l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps,
+						params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift);
+				}
+			}
+		}
+
+		if (p0pass && p1pass) {
+			for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) {
+				params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift;
+			}
+			for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) {
+				params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift;
+			}
+		}
+
+		params->per_plane_status[plane_index] = p0pass && p1pass;
+		all_pass &= p0pass && p1pass;
+	}
+
+	return all_pass;
+}
+/* Return all mcache registers of one pipe (main and mall, plane 0 and plane 1) to their unassigned defaults. */
+static void reset_mcache_allocations(struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs)
+{
+	struct dml2_hubp_pipe_mcache_regs *regs = per_plane_pipe_mcache_regs;
+
+	// Every first/second mcache ID starts out as the special "unassigned" ID
+	regs->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED;
+	regs->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED;
+	regs->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED;
+	regs->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED;
+	regs->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED;
+	regs->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED;
+	regs->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED;
+	regs->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED;
+	// ... and every split coordinate as the special "undefined" location
+	regs->main.p0.split_location = SPLIT_LOCATION_UNDEFINED;
+	regs->main.p1.split_location = SPLIT_LOCATION_UNDEFINED;
+	regs->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED;
+	regs->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED;
+}
+/* Hand out ascending global mcache IDs to every valid allocation, then alias the last-slice IDs that the last_slice_sharing flags allow to be shared. */
+void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params)
+{
+ int i;
+ unsigned int j;
+ int next_unused_cache_id = 0;
+
+ for (i = 0; i < params->num_allocations; i++) {
+ if (!params->allocations[i].valid)
+ continue;
+
+ for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) {
+ params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++;
+ }
+ for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) {
+ params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++;
+ }
+
+ // The "pseudo-last" slice (index num_mcaches_planeX, one past the real entries) is always wrapped around to the first ID
+ params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] =
+ params->allocations[i].global_mcache_ids_plane0[0];
+ params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] =
+ params->allocations[i].global_mcache_ids_plane1[0];
+
+ // If we need dedicated caches for mall requesting, then we assign them here.
+ if (params->allocations[i].requires_dedicated_mall_mcache) {
+ for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) {
+ params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++;
+ }
+ for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) {
+ params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++;
+ }
+
+ // The "pseudo-last" slice (index num_mcaches_planeX, one past the real entries) is always wrapped around to the first ID
+ params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] =
+ params->allocations[i].global_mcache_ids_mall_plane0[0];
+ params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] =
+ params->allocations[i].global_mcache_ids_mall_plane1[0];
+ }
+
+ // If P0 and P1 are sharing caches, then it means the largest mcache IDs for p0 and p1 can be the same
+ // since mcache IDs are always ascending, then it means the largest mcacheID of p1 should be the
+ // largest mcacheID of P0
+ if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 &&
+ params->allocations[i].last_slice_sharing.plane0_plane1) {
+ params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+ params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+ }
+
+ // If we need dedicated caches handle last slice sharing
+ if (params->allocations[i].requires_dedicated_mall_mcache) {
+ if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 &&
+ params->allocations[i].last_slice_sharing.plane0_plane1) {
+ params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+ params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+ }
+ // If mall_comb_mcache_p0 is set then it means that largest mcache ID for MALL p0 can be same as regular read p0
+ if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) {
+ params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] =
+ params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+ }
+ // If mall_comb_mcache_p1 is set then it means that largest mcache ID for MALL p1 can be same as regular
+ // read p1 (which can be same as regular read p0 if plane0_plane1 is also set)
+ if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) {
+ params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+ params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1];
+ }
+ }
+
+ // If you don't need dedicated mall mcaches, the mall mcache assignments are identical to the normal requesting
+ if (!params->allocations[i].requires_dedicated_mall_mcache) {
+ memcpy(params->allocations[i].global_mcache_ids_mall_plane0, params->allocations[i].global_mcache_ids_plane0,
+ sizeof(params->allocations[i].global_mcache_ids_mall_plane0));
+ memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1,
+ sizeof(params->allocations[i].global_mcache_ids_mall_plane1));
+ }
+ }
+}
+/* Compute the mcache allocation of every DCC-enabled plane and report whether the total demand fits in soc_bbox.num_dcc_mcaches; false on any allocation failure. */
+bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params)
+{
+	struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance;
+	struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals;
+
+	unsigned int total_mcaches_required;
+	unsigned int i;
+	bool result = false;
+
+	if (dml->soc_bbox.num_dcc_mcaches == 0) {
+		return true;
+	}
+
+	total_mcaches_required = 0;
+	l->calc_mcache_params.instance = &dml->core_instance;
+	for (i = 0; i < params->display_config->num_planes; i++) {
+		if (!params->display_config->plane_descriptors[i].surface.dcc.enable) {
+			memset(&params->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation));
+			continue;
+		}
+
+		l->calc_mcache_params.plane_descriptor = &params->display_config->plane_descriptors[i];
+		l->calc_mcache_params.mcache_allocation = &params->mcache_allocations[i];
+		l->calc_mcache_params.plane_index = i;
+
+		if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) {
+			// Fail right away: merely breaking out would let the capacity check below overwrite the failure
+			return false;
+		}
+
+		if (params->mcache_allocations[i].valid) {
+			total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1;
+			if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1)
+				total_mcaches_required--;
+		}
+	}
+	DML_LOG_VERBOSE("DML_CORE_DCN3::%s: plane_%u, total_mcaches_required=%u\n", __func__, i, total_mcaches_required);
+
+	if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) {
+		result = false;
+	} else {
+		result = true;
+	}
+
+	return result;
+}
+/* Mode-support check: core mode support on the unoptimized config, then the mcache optimization phase, then DPMM clock mapping. */
+static bool dml2_top_soc15_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out)
+{
+ struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance;
+ struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals;
+ struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming;
+
+ bool result = false;
+ bool mcache_success = false;
+ memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming));
+
+ setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config);
+
+ l->mode_support_params.instance = &dml->core_instance;
+ l->mode_support_params.display_cfg = &l->base_display_config_with_meta;
+ l->mode_support_params.min_clk_table = &dml->min_clk_table;
+ l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency;
+ result = dml->core_instance.mode_support(&l->mode_support_params);
+ l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result;
+
+ if (result) {
+ struct optimization_phase_params mcache_phase = {
+ .dml = dml,
+ .display_config = &l->base_display_config_with_meta,
+ .test_function = dml2_top_optimization_test_function_mcache,
+ .optimize_function = dml2_top_optimization_optimize_function_mcache,
+ .optimized_display_config = &l->optimized_display_config_with_meta,
+ .all_or_nothing = false,
+ };
+ mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &mcache_phase);
+ }
+
+ /*
+ * Call DPMM to map all requirements to minimum clock state
+ */
+ if (result) {
+ l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table;
+ l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta;
+ l->dppm_map_mode_params.programming = dpmm_programming;
+ l->dppm_map_mode_params.soc_bb = &dml->soc_bbox;
+ l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip;
+ result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params);
+ }
+ /* is_supported mirrors the mcache phase alone (it only runs after mode support passed); the return value also requires the DPMM mapping to succeed */
+ in_out->is_supported = mcache_success;
+ result = result && in_out->is_supported;
+
+ return result;
+}
+/* Build the full mode programming: mode support, optimization phases (latency clocks, mcache, pstate, vmin, stutter), DPMM mapping, core programming and watermarks. */
+static bool dml2_top_soc15_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out)
+{
+ struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance;
+ struct dml2_build_mode_programming_locals *l = &dml->scratch.build_mode_programming_locals;
+
+ bool result = false;
+ bool mcache_success = false;
+ bool uclk_pstate_success = false;
+ bool vmin_success = false;
+ bool stutter_success = false;
+
+ memset(l, 0, sizeof(struct dml2_build_mode_programming_locals));
+ memset(in_out->programming, 0, sizeof(struct dml2_display_cfg_programming));
+
+ memcpy(&in_out->programming->display_config, in_out->display_config, sizeof(struct dml2_display_cfg));
+
+ setup_speculative_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config);
+
+ l->mode_support_params.instance = &dml->core_instance;
+ l->mode_support_params.display_cfg = &l->base_display_config_with_meta;
+ l->mode_support_params.min_clk_table = &dml->min_clk_table;
+ l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency;
+ result = dml->core_instance.mode_support(&l->mode_support_params);
+
+ l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result;
+ /* The speculative configuration failed mode support: retry with the unoptimized configuration and, if that passes, run Phase 1 */
+ if (!result) {
+ setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config);
+
+ l->mode_support_params.instance = &dml->core_instance;
+ l->mode_support_params.display_cfg = &l->base_display_config_with_meta;
+ l->mode_support_params.min_clk_table = &dml->min_clk_table;
+ l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency;
+ result = dml->core_instance.mode_support(&l->mode_support_params);
+ l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result;
+
+ if (!result) {
+ l->informative_params.instance = &dml->core_instance;
+ l->informative_params.programming = in_out->programming;
+ l->informative_params.mode_is_supported = false;
+ dml->core_instance.populate_informative(&l->informative_params);
+
+ return false;
+ }
+
+ /*
+ * Phase 1: Determine minimum clocks to satisfy latency requirements for this mode
+ */
+ memset(&l->min_clock_for_latency_phase, 0, sizeof(struct optimization_phase_params));
+ l->min_clock_for_latency_phase.dml = dml;
+ l->min_clock_for_latency_phase.display_config = &l->base_display_config_with_meta;
+ l->min_clock_for_latency_phase.init_function = dml2_top_optimization_init_function_min_clk_for_latency;
+ l->min_clock_for_latency_phase.test_function = dml2_top_optimization_test_function_min_clk_for_latency;
+ l->min_clock_for_latency_phase.optimize_function = dml2_top_optimization_optimize_function_min_clk_for_latency;
+ l->min_clock_for_latency_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+ l->min_clock_for_latency_phase.all_or_nothing = false;
+
+ dml2_top_optimization_perform_optimization_phase_1(&l->optimization_phase_locals, &l->min_clock_for_latency_phase);
+
+ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+ }
+
+ /*
+ * Phase 2: Satisfy DCC mcache requirements
+ */
+ memset(&l->mcache_phase, 0, sizeof(struct optimization_phase_params));
+ l->mcache_phase.dml = dml;
+ l->mcache_phase.display_config = &l->base_display_config_with_meta;
+ l->mcache_phase.test_function = dml2_top_optimization_test_function_mcache;
+ l->mcache_phase.optimize_function = dml2_top_optimization_optimize_function_mcache;
+ l->mcache_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+ l->mcache_phase.all_or_nothing = true;
+
+ mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->mcache_phase);
+
+ if (!mcache_success) {
+ l->informative_params.instance = &dml->core_instance;
+ l->informative_params.programming = in_out->programming;
+ l->informative_params.mode_is_supported = false;
+
+ dml->core_instance.populate_informative(&l->informative_params);
+
+ in_out->programming->informative.failed_mcache_validation = true;
+ return false;
+ }
+
+ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+
+ /*
+ * Phase 3: Optimize for Pstate
+ */
+ memset(&l->uclk_pstate_phase, 0, sizeof(struct optimization_phase_params));
+ l->uclk_pstate_phase.dml = dml;
+ l->uclk_pstate_phase.display_config = &l->base_display_config_with_meta;
+ l->uclk_pstate_phase.init_function = dml2_top_optimization_init_function_uclk_pstate;
+ l->uclk_pstate_phase.test_function = dml2_top_optimization_test_function_uclk_pstate;
+ l->uclk_pstate_phase.optimize_function = dml2_top_optimization_optimize_function_uclk_pstate;
+ l->uclk_pstate_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+ l->uclk_pstate_phase.all_or_nothing = true;
+
+ uclk_pstate_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->uclk_pstate_phase);
+
+ if (uclk_pstate_success) {
+ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+ l->base_display_config_with_meta.stage3.success = true;
+ }
+
+ /*
+ * Phase 4: Optimize for Vmin
+ */
+ memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params));
+ l->vmin_phase.dml = dml;
+ l->vmin_phase.display_config = &l->base_display_config_with_meta;
+ l->vmin_phase.init_function = dml2_top_optimization_init_function_vmin;
+ l->vmin_phase.test_function = dml2_top_optimization_test_function_vmin;
+ l->vmin_phase.optimize_function = dml2_top_optimization_optimize_function_vmin;
+ l->vmin_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+ l->vmin_phase.all_or_nothing = false;
+
+ vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase);
+
+ if (l->optimized_display_config_with_meta.stage4.performed) {
+ /*
+ * when performed is true, optimization has applied to
+ * optimized_display_config_with_meta and it has passed mode
+ * support. However it may or may not pass the test function to
+ * reach actual Vmin. As long as voltage is optimized even if it
+ * doesn't reach Vmin level, there is still power benefit so in
+ * this case we will still copy this optimization into base
+ * display config.
+ */
+ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+ l->base_display_config_with_meta.stage4.success = vmin_success;
+ }
+
+ /*
+ * Phase 5: Optimize for Stutter
+ */
+ memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params));
+ l->stutter_phase.dml = dml;
+ l->stutter_phase.display_config = &l->base_display_config_with_meta;
+ l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter;
+ l->stutter_phase.test_function = dml2_top_optimization_test_function_stutter;
+ l->stutter_phase.optimize_function = dml2_top_optimization_optimize_function_stutter;
+ l->stutter_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+ l->stutter_phase.all_or_nothing = true;
+
+ stutter_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->stutter_phase);
+
+ if (stutter_success) {
+ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+ l->base_display_config_with_meta.stage5.success = true;
+ }
+
+ /*
+ * Call DPMM to map all requirements to minimum clock state
+ */
+ if (result) {
+ l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table;
+ l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta;
+ l->dppm_map_mode_params.programming = in_out->programming;
+ l->dppm_map_mode_params.soc_bb = &dml->soc_bbox;
+ l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip;
+ result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params);
+ if (!result)
+ in_out->programming->informative.failed_dpmm = true;
+ }
+
+ if (result) {
+ l->mode_programming_params.instance = &dml->core_instance;
+ l->mode_programming_params.display_cfg = &l->base_display_config_with_meta;
+ l->mode_programming_params.cfg_support_info = &l->base_display_config_with_meta.mode_support_result.cfg_support_info;
+ l->mode_programming_params.programming = in_out->programming;
+ result = dml->core_instance.mode_programming(&l->mode_programming_params);
+ if (!result)
+ in_out->programming->informative.failed_mode_programming = true;
+ }
+
+ if (result) {
+ l->dppm_map_watermarks_params.core = &dml->core_instance;
+ l->dppm_map_watermarks_params.display_cfg = &l->base_display_config_with_meta;
+ l->dppm_map_watermarks_params.programming = in_out->programming;
+ result = dml->dpmm_instance.map_watermarks(&l->dppm_map_watermarks_params);
+ }
+
+ l->informative_params.instance = &dml->core_instance;
+ l->informative_params.programming = in_out->programming;
+ l->informative_params.mode_is_supported = result;
+
+ dml->core_instance.populate_informative(&l->informative_params);
+
+ return result;
+}
+/* Fill per_plane_pipe_mcache_regs for each pipe of each configuration (first/second mcache IDs and split location for plane 0 and, if enabled, plane 1); false if any split lookup fails. */
+bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params)
+{
+ bool success = true;
+ int config_index, pipe_index;
+ int first_offset, second_offset;
+ int free_per_plane_reg_index = 0;
+
+ memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *));
+
+ for (config_index = 0; config_index < params->num_configurations; config_index++) {
+ for (pipe_index = 0; pipe_index < params->mcache_configurations[config_index].num_pipes; pipe_index++) {
+ // Allocate storage for the mcache regs: take the next free entry of mcache_regs_set (NOTE(review): no bounds check here - confirm capacity)
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index] = &params->mcache_regs_set[free_per_plane_reg_index++];
+
+ reset_mcache_allocations(params->per_plane_pipe_mcache_regs[config_index][pipe_index]);
+
+ if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) {
+ // P0 always enabled
+ if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0,
+ params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0,
+ 0,
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start,
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start +
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1,
+ &first_offset, &second_offset)) {
+ success = false;
+ break; // leaves the pipe loop only; later configurations are still processed
+ }
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset];
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset];
+
+ if (second_offset >= 0) {
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset];
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location =
+ params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1;
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset];
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location =
+ params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1;
+ }
+
+ // Populate P1 if enabled
+ if (params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) {
+ if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1,
+ params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1,
+ 0,
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start,
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start +
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1,
+ &first_offset, &second_offset)) {
+ success = false;
+ break; // leaves the pipe loop only; later configurations are still processed
+ }
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset];
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset];
+
+ if (second_offset >= 0) {
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset];
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location =
+ params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1;
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset];
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location =
+ params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1;
+ }
+ }
+ }
+ }
+ }
+
+ return success;
+}
+/* Entry-point table copied into dml->funcs by dml2_top_soc15_initialize_instance(). */
+static const struct dml2_top_funcs soc15_funcs = {
+ .check_mode_supported = dml2_top_soc15_check_mode_supported,
+ .build_mode_programming = dml2_top_soc15_build_mode_programming,
+ .build_mcache_programming = dml2_top_soc15_build_mcache_programming,
+};
+/* Zero the instance, copy the IP caps and SoC bounding box into it, then create and initialize the MCG, DPMM, core and PMO components in that order. */
+bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out)
+{
+ struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance;
+ struct dml2_initialize_instance_locals *l = &dml->scratch.initialize_instance_locals;
+ struct dml2_core_initialize_in_out core_init_params = { 0 };
+ struct dml2_mcg_build_min_clock_table_params_in_out mcg_build_min_clk_params = { 0 };
+ struct dml2_pmo_initialize_in_out pmo_init_params = { 0 };
+ bool result = false;
+ /* l points into dml->scratch, so the memset of dml below clears it as well */
+ memset(l, 0, sizeof(struct dml2_initialize_instance_locals));
+ memset(dml, 0, sizeof(struct dml2_instance));
+
+ memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities));
+ memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb));
+
+ dml->project_id = in_out->options.project_id;
+ dml->pmo_options = in_out->options.pmo_options;
+
+ // Initialize All Components
+ result = dml2_mcg_create(in_out->options.project_id, &dml->mcg_instance);
+
+ if (result)
+ result = dml2_dpmm_create(in_out->options.project_id, &dml->dpmm_instance);
+
+ if (result)
+ result = dml2_core_create(in_out->options.project_id, &dml->core_instance);
+
+ if (result) {
+ mcg_build_min_clk_params.soc_bb = &in_out->soc_bb;
+ mcg_build_min_clk_params.min_clk_table = &dml->min_clk_table;
+ result = dml->mcg_instance.build_min_clock_table(&mcg_build_min_clk_params);
+ }
+
+ if (result) {
+ core_init_params.project_id = in_out->options.project_id;
+ core_init_params.instance = &dml->core_instance;
+ core_init_params.minimum_clock_table = &dml->min_clk_table;
+ core_init_params.explicit_ip_bb = in_out->overrides.explicit_ip_bb;
+ core_init_params.explicit_ip_bb_size = in_out->overrides.explicit_ip_bb_size;
+ core_init_params.ip_caps = &in_out->ip_caps;
+ core_init_params.soc_bb = &in_out->soc_bb;
+ result = dml->core_instance.initialize(&core_init_params);
+ /* NOTE(review): presumably initialize() may rewrite in_out->ip_caps from the explicit IP bb, hence the second copy - confirm */
+ if (core_init_params.explicit_ip_bb && core_init_params.explicit_ip_bb_size > 0) {
+ memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities));
+ }
+ }
+
+ if (result)
+ result = dml2_pmo_create(in_out->options.project_id, &dml->pmo_instance);
+
+ if (result) {
+ pmo_init_params.instance = &dml->pmo_instance;
+ pmo_init_params.soc_bb = &dml->soc_bbox;
+ pmo_init_params.ip_caps = &dml->ip_caps;
+ pmo_init_params.mcg_clock_table_size = dml->min_clk_table.dram_bw_table.num_entries;
+ pmo_init_params.options = &dml->pmo_options;
+ dml->pmo_instance.initialize(&pmo_init_params); // NOTE(review): any return value is not folded into result - confirm intended
+ }
+ dml->funcs = soc15_funcs; // assigned even when result is false
+ return result;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h
new file mode 100644
index 000000000000..53bd8602f9ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_TOP_SOC15_H__
+#define __DML2_TOP_SOC15_H__
+#include "dml2_internal_shared_types.h"
+bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out);
+
+bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params);
+void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params);
+bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params);
+bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params);
+#endif /* __DML2_TOP_SOC15_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h
new file mode 100644
index 000000000000..611c80f4f1bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_DEBUG_H__
+#define __DML2_DEBUG_H__
+
+#include "os_types.h"
+#define DML_ASSERT(condition) ASSERT(condition)
+#define DML_LOG_LEVEL_DEFAULT DML_LOG_LEVEL_WARN
+#define DML_LOG_INTERNAL(fmt, ...) dm_output_to_console(fmt, ## __VA_ARGS__)
+
+/* private helper macros; each _*_FORMAT(field) expands to a "format, argument" pair for DML_LOG_INTERNAL */
+#define _BOOL_FORMAT(field) "%s", (field) ? "true" : "false"
+#define _UINT_FORMAT(field) "%u", (field)
+#define _INT_FORMAT(field) "%d", (field)
+#define _DOUBLE_FORMAT(field) "%lf", (field)
+#define _ELEMENT_FUNC "function"
+#define _ELEMENT_COMP_IF "component_interface"
+#define _ELEMENT_TOP_IF "top_interface"
+#define _LOG_ENTRY(element) do { /* emits: <element name="calling function"> */ \
+	DML_LOG_INTERNAL("<"element" name=\""); \
+	DML_LOG_INTERNAL("%s", __func__); /* __func__ is data, never the format string */ \
+	DML_LOG_INTERNAL("\">\n"); \
+} while (0)
+#define _LOG_EXIT(element) DML_LOG_INTERNAL("</"element">\n")
+#define _LOG_SCALAR(field, format) do { \
+ DML_LOG_INTERNAL(#field" = "format(field)); \
+ DML_LOG_INTERNAL("\n"); \
+} while (0)
+#define _LOG_ARRAY(field, size, format) do { /* logs: field = [e0, e1, ...] */ \
+	DML_LOG_INTERNAL(#field " = ["); \
+	for (int _i = 0; _i < (int)(size); _i++) { \
+		if (_i > 0) \
+			DML_LOG_INTERNAL(", "); \
+		DML_LOG_INTERNAL(format((field)[_i])); \
+	} \
+	DML_LOG_INTERNAL("]\n"); /* emitted after the loop so an empty array still closes */ \
+} while (0)
+#define _LOG_2D_ARRAY(field, size0, size1, format) do { \
+	/* logs: field = [ [row 0], [row 1], ... ] with each row starting on its own line; */ \
+	/* separators precede every item but the first, so empty dimensions still close */ \
+	DML_LOG_INTERNAL(#field" = ["); \
+	for (int _i = 0; _i < (int)(size0); _i++) { \
+		if (_i > 0) \
+			DML_LOG_INTERNAL(", "); \
+		DML_LOG_INTERNAL("\n\t["); \
+		for (int _j = 0; _j < (int)(size1); _j++) { \
+			if (_j > 0) \
+				DML_LOG_INTERNAL(", "); \
+			DML_LOG_INTERNAL(format((field)[_i][_j])); \
+		} \
+		DML_LOG_INTERNAL("]"); \
+	} \
+	DML_LOG_INTERNAL("]\n"); \
+} while (0)
+#define _LOG_3D_ARRAY(field, size0, size1, size2, format) do { \
+	/* logs: field = [ [[..], [..]], [[..], [..]], ... ] with each outermost */ \
+	/* entry starting on its own line; separators precede every item but the */ \
+	/* first, so a dimension of size 0 still gets its closing bracket */ \
+	DML_LOG_INTERNAL(#field" = ["); \
+	for (int _i = 0; _i < (int)(size0); _i++) { \
+		if (_i > 0) \
+			DML_LOG_INTERNAL(", "); \
+		DML_LOG_INTERNAL("\n\t["); \
+		for (int _j = 0; _j < (int)(size1); _j++) { \
+			if (_j > 0) \
+				DML_LOG_INTERNAL(", "); \
+			DML_LOG_INTERNAL("["); \
+			for (int _k = 0; _k < (int)(size2); _k++) { \
+				if (_k > 0) \
+					DML_LOG_INTERNAL(", "); \
+				DML_LOG_INTERNAL(format((field)[_i][_j][_k])); \
+			} \
+			DML_LOG_INTERNAL("]"); \
+		} \
+		DML_LOG_INTERNAL("]"); \
+	} \
+	DML_LOG_INTERNAL("]\n"); \
+} while (0)
+
+/* fatal errors for unrecoverable DML states until a full reset */
+#define DML_LOG_LEVEL_FATAL 0
+/* unexpected but recoverable failures inside DML */
+#define DML_LOG_LEVEL_ERROR 1
+/* unexpected inputs or events to DML */
+#define DML_LOG_LEVEL_WARN 2
+/* high level tracing of DML interfaces */
+#define DML_LOG_LEVEL_INFO 3
+/* tracing of DML internal executions */
+#define DML_LOG_LEVEL_DEBUG 4
+/* detailed tracing of DML calculation procedure */
+#define DML_LOG_LEVEL_VERBOSE 5
+
+#ifndef DML_LOG_LEVEL
+#define DML_LOG_LEVEL DML_LOG_LEVEL_DEFAULT
+#endif /* #ifndef DML_LOG_LEVEL */
+
+/* public macros for DML_LOG_LEVEL_FATAL and up */
+#define DML_LOG_FATAL(fmt, ...) DML_LOG_INTERNAL("[DML FATAL] " fmt, ## __VA_ARGS__)
+
+/* public macros for DML_LOG_LEVEL_ERROR and up; DML_ASSERT_MSG always asserts, only its message depends on the log level */
+#if DML_LOG_LEVEL >= DML_LOG_LEVEL_ERROR
+#define DML_LOG_ERROR(fmt, ...) DML_LOG_INTERNAL("[DML ERROR] "fmt, ## __VA_ARGS__)
+#define DML_ASSERT_MSG(condition, fmt, ...) \
+	do { \
+		if (!(condition)) { \
+			DML_LOG_ERROR("ASSERT hit in %s line %d\n", __func__, __LINE__); \
+			DML_LOG_ERROR(fmt, ## __VA_ARGS__); \
+			DML_ASSERT(condition); \
+		} \
+	} while (0)
+#else
+#define DML_LOG_ERROR(fmt, ...) ((void)0)
+#define DML_ASSERT_MSG(condition, fmt, ...) DML_ASSERT(condition)
+#endif
+
+/* public macros for DML_LOG_LEVEL_WARN and up */
+#if DML_LOG_LEVEL >= DML_LOG_LEVEL_WARN
+#define DML_LOG_WARN(fmt, ...) DML_LOG_INTERNAL("[DML WARN] "fmt, ## __VA_ARGS__)
+#else
+#define DML_LOG_WARN(fmt, ...) ((void)0)
+#endif
+
+/* public macros for DML_LOG_LEVEL_INFO and up */
+#if DML_LOG_LEVEL >= DML_LOG_LEVEL_INFO
+#define DML_LOG_INFO(fmt, ...) DML_LOG_INTERNAL("[DML INFO] "fmt, ## __VA_ARGS__)
+#define DML_LOG_TOP_IF_ENTER() _LOG_ENTRY(_ELEMENT_TOP_IF)
+#define DML_LOG_TOP_IF_EXIT() _LOG_EXIT(_ELEMENT_TOP_IF)
+#else
+#define DML_LOG_INFO(fmt, ...) ((void)0)
+#define DML_LOG_TOP_IF_ENTER() ((void)0)
+#define DML_LOG_TOP_IF_EXIT() ((void)0)
+#endif
+
+/* public macros for DML_LOG_LEVEL_DEBUG and up */
+#if DML_LOG_LEVEL >= DML_LOG_LEVEL_DEBUG
+#define DML_LOG_DEBUG(fmt, ...) DML_LOG_INTERNAL(fmt, ## __VA_ARGS__)
+#define DML_LOG_COMP_IF_ENTER() _LOG_ENTRY(_ELEMENT_COMP_IF)
+#define DML_LOG_COMP_IF_EXIT() _LOG_EXIT(_ELEMENT_COMP_IF)
+#define DML_LOG_FUNC_ENTER() _LOG_ENTRY(_ELEMENT_FUNC)
+#define DML_LOG_FUNC_EXIT() _LOG_EXIT(_ELEMENT_FUNC)
+#define DML_LOG_DEBUG_BOOL(field) _LOG_SCALAR(field, _BOOL_FORMAT)
+#define DML_LOG_DEBUG_UINT(field) _LOG_SCALAR(field, _UINT_FORMAT)
+#define DML_LOG_DEBUG_INT(field) _LOG_SCALAR(field, _INT_FORMAT)
+#define DML_LOG_DEBUG_DOUBLE(field) _LOG_SCALAR(field, _DOUBLE_FORMAT)
+#define DML_LOG_DEBUG_ARRAY_BOOL(field, size) _LOG_ARRAY(field, size, _BOOL_FORMAT)
+#define DML_LOG_DEBUG_ARRAY_UINT(field, size) _LOG_ARRAY(field, size, _UINT_FORMAT)
+#define DML_LOG_DEBUG_ARRAY_INT(field, size) _LOG_ARRAY(field, size, _INT_FORMAT)
+#define DML_LOG_DEBUG_ARRAY_DOUBLE(field, size) _LOG_ARRAY(field, size, _DOUBLE_FORMAT)
+#define DML_LOG_DEBUG_2D_ARRAY_BOOL(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _BOOL_FORMAT)
+#define DML_LOG_DEBUG_2D_ARRAY_UINT(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _UINT_FORMAT)
+#define DML_LOG_DEBUG_2D_ARRAY_INT(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _INT_FORMAT)
+#define DML_LOG_DEBUG_2D_ARRAY_DOUBLE(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _DOUBLE_FORMAT)
+#define DML_LOG_DEBUG_3D_ARRAY_BOOL(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _BOOL_FORMAT)
+#define DML_LOG_DEBUG_3D_ARRAY_UINT(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _UINT_FORMAT)
+#define DML_LOG_DEBUG_3D_ARRAY_INT(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _INT_FORMAT)
+#define DML_LOG_DEBUG_3D_ARRAY_DOUBLE(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _DOUBLE_FORMAT)
+#else
+#define DML_LOG_DEBUG(fmt, ...) ((void)0)
+#define DML_LOG_COMP_IF_ENTER() ((void)0)
+#define DML_LOG_COMP_IF_EXIT() ((void)0)
+#define DML_LOG_FUNC_ENTER() ((void)0)
+#define DML_LOG_FUNC_EXIT() ((void)0)
+#define DML_LOG_DEBUG_BOOL(field) ((void)0)
+#define DML_LOG_DEBUG_UINT(field) ((void)0)
+#define DML_LOG_DEBUG_INT(field) ((void)0)
+#define DML_LOG_DEBUG_DOUBLE(field) ((void)0)
+#define DML_LOG_DEBUG_ARRAY_BOOL(field, size) ((void)0)
+#define DML_LOG_DEBUG_ARRAY_UINT(field, size) ((void)0)
+#define DML_LOG_DEBUG_ARRAY_INT(field, size) ((void)0)
+#define DML_LOG_DEBUG_ARRAY_DOUBLE(field, size) ((void)0)
+#define DML_LOG_DEBUG_2D_ARRAY_BOOL(field, size0, size1) ((void)0)
+#define DML_LOG_DEBUG_2D_ARRAY_UINT(field, size0, size1) ((void)0)
+#define DML_LOG_DEBUG_2D_ARRAY_INT(field, size0, size1) ((void)0)
+#define DML_LOG_DEBUG_2D_ARRAY_DOUBLE(field, size0, size1) ((void)0)
+#define DML_LOG_DEBUG_3D_ARRAY_BOOL(field, size0, size1, size2) ((void)0)
+#define DML_LOG_DEBUG_3D_ARRAY_UINT(field, size0, size1, size2) ((void)0)
+#define DML_LOG_DEBUG_3D_ARRAY_INT(field, size0, size1, size2) ((void)0)
+#define DML_LOG_DEBUG_3D_ARRAY_DOUBLE(field, size0, size1, size2) ((void)0)
+#endif
+
+/* public macros for DML_LOG_LEVEL_VERBOSE */
+#if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE
+#define DML_LOG_VERBOSE(fmt, ...) DML_LOG_INTERNAL(fmt, ## __VA_ARGS__)
+#else
+#define DML_LOG_VERBOSE(fmt, ...) ((void)0)
+#endif /* #if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE */
+#endif /* __DML2_DEBUG_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
new file mode 100644
index 000000000000..d52aa82283b3
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
@@ -0,0 +1,988 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_INTERNAL_SHARED_TYPES_H__
+#define __DML2_INTERNAL_SHARED_TYPES_H__
+
+#include "dml2_external_lib_deps.h"
+#include "dml_top_types.h"
+#include "dml2_core_shared_types.h"
+/*
+* DML2 MCG Types and Interfaces
+*/
+
+#define DML_MCG_MAX_CLK_TABLE_SIZE 20
+
+struct dram_bw_to_min_clk_table_entry {
+ unsigned long long pre_derate_dram_bw_kbps;
+ unsigned long min_fclk_khz;
+ unsigned long min_dcfclk_khz;
+};
+
+struct dml2_mcg_dram_bw_to_min_clk_table {
+ struct dram_bw_to_min_clk_table_entry entries[DML_MCG_MAX_CLK_TABLE_SIZE];
+
+ unsigned int num_entries;
+};
+
+struct dml2_mcg_min_clock_table {
+ struct {
+ unsigned int dispclk;
+ unsigned int dppclk;
+ unsigned int dscclk;
+ unsigned int dtbclk;
+ unsigned int phyclk;
+ unsigned int fclk;
+ unsigned int dcfclk;
+ } max_clocks_khz;
+
+ struct {
+ unsigned int dispclk;
+ unsigned int dppclk;
+ unsigned int dtbclk;
+ } max_ss_clocks_khz;
+
+ struct {
+ unsigned int dprefclk;
+ unsigned int xtalclk;
+ unsigned int pcierefclk;
+ unsigned int dchubrefclk;
+ unsigned int amclk;
+ } fixed_clocks_khz;
+
+ struct dml2_mcg_dram_bw_to_min_clk_table dram_bw_table;
+};
+
+struct dml2_mcg_build_min_clock_table_params_in_out {
+ /*
+ * Input
+ */
+ struct dml2_soc_bb *soc_bb;
+ struct {
+ bool perform_pseudo_build;
+ } clean_me_up;
+
+ /*
+ * Output
+ */
+ struct dml2_mcg_min_clock_table *min_clk_table;
+};
+struct dml2_mcg_instance {
+ bool (*build_min_clock_table)(struct dml2_mcg_build_min_clock_table_params_in_out *in_out);
+};
+
+/*
+* DML2 DPMM Types and Interfaces
+*/
+
+struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out {
+ /*
+ * Input
+ */
+ struct dml2_core_ip_params *ip;
+ struct dml2_soc_bb *soc_bb;
+ struct dml2_mcg_min_clock_table *min_clk_table;
+ const struct display_configuation_with_meta *display_cfg;
+ struct {
+ bool perform_pseudo_map;
+ struct dml2_core_internal_soc_bb *soc_bb;
+ } clean_me_up;
+
+ /*
+ * Output
+ */
+ struct dml2_display_cfg_programming *programming;
+};
+
+struct dml2_dpmm_map_watermarks_params_in_out {
+ /*
+ * Input
+ */
+ const struct display_configuation_with_meta *display_cfg;
+ const struct dml2_core_instance *core;
+
+ /*
+ * Output
+ */
+ struct dml2_display_cfg_programming *programming;
+};
+
+struct dml2_dpmm_scratch {
+ struct dml2_display_cfg_programming programming;
+};
+
+struct dml2_dpmm_instance {
+ bool (*map_mode_to_soc_dpm)(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out);
+ bool (*map_watermarks)(struct dml2_dpmm_map_watermarks_params_in_out *in_out);
+
+ struct dml2_dpmm_scratch dpmm_scratch;
+};
+
+/*
+* DML2 Core Types and Interfaces
+*/
+
+struct dml2_core_initialize_in_out {
+ enum dml2_project_id project_id;
+ struct dml2_core_instance *instance;
+ struct dml2_soc_bb *soc_bb;
+ struct dml2_ip_capabilities *ip_caps;
+
+ struct dml2_mcg_min_clock_table *minimum_clock_table;
+
+ void *explicit_ip_bb;
+ unsigned int explicit_ip_bb_size;
+
+ // FIXME_STAGE2 can remove but dcn3 version still need this
+ struct {
+ struct soc_bounding_box_st *soc_bb;
+ struct soc_states_st *soc_states;
+ } legacy;
+};
+
+struct core_bandwidth_requirements {
+ int urgent_bandwidth_kbytes_per_sec;
+ int average_bandwidth_kbytes_per_sec;
+};
+
+struct core_plane_support_info {
+ int dpps_used;
+ int dram_change_latency_hiding_margin_in_active;
+ int active_latency_hiding_us;
+ int mall_svp_size_requirement_ways;
+ int nominal_vblank_pstate_latency_hiding_us;
+ unsigned int dram_change_vactive_det_fill_delay_us;
+};
+
+struct core_stream_support_info {
+ unsigned int odms_used;
+ unsigned int num_odm_output_segments; // for odm split mode (e.g. a value of 2 for odm_mode_mso_1to2)
+
+ /* FAMS2 SubVP support info */
+ unsigned int phantom_min_v_active;
+ unsigned int phantom_v_startup;
+
+ unsigned int phantom_v_active;
+ unsigned int phantom_v_total;
+ int vblank_reserved_time_us;
+ int num_dsc_slices;
+ bool dsc_enable;
+};
+
+struct core_display_cfg_support_info {
+ bool is_supported;
+
+ struct core_stream_support_info stream_support_info[DML2_MAX_PLANES];
+ struct core_plane_support_info plane_support_info[DML2_MAX_PLANES];
+
+ struct {
+ struct dml2_core_internal_mode_support_info support_info;
+ } clean_me_up;
+};
+
+struct dml2_core_mode_support_result {
+ struct {
+ struct {
+ unsigned long urgent_bw_sdp_kbps;
+ unsigned long average_bw_sdp_kbps;
+ unsigned long urgent_bw_dram_kbps;
+ unsigned long average_bw_dram_kbps;
+ unsigned long dcfclk_khz;
+ unsigned long fclk_khz;
+ } svp_prefetch;
+
+ struct {
+ unsigned long urgent_bw_sdp_kbps;
+ unsigned long average_bw_sdp_kbps;
+ unsigned long urgent_bw_dram_kbps;
+ unsigned long average_bw_dram_kbps;
+ unsigned long dcfclk_khz;
+ unsigned long fclk_khz;
+ } active;
+
+ unsigned int dispclk_khz;
+ unsigned int dpprefclk_khz;
+ unsigned int dtbrefclk_khz;
+ unsigned int dcfclk_deepsleep_khz;
+ unsigned int socclk_khz;
+
+ unsigned int uclk_pstate_supported;
+ unsigned int fclk_pstate_supported;
+ } global;
+
+ struct {
+ unsigned int dscclk_khz;
+ unsigned int dtbclk_khz;
+ unsigned int phyclk_khz;
+ } per_stream[DML2_MAX_PLANES];
+
+ struct {
+ unsigned int dppclk_khz;
+ unsigned int mall_svp_allocation_mblks;
+ unsigned int mall_full_frame_allocation_mblks;
+ } per_plane[DML2_MAX_PLANES];
+
+ struct core_display_cfg_support_info cfg_support_info;
+};
+
+struct dml2_optimization_stage1_state {
+ bool performed;
+ bool success;
+
+ int min_clk_index_for_latency;
+};
+
+struct dml2_optimization_stage2_state {
+ bool performed;
+ bool success;
+
+ // Whether or not each plane supports mcache
+ // The number of valid elements == display_cfg.num_planes
+ // The indexing of per_plane_mcache_support matches plane_descriptors[]
+ bool per_plane_mcache_support[DML2_MAX_PLANES];
+ struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES];
+};
+
+#define DML2_PMO_LEGACY_PREFETCH_MAX_TWAIT_OPTIONS 8
+#define DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE 10
+#define DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE 3
+
+struct dml2_implicit_svp_meta {
+ bool valid;
+ unsigned long v_active;
+ unsigned long v_total;
+ unsigned long v_front_porch;
+};
+
+struct dml2_fams2_per_method_common_meta {
+ /* generic params */
+ unsigned int allow_start_otg_vline;
+ unsigned int allow_end_otg_vline;
+ /* scheduling params */
+ double allow_time_us;
+ double disallow_time_us;
+ double period_us;
+};
+
+struct dml2_fams2_meta {
+ bool valid;
+ double otg_vline_time_us;
+ unsigned int scheduling_delay_otg_vlines;
+ unsigned int vertical_interrupt_ack_delay_otg_vlines;
+ unsigned int allow_to_target_delay_otg_vlines;
+ unsigned int contention_delay_otg_vlines;
+ unsigned int min_allow_width_otg_vlines;
+ unsigned int nom_vtotal;
+ unsigned int vblank_start;
+ double nom_refresh_rate_hz;
+ double nom_frame_time_us;
+ unsigned int max_vtotal;
+ double min_refresh_rate_hz;
+ double max_frame_time_us;
+ unsigned int dram_clk_change_blackout_otg_vlines;
+ struct {
+ double max_vactive_det_fill_delay_us;
+ unsigned int max_vactive_det_fill_delay_otg_vlines;
+ struct dml2_fams2_per_method_common_meta common;
+ } method_vactive;
+ struct {
+ struct dml2_fams2_per_method_common_meta common;
+ } method_vblank;
+ struct {
+ unsigned int programming_delay_otg_vlines;
+ unsigned int df_throttle_delay_otg_vlines;
+ unsigned int prefetch_to_mall_delay_otg_vlines;
+ unsigned long phantom_vactive;
+ unsigned long phantom_vfp;
+ unsigned long phantom_vtotal;
+ struct dml2_fams2_per_method_common_meta common;
+ } method_subvp;
+ struct {
+ unsigned int programming_delay_otg_vlines;
+ unsigned int stretched_vtotal;
+ struct dml2_fams2_per_method_common_meta common;
+ } method_drr;
+};
+
+struct dml2_optimization_stage3_state {
+ bool performed;
+ bool success;
+
+ // The pstate support mode for each plane
+ // The number of valid elements == display_cfg.num_planes
+ // The indexing of pstate_switch_modes matches plane_descriptors[]
+ enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES];
+
+ // Meta-data for implicit SVP generation, indexed by stream index
+ struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES];
+
+ // Meta-data for FAMS2
+ bool fams2_required;
+ struct dml2_fams2_meta stream_fams2_meta[DML2_MAX_PLANES];
+
+ int min_clk_index_for_latency;
+};
+
+struct dml2_optimization_stage4_state {
+ bool performed;
+ bool success;
+ bool unoptimizable_streams[DML2_MAX_DCN_PIPES];
+};
+
+struct dml2_optimization_stage5_state {
+ bool performed;
+ bool success;
+
+ bool optimal_reserved_time_in_vblank_us;
+ bool vblank_includes_z8_optimization;
+};
+
+struct display_configuation_with_meta {
+ struct dml2_display_cfg display_config;
+
+ struct dml2_core_mode_support_result mode_support_result;
+
+ // Stage 1 = Min Clocks for Latency
+ struct dml2_optimization_stage1_state stage1;
+
+ // Stage 2 = MCache
+ struct dml2_optimization_stage2_state stage2;
+
+ // Stage 3 = UCLK PState
+ struct dml2_optimization_stage3_state stage3;
+
+ // Stage 4 = Vmin
+ struct dml2_optimization_stage4_state stage4;
+
+ // Stage 5 = Stutter
+ struct dml2_optimization_stage5_state stage5;
+};
+
+struct dml2_pmo_pstate_strategy {
+ enum dml2_pstate_method per_stream_pstate_method[DML2_MAX_PLANES];
+ bool allow_state_increase;
+};
+struct dml2_core_mode_support_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_core_instance *instance;
+ const struct display_configuation_with_meta *display_cfg;
+
+ struct dml2_mcg_min_clock_table *min_clk_table;
+ int min_clk_index;
+ /*
+ * Outputs
+ */
+ struct dml2_core_mode_support_result mode_support_result;
+
+ struct {
+ // Inputs
+ struct dml_display_cfg_st *display_cfg;
+
+ // Outputs
+ struct dml_mode_support_info_st *support_info;
+ unsigned int out_lowest_state_idx;
+ unsigned int min_fclk_khz;
+ unsigned int min_dcfclk_khz;
+ unsigned int min_dram_speed_mts;
+ unsigned int min_socclk_khz;
+ unsigned int min_dscclk_khz;
+ unsigned int min_dtbclk_khz;
+ unsigned int min_phyclk_khz;
+ } legacy;
+};
+
+struct dml2_core_mode_programming_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_core_instance *instance;
+ const struct display_configuation_with_meta *display_cfg;
+ const struct core_display_cfg_support_info *cfg_support_info;
+ /*
+ * Outputs (also an input: the clock frequencies are taken from the programming struct)
+ */
+ struct dml2_display_cfg_programming *programming;
+
+};
+
+struct dml2_core_populate_informative_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_core_instance *instance;
+
+ // If this is set, then the mode was supported, and mode programming
+ // was successfully run.
+ // Otherwise, mode programming was not run, because mode support failed.
+ bool mode_is_supported;
+
+ /*
+ * Outputs
+ */
+ struct dml2_display_cfg_programming *programming;
+};
+
+struct dml2_calculate_mcache_allocation_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_core_instance *instance;
+ const struct dml2_plane_parameters *plane_descriptor;
+ unsigned int plane_index;
+
+ /*
+ * Outputs
+ */
+ struct dml2_mcache_surface_allocation *mcache_allocation;
+};
+
+struct dml2_core_internal_state_inputs {
+ unsigned int dummy;
+};
+
+struct dml2_core_internal_state_intermediates {
+ unsigned int dummy;
+};
+
+struct dml2_core_mode_support_locals {
+ union {
+ struct dml2_core_calcs_mode_support_ex mode_support_ex_params;
+ };
+ struct dml2_display_cfg svp_expanded_display_cfg;
+ struct dml2_calculate_mcache_allocation_in_out calc_mcache_allocation_params;
+};
+
+struct dml2_core_mode_programming_locals {
+ union {
+ struct dml2_core_calcs_mode_programming_ex mode_programming_ex_params;
+ };
+ struct dml2_display_cfg svp_expanded_display_cfg;
+};
+
+struct dml2_core_scratch {
+ struct dml2_core_mode_support_locals mode_support_locals;
+ struct dml2_core_mode_programming_locals mode_programming_locals;
+ int main_stream_index_from_svp_stream_index[DML2_MAX_PLANES];
+ int svp_stream_index_from_main_stream_index[DML2_MAX_PLANES];
+ int main_plane_index_to_phantom_plane_index[DML2_MAX_PLANES];
+ int phantom_plane_index_to_main_plane_index[DML2_MAX_PLANES];
+};
+
+struct dml2_core_instance {
+ struct dml2_mcg_min_clock_table *minimum_clock_table;
+ struct dml2_core_internal_state_inputs inputs;
+ struct dml2_core_internal_state_intermediates intermediates;
+
+ struct dml2_core_scratch scratch;
+
+ bool (*initialize)(struct dml2_core_initialize_in_out *in_out);
+ bool (*mode_support)(struct dml2_core_mode_support_in_out *in_out);
+ bool (*mode_programming)(struct dml2_core_mode_programming_in_out *in_out);
+ bool (*populate_informative)(struct dml2_core_populate_informative_in_out *in_out);
+ bool (*calculate_mcache_allocation)(struct dml2_calculate_mcache_allocation_in_out *in_out);
+
+ struct {
+ struct dml2_core_internal_display_mode_lib mode_lib;
+ } clean_me_up;
+};
+
+/*
+* DML2 PMO Types and Interfaces
+*/
+
+struct dml2_pmo_initialize_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ struct dml2_soc_bb *soc_bb;
+ struct dml2_ip_capabilities *ip_caps;
+ struct dml2_pmo_options *options;
+ int mcg_clock_table_size;
+};
+
+struct dml2_pmo_optimize_dcc_mcache_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ const struct dml2_display_cfg *display_config;
+ bool *dcc_mcache_supported;
+ struct core_display_cfg_support_info *cfg_support_info;
+
+ /*
+ * Output
+ */
+ struct dml2_display_cfg *optimized_display_cfg;
+};
+
+struct dml2_pmo_init_for_vmin_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ struct display_configuation_with_meta *base_display_config;
+};
+
+struct dml2_pmo_test_for_vmin_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ const struct display_configuation_with_meta *display_config;
+ const struct dml2_soc_vmin_clock_limits *vmin_limits;
+};
+
+struct dml2_pmo_optimize_for_vmin_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ struct display_configuation_with_meta *base_display_config;
+
+ /*
+ * Output
+ */
+ struct display_configuation_with_meta *optimized_display_config;
+};
+
+struct dml2_pmo_init_for_pstate_support_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ struct display_configuation_with_meta *base_display_config;
+};
+
+struct dml2_pmo_test_for_pstate_support_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ struct display_configuation_with_meta *base_display_config;
+};
+
+struct dml2_pmo_optimize_for_pstate_support_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ struct display_configuation_with_meta *base_display_config;
+ bool last_candidate_failed;
+
+ /*
+ * Output
+ */
+ struct display_configuation_with_meta *optimized_display_config;
+};
+
+struct dml2_pmo_init_for_stutter_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ struct display_configuation_with_meta *base_display_config;
+};
+
+struct dml2_pmo_test_for_stutter_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ struct display_configuation_with_meta *base_display_config;
+};
+
+struct dml2_pmo_optimize_for_stutter_in_out {
+ /*
+ * Input
+ */
+ struct dml2_pmo_instance *instance;
+ struct display_configuation_with_meta *base_display_config;
+ bool last_candidate_failed;
+
+ /*
+ * Output
+ */
+ struct display_configuation_with_meta *optimized_display_config;
+};
+
+#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw - dml2_pstate_method_na + 1)) - 1) << dml2_pstate_method_na)
+#define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr)
+#define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_clamped - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr)
+#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_drr + 1)) - 1) << dml2_pstate_method_fw_drr)
+#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_svp + 1)) - 1) << dml2_pstate_method_fw_svp)
+
+#define PMO_DCN4_MAX_DISPLAYS 4
+#define PMO_DCN4_MAX_NUM_VARIANTS 2
+#define PMO_DCN4_MAX_BASE_STRATEGIES 10
+
+struct dml2_pmo_scratch {
+ union {
+ struct {
+ double reserved_time_candidates[DML2_MAX_PLANES][DML2_PMO_LEGACY_PREFETCH_MAX_TWAIT_OPTIONS];
+ int reserved_time_candidates_count[DML2_MAX_PLANES];
+ int current_candidate[DML2_MAX_PLANES];
+ int min_latency_index;
+ int max_latency_index;
+ int cur_latency_index;
+ int stream_mask;
+ } pmo_dcn3;
+ struct {
+ struct dml2_pmo_pstate_strategy expanded_override_strategy_list[2 * 2 * 2 * 2];
+ unsigned int num_expanded_override_strategies;
+ struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE];
+ int num_pstate_candidates;
+ int cur_pstate_candidate;
+
+ unsigned int stream_plane_mask[DML2_MAX_PLANES];
+
+ unsigned int stream_vactive_capability_mask;
+
+ int min_latency_index;
+ int max_latency_index;
+ int cur_latency_index;
+
+ // Stores all the implicit SVP meta information indexed by stream index of the display
+ // configuration under inspection, built at optimization stage init
+ struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES];
+ struct dml2_fams2_meta stream_fams2_meta[DML2_MAX_PLANES];
+
+ unsigned int optimal_vblank_reserved_time_for_stutter_us[DML2_PMO_STUTTER_CANDIDATE_LIST_SIZE];
+ unsigned int num_stutter_candidates;
+ unsigned int cur_stutter_candidate;
+ bool z8_vblank_optimizable;
+
+ /* mask of synchronized timings by stream index */
+ unsigned int num_timing_groups;
+ unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES];
+ bool group_is_drr_enabled[DML2_MAX_PLANES];
+ bool group_is_drr_active[DML2_MAX_PLANES];
+ double group_line_time_us[DML2_MAX_PLANES];
+
+ /* scheduling check locals */
+ struct dml2_fams2_per_method_common_meta group_common_fams2_meta[DML2_MAX_PLANES];
+ unsigned int sorted_group_gtl_disallow_index[DML2_MAX_PLANES];
+ unsigned int sorted_group_gtl_period_index[DML2_MAX_PLANES];
+ double group_phase_offset[DML2_MAX_PLANES];
+ } pmo_dcn4;
+ };
+};
+
+struct dml2_pmo_init_data {
+ union {
+ struct {
+ /* populated once during initialization */
+ struct dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2];
+ struct dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 4 * 4];
+ struct dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 6 * 6];
+ struct dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 8 * 8 * 8 * 8];
+ unsigned int num_expanded_strategies_per_list[PMO_DCN4_MAX_DISPLAYS];
+ } pmo_dcn4;
+ };
+};
+
+struct dml2_pmo_instance {
+ struct dml2_soc_bb *soc_bb;
+ struct dml2_ip_capabilities *ip_caps;
+
+ struct dml2_pmo_options *options;
+
+ int disp_clk_vmin_threshold;
+ int mpc_combine_limit;
+ int odm_combine_limit;
+ int mcg_clock_table_size;
+ union {
+ struct {
+ struct {
+ int prefetch_end_to_mall_start_us;
+ int fw_processing_delay_us;
+ int refresh_rate_limit_min;
+ int refresh_rate_limit_max;
+ } subvp;
+ } v1;
+ struct {
+ struct {
+ int refresh_rate_limit_min;
+ int refresh_rate_limit_max;
+ } subvp;
+ struct {
+ int refresh_rate_limit_min;
+ int refresh_rate_limit_max;
+ } drr;
+ } v2;
+ } fams_params;
+
+ bool (*initialize)(struct dml2_pmo_initialize_in_out *in_out);
+ bool (*optimize_dcc_mcache)(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out);
+
+ bool (*init_for_vmin)(struct dml2_pmo_init_for_vmin_in_out *in_out);
+ bool (*test_for_vmin)(struct dml2_pmo_test_for_vmin_in_out *in_out);
+ bool (*optimize_for_vmin)(struct dml2_pmo_optimize_for_vmin_in_out *in_out);
+
+ bool (*init_for_uclk_pstate)(struct dml2_pmo_init_for_pstate_support_in_out *in_out);
+ bool (*test_for_uclk_pstate)(struct dml2_pmo_test_for_pstate_support_in_out *in_out);
+ bool (*optimize_for_uclk_pstate)(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out);
+
+ bool (*init_for_stutter)(struct dml2_pmo_init_for_stutter_in_out *in_out);
+ bool (*test_for_stutter)(struct dml2_pmo_test_for_stutter_in_out *in_out);
+ bool (*optimize_for_stutter)(struct dml2_pmo_optimize_for_stutter_in_out *in_out);
+
+ struct dml2_pmo_init_data init_data;
+ struct dml2_pmo_scratch scratch;
+};
+
+/*
+* DML2 MCache Types
+*/
+
+struct top_mcache_validate_admissability_in_out {
+ struct dml2_instance *dml2_instance;
+
+ const struct dml2_display_cfg *display_cfg;
+ const struct core_display_cfg_support_info *cfg_support_info;
+ struct dml2_mcache_surface_allocation *mcache_allocations;
+
+ bool per_plane_status[DML2_MAX_PLANES];
+
+ struct {
+ const struct dml_mode_support_info_st *mode_support_info;
+ } legacy;
+};
+
+struct top_mcache_assign_ids_in_out {
+ /*
+ * Input
+ */
+ const struct dml2_mcache_surface_allocation *mcache_allocations;
+ int plane_count;
+
+ int per_pipe_viewport_x_start[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES];
+ int per_pipe_viewport_x_end[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES];
+ int pipe_count_per_plane[DML2_MAX_PLANES];
+
+ struct dml2_display_mcache_regs *current_mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; //One set per pipe/hubp
+
+ /*
+ * Output
+ */
+ struct dml2_display_mcache_regs mcache_regs[DML2_MAX_PLANES][DML2_MAX_DCN_PIPES]; //One set per pipe/hubp
+ struct dml2_build_mcache_programming_in_out *mcache_programming;
+};
+
+struct top_mcache_calc_mcache_count_and_offsets_in_out {
+ /*
+ * Inputs
+ */
+ struct dml2_instance *dml2_instance;
+ const struct dml2_display_cfg *display_config;
+
+ /*
+ * Outputs
+ */
+ struct dml2_mcache_surface_allocation *mcache_allocations;
+};
+
+struct top_mcache_assign_global_mcache_ids_in_out {
+ /*
+ * Inputs/Outputs
+ */
+ struct dml2_mcache_surface_allocation *allocations;
+ int num_allocations;
+};
+
+/*
+* DML2 Top Types
+*/
+
+struct dml2_initialize_instance_locals {
+ int dummy;
+};
+
+struct dml2_optimization_init_function_locals {
+ union {
+ struct {
+ struct dml2_pmo_init_for_pstate_support_in_out init_params;
+ } uclk_pstate;
+ struct {
+ struct dml2_pmo_init_for_stutter_in_out stutter_params;
+ } stutter;
+ struct {
+ struct dml2_pmo_init_for_vmin_in_out init_params;
+ } vmin;
+ };
+};
+
+struct dml2_optimization_test_function_locals {
+ union {
+ struct {
+ struct top_mcache_calc_mcache_count_and_offsets_in_out calc_mcache_count_params;
+ struct top_mcache_assign_global_mcache_ids_in_out assign_global_mcache_ids_params;
+ struct top_mcache_validate_admissability_in_out validate_admissibility_params;
+ } test_mcache;
+ struct {
+ struct dml2_pmo_test_for_vmin_in_out pmo_test_vmin_params;
+ } test_vmin;
+ struct {
+ struct dml2_pmo_test_for_pstate_support_in_out test_params;
+ } uclk_pstate;
+ struct {
+ struct dml2_pmo_test_for_stutter_in_out stutter_params;
+ } stutter;
+ };
+};
+
+struct dml2_optimization_optimize_function_locals {
+ union {
+ struct {
+ struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params;
+ } optimize_mcache;
+ struct {
+ struct dml2_pmo_optimize_for_vmin_in_out pmo_optimize_vmin_params;
+ } optimize_vmin;
+ struct {
+ struct dml2_pmo_optimize_for_pstate_support_in_out optimize_params;
+ } uclk_pstate;
+ struct {
+ struct dml2_pmo_optimize_for_stutter_in_out stutter_params;
+ } stutter;
+ };
+};
+
+struct dml2_optimization_phase_locals {
+ struct display_configuation_with_meta cur_candidate_display_cfg;
+ struct display_configuation_with_meta next_candidate_display_cfg;
+ struct dml2_core_mode_support_in_out mode_support_params;
+ struct dml2_optimization_init_function_locals init_function_locals;
+ struct dml2_optimization_test_function_locals test_function_locals;
+ struct dml2_optimization_optimize_function_locals optimize_function_locals;
+};
+
+struct dml2_check_mode_supported_locals { // Embedded in struct dml2_instance as scratch.check_mode_supported_locals
+ struct dml2_display_cfg display_cfg_working_copy;
+ struct dml2_core_mode_support_in_out mode_support_params;
+ struct dml2_optimization_phase_locals optimization_phase_locals;
+ struct display_configuation_with_meta base_display_config_with_meta;
+ struct display_configuation_with_meta optimized_display_config_with_meta;
+ struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out dppm_map_mode_params; // member says "dppm", its type says "dpmm"
+};
+
+struct optimization_init_function_params { // Argument type of optimization_phase_params.init_function
+ struct dml2_optimization_init_function_locals *locals;
+ struct dml2_instance *dml;
+ struct display_configuation_with_meta *display_config;
+};
+
+struct optimization_test_function_params { // Argument type of optimization_phase_params.test_function
+ struct dml2_optimization_test_function_locals *locals;
+ struct dml2_instance *dml;
+ struct display_configuation_with_meta *display_config;
+};
+
+struct optimization_optimize_function_params { // Argument type of optimization_phase_params.optimize_function
+ bool last_candidate_supported;
+ struct dml2_optimization_optimize_function_locals *locals;
+ struct dml2_instance *dml;
+ struct display_configuation_with_meta *display_config;
+ struct display_configuation_with_meta *optimized_display_config; // separate pointer from display_config; presumably the output slot — TODO confirm
+};
+
+struct optimization_phase_params {
+ struct dml2_instance *dml;
+ const struct display_configuation_with_meta *display_config; // Initial Display Configuration
+ bool (*init_function)(const struct optimization_init_function_params *params); // Init function for the optimization phase
+ bool (*test_function)(const struct optimization_test_function_params *params); // Test function to determine optimization is complete
+ bool (*optimize_function)(const struct optimization_optimize_function_params *params); // Function which produces a more optimized display configuration
+ struct display_configuation_with_meta *optimized_display_config; // The optimized display configuration
+
+ bool all_or_nothing; // NOTE(review): semantics are not visible in this header; confirm against the code that runs a phase
+};
+
+struct dml2_build_mode_programming_locals { // Embedded in struct dml2_instance as scratch.build_mode_programming_locals
+ struct dml2_core_mode_support_in_out mode_support_params;
+ struct dml2_core_mode_programming_in_out mode_programming_params;
+ struct dml2_core_populate_informative_in_out informative_params;
+ struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params;
+ struct display_configuation_with_meta base_display_config_with_meta;
+ struct display_configuation_with_meta optimized_display_config_with_meta;
+ struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out dppm_map_mode_params; // members say "dppm", their types say "dpmm"
+ struct dml2_dpmm_map_watermarks_params_in_out dppm_map_watermarks_params;
+ struct dml2_optimization_phase_locals optimization_phase_locals; // a single phase-locals block next to five phase descriptors
+ struct optimization_phase_params min_clock_for_latency_phase;
+ struct optimization_phase_params mcache_phase;
+ struct optimization_phase_params uclk_pstate_phase;
+ struct optimization_phase_params vmin_phase;
+ struct optimization_phase_params stutter_phase;
+};
+
+struct dml2_legacy_core_build_mode_programming_wrapper_locals { // Embedded in struct dml2_instance under legacy.scratch
+ struct dml2_core_mode_support_in_out mode_support_params;
+ struct dml2_core_mode_programming_in_out mode_programming_params;
+ struct dml2_core_populate_informative_in_out informative_params;
+ struct top_mcache_calc_mcache_count_and_offsets_in_out calc_mcache_count_params;
+ struct top_mcache_validate_admissability_in_out validate_admissibility_params;
+ struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; // one entry per plane slot
+ struct top_mcache_assign_global_mcache_ids_in_out assign_global_mcache_ids_params;
+ struct dml2_pmo_optimize_dcc_mcache_in_out optimize_mcache_params;
+ struct dml2_display_cfg optimized_display_cfg;
+ struct core_display_cfg_support_info core_support_info;
+};
+
+struct dml2_top_mcache_verify_mcache_size_locals { // Embedded in struct dml2_instance as scratch.mcache_verify_mcache_size_locals
+ struct dml2_calculate_mcache_allocation_in_out calc_mcache_params;
+};
+
+struct dml2_top_mcache_validate_admissability_locals { // Embedded in struct dml2_instance as scratch.mcache_validate_admissability_locals
+ struct {
+ int pipe_vp_startx[DML2_MAX_DCN_PIPES]; // one entry per DCN pipe slot
+ int pipe_vp_endx[DML2_MAX_DCN_PIPES];
+ } plane0;
+ struct { // same layout as plane0
+ int pipe_vp_startx[DML2_MAX_DCN_PIPES];
+ int pipe_vp_endx[DML2_MAX_DCN_PIPES];
+ } plane1;
+};
+
+struct dml2_top_display_cfg_support_info {
+ const struct dml2_display_cfg *display_config; // held by pointer and const-qualified, unlike core_info which is stored by value
+ struct core_display_cfg_support_info core_info;
+};
+
+struct dml2_top_funcs { // Function-pointer table stored in struct dml2_instance as funcs; each entry takes one in_out struct and returns bool
+ bool (*check_mode_supported)(struct dml2_check_mode_supported_in_out *in_out);
+ bool (*build_mode_programming)(struct dml2_build_mode_programming_in_out *in_out);
+ bool (*build_mcache_programming)(struct dml2_build_mcache_programming_in_out *in_out);
+};
+
+struct dml2_instance { // Top-level DML2 object: component instances, configuration data, the function table and scratch storage, all held by value
+ enum dml2_project_id project_id;
+
+ struct dml2_core_instance core_instance;
+ struct dml2_mcg_instance mcg_instance;
+ struct dml2_dpmm_instance dpmm_instance;
+ struct dml2_pmo_instance pmo_instance;
+
+ struct dml2_soc_bb soc_bbox;
+ struct dml2_ip_capabilities ip_caps;
+
+ struct dml2_mcg_min_clock_table min_clk_table;
+ struct dml2_pmo_options pmo_options;
+ struct dml2_top_funcs funcs;
+
+ struct { // per-entry-point locals kept inside the instance
+ struct dml2_initialize_instance_locals initialize_instance_locals;
+ struct dml2_top_mcache_verify_mcache_size_locals mcache_verify_mcache_size_locals;
+ struct dml2_top_mcache_validate_admissability_locals mcache_validate_admissability_locals;
+ struct dml2_check_mode_supported_locals check_mode_supported_locals;
+ struct dml2_build_mode_programming_locals build_mode_programming_locals;
+ } scratch;
+
+ struct { // separate scratch for the legacy core wrapper
+ struct {
+ struct dml2_legacy_core_build_mode_programming_wrapper_locals legacy_core_build_mode_programming_wrapper_locals;
+ } scratch;
+ } legacy;
+};
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
new file mode 100644
index 000000000000..4cfe64aa8492
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
@@ -0,0 +1,1174 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dml2_mall_phantom.h"
+
+#include "dml2_dc_types.h"
+#include "dml2_internal_types.h"
+#include "dml2_utils.h"
+#include "dml2_dc_resource_mgmt.h"
+
+#define MAX_ODM_FACTOR 4 // first dimension of the dc_plane_pipe_pool tables (ODM slice)
+#define MAX_MPCC_FACTOR 4 // second dimension of the dc_plane_pipe_pool tables (MPCC slot within a slice)
+
+struct dc_plane_pipe_pool {
+ int pipes_assigned_to_plane[MAX_ODM_FACTOR][MAX_MPCC_FACTOR]; // pipe_ctx indices, addressed as [odm_slice][mpc_slice]
+ bool pipe_used[MAX_ODM_FACTOR][MAX_MPCC_FACTOR]; // per-slot flag consulted by is_pipe_used()
+ int num_pipes_assigned_to_plane_for_mpcc_combine; // number of valid slots along the second dimension
+ int num_pipes_assigned_to_plane_for_odm_combine; // number of valid slices along the first dimension
+};
+
+struct dc_pipe_mapping_scratch { // Working state shared by map_pipes_for_stream() and map_pipes_for_plane()
+ struct {
+ unsigned int odm_factor; // number of ODM slices; bounds the per-slice loops in the map_pipes_* functions
+ unsigned int odm_slice_end_x[MAX_PIPES];
+ struct pipe_ctx *next_higher_pipe_for_odm_slice[MAX_PIPES]; // per slice: result of the last add_plane_to_blend_tree() call, fed back in as top_pipe
+ } odm_info;
+ struct {
+ unsigned int mpc_factor; // handed to assign_pipes_to_plane() as mpc_factor
+ struct pipe_ctx *prev_odm_pipe; // updated by add_odm_slice_to_odm_tree() after each slice
+ } mpc_info;
+
+ struct dc_plane_pipe_pool pipe_pool;
+};
+
+static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane,
+ unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id)
+{ // Searches the streams whose stream_id matches for plane; on success stores the id in *plane_id and returns true
+ int i, j;
+ bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists;
+
+ if (!plane_id)
+ return false;
+
+ for (i = 0; i < state->stream_count; i++) {
+ if (state->streams[i]->stream_id == stream_id) {
+ for (j = 0; j < state->stream_status[i].plane_count; j++) {
+ if (state->stream_status[i].plane_states[j] == plane &&
+ (!is_plane_duplicate || (j == plane_index))) { // when duplicates exist the plane index must match as well as the pointer
+ *plane_id = (i << 16) | j; // stream index shifted left by 16, OR-ed with the plane index
+ return true;
+ }
+ }
+ }
+ }
+
+ return false; // *plane_id is left unmodified on this path
+}
+
+static int find_disp_cfg_idx_by_plane_id(struct dml2_dml_to_dc_pipe_mapping *mapping, unsigned int plane_id)
+{ // Returns the first valid mapping slot whose plane id equals plane_id
+ int i;
+
+ for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+ if (mapping->disp_cfg_to_plane_id_valid[i] && mapping->disp_cfg_to_plane_id[i] == plane_id)
+ return i;
+ }
+
+ ASSERT(false);
+ return __DML2_WRAPPER_MAX_STREAMS_PLANES__; // not found: equals the loop bound, so it is never a slot the loop visited
+}
+
+static int find_disp_cfg_idx_by_stream_id(struct dml2_dml_to_dc_pipe_mapping *mapping, unsigned int stream_id)
+{ // Returns the first valid mapping slot whose stream id equals stream_id
+ int i;
+
+ for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+ if (mapping->disp_cfg_to_stream_id_valid[i] && mapping->disp_cfg_to_stream_id[i] == stream_id)
+ return i;
+ }
+
+ ASSERT(false);
+ return __DML2_WRAPPER_MAX_STREAMS_PLANES__; // not found: equals the loop bound, so it is never a slot the loop visited
+}
+
+// The master pipe of a stream is defined as the top pipe in odm slice 0
+static struct pipe_ctx *find_master_pipe_of_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int stream_id)
+{ // Scans the first ctx->config.dcn_pipe_count pipes; returns NULL when no pipe of the stream qualifies
+ int i;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ if (state->res_ctx.pipe_ctx[i].stream && state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) {
+ if (!state->res_ctx.pipe_ctx[i].prev_odm_pipe && !state->res_ctx.pipe_ctx[i].top_pipe) // no previous ODM pipe and no pipe above it
+ return &state->res_ctx.pipe_ctx[i];
+ }
+ }
+
+ return NULL;
+}
+
+static struct pipe_ctx *find_master_pipe_of_plane(struct dml2_context *ctx,
+ struct dc_state *state, unsigned int plane_id)
+{ // Returns the lowest-indexed pipe whose plane resolves (via get_plane_id) to plane_id, or NULL when none does
+ int i;
+ unsigned int plane_id_assigned_to_pipe;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state && pipe->stream && // pipe->stream is dereferenced on the next line, so a pipe without a stream is skipped
+ get_plane_id(ctx, state, pipe->plane_state, pipe->stream->stream_id,
+ ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[pipe->pipe_idx], &plane_id_assigned_to_pipe) && plane_id_assigned_to_pipe == plane_id)
+ return pipe;
+ }
+
+ return NULL;
+}
+
+static unsigned int find_pipes_assigned_to_plane(struct dml2_context *ctx,
+ struct dc_state *state, unsigned int plane_id, unsigned int *pipes)
+{ // Collects into pipes[] the pipe_idx of every pipe driving plane_id (MPC chain of each ODM slice) and returns the count
+ int i;
+ unsigned int num_found = 0;
+ unsigned int plane_id_assigned_to_pipe = -1;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+ if (!pipe->plane_state || !pipe->stream)
+ continue;
+
+ if (get_plane_id(ctx, state, pipe->plane_state, pipe->stream->stream_id, // a failed lookup leaves the previous pipe's id in place, so its result must gate the comparison
+ ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[pipe->pipe_idx],
+ &plane_id_assigned_to_pipe) &&
+ plane_id_assigned_to_pipe == plane_id && !pipe->prev_odm_pipe
+ && (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) { // head of the plane: first ODM slice, nothing above it with the same plane
+ while (pipe) { // outer walk: one step per ODM slice
+ struct pipe_ctx *mpc_pipe = pipe;
+
+ while (mpc_pipe) { // inner walk: down the bottom_pipe chain while the plane stays the same
+ pipes[num_found++] = mpc_pipe->pipe_idx;
+ mpc_pipe = mpc_pipe->bottom_pipe;
+ if (!mpc_pipe)
+ break;
+ if (mpc_pipe->plane_state != pipe->plane_state)
+ mpc_pipe = NULL;
+ }
+ pipe = pipe->next_odm_pipe;
+ }
+ break; // only the first matching head pipe is expanded
+ }
+ }
+
+ return num_found;
+}
+
+static bool validate_pipe_assignment(const struct dml2_context *ctx, const struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, const struct dml2_dml_to_dc_pipe_mapping *mapping)
+{ // Currently a stub: the whole check below is commented out, no parameter is read, and the function always returns true
+// int i, j, k;
+//
+// unsigned int plane_id;
+//
+// unsigned int disp_cfg_index;
+//
+// unsigned int pipes_assigned_to_plane[MAX_PIPES];
+// unsigned int num_pipes_assigned_to_plane;
+//
+// struct pipe_ctx *top_pipe;
+//
+// for (i = 0; i < state->stream_count; i++) {
+// for (j = 0; j < state->stream_status[i]->plane_count; j++) {
+// if (get_plane_id(state, state->stream_status.plane_states[j], &plane_id)) {
+// disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id);
+// num_pipes_assigned_to_plane = find_pipes_assigned_to_plane(ctx, state, plane_id, pipes_assigned_to_plane);
+//
+// if (disp_cfg_index >= 0 && num_pipes_assigned_to_plane > 0) {
+// // Verify the number of pipes assigned matches
+// if (disp_cfg->hw.DPPPerSurface != num_pipes_assigned_to_plane)
+// return false;
+//
+// top_pipe = find_top_pipe_in_tree(state->res_ctx.pipe_ctx[pipes_assigned_to_plane[0]]);
+//
+// // Verify MPC and ODM combine
+// if (disp_cfg->hw.ODMMode == dml_odm_mode_bypass) {
+// verify_combine_tree(top_pipe, state->streams[i]->stream_id, plane_id, state, false);
+// } else {
+// verify_combine_tree(top_pipe, state->streams[i]->stream_id, plane_id, state, true);
+// }
+//
+// // TODO: could also do additional verification that the pipes in tree are the same as
+// // pipes_assigned_to_plane
+// } else {
+// ASSERT(false);
+// return false;
+// }
+// } else {
+// ASSERT(false);
+// return false;
+// }
+// }
+// }
+ return true;
+}
+
+static bool is_plane_using_pipe(const struct pipe_ctx *pipe)
+{ // True when the pipe has a plane_state attached; the stream pointer is not consulted
+ if (pipe->plane_state)
+ return true;
+
+ return false;
+}
+
+static bool is_pipe_free(const struct pipe_ctx *pipe)
+{ // True only when the pipe has neither a plane_state nor a stream (stricter than !is_plane_using_pipe())
+ if (!pipe->plane_state && !pipe->stream)
+ return true;
+
+ return false;
+}
+
+static unsigned int find_preferred_pipe_candidates(const struct dc_state *existing_state,
+ const int pipe_count,
+ const unsigned int stream_id,
+ unsigned int *preferred_pipe_candidates)
+{ // Fills preferred_pipe_candidates[] with pipe indices from existing_state and returns how many were written (0 when existing_state is NULL)
+ unsigned int num_preferred_candidates = 0;
+ int i;
+
+ /* There is only one case which we consider for adding a pipe to the preferred
+ * pipe candidate array:
+ *
+ * 1. If the existing stream id of the pipe is equivalent to the stream id
+ * of the stream we are trying to achieve MPC/ODM combine for. This allows
+ * us to minimize the changes in pipe topology during the transition.
+ *
+ * However this condition comes with a caveat. We need to ignore pipes that will
+ * require a change in OPP but still have the same stream id. For example during
+ * an MPC to ODM transition.
+ *
+ * Adding check to avoid pipe select on the head pipe by utilizing dc resource
+ * helper function resource_get_primary_dpp_pipe and comparing the pipe index.
+ */
+ if (existing_state) {
+ for (i = 0; i < pipe_count; i++) {
+ if (existing_state->res_ctx.pipe_ctx[i].stream && existing_state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) {
+ struct pipe_ctx *head_pipe =
+ resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
+ resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) :
+ NULL; // only DPP pipes are asked for their primary DPP pipe
+
+ // we should always respect the head pipe from selection
+ if (head_pipe && head_pipe->pipe_idx == i)
+ continue;
+ if (existing_state->res_ctx.pipe_ctx[i].plane_res.hubp &&
+ existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i &&
+ (existing_state->res_ctx.pipe_ctx[i].prev_odm_pipe ||
+ existing_state->res_ctx.pipe_ctx[i].next_odm_pipe))
+ continue; // the OPP-change caveat above: hubp's opp_id differs from the pipe index and the pipe is in an ODM chain
+
+ preferred_pipe_candidates[num_preferred_candidates++] = i;
+ }
+ }
+ }
+
+ return num_preferred_candidates;
+}
+
+static unsigned int find_last_resort_pipe_candidates(const struct dc_state *existing_state,
+ const int pipe_count,
+ const unsigned int stream_id,
+ unsigned int *last_resort_pipe_candidates)
+{ // Fills last_resort_pipe_candidates[] and returns the count; stream_id is accepted but not read by this function
+ unsigned int num_last_resort_candidates = 0;
+ int i;
+
+ /* There are two cases where we would like to add a given pipe into the last
+ * candidate array:
+ *
+ * 1. If the pipe requires a change in OPP, for example during an MPC
+ * to ODM transition.
+ *
+ * 2. If the pipe already has an enabled OTG.
+ */
+ if (existing_state) {
+ for (i = 0; i < pipe_count; i++) {
+ struct pipe_ctx *head_pipe =
+ resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
+ resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) :
+ NULL; // only DPP pipes are asked for their primary DPP pipe
+
+ // we should always respect the head pipe from selection
+ if (head_pipe && head_pipe->pipe_idx == i)
+ continue;
+ if ((existing_state->res_ctx.pipe_ctx[i].plane_res.hubp &&
+ existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) || // case 1: hubp's opp_id differs from the pipe index
+ existing_state->res_ctx.pipe_ctx[i].stream_res.tg) // case 2: a timing generator is attached
+ last_resort_pipe_candidates[num_last_resort_candidates++] = i;
+ }
+ }
+
+ return num_last_resort_candidates;
+}
+
+static bool is_pipe_in_candidate_array(const unsigned int pipe_idx,
+ const unsigned int *candidate_array,
+ const unsigned int candidate_array_size)
+{ // Linear search: true when pipe_idx occurs in the first candidate_array_size entries (false for size 0)
+ int i;
+
+ for (i = 0; i < candidate_array_size; i++) {
+ if (candidate_array[i] == pipe_idx)
+ return true;
+ }
+
+ return false;
+}
+
+static bool find_more_pipes_for_stream(struct dml2_context *ctx,
+ struct dc_state *state, // The state we want to find a free mapping in
+ unsigned int stream_id, // The stream we want this pipe to drive
+ int *assigned_pipes,
+ int *assigned_pipe_count,
+ int pipes_needed,
+ const struct dc_state *existing_state) // The state (optional) that we want to minimize remapping relative to
+{ // Appends up to pipes_needed pipes that carry no plane to assigned_pipes[], bumping *assigned_pipe_count; true when the demand is met
+ struct pipe_ctx *pipe = NULL;
+ unsigned int preferred_pipe_candidates[MAX_PIPES] = {0};
+ unsigned int last_resort_pipe_candidates[MAX_PIPES] = {0};
+ unsigned int num_preferred_candidates = 0;
+ unsigned int num_last_resort_candidates = 0;
+ int i;
+
+ if (existing_state) { // without an existing state both candidate lists stay empty and only the middle pass can pick pipes
+ num_preferred_candidates =
+ find_preferred_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, preferred_pipe_candidates);
+
+ num_last_resort_candidates =
+ find_last_resort_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, last_resort_pipe_candidates);
+ }
+
+ // First see if any of the preferred are unmapped, and choose those instead
+ for (i = 0; pipes_needed > 0 && i < num_preferred_candidates; i++) {
+ pipe = &state->res_ctx.pipe_ctx[preferred_pipe_candidates[i]];
+ if (!is_plane_using_pipe(pipe)) {
+ pipes_needed--;
+ // TODO: This doesn't make sense really, pipe_idx should always be valid
+ pipe->pipe_idx = preferred_pipe_candidates[i];
+ assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+ }
+ }
+
+ // We like to pair pipes starting from the higher order indices for combining
+ for (i = ctx->config.dcn_pipe_count - 1; pipes_needed > 0 && i >= 0; i--) {
+ // Ignore any pipes that are the preferred or last resort candidate
+ if (is_pipe_in_candidate_array(i, preferred_pipe_candidates, num_preferred_candidates) ||
+ is_pipe_in_candidate_array(i, last_resort_pipe_candidates, num_last_resort_candidates))
+ continue;
+
+ pipe = &state->res_ctx.pipe_ctx[i];
+ if (!is_plane_using_pipe(pipe)) {
+ pipes_needed--;
+ // TODO: This doesn't make sense really, pipe_idx should always be valid
+ pipe->pipe_idx = i;
+ assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+ }
+ }
+
+ // Only use the last resort pipe candidates as a last resort
+ for (i = 0; pipes_needed > 0 && i < num_last_resort_candidates; i++) {
+ pipe = &state->res_ctx.pipe_ctx[last_resort_pipe_candidates[i]];
+ if (!is_plane_using_pipe(pipe)) {
+ pipes_needed--;
+ // TODO: This doesn't make sense really, pipe_idx should always be valid
+ pipe->pipe_idx = last_resort_pipe_candidates[i];
+ assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+ }
+ }
+
+ ASSERT(pipes_needed <= 0); // Validation should prevent us from building a pipe context that exceeds the number of HW resources available
+
+ return pipes_needed <= 0; // a negative pipes_needed on entry (more pipes than required) also counts as success
+}
+
+static bool find_more_free_pipes(struct dml2_context *ctx,
+ struct dc_state *state, // The state we want to find a free mapping in
+ unsigned int stream_id, // The stream we want this pipe to drive
+ int *assigned_pipes,
+ int *assigned_pipe_count,
+ int pipes_needed,
+ const struct dc_state *existing_state) // The state (optional) that we want to minimize remapping relative to
+{ // Same three-pass search as find_more_pipes_for_stream(), but a pipe qualifies only if is_pipe_free() (no plane and no stream)
+ struct pipe_ctx *pipe = NULL;
+ unsigned int preferred_pipe_candidates[MAX_PIPES] = {0};
+ unsigned int last_resort_pipe_candidates[MAX_PIPES] = {0};
+ unsigned int num_preferred_candidates = 0;
+ unsigned int num_last_resort_candidates = 0;
+ int i;
+
+ if (existing_state) { // without an existing state both candidate lists stay empty and only the middle pass can pick pipes
+ num_preferred_candidates =
+ find_preferred_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, preferred_pipe_candidates);
+
+ num_last_resort_candidates =
+ find_last_resort_pipe_candidates(existing_state, ctx->config.dcn_pipe_count, stream_id, last_resort_pipe_candidates);
+ }
+
+ // First see if any of the preferred are unmapped, and choose those instead
+ for (i = 0; pipes_needed > 0 && i < num_preferred_candidates; i++) {
+ pipe = &state->res_ctx.pipe_ctx[preferred_pipe_candidates[i]];
+ if (is_pipe_free(pipe)) {
+ pipes_needed--;
+ // TODO: This doesn't make sense really, pipe_idx should always be valid
+ pipe->pipe_idx = preferred_pipe_candidates[i];
+ assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+ }
+ }
+
+ // We like to pair pipes starting from the higher order indices for combining
+ for (i = ctx->config.dcn_pipe_count - 1; pipes_needed > 0 && i >= 0; i--) {
+ // Ignore any pipes that are the preferred or last resort candidate
+ if (is_pipe_in_candidate_array(i, preferred_pipe_candidates, num_preferred_candidates) ||
+ is_pipe_in_candidate_array(i, last_resort_pipe_candidates, num_last_resort_candidates))
+ continue;
+
+ pipe = &state->res_ctx.pipe_ctx[i];
+ if (is_pipe_free(pipe)) {
+ pipes_needed--;
+ // TODO: This doesn't make sense really, pipe_idx should always be valid
+ pipe->pipe_idx = i;
+ assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+ }
+ }
+
+ // Only use the last resort pipe candidates as a last resort
+ for (i = 0; pipes_needed > 0 && i < num_last_resort_candidates; i++) {
+ pipe = &state->res_ctx.pipe_ctx[last_resort_pipe_candidates[i]];
+ if (is_pipe_free(pipe)) {
+ pipes_needed--;
+ // TODO: This doesn't make sense really, pipe_idx should always be valid
+ pipe->pipe_idx = last_resort_pipe_candidates[i];
+ assigned_pipes[(*assigned_pipe_count)++] = pipe->pipe_idx;
+ }
+ }
+
+ ASSERT(pipes_needed == 0); // Validation should prevent us from building a pipe context that exceeds the number of HW resources available
+
+ return pipes_needed == 0; // NOTE(review): a negative pipes_needed on entry fails here, whereas find_more_pipes_for_stream() accepts <= 0 — confirm intended
+}
+
+static void sort_pipes_for_splitting(struct dc_plane_pipe_pool *pipes)
+{ // Sorts, in ascending order, the MPCC entries of every ODM slice row in pipes->pipes_assigned_to_plane
+ bool sorted, swapped;
+ unsigned int cur_index;
+ int odm_slice_index;
+
+ for (odm_slice_index = 0; odm_slice_index < pipes->num_pipes_assigned_to_plane_for_odm_combine; odm_slice_index++) {
+ // Sort each MPCC set
+ //Un-optimized bubble sort, but that's okay for array sizes <= 6
+
+ if (pipes->num_pipes_assigned_to_plane_for_mpcc_combine <= 1)
+ sorted = true; // zero or one entry: nothing to compare, the while loop below is skipped
+ else
+ sorted = false;
+
+ cur_index = 0;
+ swapped = false;
+ while (!sorted) {
+ if (pipes->pipes_assigned_to_plane[odm_slice_index][cur_index] > pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1]) {
+ swap(pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1],
+ pipes->pipes_assigned_to_plane[odm_slice_index][cur_index]);
+
+ swapped = true;
+ }
+
+ cur_index++;
+
+ if (cur_index == pipes->num_pipes_assigned_to_plane_for_mpcc_combine - 1) { // end of one pass over adjacent pairs
+ cur_index = 0;
+
+ if (swapped)
+ sorted = false;
+ else
+ sorted = true; // a full pass without a swap means the row is ordered
+
+ swapped = false;
+ }
+
+ }
+ }
+}
+
+// For example, 3840 x 2160, ODM2:1 has a slice array of [1919, 3839], meaning, slice0 spans h_pixels 0->1919, and slice1 spans 1920->3839
+static void calculate_odm_slices(const struct dc_stream_state *stream, unsigned int odm_factor, unsigned int *odm_slice_end_x)
+{ // Fills odm_slice_end_x[0..odm_factor-1] with the last source column of each slice; asserts and writes nothing when odm_factor is out of range
+ unsigned int slice_size = 0;
+ unsigned int i;
+
+ if (odm_factor < 1 || odm_factor > MAX_ODM_FACTOR) { // same limit as the ODM dimension of the pipe pool tables
+ ASSERT(false);
+ return;
+ }
+
+ slice_size = stream->src.width / odm_factor;
+
+ for (i = 0; i < odm_factor; i++)
+ odm_slice_end_x[i] = (slice_size * (i + 1)) - 1;
+
+ odm_slice_end_x[odm_factor - 1] = stream->src.width - 1; // the last slice absorbs any remainder of the integer division
+}
+
+static void add_odm_slice_to_odm_tree(struct dml2_context *ctx,
+ struct dc_state *state,
+ struct dc_pipe_mapping_scratch *scratch,
+ unsigned int odm_slice_index)
+{ // Links the pipes of slice odm_slice_index behind scratch->mpc_info.prev_odm_pipe in the prev/next ODM chain; ctx is not read
+ struct pipe_ctx *pipe = NULL;
+ int i;
+
+ // MPCC Combine + ODM Combine is not supported, so there should never be a case where the current plane
+ // has more than 1 pipe mapped to it for a given slice.
+ ASSERT(scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine == 1 || scratch->pipe_pool.num_pipes_assigned_to_plane_for_odm_combine == 1);
+
+ for (i = 0; i < scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine; i++) {
+ pipe = &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][i]];
+
+ if (scratch->mpc_info.prev_odm_pipe)
+ scratch->mpc_info.prev_odm_pipe->next_odm_pipe = pipe;
+
+ pipe->prev_odm_pipe = scratch->mpc_info.prev_odm_pipe;
+ pipe->next_odm_pipe = NULL; // the newly linked pipe becomes the tail of the chain
+ }
+ scratch->mpc_info.prev_odm_pipe = pipe; // remembers the last pipe handled (NULL if the loop body never ran)
+}
+
+static struct pipe_ctx *add_plane_to_blend_tree(struct dml2_context *ctx,
+ struct dc_state *state,
+ const struct dc_plane_state *plane,
+ struct dc_plane_pipe_pool *pipe_pool,
+ unsigned int odm_slice,
+ struct pipe_ctx *top_pipe)
+{ // Chains the slice's pipes below top_pipe via top_pipe/bottom_pipe and marks each pool slot used; ctx and plane are not read
+ int i;
+
+ for (i = 0; i < pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine; i++) {
+ if (top_pipe)
+ top_pipe->bottom_pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]];
+
+ pipe_pool->pipe_used[odm_slice][i] = true;
+
+ state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]].top_pipe = top_pipe;
+ state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]].bottom_pipe = NULL;
+
+ top_pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]]; // the pipe just linked is the parent for the next iteration
+ }
+
+ // After running the above loop, the top pipe actually ends up pointing to the bottom of this MPCC combine tree, so we are actually
+ // returning the bottom pipe here
+ return top_pipe;
+}
+
+static unsigned int find_pipes_assigned_to_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int stream_id, unsigned int *pipes)
+{ // Writes to pipes[] the pipe_idx of the stream's head pipe and of every pipe on its next_odm_pipe chain; returns the count
+ int i;
+ unsigned int num_found = 0;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && pipe->stream->stream_id == stream_id && !pipe->top_pipe && !pipe->prev_odm_pipe) { // same head-pipe test as find_master_pipe_of_stream()
+ while (pipe) {
+ pipes[num_found++] = pipe->pipe_idx;
+ pipe = pipe->next_odm_pipe;
+ }
+ break; // only the first head pipe found is expanded
+ }
+ }
+
+ return num_found;
+}
+
+static struct pipe_ctx *assign_pipes_to_stream(struct dml2_context *ctx, struct dc_state *state,
+ const struct dc_stream_state *stream,
+ int odm_factor,
+ struct dc_plane_pipe_pool *pipe_pool,
+ const struct dc_state *existing_state)
+{ // Fills column 0 of pipe_pool with odm_factor pipes for a stream (one pipe per ODM slice) and returns the stream's master pipe
+ struct pipe_ctx *master_pipe;
+ unsigned int pipes_needed;
+ unsigned int pipes_assigned;
+ unsigned int pipes[MAX_PIPES] = {0};
+ unsigned int next_pipe_to_assign;
+ int odm_slice;
+
+ pipes_needed = odm_factor;
+
+ master_pipe = find_master_pipe_of_stream(ctx, state, stream->stream_id);
+ ASSERT(master_pipe);
+
+ pipes_assigned = find_pipes_assigned_to_stream(ctx, state, stream->stream_id, pipes); // start from the pipes the stream already owns
+
+ find_more_free_pipes(ctx, state, stream->stream_id, pipes, &pipes_assigned, pipes_needed - pipes_assigned, existing_state); // unsigned locals are passed to int * parameters; return value is not checked
+
+ ASSERT(pipes_assigned == pipes_needed);
+
+ next_pipe_to_assign = 0;
+ for (odm_slice = 0; odm_slice < odm_factor; odm_slice++)
+ pipe_pool->pipes_assigned_to_plane[odm_slice][0] = pipes[next_pipe_to_assign++];
+
+ pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine = 1; // stream-only mapping: exactly one slot per slice
+ pipe_pool->num_pipes_assigned_to_plane_for_odm_combine = odm_factor;
+
+ return master_pipe;
+}
+
+static struct pipe_ctx *assign_pipes_to_plane(struct dml2_context *ctx, struct dc_state *state,
+ const struct dc_stream_state *stream,
+ const struct dc_plane_state *plane,
+ int odm_factor,
+ int mpc_factor,
+ int plane_index,
+ struct dc_plane_pipe_pool *pipe_pool,
+ const struct dc_state *existing_state)
+{ // Fills pipe_pool with odm_factor x mpc_factor pipes for a plane and returns the plane's master pipe (NULL if the plane id cannot be resolved)
+ struct pipe_ctx *master_pipe = NULL;
+ unsigned int plane_id;
+ unsigned int pipes_needed;
+ unsigned int pipes_assigned;
+ unsigned int pipes[MAX_PIPES] = {0};
+ unsigned int next_pipe_to_assign;
+ int odm_slice, mpc_slice;
+
+ if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) {
+ ASSERT(false);
+ return master_pipe; // still NULL here; pipe_pool is left untouched
+ }
+
+ pipes_needed = mpc_factor * odm_factor;
+
+ master_pipe = find_master_pipe_of_plane(ctx, state, plane_id);
+ ASSERT(master_pipe);
+
+ pipes_assigned = find_pipes_assigned_to_plane(ctx, state, plane_id, pipes); // start from the pipes the plane already owns
+
+ find_more_pipes_for_stream(ctx, state, stream->stream_id, pipes, &pipes_assigned, pipes_needed - pipes_assigned, existing_state); // unsigned locals are passed to int * parameters; return value is not checked
+
+ ASSERT(pipes_assigned >= pipes_needed);
+
+ next_pipe_to_assign = 0;
+ for (odm_slice = 0; odm_slice < odm_factor; odm_slice++)
+ for (mpc_slice = 0; mpc_slice < mpc_factor; mpc_slice++)
+ pipe_pool->pipes_assigned_to_plane[odm_slice][mpc_slice] = pipes[next_pipe_to_assign++]; // row-major fill: all MPC slots of slice 0, then slice 1, ...
+
+ pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine = mpc_factor;
+ pipe_pool->num_pipes_assigned_to_plane_for_odm_combine = odm_factor;
+
+ return master_pipe;
+}
+
+static bool is_pipe_used(const struct dc_plane_pipe_pool *pool, unsigned int pipe_idx)
+{ // True when pipe_idx sits in a valid pool slot whose pipe_used flag is set
+ int i, j;
+
+ for (i = 0; i < pool->num_pipes_assigned_to_plane_for_odm_combine; i++) {
+ for (j = 0; j < pool->num_pipes_assigned_to_plane_for_mpcc_combine; j++) {
+ if (pool->pipes_assigned_to_plane[i][j] == pipe_idx && pool->pipe_used[i][j])
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void free_pipe(struct pipe_ctx *pipe)
+{ // Zeroes the whole pipe_ctx in place; nothing is deallocated
+ memset(pipe, 0, sizeof(struct pipe_ctx));
+}
+
+static void free_unused_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state,
+ const struct dc_plane_state *plane, const struct dc_plane_pipe_pool *pool, unsigned int stream_id, int plane_index)
+{ // Zeroes every pipe that still carries this plane of this stream but is not marked used in pool
+ int i;
+ bool is_plane_duplicate = ctx->v20.scratch.plane_duplicate_exists;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ if (state->res_ctx.pipe_ctx[i].plane_state == plane && state->res_ctx.pipe_ctx[i].stream && // stream is dereferenced on the next line, so guard against a NULL stream
+ state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id &&
+ (!is_plane_duplicate ||
+ ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx] == plane_index) && // with duplicate planes the recorded plane index must match too
+ !is_pipe_used(pool, state->res_ctx.pipe_ctx[i].pipe_idx)) {
+ free_pipe(&state->res_ctx.pipe_ctx[i]);
+ }
+ }
+}
+
+static void remove_pipes_from_blend_trees(struct dml2_context *ctx, struct dc_state *state, struct dc_plane_pipe_pool *pipe_pool, unsigned int odm_slice)
+{ // Unlinks every pipe of the given ODM slice from its top_pipe/bottom_pipe chain and marks its pool slot used; ctx is not read
+ struct pipe_ctx *pipe;
+ int i;
+
+ for (i = 0; i < pipe_pool->num_pipes_assigned_to_plane_for_mpcc_combine; i++) {
+ pipe = &state->res_ctx.pipe_ctx[pipe_pool->pipes_assigned_to_plane[odm_slice][i]]; // slot i, the same slot whose pipe_used flag is set below
+ if (pipe->top_pipe)
+ pipe->top_pipe->bottom_pipe = pipe->bottom_pipe;
+ if (pipe->bottom_pipe)
+ pipe->bottom_pipe->top_pipe = pipe->top_pipe; // repair the neighbour's back link rather than overwriting this pipe's own bottom_pipe
+ pipe->top_pipe = NULL; // the removed pipe keeps no reference into the chain it just left
+ pipe->bottom_pipe = NULL;
+ pipe_pool->pipe_used[odm_slice][i] = true;
+ }
+}
+
+static void map_pipes_for_stream(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream,
+ struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state)
+{ // Assigns one pipe per ODM slice to the stream, links the slices into the ODM chain and runs the acquire callback for each
+ int odm_slice_index;
+ struct pipe_ctx *master_pipe = NULL;
+
+
+ master_pipe = assign_pipes_to_stream(ctx, state, stream, scratch->odm_info.odm_factor, &scratch->pipe_pool, existing_state);
+ sort_pipes_for_splitting(&scratch->pipe_pool);
+
+ for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) {
+ remove_pipes_from_blend_trees(ctx, state, &scratch->pipe_pool, odm_slice_index);
+
+ add_odm_slice_to_odm_tree(ctx, state, scratch, odm_slice_index);
+
+ ctx->config.callbacks.acquire_secondary_pipe_for_mpc_odm(ctx->config.callbacks.dc, state,
+ master_pipe, &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][0]], true); // slot 0 only: assign_pipes_to_stream() fills one pipe per slice
+ }
+}
+
+static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream, const struct dc_plane_state *plane,
+ int plane_index, struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state)
+{ // Assigns pipes to the plane, builds its blend and ODM links slice by slice, then frees pipes of the plane left unused
+ int odm_slice_index;
+ unsigned int plane_id;
+ struct pipe_ctx *master_pipe = NULL;
+ int i;
+
+ if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { // plane_id itself is not used afterwards; the call acts as an existence check
+ ASSERT(false);
+ return;
+ }
+
+ master_pipe = assign_pipes_to_plane(ctx, state, stream, plane, scratch->odm_info.odm_factor,
+ scratch->mpc_info.mpc_factor, plane_index, &scratch->pipe_pool, existing_state);
+ sort_pipes_for_splitting(&scratch->pipe_pool);
+
+ for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) {
+ // Now we have a list of all pipes to be used for this plane/stream, now setup the tree.
+ scratch->odm_info.next_higher_pipe_for_odm_slice[odm_slice_index] = add_plane_to_blend_tree(ctx, state,
+ plane,
+ &scratch->pipe_pool,
+ odm_slice_index,
+ scratch->odm_info.next_higher_pipe_for_odm_slice[odm_slice_index]); // the returned bottom pipe becomes the parent for the next plane mapped on this slice
+
+ add_odm_slice_to_odm_tree(ctx, state, scratch, odm_slice_index);
+
+ for (i = 0; i < scratch->pipe_pool.num_pipes_assigned_to_plane_for_mpcc_combine; i++) {
+
+ ctx->config.callbacks.acquire_secondary_pipe_for_mpc_odm(ctx->config.callbacks.dc, state,
+ master_pipe, &state->res_ctx.pipe_ctx[scratch->pipe_pool.pipes_assigned_to_plane[odm_slice_index][i]], true);
+ }
+ }
+
+ free_unused_pipes_for_plane(ctx, state, plane, &scratch->pipe_pool, stream->stream_id, plane_index);
+}
+
+static unsigned int get_target_mpc_factor(struct dml2_context *ctx,
+		struct dc_state *state,
+		const struct dml_display_cfg_st *disp_cfg,
+		struct dml2_dml_to_dc_pipe_mapping *mapping,
+		const struct dc_stream_status *status,
+		const struct dc_stream_state *stream,
+		int plane_idx)
+{
+	unsigned int plane_id;
+	unsigned int cfg_idx;
+	unsigned int mpc_factor;
+
+	/* Target DPP count for one plane from the DML output (2 for stereo timings); asserts and returns 1 if the plane cannot be resolved. */
+	if (ctx->architecture == dml2_architecture_20) {
+		if (!get_plane_id(ctx, state, status->plane_states[plane_idx],
+				stream->stream_id, plane_idx, &plane_id))
+			goto failed;
+		cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id);
+		mpc_factor = (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx];
+	} else if (ctx->architecture == dml2_architecture_21) {
+		if (ctx->config.svp_pstate.callbacks.get_stream_subvp_type(state, stream) == SUBVP_PHANTOM) {
+			struct dc_stream_state *main_stream;
+			struct dc_stream_status *main_stream_status;
+
+			/* a phantom stream is looked up through the same-index plane of its paired main stream */
+			main_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(state, stream);
+			if (!main_stream)
+				goto failed;
+			main_stream_status = ctx->config.callbacks.get_stream_status(state, main_stream);
+			if (!main_stream_status)
+				goto failed;
+
+			if (!get_plane_id(ctx, state, main_stream_status->plane_states[plane_idx],
+					main_stream->stream_id, plane_idx, &plane_id))
+				goto failed;
+		} else {
+			if (!get_plane_id(ctx, state, status->plane_states[plane_idx],
+					stream->stream_id, plane_idx, &plane_id))
+				goto failed;
+		}
+		cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id);
+		mpc_factor = ctx->v21.mode_programming.programming->plane_programming[cfg_idx].num_dpps_required;
+	} else {
+		mpc_factor = 1;
+		ASSERT(false);
+	}
+
+	/* For stereo timings, we need to pipe split */
+	if (dml2_is_stereo_timing(stream))
+		mpc_factor = 2;
+
+	return mpc_factor;
+failed:
+	ASSERT(false);	/* plane_id is not valid when get_plane_id() fails, so it is never used as a lookup key */
+	return 1;
+}
+
+static unsigned int get_target_odm_factor(
+		const struct dml2_context *ctx,
+		struct dc_state *state,
+		const struct dml_display_cfg_st *disp_cfg,
+		struct dml2_dml_to_dc_pipe_mapping *mapping,
+		const struct dc_stream_state *stream)
+{
+	const struct dc_stream_state *lookup_stream = stream;
+	unsigned int mode;
+	unsigned int idx;
+
+	/*
+	 * Target ODM slice count for @stream taken from the DML output; asserts and returns 1 when none can be determined.
+	 */
+	if (ctx->architecture == dml2_architecture_20) {
+		idx = find_disp_cfg_idx_by_stream_id(mapping, stream->stream_id);
+		mode = disp_cfg->hw.ODMMode[idx];
+
+		/* DML2.0 reports an ODM mode, which is translated into a slice count */
+		if (mode == dml_odm_mode_bypass)
+			return 1;
+		if (mode == dml_odm_mode_combine_2to1)
+			return 2;
+		if (mode == dml_odm_mode_combine_4to1)
+			return 4;
+
+		goto failed;	/* any other mode is unexpected */
+	}
+
+	if (ctx->architecture != dml2_architecture_21)
+		goto failed;
+
+	/* DML2.1 stores the count directly; a phantom stream uses the entry of its paired main stream */
+	if (ctx->config.svp_pstate.callbacks.get_stream_subvp_type(state, stream) == SUBVP_PHANTOM) {
+		lookup_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(state, stream);
+		if (!lookup_stream)
+			goto failed;
+	}
+
+	idx = find_disp_cfg_idx_by_stream_id(mapping, lookup_stream->stream_id);
+	return ctx->v21.mode_programming.programming->stream_programming[idx].num_odms_required;
+
+failed:
+	ASSERT(false);
+	return 1;
+}
+
+static unsigned int get_source_odm_factor(const struct dml2_context *ctx,
+		struct dc_state *state,
+		const struct dc_stream_state *stream)
+{
+	struct pipe_ctx *master = ctx->config.callbacks.get_otg_master_for_stream(&state->res_ctx, stream);
+
+	/*
+	 * ODM slice count @stream currently has in @state, or 0 when no OTG master pipe is found for it.
+	 */
+	return master ? ctx->config.callbacks.get_odm_slice_count(master) : 0;
+}
+
+static unsigned int get_source_mpc_factor(const struct dml2_context *ctx,
+		struct dc_state *state,
+		const struct dc_plane_state *plane)
+{
+	struct pipe_ctx *dpp_pipes[MAX_PIPES] = {0};
+	int dpp_pipe_count = ctx->config.callbacks.get_dpp_pipes_for_plane(plane,
+			&state->res_ctx, dpp_pipes);
+	/* MPC slice count @plane currently has in @state; asserts and returns 0 when it has no DPP pipe (dpp_pipes[0] is then NULL). */
+	ASSERT(dpp_pipe_count > 0);
+	return dpp_pipe_count > 0 ? ctx->config.callbacks.get_mpc_slice_count(dpp_pipes[0]) : 0;
+}
+
+
+static void populate_mpc_factors_for_stream(struct dml2_context *ctx,
+		const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping,
+		struct dc_state *state, unsigned int stream_idx,
+		struct dml2_pipe_combine_factor odm_factor,
+		struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES])
+{
+	const struct dc_stream_status *status = &state->stream_status[stream_idx];
+	int plane;
+
+	for (plane = 0; plane < status->plane_count; plane++) {
+		mpc_factors[plane].source = get_source_mpc_factor(ctx, state, status->plane_states[plane]);
+		/* The DML figure is only consulted when the stream's ODM target is 1; otherwise the plane targets 1. */
+		if (odm_factor.target != 1)
+			mpc_factors[plane].target = 1;
+		else
+			mpc_factors[plane].target = get_target_mpc_factor(ctx, state, disp_cfg, mapping, status, state->streams[stream_idx], plane);
+	}
+}
+
+static void populate_odm_factors(const struct dml2_context *ctx,
+		const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping,
+		struct dc_state *state, struct dml2_pipe_combine_factor odm_factors[MAX_PIPES])
+{
+	int idx;
+
+	/* Per stream: .source is the slice count found in @state, .target the one taken from the DML output. */
+	for (idx = 0; idx < state->stream_count; idx++) {
+		struct dml2_pipe_combine_factor *factor = &odm_factors[idx];
+
+		factor->source = get_source_odm_factor(ctx, state, state->streams[idx]);
+		factor->target = get_target_odm_factor(ctx, state, disp_cfg, mapping, state->streams[idx]);
+	}
+}
+
+static bool unmap_dc_pipes_for_stream(struct dml2_context *ctx,
+		struct dc_state *state,
+		const struct dc_state *existing_state,
+		const struct dc_stream_state *stream,
+		const struct dc_stream_status *status,
+		struct dml2_pipe_combine_factor odm_factor,
+		struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES])
+{
+	bool ok = true;
+	int p;
+
+	/* Shrinking pass: a factor is applied here only when its target is below its source. */
+	for (p = 0; p < status->plane_count; p++) {
+		if (mpc_factors[p].target >= mpc_factors[p].source)
+			continue;
+
+		ok &= ctx->config.callbacks.update_pipes_for_plane_with_slice_count(state, existing_state,
+				ctx->config.callbacks.dc->res_pool, status->plane_states[p], mpc_factors[p].target);
+	}
+
+	/* The planes are handled first, the stream's own ODM slice count last. */
+	if (odm_factor.target < odm_factor.source) {
+		ok &= ctx->config.callbacks.update_pipes_for_stream_with_slice_count(state, existing_state,
+				ctx->config.callbacks.dc->res_pool, stream, odm_factor.target);
+	}
+
+	return ok;
+}
+
+static bool map_dc_pipes_for_stream(struct dml2_context *ctx,
+		struct dc_state *state,
+		const struct dc_state *existing_state,
+		const struct dc_stream_state *stream,
+		const struct dc_stream_status *status,
+		struct dml2_pipe_combine_factor odm_factor,
+		struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES])
+{
+	bool ok = true;
+	int p;
+
+	/* Growing pass: a factor is applied here only when its target is above its source. */
+	for (p = 0; p < status->plane_count; p++) {
+		if (mpc_factors[p].target <= mpc_factors[p].source)
+			continue;
+
+		ok &= ctx->config.callbacks.update_pipes_for_plane_with_slice_count(state, existing_state,
+				ctx->config.callbacks.dc->res_pool, status->plane_states[p], mpc_factors[p].target);
+	}
+
+	/* The planes are handled first, the stream's own ODM slice count last. */
+	if (odm_factor.target > odm_factor.source) {
+		ok &= ctx->config.callbacks.update_pipes_for_stream_with_slice_count(state, existing_state,
+				ctx->config.callbacks.dc->res_pool, stream, odm_factor.target);
+	}
+
+	return ok;
+}
+
+static bool map_dc_pipes_with_callbacks(struct dml2_context *ctx, struct dc_state *state,
+		const struct dml_display_cfg_st *disp_cfg,
+		struct dml2_dml_to_dc_pipe_mapping *mapping,
+		const struct dc_state *existing_state)
+{
+	struct dml2_pipe_combine_scratch *factors = &ctx->pipe_combine_scratch;
+	bool ok = true;
+	int s;
+
+	/* Collect every source/target factor first; then all streams are shrunk before any stream is grown. */
+	populate_odm_factors(ctx, disp_cfg, mapping, state, factors->odm_factors);
+	for (s = 0; s < state->stream_count; s++)
+		populate_mpc_factors_for_stream(ctx, disp_cfg, mapping, state, s, factors->odm_factors[s], factors->mpc_factors[s]);
+	for (s = 0; s < state->stream_count; s++)
+		ok &= unmap_dc_pipes_for_stream(ctx, state, existing_state, state->streams[s],
+				&state->stream_status[s], factors->odm_factors[s], factors->mpc_factors[s]);
+	for (s = 0; s < state->stream_count; s++)
+		ok &= map_dc_pipes_for_stream(ctx, state, existing_state, state->streams[s],
+				&state->stream_status[s], factors->odm_factors[s], factors->mpc_factors[s]);
+
+	return ok;
+}
+
+bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping, const struct dc_state *existing_state)
+{
+ int stream_index, plane_index, i;
+
+ unsigned int stream_disp_cfg_index;
+ unsigned int plane_disp_cfg_index;
+ unsigned int disp_cfg_index_max;
+
+ unsigned int plane_id;
+ unsigned int stream_id;
+
+ const unsigned int *ODMMode, *DPPPerSurface;
+ unsigned int odm_mode_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}, dpp_per_surface_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0};
+ struct dc_pipe_mapping_scratch scratch;
+ /* When enabled, the callback-based mapping replaces everything below (the legacy pipe management path). */
+ if (ctx->config.map_dc_pipes_with_callbacks)
+ return map_dc_pipes_with_callbacks(
+ ctx, state, disp_cfg, mapping, existing_state);
+
+ if (ctx->architecture == dml2_architecture_21) {
+ /*
+ * Extract ODM and DPP outputs from DML2.1 and map them in an array as required for pipe mapping in dml2_map_dc_pipes.
+ * As data cannot be directly extracted in const pointers, assign these arrays to const pointers before proceeding to
+ * maximize the reuse of existing code. Const pointers are required because dml2.0 dml_display_cfg_st is const.
+ *
+ */
+ for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+ odm_mode_array[i] = ctx->v21.mode_programming.programming->stream_programming[i].num_odms_required;
+ dpp_per_surface_array[i] = ctx->v21.mode_programming.programming->plane_programming[i].num_dpps_required;
+ }
+
+ ODMMode = (const unsigned int *)odm_mode_array;
+ DPPPerSurface = (const unsigned int *)dpp_per_surface_array;
+ disp_cfg_index_max = __DML2_WRAPPER_MAX_STREAMS_PLANES__;
+ } else {
+ ODMMode = (unsigned int *)disp_cfg->hw.ODMMode;
+ DPPPerSurface = disp_cfg->hw.DPPPerSurface;
+ disp_cfg_index_max = __DML_NUM_PLANES__;
+ }
+ /* Per stream: derive the ODM factor, then map pipes for each plane (or for the stream itself if it has no planes). */
+ for (stream_index = 0; stream_index < state->stream_count; stream_index++) {
+ memset(&scratch, 0, sizeof(struct dc_pipe_mapping_scratch));
+
+ stream_id = state->streams[stream_index]->stream_id;
+ stream_disp_cfg_index = find_disp_cfg_idx_by_stream_id(mapping, stream_id);
+ if (stream_disp_cfg_index >= disp_cfg_index_max)
+ continue; /* the index is outside the ODMMode/DPPPerSurface arrays: this stream is not mapped */
+
+ if (ctx->architecture == dml2_architecture_20) {
+ if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_bypass) {
+ scratch.odm_info.odm_factor = 1;
+ } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_2to1) {
+ scratch.odm_info.odm_factor = 2;
+ } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_4to1) {
+ scratch.odm_info.odm_factor = 4;
+ } else {
+ ASSERT(false);
+ scratch.odm_info.odm_factor = 1;
+ }
+ } else if (ctx->architecture == dml2_architecture_21) {
+ /* After DML2.1 update, ODM interpretation needs to change and is no longer same as for DML2.0.
+ * This is not an issue with new resource management logic. This block ensures backward compatibility
+ * with legacy pipe management with updated DML.
+ * */
+ if (ODMMode[stream_disp_cfg_index] == 1) {
+ scratch.odm_info.odm_factor = 1;
+ } else if (ODMMode[stream_disp_cfg_index] == 2) {
+ scratch.odm_info.odm_factor = 2;
+ } else if (ODMMode[stream_disp_cfg_index] == 4) {
+ scratch.odm_info.odm_factor = 4;
+ } else {
+ ASSERT(false);
+ scratch.odm_info.odm_factor = 1;
+ }
+ }
+ calculate_odm_slices(state->streams[stream_index], scratch.odm_info.odm_factor, scratch.odm_info.odm_slice_end_x);
+
+ // If there are no planes, you still want to setup ODM...
+ if (state->stream_status[stream_index].plane_count == 0) {
+ map_pipes_for_stream(ctx, state, state->streams[stream_index], &scratch, existing_state);
+ }
+
+ for (plane_index = 0; plane_index < state->stream_status[stream_index].plane_count; plane_index++) {
+ // Planes are ordered top to bottom.
+ if (get_plane_id(ctx, state, state->stream_status[stream_index].plane_states[plane_index],
+ stream_id, plane_index, &plane_id)) {
+ plane_disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id);
+
+ // Setup mpc_info for this plane
+ scratch.mpc_info.prev_odm_pipe = NULL;
+ if (scratch.odm_info.odm_factor == 1 && plane_disp_cfg_index < disp_cfg_index_max) {
+ // If ODM combine is not in use, then the number of pipes
+ // per plane is determined by MPC combine factor
+ scratch.mpc_info.mpc_factor = DPPPerSurface[plane_disp_cfg_index];
+
+ //For stereo timings, we need to pipe split
+ if (dml2_is_stereo_timing(state->streams[stream_index]))
+ scratch.mpc_info.mpc_factor = 2;
+ } else {
+ // If ODM combine is enabled, then we use at most 1 pipe per
+ // odm slice per plane, i.e. MPC combine is never used
+ scratch.mpc_info.mpc_factor = 1;
+ }
+
+ ASSERT(scratch.odm_info.odm_factor * scratch.mpc_info.mpc_factor > 0);
+
+ // Clear the pool assignment scratch (which is per plane)
+ memset(&scratch.pipe_pool, 0, sizeof(struct dc_plane_pipe_pool));
+
+ map_pipes_for_plane(ctx, state, state->streams[stream_index],
+ state->stream_status[stream_index].plane_states[plane_index], plane_index, &scratch, existing_state);
+ } else {
+ // Plane ID cannot be generated, therefore no DML mapping can be performed.
+ ASSERT(false);
+ }
+ }
+
+ }
+ /* A failed validation is only asserted on; it does not change the return value. */
+ if (!validate_pipe_assignment(ctx, state, disp_cfg, mapping))
+ ASSERT(false);
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state) {
+ if (!ctx->config.callbacks.build_scaling_params(pipe)) {
+ ASSERT(false);
+ }
+ }
+ /* Test pattern parameters are built only for a pipe that has a stream and no top or previous ODM pipe. */
+ if (ctx->config.callbacks.build_test_pattern_params &&
+ pipe->stream &&
+ pipe->prev_odm_pipe == NULL &&
+ pipe->top_pipe == NULL)
+ ctx->config.callbacks.build_test_pattern_params(&state->res_ctx, pipe);
+ }
+ /* NOTE(review): this path always returns true, even if an ASSERT above fired; confirm callers do not expect false. */
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.h
new file mode 100644
index 000000000000..1538b708d8be
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML2_DC_RESOURCE_MGMT_H__
+#define __DML2_DC_RESOURCE_MGMT_H__
+
+#include "dml2_dc_types.h"
+
+struct dml2_context;
+struct dml2_dml_to_dc_pipe_mapping;
+struct dml_display_cfg_st;
+
+/**
+ * dml2_map_dc_pipes - Creates a pipe linkage in dc_state based on current display config.
+ * @ctx: Input dml2 context
+ * @state: Current dc_state to be updated.
+ * @disp_cfg: Current display config.
+ * @mapping: Pipe mapping logic structure to keep a track of pipes to be used.
+ * @existing_state: Read-only dc_state that is handed on to the pipe assignment helpers and callbacks.
+ *
+ * Based on ODM and DPPPerSurface outputs calculated by the DML for the current display
+ * config, create a pipe linkage in dc_state which is then used by DC core.
+ * Make this function generic to be used by multiple DML versions.
+ *
+ * Return: True if pipe mapping and linking is successful, false otherwise.
+ */
+bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const struct dml_display_cfg_st *disp_cfg, struct dml2_dml_to_dc_pipe_mapping *mapping, const struct dc_state *existing_state);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
new file mode 100644
index 000000000000..7ca7f2a743c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_types.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+/*
+ * Wrapper header for externally defined types from DC. These types come from
+ * dc headers when building DML2 as part of DC, but are defined here when building
+ * DML2 as a standalone library (such as for unit testing).
+ */
+
+#ifndef __DML2_DC_TYPES_H__
+#define __DML2_DC_TYPES_H__
+
+#include "resource.h"
+#include "core_types.h"
+#include "dsc.h"
+#include "clk_mgr.h"
+#include "dc_state_priv.h"
+
+#endif //__DML2_DC_TYPES_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h
new file mode 100644
index 000000000000..140ec01545db
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML2_INTERNAL_TYPES_H__
+#define __DML2_INTERNAL_TYPES_H__
+
+#include "dml2_dc_types.h"
+#include "display_mode_core.h"
+#include "dml2_wrapper.h"
+#include "dml2_policy.h"
+
+#include "dml_top.h"
+#include "dml21_wrapper.h"
+
+struct dml2_wrapper_optimize_configuration_params { /* NOTE(review): pointer bundle; its consumer is not visible in this header */
+ struct display_mode_lib_st *dml_core_ctx;
+ struct dml2_configuration_options *config;
+ struct ip_params_st *ip_params;
+ struct dml_display_cfg_st *cur_display_config; /* cur_ and new_ members come in pairs; presumably input vs. result - TODO confirm */
+ struct dml_display_cfg_st *new_display_config;
+ const struct dml_mode_support_info_st *cur_mode_support_info; /* the only member that is pointer-to-const */
+ struct dml_mode_eval_policy_st *cur_policy;
+ struct dml_mode_eval_policy_st *new_policy;
+};
+
+struct dml2_calculate_lowest_supported_state_for_temp_read_scratch { /* by-value working storage; embedded in struct dml2_wrapper_scratch */
+ struct dml_mode_support_info_st evaluation_info;
+ dml_float_t uclk_change_latencies[__DML_MAX_STATE_ARRAY_SIZE__]; /* presumably one entry per SoC state - TODO confirm */
+ struct dml_display_cfg_st cur_display_config;
+ struct dml_display_cfg_st new_display_config;
+ struct dml_mode_eval_policy_st new_policy;
+ struct dml_mode_eval_policy_st cur_policy;
+};
+
+struct dml2_create_scratch { /* by-value working storage; embedded in struct dml2_wrapper_scratch as create_scratch */
+ struct dml2_policy_build_synthetic_soc_states_scratch build_synthetic_socbb_scratch;
+ struct soc_states_st in_states; /* NOTE(review): presumably the input SoC state table - confirm against the user */
+};
+
+struct dml2_calculate_rq_and_dlg_params_scratch { /* one RQ, one DLG and one TTU register set, held by value */
+ struct _vcs_dpi_dml_display_rq_regs_st rq_regs;
+ struct _vcs_dpi_dml_display_dlg_regs_st disp_dlg_regs;
+ struct _vcs_dpi_dml_display_ttu_regs_st disp_ttu_regs;
+};
+
+#define __DML2_WRAPPER_MAX_STREAMS_PLANES__ 6 /* length of every table in struct dml2_dml_to_dc_pipe_mapping */
+
+struct dml2_dml_to_dc_pipe_mapping { /* id tables, each paired with a same-sized *_valid flag table */
+ unsigned int disp_cfg_to_stream_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; /* presumably indexed by DML display cfg index - TODO confirm */
+ bool disp_cfg_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+ unsigned int disp_cfg_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+ bool disp_cfg_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+ unsigned int dml_pipe_idx_to_stream_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; /* presumably indexed by DML pipe index - TODO confirm */
+ bool dml_pipe_idx_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+ unsigned int dml_pipe_idx_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+ bool dml_pipe_idx_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+ unsigned int dml_pipe_idx_to_plane_index[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+ bool dml_pipe_idx_to_plane_index_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__];
+};
+
+struct dml2_wrapper_scratch { /* scratch area of the v20 member of struct dml2_context */
+ struct dml_display_cfg_st cur_display_config;
+ struct dml_display_cfg_st new_display_config;
+ struct dml_mode_eval_policy_st new_policy;
+ struct dml_mode_eval_policy_st cur_policy;
+ struct dml_mode_support_info_st mode_support_info;
+ struct dml_mode_support_ex_params_st mode_support_params;
+
+ struct dummy_pstate_entry dummy_pstate_table[4];
+
+ struct dml2_create_scratch create_scratch;
+ struct dml2_calculate_lowest_supported_state_for_temp_read_scratch dml2_calculate_lowest_supported_state_for_temp_read_scratch;
+ struct dml2_calculate_rq_and_dlg_params_scratch calculate_rq_and_dlg_params_scratch;
+
+ struct dml2_wrapper_optimize_configuration_params optimize_configuration_params;
+ struct dml2_policy_build_synthetic_soc_states_params build_synthetic_socbb_params;
+
+ struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; /* same type as the mapping argument of dml2_map_dc_pipes() */
+ bool enable_flexible_pipe_mapping;
+ bool plane_duplicate_exists;
+ int hpo_stream_to_link_encoder_mapping[MAX_HPO_DP2_ENCODERS]; /* NOTE(review): index/value meaning not visible here */
+};
+
+struct dml2_helper_det_policy_scratch { /* embedded in struct dml2_context as det_helper_scratch */
+ int dpps_per_surface[MAX_PLANES]; /* presumably the DPP count per plane used by the DET policy helper - TODO confirm */
+};
+
+enum dml2_architecture { /* value of dml2_context.architecture; the pipe mapping code branches on it */
+ dml2_architecture_20, /* mapping code reads ODM/DPP outputs from the dml_display_cfg_st argument */
+ dml2_architecture_21 /* mapping code reads ODM/DPP outputs from ctx->v21.mode_programming */
+};
+
+struct prepare_mcache_programming_locals { /* embedded in struct dml21_wrapper_scratch as prepare_mcache_locals */
+ struct dml2_build_mcache_programming_in_out build_mcache_programming_params;
+};
+
+struct dml21_wrapper_scratch { /* scratch area of the v21 member of struct dml2_context */
+ struct prepare_mcache_programming_locals prepare_mcache_locals;
+ struct pipe_ctx temp_pipe; /* a whole pipe_ctx held by value; NOTE(review): its user is not visible here */
+};
+
+struct dml2_pipe_combine_factor { /* current vs. requested slice count of one stream (ODM) or one plane (MPC) */
+ unsigned int source; /* count found in the dc_state, filled from get_source_odm_factor()/get_source_mpc_factor() */
+ unsigned int target; /* count taken from the DML output, filled from get_target_odm_factor()/get_target_mpc_factor() */
+};
+
+struct dml2_pipe_combine_scratch { /* factor tables filled and consumed by map_dc_pipes_with_callbacks() */
+ struct dml2_pipe_combine_factor odm_factors[MAX_PIPES]; /* indexed by stream index in the dc_state */
+ struct dml2_pipe_combine_factor mpc_factors[MAX_PIPES][MAX_PIPES]; /* [stream index][plane index within that stream] */
+};
+
+struct dml2_context { /* DML2 wrapper state; common members first, then a union of per-version state */
+ enum dml2_architecture architecture; /* checked before ctx->v21 is read; presumably selects the live union member - TODO confirm */
+ struct dml2_configuration_options config; /* holds the DC callbacks and the map_dc_pipes_with_callbacks switch */
+ struct dml2_helper_det_policy_scratch det_helper_scratch;
+ struct dml2_pipe_combine_scratch pipe_combine_scratch; /* source/target factors used by map_dc_pipes_with_callbacks() */
+ union {
+ struct {
+ struct display_mode_lib_st dml_core_ctx;
+ struct dml2_wrapper_scratch scratch;
+ struct dcn_watermarks g6_temp_read_watermark_set;
+ } v20;
+ struct {
+ struct dml21_wrapper_scratch scratch;
+ struct dml2_initialize_instance_in_out dml_init;
+ struct dml2_display_cfg display_config;
+ struct dml2_check_mode_supported_in_out mode_support;
+ struct dml2_build_mode_programming_in_out mode_programming; /* programming->stream_programming[] / plane_programming[] supply ODM and DPP counts */
+ struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping;
+ } v21;
+ };
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
new file mode 100644
index 000000000000..c59f825cfae9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
@@ -0,0 +1,910 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dml2_dc_types.h"
+#include "dml2_internal_types.h"
+#include "dml2_utils.h"
+#include "dml2_mall_phantom.h"
+
+/*
+ * dml2_helper_calculate_num_ways_for_subvp: MALL cache ways needed by the phantom pipes
+ *
+ * Sums, over every master pipe reported as SUBVP_PHANTOM, the cache lines needed
+ * to hold its block-aligned viewport (doubled when DCC is enabled).
+ * @ctx: DML2 context supplying dcn_pipe_count, mall_cfg and the svp_pstate callbacks
+ * @context: dc state whose res_ctx.pipe_ctx[] array is scanned
+ *
+ * Return: the summed cache lines divided by the lines per way, rounded up.
+ */
+unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx, struct dc_state *context)
+{
+	uint32_t i, num_ways, lines_per_way, total_cache_lines;
+	uint32_t cache_lines_used = 0;
+
+	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+		uint32_t bytes_per_pixel, mblk_width, mblk_height, num_mblks, bytes_in_mall, cache_lines_per_plane;
+		uint32_t full_vp_width_blk_aligned, mall_alloc_width_blk_aligned, mall_alloc_height_blk_aligned;
+
+		// Only master phantom pipes contribute to the total
+		if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe ||
+				ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
+			continue;
+
+		bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
+		// The block width is fixed; only the block height depends on the bytes per element
+		mblk_width = ctx->config.mall_cfg.mblk_width_pixels;
+		mblk_height = bytes_per_pixel == 4 ? ctx->config.mall_cfg.mblk_height_4bpe_pixels : ctx->config.mall_cfg.mblk_height_8bpe_pixels;
+
+		/* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) -
+		 * FLOOR(vp_x_start, blk_width)
+		 */
+		full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
+				pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) -
+				(pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
+
+		/* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */
+		mall_alloc_width_blk_aligned = full_vp_width_blk_aligned;
+
+		/* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */
+		mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) /
+				mblk_height * mblk_height + mblk_height;
+
+		/* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c;
+		 * full_mblk_height_ub_l/c = mall_alloc_height_blk_aligned_l/c;
+		 * num_mblk_l/c = (full_mblk_width_ub_l/c / mblk_width_l/c) * (full_mblk_height_ub_l/c / mblk_height_l/c);
+		 * (Should be divisible, but round up if not)
+		 */
+		num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
+				((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
+		bytes_in_mall = num_mblks * ctx->config.mall_cfg.mblk_size_bytes;
+		// cache lines used is total bytes / cache_line size. Add +2 for worst case alignment (MALL is 64-byte aligned)
+		cache_lines_per_plane = bytes_in_mall / ctx->config.mall_cfg.cache_line_size_bytes + 2;
+		// For DCC we must cache the meta surface, so double cache lines required
+		if (pipe->plane_state->dcc.enable)
+			cache_lines_per_plane *= 2;
+		cache_lines_used += cache_lines_per_plane;
+	}
+
+	total_cache_lines = ctx->config.mall_cfg.max_cab_allocation_bytes / ctx->config.mall_cfg.cache_line_size_bytes;
+	lines_per_way = total_cache_lines / ctx->config.mall_cfg.cache_num_ways;
+	num_ways = cache_lines_used / lines_per_way;
+	if (cache_lines_used % lines_per_way > 0)
+		num_ways++;
+
+	return num_ways;
+}
+
+static void merge_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *context)
+{
+ int i;
+
+ /* Detach every secondary ODM pipe and every split pipe from its chain, then clear that pipe */
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ // For now merge all pipes for SubVP since pipe split case isn't supported yet
+
+ /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */
+ if (pipe->prev_odm_pipe) {
+ /* unlink this pipe from the ODM chain by joining its previous and next neighbours */
+ pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe;
+ if (pipe->next_odm_pipe)
+ pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe;
+
+ pipe->bottom_pipe = NULL;
+ pipe->next_odm_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ pipe->top_pipe = NULL;
+ pipe->prev_odm_pipe = NULL;
+ if (pipe->stream_res.dsc) /* hand the DSC resource back before stream_res is zeroed below */
+ ctx->config.svp_pstate.callbacks.release_dsc(&context->res_ctx, ctx->config.svp_pstate.callbacks.dc->res_pool, &pipe->stream_res.dsc);
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { /* lower pipe showing the same plane as the pipe above it */
+ struct pipe_ctx *top_pipe = pipe->top_pipe;
+ struct pipe_ctx *bottom_pipe = pipe->bottom_pipe;
+
+ top_pipe->bottom_pipe = bottom_pipe; /* splice this pipe out of the top/bottom chain */
+ if (bottom_pipe)
+ bottom_pipe->top_pipe = top_pipe;
+
+ pipe->top_pipe = NULL;
+ pipe->bottom_pipe = NULL;
+ pipe->plane_state = NULL;
+ pipe->stream = NULL;
+ memset(&pipe->plane_res, 0, sizeof(pipe->plane_res));
+ memset(&pipe->stream_res, 0, sizeof(pipe->stream_res));
+ }
+ }
+}
+
+static bool all_pipes_have_stream_and_plane(struct dml2_context *ctx, const struct dc_state *context)
+{ /* returns false as soon as a pipe has a stream but no plane_state; true otherwise */
+ int i;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream) /* pipes without a stream are ignored, not treated as failures */
+ continue;
+
+ if (!pipe->plane_state)
+ return false;
+ }
+ return true;
+}
+
+static bool mpo_in_use(const struct dc_state *context)
+{ /* true when any stream in the state has more than one plane attached */
+ int i;
+
+ for (i = 0; i < context->stream_count; i++) {
+ if (context->stream_status[i].plane_count > 1)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * get_num_free_pipes: Calculate number of free pipes
+ *
+ * A pipe counts as "used" when it heads a chain (it has a stream and no
+ * top_pipe) or is reached from such a head by following bottom_pipe links.
+ *
+ * @ctx: DML2 context supplying dcn_pipe_count
+ * @state: dc state whose pipe_ctx array is inspected
+ *
+ * Return:
+ * dcn_pipe_count minus the number of used pipes
+ */
+static unsigned int get_num_free_pipes(struct dml2_context *ctx, struct dc_state *state)
+{
+ unsigned int i;
+ unsigned int free_pipes = 0;
+ unsigned int num_pipes = 0;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && !pipe->top_pipe) { /* chain head: count it and everything below it */
+ while (pipe) {
+ num_pipes++;
+ pipe = pipe->bottom_pipe;
+ }
+ }
+ }
+
+ free_pipes = ctx->config.dcn_pipe_count - num_pipes;
+ return free_pipes;
+}
+
+/*
+ * assign_subvp_pipe: Function to decide which pipe will use Sub-VP.
+ *
+ * We enter this function if we are Sub-VP capable (i.e. enough pipes available)
+ * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if
+ * we are forcing SubVP P-State switching on the current config.
+ *
+ * The number of pipes used for the chosen surface must be less than or equal to the
+ * number of free pipes available.
+ *
+ * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK).
+ * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own
+ * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't
+ * support MCLK switching naturally [i.e. ACTIVE or VBLANK]).
+ *
+ * @param ctx: DML2 context (pipe count and svp_pstate callbacks)
+ * @param context: dc state to search
+ * @param index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned
+ *
+ * Return:
+ * True if a valid pipe assignment was found for Sub-VP. Otherwise false.
+ */
+static bool assign_subvp_pipe(struct dml2_context *ctx, struct dc_state *context, unsigned int *index)
+{
+ unsigned int i, pipe_idx;
+ unsigned int max_frame_time = 0;
+ bool valid_assignment_found = false;
+ unsigned int free_pipes = 2; // NOTE(review): hard-coded; get_num_free_pipes() is not consulted here
+ bool current_assignment_freesync = false;
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+
+ for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { /* pipe_idx only advances for pipes that have a stream */
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ unsigned int num_pipes = 0;
+ unsigned int refresh_rate = 0;
+
+ if (!pipe->stream)
+ continue;
+
+ // Round up: pixel clock in Hz divided by the pixels per frame
+ refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 +
+ pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1)
+ / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total);
+ /* SubVP pipe candidate requirements:
+ * - Refresh rate < 120hz
+ * - Not able to switch in vactive naturally (switching in active means the
+ * DET provides enough buffer to hide the P-State switch latency -- trying
+ * to combine this with SubVP can cause issues with the scheduling).
+ */
+ if (pipe->plane_state && !pipe->top_pipe &&
+ ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_NONE && refresh_rate < 120 &&
+ vba->ActiveDRAMClockChangeLatencyMarginPerState[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]] <= 0) {
+ while (pipe) { /* count this pipe plus every pipe below it */
+ num_pipes++;
+ pipe = pipe->bottom_pipe;
+ }
+
+ pipe = &context->res_ctx.pipe_ctx[i]; /* the walk above left pipe NULL; point it back at the head */
+ if (num_pipes <= free_pipes) {
+ struct dc_stream_state *stream = pipe->stream;
+ unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total /
+ (double)(stream->timing.pix_clk_100hz * 100)) * 1000000;
+ if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) {
+ *index = i;
+ max_frame_time = frame_us;
+ valid_assignment_found = true;
+ current_assignment_freesync = false;
+ /* For the 2-Freesync display case, still choose the one with the
+ * longest frame time
+ */
+ } else if (stream->ignore_msa_timing_param && (!valid_assignment_found ||
+ (current_assignment_freesync && frame_us > max_frame_time))) {
+ *index = i; /* NOTE(review): max_frame_time is only written in the branch above, so between two freesync candidates the later one wins -- confirm intended */
+ valid_assignment_found = true;
+ current_assignment_freesync = true;
+ }
+ }
+ }
+ pipe_idx++;
+ }
+ return valid_assignment_found;
+}
+
+/*
+ * enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP.
+ *
+ * This function returns true if there are enough free pipes
+ * to create the required phantom pipes for any given stream
+ * (that does not already have phantom pipe assigned).
+ *
+ * e.g. For a 2 stream config where the first stream uses one
+ * pipe and the second stream uses 2 pipes (i.e. pipe split),
+ * this function will return true because there is 1 remaining
+ * pipe which can be used as the phantom pipe for the non pipe
+ * split pipe.
+ *
+ * @ctx: DML2 context (pipe count and svp_pstate callbacks)
+ * @state: dc state to inspect
+ *
+ * Return:
+ * True if there are enough free pipes to assign phantom pipes to at least one
+ * stream that does not already have phantom pipes assigned. Otherwise false.
+ */
+static bool enough_pipes_for_subvp(struct dml2_context *ctx, struct dc_state *state)
+{
+ unsigned int i, split_cnt, free_pipes;
+ unsigned int min_pipe_split = ctx->config.dcn_pipe_count + 1; // init as max number of pipes + 1
+ bool subvp_possible = false;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+ // Find the minimum pipe split count for non SubVP pipes
+ if (pipe->stream && !pipe->top_pipe &&
+ ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_NONE) {
+ split_cnt = 0;
+ while (pipe) { /* length of the bottom_pipe chain starting at this head */
+ split_cnt++;
+ pipe = pipe->bottom_pipe;
+ }
+
+ if (split_cnt < min_pipe_split)
+ min_pipe_split = split_cnt;
+ }
+ }
+
+ free_pipes = get_num_free_pipes(ctx, state);
+
+ // SubVP only possible if at least one pipe is being used (i.e. free_pipes
+ // should not equal to the pipe_count)
+ if (free_pipes >= min_pipe_split && free_pipes < ctx->config.dcn_pipe_count)
+ subvp_possible = true;
+
+ return subvp_possible;
+}
+
+/*
+ * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable
+ *
+ * High level algorithm:
+ * 1. Find longest microschedule length (in us) between the two SubVP pipes
+ * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both
+ * pipes still allows for the maximum microschedule to fit in the active
+ * region for both pipes.
+ *
+ * @ctx: DML2 context (pipe count, svp_pstate delays and callbacks)
+ * @context: dc state to evaluate
+ *
+ * Return:
+ * bool - True if the SubVP + SubVP config is schedulable, false otherwise
+ */
+static bool subvp_subvp_schedulable(struct dml2_context *ctx, struct dc_state *context)
+{
+ struct pipe_ctx *subvp_pipes[2]; /* NOTE(review): not initialised; both entries are read below, so callers must ensure two SUBVP_MAIN pipes exist */
+ struct dc_stream_state *phantom = NULL;
+ uint32_t microschedule_lines = 0;
+ uint32_t index = 0;
+ uint32_t i;
+ uint32_t max_microschedule_us = 0;
+ int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ uint32_t time_us = 0;
+
+ /* Loop to calculate the maximum microschedule time between the two SubVP pipes,
+ * and also to store the two main SubVP pipe pointers in subvp_pipes[2].
+ */
+ if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+ ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN) {
+ phantom = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream); /* used below without a NULL check */
+ microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) +
+ phantom->timing.v_addressable;
+
+ // Round up when calculating microschedule time (+ 1 at the end)
+ time_us = (microschedule_lines * phantom->timing.h_total) /
+ (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 +
+ ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us +
+ ctx->config.svp_pstate.subvp_fw_processing_delay_us + 1;
+ if (time_us > max_microschedule_us)
+ max_microschedule_us = time_us;
+
+ subvp_pipes[index] = pipe;
+ index++;
+
+ // Maximum 2 SubVP pipes
+ if (index == 2)
+ break;
+ }
+ }
+ vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) /
+ (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+ vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) /
+ (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+ vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) *
+ subvp_pipes[0]->stream->timing.h_total) /
+ (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+ vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) *
+ subvp_pipes[1]->stream->timing.h_total) /
+ (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000;
+
+ if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && /* each pipe's active time, less the other pipe's blank, halved, must exceed the longest microschedule */
+ (vactive2_us - vblank1_us) / 2 > max_microschedule_us)
+ return true;
+
+ return false;
+}
+
+/*
+ * dml2_svp_drr_schedulable: Determine if SubVP + DRR config is schedulable
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe
+ * 2. Determine the frame time for the DRR display when adding required margin for MCLK switching
+ * (the margin is equal to the MALL region + SUBVP_DRR_MARGIN_US)
+ * 3. If the stretched DRR frame is shorter than the min_refresh_in_uhz period and
+ * (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR vblank)), report it as supported
+ *
+ * @ctx: DML2 context (pipe count, svp_pstate delays and callbacks)
+ * @context: dc state searched for the first master SUBVP_MAIN pipe
+ * @drr_timing: timing of the DRR display for the SubVP + DRR config
+ *
+ * Return:
+ * bool - True if the SubVP + DRR config is schedulable, false otherwise
+ */
+bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context, struct dc_crtc_timing *drr_timing)
+{
+	bool schedulable = false;
+	uint32_t i;
+	struct pipe_ctx *pipe = NULL;
+	struct dc_crtc_timing *main_timing = NULL;
+	struct dc_crtc_timing *phantom_timing = NULL;
+	struct dc_stream_state *phantom_stream;
+	int32_t prefetch_us = 0; // 32-bit on purpose: times above 32767 us (below ~30.5 Hz) do not fit in 16 bits
+	int32_t mall_region_us = 0;
+	int32_t drr_frame_us = 0; // nominal frame time
+	int32_t subvp_active_us = 0;
+	int32_t stretched_drr_us = 0;
+	int32_t drr_stretched_vblank_us = 0;
+	int32_t max_vblank_mallregion = 0;
+
+	// Find SubVP pipe. NOTE(review): if none matches, pipe is left at the last slot -- callers presumably guarantee one
+	for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+		pipe = &context->res_ctx.pipe_ctx[i];
+
+		// We check for master pipe, but it shouldn't matter since we only need
+		// the pipe for timing info (stream should be same for any pipe splits)
+		if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
+			continue;
+
+		// Find the SubVP pipe
+		if (ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
+			break;
+	}
+
+	phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, pipe->stream);
+	main_timing = &pipe->stream->timing;
+	phantom_timing = &phantom_stream->timing;
+	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+			ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us;
+	subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+			(double)(main_timing->pix_clk_100hz * 100) * 1000000;
+	drr_frame_us = drr_timing->v_total * drr_timing->h_total /
+			(double)(drr_timing->pix_clk_100hz * 100) * 1000000;
+	// P-State allow width and FW delays already included phantom_timing->v_addressable
+	mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+	stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+	drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
+			(double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
+	max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+
+	/* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the
+	 * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
+	 * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+	 * and the max of (VBLANK blanking time, MALL region)).
+	 */
+	if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 &&
+			subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0)
+		schedulable = true;
+
+	return schedulable;
+}
+
+
+/*
+ * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe
+ * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time))
+ * then report the configuration as supported
+ * 3. If the VBLANK display is DRR, then take the DRR static schedulability path
+ *
+ * @ctx: DML2 context (pipe count, svp_pstate delays and callbacks)
+ * @context: dc state to evaluate
+ *
+ * Return:
+ * bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise
+ */
+static bool subvp_vblank_schedulable(struct dml2_context *ctx, struct dc_state *context)
+{
+ struct pipe_ctx *pipe = NULL;
+ struct pipe_ctx *subvp_pipe = NULL;
+ bool found = false;
+ bool schedulable = false;
+ uint32_t i = 0;
+ uint8_t vblank_index = 0;
+ uint16_t prefetch_us = 0; /* NOTE(review): 16-bit microsecond storage; values above 65535 us would not fit -- confirm range */
+ uint16_t mall_region_us = 0;
+ uint16_t vblank_frame_us = 0;
+ uint16_t subvp_active_us = 0;
+ uint16_t vblank_blank_us = 0;
+ uint16_t max_vblank_mallregion = 0;
+ struct dc_crtc_timing *main_timing = NULL;
+ struct dc_crtc_timing *phantom_timing = NULL;
+ struct dc_crtc_timing *vblank_timing = NULL;
+ struct dc_stream_state *phantom_stream;
+ enum mall_stream_type pipe_mall_type;
+
+ /* For SubVP + VBLANK/DRR cases, we assume there can only be
+ * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK
+ * is supported, it is either a single VBLANK case or two VBLANK
+ * displays which are synchronized (in which case they have identical
+ * timings).
+ */
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe); /* queried before the stream/plane checks below */
+
+ // We check for master pipe, but it shouldn't matter since we only need
+ // the pipe for timing info (stream should be same for any pipe splits)
+ if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
+ continue;
+
+ if (!found && pipe_mall_type == SUBVP_NONE) {
+ // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
+ vblank_index = i;
+ found = true;
+ }
+
+ if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) /* remember only the first SUBVP_MAIN master pipe */
+ subvp_pipe = pipe;
+ }
+ // Use ignore_msa_timing_param flag to identify as DRR
+ if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) {
+ // SUBVP + DRR case
+ schedulable = dml2_svp_drr_schedulable(ctx, context, &context->res_ctx.pipe_ctx[vblank_index].stream->timing);
+ } else if (found) { /* NOTE(review): subvp_pipe is dereferenced below without a NULL check -- presumably a SUBVP_MAIN pipe always exists here */
+ phantom_stream = ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, subvp_pipe->stream);
+ main_timing = &subvp_pipe->stream->timing;
+ phantom_timing = &phantom_stream->timing;
+ vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
+ // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
+ // Also include the prefetch end to mallstart delay time
+ prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+ ctx->config.svp_pstate.subvp_prefetch_end_to_mall_start_us;
+ // P-State allow width and FW delays already included phantom_timing->v_addressable
+ mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+ (double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+ vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total /
+ (double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+ vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total /
+ (double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+ subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+ (double)(main_timing->pix_clk_100hz * 100) * 1000000;
+ max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us;
+
+ // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+ // and the max of (VBLANK blanking time, MALL region)
+ // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0)
+ if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) /* uint16_t operands promote to int, so the difference can go negative */
+ schedulable = true;
+ }
+ return schedulable;
+}
+
+/*
+ * dml2_svp_validate_static_schedulability: Check which SubVP case is calculated and handle
+ * static analysis based on the case.
+ *
+ * Three cases:
+ * 1. SubVP + SubVP
+ * 2. SubVP + VBLANK (DRR checked internally)
+ * 3. SubVP + VACTIVE (currently unsupported)
+ *
+ * @ctx: DML2 context (pipe count and svp_pstate callbacks)
+ * @context: dc state to evaluate
+ * @pstate_change_type: DRAM clock change support type, compared against the *_w_mall_sub_vp values below
+ *
+ * Return:
+ * bool - True if statically schedulable, false otherwise
+ */
+bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc_state *context, enum dml_dram_clock_change_support pstate_change_type)
+{
+ bool schedulable = true; // true by default for single display case
+ struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+ uint32_t i, pipe_idx;
+ uint8_t subvp_count = 0;
+ uint8_t vactive_count = 0;
+
+ for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { /* pipe_idx only advances for pipes that have a stream */
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ enum mall_stream_type pipe_mall_type = ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe);
+
+ if (!pipe->stream)
+ continue;
+
+ if (pipe->plane_state && !pipe->top_pipe &&
+ pipe_mall_type == SUBVP_MAIN)
+ subvp_count++;
+
+ // Count how many planes that aren't SubVP/phantom are capable of VACTIVE
+ // switching (SubVP + VACTIVE unsupported). In situations where we force
+ // SubVP for a VACTIVE plane, we don't want to increment the vactive_count.
+ if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 &&
+ pipe_mall_type == SUBVP_NONE) {
+ vactive_count++;
+ }
+ pipe_idx++;
+ }
+
+ if (subvp_count == 2) {
+ // Static schedulability check for SubVP + SubVP case
+ schedulable = subvp_subvp_schedulable(ctx, context);
+ } else if (pstate_change_type == dml_dram_clock_change_vblank_w_mall_sub_vp) {
+ // Static schedulability check for SubVP + VBLANK case. Also handle the case where
+ // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK)
+ if (vactive_count > 0)
+ schedulable = false;
+ else
+ schedulable = subvp_vblank_schedulable(ctx, context);
+ } else if (pstate_change_type == dml_dram_clock_change_vactive_w_mall_sub_vp &&
+ vactive_count > 0) {
+ // For single display SubVP cases, DML will output dml_dram_clock_change_vactive_w_mall_sub_vp by default.
+ // We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive_count.
+ // SubVP + VACTIVE currently unsupported
+ schedulable = false;
+ }
+ return schedulable;
+}
+
+static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state *state,
+ struct pipe_ctx *ref_pipe,
+ struct dc_stream_state *phantom_stream,
+ unsigned int dc_pipe_idx,
+ unsigned int svp_height,
+ unsigned int svp_vstartup)
+{ /* rewrites phantom_stream's vertical timing and src/dst height from svp_height, svp_vstartup and the svp_pstate delays */
+ unsigned int i;
+ double line_time, fp_and_sync_width_time;
+ struct pipe_ctx *pipe;
+ uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines;
+ static const double cvt_rb_vblank_max = ((double) 460 / (1000 * 1000)); /* 460 / 1e6; presumably 460 us expressed in seconds -- confirm */
+
+ // Find DML pipe index (pipe_idx) using dc_pipe_idx. NOTE(review): neither i nor pipe is read after this loop
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ pipe = &state->res_ctx.pipe_ctx[i];
+
+ if (!pipe->stream)
+ continue;
+
+ if (i == dc_pipe_idx)
+ break;
+ }
+
+ // Calculate lines required for pstate allow width and FW processing delays
+ pstate_width_fw_delay_lines = ((double)(ctx->config.svp_pstate.subvp_fw_processing_delay_us +
+ ctx->config.svp_pstate.subvp_pstate_allow_width_us) / 1000000) *
+ (ref_pipe->stream->timing.pix_clk_100hz * 100) /
+ (double)ref_pipe->stream->timing.h_total;
+
+ // DML calculation for MALL region doesn't take into account FW delay
+ // and required pstate allow width for multi-display cases
+ /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned
+ * to 2 swaths (i.e. 16 lines)
+ */
+ phantom_vactive = svp_height + pstate_width_fw_delay_lines + ctx->config.svp_pstate.subvp_swath_height_margin_lines;
+
+ phantom_stream->timing.v_front_porch = 1;
+
+ line_time = phantom_stream->timing.h_total / ((double)phantom_stream->timing.pix_clk_100hz * 100); /* duration of one line: h_total over the pixel clock */
+ fp_and_sync_width_time = (phantom_stream->timing.v_front_porch + phantom_stream->timing.v_sync_width) * line_time;
+
+ if ((svp_vstartup * line_time) + fp_and_sync_width_time > cvt_rb_vblank_max) { /* shrink vstartup so that vstartup + front porch + sync fits within cvt_rb_vblank_max */
+ svp_vstartup = (cvt_rb_vblank_max - fp_and_sync_width_time) / line_time;
+ }
+
+ // For backporch of phantom pipe, use vstartup of the main pipe
+ phantom_bp = svp_vstartup;
+
+ phantom_stream->dst.y = 0;
+ phantom_stream->dst.height = phantom_vactive;
+ phantom_stream->src.y = 0;
+ phantom_stream->src.height = phantom_vactive;
+
+ phantom_stream->timing.v_addressable = phantom_vactive;
+
+ phantom_stream->timing.v_total = phantom_stream->timing.v_addressable +
+ phantom_stream->timing.v_front_porch +
+ phantom_stream->timing.v_sync_width +
+ phantom_bp;
+ phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing
+}
+
+static struct dc_stream_state *enable_phantom_stream(struct dml2_context *ctx, struct dc_state *state, unsigned int dc_pipe_idx, unsigned int svp_height, unsigned int vstartup)
+{ /* creates a phantom stream for pipe dc_pipe_idx, copies and adjusts its timing, adds it to state and returns it */
+ struct pipe_ctx *ref_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx];
+ struct dc_stream_state *phantom_stream = ctx->config.svp_pstate.callbacks.create_phantom_stream(
+ ctx->config.svp_pstate.callbacks.dc,
+ state,
+ ref_pipe->stream); /* NOTE(review): the result is used below without a NULL check -- confirm the callback cannot fail */
+
+ /* stream has limited viewport and small timing */
+ memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing));
+ memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src));
+ memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst));
+ set_phantom_stream_timing(ctx, state, ref_pipe, phantom_stream, dc_pipe_idx, svp_height, vstartup);
+
+ ctx->config.svp_pstate.callbacks.add_phantom_stream(ctx->config.svp_pstate.callbacks.dc,
+ state,
+ phantom_stream,
+ ref_pipe->stream);
+ return phantom_stream;
+}
+
+static void enable_phantom_plane(struct dml2_context *ctx,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream,
+ unsigned int dc_pipe_idx)
+{ /* walks the bottom_pipe chain from dc_pipe_idx, filling a phantom plane from each pipe's plane and adding it to phantom_stream */
+ struct dc_plane_state *phantom_plane = NULL;
+ struct dc_plane_state *prev_phantom_plane = NULL;
+ struct pipe_ctx *curr_pipe = &state->res_ctx.pipe_ctx[dc_pipe_idx];
+
+ while (curr_pipe) {
+ if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) { /* same plane as the pipe above: reuse the previous phantom plane */
+ phantom_plane = prev_phantom_plane;
+ } else {
+ phantom_plane = ctx->config.svp_pstate.callbacks.create_phantom_plane(
+ ctx->config.svp_pstate.callbacks.dc,
+ state,
+ curr_pipe->plane_state);
+ if (!phantom_plane) /* creation failed: stop; planes already added are left in place */
+ return;
+ }
+
+ memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address));
+ memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality,
+ sizeof(phantom_plane->scaling_quality));
+ memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect));
+ memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect));
+ memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect));
+ memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size,
+ sizeof(phantom_plane->plane_size));
+ memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info,
+ sizeof(phantom_plane->tiling_info));
+ memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc));
+ //phantom_plane->tiling_info.gfx10compatible.compat_level = curr_pipe->plane_state->tiling_info.gfx10compatible.compat_level;
+ phantom_plane->format = curr_pipe->plane_state->format;
+ phantom_plane->rotation = curr_pipe->plane_state->rotation;
+ phantom_plane->visible = curr_pipe->plane_state->visible;
+
+ /* Shadow pipe has small viewport: clip to the phantom stream's active height. */
+ phantom_plane->clip_rect.y = 0;
+ phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable;
+
+ ctx->config.svp_pstate.callbacks.add_phantom_plane(ctx->config.svp_pstate.callbacks.dc, phantom_stream, phantom_plane, state);
+
+ curr_pipe = curr_pipe->bottom_pipe;
+ prev_phantom_plane = phantom_plane;
+ }
+}
+
+static void add_phantom_pipes_for_main_pipe(struct dml2_context *ctx, struct dc_state *state, unsigned int main_pipe_idx, unsigned int svp_height, unsigned int vstartup)
+{ /* creates the phantom stream and planes for main_pipe_idx, then builds scaling params on the pipes that carry that stream */
+ struct dc_stream_state *phantom_stream = NULL;
+ unsigned int i;
+
+ // The index of the DC pipe passed into this function is guaranteed to
+ // be a valid candidate for SubVP (i.e. has a plane, stream, doesn't
+ // already have phantom pipe assigned, etc.) by previous checks.
+ phantom_stream = enable_phantom_stream(ctx, state, main_pipe_idx, svp_height, vstartup);
+ enable_phantom_plane(ctx, state, phantom_stream, main_pipe_idx);
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+
+ // Build scaling params for phantom pipes which were newly added.
+ // We determine which phantom pipes were added by comparing with
+ // the phantom stream.
+ if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream &&
+ ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
+ pipe->stream->use_dynamic_meta = false;
+ pipe->plane_state->flip_immediate = false;
+ if (!ctx->config.svp_pstate.callbacks.build_scaling_params(pipe)) {
+ // Log / remove phantom pipes since failed to build scaling params (not implemented: the failure is currently ignored)
+ }
+ }
+ }
+}
+/* Remove and release every plane attached to @stream in @context; returns false if @stream is not in @context or a removal fails. */
+static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struct dc_stream_state *stream, struct dc_state *context)
+{
+ int i, old_plane_count;
+ struct dc_stream_status *stream_status = NULL;
+ struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 };
+
+ for (i = 0; i < context->stream_count; i++)
+ if (context->streams[i] == stream) {
+ stream_status = &context->stream_status[i];
+ break;
+ }
+
+ if (stream_status == NULL) {
+ return false;
+ }
+
+ old_plane_count = stream_status->plane_count;
+ /* Snapshot the plane list first: presumably the remove callback edits stream_status->plane_states (TODO confirm) */
+ for (i = 0; i < old_plane_count; i++)
+ del_planes[i] = stream_status->plane_states[i];
+
+ for (i = 0; i < old_plane_count; i++) {
+ if (!ctx->config.svp_pstate.callbacks.remove_phantom_plane(ctx->config.svp_pstate.callbacks.dc, stream, del_planes[i], context))
+ return false;
+ ctx->config.svp_pstate.callbacks.release_phantom_plane(ctx->config.svp_pstate.callbacks.dc, context, del_planes[i]);
+ }
+
+ return true;
+}
+/* Remove all phantom planes and streams from @state and clear is_phantom on every plane; returns true if a phantom pipe was found. */
+bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state *state)
+{
+ int i;
+ bool removed_pipe = false;
+ struct dc_stream_state *phantom_stream = NULL;
+
+ for (i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+ // tear down the planes and the stream of every pipe reported as SUBVP_PHANTOM
+ if (pipe->plane_state && pipe->stream && ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(state, pipe) == SUBVP_PHANTOM) {
+ phantom_stream = pipe->stream;
+ // NOTE(review): a false return from remove_all_phantom_planes_for_stream() is ignored here
+ remove_all_phantom_planes_for_stream(ctx, phantom_stream, state);
+ ctx->config.svp_pstate.callbacks.remove_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
+ ctx->config.svp_pstate.callbacks.release_phantom_stream(ctx->config.svp_pstate.callbacks.dc, state, phantom_stream);
+
+ removed_pipe = true;
+ }
+
+ if (pipe->plane_state) {
+ pipe->plane_state->is_phantom = false;
+ }
+ }
+ return removed_pipe;
+}
+
+/* Add phantom pipes for SubVP to @state; returns true if they were added, false otherwise.
+ * Conditions for setting up phantom pipes for SubVP (2, 4 and 5 are not checked in this function):
+ * 1. Not force disable SubVP
+ * 2. Full update (i.e. DC_VALIDATE_MODE_AND_PROGRAMMING)
+ * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?)
+ * 4. Display configuration passes validation
+ * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch)
+ */
+bool dml2_svp_add_phantom_pipe_to_dc_state(struct dml2_context *ctx, struct dc_state *state, struct dml_mode_support_info_st *mode_support_info)
+{
+ unsigned int dc_pipe_idx, dml_pipe_idx;
+ unsigned int svp_height, vstartup;
+
+ if (ctx->config.svp_pstate.force_disable_subvp)
+ return false;
+
+ if (!all_pipes_have_stream_and_plane(ctx, state))
+ return false;
+
+ if (mpo_in_use(state))
+ return false;
+
+ merge_pipes_for_subvp(ctx, state);
+ // to re-initialize viewport after the pipe merge
+ for (int i = 0; i < ctx->config.dcn_pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &state->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx->plane_state || !pipe_ctx->stream)
+ continue;
+
+ ctx->config.svp_pstate.callbacks.build_scaling_params(pipe_ctx);
+ }
+
+ if (enough_pipes_for_subvp(ctx, state) && assign_subvp_pipe(ctx, state, &dc_pipe_idx)) {
+ dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(ctx, state->res_ctx.pipe_ctx[dc_pipe_idx].stream->stream_id);
+ svp_height = mode_support_info->SubViewportLinesNeededInMALL[dml_pipe_idx];
+ vstartup = dml_get_vstartup_calculated(&ctx->v20.dml_core_ctx, dml_pipe_idx);
+
+ add_phantom_pipes_for_main_pipe(ctx, state, dc_pipe_idx, svp_height, vstartup);
+
+ return true;
+ }
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.h
new file mode 100644
index 000000000000..9d64851f54e7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML2_MALL_PHANTOM_H__
+#define __DML2_MALL_PHANTOM_H__
+
+#include "dml2_dc_types.h"
+#include "display_mode_core_structs.h"
+/* Parameter bundle; NOTE(review): its consumer is not visible in this header, confirm field semantics at the helper's definition. */
+struct dml2_svp_helper_select_best_svp_candidate_params {
+ const struct dml_display_cfg_st *dml_config;
+ const struct dml_mode_support_info_st *mode_support_info;
+ const unsigned int blacklist;
+ unsigned int *candidate_index;
+};
+
+struct dml2_context;
+
+unsigned int dml2_helper_calculate_num_ways_for_subvp(struct dml2_context *ctx, struct dc_state *context);
+/* Returns true if phantom pipes were added to @state for a SubVP main pipe, false otherwise. */
+bool dml2_svp_add_phantom_pipe_to_dc_state(struct dml2_context *ctx, struct dc_state *state, struct dml_mode_support_info_st *mode_support_info);
+/* Removes the phantom planes and streams from @state; returns true if any phantom pipe was found. */
+bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state *state);
+
+bool dml2_svp_validate_static_schedulability(struct dml2_context *ctx, struct dc_state *context, enum dml_dram_clock_change_support pstate_change_type);
+
+bool dml2_svp_drr_schedulable(struct dml2_context *ctx, struct dc_state *context, struct dc_crtc_timing *drr_timing);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c
new file mode 100644
index 000000000000..ef693f608d59
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dml2_policy.h"
+/* Recompute two clocks of @entry from the first non-zero one (dcfclk, then fabricclk, then dram speed) so all three carry the same derated bandwidth. */
+static void get_optimal_ntuple(
+ const struct soc_bounding_box_st *socbb,
+ struct soc_state_bounding_box_st *entry)
+{
+ if (entry->dcfclk_mhz > 0) {
+ float bw_on_sdp = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));
+
+ entry->fabricclk_mhz = bw_on_sdp / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
+ entry->dram_speed_mts = bw_on_sdp / (socbb->num_chans *
+ socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
+ } else if (entry->fabricclk_mhz > 0) {
+ float bw_on_fabric = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
+
+ entry->dcfclk_mhz = bw_on_fabric / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));
+ entry->dram_speed_mts = bw_on_fabric / (socbb->num_chans *
+ socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
+ } else if (entry->dram_speed_mts > 0) {
+ float bw_on_dram = (float)(entry->dram_speed_mts * socbb->num_chans *
+ socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
+
+ entry->fabricclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
+ entry->dcfclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));
+ }
+}
+/* Return the smallest of the derated DRAM, fabric and SDP bandwidths computed from @entry, i.e. its limiting bandwidth. */
+static float calculate_net_bw_in_mbytes_sec(const struct soc_bounding_box_st *socbb,
+ struct soc_state_bounding_box_st *entry)
+{
+ float memory_bw_mbytes_sec = (float)(entry->dram_speed_mts * socbb->num_chans *
+ socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
+
+ float fabric_bw_mbytes_sec = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
+
+ float sdp_bw_mbytes_sec = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));
+
+ float limiting_bw_mbytes_sec = memory_bw_mbytes_sec;
+
+ if (fabric_bw_mbytes_sec < limiting_bw_mbytes_sec)
+ limiting_bw_mbytes_sec = fabric_bw_mbytes_sec;
+
+ if (sdp_bw_mbytes_sec < limiting_bw_mbytes_sec)
+ limiting_bw_mbytes_sec = sdp_bw_mbytes_sec;
+
+ return limiting_bw_mbytes_sec;
+}
+/* Insert @entry into @table, keeping it sorted by ascending net bandwidth; @entry is dropped if @table is already full. */
+static void insert_entry_into_table_sorted(const struct soc_bounding_box_st *socbb,
+ struct soc_states_st *table,
+ struct soc_state_bounding_box_st *entry)
+{
+ int index = 0;
+ int i = 0;
+ float net_bw_of_new_state = 0;
+
+ get_optimal_ntuple(socbb, entry);
+
+ /* state_array is walked up to __DML_MAX_STATE_ARRAY_SIZE__ elsewhere in this file; never shift or store past that */
+ if (table->num_states >= __DML_MAX_STATE_ARRAY_SIZE__)
+ return;
+
+ if (table->num_states != 0) {
+ net_bw_of_new_state = calculate_net_bw_in_mbytes_sec(socbb, entry);
+ while (net_bw_of_new_state > calculate_net_bw_in_mbytes_sec(socbb, &table->state_array[index])) {
+ index++;
+ if (index >= (int) table->num_states)
+ break;
+ }
+
+ for (i = table->num_states; i > index; i--)
+ table->state_array[i] = table->state_array[i - 1];
+ }
+
+ table->state_array[index] = *entry;
+ table->state_array[index].dcfclk_mhz = (int)entry->dcfclk_mhz;
+ table->state_array[index].fabricclk_mhz = (int)entry->fabricclk_mhz;
+ table->state_array[index].dram_speed_mts = (int)entry->dram_speed_mts;
+ table->num_states++;
+}
+/* Remove the entry at @index from @table, closing the gap and zeroing the vacated last slot; an out-of-range @index is a no-op. */
+static void remove_entry_from_table_at_index(struct soc_states_st *table,
+ unsigned int index)
+{
+ int i;
+
+ if (index >= table->num_states)
+ return;
+
+ for (i = index; i < (int) table->num_states - 1; i++) {
+ table->state_array[i] = table->state_array[i + 1];
+ }
+ memset(&table->state_array[--table->num_states], 0, sizeof(struct soc_state_bounding_box_st));
+}
+/* Build p->out_states, sorted by net bandwidth, from the DCFCLK STAs and the UCLK/FCLK DPM levels of p->in_states; returns 0 on success, -1 if a required max clock is 0. */
+int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s,
+ struct dml2_policy_build_synthetic_soc_states_params *p)
+{
+ int i, j;
+ unsigned int min_fclk_mhz = p->in_states->state_array[0].fabricclk_mhz;
+ unsigned int min_dcfclk_mhz = p->in_states->state_array[0].dcfclk_mhz;
+ unsigned int min_socclk_mhz = p->in_states->state_array[0].socclk_mhz;
+
+ int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0,
+ max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0,
+ max_uclk_mhz = 0, max_socclk_mhz = 0;
+
+ int num_uclk_dpms = 0, num_fclk_dpms = 0;
+
+ for (i = 0; i < __DML_MAX_STATE_ARRAY_SIZE__; i++) {
+ if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = (int) p->in_states->state_array[i].dcfclk_mhz;
+ if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz)
+ max_fclk_mhz = (int) p->in_states->state_array[i].fabricclk_mhz;
+ if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz)
+ max_socclk_mhz = (int) p->in_states->state_array[i].socclk_mhz;
+ if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz)
+ max_uclk_mhz = (int) p->in_states->state_array[i].dram_speed_mts;
+ if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = (int) p->in_states->state_array[i].dispclk_mhz;
+ if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = (int) p->in_states->state_array[i].dppclk_mhz;
+ if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz;
+ if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz)
+ max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz;
+
+ if (p->in_states->state_array[i].fabricclk_mhz > 0)
+ num_fclk_dpms++;
+ if (p->in_states->state_array[i].dram_speed_mts > 0)
+ num_uclk_dpms++;
+ }
+
+ // These five clocks are mandatory: fail if no input state provides a non-zero value
+ if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dppclk_mhz || !max_phyclk_mhz || !max_dtbclk_mhz)
+ return -1;
+
+ p->out_states->num_states = 0;
+
+ s->entry = p->in_states->state_array[0];
+
+ s->entry.dispclk_mhz = max_dispclk_mhz;
+ s->entry.dppclk_mhz = max_dppclk_mhz;
+ s->entry.dtbclk_mhz = max_dtbclk_mhz;
+ s->entry.phyclk_mhz = max_phyclk_mhz;
+
+ s->entry.dscclk_mhz = max_dispclk_mhz / 3;
+
+ // Insert all the DCFCLK STAs first
+ for (i = 0; i < p->num_dcfclk_stas; i++) {
+ s->entry.dcfclk_mhz = p->dcfclk_stas_mhz[i];
+ s->entry.fabricclk_mhz = 0;
+ s->entry.dram_speed_mts = 0;
+ if (i > 0)
+ s->entry.socclk_mhz = max_socclk_mhz;
+
+ insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
+ }
+
+ // Insert the UCLK DPMS
+ for (i = 0; i < num_uclk_dpms; i++) {
+ s->entry.dcfclk_mhz = 0;
+ s->entry.fabricclk_mhz = 0;
+ s->entry.dram_speed_mts = p->in_states->state_array[i].dram_speed_mts;
+ if (i == 0) {
+ s->entry.socclk_mhz = min_socclk_mhz;
+ } else {
+ s->entry.socclk_mhz = max_socclk_mhz;
+ }
+
+ insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
+ }
+
+ // Insert FCLK DPMs (if present)
+ if (num_fclk_dpms > 2) {
+ for (i = 0; i < num_fclk_dpms; i++) {
+ s->entry.dcfclk_mhz = 0;
+ s->entry.fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz;
+ s->entry.dram_speed_mts = 0;
+
+ insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
+ }
+ }
+ // Add max FCLK
+ else if (num_fclk_dpms > 0) {
+ // With no FCLK DPM at all, num_fclk_dpms - 1 would index state_array[-1]; skip the insert instead
+ s->entry.dcfclk_mhz = 0;
+ s->entry.fabricclk_mhz = p->in_states->state_array[num_fclk_dpms - 1].fabricclk_mhz;
+ s->entry.dram_speed_mts = 0;
+
+ insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
+ }
+
+ // Remove states that require higher clocks than are supported
+ for (i = p->out_states->num_states - 1; i >= 0; i--) {
+ if (p->out_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz ||
+ p->out_states->state_array[i].fabricclk_mhz > max_fclk_mhz ||
+ p->out_states->state_array[i].dram_speed_mts > max_uclk_mhz)
+ remove_entry_from_table_at_index(p->out_states, i);
+ }
+
+ // At this point, the table contains all "points of interest" based on
+ // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
+ // ratios (by derate, are exact).
+
+ // Round up UCLK to DPMs
+ for (i = p->out_states->num_states - 1; i >= 0; i--) {
+ for (j = 0; j < num_uclk_dpms; j++) {
+ if (p->in_states->state_array[j].dram_speed_mts >= p->out_states->state_array[i].dram_speed_mts) {
+ p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[j].dram_speed_mts;
+ break;
+ }
+ }
+ }
+
+ // If FCLK is coarse grained, round up to next DPMs
+ if (num_fclk_dpms > 2) {
+ for (i = p->out_states->num_states - 1; i >= 0; i--) {
+ for (j = 0; j < num_fclk_dpms; j++) {
+ if (p->in_states->state_array[j].fabricclk_mhz >= p->out_states->state_array[i].fabricclk_mhz) {
+ p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[j].fabricclk_mhz;
+ break;
+ }
+ }
+ }
+ }
+
+ // Clamp to min FCLK/DCFCLK
+ for (i = p->out_states->num_states - 1; i >= 0; i--) {
+ if (p->out_states->state_array[i].fabricclk_mhz < min_fclk_mhz) {
+ p->out_states->state_array[i].fabricclk_mhz = min_fclk_mhz;
+ }
+ if (p->out_states->state_array[i].dcfclk_mhz < min_dcfclk_mhz) {
+ p->out_states->state_array[i].dcfclk_mhz = min_dcfclk_mhz;
+ }
+ }
+
+ // Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
+ i = 0;
+ while (i < (int) p->out_states->num_states - 1) {
+ if (p->out_states->state_array[i].dcfclk_mhz == p->out_states->state_array[i + 1].dcfclk_mhz &&
+ p->out_states->state_array[i].fabricclk_mhz == p->out_states->state_array[i + 1].fabricclk_mhz &&
+ p->out_states->state_array[i].dram_speed_mts == p->out_states->state_array[i + 1].dram_speed_mts)
+ remove_entry_from_table_at_index(p->out_states, i);
+ else
+ i++;
+ }
+
+ return 0;
+}
+/* Fill @policy with the default (unoptimized) mode-evaluation settings, then apply the overrides for DCN35/DCN351/DCN36 when @project is one of those. */
+void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy)
+{
+ for (int i = 0; i < __DML_NUM_PLANES__; i++) {
+ policy->MPCCombineUse[i] = dml_mpc_as_needed_for_voltage; // TOREVIEW: Is this still needed? When is MPCC useful for pstate given CRB?
+ policy->ODMUse[i] = dml_odm_use_policy_combine_as_needed;
+ policy->ImmediateFlipRequirement[i] = dml_immediate_flip_required;
+ policy->AllowForPStateChangeOrStutterInVBlank[i] = dml_prefetch_support_uclk_fclk_and_stutter_if_possible;
+ }
+
+ /* Change the default policy initializations as per spreadsheet. We might need to
+ * review and change them later on as per Jun's earlier comments.
+ */
+ policy->UseUnboundedRequesting = dml_unbounded_requesting_enable;
+ policy->UseMinimumRequiredDCFCLK = false;
+ policy->DRAMClockChangeRequirementFinal = true; // TOREVIEW: What does this mean?
+ policy->FCLKChangeRequirementFinal = true; // TOREVIEW: What does this mean?
+ policy->USRRetrainingRequiredFinal = true;
+ policy->EnhancedPrefetchScheduleAccelerationFinal = true; // TOREVIEW: What does this mean?
+ policy->NomDETInKByteOverrideEnable = false;
+ policy->NomDETInKByteOverrideValue = 0;
+ policy->DCCProgrammingAssumesScanDirectionUnknownFinal = true;
+ policy->SynchronizeTimingsFinal = true;
+ policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = true;
+ policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = true; // TOREVIEW: What does this mean?
+ policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = true; // TOREVIEW: What does this mean?
+ if (project == dml_project_dcn35 ||
+ project == dml_project_dcn36 ||
+ project == dml_project_dcn351) {
+ policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false;
+ policy->EnhancedPrefetchScheduleAccelerationFinal = 0;
+ policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/
+ policy->UseOnlyMaxPrefetchModes = 1;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.h
new file mode 100644
index 000000000000..e83e05248592
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DML2_POLICY_H__
+#define __DML2_POLICY_H__
+
+#include "display_mode_core_structs.h"
+/* Inputs and output table for dml2_policy_build_synthetic_soc_states(). */
+struct dml2_policy_build_synthetic_soc_states_params {
+ const struct soc_bounding_box_st *in_bbox;
+ struct soc_states_st *in_states;
+ struct soc_states_st *out_states; /* rebuilt from scratch: num_states is reset to 0 first */
+ int *dcfclk_stas_mhz; /* array of num_dcfclk_stas DCFCLK values, in MHz */
+ int num_dcfclk_stas;
+};
+/* Scratch storage: the entry dml2_policy_build_synthetic_soc_states() fills in before each table insertion. */
+struct dml2_policy_build_synthetic_soc_states_scratch {
+ struct soc_state_bounding_box_st entry;
+};
+/* Returns 0 on success, -1 when p->in_states has no non-zero DCFCLK, DISPCLK, DPPCLK, PHYCLK or DTBCLK. */
+int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s,
+ struct dml2_policy_build_synthetic_soc_states_params *p);
+/* Fills @policy with default settings plus the DCN35/DCN351/DCN36 overrides selected by @project. */
+void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
new file mode 100644
index 000000000000..3b866e876bf4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -0,0 +1,1525 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "display_mode_core.h"
+#include "dml2_internal_types.h"
+#include "dml2_translation_helper.h"
+
+#define NUM_DCFCLK_STAS 5
+#define NUM_DCFCLK_STAS_NEW 8
+/* Fill @out with hard-coded IP parameters for dml2->v20.dml_core_ctx.project (unlisted projects get the DCN32x values); in_dc is not used. */
+void dml2_init_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out)
+{
+ switch (dml2->v20.dml_core_ctx.project) {
+ case dml_project_dcn32:
+ case dml_project_dcn321:
+ default:
+ // Hardcoded values for DCN32x
+ out->vblank_nom_default_us = 600;
+ out->rob_buffer_size_kbytes = 128;
+ out->config_return_buffer_size_in_kbytes = 1280;
+ out->config_return_buffer_segment_size_in_kbytes = 64;
+ out->compressed_buffer_segment_size_in_kbytes = 64;
+ out->meta_fifo_size_in_kentries = 22;
+ out->zero_size_buffer_entries = 512;
+ out->dpte_buffer_size_in_pte_reqs_luma = 68;
+ out->dpte_buffer_size_in_pte_reqs_chroma = 36;
+ out->dcc_meta_buffer_size_bytes = 6272;
+ out->gpuvm_max_page_table_levels = 4;
+ out->hostvm_max_page_table_levels = 0;
+ out->pixel_chunk_size_kbytes = 8;
+ //out->alpha_pixel_chunk_size_kbytes;
+ out->min_pixel_chunk_size_bytes = 1024;
+ out->meta_chunk_size_kbytes = 2;
+ out->min_meta_chunk_size_bytes = 256;
+ out->writeback_chunk_size_kbytes = 8;
+ out->line_buffer_size_bits = 1171920;
+ out->max_line_buffer_lines = 32;
+ out->writeback_interface_buffer_size_kbytes = 90;
+ //Number of pipes after DCN Pipe harvesting
+ out->max_num_dpp = dml2->config.dcn_pipe_count;
+ out->max_num_otg = dml2->config.dcn_pipe_count;
+ out->max_num_wb = 1;
+ out->max_dchub_pscl_bw_pix_per_clk = 4;
+ out->max_pscl_lb_bw_pix_per_clk = 2;
+ out->max_lb_vscl_bw_pix_per_clk = 4;
+ out->max_vscl_hscl_bw_pix_per_clk = 4;
+ out->max_hscl_ratio = 6;
+ out->max_vscl_ratio = 6;
+ out->max_hscl_taps = 8;
+ out->max_vscl_taps = 8;
+ out->dispclk_ramp_margin_percent = 1;
+ out->dppclk_delay_subtotal = 47;
+ out->dppclk_delay_scl = 50;
+ out->dppclk_delay_scl_lb_only = 16;
+ out->dppclk_delay_cnvc_formatter = 28;
+ out->dppclk_delay_cnvc_cursor = 6;
+ out->cursor_buffer_size = 16;
+ out->cursor_chunk_size = 2;
+ out->dispclk_delay_subtotal = 125;
+ out->max_inter_dcn_tile_repeaters = 8;
+ out->writeback_max_hscl_ratio = 1;
+ out->writeback_max_vscl_ratio = 1;
+ out->writeback_min_hscl_ratio = 1;
+ out->writeback_min_vscl_ratio = 1;
+ out->writeback_max_hscl_taps = 1;
+ out->writeback_max_vscl_taps = 1;
+ out->writeback_line_buffer_buffer_size = 0;
+ out->num_dsc = 4;
+ out->maximum_dsc_bits_per_component = 12;
+ out->maximum_pixels_per_line_per_dsc_unit = 6016;
+ out->dsc422_native_support = true;
+ out->dcc_supported = true;
+ out->ptoi_supported = false;
+
+ out->gpuvm_enable = false;
+ out->hostvm_enable = false;
+ out->cursor_64bpp_support = false;
+ out->dynamic_metadata_vm_enabled = false;
+
+ out->max_num_hdmi_frl_outputs = 1;
+ out->max_num_dp2p0_outputs = 2;
+ out->max_num_dp2p0_streams = 4;
+ break;
+
+ case dml_project_dcn35:
+ case dml_project_dcn351:
+ case dml_project_dcn36:
+ out->rob_buffer_size_kbytes = 64;
+ out->config_return_buffer_size_in_kbytes = 1792;
+ out->compressed_buffer_segment_size_in_kbytes = 64;
+ out->meta_fifo_size_in_kentries = 32;
+ out->zero_size_buffer_entries = 512;
+ out->pixel_chunk_size_kbytes = 8;
+ out->alpha_pixel_chunk_size_kbytes = 4;
+ out->min_pixel_chunk_size_bytes = 1024;
+ out->meta_chunk_size_kbytes = 2;
+ out->min_meta_chunk_size_bytes = 256;
+ out->writeback_chunk_size_kbytes = 8;
+ out->dpte_buffer_size_in_pte_reqs_luma = 68;
+ out->dpte_buffer_size_in_pte_reqs_chroma = 36;
+ out->dcc_meta_buffer_size_bytes = 6272;
+ out->gpuvm_enable = 1;
+ out->hostvm_enable = 1;
+ out->gpuvm_max_page_table_levels = 1;
+ out->hostvm_max_page_table_levels = 2;
+ out->num_dsc = 4;
+ out->maximum_dsc_bits_per_component = 12;
+ out->maximum_pixels_per_line_per_dsc_unit = 6016;
+ out->dsc422_native_support = 1;
+ out->line_buffer_size_bits = 986880;
+ out->dcc_supported = 1;
+ out->max_line_buffer_lines = 32;
+ out->writeback_interface_buffer_size_kbytes = 90;
+ out->max_num_dpp = 4;
+ out->max_num_otg = 4;
+ out->max_num_hdmi_frl_outputs = 1;
+ out->max_num_dp2p0_outputs = 2;
+ out->max_num_dp2p0_streams = 4;
+ out->max_num_wb = 1;
+
+ out->max_dchub_pscl_bw_pix_per_clk = 4;
+ out->max_pscl_lb_bw_pix_per_clk = 2;
+ out->max_lb_vscl_bw_pix_per_clk = 4;
+ out->max_vscl_hscl_bw_pix_per_clk = 4;
+ out->max_hscl_ratio = 6;
+ out->max_vscl_ratio = 6;
+ out->max_hscl_taps = 8;
+ out->max_vscl_taps = 8;
+ out->dispclk_ramp_margin_percent = 1.11;
+
+ out->dppclk_delay_subtotal = 47;
+ out->dppclk_delay_scl = 50;
+ out->dppclk_delay_scl_lb_only = 16;
+ out->dppclk_delay_cnvc_formatter = 28;
+ out->dppclk_delay_cnvc_cursor = 6;
+ out->dispclk_delay_subtotal = 125;
+
+ out->dynamic_metadata_vm_enabled = false;
+ out->max_inter_dcn_tile_repeaters = 8;
+ out->cursor_buffer_size = 16; // kBytes
+ out->cursor_chunk_size = 2; // kBytes
+
+ out->writeback_line_buffer_buffer_size = 0;
+ out->writeback_max_hscl_ratio = 1;
+ out->writeback_max_vscl_ratio = 1;
+ out->writeback_min_hscl_ratio = 1;
+ out->writeback_min_vscl_ratio = 1;
+ out->writeback_max_hscl_taps = 1;
+ out->writeback_max_vscl_taps = 1;
+ out->ptoi_supported = 0;
+
+ out->vblank_nom_default_us = 668; /*not in dml, but in programming guide, hard coded in dml2_translate_ip_params*/
+ out->config_return_buffer_segment_size_in_kbytes = 64; /*required, but does not exist, hard coded in dml2_translate_ip_params*/
+ break;
+
+ case dml_project_dcn401:
+ // Hardcoded values for DCN4m
+ out->vblank_nom_default_us = 668; //600;
+ out->rob_buffer_size_kbytes = 192; //128;
+ out->config_return_buffer_size_in_kbytes = 1344; //1280;
+ out->config_return_buffer_segment_size_in_kbytes = 64;
+ out->compressed_buffer_segment_size_in_kbytes = 64;
+ out->meta_fifo_size_in_kentries = 22;
+ out->dpte_buffer_size_in_pte_reqs_luma = 68;
+ out->dpte_buffer_size_in_pte_reqs_chroma = 36;
+ out->gpuvm_max_page_table_levels = 4;
+ out->pixel_chunk_size_kbytes = 8;
+ out->alpha_pixel_chunk_size_kbytes = 4;
+ out->min_pixel_chunk_size_bytes = 1024;
+ out->writeback_chunk_size_kbytes = 8;
+ out->line_buffer_size_bits = 1171920;
+ out->max_line_buffer_lines = 32;
+ out->writeback_interface_buffer_size_kbytes = 90;
+ //Number of pipes after DCN Pipe harvesting
+ out->max_num_dpp = dml2->config.dcn_pipe_count;
+ out->max_num_otg = dml2->config.dcn_pipe_count;
+ out->max_num_wb = 1;
+ out->max_dchub_pscl_bw_pix_per_clk = 4;
+ out->max_pscl_lb_bw_pix_per_clk = 2;
+ out->max_lb_vscl_bw_pix_per_clk = 4;
+ out->max_vscl_hscl_bw_pix_per_clk = 4;
+ out->max_hscl_ratio = 6;
+ out->max_vscl_ratio = 6;
+ out->max_hscl_taps = 8;
+ out->max_vscl_taps = 8;
+ out->dispclk_ramp_margin_percent = 1;
+ out->dppclk_delay_subtotal = 47;
+ out->dppclk_delay_scl = 50;
+ out->dppclk_delay_scl_lb_only = 16;
+ out->dppclk_delay_cnvc_formatter = 28;
+ out->dppclk_delay_cnvc_cursor = 6;
+ out->dispclk_delay_subtotal = 125;
+ out->cursor_buffer_size = 24; //16
+ out->cursor_chunk_size = 2;
+ out->max_inter_dcn_tile_repeaters = 8;
+ out->writeback_max_hscl_ratio = 1;
+ out->writeback_max_vscl_ratio = 1;
+ out->writeback_min_hscl_ratio = 1;
+ out->writeback_min_vscl_ratio = 1;
+ out->writeback_max_hscl_taps = 1;
+ out->writeback_max_vscl_taps = 1;
+ out->writeback_line_buffer_buffer_size = 0;
+ out->num_dsc = 4;
+ out->maximum_dsc_bits_per_component = 12;
+ out->maximum_pixels_per_line_per_dsc_unit = 5760;
+ out->dsc422_native_support = true;
+ out->dcc_supported = true;
+ out->ptoi_supported = false;
+
+ out->gpuvm_enable = false;
+ out->hostvm_enable = false;
+ out->cursor_64bpp_support = true; //false;
+ out->dynamic_metadata_vm_enabled = false;
+
+ out->max_num_hdmi_frl_outputs = 1;
+ out->max_num_dp2p0_outputs = 4; //2;
+ out->max_num_dp2p0_streams = 4;
+ break;
+ }
+}
+
+/*
+ * dml2_init_socbb_params - populate a SoC bounding box from built-in defaults
+ * @dml2: DML2 context supplying the project id and the bbox / MALL overrides
+ * @in_dc: not referenced by this function
+ * @out: bounding box to fill in
+ *
+ * Common baseline values are written first, the per-project switch then
+ * adjusts them, and last the DRAM channel count and width are replaced by the
+ * bbox overrides whenever those overrides are non-zero.
+ */
+void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out)
+{
+	/* Clocks: the PCIe reference is fixed, the others come from the overrides. */
+	out->pcierefclk_mhz = 100;
+	out->dprefclk_mhz = dml2->config.bbox_overrides.dprefclk_mhz;
+	out->xtalclk_mhz = dml2->config.bbox_overrides.xtalclk_mhz;
+	out->refclk_mhz = dml2->config.bbox_overrides.dchub_refclk_mhz;
+	out->dispclk_dppclk_vco_speed_mhz = dml2->config.bbox_overrides.disp_pll_vco_speed_mhz;
+	out->phy_downspread_percent = 0.38;
+	out->dcn_downspread_percent = 0.5;
+
+	/* Bandwidth efficiency percentages. */
+	out->pct_ideal_sdp_bw_after_urgent = 100;
+	out->pct_ideal_fabric_bw_after_urgent = 67;
+	out->pct_ideal_dram_bw_after_urgent_pixel_only = 20;
+	out->pct_ideal_dram_bw_after_urgent_pixel_and_vm = 60;
+	out->pct_ideal_dram_bw_after_urgent_vm_only = 30;
+	out->pct_ideal_dram_bw_after_urgent_strobe = 67;
+	out->max_avg_sdp_bw_use_normal_percent = 80;
+	out->max_avg_fabric_bw_use_normal_percent = 60;
+	out->max_avg_dram_bw_use_normal_percent = 15;
+	out->max_avg_dram_bw_use_normal_strobe_percent = 50;
+
+	/* Request and return path sizing. */
+	out->max_outstanding_reqs = 512;
+	out->urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096;
+	out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096;
+	out->urgent_out_of_order_return_per_channel_vm_only_bytes = 4096;
+	out->return_bus_width_bytes = 64;
+	out->dram_channel_width_bytes = 2;
+	out->fabric_datapath_to_dcn_data_return_bytes = 64;
+
+	/* Page sizes and MALL (the CAB allocation is converted from bytes to MiB). */
+	out->hostvm_min_page_size_kbytes = 0;
+	out->gpuvm_min_page_size_kbytes = 256;
+	out->mall_allocated_for_dcn_mbytes = dml2->config.mall_cfg.max_cab_allocation_bytes / 1048576;
+
+	out->do_urgent_latency_adjustment = true;
+
+	/* Per-project adjustments on top of the baseline above. */
+	switch (dml2->v20.dml_core_ctx.project) {
+	case dml_project_dcn321:
+		out->num_chans = 8;
+		out->round_trip_ping_latency_dcfclk_cycles = 207;
+		out->smn_latency_us = 0;
+		break;
+
+	case dml_project_dcn35:
+	case dml_project_dcn351:
+	case dml_project_dcn36:
+		out->num_chans = 4;
+		out->round_trip_ping_latency_dcfclk_cycles = 106;
+		out->smn_latency_us = 2;
+		/* These two replace the baseline values set before the switch. */
+		out->dispclk_dppclk_vco_speed_mhz = 3600;
+		out->pct_ideal_dram_bw_after_urgent_pixel_only = 65.0;
+		break;
+
+	case dml_project_dcn401:
+		out->pct_ideal_fabric_bw_after_urgent = 76;		/* baseline 67 */
+		out->max_avg_sdp_bw_use_normal_percent = 75;		/* baseline 80 */
+		out->max_avg_fabric_bw_use_normal_percent = 57;	/* baseline 60 */
+
+		/* baseline 4096 for all three */
+		out->urgent_out_of_order_return_per_channel_pixel_only_bytes = 0;
+		out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 0;
+		out->urgent_out_of_order_return_per_channel_vm_only_bytes = 0;
+
+		out->num_chans = 16;
+		out->round_trip_ping_latency_dcfclk_cycles = 1000;
+		out->smn_latency_us = 0;
+		out->mall_allocated_for_dcn_mbytes = dml2->config.mall_cfg.max_cab_allocation_bytes / 1048576;
+		break;
+
+	case dml_project_dcn32:
+	default:
+		/* DCN32 values double as the fallback for unlisted projects. */
+		out->num_chans = 24;
+		out->round_trip_ping_latency_dcfclk_cycles = 263;
+		out->smn_latency_us = 2;
+		break;
+	}
+
+	/* Non-zero overrides win over the per-project DRAM geometry. */
+	if (dml2->config.bbox_overrides.dram_num_chan)
+		out->num_chans = dml2->config.bbox_overrides.dram_num_chan;
+
+	if (dml2->config.bbox_overrides.dram_chanel_width_bytes)
+		out->dram_channel_width_bytes = dml2->config.bbox_overrides.dram_chanel_width_bytes;
+}
+
+/*
+ * dml2_init_soc_states - build the SoC state table used by DML2
+ * @dml2: DML2 context; provides the project id, bbox overrides and scratch
+ * @in_dc: not referenced by this function
+ * @in_bbox: bounding box handed on to the synthetic-state builder
+ * @out: state table to populate
+ *
+ * Steps, in order:
+ *  1. seed in_states with hard-coded per-project values,
+ *  2. apply every non-zero latency override to all seeded states,
+ *  3. fill the DCFCLK STA points (nothing is written for DCN35/DCN351/DCN36),
+ *  4. if the override clock table reports states, copy its per-clock entries,
+ *  5. DCN35/DCN351/DCN36: write one output state per input state directly;
+ *     every other project: call dml2_policy_build_synthetic_soc_states().
+ *
+ * NOTE(review): p lives in the context scratch area but p->dcfclk_stas_mhz is
+ * pointed at arrays local to this function, so that pointer must not be
+ * dereferenced once this function has returned.
+ */
+void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
+	const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out)
+{
+	struct dml2_policy_build_synthetic_soc_states_scratch *s = &dml2->v20.scratch.create_scratch.build_synthetic_socbb_scratch;
+	struct dml2_policy_build_synthetic_soc_states_params *p = &dml2->v20.scratch.build_synthetic_socbb_params;
+	unsigned int dcfclk_stas_mhz[NUM_DCFCLK_STAS] = {0};
+	unsigned int dcfclk_stas_mhz_new[NUM_DCFCLK_STAS_NEW] = {0};
+	unsigned int dml_project = dml2->v20.dml_core_ctx.project;
+
+	unsigned int i = 0;
+	unsigned int transactions_per_mem_clock = 16; // project specific, depends on used Memory type
+
+	/* DCN351 uses the NUM_DCFCLK_STAS_NEW-sized STA array. */
+	if (dml_project == dml_project_dcn351) {
+		p->dcfclk_stas_mhz = dcfclk_stas_mhz_new;
+		p->num_dcfclk_stas = NUM_DCFCLK_STAS_NEW;
+	} else {
+		p->dcfclk_stas_mhz = dcfclk_stas_mhz;
+		p->num_dcfclk_stas = NUM_DCFCLK_STAS;
+	}
+
+	p->in_bbox = in_bbox;
+	p->out_states = out;
+	p->in_states = &dml2->v20.scratch.create_scratch.in_states;
+
+	/*
+	 * Initial hardcoded values. Projects without their own case label
+	 * (DCN35/DCN351/DCN36 among them) take the DCN32 values via default.
+	 * State 0 is fully described; state 1 only sets the clocks listed.
+	 */
+	switch (dml_project) {
+
+	case dml_project_dcn32:
+	default:
+		p->in_states->num_states = 2;
+		transactions_per_mem_clock = 16;
+		p->in_states->state_array[0].socclk_mhz = 620.0;
+		p->in_states->state_array[0].dscclk_mhz = 716.667;
+		p->in_states->state_array[0].phyclk_mhz = 810;
+		p->in_states->state_array[0].phyclk_d18_mhz = 667;
+		p->in_states->state_array[0].phyclk_d32_mhz = 625;
+		p->in_states->state_array[0].dtbclk_mhz = 1564.0;
+		p->in_states->state_array[0].fabricclk_mhz = 450.0;
+		p->in_states->state_array[0].dcfclk_mhz = 300.0;
+		p->in_states->state_array[0].dispclk_mhz = 2150.0;
+		p->in_states->state_array[0].dppclk_mhz = 2150.0;
+		p->in_states->state_array[0].dram_speed_mts = 100 * transactions_per_mem_clock;
+
+		p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4;
+		p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0;
+		p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0;
+		p->in_states->state_array[0].writeback_latency_us = 12;
+		p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1;
+		p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 3000;
+		p->in_states->state_array[0].sr_exit_z8_time_us = 0;
+		p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0;
+		p->in_states->state_array[0].dram_clock_change_latency_us = 400;
+		p->in_states->state_array[0].use_ideal_dram_bw_strobe = true;
+		p->in_states->state_array[0].sr_exit_time_us = 42.97;
+		p->in_states->state_array[0].sr_enter_plus_exit_time_us = 49.94;
+		p->in_states->state_array[0].fclk_change_latency_us = 20;
+		p->in_states->state_array[0].usr_retraining_latency_us = 2;
+
+		p->in_states->state_array[1].socclk_mhz = 1200.0;
+		p->in_states->state_array[1].fabricclk_mhz = 2500.0;
+		p->in_states->state_array[1].dcfclk_mhz = 1564.0;
+		p->in_states->state_array[1].dram_speed_mts = 1125 * transactions_per_mem_clock;
+		break;
+
+	case dml_project_dcn321:
+		p->in_states->num_states = 2;
+		transactions_per_mem_clock = 16;
+		p->in_states->state_array[0].socclk_mhz = 582.0;
+		p->in_states->state_array[0].dscclk_mhz = 573.333;
+		p->in_states->state_array[0].phyclk_mhz = 810;
+		p->in_states->state_array[0].phyclk_d18_mhz = 667;
+		p->in_states->state_array[0].phyclk_d32_mhz = 313;
+		p->in_states->state_array[0].dtbclk_mhz = 1564.0;
+		p->in_states->state_array[0].fabricclk_mhz = 450.0;
+		p->in_states->state_array[0].dcfclk_mhz = 300.0;
+		p->in_states->state_array[0].dispclk_mhz = 1720.0;
+		p->in_states->state_array[0].dppclk_mhz = 1720.0;
+		p->in_states->state_array[0].dram_speed_mts = 100 * transactions_per_mem_clock;
+
+		p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4;
+		p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0;
+		p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0;
+		p->in_states->state_array[0].writeback_latency_us = 12;
+		p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1;
+		p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 3000;
+		p->in_states->state_array[0].sr_exit_z8_time_us = 0;
+		p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0;
+		p->in_states->state_array[0].dram_clock_change_latency_us = 400;
+		p->in_states->state_array[0].use_ideal_dram_bw_strobe = true;
+		p->in_states->state_array[0].sr_exit_time_us = 19.95;
+		p->in_states->state_array[0].sr_enter_plus_exit_time_us = 24.36;
+		p->in_states->state_array[0].fclk_change_latency_us = 7;
+		p->in_states->state_array[0].usr_retraining_latency_us = 0;
+
+		p->in_states->state_array[1].socclk_mhz = 1200.0;
+		p->in_states->state_array[1].fabricclk_mhz = 2250.0;
+		p->in_states->state_array[1].dcfclk_mhz = 1434.0;
+		p->in_states->state_array[1].dram_speed_mts = 1000 * transactions_per_mem_clock;
+		break;
+	case dml_project_dcn401:
+		p->in_states->num_states = 2;
+		transactions_per_mem_clock = 16;
+		p->in_states->state_array[0].socclk_mhz = 300; //620.0;
+		p->in_states->state_array[0].dscclk_mhz = 666.667; //716.667;
+		p->in_states->state_array[0].phyclk_mhz = 810;
+		p->in_states->state_array[0].phyclk_d18_mhz = 667;
+		p->in_states->state_array[0].phyclk_d32_mhz = 625;
+		p->in_states->state_array[0].dtbclk_mhz = 2000; //1564.0;
+		p->in_states->state_array[0].fabricclk_mhz = 300; //450.0;
+		p->in_states->state_array[0].dcfclk_mhz = 200; //300.0;
+		p->in_states->state_array[0].dispclk_mhz = 2000; //2150.0;
+		p->in_states->state_array[0].dppclk_mhz = 2000; //2150.0;
+		p->in_states->state_array[0].dram_speed_mts = 97 * transactions_per_mem_clock; //100 *
+
+		p->in_states->state_array[0].urgent_latency_pixel_data_only_us = 4;
+		p->in_states->state_array[0].urgent_latency_pixel_mixed_with_vm_data_us = 0;
+		p->in_states->state_array[0].urgent_latency_vm_data_only_us = 0;
+		p->in_states->state_array[0].writeback_latency_us = 12;
+		p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_component_us = 1;
+		p->in_states->state_array[0].urgent_latency_adjustment_fabric_clock_reference_mhz = 1000; //3000;
+		p->in_states->state_array[0].sr_exit_z8_time_us = 0;
+		p->in_states->state_array[0].sr_enter_plus_exit_z8_time_us = 0;
+		p->in_states->state_array[0].dram_clock_change_latency_us = 400;
+		p->in_states->state_array[0].use_ideal_dram_bw_strobe = true;
+		p->in_states->state_array[0].sr_exit_time_us = 15.70; //42.97;
+		p->in_states->state_array[0].sr_enter_plus_exit_time_us = 20.20; //49.94;
+		p->in_states->state_array[0].fclk_change_latency_us = 0; //20;
+		p->in_states->state_array[0].usr_retraining_latency_us = 0; //2;
+
+		p->in_states->state_array[1].socclk_mhz = 1600; //1200.0;
+		p->in_states->state_array[1].fabricclk_mhz = 2500; //2500.0;
+		p->in_states->state_array[1].dcfclk_mhz = 1800; //1564.0;
+		p->in_states->state_array[1].dram_speed_mts = 1125 * transactions_per_mem_clock;
+		break;
+	}
+
+	/* Override from passed values, if available (zero means "no override") */
+	for (i = 0; i < p->in_states->num_states; i++) {
+		if (dml2->config.bbox_overrides.sr_exit_latency_us) {
+			p->in_states->state_array[i].sr_exit_time_us =
+				dml2->config.bbox_overrides.sr_exit_latency_us;
+		}
+
+		if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) {
+			p->in_states->state_array[i].sr_enter_plus_exit_time_us =
+				dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us;
+		}
+
+		if (dml2->config.bbox_overrides.sr_exit_z8_time_us) {
+			p->in_states->state_array[i].sr_exit_z8_time_us =
+				dml2->config.bbox_overrides.sr_exit_z8_time_us;
+		}
+
+		if (dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us) {
+			p->in_states->state_array[i].sr_enter_plus_exit_z8_time_us =
+				dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us;
+		}
+
+		if (dml2->config.bbox_overrides.urgent_latency_us) {
+			p->in_states->state_array[i].urgent_latency_pixel_data_only_us =
+				dml2->config.bbox_overrides.urgent_latency_us;
+		}
+
+		if (dml2->config.bbox_overrides.dram_clock_change_latency_us) {
+			p->in_states->state_array[i].dram_clock_change_latency_us =
+				dml2->config.bbox_overrides.dram_clock_change_latency_us;
+		}
+
+		if (dml2->config.bbox_overrides.fclk_change_latency_us) {
+			p->in_states->state_array[i].fclk_change_latency_us =
+				dml2->config.bbox_overrides.fclk_change_latency_us;
+		}
+	}
+
+	/* DCFCLK stas values are project specific */
+	if ((dml_project == dml_project_dcn32) ||
+		(dml_project == dml_project_dcn321)) {
+		/* End points follow the seeded states; the middle points are fixed. */
+		p->dcfclk_stas_mhz[0] = p->in_states->state_array[0].dcfclk_mhz;
+		p->dcfclk_stas_mhz[1] = 615;
+		p->dcfclk_stas_mhz[2] = 906;
+		p->dcfclk_stas_mhz[3] = 1324;
+		p->dcfclk_stas_mhz[4] = p->in_states->state_array[1].dcfclk_mhz;
+	} else if (dml_project != dml_project_dcn35 &&
+			dml_project != dml_project_dcn36 &&
+			dml_project != dml_project_dcn351) {
+		p->dcfclk_stas_mhz[0] = 300;
+		p->dcfclk_stas_mhz[1] = 615;
+		p->dcfclk_stas_mhz[2] = 906;
+		p->dcfclk_stas_mhz[3] = 1324;
+		p->dcfclk_stas_mhz[4] = 1500;
+	}
+	/* Copy clocks tables entries, if available */
+	if (dml2->config.bbox_overrides.clks_table.num_states) {
+		p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states;
+		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) {
+			p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz;
+		}
+
+		/* i now equals the DCFCLK level count; it picks the last table entry below. */
+		p->dcfclk_stas_mhz[0] = dml2->config.bbox_overrides.clks_table.clk_entries[0].dcfclk_mhz;
+		if (i > 1)
+			p->dcfclk_stas_mhz[4] = dml2->config.bbox_overrides.clks_table.clk_entries[i-1].dcfclk_mhz;
+
+		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels; i++) {
+			p->in_states->state_array[i].fabricclk_mhz =
+				dml2->config.bbox_overrides.clks_table.clk_entries[i].fclk_mhz;
+		}
+
+		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels; i++) {
+			p->in_states->state_array[i].dram_speed_mts =
+				dml2->config.bbox_overrides.clks_table.clk_entries[i].memclk_mhz * transactions_per_mem_clock;
+		}
+
+		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels; i++) {
+			p->in_states->state_array[i].socclk_mhz =
+				dml2->config.bbox_overrides.clks_table.clk_entries[i].socclk_mhz;
+		}
+
+		/* DTBCLK entries that are not positive leave the seeded value in place. */
+		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels; i++) {
+			if (dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz > 0)
+				p->in_states->state_array[i].dtbclk_mhz =
+					dml2->config.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz;
+		}
+
+		for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels; i++) {
+			p->in_states->state_array[i].dispclk_mhz =
+				dml2->config.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz;
+			p->in_states->state_array[i].dppclk_mhz =
+				dml2->config.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz;
+		}
+	}
+
+	if (dml_project == dml_project_dcn35 ||
+		dml_project == dml_project_dcn36 ||
+		dml_project == dml_project_dcn351) {
+		int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0,
+			max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0, max_socclk_mhz = 0;
+
+		/* Find the per-clock maxima over all input states (truncated to int). */
+		for (i = 0; i < p->in_states->num_states; i++) {
+			if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz)
+				max_dcfclk_mhz = (int)p->in_states->state_array[i].dcfclk_mhz;
+			if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz)
+				max_fclk_mhz = (int)p->in_states->state_array[i].fabricclk_mhz;
+			if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz)
+				max_socclk_mhz = (int)p->in_states->state_array[i].socclk_mhz;
+			if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz)
+				max_uclk_mhz = (int)p->in_states->state_array[i].dram_speed_mts;
+			if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz)
+				max_dispclk_mhz = (int)p->in_states->state_array[i].dispclk_mhz;
+			if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz)
+				max_dppclk_mhz = (int)p->in_states->state_array[i].dppclk_mhz;
+			if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz)
+				max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz;
+			if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz)
+				max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz;
+		}
+
+		for (i = 0; i < p->in_states->num_states; i++) {
+			/* Independent states - including base (unlisted) parameters from state 0. */
+			p->out_states->state_array[i] = p->in_states->state_array[0];
+
+			/* Every output state gets the maxima; DSCCLK is a third of max DISPCLK. */
+			p->out_states->state_array[i].dispclk_mhz = max_dispclk_mhz;
+			p->out_states->state_array[i].dppclk_mhz = max_dppclk_mhz;
+			p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz;
+			p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz;
+			p->out_states->state_array[i].dscclk_mhz = max_dispclk_mhz / 3.0;
+
+			/* Dependent states. */
+			p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[i].dram_speed_mts;
+			p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz;
+			p->out_states->state_array[i].socclk_mhz = p->in_states->state_array[i].socclk_mhz;
+			p->out_states->state_array[i].dcfclk_mhz = p->in_states->state_array[i].dcfclk_mhz;
+		}
+
+		p->out_states->num_states = p->in_states->num_states;
+	} else {
+		dml2_policy_build_synthetic_soc_states(s, p);
+	}
+}
+
+/* Assign (src)->member to the identically named member of (dst). */
+#define DML2_COPY_MEMBER(dst, src, member) ((dst)->member = (src)->member)
+
+/*
+ * dml2_translate_ip_params - copy the IP parameters held in dc->dml.ip
+ * @in: DC instance whose dml.ip block is read
+ * @out: DML2 IP parameter block to fill
+ *
+ * All members but two are same-name copies. The exceptions are
+ * config_return_buffer_segment_size_in_kbytes, which is set to a constant, and
+ * vblank_nom_default_us, which is read from the source's VBlankNomDefaultUS.
+ */
+void dml2_translate_ip_params(const struct dc *in, struct ip_params_st *out)
+{
+	const struct _vcs_dpi_ip_params_st *ip = &in->dml.ip;
+
+	/* Buffers and chunk sizes */
+	DML2_COPY_MEMBER(out, ip, compressed_buffer_segment_size_in_kbytes);
+	DML2_COPY_MEMBER(out, ip, config_return_buffer_size_in_kbytes);
+	DML2_COPY_MEMBER(out, ip, rob_buffer_size_kbytes);
+	DML2_COPY_MEMBER(out, ip, pixel_chunk_size_kbytes);
+	DML2_COPY_MEMBER(out, ip, min_pixel_chunk_size_bytes);
+	DML2_COPY_MEMBER(out, ip, meta_chunk_size_kbytes);
+	DML2_COPY_MEMBER(out, ip, min_meta_chunk_size_bytes);
+	DML2_COPY_MEMBER(out, ip, meta_fifo_size_in_kentries);
+	DML2_COPY_MEMBER(out, ip, dcc_meta_buffer_size_bytes);
+	DML2_COPY_MEMBER(out, ip, zero_size_buffer_entries);
+	DML2_COPY_MEMBER(out, ip, dpte_buffer_size_in_pte_reqs_luma);
+	DML2_COPY_MEMBER(out, ip, dpte_buffer_size_in_pte_reqs_chroma);
+	DML2_COPY_MEMBER(out, ip, cursor_buffer_size);
+	DML2_COPY_MEMBER(out, ip, cursor_chunk_size);
+	DML2_COPY_MEMBER(out, ip, line_buffer_size_bits);
+	DML2_COPY_MEMBER(out, ip, max_line_buffer_lines);
+
+	/* Resource counts */
+	DML2_COPY_MEMBER(out, ip, max_num_dpp);
+	DML2_COPY_MEMBER(out, ip, max_num_otg);
+	DML2_COPY_MEMBER(out, ip, max_num_wb);
+	DML2_COPY_MEMBER(out, ip, num_dsc);
+	DML2_COPY_MEMBER(out, ip, max_num_hdmi_frl_outputs);
+	DML2_COPY_MEMBER(out, ip, max_num_dp2p0_outputs);
+	DML2_COPY_MEMBER(out, ip, max_num_dp2p0_streams);
+	DML2_COPY_MEMBER(out, ip, max_inter_dcn_tile_repeaters);
+
+	/* Feature switches and VM settings */
+	DML2_COPY_MEMBER(out, ip, dcc_supported);
+	DML2_COPY_MEMBER(out, ip, ptoi_supported);
+	DML2_COPY_MEMBER(out, ip, dsc422_native_support);
+	DML2_COPY_MEMBER(out, ip, dynamic_metadata_vm_enabled);
+	DML2_COPY_MEMBER(out, ip, gpuvm_enable);
+	DML2_COPY_MEMBER(out, ip, hostvm_enable);
+	DML2_COPY_MEMBER(out, ip, gpuvm_max_page_table_levels);
+	DML2_COPY_MEMBER(out, ip, hostvm_max_page_table_levels);
+
+	/* DSC limits */
+	DML2_COPY_MEMBER(out, ip, maximum_dsc_bits_per_component);
+	DML2_COPY_MEMBER(out, ip, maximum_pixels_per_line_per_dsc_unit);
+
+	/* Scaler limits */
+	DML2_COPY_MEMBER(out, ip, max_dchub_pscl_bw_pix_per_clk);
+	DML2_COPY_MEMBER(out, ip, max_pscl_lb_bw_pix_per_clk);
+	DML2_COPY_MEMBER(out, ip, max_lb_vscl_bw_pix_per_clk);
+	DML2_COPY_MEMBER(out, ip, max_vscl_hscl_bw_pix_per_clk);
+	DML2_COPY_MEMBER(out, ip, max_hscl_ratio);
+	DML2_COPY_MEMBER(out, ip, max_vscl_ratio);
+	DML2_COPY_MEMBER(out, ip, max_hscl_taps);
+	DML2_COPY_MEMBER(out, ip, max_vscl_taps);
+
+	/* Clock delays and margins */
+	DML2_COPY_MEMBER(out, ip, dispclk_delay_subtotal);
+	DML2_COPY_MEMBER(out, ip, dispclk_ramp_margin_percent);
+	DML2_COPY_MEMBER(out, ip, dppclk_delay_subtotal);
+	DML2_COPY_MEMBER(out, ip, dppclk_delay_scl);
+	DML2_COPY_MEMBER(out, ip, dppclk_delay_scl_lb_only);
+	DML2_COPY_MEMBER(out, ip, dppclk_delay_cnvc_formatter);
+	DML2_COPY_MEMBER(out, ip, dppclk_delay_cnvc_cursor);
+
+	/* Writeback */
+	DML2_COPY_MEMBER(out, ip, writeback_chunk_size_kbytes);
+	DML2_COPY_MEMBER(out, ip, writeback_interface_buffer_size_kbytes);
+	DML2_COPY_MEMBER(out, ip, writeback_line_buffer_buffer_size);
+	DML2_COPY_MEMBER(out, ip, writeback_max_hscl_ratio);
+	DML2_COPY_MEMBER(out, ip, writeback_max_vscl_ratio);
+	DML2_COPY_MEMBER(out, ip, writeback_min_hscl_ratio);
+	DML2_COPY_MEMBER(out, ip, writeback_min_vscl_ratio);
+	DML2_COPY_MEMBER(out, ip, writeback_max_hscl_taps);
+	DML2_COPY_MEMBER(out, ip, writeback_max_vscl_taps);
+
+	/* As per hardcoded reference / discussions */
+	out->config_return_buffer_segment_size_in_kbytes = 64;
+	/* Taken from the source block; a fixed 600 was the earlier alternative. */
+	out->vblank_nom_default_us = ip->VBlankNomDefaultUS;
+}
+
+#undef DML2_COPY_MEMBER
+
+/*
+ * dml2_translate_socbb_params - copy the SoC bounding box held in dc->dml.soc
+ * @in: DC instance whose dml.soc block is read
+ * @out: DML2 bounding box to fill
+ *
+ * Most members are direct copies. Page sizes are converted from bytes to
+ * kbytes, a few members are renamed on the way, and refclk_mhz is a constant.
+ */
+void dml2_translate_socbb_params(const struct dc *in, struct soc_bounding_box_st *out)
+{
+	const struct _vcs_dpi_soc_bounding_box_st *soc = &in->dml.soc;
+
+	/* Clocks and spread */
+	out->refclk_mhz = 50; /* As per hardcoded reference. */
+	out->dispclk_dppclk_vco_speed_mhz = soc->dispclk_dppclk_vco_speed_mhz;
+	out->phy_downspread_percent = soc->downspread_percent;
+	out->dcn_downspread_percent = soc->dcn_downspread_percent;
+
+	/* Memory and fabric geometry */
+	out->num_chans = soc->num_chans;
+	out->dram_channel_width_bytes = (dml_uint_t)soc->dram_channel_width_bytes;
+	out->return_bus_width_bytes = soc->return_bus_width_bytes;
+	out->fabric_datapath_to_dcn_data_return_bytes = (dml_uint_t)soc->fabric_datapath_to_dcn_data_return_bytes;
+	out->mall_allocated_for_dcn_mbytes = (dml_uint_t)soc->mall_allocated_for_dcn_mbytes;
+	/*
+	 * NOTE(review): the destination is named as a request count but the
+	 * source is named as a size in bytes - confirm this mapping is intended.
+	 */
+	out->max_outstanding_reqs = soc->max_request_size_bytes;
+
+	/* Page sizes: bytes on the source side, kbytes on the destination side */
+	out->gpuvm_min_page_size_kbytes = soc->gpuvm_min_page_size_bytes / 1024;
+	out->hostvm_min_page_size_kbytes = soc->hostvm_min_page_size_bytes / 1024;
+
+	/* Average bandwidth use limits */
+	out->max_avg_sdp_bw_use_normal_percent = soc->max_avg_sdp_bw_use_normal_percent;
+	out->max_avg_fabric_bw_use_normal_percent = soc->max_avg_fabric_bw_use_normal_percent;
+	out->max_avg_dram_bw_use_normal_percent = soc->max_avg_dram_bw_use_normal_percent;
+	out->max_avg_dram_bw_use_normal_strobe_percent = soc->max_avg_dram_bw_use_normal_strobe_percent;
+
+	/* Ideal bandwidth after urgent; the DRAM members read "dram_sdp" fields */
+	out->pct_ideal_sdp_bw_after_urgent = soc->pct_ideal_sdp_bw_after_urgent;
+	out->pct_ideal_fabric_bw_after_urgent = soc->pct_ideal_fabric_bw_after_urgent;
+	out->pct_ideal_dram_bw_after_urgent_strobe = soc->pct_ideal_dram_bw_after_urgent_strobe;
+	out->pct_ideal_dram_bw_after_urgent_pixel_only = soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only;
+	out->pct_ideal_dram_bw_after_urgent_pixel_and_vm = soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm;
+	out->pct_ideal_dram_bw_after_urgent_vm_only = soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only;
+
+	/* Urgent handling and latencies */
+	out->do_urgent_latency_adjustment = soc->do_urgent_latency_adjustment;
+	out->urgent_out_of_order_return_per_channel_pixel_only_bytes = soc->urgent_out_of_order_return_per_channel_pixel_only_bytes;
+	out->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = soc->urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+	out->urgent_out_of_order_return_per_channel_vm_only_bytes = soc->urgent_out_of_order_return_per_channel_vm_only_bytes;
+	out->round_trip_ping_latency_dcfclk_cycles = soc->round_trip_ping_latency_dcfclk_cycles;
+	out->smn_latency_us = soc->smn_latency_us;
+}
+
+/*
+ * dml2_translate_soc_states - build DML2 states from dc->dml.soc
+ * @dc: DC instance whose dml.soc block is read
+ * @out: state table to fill; its num_states is set to @num_states
+ * @num_states: number of entries to translate
+ *
+ * Clock values come from the matching clock_limits[] entry. Latency values
+ * are not per-state on the source side, so each state gets the same ones.
+ */
+void dml2_translate_soc_states(const struct dc *dc, struct soc_states_st *out, int num_states)
+{
+	const struct _vcs_dpi_soc_bounding_box_st *soc = &dc->dml.soc;
+	unsigned int idx;
+
+	out->num_states = num_states;
+
+	for (idx = 0; idx < out->num_states; idx++) {
+		/* Per-state clocks */
+		out->state_array[idx].dcfclk_mhz = soc->clock_limits[idx].dcfclk_mhz;
+		out->state_array[idx].fabricclk_mhz = soc->clock_limits[idx].fabricclk_mhz;
+		out->state_array[idx].socclk_mhz = soc->clock_limits[idx].socclk_mhz;
+		out->state_array[idx].dram_speed_mts = soc->clock_limits[idx].dram_speed_mts;
+		out->state_array[idx].dispclk_mhz = soc->clock_limits[idx].dispclk_mhz;
+		out->state_array[idx].dppclk_mhz = soc->clock_limits[idx].dppclk_mhz;
+		out->state_array[idx].dscclk_mhz = soc->clock_limits[idx].dscclk_mhz;
+		out->state_array[idx].dtbclk_mhz = soc->clock_limits[idx].dtbclk_mhz;
+		out->state_array[idx].phyclk_mhz = soc->clock_limits[idx].phyclk_mhz;
+		out->state_array[idx].phyclk_d18_mhz = soc->clock_limits[idx].phyclk_d18_mhz;
+		out->state_array[idx].phyclk_d32_mhz = soc->clock_limits[idx].phyclk_d32_mhz;
+
+		/* Self-refresh and clock-change latencies, shared by all states */
+		out->state_array[idx].sr_exit_time_us = soc->sr_exit_time_us;
+		out->state_array[idx].sr_enter_plus_exit_time_us = soc->sr_enter_plus_exit_time_us;
+		out->state_array[idx].fclk_change_latency_us = soc->fclk_change_latency_us;
+		out->state_array[idx].dram_clock_change_latency_us = soc->dram_clock_change_latency_us;
+		out->state_array[idx].usr_retraining_latency_us = soc->usr_retraining_latency_us;
+		out->state_array[idx].writeback_latency_us = soc->writeback_latency_us;
+
+		/*
+		 * The Z8 latencies below are the driver-initialized ones. The
+		 * earlier note here said they differ from the spreadsheet
+		 * (5.20 / 9.60). NOTE(review): decide which source to use.
+		 */
+		out->state_array[idx].sr_exit_z8_time_us = soc->sr_exit_z8_time_us;
+		out->state_array[idx].sr_enter_plus_exit_z8_time_us = soc->sr_enter_plus_exit_z8_time_us;
+
+		/* Urgent latency parameters */
+		out->state_array[idx].use_ideal_dram_bw_strobe = true;
+		out->state_array[idx].urgent_latency_pixel_data_only_us = soc->urgent_latency_pixel_data_only_us;
+		out->state_array[idx].urgent_latency_pixel_mixed_with_vm_data_us = soc->urgent_latency_pixel_mixed_with_vm_data_us;
+		out->state_array[idx].urgent_latency_vm_data_only_us = soc->urgent_latency_vm_data_only_us;
+		out->state_array[idx].urgent_latency_adjustment_fabric_clock_component_us = soc->urgent_latency_adjustment_fabric_clock_component_us;
+		out->state_array[idx].urgent_latency_adjustment_fabric_clock_reference_mhz = soc->urgent_latency_adjustment_fabric_clock_reference_mhz;
+	}
+}
+
+/*
+ * Fill entry @location of the DML timing configuration from a stream's timing.
+ *
+ * Active sizes include the borders. Each blank-end value is the blank start
+ * (total minus front porch) minus the active size including borders.
+ */
+static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st *out, unsigned int location, const struct dc_stream_state *in)
+{
+	dml_uint_t h_blank_start = in->timing.h_total - in->timing.h_front_porch;
+	dml_uint_t v_blank_start = in->timing.v_total - in->timing.v_front_porch;
+
+	/* Totals, porch and scan type */
+	out->HTotal[location] = in->timing.h_total;
+	out->VTotal[location] = in->timing.v_total;
+	out->VFrontPorch[location] = in->timing.v_front_porch;
+	out->Interlace[location] = in->timing.flags.INTERLACE;
+	out->DRRDisplay[location] = false;
+
+	/* Active area, borders included */
+	out->HActive[location] = in->timing.h_addressable + in->timing.h_border_left + in->timing.h_border_right;
+	out->VActive[location] = in->timing.v_addressable + in->timing.v_border_bottom + in->timing.v_border_top;
+
+	/* Blank end positions */
+	out->HBlankEnd[location] = h_blank_start
+		- in->timing.h_addressable
+		- in->timing.h_border_left
+		- in->timing.h_border_right;
+	out->VBlankEnd[location] = v_blank_start
+		- in->timing.v_addressable
+		- in->timing.v_border_top
+		- in->timing.v_border_bottom;
+
+	/* pix_clk_100hz / 10000 gives MHz; HW frame packing doubles it. */
+	out->PixelClock[location] = in->timing.pix_clk_100hz / 10000.00;
+	if (in->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
+		out->PixelClock[location] *= 2;
+
+	/*
+	 * Pixel clock in Hz over pixels per line, then over lines per frame.
+	 * NOTE(review): the multiply is done in the type of pix_clk_100hz -
+	 * confirm it cannot wrap for very high pixel clocks, and that h_total
+	 * and v_total are never zero here.
+	 */
+	out->RefreshRate[location] = ((in->timing.pix_clk_100hz * 100) / in->timing.h_total) / in->timing.v_total;
+}
+
+/*
+ * Fill entry @location of the DML output configuration from a stream.
+ *
+ * @pipe is not referenced. @dml2 is only read for the HPO stream to link
+ * encoder mapping in the scratch area.
+ */
+static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location,
+				const struct dc_stream_state *in, const struct pipe_ctx *pipe, struct dml2_context *dml2)
+{
+	unsigned int bpc;
+
+	/* DSC and link basics */
+	out->DSCEnable[location] = (enum dml_dsc_enable)in->timing.flags.DSC;
+	out->DSCSlices[location] = in->timing.dsc_cfg.num_slices_h;
+	out->DSCInputBitPerComponent[location] = 12; /* As per code in dcn20_resource.c */
+	out->OutputLinkDPLanes[location] = 4; /* As per code in dcn20_resource.c */
+
+	/* Encoder type from the signal; unknown signals are treated as DP. */
+	switch (in->signal) {
+	case SIGNAL_TYPE_EDP:
+		out->OutputEncoder[location] = dml_edp;
+		break;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		out->OutputEncoder[location] = dml_hdmi;
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+	case SIGNAL_TYPE_DISPLAY_PORT:
+		out->OutputEncoder[location] = dml_dp;
+		/* A mapping other than -1 marks the mapped entry as DP 2.0. */
+		if (location < MAX_HPO_DP2_ENCODERS && dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1)
+			out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0;
+		break;
+	default:
+		out->OutputEncoder[location] = dml_dp;
+		break;
+	}
+
+	/* Bits per component; unlisted depths fall back to 8. */
+	switch (in->timing.display_color_depth) {
+	case COLOR_DEPTH_666:
+		bpc = 6;
+		break;
+	case COLOR_DEPTH_999:
+		bpc = 9;
+		break;
+	case COLOR_DEPTH_101010:
+		bpc = 10;
+		break;
+	case COLOR_DEPTH_111111:
+		bpc = 11;
+		break;
+	case COLOR_DEPTH_121212:
+		bpc = 12;
+		break;
+	case COLOR_DEPTH_141414:
+		bpc = 14;
+		break;
+	case COLOR_DEPTH_161616:
+		bpc = 16;
+		break;
+	case COLOR_DEPTH_888:
+	default:
+		bpc = 8;
+		break;
+	}
+
+	/* Output format and uncompressed bpp; unknown encodings count as 4:4:4. */
+	switch (in->timing.pixel_encoding) {
+	case PIXEL_ENCODING_YCBCR420:
+		out->OutputFormat[location] = dml_420;
+		out->OutputBpp[location] = (bpc * 3.0) / 2;
+		break;
+	case PIXEL_ENCODING_YCBCR422:
+		/* Native 4:2:2 only when DSC is on and simple 4:2:2 is not requested. */
+		if (in->timing.flags.DSC && !in->timing.dsc_cfg.ycbcr422_simple)
+			out->OutputFormat[location] = dml_n422;
+		else
+			out->OutputFormat[location] = dml_s422;
+		out->OutputBpp[location] = (dml_float_t)bpc * 2;
+		break;
+	case PIXEL_ENCODING_RGB:
+	case PIXEL_ENCODING_YCBCR444:
+	default:
+		out->OutputFormat[location] = dml_444;
+		out->OutputBpp[location] = (dml_float_t)bpc * 3;
+		break;
+	}
+
+	/* With DSC the bpp comes from the DSC config (stored in 1/16 units). */
+	if (in->timing.flags.DSC)
+		out->OutputBpp[location] = in->timing.dsc_cfg.bits_per_pixel / 16.0;
+
+	/* This has been false throughout DCN32x development. If needed we can change this later on. */
+	out->OutputMultistreamEn[location] = false;
+
+	/* Every signal type currently maps to "not applicable". */
+	out->OutputLinkDPRate[location] = dml_dp_rate_na;
+
+	out->PixelClockBackEnd[location] = in->timing.pix_clk_100hz / 10000.00;
+
+	/* Audio values are read from the first audio mode entry. */
+	out->AudioSampleLayout[location] = in->audio_info.modes->sample_size;
+	out->AudioSampleRate[location] = in->audio_info.modes->max_bit_rate;
+
+	out->OutputDisabled[location] = true;
+}
+
+/*
+ * Fill entry @location of the DML surface configuration with placeholder
+ * values derived only from the stream's addressable size.
+ */
+static void populate_dummy_dml_surface_cfg(struct dml_surface_cfg_st *out, unsigned int location, const struct dc_stream_state *in)
+{
+	/* Luma and chroma both take the stream's addressable size. */
+	out->SurfaceWidthY[location] = in->timing.h_addressable;
+	out->SurfaceWidthC[location] = in->timing.h_addressable;
+	out->SurfaceHeightY[location] = in->timing.v_addressable;
+	out->SurfaceHeightC[location] = in->timing.v_addressable;
+
+	/* Luma pitch is the luma width rounded up to a multiple of 128. */
+	out->PitchY[location] = (out->SurfaceWidthY[location] + 127) / 128 * 128;
+	out->PitchC[location] = 1;
+
+	/* Fixed format: 64KB R_X swizzle, 32 bit 4:4:4. */
+	out->SurfaceTiling[location] = dml_sw_64kb_r_x;
+	out->SourcePixelFormat[location] = dml_444_32;
+
+	/* DCC is off; its rates and pitches get neutral values. */
+	out->DCCEnable[location] = false;
+	out->DCCRateLuma[location] = 1.0;
+	out->DCCRateChroma[location] = 1.0;
+	out->DCCMetaPitchY[location] = 0;
+	out->DCCMetaPitchC[location] = 0;
+	out->DCCFractionOfZeroSizeRequestsLuma[location] = 0;
+	out->DCCFractionOfZeroSizeRequestsChroma[location] = 0;
+}
+
+static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_project, struct dml_surface_cfg_st *out, unsigned int location, const struct dc_plane_state *in)
+{
+	/* Luma and chroma geometry are taken as-is from the plane state. */
+	out->SurfaceWidthY[location] = in->plane_size.surface_size.width;
+	out->SurfaceHeightY[location] = in->plane_size.surface_size.height;
+	out->PitchY[location] = in->plane_size.surface_pitch;
+	out->SurfaceWidthC[location] = in->plane_size.chroma_size.width;
+	out->SurfaceHeightC[location] = in->plane_size.chroma_size.height;
+	out->PitchC[location] = in->plane_size.chroma_pitch;
+
+	/* DCC: enable flag and meta pitches come from the plane; both rates are fixed at 1.0. */
+	out->DCCEnable[location] = in->dcc.enable;
+	out->DCCMetaPitchY[location] = in->dcc.meta_pitch;
+	out->DCCMetaPitchC[location] = in->dcc.meta_pitch_c;
+	out->DCCRateLuma[location] = 1.0;
+	out->DCCRateChroma[location] = 1.0;
+	out->DCCFractionOfZeroSizeRequestsLuma[location] = in->dcc.independent_64b_blks;
+	out->DCCFractionOfZeroSizeRequestsChroma[location] = in->dcc.independent_64b_blks_c;
+
+	if (dml2_project != dml_project_dcn401) {
+		out->SurfaceTiling[location] = (enum dml_swizzle_mode)in->tiling_info.gfx9.swizzle;
+	} else if (in->tiling_info.gfx_addr3.swizzle == DC_ADDR3_SW_LINEAR) {
+		out->SurfaceTiling[location] = dml_sw_linear;
+	} else {
+		/*
+		 * Temporary use gfx11 swizzle in dml, until proper dml for DCN4x is integrated/implemented:
+		 * every non-linear gfx_addr3 swizzle (4KB_2D, 64KB_2D, 256KB_2D, unknown) maps to dml_sw_64kb_r_x.
+		 */
+		out->SurfaceTiling[location] = dml_sw_64kb_r_x;
+	}
+
+	/*
+	 * Translate the DC surface format into a DML source pixel format.
+	 * Formats without a case of their own are described as dml_444_32.
+	 */
+	switch (in->format) {
+	case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS:
+		out->SourcePixelFormat[location] = dml_444_8;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+	case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+		out->SourcePixelFormat[location] = dml_444_16;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+		out->SourcePixelFormat[location] = dml_444_64;
+		break;
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+		out->SourcePixelFormat[location] = dml_420_8;
+		break;
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+		out->SourcePixelFormat[location] = dml_420_10;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+		out->SourcePixelFormat[location] = dml_rgbe_alpha;
+		break;
+	default:
+		out->SourcePixelFormat[location] = dml_444_32;
+		break;
+	}
+}
+
+/* Run resource_build_scaling_params() for @in on the scratch pipe context->res_ctx.temp_pipe and return its scl_data. */
+static struct scaler_data *get_scaler_data_for_plane(const struct dc_plane_state *in,
+		struct dc_state *context)
+{
+	struct pipe_ctx *scratch = &context->res_ctx.temp_pipe;
+	int idx;
+
+	memset(scratch, 0, sizeof(struct pipe_ctx));
+
+	for (idx = 0; idx < MAX_PIPES; idx++) {
+		const struct pipe_ctx *candidate = &context->res_ctx.pipe_ctx[idx];
+
+		/* Use the first pipe that carries @in and has no previous ODM pipe. */
+		if (candidate->plane_state != in || candidate->prev_odm_pipe)
+			continue;
+		scratch->stream = candidate->stream;
+		scratch->plane_state = candidate->plane_state;
+		scratch->plane_res.scl_data.taps = candidate->plane_res.scl_data.taps;
+		scratch->stream_res = candidate->stream_res;
+		resource_build_scaling_params(scratch);
+		break;
+	}
+	ASSERT(idx < MAX_PIPES);	/* no pipe matched: if execution continues, the zeroed scratch data is returned */
+	return &scratch->plane_res.scl_data;
+}
+
+/*
+ * Fill slot @location of @out with a placeholder plane, used for a stream
+ * that has no planes: unscaled, unrotated, viewport at the origin.
+ *
+ * @in provides the timing that sizes the viewport (capped at 3840x2160);
+ * @soc provides the GPUVM minimum page size.
+ */
+static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location,
+		const struct dc_stream_state *in,
+		const struct soc_bounding_box_st *soc)
+{
+	/* Clamp the viewport to 4K. */
+	dml_uint_t width = in->timing.h_addressable > 3840 ? 3840 : in->timing.h_addressable;
+	dml_uint_t height = in->timing.v_addressable > 2160 ? 2160 : in->timing.v_addressable;
+
+	out->CursorBPP[location] = dml_cur_32bit;
+	out->CursorWidth[location] = 256;
+	out->NumberOfCursors[location] = 1;
+
+	out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes;
+
+	out->ViewportWidth[location] = width;
+	out->ViewportHeight[location] = height;
+	out->ViewportStationary[location] = false;
+	out->ViewportWidthChroma[location] = 0;
+	out->ViewportHeightChroma[location] = 0;
+	out->ViewportXStart[location] = 0;
+	out->ViewportXStartC[location] = 0;
+	out->ViewportYStart[location] = 0;
+	out->ViewportYStartC[location] = 0;
+
+	/* Scaling is off: unity luma ratios, one luma tap; chroma ratios and taps are zeroed. */
+	out->ScalerEnabled[location] = false;
+	out->HRatio[location] = 1.0;
+	out->VRatio[location] = 1.0;
+	out->HRatioChroma[location] = 0;
+	out->VRatioChroma[location] = 0;
+	out->HTaps[location] = 1;
+	out->VTaps[location] = 1;
+	out->HTapsChroma[location] = 0;
+	out->VTapsChroma[location] = 0;
+	out->SourceScan[location] = dml_rotation_0;
+	out->ScalerRecoutWidth[location] = width;
+
+	/* Same line-buffer depth as populate_dml_plane_cfg_from_plane_state(). */
+	out->LBBitPerPixel[location] = 57;
+
+	out->DynamicMetadataEnable[location] = false;
+
+	/* The placeholder plane is set up with both MALL uses disabled. */
+	out->UseMALLForStaticScreen[location] = dml_use_mall_static_screen_disable;
+	out->UseMALLForPStateChange[location] = dml_use_mall_pstate_change_disable;
+
+	out->DETSizeOverride[location] = 256;
+}
+
+/*
+ * Fill slot @location of @out from a real plane.
+ *
+ * Viewport, scaling ratios and tap counts come from the scaler data that
+ * get_scaler_data_for_plane() computes for @in within @context; rotation and
+ * the recout width come from @in itself, and the GPUVM minimum page size from
+ * @soc. UseMALLFor* and DETSizeOverride are not written here, unlike in
+ * populate_dummy_dml_plane_cfg().
+ */
+static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location,
+		const struct dc_plane_state *in, struct dc_state *context,
+		const struct soc_bounding_box_st *soc)
+{
+	/* The returned data lives in the context's scratch pipe. */
+	const struct scaler_data *scl = get_scaler_data_for_plane(in, context);
+
+	out->CursorBPP[location] = dml_cur_32bit;
+	out->CursorWidth[location] = 256;
+	out->NumberOfCursors[location] = 1;
+
+	out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes;
+
+	out->ViewportWidth[location] = scl->viewport.width;
+	out->ViewportHeight[location] = scl->viewport.height;
+	out->ViewportXStart[location] = scl->viewport.x;
+	out->ViewportYStart[location] = scl->viewport.y;
+	out->ViewportWidthChroma[location] = scl->viewport_c.width;
+	out->ViewportHeightChroma[location] = scl->viewport_c.height;
+	out->ViewportXStartC[location] = scl->viewport_c.x;
+	out->ViewportYStartC[location] = scl->viewport_c.y;
+	out->ViewportStationary[location] = false;
+
+	/* Current driver code base uses LBBitPerPixel as 57. There is a discrepancy
+	 * from the HW/DML teams about this value. Initialize LBBitPerPixel with the
+	 * value currently used in Navi3x.
+	 */
+	out->LBBitPerPixel[location] = 57;
+
+	/* The scaler counts as enabled as soon as any luma or chroma ratio differs from dc_fixpt_one. */
+	out->ScalerEnabled[location] = scl->ratios.horz.value != dc_fixpt_one.value ||
+			scl->ratios.horz_c.value != dc_fixpt_one.value ||
+			scl->ratios.vert.value != dc_fixpt_one.value ||
+			scl->ratios.vert_c.value != dc_fixpt_one.value;
+
+	if (out->ScalerEnabled[location]) {
+		/* Follow the original dml_wrapper.c code direction to fix scaling issues */
+		out->HRatio[location] = (dml_float_t)scl->ratios.horz.value / (1ULL << 32);
+		out->HRatioChroma[location] = (dml_float_t)scl->ratios.horz_c.value / (1ULL << 32);
+		out->VRatio[location] = (dml_float_t)scl->ratios.vert.value / (1ULL << 32);
+		out->VRatioChroma[location] = (dml_float_t)scl->ratios.vert_c.value / (1ULL << 32);
+	} else {
+		out->HRatio[location] = 1;
+		out->HRatioChroma[location] = 1;
+		out->VRatio[location] = 1;
+		out->VRatioChroma[location] = 1;
+	}
+
+	/* When the luma tap count is zero, luma and chroma both fall back to one tap. */
+	out->HTaps[location] = scl->taps.h_taps ? scl->taps.h_taps : 1;
+	out->HTapsChroma[location] = scl->taps.h_taps ? scl->taps.h_taps_c : 1;
+	out->VTaps[location] = scl->taps.v_taps ? scl->taps.v_taps : 1;
+	out->VTapsChroma[location] = scl->taps.v_taps ? scl->taps.v_taps_c : 1;
+
+	out->SourceScan[location] = (enum dml_rotation_angle)in->rotation;
+	out->ScalerRecoutWidth[location] = in->dst_rect.width;
+
+	out->DynamicMetadataEnable[location] = false;
+	out->DynamicMetadataLinesBeforeActiveRequired[location] = 0;
+	out->DynamicMetadataTransmittedBytes[location] = 0;
+}
+
+/* Return the display-cfg slot already mapped to @stream's stream_id, or -1 if none is mapped yet. */
+static int map_stream_to_dml_display_cfg(const struct dml2_context *dml2,
+		const struct dc_stream_state *stream, const struct dml_display_cfg_st *dml_dispcfg)
+{
+	const struct dml2_dml_to_dc_pipe_mapping *map = &dml2->v20.scratch.dml_to_dc_pipe_mapping;
+	int i;
+
+	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+		if (map->disp_cfg_to_stream_id_valid[i] && map->disp_cfg_to_stream_id[i] == stream->stream_id)
+			return i;
+	}
+
+	/* Signed return type on purpose: the caller detects "not mapped" with a "< 0" test. */
+	return -1;
+}
+
+/* Encode the position of @plane under stream @stream_id as (stream index << 16) | plane index; false if not found. */
+static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *context, const struct dc_plane_state *plane,
+	unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id)
+{
+	int s, p;
+	bool match_index = dml2->v20.scratch.plane_duplicate_exists;	/* duplicates exist: the plane must also sit at @plane_index */
+
+	if (!plane_id)
+		return false;
+
+	for (s = 0; s < context->stream_count; s++) {
+		if (context->streams[s]->stream_id != stream_id)
+			continue;
+		for (p = 0; p < context->stream_status[s].plane_count; p++) {
+			if (context->stream_status[s].plane_states[p] != plane || (match_index && p != plane_index))
+				continue;
+			*plane_id = (s << 16) | p;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Return the display-cfg slot already mapped to @plane (under @stream_id), or -1 when there is none. */
+static int map_plane_to_dml_display_cfg(const struct dml2_context *dml2, const struct dc_plane_state *plane,
+		const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id, int plane_index)
+{
+	const struct dml2_dml_to_dc_pipe_mapping *map = &dml2->v20.scratch.dml_to_dc_pipe_mapping;
+	unsigned int plane_id;
+	int i;
+
+	if (!get_plane_id(context->bw_ctx.dml2, context, plane, stream_id, plane_index, &plane_id)) {
+		ASSERT(false);
+		return -1;
+	}
+
+	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+		if (map->disp_cfg_to_plane_id_valid[i] && map->disp_cfg_to_plane_id[i] == plane_id)
+			return i;
+	}
+
+	/* Signed return type on purpose: the caller detects "not mapped" with a "< 0" test. */
+	return -1;
+}
+
+/*
+ * Rebuild scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index from @state.
+ * Starting at every pipe that has both a stream and a plane, the bottom_pipe
+ * chain is walked and each pipe with a stream that is not numbered yet receives
+ * the next number of that walk, counting from 0. A pipe numbered by an earlier
+ * walk keeps its number and does not consume one.
+ */
+static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, struct dc_state *state)
+{
+	struct dml2_dml_to_dc_pipe_mapping *map = &dml2->v20.scratch.dml_to_dc_pipe_mapping;
+	unsigned int i;
+
+	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+		map->dml_pipe_idx_to_plane_index_valid[i] = false;
+		map->dml_pipe_idx_to_plane_index[i] = 0;
+	}
+
+	for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) {
+		struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i];
+		unsigned int plane_index = 0;
+
+		/* &pipe_ctx[i] can never be NULL, so only the stream and the plane need checking. */
+		if (!pipe->stream || !pipe->plane_state)
+			continue;
+
+		for (; pipe; pipe = pipe->bottom_pipe) {
+			if (!pipe->stream || map->dml_pipe_idx_to_plane_index_valid[pipe->pipe_idx])
+				continue;
+			map->dml_pipe_idx_to_plane_index[pipe->pipe_idx] = plane_index++;
+			map->dml_pipe_idx_to_plane_index_valid[pipe->pipe_idx] = true;
+		}
+	}
+}
+
+/* Copy the enabled writeback entries of stream @in into slot @location of @out. */
+static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cfg_st *out,
+		unsigned int location, const struct dc_stream_state *in)
+{
+	if (in->num_wb_info > 0) {
+		/* Current dml supports 1 dwb per stream (limitation): every enabled entry writes slot @location. */
+		for (int wb = 0; wb < __DML_NUM_DMB__; wb++) {
+			const struct dc_writeback_info *wb_info = &in->writeback_info[wb];
+
+			if (!wb_info->wb_enabled)
+				continue;
+
+			out->WritebackEnable[location] = wb_info->wb_enabled;
+			/* NOTE(review): a source width is stored in what is named a per-surface writeback count - confirm this is intended. */
+			out->ActiveWritebacksPerSurface[location] = wb_info->dwb_params.cnv_params.src_width;
+			out->WritebackDestinationWidth[location] = wb_info->dwb_params.dest_width;
+			out->WritebackDestinationHeight[location] = wb_info->dwb_params.dest_height;
+
+			/* Source size: the crop rectangle when cropping is enabled, else the full source. */
+			out->WritebackSourceWidth[location] = wb_info->dwb_params.cnv_params.crop_en ?
+					wb_info->dwb_params.cnv_params.crop_width :
+					wb_info->dwb_params.cnv_params.src_width;
+			out->WritebackSourceHeight[location] = wb_info->dwb_params.cnv_params.crop_en ?
+					wb_info->dwb_params.cnv_params.crop_height :
+					wb_info->dwb_params.cnv_params.src_height;
+
+			/* Current design does not have chroma scaling, need to follow up. */
+			out->WritebackHTaps[location] = wb_info->dwb_params.scaler_taps.h_taps > 0 ? wb_info->dwb_params.scaler_taps.h_taps : 1;
+			out->WritebackVTaps[location] = wb_info->dwb_params.scaler_taps.v_taps > 0 ? wb_info->dwb_params.scaler_taps.v_taps : 1;
+
+			/* Scaling ratio = selected source size / destination size. */
+			out->WritebackHRatio[location] = wb_info->dwb_params.cnv_params.crop_en ?
+					(double)wb_info->dwb_params.cnv_params.crop_width / (double)wb_info->dwb_params.dest_width :
+					(double)wb_info->dwb_params.cnv_params.src_width / (double)wb_info->dwb_params.dest_width;
+			out->WritebackVRatio[location] = wb_info->dwb_params.cnv_params.crop_en ?
+					(double)wb_info->dwb_params.cnv_params.crop_height / (double)wb_info->dwb_params.dest_height :
+					(double)wb_info->dwb_params.cnv_params.src_height / (double)wb_info->dwb_params.dest_height;
+		}
+	}
+}
+
+/* For each HPO DP stream encoder in use, record the instance of the HPO link encoder held by the same pipe. */
+static void dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(struct dml2_context *dml2, struct dc_state *context)
+{
+	const struct pipe_ctx *pipe;
+	int i;
+
+	/* Scratch gets reset to zero in dml, but link encoder instance can be zero, so reset to -1 */
+	for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++)
+		dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[i] = -1;
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		pipe = &context->res_ctx.pipe_ctx[i];
+
+		/* Only DP pipes that hold both an HPO stream encoder and an HPO link encoder are recorded. */
+		if (!pipe->stream || !pipe->stream_res.hpo_dp_stream_enc || !pipe->link_res.hpo_dp_link_enc)
+			continue;
+		if (!dc_is_dp_signal(pipe->stream->signal))
+			continue;
+		dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[pipe->stream_res.hpo_dp_stream_enc->inst] =
+				pipe->link_res.hpo_dp_link_enc->inst;
+	}
+}
+
+void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg)
+{ /* Describe every stream and plane of @context in @dml_dispcfg; slot-to-id mappings are kept in dml2->v20.scratch. */
+ int i = 0, j = 0, k = 0;
+ int disp_cfg_stream_location, disp_cfg_plane_location;
+ enum mall_stream_type stream_mall_type;
+ struct pipe_ctx *current_pipe_context;
+
+ for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { /* start with every slot mapping marked invalid */
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[i] = false;
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[i] = false;
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[i] = false;
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[i] = false;
+ }
+
+ // Generally these are set by referencing our latest BB/IP params in dcn32_resource.c file
+ dml_dispcfg->plane.GPUVMEnable = dml2->v20.dml_core_ctx.ip.gpuvm_enable;
+ dml_dispcfg->plane.GPUVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.gpuvm_max_page_table_levels;
+ dml_dispcfg->plane.HostVMEnable = dml2->v20.dml_core_ctx.ip.hostvm_enable;
+ dml_dispcfg->plane.HostVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.hostvm_max_page_table_levels;
+ if (dml2->v20.dml_core_ctx.ip.hostvm_enable) /* HostVM on: override the AllowForPStateChangeOrStutterInVBlankFinal policy */
+ dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter;
+
+ dml2_populate_pipe_to_plane_index_mapping(dml2, context);
+ dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(dml2, context);
+
+ for (i = 0; i < context->stream_count; i++) {
+ current_pipe_context = NULL;
+ for (k = 0; k < MAX_PIPES; k++) {
+ /* Find one pipe allocated to this stream for the purpose of getting
+ * info about the link later; current_pipe_context stays NULL if none matches. */
+ if (context->streams[i] == context->res_ctx.pipe_ctx[k].stream) {
+ current_pipe_context = &context->res_ctx.pipe_ctx[k];
+ break;
+ }
+ }
+ disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg);
+ stream_mall_type = dc_state_get_stream_subvp_type(context, context->streams[i]);
+
+ if (disp_cfg_stream_location < 0) /* not mapped yet: claim the next timing slot */
+ disp_cfg_stream_location = dml_dispcfg->num_timings++;
+
+ ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+
+ populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]);
+ populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2);
+ /* Writeback settings are stored at the stream's slot index. */
+ populate_dml_writeback_cfg_from_stream_state(&dml_dispcfg->writeback,
+ disp_cfg_stream_location, context->streams[i]);
+
+ switch (context->streams[i]->debug.force_odm_combine_segments) { /* debug override; other values leave ODMUse untouched */
+ case 2:
+ dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_2to1;
+ break;
+ case 4:
+ dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_stream_location] = dml_odm_use_policy_combine_4to1;
+ break;
+ default:
+ break;
+ }
+
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_stream_location] = context->streams[i]->stream_id;
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_stream_location] = true;
+
+ if (context->stream_status[i].plane_count == 0) { /* no planes: describe a placeholder surface and plane */
+ disp_cfg_plane_location = dml_dispcfg->num_surfaces++;
+
+ populate_dummy_dml_surface_cfg(&dml_dispcfg->surface, disp_cfg_plane_location, context->streams[i]);
+ populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location,
+ context->streams[i], &dml2->v20.dml_core_ctx.soc);
+
+ dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location;
+
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; /* NOTE(review): disp_cfg_to_plane_id for this slot is not written on this path */
+ } else {
+ for (j = 0; j < context->stream_status[i].plane_count; j++) {
+ disp_cfg_plane_location = map_plane_to_dml_display_cfg(dml2,
+ context->stream_status[i].plane_states[j], context, dml_dispcfg, context->streams[i]->stream_id, j);
+
+ if (disp_cfg_plane_location < 0) /* not mapped yet: claim the next surface slot */
+ disp_cfg_plane_location = dml_dispcfg->num_surfaces++;
+
+ ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+
+ populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]);
+ populate_dml_plane_cfg_from_plane_state(
+ &dml_dispcfg->plane, disp_cfg_plane_location,
+ context->stream_status[i].plane_states[j], context,
+ &dml2->v20.dml_core_ctx.soc);
+
+ if (stream_mall_type == SUBVP_MAIN) {
+ dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
+ dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize;
+ } else if (stream_mall_type == SUBVP_PHANTOM) {
+ dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
+ dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_disable;
+ dml2->v20.dml_core_ctx.policy.ImmediateFlipRequirement[disp_cfg_plane_location] = dml_immediate_flip_not_required;
+ } else {
+ dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_disable;
+ dml_dispcfg->plane.UseMALLForStaticScreen[disp_cfg_plane_location] = dml_use_mall_static_screen_optimize;
+ }
+
+ dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location;
+
+ if (get_plane_id(dml2, context, context->stream_status[i].plane_states[j], context->streams[i]->stream_id, j,
+ &dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location]))
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true;
+
+ if (j >= 1) { /* planes after the first get the stream's timing/output repeated at their own slot index */
+ populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]);
+ populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2);
+ switch (context->streams[i]->debug.force_odm_combine_segments) {
+ case 2:
+ dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1;
+ break;
+ case 4:
+ dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_4to1;
+ break;
+ default:
+ break;
+ }
+
+ if (stream_mall_type == SUBVP_MAIN)
+ dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport;
+ else if (stream_mall_type == SUBVP_PHANTOM)
+ dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_phantom_pipe;
+
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[disp_cfg_plane_location] = context->streams[i]->stream_id;
+ dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id_valid[disp_cfg_plane_location] = true;
+
+ dml_dispcfg->num_timings++; /* account for the timing entry written for this extra plane */
+ }
+ }
+ }
+ }
+}
+
+void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs,
+ struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs,
+ struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs,
+ struct pipe_ctx *out)
+{ /* Copy the given rq, dlg and ttu register values field by field into @out. */
+ memset(&out->rq_regs, 0, sizeof(out->rq_regs)); /* members not assigned below are left zeroed */
+ out->rq_regs.rq_regs_l.chunk_size = rq_regs->rq_regs_l.chunk_size;
+ out->rq_regs.rq_regs_l.min_chunk_size = rq_regs->rq_regs_l.min_chunk_size;
+ out->rq_regs.rq_regs_l.meta_chunk_size = rq_regs->rq_regs_l.meta_chunk_size;
+ out->rq_regs.rq_regs_l.min_meta_chunk_size = rq_regs->rq_regs_l.min_meta_chunk_size;
+ out->rq_regs.rq_regs_l.dpte_group_size = rq_regs->rq_regs_l.dpte_group_size;
+ out->rq_regs.rq_regs_l.mpte_group_size = rq_regs->rq_regs_l.mpte_group_size;
+ out->rq_regs.rq_regs_l.swath_height = rq_regs->rq_regs_l.swath_height;
+ out->rq_regs.rq_regs_l.pte_row_height_linear = rq_regs->rq_regs_l.pte_row_height_linear;
+
+ out->rq_regs.rq_regs_c.chunk_size = rq_regs->rq_regs_c.chunk_size;
+ out->rq_regs.rq_regs_c.min_chunk_size = rq_regs->rq_regs_c.min_chunk_size;
+ out->rq_regs.rq_regs_c.meta_chunk_size = rq_regs->rq_regs_c.meta_chunk_size;
+ out->rq_regs.rq_regs_c.min_meta_chunk_size = rq_regs->rq_regs_c.min_meta_chunk_size;
+ out->rq_regs.rq_regs_c.dpte_group_size = rq_regs->rq_regs_c.dpte_group_size;
+ out->rq_regs.rq_regs_c.mpte_group_size = rq_regs->rq_regs_c.mpte_group_size;
+ out->rq_regs.rq_regs_c.swath_height = rq_regs->rq_regs_c.swath_height;
+ out->rq_regs.rq_regs_c.pte_row_height_linear = rq_regs->rq_regs_c.pte_row_height_linear;
+
+ out->rq_regs.drq_expansion_mode = rq_regs->drq_expansion_mode;
+ out->rq_regs.prq_expansion_mode = rq_regs->prq_expansion_mode;
+ out->rq_regs.mrq_expansion_mode = rq_regs->mrq_expansion_mode;
+ out->rq_regs.crq_expansion_mode = rq_regs->crq_expansion_mode;
+ out->rq_regs.plane1_base_address = rq_regs->plane1_base_address;
+
+ memset(&out->dlg_regs, 0, sizeof(out->dlg_regs)); /* members not assigned below are left zeroed */
+ out->dlg_regs.refcyc_h_blank_end = disp_dlg_regs->refcyc_h_blank_end;
+ out->dlg_regs.dlg_vblank_end = disp_dlg_regs->dlg_vblank_end;
+ out->dlg_regs.min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start;
+ out->dlg_regs.refcyc_per_htotal = disp_dlg_regs->refcyc_per_htotal;
+ out->dlg_regs.refcyc_x_after_scaler = disp_dlg_regs->refcyc_x_after_scaler;
+ out->dlg_regs.dst_y_after_scaler = disp_dlg_regs->dst_y_after_scaler;
+ out->dlg_regs.dst_y_prefetch = disp_dlg_regs->dst_y_prefetch;
+ out->dlg_regs.dst_y_per_vm_vblank = disp_dlg_regs->dst_y_per_vm_vblank;
+ out->dlg_regs.dst_y_per_row_vblank = disp_dlg_regs->dst_y_per_row_vblank;
+ out->dlg_regs.dst_y_per_vm_flip = disp_dlg_regs->dst_y_per_vm_flip;
+ out->dlg_regs.dst_y_per_row_flip = disp_dlg_regs->dst_y_per_row_flip;
+ out->dlg_regs.ref_freq_to_pix_freq = disp_dlg_regs->ref_freq_to_pix_freq;
+ out->dlg_regs.vratio_prefetch = disp_dlg_regs->vratio_prefetch;
+ out->dlg_regs.vratio_prefetch_c = disp_dlg_regs->vratio_prefetch_c;
+ out->dlg_regs.refcyc_per_pte_group_vblank_l = disp_dlg_regs->refcyc_per_pte_group_vblank_l;
+ out->dlg_regs.refcyc_per_pte_group_vblank_c = disp_dlg_regs->refcyc_per_pte_group_vblank_c;
+ out->dlg_regs.refcyc_per_meta_chunk_vblank_l = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l;
+ out->dlg_regs.refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_c;
+ out->dlg_regs.refcyc_per_pte_group_flip_l = disp_dlg_regs->refcyc_per_pte_group_flip_l;
+ out->dlg_regs.refcyc_per_pte_group_flip_c = disp_dlg_regs->refcyc_per_pte_group_flip_c;
+ out->dlg_regs.refcyc_per_meta_chunk_flip_l = disp_dlg_regs->refcyc_per_meta_chunk_flip_l;
+ out->dlg_regs.refcyc_per_meta_chunk_flip_c = disp_dlg_regs->refcyc_per_meta_chunk_flip_c;
+ out->dlg_regs.dst_y_per_pte_row_nom_l = disp_dlg_regs->dst_y_per_pte_row_nom_l;
+ out->dlg_regs.dst_y_per_pte_row_nom_c = disp_dlg_regs->dst_y_per_pte_row_nom_c;
+ out->dlg_regs.refcyc_per_pte_group_nom_l = disp_dlg_regs->refcyc_per_pte_group_nom_l;
+ out->dlg_regs.refcyc_per_pte_group_nom_c = disp_dlg_regs->refcyc_per_pte_group_nom_c;
+ out->dlg_regs.dst_y_per_meta_row_nom_l = disp_dlg_regs->dst_y_per_meta_row_nom_l;
+ out->dlg_regs.dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_c;
+ out->dlg_regs.refcyc_per_meta_chunk_nom_l = disp_dlg_regs->refcyc_per_meta_chunk_nom_l;
+ out->dlg_regs.refcyc_per_meta_chunk_nom_c = disp_dlg_regs->refcyc_per_meta_chunk_nom_c;
+ out->dlg_regs.refcyc_per_line_delivery_pre_l = disp_dlg_regs->refcyc_per_line_delivery_pre_l;
+ out->dlg_regs.refcyc_per_line_delivery_pre_c = disp_dlg_regs->refcyc_per_line_delivery_pre_c;
+ out->dlg_regs.refcyc_per_line_delivery_l = disp_dlg_regs->refcyc_per_line_delivery_l;
+ out->dlg_regs.refcyc_per_line_delivery_c = disp_dlg_regs->refcyc_per_line_delivery_c;
+ out->dlg_regs.refcyc_per_vm_group_vblank = disp_dlg_regs->refcyc_per_vm_group_vblank;
+ out->dlg_regs.refcyc_per_vm_group_flip = disp_dlg_regs->refcyc_per_vm_group_flip;
+ out->dlg_regs.refcyc_per_vm_req_vblank = disp_dlg_regs->refcyc_per_vm_req_vblank;
+ out->dlg_regs.refcyc_per_vm_req_flip = disp_dlg_regs->refcyc_per_vm_req_flip;
+ out->dlg_regs.dst_y_offset_cur0 = disp_dlg_regs->dst_y_offset_cur0;
+ out->dlg_regs.chunk_hdl_adjust_cur0 = disp_dlg_regs->chunk_hdl_adjust_cur0;
+ out->dlg_regs.dst_y_offset_cur1 = disp_dlg_regs->dst_y_offset_cur1;
+ out->dlg_regs.chunk_hdl_adjust_cur1 = disp_dlg_regs->chunk_hdl_adjust_cur1;
+ out->dlg_regs.vready_after_vcount0 = disp_dlg_regs->vready_after_vcount0;
+ out->dlg_regs.dst_y_delta_drq_limit = disp_dlg_regs->dst_y_delta_drq_limit;
+ out->dlg_regs.refcyc_per_vm_dmdata = disp_dlg_regs->refcyc_per_vm_dmdata;
+ out->dlg_regs.dmdata_dl_delta = disp_dlg_regs->dmdata_dl_delta;
+
+ memset(&out->ttu_regs, 0, sizeof(out->ttu_regs)); /* members not assigned below are left zeroed */
+ out->ttu_regs.qos_level_low_wm = disp_ttu_regs->qos_level_low_wm;
+ out->ttu_regs.qos_level_high_wm = disp_ttu_regs->qos_level_high_wm;
+ out->ttu_regs.min_ttu_vblank = disp_ttu_regs->min_ttu_vblank;
+ out->ttu_regs.qos_level_flip = disp_ttu_regs->qos_level_flip;
+ out->ttu_regs.refcyc_per_req_delivery_l = disp_ttu_regs->refcyc_per_req_delivery_l;
+ out->ttu_regs.refcyc_per_req_delivery_c = disp_ttu_regs->refcyc_per_req_delivery_c;
+ out->ttu_regs.refcyc_per_req_delivery_cur0 = disp_ttu_regs->refcyc_per_req_delivery_cur0;
+ out->ttu_regs.refcyc_per_req_delivery_cur1 = disp_ttu_regs->refcyc_per_req_delivery_cur1;
+ out->ttu_regs.refcyc_per_req_delivery_pre_l = disp_ttu_regs->refcyc_per_req_delivery_pre_l;
+ out->ttu_regs.refcyc_per_req_delivery_pre_c = disp_ttu_regs->refcyc_per_req_delivery_pre_c;
+ out->ttu_regs.refcyc_per_req_delivery_pre_cur0 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur0;
+ out->ttu_regs.refcyc_per_req_delivery_pre_cur1 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur1;
+ out->ttu_regs.qos_level_fixed_l = disp_ttu_regs->qos_level_fixed_l;
+ out->ttu_regs.qos_level_fixed_c = disp_ttu_regs->qos_level_fixed_c;
+ out->ttu_regs.qos_level_fixed_cur0 = disp_ttu_regs->qos_level_fixed_cur0;
+ out->ttu_regs.qos_level_fixed_cur1 = disp_ttu_regs->qos_level_fixed_cur1;
+ out->ttu_regs.qos_ramp_disable_l = disp_ttu_regs->qos_ramp_disable_l;
+ out->ttu_regs.qos_ramp_disable_c = disp_ttu_regs->qos_ramp_disable_c;
+ out->ttu_regs.qos_ramp_disable_cur0 = disp_ttu_regs->qos_ramp_disable_cur0;
+ out->ttu_regs.qos_ramp_disable_cur1 = disp_ttu_regs->qos_ramp_disable_cur1;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h
new file mode 100644
index 000000000000..d764773938f4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DML2_TRANSLATION_HELPER_H__
+#define __DML2_TRANSLATION_HELPER_H__
+
+void dml2_init_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out);
+void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out);
+void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
+ const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out);
+void dml2_translate_ip_params(const struct dc *in_dc, struct ip_params_st *out);
+void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box_st *out);
+void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states);
+void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg);
+void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out);
+bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe);
+
+#endif //__DML2_TRANSLATION_HELPER_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
new file mode 100644
index 000000000000..9a33158b63bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c
@@ -0,0 +1,560 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+//#include "dml2_utils.h"
+#include "display_mode_core.h"
+#include "dml_display_rq_dlg_calc.h"
+#include "dml2_internal_types.h"
+#include "dml2_translation_helper.h"
+#include "dml2_utils.h"
+/* Copy the timing entries listed below from slot src_index to slot dst_index of the same dml_timing_cfg_st. */
+void dml2_util_copy_dml_timing(struct dml_timing_cfg_st *dml_timing_array, unsigned int dst_index, unsigned int src_index)
+{
+	dml_timing_array->HActive[dst_index] = dml_timing_array->HActive[src_index];
+	dml_timing_array->HTotal[dst_index] = dml_timing_array->HTotal[src_index];
+	dml_timing_array->HBlankEnd[dst_index] = dml_timing_array->HBlankEnd[src_index];
+	dml_timing_array->VActive[dst_index] = dml_timing_array->VActive[src_index];
+	dml_timing_array->VTotal[dst_index] = dml_timing_array->VTotal[src_index];
+	dml_timing_array->VBlankEnd[dst_index] = dml_timing_array->VBlankEnd[src_index];
+	dml_timing_array->VBlankNom[dst_index] = dml_timing_array->VBlankNom[src_index];
+	dml_timing_array->VFrontPorch[dst_index] = dml_timing_array->VFrontPorch[src_index];
+	dml_timing_array->PixelClock[dst_index] = dml_timing_array->PixelClock[src_index];
+	dml_timing_array->RefreshRate[dst_index] = dml_timing_array->RefreshRate[src_index];
+	dml_timing_array->Interlace[dst_index] = dml_timing_array->Interlace[src_index];
+	dml_timing_array->DRRDisplay[dst_index] = dml_timing_array->DRRDisplay[src_index];
+}
+/* Copy the per-plane entries listed below from slot src_index to slot dst_index of the same dml_plane_cfg_st. */
+void dml2_util_copy_dml_plane(struct dml_plane_cfg_st *dml_plane_array, unsigned int dst_index, unsigned int src_index)
+{
+	dml_plane_array->ViewportWidth[dst_index] = dml_plane_array->ViewportWidth[src_index];
+	dml_plane_array->ViewportHeight[dst_index] = dml_plane_array->ViewportHeight[src_index];
+	dml_plane_array->ViewportXStart[dst_index] = dml_plane_array->ViewportXStart[src_index];
+	dml_plane_array->ViewportYStart[dst_index] = dml_plane_array->ViewportYStart[src_index];
+	dml_plane_array->ViewportWidthChroma[dst_index] = dml_plane_array->ViewportWidthChroma[src_index];
+	dml_plane_array->ViewportHeightChroma[dst_index] = dml_plane_array->ViewportHeightChroma[src_index];
+	dml_plane_array->ViewportXStartC[dst_index] = dml_plane_array->ViewportXStartC[src_index];
+	dml_plane_array->ViewportYStartC[dst_index] = dml_plane_array->ViewportYStartC[src_index];
+	dml_plane_array->ViewportStationary[dst_index] = dml_plane_array->ViewportStationary[src_index];
+	dml_plane_array->GPUVMMinPageSizeKBytes[dst_index] = dml_plane_array->GPUVMMinPageSizeKBytes[src_index];
+	dml_plane_array->PTEBufferModeOverrideEn[dst_index] = dml_plane_array->PTEBufferModeOverrideEn[src_index];
+	dml_plane_array->PTEBufferMode[dst_index] = dml_plane_array->PTEBufferMode[src_index];
+	dml_plane_array->ForceOneRowForFrame[dst_index] = dml_plane_array->ForceOneRowForFrame[src_index];
+
+	dml_plane_array->SourceScan[dst_index] = dml_plane_array->SourceScan[src_index];
+	dml_plane_array->ScalerRecoutWidth[dst_index] = dml_plane_array->ScalerRecoutWidth[src_index];
+
+	dml_plane_array->ScalerEnabled[dst_index] = dml_plane_array->ScalerEnabled[src_index];
+	dml_plane_array->HRatio[dst_index] = dml_plane_array->HRatio[src_index];
+	dml_plane_array->HRatioChroma[dst_index] = dml_plane_array->HRatioChroma[src_index];
+	dml_plane_array->VRatio[dst_index] = dml_plane_array->VRatio[src_index];
+	dml_plane_array->VRatioChroma[dst_index] = dml_plane_array->VRatioChroma[src_index];
+	dml_plane_array->HTaps[dst_index] = dml_plane_array->HTaps[src_index];
+	dml_plane_array->HTapsChroma[dst_index] = dml_plane_array->HTapsChroma[src_index];
+	dml_plane_array->VTaps[dst_index] = dml_plane_array->VTaps[src_index];
+	dml_plane_array->VTapsChroma[dst_index] = dml_plane_array->VTapsChroma[src_index];
+	dml_plane_array->LBBitPerPixel[dst_index] = dml_plane_array->LBBitPerPixel[src_index];
+
+	dml_plane_array->BlendingAndTiming[dst_index] = dml_plane_array->BlendingAndTiming[src_index];
+
+	dml_plane_array->NumberOfCursors[dst_index] = dml_plane_array->NumberOfCursors[src_index];
+	dml_plane_array->CursorBPP[dst_index] = dml_plane_array->CursorBPP[src_index];
+	dml_plane_array->CursorWidth[dst_index] = dml_plane_array->CursorWidth[src_index];
+
+	dml_plane_array->DETSizeOverride[dst_index] = dml_plane_array->DETSizeOverride[src_index];
+	dml_plane_array->DynamicMetadataEnable[dst_index] = dml_plane_array->DynamicMetadataEnable[src_index];
+	dml_plane_array->DynamicMetadataTransmittedBytes[dst_index] = dml_plane_array->DynamicMetadataTransmittedBytes[src_index];
+	dml_plane_array->DynamicMetadataLinesBeforeActiveRequired[dst_index] = dml_plane_array->DynamicMetadataLinesBeforeActiveRequired[src_index];
+
+	dml_plane_array->UseMALLForPStateChange[dst_index] = dml_plane_array->UseMALLForPStateChange[src_index];
+	dml_plane_array->UseMALLForStaticScreen[dst_index] = dml_plane_array->UseMALLForStaticScreen[src_index];
+}
+/* Copy the per-surface entries listed below from slot src_index to slot dst_index of the same dml_surface_cfg_st. */
+void dml2_util_copy_dml_surface(struct dml_surface_cfg_st *dml_surface_array, unsigned int dst_index, unsigned int src_index)
+{
+	dml_surface_array->SourcePixelFormat[dst_index] = dml_surface_array->SourcePixelFormat[src_index];
+	dml_surface_array->SurfaceTiling[dst_index] = dml_surface_array->SurfaceTiling[src_index];
+	dml_surface_array->SurfaceWidthY[dst_index] = dml_surface_array->SurfaceWidthY[src_index];
+	dml_surface_array->SurfaceHeightY[dst_index] = dml_surface_array->SurfaceHeightY[src_index];
+	dml_surface_array->PitchY[dst_index] = dml_surface_array->PitchY[src_index];
+	dml_surface_array->SurfaceWidthC[dst_index] = dml_surface_array->SurfaceWidthC[src_index];
+	dml_surface_array->SurfaceHeightC[dst_index] = dml_surface_array->SurfaceHeightC[src_index];
+	dml_surface_array->PitchC[dst_index] = dml_surface_array->PitchC[src_index];
+
+	dml_surface_array->DCCEnable[dst_index] = dml_surface_array->DCCEnable[src_index];
+	dml_surface_array->DCCRateLuma[dst_index] = dml_surface_array->DCCRateLuma[src_index];
+	dml_surface_array->DCCRateChroma[dst_index] = dml_surface_array->DCCRateChroma[src_index];
+
+	dml_surface_array->DCCMetaPitchY[dst_index] = dml_surface_array->DCCMetaPitchY[src_index];
+	dml_surface_array->DCCMetaPitchC[dst_index] = dml_surface_array->DCCMetaPitchC[src_index];
+	dml_surface_array->DCCFractionOfZeroSizeRequestsLuma[dst_index] = dml_surface_array->DCCFractionOfZeroSizeRequestsLuma[src_index];
+	dml_surface_array->DCCFractionOfZeroSizeRequestsChroma[dst_index] = dml_surface_array->DCCFractionOfZeroSizeRequestsChroma[src_index];
+}
+/* Copy the per-output entries listed below from slot src_index to slot dst_index of the same dml_output_cfg_st. */
+void dml2_util_copy_dml_output(struct dml_output_cfg_st *dml_output_array, unsigned int dst_index, unsigned int src_index)
+{
+	dml_output_array->OutputEncoder[dst_index] = dml_output_array->OutputEncoder[src_index];
+	dml_output_array->OutputFormat[dst_index] = dml_output_array->OutputFormat[src_index];
+	dml_output_array->OutputBpp[dst_index] = dml_output_array->OutputBpp[src_index];
+	dml_output_array->ForcedOutputLinkBPP[dst_index] = dml_output_array->ForcedOutputLinkBPP[src_index];
+	dml_output_array->PixelClockBackEnd[dst_index] = dml_output_array->PixelClockBackEnd[src_index];
+	dml_output_array->OutputLinkDPLanes[dst_index] = dml_output_array->OutputLinkDPLanes[src_index];
+	dml_output_array->OutputLinkDPRate[dst_index] = dml_output_array->OutputLinkDPRate[src_index];
+	dml_output_array->OutputMultistreamEn[dst_index] = dml_output_array->OutputMultistreamEn[src_index];
+	dml_output_array->OutputMultistreamId[dst_index] = dml_output_array->OutputMultistreamId[src_index];
+	dml_output_array->DSCEnable[dst_index] = dml_output_array->DSCEnable[src_index];
+	dml_output_array->DSCInputBitPerComponent[dst_index] = dml_output_array->DSCInputBitPerComponent[src_index];
+	dml_output_array->AudioSampleRate[dst_index] = dml_output_array->AudioSampleRate[src_index];
+	dml_output_array->AudioSampleLayout[dst_index] = dml_output_array->AudioSampleLayout[src_index];
+}
+/* Return the maximum ODM combine value (1, 2 or 4) for the given encoder class, DSC state and 4:1 force flag. */
+unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, enum dml_output_encoder_class encoder, bool dsc_enabled)
+{
+	unsigned int max_odm = 1; /* result for dml_hdmi and for any encoder not listed below */
+
+	switch (encoder) {
+	case dml_dp:
+	case dml_edp:
+		max_odm = 2;
+		break;
+	case dml_dp2p0:
+		max_odm = (dsc_enabled || force_odm_4to1) ? 4 : 2;
+		break;
+	case dml_hdmifrl:
+		max_odm = force_odm_4to1 ? 4 : 2;
+		break;
+	case dml_hdmi:
+	default:
+		break;
+	}
+
+	return max_odm;
+}
+/* True when the pipe has a stream, both HPO DP stream and link encoders, and a DP signal; false for a NULL pipe or stream. */
+bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx)
+{
+	if (!pipe_ctx || !pipe_ctx->stream)
+		return false;
+
+	/* An HPO stream encoder without an HPO link encoder points at a link encoder dynamic management issue */
+	ASSERT(!pipe_ctx->stream_res.hpo_dp_stream_enc || pipe_ctx->link_res.hpo_dp_link_enc != NULL);
+
+	if (!pipe_ctx->stream_res.hpo_dp_stream_enc || !pipe_ctx->link_res.hpo_dp_link_enc)
+		return false;
+	return dc_is_dp_signal(pipe_ctx->stream->signal);
+}
+/* True when any of the first res_pool->pipe_count pipes in @context has a stream and passes is_dp2p0_output_encoder(). */
+bool is_dtbclk_required(const struct dc *dc, struct dc_state *context)
+{
+	int pipe;
+
+	for (pipe = 0; pipe < dc->res_pool->pipe_count; pipe++) {
+		const struct pipe_ctx *cur = &context->res_ctx.pipe_ctx[pipe];
+
+		if (cur->stream && is_dp2p0_output_encoder(cur))
+			return true;
+	}
+	return false;
+}
+/* Store the clocks from @out_clks into context->bw_ctx.bw.dcn.clk. */
+void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_state *context)
+{
+	context->bw_ctx.bw.dcn.clk.dcfclk_khz = out_clks->dcfclk_khz;
+	context->bw_ctx.bw.dcn.clk.dispclk_khz = out_clks->dispclk_khz;
+	context->bw_ctx.bw.dcn.clk.fclk_khz = out_clks->fclk_khz;
+	context->bw_ctx.bw.dcn.clk.socclk_khz = out_clks->socclk_khz;
+	context->bw_ctx.bw.dcn.clk.phyclk_khz = out_clks->phyclk_khz;
+	context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = out_clks->ref_dtbclk_khz;
+	context->bw_ctx.bw.dcn.clk.dramclk_khz = out_clks->uclk_mts / 16; /* NOTE(review): confirm the uclk_mts -> kHz factor */
+	context->bw_ctx.bw.dcn.clk.p_state_change_support = out_clks->p_state_supported;
+}
+/* Return the lowest DML pipe index whose stream-id mapping is valid and equals @stream_id, or -1 when none matches. */
+int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id)
+{
+	int idx, found = -1;
+
+	for (idx = 0; found < 0 && idx < __DML2_WRAPPER_MAX_STREAMS_PLANES__; idx++) {
+		if (ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[idx] && ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[idx] == stream_id)
+			found = idx;
+	}
+	return found;
+}
+/* Return the lowest DML pipe index whose plane-id mapping is valid and equals @plane_id, or -1 when none matches. */
+static int find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id)
+{
+	int idx, found = -1;
+
+	for (idx = 0; found < 0 && idx < __DML2_WRAPPER_MAX_STREAMS_PLANES__; idx++) {
+		if (ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id_valid[idx] && ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[idx] == plane_id)
+			found = idx;
+	}
+	return found;
+}
+/* Look up @plane under the stream with @stream_id and write its id to *plane_id; returns false if not found or plane_id is NULL. */
+static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane,
+	unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id)
+{
+	const bool match_plane_index = dml2->v20.scratch.plane_duplicate_exists;
+	unsigned int s, p;
+
+	if (plane_id == NULL)
+		return false;
+
+	for (s = 0; s < state->stream_count; s++) {
+		if (state->streams[s]->stream_id != stream_id)
+			continue;
+		for (p = 0; p < state->stream_status[s].plane_count; p++) {
+			if (state->stream_status[s].plane_states[p] != plane || (match_plane_index && p != plane_index))
+				continue;
+			/* The id packs the stream slot shifted left by 16 together with the plane slot */
+			*plane_id = (s << 16) | p;
+			return true;
+		}
+	}
+
+	return false;
+}
+/* Fill pipe_ctx->pipe_dlg_param from the stream timing, the scaler recout and the DML getters for DML pipe pipe_idx. */
+static void populate_pipe_ctx_dlg_params_from_dml(struct pipe_ctx *pipe_ctx, struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
+{
+	const struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
+	unsigned int hactive, vactive, hblank_start, vblank_start;
+
+	/* Active size includes the borders; blank start is the total minus the front porch */
+	hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right;
+	vactive = timing->v_addressable + timing->v_border_bottom + timing->v_border_top;
+	hblank_start = timing->h_total - timing->h_front_porch;
+	vblank_start = timing->v_total - timing->v_front_porch;
+
+	/* Values read back from the DML getters for this pipe index */
+	pipe_ctx->pipe_dlg_param.vstartup_start = dml_get_vstartup_calculated(mode_lib, pipe_idx);
+	pipe_ctx->pipe_dlg_param.vupdate_offset = dml_get_vupdate_offset(mode_lib, pipe_idx);
+	pipe_ctx->pipe_dlg_param.vupdate_width = dml_get_vupdate_width(mode_lib, pipe_idx);
+	pipe_ctx->pipe_dlg_param.vready_offset = dml_get_vready_offset(mode_lib, pipe_idx);
+
+	pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst;
+
+	/* Timing-derived values; blank end is blank start minus the active size */
+	pipe_ctx->pipe_dlg_param.hactive = hactive;
+	pipe_ctx->pipe_dlg_param.vactive = vactive;
+	pipe_ctx->pipe_dlg_param.htotal = timing->h_total;
+	pipe_ctx->pipe_dlg_param.vtotal = timing->v_total;
+	pipe_ctx->pipe_dlg_param.hblank_start = hblank_start;
+	pipe_ctx->pipe_dlg_param.hblank_end = hblank_start - hactive;
+	pipe_ctx->pipe_dlg_param.vblank_start = vblank_start;
+	pipe_ctx->pipe_dlg_param.vblank_end = vblank_start - vactive;
+	pipe_ctx->pipe_dlg_param.vfront_porch = timing->v_front_porch;
+	pipe_ctx->pipe_dlg_param.pixel_rate_mhz = timing->pix_clk_100hz / 10000.00;
+	pipe_ctx->pipe_dlg_param.refresh_rate = ((timing->pix_clk_100hz * 100) / timing->h_total) / timing->v_total;
+	pipe_ctx->pipe_dlg_param.vtotal_max = pipe_ctx->stream->adjust.v_total_max;
+	pipe_ctx->pipe_dlg_param.vtotal_min = pipe_ctx->stream->adjust.v_total_min;
+	pipe_ctx->pipe_dlg_param.recout_height = pipe_ctx->plane_res.scl_data.recout.height;
+	pipe_ctx->pipe_dlg_param.recout_width = pipe_ctx->plane_res.scl_data.recout.width;
+	pipe_ctx->pipe_dlg_param.full_recout_height = pipe_ctx->plane_res.scl_data.recout.height;
+	pipe_ctx->pipe_dlg_param.full_recout_width = pipe_ctx->plane_res.scl_data.recout.width;
+}
+/* Copy clocks, DET sizes, MALL totals and per-pipe RQ/DLG/TTU registers from the DML2 core context into @context and @out_new_hw_state. */
+void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt)
+{
+	unsigned int dc_pipe_ctx_index, dml_pipe_idx, plane_id;
+	enum mall_stream_type pipe_mall_type;
+	struct dml2_calculate_rq_and_dlg_params_scratch *s = &in_ctx->v20.scratch.calculate_rq_and_dlg_params_scratch;
+
+	context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = (unsigned int)in_ctx->v20.dml_core_ctx.mp.DCFCLKDeepSleep * 1000;
+	context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+
+	context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = (in_ctx->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0] != dml_fclock_change_unsupported);
+
+	if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz)
+		context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz;
+
+	context->bw_ctx.bw.dcn.compbuf_size_kb = in_ctx->v20.dml_core_ctx.ip.config_return_buffer_size_in_kbytes;
+	/* The MALL totals are accumulated per pipe below, so start from zero instead of whatever the state already holds */
+	context->bw_ctx.bw.dcn.mall_ss_size_bytes = 0;
+	context->bw_ctx.bw.dcn.mall_subvp_size_bytes = 0;
+
+	for (dc_pipe_ctx_index = 0; dc_pipe_ctx_index < pipe_cnt; dc_pipe_ctx_index++) {
+		if (!context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream)
+			continue;
+		/* The DML2 and the DC logic of determining pipe indices are different from each other so
+		 * there is a need to know which DML pipe index maps to which DC pipe. The code below
+		 * finds a dml_pipe_index from the plane id if a plane is valid. If a plane is not valid then
+		 * it finds a dml_pipe_index from the stream id. */
+		if (get_plane_id(in_ctx, context, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state,
+			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id,
+			in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[context->res_ctx.pipe_ctx[dc_pipe_ctx_index].pipe_idx], &plane_id)) {
+			dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id);
+		} else {
+			dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id);
+		}
+
+		if (dml_pipe_idx == 0xFFFFFFFF)
+			continue;
+		ASSERT(in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[dml_pipe_idx]);
+		ASSERT(in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[dml_pipe_idx] == context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id);
+
+		/* Use the dml_pipe_index here for the getters to fetch the correct values and dc_pipe_index in the pipe_ctx to populate them
+		 * at the right locations.
+		 */
+		populate_pipe_ctx_dlg_params_from_dml(&context->res_ctx.pipe_ctx[dc_pipe_ctx_index], &context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx);
+
+		pipe_mall_type = dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[dc_pipe_ctx_index]);
+		if (pipe_mall_type == SUBVP_PHANTOM) {
+			// Phantom pipe requires that DET_SIZE = 0 and no unbounded requests
+			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = 0;
+			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = false;
+		} else {
+			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb = dml_get_det_buffer_size_kbytes(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx);
+			// Unbounded requesting should not ever be used when more than 1 pipe is enabled.
+			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].unbounded_req = in_ctx->v20.dml_core_ctx.ms.UnboundedRequestEnabledThisState;
+		}
+
+		context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[dc_pipe_ctx_index].det_buffer_size_kb;
+		context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz = dml_get_dppclk_calculated(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx) * 1000;
+		if (context->bw_ctx.bw.dcn.clk.dppclk_khz < context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz)
+			context->bw_ctx.bw.dcn.clk.dppclk_khz = context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_res.bw.dppclk_khz;
+
+		dml_rq_dlg_get_rq_reg(&s->rq_regs, &in_ctx->v20.dml_core_ctx, dml_pipe_idx);
+		dml_rq_dlg_get_dlg_reg(&s->disp_dlg_regs, &s->disp_ttu_regs, &in_ctx->v20.dml_core_ctx, dml_pipe_idx);
+		dml2_update_pipe_ctx_dchub_regs(&s->rq_regs, &s->disp_dlg_regs, &s->disp_ttu_regs, &out_new_hw_state->pipe_ctx[dc_pipe_ctx_index]);
+
+		context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes = dml_get_surface_size_for_mall(&context->bw_ctx.dml2->v20.dml_core_ctx, dml_pipe_idx);
+
+		/* Reuse MALL Allocation Sizes logic from dcn32_fpu.c */
+		/* Count from active, top pipes per plane only. Only add mall_ss_size_bytes for each unique plane. */
+		if (context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream && context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state &&
+			(context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe == NULL ||
+			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state != context->res_ctx.pipe_ctx[dc_pipe_ctx_index].top_pipe->plane_state) &&
+			context->res_ctx.pipe_ctx[dc_pipe_ctx_index].prev_odm_pipe == NULL) {
+			/* SS: all active surfaces stored in MALL */
+			if (pipe_mall_type != SUBVP_PHANTOM) {
+				context->bw_ctx.bw.dcn.mall_ss_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes;
+			} else {
+				/* SUBVP: phantom surfaces only stored in MALL */
+				context->bw_ctx.bw.dcn.mall_subvp_size_bytes += context->res_ctx.pipe_ctx[dc_pipe_ctx_index].surface_size_in_mall_bytes;
+			}
+		}
+	}
+
+	context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
+	context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
+
+	context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = in_ctx->v20.dml_core_ctx.states.state_array[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx].dppclk_mhz
+		* 1000;
+	context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = in_ctx->v20.dml_core_ctx.states.state_array[in_ctx->v20.scratch.mode_support_params.out_lowest_state_idx].dispclk_mhz
+		* 1000;
+
+	if (dc->config.forced_clocks || dc->debug.max_disp_clk) {
+		context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz;
+		context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz;
+	}
+}
+/* Fill @watermark from the DML getters, each result multiplied by 1000. NOTE(review): presumably us -> ns given the _ns field names - confirm; the frac_urg_bw_* fields get the same factor. */
+void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx)
+{
+ watermark->urgent_ns = dml_get_wm_urgent(dml_core_ctx) * 1000;
+ watermark->cstate_pstate.cstate_enter_plus_exit_ns = dml_get_wm_stutter_enter_exit(dml_core_ctx) * 1000;
+ watermark->cstate_pstate.cstate_exit_ns = dml_get_wm_stutter_exit(dml_core_ctx) * 1000;
+ watermark->cstate_pstate.pstate_change_ns = dml_get_wm_dram_clock_change(dml_core_ctx) * 1000;
+ watermark->pte_meta_urgent_ns = dml_get_wm_memory_trip(dml_core_ctx) * 1000;
+ watermark->frac_urg_bw_nom = dml_get_fraction_of_urgent_bandwidth(dml_core_ctx) * 1000;
+ watermark->frac_urg_bw_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(dml_core_ctx) * 1000;
+ watermark->urgent_latency_ns = dml_get_urgent_latency(dml_core_ctx) * 1000;
+ watermark->cstate_pstate.fclk_pstate_change_ns = dml_get_wm_fclk_change(dml_core_ctx) * 1000;
+ watermark->usr_retraining_ns = dml_get_wm_usr_retraining(dml_core_ctx) * 1000;
+ watermark->cstate_pstate.cstate_enter_plus_exit_z8_ns = dml_get_wm_z8_stutter_enter_exit(dml_core_ctx) * 1000;
+ watermark->cstate_pstate.cstate_exit_z8_ns = dml_get_wm_z8_stutter(dml_core_ctx) * 1000;
+}
+/* Return the buffer capability for @mode minus @urgent_watermark. NOTE(review): the unsigned subtraction wraps if the watermark is larger. */
+unsigned int dml2_calc_max_scaled_time(
+		unsigned int time_per_pixel,
+		enum mmhubbub_wbif_mode mode,
+		unsigned int urgent_watermark)
+{
+	const unsigned int total_free_entry = 0xb40;
+	unsigned int bytes_per_pixel = 0;
+	unsigned int time_per_byte = 0;
+
+	if (mode == PACKED_444) /* packed mode 32 bpp */
+		bytes_per_pixel = 4;
+	else if (mode == PACKED_444_FP16) /* packed mode 64 bpp */
+		bytes_per_pixel = 8;
+
+	if (bytes_per_pixel != 0)
+		time_per_byte = time_per_pixel / bytes_per_pixel;
+	if (time_per_byte == 0)
+		time_per_byte = 1;
+
+	/* time_per_byte is in u6.6, hence the shift right by 6 */
+	return ((total_free_entry * time_per_byte * 32) >> 6) - urgent_watermark;
+}
+/* If any stream in @context has writeback info, fill every mcif_wb_arb[] entry from the DML writeback watermark getters. */
+void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx)
+{
+	int i, j;
+	bool has_writeback = false;
+	struct mcif_arb_params *wb_arb_params = NULL;
+	enum mmhubbub_wbif_mode wbif_mode = PACKED_444_FP16; /*for now*/
+
+	for (i = 0; i < context->stream_count; i++) {
+		if (context->streams[i]->num_wb_info != 0)
+			has_writeback = true;
+	}
+	if (!has_writeback) /*no dwb */
+		return;
+
+	for (i = 0; i < __DML_NUM_DMB__; i++) {
+		wb_arb_params = &context->bw_ctx.bw.dcn.bw_writeback.mcif_wb_arb[i];
+
+		for (j = 0; j < 4; j++) {
+			/*current dml only has one set of watermark, need to follow up*/
+			wb_arb_params->cli_watermark[j] =
+				dml_get_wm_writeback_urgent(dml_core_ctx) * 1000;
+			wb_arb_params->pstate_watermark[j] =
+				dml_get_wm_writeback_dram_clock_change(dml_core_ctx) * 1000;
+		}
+
+		/* The pipe at this index may have no stream; leave time_per_pixel untouched then instead of dereferencing NULL */
+		if (context->res_ctx.pipe_ctx[i].stream && context->res_ctx.pipe_ctx[i].stream->phy_pix_clk != 0) {
+			/* time_per_pixel should be in u6.6 format */
+			wb_arb_params->time_per_pixel =
+				(1000000 << 6) / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk;
+		}
+
+		wb_arb_params->slice_lines = 32;
+		wb_arb_params->arbitration_slice = 2;
+		wb_arb_params->max_scaled_time =
+			dml2_calc_max_scaled_time(wb_arb_params->time_per_pixel,
+				wbif_mode, wb_arb_params->cli_watermark[0]);
+		/*not required any more*/
+		wb_arb_params->dram_speed_change_duration =
+			dml_get_wm_writeback_dram_clock_change(dml_core_ctx) * 1000;
+	}
+}
+void dml2_initialize_det_scratch(struct dml2_context *in_ctx)
+{
+	int plane = MAX_PLANES;
+
+	/* Set all MAX_PLANES entries of dpps_per_surface to 1; the fill order does not matter */
+	while (plane-- > 0)
+		in_ctx->det_helper_scratch.dpps_per_surface[plane] = 1;
+}
+/* Add each stream's plane count into num_of_planes_per_stream[] and return the number of streams. */
+static unsigned int find_planes_per_stream_and_stream_count(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg, int *num_of_planes_per_stream)
+{
+	unsigned int plane, stream = 0;
+
+	for (plane = 0; plane < dml_dispcfg->num_surfaces; plane++) {
+		/* Number of planes per stream */
+		num_of_planes_per_stream[stream]++;
+
+		/* A different BlendingAndTiming value on the next plane means that plane belongs to the next stream */
+		if (plane + 1 < dml_dispcfg->num_surfaces && dml_dispcfg->plane.BlendingAndTiming[plane] != dml_dispcfg->plane.BlendingAndTiming[plane + 1])
+			stream++;
+	}
+
+	/* stream is the index of the last stream, so the count is one more */
+	return stream + 1;
+}
+/* Write DETSizeOverride for every surface in @dml_dispcfg: a fixed per-pipe share when the override is set, else an even split. */
+void dml2_apply_det_buffer_allocation_policy(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg)
+{
+	unsigned int num_of_streams = 0, plane_index = 0, max_det_size, stream_index = 0;
+	int num_of_planes_per_stream[__DML_NUM_PLANES__] = { 0 };
+
+	max_det_size = in_ctx->config.det_segment_size * in_ctx->config.max_segments_per_hubp;
+
+	num_of_streams = find_planes_per_stream_and_stream_count(in_ctx, dml_dispcfg, num_of_planes_per_stream);
+
+	for (plane_index = 0; plane_index < dml_dispcfg->num_surfaces; plane_index++) {
+		if (in_ctx->config.override_det_buffer_size_kbytes)
+			dml_dispcfg->plane.DETSizeOverride[plane_index] = max_det_size / in_ctx->config.dcn_pipe_count;
+		else {
+			/* Split the DET evenly: across streams, then across the stream's planes, then across the plane's DPPs */
+			dml_dispcfg->plane.DETSizeOverride[plane_index] = ((max_det_size / num_of_streams) / num_of_planes_per_stream[stream_index] / in_ctx->det_helper_scratch.dpps_per_surface[plane_index]);
+
+			/* The override must be a whole number of DET segments, so round it down to the
+			 * previous multiple of det_segment_size. Taking the remainder off works for any
+			 * segment size, not only 64.
+			 */
+			dml_dispcfg->plane.DETSizeOverride[plane_index] -=
+				dml_dispcfg->plane.DETSizeOverride[plane_index] % in_ctx->config.det_segment_size;
+
+			if (plane_index + 1 < dml_dispcfg->num_surfaces && dml_dispcfg->plane.BlendingAndTiming[plane_index] != dml_dispcfg->plane.BlendingAndTiming[plane_index + 1])
+				stream_index++;
+		}
+	}
+}
+/* Return true when the DET sizes of the mapped pipes sum past det_segment_size * max_segments_per_hubp; also saves DPPPerSurface. */
+bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc_state *display_state, struct dml2_helper_det_policy_scratch *det_scratch)
+{
+	const unsigned int max_det_size = in_ctx->config.det_segment_size * in_ctx->config.max_segments_per_hubp;
+	unsigned int pipe_idx, plane, dml_pipe_idx, plane_id = 0, total_det_allocated = 0;
+	bool over_budget = false;
+
+	for (pipe_idx = 0; pipe_idx < MAX_PIPES; pipe_idx++) {
+		const struct pipe_ctx *pipe = &display_state->res_ctx.pipe_ctx[pipe_idx];
+
+		if (pipe->stream == NULL)
+			continue;
+
+		/* Map through the plane id when one is found for this pipe, otherwise through the stream id */
+		if (get_plane_id(in_ctx, display_state, pipe->plane_state, pipe->stream->stream_id,
+				in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[pipe->pipe_idx], &plane_id))
+			dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id);
+		else
+			dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, pipe->stream->stream_id);
+
+		/* Both lookups return -1 when nothing maps, which reads back as 0xFFFFFFFF in the unsigned index */
+		if (dml_pipe_idx == 0xFFFFFFFF)
+			continue;
+
+		total_det_allocated += dml_get_det_buffer_size_kbytes(&in_ctx->v20.dml_core_ctx, dml_pipe_idx);
+		over_budget = over_budget || total_det_allocated > max_det_size;
+	}
+
+	/* Store the DPPPerSurface for correctly determining the number of planes in the next call. */
+	for (plane = 0; plane < MAX_PLANES; plane++)
+		det_scratch->dpps_per_surface[plane] = in_ctx->v20.scratch.cur_display_config.hw.DPPPerSurface[plane];
+
+	return over_budget;
+}
+/* True when both the view format and the timing 3D format of @stream are side-by-side or top-and-bottom. */
+bool dml2_is_stereo_timing(const struct dc_stream_state *stream)
+{
+	const bool view_is_3d = stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE ||
+				stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM;
+
+	/* The view format has to be one of the two layouts ... */
+	if (!view_is_3d)
+		return false;
+
+	/* ... and so does the 3D format of the timing */
+	if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_TOP_AND_BOTTOM)
+		return true;
+	if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_SIDE_BY_SIDE)
+		return true;
+	return false;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
new file mode 100644
index 000000000000..04fcfe637119
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _DML2_UTILS_H_
+#define _DML2_UTILS_H_
+
+#include "os_types.h"
+#include "dml2_dc_types.h"
+
+struct dc;
+struct dml_timing_cfg_st;
+struct dml2_dcn_clocks;
+struct dc_state;
+
+void dml2_util_copy_dml_timing(struct dml_timing_cfg_st *dml_timing_array, unsigned int dst_index, unsigned int src_index);
+void dml2_util_copy_dml_plane(struct dml_plane_cfg_st *dml_plane_array, unsigned int dst_index, unsigned int src_index);
+void dml2_util_copy_dml_surface(struct dml_surface_cfg_st *dml_surface_array, unsigned int dst_index, unsigned int src_index);
+void dml2_util_copy_dml_output(struct dml_output_cfg_st *dml_output_array, unsigned int dst_index, unsigned int src_index);
+unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, enum dml_output_encoder_class encoder, bool dsc_enabled);
+void dml2_copy_clocks_to_dc_state(struct dml2_dcn_clocks *out_clks, struct dc_state *context);
+void dml2_extract_watermark_set(struct dcn_watermarks *watermark, struct display_mode_lib_st *dml_core_ctx);
+void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx);
+int dml2_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id);
+bool is_dtbclk_required(const struct dc *dc, struct dc_state *context);
+bool dml2_is_stereo_timing(const struct dc_stream_state *stream);
+unsigned int dml2_calc_max_scaled_time(
+ unsigned int time_per_pixel,
+ enum mmhubbub_wbif_mode mode,
+ unsigned int urgent_watermark);
+
+/*
+ * dml2_dc_construct_pipes - This function will determine if we need additional pipes based
+ * on the DML calculated outputs for MPC, ODM and allocate them as necessary. This function
+ * could be called in dml_validate_build_resource after dml_mode_programming, like:
+ * {
+ * ...
+ * map_hw_resources(&s->cur_display_config, &s->mode_support_info);
+ * result = dml_mode_programming(&in_ctx->dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true);
+ * dml2_dc_construct_pipes(in_display_state, s->mode_support_info, out_hw_context);
+ * ...
+ * }
+ *
+ * @context: To obtain res_ctx and read other information like stream ID etc.
+ * @dml_mode_support_st : To get the ODM, MPC outputs as determined by the DML.
+ * @out_hw_context : Handle to the new hardware context.
+ *
+ *
+ * Return: None.
+ */
+void dml2_dc_construct_pipes(struct dc_state *context, struct dml_mode_support_info_st *dml_mode_support_st,
+ struct resource_context *out_hw_context);
+
+/*
+ * dml2_predict_pipe_split - This function is the dml2 version of predict split pipe. It predicts
+ * whether a pipe split is required or not and returns the result as a bool.
+ * @context : dc_state.
+ * @pipe : old_index is the index of the pipe as derived from pipe_idx.
+ * @index : index of the pipe
+ *
+ *
+ * Return: Returns the result in boolean.
+ */
+bool dml2_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index);
+
+/*
+ * dml2_build_mapped_resource - This function is the dml2 version of build_mapped_resource.
+ * In case of ODM, we need to build pipe hardware params again as done in dcn20_build_mapped_resource.
+ * @dc : struct dc
+ * @context : struct dc_state.
+ * @stream : stream whose corresponding pipe params need to be modified.
+ *
+ *
+ * Return: Returns DC_OK if successful.
+ */
+enum dc_status dml2_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream);
+
+/*
+ * dml2_extract_rq_regs - This function will extract information needed for struct _vcs_dpi_display_rq_regs_st
+ * and populate it.
+ * @context: To obtain and populate the res_ctx->pipe_ctx->rq_regs with DML outputs.
+ * @support : This structure has the DML intermediate outputs required to populate rq_regs.
+ *
+ *
+ * Return: None.
+ */
+
+ /*
+ * dml2_calculate_rq_and_dlg_params - This function will call into DML2 functions needed
+ * for populating rq, ttu and dlg param structures and populate it.
+ * @dc : struct dc
+ * @context : dc_state provides a handle to selectively populate pipe_ctx
+ * @out_new_hw_state: To obtain and populate the rq, dlg and ttu regs in
+ * out_new_hw_state->pipe_ctx with DML outputs.
+ * @in_ctx : This structure has the pointer to display_mode_lib_st.
+ * @pipe_cnt : DML functions to obtain RQ, TTu and DLG params need a pipe_index.
+ * This helps provide pipe_index in the pipe_cnt loop.
+ *
+ *
+ * Return: None.
+ */
+void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, struct dml2_context *in_ctx, unsigned int pipe_cnt);
+
+/*
+ * dml2_apply_det_buffer_allocation_policy - This function will determine the DET Buffer size
+ * and return the number of streams.
+ * @in_ctx : Handle for dml2 context
+ * @dml_dispcfg : dml_dispcfg is the DML2 struct representing the current display config
+ * Return : None.
+ */
+void dml2_apply_det_buffer_allocation_policy(struct dml2_context *in_ctx, struct dml_display_cfg_st *dml_dispcfg);
+
+/*
+ * dml2_verify_det_buffer_configuration - This function will verify if the allocated DET buffer exceeds
+ * the total available DET size and outputs a boolean to indicate if recalculation is needed.
+ * @in_ctx : Handle for dml2 context
+ * @display_state : dc_state whose active pipes have their DET buffer sizes summed up
+ * @det_scratch : Pointer to DET helper scratch
+ * Return : returns true if recalculation is required, false otherwise.
+ */
+bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc_state *display_state, struct dml2_helper_det_policy_scratch *det_scratch);
+
+/*
+ * dml2_initialize_det_scratch - This function will initialize the DET scratch space as per requirements.
+ * @in_ctx : Handle for dml2 context
+ * Return : None
+ */
+void dml2_initialize_det_scratch(struct dml2_context *in_ctx);
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
new file mode 100644
index 000000000000..9deb03a18ccc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -0,0 +1,704 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "display_mode_core.h"
+#include "dml2_internal_types.h"
+#include "dml2_utils.h"
+#include "dml2_policy.h"
+#include "dml2_translation_helper.h"
+#include "dml2_mall_phantom.h"
+#include "dml2_dc_resource_mgmt.h"
+#include "dml21_wrapper.h"
+
+static void initialize_dml2_ip_params(struct dml2_context *dml2, const struct dc *in_dc, struct ip_params_st *out)
+{
+	if (!dml2->config.use_native_soc_bb_construction)
+		dml2_translate_ip_params(in_dc, out); /* config did not ask for native construction */
+	else
+		dml2_init_ip_params(dml2, in_dc, out); /* native construction of the IP params */
+}
+
+static void initialize_dml2_soc_bbox(struct dml2_context *dml2, const struct dc *in_dc, struct soc_bounding_box_st *out)
+{
+	if (!dml2->config.use_native_soc_bb_construction)
+		dml2_translate_socbb_params(in_dc, out); /* config did not ask for native construction */
+	else
+		dml2_init_socbb_params(dml2, in_dc, out); /* native construction of the SoC bounding box */
+}
+
+static void initialize_dml2_soc_states(struct dml2_context *dml2,
+		const struct dc *in_dc, const struct soc_bounding_box_st *in_bbox, struct soc_states_st *out)
+{
+	if (!dml2->config.use_native_soc_bb_construction)
+		dml2_translate_soc_states(in_dc, out, in_dc->dml.soc.num_states); /* in_bbox is not used on this path */
+	else
+		dml2_init_soc_states(dml2, in_dc, in_bbox, out); /* native construction from the bounding box */
+}
+
+/* Copy the per-plane results of mode support into the display cfg and rebuild the DML pipe to stream/plane id tables. */
+static void map_hw_resources(struct dml2_context *dml2,
+		struct dml_display_cfg_st *in_out_display_cfg, struct dml_mode_support_info_st *mode_support_info)
+{
+	struct dml2_dml_to_dc_pipe_mapping *map = &dml2->v20.scratch.dml_to_dc_pipe_mapping;
+	const bool dlg_ref_24mhz = dml2->v20.dml_core_ctx.project == dml_project_dcn35 ||
+			dml2->v20.dml_core_ctx.project == dml_project_dcn36 || dml2->v20.dml_core_ctx.project == dml_project_dcn351;
+	unsigned int num_pipes = 0;
+	int i, j;
+
+	/* DLGRefClkFreqMHz is one scalar, so set it once rather than per plane: 24 on DCN3.5/3.51/3.6, dGPU default is 50 */
+	in_out_display_cfg->hw.DLGRefClkFreqMHz = dlg_ref_24mhz ? 24 : 50;
+
+	for (i = 0; i < __DML_NUM_PLANES__; i++) {
+		in_out_display_cfg->hw.ODMMode[i] = mode_support_info->ODMMode[i];
+		in_out_display_cfg->hw.DPPPerSurface[i] = mode_support_info->DPPPerSurface[i];
+		in_out_display_cfg->hw.DSCEnabled[i] = mode_support_info->DSCEnabled[i];
+		in_out_display_cfg->hw.NumberOfDSCSlices[i] = mode_support_info->NumberOfDSCSlices[i];
+		for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) { /* one DML pipe entry per DPP of surface i */
+			if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) {
+				dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n",
+					  __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__);
+				break;
+			}
+			map->dml_pipe_idx_to_stream_id[num_pipes] = map->disp_cfg_to_stream_id[i];
+			map->dml_pipe_idx_to_stream_id_valid[num_pipes] = true;
+			map->dml_pipe_idx_to_plane_id[num_pipes] = map->disp_cfg_to_plane_id[i];
+			map->dml_pipe_idx_to_plane_id_valid[num_pipes] = true;
+			num_pipes++;
+		}
+	}
+}
+
+static unsigned int pack_and_call_dml_mode_support_ex(struct dml2_context *dml2,
+		const struct dml_display_cfg_st *display_cfg,
+		struct dml_mode_support_info_st *evaluation_info,
+		enum dc_validate_mode validate_mode)
+{
+	struct dml_mode_support_ex_params_st *params = &dml2->v20.scratch.mode_support_params;
+
+	/* Zero the evaluation result before it is handed to DML */
+	memset(evaluation_info, 0, sizeof(struct dml_mode_support_info_st));
+
+	params->mode_lib = &dml2->v20.dml_core_ctx;
+	params->in_display_cfg = display_cfg;
+	params->out_evaluation_info = evaluation_info;
+	params->out_lowest_state_idx = 0;
+	/* DC_VALIDATE_MODE_ONLY starts the search at the last state; every other validate mode starts at state 0 */
+	params->in_start_state_idx = (validate_mode == DC_VALIDATE_MODE_ONLY) ?
+			dml2->v20.dml_core_ctx.states.num_states - 1 : 0;
+
+	return dml_mode_support_ex(params);
+}
+
+/*
+ * Try one ODM based dispclk optimization step on top of the current configuration.
+ * The current policy and display config are first copied into the "new" slots (unless they alias).
+ * Returns true only when new_policy->ODMUse[0] was switched to a 2to1 or 4to1 combine.
+ */
+static bool optimize_configuration(struct dml2_context *dml2, struct dml2_wrapper_optimize_configuration_params *p)
+{
+	const int free_dpps = p->ip_params->max_num_dpp;
+	int max_blend_idx = 0, extra_odm_pipes, t;
+
+	for (t = 0; t < (int) p->cur_display_config->num_timings; t++) {
+		if (p->cur_display_config->plane.BlendingAndTiming[t] > max_blend_idx)
+			max_blend_idx = p->cur_display_config->plane.BlendingAndTiming[t];
+	}
+
+	if (p->new_policy != p->cur_policy)
+		*p->new_policy = *p->cur_policy;
+	if (p->new_display_config != p->cur_display_config)
+		*p->new_display_config = *p->cur_display_config;
+
+	/* Optimize clocks: only when all checked BlendingAndTiming entries are 0 and ODM is "combine as needed" */
+	if (max_blend_idx != 0 || p->cur_policy->ODMUse[0] != dml_odm_use_policy_combine_as_needed ||
+			!dml2->config.minimize_dispclk_using_odm)
+		return false;
+
+	extra_odm_pipes = dml2_util_get_maximum_odm_combine_for_output(dml2->config.optimize_odm_4to1,
+			p->cur_display_config->output.OutputEncoder[0], p->cur_mode_support_info->DSCEnabled[0]) - 1;
+	if (extra_odm_pipes > free_dpps)
+		return false;
+
+	switch (extra_odm_pipes) {
+	case 1:
+		p->new_policy->ODMUse[0] = dml_odm_use_policy_combine_2to1;
+		return true;
+	case 3:
+		p->new_policy->ODMUse[0] = dml_odm_use_policy_combine_4to1;
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Find the lowest state at which vactive DRAM clock change is still supported when the DRAM clock
+ * change latency of every state is replaced by one of the four dummy p-state latencies. On success
+ * the G6 temp-read watermark set is extracted as a side effect and the state index is returned.
+ * Returns -1 when no dummy entry yields such a state or when scaling params cannot be built.
+ * The real latencies are restored once the search over the dummy entries has finished.
+ */
+static int calculate_lowest_supported_state_for_temp_read(struct dml2_context *dml2, struct dc_state *display_state,
+		enum dc_validate_mode validate_mode)
+{
+	struct dml2_calculate_lowest_supported_state_for_temp_read_scratch *s = &dml2->v20.scratch.dml2_calculate_lowest_supported_state_for_temp_read_scratch;
+	struct dml2_wrapper_scratch *s_global = &dml2->v20.scratch;
+
+	unsigned int dml_result = 0;
+	int result = -1, i, j;
+
+	build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy);
+
+	/* Zero out before each call before proceeding */
+	memset(s, 0, sizeof(struct dml2_calculate_lowest_supported_state_for_temp_read_scratch));
+	memset(&s_global->mode_support_params, 0, sizeof(struct dml_mode_support_ex_params_st));
+	memset(&s_global->dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
+
+	for (i = 0; i < dml2->config.dcn_pipe_count; i++) {
+		/* build_scaling_params fills in the pipe params DML needs, which is why display_state cannot be const */
+		struct pipe_ctx *pipe = &display_state->res_ctx.pipe_ctx[i];
+
+		if (pipe->plane_state && !dml2->config.callbacks.build_scaling_params(pipe)) {
+			ASSERT(false);
+			return -1; /* not false: 0 is a valid state index, -1 is this function's failure value */
+		}
+	}
+
+	map_dc_state_into_dml_display_cfg(dml2, display_state, &s->cur_display_config);
+
+	/* Save the real latencies; the loop below overwrites them with the dummy values */
+	for (i = 0; i < dml2->v20.dml_core_ctx.states.num_states; i++)
+		s->uclk_change_latencies[i] = dml2->v20.dml_core_ctx.states.state_array[i].dram_clock_change_latency_us;
+
+	for (i = 0; i < 4; i++) {
+		for (j = 0; j < dml2->v20.dml_core_ctx.states.num_states; j++)
+			dml2->v20.dml_core_ctx.states.state_array[j].dram_clock_change_latency_us = s_global->dummy_pstate_table[i].dummy_pstate_latency_us;
+
+		dml_result = pack_and_call_dml_mode_support_ex(dml2, &s->cur_display_config, &s->evaluation_info,
+				validate_mode);
+		if (!dml_result || s->evaluation_info.DRAMClockChangeSupport[0] != dml_dram_clock_change_vactive)
+			continue;
+
+		map_hw_resources(dml2, &s->cur_display_config, &s->evaluation_info);
+		dml_result = dml_mode_programming(&dml2->v20.dml_core_ctx, s_global->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true);
+		ASSERT(dml_result);
+
+		dml2_extract_watermark_set(&dml2->v20.g6_temp_read_watermark_set, &dml2->v20.dml_core_ctx);
+		dml2->v20.g6_temp_read_watermark_set.cstate_pstate.fclk_pstate_change_ns = dml2->v20.g6_temp_read_watermark_set.cstate_pstate.pstate_change_ns;
+
+		result = s_global->mode_support_params.out_lowest_state_idx;
+		/* Move up to the first state whose DRAM speed reaches the dummy entry's; stop at the last state instead of running off state_array */
+		while (result + 1 < (int) dml2->v20.dml_core_ctx.states.num_states &&
+				dml2->v20.dml_core_ctx.states.state_array[result].dram_speed_mts < s_global->dummy_pstate_table[i].dram_speed_mts)
+			result++;
+
+		break;
+	}
+
+	/* Put the real latencies back */
+	for (i = 0; i < dml2->v20.dml_core_ctx.states.num_states; i++)
+		dml2->v20.dml_core_ctx.states.state_array[i].dram_clock_change_latency_us = s->uclk_change_latencies[i];
+
+	return result;
+}
+
+/* Copy the first num_entries entries of src into dest; the index is unsigned to match num_entries. */
+static void copy_dummy_pstate_table(struct dummy_pstate_entry *dest, struct dummy_pstate_entry *src, unsigned int num_entries)
+{
+	for (unsigned int i = 0; i < num_entries; i++)
+		dest[i] = src[i];
+}
+
+/* True when some timing index carries more than one surface while its ODMMode entry is not bypass. */
+static bool are_timings_requiring_odm_doing_blending(const struct dml_display_cfg_st *display_cfg,
+		const struct dml_mode_support_info_st *evaluation_info)
+{
+	unsigned int surfaces_on_timing[__DML_NUM_PLANES__] = {0};
+	int idx;
+
+	for (idx = 0; idx < display_cfg->num_surfaces; idx++)
+		++surfaces_on_timing[display_cfg->plane.BlendingAndTiming[idx]];
+
+	for (idx = 0; idx < __DML_NUM_PLANES__; idx++)
+		if (surfaces_on_timing[idx] > 1 && evaluation_info->ODMMode[idx] != dml_odm_mode_bypass)
+			return true;
+
+	return false;
+}
+
+static bool does_configuration_meet_sw_policies(struct dml2_context *ctx, const struct dml_display_cfg_st *display_cfg,
+		const struct dml_mode_support_info_st *evaluation_info)
+{
+	/*
+	 * The only SW policy checked here: unless windowed MPO ODM is enabled, no timing
+	 * may need ODM while also blending more than one surface.
+	 */
+	if (ctx->config.enable_windowed_mpo_odm)
+		return true;
+
+	return !are_timings_requiring_odm_doing_blending(display_cfg, evaluation_info);
+}
+
+static bool dml_mode_support_wrapper(struct dml2_context *dml2,
+ struct dc_state *display_state,
+ enum dc_validate_mode validate_mode)
+{
+ struct dml2_wrapper_scratch *s = &dml2->v20.scratch;
+ unsigned int result = 0, i;
+ unsigned int optimized_result = true; /* outcome of the latest optimization attempt; starts true so the loop below can be entered */
+
+ build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy);
+
+ /* Zero out the scratch members used below before each call */
+ memset(&s->cur_display_config, 0, sizeof(struct dml_display_cfg_st));
+ memset(&s->mode_support_params, 0, sizeof(struct dml_mode_support_ex_params_st));
+ memset(&s->dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping));
+ memset(&s->optimize_configuration_params, 0, sizeof(struct dml2_wrapper_optimize_configuration_params));
+
+ for (i = 0; i < dml2->config.dcn_pipe_count; i++) {
+ /* Calling resource_build_scaling_params will populate the pipe params
+ * with the necessary information needed for correct DML calculations
+ * This is also done in DML1 driver code path and hence display_state
+ * cannot be const.
+ */
+ struct pipe_ctx *pipe = &display_state->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state) {
+ if (!dml2->config.callbacks.build_scaling_params(pipe)) {
+ ASSERT(false);
+ return false;
+ }
+ }
+ }
+
+ map_dc_state_into_dml_display_cfg(dml2, display_state, &s->cur_display_config);
+ if (!dml2->config.skip_hw_state_mapping)
+ dml2_apply_det_buffer_allocation_policy(dml2, &s->cur_display_config);
+
+ result = pack_and_call_dml_mode_support_ex(dml2,
+ &s->cur_display_config,
+ &s->mode_support_info,
+ validate_mode);
+
+ if (result)
+ result = does_configuration_meet_sw_policies(dml2, &s->cur_display_config, &s->mode_support_info);
+
+ // Try to optimize: re-run mode support on every candidate that optimize_configuration() produces
+ if (result) {
+ s->cur_policy = dml2->v20.dml_core_ctx.policy;
+ s->optimize_configuration_params.dml_core_ctx = &dml2->v20.dml_core_ctx;
+ s->optimize_configuration_params.config = &dml2->config;
+ s->optimize_configuration_params.ip_params = &dml2->v20.dml_core_ctx.ip;
+ s->optimize_configuration_params.cur_display_config = &s->cur_display_config;
+ s->optimize_configuration_params.cur_mode_support_info = &s->mode_support_info;
+ s->optimize_configuration_params.cur_policy = &s->cur_policy;
+ s->optimize_configuration_params.new_display_config = &s->new_display_config;
+ s->optimize_configuration_params.new_policy = &s->new_policy;
+
+ while (optimized_result && optimize_configuration(dml2, &s->optimize_configuration_params)) {
+ dml2->v20.dml_core_ctx.policy = s->new_policy;
+ optimized_result = pack_and_call_dml_mode_support_ex(dml2,
+ &s->new_display_config,
+ &s->mode_support_info,
+ validate_mode);
+
+ if (optimized_result)
+ optimized_result = does_configuration_meet_sw_policies(dml2, &s->new_display_config, &s->mode_support_info);
+
+ // If the new optimized state is supported, then set current = new
+ if (optimized_result) {
+ s->cur_display_config = s->new_display_config;
+ s->cur_policy = s->new_policy;
+ } else {
+ // Else, restore policy to current
+ dml2->v20.dml_core_ctx.policy = s->cur_policy;
+ }
+ }
+
+ // Optimize ended with a failed config, so we need to restore DML state to last passing
+ if (!optimized_result) {
+ result = pack_and_call_dml_mode_support_ex(dml2,
+ &s->cur_display_config,
+ &s->mode_support_info,
+ validate_mode);
+ }
+ }
+
+ if (result)
+ map_hw_resources(dml2, &s->cur_display_config, &s->mode_support_info);
+
+ return result;
+}
+
+/*
+ * Run mode support on context and, when it passes, mode programming at the selected state.
+ * Non-APU parts additionally run the temp-read state search first; a valid result of that
+ * search is the lowest state that will be programmed.
+ */
+static bool call_dml_mode_support_and_programming(struct dc_state *context, enum dc_validate_mode validate_mode)
+{
+	int min_state_for_g6_temp_read = 0;
+	struct dml2_wrapper_scratch *s;
+	struct dml2_context *dml2;
+	unsigned int min_state;
+	bool is_apu;
+
+	if (!context)
+		return false;
+
+	dml2 = context->bw_ctx.dml2;
+	s = &dml2->v20.scratch;
+	is_apu = context->streams[0]->sink->link->dc->caps.is_apu;
+
+	if (!is_apu) {
+		min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context,
+				validate_mode);
+		ASSERT(min_state_for_g6_temp_read >= 0);
+	}
+
+	if (!dml_mode_support_wrapper(dml2, context, validate_mode))
+		return false;
+
+	/*
+	 * Upon trying to set certain frequencies in FRL, min_state_for_g6_temp_read is reported as -1, and using it
+	 * would give an invalid min_state that causes crashes later on. So it only raises min_state when it is a
+	 * valid value (and only on non-APU parts); in all other cases the state calculated by the DML is used directly.
+	 */
+	min_state = s->mode_support_params.out_lowest_state_idx;
+	if (!is_apu && min_state_for_g6_temp_read >= 0 &&
+			min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx)
+		min_state = min_state_for_g6_temp_read;
+
+	return dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true);
+}
+
+static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode)
+{
+ struct dml2_context *dml2 = context->bw_ctx.dml2;
+ struct dml2_wrapper_scratch *s = &dml2->v20.scratch;
+ struct dml2_dcn_clocks out_clks;
+ unsigned int result = 0;
+ bool need_recalculation = false;
+ uint32_t cstate_enter_plus_exit_z8_ns;
+
+ if (context->stream_count == 0) { /* no streams: report the clocks of state 0 and return without running DML */
+ unsigned int lowest_state_idx = 0;
+
+ out_clks.p_state_supported = true;
+ out_clks.dispclk_khz = 0; /* No requirement, and lowest index will generally be maximum dispclk. */
+ out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000;
+ out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000;
+ out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts;
+ out_clks.phyclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].phyclk_mhz * 1000;
+ out_clks.socclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].socclk_mhz * 1000;
+ out_clks.ref_dtbclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dtbclk_mhz * 1000;
+ context->bw_ctx.bw.dcn.clk.dtbclk_en = false;
+ dml2_copy_clocks_to_dc_state(&out_clks, context);
+ return true;
+ }
+
+ /* Zero out before each call before proceeding */
+ memset(&dml2->v20.scratch, 0, sizeof(struct dml2_wrapper_scratch));
+ memset(&dml2->v20.dml_core_ctx.policy, 0, sizeof(struct dml_mode_eval_policy_st));
+ memset(&dml2->v20.dml_core_ctx.ms, 0, sizeof(struct mode_support_st));
+ memset(&dml2->v20.dml_core_ctx.mp, 0, sizeof(struct mode_program_st));
+
+ /* Initialize DET scratch */
+ dml2_initialize_det_scratch(dml2);
+
+ copy_dummy_pstate_table(s->dummy_pstate_table, in_dc->clk_mgr->bw_params->dummy_pstate_table, 4);
+
+ result = call_dml_mode_support_and_programming(context, validate_mode);
+ /* Call map dc pipes to map the pipes based on the DML output. For correctly determining if recalculation
+ * is required or not, the resource context needs to correctly reflect the number of active pipes. We would
+ * only know the correct number of active pipes after dml2_map_dc_pipes is called.
+ */
+ if (result && !dml2->config.skip_hw_state_mapping)
+ dml2_map_dc_pipes(dml2, context, &s->cur_display_config, &s->dml_to_dc_pipe_mapping, in_dc->current_state);
+
+ /* Verify and update DET Buffer configuration if needed. dml2_verify_det_buffer_configuration will check if DET Buffer
+ * size needs to be updated and set the need_recalculation flag to true. NOTE(review): where DETOverride gets updated is not visible here - confirm.
+ * Based on that flag, run mode support again. Verification needs to be run after dml_mode_programming because the getters
+ * return correct det buffer values only after dml_mode_programming is called.
+ */
+ if (result && !dml2->config.skip_hw_state_mapping) {
+ need_recalculation = dml2_verify_det_buffer_configuration(dml2, context, &dml2->det_helper_scratch);
+ if (need_recalculation) {
+ /* Engage the DML again if recalculation is required. NOTE(review): the return value of this second run is ignored - confirm that is intended. */
+ call_dml_mode_support_and_programming(context, validate_mode);
+ if (!dml2->config.skip_hw_state_mapping) { /* always true here: the enclosing if already checked it */
+ dml2_map_dc_pipes(dml2, context, &s->cur_display_config, &s->dml_to_dc_pipe_mapping, in_dc->current_state);
+ }
+ need_recalculation = dml2_verify_det_buffer_configuration(dml2, context, &dml2->det_helper_scratch);
+ ASSERT(need_recalculation == false);
+ }
+ }
+
+ if (result) {
+ unsigned int lowest_state_idx = s->mode_support_params.out_lowest_state_idx;
+ out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.mp.Dispclk_calculated * 1000;
+ out_clks.p_state_supported = s->mode_support_info.DRAMClockChangeSupport[0] != dml_dram_clock_change_unsupported;
+ if (in_dc->config.use_default_clock_table && /* with the default clock table the last state's clocks are reported instead */
+ (lowest_state_idx < dml2->v20.dml_core_ctx.states.num_states - 1)) {
+ lowest_state_idx = dml2->v20.dml_core_ctx.states.num_states - 1;
+ out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dispclk_mhz * 1000;
+ }
+
+ out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000;
+ out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000;
+ out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts;
+ out_clks.phyclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].phyclk_mhz * 1000;
+ out_clks.socclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].socclk_mhz * 1000;
+ out_clks.ref_dtbclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dtbclk_mhz * 1000;
+ context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(in_dc, context);
+
+ if (!dml2->config.skip_hw_state_mapping) {
+ /* Call dml2_calculate_rq_and_dlg_params */
+ dml2_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml2, in_dc->res_pool->pipe_count);
+ }
+
+ dml2_copy_clocks_to_dc_state(&out_clks, context);
+ dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.a, &dml2->v20.dml_core_ctx);
+ dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx);
+ if (context->streams[0]->sink->link->dc->caps.is_apu)
+ dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.dml_core_ctx);
+ else /* non-APU: set C is the G6 temp-read watermark set kept in dml2->v20 */
+ memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c));
+ dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx);
+ dml2_extract_writeback_wm(context, &dml2->v20.dml_core_ctx);
+ // copy for deciding zstate use
+ context->bw_ctx.dml.vba.StutterPeriod = context->bw_ctx.dml2->v20.dml_core_ctx.mp.StutterPeriod;
+
+ cstate_enter_plus_exit_z8_ns = context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns;
+
+ if (context->bw_ctx.dml.vba.StutterPeriod < in_dc->debug.minimum_z8_residency_time && /* raise set A's Z8 value to the debug minimum (* 1000) */
+ cstate_enter_plus_exit_z8_ns < in_dc->debug.minimum_z8_residency_time * 1000)
+ cstate_enter_plus_exit_z8_ns = in_dc->debug.minimum_z8_residency_time * 1000;
+
+ context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = cstate_enter_plus_exit_z8_ns;
+ }
+
+ return result;
+}
+
+/*
+ * Mode support plus SW policy check only (dml_mode_programming is not called); NULL or stream-less contexts pass.
+ */
+static bool dml2_validate_only(struct dc_state *context, enum dc_validate_mode validate_mode)
+{
+	struct dml2_wrapper_scratch *scratch;
+	struct dml2_context *dml2;
+
+	if (!context || context->stream_count == 0)
+		return true;
+
+	dml2 = context->bw_ctx.dml2;
+	scratch = &dml2->v20.scratch;
+
+	/* Start from zeroed scratch, policy, mode-support and mode-programming state */
+	memset(scratch, 0, sizeof(struct dml2_wrapper_scratch));
+	memset(&dml2->v20.dml_core_ctx.policy, 0, sizeof(struct dml_mode_eval_policy_st));
+	memset(&dml2->v20.dml_core_ctx.ms, 0, sizeof(struct mode_support_st));
+	memset(&dml2->v20.dml_core_ctx.mp, 0, sizeof(struct mode_program_st));
+
+	build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy);
+
+	map_dc_state_into_dml_display_cfg(dml2, context, &scratch->cur_display_config);
+	if (!dml2->config.skip_hw_state_mapping)
+		dml2_apply_det_buffer_allocation_policy(dml2, &scratch->cur_display_config);
+
+	if (!pack_and_call_dml_mode_support_ex(dml2, &scratch->cur_display_config,
+			&scratch->mode_support_info, validate_mode))
+		return false;
+
+	return does_configuration_meet_sw_policies(dml2, &scratch->cur_display_config, &scratch->mode_support_info);
+}
+
+/* Let dc->debug override the ODM dispclk minimization setting when override_odm_optimization is set. */
+static void dml2_apply_debug_options(const struct dc *dc, struct dml2_context *dml2)
+{
+	if (dc->debug.override_odm_optimization)
+		dml2->config.minimize_dispclk_using_odm = dc->debug.minimize_dispclk_using_odm;
+}
+
+bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2,
+		enum dc_validate_mode validate_mode)
+{
+	bool valid;
+
+	if (dml2 == NULL)
+		return false;
+
+	dml2_apply_debug_options(in_dc, dml2);
+
+	/* DML2.1 has its own validation entry point */
+	if (dml2->architecture == dml2_architecture_21)
+		return dml21_validate(in_dc, context, dml2, validate_mode);
+
+	/* The DML2.0 paths below run between DC_FP_START and DC_FP_END */
+	DC_FP_START();
+
+	/* Only DC_VALIDATE_MODE_AND_PROGRAMMING builds resources; every other validate mode uses dml2_validate_only */
+	if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING)
+		valid = dml2_validate_and_build_resource(in_dc, context, validate_mode);
+	else
+		valid = dml2_validate_only(context, validate_mode);
+
+	DC_FP_END();
+
+	return valid;
+}
+
+static inline struct dml2_context *dml2_allocate_memory(void)
+{
+	return vzalloc(sizeof(struct dml2_context)); /* void * converts implicitly, no cast needed */
+}
+
+/* Initialize *dml2 from config: dml21_reinit() on the DML2.1 path, otherwise project id, IP params, SoC bbox and SoC states. */
+static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2)
+{
+	struct dml2_context *ctx = *dml2;
+
+	if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) {
+		dml21_reinit(in_dc, ctx, config);
+		return;
+	}
+
+	ctx->config = *config; /* store a copy of the config options in the context */
+
+	/* Map the DCN version onto the DML project id; versions not listed use the default project */
+	switch (in_dc->ctx->dce_version) {
+	case DCN_VERSION_3_2:
+		ctx->v20.dml_core_ctx.project = dml_project_dcn32;
+		break;
+	case DCN_VERSION_3_21:
+		ctx->v20.dml_core_ctx.project = dml_project_dcn321;
+		break;
+	case DCN_VERSION_3_5:
+		ctx->v20.dml_core_ctx.project = dml_project_dcn35;
+		break;
+	case DCN_VERSION_3_51:
+		ctx->v20.dml_core_ctx.project = dml_project_dcn351;
+		break;
+	case DCN_VERSION_3_6:
+		ctx->v20.dml_core_ctx.project = dml_project_dcn36;
+		break;
+	case DCN_VERSION_4_01:
+		ctx->v20.dml_core_ctx.project = dml_project_dcn401;
+		break;
+	default:
+		ctx->v20.dml_core_ctx.project = dml_project_default;
+		break;
+	}
+
+	/* IP params, SoC bounding box, then the SoC states (which take the bounding box as input) */
+	DC_FP_START();
+	initialize_dml2_ip_params(ctx, in_dc, &ctx->v20.dml_core_ctx.ip);
+	initialize_dml2_soc_bbox(ctx, in_dc, &ctx->v20.dml_core_ctx.soc);
+	initialize_dml2_soc_states(ctx, in_dc, &ctx->v20.dml_core_ctx.soc, &ctx->v20.dml_core_ctx.states);
+	DC_FP_END();
+}
+
+bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2)
+{
+	const bool use_dml21 = (in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01);
+
+	// TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete.
+	if (use_dml21)
+		return dml21_create(in_dc, dml2, config);
+
+	/* Otherwise allocate a zeroed Mode Lib Ctx here and initialize it from config */
+	*dml2 = dml2_allocate_memory();
+	if (*dml2 == NULL)
+		return false;
+
+	dml2_init(in_dc, config, dml2);
+	return true;
+}
+
+/* Release a dml2 context; NULL is ignored. DML2.1 contexts go through dml21_destroy() before the vfree(). */
+void dml2_destroy(struct dml2_context *dml2)
+{
+	if (dml2 != NULL) {
+		if (dml2->architecture == dml2_architecture_21)
+			dml21_destroy(dml2);
+		vfree(dml2);
+	}
+}
+
+/* Report entry 0 of the FCLK and DRAM clock change support results held in the mode-support state (ms.support). */
+void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, unsigned int *fclk_change_support, unsigned int *dram_clk_change_support)
+{
+	*fclk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.FCLKChangeSupport[0];
+	*dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0];
+}
+
+void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2)
+{
+ if (dml2->architecture == dml2_architecture_21) /* only DML2.1 is handled; for any other architecture this is a no-op */
+ dml21_prepare_mcache_programming(in_dc, context, dml2);
+}
+
+/* Copy src_dml2 into dst_dml2: dml21_copy() for DML2.1 contexts, otherwise a flat copy of the whole context. */
+void dml2_copy(struct dml2_context *dst_dml2,
+	struct dml2_context *src_dml2)
+{
+	if (src_dml2->architecture != dml2_architecture_21) {
+		memcpy(dst_dml2, src_dml2, sizeof(struct dml2_context));
+	} else {
+		dml21_copy(dst_dml2, src_dml2);
+	}
+}
+
+/* Allocate *dst_dml2 and copy src_dml2 into it (DML2.1 defers to dml21_create_copy); false when the allocation here fails. */
+bool dml2_create_copy(struct dml2_context **dst_dml2,
+	struct dml2_context *src_dml2)
+{
+	struct dml2_context *copy;
+
+	if (src_dml2->architecture == dml2_architecture_21)
+		return dml21_create_copy(dst_dml2, src_dml2);
+
+	copy = dml2_allocate_memory();
+	*dst_dml2 = copy;
+	if (copy == NULL)
+		return false;
+	dml2_copy(copy, src_dml2);
+	return true;
+}
+
+/* Re-run initialization on an existing context: dml21_reinit() on the DML2.1 path, dml2_init() otherwise. */
+void dml2_reinit(const struct dc *in_dc,
+				 const struct dml2_configuration_options *config,
+				 struct dml2_context **dml2)
+{
+	/* The DML2.1 path needs both the using_dml21 debug switch and DCN 4.01 or newer */
+	if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01))
+		dml21_reinit(in_dc, *dml2, config);
+	else
+		dml2_init(in_dc, config, dml2);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
new file mode 100644
index 000000000000..c384e141cebc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DML2_WRAPPER_H_
+#define _DML2_WRAPPER_H_
+
+#include "os_types.h"
+
+#define DML2_MAX_NUM_DPM_LVL 30
+
+struct dml2_context;
+struct display_mode_lib_st;
+struct dc;
+struct pipe_ctx;
+struct dc_plane_state;
+struct dc_sink;
+struct dc_stream_state;
+struct resource_context;
+struct display_stream_compressor;
+struct dc_mcache_params;
+
+// Configuration of the MALL on the SoC (the mall_cfg member of struct dml2_configuration_options)
+struct dml2_soc_mall_info {
+ // Cache line size of 0 means MALL is not enabled/present
+ unsigned int cache_line_size_bytes;
+ unsigned int cache_num_ways;
+ unsigned int max_cab_allocation_bytes;
+ // NOTE(review): "mblk" and the 4bpe/8bpe split are not explained in this header - confirm the meaning with the DML2 users
+ unsigned int mblk_width_pixels;
+ unsigned int mblk_size_bytes;
+ unsigned int mblk_height_4bpe_pixels;
+ unsigned int mblk_height_8bpe_pixels;
+};
+
+// Output of DML2 for clock requirements
+struct dml2_dcn_clocks {
+ unsigned int dispclk_khz;
+ unsigned int dcfclk_khz;
+ unsigned int fclk_khz;
+ unsigned int uclk_mts; // NOTE(review): the only clock here not suffixed _khz - presumably MT/s, confirm
+ unsigned int phyclk_khz;
+ unsigned int socclk_khz;
+ unsigned int ref_dtbclk_khz;
+ bool p_state_supported;
+ unsigned int cab_num_ways_required;
+ unsigned int dcfclk_khz_ds; // NOTE(review): "_ds" variant of dcfclk_khz; its meaning is not stated here - confirm
+};
+
+struct dml2_dc_callbacks { /* callback table embedded as dml2_configuration_options.callbacks; every member except @dc is a function pointer */
+ struct dc *dc;
+ bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx);
+ void (*build_test_pattern_params)(struct resource_context *res_ctx, struct pipe_ctx *otg_master);
+ bool (*can_support_mclk_switch_using_fw_based_vblank_stretch)(struct dc *dc, struct dc_state *context);
+ bool (*acquire_secondary_pipe_for_mpc_odm)(const struct dc *dc, struct dc_state *state, struct pipe_ctx *pri_pipe, struct pipe_ctx *sec_pipe, bool odm);
+ bool (*update_pipes_for_stream_with_slice_count)(
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ const struct resource_pool *pool,
+ const struct dc_stream_state *stream,
+ int new_slice_count);
+ bool (*update_pipes_for_plane_with_slice_count)(
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ const struct resource_pool *pool,
+ const struct dc_plane_state *plane,
+ int slice_count);
+ int (*get_odm_slice_index)(const struct pipe_ctx *opp_head);
+ int (*get_odm_slice_count)(const struct pipe_ctx *opp_head);
+ int (*get_mpc_slice_index)(const struct pipe_ctx *dpp_pipe);
+ int (*get_mpc_slice_count)(const struct pipe_ctx *dpp_pipe);
+ struct pipe_ctx *(*get_opp_head)(const struct pipe_ctx *pipe_ctx);
+ struct pipe_ctx *(*get_otg_master_for_stream)(
+ struct resource_context *res_ctx,
+ const struct dc_stream_state *stream);
+ int (*get_opp_heads_for_otg_master)(const struct pipe_ctx *otg_master,
+ struct resource_context *res_ctx,
+ struct pipe_ctx *opp_heads[MAX_PIPES]); /* caller-supplied array with MAX_PIPES slots */
+ int (*get_dpp_pipes_for_plane)(const struct dc_plane_state *plane,
+ struct resource_context *res_ctx,
+ struct pipe_ctx *dpp_pipes[MAX_PIPES]); /* caller-supplied array with MAX_PIPES slots */
+ struct dc_stream_status *(*get_stream_status)(
+ struct dc_state *state,
+ const struct dc_stream_state *stream);
+ struct dc_stream_state *(*get_stream_from_id)(const struct dc_state *state, unsigned int id);
+ unsigned int (*get_max_flickerless_instant_vtotal_increase)(
+ struct dc_stream_state *stream,
+ bool is_gaming);
+ bool (*allocate_mcache)(struct dc_state *context, const struct dc_mcache_params *mcache_params);
+};
+
+struct dml2_dc_svp_callbacks { /* callback table embedded as dml2_configuration_options.svp_pstate.callbacks; every member except @dc is a function pointer */
+ struct dc *dc;
+ bool (*build_scaling_params)(struct pipe_ctx *pipe_ctx); /* same signature as the member of that name in struct dml2_dc_callbacks */
+ struct dc_stream_state* (*create_phantom_stream)(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *main_stream);
+ struct dc_plane_state* (*create_phantom_plane)(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_plane_state *main_plane);
+ enum dc_status (*add_phantom_stream)(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *phantom_stream,
+ struct dc_stream_state *main_stream);
+ bool (*add_phantom_plane)(const struct dc *dc, struct dc_stream_state *stream, struct dc_plane_state *plane_state, struct dc_state *context);
+ bool (*remove_phantom_plane)(const struct dc *dc,
+ struct dc_stream_state *stream,
+ struct dc_plane_state *plane_state,
+ struct dc_state *context);
+ enum dc_status (*remove_phantom_stream)(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream);
+ void (*release_phantom_plane)(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_plane_state *plane);
+ void (*release_phantom_stream)(const struct dc *dc,
+ struct dc_state *state,
+ struct dc_stream_state *stream);
+ void (*release_dsc)(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc);
+ enum mall_stream_type (*get_pipe_subvp_type)(const struct dc_state *state, const struct pipe_ctx *pipe_ctx);
+ enum mall_stream_type (*get_stream_subvp_type)(const struct dc_state *state, const struct dc_stream_state *stream);
+ struct dc_stream_state *(*get_paired_subvp_stream)(const struct dc_state *state, const struct dc_stream_state *stream);
+ bool (*remove_phantom_streams_and_planes)(
+ const struct dc *dc,
+ struct dc_state *state);
+ void (*release_phantom_streams_and_planes)(
+ const struct dc *dc,
+ struct dc_state *state);
+ unsigned int (*calculate_mall_ways_from_bytes)(
+ const struct dc *dc,
+ unsigned int total_size_in_mall_bytes);
+};
+
+struct dml2_clks_table_entry { /* element type of dml2_clks_limit_table.clk_entries[] */
+ unsigned int dcfclk_mhz;
+ unsigned int fclk_mhz;
+ unsigned int memclk_mhz;
+ unsigned int socclk_mhz;
+ unsigned int dtbclk_mhz;
+ unsigned int dispclk_mhz;
+ unsigned int dppclk_mhz;
+ unsigned int dram_speed_mts; /* which is based on wck_ratio */
+};
+
+struct dml2_clks_num_entries { /* one level count per clock; stored as dml2_clks_limit_table.num_entries_per_clk */
+ unsigned int num_dcfclk_levels;
+ unsigned int num_fclk_levels;
+ unsigned int num_memclk_levels;
+ unsigned int num_socclk_levels;
+ unsigned int num_dtbclk_levels;
+ unsigned int num_dispclk_levels;
+ unsigned int num_dppclk_levels;
+};
+
+struct dml2_clks_limit_table { /* clock table carried in dml2_soc_bbox_overrides.clks_table */
+ struct dml2_clks_table_entry clk_entries[DML2_MAX_NUM_DPM_LVL]; /* fixed capacity of DML2_MAX_NUM_DPM_LVL entries */
+ struct dml2_clks_num_entries num_entries_per_clk;
+ unsigned int num_states;
+};
+
+// Various overrides, per ASIC or per SKU specific, or for debugging purpose when/if available
+struct dml2_soc_bbox_overrides { // carried as dml2_configuration_options.bbox_overrides
+ double xtalclk_mhz;
+ double dchub_refclk_mhz;
+ double dprefclk_mhz;
+ double disp_pll_vco_speed_mhz;
+ double urgent_latency_us;
+ double sr_exit_latency_us;
+ double sr_enter_plus_exit_latency_us;
+ double sr_exit_z8_time_us;
+ double sr_enter_plus_exit_z8_time_us;
+ double dram_clock_change_latency_us;
+ double fclk_change_latency_us;
+ unsigned int dram_num_chan;
+ unsigned int dram_chanel_width_bytes; // sic: "chanel" is the actual field name; a rename would have to touch every user
+ struct dml2_clks_limit_table clks_table;
+};
+
+enum dml2_force_pstate_methods { /* element type of dml2_configuration_options.pmo.force_pstate_method_values[] */
+ dml2_force_pstate_method_auto = 0, /* the only explicitly numbered value; the others follow in order */
+ dml2_force_pstate_method_vactive,
+ dml2_force_pstate_method_vblank,
+ dml2_force_pstate_method_drr,
+ dml2_force_pstate_method_subvp,
+};
+
+struct dml2_configuration_options { /* passed as @config to dml2_create() and dml2_reinit() */
+ int dcn_pipe_count;
+ bool use_native_pstate_optimization;
+ bool enable_windowed_mpo_odm;
+ bool use_native_soc_bb_construction;
+ bool skip_hw_state_mapping;
+ bool optimize_odm_4to1;
+ bool minimize_dispclk_using_odm;
+ bool override_det_buffer_size_kbytes;
+ struct dml2_dc_callbacks callbacks;
+ struct { /* svp_pstate: carries its own callback table, separate from the one above */
+ bool force_disable_subvp;
+ bool force_enable_subvp;
+ unsigned int subvp_fw_processing_delay_us;
+ unsigned int subvp_pstate_allow_width_us;
+ unsigned int subvp_prefetch_end_to_mall_start_us;
+ unsigned int subvp_swath_height_margin_lines;
+ struct dml2_dc_svp_callbacks callbacks;
+ } svp_pstate;
+ struct dml2_soc_mall_info mall_cfg;
+ struct dml2_soc_bbox_overrides bbox_overrides;
+ unsigned int max_segments_per_hubp;
+ unsigned int det_segment_size;
+ /* Only for debugging purposes when initializing SOCBB params via tool for DML21. */
+ struct socbb_ip_params_external *external_socbb_ip_params;
+ struct { /* pmo: force_pstate_method_values[] has MAX_PIPES slots */
+ bool force_pstate_method_enable;
+ enum dml2_force_pstate_methods force_pstate_method_values[MAX_PIPES];
+ } pmo;
+ bool map_dc_pipes_with_callbacks;
+
+ bool use_clock_dc_limits;
+ bool gpuvm_enable;
+ bool force_tdlut_enable;
+ void *bb_from_dmub; /* untyped pointer; what it points at is not visible in this header */
+};
+
+/*
+ * dml2_create - Creates dml2_context.
+ * @in_dc: dc.
+ * @config: dml2 configuration options.
+ * @dml2: Created dml2 context.
+ *
+ * Create and destroy of DML2 is done as part of dc_state creation
+ * and dc_state_free. DML2 IP, SOC and STATES are initialized at
+ * creation time.
+ *
+ * Return: True if dml2 is successfully created, false otherwise.
+ */
+bool dml2_create(const struct dc *in_dc,
+ const struct dml2_configuration_options *config,
+ struct dml2_context **dml2);
+
+void dml2_destroy(struct dml2_context *dml2);
+void dml2_copy(struct dml2_context *dst_dml2,
+ struct dml2_context *src_dml2);
+bool dml2_create_copy(struct dml2_context **dst_dml2,
+ struct dml2_context *src_dml2);
+void dml2_reinit(const struct dc *in_dc,
+ const struct dml2_configuration_options *config,
+ struct dml2_context **dml2);
+
+/*
+ * dml2_validate - Determines if a display configuration is supported or not.
+ * @in_dc: dc.
+ * @context: dc_state to be validated.
+ * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX will not populate context.res_ctx.
+ *
+ * DML1.0 compatible interface for validation.
+ *
+ * Based on fast_validate option internally would call:
+ *
+ * -dml2_validate_and_build_resource - for non fast_validate option
+ * Calculates if dc_state can be supported on the SOC, and attempts to
+ * optimize the power management feature supports versus minimum clocks.
+ * If supported, also builds out_new_hw_state to represent the hw programming
+ * for the new dc state.
+ *
+ * -dml2_validate_only - for fast_validate option
+ * Calculates if dc_state can be supported on the SOC (i.e. at maximum
+ * clocks) with all mandatory power features enabled.
+ *
+ * Context: Two threads may not invoke this function concurrently unless they reference
+ * separate dc_states for validation.
+ * Return: True if mode is supported, false otherwise.
+ */
+bool dml2_validate(const struct dc *in_dc,
+ struct dc_state *context,
+ struct dml2_context *dml2,
+ enum dc_validate_mode validate_mode);
+
+/*
+ * dml2_extract_dram_and_fclk_change_support - Extracts the FCLK and UCLK change support info.
+ * @dml2: input dml2 context pointer.
+ * @fclk_change_support: output pointer holding the fclk change support info (vactive, vblank, unsupported).
+ * @dram_clk_change_support: output pointer holding the uclk change support info (vactive, vblank, unsupported).
+ */
+void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,
+ unsigned int *fclk_change_support, unsigned int *dram_clk_change_support);
+void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2);
+#endif //_DML2_WRAPPER_H_
diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.h b/drivers/gpu/drm/amd/display/dc/dml2/dml_assert.h
index 3a80f5595943..17f0972b1af7 100644
--- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml_assert.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
@@ -22,9 +23,10 @@
* Authors: AMD
*
*/
-#ifndef __LINK_FPGA_H__
-#define __LINK_FPGA_H__
-#include "link.h"
-void dp_fpga_hpo_enable_link_and_stream(struct dc_state *state,
- struct pipe_ctx *pipe_ctx);
-#endif /* __LINK_FPGA_H__ */
+
+#ifndef __DML_ASSERT_H__
+#define __DML_ASSERT_H__
+
+#include "os_types.h"
+
+#endif //__DML_ASSERT_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_depedencies.h b/drivers/gpu/drm/amd/display/dc/dml2/dml_depedencies.h
new file mode 100644
index 000000000000..f7d30b47beff
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml_depedencies.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+/* This header intentionally does not include an #ifdef guard as it only contains includes for other headers */
+
+/*
+ * Standard Types
+ */
+#include "os_types.h"
+#include "cmntypes.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c
new file mode 100644
index 000000000000..00d22e542469
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c
@@ -0,0 +1,573 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "dml_display_rq_dlg_calc.h"
+#include "display_mode_core.h"
+#include "display_mode_util.h"
+
+/* Returns 1 when @source_format is dml_420_8, dml_420_10, dml_420_12 or dml_rgbe_alpha, else 0. */
+static dml_bool_t is_dual_plane(enum dml_source_format_class source_format)
+{
+	const dml_bool_t is_420 = (source_format == dml_420_8) ||
+				  (source_format == dml_420_10) ||
+				  (source_format == dml_420_12);
+
+	return is_420 || (source_format == dml_rgbe_alpha);
+}
+
+/*
+ * dml_rq_dlg_get_rq_reg - compute the values stored in *rq_regs for one pipe.
+ * @rq_regs: output structure; cleared with memset() before being filled in.
+ * @mode_lib: context queried through the dml_get_*() accessors.
+ * @pipe_idx: pipe to compute for; its plane is looked up with dml_get_plane_idx().
+ *
+ * Byte sizes are written log2-encoded. Plane 1 ("p1_" locals, rq_regs_c) inputs
+ * are copies of the plane 0 ones, except that dml_rgbe_alpha sources take their
+ * pixel chunk size from dml_get_alpha_pixel_chunk_size_in_kbyte().
+ * plane1_base_address is expressed in units of 1KB and stays 0 unless the
+ * source format is dual-plane.
+ */
+void dml_rq_dlg_get_rq_reg(dml_display_rq_regs_st *rq_regs,
+	struct display_mode_lib_st *mode_lib,
+	const dml_uint_t pipe_idx)
+{
+	dml_uint_t plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
+	enum dml_source_format_class source_format = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[plane_idx];
+	enum dml_swizzle_mode sw_mode = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[plane_idx];
+	dml_bool_t dual_plane = is_dual_plane((enum dml_source_format_class)(source_format));
+
+	uint32 pixel_chunk_bytes = 0;
+	uint32 min_pixel_chunk_bytes = 0;
+	uint32 meta_chunk_bytes = 0;
+	uint32 min_meta_chunk_bytes = 0;
+	uint32 dpte_group_bytes = 0;
+	uint32 mpte_group_bytes = 0;
+
+	uint32 p1_pixel_chunk_bytes = 0;
+	uint32 p1_min_pixel_chunk_bytes = 0;
+	uint32 p1_meta_chunk_bytes = 0;
+	uint32 p1_min_meta_chunk_bytes = 0;
+	uint32 p1_dpte_group_bytes = 0;
+	uint32 p1_mpte_group_bytes = 0;
+
+	dml_uint_t detile_buf_size_in_bytes;
+	dml_uint_t detile_buf_plane1_addr = 0;
+
+	dml_float_t stored_swath_l_bytes;
+	dml_float_t stored_swath_c_bytes;
+	dml_bool_t is_phantom_pipe;
+
+	dml_uint_t pte_row_height_linear;
+
+	dml_print("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx);
+
+	memset(rq_regs, 0, sizeof(*rq_regs));
+
+	pixel_chunk_bytes = (dml_uint_t)(dml_get_pixel_chunk_size_in_kbyte(mode_lib) * 1024);
+	min_pixel_chunk_bytes = (dml_uint_t)(dml_get_min_pixel_chunk_size_in_byte(mode_lib));
+
+	if (pixel_chunk_bytes == 64 * 1024)
+		min_pixel_chunk_bytes = 0;
+
+	meta_chunk_bytes = (dml_uint_t)(dml_get_meta_chunk_size_in_kbyte(mode_lib) * 1024);
+	min_meta_chunk_bytes = (dml_uint_t)(dml_get_min_meta_chunk_size_in_byte(mode_lib));
+
+	dpte_group_bytes = (dml_uint_t)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx));
+	mpte_group_bytes = (dml_uint_t)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx));
+
+	p1_pixel_chunk_bytes = pixel_chunk_bytes;
+	p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
+	p1_meta_chunk_bytes = meta_chunk_bytes;
+	p1_min_meta_chunk_bytes = min_meta_chunk_bytes;
+	p1_dpte_group_bytes = dpte_group_bytes;
+	p1_mpte_group_bytes = mpte_group_bytes;
+
+	if (source_format == dml_rgbe_alpha)
+		p1_pixel_chunk_bytes = (dml_uint_t)(dml_get_alpha_pixel_chunk_size_in_kbyte(mode_lib) * 1024);
+
+	rq_regs->rq_regs_l.chunk_size = (dml_uint_t)(dml_log2((dml_float_t) pixel_chunk_bytes) - 10);
+	rq_regs->rq_regs_c.chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_pixel_chunk_bytes) - 10);
+
+	/* A minimum size of 0 bytes is stored as 0; anything else is log2-encoded with the field's own offset. */
+	rq_regs->rq_regs_l.min_chunk_size = (min_pixel_chunk_bytes == 0) ? 0 : (dml_uint_t)(dml_log2((dml_float_t) min_pixel_chunk_bytes) - 8 + 1);
+	rq_regs->rq_regs_c.min_chunk_size = (p1_min_pixel_chunk_bytes == 0) ? 0 : (dml_uint_t)(dml_log2((dml_float_t) p1_min_pixel_chunk_bytes) - 8 + 1);
+
+	rq_regs->rq_regs_l.meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) meta_chunk_bytes) - 10);
+	rq_regs->rq_regs_c.meta_chunk_size = (dml_uint_t)(dml_log2((dml_float_t) p1_meta_chunk_bytes) - 10);
+
+	rq_regs->rq_regs_l.min_meta_chunk_size = (min_meta_chunk_bytes == 0) ? 0 : (dml_uint_t)(dml_log2((dml_float_t) min_meta_chunk_bytes) - 6 + 1);
+	/* Guard on the plane 1 value that is actually handed to dml_log2(), not on the plane 0 one. */
+	rq_regs->rq_regs_c.min_meta_chunk_size = (p1_min_meta_chunk_bytes == 0) ? 0 : (dml_uint_t)(dml_log2((dml_float_t) p1_min_meta_chunk_bytes) - 6 + 1);
+
+	rq_regs->rq_regs_l.dpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) dpte_group_bytes) - 6);
+	rq_regs->rq_regs_l.mpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) mpte_group_bytes) - 6);
+	rq_regs->rq_regs_c.dpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) p1_dpte_group_bytes) - 6);
+	rq_regs->rq_regs_c.mpte_group_size = (dml_uint_t)(dml_log2((dml_float_t) p1_mpte_group_bytes) - 6);
+
+	detile_buf_size_in_bytes = (dml_uint_t)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024);
+
+	pte_row_height_linear = (dml_uint_t)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx));
+
+	if (sw_mode == dml_sw_linear)
+		ASSERT(pte_row_height_linear >= 8);
+
+	rq_regs->rq_regs_l.pte_row_height_linear = (dml_uint_t)(dml_floor(dml_log2((dml_float_t) pte_row_height_linear), 1) - 3);
+
+	if (dual_plane) {
+		dml_uint_t p1_pte_row_height_linear = (dml_uint_t)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx));
+		if (sw_mode == dml_sw_linear)
+			ASSERT(p1_pte_row_height_linear >= 8);
+
+		rq_regs->rq_regs_c.pte_row_height_linear = (dml_uint_t)(dml_floor(dml_log2((dml_float_t) p1_pte_row_height_linear), 1) - 3);
+	}
+
+	rq_regs->rq_regs_l.swath_height = (dml_uint_t)(dml_log2((dml_float_t) dml_get_swath_height_l(mode_lib, pipe_idx)));
+	rq_regs->rq_regs_c.swath_height = (dml_uint_t)(dml_log2((dml_float_t) dml_get_swath_height_c(mode_lib, pipe_idx)));
+
+	if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
+		rq_regs->drq_expansion_mode = 0;
+	} else {
+		rq_regs->drq_expansion_mode = 2;
+	}
+	rq_regs->prq_expansion_mode = 1;
+	rq_regs->mrq_expansion_mode = 1;
+	rq_regs->crq_expansion_mode = 1;
+
+	stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx);
+	stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx);
+	is_phantom_pipe = dml_get_is_phantom_pipe(mode_lib, pipe_idx);
+
+	// Note: detile_buf_plane1_addr is in unit of 1KB
+	if (dual_plane) {
+		if (is_phantom_pipe) {
+			detile_buf_plane1_addr = (dml_uint_t)((1024.0*1024.0) / 2.0 / 1024.0); // half to chroma
+		} else {
+			if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
+				detile_buf_plane1_addr = (dml_uint_t)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
+#endif
+			} else {
+				detile_buf_plane1_addr = (dml_uint_t)(dml_round_to_multiple((dml_uint_t)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma
+#ifdef __DML_VBA_DEBUG__
+				dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
+#endif
+			}
+		}
+	}
+	rq_regs->plane1_base_address = detile_buf_plane1_addr;
+
+#ifdef __DML_VBA_DEBUG__
+	dml_print("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
+	dml_print("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
+	dml_print("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
+	dml_print("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
+	dml_print("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
+	dml_print("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
+#endif
+	dml_print_rq_regs_st(rq_regs);
+	dml_print("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
+}
+
+// Note: currently taken in as is.
+// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma.
+
+
+void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *disp_dlg_regs,
+ dml_display_ttu_regs_st *disp_ttu_regs,
+ struct display_mode_lib_st *mode_lib,
+ const dml_uint_t pipe_idx)
+{
+ dml_uint_t plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
+ enum dml_source_format_class source_format = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[plane_idx];
+ struct dml_timing_cfg_st *timing = &mode_lib->ms.cache_display_cfg.timing;
+ struct dml_plane_cfg_st *plane = &mode_lib->ms.cache_display_cfg.plane;
+ struct dml_hw_resource_st *hw = &mode_lib->ms.cache_display_cfg.hw;
+ dml_bool_t dual_plane = is_dual_plane(source_format);
+ dml_uint_t num_cursors = plane->NumberOfCursors[plane_idx];
+ enum dml_odm_mode odm_mode = hw->ODMMode[plane_idx];
+
+ dml_uint_t htotal = timing->HTotal[plane_idx];
+ dml_uint_t hactive = timing->HActive[plane_idx];
+ dml_uint_t hblank_end = timing->HBlankEnd[plane_idx];
+ dml_uint_t vblank_end = timing->VBlankEnd[plane_idx];
+ dml_bool_t interlaced = timing->Interlace[plane_idx];
+ dml_float_t pclk_freq_in_mhz = (dml_float_t) timing->PixelClock[plane_idx];
+ dml_float_t refclk_freq_in_mhz = (hw->DLGRefClkFreqMHz > 0) ? (dml_float_t) hw->DLGRefClkFreqMHz : mode_lib->soc.refclk_mhz;
+ dml_float_t ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz;
+
+ dml_uint_t vready_after_vcount0;
+
+ dml_uint_t dst_x_after_scaler;
+ dml_uint_t dst_y_after_scaler;
+
+ dml_float_t dst_y_prefetch;
+ dml_float_t dst_y_per_vm_vblank;
+ dml_float_t dst_y_per_row_vblank;
+ dml_float_t dst_y_per_vm_flip;
+ dml_float_t dst_y_per_row_flip;
+
+ dml_float_t max_dst_y_per_vm_vblank = 32.0; //U5.2
+ dml_float_t max_dst_y_per_row_vblank = 16.0; //U4.2
+
+ dml_float_t vratio_pre_l;
+ dml_float_t vratio_pre_c;
+
+ dml_float_t refcyc_per_line_delivery_pre_l;
+ dml_float_t refcyc_per_line_delivery_l;
+ dml_float_t refcyc_per_line_delivery_pre_c = 0.;
+ dml_float_t refcyc_per_line_delivery_c = 0.;
+ dml_float_t refcyc_per_req_delivery_pre_l;
+ dml_float_t refcyc_per_req_delivery_l;
+ dml_float_t refcyc_per_req_delivery_pre_c = 0.;
+ dml_float_t refcyc_per_req_delivery_c = 0.;
+ dml_float_t refcyc_per_req_delivery_pre_cur0 = 0.;
+ dml_float_t refcyc_per_req_delivery_cur0 = 0.;
+
+ dml_float_t dst_y_per_pte_row_nom_l;
+ dml_float_t dst_y_per_pte_row_nom_c;
+ dml_float_t dst_y_per_meta_row_nom_l;
+ dml_float_t dst_y_per_meta_row_nom_c;
+ dml_float_t refcyc_per_pte_group_nom_l;
+ dml_float_t refcyc_per_pte_group_nom_c;
+ dml_float_t refcyc_per_pte_group_vblank_l;
+ dml_float_t refcyc_per_pte_group_vblank_c;
+ dml_float_t refcyc_per_pte_group_flip_l;
+ dml_float_t refcyc_per_pte_group_flip_c;
+ dml_float_t refcyc_per_meta_chunk_nom_l;
+ dml_float_t refcyc_per_meta_chunk_nom_c;
+ dml_float_t refcyc_per_meta_chunk_vblank_l;
+ dml_float_t refcyc_per_meta_chunk_vblank_c;
+ dml_float_t refcyc_per_meta_chunk_flip_l;
+ dml_float_t refcyc_per_meta_chunk_flip_c;
+
+ dml_float_t temp;
+ dml_float_t min_ttu_vblank;
+ dml_uint_t min_dst_y_next_start;
+
+ dml_print("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);
+ dml_print("DML_DLG::%s: plane_idx = %d\n", __func__, plane_idx);
+ dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal);
+ dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: hw->DLGRefClkFreqMHz = %3.2f\n", __func__, hw->DLGRefClkFreqMHz);
+ dml_print("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.refclk_mhz);
+ dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+ dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced);
+
+ memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs));
+ memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs));
+
+ ASSERT(refclk_freq_in_mhz != 0);
+ ASSERT(pclk_freq_in_mhz != 0);
+ ASSERT(ref_freq_to_pix_freq < 4.0);
+
+ // Need to figure out which side of odm combine we're in
+ // Assume the pipe instance under the same plane is in order
+
+ if (odm_mode == dml_odm_mode_bypass) {
+ disp_dlg_regs->refcyc_h_blank_end = (dml_uint_t)((dml_float_t) hblank_end * ref_freq_to_pix_freq);
+ } else if (odm_mode == dml_odm_mode_combine_2to1 || odm_mode == dml_odm_mode_combine_4to1) {
+ // find out how many pipe are in this plane
+ dml_uint_t num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg);
+ dml_uint_t first_pipe_idx_in_plane = __DML_NUM_PLANES__;
+ dml_uint_t pipe_idx_in_combine = 0; // pipe index within the plane
+ dml_uint_t odm_combine_factor = (odm_mode == dml_odm_mode_combine_2to1 ? 2 : 4);
+
+ for (dml_uint_t i = 0; i < num_active_pipes; i++) {
+ if (dml_get_plane_idx(mode_lib, i) == plane_idx) {
+ if (i < first_pipe_idx_in_plane) {
+ first_pipe_idx_in_plane = i;
+ }
+ }
+ }
+ pipe_idx_in_combine = pipe_idx - first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.)
+
+ disp_dlg_regs->refcyc_h_blank_end = (dml_uint_t)(((dml_float_t) hblank_end + (dml_float_t) pipe_idx_in_combine * (dml_float_t) hactive / (dml_float_t) odm_combine_factor) * ref_freq_to_pix_freq);
+ dml_print("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx);
+ dml_print("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, first_pipe_idx_in_plane);
+ dml_print("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, pipe_idx_in_combine);
+ dml_print("DML_DLG: %s: odm_combine_factor = %d\n", __func__, odm_combine_factor);
+ }
+ dml_print("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end);
+
+ ASSERT(disp_dlg_regs->refcyc_h_blank_end < (dml_uint_t)dml_pow(2, 13));
+
+ disp_dlg_regs->ref_freq_to_pix_freq = (dml_uint_t)(ref_freq_to_pix_freq * dml_pow(2, 19));
+ temp = dml_pow(2, 8);
+ disp_dlg_regs->refcyc_per_htotal = (dml_uint_t)(ref_freq_to_pix_freq * (dml_float_t)htotal * temp);
+ disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits
+
+ min_ttu_vblank = dml_get_min_ttu_vblank_in_us(mode_lib, pipe_idx);
+ min_dst_y_next_start = (dml_uint_t)(dml_get_min_dst_y_next_start(mode_lib, pipe_idx));
+
+ dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank);
+ dml_print("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, min_dst_y_next_start);
+ dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, ref_freq_to_pix_freq);
+
+ vready_after_vcount0 = (dml_uint_t)(dml_get_vready_at_or_after_vsync(mode_lib, pipe_idx));
+ disp_dlg_regs->vready_after_vcount0 = vready_after_vcount0;
+
+ dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
+
+ dst_x_after_scaler = (dml_uint_t)(dml_get_dst_x_after_scaler(mode_lib, pipe_idx));
+ dst_y_after_scaler = (dml_uint_t)(dml_get_dst_y_after_scaler(mode_lib, pipe_idx));
+
+ dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler);
+ dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, dst_y_after_scaler);
+
+ dst_y_prefetch = dml_get_dst_y_prefetch(mode_lib, pipe_idx);
+ dst_y_per_vm_vblank = dml_get_dst_y_per_vm_vblank(mode_lib, pipe_idx);
+ dst_y_per_row_vblank = dml_get_dst_y_per_row_vblank(mode_lib, pipe_idx);
+ dst_y_per_vm_flip = dml_get_dst_y_per_vm_flip(mode_lib, pipe_idx);
+ dst_y_per_row_flip = dml_get_dst_y_per_row_flip(mode_lib, pipe_idx);
+
+ // magic!
+ if (htotal <= 75) {
+ max_dst_y_per_vm_vblank = 100.0;
+ max_dst_y_per_row_vblank = 100.0;
+ }
+
+ dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch);
+ dml_print("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, dst_y_per_row_flip);
+ dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank);
+
+ ASSERT(dst_y_per_vm_vblank < max_dst_y_per_vm_vblank);
+ ASSERT(dst_y_per_row_vblank < max_dst_y_per_row_vblank);
+ ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank));
+
+ vratio_pre_l = dml_get_vratio_prefetch_l(mode_lib, pipe_idx);
+ vratio_pre_c = dml_get_vratio_prefetch_c(mode_lib, pipe_idx);
+
+ dml_print("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, vratio_pre_l);
+ dml_print("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, vratio_pre_c);
+
+ // Active
+ refcyc_per_line_delivery_pre_l = dml_get_refcyc_per_line_delivery_pre_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_line_delivery_l = dml_get_refcyc_per_line_delivery_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, refcyc_per_line_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, refcyc_per_line_delivery_l);
+
+ if (dual_plane) {
+ refcyc_per_line_delivery_pre_c = dml_get_refcyc_per_line_delivery_pre_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_line_delivery_c = dml_get_refcyc_per_line_delivery_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, refcyc_per_line_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, refcyc_per_line_delivery_c);
+ }
+
+ disp_dlg_regs->refcyc_per_vm_dmdata = (dml_uint_t)(dml_get_refcyc_per_vm_dmdata_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz);
+ disp_dlg_regs->dmdata_dl_delta = (dml_uint_t)(dml_get_dmdata_dl_delta_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz);
+
+ refcyc_per_req_delivery_pre_l = dml_get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_req_delivery_l = dml_get_refcyc_per_req_delivery_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, refcyc_per_req_delivery_pre_l);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, refcyc_per_req_delivery_l);
+
+ if (dual_plane) {
+ refcyc_per_req_delivery_pre_c = dml_get_refcyc_per_req_delivery_pre_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_req_delivery_c = dml_get_refcyc_per_req_delivery_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, refcyc_per_req_delivery_pre_c);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, refcyc_per_req_delivery_c);
+ }
+
+ // TTU - Cursor
+ ASSERT(num_cursors <= 1);
+ if (num_cursors > 0) {
+ refcyc_per_req_delivery_pre_cur0 = dml_get_refcyc_per_cursor_req_delivery_pre_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_req_delivery_cur0 = dml_get_refcyc_per_cursor_req_delivery_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_pre_cur0);
+ dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur0 = %3.2f\n", __func__, refcyc_per_req_delivery_cur0);
+ }
+
+ // Assign to register structures
+ disp_dlg_regs->min_dst_y_next_start = (dml_uint_t)((dml_float_t) min_dst_y_next_start * dml_pow(2, 2));
+ ASSERT(disp_dlg_regs->min_dst_y_next_start < (dml_uint_t)dml_pow(2, 18));
+
+ disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line
+ disp_dlg_regs->refcyc_x_after_scaler = (dml_uint_t)((dml_float_t) dst_x_after_scaler * ref_freq_to_pix_freq); // in terms of refclk
+ disp_dlg_regs->dst_y_prefetch = (dml_uint_t)(dst_y_prefetch * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_vm_vblank = (dml_uint_t)(dst_y_per_vm_vblank * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_row_vblank = (dml_uint_t)(dst_y_per_row_vblank * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_vm_flip = (dml_uint_t)(dst_y_per_vm_flip * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_row_flip = (dml_uint_t)(dst_y_per_row_flip * dml_pow(2, 2));
+
+ disp_dlg_regs->vratio_prefetch = (dml_uint_t)(vratio_pre_l * dml_pow(2, 19));
+ disp_dlg_regs->vratio_prefetch_c = (dml_uint_t)(vratio_pre_c * dml_pow(2, 19));
+
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
+ dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
+
+ disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_group_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz);
+ disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_get_refcyc_per_vm_group_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz);
+ disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_req_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10));
+ disp_dlg_regs->refcyc_per_vm_req_flip = (dml_uint_t)(dml_get_refcyc_per_vm_req_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10));
+
+ dst_y_per_pte_row_nom_l = dml_get_dst_y_per_pte_row_nom_l(mode_lib, pipe_idx);
+ dst_y_per_pte_row_nom_c = dml_get_dst_y_per_pte_row_nom_c(mode_lib, pipe_idx);
+ dst_y_per_meta_row_nom_l = dml_get_dst_y_per_meta_row_nom_l(mode_lib, pipe_idx);
+ dst_y_per_meta_row_nom_c = dml_get_dst_y_per_meta_row_nom_c(mode_lib, pipe_idx);
+
+ refcyc_per_pte_group_nom_l = dml_get_refcyc_per_pte_group_nom_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_pte_group_nom_c = dml_get_refcyc_per_pte_group_nom_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_pte_group_vblank_l = dml_get_refcyc_per_pte_group_vblank_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_pte_group_vblank_c = dml_get_refcyc_per_pte_group_vblank_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_pte_group_flip_l = dml_get_refcyc_per_pte_group_flip_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_pte_group_flip_c = dml_get_refcyc_per_pte_group_flip_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+
+ refcyc_per_meta_chunk_nom_l = dml_get_refcyc_per_meta_chunk_nom_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_meta_chunk_nom_c = dml_get_refcyc_per_meta_chunk_nom_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_meta_chunk_vblank_l = dml_get_refcyc_per_meta_chunk_vblank_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_meta_chunk_vblank_c = dml_get_refcyc_per_meta_chunk_vblank_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_meta_chunk_flip_l = dml_get_refcyc_per_meta_chunk_flip_l_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+ refcyc_per_meta_chunk_flip_c = dml_get_refcyc_per_meta_chunk_flip_c_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz;
+
+ disp_dlg_regs->dst_y_per_pte_row_nom_l = (dml_uint_t)(dst_y_per_pte_row_nom_l * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_pte_row_nom_c = (dml_uint_t)(dst_y_per_pte_row_nom_c * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_meta_row_nom_l = (dml_uint_t)(dst_y_per_meta_row_nom_l * dml_pow(2, 2));
+ disp_dlg_regs->dst_y_per_meta_row_nom_c = (dml_uint_t)(dst_y_per_meta_row_nom_c * dml_pow(2, 2));
+ disp_dlg_regs->refcyc_per_pte_group_nom_l = (dml_uint_t)(refcyc_per_pte_group_nom_l);
+ disp_dlg_regs->refcyc_per_pte_group_nom_c = (dml_uint_t)(refcyc_per_pte_group_nom_c);
+ disp_dlg_regs->refcyc_per_pte_group_vblank_l = (dml_uint_t)(refcyc_per_pte_group_vblank_l);
+ disp_dlg_regs->refcyc_per_pte_group_vblank_c = (dml_uint_t)(refcyc_per_pte_group_vblank_c);
+ disp_dlg_regs->refcyc_per_pte_group_flip_l = (dml_uint_t)(refcyc_per_pte_group_flip_l);
+ disp_dlg_regs->refcyc_per_pte_group_flip_c = (dml_uint_t)(refcyc_per_pte_group_flip_c);
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (dml_uint_t)(refcyc_per_meta_chunk_nom_l);
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (dml_uint_t)(refcyc_per_meta_chunk_nom_c);
+ disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (dml_uint_t)(refcyc_per_meta_chunk_vblank_l);
+ disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (dml_uint_t)(refcyc_per_meta_chunk_vblank_c);
+ disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (dml_uint_t)(refcyc_per_meta_chunk_flip_l);
+ disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (dml_uint_t)(refcyc_per_meta_chunk_flip_c);
+ disp_dlg_regs->refcyc_per_line_delivery_pre_l = (dml_uint_t)dml_floor(refcyc_per_line_delivery_pre_l, 1);
+ disp_dlg_regs->refcyc_per_line_delivery_l = (dml_uint_t)dml_floor(refcyc_per_line_delivery_l, 1);
+ disp_dlg_regs->refcyc_per_line_delivery_pre_c = (dml_uint_t)dml_floor(refcyc_per_line_delivery_pre_c, 1);
+ disp_dlg_regs->refcyc_per_line_delivery_c = (dml_uint_t)dml_floor(refcyc_per_line_delivery_c, 1);
+
+ disp_dlg_regs->chunk_hdl_adjust_cur0 = 3;
+ disp_dlg_regs->dst_y_offset_cur0 = 0;
+ disp_dlg_regs->chunk_hdl_adjust_cur1 = 3;
+ disp_dlg_regs->dst_y_offset_cur1 = 0;
+
+ disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
+
+ disp_ttu_regs->refcyc_per_req_delivery_pre_l = (dml_uint_t)(refcyc_per_req_delivery_pre_l * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_l = (dml_uint_t)(refcyc_per_req_delivery_l * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_c = (dml_uint_t)(refcyc_per_req_delivery_pre_c * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_c = (dml_uint_t)(refcyc_per_req_delivery_c * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = (dml_uint_t)(refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_cur0 = (dml_uint_t)(refcyc_per_req_delivery_cur0 * dml_pow(2, 10));
+ disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = 0;
+ disp_ttu_regs->refcyc_per_req_delivery_cur1 = 0;
+ disp_ttu_regs->qos_level_low_wm = 0;
+
+ disp_ttu_regs->qos_level_high_wm = (dml_uint_t)(4.0 * (dml_float_t)htotal * ref_freq_to_pix_freq);
+
+ disp_ttu_regs->qos_level_flip = 14;
+ disp_ttu_regs->qos_level_fixed_l = 8;
+ disp_ttu_regs->qos_level_fixed_c = 8;
+ disp_ttu_regs->qos_level_fixed_cur0 = 8;
+ disp_ttu_regs->qos_ramp_disable_l = 0;
+ disp_ttu_regs->qos_ramp_disable_c = 0;
+ disp_ttu_regs->qos_ramp_disable_cur0 = 0;
+ disp_ttu_regs->min_ttu_vblank = (dml_uint_t)(min_ttu_vblank * refclk_freq_in_mhz);
+
+ // CHECK for HW registers' range, assert or clamp
+ ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13));
+ ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13));
+ if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (dml_uint_t)dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_pow(2, 23) - 1);
+
+ if (disp_dlg_regs->refcyc_per_vm_group_flip >= (dml_uint_t)dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_pow(2, 23) - 1);
+
+ if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (dml_uint_t)dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_pow(2, 23) - 1);
+
+ if (disp_dlg_regs->refcyc_per_vm_req_flip >= (dml_uint_t)dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_vm_req_flip = (dml_uint_t)(dml_pow(2, 23) - 1);
+
+
+ ASSERT(disp_dlg_regs->dst_y_after_scaler < (dml_uint_t)8);
+ ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (dml_uint_t)dml_pow(2, 13));
+ ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (dml_uint_t)dml_pow(2, 17));
+ if (dual_plane) {
+ if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (dml_uint_t)dml_pow(2, 17)) { // FIXME what so special about chroma, can we just assert?
+ dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u > register max U15.2 %u\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (dml_uint_t)dml_pow(2, 17) - 1);
+ }
+ }
+ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (dml_uint_t)dml_pow(2, 17));
+ ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_c < (dml_uint_t)dml_pow(2, 17));
+
+ if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (dml_uint_t)dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_pte_group_nom_l = (dml_uint_t)(dml_pow(2, 23) - 1);
+ if (dual_plane) {
+ if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (dml_uint_t)dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_pte_group_nom_c = (dml_uint_t)(dml_pow(2, 23) - 1);
+ }
+ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (dml_uint_t)dml_pow(2, 13));
+ if (dual_plane) {
+ ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (dml_uint_t)dml_pow(2, 13));
+ }
+
+ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (dml_uint_t)dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (dml_uint_t)(dml_pow(2, 23) - 1);
+ if (dual_plane) {
+ if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (dml_uint_t)dml_pow(2, 23))
+ disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (dml_uint_t)(dml_pow(2, 23) - 1);
+ }
+ ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (dml_uint_t)dml_pow(2, 13));
+ ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_c < (dml_uint_t)dml_pow(2, 13));
+ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (dml_uint_t)dml_pow(2, 13));
+ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (dml_uint_t)dml_pow(2, 13));
+ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (dml_uint_t)dml_pow(2, 13));
+ ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (dml_uint_t)dml_pow(2, 13));
+ ASSERT(disp_ttu_regs->qos_level_low_wm < (dml_uint_t) dml_pow(2, 14));
+ ASSERT(disp_ttu_regs->qos_level_high_wm < (dml_uint_t) dml_pow(2, 14));
+ ASSERT(disp_ttu_regs->min_ttu_vblank < (dml_uint_t) dml_pow(2, 24));
+
+ dml_print_ttu_regs_st(disp_ttu_regs);
+ dml_print_dlg_regs_st(disp_dlg_regs);
+ dml_print("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
+}
+
+void dml_rq_dlg_get_arb_params(struct display_mode_lib_st *mode_lib, dml_display_arb_params_st *arb_param)
+{ // Fills *arb_param with fixed arbiter settings; mode_lib is not read by this function.
+ memset(arb_param, 0, sizeof(*arb_param)); // any field not assigned below is left at zero
+ arb_param->max_req_outstanding = 256;
+ arb_param->min_req_outstanding = 256; // same value as max_req_outstanding: setting this to max turns off the sat level feature
+ arb_param->sat_level_us = 60;
+ arb_param->hvm_max_qos_commit_threshold = 0xf;
+ arb_param->hvm_min_req_outstand_commit_threshold = 0xa;
+ arb_param->compbuf_reserved_space_kbytes = 2 * 8; // 2 x 8 KB, assuming a max data chunk size of 8K
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.h b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.h
new file mode 100644
index 000000000000..bf491cf0582d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DML_DISPLAY_RQ_DLG_CALC_H__
+#define __DML_DISPLAY_RQ_DLG_CALC_H__
+
+#include "display_mode_core_structs.h"
+#include "display_mode_lib_defines.h"
+
+struct display_mode_lib_st;
+
+// Function: dml_rq_dlg_get_rq_reg
+// Main entry point for test to get the register values out of this DML class.
+// This function calls <get_rq_param> and <extract_rq_regs> functions to calculate
+// and then populate the rq_regs struct
+// Input:
+// Assume mode_program is already called
+// Output:
+// rq_regs - struct that holds all the RQ registers field value.
+// See also: <display_rq_regs_st>
+
+void dml_rq_dlg_get_rq_reg(dml_display_rq_regs_st *rq_regs,
+ struct display_mode_lib_st *mode_lib,
+ const dml_uint_t pipe_idx);
+
+// Function: dml_rq_dlg_get_dlg_reg
+// Calculate and return DLG and TTU register struct given the system setting
+// Output:
+// dlg_regs - output DLG register struct
+// ttu_regs - output DLG TTU register struct
+// Input:
+// Assume mode_program is already called
+// pipe_idx - index that identifies the e2e_pipe_param that corresponds to this dlg
+void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *dlg_regs,
+ dml_display_ttu_regs_st *ttu_regs,
+ struct display_mode_lib_st *mode_lib,
+ const dml_uint_t pipe_idx);
+
+// Function: dml_rq_dlg_get_arb_params
+void dml_rq_dlg_get_arb_params(struct display_mode_lib_st *mode_lib, dml_display_arb_params_st *arb_param);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_logging.h b/drivers/gpu/drm/amd/display/dc/dml2/dml_logging.h
new file mode 100644
index 000000000000..2a2f84e07ca8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml_logging.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+#ifndef __DML_LOGGING_H__
+#define __DML_LOGGING_H__
+
+#define dml_print(...) ((void)0) /* logging stub: expands to a no-op expression and never evaluates its arguments */
+
+#endif //__DML_LOGGING_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/Makefile b/drivers/gpu/drm/amd/display/dc/dpp/Makefile
new file mode 100644
index 000000000000..8324a56fe7db
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dpp/Makefile
@@ -0,0 +1,83 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'dpp' sub-component of DAL.
+#
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+DPP_DCN10 = dcn10_dpp.o dcn10_dpp_dscl.o dcn10_dpp_cm.o
+
+AMD_DAL_DPP_DCN10 = $(addprefix $(AMDDALPATH)/dc/dpp/dcn10/,$(DPP_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DPP_DCN10)
+
+###############################################################################
+
+DPP_DCN20 = dcn20_dpp.o dcn20_dpp_cm.o
+
+AMD_DAL_DPP_DCN20 = $(addprefix $(AMDDALPATH)/dc/dpp/dcn20/,$(DPP_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DPP_DCN20)
+
+###############################################################################
+
+DPP_DCN201 = dcn201_dpp.o
+
+AMD_DAL_DPP_DCN201 = $(addprefix $(AMDDALPATH)/dc/dpp/dcn201/,$(DPP_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DPP_DCN201)
+
+###############################################################################
+
+DPP_DCN30 = dcn30_dpp.o dcn30_dpp_cm.o
+
+AMD_DAL_DPP_DCN30 = $(addprefix $(AMDDALPATH)/dc/dpp/dcn30/,$(DPP_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DPP_DCN30)
+
+###############################################################################
+
+DPP_DCN32 = dcn32_dpp.o
+
+AMD_DAL_DPP_DCN32 = $(addprefix $(AMDDALPATH)/dc/dpp/dcn32/,$(DPP_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DPP_DCN32)
+
+###############################################################################
+
+DPP_DCN35 = dcn35_dpp.o
+
+AMD_DAL_DPP_DCN35 = $(addprefix $(AMDDALPATH)/dc/dpp/dcn35/,$(DPP_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DPP_DCN35)
+
+###############################################################################
+
+DPP_DCN401 = dcn401_dpp.o dcn401_dpp_cm.o dcn401_dpp_dscl.o
+
+AMD_DAL_DPP_DCN401 = $(addprefix $(AMDDALPATH)/dc/dpp/dcn401/,$(DPP_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_DPP_DCN401)
+
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c
index ef52e6b6eccf..01480a04f85e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c
@@ -28,7 +28,7 @@
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn10_dpp.h"
+#include "dcn10/dcn10_dpp.h"
#include "basics/conversion.h"
#define NUM_PHASES 64
@@ -194,6 +194,9 @@ void dpp_reset(struct dpp *dpp_base)
dpp->filter_h = NULL;
dpp->filter_v = NULL;
+ memset(&dpp_base->pos, 0, sizeof(dpp_base->pos));
+ memset(&dpp_base->att, 0, sizeof(dpp_base->att));
+
memset(&dpp->scl_data, 0, sizeof(dpp->scl_data));
memset(&dpp->pwl_data, 0, sizeof(dpp->pwl_data));
}
@@ -480,10 +483,11 @@ void dpp1_set_cursor_position(
if (src_y_offset + cursor_height <= 0)
cur_en = 0; /* not visible beyond top edge*/
- REG_UPDATE(CURSOR0_CONTROL,
- CUR0_ENABLE, cur_en);
+ if (dpp_base->pos.cur0_ctl.bits.cur0_enable != cur_en) {
+ REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, cur_en);
- dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en;
+ dpp_base->pos.cur0_ctl.bits.cur0_enable = cur_en;
+ }
}
void dpp1_cnv_set_optional_cursor_attributes(
@@ -516,6 +520,15 @@ void dpp1_dppclk_control(
REG_UPDATE(DPP_CONTROL, DPP_CLOCK_ENABLE, 0);
}
+void dpp_force_disable_cursor(struct dpp *dpp_base)
+{
+ struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); /* not named again below; presumably consumed by the REG_UPDATE macro - confirm */
+
+ /* Force disable cursor: clear CUR0_ENABLE unconditionally, without consulting the cached enable bit first */
+ REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, 0);
+ dpp_base->pos.cur0_ctl.bits.cur0_enable = 0; /* keep the cached bit in step; dpp1_set_cursor_position() compares against it before writing */
+}
+
static const struct dpp_funcs dcn10_dpp_funcs = {
.dpp_read_state = dpp_read_state,
.dpp_reset = dpp_reset,
@@ -543,7 +556,8 @@ static const struct dpp_funcs dcn10_dpp_funcs = {
.dpp_set_hdr_multiplier = dpp1_set_hdr_multiplier,
.dpp_program_blnd_lut = NULL,
.dpp_program_shaper_lut = NULL,
- .dpp_program_3dlut = NULL
+ .dpp_program_3dlut = NULL,
+ .dpp_get_gamut_remap = dpp1_cm_get_gamut_remap,
};
static struct dpp_caps dcn10_dpp_cap = {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h
index c9e045666dcc..f466182963f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h
@@ -1090,7 +1090,8 @@
type DPP_CLOCK_ENABLE; \
type CM_HDR_MULT_COEF; \
type CUR0_FP_BIAS; \
- type CUR0_FP_SCALE;
+ type CUR0_FP_SCALE;\
+ type DISPCLK_R_GATE_DISABLE;
struct dcn_dpp_shift {
TF_REG_FIELD_LIST(uint8_t)
@@ -1521,4 +1522,9 @@ void dpp1_construct(struct dcn10_dpp *dpp1,
const struct dcn_dpp_registers *tf_regs,
const struct dcn_dpp_shift *tf_shift,
const struct dcn_dpp_mask *tf_mask);
+
+void dpp1_cm_get_gamut_remap(struct dpp *dpp_base,
+ struct dpp_grph_csc_adjustment *adjust);
+void dpp_force_disable_cursor(struct dpp *dpp_base);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c
index 904c2d278998..f8f6019d8304 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c
@@ -28,9 +28,9 @@
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn10_dpp.h"
+#include "dcn10/dcn10_dpp.h"
#include "basics/conversion.h"
-#include "dcn10_cm_common.h"
+#include "dcn10/dcn10_cm_common.h"
#define NUM_PHASES 64
#define HORZ_MAX_TAPS 8
@@ -98,7 +98,7 @@ static void program_gamut_remap(
if (regval == NULL || select == GAMUT_REMAP_BYPASS) {
REG_SET(CM_GAMUT_REMAP_CONTROL, 0,
- CM_GAMUT_REMAP_MODE, 0);
+ CM_GAMUT_REMAP_MODE, 0);
return;
}
switch (select) {
@@ -181,6 +181,74 @@ void dpp1_cm_set_gamut_remap(
}
}
+static void read_gamut_remap(struct dcn10_dpp *dpp,
+ uint16_t *regval,
+ enum gamut_remap_select *select)
+{
+ struct color_matrices_reg gam_regs;
+ uint32_t selection;
+ /* Read the gamut remap mode back into *select and, for a coefficient mode, the matching matrix into regval. */
+ REG_GET(CM_GAMUT_REMAP_CONTROL,
+ CM_GAMUT_REMAP_MODE, &selection);
+
+ *select = selection;
+ /* Field shift/mask come from the CM_GAMUT_REMAP_C11/C12 definitions and are reused for whichever register set is read below. */
+ gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_GAMUT_REMAP_C11;
+ gam_regs.masks.csc_c11 = dpp->tf_mask->CM_GAMUT_REMAP_C11;
+ gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_GAMUT_REMAP_C12;
+ gam_regs.masks.csc_c12 = dpp->tf_mask->CM_GAMUT_REMAP_C12;
+
+ if (*select == GAMUT_REMAP_COEFF) {
+ /* CM_GAMUT_REMAP_* set; C11_C12 and C33_C34 presumably bound the register range the helper walks - confirm */
+ gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_C11_C12);
+ gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_C33_C34);
+
+ cm_helper_read_color_matrices(
+ dpp->base.ctx,
+ regval,
+ &gam_regs);
+
+ } else if (*select == GAMUT_REMAP_COMA_COEFF) {
+ /* CM_COMA_* register set */
+ gam_regs.csc_c11_c12 = REG(CM_COMA_C11_C12);
+ gam_regs.csc_c33_c34 = REG(CM_COMA_C33_C34);
+
+ cm_helper_read_color_matrices(
+ dpp->base.ctx,
+ regval,
+ &gam_regs);
+
+ } else if (*select == GAMUT_REMAP_COMB_COEFF) {
+ /* CM_COMB_* register set */
+ gam_regs.csc_c11_c12 = REG(CM_COMB_C11_C12);
+ gam_regs.csc_c33_c34 = REG(CM_COMB_C33_C34);
+
+ cm_helper_read_color_matrices(
+ dpp->base.ctx,
+ regval,
+ &gam_regs);
+ } /* any other mode: regval is not written */
+}
+
+void dpp1_cm_get_gamut_remap(struct dpp *dpp_base,
+ struct dpp_grph_csc_adjustment *adjust)
+{
+ struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
+ uint16_t arr_reg_val[12] = {0}; /* zero-filled, so it stays all zero if read_gamut_remap() writes nothing */
+ enum gamut_remap_select select;
+ /* Report the currently programmed gamut remap through *adjust: bypass, or a matrix converted from the register values. */
+ read_gamut_remap(dpp, arr_reg_val, &select);
+
+ if (select == GAMUT_REMAP_BYPASS) {
+ adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+ return; /* adjust->temperature_matrix is left untouched in bypass */
+ }
+ /* every non-bypass mode is reported as a software adjustment */
+ adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+ convert_hw_matrix(adjust->temperature_matrix,
+ arr_reg_val, ARRAY_SIZE(arr_reg_val));
+}
+
static void dpp1_cm_program_color_matrix(
struct dcn10_dpp *dpp,
const uint16_t *regval)
@@ -342,9 +410,10 @@ void dpp1_cm_program_regamma_lut(struct dpp *dpp_base,
REG_SET(CM_RGAM_LUT_DATA, 0, CM_RGAM_LUT_DATA, rgb[i].delta_red_reg);
REG_SET(CM_RGAM_LUT_DATA, 0, CM_RGAM_LUT_DATA, rgb[i].delta_green_reg);
REG_SET(CM_RGAM_LUT_DATA, 0, CM_RGAM_LUT_DATA, rgb[i].delta_blue_reg);
-
}
+ REG_SEQ_SUBMIT();
+ REG_SEQ_WAIT_DONE();
}
void dpp1_cm_configure_regamma_lut(
@@ -615,9 +684,6 @@ void dpp1_set_degamma(
BREAK_TO_DEBUGGER();
break;
}
-
- REG_SEQ_SUBMIT();
- REG_SEQ_WAIT_DONE();
}
void dpp1_degamma_ram_select(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c
index 5ca9ab8a76e8..808bca9fb804 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c
@@ -28,7 +28,7 @@
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn10_dpp.h"
+#include "dcn10/dcn10_dpp.h"
#include "basics/conversion.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.c
index eaa7032f0f1a..c433f4b876e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.c
@@ -28,7 +28,7 @@
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn20_dpp.h"
+#include "dcn20/dcn20_dpp.h"
#include "basics/conversion.h"
#define NUM_PHASES 64
@@ -55,21 +55,23 @@ void dpp20_read_state(struct dpp *dpp_base,
REG_GET(DPP_CONTROL,
DPP_CLOCK_ENABLE, &s->is_enabled);
+
+ // Degamma LUT (RAM)
REG_GET(CM_DGAM_CONTROL,
- CM_DGAM_LUT_MODE, &s->dgam_lut_mode);
- // BGAM has no ROM, and definition is different, can't reuse same dump
- //REG_GET(CM_BLNDGAM_CONTROL,
- // CM_BLNDGAM_LUT_MODE, &s->rgam_lut_mode);
- REG_GET(CM_GAMUT_REMAP_CONTROL,
- CM_GAMUT_REMAP_MODE, &s->gamut_remap_mode);
- if (s->gamut_remap_mode) {
- s->gamut_remap_c11_c12 = REG_READ(CM_GAMUT_REMAP_C11_C12);
- s->gamut_remap_c13_c14 = REG_READ(CM_GAMUT_REMAP_C13_C14);
- s->gamut_remap_c21_c22 = REG_READ(CM_GAMUT_REMAP_C21_C22);
- s->gamut_remap_c23_c24 = REG_READ(CM_GAMUT_REMAP_C23_C24);
- s->gamut_remap_c31_c32 = REG_READ(CM_GAMUT_REMAP_C31_C32);
- s->gamut_remap_c33_c34 = REG_READ(CM_GAMUT_REMAP_C33_C34);
- }
+ CM_DGAM_LUT_MODE, &s->dgam_lut_mode);
+
+ // Shaper LUT (RAM), 3D LUT (mode, bit-depth, size)
+ REG_GET(CM_SHAPER_CONTROL,
+ CM_SHAPER_LUT_MODE, &s->shaper_lut_mode);
+ REG_GET_2(CM_3DLUT_READ_WRITE_CONTROL,
+ CM_3DLUT_CONFIG_STATUS, &s->lut3d_mode,
+ CM_3DLUT_30BIT_EN, &s->lut3d_bit_depth);
+ REG_GET(CM_3DLUT_MODE,
+ CM_3DLUT_SIZE, &s->lut3d_size);
+
+ // Blend/Out Gamma (RAM)
+ REG_GET(CM_BLNDGAM_LUT_WRITE_EN_MASK,
+ CM_BLNDGAM_CONFIG_STATUS, &s->rgam_lut_mode);
}
void dpp2_power_on_obuf(
@@ -393,6 +395,7 @@ static struct dpp_funcs dcn20_dpp_funcs = {
.set_optional_cursor_attributes = dpp1_cnv_set_optional_cursor_attributes,
.dpp_dppclk_control = dpp1_dppclk_control,
.dpp_set_hdr_multiplier = dpp2_set_hdr_multiplier,
+ .dpp_get_gamut_remap = dpp2_cm_get_gamut_remap,
};
static struct dpp_caps dcn20_dpp_cap = {
@@ -429,4 +432,60 @@ bool dpp2_construct(
return true;
}
+/* Compute the maximum number of lines that fit in the line buffer: *num_part_y for luma (also limited by alpha when alpha_en), *num_part_c for chroma; both are capped at 64. */
+void dscl2_spl_calc_lb_num_partitions(
+ bool alpha_en,
+ const struct spl_scaler_data *scl_data,
+ enum lb_memory_config lb_config,
+ int *num_part_y,
+ int *num_part_c)
+{
+ int memory_line_size_y, memory_line_size_c, memory_line_size_a,
+ lb_memory_size, lb_memory_size_c, lb_memory_size_a, num_partitions_a;
+ /* line size is the smaller of the viewport width and the recout width */
+ int line_size = scl_data->viewport.width < scl_data->recout.width ?
+ scl_data->viewport.width : scl_data->recout.width;
+ int line_size_c = scl_data->viewport_c.width < scl_data->recout.width ? /* chroma viewport is compared against the same recout width as luma */
+ scl_data->viewport_c.width : scl_data->recout.width;
+ /* force a zero width to 1 so the memory line sizes below are non-zero and the later divisions are safe */
+ if (line_size == 0)
+ line_size = 1;
+
+ if (line_size_c == 0)
+ line_size_c = 1;
+
+ memory_line_size_y = (line_size + 5) / 6; /* ceil(line_size / 6): +5 rounds the integer division up */
+ memory_line_size_c = (line_size_c + 5) / 6; /* ceil(line_size_c / 6) */
+ memory_line_size_a = (line_size + 5) / 6; /* alpha uses the luma line size */
+ /* pick the memory budget for luma, chroma and alpha from the requested configuration */
+ if (lb_config == LB_MEMORY_CONFIG_1) {
+ lb_memory_size = 970;
+ lb_memory_size_c = 970;
+ lb_memory_size_a = 970;
+ } else if (lb_config == LB_MEMORY_CONFIG_2) {
+ lb_memory_size = 1290;
+ lb_memory_size_c = 1290;
+ lb_memory_size_a = 1290;
+ } else if (lb_config == LB_MEMORY_CONFIG_3) {
+ /* 420 mode: using 3rd mem from Y, Cr and Cb */
+ lb_memory_size = 970 + 1290 + 484 + 484 + 484;
+ lb_memory_size_c = 970 + 1290;
+ lb_memory_size_a = 970 + 1290 + 484;
+ } else { /* every other configuration value: all three memories for each component */
+ lb_memory_size = 970 + 1290 + 484;
+ lb_memory_size_c = 970 + 1290 + 484;
+ lb_memory_size_a = 970 + 1290 + 484;
+ }
+ *num_part_y = lb_memory_size / memory_line_size_y; /* integer division rounds down */
+ *num_part_c = lb_memory_size_c / memory_line_size_c;
+ num_partitions_a = lb_memory_size_a / memory_line_size_a;
+ if (alpha_en
+ && (num_partitions_a < *num_part_y)) /* with alpha enabled, luma cannot have more partitions than alpha */
+ *num_part_y = num_partitions_a;
+ /* clamp both results to at most 64 partitions */
+ if (*num_part_y > 64)
+ *num_part_y = 64;
+ if (*num_part_c > 64)
+ *num_part_c = 64;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.h
index e735363d0051..85f359b5da67 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.h
@@ -26,7 +26,6 @@
#define __DCN20_DPP_H__
#include "dcn10/dcn10_dpp.h"
-
#define TO_DCN20_DPP(dpp)\
container_of(dpp, struct dcn20_dpp, base)
@@ -690,6 +689,7 @@ struct dcn20_dpp {
int lb_memory_size;
int lb_bits_per_entry;
bool is_write_to_ram_a_safe;
+ bool dispclk_r_gate_disable;
struct scaler_data scl_data;
struct pwl_params pwl_data;
};
@@ -736,7 +736,7 @@ bool dpp20_program_shaper(
bool dpp20_program_3dlut(
struct dpp *dpp_base,
- struct tetrahedral_params *params);
+ const struct tetrahedral_params *params);
void dpp2_cnv_set_alpha_keyer(
struct dpp *dpp_base,
@@ -748,6 +748,13 @@ void dscl2_calc_lb_num_partitions(
int *num_part_y,
int *num_part_c);
+void dscl2_spl_calc_lb_num_partitions(
+ bool alpha_en,
+ const struct spl_scaler_data *scl_data,
+ enum lb_memory_config lb_config,
+ int *num_part_y,
+ int *num_part_c);
+
void dpp2_set_cursor_attributes(
struct dpp *dpp_base,
struct dc_cursor_attributes *cursor_attributes);
@@ -775,4 +782,7 @@ bool dpp2_construct(struct dcn20_dpp *dpp2,
void dpp2_power_on_obuf(
struct dpp *dpp_base,
bool power_on);
+
+void dpp2_cm_get_gamut_remap(struct dpp *dpp_base,
+ struct dpp_grph_csc_adjustment *adjust);
#endif /* __DC_HWSS_DCN20_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp_cm.c
index 598caa508d43..31613372e214 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp_cm.c
@@ -28,7 +28,7 @@
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn20_dpp.h"
+#include "dcn20/dcn20_dpp.h"
#include "basics/conversion.h"
#include "dcn10/dcn10_cm_common.h"
@@ -234,6 +234,61 @@ void dpp2_cm_set_gamut_remap(
}
}
+static void read_gamut_remap(struct dcn20_dpp *dpp,
+ uint16_t *regval,
+ enum dcn20_gamut_remap_select *select)
+{
+ struct color_matrices_reg gam_regs;
+ uint32_t selection;
+
+ IX_REG_GET(CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_DATA,
+ CM_TEST_DEBUG_DATA_STATUS_IDX,
+ CM_TEST_DEBUG_DATA_GAMUT_REMAP_MODE, &selection);
+
+ *select = selection;
+
+ gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_GAMUT_REMAP_C11;
+ gam_regs.masks.csc_c11 = dpp->tf_mask->CM_GAMUT_REMAP_C11;
+ gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_GAMUT_REMAP_C12;
+ gam_regs.masks.csc_c12 = dpp->tf_mask->CM_GAMUT_REMAP_C12;
+
+ if (*select == DCN2_GAMUT_REMAP_COEF_A) {
+ gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_C11_C12);
+ gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_C33_C34);
+
+ cm_helper_read_color_matrices(dpp->base.ctx,
+ regval,
+ &gam_regs);
+
+ } else if (*select == DCN2_GAMUT_REMAP_COEF_B) {
+ gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_B_C11_C12);
+ gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_B_C33_C34);
+
+ cm_helper_read_color_matrices(dpp->base.ctx,
+ regval,
+ &gam_regs);
+ }
+}
+
+void dpp2_cm_get_gamut_remap(struct dpp *dpp_base,
+ struct dpp_grph_csc_adjustment *adjust)
+{
+ struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base);
+ uint16_t arr_reg_val[12] = {0};
+ enum dcn20_gamut_remap_select select;
+
+ read_gamut_remap(dpp, arr_reg_val, &select);
+
+ if (select == DCN2_GAMUT_REMAP_BYPASS) {
+ adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+ return;
+ }
+
+ adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+ convert_hw_matrix(adjust->temperature_matrix,
+ arr_reg_val, ARRAY_SIZE(arr_reg_val));
+}
+
void dpp2_program_input_csc(
struct dpp *dpp_base,
enum dc_color_space color_space,
@@ -1059,15 +1114,15 @@ static void dpp20_select_3dlut_ram_mask(
bool dpp20_program_3dlut(
struct dpp *dpp_base,
- struct tetrahedral_params *params)
+ const struct tetrahedral_params *params)
{
enum dc_lut_mode mode;
bool is_17x17x17;
bool is_12bits_color_channel;
- struct dc_rgb *lut0;
- struct dc_rgb *lut1;
- struct dc_rgb *lut2;
- struct dc_rgb *lut3;
+ const struct dc_rgb *lut0;
+ const struct dc_rgb *lut1;
+ const struct dc_rgb *lut2;
+ const struct dc_rgb *lut3;
int lut_size0;
int lut_size;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.c
index a7268027a472..d78ab3ffd95d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.c
@@ -28,7 +28,7 @@
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn201_dpp.h"
+#include "dcn201/dcn201_dpp.h"
#include "basics/conversion.h"
#define REG(reg)\
@@ -61,6 +61,13 @@ static void dpp201_cnv_setup(
CNVC_BYPASS, 0,
FORMAT_EXPANSION_MODE, mode);
+ /*
+ * hardcode default
+ * FORMAT_CONTROL. FORMAT_CNV16 default 0: U0.16/S.1.15; 1: U1.15/ S.1.14
+ * FORMAT_CONTROL. CNVC_BYPASS_MSB_ALIGN default 0: disabled 1: enabled
+ * FORMAT_CONTROL. CLAMP_POSITIVE default 0: disabled 1: enabled
+ * FORMAT_CONTROL. CLAMP_POSITIVE_C default 0: disabled 1: enabled
+ */
REG_UPDATE(FORMAT_CONTROL, FORMAT_CNV16, 0);
REG_UPDATE(FORMAT_CONTROL, CNVC_BYPASS_MSB_ALIGN, 0);
REG_UPDATE(FORMAT_CONTROL, CLAMP_POSITIVE, 0);
@@ -185,6 +192,7 @@ static bool dpp201_get_optimal_number_of_taps(
struct scaler_data *scl_data,
const struct scaling_taps *in_taps)
{
+ /* Some ASICs do not support FP16 scaling, so we reject modes that require it */
if (scl_data->viewport.width != scl_data->h_active &&
scl_data->viewport.height != scl_data->v_active &&
dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT &&
@@ -196,6 +204,7 @@ static bool dpp201_get_optimal_number_of_taps(
scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width)
return false;
+ /* No support for programming ratio of 8, drop to 7.99999.. */
if (scl_data->ratios.horz.value == (8ll << 32))
scl_data->ratios.horz.value--;
if (scl_data->ratios.vert.value == (8ll << 32))
@@ -205,6 +214,7 @@ static bool dpp201_get_optimal_number_of_taps(
if (scl_data->ratios.vert_c.value == (8ll << 32))
scl_data->ratios.vert_c.value--;
+ /* Set default taps if none are provided */
if (in_taps->h_taps == 0) {
if (dc_fixpt_ceil(scl_data->ratios.horz) > 4)
scl_data->taps.h_taps = 8;
@@ -233,6 +243,7 @@ static bool dpp201_get_optimal_number_of_taps(
else
scl_data->taps.h_taps_c = 2;
} else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1)
+ /* Only 1 and even h_taps_c are supported by hw */
scl_data->taps.h_taps_c = in_taps->h_taps_c - 1;
else
scl_data->taps.h_taps_c = in_taps->h_taps_c;
@@ -275,6 +286,7 @@ static struct dpp_funcs dcn201_dpp_funcs = {
.set_optional_cursor_attributes = dpp1_cnv_set_optional_cursor_attributes,
.dpp_dppclk_control = dpp1_dppclk_control,
.dpp_set_hdr_multiplier = dpp2_set_hdr_multiplier,
+ .dpp_get_gamut_remap = dpp2_cm_get_gamut_remap,
};
static struct dpp_caps dcn201_dpp_cap = {
@@ -306,7 +318,7 @@ bool dpp201_construct(
LB_PIXEL_DEPTH_30BPP;
dpp->lb_bits_per_entry = LB_BITS_PER_ENTRY;
- dpp->lb_memory_size = LB_TOTAL_NUMBER_OF_ENTRIES;
+ dpp->lb_memory_size = LB_TOTAL_NUMBER_OF_ENTRIES; /*0x1404*/
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.h
index cbd5b47b4acf..cbd5b47b4acf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c
index 50dc83404644..09be2a90cc79 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c
@@ -26,9 +26,9 @@
#include "dm_services.h"
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn30_dpp.h"
+#include "dcn30/dcn30_dpp.h"
#include "basics/conversion.h"
-#include "dcn30_cm_common.h"
+#include "dcn30/dcn30_cm_common.h"
#define REG(reg)\
dpp->tf_regs->reg
@@ -44,12 +44,46 @@
void dpp30_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s)
{
struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base);
+ uint32_t gamcor_lut_mode, rgam_lut_mode;
REG_GET(DPP_CONTROL,
- DPP_CLOCK_ENABLE, &s->is_enabled);
+ DPP_CLOCK_ENABLE, &s->is_enabled);
+
+ // Pre-degamma (ROM)
+ REG_GET_2(PRE_DEGAM,
+ PRE_DEGAM_MODE, &s->pre_dgam_mode,
+ PRE_DEGAM_SELECT, &s->pre_dgam_select);
+
+ // Gamma Correction (RAM)
+ REG_GET(CM_GAMCOR_CONTROL,
+ CM_GAMCOR_MODE_CURRENT, &s->gamcor_mode);
+ if (s->gamcor_mode) {
+ REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT_CURRENT, &gamcor_lut_mode);
+ if (!gamcor_lut_mode)
+ s->gamcor_mode = LUT_RAM_A; // Otherwise, LUT_RAM_B
+ }
- // TODO: Implement for DCN3
+ // Shaper LUT (RAM), 3D LUT (mode, bit-depth, size)
+ if (REG(CM_SHAPER_CONTROL))
+ REG_GET(CM_SHAPER_CONTROL, CM_SHAPER_LUT_MODE, &s->shaper_lut_mode);
+ if (REG(CM_3DLUT_MODE))
+ REG_GET(CM_3DLUT_MODE, CM_3DLUT_MODE_CURRENT, &s->lut3d_mode);
+ if (REG(CM_3DLUT_READ_WRITE_CONTROL))
+ REG_GET(CM_3DLUT_READ_WRITE_CONTROL, CM_3DLUT_30BIT_EN, &s->lut3d_bit_depth);
+ if (REG(CM_3DLUT_MODE))
+ REG_GET(CM_3DLUT_MODE, CM_3DLUT_SIZE, &s->lut3d_size);
+
+ // Blend/Out Gamma (RAM)
+ if (REG(CM_BLNDGAM_CONTROL)) {
+ REG_GET(CM_BLNDGAM_CONTROL, CM_BLNDGAM_MODE_CURRENT, &s->rgam_lut_mode);
+ if (s->rgam_lut_mode) {
+ REG_GET(CM_BLNDGAM_CONTROL, CM_BLNDGAM_SELECT_CURRENT, &rgam_lut_mode);
+ if (!rgam_lut_mode)
+ s->rgam_lut_mode = LUT_RAM_A; // Otherwise, LUT_RAM_B
+ }
+ }
}
+
/*program post scaler scs block in dpp CM*/
void dpp3_program_post_csc(
struct dpp *dpp_base,
@@ -185,7 +219,6 @@ void dpp3_cnv_setup (
uint32_t alpha_plane_enable = 0;
uint32_t dealpha_en = 0, dealpha_ablnd_en = 0;
uint32_t realpha_en = 0, realpha_ablnd_en = 0;
- uint32_t program_prealpha_dealpha = 0;
struct out_csc_color_matrix tbl_entry;
int i;
@@ -260,9 +293,11 @@ void dpp3_cnv_setup (
break;
case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX:
pixel_format = 112;
+ alpha_en = 0;
break;
case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FIX:
pixel_format = 113;
+ alpha_en = 0;
break;
case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010:
pixel_format = 114;
@@ -286,9 +321,11 @@ void dpp3_cnv_setup (
break;
case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT:
pixel_format = 118;
+ alpha_en = 0;
break;
case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FLOAT:
pixel_format = 119;
+ alpha_en = 0;
break;
default:
break;
@@ -309,10 +346,6 @@ void dpp3_cnv_setup (
CNVC_ALPHA_PLANE_ENABLE, alpha_plane_enable);
REG_UPDATE(FORMAT_CONTROL, FORMAT_CONTROL__ALPHA_EN, alpha_en);
- if (program_prealpha_dealpha) {
- dealpha_en = 1;
- realpha_en = 1;
- }
REG_SET_2(PRE_DEALPHA, 0,
PRE_DEALPHA_EN, dealpha_en,
PRE_DEALPHA_ABLND_EN, dealpha_ablnd_en);
@@ -392,11 +425,6 @@ bool dpp3_get_optimal_number_of_taps(
int min_taps_y, min_taps_c;
enum lb_memory_config lb_config;
- if (scl_data->viewport.width > scl_data->h_active &&
- dpp->ctx->dc->debug.max_downscale_src_width != 0 &&
- scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width)
- return false;
-
/*
* Set default taps if none are provided
* From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling
@@ -434,6 +462,12 @@ bool dpp3_get_optimal_number_of_taps(
else
scl_data->taps.h_taps_c = in_taps->h_taps_c;
+ // Avoid null data in the scl data with this early return, proceed with the non-adaptive calculation first
+ if (scl_data->viewport.width > scl_data->h_active &&
+ dpp->ctx->dc->debug.max_downscale_src_width != 0 &&
+ scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width)
+ return false;
+
/*Ensure we can support the requested number of vtaps*/
min_taps_y = dc_fixpt_ceil(scl_data->ratios.vert);
min_taps_c = dc_fixpt_ceil(scl_data->ratios.vert_c);
@@ -613,16 +647,19 @@ static void dpp3_program_blnd_pwl(
REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].red_reg);
REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, last_base_value_red);
} else {
+ REG_SET(CM_BLNDGAM_LUT_INDEX, 0, CM_BLNDGAM_LUT_INDEX, 0);
REG_UPDATE(CM_BLNDGAM_LUT_CONTROL, CM_BLNDGAM_LUT_WRITE_COLOR_MASK, 4);
for (i = 0 ; i < num; i++)
REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].red_reg);
REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, last_base_value_red);
+ REG_SET(CM_BLNDGAM_LUT_INDEX, 0, CM_BLNDGAM_LUT_INDEX, 0);
REG_UPDATE(CM_BLNDGAM_LUT_CONTROL, CM_BLNDGAM_LUT_WRITE_COLOR_MASK, 2);
for (i = 0 ; i < num; i++)
REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].green_reg);
REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, last_base_value_green);
+ REG_SET(CM_BLNDGAM_LUT_INDEX, 0, CM_BLNDGAM_LUT_INDEX, 0);
REG_UPDATE(CM_BLNDGAM_LUT_CONTROL, CM_BLNDGAM_LUT_WRITE_COLOR_MASK, 1);
for (i = 0 ; i < num; i++)
REG_SET(CM_BLNDGAM_LUT_DATA, 0, CM_BLNDGAM_LUT_DATA, rgb[i].blue_reg);
@@ -753,8 +790,7 @@ static bool dpp3_program_blnd_lut(struct dpp *dpp_base,
if (params == NULL) {
REG_SET(CM_BLNDGAM_CONTROL, 0, CM_BLNDGAM_MODE, 0);
- if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm)
- dpp3_power_on_blnd_lut(dpp_base, false);
+ dpp3_power_on_blnd_lut(dpp_base, false);
return false;
}
@@ -1167,8 +1203,7 @@ static bool dpp3_program_shaper(struct dpp *dpp_base,
if (params == NULL) {
REG_SET(CM_SHAPER_CONTROL, 0, CM_SHAPER_LUT_MODE, 0);
- if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm)
- dpp3_power_on_shaper(dpp_base, false);
+ dpp3_power_on_shaper(dpp_base, false);
return false;
}
@@ -1348,22 +1383,21 @@ static void dpp3_select_3dlut_ram_mask(
}
static bool dpp3_program_3dlut(struct dpp *dpp_base,
- struct tetrahedral_params *params)
+ const struct tetrahedral_params *params)
{
enum dc_lut_mode mode;
bool is_17x17x17;
bool is_12bits_color_channel;
- struct dc_rgb *lut0;
- struct dc_rgb *lut1;
- struct dc_rgb *lut2;
- struct dc_rgb *lut3;
+ const struct dc_rgb *lut0;
+ const struct dc_rgb *lut1;
+ const struct dc_rgb *lut2;
+ const struct dc_rgb *lut3;
int lut_size0;
int lut_size;
if (params == NULL) {
dpp3_set_3dlut_mode(dpp_base, LUT_BYPASS, false, false);
- if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm)
- dpp3_power_on_hdr3dlut(dpp_base, false);
+ dpp3_power_on_hdr3dlut(dpp_base, false);
return false;
}
@@ -1459,6 +1493,8 @@ static struct dpp_funcs dcn30_dpp_funcs = {
.set_optional_cursor_attributes = dpp1_cnv_set_optional_cursor_attributes,
.dpp_dppclk_control = dpp1_dppclk_control,
.dpp_set_hdr_multiplier = dpp3_set_hdr_multiplier,
+ .dpp_get_gamut_remap = dpp3_cm_get_gamut_remap,
+ .dpp_force_disable_cursor = dpp_force_disable_cursor,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h
index 2082372d69ee..f236824126e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h
@@ -132,6 +132,8 @@
SRI(CM_POST_CSC_B_C33_C34, CM, id), \
SRI(CM_MEM_PWR_CTRL, CM, id), \
SRI(CM_CONTROL, CM, id), \
+ SRI(CM_TEST_DEBUG_INDEX, CM, id), \
+ SRI(CM_TEST_DEBUG_DATA, CM, id), \
SRI(FORMAT_CONTROL, CNVC_CFG, id), \
SRI(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \
SRI(CURSOR0_CONTROL, CNVC_CUR, id), \
@@ -173,8 +175,6 @@
SRI(CM_BLNDGAM_RAMB_START_SLOPE_CNTL_R, CM, id),\
SRI(CM_BLNDGAM_LUT_CONTROL, CM, id)
-
-
#define DPP_REG_LIST_SH_MASK_DCN30_COMMON(mask_sh)\
TF_SF(CM0_CM_MEM_PWR_STATUS, GAMCOR_MEM_PWR_STATE, mask_sh),\
TF_SF(CM0_CM_DEALPHA, CM_DEALPHA_EN, mask_sh),\
@@ -195,7 +195,6 @@
TF_SF(CM0_CM_GAMCOR_LUT_DATA, CM_GAMCOR_LUT_DATA, mask_sh),\
TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_WRITE_COLOR_MASK, mask_sh),\
TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_READ_COLOR_SEL, mask_sh),\
- TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_READ_DBG, mask_sh),\
TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_HOST_SEL, mask_sh),\
TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_CONFIG_MODE, mask_sh),\
TF_SF(CM0_CM_GAMCOR_RAMA_START_CNTL_B, CM_GAMCOR_RAMA_EXP_REGION_START_B, mask_sh),\
@@ -295,6 +294,7 @@
TF_SF(CM0_CM_POST_CSC_C11_C12, CM_POST_CSC_C12, mask_sh), \
TF_SF(CM0_CM_POST_CSC_C33_C34, CM_POST_CSC_C33, mask_sh), \
TF_SF(CM0_CM_POST_CSC_C33_C34, CM_POST_CSC_C34, mask_sh), \
+ TF_SF(CM0_CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_INDEX, mask_sh), \
TF_SF(CNVC_CFG0_FORMAT_CONTROL, CNVC_BYPASS, mask_sh), \
TF2_SF(CNVC_CFG0, FORMAT_CONTROL__ALPHA_EN, mask_sh), \
TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_EXPANSION_MODE, mask_sh), \
@@ -572,6 +572,7 @@ struct dcn3_dpp {
int lb_memory_size;
int lb_bits_per_entry;
bool is_write_to_ram_a_safe;
+ bool dispclk_r_gate_disable;
struct scaler_data scl_data;
struct pwl_params pwl_data;
};
@@ -598,7 +599,7 @@ bool dpp3_get_optimal_number_of_taps(
struct scaler_data *scl_data,
const struct scaling_taps *in_taps);
-void dpp3_cnv_setup (
+void dpp3_cnv_setup(
struct dpp *dpp_base,
enum surface_pixel_format format,
enum expansion_mode mode,
@@ -639,4 +640,6 @@ void dpp3_program_cm_dealpha(
struct dpp *dpp_base,
uint32_t enable, uint32_t additive_blending);
+void dpp3_cm_get_gamut_remap(struct dpp *dpp_base,
+ struct dpp_grph_csc_adjustment *adjust);
#endif /* __DC_HWSS_DCN30_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp_cm.c
index e43f77c11c00..82eca0e7b7d0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp_cm.c
@@ -26,9 +26,9 @@
#include "dm_services.h"
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn30_dpp.h"
+#include "dcn30/dcn30_dpp.h"
#include "basics/conversion.h"
-#include "dcn30_cm_common.h"
+#include "dcn30/dcn30_cm_common.h"
#define REG(reg)\
dpp->tf_regs->reg
@@ -56,16 +56,13 @@ static void dpp3_enable_cm_block(
static enum dc_lut_mode dpp30_get_gamcor_current(struct dpp *dpp_base)
{
- enum dc_lut_mode mode;
+ enum dc_lut_mode mode = LUT_BYPASS;
uint32_t state_mode;
uint32_t lut_mode;
struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base);
REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_MODE_CURRENT, &state_mode);
- if (state_mode == 0)
- mode = LUT_BYPASS;
-
if (state_mode == 2) {//Programmable RAM LUT
REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT_CURRENT, &lut_mode);
if (lut_mode == 0)
@@ -408,3 +405,57 @@ void dpp3_cm_set_gamut_remap(
program_gamut_remap(dpp, arr_reg_val, gamut_mode);
}
}
+
+static void read_gamut_remap(struct dcn3_dpp *dpp,
+ uint16_t *regval,
+ int *select)
+{
+ struct color_matrices_reg gam_regs;
+ uint32_t selection;
+
+ //current coefficient set in use
+ REG_GET(CM_GAMUT_REMAP_CONTROL, CM_GAMUT_REMAP_MODE_CURRENT, &selection);
+
+ *select = selection;
+
+ gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_GAMUT_REMAP_C11;
+ gam_regs.masks.csc_c11 = dpp->tf_mask->CM_GAMUT_REMAP_C11;
+ gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_GAMUT_REMAP_C12;
+ gam_regs.masks.csc_c12 = dpp->tf_mask->CM_GAMUT_REMAP_C12;
+
+ if (*select == GAMUT_REMAP_COEFF) {
+ gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_C11_C12);
+ gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_C33_C34);
+
+ cm_helper_read_color_matrices(dpp->base.ctx,
+ regval,
+ &gam_regs);
+
+ } else if (*select == GAMUT_REMAP_COMA_COEFF) {
+ gam_regs.csc_c11_c12 = REG(CM_GAMUT_REMAP_B_C11_C12);
+ gam_regs.csc_c33_c34 = REG(CM_GAMUT_REMAP_B_C33_C34);
+
+ cm_helper_read_color_matrices(dpp->base.ctx,
+ regval,
+ &gam_regs);
+ }
+}
+
+void dpp3_cm_get_gamut_remap(struct dpp *dpp_base,
+ struct dpp_grph_csc_adjustment *adjust)
+{
+ struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base);
+ uint16_t arr_reg_val[12] = {0};
+ int select;
+
+ read_gamut_remap(dpp, arr_reg_val, &select);
+
+ if (select == GAMUT_REMAP_BYPASS) {
+ adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+ return;
+ }
+
+ adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+ convert_hw_matrix(adjust->temperature_matrix,
+ arr_reg_val, ARRAY_SIZE(arr_reg_val));
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c
index dcf12a0b031c..fa67e54bf94e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c
@@ -26,7 +26,7 @@
#include "dm_services.h"
#include "core_types.h"
#include "reg_helper.h"
-#include "dcn32_dpp.h"
+#include "dcn32/dcn32_dpp.h"
#include "basics/conversion.h"
#include "dcn30/dcn30_cm_common.h"
@@ -133,6 +133,7 @@ static struct dpp_funcs dcn32_dpp_funcs = {
.set_optional_cursor_attributes = dpp1_cnv_set_optional_cursor_attributes,
.dpp_dppclk_control = dpp1_dppclk_control,
.dpp_set_hdr_multiplier = dpp3_set_hdr_multiplier,
+ .dpp_get_gamut_remap = dpp3_cm_get_gamut_remap,
};
@@ -162,3 +163,76 @@ bool dpp32_construct(
return true;
}
+void dscl32_spl_calc_lb_num_partitions(
+ bool alpha_en,
+ const struct spl_scaler_data *scl_data,
+ enum lb_memory_config lb_config,
+ int *num_part_y,
+ int *num_part_c)
+{
+ int memory_line_size_y, memory_line_size_c, memory_line_size_a,
+ lb_memory_size, lb_memory_size_c, lb_memory_size_a, num_partitions_a;
+
+ int line_size = scl_data->viewport.width < scl_data->recout.width ?
+ scl_data->viewport.width : scl_data->recout.width;
+ int line_size_c = scl_data->viewport_c.width < scl_data->recout.width ?
+ scl_data->viewport_c.width : scl_data->recout.width;
+
+ if (line_size == 0)
+ line_size = 1;
+
+ if (line_size_c == 0)
+ line_size_c = 1;
+
+ memory_line_size_y = (line_size + 5) / 6; /* +5 to ceil */
+ memory_line_size_c = (line_size_c + 5) / 6; /* +5 to ceil */
+ memory_line_size_a = (line_size + 5) / 6; /* +5 to ceil */
+
+ if (lb_config == LB_MEMORY_CONFIG_1) {
+ lb_memory_size = 970;
+ lb_memory_size_c = 970;
+ lb_memory_size_a = 970;
+ } else if (lb_config == LB_MEMORY_CONFIG_2) {
+ lb_memory_size = 1290;
+ lb_memory_size_c = 1290;
+ lb_memory_size_a = 1290;
+ } else if (lb_config == LB_MEMORY_CONFIG_3) {
+ if (scl_data->viewport.width == scl_data->h_active &&
+ scl_data->viewport.height == scl_data->v_active) {
+ /* 420 mode: luma using all 3 mem from Y, plus 3rd mem from Cr and Cb */
+ /* use increased LB size for calculation only if Scaler not enabled */
+ lb_memory_size = 970 + 1290 + 1170 + 1170 + 1170;
+ lb_memory_size_c = 970 + 1290;
+ lb_memory_size_a = 970 + 1290 + 1170;
+ } else {
+ /* 420 mode: luma using all 3 mem from Y, plus 3rd mem from Cr and Cb */
+ lb_memory_size = 970 + 1290 + 484 + 484 + 484;
+ lb_memory_size_c = 970 + 1290;
+ lb_memory_size_a = 970 + 1290 + 484;
+ }
+ } else {
+ if (scl_data->viewport.width == scl_data->h_active &&
+ scl_data->viewport.height == scl_data->v_active) {
+ /* use increased LB size for calculation only if Scaler not enabled */
+ lb_memory_size = 970 + 1290 + 1170;
+ lb_memory_size_c = 970 + 1290 + 1170;
+ lb_memory_size_a = 970 + 1290 + 1170;
+ } else {
+ lb_memory_size = 970 + 1290 + 484;
+ lb_memory_size_c = 970 + 1290 + 484;
+ lb_memory_size_a = 970 + 1290 + 484;
+ }
+ }
+ *num_part_y = lb_memory_size / memory_line_size_y;
+ *num_part_c = lb_memory_size_c / memory_line_size_c;
+ num_partitions_a = lb_memory_size_a / memory_line_size_a;
+
+ if (alpha_en
+ && (num_partitions_a < *num_part_y))
+ *num_part_y = num_partitions_a;
+
+ if (*num_part_y > 32)
+ *num_part_y = 32;
+ if (*num_part_c > 32)
+ *num_part_c = 32;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.h
index 572958d287eb..f33dddbfcc31 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.h
@@ -35,4 +35,11 @@ bool dpp32_construct(struct dcn3_dpp *dpp3,
const struct dcn3_dpp_shift *tf_shift,
const struct dcn3_dpp_mask *tf_mask);
+void dscl32_spl_calc_lb_num_partitions(
+ bool alpha_en,
+ const struct spl_scaler_data *scl_data,
+ enum lb_memory_config lb_config,
+ int *num_part_y,
+ int *num_part_c);
+
#endif /* __DCN32_DPP_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c
new file mode 100644
index 000000000000..f7a373a3d70a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "dcn35/dcn35_dpp.h"
+#include "reg_helper.h"
+
+#define REG(reg) dpp->tf_regs->reg
+
+#define CTX dpp->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+ ((const struct dcn35_dpp_shift *)(dpp->tf_shift))->field_name, \
+ ((const struct dcn35_dpp_mask *)(dpp->tf_mask))->field_name
+
+void dpp35_dppclk_control(
+ struct dpp *dpp_base,
+ bool dppclk_div,
+ bool enable)
+{
+ struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base);
+
+ if (enable) {
+ if (dpp->tf_mask->DPPCLK_RATE_CONTROL)
+ REG_UPDATE_2(DPP_CONTROL,
+ DPPCLK_RATE_CONTROL, dppclk_div,
+ DPP_CLOCK_ENABLE, 1);
+ else
+ if (dpp->dispclk_r_gate_disable)
+ REG_UPDATE_2(DPP_CONTROL,
+ DPP_CLOCK_ENABLE, 1,
+ DISPCLK_R_GATE_DISABLE, 1);
+ else
+ REG_UPDATE(DPP_CONTROL,
+ DPP_CLOCK_ENABLE, 1);
+ } else
+ if (dpp->dispclk_r_gate_disable)
+ REG_UPDATE_2(DPP_CONTROL,
+ DPP_CLOCK_ENABLE, 0,
+ DISPCLK_R_GATE_DISABLE, 0);
+ else
+ REG_UPDATE(DPP_CONTROL,
+ DPP_CLOCK_ENABLE, 0);
+}
+
+void dpp35_program_bias_and_scale_fcnv(
+ struct dpp *dpp_base,
+ struct dc_bias_and_scale *params)
+{
+ struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base);
+
+ if (!params->bias_and_scale_valid) {
+ REG_SET(FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, 0);
+ REG_SET(FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, 0);
+ REG_SET(FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, 0);
+
+ REG_SET(FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, 0x1F000);
+ REG_SET(FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, 0x1F000);
+ REG_SET(FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, 0x1F000);
+ } else {
+ REG_SET(FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, params->bias_red);
+ REG_SET(FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, params->bias_green);
+ REG_SET(FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, params->bias_blue);
+
+ REG_SET(FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, params->scale_red);
+ REG_SET(FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, params->scale_green);
+ REG_SET(FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, params->scale_blue);
+ }
+}
+
+static struct dpp_funcs dcn35_dpp_funcs = {
+ .dpp_program_gamcor_lut = dpp3_program_gamcor_lut,
+ .dpp_read_state = dpp30_read_state,
+ .dpp_reset = dpp_reset,
+ .dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale,
+ .dpp_get_optimal_number_of_taps = dpp3_get_optimal_number_of_taps,
+ .dpp_set_gamut_remap = dpp3_cm_set_gamut_remap,
+ .dpp_set_csc_adjustment = NULL,
+ .dpp_set_csc_default = NULL,
+ .dpp_program_regamma_pwl = NULL,
+ .dpp_set_pre_degam = dpp3_set_pre_degam,
+ .dpp_program_input_lut = NULL,
+ .dpp_full_bypass = dpp1_full_bypass,
+ .dpp_setup = dpp3_cnv_setup,
+ .dpp_program_degamma_pwl = NULL,
+ .dpp_program_cm_dealpha = dpp3_program_cm_dealpha,
+ .dpp_program_cm_bias = dpp3_program_cm_bias,
+
+ .dpp_program_blnd_lut = NULL, // BLNDGAM is removed completely in DCN3.2 DPP
+ .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND)
+ .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND)
+
+ .dpp_program_bias_and_scale = dpp35_program_bias_and_scale_fcnv,
+ .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer,
+ .set_cursor_attributes = dpp3_set_cursor_attributes,
+ .set_cursor_position = dpp1_set_cursor_position,
+ .set_optional_cursor_attributes = dpp1_cnv_set_optional_cursor_attributes,
+ .dpp_dppclk_control = dpp35_dppclk_control,
+ .dpp_set_hdr_multiplier = dpp3_set_hdr_multiplier,
+ .dpp_get_gamut_remap = dpp3_cm_get_gamut_remap,
+};
+
+
+bool dpp35_construct(
+ struct dcn3_dpp *dpp, struct dc_context *ctx,
+ uint32_t inst, const struct dcn3_dpp_registers *tf_regs,
+ const struct dcn35_dpp_shift *tf_shift,
+ const struct dcn35_dpp_mask *tf_mask)
+{
+ bool ret = dpp32_construct(dpp, ctx, inst, tf_regs,
+ (const struct dcn3_dpp_shift *)(tf_shift),
+ (const struct dcn3_dpp_mask *)(tf_mask));
+
+ dpp->base.funcs = &dcn35_dpp_funcs;
+
+ // w/a for cursor memory stuck in LS by programming DISPCLK_R_GATE_DISABLE, limit w/a to some ASIC revs
+ if (dpp->base.ctx->asic_id.hw_internal_rev < 0x40)
+ dpp->dispclk_r_gate_disable = true;
+ return ret;
+}
+
+void dpp35_set_fgcg(struct dcn3_dpp *dpp, bool enable)
+{
+ REG_UPDATE(DPP_CONTROL, DPP_FGCG_REP_DIS, !enable);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h
new file mode 100644
index 000000000000..3ca339a16e5b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN35_DPP_H__
+#define __DCN35_DPP_H__
+
+#include "dcn32/dcn32_dpp.h"
+
+#define DPP_REG_LIST_SH_MASK_DCN35(mask_sh) \
+ DPP_REG_LIST_SH_MASK_DCN30_COMMON(mask_sh), \
+ TF_SF(DPP_TOP0_DPP_CONTROL, DPP_FGCG_REP_DIS, mask_sh), \
+ TF_SF(DPP_TOP0_DPP_CONTROL, DPP_FGCG_REP_DIS, mask_sh), \
+ TF_SF(DPP_TOP0_DPP_CONTROL, DISPCLK_R_GATE_DISABLE, mask_sh)
+
+#define DPP_REG_FIELD_LIST_DCN35(type) \
+ struct { \
+ DPP_REG_FIELD_LIST_DCN3(type); \
+ type DPP_FGCG_REP_DIS; \
+ }
+
+struct dcn35_dpp_shift {
+ DPP_REG_FIELD_LIST_DCN35(uint8_t);
+};
+
+struct dcn35_dpp_mask {
+ DPP_REG_FIELD_LIST_DCN35(uint32_t);
+};
+
+void dpp35_dppclk_control(
+ struct dpp *dpp_base,
+ bool dppclk_div,
+ bool enable);
+
+bool dpp35_construct(struct dcn3_dpp *dpp3, struct dc_context *ctx,
+ uint32_t inst, const struct dcn3_dpp_registers *tf_regs,
+ const struct dcn35_dpp_shift *tf_shift,
+ const struct dcn35_dpp_mask *tf_mask);
+
+void dpp35_set_fgcg(struct dcn3_dpp *dpp, bool enable);
+
+void dpp35_program_bias_and_scale_fcnv(struct dpp *dpp_base,
+ struct dc_bias_and_scale *bias_and_scale);
+
+#endif // __DCN35_DPP_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c
new file mode 100644
index 000000000000..36187f890d5d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services.h"
+#include "core_types.h"
+#include "reg_helper.h"
+#include "dcn401/dcn401_dpp.h"
+#include "basics/conversion.h"
+#include "dcn30/dcn30_cm_common.h"
+#include "dcn32/dcn32_dpp.h"
+#include "dcn35/dcn35_dpp.h"
+
+/*
+ * Helper macros for the register accessors used in this file. All of them
+ * expand to expressions on a local variable named `dpp`, so every function
+ * using REG_GET/REG_SET/REG_UPDATE must declare one.
+ */
+
+/* Look up register @reg in the instance's register table. */
+#define REG(reg)\
+ dpp->tf_regs->reg
+
+/* Context taken from the embedded base object. */
+#define CTX \
+ dpp->base.ctx
+
+/* Expand a field name to its shift, mask pair; @reg_name is not used. */
+#undef FN
+#define FN(reg_name, field_name) \
+ dpp->tf_shift->field_name, dpp->tf_mask->field_name
+
+/*
+ * dpp401_read_state - read DPP hardware state into @s
+ *
+ * @dpp_base: DPP instance; must be embedded in a struct dcn401_dpp
+ * @s: state structure to fill
+ *
+ * Only @s->is_enabled is filled in so far, from the DPP_CLOCK_ENABLE field of
+ * DPP_CONTROL; the remaining members of @s are left untouched.
+ */
+void dpp401_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s)
+{
+	/*
+	 * This hook is installed through dcn401_dpp_funcs, which
+	 * dpp401_construct() attaches to a struct dcn401_dpp, so recover that
+	 * container type (as dpp401_dpp_setup() does) instead of viewing the
+	 * object through struct dcn3_dpp.
+	 */
+	struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+
+	REG_GET(DPP_CONTROL,
+		DPP_CLOCK_ENABLE, &s->is_enabled);
+
+	// TODO: Implement for DCN4
+}
+
+/*
+ * dpp401_dpp_setup - program the CNVC input-format registers and the post-CSC
+ *
+ * @dpp_base: DPP instance; must be embedded in a struct dcn401_dpp
+ * @format: surface pixel format, translated below to a CNVC pixel-format code
+ * @mode: written unchanged to FORMAT_CONTROL.FORMAT_EXPANSION_MODE
+ * @input_csc_color_matrix: caller matrix (taken by value); only used when its
+ *	enable_adjustment flag is set
+ * @input_color_space: color space to use; a zero value selects the default
+ *	implied by @format
+ * @alpha_2bit_lut: LUT for formats flagged below as having 2-bit alpha; may
+ *	be NULL, in which case ALPHA_2BIT_LUT is not written
+ *
+ * A format that is not listed in the switch hits the default case and is
+ * programmed with the initial values of the locals (pixel format code 0,
+ * alpha enabled).
+ */
+void dpp401_dpp_setup(
+ struct dpp *dpp_base,
+ enum surface_pixel_format format,
+ enum expansion_mode mode,
+ struct dc_csc_transform input_csc_color_matrix,
+ enum dc_color_space input_color_space,
+ struct cnv_alpha_2bit_lut *alpha_2bit_lut)
+{
+ struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+ uint32_t pixel_format = 0;
+ uint32_t alpha_en = 1;
+ enum dc_color_space color_space = COLOR_SPACE_SRGB;
+ enum dcn10_input_csc_select select = INPUT_CSC_SELECT_BYPASS;
+ uint32_t is_2bit = 0;
+ uint32_t alpha_plane_enable = 0;
+ /* These four are never modified below, so the fields are always written as 0. */
+ uint32_t dealpha_en = 0, dealpha_ablnd_en = 0;
+ uint32_t realpha_en = 0, realpha_ablnd_en = 0;
+ struct out_csc_color_matrix tbl_entry;
+ int i;
+
+ REG_SET_2(FORMAT_CONTROL, 0,
+ CNVC_BYPASS, 0,
+ FORMAT_EXPANSION_MODE, mode);
+
+ REG_UPDATE(FORMAT_CONTROL, FORMAT_CNV16, 0);
+ REG_UPDATE(FORMAT_CONTROL, CNVC_BYPASS_MSB_ALIGN, 0);
+ REG_UPDATE(FORMAT_CONTROL, CLAMP_POSITIVE, 0);
+ REG_UPDATE(FORMAT_CONTROL, CLAMP_POSITIVE_C, 0);
+
+ REG_UPDATE(FORMAT_CONTROL, FORMAT_CROSSBAR_R, 0);
+ REG_UPDATE(FORMAT_CONTROL, FORMAT_CROSSBAR_G, 1);
+ REG_UPDATE(FORMAT_CONTROL, FORMAT_CROSSBAR_B, 2);
+
+ /*
+ * Translate the surface format to the CNVC pixel-format code and pick
+ * the per-format defaults: alpha enable, default color space, CSC
+ * selection and whether the 2-bit alpha LUT applies.
+ */
+ switch (format) {
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+ pixel_format = 1;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+ pixel_format = 3;
+ alpha_en = 0;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
+ pixel_format = 8;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
+ pixel_format = 10;
+ is_2bit = 1;
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+ pixel_format = 65;
+ color_space = COLOR_SPACE_YCBCR709;
+ select = INPUT_CSC_SELECT_ICSC;
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+ pixel_format = 64;
+ color_space = COLOR_SPACE_YCBCR709;
+ select = INPUT_CSC_SELECT_ICSC;
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+ pixel_format = 67;
+ color_space = COLOR_SPACE_YCBCR709;
+ select = INPUT_CSC_SELECT_ICSC;
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+ pixel_format = 66;
+ color_space = COLOR_SPACE_YCBCR709;
+ select = INPUT_CSC_SELECT_ICSC;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+ pixel_format = 26; /* ARGB16161616_UNORM */
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+ pixel_format = 24;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+ pixel_format = 25;
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888:
+ pixel_format = 12;
+ color_space = COLOR_SPACE_YCBCR709;
+ select = INPUT_CSC_SELECT_ICSC;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX:
+ pixel_format = 112;
+ alpha_en = 0;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FIX:
+ pixel_format = 113;
+ alpha_en = 0;
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010:
+ pixel_format = 114;
+ color_space = COLOR_SPACE_YCBCR709;
+ select = INPUT_CSC_SELECT_ICSC;
+ is_2bit = 1;
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102:
+ pixel_format = 115;
+ color_space = COLOR_SPACE_YCBCR709;
+ select = INPUT_CSC_SELECT_ICSC;
+ is_2bit = 1;
+ break;
+ /* Both RGBE variants share code 116; only the alpha-plane flag differs. */
+ case SURFACE_PIXEL_FORMAT_GRPH_RGBE:
+ pixel_format = 116;
+ alpha_plane_enable = 0;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+ pixel_format = 116;
+ alpha_plane_enable = 1;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT:
+ pixel_format = 118;
+ alpha_en = 0;
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FLOAT:
+ pixel_format = 119;
+ alpha_en = 0;
+ break;
+ default:
+ break;
+ }
+
+ /* Set default color space based on format if none is given. */
+ color_space = input_color_space ? input_color_space : color_space;
+
+ /* The 2-bit alpha LUT is only written for 2-bit-alpha formats with a LUT supplied. */
+ if (is_2bit == 1 && alpha_2bit_lut != NULL) {
+ REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0);
+ REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1);
+ REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT2, alpha_2bit_lut->lut2);
+ REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT3, alpha_2bit_lut->lut3);
+ }
+
+ REG_SET_2(CNVC_SURFACE_PIXEL_FORMAT, 0,
+ CNVC_SURFACE_PIXEL_FORMAT, pixel_format,
+ CNVC_ALPHA_PLANE_ENABLE, alpha_plane_enable);
+ REG_UPDATE(FORMAT_CONTROL, FORMAT_CONTROL__ALPHA_EN, alpha_en);
+
+ REG_SET_2(PRE_DEALPHA, 0,
+ PRE_DEALPHA_EN, dealpha_en,
+ PRE_DEALPHA_ABLND_EN, dealpha_ablnd_en);
+ REG_SET_2(PRE_REALPHA, 0,
+ PRE_REALPHA_EN, realpha_en,
+ PRE_REALPHA_ABLND_EN, realpha_ablnd_en);
+
+ /* If input adjustment exists, program the ICSC with those values. */
+ if (input_csc_color_matrix.enable_adjustment == true) {
+ for (i = 0; i < 12; i++)
+ tbl_entry.regval[i] = input_csc_color_matrix.matrix[i];
+
+ /*
+ * NOTE(review): this stores the raw input_color_space, not the
+ * resolved color_space computed above, so it stays 0 when the
+ * format default was used - confirm this is intended.
+ */
+ tbl_entry.color_space = input_color_space;
+
+ /* With an adjustment matrix the per-format CSC selection is overridden. */
+ if (color_space >= COLOR_SPACE_YCBCR601)
+ select = INPUT_CSC_SELECT_ICSC;
+ else
+ select = INPUT_CSC_SELECT_BYPASS;
+
+ dpp3_program_post_csc(dpp_base, color_space, select,
+ &tbl_entry);
+ } else {
+ dpp3_program_post_csc(dpp_base, color_space, select, NULL);
+ }
+}
+
+
+/*
+ * DPP hook table attached to every instance by dpp401_construct(). It mixes
+ * dpp401_* implementations with dpp1_*, dpp2_*, dpp3_* and dpp35_* ones;
+ * hooks set to NULL are not provided by this table.
+ */
+static struct dpp_funcs dcn401_dpp_funcs = {
+ .dpp_program_gamcor_lut = dpp3_program_gamcor_lut,
+ .dpp_read_state = dpp401_read_state,
+ .dpp_reset = dpp_reset,
+ .dpp_set_scaler = dpp401_dscl_set_scaler_manual_scale,
+ .dpp_get_optimal_number_of_taps = dpp3_get_optimal_number_of_taps,
+ .dpp_set_gamut_remap = NULL,
+ .dpp_set_csc_adjustment = NULL,
+ .dpp_set_csc_default = NULL,
+ .dpp_program_regamma_pwl = NULL,
+ .dpp_set_pre_degam = dpp3_set_pre_degam,
+ .dpp_program_input_lut = NULL,
+ .dpp_full_bypass = NULL,
+ .dpp_setup = dpp401_dpp_setup,
+ .dpp_program_degamma_pwl = NULL,
+ .dpp_program_cm_dealpha = dpp3_program_cm_dealpha,
+ .dpp_program_cm_bias = dpp3_program_cm_bias,
+
+ /* NOTE(review): the three remarks below name DCN3.2 - confirm they also hold for this DCN4.01 table. */
+ .dpp_program_blnd_lut = NULL, // BLNDGAM is removed completely in DCN3.2 DPP
+ .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND)
+ .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND)
+
+ .dpp_program_bias_and_scale = dpp35_program_bias_and_scale_fcnv,
+ .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer,
+ .set_cursor_attributes = dpp401_set_cursor_attributes,
+ .set_cursor_position = dpp401_set_cursor_position,
+ .set_optional_cursor_attributes = dpp401_set_optional_cursor_attributes,
+ .dpp_dppclk_control = dpp1_dppclk_control,
+ .dpp_set_hdr_multiplier = dpp3_set_hdr_multiplier,
+ .set_cursor_matrix = dpp401_set_cursor_matrix,
+};
+
+
+/*
+ * Capabilities attached to every instance by dpp401_construct().
+ * NOTE(review): max_lb_partitions is 63 while dscl401_calc_lb_num_partitions()
+ * clamps its results to 64 - confirm the two limits are meant to differ.
+ */
+static struct dpp_caps dcn401_dpp_cap = {
+ .dscl_data_proc_format = DSCL_DATA_PRCESSING_FLOAT_FORMAT,
+ .max_lb_partitions = 63,
+ .dscl_calc_lb_num_partitions = dscl401_calc_lb_num_partitions,
+};
+
+/*
+ * dpp401_construct - fill in a DCN4.01 DPP instance
+ *
+ * @dpp: instance to initialise
+ * @ctx: context stored in the embedded base object
+ * @inst: instance number stored in the embedded base object
+ * @tf_regs: register table used by the REG() macro
+ * @tf_shift: field shift table used by the FN() macro
+ * @tf_mask: field mask table used by the FN() macro
+ *
+ * Stores the arguments and attaches the file-local hook and capability
+ * tables. Only pointers are stored; nothing is copied. Always returns true.
+ */
+bool dpp401_construct(
+	struct dcn401_dpp *dpp,
+	struct dc_context *ctx,
+	uint32_t inst,
+	const struct dcn401_dpp_registers *tf_regs,
+	const struct dcn401_dpp_shift *tf_shift,
+	const struct dcn401_dpp_mask *tf_mask)
+{
+	struct dpp *base = &dpp->base;
+
+	/* register description tables */
+	dpp->tf_mask = tf_mask;
+	dpp->tf_shift = tf_shift;
+	dpp->tf_regs = tf_regs;
+
+	/* generic part: hook tables first, then identity */
+	base->caps = &dcn401_dpp_cap;
+	base->funcs = &dcn401_dpp_funcs;
+	base->inst = inst;
+	base->ctx = ctx;
+
+	return true;
+}
+
+/*
+ * Compute the maximum number of lines that we can fit in the line buffer.
+ *
+ * Shared partition math behind dscl401_calc_lb_num_partitions() and
+ * dscl401_spl_calc_lb_num_partitions(), which differ only in the structure
+ * the inputs are read from.
+ *
+ * @line_size: luma line width; values below 1 are treated as 1
+ * @line_size_c: chroma line width; values below 1 are treated as 1
+ * @unscaled: viewport equals the active area (scaler not enabled), which
+ *	selects the increased figure for the third LB memory
+ * @alpha_en: additionally bound the luma result by the alpha partition count
+ * @lb_config: line-buffer memory configuration
+ * @num_part_y: out, number of luma partitions, capped at 64
+ * @num_part_c: out, number of chroma partitions, capped at 64
+ */
+static void dscl401_lb_num_partitions(
+		int line_size,
+		int line_size_c,
+		bool unscaled,
+		bool alpha_en,
+		enum lb_memory_config lb_config,
+		int *num_part_y,
+		int *num_part_c)
+{
+	/* capacity figures of the three LB memories */
+	const int mem0 = 970;
+	const int mem1 = 1290;
+	/* use increased LB size for calculation only if Scaler not enabled */
+	const int mem2 = unscaled ? 1170 : 484;
+	int mem_line_y, mem_line_c, mem_line_a;
+	int lb_size_y, lb_size_c, lb_size_a;
+	int num_part_a;
+
+	/*
+	 * Guard every non-positive width, not just 0: a small negative value
+	 * would make the per-line size 0 and divide by zero below.
+	 */
+	if (line_size < 1)
+		line_size = 1;
+	if (line_size_c < 1)
+		line_size_c = 1;
+
+	mem_line_y = (line_size + 5) / 6; /* +5 to ceil */
+	mem_line_c = (line_size_c + 5) / 6; /* +5 to ceil */
+	mem_line_a = (line_size + 5) / 6; /* +5 to ceil */
+
+	switch (lb_config) {
+	case LB_MEMORY_CONFIG_1:
+		lb_size_y = mem0;
+		lb_size_c = mem0;
+		lb_size_a = mem0;
+		break;
+	case LB_MEMORY_CONFIG_2:
+		lb_size_y = mem1;
+		lb_size_c = mem1;
+		lb_size_a = mem1;
+		break;
+	case LB_MEMORY_CONFIG_3:
+		/* 420 mode: luma using all 3 mem from Y, plus 3rd mem from Cr and Cb */
+		lb_size_y = mem0 + mem1 + 3 * mem2;
+		lb_size_c = mem0 + mem1;
+		lb_size_a = mem0 + mem1 + mem2;
+		break;
+	default:
+		lb_size_y = mem0 + mem1 + mem2;
+		lb_size_c = mem0 + mem1 + mem2;
+		lb_size_a = mem0 + mem1 + mem2;
+		break;
+	}
+
+	*num_part_y = lb_size_y / mem_line_y;
+	*num_part_c = lb_size_c / mem_line_c;
+	num_part_a = lb_size_a / mem_line_a;
+
+	if (alpha_en && num_part_a < *num_part_y)
+		*num_part_y = num_part_a;
+
+	if (*num_part_y > 64)
+		*num_part_y = 64;
+	if (*num_part_c > 64)
+		*num_part_c = 64;
+}
+
+/*
+ * dscl401_calc_lb_num_partitions - line-buffer partition counts for @scl_data
+ *
+ * @scl_data: scaler data; viewport, viewport_c, recout, h_active, v_active
+ *	and lb_params.alpha_en are read
+ * @lb_config: line-buffer memory configuration
+ * @num_part_y: out, number of luma partitions (at most 64)
+ * @num_part_c: out, number of chroma partitions (at most 64)
+ *
+ * The line width used is the smaller of the viewport and recout widths.
+ */
+void dscl401_calc_lb_num_partitions(
+		const struct scaler_data *scl_data,
+		enum lb_memory_config lb_config,
+		int *num_part_y,
+		int *num_part_c)
+{
+	int line_size = scl_data->viewport.width < scl_data->recout.width ?
+			scl_data->viewport.width : scl_data->recout.width;
+	int line_size_c = scl_data->viewport_c.width < scl_data->recout.width ?
+			scl_data->viewport_c.width : scl_data->recout.width;
+	bool unscaled = scl_data->viewport.width == scl_data->h_active &&
+			scl_data->viewport.height == scl_data->v_active;
+
+	dscl401_lb_num_partitions(line_size, line_size_c, unscaled,
+			scl_data->lb_params.alpha_en, lb_config,
+			num_part_y, num_part_c);
+}
+
+/*
+ * dscl401_spl_calc_lb_num_partitions - same calculation for SPL scaler data
+ *
+ * @alpha_en: bound the luma result by the alpha partition count
+ * @scl_data: SPL scaler data; viewport, viewport_c, recout, h_active and
+ *	v_active are read
+ * @lb_config: line-buffer memory configuration
+ * @num_part_y: out, number of luma partitions (at most 64)
+ * @num_part_c: out, number of chroma partitions (at most 64)
+ *
+ * The line width used is the smaller of the viewport and recout widths.
+ */
+void dscl401_spl_calc_lb_num_partitions(
+		bool alpha_en,
+		const struct spl_scaler_data *scl_data,
+		enum lb_memory_config lb_config,
+		int *num_part_y,
+		int *num_part_c)
+{
+	int line_size = scl_data->viewport.width < scl_data->recout.width ?
+			scl_data->viewport.width : scl_data->recout.width;
+	int line_size_c = scl_data->viewport_c.width < scl_data->recout.width ?
+			scl_data->viewport_c.width : scl_data->recout.width;
+	bool unscaled = scl_data->viewport.width == scl_data->h_active &&
+			scl_data->viewport.height == scl_data->v_active;
+
+	dscl401_lb_num_partitions(line_size, line_size_c, unscaled,
+			alpha_en, lb_config, num_part_y, num_part_c);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h
new file mode 100644
index 000000000000..5f6b431ec398
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h
@@ -0,0 +1,740 @@
+/* Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN401_DPP_H__
+#define __DCN401_DPP_H__
+
+#include "dcn20/dcn20_dpp.h"
+#include "dcn30/dcn30_dpp.h"
+#include "dcn32/dcn32_dpp.h"
+
+/* Recover the enclosing struct dcn401_dpp from a pointer to its embedded `base` member. */
+#define TO_DCN401_DPP(dpp)\
+ container_of(dpp, struct dcn401_dpp, base)
+
+#define DPP_REG_LIST_SH_MASK_DCN401_COMMON(mask_sh)\
+ TF_SF(CM0_CM_MEM_PWR_STATUS, GAMCOR_MEM_PWR_STATE, mask_sh),\
+ TF_SF(CM0_CM_DEALPHA, CM_DEALPHA_EN, mask_sh),\
+ TF_SF(CM0_CM_DEALPHA, CM_DEALPHA_ABLND, mask_sh),\
+ TF_SF(CM0_CM_BIAS_CR_R, CM_BIAS_CR_R, mask_sh),\
+ TF_SF(CM0_CM_BIAS_Y_G_CB_B, CM_BIAS_Y_G, mask_sh),\
+ TF_SF(CM0_CM_BIAS_Y_G_CB_B, CM_BIAS_CB_B, mask_sh),\
+ TF_SF(CM0_CM_MEM_PWR_CTRL, GAMCOR_MEM_PWR_DIS, mask_sh),\
+ TF_SF(CM0_CM_MEM_PWR_CTRL, GAMCOR_MEM_PWR_FORCE, mask_sh),\
+ TF_SF(CNVC_CFG0_PRE_DEGAM, PRE_DEGAM_MODE, mask_sh),\
+ TF_SF(CNVC_CFG0_PRE_DEGAM, PRE_DEGAM_SELECT, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_CONTROL, CM_GAMCOR_MODE, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_CONTROL, CM_GAMCOR_PWL_DISABLE, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_CONTROL, CM_GAMCOR_MODE_CURRENT, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT_CURRENT, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_LUT_INDEX, CM_GAMCOR_LUT_INDEX, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_LUT_DATA, CM_GAMCOR_LUT_DATA, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_WRITE_COLOR_MASK, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_READ_COLOR_SEL, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_READ_DBG, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_HOST_SEL, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_LUT_CONTROL, CM_GAMCOR_LUT_CONFIG_MODE, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_START_CNTL_B, CM_GAMCOR_RAMA_EXP_REGION_START_B, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_START_CNTL_B, CM_GAMCOR_RAMA_EXP_REGION_START_SEGMENT_B, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_START_SLOPE_CNTL_B, CM_GAMCOR_RAMA_EXP_REGION_START_SLOPE_B, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_START_BASE_CNTL_B, CM_GAMCOR_RAMA_EXP_REGION_START_BASE_B, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_END_CNTL1_B, CM_GAMCOR_RAMA_EXP_REGION_END_BASE_B, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_END_CNTL2_B, CM_GAMCOR_RAMA_EXP_REGION_END_B, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_END_CNTL2_B, CM_GAMCOR_RAMA_EXP_REGION_END_SLOPE_B, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_OFFSET_B, CM_GAMCOR_RAMA_OFFSET_B, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_REGION_0_1, CM_GAMCOR_RAMA_EXP_REGION0_LUT_OFFSET, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_REGION_0_1, CM_GAMCOR_RAMA_EXP_REGION0_NUM_SEGMENTS, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_REGION_0_1, CM_GAMCOR_RAMA_EXP_REGION1_LUT_OFFSET, mask_sh),\
+ TF_SF(CM0_CM_GAMCOR_RAMA_REGION_0_1, CM_GAMCOR_RAMA_EXP_REGION1_NUM_SEGMENTS, mask_sh),\
+ TF_SF(DSCL0_DSCL_EXT_OVERSCAN_LEFT_RIGHT, EXT_OVERSCAN_LEFT, mask_sh),\
+ TF_SF(DSCL0_DSCL_EXT_OVERSCAN_LEFT_RIGHT, EXT_OVERSCAN_RIGHT, mask_sh),\
+ TF_SF(DSCL0_DSCL_EXT_OVERSCAN_TOP_BOTTOM, EXT_OVERSCAN_BOTTOM, mask_sh),\
+ TF_SF(DSCL0_DSCL_EXT_OVERSCAN_TOP_BOTTOM, EXT_OVERSCAN_TOP, mask_sh),\
+ TF_SF(DSCL0_OTG_H_BLANK, OTG_H_BLANK_START, mask_sh),\
+ TF_SF(DSCL0_OTG_H_BLANK, OTG_H_BLANK_END, mask_sh),\
+ TF_SF(DSCL0_OTG_V_BLANK, OTG_V_BLANK_START, mask_sh),\
+ TF_SF(DSCL0_OTG_V_BLANK, OTG_V_BLANK_END, mask_sh),\
+ TF_SF(DSCL0_LB_DATA_FORMAT, INTERLEAVE_EN, mask_sh),\
+ TF2_SF(DSCL0, LB_DATA_FORMAT__ALPHA_EN, mask_sh),\
+ TF_SF(DSCL0_LB_MEMORY_CTRL, MEMORY_CONFIG, mask_sh),\
+ TF_SF(DSCL0_LB_MEMORY_CTRL, LB_MAX_PARTITIONS, mask_sh),\
+ TF_SF(DSCL0_DSCL_AUTOCAL, AUTOCAL_MODE, mask_sh),\
+ TF_SF(DSCL0_DSCL_AUTOCAL, AUTOCAL_NUM_PIPE, mask_sh),\
+ TF_SF(DSCL0_DSCL_CONTROL, SCL_BOUNDARY_MODE, mask_sh),\
+ TF_SF(DSCL0_DSCL_AUTOCAL, AUTOCAL_PIPE_ID, mask_sh),\
+ TF_SF(DSCL0_SCL_TAP_CONTROL, SCL_V_NUM_TAPS, mask_sh),\
+ TF_SF(DSCL0_SCL_TAP_CONTROL, SCL_H_NUM_TAPS, mask_sh),\
+ TF_SF(DSCL0_SCL_TAP_CONTROL, SCL_V_NUM_TAPS_C, mask_sh),\
+ TF_SF(DSCL0_SCL_TAP_CONTROL, SCL_H_NUM_TAPS_C, mask_sh),\
+ TF_SF(DSCL0_SCL_COEF_RAM_TAP_SELECT, SCL_COEF_RAM_TAP_PAIR_IDX, mask_sh),\
+ TF_SF(DSCL0_SCL_COEF_RAM_TAP_SELECT, SCL_COEF_RAM_PHASE, mask_sh),\
+ TF_SF(DSCL0_SCL_COEF_RAM_TAP_SELECT, SCL_COEF_RAM_FILTER_TYPE, mask_sh),\
+ TF_SF(DSCL0_SCL_COEF_RAM_TAP_DATA, SCL_COEF_RAM_EVEN_TAP_COEF, mask_sh),\
+ TF_SF(DSCL0_SCL_COEF_RAM_TAP_DATA, SCL_COEF_RAM_EVEN_TAP_COEF_EN, mask_sh),\
+ TF_SF(DSCL0_SCL_COEF_RAM_TAP_DATA, SCL_COEF_RAM_ODD_TAP_COEF, mask_sh),\
+ TF_SF(DSCL0_SCL_COEF_RAM_TAP_DATA, SCL_COEF_RAM_ODD_TAP_COEF_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_H_2TAP_HARDCODE_COEF_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_H_2TAP_SHARP_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_H_2TAP_SHARP_FACTOR, mask_sh),\
+ TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_V_2TAP_HARDCODE_COEF_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_V_2TAP_SHARP_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_2TAP_CONTROL, SCL_V_2TAP_SHARP_FACTOR, mask_sh),\
+ TF_SF(DSCL0_SCL_MODE, SCL_COEF_RAM_SELECT, mask_sh),\
+ TF_SF(DSCL0_SCL_MODE, DSCL_MODE, mask_sh),\
+ TF_SF(DSCL0_RECOUT_START, RECOUT_START_X, mask_sh),\
+ TF_SF(DSCL0_RECOUT_START, RECOUT_START_Y, mask_sh),\
+ TF_SF(DSCL0_RECOUT_SIZE, RECOUT_WIDTH, mask_sh),\
+ TF_SF(DSCL0_RECOUT_SIZE, RECOUT_HEIGHT, mask_sh),\
+ TF_SF(DSCL0_MPC_SIZE, MPC_WIDTH, mask_sh),\
+ TF_SF(DSCL0_MPC_SIZE, MPC_HEIGHT, mask_sh),\
+ TF_SF(DSCL0_SCL_HORZ_FILTER_SCALE_RATIO, SCL_H_SCALE_RATIO, mask_sh),\
+ TF_SF(DSCL0_SCL_VERT_FILTER_SCALE_RATIO, SCL_V_SCALE_RATIO, mask_sh),\
+ TF_SF(DSCL0_SCL_HORZ_FILTER_SCALE_RATIO_C, SCL_H_SCALE_RATIO_C, mask_sh),\
+ TF_SF(DSCL0_SCL_VERT_FILTER_SCALE_RATIO_C, SCL_V_SCALE_RATIO_C, mask_sh),\
+ TF_SF(DSCL0_SCL_HORZ_FILTER_INIT, SCL_H_INIT_FRAC, mask_sh),\
+ TF_SF(DSCL0_SCL_HORZ_FILTER_INIT, SCL_H_INIT_INT, mask_sh),\
+ TF_SF(DSCL0_SCL_HORZ_FILTER_INIT_C, SCL_H_INIT_FRAC_C, mask_sh),\
+ TF_SF(DSCL0_SCL_HORZ_FILTER_INIT_C, SCL_H_INIT_INT_C, mask_sh),\
+ TF_SF(DSCL0_SCL_VERT_FILTER_INIT, SCL_V_INIT_FRAC, mask_sh),\
+ TF_SF(DSCL0_SCL_VERT_FILTER_INIT, SCL_V_INIT_INT, mask_sh),\
+ TF_SF(DSCL0_SCL_VERT_FILTER_INIT_C, SCL_V_INIT_FRAC_C, mask_sh),\
+ TF_SF(DSCL0_SCL_VERT_FILTER_INIT_C, SCL_V_INIT_INT_C, mask_sh),\
+ TF_SF(DSCL0_SCL_MODE, SCL_CHROMA_COEF_MODE, mask_sh),\
+ TF_SF(DSCL0_SCL_MODE, SCL_COEF_RAM_SELECT_CURRENT, mask_sh), \
+ TF_SF(CNVC_CFG0_PRE_DEALPHA, PRE_DEALPHA_EN, mask_sh), \
+ TF_SF(CNVC_CFG0_PRE_DEALPHA, PRE_DEALPHA_ABLND_EN, mask_sh), \
+ TF_SF(CNVC_CFG0_PRE_REALPHA, PRE_REALPHA_EN, mask_sh), \
+ TF_SF(CNVC_CFG0_PRE_REALPHA, PRE_REALPHA_ABLND_EN, mask_sh), \
+ TF_SF(CNVC_CFG0_PRE_CSC_MODE, PRE_CSC_MODE, mask_sh), \
+ TF_SF(CNVC_CFG0_PRE_CSC_MODE, PRE_CSC_MODE_CURRENT, mask_sh), \
+ TF_SF(CNVC_CFG0_PRE_CSC_C11_C12, PRE_CSC_C11, mask_sh), \
+ TF_SF(CNVC_CFG0_PRE_CSC_C11_C12, PRE_CSC_C12, mask_sh), \
+ TF_SF(CNVC_CFG0_PRE_CSC_C33_C34, PRE_CSC_C33, mask_sh), \
+ TF_SF(CNVC_CFG0_PRE_CSC_C33_C34, PRE_CSC_C34, mask_sh), \
+ TF_SF(CM0_CM_POST_CSC_CONTROL, CM_POST_CSC_MODE, mask_sh), \
+ TF_SF(CM0_CM_POST_CSC_CONTROL, CM_POST_CSC_MODE_CURRENT, mask_sh), \
+ TF_SF(CM0_CM_POST_CSC_C11_C12, CM_POST_CSC_C11, mask_sh), \
+ TF_SF(CM0_CM_POST_CSC_C11_C12, CM_POST_CSC_C12, mask_sh), \
+ TF_SF(CM0_CM_POST_CSC_C33_C34, CM_POST_CSC_C33, mask_sh), \
+ TF_SF(CM0_CM_POST_CSC_C33_C34, CM_POST_CSC_C34, mask_sh), \
+ TF_SF(CM0_CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_INDEX, mask_sh), \
+ TF_SF(CNVC_CFG0_FORMAT_CONTROL, CNVC_BYPASS, mask_sh), \
+ TF2_SF(CNVC_CFG0, FORMAT_CONTROL__ALPHA_EN, mask_sh), \
+ TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_EXPANSION_MODE, mask_sh), \
+ TF_SF(CNVC_CFG0_CNVC_SURFACE_PIXEL_FORMAT, CNVC_SURFACE_PIXEL_FORMAT, mask_sh), \
+ TF_SF(CNVC_CFG0_CNVC_SURFACE_PIXEL_FORMAT, CNVC_ALPHA_PLANE_ENABLE, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_MODE, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_EXPANSION_MODE, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_ENABLE, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_COLOR0, CUR0_COLOR0, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_COLOR1, CUR0_COLOR1, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_BIAS_G_Y, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_SCALE_G_Y, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_BIAS_RB_CRCB, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_SCALE_RB_CRCB, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_MODE, CUR0_MATRIX_MODE, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_MODE, CUR0_MATRIX_MODE_CURRENT, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_MODE, CUR0_MATRIX_COEF_FORMAT, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C11_C12_A, CUR0_MATRIX_C11_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C11_C12_A, CUR0_MATRIX_C12_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C13_C14_A, CUR0_MATRIX_C13_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C13_C14_A, CUR0_MATRIX_C14_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C21_C22_A, CUR0_MATRIX_C21_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C21_C22_A, CUR0_MATRIX_C22_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C23_C24_A, CUR0_MATRIX_C23_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C23_C24_A, CUR0_MATRIX_C24_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C31_C32_A, CUR0_MATRIX_C31_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C31_C32_A, CUR0_MATRIX_C32_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C33_C34_A, CUR0_MATRIX_C33_A, mask_sh), \
+ TF_SF(CM_CUR0_CUR0_MATRIX_C33_C34_A, CUR0_MATRIX_C34_A, mask_sh), \
+ TF_SF(DPP_TOP0_DPP_CONTROL, DPP_CLOCK_ENABLE, mask_sh), \
+ TF_SF(CM0_CM_HDR_MULT_COEF, CM_HDR_MULT_COEF, mask_sh), \
+ TF_SF(CM0_CM_CONTROL, CM_BYPASS, mask_sh), \
+ TF_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_MODE, mask_sh), \
+ TF_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_PITCH, mask_sh), \
+ TF_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_LINES_PER_CHUNK, mask_sh), \
+ TF_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_ENABLE, mask_sh), \
+ TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_CNV16, mask_sh), \
+ TF_SF(CNVC_CFG0_FORMAT_CONTROL, CNVC_BYPASS_MSB_ALIGN, mask_sh), \
+ TF_SF(CNVC_CFG0_FORMAT_CONTROL, CLAMP_POSITIVE, mask_sh), \
+ TF_SF(CNVC_CFG0_FORMAT_CONTROL, CLAMP_POSITIVE_C, mask_sh), \
+ TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_CROSSBAR_R, mask_sh), \
+ TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_CROSSBAR_G, mask_sh), \
+ TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_CROSSBAR_B, mask_sh), \
+ TF_SF(CNVC_CFG0_ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, mask_sh), \
+ TF_SF(CNVC_CFG0_ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, mask_sh), \
+ TF_SF(CNVC_CFG0_ALPHA_2BIT_LUT, ALPHA_2BIT_LUT2, mask_sh), \
+ TF_SF(CNVC_CFG0_ALPHA_2BIT_LUT, ALPHA_2BIT_LUT3, mask_sh), \
+ TF_SF(CNVC_CFG0_FCNV_FP_BIAS_R, FCNV_FP_BIAS_R, mask_sh), \
+ TF_SF(CNVC_CFG0_FCNV_FP_BIAS_G, FCNV_FP_BIAS_G, mask_sh), \
+ TF_SF(CNVC_CFG0_FCNV_FP_BIAS_B, FCNV_FP_BIAS_B, mask_sh), \
+ TF_SF(CNVC_CFG0_FCNV_FP_SCALE_R, FCNV_FP_SCALE_R, mask_sh), \
+ TF_SF(CNVC_CFG0_FCNV_FP_SCALE_G, FCNV_FP_SCALE_G, mask_sh), \
+ TF_SF(CNVC_CFG0_FCNV_FP_SCALE_B, FCNV_FP_SCALE_B, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_CONTROL, COLOR_KEYER_EN, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_CONTROL, LUMA_KEYER_EN, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_CONTROL, COLOR_KEYER_MODE, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_ALPHA, COLOR_KEYER_ALPHA_LOW, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_ALPHA, COLOR_KEYER_ALPHA_HIGH, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_RED, COLOR_KEYER_RED_LOW, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_RED, COLOR_KEYER_RED_HIGH, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_GREEN, COLOR_KEYER_GREEN_LOW, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_GREEN, COLOR_KEYER_GREEN_HIGH, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_BLUE, COLOR_KEYER_BLUE_LOW, mask_sh), \
+ TF_SF(CNVC_CFG0_COLOR_KEYER_BLUE, COLOR_KEYER_BLUE_HIGH, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_PIX_INV_MODE, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_PIXEL_ALPHA_MOD_EN, mask_sh), \
+ TF_SF(CM_CUR0_CURSOR0_CONTROL, CUR0_ROM_EN, mask_sh),\
+ TF_SF(DSCL0_OBUF_MEM_PWR_CTRL, OBUF_MEM_PWR_FORCE, mask_sh),\
+ TF_SF(DSCL0_DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, mask_sh),\
+ TF_SF(DSCL0_DSCL_MEM_PWR_STATUS, LUT_MEM_PWR_STATE, mask_sh),\
+ TF_SF(DSCL0_DSCL_SC_MODE, SCL_SC_MATRIX_MODE, mask_sh),\
+ TF_SF(DSCL0_DSCL_SC_MODE, SCL_SC_LTONL_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_MODE, SCL_EASF_H_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_MODE, SCL_EASF_H_RINGEST_FORCE_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_MODE, SCL_EASF_H_2TAP_SHARP_FACTOR, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF1_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF2_MODE, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF3_MODE, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF2_FLAT1_GAIN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF2_FLAT2_GAIN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF_CNTL, SCL_EASF_H_BF2_ROC_GAIN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_RINGEST_EVENTAP_GAIN, SCL_EASF_H_RINGEST_EVENTAP_GAIN1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_RINGEST_EVENTAP_GAIN, SCL_EASF_H_RINGEST_EVENTAP_GAIN2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF_FINAL_MAX_MIN, SCL_EASF_H_BF_MAXA, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF_FINAL_MAX_MIN, SCL_EASF_H_BF_MAXB, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF_FINAL_MAX_MIN, SCL_EASF_H_BF_MINA, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF_FINAL_MAX_MIN, SCL_EASF_H_BF_MINB, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG0, SCL_EASF_H_BF1_PWL_IN_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG0, SCL_EASF_H_BF1_PWL_BASE_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG0, SCL_EASF_H_BF1_PWL_SLOPE_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG1, SCL_EASF_H_BF1_PWL_IN_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG1, SCL_EASF_H_BF1_PWL_BASE_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG1, SCL_EASF_H_BF1_PWL_SLOPE_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG2, SCL_EASF_H_BF1_PWL_IN_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG2, SCL_EASF_H_BF1_PWL_BASE_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG2, SCL_EASF_H_BF1_PWL_SLOPE_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG3, SCL_EASF_H_BF1_PWL_IN_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG3, SCL_EASF_H_BF1_PWL_BASE_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG3, SCL_EASF_H_BF1_PWL_SLOPE_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG4, SCL_EASF_H_BF1_PWL_IN_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG4, SCL_EASF_H_BF1_PWL_BASE_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG4, SCL_EASF_H_BF1_PWL_SLOPE_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG5, SCL_EASF_H_BF1_PWL_IN_SEG5, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG5, SCL_EASF_H_BF1_PWL_BASE_SEG5, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG5, SCL_EASF_H_BF1_PWL_SLOPE_SEG5, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG6, SCL_EASF_H_BF1_PWL_IN_SEG6, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG6, SCL_EASF_H_BF1_PWL_BASE_SEG6, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG6, SCL_EASF_H_BF1_PWL_SLOPE_SEG6, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG7, SCL_EASF_H_BF1_PWL_IN_SEG7, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF1_PWL_SEG7, SCL_EASF_H_BF1_PWL_BASE_SEG7, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG0, SCL_EASF_H_BF3_PWL_IN_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG0, SCL_EASF_H_BF3_PWL_BASE_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG0, SCL_EASF_H_BF3_PWL_SLOPE_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG1, SCL_EASF_H_BF3_PWL_IN_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG1, SCL_EASF_H_BF3_PWL_BASE_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG1, SCL_EASF_H_BF3_PWL_SLOPE_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG2, SCL_EASF_H_BF3_PWL_IN_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG2, SCL_EASF_H_BF3_PWL_BASE_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG2, SCL_EASF_H_BF3_PWL_SLOPE_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG3, SCL_EASF_H_BF3_PWL_IN_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG3, SCL_EASF_H_BF3_PWL_BASE_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG3, SCL_EASF_H_BF3_PWL_SLOPE_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG4, SCL_EASF_H_BF3_PWL_IN_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG4, SCL_EASF_H_BF3_PWL_BASE_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG4, SCL_EASF_H_BF3_PWL_SLOPE_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG5, SCL_EASF_H_BF3_PWL_IN_SEG5, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_H_BF3_PWL_SEG5, SCL_EASF_H_BF3_PWL_BASE_SEG5, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_MODE, SCL_EASF_V_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_MODE, SCL_EASF_V_RINGEST_FORCE_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_MODE, SCL_EASF_V_2TAP_SHARP_FACTOR, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF1_EN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF2_MODE, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF3_MODE, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF2_FLAT1_GAIN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF2_FLAT2_GAIN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF_CNTL, SCL_EASF_V_BF2_ROC_GAIN, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL1, SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL1, SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL2, SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL2, SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL3, SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_RINGEST_3TAP_CNTL3, SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_RINGEST_EVENTAP_GAIN, SCL_EASF_V_RINGEST_EVENTAP_GAIN1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_RINGEST_EVENTAP_GAIN, SCL_EASF_V_RINGEST_EVENTAP_GAIN2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF_FINAL_MAX_MIN, SCL_EASF_V_BF_MAXA, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF_FINAL_MAX_MIN, SCL_EASF_V_BF_MAXB, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF_FINAL_MAX_MIN, SCL_EASF_V_BF_MINA, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF_FINAL_MAX_MIN, SCL_EASF_V_BF_MINB, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG0, SCL_EASF_V_BF1_PWL_IN_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG0, SCL_EASF_V_BF1_PWL_BASE_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG0, SCL_EASF_V_BF1_PWL_SLOPE_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG1, SCL_EASF_V_BF1_PWL_IN_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG1, SCL_EASF_V_BF1_PWL_BASE_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG1, SCL_EASF_V_BF1_PWL_SLOPE_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG2, SCL_EASF_V_BF1_PWL_IN_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG2, SCL_EASF_V_BF1_PWL_BASE_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG2, SCL_EASF_V_BF1_PWL_SLOPE_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG3, SCL_EASF_V_BF1_PWL_IN_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG3, SCL_EASF_V_BF1_PWL_BASE_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG3, SCL_EASF_V_BF1_PWL_SLOPE_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG4, SCL_EASF_V_BF1_PWL_IN_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG4, SCL_EASF_V_BF1_PWL_BASE_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG4, SCL_EASF_V_BF1_PWL_SLOPE_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG5, SCL_EASF_V_BF1_PWL_IN_SEG5, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG5, SCL_EASF_V_BF1_PWL_BASE_SEG5, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG5, SCL_EASF_V_BF1_PWL_SLOPE_SEG5, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG6, SCL_EASF_V_BF1_PWL_IN_SEG6, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG6, SCL_EASF_V_BF1_PWL_BASE_SEG6, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG6, SCL_EASF_V_BF1_PWL_SLOPE_SEG6, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG7, SCL_EASF_V_BF1_PWL_IN_SEG7, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF1_PWL_SEG7, SCL_EASF_V_BF1_PWL_BASE_SEG7, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG0, SCL_EASF_V_BF3_PWL_IN_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG0, SCL_EASF_V_BF3_PWL_BASE_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG0, SCL_EASF_V_BF3_PWL_SLOPE_SEG0, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG1, SCL_EASF_V_BF3_PWL_IN_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG1, SCL_EASF_V_BF3_PWL_BASE_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG1, SCL_EASF_V_BF3_PWL_SLOPE_SEG1, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG2, SCL_EASF_V_BF3_PWL_IN_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG2, SCL_EASF_V_BF3_PWL_BASE_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG2, SCL_EASF_V_BF3_PWL_SLOPE_SEG2, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG3, SCL_EASF_V_BF3_PWL_IN_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG3, SCL_EASF_V_BF3_PWL_BASE_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG3, SCL_EASF_V_BF3_PWL_SLOPE_SEG3, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG4, SCL_EASF_V_BF3_PWL_IN_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG4, SCL_EASF_V_BF3_PWL_BASE_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG4, SCL_EASF_V_BF3_PWL_SLOPE_SEG4, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG5, SCL_EASF_V_BF3_PWL_IN_SEG5, mask_sh),\
+ TF_SF(DSCL0_DSCL_EASF_V_BF3_PWL_SEG5, SCL_EASF_V_BF3_PWL_BASE_SEG5, mask_sh),\
+ TF_SF(DSCL0_DSCL_SC_MATRIX_C0C1, SCL_SC_MATRIX_C0, mask_sh),\
+ TF_SF(DSCL0_DSCL_SC_MATRIX_C0C1, SCL_SC_MATRIX_C1, mask_sh),\
+ TF_SF(DSCL0_DSCL_SC_MATRIX_C2C3, SCL_SC_MATRIX_C2, mask_sh),\
+ TF_SF(DSCL0_DSCL_SC_MATRIX_C2C3, SCL_SC_MATRIX_C3, mask_sh),\
+ TF_SF(DSCL0_ISHARP_DELTA_CTRL, ISHARP_DELTA_LUT_HOST_SELECT, mask_sh),\
+ TF_SF(DSCL0_ISHARP_DELTA_DATA, ISHARP_DELTA_DATA, mask_sh),\
+ TF_SF(DSCL0_ISHARP_DELTA_INDEX, ISHARP_DELTA_INDEX, mask_sh),\
+ TF_SF(DSCL0_ISHARP_MODE, ISHARP_EN, mask_sh),\
+ TF_SF(DSCL0_ISHARP_MODE, ISHARP_NOISEDET_EN, mask_sh),\
+ TF_SF(DSCL0_ISHARP_MODE, ISHARP_NOISEDET_MODE, mask_sh),\
+ TF_SF(DSCL0_ISHARP_MODE, ISHARP_LBA_MODE, mask_sh),\
+ TF_SF(DSCL0_ISHARP_MODE, ISHARP_DELTA_LUT_SELECT, mask_sh),\
+ TF_SF(DSCL0_ISHARP_MODE, ISHARP_FMT_MODE, mask_sh),\
+ TF_SF(DSCL0_ISHARP_MODE, ISHARP_FMT_NORM, mask_sh),\
+ TF_SF(DSCL0_ISHARP_MODE, ISHARP_DELTA_LUT_SELECT_CURRENT, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG0, ISHARP_LBA_PWL_IN_SEG0, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG0, ISHARP_LBA_PWL_BASE_SEG0, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG0, ISHARP_LBA_PWL_SLOPE_SEG0, mask_sh), \
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG1, ISHARP_LBA_PWL_IN_SEG1, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG1, ISHARP_LBA_PWL_BASE_SEG1, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG1, ISHARP_LBA_PWL_SLOPE_SEG1, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG2, ISHARP_LBA_PWL_IN_SEG2, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG2, ISHARP_LBA_PWL_BASE_SEG2, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG2, ISHARP_LBA_PWL_SLOPE_SEG2, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG3, ISHARP_LBA_PWL_IN_SEG3, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG3, ISHARP_LBA_PWL_BASE_SEG3, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG3, ISHARP_LBA_PWL_SLOPE_SEG3, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG4, ISHARP_LBA_PWL_IN_SEG4, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG4, ISHARP_LBA_PWL_BASE_SEG4, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG4, ISHARP_LBA_PWL_SLOPE_SEG4, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG5, ISHARP_LBA_PWL_IN_SEG5, mask_sh),\
+ TF_SF(DSCL0_ISHARP_LBA_PWL_SEG5, ISHARP_LBA_PWL_BASE_SEG5, mask_sh),\
+ TF_SF(DSCL0_ISHARP_NOISEDET_THRESHOLD, ISHARP_NOISEDET_UTHRE, mask_sh),\
+ TF_SF(DSCL0_ISHARP_NOISEDET_THRESHOLD, ISHARP_NOISEDET_DTHRE, mask_sh), \
+ TF_SF(DSCL0_ISHARP_NOISE_GAIN_PWL, ISHARP_NOISEDET_PWL_START_IN, mask_sh), \
+ TF_SF(DSCL0_ISHARP_NOISE_GAIN_PWL, ISHARP_NOISEDET_PWL_END_IN, mask_sh), \
+ TF_SF(DSCL0_ISHARP_NOISE_GAIN_PWL, ISHARP_NOISEDET_PWL_SLOPE, mask_sh), \
+ TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_EN_P, mask_sh), \
+ TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_PIVOT_P, mask_sh), \
+ TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_SLOPE_P, mask_sh), \
+ TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_EN_N, mask_sh), \
+ TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_PIVOT_N, mask_sh), \
+ TF_SF(DSCL0_ISHARP_NLDELTA_SOFT_CLIP, ISHARP_NLDELTA_SCLIP_SLOPE_N, mask_sh), \
+ TF_SF(DSCL0_SCL_VERT_FILTER_INIT_BOT, SCL_V_INIT_FRAC_BOT, mask_sh),\
+ TF_SF(DSCL0_SCL_VERT_FILTER_INIT_BOT, SCL_V_INIT_INT_BOT, mask_sh),\
+ TF_SF(DSCL0_SCL_VERT_FILTER_INIT_BOT_C, SCL_V_INIT_FRAC_BOT_C, mask_sh),\
+ TF_SF(DSCL0_SCL_VERT_FILTER_INIT_BOT_C, SCL_V_INIT_INT_BOT_C, mask_sh)
+/* One `type` member per DCN401 DPP register field, after the DCN3 list; used for the shift and mask structs. */
+#define DPP_REG_FIELD_LIST_DCN401(type) \
+ DPP_REG_FIELD_LIST_DCN3(type); \
+ type CUR0_FP_BIAS_G_Y; \
+ type CUR0_FP_SCALE_G_Y; \
+ type CUR0_FP_BIAS_RB_CRCB; \
+ type CUR0_FP_SCALE_RB_CRCB; \
+ type CUR0_MATRIX_MODE; \
+ type CUR0_MATRIX_MODE_CURRENT; \
+ type CUR0_MATRIX_COEF_FORMAT; \
+ type CUR0_MATRIX_C11_A; \
+ type CUR0_MATRIX_C12_A; \
+ type CUR0_MATRIX_C13_A; \
+ type CUR0_MATRIX_C14_A; \
+ type CUR0_MATRIX_C21_A; \
+ type CUR0_MATRIX_C22_A; \
+ type CUR0_MATRIX_C23_A; \
+ type CUR0_MATRIX_C24_A; \
+ type CUR0_MATRIX_C31_A; \
+ type CUR0_MATRIX_C32_A; \
+ type CUR0_MATRIX_C33_A; \
+ type CUR0_MATRIX_C34_A; \
+ type LUMA_KEYER_EN; \
+ type SCL_SC_MATRIX_MODE; \
+ type SCL_SC_LTONL_EN; \
+ type SCL_EASF_H_EN; \
+ type SCL_EASF_H_RINGEST_FORCE_EN; \
+ type SCL_EASF_H_2TAP_SHARP_FACTOR; \
+ type SCL_EASF_H_BF1_EN; \
+ type SCL_EASF_H_BF2_MODE; \
+ type SCL_EASF_H_BF3_MODE; \
+ type SCL_EASF_H_BF2_FLAT1_GAIN; \
+ type SCL_EASF_H_BF2_FLAT2_GAIN; \
+ type SCL_EASF_H_BF2_ROC_GAIN; \
+ type SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1; \
+ type SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2; \
+ type SCL_EASF_H_RINGEST_EVENTAP_GAIN1; \
+ type SCL_EASF_H_RINGEST_EVENTAP_GAIN2; \
+ type SCL_EASF_H_BF_MAXA; \
+ type SCL_EASF_H_BF_MAXB; \
+ type SCL_EASF_H_BF_MINA; \
+ type SCL_EASF_H_BF_MINB; \
+ type SCL_EASF_H_BF1_PWL_IN_SEG0; \
+ type SCL_EASF_H_BF1_PWL_BASE_SEG0; \
+ type SCL_EASF_H_BF1_PWL_SLOPE_SEG0; \
+ type SCL_EASF_H_BF1_PWL_IN_SEG1; \
+ type SCL_EASF_H_BF1_PWL_BASE_SEG1; \
+ type SCL_EASF_H_BF1_PWL_SLOPE_SEG1; \
+ type SCL_EASF_H_BF1_PWL_IN_SEG2; \
+ type SCL_EASF_H_BF1_PWL_BASE_SEG2; \
+ type SCL_EASF_H_BF1_PWL_SLOPE_SEG2; \
+ type SCL_EASF_H_BF1_PWL_IN_SEG3; \
+ type SCL_EASF_H_BF1_PWL_BASE_SEG3; \
+ type SCL_EASF_H_BF1_PWL_SLOPE_SEG3; \
+ type SCL_EASF_H_BF1_PWL_IN_SEG4; \
+ type SCL_EASF_H_BF1_PWL_BASE_SEG4; \
+ type SCL_EASF_H_BF1_PWL_SLOPE_SEG4; \
+ type SCL_EASF_H_BF1_PWL_IN_SEG5; \
+ type SCL_EASF_H_BF1_PWL_BASE_SEG5; \
+ type SCL_EASF_H_BF1_PWL_SLOPE_SEG5; \
+ type SCL_EASF_H_BF1_PWL_IN_SEG6; \
+ type SCL_EASF_H_BF1_PWL_BASE_SEG6; \
+ type SCL_EASF_H_BF1_PWL_SLOPE_SEG6; \
+ type SCL_EASF_H_BF1_PWL_IN_SEG7; \
+ type SCL_EASF_H_BF1_PWL_BASE_SEG7; \
+ type SCL_EASF_H_BF3_PWL_IN_SEG0; \
+ type SCL_EASF_H_BF3_PWL_BASE_SEG0; \
+ type SCL_EASF_H_BF3_PWL_SLOPE_SEG0; \
+ type SCL_EASF_H_BF3_PWL_IN_SEG1; \
+ type SCL_EASF_H_BF3_PWL_BASE_SEG1; \
+ type SCL_EASF_H_BF3_PWL_SLOPE_SEG1; \
+ type SCL_EASF_H_BF3_PWL_IN_SEG2; \
+ type SCL_EASF_H_BF3_PWL_BASE_SEG2; \
+ type SCL_EASF_H_BF3_PWL_SLOPE_SEG2; \
+ type SCL_EASF_H_BF3_PWL_IN_SEG3; \
+ type SCL_EASF_H_BF3_PWL_BASE_SEG3; \
+ type SCL_EASF_H_BF3_PWL_SLOPE_SEG3; \
+ type SCL_EASF_H_BF3_PWL_IN_SEG4; \
+ type SCL_EASF_H_BF3_PWL_BASE_SEG4; \
+ type SCL_EASF_H_BF3_PWL_SLOPE_SEG4; \
+ type SCL_EASF_H_BF3_PWL_IN_SEG5; \
+ type SCL_EASF_H_BF3_PWL_BASE_SEG5; \
+ type SCL_EASF_V_EN; \
+ type SCL_EASF_V_RINGEST_FORCE_EN; \
+ type SCL_EASF_V_2TAP_SHARP_FACTOR; \
+ type SCL_EASF_V_BF1_EN; \
+ type SCL_EASF_V_BF2_MODE; \
+ type SCL_EASF_V_BF3_MODE; \
+ type SCL_EASF_V_BF2_FLAT1_GAIN; \
+ type SCL_EASF_V_BF2_FLAT2_GAIN; \
+ type SCL_EASF_V_BF2_ROC_GAIN; \
+ type SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT; \
+ type SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL; \
+ type SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE; \
+ type SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE; \
+ type SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE; \
+ type SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET; \
+ type SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1; \
+ type SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2; \
+ type SCL_EASF_V_RINGEST_EVENTAP_GAIN1; \
+ type SCL_EASF_V_RINGEST_EVENTAP_GAIN2; \
+ type SCL_EASF_V_BF_MAXA; \
+ type SCL_EASF_V_BF_MAXB; \
+ type SCL_EASF_V_BF_MINA; \
+ type SCL_EASF_V_BF_MINB; \
+ type SCL_EASF_V_BF1_PWL_IN_SEG0; \
+ type SCL_EASF_V_BF1_PWL_BASE_SEG0; \
+ type SCL_EASF_V_BF1_PWL_SLOPE_SEG0; \
+ type SCL_EASF_V_BF1_PWL_IN_SEG1; \
+ type SCL_EASF_V_BF1_PWL_BASE_SEG1; \
+ type SCL_EASF_V_BF1_PWL_SLOPE_SEG1; \
+ type SCL_EASF_V_BF1_PWL_IN_SEG2; \
+ type SCL_EASF_V_BF1_PWL_BASE_SEG2; \
+ type SCL_EASF_V_BF1_PWL_SLOPE_SEG2; \
+ type SCL_EASF_V_BF1_PWL_IN_SEG3; \
+ type SCL_EASF_V_BF1_PWL_BASE_SEG3; \
+ type SCL_EASF_V_BF1_PWL_SLOPE_SEG3; \
+ type SCL_EASF_V_BF1_PWL_IN_SEG4; \
+ type SCL_EASF_V_BF1_PWL_BASE_SEG4; \
+ type SCL_EASF_V_BF1_PWL_SLOPE_SEG4; \
+ type SCL_EASF_V_BF1_PWL_IN_SEG5; \
+ type SCL_EASF_V_BF1_PWL_BASE_SEG5; \
+ type SCL_EASF_V_BF1_PWL_SLOPE_SEG5; \
+ type SCL_EASF_V_BF1_PWL_IN_SEG6; \
+ type SCL_EASF_V_BF1_PWL_BASE_SEG6; \
+ type SCL_EASF_V_BF1_PWL_SLOPE_SEG6; \
+ type SCL_EASF_V_BF1_PWL_IN_SEG7; \
+ type SCL_EASF_V_BF1_PWL_BASE_SEG7; \
+ type SCL_EASF_V_BF3_PWL_IN_SEG0; \
+ type SCL_EASF_V_BF3_PWL_BASE_SEG0; \
+ type SCL_EASF_V_BF3_PWL_SLOPE_SEG0; \
+ type SCL_EASF_V_BF3_PWL_IN_SEG1; \
+ type SCL_EASF_V_BF3_PWL_BASE_SEG1; \
+ type SCL_EASF_V_BF3_PWL_SLOPE_SEG1; \
+ type SCL_EASF_V_BF3_PWL_IN_SEG2; \
+ type SCL_EASF_V_BF3_PWL_BASE_SEG2; \
+ type SCL_EASF_V_BF3_PWL_SLOPE_SEG2; \
+ type SCL_EASF_V_BF3_PWL_IN_SEG3; \
+ type SCL_EASF_V_BF3_PWL_BASE_SEG3; \
+ type SCL_EASF_V_BF3_PWL_SLOPE_SEG3; \
+ type SCL_EASF_V_BF3_PWL_IN_SEG4; \
+ type SCL_EASF_V_BF3_PWL_BASE_SEG4; \
+ type SCL_EASF_V_BF3_PWL_SLOPE_SEG4; \
+ type SCL_EASF_V_BF3_PWL_IN_SEG5; \
+ type SCL_EASF_V_BF3_PWL_BASE_SEG5; \
+ type SCL_SC_MATRIX_C0; \
+ type SCL_SC_MATRIX_C1; \
+ type SCL_SC_MATRIX_C2; \
+ type SCL_SC_MATRIX_C3; \
+ type ISHARP_EN; \
+ type ISHARP_NOISEDET_EN; \
+ type ISHARP_NOISEDET_MODE; \
+ type ISHARP_NOISEDET_UTHRE; \
+ type ISHARP_NOISEDET_DTHRE; \
+ type ISHARP_NOISEDET_PWL_START_IN; \
+ type ISHARP_NOISEDET_PWL_END_IN; \
+ type ISHARP_NOISEDET_PWL_SLOPE; \
+ type ISHARP_LBA_MODE; \
+ type ISHARP_LBA_PWL_IN_SEG0; \
+ type ISHARP_LBA_PWL_BASE_SEG0; \
+ type ISHARP_LBA_PWL_SLOPE_SEG0; \
+ type ISHARP_LBA_PWL_IN_SEG1; \
+ type ISHARP_LBA_PWL_BASE_SEG1; \
+ type ISHARP_LBA_PWL_SLOPE_SEG1; \
+ type ISHARP_LBA_PWL_IN_SEG2; \
+ type ISHARP_LBA_PWL_BASE_SEG2; \
+ type ISHARP_LBA_PWL_SLOPE_SEG2; \
+ type ISHARP_LBA_PWL_IN_SEG3; \
+ type ISHARP_LBA_PWL_BASE_SEG3; \
+ type ISHARP_LBA_PWL_SLOPE_SEG3; \
+ type ISHARP_LBA_PWL_IN_SEG4; \
+ type ISHARP_LBA_PWL_BASE_SEG4; \
+ type ISHARP_LBA_PWL_SLOPE_SEG4; \
+ type ISHARP_LBA_PWL_IN_SEG5; \
+ type ISHARP_LBA_PWL_BASE_SEG5; \
+ type ISHARP_FMT_MODE; \
+ type ISHARP_FMT_NORM; \
+ type ISHARP_DELTA_LUT_SELECT; \
+ type ISHARP_DELTA_LUT_SELECT_CURRENT; \
+ type ISHARP_DELTA_LUT_HOST_SELECT; \
+ type ISHARP_DELTA_DATA; \
+ type ISHARP_DELTA_INDEX; \
+ type ISHARP_NLDELTA_SCLIP_EN_P; \
+ type ISHARP_NLDELTA_SCLIP_PIVOT_P; \
+ type ISHARP_NLDELTA_SCLIP_SLOPE_P; \
+ type ISHARP_NLDELTA_SCLIP_EN_N; \
+ type ISHARP_NLDELTA_SCLIP_PIVOT_N; \
+ type ISHARP_NLDELTA_SCLIP_SLOPE_N
+/* One uint32_t member per DCN401 DPP register, after the common DCN3 list; body of struct dcn401_dpp_registers. */
+#define DPP_REG_VARIABLE_LIST_DCN401 \
+ DPP_DCN3_REG_VARIABLE_LIST_COMMON; \
+ uint32_t CURSOR0_FP_SCALE_BIAS_G_Y; \
+ uint32_t CURSOR0_FP_SCALE_BIAS_RB_CRCB; \
+ uint32_t CUR0_MATRIX_MODE; \
+ uint32_t CUR0_MATRIX_C11_C12_A; \
+ uint32_t CUR0_MATRIX_C13_C14_A; \
+ uint32_t CUR0_MATRIX_C21_C22_A; \
+ uint32_t CUR0_MATRIX_C23_C24_A; \
+ uint32_t CUR0_MATRIX_C31_C32_A; \
+ uint32_t CUR0_MATRIX_C33_C34_A; \
+ uint32_t CUR0_MATRIX_C11_C12_B; \
+ uint32_t CUR0_MATRIX_C13_C14_B; \
+ uint32_t CUR0_MATRIX_C21_C22_B; \
+ uint32_t CUR0_MATRIX_C23_C24_B; \
+ uint32_t CUR0_MATRIX_C31_C32_B; \
+ uint32_t CUR0_MATRIX_C33_C34_B; \
+ uint32_t DSCL_SC_MODE; \
+ uint32_t DSCL_EASF_H_MODE; \
+ uint32_t DSCL_EASF_H_BF_CNTL; \
+ uint32_t DSCL_EASF_H_RINGEST_EVENTAP_REDUCE; \
+ uint32_t DSCL_EASF_H_RINGEST_EVENTAP_GAIN; \
+ uint32_t DSCL_EASF_H_BF_FINAL_MAX_MIN; \
+ uint32_t DSCL_EASF_H_BF1_PWL_SEG0; \
+ uint32_t DSCL_EASF_H_BF1_PWL_SEG1; \
+ uint32_t DSCL_EASF_H_BF1_PWL_SEG2; \
+ uint32_t DSCL_EASF_H_BF1_PWL_SEG3; \
+ uint32_t DSCL_EASF_H_BF1_PWL_SEG4; \
+ uint32_t DSCL_EASF_H_BF1_PWL_SEG5; \
+ uint32_t DSCL_EASF_H_BF1_PWL_SEG6; \
+ uint32_t DSCL_EASF_H_BF1_PWL_SEG7; \
+ uint32_t DSCL_EASF_H_BF3_PWL_SEG0; \
+ uint32_t DSCL_EASF_H_BF3_PWL_SEG1; \
+ uint32_t DSCL_EASF_H_BF3_PWL_SEG2; \
+ uint32_t DSCL_EASF_H_BF3_PWL_SEG3; \
+ uint32_t DSCL_EASF_H_BF3_PWL_SEG4; \
+ uint32_t DSCL_EASF_H_BF3_PWL_SEG5; \
+ uint32_t DSCL_EASF_V_MODE; \
+ uint32_t DSCL_EASF_V_BF_CNTL; \
+ uint32_t DSCL_EASF_V_RINGEST_3TAP_CNTL1; \
+ uint32_t DSCL_EASF_V_RINGEST_3TAP_CNTL2; \
+ uint32_t DSCL_EASF_V_RINGEST_3TAP_CNTL3; \
+ uint32_t DSCL_EASF_V_RINGEST_EVENTAP_REDUCE; \
+ uint32_t DSCL_EASF_V_RINGEST_EVENTAP_GAIN; \
+ uint32_t DSCL_EASF_V_BF_FINAL_MAX_MIN; \
+ uint32_t DSCL_EASF_V_BF1_PWL_SEG0; \
+ uint32_t DSCL_EASF_V_BF1_PWL_SEG1; \
+ uint32_t DSCL_EASF_V_BF1_PWL_SEG2; \
+ uint32_t DSCL_EASF_V_BF1_PWL_SEG3; \
+ uint32_t DSCL_EASF_V_BF1_PWL_SEG4; \
+ uint32_t DSCL_EASF_V_BF1_PWL_SEG5; \
+ uint32_t DSCL_EASF_V_BF1_PWL_SEG6; \
+ uint32_t DSCL_EASF_V_BF1_PWL_SEG7; \
+ uint32_t DSCL_EASF_V_BF3_PWL_SEG0; \
+ uint32_t DSCL_EASF_V_BF3_PWL_SEG1; \
+ uint32_t DSCL_EASF_V_BF3_PWL_SEG2; \
+ uint32_t DSCL_EASF_V_BF3_PWL_SEG3; \
+ uint32_t DSCL_EASF_V_BF3_PWL_SEG4; \
+ uint32_t DSCL_EASF_V_BF3_PWL_SEG5; \
+ uint32_t DSCL_SC_MATRIX_C0C1; \
+ uint32_t DSCL_SC_MATRIX_C2C3; \
+ uint32_t ISHARP_MODE; \
+ uint32_t ISHARP_NOISEDET_THRESHOLD; \
+ uint32_t ISHARP_NOISE_GAIN_PWL; \
+ uint32_t ISHARP_LBA_PWL_SEG0; \
+ uint32_t ISHARP_LBA_PWL_SEG1; \
+ uint32_t ISHARP_LBA_PWL_SEG2; \
+ uint32_t ISHARP_LBA_PWL_SEG3; \
+ uint32_t ISHARP_LBA_PWL_SEG4; \
+ uint32_t ISHARP_LBA_PWL_SEG5; \
+ uint32_t ISHARP_DELTA_CTRL; \
+ uint32_t ISHARP_DELTA_DATA; \
+ uint32_t ISHARP_DELTA_INDEX; \
+ uint32_t ISHARP_NLDELTA_SOFT_CLIP
+/* Per-register uint32_t table for the DCN401 DPP; the .c files read it as dpp->tf_regs-><reg> via REG(). */
+struct dcn401_dpp_registers {
+ DPP_REG_VARIABLE_LIST_DCN401;
+};
+/* Per-field shift table (one uint8_t per register field); FN() pairs each entry with its mask. */
+struct dcn401_dpp_shift {
+ DPP_REG_FIELD_LIST_DCN401(uint8_t);
+};
+/* Per-field mask table (one uint32_t per register field); FN() pairs each entry with its shift. */
+struct dcn401_dpp_mask {
+ DPP_REG_FIELD_LIST_DCN401(uint32_t);
+};
+/* DCN401 DPP instance: the generic struct dpp plus the DCN401 register/shift/mask tables and scaler state. */
+struct dcn401_dpp {
+ struct dpp base; /* generic DPP; the .c files reach the dc context through base.ctx */
+
+ const struct dcn401_dpp_registers *tf_regs; /* read by REG() */
+ const struct dcn401_dpp_shift *tf_shift; /* read by FN() */
+ const struct dcn401_dpp_mask *tf_mask; /* read by FN() */
+
+ const uint16_t *filter_v;
+ const uint16_t *filter_h;
+ const uint16_t *filter_v_c;
+ const uint16_t *filter_h_c;
+ int lb_pixel_depth_supported;
+ int lb_memory_size;
+ int lb_bits_per_entry;
+ bool is_write_to_ram_a_safe;
+ struct scaler_data scl_data;
+ struct pwl_params pwl_data;
+};
+/* DSCL mode selector values (0..6); NOTE(review): the register field they are written to is not visible here. */
+enum dcn401_dscl_mode_sel {
+ DCN401_DSCL_MODE_SCALING_444_BYPASS = 0,
+ DCN401_DSCL_MODE_SCALING_444_RGB_ENABLE = 1,
+ DCN401_DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2,
+ DCN401_DSCL_MODE_SCALING_420_YCBCR_ENABLE = 3,
+ DCN401_DSCL_MODE_SCALING_420_LUMA_BYPASS = 4,
+ DCN401_DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5,
+ DCN401_DSCL_MODE_DSCL_BYPASS = 6
+};
+/* Constructor for struct dcn401_dpp; presumably stores the three tables for instance inst (definition not shown). */
+bool dpp401_construct(struct dcn401_dpp *dpp401,
+ struct dc_context *ctx,
+ uint32_t inst,
+ const struct dcn401_dpp_registers *tf_regs,
+ const struct dcn401_dpp_shift *tf_shift,
+ const struct dcn401_dpp_mask *tf_mask);
+/* Scaler programming entry point driven by scl_data; NOTE(review): definition is outside this header. */
+void dpp401_dscl_set_scaler_manual_scale(
+ struct dpp *dpp_base,
+ const struct scaler_data *scl_data);
+/* DPP input setup (format, expansion mode, input CSC, alpha LUT); NOTE(review): definition is outside this header. */
+void dpp401_dpp_setup(
+ struct dpp *dpp_base,
+ enum surface_pixel_format format,
+ enum expansion_mode mode,
+ struct dc_csc_transform input_csc_color_matrix,
+ enum dc_color_space input_color_space,
+ struct cnv_alpha_2bit_lut *alpha_2bit_lut);
+/* Programs CURSOR0_CONTROL (mode, expansion mode, ROM enable) from cursor_attributes; see dcn401_dpp_cm.c. */
+void dpp401_set_cursor_attributes(
+ struct dpp *dpp_base,
+ struct dc_cursor_attributes *cursor_attributes);
+/* Updates CUR0_ENABLE from pos->enable; param, width and height are unused by the dcn401_dpp_cm.c definition. */
+void dpp401_set_cursor_position(
+ struct dpp *dpp_base,
+ const struct dc_cursor_position *pos,
+ const struct dc_cursor_mi_param *param,
+ uint32_t width,
+ uint32_t height);
+/* Writes attr->bias and attr->scale to both cursor FP scale/bias registers; a NULL attr is ignored. */
+void dpp401_set_optional_cursor_attributes(
+ struct dpp *dpp_base,
+ struct dpp_cursor_attributes *attr);
+/* Presumably returns line-buffer partition counts through num_part_y/num_part_c; definition not shown here. */
+void dscl401_calc_lb_num_partitions(
+ const struct scaler_data *scl_data,
+ enum lb_memory_config lb_config,
+ int *num_part_y,
+ int *num_part_c);
+/* Variant taking struct spl_scaler_data plus an explicit alpha_en flag; definition not shown here. */
+void dscl401_spl_calc_lb_num_partitions(
+ bool alpha_en,
+ const struct spl_scaler_data *scl_data,
+ enum lb_memory_config lb_config,
+ int *num_part_y,
+ int *num_part_c);
+/* Presumably fills *s with the current DPP state; NOTE(review): definition is outside this header. */
+void dpp401_read_state(struct dpp *dpp_base, struct dcn_dpp_state *s);
+/* Cursor CSC entry point; the dcn401_dpp_cm.c definition ignores both arguments and requests bypass. */
+void dpp401_set_cursor_matrix(
+ struct dpp *dpp_base,
+ enum dc_color_space color_space,
+ struct dc_csc_transform cursor_csc_color_matrix);
+
+#endif /* __DCN401_DPP_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c
new file mode 100644
index 000000000000..7aab77b58869
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services.h"
+
+#include "core_types.h"
+
+#include "reg_helper.h"
+#include "dcn401/dcn401_dpp.h"
+#include "basics/conversion.h"
+#include "dcn10/dcn10_cm_common.h"
+/* NOTE: NUM_PHASES, the *_MAX_TAPS macros and BLACK_OFFSET_* are not referenced anywhere in this file. */
+#define NUM_PHASES 64
+#define HORZ_MAX_TAPS 8
+#define VERT_MAX_TAPS 8
+
+#define BLACK_OFFSET_RGB_Y 0x0
+#define BLACK_OFFSET_CBCR 0x8000
+/* Register glue: each macro expands against a local `dpp`; CTX/FN are presumably consumed by the REG_* helpers. */
+#define REG(reg)\
+ dpp->tf_regs->reg
+
+#define CTX \
+ dpp->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+ dpp->tf_shift->field_name, dpp->tf_mask->field_name
+/* Element count of a true array (not valid on a pointer). */
+#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
+
+/* Scaler coefficient filter selectors; not referenced anywhere in this file. */
+enum dcn401_coef_filter_type_sel {
+ SCL_COEF_LUMA_VERT_FILTER = 0,
+ SCL_COEF_LUMA_HORZ_FILTER = 1,
+ SCL_COEF_CHROMA_VERT_FILTER = 2,
+ SCL_COEF_CHROMA_HORZ_FILTER = 3,
+ SCL_COEF_SC_VERT_FILTER = 4,
+ SCL_COEF_SC_HORZ_FILTER = 5
+};
+/* DSCL auto-calibration modes; not referenced anywhere in this file. */
+enum dscl_autocal_mode {
+ AUTOCAL_MODE_OFF = 0,
+
+ /* Autocal calculates the scaling ratio and initial phase;
+ * DSCL_MODE_SEL must be set to 1
+ */
+ AUTOCAL_MODE_AUTOSCALE = 1,
+ /* Autocal performs auto centering without replication;
+ * DSCL_MODE_SEL must be set to 0
+ */
+ AUTOCAL_MODE_AUTOCENTER = 2,
+ /* Autocal performs auto centering and auto replication;
+ * DSCL_MODE_SEL must be set to 0
+ */
+ AUTOCAL_MODE_AUTOREPLICATE = 3
+};
+/* DSCL mode selectors, same values 0..6 as enum dcn401_dscl_mode_sel; not referenced anywhere in this file. */
+enum dscl_mode_sel {
+ DSCL_MODE_SCALING_444_BYPASS = 0,
+ DSCL_MODE_SCALING_444_RGB_ENABLE = 1,
+ DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2,
+ DSCL_MODE_SCALING_YCBCR_ENABLE = 3,
+ DSCL_MODE_LUMA_SCALING_BYPASS = 4,
+ DSCL_MODE_CHROMA_SCALING_BYPASS = 5,
+ DSCL_MODE_DSCL_BYPASS = 6
+};
+
+/*
+ * Program CURSOR0_CONTROL from the cursor color format. The cursor ROM is
+ * enabled only for the two alpha color formats with ENABLE_CURSOR_DEGAMMA set.
+ * The programmed values are mirrored into dpp_base->att.cur0_ctl.
+ */
+void dpp401_set_cursor_attributes(
+	struct dpp *dpp_base,
+	struct dc_cursor_attributes *cursor_attributes)
+{
+	struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+	enum dc_cursor_color_format fmt = cursor_attributes->color_format;
+	int rom_en = 0;
+
+	if ((fmt == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA ||
+	     fmt == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) &&
+	    cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA)
+		rom_en = 1;
+
+	REG_UPDATE_3(CURSOR0_CONTROL, CUR0_MODE, fmt,
+		CUR0_EXPANSION_MODE, 0, CUR0_ROM_EN, rom_en);
+
+	if (fmt == CURSOR_MODE_MONO) {
+		/* todo: clarify what to program these to */
+		REG_UPDATE(CURSOR0_COLOR0, CUR0_COLOR0, 0x00000000);
+		REG_UPDATE(CURSOR0_COLOR1, CUR0_COLOR1, 0xFFFFFFFF);
+	}
+
+	/* Mirror the values written above into the software state */
+	dpp_base->att.cur0_ctl.bits.expansion_mode = 0;
+	dpp_base->att.cur0_ctl.bits.cur0_rom_en = rom_en;
+	dpp_base->att.cur0_ctl.bits.mode = fmt;
+}
+
+/* Write CUR0_ENABLE only when pos->enable differs from the cached cur0_enable bit. */
+void dpp401_set_cursor_position(
+	struct dpp *dpp_base,
+	const struct dc_cursor_position *pos,
+	const struct dc_cursor_mi_param *param,
+	uint32_t width,
+	uint32_t height)
+{
+	struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+	uint32_t enable = !!pos->enable;
+
+	if (dpp_base->pos.cur0_ctl.bits.cur0_enable == enable)
+		return;	/* cached state already matches; param/width/height are unused */
+	REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, enable);
+	dpp_base->pos.cur0_ctl.bits.cur0_enable = enable;
+}
+
+/* Write attr->bias and attr->scale to both cursor FP scale/bias registers; no-op for a NULL attr. */
+void dpp401_set_optional_cursor_attributes(struct dpp *dpp_base,
+	struct dpp_cursor_attributes *attr)
+{
+	struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+
+	if (!attr)
+		return;
+	REG_UPDATE(CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_BIAS_G_Y, attr->bias);
+	REG_UPDATE(CURSOR0_FP_SCALE_BIAS_G_Y, CUR0_FP_SCALE_G_Y, attr->scale);
+	REG_UPDATE(CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_BIAS_RB_CRCB, attr->bias);
+	REG_UPDATE(CURSOR0_FP_SCALE_BIAS_RB_CRCB, CUR0_FP_SCALE_RB_CRCB, attr->scale);
+}
+
+/*
+ * Program the cursor matrix block in DPP CM: bypass for color spaces below
+ * COLOR_SPACE_YCBCR601 (or when no coefficients are found), otherwise load
+ * the coefficient set (A or B) that is not currently in use and switch to it.
+ */
+static void dpp401_program_cursor_csc(
+	struct dpp *dpp_base,
+	enum dc_color_space color_space,
+	const struct dpp_input_csc_matrix *tbl_entry)
+{
+	struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+	uint32_t mode_select = 0;
+	struct color_matrices_reg cur_matrix_regs;
+	unsigned int i;
+	const uint16_t *regval = NULL;
+	/* Unsigned so the loop bound has the same signedness as the index */
+	const unsigned int arr_size = NUM_ELEMENTS(dpp_input_csc_matrix);
+
+	if (color_space < COLOR_SPACE_YCBCR601) {
+		REG_SET(CUR0_MATRIX_MODE, 0, CUR0_MATRIX_MODE, CUR_MATRIX_BYPASS);
+		return;
+	}
+
+	/* If adjustments are not provided, use the hardcoded table for color space conversion */
+	if (tbl_entry == NULL) {
+		for (i = 0; i < arr_size; i++) {
+			if (dpp_input_csc_matrix[i].color_space == color_space) {
+				regval = dpp_input_csc_matrix[i].regval;
+				break;
+			}
+		}
+
+		/* No hardcoded entry for this color space: flag it and fall back to bypass */
+		if (regval == NULL) {
+			BREAK_TO_DEBUGGER();
+			REG_SET(CUR0_MATRIX_MODE, 0, CUR0_MATRIX_MODE, CUR_MATRIX_BYPASS);
+			return;
+		}
+	} else {
+		regval = tbl_entry->regval;
+	}
+
+	REG_GET(CUR0_MATRIX_MODE, CUR0_MATRIX_MODE_CURRENT, &mode_select);
+
+	/* If the set currently in use is not set A, program set A; otherwise program set B */
+	mode_select = (mode_select != CUR_MATRIX_SET_A) ? CUR_MATRIX_SET_A : CUR_MATRIX_SET_B;
+
+	cur_matrix_regs.shifts.csc_c11 = dpp->tf_shift->CUR0_MATRIX_C11_A;
+	cur_matrix_regs.masks.csc_c11 = dpp->tf_mask->CUR0_MATRIX_C11_A;
+	cur_matrix_regs.shifts.csc_c12 = dpp->tf_shift->CUR0_MATRIX_C12_A;
+	cur_matrix_regs.masks.csc_c12 = dpp->tf_mask->CUR0_MATRIX_C12_A;
+
+	if (mode_select == CUR_MATRIX_SET_A) {
+		cur_matrix_regs.csc_c11_c12 = REG(CUR0_MATRIX_C11_C12_A);
+		cur_matrix_regs.csc_c33_c34 = REG(CUR0_MATRIX_C33_C34_A);
+	} else {
+		cur_matrix_regs.csc_c11_c12 = REG(CUR0_MATRIX_C11_C12_B);
+		cur_matrix_regs.csc_c33_c34 = REG(CUR0_MATRIX_C33_C34_B);
+	}
+
+	cm_helper_program_color_matrices(dpp->base.ctx, regval, &cur_matrix_regs);
+
+	/* Select the coefficient set to use */
+	REG_SET(CUR0_MATRIX_MODE, 0, CUR0_MATRIX_MODE, mode_select);
+}
+
+/* Cursor matrix entry point; color_space and cursor_csc_color_matrix are currently ignored */
+void dpp401_set_cursor_matrix(
+ struct dpp *dpp_base,
+ enum dc_color_space color_space,
+ struct dc_csc_transform cursor_csc_color_matrix)
+{
+ //Since we don't have cursor matrix information, force bypass mode by passing in unknown color space
+ dpp401_program_cursor_csc(dpp_base, COLOR_SPACE_UNKNOWN, NULL);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c
new file mode 100644
index 000000000000..6df3419f825f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c
@@ -0,0 +1,1186 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services.h"
+
+#include "core_types.h"
+
+#include "reg_helper.h"
+#include "dcn401/dcn401_dpp.h"
+#include "basics/conversion.h"
+
+
+#define NUM_PHASES 64 /* the coefficient upload in this file writes NUM_PHASES / 2 + 1 phases per filter */
+#define HORZ_MAX_TAPS 8
+#define VERT_MAX_TAPS 8
+#define NUM_LEVELS 32
+#define BLACK_OFFSET_RGB_Y 0x0
+#define BLACK_OFFSET_CBCR 0x8000
+
+/* REG, CTX and FN all expand to expressions on a variable named `dpp`, which must be in scope where they are used. */
+#define REG(reg)\
+ dpp->tf_regs->reg
+
+#define CTX \
+ dpp->base.ctx
+/* FN expands to the "shift, mask" argument pair of a field; reg_name does not appear in the expansion. */
+#undef FN
+#define FN(reg_name, field_name) \
+ dpp->tf_shift->field_name, dpp->tf_mask->field_name
+
+enum dcn401_coef_filter_type_sel { /* written to SCL_COEF_RAM_FILTER_TYPE by dpp401_dscl_set_scaler_filter() */
+ SCL_COEF_LUMA_VERT_FILTER = 0,
+ SCL_COEF_LUMA_HORZ_FILTER = 1,
+ SCL_COEF_CHROMA_VERT_FILTER = 2,
+ SCL_COEF_CHROMA_HORZ_FILTER = 3,
+ SCL_COEF_ALPHA_VERT_FILTER = 4,
+ SCL_COEF_ALPHA_HORZ_FILTER = 5,
+ SCL_COEF_VERTICAL_BLUR_SCALE = SCL_COEF_ALPHA_VERT_FILTER, /* alias: same value as the alpha vertical selector */
+ SCL_COEF_HORIZONTAL_BLUR_SCALE = SCL_COEF_ALPHA_HORZ_FILTER /* alias: same value as the alpha horizontal selector */
+};
+
+enum dscl_autocal_mode {
+ AUTOCAL_MODE_OFF = 0,
+
+ /* Autocal calculates the scaling ratio and initial phase;
+ * DSCL_MODE_SEL must be set to 1
+ */
+ AUTOCAL_MODE_AUTOSCALE = 1,
+ /* Autocal performs auto centering without replication;
+ * DSCL_MODE_SEL must be set to 0
+ */
+ AUTOCAL_MODE_AUTOCENTER = 2,
+ /* Autocal performs auto centering and auto replication;
+ * DSCL_MODE_SEL must be set to 0
+ */
+ AUTOCAL_MODE_AUTOREPLICATE = 3
+};
+
+static int dpp401_dscl_get_pixel_depth_val(enum lb_pixel_depth depth)
+{
+	/* Translate the LB pixel depth into its register code; unsupported depths assert and give -1 */
+	if (depth == LB_PIXEL_DEPTH_30BPP)
+		return 0; /* 10 bpc */
+	if (depth == LB_PIXEL_DEPTH_24BPP)
+		return 1; /* 8 bpc */
+	if (depth == LB_PIXEL_DEPTH_18BPP)
+		return 2; /* 6 bpc */
+	if (depth == LB_PIXEL_DEPTH_36BPP)
+		return 3; /* 12 bpc */
+
+	ASSERT(0);
+	return -1; /* Unsupported */
+}
+
+/* A format counts as video when it lies in [PIXEL_FORMAT_VIDEO_BEGIN, PIXEL_FORMAT_VIDEO_END], bounds included. */
+static bool dpp401_dscl_is_video_format(enum pixel_format format)
+{
+	const bool at_or_after_begin = (format >= PIXEL_FORMAT_VIDEO_BEGIN);
+	const bool at_or_before_end = (format <= PIXEL_FORMAT_VIDEO_END);
+
+	return at_or_after_begin && at_or_before_end;
+}
+
+/* True only for the two 4:2:0 formats: PIXEL_FORMAT_420BPP8 and PIXEL_FORMAT_420BPP10. */
+static bool dpp401_dscl_is_420_format(enum pixel_format format)
+{
+	const bool is_420bpp8 = (format == PIXEL_FORMAT_420BPP8);
+	const bool is_420bpp10 = (format == PIXEL_FORMAT_420BPP10);
+
+	return is_420bpp8 || is_420bpp10;
+}
+
+/* Pick the DSCL operating mode from the DPP caps, the surface format and the four scaling ratios. */
+static enum dcn401_dscl_mode_sel dpp401_dscl_get_dscl_mode(
+		struct dpp *dpp_base,
+		const struct scaler_data *data,
+		bool dbg_always_scale)
+{
+	const long long unity = dc_fixpt_one.value;
+	const bool luma_unscaled = data->ratios.horz.value == unity
+			&& data->ratios.vert.value == unity;
+	const bool chroma_unscaled = data->ratios.horz_c.value == unity
+			&& data->ratios.vert_c.value == unity;
+
+	/* DSCL is processing data in fixed format: FP16 input selects DSCL bypass */
+	if (dpp_base->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT
+			&& data->format == PIXEL_FORMAT_FP16)
+		return DCN401_DSCL_MODE_DSCL_BYPASS;
+
+	/* Every ratio is exactly one: bypass scaling unless the debug override asks to scale */
+	if (luma_unscaled && chroma_unscaled && !dbg_always_scale)
+		return DCN401_DSCL_MODE_SCALING_444_BYPASS;
+
+	if (!dpp401_dscl_is_420_format(data->format))
+		return dpp401_dscl_is_video_format(data->format) ?
+				DCN401_DSCL_MODE_SCALING_444_YCBCR_ENABLE :
+				DCN401_DSCL_MODE_SCALING_444_RGB_ENABLE;
+
+	if (luma_unscaled)
+		return DCN401_DSCL_MODE_SCALING_420_LUMA_BYPASS;
+	if (chroma_unscaled)
+		return DCN401_DSCL_MODE_SCALING_420_CHROMA_BYPASS;
+
+	return DCN401_DSCL_MODE_SCALING_420_YCBCR_ENABLE;
+}
+
+/* Program the DSCL LUT memory power state; does nothing when the DSCL_MEM_PWR_CTRL register entry is zero. */
+static void dpp401_power_on_dscl(struct dpp *dpp_base, bool power_on)
+{
+	struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+
+	if (!dpp->tf_regs->DSCL_MEM_PWR_CTRL)
+		return;
+
+	if (power_on) {
+		/* Write LUT_MEM_PWR_FORCE = 0, then wait for LUT_MEM_PWR_STATE to read back 0 */
+		REG_UPDATE(DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, 0);
+		REG_WAIT(DSCL_MEM_PWR_STATUS, LUT_MEM_PWR_STATE, 0, 1, 5);
+	} else if (dpp->base.ctx->dc->debug.enable_mem_low_power.bits.dscl) {
+		/* Low-power debug option set: only flag the disable as a deferred register write */
+		dpp->base.ctx->dc->optimized_required = true;
+		dpp->base.deferred_reg_writes.bits.disable_dscl = true;
+	} else {
+		REG_UPDATE(DSCL_MEM_PWR_CTRL, LUT_MEM_PWR_FORCE, 3);
+	}
+}
+
+
+/* Program LB_DATA_FORMAT and LB_MEMORY_CTRL from the line buffer parameters and the chosen memory config. */
+static void dpp401_dscl_set_lb(
+	struct dcn401_dpp *dpp,
+	const struct line_buffer_params *lb_params,
+	enum lb_memory_config mem_size_config)
+{
+	const bool fixed_format =
+		dpp->base.caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT;
+	/* Partition limit is 63 unless the caps report exactly 31 */
+	const uint32_t partition_limit = (dpp->base.caps->max_lb_partitions == 31) ? 31 : 63;
+
+	if (!fixed_format) {
+		/* DSCL caps: pixel data processed in float format */
+		REG_SET_2(LB_DATA_FORMAT, 0,
+			INTERLEAVE_EN, lb_params->interleave_en, /* Interleave source enable */
+			LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */
+	} else {
+		/* DSCL caps: pixel data processed in fixed format */
+		const uint32_t depth_code = dpp401_dscl_get_pixel_depth_val(lb_params->depth);
+		const uint32_t dynamic_depth = lb_params->dynamic_pixel_depth;
+
+		REG_SET_7(LB_DATA_FORMAT, 0,
+			PIXEL_DEPTH, depth_code, /* Pixel depth stored in LB */
+			PIXEL_EXPAN_MODE, lb_params->pixel_expan_mode, /* Pixel expansion mode */
+			PIXEL_REDUCE_MODE, 1, /* Pixel reduction mode: Rounding */
+			DYNAMIC_PIXEL_DEPTH, dynamic_depth, /* Dynamic expansion pixel depth */
+			DITHER_EN, 0, /* Dithering enable: Disabled */
+			INTERLEAVE_EN, lb_params->interleave_en, /* Interleave source enable */
+			LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */
+	}
+
+	REG_SET_2(LB_MEMORY_CTRL, 0,
+		MEMORY_CONFIG, mem_size_config,
+		LB_MAX_PARTITIONS, partition_limit);
+}
+
+static const uint16_t *dpp401_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio)
+{
+	switch (taps) {
+	case 8:
+		return get_filter_8tap_64p(ratio);
+	case 7:
+		return get_filter_7tap_64p(ratio);
+	case 6:
+		return get_filter_6tap_64p(ratio);
+	case 5:
+		return get_filter_5tap_64p(ratio);
+	case 4:
+		return get_filter_4tap_64p(ratio);
+	case 3:
+		return get_filter_3tap_64p(ratio);
+	case 2:
+		return get_filter_2tap_64p(); /* the 2-tap lookup takes no ratio */
+	case 1: /* a single tap has no coefficient table */
+		return NULL;
+	default: /* should never happen, bug */
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+}
+
+/*
+ * Write one coefficient table into the scaler coefficient RAM for the given filter_type.
+ * Phases 0 .. NUM_PHASES / 2 are written, taps coefficients per phase, two per register write.
+ */
+static void dpp401_dscl_set_scaler_filter(
+	struct dcn401_dpp *dpp,
+	uint32_t taps,
+	enum dcn401_coef_filter_type_sel filter_type,
+	const uint16_t *filter)
+{
+	const int pairs_per_phase = (taps + 1) / 2;
+	const int phase_count = NUM_PHASES / 2 + 1;
+	int ph, pr;
+
+	/* Select tap pair 0, phase 0 of the requested filter type before the data writes */
+	REG_SET_3(SCL_COEF_RAM_TAP_SELECT, 0,
+		SCL_COEF_RAM_TAP_PAIR_IDX, 0,
+		SCL_COEF_RAM_PHASE, 0,
+		SCL_COEF_RAM_FILTER_TYPE, filter_type);
+
+	for (ph = 0; ph < phase_count; ph++) {
+		const uint16_t *row = &filter[ph * taps];
+
+		for (pr = 0; pr < pairs_per_phase; pr++) {
+			const uint32_t even_idx = 2 * pr;
+			const uint16_t even_coef = row[even_idx];
+			/* With an odd tap count the last pair has no odd coefficient: write 0 */
+			const uint16_t odd_coef = (even_idx + 1 < taps) ? row[even_idx + 1] : 0;
+
+			REG_SET_4(SCL_COEF_RAM_TAP_DATA, 0,
+				SCL_COEF_RAM_EVEN_TAP_COEF, even_coef,
+				SCL_COEF_RAM_EVEN_TAP_COEF_EN, 1,
+				SCL_COEF_RAM_ODD_TAP_COEF, odd_coef,
+				SCL_COEF_RAM_ODD_TAP_COEF_EN, 1);
+		}
+	}
+}
+
+static void dpp401_dscl_set_scl_filter(
+ struct dcn401_dpp *dpp,
+ const struct scaler_data *scl_data,
+ bool chroma_coef_mode,
+ bool force_coeffs_update)
+{
+ bool h_2tap_hardcode_coef_en = false;
+ bool v_2tap_hardcode_coef_en = false;
+ bool h_2tap_sharp_en = false;
+ bool v_2tap_sharp_en = false;
+ uint32_t h_2tap_sharp_factor = scl_data->sharpness.horz;
+ uint32_t v_2tap_sharp_factor = scl_data->sharpness.vert;
+ bool coef_ram_current;
+ const uint16_t *filter_h = NULL;
+ const uint16_t *filter_v = NULL;
+ const uint16_t *filter_h_c = NULL;
+ const uint16_t *filter_v_c = NULL;
+ /* With SPL in use take the coefficient tables from dscl_prog_data, otherwise look them up by tap count and ratio. */
+ if ((dpp->base.ctx->dc->config.use_spl) && (!dpp->base.ctx->dc->debug.disable_spl)) {
+ filter_h = scl_data->dscl_prog_data.filter_h;
+ filter_v = scl_data->dscl_prog_data.filter_v;
+ if (chroma_coef_mode) {
+ filter_h_c = scl_data->dscl_prog_data.filter_h_c;
+ filter_v_c = scl_data->dscl_prog_data.filter_v_c;
+ }
+ } else {
+ filter_h = dpp401_dscl_get_filter_coeffs_64p(
+ scl_data->taps.h_taps, scl_data->ratios.horz);
+ filter_v = dpp401_dscl_get_filter_coeffs_64p(
+ scl_data->taps.v_taps, scl_data->ratios.vert);
+ if (chroma_coef_mode) {
+ filter_h_c = dpp401_dscl_get_filter_coeffs_64p(
+ scl_data->taps.h_taps_c, scl_data->ratios.horz_c);
+ filter_v_c = dpp401_dscl_get_filter_coeffs_64p(
+ scl_data->taps.v_taps_c, scl_data->ratios.vert_c);
+ }
+ }
+ /* Per direction, hardcoded 2-tap coefficients are enabled only when luma and chroma taps are both exactly 2. */
+ h_2tap_hardcode_coef_en = scl_data->taps.h_taps < 3
+ && scl_data->taps.h_taps_c < 3
+ && (scl_data->taps.h_taps > 1 && scl_data->taps.h_taps_c > 1);
+ v_2tap_hardcode_coef_en = scl_data->taps.v_taps < 3
+ && scl_data->taps.v_taps_c < 3
+ && (scl_data->taps.v_taps > 1 && scl_data->taps.v_taps_c > 1);
+
+ h_2tap_sharp_en = h_2tap_hardcode_coef_en && h_2tap_sharp_factor != 0;
+ v_2tap_sharp_en = v_2tap_hardcode_coef_en && v_2tap_sharp_factor != 0;
+
+ REG_UPDATE_6(DSCL_2TAP_CONTROL,
+ SCL_H_2TAP_HARDCODE_COEF_EN, h_2tap_hardcode_coef_en,
+ SCL_H_2TAP_SHARP_EN, h_2tap_sharp_en,
+ SCL_H_2TAP_SHARP_FACTOR, h_2tap_sharp_factor,
+ SCL_V_2TAP_HARDCODE_COEF_EN, v_2tap_hardcode_coef_en,
+ SCL_V_2TAP_SHARP_EN, v_2tap_sharp_en,
+ SCL_V_2TAP_SHARP_FACTOR, v_2tap_sharp_factor);
+ /* Coefficient RAM is only (re)programmed if some direction is not using the hardcoded 2-tap coefficients. */
+ if (!v_2tap_hardcode_coef_en || !h_2tap_hardcode_coef_en) {
+ bool filter_updated = false;
+
+ filter_updated = (filter_h && (filter_h != dpp->filter_h))
+ || (filter_v && (filter_v != dpp->filter_v));
+
+ if (chroma_coef_mode) {
+ filter_updated = filter_updated || (filter_h_c && (filter_h_c != dpp->filter_h_c))
+ || (filter_v_c && (filter_v_c != dpp->filter_v_c));
+ }
+ /* Upload when a non-NULL table differs from the pointer cached in dpp, or when the caller forces it. */
+ if ((filter_updated) || (force_coeffs_update)) {
+ uint32_t scl_mode = REG_READ(SCL_MODE);
+
+ if (!h_2tap_hardcode_coef_en && filter_h) {
+ dpp401_dscl_set_scaler_filter(
+ dpp, scl_data->taps.h_taps,
+ SCL_COEF_LUMA_HORZ_FILTER, filter_h);
+ }
+ dpp->filter_h = filter_h;
+ if (!v_2tap_hardcode_coef_en && filter_v) {
+ dpp401_dscl_set_scaler_filter(
+ dpp, scl_data->taps.v_taps,
+ SCL_COEF_LUMA_VERT_FILTER, filter_v);
+ }
+ dpp->filter_v = filter_v;
+ if (chroma_coef_mode) {
+ if (!h_2tap_hardcode_coef_en && filter_h_c) {
+ dpp401_dscl_set_scaler_filter(
+ dpp, scl_data->taps.h_taps_c,
+ SCL_COEF_CHROMA_HORZ_FILTER, filter_h_c);
+ }
+ if (!v_2tap_hardcode_coef_en && filter_v_c) {
+ dpp401_dscl_set_scaler_filter(
+ dpp, scl_data->taps.v_taps_c,
+ SCL_COEF_CHROMA_VERT_FILTER, filter_v_c);
+ }
+ }
+ dpp->filter_h_c = filter_h_c;
+ dpp->filter_v_c = filter_v_c;
+
+ coef_ram_current = get_reg_field_value_ex(
+ scl_mode, dpp->tf_mask->SCL_COEF_RAM_SELECT_CURRENT,
+ dpp->tf_shift->SCL_COEF_RAM_SELECT_CURRENT);
+
+ /* Swap coefficient RAM (select the opposite of SCL_COEF_RAM_SELECT_CURRENT) and set chroma coefficient mode */
+ REG_SET_2(SCL_MODE, scl_mode,
+ SCL_COEF_RAM_SELECT, !coef_ram_current,
+ SCL_CHROMA_COEF_MODE, chroma_coef_mode);
+ }
+ }
+}
+
+// TODO: Fix defined but not used error
+//static int dpp401_dscl_get_lb_depth_bpc(enum lb_pixel_depth depth)
+//{
+// if (depth == LB_PIXEL_DEPTH_30BPP)
+// return 10;
+// else if (depth == LB_PIXEL_DEPTH_24BPP)
+// return 8;
+// else if (depth == LB_PIXEL_DEPTH_18BPP)
+// return 6;
+// else if (depth == LB_PIXEL_DEPTH_36BPP)
+// return 12;
+// else {
+// BREAK_TO_DEBUGGER();
+// return -1; /* Unsupported */
+// }
+//}
+
+// TODO: Fix defined but not used error
+//void dpp401_dscl_calc_lb_num_partitions(
+// const struct scaler_data *scl_data,
+// enum lb_memory_config lb_config,
+// int *num_part_y,
+// int *num_part_c)
+//{
+// int lb_memory_size, lb_memory_size_c, lb_memory_size_a, num_partitions_a,
+// lb_bpc, memory_line_size_y, memory_line_size_c, memory_line_size_a;
+//
+// int line_size = scl_data->viewport.width < scl_data->recout.width ?
+// scl_data->viewport.width : scl_data->recout.width;
+// int line_size_c = scl_data->viewport_c.width < scl_data->recout.width ?
+// scl_data->viewport_c.width : scl_data->recout.width;
+//
+// if (line_size == 0)
+// line_size = 1;
+//
+// if (line_size_c == 0)
+// line_size_c = 1;
+//
+//
+// lb_bpc = dpp401_dscl_get_lb_depth_bpc(scl_data->lb_params.depth);
+// memory_line_size_y = (line_size * lb_bpc + 71) / 72; /* +71 to ceil */
+// memory_line_size_c = (line_size_c * lb_bpc + 71) / 72; /* +71 to ceil */
+// memory_line_size_a = (line_size + 5) / 6; /* +5 to ceil */
+//
+// if (lb_config == LB_MEMORY_CONFIG_1) {
+// lb_memory_size = 816;
+// lb_memory_size_c = 816;
+// lb_memory_size_a = 984;
+// } else if (lb_config == LB_MEMORY_CONFIG_2) {
+// lb_memory_size = 1088;
+// lb_memory_size_c = 1088;
+// lb_memory_size_a = 1312;
+// } else if (lb_config == LB_MEMORY_CONFIG_3) {
+// /* 420 mode: using 3rd mem from Y, Cr and Cb */
+// lb_memory_size = 816 + 1088 + 848 + 848 + 848;
+// lb_memory_size_c = 816 + 1088;
+// lb_memory_size_a = 984 + 1312 + 456;
+// } else {
+// lb_memory_size = 816 + 1088 + 848;
+// lb_memory_size_c = 816 + 1088 + 848;
+// lb_memory_size_a = 984 + 1312 + 456;
+// }
+// *num_part_y = lb_memory_size / memory_line_size_y;
+// *num_part_c = lb_memory_size_c / memory_line_size_c;
+// num_partitions_a = lb_memory_size_a / memory_line_size_a;
+//
+// if (scl_data->lb_params.alpha_en
+// && (num_partitions_a < *num_part_y))
+// *num_part_y = num_partitions_a;
+//
+// if (*num_part_y > 64)
+// *num_part_y = 64;
+// if (*num_part_c > 64)
+// *num_part_c = 64;
+//
+//}
+
+/* vtaps must fit in num_partitions, reduced by (ceil_vratio - 2) once ceil_vratio exceeds 2. */
+static bool dpp401_dscl_is_lb_conf_valid(int ceil_vratio, int num_partitions, int vtaps)
+{
+	const int excess = (ceil_vratio > 2) ? ceil_vratio - 2 : 0;
+
+	return vtaps <= num_partitions - excess;
+}
+
+/* Query the caps callback for lb_config's partition counts and check that luma and chroma vtaps both fit. */
+static bool dpp401_dscl_lb_config_fits(struct dcn401_dpp *dpp,
+		const struct scaler_data *scl_data, enum lb_memory_config lb_config)
+{
+	int parts_y, parts_c;
+
+	dpp->base.caps->dscl_calc_lb_num_partitions(scl_data, lb_config, &parts_y, &parts_c);
+
+	return dpp401_dscl_is_lb_conf_valid(
+			dc_fixpt_ceil(scl_data->ratios.vert), parts_y, scl_data->taps.v_taps)
+		&& dpp401_dscl_is_lb_conf_valid(
+			dc_fixpt_ceil(scl_data->ratios.vert_c), parts_c, scl_data->taps.v_taps_c);
+}
+
+/*
+ * Find the first line buffer memory configuration that meets the minimum
+ * required LB size.
+ *
+ * dpp: DPP whose caps callback dscl_calc_lb_num_partitions is queried
+ * scl_data: scaler data supplying the format, vertical ratios and vertical taps
+ *
+ * Candidates are tried in the order LB_MEMORY_CONFIG_1, LB_MEMORY_CONFIG_2,
+ * then LB_MEMORY_CONFIG_3 (4:2:0 formats only). LB_MEMORY_CONFIG_0 is the
+ * fallback; an ASSERT fires if even that one cannot hold the requested vtaps.
+ */
+static enum lb_memory_config dpp401_dscl_find_lb_memory_config(struct dcn401_dpp *dpp,
+		const struct scaler_data *scl_data)
+{
+	const bool is_420 = scl_data->format == PIXEL_FORMAT_420BPP8
+			|| scl_data->format == PIXEL_FORMAT_420BPP10;
+	bool fallback_fits;
+
+	/* use_max_lb debug option: no search, CONFIG_3 for 4:2:0 and CONFIG_0 otherwise */
+	if (dpp->base.ctx->dc->debug.use_max_lb)
+		return is_420 ? LB_MEMORY_CONFIG_3 : LB_MEMORY_CONFIG_0;
+
+	if (dpp401_dscl_lb_config_fits(dpp, scl_data, LB_MEMORY_CONFIG_1))
+		return LB_MEMORY_CONFIG_1;
+
+	if (dpp401_dscl_lb_config_fits(dpp, scl_data, LB_MEMORY_CONFIG_2))
+		return LB_MEMORY_CONFIG_2;
+
+	if (is_420 && dpp401_dscl_lb_config_fits(dpp, scl_data, LB_MEMORY_CONFIG_3))
+		return LB_MEMORY_CONFIG_3;
+
+	/*Ensure we can support the requested number of vtaps*/
+	fallback_fits = dpp401_dscl_lb_config_fits(dpp, scl_data, LB_MEMORY_CONFIG_0);
+	ASSERT(fallback_fits);
+
+	return LB_MEMORY_CONFIG_0;
+}
+
+
+static void dpp401_dscl_set_manual_ratio_init(
+ struct dcn401_dpp *dpp, const struct scaler_data *data)
+{
+ uint32_t init_frac = 0;
+ uint32_t init_int = 0;
+ if ((dpp->base.ctx->dc->config.use_spl) && (!dpp->base.ctx->dc->debug.disable_spl)) { /* SPL path: write the values carried in dscl_prog_data unchanged, then return */
+ REG_SET(SCL_HORZ_FILTER_SCALE_RATIO, 0,
+ SCL_H_SCALE_RATIO, data->dscl_prog_data.ratios.h_scale_ratio);
+
+ REG_SET(SCL_VERT_FILTER_SCALE_RATIO, 0,
+ SCL_V_SCALE_RATIO, data->dscl_prog_data.ratios.v_scale_ratio);
+
+ REG_SET(SCL_HORZ_FILTER_SCALE_RATIO_C, 0,
+ SCL_H_SCALE_RATIO_C, data->dscl_prog_data.ratios.h_scale_ratio_c);
+
+ REG_SET(SCL_VERT_FILTER_SCALE_RATIO_C, 0,
+ SCL_V_SCALE_RATIO_C, data->dscl_prog_data.ratios.v_scale_ratio_c);
+
+ REG_SET_2(SCL_HORZ_FILTER_INIT, 0,
+ SCL_H_INIT_FRAC, data->dscl_prog_data.init.h_filter_init_frac,
+ SCL_H_INIT_INT, data->dscl_prog_data.init.h_filter_init_int);
+
+ REG_SET_2(SCL_HORZ_FILTER_INIT_C, 0,
+ SCL_H_INIT_FRAC_C, data->dscl_prog_data.init.h_filter_init_frac_c,
+ SCL_H_INIT_INT_C, data->dscl_prog_data.init.h_filter_init_int_c);
+
+ REG_SET_2(SCL_VERT_FILTER_INIT, 0,
+ SCL_V_INIT_FRAC, data->dscl_prog_data.init.v_filter_init_frac,
+ SCL_V_INIT_INT, data->dscl_prog_data.init.v_filter_init_int);
+
+ if (REG(SCL_VERT_FILTER_INIT_BOT)) {
+ REG_SET_2(SCL_VERT_FILTER_INIT_BOT, 0,
+ SCL_V_INIT_FRAC_BOT, data->dscl_prog_data.init.v_filter_init_bot_frac,
+ SCL_V_INIT_INT_BOT, data->dscl_prog_data.init.v_filter_init_bot_int);
+ }
+
+ REG_SET_2(SCL_VERT_FILTER_INIT_C, 0,
+ SCL_V_INIT_FRAC_C, data->dscl_prog_data.init.v_filter_init_frac_c,
+ SCL_V_INIT_INT_C, data->dscl_prog_data.init.v_filter_init_int_c);
+
+ if (REG(SCL_VERT_FILTER_INIT_BOT_C)) {
+ REG_SET_2(SCL_VERT_FILTER_INIT_BOT_C, 0,
+ SCL_V_INIT_FRAC_BOT_C, data->dscl_prog_data.init.v_filter_init_bot_frac_c,
+ SCL_V_INIT_INT_BOT_C, data->dscl_prog_data.init.v_filter_init_bot_int_c);
+ }
+ return;
+ }
+ REG_SET(SCL_HORZ_FILTER_SCALE_RATIO, 0,
+ SCL_H_SCALE_RATIO, dc_fixpt_u3d19(data->ratios.horz) << 5);
+
+ REG_SET(SCL_VERT_FILTER_SCALE_RATIO, 0,
+ SCL_V_SCALE_RATIO, dc_fixpt_u3d19(data->ratios.vert) << 5);
+
+ REG_SET(SCL_HORZ_FILTER_SCALE_RATIO_C, 0,
+ SCL_H_SCALE_RATIO_C, dc_fixpt_u3d19(data->ratios.horz_c) << 5);
+
+ REG_SET(SCL_VERT_FILTER_SCALE_RATIO_C, 0,
+ SCL_V_SCALE_RATIO_C, dc_fixpt_u3d19(data->ratios.vert_c) << 5);
+
+ /*
+ * 0.24 format for fraction: the u0d19 value is shifted left by 5, so the low five bits are zero
+ */
+ init_frac = dc_fixpt_u0d19(data->inits.h) << 5;
+ init_int = dc_fixpt_floor(data->inits.h);
+ REG_SET_2(SCL_HORZ_FILTER_INIT, 0,
+ SCL_H_INIT_FRAC, init_frac,
+ SCL_H_INIT_INT, init_int);
+
+ init_frac = dc_fixpt_u0d19(data->inits.h_c) << 5;
+ init_int = dc_fixpt_floor(data->inits.h_c);
+ REG_SET_2(SCL_HORZ_FILTER_INIT_C, 0,
+ SCL_H_INIT_FRAC_C, init_frac,
+ SCL_H_INIT_INT_C, init_int);
+
+ init_frac = dc_fixpt_u0d19(data->inits.v) << 5;
+ init_int = dc_fixpt_floor(data->inits.v);
+ REG_SET_2(SCL_VERT_FILTER_INIT, 0,
+ SCL_V_INIT_FRAC, init_frac,
+ SCL_V_INIT_INT, init_int);
+ /* The BOT init value is inits.v plus the vertical ratio; skipped when the register entry is zero. */
+ if (REG(SCL_VERT_FILTER_INIT_BOT)) {
+ struct fixed31_32 bot = dc_fixpt_add(data->inits.v, data->ratios.vert);
+
+ init_frac = dc_fixpt_u0d19(bot) << 5;
+ init_int = dc_fixpt_floor(bot);
+ REG_SET_2(SCL_VERT_FILTER_INIT_BOT, 0,
+ SCL_V_INIT_FRAC_BOT, init_frac,
+ SCL_V_INIT_INT_BOT, init_int);
+ }
+
+ init_frac = dc_fixpt_u0d19(data->inits.v_c) << 5;
+ init_int = dc_fixpt_floor(data->inits.v_c);
+ REG_SET_2(SCL_VERT_FILTER_INIT_C, 0,
+ SCL_V_INIT_FRAC_C, init_frac,
+ SCL_V_INIT_INT_C, init_int);
+ /* Same for chroma: BOT_C init is inits.v_c plus the chroma vertical ratio. */
+ if (REG(SCL_VERT_FILTER_INIT_BOT_C)) {
+ struct fixed31_32 bot = dc_fixpt_add(data->inits.v_c, data->ratios.vert_c);
+
+ init_frac = dc_fixpt_u0d19(bot) << 5;
+ init_int = dc_fixpt_floor(bot);
+ REG_SET_2(SCL_VERT_FILTER_INIT_BOT_C, 0,
+ SCL_V_INIT_FRAC_BOT_C, init_frac,
+ SCL_V_INIT_INT_BOT_C, init_int);
+ }
+}
+
+/**
+ * dpp401_dscl_set_recout - Set the start position and size of RECOUT in the OTG active area
+ *
+ * @dpp: DPP data struct
+ * @recout: Rectangle information (x, y, width and height are all used)
+ *
+ * This function sets the RECOUT_START and RECOUT_SIZE registers (looked up in
+ * the DPP register table) based on the values specified in the recout parameter.
+ *
+ * Note: This function only has an effect if AutoCal is disabled.
+ */
+static void dpp401_dscl_set_recout(struct dcn401_dpp *dpp,
+ const struct rect *recout)
+{
+ REG_SET_2(RECOUT_START, 0,
+ /* First pixel of RECOUT in the active OTG area */
+ RECOUT_START_X, recout->x,
+ /* First line of RECOUT in the active OTG area */
+ RECOUT_START_Y, recout->y);
+
+ REG_SET_2(RECOUT_SIZE, 0,
+ /* Number of RECOUT horizontal pixels */
+ RECOUT_WIDTH, recout->width,
+ /* Number of RECOUT vertical lines */
+ RECOUT_HEIGHT, recout->height);
+}
+/**
+ * dpp401_dscl_program_easf_v - Program EASF_V
+ *
+ * @dpp_base: High level DPP struct
+ * @scl_data: scaler_data info; every value written is read from its dscl_prog_data member
+ *
+ * This is the primary function to program vertical EASF registers.
+ * DSCL_EASF_V_MODE is always written; the other registers only when easf_v_en is set.
+ */
+static void dpp401_dscl_program_easf_v(struct dpp *dpp_base, const struct scaler_data *scl_data)
+{
+ struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+
+ PERF_TRACE();
+ /* DSCL_EASF_V_MODE */
+ REG_SET_3(DSCL_EASF_V_MODE, 0,
+ SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en,
+ SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor,
+ SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring);
+
+ if (!scl_data->dscl_prog_data.easf_v_en) {
+ PERF_TRACE();
+ return;
+ }
+
+ /* DSCL_EASF_V_BF_CNTL */
+ REG_SET_6(DSCL_EASF_V_BF_CNTL, 0,
+ SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en,
+ SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode,
+ SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode,
+ SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain,
+ SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain,
+ SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain);
+ /* DSCL_EASF_V_RINGEST_3TAP_CNTLn */
+ REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL1, 0,
+ SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt,
+ SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max);
+ REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL2, 0,
+ SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope,
+ SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope);
+ REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL3, 0,
+ SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope,
+ SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset);
+ /* DSCL_EASF_V_RINGEST_EVENTAP_REDUCE */
+ REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, 0,
+ SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1,
+ SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2);
+ /* DSCL_EASF_V_RINGEST_EVENTAP_GAIN */
+ REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, 0,
+ SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1,
+ SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2);
+ /* DSCL_EASF_V_BF_FINAL_MAX_MIN */
+ REG_SET_4(DSCL_EASF_V_BF_FINAL_MAX_MIN, 0,
+ SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa,
+ SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb,
+ SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina,
+ SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb);
+ /* DSCL_EASF_V_BF1_PWL_SEGn */
+ REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG0, 0,
+ SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0,
+ SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0,
+ SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0);
+ REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG1, 0,
+ SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1,
+ SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1,
+ SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1);
+ REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG2, 0,
+ SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2,
+ SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2,
+ SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2);
+ REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG3, 0,
+ SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3,
+ SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3,
+ SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3);
+ REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG4, 0,
+ SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4,
+ SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4,
+ SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4);
+ REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG5, 0,
+ SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5,
+ SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5,
+ SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5);
+ REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG6, 0,
+ SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6,
+ SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6,
+ SCL_EASF_V_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6);
+ REG_SET_2(DSCL_EASF_V_BF1_PWL_SEG7, 0,
+ SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7,
+ SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7);
+ /* DSCL_EASF_V_BF3_PWL_SEGn */
+ REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG0, 0,
+ SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0,
+ SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0,
+ SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0);
+ REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG1, 0,
+ SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1,
+ SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1,
+ SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1);
+ REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG2, 0,
+ SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2,
+ SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2,
+ SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2);
+ REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG3, 0,
+ SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3,
+ SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3,
+ SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3);
+ REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG4, 0,
+ SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4,
+ SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4,
+ SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4);
+ REG_SET_2(DSCL_EASF_V_BF3_PWL_SEG5, 0,
+ SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5,
+ SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5);
+ PERF_TRACE();
+}
+/**
+ * dpp401_dscl_program_easf_h - Program EASF_H
+ *
+ * @dpp_base: High level DPP struct
+ * @scl_data: scaler_data info; every value written is read from its dscl_prog_data member
+ *
+ * This is the primary function to program horizontal EASF registers.
+ * DSCL_EASF_H_MODE is always written; the other registers only when easf_h_en is set.
+ */
+static void dpp401_dscl_program_easf_h(struct dpp *dpp_base, const struct scaler_data *scl_data)
+{
+ struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+
+ PERF_TRACE();
+ /* DSCL_EASF_H_MODE */
+ REG_SET_3(DSCL_EASF_H_MODE, 0,
+ SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en,
+ SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor,
+ SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring);
+
+ if (!scl_data->dscl_prog_data.easf_h_en) {
+ PERF_TRACE();
+ return;
+ }
+
+ /* DSCL_EASF_H_BF_CNTL */
+ REG_SET_6(DSCL_EASF_H_BF_CNTL, 0,
+ SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en,
+ SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode,
+ SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode,
+ SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain,
+ SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain,
+ SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain);
+ /* DSCL_EASF_H_RINGEST_EVENTAP_REDUCE */
+ REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, 0,
+ SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1,
+ SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2);
+ /* DSCL_EASF_H_RINGEST_EVENTAP_GAIN */
+ REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, 0,
+ SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1,
+ SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2);
+ /* DSCL_EASF_H_BF_FINAL_MAX_MIN */
+ REG_SET_4(DSCL_EASF_H_BF_FINAL_MAX_MIN, 0,
+ SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa,
+ SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb,
+ SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina,
+ SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb);
+ /* DSCL_EASF_H_BF1_PWL_SEGn */
+ REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG0, 0,
+ SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0,
+ SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0,
+ SCL_EASF_H_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0);
+ REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG1, 0,
+ SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1,
+ SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1,
+ SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1);
+ REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG2, 0,
+ SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2,
+ SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2,
+ SCL_EASF_H_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2);
+ REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG3, 0,
+ SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3,
+ SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3,
+ SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3);
+ REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG4, 0,
+ SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4,
+ SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4,
+ SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4);
+ REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG5, 0,
+ SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5,
+ SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5,
+ SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5);
+ REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG6, 0,
+ SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6,
+ SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6,
+ SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6);
+ REG_SET_2(DSCL_EASF_H_BF1_PWL_SEG7, 0,
+ SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7,
+ SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7);
+ /* DSCL_EASF_H_BF3_PWL_SEGn */
+ REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG0, 0,
+ SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0,
+ SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0,
+ SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0);
+ REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG1, 0,
+ SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1,
+ SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1,
+ SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1);
+ REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG2, 0,
+ SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2,
+ SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2,
+ SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2);
+ REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG3, 0,
+ SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3,
+ SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3,
+ SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3);
+ REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG4, 0,
+ SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4,
+ SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4,
+ SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4);
+ REG_SET_2(DSCL_EASF_H_BF3_PWL_SEG5, 0,
+ SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5,
+ SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5);
+ PERF_TRACE();
+}
+/**
+ * dpp401_dscl_program_easf - Program EASF
+ *
+ * @dpp_base: High level DPP struct
+ * @scl_data: scaler_data info
+ *
+ * This is the primary function to program EASF
+ *
+ */
+static void dpp401_dscl_program_easf(struct dpp *dpp_base, const struct scaler_data *scl_data)
+{
+ struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+
+ PERF_TRACE();
+ /* DSCL_SC_MODE */
+ REG_SET_2(DSCL_SC_MODE, 0,
+ SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode,
+ SCL_SC_LTONL_EN, scl_data->dscl_prog_data.easf_ltonl_en);
+ /* DSCL_EASF_SC_MATRIX_C0C1, DSCL_EASF_SC_MATRIX_C2C3 */
+ REG_SET_2(DSCL_SC_MATRIX_C0C1, 0,
+ SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0,
+ SCL_SC_MATRIX_C1, scl_data->dscl_prog_data.easf_matrix_c1);
+ REG_SET_2(DSCL_SC_MATRIX_C2C3, 0,
+ SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2,
+ SCL_SC_MATRIX_C3, scl_data->dscl_prog_data.easf_matrix_c3);
+ dpp401_dscl_program_easf_v(dpp_base, scl_data);
+ dpp401_dscl_program_easf_h(dpp_base, scl_data);
+ PERF_TRACE();
+}
+/**
+ * dpp401_dscl_disable_easf - Disable EASF when no scaling (1:1)
+ *
+ * @dpp_base: High level DPP struct
+ * @scl_data: scaler_data info
+ *
+ * When we have 1:1 scaling, we need to disable EASF
+ *
+ */
+static void dpp401_dscl_disable_easf(struct dpp *dpp_base, const struct scaler_data *scl_data)
+{
+ struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+
+ PERF_TRACE();
+ /* DSCL_EASF_V_MODE */
+ REG_UPDATE(DSCL_EASF_V_MODE,
+ SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en);
+ /* DSCL_EASF_H_MODE */
+ REG_UPDATE(DSCL_EASF_H_MODE,
+ SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en);
+ PERF_TRACE();
+}
+static void dpp401_dscl_set_isharp_filter(
+ struct dcn401_dpp *dpp, const uint32_t *filter)
+{
+ int level;
+ uint32_t filter_data;
+ if (filter == NULL)
+ return;
+
+ REG_UPDATE(ISHARP_DELTA_CTRL,
+ ISHARP_DELTA_LUT_HOST_SELECT, 0);
+ /* LUT data write is auto-indexed. Write index once */
+ REG_SET(ISHARP_DELTA_INDEX, 0,
+ ISHARP_DELTA_INDEX, 0);
+ for (level = 0; level < NUM_LEVELS; level++) {
+ filter_data = filter[level];
+ REG_SET(ISHARP_DELTA_DATA, 0,
+ ISHARP_DELTA_DATA, filter_data);
+ }
+} // dpp401_dscl_set_isharp_filter
+/**
+ * dpp401_dscl_program_isharp - Program isharp
+ *
+ * @dpp_base: High level DPP struct
+ * @scl_data: scaler_data info
+ * @program_isharp_1dlut: true if the caller already wrote the isharp 1D LUT (it is skipped here)
+ * @bs_coeffs_updated: Blur and Scale Coefficients update flag
+ *
+ * This is the primary function to program isharp
+ *
+ */
+static void dpp401_dscl_program_isharp(struct dpp *dpp_base,
+ const struct scaler_data *scl_data,
+ bool program_isharp_1dlut,
+ bool *bs_coeffs_updated)
+{
+ struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+ *bs_coeffs_updated = false;
+
+ PERF_TRACE();
+ /* ISHARP_MODE */
+ REG_SET_6(ISHARP_MODE, 0,
+ ISHARP_EN, scl_data->dscl_prog_data.isharp_en,
+ ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable,
+ ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode,
+ ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode,
+ ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode,
+ ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm);
+
+ /* Skip remaining register programming if ISHARP is disabled */
+ if (!scl_data->dscl_prog_data.isharp_en) {
+ PERF_TRACE();
+ return;
+ }
+
+ /* ISHARP_NOISEDET_THRESHOLD */
+ REG_SET_2(ISHARP_NOISEDET_THRESHOLD, 0,
+ ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold,
+ ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold);
+
+ /* ISHARP_NOISE_GAIN_PWL */
+ REG_SET_3(ISHARP_NOISE_GAIN_PWL, 0,
+ ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in,
+ ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in,
+ ISHARP_NOISEDET_PWL_SLOPE, scl_data->dscl_prog_data.isharp_noise_det.pwl_slope);
+
+ /* ISHARP_LBA: IN_SEG, BASE_SEG, SLOPE_SEG */
+ REG_SET_3(ISHARP_LBA_PWL_SEG0, 0,
+ ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0],
+ ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0],
+ ISHARP_LBA_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.isharp_lba.slope_seg[0]);
+ REG_SET_3(ISHARP_LBA_PWL_SEG1, 0,
+ ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1],
+ ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1],
+ ISHARP_LBA_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.isharp_lba.slope_seg[1]);
+ REG_SET_3(ISHARP_LBA_PWL_SEG2, 0,
+ ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2],
+ ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2],
+ ISHARP_LBA_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.isharp_lba.slope_seg[2]);
+ REG_SET_3(ISHARP_LBA_PWL_SEG3, 0,
+ ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3],
+ ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3],
+ ISHARP_LBA_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.isharp_lba.slope_seg[3]);
+ REG_SET_3(ISHARP_LBA_PWL_SEG4, 0,
+ ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4],
+ ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4],
+ ISHARP_LBA_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.isharp_lba.slope_seg[4]);
+ REG_SET_2(ISHARP_LBA_PWL_SEG5, 0,
+ ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5],
+ ISHARP_LBA_PWL_BASE_SEG5, scl_data->dscl_prog_data.isharp_lba.base_seg[5]);
+
+ /* ISHARP_DELTA_LUT */
+ if (!program_isharp_1dlut)
+ dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta);
+
+ /* ISHARP_NLDELTA_SOFT_CLIP */
+ REG_SET_6(ISHARP_NLDELTA_SOFT_CLIP, 0,
+ ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p,
+ ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p,
+ ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p,
+ ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n,
+ ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n,
+ ISHARP_NLDELTA_SCLIP_SLOPE_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_n);
+
+ /* Blur and Scale Coefficients - SCL_COEF_RAM_TAP_SELECT */
+ if (scl_data->dscl_prog_data.isharp_en) {
+ if (scl_data->dscl_prog_data.filter_blur_scale_v) {
+ dpp401_dscl_set_scaler_filter(
+ dpp, scl_data->taps.v_taps,
+ SCL_COEF_VERTICAL_BLUR_SCALE,
+ scl_data->dscl_prog_data.filter_blur_scale_v);
+ *bs_coeffs_updated = true;
+ }
+ if (scl_data->dscl_prog_data.filter_blur_scale_h) {
+ dpp401_dscl_set_scaler_filter(
+ dpp, scl_data->taps.h_taps,
+ SCL_COEF_HORIZONTAL_BLUR_SCALE,
+ scl_data->dscl_prog_data.filter_blur_scale_h);
+ *bs_coeffs_updated = true;
+ }
+ }
+ PERF_TRACE();
+} // dpp401_dscl_program_isharp
+/**
+ * dpp401_dscl_set_scaler_manual_scale - Manually program scaler and line buffer
+ *
+ * @dpp_base: High level DPP struct
+ * @scl_data: scaler_data info
+ *
+ * This is the primary function to program scaler and line buffer in manual
+ * scaling mode. To execute the required operations for manual scale, we need
+ * to disable AutoCal first.
+ */
+void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base,
+ const struct scaler_data *scl_data)
+{
+ enum lb_memory_config lb_config;
+ struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base);
+ const struct rect *rect = &scl_data->recout;
+ uint32_t mpc_width = scl_data->h_active;
+ uint32_t mpc_height = scl_data->v_active;
+ uint32_t v_num_taps = scl_data->taps.v_taps - 1;
+ uint32_t v_num_taps_c = scl_data->taps.v_taps_c - 1;
+ uint32_t h_num_taps = scl_data->taps.h_taps - 1;
+ uint32_t h_num_taps_c = scl_data->taps.h_taps_c - 1;
+ enum dcn401_dscl_mode_sel dscl_mode = dpp401_dscl_get_dscl_mode(
+ dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale);
+ bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN
+ && scl_data->format <= PIXEL_FORMAT_VIDEO_END;
+ bool program_isharp_1dlut = false;
+ bool bs_coeffs_updated = false;
+
+
+ if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0)
+ return;
+
+ PERF_TRACE();
+
+ /* If only sharpness has changed, then only update 1dlut, then return */
+ if (scl_data->dscl_prog_data.isharp_en &&
+ (dpp->scl_data.dscl_prog_data.sharpness_level
+ != scl_data->dscl_prog_data.sharpness_level)) {
+ /* ISHARP_DELTA_LUT */
+ dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta);
+ dpp->scl_data.dscl_prog_data.sharpness_level = scl_data->dscl_prog_data.sharpness_level;
+ memcpy(dpp->scl_data.dscl_prog_data.isharp_delta, scl_data->dscl_prog_data.isharp_delta,
+ sizeof(uint32_t) * ISHARP_LUT_TABLE_SIZE);
+
+ if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0)
+ return;
+ program_isharp_1dlut = true;
+ }
+
+ dpp->scl_data = *scl_data;
+
+ if ((dpp->base.ctx->dc->config.use_spl) && (!dpp->base.ctx->dc->debug.disable_spl)) {
+ dscl_mode = (enum dcn401_dscl_mode_sel) scl_data->dscl_prog_data.dscl_mode;
+ rect = (struct rect *)&scl_data->dscl_prog_data.recout;
+ mpc_width = scl_data->dscl_prog_data.mpc_size.width;
+ mpc_height = scl_data->dscl_prog_data.mpc_size.height;
+ v_num_taps = scl_data->dscl_prog_data.taps.v_taps;
+ v_num_taps_c = scl_data->dscl_prog_data.taps.v_taps_c;
+ h_num_taps = scl_data->dscl_prog_data.taps.h_taps;
+ h_num_taps_c = scl_data->dscl_prog_data.taps.h_taps_c;
+ }
+ if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.dscl) {
+ if (dscl_mode != DCN401_DSCL_MODE_DSCL_BYPASS)
+ dpp401_power_on_dscl(dpp_base, true);
+ }
+
+ /* Autocal off */
+ REG_SET_3(DSCL_AUTOCAL, 0,
+ AUTOCAL_MODE, AUTOCAL_MODE_OFF,
+ AUTOCAL_NUM_PIPE, 0,
+ AUTOCAL_PIPE_ID, 0);
+
+ /* Clear scaler boundary mode when Autocal is off */
+ REG_SET(DSCL_CONTROL, 0,
+ SCL_BOUNDARY_MODE, 0);
+
+ /* Recout */
+ dpp401_dscl_set_recout(dpp, rect);
+
+ /* MPC Size */
+ REG_SET_2(MPC_SIZE, 0,
+ /* Number of horizontal pixels of MPC */
+ MPC_WIDTH, mpc_width,
+ /* Number of vertical lines of MPC */
+ MPC_HEIGHT, mpc_height);
+
+ /* SCL mode */
+ REG_UPDATE(SCL_MODE, DSCL_MODE, dscl_mode);
+
+ if (dscl_mode == DCN401_DSCL_MODE_DSCL_BYPASS) {
+ if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.dscl)
+ dpp401_power_on_dscl(dpp_base, false);
+ return;
+ }
+
+ /* LB */
+ lb_config = dpp401_dscl_find_lb_memory_config(dpp, scl_data);
+ dpp401_dscl_set_lb(dpp, &scl_data->lb_params, lb_config);
+
+ if (dscl_mode == DCN401_DSCL_MODE_SCALING_444_BYPASS) {
+ if (dpp->base.ctx->dc->config.prefer_easf)
+ dpp401_dscl_disable_easf(dpp_base, scl_data);
+ dpp401_dscl_program_isharp(dpp_base, scl_data, program_isharp_1dlut, &bs_coeffs_updated);
+ return;
+ }
+
+ /* Black offsets */
+ if (REG(SCL_BLACK_OFFSET)) {
+ if (ycbcr)
+ REG_SET_2(SCL_BLACK_OFFSET, 0,
+ SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y,
+ SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_CBCR);
+ else
+
+ REG_SET_2(SCL_BLACK_OFFSET, 0,
+ SCL_BLACK_OFFSET_RGB_Y, BLACK_OFFSET_RGB_Y,
+ SCL_BLACK_OFFSET_CBCR, BLACK_OFFSET_RGB_Y);
+ }
+
+ /* Manually calculate scale ratio and init values */
+ dpp401_dscl_set_manual_ratio_init(dpp, scl_data);
+
+ /* HTaps/VTaps */
+ REG_SET_4(SCL_TAP_CONTROL, 0,
+ SCL_V_NUM_TAPS, v_num_taps,
+ SCL_H_NUM_TAPS, h_num_taps,
+ SCL_V_NUM_TAPS_C, v_num_taps_c,
+ SCL_H_NUM_TAPS_C, h_num_taps_c);
+
+ /* ISharp configuration
+ * - B&S coeffs are written to same coeff RAM as WB scaler coeffs
+ * - coeff RAM toggle is in EASF programming
+ * - if we are only programming B&S coeffs, then need to reprogram
+ * WB scaler coeffs and toggle coeff RAM together
+ */
+ //if (dpp->base.ctx->dc->config.prefer_easf)
+ dpp401_dscl_program_isharp(dpp_base, scl_data, program_isharp_1dlut, &bs_coeffs_updated);
+
+ dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr, bs_coeffs_updated);
+ /* Edge adaptive scaler function configuration */
+ if (dpp->base.ctx->dc->config.prefer_easf)
+ dpp401_dscl_program_easf(dpp_base, scl_data);
+ PERF_TRACE();
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index a2537229ee88..94883c4e4c61 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -1,8 +1,39 @@
# SPDX-License-Identifier: MIT
-#
-# Makefile for the 'dsc' sub-component of DAL.
+# Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
+
+ifdef CONFIG_DRM_AMD_DC_FP
+
+###############################################################################
+# DCN20
+###############################################################################
+DSC_DCN20 = dcn20_dsc.o
+
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn20/,$(DSC_DCN20))
+
+
+
+
+###############################################################################
+# DCN35
+###############################################################################
+
+DSC_DCN35 = dcn35_dsc.o
+
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn35/,$(DSC_DCN35))
+
+###############################################################################
+# DCN401
+###############################################################################
+
+DSC_DCN401 += dcn401_dsc.o
+
+AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/dsc/dcn401/,$(DSC_DCN401))
+
+endif
+
DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o
AMD_DAL_DSC = $(addprefix $(AMDDALPATH)/dc/dsc/,$(DSC))
AMD_DISPLAY_FILES += $(AMD_DAL_DSC)
+
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index 3966845c7694..e4144b244332 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -30,6 +30,12 @@
#include "rc_calc.h"
#include "fixed31_32.h"
+#include "clk_mgr.h"
+#include "resource.h"
+
+#define DC_LOGGER \
+ dsc->ctx->logger
+
/* This module's internal functions */
/* default DSC policy target bitrate limit is 16bpp */
@@ -137,11 +143,20 @@ uint32_t dc_bandwidth_in_kbps_from_timing(
if (link_encoding == DC_LINK_ENCODING_DP_128b_132b)
kbps = apply_128b_132b_stream_overhead(timing, kbps);
+ if (link_encoding == DC_LINK_ENCODING_HDMI_FRL &&
+ timing->vic == 0 && timing->hdmi_vic == 0 &&
+ timing->frl_uncompressed_video_bandwidth_in_kbps != 0)
+ kbps = timing->frl_uncompressed_video_bandwidth_in_kbps;
+
return kbps;
}
-
/* Forward Declerations */
+static unsigned int get_min_dsc_slice_count_for_odm(
+ const struct display_stream_compressor *dsc,
+ const struct dsc_enc_caps *dsc_enc_caps,
+ const struct dc_crtc_timing *timing);
+
static bool decide_dsc_bandwidth_range(
const uint32_t min_bpp_x16,
const uint32_t max_bpp_x16,
@@ -176,6 +191,7 @@ static bool setup_dsc_config(
const struct dc_crtc_timing *timing,
const struct dc_dsc_config_options *options,
const enum dc_link_encoding_format link_encoding,
+ int min_slice_count,
struct dc_dsc_config *dsc_cfg);
static bool dsc_buff_block_size_from_dpcd(int dpcd_buff_block_size, int *buff_block_size)
@@ -331,8 +347,9 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
int buff_block_size;
int buff_size;
- if (!dsc_buff_block_size_from_dpcd(dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT],
- &buff_block_size))
+ if (!dsc_buff_block_size_from_dpcd(
+ dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] & 0x03,
+ &buff_block_size))
return false;
buff_size = dpcd_dsc_basic_data[DP_DSC_RC_BUF_SIZE - DP_DSC_SUPPORT] + 1;
@@ -357,10 +374,15 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
{
int dpcd_throughput = dpcd_dsc_basic_data[DP_DSC_PEAK_THROUGHPUT - DP_DSC_SUPPORT];
+ int dsc_throughput_granular_delta;
+
+ dsc_throughput_granular_delta = dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT] >> 3;
+ dsc_throughput_granular_delta *= 2;
if (!dsc_throughput_from_dpcd(dpcd_throughput & DP_DSC_THROUGHPUT_MODE_0_MASK,
&dsc_sink_caps->throughput_mode_0_mps))
return false;
+ dsc_sink_caps->throughput_mode_0_mps += dsc_throughput_granular_delta;
dpcd_throughput = (dpcd_throughput & DP_DSC_THROUGHPUT_MODE_1_MASK) >> DP_DSC_THROUGHPUT_MODE_1_SHIFT;
if (!dsc_throughput_from_dpcd(dpcd_throughput, &dsc_sink_caps->throughput_mode_1_mps))
@@ -429,7 +451,6 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
return true;
}
-
/* If DSC is possbile, get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range and
* timing's pixel clock and uncompressed bandwidth.
* If DSC is not possible, leave '*range' untouched.
@@ -445,9 +466,10 @@ bool dc_dsc_compute_bandwidth_range(
struct dc_dsc_bw_range *range)
{
bool is_dsc_possible = false;
+ unsigned int min_dsc_slice_count;
struct dsc_enc_caps dsc_enc_caps;
struct dsc_enc_caps dsc_common_caps;
- struct dc_dsc_config config;
+ struct dc_dsc_config config = {0};
struct dc_dsc_config_options options = {0};
options.dsc_min_slice_height_override = dsc_min_slice_height_override;
@@ -456,12 +478,14 @@ bool dc_dsc_compute_bandwidth_range(
get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz);
+ min_dsc_slice_count = get_min_dsc_slice_count_for_odm(dsc, &dsc_enc_caps, timing);
+
is_dsc_possible = intersect_dsc_caps(dsc_sink_caps, &dsc_enc_caps,
timing->pixel_encoding, &dsc_common_caps);
if (is_dsc_possible)
is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, 0, timing,
- &options, link_encoding, &config);
+ &options, link_encoding, min_dsc_slice_count, &config);
if (is_dsc_possible)
is_dsc_possible = decide_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16,
@@ -470,20 +494,195 @@ bool dc_dsc_compute_bandwidth_range(
return is_dsc_possible;
}
+void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc,
+ const struct dc_crtc_timing *timing)
+{
+ struct dsc_enc_caps dsc_enc_caps;
+
+ get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz);
+
+ DC_LOG_DSC("dsc encoder caps:");
+ DC_LOG_DSC("\tdsc_version 0x%x", dsc_enc_caps.dsc_version);
+ DC_LOG_DSC("\tslice_caps 0x%x", dsc_enc_caps.slice_caps.raw);
+ DC_LOG_DSC("\tlb_bit_depth %d", dsc_enc_caps.lb_bit_depth);
+ DC_LOG_DSC("\tis_block_pred_supported %d", dsc_enc_caps.is_block_pred_supported);
+ DC_LOG_DSC("\tcolor_formats 0x%x", dsc_enc_caps.color_formats.raw);
+ DC_LOG_DSC("\tcolor_depth 0x%x", dsc_enc_caps.color_depth.raw);
+ DC_LOG_DSC("\tmax_total_throughput_mps %d", dsc_enc_caps.max_total_throughput_mps);
+ DC_LOG_DSC("\tmax_slice_width %d", dsc_enc_caps.max_slice_width);
+ DC_LOG_DSC("\tbpp_increment_div %d", dsc_enc_caps.bpp_increment_div);
+}
+
+void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc,
+ const struct dsc_dec_dpcd_caps *dsc_sink_caps)
+{
+ DC_LOG_DSC("dsc decoder caps:");
+ DC_LOG_DSC("\tis_dsc_supported %d", dsc_sink_caps->is_dsc_supported);
+ DC_LOG_DSC("\tdsc_version 0x%x", dsc_sink_caps->dsc_version);
+ DC_LOG_DSC("\trc_buffer_size %d", dsc_sink_caps->rc_buffer_size);
+ DC_LOG_DSC("\tslice_caps1 0x%x", dsc_sink_caps->slice_caps1.raw);
+ DC_LOG_DSC("\tslice_caps2 0x%x", dsc_sink_caps->slice_caps2.raw);
+ DC_LOG_DSC("\tlb_bit_depth %d", dsc_sink_caps->lb_bit_depth);
+ DC_LOG_DSC("\tis_block_pred_supported %d", dsc_sink_caps->is_block_pred_supported);
+ DC_LOG_DSC("\tedp_max_bits_per_pixel %d", dsc_sink_caps->edp_max_bits_per_pixel);
+ DC_LOG_DSC("\tcolor_formats 0x%x", dsc_sink_caps->color_formats.raw);
+ DC_LOG_DSC("\tthroughput_mode_0_mps %d", dsc_sink_caps->throughput_mode_0_mps);
+ DC_LOG_DSC("\tthroughput_mode_1_mps %d", dsc_sink_caps->throughput_mode_1_mps);
+ DC_LOG_DSC("\tmax_slice_width %d", dsc_sink_caps->max_slice_width);
+ DC_LOG_DSC("\tbpp_increment_div %d", dsc_sink_caps->bpp_increment_div);
+ DC_LOG_DSC("\tbranch_overall_throughput_0_mps %d", dsc_sink_caps->branch_overall_throughput_0_mps);
+ DC_LOG_DSC("\tbranch_overall_throughput_1_mps %d", dsc_sink_caps->branch_overall_throughput_1_mps);
+ DC_LOG_DSC("\tbranch_max_line_width %d", dsc_sink_caps->branch_max_line_width);
+ DC_LOG_DSC("\tis_dp %d", dsc_sink_caps->is_dp);
+}
+
+
+static void build_dsc_enc_combined_slice_caps(
+ const struct dsc_enc_caps *single_dsc_enc_caps,
+ struct dsc_enc_caps *dsc_enc_caps,
+ unsigned int max_odm_combine_factor)
+{
+ /* 1-16 slice configurations, single DSC */
+ dsc_enc_caps->slice_caps.raw |= single_dsc_enc_caps->slice_caps.raw;
+
+ /* 2x DSC's */
+ if (max_odm_combine_factor >= 2) {
+ /* 1 + 1 */
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_1;
+
+ /* 2 + 2 */
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_2;
+
+ /* 4 + 4 */
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_4;
+
+ /* 8 + 8 */
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_8;
+ }
+
+ /* 3x DSC's */
+ if (max_odm_combine_factor >= 3) {
+ /* 4 + 4 + 4 */
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_4;
+ }
+
+ /* 4x DSC's */
+ if (max_odm_combine_factor >= 4) {
+ /* 1 + 1 + 1 + 1 */
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_1;
+
+ /* 2 + 2 + 2 + 2 */
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_2;
+
+ /* 3 + 3 + 3 + 3 */
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_3;
+
+ /* 4 + 4 + 4 + 4 */
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_4;
+ }
+}
+
+static void build_dsc_enc_caps(
+ const struct display_stream_compressor *dsc,
+ struct dsc_enc_caps *dsc_enc_caps)
+{
+ unsigned int max_dscclk_khz;
+ unsigned int num_dsc;
+ unsigned int max_odm_combine_factor;
+ struct dsc_enc_caps single_dsc_enc_caps;
+
+ struct dc *dc;
+
+ if (!dsc || !dsc->ctx || !dsc->ctx->dc || !dsc->funcs->dsc_get_single_enc_caps)
+ return;
+
+ dc = dsc->ctx->dc;
+
+ if (!dc->clk_mgr || !dc->clk_mgr->funcs->get_max_clock_khz || !dc->res_pool || dc->debug.disable_dsc)
+ return;
+
+ /* get max DSCCLK from clk_mgr */
+ max_dscclk_khz = dc->clk_mgr->funcs->get_max_clock_khz(dc->clk_mgr, CLK_TYPE_DSCCLK);
+
+ dsc->funcs->dsc_get_single_enc_caps(&single_dsc_enc_caps, max_dscclk_khz);
+
+ /* global capabilities */
+ dsc_enc_caps->dsc_version = single_dsc_enc_caps.dsc_version;
+ dsc_enc_caps->lb_bit_depth = single_dsc_enc_caps.lb_bit_depth;
+ dsc_enc_caps->is_block_pred_supported = single_dsc_enc_caps.is_block_pred_supported;
+ dsc_enc_caps->max_slice_width = single_dsc_enc_caps.max_slice_width;
+ dsc_enc_caps->bpp_increment_div = single_dsc_enc_caps.bpp_increment_div;
+ dsc_enc_caps->color_formats.raw = single_dsc_enc_caps.color_formats.raw;
+ dsc_enc_caps->color_depth.raw = single_dsc_enc_caps.color_depth.raw;
+
+ /* expand per DSC capabilities to global */
+ max_odm_combine_factor = dc->caps.max_odm_combine_factor;
+ num_dsc = dc->res_pool->res_cap->num_dsc;
+ max_odm_combine_factor = min(max_odm_combine_factor, num_dsc);
+ dsc_enc_caps->max_total_throughput_mps =
+ single_dsc_enc_caps.max_total_throughput_mps *
+ max_odm_combine_factor;
+
+ /* check slice counts possible with ODM combine */
+ build_dsc_enc_combined_slice_caps(&single_dsc_enc_caps, dsc_enc_caps, max_odm_combine_factor);
+}
+
+static inline uint32_t dsc_div_by_10_round_up(uint32_t value)
+{
+ return (value + 9) / 10;
+}
+
+static unsigned int get_min_dsc_slice_count_for_odm(
+ const struct display_stream_compressor *dsc,
+ const struct dsc_enc_caps *dsc_enc_caps,
+ const struct dc_crtc_timing *timing)
+{
+ unsigned int max_dispclk_khz;
+
+ /* get max pixel rate and combine caps */
+ max_dispclk_khz = dsc_enc_caps->max_total_throughput_mps * 1000;
+ if (dsc && dsc->ctx->dc) {
+ if (dsc->ctx->dc->clk_mgr &&
+ dsc->ctx->dc->clk_mgr->funcs->get_max_clock_khz) {
+ /* dispclk is available */
+ max_dispclk_khz = dsc->ctx->dc->clk_mgr->funcs->get_max_clock_khz(dsc->ctx->dc->clk_mgr, CLK_TYPE_DISPCLK);
+ }
+ }
+
+ /* validate parameters */
+ if (max_dispclk_khz == 0 || dsc_enc_caps->max_slice_width == 0)
+ return 1;
+
+ /* consider minimum odm slices required due to
+ * 1) display pipe throughput (dispclk)
+ * 2) max image width per slice
+ */
+ return dc_fixpt_ceil(dc_fixpt_max(
+ dc_fixpt_div_int(dc_fixpt_from_int(dsc_div_by_10_round_up(timing->pix_clk_100hz)),
+ max_dispclk_khz), // throughput
+ dc_fixpt_div_int(dc_fixpt_from_int(timing->h_addressable + timing->h_border_left + timing->h_border_right),
+ dsc_enc_caps->max_slice_width))); // slice width
+}
+
static void get_dsc_enc_caps(
const struct display_stream_compressor *dsc,
struct dsc_enc_caps *dsc_enc_caps,
int pixel_clock_100Hz)
{
- // This is a static HW query, so we can use any DSC
-
memset(dsc_enc_caps, 0, sizeof(struct dsc_enc_caps));
- if (dsc) {
- if (!dsc->ctx->dc->debug.disable_dsc)
- dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz);
- if (dsc->ctx->dc->debug.native422_support)
- dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1;
+
+ if (!dsc || !dsc->ctx || !dsc->ctx->dc || dsc->ctx->dc->debug.disable_dsc)
+ return;
+
+ /* check whether the reported caps are global or only for a single DCN DSC encoder */
+ if (dsc->funcs->dsc_get_enc_caps) {
+ dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz);
+ } else {
+ build_dsc_enc_caps(dsc, dsc_enc_caps);
}
+
+ if (dsc->ctx->dc->debug.native422_support)
+ dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1;
}
/* Returns 'false' if no intersection was found for at least one capability.
@@ -512,6 +711,11 @@ static bool intersect_dsc_caps(
dsc_sink_caps->slice_caps1.bits.NUM_SLICES_4 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_4;
dsc_common_caps->slice_caps.bits.NUM_SLICES_8 =
dsc_sink_caps->slice_caps1.bits.NUM_SLICES_8 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_8;
+ dsc_common_caps->slice_caps.bits.NUM_SLICES_12 =
+ dsc_sink_caps->slice_caps1.bits.NUM_SLICES_12 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_12;
+ dsc_common_caps->slice_caps.bits.NUM_SLICES_16 =
+ dsc_sink_caps->slice_caps2.bits.NUM_SLICES_16 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_16;
+
if (!dsc_common_caps->slice_caps.raw)
return false;
@@ -561,11 +765,6 @@ static bool intersect_dsc_caps(
return true;
}
-static inline uint32_t dsc_div_by_10_round_up(uint32_t value)
-{
- return (value + 9) / 10;
-}
-
static uint32_t compute_bpp_x16_from_target_bandwidth(
const uint32_t bandwidth_in_kbps,
const struct dc_crtc_timing *timing,
@@ -653,6 +852,7 @@ static bool decide_dsc_bandwidth_range(
*/
static bool decide_dsc_target_bpp_x16(
const struct dc_dsc_policy *policy,
+ const struct dc_dsc_config_options *options,
const struct dsc_enc_caps *dsc_common_caps,
const int target_bandwidth_kbps,
const struct dc_crtc_timing *timing,
@@ -667,7 +867,7 @@ static bool decide_dsc_target_bpp_x16(
if (decide_dsc_bandwidth_range(policy->min_target_bpp * 16, policy->max_target_bpp * 16,
num_slices_h, dsc_common_caps, timing, link_encoding, &range)) {
if (target_bandwidth_kbps >= range.stream_kbps) {
- if (policy->enable_dsc_when_not_needed)
+ if (policy->enable_dsc_when_not_needed || options->force_dsc_when_not_needed)
/* enable max bpp even dsc is not needed */
*target_bpp_x16 = range.max_target_bpp_x16;
} else if (target_bandwidth_kbps >= range.max_kbps) {
@@ -703,6 +903,12 @@ static int get_available_dsc_slices(union dsc_enc_slice_caps slice_caps, int *av
if (slice_caps.bits.NUM_SLICES_8)
available_slices[idx++] = 8;
+ if (slice_caps.bits.NUM_SLICES_12)
+ available_slices[idx++] = 12;
+
+ if (slice_caps.bits.NUM_SLICES_16)
+ available_slices[idx++] = 16;
+
return idx;
}
@@ -843,12 +1049,12 @@ static bool setup_dsc_config(
const struct dc_crtc_timing *timing,
const struct dc_dsc_config_options *options,
const enum dc_link_encoding_format link_encoding,
+ int min_slices_h,
struct dc_dsc_config *dsc_cfg)
{
struct dsc_enc_caps dsc_common_caps;
- int max_slices_h;
- int min_slices_h;
- int num_slices_h;
+ int max_slices_h = 0;
+ int num_slices_h = 0;
int pic_width;
int slice_width;
int target_bpp;
@@ -861,7 +1067,7 @@ static bool setup_dsc_config(
memset(dsc_cfg, 0, sizeof(struct dc_dsc_config));
- dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy);
+ dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy, link_encoding);
pic_width = timing->h_addressable + timing->h_border_left + timing->h_border_right;
pic_height = timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
@@ -951,12 +1157,14 @@ static bool setup_dsc_config(
if (!is_dsc_possible)
goto done;
- min_slices_h = pic_width / dsc_common_caps.max_slice_width;
- if (pic_width % dsc_common_caps.max_slice_width)
- min_slices_h++;
+ /* increase minimum slice count to meet sink slice width limitations */
+ min_slices_h = dc_fixpt_ceil(dc_fixpt_max(
+ dc_fixpt_div_int(dc_fixpt_from_int(pic_width), dsc_common_caps.max_slice_width), // sink min
+ dc_fixpt_from_int(min_slices_h))); // source min
min_slices_h = fit_num_slices_up(dsc_common_caps.slice_caps, min_slices_h);
+ /* increase minimum slice count to meet sink throughput limitations */
while (min_slices_h <= max_slices_h) {
int pix_clk_per_slice_khz = dsc_div_by_10_round_up(timing->pix_clk_100hz) / min_slices_h;
if (pix_clk_per_slice_khz <= sink_per_slice_throughput_mps * 1000)
@@ -965,14 +1173,12 @@ static bool setup_dsc_config(
min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h);
}
- is_dsc_possible = (min_slices_h <= max_slices_h);
-
- if (pic_width % min_slices_h != 0)
- min_slices_h = 0; // DSC TODO: Maybe try increasing the number of slices first?
-
- if (min_slices_h == 0 && max_slices_h == 0)
- is_dsc_possible = false;
+ /* increase minimum slice count to meet divisibility requirements */
+ while (pic_width % min_slices_h != 0 && min_slices_h <= max_slices_h) {
+ min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h);
+ }
+ is_dsc_possible = (min_slices_h <= max_slices_h) && max_slices_h != 0;
if (!is_dsc_possible)
goto done;
@@ -997,14 +1203,30 @@ static bool setup_dsc_config(
else
is_dsc_possible = false;
}
- // When we force 2:1 ODM, we can't have 1 slice to divide amongst 2 separate DSC instances
- // need to enforce at minimum 2 horizontal slices
- if (options->dsc_force_odm_hslice_override) {
- num_slices_h = fit_num_slices_up(dsc_common_caps.slice_caps, 2);
- if (num_slices_h == 0)
- is_dsc_possible = false;
+ // When we force ODM, num dsc h slices must be divisible by num odm h slices
+ switch (options->dsc_force_odm_hslice_override) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ if (num_slices_h < 2)
+ num_slices_h = fit_num_slices_up(dsc_common_caps.slice_caps, 2);
+ break;
+ case 3:
+ if (dsc_common_caps.slice_caps.bits.NUM_SLICES_12)
+ num_slices_h = 12;
+ else
+ num_slices_h = 0;
+ break;
+ case 4:
+ if (num_slices_h < 4)
+ num_slices_h = fit_num_slices_up(dsc_common_caps.slice_caps, 4);
+ break;
+ default:
+ break;
}
-
+ if (num_slices_h == 0)
+ is_dsc_possible = false;
if (!is_dsc_possible)
goto done;
@@ -1033,11 +1255,17 @@ static bool setup_dsc_config(
if (!is_dsc_possible)
goto done;
- dsc_cfg->num_slices_v = pic_height/slice_height;
+ if (slice_height > 0) {
+ dsc_cfg->num_slices_v = pic_height / slice_height;
+ } else {
+ is_dsc_possible = false;
+ goto done;
+ }
if (target_bandwidth_kbps > 0) {
is_dsc_possible = decide_dsc_target_bpp_x16(
&policy,
+ options,
&dsc_common_caps,
target_bandwidth_kbps,
timing,
@@ -1049,14 +1277,11 @@ static bool setup_dsc_config(
if (!is_dsc_possible)
goto done;
- // Final decission: can we do DSC or not?
- if (is_dsc_possible) {
- // Fill out the rest of DSC settings
- dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported;
- dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth;
- dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4;
- dsc_cfg->is_dp = dsc_sink_caps->is_dp;
- }
+ /* Fill out the rest of DSC settings */
+ dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported;
+ dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth;
+ dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4;
+ dsc_cfg->is_dp = dsc_sink_caps->is_dp;
done:
if (!is_dsc_possible)
@@ -1076,12 +1301,19 @@ bool dc_dsc_compute_config(
{
bool is_dsc_possible = false;
struct dsc_enc_caps dsc_enc_caps;
-
+ unsigned int min_dsc_slice_count;
get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz);
+
+ min_dsc_slice_count = get_min_dsc_slice_count_for_odm(dsc, &dsc_enc_caps, timing);
+
is_dsc_possible = setup_dsc_config(dsc_sink_caps,
&dsc_enc_caps,
target_bandwidth_kbps,
- timing, options, link_encoding, dsc_cfg);
+ timing,
+ options,
+ link_encoding,
+ min_dsc_slice_count,
+ dsc_cfg);
return is_dsc_possible;
}
@@ -1129,7 +1361,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps(
void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
uint32_t max_target_bpp_limit_override_x16,
- struct dc_dsc_policy *policy)
+ struct dc_dsc_policy *policy,
+ const enum dc_link_encoding_format link_encoding)
{
uint32_t bpc = 0;
@@ -1193,10 +1426,7 @@ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
policy->max_target_bpp = max_target_bpp_limit_override_x16 / 16;
/* enable DSC when not needed, default false */
- if (dsc_policy_enable_dsc_when_not_needed)
- policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed;
- else
- policy->enable_dsc_when_not_needed = false;
+ policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed;
}
void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit)
@@ -1225,4 +1455,5 @@ void dc_dsc_get_default_config_option(const struct dc *dc, struct dc_dsc_config_
options->dsc_force_odm_hslice_override = dc->debug.force_odm_combine;
options->max_target_bpp_limit_override_x16 = 0;
options->slice_height_granularity = 1;
+ options->force_dsc_when_not_needed = false;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
index 5eebe7f03ddc..89f0d999bf35 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c
@@ -32,15 +32,6 @@
static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals);
-/* Object I/F functions */
-static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s);
-static bool dsc2_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg);
-static void dsc2_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg,
- struct dsc_optc_config *dsc_optc_cfg);
-static void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe);
-static void dsc2_disable(struct display_stream_compressor *dsc);
-static void dsc2_disconnect(struct display_stream_compressor *dsc);
-
static const struct dsc_funcs dcn20_dsc_funcs = {
.dsc_get_enc_caps = dsc2_get_enc_caps,
.dsc_read_state = dsc2_read_state,
@@ -50,6 +41,7 @@ static const struct dsc_funcs dcn20_dsc_funcs = {
.dsc_enable = dsc2_enable,
.dsc_disable = dsc2_disable,
.dsc_disconnect = dsc2_disconnect,
+ .dsc_wait_disconnect_pending_clear = dsc2_wait_disconnect_pending_clear,
};
/* Macro definitios for REG_SET macros*/
@@ -65,13 +57,6 @@ static const struct dsc_funcs dcn20_dsc_funcs = {
#define DC_LOGGER \
dsc->ctx->logger
-enum dsc_bits_per_comp {
- DSC_BPC_8 = 8,
- DSC_BPC_10 = 10,
- DSC_BPC_12 = 12,
- DSC_BPC_UNKNOWN
-};
-
/* API functions (external or via structure->function_pointer) */
void dsc2_construct(struct dcn20_dsc *dsc,
@@ -137,7 +122,15 @@ void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz)
dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 2;
}
- // TODO DSC: This is actually image width limitation, not a slice width. This should be added to the criteria to use ODM.
+ /* For pixel clock bigger than a single-pipe limit needing four engines ODM 4:1, which then quadruples our
+ * throughput and number of slices
+ */
+ if (pixel_clock_100Hz > DCN20_MAX_PIXEL_CLOCK_Mhz*10000*2) {
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 = 1;
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 = 1;
+ dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 4;
+ }
+
dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */
dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */
}
@@ -146,7 +139,7 @@ void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz)
/* this function read dsc related register fields to be logged later in dcn10_log_hw_state
* into a dcn_dsc_state struct.
*/
-static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s)
+void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s)
{
struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc);
@@ -163,7 +156,7 @@ static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_ds
}
-static bool dsc2_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg)
+bool dsc2_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg)
{
struct dsc_optc_config dsc_optc_cfg;
struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc);
@@ -186,7 +179,7 @@ void dsc_config_log(struct display_stream_compressor *dsc, const struct dsc_conf
DC_LOG_DSC("\tcolor_depth %d", config->color_depth);
}
-static void dsc2_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg,
+void dsc2_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg,
struct dsc_optc_config *dsc_optc_cfg)
{
bool is_config_ok;
@@ -223,7 +216,7 @@ bool dsc2_get_packed_pps(struct display_stream_compressor *dsc, const struct dsc
}
-static void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe)
+void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe)
{
struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc);
int dsc_clock_en;
@@ -248,20 +241,16 @@ static void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe)
}
-static void dsc2_disable(struct display_stream_compressor *dsc)
+void dsc2_disable(struct display_stream_compressor *dsc)
{
struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc);
int dsc_clock_en;
- int dsc_fw_config;
- int enabled_opp_pipe;
DC_LOG_DSC("disable DSC %d", dsc->inst);
REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en);
- REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe);
- if (!dsc_clock_en || !dsc_fw_config) {
- DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already disabled!", dsc->inst, enabled_opp_pipe);
- ASSERT(0);
+ if (!dsc_clock_en) {
+ DC_LOG_DSC("DSC %d already disabled!", dsc->inst);
}
REG_UPDATE(DSCRM_DSC_FORWARD_CONFIG,
@@ -271,7 +260,14 @@ static void dsc2_disable(struct display_stream_compressor *dsc)
DSC_CLOCK_EN, 0);
}
-static void dsc2_disconnect(struct display_stream_compressor *dsc)
+void dsc2_wait_disconnect_pending_clear(struct display_stream_compressor *dsc)
+{
+ struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc);
+
+ REG_WAIT(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_DOUBLE_BUFFER_REG_UPDATE_PENDING, 0, 2, 50000);
+}
+
+void dsc2_disconnect(struct display_stream_compressor *dsc)
{
struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc);
@@ -410,9 +406,10 @@ bool dsc_prepare_config(const struct dsc_config *dsc_cfg, struct dsc_reg_values
dsc_reg_vals->alternate_ich_encoding_en = dsc_reg_vals->pps.dsc_version_minor == 1 ? 0 : 1;
dsc_reg_vals->ich_reset_at_eol = (dsc_cfg->is_odm || dsc_reg_vals->num_slices_h > 1) ? 0xF : 0;
+ // Need to find the ceiling value for the slice width
+ dsc_reg_vals->pps.slice_width = (dsc_cfg->pic_width + dsc_cfg->dc_dsc_cfg.num_slices_h - 1) / dsc_cfg->dc_dsc_cfg.num_slices_h;
// TODO: in addition to validating slice height (pic height must be divisible by slice height),
// see what happens when the same condition doesn't apply for slice_width/pic_width.
- dsc_reg_vals->pps.slice_width = dsc_cfg->pic_width / dsc_cfg->dc_dsc_cfg.num_slices_h;
dsc_reg_vals->pps.slice_height = dsc_cfg->pic_height / dsc_cfg->dc_dsc_cfg.num_slices_v;
ASSERT(dsc_reg_vals->pps.slice_height * dsc_cfg->dc_dsc_cfg.num_slices_v == dsc_cfg->pic_height);
@@ -769,4 +766,3 @@ static void dsc_write_to_registers(struct display_stream_compressor *dsc, const
RANGE_BPG_OFFSET14, reg_vals->pps.rc_range_params[14].range_bpg_offset);
}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
index ba869387c3c5..a9c04fc95bd1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h
@@ -78,6 +78,7 @@
SRI(DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id),\
SRI(DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id),\
SRI(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id),\
+ SRI(DSCC_TEST_DEBUG_BUS_ROTATE, DSCC, id),\
SRI(DSCCIF_CONFIG0, DSCCIF, id),\
SRI(DSCCIF_CONFIG1, DSCCIF, id),\
SRI(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id)
@@ -95,6 +96,7 @@
DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_DISPCLK_R_GATE_DIS, mask_sh), \
DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_DSCCLK_R_GATE_DIS, mask_sh), \
DSC_SF(DSC_TOP0_DSC_DEBUG_CONTROL, DSC_DBG_EN, mask_sh), \
+ DSC_SF(DSC_TOP0_DSC_DEBUG_CONTROL, DSC_TEST_CLOCK_MUX_SEL, mask_sh), \
DSC_SF(DSCC0_DSCC_CONFIG0, ICH_RESET_AT_END_OF_LINE, mask_sh), \
DSC_SF(DSCC0_DSCC_CONFIG0, NUMBER_OF_SLICES_PER_LINE, mask_sh), \
DSC_SF(DSCC0_DSCC_CONFIG0, ALTERNATE_ICH_ENCODING_EN, mask_sh), \
@@ -247,6 +249,10 @@
DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, mask_sh), \
DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, mask_sh), \
DSC_SF(DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, mask_sh), \
+ DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS0_ROTATE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS1_ROTATE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS2_ROTATE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS3_ROTATE, mask_sh), \
DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_INTERFACE_UNDERFLOW_RECOVERY_EN, mask_sh), \
DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_INTERFACE_UNDERFLOW_OCCURRED_INT_EN, mask_sh), \
DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_INTERFACE_UNDERFLOW_OCCURRED_STATUS, mask_sh), \
@@ -421,6 +427,10 @@
type DSCC_UPDATE_PENDING_STATUS; \
type DSCC_UPDATE_TAKEN_STATUS; \
type DSCC_UPDATE_TAKEN_ACK; \
+ type DSCC_TEST_DEBUG_BUS0_ROTATE; \
+ type DSCC_TEST_DEBUG_BUS1_ROTATE; \
+ type DSCC_TEST_DEBUG_BUS2_ROTATE; \
+ type DSCC_TEST_DEBUG_BUS3_ROTATE; \
type DSCC_RATE_BUFFER0_FULLNESS_LEVEL; \
type DSCC_RATE_BUFFER1_FULLNESS_LEVEL; \
type DSCC_RATE_BUFFER2_FULLNESS_LEVEL; \
@@ -443,7 +453,16 @@
type DSCCIF_UPDATE_TAKEN_STATUS; \
type DSCCIF_UPDATE_TAKEN_ACK; \
type DSCRM_DSC_FORWARD_EN; \
- type DSCRM_DSC_OPP_PIPE_SOURCE
+ type DSCRM_DSC_OPP_PIPE_SOURCE; \
+ type DSCRM_DSC_DOUBLE_BUFFER_REG_UPDATE_PENDING; \
+ type DSCRM_DSC_FORWARD_EN_STATUS
+
+enum dsc_bits_per_comp {
+ DSC_BPC_8 = 8,
+ DSC_BPC_10 = 10,
+ DSC_BPC_12 = 12,
+ DSC_BPC_UNKNOWN
+};
struct dcn20_dsc_registers {
uint32_t DSC_TOP_CONTROL;
@@ -492,6 +511,7 @@ struct dcn20_dsc_registers {
uint32_t DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL;
uint32_t DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL;
uint32_t DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL;
+ uint32_t DSCC_TEST_DEBUG_BUS_ROTATE;
uint32_t DSCCIF_CONFIG0;
uint32_t DSCCIF_CONFIG1;
uint32_t DSCRM_DSC_FORWARD_CONFIG;
@@ -585,5 +605,14 @@ bool dsc2_get_packed_pps(struct display_stream_compressor *dsc,
const struct dsc_config *dsc_cfg,
uint8_t *dsc_packed_pps);
+void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s);
+bool dsc2_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg);
+void dsc2_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg,
+ struct dsc_optc_config *dsc_optc_cfg);
+void dsc2_enable(struct display_stream_compressor *dsc, int opp_pipe);
+void dsc2_disable(struct display_stream_compressor *dsc);
+void dsc2_disconnect(struct display_stream_compressor *dsc);
+void dsc2_wait_disconnect_pending_clear(struct display_stream_compressor *dsc);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
new file mode 100644
index 000000000000..6f4f5a3c4861
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dcn35_dsc.h"
+#include "reg_helper.h"
+
+static void dsc35_enable(struct display_stream_compressor *dsc, int opp_pipe);
+
+static const struct dsc_funcs dcn35_dsc_funcs = {
+ .dsc_get_enc_caps = dsc2_get_enc_caps,
+ .dsc_read_state = dsc2_read_state,
+ .dsc_validate_stream = dsc2_validate_stream,
+ .dsc_set_config = dsc2_set_config,
+ .dsc_get_packed_pps = dsc2_get_packed_pps,
+ .dsc_enable = dsc35_enable,
+ .dsc_disable = dsc2_disable,
+ .dsc_disconnect = dsc2_disconnect,
+ .dsc_wait_disconnect_pending_clear = dsc2_wait_disconnect_pending_clear,
+};
+
+/* Macro definitions for REG_SET macros*/
+#define CTX \
+ dsc20->base.ctx
+
+#define REG(reg)\
+ dsc20->dsc_regs->reg
+
+#undef FN
+#define FN(reg_name, field_name) \
+ ((const struct dcn35_dsc_shift *)(dsc20->dsc_shift))->field_name, \
+ ((const struct dcn35_dsc_mask *)(dsc20->dsc_mask))->field_name
+
+#define DC_LOGGER \
+ dsc->ctx->logger
+
+void dsc35_construct(struct dcn20_dsc *dsc,
+ struct dc_context *ctx,
+ int inst,
+ const struct dcn20_dsc_registers *dsc_regs,
+ const struct dcn35_dsc_shift *dsc_shift,
+ const struct dcn35_dsc_mask *dsc_mask)
+{
+ dsc->base.ctx = ctx;
+ dsc->base.inst = inst;
+ dsc->base.funcs = &dcn35_dsc_funcs;
+
+ dsc->dsc_regs = dsc_regs;
+ dsc->dsc_shift = (const struct dcn20_dsc_shift *)(dsc_shift);
+ dsc->dsc_mask = (const struct dcn20_dsc_mask *)(dsc_mask);
+
+ dsc->max_image_width = 5184;
+}
+
+static void dsc35_enable(struct display_stream_compressor *dsc, int opp_pipe)
+{
+ struct dcn20_dsc *dsc20 = TO_DCN20_DSC(dsc);
+ int dsc_clock_en;
+ int dsc_fw_config;
+ int enabled_opp_pipe;
+
+ DC_LOG_DSC("enable DSC %d at opp pipe %d", dsc->inst, opp_pipe);
+
+ // TODO: After an idle exit, the HW default values for power control
+ // are changed intermittently due to unknown reasons. There are cases
+ // when dscc memory is still in shutdown state during enablement.
+ // Reset power control to hw default values.
+ REG_UPDATE_2(DSCC_MEM_POWER_CONTROL,
+ DSCC_MEM_PWR_FORCE, 0,
+ DSCC_MEM_PWR_DIS, 0);
+
+ REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en);
+ REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe);
+ if ((dsc_clock_en || dsc_fw_config) && enabled_opp_pipe != opp_pipe) {
+ DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already enabled!", dsc->inst, enabled_opp_pipe);
+ ASSERT(0);
+ }
+
+ REG_UPDATE(DSC_TOP_CONTROL,
+ DSC_CLOCK_EN, 1);
+
+ REG_UPDATE_2(DSCRM_DSC_FORWARD_CONFIG,
+ DSCRM_DSC_FORWARD_EN, 1,
+ DSCRM_DSC_OPP_PIPE_SOURCE, opp_pipe);
+}
+
+void dsc35_set_fgcg(struct dcn20_dsc *dsc20, bool enable)
+{
+ REG_UPDATE(DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, !enable);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h
new file mode 100644
index 000000000000..133ad38842cc
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN35_DSC_H__
+#define __DCN35_DSC_H__
+
+#include "dcn20/dcn20_dsc.h"
+
+#define DSC_REG_LIST_SH_MASK_DCN35(mask_sh) \
+ DSC_REG_LIST_SH_MASK_DCN20(mask_sh), \
+ DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, mask_sh)
+
+#define DSC_FIELD_LIST_DCN35(type) \
+ struct { \
+ DSC_FIELD_LIST_DCN20(type); \
+ type DSC_FGCG_REP_DIS; \
+ }
+
+struct dcn35_dsc_shift {
+ DSC_FIELD_LIST_DCN35(uint8_t);
+};
+
+struct dcn35_dsc_mask {
+ DSC_FIELD_LIST_DCN35(uint32_t);
+};
+
+void dsc35_construct(struct dcn20_dsc *dsc,
+ struct dc_context *ctx,
+ int inst,
+ const struct dcn20_dsc_registers *dsc_regs,
+ const struct dcn35_dsc_shift *dsc_shift,
+ const struct dcn35_dsc_mask *dsc_mask);
+
+void dsc35_set_fgcg(struct dcn20_dsc *dsc20, bool enable);
+
+#endif /* __DCN35_DSC_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c
new file mode 100644
index 000000000000..7bd92ae8b13e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include <drm/display/drm_dsc_helper.h>
+
+#include "reg_helper.h"
+#include "dcn401_dsc.h"
+#include "dsc/dscc_types.h"
+#include "dsc/rc_calc.h"
+
+static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals);
+
+/* Object I/F functions */
+//static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz);
+//static bool dsc401_get_packed_pps(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, uint8_t *dsc_packed_pps);
+static void dsc401_get_single_enc_caps(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz);
+
+static const struct dsc_funcs dcn401_dsc_funcs = {
+ .dsc_read_state = dsc401_read_state,
+ .dsc_validate_stream = dsc401_validate_stream,
+ .dsc_set_config = dsc401_set_config,
+ .dsc_get_packed_pps = dsc2_get_packed_pps,
+ .dsc_enable = dsc401_enable,
+ .dsc_disable = dsc401_disable,
+ .dsc_disconnect = dsc401_disconnect,
+ .dsc_wait_disconnect_pending_clear = dsc401_wait_disconnect_pending_clear,
+ .dsc_get_single_enc_caps = dsc401_get_single_enc_caps,
+};
+
+/* Macro definitions for REG_SET macros*/
+#define CTX \
+ dsc401->base.ctx
+
+#define REG(reg)\
+ dsc401->dsc_regs->reg
+
+#undef FN
+#define FN(reg_name, field_name) \
+ dsc401->dsc_shift->field_name, dsc401->dsc_mask->field_name
+#define DC_LOGGER \
+ dsc->ctx->logger
+
+
+/* API functions (external or via structure->function_pointer) */
+
+void dsc401_construct(struct dcn401_dsc *dsc,
+ struct dc_context *ctx,
+ int inst,
+ const struct dcn401_dsc_registers *dsc_regs,
+ const struct dcn401_dsc_shift *dsc_shift,
+ const struct dcn401_dsc_mask *dsc_mask)
+{
+ dsc->base.ctx = ctx;
+ dsc->base.inst = inst;
+ dsc->base.funcs = &dcn401_dsc_funcs;
+
+ dsc->dsc_regs = dsc_regs;
+ dsc->dsc_shift = dsc_shift;
+ dsc->dsc_mask = dsc_mask;
+
+ dsc->max_image_width = 5184;
+}
+
+static void dsc401_get_single_enc_caps(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz)
+{
+ dsc_enc_caps->dsc_version = 0x21; /* v1.2 - DP spec defined it in reverse order and we kept it */
+
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = 1;
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 = 1;
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_3 = 1;
+ dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 = 1;
+
+ dsc_enc_caps->lb_bit_depth = 13;
+ dsc_enc_caps->is_block_pred_supported = true;
+
+ dsc_enc_caps->color_formats.bits.RGB = 1;
+ dsc_enc_caps->color_formats.bits.YCBCR_444 = 1;
+ dsc_enc_caps->color_formats.bits.YCBCR_SIMPLE_422 = 1;
+ dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 0;
+ dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_420 = 1;
+
+ dsc_enc_caps->color_depth.bits.COLOR_DEPTH_8_BPC = 1;
+ dsc_enc_caps->color_depth.bits.COLOR_DEPTH_10_BPC = 1;
+ dsc_enc_caps->color_depth.bits.COLOR_DEPTH_12_BPC = 1;
+ dsc_enc_caps->max_total_throughput_mps = max_dscclk_khz * 3 / 1000;
+
+ dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */
+ dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */
+}
+
+/* this function reads dsc related register fields to be logged later in dcn10_log_hw_state
+ * into a dcn_dsc_state struct.
+ */
+void dsc401_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s)
+{
+ struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc);
+
+ REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &s->dsc_clock_en);
+ REG_GET(DSCC_PPS_CONFIG3, SLICE_WIDTH, &s->dsc_slice_width);
+ REG_GET(DSCC_PPS_CONFIG1, BITS_PER_PIXEL, &s->dsc_bits_per_pixel);
+ REG_GET(DSCC_PPS_CONFIG3, SLICE_HEIGHT, &s->dsc_slice_height);
+ REG_GET(DSCC_PPS_CONFIG1, CHUNK_SIZE, &s->dsc_chunk_size);
+ REG_GET(DSCC_PPS_CONFIG2, PIC_WIDTH, &s->dsc_pic_width);
+ REG_GET(DSCC_PPS_CONFIG2, PIC_HEIGHT, &s->dsc_pic_height);
+ REG_GET(DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, &s->dsc_slice_bpg_offset);
+ REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &s->dsc_fw_en,
+ DSCRM_DSC_OPP_PIPE_SOURCE, &s->dsc_opp_source);
+}
+
+
+bool dsc401_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg)
+{
+ struct dsc_optc_config dsc_optc_cfg;
+ struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc);
+
+ if (dsc_cfg->pic_width > dsc401->max_image_width)
+ return false;
+
+ return dsc_prepare_config(dsc_cfg, &dsc401->reg_vals, &dsc_optc_cfg);
+}
+
+void dsc401_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg,
+ struct dsc_optc_config *dsc_optc_cfg)
+{
+ bool is_config_ok;
+ struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc);
+
+ DC_LOG_DSC("Setting DSC Config at DSC inst %d", dsc->inst);
+ dsc_config_log(dsc, dsc_cfg);
+ is_config_ok = dsc_prepare_config(dsc_cfg, &dsc401->reg_vals, dsc_optc_cfg);
+ ASSERT(is_config_ok);
+ DC_LOG_DSC("programming DSC Picture Parameter Set (PPS):");
+ dsc_log_pps(dsc, &dsc401->reg_vals.pps);
+ dsc_write_to_registers(dsc, &dsc401->reg_vals);
+}
+
+void dsc401_enable(struct display_stream_compressor *dsc, int opp_pipe)
+{
+ struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc);
+ int dsc_clock_en;
+ int dsc_fw_config;
+ int enabled_opp_pipe;
+
+ DC_LOG_DSC("enable DSC %d at opp pipe %d", dsc->inst, opp_pipe);
+
+ REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en);
+ REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &dsc_fw_config, DSCRM_DSC_OPP_PIPE_SOURCE, &enabled_opp_pipe);
+ if ((dsc_clock_en || dsc_fw_config) && enabled_opp_pipe != opp_pipe) {
+ DC_LOG_DSC("ERROR: DSC %d at opp pipe %d already enabled!", dsc->inst, enabled_opp_pipe);
+ ASSERT(0);
+ }
+
+ REG_UPDATE(DSC_TOP_CONTROL,
+ DSC_CLOCK_EN, 1);
+
+ REG_UPDATE_2(DSCRM_DSC_FORWARD_CONFIG,
+ DSCRM_DSC_FORWARD_EN, 1,
+ DSCRM_DSC_OPP_PIPE_SOURCE, opp_pipe);
+}
+
+
+void dsc401_disable(struct display_stream_compressor *dsc)
+{
+ struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc);
+ int dsc_clock_en;
+
+ DC_LOG_DSC("disable DSC %d", dsc->inst);
+
+ REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &dsc_clock_en);
+ if (!dsc_clock_en) {
+ DC_LOG_DSC("DSC %d already disabled!", dsc->inst);
+ }
+
+ REG_UPDATE(DSCRM_DSC_FORWARD_CONFIG,
+ DSCRM_DSC_FORWARD_EN, 0);
+
+ REG_UPDATE(DSC_TOP_CONTROL,
+ DSC_CLOCK_EN, 0);
+}
+
+void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc)
+{
+ struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc);
+
+ REG_WAIT(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN_STATUS, 0, 2, 50000);
+}
+
+void dsc401_disconnect(struct display_stream_compressor *dsc)
+{
+ struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc);
+
+ DC_LOG_DSC("disconnect DSC %d", dsc->inst);
+
+ REG_UPDATE(DSCRM_DSC_FORWARD_CONFIG,
+ DSCRM_DSC_FORWARD_EN, 0);
+}
+
+static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals)
+{
+ uint32_t temp_int;
+ struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc);
+
+ REG_SET(DSC_DEBUG_CONTROL, 0,
+ DSC_DBG_EN, reg_vals->dsc_dbg_en);
+
+ // dsccif registers
+ REG_SET_2(DSCCIF_CONFIG0, 0,
+ //INPUT_INTERFACE_UNDERFLOW_RECOVERY_EN, reg_vals->underflow_recovery_en,
+ //INPUT_INTERFACE_UNDERFLOW_OCCURRED_INT_EN, reg_vals->underflow_occurred_int_en,
+ //INPUT_INTERFACE_UNDERFLOW_OCCURRED_STATUS, reg_vals->underflow_occurred_status,
+ INPUT_PIXEL_FORMAT, reg_vals->pixel_format,
+ DSCCIF_CONFIG0__BITS_PER_COMPONENT, reg_vals->pps.bits_per_component);
+
+ /* REG_SET_2(DSCCIF_CONFIG1, 0,
+ PIC_WIDTH, reg_vals->pps.pic_width,
+ PIC_HEIGHT, reg_vals->pps.pic_height);
+ */
+ // dscc registers
+ if (dsc401->dsc_mask->ICH_RESET_AT_END_OF_LINE == 0) {
+ REG_SET_3(DSCC_CONFIG0, 0,
+ NUMBER_OF_SLICES_PER_LINE, reg_vals->num_slices_h - 1,
+ ALTERNATE_ICH_ENCODING_EN, reg_vals->alternate_ich_encoding_en,
+ NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, reg_vals->num_slices_v - 1);
+ } else {
+ REG_SET_4(DSCC_CONFIG0, 0, ICH_RESET_AT_END_OF_LINE,
+ reg_vals->ich_reset_at_eol, NUMBER_OF_SLICES_PER_LINE,
+ reg_vals->num_slices_h - 1, ALTERNATE_ICH_ENCODING_EN,
+ reg_vals->alternate_ich_encoding_en, NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION,
+ reg_vals->num_slices_v - 1);
+ }
+
+ REG_SET(DSCC_CONFIG1, 0,
+ DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, reg_vals->rc_buffer_model_size);
+ /*REG_SET_2(DSCC_CONFIG1, 0,
+ DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, reg_vals->rc_buffer_model_size,
+ DSCC_DISABLE_ICH, reg_vals->disable_ich);*/
+
+ REG_SET_4(DSCC_INTERRUPT_CONTROL0, 0,
+ DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN0, reg_vals->rc_buffer_model_overflow_int_en[0],
+ DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN1, reg_vals->rc_buffer_model_overflow_int_en[1],
+ DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN2, reg_vals->rc_buffer_model_overflow_int_en[2],
+ DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN3, reg_vals->rc_buffer_model_overflow_int_en[3]);
+
+ REG_SET_3(DSCC_PPS_CONFIG0, 0,
+ DSC_VERSION_MINOR, reg_vals->pps.dsc_version_minor,
+ LINEBUF_DEPTH, reg_vals->pps.line_buf_depth,
+ DSCC_PPS_CONFIG0__BITS_PER_COMPONENT, reg_vals->pps.bits_per_component);
+
+ if (reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420 || reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422)
+ temp_int = reg_vals->bpp_x32;
+ else
+ temp_int = reg_vals->bpp_x32 >> 1;
+
+ REG_SET_7(DSCC_PPS_CONFIG1, 0,
+ BITS_PER_PIXEL, temp_int,
+ SIMPLE_422, reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422,
+ CONVERT_RGB, reg_vals->pixel_format == DSC_PIXFMT_RGB,
+ BLOCK_PRED_ENABLE, reg_vals->pps.block_pred_enable,
+ NATIVE_422, reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422,
+ NATIVE_420, reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420,
+ CHUNK_SIZE, reg_vals->pps.slice_chunk_size);
+
+ REG_SET_2(DSCC_PPS_CONFIG2, 0,
+ PIC_WIDTH, reg_vals->pps.pic_width,
+ PIC_HEIGHT, reg_vals->pps.pic_height);
+
+ REG_SET_2(DSCC_PPS_CONFIG3, 0,
+ SLICE_WIDTH, reg_vals->pps.slice_width,
+ SLICE_HEIGHT, reg_vals->pps.slice_height);
+
+ REG_SET(DSCC_PPS_CONFIG4, 0,
+ INITIAL_XMIT_DELAY, reg_vals->pps.initial_xmit_delay);
+
+ REG_SET_2(DSCC_PPS_CONFIG5, 0,
+ INITIAL_SCALE_VALUE, reg_vals->pps.initial_scale_value,
+ SCALE_INCREMENT_INTERVAL, reg_vals->pps.scale_increment_interval);
+
+ REG_SET_3(DSCC_PPS_CONFIG6, 0,
+ SCALE_DECREMENT_INTERVAL, reg_vals->pps.scale_decrement_interval,
+ FIRST_LINE_BPG_OFFSET, reg_vals->pps.first_line_bpg_offset,
+ SECOND_LINE_BPG_OFFSET, reg_vals->pps.second_line_bpg_offset);
+
+ REG_SET_2(DSCC_PPS_CONFIG7, 0,
+ NFL_BPG_OFFSET, reg_vals->pps.nfl_bpg_offset,
+ SLICE_BPG_OFFSET, reg_vals->pps.slice_bpg_offset);
+
+ REG_SET_2(DSCC_PPS_CONFIG8, 0,
+ NSL_BPG_OFFSET, reg_vals->pps.nsl_bpg_offset,
+ SECOND_LINE_OFFSET_ADJ, reg_vals->pps.second_line_offset_adj);
+
+ REG_SET_2(DSCC_PPS_CONFIG9, 0,
+ INITIAL_OFFSET, reg_vals->pps.initial_offset,
+ FINAL_OFFSET, reg_vals->pps.final_offset);
+
+ REG_SET_3(DSCC_PPS_CONFIG10, 0,
+ FLATNESS_MIN_QP, reg_vals->pps.flatness_min_qp,
+ FLATNESS_MAX_QP, reg_vals->pps.flatness_max_qp,
+ RC_MODEL_SIZE, reg_vals->pps.rc_model_size);
+
+ REG_SET_5(DSCC_PPS_CONFIG11, 0,
+ RC_EDGE_FACTOR, reg_vals->pps.rc_edge_factor,
+ RC_QUANT_INCR_LIMIT0, reg_vals->pps.rc_quant_incr_limit0,
+ RC_QUANT_INCR_LIMIT1, reg_vals->pps.rc_quant_incr_limit1,
+ RC_TGT_OFFSET_LO, reg_vals->pps.rc_tgt_offset_low,
+ RC_TGT_OFFSET_HI, reg_vals->pps.rc_tgt_offset_high);
+
+ REG_SET_4(DSCC_PPS_CONFIG12, 0,
+ RC_BUF_THRESH0, reg_vals->pps.rc_buf_thresh[0],
+ RC_BUF_THRESH1, reg_vals->pps.rc_buf_thresh[1],
+ RC_BUF_THRESH2, reg_vals->pps.rc_buf_thresh[2],
+ RC_BUF_THRESH3, reg_vals->pps.rc_buf_thresh[3]);
+
+ REG_SET_4(DSCC_PPS_CONFIG13, 0,
+ RC_BUF_THRESH4, reg_vals->pps.rc_buf_thresh[4],
+ RC_BUF_THRESH5, reg_vals->pps.rc_buf_thresh[5],
+ RC_BUF_THRESH6, reg_vals->pps.rc_buf_thresh[6],
+ RC_BUF_THRESH7, reg_vals->pps.rc_buf_thresh[7]);
+
+ REG_SET_4(DSCC_PPS_CONFIG14, 0,
+ RC_BUF_THRESH8, reg_vals->pps.rc_buf_thresh[8],
+ RC_BUF_THRESH9, reg_vals->pps.rc_buf_thresh[9],
+ RC_BUF_THRESH10, reg_vals->pps.rc_buf_thresh[10],
+ RC_BUF_THRESH11, reg_vals->pps.rc_buf_thresh[11]);
+
+ REG_SET_5(DSCC_PPS_CONFIG15, 0,
+ RC_BUF_THRESH12, reg_vals->pps.rc_buf_thresh[12],
+ RC_BUF_THRESH13, reg_vals->pps.rc_buf_thresh[13],
+ RANGE_MIN_QP0, reg_vals->pps.rc_range_params[0].range_min_qp,
+ RANGE_MAX_QP0, reg_vals->pps.rc_range_params[0].range_max_qp,
+ RANGE_BPG_OFFSET0, reg_vals->pps.rc_range_params[0].range_bpg_offset);
+
+ REG_SET_6(DSCC_PPS_CONFIG16, 0,
+ RANGE_MIN_QP1, reg_vals->pps.rc_range_params[1].range_min_qp,
+ RANGE_MAX_QP1, reg_vals->pps.rc_range_params[1].range_max_qp,
+ RANGE_BPG_OFFSET1, reg_vals->pps.rc_range_params[1].range_bpg_offset,
+ RANGE_MIN_QP2, reg_vals->pps.rc_range_params[2].range_min_qp,
+ RANGE_MAX_QP2, reg_vals->pps.rc_range_params[2].range_max_qp,
+ RANGE_BPG_OFFSET2, reg_vals->pps.rc_range_params[2].range_bpg_offset);
+
+ REG_SET_6(DSCC_PPS_CONFIG17, 0,
+ RANGE_MIN_QP3, reg_vals->pps.rc_range_params[3].range_min_qp,
+ RANGE_MAX_QP3, reg_vals->pps.rc_range_params[3].range_max_qp,
+ RANGE_BPG_OFFSET3, reg_vals->pps.rc_range_params[3].range_bpg_offset,
+ RANGE_MIN_QP4, reg_vals->pps.rc_range_params[4].range_min_qp,
+ RANGE_MAX_QP4, reg_vals->pps.rc_range_params[4].range_max_qp,
+ RANGE_BPG_OFFSET4, reg_vals->pps.rc_range_params[4].range_bpg_offset);
+
+ REG_SET_6(DSCC_PPS_CONFIG18, 0,
+ RANGE_MIN_QP5, reg_vals->pps.rc_range_params[5].range_min_qp,
+ RANGE_MAX_QP5, reg_vals->pps.rc_range_params[5].range_max_qp,
+ RANGE_BPG_OFFSET5, reg_vals->pps.rc_range_params[5].range_bpg_offset,
+ RANGE_MIN_QP6, reg_vals->pps.rc_range_params[6].range_min_qp,
+ RANGE_MAX_QP6, reg_vals->pps.rc_range_params[6].range_max_qp,
+ RANGE_BPG_OFFSET6, reg_vals->pps.rc_range_params[6].range_bpg_offset);
+
+ REG_SET_6(DSCC_PPS_CONFIG19, 0,
+ RANGE_MIN_QP7, reg_vals->pps.rc_range_params[7].range_min_qp,
+ RANGE_MAX_QP7, reg_vals->pps.rc_range_params[7].range_max_qp,
+ RANGE_BPG_OFFSET7, reg_vals->pps.rc_range_params[7].range_bpg_offset,
+ RANGE_MIN_QP8, reg_vals->pps.rc_range_params[8].range_min_qp,
+ RANGE_MAX_QP8, reg_vals->pps.rc_range_params[8].range_max_qp,
+ RANGE_BPG_OFFSET8, reg_vals->pps.rc_range_params[8].range_bpg_offset);
+
+ REG_SET_6(DSCC_PPS_CONFIG20, 0,
+ RANGE_MIN_QP9, reg_vals->pps.rc_range_params[9].range_min_qp,
+ RANGE_MAX_QP9, reg_vals->pps.rc_range_params[9].range_max_qp,
+ RANGE_BPG_OFFSET9, reg_vals->pps.rc_range_params[9].range_bpg_offset,
+ RANGE_MIN_QP10, reg_vals->pps.rc_range_params[10].range_min_qp,
+ RANGE_MAX_QP10, reg_vals->pps.rc_range_params[10].range_max_qp,
+ RANGE_BPG_OFFSET10, reg_vals->pps.rc_range_params[10].range_bpg_offset);
+
+ REG_SET_6(DSCC_PPS_CONFIG21, 0,
+ RANGE_MIN_QP11, reg_vals->pps.rc_range_params[11].range_min_qp,
+ RANGE_MAX_QP11, reg_vals->pps.rc_range_params[11].range_max_qp,
+ RANGE_BPG_OFFSET11, reg_vals->pps.rc_range_params[11].range_bpg_offset,
+ RANGE_MIN_QP12, reg_vals->pps.rc_range_params[12].range_min_qp,
+ RANGE_MAX_QP12, reg_vals->pps.rc_range_params[12].range_max_qp,
+ RANGE_BPG_OFFSET12, reg_vals->pps.rc_range_params[12].range_bpg_offset);
+
+ REG_SET_6(DSCC_PPS_CONFIG22, 0,
+ RANGE_MIN_QP13, reg_vals->pps.rc_range_params[13].range_min_qp,
+ RANGE_MAX_QP13, reg_vals->pps.rc_range_params[13].range_max_qp,
+ RANGE_BPG_OFFSET13, reg_vals->pps.rc_range_params[13].range_bpg_offset,
+ RANGE_MIN_QP14, reg_vals->pps.rc_range_params[14].range_min_qp,
+ RANGE_MAX_QP14, reg_vals->pps.rc_range_params[14].range_max_qp,
+ RANGE_BPG_OFFSET14, reg_vals->pps.rc_range_params[14].range_bpg_offset);
+}
+
+void dsc401_set_fgcg(struct dcn401_dsc *dsc401, bool enable)
+{
+	REG_UPDATE(DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, !enable); /* *_DIS field: store the inverse */
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h
new file mode 100644
index 000000000000..7acd57eb4f42
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DCN401_DSC_H__
+#define __DCN401_DSC_H__
+
+#include "dsc.h"
+#include "dsc/dscc_types.h"
+#include "dcn20/dcn20_dsc.h"
+#include <drm/display/drm_dsc.h>
+
+#define TO_DCN401_DSC(dsc)\
+ container_of(dsc, struct dcn401_dsc, base)
+
+#define DSC_REG_LIST_SH_MASK_DCN401(mask_sh)\
+ DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_CLOCK_EN, mask_sh), \
+ DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_DISPCLK_R_GATE_DIS, mask_sh), \
+ DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_DSCCLK_R_GATE_DIS, mask_sh), \
+ DSC_SF(DSC_TOP0_DSC_TOP_CONTROL, DSC_FGCG_REP_DIS, mask_sh), \
+ DSC_SF(DSC_TOP0_DSC_DEBUG_CONTROL, DSC_DBG_EN, mask_sh), \
+ DSC_SF(DSC_TOP0_DSC_DEBUG_CONTROL, DSC_TEST_CLOCK_MUX_SEL, mask_sh), \
+ DSC_SF(DSCC0_DSCC_CONFIG0, ICH_RESET_AT_END_OF_LINE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_CONFIG0, NUMBER_OF_SLICES_PER_LINE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_CONFIG0, ALTERNATE_ICH_ENCODING_EN, mask_sh), \
+ DSC_SF(DSCC0_DSCC_CONFIG0, NUMBER_OF_SLICES_IN_VERTICAL_DIRECTION, mask_sh), \
+ DSC_SF(DSCC0_DSCC_CONFIG1, DSCC_RATE_CONTROL_BUFFER_MODEL_SIZE, mask_sh), \
+ /*DSC_SF(DSCC0_DSCC_CONFIG1, DSCC_DISABLE_ICH, mask_sh),*/ \
+ DSC_SF(DSCC0_DSCC_STATUS, DSCC_DOUBLE_BUFFER_REG_UPDATE_PENDING, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL0, DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_END_OF_FRAME_NOT_REACHED_OCCURRED, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_STATUS1, DSCC_END_OF_FRAME_NOT_REACHED_CLEAR, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_INTERRUPT_CONTROL1, DSCC_END_OF_FRAME_NOT_REACHED_OCCURRED_INT_EN, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG0, DSC_VERSION_MINOR, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG0, DSC_VERSION_MAJOR, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG0, PPS_IDENTIFIER, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG0, LINEBUF_DEPTH, mask_sh), \
+ DSC2_SF(DSCC0, DSCC_PPS_CONFIG0__BITS_PER_COMPONENT, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG1, BITS_PER_PIXEL, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG1, VBR_ENABLE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG1, SIMPLE_422, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG1, CONVERT_RGB, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG1, BLOCK_PRED_ENABLE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG1, NATIVE_422, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG1, NATIVE_420, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG1, CHUNK_SIZE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG2, PIC_WIDTH, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG2, PIC_HEIGHT, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG3, SLICE_WIDTH, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG3, SLICE_HEIGHT, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG4, INITIAL_XMIT_DELAY, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG4, INITIAL_DEC_DELAY, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG5, INITIAL_SCALE_VALUE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG5, SCALE_INCREMENT_INTERVAL, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG6, SCALE_DECREMENT_INTERVAL, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG6, FIRST_LINE_BPG_OFFSET, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG6, SECOND_LINE_BPG_OFFSET, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG7, NFL_BPG_OFFSET, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG8, NSL_BPG_OFFSET, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG8, SECOND_LINE_OFFSET_ADJ, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG9, INITIAL_OFFSET, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG9, FINAL_OFFSET, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG10, FLATNESS_MIN_QP, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG10, FLATNESS_MAX_QP, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG10, RC_MODEL_SIZE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_EDGE_FACTOR, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_QUANT_INCR_LIMIT0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_QUANT_INCR_LIMIT1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_TGT_OFFSET_LO, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG11, RC_TGT_OFFSET_HI, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG12, RC_BUF_THRESH3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH4, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH5, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH6, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG13, RC_BUF_THRESH7, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH8, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH9, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH10, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG14, RC_BUF_THRESH11, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RC_BUF_THRESH12, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RC_BUF_THRESH13, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_MIN_QP0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_MAX_QP0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG15, RANGE_BPG_OFFSET0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MIN_QP1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MAX_QP1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_BPG_OFFSET1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MIN_QP2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_MAX_QP2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG16, RANGE_BPG_OFFSET2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MIN_QP3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MAX_QP3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_BPG_OFFSET3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MIN_QP4, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_MAX_QP4, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG17, RANGE_BPG_OFFSET4, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MIN_QP5, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MAX_QP5, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_BPG_OFFSET5, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MIN_QP6, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_MAX_QP6, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG18, RANGE_BPG_OFFSET6, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MIN_QP7, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MAX_QP7, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_BPG_OFFSET7, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MIN_QP8, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_MAX_QP8, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG19, RANGE_BPG_OFFSET8, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MIN_QP9, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MAX_QP9, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_BPG_OFFSET9, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MIN_QP10, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_MAX_QP10, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG20, RANGE_BPG_OFFSET10, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MIN_QP11, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MAX_QP11, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_BPG_OFFSET11, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MIN_QP12, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_MAX_QP12, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG21, RANGE_BPG_OFFSET12, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MIN_QP13, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MAX_QP13, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_BPG_OFFSET13, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MIN_QP14, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_MAX_QP14, mask_sh), \
+ DSC_SF(DSCC0_DSCC_PPS_CONFIG22, RANGE_BPG_OFFSET14, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL0, DSCC_DEFAULT_MEM_LOW_POWER_STATE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL0, DSCC_MEM_PWR_FORCE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL0, DSCC_MEM_PWR_DIS, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL0, DSCC_MEM_PWR_STATE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL1, DSCC_DEFAULT_MEM_LOW_POWER_STATE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL1, DSCC_MEM_PWR_FORCE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL1, DSCC_MEM_PWR_DIS, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MEM_POWER_CONTROL1, DSCC_MEM_PWR_STATE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_R_Y_SQUARED_ERROR_LOWER, DSCC_R_Y_SQUARED_ERROR_LOWER, mask_sh), \
+ DSC_SF(DSCC0_DSCC_R_Y_SQUARED_ERROR_UPPER, DSCC_R_Y_SQUARED_ERROR_UPPER, mask_sh), \
+ DSC_SF(DSCC0_DSCC_G_CB_SQUARED_ERROR_LOWER, DSCC_G_CB_SQUARED_ERROR_LOWER, mask_sh), \
+ DSC_SF(DSCC0_DSCC_G_CB_SQUARED_ERROR_UPPER, DSCC_G_CB_SQUARED_ERROR_UPPER, mask_sh), \
+ DSC_SF(DSCC0_DSCC_B_CR_SQUARED_ERROR_LOWER, DSCC_B_CR_SQUARED_ERROR_LOWER, mask_sh), \
+ DSC_SF(DSCC0_DSCC_B_CR_SQUARED_ERROR_UPPER, DSCC_B_CR_SQUARED_ERROR_UPPER, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR0, DSCC_R_Y_MAX_ABS_ERROR, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR0, DSCC_G_CB_MAX_ABS_ERROR, mask_sh), \
+ DSC_SF(DSCC0_DSCC_MAX_ABS_ERROR1, DSCC_B_CR_MAX_ABS_ERROR, mask_sh), \
+ DSC_SF(DSCC0_DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL0, DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL1, DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL2, DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL3, DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL0, DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL0, mask_sh), \
+ DSC_SF(DSCC0_DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL1, DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL1, mask_sh), \
+ DSC_SF(DSCC0_DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL2, DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL2, mask_sh), \
+ DSC_SF(DSCC0_DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL3, DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL3, mask_sh), \
+ DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS0_ROTATE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS1_ROTATE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS2_ROTATE, mask_sh), \
+ DSC_SF(DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE, DSCC_TEST_DEBUG_BUS3_ROTATE, mask_sh), \
+ DSC_SF(DSCCIF0_DSCCIF_CONFIG0, INPUT_PIXEL_FORMAT, mask_sh), \
+ DSC2_SF(DSCCIF0, DSCCIF_CONFIG0__BITS_PER_COMPONENT, mask_sh), \
+ DSC_SF(DSCCIF0_DSCCIF_CONFIG0, DOUBLE_BUFFER_REG_UPDATE_PENDING, mask_sh), \
+ DSC_SF(DSCRM0_DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, mask_sh), \
+ DSC_SF(DSCRM0_DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_OPP_PIPE_SOURCE, mask_sh), \
+ DSC_SF(DSCRM0_DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN_STATUS, mask_sh)
+
+struct dcn401_dsc_registers {
+ uint32_t DSC_TOP_CONTROL;
+ uint32_t DSC_DEBUG_CONTROL;
+ uint32_t DSCC_CONFIG0;
+ uint32_t DSCC_CONFIG1;
+ uint32_t DSCC_STATUS;
+ uint32_t DSCC_INTERRUPT_CONTROL0;
+ uint32_t DSCC_INTERRUPT_CONTROL1;
+ uint32_t DSCC_INTERRUPT_STATUS0;
+ uint32_t DSCC_INTERRUPT_STATUS1;
+ uint32_t DSCC_PPS_CONFIG0;
+ uint32_t DSCC_PPS_CONFIG1;
+ uint32_t DSCC_PPS_CONFIG2;
+ uint32_t DSCC_PPS_CONFIG3;
+ uint32_t DSCC_PPS_CONFIG4;
+ uint32_t DSCC_PPS_CONFIG5;
+ uint32_t DSCC_PPS_CONFIG6;
+ uint32_t DSCC_PPS_CONFIG7;
+ uint32_t DSCC_PPS_CONFIG8;
+ uint32_t DSCC_PPS_CONFIG9;
+ uint32_t DSCC_PPS_CONFIG10;
+ uint32_t DSCC_PPS_CONFIG11;
+ uint32_t DSCC_PPS_CONFIG12;
+ uint32_t DSCC_PPS_CONFIG13;
+ uint32_t DSCC_PPS_CONFIG14;
+ uint32_t DSCC_PPS_CONFIG15;
+ uint32_t DSCC_PPS_CONFIG16;
+ uint32_t DSCC_PPS_CONFIG17;
+ uint32_t DSCC_PPS_CONFIG18;
+ uint32_t DSCC_PPS_CONFIG19;
+ uint32_t DSCC_PPS_CONFIG20;
+ uint32_t DSCC_PPS_CONFIG21;
+ uint32_t DSCC_PPS_CONFIG22;
+ uint32_t DSCC_MEM_POWER_CONTROL0;
+ uint32_t DSCC_MEM_POWER_CONTROL1;
+ uint32_t DSCC_R_Y_SQUARED_ERROR_LOWER;
+ uint32_t DSCC_R_Y_SQUARED_ERROR_UPPER;
+ uint32_t DSCC_G_CB_SQUARED_ERROR_LOWER;
+ uint32_t DSCC_G_CB_SQUARED_ERROR_UPPER;
+ uint32_t DSCC_B_CR_SQUARED_ERROR_LOWER;
+ uint32_t DSCC_B_CR_SQUARED_ERROR_UPPER;
+ uint32_t DSCC_MAX_ABS_ERROR0;
+ uint32_t DSCC_MAX_ABS_ERROR1;
+ uint32_t DSCC_TEST_DEBUG_BUS_ROTATE;
+ uint32_t DSCCIF_CONFIG0;
+ uint32_t DSCRM_DSC_FORWARD_CONFIG;
+ uint32_t DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL0;
+ uint32_t DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL1;
+ uint32_t DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL2;
+ uint32_t DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL3;
+ uint32_t DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL0;
+ uint32_t DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL1;
+ uint32_t DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL2;
+ uint32_t DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL3;
+};
+
+#define DSC_FIELD_LIST_DCN401(type)\
+ DSC_FIELD_LIST_DCN20(type); \
+ type DSC_FGCG_REP_DIS; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN0; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN1; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN2; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED_INT_EN3; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED0; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED1; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED2; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED3; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED0; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED1; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED2; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED3; \
+ type DSCC_END_OF_FRAME_NOT_REACHED_OCCURRED; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR0; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR1; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR2; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_CLEAR3; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR0; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR1; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR2; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_CLEAR3; \
+ type DSCC_END_OF_FRAME_NOT_REACHED_CLEAR; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED0; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED1; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED2; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_OCCURRED3; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR0; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR1; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR2; \
+ type DSCC_RATE_CONTROL_BUFFER_MODEL_OVERFLOW_CLEAR3; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN0; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN1; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN2; \
+ type DSCC_OUTPUT_BUFFER_OVERFLOW_OCCURRED_INT_EN3; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN0; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN1; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN2; \
+ type DSCC_OUTPUT_BUFFER_UNDERFLOW_OCCURRED_INT_EN3; \
+ type DSCC_END_OF_FRAME_NOT_REACHED_OCCURRED_INT_EN; \
+ type DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL0; \
+ type DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL1; \
+ type DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL2; \
+ type DSCC_OUTPUT_BUFFER_MAX_FULLNESS_LEVEL3; \
+ type DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL0; \
+ type DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL1; \
+ type DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL2; \
+ type DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL3
+
+struct dcn401_dsc_shift {
+ DSC_FIELD_LIST_DCN401(uint8_t);
+};
+
+struct dcn401_dsc_mask {
+ DSC_FIELD_LIST_DCN401(uint32_t);
+};
+
+struct dcn401_dsc {
+ struct display_stream_compressor base;
+ const struct dcn401_dsc_registers *dsc_regs;
+ const struct dcn401_dsc_shift *dsc_shift;
+ const struct dcn401_dsc_mask *dsc_mask;
+
+ struct dsc_reg_values reg_vals;
+
+ int max_image_width;
+};
+
+void dsc401_construct(struct dcn401_dsc *dsc,
+ struct dc_context *ctx,
+ int inst,
+ const struct dcn401_dsc_registers *dsc_regs,
+ const struct dcn401_dsc_shift *dsc_shift,
+ const struct dcn401_dsc_mask *dsc_mask);
+
+void dsc401_set_fgcg(struct dcn401_dsc *dsc401, bool enable);
+
+void dsc401_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s);
+bool dsc401_validate_stream(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg);
+void dsc401_set_config(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg,
+ struct dsc_optc_config *dsc_optc_cfg);
+void dsc401_enable(struct display_stream_compressor *dsc, int opp_pipe);
+void dsc401_disable(struct display_stream_compressor *dsc);
+void dsc401_disconnect(struct display_stream_compressor *dsc);
+void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc);
+#endif
+
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h
index d7b8d586b523..b0bd1f9425b5 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h
@@ -76,6 +76,8 @@ union dsc_enc_slice_caps {
uint8_t NUM_SLICES_3 : 1; /* This one is not per DSC spec, but our encoder supports it */
uint8_t NUM_SLICES_4 : 1;
uint8_t NUM_SLICES_8 : 1;
+ uint8_t NUM_SLICES_12 : 1;
+ uint8_t NUM_SLICES_16 : 1;
} bits;
uint8_t raw;
};
@@ -105,6 +107,8 @@ struct dsc_funcs {
void (*dsc_enable)(struct display_stream_compressor *dsc, int opp_pipe);
void (*dsc_disable)(struct display_stream_compressor *dsc);
void (*dsc_disconnect)(struct display_stream_compressor *dsc);
+ void (*dsc_wait_disconnect_pending_clear)(struct display_stream_compressor *dsc);
+ void (*dsc_get_single_enc_caps)(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
index 36d6c1646a51..59864130cf83 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
@@ -101,7 +101,6 @@ int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps,
{
int ret;
struct drm_dsc_config dsc_cfg;
- unsigned long long tmp;
dsc_params->pps = *pps;
dsc_params->pps.initial_scale_value = 8 * rc->rc_model_size / (rc->rc_model_size - rc->initial_fullness_offset);
@@ -112,9 +111,9 @@ int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps,
dsc_cfg.mux_word_size = dsc_params->pps.bits_per_component <= 10 ? 48 : 64;
ret = drm_dsc_compute_rc_parameters(&dsc_cfg);
- tmp = (unsigned long long)dsc_cfg.slice_chunk_size * 0x10000000 + (dsc_cfg.slice_width - 1);
- do_div(tmp, (uint32_t)dsc_cfg.slice_width); //ROUND-UP
- dsc_params->bytes_per_pixel = (uint32_t)tmp;
+ dsc_params->bytes_per_pixel =
+ (uint32_t)(div_u64(((uint64_t)dsc_cfg.slice_chunk_size * 0x10000000 + (dsc_cfg.slice_width - 1)),
+ (uint32_t)dsc_cfg.slice_width)); /* Round-up */
copy_pps_fields(&dsc_params->pps, &dsc_cfg);
dsc_params->rc_buffer_model_size = dsc_cfg.rc_bits;
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/Makefile b/drivers/gpu/drm/amd/display/dc/dwb/Makefile
index ff20c47f559e..3952ba4cd508 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dwb/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright 2017 Advanced Micro Devices, Inc.
+# Copyright 2020 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -19,28 +19,28 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
+# Authors: AMD
+#
#
-# Makefile for the 'controller' sub-component of DAL.
-# It provides the control and status of HW CRTC block.
-
-CFLAGS_$(AMDDALPATH)/dc/dce100/dce100_resource.o = $(call cc-disable-warning, override-init)
-
-DCE100 = dce100_resource.o dce100_hw_sequencer.o
-AMD_DAL_DCE100 = $(addprefix $(AMDDALPATH)/dc/dce100/,$(DCE100))
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN30
+###############################################################################
+DWB_DCN30 = dcn30_dwb.o dcn30_dwb_cm.o
-AMD_DISPLAY_FILES += $(AMD_DAL_DCE100)
+AMD_DAL_DWB_DCN30 = $(addprefix $(AMDDALPATH)/dc/dwb/dcn30/,$(DWB_DCN30))
+AMD_DISPLAY_FILES += $(AMD_DAL_DWB_DCN30)
###############################################################################
-# DCE 10x
+# DCN35
###############################################################################
-ifdef 0#CONFIG_DRM_AMD_DC_DCE11_0
-TG_DCE100 = dce100_resource.o
+DWB_DCN35 = dcn35_dwb.o
-AMD_DAL_TG_DCE100 = $(addprefix \
- $(AMDDALPATH)/dc/dce100/,$(TG_DCE100))
+AMD_DAL_DWB_DCN35 = $(addprefix $(AMDDALPATH)/dc/dwb/dcn35/,$(DWB_DCN35))
-AMD_DISPLAY_FILES += $(AMD_DAL_TG_DCE100)
-endif
+AMD_DISPLAY_FILES += $(AMD_DAL_DWB_DCN35)
+
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h
index bd98b327a6c7..b86347c9b038 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h
+++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h
@@ -63,10 +63,6 @@ bool cm3_helper_translate_curve_to_hw_format(
const struct dc_transfer_func *output_tf,
struct pwl_params *lut_params, bool fixpoint);
-bool cm3_helper_translate_curve_to_degamma_hw_format(
- const struct dc_transfer_func *output_tf,
- struct pwl_params *lut_params);
-
bool cm3_helper_convert_to_custom_float(
struct pwl_result_data *rgb_resulted,
struct curve_points3 *corner_points,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c
index 0d98918bf0fc..bc058f682438 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c
+++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c
@@ -130,6 +130,28 @@ bool dwb3_disable(struct dwbc *dwbc)
return true;
}
+void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable)
+{
+	struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc);
+	unsigned int pre_locked;
+
+	REG_GET(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, &pre_locked);
+
+	/* Take the DWB update lock unless it was already set on entry */
+	if (pre_locked == 0)
+		REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 1);
+
+	/* Program frame capture to the requested state (enable or disable) */
+	REG_UPDATE(FC_MODE_CTRL, FC_FRAME_CAPTURE_EN, enable);
+
+	/* Clear the update lock only if this function set it */
+	if (pre_locked == 0)
+		REG_UPDATE(DWB_UPDATE_CTRL, DWB_UPDATE_LOCK, 0);
+
+	DC_LOG_DWB("%s dwb3_fc_enable = %d at inst = %d", __func__, (int)enable, dwbc->inst);
+}
+
+
bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params)
{
struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc);
@@ -226,11 +248,10 @@ static const struct dwbc_funcs dcn30_dwbc_funcs = {
.disable = dwb3_disable,
.update = dwb3_update,
.is_enabled = dwb3_is_enabled,
+ .set_fc_enable = dwb3_set_fc_enable,
.set_stereo = dwb3_set_stereo,
.set_new_content = dwb3_set_new_content,
- .dwb_program_output_csc = NULL,
.dwb_ogam_set_input_transfer_func = dwb3_ogam_set_input_transfer_func, //TODO: rename
- .dwb_set_scaler = NULL,
};
void dcn30_dwbc_construct(struct dcn30_dwbc *dwbc30,
@@ -249,16 +270,3 @@ void dcn30_dwbc_construct(struct dcn30_dwbc *dwbc30,
dwbc30->dwbc_shift = dwbc_shift;
dwbc30->dwbc_mask = dwbc_mask;
}
-
-void dwb3_set_host_read_rate_control(struct dwbc *dwbc, bool host_read_delay)
-{
- struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc);
-
- /*
- * Set maximum delay of host read access to DWBSCL LUT or OGAM LUT if there are no
- * idle cycles in HW pipeline (in number of clock cycles times 4)
- */
- REG_UPDATE(DWB_HOST_READ_CONTROL, DWB_HOST_READ_RATE_CONTROL, host_read_delay);
-
- DC_LOG_DWB("%s dwb3_rate_control at inst = %d", __func__, dwbc->inst);
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h
index fc00ec0a0881..7f053f49ec6a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h
@@ -712,7 +712,7 @@
type DWB_OGAM_RAMB_EXP_REGION32_LUT_OFFSET;\
type DWB_OGAM_RAMB_EXP_REGION32_NUM_SEGMENTS;\
type DWB_OGAM_RAMB_EXP_REGION33_LUT_OFFSET;\
- type DWB_OGAM_RAMB_EXP_REGION33_NUM_SEGMENTS;
+ type DWB_OGAM_RAMB_EXP_REGION33_NUM_SEGMENTS
struct dcn30_dwbc_registers {
/* DCN3AG */
@@ -735,6 +735,10 @@ struct dcn30_dwbc_registers {
uint32_t DWB_MMHUBBUB_BACKPRESSURE_CNT;
uint32_t DWB_HOST_READ_CONTROL;
uint32_t DWB_SOFT_RESET;
+ uint32_t DWB_DEBUG_CTRL;
+ uint32_t DWB_DEBUG;
+ uint32_t DWB_TEST_DEBUG_INDEX;
+ uint32_t DWB_TEST_DEBUG_DATA;
/* DWBSCL */
uint32_t DWBSCL_COEF_RAM_TAP_SELECT;
@@ -749,6 +753,9 @@ struct dcn30_dwbc_registers {
uint32_t DWBSCL_DEST_SIZE;
uint32_t DWBSCL_OVERFLOW_STATUS;
uint32_t DWBSCL_OVERFLOW_COUNTER;
+ uint32_t DWBSCL_DEBUG;
+ uint32_t DWBSCL_TEST_DEBUG_INDEX;
+ uint32_t DWBSCL_TEST_DEBUG_DATA;
/* DWBCP */
uint32_t DWB_HDR_MULT_COEF;
@@ -840,6 +847,9 @@ struct dcn30_dwbc_registers {
uint32_t DWB_OGAM_RAMB_REGION_28_29;
uint32_t DWB_OGAM_RAMB_REGION_30_31;
uint32_t DWB_OGAM_RAMB_REGION_32_33;
+ uint32_t DWBCP_DEBUG;
+ uint32_t DWBCP_TEST_DEBUG_INDEX;
+ uint32_t DWBCP_TEST_DEBUG_DATA;
};
/* Internal enums / structs */
@@ -879,6 +889,8 @@ bool dwb3_update(struct dwbc *dwbc, struct dc_dwb_params *params);
bool dwb3_is_enabled(struct dwbc *dwbc);
+void dwb3_set_fc_enable(struct dwbc *dwbc, enum dwb_frame_capture_enable enable);
+
void dwb3_set_stereo(struct dwbc *dwbc,
struct dwb_stereo_params *stereo_params);
@@ -902,7 +914,6 @@ bool dwb3_ogam_set_input_transfer_func(
struct dwbc *dwbc,
const struct dc_transfer_func *in_transfer_func_dwb_ogam);
-void dwb3_set_host_read_rate_control(struct dwbc *dwbc, bool host_read_delay);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c
index 701c7d8bc038..03a50c32fcfe 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c
@@ -243,6 +243,9 @@ static bool dwb3_program_ogam_lut(
return false;
}
+ if (params->hw_points_num == 0)
+ return false;
+
REG_SET(DWB_OGAM_CONTROL, 0, DWB_OGAM_MODE, 2);
current_mode = dwb3_get_ogam_current(dwbc30);
diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c
new file mode 100644
index 000000000000..d5e8294f5a16
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "reg_helper.h"
+#include "dcn35_dwb.h"
+
+#define REG(reg)\
+ dwbc30->dwbc_regs->reg
+
+#define CTX \
+ dwbc30->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+ ((const struct dcn35_dwbc_shift *)(dwbc30->dwbc_shift))->field_name, \
+ ((const struct dcn35_dwbc_mask *)(dwbc30->dwbc_mask)) \
+ ->field_name
+
+#define DC_LOGGER \
+ dwbc30->base.ctx->logger
+
+void dcn35_dwbc_construct(struct dcn30_dwbc *dwbc30,
+ struct dc_context *ctx,
+ const struct dcn30_dwbc_registers *dwbc_regs,
+ const struct dcn35_dwbc_shift *dwbc_shift,
+ const struct dcn35_dwbc_mask *dwbc_mask,
+ int inst)
+{
+ dcn30_dwbc_construct(dwbc30, ctx, dwbc_regs,
+ (const struct dcn30_dwbc_shift *)dwbc_shift,
+ (const struct dcn30_dwbc_mask *)dwbc_mask, inst);
+}
+
+void dcn35_dwbc_set_fgcg(struct dcn30_dwbc *dwbc30, bool enable)
+{
+ REG_UPDATE(DWB_ENABLE_CLK_CTRL, DWB_FGCG_REP_DIS, !enable);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.h
new file mode 100644
index 000000000000..886e727ed080
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN35_DWB_H
+#define __DCN35_DWB_H
+
+#include "resource.h"
+#include "dwb.h"
+#include "dcn30/dcn30_dwb.h"
+
+#define DWBC_COMMON_MASK_SH_LIST_DCN35(mask_sh) \
+ DWBC_COMMON_MASK_SH_LIST_DCN30(mask_sh), \
+ SF_DWB2(DWB_ENABLE_CLK_CTRL, DWB_TOP, 0, DWB_FGCG_REP_DIS, mask_sh)
+
+#define DWBC_REG_FIELD_LIST_DCN3_5(type) \
+ struct { \
+ DWBC_REG_FIELD_LIST_DCN3_0(type); \
+ type DWB_FGCG_REP_DIS; \
+ }
+
+struct dcn35_dwbc_mask {
+ DWBC_REG_FIELD_LIST_DCN3_5(uint32_t);
+};
+
+struct dcn35_dwbc_shift {
+ DWBC_REG_FIELD_LIST_DCN3_5(uint8_t);
+};
+
+void dcn35_dwbc_construct(struct dcn30_dwbc *dwbc30,
+ struct dc_context *ctx,
+ const struct dcn30_dwbc_registers *dwbc_regs,
+ const struct dcn35_dwbc_shift *dwbc_shift,
+ const struct dcn35_dwbc_mask *dwbc_mask,
+ int inst);
+
+void dcn35_dwbc_set_fgcg(struct dcn30_dwbc *dwbc30, bool enable);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/Makefile b/drivers/gpu/drm/amd/display/dc/gpio/Makefile
index bc47481a158e..b72e2a9f9a28 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/gpio/Makefile
@@ -122,3 +122,13 @@ GPIO_DCN32 = hw_translate_dcn32.o hw_factory_dcn32.o
AMD_DAL_GPIO_DCN32 = $(addprefix $(AMDDALPATH)/dc/gpio/dcn32/,$(GPIO_DCN32))
AMD_DISPLAY_FILES += $(AMD_DAL_GPIO_DCN32)
+
+###############################################################################
+# DCN 4.01
+###############################################################################
+GPIO_DCN401 = hw_translate_dcn401.o hw_factory_dcn401.o
+
+AMD_DAL_GPIO_DCN401 = $(addprefix $(AMDDALPATH)/dc/gpio/dcn401/,$(GPIO_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_GPIO_DCN401)
+
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
index d734e3a134d1..e3b11b3c1daa 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c
@@ -95,10 +95,6 @@ static bool offset_to_id(
return true;
default:
ASSERT_CRITICAL(false);
-#ifdef PALLADIUM_SUPPORTED
- *en = GPIO_DDC_LINE_DDC1;
- return true;
-#endif
return false;
}
break;
@@ -183,12 +179,7 @@ static bool offset_to_id(
*/
/* UNEXPECTED */
default:
-/* case REG(DC_GPIO_SYNCA_A): not exista */
-#ifdef PALLADIUM_SUPPORTED
- *id = GPIO_ID_HPD;
- *en = GPIO_DDC_LINE_DDC1;
- return true;
-#endif
+/* case REG(DC_GPIO_SYNCA_A): not exist */
ASSERT_CRITICAL(false);
return false;
}
@@ -308,10 +299,6 @@ static bool id_to_offset(
break;
default:
ASSERT_CRITICAL(false);
-#ifdef PALLADIUM_SUPPORTED
- info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD1_A_MASK;
- result = true;
-#endif
result = false;
}
break;
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c
new file mode 100644
index 000000000000..928abca18a18
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dm_services.h"
+#include "include/gpio_types.h"
+#include "../hw_factory.h"
+
+
+#include "../hw_gpio.h"
+#include "../hw_ddc.h"
+#include "../hw_hpd.h"
+#include "../hw_generic.h"
+
+
+#include "dcn/dcn_4_1_0_offset.h"
+#include "dcn/dcn_4_1_0_sh_mask.h"
+
+#include "reg_helper.h"
+#include "../hpd_regs.h"
+#include "hw_factory_dcn401.h"
+
+#define DCN_BASE__INST0_SEG2 0x000034C0
+
+/* begin *********************
+ * macros to expand register list macro defined in HW object header file */
+
+/* DCN */
+#define block HPD
+#define reg_num 0
+
+#undef BASE_INNER
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+
+
+#define REG(reg_name)\
+ BASE(reg ## reg_name ## _BASE_IDX) + reg ## reg_name
+
+#define SF_HPD(reg_name, field_name, post_fix)\
+ .field_name = HPD0_ ## reg_name ## __ ## field_name ## post_fix
+
+#define REGI(reg_name, block, id)\
+ BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SF(reg_name, field_name, post_fix)\
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+/* macros to expand register list macro defined in HW object header file
+ * end *********************/
+
+
+
+#define hpd_regs(id) \
+{\
+ HPD_REG_LIST(id)\
+}
+
+static const struct hpd_registers hpd_regs[] = {
+ hpd_regs(0),
+ hpd_regs(1),
+ hpd_regs(2),
+ hpd_regs(3),
+// hpd_regs(4),
+};
+
+static const struct hpd_sh_mask hpd_shift = {
+ HPD_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct hpd_sh_mask hpd_mask = {
+ HPD_MASK_SH_LIST(_MASK)
+};
+
+#include "../ddc_regs.h"
+
+ /* set field name */
+#define SF_DDC(reg_name, field_name, post_fix)\
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+static const struct ddc_registers ddc_data_regs_dcn[] = {
+ ddc_data_regs_dcn2(1),
+ ddc_data_regs_dcn2(2),
+ ddc_data_regs_dcn2(3),
+ ddc_data_regs_dcn2(4),
+ {
+ // dummy entry for cases where there is no such port
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
+ .ddc_setup = 0,
+ .phy_aux_cntl = 0,
+ .dc_gpio_aux_ctrl_5 = 0
+ },
+ {
+ // dummy entry for cases where there is no such port
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
+ .ddc_setup = 0,
+ .phy_aux_cntl = 0,
+ .dc_gpio_aux_ctrl_5 = 0
+ },
+ {
+ DDC_GPIO_VGA_REG_LIST(DATA),
+ .ddc_setup = 0,
+ .phy_aux_cntl = 0,
+ .dc_gpio_aux_ctrl_5 = 0
+ }
+};
+
+static const struct ddc_registers ddc_clk_regs_dcn[] = {
+ ddc_clk_regs_dcn2(1),
+ ddc_clk_regs_dcn2(2),
+ ddc_clk_regs_dcn2(3),
+ ddc_clk_regs_dcn2(4),
+ {
+ // dummy entry for cases where there is no such port
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
+ .ddc_setup = 0,
+ .phy_aux_cntl = 0,
+ .dc_gpio_aux_ctrl_5 = 0
+ },
+ {
+ // dummy entry for cases where there is no such port
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
+ .ddc_setup = 0,
+ .phy_aux_cntl = 0,
+ .dc_gpio_aux_ctrl_5 = 0
+ },
+ {
+ DDC_GPIO_VGA_REG_LIST(CLK),
+ .ddc_setup = 0,
+ .phy_aux_cntl = 0,
+ .dc_gpio_aux_ctrl_5 = 0
+ }
+};
+
+static const struct ddc_sh_mask ddc_shift[] = {
+ DDC_MASK_SH_LIST_DCN2(__SHIFT, 1),
+ DDC_MASK_SH_LIST_DCN2(__SHIFT, 2),
+ DDC_MASK_SH_LIST_DCN2(__SHIFT, 3),
+ DDC_MASK_SH_LIST_DCN2(__SHIFT, 4),
+ DDC_MASK_SH_LIST_DCN2(__SHIFT, 5),
+ DDC_MASK_SH_LIST_DCN2(__SHIFT, 6),
+ DDC_MASK_SH_LIST_DCN2_VGA(__SHIFT)
+};
+
+static const struct ddc_sh_mask ddc_mask[] = {
+ DDC_MASK_SH_LIST_DCN2(_MASK, 1),
+ DDC_MASK_SH_LIST_DCN2(_MASK, 2),
+ DDC_MASK_SH_LIST_DCN2(_MASK, 3),
+ DDC_MASK_SH_LIST_DCN2(_MASK, 4),
+ DDC_MASK_SH_LIST_DCN2(_MASK, 5),
+ DDC_MASK_SH_LIST_DCN2(_MASK, 6),
+ DDC_MASK_SH_LIST_DCN2_VGA(_MASK)
+};
+
+#include "../generic_regs.h"
+
+/* set field name */
+#define SF_GENERIC(reg_name, field_name, post_fix)\
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define generic_regs(id) \
+{\
+ GENERIC_REG_LIST(id)\
+}
+
+static const struct generic_registers generic_regs[] = {
+ generic_regs(A),
+ generic_regs(B),
+};
+
+static const struct generic_sh_mask generic_shift[] = {
+ GENERIC_MASK_SH_LIST(__SHIFT, A),
+ GENERIC_MASK_SH_LIST(__SHIFT, B),
+};
+
+static const struct generic_sh_mask generic_mask[] = {
+ GENERIC_MASK_SH_LIST(_MASK, A),
+ GENERIC_MASK_SH_LIST(_MASK, B),
+};
+
+static void define_generic_registers(struct hw_gpio_pin *pin, uint32_t en)
+{
+ struct hw_generic *generic = HW_GENERIC_FROM_BASE(pin);
+
+ generic->regs = &generic_regs[en];
+ generic->shifts = &generic_shift[en];
+ generic->masks = &generic_mask[en];
+ generic->base.regs = &generic_regs[en].gpio;
+}
+
+static void define_ddc_registers(
+ struct hw_gpio_pin *pin,
+ uint32_t en)
+{
+ struct hw_ddc *ddc = HW_DDC_FROM_BASE(pin);
+
+ switch (pin->id) {
+ case GPIO_ID_DDC_DATA:
+ ddc->regs = &ddc_data_regs_dcn[en];
+ ddc->base.regs = &ddc_data_regs_dcn[en].gpio;
+ break;
+ case GPIO_ID_DDC_CLOCK:
+ ddc->regs = &ddc_clk_regs_dcn[en];
+ ddc->base.regs = &ddc_clk_regs_dcn[en].gpio;
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ return;
+ }
+
+ ddc->shifts = &ddc_shift[en];
+ ddc->masks = &ddc_mask[en];
+
+}
+
+static void define_hpd_registers(struct hw_gpio_pin *pin, uint32_t en)
+{
+ struct hw_hpd *hpd = HW_HPD_FROM_BASE(pin);
+
+ hpd->regs = &hpd_regs[en];
+ hpd->shifts = &hpd_shift;
+ hpd->masks = &hpd_mask;
+ hpd->base.regs = &hpd_regs[en].gpio;
+}
+
+
+/* function table */
+static const struct hw_factory_funcs funcs = {
+ .init_ddc_data = dal_hw_ddc_init,
+ .init_generic = dal_hw_generic_init,
+ .init_hpd = dal_hw_hpd_init,
+ .get_ddc_pin = dal_hw_ddc_get_pin,
+ .get_hpd_pin = dal_hw_hpd_get_pin,
+ .get_generic_pin = dal_hw_generic_get_pin,
+ .define_hpd_registers = define_hpd_registers,
+ .define_ddc_registers = define_ddc_registers,
+ .define_generic_registers = define_generic_registers
+};
+
+/*
+ * dal_hw_factory_dcn401_init
+ *
+ * @brief
+ * Initialize HW factory function pointers and pin info
+ *
+ * @param
+ * struct hw_factory *factory - [out] struct of function pointers
+ */
+void dal_hw_factory_dcn401_init(struct hw_factory *factory)
+{
+ factory->number_of_pins[GPIO_ID_DDC_DATA] = 8;
+ factory->number_of_pins[GPIO_ID_DDC_CLOCK] = 8;
+ factory->number_of_pins[GPIO_ID_GENERIC] = 4;
+ factory->number_of_pins[GPIO_ID_HPD] = 5;
+ factory->number_of_pins[GPIO_ID_GPIO_PAD] = 28;
+ factory->number_of_pins[GPIO_ID_VIP_PAD] = 0;
+ factory->number_of_pins[GPIO_ID_SYNC] = 0;
+ factory->number_of_pins[GPIO_ID_GSL] = 0;/*add this*/
+
+ factory->funcs = &funcs;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.h b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.h
new file mode 100644
index 000000000000..22e650723ee7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DAL_HW_FACTORY_DCN401_H__
+#define __DAL_HW_FACTORY_DCN401_H__
+
+/* Initialize HW factory function pointers and pin info */
+void dal_hw_factory_dcn401_init(struct hw_factory *factory);
+
+#endif /* __DAL_HW_FACTORY_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.c
new file mode 100644
index 000000000000..ea416f01f888
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "hw_translate_dcn401.h"
+
+#include "dm_services.h"
+#include "include/gpio_types.h"
+#include "../hw_translate.h"
+
+#include "dcn/dcn_4_1_0_offset.h"
+#include "dcn/dcn_4_1_0_sh_mask.h"
+
+#define DCN_BASE__INST0_SEG2 0x000034C0
+/* begin *********************
+ * macros to expand register list macro defined in HW object header file */
+
+/* DCN */
+#define block HPD
+#define reg_num 0
+
+#undef BASE_INNER
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+#undef REG
+#define REG(reg_name)\
+ BASE(reg ## reg_name ## _BASE_IDX) + reg ## reg_name
+#define SF_HPD(reg_name, field_name, post_fix)\
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+
+/* macros to expand register list macro defined in HW object header file
+ * end *********************/
+
+
+static bool offset_to_id(
+ uint32_t offset,
+ uint32_t mask,
+ enum gpio_id *id,
+ uint32_t *en)
+{
+ switch (offset) {
+ /* GENERIC */
+ case REG(DC_GPIO_GENERIC_A):
+ *id = GPIO_ID_GENERIC;
+ switch (mask) {
+ case DC_GPIO_GENERIC_A__DC_GPIO_GENERICA_A_MASK:
+ *en = GPIO_GENERIC_A;
+ return true;
+ case DC_GPIO_GENERIC_A__DC_GPIO_GENERICB_A_MASK:
+ *en = GPIO_GENERIC_B;
+ return true;
+ case DC_GPIO_GENERIC_A__DC_GPIO_GENERICC_A_MASK:
+ *en = GPIO_GENERIC_C;
+ return true;
+ case DC_GPIO_GENERIC_A__DC_GPIO_GENERICD_A_MASK:
+ *en = GPIO_GENERIC_D;
+ return true;
+ case DC_GPIO_GENERIC_A__DC_GPIO_GENERICE_A_MASK:
+ *en = GPIO_GENERIC_E;
+ return true;
+ case DC_GPIO_GENERIC_A__DC_GPIO_GENERICF_A_MASK:
+ *en = GPIO_GENERIC_F;
+ return true;
+ default:
+ ASSERT_CRITICAL(false);
+ return false;
+ }
+ break;
+ /* HPD */
+ case REG(DC_GPIO_HPD_A):
+ *id = GPIO_ID_HPD;
+ switch (mask) {
+ case DC_GPIO_HPD_A__DC_GPIO_HPD1_A_MASK:
+ *en = GPIO_HPD_1;
+ return true;
+ case DC_GPIO_HPD_A__DC_GPIO_HPD2_A_MASK:
+ *en = GPIO_HPD_2;
+ return true;
+ case DC_GPIO_HPD_A__DC_GPIO_HPD3_A_MASK:
+ *en = GPIO_HPD_3;
+ return true;
+ case DC_GPIO_HPD_A__DC_GPIO_HPD4_A_MASK:
+ *en = GPIO_HPD_4;
+ return true;
+ case DC_GPIO_HPD_A__DC_GPIO_HPD5_A_MASK:
+ *en = GPIO_HPD_5;
+ return true;
+ default:
+ ASSERT_CRITICAL(false);
+ return false;
+ }
+ break;
+ /* GSL (genlock / swaplock) */
+ case REG(DC_GPIO_GENLK_A):
+ *id = GPIO_ID_GSL;
+ switch (mask) {
+ case DC_GPIO_GENLK_A__DC_GPIO_GENLK_CLK_A_MASK:
+ *en = GPIO_GSL_GENLOCK_CLOCK;
+ return true;
+ case DC_GPIO_GENLK_A__DC_GPIO_GENLK_VSYNC_A_MASK:
+ *en = GPIO_GSL_GENLOCK_VSYNC;
+ return true;
+ case DC_GPIO_GENLK_A__DC_GPIO_SWAPLOCK_A_A_MASK:
+ *en = GPIO_GSL_SWAPLOCK_A;
+ return true;
+ case DC_GPIO_GENLK_A__DC_GPIO_SWAPLOCK_B_A_MASK:
+ *en = GPIO_GSL_SWAPLOCK_B;
+ return true;
+ default:
+ ASSERT_CRITICAL(false);
+ return false;
+ }
+ break;
+ /* DDC */
+ /* we don't care about the GPIO_ID for DDC
+ * in DdcHandle it will use GPIO_ID_DDC_DATA/GPIO_ID_DDC_CLOCK
+ * directly in the create method
+ */
+ case REG(DC_GPIO_DDC1_A):
+ *en = GPIO_DDC_LINE_DDC1;
+ return true;
+ case REG(DC_GPIO_DDC2_A):
+ *en = GPIO_DDC_LINE_DDC2;
+ return true;
+ case REG(DC_GPIO_DDC3_A):
+ *en = GPIO_DDC_LINE_DDC3;
+ return true;
+ case REG(DC_GPIO_DDC4_A):
+ *en = GPIO_DDC_LINE_DDC4;
+ return true;
+ case REG(DC_GPIO_DDCVGA_A):
+ *en = GPIO_DDC_LINE_DDC_VGA;
+ return true;
+
+/*
+ * case REG(DC_GPIO_I2CPAD_A): not exist
+ * case REG(DC_GPIO_PWRSEQ_A):
+ * case REG(DC_GPIO_PAD_STRENGTH_1):
+ * case REG(DC_GPIO_PAD_STRENGTH_2):
+ * case REG(DC_GPIO_DEBUG):
+ */
+ /* UNEXPECTED */
+ default:
+/* case REG(DC_GPIO_SYNCA_A): not exist */
+ ASSERT_CRITICAL(false);
+ return false;
+ }
+}
+
+
+static bool id_to_offset(
+ enum gpio_id id,
+ uint32_t en,
+ struct gpio_pin_info *info)
+{
+ bool result = true;
+
+ switch (id) {
+ case GPIO_ID_DDC_DATA:
+ info->mask = DC_GPIO_DDC1_A__DC_GPIO_DDC1DATA_A_MASK;
+ switch (en) {
+ case GPIO_DDC_LINE_DDC1:
+ info->offset = REG(DC_GPIO_DDC1_A);
+ break;
+ case GPIO_DDC_LINE_DDC2:
+ info->offset = REG(DC_GPIO_DDC2_A);
+ break;
+ case GPIO_DDC_LINE_DDC3:
+ info->offset = REG(DC_GPIO_DDC3_A);
+ break;
+ case GPIO_DDC_LINE_DDC4:
+ info->offset = REG(DC_GPIO_DDC4_A);
+ break;
+/* case GPIO_DDC_LINE_DDC5:
+ info->offset = REG(DC_GPIO_DDC5_A);
+ break; */
+ case GPIO_DDC_LINE_DDC_VGA:
+ info->offset = REG(DC_GPIO_DDCVGA_A);
+ break;
+ case GPIO_DDC_LINE_I2C_PAD:
+ default:
+ ASSERT_CRITICAL(false);
+ result = false;
+ }
+ break;
+ case GPIO_ID_DDC_CLOCK:
+ info->mask = DC_GPIO_DDC1_A__DC_GPIO_DDC1CLK_A_MASK;
+ switch (en) {
+ case GPIO_DDC_LINE_DDC1:
+ info->offset = REG(DC_GPIO_DDC1_A);
+ break;
+ case GPIO_DDC_LINE_DDC2:
+ info->offset = REG(DC_GPIO_DDC2_A);
+ break;
+ case GPIO_DDC_LINE_DDC3:
+ info->offset = REG(DC_GPIO_DDC3_A);
+ break;
+ case GPIO_DDC_LINE_DDC4:
+ info->offset = REG(DC_GPIO_DDC4_A);
+ break;
+/* case GPIO_DDC_LINE_DDC5:
+ info->offset = REG(DC_GPIO_DDC5_A);
+ break; */
+ case GPIO_DDC_LINE_DDC_VGA:
+ info->offset = REG(DC_GPIO_DDCVGA_A);
+ break;
+ case GPIO_DDC_LINE_I2C_PAD:
+ default:
+ ASSERT_CRITICAL(false);
+ result = false;
+ }
+ break;
+ case GPIO_ID_GENERIC:
+ info->offset = REG(DC_GPIO_GENERIC_A);
+ switch (en) {
+ case GPIO_GENERIC_A:
+ info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICA_A_MASK;
+ break;
+ case GPIO_GENERIC_B:
+ info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICB_A_MASK;
+ break;
+ case GPIO_GENERIC_C:
+ info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICC_A_MASK;
+ break;
+ case GPIO_GENERIC_D:
+ info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICD_A_MASK;
+ break;
+ case GPIO_GENERIC_E:
+ info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICE_A_MASK;
+ break;
+ case GPIO_GENERIC_F:
+ info->mask = DC_GPIO_GENERIC_A__DC_GPIO_GENERICF_A_MASK;
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ result = false;
+ }
+ break;
+ case GPIO_ID_HPD:
+ info->offset = REG(DC_GPIO_HPD_A);
+ switch (en) {
+ case GPIO_HPD_1:
+ info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD1_A_MASK;
+ break;
+ case GPIO_HPD_2:
+ info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD2_A_MASK;
+ break;
+ case GPIO_HPD_3:
+ info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD3_A_MASK;
+ break;
+ case GPIO_HPD_4:
+ info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD4_A_MASK;
+ break;
+ case GPIO_HPD_5:
+ info->mask = DC_GPIO_HPD_A__DC_GPIO_HPD5_A_MASK;
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ result = false;
+ }
+ break;
+ case GPIO_ID_GSL:
+ switch (en) {
+ case GPIO_GSL_GENLOCK_CLOCK:
+ /*not implemented*/
+ ASSERT_CRITICAL(false);
+ result = false;
+ break;
+ case GPIO_GSL_GENLOCK_VSYNC:
+ /*not implemented*/
+ ASSERT_CRITICAL(false);
+ result = false;
+ break;
+ case GPIO_GSL_SWAPLOCK_A:
+ /*not implemented*/
+ ASSERT_CRITICAL(false);
+ result = false;
+ break;
+ case GPIO_GSL_SWAPLOCK_B:
+ /*not implemented*/
+ ASSERT_CRITICAL(false);
+ result = false;
+
+ break;
+ default:
+ ASSERT_CRITICAL(false);
+ result = false;
+ }
+ break;
+ case GPIO_ID_SYNC:
+ case GPIO_ID_VIP_PAD:
+ default:
+ ASSERT_CRITICAL(false);
+ result = false;
+ }
+
+ if (result) {
+ info->offset_y = info->offset + 2;
+ info->offset_en = info->offset + 1;
+ info->offset_mask = info->offset - 1;
+
+ info->mask_y = info->mask;
+ info->mask_en = info->mask;
+ info->mask_mask = info->mask;
+ }
+
+ return result;
+}
+
+
+/* function table */
+static const struct hw_translate_funcs funcs = {
+ .offset_to_id = offset_to_id,
+ .id_to_offset = id_to_offset,
+};
+
+
+/*
+ * dal_hw_translate_dcn401_init
+ *
+ * @brief
+ * Initialize Hw translate function pointers.
+ *
+ * @param
+ * struct hw_translate *tr - [out] struct of function pointers
+ *
+ */
+void dal_hw_translate_dcn401_init(struct hw_translate *tr)
+{
+ tr->funcs = &funcs;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.h b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.h
new file mode 100644
index 000000000000..aadecb05bba1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DAL_HW_TRANSLATE_DCN401_H__
+#define __DAL_HW_TRANSLATE_DCN401_H__
+
+struct hw_translate;
+
+/* Initialize Hw translate function pointers */
+void dal_hw_translate_dcn401_init(struct hw_translate *tr);
+
+#endif /* __DAL_HW_TRANSLATE_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
index 3ede6e02c3a7..942d9f0b6df2 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
@@ -56,7 +56,7 @@ struct gpio_service *dal_gpio_service_create(
struct dc_context *ctx)
{
struct gpio_service *service;
- uint32_t index_of_id;
+ int32_t index_of_id;
service = kzalloc(sizeof(struct gpio_service), GFP_KERNEL);
@@ -112,7 +112,7 @@ struct gpio_service *dal_gpio_service_create(
return service;
failure_2:
- while (index_of_id) {
+ while (index_of_id > 0) {
--index_of_id;
kfree(service->busyness[index_of_id]);
}
@@ -128,7 +128,7 @@ struct gpio *dal_gpio_service_create_irq(
uint32_t offset,
uint32_t mask)
{
- enum gpio_id id;
+ enum gpio_id id = 0;
uint32_t en;
if (!service->translate.funcs->offset_to_id(offset, mask, &id, &en)) {
@@ -144,7 +144,7 @@ struct gpio *dal_gpio_service_create_generic_mux(
uint32_t offset,
uint32_t mask)
{
- enum gpio_id id;
+ enum gpio_id id = 0;
uint32_t en;
struct gpio *generic;
@@ -178,7 +178,7 @@ struct gpio_pin_info dal_gpio_get_generic_pin_info(
enum gpio_id id,
uint32_t en)
{
- struct gpio_pin_info pin;
+ struct gpio_pin_info pin = {0};
if (service->translate.funcs->id_to_offset) {
service->translate.funcs->id_to_offset(id, en, &pin);
@@ -239,6 +239,9 @@ static bool is_pin_busy(
enum gpio_id id,
uint32_t en)
{
+ if (id == GPIO_ID_UNKNOWN)
+ return false;
+
return service->busyness[id][en];
}
@@ -247,6 +250,9 @@ static void set_pin_busy(
enum gpio_id id,
uint32_t en)
{
+ if (id == GPIO_ID_UNKNOWN)
+ return;
+
service->busyness[id][en] = true;
}
@@ -255,6 +261,9 @@ static void set_pin_free(
enum gpio_id id,
uint32_t en)
{
+ if (id == GPIO_ID_UNKNOWN)
+ return;
+
service->busyness[id][en] = false;
}
@@ -263,7 +272,7 @@ enum gpio_result dal_gpio_service_lock(
enum gpio_id id,
uint32_t en)
{
- if (!service->busyness[id]) {
+ if (id != GPIO_ID_UNKNOWN && !service->busyness[id]) {
ASSERT_CRITICAL(false);
return GPIO_RESULT_OPEN_FAILED;
}
@@ -277,7 +286,7 @@ enum gpio_result dal_gpio_service_unlock(
enum gpio_id id,
uint32_t en)
{
- if (!service->busyness[id]) {
+ if (id != GPIO_ID_UNKNOWN && !service->busyness[id]) {
ASSERT_CRITICAL(false);
return GPIO_RESULT_OPEN_FAILED;
}
@@ -402,6 +411,20 @@ enum dc_irq_source dal_irq_get_rx_source(
}
}
+enum dc_irq_source dal_irq_get_read_request(
+ const struct gpio *irq)
+{
+ enum gpio_id id = dal_gpio_get_id(irq);
+
+ switch (id) {
+ case GPIO_ID_HPD:
+ return (enum dc_irq_source)(DC_IRQ_SOURCE_DCI2C_RR_DDC1 +
+ dal_gpio_get_enum(irq));
+ default:
+ return DC_IRQ_SOURCE_INVALID;
+ }
+}
+
enum gpio_result dal_irq_setup_hpd_filter(
struct gpio *irq,
struct gpio_hpd_config *config)
@@ -434,7 +457,6 @@ struct gpio *dal_gpio_create_irq(
case GPIO_ID_GPIO_PAD:
break;
default:
- id = GPIO_ID_HPD;
ASSERT_CRITICAL(false);
return NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
index 525bc8881950..d9e6e70dc394 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
@@ -170,8 +170,7 @@ static enum gpio_result set_config(
return GPIO_RESULT_OK;
case GPIO_DDC_CONFIG_TYPE_POLL_FOR_CONNECT:
- if ((hw_gpio->base.en >= GPIO_DDC_LINE_DDC1) &&
- (hw_gpio->base.en <= GPIO_DDC_LINE_DDC_VGA)) {
+ if (hw_gpio->base.en <= GPIO_DDC_LINE_DDC_VGA) {
REG_UPDATE_3(ddc_setup,
DC_I2C_DDC1_ENABLE, 1,
DC_I2C_DDC1_EDID_DETECT_ENABLE, 1,
@@ -180,8 +179,7 @@ static enum gpio_result set_config(
}
break;
case GPIO_DDC_CONFIG_TYPE_POLL_FOR_DISCONNECT:
- if ((hw_gpio->base.en >= GPIO_DDC_LINE_DDC1) &&
- (hw_gpio->base.en <= GPIO_DDC_LINE_DDC_VGA)) {
+ if (hw_gpio->base.en <= GPIO_DDC_LINE_DDC_VGA) {
REG_UPDATE_3(ddc_setup,
DC_I2C_DDC1_ENABLE, 1,
DC_I2C_DDC1_EDID_DETECT_ENABLE, 1,
@@ -190,8 +188,7 @@ static enum gpio_result set_config(
}
break;
case GPIO_DDC_CONFIG_TYPE_DISABLE_POLLING:
- if ((hw_gpio->base.en >= GPIO_DDC_LINE_DDC1) &&
- (hw_gpio->base.en <= GPIO_DDC_LINE_DDC_VGA)) {
+ if (hw_gpio->base.en <= GPIO_DDC_LINE_DDC_VGA) {
REG_UPDATE_2(ddc_setup,
DC_I2C_DDC1_ENABLE, 0,
DC_I2C_DDC1_EDID_DETECT_ENABLE, 0);
@@ -231,7 +228,7 @@ void dal_hw_ddc_init(
enum gpio_id id,
uint32_t en)
{
- if ((en < GPIO_DDC_LINE_MIN) || (en > GPIO_DDC_LINE_MAX)) {
+ if (en > GPIO_DDC_LINE_MAX) {
ASSERT_CRITICAL(false);
*hw_ddc = NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
index 0ceba8f57d57..8bc67ca42197 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
@@ -52,6 +52,7 @@
#include "dcn30/hw_factory_dcn30.h"
#include "dcn315/hw_factory_dcn315.h"
#include "dcn32/hw_factory_dcn32.h"
+#include "dcn401/hw_factory_dcn401.h"
bool dal_hw_factory_init(
struct hw_factory *factory,
@@ -109,8 +110,14 @@ bool dal_hw_factory_init(
return true;
case DCN_VERSION_3_2:
case DCN_VERSION_3_21:
+ case DCN_VERSION_3_5:
+ case DCN_VERSION_3_51:
+ case DCN_VERSION_3_6:
dal_hw_factory_dcn32_init(factory);
return true;
+ case DCN_VERSION_4_01:
+ dal_hw_factory_dcn401_init(factory);
+ return true;
default:
ASSERT_CRITICAL(false);
return false;
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c
index f9e847e6555d..6cd50232c432 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c
@@ -106,7 +106,7 @@ void dal_hw_generic_init(
enum gpio_id id,
uint32_t en)
{
- if ((en < GPIO_DDC_LINE_MIN) || (en > GPIO_DDC_LINE_MAX)) {
+ if (en > GPIO_DDC_LINE_MAX) {
ASSERT_CRITICAL(false);
*hw_generic = NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c
index 1489fdfaf0e7..01ec451004f7 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c
@@ -62,7 +62,7 @@ static void dal_hw_hpd_destroy(
*ptr = NULL;
}
-static enum gpio_result get_value(
+static enum gpio_result dal_hw_hpd_get_value(
const struct hw_gpio_pin *ptr,
uint32_t *value)
{
@@ -85,7 +85,7 @@ static enum gpio_result get_value(
return dal_hw_gpio_get_value(ptr, value);
}
-static enum gpio_result set_config(
+static enum gpio_result dal_hw_hpd_set_config(
struct hw_gpio_pin *ptr,
const struct gpio_config_data *config_data)
{
@@ -104,9 +104,9 @@ static enum gpio_result set_config(
static const struct hw_gpio_pin_funcs funcs = {
.destroy = dal_hw_hpd_destroy,
.open = dal_hw_gpio_open,
- .get_value = get_value,
+ .get_value = dal_hw_hpd_get_value,
.set_value = dal_hw_gpio_set_value,
- .set_config = set_config,
+ .set_config = dal_hw_hpd_set_config,
.change_mode = dal_hw_gpio_change_mode,
.close = dal_hw_gpio_close,
};
@@ -127,7 +127,7 @@ void dal_hw_hpd_init(
enum gpio_id id,
uint32_t en)
{
- if ((en < GPIO_DDC_LINE_MIN) || (en > GPIO_DDC_LINE_MAX)) {
+ if (en > GPIO_DDC_LINE_MAX) {
ASSERT_CRITICAL(false);
*hw_hpd = NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
index 23b7ddefda11..cb79a2832287 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
@@ -52,6 +52,7 @@
#include "dcn30/hw_translate_dcn30.h"
#include "dcn315/hw_translate_dcn315.h"
#include "dcn32/hw_translate_dcn32.h"
+#include "dcn401/hw_translate_dcn401.h"
/*
* This unit
@@ -110,8 +111,14 @@ bool dal_hw_translate_init(
return true;
case DCN_VERSION_3_2:
case DCN_VERSION_3_21:
+ case DCN_VERSION_3_5:
+ case DCN_VERSION_3_51:
+ case DCN_VERSION_3_6:
dal_hw_translate_dcn32_init(translate);
return true;
+ case DCN_VERSION_4_01:
+ dal_hw_translate_dcn401_init(translate);
+ return true;
default:
BREAK_TO_DEBUGGER();
return false;
diff --git a/drivers/gpu/drm/amd/display/dc/hdcp/Makefile b/drivers/gpu/drm/amd/display/dc/hdcp/Makefile
index 4170b6eb9ec0..c1c47a6cefe1 100644
--- a/drivers/gpu/drm/amd/display/dc/hdcp/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/hdcp/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2019 Advanced Micro Devices, Inc.
+# Copyright 2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
index 25ffc052d53b..73a1e6a03719 100644
--- a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c
@@ -23,14 +23,12 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "dm_helpers.h"
#include "include/hdcp_msg_types.h"
#include "include/signal_types.h"
#include "core_types.h"
-#include "link.h"
+#include "link_service.h"
#include "link_hwss.h"
#include "link/protocols/link_dpcd.h"
@@ -72,7 +70,7 @@ static const bool hdcp_cmd_is_read[HDCP_MESSAGE_ID_MAX] = {
[HDCP_MESSAGE_ID_WRITE_REPEATER_AUTH_STREAM_MANAGE] = false,
[HDCP_MESSAGE_ID_READ_REPEATER_AUTH_STREAM_READY] = true,
[HDCP_MESSAGE_ID_READ_RXSTATUS] = true,
- [HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE] = false
+ [HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE] = false,
};
static const uint8_t hdcp_i2c_offsets[HDCP_MESSAGE_ID_MAX] = {
@@ -130,13 +128,21 @@ static bool hdmi_14_process_transaction(
const uint8_t hdcp_i2c_addr_link_primary = 0x3a; /* 0x74 >> 1*/
const uint8_t hdcp_i2c_addr_link_secondary = 0x3b; /* 0x76 >> 1*/
struct i2c_command i2c_command;
- uint8_t offset = hdcp_i2c_offsets[message_info->msg_id];
+ uint8_t offset;
struct i2c_payload i2c_payloads[] = {
- { true, 0, 1, &offset },
+ { true, 0, 1, 0 },
/* actual hdcp payload, will be filled later, zeroed for now*/
{ 0 }
};
+ if (message_info->msg_id == HDCP_MESSAGE_ID_INVALID) {
+ DC_LOG_ERROR("%s: Invalid message_info msg_id - %d\n", __func__, message_info->msg_id);
+ return false;
+ }
+
+ offset = hdcp_i2c_offsets[message_info->msg_id];
+ i2c_payloads[0].data = &offset;
+
switch (message_info->link) {
case HDCP_LINK_SECONDARY:
i2c_payloads[0].address = hdcp_i2c_addr_link_secondary;
@@ -310,6 +316,11 @@ static bool dp_11_process_transaction(
struct dc_link *link,
struct hdcp_protection_message *message_info)
{
+ if (message_info->msg_id == HDCP_MESSAGE_ID_INVALID) {
+ DC_LOG_ERROR("%s: Invalid message_info msg_id - %d\n", __func__, message_info->msg_id);
+ return false;
+ }
+
return dpcd_access_helper(
link,
message_info->length,
diff --git a/drivers/gpu/drm/amd/display/dc/hpo/Makefile b/drivers/gpu/drm/amd/display/dc/hpo/Makefile
new file mode 100644
index 000000000000..7f2c9ee0dff1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hpo/Makefile
@@ -0,0 +1,50 @@
+#
+# Copyright 2020 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+#
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN30
+###############################################################################
+
+AMD_DAL_HPO_DCN30 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn30/,$(HPO_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN30)
+###############################################################################
+# DCN31
+###############################################################################
+HPO_DCN31 = dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o
+
+AMD_DAL_HPO_DCN31 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn31/,$(HPO_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN31)
+###############################################################################
+# DCN32
+###############################################################################
+HPO_DCN32 = dcn32_hpo_dp_link_encoder.o
+
+AMD_DAL_HPO_DCN32 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn32/,$(HPO_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN32)
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c
index 5b7ad38f85e0..0d2ae21abbdd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c
@@ -262,7 +262,7 @@ void dcn31_hpo_dp_link_enc_set_link_test_pattern(
}
}
-static void fill_stream_allocation_row_info(
+void dcn31_fill_stream_allocation_row_info(
const struct link_mst_stream_allocation *stream_allocation,
uint32_t *src,
uint32_t *slots)
@@ -296,7 +296,7 @@ void dcn31_hpo_dp_link_enc_update_stream_allocation_table(
/* we should clean-up table each time */
if (table->stream_count >= 1) {
- fill_stream_allocation_row_info(
+ dcn31_fill_stream_allocation_row_info(
&table->stream_allocations[0],
&src,
&slots);
@@ -310,7 +310,7 @@ void dcn31_hpo_dp_link_enc_update_stream_allocation_table(
SAT_SLOT_COUNT, slots);
if (table->stream_count >= 2) {
- fill_stream_allocation_row_info(
+ dcn31_fill_stream_allocation_row_info(
&table->stream_allocations[1],
&src,
&slots);
@@ -324,7 +324,7 @@ void dcn31_hpo_dp_link_enc_update_stream_allocation_table(
SAT_SLOT_COUNT, slots);
if (table->stream_count >= 3) {
- fill_stream_allocation_row_info(
+ dcn31_fill_stream_allocation_row_info(
&table->stream_allocations[2],
&src,
&slots);
@@ -338,7 +338,7 @@ void dcn31_hpo_dp_link_enc_update_stream_allocation_table(
SAT_SLOT_COUNT, slots);
if (table->stream_count >= 4) {
- fill_stream_allocation_row_info(
+ dcn31_fill_stream_allocation_row_info(
&table->stream_allocations[3],
&src,
&slots);
@@ -377,7 +377,7 @@ void dcn31_hpo_dp_link_enc_update_stream_allocation_table(
*/
REG_WAIT(DP_DPHY_SYM32_STATUS,
SAT_UPDATE_PENDING, 0,
- 10, DP_SAT_UPDATE_MAX_RETRY);
+ 100, DP_SAT_UPDATE_MAX_RETRY);
}
void dcn31_hpo_dp_link_enc_set_throttled_vcp_size(
@@ -395,6 +395,12 @@ void dcn31_hpo_dp_link_enc_set_throttled_vcp_size(
x),
25));
+ // If rounding pushed y up to a whole integer (y >> 25 != 0), carry it into x and clear y.
+ if (y >> 25) {
+ x += 1;
+ y = 0;
+ }
+
switch (stream_encoder_inst) {
case 0:
REG_SET_2(DP_DPHY_SYM32_VC_RATE_CNTL0, 0,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h
index e324e9b83136..40859660e4dc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h
@@ -104,7 +104,10 @@ struct dcn31_hpo_dp_link_encoder_registers {
uint32_t RDPCSTX_PHY_CNTL6[5];
};
-#define DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(mask_sh)\
+#define DCN3_1_HPO_DP_LINK_ENC_RDPCSTX_MASK_SH_LIST(mask_sh)\
+ SE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, mask_sh)
+
+#define DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(mask_sh)\
SE_SF(DP_LINK_ENC0_DP_LINK_ENC_CLOCK_CONTROL, DP_LINK_ENC_CLOCK_EN, mask_sh),\
SE_SF(DP_DPHY_SYM320_DP_DPHY_SYM32_CONTROL, DPHY_RESET, mask_sh),\
SE_SF(DP_DPHY_SYM320_DP_DPHY_SYM32_CONTROL, DPHY_ENABLE, mask_sh),\
@@ -126,11 +129,14 @@ struct dcn31_hpo_dp_link_encoder_registers {
SE_SF(DP_DPHY_SYM320_DP_DPHY_SYM32_TP_SQ_PULSE, TP_SQ_PULSE_WIDTH, mask_sh),\
SE_SF(DP_DPHY_SYM320_DP_DPHY_SYM32_SAT_VC0, SAT_STREAM_SOURCE, mask_sh),\
SE_SF(DP_DPHY_SYM320_DP_DPHY_SYM32_SAT_VC0, SAT_SLOT_COUNT, mask_sh),\
- SE_SF(RDPCSTX0_RDPCSTX_PHY_CNTL6, RDPCS_PHY_DPALT_DISABLE, mask_sh),\
SE_SF(DP_DPHY_SYM320_DP_DPHY_SYM32_VC_RATE_CNTL0, STREAM_VC_RATE_X, mask_sh),\
SE_SF(DP_DPHY_SYM320_DP_DPHY_SYM32_VC_RATE_CNTL0, STREAM_VC_RATE_Y, mask_sh),\
SE_SF(DP_DPHY_SYM320_DP_DPHY_SYM32_SAT_UPDATE, SAT_UPDATE, mask_sh)
+#define DCN3_1_HPO_DP_LINK_ENC_MASK_SH_LIST(mask_sh)\
+ DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(mask_sh),\
+ DCN3_1_HPO_DP_LINK_ENC_RDPCSTX_MASK_SH_LIST(mask_sh)\
+
#define DCN3_1_HPO_DP_LINK_ENC_REG_FIELD_LIST(type) \
type DP_LINK_ENC_CLOCK_EN;\
type DPHY_RESET;\
@@ -220,4 +226,10 @@ void dcn31_hpo_dp_link_enc_set_ffe(
const struct dc_link_settings *link_settings,
uint8_t ffe_preset);
+
+void dcn31_fill_stream_allocation_row_info(
+ const struct link_mst_stream_allocation *stream_allocation,
+ uint32_t *src,
+ uint32_t *slots);
+
#endif // __DAL_DCN31_HPO_LINK_ENCODER_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c
index 45143459eedd..759b453385c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c
@@ -323,7 +323,7 @@ static void dcn31_hpo_dp_stream_enc_set_stream_attribute(
break;
case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
case COLOR_SPACE_2020_RGB_FULLRANGE:
- case COLOR_SPACE_2020_YCBCR:
+ case COLOR_SPACE_2020_YCBCR_LIMITED:
case COLOR_SPACE_XR_RGB:
case COLOR_SPACE_MSREF_SCRGB:
case COLOR_SPACE_ADOBERGB:
@@ -336,6 +336,7 @@ static void dcn31_hpo_dp_stream_enc_set_stream_attribute(
case COLOR_SPACE_CUSTOMPOINTS:
case COLOR_SPACE_UNKNOWN:
case COLOR_SPACE_YCBCR709_BLACK:
+ default:
/* do nothing */
break;
}
@@ -474,6 +475,10 @@ static void dcn31_hpo_dp_stream_enc_update_dp_info_packets(
&info_frame->hdrsmd,
true);
+ /* packetIndex 4 is used for the send-immediate SDP message; please
+ * use another packetIndex (such as 5 or 6) for other info packets.
+ */
+
if (info_frame->adaptive_sync.valid)
enc->vpg->funcs->update_generic_info_packet(
enc->vpg,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h
index 82c3b3ac1f0d..82c3b3ac1f0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hpo_dp_link_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.c
index 8af01f579690..de3ec4fcade2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hpo_dp_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.c
@@ -41,7 +41,7 @@
#define CTX \
enc3->base.ctx
-static bool dcn32_hpo_dp_link_enc_is_in_alt_mode(
+bool dcn32_hpo_dp_link_enc_is_in_alt_mode(
struct hpo_dp_link_encoder *enc)
{
struct dcn31_hpo_dp_link_encoder *enc3 = DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(enc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hpo_dp_link_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.h
index 176b1537d2a1..bea4e1a8ff90 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hpo_dp_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.h
@@ -54,6 +54,7 @@
SE_SF(DP_DPHY_SYM320_DP_DPHY_SYM32_VC_RATE_CNTL0, STREAM_VC_RATE_Y, mask_sh),\
SE_SF(DP_DPHY_SYM320_DP_DPHY_SYM32_SAT_UPDATE, SAT_UPDATE, mask_sh)
+bool dcn32_hpo_dp_link_enc_is_in_alt_mode(struct hpo_dp_link_encoder *enc);
void hpo_dp_link_encoder32_construct(struct dcn31_hpo_dp_link_encoder *enc31,
struct dc_context *ctx,
uint32_t inst,
@@ -61,4 +62,7 @@ void hpo_dp_link_encoder32_construct(struct dcn31_hpo_dp_link_encoder *enc31,
const struct dcn31_hpo_dp_link_encoder_shift *hpo_le_shift,
const struct dcn31_hpo_dp_link_encoder_mask *hpo_le_mask);
+bool dcn32_hpo_dp_link_enc_is_in_alt_mode(
+ struct hpo_dp_link_encoder *enc);
+
#endif // __DAL_DCN32_HPO_DP_LINK_ENCODER_H__
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/Makefile b/drivers/gpu/drm/amd/display/dc/hubbub/Makefile
new file mode 100644
index 000000000000..66ca5a6a0415
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/Makefile
@@ -0,0 +1,104 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'hubbub' sub-component of DAL.
+#
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+HUBBUB_DCN10 = dcn10_hubbub.o
+
+AMD_DAL_HUBBUB_DCN10 = $(addprefix $(AMDDALPATH)/dc/hubbub/dcn10/,$(HUBBUB_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBBUB_DCN10)
+
+###############################################################################
+
+HUBBUB_DCN20 = dcn20_hubbub.o
+
+AMD_DAL_HUBBUB_DCN20 = $(addprefix $(AMDDALPATH)/dc/hubbub/dcn20/,$(HUBBUB_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBBUB_DCN20)
+
+###############################################################################
+
+HUBBUB_DCN201 = dcn201_hubbub.o
+
+AMD_DAL_HUBBUB_DCN201 = $(addprefix $(AMDDALPATH)/dc/hubbub/dcn201/,$(HUBBUB_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBBUB_DCN201)
+
+###############################################################################
+
+HUBBUB_DCN21 = dcn21_hubbub.o
+
+AMD_DAL_HUBBUB_DCN21 = $(addprefix $(AMDDALPATH)/dc/hubbub/dcn21/,$(HUBBUB_DCN21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBBUB_DCN21)
+
+###############################################################################
+HUBBUB_DCN30 = dcn30_hubbub.o
+
+AMD_DAL_HUBBUB_DCN30 = $(addprefix $(AMDDALPATH)/dc/hubbub/dcn30/,$(HUBBUB_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBBUB_DCN30)
+
+###############################################################################
+HUBBUB_DCN301 = dcn301_hubbub.o
+
+AMD_DAL_HUBBUB_DCN301 = $(addprefix $(AMDDALPATH)/dc/hubbub/dcn301/,$(HUBBUB_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBBUB_DCN301)
+
+###############################################################################
+
+HUBBUB_DCN31 = dcn31_hubbub.o
+
+AMD_DAL_HUBBUB_DCN31 = $(addprefix $(AMDDALPATH)/dc/hubbub/dcn31/,$(HUBBUB_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBBUB_DCN31)
+
+###############################################################################
+HUBBUB_DCN32 = dcn32_hubbub.o
+
+AMD_DAL_HUBBUB_DCN32 = $(addprefix $(AMDDALPATH)/dc/hubbub/dcn32/,$(HUBBUB_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBBUB_DCN32)
+
+###############################################################################
+
+HUBBUB_DCN35 = dcn35_hubbub.o
+
+AMD_DAL_HUBBUB_DCN35 = $(addprefix $(AMDDALPATH)/dc/hubbub/dcn35/,$(HUBBUB_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBBUB_DCN35)
+
+###############################################################################
+HUBBUB_DCN401 = dcn401_hubbub.o
+
+AMD_DAL_HUBBUB_DCN401 = $(addprefix $(AMDDALPATH)/dc/hubbub/dcn401/,$(HUBBUB_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBBUB_DCN401)
+
+###############################################################################
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.c
index d51f1ce02874..7847c1c4927b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.c
@@ -24,7 +24,7 @@
*/
#include "dm_services.h"
-#include "dcn10_hubp.h"
+#include "dcn10/dcn10_hubp.h"
#include "dcn10_hubbub.h"
#include "reg_helper.h"
@@ -130,7 +130,7 @@ bool hubbub1_verify_allow_pstate_change_high(
static unsigned int max_sampled_pstate_wait_us; /* data collection */
static bool forced_pstate_allow; /* help with revert wa */
- unsigned int debug_data;
+ unsigned int debug_data = 0;
unsigned int i;
if (forced_pstate_allow) {
@@ -242,7 +242,7 @@ void hubbub1_wm_change_req_wa(struct hubbub *hubbub)
bool hubbub1_program_urgent_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -356,7 +356,7 @@ bool hubbub1_program_urgent_watermarks(
bool hubbub1_program_stutter_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -501,7 +501,7 @@ bool hubbub1_program_stutter_watermarks(
bool hubbub1_program_pstate_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -582,7 +582,7 @@ bool hubbub1_program_pstate_watermarks(
bool hubbub1_program_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -679,24 +679,6 @@ void hubbub1_update_dchub(
dh_data->dchub_info_valid = false;
}
-void hubbub1_toggle_watermark_change_req(struct hubbub *hubbub)
-{
- struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
-
- uint32_t watermark_change_req;
-
- REG_GET(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL,
- DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, &watermark_change_req);
-
- if (watermark_change_req)
- watermark_change_req = 0;
- else
- watermark_change_req = 1;
-
- REG_UPDATE(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL,
- DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, watermark_change_req);
-}
-
void hubbub1_soft_reset(struct hubbub *hubbub, bool reset)
{
struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h
index e8752077571a..fa5c4c18ed59 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h
@@ -171,11 +171,36 @@ struct dcn_hubbub_registers {
uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B;
uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C;
uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D;
+ uint32_t DCHUBBUB_ARB_MALL_CNTL;
uint32_t SDPIF_REQUEST_RATE_LIMIT;
uint32_t DCHUBBUB_SDPIF_CFG0;
uint32_t DCHUBBUB_SDPIF_CFG1;
uint32_t DCHUBBUB_CLOCK_CNTL;
uint32_t DCHUBBUB_MEM_PWR_MODE_CTRL;
+ uint32_t DCHUBBUB_ARB_QOS_FORCE;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B;
+ uint32_t DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B;
+ uint32_t DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A;
+ uint32_t DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B;
+ uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A;
+ uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B;
+ uint32_t DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A;
+ uint32_t DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B;
+ uint32_t DCHUBBUB_ARB_FRAC_URG_BW_MALL_A;
+ uint32_t DCHUBBUB_ARB_FRAC_URG_BW_MALL_B;
+ uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL1;
+ uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL2;
+ uint32_t DCHUBBUB_CTRL_STATUS;
};
#define HUBBUB_REG_FIELD_LIST_DCN32(type) \
@@ -194,7 +219,13 @@ struct dcn_hubbub_registers {
type DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A;\
type DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B;\
type DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C;\
- type DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D
+ type DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D;\
+ type MALL_PREFETCH_COMPLETE;\
+ type MALL_IN_USE
+
+#define HUBBUB_REG_FIELD_LIST_DCN35(type) \
+ type DCHUBBUB_FGCG_REP_DIS;\
+ type DCHUBBUB_ARB_ALLOW_CSTATE_DEEPSLEEP_LEGACY_MODE
/* set field name */
#define HUBBUB_SF(reg_name, field_name, post_fix)\
@@ -285,7 +316,17 @@ struct dcn_hubbub_registers {
type DCN_VM_ERROR_VMID;\
type DCN_VM_ERROR_TABLE_LEVEL;\
type DCN_VM_ERROR_PIPE;\
- type DCN_VM_ERROR_INTERRUPT_STATUS
+ type DCN_VM_ERROR_INTERRUPT_STATUS;\
+ type DCHUBBUB_TIMEOUT_ERROR_STATUS;\
+ type DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD;\
+ type DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD;\
+ type DCHUBBUB_TIMEOUT_DETECTION_EN;\
+ type DCHUBBUB_TIMEOUT_TIMER_RESET;\
+ type ROB_UNDERFLOW_STATUS;\
+ type ROB_OVERFLOW_STATUS;\
+ type ROB_OVERFLOW_CLEAR;\
+ type DCHUBBUB_HW_DEBUG;\
+ type CSTATE_SWATH_CHK_GOOD_MODE
#define HUBBUB_STUTTER_REG_FIELD_LIST(type) \
type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A;\
@@ -297,6 +338,7 @@ struct dcn_hubbub_registers {
type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C;\
type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D
+
#define HUBBUB_HVM_REG_FIELD_LIST(type) \
type DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD;\
type DCHUBBUB_ARB_VM_ROW_URGENCY_WATERMARK_A;\
@@ -375,12 +417,36 @@ struct dcn_hubbub_registers {
type DET_MEM_PWR_LS_MODE
+#define HUBBUB_REG_FIELD_LIST_DCN4_01(type) \
+ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A;\
+ type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A;\
+ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B;\
+ type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B;\
+ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A;\
+ type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A;\
+ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B;\
+ type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B;\
+ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A;\
+ type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A;\
+ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B;\
+ type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B;\
+ type DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A;\
+ type DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B;\
+ type DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A;\
+ type DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B;\
+ type DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A;\
+ type DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B;\
+ type DCHUBBUB_ARB_FRAC_URG_BW_MALL_A;\
+ type DCHUBBUB_ARB_FRAC_URG_BW_MALL_B
+
struct dcn_hubbub_shift {
DCN_HUBBUB_REG_FIELD_LIST(uint8_t);
HUBBUB_STUTTER_REG_FIELD_LIST(uint8_t);
HUBBUB_HVM_REG_FIELD_LIST(uint8_t);
HUBBUB_RET_REG_FIELD_LIST(uint8_t);
HUBBUB_REG_FIELD_LIST_DCN32(uint8_t);
+ HUBBUB_REG_FIELD_LIST_DCN35(uint8_t);
+ HUBBUB_REG_FIELD_LIST_DCN4_01(uint8_t);
};
struct dcn_hubbub_mask {
@@ -389,6 +455,8 @@ struct dcn_hubbub_mask {
HUBBUB_HVM_REG_FIELD_LIST(uint32_t);
HUBBUB_RET_REG_FIELD_LIST(uint32_t);
HUBBUB_REG_FIELD_LIST_DCN32(uint32_t);
+ HUBBUB_REG_FIELD_LIST_DCN35(uint32_t);
+ HUBBUB_REG_FIELD_LIST_DCN4_01(uint32_t);
};
struct dc;
@@ -399,7 +467,7 @@ struct dcn10_hubbub {
const struct dcn_hubbub_shift *shifts;
const struct dcn_hubbub_mask *masks;
unsigned int debug_test_index_pstate;
- struct dcn_watermark_set watermarks;
+ union dcn_watermark_set watermarks;
};
void hubbub1_update_dchub(
@@ -413,7 +481,7 @@ void hubbub1_wm_change_req_wa(struct hubbub *hubbub);
bool hubbub1_program_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
@@ -421,9 +489,6 @@ void hubbub1_allow_self_refresh_control(struct hubbub *hubbub, bool allow);
bool hubbub1_is_allow_self_refresh_enabled(struct hubbub *hubub);
-void hubbub1_toggle_watermark_change_req(
- struct hubbub *hubbub);
-
void hubbub1_wm_read_state(struct hubbub *hubbub,
struct dcn_hubbub_wm *wm);
@@ -436,17 +501,17 @@ void hubbub1_construct(struct hubbub *hubbub,
bool hubbub1_program_urgent_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
bool hubbub1_program_stutter_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
bool hubbub1_program_pstate_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.c
index 6eebcb22e317..5c6f7ddafd6b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.c
@@ -339,15 +339,36 @@ static enum dcn_hubbub_page_table_block_size page_table_block_size_to_hw(unsigne
case 4096:
block_size = DCN_PAGE_TABLE_BLOCK_SIZE_4KB;
break;
- case 65536:
- block_size = DCN_PAGE_TABLE_BLOCK_SIZE_64KB;
+ case 8192:
+ block_size = DCN_PAGE_TABLE_BLOCK_SIZE_8KB;
+ break;
+ case 16384:
+ block_size = DCN_PAGE_TABLE_BLOCK_SIZE_16KB;
break;
case 32768:
block_size = DCN_PAGE_TABLE_BLOCK_SIZE_32KB;
break;
+ case 65536:
+ block_size = DCN_PAGE_TABLE_BLOCK_SIZE_64KB;
+ break;
+ case 131072:
+ block_size = DCN_PAGE_TABLE_BLOCK_SIZE_128KB;
+ break;
+ case 262144:
+ block_size = DCN_PAGE_TABLE_BLOCK_SIZE_256KB;
+ break;
+ case 524288:
+ block_size = DCN_PAGE_TABLE_BLOCK_SIZE_512KB;
+ break;
+ case 1048576:
+ block_size = DCN_PAGE_TABLE_BLOCK_SIZE_1024KB;
+ break;
+ case 2097152:
+ block_size = DCN_PAGE_TABLE_BLOCK_SIZE_2048KB;
+ break;
default:
ASSERT(false);
- block_size = page_table_block_size;
+ block_size = DCN_PAGE_TABLE_BLOCK_SIZE_4KB;
break;
}
@@ -570,7 +591,7 @@ void hubbub2_get_dchub_ref_freq(struct hubbub *hubbub,
static bool hubbub2_program_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -595,7 +616,8 @@ static bool hubbub2_program_watermarks(
hubbub1->base.ctx->dc->clk_mgr->clks.p_state_change_support == false)
safe_to_lower = true;
- hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+ if (hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
+ wm_pending = true;
REG_SET(DCHUBBUB_ARB_SAT_LEVEL, 0,
DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h
index 2f6146bf1d32..46d8f5c70750 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h
@@ -27,7 +27,7 @@
#define __DC_HUBBUB_DCN20_H__
#include "dcn10/dcn10_hubbub.h"
-#include "dcn20_vmid.h"
+#include "dcn20/dcn20_vmid.h"
#define TO_DCN20_HUBBUB(hubbub)\
container_of(hubbub, struct dcn20_hubbub, base)
@@ -85,7 +85,7 @@ struct dcn20_hubbub {
const struct dcn_hubbub_shift *shifts;
const struct dcn_hubbub_mask *masks;
unsigned int debug_test_index_pstate;
- struct dcn_watermark_set watermarks;
+ union dcn_watermark_set watermarks;
int num_vmid;
struct dcn20_vmid vmid[16];
unsigned int detile_buf_size;
@@ -96,6 +96,7 @@ struct dcn20_hubbub {
unsigned int det1_size;
unsigned int det2_size;
unsigned int det3_size;
+ bool allow_sdpif_rate_limit_when_cstate_req;
};
void hubbub2_construct(struct dcn20_hubbub *hubbub,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.c
index 037d265431c6..63798132ed95 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.c
@@ -52,7 +52,7 @@
static bool hubbub201_program_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -103,5 +103,5 @@ void hubbub201_construct(struct dcn20_hubbub *hubbub,
hubbub->masks = hubbub_mask;
hubbub->debug_test_index_pstate = 0xB;
- hubbub->detile_buf_size = 164 * 1024;
+ hubbub->detile_buf_size = 164 * 1024; /* 164KB for DCN2.0 */
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.h
index 5aeca0be3e15..5aeca0be3e15 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c
index aeb0e0d9b70a..e4496ad203b2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c
@@ -132,15 +132,15 @@ int hubbub21_init_dchub(struct hubbub *hubbub,
// Init VMID 0 based on PA config
dcn20_vmid_setup(&hubbub1->vmid[0], &phys_config);
}
-
- dcn21_dchvm_init(hubbub);
-
+ if (!hubbub1->base.ctx->dc->config.skip_riommu_prefetch_wa) {
+ dcn21_dchvm_init(hubbub);
+ }
return hubbub1->num_vmid;
}
bool hubbub21_program_urgent_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -334,7 +334,7 @@ bool hubbub21_program_urgent_watermarks(
bool hubbub21_program_stutter_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -487,7 +487,7 @@ bool hubbub21_program_stutter_watermarks(
bool hubbub21_program_pstate_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -573,7 +573,7 @@ bool hubbub21_program_pstate_watermarks(
bool hubbub21_program_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.h
index d8eb2bb7282c..ab2ce0313529 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.h
@@ -127,22 +127,22 @@ int hubbub21_init_dchub(struct hubbub *hubbub,
struct dcn_hubbub_phys_addr_config *pa_config);
bool hubbub21_program_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
bool hubbub21_program_urgent_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
bool hubbub21_program_stutter_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
bool hubbub21_program_pstate_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
index 152c9c5733f1..e7e5f6d4778e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c
@@ -95,7 +95,7 @@ int hubbub3_init_dchub_sys_ctx(struct hubbub *hubbub,
bool hubbub3_program_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -129,7 +129,8 @@ bool hubbub3_program_watermarks(
REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);
- hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+ if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter)
+ hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
return wm_pending;
}
@@ -339,6 +340,7 @@ bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub,
return false;
switch (dcc_control) {
+ case dcc_control__256_256:
case dcc_control__256_256_xxx:
output->grph.rgb.max_uncompressed_blk_size = 256;
output->grph.rgb.max_compressed_blk_size = 256;
@@ -346,6 +348,7 @@ bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub,
output->grph.rgb.dcc_controls.dcc_256_256_unconstrained = 1;
output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
break;
+ case dcc_control__256_128:
case dcc_control__128_128_xxx:
output->grph.rgb.max_uncompressed_blk_size = 128;
output->grph.rgb.max_compressed_blk_size = 128;
@@ -353,6 +356,7 @@ bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub,
output->grph.rgb.dcc_controls.dcc_128_128_uncontrained = 1;
output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
break;
+ case dcc_control__256_64:
case dcc_control__256_64_64:
output->grph.rgb.max_uncompressed_blk_size = 256;
output->grph.rgb.max_compressed_blk_size = 64;
@@ -436,6 +440,35 @@ void hubbub3_init_watermarks(struct hubbub *hubbub)
REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, reg);
}
+void hubbub3_get_det_sizes(struct hubbub *hubbub, uint32_t *curr_det_sizes, uint32_t *target_det_sizes)
+{
+ struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
+
+ REG_GET_2(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, &curr_det_sizes[0],
+ DET0_SIZE, &target_det_sizes[0]);
+
+ REG_GET_2(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, &curr_det_sizes[1],
+ DET1_SIZE, &target_det_sizes[1]);
+
+ REG_GET_2(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, &curr_det_sizes[2],
+ DET2_SIZE, &target_det_sizes[2]);
+
+ REG_GET_2(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, &curr_det_sizes[3],
+ DET3_SIZE, &target_det_sizes[3]);
+
+}
+
+uint32_t hubbub3_compbuf_config_error(struct hubbub *hubbub)
+{
+ struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub);
+ uint32_t compbuf_config_error = 0;
+
+ REG_GET(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR,
+ &compbuf_config_error);
+
+ return compbuf_config_error;
+}
+
static const struct hubbub_funcs hubbub30_funcs = {
.update_dchub = hubbub2_update_dchub,
.init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx,
@@ -453,6 +486,8 @@ static const struct hubbub_funcs hubbub30_funcs = {
.force_pstate_change_control = hubbub3_force_pstate_change_control,
.init_watermarks = hubbub3_init_watermarks,
.hubbub_read_state = hubbub2_read_state,
+ .get_det_sizes = hubbub3_get_det_sizes,
+ .compbuf_config_error = hubbub3_compbuf_config_error,
};
void hubbub3_construct(struct dcn20_hubbub *hubbub3,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h
index 7b597908b937..49a469969d36 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h
@@ -124,7 +124,7 @@ bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub,
bool hubbub3_program_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
@@ -133,4 +133,10 @@ void hubbub3_force_pstate_change_control(struct hubbub *hubbub,
void hubbub3_init_watermarks(struct hubbub *hubbub);
+void hubbub3_get_det_sizes(struct hubbub *hubbub,
+ uint32_t *curr_det_sizes,
+ uint32_t *target_det_sizes);
+
+uint32_t hubbub3_compbuf_config_error(struct hubbub *hubbub);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.c
index a046664e2031..c1959672df50 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.c
@@ -63,6 +63,7 @@ static const struct hubbub_funcs hubbub301_funcs = {
.verify_allow_pstate_change_high = hubbub1_verify_allow_pstate_change_high,
.force_wm_propagate_to_pipes = hubbub3_force_wm_propagate_to_pipes,
.force_pstate_change_control = hubbub3_force_pstate_change_control,
+ .init_watermarks = hubbub3_init_watermarks,
.hubbub_read_state = hubbub2_read_state,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.h
index b599f4475479..b599f4475479 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
index 1f4e0b6261ad..cdb20251a154 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c
@@ -109,6 +109,28 @@ static void dcn31_program_det_size(struct hubbub *hubbub, int hubp_inst, unsigne
+ hubbub2->det3_size + hubbub2->compbuf_size_segments <= hubbub2->crb_size_segs);
}
+static void dcn31_wait_for_det_apply(struct hubbub *hubbub, int hubp_inst)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+ switch (hubp_inst) {
+ case 0:
+ REG_WAIT(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, hubbub2->det0_size, 1000, 30);
+ break;
+ case 1:
+ REG_WAIT(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, hubbub2->det1_size, 1000, 30);
+ break;
+ case 2:
+ REG_WAIT(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, hubbub2->det2_size, 1000, 30);
+ break;
+ case 3:
+ REG_WAIT(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, hubbub2->det3_size, 1000, 30);
+ break;
+ default:
+ break;
+ }
+}
+
static void dcn31_program_compbuf_size(struct hubbub *hubbub, unsigned int compbuf_size_kb, bool safe_to_increase)
{
struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
@@ -150,7 +172,7 @@ static uint32_t convert_and_clamp(
static bool hubbub31_program_urgent_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -340,7 +362,7 @@ static bool hubbub31_program_urgent_watermarks(
static bool hubbub31_program_stutter_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -355,7 +377,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
@@ -371,7 +393,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->a.cstate_pstate.cstate_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->a.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
@@ -387,7 +409,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_A calculated =%d\n"
@@ -403,7 +425,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->a.cstate_pstate.cstate_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->a.cstate_pstate.cstate_exit_z8_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_A calculated =%d\n"
@@ -420,7 +442,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
@@ -436,7 +458,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->b.cstate_pstate.cstate_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->b.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
@@ -452,7 +474,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_B calculated =%d\n"
@@ -468,7 +490,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->b.cstate_pstate.cstate_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->b.cstate_pstate.cstate_exit_z8_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_B calculated =%d\n"
@@ -485,7 +507,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n"
@@ -501,7 +523,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->c.cstate_pstate.cstate_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->c.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n"
@@ -517,7 +539,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_C calculated =%d\n"
@@ -533,7 +555,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->c.cstate_pstate.cstate_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->c.cstate_pstate.cstate_exit_z8_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_C calculated =%d\n"
@@ -550,7 +572,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n"
@@ -566,7 +588,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->d.cstate_pstate.cstate_exit_ns;
prog_wm_value = convert_and_clamp(
watermarks->d.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n"
@@ -582,7 +604,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, 0,
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_D calculated =%d\n"
@@ -598,7 +620,7 @@ static bool hubbub31_program_stutter_watermarks(
watermarks->d.cstate_pstate.cstate_exit_z8_ns;
prog_wm_value = convert_and_clamp(
watermarks->d.cstate_pstate.cstate_exit_z8_ns,
- refclk_mhz, 0xffff);
+ refclk_mhz, 0xfffff);
REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, 0,
DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, prog_wm_value);
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_D calculated =%d\n"
@@ -613,7 +635,7 @@ static bool hubbub31_program_stutter_watermarks(
static bool hubbub31_program_pstate_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -695,7 +717,7 @@ static bool hubbub31_program_pstate_watermarks(
static bool hubbub31_program_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -728,7 +750,8 @@ static bool hubbub31_program_watermarks(
REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/
- hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+ if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter)
+ hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
return wm_pending;
}
@@ -844,6 +867,7 @@ static bool hubbub31_get_dcc_compression_cap(struct hubbub *hubbub,
return false;
switch (dcc_control) {
+ case dcc_control__256_256:
case dcc_control__256_256_xxx:
output->grph.rgb.max_uncompressed_blk_size = 256;
output->grph.rgb.max_compressed_blk_size = 256;
@@ -859,12 +883,14 @@ static bool hubbub31_get_dcc_compression_cap(struct hubbub *hubbub,
output->grph.rgb.dcc_controls.dcc_256_128_128 = 1;
break;
case dcc_control__256_64_64:
+ case dcc_control__256_64:
output->grph.rgb.max_uncompressed_blk_size = 256;
output->grph.rgb.max_compressed_blk_size = 64;
output->grph.rgb.independent_64b_blks = true;
output->grph.rgb.dcc_controls.dcc_256_64_64 = 1;
break;
case dcc_control__256_128_128:
+ case dcc_control__256_128:
output->grph.rgb.max_uncompressed_blk_size = 256;
output->grph.rgb.max_compressed_blk_size = 128;
output->grph.rgb.independent_64b_blks = false;
@@ -1041,9 +1067,12 @@ static const struct hubbub_funcs hubbub31_funcs = {
.is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
.verify_allow_pstate_change_high = hubbub31_verify_allow_pstate_change_high,
.program_det_size = dcn31_program_det_size,
+ .wait_for_det_apply = dcn31_wait_for_det_apply,
.program_compbuf_size = dcn31_program_compbuf_size,
.init_crb = dcn31_init_crb,
.hubbub_read_state = hubbub2_read_state,
+ .get_det_sizes = hubbub3_get_det_sizes,
+ .compbuf_config_error = hubbub3_compbuf_config_error,
};
void hubbub31_construct(struct dcn20_hubbub *hubbub31,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.h
index 89d6208287b5..89d6208287b5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
index 8bfef6d095b2..4d4ca6d77bbd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c
@@ -28,6 +28,7 @@
#include "dcn32_hubbub.h"
#include "dm_services.h"
#include "reg_helper.h"
+#include "dal_asic_id.h"
#define CTX \
@@ -72,6 +73,14 @@ static void dcn32_init_crb(struct hubbub *hubbub)
REG_UPDATE(DCHUBBUB_DEBUG_CTRL_0, DET_DEPTH, 0x47F);
}
+static void hubbub32_set_sdp_control(struct hubbub *hubbub, bool dc_control)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+ REG_UPDATE(DCHUBBUB_SDPIF_CFG0,
+ SDPIF_PORT_CONTROL, dc_control);
+}
+
void hubbub32_set_request_limit(struct hubbub *hubbub, int memory_channel_count, int words_per_channel)
{
struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
@@ -128,7 +137,7 @@ void dcn32_program_det_size(struct hubbub *hubbub, int hubp_inst, unsigned int d
}
}
-static void dcn32_program_compbuf_size(struct hubbub *hubbub, unsigned int compbuf_size_kb, bool safe_to_increase)
+void dcn32_program_compbuf_size(struct hubbub *hubbub, unsigned int compbuf_size_kb, bool safe_to_increase)
{
struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
unsigned int compbuf_size_segments = (compbuf_size_kb + DCN32_CRB_SEGMENT_SIZE_KB - 1) / DCN32_CRB_SEGMENT_SIZE_KB;
@@ -167,7 +176,7 @@ static uint32_t convert_and_clamp(
bool hubbub32_program_urgent_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -357,7 +366,7 @@ bool hubbub32_program_urgent_watermarks(
bool hubbub32_program_stutter_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -503,7 +512,7 @@ bool hubbub32_program_stutter_watermarks(
bool hubbub32_program_pstate_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -656,7 +665,7 @@ bool hubbub32_program_pstate_watermarks(
bool hubbub32_program_usr_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
@@ -750,12 +759,22 @@ void hubbub32_force_usr_retraining_allow(struct hubbub *hubbub, bool allow)
static bool hubbub32_program_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
+ struct dc *dc = hubbub->ctx->dc;
bool wm_pending = false;
+ if (!safe_to_lower && dc->debug.disable_stutter_for_wm_program &&
+ (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) ||
+ ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) {
+ /* before raising watermarks, SDP control is given to DF and stutter must be disabled */
+ wm_pending = true;
+ hubbub32_set_sdp_control(hubbub, false);
+ hubbub1_allow_self_refresh_control(hubbub, false);
+ }
+
if (hubbub32_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
wm_pending = true;
@@ -786,9 +805,20 @@ static bool hubbub32_program_watermarks(
REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/
- hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+ if (safe_to_lower) {
+ /* after lowering watermarks, stutter setting is restored, SDP control given to DC */
+ hubbub1_allow_self_refresh_control(hubbub, !dc->debug.disable_stutter);
- hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow);
+ if (dc->debug.disable_stutter_for_wm_program &&
+ (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) ||
+ ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) {
+ hubbub32_set_sdp_control(hubbub, true);
+ }
+ } else if (dc->debug.disable_stutter) {
+ hubbub1_allow_self_refresh_control(hubbub, !dc->debug.disable_stutter);
+ }
+
+ hubbub32_force_usr_retraining_allow(hubbub, dc->debug.force_usr_allow);
return wm_pending;
}
@@ -945,6 +975,17 @@ void hubbub32_force_wm_propagate_to_pipes(struct hubbub *hubbub)
DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value);
}
+void hubbub32_get_mall_en(struct hubbub *hubbub, unsigned int *mall_in_use)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+ uint32_t prefetch_complete, mall_en;
+
+ REG_GET_2(DCHUBBUB_ARB_MALL_CNTL, MALL_IN_USE, &mall_en,
+ MALL_PREFETCH_COMPLETE, &prefetch_complete);
+
+ *mall_in_use = prefetch_complete && mall_en;
+}
+
void hubbub32_init(struct hubbub *hubbub)
{
struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
@@ -962,8 +1003,7 @@ void hubbub32_init(struct hubbub *hubbub)
ignore the "df_pre_cstate_req" from the SDP port control.
only the DCN will determine when to connect the SDP port
*/
- REG_UPDATE(DCHUBBUB_SDPIF_CFG0,
- SDPIF_PORT_CONTROL, 1);
+ hubbub32_set_sdp_control(hubbub, true);
/*Set SDP's max outstanding request to 512
must set the register back to 0 (max outstanding = 256) in zero frame buffer mode*/
REG_UPDATE(DCHUBBUB_SDPIF_CFG1,
@@ -995,7 +1035,10 @@ static const struct hubbub_funcs hubbub32_funcs = {
.init_crb = dcn32_init_crb,
.hubbub_read_state = hubbub2_read_state,
.force_usr_retraining_allow = hubbub32_force_usr_retraining_allow,
- .set_request_limit = hubbub32_set_request_limit
+ .set_request_limit = hubbub32_set_request_limit,
+ .get_mall_en = hubbub32_get_mall_en,
+ .get_det_sizes = hubbub3_get_det_sizes,
+ .compbuf_config_error = hubbub3_compbuf_config_error,
};
void hubbub32_construct(struct dcn20_hubbub *hubbub2,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.h
index ad33427192c6..bfc55dbbad1f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.h
@@ -110,31 +110,33 @@
HUBBUB_SF(DCHUBBUB_CLOCK_CNTL, DCFCLK_R_DCHUBBUB_GATE_DIS, mask_sh),\
HUBBUB_SF(DCHUBBUB_SDPIF_CFG0, SDPIF_PORT_CONTROL, mask_sh),\
HUBBUB_SF(DCHUBBUB_SDPIF_CFG1, SDPIF_MAX_NUM_OUTSTANDING, mask_sh),\
- HUBBUB_SF(DCHUBBUB_MEM_PWR_MODE_CTRL, DET_MEM_PWR_LS_MODE, mask_sh)
+ HUBBUB_SF(DCHUBBUB_MEM_PWR_MODE_CTRL, DET_MEM_PWR_LS_MODE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_MALL_CNTL, MALL_PREFETCH_COMPLETE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_MALL_CNTL, MALL_IN_USE, mask_sh)
bool hubbub32_program_urgent_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
bool hubbub32_program_stutter_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
bool hubbub32_program_pstate_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
bool hubbub32_program_usr_watermarks(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
@@ -157,4 +159,8 @@ void hubbub32_construct(struct dcn20_hubbub *hubbub2,
void hubbub32_set_request_limit(struct hubbub *hubbub, int umc_count, int words_per_umc);
+void hubbub32_get_mall_en(struct hubbub *hubbub, unsigned int *mall_in_use);
+
+void dcn32_program_compbuf_size(struct hubbub *hubbub, unsigned int compbuf_size_kb, bool safe_to_increase);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
new file mode 100644
index 000000000000..a443722a8632
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c
@@ -0,0 +1,615 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dcn30/dcn30_hubbub.h"
+#include "dcn31/dcn31_hubbub.h"
+#include "dcn32/dcn32_hubbub.h"
+#include "dcn35_hubbub.h"
+#include "dm_services.h"
+#include "reg_helper.h"
+
+/* Register helpers: each macro below expands against a local named hubbub2. */
+#define CTX \
+	hubbub2->base.ctx
+#define DC_LOGGER \
+	hubbub2->base.ctx->logger
+#define REG(reg)\
+	hubbub2->regs->reg
+
+#undef FN
+#define FN(reg_name, field_name) \
+	hubbub2->shifts->field_name, hubbub2->masks->field_name
+
+#define DCN35_CRB_SEGMENT_SIZE_KB 64 /* CRB segment size in KB */
+
+void dcn35_init_crb(struct hubbub *hubbub)
+{
+	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+	/* Cache the *_SIZE_CURRENT fields of DET0-3 and the compbuf in hubbub2. */
+	REG_GET(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT,
+		&hubbub2->det0_size);
+
+	REG_GET(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT,
+		&hubbub2->det1_size);
+
+	REG_GET(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT,
+		&hubbub2->det2_size);
+
+	REG_GET(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT,
+		&hubbub2->det3_size);
+
+	REG_GET(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE_CURRENT,
+		&hubbub2->compbuf_size_segments);
+	/* Reserved compbuf space scales with pixel_chunk_size (/32 and /128). */
+	REG_SET_2(COMPBUF_RESERVED_SPACE, 0,
+			COMPBUF_RESERVED_SPACE_64B, hubbub2->pixel_chunk_size / 32,
+			COMPBUF_RESERVED_SPACE_ZS, hubbub2->pixel_chunk_size / 128);
+	REG_UPDATE(DCHUBBUB_DEBUG_CTRL_0, DET_DEPTH, 0x5FF);
+}
+
+void dcn35_program_compbuf_size(struct hubbub *hubbub, unsigned int compbuf_size_kb, bool safe_to_increase)
+{
+	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+	unsigned int compbuf_size_segments = (compbuf_size_kb + DCN35_CRB_SEGMENT_SIZE_KB - 1) / DCN35_CRB_SEGMENT_SIZE_KB;
+	/* KB is rounded up to segments; growing needs safe_to_increase, shrinking does not. */
+	if (safe_to_increase || compbuf_size_segments <= hubbub2->compbuf_size_segments) {
+		if (compbuf_size_segments > hubbub2->compbuf_size_segments) {
+			REG_WAIT(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, hubbub2->det0_size, 1, 100);
+			REG_WAIT(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, hubbub2->det1_size, 1, 100);
+			REG_WAIT(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, hubbub2->det2_size, 1, 100);
+			REG_WAIT(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, hubbub2->det3_size, 1, 100);
+		}
+		/* Should never be hit: DET0-3 plus compbuf must fit in crb_size_segs, else the hw config is erroneous */
+		ASSERT(hubbub2->det0_size + hubbub2->det1_size + hubbub2->det2_size
+				+ hubbub2->det3_size + compbuf_size_segments <= hubbub2->crb_size_segs);
+		REG_UPDATE(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE, compbuf_size_segments);
+		hubbub2->compbuf_size_segments = compbuf_size_segments;
+		ASSERT(REG_GET(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, &compbuf_size_segments) && !compbuf_size_segments);
+	}
+}
+
+static uint32_t convert_and_clamp(
+		uint32_t wm_ns,
+		uint32_t refclk_mhz,
+		uint32_t clamp_value)
+{
+	/*
+	 * wm_ns * refclk_mhz / 1000, capped at clamp_value. The product is
+	 * formed in 32 bits, so it wraps like any uint32_t multiply.
+	 */
+	uint32_t cycles = (wm_ns * refclk_mhz) / 1000;
+
+	if (cycles <= clamp_value)
+		return cycles;
+
+	return clamp_value;
+}
+
+static bool hubbub35_program_stutter_z8_watermarks(
+		struct hubbub *hubbub,
+		union dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower)
+{
+	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+	uint32_t prog_wm_value;
+	bool wm_pending = false;
+	/* Raise a watermark at once; lower it only when safe_to_lower, else return pending. */
+	/* clock state A (same raise/lower rule as states B, C and D) */
+	if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns
+			> hubbub2->watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns) {
+		hubbub2->watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns =
+				watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns,
+				refclk_mhz, 0xfffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, 0,
+				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_A calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns, prog_wm_value);
+	} else if (watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns
+			< hubbub2->watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns)
+		wm_pending = true;
+
+	if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_z8_ns
+			> hubbub2->watermarks.a.cstate_pstate.cstate_exit_z8_ns) {
+		hubbub2->watermarks.a.cstate_pstate.cstate_exit_z8_ns =
+				watermarks->a.cstate_pstate.cstate_exit_z8_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->a.cstate_pstate.cstate_exit_z8_ns,
+				refclk_mhz, 0xfffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, 0,
+				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_A calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->a.cstate_pstate.cstate_exit_z8_ns, prog_wm_value);
+	} else if (watermarks->a.cstate_pstate.cstate_exit_z8_ns
+			< hubbub2->watermarks.a.cstate_pstate.cstate_exit_z8_ns)
+		wm_pending = true;
+
+	/* clock state B */
+
+	if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns
+			> hubbub2->watermarks.b.cstate_pstate.cstate_enter_plus_exit_z8_ns) {
+		hubbub2->watermarks.b.cstate_pstate.cstate_enter_plus_exit_z8_ns =
+				watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns,
+				refclk_mhz, 0xfffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, 0,
+				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_B calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns, prog_wm_value);
+	} else if (watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns
+			< hubbub2->watermarks.b.cstate_pstate.cstate_enter_plus_exit_z8_ns)
+		wm_pending = true;
+
+	if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_z8_ns
+			> hubbub2->watermarks.b.cstate_pstate.cstate_exit_z8_ns) {
+		hubbub2->watermarks.b.cstate_pstate.cstate_exit_z8_ns =
+				watermarks->b.cstate_pstate.cstate_exit_z8_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->b.cstate_pstate.cstate_exit_z8_ns,
+				refclk_mhz, 0xfffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, 0,
+				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_B calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->b.cstate_pstate.cstate_exit_z8_ns, prog_wm_value);
+	} else if (watermarks->b.cstate_pstate.cstate_exit_z8_ns
+			< hubbub2->watermarks.b.cstate_pstate.cstate_exit_z8_ns)
+		wm_pending = true;
+
+	/* clock state C */
+	if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns
+			> hubbub2->watermarks.c.cstate_pstate.cstate_enter_plus_exit_z8_ns) {
+		hubbub2->watermarks.c.cstate_pstate.cstate_enter_plus_exit_z8_ns =
+				watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns,
+				refclk_mhz, 0xfffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, 0,
+				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_C calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns, prog_wm_value);
+	} else if (watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns
+			< hubbub2->watermarks.c.cstate_pstate.cstate_enter_plus_exit_z8_ns)
+		wm_pending = true;
+
+	if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_z8_ns
+			> hubbub2->watermarks.c.cstate_pstate.cstate_exit_z8_ns) {
+		hubbub2->watermarks.c.cstate_pstate.cstate_exit_z8_ns =
+				watermarks->c.cstate_pstate.cstate_exit_z8_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->c.cstate_pstate.cstate_exit_z8_ns,
+				refclk_mhz, 0xfffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, 0,
+				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_C calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->c.cstate_pstate.cstate_exit_z8_ns, prog_wm_value);
+	} else if (watermarks->c.cstate_pstate.cstate_exit_z8_ns
+			< hubbub2->watermarks.c.cstate_pstate.cstate_exit_z8_ns)
+		wm_pending = true;
+
+	/* clock state D */
+	if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns
+			> hubbub2->watermarks.d.cstate_pstate.cstate_enter_plus_exit_z8_ns) {
+		hubbub2->watermarks.d.cstate_pstate.cstate_enter_plus_exit_z8_ns =
+				watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns,
+				refclk_mhz, 0xfffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, 0,
+				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_D calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns, prog_wm_value);
+	} else if (watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns
+			< hubbub2->watermarks.d.cstate_pstate.cstate_enter_plus_exit_z8_ns)
+		wm_pending = true;
+
+	if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_z8_ns
+			> hubbub2->watermarks.d.cstate_pstate.cstate_exit_z8_ns) {
+		hubbub2->watermarks.d.cstate_pstate.cstate_exit_z8_ns =
+				watermarks->d.cstate_pstate.cstate_exit_z8_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->d.cstate_pstate.cstate_exit_z8_ns,
+				refclk_mhz, 0xfffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, 0,
+				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_D calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->d.cstate_pstate.cstate_exit_z8_ns, prog_wm_value);
+	} else if (watermarks->d.cstate_pstate.cstate_exit_z8_ns
+			< hubbub2->watermarks.d.cstate_pstate.cstate_exit_z8_ns)
+		wm_pending = true;
+
+	return wm_pending;
+}
+
+void hubbub35_get_dchub_ref_freq(struct hubbub *hubbub,
+		unsigned int dccg_ref_freq_inKhz,
+		unsigned int *dchub_ref_freq_inKhz)
+{
+	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+	uint32_t ref_div = 0;
+	uint32_t ref_en = 0;
+	unsigned int dc_refclk_khz = 24000;
+	/* dccg_ref_freq_inKhz is not read here; a fixed 24000 kHz base is used. */
+	REG_GET_2(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, &ref_div,
+			DCHUBBUB_GLOBAL_TIMER_ENABLE, &ref_en);
+
+	if (ref_en) {
+		if (ref_div == 2)
+			*dchub_ref_freq_inKhz = dc_refclk_khz / 2;
+		else
+			*dchub_ref_freq_inKhz = dc_refclk_khz;
+
+		/*
+		 * The external Reference Clock may change based on the board or
+		 * platform requirements and the programmable integer divide must
+		 * be programmed to provide a suitable DLG RefClk frequency between
+		 * a minimum of 20MHz and maximum of 50MHz
+		 */
+		if (*dchub_ref_freq_inKhz < 20000 || *dchub_ref_freq_inKhz > 50000)
+			ASSERT_CRITICAL(false);
+
+		return;
+	} else {
+		*dchub_ref_freq_inKhz = dc_refclk_khz;
+		/* bringup workaround for an init sequence issue: enable the timer with REFDIV=1 */
+		REG_UPDATE_2(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, 1,
+				DCHUBBUB_GLOBAL_TIMER_ENABLE, 1);
+		// HUBBUB global timer must be enabled.
+		ASSERT_CRITICAL(false);
+		return;
+	}
+}
+
+/* Program every watermark group; returns true if any helper reports a pending update. */
+bool hubbub35_program_watermarks(
+		struct hubbub *hubbub,
+		union dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower)
+{
+	bool wm_pending = false;
+	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+	if (hubbub32_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
+		wm_pending = true;
+
+	if (hubbub32_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
+		wm_pending = true;
+
+	if (hubbub32_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
+		wm_pending = true;
+
+	if (hubbub32_program_usr_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
+		wm_pending = true;
+
+	if (hubbub35_program_stutter_z8_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower))
+		wm_pending = true;
+
+	REG_SET(DCHUBBUB_ARB_SAT_LEVEL, 0,
+			DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz);
+	REG_UPDATE_2(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
+			DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0xFF,
+			DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD, 0xA);/*hw delta*/
+	REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DCHUBBUB_ARB_MAX_QOS_COMMIT_THRESHOLD, 0xF);
+	/* Self-refresh allow is set to !disable_stutter, but only in these two cases. */
+	if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter)
+		hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+
+	hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow);
+
+	return wm_pending;
+}
+
+/* Copy the raw set-A watermark registers (including Z8) into sets B, C and D */
+void hubbub35_init_watermarks(struct hubbub *hubbub)
+{
+	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+	uint32_t reg;
+
+	reg = REG_READ(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A);
+	REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, reg);
+
+	reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A);
+	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_D, reg);
+
+	reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A);
+	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_D, reg);
+
+	reg = REG_READ(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A);
+	REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, reg);
+
+	reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, reg);
+
+	reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, reg);
+
+	reg = REG_READ(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A);
+	REG_WRITE(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D, reg);
+
+	reg = REG_READ(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A);
+	REG_WRITE(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, reg);
+
+	reg = REG_READ(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A);
+	REG_WRITE(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D, reg);
+
+	reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, reg);
+
+	reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, reg);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, reg);
+	REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, reg);
+
+}
+
+void hubbub35_wm_read_state(struct hubbub *hubbub,
+		struct dcn_hubbub_wm *wm)
+{
+	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+	struct dcn_hubbub_wm_set *s;
+	/* Zero *wm, then read the watermark registers of sets A-D into wm->sets[0..3]. */
+	memset(wm, 0, sizeof(struct dcn_hubbub_wm));
+
+	s = &wm->sets[0]; /* set A */
+	s->wm_set = 0;
+	REG_GET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A,
+			DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, &s->data_urgent);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A,
+			DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, &s->sr_enter);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A,
+			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit);
+
+	REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A,
+			DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_change);
+
+	REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A,
+			DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, &s->usr_retrain);
+
+	REG_GET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A,
+			DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, &s->fclk_pstate_change);
+	/* NOTE(review): in every set sr_enter_exit_Z8 is read from the SR_EXIT_Z8 register - confirm. */
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A,
+			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, &s->sr_enter_exit_Z8);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A,
+			DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, &s->sr_enter_Z8);
+	s = &wm->sets[1]; /* set B */
+	s->wm_set = 1;
+	REG_GET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B,
+			DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, &s->data_urgent);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B,
+			DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, &s->sr_enter);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B,
+			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit);
+
+	REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B,
+			DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_change);
+
+	REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B,
+			DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, &s->usr_retrain);
+
+	REG_GET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B,
+			DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, &s->fclk_pstate_change);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B,
+			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, &s->sr_enter_exit_Z8);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B,
+			DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, &s->sr_enter_Z8);
+
+	s = &wm->sets[2]; /* set C */
+	s->wm_set = 2;
+	REG_GET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C,
+			DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, &s->data_urgent);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C,
+			DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, &s->sr_enter);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C,
+			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, &s->sr_exit);
+
+	REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C,
+			DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, &s->dram_clk_change);
+
+	REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C,
+			DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C, &s->usr_retrain);
+
+	REG_GET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C,
+			DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C, &s->fclk_pstate_change);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C,
+			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, &s->sr_enter_exit_Z8);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C,
+			DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, &s->sr_enter_Z8);
+
+	s = &wm->sets[3]; /* set D */
+	s->wm_set = 3;
+	REG_GET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D,
+			DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, &s->data_urgent);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D,
+			DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, &s->sr_enter);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D,
+			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, &s->sr_exit);
+
+	REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D,
+			DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, &s->dram_clk_change);
+
+	REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D,
+			DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D, &s->usr_retrain);
+
+	REG_GET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D,
+			DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D, &s->fclk_pstate_change);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D,
+			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, &s->sr_enter_exit_Z8);
+
+	REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D,
+			DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, &s->sr_enter_Z8);
+}
+
+static void hubbub35_set_fgcg(struct dcn20_hubbub *hubbub2, bool enable)
+{
+	REG_UPDATE(DCHUBBUB_CLOCK_CNTL, DCHUBBUB_FGCG_REP_DIS, !enable); /* field gets the inverse of enable */
+}
+
+void hubbub35_init(struct hubbub *hubbub)
+{
+	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+	/* Clock gating: set the DCHUBBUB *_GATE_DIS bits when debug.disable_clock_gate is set */
+	if (hubbub->ctx->dc->debug.disable_clock_gate) {
+		/*done in hwseq*/
+		/*REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);*/
+
+		REG_UPDATE_2(DCHUBBUB_CLOCK_CNTL,
+			DISPCLK_R_DCHUBBUB_GATE_DIS, 1,
+			DCFCLK_R_DCHUBBUB_GATE_DIS, 1);
+	}
+	hubbub35_set_fgcg(hubbub2,
+			  hubbub->ctx->dc->debug.enable_fine_grain_clock_gating
+				  .bits.dchubbub);
+	/*
+	ignore the "df_pre_cstate_req" from the SDP port control.
+	only the DCN will determine when to connect the SDP port
+	*/
+	REG_UPDATE(DCHUBBUB_SDPIF_CFG0,
+			SDPIF_PORT_CONTROL, 1);
+	/*Set SDP's max outstanding request
+	When set to 1: Max outstanding is 512
+	When set to 0: Max outstanding is 256
+	must set the register back to 0 (max outstanding = 256) in zero frame buffer mode*/
+	REG_UPDATE(DCHUBBUB_SDPIF_CFG1,
+			SDPIF_MAX_NUM_OUTSTANDING, 0);
+
+	REG_UPDATE_2(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
+			DCHUBBUB_ARB_MAX_REQ_OUTSTAND, 256,
+			DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 256);
+	/* Zero the cached set-A cstate_pstate watermarks held in hubbub2. */
+	memset(&hubbub2->watermarks.a.cstate_pstate, 0, sizeof(hubbub2->watermarks.a.cstate_pstate));
+}
+
+/*static void hubbub35_set_request_limit(struct hubbub *hubbub,
+ int memory_channel_count,
+ int words_per_channel)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+ uint32_t request_limit = 3 * memory_channel_count * words_per_channel / 4;
+
+	ASSERT((request_limit & (~0xFFF)) == 0); //must fit in the 12 bits kept by the 0xFFF mask
+	ASSERT(request_limit > 0); //a zero limit is never written to the register
+
+ if (request_limit > 0xFFF)
+ request_limit = 0xFFF;
+
+ if (request_limit > 0)
+ REG_UPDATE(SDPIF_REQUEST_RATE_LIMIT, SDPIF_REQUEST_RATE_LIMIT, request_limit);
+}*/
+
+static const struct hubbub_funcs hubbub35_funcs = { /* DCN3.5 ops; many reuse hubbub1/2/3/31/32 helpers */
+	.update_dchub = hubbub2_update_dchub,
+	.init_dchub_sys_ctx = hubbub31_init_dchub_sys_ctx,
+	.init_vm_ctx = hubbub2_init_vm_ctx,
+	.dcc_support_swizzle = hubbub3_dcc_support_swizzle,
+	.dcc_support_pixel_format = hubbub2_dcc_support_pixel_format,
+	.get_dcc_compression_cap = hubbub3_get_dcc_compression_cap,
+	.wm_read_state = hubbub35_wm_read_state,
+	.get_dchub_ref_freq = hubbub35_get_dchub_ref_freq,
+	.program_watermarks = hubbub35_program_watermarks,
+	.allow_self_refresh_control = hubbub1_allow_self_refresh_control,
+	.is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
+	.verify_allow_pstate_change_high = hubbub1_verify_allow_pstate_change_high,
+	.force_wm_propagate_to_pipes = hubbub32_force_wm_propagate_to_pipes,
+	.force_pstate_change_control = hubbub3_force_pstate_change_control,
+	.init_watermarks = hubbub35_init_watermarks,
+	.program_det_size = dcn32_program_det_size,
+	.program_compbuf_size = dcn35_program_compbuf_size,
+	.init_crb = dcn35_init_crb,
+	.hubbub_read_state = hubbub2_read_state,
+	.force_usr_retraining_allow = hubbub32_force_usr_retraining_allow,
+	.dchubbub_init = hubbub35_init,
+	.get_det_sizes = hubbub3_get_det_sizes,
+	.compbuf_config_error = hubbub3_compbuf_config_error,
+};
+
+void hubbub35_construct(struct dcn20_hubbub *hubbub2,
+	struct dc_context *ctx,
+	const struct dcn_hubbub_registers *hubbub_regs,
+	const struct dcn_hubbub_shift *hubbub_shift,
+	const struct dcn_hubbub_mask *hubbub_mask,
+	int det_size_kb,
+	int pixel_chunk_size_kb,
+	int config_return_buffer_size_kb)
+{
+	/* KB arguments are stored as bytes, or as whole CRB segments (rounded down). */
+	hubbub2->detile_buf_size = det_size_kb * 1024;
+	hubbub2->pixel_chunk_size = pixel_chunk_size_kb * 1024;
+	hubbub2->crb_size_segs = config_return_buffer_size_kb / DCN35_CRB_SEGMENT_SIZE_KB; /*todo*/
+	hubbub2->debug_test_index_pstate = 0xB;
+	hubbub2->masks = hubbub_mask;
+	hubbub2->shifts = hubbub_shift;
+	hubbub2->regs = hubbub_regs;
+	hubbub2->base.funcs = &hubbub35_funcs;
+	hubbub2->base.ctx = ctx;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h
new file mode 100644
index 000000000000..23fecf88556c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_HUBBUB_DCN35_H__
+#define __DC_HUBBUB_DCN35_H__
+
+#include "dcn32/dcn32_hubbub.h"
+
+#define HUBBUB_REG_LIST_DCN35(id)\
+	SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A),\
+	SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B),\
+	SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C),\
+	SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D),\
+	SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL),\
+	SR(DCHUBBUB_ARB_DRAM_STATE_CNTL),\
+	SR(DCHUBBUB_ARB_SAT_LEVEL),\
+	SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND),\
+	SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
+	SR(DCHUBBUB_SOFT_RESET),\
+	SR(DCHUBBUB_CRC_CTRL), \
+	SR(DCN_VM_FB_LOCATION_BASE),\
+	SR(DCN_VM_FB_LOCATION_TOP),\
+	SR(DCN_VM_FB_OFFSET),\
+	SR(DCN_VM_AGP_BOT),\
+	SR(DCN_VM_AGP_TOP),\
+	SR(DCN_VM_AGP_BASE),\
+	HUBBUB_SR_WATERMARK_REG_LIST(), \
+	SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A),\
+	SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B),\
+	SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_C),\
+	SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_D),\
+	SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A),\
+	SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B),\
+	SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_C),\
+	SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_D),\
+	SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A),\
+	SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B),\
+	SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C),\
+	SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D),\
+	SR(DCHUBBUB_DET0_CTRL),\
+	SR(DCHUBBUB_DET1_CTRL),\
+	SR(DCHUBBUB_DET2_CTRL),\
+	SR(DCHUBBUB_DET3_CTRL),\
+	SR(DCHUBBUB_COMPBUF_CTRL),\
+	SR(COMPBUF_RESERVED_SPACE),\
+	SR(DCHUBBUB_DEBUG_CTRL_0),\
+	SR(DCHUBBUB_ARB_USR_RETRAINING_CNTL),\
+	SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A),\
+	SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B),\
+	SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C),\
+	SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D),\
+	SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A),\
+	SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B),\
+	SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C),\
+	SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D),\
+	SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A),\
+	SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B),\
+	SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C),\
+	SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D),\
+	SR(DCN_VM_FAULT_ADDR_MSB),\
+	SR(DCN_VM_FAULT_ADDR_LSB),\
+	SR(DCN_VM_FAULT_CNTL),\
+	SR(DCN_VM_FAULT_STATUS),\
+	SR(SDPIF_REQUEST_RATE_LIMIT),\
+	SR(DCHUBBUB_CLOCK_CNTL),\
+	SR(DCHUBBUB_SDPIF_CFG0),\
+	SR(DCHUBBUB_SDPIF_CFG1),\
+	SR(DCHUBBUB_MEM_PWR_MODE_CTRL),\
+	SR(DCHUBBUB_ARB_HOSTVM_CNTL),\
+	SR(DCHVM_CTRL0),\
+	SR(DCHVM_MEM_CTRL),\
+	SR(DCHVM_CLK_CTRL),\
+	SR(DCHVM_RIOMMU_CTRL0),\
+	SR(DCHVM_RIOMMU_STAT0),\
+	SR(DCHUBBUB_COMPBUF_CTRL), /* NOTE(review): this and the next 3 entries repeat ones listed above */\
+	SR(COMPBUF_RESERVED_SPACE),\
+	SR(DCHUBBUB_DEBUG_CTRL_0),\
+	SR(DCHUBBUB_CLOCK_CNTL),\
+	SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A),\
+	SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A),\
+	SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B),\
+	SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B),\
+	SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C),\
+	SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C),\
+	SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D),\
+	SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D),\
+	SR(DCHUBBUB_ARB_QOS_FORCE)
+
+/* DCN3.5 shift/mask list: the DCN3.2 list plus DCHVM, compbuf, FGCG, Z8 and arbiter fields. */
+#define HUBBUB_MASK_SH_LIST_DCN35(mask_sh)\
+	HUBBUB_MASK_SH_LIST_DCN32(mask_sh), \
+	HUBBUB_SF(DCHVM_CTRL0, HOSTVM_INIT_REQ, mask_sh),\
+	HUBBUB_SF(DCHVM_MEM_CTRL, HVM_GPUVMRET_PWR_REQ_DIS, mask_sh),\
+	HUBBUB_SF(DCHVM_MEM_CTRL, HVM_GPUVMRET_FORCE_REQ, mask_sh),\
+	HUBBUB_SF(DCHVM_MEM_CTRL, HVM_GPUVMRET_POWER_STATUS, mask_sh),\
+	HUBBUB_SF(DCHVM_CLK_CTRL, HVM_DISPCLK_R_GATE_DIS, mask_sh),\
+	HUBBUB_SF(DCHVM_CLK_CTRL, HVM_DISPCLK_G_GATE_DIS, mask_sh),\
+	HUBBUB_SF(DCHVM_CLK_CTRL, HVM_DCFCLK_R_GATE_DIS, mask_sh),\
+	HUBBUB_SF(DCHVM_CLK_CTRL, HVM_DCFCLK_G_GATE_DIS, mask_sh),\
+	HUBBUB_SF(DCHVM_CLK_CTRL, TR_REQ_REQCLKREQ_MODE, mask_sh),\
+	HUBBUB_SF(DCHVM_CLK_CTRL, TW_RSP_COMPCLKREQ_MODE, mask_sh),\
+	HUBBUB_SF(DCHVM_RIOMMU_CTRL0, HOSTVM_PREFETCH_REQ, mask_sh),\
+	HUBBUB_SF(DCHVM_RIOMMU_CTRL0, HOSTVM_POWERSTATUS, mask_sh),\
+	HUBBUB_SF(DCHVM_RIOMMU_STAT0, RIOMMU_ACTIVE, mask_sh),\
+	HUBBUB_SF(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE_CURRENT, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, mask_sh),\
+	HUBBUB_SF(COMPBUF_RESERVED_SPACE, COMPBUF_RESERVED_SPACE_64B, mask_sh),\
+	HUBBUB_SF(COMPBUF_RESERVED_SPACE, COMPBUF_RESERVED_SPACE_ZS, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_CLOCK_CNTL, DCHUBBUB_FGCG_REP_DIS, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, mask_sh), \
+	HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, mask_sh), \
+	HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, mask_sh), \
+	HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, mask_sh), \
+	HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, mask_sh), \
+	HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, mask_sh), \
+	HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, mask_sh), \
+	HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, mask_sh), \
+	HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_CSTATE_DEEPSLEEP_LEGACY_MODE, mask_sh), \
+	HUBBUB_SF(DCHUBBUB_ARB_HOSTVM_CNTL, DCHUBBUB_ARB_MAX_QOS_COMMIT_THRESHOLD, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD, mask_sh)
+
+void hubbub35_construct(struct dcn20_hubbub *hubbub2,
+ struct dc_context *ctx,
+ const struct dcn_hubbub_registers *hubbub_regs,
+ const struct dcn_hubbub_shift *hubbub_shift,
+ const struct dcn_hubbub_mask *hubbub_mask,
+ int det_size_kb,
+ int pixel_chunk_size_kb,
+ int config_return_buffer_size_kb);
+
+void hubbub35_wm_read_state(struct hubbub *hubbub,
+ struct dcn_hubbub_wm *wm);
+void hubbub35_get_dchub_ref_freq(struct hubbub *hubbub,
+ unsigned int dccg_ref_freq_inKhz,
+ unsigned int *dchub_ref_freq_inKhz);
+bool hubbub35_program_watermarks(
+ struct hubbub *hubbub,
+ union dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower);
+void hubbub35_init_watermarks(struct hubbub *hubbub);
+void dcn35_program_compbuf_size(struct hubbub *hubbub,
+ unsigned int compbuf_size_kb, bool safe_to_increase);
+void dcn35_init_crb(struct hubbub *hubbub);
+void hubbub35_init(struct hubbub *hubbub);
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
new file mode 100644
index 000000000000..a36273a52880
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
@@ -0,0 +1,1272 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dcn30/dcn30_hubbub.h"
+#include "dcn401_hubbub.h"
+#include "dm_services.h"
+#include "reg_helper.h"
+
+
+#define CTX \
+ hubbub2->base.ctx /* CTX, DC_LOGGER, REG and FN all expand to expressions on a local named hubbub2 */
+#define DC_LOGGER \
+ hubbub2->base.ctx->logger
+#define REG(reg)\
+ hubbub2->regs->reg /* selects member <reg> of hubbub2->regs */
+
+#undef FN
+#define FN(reg_name, field_name) \
+ hubbub2->shifts->field_name, hubbub2->masks->field_name /* yields "shift, mask"; reg_name is not used */
+
+static void dcn401_init_crb(struct hubbub *hubbub)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+ /* Read the *_SIZE_CURRENT fields of DET0-3 and COMPBUF into hubbub2, then program the reserved space. */
+ REG_GET(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT,
+ &hubbub2->det0_size);
+
+ REG_GET(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT,
+ &hubbub2->det1_size);
+
+ REG_GET(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT,
+ &hubbub2->det2_size);
+
+ REG_GET(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT,
+ &hubbub2->det3_size);
+
+ REG_GET(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE_CURRENT,
+ &hubbub2->compbuf_size_segments);
+
+ REG_SET(COMPBUF_RESERVED_SPACE, 0,
+ COMPBUF_RESERVED_SPACE_64B, hubbub2->pixel_chunk_size / 32); // NOTE(review): original note read "256 64Bytes"; presumably the field counts 64-byte units - confirm
+}
+
+bool hubbub401_program_urgent_watermarks(
+ struct hubbub *hubbub,
+ union dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+ bool wm_pending = false;
+ /* Per field, set A then set B: when safe_to_lower is set or the new value is higher, cache it and
+  * write the register; when it is lower and not applied, set wm_pending. refclk_mhz is not used here. */
+ /* clock state A */
+ if (safe_to_lower || watermarks->dcn4x.a.urgent > hubbub2->watermarks.dcn4x.a.urgent) {
+ hubbub2->watermarks.dcn4x.a.urgent = watermarks->dcn4x.a.urgent;
+ REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0,
+ DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, watermarks->dcn4x.a.urgent);
+ DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_A calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->dcn4x.a.urgent, watermarks->dcn4x.a.urgent);
+ } else if (watermarks->dcn4x.a.urgent < hubbub2->watermarks.dcn4x.a.urgent)
+ wm_pending = true;
+
+ /* determine the transfer time for a quantity of data for a particular requestor.*/
+ if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_flip
+ > hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip) {
+ hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip = watermarks->dcn4x.a.frac_urg_bw_flip;
+ REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, 0,
+ DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, watermarks->dcn4x.a.frac_urg_bw_flip);
+ } else if (watermarks->dcn4x.a.frac_urg_bw_flip
+ < hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip)
+ wm_pending = true;
+
+ if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_nom
+ > hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom) {
+ hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom = watermarks->dcn4x.a.frac_urg_bw_nom;
+ REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, 0,
+ DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->dcn4x.a.frac_urg_bw_nom);
+ } else if (watermarks->dcn4x.a.frac_urg_bw_nom
+ < hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom)
+ wm_pending = true;
+
+ if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_mall
+ > hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall) {
+ hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall = watermarks->dcn4x.a.frac_urg_bw_mall;
+ REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, 0,
+ DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, watermarks->dcn4x.a.frac_urg_bw_mall);
+ } else if (watermarks->dcn4x.a.frac_urg_bw_mall < hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall)
+ wm_pending = true;
+
+ if (safe_to_lower || watermarks->dcn4x.a.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem) {
+ hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem = watermarks->dcn4x.a.refcyc_per_trip_to_mem;
+ REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0,
+ DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, watermarks->dcn4x.a.refcyc_per_trip_to_mem);
+ } else if (watermarks->dcn4x.a.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem)
+ wm_pending = true;
+
+ if (safe_to_lower || watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem) {
+ hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem = watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem;
+ REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, 0,
+ DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem);
+ } else if (watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem)
+ wm_pending = true;
+
+
+ /* clock state B */
+ if (safe_to_lower || watermarks->dcn4x.b.urgent > hubbub2->watermarks.dcn4x.b.urgent) {
+ hubbub2->watermarks.dcn4x.b.urgent = watermarks->dcn4x.b.urgent;
+ REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0,
+ DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, watermarks->dcn4x.b.urgent);
+ DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_B calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->dcn4x.b.urgent, watermarks->dcn4x.b.urgent);
+ } else if (watermarks->dcn4x.b.urgent < hubbub2->watermarks.dcn4x.b.urgent)
+ wm_pending = true;
+
+ /* determine the transfer time for a quantity of data for a particular requestor.*/
+ if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_flip
+ > hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip) {
+ hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip = watermarks->dcn4x.b.frac_urg_bw_flip;
+ REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, 0,
+ DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, watermarks->dcn4x.b.frac_urg_bw_flip);
+ } else if (watermarks->dcn4x.b.frac_urg_bw_flip
+ < hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip)
+ wm_pending = true;
+
+ if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_nom
+ > hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom) {
+ hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom = watermarks->dcn4x.b.frac_urg_bw_nom;
+ REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, 0,
+ DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->dcn4x.b.frac_urg_bw_nom);
+ } else if (watermarks->dcn4x.b.frac_urg_bw_nom
+ < hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom)
+ wm_pending = true;
+
+ if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_mall
+ > hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall) {
+ hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall = watermarks->dcn4x.b.frac_urg_bw_mall;
+ REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, 0,
+ DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, watermarks->dcn4x.b.frac_urg_bw_mall);
+ } else if (watermarks->dcn4x.b.frac_urg_bw_mall < hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall)
+ wm_pending = true;
+
+ if (safe_to_lower || watermarks->dcn4x.b.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem) {
+ hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem = watermarks->dcn4x.b.refcyc_per_trip_to_mem;
+ REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0,
+ DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, watermarks->dcn4x.b.refcyc_per_trip_to_mem);
+ } else if (watermarks->dcn4x.b.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem)
+ wm_pending = true;
+
+ if (safe_to_lower || watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem) {
+ hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem = watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem;
+ REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, 0,
+ DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem);
+ } else if (watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem)
+ wm_pending = true;
+
+ return wm_pending; /* true when at least one lower value was not applied */
+}
+
+bool hubbub401_program_stutter_watermarks(
+ struct hubbub *hubbub,
+ union dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+ bool wm_pending = false;
+ /* Write sr_enter/sr_exit for sets A and B (plus WATERMARK1-3) when higher or safe_to_lower; a lower value not applied sets wm_pending. */
+ /* clock state A */
+ if (safe_to_lower || watermarks->dcn4x.a.sr_enter
+ > hubbub2->watermarks.dcn4x.a.sr_enter) {
+ hubbub2->watermarks.dcn4x.a.sr_enter =
+ watermarks->dcn4x.a.sr_enter;
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, watermarks->dcn4x.a.sr_enter);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->dcn4x.a.sr_enter, watermarks->dcn4x.a.sr_enter);
+ // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, watermarks->dcn4x.a.sr_enter);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, watermarks->dcn4x.a.sr_enter);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, watermarks->dcn4x.a.sr_enter);
+
+ } else if (watermarks->dcn4x.a.sr_enter
+ < hubbub2->watermarks.dcn4x.a.sr_enter)
+ wm_pending = true;
+
+ if (safe_to_lower || watermarks->dcn4x.a.sr_exit
+ > hubbub2->watermarks.dcn4x.a.sr_exit) {
+ hubbub2->watermarks.dcn4x.a.sr_exit =
+ watermarks->dcn4x.a.sr_exit;
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, watermarks->dcn4x.a.sr_exit);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->dcn4x.a.sr_exit, watermarks->dcn4x.a.sr_exit);
+ // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, watermarks->dcn4x.a.sr_exit);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, watermarks->dcn4x.a.sr_exit);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, watermarks->dcn4x.a.sr_exit);
+
+ } else if (watermarks->dcn4x.a.sr_exit
+ < hubbub2->watermarks.dcn4x.a.sr_exit)
+ wm_pending = true;
+
+ /* clock state B */
+ if (safe_to_lower || watermarks->dcn4x.b.sr_enter
+ > hubbub2->watermarks.dcn4x.b.sr_enter) {
+ hubbub2->watermarks.dcn4x.b.sr_enter =
+ watermarks->dcn4x.b.sr_enter;
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, watermarks->dcn4x.b.sr_enter);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->dcn4x.b.sr_enter, watermarks->dcn4x.b.sr_enter);
+ // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm B with the same value
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, watermarks->dcn4x.b.sr_enter);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, watermarks->dcn4x.b.sr_enter);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, watermarks->dcn4x.b.sr_enter);
+
+ } else if (watermarks->dcn4x.b.sr_enter
+ < hubbub2->watermarks.dcn4x.b.sr_enter)
+ wm_pending = true;
+
+ if (safe_to_lower || watermarks->dcn4x.b.sr_exit
+ > hubbub2->watermarks.dcn4x.b.sr_exit) {
+ hubbub2->watermarks.dcn4x.b.sr_exit =
+ watermarks->dcn4x.b.sr_exit;
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, watermarks->dcn4x.b.sr_exit);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->dcn4x.b.sr_exit, watermarks->dcn4x.b.sr_exit);
+ // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm B with the same value
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, watermarks->dcn4x.b.sr_exit);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, watermarks->dcn4x.b.sr_exit);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, watermarks->dcn4x.b.sr_exit);
+
+ } else if (watermarks->dcn4x.b.sr_exit
+ < hubbub2->watermarks.dcn4x.b.sr_exit)
+ wm_pending = true;
+
+ return wm_pending; /* true when at least one lower value was not applied */
+}
+
+
+/* Program the UCLK/FCLK p-state change watermarks (plus WATERMARK1) for sets A and B. Returns true
+ * when a lower value was held back because safe_to_lower is false; refclk_mhz is not used.
+ */
+bool hubbub401_program_pstate_watermarks(
+		struct hubbub *hubbub,
+		union dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower)
+{
+	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+	bool wm_pending = false;
+	/*
+	 * temp_read_or_ppt drives both the UCLK and the FCLK WATERMARK1 registers but
+	 * has a single cached copy. Decide once, before that copy is refreshed, so the
+	 * FCLK write is not skipped when a raise is applied with !safe_to_lower.
+	 */
+	bool program_wm1_a = safe_to_lower || watermarks->dcn4x.a.temp_read_or_ppt
+			> hubbub2->watermarks.dcn4x.a.temp_read_or_ppt;
+	bool program_wm1_b = safe_to_lower || watermarks->dcn4x.b.temp_read_or_ppt
+			> hubbub2->watermarks.dcn4x.b.temp_read_or_ppt;
+
+	/* Section for UCLK_PSTATE_CHANGE_WATERMARKS */
+	/* clock state A */
+	if (safe_to_lower || watermarks->dcn4x.a.uclk_pstate
+			> hubbub2->watermarks.dcn4x.a.uclk_pstate) {
+		hubbub2->watermarks.dcn4x.a.uclk_pstate =
+				watermarks->dcn4x.a.uclk_pstate;
+		REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, 0,
+				DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4x.a.uclk_pstate);
+		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->dcn4x.a.uclk_pstate, watermarks->dcn4x.a.uclk_pstate);
+	} else if (watermarks->dcn4x.a.uclk_pstate
+			< hubbub2->watermarks.dcn4x.a.uclk_pstate)
+		wm_pending = true;
+
+	/* clock state B */
+	if (safe_to_lower || watermarks->dcn4x.b.uclk_pstate
+			> hubbub2->watermarks.dcn4x.b.uclk_pstate) {
+		hubbub2->watermarks.dcn4x.b.uclk_pstate =
+				watermarks->dcn4x.b.uclk_pstate;
+		REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, 0,
+				DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4x.b.uclk_pstate);
+		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->dcn4x.b.uclk_pstate, watermarks->dcn4x.b.uclk_pstate);
+	} else if (watermarks->dcn4x.b.uclk_pstate
+			< hubbub2->watermarks.dcn4x.b.uclk_pstate)
+		wm_pending = true;
+
+	/* Section for UCLK_PSTATE_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */
+	if (program_wm1_a) {
+		hubbub2->watermarks.dcn4x.a.temp_read_or_ppt =
+				watermarks->dcn4x.a.temp_read_or_ppt;
+		REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, 0,
+				DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4x.a.temp_read_or_ppt);
+		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_A calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->dcn4x.a.temp_read_or_ppt, watermarks->dcn4x.a.temp_read_or_ppt);
+	} else if (watermarks->dcn4x.a.temp_read_or_ppt
+			< hubbub2->watermarks.dcn4x.a.temp_read_or_ppt)
+		wm_pending = true;
+
+	/* clock state B */
+	if (program_wm1_b) {
+		hubbub2->watermarks.dcn4x.b.temp_read_or_ppt =
+				watermarks->dcn4x.b.temp_read_or_ppt;
+		REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, 0,
+				DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4x.b.temp_read_or_ppt);
+		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_B calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->dcn4x.b.temp_read_or_ppt, watermarks->dcn4x.b.temp_read_or_ppt);
+	} else if (watermarks->dcn4x.b.temp_read_or_ppt
+			< hubbub2->watermarks.dcn4x.b.temp_read_or_ppt)
+		wm_pending = true;
+
+	/* Section for FCLK_PSTATE_CHANGE_WATERMARKS */
+	/* clock state A */
+	if (safe_to_lower || watermarks->dcn4x.a.fclk_pstate
+			> hubbub2->watermarks.dcn4x.a.fclk_pstate) {
+		hubbub2->watermarks.dcn4x.a.fclk_pstate =
+				watermarks->dcn4x.a.fclk_pstate;
+		REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, 0,
+				DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4x.a.fclk_pstate);
+		DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_A calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->dcn4x.a.fclk_pstate, watermarks->dcn4x.a.fclk_pstate);
+	} else if (watermarks->dcn4x.a.fclk_pstate
+			< hubbub2->watermarks.dcn4x.a.fclk_pstate)
+		wm_pending = true;
+
+	/* clock state B */
+	if (safe_to_lower || watermarks->dcn4x.b.fclk_pstate
+			> hubbub2->watermarks.dcn4x.b.fclk_pstate) {
+		hubbub2->watermarks.dcn4x.b.fclk_pstate =
+				watermarks->dcn4x.b.fclk_pstate;
+		REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, 0,
+				DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4x.b.fclk_pstate);
+		DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_B calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->dcn4x.b.fclk_pstate, watermarks->dcn4x.b.fclk_pstate);
+	} else if (watermarks->dcn4x.b.fclk_pstate
+			< hubbub2->watermarks.dcn4x.b.fclk_pstate)
+		wm_pending = true;
+
+	/* Section for FCLK_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT); cache and wm_pending were handled above */
+	if (program_wm1_a) {
+		REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, 0,
+				DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4x.a.temp_read_or_ppt);
+		DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_A calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->dcn4x.a.temp_read_or_ppt, watermarks->dcn4x.a.temp_read_or_ppt);
+	}
+
+	/* clock state B */
+	if (program_wm1_b) {
+		REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, 0,
+				DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4x.b.temp_read_or_ppt);
+		DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_B calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->dcn4x.b.temp_read_or_ppt, watermarks->dcn4x.b.temp_read_or_ppt);
+	}
+
+	return wm_pending;
+}
+
+
+bool hubbub401_program_usr_watermarks(
+ struct hubbub *hubbub,
+ union dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+ bool wm_pending = false;
+ /* Write the usr value of sets A and B to USR_RETRAINING_WATERMARK_A/B when higher or safe_to_lower; refclk_mhz is not used. */
+ /* clock state A */
+ if (safe_to_lower || watermarks->dcn4x.a.usr
+ > hubbub2->watermarks.dcn4x.a.usr) {
+ hubbub2->watermarks.dcn4x.a.usr = watermarks->dcn4x.a.usr;
+ REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, 0,
+ DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, watermarks->dcn4x.a.usr);
+ DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_A calculated =%d\n"
+ "HW register value = 0x%x\n\n",
+ watermarks->dcn4x.a.usr, watermarks->dcn4x.a.usr);
+ } else if (watermarks->dcn4x.a.usr
+ < hubbub2->watermarks.dcn4x.a.usr)
+ wm_pending = true; /* lower value requested but not applied */
+
+ /* clock state B */
+ if (safe_to_lower || watermarks->dcn4x.b.usr
+ > hubbub2->watermarks.dcn4x.b.usr) {
+ hubbub2->watermarks.dcn4x.b.usr = watermarks->dcn4x.b.usr;
+ REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, 0,
+ DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, watermarks->dcn4x.b.usr);
+ DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_B calculated =%d\n"
+ "HW register value = 0x%x\n\n",
+ watermarks->dcn4x.b.usr, watermarks->dcn4x.b.usr);
+ } else if (watermarks->dcn4x.b.usr
+ < hubbub2->watermarks.dcn4x.b.usr)
+ wm_pending = true; /* lower value requested but not applied */
+
+ return wm_pending;
+}
+
+
+static bool hubbub401_program_watermarks(
+		struct hubbub *hubbub,
+		union dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower)
+{
+	bool pending;
+
+	/*
+	 * Program every watermark group in the fixed order urgent, stutter,
+	 * pstate, usr. The results are combined with a non-short-circuit OR so
+	 * that all four groups are always programmed, even after one of them
+	 * has already reported a pending (deferred) update.
+	 */
+	pending = hubbub401_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+	pending |= hubbub401_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+	pending |= hubbub401_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+	pending |= hubbub401_program_usr_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+
+	/*
+	 * The DCHub arbiter can dynamically rate limit the DCHub request stream to the fabric.
+	 * When the memory controller is fully utilized and the DCHub requestors are well ahead
+	 * of their amortized schedule, the next winner can safely be held back from the fabric.
+	 * Memory controller utilization is approximated by requiring the number of outstanding
+	 * requests to exceed the threshold given by ARB_MIN_REQ_OUTSTANDING. Whether the
+	 * requestors are well ahead of schedule is determined by comparing the slack of the
+	 * next winner with ARB_SAT_LEVEL, in DLG RefClk cycles.
+	 *
+	 * TODO: Revisit the request limit once the right number is known. The request limit
+	 * for RM isn't decided yet, so the intent is to set the maximum value (0x1FF) to turn
+	 * it off for now; the programming below is currently disabled.
+	 */
+	/*REG_SET(DCHUBBUB_ARB_SAT_LEVEL, 0,
+			DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz);
+	REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND,
+			DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);
+	*/
+
+	/* The allow argument is the inverse of the disable_stutter debug flag. */
+	hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter);
+
+	hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow);
+
+	return pending;
+}
+
+/* Copy raw register values of WM set A into set B; SR ENTER/EXIT A also seeds WATERMARK1-3 A and B */
+static void hubbub401_init_watermarks(struct hubbub *hubbub)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+ uint32_t reg; /* whole register value most recently read from a set A register */
+
+ reg = REG_READ(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A);
+ REG_WRITE(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, reg);
+
+ reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A);
+ REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, reg);
+
+ reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A);
+ REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, reg);
+
+ reg = REG_READ(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A);
+ REG_WRITE(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, reg);
+
+ reg = REG_READ(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A);
+ REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, reg);
+
+ reg = REG_READ(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A);
+ REG_WRITE(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, reg);
+ /* SR enter: the set A value goes to set B and to WATERMARK1-3 of both sets. */
+ reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, reg);
+ /* SR exit: same fan-out as SR enter. */
+ reg = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, reg);
+ REG_WRITE(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, reg);
+
+ reg = REG_READ(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A);
+ REG_WRITE(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, reg);
+ /* UCLK/FCLK p-state: WATERMARK and WATERMARK1 are each copied from A to B separately. */
+ reg = REG_READ(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A);
+ REG_WRITE(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, reg);
+ reg = REG_READ(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A);
+ REG_WRITE(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, reg);
+
+ reg = REG_READ(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A);
+ REG_WRITE(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, reg);
+ reg = REG_READ(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A);
+ REG_WRITE(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, reg);
+}
+
+static void hubbub401_wm_read_state(struct hubbub *hubbub,
+ struct dcn_hubbub_wm *wm)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+ struct dcn_hubbub_wm_set *s;
+ /* Zero *wm, then read the set A registers into sets[0] and the set B registers into sets[1]. */
+ memset(wm, 0, sizeof(struct dcn_hubbub_wm));
+
+ s = &wm->sets[0]; /* filled from the *_A registers */
+ s->wm_set = 0;
+ REG_GET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A,
+ DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, &s->data_urgent);
+
+ REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, &s->sr_enter);
+
+ REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit);
+
+ REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A,
+ DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_change);
+
+ REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A,
+ DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, &s->usr_retrain);
+
+ REG_GET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A,
+ DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, &s->fclk_pstate_change);
+
+ s = &wm->sets[1]; /* filled from the *_B registers */
+ s->wm_set = 1;
+ REG_GET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B,
+ DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, &s->data_urgent);
+
+ REG_GET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, &s->sr_enter);
+
+ REG_GET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit);
+
+ REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B,
+ DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_change);
+
+ REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B,
+ DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, &s->usr_retrain);
+
+ REG_GET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B,
+ DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, &s->fclk_pstate_change);
+}
+
+/*
+ * Report whether the swizzle mode / element size pair is accepted, and the segment ordering to use.
+ */
+bool hubbub401_dcc_support_swizzle(
+		enum swizzle_mode_addr3_values swizzle,
+		unsigned int plane_pitch,
+		unsigned int bytes_per_element,
+		enum segment_order *segment_order_horz,
+		enum segment_order *segment_order_vert)
+{
+	/*
+	 * Step 1: reject swizzle modes that are not accepted. Linear surfaces
+	 * qualify only when plane_pitch * bytes_per_element is a multiple
+	 * of 256.
+	 */
+	switch (swizzle) {
+	case DC_ADDR3_SW_LINEAR:
+		if ((plane_pitch * bytes_per_element) % 256 != 0)
+			return false;
+		break;
+	case DC_ADDR3_SW_64KB_2D:
+	case DC_ADDR3_SW_256KB_2D:
+		break;
+	default:
+		return false;
+	}
+
+	/*
+	 * Step 2: choose the segment ordering by element size. Only 2-byte
+	 * elements are non-contiguous horizontally / contiguous vertically;
+	 * 1-, 4- and 8-byte elements are the opposite. The outputs are
+	 * written only when the function returns true.
+	 */
+	switch (bytes_per_element) {
+	case 2:
+		*segment_order_horz = segment_order__non_contiguous;
+		*segment_order_vert = segment_order__contiguous;
+		return true;
+	case 1:
+	case 4:
+	case 8:
+		*segment_order_horz = segment_order__contiguous;
+		*segment_order_vert = segment_order__non_contiguous;
+		return true;
+	default:
+		return false;
+	}
+}
+
+bool hubbub401_dcc_support_pixel_format(enum surface_pixel_format format,
+		unsigned int *plane0_bpe, unsigned int *plane1_bpe)
+{
+	unsigned int p0_bytes, p1_bytes;	/* bytes per element of plane 0 / plane 1 */
+
+	switch (format) {
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+	case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+		p0_bytes = 2;
+		p1_bytes = 0;
+		break;
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+		p0_bytes = 1;
+		p1_bytes = 2;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS:
+	case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX:
+	case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FIX:
+	case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT:
+	case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FLOAT:
+	case SURFACE_PIXEL_FORMAT_GRPH_RGBE:
+	case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010:
+	case SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102:
+	case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888:
+		p0_bytes = 4;
+		p1_bytes = 0;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+		p0_bytes = 4;
+		p1_bytes = 1;
+		break;
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+	case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+		p0_bytes = 2;
+		p1_bytes = 4;
+		break;
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+		p0_bytes = 8;
+		p1_bytes = 0;
+		break;
+	default:
+		return false;	/* format not listed: both outputs are left untouched */
+	}
+	*plane0_bpe = p0_bytes;
+	*plane1_bpe = p1_bytes;
+	return true;
+}
+
+void hubbub401_get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height,
+		unsigned int bytes_per_element)
+{
+	/* width * height * bytes_per_element == 256; other element sizes leave both outputs untouched. */
+	switch (bytes_per_element) {
+	case 1:
+	case 2:
+		*blk256_width = 16;
+		*blk256_height = 16 / bytes_per_element;
+		break;
+	case 4:
+	case 8:
+		*blk256_width = 8;
+		*blk256_height = 32 / bytes_per_element;
+		break;
+	}
+}
+
+void hubbub401_det_request_size(
+ unsigned int detile_buf_size,
+ enum surface_pixel_format format,
+ unsigned int p0_height,
+ unsigned int p0_width,
+ unsigned int p0_bpe,
+ unsigned int p1_height,
+ unsigned int p1_width,
+ unsigned int p1_bpe,
+ bool *p0_req128_horz_wc,
+ bool *p0_req128_vert_wc,
+ bool *p1_req128_horz_wc,
+ bool *p1_req128_vert_wc)
+{
+ unsigned int blk256_height = 0;
+ unsigned int blk256_width = 0;
+ unsigned int p0_swath_bytes_horz_wc, p0_swath_bytes_vert_wc;
+ unsigned int p1_swath_bytes_horz_wc, p1_swath_bytes_vert_wc;
+
+ //For plane0
+ hubbub401_get_blk256_size(&blk256_width, &blk256_height, p0_bpe);
+
+ p0_swath_bytes_horz_wc = p0_width * blk256_height * p0_bpe;
+ p0_swath_bytes_vert_wc = p0_height * blk256_width * p0_bpe;
+
+ *p0_req128_horz_wc = (2 * p0_swath_bytes_horz_wc <= detile_buf_size) ?
+ false : /* full 256B request */
+ true; /* half 128b request */
+
+ *p0_req128_vert_wc = (2 * p0_swath_bytes_vert_wc <= detile_buf_size) ?
+ false : /* full 256B request */
+ true; /* half 128b request */
+
+ /*For dual planes needs to be considered together */
+ if (p1_bpe) {
+ hubbub401_get_blk256_size(&blk256_width, &blk256_height, p1_bpe);
+
+ p1_swath_bytes_horz_wc = p1_width * blk256_height * p1_bpe;
+ p1_swath_bytes_vert_wc = p1_height * blk256_width * p1_bpe;
+
+ switch (format) {
+ default:
+ /* No any adjustment needed*/
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+ /* Packing at the ratio of 3:2 is supported before the detile buffer
+ * for YUV420 video with 10bpc (P010). Need to adjust for that.
+ */
+ p0_swath_bytes_horz_wc = (((p0_swath_bytes_horz_wc * 2) / 3 + 255) / 256) * 256;
+ p0_swath_bytes_vert_wc = (((p0_swath_bytes_vert_wc * 2) / 3 + 255) / 256) * 256;
+ p1_swath_bytes_horz_wc = (((p1_swath_bytes_horz_wc * 2) / 3 + 255) / 256) * 256;
+ p1_swath_bytes_vert_wc = (((p1_swath_bytes_vert_wc * 2) / 3 + 255) / 256) * 256;
+ break;
+ }
+
+ *p0_req128_horz_wc = *p1_req128_horz_wc = (2 * p0_swath_bytes_horz_wc +
+ 2 * p1_swath_bytes_horz_wc <= detile_buf_size) ?
+ false : /* full 256B request */
+ true; /* half 128B request */
+
+ *p0_req128_vert_wc = *p1_req128_vert_wc = (2 * p0_swath_bytes_vert_wc +
+ 2 * p1_swath_bytes_vert_wc <= detile_buf_size) ?
+ false : /* full 256B request */
+ true; /* half 128B request */
+
+ /* If 128B requests are true, meaning 2 full swaths of data cannot fit
+ * in de-tile buffer, check if one plane can use 256B request while
+ * the other plane is using 128B requests
+ */
+ if (*p0_req128_horz_wc) {
+ // If ratio around 1:1 between p0 and p1 try to recalulate if p0 can use 256B
+ if (p0_swath_bytes_horz_wc <= p1_swath_bytes_horz_wc + p1_swath_bytes_horz_wc / 2) {
+
+ *p0_req128_horz_wc = (2 * p0_swath_bytes_horz_wc + p1_swath_bytes_horz_wc <= detile_buf_size) ?
+ false : /* full 256B request */
+ true; /* half 128b request */
+
+ } else {
+ /* ratio about 2:1 between p0 and p1, try to recalculate if p1 can use 256B */
+ *p1_req128_horz_wc = (p0_swath_bytes_horz_wc + 2 * p1_swath_bytes_horz_wc <= detile_buf_size) ?
+ false : /* full 256B request */
+ true; /* half 128b request */
+ }
+ }
+
+ if (*p0_req128_vert_wc) {
+ // If ratio around 1:1 between p0 and p1, try to recalculate if p0 can use 256B
+ if (p0_swath_bytes_vert_wc <= p1_swath_bytes_vert_wc + p1_swath_bytes_vert_wc / 2) {
+
+ *p0_req128_vert_wc = (2 * p0_swath_bytes_vert_wc + p1_swath_bytes_vert_wc <= detile_buf_size) ?
+ false : /* full 256B request */
+ true; /* half 128b request */
+
+ } else {
+ /* ratio about 2:1 between p0 and p1, try to recalculate if p1 can use 256B */
+ *p1_req128_vert_wc = (p0_swath_bytes_vert_wc + 2 * p1_swath_bytes_vert_wc <= detile_buf_size) ?
+ false : /* full 256B request */
+ true; /* half 128b request */
+ }
+ }
+ }
+}
+bool hubbub401_get_dcc_compression_cap(struct hubbub *hubbub,
+ const struct dc_dcc_surface_param *input,
+ struct dc_surface_dcc_cap *output)
+{
+ struct dc *dc = hubbub->ctx->dc;
+ const unsigned int max_dcc_plane_width = dc->caps.dcc_plane_width_limit; /* 0 skips the width check below */
+ /* DCN4_Programming_Guide_DCHUB.docx, Section 5.11.2.2 */
+ enum dcc_control dcc_control;
+ unsigned int plane0_bpe, plane1_bpe;
+ enum segment_order segment_order_horz, segment_order_vert;
+ enum segment_order p1_segment_order_horz, p1_segment_order_vert;
+ bool req128_horz_wc, req128_vert_wc;
+ unsigned int plane0_width = 0, plane0_height = 0, plane1_width = 0, plane1_height = 0;
+ bool p1_req128_horz_wc, p1_req128_vert_wc, is_dual_plane;
+ /* Zero *output, then either return false or set its dcc_controls flags and capable = true */
+ memset(output, 0, sizeof(*output));
+
+ if (dc->debug.disable_dcc == DCC_DISABLE)
+ return false;
+
+ /* Conservatively disable DCC for cases where ODM4:1 may be required. */
+ if (max_dcc_plane_width != 0 &&
+ (input->surface_size.width > max_dcc_plane_width || input->plane1_size.width > max_dcc_plane_width))
+ return false;
+
+ switch (input->format) { /* clamp the plane sizes that are later passed to hubbub401_det_request_size() */
+ default: /* every format not listed below is treated as single plane */
+ is_dual_plane = false;
+
+ plane1_width = 0;
+ plane1_height = 0;
+
+ if (input->surface_size.width > 6144 + 16)
+ plane0_width = 6160;
+ else
+ plane0_width = input->surface_size.width;
+
+ if (input->surface_size.height > 6144 + 16)
+ plane0_height = 6160;
+ else
+ plane0_height = input->surface_size.height;
+
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+ is_dual_plane = true;
+
+ if (input->surface_size.width > 7680 + 16)
+ plane0_width = 7696;
+ else
+ plane0_width = input->surface_size.width;
+
+ if (input->surface_size.height > 4320 + 16)
+ plane0_height = 4336;
+ else
+ plane0_height = input->surface_size.height;
+
+ if (input->plane1_size.width > 7680 + 16)
+ plane1_width = 7696 / 2; /* NOTE(review): clamps to half of the threshold tested above - confirm intent */
+ else
+ plane1_width = input->plane1_size.width;
+
+ if (input->plane1_size.height > 4320 + 16)
+ plane1_height = 4336 / 2; /* NOTE(review): likewise half of the tested threshold - confirm intent */
+ else
+ plane1_height = input->plane1_size.height;
+
+ break;
+
+ case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+ is_dual_plane = true;
+
+ if (input->surface_size.width > 5120 + 16)
+ plane0_width = 5136;
+ else
+ plane0_width = input->surface_size.width;
+
+ if (input->surface_size.height > 5120 + 16)
+ plane0_height = 5136;
+ else
+ plane0_height = input->surface_size.height;
+
+ if (input->plane1_size.width > 5120 + 16)
+ plane1_width = 5136;
+ else
+ plane1_width = input->plane1_size.width;
+
+ if (input->plane1_size.height > 5120 + 16)
+ plane1_height = 5136;
+ else
+ plane1_height = input->plane1_size.height;
+
+ break;
+ }
+
+ if (!hubbub->funcs->dcc_support_pixel_format_plane0_plane1(input->format,
+ &plane0_bpe, &plane1_bpe))
+ return false;
+
+ /* Single plane: derive the DCC controls from plane 0 only */
+ if (!is_dual_plane) {
+
+ if (!hubbub->funcs->dcc_support_swizzle_addr3(input->swizzle_mode_addr3,
+ input->plane0_pitch, plane0_bpe,
+ &segment_order_horz, &segment_order_vert))
+ return false;
+
+ hubbub401_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size, input->format,
+ plane0_height, plane0_width, plane0_bpe,
+ plane1_height, plane1_width, plane1_bpe,
+ &req128_horz_wc, &req128_vert_wc, &p1_req128_horz_wc, &p1_req128_vert_wc); /* p1_* results are not read on this path */
+
+ if (!req128_horz_wc && !req128_vert_wc) {
+ dcc_control = dcc_control__256_256;
+ } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
+ if (!req128_horz_wc)
+ dcc_control = dcc_control__256_256;
+ else if (segment_order_horz == segment_order__contiguous)
+ dcc_control = dcc_control__256_128;
+ else
+ dcc_control = dcc_control__256_64;
+ } else if (input->scan == SCAN_DIRECTION_VERTICAL) {
+ if (!req128_vert_wc)
+ dcc_control = dcc_control__256_256;
+ else if (segment_order_vert == segment_order__contiguous)
+ dcc_control = dcc_control__256_128;
+ else
+ dcc_control = dcc_control__256_64;
+ } else {
+ if ((req128_horz_wc &&
+ segment_order_horz == segment_order__non_contiguous) ||
+ (req128_vert_wc &&
+ segment_order_vert == segment_order__non_contiguous))
+ /* access_dir not known, must use most constraining */
+ dcc_control = dcc_control__256_64;
+ else
+ /* req128 is set for horz and/or vert, but no direction
+ * that needs it is non-contiguous
+ */
+ dcc_control = dcc_control__256_128;
+ }
+
+ if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE &&
+ dcc_control != dcc_control__256_256)
+ return false;
+
+ switch (dcc_control) { /* each level also sets the flags of every smaller request size */
+ case dcc_control__256_256:
+ output->grph.rgb.dcc_controls.dcc_256_256 = 1;
+ output->grph.rgb.dcc_controls.dcc_256_128 = 1;
+ output->grph.rgb.dcc_controls.dcc_256_64 = 1;
+ break;
+ case dcc_control__256_128:
+ output->grph.rgb.dcc_controls.dcc_256_128 = 1;
+ output->grph.rgb.dcc_controls.dcc_256_64 = 1;
+ break;
+ case dcc_control__256_64:
+ output->grph.rgb.dcc_controls.dcc_256_64 = 1;
+ break;
+ default:
+ /* Shouldn't get here */
+ ASSERT(0);
+ break;
+ }
+ } else {
+ /* For dual plane cases, need to examine both planes together */
+ if (!hubbub->funcs->dcc_support_swizzle_addr3(input->swizzle_mode_addr3,
+ input->plane0_pitch, plane0_bpe,
+ &segment_order_horz, &segment_order_vert))
+ return false;
+
+ if (!hubbub->funcs->dcc_support_swizzle_addr3(input->swizzle_mode_addr3,
+ input->plane1_pitch, plane1_bpe,
+ &p1_segment_order_horz, &p1_segment_order_vert))
+ return false;
+
+ hubbub401_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size, input->format,
+ plane0_height, plane0_width, plane0_bpe,
+ plane1_height, plane1_width, plane1_bpe,
+ &req128_horz_wc, &req128_vert_wc, &p1_req128_horz_wc, &p1_req128_vert_wc);
+
+ /* Determine Plane 0 DCC Controls. NOTE(review): the DCC_HALF_REQ_DISALBE bail-out of the single-plane path is absent here - confirm */
+ if (!req128_horz_wc && !req128_vert_wc) {
+ dcc_control = dcc_control__256_256;
+ } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
+ if (!req128_horz_wc)
+ dcc_control = dcc_control__256_256;
+ else if (segment_order_horz == segment_order__contiguous)
+ dcc_control = dcc_control__256_128;
+ else
+ dcc_control = dcc_control__256_64;
+ } else if (input->scan == SCAN_DIRECTION_VERTICAL) {
+ if (!req128_vert_wc)
+ dcc_control = dcc_control__256_256;
+ else if (segment_order_vert == segment_order__contiguous)
+ dcc_control = dcc_control__256_128;
+ else
+ dcc_control = dcc_control__256_64;
+ } else {
+ if ((req128_horz_wc &&
+ segment_order_horz == segment_order__non_contiguous) ||
+ (req128_vert_wc &&
+ segment_order_vert == segment_order__non_contiguous))
+ /* access_dir not known, must use most constraining */
+ dcc_control = dcc_control__256_64;
+ else
+ /* req128 is set for horz and/or vert, but no direction
+ * that needs it is non-contiguous
+ */
+ dcc_control = dcc_control__256_128;
+ }
+
+ switch (dcc_control) { /* plane 0 result goes to video.luma */
+ case dcc_control__256_256:
+ output->video.luma.dcc_controls.dcc_256_256 = 1;
+ output->video.luma.dcc_controls.dcc_256_128 = 1;
+ output->video.luma.dcc_controls.dcc_256_64 = 1;
+ break;
+ case dcc_control__256_128:
+ output->video.luma.dcc_controls.dcc_256_128 = 1;
+ output->video.luma.dcc_controls.dcc_256_64 = 1;
+ break;
+ case dcc_control__256_64:
+ output->video.luma.dcc_controls.dcc_256_64 = 1;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ /* Determine Plane 1 DCC Controls (same decision tree, using the p1_* inputs) */
+ if (!p1_req128_horz_wc && !p1_req128_vert_wc) {
+ dcc_control = dcc_control__256_256;
+ } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
+ if (!p1_req128_horz_wc)
+ dcc_control = dcc_control__256_256;
+ else if (p1_segment_order_horz == segment_order__contiguous)
+ dcc_control = dcc_control__256_128;
+ else
+ dcc_control = dcc_control__256_64;
+ } else if (input->scan == SCAN_DIRECTION_VERTICAL) {
+ if (!p1_req128_vert_wc)
+ dcc_control = dcc_control__256_256;
+ else if (p1_segment_order_vert == segment_order__contiguous)
+ dcc_control = dcc_control__256_128;
+ else
+ dcc_control = dcc_control__256_64;
+ } else {
+ if ((p1_req128_horz_wc &&
+ p1_segment_order_horz == segment_order__non_contiguous) ||
+ (p1_req128_vert_wc &&
+ p1_segment_order_vert == segment_order__non_contiguous))
+ /* access_dir not known, must use most constraining */
+ dcc_control = dcc_control__256_64;
+ else
+ /* req128 is set for horz and/or vert, but no direction
+ * that needs it is non-contiguous
+ */
+ dcc_control = dcc_control__256_128;
+ }
+
+ switch (dcc_control) { /* plane 1 result goes to video.chroma */
+ case dcc_control__256_256:
+ output->video.chroma.dcc_controls.dcc_256_256 = 1;
+ output->video.chroma.dcc_controls.dcc_256_128 = 1;
+ output->video.chroma.dcc_controls.dcc_256_64 = 1;
+ break;
+ case dcc_control__256_128:
+ output->video.chroma.dcc_controls.dcc_256_128 = 1;
+ output->video.chroma.dcc_controls.dcc_256_64 = 1;
+ break;
+ case dcc_control__256_64:
+ output->video.chroma.dcc_controls.dcc_256_64 = 1;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ }
+
+ output->capable = true;
+ return true;
+}
+/* Write DETn_SIZE for pipe hubp_inst (0-3), cache the value in hubbub2, then warn if DET + compbuf exceed the CRB. */
+static void dcn401_program_det_segments(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+ switch (hubp_inst) {
+ case 0:
+ REG_UPDATE(DCHUBBUB_DET0_CTRL,
+ DET0_SIZE, det_buffer_size_seg);
+ hubbub2->det0_size = det_buffer_size_seg;
+ break;
+ case 1:
+ REG_UPDATE(DCHUBBUB_DET1_CTRL,
+ DET1_SIZE, det_buffer_size_seg);
+ hubbub2->det1_size = det_buffer_size_seg;
+ break;
+ case 2:
+ REG_UPDATE(DCHUBBUB_DET2_CTRL,
+ DET2_SIZE, det_buffer_size_seg);
+ hubbub2->det2_size = det_buffer_size_seg;
+ break;
+ case 3:
+ REG_UPDATE(DCHUBBUB_DET3_CTRL,
+ DET3_SIZE, det_buffer_size_seg);
+ hubbub2->det3_size = det_buffer_size_seg;
+ break;
+ default: /* any other hubp_inst: nothing is programmed or cached */
+ break;
+ }
+ if (hubbub2->det0_size + hubbub2->det1_size + hubbub2->det2_size
+ + hubbub2->det3_size + hubbub2->compbuf_size_segments > hubbub2->crb_size_segs) {
+ /* Only logged, not rejected: this may happen during seamless transition from ODM 2:1 to ODM4:1 */
+ DC_LOG_WARNING("CRB Config Warning: DET size (%d,%d,%d,%d) + Compbuf size (%d) > CRB segments (%d)\n",
+ hubbub2->det0_size, hubbub2->det1_size, hubbub2->det2_size, hubbub2->det3_size,
+ hubbub2->compbuf_size_segments, hubbub2->crb_size_segs);
+ }
+}
+/* Program COMPBUF_SIZE: a smaller or equal size is always applied, a larger one only when safe_to_increase is set. */
+static void dcn401_program_compbuf_segments(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+ unsigned int cur_compbuf_size_seg = 0;
+
+ if (safe_to_increase || compbuf_size_seg <= hubbub2->compbuf_size_segments) {
+ if (compbuf_size_seg > hubbub2->compbuf_size_segments) { /* growing: REG_WAIT on each DETn_SIZE_CURRENT vs. the cached size first */
+ REG_WAIT(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, hubbub2->det0_size, 1, 100);
+ REG_WAIT(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, hubbub2->det1_size, 1, 100);
+ REG_WAIT(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, hubbub2->det2_size, 1, 100);
+ REG_WAIT(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, hubbub2->det3_size, 1, 100);
+ }
+ /* Should never be hit, if it is we have an erroneous hw config */
+ ASSERT(hubbub2->det0_size + hubbub2->det1_size + hubbub2->det2_size
+ + hubbub2->det3_size + compbuf_size_seg <= hubbub2->crb_size_segs);
+ REG_UPDATE(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE, compbuf_size_seg);
+ hubbub2->compbuf_size_segments = compbuf_size_seg;
+ /* Despite its name, cur_compbuf_size_seg receives the CONFIG_ERROR field here and must read back as 0 */
+ ASSERT(REG_GET(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, &cur_compbuf_size_seg) && !cur_compbuf_size_seg);
+ }
+}
+/* REG_WAIT on DETn_SIZE_CURRENT of pipe hubp_inst (0-3) against the cached detN_size; other values do nothing. */
+static void dcn401_wait_for_det_update(struct hubbub *hubbub, int hubp_inst)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+ switch (hubp_inst) {
+ case 0:
+ REG_WAIT(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, hubbub2->det0_size, 1, 100000); /* 1 vupdate at 10hz */
+ break;
+ case 1:
+ REG_WAIT(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, hubbub2->det1_size, 1, 100000);
+ break;
+ case 2:
+ REG_WAIT(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, hubbub2->det2_size, 1, 100000);
+ break;
+ case 3:
+ REG_WAIT(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, hubbub2->det3_size, 1, 100000);
+ break;
+ default: /* unknown pipe index: no wait */
+ break;
+ }
+}
+/* Program arbiter settings from arb_regs; returns true when the sdpif rate-limit setting was not applied. */
+static bool dcn401_program_arbiter(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower)
+{
+ struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+ bool wm_pending = false;
+ uint32_t temp;
+
+ /* request backpressure and outstanding return threshold (unused) */
+ //REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, arb_regs->req_stall_threshold);
+
+ /* P-State stall threshold (always written, regardless of safe_to_lower) */
+ REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, arb_regs->pstate_stall_threshold);
+
+ if (safe_to_lower || arb_regs->allow_sdpif_rate_limit_when_cstate_req > hubbub2->allow_sdpif_rate_limit_when_cstate_req) {
+ hubbub2->allow_sdpif_rate_limit_when_cstate_req = arb_regs->allow_sdpif_rate_limit_when_cstate_req;
+
+ /* read-modify-write: only bit 5 of DCHUBBUB_HW_DEBUG is changed */
+ REG_GET(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, &temp);
+ if (hubbub2->allow_sdpif_rate_limit_when_cstate_req) {
+ temp |= (1 << 5);
+ } else {
+ temp &= ~(1 << 5);
+ }
+ REG_UPDATE(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, temp);
+ } else { /* requested value is not higher and safe_to_lower is false: leave hardware as is */
+ wm_pending = true;
+ }
+
+ return wm_pending;
+}
+/* hubbub_funcs table that hubbub401_construct() installs in hubbub2->base.funcs. */
+static const struct hubbub_funcs hubbub4_01_funcs = {
+ .update_dchub = hubbub2_update_dchub,
+ .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx,
+ .init_vm_ctx = hubbub2_init_vm_ctx,
+ .dcc_support_swizzle_addr3 = hubbub401_dcc_support_swizzle,
+ .dcc_support_pixel_format_plane0_plane1 = hubbub401_dcc_support_pixel_format,
+ .get_dcc_compression_cap = hubbub401_get_dcc_compression_cap,
+ .wm_read_state = hubbub401_wm_read_state,
+ .get_dchub_ref_freq = hubbub2_get_dchub_ref_freq,
+ .program_watermarks = hubbub401_program_watermarks,
+ .allow_self_refresh_control = hubbub1_allow_self_refresh_control,
+ .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled,
+ .verify_allow_pstate_change_high = NULL, /* no handler is provided for this hook */
+ .force_wm_propagate_to_pipes = hubbub32_force_wm_propagate_to_pipes,
+ .force_pstate_change_control = hubbub3_force_pstate_change_control,
+ .init_watermarks = hubbub401_init_watermarks,
+ .init_crb = dcn401_init_crb,
+ .hubbub_read_state = hubbub2_read_state,
+ .force_usr_retraining_allow = hubbub32_force_usr_retraining_allow,
+ .set_request_limit = hubbub32_set_request_limit,
+ .program_det_segments = dcn401_program_det_segments,
+ .program_compbuf_segments = dcn401_program_compbuf_segments,
+ .wait_for_det_update = dcn401_wait_for_det_update,
+ .program_arbiter = dcn401_program_arbiter,
+ .get_det_sizes = hubbub3_get_det_sizes,
+ .compbuf_config_error = hubbub3_compbuf_config_error,
+};
+/* Fill in a dcn20_hubbub for DCN4.01: bind ctx, funcs and register tables, and convert the KB sizes. */
+void hubbub401_construct(struct dcn20_hubbub *hubbub2,
+ struct dc_context *ctx,
+ const struct dcn_hubbub_registers *hubbub_regs,
+ const struct dcn_hubbub_shift *hubbub_shift,
+ const struct dcn_hubbub_mask *hubbub_mask,
+ int det_size_kb,
+ int pixel_chunk_size_kb,
+ int config_return_buffer_size_kb)
+{
+ hubbub2->base.ctx = ctx;
+ hubbub2->base.funcs = &hubbub4_01_funcs;
+ hubbub2->regs = hubbub_regs;
+ hubbub2->shifts = hubbub_shift;
+ hubbub2->masks = hubbub_mask;
+
+ hubbub2->detile_buf_size = det_size_kb * 1024; /* KB -> bytes */
+ hubbub2->pixel_chunk_size = pixel_chunk_size_kb * 1024; /* KB -> bytes */
+ hubbub2->crb_size_segs = config_return_buffer_size_kb / DCN4_01_CRB_SEGMENT_SIZE_KB; /* KB -> whole segments (integer division) */
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h
new file mode 100644
index 000000000000..b1d9ea9d1c3d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_HUBBUB_DCN401_H__
+#define __DC_HUBBUB_DCN401_H__
+
+#include "dcn32/dcn32_hubbub.h"
+
+#define DCN4_01_CRB_SIZE_KB 1344
+#define DCN4_01_DEFAULT_DET_SIZE 320
+#define DCN4_01_CRB_SEGMENT_SIZE_KB 64
+
+#define HUBBUB_MASK_SH_LIST_DCN4_01(mask_sh)\
+ HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_SOFT_RESET, DCHUBBUB_GLOBAL_SOFT_RESET, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_DONE_INTERRUPT_DISABLE, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_VALUE, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_VALUE, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_PSTATE_CHANGE_FORCE_ENABLE, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_SAT_LEVEL, DCHUBBUB_ARB_SAT_LEVEL, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MAX_REQ_OUTSTAND, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
+ HUBBUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE, mask_sh), \
+ HUBBUB_SF(DCN_VM_FB_LOCATION_TOP, FB_TOP, mask_sh), \
+ HUBBUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET, mask_sh), \
+ HUBBUB_SF(DCN_VM_AGP_BOT, AGP_BOT, mask_sh), \
+ HUBBUB_SF(DCN_VM_AGP_TOP, AGP_TOP, mask_sh), \
+ HUBBUB_SF(DCN_VM_AGP_BASE, AGP_BASE, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_DEBUG_CTRL_0, DET_DEPTH, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_DET0_CTRL, DET0_SIZE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_DET1_CTRL, DET1_SIZE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_DET2_CTRL, DET2_SIZE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_DET3_CTRL, DET3_SIZE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE_CURRENT, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, mask_sh),\
+ HUBBUB_SF(COMPBUF_RESERVED_SPACE, COMPBUF_RESERVED_SPACE_64B, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_CNTL, DCHUBBUB_ARB_ALLOW_USR_RETRAINING_FORCE_VALUE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_CNTL, DCHUBBUB_ARB_ALLOW_USR_RETRAINING_FORCE_ENABLE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_CNTL, DCHUBBUB_ARB_DO_NOT_FORCE_ALLOW_USR_RETRAINING_DURING_PSTATE_CHANGE_REQUEST, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_CNTL, DCHUBBUB_ARB_DO_NOT_FORCE_ALLOW_USR_RETRAINING_DURING_PRE_CSTATE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, mask_sh), \
+ HUBBUB_SF(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_ADDR_MSB, DCN_VM_FAULT_ADDR_MSB, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_ADDR_LSB, DCN_VM_FAULT_ADDR_LSB, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_CLEAR, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_MODE, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_INTERRUPT_ENABLE, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_RANGE_FAULT_DISABLE, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_PRQ_FAULT_DISABLE, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_STATUS, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_VMID, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_TABLE_LEVEL, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_PIPE, mask_sh), \
+ HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh),\
+ HUBBUB_SF(SDPIF_REQUEST_RATE_LIMIT, SDPIF_REQUEST_RATE_LIMIT, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_CLOCK_CNTL, DISPCLK_R_DCHUBBUB_GATE_DIS, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_CLOCK_CNTL, DCFCLK_R_DCHUBBUB_GATE_DIS, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_SDPIF_CFG0, SDPIF_PORT_CONTROL, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_SDPIF_CFG1, SDPIF_MAX_NUM_OUTSTANDING, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_MEM_PWR_MODE_CTRL, DET_MEM_PWR_LS_MODE, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_ERROR_STATUS, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_DETECTION_EN, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_UNDERFLOW_STATUS, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_STATUS, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_CLEAR, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, mask_sh),\
+ HUBBUB_SF(DCHUBBUB_CTRL_STATUS, CSTATE_SWATH_CHK_GOOD_MODE, mask_sh)
+
+bool hubbub401_program_urgent_watermarks(
+ struct hubbub *hubbub,
+ union dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower);
+
+bool hubbub401_program_stutter_watermarks(
+ struct hubbub *hubbub,
+ union dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower);
+
+bool hubbub401_program_pstate_watermarks(
+ struct hubbub *hubbub,
+ union dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower);
+
+bool hubbub401_program_usr_watermarks(
+ struct hubbub *hubbub,
+ union dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower);
+
+bool hubbub401_dcc_support_swizzle(
+ enum swizzle_mode_addr3_values swizzle,
+ unsigned int plane_pitch,
+ unsigned int bytes_per_element,
+ enum segment_order *segment_order_horz,
+ enum segment_order *segment_order_vert);
+
+bool hubbub401_dcc_support_pixel_format(
+ enum surface_pixel_format format,
+ unsigned int *plane0_bpe,
+ unsigned int *plane1_bpe);
+
+void hubbub401_get_blk256_size(
+ unsigned int *blk256_width,
+ unsigned int *blk256_height,
+ unsigned int bytes_per_element);
+
+void hubbub401_det_request_size(
+ unsigned int detile_buf_size,
+ enum surface_pixel_format format,
+ unsigned int p0_height,
+ unsigned int p0_width,
+ unsigned int p0_bpe,
+ unsigned int p1_height,
+ unsigned int p1_width,
+ unsigned int p1_bpe,
+ bool *p0_req128_horz_wc,
+ bool *p0_req128_vert_wc,
+ bool *p1_req128_horz_wc,
+ bool *p1_req128_vert_wc);
+bool hubbub401_get_dcc_compression_cap(
+ struct hubbub *hubbub,
+ const struct dc_dcc_surface_param *input,
+ struct dc_surface_dcc_cap *output);
+
+void hubbub401_construct(struct dcn20_hubbub *hubbub2,
+ struct dc_context *ctx,
+ const struct dcn_hubbub_registers *hubbub_regs,
+ const struct dcn_hubbub_shift *hubbub_shift,
+ const struct dcn_hubbub_mask *hubbub_mask,
+ int det_size_kb,
+ int pixel_chunk_size_kb,
+ int config_return_buffer_size_kb);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/Makefile b/drivers/gpu/drm/amd/display/dc/hubp/Makefile
new file mode 100644
index 000000000000..a2d1128de7a1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hubp/Makefile
@@ -0,0 +1,97 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'hubp' sub-component of DAL.
+#
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+HUBP_DCN10 = dcn10_hubp.o
+
+AMD_DAL_HUBP_DCN10 = $(addprefix $(AMDDALPATH)/dc/hubp/dcn10/,$(HUBP_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBP_DCN10)
+###############################################################################
+
+HUBP_DCN20 = dcn20_hubp.o
+
+AMD_DAL_HUBP_DCN20 = $(addprefix $(AMDDALPATH)/dc/hubp/dcn20/,$(HUBP_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBP_DCN20)
+
+###############################################################################
+
+HUBP_DCN201 = dcn201_hubp.o
+
+AMD_DAL_HUBP_DCN201 = $(addprefix $(AMDDALPATH)/dc/hubp/dcn201/,$(HUBP_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBP_DCN201)
+
+###############################################################################
+
+HUBP_DCN21 = dcn21_hubp.o
+
+AMD_DAL_HUBP_DCN21 = $(addprefix $(AMDDALPATH)/dc/hubp/dcn21/,$(HUBP_DCN21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBP_DCN21)
+
+###############################################################################
+HUBP_DCN30 = dcn30_hubp.o
+
+AMD_DAL_HUBP_DCN30 = $(addprefix $(AMDDALPATH)/dc/hubp/dcn30/,$(HUBP_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBP_DCN30)
+
+###############################################################################
+
+HUBP_DCN31 = dcn31_hubp.o
+
+AMD_DAL_HUBP_DCN31 = $(addprefix $(AMDDALPATH)/dc/hubp/dcn31/,$(HUBP_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBP_DCN31)
+
+###############################################################################
+
+HUBP_DCN32 = dcn32_hubp.o
+
+AMD_DAL_HUBP_DCN32 = $(addprefix $(AMDDALPATH)/dc/hubp/dcn32/,$(HUBP_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBP_DCN32)
+
+###############################################################################
+
+HUBP_DCN35 = dcn35_hubp.o
+
+AMD_DAL_HUBP_DCN35 = $(addprefix $(AMDDALPATH)/dc/hubp/dcn35/,$(HUBP_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBP_DCN35)
+
+###############################################################################
+
+HUBP_DCN401 = dcn401_hubp.o
+
+AMD_DAL_HUBP_DCN401 = $(addprefix $(AMDDALPATH)/dc/hubp/dcn401/,$(HUBP_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HUBP_DCN401)
+
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c
index bf399819ca80..9b026600b90e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c
@@ -140,7 +140,7 @@ void hubp1_vready_workaround(struct hubp *hubp,
void hubp1_program_tiling(
struct hubp *hubp,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format)
{
struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
@@ -518,6 +518,20 @@ bool hubp1_program_surface_flip_and_addr(
return true;
}
+void hubp1_clear_tiling(struct hubp *hubp) /* reset the tiling/DCC fields written below */
+{
+ struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); /* presumably consumed by the REG_* macros - confirm against this file's REG() */
+
+ REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0);
+ REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR);
+
+ REG_UPDATE_4(DCSURF_SURFACE_CONTROL, /* clear DCC enable and IND_64B_BLK for primary and secondary */
+ PRIMARY_SURFACE_DCC_EN, 0,
+ PRIMARY_SURFACE_DCC_IND_64B_BLK, 0,
+ SECONDARY_SURFACE_DCC_EN, 0,
+ SECONDARY_SURFACE_DCC_IND_64B_BLK, 0);
+}
+
void hubp1_dcc_control(struct hubp *hubp, bool enable,
enum hubp_ind_block_size independent_64b_blks)
{
@@ -532,10 +546,16 @@ void hubp1_dcc_control(struct hubp *hubp, bool enable,
SECONDARY_SURFACE_DCC_IND_64B_BLK, dcc_ind_64b_blk);
}
+void hubp_reset(struct hubp *hubp) /* zero the pos and att members of @hubp; no register access */
+{
+ memset(&hubp->pos, 0, sizeof(hubp->pos)); /* pos.cur_ctl.bits.cur_enable is what hubp2_cursor_set_position() compares against */
+ memset(&hubp->att, 0, sizeof(hubp->att));
+}
+
void hubp1_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -749,7 +769,8 @@ bool hubp1_is_flip_pending(struct hubp *hubp)
if (flip_pending)
return true;
- if (earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part)
+ if (hubp &&
+ earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part)
return true;
return false;
@@ -1336,8 +1357,9 @@ static void hubp1_wait_pipe_read_start(struct hubp *hubp)
void hubp1_init(struct hubp *hubp)
{
- //do nothing
+ hubp_reset(hubp);
}
+
static const struct hubp_funcs dcn10_hubp_funcs = {
.hubp_program_surface_flip_and_addr =
hubp1_program_surface_flip_and_addr,
@@ -1350,6 +1372,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = {
.hubp_set_vm_context0_settings = hubp1_set_vm_context0_settings,
.set_blank = hubp1_set_blank,
.dcc_control = hubp1_dcc_control,
+ .hubp_reset = hubp_reset,
.mem_program_viewport = min_set_viewport,
.set_hubp_blank_en = hubp1_set_hubp_blank_en,
.set_cursor_attributes = hubp1_cursor_set_attributes,
@@ -1362,6 +1385,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = {
.hubp_disable_control = hubp1_disable_control,
.hubp_get_underflow_status = hubp1_get_underflow_status,
.hubp_init = hubp1_init,
+ .hubp_clear_tiling = hubp1_clear_tiling,
.dmdata_set_attributes = NULL,
.dmdata_load = NULL,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h
index 09784222cc03..cf2eb9793008 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h
@@ -104,7 +104,8 @@
SRI(DCN_SURF1_TTU_CNTL1, HUBPREQ, id),\
SRI(DCN_CUR0_TTU_CNTL0, HUBPREQ, id),\
SRI(DCN_CUR0_TTU_CNTL1, HUBPREQ, id),\
- SRI(HUBP_CLK_CNTL, HUBP, id)
+ SRI(HUBP_CLK_CNTL, HUBP, id),\
+ SRI(HUBPRET_READ_LINE_VALUE, HUBPRET, id)
/* Register address initialization macro for ASICs with VM */
#define HUBP_REG_LIST_DCN_VM(id)\
@@ -249,7 +250,8 @@
uint32_t CURSOR_POSITION; \
uint32_t CURSOR_HOT_SPOT; \
uint32_t CURSOR_DST_OFFSET; \
- uint32_t HUBP_CLK_CNTL
+ uint32_t HUBP_CLK_CNTL; \
+ uint32_t HUBPRET_READ_LINE_VALUE
#define HUBP_SF(reg_name, field_name, post_fix)\
.field_name = reg_name ## __ ## field_name ## post_fix
@@ -622,6 +624,8 @@
type DCN_VM_SYSTEM_APERTURE_DEFAULT_SYSTEM;\
type DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB;\
type DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB;\
+ type PIPE_READ_LINE;\
+ type HUBP_SEG_ALLOC_ERR_STATUS;\
/* todo: get these from GVM instead of reading registers ourselves */\
type PAGE_DIRECTORY_ENTRY_HI32;\
type PAGE_DIRECTORY_ENTRY_LO32;\
@@ -666,10 +670,30 @@ struct dcn_mi_mask {
DCN_HUBP_REG_FIELD_LIST(uint32_t);
};
+struct dcn_fl_regs_st { /* embedded as fl_regs in struct dcn_hubp_state; presumably mirrors the HUBP_3DLUT_* / 3DLUT_FL_* fields - TODO confirm */
+ uint32_t lut_enable;
+ uint32_t lut_done;
+ uint32_t lut_addr_mode;
+ uint32_t lut_width;
+ uint32_t lut_mpc_width;
+ uint32_t lut_tmz;
+ uint32_t lut_crossbar_sel_r;
+ uint32_t lut_crossbar_sel_g;
+ uint32_t lut_crossbar_sel_b;
+ uint32_t lut_addr_hi;
+ uint32_t lut_addr_lo;
+ uint32_t refcyc_3dlut_group;
+ uint32_t lut_fl_bias;
+ uint32_t lut_fl_scale;
+ uint32_t lut_fl_mode;
+ uint32_t lut_fl_format;
+};
+
struct dcn_hubp_state {
struct _vcs_dpi_display_dlg_regs_st dlg_attr;
struct _vcs_dpi_display_ttu_regs_st ttu_attr;
struct _vcs_dpi_display_rq_regs_st rq_regs;
+ struct dcn_fl_regs_st fl_regs;
uint32_t pixel_format;
uint32_t inuse_addr_hi;
uint32_t inuse_addr_lo;
@@ -692,6 +716,7 @@ struct dcn_hubp_state {
uint32_t primary_meta_addr_hi;
uint32_t uclk_pstate_force;
uint32_t hubp_cntl;
+ uint32_t flip_control;
};
struct dcn10_hubp {
@@ -705,7 +730,7 @@ struct dcn10_hubp {
void hubp1_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -738,13 +763,15 @@ void hubp1_program_rotation(
void hubp1_program_tiling(
struct hubp *hubp,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format);
void hubp1_dcc_control(struct hubp *hubp,
bool enable,
enum hubp_ind_block_size independent_64b_blks);
+void hubp_reset(struct hubp *hubp);
+
bool hubp1_program_surface_flip_and_addr(
struct hubp *hubp,
const struct dc_plane_address *address,
@@ -793,4 +820,6 @@ void hubp1_soft_reset(struct hubp *hubp, bool reset);
void hubp1_set_flip_int(struct hubp *hubp);
+void hubp1_clear_tiling(struct hubp *hubp);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c
index 4566bc7abf17..91259b896e03 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c
@@ -30,6 +30,8 @@
#include "reg_helper.h"
#include "basics/conversion.h"
+#define DC_LOGGER \
+ ctx->logger
#define DC_LOGGER_INIT(logger)
#define REG(reg)\
@@ -308,7 +310,7 @@ void hubp2_setup_interdependent(
*/
static void hubp2_program_tiling(
struct dcn20_hubp *hubp2,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format)
{
REG_UPDATE_3(DCSURF_ADDR_CONFIG,
@@ -404,6 +406,20 @@ void hubp2_program_rotation(
H_MIRROR_EN, mirror);
}
+void hubp2_clear_tiling(struct hubp *hubp) /* reset the tiling/DCC fields written below */
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); /* presumably consumed by the REG_* macros - confirm against this file's REG() */
+
+ REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0);
+ REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR);
+
+ REG_UPDATE_4(DCSURF_SURFACE_CONTROL, /* clear DCC enable and IND_64B_BLK for primary and secondary */
+ PRIMARY_SURFACE_DCC_EN, 0,
+ PRIMARY_SURFACE_DCC_IND_64B_BLK, 0,
+ SECONDARY_SURFACE_DCC_EN, 0,
+ SECONDARY_SURFACE_DCC_IND_64B_BLK, 0);
+}
+
void hubp2_dcc_control(struct hubp *hubp, bool enable,
enum hubp_ind_block_size independent_64b_blks)
{
@@ -534,7 +550,7 @@ void hubp2_program_pixel_format(
void hubp2_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -925,7 +941,8 @@ bool hubp2_is_flip_pending(struct hubp *hubp)
if (flip_pending)
return true;
- if (earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part)
+ if (hubp &&
+ earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part)
return true;
return false;
@@ -1041,11 +1058,13 @@ void hubp2_cursor_set_position(
if (src_y_offset + cursor_height <= 0)
cur_en = 0; /* not visible beyond top edge*/
- if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0)
- hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr);
+ if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) {
+ if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0)
+ hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr);
- REG_UPDATE(CURSOR_CONTROL,
+ REG_UPDATE(CURSOR_CONTROL,
CURSOR_ENABLE, cur_en);
+ }
REG_SET_2(CURSOR_POSITION, 0,
CURSOR_X_POSITION, pos->x,
@@ -1075,8 +1094,16 @@ void hubp2_cursor_set_position(
if (src_y_offset < 0)
src_y_offset = 0;
/* Save necessary cursor info x, y position. w, h is saved in attribute func. */
- hubp->cur_rect.x = src_x_offset + param->viewport.x;
- hubp->cur_rect.y = src_y_offset + param->viewport.y;
+ if (param->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 &&
+ param->rotation != ROTATION_ANGLE_0) {
+ hubp->cur_rect.x = 0;
+ hubp->cur_rect.y = 0;
+ hubp->cur_rect.w = param->stream->timing.h_addressable;
+ hubp->cur_rect.h = param->stream->timing.v_addressable;
+ } else {
+ hubp->cur_rect.x = src_x_offset + param->viewport.x;
+ hubp->cur_rect.y = src_y_offset + param->viewport.y;
+ }
}
void hubp2_clk_cntl(struct hubp *hubp, bool enable)
@@ -1321,6 +1348,12 @@ void hubp2_read_state(struct hubp *hubp)
SWATH_HEIGHT_C, &rq_regs->rq_regs_c.swath_height,
PTE_ROW_HEIGHT_LINEAR_C, &rq_regs->rq_regs_c.pte_row_height_linear);
+ if (REG(DCHUBP_CNTL))
+ s->hubp_cntl = REG_READ(DCHUBP_CNTL);
+
+ if (REG(DCSURF_FLIP_CONTROL))
+ s->flip_control = REG_READ(DCSURF_FLIP_CONTROL);
+
}
static void hubp2_validate_dml_output(struct hubp *hubp,
@@ -1643,6 +1676,7 @@ static struct hubp_funcs dcn20_hubp_funcs = {
.set_blank = hubp2_set_blank,
.set_blank_regs = hubp2_set_blank_regs,
.dcc_control = hubp2_dcc_control,
+ .hubp_reset = hubp_reset,
.mem_program_viewport = min_set_viewport,
.set_cursor_attributes = hubp2_cursor_set_attributes,
.set_cursor_position = hubp2_cursor_set_position,
@@ -1659,6 +1693,7 @@ static struct hubp_funcs dcn20_hubp_funcs = {
.hubp_in_blank = hubp1_in_blank,
.hubp_soft_reset = hubp1_soft_reset,
.hubp_set_flip_int = hubp1_set_flip_int,
+ .hubp_clear_tiling = hubp2_clear_tiling,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
index efa2adf4f83d..f325db555102 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
@@ -147,7 +147,7 @@
uint32_t DCN_CUR1_TTU_CNTL1;\
uint32_t VMID_SETTINGS_0
-
+/*shared with dcn3.x*/
#define DCN21_HUBP_REG_COMMON_VARIABLE_LIST \
DCN2_HUBP_REG_COMMON_VARIABLE_LIST; \
uint32_t FLIP_PARAMETERS_3;\
@@ -167,6 +167,17 @@
uint32_t DCHUBP_VMPG_CONFIG;\
uint32_t UCLK_PSTATE_FORCE
+#define DCN401_HUBP_REG_COMMON_VARIABLE_LIST /* DCN32 register list plus the 3DLUT and MCACHE registers below */ \
+ DCN32_HUBP_REG_COMMON_VARIABLE_LIST;\
+ uint32_t _3DLUT_FL_BIAS_SCALE;\
+ uint32_t _3DLUT_FL_CONFIG;\
+ uint32_t HUBP_3DLUT_ADDRESS_HIGH;\
+ uint32_t HUBP_3DLUT_ADDRESS_LOW;\
+ uint32_t HUBP_3DLUT_CONTROL;\
+ uint32_t HUBP_3DLUT_DLG_PARAM;\
+ uint32_t DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE;\
+ uint32_t DCHUBP_MCACHEID_CONFIG
+
#define DCN2_HUBP_REG_FIELD_VARIABLE_LIST(type) \
DCN_HUBP_REG_FIELD_BASE_LIST(type); \
type DMDATA_ADDRESS_HIGH;\
@@ -241,16 +252,48 @@
type CURSOR_UCLK_PSTATE_FORCE_EN; \
type CURSOR_UCLK_PSTATE_FORCE_VALUE
+#define DCN401_HUBP_REG_FIELD_VARIABLE_LIST(type) /* DCN32 field list plus the fields below, each declared as 'type' */ \
+ DCN32_HUBP_REG_FIELD_VARIABLE_LIST(type);\
+ type MALL_PREF_CMD_TYPE; \
+ type MALL_PREF_MODE; \
+ type HUBP0_3DLUT_FL_MODE; \
+ type HUBP0_3DLUT_FL_FORMAT; \
+ type HUBP0_3DLUT_FL_SCALE; \
+ type HUBP0_3DLUT_FL_BIAS; \
+ type HUBP_3DLUT_ENABLE;\
+ type HUBP_3DLUT_DONE;\
+ type HUBP_3DLUT_ADDRESSING_MODE;\
+ type HUBP_3DLUT_WIDTH;\
+ type HUBP_3DLUT_MPC_WIDTH;\
+ type HUBP_3DLUT_TMZ;\
+ type HUBP_3DLUT_CROSSBAR_SELECT_Y_G;\
+ type HUBP_3DLUT_CROSSBAR_SELECT_CB_B;\
+ type HUBP_3DLUT_CROSSBAR_SELECT_CR_R;\
+ type HUBP_3DLUT_ADDRESS_HIGH;\
+ type HUBP_3DLUT_ADDRESS_LOW;\
+ type REFCYC_PER_3DLUT_GROUP;\
+ type VIEWPORT_MCACHE_SPLIT_COORDINATE;\
+ type VIEWPORT_MCACHE_SPLIT_COORDINATE_C;\
+ type MCACHEID_REG_READ_1H_P0;\
+ type MCACHEID_REG_READ_2H_P0;\
+ type MCACHEID_REG_READ_1H_P1;\
+ type MCACHEID_REG_READ_2H_P1;\
+ type MCACHEID_MALL_PREF_1H_P0;\
+ type MCACHEID_MALL_PREF_2H_P0;\
+ type MCACHEID_MALL_PREF_1H_P1;\
+ type MCACHEID_MALL_PREF_2H_P1;\
+ type HUBP_FGCG_REP_DIS
+
struct dcn_hubp2_registers {
- DCN32_HUBP_REG_COMMON_VARIABLE_LIST;
+ DCN401_HUBP_REG_COMMON_VARIABLE_LIST;
};
struct dcn_hubp2_shift {
- DCN32_HUBP_REG_FIELD_VARIABLE_LIST(uint8_t);
+ DCN401_HUBP_REG_FIELD_VARIABLE_LIST(uint8_t);
};
struct dcn_hubp2_mask {
- DCN32_HUBP_REG_FIELD_VARIABLE_LIST(uint32_t);
+ DCN401_HUBP_REG_FIELD_VARIABLE_LIST(uint32_t);
};
struct dcn20_hubp {
@@ -339,7 +382,7 @@ void hubp2_program_pixel_format(
void hubp2_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -366,6 +409,8 @@ void hubp2_read_state_common(struct hubp *hubp);
void hubp2_read_state(struct hubp *hubp);
+void hubp2_clear_tiling(struct hubp *hubp);
+
#endif /* __DC_MEM_INPUT_DCN20_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c
index 35dd4bac242a..ec88ee424a7f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c
@@ -42,7 +42,7 @@
static void hubp201_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -77,6 +77,7 @@ static void hubp201_program_requestor(struct hubp *hubp,
MRQ_EXPANSION_MODE, rq_regs->mrq_expansion_mode,
CRQ_EXPANSION_MODE, rq_regs->crq_expansion_mode);
+ /* no need to program PTE */
REG_SET_5(DCHUBP_REQ_SIZE_CONFIG, 0,
CHUNK_SIZE, rq_regs->rq_regs_l.chunk_size,
MIN_CHUNK_SIZE, rq_regs->rq_regs_l.min_chunk_size,
@@ -99,6 +100,10 @@ static void hubp201_setup(
struct _vcs_dpi_display_rq_regs_st *rq_regs,
struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest)
{
+ /*
+ * OTG is locked when this function is called and the registers are double
+ * buffered, so disabling the requestors is not needed.
+ */
hubp2_vready_at_or_After_vsync(hubp, pipe_dest);
hubp201_program_requestor(hubp, rq_regs);
hubp201_program_deadline(hubp, dlg_attr, ttu_attr);
@@ -116,6 +121,7 @@ static struct hubp_funcs dcn201_hubp_funcs = {
.set_cursor_position = hubp1_cursor_set_position,
.set_blank = hubp1_set_blank,
.dcc_control = hubp1_dcc_control,
+ .hubp_reset = hubp_reset,
.mem_program_viewport = min_set_viewport,
.hubp_clk_cntl = hubp1_clk_cntl,
.hubp_vtg_sel = hubp1_vtg_sel,
@@ -126,6 +132,7 @@ static struct hubp_funcs dcn201_hubp_funcs = {
.hubp_clear_underflow = hubp1_clear_underflow,
.hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
.hubp_init = hubp1_init,
+ .hubp_clear_tiling = hubp1_clear_tiling,
};
bool dcn201_hubp_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.h
index a1e3384eed63..a1e3384eed63 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c
index f976fac8dc3f..e2740482e1cf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c
@@ -31,6 +31,8 @@
#include "dc_dmub_srv.h"
+#define DC_LOGGER \
+ ctx->logger
#define DC_LOGGER_INIT(logger)
#define REG(reg)\
@@ -689,7 +691,7 @@ static void dmcub_PLAT_54186_wa(struct hubp *hubp,
cmd.PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid;
PERF_TRACE(); // TODO: remove after performance is stable.
- dm_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(hubp->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
PERF_TRACE(); // TODO: remove after performance is stable.
}
@@ -809,6 +811,8 @@ static void hubp21_init(struct hubp *hubp)
struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp);
//hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1;
REG_WRITE(HUBPREQ_DEBUG, 1 << 26);
+
+ hubp_reset(hubp);
}
static struct hubp_funcs dcn21_hubp_funcs = {
.hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
@@ -821,6 +825,7 @@ static struct hubp_funcs dcn21_hubp_funcs = {
.hubp_set_vm_system_aperture_settings = hubp21_set_vm_system_aperture_settings,
.set_blank = hubp1_set_blank,
.dcc_control = hubp1_dcc_control,
+ .hubp_reset = hubp_reset,
.mem_program_viewport = hubp21_set_viewport,
.set_cursor_attributes = hubp2_cursor_set_attributes,
.set_cursor_position = hubp1_cursor_set_position,
@@ -835,6 +840,7 @@ static struct hubp_funcs dcn21_hubp_funcs = {
.hubp_init = hubp21_init,
.validate_dml_output = hubp21_validate_dml_output,
.hubp_set_flip_int = hubp1_set_flip_int,
+ .hubp_clear_tiling = hubp1_clear_tiling,
};
bool hubp21_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.h
index 9873b6cbc5ba..9873b6cbc5ba 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
index 2861d974fcf6..556214b2227d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
@@ -316,9 +316,9 @@ bool hubp3_program_surface_flip_and_addr(
return true;
}
-static void hubp3_program_tiling(
+void hubp3_program_tiling(
struct dcn20_hubp *hubp2,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format)
{
REG_UPDATE_4(DCSURF_ADDR_CONFIG,
@@ -334,6 +334,22 @@ static void hubp3_program_tiling(
}
+void hubp3_clear_tiling(struct hubp *hubp) /* reset the tiling/DCC fields written below */
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); /* presumably consumed by the REG_* macros - confirm against this file's REG() */
+
+ REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0);
+ REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR);
+
+ REG_UPDATE_6(DCSURF_SURFACE_CONTROL, /* clear DCC enable, IND_BLK and IND_BLK_C for primary and secondary */
+ PRIMARY_SURFACE_DCC_EN, 0,
+ PRIMARY_SURFACE_DCC_IND_BLK, 0,
+ PRIMARY_SURFACE_DCC_IND_BLK_C, 0,
+ SECONDARY_SURFACE_DCC_EN, 0,
+ SECONDARY_SURFACE_DCC_IND_BLK, 0,
+ SECONDARY_SURFACE_DCC_IND_BLK_C, 0);
+}
+
void hubp3_dcc_control(struct hubp *hubp, bool enable,
enum hubp_ind_block_size blk_size)
{
@@ -395,7 +411,7 @@ void hubp3_dmdata_set_attributes(
void hubp3_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -455,6 +471,9 @@ void hubp3_read_state(struct hubp *hubp)
if (REG(DCHUBP_CNTL))
s->hubp_cntl = REG_READ(DCHUBP_CNTL);
+ if (REG(DCSURF_FLIP_CONTROL))
+ s->flip_control = REG_READ(DCSURF_FLIP_CONTROL);
+
}
void hubp3_setup(
@@ -480,6 +499,34 @@ void hubp3_init(struct hubp *hubp)
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
//hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1;
REG_WRITE(HUBPREQ_DEBUG, 1 << 26);
+
+ REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0);
+
+ hubp_reset(hubp);
+}
+
+uint32_t hubp3_get_current_read_line(struct hubp *hubp) /* returns the PIPE_READ_LINE field of HUBPRET_READ_LINE_VALUE */
+{
+ uint32_t read_line = 0;
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); /* presumably consumed by REG_GET - confirm against this file's REG() */
+
+ REG_GET(HUBPRET_READ_LINE_VALUE,
+ PIPE_READ_LINE,
+ &read_line);
+
+ return read_line;
+}
+
+unsigned int hubp3_get_underflow_status(struct hubp *hubp) /* NOTE(review): dcn30_hubp.h spells this return type uint32_t */
+{
+ uint32_t hubp_underflow = 0;
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); /* presumably consumed by REG_GET - confirm against this file's REG() */
+
+ REG_GET(DCHUBP_CNTL, /* returns the HUBP_UNDERFLOW_STATUS field */
+ HUBP_UNDERFLOW_STATUS,
+ &hubp_underflow);
+
+ return hubp_underflow;
+}
static struct hubp_funcs dcn30_hubp_funcs = {
@@ -494,6 +541,7 @@ static struct hubp_funcs dcn30_hubp_funcs = {
.set_blank = hubp2_set_blank,
.set_blank_regs = hubp2_set_blank_regs,
.dcc_control = hubp3_dcc_control,
+ .hubp_reset = hubp_reset,
.mem_program_viewport = min_set_viewport,
.set_cursor_attributes = hubp2_cursor_set_attributes,
.set_cursor_position = hubp2_cursor_set_position,
@@ -509,6 +557,9 @@ static struct hubp_funcs dcn30_hubp_funcs = {
.hubp_in_blank = hubp1_in_blank,
.hubp_soft_reset = hubp1_soft_reset,
.hubp_set_flip_int = hubp1_set_flip_int,
+ .hubp_clear_tiling = hubp3_clear_tiling,
+ .hubp_get_underflow_status = hubp3_get_underflow_status,
+ .hubp_get_current_read_line = hubp3_get_current_read_line,
};
bool hubp3_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h
index 8a32772d4e91..842f4eb72cc8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h
@@ -243,7 +243,8 @@
HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_6, REFCYC_PER_META_CHUNK_FLIP_C, mask_sh),\
HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_5, REFCYC_PER_VM_GROUP_VBLANK, mask_sh),\
HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_6, REFCYC_PER_VM_REQ_VBLANK, mask_sh),\
- HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh)
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh),\
+ HUBP_SF(HUBPRET0_HUBPRET_READ_LINE_VALUE, PIPE_READ_LINE, mask_sh)
bool hubp3_construct(
struct dcn20_hubp *hubp2,
@@ -264,7 +265,7 @@ bool hubp3_program_surface_flip_and_addr(
void hubp3_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -278,6 +279,11 @@ void hubp3_setup(
struct _vcs_dpi_display_rq_regs_st *rq_regs,
struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest);
+void hubp3_program_tiling(
+ struct dcn20_hubp *hubp2,
+ const struct dc_tiling_info *info,
+ const enum surface_pixel_format pixel_format);
+
void hubp3_dcc_control(struct hubp *hubp, bool enable,
enum hubp_ind_block_size blk_size);
@@ -292,6 +298,13 @@ void hubp3_read_state(struct hubp *hubp);
void hubp3_init(struct hubp *hubp);
+void hubp3_clear_tiling(struct hubp *hubp);
+
+uint32_t hubp3_get_current_read_line(struct hubp *hubp);
+
+uint32_t hubp3_get_underflow_status(struct hubp *hubp);
+
+
#endif /* __DC_HUBP_DCN30_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
index 39a57bcd7866..47101847c2b7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
@@ -44,7 +44,7 @@ void hubp31_set_unbounded_requesting(struct hubp *hubp, bool enable)
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
REG_UPDATE(DCHUBP_CNTL, HUBP_UNBOUNDED_REQ_MODE, enable);
- REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, enable);
+ REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, 1);
}
void hubp31_soft_reset(struct hubp *hubp, bool reset)
@@ -62,6 +62,24 @@ static void hubp31_program_extended_blank(struct hubp *hubp,
REG_UPDATE(BLANK_OFFSET_1, MIN_DST_Y_NEXT_START, min_dst_y_next_start_optimized);
}
+void hubp31_program_extended_blank_value( /* non-static entry point for the static hubp31_program_extended_blank() */
+ struct hubp *hubp, unsigned int min_dst_y_next_start_optimized)
+{
+ hubp31_program_extended_blank(hubp, min_dst_y_next_start_optimized); /* arguments forwarded unchanged */
+}
+
+uint32_t hubp31_get_det_config_error(struct hubp *hubp) /* returns the HUBP_SEG_ALLOC_ERR_STATUS field of DCHUBP_CNTL */
+{
+ uint32_t config_error = 0;
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); /* presumably consumed by REG_GET - confirm against this file's REG() */
+
+ REG_GET(DCHUBP_CNTL,
+ HUBP_SEG_ALLOC_ERR_STATUS,
+ &config_error);
+
+ return config_error;
+}
+
static struct hubp_funcs dcn31_hubp_funcs = {
.hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
.hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled,
@@ -73,6 +91,7 @@ static struct hubp_funcs dcn31_hubp_funcs = {
.hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings,
.set_blank = hubp2_set_blank,
.dcc_control = hubp3_dcc_control,
+ .hubp_reset = hubp_reset,
.mem_program_viewport = min_set_viewport,
.set_cursor_attributes = hubp2_cursor_set_attributes,
.set_cursor_position = hubp2_cursor_set_position,
@@ -90,6 +109,10 @@ static struct hubp_funcs dcn31_hubp_funcs = {
.hubp_set_flip_int = hubp1_set_flip_int,
.hubp_in_blank = hubp1_in_blank,
.program_extended_blank = hubp31_program_extended_blank,
+ .hubp_clear_tiling = hubp3_clear_tiling,
+ .hubp_get_underflow_status = hubp3_get_underflow_status,
+ .hubp_get_current_read_line = hubp3_get_current_read_line,
+ .hubp_get_det_config_error = hubp31_get_det_config_error,
};
bool hubp31_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h
index c31a7b8f81ee..5952c4671507 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h
@@ -228,7 +228,9 @@
HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_6, REFCYC_PER_META_CHUNK_FLIP_C, mask_sh),\
HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_5, REFCYC_PER_VM_GROUP_VBLANK, mask_sh),\
HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_6, REFCYC_PER_VM_REQ_VBLANK, mask_sh),\
- HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh)
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh),\
+ HUBP_SF(HUBPRET0_HUBPRET_READ_LINE_VALUE, PIPE_READ_LINE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_SEG_ALLOC_ERR_STATUS, mask_sh)
bool hubp31_construct(
@@ -243,4 +245,9 @@ void hubp31_soft_reset(struct hubp *hubp, bool reset);
void hubp31_set_unbounded_requesting(struct hubp *hubp, bool enable);
+void hubp31_program_extended_blank_value(
+ struct hubp *hubp, unsigned int min_dst_y_next_start_optimized);
+
+uint32_t hubp31_get_det_config_error(struct hubp *hubp);
+
#endif /* __DC_HUBP_DCN31_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
index ca5b4b28a664..a5f23bb2a76a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
@@ -168,6 +168,8 @@ void hubp32_init(struct hubp *hubp)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
REG_WRITE(HUBPREQ_DEBUG_DB, 1 << 8);
+
+ REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0);
}
static struct hubp_funcs dcn32_hubp_funcs = {
.hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
@@ -181,6 +183,7 @@ static struct hubp_funcs dcn32_hubp_funcs = {
.set_blank = hubp2_set_blank,
.set_blank_regs = hubp2_set_blank_regs,
.dcc_control = hubp3_dcc_control,
+ .hubp_reset = hubp_reset,
.mem_program_viewport = min_set_viewport,
.set_cursor_attributes = hubp32_cursor_set_attributes,
.set_cursor_position = hubp2_cursor_set_position,
@@ -201,7 +204,11 @@ static struct hubp_funcs dcn32_hubp_funcs = {
.hubp_update_force_cursor_pstate_disallow = hubp32_update_force_cursor_pstate_disallow,
.phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable,
.hubp_update_mall_sel = hubp32_update_mall_sel,
- .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering
+ .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering,
+ .hubp_clear_tiling = hubp3_clear_tiling,
+ .hubp_get_underflow_status = hubp3_get_underflow_status,
+ .hubp_get_current_read_line = hubp3_get_current_read_line,
+ .hubp_get_det_config_error = hubp31_get_det_config_error,
};
bool hubp32_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.h
index d2acbc129609..d2acbc129609 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.h
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c
new file mode 100644
index 000000000000..b140808f21af
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dcn35_hubp.h"
+#include "reg_helper.h"
+
+#define REG(reg)\
+ hubp2->hubp_regs->reg
+
+#define CTX \
+ hubp2->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+ ((const struct dcn35_hubp2_shift *)hubp2->hubp_shift)->field_name, \
+ ((const struct dcn35_hubp2_mask *)hubp2->hubp_mask)->field_name
+
+void hubp35_set_fgcg(struct hubp *hubp, bool enable) /* writes !enable to HUBP_CLK_CNTL.HUBP_FGCG_REP_DIS */
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); /* referenced by the REG()/FN() macros defined above */
+
+ REG_UPDATE(HUBP_CLK_CNTL, HUBP_FGCG_REP_DIS, !enable); /* inverted: the field name suggests a disable bit - TODO confirm */
+}
+
+void hubp35_init(struct hubp *hubp)
+{
+ hubp3_init(hubp);
+
+ hubp35_set_fgcg(hubp, hubp->ctx->dc->debug.enable_fine_grain_clock_gating.bits.dchub);
+
+ /* no further DCN3.5-specific init beyond hubp3_init() and the FGCG setting above */
+}
+
+void hubp35_program_pixel_format( /* programs the HUBPRET crossbar sources, then SURFACE_PIXEL_FORMAT for @format */
+ struct hubp *hubp,
+ enum surface_pixel_format format)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); /* referenced by the REG()/FN() macros defined above */
+ uint32_t green_bar = 1; /* crossbar source values used unless the format is ABGR-ordered */
+ uint32_t red_bar = 3;
+ uint32_t blue_bar = 2;
+
+ /* ABGR-ordered formats: exchange the red and blue crossbar sources */
+ if (format == SURFACE_PIXEL_FORMAT_GRPH_ABGR8888
+ || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010
+ || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS
+ || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616
+ || format == SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F) {
+ red_bar = 2;
+ blue_bar = 3;
+ }
+
+ REG_UPDATE_3(HUBPRET_CONTROL,
+ CROSSBAR_SRC_Y_G, green_bar,
+ CROSSBAR_SRC_CB_B, blue_bar,
+ CROSSBAR_SRC_CR_R, red_bar);
+
+ /* Mapping is the same as the ipp programming (cnvc) */
+
+ switch (format) {
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 1);
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 3);
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 8);
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010_XR_BIAS:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 10);
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /* the crossbar swap above already handles the ABGR order */
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+ case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:/* the crossbar swap above already handles the ABGR order */
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 24);
+ break;
+
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 65);
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 64);
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 67);
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 66);
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 12);
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FIX:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 112);
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FIX:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 113);
+ break;
+ case SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 114);
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB111110_FLOAT:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 118);
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_BGR101111_FLOAT:
+ REG_UPDATE(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 119);
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGBE: /* same format code as RGBE_ALPHA; only ALPHA_PLANE_EN differs */
+ REG_UPDATE_2(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 116,
+ ALPHA_PLANE_EN, 0);
+ break;
+ case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA:
+ REG_UPDATE_2(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, 116,
+ ALPHA_PLANE_EN, 1);
+ break;
+ default: /* unhandled format: SURFACE_PIXEL_FORMAT is not written */
+ BREAK_TO_DEBUGGER();
+ break;
+ }
+
+ /* NOTE(review): an earlier remark here said the xbar need not be programmed "in DCN 1.0"; this function does program CROSSBAR_SRC_* above */
+}
+
+/* Program a plane's surface state on hubp35: DCC, tiling, size, rotation, format. */
+void hubp35_program_surface_config(struct hubp *hubp,
+	enum surface_pixel_format format,
+	struct dc_tiling_info *tiling_info,
+	struct plane_size *plane_size,
+	enum dc_rotation_angle rotation,
+	struct dc_plane_dcc_param *dcc,
+	bool horizontal_mirror,
+	unsigned int compat_level)
+{
+	/* compat_level is part of the signature but is not read here. */
+	hubp3_dcc_control_sienna_cichlid(hubp, dcc);
+	/* Tiling is the only helper that takes the dcn20_hubp wrapper. */
+	hubp3_program_tiling(TO_DCN20_HUBP(hubp), tiling_info, format);
+	hubp2_program_size(hubp, format, plane_size, dcc);
+	hubp2_program_rotation(hubp, rotation, horizontal_mirror);
+	hubp35_program_pixel_format(hubp, format);
+}
+
+static struct hubp_funcs dcn35_hubp_funcs = { /* installed by hubp35_construct() */
+ .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
+ .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled,
+ .hubp_program_surface_flip_and_addr = hubp3_program_surface_flip_and_addr,
+ .hubp_program_surface_config = hubp35_program_surface_config, /* hubp35 variant */
+ .hubp_is_flip_pending = hubp2_is_flip_pending,
+ .hubp_setup = hubp3_setup,
+ .hubp_setup_interdependent = hubp2_setup_interdependent,
+ .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings,
+ .set_blank = hubp2_set_blank,
+ .dcc_control = hubp3_dcc_control,
+ .hubp_reset = hubp_reset,
+ .mem_program_viewport = min_set_viewport,
+ .set_cursor_attributes = hubp2_cursor_set_attributes,
+ .set_cursor_position = hubp2_cursor_set_position,
+ .hubp_clk_cntl = hubp2_clk_cntl,
+ .hubp_vtg_sel = hubp2_vtg_sel,
+ .dmdata_set_attributes = hubp3_dmdata_set_attributes,
+ .dmdata_load = hubp2_dmdata_load,
+ .dmdata_status_done = hubp2_dmdata_status_done,
+ .hubp_read_state = hubp3_read_state,
+ .hubp_clear_underflow = hubp2_clear_underflow,
+ .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
+ .hubp_init = hubp35_init, /* hubp35 variant, declared in dcn35_hubp.h */
+ .set_unbounded_requesting = hubp31_set_unbounded_requesting,
+ .hubp_soft_reset = hubp31_soft_reset,
+ .hubp_set_flip_int = hubp1_set_flip_int,
+ .hubp_in_blank = hubp1_in_blank,
+ .program_extended_blank = hubp31_program_extended_blank_value,
+ .hubp_clear_tiling = hubp3_clear_tiling,
+ .hubp_get_underflow_status = hubp3_get_underflow_status,
+ .hubp_get_current_read_line = hubp3_get_current_read_line,
+ .hubp_get_det_config_error = hubp31_get_det_config_error,
+};
+
+/* Bind the hubp35 function table, context and register maps to @hubp2; always true. */
+bool hubp35_construct(struct dcn20_hubp *hubp2, struct dc_context *ctx,
+	uint32_t inst, const struct dcn_hubp2_registers *hubp_regs,
+	const struct dcn35_hubp2_shift *hubp_shift,
+	const struct dcn35_hubp2_mask *hubp_mask)
+{
+	struct hubp *base = &hubp2->base;
+
+	base->ctx = ctx;
+	base->inst = inst;
+	base->funcs = &dcn35_hubp_funcs;
+	base->opp_id = OPP_ID_INVALID;
+	base->mpcc_id = 0xf;
+	hubp2->hubp_regs = hubp_regs;
+	hubp2->hubp_shift = (const struct dcn_hubp2_shift *)hubp_shift; /* stored via cast */
+	hubp2->hubp_mask = (const struct dcn_hubp2_mask *)hubp_mask; /* stored via cast */
+
+	return true;
+}
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h
new file mode 100644
index 000000000000..934836717f32
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_HUBP_DCN35_H__
+#define __DC_HUBP_DCN35_H__
+
+#include "dcn31/dcn31_hubp.h"
+#include "dcn32/dcn32_hubp.h"
+#define HUBP_MASK_SH_LIST_DCN35(mask_sh)\
+ HUBP_MASK_SH_LIST_DCN32(mask_sh),\
+ HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_FGCG_REP_DIS, mask_sh) /* the one entry added to the DCN32 list */
+
+#define DCN35_HUBP_REG_FIELD_VARIABLE_LIST(type) \
+ struct { \
+ DCN32_HUBP_REG_FIELD_VARIABLE_LIST(type); \
+ type HUBP_FGCG_REP_DIS; /* the one member added to the DCN32 list */ \
+ }
+
+struct dcn35_hubp2_shift { /* one uint8_t member per register field */
+ DCN35_HUBP_REG_FIELD_VARIABLE_LIST(uint8_t);
+};
+
+struct dcn35_hubp2_mask { /* one uint32_t member per register field */
+ DCN35_HUBP_REG_FIELD_VARIABLE_LIST(uint32_t);
+};
+
+
+bool hubp35_construct(
+ struct dcn20_hubp *hubp2,
+ struct dc_context *ctx,
+ uint32_t inst,
+ const struct dcn_hubp2_registers *hubp_regs,
+ const struct dcn35_hubp2_shift *hubp_shift,
+ const struct dcn35_hubp2_mask *hubp_mask); /* the implementation always returns true */
+
+void hubp35_set_fgcg(struct hubp *hubp, bool enable);
+
+void hubp35_program_pixel_format(
+ struct hubp *hubp,
+ enum surface_pixel_format format);
+
+void hubp35_program_surface_config(
+ struct hubp *hubp,
+ enum surface_pixel_format format,
+ struct dc_tiling_info *tiling_info,
+ struct plane_size *plane_size,
+ enum dc_rotation_angle rotation,
+ struct dc_plane_dcc_param *dcc,
+ bool horizontal_mirror,
+ unsigned int compat_level); /* compat_level is not read by the implementation */
+
+void hubp35_init(struct hubp *hubp);
+#endif /* __DC_HUBP_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
new file mode 100644
index 000000000000..0fcbc6a35be6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
@@ -0,0 +1,1097 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services.h"
+#include "dce_calcs.h"
+#include "reg_helper.h"
+#include "basics/conversion.h"
+#include "dcn401_hubp.h"
+#include "dal_asic_id.h"
+
+#define REG(reg)\
+ hubp2->hubp_regs->reg /* needs a local named hubp2 in scope */
+
+#define CTX \
+ hubp2->base.ctx /* needs a local named hubp2 in scope */
+
+#undef FN
+#define FN(reg_name, field_name) \
+ hubp2->hubp_shift->field_name, hubp2->hubp_mask->field_name /* expands to two arguments; reg_name is unused */
+
+void hubp401_program_3dlut_fl_addr(struct hubp *hubp,
+ const struct dc_plane_address address) /* only address.lut3d.addr is read */
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(HUBP_3DLUT_ADDRESS_HIGH, HUBP_3DLUT_ADDRESS_HIGH, address.lut3d.addr.high_part);
+ REG_WRITE(HUBP_3DLUT_ADDRESS_LOW, address.lut3d.addr.low_part); /* low part written last */
+}
+
+void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group)
+{ /* REFCYC_PER_3DLUT_GROUP is also programmed by hubp401_program_deadline() */
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(HUBP_3DLUT_DLG_PARAM, REFCYC_PER_3DLUT_GROUP, refcyc_per_3dlut_group);
+}
+
+void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_ENABLE, enable ? 1 : 0); /* bool -> 0/1 */
+}
+
+int hubp401_get_3dlut_fl_done(struct hubp *hubp)
+{
+	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+	uint32_t done = 0;	/* unsigned and initialised, like hubp401_in_blank()'s REG_GET target */
+
+	REG_GET(HUBP_3DLUT_CONTROL, HUBP_3DLUT_DONE, &done);
+	return done;	/* HUBP_3DLUT_DONE field of HUBP_3DLUT_CONTROL */
+}
+
+void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_ADDRESSING_MODE, addr_mode); /* enum passed as-is */
+}
+
+void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_WIDTH, width); /* enum passed as-is */
+}
+
+void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, uint8_t protection_bits)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, protection_bits); /* passed as-is */
+}
+
+void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE_3(HUBP_3DLUT_CONTROL, /* all three crossbar selects in one update */
+ HUBP_3DLUT_CROSSBAR_SELECT_Y_G, bit_slice_y_g,
+ HUBP_3DLUT_CROSSBAR_SELECT_CB_B, bit_slice_cb_b,
+ HUBP_3DLUT_CROSSBAR_SELECT_CR_R, bit_slice_cr_r);
+}
+
+void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale)
+{ /* bias and scale are set together in one _3DLUT_FL_BIAS_SCALE update */
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE_2(_3DLUT_FL_BIAS_SCALE, HUBP0_3DLUT_FL_BIAS, bias, HUBP0_3DLUT_FL_SCALE, scale);
+}
+
+void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_MODE, mode); /* enum passed as-is */
+}
+
+void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_FORMAT, format); /* enum passed as-is */
+}
+
+/* Program the complete 3DLUT fast-load configuration described by @cfg. */
+void hubp401_program_3dlut_fl_config(struct hubp *hubp, struct hubp_fl_3dlut_config *cfg)
+{
+	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+	const bool width_is_17 = (cfg->width == 17);
+	uint32_t mpc_width_sel = width_is_17 ? 0 : 1;
+	uint32_t lut_width = cfg->width;
+
+	/* The packed-linear layout programs fixed WIDTH values instead of cfg->width. */
+	if (cfg->layout == DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR)
+		lut_width = width_is_17 ? 4916 : 35940;
+
+	REG_UPDATE_2(_3DLUT_FL_CONFIG,
+		HUBP0_3DLUT_FL_MODE, cfg->mode,
+		HUBP0_3DLUT_FL_FORMAT, cfg->format);
+
+	REG_UPDATE_2(_3DLUT_FL_BIAS_SCALE,
+		HUBP0_3DLUT_FL_BIAS, cfg->bias,
+		HUBP0_3DLUT_FL_SCALE, cfg->scale);
+
+	REG_UPDATE(HUBP_3DLUT_ADDRESS_HIGH,
+		HUBP_3DLUT_ADDRESS_HIGH, cfg->address.lut3d.addr.high_part);
+	REG_UPDATE(HUBP_3DLUT_ADDRESS_LOW,
+		HUBP_3DLUT_ADDRESS_LOW, cfg->address.lut3d.addr.low_part);
+
+	/* Control fields last: widths, crossbar selects, addressing, TMZ, enable. */
+	REG_UPDATE_8(HUBP_3DLUT_CONTROL,
+		HUBP_3DLUT_MPC_WIDTH, mpc_width_sel,
+		HUBP_3DLUT_WIDTH, lut_width,
+		HUBP_3DLUT_CROSSBAR_SELECT_CR_R, cfg->crossbar_bit_slice_cr_r,
+		HUBP_3DLUT_CROSSBAR_SELECT_Y_G, cfg->crossbar_bit_slice_y_g,
+		HUBP_3DLUT_CROSSBAR_SELECT_CB_B, cfg->crossbar_bit_slice_cb_b,
+		HUBP_3DLUT_ADDRESSING_MODE, cfg->addr_mode,
+		HUBP_3DLUT_TMZ, cfg->protection_bits,
+		HUBP_3DLUT_ENABLE, cfg->enabled ? 1 : 0);
+}
+
+void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ // Also cache cursor in MALL if using MALL for SS
+ REG_UPDATE_2(DCHUBP_MALL_CONFIG, USE_MALL_SEL, mall_sel,
+ USE_MALL_FOR_CURSOR, c_cursor); /* c_cursor is passed through unchanged */
+
+ REG_UPDATE_2(DCHUBP_MALL_CONFIG, MALL_PREF_CMD_TYPE, 1, MALL_PREF_MODE, 0); /* fixed */
+}
+
+
+void hubp401_init(struct hubp *hubp)
+{
+ hubp_reset(hubp); /* initialisation consists solely of this reset call */
+}
+
+/*
+ * Program DCHUBP_CNTL.HUBP_VREADY_AT_OR_AFTER_VSYNC from the global sync values:
+ *   1 when VSTARTUP_START - (VREADY_OFFSET + VUPDATE_WIDTH + VUPDATE_OFFSET) / htotal
+ *          <= OTG_V_BLANK_END, otherwise 0.
+ *
+ * When timing->h_total is 0 the division is skipped and 0 is programmed.
+ */
+void hubp401_vready_at_or_After_vsync(struct hubp *hubp,
+	union dml2_global_sync_programming *pipe_global_sync,
+	struct dc_crtc_timing *timing)
+{
+	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+	const unsigned int htotal = timing->h_total;
+	uint32_t reg_value = 0;
+
+	if (htotal != 0) {
+		unsigned int startup_lines = pipe_global_sync->dcn4x.vstartup_lines;
+		unsigned int vready_off = pipe_global_sync->dcn4x.vready_offset_pixels;
+		unsigned int vupdate_w = pipe_global_sync->dcn4x.vupdate_vupdate_width_pixels;
+		unsigned int vupdate_off = pipe_global_sync->dcn4x.vupdate_offset_pixels;
+		unsigned int pixel_sum = vready_off + vupdate_w + vupdate_off;
+		unsigned int blank_start = timing->v_total - timing->v_front_porch;
+		unsigned int blank_end = blank_start - timing->v_addressable -
+			timing->v_border_top - timing->v_border_bottom;
+
+		/*
+		 * NOTE(review): all operands are unsigned, so the subtraction wraps
+		 * if pixel_sum / htotal exceeds startup_lines - confirm intent.
+		 */
+		if (startup_lines - pixel_sum / htotal <= blank_end)
+			reg_value = 1;
+	}
+
+	REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, reg_value);
+}
+
+void hubp401_program_requestor(
+ struct hubp *hubp,
+ struct dml2_display_rq_regs *rq_regs)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(HUBPRET_CONTROL,
+ DET_BUF_PLANE1_BASE_ADDRESS, rq_regs->plane1_base_address);
+ REG_SET_4(DCN_EXPANSION_MODE, 0,
+ DRQ_EXPANSION_MODE, rq_regs->drq_expansion_mode,
+ PRQ_EXPANSION_MODE, rq_regs->prq_expansion_mode,
+ MRQ_EXPANSION_MODE, rq_regs->mrq_expansion_mode,
+ CRQ_EXPANSION_MODE, rq_regs->crq_expansion_mode);
+ REG_SET_6(DCHUBP_REQ_SIZE_CONFIG, 0, /* values from rq_regs_l */
+ CHUNK_SIZE, rq_regs->rq_regs_l.chunk_size,
+ MIN_CHUNK_SIZE, rq_regs->rq_regs_l.min_chunk_size,
+ DPTE_GROUP_SIZE, rq_regs->rq_regs_l.dpte_group_size,
+ VM_GROUP_SIZE, rq_regs->rq_regs_l.mpte_group_size, /* names differ: source is mpte_group_size */
+ SWATH_HEIGHT, rq_regs->rq_regs_l.swath_height,
+ PTE_ROW_HEIGHT_LINEAR, rq_regs->rq_regs_l.pte_row_height_linear);
+ REG_SET_5(DCHUBP_REQ_SIZE_CONFIG_C, 0, /* values from rq_regs_c; no VM group size field here */
+ CHUNK_SIZE_C, rq_regs->rq_regs_c.chunk_size,
+ MIN_CHUNK_SIZE_C, rq_regs->rq_regs_c.min_chunk_size,
+ DPTE_GROUP_SIZE_C, rq_regs->rq_regs_c.dpte_group_size,
+ SWATH_HEIGHT_C, rq_regs->rq_regs_c.swath_height,
+ PTE_ROW_HEIGHT_LINEAR_C, rq_regs->rq_regs_c.pte_row_height_linear);
+}
+
+void hubp401_program_deadline(
+ struct hubp *hubp,
+ struct dml2_display_dlg_regs *dlg_attr,
+ struct dml2_display_ttu_regs *ttu_attr)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ /* Put DLG in mission mode by writing (1 << 8) to HUBPREQ_DEBUG_DB */
+ REG_WRITE(HUBPREQ_DEBUG_DB, 1 << 8);
+
+ /* DLG - Per hubp */
+ REG_SET_2(BLANK_OFFSET_0, 0,
+ REFCYC_H_BLANK_END, dlg_attr->refcyc_h_blank_end,
+ DLG_V_BLANK_END, dlg_attr->dlg_vblank_end);
+
+ REG_SET(BLANK_OFFSET_1, 0,
+ MIN_DST_Y_NEXT_START, dlg_attr->min_dst_y_next_start);
+
+ REG_SET(DST_DIMENSIONS, 0,
+ REFCYC_PER_HTOTAL, dlg_attr->refcyc_per_htotal);
+
+ REG_SET_2(DST_AFTER_SCALER, 0,
+ REFCYC_X_AFTER_SCALER, dlg_attr->refcyc_x_after_scaler,
+ DST_Y_AFTER_SCALER, dlg_attr->dst_y_after_scaler);
+
+ REG_SET(REF_FREQ_TO_PIX_FREQ, 0,
+ REF_FREQ_TO_PIX_FREQ, dlg_attr->ref_freq_to_pix_freq);
+
+ /* DLG - Per luma/chroma */
+ REG_SET(VBLANK_PARAMETERS_1, 0,
+ REFCYC_PER_PTE_GROUP_VBLANK_L, dlg_attr->refcyc_per_pte_group_vblank_l);
+
+ if (REG(NOM_PARAMETERS_0)) /* only when hubp_regs has a non-zero entry for it */
+ REG_SET(NOM_PARAMETERS_0, 0,
+ DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l);
+
+ if (REG(NOM_PARAMETERS_1)) /* only when hubp_regs has a non-zero entry for it */
+ REG_SET(NOM_PARAMETERS_1, 0,
+ REFCYC_PER_PTE_GROUP_NOM_L, dlg_attr->refcyc_per_pte_group_nom_l);
+
+ REG_SET(NOM_PARAMETERS_4, 0,
+ DST_Y_PER_META_ROW_NOM_L, dlg_attr->dst_y_per_meta_row_nom_l);
+
+ REG_SET(NOM_PARAMETERS_5, 0,
+ REFCYC_PER_META_CHUNK_NOM_L, dlg_attr->refcyc_per_meta_chunk_nom_l);
+
+ REG_SET_2(PER_LINE_DELIVERY, 0,
+ REFCYC_PER_LINE_DELIVERY_L, dlg_attr->refcyc_per_line_delivery_l,
+ REFCYC_PER_LINE_DELIVERY_C, dlg_attr->refcyc_per_line_delivery_c);
+
+ REG_SET(VBLANK_PARAMETERS_2, 0,
+ REFCYC_PER_PTE_GROUP_VBLANK_C, dlg_attr->refcyc_per_pte_group_vblank_c);
+
+ if (REG(NOM_PARAMETERS_2)) /* only when hubp_regs has a non-zero entry for it */
+ REG_SET(NOM_PARAMETERS_2, 0,
+ DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c);
+
+ if (REG(NOM_PARAMETERS_3)) /* only when hubp_regs has a non-zero entry for it */
+ REG_SET(NOM_PARAMETERS_3, 0,
+ REFCYC_PER_PTE_GROUP_NOM_C, dlg_attr->refcyc_per_pte_group_nom_c);
+
+ REG_SET(NOM_PARAMETERS_6, 0,
+ DST_Y_PER_META_ROW_NOM_C, dlg_attr->dst_y_per_meta_row_nom_c);
+
+ REG_SET(NOM_PARAMETERS_7, 0,
+ REFCYC_PER_META_CHUNK_NOM_C, dlg_attr->refcyc_per_meta_chunk_nom_c);
+
+ /* TTU - per hubp */
+ REG_SET_2(DCN_TTU_QOS_WM, 0,
+ QoS_LEVEL_LOW_WM, ttu_attr->qos_level_low_wm,
+ QoS_LEVEL_HIGH_WM, ttu_attr->qos_level_high_wm);
+
+ /* TTU - per luma/chroma */
+ /* Assumes SURF0 is luma and SURF1 is chroma */
+
+ REG_SET_3(DCN_SURF0_TTU_CNTL0, 0,
+ REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_l,
+ QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_l,
+ QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_l);
+
+ REG_SET_3(DCN_SURF1_TTU_CNTL0, 0,
+ REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_c,
+ QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_c,
+ QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_c);
+
+ REG_SET_3(DCN_CUR0_TTU_CNTL0, 0, /* cursor 0 uses the *_cur0 values */
+ REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_cur0,
+ QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_cur0,
+ QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_cur0);
+
+ REG_SET(FLIP_PARAMETERS_1, 0,
+ REFCYC_PER_PTE_GROUP_FLIP_L, dlg_attr->refcyc_per_pte_group_flip_l);
+ REG_SET(HUBP_3DLUT_DLG_PARAM, 0, REFCYC_PER_3DLUT_GROUP, dlg_attr->refcyc_per_tdlut_group);
+
+ REG_UPDATE(DCN_DMDATA_VM_CNTL, /* the only REG_UPDATE (rather than REG_SET) in this function */
+ REFCYC_PER_VM_DMDATA, dlg_attr->refcyc_per_vm_dmdata);
+}
+
+void hubp401_setup(
+ struct hubp *hubp,
+ struct dml2_dchub_per_pipe_register_set *pipe_regs,
+ union dml2_global_sync_programming *pipe_global_sync,
+ struct dc_crtc_timing *timing)
+{
+ /* OTG is locked when this function is called and the registers are
+ * double buffered, so disabling the requestors is not needed.
+ */
+ hubp401_vready_at_or_After_vsync(hubp, pipe_global_sync, timing);
+ hubp401_program_requestor(hubp, &pipe_regs->rq_regs);
+ hubp401_program_deadline(hubp, &pipe_regs->dlg_regs, &pipe_regs->ttu_regs);
+}
+
+void hubp401_setup_interdependent(
+ struct hubp *hubp,
+ struct dml2_dchub_per_pipe_register_set *pipe_regs) /* only dlg_regs and ttu_regs are read */
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_SET_2(PREFETCH_SETTINGS, 0,
+ DST_Y_PREFETCH, pipe_regs->dlg_regs.dst_y_prefetch,
+ VRATIO_PREFETCH, pipe_regs->dlg_regs.vratio_prefetch);
+
+ REG_SET(PREFETCH_SETTINGS_C, 0,
+ VRATIO_PREFETCH_C, pipe_regs->dlg_regs.vratio_prefetch_c);
+
+ REG_SET_2(VBLANK_PARAMETERS_0, 0,
+ DST_Y_PER_VM_VBLANK, pipe_regs->dlg_regs.dst_y_per_vm_vblank,
+ DST_Y_PER_ROW_VBLANK, pipe_regs->dlg_regs.dst_y_per_row_vblank);
+
+ REG_SET_2(FLIP_PARAMETERS_0, 0,
+ DST_Y_PER_VM_FLIP, pipe_regs->dlg_regs.dst_y_per_vm_flip,
+ DST_Y_PER_ROW_FLIP, pipe_regs->dlg_regs.dst_y_per_row_flip);
+
+ REG_SET(VBLANK_PARAMETERS_3, 0,
+ REFCYC_PER_META_CHUNK_VBLANK_L, pipe_regs->dlg_regs.refcyc_per_meta_chunk_vblank_l);
+
+ REG_SET(VBLANK_PARAMETERS_4, 0,
+ REFCYC_PER_META_CHUNK_VBLANK_C, pipe_regs->dlg_regs.refcyc_per_meta_chunk_vblank_c);
+
+ REG_SET(FLIP_PARAMETERS_2, 0,
+ REFCYC_PER_META_CHUNK_FLIP_L, pipe_regs->dlg_regs.refcyc_per_meta_chunk_flip_l);
+
+ REG_SET_2(PER_LINE_DELIVERY_PRE, 0,
+ REFCYC_PER_LINE_DELIVERY_PRE_L, pipe_regs->dlg_regs.refcyc_per_line_delivery_pre_l,
+ REFCYC_PER_LINE_DELIVERY_PRE_C, pipe_regs->dlg_regs.refcyc_per_line_delivery_pre_c);
+
+ REG_SET(DCN_SURF0_TTU_CNTL1, 0, /* from here on the values come from ttu_regs */
+ REFCYC_PER_REQ_DELIVERY_PRE,
+ pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_l);
+ REG_SET(DCN_SURF1_TTU_CNTL1, 0,
+ REFCYC_PER_REQ_DELIVERY_PRE,
+ pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_c);
+ REG_SET(DCN_CUR0_TTU_CNTL1, 0,
+ REFCYC_PER_REQ_DELIVERY_PRE, pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_cur0);
+
+ REG_SET_2(DCN_GLOBAL_TTU_CNTL, 0,
+ MIN_TTU_VBLANK, pipe_regs->ttu_regs.min_ttu_vblank,
+ QoS_LEVEL_FLIP, pipe_regs->ttu_regs.qos_level_flip);
+}
+
+
+bool hubp401_program_surface_flip_and_addr(
+ struct hubp *hubp,
+ const struct dc_plane_address *address,
+ bool flip_immediate)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ // Program flip type from flip_immediate
+ REG_UPDATE(DCSURF_FLIP_CONTROL,
+ SURFACE_FLIP_TYPE, flip_immediate);
+
+ // Program VMID reg (non-immediate flips only)
+ if (flip_immediate == 0)
+ REG_UPDATE(VMID_SETTINGS_0,
+ VMID, address->vmid);
+
+ if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) {
+ REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0);
+ REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
+
+ } else {
+ // turn off stereo if not in stereo
+ REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x0);
+ REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x0);
+ }
+
+ /* HW automatically latches the rest of the address registers on a write
+ * to DCSURF_PRIMARY_SURFACE_ADDRESS if SURFACE_UPDATE_LOCK is not used.
+ *
+ * Program the high part first and then the low address; order matters!
+ */
+ switch (address->type) {
+ case PLN_ADDR_TYPE_GRAPHICS:
+ if (address->grph.addr.quad_part == 0)
+ break; /* null address: nothing is programmed */
+
+ REG_UPDATE(DCSURF_SURFACE_CONTROL,
+ PRIMARY_SURFACE_TMZ, address->tmz_surface);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
+ PRIMARY_SURFACE_ADDRESS_HIGH,
+ address->grph.addr.high_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
+ PRIMARY_SURFACE_ADDRESS,
+ address->grph.addr.low_part);
+ break;
+ case PLN_ADDR_TYPE_VIDEO_PROGRESSIVE:
+ if (address->video_progressive.luma_addr.quad_part == 0
+ || address->video_progressive.chroma_addr.quad_part == 0)
+ break; /* either plane null: nothing is programmed */
+
+ REG_UPDATE_2(DCSURF_SURFACE_CONTROL,
+ PRIMARY_SURFACE_TMZ, address->tmz_surface,
+ PRIMARY_SURFACE_TMZ_C, address->tmz_surface);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, /* chroma before luma */
+ PRIMARY_SURFACE_ADDRESS_HIGH_C,
+ address->video_progressive.chroma_addr.high_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0,
+ PRIMARY_SURFACE_ADDRESS_C,
+ address->video_progressive.chroma_addr.low_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
+ PRIMARY_SURFACE_ADDRESS_HIGH,
+ address->video_progressive.luma_addr.high_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
+ PRIMARY_SURFACE_ADDRESS,
+ address->video_progressive.luma_addr.low_part);
+ break;
+ case PLN_ADDR_TYPE_GRPH_STEREO:
+ if (address->grph_stereo.left_addr.quad_part == 0)
+ break; /* null left address: nothing is programmed */
+ if (address->grph_stereo.right_addr.quad_part == 0)
+ break; /* null right address: nothing is programmed */
+
+ REG_UPDATE_4(DCSURF_SURFACE_CONTROL,
+ PRIMARY_SURFACE_TMZ, address->tmz_surface,
+ PRIMARY_SURFACE_TMZ_C, address->tmz_surface,
+ SECONDARY_SURFACE_TMZ, address->tmz_surface,
+ SECONDARY_SURFACE_TMZ_C, address->tmz_surface);
+
+ REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, 0, /* _C takes the alpha address */
+ SECONDARY_SURFACE_ADDRESS_HIGH_C,
+ address->grph_stereo.right_alpha_addr.high_part);
+
+ REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_C, 0,
+ SECONDARY_SURFACE_ADDRESS_C,
+ address->grph_stereo.right_alpha_addr.low_part);
+
+ REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, 0, /* secondary = right eye */
+ SECONDARY_SURFACE_ADDRESS_HIGH,
+ address->grph_stereo.right_addr.high_part);
+
+ REG_SET(DCSURF_SECONDARY_SURFACE_ADDRESS, 0,
+ SECONDARY_SURFACE_ADDRESS,
+ address->grph_stereo.right_addr.low_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, /* _C takes the alpha address */
+ PRIMARY_SURFACE_ADDRESS_HIGH_C,
+ address->grph_stereo.left_alpha_addr.high_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0,
+ PRIMARY_SURFACE_ADDRESS_C,
+ address->grph_stereo.left_alpha_addr.low_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0, /* primary = left eye */
+ PRIMARY_SURFACE_ADDRESS_HIGH,
+ address->grph_stereo.left_addr.high_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
+ PRIMARY_SURFACE_ADDRESS,
+ address->grph_stereo.left_addr.low_part);
+ break;
+ case PLN_ADDR_TYPE_RGBEA:
+ if (address->rgbea.addr.quad_part == 0
+ || address->rgbea.alpha_addr.quad_part == 0)
+ break; /* either address null: nothing is programmed */
+
+ REG_UPDATE_2(DCSURF_SURFACE_CONTROL,
+ PRIMARY_SURFACE_TMZ, address->tmz_surface,
+ PRIMARY_SURFACE_TMZ_C, address->tmz_surface);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, 0, /* _C takes the alpha address */
+ PRIMARY_SURFACE_ADDRESS_HIGH_C,
+ address->rgbea.alpha_addr.high_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_C, 0,
+ PRIMARY_SURFACE_ADDRESS_C,
+ address->rgbea.alpha_addr.low_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, 0,
+ PRIMARY_SURFACE_ADDRESS_HIGH,
+ address->rgbea.addr.high_part);
+
+ REG_SET(DCSURF_PRIMARY_SURFACE_ADDRESS, 0,
+ PRIMARY_SURFACE_ADDRESS,
+ address->rgbea.addr.low_part);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ break;
+ }
+
+ hubp->request_address = *address; /* cached even when a null address skipped programming */
+
+ return true; /* there is no failure path */
+}
+
+void hubp401_clear_tiling(struct hubp *hubp)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0);
+ REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR); /* swizzle mode set to linear */
+
+ REG_UPDATE_2(DCSURF_SURFACE_CONTROL, /* DCC enable cleared on both surfaces */
+ PRIMARY_SURFACE_DCC_EN, 0,
+ SECONDARY_SURFACE_DCC_EN, 0);
+}
+
+void hubp401_dcc_control(struct hubp *hubp,
+ struct dc_plane_dcc_param *dcc) /* only dcc->enable is read */
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE_2(DCSURF_SURFACE_CONTROL, /* same enable value for both surfaces */
+ PRIMARY_SURFACE_DCC_EN, dcc->enable,
+ SECONDARY_SURFACE_DCC_EN, dcc->enable);
+}
+
+void hubp401_program_tiling(
+ struct dcn20_hubp *hubp2,
+ const struct dc_tiling_info *info,
+ const enum surface_pixel_format pixel_format) /* not read by this function */
+{
+ /* DCSURF_ADDR_CONFIG still shows up in the reg spec, but does not need to be programmed for DCN4x.
+ * All 4 fields NUM_PIPES, PIPE_INTERLEAVE, MAX_COMPRESSED_FRAGS and NUM_PKRS are irrelevant.
+ *
+ * The DIM_TYPE field in DCSURF_TILING for Display is always 1 (2D dimension), which is the HW default.
+ */
+ REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, info->gfx_addr3.swizzle);
+}
+
+/*
+ * Program the surface data pitch, and the chroma (_C) pitch for formats that
+ * need one.  Both registers take the pitch minus one.
+ * The pitch values come from addrlib (444 or 420 luma).
+ *
+ * The _C pitch is programmed for formats in [VIDEO_BEGIN, SUBSAMPLE_END) and for
+ * RGBE_ALPHA; for every other format DCSURF_SURFACE_PITCH_C is left untouched.
+ * @dcc is not read.
+ */
+void hubp401_program_size(struct hubp *hubp,
+	enum surface_pixel_format format,
+	const struct plane_size *plane_size,
+	struct dc_plane_dcc_param *dcc)
+{
+	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+	const bool needs_pitch_c =
+		(format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN &&
+		 format < SURFACE_PIXEL_FORMAT_SUBSAMPLE_END) ||
+		format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
+	uint32_t pitch = plane_size->surface_pitch - 1;
+	uint32_t pitch_c = 0;
+
+	if (needs_pitch_c) {
+		/* Chroma pitch zero can cause system hang! */
+		ASSERT(plane_size->chroma_pitch != 0);
+		pitch_c = plane_size->chroma_pitch - 1;
+	}
+
+	REG_UPDATE(DCSURF_SURFACE_PITCH, PITCH, pitch);
+
+	if (needs_pitch_c)
+		REG_UPDATE(DCSURF_SURFACE_PITCH_C, PITCH_C, pitch_c);
+}
+
+/* Program a plane's surface state on hubp401: DCC, tiling, size, rotation, format. */
+void hubp401_program_surface_config(struct hubp *hubp,
+	enum surface_pixel_format format,
+	struct dc_tiling_info *tiling_info,
+	struct plane_size *plane_size,
+	enum dc_rotation_angle rotation,
+	struct dc_plane_dcc_param *dcc,
+	bool horizontal_mirror,
+	unsigned int compat_level)
+{
+	/* compat_level is part of the signature but is not read here. */
+	hubp401_dcc_control(hubp, dcc);
+	/* Tiling is the only helper that takes the dcn20_hubp wrapper. */
+	hubp401_program_tiling(TO_DCN20_HUBP(hubp), tiling_info, format);
+	hubp401_program_size(hubp, format, plane_size, dcc);
+	hubp2_program_rotation(hubp, rotation, horizontal_mirror);
+	hubp2_program_pixel_format(hubp, format);
+}
+
+void hubp401_set_viewport(
+ struct hubp *hubp,
+ const struct rect *viewport,
+ const struct rect *viewport_c)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_SET_2(DCSURF_PRI_VIEWPORT_DIMENSION, 0,
+ PRI_VIEWPORT_WIDTH, viewport->width,
+ PRI_VIEWPORT_HEIGHT, viewport->height);
+
+ REG_SET_2(DCSURF_PRI_VIEWPORT_START, 0,
+ PRI_VIEWPORT_X_START, viewport->x,
+ PRI_VIEWPORT_Y_START, viewport->y);
+
+ /* For stereo: the secondary viewport gets the same values as the primary */
+ REG_SET_2(DCSURF_SEC_VIEWPORT_DIMENSION, 0,
+ SEC_VIEWPORT_WIDTH, viewport->width,
+ SEC_VIEWPORT_HEIGHT, viewport->height);
+
+ REG_SET_2(DCSURF_SEC_VIEWPORT_START, 0,
+ SEC_VIEWPORT_X_START, viewport->x,
+ SEC_VIEWPORT_Y_START, viewport->y);
+
+ /* DC supports NV12 only at the moment */
+ REG_SET_2(DCSURF_PRI_VIEWPORT_DIMENSION_C, 0,
+ PRI_VIEWPORT_WIDTH_C, viewport_c->width,
+ PRI_VIEWPORT_HEIGHT_C, viewport_c->height);
+
+ REG_SET_2(DCSURF_PRI_VIEWPORT_START_C, 0,
+ PRI_VIEWPORT_X_START_C, viewport_c->x,
+ PRI_VIEWPORT_Y_START_C, viewport_c->y);
+
+ REG_SET_2(DCSURF_SEC_VIEWPORT_DIMENSION_C, 0, /* secondary _C mirrors primary _C */
+ SEC_VIEWPORT_WIDTH_C, viewport_c->width,
+ SEC_VIEWPORT_HEIGHT_C, viewport_c->height);
+
+ REG_SET_2(DCSURF_SEC_VIEWPORT_START_C, 0,
+ SEC_VIEWPORT_X_START_C, viewport_c->x,
+ SEC_VIEWPORT_Y_START_C, viewport_c->y);
+}
+
+void hubp401_program_mcache_id_and_split_coordinate(
+ struct hubp *hubp,
+ struct dml2_hubp_pipe_mcache_regs *mcache_regs)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_SET_8(DCHUBP_MCACHEID_CONFIG, 0,
+ MCACHEID_REG_READ_1H_P0, mcache_regs->main.p0.mcache_id_first, /* REG_READ_* <- main.* */
+ MCACHEID_REG_READ_2H_P0, mcache_regs->main.p0.mcache_id_second,
+ MCACHEID_REG_READ_1H_P1, mcache_regs->main.p1.mcache_id_first,
+ MCACHEID_REG_READ_2H_P1, mcache_regs->main.p1.mcache_id_second,
+ MCACHEID_MALL_PREF_1H_P0, mcache_regs->mall.p0.mcache_id_first, /* MALL_PREF_* <- mall.* */
+ MCACHEID_MALL_PREF_2H_P0, mcache_regs->mall.p0.mcache_id_second,
+ MCACHEID_MALL_PREF_1H_P1, mcache_regs->mall.p1.mcache_id_first,
+ MCACHEID_MALL_PREF_2H_P1, mcache_regs->mall.p1.mcache_id_second);
+
+ REG_SET_2(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, 0,
+ VIEWPORT_MCACHE_SPLIT_COORDINATE, mcache_regs->main.p0.split_location, /* from p0 */
+ VIEWPORT_MCACHE_SPLIT_COORDINATE_C, mcache_regs->main.p1.split_location); /* _C from p1 */
+}
+
+/* Set SURFACE_FLIP_INT_MASK to 1 in DCSURF_SURFACE_FLIP_INTERRUPT. */
+void hubp401_set_flip_int(struct hubp *hubp)
+{
+	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+	REG_UPDATE(DCSURF_SURFACE_FLIP_INTERRUPT,
+		SURFACE_FLIP_INT_MASK, 1);
+}
+
+bool hubp401_in_blank(struct hubp *hubp)
+{
+	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+	uint32_t blank_field;	/* filled in by REG_GET below */
+
+	REG_GET(DCHUBP_CNTL, HUBP_IN_BLANK, &blank_field);
+	return blank_field != 0;	/* HUBP_IN_BLANK field as a boolean */
+}
+
+
+void hubp401_cursor_set_position(
+ struct hubp *hubp,
+ const struct dc_cursor_position *pos,
+ const struct dc_cursor_mi_param *param)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+ int x_pos = pos->x - param->recout.x; /* position relative to the recout origin */
+ int y_pos = pos->y - param->recout.y;
+ int rec_x_offset = x_pos - pos->x_hotspot;
+ int rec_y_offset = y_pos - pos->y_hotspot;
+ int dst_x_offset;
+ int x_pos_viewport = 0;
+ int x_hot_viewport = 0;
+ uint32_t cur_en = pos->enable ? 1 : 0;
+
+ hubp->curs_pos = *pos; /* cached before the early return below */
+
+ /* Recout is zero for pipes if the entire dst_rect is contained
+ * within preceding ODM slices.
+ */
+ if (param->recout.width) {
+ x_pos_viewport = x_pos * param->viewport.width / param->recout.width;
+ x_hot_viewport = pos->x_hotspot * param->viewport.width / param->recout.width;
+ } else {
+ ASSERT(!cur_en || x_pos == 0);
+ ASSERT(!cur_en || pos->x_hotspot == 0);
+ }
+
+ /*
+ * Guard against cursor_set_position() being called with invalid
+ * attributes
+ */
+ if (hubp->curs_attr.address.quad_part == 0)
+ return;
+
+ /* Translate the x position of the cursor from rect
+ * space into viewport space. CURSOR_DST_X_OFFSET
+ * is the offset relative to viewport start position.
+ */
+ dst_x_offset = x_pos_viewport - x_hot_viewport *
+ (1 + hubp->curs_attr.attribute_flags.bits.ENABLE_MAGNIFICATION);
+ dst_x_offset = (dst_x_offset >= 0) ? dst_x_offset : 0;
+ dst_x_offset *= param->ref_clk_khz;
+ dst_x_offset /= param->pixel_clk_khz; /* NOTE(review): assumes pixel_clk_khz != 0 - confirm */
+
+ ASSERT(param->h_scale_ratio.value);
+
+ if (param->h_scale_ratio.value) /* the division is skipped for a zero ratio */
+ dst_x_offset = dc_fixpt_floor(dc_fixpt_div(
+ dc_fixpt_from_int(dst_x_offset),
+ param->h_scale_ratio));
+
+ if (hubp->pos.cur_ctl.bits.cur_enable != cur_en) { /* enable state differs from the cached one */
+ if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0)
+ hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr);
+
+ REG_UPDATE(CURSOR_CONTROL,
+ CURSOR_ENABLE, cur_en);
+ }
+
+ REG_SET_2(CURSOR_POSITION, 0,
+ CURSOR_X_POSITION, x_pos,
+ CURSOR_Y_POSITION, y_pos);
+
+ REG_SET_2(CURSOR_HOT_SPOT, 0,
+ CURSOR_HOT_SPOT_X, pos->x_hotspot,
+ CURSOR_HOT_SPOT_Y, pos->y_hotspot);
+
+ REG_SET(CURSOR_DST_OFFSET, 0,
+ CURSOR_DST_X_OFFSET, dst_x_offset);
+
+ /* Cursor Position Register Config */
+ hubp->pos.cur_ctl.bits.cur_enable = cur_en;
+ hubp->pos.position.bits.x_pos = pos->x; /* caches pos->x, not the recout-relative x_pos */
+ hubp->pos.position.bits.y_pos = pos->y; /* caches pos->y, not the recout-relative y_pos */
+ hubp->pos.hot_spot.bits.x_hot = pos->x_hotspot;
+ hubp->pos.hot_spot.bits.y_hot = pos->y_hotspot;
+ hubp->pos.dst_offset.bits.dst_x_offset = dst_x_offset;
+ /* Cursor Rectangle Cache
+ * Cursor bitmaps have different hotspot values
+ * There's a possibility that the above logic returns a negative value,
+ * so we clamp them to 0
+ */
+ if (rec_x_offset < 0)
+ rec_x_offset = 0;
+ if (rec_y_offset < 0)
+ rec_y_offset = 0;
+ /* Save necessary cursor info x, y position. w, h is saved in attribute func. */
+ hubp->cur_rect.x = rec_x_offset + param->recout.x;
+ hubp->cur_rect.y = rec_y_offset + param->recout.y;
+}
+
+/*
+ * Read the current HUBP register programming back into hubp2->state.
+ *
+ * Fills the requester (rq_regs), DLG (dlg_attr) and TTU (ttu_attr)
+ * sections of the state structure, followed by assorted surface,
+ * viewport, clock and status fields and three raw register snapshots.
+ * Only REG_GET*/REG_READ accesses are issued.
+ */
+void hubp401_read_state(struct hubp *hubp)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+ struct dcn_hubp_state *s = &hubp2->state;
+ struct _vcs_dpi_display_dlg_regs_st *dlg_attr = &s->dlg_attr;
+ struct _vcs_dpi_display_ttu_regs_st *ttu_attr = &s->ttu_attr;
+ struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs;
+
+ /* Requester */
+ REG_GET(HUBPRET_CONTROL,
+ DET_BUF_PLANE1_BASE_ADDRESS, &rq_regs->plane1_base_address);
+ REG_GET_4(DCN_EXPANSION_MODE,
+ DRQ_EXPANSION_MODE, &rq_regs->drq_expansion_mode,
+ PRQ_EXPANSION_MODE, &rq_regs->prq_expansion_mode,
+ MRQ_EXPANSION_MODE, &rq_regs->mrq_expansion_mode,
+ CRQ_EXPANSION_MODE, &rq_regs->crq_expansion_mode);
+
+ REG_GET_5(DCHUBP_REQ_SIZE_CONFIG,
+ CHUNK_SIZE, &rq_regs->rq_regs_l.chunk_size,
+ MIN_CHUNK_SIZE, &rq_regs->rq_regs_l.min_chunk_size,
+ DPTE_GROUP_SIZE, &rq_regs->rq_regs_l.dpte_group_size,
+ SWATH_HEIGHT, &rq_regs->rq_regs_l.swath_height,
+ PTE_ROW_HEIGHT_LINEAR, &rq_regs->rq_regs_l.pte_row_height_linear);
+
+ REG_GET_5(DCHUBP_REQ_SIZE_CONFIG_C,
+ CHUNK_SIZE_C, &rq_regs->rq_regs_c.chunk_size,
+ MIN_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_chunk_size,
+ DPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.dpte_group_size,
+ SWATH_HEIGHT_C, &rq_regs->rq_regs_c.swath_height,
+ PTE_ROW_HEIGHT_LINEAR_C, &rq_regs->rq_regs_c.pte_row_height_linear);
+
+ REG_GET(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ MC_VM_SYSTEM_APERTURE_HIGH_ADDR, &rq_regs->aperture_high_addr);
+
+ REG_GET(DCN_VM_SYSTEM_APERTURE_LOW_ADDR,
+ MC_VM_SYSTEM_APERTURE_LOW_ADDR, &rq_regs->aperture_low_addr);
+
+ /* DLG - Per hubp */
+ REG_GET_2(BLANK_OFFSET_0,
+ REFCYC_H_BLANK_END, &dlg_attr->refcyc_h_blank_end,
+ DLG_V_BLANK_END, &dlg_attr->dlg_vblank_end);
+
+ REG_GET(BLANK_OFFSET_1,
+ MIN_DST_Y_NEXT_START, &dlg_attr->min_dst_y_next_start);
+
+ REG_GET(DST_DIMENSIONS,
+ REFCYC_PER_HTOTAL, &dlg_attr->refcyc_per_htotal);
+
+ REG_GET_2(DST_AFTER_SCALER,
+ REFCYC_X_AFTER_SCALER, &dlg_attr->refcyc_x_after_scaler,
+ DST_Y_AFTER_SCALER, &dlg_attr->dst_y_after_scaler);
+
+ REG_GET_2(PREFETCH_SETTINGS,
+ DST_Y_PREFETCH, &dlg_attr->dst_y_prefetch,
+ VRATIO_PREFETCH, &dlg_attr->vratio_prefetch);
+
+ REG_GET_2(VBLANK_PARAMETERS_0,
+ DST_Y_PER_VM_VBLANK, &dlg_attr->dst_y_per_vm_vblank,
+ DST_Y_PER_ROW_VBLANK, &dlg_attr->dst_y_per_row_vblank);
+
+ REG_GET(REF_FREQ_TO_PIX_FREQ,
+ REF_FREQ_TO_PIX_FREQ, &dlg_attr->ref_freq_to_pix_freq);
+
+ /* DLG - Per luma/chroma */
+ REG_GET(VBLANK_PARAMETERS_1,
+ REFCYC_PER_PTE_GROUP_VBLANK_L, &dlg_attr->refcyc_per_pte_group_vblank_l);
+
+ REG_GET(VBLANK_PARAMETERS_3,
+ REFCYC_PER_META_CHUNK_VBLANK_L, &dlg_attr->refcyc_per_meta_chunk_vblank_l);
+
+ REG_GET(NOM_PARAMETERS_0,
+ DST_Y_PER_PTE_ROW_NOM_L, &dlg_attr->dst_y_per_pte_row_nom_l);
+
+ REG_GET(NOM_PARAMETERS_1,
+ REFCYC_PER_PTE_GROUP_NOM_L, &dlg_attr->refcyc_per_pte_group_nom_l);
+
+ REG_GET(NOM_PARAMETERS_4,
+ DST_Y_PER_META_ROW_NOM_L, &dlg_attr->dst_y_per_meta_row_nom_l);
+
+ REG_GET(NOM_PARAMETERS_5,
+ REFCYC_PER_META_CHUNK_NOM_L, &dlg_attr->refcyc_per_meta_chunk_nom_l);
+
+ REG_GET_2(PER_LINE_DELIVERY_PRE,
+ REFCYC_PER_LINE_DELIVERY_PRE_L, &dlg_attr->refcyc_per_line_delivery_pre_l,
+ REFCYC_PER_LINE_DELIVERY_PRE_C, &dlg_attr->refcyc_per_line_delivery_pre_c);
+
+ REG_GET_2(PER_LINE_DELIVERY,
+ REFCYC_PER_LINE_DELIVERY_L, &dlg_attr->refcyc_per_line_delivery_l,
+ REFCYC_PER_LINE_DELIVERY_C, &dlg_attr->refcyc_per_line_delivery_c);
+
+ REG_GET(PREFETCH_SETTINGS_C,
+ VRATIO_PREFETCH_C, &dlg_attr->vratio_prefetch_c);
+
+ REG_GET(VBLANK_PARAMETERS_2,
+ REFCYC_PER_PTE_GROUP_VBLANK_C, &dlg_attr->refcyc_per_pte_group_vblank_c);
+
+ REG_GET(VBLANK_PARAMETERS_4,
+ REFCYC_PER_META_CHUNK_VBLANK_C, &dlg_attr->refcyc_per_meta_chunk_vblank_c);
+
+ REG_GET(NOM_PARAMETERS_2,
+ DST_Y_PER_PTE_ROW_NOM_C, &dlg_attr->dst_y_per_pte_row_nom_c);
+
+ REG_GET(NOM_PARAMETERS_3,
+ REFCYC_PER_PTE_GROUP_NOM_C, &dlg_attr->refcyc_per_pte_group_nom_c);
+
+ REG_GET(NOM_PARAMETERS_6,
+ DST_Y_PER_META_ROW_NOM_C, &dlg_attr->dst_y_per_meta_row_nom_c);
+
+ REG_GET(NOM_PARAMETERS_7,
+ REFCYC_PER_META_CHUNK_NOM_C, &dlg_attr->refcyc_per_meta_chunk_nom_c);
+
+ /* TTU - per hubp */
+ REG_GET_2(DCN_TTU_QOS_WM,
+ QoS_LEVEL_LOW_WM, &ttu_attr->qos_level_low_wm,
+ QoS_LEVEL_HIGH_WM, &ttu_attr->qos_level_high_wm);
+
+ REG_GET_2(DCN_GLOBAL_TTU_CNTL,
+ MIN_TTU_VBLANK, &ttu_attr->min_ttu_vblank,
+ QoS_LEVEL_FLIP, &ttu_attr->qos_level_flip);
+
+ /* TTU - per luma/chroma */
+ /* Assumed surf0 is luma and 1 is chroma */
+
+ REG_GET_3(DCN_SURF0_TTU_CNTL0,
+ REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_l,
+ QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_l,
+ QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_l);
+
+ REG_GET(DCN_SURF0_TTU_CNTL1,
+ REFCYC_PER_REQ_DELIVERY_PRE,
+ &ttu_attr->refcyc_per_req_delivery_pre_l);
+
+ REG_GET_3(DCN_SURF1_TTU_CNTL0,
+ REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_c,
+ QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_c,
+ QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_c);
+
+ REG_GET(DCN_SURF1_TTU_CNTL1,
+ REFCYC_PER_REQ_DELIVERY_PRE,
+ &ttu_attr->refcyc_per_req_delivery_pre_c);
+
+ /* Rest of hubp */
+ REG_GET(DCSURF_SURFACE_CONFIG,
+ SURFACE_PIXEL_FORMAT, &s->pixel_format);
+
+ REG_GET(DCSURF_SURFACE_EARLIEST_INUSE_HIGH,
+ SURFACE_EARLIEST_INUSE_ADDRESS_HIGH, &s->inuse_addr_hi);
+
+ REG_GET(DCSURF_SURFACE_EARLIEST_INUSE,
+ SURFACE_EARLIEST_INUSE_ADDRESS, &s->inuse_addr_lo);
+
+ REG_GET_2(DCSURF_PRI_VIEWPORT_DIMENSION,
+ PRI_VIEWPORT_WIDTH, &s->viewport_width,
+ PRI_VIEWPORT_HEIGHT, &s->viewport_height);
+
+ REG_GET_2(DCSURF_SURFACE_CONFIG,
+ ROTATION_ANGLE, &s->rotation_angle,
+ H_MIRROR_EN, &s->h_mirror_en);
+
+ REG_GET(DCSURF_TILING_CONFIG,
+ SW_MODE, &s->sw_mode);
+
+ REG_GET(DCSURF_SURFACE_CONTROL,
+ PRIMARY_SURFACE_DCC_EN, &s->dcc_en);
+
+ REG_GET_3(DCHUBP_CNTL,
+ HUBP_BLANK_EN, &s->blank_en,
+ HUBP_TTU_DISABLE, &s->ttu_disable,
+ HUBP_UNDERFLOW_STATUS, &s->underflow_status);
+
+ REG_GET(HUBP_CLK_CNTL,
+ HUBP_CLOCK_ENABLE, &s->clock_en);
+
+ REG_GET(DCN_GLOBAL_TTU_CNTL,
+ MIN_TTU_VBLANK, &s->min_ttu_vblank);
+
+ REG_GET_2(DCN_TTU_QOS_WM,
+ QoS_LEVEL_LOW_WM, &s->qos_level_low_wm,
+ QoS_LEVEL_HIGH_WM, &s->qos_level_high_wm);
+
+ REG_GET(DCSURF_PRIMARY_SURFACE_ADDRESS,
+ PRIMARY_SURFACE_ADDRESS, &s->primary_surface_addr_lo);
+
+ /* The _HIGH register has its own field name in
+ * HUBP_MASK_SH_LIST_DCN401; use it so the matching shift/mask
+ * pair is applied rather than the one of the low register.
+ */
+ REG_GET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH,
+ PRIMARY_SURFACE_ADDRESS_HIGH, &s->primary_surface_addr_hi);
+
+ /* Raw, unmasked register snapshots. */
+ s->uclk_pstate_force = REG_READ(UCLK_PSTATE_FORCE);
+
+ s->hubp_cntl = REG_READ(DCHUBP_CNTL);
+ s->flip_control = REG_READ(DCSURF_FLIP_CONTROL);
+}
+
+/*
+ * Set HUBP_UNBOUNDED_REQ_MODE in DCHUBP_CNTL to @enable and force
+ * CURSOR_REQ_MODE in CURSOR_CONTROL to 1 regardless of @enable.
+ */
+void hubp401_set_unbounded_requesting(struct hubp *hubp, bool enable)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(DCHUBP_CNTL, HUBP_UNBOUNDED_REQ_MODE, enable);
+
+ /* To ensure that cursor fetching starts as early as possible in
+ * the display prefetch, set CURSOR_REQ_MODE = 1 always.
+ * The setting of CURSOR_REQ_MODE = 0 is no longer supported in
+ * DCN4x as a fall back to legacy behavior of fetching cursor
+ * just before it appears on the screen.
+ */
+ REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, 1);
+}
+
+/*
+ * HUBP function table installed by hubp401_construct().
+ *
+ * Entries prefixed hubp401_ are the DCN401 implementations; the remaining
+ * hooks reuse hubp2_/hubp3_/hubp31_/hubp32_ functions, presumably from the
+ * earlier DCN generations whose headers dcn401_hubp.h includes.
+ */
+static struct hubp_funcs dcn401_hubp_funcs = {
+ .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer,
+ .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled,
+ .hubp_program_surface_flip_and_addr = hubp401_program_surface_flip_and_addr,
+ .hubp_program_surface_config = hubp401_program_surface_config,
+ .hubp_is_flip_pending = hubp2_is_flip_pending,
+ .hubp_setup2 = hubp401_setup,
+ .hubp_setup_interdependent2 = hubp401_setup_interdependent,
+ .hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings,
+ .set_blank = hubp2_set_blank,
+ .set_blank_regs = hubp2_set_blank_regs,
+ .hubp_reset = hubp_reset,
+ .mem_program_viewport = hubp401_set_viewport,
+ .set_cursor_attributes = hubp32_cursor_set_attributes,
+ .set_cursor_position = hubp401_cursor_set_position,
+ .hubp_clk_cntl = hubp2_clk_cntl,
+ .hubp_vtg_sel = hubp2_vtg_sel,
+ .dmdata_set_attributes = hubp3_dmdata_set_attributes,
+ .dmdata_load = hubp2_dmdata_load,
+ .dmdata_status_done = hubp2_dmdata_status_done,
+ .hubp_read_state = hubp401_read_state,
+ .hubp_clear_underflow = hubp2_clear_underflow,
+ .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
+ .hubp_init = hubp401_init,
+ .set_unbounded_requesting = hubp401_set_unbounded_requesting,
+ .hubp_soft_reset = hubp31_soft_reset,
+ .hubp_set_flip_int = hubp401_set_flip_int,
+ .hubp_in_blank = hubp401_in_blank,
+ .phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable,
+ .hubp_update_mall_sel = hubp401_update_mall_sel,
+ .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering,
+ .hubp_program_mcache_id_and_split_coordinate = hubp401_program_mcache_id_and_split_coordinate,
+ .hubp_update_3dlut_fl_bias_scale = hubp401_update_3dlut_fl_bias_scale,
+ .hubp_program_3dlut_fl_mode = hubp401_program_3dlut_fl_mode,
+ .hubp_program_3dlut_fl_format = hubp401_program_3dlut_fl_format,
+ .hubp_program_3dlut_fl_addr = hubp401_program_3dlut_fl_addr,
+ .hubp_program_3dlut_fl_dlg_param = hubp401_program_3dlut_fl_dlg_param,
+ .hubp_enable_3dlut_fl = hubp401_enable_3dlut_fl,
+ .hubp_program_3dlut_fl_addressing_mode = hubp401_program_3dlut_fl_addressing_mode,
+ .hubp_program_3dlut_fl_width = hubp401_program_3dlut_fl_width,
+ .hubp_program_3dlut_fl_tmz_protected = hubp401_program_3dlut_fl_tmz_protected,
+ .hubp_program_3dlut_fl_crossbar = hubp401_program_3dlut_fl_crossbar,
+ .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done,
+ .hubp_clear_tiling = hubp401_clear_tiling,
+ .hubp_program_3dlut_fl_config = hubp401_program_3dlut_fl_config,
+ .hubp_get_underflow_status = hubp3_get_underflow_status,
+ .hubp_get_current_read_line = hubp3_get_current_read_line,
+ .hubp_get_det_config_error = hubp31_get_det_config_error,
+};
+
+/*
+ * Initialise a DCN401 HUBP object.
+ *
+ * Installs dcn401_hubp_funcs, records the context and instance number,
+ * stores the register/shift/mask tables, and sets opp_id to
+ * OPP_ID_INVALID and mpcc_id to 0xf. Always returns true.
+ */
+bool hubp401_construct(
+ struct dcn20_hubp *hubp2,
+ struct dc_context *ctx,
+ uint32_t inst,
+ const struct dcn_hubp2_registers *hubp_regs,
+ const struct dcn_hubp2_shift *hubp_shift,
+ const struct dcn_hubp2_mask *hubp_mask)
+{
+ /* Register description tables for this instance. */
+ hubp2->hubp_regs = hubp_regs;
+ hubp2->hubp_shift = hubp_shift;
+ hubp2->hubp_mask = hubp_mask;
+
+ /* Generic HUBP base object. */
+ hubp2->base.ctx = ctx;
+ hubp2->base.inst = inst;
+ hubp2->base.funcs = &dcn401_hubp_funcs;
+ hubp2->base.opp_id = OPP_ID_INVALID;
+ hubp2->base.mpcc_id = 0xf;
+
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h
new file mode 100644
index 000000000000..fdabbeec8ffa
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h
@@ -0,0 +1,373 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_HUBP_DCN401_H__
+#define __DC_HUBP_DCN401_H__
+
+#include "dcn20/dcn20_hubp.h"
+#include "dcn21/dcn21_hubp.h"
+#include "dcn30/dcn30_hubp.h"
+#include "dcn31/dcn31_hubp.h"
+#include "dcn32/dcn32_hubp.h"
+#include "dml2/dml21/inc/dml_top_dchub_registers.h"
+
+/*
+ * Register list entries for the 3DLUT_FL / HUBP_3DLUT registers of HUBP
+ * instance @inst: two HUBP-block registers via SRI_ARR_US() and four
+ * CURSOR0_-block registers via SRI_ARR().
+ */
+#define HUBP_3DLUT_FL_REG_LIST_DCN401(inst)\
+ SRI_ARR_US(_3DLUT_FL_CONFIG, HUBP, inst),\
+ SRI_ARR_US(_3DLUT_FL_BIAS_SCALE, HUBP, inst),\
+ SRI_ARR(HUBP_3DLUT_ADDRESS_HIGH, CURSOR0_, inst),\
+ SRI_ARR(HUBP_3DLUT_ADDRESS_LOW, CURSOR0_, inst),\
+ SRI_ARR(HUBP_3DLUT_CONTROL, CURSOR0_, inst),\
+ SRI_ARR(HUBP_3DLUT_DLG_PARAM, CURSOR0_, inst)
+
+/*
+ * Field list for the DCN401 HUBP. Each HUBP_SF() entry names a register
+ * and one of its fields and forwards @mask_sh (presumably the shift or
+ * mask suffix); additional entries are pulled in through
+ * HUBP_MASK_SH_LIST_DCN_VM().
+ */
+#define HUBP_MASK_SH_LIST_DCN401(mask_sh)\
+ HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, REFCYC_PER_VM_DMDATA, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_FAULT_STATUS, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_FAULT_STATUS_CLEAR, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_UNDERFLOW_STATUS, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_LATE_STATUS, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_UNDERFLOW_STATUS_CLEAR, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_DMDATA_VM_CNTL, DMDATA_VM_DONE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_BLANK_EN, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_TTU_DISABLE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_STATUS, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_CLEAR, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_NO_OUTSTANDING_REQ, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNBOUNDED_REQ_MODE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_IN_BLANK, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_SOFT_RESET, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PIPES, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, PIPE_INTERLEAVE, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, MAX_COMPRESSED_FRAGS, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PKRS, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_TILING_CONFIG, SW_MODE, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_PITCH_C, PITCH_C, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SURFACE_CONFIG, SURFACE_PIXEL_FORMAT, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, SURFACE_FLIP_TYPE, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, SURFACE_FLIP_PENDING, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, SURFACE_UPDATE_LOCK, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START, PRI_VIEWPORT_X_START, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START, PRI_VIEWPORT_Y_START, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION, SEC_VIEWPORT_WIDTH, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION, SEC_VIEWPORT_HEIGHT, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START, SEC_VIEWPORT_X_START, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START, SEC_VIEWPORT_Y_START, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_C, PRI_VIEWPORT_WIDTH_C, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_C, PRI_VIEWPORT_HEIGHT_C, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START_C, PRI_VIEWPORT_X_START_C, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_PRI_VIEWPORT_START_C, PRI_VIEWPORT_Y_START_C, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION_C, SEC_VIEWPORT_WIDTH_C, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_DIMENSION_C, SEC_VIEWPORT_HEIGHT_C, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START_C, SEC_VIEWPORT_X_START_C, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SEC_VIEWPORT_START_C, SEC_VIEWPORT_Y_START_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, PRIMARY_SURFACE_ADDRESS_HIGH, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS, PRIMARY_SURFACE_ADDRESS, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, SECONDARY_SURFACE_ADDRESS_HIGH, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS, SECONDARY_SURFACE_ADDRESS, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, PRIMARY_SURFACE_ADDRESS_HIGH_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_PRIMARY_SURFACE_ADDRESS_C, PRIMARY_SURFACE_ADDRESS_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, SECONDARY_SURFACE_ADDRESS_HIGH_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SECONDARY_SURFACE_ADDRESS_C, SECONDARY_SURFACE_ADDRESS_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE, SURFACE_INUSE_ADDRESS, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE_HIGH, SURFACE_INUSE_ADDRESS_HIGH, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE_C, SURFACE_INUSE_ADDRESS_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_INUSE_HIGH_C, SURFACE_INUSE_ADDRESS_HIGH_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_EARLIEST_INUSE, SURFACE_EARLIEST_INUSE_ADDRESS, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_EARLIEST_INUSE_HIGH, SURFACE_EARLIEST_INUSE_ADDRESS_HIGH, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_EARLIEST_INUSE_C, SURFACE_EARLIEST_INUSE_ADDRESS_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C, SURFACE_EARLIEST_INUSE_ADDRESS_HIGH_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_TMZ, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_TMZ_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_DCC_EN, mask_sh),\
+ HUBP_SF(HUBPREQ0_D CSURF_SURFACE_CONTROL, SECONDARY_SURFACE_TMZ, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_TMZ_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_EN, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK, mask_sh),\
+ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, DET_BUF_PLANE1_BASE_ADDRESS, mask_sh),\
+ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CB_B, mask_sh),\
+ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CR_R, mask_sh),\
+ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_Y_G, mask_sh),\
+ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_ALPHA, mask_sh),\
+ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, PACK_3TO2_ELEMENT_DISABLE, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_EXPANSION_MODE, DRQ_EXPANSION_MODE, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_EXPANSION_MODE, PRQ_EXPANSION_MODE, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_EXPANSION_MODE, MRQ_EXPANSION_MODE, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_EXPANSION_MODE, CRQ_EXPANSION_MODE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, CHUNK_SIZE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, MIN_CHUNK_SIZE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, DPTE_GROUP_SIZE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, PTE_ROW_HEIGHT_LINEAR, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, CHUNK_SIZE_C, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, MIN_CHUNK_SIZE_C, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, DPTE_GROUP_SIZE_C, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, SWATH_HEIGHT_C, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG_C, PTE_ROW_HEIGHT_LINEAR_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_BLANK_OFFSET_0, REFCYC_H_BLANK_END, mask_sh),\
+ HUBP_SF(HUBPREQ0_BLANK_OFFSET_0, DLG_V_BLANK_END, mask_sh),\
+ HUBP_SF(HUBPREQ0_BLANK_OFFSET_1, MIN_DST_Y_NEXT_START, mask_sh),\
+ HUBP_SF(HUBPREQ0_DST_DIMENSIONS, REFCYC_PER_HTOTAL, mask_sh),\
+ HUBP_SF(HUBPREQ0_DST_AFTER_SCALER, REFCYC_X_AFTER_SCALER, mask_sh),\
+ HUBP_SF(HUBPREQ0_DST_AFTER_SCALER, DST_Y_AFTER_SCALER, mask_sh),\
+ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_0, DST_Y_PER_VM_VBLANK, mask_sh),\
+ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_0, DST_Y_PER_ROW_VBLANK, mask_sh),\
+ HUBP_SF(HUBPREQ0_REF_FREQ_TO_PIX_FREQ, REF_FREQ_TO_PIX_FREQ, mask_sh),\
+ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_1, REFCYC_PER_PTE_GROUP_VBLANK_L, mask_sh),\
+ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_3, REFCYC_PER_META_CHUNK_VBLANK_L, mask_sh),\
+ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_4, DST_Y_PER_META_ROW_NOM_L, mask_sh),\
+ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_5, REFCYC_PER_META_CHUNK_NOM_L, mask_sh),\
+ HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY_PRE, REFCYC_PER_LINE_DELIVERY_PRE_L, mask_sh),\
+ HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY_PRE, REFCYC_PER_LINE_DELIVERY_PRE_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY, REFCYC_PER_LINE_DELIVERY_L, mask_sh),\
+ HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY, REFCYC_PER_LINE_DELIVERY_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_2, REFCYC_PER_PTE_GROUP_VBLANK_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_4, REFCYC_PER_META_CHUNK_VBLANK_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_6, DST_Y_PER_META_ROW_NOM_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_7, REFCYC_PER_META_CHUNK_NOM_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_TTU_QOS_WM, QoS_LEVEL_LOW_WM, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_TTU_QOS_WM, QoS_LEVEL_HIGH_WM, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_GLOBAL_TTU_CNTL, MIN_TTU_VBLANK, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_GLOBAL_TTU_CNTL, QoS_LEVEL_FLIP, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_GLOBAL_TTU_CNTL, ROW_TTU_MODE, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, REFCYC_PER_REQ_DELIVERY, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_LEVEL_FIXED, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh),\
+ HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh),\
+ HUBP_MASK_SH_LIST_DCN_VM(mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SURFACE_CONFIG, ROTATION_ANGLE, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SURFACE_CONFIG, H_MIRROR_EN, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_SURFACE_CONFIG, ALPHA_PLANE_EN, mask_sh),\
+ HUBP_SF(HUBPREQ0_PREFETCH_SETTINGS, DST_Y_PREFETCH, mask_sh),\
+ HUBP_SF(HUBPREQ0_PREFETCH_SETTINGS, VRATIO_PREFETCH, mask_sh),\
+ HUBP_SF(HUBPREQ0_PREFETCH_SETTINGS_C, VRATIO_PREFETCH_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_VM_SYSTEM_APERTURE_LOW_ADDR, MC_VM_SYSTEM_APERTURE_LOW_ADDR, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCN_VM_SYSTEM_APERTURE_HIGH_ADDR, MC_VM_SYSTEM_APERTURE_HIGH_ADDR, mask_sh),\
+ HUBP_SF(HUBPREQ0_CURSOR_SETTINGS, CURSOR0_DST_Y_OFFSET, mask_sh), \
+ HUBP_SF(HUBPREQ0_CURSOR_SETTINGS, CURSOR0_CHUNK_HDL_ADJUST, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_SURFACE_ADDRESS_HIGH, CURSOR_SURFACE_ADDRESS_HIGH, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_SURFACE_ADDRESS, CURSOR_SURFACE_ADDRESS, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_SIZE, CURSOR_WIDTH, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_SIZE, CURSOR_HEIGHT, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_MODE, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_REQ_MODE, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_2X_MAGNIFY, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_PITCH, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_LINES_PER_CHUNK, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_CONTROL, CURSOR_ENABLE, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_POSITION, CURSOR_X_POSITION, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_POSITION, CURSOR_Y_POSITION, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_HOT_SPOT, CURSOR_HOT_SPOT_X, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_HOT_SPOT, CURSOR_HOT_SPOT_Y, mask_sh), \
+ HUBP_SF(CURSOR0_0_CURSOR_DST_OFFSET, CURSOR_DST_X_OFFSET, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_ADDRESS_HIGH, DMDATA_ADDRESS_HIGH, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_CNTL, DMDATA_MODE, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_CNTL, DMDATA_UPDATED, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_CNTL, DMDATA_REPEAT, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_CNTL, DMDATA_SIZE, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_SW_CNTL, DMDATA_SW_UPDATED, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_SW_CNTL, DMDATA_SW_REPEAT, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_SW_CNTL, DMDATA_SW_SIZE, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_QOS_CNTL, DMDATA_QOS_MODE, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_QOS_CNTL, DMDATA_QOS_LEVEL, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_QOS_CNTL, DMDATA_DL_DELTA, mask_sh), \
+ HUBP_SF(CURSOR0_0_DMDATA_STATUS, DMDATA_DONE, mask_sh),\
+ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_0, DST_Y_PER_VM_FLIP, mask_sh),\
+ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_0, DST_Y_PER_ROW_FLIP, mask_sh),\
+ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_1, REFCYC_PER_PTE_GROUP_FLIP_L, mask_sh),\
+ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_2, REFCYC_PER_META_CHUNK_FLIP_L, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_DISABLE_STOP_DATA_DURING_VM, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL, HUBPREQ_MASTER_UPDATE_LOCK_STATUS, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL2, SURFACE_GSL_ENABLE, mask_sh),\
+ HUBP_SF(HUBPREQ0_DCSURF_FLIP_CONTROL2, SURFACE_TRIPLE_BUFFER_ENABLE, mask_sh),\
+ HUBP_SF(HUBPREQ0_VMID_SETTINGS_0, VMID, mask_sh),\
+ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_3, REFCYC_PER_VM_GROUP_FLIP, mask_sh),\
+ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_4, REFCYC_PER_VM_REQ_FLIP, mask_sh),\
+ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_5, REFCYC_PER_PTE_GROUP_FLIP_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_6, REFCYC_PER_META_CHUNK_FLIP_C, mask_sh),\
+ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_5, REFCYC_PER_VM_GROUP_VBLANK, mask_sh),\
+ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_6, REFCYC_PER_VM_REQ_VBLANK, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MALL_CONFIG, USE_MALL_SEL, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_VMPG_CONFIG, VMPG_SIZE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_VMPG_CONFIG, PTE_BUFFER_MODE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_VMPG_CONFIG, BIGK_FRAGMENT_SIZE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_VMPG_CONFIG, FORCE_ONE_ROW_FOR_FRAME, mask_sh),\
+ HUBP_SF(HUBPREQ0_UCLK_PSTATE_FORCE, DATA_UCLK_PSTATE_FORCE_EN, mask_sh),\
+ HUBP_SF(HUBPREQ0_UCLK_PSTATE_FORCE, DATA_UCLK_PSTATE_FORCE_VALUE, mask_sh),\
+ HUBP_SF(HUBPREQ0_UCLK_PSTATE_FORCE, CURSOR_UCLK_PSTATE_FORCE_EN, mask_sh),\
+ HUBP_SF(HUBPREQ0_UCLK_PSTATE_FORCE, CURSOR_UCLK_PSTATE_FORCE_VALUE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MALL_CONFIG, MALL_PREF_CMD_TYPE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MALL_CONFIG, MALL_PREF_MODE, mask_sh),\
+ HUBP_SF(HUBP0_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_MODE, mask_sh),\
+ HUBP_SF(HUBP0_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_FORMAT, mask_sh),\
+ HUBP_SF(HUBP0_3DLUT_FL_BIAS_SCALE, HUBP0_3DLUT_FL_BIAS, mask_sh),\
+ HUBP_SF(HUBP0_3DLUT_FL_BIAS_SCALE, HUBP0_3DLUT_FL_SCALE, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_ENABLE, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_DONE, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_ADDRESSING_MODE, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_WIDTH, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_CROSSBAR_SELECT_Y_G, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_CROSSBAR_SELECT_CB_B, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_CONTROL, HUBP_3DLUT_CROSSBAR_SELECT_CR_R, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_ADDRESS_HIGH, HUBP_3DLUT_ADDRESS_HIGH, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_ADDRESS_LOW, HUBP_3DLUT_ADDRESS_LOW, mask_sh),\
+ HUBP_SF(CURSOR0_0_HUBP_3DLUT_DLG_PARAM, REFCYC_PER_3DLUT_GROUP, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, VIEWPORT_MCACHE_SPLIT_COORDINATE, mask_sh),\
+ HUBP_SF(HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, VIEWPORT_MCACHE_SPLIT_COORDINATE_C, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_1H_P0, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_2H_P0, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_1H_P1, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_2H_P1, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_1H_P0, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P0, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_1H_P1, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P1, mask_sh),\
+ HUBP_SF(HUBPRET0_HUBPRET_READ_LINE_VALUE, PIPE_READ_LINE, mask_sh),\
+ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_SEG_ALLOC_ERR_STATUS, mask_sh)
+
+void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor);
+
+void hubp401_setup(
+ struct hubp *hubp,
+ struct dml2_dchub_per_pipe_register_set *pipe_regs,
+ union dml2_global_sync_programming *pipe_global_sync,
+ struct dc_crtc_timing *timing);
+
+void hubp401_setup_interdependent(
+ struct hubp *hubp,
+ struct dml2_dchub_per_pipe_register_set *pipe_regs);
+
+bool hubp401_program_surface_flip_and_addr(
+ struct hubp *hubp,
+ const struct dc_plane_address *address,
+ bool flip_immediate);
+
+void hubp401_dcc_control(struct hubp *hubp,
+ struct dc_plane_dcc_param *dcc);
+
+void hubp401_program_tiling(
+ struct dcn20_hubp *hubp2,
+ const struct dc_tiling_info *info,
+ const enum surface_pixel_format pixel_format);
+
+void hubp401_program_size(
+ struct hubp *hubp,
+ enum surface_pixel_format format,
+ const struct plane_size *plane_size,
+ struct dc_plane_dcc_param *dcc);
+
+void hubp401_program_surface_config(
+ struct hubp *hubp,
+ enum surface_pixel_format format,
+ struct dc_tiling_info *tiling_info,
+ struct plane_size *plane_size,
+ enum dc_rotation_angle rotation,
+ struct dc_plane_dcc_param *dcc,
+ bool horizontal_mirror,
+ unsigned int compat_level);
+
+void hubp401_set_viewport(struct hubp *hubp,
+ const struct rect *viewport,
+ const struct rect *viewport_c);
+void hubp401_program_mcache_id_and_split_coordinate(
+ struct hubp *hubp,
+ struct dml2_hubp_pipe_mcache_regs *mcache_regs);
+void hubp401_set_flip_int(struct hubp *hubp);
+
+bool hubp401_in_blank(struct hubp *hubp);
+
+void hubp401_cursor_set_position(
+ struct hubp *hubp,
+ const struct dc_cursor_position *pos,
+ const struct dc_cursor_mi_param *param);
+
+void hubp401_read_state(struct hubp *hubp);
+
+bool hubp401_construct(
+ struct dcn20_hubp *hubp2,
+ struct dc_context *ctx,
+ uint32_t inst,
+ const struct dcn_hubp2_registers *hubp_regs,
+ const struct dcn_hubp2_shift *hubp_shift,
+ const struct dcn_hubp2_mask *hubp_mask);
+
+void hubp401_init(struct hubp *hubp);
+
+int hubp401_get_3dlut_fl_done(struct hubp *hubp);
+
+void hubp401_set_unbounded_requesting(struct hubp *hubp, bool enable);
+
+void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale);
+
+void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r);
+
+void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, uint8_t protection_bits);
+
+void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width);
+
+void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode);
+
+void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable);
+
+void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group);
+
+void hubp401_program_3dlut_fl_addr(struct hubp *hubp, const struct dc_plane_address address);
+
+void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format);
+
+void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode);
+
+void hubp401_program_3dlut_fl_config(
+ struct hubp *hubp,
+ struct hubp_fl_3dlut_config *cfg);
+
+void hubp401_clear_tiling(struct hubp *hubp);
+
+void hubp401_vready_at_or_After_vsync(struct hubp *hubp,
+ union dml2_global_sync_programming *pipe_global_sync,
+ struct dc_crtc_timing *timing);
+
+void hubp401_program_requestor(
+ struct hubp *hubp,
+ struct dml2_display_rq_regs *rq_regs);
+
+void hubp401_program_deadline(
+ struct hubp *hubp,
+ struct dml2_display_dlg_regs *dlg_attr,
+ struct dml2_display_ttu_regs *ttu_attr);
+
+#endif /* __DC_HUBP_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/Makefile
new file mode 100644
index 000000000000..bee617ca0838
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/Makefile
@@ -0,0 +1,202 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'hwss' sub-component of DAL.
+#
+
+
+###############################################################################
+# DCE
+###############################################################################
+
+ifdef CONFIG_DRM_AMD_DC_SI
+HWSS_DCE60 = dce60_hwseq.o
+
+AMD_DAL_HWSS_DCE60 = $(addprefix $(AMDDALPATH)/dc/hwss/dce60/,$(HWSS_DCE60))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE60)
+endif
+
+###############################################################################
+
+HWSS_DCE80 = dce80_hwseq.o
+
+AMD_DAL_HWSS_DCE80 = $(addprefix $(AMDDALPATH)/dc/hwss/dce80/,$(HWSS_DCE80))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE80)
+
+###############################################################################
+
+HWSS_DCE = dce_hwseq.o
+
+AMD_DAL_HWSS_DCE = $(addprefix $(AMDDALPATH)/dc/hwss/dce/,$(HWSS_DCE))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE)
+
+###############################################################################
+
+HWSS_DCE100 = dce100_hwseq.o
+
+AMD_DAL_HWSS_DCE100 = $(addprefix $(AMDDALPATH)/dc/hwss/dce100/,$(HWSS_DCE100))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE100)
+
+###############################################################################
+
+HWSS_DCE110 = dce110_hwseq.o
+
+AMD_DAL_HWSS_DCE110 = $(addprefix $(AMDDALPATH)/dc/hwss/dce110/,$(HWSS_DCE110))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE110)
+
+###############################################################################
+
+HWSS_DCE112 = dce112_hwseq.o
+
+AMD_DAL_HWSS_DCE112 = $(addprefix $(AMDDALPATH)/dc/hwss/dce112/,$(HWSS_DCE112))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE112)
+
+###############################################################################
+
+HWSS_DCE120 = dce120_hwseq.o
+
+AMD_DAL_HWSS_DCE120 = $(addprefix $(AMDDALPATH)/dc/hwss/dce120/,$(HWSS_DCE120))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE120)
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+HWSS_DCN10 = dcn10_hwseq.o dcn10_init.o
+
+AMD_DAL_HWSS_DCN10 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn10/,$(HWSS_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN10)
+
+###############################################################################
+
+HWSS_DCN20 = dcn20_hwseq.o dcn20_init.o
+
+AMD_DAL_HWSS_DCN20 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn20/,$(HWSS_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN20)
+
+###############################################################################
+
+HWSS_DCN201 = dcn201_hwseq.o dcn201_init.o
+
+AMD_DAL_HWSS_DCN201 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn201/,$(HWSS_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN201)
+
+###############################################################################
+
+HWSS_DCN21 = dcn21_hwseq.o dcn21_init.o
+
+AMD_DAL_HWSS_DCN21 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn21/,$(HWSS_DCN21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN21)
+
+###############################################################################
+
+HWSS_DCN30 = dcn30_hwseq.o dcn30_init.o
+
+AMD_DAL_HWSS_DCN30 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn30/,$(HWSS_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN30)
+
+###############################################################################
+
+HWSS_DCN301 = dcn301_hwseq.o dcn301_init.o
+
+AMD_DAL_HWSS_DCN301 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn301/,$(HWSS_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN301)
+
+###############################################################################
+
+HWSS_DCN302 = dcn302_hwseq.o dcn302_init.o
+
+AMD_DAL_HWSS_DCN302 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn302/,$(HWSS_DCN302))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN302)
+
+
+
+###############################################################################
+
+HWSS_DCN303 = dcn303_hwseq.o dcn303_init.o
+
+AMD_DAL_HWSS_DCN303 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn303/,$(HWSS_DCN303))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN303)
+
+###############################################################################
+
+HWSS_DCN31 = dcn31_hwseq.o dcn31_init.o
+
+AMD_DAL_HWSS_DCN31 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn31/,$(HWSS_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN31)
+
+###############################################################################
+
+HWSS_DCN314 = dcn314_hwseq.o dcn314_init.o
+
+AMD_DAL_HWSS_DCN314 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn314/,$(HWSS_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN314)
+
+###############################################################################
+
+HWSS_DCN32 = dcn32_hwseq.o dcn32_init.o
+
+AMD_DAL_HWSS_DCN32 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn32/,$(HWSS_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN32)
+
+###############################################################################
+
+HWSS_DCN35 = dcn35_hwseq.o dcn35_init.o
+
+AMD_DAL_HWSS_DCN35 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn35/,$(HWSS_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN35)
+
+###############################################################################
+
+HWSS_DCN351 = dcn351_hwseq.o dcn351_init.o
+
+AMD_DAL_HWSS_DCN351 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn351/,$(HWSS_DCN351))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN351)
+
+###############################################################################
+
+HWSS_DCN401 = dcn401_hwseq.o dcn401_init.o
+
+AMD_DAL_HWSS_DCN401 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn401/,$(HWSS_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCN401)
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.c
index 4202fadb2c0e..4202fadb2c0e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.c
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
index 86233f94db4a..f66a38f43a09 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h
@@ -644,10 +644,18 @@ struct dce_hwseq_registers {
uint32_t DPP_TOP0_DPP_CRC_CTRL;
uint32_t DPP_TOP0_DPP_CRC_VAL_R_G;
uint32_t DPP_TOP0_DPP_CRC_VAL_B_A;
+ uint32_t DPP_TOP0_DPP_CRC_VAL_R;
+ uint32_t DPP_TOP0_DPP_CRC_VAL_G;
+ uint32_t DPP_TOP0_DPP_CRC_VAL_B;
+ uint32_t DPP_TOP0_DPP_CRC_VAL_A;
uint32_t MPC_CRC_CTRL;
uint32_t MPC_CRC_RESULT_GB;
uint32_t MPC_CRC_RESULT_C;
uint32_t MPC_CRC_RESULT_AR;
+ uint32_t MPC_CRC_RESULT_R;
+ uint32_t MPC_CRC_RESULT_G;
+ uint32_t MPC_CRC_RESULT_B;
+ uint32_t MPC_CRC_RESULT_A;
uint32_t D1VGA_CONTROL;
uint32_t D2VGA_CONTROL;
uint32_t D3VGA_CONTROL;
@@ -681,6 +689,17 @@ struct dce_hwseq_registers {
uint32_t DMU_MEM_PWR_CNTL;
uint32_t DCHUBBUB_ARB_HOSTVM_CNTL;
uint32_t HPO_TOP_HW_CONTROL;
+ uint32_t DMU_CLK_CNTL;
+ uint32_t DCCG_GATE_DISABLE_CNTL4;
+ uint32_t DCCG_GATE_DISABLE_CNTL5;
+ uint32_t DOMAIN22_PG_CONFIG;
+ uint32_t DOMAIN23_PG_CONFIG;
+ uint32_t DOMAIN24_PG_CONFIG;
+ uint32_t DOMAIN25_PG_CONFIG;
+ uint32_t DOMAIN22_PG_STATUS;
+ uint32_t DOMAIN23_PG_STATUS;
+ uint32_t DOMAIN24_PG_STATUS;
+ uint32_t DOMAIN25_PG_STATUS;
};
/* set field name */
#define HWS_SF(blk_name, reg_name, field_name, post_fix)\
@@ -1167,12 +1186,73 @@ struct dce_hwseq_registers {
type I2C_LIGHT_SLEEP_FORCE;\
type HPO_IO_EN;
+#define HWSEQ_DCN35_REG_FIELD_LIST(type) \
+ type DISPCLK_R_DMU_GATE_DIS;\
+ type DISPCLK_G_RBBMIF_GATE_DIS;\
+ type RBBMIF_FGCG_REP_DIS;\
+ type IHC_FGCG_REP_DIS;\
+ type DPREFCLK_ALLOW_DS_CLKSTOP;\
+ type DISPCLK_ALLOW_DS_CLKSTOP;\
+ type DPPCLK_ALLOW_DS_CLKSTOP;\
+ type DTBCLK_ALLOW_DS_CLKSTOP;\
+ type DCFCLK_ALLOW_DS_CLKSTOP;\
+ type DPIACLK_ALLOW_DS_CLKSTOP;\
+ type LONO_FGCG_REP_DIS;\
+ type LONO_DISPCLK_GATE_DISABLE;\
+ type LONO_SOCCLK_GATE_DISABLE;\
+ type LONO_DMCUBCLK_GATE_DISABLE;\
+ type SYMCLKA_FE_GATE_DISABLE;\
+ type SYMCLKB_FE_GATE_DISABLE;\
+ type SYMCLKC_FE_GATE_DISABLE;\
+ type SYMCLKD_FE_GATE_DISABLE;\
+ type SYMCLKE_FE_GATE_DISABLE;\
+ type HDMICHARCLK0_GATE_DISABLE;\
+ type SYMCLKA_GATE_DISABLE;\
+ type SYMCLKB_GATE_DISABLE;\
+ type SYMCLKC_GATE_DISABLE;\
+ type SYMCLKD_GATE_DISABLE;\
+ type SYMCLKE_GATE_DISABLE;\
+ type PHYASYMCLK_ROOT_GATE_DISABLE;\
+ type PHYBSYMCLK_ROOT_GATE_DISABLE;\
+ type PHYCSYMCLK_ROOT_GATE_DISABLE;\
+ type PHYDSYMCLK_ROOT_GATE_DISABLE;\
+ type PHYESYMCLK_ROOT_GATE_DISABLE;\
+ type DTBCLK_P0_GATE_DISABLE;\
+ type DTBCLK_P1_GATE_DISABLE;\
+ type DTBCLK_P2_GATE_DISABLE;\
+ type DTBCLK_P3_GATE_DISABLE;\
+ type DPSTREAMCLK0_GATE_DISABLE;\
+ type DPSTREAMCLK1_GATE_DISABLE;\
+ type DPSTREAMCLK2_GATE_DISABLE;\
+ type DPSTREAMCLK3_GATE_DISABLE;\
+ type DPIASYMCLK0_GATE_DISABLE;\
+ type DPIASYMCLK1_GATE_DISABLE;\
+ type DPIASYMCLK2_GATE_DISABLE;\
+ type DPIASYMCLK3_GATE_DISABLE;
+
+#define HWSEQ_DCN401_REG_FIELD_LIST(type) \
+ type DOMAIN22_POWER_FORCEON; \
+ type DOMAIN22_POWER_GATE; \
+ type DOMAIN23_POWER_FORCEON; \
+ type DOMAIN23_POWER_GATE; \
+ type DOMAIN24_POWER_FORCEON; \
+ type DOMAIN24_POWER_GATE; \
+ type DOMAIN25_POWER_FORCEON; \
+ type DOMAIN25_POWER_GATE; \
+ type DOMAIN22_PGFSM_PWR_STATUS; \
+ type DOMAIN23_PGFSM_PWR_STATUS; \
+ type DOMAIN24_PGFSM_PWR_STATUS; \
+ type DOMAIN25_PGFSM_PWR_STATUS; \
+ type DOMAIN_DESIRED_PWR_STATE;
+
struct dce_hwseq_shift {
HWSEQ_REG_FIELD_LIST(uint8_t)
HWSEQ_DCN_REG_FIELD_LIST(uint8_t)
HWSEQ_DCN3_REG_FIELD_LIST(uint8_t)
HWSEQ_DCN301_REG_FIELD_LIST(uint8_t)
HWSEQ_DCN31_REG_FIELD_LIST(uint8_t)
+ HWSEQ_DCN35_REG_FIELD_LIST(uint8_t)
+ HWSEQ_DCN401_REG_FIELD_LIST(uint8_t)
};
struct dce_hwseq_mask {
@@ -1181,6 +1261,8 @@ struct dce_hwseq_mask {
HWSEQ_DCN3_REG_FIELD_LIST(uint32_t)
HWSEQ_DCN301_REG_FIELD_LIST(uint32_t)
HWSEQ_DCN31_REG_FIELD_LIST(uint32_t)
+ HWSEQ_DCN35_REG_FIELD_LIST(uint32_t)
+ HWSEQ_DCN401_REG_FIELD_LIST(uint32_t)
};
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.c
index 753cb8edd996..0d7e28260db1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.c
@@ -26,10 +26,10 @@
#include "dc.h"
#include "core_types.h"
#include "clk_mgr.h"
-#include "dce100_hw_sequencer.h"
+#include "dce100_hwseq.h"
#include "resource.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
/* include DCE10 register header files */
#include "dce/dce_10_0_d.h"
@@ -138,5 +138,35 @@ void dce100_hw_sequencer_construct(struct dc *dc)
dc->hwseq->funcs.enable_display_power_gating = dce100_enable_display_power_gating;
dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth;
dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth;
+ dc->hwss.clear_surface_dcc_and_tiling = dce100_reset_surface_dcc_and_tiling;
+}
+
+/**
+ * dce100_reset_surface_dcc_and_tiling - Set DCC and tiling in DCE to their disable mode.
+ *
+ * @pipe_ctx: Pointer to the pipe context structure; its plane_res.mi is used.
+ * @plane_state: Surface state; its address is programmed together with the flip.
+ * @clear_tiling: If true set tiling to Linear, otherwise does not change tiling
+ *
+ * This function is responsible for calling the mem_input block to clear tiling
+ * (when requested) and to program a flip; it returns early without a mem_input.
+ */
+void dce100_reset_surface_dcc_and_tiling(struct pipe_ctx *pipe_ctx,
+ struct dc_plane_state *plane_state,
+ bool clear_tiling)
+{
+ struct mem_input *mi = pipe_ctx->plane_res.mi;
+
+ if (!mi)
+ return;
+
+ /* clear tiling only when requested and when the mem_input implements the hook */
+ if (clear_tiling && mi->funcs->mem_input_clear_tiling)
+ mi->funcs->mem_input_clear_tiling(mi);
+
+ /* force a page flip so the new content of the framebuffer becomes visible */
+ mi->funcs->mem_input_program_surface_flip_and_addr(mi,
+ &plane_state->address,
+ true);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.h
index 34518da20009..fadfa794f96b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.h
@@ -46,5 +46,9 @@ bool dce100_enable_display_power_gating(struct dc *dc, uint8_t controller_id,
struct dc_bios *dcb,
enum pipe_gating_control power_gating);
+void dce100_reset_surface_dcc_and_tiling(struct pipe_ctx *pipe_ctx,
+ struct dc_plane_state *plane_state,
+ bool clear_tiling);
+
#endif /* __DC_HWSS_DCE100_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 478281f2a5ba..24184b4eb352 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -30,11 +30,13 @@
#include "core_status.h"
#include "resource.h"
#include "dm_helpers.h"
-#include "dce110_timing_generator.h"
+#include "dce110_hwseq.h"
+#include "dce110/dce110_timing_generator.h"
#include "dce/dce_hwseq.h"
+#include "dce100/dce100_hwseq.h"
#include "gpio_service_interface.h"
-#include "dce110_compressor.h"
+#include "dce110/dce110_compressor.h"
#include "bios/bios_parser_helper.h"
#include "timing_generator.h"
@@ -46,7 +48,7 @@
#include "link_encoder.h"
#include "link_enc_cfg.h"
#include "link_hwss.h"
-#include "link.h"
+#include "link_service.h"
#include "dccg.h"
#include "clock_source.h"
#include "clk_mgr.h"
@@ -54,7 +56,9 @@
#include "audio.h"
#include "reg_helper.h"
#include "panel_cntl.h"
+#include "dc_state_priv.h"
#include "dpcd_defs.h"
+#include "dsc.h"
/* include DCE11 register header files */
#include "dce/dce_11_0_d.h"
#include "dce/dce_11_0_sh_mask.h"
@@ -62,9 +66,7 @@
#include "atomfirmware.h"
-#include "dcn10/dcn10_hw_sequencer.h"
-
-#include "dce110_hw_sequencer.h"
+#include "dcn10/dcn10_hwseq.h"
#define GAMMA_HW_POINTS_NUM 256
@@ -82,7 +84,10 @@
#define CTX \
hws->ctx
-#define DC_LOGGER_INIT()
+#define DC_LOGGER \
+ ctx->logger
+#define DC_LOGGER_INIT() \
+ struct dc_context *ctx = dc->ctx
#define REG(reg)\
hws->regs->reg
@@ -246,7 +251,7 @@ static bool dce110_enable_display_power_gating(
return false;
}
-static void build_prescale_params(struct ipp_prescale_params *prescale_params,
+static void dce110_prescale_params(struct ipp_prescale_params *prescale_params,
const struct dc_plane_state *plane_state)
{
prescale_params->mode = IPP_PRESCALE_MODE_FIXED_UNSIGNED;
@@ -286,21 +291,16 @@ dce110_set_input_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
if (ipp == NULL)
return false;
- if (plane_state->in_transfer_func)
- tf = plane_state->in_transfer_func;
+ tf = &plane_state->in_transfer_func;
- build_prescale_params(&prescale_params, plane_state);
+ dce110_prescale_params(&prescale_params, plane_state);
ipp->funcs->ipp_program_prescale(ipp, &prescale_params);
- if (plane_state->gamma_correction &&
- !plane_state->gamma_correction->is_identity &&
+ if (!plane_state->gamma_correction.is_identity &&
dce_use_lut(plane_state->format))
- ipp->funcs->ipp_program_input_lut(ipp, plane_state->gamma_correction);
+ ipp->funcs->ipp_program_input_lut(ipp, &plane_state->gamma_correction);
- if (tf == NULL) {
- /* Default case if no input transfer function specified */
- ipp->funcs->ipp_set_degamma(ipp, IPP_DEGAMMA_MODE_HW_sRGB);
- } else if (tf->type == TF_TYPE_PREDEFINED) {
+ if (tf->type == TF_TYPE_PREDEFINED) {
switch (tf->tf) {
case TRANSFER_FUNCTION_SRGB:
ipp->funcs->ipp_set_degamma(ipp, IPP_DEGAMMA_MODE_HW_sRGB);
@@ -611,11 +611,10 @@ dce110_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
xfm->funcs->opp_power_on_regamma_lut(xfm, true);
xfm->regamma_params.hw_points_num = GAMMA_HW_POINTS_NUM;
- if (stream->out_transfer_func &&
- stream->out_transfer_func->type == TF_TYPE_PREDEFINED &&
- stream->out_transfer_func->tf == TRANSFER_FUNCTION_SRGB) {
+ if (stream->out_transfer_func.type == TF_TYPE_PREDEFINED &&
+ stream->out_transfer_func.tf == TRANSFER_FUNCTION_SRGB) {
xfm->funcs->opp_set_regamma_mode(xfm, OPP_REGAMMA_SRGB);
- } else if (dce110_translate_regamma_to_hw_format(stream->out_transfer_func,
+ } else if (dce110_translate_regamma_to_hw_format(&stream->out_transfer_func,
&xfm->regamma_params)) {
xfm->funcs->opp_program_regamma_pwl(xfm, &xfm->regamma_params);
xfm->funcs->opp_set_regamma_mode(xfm, OPP_REGAMMA_USER);
@@ -672,6 +671,7 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
uint32_t early_control = 0;
struct timing_generator *tg = pipe_ctx->stream_res.tg;
+ link_hwss->setup_stream_attribute(pipe_ctx);
link_hwss->setup_stream_encoder(pipe_ctx);
dc->hwss.update_info_frame(pipe_ctx);
@@ -745,12 +745,10 @@ void dce110_edp_wait_for_hpd_ready(
return;
}
- if (link != NULL) {
- if (link->panel_config.pps.extra_t3_ms > 0) {
- int extra_t3_in_ms = link->panel_config.pps.extra_t3_ms;
+ if (link->panel_config.pps.extra_t3_ms > 0) {
+ int extra_t3_in_ms = link->panel_config.pps.extra_t3_ms;
- msleep(extra_t3_in_ms);
- }
+ msleep(extra_t3_in_ms);
}
dal_gpio_open(hpd, GPIO_MODE_INTERRUPT);
@@ -788,7 +786,7 @@ void dce110_edp_power_control(
struct dc_context *ctx = link->ctx;
struct bp_transmitter_control cntl = { 0 };
enum bp_result bp_result;
- uint8_t panel_instance;
+ uint8_t pwrseq_instance;
if (dal_graphics_object_id_get_connector_id(link->link_enc->connector)
@@ -871,7 +869,7 @@ void dce110_edp_power_control(
cntl.coherent = false;
cntl.lanes_number = LANE_COUNT_FOUR;
cntl.hpd_sel = link->link_enc->hpd_source;
- panel_instance = link->panel_cntl->inst;
+ pwrseq_instance = link->panel_cntl->pwrseq_inst;
if (ctx->dc->ctx->dmub_srv &&
ctx->dc->debug.dmub_command_table) {
@@ -879,11 +877,11 @@ void dce110_edp_power_control(
if (cntl.action == TRANSMITTER_CONTROL_POWER_ON) {
bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
LVTMA_CONTROL_POWER_ON,
- panel_instance, link->link_powered_externally);
+ pwrseq_instance, link->link_powered_externally);
} else {
bp_result = ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
LVTMA_CONTROL_POWER_OFF,
- panel_instance, link->link_powered_externally);
+ pwrseq_instance, link->link_powered_externally);
}
}
@@ -954,9 +952,9 @@ void dce110_edp_backlight_control(
{
struct dc_context *ctx = link->ctx;
struct bp_transmitter_control cntl = { 0 };
- uint8_t panel_instance;
- unsigned int pre_T11_delay = OLED_PRE_T11_DELAY;
- unsigned int post_T7_delay = OLED_POST_T7_DELAY;
+ uint8_t pwrseq_instance = 0;
+ unsigned int pre_T11_delay = (link->dpcd_sink_ext_caps.bits.oled ? OLED_PRE_T11_DELAY : 0);
+ unsigned int post_T7_delay = (link->dpcd_sink_ext_caps.bits.oled ? OLED_POST_T7_DELAY : 0);
if (dal_graphics_object_id_get_connector_id(link->link_enc->connector)
!= CONNECTOR_ID_EDP) {
@@ -1007,7 +1005,8 @@ void dce110_edp_backlight_control(
*/
/* dc_service_sleep_in_milliseconds(50); */
/*edp 1.2*/
- panel_instance = link->panel_cntl->inst;
+ if (link->panel_cntl)
+ pwrseq_instance = link->panel_cntl->pwrseq_inst;
if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) {
if (!link->dc->config.edp_no_power_sequencing)
@@ -1032,16 +1031,18 @@ void dce110_edp_backlight_control(
if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON)
ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
LVTMA_CONTROL_LCD_BLON,
- panel_instance, link->link_powered_externally);
+ pwrseq_instance, link->link_powered_externally);
else
ctx->dc_bios->funcs->enable_lvtma_control(ctx->dc_bios,
LVTMA_CONTROL_LCD_BLOFF,
- panel_instance, link->link_powered_externally);
+ pwrseq_instance, link->link_powered_externally);
}
link_transmitter_control(ctx->dc_bios, &cntl);
- if (enable && link->dpcd_sink_ext_caps.bits.oled) {
+ if (enable && link->dpcd_sink_ext_caps.bits.oled &&
+ !link->dc->config.edp_no_power_sequencing &&
+ !link->local_sink->edid_caps.panel_patch.oled_optimize_display_on) {
post_T7_delay += link->panel_config.pps.extra_post_t7_ms;
msleep(post_T7_delay);
}
@@ -1066,9 +1067,11 @@ void dce110_edp_backlight_control(
DC_LOG_DC("edp_receiver_ready_T9 skipped\n");
}
- if (!enable && link->dpcd_sink_ext_caps.bits.oled) {
+ if (!enable) {
+ /* follow OEM panel config's requirement */
pre_T11_delay += link->panel_config.pps.extra_pre_t11_ms;
- msleep(pre_T11_delay);
+ if (pre_T11_delay)
+ msleep(pre_T11_delay);
}
}
@@ -1153,9 +1156,12 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
struct timing_generator *tg = pipe_ctx->stream_res.tg;
struct dtbclk_dto_params dto_params = {0};
int dp_hpo_inst;
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link);
+ struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc;
struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc;
+ if (!dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+
if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) {
pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets(
pipe_ctx->stream_res.stream_enc);
@@ -1174,25 +1180,21 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
link_hwss->reset_stream_encoder(pipe_ctx);
- if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
+ if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx) && dccg) {
dto_params.otg_inst = tg->inst;
dto_params.timing = &pipe_ctx->stream->timing;
dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
- dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
- dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
- dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
- } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST && dccg->funcs->disable_symclk_se)
+ if (dccg) {
+ dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
+ dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
+ if (!(dc->ctx->dce_version >= DCN_VERSION_3_5)) {
+ if (dccg && dccg->funcs->set_dtbclk_dto)
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ }
+ }
+ } else if (dccg && dccg->funcs->disable_symclk_se) {
dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst,
- link_enc->transmitter - TRANSMITTER_UNIPHY_A);
-
- if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
- /* TODO: This looks like a bug to me as we are disabling HPO IO when
- * we are just disabling a single HPO stream. Shouldn't we disable HPO
- * HW control only when HPOs for all streams are disabled?
- */
- if (pipe_ctx->stream->ctx->dc->hwseq->funcs.setup_hpo_hw_control)
- pipe_ctx->stream->ctx->dc->hwseq->funcs.setup_hpo_hw_control(
- pipe_ctx->stream->ctx->dc->hwseq, false);
+ link_enc->transmitter - TRANSMITTER_UNIPHY_A);
}
}
@@ -1222,8 +1224,11 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx)
struct dc_link *link = stream->link;
struct dce_hwseq *hws = link->dc->hwseq;
+ if (hws && hws->wa_state.skip_blank_stream)
+ return;
+
if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
- if (!stream->skip_edp_power_down)
+ if (!link->skip_implict_edp_power_control && hws)
hws->funcs.edp_backlight_control(link, false);
link->dc->hwss.set_abm_immediate_disable(pipe_ctx);
}
@@ -1241,20 +1246,21 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx)
* has changed or they enter protection state and hang.
*/
msleep(60);
- } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) {
- if (!link->dc->config.edp_no_power_sequencing) {
- /*
- * Sometimes, DP receiver chip power-controlled externally by an
- * Embedded Controller could be treated and used as eDP,
- * if it drives mobile display. In this case,
- * we shouldn't be doing power-sequencing, hence we can skip
- * waiting for T9-ready.
- */
- link->dc->link_srv->edp_receiver_ready_T9(link);
- }
}
}
+ if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP &&
+ !link->dc->config.edp_no_power_sequencing) {
+ /*
+ * Sometimes, DP receiver chip power-controlled externally by an
+ * Embedded Controller could be treated and used as eDP,
+ * if it drives mobile display. In this case,
+ * we shouldn't be doing power-sequencing, hence we can skip
+ * waiting for T9-ready.
+ */
+ link->dc->link_srv->edp_receiver_ready_T9(link);
+ }
+
}
@@ -1264,7 +1270,7 @@ void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable)
pipe_ctx->stream_res.stream_enc->funcs->set_avmute(pipe_ctx->stream_res.stream_enc, enable);
}
-static enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id)
+enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id)
{
switch (crtc_id) {
case CONTROLLER_ID_D0:
@@ -1284,7 +1290,99 @@ static enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id)
}
}
-static void build_audio_output(
+void populate_audio_dp_link_info(
+ const struct pipe_ctx *pipe_ctx,
+ struct audio_dp_link_info *dp_link_info)
+{
+ const struct dc_stream_state *stream = pipe_ctx->stream;
+ const struct dc_link *link = stream->link;
+ struct fixed31_32 link_bw_kbps;
+
+ dp_link_info->encoding = link->dc->link_srv->dp_get_encoding_format(
+ &pipe_ctx->link_config.dp_link_settings);
+ dp_link_info->is_mst = (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST);
+ dp_link_info->lane_count = pipe_ctx->link_config.dp_link_settings.lane_count;
+ dp_link_info->link_rate = pipe_ctx->link_config.dp_link_settings.link_rate;
+
+ link_bw_kbps = dc_fixpt_from_int(dc_link_bandwidth_kbps(link,
+ &pipe_ctx->link_config.dp_link_settings));
+
+ /* For audio stream calculations, the video stream should not include FEC or SSC
+ * in order to get the most pessimistic values, so those factors are divided out.
+ */
+ if (dp_link_info->encoding == DP_8b_10b_ENCODING &&
+ link->dc->link_srv->dp_is_fec_supported(link)) {
+ link_bw_kbps = dc_fixpt_mul(link_bw_kbps,
+ dc_fixpt_from_fraction(100, DATA_EFFICIENCY_8b_10b_FEC_EFFICIENCY_x100));
+ } else if (dp_link_info->encoding == DP_128b_132b_ENCODING) {
+ link_bw_kbps = dc_fixpt_mul(link_bw_kbps,
+ dc_fixpt_from_fraction(10000, 9975)); /* 99.75% SSC overhead */
+ }
+
+ dp_link_info->link_bandwidth_kbps = dc_fixpt_floor(link_bw_kbps);
+
+ /* Calculates hblank_min_symbol_width for 128b/132b (set to 0 for other encodings).
+ * Corresponding HBLANK_MIN_SYMBOL_WIDTH register is calculated as:
+ * floor(h_blank * bits_per_pixel / 128)
+ */
+ if (dp_link_info->encoding == DP_128b_132b_ENCODING) {
+ struct dc_crtc_timing *crtc_timing = &pipe_ctx->stream->timing;
+
+ uint32_t h_active = crtc_timing->h_addressable + crtc_timing->h_border_left
+ + crtc_timing->h_border_right;
+ uint32_t h_blank = crtc_timing->h_total - h_active;
+
+ uint32_t bpp;
+
+ if (crtc_timing->flags.DSC) {
+ bpp = crtc_timing->dsc_cfg.bits_per_pixel;
+ } else {
+ /* When the timing is using DSC, dsc_cfg.bits_per_pixel is in units of 1/16 bit.
+ * The bpp in this non-DSC path is scaled to the same 1/16-bit units so the
+ * final calculation is correct for both cases.
+ */
+ bpp = 16;
+ switch (crtc_timing->display_color_depth) {
+ case COLOR_DEPTH_666:
+ bpp *= 18;
+ break;
+ case COLOR_DEPTH_888:
+ bpp *= 24;
+ break;
+ case COLOR_DEPTH_101010:
+ bpp *= 30;
+ break;
+ case COLOR_DEPTH_121212:
+ bpp *= 36;
+ break;
+ default:
+ bpp = 0;
+ break;
+ }
+
+ switch (crtc_timing->pixel_encoding) {
+ case PIXEL_ENCODING_YCBCR422:
+ bpp = bpp * 2 / 3;
+ break;
+ case PIXEL_ENCODING_YCBCR420:
+ bpp /= 2;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Min symbol width = floor(h_blank * (bpp/16) / 128); NOTE(review): code divides by 128 / 16 - confirm */
+ dp_link_info->hblank_min_symbol_width = dc_fixpt_floor(
+ dc_fixpt_div(dc_fixpt_from_int(h_blank * bpp),
+ dc_fixpt_from_int(128 / 16)));
+
+ } else {
+ dp_link_info->hblank_min_symbol_width = 0;
+ }
+}
+
+void build_audio_output(
struct dc_state *state,
const struct pipe_ctx *pipe_ctx,
struct audio_output *audio_output)
@@ -1331,6 +1429,15 @@ static void build_audio_output(
audio_output->crtc_info.calculated_pixel_clock_100Hz =
pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz;
+ audio_output->crtc_info.pixel_encoding =
+ stream->timing.pixel_encoding;
+
+ audio_output->crtc_info.dsc_bits_per_pixel =
+ stream->timing.dsc_cfg.bits_per_pixel;
+
+ audio_output->crtc_info.dsc_num_slices =
+ stream->timing.dsc_cfg.num_slices_h;
+
/*for HDMI, audio ACR is with deep color ratio factor*/
if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) &&
audio_output->crtc_info.requested_pixel_clock_100Hz ==
@@ -1347,7 +1454,7 @@ static void build_audio_output(
if (state->clk_mgr &&
(pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) {
- audio_output->pll_info.dp_dto_source_clock_in_khz =
+ audio_output->pll_info.audio_dto_source_clock_in_khz =
state->clk_mgr->funcs->get_dp_ref_clk_frequency(
state->clk_mgr);
}
@@ -1364,6 +1471,10 @@ static void build_audio_output(
audio_output->pll_info.ss_percentage =
pipe_ctx->pll_settings.ss_percentage;
+
+ if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
+ populate_audio_dp_link_info(pipe_ctx, &audio_output->dp_link_info);
+ }
}
static void program_scaler(const struct dc *dc,
@@ -1454,6 +1565,7 @@ static enum dc_status dce110_enable_stream_timing(
0,
0,
0,
+ 0,
pipe_ctx->stream->signal,
true);
}
@@ -1469,7 +1581,7 @@ static enum dc_status dce110_enable_stream_timing(
return DC_OK;
}
-static enum dc_status apply_single_controller_ctx_to_hw(
+enum dc_status dce110_apply_single_controller_ctx_to_hw(
struct pipe_ctx *pipe_ctx,
struct dc_state *context,
struct dc *dc)
@@ -1489,7 +1601,7 @@ static enum dc_status apply_single_controller_ctx_to_hw(
}
if (pipe_ctx->stream_res.audio != NULL) {
- struct audio_output audio_output;
+ struct audio_output audio_output = {0};
build_audio_output(context, pipe_ctx, &audio_output);
@@ -1500,7 +1612,13 @@ static enum dc_status apply_single_controller_ctx_to_hw(
pipe_ctx->stream_res.audio,
pipe_ctx->stream->signal,
&audio_output.crtc_info,
- &pipe_ctx->stream->audio_info);
+ &pipe_ctx->stream->audio_info,
+ &audio_output.dp_link_info);
+
+ if (dc->config.disable_hbr_audio_dp2)
+ if (pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio &&
+ dc->link_srv->dp_is_128b_132b_signal(pipe_ctx))
+ pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio(pipe_ctx->stream_res.audio);
}
/* make sure no pipes syncd to the pipe being enabled */
@@ -1548,9 +1666,7 @@ static enum dc_status apply_single_controller_ctx_to_hw(
params.vertical_total_min = stream->adjust.v_total_min;
params.vertical_total_max = stream->adjust.v_total_max;
- if (pipe_ctx->stream_res.tg->funcs->set_drr)
- pipe_ctx->stream_res.tg->funcs->set_drr(
- pipe_ctx->stream_res.tg, &params);
+ set_drr_and_clear_adjust_pending(pipe_ctx, stream, &params);
// DRR should set trigger event to monitor surface update event
if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0)
@@ -1571,6 +1687,19 @@ static enum dc_status apply_single_controller_ctx_to_hw(
if (dc_is_dp_signal(pipe_ctx->stream->signal))
dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG);
+ /* Temporary workaround to perform DSC programming ahead of stream enablement
+ * for smartmux/SPRS
+ * TODO: Remove SmartMux/SPRS checks once movement of DSC programming is generalized
+ */
+ if (pipe_ctx->stream->timing.flags.DSC) {
+ if ((pipe_ctx->stream->signal == SIGNAL_TYPE_EDP &&
+ ((link->dc->config.smart_mux_version && link->dc->is_switch_in_progress_dest)
+ || link->is_dds || link->skip_implict_edp_power_control)) &&
+ (dc_is_dp_signal(pipe_ctx->stream->signal) ||
+ dc_is_virtual_signal(pipe_ctx->stream->signal)))
+ dc->link_srv->set_dsc_enable(pipe_ctx, true);
+ }
+
if (!stream->dpms_off)
dc->link_srv->set_dpms_on(context, pipe_ctx);
@@ -1590,7 +1719,7 @@ static enum dc_status apply_single_controller_ctx_to_hw(
* is constructed with the same sink). Make sure not to override
* and link programming on the main.
*/
- if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+ if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false;
pipe_ctx->stream->link->replay_settings.replay_feature_enabled = false;
}
@@ -1678,7 +1807,7 @@ static void disable_vga_and_power_gate_all_controllers(
true);
dc->current_state->res_ctx.pipe_ctx[i].pipe_idx = i;
- dc->hwss.disable_plane(dc,
+ dc->hwss.disable_plane(dc, dc->current_state,
&dc->current_state->res_ctx.pipe_ctx[i]);
}
}
@@ -1719,6 +1848,48 @@ static void get_edp_links_with_sink(
}
}
+static void clean_up_dsc_blocks(struct dc *dc)
+{
+ struct display_stream_compressor *dsc = NULL;
+ struct timing_generator *tg = NULL;
+ struct stream_encoder *se = NULL;
+ struct dccg *dccg = dc->res_pool->dccg;
+ struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
+ int i;
+
+ if (!dc->caps.is_apu ||
+ dc->ctx->dce_version < DCN_VERSION_3_15)
+ return;
+ /* VBIOS supports DSC starting from DCN 3.15 */
+ for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) {
+ struct dcn_dsc_state s = {0};
+
+ dsc = dc->res_pool->dscs[i];
+ dsc->funcs->dsc_read_state(dsc, &s);
+ if (s.dsc_fw_en) {
+ /* disable DSC in OPTC */
+ if (i < dc->res_pool->timing_generator_count) {
+ tg = dc->res_pool->timing_generators[i];
+ tg->funcs->set_dsc_config(tg, OPTC_DSC_DISABLED, 0, 0);
+ }
+ /* disable DSC in stream encoder */
+ if (i < dc->res_pool->stream_enc_count) {
+ se = dc->res_pool->stream_enc[i];
+ se->funcs->dp_set_dsc_config(se, OPTC_DSC_DISABLED, 0, 0);
+ se->funcs->dp_set_dsc_pps_info_packet(se, false, NULL, true);
+ }
+ /* disable DSC block */
+ if (dccg->funcs->set_ref_dscclk)
+ dccg->funcs->set_ref_dscclk(dccg, dsc->inst);
+ dsc->funcs->dsc_disable(dsc);
+
+ /* power down DSC */
+ if (pg_cntl != NULL)
+ pg_cntl->funcs->dsc_pg_control(pg_cntl, dsc->inst, false);
+ }
+ }
+}
+
/*
* When ASIC goes from VBIOS/VGA mode to driver/accelerated mode we need:
* 1. Power down all DC HW blocks
@@ -1733,6 +1904,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
struct dc_stream_state *edp_streams[MAX_NUM_EDP];
struct dc_link *edp_link_with_sink = NULL;
struct dc_link *edp_link = NULL;
+ struct pipe_ctx *pipe_ctx = NULL;
struct dce_hwseq *hws = dc->hwseq;
int edp_with_sink_num;
int edp_num;
@@ -1741,6 +1913,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
bool can_apply_edp_fast_boot = false;
bool can_apply_seamless_boot = false;
bool keep_edp_vdd_on = false;
+ struct dc_bios *dcb = dc->ctx->dc_bios;
DC_LOGGER_INIT();
@@ -1752,10 +1925,8 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
get_edp_streams(context, edp_streams, &edp_stream_num);
- // Check fastboot support, disable on DCE8 because of blank screens
- if (edp_num && edp_stream_num && dc->ctx->dce_version != DCE_VERSION_8_0 &&
- dc->ctx->dce_version != DCE_VERSION_8_1 &&
- dc->ctx->dce_version != DCE_VERSION_8_3) {
+ /* Check fastboot support, disable on DCE 6-8 because of blank screens */
+ if (edp_num && edp_stream_num && dc->ctx->dce_version < DCE_VERSION_10_0) {
for (i = 0; i < edp_num; i++) {
edp_link = edp_links[i];
if (edp_link != edp_streams[0]->link)
@@ -1768,10 +1939,34 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
can_apply_edp_fast_boot = dc_validate_boot_timing(dc,
edp_stream->sink, &edp_stream->timing);
- edp_stream->apply_edp_fast_boot_optimization = can_apply_edp_fast_boot;
- if (can_apply_edp_fast_boot)
- DC_LOG_EVENT_LINK_TRAINING("eDP fast boot disabled to optimize link rate\n");
+ // For Mux-platform, the default value is false.
+ // Disable fast boot during mux switching.
+ // The flag is cleared once switching is done.
+ if (dc->is_switch_in_progress_dest && edp_link->is_dds)
+ can_apply_edp_fast_boot = false;
+
+ edp_stream->apply_edp_fast_boot_optimization = can_apply_edp_fast_boot;
+ if (can_apply_edp_fast_boot) {
+ DC_LOG_EVENT_LINK_TRAINING("eDP fast boot Enable\n");
+
+ // Vbios & Driver support different pixel rate div policy.
+ pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, edp_stream);
+ if (pipe_ctx &&
+ hws->funcs.is_dp_dig_pixel_rate_div_policy &&
+ hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx)) {
+ // Get Vbios div factor from register
+ dc->res_pool->dccg->funcs->get_pixel_rate_div(
+ dc->res_pool->dccg,
+ pipe_ctx->stream_res.tg->inst,
+ &pipe_ctx->pixel_rate_divider.div_factor1,
+ &pipe_ctx->pixel_rate_divider.div_factor2);
+
+ // VBios doesn't support pixel rate div, so force it.
+ // If VBios supports it, we check it from register or other flags.
+ pipe_ctx->stream_res.pix_clk_params.dio_se_pix_per_cycle = 1;
+ }
+ }
break;
}
}
@@ -1794,19 +1989,32 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
if (edp_with_sink_num)
edp_link_with_sink = edp_links_with_sink[0];
+ // During a mux switch, powering down the HW blocks and then enabling
+ // the link via a DPCD SET_POWER write causes a brief flash
+ keep_edp_vdd_on |= dc->is_switch_in_progress_dest;
+
if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) {
if (edp_link_with_sink && !keep_edp_vdd_on) {
/*turn off backlight before DP_blank and encoder powered down*/
hws->funcs.edp_backlight_control(edp_link_with_sink, false);
}
/*resume from S3, no vbios posting, no need to power down again*/
- clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
+ if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb))
+ clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
power_down_all_hw_blocks(dc);
+
+ /* DSC could be enabled on eDP during VBIOS post.
+ * Clean up the DSC blocks if an eDP link has a sink but no active stream.
+ */
+ if (edp_link_with_sink && (edp_stream_num == 0))
+ clean_up_dsc_blocks(dc);
+
disable_vga_and_power_gate_all_controllers(dc);
if (edp_link_with_sink && !keep_edp_vdd_on)
dc->hwss.edp_power_control(edp_link_with_sink, false);
- clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
+ if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb))
+ clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
}
bios_set_scratch_acc_mode_change(dc->ctx->dc_bios, 1);
}
@@ -1921,13 +2129,19 @@ static void set_drr(struct pipe_ctx **pipe_ctx,
* as well.
*/
for (i = 0; i < num_pipes; i++) {
- pipe_ctx[i]->stream_res.tg->funcs->set_drr(
- pipe_ctx[i]->stream_res.tg, &params);
-
- if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
- pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
- pipe_ctx[i]->stream_res.tg,
- event_triggers, num_frames);
+ /* dc_state_destruct() might null the stream resources, so fetch tg
+ * here first to avoid a race condition. The lifetime of the pointee
+ * itself (the timing_generator object) is not a problem here.
+ */
+ struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
+
+ if ((tg != NULL) && tg->funcs) {
+ set_drr_and_clear_adjust_pending(pipe_ctx[i], pipe_ctx[i]->stream, &params);
+ if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
+ if (tg->funcs->set_static_screen_control)
+ tg->funcs->set_static_screen_control(
+ tg, event_triggers, num_frames);
+ }
}
}
@@ -1999,9 +2213,6 @@ static bool should_enable_fbc(struct dc *dc,
pipe_ctx = &res_ctx->pipe_ctx[i];
- if (!pipe_ctx)
- continue;
-
/* fbc not applicable on underlay pipe */
if (pipe_ctx->pipe_idx != underlay_idx) {
*pipe_idx = i;
@@ -2042,7 +2253,7 @@ static bool should_enable_fbc(struct dc *dc,
/*
* Enable FBC
*/
-static void enable_fbc(
+void enable_fbc(
struct dc *dc,
struct dc_state *context)
{
@@ -2121,7 +2332,8 @@ static void dce110_reset_hw_ctx_wrap(
BREAK_TO_DEBUGGER();
}
pipe_ctx_old->stream_res.tg->funcs->disable_crtc(pipe_ctx_old->stream_res.tg);
- pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+ if (dc_is_hdmi_tmds_signal(pipe_ctx_old->stream->signal))
+ pipe_ctx_old->stream->link->phy_state.symclk_ref_cnts.otg = 0;
pipe_ctx_old->plane_res.mi->funcs->free_mem_input(
pipe_ctx_old->plane_res.mi, dc->current_state->stream_count);
@@ -2130,7 +2342,7 @@ static void dce110_reset_hw_ctx_wrap(
old_clk))
old_clk->funcs->cs_power_down(old_clk);
- dc->hwss.disable_plane(dc, pipe_ctx_old);
+ dc->hwss.disable_plane(dc, dc->current_state, pipe_ctx_old);
pipe_ctx_old->stream = NULL;
}
@@ -2141,7 +2353,7 @@ static void dce110_setup_audio_dto(
struct dc *dc,
struct dc_state *context)
{
- int i;
+ unsigned int i;
/* program audio wall clock. use HDMI as clock source if HDMI
* audio active. Otherwise, use DP as clock source
@@ -2213,7 +2425,7 @@ static void dce110_setup_audio_dto(
continue;
if (pipe_ctx->stream_res.audio != NULL) {
- struct audio_output audio_output;
+ struct audio_output audio_output = {0};
build_audio_output(context, pipe_ctx, &audio_output);
@@ -2236,6 +2448,8 @@ enum dc_status dce110_apply_ctx_to_hw(
struct dc_bios *dcb = dc->ctx->dc_bios;
enum dc_status status;
int i;
+ bool was_hpo_acquired = resource_is_hpo_acquired(dc->current_state);
+ bool is_hpo_acquired = resource_is_hpo_acquired(context);
/* reset syncd pipes from disabled pipes */
if (dc->config.use_pipe_ctx_sync_logic)
@@ -2278,6 +2492,10 @@ enum dc_status dce110_apply_ctx_to_hw(
dce110_setup_audio_dto(dc, context);
+ if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_acquired != is_hpo_acquired) {
+ dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_acquired);
+ }
+
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe_ctx_old =
&dc->current_state->res_ctx.pipe_ctx[i];
@@ -2297,7 +2515,7 @@ enum dc_status dce110_apply_ctx_to_hw(
if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe)
continue;
- status = apply_single_controller_ctx_to_hw(
+ status = dce110_apply_single_controller_ctx_to_hw(
pipe_ctx,
context,
dc);
@@ -2307,7 +2525,7 @@ enum dc_status dce110_apply_ctx_to_hw(
#ifdef CONFIG_DRM_AMD_DC_FP
if (hws->funcs.resync_fifo_dccg_dio)
- hws->funcs.resync_fifo_dccg_dio(hws, dc, context);
+ hws->funcs.resync_fifo_dccg_dio(hws, dc, context, i);
#endif
}
@@ -2457,6 +2675,7 @@ static bool wait_for_reset_trigger_to_occur(
struct dc_context *dc_ctx,
struct timing_generator *tg)
{
+ struct dc_context *ctx = dc_ctx;
bool rc = false;
/* To avoid endless loop we wait at most
@@ -2493,6 +2712,7 @@ static bool wait_for_reset_trigger_to_occur(
/* Enable timing synchronization for a group of Timing Generators. */
static void dce110_enable_timing_synchronization(
struct dc *dc,
+ struct dc_state *state,
int group_index,
int group_size,
struct pipe_ctx *grouped_pipes[])
@@ -2500,6 +2720,7 @@ static void dce110_enable_timing_synchronization(
struct dc_context *dc_ctx = dc->ctx;
struct dcp_gsl_params gsl_params = { 0 };
int i;
+ DC_LOGGER_INIT();
DC_SYNC_INFO("GSL: Setting-up...\n");
@@ -2545,6 +2766,7 @@ static void dce110_enable_per_frame_crtc_position_reset(
struct dc_context *dc_ctx = dc->ctx;
struct dcp_gsl_params gsl_params = { 0 };
int i;
+ DC_LOGGER_INIT();
gsl_params.gsl_group = 0;
gsl_params.gsl_master = 0;
@@ -2570,12 +2792,12 @@ static void dce110_enable_per_frame_crtc_position_reset(
}
-static void init_pipes(struct dc *dc, struct dc_state *context)
+static void dce110_init_pipes(struct dc *dc, struct dc_state *context)
{
// Do nothing
}
-static void init_hw(struct dc *dc)
+static void dce110_init_hw(struct dc *dc)
{
int i;
struct dc_bios *bp;
@@ -2584,6 +2806,7 @@ static void init_hw(struct dc *dc)
struct dmcu *dmcu;
struct dce_hwseq *hws = dc->hwseq;
uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+ uint32_t user_level = MAX_BACKLIGHT_LEVEL;
bp = dc->ctx->dc_bios;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -2633,13 +2856,15 @@ static void init_hw(struct dc *dc)
for (i = 0; i < dc->link_count; i++) {
struct dc_link *link = dc->links[i];
- if (link->panel_cntl)
+ if (link->panel_cntl) {
backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+ user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+ }
}
abm = dc->res_pool->abm;
if (abm != NULL)
- abm->funcs->abm_init(abm, backlight);
+ abm->funcs->abm_init(abm, backlight, user_level);
dmcu = dc->res_pool->dmcu;
if (dmcu != NULL && abm != NULL)
@@ -2658,11 +2883,11 @@ void dce110_prepare_bandwidth(
struct clk_mgr *dccg = dc->clk_mgr;
dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool);
-
- dccg->funcs->update_clocks(
- dccg,
- context,
- false);
+ if (dccg)
+ dccg->funcs->update_clocks(
+ dccg,
+ context,
+ false);
}
void dce110_optimize_bandwidth(
@@ -2673,10 +2898,11 @@ void dce110_optimize_bandwidth(
dce110_set_displaymarks(dc, context);
- dccg->funcs->update_clocks(
- dccg,
- context,
- true);
+ if (dccg)
+ dccg->funcs->update_clocks(
+ dccg,
+ context,
+ true);
}
static void dce110_program_front_end_for_pipe(
@@ -2689,7 +2915,6 @@ static void dce110_program_front_end_for_pipe(
unsigned int i;
struct dce_hwseq *hws = dc->hwseq;
- DC_LOGGER_INIT();
memset(&tbl_entry, 0, sizeof(tbl_entry));
memset(&adjust, 0, sizeof(adjust));
@@ -2836,7 +3061,7 @@ static void dce110_post_unlock_program_front_end(
{
}
-static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx)
+static void dce110_power_down_fe(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
{
struct dce_hwseq *hws = dc->hwseq;
int fe_idx = pipe_ctx->plane_res.mi ?
@@ -2949,9 +3174,10 @@ static void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx)
}
bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp)
+ struct set_backlight_level_params *backlight_level_params)
{
+ uint32_t backlight_pwm_u16_16 = backlight_level_params->backlight_pwm_u16_16;
+ uint32_t frame_ramp = backlight_level_params->frame_ramp;
struct dc_link *link = pipe_ctx->stream->link;
struct dc *dc = link->ctx->dc;
struct abm *abm = pipe_ctx->stream_res.abm;
@@ -3109,7 +3335,8 @@ void dce110_disable_link_output(struct dc_link *link,
struct dmcu *dmcu = dc->res_pool->dmcu;
if (signal == SIGNAL_TYPE_EDP &&
- link->dc->hwss.edp_backlight_control)
+ link->dc->hwss.edp_backlight_control &&
+ !link->skip_implict_edp_power_control)
link->dc->hwss.edp_backlight_control(link, false);
else if (dmcu != NULL && dmcu->funcs->lock_phy)
dmcu->funcs->lock_phy(dmcu);
@@ -3121,7 +3348,7 @@ void dce110_disable_link_output(struct dc_link *link,
* from enable/disable link output and only call edp panel control
* in enable_link_dp and disable_link_dp once.
*/
- if (dmcu != NULL && dmcu->funcs->lock_phy)
+ if (dmcu != NULL && dmcu->funcs->unlock_phy)
dmcu->funcs->unlock_phy(dmcu);
dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
}
@@ -3129,12 +3356,13 @@ void dce110_disable_link_output(struct dc_link *link,
static const struct hw_sequencer_funcs dce110_funcs = {
.program_gamut_remap = program_gamut_remap,
.program_output_csc = program_output_csc,
- .init_hw = init_hw,
+ .init_hw = dce110_init_hw,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = dce110_apply_ctx_for_surface,
.post_unlock_program_front_end = dce110_post_unlock_program_front_end,
.update_plane_addr = update_plane_addr,
.update_pending_status = dce110_update_pending_status,
+ .clear_surface_dcc_and_tiling = dce100_reset_surface_dcc_and_tiling,
.enable_accelerated_mode = dce110_enable_accelerated_mode,
.enable_timing_synchronization = dce110_enable_timing_synchronization,
.enable_per_frame_crtc_position_reset = dce110_enable_per_frame_crtc_position_reset,
@@ -3172,8 +3400,7 @@ static const struct hw_sequencer_funcs dce110_funcs = {
};
static const struct hwseq_private_funcs dce110_private_funcs = {
- .init_pipes = init_pipes,
- .update_plane_addr = update_plane_addr,
+ .init_pipes = dce110_init_pipes,
.set_input_transfer_func = dce110_set_input_transfer_func,
.set_output_transfer_func = dce110_set_output_transfer_func,
.power_down = dce110_power_down,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h
index 08028a1779ae..9c032e449481 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h
@@ -39,6 +39,10 @@ enum dc_status dce110_apply_ctx_to_hw(
struct dc *dc,
struct dc_state *context);
+enum dc_status dce110_apply_single_controller_ctx_to_hw(
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+ struct dc *dc);
void dce110_enable_stream(struct pipe_ctx *pipe_ctx);
@@ -84,8 +88,7 @@ void dce110_edp_wait_for_hpd_ready(
bool power_up);
bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp);
+ struct set_backlight_level_params *params);
void dce110_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx);
void dce110_set_pipe(struct pipe_ctx *pipe_ctx);
void dce110_disable_link_output(struct dc_link *link,
@@ -107,5 +110,16 @@ void dce110_enable_dp_link_output(
enum signal_type signal,
enum clock_source_id clock_source,
const struct dc_link_settings *link_settings);
+void build_audio_output(
+ struct dc_state *state,
+ const struct pipe_ctx *pipe_ctx,
+ struct audio_output *audio_output);
+enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id);
+void populate_audio_dp_link_info(
+ const struct pipe_ctx *pipe_ctx,
+ struct audio_dp_link_info *dp_link_info);
+void enable_fbc(
+ struct dc *dc,
+ struct dc_state *context);
#endif /* __DC_HWSS_DCE110_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.c
index 0ef9ebb3c1e2..ed9b0113a7a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.c
@@ -26,9 +26,9 @@
#include "dm_services.h"
#include "dc.h"
#include "core_types.h"
-#include "dce112_hw_sequencer.h"
+#include "dce112_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
/* include DCE11.2 register header files */
#include "dce/dce_11_2_d.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.h
index 943f1b2c5b2f..943f1b2c5b2f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.c
index 45e08c4d5861..2a62f63d0357 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.c
@@ -26,10 +26,11 @@
#include "dm_services.h"
#include "dc.h"
#include "core_types.h"
-#include "dce120_hw_sequencer.h"
+#include "dce120_hwseq.h"
#include "dce/dce_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce100/dce100_hwseq.h"
+#include "dce110/dce110_hwseq.h"
#include "dce/dce_12_0_offset.h"
#include "dce/dce_12_0_sh_mask.h"
@@ -264,5 +265,6 @@ void dce120_hw_sequencer_construct(struct dc *dc)
dce110_hw_sequencer_construct(dc);
dc->hwseq->funcs.enable_display_power_gating = dce120_enable_display_power_gating;
dc->hwss.update_dchub = dce120_update_dchub;
+ dc->hwss.clear_surface_dcc_and_tiling = dce100_reset_surface_dcc_and_tiling;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.h
index bc024534732f..bc024534732f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c
index 920c7ae29d53..a08e9f9eec17 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c
@@ -26,11 +26,11 @@
#include "dm_services.h"
#include "dc.h"
#include "core_types.h"
-#include "dce60_hw_sequencer.h"
+#include "dce60_hwseq.h"
#include "dce/dce_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dce100/dce100_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dce100/dce100_hwseq.h"
/* include DCE6 register header files */
#include "dce/dce_6_0_d.h"
@@ -428,5 +428,6 @@ void dce60_hw_sequencer_construct(struct dc *dc)
dc->hwss.pipe_control_lock = dce60_pipe_control_lock;
dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth;
dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth;
+ dc->hwss.clear_surface_dcc_and_tiling = dce100_reset_surface_dcc_and_tiling;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h
index f3b2d8b60d5b..f3b2d8b60d5b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.c
index d2ceebdbdf51..76fd45550c5e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.c
@@ -26,11 +26,11 @@
#include "dm_services.h"
#include "dc.h"
#include "core_types.h"
-#include "dce80_hw_sequencer.h"
+#include "dce80_hwseq.h"
#include "dce/dce_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dce100/dce100_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dce100/dce100_hwseq.h"
/* include DCE8 register header files */
#include "dce/dce_8_0_d.h"
@@ -50,5 +50,6 @@ void dce80_hw_sequencer_construct(struct dc *dc)
dc->hwss.pipe_control_lock = dce_pipe_control_lock;
dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth;
dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth;
+ dc->hwss.clear_surface_dcc_and_tiling = dce100_reset_surface_dcc_and_tiling;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.h
index e43af832d00c..e43af832d00c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 9834b75f1837..e9fe97f0c4ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -29,22 +29,22 @@
#include "core_types.h"
#include "resource.h"
#include "custom_float.h"
-#include "dcn10_hw_sequencer.h"
-#include "dcn10_hw_sequencer_debug.h"
+#include "dcn10_hwseq.h"
+#include "dcn10/dcn10_hw_sequencer_debug.h"
#include "dce/dce_hwseq.h"
#include "abm.h"
#include "dmcu.h"
-#include "dcn10_optc.h"
-#include "dcn10_dpp.h"
-#include "dcn10_mpc.h"
+#include "dcn10/dcn10_optc.h"
+#include "dcn10/dcn10_dpp.h"
+#include "dcn10/dcn10_mpc.h"
#include "timing_generator.h"
#include "opp.h"
#include "ipp.h"
#include "mpc.h"
#include "reg_helper.h"
-#include "dcn10_hubp.h"
-#include "dcn10_hubbub.h"
-#include "dcn10_cm_common.h"
+#include "dcn10/dcn10_hubp.h"
+#include "dcn10/dcn10_hubbub.h"
+#include "dcn10/dcn10_cm_common.h"
#include "dccg.h"
#include "clk_mgr.h"
#include "link_hwss.h"
@@ -55,9 +55,13 @@
#include "dce/dmub_hw_lock_mgr.h"
#include "dc_trace.h"
#include "dce/dmub_outbox.h"
-#include "link.h"
+#include "link_service.h"
+#include "dc_state_priv.h"
-#define DC_LOGGER_INIT(logger)
+#define DC_LOGGER \
+ dc_logger
+#define DC_LOGGER_INIT(logger) \
+ struct dal_logger *dc_logger = logger
#define CTX \
hws->ctx
@@ -90,6 +94,128 @@ static void print_microsec(struct dc_context *dc_ctx,
us_x10 % frac);
}
+/*
+ * Delay until we have passed the busy-until point, after which the
+ * necessary locking/programming for consecutive full updates can be done
+ */
+void dcn10_wait_for_pipe_update_if_needed(struct dc *dc, struct pipe_ctx *pipe_ctx, bool is_surface_update_only)
+{
+ struct crtc_position position;
+ struct dc_stream_state *stream = pipe_ctx->stream;
+ unsigned int vpos, frame_count;
+ uint32_t vupdate_start, vupdate_end, vblank_start;
+ unsigned int lines_to_vupdate, us_to_vupdate;
+ unsigned int us_per_line, us_vupdate;
+
+ if (!pipe_ctx->stream ||
+ !pipe_ctx->stream_res.tg ||
+ !pipe_ctx->stream_res.stream_enc)
+ return;
+
+ if (pipe_ctx->prev_odm_pipe &&
+ pipe_ctx->stream)
+ return;
+
+ if (!pipe_ctx->wait_is_required)
+ return;
+
+ struct timing_generator *tg = pipe_ctx->stream_res.tg;
+
+ if (tg->funcs->is_tg_enabled && !tg->funcs->is_tg_enabled(tg))
+ return;
+
+ dc->hwss.calc_vupdate_position(dc, pipe_ctx, &vupdate_start,
+ &vupdate_end);
+
+ dc->hwss.get_position(&pipe_ctx, 1, &position);
+ vpos = position.vertical_count;
+
+ frame_count = tg->funcs->get_frame_count(tg);
+
+ if (frame_count - pipe_ctx->wait_frame_count > 2)
+ return;
+
+ vblank_start = pipe_ctx->pipe_dlg_param.vblank_start;
+
+ if (vpos >= vupdate_start && vupdate_start >= vblank_start)
+ lines_to_vupdate = stream->timing.v_total - vpos + vupdate_start;
+ else
+ lines_to_vupdate = vupdate_start - vpos;
+
+ us_per_line =
+ stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz;
+ us_to_vupdate = lines_to_vupdate * us_per_line;
+
+ if (vupdate_end < vupdate_start)
+ vupdate_end += stream->timing.v_total;
+
+ if (lines_to_vupdate > stream->timing.v_total - vupdate_end + vupdate_start)
+ us_to_vupdate = 0;
+
+ us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line;
+
+ if (is_surface_update_only && us_to_vupdate + us_vupdate > 200) {
+ //surface updates come in at high irql
+ pipe_ctx->wait_is_required = true;
+ return;
+ }
+
+ fsleep(us_to_vupdate + us_vupdate);
+
+ //clear
+ pipe_ctx->next_vupdate = 0;
+ pipe_ctx->wait_frame_count = 0;
+ pipe_ctx->wait_is_required = false;
+}
+
+/*
+ * On pipe unlock and programming, indicate that the pipe will be busy
+ * until some frame and line (vupdate). This is required for consecutive
+ * full updates: we need to wait for the previous update to latch
+ * before trying to program the next update.
+ */
+void dcn10_set_wait_for_update_needed_for_pipe(struct dc *dc, struct pipe_ctx *pipe_ctx)
+{
+ uint32_t vupdate_start, vupdate_end;
+ struct crtc_position position;
+ unsigned int vpos, cur_frame;
+
+ if (!pipe_ctx->stream ||
+ !pipe_ctx->stream_res.tg ||
+ !pipe_ctx->stream_res.stream_enc)
+ return;
+
+ dc->hwss.get_position(&pipe_ctx, 1, &position);
+ vpos = position.vertical_count;
+
+ dc->hwss.calc_vupdate_position(dc, pipe_ctx, &vupdate_start,
+ &vupdate_end);
+
+ struct timing_generator *tg = pipe_ctx->stream_res.tg;
+
+ struct optc *optc1 = DCN10TG_FROM_TG(tg);
+
+ ASSERT(optc1->max_frame_count != 0);
+
+ if (tg->funcs->is_tg_enabled && !tg->funcs->is_tg_enabled(tg))
+ return;
+
+ pipe_ctx->next_vupdate = vupdate_start;
+
+ cur_frame = tg->funcs->get_frame_count(tg);
+
+ if (vpos < vupdate_start) {
+ pipe_ctx->wait_frame_count = cur_frame;
+ } else {
+ if (cur_frame + 1 > optc1->max_frame_count)
+ pipe_ctx->wait_frame_count = cur_frame + 1 - optc1->max_frame_count;
+ else
+ pipe_ctx->wait_frame_count = cur_frame + 1;
+ }
+
+ pipe_ctx->wait_is_required = true;
+}
+
void dcn10_lock_all_pipes(struct dc *dc,
struct dc_state *context,
bool lock)
@@ -111,7 +237,8 @@ void dcn10_lock_all_pipes(struct dc *dc,
if (pipe_ctx->top_pipe ||
!pipe_ctx->stream ||
(!pipe_ctx->plane_state && !old_pipe_ctx->plane_state) ||
- !tg->funcs->is_tg_enabled(tg))
+ !tg->funcs->is_tg_enabled(tg) ||
+ dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM)
continue;
if (lock)
@@ -200,6 +327,46 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx)
}
}
+ DTN_INFO("\n=======HUBP FL======\n");
+ static const char * const pLabels[] = {
+ "inst", "Enabled ", "Done ", "adr_mode ", "width ", "mpc_width ",
+ "tmz", "xbar_sel_R", "xbar_sel_G", "xbar_sel_B", "adr_hi ",
+ "adr_low", "REFCYC", "Bias", "Scale", "Mode",
+ "Format", "prefetch"};
+
+ for (i = 0; i < pool->pipe_count; i++) {
+ struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
+ struct dcn_fl_regs_st *fl_regs = &s->fl_regs;
+ struct _vcs_dpi_display_dlg_regs_st *dlg_regs = &s->dlg_attr;
+
+ if (!s->blank_en) {
+ uint32_t values[] = {
+ pool->hubps[i]->inst,
+ fl_regs->lut_enable,
+ fl_regs->lut_done,
+ fl_regs->lut_addr_mode,
+ fl_regs->lut_width,
+ fl_regs->lut_mpc_width,
+ fl_regs->lut_tmz,
+ fl_regs->lut_crossbar_sel_r,
+ fl_regs->lut_crossbar_sel_g,
+ fl_regs->lut_crossbar_sel_b,
+ fl_regs->lut_addr_hi,
+ fl_regs->lut_addr_lo,
+ fl_regs->refcyc_3dlut_group,
+ fl_regs->lut_fl_bias,
+ fl_regs->lut_fl_scale,
+ fl_regs->lut_fl_mode,
+ fl_regs->lut_fl_format,
+ dlg_regs->dst_y_prefetch};
+
+ int num_elements = 18;
+
+ for (int j = 0; j < num_elements; j++)
+ DTN_INFO("%s \t %8xh\n", pLabels[j], values[j]);
+ }
+ }
+
DTN_INFO("\n=========RQ========\n");
DTN_INFO("HUBP: drq_exp_m prq_exp_m mrq_exp_m crq_exp_m plane1_ba L:chunk_s min_chu_s meta_ch_s"
" min_m_c_s dpte_gr_s mpte_gr_s swath_hei pte_row_h C:chunk_s min_chu_s meta_ch_s"
@@ -228,7 +395,8 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx)
" rc_pg_flc rc_mc_fll rc_mc_flc pr_nom_l pr_nom_c rc_pg_nl rc_pg_nc "
" mr_nom_l mr_nom_c rc_mc_nl rc_mc_nc rc_ld_pl rc_ld_pc rc_ld_l "
" rc_ld_c cha_cur0 ofst_cur1 cha_cur1 vr_af_vc0 ddrq_limt x_rt_dlay"
- " x_rp_dlay x_rr_sfl\n");
+ " x_rp_dlay x_rr_sfl rc_td_grp\n");
+
for (i = 0; i < pool->pipe_count; i++) {
struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
struct _vcs_dpi_display_dlg_regs_st *dlg_regs = &s->dlg_attr;
@@ -236,7 +404,7 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx)
if (!s->blank_en)
DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh"
" %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh"
- " %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n",
+ " %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %xh\n",
pool->hubps[i]->inst, dlg_regs->refcyc_h_blank_end, dlg_regs->dlg_vblank_end, dlg_regs->min_dst_y_next_start,
dlg_regs->refcyc_per_htotal, dlg_regs->refcyc_x_after_scaler, dlg_regs->dst_y_after_scaler,
dlg_regs->dst_y_prefetch, dlg_regs->dst_y_per_vm_vblank, dlg_regs->dst_y_per_row_vblank,
@@ -254,7 +422,7 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx)
dlg_regs->refcyc_per_line_delivery_c, dlg_regs->chunk_hdl_adjust_cur0, dlg_regs->dst_y_offset_cur1,
dlg_regs->chunk_hdl_adjust_cur1, dlg_regs->vready_after_vcount0, dlg_regs->dst_y_delta_drq_limit,
dlg_regs->xfc_reg_transfer_delay, dlg_regs->xfc_reg_precharge_delay,
- dlg_regs->xfc_reg_remote_surface_flip_latency);
+ dlg_regs->xfc_reg_remote_surface_flip_latency, dlg_regs->refcyc_per_tdlut_group);
}
DTN_INFO("========TTU========\n");
@@ -278,33 +446,33 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx)
DTN_INFO("\n");
}
-void dcn10_log_hw_state(struct dc *dc,
- struct dc_log_buffer_ctx *log_ctx)
+static void dcn10_log_color_state(struct dc *dc,
+ struct dc_log_buffer_ctx *log_ctx)
{
struct dc_context *dc_ctx = dc->ctx;
struct resource_pool *pool = dc->res_pool;
+ bool is_gamut_remap_available = false;
int i;
- DTN_INFO_BEGIN();
-
- dcn10_log_hubbub_state(dc, log_ctx);
-
- dcn10_log_hubp_states(dc, log_ctx);
-
- DTN_INFO("DPP: IGAM format IGAM mode DGAM mode RGAM mode"
- " GAMUT mode C11 C12 C13 C14 C21 C22 C23 C24 "
- "C31 C32 C33 C34\n");
+ DTN_INFO("DPP: IGAM format IGAM mode DGAM mode RGAM mode"
+ " GAMUT adjust "
+ "C11 C12 C13 C14 "
+ "C21 C22 C23 C24 "
+ "C31 C32 C33 C34 \n");
for (i = 0; i < pool->pipe_count; i++) {
struct dpp *dpp = pool->dpps[i];
struct dcn_dpp_state s = {0};
dpp->funcs->dpp_read_state(dpp, &s);
+ if (dpp->funcs->dpp_get_gamut_remap) {
+ dpp->funcs->dpp_get_gamut_remap(dpp, &s.gamut_remap);
+ is_gamut_remap_available = true;
+ }
if (!s.is_enabled)
continue;
- DTN_INFO("[%2d]: %11xh %-11s %-11s %-11s"
- "%8x %08xh %08xh %08xh %08xh %08xh %08xh",
+ DTN_INFO("[%2d]: %11xh %11s %9s %9s",
dpp->inst,
s.igam_input_format,
(s.igam_lut_mode == 0) ? "BypassFixed" :
@@ -323,20 +491,51 @@ void dcn10_log_hw_state(struct dc *dc,
((s.rgam_lut_mode == 2) ? "Ycc" :
((s.rgam_lut_mode == 3) ? "RAM" :
((s.rgam_lut_mode == 4) ? "RAM" :
- "Unknown")))),
- s.gamut_remap_mode,
- s.gamut_remap_c11_c12,
- s.gamut_remap_c13_c14,
- s.gamut_remap_c21_c22,
- s.gamut_remap_c23_c24,
- s.gamut_remap_c31_c32,
- s.gamut_remap_c33_c34);
+ "Unknown")))));
+ if (is_gamut_remap_available)
+ DTN_INFO(" %12s "
+ "%010lld %010lld %010lld %010lld "
+ "%010lld %010lld %010lld %010lld "
+ "%010lld %010lld %010lld %010lld",
+ (s.gamut_remap.gamut_adjust_type == 0) ? "Bypass" :
+ ((s.gamut_remap.gamut_adjust_type == 1) ? "HW" : "SW"),
+ s.gamut_remap.temperature_matrix[0].value,
+ s.gamut_remap.temperature_matrix[1].value,
+ s.gamut_remap.temperature_matrix[2].value,
+ s.gamut_remap.temperature_matrix[3].value,
+ s.gamut_remap.temperature_matrix[4].value,
+ s.gamut_remap.temperature_matrix[5].value,
+ s.gamut_remap.temperature_matrix[6].value,
+ s.gamut_remap.temperature_matrix[7].value,
+ s.gamut_remap.temperature_matrix[8].value,
+ s.gamut_remap.temperature_matrix[9].value,
+ s.gamut_remap.temperature_matrix[10].value,
+ s.gamut_remap.temperature_matrix[11].value);
+
DTN_INFO("\n");
}
DTN_INFO("\n");
+ DTN_INFO("DPP Color Caps: input_lut_shared:%d icsc:%d"
+ " dgam_ram:%d dgam_rom: srgb:%d,bt2020:%d,gamma2_2:%d,pq:%d,hlg:%d"
+ " post_csc:%d gamcor:%d dgam_rom_for_yuv:%d 3d_lut:%d"
+ " blnd_lut:%d oscs:%d\n\n",
+ dc->caps.color.dpp.input_lut_shared,
+ dc->caps.color.dpp.icsc,
+ dc->caps.color.dpp.dgam_ram,
+ dc->caps.color.dpp.dgam_rom_caps.srgb,
+ dc->caps.color.dpp.dgam_rom_caps.bt2020,
+ dc->caps.color.dpp.dgam_rom_caps.gamma2_2,
+ dc->caps.color.dpp.dgam_rom_caps.pq,
+ dc->caps.color.dpp.dgam_rom_caps.hlg,
+ dc->caps.color.dpp.post_csc,
+ dc->caps.color.dpp.gamma_corr,
+ dc->caps.color.dpp.dgam_rom_for_yuv,
+ dc->caps.color.dpp.hw_3d_lut,
+ dc->caps.color.dpp.ogam_ram,
+ dc->caps.color.dpp.ocsc);
DTN_INFO("MPCC: OPP DPP MPCCBOT MODE ALPHA_MODE PREMULT OVERLAP_ONLY IDLE\n");
- for (i = 0; i < pool->pipe_count; i++) {
+ for (i = 0; i < pool->mpcc_count; i++) {
struct mpcc_state s = {0};
pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s);
@@ -347,6 +546,84 @@ void dcn10_log_hw_state(struct dc *dc,
s.idle);
}
DTN_INFO("\n");
+ DTN_INFO("MPC Color Caps: gamut_remap:%d, 3dlut:%d, ogam_ram:%d, ocsc:%d\n\n",
+ dc->caps.color.mpc.gamut_remap,
+ dc->caps.color.mpc.num_3dluts,
+ dc->caps.color.mpc.ogam_ram,
+ dc->caps.color.mpc.ocsc);
+ DTN_INFO("===== MPC RMCM 3DLUT =====\n");
+ static const char * const pLabels[] = {
+ "MPCC", "SIZE", "MODE", "MODE_CUR", "RD_SEL",
+ "30BIT_EN", "WR_EN_MASK", "RAM_SEL", "OUT_NORM_FACTOR", "FL_SEL",
+ "OUT_OFFSET", "OUT_SCALE", "FL_DONE", "SOFT_UNDERFLOW", "HARD_UNDERFLOW",
+ "MEM_PWR_ST", "FORCE", "DIS", "MODE"};
+
+ for (i = 0; i < pool->mpcc_count; i++) {
+ struct mpcc_state s = {0};
+
+ pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s);
+ if (s.opp_id != 0xf) {
+ uint32_t values[] = {
+ i,
+ s.rmcm_regs.rmcm_3dlut_size,
+ s.rmcm_regs.rmcm_3dlut_mode,
+ s.rmcm_regs.rmcm_3dlut_mode_cur,
+ s.rmcm_regs.rmcm_3dlut_read_sel,
+ s.rmcm_regs.rmcm_3dlut_30bit_en,
+ s.rmcm_regs.rmcm_3dlut_wr_en_mask,
+ s.rmcm_regs.rmcm_3dlut_ram_sel,
+ s.rmcm_regs.rmcm_3dlut_out_norm_factor,
+ s.rmcm_regs.rmcm_3dlut_fl_sel,
+ s.rmcm_regs.rmcm_3dlut_out_offset_r,
+ s.rmcm_regs.rmcm_3dlut_out_scale_r,
+ s.rmcm_regs.rmcm_3dlut_fl_done,
+ s.rmcm_regs.rmcm_3dlut_fl_soft_underflow,
+ s.rmcm_regs.rmcm_3dlut_fl_hard_underflow,
+ s.rmcm_regs.rmcm_3dlut_mem_pwr_state,
+ s.rmcm_regs.rmcm_3dlut_mem_pwr_force,
+ s.rmcm_regs.rmcm_3dlut_mem_pwr_dis,
+ s.rmcm_regs.rmcm_3dlut_mem_pwr_mode};
+
+ int num_elements = 19;
+
+ for (int j = 0; j < num_elements; j++)
+ DTN_INFO("%s \t %8xh\n", pLabels[j], values[j]);
+ }
+ }
+ DTN_INFO("\n");
+ DTN_INFO("===== MPC RMCM Shaper =====\n");
+ DTN_INFO("MPCC: CNTL LUT_MODE MODE_CUR WR_EN_MASK WR_SEL OFFSET SCALE START_B START_SEG_B END_B END_BASE_B MEM_PWR_ST FORCE DIS MODE\n");
+ for (i = 0; i < pool->mpcc_count; i++) {
+ struct mpcc_state s = {0};
+
+ pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s);
+ if (s.opp_id != 0xf)
+ DTN_INFO("[%2d]: %4xh %4xh %6xh %4x %4x %4x %4x %4x %4xh %4xh %6xh %4x %4x %4x %4x\n",
+ i, s.rmcm_regs.rmcm_cntl, s.rmcm_regs.rmcm_shaper_lut_mode, s.rmcm_regs.rmcm_shaper_mode_cur,
+ s.rmcm_regs.rmcm_shaper_lut_write_en_mask, s.rmcm_regs.rmcm_shaper_lut_write_sel, s.rmcm_regs.rmcm_shaper_offset_b,
+ s.rmcm_regs.rmcm_shaper_scale_b, s.rmcm_regs.rmcm_shaper_rama_exp_region_start_b, s.rmcm_regs.rmcm_shaper_rama_exp_region_start_seg_b,
+ s.rmcm_regs.rmcm_shaper_rama_exp_region_end_b, s.rmcm_regs.rmcm_shaper_rama_exp_region_end_base_b, s.rmcm_regs.rmcm_shaper_mem_pwr_state,
+ s.rmcm_regs.rmcm_shaper_mem_pwr_force, s.rmcm_regs.rmcm_shaper_mem_pwr_dis, s.rmcm_regs.rmcm_shaper_mem_pwr_mode);
+ }
+}
+
+void dcn10_log_hw_state(struct dc *dc,
+ struct dc_log_buffer_ctx *log_ctx)
+{
+ struct dc_context *dc_ctx = dc->ctx;
+ struct resource_pool *pool = dc->res_pool;
+ int i;
+
+ DTN_INFO_BEGIN();
+
+ dcn10_log_hubbub_state(dc, log_ctx);
+
+ dcn10_log_hubp_states(dc, log_ctx);
+
+ if (dc->hwss.log_color_state)
+ dc->hwss.log_color_state(dc, log_ctx);
+ else
+ dcn10_log_color_state(dc, log_ctx);
DTN_INFO("OTG: v_bs v_be v_ss v_se vpol vmax vmin vmax_sel vmin_sel h_bs h_be h_ss h_se hpol htot vtot underflow blank_en\n");
@@ -354,7 +631,8 @@ void dcn10_log_hw_state(struct dc *dc,
struct timing_generator *tg = pool->timing_generators[i];
struct dcn_otg_state s = {0};
/* Read shared OTG state registers for all DCNx */
- optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s);
+ if (tg->funcs->read_otg_state)
+ tg->funcs->read_otg_state(tg, &s);
/*
* For DCN2 and greater, a register on the OPP is used to
@@ -944,6 +1222,7 @@ enum dc_status dcn10_enable_stream_timing(
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width,
+ pipe_ctx->pipe_dlg_param.pstate_keepout,
pipe_ctx->stream->signal,
true);
@@ -1050,10 +1329,9 @@ static void dcn10_reset_back_end_for_pipe(
pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg);
pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, false);
- if (pipe_ctx->stream_res.tg->funcs->set_drr)
- pipe_ctx->stream_res.tg->funcs->set_drr(
- pipe_ctx->stream_res.tg, NULL);
- pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+ set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL);
+ if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+ pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
}
for (i = 0; i < dc->res_pool->pipe_count; i++)
@@ -1072,26 +1350,9 @@ static bool dcn10_hw_wa_force_recovery(struct dc *dc)
{
struct hubp *hubp ;
unsigned int i;
- bool need_recover = true;
if (!dc->debug.recovery_enabled)
return false;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx =
- &dc->current_state->res_ctx.pipe_ctx[i];
- if (pipe_ctx != NULL) {
- hubp = pipe_ctx->plane_res.hubp;
- if (hubp != NULL && hubp->funcs->hubp_get_underflow_status) {
- if (hubp->funcs->hubp_get_underflow_status(hubp) != 0) {
- /* one pipe underflow, we will reset all the pipes*/
- need_recover = true;
- }
- }
- }
- }
- if (!need_recover)
- return false;
/*
DCHUBP_CNTL:HUBP_BLANK_EN=1
DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=1
@@ -1176,7 +1437,9 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc)
}
/* trigger HW to start disconnect plane from stream on the next vsync */
-void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_plane_atomic_disconnect(struct dc *dc,
+ struct dc_state *state,
+ struct pipe_ctx *pipe_ctx)
{
struct dce_hwseq *hws = dc->hwseq;
struct hubp *hubp = pipe_ctx->plane_res.hubp;
@@ -1196,7 +1459,7 @@ void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove);
// Phantom pipes have OTG disabled by default, so MPCC_STATUS will never assert idle,
// so don't wait for MPCC_IDLE in the programming sequence
- if (opp != NULL && !pipe_ctx->plane_state->is_phantom)
+ if (dc_state_get_pipe_subvp_type(state, pipe_ctx) != SUBVP_PHANTOM)
opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
dc->optimized_required = true;
@@ -1238,6 +1501,7 @@ void dcn10_plane_atomic_power_down(struct dc *dc,
if (hws->funcs.hubp_pg_control)
hws->funcs.hubp_pg_control(hws, hubp->inst, false);
+ hubp->funcs->hubp_reset(hubp);
dpp->funcs->dpp_reset(dpp);
REG_SET(DC_IP_REQUEST_CNTL, 0,
@@ -1286,7 +1550,7 @@ void dcn10_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_state = NULL;
}
-void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
{
struct dce_hwseq *hws = dc->hwseq;
DC_LOGGER_INIT(dc->ctx->logger);
@@ -1308,6 +1572,7 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
struct dce_hwseq *hws = dc->hwseq;
struct hubbub *hubbub = dc->res_pool->hubbub;
bool can_apply_seamless_boot = false;
+ bool tg_enabled[MAX_PIPES] = {false};
for (i = 0; i < context->stream_count; i++) {
if (context->streams[i]->apply_seamless_boot_optimization) {
@@ -1354,6 +1619,8 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
if (hubbub && hubp) {
if (hubbub->funcs->program_det_size)
hubbub->funcs->program_det_size(hubbub, hubp->inst, 0);
+ if (hubbub->funcs->program_det_segments)
+ hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0);
}
}
@@ -1389,12 +1656,14 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
// requesting data while in PSR.
tg->funcs->tg_init(tg);
hubp->power_gated = true;
+ tg_enabled[i] = true;
continue;
}
/* Disable on the current state so the new one isn't cleared. */
pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+ hubp->funcs->hubp_reset(hubp);
dpp->funcs->dpp_reset(dpp);
pipe_ctx->stream_res.tg = tg;
@@ -1412,12 +1681,12 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
- hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+ hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
if (tg->funcs->is_tg_enabled(tg))
tg->funcs->unlock(tg);
- dc->hwss.disable_plane(dc, pipe_ctx);
+ dc->hwss.disable_plane(dc, context, pipe_ctx);
pipe_ctx->stream_res.tg = NULL;
pipe_ctx->plane_res.hubp = NULL;
@@ -1430,6 +1699,20 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
tg->funcs->tg_init(tg);
}
+ /* Clean up MPC tree */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (tg_enabled[i]) {
+ if (dc->res_pool->opps[i]->mpc_tree_params.opp_list) {
+ if (dc->res_pool->opps[i]->mpc_tree_params.opp_list->mpcc_bot) {
+ int bot_id = dc->res_pool->opps[i]->mpc_tree_params.opp_list->mpcc_bot->mpcc_id;
+
+ if ((bot_id < MAX_MPCC) && (bot_id < MAX_PIPES) && (!tg_enabled[bot_id]))
+ dc->res_pool->opps[i]->mpc_tree_params.opp_list = NULL;
+ }
+ }
+ }
+ }
+
/* Power gate DSCs */
if (hws->funcs.dsc_pg_control != NULL) {
uint32_t num_opps = 0;
@@ -1482,13 +1765,14 @@ void dcn10_init_hw(struct dc *dc)
struct dc_bios *dcb = dc->ctx->dc_bios;
struct resource_pool *res_pool = dc->res_pool;
uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+ uint32_t user_level = MAX_BACKLIGHT_LEVEL;
bool is_optimized_init_done = false;
if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
/* Align bw context with hw config when system resume. */
- if (dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) {
+ if (dc->clk_mgr && dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) {
dc->current_state->bw_ctx.bw.dcn.clk.dispclk_khz = dc->clk_mgr->clks.dispclk_khz;
dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz = dc->clk_mgr->clks.dppclk_khz;
}
@@ -1579,12 +1863,14 @@ void dcn10_init_hw(struct dc *dc)
for (i = 0; i < dc->link_count; i++) {
struct dc_link *link = dc->links[i];
- if (link->panel_cntl)
+ if (link->panel_cntl) {
backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+ user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+ }
}
if (abm != NULL)
- abm->funcs->abm_init(abm, backlight);
+ abm->funcs->abm_init(abm, backlight, user_level);
if (dmcu != NULL && !dmcu->auto_load_dmcu)
dmcu->funcs->dmcu_init(dmcu);
@@ -1606,7 +1892,7 @@ void dcn10_init_hw(struct dc *dc)
REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
}
- if (dc->clk_mgr->funcs->notify_wm_ranges)
+ if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges)
dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
}
@@ -1629,10 +1915,10 @@ void dcn10_power_down_on_boot(struct dc *dc)
if (edp_link && edp_link->link_enc->funcs->is_dig_enabled &&
edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) &&
dc->hwseq->funcs.edp_backlight_control &&
- dc->hwss.power_down &&
+ dc->hwseq->funcs.power_down &&
dc->hwss.edp_power_control) {
dc->hwseq->funcs.edp_backlight_control(edp_link, false);
- dc->hwss.power_down(dc);
+ dc->hwseq->funcs.power_down(dc);
dc->hwss.edp_power_control(edp_link, false);
} else {
for (i = 0; i < dc->link_count; i++) {
@@ -1640,8 +1926,8 @@ void dcn10_power_down_on_boot(struct dc *dc)
if (link->link_enc && link->link_enc->funcs->is_dig_enabled &&
link->link_enc->funcs->is_dig_enabled(link->link_enc) &&
- dc->hwss.power_down) {
- dc->hwss.power_down(dc);
+ dc->hwseq->funcs.power_down) {
+ dc->hwseq->funcs.power_down(dc);
break;
}
@@ -1752,18 +2038,14 @@ bool dcn10_set_input_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
if (dpp_base == NULL)
return false;
- if (plane_state->in_transfer_func)
- tf = plane_state->in_transfer_func;
+ tf = &plane_state->in_transfer_func;
- if (plane_state->gamma_correction &&
- !dpp_base->ctx->dc->debug.always_use_regamma
- && !plane_state->gamma_correction->is_identity
+ if (!dpp_base->ctx->dc->debug.always_use_regamma
+ && !plane_state->gamma_correction.is_identity
&& dce_use_lut(plane_state->format))
- dpp_base->funcs->dpp_program_input_lut(dpp_base, plane_state->gamma_correction);
+ dpp_base->funcs->dpp_program_input_lut(dpp_base, &plane_state->gamma_correction);
- if (tf == NULL)
- dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_BYPASS);
- else if (tf->type == TF_TYPE_PREDEFINED) {
+ if (tf->type == TF_TYPE_PREDEFINED) {
switch (tf->tf) {
case TRANSFER_FUNCTION_SRGB:
dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_HW_sRGB);
@@ -1800,14 +2082,13 @@ bool dcn10_set_input_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
#define MAX_NUM_HW_POINTS 0x200
static void log_tf(struct dc_context *ctx,
- struct dc_transfer_func *tf, uint32_t hw_points_num)
+ const struct dc_transfer_func *tf, uint32_t hw_points_num)
{
// DC_LOG_GAMMA is default logging of all hw points
// DC_LOG_ALL_GAMMA logs all points, not only hw points
// DC_LOG_ALL_TF_POINTS logs all channels of the tf
int i = 0;
- DC_LOGGER_INIT(ctx->logger);
DC_LOG_GAMMA("Gamma Correction TF");
DC_LOG_ALL_GAMMA("Logging all tf points...");
DC_LOG_ALL_TF_CHANNELS("Logging all channels...");
@@ -1830,21 +2111,23 @@ bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
{
struct dpp *dpp = pipe_ctx->plane_res.dpp;
+ if (!stream)
+ return false;
+
if (dpp == NULL)
return false;
dpp->regamma_params.hw_points_num = GAMMA_HW_POINTS_NUM;
- if (stream->out_transfer_func &&
- stream->out_transfer_func->type == TF_TYPE_PREDEFINED &&
- stream->out_transfer_func->tf == TRANSFER_FUNCTION_SRGB)
+ if (stream->out_transfer_func.type == TF_TYPE_PREDEFINED &&
+ stream->out_transfer_func.tf == TRANSFER_FUNCTION_SRGB)
dpp->funcs->dpp_program_regamma_pwl(dpp, NULL, OPP_REGAMMA_SRGB);
/* dcn10_translate_regamma_to_hw_format takes 750us, only do it when full
* update.
*/
else if (cm_helper_translate_curve_to_hw_format(dc->ctx,
- stream->out_transfer_func,
+ &stream->out_transfer_func,
&dpp->regamma_params, false)) {
dpp->funcs->dpp_program_regamma_pwl(
dpp,
@@ -1852,10 +2135,9 @@ bool dcn10_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
} else
dpp->funcs->dpp_program_regamma_pwl(dpp, NULL, OPP_REGAMMA_BYPASS);
- if (stream != NULL && stream->ctx != NULL &&
- stream->out_transfer_func != NULL) {
+ if (stream->ctx) {
log_tf(stream->ctx,
- stream->out_transfer_func,
+ &stream->out_transfer_func,
dpp->regamma_params.hw_points_num);
}
@@ -1925,20 +2207,11 @@ static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx)
dc->hwss.get_position(&pipe_ctx, 1, &position);
vpos = position.vertical_count;
- /* Avoid wraparound calculation issues */
- vupdate_start += stream->timing.v_total;
- vupdate_end += stream->timing.v_total;
- vpos += stream->timing.v_total;
-
if (vpos <= vupdate_start) {
/* VPOS is in VACTIVE or back porch. */
lines_to_vupdate = vupdate_start - vpos;
- } else if (vpos > vupdate_end) {
- /* VPOS is in the front porch. */
- return;
} else {
- /* VPOS is in VUPDATE. */
- lines_to_vupdate = 0;
+ lines_to_vupdate = stream->timing.v_total - vpos + vupdate_start;
}
/* Calculate time until VUPDATE in microseconds. */
@@ -1946,13 +2219,18 @@ static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx)
stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz;
us_to_vupdate = lines_to_vupdate * us_per_line;
+ /* Stall out until the cursor update completes. */
+ if (vupdate_end < vupdate_start)
+ vupdate_end += stream->timing.v_total;
+
+ /* Position is within the vupdate start..end range: no wait before VUPDATE. */
+ if (lines_to_vupdate > stream->timing.v_total - vupdate_end + vupdate_start)
+ us_to_vupdate = 0;
+
/* 70 us is a conservative estimate of cursor update time*/
if (us_to_vupdate > 70)
return;
- /* Stall out until the cursor update completes. */
- if (vupdate_end < vupdate_start)
- vupdate_end += stream->timing.v_total;
us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line;
udelay(us_to_vupdate + us_vupdate);
}
@@ -1989,6 +2267,8 @@ static bool wait_for_reset_trigger_to_occur(
{
bool rc = false;
+ DC_LOGGER_INIT(dc_ctx->logger);
+
/* To avoid endless loop we wait at most
* frames_to_wait_on_triggered_reset frames for the reset to occur. */
const uint32_t frames_to_wait_on_triggered_reset = 10;
@@ -2108,7 +2388,7 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size,
struct dc_crtc_timing *hw_crtc_timing;
uint64_t phase[MAX_PIPES];
uint64_t modulo[MAX_PIPES];
- unsigned int pclk;
+ unsigned int pclk = 0;
uint32_t embedded_pix_clk_100hz;
uint16_t embedded_h_total;
@@ -2116,6 +2396,8 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size,
uint32_t dp_ref_clk_100hz =
dc->res_pool->dp_clock_source->ctx->dc->clk_mgr->dprefclk_khz*10;
+ DC_LOGGER_INIT(dc_ctx->logger);
+
hw_crtc_timing = kcalloc(MAX_PIPES, sizeof(*hw_crtc_timing), GFP_KERNEL);
if (!hw_crtc_timing)
return master;
@@ -2142,7 +2424,7 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size,
grouped_pipes[i]->stream->signal)) {
embedded = i;
master = i;
- phase[i] = embedded_pix_clk_100hz*100;
+ phase[i] = embedded_pix_clk_100hz*(uint64_t)100;
modulo[i] = dp_ref_clk_100hz*100;
} else {
@@ -2197,7 +2479,9 @@ void dcn10_enable_vblanks_synchronization(
struct dc_context *dc_ctx = dc->ctx;
struct output_pixel_processor *opp;
struct timing_generator *tg;
- int i, width, height, master;
+ int i, width = 0, height = 0, master;
+
+ DC_LOGGER_INIT(dc_ctx->logger);
for (i = 1; i < group_size; i++) {
opp = grouped_pipes[i]->stream_res.opp;
@@ -2253,6 +2537,7 @@ void dcn10_enable_vblanks_synchronization(
void dcn10_enable_timing_synchronization(
struct dc *dc,
+ struct dc_state *state,
int group_index,
int group_size,
struct pipe_ctx *grouped_pipes[])
@@ -2260,12 +2545,14 @@ void dcn10_enable_timing_synchronization(
struct dc_context *dc_ctx = dc->ctx;
struct output_pixel_processor *opp;
struct timing_generator *tg;
- int i, width, height;
+ int i, width = 0, height = 0;
+
+ DC_LOGGER_INIT(dc_ctx->logger);
DC_SYNC_INFO("Setting up OTG reset trigger\n");
for (i = 1; i < group_size; i++) {
- if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+ if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
continue;
opp = grouped_pipes[i]->stream_res.opp;
@@ -2285,14 +2572,14 @@ void dcn10_enable_timing_synchronization(
if (grouped_pipes[i]->stream == NULL)
continue;
- if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+ if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
continue;
grouped_pipes[i]->stream->vblank_synchronized = false;
}
for (i = 1; i < group_size; i++) {
- if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+ if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
continue;
grouped_pipes[i]->stream_res.tg->funcs->enable_reset_trigger(
@@ -2306,11 +2593,11 @@ void dcn10_enable_timing_synchronization(
* synchronized. Look at last pipe programmed to reset.
*/
- if (grouped_pipes[1]->stream && grouped_pipes[1]->stream->mall_stream_config.type != SUBVP_PHANTOM)
+ if (grouped_pipes[1]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[1]) != SUBVP_PHANTOM)
wait_for_reset_trigger_to_occur(dc_ctx, grouped_pipes[1]->stream_res.tg);
for (i = 1; i < group_size; i++) {
- if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+ if (grouped_pipes[i]->stream && dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
continue;
grouped_pipes[i]->stream_res.tg->funcs->disable_reset_trigger(
@@ -2318,7 +2605,7 @@ void dcn10_enable_timing_synchronization(
}
for (i = 1; i < group_size; i++) {
- if (grouped_pipes[i]->stream && grouped_pipes[i]->stream->mall_stream_config.type == SUBVP_PHANTOM)
+ if (dc_state_get_pipe_subvp_type(state, grouped_pipes[i]) == SUBVP_PHANTOM)
continue;
opp = grouped_pipes[i]->stream_res.opp;
@@ -2339,6 +2626,8 @@ void dcn10_enable_per_frame_crtc_position_reset(
struct dc_context *dc_ctx = dc->ctx;
int i;
+ DC_LOGGER_INIT(dc_ctx->logger);
+
DC_SYNC_INFO("Setting up\n");
for (i = 0; i < group_size; i++)
if (grouped_pipes[i]->stream_res.tg->funcs->enable_crtc_reset)
@@ -2510,8 +2799,11 @@ static bool dcn10_is_rear_mpo_fix_required(struct pipe_ctx *pipe_ctx, enum dc_co
while (top->top_pipe)
top = top->top_pipe; // Traverse to top pipe_ctx
- if (top->plane_state && top->plane_state->layer_index == 0)
- return true; // Front MPO plane not hidden
+ if (top->plane_state && top->plane_state->layer_index == 0 && !top->plane_state->global_alpha)
+ // Global alpha used by top plane for PIP overlay
+ // Pre-multiplied/per-pixel alpha used by MPO
+ // Check top plane's global alpha to ensure layer_index > 0 not caused by PIP
+ return true; // MPO in use and front plane not hidden
}
}
return false;
@@ -2588,7 +2880,6 @@ void dcn10_update_visual_confirm_color(struct dc *dc,
struct mpc *mpc = dc->res_pool->mpc;
if (mpc->funcs->set_bg_color) {
- memcpy(&pipe_ctx->plane_state->visual_confirm_color, &(pipe_ctx->visual_confirm_color), sizeof(struct tg_color));
mpc->funcs->set_bg_color(mpc, &(pipe_ctx->visual_confirm_color), mpcc_id);
}
}
@@ -2799,8 +3090,8 @@ static void dcn10_update_dchubp_dpp(
}
if (pipe_ctx->stream->cursor_attributes.address.quad_part != 0) {
- dc->hwss.set_cursor_position(pipe_ctx);
dc->hwss.set_cursor_attribute(pipe_ctx);
+ dc->hwss.set_cursor_position(pipe_ctx);
if (dc->hwss.set_cursor_sdr_white_level)
dc->hwss.set_cursor_sdr_white_level(pipe_ctx);
@@ -2839,7 +3130,7 @@ static void dcn10_update_dchubp_dpp(
hubp->power_gated = false;
- hws->funcs.update_plane_addr(dc, pipe_ctx);
+ dc->hwss.update_plane_addr(dc, pipe_ctx);
if (is_pipe_tree_visible(pipe_ctx))
hubp->funcs->set_blank(hubp, false);
@@ -2922,7 +3213,8 @@ void dcn10_program_pipe(
calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
- pipe_ctx->pipe_dlg_param.vupdate_width);
+ pipe_ctx->pipe_dlg_param.vupdate_width,
+ pipe_ctx->pipe_dlg_param.pstate_keepout);
pipe_ctx->stream_res.tg->funcs->set_vtg_params(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
@@ -2993,8 +3285,6 @@ void dcn10_post_unlock_program_front_end(
{
int i;
- DC_LOGGER_INIT(dc->ctx->logger);
-
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
@@ -3010,7 +3300,7 @@ void dcn10_post_unlock_program_front_end(
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
- dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+ dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable) {
@@ -3057,7 +3347,7 @@ void dcn10_prepare_bandwidth(
context,
false);
- dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub,
+ dc->optimized_required = hubbub->funcs->program_watermarks(hubbub,
&context->bw_ctx.bw.dcn.watermarks,
dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
true);
@@ -3134,15 +3424,18 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
* as well.
*/
for (i = 0; i < num_pipes; i++) {
- if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
- if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
- pipe_ctx[i]->stream_res.tg->funcs->set_drr(
- pipe_ctx[i]->stream_res.tg, &params);
+ /* dc_state_destruct() might null the stream resources, so fetch tg
+ * here first to avoid a race condition. The lifetime of the pointee
+ * itself (the timing_generator object) is not a problem here.
+ */
+ struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
+
+ if ((tg != NULL) && tg->funcs) {
+ set_drr_and_clear_adjust_pending(pipe_ctx[i], pipe_ctx[i]->stream, &params);
if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
- if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
- pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
- pipe_ctx[i]->stream_res.tg,
- event_triggers, num_frames);
+ if (tg->funcs->set_static_screen_control)
+ tg->funcs->set_static_screen_control(
+ tg, event_triggers, num_frames);
}
}
}
@@ -3347,53 +3640,6 @@ void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data)
hubbub->funcs->update_dchub(hubbub, dh_data);
}
-static bool dcn10_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx)
-{
- struct pipe_ctx *test_pipe, *split_pipe;
- const struct scaler_data *scl_data = &pipe_ctx->plane_res.scl_data;
- struct rect r1 = scl_data->recout, r2, r2_half;
- int r1_r = r1.x + r1.width, r1_b = r1.y + r1.height, r2_r, r2_b;
- int cur_layer = pipe_ctx->plane_state->layer_index;
-
- /**
- * Disable the cursor if there's another pipe above this with a
- * plane that contains this pipe's viewport to prevent double cursor
- * and incorrect scaling artifacts.
- */
- for (test_pipe = pipe_ctx->top_pipe; test_pipe;
- test_pipe = test_pipe->top_pipe) {
- // Skip invisible layer and pipe-split plane on same layer
- if (!test_pipe->plane_state ||
- !test_pipe->plane_state->visible ||
- test_pipe->plane_state->layer_index == cur_layer)
- continue;
-
- r2 = test_pipe->plane_res.scl_data.recout;
- r2_r = r2.x + r2.width;
- r2_b = r2.y + r2.height;
- split_pipe = test_pipe;
-
- /**
- * There is another half plane on same layer because of
- * pipe-split, merge together per same height.
- */
- for (split_pipe = pipe_ctx->top_pipe; split_pipe;
- split_pipe = split_pipe->top_pipe)
- if (split_pipe->plane_state->layer_index == test_pipe->plane_state->layer_index) {
- r2_half = split_pipe->plane_res.scl_data.recout;
- r2.x = (r2_half.x < r2.x) ? r2_half.x : r2.x;
- r2.width = r2.width + r2_half.width;
- r2_r = r2.x + r2.width;
- break;
- }
-
- if (r1.x >= r2.x && r1.y >= r2.y && r1_r <= r2_r && r1_b <= r2_b)
- return true;
- }
-
- return false;
-}
-
void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
{
struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position;
@@ -3406,7 +3652,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
.h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz,
.v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert,
.rotation = pipe_ctx->plane_state->rotation,
- .mirror = pipe_ctx->plane_state->horizontal_mirror
+ .mirror = pipe_ctx->plane_state->horizontal_mirror,
+ .stream = pipe_ctx->stream,
};
bool pipe_split_on = false;
bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) ||
@@ -3416,6 +3663,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
int y_plane = pipe_ctx->plane_state->dst_rect.y;
int x_pos = pos_cpy.x;
int y_pos = pos_cpy.y;
+ int clip_x = pipe_ctx->plane_state->clip_rect.x;
+ int clip_width = pipe_ctx->plane_state->clip_rect.width;
if ((pipe_ctx->top_pipe != NULL) || (pipe_ctx->bottom_pipe != NULL)) {
if ((pipe_ctx->plane_state->src_rect.width != pipe_ctx->plane_res.scl_data.viewport.width) ||
@@ -3434,7 +3683,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
*/
/**
- * Translate cursor from stream space to plane space.
+ * Translate cursor and clip offset from stream space to plane space.
*
* If the cursor is scaled then we need to scale the position
* to be in the approximately correct place. We can't do anything
@@ -3451,6 +3700,10 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_state->dst_rect.width;
y_pos = (y_pos - y_plane) * pipe_ctx->plane_state->src_rect.height /
pipe_ctx->plane_state->dst_rect.height;
+ clip_x = (clip_x - x_plane) * pipe_ctx->plane_state->src_rect.width /
+ pipe_ctx->plane_state->dst_rect.width;
+ clip_width = clip_width * pipe_ctx->plane_state->src_rect.width /
+ pipe_ctx->plane_state->dst_rect.width;
}
/**
@@ -3492,35 +3745,23 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
== PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
pos_cpy.enable = false;
- if (pos_cpy.enable && dcn10_can_pipe_disable_cursor(pipe_ctx))
+ if (pos_cpy.enable && resource_can_pipe_disable_cursor(pipe_ctx))
pos_cpy.enable = false;
if (param.rotation == ROTATION_ANGLE_0) {
- int viewport_width =
- pipe_ctx->plane_res.scl_data.viewport.width;
- int viewport_x =
- pipe_ctx->plane_res.scl_data.viewport.x;
if (param.mirror) {
- if (pipe_split_on || odm_combine_on) {
- if (pos_cpy.x >= viewport_width + viewport_x) {
- pos_cpy.x = 2 * viewport_width
- - pos_cpy.x + 2 * viewport_x;
- } else {
- uint32_t temp_x = pos_cpy.x;
-
- pos_cpy.x = 2 * viewport_x - pos_cpy.x;
- if (temp_x >= viewport_x +
- (int)hubp->curs_attr.width || pos_cpy.x
- <= (int)hubp->curs_attr.width +
- pipe_ctx->plane_state->src_rect.x) {
- pos_cpy.x = temp_x + viewport_width;
- }
- }
- } else {
- pos_cpy.x = viewport_width - pos_cpy.x + 2 * viewport_x;
- }
+ /*
+ * The plane is split into multiple viewports.
+ * The combination of all viewports spans the
+ * entirety of the clip rect.
+ *
+ * For no pipe_split, viewport_width represents
+ * the full width of the clip_rect, so we can just
+ * mirror it.
+ */
+ pos_cpy.x = clip_width - pos_cpy.x + 2 * clip_x;
}
}
// Swap axis and mirror horizontally
@@ -3590,30 +3831,17 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx)
}
// Mirror horizontally and vertically
else if (param.rotation == ROTATION_ANGLE_180) {
- int viewport_width =
- pipe_ctx->plane_res.scl_data.viewport.width;
- int viewport_x =
- pipe_ctx->plane_res.scl_data.viewport.x;
-
if (!param.mirror) {
- if (pipe_split_on || odm_combine_on) {
- if (pos_cpy.x >= viewport_width + viewport_x) {
- pos_cpy.x = 2 * viewport_width
- - pos_cpy.x + 2 * viewport_x;
- } else {
- uint32_t temp_x = pos_cpy.x;
-
- pos_cpy.x = 2 * viewport_x - pos_cpy.x;
- if (temp_x >= viewport_x +
- (int)hubp->curs_attr.width || pos_cpy.x
- <= (int)hubp->curs_attr.width +
- pipe_ctx->plane_state->src_rect.x) {
- pos_cpy.x = 2 * viewport_width - temp_x;
- }
- }
- } else {
- pos_cpy.x = viewport_width - pos_cpy.x + 2 * viewport_x;
- }
+ /*
+ * The plane is split into multiple viewports.
+ * The combination of all viewports spans the
+ * entirety of the clip rect.
+ *
+ * For no pipe_split, viewport_width represents
+ * the full width of the clip_rect, so we can just
+ * mirror it.
+ */
+ pos_cpy.x = clip_width - pos_cpy.x + 2 * clip_x;
}
/**
@@ -3885,3 +4113,32 @@ void dcn10_get_dcc_en_bits(struct dc *dc, int *dcc_en_bits)
dcc_en_bits[i] = s->dcc_en ? 1 : 0;
}
}
+
+/**
+ * dcn10_reset_surface_dcc_and_tiling - Set DCC and tiling in DCN to their disable mode.
+ *
+ * @pipe_ctx: Pointer to the pipe context structure.
+ * @plane_state: Surface state
+ * @clear_tiling: If true set tiling to Linear, otherwise does not change tiling
+ *
+ * This function is responsible for calling the HUBP block to disable DCC and set
+ * tiling to the linear mode.
+ */
+void dcn10_reset_surface_dcc_and_tiling(struct pipe_ctx *pipe_ctx,
+ struct dc_plane_state *plane_state,
+ bool clear_tiling)
+{
+ struct hubp *hubp = pipe_ctx->plane_res.hubp;
+
+ if (!hubp)
+ return;
+
+ /* if framebuffer is tiled, disable tiling */
+ if (clear_tiling && hubp->funcs->hubp_clear_tiling)
+ hubp->funcs->hubp_clear_tiling(hubp);
+
+ /* force page flip to see the new content of the framebuffer */
+ hubp->funcs->hubp_program_surface_flip_and_addr(hubp,
+ &plane_state->address,
+ true);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
index ef6d56da417c..57d30ea225f2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h
@@ -50,6 +50,13 @@ void dcn10_optimize_bandwidth(
void dcn10_prepare_bandwidth(
struct dc *dc,
struct dc_state *context);
+void dcn10_wait_for_pipe_update_if_needed(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ bool is_surface_update_only);
+void dcn10_set_wait_for_update_needed_for_pipe(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx);
void dcn10_pipe_control_lock(
struct dc *dc,
struct pipe_ctx *pipe,
@@ -75,7 +82,7 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
void dcn10_reset_hw_ctx_wrap(
struct dc *dc,
struct dc_state *context);
-void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
void dcn10_lock_all_pipes(
struct dc *dc,
struct dc_state *context,
@@ -108,13 +115,16 @@ void dcn10_power_down_on_boot(struct dc *dc);
enum dc_status dce110_apply_ctx_to_hw(
struct dc *dc,
struct dc_state *context);
-void dcn10_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn10_plane_atomic_disconnect(struct dc *dc,
+ struct dc_state *state,
+ struct pipe_ctx *pipe_ctx);
void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data);
void dcn10_update_pending_status(struct pipe_ctx *pipe_ctx);
void dce110_power_down(struct dc *dc);
void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context);
void dcn10_enable_timing_synchronization(
struct dc *dc,
+ struct dc_state *state,
int group_index,
int group_size,
struct pipe_ctx *grouped_pipes[]);
@@ -204,4 +214,8 @@ void dcn10_update_visual_confirm_color(
struct pipe_ctx *pipe_ctx,
int mpcc_id);
+void dcn10_reset_surface_dcc_and_tiling(struct pipe_ctx *pipe_ctx,
+ struct dc_plane_state *plane_state,
+ bool clear_tiling);
+
#endif /* __DC_HWSS_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c
index f2371c948822..079c226c1097 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c
@@ -24,8 +24,8 @@
*/
#include "hw_sequencer_private.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
#include "dcn20/dcn20_hwseq.h"
static const struct hw_sequencer_funcs dcn10_funcs = {
@@ -40,6 +40,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
.update_plane_addr = dcn10_update_plane_addr,
.update_dchub = dcn10_update_dchub,
.update_pending_status = dcn10_update_pending_status,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
.program_output_csc = dcn10_program_output_csc,
.enable_accelerated_mode = dce110_enable_accelerated_mode,
.enable_timing_synchronization = dcn10_enable_timing_synchronization,
@@ -78,7 +79,6 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
.get_clock = dcn10_get_clock,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
- .power_down = dce110_power_down,
.set_backlight_level = dce110_set_backlight_level,
.set_abm_immediate_disable = dce110_set_abm_immediate_disable,
.set_pipe = dce110_set_pipe,
@@ -92,7 +92,6 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
static const struct hwseq_private_funcs dcn10_private_funcs = {
.init_pipes = dcn10_init_pipes,
- .update_plane_addr = dcn10_update_plane_addr,
.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
.program_pipe = dcn10_program_pipe,
.update_mpcc = dcn10_update_mpcc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h
index 8c6fd7b844a4..8c6fd7b844a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index e72f15ac0048..9477c9f9e196 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -29,11 +29,11 @@
#include "dm_helpers.h"
#include "core_types.h"
#include "resource.h"
-#include "dcn20_resource.h"
+#include "dcn20/dcn20_resource.h"
#include "dcn20_hwseq.h"
#include "dce/dce_hwseq.h"
-#include "dcn20_dsc.h"
-#include "dcn20_optc.h"
+#include "dcn20/dcn20_dsc.h"
+#include "dcn20/dcn20_optc.h"
#include "abm.h"
#include "clk_mgr.h"
#include "dmcu.h"
@@ -54,9 +54,13 @@
#include "dpcd_defs.h"
#include "inc/link_enc_cfg.h"
#include "link_hwss.h"
-#include "link.h"
+#include "link_service.h"
+#include "dc_state_priv.h"
-#define DC_LOGGER_INIT(logger)
+#define DC_LOGGER \
+ dc_logger
+#define DC_LOGGER_INIT(logger) \
+ struct dal_logger *dc_logger = logger
#define CTX \
hws->ctx
@@ -67,6 +71,122 @@
#define FN(reg_name, field_name) \
hws->shifts->field_name, hws->masks->field_name
+void dcn20_log_color_state(struct dc *dc,
+ struct dc_log_buffer_ctx *log_ctx)
+{
+ struct dc_context *dc_ctx = dc->ctx;
+ struct resource_pool *pool = dc->res_pool;
+ bool is_gamut_remap_available = false;
+ int i;
+
+ DTN_INFO("DPP: DGAM mode SHAPER mode 3DLUT mode 3DLUT bit depth"
+ " 3DLUT size RGAM mode GAMUT adjust "
+ "C11 C12 C13 C14 "
+ "C21 C22 C23 C24 "
+ "C31 C32 C33 C34 \n");
+
+ for (i = 0; i < pool->pipe_count; i++) {
+ struct dpp *dpp = pool->dpps[i];
+ struct dcn_dpp_state s = {0};
+
+ dpp->funcs->dpp_read_state(dpp, &s);
+ if (dpp->funcs->dpp_get_gamut_remap) {
+ dpp->funcs->dpp_get_gamut_remap(dpp, &s.gamut_remap);
+ is_gamut_remap_available = true;
+ }
+
+ if (!s.is_enabled)
+ continue;
+
+ DTN_INFO("[%2d]: %8s %11s %10s %15s %10s %9s",
+ dpp->inst,
+ (s.dgam_lut_mode == 0) ? "Bypass" :
+ ((s.dgam_lut_mode == 1) ? "sRGB" :
+ ((s.dgam_lut_mode == 2) ? "Ycc" :
+ ((s.dgam_lut_mode == 3) ? "RAM" :
+ ((s.dgam_lut_mode == 4) ? "RAM" :
+ "Unknown")))),
+ (s.shaper_lut_mode == 1) ? "RAM A" :
+ ((s.shaper_lut_mode == 2) ? "RAM B" :
+ "Bypass"),
+ (s.lut3d_mode == 1) ? "RAM A" :
+ ((s.lut3d_mode == 2) ? "RAM B" :
+ "Bypass"),
+ (s.lut3d_bit_depth <= 0) ? "12-bit" : "10-bit",
+ (s.lut3d_size == 0) ? "17x17x17" : "9x9x9",
+ (s.rgam_lut_mode == 1) ? "RAM A" :
+ ((s.rgam_lut_mode == 1) ? "RAM B" : "Bypass"));
+
+ if (is_gamut_remap_available) {
+ DTN_INFO(" %12s "
+ "%010lld %010lld %010lld %010lld "
+ "%010lld %010lld %010lld %010lld "
+ "%010lld %010lld %010lld %010lld",
+
+ (s.gamut_remap.gamut_adjust_type == 0) ? "Bypass" :
+ ((s.gamut_remap.gamut_adjust_type == 1) ? "HW" :
+ "SW"),
+ s.gamut_remap.temperature_matrix[0].value,
+ s.gamut_remap.temperature_matrix[1].value,
+ s.gamut_remap.temperature_matrix[2].value,
+ s.gamut_remap.temperature_matrix[3].value,
+ s.gamut_remap.temperature_matrix[4].value,
+ s.gamut_remap.temperature_matrix[5].value,
+ s.gamut_remap.temperature_matrix[6].value,
+ s.gamut_remap.temperature_matrix[7].value,
+ s.gamut_remap.temperature_matrix[8].value,
+ s.gamut_remap.temperature_matrix[9].value,
+ s.gamut_remap.temperature_matrix[10].value,
+ s.gamut_remap.temperature_matrix[11].value);
+ }
+
+ DTN_INFO("\n");
+ }
+ DTN_INFO("\n");
+ DTN_INFO("DPP Color Caps: input_lut_shared:%d icsc:%d"
+ " dgam_ram:%d dgam_rom: srgb:%d,bt2020:%d,gamma2_2:%d,pq:%d,hlg:%d"
+ " post_csc:%d gamcor:%d dgam_rom_for_yuv:%d 3d_lut:%d"
+ " blnd_lut:%d oscs:%d\n\n",
+ dc->caps.color.dpp.input_lut_shared,
+ dc->caps.color.dpp.icsc,
+ dc->caps.color.dpp.dgam_ram,
+ dc->caps.color.dpp.dgam_rom_caps.srgb,
+ dc->caps.color.dpp.dgam_rom_caps.bt2020,
+ dc->caps.color.dpp.dgam_rom_caps.gamma2_2,
+ dc->caps.color.dpp.dgam_rom_caps.pq,
+ dc->caps.color.dpp.dgam_rom_caps.hlg,
+ dc->caps.color.dpp.post_csc,
+ dc->caps.color.dpp.gamma_corr,
+ dc->caps.color.dpp.dgam_rom_for_yuv,
+ dc->caps.color.dpp.hw_3d_lut,
+ dc->caps.color.dpp.ogam_ram,
+ dc->caps.color.dpp.ocsc);
+
+ DTN_INFO("MPCC: OPP DPP MPCCBOT MODE ALPHA_MODE PREMULT OVERLAP_ONLY IDLE"
+ " OGAM mode\n");
+
+ for (i = 0; i < pool->mpcc_count; i++) {
+ struct mpcc_state s = {0};
+
+ pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s);
+ if (s.opp_id != 0xf)
+ DTN_INFO("[%2d]: %2xh %2xh %6xh %4d %10d %7d %12d %4d %9s\n",
+ i, s.opp_id, s.dpp_id, s.bot_mpcc_id,
+ s.mode, s.alpha_mode, s.pre_multiplied_alpha, s.overlap_only,
+ s.idle,
+ (s.rgam_mode == 1) ? "RAM A" :
+ ((s.rgam_mode == 2) ? "RAM B" :
+ "Bypass"));
+ }
+ DTN_INFO("\n");
+ DTN_INFO("MPC Color Caps: gamut_remap:%d, 3dlut:%d, ogam_ram:%d, ocsc:%d\n\n",
+ dc->caps.color.mpc.gamut_remap,
+ dc->caps.color.mpc.num_3dluts,
+ dc->caps.color.mpc.ogam_ram,
+ dc->caps.color.mpc.ocsc);
+}
+
+
static int find_free_gsl_group(const struct dc *dc)
{
if (dc->res_pool->gsl_groups.gsl_0 == 0)
@@ -94,7 +214,7 @@ static int find_free_gsl_group(const struct dc *dc)
* gsl_0 <=> pipe_ctx->stream_res.gsl_group == 1
* Using a magic value like -1 would require tracking all inits/resets
*/
-static void dcn20_setup_gsl_group_as_lock(
+void dcn20_setup_gsl_group_as_lock(
const struct dc *dc,
struct pipe_ctx *pipe_ctx,
bool enable)
@@ -163,14 +283,13 @@ static void dcn20_setup_gsl_group_as_lock(
}
/* at this point we want to program whether it's to enable or disable */
- if (pipe_ctx->stream_res.tg->funcs->set_gsl != NULL &&
- pipe_ctx->stream_res.tg->funcs->set_gsl_source_select != NULL) {
+ if (pipe_ctx->stream_res.tg->funcs->set_gsl != NULL) {
pipe_ctx->stream_res.tg->funcs->set_gsl(
pipe_ctx->stream_res.tg,
&gsl);
-
- pipe_ctx->stream_res.tg->funcs->set_gsl_source_select(
- pipe_ctx->stream_res.tg, group_idx, enable ? 4 : 0);
+ if (pipe_ctx->stream_res.tg->funcs->set_gsl_source_select != NULL)
+ pipe_ctx->stream_res.tg->funcs->set_gsl_source_select(
+ pipe_ctx->stream_res.tg, group_idx, enable ? 4 : 0);
} else
BREAK_TO_DEBUGGER();
}
@@ -293,7 +412,7 @@ void dcn20_init_blank(
struct output_pixel_processor *opp = NULL;
struct output_pixel_processor *bottom_opp = NULL;
uint32_t num_opps, opp_id_src0, opp_id_src1;
- uint32_t otg_active_width, otg_active_height;
+ uint32_t otg_active_width = 0, otg_active_height = 0;
/* program opp dpg blank color */
color_space = COLOR_SPACE_SRGB;
@@ -614,13 +733,15 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
memset(&pipe_ctx->plane_res, 0, sizeof(pipe_ctx->plane_res));
pipe_ctx->top_pipe = NULL;
pipe_ctx->bottom_pipe = NULL;
+ pipe_ctx->prev_odm_pipe = NULL;
+ pipe_ctx->next_odm_pipe = NULL;
pipe_ctx->plane_state = NULL;
}
-void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
{
- bool is_phantom = pipe_ctx->plane_state && pipe_ctx->plane_state->is_phantom;
+ bool is_phantom = dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM;
struct timing_generator *tg = is_phantom ? pipe_ctx->stream_res.tg : NULL;
DC_LOGGER_INIT(dc->ctx->logger);
@@ -646,9 +767,9 @@ void dcn20_disable_pixel_data(struct dc *dc, struct pipe_ctx *pipe_ctx, bool bla
}
static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
+ int opp_cnt, bool is_two_pixels_per_container)
{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
+ bool hblank_halved = is_two_pixels_per_container;
int flow_ctrl_cnt;
if (opp_cnt >= 2)
@@ -668,6 +789,37 @@ static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
return flow_ctrl_cnt;
}
+static enum phyd32clk_clock_source get_phyd32clk_src(struct dc_link *link)
+{
+ switch (link->link_enc->transmitter) {
+ case TRANSMITTER_UNIPHY_A:
+ return PHYD32CLKA;
+ case TRANSMITTER_UNIPHY_B:
+ return PHYD32CLKB;
+ case TRANSMITTER_UNIPHY_C:
+ return PHYD32CLKC;
+ case TRANSMITTER_UNIPHY_D:
+ return PHYD32CLKD;
+ case TRANSMITTER_UNIPHY_E:
+ return PHYD32CLKE;
+ default:
+ return PHYD32CLKA;
+ }
+}
+
+static int get_odm_segment_count(struct pipe_ctx *pipe_ctx)
+{
+ struct pipe_ctx *odm_pipe = pipe_ctx->next_odm_pipe;
+ int count = 1;
+
+ while (odm_pipe != NULL) {
+ count++;
+ odm_pipe = odm_pipe->next_odm_pipe;
+ }
+
+ return count;
+}
+
enum dc_status dcn20_enable_stream_timing(
struct pipe_ctx *pipe_ctx,
struct dc_state *context,
@@ -677,25 +829,26 @@ enum dc_status dcn20_enable_stream_timing(
struct dc_stream_state *stream = pipe_ctx->stream;
struct drr_params params = {0};
unsigned int event_triggers = 0;
- struct pipe_ctx *odm_pipe;
int opp_cnt = 1;
- int opp_inst[MAX_PIPES] = { pipe_ctx->stream_res.opp->inst };
+ int opp_inst[MAX_PIPES] = {0};
bool interlace = stream->timing.flags.INTERLACE;
int i;
struct mpc_dwb_flow_control flow_control;
struct mpc *mpc = dc->res_pool->mpc;
- bool rate_control_2x_pclk = (interlace || optc2_is_two_pixels_per_containter(&stream->timing));
- unsigned int k1_div = PIXEL_RATE_DIV_NA;
- unsigned int k2_div = PIXEL_RATE_DIV_NA;
-
- if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) {
- hws->funcs.calculate_dccg_k1_k2_values(pipe_ctx, &k1_div, &k2_div);
-
+ bool is_two_pixels_per_container =
+ pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing);
+ bool rate_control_2x_pclk = (interlace || is_two_pixels_per_container);
+ int odm_slice_width;
+ int last_odm_slice_width;
+ struct pipe_ctx *opp_heads[MAX_PIPES];
+
+ if (dc->res_pool->dccg->funcs->set_pixel_rate_div)
dc->res_pool->dccg->funcs->set_pixel_rate_div(
dc->res_pool->dccg,
pipe_ctx->stream_res.tg->inst,
- k1_div, k2_div);
- }
+ pipe_ctx->pixel_rate_divider.div_factor1,
+ pipe_ctx->pixel_rate_divider.div_factor2);
+
/* by upper caller loop, pipe0 is parent pipe and be called first.
* back end is set up by for pipe0. Other children pipe share back end
* with pipe 0. No program is needed.
@@ -705,16 +858,17 @@ enum dc_status dcn20_enable_stream_timing(
/* TODO check if timing_changed, disable stream if timing changed */
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
- opp_inst[opp_cnt] = odm_pipe->stream_res.opp->inst;
- opp_cnt++;
- }
+ opp_cnt = resource_get_opp_heads_for_otg_master(pipe_ctx, &context->res_ctx, opp_heads);
+ for (i = 0; i < opp_cnt; i++)
+ opp_inst[i] = opp_heads[i]->stream_res.opp->inst;
+ odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false);
+ last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true);
if (opp_cnt > 1)
pipe_ctx->stream_res.tg->funcs->set_odm_combine(
pipe_ctx->stream_res.tg,
- opp_inst, opp_cnt,
- &pipe_ctx->stream->timing);
+ opp_inst, opp_cnt, odm_slice_width,
+ last_odm_slice_width);
/* HW program guide assume display already disable
* by unplug sequence. OTG assume stop.
@@ -730,6 +884,22 @@ enum dc_status dcn20_enable_stream_timing(
return DC_ERROR_UNEXPECTED;
}
+ if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
+ struct dccg *dccg = dc->res_pool->dccg;
+ struct timing_generator *tg = pipe_ctx->stream_res.tg;
+ struct dtbclk_dto_params dto_params = {0};
+
+ if (dccg->funcs->set_dtbclk_p_src)
+ dccg->funcs->set_dtbclk_p_src(dccg, DTBCLK0, tg->inst);
+
+ dto_params.otg_inst = tg->inst;
+ dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10;
+ dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx);
+ dto_params.timing = &pipe_ctx->stream->timing;
+ dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr);
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ }
+
if (dc_is_hdmi_tmds_signal(stream->signal)) {
stream->link->phy_state.symclk_ref_cnts.otg = 1;
if (stream->link->phy_state.symclk_state == SYMCLK_OFF_TX_OFF)
@@ -748,13 +918,15 @@ enum dc_status dcn20_enable_stream_timing(
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width,
+ pipe_ctx->pipe_dlg_param.pstate_keepout,
pipe_ctx->stream->signal,
true);
rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
flow_control.flow_ctrl_mode = 0;
flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(stream, opp_cnt);
+ flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(stream, opp_cnt,
+ is_two_pixels_per_container);
if (mpc->funcs->set_out_rate_control) {
for (i = 0; i < opp_cnt; ++i) {
mpc->funcs->set_out_rate_control(
@@ -765,14 +937,15 @@ enum dc_status dcn20_enable_stream_timing(
}
}
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
- odm_pipe->stream_res.opp,
+ for (i = 0; i < opp_cnt; i++) {
+ opp_heads[i]->stream_res.opp->funcs->opp_pipe_clock_control(
+ opp_heads[i]->stream_res.opp,
true);
-
- pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control(
- pipe_ctx->stream_res.opp,
- true);
+ opp_heads[i]->stream_res.opp->funcs->opp_program_left_edge_extra_pixel(
+ opp_heads[i]->stream_res.opp,
+ stream->timing.pixel_encoding,
+ resource_is_pipe_type(opp_heads[i], OTG_MASTER));
+ }
hws->funcs.blank_pixel_data(dc, pipe_ctx, true);
@@ -782,15 +955,13 @@ enum dc_status dcn20_enable_stream_timing(
return DC_ERROR_UNEXPECTED;
}
- hws->funcs.wait_for_blank_complete(pipe_ctx->stream_res.opp);
+ udelay(stream->timing.v_total * (stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz));
params.vertical_total_min = stream->adjust.v_total_min;
params.vertical_total_max = stream->adjust.v_total_max;
params.vertical_total_mid = stream->adjust.v_total_mid;
params.vertical_total_mid_frame_num = stream->adjust.v_total_mid_frame_num;
- if (pipe_ctx->stream_res.tg->funcs->set_drr)
- pipe_ctx->stream_res.tg->funcs->set_drr(
- pipe_ctx->stream_res.tg, &params);
+ set_drr_and_clear_adjust_pending(pipe_ctx, stream, &params);
// DRR should set trigger event to monitor surface update event
if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0)
@@ -811,10 +982,11 @@ enum dc_status dcn20_enable_stream_timing(
/* TODO enable stream if timing changed */
/* TODO unblank stream if DP */
- if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) {
- if (pipe_ctx->stream_res.tg && pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable)
+ if (dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
+ if (pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable)
pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable(pipe_ctx->stream_res.tg);
}
+
return DC_OK;
}
@@ -851,7 +1023,7 @@ bool dcn20_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
{
int mpcc_id = pipe_ctx->plane_res.hubp->inst;
struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
- struct pwl_params *params = NULL;
+ const struct pwl_params *params = NULL;
/*
* program OGAM only for the top pipe
* if there is a pipe split then fix diagnostic is required:
@@ -862,25 +1034,26 @@ bool dcn20_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx,
if (mpc->funcs->power_on_mpc_mem_pwr)
mpc->funcs->power_on_mpc_mem_pwr(mpc, mpcc_id, true);
if (pipe_ctx->top_pipe == NULL
- && mpc->funcs->set_output_gamma && stream->out_transfer_func) {
- if (stream->out_transfer_func->type == TF_TYPE_HWPWL)
- params = &stream->out_transfer_func->pwl;
- else if (pipe_ctx->stream->out_transfer_func->type ==
+ && mpc->funcs->set_output_gamma) {
+ if (stream->out_transfer_func.type == TF_TYPE_HWPWL)
+ params = &stream->out_transfer_func.pwl;
+ else if (pipe_ctx->stream->out_transfer_func.type ==
TF_TYPE_DISTRIBUTED_POINTS &&
cm_helper_translate_curve_to_hw_format(dc->ctx,
- stream->out_transfer_func,
+ &stream->out_transfer_func,
&mpc->blender_params, false))
params = &mpc->blender_params;
/*
* there is no ROM
*/
- if (stream->out_transfer_func->type == TF_TYPE_PREDEFINED)
+ if (stream->out_transfer_func.type == TF_TYPE_PREDEFINED)
BREAK_TO_DEBUGGER();
}
/*
* if above if is not executed then 'params' equal to 0 and set in bypass
*/
- mpc->funcs->set_output_gamma(mpc, mpcc_id, params);
+ if (mpc->funcs->set_output_gamma)
+ mpc->funcs->set_output_gamma(mpc, mpcc_id, params);
return true;
}
@@ -890,17 +1063,15 @@ bool dcn20_set_blend_lut(
{
struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
bool result = true;
- struct pwl_params *blend_lut = NULL;
-
- if (plane_state->blend_tf) {
- if (plane_state->blend_tf->type == TF_TYPE_HWPWL)
- blend_lut = &plane_state->blend_tf->pwl;
- else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) {
- cm_helper_translate_curve_to_hw_format(plane_state->ctx,
- plane_state->blend_tf,
- &dpp_base->regamma_params, false);
- blend_lut = &dpp_base->regamma_params;
- }
+ const struct pwl_params *blend_lut = NULL;
+
+ if (plane_state->blend_tf.type == TF_TYPE_HWPWL)
+ blend_lut = &plane_state->blend_tf.pwl;
+ else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) {
+ cm_helper_translate_curve_to_hw_format(plane_state->ctx,
+ &plane_state->blend_tf,
+ &dpp_base->regamma_params, false);
+ blend_lut = &dpp_base->regamma_params;
}
result = dpp_base->funcs->dpp_program_blnd_lut(dpp_base, blend_lut);
@@ -912,24 +1083,21 @@ bool dcn20_set_shaper_3dlut(
{
struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
bool result = true;
- struct pwl_params *shaper_lut = NULL;
-
- if (plane_state->in_shaper_func) {
- if (plane_state->in_shaper_func->type == TF_TYPE_HWPWL)
- shaper_lut = &plane_state->in_shaper_func->pwl;
- else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) {
- cm_helper_translate_curve_to_hw_format(plane_state->ctx,
- plane_state->in_shaper_func,
- &dpp_base->shaper_params, true);
- shaper_lut = &dpp_base->shaper_params;
- }
+ const struct pwl_params *shaper_lut = NULL;
+
+ if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL)
+ shaper_lut = &plane_state->in_shaper_func.pwl;
+ else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) {
+ cm_helper_translate_curve_to_hw_format(plane_state->ctx,
+ &plane_state->in_shaper_func,
+ &dpp_base->shaper_params, true);
+ shaper_lut = &dpp_base->shaper_params;
}
result = dpp_base->funcs->dpp_program_shaper_lut(dpp_base, shaper_lut);
- if (plane_state->lut3d_func &&
- plane_state->lut3d_func->state.bits.initialized == 1)
+ if (plane_state->lut3d_func.state.bits.initialized == 1)
result = dpp_base->funcs->dpp_program_3dlut(dpp_base,
- &plane_state->lut3d_func->lut_3d);
+ &plane_state->lut3d_func.lut_3d);
else
result = dpp_base->funcs->dpp_program_3dlut(dpp_base, NULL);
@@ -952,15 +1120,7 @@ bool dcn20_set_input_transfer_func(struct dc *dc,
hws->funcs.set_shaper_3dlut(pipe_ctx, plane_state);
hws->funcs.set_blend_lut(pipe_ctx, plane_state);
- if (plane_state->in_transfer_func)
- tf = plane_state->in_transfer_func;
-
-
- if (tf == NULL) {
- dpp_base->funcs->dpp_set_degamma(dpp_base,
- IPP_DEGAMMA_MODE_BYPASS);
- return true;
- }
+ tf = &plane_state->in_transfer_func;
if (tf->type == TF_TYPE_HWPWL || tf->type == TF_TYPE_DISTRIBUTED_POINTS)
use_degamma_ram = true;
@@ -1025,6 +1185,8 @@ void dcn20_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
struct pipe_ctx *odm_pipe;
int opp_cnt = 1;
int opp_inst[MAX_PIPES] = { pipe_ctx->stream_res.opp->inst };
+ int odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false);
+ int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true);
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
opp_inst[opp_cnt] = odm_pipe->stream_res.opp->inst;
@@ -1035,7 +1197,7 @@ void dcn20_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
pipe_ctx->stream_res.tg->funcs->set_odm_combine(
pipe_ctx->stream_res.tg,
opp_inst, opp_cnt,
- &pipe_ctx->stream->timing);
+ odm_slice_width, last_odm_slice_width);
else
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
@@ -1053,10 +1215,7 @@ void dcn20_blank_pixel_data(
enum controller_dp_test_pattern test_pattern = CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR;
enum controller_dp_color_space test_pattern_color_space = CONTROLLER_DP_COLOR_SPACE_UDEFINED;
struct pipe_ctx *odm_pipe;
- int odm_cnt = 1;
- int h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
- int v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
- int odm_slice_width, last_odm_slice_width, offset = 0;
+ struct rect odm_slice_src;
if (stream->link->test_pattern_enabled)
return;
@@ -1064,11 +1223,6 @@ void dcn20_blank_pixel_data(
/* get opp dpg blank color */
color_space_to_black_color(dc, color_space, &black_color);
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- odm_cnt++;
- odm_slice_width = h_active / odm_cnt;
- last_odm_slice_width = h_active - odm_slice_width * (odm_cnt - 1);
-
if (blank) {
dc->hwss.set_abm_immediate_disable(pipe_ctx);
@@ -1083,28 +1237,29 @@ void dcn20_blank_pixel_data(
odm_pipe = pipe_ctx;
while (odm_pipe->next_odm_pipe) {
+ odm_slice_src = resource_get_odm_slice_src_rect(odm_pipe);
dc->hwss.set_disp_pattern_generator(dc,
odm_pipe,
test_pattern,
test_pattern_color_space,
stream->timing.display_color_depth,
&black_color,
- odm_slice_width,
- v_active,
- offset);
- offset += odm_slice_width;
+ odm_slice_src.width,
+ odm_slice_src.height,
+ odm_slice_src.x);
odm_pipe = odm_pipe->next_odm_pipe;
}
+ odm_slice_src = resource_get_odm_slice_src_rect(odm_pipe);
dc->hwss.set_disp_pattern_generator(dc,
odm_pipe,
test_pattern,
test_pattern_color_space,
stream->timing.display_color_depth,
&black_color,
- last_odm_slice_width,
- v_active,
- offset);
+ odm_slice_src.width,
+ odm_slice_src.height,
+ odm_slice_src.x);
if (!blank)
if (stream_res->abm) {
@@ -1118,14 +1273,18 @@ static void dcn20_power_on_plane_resources(
struct dce_hwseq *hws,
struct pipe_ctx *pipe_ctx)
{
+ uint32_t org_ip_request_cntl = 0;
+
DC_LOGGER_INIT(hws->ctx->logger);
if (hws->funcs.dpp_root_clock_control)
hws->funcs.dpp_root_clock_control(hws, pipe_ctx->plane_res.dpp->inst, true);
if (REG(DC_IP_REQUEST_CNTL)) {
- REG_SET(DC_IP_REQUEST_CNTL, 0,
- IP_REQUEST_EN, 1);
+ REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
+ if (org_ip_request_cntl == 0)
+ REG_SET(DC_IP_REQUEST_CNTL, 0,
+ IP_REQUEST_EN, 1);
if (hws->funcs.dpp_pg_control)
hws->funcs.dpp_pg_control(hws, pipe_ctx->plane_res.dpp->inst, true);
@@ -1133,14 +1292,16 @@ static void dcn20_power_on_plane_resources(
if (hws->funcs.hubp_pg_control)
hws->funcs.hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, true);
- REG_SET(DC_IP_REQUEST_CNTL, 0,
- IP_REQUEST_EN, 0);
+ if (org_ip_request_cntl == 0)
+ REG_SET(DC_IP_REQUEST_CNTL, 0,
+ IP_REQUEST_EN, 0);
+
DC_LOG_DEBUG(
"Un-gated front end for pipe %d\n", pipe_ctx->plane_res.hubp->inst);
}
}
-static void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx,
+void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx,
struct dc_state *context)
{
//if (dc->debug.sanity_checks) {
@@ -1310,13 +1471,23 @@ void dcn20_pipe_control_lock(
} else {
if (lock)
pipe->stream_res.tg->funcs->lock(pipe->stream_res.tg);
- else
- pipe->stream_res.tg->funcs->unlock(pipe->stream_res.tg);
+ else {
+ if (dc->hwseq->funcs.perform_3dlut_wa_unlock)
+ dc->hwseq->funcs.perform_3dlut_wa_unlock(pipe);
+ else
+ pipe->stream_res.tg->funcs->unlock(pipe->stream_res.tg);
+ }
}
}
-static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe)
+void dcn20_detect_pipe_changes(struct dc_state *old_state,
+ struct dc_state *new_state,
+ struct pipe_ctx *old_pipe,
+ struct pipe_ctx *new_pipe)
{
+ bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM;
+ bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM;
+
new_pipe->update_flags.raw = 0;
/* If non-phantom pipe is being transitioned to a phantom pipe,
@@ -1326,12 +1497,17 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
* be different). The post_unlock sequence will set the correct
* update flags to enable the phantom pipe.
*/
- if (old_pipe->plane_state && !old_pipe->plane_state->is_phantom &&
- new_pipe->plane_state && new_pipe->plane_state->is_phantom) {
+ if (old_pipe->plane_state && !old_is_phantom &&
+ new_pipe->plane_state && new_is_phantom) {
new_pipe->update_flags.bits.disable = 1;
return;
}
+ if (resource_is_pipe_type(new_pipe, OTG_MASTER) &&
+ resource_is_odm_topology_changed(new_pipe, old_pipe))
+ /* Detect odm changes */
+ new_pipe->update_flags.bits.odm = 1;
+
/* Exit on unchanged, unused pipe */
if (!old_pipe->plane_state && !new_pipe->plane_state)
return;
@@ -1347,6 +1523,10 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
new_pipe->update_flags.bits.scaler = 1;
new_pipe->update_flags.bits.viewport = 1;
new_pipe->update_flags.bits.det_size = 1;
+ if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE &&
+ new_pipe->stream_res.test_pattern_params.width != 0 &&
+ new_pipe->stream_res.test_pattern_params.height != 0)
+ new_pipe->update_flags.bits.test_pattern_changed = 1;
if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
new_pipe->update_flags.bits.odm = 1;
new_pipe->update_flags.bits.global_sync = 1;
@@ -1359,14 +1539,14 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
* The remove-add sequence of the phantom pipe always results in the pipe
* being blanked in enable_stream_timing (DPG).
*/
- if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
+ if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM)
new_pipe->update_flags.bits.enable = 1;
/* Phantom pipes are effectively disabled, if the pipe was previously phantom
* we have to enable
*/
- if (old_pipe->plane_state && old_pipe->plane_state->is_phantom &&
- new_pipe->plane_state && !new_pipe->plane_state->is_phantom)
+ if (old_pipe->plane_state && old_is_phantom &&
+ new_pipe->plane_state && !new_is_phantom)
new_pipe->update_flags.bits.enable = 1;
if (old_pipe->plane_state && !new_pipe->plane_state) {
@@ -1380,15 +1560,7 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
}
/* Detect top pipe only changes */
- if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
- /* Detect odm changes */
- if ((old_pipe->next_odm_pipe && new_pipe->next_odm_pipe
- && old_pipe->next_odm_pipe->pipe_idx != new_pipe->next_odm_pipe->pipe_idx)
- || (!old_pipe->next_odm_pipe && new_pipe->next_odm_pipe)
- || (old_pipe->next_odm_pipe && !new_pipe->next_odm_pipe)
- || old_pipe->stream_res.opp != new_pipe->stream_res.opp)
- new_pipe->update_flags.bits.odm = 1;
-
+ if (resource_is_pipe_type(new_pipe, OTG_MASTER)) {
/* Detect global sync changes */
if (old_pipe->pipe_dlg_param.vready_offset != new_pipe->pipe_dlg_param.vready_offset
|| old_pipe->pipe_dlg_param.vstartup_start != new_pipe->pipe_dlg_param.vstartup_start
@@ -1489,9 +1661,14 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
if (old_pipe->unbounded_req != new_pipe->unbounded_req)
new_pipe->update_flags.bits.unbounded_req = 1;
+
+ if (memcmp(&old_pipe->stream_res.test_pattern_params,
+ &new_pipe->stream_res.test_pattern_params, sizeof(struct test_pattern_params))) {
+ new_pipe->update_flags.bits.test_pattern_changed = 1;
+ }
}
-static void dcn20_update_dchubp_dpp(
+void dcn20_update_dchubp_dpp(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
struct dc_state *context)
@@ -1502,6 +1679,7 @@ static void dcn20_update_dchubp_dpp(
struct dc_plane_state *plane_state = pipe_ctx->plane_state;
struct dccg *dccg = dc->res_pool->dccg;
bool viewport_changed = false;
+ enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe_ctx);
if (pipe_ctx->update_flags.bits.dppclk)
dpp->funcs->dpp_dppclk_control(dpp, false, true);
@@ -1513,25 +1691,41 @@ static void dcn20_update_dchubp_dpp(
* VTG is within DCHUBBUB which is commond block share by each pipe HUBP.
* VTG is 1:1 mapping with OTG. Each pipe HUBP will select which VTG
*/
+
if (pipe_ctx->update_flags.bits.hubp_rq_dlg_ttu) {
hubp->funcs->hubp_vtg_sel(hubp, pipe_ctx->stream_res.tg->inst);
- hubp->funcs->hubp_setup(
- hubp,
- &pipe_ctx->dlg_regs,
- &pipe_ctx->ttu_regs,
- &pipe_ctx->rq_regs,
- &pipe_ctx->pipe_dlg_param);
+ if (hubp->funcs->hubp_setup2) {
+ hubp->funcs->hubp_setup2(
+ hubp,
+ &pipe_ctx->hubp_regs,
+ &pipe_ctx->global_sync,
+ &pipe_ctx->stream->timing);
+ } else {
+ hubp->funcs->hubp_setup(
+ hubp,
+ &pipe_ctx->dlg_regs,
+ &pipe_ctx->ttu_regs,
+ &pipe_ctx->rq_regs,
+ &pipe_ctx->pipe_dlg_param);
+ }
}
if (pipe_ctx->update_flags.bits.unbounded_req && hubp->funcs->set_unbounded_requesting)
hubp->funcs->set_unbounded_requesting(hubp, pipe_ctx->unbounded_req);
- if (pipe_ctx->update_flags.bits.hubp_interdependent)
- hubp->funcs->hubp_setup_interdependent(
- hubp,
- &pipe_ctx->dlg_regs,
- &pipe_ctx->ttu_regs);
+ if (pipe_ctx->update_flags.bits.hubp_interdependent) {
+ if (hubp->funcs->hubp_setup_interdependent2) {
+ hubp->funcs->hubp_setup_interdependent2(
+ hubp,
+ &pipe_ctx->hubp_regs);
+ } else {
+ hubp->funcs->hubp_setup_interdependent(
+ hubp,
+ &pipe_ctx->dlg_regs,
+ &pipe_ctx->ttu_regs);
+ }
+ }
if (pipe_ctx->update_flags.bits.enable ||
pipe_ctx->update_flags.bits.plane_changed ||
@@ -1539,7 +1733,7 @@ static void dcn20_update_dchubp_dpp(
plane_state->update_flags.bits.input_csc_change ||
plane_state->update_flags.bits.color_space_change ||
plane_state->update_flags.bits.coeff_reduction_change) {
- struct dc_bias_and_scale bns_params = {0};
+ struct dc_bias_and_scale bns_params = plane_state->bias_and_scale;
// program the input csc
dpp->funcs->dpp_setup(dpp,
@@ -1549,9 +1743,13 @@ static void dcn20_update_dchubp_dpp(
plane_state->color_space,
NULL);
+ if (dpp->funcs->set_cursor_matrix) {
+ dpp->funcs->set_cursor_matrix(dpp,
+ plane_state->color_space,
+ plane_state->cursor_csc_color_matrix);
+ }
if (dpp->funcs->dpp_program_bias_and_scale) {
//TODO :for CNVC set scale and bias registers if necessary
- build_prescale_params(&bns_params, plane_state);
dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);
}
}
@@ -1588,12 +1786,15 @@ static void dcn20_update_dchubp_dpp(
viewport_changed = true;
}
+ if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate)
+ hubp->funcs->hubp_program_mcache_id_and_split_coordinate(hubp, &pipe_ctx->mcache_regs);
+
/* Any updates are handled in dc interface, just need to apply existing for plane enable */
if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed ||
pipe_ctx->update_flags.bits.scaler || viewport_changed == true) &&
pipe_ctx->stream->cursor_attributes.address.quad_part != 0) {
- dc->hwss.set_cursor_position(pipe_ctx);
dc->hwss.set_cursor_attribute(pipe_ctx);
+ dc->hwss.set_cursor_position(pipe_ctx);
if (dc->hwss.set_cursor_sdr_white_level)
dc->hwss.set_cursor_sdr_white_level(pipe_ctx);
@@ -1645,18 +1846,27 @@ static void dcn20_update_dchubp_dpp(
if (pipe_ctx->update_flags.bits.enable ||
pipe_ctx->update_flags.bits.plane_changed ||
- plane_state->update_flags.bits.addr_update)
- hws->funcs.update_plane_addr(dc, pipe_ctx);
+ plane_state->update_flags.bits.addr_update) {
+ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) &&
+ pipe_mall_type == SUBVP_MAIN) {
+ union block_sequence_params params;
+
+ params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv;
+ params.subvp_save_surf_addr.addr = &pipe_ctx->plane_state->address;
+ params.subvp_save_surf_addr.subvp_index = pipe_ctx->subvp_index;
+ hwss_subvp_save_surf_addr(&params);
+ }
+ dc->hwss.update_plane_addr(dc, pipe_ctx);
+ }
if (pipe_ctx->update_flags.bits.enable)
hubp->funcs->set_blank(hubp, false);
/* If the stream paired with this plane is phantom, the plane is also phantom */
- if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM
- && hubp->funcs->phantom_hubp_post_enable)
+ if (pipe_mall_type == SUBVP_PHANTOM && hubp->funcs->phantom_hubp_post_enable)
hubp->funcs->phantom_hubp_post_enable(hubp);
}
-static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
+static int dcn20_calculate_vready_offset_for_group(struct pipe_ctx *pipe)
{
struct pipe_ctx *other_pipe;
int vready_offset = pipe->pipe_dlg_param.vready_offset;
@@ -1682,6 +1892,30 @@ static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
return vready_offset;
}
+static void dcn20_program_tg(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+ struct dce_hwseq *hws)
+{
+ pipe_ctx->stream_res.tg->funcs->program_global_sync(
+ pipe_ctx->stream_res.tg,
+ dcn20_calculate_vready_offset_for_group(pipe_ctx),
+ pipe_ctx->pipe_dlg_param.vstartup_start,
+ pipe_ctx->pipe_dlg_param.vupdate_offset,
+ pipe_ctx->pipe_dlg_param.vupdate_width,
+ pipe_ctx->pipe_dlg_param.pstate_keepout);
+
+ if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
+
+ pipe_ctx->stream_res.tg->funcs->set_vtg_params(
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
+
+ if (hws->funcs.setup_vupdate_interrupt)
+ hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
+}
+
static void dcn20_program_pipe(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
@@ -1692,57 +1926,55 @@ static void dcn20_program_pipe(
/* Only need to unblank on top pipe */
if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) {
if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->update_flags.bits.odm ||
- pipe_ctx->stream->update_flags.bits.abm_level)
+ pipe_ctx->update_flags.bits.odm ||
+ pipe_ctx->stream->update_flags.bits.abm_level)
hws->funcs.blank_pixel_data(dc, pipe_ctx,
- !pipe_ctx->plane_state ||
- !pipe_ctx->plane_state->visible);
+ !pipe_ctx->plane_state ||
+ !pipe_ctx->plane_state->visible);
}
/* Only update TG on top pipe */
if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe
- && !pipe_ctx->prev_odm_pipe) {
- pipe_ctx->stream_res.tg->funcs->program_global_sync(
- pipe_ctx->stream_res.tg,
- calculate_vready_offset_for_group(pipe_ctx),
- pipe_ctx->pipe_dlg_param.vstartup_start,
- pipe_ctx->pipe_dlg_param.vupdate_offset,
- pipe_ctx->pipe_dlg_param.vupdate_width);
-
- if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM)
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
-
- pipe_ctx->stream_res.tg->funcs->set_vtg_params(
- pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
-
- if (hws->funcs.setup_vupdate_interrupt)
- hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
- }
+ && !pipe_ctx->prev_odm_pipe)
+ dcn20_program_tg(dc, pipe_ctx, context, hws);
if (pipe_ctx->update_flags.bits.odm)
hws->funcs.update_odm(dc, context, pipe_ctx);
if (pipe_ctx->update_flags.bits.enable) {
- dcn20_enable_plane(dc, pipe_ctx, context);
+ if (hws->funcs.enable_plane)
+ hws->funcs.enable_plane(dc, pipe_ctx, context);
+ else
+ dcn20_enable_plane(dc, pipe_ctx, context);
+
if (dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes)
dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes(dc->res_pool->hubbub);
}
- if (dc->res_pool->hubbub->funcs->program_det_size && pipe_ctx->update_flags.bits.det_size)
- dc->res_pool->hubbub->funcs->program_det_size(
- dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->det_buffer_size_kb);
+ if (pipe_ctx->update_flags.bits.det_size) {
+ if (dc->res_pool->hubbub->funcs->program_det_size)
+ dc->res_pool->hubbub->funcs->program_det_size(
+ dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->det_buffer_size_kb);
+
+ if (dc->res_pool->hubbub->funcs->program_det_segments)
+ dc->res_pool->hubbub->funcs->program_det_segments(
+ dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size);
+ }
- if (pipe_ctx->update_flags.raw || pipe_ctx->plane_state->update_flags.raw || pipe_ctx->stream->update_flags.raw)
+ if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw ||
+ pipe_ctx->plane_state->update_flags.raw ||
+ pipe_ctx->stream->update_flags.raw))
dcn20_update_dchubp_dpp(dc, pipe_ctx, context);
- if (pipe_ctx->update_flags.bits.enable
- || pipe_ctx->plane_state->update_flags.bits.hdr_mult)
+ if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable ||
+ pipe_ctx->plane_state->update_flags.bits.hdr_mult))
hws->funcs.set_hdr_multiplier(pipe_ctx);
- if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
- pipe_ctx->plane_state->update_flags.bits.gamma_change ||
- pipe_ctx->plane_state->update_flags.bits.lut_3d)
+ if (pipe_ctx->plane_state &&
+ (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
+ pipe_ctx->plane_state->update_flags.bits.gamma_change ||
+ pipe_ctx->plane_state->update_flags.bits.lut_3d ||
+ pipe_ctx->update_flags.bits.enable))
hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
/* dcn10_translate_regamma_to_hw_format takes 750us to finish
@@ -1750,9 +1982,8 @@ static void dcn20_program_pipe(
* updating on slave planes
*/
if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->update_flags.bits.plane_changed ||
- pipe_ctx->stream->update_flags.bits.out_tf ||
- pipe_ctx->plane_state->update_flags.bits.output_tf_change)
+ pipe_ctx->update_flags.bits.plane_changed ||
+ pipe_ctx->stream->update_flags.bits.out_tf)
hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
/* If the pipe has been enabled or has a different opp, we
@@ -1761,7 +1992,7 @@ static void dcn20_program_pipe(
* causes a different pipe to be chosen to odm combine with.
*/
if (pipe_ctx->update_flags.bits.enable
- || pipe_ctx->update_flags.bits.opp_changed) {
+ || pipe_ctx->update_flags.bits.opp_changed) {
pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion(
pipe_ctx->stream_res.opp,
@@ -1776,13 +2007,30 @@ static void dcn20_program_pipe(
}
/* Set ABM pipe after other pipe configurations done */
- if (pipe_ctx->plane_state->visible) {
+ if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) {
if (pipe_ctx->stream_res.abm) {
dc->hwss.set_pipe(pipe_ctx);
pipe_ctx->stream_res.abm->funcs->set_abm_level(pipe_ctx->stream_res.abm,
pipe_ctx->stream->abm_level);
}
}
+
+ if (pipe_ctx->update_flags.bits.test_pattern_changed) {
+ struct output_pixel_processor *odm_opp = pipe_ctx->stream_res.opp;
+ struct bit_depth_reduction_params params;
+
+ memset(&params, 0, sizeof(params));
+ odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
+ dc->hwss.set_disp_pattern_generator(dc,
+ pipe_ctx,
+ pipe_ctx->stream_res.test_pattern_params.test_pattern,
+ pipe_ctx->stream_res.test_pattern_params.color_space,
+ pipe_ctx->stream_res.test_pattern_params.color_depth,
+ NULL,
+ pipe_ctx->stream_res.test_pattern_params.width,
+ pipe_ctx->stream_res.test_pattern_params.height,
+ pipe_ctx->stream_res.test_pattern_params.offset);
+ }
}
void dcn20_program_front_end_for_ctx(
@@ -1790,35 +2038,47 @@ void dcn20_program_front_end_for_ctx(
struct dc_state *context)
{
int i;
+ unsigned int prev_hubp_count = 0;
+ unsigned int hubp_count = 0;
struct dce_hwseq *hws = dc->hwseq;
- DC_LOGGER_INIT(dc->ctx->logger);
+ struct pipe_ctx *pipe = NULL;
- /* Carry over GSL groups in case the context is changing. */
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
- struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+ DC_LOGGER_INIT(dc->ctx->logger);
- if (pipe_ctx->stream == old_pipe_ctx->stream)
- pipe_ctx->stream_res.gsl_group = old_pipe_ctx->stream_res.gsl_group;
- }
+ if (resource_is_pipe_topology_changed(dc->current_state, context))
+ resource_log_pipe_topology_update(dc, context);
if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+ pipe = &context->res_ctx.pipe_ctx[i];
- if (!pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe && pipe_ctx->plane_state) {
- ASSERT(!pipe_ctx->plane_state->triplebuffer_flips);
+ if (pipe->plane_state) {
+ ASSERT(!pipe->plane_state->triplebuffer_flips);
/*turn off triple buffer for full update*/
dc->hwss.program_triplebuffer(
- dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips);
+ dc, pipe, pipe->plane_state->triplebuffer_flips);
}
}
}
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (dc->current_state->res_ctx.pipe_ctx[i].plane_state)
+ prev_hubp_count++;
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ hubp_count++;
+ }
+
+ if (prev_hubp_count == 0 && hubp_count > 0) {
+ if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+ dc->res_pool->hubbub->funcs->force_pstate_change_control(
+ dc->res_pool->hubbub, true, false);
+ udelay(500);
+ }
+
/* Set pipe update flags and lock pipes */
for (i = 0; i < dc->res_pool->pipe_count; i++)
- dcn20_detect_pipe_changes(&dc->current_state->res_ctx.pipe_ctx[i],
- &context->res_ctx.pipe_ctx[i]);
+ dcn20_detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i],
+ &context->res_ctx.pipe_ctx[i]);
/* When disabling phantom pipes, turn on phantom OTG first (so we can get double
* buffer updates properly)
@@ -1826,27 +2086,32 @@ void dcn20_program_front_end_for_ctx(
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream;
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream &&
- dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) {
struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
- if (tg->funcs->enable_crtc)
+ if (tg->funcs->enable_crtc) {
+ if (dc->hwseq->funcs.blank_pixel_data)
+ dc->hwseq->funcs.blank_pixel_data(dc, pipe, true);
+
tg->funcs->enable_crtc(tg);
+ }
}
}
/* OTG blank before disabling all front ends */
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
- && !context->res_ctx.pipe_ctx[i].top_pipe
- && !context->res_ctx.pipe_ctx[i].prev_odm_pipe
- && context->res_ctx.pipe_ctx[i].stream)
+ && !context->res_ctx.pipe_ctx[i].top_pipe
+ && !context->res_ctx.pipe_ctx[i].prev_odm_pipe
+ && context->res_ctx.pipe_ctx[i].stream)
hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
-
/* Disconnect mpcc */
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
- || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) {
+ || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) {
struct hubbub *hubbub = dc->res_pool->hubbub;
/* Phantom pipe DET should be 0, but if a pipe in use is being transitioned to phantom
@@ -1855,19 +2120,38 @@ void dcn20_program_front_end_for_ctx(
* turned on (i.e. in an MCLK switch) which can come in too late and cause issues with
* DET allocation.
*/
- if (hubbub->funcs->program_det_size && (context->res_ctx.pipe_ctx[i].update_flags.bits.disable ||
- (context->res_ctx.pipe_ctx[i].plane_state && context->res_ctx.pipe_ctx[i].plane_state->is_phantom)))
- hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
- hws->funcs.plane_atomic_disconnect(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+ if ((context->res_ctx.pipe_ctx[i].update_flags.bits.disable ||
+ (context->res_ctx.pipe_ctx[i].plane_state &&
+ dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i])
+ == SUBVP_PHANTOM))) {
+ if (hubbub->funcs->program_det_size)
+ hubbub->funcs->program_det_size(hubbub,
+ dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
+ if (dc->res_pool->hubbub->funcs->program_det_segments)
+ dc->res_pool->hubbub->funcs->program_det_segments(
+ hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
+ }
+ hws->funcs.plane_atomic_disconnect(dc, dc->current_state,
+ &dc->current_state->res_ctx.pipe_ctx[i]);
DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
}
+ /* update ODM for blanked OTG master pipes */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+ !resource_is_pipe_type(pipe, DPP_PIPE) &&
+ pipe->update_flags.bits.odm &&
+ hws->funcs.update_odm)
+ hws->funcs.update_odm(dc, context, pipe);
+ }
+
/*
* Program all updated pipes, order matters for mpcc setup. Start with
* top pipe and program all pipes that follow in order
*/
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ pipe = &context->res_ctx.pipe_ctx[i];
if (pipe->plane_state && !pipe->top_pipe) {
while (pipe) {
@@ -1876,25 +2160,28 @@ void dcn20_program_front_end_for_ctx(
else {
/* Don't program phantom pipes in the regular front end programming sequence.
* There is an MPO transition case where a pipe being used by a video plane is
- * transitioned directly to be a phantom pipe when closing the MPO video. However
- * the phantom pipe will program a new HUBP_VTG_SEL (update takes place right away),
- * but the MPO still exists until the double buffered update of the main pipe so we
- * will get a frame of underflow if the phantom pipe is programmed here.
+ * transitioned directly to be a phantom pipe when closing the MPO video.
+ * However the phantom pipe will program a new HUBP_VTG_SEL (update takes place
+ * right away) but the MPO still exists until the double buffered update of the
+ * main pipe so we will get a frame of underflow if the phantom pipe is
+ * programmed here.
*/
- if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_PHANTOM)
+ if (pipe->stream &&
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
dcn20_program_pipe(dc, pipe, context);
}
pipe = pipe->bottom_pipe;
}
}
+
/* Program secondary blending tree and writeback pipes */
pipe = &context->res_ctx.pipe_ctx[i];
if (!pipe->top_pipe && !pipe->prev_odm_pipe
- && pipe->stream && pipe->stream->num_wb_info > 0
- && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw)
- || pipe->stream->update_flags.raw)
- && hws->funcs.program_all_writeback_pipes_in_tree)
+ && pipe->stream && pipe->stream->num_wb_info > 0
+ && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw)
+ || pipe->stream->update_flags.raw)
+ && hws->funcs.program_all_writeback_pipes_in_tree)
hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context);
/* Avoid underflow by check of pipe line read when adding 2nd plane. */
@@ -1906,17 +2193,43 @@ void dcn20_program_front_end_for_ctx(
context->stream_status[0].plane_count > 1) {
pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start(pipe->plane_res.hubp);
}
+ }
+}
- /* when dynamic ODM is active, pipes must be reconfigured when all planes are
- * disabled, as some transitions will leave software and hardware state
- * mismatched.
- */
- if (dc->debug.enable_single_display_2to1_odm_policy &&
- pipe->stream &&
- pipe->update_flags.bits.disable &&
- !pipe->prev_odm_pipe &&
- hws->funcs.update_odm)
- hws->funcs.update_odm(dc, context, pipe);
+/* dcn20_post_unlock_reset_opp - wait for the corresponding double-buffered
+ * pending status to clear, then reset the OPP head pipe's non-double-buffered
+ * registers to their initial state.
+ */
+void dcn20_post_unlock_reset_opp(struct dc *dc,
+ struct pipe_ctx *opp_head)
+{
+ struct display_stream_compressor *dsc = opp_head->stream_res.dsc;
+ struct dccg *dccg = dc->res_pool->dccg;
+
+ /*
+ * wait until all DPP pipes in the current MPC blending tree have completed
+ * their double-buffered disconnection before resetting the OPP
+ */
+ dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, opp_head);
+
+ if (dsc) {
+ bool is_dsc_ungated = false;
+
+ if (dc->hwseq->funcs.dsc_pg_status)
+ is_dsc_ungated = dc->hwseq->funcs.dsc_pg_status(dc->hwseq, dsc->inst);
+
+ if (is_dsc_ungated) {
+ /*
+ * Specific to seamless updates: the non-double-buffered DSCCLK
+ * disable logic is postponed to the post-unlock sequence, i.e.
+ * after DSC is disconnected from OPP but while it is not yet
+ * power gated.
+ */
+ dsc->funcs->dsc_wait_disconnect_pending_clear(dsc);
+ dsc->funcs->dsc_disable(dsc);
+ if (dccg->funcs->set_ref_dscclk)
+ dccg->funcs->set_ref_dscclk(dccg, dsc->inst);
+ }
}
}
@@ -1924,16 +2237,21 @@ void dcn20_post_unlock_program_front_end(
struct dc *dc,
struct dc_state *context)
{
- int i;
- const unsigned int TIMEOUT_FOR_PIPE_ENABLE_US = 100000;
+ // Timeout for pipe enable
+ unsigned int timeout_us = 100000;
unsigned int polling_interval_us = 1;
struct dce_hwseq *hwseq = dc->hwseq;
+ int i;
- DC_LOGGER_INIT(dc->ctx->logger);
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (resource_is_pipe_type(&dc->current_state->res_ctx.pipe_ctx[i], OPP_HEAD) &&
+ !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD))
+ dcn20_post_unlock_reset_opp(dc,
+ &dc->current_state->res_ctx.pipe_ctx[i]);
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
- dc->hwss.disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+ dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
/*
* If we are enabling a pipe, we need to wait for pending clear as this is a critical
@@ -1945,17 +2263,44 @@ void dcn20_post_unlock_program_front_end(
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
// Don't check flip pending on phantom pipes
if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable &&
- pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
struct hubp *hubp = pipe->plane_res.hubp;
int j = 0;
- for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us
- && hubp->funcs->hubp_is_flip_pending(hubp); j++)
+
+ for (j = 0; j < timeout_us / polling_interval_us
+ && hubp->funcs->hubp_is_flip_pending(hubp); j++)
udelay(polling_interval_us);
}
}
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ /* When going from a smaller ODM slice count to larger, we must ensure double
+ * buffer update completes before we return to ensure we don't reduce DISPCLK
+ * before we've transitioned to 2:1 or 4:1
+ */
+ if (resource_is_pipe_type(old_pipe, OTG_MASTER) && resource_is_pipe_type(pipe, OTG_MASTER) &&
+ resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) &&
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
+ int j = 0;
+ struct timing_generator *tg = pipe->stream_res.tg;
+
+ if (tg->funcs->get_optc_double_buffer_pending) {
+ for (j = 0; j < timeout_us / polling_interval_us
+ && tg->funcs->get_optc_double_buffer_pending(tg); j++)
+ udelay(polling_interval_us);
+ }
+ }
+ }
+
+ if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+ dc->res_pool->hubbub->funcs->force_pstate_change_control(
+ dc->res_pool->hubbub, false, false);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
if (pipe->plane_state && !pipe->top_pipe) {
/* Program phantom pipe here to prevent a frame of underflow in the MPO transition
@@ -1964,7 +2309,7 @@ void dcn20_post_unlock_program_front_end(
* programming sequence).
*/
while (pipe) {
- if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
/* When turning on the phantom pipe we want to run through the
* entire enable sequence, so apply all the "enable" flags.
*/
@@ -1979,14 +2324,17 @@ void dcn20_post_unlock_program_front_end(
}
}
+ if (!hwseq)
+ return;
+
/* P-State support transitions:
- * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe
- * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally)
- * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe
- * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe
- * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes
+ * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe
+ * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally)
+ * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe
+ * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe
+ * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes
*/
- if (hwseq && hwseq->funcs.update_force_pstate)
+ if (hwseq->funcs.update_force_pstate)
dc->hwseq->funcs.update_force_pstate(dc, context);
/* Only program the MALL registers after all the main and phantom pipes
@@ -1999,12 +2347,11 @@ void dcn20_post_unlock_program_front_end(
if (hwseq->wa.DEGVIDCN21)
dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub);
-
/* WA for stutter underflow during MPO transitions when adding 2nd plane */
if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) {
if (dc->current_state->stream_status[0].plane_count == 1 &&
- context->stream_status[0].plane_count > 1) {
+ context->stream_status[0].plane_count > 1) {
struct timing_generator *tg = dc->res_pool->timing_generators[0];
@@ -2034,17 +2381,17 @@ void dcn20_prepare_bandwidth(
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
// At optimize don't restore the original watermark value
- if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+ if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) {
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U;
break;
}
}
/* program dchubbub watermarks:
- * For assigning wm_optimized_required, use |= operator since we don't want
+ * For assigning optimized_required, use |= operator since we don't want
* to clear the value if the optimize has not happened yet
*/
- dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub,
+ dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
&context->bw_ctx.bw.dcn.watermarks,
dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
false);
@@ -2057,10 +2404,10 @@ void dcn20_prepare_bandwidth(
if (hubbub->funcs->program_compbuf_size) {
if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) {
compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes;
- dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
+ dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
} else {
compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb;
- dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
+ dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
}
hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false);
@@ -2078,7 +2425,7 @@ void dcn20_optimize_bandwidth(
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
// At optimize don't need to restore the original watermark value
- if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+ if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE) {
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U;
break;
}
@@ -2112,7 +2459,8 @@ void dcn20_optimize_bandwidth(
dc->clk_mgr,
context,
true);
- if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) {
+ if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW &&
+ !dc->debug.disable_extblankadj) {
for (i = 0; i < dc->res_pool->pipe_count; ++i) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
@@ -2133,7 +2481,7 @@ bool dcn20_update_bandwidth(
struct dce_hwseq *hws = dc->hwseq;
/* recalculate DML parameters */
- if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false))
+ if (dc->res_pool->funcs->validate_bandwidth(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING) != DC_OK)
return false;
/* apply updated bandwidth parameters */
@@ -2151,10 +2499,11 @@ bool dcn20_update_bandwidth(
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
- calculate_vready_offset_for_group(pipe_ctx),
+ dcn20_calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
- pipe_ctx->pipe_dlg_param.vupdate_width);
+ pipe_ctx->pipe_dlg_param.vupdate_width,
+ pipe_ctx->pipe_dlg_param.pstate_keepout);
pipe_ctx->stream_res.tg->funcs->set_vtg_params(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false);
@@ -2224,8 +2573,11 @@ bool dcn20_wait_for_blank_complete(
{
int counter;
+ if (!opp)
+ return false;
+
for (counter = 0; counter < 1000; counter++) {
- if (opp->funcs->dpg_is_blanked(opp))
+ if (!opp->funcs->dpg_is_pending(opp))
break;
udelay(100);
@@ -2236,7 +2588,7 @@ bool dcn20_wait_for_blank_complete(
return false;
}
- return true;
+ return opp->funcs->dpg_is_blanked(opp);
}
bool dcn20_dmdata_status_done(struct pipe_ctx *pipe_ctx)
@@ -2402,8 +2754,11 @@ void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
struct dc_link *link = stream->link;
struct dce_hwseq *hws = link->dc->hwseq;
struct pipe_ctx *odm_pipe;
+ bool is_two_pixels_per_container =
+ pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing);
params.opp_cnt = 1;
+
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
params.opp_cnt++;
}
@@ -2418,9 +2773,10 @@ void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
pipe_ctx->stream_res.hpo_dp_stream_enc,
pipe_ctx->stream_res.tg->inst);
} else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
- if (optc2_is_two_pixels_per_containter(&stream->timing) || params.opp_cnt > 1)
+ if (is_two_pixels_per_container || params.opp_cnt > 1)
params.timing.pix_clk_100hz /= 2;
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_odm_combine(
+ if (pipe_ctx->stream_res.stream_enc->funcs->dp_set_odm_combine)
+ pipe_ctx->stream_res.stream_enc->funcs->dp_set_odm_combine(
pipe_ctx->stream_res.stream_enc, params.opp_cnt > 1);
pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(link, pipe_ctx->stream_res.stream_enc, &params);
}
@@ -2442,14 +2798,15 @@ void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx)
tg->funcs->setup_vertical_interrupt2(tg, start_line);
}
-static void dcn20_reset_back_end_for_pipe(
+void dcn20_reset_back_end_for_pipe(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
struct dc_state *context)
{
- int i;
struct dc_link *link = pipe_ctx->stream->link;
const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
+ struct dccg *dccg = dc->res_pool->dccg;
+ struct dtbclk_dto_params dto_params = {0};
DC_LOGGER_INIT(dc->ctx->logger);
if (pipe_ctx->stream_res.stream_enc == NULL) {
@@ -2498,29 +2855,37 @@ static void dcn20_reset_back_end_for_pipe(
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- if (pipe_ctx->stream_res.tg->funcs->set_drr)
- pipe_ctx->stream_res.tg->funcs->set_drr(
- pipe_ctx->stream_res.tg, NULL);
+ set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL);
/* TODO - convert symclk_ref_cnts for otg to a bit map to solve
* the case where the same symclk is shared across multiple otg
* instances
*/
- link->phy_state.symclk_ref_cnts.otg = 0;
+ if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+ link->phy_state.symclk_ref_cnts.otg = 0;
if (link->phy_state.symclk_state == SYMCLK_ON_TX_OFF) {
link_hwss->disable_link_output(link,
&pipe_ctx->link_res, pipe_ctx->stream->signal);
link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF;
}
+ if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx) && dccg
+ && dc->ctx->dce_version >= DCN_VERSION_3_5) {
+ dto_params.otg_inst = pipe_ctx->stream_res.tg->inst;
+ dto_params.timing = &pipe_ctx->stream->timing;
+ if (dccg && dccg->funcs->set_dtbclk_dto)
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ }
}
- for (i = 0; i < dc->res_pool->pipe_count; i++)
- if (&dc->current_state->res_ctx.pipe_ctx[i] == pipe_ctx)
- break;
-
- if (i == dc->res_pool->pipe_count)
- return;
-
+/*
+ * In case of a dangling plane, setting this to NULL unconditionally
+ * causes failures during reset hw ctx where, if stream is NULL,
+ * it is expected that the pipe_ctx pointers to pipes and plane are NULL.
+ */
pipe_ctx->stream = NULL;
+ pipe_ctx->top_pipe = NULL;
+ pipe_ctx->bottom_pipe = NULL;
+ pipe_ctx->next_odm_pipe = NULL;
+ pipe_ctx->prev_odm_pipe = NULL;
DC_LOG_DEBUG("Reset back end for pipe %d, tg:%d\n",
pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst);
}
@@ -2641,37 +3006,6 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
hubp->mpcc_id = mpcc_id;
}
-static enum phyd32clk_clock_source get_phyd32clk_src(struct dc_link *link)
-{
- switch (link->link_enc->transmitter) {
- case TRANSMITTER_UNIPHY_A:
- return PHYD32CLKA;
- case TRANSMITTER_UNIPHY_B:
- return PHYD32CLKB;
- case TRANSMITTER_UNIPHY_C:
- return PHYD32CLKC;
- case TRANSMITTER_UNIPHY_D:
- return PHYD32CLKD;
- case TRANSMITTER_UNIPHY_E:
- return PHYD32CLKE;
- default:
- return PHYD32CLKA;
- }
-}
-
-static int get_odm_segment_count(struct pipe_ctx *pipe_ctx)
-{
- struct pipe_ctx *odm_pipe = pipe_ctx->next_odm_pipe;
- int count = 1;
-
- while (odm_pipe != NULL) {
- count++;
- odm_pipe = odm_pipe->next_odm_pipe;
- }
-
- return count;
-}
-
void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
{
enum dc_lane_count lane_count =
@@ -2689,42 +3023,43 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
struct dccg *dccg = dc->res_pool->dccg;
enum phyd32clk_clock_source phyd32clk;
int dp_hpo_inst;
- struct dce_hwseq *hws = dc->hwseq;
- unsigned int k1_div = PIXEL_RATE_DIV_NA;
- unsigned int k2_div = PIXEL_RATE_DIV_NA;
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link);
+
+ struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc;
struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc;
- if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
- if (dc->hwseq->funcs.setup_hpo_hw_control)
- dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, true);
- }
+ if (!dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
- dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
- dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
-
- phyd32clk = get_phyd32clk_src(link);
- dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
-
dto_params.otg_inst = tg->inst;
dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10;
dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx);
dto_params.timing = &pipe_ctx->stream->timing;
dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr);
dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
- } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST && dccg->funcs->enable_symclk_se)
- dccg->funcs->enable_symclk_se(dccg,
- stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A);
+ dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
+ dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst);
+
+ phyd32clk = get_phyd32clk_src(link);
+ if (link->cur_link_settings.link_rate == LINK_RATE_UNKNOWN) {
+ dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
+ } else {
+ dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
+ }
+ } else {
+ if (dccg->funcs->enable_symclk_se)
+ dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst,
+ link_enc->transmitter - TRANSMITTER_UNIPHY_A);
+ }
- if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) {
- hws->funcs.calculate_dccg_k1_k2_values(pipe_ctx, &k1_div, &k2_div);
+ link_hwss->setup_stream_attribute(pipe_ctx);
+ if (dc->res_pool->dccg->funcs->set_pixel_rate_div)
dc->res_pool->dccg->funcs->set_pixel_rate_div(
dc->res_pool->dccg,
pipe_ctx->stream_res.tg->inst,
- k1_div, k2_div);
- }
+ pipe_ctx->pixel_rate_divider.div_factor1,
+ pipe_ctx->pixel_rate_divider.div_factor2);
link_hwss->setup_stream_encoder(pipe_ctx);
@@ -2751,9 +3086,6 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
early_control = lane_count;
tg->funcs->set_early_control(tg, early_control);
-
- if (dc->hwseq->funcs.set_pixels_per_cycle)
- dc->hwseq->funcs.set_pixels_per_cycle(pipe_ctx);
}
void dcn20_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
@@ -2797,7 +3129,8 @@ void dcn20_fpga_init_hw(struct dc *dc)
res_pool->dccg->funcs->dccg_init(res_pool->dccg);
//Enable ability to power gate / don't force power on permanently
- hws->funcs.enable_power_gating_plane(hws, true);
+ if (hws->funcs.enable_power_gating_plane)
+ hws->funcs.enable_power_gating_plane(hws, true);
// Specific to FPGA dccg and registers
REG_WRITE(RBBMIF_TIMEOUT_DIS, 0xFFFFFFFF);
@@ -2837,7 +3170,7 @@ void dcn20_fpga_init_hw(struct dc *dc)
res_pool->mpc->funcs->mpc_init(res_pool->mpc);
/* initialize OPP mpc_tree parameter */
- for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
res_pool->opps[i]->mpc_tree_params.opp_id = res_pool->opps[i]->inst;
res_pool->opps[i]->mpc_tree_params.opp_list = NULL;
for (j = 0; j < MAX_PIPES; j++)
@@ -2868,7 +3201,7 @@ void dcn20_fpga_init_hw(struct dc *dc)
dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
/*to do*/
- hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+ hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
}
/* initialize DWB pointer to MCIF_WB */
@@ -2885,7 +3218,7 @@ void dcn20_fpga_init_hw(struct dc *dc)
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
- dc->hwss.disable_plane(dc, pipe_ctx);
+ dc->hwss.disable_plane(dc, context, pipe_ctx);
pipe_ctx->stream_res.tg = NULL;
pipe_ctx->plane_res.hubp = NULL;
@@ -2900,33 +3233,6 @@ void dcn20_fpga_init_hw(struct dc *dc)
if (dc->res_pool->hubbub->funcs->init_crb)
dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub);
}
-#ifndef TRIM_FSFT
-bool dcn20_optimize_timing_for_fsft(struct dc *dc,
- struct dc_crtc_timing *timing,
- unsigned int max_input_rate_in_khz)
-{
- unsigned int old_v_front_porch;
- unsigned int old_v_total;
- unsigned int max_input_rate_in_100hz;
- unsigned long long new_v_total;
-
- max_input_rate_in_100hz = max_input_rate_in_khz * 10;
- if (max_input_rate_in_100hz < timing->pix_clk_100hz)
- return false;
-
- old_v_total = timing->v_total;
- old_v_front_porch = timing->v_front_porch;
-
- timing->fast_transport_output_rate_100hz = timing->pix_clk_100hz;
- timing->pix_clk_100hz = max_input_rate_in_100hz;
-
- new_v_total = div_u64((unsigned long long)old_v_total * max_input_rate_in_100hz, timing->pix_clk_100hz);
-
- timing->v_total = new_v_total;
- timing->v_front_porch = old_v_front_porch + (timing->v_total - old_v_total);
- return true;
-}
-#endif
void dcn20_set_disp_pattern_generator(const struct dc *dc,
struct pipe_ctx *pipe_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
index 01901b08644c..9d1ad3b29ca5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
@@ -28,6 +28,8 @@
#include "hw_sequencer_private.h"
+void dcn20_log_color_state(struct dc *dc,
+ struct dc_log_buffer_ctx *log_ctx);
bool dcn20_set_blend_lut(
struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state);
bool dcn20_set_shaper_3dlut(
@@ -52,7 +54,7 @@ void dcn20_program_output_csc(struct dc *dc,
void dcn20_enable_stream(struct pipe_ctx *pipe_ctx);
void dcn20_unblank_stream(struct pipe_ctx *pipe_ctx,
struct dc_link_settings *link_settings);
-void dcn20_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
void dcn20_disable_pixel_data(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
@@ -84,6 +86,10 @@ enum dc_status dcn20_enable_stream_timing(
void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx);
void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx);
void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn20_reset_back_end_for_pipe(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
void dcn20_init_blank(
struct dc *dc,
struct timing_generator *tg);
@@ -136,12 +142,6 @@ int dcn20_init_sys_ctx(struct dce_hwseq *hws,
struct dc *dc,
struct dc_phy_addr_space_config *pa_config);
-#ifndef TRIM_FSFT
-bool dcn20_optimize_timing_for_fsft(struct dc *dc,
- struct dc_crtc_timing *timing,
- unsigned int max_input_rate_in_khz);
-#endif
-
void dcn20_set_disp_pattern_generator(const struct dc *dc,
struct pipe_ctx *pipe_ctx,
enum controller_dp_test_pattern test_pattern,
@@ -150,5 +150,25 @@ void dcn20_set_disp_pattern_generator(const struct dc *dc,
const struct tg_color *solid_color,
int width, int height, int offset);
+void dcn20_setup_gsl_group_as_lock(
+ const struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ bool enable);
+void dcn20_detect_pipe_changes(
+ struct dc_state *old_state,
+ struct dc_state *new_state,
+ struct pipe_ctx *old_pipe,
+ struct pipe_ctx *new_pipe);
+void dcn20_enable_plane(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+void dcn20_update_dchubp_dpp(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+void dcn20_post_unlock_reset_opp(
+ struct dc *dc,
+ struct pipe_ctx *opp_head);
#endif /* __DC_HWSS_DCN20_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c
index e4b44e691ce6..ad253c586ea1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c
@@ -23,9 +23,9 @@
*
*/
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10/dcn10_hw_sequencer.h"
-#include "dcn20_hwseq.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dcn20/dcn20_hwseq.h"
#include "dcn20_init.h"
@@ -36,6 +36,7 @@ static const struct hw_sequencer_funcs dcn20_funcs = {
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
.post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
.update_plane_addr = dcn20_update_plane_addr,
@@ -67,6 +68,7 @@ static const struct hw_sequencer_funcs dcn20_funcs = {
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dce110_set_avmute,
.log_hw_state = dcn10_log_hw_state,
+ .log_color_state = dcn20_log_color_state,
.get_hw_state = dcn10_get_hw_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
@@ -93,9 +95,6 @@ static const struct hw_sequencer_funcs dcn20_funcs = {
.set_backlight_level = dce110_set_backlight_level,
.set_abm_immediate_disable = dce110_set_abm_immediate_disable,
.set_pipe = dce110_set_pipe,
-#ifndef TRIM_FSFT
- .optimize_timing_for_fsft = dcn20_optimize_timing_for_fsft,
-#endif
.enable_lvds_link_output = dce110_enable_lvds_link_output,
.enable_tmds_link_output = dce110_enable_tmds_link_output,
.enable_dp_link_output = dce110_enable_dp_link_output,
@@ -107,7 +106,6 @@ static const struct hw_sequencer_funcs dcn20_funcs = {
static const struct hwseq_private_funcs dcn20_private_funcs = {
.init_pipes = dcn10_init_pipes,
- .update_plane_addr = dcn20_update_plane_addr,
.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
.update_mpcc = dcn20_update_mpcc,
.set_input_transfer_func = dcn20_set_input_transfer_func,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h
index 12277797cd71..12277797cd71 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
index 9e027db6d752..1635e5a552ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c
@@ -28,7 +28,7 @@
#include "core_types.h"
#include "resource.h"
#include "dcn201_hwseq.h"
-#include "dcn201_optc.h"
+#include "dcn201/dcn201_optc.h"
#include "dce/dce_hwseq.h"
#include "hubp.h"
#include "dchubbub.h"
@@ -95,8 +95,11 @@ static bool gpu_addr_to_uma(struct dce_hwseq *hwseq,
} else if (hwseq->fb_offset.quad_part <= addr->quad_part &&
addr->quad_part <= hwseq->uma_top.quad_part) {
is_in_uma = true;
+ } else if (addr->quad_part == 0) {
+ is_in_uma = false;
} else {
is_in_uma = false;
+ BREAK_TO_DEBUGGER();
}
return is_in_uma;
}
@@ -167,7 +170,7 @@ void dcn201_init_blank(
struct tg_color black_color = {0};
struct output_pixel_processor *opp = NULL;
uint32_t num_opps, opp_id_src0, opp_id_src1;
- uint32_t otg_active_width, otg_active_height;
+ uint32_t otg_active_width = 0, otg_active_height = 0;
/* program opp dpg blank color */
color_space = COLOR_SPACE_SRGB;
@@ -237,7 +240,7 @@ void dcn201_init_hw(struct dc *dc)
res_pool->ref_clocks.xtalin_clock_inKhz =
dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency;
- if (res_pool->dccg && res_pool->hubbub) {
+ if (res_pool->hubbub) {
(res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg,
dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency,
&res_pool->ref_clocks.dccg_ref_clock_inKhz);
@@ -320,7 +323,7 @@ void dcn201_init_hw(struct dc *dc)
res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
pipe_ctx->stream_res.opp = res_pool->opps[i];
/*To do: number of MPCC != number of opp*/
- hws->funcs.plane_atomic_disconnect(dc, pipe_ctx);
+ hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
}
/* initialize DWB pointer to MCIF_WB */
@@ -337,7 +340,7 @@ void dcn201_init_hw(struct dc *dc)
for (i = 0; i < res_pool->pipe_count; i++) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
- dc->hwss.disable_plane(dc, pipe_ctx);
+ dc->hwss.disable_plane(dc, context, pipe_ctx);
pipe_ctx->stream_res.tg = NULL;
pipe_ctx->plane_res.hubp = NULL;
@@ -369,7 +372,9 @@ void dcn201_init_hw(struct dc *dc)
}
/* trigger HW to start disconnect plane from stream on the next vsync */
-void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void dcn201_plane_atomic_disconnect(struct dc *dc,
+ struct dc_state *state,
+ struct pipe_ctx *pipe_ctx)
{
struct dce_hwseq *hws = dc->hwseq;
struct hubp *hubp = pipe_ctx->plane_res.hubp;
@@ -403,8 +408,7 @@ void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
if (mpcc_removed == false)
return;
- if (opp != NULL)
- opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
+ opp->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
dc->optimized_required = true;
@@ -599,7 +603,7 @@ void dcn201_unblank_stream(struct pipe_ctx *pipe_ctx,
if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
/*check whether it is half the rate*/
- if (optc201_is_two_pixels_per_containter(&stream->timing))
+ if (pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing))
params.timing.pix_clk_100hz /= 2;
pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(link, pipe_ctx->stream_res.stream_enc, &params);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
index 26cd62be6418..6a50a9894be6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h
@@ -33,7 +33,7 @@ void dcn201_init_hw(struct dc *dc);
void dcn201_unblank_stream(struct pipe_ctx *pipe_ctx,
struct dc_link_settings *link_settings);
void dcn201_update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx);
-void dcn201_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn201_plane_atomic_disconnect(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
void dcn201_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx);
void dcn201_set_cursor_attribute(struct pipe_ctx *pipe_ctx);
void dcn201_pipe_control_lock(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c
index 92dd4cddbab8..dec57fb4c05c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c
@@ -23,10 +23,10 @@
*
*/
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10/dcn10_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
#include "dcn20/dcn20_hwseq.h"
-#include "dcn201_hwseq.h"
+#include "dcn201/dcn201_hwseq.h"
#include "dcn201_init.h"
static const struct hw_sequencer_funcs dcn201_funcs = {
@@ -36,6 +36,7 @@ static const struct hw_sequencer_funcs dcn201_funcs = {
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
.post_unlock_program_front_end = dcn10_post_unlock_program_front_end,
.update_plane_addr = dcn201_update_plane_addr,
@@ -96,7 +97,6 @@ static const struct hw_sequencer_funcs dcn201_funcs = {
static const struct hwseq_private_funcs dcn201_private_funcs = {
.init_pipes = NULL,
- .update_plane_addr = dcn201_update_plane_addr,
.plane_atomic_disconnect = dcn201_plane_atomic_disconnect,
.program_pipe = dcn10_program_pipe,
.update_mpcc = dcn201_update_mpcc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h
index 1168887b033d..1168887b033d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
index 43463d08f21b..e2269211553c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c
@@ -28,14 +28,14 @@
#include "core_types.h"
#include "resource.h"
#include "dce/dce_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn21_hwseq.h"
#include "vmid.h"
#include "reg_helper.h"
#include "hw/clk_mgr.h"
#include "dc_dmub_srv.h"
#include "abm.h"
-#include "link.h"
+#include "link_service.h"
#define DC_LOGGER_INIT(logger)
@@ -66,7 +66,7 @@ static void mmhub_update_page_table_config(struct dcn_hubbub_phys_addr_config *c
int dcn21_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config)
{
- struct dcn_hubbub_phys_addr_config config;
+ struct dcn_hubbub_phys_addr_config config = {0};
config.system_aperture.fb_top = pa_config->system_aperture.fb_top;
config.system_aperture.fb_offset = pa_config->system_aperture.fb_offset;
@@ -137,22 +137,24 @@ void dcn21_PLAT_58856_wa(struct dc_state *context, struct pipe_ctx *pipe_ctx)
pipe_ctx->stream->dpms_off = true;
}
-static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst)
+bool dcn21_dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst,
+ uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst)
{
union dmub_rb_cmd cmd;
struct dc_context *dc = abm->ctx;
- uint32_t ramping_boundary = 0xFFFF;
+ uint8_t ramping_boundary = 0xFF;
memset(&cmd, 0, sizeof(cmd));
cmd.abm_set_pipe.header.type = DMUB_CMD__ABM;
cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE;
cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst;
+ cmd.abm_set_pipe.abm_set_pipe_data.pwrseq_inst = pwrseq_inst;
cmd.abm_set_pipe.abm_set_pipe_data.set_pipe_option = option;
cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst;
cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary;
cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -171,7 +173,7 @@ static void dmub_abm_set_backlight(struct dc_context *dc, uint32_t backlight_pwm
cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst);
cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
- dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
@@ -179,9 +181,14 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
struct abm *abm = pipe_ctx->stream_res.abm;
uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
-
struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
+ // make a short term w/a for an issue that backlight ramping unexpectedly paused in the middle,
+ // will decouple backlight from ABM and redefine DMUB interface, then this w/a could be removed
+ if (pipe_ctx->stream->abm_level == 0 || pipe_ctx->stream->abm_level == ABM_LEVEL_IMMEDIATE_DISABLE) {
+ return;
+ }
+
if (dmcu) {
dce110_set_abm_immediate_disable(pipe_ctx);
return;
@@ -190,9 +197,13 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
if (abm && panel_cntl) {
if (abm->funcs && abm->funcs->set_pipe_ex) {
abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE,
- panel_cntl->inst);
+ panel_cntl->inst, panel_cntl->pwrseq_inst);
} else {
- dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE, panel_cntl->inst);
+ dcn21_dmub_abm_set_pipe(abm,
+ otg_inst,
+ SET_ABM_PIPE_IMMEDIATELY_DISABLE,
+ panel_cntl->inst,
+ panel_cntl->pwrseq_inst);
}
panel_cntl->funcs->store_backlight_level(panel_cntl);
}
@@ -201,50 +212,71 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx)
void dcn21_set_pipe(struct pipe_ctx *pipe_ctx)
{
struct abm *abm = pipe_ctx->stream_res.abm;
- uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
+ struct timing_generator *tg = pipe_ctx->stream_res.tg;
struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu;
+ uint32_t otg_inst;
+
+ if (!abm || !tg || !panel_cntl)
+ return;
+
+ otg_inst = tg->inst;
if (dmcu) {
dce110_set_pipe(pipe_ctx);
return;
}
- if (abm && panel_cntl) {
- if (abm->funcs && abm->funcs->set_pipe_ex) {
- abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
- } else {
- dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
- }
+ if (abm->funcs && abm->funcs->set_pipe_ex) {
+ abm->funcs->set_pipe_ex(abm,
+ otg_inst,
+ SET_ABM_PIPE_NORMAL,
+ panel_cntl->inst,
+ panel_cntl->pwrseq_inst);
+ } else {
+ dcn21_dmub_abm_set_pipe(abm, otg_inst,
+ SET_ABM_PIPE_NORMAL,
+ panel_cntl->inst,
+ panel_cntl->pwrseq_inst);
}
}
bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp)
+ struct set_backlight_level_params *backlight_level_params)
{
struct dc_context *dc = pipe_ctx->stream->ctx;
struct abm *abm = pipe_ctx->stream_res.abm;
+ struct timing_generator *tg = pipe_ctx->stream_res.tg;
struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
+ uint32_t otg_inst;
+ uint32_t backlight_pwm_u16_16 = backlight_level_params->backlight_pwm_u16_16;
+ uint32_t frame_ramp = backlight_level_params->frame_ramp;
+
+ if (!abm || !tg || !panel_cntl)
+ return false;
+
+ otg_inst = tg->inst;
if (dc->dc->res_pool->dmcu) {
- dce110_set_backlight_level(pipe_ctx, backlight_pwm_u16_16, frame_ramp);
+ dce110_set_backlight_level(pipe_ctx, backlight_level_params);
return true;
}
- if (abm != NULL) {
- uint32_t otg_inst = pipe_ctx->stream_res.tg->inst;
-
- if (abm && panel_cntl) {
- if (abm->funcs && abm->funcs->set_pipe_ex) {
- abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
- } else {
- dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst);
- }
- }
+ if (abm->funcs && abm->funcs->set_pipe_ex) {
+ abm->funcs->set_pipe_ex(abm,
+ otg_inst,
+ SET_ABM_PIPE_NORMAL,
+ panel_cntl->inst,
+ panel_cntl->pwrseq_inst);
+ } else {
+ dcn21_dmub_abm_set_pipe(abm,
+ otg_inst,
+ SET_ABM_PIPE_NORMAL,
+ panel_cntl->inst,
+ panel_cntl->pwrseq_inst);
}
- if (abm && abm->funcs && abm->funcs->set_backlight_level_pwm)
+ if (abm->funcs && abm->funcs->set_backlight_level_pwm)
abm->funcs->set_backlight_level_pwm(abm, backlight_pwm_u16_16,
frame_ramp, 0, panel_cntl->inst);
else
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h
index 9cee9bdb8de9..f72a27ac1bf1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h
@@ -47,11 +47,12 @@ void dcn21_optimize_pwr_state(
void dcn21_PLAT_58856_wa(struct dc_state *context,
struct pipe_ctx *pipe_ctx);
+bool dcn21_dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst,
+ uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst);
void dcn21_set_pipe(struct pipe_ctx *pipe_ctx);
void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx);
bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp);
+ struct set_backlight_level_params *params);
bool dcn21_is_abm_supported(struct dc *dc,
struct dc_state *context, struct dc_stream_state *stream);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
index f024157bd6eb..c7701a8b574a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c
@@ -23,10 +23,10 @@
*
*/
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10/dcn10_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
#include "dcn20/dcn20_hwseq.h"
-#include "dcn21_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
#include "dcn21_init.h"
@@ -37,6 +37,7 @@ static const struct hw_sequencer_funcs dcn21_funcs = {
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
.post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
.update_plane_addr = dcn20_update_plane_addr,
@@ -68,6 +69,7 @@ static const struct hw_sequencer_funcs dcn21_funcs = {
.set_avmute = dce110_set_avmute,
.log_hw_state = dcn10_log_hw_state,
.get_hw_state = dcn10_get_hw_state,
+ .log_color_state = dcn20_log_color_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
.edp_backlight_control = dce110_edp_backlight_control,
@@ -92,13 +94,9 @@ static const struct hw_sequencer_funcs dcn21_funcs = {
.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
- .power_down = dce110_power_down,
.set_backlight_level = dcn21_set_backlight_level,
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
.set_pipe = dcn21_set_pipe,
-#ifndef TRIM_FSFT
- .optimize_timing_for_fsft = dcn20_optimize_timing_for_fsft,
-#endif
.enable_lvds_link_output = dce110_enable_lvds_link_output,
.enable_tmds_link_output = dce110_enable_tmds_link_output,
.enable_dp_link_output = dce110_enable_dp_link_output,
@@ -111,7 +109,6 @@ static const struct hw_sequencer_funcs dcn21_funcs = {
static const struct hwseq_private_funcs dcn21_private_funcs = {
.init_pipes = dcn10_init_pipes,
- .update_plane_addr = dcn20_update_plane_addr,
.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
.update_mpcc = dcn20_update_mpcc,
.set_input_transfer_func = dcn20_set_input_transfer_func,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h
index 3ed24292648a..3ed24292648a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
index 255713ec29bb..e47ed5571dfd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c
@@ -31,10 +31,10 @@
#include "dcn30_hwseq.h"
#include "dccg.h"
#include "dce/dce_hwseq.h"
-#include "dcn30_mpc.h"
-#include "dcn30_dpp.h"
+#include "dcn30/dcn30_mpc.h"
+#include "dcn30/dcn30_dpp.h"
#include "dcn10/dcn10_cm_common.h"
-#include "dcn30_cm_common.h"
+#include "dcn30/dcn30_cm_common.h"
#include "reg_helper.h"
#include "abm.h"
#include "clk_mgr.h"
@@ -48,10 +48,10 @@
#include "dc_dmub_srv.h"
#include "link_hwss.h"
#include "dpcd_defs.h"
-#include "../dcn20/dcn20_hwseq.h"
-#include "dcn30_resource.h"
-#include "link.h"
-
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn30/dcn30_resource.h"
+#include "link_service.h"
+#include "dc_state_priv.h"
@@ -69,21 +69,181 @@
#define FN(reg_name, field_name) \
hws->shifts->field_name, hws->masks->field_name
+void dcn30_log_color_state(struct dc *dc,
+ struct dc_log_buffer_ctx *log_ctx)
+{
+ struct dc_context *dc_ctx = dc->ctx;
+ struct resource_pool *pool = dc->res_pool;
+ bool is_gamut_remap_available = false;
+ int i;
+
+ DTN_INFO("DPP: DGAM ROM DGAM ROM type DGAM LUT SHAPER mode"
+ " 3DLUT mode 3DLUT bit depth 3DLUT size RGAM mode"
+ " GAMUT adjust "
+ "C11 C12 C13 C14 "
+ "C21 C22 C23 C24 "
+ "C31 C32 C33 C34 \n");
+
+ for (i = 0; i < pool->pipe_count; i++) {
+ struct dpp *dpp = pool->dpps[i];
+ struct dcn_dpp_state s = {0};
+
+ dpp->funcs->dpp_read_state(dpp, &s);
+
+ if (dpp->funcs->dpp_get_gamut_remap) {
+ dpp->funcs->dpp_get_gamut_remap(dpp, &s.gamut_remap);
+ is_gamut_remap_available = true;
+ }
+
+ if (!s.is_enabled)
+ continue;
+
+ DTN_INFO("[%2d]: %7x %13s %8s %11s %10s %15s %10s %9s",
+ dpp->inst,
+ s.pre_dgam_mode,
+ (s.pre_dgam_select == 0) ? "sRGB" :
+ ((s.pre_dgam_select == 1) ? "Gamma 2.2" :
+ ((s.pre_dgam_select == 2) ? "Gamma 2.4" :
+ ((s.pre_dgam_select == 3) ? "Gamma 2.6" :
+ ((s.pre_dgam_select == 4) ? "BT.709" :
+ ((s.pre_dgam_select == 5) ? "PQ" :
+ ((s.pre_dgam_select == 6) ? "HLG" :
+ "Unknown")))))),
+ (s.gamcor_mode == 0) ? "Bypass" :
+ ((s.gamcor_mode == 1) ? "RAM A" :
+ "RAM B"),
+ (s.shaper_lut_mode == 1) ? "RAM A" :
+ ((s.shaper_lut_mode == 2) ? "RAM B" :
+ "Bypass"),
+ (s.lut3d_mode == 1) ? "RAM A" :
+ ((s.lut3d_mode == 2) ? "RAM B" :
+ "Bypass"),
+ (s.lut3d_bit_depth <= 0) ? "12-bit" : "10-bit",
+ (s.lut3d_size == 0) ? "17x17x17" : "9x9x9",
+ (s.rgam_lut_mode == 0) ? "Bypass" :
+ ((s.rgam_lut_mode == 1) ? "RAM A" :
+ "RAM B"));
+
+ if (is_gamut_remap_available) {
+ DTN_INFO(" %12s "
+ "%010lld %010lld %010lld %010lld "
+ "%010lld %010lld %010lld %010lld "
+ "%010lld %010lld %010lld %010lld",
+
+ (s.gamut_remap.gamut_adjust_type == 0) ? "Bypass" :
+ ((s.gamut_remap.gamut_adjust_type == 1) ? "HW" :
+ "SW"),
+ s.gamut_remap.temperature_matrix[0].value,
+ s.gamut_remap.temperature_matrix[1].value,
+ s.gamut_remap.temperature_matrix[2].value,
+ s.gamut_remap.temperature_matrix[3].value,
+ s.gamut_remap.temperature_matrix[4].value,
+ s.gamut_remap.temperature_matrix[5].value,
+ s.gamut_remap.temperature_matrix[6].value,
+ s.gamut_remap.temperature_matrix[7].value,
+ s.gamut_remap.temperature_matrix[8].value,
+ s.gamut_remap.temperature_matrix[9].value,
+ s.gamut_remap.temperature_matrix[10].value,
+ s.gamut_remap.temperature_matrix[11].value);
+ }
+
+ DTN_INFO("\n");
+ }
+ DTN_INFO("\n");
+ DTN_INFO("DPP Color Caps: input_lut_shared:%d icsc:%d"
+ " dgam_ram:%d dgam_rom: srgb:%d,bt2020:%d,gamma2_2:%d,pq:%d,hlg:%d"
+ " post_csc:%d gamcor:%d dgam_rom_for_yuv:%d 3d_lut:%d"
+ " blnd_lut:%d oscs:%d\n\n",
+ dc->caps.color.dpp.input_lut_shared,
+ dc->caps.color.dpp.icsc,
+ dc->caps.color.dpp.dgam_ram,
+ dc->caps.color.dpp.dgam_rom_caps.srgb,
+ dc->caps.color.dpp.dgam_rom_caps.bt2020,
+ dc->caps.color.dpp.dgam_rom_caps.gamma2_2,
+ dc->caps.color.dpp.dgam_rom_caps.pq,
+ dc->caps.color.dpp.dgam_rom_caps.hlg,
+ dc->caps.color.dpp.post_csc,
+ dc->caps.color.dpp.gamma_corr,
+ dc->caps.color.dpp.dgam_rom_for_yuv,
+ dc->caps.color.dpp.hw_3d_lut,
+ dc->caps.color.dpp.ogam_ram,
+ dc->caps.color.dpp.ocsc);
+
+ DTN_INFO("MPCC: OPP DPP MPCCBOT MODE ALPHA_MODE PREMULT OVERLAP_ONLY IDLE"
+ " SHAPER mode 3DLUT mode 3DLUT bit-depth 3DLUT size OGAM mode OGAM LUT"
+ " GAMUT adjust "
+ "C11 C12 C13 C14 "
+ "C21 C22 C23 C24 "
+ "C31 C32 C33 C34 \n");
+
+ for (i = 0; i < pool->mpcc_count; i++) {
+ struct mpcc_state s = {0};
+
+ pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s);
+ mpc3_get_gamut_remap(pool->mpc, i, &s.gamut_remap);
+
+ if (s.opp_id != 0xf)
+ DTN_INFO("[%2d]: %2xh %2xh %6xh %4d %10d %7d %12d %4d %11s %11s %16s %11s %10s %9s"
+ " %-12s "
+ "%010lld %010lld %010lld %010lld "
+ "%010lld %010lld %010lld %010lld "
+ "%010lld %010lld %010lld %010lld\n",
+ i, s.opp_id, s.dpp_id, s.bot_mpcc_id,
+ s.mode, s.alpha_mode, s.pre_multiplied_alpha, s.overlap_only,
+ s.idle,
+ (s.shaper_lut_mode == 1) ? "RAM A" :
+ ((s.shaper_lut_mode == 2) ? "RAM B" :
+ "Bypass"),
+ (s.lut3d_mode == 1) ? "RAM A" :
+ ((s.lut3d_mode == 2) ? "RAM B" :
+ "Bypass"),
+ (s.lut3d_bit_depth <= 0) ? "12-bit" : "10-bit",
+ (s.lut3d_size == 0) ? "17x17x17" : "9x9x9",
+ (s.rgam_mode == 0) ? "Bypass" :
+ ((s.rgam_mode == 2) ? "RAM" :
+ "Unknown"),
+ (s.rgam_mode == 1) ? "B" : "A",
+ (s.gamut_remap.gamut_adjust_type == 0) ? "Bypass" :
+ ((s.gamut_remap.gamut_adjust_type == 1) ? "HW" :
+ "SW"),
+ s.gamut_remap.temperature_matrix[0].value,
+ s.gamut_remap.temperature_matrix[1].value,
+ s.gamut_remap.temperature_matrix[2].value,
+ s.gamut_remap.temperature_matrix[3].value,
+ s.gamut_remap.temperature_matrix[4].value,
+ s.gamut_remap.temperature_matrix[5].value,
+ s.gamut_remap.temperature_matrix[6].value,
+ s.gamut_remap.temperature_matrix[7].value,
+ s.gamut_remap.temperature_matrix[8].value,
+ s.gamut_remap.temperature_matrix[9].value,
+ s.gamut_remap.temperature_matrix[10].value,
+ s.gamut_remap.temperature_matrix[11].value);
+
+ }
+ DTN_INFO("\n");
+ DTN_INFO("MPC Color Caps: gamut_remap:%d, 3dlut:%d, ogam_ram:%d, ocsc:%d\n\n",
+ dc->caps.color.mpc.gamut_remap,
+ dc->caps.color.mpc.num_3dluts,
+ dc->caps.color.mpc.ogam_ram,
+ dc->caps.color.mpc.ocsc);
+}
+
bool dcn30_set_blend_lut(
struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state)
{
struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
bool result = true;
- struct pwl_params *blend_lut = NULL;
-
- if (plane_state->blend_tf) {
- if (plane_state->blend_tf->type == TF_TYPE_HWPWL)
- blend_lut = &plane_state->blend_tf->pwl;
- else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) {
- cm3_helper_translate_curve_to_hw_format(
- plane_state->blend_tf, &dpp_base->regamma_params, false);
- blend_lut = &dpp_base->regamma_params;
- }
+ const struct pwl_params *blend_lut = NULL;
+
+ if (plane_state->blend_tf.type == TF_TYPE_HWPWL)
+ blend_lut = &plane_state->blend_tf.pwl;
+ else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) {
+ result = cm3_helper_translate_curve_to_hw_format(
+ &plane_state->blend_tf, &dpp_base->regamma_params, false);
+ if (!result)
+ return result;
+
+ blend_lut = &dpp_base->regamma_params;
}
result = dpp_base->funcs->dpp_program_blnd_lut(dpp_base, blend_lut);
@@ -95,6 +255,7 @@ static bool dcn30_set_mpc_shaper_3dlut(struct pipe_ctx *pipe_ctx,
{
struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
int mpcc_id = pipe_ctx->plane_res.hubp->inst;
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
bool result = false;
int acquired_rmu = 0;
@@ -133,8 +294,14 @@ static bool dcn30_set_mpc_shaper_3dlut(struct pipe_ctx *pipe_ctx,
result = mpc->funcs->program_3dlut(mpc, &stream->lut3d_func->lut_3d,
stream->lut3d_func->state.bits.rmu_mux_num);
+ if (!result)
+ DC_LOG_ERROR("%s: program_3dlut failed\n", __func__);
+
result = mpc->funcs->program_shaper(mpc, shaper_lut,
stream->lut3d_func->state.bits.rmu_mux_num);
+ if (!result)
+ DC_LOG_ERROR("%s: program_shaper failed\n", __func__);
+
} else {
// loop through the available mux and release the requested mpcc_id
mpc->funcs->release_rmu(mpc, mpcc_id);
@@ -151,27 +318,24 @@ bool dcn30_set_input_transfer_func(struct dc *dc,
struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
enum dc_transfer_func_predefined tf;
bool result = true;
- struct pwl_params *params = NULL;
+ const struct pwl_params *params = NULL;
if (dpp_base == NULL || plane_state == NULL)
return false;
tf = TRANSFER_FUNCTION_UNITY;
- if (plane_state->in_transfer_func &&
- plane_state->in_transfer_func->type == TF_TYPE_PREDEFINED)
- tf = plane_state->in_transfer_func->tf;
+ if (plane_state->in_transfer_func.type == TF_TYPE_PREDEFINED)
+ tf = plane_state->in_transfer_func.tf;
dpp_base->funcs->dpp_set_pre_degam(dpp_base, tf);
- if (plane_state->in_transfer_func) {
- if (plane_state->in_transfer_func->type == TF_TYPE_HWPWL)
- params = &plane_state->in_transfer_func->pwl;
- else if (plane_state->in_transfer_func->type == TF_TYPE_DISTRIBUTED_POINTS &&
- cm3_helper_translate_curve_to_hw_format(plane_state->in_transfer_func,
- &dpp_base->degamma_params, false))
- params = &dpp_base->degamma_params;
- }
+ if (plane_state->in_transfer_func.type == TF_TYPE_HWPWL)
+ params = &plane_state->in_transfer_func.pwl;
+ else if (plane_state->in_transfer_func.type == TF_TYPE_DISTRIBUTED_POINTS &&
+ cm3_helper_translate_curve_to_hw_format(&plane_state->in_transfer_func,
+ &dpp_base->degamma_params, false))
+ params = &dpp_base->degamma_params;
result = dpp_base->funcs->dpp_program_gamcor_lut(dpp_base, params);
@@ -186,35 +350,76 @@ bool dcn30_set_input_transfer_func(struct dc *dc,
return result;
}
+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx)
+{
+ int i = 0;
+ struct dpp_grph_csc_adjustment dpp_adjust;
+ struct mpc_grph_gamut_adjustment mpc_adjust;
+ int mpcc_id = pipe_ctx->plane_res.hubp->inst;
+ struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
+
+ memset(&dpp_adjust, 0, sizeof(dpp_adjust));
+ dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+
+ if (pipe_ctx->plane_state &&
+ pipe_ctx->plane_state->gamut_remap_matrix.enable_remap == true) {
+ dpp_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+ for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
+ dpp_adjust.temperature_matrix[i] =
+ pipe_ctx->plane_state->gamut_remap_matrix.matrix[i];
+ }
+
+ pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp,
+ &dpp_adjust);
+
+ memset(&mpc_adjust, 0, sizeof(mpc_adjust));
+ mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+
+ if (pipe_ctx->top_pipe == NULL) {
+ if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
+ mpc_adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+ for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
+ mpc_adjust.temperature_matrix[i] =
+ pipe_ctx->stream->gamut_remap_matrix.matrix[i];
+ }
+ }
+
+ mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust);
+}
+
bool dcn30_set_output_transfer_func(struct dc *dc,
struct pipe_ctx *pipe_ctx,
const struct dc_stream_state *stream)
{
int mpcc_id = pipe_ctx->plane_res.hubp->inst;
struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
- struct pwl_params *params = NULL;
+ const struct pwl_params *params = NULL;
bool ret = false;
/* program OGAM or 3DLUT only for the top pipe*/
if (pipe_ctx->top_pipe == NULL) {
/*program rmu shaper and 3dlut in MPC*/
ret = dcn30_set_mpc_shaper_3dlut(pipe_ctx, stream);
- if (ret == false && mpc->funcs->set_output_gamma && stream->out_transfer_func) {
- if (stream->out_transfer_func->type == TF_TYPE_HWPWL)
- params = &stream->out_transfer_func->pwl;
- else if (pipe_ctx->stream->out_transfer_func->type ==
+ if (ret == false && mpc->funcs->set_output_gamma) {
+ if (stream->out_transfer_func.type == TF_TYPE_HWPWL)
+ params = &stream->out_transfer_func.pwl;
+ else if (pipe_ctx->stream->out_transfer_func.type ==
TF_TYPE_DISTRIBUTED_POINTS &&
cm3_helper_translate_curve_to_hw_format(
- stream->out_transfer_func,
+ &stream->out_transfer_func,
&mpc->blender_params, false))
params = &mpc->blender_params;
/* there are no ROM LUTs in OUTGAM */
- if (stream->out_transfer_func->type == TF_TYPE_PREDEFINED)
+ if (stream->out_transfer_func.type == TF_TYPE_PREDEFINED)
BREAK_TO_DEBUGGER();
}
}
- mpc->funcs->set_output_gamma(mpc, mpcc_id, params);
+ if (mpc->funcs->set_output_gamma)
+ mpc->funcs->set_output_gamma(mpc, mpcc_id, params);
+ else
+ DC_LOG_ERROR("%s: set_output_gamma function pointer is NULL.\n", __func__);
+
return ret;
}
@@ -267,7 +472,7 @@ bool dcn30_mmhubbub_warmup(
struct mcif_wb *mcif_wb;
struct mcif_warmup_params warmup_params = {0};
unsigned int i, i_buf;
- /*make sure there is no active DWB eanbled */
+ /* make sure there is no active DWB enabled */
for (i = 0; i < num_dwb; i++) {
dwb = dc->res_pool->dwbc[wb_info[i].dwb_pipe_inst];
if (dwb->dwb_is_efc_transition || dwb->dwb_is_drc) {
@@ -298,7 +503,6 @@ bool dcn30_mmhubbub_warmup(
}
/*following is the original: warmup each DWB's mcif buffer*/
for (i = 0; i < num_dwb; i++) {
- dwb = dc->res_pool->dwbc[wb_info[i].dwb_pipe_inst];
mcif_wb = dc->res_pool->mcif_wb[wb_info[i].dwb_pipe_inst];
/*warmup is for VM mode only*/
if (wb_info[i].mcif_buf_params.p_vmid == 0)
@@ -330,6 +534,10 @@ void dcn30_enable_writeback(
DC_LOG_DWB("%s dwb_pipe_inst = %d, mpcc_inst = %d",\
__func__, wb_info->dwb_pipe_inst,\
wb_info->mpcc_inst);
+
+ /* Warmup interface */
+ dcn30_mmhubbub_warmup(dc, 1, wb_info);
+
/* Update writeback pipe */
dcn30_set_writeback(dc, wb_info, context);
@@ -435,8 +643,9 @@ void dcn30_init_hw(struct dc *dc)
int i;
int edp_num;
uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+ uint32_t user_level = MAX_BACKLIGHT_LEVEL;
- if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
+ if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks)
dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
// Initialize the dccg
@@ -470,7 +679,7 @@ void dcn30_init_hw(struct dc *dc)
res_pool->ref_clocks.xtalin_clock_inKhz =
dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency;
- if (res_pool->dccg && res_pool->hubbub) {
+ if (res_pool->hubbub) {
(res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg,
dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency,
@@ -542,10 +751,10 @@ void dcn30_init_hw(struct dc *dc)
if (edp_link && edp_link->link_enc->funcs->is_dig_enabled &&
edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) &&
dc->hwss.edp_backlight_control &&
- dc->hwss.power_down &&
+ hws->funcs.power_down &&
dc->hwss.edp_power_control) {
dc->hwss.edp_backlight_control(edp_link, false);
- dc->hwss.power_down(dc);
+ hws->funcs.power_down(dc);
dc->hwss.edp_power_control(edp_link, false);
} else {
for (i = 0; i < dc->link_count; i++) {
@@ -553,8 +762,8 @@ void dcn30_init_hw(struct dc *dc)
if (link->link_enc->funcs->is_dig_enabled &&
link->link_enc->funcs->is_dig_enabled(link->link_enc) &&
- dc->hwss.power_down) {
- dc->hwss.power_down(dc);
+ hws->funcs.power_down) {
+ hws->funcs.power_down(dc);
break;
}
@@ -571,13 +780,15 @@ void dcn30_init_hw(struct dc *dc)
for (i = 0; i < dc->link_count; i++) {
struct dc_link *link = dc->links[i];
- if (link->panel_cntl)
+ if (link->panel_cntl) {
backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+ user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+ }
}
for (i = 0; i < dc->res_pool->pipe_count; i++) {
if (abms[i] != NULL)
- abms[i]->funcs->abm_init(abms[i], backlight);
+ abms[i]->funcs->abm_init(abms[i], backlight, user_level);
}
/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
@@ -595,11 +806,12 @@ void dcn30_init_hw(struct dc *dc)
if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
- if (dc->clk_mgr->funcs->notify_wm_ranges)
+ if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges)
dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
//if softmax is enabled then hardmax will be set by a different call
- if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled)
+ if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->set_hard_max_memclk &&
+ !dc->clk_mgr->dc_mode_softmax_enabled)
dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
@@ -611,7 +823,7 @@ void dcn30_init_hw(struct dc *dc)
// Get DMCUB capabilities
dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv);
dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;
- dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch;
+ dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver;
}
void dcn30_set_avmute(struct pipe_ctx *pipe_ctx, bool enable)
@@ -619,10 +831,20 @@ void dcn30_set_avmute(struct pipe_ctx *pipe_ctx, bool enable)
if (pipe_ctx == NULL)
return;
- if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL)
+ if (dc_is_hdmi_signal(pipe_ctx->stream->signal) && pipe_ctx->stream_res.stream_enc != NULL) {
pipe_ctx->stream_res.stream_enc->funcs->set_avmute(
pipe_ctx->stream_res.stream_enc,
enable);
+
+ /* Wait for two frames to make sure AV mute is sent out */
+ if (enable && pipe_ctx->stream_res.tg->funcs->is_tg_enabled(pipe_ctx->stream_res.tg)) {
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK);
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK);
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
+ }
+ }
}
void dcn30_update_info_frame(struct pipe_ctx *pipe_ctx)
@@ -687,7 +909,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
{
union dmub_rb_cmd cmd;
uint32_t tmr_delay = 0, tmr_scale = 0;
- struct dc_cursor_attributes cursor_attr;
+ struct dc_cursor_attributes cursor_attr = {0};
bool cursor_cache_enable = false;
struct dc_stream_state *stream = NULL;
struct dc_plane_state *plane = NULL;
@@ -713,7 +935,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_NO_DF_REQ;
cmd.mall.header.payload_bytes = sizeof(cmd.mall) - sizeof(cmd.mall.header);
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
return true;
}
@@ -721,6 +943,9 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
stream = dc->current_state->streams[0];
plane = (stream ? dc->current_state->stream_status[0].plane_states[0] : NULL);
+ if (!stream || !plane)
+ return false;
+
if (stream && plane) {
cursor_cache_enable = stream->cursor_position.enable &&
plane->address.grph.cursor_cache_addr.quad_part;
@@ -743,7 +968,8 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB8888 &&
plane->address.page_table_base.quad_part == 0 &&
dc->hwss.does_plane_fit_in_mall &&
- dc->hwss.does_plane_fit_in_mall(dc, plane,
+ dc->hwss.does_plane_fit_in_mall(dc, plane->plane_size.surface_pitch,
+ plane->plane_size.surface_size.height, plane->format,
cursor_cache_enable ? &cursor_attr : NULL)) {
unsigned int v_total = stream->adjust.v_total_max ?
stream->adjust.v_total_max : stream->timing.v_total;
@@ -835,11 +1061,11 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
cmd.mall.cursor_height = cursor_attr.height;
cmd.mall.cursor_pitch = cursor_attr.pitch;
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
/* Use copied cursor, and it's okay to not switch back */
cursor_attr.address.quad_part = cmd.mall.cursor_copy_dst.quad_part;
- dc_stream_set_cursor_attributes(stream, &cursor_attr);
+ dc_stream_program_cursor_attributes(stream, &cursor_attr);
}
/* Enable MALL */
@@ -851,7 +1077,7 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
cmd.mall.tmr_scale = tmr_scale;
cmd.mall.debug_bits = dc->debug.mall_error_as_fatal;
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
return true;
}
@@ -868,16 +1094,20 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
cmd.mall.header.payload_bytes =
sizeof(cmd.mall) - sizeof(cmd.mall.header);
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
-bool dcn30_does_plane_fit_in_mall(struct dc *dc, struct dc_plane_state *plane, struct dc_cursor_attributes *cursor_attr)
+bool dcn30_does_plane_fit_in_mall(struct dc *dc,
+ unsigned int pitch,
+ unsigned int height,
+ enum surface_pixel_format format,
+ struct dc_cursor_attributes *cursor_attr)
{
// add meta size?
- unsigned int surface_size = plane->plane_size.surface_pitch * plane->plane_size.surface_size.height *
- (plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4);
+ unsigned int surface_size = pitch * height *
+ (format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4);
unsigned int mall_size = dc->caps.mall_size_total;
unsigned int cursor_size = 0;
@@ -925,7 +1155,7 @@ void dcn30_hardware_release(struct dc *dc)
if (!pipe->stream)
continue;
- if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+ if (dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_MAIN) {
subvp_in_use = true;
break;
}
@@ -956,11 +1186,7 @@ void dcn30_set_disp_pattern_generator(const struct dc *dc,
void dcn30_prepare_bandwidth(struct dc *dc,
struct dc_state *context)
{
- bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support;
- /* Any transition into an FPO config should disable MCLK switching first to avoid
- * driver and FW P-State synchronization issues.
- */
- if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) {
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !dc->clk_mgr->clks.fw_based_mclk_switching) {
dc->optimized_required = true;
context->bw_ctx.bw.dcn.clk.p_state_change_support = false;
}
@@ -971,36 +1197,82 @@ void dcn30_prepare_bandwidth(struct dc *dc,
dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz);
dcn20_prepare_bandwidth(dc, context);
- /*
- * enabled -> enabled: do not disable
- * enabled -> disabled: disable
- * disabled -> enabled: don't care
- * disabled -> disabled: don't care
- */
- if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
+
+ if (!dc->clk_mgr->clks.fw_based_mclk_switching)
dc_dmub_srv_p_state_delegate(dc, false, context);
+}
+
+void dcn30_wait_for_all_pending_updates(const struct pipe_ctx *pipe_ctx)
+{
+ struct timing_generator *tg = pipe_ctx->stream_res.tg;
+ bool pending_updates = false;
+ unsigned int i;
+
+ if (tg && tg->funcs->is_tg_enabled(tg)) {
+ // Poll for 100ms maximum
+ for (i = 0; i < 100000; i++) {
+ pending_updates = false;
+ if (tg->funcs->get_optc_double_buffer_pending)
+ pending_updates |= tg->funcs->get_optc_double_buffer_pending(tg);
+
+ if (tg->funcs->get_otg_double_buffer_pending)
+ pending_updates |= tg->funcs->get_otg_double_buffer_pending(tg);
+
+ if (tg->funcs->get_pipe_update_pending && pipe_ctx->plane_state)
+ pending_updates |= tg->funcs->get_pipe_update_pending(tg);
- if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) {
- /* After disabling P-State, restore the original value to ensure we get the correct P-State
- * on the next optimize. */
- context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support;
+ if (!pending_updates)
+ break;
+
+ udelay(1);
+ }
}
}
-void dcn30_set_static_screen_control(struct pipe_ctx **pipe_ctx,
- int num_pipes, const struct dc_static_screen_params *params)
+void dcn30_get_underflow_debug_data(const struct dc *dc,
+ struct timing_generator *tg,
+ struct dc_underflow_debug_data *out_data)
{
- unsigned int i;
- unsigned int triggers = 0;
-
- if (params->triggers.surface_update)
- triggers |= 0x100;
- if (params->triggers.cursor_update)
- triggers |= 0x8;
- if (params->triggers.force_trigger)
- triggers |= 0x1;
-
- for (i = 0; i < num_pipes; i++)
- pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(pipe_ctx[i]->stream_res.tg,
- triggers, params->num_frames);
+ struct hubbub *hubbub = dc->res_pool->hubbub;
+
+ if (tg) {
+ uint32_t v_blank_start = 0, v_blank_end = 0;
+
+ out_data->otg_inst = tg->inst;
+
+ tg->funcs->get_scanoutpos(tg,
+ &v_blank_start,
+ &v_blank_end,
+ &out_data->h_position,
+ &out_data->v_position);
+
+ out_data->otg_frame_count = tg->funcs->get_frame_count(tg);
+
+ out_data->otg_underflow = tg->funcs->is_optc_underflow_occurred(tg);
+ }
+
+ for (int i = 0; i < MAX_PIPES; i++) {
+ struct hubp *hubp = dc->res_pool->hubps[i];
+
+ if (hubp) {
+ if (hubp->funcs->hubp_get_underflow_status)
+ out_data->hubps[i].hubp_underflow = hubp->funcs->hubp_get_underflow_status(hubp);
+
+ if (hubp->funcs->hubp_in_blank)
+ out_data->hubps[i].hubp_in_blank = hubp->funcs->hubp_in_blank(hubp);
+
+ if (hubp->funcs->hubp_get_current_read_line)
+ out_data->hubps[i].hubp_readline = hubp->funcs->hubp_get_current_read_line(hubp);
+
+ if (hubp->funcs->hubp_get_det_config_error)
+ out_data->hubps[i].det_config_error = hubp->funcs->hubp_get_det_config_error(hubp);
+ }
+ }
+
+ if (hubbub->funcs->get_det_sizes)
+ hubbub->funcs->get_det_sizes(hubbub, out_data->curr_det_sizes, out_data->target_det_sizes);
+
+ if (hubbub->funcs->compbuf_config_error)
+ out_data->compbuf_config_error = hubbub->funcs->compbuf_config_error(hubbub);
+
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h
index ce19c54097f8..40afbbfb5b9c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h
@@ -27,8 +27,9 @@
#define __DC_HWSS_DCN30_H__
#include "hw_sequencer_private.h"
-#include "dcn20/dcn20_hwseq.h"
+
struct dc;
+struct dc_underflow_debug_data;
void dcn30_init_hw(struct dc *dc);
void dcn30_program_all_writeback_pipes_in_tree(
@@ -52,12 +53,18 @@ bool dcn30_mmhubbub_warmup(
unsigned int num_dwb,
struct dc_writeback_info *wb_info);
+void dcn30_log_color_state(struct dc *dc,
+ struct dc_log_buffer_ctx *log_ctx);
+
bool dcn30_set_blend_lut(struct pipe_ctx *pipe_ctx,
const struct dc_plane_state *plane_state);
bool dcn30_set_input_transfer_func(struct dc *dc,
struct pipe_ctx *pipe_ctx,
const struct dc_plane_state *plane_state);
+
+void dcn30_program_gamut_remap(struct pipe_ctx *pipe_ctx);
+
bool dcn30_set_output_transfer_func(struct dc *dc,
struct pipe_ctx *pipe_ctx,
const struct dc_stream_state *stream);
@@ -65,7 +72,10 @@ void dcn30_set_avmute(struct pipe_ctx *pipe_ctx, bool enable);
void dcn30_update_info_frame(struct pipe_ctx *pipe_ctx);
void dcn30_program_dmdata_engine(struct pipe_ctx *pipe_ctx);
-bool dcn30_does_plane_fit_in_mall(struct dc *dc, struct dc_plane_state *plane,
+bool dcn30_does_plane_fit_in_mall(struct dc *dc,
+ unsigned int pitch,
+ unsigned int height,
+ enum surface_pixel_format format,
struct dc_cursor_attributes *cursor_attr);
bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable);
@@ -87,7 +97,10 @@ void dcn30_set_hubp_blank(const struct dc *dc,
void dcn30_prepare_bandwidth(struct dc *dc,
struct dc_state *context);
-void dcn30_set_static_screen_control(struct pipe_ctx **pipe_ctx,
- int num_pipes, const struct dc_static_screen_params *params);
+void dcn30_wait_for_all_pending_updates(const struct pipe_ctx *pipe_ctx);
+
+void dcn30_get_underflow_debug_data(const struct dc *dc,
+ struct timing_generator *tg,
+ struct dc_underflow_debug_data *out_data);
#endif /* __DC_HWSS_DCN30_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
index 0de8b2783cf6..d7ff55669bac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
@@ -23,20 +23,21 @@
*
*/
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10/dcn10_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn21/dcn21_hwseq.h"
-#include "dcn30_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
#include "dcn30_init.h"
static const struct hw_sequencer_funcs dcn30_funcs = {
- .program_gamut_remap = dcn10_program_gamut_remap,
+ .program_gamut_remap = dcn30_program_gamut_remap,
.init_hw = dcn30_init_hw,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
.post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
.update_plane_addr = dcn20_update_plane_addr,
@@ -64,10 +65,11 @@ static const struct hw_sequencer_funcs dcn30_funcs = {
.update_bandwidth = dcn20_update_bandwidth,
.set_drr = dcn10_set_drr,
.get_position = dcn10_get_position,
- .set_static_screen_control = dcn30_set_static_screen_control,
+ .set_static_screen_control = dcn10_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
+ .log_color_state = dcn30_log_color_state,
.get_hw_state = dcn10_get_hw_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
@@ -85,7 +87,6 @@ static const struct hw_sequencer_funcs dcn30_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
@@ -107,12 +108,13 @@ static const struct hw_sequencer_funcs dcn30_funcs = {
.set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
.get_dcc_en_bits = dcn10_get_dcc_en_bits,
.update_visual_confirm_color = dcn10_update_visual_confirm_color,
- .is_abm_supported = dcn21_is_abm_supported
+ .is_abm_supported = dcn21_is_abm_supported,
+ .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates,
+ .get_underflow_debug_data = dcn30_get_underflow_debug_data,
};
static const struct hwseq_private_funcs dcn30_private_funcs = {
.init_pipes = dcn10_init_pipes,
- .update_plane_addr = dcn20_update_plane_addr,
.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
.update_mpcc = dcn20_update_mpcc,
.set_input_transfer_func = dcn30_set_input_transfer_func,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h
index c280ff90bfa3..c280ff90bfa3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.c
index 10bedb2ea62a..10bedb2ea62a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.h
index aa3df3f77108..aa3df3f77108 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
index 61205cdbe2d5..8d7ceb7b32b8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
@@ -23,22 +23,23 @@
*
*/
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10/dcn10_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn21/dcn21_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
-#include "dcn301_hwseq.h"
+#include "dcn301/dcn301_hwseq.h"
#include "dcn301_init.h"
static const struct hw_sequencer_funcs dcn301_funcs = {
- .program_gamut_remap = dcn10_program_gamut_remap,
+ .program_gamut_remap = dcn30_program_gamut_remap,
.init_hw = dcn10_init_hw,
.power_down_on_boot = dcn10_power_down_on_boot,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
.post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
.update_plane_addr = dcn20_update_plane_addr,
@@ -53,9 +54,6 @@ static const struct hw_sequencer_funcs dcn301_funcs = {
.enable_stream = dcn20_enable_stream,
.disable_stream = dce110_disable_stream,
.unblank_stream = dcn20_unblank_stream,
-#ifdef FREESYNC_POWER_OPTIMIZE
- .are_streams_coarse_grain_aligned = dcn20_are_streams_coarse_grain_aligned,
-#endif
.blank_stream = dce110_blank_stream,
.enable_audio_stream = dce110_enable_audio_stream,
.disable_audio_stream = dce110_disable_audio_stream,
@@ -72,6 +70,7 @@ static const struct hw_sequencer_funcs dcn301_funcs = {
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
+ .log_color_state = dcn30_log_color_state,
.get_hw_state = dcn10_get_hw_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
@@ -88,7 +87,6 @@ static const struct hw_sequencer_funcs dcn301_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
@@ -109,11 +107,11 @@ static const struct hw_sequencer_funcs dcn301_funcs = {
.optimize_pwr_state = dcn21_optimize_pwr_state,
.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
.update_visual_confirm_color = dcn10_update_visual_confirm_color,
+ .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates,
};
static const struct hwseq_private_funcs dcn301_private_funcs = {
.init_pipes = dcn10_init_pipes,
- .update_plane_addr = dcn20_update_plane_addr,
.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
.update_mpcc = dcn20_update_mpcc,
.set_input_transfer_func = dcn30_set_input_transfer_func,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h
index 0bca48ccbfa2..a6e0115a53ee 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h
@@ -23,8 +23,8 @@
*
*/
-#ifndef __DC_DCN30_INIT_H__
-#define __DC_DCN30_INIT_H__
+#ifndef __DC_DCN301_INIT_H__
+#define __DC_DCN301_INIT_H__
struct dc;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.c
index 0a6d58dd8f6d..0a6d58dd8f6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.h
index 1e5126a0e695..1e5126a0e695 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c
index eb375f30f5bc..637f9514d37b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c
@@ -23,7 +23,7 @@
*
*/
-#include "dcn302_hwseq.h"
+#include "dcn302/dcn302_hwseq.h"
#include "dcn30/dcn30_init.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h
index 899587b93aa1..899587b93aa1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.c
index e3b250918f39..3bc56ac346f3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.c
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
+// SPDX-License-Identifier: MIT
/*
- * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -19,52 +19,46 @@
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
*/
-#include <linux/types.h>
-#include "kfd_priv.h"
-#include "amdgpu_ids.h"
+#include "dcn303_hwseq.h"
-static unsigned int pasid_bits = 16;
-static bool pasids_allocated; /* = false */
+#include "dce/dce_hwseq.h"
-bool kfd_set_pasid_limit(unsigned int new_limit)
-{
- if (new_limit < 2)
- return false;
+#include "reg_helper.h"
+#include "dc.h"
- if (new_limit < (1U << pasid_bits)) {
- if (pasids_allocated)
- /* We've already allocated user PASIDs, too late to
- * change the limit
- */
- return false;
+#define DC_LOGGER_INIT(logger)
- while (new_limit < (1U << pasid_bits))
- pasid_bits--;
- }
+#define CTX \
+ hws->ctx
+#define REG(reg)\
+ hws->regs->reg
- return true;
-}
+#undef FN
+#define FN(reg_name, field_name) \
+ hws->shifts->field_name, hws->masks->field_name
-unsigned int kfd_get_pasid_limit(void)
+
+void dcn303_dpp_pg_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool power_on)
{
- return 1U << pasid_bits;
+ /*DCN303 removes PG registers*/
}
-u32 kfd_pasid_alloc(void)
+void dcn303_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on)
{
- int r = amdgpu_pasid_alloc(pasid_bits);
-
- if (r > 0) {
- pasids_allocated = true;
- return r;
- }
+ /*DCN303 removes PG registers*/
+}
- return 0;
+void dcn303_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool power_on)
+{
+ /*DCN303 removes PG registers*/
}
-void kfd_pasid_free(u32 pasid)
+void dcn303_enable_power_gating_plane(struct dce_hwseq *hws, bool enable)
{
- amdgpu_pasid_free(pasid);
+ /*DCN303 removes PG registers*/
}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.h
new file mode 100644
index 000000000000..7fdfc4175f80
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.h
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_HWSS_DCN303_H__
+#define __DC_HWSS_DCN303_H__
+
+#include "hw_sequencer_private.h"
+
+void dcn303_dpp_pg_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool power_on);
+void dcn303_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on);
+void dcn303_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool power_on);
+void dcn303_enable_power_gating_plane(struct dce_hwseq *hws, bool enable);
+
+#endif /* __DC_HWSS_DCN303_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c
new file mode 100644
index 000000000000..edb4d68b8187
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ */
+
+#include "dcn303/dcn303_hwseq.h"
+#include "dcn30/dcn30_init.h"
+#include "dc.h"
+
+#include "dcn303_init.h"
+
+void dcn303_hw_sequencer_construct(struct dc *dc)
+{
+ dcn30_hw_sequencer_construct(dc);
+
+ dc->hwseq->funcs.dpp_pg_control = dcn303_dpp_pg_control;
+ dc->hwseq->funcs.hubp_pg_control = dcn303_hubp_pg_control;
+ dc->hwseq->funcs.dsc_pg_control = dcn303_dsc_pg_control;
+ dc->hwseq->funcs.enable_power_gating_plane = dcn303_enable_power_gating_plane;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h
new file mode 100644
index 000000000000..4949981126d7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ */
+
+#ifndef __DC_DCN303_INIT_H__
+#define __DC_DCN303_INIT_H__
+
+struct dc;
+
+void dcn303_hw_sequencer_construct(struct dc *dc);
+
+#endif /* __DC_DCN303_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
index 2a7f47642a44..b822f2dffff0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c
@@ -45,11 +45,13 @@
#include "link_hwss.h"
#include "dpcd_defs.h"
#include "dce/dmub_outbox.h"
-#include "link.h"
-#include "dcn10/dcn10_hw_sequencer.h"
+#include "link_service.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
#include "inc/link_enc_cfg.h"
#include "dcn30/dcn30_vpg.h"
#include "dce/dce_i2c_hw.h"
+#include "dce/dmub_abm_lcd.h"
#define DC_LOGGER_INIT(logger)
@@ -96,11 +98,10 @@ static void enable_memory_low_power(struct dc *dc)
if (dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) {
// Power down VPGs
for (i = 0; i < dc->res_pool->stream_enc_count; i++)
- dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
-#if defined(CONFIG_DRM_AMD_DC_FP)
+ if (dc->res_pool->stream_enc[i]->vpg)
+ dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++)
dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg);
-#endif
}
}
@@ -112,6 +113,7 @@ void dcn31_init_hw(struct dc *dc)
struct dc_bios *dcb = dc->ctx->dc_bios;
struct resource_pool *res_pool = dc->res_pool;
uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+ uint32_t user_level = MAX_BACKLIGHT_LEVEL;
int i;
if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
@@ -132,7 +134,7 @@ void dcn31_init_hw(struct dc *dc)
res_pool->ref_clocks.xtalin_clock_inKhz =
dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency;
- if (res_pool->dccg && res_pool->hubbub) {
+ if (res_pool->hubbub) {
(res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg,
dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency,
@@ -223,13 +225,15 @@ void dcn31_init_hw(struct dc *dc)
for (i = 0; i < dc->link_count; i++) {
struct dc_link *link = dc->links[i];
- if (link->panel_cntl)
+ if (link->panel_cntl) {
backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+ user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+ }
}
for (i = 0; i < dc->res_pool->pipe_count; i++) {
if (abms[i] != NULL)
- abms[i]->funcs->abm_init(abms[i], backlight);
+ abms[i]->funcs->abm_init(abms[i], backlight, user_level);
}
/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
@@ -254,10 +258,10 @@ void dcn31_init_hw(struct dc *dc)
if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
- if (dc->clk_mgr->funcs->notify_wm_ranges)
+ if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges)
dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
- if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled)
+ if (dc->clk_mgr && dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled)
dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
@@ -271,7 +275,7 @@ void dcn31_init_hw(struct dc *dc)
// Get DMCUB capabilities
dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv);
dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;
- dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch;
+ dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver;
}
void dcn31_dsc_pg_control(
@@ -392,6 +396,11 @@ void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx)
pipe_ctx->stream_res.stream_enc,
&pipe_ctx->stream_res.encoder_info_frame);
else if (pipe_ctx->stream->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
+ if (pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->update_dp_info_packets_sdp_line_num)
+ pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->update_dp_info_packets_sdp_line_num(
+ pipe_ctx->stream_res.hpo_dp_stream_enc,
+ &pipe_ctx->stream_res.encoder_info_frame);
+
pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->update_dp_info_packets(
pipe_ctx->stream_res.hpo_dp_stream_enc,
&pipe_ctx->stream_res.encoder_info_frame);
@@ -415,7 +424,7 @@ void dcn31_z10_save_init(struct dc *dc)
cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT;
cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT;
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
void dcn31_z10_restore(const struct dc *dc)
@@ -433,7 +442,7 @@ void dcn31_z10_restore(const struct dc *dc)
cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT;
cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_RESTORE;
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
void dcn31_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on)
@@ -477,7 +486,7 @@ void dcn31_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool p
int dcn31_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config)
{
- struct dcn_hubbub_phys_addr_config config;
+ struct dcn_hubbub_phys_addr_config config = {0};
config.system_aperture.fb_top = pa_config->system_aperture.fb_top;
config.system_aperture.fb_offset = pa_config->system_aperture.fb_offset;
@@ -515,21 +524,33 @@ static void dcn31_reset_back_end_for_pipe(
dc->hwss.set_abm_immediate_disable(pipe_ctx);
+ link = pipe_ctx->stream->link;
+
+ if (dc->hwseq)
+ dc->hwseq->wa_state.skip_blank_stream = false;
+
+ if ((!pipe_ctx->stream->dpms_off || link->link_status.link_active) &&
+ (link->connector_signal == SIGNAL_TYPE_EDP)) {
+ dc->hwss.blank_stream(pipe_ctx);
+ if (dc->hwseq)
+ dc->hwseq->wa_state.skip_blank_stream = true;
+ }
+
pipe_ctx->stream_res.tg->funcs->set_dsc_config(
pipe_ctx->stream_res.tg,
OPTC_DSC_DISABLED, 0, 0);
+
pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg);
+
pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, false);
if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass)
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
+ if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+ pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0;
- if (pipe_ctx->stream_res.tg->funcs->set_drr)
- pipe_ctx->stream_res.tg->funcs->set_drr(
- pipe_ctx->stream_res.tg, NULL);
+ set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL);
- link = pipe_ctx->stream->link;
/* DPMS may already disable or */
/* dpms_off status is incorrect due to fastboot
* feature. When system resume from S4 with second
@@ -541,6 +562,19 @@ static void dcn31_reset_back_end_for_pipe(
else if (pipe_ctx->stream_res.audio)
dc->hwss.disable_audio_stream(pipe_ctx);
+ /* Temporary workaround to perform DSC programming ahead of pipe reset
+ * for smartmux/SPRS
+ * TODO: Remove SmartMux/SPRS checks once movement of DSC programming is generalized
+ */
+ if (pipe_ctx->stream->timing.flags.DSC) {
+ if ((pipe_ctx->stream->signal == SIGNAL_TYPE_EDP &&
+ ((link->dc->config.smart_mux_version && link->dc->is_switch_in_progress_dest)
+ || link->is_dds || link->skip_implict_edp_power_control)) &&
+ (dc_is_dp_signal(pipe_ctx->stream->signal) ||
+ dc_is_virtual_signal(pipe_ctx->stream->signal)))
+ dc->link_srv->set_dsc_enable(pipe_ctx, false);
+ }
+
/* free acquired resources */
if (pipe_ctx->stream_res.audio) {
/*disable az_endpoint*/
@@ -555,7 +589,8 @@ static void dcn31_reset_back_end_for_pipe(
pipe_ctx->stream_res.audio = NULL;
}
}
-
+ if (dc->hwseq)
+ dc->hwseq->wa_state.skip_blank_stream = false;
pipe_ctx->stream = NULL;
DC_LOG_DEBUG("Reset back end for pipe %d, tg:%d\n",
pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst);
@@ -584,6 +619,17 @@ void dcn31_reset_hw_ctx_wrap(
pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) {
struct clock_source *old_clk = pipe_ctx_old->clock_source;
+ /* Reset pipe which is seamless boot stream. */
+ if (!pipe_ctx_old->plane_state &&
+ dc->res_pool->hubbub->funcs->program_det_size &&
+ dc->res_pool->hubbub->funcs->wait_for_det_apply) {
+ dc->res_pool->hubbub->funcs->program_det_size(
+ dc->res_pool->hubbub, pipe_ctx_old->plane_res.hubp->inst, 0);
+ /* Wait det size changed. */
+ dc->res_pool->hubbub->funcs->wait_for_det_apply(
+ dc->res_pool->hubbub, pipe_ctx_old->plane_res.hubp->inst);
+ }
+
dcn31_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
if (hws->funcs.enable_stream_gating)
hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
@@ -593,7 +639,8 @@ void dcn31_reset_hw_ctx_wrap(
}
/* New dc_state in the process of being applied to hardware. */
- link_enc_cfg_set_transient_mode(dc, dc->current_state, context);
+ if (!dc->config.unify_link_enc_assignment)
+ link_enc_cfg_set_transient_mode(dc, dc->current_state, context);
}
void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable)
@@ -601,3 +648,69 @@ void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable)
if (hws->ctx->dc->debug.hpo_optimization)
REG_UPDATE(HPO_TOP_HW_CONTROL, HPO_IO_EN, !!enable);
}
+
+void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+ int num_pipes, const struct dc_static_screen_params *params)
+{
+ unsigned int i;
+ unsigned int triggers = 0;
+
+ if (params->triggers.surface_update)
+ triggers |= 0x100;
+ if (params->triggers.cursor_update)
+ triggers |= 0x8;
+ if (params->triggers.force_trigger)
+ triggers |= 0x1;
+
+ for (i = 0; i < num_pipes; i++)
+ pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(pipe_ctx[i]->stream_res.tg,
+ triggers, params->num_frames);
+}
+
+static void dmub_abm_set_backlight(struct dc_context *dc,
+ struct set_backlight_level_params *backlight_level_params, uint32_t panel_inst)
+{
+ union dmub_rb_cmd cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.abm_set_backlight.header.type = DMUB_CMD__ABM;
+ cmd.abm_set_backlight.header.sub_type = DMUB_CMD__ABM_SET_BACKLIGHT;
+ cmd.abm_set_backlight.abm_set_backlight_data.frame_ramp = backlight_level_params->frame_ramp;
+ cmd.abm_set_backlight.abm_set_backlight_data.backlight_user_level = backlight_level_params->backlight_pwm_u16_16;
+ cmd.abm_set_backlight.abm_set_backlight_data.backlight_control_type =
+ (enum dmub_backlight_control_type) backlight_level_params->control_type;
+ cmd.abm_set_backlight.abm_set_backlight_data.min_luminance = backlight_level_params->min_luminance;
+ cmd.abm_set_backlight.abm_set_backlight_data.max_luminance = backlight_level_params->max_luminance;
+ cmd.abm_set_backlight.abm_set_backlight_data.min_backlight_pwm = backlight_level_params->min_backlight_pwm;
+ cmd.abm_set_backlight.abm_set_backlight_data.max_backlight_pwm = backlight_level_params->max_backlight_pwm;
+ cmd.abm_set_backlight.abm_set_backlight_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1;
+ cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst);
+ cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data);
+
+ dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+bool dcn31_set_backlight_level(struct pipe_ctx *pipe_ctx,
+ struct set_backlight_level_params *backlight_level_params)
+{
+ struct dc_context *dc = pipe_ctx->stream->ctx;
+ struct abm *abm = pipe_ctx->stream_res.abm;
+ struct timing_generator *tg = pipe_ctx->stream_res.tg;
+ struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl;
+ uint32_t otg_inst;
+
+ if (!abm || !tg || !panel_cntl)
+ return false;
+
+ otg_inst = tg->inst;
+
+ dcn21_dmub_abm_set_pipe(abm,
+ otg_inst,
+ SET_ABM_PIPE_NORMAL,
+ panel_cntl->inst,
+ panel_cntl->pwrseq_inst);
+
+ dmub_abm_set_backlight(dc, backlight_level_params, panel_cntl->inst);
+
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.h
index edfc01d6ad73..0d09aa8cfb65 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.h
@@ -51,9 +51,14 @@ int dcn31_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_
void dcn31_reset_hw_ctx_wrap(
struct dc *dc,
struct dc_state *context);
+bool dcn31_set_backlight_level(struct pipe_ctx *pipe_ctx,
+ struct set_backlight_level_params *params);
bool dcn31_is_abm_supported(struct dc *dc,
struct dc_state *context, struct dc_stream_state *stream);
void dcn31_init_pipes(struct dc *dc, struct dc_state *context);
void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable);
+void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+ int num_pipes, const struct dc_static_screen_params *params);
+
#endif /* __DC_HWSS_DCN31_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
index 1d7bc1e39afe..5a6a459da224 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
@@ -23,8 +23,8 @@
*
*/
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10/dcn10_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn21/dcn21_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
@@ -34,12 +34,13 @@
#include "dcn31_init.h"
static const struct hw_sequencer_funcs dcn31_funcs = {
- .program_gamut_remap = dcn10_program_gamut_remap,
+ .program_gamut_remap = dcn30_program_gamut_remap,
.init_hw = dcn31_init_hw,
.power_down_on_boot = dcn10_power_down_on_boot,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
.post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
.update_plane_addr = dcn20_update_plane_addr,
@@ -67,10 +68,11 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
.update_bandwidth = dcn20_update_bandwidth,
.set_drr = dcn10_set_drr,
.get_position = dcn10_get_position,
- .set_static_screen_control = dcn30_set_static_screen_control,
+ .set_static_screen_control = dcn31_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
+ .log_color_state = dcn30_log_color_state,
.get_hw_state = dcn10_get_hw_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
@@ -88,7 +90,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
@@ -97,7 +98,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
.set_flip_control_gsl = dcn20_set_flip_control_gsl,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
- .power_down = dce110_power_down,
.set_backlight_level = dcn21_set_backlight_level,
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
.set_pipe = dcn21_set_pipe,
@@ -111,11 +111,12 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
.optimize_pwr_state = dcn21_optimize_pwr_state,
.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
.update_visual_confirm_color = dcn10_update_visual_confirm_color,
+ .setup_hpo_hw_control = dcn31_setup_hpo_hw_control,
+ .get_underflow_debug_data = dcn30_get_underflow_debug_data,
};
static const struct hwseq_private_funcs dcn31_private_funcs = {
.init_pipes = dcn10_init_pipes,
- .update_plane_addr = dcn20_update_plane_addr,
.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
.update_mpcc = dcn20_update_mpcc,
.set_input_transfer_func = dcn30_set_input_transfer_func,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h
index a3db08c8bd35..a3db08c8bd35 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
index 4d2820ffe468..f925f669f2a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
@@ -46,8 +46,8 @@
#include "link_hwss.h"
#include "dpcd_defs.h"
#include "dce/dmub_outbox.h"
-#include "link.h"
-#include "dcn10/dcn10_hw_sequencer.h"
+#include "link_service.h"
+#include "dcn10/dcn10_hwseq.h"
#include "inc/link_enc_cfg.h"
#include "dcn30/dcn30_vpg.h"
#include "dce/dce_i2c_hw.h"
@@ -62,36 +62,13 @@
#define REG(reg)\
hws->regs->reg
#define DC_LOGGER \
- dc->ctx->logger
+ stream->ctx->logger
#undef FN
#define FN(reg_name, field_name) \
hws->shifts->field_name, hws->masks->field_name
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
@@ -105,8 +82,22 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
if (enable) {
struct dsc_config dsc_cfg;
- struct dsc_optc_config dsc_optc_cfg;
+ struct dsc_optc_config dsc_optc_cfg = {0};
enum optc_dsc_mode optc_dsc_mode;
+ struct dcn_dsc_state dsc_state = {0};
+
+ if (!dsc) {
+ DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+
+ if (dsc->funcs->dsc_read_state) {
+ dsc->funcs->dsc_read_state(dsc, &dsc_state);
+ if (!dsc_state.dsc_fw_en) {
+ DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+ }
/* Enable DSC hw block */
dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
@@ -183,8 +174,8 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
+ int odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false);
+ int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true);
struct mpc *mpc = dc->res_pool->mpc;
int i;
@@ -194,22 +185,18 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx
pipe_ctx->stream_res.tg->funcs->set_odm_combine(
pipe_ctx->stream_res.tg,
opp_inst, opp_cnt,
- &pipe_ctx->stream->timing);
+ odm_slice_width, last_odm_slice_width);
else
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
if (mpc->funcs->set_out_rate_control) {
for (i = 0; i < opp_cnt; ++i) {
mpc->funcs->set_out_rate_control(
mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
+ false,
+ 0,
+ NULL);
}
}
@@ -343,7 +330,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
unsigned int odm_combine_factor = 0;
bool two_pix_per_container = false;
- two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
+ two_pix_per_container = pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing);
odm_combine_factor = get_odm_config(pipe_ctx, NULL);
if (stream->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
@@ -373,36 +360,62 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
return odm_combine_factor;
}
-void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx)
+void dcn314_calculate_pix_rate_divider(
+ struct dc *dc,
+ struct dc_state *context,
+ const struct dc_stream_state *stream)
{
- uint32_t pix_per_cycle = 1;
- uint32_t odm_combine_factor = 1;
+ struct dce_hwseq *hws = dc->hwseq;
+ struct pipe_ctx *pipe_ctx = NULL;
+ unsigned int k1_div = PIXEL_RATE_DIV_NA;
+ unsigned int k2_div = PIXEL_RATE_DIV_NA;
- if (!pipe_ctx || !pipe_ctx->stream || !pipe_ctx->stream_res.stream_enc)
- return;
+ pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream);
- odm_combine_factor = get_odm_config(pipe_ctx, NULL);
- if (optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing) || odm_combine_factor > 1)
- pix_per_cycle = 2;
+ if (pipe_ctx) {
+ if (hws->funcs.calculate_dccg_k1_k2_values)
+ hws->funcs.calculate_dccg_k1_k2_values(pipe_ctx, &k1_div, &k2_div);
+
+ pipe_ctx->pixel_rate_divider.div_factor1 = k1_div;
+ pipe_ctx->pixel_rate_divider.div_factor2 = k2_div;
+ }
+}
- if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode)
- pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc,
- pix_per_cycle);
+static bool dcn314_is_pipe_dig_fifo_on(struct pipe_ctx *pipe)
+{
+ return pipe && pipe->stream
+ // Check dig's otg instance.
+ && pipe->stream_res.stream_enc
+ && pipe->stream_res.stream_enc->funcs->dig_source_otg
+ && pipe->stream_res.tg->inst == pipe->stream_res.stream_enc->funcs->dig_source_otg(pipe->stream_res.stream_enc)
+ && pipe->stream->link && pipe->stream->link->link_enc
+ && pipe->stream->link->link_enc->funcs->is_dig_enabled
+ && pipe->stream->link->link_enc->funcs->is_dig_enabled(pipe->stream->link->link_enc)
+ && pipe->stream_res.stream_enc->funcs->is_fifo_enabled
+ && pipe->stream_res.stream_enc->funcs->is_fifo_enabled(pipe->stream_res.stream_enc);
}
-void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context)
+void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx)
{
unsigned int i;
struct pipe_ctx *pipe = NULL;
bool otg_disabled[MAX_PIPES] = {false};
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ if (i <= current_pipe_idx) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ } else {
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ }
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
- if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
+ if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal)) &&
+ !pipe->stream->apply_seamless_boot_optimization &&
+ !pipe->stream->apply_edp_fast_boot_optimization) {
+ if (dcn314_is_pipe_dig_fifo_on(pipe))
+ continue;
pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg);
reset_sync_context_for_pipe(dc, context, i);
otg_disabled[i] = true;
@@ -412,10 +425,30 @@ void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc
hws->ctx->dc->res_pool->dccg->funcs->trigger_dio_fifo_resync(hws->ctx->dc->res_pool->dccg);
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- pipe = &dc->current_state->res_ctx.pipe_ctx[i];
-
- if (otg_disabled[i])
+ if (i <= current_pipe_idx)
+ pipe = &context->res_ctx.pipe_ctx[i];
+ else
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (otg_disabled[i]) {
+ int opp_inst[MAX_PIPES] = { pipe->stream_res.opp->inst };
+ int opp_cnt = 1;
+ int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe, true);
+ int odm_slice_width = resource_get_odm_slice_dst_width(pipe, false);
+ struct pipe_ctx *odm_pipe;
+
+ for (odm_pipe = pipe->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
+ opp_inst[opp_cnt] = odm_pipe->stream_res.opp->inst;
+ opp_cnt++;
+ }
+ if (opp_cnt > 1)
+ pipe->stream_res.tg->funcs->set_odm_combine(
+ pipe->stream_res.tg,
+ opp_inst, opp_cnt,
+ odm_slice_width,
+ last_odm_slice_width);
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
+ }
}
}
@@ -476,7 +509,8 @@ void dcn314_disable_link_output(struct dc_link *link,
struct dmcu *dmcu = dc->res_pool->dmcu;
if (signal == SIGNAL_TYPE_EDP &&
- link->dc->hwss.edp_backlight_control)
+ link->dc->hwss.edp_backlight_control &&
+ !link->skip_implict_edp_power_control)
link->dc->hwss.edp_backlight_control(link, false);
else if (dmcu != NULL && dmcu->funcs->lock_phy)
dmcu->funcs->lock_phy(dmcu);
@@ -488,9 +522,81 @@ void dcn314_disable_link_output(struct dc_link *link,
* from enable/disable link output and only call edp panel control
* in enable_link_dp and disable_link_dp once.
*/
- if (dmcu != NULL && dmcu->funcs->lock_phy)
+ if (dmcu != NULL && dmcu->funcs->unlock_phy)
dmcu->funcs->unlock_phy(dmcu);
dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
apply_symclk_on_tx_off_wa(link);
}
+
+/**
+ * dcn314_dpp_pg_control - DPP power gate control.
+ *
+ * @hws: dce_hwseq reference.
+ * @dpp_inst: DPP instance reference.
+ * @power_on: true to power the DPP up (POWER_GATE field cleared), false to power-gate it.
+ *
+ * Power the given DPP instance up or down through its power-gating domain.
+ * If DPP power gating is disabled by debug option, a power-down request only force-disables the cursor in that DPP.
+ */
+void dcn314_dpp_pg_control(
+ struct dce_hwseq *hws,
+ unsigned int dpp_inst,
+ bool power_on)
+{
+ uint32_t power_gate = power_on ? 0 : 1;
+ uint32_t pwr_status = power_on ? 0 : 2;
+
+
+ if (hws->ctx->dc->debug.disable_dpp_power_gate) {
+ /* Workaround for DCN314 with disabled power gating */
+ if (!power_on) {
+
+ /* Force disable cursor if power gating is disabled */
+ struct dpp *dpp = hws->ctx->dc->res_pool->dpps[dpp_inst];
+ if (dpp && dpp->funcs->dpp_force_disable_cursor)
+ dpp->funcs->dpp_force_disable_cursor(dpp);
+ }
+ return;
+ }
+ if (REG(DOMAIN1_PG_CONFIG) == 0)
+ return;
+
+ switch (dpp_inst) {
+ case 0: /* DPP0 */
+ REG_UPDATE(DOMAIN1_PG_CONFIG,
+ DOMAIN1_POWER_GATE, power_gate);
+
+ REG_WAIT(DOMAIN1_PG_STATUS,
+ DOMAIN1_PGFSM_PWR_STATUS, pwr_status,
+ 1, 1000);
+ break;
+ case 1: /* DPP1 */
+ REG_UPDATE(DOMAIN3_PG_CONFIG,
+ DOMAIN3_POWER_GATE, power_gate);
+
+ REG_WAIT(DOMAIN3_PG_STATUS,
+ DOMAIN3_PGFSM_PWR_STATUS, pwr_status,
+ 1, 1000);
+ break;
+ case 2: /* DPP2 */
+ REG_UPDATE(DOMAIN5_PG_CONFIG,
+ DOMAIN5_POWER_GATE, power_gate);
+
+ REG_WAIT(DOMAIN5_PG_STATUS,
+ DOMAIN5_PGFSM_PWR_STATUS, pwr_status,
+ 1, 1000);
+ break;
+ case 3: /* DPP3 */
+ REG_UPDATE(DOMAIN7_PG_CONFIG,
+ DOMAIN7_POWER_GATE, power_gate);
+
+ REG_WAIT(DOMAIN7_PG_STATUS,
+ DOMAIN7_PGFSM_PWR_STATUS, pwr_status,
+ 1, 1000);
+ break;
+ default:
+ BREAK_TO_DEBUGGER();
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h
index eafcc4ea6d24..6c072d0274ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h
@@ -39,12 +39,14 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable);
unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div);
-void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx);
+void dcn314_calculate_pix_rate_divider(struct dc *dc, struct dc_state *context, const struct dc_stream_state *stream);
-void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context);
+void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx);
void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool clock_on);
void dcn314_disable_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal);
+void dcn314_dpp_pg_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool power_on);
+
#endif /* __DC_HWSS_DCN314_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
index 4ef85c3a0688..79faab1125d4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
@@ -24,8 +24,8 @@
*
*/
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10/dcn10_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn21/dcn21_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
@@ -36,12 +36,13 @@
#include "dcn314_init.h"
static const struct hw_sequencer_funcs dcn314_funcs = {
- .program_gamut_remap = dcn10_program_gamut_remap,
+ .program_gamut_remap = dcn30_program_gamut_remap,
.init_hw = dcn31_init_hw,
.power_down_on_boot = dcn10_power_down_on_boot,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
.post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
.update_plane_addr = dcn20_update_plane_addr,
@@ -69,11 +70,12 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
.update_bandwidth = dcn20_update_bandwidth,
.set_drr = dcn10_set_drr,
.get_position = dcn10_get_position,
- .set_static_screen_control = dcn30_set_static_screen_control,
+ .set_static_screen_control = dcn31_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
.get_hw_state = dcn10_get_hw_state,
+ .log_color_state = dcn30_log_color_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
.edp_backlight_control = dce110_edp_backlight_control,
@@ -90,7 +92,6 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
@@ -99,7 +100,6 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
.set_flip_control_gsl = dcn20_set_flip_control_gsl,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
- .power_down = dce110_power_down,
.set_backlight_level = dcn21_set_backlight_level,
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
.set_pipe = dcn21_set_pipe,
@@ -113,11 +113,13 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
.optimize_pwr_state = dcn21_optimize_pwr_state,
.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
.update_visual_confirm_color = dcn10_update_visual_confirm_color,
+ .calculate_pix_rate_divider = dcn314_calculate_pix_rate_divider,
+ .setup_hpo_hw_control = dcn31_setup_hpo_hw_control,
+ .get_underflow_debug_data = dcn30_get_underflow_debug_data,
};
static const struct hwseq_private_funcs dcn314_private_funcs = {
.init_pipes = dcn10_init_pipes,
- .update_plane_addr = dcn20_update_plane_addr,
.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
.update_mpcc = dcn20_update_mpcc,
.set_input_transfer_func = dcn30_set_input_transfer_func,
@@ -140,6 +142,7 @@ static const struct hwseq_private_funcs dcn314_private_funcs = {
.enable_power_gating_plane = dcn314_enable_power_gating_plane,
.dpp_root_clock_control = dcn314_dpp_root_clock_control,
.hubp_pg_control = dcn31_hubp_pg_control,
+ .dpp_pg_control = dcn314_dpp_pg_control,
.program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree,
.update_odm = dcn314_update_odm,
.dsc_pg_control = dcn314_dsc_pg_control,
@@ -151,7 +154,6 @@ static const struct hwseq_private_funcs dcn314_private_funcs = {
.set_shaper_3dlut = dcn20_set_shaper_3dlut,
.setup_hpo_hw_control = dcn31_setup_hpo_hw_control,
.calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values,
- .set_pixels_per_cycle = dcn314_set_pixels_per_cycle,
.resync_fifo_dccg_dio = dcn314_resync_fifo_dccg_dio,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h
index 8f92e66577cf..8f92e66577cf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index 680e7fa8d18a..f39292952702 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -48,8 +48,10 @@
#include "dsc.h"
#include "dcn20/dcn20_optc.h"
#include "dce/dmub_hw_lock_mgr.h"
-#include "dcn32_resource.h"
-#include "link.h"
+#include "dcn32/dcn32_resource.h"
+#include "link_service.h"
+#include "../dcn20/dcn20_hwseq.h"
+#include "dc_state_priv.h"
#define DC_LOGGER_INIT(logger)
@@ -58,8 +60,7 @@
#define REG(reg)\
hws->regs->reg
#define DC_LOGGER \
- dc->ctx->logger
-
+ dc->ctx->logger
#undef FN
#define FN(reg_name, field_name) \
@@ -73,14 +74,19 @@ void dcn32_dsc_pg_control(
uint32_t power_gate = power_on ? 0 : 1;
uint32_t pwr_status = power_on ? 0 : 2;
uint32_t org_ip_request_cntl = 0;
+ struct dc *dc = hws->ctx->dc;
+
+ if (dc->debug.disable_dsc_power_gate)
+ return;
- if (hws->ctx->dc->debug.disable_dsc_power_gate)
+ if (!dc->debug.enable_double_buffered_dsc_pg_support)
return;
REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
if (org_ip_request_cntl == 0)
REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
+ DC_LOG_DSC("%s DSC power gate for inst %d", power_gate ? "enable" : "disable", dsc_inst);
switch (dsc_inst) {
case 0: /* DSC0 */
REG_UPDATE(DOMAIN16_PG_CONFIG,
@@ -214,7 +220,7 @@ static bool dcn32_check_no_memory_request_for_cab(struct dc *dc)
static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *ctx)
{
int i;
- uint8_t num_ways = 0;
+ uint32_t num_ways = 0;
uint32_t mall_ss_size_bytes = 0;
mall_ss_size_bytes = ctx->bw_ctx.bw.dcn.mall_ss_size_bytes;
@@ -234,8 +240,10 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
// Convert number of cache lines required to number of ways
if (dc->debug.force_mall_ss_num_ways > 0) {
num_ways = dc->debug.force_mall_ss_num_ways;
+ } else if (dc->res_pool->funcs->calculate_mall_ways_from_bytes) {
+ num_ways = dc->res_pool->funcs->calculate_mall_ways_from_bytes(dc, mall_ss_size_bytes);
} else {
- num_ways = dcn32_helper_mall_bytes_to_ways(dc, mall_ss_size_bytes);
+ num_ways = 0;
}
return num_ways;
@@ -244,7 +252,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
{
union dmub_rb_cmd cmd;
- uint8_t ways, i;
+ uint8_t i;
+ uint32_t ways;
int j;
bool mall_ss_unsupported = false;
struct dc_plane_state *plane = NULL;
@@ -255,63 +264,64 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
for (i = 0; i < dc->current_state->stream_count; i++) {
/* MALL SS messaging is not supported with PSR at this time */
if (dc->current_state->streams[i] != NULL &&
- dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED)
+ dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED &&
+ (dc->current_state->stream_count > 1 || (!dc->current_state->streams[i]->dpms_off &&
+ dc->current_state->stream_status[i].plane_count > 0)))
return false;
}
if (enable) {
- if (dc->current_state) {
+ /* 1. Check no memory request case for CAB.
+ * If no memory request case, send CAB_ACTION NO_DF_REQ DMUB message
+ */
+ if (dcn32_check_no_memory_request_for_cab(dc)) {
+ /* Enable no-memory-requests case */
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS;
+ cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ;
+ cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
- /* 1. Check no memory request case for CAB.
- * If no memory request case, send CAB_ACTION NO_DF_REQ DMUB message
- */
- if (dcn32_check_no_memory_request_for_cab(dc)) {
- /* Enable no-memory-requests case */
- memset(&cmd, 0, sizeof(cmd));
- cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS;
- cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ;
- cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ return true;
+ }
- return true;
- }
+ /* 2. Check if all surfaces can fit in CAB.
+ * If surfaces can fit into CAB, send CAB_ACTION_ALLOW DMUB message
+ * and configure HUBP's to fetch from MALL
+ */
+ ways = dcn32_calculate_cab_allocation(dc, dc->current_state);
- /* 2. Check if all surfaces can fit in CAB.
- * If surfaces can fit into CAB, send CAB_ACTION_ALLOW DMUB message
- * and configure HUBP's to fetch from MALL
- */
- ways = dcn32_calculate_cab_allocation(dc, dc->current_state);
+ /* MALL not supported with Stereo3D or TMZ surface. If any plane is using stereo,
+ * or TMZ surface, don't try to enter MALL.
+ */
+ for (i = 0; i < dc->current_state->stream_count; i++) {
+ for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) {
+ plane = dc->current_state->stream_status[i].plane_states[j];
- /* MALL not supported with Stereo3D or TMZ surface. If any plane is using stereo,
- * or TMZ surface, don't try to enter MALL.
- */
- for (i = 0; i < dc->current_state->stream_count; i++) {
- for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) {
- plane = dc->current_state->stream_status[i].plane_states[j];
-
- if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO ||
- plane->address.tmz_surface) {
- mall_ss_unsupported = true;
- break;
- }
- }
- if (mall_ss_unsupported)
+ if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO ||
+ plane->address.tmz_surface) {
+ mall_ss_unsupported = true;
break;
+ }
}
- if (ways <= dc->caps.cache_num_ways && !mall_ss_unsupported) {
- memset(&cmd, 0, sizeof(cmd));
- cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS;
- cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB;
- cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
- cmd.cab.cab_alloc_ways = ways;
-
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ if (mall_ss_unsupported)
+ break;
+ }
+ if (ways <= dc->caps.cache_num_ways && !mall_ss_unsupported) {
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS;
+ cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB;
+ cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
+ cmd.cab.cab_alloc_ways = (uint8_t)ways;
- return true;
- }
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+ DC_LOG_MALL("enable scanout from MALL");
+ return true;
}
+
+ DC_LOG_MALL("surface cannot fit in CAB, disabling scanout from MALL\n");
return false;
}
@@ -322,7 +332,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable)
cmd.cab.header.payload_bytes =
sizeof(cmd.cab) - sizeof(cmd.cab.header);
- dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+ dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
return true;
}
@@ -343,8 +353,7 @@ void dcn32_commit_subvp_config(struct dc *dc, struct dc_state *context)
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
- if (pipe_ctx->stream && pipe_ctx->stream->mall_stream_config.paired_stream &&
- pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN) {
+ if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) {
// There is at least 1 SubVP pipe, so enable SubVP
enable_subvp = true;
break;
@@ -370,18 +379,20 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc,
bool subvp_immediate_flip = false;
bool subvp_in_use = false;
struct pipe_ctx *pipe;
+ enum mall_stream_type pipe_mall_type = SUBVP_NONE;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
pipe = &context->res_ctx.pipe_ctx[i];
+ pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
- if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+ if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN) {
subvp_in_use = true;
break;
}
}
if (top_pipe_to_program && top_pipe_to_program->stream && top_pipe_to_program->plane_state) {
- if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_MAIN &&
+ if (dc_state_get_pipe_subvp_type(context, top_pipe_to_program) == SUBVP_MAIN &&
top_pipe_to_program->plane_state->flip_immediate)
subvp_immediate_flip = true;
}
@@ -393,7 +404,7 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc,
if (!lock) {
for (i = 0; i < dc->res_pool->pipe_count; i++) {
pipe = &context->res_ctx.pipe_ctx[i];
- if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN &&
+ if (pipe->stream && pipe->plane_state && pipe_mall_type == SUBVP_MAIN &&
should_lock_all_pipes)
pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg, CRTC_STATE_VBLANK);
}
@@ -411,14 +422,7 @@ void dcn32_subvp_pipe_control_lock_fast(union block_sequence_params *params)
{
struct dc *dc = params->subvp_pipe_control_lock_fast_params.dc;
bool lock = params->subvp_pipe_control_lock_fast_params.lock;
- struct pipe_ctx *pipe_ctx = params->subvp_pipe_control_lock_fast_params.pipe_ctx;
- bool subvp_immediate_flip = false;
-
- if (pipe_ctx && pipe_ctx->stream && pipe_ctx->plane_state) {
- if (pipe_ctx->stream->mall_stream_config.type == SUBVP_MAIN &&
- pipe_ctx->plane_state->flip_immediate)
- subvp_immediate_flip = true;
- }
+ bool subvp_immediate_flip = params->subvp_pipe_control_lock_fast_params.subvp_immediate_flip;
// Don't need to lock for DRR VSYNC flips -- FW will wait for DRR pending update cleared.
if (subvp_immediate_flip) {
@@ -437,6 +441,7 @@ bool dcn32_set_mpc_shaper_3dlut(
{
struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
int mpcc_id = pipe_ctx->plane_res.hubp->inst;
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
bool result = false;
@@ -456,13 +461,13 @@ bool dcn32_set_mpc_shaper_3dlut(
if (stream->lut3d_func &&
stream->lut3d_func->state.bits.initialized == 1) {
- result = mpc->funcs->program_3dlut(mpc,
- &stream->lut3d_func->lut_3d,
- mpcc_id);
+ result = mpc->funcs->program_3dlut(mpc, &stream->lut3d_func->lut_3d, mpcc_id);
+ if (!result)
+ DC_LOG_ERROR("%s: program_3dlut failed\n", __func__);
- result = mpc->funcs->program_shaper(mpc,
- shaper_lut,
- mpcc_id);
+ result = mpc->funcs->program_shaper(mpc, shaper_lut, mpcc_id);
+ if (!result)
+ DC_LOG_ERROR("%s: program_shaper failed\n", __func__);
}
return result;
@@ -475,40 +480,38 @@ bool dcn32_set_mcm_luts(
int mpcc_id = pipe_ctx->plane_res.hubp->inst;
struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
bool result = true;
- struct pwl_params *lut_params = NULL;
+ const struct pwl_params *lut_params = NULL;
// 1D LUT
- if (plane_state->blend_tf) {
- if (plane_state->blend_tf->type == TF_TYPE_HWPWL)
- lut_params = &plane_state->blend_tf->pwl;
- else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) {
- cm_helper_translate_curve_to_hw_format(plane_state->ctx,
- plane_state->blend_tf,
- &dpp_base->regamma_params, false);
- lut_params = &dpp_base->regamma_params;
- }
+ if (plane_state->blend_tf.type == TF_TYPE_HWPWL)
+ lut_params = &plane_state->blend_tf.pwl;
+ else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) {
+ result = cm3_helper_translate_curve_to_hw_format(&plane_state->blend_tf,
+ &dpp_base->regamma_params, false);
+ if (!result)
+ return result;
+
+ lut_params = &dpp_base->regamma_params;
}
- result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id);
+ mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id);
+ lut_params = NULL;
// Shaper
- if (plane_state->in_shaper_func) {
- if (plane_state->in_shaper_func->type == TF_TYPE_HWPWL)
- lut_params = &plane_state->in_shaper_func->pwl;
- else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) {
- // TODO: dpp_base replace
- ASSERT(false);
- cm_helper_translate_curve_to_hw_format(plane_state->ctx,
- plane_state->in_shaper_func,
- &dpp_base->shaper_params, true);
- lut_params = &dpp_base->shaper_params;
- }
+ if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL)
+ lut_params = &plane_state->in_shaper_func.pwl;
+ else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) {
+ // TODO: dpp_base replace
+ ASSERT(false);
+ cm3_helper_translate_curve_to_hw_format(&plane_state->in_shaper_func,
+ &dpp_base->shaper_params, true);
+ lut_params = &dpp_base->shaper_params;
}
- result = mpc->funcs->program_shaper(mpc, lut_params, mpcc_id);
+ mpc->funcs->program_shaper(mpc, lut_params, mpcc_id);
// 3D
- if (plane_state->lut3d_func && plane_state->lut3d_func->state.bits.initialized == 1)
- result = mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func->lut_3d, mpcc_id);
+ if (plane_state->lut3d_func.state.bits.initialized == 1)
+ result = mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func.lut_3d, mpcc_id);
else
result = mpc->funcs->program_3dlut(mpc, NULL, mpcc_id);
@@ -525,27 +528,24 @@ bool dcn32_set_input_transfer_func(struct dc *dc,
enum dc_transfer_func_predefined tf;
bool result = true;
- struct pwl_params *params = NULL;
+ const struct pwl_params *params = NULL;
if (mpc == NULL || plane_state == NULL)
return false;
tf = TRANSFER_FUNCTION_UNITY;
- if (plane_state->in_transfer_func &&
- plane_state->in_transfer_func->type == TF_TYPE_PREDEFINED)
- tf = plane_state->in_transfer_func->tf;
+ if (plane_state->in_transfer_func.type == TF_TYPE_PREDEFINED)
+ tf = plane_state->in_transfer_func.tf;
dpp_base->funcs->dpp_set_pre_degam(dpp_base, tf);
- if (plane_state->in_transfer_func) {
- if (plane_state->in_transfer_func->type == TF_TYPE_HWPWL)
- params = &plane_state->in_transfer_func->pwl;
- else if (plane_state->in_transfer_func->type == TF_TYPE_DISTRIBUTED_POINTS &&
- cm3_helper_translate_curve_to_hw_format(plane_state->in_transfer_func,
- &dpp_base->degamma_params, false))
- params = &dpp_base->degamma_params;
- }
+ if (plane_state->in_transfer_func.type == TF_TYPE_HWPWL)
+ params = &plane_state->in_transfer_func.pwl;
+ else if (plane_state->in_transfer_func.type == TF_TYPE_DISTRIBUTED_POINTS &&
+ cm3_helper_translate_curve_to_hw_format(&plane_state->in_transfer_func,
+ &dpp_base->degamma_params, false))
+ params = &dpp_base->degamma_params;
dpp_base->funcs->dpp_program_gamcor_lut(dpp_base, params);
@@ -563,29 +563,31 @@ bool dcn32_set_output_transfer_func(struct dc *dc,
{
int mpcc_id = pipe_ctx->plane_res.hubp->inst;
struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
- struct pwl_params *params = NULL;
+ const struct pwl_params *params = NULL;
bool ret = false;
/* program OGAM or 3DLUT only for the top pipe*/
if (resource_is_pipe_type(pipe_ctx, OPP_HEAD)) {
/*program shaper and 3dlut in MPC*/
ret = dcn32_set_mpc_shaper_3dlut(pipe_ctx, stream);
- if (ret == false && mpc->funcs->set_output_gamma && stream->out_transfer_func) {
- if (stream->out_transfer_func->type == TF_TYPE_HWPWL)
- params = &stream->out_transfer_func->pwl;
- else if (pipe_ctx->stream->out_transfer_func->type ==
+ if (ret == false && mpc->funcs->set_output_gamma) {
+ if (stream->out_transfer_func.type == TF_TYPE_HWPWL)
+ params = &stream->out_transfer_func.pwl;
+ else if (pipe_ctx->stream->out_transfer_func.type ==
TF_TYPE_DISTRIBUTED_POINTS &&
cm3_helper_translate_curve_to_hw_format(
- stream->out_transfer_func,
+ &stream->out_transfer_func,
&mpc->blender_params, false))
params = &mpc->blender_params;
/* there are no ROM LUTs in OUTGAM */
- if (stream->out_transfer_func->type == TF_TYPE_PREDEFINED)
+ if (stream->out_transfer_func.type == TF_TYPE_PREDEFINED)
BREAK_TO_DEBUGGER();
}
}
- mpc->funcs->set_output_gamma(mpc, mpcc_id, params);
+ if (mpc->funcs->set_output_gamma)
+ mpc->funcs->set_output_gamma(mpc, mpcc_id, params);
+
return ret;
}
@@ -604,17 +606,15 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context)
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
struct hubp *hubp = pipe->plane_res.hubp;
+ struct dc_stream_status *stream_status = NULL;
+
+ if (pipe->stream)
+ stream_status = dc_state_get_stream_status(context, pipe->stream);
- if (!pipe->stream || !(pipe->stream->mall_stream_config.type == SUBVP_MAIN ||
- pipe->stream->fpo_in_use)) {
+ if (!pipe->stream || !(dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN ||
+ (stream_status && stream_status->fpo_in_use))) {
if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
hubp->funcs->hubp_update_force_pstate_disallow(hubp, false);
- }
-
- /* Today only FPO uses cursor P-State force. Only clear cursor P-State force
- * if it's not FPO.
- */
- if (!pipe->stream || !pipe->stream->fpo_in_use) {
if (hubp && hubp->funcs->hubp_update_force_cursor_pstate_disallow)
hubp->funcs->hubp_update_force_cursor_pstate_disallow(hubp, false);
}
@@ -624,19 +624,34 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context)
*/
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
struct hubp *hubp = pipe->plane_res.hubp;
-
- if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
- if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
- hubp->funcs->hubp_update_force_pstate_disallow(hubp, true);
- }
-
- if (pipe->stream && pipe->stream->fpo_in_use) {
+ struct dc_stream_status *stream_status = NULL;
+ struct dc_stream_status *old_stream_status = NULL;
+
+ /* Today for MED update type we do not call update clocks. However, for FPO
+ * the assumption is that update clocks should be called to disable P-State
+ * switch before any HW programming since FPO in FW and driver are not
+ * synchronized. This causes an issue where on a MED update, an FPO P-State
+ * switch could be taking place, then driver forces P-State disallow in the below
+ * code and prevents FPO from completing the sequence. In this case we add a check
+ * to avoid re-programming (and thus re-setting) the P-State force register by
+ * only reprogramming if the pipe was not previously Subvp or FPO. The assumption
+ * is that the P-State force register should be programmed correctly the first
+ * time SubVP / FPO was enabled, so there's no need to update / reset it if the
+ * pipe config has never exited SubVP / FPO.
+ */
+ if (pipe->stream)
+ stream_status = dc_state_get_stream_status(context, pipe->stream);
+ if (old_pipe->stream)
+ old_stream_status = dc_state_get_stream_status(dc->current_state, old_pipe->stream);
+
+ if (pipe->stream && (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN ||
+ (stream_status && stream_status->fpo_in_use)) &&
+ (!old_pipe->stream || (dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) != SUBVP_MAIN &&
+ (old_stream_status && !old_stream_status->fpo_in_use)))) {
if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
hubp->funcs->hubp_update_force_pstate_disallow(hubp, true);
- /* For now only force cursor p-state disallow for FPO
- * Needs to be added for subvp once FW side gets updated
- */
if (hubp && hubp->funcs->hubp_update_force_cursor_pstate_disallow)
hubp->funcs->hubp_update_force_cursor_pstate_disallow(hubp, true);
}
@@ -680,8 +695,8 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context)
if (cursor_size > 16384)
cache_cursor = true;
- if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
- hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
+ if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
+ hubp->funcs->hubp_update_mall_sel(hubp, 1, false);
} else {
// MALL not supported with Stereo3D
hubp->funcs->hubp_update_mall_sel(hubp,
@@ -723,9 +738,8 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct dc_state *context)
* see if CURSOR_REQ_MODE will be back to 1 for SubVP
* when it should be 0 for MPO
*/
- if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+ if (dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN)
hubp->funcs->hubp_prepare_subvp_buffering(hubp, true);
- }
}
}
}
@@ -768,8 +782,9 @@ void dcn32_init_hw(struct dc *dc)
int i;
int edp_num;
uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+ uint32_t user_level = MAX_BACKLIGHT_LEVEL;
- if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
+ if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks)
dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
// Initialize the dccg
@@ -796,7 +811,7 @@ void dcn32_init_hw(struct dc *dc)
res_pool->ref_clocks.xtalin_clock_inKhz =
dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency;
- if (res_pool->dccg && res_pool->hubbub) {
+ if (res_pool->hubbub) {
(res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg,
dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency,
&res_pool->ref_clocks.dccg_ref_clock_inKhz);
@@ -891,10 +906,10 @@ void dcn32_init_hw(struct dc *dc)
if (edp_link->link_enc->funcs->is_dig_enabled &&
edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) &&
dc->hwss.edp_backlight_control &&
- dc->hwss.power_down &&
+ hws->funcs.power_down &&
dc->hwss.edp_power_control) {
dc->hwss.edp_backlight_control(edp_link, false);
- dc->hwss.power_down(dc);
+ hws->funcs.power_down(dc);
dc->hwss.edp_power_control(edp_link, false);
}
}
@@ -904,8 +919,8 @@ void dcn32_init_hw(struct dc *dc)
if (link->link_enc->funcs->is_dig_enabled &&
link->link_enc->funcs->is_dig_enabled(link->link_enc) &&
- dc->hwss.power_down) {
- dc->hwss.power_down(dc);
+ hws->funcs.power_down) {
+ hws->funcs.power_down(dc);
break;
}
@@ -922,13 +937,15 @@ void dcn32_init_hw(struct dc *dc)
for (i = 0; i < dc->link_count; i++) {
struct dc_link *link = dc->links[i];
- if (link->panel_cntl)
+ if (link->panel_cntl) {
backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+ user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+ }
}
for (i = 0; i < dc->res_pool->pipe_count; i++) {
if (abms[i] != NULL && abms[i]->funcs != NULL)
- abms[i]->funcs->abm_init(abms[i], backlight);
+ abms[i]->funcs->abm_init(abms[i], backlight, user_level);
}
/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
@@ -946,10 +963,11 @@ void dcn32_init_hw(struct dc *dc)
if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
- if (dc->clk_mgr->funcs->notify_wm_ranges)
+ if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges)
dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
- if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled)
+ if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->set_hard_max_memclk &&
+ !dc->clk_mgr->dc_mode_softmax_enabled)
dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
@@ -968,39 +986,47 @@ void dcn32_init_hw(struct dc *dc)
dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;
dc->caps.dmub_caps.subvp_psr = dc->ctx->dmub_srv->dmub->feature_caps.subvp_psr_support;
dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable;
- dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch;
+ dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver;
+ dc->caps.dmub_caps.aux_backlight_support = dc->ctx->dmub_srv->dmub->feature_caps.abm_aux_backlight_support;
+
+ /* for DCN401 testing only */
+ dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver;
+ if (dc->caps.dmub_caps.fams_ver == 2) {
+ /* FAMS2 is enabled */
+ dc->debug.fams2_config.bits.enable &= true;
+ } else if (dc->ctx->dmub_srv->dmub->fw_version <
+ DMUB_FW_VERSION(7, 0, 35)) {
+ /* FAMS2 is disabled */
+ dc->debug.fams2_config.bits.enable = false;
+ if (dc->debug.using_dml2 && dc->res_pool->funcs->update_bw_bounding_box) {
+ /* update bounding box if FAMS2 disabled */
+ dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params);
+ }
+ dc->debug.force_disable_subvp = true;
+ dc->debug.disable_fpo_optimizations = true;
+ }
}
}
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
-static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
+void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
struct dc_stream_state *stream = pipe_ctx->stream;
struct pipe_ctx *odm_pipe;
int opp_cnt = 1;
+ struct dccg *dccg = dc->res_pool->dccg;
+ /* It has been found that when DSCCLK is lower than 16 MHz, DCN register
+ * accesses hang. When DSCCLK is based on refclk, DSCCLK is always a
+ * fixed value higher than 16 MHz, so the issue doesn't occur. When DSCCLK is
+ * generated by DTO, DSCCLK is based on 1/3 of dispclk. For small timings
+ * with DSC, such as 480p60Hz, dispclk can be low enough to trigger
+ * this problem. As a workaround, keep using the fixed-value refclk-based
+ * DSCCLK when the pixel clock is not above 3 x 16 MHz (i.e. 48 MHz), to
+ * avoid hitting this problem.
+ */
+ bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) &&
+ stream->timing.pix_clk_100hz > 480000;
ASSERT(dsc);
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
@@ -1008,11 +1034,26 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
if (enable) {
struct dsc_config dsc_cfg;
- struct dsc_optc_config dsc_optc_cfg;
+ struct dsc_optc_config dsc_optc_cfg = {0};
enum optc_dsc_mode optc_dsc_mode;
+ struct dcn_dsc_state dsc_state = {0};
+
+ if (!dsc) {
+ DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+
+ if (dsc->funcs->dsc_read_state) {
+ dsc->funcs->dsc_read_state(dsc, &dsc_state);
+ if (!dsc_state.dsc_fw_en) {
+ DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+ }
/* Enable DSC hw block */
- dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
+ dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding +
+ stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
dsc_cfg.color_depth = stream->timing.display_color_depth;
@@ -1021,20 +1062,22 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0);
dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt;
+ if (should_use_dto_dscclk)
+ dccg->funcs->set_dto_dscclk(dccg, dsc->inst, dsc_cfg.dc_dsc_cfg.num_slices_h);
dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg);
dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc;
ASSERT(odm_dsc);
+ if (!odm_dsc)
+ continue;
+ if (should_use_dto_dscclk)
+ dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, dsc_cfg.dc_dsc_cfg.num_slices_h);
odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg);
odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst);
}
- dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt;
- dsc_cfg.pic_width *= opp_cnt;
-
optc_dsc_mode = dsc_optc_cfg.is_pixel_format_444 ? OPTC_DSC_ENABLED_444 : OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED;
-
/* Enable DSC in OPTC */
DC_LOG_DSC("Setting optc DSC config for tg instance %d:", pipe_ctx->stream_res.tg->inst);
pipe_ctx->stream_res.tg->funcs->set_dsc_config(pipe_ctx->stream_res.tg,
@@ -1047,11 +1090,11 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
pipe_ctx->stream_res.tg,
OPTC_DSC_DISABLED, 0, 0);
- /* disable DSC block */
- dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
+ /* only disconnect DSC block, DSC is disabled when OPP head pipe is reset */
+ dsc->funcs->dsc_disconnect(pipe_ctx->stream_res.dsc);
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
ASSERT(odm_pipe->stream_res.dsc);
- odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc);
+ odm_pipe->stream_res.dsc->funcs->dsc_disconnect(odm_pipe->stream_res.dsc);
}
}
}
@@ -1088,10 +1131,8 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- int i;
+ int odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false);
+ int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true);
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
@@ -1099,44 +1140,42 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
pipe_ctx->stream_res.tg->funcs->set_odm_combine(
pipe_ctx->stream_res.tg,
opp_inst, opp_cnt,
- &pipe_ctx->stream->timing);
+ odm_slice_width, last_odm_slice_width);
else
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
true);
+ odm_pipe->stream_res.opp->funcs->opp_program_left_edge_extra_pixel(
+ odm_pipe->stream_res.opp,
+ pipe_ctx->stream->timing.pixel_encoding,
+ resource_is_pipe_type(odm_pipe, OTG_MASTER));
}
if (pipe_ctx->stream_res.dsc) {
struct pipe_ctx *current_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx];
- update_dsc_on_stream(pipe_ctx, pipe_ctx->stream->timing.flags.DSC);
+ dcn32_update_dsc_on_stream(pipe_ctx, pipe_ctx->stream->timing.flags.DSC);
/* Check if no longer using pipe for ODM, then need to disconnect DSC for that pipe */
if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe &&
current_pipe_ctx->next_odm_pipe->stream_res.dsc) {
struct display_stream_compressor *dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc;
+
/* disconnect DSC block from stream */
dsc->funcs->dsc_disconnect(dsc);
}
}
+
+ if (!resource_is_pipe_type(pipe_ctx, DPP_PIPE))
+ /*
+ * blank pattern is generated by OPP, reprogram blank pattern
+ * due to OPP count change
+ */
+ dc->hwseq->funcs.blank_pixel_data(dc, pipe_ctx, true);
}
unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div)
@@ -1144,8 +1183,9 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign
struct dc_stream_state *stream = pipe_ctx->stream;
unsigned int odm_combine_factor = 0;
bool two_pix_per_container = false;
+ struct dce_hwseq *hws = stream->ctx->dc->hwseq;
- two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
+ two_pix_per_container = pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing);
odm_combine_factor = get_odm_config(pipe_ctx, NULL);
if (stream->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
@@ -1164,7 +1204,8 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign
} else {
*k1_div = PIXEL_RATE_DIV_BY_1;
*k2_div = PIXEL_RATE_DIV_BY_4;
- if ((odm_combine_factor == 2) || dcn32_is_dp_dig_pixel_rate_div_policy(pipe_ctx))
+ if ((odm_combine_factor == 2) || (hws->funcs.is_dp_dig_pixel_rate_div_policy &&
+ hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx)))
*k2_div = PIXEL_RATE_DIV_BY_2;
}
}
@@ -1175,38 +1216,49 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign
return odm_combine_factor;
}
-void dcn32_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx)
+void dcn32_calculate_pix_rate_divider(
+ struct dc *dc,
+ struct dc_state *context,
+ const struct dc_stream_state *stream)
{
- uint32_t pix_per_cycle = 1;
- uint32_t odm_combine_factor = 1;
+ struct dce_hwseq *hws = dc->hwseq;
+ struct pipe_ctx *pipe_ctx = NULL;
+ unsigned int k1_div = PIXEL_RATE_DIV_NA;
+ unsigned int k2_div = PIXEL_RATE_DIV_NA;
- if (!pipe_ctx || !pipe_ctx->stream || !pipe_ctx->stream_res.stream_enc)
- return;
+ pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream);
- odm_combine_factor = get_odm_config(pipe_ctx, NULL);
- if (optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing) || odm_combine_factor > 1
- || dcn32_is_dp_dig_pixel_rate_div_policy(pipe_ctx))
- pix_per_cycle = 2;
+ if (pipe_ctx) {
- if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode)
- pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc,
- pix_per_cycle);
+ if (hws->funcs.calculate_dccg_k1_k2_values)
+ hws->funcs.calculate_dccg_k1_k2_values(pipe_ctx, &k1_div, &k2_div);
+
+ pipe_ctx->pixel_rate_divider.div_factor1 = k1_div;
+ pipe_ctx->pixel_rate_divider.div_factor2 = k2_div;
+ }
}
-void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context)
+void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx)
{
unsigned int i;
struct pipe_ctx *pipe = NULL;
bool otg_disabled[MAX_PIPES] = {false};
+ struct dc_state *dc_state = NULL;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ if (i <= current_pipe_idx) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ dc_state = context;
+ } else {
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ dc_state = dc->current_state;
+ }
if (!resource_is_pipe_type(pipe, OTG_MASTER))
continue;
if ((pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))
- && pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+ && dc_state_get_pipe_subvp_type(dc_state, pipe) != SUBVP_PHANTOM) {
pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg);
reset_sync_context_for_pipe(dc, context, i);
otg_disabled[i] = true;
@@ -1216,11 +1268,33 @@ void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_
hws->ctx->dc->res_pool->dccg->funcs->trigger_dio_fifo_resync(hws->ctx->dc->res_pool->dccg);
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- pipe = &dc->current_state->res_ctx.pipe_ctx[i];
-
- if (otg_disabled[i])
+ if (i <= current_pipe_idx)
+ pipe = &context->res_ctx.pipe_ctx[i];
+ else
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (otg_disabled[i]) {
+ int opp_inst[MAX_PIPES] = { pipe->stream_res.opp->inst };
+ int opp_cnt = 1;
+ int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe, true);
+ int odm_slice_width = resource_get_odm_slice_dst_width(pipe, false);
+ struct pipe_ctx *odm_pipe;
+
+ for (odm_pipe = pipe->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
+ opp_inst[opp_cnt] = odm_pipe->stream_res.opp->inst;
+ opp_cnt++;
+ }
+ if (opp_cnt > 1)
+ pipe->stream_res.tg->funcs->set_odm_combine(
+ pipe->stream_res.tg,
+ opp_inst, opp_cnt,
+ odm_slice_width,
+ last_odm_slice_width);
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
+ }
}
+
+ dc_trigger_sync(dc, dc->current_state);
}
void dcn32_unblank_stream(struct pipe_ctx *pipe_ctx,
@@ -1231,9 +1305,10 @@ void dcn32_unblank_stream(struct pipe_ctx *pipe_ctx,
struct dc_link *link = stream->link;
struct dce_hwseq *hws = link->dc->hwseq;
struct pipe_ctx *odm_pipe;
- uint32_t pix_per_cycle = 1;
params.opp_cnt = 1;
+ params.pix_per_cycle = pipe_ctx->stream_res.pix_clk_params.dio_se_pix_per_cycle;
+
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
params.opp_cnt++;
@@ -1248,13 +1323,14 @@ void dcn32_unblank_stream(struct pipe_ctx *pipe_ctx,
pipe_ctx->stream_res.hpo_dp_stream_enc,
pipe_ctx->stream_res.tg->inst);
} else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
- if (optc2_is_two_pixels_per_containter(&stream->timing) || params.opp_cnt > 1
- || dcn32_is_dp_dig_pixel_rate_div_policy(pipe_ctx)) {
+ if (pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing) ||
+ params.opp_cnt > 1) {
params.timing.pix_clk_100hz /= 2;
- pix_per_cycle = 2;
+ params.pix_per_cycle = 2;
}
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_odm_combine(
- pipe_ctx->stream_res.stream_enc, pix_per_cycle > 1);
+ if (pipe_ctx->stream_res.stream_enc->funcs->dp_set_odm_combine)
+ pipe_ctx->stream_res.stream_enc->funcs->dp_set_odm_combine(
+ pipe_ctx->stream_res.stream_enc, params.pix_per_cycle > 1);
pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(link, pipe_ctx->stream_res.stream_enc, &params);
}
@@ -1322,18 +1398,20 @@ void dcn32_disable_link_output(struct dc_link *link,
struct dmcu *dmcu = dc->res_pool->dmcu;
if (signal == SIGNAL_TYPE_EDP &&
- link->dc->hwss.edp_backlight_control)
+ link->dc->hwss.edp_backlight_control &&
+ !link->skip_implict_edp_power_control)
link->dc->hwss.edp_backlight_control(link, false);
else if (dmcu != NULL && dmcu->funcs->lock_phy)
dmcu->funcs->lock_phy(dmcu);
link_hwss->disable_link_output(link, link_res, signal);
link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF;
-
- if (signal == SIGNAL_TYPE_EDP &&
- link->dc->hwss.edp_backlight_control)
- link->dc->hwss.edp_power_control(link, false);
- else if (dmcu != NULL && dmcu->funcs->lock_phy)
+ /*
+ * Add the logic to extract BOTH power up and power down sequences
+ * from enable/disable link output and only call edp panel control
+ * in enable_link_dp and disable_link_dp once.
+ */
+ if (dmcu != NULL && dmcu->funcs->unlock_phy)
dmcu->funcs->unlock_phy(dmcu);
dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
@@ -1355,8 +1433,8 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_MAIN &&
- pipe->stream->mall_stream_config.paired_stream == phantom_pipe->stream) {
+ if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_MAIN &&
+ dc_state_get_paired_subvp_stream(context, pipe->stream) == phantom_pipe->stream) {
if (pipe->plane_state && pipe->plane_state->update_flags.bits.position_change) {
phantom_plane->src_rect.x = pipe->plane_state->src_rect.x;
@@ -1381,21 +1459,19 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe)
{
phantom_pipe->update_flags.raw = 0;
- if (phantom_pipe->stream && phantom_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
- if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) {
- phantom_pipe->update_flags.bits.enable = 1;
- phantom_pipe->update_flags.bits.mpcc = 1;
- phantom_pipe->update_flags.bits.dppclk = 1;
- phantom_pipe->update_flags.bits.hubp_interdependent = 1;
- phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
- phantom_pipe->update_flags.bits.gamut_remap = 1;
- phantom_pipe->update_flags.bits.scaler = 1;
- phantom_pipe->update_flags.bits.viewport = 1;
- phantom_pipe->update_flags.bits.det_size = 1;
- if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) {
- phantom_pipe->update_flags.bits.odm = 1;
- phantom_pipe->update_flags.bits.global_sync = 1;
- }
+ if (resource_is_pipe_type(phantom_pipe, DPP_PIPE)) {
+ phantom_pipe->update_flags.bits.enable = 1;
+ phantom_pipe->update_flags.bits.mpcc = 1;
+ phantom_pipe->update_flags.bits.dppclk = 1;
+ phantom_pipe->update_flags.bits.hubp_interdependent = 1;
+ phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+ phantom_pipe->update_flags.bits.gamut_remap = 1;
+ phantom_pipe->update_flags.bits.scaler = 1;
+ phantom_pipe->update_flags.bits.viewport = 1;
+ phantom_pipe->update_flags.bits.det_size = 1;
+ if (resource_is_pipe_type(phantom_pipe, OTG_MASTER)) {
+ phantom_pipe->update_flags.bits.odm = 1;
+ phantom_pipe->update_flags.bits.global_sync = 1;
}
}
}
@@ -1455,9 +1531,44 @@ void dcn32_update_dsc_pg(struct dc *dc,
}
}
+void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context)
+{
+ struct dce_hwseq *hws = dc->hwseq;
+ int i;
+
+ for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) {
+ struct pipe_ctx *pipe_ctx_old =
+ &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx_old->stream)
+ continue;
+
+ if (dc_state_get_pipe_subvp_type(dc->current_state, pipe_ctx_old) != SUBVP_PHANTOM)
+ continue;
+
+ if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe)
+ continue;
+
+ if (!pipe_ctx->stream || pipe_need_reprogram(pipe_ctx_old, pipe_ctx) ||
+ (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)) {
+ struct clock_source *old_clk = pipe_ctx_old->clock_source;
+
+ if (hws->funcs.reset_back_end_for_pipe)
+ hws->funcs.reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
+ if (hws->funcs.enable_stream_gating)
+ hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
+ if (old_clk)
+ old_clk->funcs->cs_power_down(old_clk);
+ }
+ }
+}
+
void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
{
unsigned int i;
+ enum dc_status status = DC_OK;
+ struct dce_hwseq *hws = dc->hwseq;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
@@ -1467,8 +1578,8 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
* pipe, wait for the double buffer update to complete first before we do
* ANY phantom pipe programming.
*/
- if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM &&
- old_pipe->stream && old_pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+ if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM &&
+ old_pipe->stream && dc_state_get_pipe_subvp_type(dc->current_state, old_pipe) != SUBVP_PHANTOM) {
old_pipe->stream_res.tg->funcs->wait_for_state(
old_pipe->stream_res.tg,
CRTC_STATE_VBLANK);
@@ -1478,16 +1589,39 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context)
}
}
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
-
- if (new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
- // If old context or new context has phantom pipes, apply
- // the phantom timings now. We can't change the phantom
- // pipe configuration safely without driver acquiring
- // the DMCUB lock first.
- dc->hwss.apply_ctx_to_hw(dc, context);
- break;
+ struct pipe_ctx *pipe_ctx_old =
+ &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->stream == NULL)
+ continue;
+
+ if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
+ continue;
+
+ if (pipe_ctx->stream == pipe_ctx_old->stream &&
+ pipe_ctx->stream->link->link_state_valid) {
+ continue;
}
+
+ if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))
+ continue;
+
+ if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe)
+ continue;
+
+ if (hws->funcs.apply_single_controller_ctx_to_hw)
+ status = hws->funcs.apply_single_controller_ctx_to_hw(
+ pipe_ctx,
+ context,
+ dc);
+
+ ASSERT(status == DC_OK);
+
+#ifdef CONFIG_DRM_AMD_DC_FP
+ if (hws->funcs.resync_fifo_dccg_dio)
+ hws->funcs.resync_fifo_dccg_dio(hws, dc, context, i);
+#endif
}
}
@@ -1502,7 +1636,7 @@ void dcn32_init_blank(
struct output_pixel_processor *opp = NULL;
struct output_pixel_processor *bottom_opp = NULL;
uint32_t num_opps, opp_id_src0, opp_id_src1;
- uint32_t otg_active_width, otg_active_height;
+ uint32_t otg_active_width = 0, otg_active_height = 0;
uint32_t i;
/* program opp dpg blank color */
@@ -1573,3 +1707,141 @@ void dcn32_init_blank(
if (opp)
hws->funcs.wait_for_blank_complete(opp);
}
+
+/* Phantom stream IDs can change often, but the streams they are paired with can
+* be identical between contexts. This function checks whether the paired streams
+* have the same stream ID, to avoid redundant pipe transitions.
+*/
+static bool is_subvp_phantom_topology_transition_seamless(
+ const struct dc_state *cur_ctx,
+ const struct dc_state *new_ctx,
+ const struct pipe_ctx *cur_pipe,
+ const struct pipe_ctx *new_pipe)
+{
+ enum mall_stream_type cur_pipe_type = dc_state_get_pipe_subvp_type(cur_ctx, cur_pipe);
+ enum mall_stream_type new_pipe_type = dc_state_get_pipe_subvp_type(new_ctx, new_pipe);
+
+ const struct dc_stream_state *cur_paired_stream = dc_state_get_paired_subvp_stream(cur_ctx, cur_pipe->stream);
+ const struct dc_stream_state *new_paired_stream = dc_state_get_paired_subvp_stream(new_ctx, new_pipe->stream);
+
+ return cur_pipe_type == SUBVP_PHANTOM &&
+ cur_pipe_type == new_pipe_type &&
+ cur_paired_stream && new_paired_stream &&
+ cur_paired_stream->stream_id == new_paired_stream->stream_id;
+}
+
+bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc,
+ const struct dc_state *cur_ctx,
+ const struct dc_state *new_ctx)
+{
+ int i;
+ const struct pipe_ctx *cur_pipe, *new_pipe;
+ bool is_seamless = true;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ cur_pipe = &cur_ctx->res_ctx.pipe_ctx[i];
+ new_pipe = &new_ctx->res_ctx.pipe_ctx[i];
+
+ if (resource_is_pipe_type(cur_pipe, FREE_PIPE) ||
+ resource_is_pipe_type(new_pipe, FREE_PIPE))
+ /* adding or removing free pipes is always seamless */
+ continue;
+ else if (resource_is_pipe_type(cur_pipe, OTG_MASTER)) {
+ if (resource_is_pipe_type(new_pipe, OTG_MASTER))
+ if (cur_pipe->stream->stream_id == new_pipe->stream->stream_id ||
+ is_subvp_phantom_topology_transition_seamless(cur_ctx, new_ctx, cur_pipe, new_pipe))
+ /* OTG master with the same stream is seamless */
+ continue;
+ } else if (resource_is_pipe_type(cur_pipe, OPP_HEAD)) {
+ if (resource_is_pipe_type(new_pipe, OPP_HEAD)) {
+ if (cur_pipe->stream_res.tg == new_pipe->stream_res.tg)
+ /*
+ * OPP heads sharing the same timing
+ * generator is seamless
+ */
+ continue;
+ }
+ } else if (resource_is_pipe_type(cur_pipe, DPP_PIPE)) {
+ if (resource_is_pipe_type(new_pipe, DPP_PIPE)) {
+ if (cur_pipe->stream_res.opp == new_pipe->stream_res.opp)
+ /*
+ * DPP pipes sharing the same OPP head is
+ * seamless
+ */
+ continue;
+ }
+ }
+
+ /*
+ * This pipe's transition doesn't fall under any seamless
+ * conditions
+ */
+ is_seamless = false;
+ break;
+ }
+
+ return is_seamless;
+}
+
+void dcn32_prepare_bandwidth(struct dc *dc,
+ struct dc_state *context)
+{
+ bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support;
+ /* Any transition into an FPO config should disable MCLK switching first to avoid
+ * driver and FW P-State synchronization issues.
+ */
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) {
+ dc->optimized_required = true;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = false;
+ }
+
+ if (dc->clk_mgr->dc_mode_softmax_enabled)
+ if (dc->clk_mgr->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 &&
+ context->bw_ctx.bw.dcn.clk.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000)
+ dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz);
+
+ dcn20_prepare_bandwidth(dc, context);
+
+ if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
+ dc_dmub_srv_p_state_delegate(dc, false, context);
+
+ if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) {
+ /* After disabling P-State, restore the original value to ensure we get the correct P-State
+ * on the next optimize.
+ */
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support;
+ }
+}
+
+void dcn32_interdependent_update_lock(struct dc *dc,
+ struct dc_state *context, bool lock)
+{
+ unsigned int i;
+ struct pipe_ctx *pipe;
+ struct timing_generator *tg;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ tg = pipe->stream_res.tg;
+
+ if (!resource_is_pipe_type(pipe, OTG_MASTER) ||
+ !tg->funcs->is_tg_enabled(tg) ||
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM)
+ continue;
+
+ if (lock)
+ dc->hwss.pipe_control_lock(dc, pipe, true);
+ else
+ dc->hwss.pipe_control_lock(dc, pipe, false);
+ }
+}
+
+void dcn32_program_outstanding_updates(struct dc *dc,
+ struct dc_state *context)
+{
+ struct hubbub *hubbub = dc->res_pool->hubbub;
+
+ /* update compbuf if required */
+ if (hubbub->funcs->program_compbuf_size)
+ hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h
index 2d2628f31bed..0303a5953673 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h
@@ -71,11 +71,11 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context);
void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx);
-unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div);
+void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable);
-void dcn32_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx);
+unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div);
-void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context);
+void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx);
void dcn32_subvp_pipe_control_lock(struct dc *dc,
struct dc_state *context,
@@ -91,6 +91,8 @@ void dcn32_unblank_stream(struct pipe_ctx *pipe_ctx,
bool dcn32_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx);
+void dcn32_calculate_pix_rate_divider(struct dc *dc, struct dc_state *context, const struct dc_stream_state *stream);
+
void dcn32_disable_link_output(struct dc_link *link,
const struct link_resource *link_res,
enum signal_type signal);
@@ -111,8 +113,23 @@ void dcn32_update_dsc_pg(struct dc *dc,
void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context);
+void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context);
+
void dcn32_init_blank(
struct dc *dc,
struct timing_generator *tg);
+bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc,
+ const struct dc_state *cur_ctx,
+ const struct dc_state *new_ctx);
+
+void dcn32_prepare_bandwidth(struct dc *dc,
+ struct dc_state *context);
+
+void dcn32_interdependent_update_lock(struct dc *dc,
+ struct dc_state *context, bool lock);
+
+void dcn32_program_outstanding_updates(struct dc *dc,
+ struct dc_state *context);
+
#endif /* __DC_HWSS_DCN32_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
index c7417147dff1..c19ef075c882 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
@@ -23,21 +23,23 @@
*
*/
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10/dcn10_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn21/dcn21_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
#include "dcn31/dcn31_hwseq.h"
-#include "dcn32_hwseq.h"
+#include "dcn32/dcn32_hwseq.h"
+#include "dcn401/dcn401_hwseq.h"
#include "dcn32_init.h"
static const struct hw_sequencer_funcs dcn32_funcs = {
- .program_gamut_remap = dcn10_program_gamut_remap,
+ .program_gamut_remap = dcn30_program_gamut_remap,
.init_hw = dcn32_init_hw,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
.post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
.update_plane_addr = dcn20_update_plane_addr,
@@ -58,14 +60,14 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
.disable_plane = dcn20_disable_plane,
.disable_pixel_data = dcn20_disable_pixel_data,
.pipe_control_lock = dcn20_pipe_control_lock,
- .interdependent_update_lock = dcn10_lock_all_pipes,
+ .interdependent_update_lock = dcn32_interdependent_update_lock,
.cursor_lock = dcn10_cursor_lock,
- .prepare_bandwidth = dcn30_prepare_bandwidth,
+ .prepare_bandwidth = dcn32_prepare_bandwidth,
.optimize_bandwidth = dcn20_optimize_bandwidth,
.update_bandwidth = dcn20_update_bandwidth,
.set_drr = dcn10_set_drr,
.get_position = dcn10_get_position,
- .set_static_screen_control = dcn30_set_static_screen_control,
+ .set_static_screen_control = dcn31_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
@@ -86,7 +88,6 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
@@ -97,7 +98,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
.calc_vupdate_position = dcn10_calc_vupdate_position,
.apply_idle_power_optimizations = dcn32_apply_idle_power_optimizations,
.does_plane_fit_in_mall = NULL,
- .set_backlight_level = dcn21_set_backlight_level,
+ .set_backlight_level = dcn31_set_backlight_level,
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
.hardware_release = dcn30_hardware_release,
.set_pipe = dcn21_set_pipe,
@@ -109,17 +110,22 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
.get_dcc_en_bits = dcn10_get_dcc_en_bits,
.commit_subvp_config = dcn32_commit_subvp_config,
.enable_phantom_streams = dcn32_enable_phantom_streams,
+ .disable_phantom_streams = dcn32_disable_phantom_streams,
.subvp_pipe_control_lock = dcn32_subvp_pipe_control_lock,
.update_visual_confirm_color = dcn10_update_visual_confirm_color,
.subvp_pipe_control_lock_fast = dcn32_subvp_pipe_control_lock_fast,
.update_phantom_vp_position = dcn32_update_phantom_vp_position,
.update_dsc_pg = dcn32_update_dsc_pg,
.apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom,
+ .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless,
+ .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider,
+ .program_outstanding_updates = dcn32_program_outstanding_updates,
+ .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates,
+ .get_underflow_debug_data = dcn30_get_underflow_debug_data,
};
static const struct hwseq_private_funcs dcn32_private_funcs = {
.init_pipes = dcn10_init_pipes,
- .update_plane_addr = dcn20_update_plane_addr,
.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
.update_mpcc = dcn20_update_mpcc,
.set_input_transfer_func = dcn32_set_input_transfer_func,
@@ -154,9 +160,10 @@ static const struct hwseq_private_funcs dcn32_private_funcs = {
.update_force_pstate = dcn32_update_force_pstate,
.update_mall_sel = dcn32_update_mall_sel,
.calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
- .set_pixels_per_cycle = dcn32_set_pixels_per_cycle,
.resync_fifo_dccg_dio = dcn32_resync_fifo_dccg_dio,
.is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy,
+ .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw,
+ .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe,
};
void dcn32_hw_sequencer_init_functions(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h
index 89a591eb2c23..89a591eb2c23 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
new file mode 100644
index 000000000000..05011061822c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -0,0 +1,1594 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services.h"
+#include "dm_helpers.h"
+#include "core_types.h"
+#include "resource.h"
+#include "dccg.h"
+#include "dce/dce_hwseq.h"
+#include "clk_mgr.h"
+#include "reg_helper.h"
+#include "abm.h"
+#include "hubp.h"
+#include "dchubbub.h"
+#include "timing_generator.h"
+#include "opp.h"
+#include "ipp.h"
+#include "mpc.h"
+#include "mcif_wb.h"
+#include "dc_dmub_srv.h"
+#include "dcn35_hwseq.h"
+#include "dcn35/dcn35_dccg.h"
+#include "link_hwss.h"
+#include "dpcd_defs.h"
+#include "dce/dmub_outbox.h"
+#include "link_service.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "inc/link_enc_cfg.h"
+#include "dcn30/dcn30_vpg.h"
+#include "dce/dce_i2c_hw.h"
+#include "dsc.h"
+#include "dcn20/dcn20_optc.h"
+#include "dcn30/dcn30_cm_common.h"
+#include "dcn31/dcn31_hwseq.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dc_state_priv.h"
+
+#define DC_LOGGER_INIT(logger) \
+ struct dal_logger *dc_logger = logger
+
+#define CTX \
+ hws->ctx
+#define REG(reg)\
+ hws->regs->reg
+#define DC_LOGGER \
+ dc_logger
+
+
+#undef FN
+#define FN(reg_name, field_name) \
+ hws->shifts->field_name, hws->masks->field_name
+#if 0
+static void enable_memory_low_power(struct dc *dc)
+{
+ struct dce_hwseq *hws = dc->hwseq;
+ int i;
+
+ if (dc->debug.enable_mem_low_power.bits.dmcu) {
+ // Force ERAM to shutdown if DMCU is not enabled
+ if (dc->debug.disable_dmcu || dc->config.disable_dmcu) {
+ REG_UPDATE(DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, 3);
+ }
+ }
+ /* dcn35 has MEM_PWR enabled by default; make sure to wake them up */
+ // Set default OPTC memory power states
+ if (dc->debug.enable_mem_low_power.bits.optc) {
+ // Shutdown when unassigned and light sleep in VBLANK
+ REG_SET_2(ODM_MEM_PWR_CTRL3, 0, ODM_MEM_UNASSIGNED_PWR_MODE, 3, ODM_MEM_VBLANK_PWR_MODE, 1);
+ }
+
+ if (dc->debug.enable_mem_low_power.bits.vga) {
+ // Power down VGA memory
+ REG_UPDATE(MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, 1);
+ }
+
+ if (dc->debug.enable_mem_low_power.bits.mpc &&
+ dc->res_pool->mpc->funcs->set_mpc_mem_lp_mode)
+ dc->res_pool->mpc->funcs->set_mpc_mem_lp_mode(dc->res_pool->mpc);
+
+ if (dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) {
+ // Power down VPGs
+ for (i = 0; i < dc->res_pool->stream_enc_count; i++)
+ dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
+#if defined(CONFIG_DRM_AMD_DC_DP2_0)
+ for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++)
+ dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg);
+#endif
+ }
+
+}
+#endif
+
+static void print_pg_status(struct dc *dc, const char *debug_func, const char *debug_log)
+{
+ if (dc->debug.enable_pg_cntl_debug_logs && dc->res_pool->pg_cntl) {
+ if (dc->res_pool->pg_cntl->funcs->print_pg_status)
+ dc->res_pool->pg_cntl->funcs->print_pg_status(dc->res_pool->pg_cntl, debug_func, debug_log);
+ }
+}
+
+void dcn35_set_dmu_fgcg(struct dce_hwseq *hws, bool enable)
+{
+ REG_UPDATE_3(DMU_CLK_CNTL,
+ RBBMIF_FGCG_REP_DIS, !enable,
+ IHC_FGCG_REP_DIS, !enable,
+ LONO_FGCG_REP_DIS, !enable
+ );
+}
+
+void dcn35_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable)
+{
+ REG_UPDATE(HPO_TOP_HW_CONTROL, HPO_IO_EN, !!enable);
+}
+
+void dcn35_init_hw(struct dc *dc)
+{
+ struct abm **abms = dc->res_pool->multiple_abms;
+ struct dce_hwseq *hws = dc->hwseq;
+ struct dc_bios *dcb = dc->ctx->dc_bios;
+ struct resource_pool *res_pool = dc->res_pool;
+ uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+ uint32_t user_level = MAX_BACKLIGHT_LEVEL;
+ int i;
+
+ print_pg_status(dc, __func__, ": start");
+
+ if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
+ dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
+
+ //dcn35_set_dmu_fgcg(hws, dc->debug.enable_fine_grain_clock_gating.bits.dmu);
+
+ if (!dcb->funcs->is_accelerated_mode(dcb)) {
+ /*this calls into dmubfw to do the init*/
+ hws->funcs.bios_golden_init(dc);
+ }
+
+ // Initialize the dccg
+ if (res_pool->dccg->funcs->dccg_init)
+ res_pool->dccg->funcs->dccg_init(res_pool->dccg);
+
+ //enable_memory_low_power(dc);
+
+ if (dc->ctx->dc_bios->fw_info_valid) {
+ res_pool->ref_clocks.xtalin_clock_inKhz =
+ dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency;
+
+ if (res_pool->hubbub) {
+
+ (res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg,
+ dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency,
+ &res_pool->ref_clocks.dccg_ref_clock_inKhz);
+
+ (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub,
+ res_pool->ref_clocks.dccg_ref_clock_inKhz,
+ &res_pool->ref_clocks.dchub_ref_clock_inKhz);
+ } else {
+ // Not all ASICs have DCCG sw component
+ res_pool->ref_clocks.dccg_ref_clock_inKhz =
+ res_pool->ref_clocks.xtalin_clock_inKhz;
+ res_pool->ref_clocks.dchub_ref_clock_inKhz =
+ res_pool->ref_clocks.xtalin_clock_inKhz;
+ }
+ } else
+ ASSERT_CRITICAL(false);
+
+ for (i = 0; i < dc->link_count; i++) {
+ /* Power up AND update implementation according to the
+ * required signal (which may be different from the
+ * default signal on connector).
+ */
+ struct dc_link *link = dc->links[i];
+
+ if (link->ep_type != DISPLAY_ENDPOINT_PHY)
+ continue;
+
+ link->link_enc->funcs->hw_init(link->link_enc);
+
+ /* Check for enabled DIG to identify enabled display */
+ if (link->link_enc->funcs->is_dig_enabled &&
+ link->link_enc->funcs->is_dig_enabled(link->link_enc)) {
+ link->link_status.link_active = true;
+ if (link->link_enc->funcs->fec_is_active &&
+ link->link_enc->funcs->fec_is_active(link->link_enc))
+ link->fec_state = dc_link_fec_enabled;
+ }
+ }
+
+ /* we want to turn off all dp displays before doing detection */
+ dc->link_srv->blank_all_dp_displays(dc);
+
+ if (res_pool->hubbub && res_pool->hubbub->funcs->dchubbub_init)
+ res_pool->hubbub->funcs->dchubbub_init(dc->res_pool->hubbub);
+ /* If taking control over from VBIOS, we may want to optimize our first
+ * mode set, so we need to skip powering down pipes until we know which
+ * pipes we want to use.
+ * Otherwise, if taking control is not possible, we need to power
+ * everything down.
+ */
+ if (dcb->funcs->is_accelerated_mode(dcb) || !dc->config.seamless_boot_edp_requested) {
+
+ // we want to turn off edp displays if odm is enabled and no seamless boot
+ if (!dc->caps.seamless_odm) {
+ for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
+ struct timing_generator *tg = dc->res_pool->timing_generators[i];
+ uint32_t num_opps, opp_id_src0, opp_id_src1;
+
+ num_opps = 1;
+ if (tg) {
+ if (tg->funcs->is_tg_enabled(tg) && tg->funcs->get_optc_source) {
+ tg->funcs->get_optc_source(tg, &num_opps,
+ &opp_id_src0, &opp_id_src1);
+ }
+ }
+
+ if (num_opps > 1) {
+ dc->link_srv->blank_all_edp_displays(dc);
+ break;
+ }
+ }
+ }
+
+ hws->funcs.init_pipes(dc, dc->current_state);
+ print_pg_status(dc, __func__, ": after init_pipes");
+
+ if (dc->res_pool->hubbub->funcs->allow_self_refresh_control &&
+ !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter)
+ dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
+ !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
+ }
+ for (i = 0; i < res_pool->audio_count; i++) {
+ struct audio *audio = res_pool->audios[i];
+
+ audio->funcs->hw_init(audio);
+ }
+
+ for (i = 0; i < dc->link_count; i++) {
+ struct dc_link *link = dc->links[i];
+
+ if (link->panel_cntl) {
+ backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+ user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+ }
+ }
+ if (dc->ctx->dmub_srv) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (abms[i] != NULL && abms[i]->funcs != NULL)
+ abms[i]->funcs->abm_init(abms[i], backlight, user_level);
+ }
+ }
+
+ /* Power AFMT HDMI memory. TODO: may move this to output disable/enable to save power. */
+ REG_WRITE(DIO_MEM_PWR_CTRL, 0);
+
+ // Set i2c to light sleep until the engine is set up
+ if (dc->debug.enable_mem_low_power.bits.i2c)
+ REG_UPDATE(DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, 0);
+
+ if (hws->funcs.setup_hpo_hw_control)
+ hws->funcs.setup_hpo_hw_control(hws, false);
+
+ if (!dc->debug.disable_clock_gate) {
+ /* enable all DCN clock gating */
+ REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
+ }
+
+ if (dc->debug.disable_mem_low_power) {
+ REG_UPDATE(DC_MEM_GLOBAL_PWR_REQ_CNTL, DC_MEM_GLOBAL_PWR_REQ_DIS, 1);
+ }
+ if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
+ dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
+
+ if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges)
+ dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
+
+ if (dc->clk_mgr && dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled)
+ dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr);
+
+
+
+ if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+ dc->res_pool->hubbub->funcs->force_pstate_change_control(
+ dc->res_pool->hubbub, false, false);
+
+ if (dc->res_pool->hubbub->funcs->init_crb)
+ dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub);
+
+ if (dc->res_pool->hubbub->funcs->set_request_limit && dc->config.sdpif_request_limit_words_per_umc > 0)
+ dc->res_pool->hubbub->funcs->set_request_limit(dc->res_pool->hubbub, dc->ctx->dc_bios->vram_info.num_chans, dc->config.sdpif_request_limit_words_per_umc);
+ // Get DMCUB capabilities
+ if (dc->ctx->dmub_srv) {
+ dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv);
+ dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;
+ dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver;
+ dc->caps.dmub_caps.aux_backlight_support = dc->ctx->dmub_srv->dmub->feature_caps.abm_aux_backlight_support;
+ }
+
+ if (dc->res_pool->pg_cntl) {
+ if (dc->res_pool->pg_cntl->funcs->init_pg_status)
+ dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl);
+ }
+ print_pg_status(dc, __func__, ": after init_pg_status");
+}
+
+static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
+{
+ struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+ struct dc_stream_state *stream = pipe_ctx->stream;
+ struct pipe_ctx *odm_pipe;
+ int opp_cnt = 1;
+
+ DC_LOGGER_INIT(stream->ctx->logger);
+
+ ASSERT(dsc);
+ for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
+ opp_cnt++;
+
+ if (enable) {
+ struct dsc_config dsc_cfg;
+ struct dsc_optc_config dsc_optc_cfg = {0};
+ enum optc_dsc_mode optc_dsc_mode;
+ struct dcn_dsc_state dsc_state = {0};
+
+ if (!dsc) {
+ DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+
+ if (dsc->funcs->dsc_read_state) {
+ dsc->funcs->dsc_read_state(dsc, &dsc_state);
+ if (!dsc_state.dsc_fw_en) {
+ DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ return;
+ }
+ }
+ /* Enable DSC hw block */
+ dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
+ dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
+ dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
+ dsc_cfg.color_depth = stream->timing.display_color_depth;
+ dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false;
+ dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg;
+ ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0);
+ dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt;
+
+ dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg);
+ dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
+ for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
+ struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc;
+
+ ASSERT(odm_dsc);
+ odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg);
+ odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst);
+ }
+ dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt;
+ dsc_cfg.pic_width *= opp_cnt;
+
+ optc_dsc_mode = dsc_optc_cfg.is_pixel_format_444 ? OPTC_DSC_ENABLED_444 : OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED;
+
+ /* Enable DSC in OPTC */
+ DC_LOG_DSC("Setting optc DSC config for tg instance %d:", pipe_ctx->stream_res.tg->inst);
+ pipe_ctx->stream_res.tg->funcs->set_dsc_config(pipe_ctx->stream_res.tg,
+ optc_dsc_mode,
+ dsc_optc_cfg.bytes_per_pixel,
+ dsc_optc_cfg.slice_width);
+ } else {
+ /* disable DSC in OPTC */
+ pipe_ctx->stream_res.tg->funcs->set_dsc_config(
+ pipe_ctx->stream_res.tg,
+ OPTC_DSC_DISABLED, 0, 0);
+
+ /* disable DSC block */
+ dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
+ for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
+ ASSERT(odm_pipe->stream_res.dsc);
+ odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc);
+ }
+ }
+}
+
+// Given any pipe_ctx, return the total ODM combine factor and, optionally,
+// the OPP instance ids that are in use
+static unsigned int get_odm_config(struct pipe_ctx *pipe_ctx, unsigned int *opp_instances)
+{
+ unsigned int opp_count = 1;
+ struct pipe_ctx *odm_pipe;
+
+ // First get to the top pipe
+ for (odm_pipe = pipe_ctx; odm_pipe->prev_odm_pipe; odm_pipe = odm_pipe->prev_odm_pipe)
+ ;
+
+ // First pipe is always used
+ if (opp_instances)
+ opp_instances[0] = odm_pipe->stream_res.opp->inst;
+
+ // Find and count odm pipes, if any
+ for (odm_pipe = odm_pipe->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
+ if (opp_instances)
+ opp_instances[opp_count] = odm_pipe->stream_res.opp->inst;
+ opp_count++;
+ }
+
+ return opp_count;
+}
+
+void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx)
+{
+ struct pipe_ctx *odm_pipe;
+ int opp_cnt = 0;
+ int opp_inst[MAX_PIPES] = {0};
+ int odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false);
+ int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true);
+ struct mpc *mpc = dc->res_pool->mpc;
+ int i;
+
+ opp_cnt = get_odm_config(pipe_ctx, opp_inst);
+
+ if (opp_cnt > 1)
+ pipe_ctx->stream_res.tg->funcs->set_odm_combine(
+ pipe_ctx->stream_res.tg,
+ opp_inst, opp_cnt,
+ odm_slice_width, last_odm_slice_width);
+ else
+ pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
+
+ if (mpc->funcs->set_out_rate_control) {
+ for (i = 0; i < opp_cnt; ++i) {
+ mpc->funcs->set_out_rate_control(
+ mpc, opp_inst[i],
+ false,
+ 0,
+ NULL);
+ }
+ }
+
+ for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
+ odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
+ odm_pipe->stream_res.opp,
+ true);
+ }
+
+ if (pipe_ctx->stream_res.dsc) {
+ struct pipe_ctx *current_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx];
+
+ update_dsc_on_stream(pipe_ctx, pipe_ctx->stream->timing.flags.DSC);
+
+ /* Check if no longer using pipe for ODM, then need to disconnect DSC for that pipe */
+ if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe &&
+ current_pipe_ctx->next_odm_pipe->stream_res.dsc) {
+ struct display_stream_compressor *dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc;
+ /* disconnect DSC block from stream */
+ dsc->funcs->dsc_disconnect(dsc);
+ }
+ }
+}
+
+void dcn35_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool clock_on)
+{
+ if (!hws->ctx->dc->debug.root_clock_optimization.bits.dpp)
+ return;
+
+ if (hws->ctx->dc->res_pool->dccg->funcs->dpp_root_clock_control) {
+ hws->ctx->dc->res_pool->dccg->funcs->dpp_root_clock_control(
+ hws->ctx->dc->res_pool->dccg, dpp_inst, clock_on);
+ }
+}
+
+void dcn35_dpstream_root_clock_control(struct dce_hwseq *hws, unsigned int dp_hpo_inst, bool clock_on)
+{
+ if (!hws->ctx->dc->debug.root_clock_optimization.bits.dpstream)
+ return;
+
+ if (hws->ctx->dc->res_pool->dccg->funcs->set_dpstreamclk_root_clock_gating) {
+ hws->ctx->dc->res_pool->dccg->funcs->set_dpstreamclk_root_clock_gating(
+ hws->ctx->dc->res_pool->dccg, dp_hpo_inst, clock_on);
+ }
+}
+
+void dcn35_physymclk_root_clock_control(struct dce_hwseq *hws, unsigned int phy_inst, bool clock_on)
+{
+ if (!hws->ctx->dc->debug.root_clock_optimization.bits.physymclk)
+ return;
+
+ if (hws->ctx->dc->res_pool->dccg->funcs->set_physymclk_root_clock_gating) {
+ hws->ctx->dc->res_pool->dccg->funcs->set_physymclk_root_clock_gating(
+ hws->ctx->dc->res_pool->dccg, phy_inst, clock_on);
+ }
+}
+
+/* In headless boot cases, DIG may be turned
+ * on which causes HW/SW discrepancies.
+ * To avoid this, power down hardware on boot
+ * if DIG is turned on
+ */
+void dcn35_power_down_on_boot(struct dc *dc)
+{
+ struct dc_link *edp_links[MAX_NUM_EDP];
+ struct dc_link *edp_link = NULL;
+ int edp_num;
+ int i = 0;
+
+ dc_get_edp_links(dc, edp_links, &edp_num);
+ if (edp_num)
+ edp_link = edp_links[0];
+
+ if (edp_link && edp_link->link_enc->funcs->is_dig_enabled &&
+ edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) &&
+ dc->hwseq->funcs.edp_backlight_control &&
+ dc->hwseq->funcs.power_down &&
+ dc->hwss.edp_power_control) {
+ dc->hwseq->funcs.edp_backlight_control(edp_link, false);
+ dc->hwseq->funcs.power_down(dc);
+ dc->hwss.edp_power_control(edp_link, false);
+ } else {
+ for (i = 0; i < dc->link_count; i++) {
+ struct dc_link *link = dc->links[i];
+
+ if (link->link_enc && link->link_enc->funcs->is_dig_enabled &&
+ link->link_enc->funcs->is_dig_enabled(link->link_enc) &&
+ dc->hwseq->funcs.power_down) {
+ dc->hwseq->funcs.power_down(dc);
+ break;
+ }
+
+ }
+ }
+
+ /*
+ * Call update_clocks with empty context
+ * to send DISPLAY_OFF
+ * Otherwise DISPLAY_OFF may not be asserted
+ */
+ if (dc->clk_mgr->funcs->set_low_power_state)
+ dc->clk_mgr->funcs->set_low_power_state(dc->clk_mgr);
+
+ if (dc->clk_mgr->clks.pwr_state == DCN_PWR_STATE_LOW_POWER)
+ dc_allow_idle_optimizations(dc, true);
+}
+
+bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable)
+{
+ if (dc->debug.dmcub_emulation)
+ return true;
+
+ if (enable) {
+ uint32_t num_active_edp = 0;
+ int i;
+
+ for (i = 0; i < dc->current_state->stream_count; ++i) {
+ struct dc_stream_state *stream = dc->current_state->streams[i];
+ struct dc_link *link = stream->link;
+ bool is_psr = link && !link->panel_config.psr.disable_psr &&
+ (link->psr_settings.psr_version == DC_PSR_VERSION_1 ||
+ link->psr_settings.psr_version == DC_PSR_VERSION_SU_1);
+ bool is_replay = link && link->replay_settings.replay_feature_enabled;
+
+ /* Ignore streams that are disabled. */
+ if (stream->dpms_off)
+ continue;
+
+ /* Active external displays block idle optimizations. */
+ if (!dc_is_embedded_signal(stream->signal))
+ return false;
+
+ /* If not PWRSEQ0 can't enter idle optimizations */
+ if (link && link->link_index != 0)
+ return false;
+
+ /* Check for panel power features required for idle optimizations. */
+ if (!is_psr && !is_replay)
+ return false;
+
+ num_active_edp += 1;
+ }
+
+ /* If more than one active eDP then disallow. */
+ if (num_active_edp > 1)
+ return false;
+ }
+
+ // TODO: review other cases when idle optimization is allowed
+ dc_dmub_srv_apply_idle_power_optimizations(dc, enable);
+
+ return true;
+}
+
+void dcn35_z10_restore(const struct dc *dc)
+{
+ if (dc->debug.disable_z10)
+ return;
+
+ dc_dmub_srv_apply_idle_power_optimizations(dc, false);
+
+ dcn31_z10_restore(dc);
+}
+
+void dcn35_init_pipes(struct dc *dc, struct dc_state *context)
+{
+ int i;
+ struct dce_hwseq *hws = dc->hwseq;
+ struct hubbub *hubbub = dc->res_pool->hubbub;
+ struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
+ bool can_apply_seamless_boot = false;
+ bool tg_enabled[MAX_PIPES] = {false};
+
+ for (i = 0; i < context->stream_count; i++) {
+ if (context->streams[i]->apply_seamless_boot_optimization) {
+ can_apply_seamless_boot = true;
+ break;
+ }
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct timing_generator *tg = dc->res_pool->timing_generators[i];
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ /* This assumes that pipe_ctx is not mapped irregularly to a
+ * non-preferred front end. If pipe_ctx->stream is not NULL,
+ * we will use the pipe, so don't disable it.
+ */
+ if (pipe_ctx->stream != NULL && can_apply_seamless_boot)
+ continue;
+
+ /* Blank controller using driver code instead of
+ * command table.
+ */
+ if (tg->funcs->is_tg_enabled(tg)) {
+ if (hws->funcs.init_blank != NULL) {
+ hws->funcs.init_blank(dc, tg);
+ tg->funcs->lock(tg);
+ } else {
+ tg->funcs->lock(tg);
+ tg->funcs->set_blank(tg, true);
+ hwss_wait_for_blank_complete(tg);
+ }
+ }
+ }
+
+ /* Reset det size */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+ struct hubp *hubp = dc->res_pool->hubps[i];
+
+ /* Do not need to reset for seamless boot */
+ if (pipe_ctx->stream != NULL && can_apply_seamless_boot)
+ continue;
+
+ if (hubbub && hubp) {
+ if (hubbub->funcs->program_det_size)
+ hubbub->funcs->program_det_size(hubbub, hubp->inst, 0);
+ if (hubbub->funcs->program_det_segments)
+ hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0);
+ }
+ }
+
+ /* num_opp will be equal to number of mpcc */
+ for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ /* Cannot reset the MPC mux if seamless boot */
+ if (pipe_ctx->stream != NULL && can_apply_seamless_boot)
+ continue;
+
+ dc->res_pool->mpc->funcs->mpc_init_single_inst(
+ dc->res_pool->mpc, i);
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct timing_generator *tg = dc->res_pool->timing_generators[i];
+ struct hubp *hubp = dc->res_pool->hubps[i];
+ struct dpp *dpp = dc->res_pool->dpps[i];
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ /* This assumes that pipe_ctx is not mapped irregularly to a
+ * non-preferred front end. If pipe_ctx->stream is not NULL,
+ * we will use the pipe, so don't disable it.
+ */
+ if (can_apply_seamless_boot &&
+ pipe_ctx->stream != NULL &&
+ pipe_ctx->stream_res.tg->funcs->is_tg_enabled(
+ pipe_ctx->stream_res.tg)) {
+ // Enable double buffering for OTG_BLANK no matter if
+ // seamless boot is enabled or not to suppress global sync
+ // signals when OTG blanked. This is to prevent pipe from
+ // requesting data while in PSR.
+ tg->funcs->tg_init(tg);
+ hubp->power_gated = true;
+ tg_enabled[i] = true;
+ continue;
+ }
+
+ /* Disable on the current state so the new one isn't cleared. */
+ pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ hubp->funcs->hubp_reset(hubp);
+ dpp->funcs->dpp_reset(dpp);
+
+ pipe_ctx->stream_res.tg = tg;
+ pipe_ctx->pipe_idx = i;
+
+ pipe_ctx->plane_res.hubp = hubp;
+ pipe_ctx->plane_res.dpp = dpp;
+ pipe_ctx->plane_res.mpcc_inst = dpp->inst;
+ hubp->mpcc_id = dpp->inst;
+ hubp->opp_id = OPP_ID_INVALID;
+ hubp->power_gated = false;
+
+ dc->res_pool->opps[i]->mpc_tree_params.opp_id = dc->res_pool->opps[i]->inst;
+ dc->res_pool->opps[i]->mpc_tree_params.opp_list = NULL;
+ dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
+ pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
+
+ hws->funcs.plane_atomic_disconnect(dc, context, pipe_ctx);
+
+ if (tg->funcs->is_tg_enabled(tg))
+ tg->funcs->unlock(tg);
+
+ dc->hwss.disable_plane(dc, context, pipe_ctx);
+
+ pipe_ctx->stream_res.tg = NULL;
+ pipe_ctx->plane_res.hubp = NULL;
+
+ if (tg->funcs->is_tg_enabled(tg)) {
+ if (tg->funcs->init_odm)
+ tg->funcs->init_odm(tg);
+ }
+
+ tg->funcs->tg_init(tg);
+ }
+
+ /* Clean up MPC tree */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (tg_enabled[i]) {
+ if (dc->res_pool->opps[i]->mpc_tree_params.opp_list) {
+ if (dc->res_pool->opps[i]->mpc_tree_params.opp_list->mpcc_bot) {
+ int bot_id = dc->res_pool->opps[i]->mpc_tree_params.opp_list->mpcc_bot->mpcc_id;
+
+ if ((bot_id < MAX_MPCC) && (bot_id < MAX_PIPES) && (!tg_enabled[bot_id]))
+ dc->res_pool->opps[i]->mpc_tree_params.opp_list = NULL;
+ }
+ }
+ }
+ }
+
+ if (pg_cntl != NULL) {
+ if (pg_cntl->funcs->dsc_pg_control != NULL) {
+ uint32_t num_opps = 0;
+ uint32_t opp_id_src0 = OPP_ID_INVALID;
+ uint32_t opp_id_src1 = OPP_ID_INVALID;
+ uint32_t optc_dsc_state = 0;
+
+ // Step 1: Find out which OPTC is running with OPTC DSC on.
+ // We can't use res_pool->res_cap->num_timing_generator for this check
+ // because it records the default display pipe setting built into the driver,
+ // not the display pipes of the current chip.
+ // Some ASICs have fewer display pipes than the default because some are fused off.
+ // The driver obtains the real information in the dcnxx_resource_construct function.
+ for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
+ struct timing_generator *tg = dc->res_pool->timing_generators[i];
+
+ if (tg->funcs->is_tg_enabled(tg)) {
+ if (tg->funcs->get_dsc_status)
+ tg->funcs->get_dsc_status(tg, &optc_dsc_state);
+ // Only one OPTC with DSC is on, so once we get a result
+ // we exit this loop. A non-zero value means DSC is enabled.
+ if (optc_dsc_state != 0) {
+ tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1);
+ break;
+ }
+ }
+ }
+
+ // Step 2: To power down DSC but skip DSC of running OPTC
+ for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) {
+ struct dcn_dsc_state s = {0};
+
+ /* avoid reading DSC state when it is not in use as it may be power gated */
+ if (optc_dsc_state) {
+ dc->res_pool->dscs[i]->funcs->dsc_read_state(dc->res_pool->dscs[i], &s);
+
+ if ((s.dsc_opp_source == opp_id_src0 || s.dsc_opp_source == opp_id_src1) &&
+ s.dsc_clock_en && s.dsc_fw_en)
+ continue;
+ }
+
+ pg_cntl->funcs->dsc_pg_control(pg_cntl, dc->res_pool->dscs[i]->inst, false);
+ }
+ }
+ }
+}
+
+/* Power-up sequence for a plane's HUBP, DPP clock and OPP pipe clock; @context is unused. */
+void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx,
+		struct dc_state *context)
+{
+	struct hubp *hubp = pipe_ctx->plane_res.hubp;
+	struct dpp *dpp = pipe_ctx->plane_res.dpp;
+	struct dccg *dccg = dc->res_pool->dccg;
+	bool wants_flip_int;
+
+	/* enable DCFCLK for the current DCHUB, then initialize HUBP on power up */
+	hubp->funcs->hubp_clk_cntl(hubp, true);
+	hubp->funcs->hubp_init(hubp);
+
+	/* make sure DPPCLK is on */
+	dccg->funcs->dccg_root_gate_disable_control(dccg, dpp->inst, true);
+	dpp->funcs->dpp_dppclk_control(dpp, false, true);
+
+	/* make sure OPP_PIPE_CLOCK_EN = 1 */
+	pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control(pipe_ctx->stream_res.opp, true);
+
+	/*to do: insert PG here*/
+	if (dc->vm_pa_config.valid) {
+		struct vm_system_aperture_param aperture;
+
+		/* Program system aperture settings from the DC physical address config */
+		aperture.sys_default.quad_part = 0;
+		aperture.sys_low.quad_part = dc->vm_pa_config.system_aperture.start_addr;
+		aperture.sys_high.quad_part = dc->vm_pa_config.system_aperture.end_addr;
+		hubp->funcs->hubp_set_vm_system_aperture_settings(hubp, &aperture);
+	}
+
+	/* The flip interrupt is only set up on a top pipe whose plane enables it. */
+	wants_flip_int = !pipe_ctx->top_pipe &&
+			 pipe_ctx->plane_state &&
+			 pipe_ctx->plane_state->flip_int_enabled;
+
+	if (wants_flip_int && hubp->funcs->hubp_set_flip_int)
+		hubp->funcs->hubp_set_flip_int(hubp);
+}
+
+/* Disable the HW used by a plane and detach the pipe from its stream and plane.
+ * note: cannot disable until disconnect is complete
+ */
+void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx)
+{
+ struct hubp *hubp = pipe_ctx->plane_res.hubp;
+ struct dpp *dpp = pipe_ctx->plane_res.dpp;
+ struct dccg *dccg = dc->res_pool->dccg;
+
+ /* Wait for the MPCC disconnect first; nothing below may run before it completes. */
+ dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx);
+
+ /* In flip immediate with pipe splitting case GSL is used for
+ * synchronization so we must disable it when the plane is disabled.
+ */
+ if (pipe_ctx->stream_res.gsl_group != 0)
+ dcn20_setup_gsl_group_as_lock(dc, pipe_ctx, false);
+/* NOTE(review): disabled code; the reason is not recorded here - confirm before restoring:
+ if (hubp->funcs->hubp_update_mall_sel)
+ hubp->funcs->hubp_update_mall_sel(hubp, 0, false);
+*/
+ dc->hwss.set_flip_control_gsl(pipe_ctx, false);
+
+ hubp->funcs->hubp_clk_cntl(hubp, false);
+
+ dpp->funcs->dpp_dppclk_control(dpp, false, false);
+ dccg->funcs->dccg_root_gate_disable_control(dccg, dpp->inst, false);
+
+ hubp->power_gated = true; /* dcn35_disable_plane() returns early once this is set */
+
+ hubp->funcs->hubp_reset(hubp);
+ dpp->funcs->dpp_reset(dpp);
+
+ pipe_ctx->stream = NULL;
+ memset(&pipe_ctx->stream_res, 0, sizeof(pipe_ctx->stream_res));
+ memset(&pipe_ctx->plane_res, 0, sizeof(pipe_ctx->plane_res));
+ pipe_ctx->top_pipe = NULL;
+ pipe_ctx->bottom_pipe = NULL;
+ pipe_ctx->plane_state = NULL;
+ /* From here on pipe_ctx references no stream, plane, neighbour pipe or HW resource. */
+
+}
+
+void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx)
+{
+	struct dce_hwseq *hws = dc->hwseq;
+	struct hubp *hubp = pipe_ctx->plane_res.hubp;
+	/* Fetched up front: the atomic disable hook may clear pipe_ctx->stream_res. */
+	struct timing_generator *phantom_tg =
+		dc_state_get_pipe_subvp_type(state, pipe_ctx) == SUBVP_PHANTOM ?
+		pipe_ctx->stream_res.tg : NULL;
+
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	if (hubp == NULL || hubp->power_gated)
+		return;
+
+	if (hws->funcs.plane_atomic_disable)
+		hws->funcs.plane_atomic_disable(dc, pipe_ctx);
+
+	/* Turn the phantom OTG back off once the phantom plane is fully disabled. */
+	if (phantom_tg && phantom_tg->funcs->disable_phantom_crtc)
+		phantom_tg->funcs->disable_phantom_crtc(phantom_tg);
+
+	DC_LOG_DC("Power down front end %d\n", pipe_ctx->pipe_idx);
+}
+
+void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
+ struct pg_block_update *update_state)
+{
+ bool hpo_frl_stream_enc_acquired = false; /* never set below, so it stays false */
+ bool hpo_dp_stream_enc_acquired = false;
+ int i = 0, j = 0;
+ int edp_num = 0;
+ struct dc_link *edp_links[MAX_NUM_EDP] = { NULL };
+
+ memset(update_state, 0, sizeof(struct pg_block_update));
+ /* A true entry requests gating of that block; entries still used by @context are cleared. */
+ for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++) {
+ if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i] &&
+ dc->res_pool->hpo_dp_stream_enc[i]) {
+ hpo_dp_stream_enc_acquired = true;
+ break;
+ }
+ }
+
+ if (!hpo_frl_stream_enc_acquired && !hpo_dp_stream_enc_acquired)
+ update_state->pg_res_update[PG_HPO] = true;
+
+ update_state->pg_res_update[PG_DWB] = true;
+ /* Per pipe: flag every resource type, then clear the instances the pipe still references. */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ for (j = 0; j < PG_HW_PIPE_RESOURCES_NUM_ELEMENT; j++)
+ update_state->pg_pipe_res_update[j][i] = true;
+
+ if (!pipe_ctx) /* pipe_ctx is the address of an array element */
+ continue;
+
+ if (pipe_ctx->plane_res.hubp)
+ update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->plane_res.hubp->inst] = false;
+ /* NOTE(review): the DPP entry is indexed by the HUBP instance, not dpp->inst - confirm. */
+ if (pipe_ctx->plane_res.dpp && pipe_ctx->plane_res.hubp)
+ update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->plane_res.hubp->inst] = false;
+
+ if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp)
+ update_state->pg_pipe_res_update[PG_MPCC][pipe_ctx->plane_res.mpcc_inst] = false;
+
+ if (pipe_ctx->stream_res.dsc) {
+ update_state->pg_pipe_res_update[PG_DSC][pipe_ctx->stream_res.dsc->inst] = false;
+ if (dc->caps.sequential_ono) {
+ update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false;
+ update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false;
+
+ /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */
+ if (!pipe_ctx->top_pipe && pipe_ctx->plane_res.hubp &&
+ pipe_ctx->plane_res.hubp->inst != pipe_ctx->stream_res.dsc->inst) {
+ for (j = 0; j < dc->res_pool->pipe_count; ++j) {
+ update_state->pg_pipe_res_update[PG_HUBP][j] = false;
+ update_state->pg_pipe_res_update[PG_DPP][j] = false;
+ }
+ }
+ }
+ }
+
+ if (pipe_ctx->stream_res.opp)
+ update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false;
+
+ if (pipe_ctx->stream_res.hpo_dp_stream_enc)
+ update_state->pg_pipe_res_update[PG_DPSTREAM][pipe_ctx->stream_res.hpo_dp_stream_enc->inst] = false;
+ }
+
+ for (i = 0; i < dc->link_count; i++) {
+ update_state->pg_pipe_res_update[PG_PHYSYMCLK][dc->links[i]->link_enc_hw_inst] = true;
+ if (dc->links[i]->type != dc_connection_none)
+ update_state->pg_pipe_res_update[PG_PHYSYMCLK][dc->links[i]->link_enc_hw_inst] = false;
+ }
+
+ /* Domain 24 controls all the OTG, MPC and OPP blocks: as long as one OTG is still up, avoid enabling OTG PG. */
+ for (i = 0; i < dc->res_pool->timing_generator_count; i++) {
+ struct timing_generator *tg = dc->res_pool->timing_generators[i];
+ if (tg && tg->funcs->is_tg_enabled(tg)) {
+ update_state->pg_pipe_res_update[PG_OPTC][i] = false;
+ break;
+ }
+ }
+
+ dc_get_edp_links(dc, edp_links, &edp_num);
+ if (edp_num == 0 ||
+ ((!edp_links[0] || !edp_links[0]->edp_sink_present) &&
+ (!edp_links[1] || !edp_links[1]->edp_sink_present))) {
+ /* No eDP exists in this config: keep Domain 24 powered on. For S0i3 this is handled in dmubfw. */
+ update_state->pg_pipe_res_update[PG_OPTC][0] = false;
+ }
+ /* Sequential ONO: below the highest HUBP/DPP pair that stays on, every lower pair stays on too. */
+ if (dc->caps.sequential_ono) {
+ for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
+ if (!update_state->pg_pipe_res_update[PG_HUBP][i] &&
+ !update_state->pg_pipe_res_update[PG_DPP][i]) {
+ for (j = i - 1; j >= 0; j--) {
+ update_state->pg_pipe_res_update[PG_HUBP][j] = false;
+ update_state->pg_pipe_res_update[PG_DPP][j] = false;
+ }
+
+ break;
+ }
+ }
+ }
+}
+
+void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
+ struct pg_block_update *update_state)
+{
+ bool hpo_frl_stream_enc_acquired = false; /* never set below, so it stays false */
+ bool hpo_dp_stream_enc_acquired = false;
+ int i = 0, j = 0;
+
+ memset(update_state, 0, sizeof(struct pg_block_update));
+ /* A true entry requests ungating: dc->current_state is compared against @context per pipe. */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *cur_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (cur_pipe == NULL || new_pipe == NULL)
+ continue;
+
+ if ((!cur_pipe->plane_state && new_pipe->plane_state) ||
+ (!cur_pipe->stream && new_pipe->stream) ||
+ (cur_pipe->stream != new_pipe->stream && new_pipe->stream)) {
+ // Pipe gains a plane or a (different) stream: flag every block the new pipe uses
+ for (j = 0; j < PG_HW_PIPE_RESOURCES_NUM_ELEMENT; j++) {
+ if (j == PG_HUBP && new_pipe->plane_res.hubp)
+ update_state->pg_pipe_res_update[j][new_pipe->plane_res.hubp->inst] = true;
+
+ if (j == PG_DPP && new_pipe->plane_res.dpp)
+ update_state->pg_pipe_res_update[j][new_pipe->plane_res.dpp->inst] = true;
+
+ if (j == PG_MPCC && new_pipe->plane_res.dpp)
+ update_state->pg_pipe_res_update[j][new_pipe->plane_res.mpcc_inst] = true;
+
+ if (j == PG_DSC && new_pipe->stream_res.dsc)
+ update_state->pg_pipe_res_update[j][new_pipe->stream_res.dsc->inst] = true;
+
+ if (j == PG_OPP && new_pipe->stream_res.opp)
+ update_state->pg_pipe_res_update[j][new_pipe->stream_res.opp->inst] = true;
+
+ if (j == PG_OPTC && new_pipe->stream_res.tg)
+ update_state->pg_pipe_res_update[j][new_pipe->stream_res.tg->inst] = true;
+
+ if (j == PG_DPSTREAM && new_pipe->stream_res.hpo_dp_stream_enc)
+ update_state->pg_pipe_res_update[j][new_pipe->stream_res.hpo_dp_stream_enc->inst] = true;
+ }
+ } else if (cur_pipe->plane_state == new_pipe->plane_state ||
+ cur_pipe == new_pipe) {
+ // Unchanged pipe: flag only the blocks whose assignment differs from the current state
+ for (j = 0; j < PG_HW_PIPE_RESOURCES_NUM_ELEMENT; j++) {
+ if (j == PG_HUBP &&
+ cur_pipe->plane_res.hubp != new_pipe->plane_res.hubp &&
+ new_pipe->plane_res.hubp)
+ update_state->pg_pipe_res_update[j][new_pipe->plane_res.hubp->inst] = true;
+
+ if (j == PG_DPP &&
+ cur_pipe->plane_res.dpp != new_pipe->plane_res.dpp &&
+ new_pipe->plane_res.dpp)
+ update_state->pg_pipe_res_update[j][new_pipe->plane_res.dpp->inst] = true;
+
+ if (j == PG_OPP &&
+ cur_pipe->stream_res.opp != new_pipe->stream_res.opp &&
+ new_pipe->stream_res.opp)
+ update_state->pg_pipe_res_update[j][new_pipe->stream_res.opp->inst] = true;
+
+ if (j == PG_DSC &&
+ cur_pipe->stream_res.dsc != new_pipe->stream_res.dsc &&
+ new_pipe->stream_res.dsc)
+ update_state->pg_pipe_res_update[j][new_pipe->stream_res.dsc->inst] = true;
+
+ if (j == PG_OPTC &&
+ cur_pipe->stream_res.tg != new_pipe->stream_res.tg &&
+ new_pipe->stream_res.tg)
+ update_state->pg_pipe_res_update[j][new_pipe->stream_res.tg->inst] = true;
+
+ if (j == PG_DPSTREAM &&
+ cur_pipe->stream_res.hpo_dp_stream_enc != new_pipe->stream_res.hpo_dp_stream_enc &&
+ new_pipe->stream_res.hpo_dp_stream_enc)
+ update_state->pg_pipe_res_update[j][new_pipe->stream_res.hpo_dp_stream_enc->inst] = true;
+ }
+ }
+ }
+
+ for (i = 0; i < dc->link_count; i++)
+ if (dc->links[i]->type != dc_connection_none)
+ update_state->pg_pipe_res_update[PG_PHYSYMCLK][dc->links[i]->link_enc_hw_inst] = true;
+
+ for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++) {
+ if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i] &&
+ dc->res_pool->hpo_dp_stream_enc[i]) {
+ hpo_dp_stream_enc_acquired = true;
+ break;
+ }
+ }
+
+ if (hpo_frl_stream_enc_acquired || hpo_dp_stream_enc_acquired)
+ update_state->pg_res_update[PG_HPO] = true;
+
+ if (hpo_frl_stream_enc_acquired)
+ update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true;
+ /* Sequential ONO: a DSC being ungated also needs the HUBP/DPP pair with the same index. */
+ if (dc->caps.sequential_ono) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (new_pipe->stream_res.dsc && !new_pipe->top_pipe &&
+ update_state->pg_pipe_res_update[PG_DSC][new_pipe->stream_res.dsc->inst]) {
+ update_state->pg_pipe_res_update[PG_HUBP][new_pipe->stream_res.dsc->inst] = true;
+ update_state->pg_pipe_res_update[PG_DPP][new_pipe->stream_res.dsc->inst] = true;
+
+ /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */
+ if (new_pipe->plane_res.hubp &&
+ new_pipe->plane_res.hubp->inst != new_pipe->stream_res.dsc->inst) {
+ for (j = 0; j < dc->res_pool->pipe_count; ++j) {
+ update_state->pg_pipe_res_update[PG_HUBP][j] = true;
+ update_state->pg_pipe_res_update[PG_DPP][j] = true;
+ }
+ }
+ }
+ }
+ /* Every HUBP/DPP pair below the highest one being ungated is ungated as well. */
+ for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) {
+ if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+ update_state->pg_pipe_res_update[PG_DPP][i]) {
+ for (j = i - 1; j >= 0; j--) {
+ update_state->pg_pipe_res_update[PG_HUBP][j] = true;
+ update_state->pg_pipe_res_update[PG_DPP][j] = true;
+ }
+
+ break;
+ }
+ }
+ }
+}
+
+/**
+ * dcn35_hw_block_power_down() - power down sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power down:
+ *
+ * ONO Region 3, DCPG 25: hpo - SKIPPED
+ * ONO Region 4, DCPG 0: dchubp0, dpp0
+ * ONO Region 6, DCPG 1: dchubp1, dpp1
+ * ONO Region 8, DCPG 2: dchubp2, dpp2
+ * ONO Region 10, DCPG 3: dchubp3, dpp3
+ * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will pwr dwn at IPS2 entry
+ * ONO Region 5, DCPG 16: dsc0
+ * ONO Region 7, DCPG 17: dsc1
+ * ONO Region 9, DCPG 18: dsc2
+ * ONO Region 11, DCPG 19: dsc3
+ * ONO Region 2, DCPG 24: mpc opp optc dwb
+ * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. will be pwr dwn after lono timer is armed
+ *
+ * If sequential ONO is specified the order is modified from ONO Region 11 -> ONO Region 0 descending.
+ *
+ * @dc: DC instance whose HW blocks are powered down
+ * @update_state: per-block flags; a true entry means the block is powered down here
+ */
+void dcn35_hw_block_power_down(struct dc *dc,
+ struct pg_block_update *update_state)
+{
+ int i = 0;
+ struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
+
+ if (!pg_cntl)
+ return;
+ if (dc->debug.ignore_pg)
+ return;
+
+ if (update_state->pg_res_update[PG_HPO]) {
+ if (pg_cntl->funcs->hpo_pg_control)
+ pg_cntl->funcs->hpo_pg_control(pg_cntl, false);
+ }
+
+ if (!dc->caps.sequential_ono) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+ update_state->pg_pipe_res_update[PG_DPP][i]) {
+ if (pg_cntl->funcs->hubp_dpp_pg_control)
+ pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, false);
+ }
+ }
+
+ for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) {
+ if (update_state->pg_pipe_res_update[PG_DSC][i]) {
+ if (pg_cntl->funcs->dsc_pg_control)
+ pg_cntl->funcs->dsc_pg_control(pg_cntl, i, false);
+ }
+ }
+ } else {
+ for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { /* NOTE(review): DSC walks pipe_count, not num_dsc */
+ if (update_state->pg_pipe_res_update[PG_DSC][i]) {
+ if (pg_cntl->funcs->dsc_pg_control)
+ pg_cntl->funcs->dsc_pg_control(pg_cntl, i, false);
+ }
+
+ if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+ update_state->pg_pipe_res_update[PG_DPP][i]) {
+ if (pg_cntl->funcs->hubp_dpp_pg_control)
+ pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, false);
+ }
+ }
+ }
+
+ /* this will need all the clients to unregister optc interrupts; let dmubfw handle this */
+ if (pg_cntl->funcs->plane_otg_pg_control)
+ pg_cntl->funcs->plane_otg_pg_control(pg_cntl, false);
+
+ //domain22, 23, 25 currently always on.
+
+}
+
+/**
+ * dcn35_hw_block_power_up() - power up sequence
+ *
+ * The following sequence describes the ON-OFF (ONO) for power up:
+ *
+ * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
+ * ONO Region 2, DCPG 24: mpc opp optc dwb
+ * ONO Region 5, DCPG 16: dsc0
+ * ONO Region 7, DCPG 17: dsc1
+ * ONO Region 9, DCPG 18: dsc2
+ * ONO Region 11, DCPG 19: dsc3
+ * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit
+ * ONO Region 4, DCPG 0: dchubp0, dpp0
+ * ONO Region 6, DCPG 1: dchubp1, dpp1
+ * ONO Region 8, DCPG 2: dchubp2, dpp2
+ * ONO Region 10, DCPG 3: dchubp3, dpp3
+ * ONO Region 3, DCPG 25: hpo - SKIPPED
+ *
+ * If sequential ONO is specified the order is modified from ONO Region 0 -> ONO Region 11 ascending.
+ *
+ * @dc: DC instance whose HW blocks are powered up
+ * @update_state: per-block flags; a true entry means the block is powered up here
+ */
+void dcn35_hw_block_power_up(struct dc *dc,
+ struct pg_block_update *update_state)
+{
+ int i = 0;
+ struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
+
+ if (!pg_cntl)
+ return;
+ if (dc->debug.ignore_pg)
+ return;
+ //domain22, 23, 25 currently always on.
+ /* this will need all the clients to unregister optc interrupts; let dmubfw handle this */
+ if (pg_cntl->funcs->plane_otg_pg_control)
+ pg_cntl->funcs->plane_otg_pg_control(pg_cntl, true);
+
+ if (!dc->caps.sequential_ono) {
+ for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++)
+ if (update_state->pg_pipe_res_update[PG_DSC][i]) {
+ if (pg_cntl->funcs->dsc_pg_control)
+ pg_cntl->funcs->dsc_pg_control(pg_cntl, i, true);
+ }
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+ update_state->pg_pipe_res_update[PG_DPP][i]) {
+ if (pg_cntl->funcs->hubp_dpp_pg_control)
+ pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, true);
+ }
+
+ if (dc->caps.sequential_ono) { /* NOTE(review): DSC walks pipe_count here, not num_dsc */
+ if (update_state->pg_pipe_res_update[PG_DSC][i]) {
+ if (pg_cntl->funcs->dsc_pg_control)
+ pg_cntl->funcs->dsc_pg_control(pg_cntl, i, true);
+ }
+ }
+ }
+ if (update_state->pg_res_update[PG_HPO]) {
+ if (pg_cntl->funcs->hpo_pg_control)
+ pg_cntl->funcs->hpo_pg_control(pg_cntl, true);
+ }
+}
+/* Apply @power_on to the DPP, DP stream and PHY symclk root clocks flagged in @update_state. */
+static void dcn35_pipe_root_clock_control(struct dc *dc,
+		struct pg_block_update *update_state, bool power_on)
+{
+	struct dce_hwseq *hws = dc->hwseq;
+	int idx;
+
+	for (idx = 0; idx < dc->res_pool->pipe_count; idx++) {
+		/* The DPP root clock follows the HUBP/DPP pair: both must be flagged. */
+		if (update_state->pg_pipe_res_update[PG_HUBP][idx] &&
+		    update_state->pg_pipe_res_update[PG_DPP][idx] &&
+		    hws->funcs.dpp_root_clock_control)
+			hws->funcs.dpp_root_clock_control(hws, idx, power_on);
+
+		if (update_state->pg_pipe_res_update[PG_DPSTREAM][idx] &&
+		    hws->funcs.dpstream_root_clock_control)
+			hws->funcs.dpstream_root_clock_control(hws, idx, power_on);
+	}
+
+	for (idx = 0; idx < dc->res_pool->dig_link_enc_count; idx++) {
+		if (update_state->pg_pipe_res_update[PG_PHYSYMCLK][idx] &&
+		    hws->funcs.physymclk_root_clock_control)
+			hws->funcs.physymclk_root_clock_control(hws, idx, power_on);
+	}
+}
+
+/*
+ * Root clock control for the blocks flagged in @update_state.
+ * Power up: pipe/link root clocks are enabled before the DSC clocks.
+ * Power down: the DSC clocks are disabled before the pipe/link root clocks.
+ * Does nothing when the resource pool has no pg_cntl.
+ */
+void dcn35_root_clock_control(struct dc *dc,
+		struct pg_block_update *update_state, bool power_on)
+{
+	struct dccg *dccg = dc->res_pool->dccg;
+	int dsc_idx;
+
+	if (!dc->res_pool->pg_cntl)
+		return;
+
+	/*enable root clock first when power up*/
+	if (power_on)
+		dcn35_pipe_root_clock_control(dc, update_state, power_on);
+
+	for (dsc_idx = 0; dsc_idx < dc->res_pool->res_cap->num_dsc; dsc_idx++) {
+		if (!update_state->pg_pipe_res_update[PG_DSC][dsc_idx])
+			continue;
+
+		if (power_on && dccg->funcs->enable_dsc)
+			dccg->funcs->enable_dsc(dccg, dsc_idx);
+		else if (!power_on && dccg->funcs->disable_dsc)
+			dccg->funcs->disable_dsc(dccg, dsc_idx);
+	}
+
+	if (!power_on)
+		dcn35_pipe_root_clock_control(dc, update_state, power_on);
+}
+
+/* Ungate and power up the blocks @context needs, then run the DCN20 bandwidth preparation. */
+void dcn35_prepare_bandwidth(struct dc *dc, struct dc_state *context)
+{
+	struct pg_block_update ungate_req;
+	const bool can_calc = !!dc->hwss.calc_blocks_to_ungate;
+
+	/* Without the calc hook there is no request to act on, so both steps are skipped. */
+	if (can_calc)
+		dc->hwss.calc_blocks_to_ungate(dc, context, &ungate_req);
+
+	/* Root clocks are handled before the required HW blocks are powered up. */
+	if (can_calc && dc->hwss.root_clock_control)
+		dc->hwss.root_clock_control(dc, &ungate_req, true);
+
+	if (can_calc && dc->hwss.hw_block_power_up)
+		dc->hwss.hw_block_power_up(dc, &ungate_req);
+
+	dcn20_prepare_bandwidth(dc, context);
+	print_pg_status(dc, __func__, ": after rcg and power up");
+}
+
+/* Run the DCN20 bandwidth optimization, then power gate and clock gate the unused blocks. */
+void dcn35_optimize_bandwidth(struct dc *dc, struct dc_state *context)
+{
+	struct pg_block_update pg_update_state;
+
+	/* This path powers blocks down, so the status log now says so. */
+	print_pg_status(dc, __func__, ": before rcg and power down");
+
+	dcn20_optimize_bandwidth(dc, context);
+
+	if (dc->hwss.calc_blocks_to_gate) {
+		dc->hwss.calc_blocks_to_gate(dc, context, &pg_update_state);
+
+		/* try to power down unused blocks first, then gate their root clocks */
+		if (dc->hwss.hw_block_power_down)
+			dc->hwss.hw_block_power_down(dc, &pg_update_state);
+		if (dc->hwss.root_clock_control)
+			dc->hwss.root_clock_control(dc, &pg_update_state, false);
+	}
+
+	print_pg_status(dc, __func__, ": after rcg and power down");
+}
+
+void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
+ int num_pipes, struct dc_crtc_timing_adjust adjust)
+{
+ int i = 0;
+ struct drr_params params = {0};
+ // DRR set trigger event mapped to OTG_TRIG_A
+ unsigned int event_triggers = 0x2;//Bit[1]: OTG_TRIG_A
+ // Note DRR trigger events are generated regardless of whether num frames met.
+ unsigned int num_frames = 2;
+
+ params.vertical_total_max = adjust.v_total_max;
+ params.vertical_total_min = adjust.v_total_min;
+ params.vertical_total_mid = adjust.v_total_mid;
+ params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num;
+
+ for (i = 0; i < num_pipes; i++) {
+ /* dc_state_destruct() might null the stream resources, so fetch tg
+ * here first to avoid a race condition. The lifetime of the pointee
+ * itself (the timing_generator object) is not a problem here.
+ */
+ struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
+
+ if ((tg != NULL) && tg->funcs) {
+ if (pipe_ctx[i]->stream && pipe_ctx[i]->stream->ctx->dc->debug.static_screen_wait_frames) {
+ struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing;
+ struct dc *dc = pipe_ctx[i]->stream->ctx->dc;
+ unsigned int frame_rate = timing->pix_clk_100hz / (timing->h_total * timing->v_total);
+
+ if (frame_rate >= 120 && dc->caps.ips_support &&
+ dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) {
+ /* IPS enabled. NOTE(review): pix_clk_100hz is in 100 Hz units, so frame_rate looks 100x too small for the >= 120 test - confirm */
+ num_frames = 2 * (frame_rate % 60); /* not reset, so it also applies to later pipes */
+ }
+ }
+ set_drr_and_clear_adjust_pending(pipe_ctx[i], pipe_ctx[i]->stream, &params);
+ if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
+ if (tg->funcs->set_static_screen_control)
+ tg->funcs->set_static_screen_control(
+ tg, event_triggers, num_frames);
+ }
+ }
+}
+void dcn35_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+		int num_pipes, const struct dc_static_screen_params *params)
+{
+	/* Static-screen trigger mask: bit 9 surface update, bit 3 cursor update, bit 0 forced. */
+	unsigned int triggers = (params->triggers.surface_update ? 0x200 : 0) |
+			(params->triggers.cursor_update ? 0x8 : 0) |
+			(params->triggers.force_trigger ? 0x1 : 0);
+	int i; /* signed like num_pipes, so a negative count simply skips the loop */
+
+	for (i = 0; i < num_pipes; i++) {
+		struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
+
+		/* Skip pipes without a TG or without the hook, as dcn35_set_drr() does. */
+		if (tg && tg->funcs && tg->funcs->set_static_screen_control)
+			tg->funcs->set_static_screen_control(tg, triggers, params->num_frames);
+	}
+}
+
+/*
+ * Program the long vtotal range on each pipe that has a stream and a TG with set_long_vtotal.
+ */
+void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx,
+		int num_pipes, uint32_t v_total_min, uint32_t v_total_max)
+{
+	int i;
+	struct long_vtotal_params params = {0};
+
+	params.vertical_total_max = v_total_max;
+	params.vertical_total_min = v_total_min;
+
+	for (i = 0; i < num_pipes; i++) {
+		struct timing_generator *tg;
+
+		if (!pipe_ctx[i] || !pipe_ctx[i]->stream)
+			continue;
+
+		/* &stream->timing can never be NULL here, so no fallback value is needed. */
+		params.vertical_blank_start = pipe_ctx[i]->stream->timing.v_total -
+				pipe_ctx[i]->stream->timing.v_front_porch;
+
+		tg = pipe_ctx[i]->stream_res.tg;
+		if (tg && tg->funcs && tg->funcs->set_long_vtotal)
+			tg->funcs->set_long_vtotal(tg, &params);
+	}
+}
+
+static bool should_avoid_empty_tu(struct pipe_ctx *pipe_ctx)
+{
+	/* Calculate average pixel count per TU and return true if it is under
+	 * ~2.00, i.e. when empty TUs must be avoided. Only required for DPIA
+	 * tunneling, as empty TUs are legal on native DP links. Assumes TU size
+	 * 64 (never reprogrammed from HW default). MTPs have no such limit (MST).
+	 */
+	unsigned int pix_clk_mhz;
+	unsigned int symclk_mhz;
+	unsigned int avg_pix_per_tu_x1000;
+	unsigned int tu_size_bytes = 64;
+	struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
+	struct dc_link_settings *link_settings = &pipe_ctx->link_config.dp_link_settings;
+	const struct dc *dc = pipe_ctx->stream->link->dc;
+
+	if (pipe_ctx->link_config.dp_tunnel_settings.should_enable_dp_tunneling == false)
+		return false;
+
+	// Not necessary for MST configurations
+	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
+		return false;
+
+	pix_clk_mhz = timing->pix_clk_100hz / 10000;
+	// If this is true, can't block due to dynamic ODM
+	if (pix_clk_mhz > dc->clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz)
+		return false;
+
+	switch (link_settings->link_rate) {
+	case LINK_RATE_LOW:
+		symclk_mhz = 162;
+		break;
+	case LINK_RATE_HIGH:
+		symclk_mhz = 270;
+		break;
+	case LINK_RATE_HIGH2:
+		symclk_mhz = 540;
+		break;
+	case LINK_RATE_HIGH3:
+		symclk_mhz = 810;
+		break;
+	default:
+		// We shouldn't be tunneling any other rates, something is wrong
+		ASSERT(0);
+		return false;
+	}
+
+	// A zero lane count cannot be evaluated; bail out rather than divide by zero
+	if (!link_settings->lane_count)
+		return false;
+	avg_pix_per_tu_x1000 = (1000 * pix_clk_mhz * tu_size_bytes)
+		/ (symclk_mhz * link_settings->lane_count);
+	// Add small empirically-decided margin to account for potential jitter
+	return (avg_pix_per_tu_x1000 < 2020);
+}
+
+/*
+ * Tell whether the DP DIG pixel rate divider policy applies to this pipe.
+ * Rejected when is_h_timing_divisible_by_2() fails or empty TUs must be avoided.
+ */
+bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx)
+{
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
+
+	if (!is_h_timing_divisible_by_2(pipe_ctx->stream) || should_avoid_empty_tu(pipe_ctx))
+		return false;
+
+	/* Non-128b/132b DP signals only, and only when the debug option enables the policy. */
+	return dc_is_dp_signal(pipe_ctx->stream->signal) &&
+	       !dc->link_srv->dp_is_128b_132b_signal(pipe_ctx) &&
+	       dc->debug.enable_dp_dig_pixel_rate_div_policy;
+}
+
+/*
+ * Request power up of every per-pipe block plus HPO and DWB, so that the
+ * gating state matches the pre-OS configuration.
+ */
+static void dcn35_calc_blocks_to_ungate_for_hw_release(struct dc *dc, struct pg_block_update *update_state)
+{
+	int res, pipe;
+
+	memset(update_state, 0, sizeof(*update_state));
+	update_state->pg_res_update[PG_HPO] = true;
+	update_state->pg_res_update[PG_DWB] = true;
+
+	for (res = 0; res < PG_HW_PIPE_RESOURCES_NUM_ELEMENT; res++)
+		for (pipe = 0; pipe < dc->res_pool->pipe_count; pipe++)
+			update_state->pg_pipe_res_update[res][pipe] = true;
+}
+
+/*
+ * Undo all power and root clock gating so the hardware is handed back in its
+ * pre-OS state. The regular hwss hooks are reused with an "ungate all" request.
+ */
+void dcn35_hardware_release(struct dc *dc)
+{
+	struct pg_block_update ungate_all;
+
+	dcn35_calc_blocks_to_ungate_for_hw_release(dc, &ungate_all);
+
+	/* Root clocks are handled first, then the required HW blocks are powered up. */
+	if (dc->hwss.root_clock_control)
+		dc->hwss.root_clock_control(dc, &ungate_all, true);
+	if (dc->hwss.hw_block_power_up)
+		dc->hwss.hw_block_power_up(dc, &ungate_all);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
new file mode 100644
index 000000000000..0b1d6f608edd
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_HWSS_DCN35_H__
+#define __DC_HWSS_DCN35_H__
+
+#include "hw_sequencer_private.h"
+
+struct dc;
+
+void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx);
+
+void dcn35_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool power_on);
+
+void dcn35_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool clock_on);
+
+void dcn35_dpstream_root_clock_control(struct dce_hwseq *hws, unsigned int dp_hpo_inst, bool clock_on);
+
+void dcn35_physymclk_root_clock_control(struct dce_hwseq *hws, unsigned int phy_inst, bool clock_on);
+
+void dcn35_enable_power_gating_plane(struct dce_hwseq *hws, bool enable);
+
+void dcn35_set_dmu_fgcg(struct dce_hwseq *hws, bool enable);
+
+void dcn35_init_hw(struct dc *dc);
+
+void dcn35_disable_link_output(struct dc_link *link,
+ const struct link_resource *link_res,
+ enum signal_type signal);
+
+void dcn35_power_down_on_boot(struct dc *dc);
+
+bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable);
+
+void dcn35_z10_restore(const struct dc *dc);
+
+void dcn35_init_pipes(struct dc *dc, struct dc_state *context);
+void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx);
+void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
+
+void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
+ struct pg_block_update *update_state);
+void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
+ struct pg_block_update *update_state);
+void dcn35_hw_block_power_up(struct dc *dc,
+ struct pg_block_update *update_state);
+void dcn35_hw_block_power_down(struct dc *dc,
+ struct pg_block_update *update_state);
+void dcn35_root_clock_control(struct dc *dc,
+ struct pg_block_update *update_state, bool power_on);
+
+void dcn35_prepare_bandwidth(
+ struct dc *dc,
+ struct dc_state *context);
+
+void dcn35_optimize_bandwidth(
+ struct dc *dc,
+ struct dc_state *context);
+
+void dcn35_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable);
+void dcn35_dsc_pg_control(
+ struct dce_hwseq *hws,
+ unsigned int dsc_inst,
+ bool power_on);
+
+void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
+ int num_pipes, struct dc_crtc_timing_adjust adjust);
+
+void dcn35_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+ int num_pipes, const struct dc_static_screen_params *params);
+
+void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx,
+ int num_pipes, uint32_t v_total_min, uint32_t v_total_max);
+
+bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx);
+
+void dcn35_hardware_release(struct dc *dc);
+
+#endif /* __DC_HWSS_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
new file mode 100644
index 000000000000..f2f16a0bdb4f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
+#include "dcn301/dcn301_hwseq.h"
+#include "dcn31/dcn31_hwseq.h"
+#include "dcn314/dcn314_hwseq.h"
+#include "dcn32/dcn32_hwseq.h"
+#include "dcn35/dcn35_hwseq.h"
+
+#include "dcn35_init.h"
+
+static const struct hw_sequencer_funcs dcn35_funcs = { /* public hook table; dcn35_hw_sequencer_construct() copies it into dc->hwss */
+ .program_gamut_remap = dcn30_program_gamut_remap,
+ .init_hw = dcn35_init_hw,
+ .power_down_on_boot = dcn35_power_down_on_boot,
+ .apply_ctx_to_hw = dce110_apply_ctx_to_hw,
+ .apply_ctx_for_surface = NULL, /* explicitly NULL: no handler installed for this hook */
+ .program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
+ .wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
+ .post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
+ .update_plane_addr = dcn20_update_plane_addr,
+ .update_dchub = dcn10_update_dchub,
+ .update_pending_status = dcn10_update_pending_status,
+ .program_output_csc = dcn20_program_output_csc,
+ .enable_accelerated_mode = dce110_enable_accelerated_mode,
+ .enable_timing_synchronization = dcn10_enable_timing_synchronization,
+ .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
+ .update_info_frame = dcn31_update_info_frame,
+ .send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
+ .enable_stream = dcn20_enable_stream,
+ .disable_stream = dce110_disable_stream,
+ .unblank_stream = dcn32_unblank_stream,
+ .blank_stream = dce110_blank_stream,
+ .enable_audio_stream = dce110_enable_audio_stream,
+ .disable_audio_stream = dce110_disable_audio_stream,
+ .disable_plane = dcn35_disable_plane,
+ .disable_pixel_data = dcn20_disable_pixel_data,
+ .pipe_control_lock = dcn20_pipe_control_lock,
+ .interdependent_update_lock = dcn10_lock_all_pipes,
+ .cursor_lock = dcn10_cursor_lock,
+ .prepare_bandwidth = dcn35_prepare_bandwidth,
+ .optimize_bandwidth = dcn35_optimize_bandwidth,
+ .update_bandwidth = dcn20_update_bandwidth,
+ .set_drr = dcn35_set_drr,
+ .get_position = dcn10_get_position,
+ .set_static_screen_control = dcn35_set_static_screen_control,
+ .setup_stereo = dcn10_setup_stereo,
+ .set_avmute = dcn30_set_avmute,
+ .log_hw_state = dcn10_log_hw_state,
+ .get_hw_state = dcn10_get_hw_state,
+ .clear_status_bits = dcn10_clear_status_bits,
+ .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+ .edp_backlight_control = dce110_edp_backlight_control,
+ .edp_power_control = dce110_edp_power_control,
+ .edp_wait_for_T12 = dce110_edp_wait_for_T12,
+ .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
+ .set_cursor_position = dcn10_set_cursor_position,
+ .set_cursor_attribute = dcn10_set_cursor_attribute,
+ .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
+ .setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
+ .set_clock = dcn10_set_clock,
+ .get_clock = dcn10_get_clock,
+ .program_triplebuffer = dcn20_program_triple_buffer,
+ .enable_writeback = dcn30_enable_writeback,
+ .disable_writeback = dcn30_disable_writeback,
+ .update_writeback = dcn30_update_writeback,
+ .dmdata_status_done = dcn20_dmdata_status_done,
+ .program_dmdata_engine = dcn30_program_dmdata_engine,
+ .set_dmdata_attributes = dcn20_set_dmdata_attributes,
+ .init_sys_ctx = dcn31_init_sys_ctx,
+ .init_vm_ctx = dcn20_init_vm_ctx,
+ .set_flip_control_gsl = dcn20_set_flip_control_gsl,
+ .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+ .calc_vupdate_position = dcn10_calc_vupdate_position,
+ .set_backlight_level = dcn31_set_backlight_level,
+ .set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
+ .set_pipe = dcn21_set_pipe,
+ .enable_lvds_link_output = dce110_enable_lvds_link_output,
+ .enable_tmds_link_output = dce110_enable_tmds_link_output,
+ .enable_dp_link_output = dce110_enable_dp_link_output,
+ .disable_link_output = dcn32_disable_link_output,
+ .z10_restore = dcn35_z10_restore,
+ .z10_save_init = dcn31_z10_save_init,
+ .set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
+ .optimize_pwr_state = dcn21_optimize_pwr_state,
+ .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
+ .update_visual_confirm_color = dcn10_update_visual_confirm_color,
+ .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations,
+ .calc_blocks_to_gate = dcn35_calc_blocks_to_gate,
+ .calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate,
+ .hw_block_power_up = dcn35_hw_block_power_up,
+ .hw_block_power_down = dcn35_hw_block_power_down,
+ .root_clock_control = dcn35_root_clock_control,
+ .set_long_vtotal = dcn35_set_long_vblank, /* hook is *_long_vtotal, handler is named *_long_vblank */
+ .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider,
+ .hardware_release = dcn35_hardware_release,
+ .detect_pipe_changes = dcn20_detect_pipe_changes,
+ .enable_plane = dcn20_enable_plane, /* the private table has its own .enable_plane (dcn35_enable_plane) */
+ .update_dchubp_dpp = dcn20_update_dchubp_dpp,
+ .post_unlock_reset_opp = dcn20_post_unlock_reset_opp,
+ .get_underflow_debug_data = dcn30_get_underflow_debug_data,
+};
+
+static const struct hwseq_private_funcs dcn35_private_funcs = { /* private hook table; copied into dc->hwseq->funcs by dcn35_hw_sequencer_construct() */
+ .init_pipes = dcn35_init_pipes,
+ .plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
+ .update_mpcc = dcn20_update_mpcc,
+ .set_input_transfer_func = dcn32_set_input_transfer_func,
+ .set_output_transfer_func = dcn32_set_output_transfer_func,
+ .power_down = dce110_power_down,
+ .enable_display_power_gating = dcn10_dummy_display_power_gating,
+ .blank_pixel_data = dcn20_blank_pixel_data,
+ .reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap,
+ .enable_stream_timing = dcn20_enable_stream_timing,
+ .edp_backlight_control = dce110_edp_backlight_control, /* same handler as the public table's entry */
+ .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt,
+ .did_underflow_occur = dcn10_did_underflow_occur,
+ .init_blank = dcn20_init_blank,
+ .disable_vga = NULL, /* explicitly NULL: no handler installed for this hook */
+ .bios_golden_init = dcn10_bios_golden_init,
+ .plane_atomic_disable = dcn35_plane_atomic_disable,
+ //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/
+ //.hubp_pg_control = dcn35_hubp_pg_control,
+ .dpp_root_clock_control = dcn35_dpp_root_clock_control,
+ .dpstream_root_clock_control = dcn35_dpstream_root_clock_control,
+ .physymclk_root_clock_control = dcn35_physymclk_root_clock_control,
+ .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree,
+ .update_odm = dcn35_update_odm,
+ .set_hdr_multiplier = dcn10_set_hdr_multiplier,
+ .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high,
+ .wait_for_blank_complete = dcn20_wait_for_blank_complete,
+ .dccg_init = dcn20_dccg_init,
+ .set_mcm_luts = dcn32_set_mcm_luts,
+ .setup_hpo_hw_control = dcn35_setup_hpo_hw_control,
+ .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
+ .resync_fifo_dccg_dio = dcn314_resync_fifo_dccg_dio,
+ .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy,
+ .dsc_pg_status = dcn32_dsc_pg_status,
+ .enable_plane = dcn35_enable_plane,
+ .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed,
+ .set_wait_for_update_needed_for_pipe = dcn10_set_wait_for_update_needed_for_pipe,
+};
+
+void dcn35_hw_sequencer_construct(struct dc *dc) /* install both dcn35 hook tables on @dc */
+{
+ dc->hwss = dcn35_funcs; /* struct assignment: @dc gets its own copy of the public table */
+ dc->hwseq->funcs = dcn35_private_funcs; /* same for the private table; dc->hwseq is dereferenced unchecked */
+
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h
new file mode 100644
index 000000000000..b67015032c35
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DCN35_INIT_H__
+#define __DC_DCN35_INIT_H__
+
+struct dc; /* forward declaration: this header only needs a pointer to struct dc */
+
+void dcn35_hw_sequencer_construct(struct dc *dc); /* assigns the dcn35 tables to dc->hwss and dc->hwseq->funcs */
+
+#endif /* __DC_DCN35_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile
index 59381d24800b..a4b3c1e99ec6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright © 2021 Advanced Micro Devices, Inc.
+# Copyright (c) 2022-2024 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -19,12 +19,10 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
-# Authors: AMD
-#
-# Makefile for dcn315.
+# Makefile for DCN351.
-DCN315 = dcn315_resource.o
+DCN351 = dcn351_hwseq.o dcn351_init.o
-AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315))
+AMD_DAL_DCN351 = $(addprefix $(AMDDALPATH)/dc/hwss/dcn351/,$(DCN351))
-AMD_DISPLAY_FILES += $(AMD_DAL_DCN315)
+AMD_DISPLAY_FILES += $(AMD_DAL_DCN351)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.c
new file mode 100644
index 000000000000..93fe5b262a3d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.c
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "resource.h"
+#include "dcn351_hwseq.h"
+#include "dcn35/dcn35_hwseq.h"
+
+#define DC_LOGGER_INIT(logger) \
+ struct dal_logger *dc_logger = logger
+
+#define DC_LOGGER \
+ dc_logger
+
+void dcn351_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
+ struct pg_block_update *update_state) /* dcn35 gate calculation followed by a pipe-order fixup */
+{
+ int i, j;
+
+ dcn35_calc_blocks_to_gate(dc, context, update_state); /* baseline flags come from the dcn35 helper */
+
+ for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { /* scan from the highest pipe index down */
+ if (!update_state->pg_pipe_res_update[PG_HUBP][i] &&
+ !update_state->pg_pipe_res_update[PG_DPP][i]) { /* pipe i has neither HUBP nor DPP flagged */
+ for (j = i - 1; j >= 0; j--) { /* so clear both flags on every lower-indexed pipe as well */
+ update_state->pg_pipe_res_update[PG_HUBP][j] = false;
+ update_state->pg_pipe_res_update[PG_DPP][j] = false;
+ }
+
+ break; /* only the highest such pipe is acted on */
+ }
+ }
+}
+
+void dcn351_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
+ struct pg_block_update *update_state) /* dcn35 ungate calculation followed by a pipe-order fixup */
+{
+ int i, j;
+
+ dcn35_calc_blocks_to_ungate(dc, context, update_state); /* baseline flags come from the dcn35 helper */
+
+ for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { /* scan from the highest pipe index down */
+ if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+ update_state->pg_pipe_res_update[PG_DPP][i]) { /* pipe i has both HUBP and DPP flagged */
+ for (j = i - 1; j >= 0; j--) { /* so set both flags on every lower-indexed pipe as well */
+ update_state->pg_pipe_res_update[PG_HUBP][j] = true;
+ update_state->pg_pipe_res_update[PG_DPP][j] = true;
+ }
+
+ break; /* only the highest such pipe is acted on */
+ }
+ }
+}
+
+/**
+ * dcn351_hw_block_power_down() - power down the flagged HW blocks, highest ONO region first
+ *
+ * ON-OFF (ONO) regions are walked in the following order on power down:
+ *
+ * ONO Region 11, DCPG 19: dsc3
+ * ONO Region 10, DCPG 3: dchubp3, dpp3
+ * ONO Region 9, DCPG 18: dsc2
+ * ONO Region 8, DCPG 2: dchubp2, dpp2
+ * ONO Region 7, DCPG 17: dsc1
+ * ONO Region 6, DCPG 1: dchubp1, dpp1
+ * ONO Region 5, DCPG 16: dsc0
+ * ONO Region 4, DCPG 0: dchubp0, dpp0
+ * ONO Region 3, DCPG 25: hpo - SKIPPED. Should be kept on
+ * ONO Region 2, DCPG 24: mpc opp optc dwb
+ * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power down at IPS2 entry
+ * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED. Will be powered down after the lono timer is armed
+ *
+ * @dc: display core instance; dc->res_pool->pg_cntl supplies the power-gating callbacks
+ * @update_state: pg_pipe_res_update[][] flags selecting which per-pipe blocks to power down
+ */
+void dcn351_hw_block_power_down(struct dc *dc,
+ struct pg_block_update *update_state)
+{
+ int i = 0;
+ struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
+
+ if (!pg_cntl || dc->debug.ignore_pg) /* no pg_cntl, or debug.ignore_pg set: leave everything as is */
+ return;
+
+ for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { /* highest pipe index first, as listed above */
+ if (update_state->pg_pipe_res_update[PG_DSC][i]) { /* DSC i is handled before HUBP/DPP i */
+ if (pg_cntl->funcs->dsc_pg_control) /* callback is optional */
+ pg_cntl->funcs->dsc_pg_control(pg_cntl, i, false);
+ }
+
+ if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+ update_state->pg_pipe_res_update[PG_DPP][i]) { /* one call covers HUBP+DPP; both flags required */
+ if (pg_cntl->funcs->hubp_dpp_pg_control) /* callback is optional */
+ pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, false);
+ }
+ }
+
+ // domain25 currently always on.
+
+ /* this will need all the clients to unregister optc interrupts, let dmubfw handle this */
+ if (pg_cntl->funcs->plane_otg_pg_control) /* called whenever present; update_state is not consulted */
+ pg_cntl->funcs->plane_otg_pg_control(pg_cntl, false);
+
+ // domain23 currently always on.
+ // domain22 currently always on.
+}
+
+/**
+ * dcn351_hw_block_power_up() - power up the flagged HW blocks, lowest ONO region first
+ *
+ * ON-OFF (ONO) regions are walked in the following order on power up:
+ *
+ * ONO Region 0, DCPG 22: dccg dio dcio - SKIPPED
+ * ONO Region 1, DCPG 23: dchubbub dchvm dchubbubmem - SKIPPED. PMFW will power up at IPS2 exit
+ * ONO Region 2, DCPG 24: mpc opp optc dwb
+ * ONO Region 3, DCPG 25: hpo - SKIPPED
+ * ONO Region 4, DCPG 0: dchubp0, dpp0
+ * ONO Region 5, DCPG 16: dsc0
+ * ONO Region 6, DCPG 1: dchubp1, dpp1
+ * ONO Region 7, DCPG 17: dsc1
+ * ONO Region 8, DCPG 2: dchubp2, dpp2
+ * ONO Region 9, DCPG 18: dsc2
+ * ONO Region 10, DCPG 3: dchubp3, dpp3
+ * ONO Region 11, DCPG 19: dsc3
+ *
+ * @dc: display core instance; dc->res_pool->pg_cntl supplies the power-gating callbacks
+ * @update_state: pg_pipe_res_update[][] flags selecting which per-pipe blocks to power up
+ */
+void dcn351_hw_block_power_up(struct dc *dc,
+ struct pg_block_update *update_state)
+{
+ int i = 0;
+ struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
+
+ if (!pg_cntl || dc->debug.ignore_pg) /* no pg_cntl, or debug.ignore_pg set: leave everything as is */
+ return;
+
+ // domain22 currently always on.
+ // domain23 currently always on.
+
+ /* this will need all the clients to unregister optc interrupts, let dmubfw handle this */
+ if (pg_cntl->funcs->plane_otg_pg_control) /* called whenever present; update_state is not consulted */
+ pg_cntl->funcs->plane_otg_pg_control(pg_cntl, true);
+
+ // domain25 currently always on.
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) { /* lowest pipe index first, as listed above */
+ if (update_state->pg_pipe_res_update[PG_HUBP][i] &&
+ update_state->pg_pipe_res_update[PG_DPP][i]) { /* one call covers HUBP+DPP; both flags required */
+ if (pg_cntl->funcs->hubp_dpp_pg_control) /* callback is optional */
+ pg_cntl->funcs->hubp_dpp_pg_control(pg_cntl, i, true);
+ }
+
+ if (update_state->pg_pipe_res_update[PG_DSC][i]) { /* DSC i is handled after HUBP/DPP i */
+ if (pg_cntl->funcs->dsc_pg_control) /* callback is optional */
+ pg_cntl->funcs->dsc_pg_control(pg_cntl, i, true);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.h
new file mode 100644
index 000000000000..6d8f3bfb668e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_HWSS_DCN351_H__
+#define __DC_HWSS_DCN351_H__
+
+#include "hw_sequencer_private.h"
+
+void dcn351_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
+ struct pg_block_update *update_state); /* dcn35 gate calc, then clears HUBP/DPP flags below the highest unflagged pipe */
+void dcn351_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context,
+ struct pg_block_update *update_state); /* dcn35 ungate calc, then sets HUBP/DPP flags below the highest flagged pipe */
+void dcn351_hw_block_power_up(struct dc *dc,
+ struct pg_block_update *update_state); /* powers up flagged blocks, lowest pipe index first */
+void dcn351_hw_block_power_down(struct dc *dc,
+ struct pg_block_update *update_state); /* powers down flagged blocks, highest pipe index first */
+
+#endif /* __DC_HWSS_DCN351_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
new file mode 100644
index 000000000000..09e60158f0b5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
+#include "dcn301/dcn301_hwseq.h"
+#include "dcn31/dcn31_hwseq.h"
+#include "dcn32/dcn32_hwseq.h"
+#include "dcn35/dcn35_hwseq.h"
+#include "dcn351/dcn351_hwseq.h"
+
+#include "dcn351_init.h"
+
+static const struct hw_sequencer_funcs dcn351_funcs = { /* public hook table; dcn351_hw_sequencer_construct() copies it into dc->hwss */
+ .program_gamut_remap = dcn30_program_gamut_remap,
+ .init_hw = dcn35_init_hw,
+ .power_down_on_boot = dcn35_power_down_on_boot,
+ .apply_ctx_to_hw = dce110_apply_ctx_to_hw,
+ .apply_ctx_for_surface = NULL, /* explicitly NULL: no handler installed for this hook */
+ .program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
+ .wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
+ .post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
+ .update_plane_addr = dcn20_update_plane_addr,
+ .update_dchub = dcn10_update_dchub,
+ .update_pending_status = dcn10_update_pending_status,
+ .program_output_csc = dcn20_program_output_csc,
+ .enable_accelerated_mode = dce110_enable_accelerated_mode,
+ .enable_timing_synchronization = dcn10_enable_timing_synchronization,
+ .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
+ .update_info_frame = dcn31_update_info_frame,
+ .send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
+ .enable_stream = dcn20_enable_stream,
+ .disable_stream = dce110_disable_stream,
+ .unblank_stream = dcn32_unblank_stream,
+ .blank_stream = dce110_blank_stream,
+ .enable_audio_stream = dce110_enable_audio_stream,
+ .disable_audio_stream = dce110_disable_audio_stream,
+ .disable_plane = dcn35_disable_plane,
+ .disable_pixel_data = dcn20_disable_pixel_data,
+ .pipe_control_lock = dcn20_pipe_control_lock,
+ .interdependent_update_lock = dcn10_lock_all_pipes,
+ .cursor_lock = dcn10_cursor_lock,
+ .prepare_bandwidth = dcn35_prepare_bandwidth,
+ .optimize_bandwidth = dcn35_optimize_bandwidth,
+ .update_bandwidth = dcn20_update_bandwidth,
+ .set_drr = dcn35_set_drr,
+ .get_position = dcn10_get_position,
+ .set_static_screen_control = dcn35_set_static_screen_control,
+ .setup_stereo = dcn10_setup_stereo,
+ .set_avmute = dcn30_set_avmute,
+ .log_hw_state = dcn10_log_hw_state,
+ .get_hw_state = dcn10_get_hw_state,
+ .clear_status_bits = dcn10_clear_status_bits,
+ .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+ .edp_backlight_control = dce110_edp_backlight_control,
+ .edp_power_control = dce110_edp_power_control,
+ .edp_wait_for_T12 = dce110_edp_wait_for_T12,
+ .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
+ .set_cursor_position = dcn10_set_cursor_position,
+ .set_cursor_attribute = dcn10_set_cursor_attribute,
+ .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
+ .setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
+ .set_clock = dcn10_set_clock,
+ .get_clock = dcn10_get_clock,
+ .program_triplebuffer = dcn20_program_triple_buffer,
+ .enable_writeback = dcn30_enable_writeback,
+ .disable_writeback = dcn30_disable_writeback,
+ .update_writeback = dcn30_update_writeback,
+ .dmdata_status_done = dcn20_dmdata_status_done,
+ .program_dmdata_engine = dcn30_program_dmdata_engine,
+ .set_dmdata_attributes = dcn20_set_dmdata_attributes,
+ .init_sys_ctx = dcn31_init_sys_ctx,
+ .init_vm_ctx = dcn20_init_vm_ctx,
+ .set_flip_control_gsl = dcn20_set_flip_control_gsl,
+ .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+ .calc_vupdate_position = dcn10_calc_vupdate_position,
+ .set_backlight_level = dcn31_set_backlight_level,
+ .set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
+ .set_pipe = dcn21_set_pipe,
+ .enable_lvds_link_output = dce110_enable_lvds_link_output,
+ .enable_tmds_link_output = dce110_enable_tmds_link_output,
+ .enable_dp_link_output = dce110_enable_dp_link_output,
+ .disable_link_output = dcn32_disable_link_output,
+ .z10_restore = dcn35_z10_restore,
+ .z10_save_init = dcn31_z10_save_init,
+ .set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
+ .optimize_pwr_state = dcn21_optimize_pwr_state,
+ .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
+ .update_visual_confirm_color = dcn10_update_visual_confirm_color,
+ .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations,
+ .calc_blocks_to_gate = dcn351_calc_blocks_to_gate, /* the four dcn351_* hooks here are the only non-dcn35 power-gating entries */
+ .calc_blocks_to_ungate = dcn351_calc_blocks_to_ungate,
+ .hw_block_power_up = dcn351_hw_block_power_up,
+ .hw_block_power_down = dcn351_hw_block_power_down,
+ .root_clock_control = dcn35_root_clock_control,
+ .set_long_vtotal = dcn35_set_long_vblank, /* hook is *_long_vtotal, handler is named *_long_vblank */
+ .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider,
+ .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, /* also set in dcn351_private_funcs */
+ .get_underflow_debug_data = dcn30_get_underflow_debug_data,
+}; /* NOTE(review): dcn35_funcs also sets hardware_release, detect_pipe_changes, enable_plane, update_dchubp_dpp, post_unlock_reset_opp - confirm they should stay unset here */
+
+static const struct hwseq_private_funcs dcn351_private_funcs = { /* private hook table; copied into dc->hwseq->funcs by dcn351_hw_sequencer_construct() */
+ .init_pipes = dcn35_init_pipes,
+ .plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
+ .update_mpcc = dcn20_update_mpcc,
+ .set_input_transfer_func = dcn32_set_input_transfer_func,
+ .set_output_transfer_func = dcn32_set_output_transfer_func,
+ .power_down = dce110_power_down,
+ .enable_display_power_gating = dcn10_dummy_display_power_gating,
+ .blank_pixel_data = dcn20_blank_pixel_data,
+ .reset_hw_ctx_wrap = dcn31_reset_hw_ctx_wrap,
+ .enable_stream_timing = dcn20_enable_stream_timing,
+ .edp_backlight_control = dce110_edp_backlight_control, /* same handler as the public table's entry */
+ .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt,
+ .did_underflow_occur = dcn10_did_underflow_occur,
+ .init_blank = dcn20_init_blank,
+ .disable_vga = NULL, /* explicitly NULL: no handler installed for this hook */
+ .bios_golden_init = dcn10_bios_golden_init,
+ .plane_atomic_disable = dcn35_plane_atomic_disable,
+ //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/
+ //.hubp_pg_control = dcn35_hubp_pg_control,
+ .dpp_root_clock_control = dcn35_dpp_root_clock_control,
+ .dpstream_root_clock_control = dcn35_dpstream_root_clock_control,
+ .physymclk_root_clock_control = dcn35_physymclk_root_clock_control,
+ .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree,
+ .update_odm = dcn35_update_odm,
+ .set_hdr_multiplier = dcn10_set_hdr_multiplier,
+ .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high,
+ .wait_for_blank_complete = dcn20_wait_for_blank_complete,
+ .dccg_init = dcn20_dccg_init,
+ .set_mcm_luts = dcn32_set_mcm_luts,
+ .setup_hpo_hw_control = dcn35_setup_hpo_hw_control,
+ .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
+ .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy,
+ .dsc_pg_status = dcn32_dsc_pg_status,
+ .enable_plane = dcn35_enable_plane,
+ .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed,
+ .set_wait_for_update_needed_for_pipe = dcn10_set_wait_for_update_needed_for_pipe,
+}; /* NOTE(review): dcn35_private_funcs additionally sets .resync_fifo_dccg_dio - confirm the omission here is intended */
+
+void dcn351_hw_sequencer_construct(struct dc *dc) /* install both dcn351 hook tables on @dc */
+{
+ dc->hwss = dcn351_funcs; /* struct assignment: @dc gets its own copy of the public table */
+ dc->hwseq->funcs = dcn351_private_funcs; /* same for the private table; dc->hwseq is dereferenced unchecked */
+
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h
new file mode 100644
index 000000000000..970b01008b23
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_DCN351_INIT_H__
+#define __DC_DCN351_INIT_H__
+
+struct dc; /* forward declaration: this header only needs a pointer to struct dc */
+
+void dcn351_hw_sequencer_construct(struct dc *dc); /* assigns the dcn351 tables to dc->hwss and dc->hwseq->funcs */
+
+#endif /* __DC_DCN351_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
new file mode 100644
index 000000000000..7c276c319086
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
@@ -0,0 +1,2671 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+
+#include "os_types.h"
+#include "dm_services.h"
+#include "basics/dc_common.h"
+#include "dm_helpers.h"
+#include "core_types.h"
+#include "resource.h"
+#include "dccg.h"
+#include "dce/dce_hwseq.h"
+#include "reg_helper.h"
+#include "abm.h"
+#include "hubp.h"
+#include "dchubbub.h"
+#include "timing_generator.h"
+#include "opp.h"
+#include "ipp.h"
+#include "mpc.h"
+#include "mcif_wb.h"
+#include "dc_dmub_srv.h"
+#include "link_hwss.h"
+#include "dpcd_defs.h"
+#include "clk_mgr.h"
+#include "dsc.h"
+#include "link_service.h"
+
+#include "dce/dmub_hw_lock_mgr.h"
+#include "dcn10/dcn10_cm_common.h"
+#include "dcn20/dcn20_optc.h"
+#include "dcn30/dcn30_cm_common.h"
+#include "dcn32/dcn32_hwseq.h"
+#include "dcn401_hwseq.h"
+#include "dcn401/dcn401_resource.h"
+#include "dc_state_priv.h"
+#include "link_enc_cfg.h"
+
+/* Expands to nothing in this file. */
+#define DC_LOGGER_INIT(logger)
+
+/* CTX and REG() expand to members of a local named 'hws'; one must be in scope at the use site. */
+#define CTX \
+ hws->ctx
+#define REG(reg)\
+ hws->regs->reg
+/* DC_LOGGER expands to dc->ctx->logger; a local named 'dc' must be in scope at the use site. */
+#define DC_LOGGER \
+ dc->ctx->logger
+
+
+/*
+ * FN() expands to the "shift, mask" argument pair for @field_name, taken from
+ * hws->shifts and hws->masks; @reg_name is not used in the expansion.
+ */
+#undef FN
+#define FN(reg_name, field_name) \
+ hws->shifts->field_name, hws->masks->field_name
+
+/*
+ * Fill the clock state of dc->current_state from entry 0 of the clock
+ * manager's clock table (MHz values scaled to kHz), mark both p-state change
+ * flags as supported, and hand the result to the clock manager's
+ * update_clocks() hook.
+ */
+void dcn401_initialize_min_clocks(struct dc *dc)
+{
+	struct clk_mgr *clk_mgr = dc->clk_mgr;
+	struct dc_clocks *clk = &dc->current_state->bw_ctx.bw.dcn.clk;
+
+	clk->dcfclk_deep_sleep_khz = DCN3_2_DCFCLK_DS_INIT_KHZ;
+	clk->dcfclk_khz = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz * 1000;
+	clk->socclk_khz = clk_mgr->bw_params->clk_table.entries[0].socclk_mhz * 1000;
+	clk->dramclk_khz = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz * 1000;
+	clk->dppclk_khz = clk_mgr->bw_params->clk_table.entries[0].dppclk_mhz * 1000;
+
+	if (!dc->debug.disable_boot_optimizations) {
+		/*
+		 * A connected display still needs correct timing even with
+		 * DPG_EN = 1, so DISPCLK cannot drop to the minimum frequency
+		 * (that could corrupt audio). Read the current DISPCLK back
+		 * from DENTIST and request the same value so the timing stays
+		 * valid and unchanged.
+		 */
+		clk->dispclk_khz = clk_mgr->funcs->get_dispclk_from_dentist(clk_mgr);
+	} else {
+		clk->dispclk_khz = clk_mgr->bw_params->clk_table.entries[0].dispclk_mhz * 1000;
+	}
+
+	clk->ref_dtbclk_khz = clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
+	clk->fclk_p_state_change_support = true;
+	clk->p_state_change_support = true;
+
+	clk_mgr->funcs->update_clocks(clk_mgr, dc->current_state, true);
+}
+
+/*
+ * Program the three MPC gamut remap blocks for the MPCC of @pipe_ctx:
+ *  - MPCC_MCM_FIRST_GAMUT_REMAP:  plane matrix when the plane enables remap,
+ *                                 bypass otherwise
+ *  - MPCC_MCM_SECOND_GAMUT_REMAP: always bypass
+ *  - MPCC_OGAM_GAMUT_REMAP:       stream matrix when this pipe has no
+ *                                 top_pipe and the stream enables remap,
+ *                                 bypass otherwise
+ */
+void dcn401_program_gamut_remap(struct pipe_ctx *pipe_ctx)
+{
+	struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
+	struct dc_plane_state *plane = pipe_ctx->plane_state;
+	unsigned int mpcc_id = pipe_ctx->plane_res.mpcc_inst;
+	struct mpc_grph_gamut_adjustment adjust;
+	unsigned int idx;
+
+	/* Only the pre-blend MCM location is expected for now. */
+	if (plane)
+		ASSERT(plane->mcm_location == MPCC_MOVABLE_CM_LOCATION_BEFORE);
+
+	/* MPCC_MCM_FIRST_GAMUT_REMAP */
+	memset(&adjust, 0, sizeof(adjust));
+	adjust.mpcc_gamut_remap_block_id = MPCC_MCM_FIRST_GAMUT_REMAP;
+	if (plane && plane->gamut_remap_matrix.enable_remap == true) {
+		adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+		for (idx = 0; idx < CSC_TEMPERATURE_MATRIX_SIZE; idx++)
+			adjust.temperature_matrix[idx] = plane->gamut_remap_matrix.matrix[idx];
+	} else {
+		adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+	}
+	mpc->funcs->set_gamut_remap(mpc, mpcc_id, &adjust);
+
+	/*
+	 * MPCC_MCM_SECOND_GAMUT_REMAP: bypass / disabled for now. The
+	 * adjustment struct is reused as-is, only type and block id change.
+	 */
+	adjust.mpcc_gamut_remap_block_id = MPCC_MCM_SECOND_GAMUT_REMAP;
+	adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+	mpc->funcs->set_gamut_remap(mpc, mpcc_id, &adjust);
+
+	/* MPCC_OGAM_GAMUT_REMAP: same usage as on DCN3x. */
+	memset(&adjust, 0, sizeof(adjust));
+	adjust.mpcc_gamut_remap_block_id = MPCC_OGAM_GAMUT_REMAP;
+	if (pipe_ctx->top_pipe == NULL &&
+	    pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
+		adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+		for (idx = 0; idx < CSC_TEMPERATURE_MATRIX_SIZE; idx++)
+			adjust.temperature_matrix[idx] =
+				pipe_ctx->stream->gamut_remap_matrix.matrix[idx];
+	} else {
+		adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+	}
+	mpc->funcs->set_gamut_remap(mpc, mpcc_id, &adjust);
+}
+
+/*
+ * dcn401_init_hw - hardware init sequence for @dc.
+ *
+ * In order: initializes clocks and the DCCG, applies the optional OPTC / VGA
+ * memory low-power settings, derives the reference clocks from the dc_bios
+ * firmware info, runs hw_init on every link encoder and records links whose
+ * DIG is already enabled, enables plane power gating, blanks all DP displays,
+ * optionally initializes/powers down pipes and eDP, initializes audio, panel
+ * control and ABM, enables clock gating, sets up HPO hardware control and
+ * several hubbub features, and finally queries DMUB capabilities and refreshes
+ * the bandwidth bounding box when required.
+ */
+void dcn401_init_hw(struct dc *dc)
+{
+ struct abm **abms = dc->res_pool->multiple_abms;
+ struct dce_hwseq *hws = dc->hwseq;
+ struct dc_bios *dcb = dc->ctx->dc_bios;
+ struct resource_pool *res_pool = dc->res_pool;
+ int i;
+ int edp_num;
+ uint32_t backlight = MAX_BACKLIGHT_LEVEL;
+ uint32_t user_level = MAX_BACKLIGHT_LEVEL;
+ int current_dchub_ref_freq = 0;
+
+ if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) {
+ dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
+
+ // mark dcmode limits present if any clock has distinct AC and DC values from SMU
+ dc->caps.dcmode_power_limits_present = dc->clk_mgr->funcs->is_dc_mode_present &&
+ dc->clk_mgr->funcs->is_dc_mode_present(dc->clk_mgr);
+ }
+
+ // Initialize the dccg
+ if (res_pool->dccg->funcs->dccg_init)
+ res_pool->dccg->funcs->dccg_init(res_pool->dccg);
+
+ // Disable DMUB Initialization until IPS state programming is finalized
+ //if (!dcb->funcs->is_accelerated_mode(dcb)) {
+ // hws->funcs.bios_golden_init(dc);
+ //}
+
+ // Set default OPTC memory power states
+ if (dc->debug.enable_mem_low_power.bits.optc) {
+ // Shutdown when unassigned and light sleep in VBLANK
+ REG_SET_2(ODM_MEM_PWR_CTRL3, 0, ODM_MEM_UNASSIGNED_PWR_MODE, 3, ODM_MEM_VBLANK_PWR_MODE, 1);
+ }
+
+ if (dc->debug.enable_mem_low_power.bits.vga) {
+ // Power down VGA memory
+ REG_UPDATE(MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, 1);
+ }
+
+ /* Reference clocks: xtalin comes from firmware info; dccg/dchub are derived from it. */
+ if (dc->ctx->dc_bios->fw_info_valid) {
+ res_pool->ref_clocks.xtalin_clock_inKhz =
+ dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency;
+
+ if (res_pool->hubbub) {
+ (res_pool->dccg->funcs->get_dccg_ref_freq)(res_pool->dccg,
+ dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency,
+ &res_pool->ref_clocks.dccg_ref_clock_inKhz);
+
+ /*
+ * Snapshot the previous dchub reference (kHz / 1000) before it is
+ * re-read below; it is compared against the new value at the end of
+ * this function.
+ */
+ current_dchub_ref_freq = res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000;
+
+ (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub,
+ res_pool->ref_clocks.dccg_ref_clock_inKhz,
+ &res_pool->ref_clocks.dchub_ref_clock_inKhz);
+ } else {
+ // Not all ASICs have DCCG sw component
+ res_pool->ref_clocks.dccg_ref_clock_inKhz =
+ res_pool->ref_clocks.xtalin_clock_inKhz;
+ res_pool->ref_clocks.dchub_ref_clock_inKhz =
+ res_pool->ref_clocks.xtalin_clock_inKhz;
+ }
+ } else
+ ASSERT_CRITICAL(false);
+
+ for (i = 0; i < dc->link_count; i++) {
+ /* Power up AND update implementation according to the
+ * required signal (which may be different from the
+ * default signal on connector).
+ */
+ struct dc_link *link = dc->links[i];
+
+ link->link_enc->funcs->hw_init(link->link_enc);
+
+ /* Check for enabled DIG to identify enabled display */
+ if (link->link_enc->funcs->is_dig_enabled &&
+ link->link_enc->funcs->is_dig_enabled(link->link_enc)) {
+ link->link_status.link_active = true;
+ link->phy_state.symclk_state = SYMCLK_ON_TX_ON;
+ if (link->link_enc->funcs->fec_is_active &&
+ link->link_enc->funcs->fec_is_active(link->link_enc))
+ link->fec_state = dc_link_fec_enabled;
+ }
+ }
+
+ /* enable_power_gating_plane before dsc_pg_control because
+ * FORCEON = 1 with hw default value on bootup, resume from s3
+ */
+ if (hws->funcs.enable_power_gating_plane)
+ hws->funcs.enable_power_gating_plane(dc->hwseq, true);
+
+ /* we want to turn off all dp displays before doing detection */
+ dc->link_srv->blank_all_dp_displays(dc);
+
+ /* If taking control over from VBIOS, we may want to optimize our first
+ * mode set, so we need to skip powering down pipes until we know which
+ * pipes we want to use.
+ * Otherwise, if taking control is not possible, we need to power
+ * everything down.
+ */
+ if (dcb->funcs->is_accelerated_mode(dcb) || !dc->config.seamless_boot_edp_requested) {
+ /* Disable boot optimizations means power down everything including PHY, DIG,
+ * and OTG (i.e. the boot is not optimized because we do a full power down).
+ */
+ if (dc->hwss.enable_accelerated_mode && dc->debug.disable_boot_optimizations)
+ dc->hwss.enable_accelerated_mode(dc, dc->current_state);
+ else
+ hws->funcs.init_pipes(dc, dc->current_state);
+
+ /* NOTE(review): hubbub is dereferenced unchecked from here on, although it was NULL-checked above — confirm it is always present. */
+ if (dc->res_pool->hubbub->funcs->allow_self_refresh_control)
+ dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
+ !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
+
+ dcn401_initialize_min_clocks(dc);
+
+ /* On HW init, allow idle optimizations after pipes have been turned off.
+ *
+ * In certain D3 cases (i.e. BOCO / BOMACO) it's possible that hardware state
+ * is reset (i.e. not in idle at the time hw init is called), but software state
+ * still has idle_optimizations = true, so we must disable idle optimizations first
+ * (i.e. set false), then re-enable (set true).
+ */
+ dc_allow_idle_optimizations(dc, false);
+ dc_allow_idle_optimizations(dc, true);
+ }
+
+ /* In headless boot cases, DIG may be turned
+ * on which causes HW/SW discrepancies.
+ * To avoid this, power down hardware on boot
+ * if DIG is turned on and seamless boot not enabled
+ */
+ if (!dc->config.seamless_boot_edp_requested) {
+ struct dc_link *edp_links[MAX_NUM_EDP];
+ struct dc_link *edp_link;
+
+ dc_get_edp_links(dc, edp_links, &edp_num);
+ if (edp_num) {
+ /* eDP present: backlight off, power down, then panel power off, per eDP link with an enabled DIG. */
+ for (i = 0; i < edp_num; i++) {
+ edp_link = edp_links[i];
+ if (edp_link->link_enc->funcs->is_dig_enabled &&
+ edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) &&
+ dc->hwss.edp_backlight_control &&
+ hws->funcs.power_down &&
+ dc->hwss.edp_power_control) {
+ dc->hwss.edp_backlight_control(edp_link, false);
+ hws->funcs.power_down(dc);
+ dc->hwss.edp_power_control(edp_link, false);
+ }
+ }
+ } else {
+ /* No eDP: a single power_down() is issued as soon as any link with an enabled DIG is found. */
+ for (i = 0; i < dc->link_count; i++) {
+ struct dc_link *link = dc->links[i];
+
+ if (link->link_enc->funcs->is_dig_enabled &&
+ link->link_enc->funcs->is_dig_enabled(link->link_enc) &&
+ hws->funcs.power_down) {
+ hws->funcs.power_down(dc);
+ break;
+ }
+
+ }
+ }
+ }
+
+ for (i = 0; i < res_pool->audio_count; i++) {
+ struct audio *audio = res_pool->audios[i];
+
+ audio->funcs->hw_init(audio);
+ }
+
+ /*
+ * backlight / user_level are overwritten for every link that has a
+ * panel_cntl, so the values from the last such link are what reach
+ * abm_init() below; with no panel_cntl they stay MAX_BACKLIGHT_LEVEL.
+ */
+ for (i = 0; i < dc->link_count; i++) {
+ struct dc_link *link = dc->links[i];
+
+ if (link->panel_cntl) {
+ backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl);
+ user_level = link->panel_cntl->stored_backlight_registers.USER_LEVEL;
+ }
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (abms[i] != NULL && abms[i]->funcs != NULL)
+ abms[i]->funcs->abm_init(abms[i], backlight, user_level);
+ }
+
+ /* power AFMT HDMI memory TODO: may move to dis/en output save power*/
+ REG_WRITE(DIO_MEM_PWR_CTRL, 0);
+
+ if (!dc->debug.disable_clock_gate) {
+ /* enable all DCN clock gating */
+ REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
+
+ REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0);
+
+ REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0);
+ }
+
+ dcn401_setup_hpo_hw_control(hws, true);
+
+ if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks)
+ dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub);
+
+ if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges)
+ dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr);
+
+ if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+ dc->res_pool->hubbub->funcs->force_pstate_change_control(
+ dc->res_pool->hubbub, false, false);
+
+ if (dc->res_pool->hubbub->funcs->init_crb)
+ dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub);
+
+ if (dc->res_pool->hubbub->funcs->set_request_limit && dc->config.sdpif_request_limit_words_per_umc > 0)
+ dc->res_pool->hubbub->funcs->set_request_limit(dc->res_pool->hubbub, dc->ctx->dc_bios->vram_info.num_chans, dc->config.sdpif_request_limit_words_per_umc);
+
+ // Get DMCUB capabilities
+ if (dc->ctx->dmub_srv) {
+ dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv);
+ dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;
+ dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0;
+ dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver;
+ dc->debug.fams2_config.bits.enable &=
+ dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver; // sw & fw fams versions must match for support
+ /*
+ * NOTE(review): update_bw_bounding_box is NULL-checked only in the
+ * first operand of the || below; when only the dchub-frequency operand
+ * is true the pointer is called without a check — confirm it is always
+ * populated for this resource pool.
+ */
+ if ((!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box)
+ || res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq) {
+ /* update bounding box if FAMS2 disabled, or if dchub clk has changed */
+ if (dc->clk_mgr)
+ dc->res_pool->funcs->update_bw_bounding_box(dc,
+ dc->clk_mgr->bw_params);
+ }
+ }
+}
+
+/*
+ * Derive the enable/disable state of the shaper, 3DLUT and 1D LUT from the
+ * plane state attached to @pipe_ctx, and place the movable CM block before
+ * blending for the MPCC identified by the pipe's hubp instance.
+ *
+ * Nothing is written (and no hardware is touched) when the pipe has no plane
+ * state. @shaper_xable and @lut3d_xable are also left untouched when the
+ * plane's shaper/3DLUT setting is not one of the three handled values, so
+ * callers must pre-initialize all three outputs.
+ */
+static void dcn401_get_mcm_lut_xable_from_pipe_ctx(struct dc *dc, struct pipe_ctx *pipe_ctx,
+		enum MCM_LUT_XABLE *shaper_xable,
+		enum MCM_LUT_XABLE *lut3d_xable,
+		enum MCM_LUT_XABLE *lut1d_xable)
+{
+	struct dc_plane_state *plane = pipe_ctx->plane_state;
+	struct mpc *mpc = dc->res_pool->mpc;
+	int mpcc_id = pipe_ctx->plane_res.hubp->inst;
+
+	if (!plane)
+		return;
+
+	mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id);
+	plane->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE;
+
+	if (plane->mcm_lut1d_enable)
+		*lut1d_xable = MCM_LUT_ENABLE;
+	else
+		*lut1d_xable = MCM_LUT_DISABLE;
+
+	switch (plane->mcm_shaper_3dlut_setting) {
+	case DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL:
+		*shaper_xable = MCM_LUT_DISABLE;
+		*lut3d_xable = MCM_LUT_DISABLE;
+		break;
+	case DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER:
+		*shaper_xable = MCM_LUT_ENABLE;
+		*lut3d_xable = MCM_LUT_DISABLE;
+		break;
+	case DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER_3DLUT:
+		*shaper_xable = MCM_LUT_ENABLE;
+		*lut3d_xable = MCM_LUT_ENABLE;
+		break;
+	}
+}
+
+/*
+ * dcn401_populate_mcm_luts - program the MCM 1D LUT, shaper and 3DLUT.
+ * @dc: display core instance (provides the MPC via dc->res_pool)
+ * @pipe_ctx: pipe whose hubp/dpp are used; hubp->inst is used as the MPCC id
+ * @mcm_luts: LUT description, passed by value
+ * @lut_bank_a: forwarded unchanged to every populate/program hook
+ *
+ * Three stages run in order: 1D LUT (only when mcm_luts.lut1d_func is set),
+ * shaper (only when mcm_luts.shaper is set and mpc_3dlut_enable is true), and
+ * 3DLUT, whose handling depends on lut3d_src (SYSMEM: LUT written through the
+ * MPC; VIDMEM: hubp fast-load path). Other lut3d_src values do nothing.
+ * All mpc/hubp hooks are optional and skipped when NULL.
+ */
+void dcn401_populate_mcm_luts(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_cm2_func_luts mcm_luts,
+ bool lut_bank_a)
+{
+ struct dpp *dpp_base = pipe_ctx->plane_res.dpp;
+ struct hubp *hubp = pipe_ctx->plane_res.hubp;
+ int mpcc_id = hubp->inst;
+ struct mpc *mpc = dc->res_pool->mpc;
+ union mcm_lut_params m_lut_params;
+ enum dc_cm2_transfer_func_source lut3d_src = mcm_luts.lut3d_data.lut3d_src;
+ enum hubp_3dlut_fl_format format = 0;
+ enum hubp_3dlut_fl_mode mode;
+ enum hubp_3dlut_fl_width width = 0;
+ enum hubp_3dlut_fl_addressing_mode addr_mode;
+ enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g = 0;
+ enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b = 0;
+ enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r = 0;
+ enum MCM_LUT_XABLE shaper_xable = MCM_LUT_DISABLE;
+ enum MCM_LUT_XABLE lut3d_xable = MCM_LUT_DISABLE;
+ enum MCM_LUT_XABLE lut1d_xable = MCM_LUT_DISABLE;
+ bool rval;
+
+ /* Leaves the three *_xable values at MCM_LUT_DISABLE when the pipe has no plane state. */
+ dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable);
+
+ /* 1D LUT */
+ if (mcm_luts.lut1d_func) {
+ memset(&m_lut_params, 0, sizeof(m_lut_params));
+ if (mcm_luts.lut1d_func->type == TF_TYPE_HWPWL)
+ m_lut_params.pwl = &mcm_luts.lut1d_func->pwl;
+ else if (mcm_luts.lut1d_func->type == TF_TYPE_DISTRIBUTED_POINTS) {
+ rval = cm3_helper_translate_curve_to_hw_format(
+ mcm_luts.lut1d_func,
+ &dpp_base->regamma_params, false);
+ m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL;
+ }
+ if (m_lut_params.pwl) {
+ if (mpc->funcs->populate_lut)
+ mpc->funcs->populate_lut(mpc, MCM_LUT_1DLUT, m_lut_params, lut_bank_a, mpcc_id);
+ }
+ /* The mode argument is the logical AND of lut1d_xable and "a pwl was produced", i.e. 0 or 1. */
+ if (mpc->funcs->program_lut_mode)
+ mpc->funcs->program_lut_mode(mpc, MCM_LUT_1DLUT, lut1d_xable && m_lut_params.pwl, lut_bank_a, mpcc_id);
+ }
+
+ /* Shaper */
+ if (mcm_luts.shaper && mcm_luts.lut3d_data.mpc_3dlut_enable) {
+ memset(&m_lut_params, 0, sizeof(m_lut_params));
+ if (mcm_luts.shaper->type == TF_TYPE_HWPWL)
+ m_lut_params.pwl = &mcm_luts.shaper->pwl;
+ else if (mcm_luts.shaper->type == TF_TYPE_DISTRIBUTED_POINTS) {
+ /*
+ * Distributed-point shaper curves trip an assert but are still
+ * translated. NOTE(review): the output buffer is regamma_params,
+ * the same one the 1D LUT path above uses, while
+ * dcn401_set_mcm_luts() uses shaper_params — confirm intended.
+ */
+ ASSERT(false);
+ rval = cm3_helper_translate_curve_to_hw_format(
+ mcm_luts.shaper,
+ &dpp_base->regamma_params, true);
+ m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL;
+ }
+ if (m_lut_params.pwl) {
+ if (mpc->funcs->mcm.populate_lut)
+ mpc->funcs->mcm.populate_lut(mpc, m_lut_params, lut_bank_a, mpcc_id);
+ /* Shaper is enabled unconditionally here; shaper_xable is not consulted. */
+ if (mpc->funcs->program_lut_mode)
+ mpc->funcs->program_lut_mode(mpc, MCM_LUT_SHAPER, MCM_LUT_ENABLE, lut_bank_a, mpcc_id);
+ }
+ }
+
+ /* 3DLUT */
+ switch (lut3d_src) {
+ case DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM:
+ memset(&m_lut_params, 0, sizeof(m_lut_params));
+ /* The hubp fast-load path is turned off before the LUT is written through the MPC. */
+ if (hubp->funcs->hubp_enable_3dlut_fl)
+ hubp->funcs->hubp_enable_3dlut_fl(hubp, false);
+
+ if (mcm_luts.lut3d_data.lut3d_func && mcm_luts.lut3d_data.lut3d_func->state.bits.initialized) {
+ m_lut_params.lut3d = &mcm_luts.lut3d_data.lut3d_func->lut_3d;
+ if (mpc->funcs->populate_lut)
+ mpc->funcs->populate_lut(mpc, MCM_LUT_3DLUT, m_lut_params, lut_bank_a, mpcc_id);
+ if (mpc->funcs->program_lut_mode)
+ mpc->funcs->program_lut_mode(mpc, MCM_LUT_3DLUT, lut3d_xable, lut_bank_a,
+ mpcc_id);
+ }
+ break;
+ case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM:
+ /* width keeps its initial value 0 for sizes not listed below. */
+ switch (mcm_luts.lut3d_data.gpu_mem_params.size) {
+ case DC_CM2_GPU_MEM_SIZE_171717:
+ width = hubp_3dlut_fl_width_17;
+ break;
+ case DC_CM2_GPU_MEM_SIZE_TRANSFORMED:
+ width = hubp_3dlut_fl_width_transformed;
+ break;
+ default:
+ //TODO: handle default case
+ break;
+ }
+
+ //check for support
+ if (mpc->funcs->mcm.is_config_supported &&
+ !mpc->funcs->mcm.is_config_supported(width))
+ break;
+
+ if (mpc->funcs->program_lut_read_write_control)
+ mpc->funcs->program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, mpcc_id);
+ if (mpc->funcs->program_lut_mode)
+ mpc->funcs->program_lut_mode(mpc, MCM_LUT_3DLUT, lut3d_xable, lut_bank_a, mpcc_id);
+
+ if (hubp->funcs->hubp_program_3dlut_fl_addr)
+ hubp->funcs->hubp_program_3dlut_fl_addr(hubp, mcm_luts.lut3d_data.gpu_mem_params.addr);
+
+ if (mpc->funcs->mcm.program_bit_depth)
+ mpc->funcs->mcm.program_bit_depth(mpc, mcm_luts.lut3d_data.gpu_mem_params.bit_depth, mpcc_id);
+
+ /* mode and addr_mode have no initializer; the default case below guarantees both are set. */
+ switch (mcm_luts.lut3d_data.gpu_mem_params.layout) {
+ case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB:
+ mode = hubp_3dlut_fl_mode_native_1;
+ addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear;
+ break;
+ case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR:
+ mode = hubp_3dlut_fl_mode_native_2;
+ addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear;
+ break;
+ case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR:
+ mode = hubp_3dlut_fl_mode_transform;
+ addr_mode = hubp_3dlut_fl_addressing_mode_simple_linear;
+ break;
+ default:
+ mode = hubp_3dlut_fl_mode_disable;
+ addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear;
+ break;
+ }
+ if (hubp->funcs->hubp_program_3dlut_fl_mode)
+ hubp->funcs->hubp_program_3dlut_fl_mode(hubp, mode);
+
+ if (hubp->funcs->hubp_program_3dlut_fl_addressing_mode)
+ hubp->funcs->hubp_program_3dlut_fl_addressing_mode(hubp, addr_mode);
+
+ /* No default case: format keeps its initial value 0 for unlisted formats. */
+ switch (mcm_luts.lut3d_data.gpu_mem_params.format_params.format) {
+ case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB:
+ format = hubp_3dlut_fl_format_unorm_12msb_bitslice;
+ break;
+ case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB:
+ format = hubp_3dlut_fl_format_unorm_12lsb_bitslice;
+ break;
+ case DC_CM2_GPU_MEM_FORMAT_16161616_FLOAT_FP1_5_10:
+ format = hubp_3dlut_fl_format_float_fp1_5_10;
+ break;
+ }
+ if (hubp->funcs->hubp_program_3dlut_fl_format)
+ hubp->funcs->hubp_program_3dlut_fl_format(hubp, format);
+ /* Bias/scale is programmed on both MPC and hubp, and only when both hooks exist. */
+ if (hubp->funcs->hubp_update_3dlut_fl_bias_scale &&
+ mpc->funcs->mcm.program_bias_scale) {
+ mpc->funcs->mcm.program_bias_scale(mpc,
+ mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias,
+ mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale,
+ mpcc_id);
+ hubp->funcs->hubp_update_3dlut_fl_bias_scale(hubp,
+ mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias,
+ mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale);
+ }
+
+ //navi 4x has a bug and r and blue are swapped and need to be worked around here in
+ //TODO: need to make a method for get_xbar per asic OR do the workaround in program_crossbar for 4x
+ /* Every component order currently maps to the same crossbar assignment. */
+ switch (mcm_luts.lut3d_data.gpu_mem_params.component_order) {
+ case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA:
+ default:
+ crossbar_bit_slice_cr_r = hubp_3dlut_fl_crossbar_bit_slice_0_15;
+ crossbar_bit_slice_y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31;
+ crossbar_bit_slice_cb_b = hubp_3dlut_fl_crossbar_bit_slice_32_47;
+ break;
+ }
+
+ if (hubp->funcs->hubp_program_3dlut_fl_crossbar)
+ hubp->funcs->hubp_program_3dlut_fl_crossbar(hubp,
+ crossbar_bit_slice_cr_r,
+ crossbar_bit_slice_y_g,
+ crossbar_bit_slice_cb_b);
+
+ if (mpc->funcs->mcm.program_lut_read_write_control)
+ mpc->funcs->mcm.program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, true, mpcc_id);
+
+ if (mpc->funcs->mcm.program_3dlut_size)
+ mpc->funcs->mcm.program_3dlut_size(mpc, width, mpcc_id);
+
+ if (mpc->funcs->update_3dlut_fast_load_select)
+ mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, hubp->inst);
+
+ /* Without a fast-load enable hook, shaper, 3DLUT and 1D LUT are all switched off instead. */
+ if (hubp->funcs->hubp_enable_3dlut_fl)
+ hubp->funcs->hubp_enable_3dlut_fl(hubp, true);
+ else {
+ if (mpc->funcs->program_lut_mode) {
+ mpc->funcs->program_lut_mode(mpc, MCM_LUT_SHAPER, MCM_LUT_DISABLE, lut_bank_a, mpcc_id);
+ mpc->funcs->program_lut_mode(mpc, MCM_LUT_3DLUT, MCM_LUT_DISABLE, lut_bank_a, mpcc_id);
+ mpc->funcs->program_lut_mode(mpc, MCM_LUT_1DLUT, MCM_LUT_DISABLE, lut_bank_a, mpcc_id);
+ }
+ }
+ break;
+
+ }
+}
+
+/*
+ * Enable 3DLUT fast-load on the hubp of @pipe_ctx, when the hubp provides
+ * the hubp_enable_3dlut_fl hook. @dc is not used.
+ */
+void dcn401_trigger_3dlut_dma_load(struct dc *dc, struct pipe_ctx *pipe_ctx)
+{
+	struct hubp *hubp = pipe_ctx->plane_res.hubp;
+
+	if (!hubp->funcs->hubp_enable_3dlut_fl)
+		return;
+
+	hubp->funcs->hubp_enable_3dlut_fl(hubp, true);
+}
+
+/*
+ * Program the MCM 1D LUT, shaper and 3DLUT of the MPCC that belongs to
+ * @pipe_ctx from @plane_state.
+ *
+ * When the 3DLUT source is video memory the work is delegated to
+ * dcn401_populate_mcm_luts() and true is returned. Otherwise the movable CM
+ * block is placed before blending and the three LUTs are programmed in the
+ * order 1D LUT, shaper, 3DLUT; the return value is the AND of the results of
+ * the individual programming hooks.
+ */
+bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx,
+		const struct dc_plane_state *plane_state)
+{
+	struct dc *dc = pipe_ctx->stream_res.opp->ctx->dc;
+	struct mpc *mpc = dc->res_pool->mpc;
+	struct dpp *dpp = pipe_ctx->plane_res.dpp;
+	int mpcc_id = pipe_ctx->plane_res.hubp->inst;
+	const struct pwl_params *blend_pwl = NULL;
+	const struct pwl_params *shaper_pwl = NULL;
+	bool ok;
+
+	if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) {
+		dcn401_populate_mcm_luts(dc, pipe_ctx, plane_state->mcm_luts, plane_state->lut_bank_a);
+		return true;
+	}
+
+	mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id);
+	pipe_ctx->plane_state->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE;
+
+	/* 1D LUT: taken from the blend transfer function. */
+	switch (plane_state->blend_tf.type) {
+	case TF_TYPE_HWPWL:
+		blend_pwl = &plane_state->blend_tf.pwl;
+		break;
+	case TF_TYPE_DISTRIBUTED_POINTS:
+		if (cm3_helper_translate_curve_to_hw_format(&plane_state->blend_tf,
+				&dpp->regamma_params, false))
+			blend_pwl = &dpp->regamma_params;
+		break;
+	default:
+		break;
+	}
+	ok = mpc->funcs->program_1dlut(mpc, blend_pwl, mpcc_id);
+
+	/* Shaper: taken from the input shaper function. */
+	switch (plane_state->in_shaper_func.type) {
+	case TF_TYPE_HWPWL:
+		shaper_pwl = &plane_state->in_shaper_func.pwl;
+		break;
+	case TF_TYPE_DISTRIBUTED_POINTS:
+		// TODO: dpp_base replace
+		if (cm3_helper_translate_curve_to_hw_format(&plane_state->in_shaper_func,
+				&dpp->shaper_params, true))
+			shaper_pwl = &dpp->shaper_params;
+		break;
+	default:
+		break;
+	}
+	ok &= mpc->funcs->program_shaper(mpc, shaper_pwl, mpcc_id);
+
+	/* 3DLUT: NULL is passed when the plane's 3DLUT is not initialized. */
+	if (mpc->funcs->program_3dlut)
+		ok &= mpc->funcs->program_3dlut(mpc,
+				plane_state->lut3d_func.state.bits.initialized == 1 ?
+					&plane_state->lut3d_func.lut_3d : NULL,
+				mpcc_id);
+
+	return ok;
+}
+
+/*
+ * Program the output transfer function for @pipe_ctx.
+ *
+ * For an OPP head pipe the MPC shaper + 3DLUT path is tried first through
+ * dcn32_set_mpc_shaper_3dlut(); only when that returns false are output gamma
+ * parameters picked from @stream. For any other pipe, or when no parameters
+ * were picked, set_output_gamma() is called with NULL parameters.
+ *
+ * Returns the result of dcn32_set_mpc_shaper_3dlut(), or false when the pipe
+ * is not an OPP head. @dc is not used.
+ */
+bool dcn401_set_output_transfer_func(struct dc *dc,
+		struct pipe_ctx *pipe_ctx,
+		const struct dc_stream_state *stream)
+{
+	struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
+	int mpcc_id = pipe_ctx->plane_res.hubp->inst;
+	const struct pwl_params *ogam = NULL;
+	bool handled = false;
+
+	/* OGAM or 3DLUT is only programmed for the top pipe. */
+	if (resource_is_pipe_type(pipe_ctx, OPP_HEAD)) {
+		/* Shaper and 3DLUT in MPC come first. */
+		handled = dcn32_set_mpc_shaper_3dlut(pipe_ctx, stream);
+
+		if (!handled && mpc->funcs->set_output_gamma) {
+			if (stream->out_transfer_func.type == TF_TYPE_HWPWL) {
+				ogam = &stream->out_transfer_func.pwl;
+			} else if (pipe_ctx->stream->out_transfer_func.type ==
+					TF_TYPE_DISTRIBUTED_POINTS) {
+				if (cm3_helper_translate_curve_to_hw_format(
+						&stream->out_transfer_func,
+						&mpc->blender_params, false))
+					ogam = &mpc->blender_params;
+			}
+
+			/* OUTGAM has no ROM LUTs, so a predefined curve is unexpected. */
+			if (stream->out_transfer_func.type == TF_TYPE_PREDEFINED)
+				BREAK_TO_DEBUGGER();
+		}
+	}
+
+	if (mpc->funcs->set_output_gamma)
+		mpc->funcs->set_output_gamma(mpc, mpcc_id, ogam);
+
+	return handled;
+}
+
+/*
+ * Choose the pixel rate divider for the stream on @pipe_ctx and store it in
+ * @tmds_div:
+ *  - TMDS or virtual signal, YCbCr 4:2:0 encoding: PIXEL_RATE_DIV_BY_2
+ *  - TMDS or virtual signal, any other encoding:   PIXEL_RATE_DIV_BY_4
+ *  - every other signal:                           PIXEL_RATE_DIV_BY_1
+ */
+void dcn401_calculate_dccg_tmds_div_value(struct pipe_ctx *pipe_ctx,
+		unsigned int *tmds_div)
+{
+	struct dc_stream_state *stream = pipe_ctx->stream;
+
+	if (!dc_is_tmds_signal(stream->signal) && !dc_is_virtual_signal(stream->signal))
+		*tmds_div = PIXEL_RATE_DIV_BY_1;
+	else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
+		*tmds_div = PIXEL_RATE_DIV_BY_2;
+	else
+		*tmds_div = PIXEL_RATE_DIV_BY_4;
+
+	/* Sanity check: a divider must have been selected above. */
+	if (*tmds_div == PIXEL_RATE_DIV_NA)
+		ASSERT(false);
+}
+
+/*
+ * Gather the values dcn401_enable_stream_timing() needs before it touches the
+ * hardware:
+ *  - @tmds_div: written only for TMDS or virtual signals
+ *  - @opp_cnt / @opp_heads / @opp_inst: OPP heads of this OTG master and
+ *    their OPP instance numbers
+ *  - @params: DRR vertical-total settings copied from the stream adjust data
+ *  - @event_triggers: set to 0x80 when both v_total_min and v_total_max are
+ *    non-zero, left untouched otherwise
+ *
+ * For TMDS signals the link's symclk OTG reference count and symclk state
+ * are updated as a side effect. @dc and @manual_mode are not used.
+ */
+static void enable_stream_timing_calc(
+		struct pipe_ctx *pipe_ctx,
+		struct dc_state *context,
+		struct dc *dc,
+		unsigned int *tmds_div,
+		int *opp_inst,
+		int *opp_cnt,
+		struct pipe_ctx *opp_heads[MAX_PIPES],
+		bool *manual_mode,
+		struct drr_params *params,
+		unsigned int *event_triggers)
+{
+	struct dc_stream_state *stream = pipe_ctx->stream;
+	bool is_tmds = dc_is_tmds_signal(stream->signal);
+	int head;
+
+	if (is_tmds || dc_is_virtual_signal(stream->signal))
+		dcn401_calculate_dccg_tmds_div_value(pipe_ctx, tmds_div);
+
+	*opp_cnt = resource_get_opp_heads_for_otg_master(pipe_ctx, &context->res_ctx, opp_heads);
+	for (head = 0; head < *opp_cnt; head++)
+		opp_inst[head] = opp_heads[head]->stream_res.opp->inst;
+
+	if (is_tmds) {
+		struct dc_link *link = stream->link;
+
+		link->phy_state.symclk_ref_cnts.otg = 1;
+		link->phy_state.symclk_state =
+			(link->phy_state.symclk_state == SYMCLK_OFF_TX_OFF) ?
+				SYMCLK_ON_TX_OFF : SYMCLK_ON_TX_ON;
+	}
+
+	params->vertical_total_min = stream->adjust.v_total_min;
+	params->vertical_total_max = stream->adjust.v_total_max;
+	params->vertical_total_mid = stream->adjust.v_total_mid;
+	params->vertical_total_mid_frame_num = stream->adjust.v_total_mid_frame_num;
+
+	/* With DRR active, the trigger event monitors surface updates. */
+	if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0)
+		*event_triggers = 0x80;
+}
+
+/*
+ * dcn401_enable_stream_timing - program and start the timing generator.
+ * @pipe_ctx: pipe to enable; anything other than an OTG master is a no-op
+ * @context: state used to look up the OPP heads and the SubVP type
+ * @dc: display core instance
+ *
+ * Sequence: pixel rate divider, ODM combine (when more than one OPP head),
+ * DTBCLK_P source, OPTC clock, pixel clock, timing programming, OPP clock and
+ * left-edge pixel setup, blanking, CRTC enable, DRR and static-screen control.
+ *
+ * Returns DC_OK on success or when @pipe_ctx is not an OTG master, and
+ * DC_ERROR_UNEXPECTED when program_pix_clk() or enable_crtc() fails.
+ */
+enum dc_status dcn401_enable_stream_timing(
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+ struct dc *dc)
+{
+ struct dce_hwseq *hws = dc->hwseq;
+ struct dc_stream_state *stream = pipe_ctx->stream;
+ struct drr_params params = {0};
+ unsigned int event_triggers = 0;
+ int opp_cnt = 1;
+ int opp_inst[MAX_PIPES] = {0};
+ struct pipe_ctx *opp_heads[MAX_PIPES] = {0};
+ /* Local copy of the stream timing; only this copy is adjusted for hblank borrow below. */
+ struct dc_crtc_timing patched_crtc_timing = stream->timing;
+ bool manual_mode = false;
+ unsigned int tmds_div = PIXEL_RATE_DIV_NA;
+ unsigned int unused_div = PIXEL_RATE_DIV_NA;
+ int odm_slice_width;
+ int last_odm_slice_width;
+ int i;
+
+ if (!resource_is_pipe_type(pipe_ctx, OTG_MASTER))
+ return DC_OK;
+
+ /* tmds_div stays PIXEL_RATE_DIV_NA unless the signal is TMDS or virtual. */
+ enable_stream_timing_calc(pipe_ctx, context, dc, &tmds_div, opp_inst,
+ &opp_cnt, opp_heads, &manual_mode, &params, &event_triggers);
+
+ if (dc->res_pool->dccg->funcs->set_pixel_rate_div) {
+ dc->res_pool->dccg->funcs->set_pixel_rate_div(
+ dc->res_pool->dccg, pipe_ctx->stream_res.tg->inst,
+ tmds_div, unused_div);
+ }
+
+ /* TODO check if timing_changed, disable stream if timing changed */
+
+ /* More than one OPP head means ODM combine: program the slice widths. */
+ if (opp_cnt > 1) {
+ odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false);
+ last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true);
+ pipe_ctx->stream_res.tg->funcs->set_odm_combine(
+ pipe_ctx->stream_res.tg,
+ opp_inst, opp_cnt,
+ odm_slice_width, last_odm_slice_width);
+ }
+
+ /* set DTBCLK_P */
+ if (dc->res_pool->dccg->funcs->set_dtbclk_p_src) {
+ if (dc_is_dp_signal(stream->signal) || dc_is_virtual_signal(stream->signal)) {
+ dc->res_pool->dccg->funcs->set_dtbclk_p_src(dc->res_pool->dccg, DPREFCLK, pipe_ctx->stream_res.tg->inst);
+ }
+ }
+
+ /* HW program guide assume display already disable
+ * by unplug sequence. OTG assume stop.
+ */
+ pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, true);
+
+ if (false == pipe_ctx->clock_source->funcs->program_pix_clk(
+ pipe_ctx->clock_source,
+ &pipe_ctx->stream_res.pix_clk_params,
+ dc->link_srv->dp_get_encoding_format(&pipe_ctx->link_config.dp_link_settings),
+ &pipe_ctx->pll_settings)) {
+ BREAK_TO_DEBUGGER();
+ return DC_ERROR_UNEXPECTED;
+ }
+
+ /* Optional workaround hook, applied to non-DP signals only. */
+ if (dc->hwseq->funcs.PLAT_58856_wa && (!dc_is_dp_signal(stream->signal)))
+ dc->hwseq->funcs.PLAT_58856_wa(context, pipe_ctx);
+
+ /* if we are padding, h_addressable needs to be adjusted */
+ if (dc->debug.enable_hblank_borrow) {
+ patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding;
+ patched_crtc_timing.h_total = patched_crtc_timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding;
+ patched_crtc_timing.pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz;
+ }
+
+ pipe_ctx->stream_res.tg->funcs->program_timing(
+ pipe_ctx->stream_res.tg,
+ &patched_crtc_timing,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vready_offset_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines,
+ pipe_ctx->stream->signal,
+ true);
+
+ /* Per OPP head: enable the pipe clock and program the left-edge extra pixel. */
+ for (i = 0; i < opp_cnt; i++) {
+ opp_heads[i]->stream_res.opp->funcs->opp_pipe_clock_control(
+ opp_heads[i]->stream_res.opp,
+ true);
+ opp_heads[i]->stream_res.opp->funcs->opp_program_left_edge_extra_pixel(
+ opp_heads[i]->stream_res.opp,
+ stream->timing.pixel_encoding,
+ resource_is_pipe_type(opp_heads[i], OTG_MASTER));
+ }
+
+ /* NOTE(review): the OTG master's own OPP clock is enabled again here; presumably it is already one of opp_heads[] — confirm. */
+ pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control(
+ pipe_ctx->stream_res.opp,
+ true);
+
+ hws->funcs.blank_pixel_data(dc, pipe_ctx, true);
+
+ /* VTG is within DCHUB command block. DCFCLK is always on */
+ if (false == pipe_ctx->stream_res.tg->funcs->enable_crtc(pipe_ctx->stream_res.tg)) {
+ BREAK_TO_DEBUGGER();
+ return DC_ERROR_UNEXPECTED;
+ }
+
+ hws->funcs.wait_for_blank_complete(pipe_ctx->stream_res.opp);
+ set_drr_and_clear_adjust_pending(pipe_ctx, stream, &params);
+
+ /* Event triggers and num frames initialized for DRR, but can be
+ * later updated for PSR use. Note DRR trigger events are generated
+ * regardless of whether num frames met.
+ */
+ if (pipe_ctx->stream_res.tg->funcs->set_static_screen_control)
+ pipe_ctx->stream_res.tg->funcs->set_static_screen_control(
+ pipe_ctx->stream_res.tg, event_triggers, 2);
+
+ /* TODO program crtc source select for non-virtual signal*/
+ /* TODO program FMT */
+ /* TODO setup link_enc */
+ /* TODO set stream attributes */
+ /* TODO program audio */
+ /* TODO enable stream if timing changed */
+ /* TODO unblank stream if DP */
+
+ /* SubVP phantom pipes get an extra post-enable step when the TG provides one. */
+ if (dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
+ if (pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable)
+ pipe_ctx->stream_res.tg->funcs->phantom_crtc_post_enable(pipe_ctx->stream_res.tg);
+ }
+
+ return DC_OK;
+}
+
+/*
+ * Map the transmitter of @link's encoder to its PHYD32CLK source.
+ * UNIPHY A..E map to PHYD32CLKA..E; any other transmitter falls back to
+ * PHYD32CLKA.
+ */
+static enum phyd32clk_clock_source get_phyd32clk_src(struct dc_link *link)
+{
+	enum phyd32clk_clock_source src = PHYD32CLKA;
+
+	switch (link->link_enc->transmitter) {
+	case TRANSMITTER_UNIPHY_B:
+		src = PHYD32CLKB;
+		break;
+	case TRANSMITTER_UNIPHY_C:
+		src = PHYD32CLKC;
+		break;
+	case TRANSMITTER_UNIPHY_D:
+		src = PHYD32CLKD;
+		break;
+	case TRANSMITTER_UNIPHY_E:
+		src = PHYD32CLKE;
+		break;
+	case TRANSMITTER_UNIPHY_A:
+	default:
+		break;
+	}
+
+	return src;
+}
+
+/*
+ * Compute the values dcn401_enable_stream() programs for @pipe_ctx.
+ *
+ * @dp_hpo_inst:   written with the HPO DP stream encoder instance, only when
+ *                 the stream is a 128b/132b DP signal; otherwise untouched.
+ * @phyd32clk:     PHYD32CLK source derived from the link's transmitter.
+ * @tmds_div:      pixel rate divider for TMDS signals, PIXEL_RATE_DIV_BY_1
+ *                 for everything else.
+ * @early_control: (active width incl. borders) % lane_count, replaced by
+ *                 lane_count when that is zero. With a zero lane count the
+ *                 incoming value is kept as the starting point, so the caller
+ *                 has to initialise it.
+ */
+static void dcn401_enable_stream_calc(
+		struct pipe_ctx *pipe_ctx,
+		int *dp_hpo_inst,
+		enum phyd32clk_clock_source *phyd32clk,
+		unsigned int *tmds_div,
+		uint32_t *early_control)
+{
+	struct dc_stream_state *stream = pipe_ctx->stream;
+	struct dc *dc = stream->ctx->dc;
+	struct dc_crtc_timing *timing = &stream->timing;
+	enum dc_lane_count lanes = stream->link->cur_link_settings.lane_count;
+	uint32_t h_active_with_borders;
+
+	if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx))
+		*dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst;
+
+	*phyd32clk = get_phyd32clk_src(stream->link);
+
+	if (!dc_is_tmds_signal(stream->signal))
+		*tmds_div = PIXEL_RATE_DIV_BY_1;
+	else
+		dcn401_calculate_dccg_tmds_div_value(pipe_ctx, tmds_div);
+
+	/* enable early control to avoid corruption on DP monitor */
+	h_active_with_borders = timing->h_addressable;
+	h_active_with_borders += timing->h_border_left;
+	h_active_with_borders += timing->h_border_right;
+
+	if (lanes != 0)
+		*early_control = h_active_with_borders % lanes;
+
+	if (*early_control == 0)
+		*early_control = lanes;
+}
+
+/*
+ * Enable the stream carried by @pipe_ctx.
+ *
+ * Sequence: enable the DP stream/symbol clocks in DCCG (128b/132b or
+ * 8b/10b path), set up stream attributes, program the pixel rate divider,
+ * set up the stream encoder, optionally program the dmdata engine, update
+ * info frames and finally program OTG early control.
+ */
+void dcn401_enable_stream(struct pipe_ctx *pipe_ctx)
+{
+	uint32_t early_control = 0;
+	struct timing_generator *tg = pipe_ctx->stream_res.tg;
+	struct dc_link *link = pipe_ctx->stream->link;
+	const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
+	struct dc *dc = pipe_ctx->stream->ctx->dc;
+	struct dccg *dccg = dc->res_pool->dccg;
+	enum phyd32clk_clock_source phyd32clk;
+	int dp_hpo_inst = 0;
+	unsigned int tmds_div = PIXEL_RATE_DIV_NA;
+	unsigned int unused_div = PIXEL_RATE_DIV_NA;
+	struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc;
+	struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc;
+
+	if (!dc->config.unify_link_enc_assignment)
+		link_enc = link_enc_cfg_get_link_enc(link);
+
+	dcn401_enable_stream_calc(pipe_ctx, &dp_hpo_inst, &phyd32clk,
+				&tmds_div, &early_control);
+
+	if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) {
+		if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
+			dccg->funcs->set_dpstreamclk(dccg, DPREFCLK, tg->inst, dp_hpo_inst);
+			if (link->cur_link_settings.link_rate == LINK_RATE_UNKNOWN) {
+				dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
+			} else {
+				dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk);
+			}
+		} else if (link_enc) {
+			/*
+			 * The link encoder comes from a lookup and may be
+			 * absent; without one there is no transmitter to
+			 * derive the symclk source from, so skip the enable
+			 * instead of dereferencing NULL.
+			 */
+			dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst,
+					link_enc->transmitter - TRANSMITTER_UNIPHY_A);
+		}
+	}
+
+	link_hwss->setup_stream_attribute(pipe_ctx);
+
+	if (dccg->funcs->set_pixel_rate_div) {
+		dccg->funcs->set_pixel_rate_div(
+			dccg,
+			tg->inst,
+			tmds_div,
+			unused_div);
+	}
+
+	link_hwss->setup_stream_encoder(pipe_ctx);
+
+	if (pipe_ctx->plane_state && pipe_ctx->plane_state->flip_immediate != 1) {
+		if (dc->hwss.program_dmdata_engine)
+			dc->hwss.program_dmdata_engine(pipe_ctx);
+	}
+
+	dc->hwss.update_info_frame(pipe_ctx);
+
+	if (dc_is_dp_signal(pipe_ctx->stream->signal))
+		dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_UPDATE_INFO_FRAME);
+
+	tg->funcs->set_early_control(tg, early_control);
+}
+
+/*
+ * Write @enable into the HPO_IO_EN field of the HPO_TOP_HW_CONTROL register.
+ * NOTE(review): REG_UPDATE is defined outside this view; it presumably
+ * resolves the register through @hws - confirm against the macro.
+ */
+void dcn401_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable)
+{
+ REG_UPDATE(HPO_TOP_HW_CONTROL, HPO_IO_EN, enable);
+}
+
+/*
+ * Rescale the X hotspot in @pos_cpy for a 2x magnified cursor: the hotspot
+ * is halved and then bumped by 1 for cursors up to 128 wide, by 2 for wider
+ * ones.
+ */
+void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy)
+{
+	pos_cpy->x_hotspot = pos_cpy->x_hotspot / 2 + (cursor_width <= 128 ? 1 : 2);
+}
+
+/*
+ * Reprogram the pixel clock of the first top pipe in the current state that
+ * drives a stream on @link, using @link_encoding. Only that one pipe is
+ * touched; nothing happens when no such pipe exists.
+ */
+static void disable_link_output_symclk_on_tx_off(struct dc_link *link, enum dp_link_encoding link_encoding)
+{
+	struct dc *dc = link->ctx->dc;
+	uint8_t idx;
+
+	for (idx = 0; idx < MAX_PIPES; idx++) {
+		struct pipe_ctx *candidate = &dc->current_state->res_ctx.pipe_ctx[idx];
+
+		if (!candidate->stream)
+			continue;
+		if (candidate->stream->link != link)
+			continue;
+		if (candidate->top_pipe != NULL)
+			continue;
+
+		candidate->clock_source->funcs->program_pix_clk(
+				candidate->clock_source,
+				&candidate->stream_res.pix_clk_params,
+				link_encoding,
+				&candidate->pll_settings);
+		return;
+	}
+}
+
+/*
+ * Disable the output of @link for @signal.
+ *
+ * For eDP with implicit power control the backlight is switched off first
+ * and panel power afterwards; for every other case the PHY is locked through
+ * DMCU (when available) around the disable. TMDS links whose symclk is still
+ * referenced by an OTG keep the symbol clock running (SYMCLK_ON_TX_OFF);
+ * otherwise the link output is fully disabled (SYMCLK_OFF_TX_OFF).
+ *
+ * The branch selection after the disable deliberately uses the same
+ * conditions as before it, so that a PHY lock is always paired with an
+ * unlock; the hooks that are actually invoked are additionally checked for
+ * NULL before being called.
+ */
+void dcn401_disable_link_output(struct dc_link *link,
+		const struct link_resource *link_res,
+		enum signal_type signal)
+{
+	struct dc *dc = link->ctx->dc;
+	const struct link_hwss *link_hwss = get_link_hwss(link, link_res);
+	struct dmcu *dmcu = dc->res_pool->dmcu;
+
+	if (signal == SIGNAL_TYPE_EDP &&
+			link->dc->hwss.edp_backlight_control &&
+			!link->skip_implict_edp_power_control)
+		link->dc->hwss.edp_backlight_control(link, false);
+	else if (dmcu != NULL && dmcu->funcs->lock_phy)
+		dmcu->funcs->lock_phy(dmcu);
+
+	if (dc_is_tmds_signal(signal) && link->phy_state.symclk_ref_cnts.otg > 0) {
+		disable_link_output_symclk_on_tx_off(link, DP_UNKNOWN_ENCODING);
+		link->phy_state.symclk_state = SYMCLK_ON_TX_OFF;
+	} else {
+		link_hwss->disable_link_output(link, link_res, signal);
+		link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF;
+	}
+
+	if (signal == SIGNAL_TYPE_EDP &&
+			link->dc->hwss.edp_backlight_control &&
+			!link->skip_implict_edp_power_control) {
+		/* the hook called here is not the one tested above */
+		if (link->dc->hwss.edp_power_control)
+			link->dc->hwss.edp_power_control(link, false);
+	} else if (dmcu != NULL && dmcu->funcs->lock_phy &&
+			dmcu->funcs->unlock_phy) {
+		dmcu->funcs->unlock_phy(dmcu);
+	}
+
+	dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
+}
+
+/*
+ * Program the HW cursor position for one pipe.
+ *
+ * Works on a copy of the stream's cursor position: converts it from
+ * stream->src space to stream->dst space, optionally offsets it by the
+ * plane's source rect, subtracts the combined width of the preceding ODM
+ * slices, folds negative coordinates into the hotspot, and disables the
+ * cursor on this pipe when it lies completely outside the pipe's recout.
+ * The result is passed to both the HUBP and the DPP set_cursor_position
+ * hooks.
+ *
+ * NOTE(review): pipe_ctx->plane_state is dereferenced unconditionally and
+ * stream->src.width/height are used as divisors; callers presumably
+ * guarantee an attached plane and a non-empty source rect - confirm.
+ */
+void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx)
+{
+ struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position;
+ struct hubp *hubp = pipe_ctx->plane_res.hubp;
+ struct dpp *dpp = pipe_ctx->plane_res.dpp;
+ struct dc_cursor_mi_param param = {
+ .pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10,
+ .ref_clk_khz = pipe_ctx->stream->ctx->dc->res_pool->ref_clocks.dchub_ref_clock_inKhz,
+ .viewport = pipe_ctx->plane_res.scl_data.viewport,
+ .recout = pipe_ctx->plane_res.scl_data.recout,
+ .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz,
+ .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert,
+ .rotation = pipe_ctx->plane_state->rotation,
+ .mirror = pipe_ctx->plane_state->horizontal_mirror,
+ .stream = pipe_ctx->stream
+ };
+ struct rect odm_slice_src = { 0 };
+ bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) ||
+ (pipe_ctx->prev_odm_pipe != NULL);
+ int prev_odm_width = 0;
+ struct pipe_ctx *prev_odm_pipe = NULL;
+ bool mpc_combine_on = false;
+ int bottom_pipe_x_pos = 0;
+
+ int x_pos = pos_cpy.x;
+ int y_pos = pos_cpy.y;
+ int recout_x_pos = 0;
+ int recout_y_pos = 0;
+
+ /* MPC combine is assumed when this pipe has a top/bottom neighbour and
+ * its viewport does not cover the whole plane source rect.
+ */
+ if ((pipe_ctx->top_pipe != NULL) || (pipe_ctx->bottom_pipe != NULL)) {
+ if ((pipe_ctx->plane_state->src_rect.width != pipe_ctx->plane_res.scl_data.viewport.width) ||
+ (pipe_ctx->plane_state->src_rect.height != pipe_ctx->plane_res.scl_data.viewport.height)) {
+ mpc_combine_on = true;
+ }
+ }
+
+ /* DCN4 moved cursor composition after Scaler, so in HW it is in
+ * recout space and for HW Cursor position programming need to
+ * translate to recout space.
+ *
+ * Cursor X and Y position programmed into HW can't be negative,
+ * in fact it is X, Y coordinate shifted for the HW Cursor Hot spot
+ * position that goes into HW X and Y coordinates while HW Hot spot
+ * X and Y coordinates are length relative to the cursor top left
+ * corner, hotspot must be smaller than the cursor size.
+ *
+ * DMs/DC interface for Cursor position is in stream->src space, and
+ * DMs supposed to transform Cursor coordinates to stream->src space,
+ * then here we need to translate Cursor coordinates to stream->dst
+ * space, as now in HW, Cursor coordinates are in per pipe recout
+ * space, and for the given pipe valid coordinates are only in range
+ * from 0,0 - recout width, recout height space.
+ * If certain pipe combining is in place, need to further adjust per
+ * pipe to make sure each pipe enabling cursor on its part of the
+ * screen.
+ */
+ x_pos = pipe_ctx->stream->dst.x + x_pos * pipe_ctx->stream->dst.width /
+ pipe_ctx->stream->src.width;
+ y_pos = pipe_ctx->stream->dst.y + y_pos * pipe_ctx->stream->dst.height /
+ pipe_ctx->stream->src.height;
+
+ /* If the cursor's source viewport is clipped then we need to
+ * translate the cursor to appear in the correct position on
+ * the screen.
+ *
+ * This translation isn't affected by scaling so it needs to be
+ * done *after* we adjust the position for the scale factor.
+ *
+ * This is only done by opt-in for now since there are still
+ * some usecases like tiled display that might enable the
+ * cursor on both streams while expecting dc to clip it.
+ */
+ if (pos_cpy.translate_by_source) {
+ x_pos += pipe_ctx->plane_state->src_rect.x;
+ y_pos += pipe_ctx->plane_state->src_rect.y;
+ }
+
+ /* Adjust for ODM Combine
+ * next/prev_odm_offset is to account for scaled modes that have underscan
+ */
+ if (odm_combine_on) {
+ prev_odm_pipe = pipe_ctx->prev_odm_pipe;
+
+ /* Sum the source widths of every ODM slice preceding this pipe */
+ while (prev_odm_pipe != NULL) {
+ odm_slice_src = resource_get_odm_slice_src_rect(prev_odm_pipe);
+ prev_odm_width += odm_slice_src.width;
+ prev_odm_pipe = prev_odm_pipe->prev_odm_pipe;
+ }
+
+ x_pos -= (prev_odm_width);
+ }
+
+ /* If the position is negative then we need to add to the hotspot
+ * to fix cursor size between ODM slices
+ */
+
+ if (x_pos < 0) {
+ pos_cpy.x_hotspot -= x_pos;
+ if (hubp->curs_attr.attribute_flags.bits.ENABLE_MAGNIFICATION)
+ adjust_hotspot_between_slices_for_2x_magnify(hubp->curs_attr.width, &pos_cpy);
+ x_pos = 0;
+ }
+
+ if (y_pos < 0) {
+ pos_cpy.y_hotspot -= y_pos;
+ y_pos = 0;
+ }
+
+ /* If the position on bottom MPC pipe is negative then we need to add to the hotspot and
+ * adjust x_pos on bottom pipe to make cursor visible when crossing between MPC slices.
+ */
+ if (mpc_combine_on &&
+ pipe_ctx->top_pipe &&
+ (pipe_ctx == pipe_ctx->top_pipe->bottom_pipe)) {
+
+ bottom_pipe_x_pos = x_pos - pipe_ctx->plane_res.scl_data.recout.x;
+ if (bottom_pipe_x_pos < 0) {
+ x_pos = pipe_ctx->plane_res.scl_data.recout.x;
+ pos_cpy.x_hotspot -= bottom_pipe_x_pos;
+ if (hubp->curs_attr.attribute_flags.bits.ENABLE_MAGNIFICATION)
+ adjust_hotspot_between_slices_for_2x_magnify(hubp->curs_attr.width, &pos_cpy);
+ }
+ }
+
+ pos_cpy.x = (uint32_t)x_pos;
+ pos_cpy.y = (uint32_t)y_pos;
+
+ if (pos_cpy.enable && resource_can_pipe_disable_cursor(pipe_ctx))
+ pos_cpy.enable = false;
+
+ /* From here on x_pos/y_pos are relative to this pipe's recout origin */
+ x_pos = pos_cpy.x - param.recout.x;
+ y_pos = pos_cpy.y - param.recout.y;
+
+ /* Top-left corner of the cursor image, i.e. position minus hotspot */
+ recout_x_pos = x_pos - pos_cpy.x_hotspot;
+ recout_y_pos = y_pos - pos_cpy.y_hotspot;
+
+ if (recout_x_pos >= (int)param.recout.width)
+ pos_cpy.enable = false; /* not visible beyond right edge*/
+
+ if (recout_y_pos >= (int)param.recout.height)
+ pos_cpy.enable = false; /* not visible beyond bottom edge*/
+
+ if (recout_x_pos + (int)hubp->curs_attr.width <= 0)
+ pos_cpy.enable = false; /* not visible beyond left edge*/
+
+ if (recout_y_pos + (int)hubp->curs_attr.height <= 0)
+ pos_cpy.enable = false; /* not visible beyond top edge*/
+
+ hubp->funcs->set_cursor_position(hubp, &pos_cpy, &param);
+ dpp->funcs->set_cursor_position(dpp, &pos_cpy, &param, hubp->curs_attr.width, hubp->curs_attr.height);
+}
+
+/*
+ * Return true when the current state qualifies for the CAB "no memory
+ * request" case: no stream that has planes sits on a link whose PSR version
+ * is DC_PSR_VERSION_UNSUPPORTED.
+ */
+static bool dcn401_check_no_memory_request_for_cab(struct dc *dc)
+{
+	struct dc_state *state = dc->current_state;
+	int idx;
+
+	for (idx = 0; idx < state->stream_count; idx++) {
+		if (!state->stream_status[idx].plane_count)
+			continue;
+		if (state->streams[idx]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED)
+			continue;
+
+		/* Fail eligibility on a visible stream */
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Compute the number of MALL/CAB ways needed for static screen in @ctx.
+ *
+ * Starts from the MALL size recorded in the bandwidth context, adds the
+ * cursor footprint of every pipe that has both a stream and a plane, and
+ * converts the byte total to ways. dc->debug.force_mall_ss_num_ways, when
+ * positive, overrides the computed value; without a conversion hook the
+ * result is 0.
+ *
+ * The way count is kept in a 32-bit variable, matching the return type, so
+ * that a large result from the conversion hook is not truncated to 8 bits
+ * before the caller compares it against the cache capacity.
+ */
+static uint32_t dcn401_calculate_cab_allocation(struct dc *dc, struct dc_state *ctx)
+{
+	int i;
+	uint32_t num_ways = 0;
+	uint32_t mall_ss_size_bytes = 0;
+
+	mall_ss_size_bytes = ctx->bw_ctx.bw.dcn.mall_ss_size_bytes;
+	// TODO add additional logic for PSR active stream exclusion optimization
+	// mall_ss_psr_active_size_bytes = ctx->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes;
+
+	// Include cursor size for CAB allocation
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[i];
+
+		if (!pipe->stream || !pipe->plane_state)
+			continue;
+
+		mall_ss_size_bytes += dcn32_helper_calculate_mall_bytes_for_cursor(dc, pipe, false);
+	}
+
+	// Convert number of cache lines required to number of ways
+	if (dc->debug.force_mall_ss_num_ways > 0)
+		num_ways = dc->debug.force_mall_ss_num_ways;
+	else if (dc->res_pool->funcs->calculate_mall_ways_from_bytes)
+		num_ways = dc->res_pool->funcs->calculate_mall_ways_from_bytes(dc, mall_ss_size_bytes);
+	else
+		num_ways = 0;
+
+	return num_ways;
+}
+
+/*
+ * Send the CAB-for-static-screen DMUB command that matches the current
+ * state.
+ *
+ * Returns false without sending anything when there is no DMUB service or
+ * current state, or when any stream's link reports PSR support. Otherwise
+ * the command is sent and true is returned:
+ *  - @enable false: DMUB_CMD__CAB_NO_IDLE_OPTIMIZATION
+ *  - no visible non-PSR stream: DMUB_CMD__CAB_NO_DCN_REQ
+ *  - surfaces fit in the cache and no plane is stereo/TMZ:
+ *    DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB with the way count
+ *  - otherwise: DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB
+ *
+ * The required way count is held in 32 bits so that a requirement above 255
+ * cannot wrap around and be mistaken for one that fits in the cache.
+ */
+bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable)
+{
+	union dmub_rb_cmd cmd;
+	uint32_t ways = 0;
+	uint8_t i;
+	int j;
+	bool mall_ss_unsupported = false;
+	struct dc_plane_state *plane = NULL;
+
+	if (!dc->ctx->dmub_srv || !dc->current_state)
+		return false;
+
+	for (i = 0; i < dc->current_state->stream_count; i++) {
+		/* MALL SS messaging is not supported with PSR at this time */
+		if (dc->current_state->streams[i] != NULL &&
+				dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) {
+			DC_LOG_MALL("MALL SS not supported with PSR at this time\n");
+			return false;
+		}
+	}
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS;
+	cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header);
+
+	if (enable) {
+		if (dcn401_check_no_memory_request_for_cab(dc)) {
+			/* 1. Check no memory request case for CAB.
+			 * If no memory request case, send CAB_ACTION NO_DCN_REQ DMUB message
+			 */
+			DC_LOG_MALL("sending CAB action NO_DCN_REQ\n");
+			cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ;
+		} else {
+			/* 2. Check if all surfaces can fit in CAB.
+			 * If surfaces can fit into CAB, send CAB_ACTION_ALLOW DMUB message
+			 * and configure HUBP's to fetch from MALL
+			 */
+			ways = dcn401_calculate_cab_allocation(dc, dc->current_state);
+
+			/* MALL not supported with Stereo3D or TMZ surface. If any plane is using stereo,
+			 * or TMZ surface, don't try to enter MALL.
+			 */
+			for (i = 0; i < dc->current_state->stream_count; i++) {
+				for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) {
+					plane = dc->current_state->stream_status[i].plane_states[j];
+
+					if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO ||
+							plane->address.tmz_surface) {
+						mall_ss_unsupported = true;
+						break;
+					}
+				}
+				if (mall_ss_unsupported)
+					break;
+			}
+			if (ways <= dc->caps.cache_num_ways && !mall_ss_unsupported) {
+				cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB;
+				/* bounded by cache_num_ways at this point */
+				cmd.cab.cab_alloc_ways = (uint8_t)ways;
+				DC_LOG_MALL("cab allocation: %u ways. CAB action: DCN_SS_FIT_IN_CAB\n", ways);
+			} else {
+				cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB;
+				DC_LOG_MALL("frame does not fit in CAB: %u ways required. CAB action: DCN_SS_NOT_FIT_IN_CAB\n", ways);
+			}
+		}
+	} else {
+		/* Disable CAB */
+		cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_IDLE_OPTIMIZATION;
+		DC_LOG_MALL("idle optimization disabled\n");
+	}
+
+	dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+	return true;
+}
+
+/*
+ * Delay for dc->debug.dcc_meta_propagation_delay_us when any pipe in the
+ * bottom_pipe chain starting at @top_pipe has a plane that uses DCC, flips
+ * immediately and has an address update pending. At most one delay is
+ * issued, and none when the configured delay is not positive.
+ */
+void dcn401_wait_for_dcc_meta_propagation(const struct dc *dc,
+		const struct pipe_ctx *top_pipe)
+{
+	const struct pipe_ctx *cur;
+
+	for (cur = top_pipe; cur != NULL; cur = cur->bottom_pipe) {
+		if (!cur->plane_state)
+			continue;
+		if (!cur->plane_state->dcc.enable)
+			continue;
+		if (!cur->plane_state->flip_immediate)
+			continue;
+		if (!cur->plane_state->update_flags.bits.addr_update)
+			continue;
+
+		/* first match decides; no need to look at further pipes */
+		if (dc->debug.dcc_meta_propagation_delay_us > 0)
+			udelay(dc->debug.dcc_meta_propagation_delay_us);
+		return;
+	}
+}
+
+/*
+ * Program clocks, watermarks, arbiter and compbuf settings ahead of
+ * applying @context.
+ *
+ * P-State change support requested by @context is cleared for the duration
+ * of the programming and restored before returning. dc->optimized_required
+ * is set when P-State support was requested, when the watermark or arbiter
+ * hooks return non-zero, or when the compbuf size differs from the one in
+ * the current state.
+ */
+void dcn401_prepare_bandwidth(struct dc *dc,
+ struct dc_state *context)
+{
+ struct hubbub *hubbub = dc->res_pool->hubbub;
+ bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support;
+ unsigned int compbuf_size = 0;
+
+ /* Any transition into P-State support should disable MCLK switching first to avoid hangs */
+ if (p_state_change_support) {
+ dc->optimized_required = true;
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = false;
+ }
+
+ /* With DC-mode softmax enabled: when the current DRAM clock is at or
+ * below the softmax limit but the new context needs more, raise the max
+ * memclk to the last clk_table entry.
+ * NOTE(review): indexes entries[num_entries - 1]; assumes num_entries > 0.
+ */
+ if (dc->clk_mgr->dc_mode_softmax_enabled)
+ if (dc->clk_mgr->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 &&
+ context->bw_ctx.bw.dcn.clk.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000)
+ dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz);
+
+ /* Increase clocks */
+ dc->clk_mgr->funcs->update_clocks(
+ dc->clk_mgr,
+ context,
+ false);
+
+ /* program dchubbub watermarks:
+ * For assigning optimized_required, use |= operator since we don't want
+ * to clear the value if the optimize has not happened yet
+ */
+ dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
+ &context->bw_ctx.bw.dcn.watermarks,
+ dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
+ false);
+ /* update timeout thresholds */
+ if (hubbub->funcs->program_arbiter) {
+ dc->optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false);
+ }
+
+ /* decrease compbuf size */
+ if (hubbub->funcs->program_compbuf_segments) {
+ compbuf_size = context->bw_ctx.bw.dcn.arb_regs.compbuf_size;
+ dc->optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size);
+
+ hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size, false);
+ }
+
+ /* Update the FAMS2 config (enable argument false) under the global HW lock */
+ if (dc->debug.fams2_config.bits.enable) {
+ dcn401_fams2_global_control_lock(dc, context, true);
+ dcn401_fams2_update_config(dc, context, false);
+ dcn401_fams2_global_control_lock(dc, context, false);
+ }
+
+ if (p_state_change_support != context->bw_ctx.bw.dcn.clk.p_state_change_support) {
+ /* After disabling P-State, restore the original value to ensure we get the correct P-State
+ * on the next optimize. */
+ context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support;
+ }
+}
+
+/*
+ * Apply the optimized (post-commit) bandwidth settings of @context.
+ *
+ * Order: FAMS2 config under the global HW lock, watermarks, arbiter,
+ * softmax memclk limit, compbuf size, clocks, and finally extended blank
+ * programming for pipes with a fixed, stretched v_total when Z-state is
+ * allowed.
+ *
+ * A pipe can carry a stream without owning a HUBP, so the HUBP pointer is
+ * checked before its function table is consulted.
+ */
+void dcn401_optimize_bandwidth(
+		struct dc *dc,
+		struct dc_state *context)
+{
+	int i;
+	struct hubbub *hubbub = dc->res_pool->hubbub;
+
+	/* enable fams2 if needed */
+	if (dc->debug.fams2_config.bits.enable) {
+		dcn401_fams2_global_control_lock(dc, context, true);
+		dcn401_fams2_update_config(dc, context, true);
+		dcn401_fams2_global_control_lock(dc, context, false);
+	}
+
+	/* program dchubbub watermarks */
+	hubbub->funcs->program_watermarks(hubbub,
+					&context->bw_ctx.bw.dcn.watermarks,
+					dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
+					true);
+	/* update timeout thresholds */
+	if (hubbub->funcs->program_arbiter) {
+		hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, true);
+	}
+
+	/* drop back to the softmax memclk limit once the new context fits under it */
+	if (dc->clk_mgr->dc_mode_softmax_enabled)
+		if (dc->clk_mgr->clks.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 &&
+				context->bw_ctx.bw.dcn.clk.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000)
+			dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->dc_mode_softmax_memclk);
+
+	/* increase compbuf size */
+	if (hubbub->funcs->program_compbuf_segments)
+		hubbub->funcs->program_compbuf_segments(hubbub, context->bw_ctx.bw.dcn.arb_regs.compbuf_size, true);
+
+	dc->clk_mgr->funcs->update_clocks(
+			dc->clk_mgr,
+			context,
+			true);
+	if (context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) {
+		for (i = 0; i < dc->res_pool->pipe_count; ++i) {
+			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+			struct hubp *hubp = pipe_ctx->plane_res.hubp;
+
+			if (!pipe_ctx->stream || !hubp ||
+					!hubp->funcs->program_extended_blank)
+				continue;
+
+			if (pipe_ctx->stream->adjust.v_total_min == pipe_ctx->stream->adjust.v_total_max
+					&& pipe_ctx->stream->adjust.v_total_max > pipe_ctx->stream->timing.v_total)
+				hubp->funcs->program_extended_blank(hubp,
+						pipe_ctx->dlg_regs.min_dst_y_next_start);
+		}
+	}
+}
+
+/*
+ * Take (@lock true) or release (@lock false) the DMUB HW lock on behalf of
+ * the driver through an inbox0 command. Does nothing without a DC context,
+ * a DMUB service, or when FAMS2 is disabled in the debug config.
+ * @context is not used.
+ */
+void dcn401_fams2_global_control_lock(struct dc *dc,
+		struct dc_state *context,
+		bool lock)
+{
+	/* use always for now */
+	union dmub_inbox0_cmd_lock_hw lock_cmd = { 0 };
+
+	if (dc->ctx && dc->ctx->dmub_srv && dc->debug.fams2_config.bits.enable) {
+		lock_cmd.bits.command_code = DMUB_INBOX0_CMD__HW_LOCK;
+		lock_cmd.bits.hw_lock_client = HW_LOCK_CLIENT_DRIVER;
+		lock_cmd.bits.lock = lock;
+		lock_cmd.bits.should_release = !lock;
+		dmub_hw_lock_mgr_inbox0_cmd(dc->ctx->dmub_srv, lock_cmd);
+	}
+}
+
+/*
+ * Block-sequence variant of the FAMS2 global lock: issues the DMUB inbox0
+ * HW lock/release command described by @params, but only when the params
+ * mark it as required.
+ */
+void dcn401_fams2_global_control_lock_fast(union block_sequence_params *params)
+{
+	struct dc *dc = params->fams2_global_control_lock_fast_params.dc;
+	bool lock = params->fams2_global_control_lock_fast_params.lock;
+	union dmub_inbox0_cmd_lock_hw lock_cmd = { 0 };
+
+	if (!params->fams2_global_control_lock_fast_params.is_required)
+		return;
+
+	lock_cmd.bits.command_code = DMUB_INBOX0_CMD__HW_LOCK;
+	lock_cmd.bits.hw_lock_client = HW_LOCK_CLIENT_DRIVER;
+	lock_cmd.bits.lock = lock;
+	lock_cmd.bits.should_release = !lock;
+	dmub_hw_lock_mgr_inbox0_cmd(dc->ctx->dmub_srv, lock_cmd);
+}
+
+/*
+ * Push the FAMS2 configuration of @context to DMUB. FAMS2 is only enabled
+ * when @enable is set and the context's global FAMS2 config has its enable
+ * bit set. Does nothing without a DC context, a DMUB service, or when FAMS2
+ * is disabled in the debug config.
+ */
+void dcn401_fams2_update_config(struct dc *dc, struct dc_state *context, bool enable)
+{
+	bool ctx_wants_fams2;
+
+	if (!dc->ctx)
+		return;
+	if (!dc->ctx->dmub_srv)
+		return;
+	if (!dc->debug.fams2_config.bits.enable)
+		return;
+
+	ctx_wants_fams2 = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable;
+
+	dc_dmub_srv_fams2_update_config(dc, context, enable && ctx_wants_fams2);
+}
+
+/*
+ * Bring DSC in line with an ODM configuration change on @otg_master.
+ *
+ * Updates DSC on the new OTG master when it owns a DSC resource, then
+ * disconnects the DSC of every OPP head that had one in the current state
+ * but no longer has one in @context. The disconnect pass is skipped when the
+ * pipe of the same index in the current state is not an OTG master.
+ */
+static void update_dsc_for_odm_change(struct dc *dc, struct dc_state *context,
+ struct pipe_ctx *otg_master)
+{
+ int i;
+ struct pipe_ctx *old_pipe;
+ struct pipe_ctx *new_pipe;
+ struct pipe_ctx *old_opp_heads[MAX_PIPES];
+ struct pipe_ctx *old_otg_master;
+ int old_opp_head_count = 0;
+
+ /* Same pipe index, looked up in the currently committed state */
+ old_otg_master = &dc->current_state->res_ctx.pipe_ctx[otg_master->pipe_idx];
+
+ if (resource_is_pipe_type(old_otg_master, OTG_MASTER)) {
+ old_opp_head_count = resource_get_opp_heads_for_otg_master(old_otg_master,
+ &dc->current_state->res_ctx,
+ old_opp_heads);
+ } else {
+ // DC cannot assume that the current state and the new state
+ // share the same OTG pipe since this is not true when called
+ // in the context of a commit stream not checked. Hence, set
+ // old_otg_master to NULL to skip the DSC configuration.
+ old_otg_master = NULL;
+ }
+
+
+ if (otg_master->stream_res.dsc)
+ dcn32_update_dsc_on_stream(otg_master,
+ otg_master->stream->timing.flags.DSC);
+ if (old_otg_master && old_otg_master->stream_res.dsc) {
+ for (i = 0; i < old_opp_head_count; i++) {
+ old_pipe = old_opp_heads[i];
+ new_pipe = &context->res_ctx.pipe_ctx[old_pipe->pipe_idx];
+ /* DSC was in use on this pipe before and is gone in the new state */
+ if (old_pipe->stream_res.dsc && !new_pipe->stream_res.dsc)
+ old_pipe->stream_res.dsc->funcs->dsc_disconnect(
+ old_pipe->stream_res.dsc);
+ }
+ }
+}
+
+/*
+ * Program the ODM configuration of @otg_master as described by @context.
+ *
+ * With more than one OPP head the timing generator is put into ODM combine
+ * using the collected OPP instances and slice widths; otherwise it is put
+ * into ODM bypass. Every OPP head then gets its pipe clock enabled and its
+ * left-edge extra pixel programmed, DSC is updated for the new layout, and
+ * the blank pattern is reprogrammed when the OTG master is not a DPP pipe.
+ */
+void dcn401_update_odm(struct dc *dc, struct dc_state *context,
+ struct pipe_ctx *otg_master)
+{
+ struct pipe_ctx *opp_heads[MAX_PIPES];
+ int opp_inst[MAX_PIPES] = {0};
+ int opp_head_count;
+ int odm_slice_width = resource_get_odm_slice_dst_width(otg_master, false);
+ int last_odm_slice_width = resource_get_odm_slice_dst_width(otg_master, true);
+ int i;
+
+ opp_head_count = resource_get_opp_heads_for_otg_master(
+ otg_master, &context->res_ctx, opp_heads);
+
+ /* Collect the OPP instance number of every slice for set_odm_combine */
+ for (i = 0; i < opp_head_count; i++)
+ opp_inst[i] = opp_heads[i]->stream_res.opp->inst;
+ if (opp_head_count > 1)
+ otg_master->stream_res.tg->funcs->set_odm_combine(
+ otg_master->stream_res.tg,
+ opp_inst, opp_head_count,
+ odm_slice_width, last_odm_slice_width);
+ else
+ otg_master->stream_res.tg->funcs->set_odm_bypass(
+ otg_master->stream_res.tg,
+ &otg_master->stream->timing);
+
+ for (i = 0; i < opp_head_count; i++) {
+ opp_heads[i]->stream_res.opp->funcs->opp_pipe_clock_control(
+ opp_heads[i]->stream_res.opp,
+ true);
+ opp_heads[i]->stream_res.opp->funcs->opp_program_left_edge_extra_pixel(
+ opp_heads[i]->stream_res.opp,
+ opp_heads[i]->stream->timing.pixel_encoding,
+ resource_is_pipe_type(opp_heads[i], OTG_MASTER));
+ }
+
+ update_dsc_for_odm_change(dc, context, otg_master);
+
+ if (!resource_is_pipe_type(otg_master, DPP_PIPE))
+ /*
+ * blank pattern is generated by OPP, reprogram blank pattern
+ * due to OPP count change
+ */
+ dc->hwseq->funcs.blank_pixel_data(dc, otg_master, true);
+}
+
+/*
+ * Unblank the stream on @pipe_ctx.
+ *
+ * 128b/132b DP streams are unblanked through the HPO DP stream encoder,
+ * other DP streams through the regular stream encoder using the unblank
+ * parameters built here (ODM slice count, timing, link rate from
+ * @link_settings, pixels per cycle). When the link's local sink is eDP the
+ * backlight is switched on afterwards.
+ */
+void dcn401_unblank_stream(struct pipe_ctx *pipe_ctx,
+		struct dc_link_settings *link_settings)
+{
+	struct dc_stream_state *stream = pipe_ctx->stream;
+	struct dc_link *link = stream->link;
+	struct dce_hwseq *hws = link->dc->hwseq;
+	struct encoder_unblank_param unblank = {0};
+
+	/* calculate parameters for unblank */
+	unblank.opp_cnt = resource_get_odm_slice_count(pipe_ctx);
+	unblank.timing = stream->timing;
+	unblank.link_settings.link_rate = link_settings->link_rate;
+	unblank.pix_per_cycle = pipe_ctx->stream_res.pix_clk_params.dio_se_pix_per_cycle;
+
+	if (link->dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) {
+		pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_unblank(
+				pipe_ctx->stream_res.hpo_dp_stream_enc,
+				pipe_ctx->stream_res.tg->inst);
+	} else if (dc_is_dp_signal(stream->signal)) {
+		struct stream_encoder *enc = pipe_ctx->stream_res.stream_enc;
+
+		enc->funcs->dp_unblank(link, enc, &unblank);
+	}
+
+	if (!link->local_sink)
+		return;
+
+	if (link->local_sink->sink_signal == SIGNAL_TYPE_EDP)
+		hws->funcs.edp_backlight_control(link, true);
+}
+
+/*
+ * Prepare the display hardware for release by the driver.
+ *
+ * Unless dc->debug.disable_force_pstate_allow_on_hw_release is set, FAMS2 is
+ * turned off first, P-State change is forced on through HUBBUB when it was
+ * unsupported or handled by FAMS2, and clocks are updated with P-State
+ * support marked true in the current state. With the debug option set, the
+ * clock manager's P-State support is cleared and clocks are updated before
+ * FAMS2 is turned off.
+ * NOTE(review): dc_dmub_srv_fams2_update_config() is called with
+ * dc->current_state before (or without) the NULL check applied to the other
+ * uses - confirm the callee tolerates a NULL state when disabling.
+ */
+void dcn401_hardware_release(struct dc *dc)
+{
+ if (!dc->debug.disable_force_pstate_allow_on_hw_release) {
+ dc_dmub_srv_fams2_update_config(dc, dc->current_state, false);
+
+ /* If pstate unsupported, or still supported
+ * by firmware, force it supported by dcn
+ */
+ if (dc->current_state) {
+ if ((!dc->clk_mgr->clks.p_state_change_support ||
+ dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) &&
+ dc->res_pool->hubbub->funcs->force_pstate_change_control)
+ dc->res_pool->hubbub->funcs->force_pstate_change_control(
+ dc->res_pool->hubbub, true, true);
+
+ dc->current_state->bw_ctx.bw.dcn.clk.p_state_change_support = true;
+ dc->clk_mgr->funcs->update_clocks(dc->clk_mgr, dc->current_state, true);
+ }
+ } else {
+ /* Debug path: clear P-State support in the clock manager, then disable FAMS2 */
+ if (dc->current_state) {
+ dc->clk_mgr->clks.p_state_change_support = false;
+ dc->clk_mgr->funcs->update_clocks(dc->clk_mgr, dc->current_state, true);
+ }
+ dc_dmub_srv_fams2_update_config(dc, dc->current_state, false);
+ }
+}
+
+/*
+ * Wait for the DET buffer update of every HUBP under @otg_master in
+ * @context.
+ *
+ * For each OPP head of the OTG master: when the head has a plane, wait on
+ * the HUBP of each of its DPP pipes; otherwise wait on the head's own HUBP.
+ * Pipes without a HUBP are skipped, and nothing is waited on when HUBBUB or
+ * its wait_for_det_update hook is missing. Returns immediately when the OTG
+ * master has no stream.
+ */
+void dcn401_wait_for_det_buffer_update_under_otg_master(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master)
+{
+	struct pipe_ctx *slice_heads[MAX_PIPES];
+	struct pipe_ctx *plane_pipes[MAX_PIPES];
+	struct hubbub *hubbub = dc->res_pool->hubbub;
+	bool can_wait;
+	int num_slices;
+	int num_planes = 0;
+	int s, p;
+
+	if (!otg_master->stream)
+		return;
+
+	can_wait = hubbub && hubbub->funcs->wait_for_det_update;
+
+	num_slices = resource_get_opp_heads_for_otg_master(otg_master,
+			&context->res_ctx, slice_heads);
+
+	for (s = 0; s < num_slices; s++) {
+		struct pipe_ctx *head = slice_heads[s];
+
+		if (!head->plane_state) {
+			/* no plane on this slice: only the head's HUBP matters */
+			if (can_wait && head->plane_res.hubp)
+				hubbub->funcs->wait_for_det_update(hubbub, head->plane_res.hubp->inst);
+			continue;
+		}
+
+		num_planes = resource_get_dpp_pipes_for_opp_head(
+				head,
+				&context->res_ctx,
+				plane_pipes);
+
+		for (p = 0; p < num_planes; p++) {
+			struct pipe_ctx *dpp_pipe = plane_pipes[p];
+
+			if (can_wait && dpp_pipe && dpp_pipe->plane_res.hubp)
+				hubbub->funcs->wait_for_det_update(hubbub, dpp_pipe->plane_res.hubp->inst);
+		}
+	}
+}
+
+/*
+ * Return true when @pipe takes no part in the interdependent lock sequence:
+ * it is not an OTG master, its timing generator is not enabled, or it is a
+ * SubVP phantom pipe in @context.
+ */
+static bool dcn401_pipe_exempt_from_update_lock(struct dc_state *context,
+		struct pipe_ctx *pipe)
+{
+	struct timing_generator *tg = pipe->stream_res.tg;
+
+	if (!resource_is_pipe_type(pipe, OTG_MASTER))
+		return true;
+	if (!tg->funcs->is_tg_enabled(tg))
+		return true;
+
+	return dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM;
+}
+
+/*
+ * Lock or unlock all participating OTG master pipes of @context.
+ *
+ * Locking takes the pipe control lock on every participating pipe.
+ * Unlocking is done in two passes: pipes flagged in
+ * dc->scratch.pipes_to_unlock_first are unlocked first, each followed by a
+ * wait for the DET buffer update of the same-index pipe in the current
+ * state, and only then are the remaining pipes unlocked.
+ */
+void dcn401_interdependent_update_lock(struct dc *dc,
+		struct dc_state *context, bool lock)
+{
+	struct pipe_ctx *new_pipe = NULL;
+	unsigned int idx = 0;
+
+	if (lock) {
+		for (idx = 0; idx < dc->res_pool->pipe_count; idx++) {
+			new_pipe = &context->res_ctx.pipe_ctx[idx];
+
+			if (!dcn401_pipe_exempt_from_update_lock(context, new_pipe))
+				dc->hwss.pipe_control_lock(dc, new_pipe, true);
+		}
+		return;
+	}
+
+	/* Need to free DET being used first and have pipe update, then unlock the remaining pipes*/
+	for (idx = 0; idx < dc->res_pool->pipe_count; idx++) {
+		struct pipe_ctx *cur_pipe;
+
+		new_pipe = &context->res_ctx.pipe_ctx[idx];
+
+		if (dcn401_pipe_exempt_from_update_lock(context, new_pipe) ||
+				!dc->scratch.pipes_to_unlock_first[idx])
+			continue;
+
+		cur_pipe = &dc->current_state->res_ctx.pipe_ctx[idx];
+		dc->hwss.pipe_control_lock(dc, new_pipe, false);
+		/* Assumes pipe of the same index in current_state is also an OTG_MASTER pipe*/
+		dcn401_wait_for_det_buffer_update_under_otg_master(dc, dc->current_state, cur_pipe);
+	}
+
+	/* Unlocking the rest of the pipes */
+	for (idx = 0; idx < dc->res_pool->pipe_count; idx++) {
+		if (dc->scratch.pipes_to_unlock_first[idx])
+			continue;
+
+		new_pipe = &context->res_ctx.pipe_ctx[idx];
+		if (dcn401_pipe_exempt_from_update_lock(context, new_pipe))
+			continue;
+
+		dc->hwss.pipe_control_lock(dc, new_pipe, false);
+	}
+}
+
+/*
+ * Unlock the timing generator of @pipe_ctx, applying the 3DLUT fast-load
+ * workaround when any pipe under it needs it.
+ *
+ * All pipes reachable through the ODM (next_odm_pipe) and MPC (bottom_pipe)
+ * chains are scanned for planes whose 3DLUT source is video memory with
+ * shaper + 3DLUT enabled. When none is found the TG is simply unlocked.
+ * Otherwise 3DLUT fast-load is enabled on those pipes both before and after
+ * the unlock, with VUPDATE keepout held around the whole sequence when the
+ * TG provides that hook.
+ */
+void dcn401_perform_3dlut_wa_unlock(struct pipe_ctx *pipe_ctx)
+{
+ /* If 3DLUT FL is enabled and 3DLUT is in use, follow the workaround sequence for pipe unlock to make sure that
+ * HUBP will properly fetch 3DLUT contents after unlock.
+ *
+ * This is meant to work around a known HW issue where VREADY will cancel the pending 3DLUT_ENABLE signal regardless
+ * of whether OTG lock is currently being held or not.
+ */
+ struct pipe_ctx *wa_pipes[MAX_PIPES] = { NULL };
+ struct pipe_ctx *odm_pipe, *mpc_pipe;
+ int i, wa_pipe_ct = 0;
+
+ /* Collect every pipe that needs the workaround.
+ * NOTE(review): wa_pipes has MAX_PIPES slots; assumes the ODM x MPC walk
+ * never visits more pipes than that.
+ */
+ for (odm_pipe = pipe_ctx; odm_pipe != NULL; odm_pipe = odm_pipe->next_odm_pipe) {
+ for (mpc_pipe = odm_pipe; mpc_pipe != NULL; mpc_pipe = mpc_pipe->bottom_pipe) {
+ if (mpc_pipe->plane_state && mpc_pipe->plane_state->mcm_luts.lut3d_data.lut3d_src
+ == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM
+ && mpc_pipe->plane_state->mcm_shaper_3dlut_setting
+ == DC_CM2_SHAPER_3DLUT_SETTING_ENABLE_SHAPER_3DLUT) {
+ wa_pipes[wa_pipe_ct++] = mpc_pipe;
+ }
+ }
+ }
+
+ if (wa_pipe_ct > 0) {
+ if (pipe_ctx->stream_res.tg->funcs->set_vupdate_keepout)
+ pipe_ctx->stream_res.tg->funcs->set_vupdate_keepout(pipe_ctx->stream_res.tg, true);
+
+ /* Enable 3DLUT fast-load ahead of the unlock */
+ for (i = 0; i < wa_pipe_ct; ++i) {
+ if (wa_pipes[i]->plane_res.hubp->funcs->hubp_enable_3dlut_fl)
+ wa_pipes[i]->plane_res.hubp->funcs->hubp_enable_3dlut_fl(wa_pipes[i]->plane_res.hubp, true);
+ }
+
+ pipe_ctx->stream_res.tg->funcs->unlock(pipe_ctx->stream_res.tg);
+ if (pipe_ctx->stream_res.tg->funcs->wait_update_lock_status)
+ pipe_ctx->stream_res.tg->funcs->wait_update_lock_status(pipe_ctx->stream_res.tg, false);
+
+ /* Enable it a second time once the unlock has taken effect */
+ for (i = 0; i < wa_pipe_ct; ++i) {
+ if (wa_pipes[i]->plane_res.hubp->funcs->hubp_enable_3dlut_fl)
+ wa_pipes[i]->plane_res.hubp->funcs->hubp_enable_3dlut_fl(wa_pipes[i]->plane_res.hubp, true);
+ }
+
+ if (pipe_ctx->stream_res.tg->funcs->set_vupdate_keepout)
+ pipe_ctx->stream_res.tg->funcs->set_vupdate_keepout(pipe_ctx->stream_res.tg, false);
+ } else {
+ pipe_ctx->stream_res.tg->funcs->unlock(pipe_ctx->stream_res.tg);
+ }
+}
+
+/*
+ * Program updates that were deferred until now; currently only the compbuf
+ * size of @context, and only when HUBBUB provides the hook for it.
+ */
+void dcn401_program_outstanding_updates(struct dc *dc,
+		struct dc_state *context)
+{
+	struct hubbub *hubbub = dc->res_pool->hubbub;
+
+	if (!hubbub->funcs->program_compbuf_segments)
+		return;
+
+	/* update compbuf if required */
+	hubbub->funcs->program_compbuf_segments(hubbub,
+			context->bw_ctx.bw.dcn.arb_regs.compbuf_size, true);
+}
+
+void dcn401_reset_back_end_for_pipe(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context) /* context: not referenced in the body */
+{
+ struct dc_link *link = pipe_ctx->stream->link;
+ const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+ if (pipe_ctx->stream_res.stream_enc == NULL) {
+ pipe_ctx->stream = NULL;
+ return;
+ }
+
+ /* DPMS may already be disabled, or */
+ /* the dpms_off status may be incorrect due to the fastboot
+ * feature. When the system resumes from S4 with the second
+ * screen only, dpms_off would be true but VBIOS has
+ * lit up eDP, so check the link status too.
+ */
+ if (!pipe_ctx->stream->dpms_off || link->link_status.link_active)
+ dc->link_srv->set_dpms_off(pipe_ctx);
+ else if (pipe_ctx->stream_res.audio)
+ dc->hwss.disable_audio_stream(pipe_ctx);
+
+ /* free acquired resources */
+ if (pipe_ctx->stream_res.audio) {
+ /* disable az_endpoint */
+ pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio);
+
+ /* free audio */
+ if (dc->caps.dynamic_audio == true) {
+ /* audio endpoints are arbitrated dynamically, */
+ /* so freeing the resource must also reset is_audio_acquired */
+ update_audio_usage(&dc->current_state->res_ctx, dc->res_pool,
+ pipe_ctx->stream_res.audio, false);
+ pipe_ctx->stream_res.audio = NULL;
+ }
+ }
+
+ /* The caller's loop resets the parent pipe (pipe0) last.
+ * The back end is shared by all pipes and is disabled only when
+ * the parent pipe is disabled.
+ */
+ if (pipe_ctx->top_pipe == NULL) {
+
+ dc->hwss.set_abm_immediate_disable(pipe_ctx);
+
+ pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg);
+
+ pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, false);
+ if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass)
+ pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
+
+ set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL);
+
+ /* TODO - convert symclk_ref_cnts for otg to a bit map to solve
+ * the case where the same symclk is shared across multiple otg
+ * instances
+ */
+ if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+ link->phy_state.symclk_ref_cnts.otg = 0;
+ if (link->phy_state.symclk_state == SYMCLK_ON_TX_OFF) {
+ link_hwss->disable_link_output(link,
+ &pipe_ctx->link_res, pipe_ctx->stream->signal);
+ link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF;
+ }
+
+ /* reset the DTBCLK_P source to REFCLK for this tg instance, when the dccg provides the hook */
+ if (dc->res_pool->dccg->funcs->set_dtbclk_p_src)
+ dc->res_pool->dccg->funcs->set_dtbclk_p_src(dc->res_pool->dccg, REFCLK, pipe_ctx->stream_res.tg->inst);
+ }
+
+/*
+ * In case of a dangling plane, setting stream to NULL unconditionally causes
+ * failures during reset hw ctx, which expects the pipe_ctx pipe and plane pointers
+ * to be NULL whenever stream is NULL. NOTE(review): plane_state is not cleared here - confirm.
+ */
+ pipe_ctx->stream = NULL;
+ pipe_ctx->top_pipe = NULL;
+ pipe_ctx->bottom_pipe = NULL;
+ pipe_ctx->next_odm_pipe = NULL;
+ pipe_ctx->prev_odm_pipe = NULL;
+ DC_LOG_DEBUG("Reset back end for pipe %d, tg:%d\n",
+ pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst);
+}
+
+void dcn401_reset_hw_ctx_wrap(
+ struct dc *dc,
+ struct dc_state *context)
+{
+ int i;
+ struct dce_hwseq *hws = dc->hwseq;
+
+ /* Reset back end, walking the pipes from the highest index down to 0 */
+ for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) {
+ struct pipe_ctx *pipe_ctx_old =
+ &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx_old->stream)
+ continue;
+ /* only pipes with neither a top_pipe nor a prev_odm_pipe are reset here */
+ if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe)
+ continue;
+ /* reset when the stream is gone from the new context or the pipe needs reprogramming */
+ if (!pipe_ctx->stream ||
+ pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) {
+ struct clock_source *old_clk = pipe_ctx_old->clock_source;
+
+ if (hws->funcs.reset_back_end_for_pipe)
+ hws->funcs.reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
+ if (hws->funcs.enable_stream_gating)
+ hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
+ if (old_clk)
+ old_clk->funcs->cs_power_down(old_clk);
+ }
+ }
+}
+
+static unsigned int dcn401_calculate_vready_offset_for_group(struct pipe_ctx *pipe)
+{
+ struct pipe_ctx *other_pipe;
+ unsigned int vready_offset = pipe->global_sync.dcn4x.vready_offset_pixels;
+
+ /* Return the largest vready_offset_pixels among this pipe and the pipes on its bottom/top and next/prev ODM chains */
+ for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
+ if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset)
+ vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels;
+ }
+ for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
+ if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset)
+ vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels;
+ }
+ for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
+ if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset)
+ vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels;
+ }
+ for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
+ if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset)
+ vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels;
+ }
+
+ return vready_offset;
+}
+
+static void dcn401_program_tg(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+ struct dce_hwseq *hws)
+{
+ pipe_ctx->stream_res.tg->funcs->program_global_sync(
+ pipe_ctx->stream_res.tg,
+ dcn401_calculate_vready_offset_for_group(pipe_ctx),
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines);
+ /* phantom (SubVP) pipes skip the wait for CRTC_STATE_VACTIVE */
+ if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
+
+ pipe_ctx->stream_res.tg->funcs->set_vtg_params(
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
+ /* the vupdate interrupt setup is optional: only done when the hwseq provides the hook */
+ if (hws->funcs.setup_vupdate_interrupt)
+ hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
+}
+
+void dcn401_program_pipe(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context)
+{
+ struct dce_hwseq *hws = dc->hwseq;
+
+ /* Blank/unblank only on the OTG master pipe; blank when there is no visible plane */
+ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) {
+ if (pipe_ctx->update_flags.bits.enable ||
+ pipe_ctx->update_flags.bits.odm ||
+ pipe_ctx->stream->update_flags.bits.abm_level)
+ hws->funcs.blank_pixel_data(dc, pipe_ctx,
+ !pipe_ctx->plane_state ||
+ !pipe_ctx->plane_state->visible);
+ }
+
+ /* Only update TG on the top pipe (no top_pipe and no prev_odm_pipe) */
+ if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe
+ && !pipe_ctx->prev_odm_pipe)
+ dcn401_program_tg(dc, pipe_ctx, context, hws);
+
+ if (pipe_ctx->update_flags.bits.odm)
+ hws->funcs.update_odm(dc, context, pipe_ctx);
+
+ if (pipe_ctx->update_flags.bits.enable) {
+ if (hws->funcs.enable_plane)
+ hws->funcs.enable_plane(dc, pipe_ctx, context);
+ else
+ dc->hwss.enable_plane(dc, pipe_ctx, context);
+
+ if (dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes)
+ dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes(dc->res_pool->hubbub);
+ }
+
+ if (pipe_ctx->update_flags.bits.det_size) {
+ if (dc->res_pool->hubbub->funcs->program_det_size)
+ dc->res_pool->hubbub->funcs->program_det_size(
+ dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->det_buffer_size_kb);
+ if (dc->res_pool->hubbub->funcs->program_det_segments)
+ dc->res_pool->hubbub->funcs->program_det_segments(
+ dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size);
+ }
+
+ if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw ||
+ pipe_ctx->plane_state->update_flags.raw ||
+ pipe_ctx->stream->update_flags.raw))
+ dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context);
+
+ if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable ||
+ pipe_ctx->plane_state->update_flags.bits.hdr_mult))
+ hws->funcs.set_hdr_multiplier(pipe_ctx);
+
+ if (pipe_ctx->plane_state &&
+ (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
+ pipe_ctx->plane_state->update_flags.bits.gamma_change ||
+ pipe_ctx->plane_state->update_flags.bits.lut_3d ||
+ pipe_ctx->update_flags.bits.enable))
+ hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
+
+ /* dcn10_translate_regamma_to_hw_format takes 750us to finish, so
+ * only do gamma programming on enable, plane change or out_tf update;
+ * an internal memcmp avoids updating on slave planes
+ */
+ if (pipe_ctx->update_flags.bits.enable ||
+ pipe_ctx->update_flags.bits.plane_changed ||
+ pipe_ctx->stream->update_flags.bits.out_tf)
+ hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
+
+ /* If the pipe has been enabled or has a different opp, we
+ * should reprogram the fmt. This deals with cases where
+ * interaction between mpc and odm combine on different streams
+ * causes a different pipe to be chosen to odm combine with.
+ */
+ if (pipe_ctx->update_flags.bits.enable
+ || pipe_ctx->update_flags.bits.opp_changed) {
+
+ pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion(
+ pipe_ctx->stream_res.opp,
+ COLOR_SPACE_YCBCR601,
+ pipe_ctx->stream->timing.display_color_depth,
+ pipe_ctx->stream->signal);
+
+ pipe_ctx->stream_res.opp->funcs->opp_program_fmt(
+ pipe_ctx->stream_res.opp,
+ &pipe_ctx->stream->bit_depth_params,
+ &pipe_ctx->stream->clamping);
+ }
+
+ /* Set ABM pipe and level after the other pipe configuration is done (visible planes only) */
+ if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) {
+ if (pipe_ctx->stream_res.abm) {
+ dc->hwss.set_pipe(pipe_ctx);
+ pipe_ctx->stream_res.abm->funcs->set_abm_level(pipe_ctx->stream_res.abm,
+ pipe_ctx->stream->abm_level);
+ }
+ }
+
+ if (pipe_ctx->update_flags.bits.test_pattern_changed) {
+ struct output_pixel_processor *odm_opp = pipe_ctx->stream_res.opp;
+ struct bit_depth_reduction_params params;
+
+ memset(&params, 0, sizeof(params)); /* all-zero bit depth reduction params before (re)programming the pattern generator */
+ odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
+ dc->hwss.set_disp_pattern_generator(dc,
+ pipe_ctx,
+ pipe_ctx->stream_res.test_pattern_params.test_pattern,
+ pipe_ctx->stream_res.test_pattern_params.color_space,
+ pipe_ctx->stream_res.test_pattern_params.color_depth,
+ NULL,
+ pipe_ctx->stream_res.test_pattern_params.width,
+ pipe_ctx->stream_res.test_pattern_params.height,
+ pipe_ctx->stream_res.test_pattern_params.offset);
+ }
+}
+
+void dcn401_program_front_end_for_ctx(
+ struct dc *dc,
+ struct dc_state *context)
+{
+ int i;
+ unsigned int prev_hubp_count = 0;
+ unsigned int hubp_count = 0;
+ struct dce_hwseq *hws = dc->hwseq;
+ struct pipe_ctx *pipe = NULL;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (resource_is_pipe_topology_changed(dc->current_state, context))
+ resource_log_pipe_topology_update(dc, context);
+
+ if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state) {
+ if (pipe->plane_state->triplebuffer_flips)
+ BREAK_TO_DEBUGGER();
+
+ /* turn off triple buffer for full update */
+ dc->hwss.program_triplebuffer(
+ dc, pipe, pipe->plane_state->triplebuffer_flips);
+ }
+ }
+ }
+ /* Count pipes that carry a plane in the current state and in the new state */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (dc->current_state->res_ctx.pipe_ctx[i].plane_state)
+ prev_hubp_count++;
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ hubp_count++;
+ }
+ /* Going from zero planes to at least one: call force_pstate_change_control (if present), then delay 500us */
+ if (prev_hubp_count == 0 && hubp_count > 0) {
+ if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+ dc->res_pool->hubbub->funcs->force_pstate_change_control(
+ dc->res_pool->hubbub, true, false);
+ udelay(500);
+ }
+
+ /* Set pipe update flags; NOTE(review): no pipe locking is visible in this loop */
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ dc->hwss.detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i],
+ &context->res_ctx.pipe_ctx[i]);
+
+ /* When disabling phantom pipes, turn on phantom OTG first (so we can get double
+ * buffer updates properly)
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream;
+
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream &&
+ dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) {
+ struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
+
+ if (tg->funcs->enable_crtc) {
+ if (dc->hwseq->funcs.blank_pixel_data)
+ dc->hwseq->funcs.blank_pixel_data(dc, pipe, true);
+
+ tg->funcs->enable_crtc(tg);
+ }
+ }
+ }
+ /* OTG blank before disabling all front ends (top pipes with a stream only) */
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
+ && !context->res_ctx.pipe_ctx[i].top_pipe
+ && !context->res_ctx.pipe_ctx[i].prev_odm_pipe
+ && context->res_ctx.pipe_ctx[i].stream)
+ hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
+
+
+ /* Disconnect mpcc of pipes that are being disabled or whose opp changed */
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
+ || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) {
+ struct hubbub *hubbub = dc->res_pool->hubbub;
+
+ /* Phantom pipe DET should be 0, but if a pipe in use is being transitioned to phantom
+ * then we want to do the programming here (effectively it's being disabled). If we do
+ * the programming later the DET won't be updated until the OTG for the phantom pipe is
+ * turned on (i.e. in an MCLK switch) which can come in too late and cause issues with
+ * DET allocation.
+ */
+ if ((context->res_ctx.pipe_ctx[i].update_flags.bits.disable ||
+ (context->res_ctx.pipe_ctx[i].plane_state &&
+ dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) ==
+ SUBVP_PHANTOM))) {
+ if (hubbub->funcs->program_det_size)
+ hubbub->funcs->program_det_size(hubbub,
+ dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
+ if (dc->res_pool->hubbub->funcs->program_det_segments)
+ dc->res_pool->hubbub->funcs->program_det_segments(
+ hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
+ }
+ hws->funcs.plane_atomic_disconnect(dc, dc->current_state,
+ &dc->current_state->res_ctx.pipe_ctx[i]);
+ DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
+ }
+
+ /* update ODM for blanked OTG master pipes (OTG master that is not a DPP pipe) */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+ !resource_is_pipe_type(pipe, DPP_PIPE) &&
+ pipe->update_flags.bits.odm &&
+ hws->funcs.update_odm)
+ hws->funcs.update_odm(dc, context, pipe);
+ }
+
+ /*
+ * Program all updated pipes, order matters for mpcc setup. Start with
+ * top pipe and program all pipes that follow in order
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state && !pipe->top_pipe) {
+ while (pipe) {
+ if (hws->funcs.program_pipe)
+ hws->funcs.program_pipe(dc, pipe, context);
+ else {
+ /* Don't program phantom pipes in the regular front end programming sequence.
+ * There is an MPO transition case where a pipe being used by a video plane is
+ * transitioned directly to be a phantom pipe when closing the MPO video.
+ * However the phantom pipe will program a new HUBP_VTG_SEL (update takes place
+ * right away) but the MPO still exists until the double buffered update of the
+ * main pipe so we will get a frame of underflow if the phantom pipe is
+ * programmed here.
+ */
+ if (pipe->stream &&
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
+ dcn401_program_pipe(dc, pipe, context);
+ }
+
+ pipe = pipe->bottom_pipe;
+ }
+ }
+
+ /* Program secondary blending tree and writeback pipes */
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (!pipe->top_pipe && !pipe->prev_odm_pipe
+ && pipe->stream && pipe->stream->num_wb_info > 0
+ && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw)
+ || pipe->stream->update_flags.raw)
+ && hws->funcs.program_all_writeback_pipes_in_tree)
+ hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context);
+
+ /* Avoid underflow by waiting for pipe read start when stream 0 goes from one plane to several. */
+ if (hws->wa.wait_hubpret_read_start_during_mpo_transition &&
+ !pipe->top_pipe &&
+ pipe->stream &&
+ pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start &&
+ dc->current_state->stream_status[0].plane_count == 1 &&
+ context->stream_status[0].plane_count > 1) {
+ pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start(pipe->plane_res.hubp);
+ }
+ }
+}
+
+void dcn401_post_unlock_program_front_end(
+ struct dc *dc,
+ struct dc_state *context)
+{
+ /* Timeout for pipe enable: 100000 us, polled in 1 us steps */
+ unsigned int timeout_us = 100000;
+ unsigned int polling_interval_us = 1;
+ struct dce_hwseq *hwseq = dc->hwseq;
+ int i;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (resource_is_pipe_type(&dc->current_state->res_ctx.pipe_ctx[i], OPP_HEAD) &&
+ !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD))
+ dc->hwss.post_unlock_reset_opp(dc,
+ &dc->current_state->res_ctx.pipe_ctx[i]);
+ /* Disable planes flagged for disable in the new context, using their current_state pipe_ctx */
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
+ dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
+
+ /*
+ * If we are enabling a pipe, we need to wait for pending clear as this is a critical
+ * part of the enable operation; otherwise, DM may request an immediate flip which
+ * will cause HW to perform an "immediate enable" (as opposed to "vsync enable") which
+ * is unsupported on DCN.
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ /* Don't check flip pending on phantom pipes */
+ if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable &&
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
+ struct hubp *hubp = pipe->plane_res.hubp;
+ int j = 0;
+
+ for (j = 0; j < timeout_us / polling_interval_us
+ && hubp->funcs->hubp_is_flip_pending(hubp); j++)
+ udelay(polling_interval_us);
+ }
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ /* When going from a smaller ODM slice count to larger, we must ensure double
+ * buffer update completes before we return to ensure we don't reduce DISPCLK
+ * before we've transitioned to 2:1 or 4:1
+ */
+ if (resource_is_pipe_type(old_pipe, OTG_MASTER) && resource_is_pipe_type(pipe, OTG_MASTER) &&
+ resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) &&
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
+ int j = 0;
+ struct timing_generator *tg = pipe->stream_res.tg;
+
+ if (tg->funcs->get_optc_double_buffer_pending) {
+ for (j = 0; j < timeout_us / polling_interval_us
+ && tg->funcs->get_optc_double_buffer_pending(tg); j++)
+ udelay(polling_interval_us);
+ }
+ }
+ }
+ /* NOTE(review): presumably undoes the (true, false) call made in dcn401_program_front_end_for_ctx - confirm */
+ if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+ dc->res_pool->hubbub->funcs->force_pstate_change_control(
+ dc->res_pool->hubbub, false, false);
+
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state && !pipe->top_pipe) {
+ /* Program phantom pipe here to prevent a frame of underflow in the MPO transition
+ * case (if a pipe being used for a video plane transitions to a phantom pipe, it
+ * can underflow due to HUBP_VTG_SEL programming if done in the regular front end
+ * programming sequence).
+ */
+ while (pipe) {
+ if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
+ /* When turning on the phantom pipe we want to run through the
+ * entire enable sequence, so apply all the "enable" flags.
+ */
+ if (dc->hwss.apply_update_flags_for_phantom)
+ dc->hwss.apply_update_flags_for_phantom(pipe);
+ if (dc->hwss.update_phantom_vp_position)
+ dc->hwss.update_phantom_vp_position(dc, context, pipe);
+ dcn401_program_pipe(dc, pipe, context);
+ }
+ pipe = pipe->bottom_pipe;
+ }
+ }
+ }
+ /* NOTE(review): hwseq is only NULL-checked here; dcn401_program_pipe() above may already have dereferenced dc->hwseq */
+ if (!hwseq)
+ return;
+
+ /* P-State support transitions:
+ * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe
+ * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally)
+ * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe
+ * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe
+ * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes
+ */
+ if (hwseq->funcs.update_force_pstate)
+ dc->hwseq->funcs.update_force_pstate(dc, context);
+
+ /* Only program the MALL registers after all the main and phantom pipes
+ * are done programming.
+ */
+ if (hwseq->funcs.program_mall_pipe_config)
+ hwseq->funcs.program_mall_pipe_config(dc, context);
+
+ /* WA to apply WM setting; the hubbub hook is called without a NULL check */
+ if (hwseq->wa.DEGVIDCN21)
+ dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub);
+
+
+ /* WA for stutter underflow during MPO transitions when adding 2nd plane */
+ if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) {
+
+ if (dc->current_state->stream_status[0].plane_count == 1 &&
+ context->stream_status[0].plane_count > 1) {
+
+ struct timing_generator *tg = dc->res_pool->timing_generators[0];
+
+ dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, false);
+ /* record that the WA is applied and the frame count at which it was applied */
+ hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied = true;
+ hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied_on_frame =
+ tg->funcs->get_frame_count(tg);
+ }
+ }
+}
+
+bool dcn401_update_bandwidth(
+ struct dc *dc,
+ struct dc_state *context)
+{
+ int i;
+ struct dce_hwseq *hws = dc->hwseq;
+
+ /* recalculate DML parameters; return false if validation does not report DC_OK */
+ if (dc->res_pool->funcs->validate_bandwidth(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING) != DC_OK)
+ return false;
+
+ /* apply updated bandwidth parameters */
+ dc->hwss.prepare_bandwidth(dc, context);
+
+ /* update hubp configs for all pipes that carry a plane */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->plane_state == NULL)
+ continue;
+ /* global sync, VTG params, blanking and the vupdate interrupt are programmed on top pipes only */
+ if (pipe_ctx->top_pipe == NULL) {
+ bool blank = !is_pipe_tree_visible(pipe_ctx);
+
+ pipe_ctx->stream_res.tg->funcs->program_global_sync(
+ pipe_ctx->stream_res.tg,
+ dcn401_calculate_vready_offset_for_group(pipe_ctx),
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines);
+
+ pipe_ctx->stream_res.tg->funcs->set_vtg_params(
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false);
+ /* blank state follows the visibility of the pipe tree; skipped on pipes with a prev_odm_pipe */
+ if (pipe_ctx->prev_odm_pipe == NULL)
+ hws->funcs.blank_pixel_data(dc, pipe_ctx, blank);
+
+ if (hws->funcs.setup_vupdate_interrupt)
+ hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
+ }
+
+ if (pipe_ctx->plane_res.hubp->funcs->hubp_setup2)
+ pipe_ctx->plane_res.hubp->funcs->hubp_setup2(
+ pipe_ctx->plane_res.hubp,
+ &pipe_ctx->hubp_regs,
+ &pipe_ctx->global_sync,
+ &pipe_ctx->stream->timing);
+ }
+
+ return true;
+}
+
+void dcn401_detect_pipe_changes(struct dc_state *old_state,
+ struct dc_state *new_state,
+ struct pipe_ctx *old_pipe,
+ struct pipe_ctx *new_pipe)
+{
+ bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM;
+ bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM;
+
+ unsigned int old_pipe_vready_offset_pixels = old_pipe->global_sync.dcn4x.vready_offset_pixels;
+ unsigned int new_pipe_vready_offset_pixels = new_pipe->global_sync.dcn4x.vready_offset_pixels;
+ unsigned int old_pipe_vstartup_lines = old_pipe->global_sync.dcn4x.vstartup_lines;
+ unsigned int new_pipe_vstartup_lines = new_pipe->global_sync.dcn4x.vstartup_lines;
+ unsigned int old_pipe_vupdate_offset_pixels = old_pipe->global_sync.dcn4x.vupdate_offset_pixels;
+ unsigned int new_pipe_vupdate_offset_pixels = new_pipe->global_sync.dcn4x.vupdate_offset_pixels;
+ unsigned int old_pipe_vupdate_width_pixels = old_pipe->global_sync.dcn4x.vupdate_vupdate_width_pixels;
+ unsigned int new_pipe_vupdate_width_pixels = new_pipe->global_sync.dcn4x.vupdate_vupdate_width_pixels;
+ /* Start with all flags cleared; the checks below only ever set bits in new_pipe->update_flags */
+ new_pipe->update_flags.raw = 0;
+
+ /* If non-phantom pipe is being transitioned to a phantom pipe,
+ * set disable and return immediately. This is because the pipe
+ * that was previously in use must be fully disabled before we
+ * can "enable" it as a phantom pipe (since the OTG will certainly
+ * be different). The post_unlock sequence will set the correct
+ * update flags to enable the phantom pipe.
+ */
+ if (old_pipe->plane_state && !old_is_phantom &&
+ new_pipe->plane_state && new_is_phantom) {
+ new_pipe->update_flags.bits.disable = 1;
+ return;
+ }
+
+ if (resource_is_pipe_type(new_pipe, OTG_MASTER) &&
+ resource_is_odm_topology_changed(new_pipe, old_pipe))
+ /* Detect odm changes */
+ new_pipe->update_flags.bits.odm = 1;
+
+ /* Exit on unchanged, unused pipe */
+ if (!old_pipe->plane_state && !new_pipe->plane_state)
+ return;
+ /* Detect pipe enable: a newly enabled pipe gets the full set of flags below, then returns */
+ if (!old_pipe->plane_state && new_pipe->plane_state) {
+ new_pipe->update_flags.bits.enable = 1;
+ new_pipe->update_flags.bits.mpcc = 1;
+ new_pipe->update_flags.bits.dppclk = 1;
+ new_pipe->update_flags.bits.hubp_interdependent = 1;
+ new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+ new_pipe->update_flags.bits.unbounded_req = 1;
+ new_pipe->update_flags.bits.gamut_remap = 1;
+ new_pipe->update_flags.bits.scaler = 1;
+ new_pipe->update_flags.bits.viewport = 1;
+ new_pipe->update_flags.bits.det_size = 1;
+ if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE &&
+ new_pipe->stream_res.test_pattern_params.width != 0 &&
+ new_pipe->stream_res.test_pattern_params.height != 0)
+ new_pipe->update_flags.bits.test_pattern_changed = 1;
+ if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
+ new_pipe->update_flags.bits.odm = 1;
+ new_pipe->update_flags.bits.global_sync = 1;
+ }
+ return;
+ }
+
+ /* For SubVP we need to unconditionally enable because any phantom pipes are
+ * always removed then newly added for every full update whenever SubVP is in use.
+ * The remove-add sequence of the phantom pipe always results in the pipe
+ * being blanked in enable_stream_timing (DPG).
+ */
+ if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM)
+ new_pipe->update_flags.bits.enable = 1;
+
+ /* Phantom pipes are effectively disabled, so if the pipe was previously phantom
+ * we have to enable it
+ */
+ if (old_pipe->plane_state && old_is_phantom &&
+ new_pipe->plane_state && !new_is_phantom)
+ new_pipe->update_flags.bits.enable = 1;
+ /* Detect pipe disable: the plane is gone in the new state */
+ if (old_pipe->plane_state && !new_pipe->plane_state) {
+ new_pipe->update_flags.bits.disable = 1;
+ return;
+ }
+
+ /* Detect plane change */
+ if (old_pipe->plane_state != new_pipe->plane_state)
+ new_pipe->update_flags.bits.plane_changed = true;
+
+ /* Detect top pipe only changes */
+ if (resource_is_pipe_type(new_pipe, OTG_MASTER)) {
+ /* Detect global sync changes */
+ if ((old_pipe_vready_offset_pixels != new_pipe_vready_offset_pixels)
+ || (old_pipe_vstartup_lines != new_pipe_vstartup_lines)
+ || (old_pipe_vupdate_offset_pixels != new_pipe_vupdate_offset_pixels)
+ || (old_pipe_vupdate_width_pixels != new_pipe_vupdate_width_pixels))
+ new_pipe->update_flags.bits.global_sync = 1;
+ }
+
+ if (old_pipe->det_buffer_size_kb != new_pipe->det_buffer_size_kb)
+ new_pipe->update_flags.bits.det_size = 1;
+
+ /*
+ * Detect opp / tg change, only set on change, not on enable
+ * Assume mpcc inst = pipe index, if not this code needs to be updated
+ * since mpcc is what is affected by these. In fact all of our sequence
+ * makes this assumption at the moment with how hubp reset is matched to
+ * same index mpcc reset.
+ */
+ if (old_pipe->stream_res.opp != new_pipe->stream_res.opp)
+ new_pipe->update_flags.bits.opp_changed = 1;
+ if (old_pipe->stream_res.tg != new_pipe->stream_res.tg)
+ new_pipe->update_flags.bits.tg_changed = 1;
+
+ /*
+ * Detect mpcc blending changes, only dpp inst and opp matter here,
+ * mpccs getting removed/inserted update connected ones during their own
+ * programming
+ */
+ if (old_pipe->plane_res.dpp != new_pipe->plane_res.dpp
+ || old_pipe->stream_res.opp != new_pipe->stream_res.opp)
+ new_pipe->update_flags.bits.mpcc = 1;
+
+ /* Detect dppclk change */
+ if (old_pipe->plane_res.bw.dppclk_khz != new_pipe->plane_res.bw.dppclk_khz)
+ new_pipe->update_flags.bits.dppclk = 1;
+
+ /* Check for scl update */
+ if (memcmp(&old_pipe->plane_res.scl_data, &new_pipe->plane_res.scl_data, sizeof(struct scaler_data)))
+ new_pipe->update_flags.bits.scaler = 1;
+ /* Check for vp update (viewport or viewport_c) */
+ if (memcmp(&old_pipe->plane_res.scl_data.viewport, &new_pipe->plane_res.scl_data.viewport, sizeof(struct rect))
+ || memcmp(&old_pipe->plane_res.scl_data.viewport_c,
+ &new_pipe->plane_res.scl_data.viewport_c, sizeof(struct rect)))
+ new_pipe->update_flags.bits.viewport = 1;
+
+ /* Detect dlg/ttu/rq updates; the old registers are copied by value so they can be edited locally */
+ {
+ struct dml2_display_dlg_regs old_dlg_regs = old_pipe->hubp_regs.dlg_regs;
+ struct dml2_display_ttu_regs old_ttu_regs = old_pipe->hubp_regs.ttu_regs;
+ struct dml2_display_rq_regs old_rq_regs = old_pipe->hubp_regs.rq_regs;
+ struct dml2_display_dlg_regs *new_dlg_regs = &new_pipe->hubp_regs.dlg_regs;
+ struct dml2_display_ttu_regs *new_ttu_regs = &new_pipe->hubp_regs.ttu_regs;
+ struct dml2_display_rq_regs *new_rq_regs = &new_pipe->hubp_regs.rq_regs;
+
+ /* Detect pipe interdependent updates, then sync those fields into the local old copies */
+ if ((old_dlg_regs.dst_y_prefetch != new_dlg_regs->dst_y_prefetch)
+ || (old_dlg_regs.vratio_prefetch != new_dlg_regs->vratio_prefetch)
+ || (old_dlg_regs.vratio_prefetch_c != new_dlg_regs->vratio_prefetch_c)
+ || (old_dlg_regs.dst_y_per_vm_vblank != new_dlg_regs->dst_y_per_vm_vblank)
+ || (old_dlg_regs.dst_y_per_row_vblank != new_dlg_regs->dst_y_per_row_vblank)
+ || (old_dlg_regs.dst_y_per_vm_flip != new_dlg_regs->dst_y_per_vm_flip)
+ || (old_dlg_regs.dst_y_per_row_flip != new_dlg_regs->dst_y_per_row_flip)
+ || (old_dlg_regs.refcyc_per_meta_chunk_vblank_l != new_dlg_regs->refcyc_per_meta_chunk_vblank_l)
+ || (old_dlg_regs.refcyc_per_meta_chunk_vblank_c != new_dlg_regs->refcyc_per_meta_chunk_vblank_c)
+ || (old_dlg_regs.refcyc_per_meta_chunk_flip_l != new_dlg_regs->refcyc_per_meta_chunk_flip_l)
+ || (old_dlg_regs.refcyc_per_line_delivery_pre_l != new_dlg_regs->refcyc_per_line_delivery_pre_l)
+ || (old_dlg_regs.refcyc_per_line_delivery_pre_c != new_dlg_regs->refcyc_per_line_delivery_pre_c)
+ || (old_ttu_regs.refcyc_per_req_delivery_pre_l != new_ttu_regs->refcyc_per_req_delivery_pre_l)
+ || (old_ttu_regs.refcyc_per_req_delivery_pre_c != new_ttu_regs->refcyc_per_req_delivery_pre_c)
+ || (old_ttu_regs.refcyc_per_req_delivery_pre_cur0 !=
+ new_ttu_regs->refcyc_per_req_delivery_pre_cur0)
+ || (old_ttu_regs.min_ttu_vblank != new_ttu_regs->min_ttu_vblank)
+ || (old_ttu_regs.qos_level_flip != new_ttu_regs->qos_level_flip)) {
+ old_dlg_regs.dst_y_prefetch = new_dlg_regs->dst_y_prefetch;
+ old_dlg_regs.vratio_prefetch = new_dlg_regs->vratio_prefetch;
+ old_dlg_regs.vratio_prefetch_c = new_dlg_regs->vratio_prefetch_c;
+ old_dlg_regs.dst_y_per_vm_vblank = new_dlg_regs->dst_y_per_vm_vblank;
+ old_dlg_regs.dst_y_per_row_vblank = new_dlg_regs->dst_y_per_row_vblank;
+ old_dlg_regs.dst_y_per_vm_flip = new_dlg_regs->dst_y_per_vm_flip;
+ old_dlg_regs.dst_y_per_row_flip = new_dlg_regs->dst_y_per_row_flip;
+ old_dlg_regs.refcyc_per_meta_chunk_vblank_l = new_dlg_regs->refcyc_per_meta_chunk_vblank_l;
+ old_dlg_regs.refcyc_per_meta_chunk_vblank_c = new_dlg_regs->refcyc_per_meta_chunk_vblank_c;
+ old_dlg_regs.refcyc_per_meta_chunk_flip_l = new_dlg_regs->refcyc_per_meta_chunk_flip_l;
+ old_dlg_regs.refcyc_per_line_delivery_pre_l = new_dlg_regs->refcyc_per_line_delivery_pre_l;
+ old_dlg_regs.refcyc_per_line_delivery_pre_c = new_dlg_regs->refcyc_per_line_delivery_pre_c;
+ old_ttu_regs.refcyc_per_req_delivery_pre_l = new_ttu_regs->refcyc_per_req_delivery_pre_l;
+ old_ttu_regs.refcyc_per_req_delivery_pre_c = new_ttu_regs->refcyc_per_req_delivery_pre_c;
+ old_ttu_regs.refcyc_per_req_delivery_pre_cur0 = new_ttu_regs->refcyc_per_req_delivery_pre_cur0;
+ old_ttu_regs.min_ttu_vblank = new_ttu_regs->min_ttu_vblank;
+ old_ttu_regs.qos_level_flip = new_ttu_regs->qos_level_flip;
+ new_pipe->update_flags.bits.hubp_interdependent = 1;
+ }
+ /* Detect any other updates to ttu/rq/dlg: the interdependent fields now match, so only other fields can differ */
+ if (memcmp(&old_dlg_regs, new_dlg_regs, sizeof(old_dlg_regs)) ||
+ memcmp(&old_ttu_regs, new_ttu_regs, sizeof(old_ttu_regs)) ||
+ memcmp(&old_rq_regs, new_rq_regs, sizeof(old_rq_regs)))
+ new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+ }
+
+ if (old_pipe->unbounded_req != new_pipe->unbounded_req)
+ new_pipe->update_flags.bits.unbounded_req = 1;
+ /* Any byte difference in the test pattern parameters counts as a test pattern change */
+ if (memcmp(&old_pipe->stream_res.test_pattern_params,
+ &new_pipe->stream_res.test_pattern_params, sizeof(struct test_pattern_params))) {
+ new_pipe->update_flags.bits.test_pattern_changed = 1;
+ }
+}
+
+void dcn401_plane_atomic_power_down(struct dc *dc,
+ struct dpp *dpp,
+ struct hubp *hubp)
+{
+ struct dce_hwseq *hws = dc->hwseq;
+ uint32_t org_ip_request_cntl = 0;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+ /* Remember the current IP_REQUEST_EN value and set it to 1 if it was 0 */
+ if (REG(DC_IP_REQUEST_CNTL)) {
+ REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
+ if (org_ip_request_cntl == 0)
+ REG_SET(DC_IP_REQUEST_CNTL, 0,
+ IP_REQUEST_EN, 1);
+ }
+ /* DPP and HUBP power-gate hooks are optional and called with 'false' for this dpp/hubp instance */
+ if (hws->funcs.dpp_pg_control)
+ hws->funcs.dpp_pg_control(hws, dpp->inst, false);
+
+ if (hws->funcs.hubp_pg_control)
+ hws->funcs.hubp_pg_control(hws, hubp->inst, false);
+
+ hubp->funcs->hubp_reset(hubp);
+ dpp->funcs->dpp_reset(dpp);
+ /* Put IP_REQUEST_EN back to 0 when it was 0 on entry */
+ if (org_ip_request_cntl == 0 && REG(DC_IP_REQUEST_CNTL))
+ REG_SET(DC_IP_REQUEST_CNTL, 0,
+ IP_REQUEST_EN, 0);
+
+ DC_LOG_DEBUG(
+ "Power gated front end %d\n", hubp->inst);
+
+ if (hws->funcs.dpp_root_clock_control)
+ hws->funcs.dpp_root_clock_control(hws, dpp->inst, false);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h
new file mode 100644
index 000000000000..2621b7725267
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DC_HWSS_DCN401_H__
+#define __DC_HWSS_DCN401_H__
+
+#include "inc/core_types.h"
+#include "dc.h"
+#include "dc_stream.h"
+#include "hw_sequencer_private.h"
+#include "dcn401/dcn401_dccg.h"
+
+struct dc;
+
+enum ips_ono_state { /* power state codes; no declaration in this header uses the enum type */
+ ONO_ON = 0,
+ ONO_ON_IN_PROGRESS = 1, /* NOTE(review): presumably a transition towards ONO_ON - confirm */
+ ONO_OFF = 2,
+ ONO_OFF_IN_PROGRESS = 3 /* NOTE(review): presumably a transition towards ONO_OFF - confirm */
+};
+
+/**
+ * struct ips_ono_region_state - desired and current power state of a region
+ *
+ * @desire_pwr_state: desired power state based on configured value
+ * @current_pwr_state: current power gate status
+ */
+struct ips_ono_region_state {
+ uint32_t desire_pwr_state;
+ uint32_t current_pwr_state;
+};
+
+void dcn401_program_gamut_remap(struct pipe_ctx *pipe_ctx);
+
+void dcn401_init_hw(struct dc *dc);
+
+bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx,
+ const struct dc_plane_state *plane_state);
+bool dcn401_set_output_transfer_func(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ const struct dc_stream_state *stream);
+void dcn401_trigger_3dlut_dma_load(struct dc *dc,
+ struct pipe_ctx *pipe_ctx);
+void dcn401_calculate_dccg_tmds_div_value(struct pipe_ctx *pipe_ctx,
+ unsigned int *tmds_div);
+enum dc_status dcn401_enable_stream_timing(
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+ struct dc *dc);
+void dcn401_enable_stream(struct pipe_ctx *pipe_ctx);
+void dcn401_populate_mcm_luts(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_cm2_func_luts mcm_luts,
+ bool lut_bank_a);
+void dcn401_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable);
+
+void dcn401_disable_link_output(struct dc_link *link,
+ const struct link_resource *link_res,
+ enum signal_type signal);
+
+void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx);
+
+bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable);
+
+void dcn401_wait_for_dcc_meta_propagation(const struct dc *dc,
+ const struct pipe_ctx *top_pipe_to_program);
+
+void dcn401_prepare_bandwidth(struct dc *dc,
+ struct dc_state *context);
+
+void dcn401_optimize_bandwidth(
+ struct dc *dc,
+ struct dc_state *context);
+
+void dcn401_fams2_global_control_lock(struct dc *dc,
+ struct dc_state *context,
+ bool lock);
+void dcn401_fams2_update_config(struct dc *dc, struct dc_state *context, bool enable);
+void dcn401_fams2_global_control_lock_fast(union block_sequence_params *params);
+void dcn401_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings);
+void dcn401_hardware_release(struct dc *dc);
+void dcn401_update_odm(struct dc *dc, struct dc_state *context,
+ struct pipe_ctx *otg_master);
+void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy);
+void dcn401_wait_for_det_buffer_update_under_otg_master(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master);
+void dcn401_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock);
+void dcn401_program_outstanding_updates(struct dc *dc, struct dc_state *context);
+void dcn401_reset_back_end_for_pipe(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+void dcn401_reset_hw_ctx_wrap(
+ struct dc *dc,
+ struct dc_state *context);
+void dcn401_program_pipe(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+void dcn401_perform_3dlut_wa_unlock(struct pipe_ctx *pipe_ctx);
+void dcn401_program_front_end_for_ctx(struct dc *dc, struct dc_state *context);
+void dcn401_post_unlock_program_front_end(struct dc *dc, struct dc_state *context);
+bool dcn401_update_bandwidth(struct dc *dc, struct dc_state *context);
+void dcn401_detect_pipe_changes(
+ struct dc_state *old_state,
+ struct dc_state *new_state,
+ struct pipe_ctx *old_pipe,
+ struct pipe_ctx *new_pipe);
+void dcn401_plane_atomic_power_down(struct dc *dc,
+ struct dpp *dpp,
+ struct hubp *hubp);
+void dcn401_initialize_min_clocks(struct dc *dc);
+#endif /* __DC_HWSS_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
new file mode 100644
index 000000000000..d6e11b7e4fce
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn21/dcn21_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
+#include "dcn31/dcn31_hwseq.h"
+#include "dcn32/dcn32_hwseq.h"
+#include "dcn401/dcn401_hwseq.h"
+#include "dcn401_init.h"
+
+static const struct hw_sequencer_funcs dcn401_funcs = { /* copied into dc->hwss by dcn401_hw_sequencer_init_functions() */
+ .program_gamut_remap = dcn401_program_gamut_remap,
+ .init_hw = dcn401_init_hw,
+ .apply_ctx_to_hw = dce110_apply_ctx_to_hw,
+ .apply_ctx_for_surface = NULL, /* no implementation wired for this hook */
+ .program_front_end_for_ctx = dcn401_program_front_end_for_ctx,
+ .clear_surface_dcc_and_tiling = dcn10_reset_surface_dcc_and_tiling,
+ .wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
+ .post_unlock_program_front_end = dcn401_post_unlock_program_front_end,
+ .update_plane_addr = dcn20_update_plane_addr,
+ .update_dchub = dcn10_update_dchub,
+ .update_pending_status = dcn10_update_pending_status,
+ .program_output_csc = dcn20_program_output_csc,
+ .trigger_3dlut_dma_load = dcn401_trigger_3dlut_dma_load,
+ .enable_accelerated_mode = dce110_enable_accelerated_mode,
+ .enable_timing_synchronization = dcn10_enable_timing_synchronization,
+ .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
+ .update_info_frame = dcn31_update_info_frame,
+ .send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
+ .enable_stream = dcn401_enable_stream,
+ .disable_stream = dce110_disable_stream,
+ .unblank_stream = dcn401_unblank_stream,
+ .blank_stream = dce110_blank_stream,
+ .enable_audio_stream = dce110_enable_audio_stream,
+ .disable_audio_stream = dce110_disable_audio_stream,
+ .disable_plane = dcn20_disable_plane,
+ .pipe_control_lock = dcn20_pipe_control_lock,
+ .interdependent_update_lock = dcn401_interdependent_update_lock,
+ .cursor_lock = dcn10_cursor_lock,
+ .prepare_bandwidth = dcn401_prepare_bandwidth,
+ .optimize_bandwidth = dcn401_optimize_bandwidth,
+ .update_bandwidth = dcn401_update_bandwidth,
+ .set_drr = dcn10_set_drr,
+ .get_position = dcn10_get_position,
+ .set_static_screen_control = dcn31_set_static_screen_control,
+ .setup_stereo = dcn10_setup_stereo,
+ .set_avmute = dcn30_set_avmute,
+ .log_hw_state = dcn10_log_hw_state,
+ .get_hw_state = dcn10_get_hw_state,
+ .clear_status_bits = dcn10_clear_status_bits,
+ .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+ .edp_backlight_control = dce110_edp_backlight_control,
+ .edp_power_control = dce110_edp_power_control,
+ .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
+ .edp_wait_for_T12 = dce110_edp_wait_for_T12,
+ .set_cursor_position = dcn401_set_cursor_position,
+ .set_cursor_attribute = dcn10_set_cursor_attribute,
+ .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
+ .setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
+ .set_clock = dcn10_set_clock,
+ .get_clock = dcn10_get_clock,
+ .program_triplebuffer = dcn20_program_triple_buffer,
+ .enable_writeback = dcn30_enable_writeback,
+ .disable_writeback = dcn30_disable_writeback,
+ .update_writeback = dcn30_update_writeback,
+ .dmdata_status_done = dcn20_dmdata_status_done,
+ .program_dmdata_engine = dcn30_program_dmdata_engine,
+ .set_dmdata_attributes = dcn20_set_dmdata_attributes,
+ .init_sys_ctx = dcn20_init_sys_ctx,
+ .init_vm_ctx = dcn20_init_vm_ctx,
+ .set_flip_control_gsl = dcn20_set_flip_control_gsl,
+ .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
+ .calc_vupdate_position = dcn10_calc_vupdate_position,
+ .apply_idle_power_optimizations = dcn401_apply_idle_power_optimizations,
+ .does_plane_fit_in_mall = NULL, /* no implementation wired for this hook */
+ .set_backlight_level = dcn31_set_backlight_level,
+ .set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
+ .hardware_release = dcn401_hardware_release,
+ .set_pipe = dcn21_set_pipe,
+ .enable_lvds_link_output = dce110_enable_lvds_link_output,
+ .enable_tmds_link_output = dce110_enable_tmds_link_output,
+ .enable_dp_link_output = dce110_enable_dp_link_output,
+ .disable_link_output = dcn401_disable_link_output,
+ .set_disp_pattern_generator = dcn30_set_disp_pattern_generator,
+ .get_dcc_en_bits = dcn10_get_dcc_en_bits,
+ .enable_phantom_streams = dcn32_enable_phantom_streams,
+ .disable_phantom_streams = dcn32_disable_phantom_streams,
+ .update_visual_confirm_color = dcn10_update_visual_confirm_color,
+ .update_phantom_vp_position = dcn32_update_phantom_vp_position,
+ .update_dsc_pg = dcn32_update_dsc_pg,
+ .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom,
+ .wait_for_dcc_meta_propagation = dcn401_wait_for_dcc_meta_propagation,
+ .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless,
+ .fams2_global_control_lock = dcn401_fams2_global_control_lock,
+ .fams2_update_config = dcn401_fams2_update_config,
+ .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast,
+ .program_outstanding_updates = dcn401_program_outstanding_updates,
+ .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates,
+ .detect_pipe_changes = dcn401_detect_pipe_changes,
+ .enable_plane = dcn20_enable_plane,
+ .update_dchubp_dpp = dcn20_update_dchubp_dpp,
+ .post_unlock_reset_opp = dcn20_post_unlock_reset_opp,
+ .get_underflow_debug_data = dcn30_get_underflow_debug_data,
+};
+
+static const struct hwseq_private_funcs dcn401_private_funcs = { /* copied into dc->hwseq->funcs by dcn401_hw_sequencer_init_functions() */
+ .init_pipes = dcn10_init_pipes,
+ .plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
+ .update_mpcc = dcn20_update_mpcc,
+ .set_input_transfer_func = dcn32_set_input_transfer_func,
+ .set_output_transfer_func = dcn401_set_output_transfer_func,
+ .power_down = dce110_power_down,
+ .enable_display_power_gating = dcn10_dummy_display_power_gating,
+ .blank_pixel_data = dcn20_blank_pixel_data,
+ .reset_hw_ctx_wrap = dcn401_reset_hw_ctx_wrap,
+ .enable_stream_timing = dcn401_enable_stream_timing,
+ .edp_backlight_control = dce110_edp_backlight_control,
+ .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt,
+ .did_underflow_occur = dcn10_did_underflow_occur,
+ .init_blank = dcn32_init_blank,
+ .disable_vga = dcn20_disable_vga,
+ .bios_golden_init = dcn10_bios_golden_init,
+ .plane_atomic_disable = dcn20_plane_atomic_disable,
+ .plane_atomic_power_down = dcn401_plane_atomic_power_down,
+ .enable_power_gating_plane = dcn32_enable_power_gating_plane,
+ .hubp_pg_control = dcn32_hubp_pg_control,
+ .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree,
+ .update_odm = dcn401_update_odm,
+ .dsc_pg_control = dcn32_dsc_pg_control,
+ .dsc_pg_status = dcn32_dsc_pg_status,
+ .set_hdr_multiplier = dcn10_set_hdr_multiplier,
+ .verify_allow_pstate_change_high = dcn10_verify_allow_pstate_change_high,
+ .wait_for_blank_complete = dcn20_wait_for_blank_complete,
+ .dccg_init = dcn20_dccg_init,
+ .set_mcm_luts = dcn401_set_mcm_luts,
+ .program_mall_pipe_config = dcn32_program_mall_pipe_config,
+ .update_mall_sel = dcn32_update_mall_sel,
+ .calculate_dccg_k1_k2_values = NULL, /* no implementation wired for this hook */
+ .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw,
+ .reset_back_end_for_pipe = dcn401_reset_back_end_for_pipe,
+ .populate_mcm_luts = NULL, /* NOTE(review): dcn401_populate_mcm_luts() is declared in dcn401_hwseq.h but not wired here - confirm */
+ .perform_3dlut_wa_unlock = dcn401_perform_3dlut_wa_unlock,
+};
+
+void dcn401_hw_sequencer_init_functions(struct dc *dc)
+{
+ dc->hwseq->funcs = dcn401_private_funcs; /* private hook table, copied by value */
+ dc->hwss = dcn401_funcs; /* public hook table, copied by value */
+}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.h
new file mode 100644
index 000000000000..59e6d8525e19
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DC_DCN401_INIT_H__
+#define __DC_DCN401_INIT_H__
+
+struct dc;
+
+void dcn401_hw_sequencer_init_functions(struct dc *dc);
+
+#endif /* __DC_DCN401_INIT_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
index 02ff99f7bec2..1723bbcf2c46 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
@@ -26,11 +26,11 @@
#ifndef __DC_HW_SEQUENCER_H__
#define __DC_HW_SEQUENCER_H__
#include "dc_types.h"
-#include "clock_source.h"
+#include "inc/clock_source.h"
#include "inc/hw/timing_generator.h"
#include "inc/hw/opp.h"
#include "inc/hw/link_encoder.h"
-#include "core_status.h"
+#include "inc/core_status.h"
struct pipe_ctx;
struct dc_state;
@@ -45,11 +45,14 @@ struct dpp;
struct dce_hwseq;
struct link_resource;
struct dc_dmub_cmd;
+struct pg_block_update;
+struct drr_params;
+struct dc_underflow_debug_data;
struct subvp_pipe_control_lock_fast_params {
struct dc *dc;
bool lock;
- struct pipe_ctx *pipe_ctx;
+ bool subvp_immediate_flip;
};
struct pipe_control_lock_params {
@@ -134,6 +137,23 @@ struct set_ocsc_default_params {
enum mpc_output_csc_mode ocsc_mode;
};
+struct subvp_save_surf_addr {
+ struct dc_dmub_srv *dc_dmub_srv;
+ const struct dc_plane_address *addr;
+ uint8_t subvp_index;
+};
+
+struct wait_for_dcc_meta_propagation_params {
+ const struct dc *dc;
+ const struct pipe_ctx *top_pipe_to_program;
+};
+
+struct fams2_global_control_lock_fast_params {
+ struct dc *dc;
+ bool is_required;
+ bool lock;
+};
+
union block_sequence_params {
struct update_plane_addr_params update_plane_addr_params;
struct subvp_pipe_control_lock_fast_params subvp_pipe_control_lock_fast_params;
@@ -151,6 +171,9 @@ union block_sequence_params {
struct power_on_mpc_mem_pwr_params power_on_mpc_mem_pwr_params;
struct set_output_csc_params set_output_csc_params;
struct set_ocsc_default_params set_ocsc_default_params;
+ struct subvp_save_surf_addr subvp_save_surf_addr;
+ struct wait_for_dcc_meta_propagation_params wait_for_dcc_meta_propagation_params;
+ struct fams2_global_control_lock_fast_params fams2_global_control_lock_fast_params;
};
enum block_sequence_func {
@@ -170,6 +193,11 @@ enum block_sequence_func {
MPC_POWER_ON_MPC_MEM_PWR,
MPC_SET_OUTPUT_CSC,
MPC_SET_OCSC_DEFAULT,
+ DMUB_SUBVP_SAVE_SURF_ADDR,
+ HUBP_WAIT_FOR_DCC_META_PROP,
+ DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST,
+ /* This must be the last value in this enum, add new ones above */
+ HWSS_BLOCK_SEQUENCE_FUNC_COUNT
};
struct block_sequence {
@@ -177,6 +205,8 @@ struct block_sequence {
enum block_sequence_func func;
};
+#define MAX_HWSS_BLOCK_SEQUENCE_SIZE (HWSS_BLOCK_SEQUENCE_FUNC_COUNT * MAX_PIPES)
+
struct hw_sequencer_funcs {
void (*hardware_release)(struct dc *dc);
/* Embedded Display Related */
@@ -191,7 +221,7 @@ struct hw_sequencer_funcs {
struct dc_state *context);
enum dc_status (*apply_ctx_to_hw)(struct dc *dc,
struct dc_state *context);
- void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx);
+ void (*disable_plane)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx);
void (*disable_pixel_data)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool blank);
void (*apply_ctx_for_surface)(struct dc *dc,
const struct dc_stream_state *stream,
@@ -215,8 +245,8 @@ struct hw_sequencer_funcs {
void (*program_triplebuffer)(const struct dc *dc,
struct pipe_ctx *pipe_ctx, bool enableTripleBuffer);
void (*update_pending_status)(struct pipe_ctx *pipe_ctx);
- void (*power_down)(struct dc *dc);
void (*update_dsc_pg)(struct dc *dc, struct dc_state *context, bool safe_to_disable);
+ void (*clear_surface_dcc_and_tiling)(struct pipe_ctx *pipe_ctx, struct dc_plane_state *plane_state, bool clear_tiling);
/* Pipe Lock Related */
void (*pipe_control_lock)(struct dc *dc,
@@ -239,6 +269,7 @@ struct hw_sequencer_funcs {
void (*enable_per_frame_crtc_position_reset)(struct dc *dc,
int group_size, struct pipe_ctx *grouped_pipes[]);
void (*enable_timing_synchronization)(struct dc *dc,
+ struct dc_state *state,
int group_index, int group_size,
struct pipe_ctx *grouped_pipes[]);
void (*enable_vblanks_synchronization)(struct dc *dc,
@@ -251,11 +282,6 @@ struct hw_sequencer_funcs {
void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx,
int num_pipes,
const struct dc_static_screen_params *events);
-#ifndef TRIM_FSFT
- bool (*optimize_timing_for_fsft)(struct dc *dc,
- struct dc_crtc_timing *timing,
- unsigned int max_input_rate_in_khz);
-#endif
/* Stream Related */
void (*enable_stream)(struct pipe_ctx *pipe_ctx);
@@ -290,6 +316,7 @@ struct hw_sequencer_funcs {
void (*program_output_csc)(struct dc *dc, struct pipe_ctx *pipe_ctx,
enum dc_color_space colorspace,
uint16_t *matrix, int opp_id);
+ void (*trigger_3dlut_dma_load)(struct dc *dc, struct pipe_ctx *pipe_ctx);
/* VM Related */
int (*init_sys_ctx)(struct dce_hwseq *hws,
@@ -310,10 +337,6 @@ struct hw_sequencer_funcs {
void (*disable_writeback)(struct dc *dc,
unsigned int dwb_pipe_inst);
- bool (*mmhubbub_warmup)(struct dc *dc,
- unsigned int num_dwb,
- struct dc_writeback_info *wb_info);
-
/* Clock Related */
enum dc_status (*set_clock)(struct dc *dc,
enum dc_clock_type clock_type,
@@ -324,6 +347,9 @@ struct hw_sequencer_funcs {
struct dc_state *context);
void (*exit_optimized_pwr_state)(const struct dc *dc,
struct dc_state *context);
+ void (*calculate_pix_rate_divider)(struct dc *dc,
+ struct dc_state *context,
+ const struct dc_stream_state *stream);
/* Audio Related */
void (*enable_audio_stream)(struct pipe_ctx *pipe_ctx);
@@ -334,13 +360,14 @@ struct hw_sequencer_funcs {
/* HW State Logging Related */
void (*log_hw_state)(struct dc *dc, struct dc_log_buffer_ctx *log_ctx);
+ void (*log_color_state)(struct dc *dc,
+ struct dc_log_buffer_ctx *log_ctx);
void (*get_hw_state)(struct dc *dc, char *pBuf,
unsigned int bufSize, unsigned int mask);
void (*clear_status_bits)(struct dc *dc, unsigned int mask);
bool (*set_backlight_level)(struct pipe_ctx *pipe_ctx,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp);
+ struct set_backlight_level_params *params);
void (*set_abm_immediate_disable)(struct pipe_ctx *pipe_ctx);
@@ -370,9 +397,24 @@ struct hw_sequencer_funcs {
/* Idle Optimization Related */
bool (*apply_idle_power_optimizations)(struct dc *dc, bool enable);
- bool (*does_plane_fit_in_mall)(struct dc *dc, struct dc_plane_state *plane,
+ bool (*does_plane_fit_in_mall)(struct dc *dc,
+ unsigned int pitch,
+ unsigned int height,
+ enum surface_pixel_format format,
struct dc_cursor_attributes *cursor_attr);
+ void (*commit_subvp_config)(struct dc *dc, struct dc_state *context);
+ void (*enable_phantom_streams)(struct dc *dc, struct dc_state *context);
+ void (*disable_phantom_streams)(struct dc *dc, struct dc_state *context);
+ void (*subvp_pipe_control_lock)(struct dc *dc,
+ struct dc_state *context,
+ bool lock,
+ bool should_lock_all_pipes,
+ struct pipe_ctx *top_pipe_to_program,
+ bool subvp_prev_use);
+ void (*subvp_pipe_control_lock_fast)(union block_sequence_params *params);
+ void (*z10_restore)(const struct dc *dc);
+ void (*z10_save_init)(struct dc *dc);
bool (*is_abm_supported)(struct dc *dc,
struct dc_state *context, struct dc_stream_state *stream);
@@ -383,29 +425,60 @@ struct hw_sequencer_funcs {
enum dc_color_depth color_depth,
const struct tg_color *solid_color,
int width, int height, int offset);
-
- void (*subvp_pipe_control_lock_fast)(union block_sequence_params *params);
- void (*z10_restore)(const struct dc *dc);
- void (*z10_save_init)(struct dc *dc);
-
+ void (*blank_phantom)(struct dc *dc,
+ struct timing_generator *tg,
+ int width,
+ int height);
void (*update_visual_confirm_color)(struct dc *dc,
struct pipe_ctx *pipe_ctx,
int mpcc_id);
-
void (*update_phantom_vp_position)(struct dc *dc,
struct dc_state *context,
struct pipe_ctx *phantom_pipe);
void (*apply_update_flags_for_phantom)(struct pipe_ctx *phantom_pipe);
- void (*commit_subvp_config)(struct dc *dc, struct dc_state *context);
- void (*enable_phantom_streams)(struct dc *dc, struct dc_state *context);
- void (*subvp_pipe_control_lock)(struct dc *dc,
+ void (*calc_blocks_to_gate)(struct dc *dc, struct dc_state *context,
+ struct pg_block_update *update_state);
+ void (*calc_blocks_to_ungate)(struct dc *dc, struct dc_state *context,
+ struct pg_block_update *update_state);
+ void (*hw_block_power_up)(struct dc *dc,
+ struct pg_block_update *update_state);
+ void (*hw_block_power_down)(struct dc *dc,
+ struct pg_block_update *update_state);
+ void (*root_clock_control)(struct dc *dc,
+ struct pg_block_update *update_state, bool power_on);
+ bool (*is_pipe_topology_transition_seamless)(struct dc *dc,
+ const struct dc_state *cur_ctx,
+ const struct dc_state *new_ctx);
+ void (*wait_for_dcc_meta_propagation)(const struct dc *dc,
+ const struct pipe_ctx *top_pipe_to_program);
+ void (*fams2_global_control_lock)(struct dc *dc,
struct dc_state *context,
- bool lock,
- bool should_lock_all_pipes,
- struct pipe_ctx *top_pipe_to_program,
- bool subvp_prev_use);
-
+ bool lock);
+ void (*fams2_update_config)(struct dc *dc,
+ struct dc_state *context,
+ bool enable);
+ void (*fams2_global_control_lock_fast)(union block_sequence_params *params);
+ void (*set_long_vtotal)(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max);
+ void (*program_outstanding_updates)(struct dc *dc,
+ struct dc_state *context);
+ void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable);
+ void (*wait_for_all_pending_updates)(const struct pipe_ctx *pipe_ctx);
+ void (*detect_pipe_changes)(struct dc_state *old_state,
+ struct dc_state *new_state,
+ struct pipe_ctx *old_pipe,
+ struct pipe_ctx *new_pipe);
+ void (*enable_plane)(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+ void (*update_dchubp_dpp)(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+ void (*post_unlock_reset_opp)(struct dc *dc,
+ struct pipe_ctx *opp_head);
+ void (*get_underflow_debug_data)(const struct dc *dc,
+ struct timing_generator *tg,
+ struct dc_underflow_debug_data *out_data);
};
void color_space_to_black_color(
@@ -420,42 +493,84 @@ const uint16_t *find_color_matrix(
enum dc_color_space color_space,
uint32_t *array_size);
+void get_surface_tile_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color);
void get_surface_visual_confirm_color(
const struct pipe_ctx *pipe_ctx,
struct tg_color *color);
+void get_hdr_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color);
+void get_mpctree_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color);
+void get_smartmux_visual_confirm_color(
+ struct dc *dc,
+ struct tg_color *color);
+void get_vabc_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color);
void get_subvp_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color);
+void get_fams2_visual_confirm_color(
struct dc *dc,
struct dc_state *context,
struct pipe_ctx *pipe_ctx,
struct tg_color *color);
-void get_hdr_visual_confirm_color(
- struct pipe_ctx *pipe_ctx,
- struct tg_color *color);
-void get_mpctree_visual_confirm_color(
+void get_mclk_switch_visual_confirm_color(
struct pipe_ctx *pipe_ctx,
struct tg_color *color);
-void get_surface_tile_visual_confirm_color(
+
+void get_cursor_visual_confirm_color(
struct pipe_ctx *pipe_ctx,
struct tg_color *color);
-void get_mclk_switch_visual_confirm_color(
+void get_dcc_visual_confirm_color(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color);
+
+void set_p_state_switch_method(
struct dc *dc,
struct dc_state *context,
+ struct pipe_ctx *pipe_ctx);
+
+void set_drr_and_clear_adjust_pending(
struct pipe_ctx *pipe_ctx,
- struct tg_color *color);
+ struct dc_stream_state *stream,
+ struct drr_params *params);
void hwss_execute_sequence(struct dc *dc,
- struct block_sequence block_sequence[],
+ struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE],
int num_steps);
void hwss_build_fast_sequence(struct dc *dc,
struct dc_dmub_cmd *dc_dmub_cmd,
unsigned int dmub_cmd_count,
- struct block_sequence block_sequence[],
- int *num_steps,
- struct pipe_ctx *pipe_ctx);
+ struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE],
+ unsigned int *num_steps,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_stream_status *stream_status,
+ struct dc_state *context);
+
+void hwss_wait_for_all_blank_complete(struct dc *dc,
+ struct dc_state *context);
+
+void hwss_wait_for_odm_update_pending_complete(struct dc *dc,
+ struct dc_state *context);
+
+void hwss_wait_for_no_pipes_pending(struct dc *dc,
+ struct dc_state *context);
+
+void hwss_wait_for_outstanding_hw_updates(struct dc *dc,
+ struct dc_state *dc_context);
+
+void hwss_process_outstanding_hw_updates(struct dc *dc,
+ struct dc_state *dc_context);
void hwss_send_dmcub_cmd(union block_sequence_params *params);
@@ -471,4 +586,6 @@ void hwss_set_output_csc(union block_sequence_params *params);
void hwss_set_ocsc_default(union block_sequence_params *params);
+void hwss_subvp_save_surf_addr(union block_sequence_params *params);
+
#endif /* __DC_HW_SEQUENCER_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
index 4ca4192c1e12..1e2d247fbbac 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h
@@ -49,6 +49,7 @@ struct hwseq_wa_state {
bool DEGVIDCN10_253_applied;
bool disallow_self_refresh_during_multi_plane_transition_applied;
unsigned int disallow_self_refresh_during_multi_plane_transition_applied_on_frame;
+ bool skip_blank_stream;
};
struct pipe_ctx;
@@ -76,9 +77,8 @@ struct hwseq_private_funcs {
void (*enable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx);
void (*init_pipes)(struct dc *dc, struct dc_state *context);
void (*reset_hw_ctx_wrap)(struct dc *dc, struct dc_state *context);
- void (*update_plane_addr)(const struct dc *dc,
- struct pipe_ctx *pipe_ctx);
void (*plane_atomic_disconnect)(struct dc *dc,
+ struct dc_state *state,
struct pipe_ctx *pipe_ctx);
void (*update_mpcc)(struct dc *dc, struct pipe_ctx *pipe_ctx);
bool (*set_input_transfer_func)(struct dc *dc,
@@ -119,6 +119,14 @@ struct hwseq_private_funcs {
struct dce_hwseq *hws,
unsigned int dpp_inst,
bool clock_on);
+ void (*dpstream_root_clock_control)(
+ struct dce_hwseq *hws,
+ unsigned int dpp_inst,
+ bool clock_on);
+ void (*physymclk_root_clock_control)(
+ struct dce_hwseq *hws,
+ unsigned int phy_inst,
+ bool clock_on);
void (*dpp_pg_control)(struct dce_hwseq *hws,
unsigned int dpp_inst,
bool power_on);
@@ -152,18 +160,32 @@ struct hwseq_private_funcs {
void (*PLAT_58856_wa)(struct dc_state *context,
struct pipe_ctx *pipe_ctx);
void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable);
-#ifdef CONFIG_DRM_AMD_DC_FP
+ void (*enable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
void (*program_mall_pipe_config)(struct dc *dc, struct dc_state *context);
void (*update_force_pstate)(struct dc *dc, struct dc_state *context);
void (*update_mall_sel)(struct dc *dc, struct dc_state *context);
unsigned int (*calculate_dccg_k1_k2_values)(struct pipe_ctx *pipe_ctx,
unsigned int *k1_div,
unsigned int *k2_div);
- void (*set_pixels_per_cycle)(struct pipe_ctx *pipe_ctx);
void (*resync_fifo_dccg_dio)(struct dce_hwseq *hws, struct dc *dc,
- struct dc_state *context);
+ struct dc_state *context,
+ unsigned int current_pipe_idx);
+ enum dc_status (*apply_single_controller_ctx_to_hw)(
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+ struct dc *dc);
bool (*is_dp_dig_pixel_rate_div_policy)(struct pipe_ctx *pipe_ctx);
-#endif
+ void (*reset_back_end_for_pipe)(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+ void (*populate_mcm_luts)(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_cm2_func_luts mcm_luts,
+ bool lut_bank_a);
+ void (*perform_3dlut_wa_unlock)(struct pipe_ctx *pipe_ctx);
+ void (*wait_for_pipe_update_if_needed)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool is_surface_update_only);
+ void (*set_wait_for_update_needed_for_pipe)(struct dc *dc, struct pipe_ctx *pipe_ctx);
};
struct dce_hwseq {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h
index 8f8ac8e29ed0..ed2f8005d85e 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h
@@ -96,6 +96,7 @@ struct pixel_clk_params {
/*> de-spread info, relevant only for on-the-fly tune-up pixel rate*/
enum dc_pixel_encoding pixel_encoding;
struct pixel_clk_flags flags;
+ uint32_t dio_se_pix_per_cycle;
};
/**
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h
index fa5edd03d004..82085d9c3f40 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h
@@ -26,6 +26,8 @@
#ifndef _CORE_STATUS_H_
#define _CORE_STATUS_H_
+#include "dc_hw_types.h"
+
enum dc_status {
DC_OK = 1,
@@ -56,9 +58,13 @@ enum dc_status {
DC_NO_LINK_ENC_RESOURCE = 26,
DC_FAIL_DP_PAYLOAD_ALLOCATION = 27,
DC_FAIL_DP_LINK_BANDWIDTH = 28,
+ DC_FAIL_HW_CURSOR_SUPPORT = 29,
+ DC_FAIL_DP_TUNNEL_BW_VALIDATE = 30,
DC_ERROR_UNEXPECTED = -1
};
char *dc_status_to_str(enum dc_status status);
+char *dc_pixel_encoding_to_str(enum dc_pixel_encoding pixel_encoding);
+char *dc_color_depth_to_str(enum dc_color_depth color_depth);
#endif /* _CORE_STATUS_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index eaad1260bfd1..d11893f8c916 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -38,14 +38,13 @@
#include "mcif_wb.h"
#include "panel_cntl.h"
#include "dmub/inc/dmub_cmd.h"
+#include "pg_cntl.h"
+#include "sspl/dc_spl.h"
#define MAX_CLOCK_SOURCES 7
#define MAX_SVP_PHANTOM_STREAMS 2
#define MAX_SVP_PHANTOM_PLANES 2
-void enable_surface_flip_reporting(struct dc_plane_state *plane_state,
- uint32_t controller_id);
-
#include "grph_object_id.h"
#include "link_encoder.h"
#include "stream_encoder.h"
@@ -59,10 +58,16 @@ void enable_surface_flip_reporting(struct dc_plane_state *plane_state,
#include "transform.h"
#include "dpp.h"
+#include "dml2/dml21/inc/dml_top_dchub_registers.h"
+#include "dml2/dml21/inc/dml_top_types.h"
+
struct resource_pool;
struct dc_state;
struct resource_context;
struct clk_bw_params;
+struct dc_mcache_params;
+
+#define MAX_RMCM_INST 2
struct resource_funcs {
enum engine_id (*get_preferred_eng_id_dpia)(unsigned int dpia_index);
@@ -76,11 +81,10 @@ struct resource_funcs {
/* Create a minimal link encoder object with no dc_link object
* associated with it. */
struct link_encoder *(*link_enc_create_minimal)(struct dc_context *ctx, enum engine_id eng_id);
-
- bool (*validate_bandwidth)(
+ enum dc_status (*validate_bandwidth)(
struct dc *dc,
struct dc_state *context,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
void (*calculate_wm_and_dlg)(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
@@ -89,6 +93,12 @@ struct resource_funcs {
void (*update_soc_for_wm_a)(
struct dc *dc, struct dc_state *context);
+ unsigned int (*calculate_mall_ways_from_bytes)(
+ const struct dc *dc,
+ unsigned int total_size_in_mall_bytes);
+ void (*prepare_mcache_programming)(
+ struct dc *dc,
+ struct dc_state *context);
/**
* @populate_dml_pipes - Populate pipe data struct
*
@@ -99,7 +109,7 @@ struct resource_funcs {
struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
/*
* Algorithm for assigning available link encoders to links.
@@ -132,6 +142,16 @@ struct resource_funcs {
const struct resource_pool *pool,
const struct pipe_ctx *opp_head_pipe);
+ struct pipe_ctx *(*acquire_free_pipe_as_secondary_opp_head)(
+ const struct dc_state *cur_ctx,
+ struct dc_state *new_ctx,
+ const struct resource_pool *pool,
+ const struct pipe_ctx *otg_master);
+
+ void (*release_pipe)(struct dc_state *context,
+ struct pipe_ctx *pipe,
+ const struct resource_pool *pool);
+
enum dc_status (*validate_plane)(
const struct dc_plane_state *plane_state,
struct dc_caps *caps);
@@ -145,6 +165,7 @@ struct resource_funcs {
struct dc *dc,
struct dc_state *new_ctx,
struct dc_stream_state *stream);
+
enum dc_status (*patch_unknown_plane_state)(
struct dc_plane_state *plane_state);
@@ -152,6 +173,7 @@ struct resource_funcs {
struct resource_context *res_ctx,
const struct resource_pool *pool,
struct dc_stream_state *stream);
+
void (*populate_dml_writeback_from_context)(
struct dc *dc,
struct resource_context *res_ctx,
@@ -162,6 +184,7 @@ struct resource_funcs {
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt);
+
void (*update_bw_bounding_box)(
struct dc *dc,
struct clk_bw_params *bw_params);
@@ -189,11 +212,24 @@ struct resource_funcs {
unsigned int pipe_cnt,
unsigned int index);
- bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update);
- void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context);
void (*get_panel_config_defaults)(struct dc_panel_config *panel_config);
- void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
- void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
+ void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx);
+ /*
+ * Get indicator of power from a context that went through full validation
+ */
+ int (*get_power_profile)(const struct dc_state *context);
+ unsigned int (*get_det_buffer_size)(const struct dc_state *context);
+ unsigned int (*get_vstartup_for_pipe)(struct pipe_ctx *pipe_ctx);
+ unsigned int (*get_max_hw_cursor_size)(const struct dc *dc,
+ struct dc_state *state,
+ const struct dc_stream_state *stream);
+ bool (*program_mcache_pipe_config)(struct dc_state *context,
+ const struct dc_mcache_params *mcache_params);
+ enum dc_status (*update_dc_state_for_encoder_switch)(struct dc_link *link,
+ struct dc_link_settings *link_setting,
+ uint8_t pipe_count,
+ struct pipe_ctx *pipes,
+ struct audio_output *audio_output);
};
struct audio_support{
@@ -252,6 +288,7 @@ struct resource_pool {
struct hpo_dp_link_encoder *hpo_dp_link_enc[MAX_HPO_DP2_LINK_ENCODERS];
struct dc_3dlut *mpc_lut[MAX_PIPES];
struct dc_transfer_func *mpc_shaper[MAX_PIPES];
+ struct dc_rmcm_3dlut rmcm_3dlut[MAX_RMCM_INST];
struct {
unsigned int xtalin_clock_inKhz;
@@ -275,12 +312,12 @@ struct resource_pool {
struct audio_support audio_support;
struct dccg *dccg;
+ struct pg_cntl *pg_cntl;
struct irq_service *irqs;
struct abm *abm;
struct dmcu *dmcu;
struct dmub_psr *psr;
-
struct dmub_replay *replay;
struct abm *multiple_abms[MAX_PIPES];
@@ -296,6 +333,16 @@ struct dcn_fe_bandwidth {
};
+/* Parameters needed to call set_disp_pattern_generator */
+struct test_pattern_params {
+ enum controller_dp_test_pattern test_pattern;
+ enum controller_dp_color_space color_space;
+ enum dc_color_depth color_depth;
+ int width;
+ int height;
+ int offset;
+};
+
struct stream_resource {
struct output_pixel_processor *opp;
struct display_stream_compressor *dsc;
@@ -312,10 +359,21 @@ struct stream_resource {
* otherwise it's using group number 'gsl_group-1'
*/
uint8_t gsl_group;
+
+ struct test_pattern_params test_pattern_params;
};
struct plane_resource {
+ /* scl_data is scratch space required to program a plane */
struct scaler_data scl_data;
+ /* spl_in and spl_out are the input and output structures for SPL.
+ * They are required when using the Scaler Programming Library and,
+ * like scl_data, are scratch space needed when programming a plane.
+ * The pointers to the hw objects themselves follow further below.
+ */
+ struct spl_in spl_in;
+ struct spl_out spl_out;
+ /* Below pointers to hw objects are required to enable the plane */
struct hubp *hubp;
struct mem_input *mi;
struct input_pixel_processor *ipp;
@@ -331,12 +389,15 @@ struct plane_resource {
/* all mappable hardware resources used to enable a link */
struct link_resource {
+ struct link_encoder *dio_link_enc;
struct hpo_dp_link_encoder *hpo_dp_link_enc;
};
struct link_config {
struct dc_link_settings dp_link_settings;
+ struct dc_tunnel_settings dp_tunnel_settings;
};
+
union pipe_update_flags {
struct {
uint32_t enable : 1;
@@ -355,10 +416,33 @@ union pipe_update_flags {
uint32_t plane_changed : 1;
uint32_t det_size : 1;
uint32_t unbounded_req : 1;
+ uint32_t test_pattern_changed : 1;
} bits;
uint32_t raw;
};
+/*
+ * Pair of pixel rate divider factors, kept per pipe in
+ * pipe_ctx.pixel_rate_divider.
+ * NOTE(review): presumably mirrors the div_factor1/div_factor2 values
+ * reported by dccg_funcs.get_pixel_rate_div() - confirm at the caller.
+ */
+struct pixel_rate_divider {
+ uint32_t div_factor1;
+ uint32_t div_factor2;
+};
+
+/*
+ * P-state switch method recorded per pipe in pipe_ctx.p_state_type.
+ * P_STATE_UNKNOWN is 0, so a zero-initialized pipe_ctx reads back as
+ * "unknown". Only the first two values are assigned explicitly; the
+ * remaining enumerators continue sequentially from P_STATE_V_BLANK.
+ */
+enum p_state_switch_method {
+ P_STATE_UNKNOWN = 0,
+ P_STATE_V_BLANK = 1,
+ P_STATE_FPO,
+ P_STATE_V_ACTIVE,
+ P_STATE_SUB_VP,
+ P_STATE_DRR_SUB_VP,
+ P_STATE_V_BLANK_SUB_VP,
+};
+
+/*
+ * DSC padding parameters, kept per pipe in pipe_ctx.dsc_padding_params.
+ * NOTE(review): units are inferred from the field names only
+ * (dsc_pix_clk_100hz presumably in units of 100 Hz) - confirm at the users.
+ */
+struct dsc_padding_params {
+ /* pixels borrowed from hblank to hactive */
+ uint8_t dsc_hactive_padding;
+ uint32_t dsc_htotal_padding;
+ uint32_t dsc_pix_clk_100hz;
+};
+
struct pipe_ctx {
struct dc_plane_state *plane_state;
struct dc_stream_state *stream;
@@ -403,12 +487,24 @@ struct pipe_ctx {
int det_buffer_size_kb;
bool unbounded_req;
unsigned int surface_size_in_mall_bytes;
+ struct dml2_dchub_per_pipe_register_set hubp_regs;
+ struct dml2_hubp_pipe_mcache_regs mcache_regs;
+ union dml2_global_sync_programming global_sync;
struct dwbc *dwbc;
struct mcif_wb *mcif_wb;
union pipe_update_flags update_flags;
+ enum p_state_switch_method p_state_type;
struct tg_color visual_confirm_color;
bool has_vactive_margin;
+ /* subvp_index: only valid if the pipe is a SUBVP_MAIN*/
+ uint8_t subvp_index;
+ struct pixel_rate_divider pixel_rate_divider;
+ struct dsc_padding_params dsc_padding_params;
+ /* next vupdate */
+ uint32_t next_vupdate;
+ uint32_t wait_frame_count;
+ bool wait_is_required;
};
/* Data used for dynamic link encoder assignment.
@@ -430,10 +526,14 @@ struct resource_context {
uint8_t dp_clock_source_ref_count;
bool is_dsc_acquired[MAX_PIPES];
struct link_enc_cfg_context link_enc_cfg_ctx;
+ unsigned int dio_link_enc_to_link_idx[MAX_DIG_LINK_ENCODERS];
+ int dio_link_enc_ref_cnts[MAX_DIG_LINK_ENCODERS];
bool is_hpo_dp_stream_enc_acquired[MAX_HPO_DP2_ENCODERS];
unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS];
int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS];
bool is_mpc_3dlut_acquired[MAX_PIPES];
+ /* used to build scaler data in dml2 and for edp backlight programming */
+ struct pipe_ctx temp_pipe;
};
struct dce_bw_output {
@@ -459,7 +559,7 @@ struct dcn_bw_writeback {
struct dcn_bw_output {
struct dc_clocks clk;
- struct dcn_watermark_set watermarks;
+ union dcn_watermark_set watermarks;
struct dcn_bw_writeback bw_writeback;
int compbuf_size_kb;
unsigned int mall_ss_size_bytes;
@@ -467,6 +567,14 @@ struct dcn_bw_output {
unsigned int mall_subvp_size_bytes;
unsigned int legacy_svp_drr_stream_index;
bool legacy_svp_drr_stream_index_valid;
+ struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES];
+ struct dmub_cmd_fams2_global_config fams2_global_config;
+ union dmub_cmd_fams2_config fams2_stream_base_params[DML2_MAX_PLANES];
+ union {
+ union dmub_cmd_fams2_config fams2_stream_sub_params[DML2_MAX_PLANES];
+ union dmub_fams2_stream_static_sub_state_v2 fams2_stream_sub_params_v2[DML2_MAX_PLANES];
+ };
+ struct dml2_display_arb_regs arb_regs;
};
union bw_output {
@@ -477,6 +585,8 @@ union bw_output {
struct bw_context {
union bw_output bw;
struct display_mode_lib dml;
+ struct dml2_context *dml2;
+ struct dml2_context *dml2_dc_power_source;
};
struct dc_dmub_cmd {
@@ -497,6 +607,14 @@ struct dc_state {
* @stream_status: Planes status on a given stream
*/
struct dc_stream_status stream_status[MAX_PIPES];
+ /**
+ * @phantom_streams: Stream state properties for phantoms
+ */
+ struct dc_stream_state *phantom_streams[MAX_PHANTOM_PIPES];
+ /**
+ * @phantom_planes: Planes state properties for phantoms
+ */
+ struct dc_plane_state *phantom_planes[MAX_PHANTOM_PIPES];
/**
* @stream_count: Total of streams in use
@@ -505,6 +623,14 @@ struct dc_state {
uint8_t stream_mask;
/**
+ * @phantom_stream_count: Total phantom streams in use
+ */
+ uint8_t phantom_stream_count;
+ /**
+ * @phantom_plane_count: Total phantom planes in use
+ */
+ uint8_t phantom_plane_count;
+ /**
* @res_ctx: Persistent state of resources
*/
struct resource_context res_ctx;
@@ -532,7 +658,7 @@ struct dc_state {
*/
struct bw_context bw_ctx;
- struct block_sequence block_sequence[50];
+ struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE];
unsigned int block_sequence_steps;
struct dc_dmub_cmd dc_dmub_cmd[10];
unsigned int dmub_cmd_count;
@@ -549,6 +675,8 @@ struct dc_state {
struct {
unsigned int stutter_period_us;
} perf_params;
+
+ enum dc_power_source_type power_source;
};
struct replay_context {
@@ -561,6 +689,7 @@ struct replay_context {
/* Controller Id used for Dig Fe source select */
enum controller_id controllerId;
unsigned int line_time_in_ns;
+ bool os_request_force_ffu;
};
enum dc_replay_enable {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
index 9e4ddc985240..134091d5842d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
@@ -31,7 +31,7 @@
#define __DCN_CALCS_H__
#include "bw_fixed.h"
-#include "../dml/display_mode_lib.h"
+#include "dml/display_mode_lib.h"
struct dc;
@@ -622,11 +622,7 @@ extern const struct dcn_ip_params dcn10_ip_defaults;
bool dcn_validate_bandwidth(
struct dc *dc,
struct dc_state *context,
- bool fast_validate);
-
-unsigned int dcn_find_dcfclk_suits_all(
- const struct dc *dc,
- struct dc_clocks *clocks);
+ enum dc_validate_mode validate_mode);
void dcn_get_soc_clks(
struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
index 33db15d69f23..3f0161d64675 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
@@ -36,7 +36,7 @@ struct abm {
};
struct abm_funcs {
- void (*abm_init)(struct abm *abm, uint32_t back_light);
+ void (*abm_init)(struct abm *abm, uint32_t back_light, uint32_t user_level);
bool (*set_abm_level)(struct abm *abm, unsigned int abm_level);
bool (*set_abm_immediate_disable)(struct abm *abm, unsigned int panel_inst);
bool (*set_pipe)(struct abm *abm, unsigned int controller_id, unsigned int panel_inst);
@@ -64,7 +64,8 @@ struct abm_funcs {
bool (*set_pipe_ex)(struct abm *abm,
unsigned int otg_inst,
unsigned int option,
- unsigned int panel_inst);
+ unsigned int panel_inst,
+ unsigned int pwrseq_inst);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h
index 6ed1fb8c9300..8c18efc2aa70 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h
@@ -43,7 +43,10 @@ struct audio_funcs {
void (*az_configure)(struct audio *audio,
enum signal_type signal,
const struct audio_crtc_info *crtc_info,
- const struct audio_info *audio_info);
+ const struct audio_info *audio_info,
+ const struct audio_dp_link_info *dp_link_info);
+
+ void (*az_disable_hbr_audio)(struct audio *audio);
void (*wall_dto_setup)(struct audio *audio,
enum signal_type signal,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index ecb7bcc39469..2c9a4a12bd8a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -29,9 +29,6 @@
#include "dc.h"
#include "dm_pp_smu.h"
-#define DCN_MINIMUM_DISPCLK_Khz 100000
-#define DCN_MINIMUM_DPPCLK_Khz 100000
-
/* Constants */
#define DDR4_DRAM_WIDTH 64
#define WM_A 0
@@ -39,17 +36,20 @@
#define WM_C 2
#define WM_D 3
#define WM_SET_COUNT 4
+#define WM_1A 2
+#define WM_1B 3
#define DCN_MINIMUM_DISPCLK_Khz 100000
#define DCN_MINIMUM_DPPCLK_Khz 100000
struct dcn3_clk_internal {
int dummy;
- /*TODO:
+// TODO:
uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk
uint32_t CLK1_CLK1_CURRENT_CNT; //dppclk
uint32_t CLK1_CLK2_CURRENT_CNT; //dprefclk
uint32_t CLK1_CLK3_CURRENT_CNT; //dcfclk
+ uint32_t CLK1_CLK4_CURRENT_CNT;
uint32_t CLK1_CLK3_DS_CNTL; //dcf_deep_sleep_divider
uint32_t CLK1_CLK3_ALLOW_DS; //dcf_deep_sleep_allow
@@ -57,7 +57,27 @@ struct dcn3_clk_internal {
uint32_t CLK1_CLK1_BYPASS_CNTL; //dppclk bypass
uint32_t CLK1_CLK2_BYPASS_CNTL; //dprefclk bypass
uint32_t CLK1_CLK3_BYPASS_CNTL; //dcfclk bypass
- */
+
+ uint32_t CLK4_CLK0_CURRENT_CNT; //fclk
+};
+
+struct dcn35_clk_internal {
+ int dummy;
+ uint32_t CLK1_CLK0_CURRENT_CNT; //dispclk
+ uint32_t CLK1_CLK1_CURRENT_CNT; //dppclk
+ uint32_t CLK1_CLK2_CURRENT_CNT; //dprefclk
+ uint32_t CLK1_CLK3_CURRENT_CNT; //dcfclk
+ uint32_t CLK1_CLK4_CURRENT_CNT; //dtbclk
+ //uint32_t CLK1_CLK5_CURRENT_CNT; //dpiaclk
+ //uint32_t CLK1_CLK6_CURRENT_CNT; //srdbgclk
+ uint32_t CLK1_CLK3_DS_CNTL; //dcf_deep_sleep_divider
+ uint32_t CLK1_CLK3_ALLOW_DS; //dcf_deep_sleep_allow
+
+ uint32_t CLK1_CLK0_BYPASS_CNTL; //dispclk bypass
+ uint32_t CLK1_CLK1_BYPASS_CNTL; //dppclk bypass
+ uint32_t CLK1_CLK2_BYPASS_CNTL; //dprefclk bypass
+ uint32_t CLK1_CLK3_BYPASS_CNTL; //dcfclk bypass
+ uint32_t CLK1_CLK4_BYPASS_CNTL; //dtbclk bypass
};
struct dcn301_clk_internal {
@@ -80,6 +100,17 @@ struct dcn301_clk_internal {
#define MAX_NUM_DPM_LVL 8
#define WM_SET_COUNT 4
+/*
+ * Clock selector; taken as the clk_type argument of
+ * clk_mgr_funcs.get_max_clock_khz().
+ * CLK_TYPE_COUNT must stay the last enumerator: it equals the number of
+ * clock types listed before it.
+ */
+enum clk_type {
+ CLK_TYPE_DCFCLK,
+ CLK_TYPE_FCLK,
+ CLK_TYPE_MCLK,
+ CLK_TYPE_SOCCLK,
+ CLK_TYPE_DTBCLK,
+ CLK_TYPE_DISPCLK,
+ CLK_TYPE_DPPCLK,
+ CLK_TYPE_DSCCLK,
+ CLK_TYPE_COUNT
+};
struct clk_limit_table_entry {
unsigned int voltage; /* milivolts withh 2 fractional bits */
@@ -157,6 +188,7 @@ struct clk_state_registers_and_bypass {
uint32_t dispclk;
uint32_t dppclk;
uint32_t dtbclk;
+ uint32_t fclk;
uint32_t dppclk_bypass;
uint32_t dcfclk_bypass;
@@ -221,14 +253,14 @@ struct wm_table {
struct dummy_pstate_entry {
unsigned int dram_speed_mts;
- double dummy_pstate_latency_us;
+ unsigned int dummy_pstate_latency_us;
};
struct clk_bw_params {
unsigned int vram_type;
unsigned int num_channels;
unsigned int dram_channel_width_bytes;
- unsigned int dispclk_vco_khz;
+ unsigned int dispclk_vco_khz;
unsigned int dc_mode_softmax_memclk;
unsigned int max_memclk_mhz;
struct clk_limit_table clk_table;
@@ -258,6 +290,8 @@ struct clk_mgr_funcs {
int (*get_dtb_ref_clk_frequency)(struct clk_mgr *clk_mgr);
void (*set_low_power_state)(struct clk_mgr *clk_mgr);
+ void (*exit_low_power_state)(struct clk_mgr *clk_mgr);
+ bool (*is_ips_supported)(struct clk_mgr *clk_mgr);
void (*init_clocks)(struct clk_mgr *clk_mgr);
@@ -283,6 +317,9 @@ struct clk_mgr_funcs {
*/
void (*set_hard_min_memclk)(struct clk_mgr *clk_mgr, bool current_mode);
+ int (*get_hard_min_memclk)(struct clk_mgr *clk_mgr);
+ int (*get_hard_min_fclk)(struct clk_mgr *clk_mgr);
+
/* Send message to PMFW to set hard max memclk frequency to highest DPM */
void (*set_hard_max_memclk)(struct clk_mgr *clk_mgr);
@@ -298,6 +335,11 @@ struct clk_mgr_funcs {
int (*get_dispclk_from_dentist)(struct clk_mgr *clk_mgr_base);
+ bool (*is_dc_mode_present)(struct clk_mgr *clk_mgr);
+
+ uint32_t (*set_smartmux_switch)(struct clk_mgr *clk_mgr, uint32_t pins_to_set);
+
+ unsigned int (*get_max_clock_khz)(struct clk_mgr *clk_mgr_base, enum clk_type clk_type);
};
struct clk_mgr {
@@ -308,6 +350,7 @@ struct clk_mgr {
bool force_smu_not_present;
bool dc_mode_softmax_enabled;
int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes
+ int dp_dto_source_clock_in_khz; // Used to program DP DTO with ss adjustment on DCN314
int dentist_vco_freq_khz;
struct clk_state_registers_and_bypass boot_snapshot;
struct clk_bw_params *bw_params;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
index cff5fd55a0ad..bac8febad69a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
@@ -73,7 +73,7 @@ enum dentist_divider_range {
clk_mgr->base.ctx
#define DC_LOGGER \
- clk_mgr->base.ctx->logger
+ dc->ctx->logger
@@ -97,11 +97,6 @@ enum dentist_divider_range {
#define CLK_COMMON_REG_LIST_DCN_BASE() \
SR(DENTIST_DISPCLK_CNTL)
-#define VBIOS_SMU_MSG_BOX_REG_LIST_RV() \
- .MP1_SMN_C2PMSG_91 = mmMP1_SMN_C2PMSG_91, \
- .MP1_SMN_C2PMSG_83 = mmMP1_SMN_C2PMSG_83, \
- .MP1_SMN_C2PMSG_67 = mmMP1_SMN_C2PMSG_67
-
#define CLK_COMMON_REG_LIST_DCN_201() \
SR(DENTIST_DISPCLK_CNTL), \
CLK_SRI(CLK4_CLK_PLL_REQ, CLK4, 0), \
@@ -113,7 +108,7 @@ enum dentist_divider_range {
CLK_SRI(CLK3_CLK2_DFS_CNTL, CLK3, 0)
#define CLK_REG_LIST_DCN3() \
- CLK_COMMON_REG_LIST_DCN_BASE(), \
+ SR(DENTIST_DISPCLK_CNTL), \
CLK_SRI(CLK0_CLK_PLL_REQ, CLK02, 0), \
CLK_SRI(CLK0_CLK2_DFS_CNTL, CLK02, 0)
@@ -163,7 +158,49 @@ enum dentist_divider_range {
CLK_SR_DCN32(CLK1_CLK1_DFS_CNTL), \
CLK_SR_DCN32(CLK1_CLK2_DFS_CNTL), \
CLK_SR_DCN32(CLK1_CLK3_DFS_CNTL), \
- CLK_SR_DCN32(CLK1_CLK4_DFS_CNTL)
+ CLK_SR_DCN32(CLK1_CLK4_DFS_CNTL), \
+ CLK_SR_DCN32(CLK1_CLK0_CURRENT_CNT), \
+ CLK_SR_DCN32(CLK1_CLK1_CURRENT_CNT), \
+ CLK_SR_DCN32(CLK1_CLK2_CURRENT_CNT), \
+ CLK_SR_DCN32(CLK1_CLK3_CURRENT_CNT), \
+ CLK_SR_DCN32(CLK1_CLK4_CURRENT_CNT), \
+ CLK_SR_DCN32(CLK4_CLK0_CURRENT_CNT)
+
+/*
+ * Register list for the DCN35 clock manager: CLK1_CLK_PLL_REQ, the
+ * CLK1_CLK0..CLK5 DFS_CNTL, CURRENT_CNT, BYPASS_CNTL, DS_CNTL and ALLOW_DS
+ * registers, the CLK5/CLK6 spll_field_8 registers and DENTIST_DISPCLK_CNTL.
+ *
+ * The comma after the last entry is kept so existing expansions of this
+ * macro are unaffected. The last entry must NOT end in a line continuation:
+ * a trailing backslash folds whatever line follows the macro (for example
+ * the next #define) into its replacement list.
+ */
+#define CLK_REG_LIST_DCN35() \
+ CLK_SR_DCN35(CLK1_CLK_PLL_REQ), \
+ CLK_SR_DCN35(CLK1_CLK0_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK1_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK2_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK3_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK4_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK5_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK0_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK1_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK2_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK3_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK4_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK5_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK0_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK1_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK2_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK3_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK4_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK5_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK0_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK1_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK2_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK3_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK4_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK5_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK0_ALLOW_DS), \
+ CLK_SR_DCN35(CLK1_CLK1_ALLOW_DS), \
+ CLK_SR_DCN35(CLK1_CLK2_ALLOW_DS), \
+ CLK_SR_DCN35(CLK1_CLK3_ALLOW_DS), \
+ CLK_SR_DCN35(CLK1_CLK4_ALLOW_DS), \
+ CLK_SR_DCN35(CLK1_CLK5_ALLOW_DS), \
+ CLK_SR_DCN35(CLK5_spll_field_8), \
+ CLK_SR_DCN35(CLK6_spll_field_8), \
+ SR(DENTIST_DISPCLK_CNTL),
#define CLK_COMMON_MASK_SH_LIST_DCN32(mask_sh) \
CLK_COMMON_MASK_SH_LIST_DCN20_BASE(mask_sh),\
@@ -184,44 +221,46 @@ enum dentist_divider_range {
CLK_SF(CLK0_CLK_PLL_REQ, FbMult_int, mask_sh),\
CLK_SF(CLK0_CLK_PLL_REQ, FbMult_frac, mask_sh)
+#define CLK_REG_LIST_DCN401() \
+ SR(DENTIST_DISPCLK_CNTL), \
+ CLK_SR_DCN401(CLK0_CLK_PLL_REQ, CLK01, 0), \
+ CLK_SR_DCN401(CLK0_CLK0_DFS_CNTL, CLK01, 0), \
+ CLK_SR_DCN401(CLK0_CLK1_DFS_CNTL, CLK01, 0), \
+ CLK_SR_DCN401(CLK0_CLK2_DFS_CNTL, CLK01, 0), \
+ CLK_SR_DCN401(CLK0_CLK3_DFS_CNTL, CLK01, 0), \
+ CLK_SR_DCN401(CLK0_CLK4_DFS_CNTL, CLK01, 0), \
+ CLK_SR_DCN401(CLK2_CLK2_DFS_CNTL, CLK20, 0)
+
+#define CLK_COMMON_MASK_SH_LIST_DCN401(mask_sh) \
+ CLK_COMMON_MASK_SH_LIST_DCN321(mask_sh)
+
#define CLK_REG_FIELD_LIST(type) \
type DPREFCLK_SRC_SEL; \
type DENTIST_DPREFCLK_WDIVIDER; \
type DENTIST_DISPCLK_WDIVIDER; \
type DENTIST_DISPCLK_CHG_DONE;
-/*
- ***************************************************************************************
- ****************** Clock Manager Private Structures ***********************************
- ***************************************************************************************
- */
#define CLK20_REG_FIELD_LIST(type) \
type DENTIST_DPPCLK_WDIVIDER; \
type DENTIST_DPPCLK_CHG_DONE; \
type FbMult_int; \
type FbMult_frac;
-#define VBIOS_SMU_REG_FIELD_LIST(type) \
- type CONTENT;
-
-struct clk_mgr_shift {
- CLK_REG_FIELD_LIST(uint8_t)
- CLK20_REG_FIELD_LIST(uint8_t)
- VBIOS_SMU_REG_FIELD_LIST(uint32_t)
-};
-
-struct clk_mgr_mask {
- CLK_REG_FIELD_LIST(uint32_t)
- CLK20_REG_FIELD_LIST(uint32_t)
- VBIOS_SMU_REG_FIELD_LIST(uint32_t)
-};
+/*
+ ***************************************************************************************
+ ****************** Clock Manager Private Structures ***********************************
+ ***************************************************************************************
+ */
struct clk_mgr_registers {
uint32_t DPREFCLK_CNTL;
uint32_t DENTIST_DISPCLK_CNTL;
+
uint32_t CLK4_CLK2_CURRENT_CNT;
uint32_t CLK4_CLK_PLL_REQ;
+ uint32_t CLK4_CLK0_CURRENT_CNT;
+
uint32_t CLK3_CLK2_DFS_CNTL;
uint32_t CLK3_CLK_PLL_REQ;
@@ -234,15 +273,52 @@ struct clk_mgr_registers {
uint32_t CLK1_CLK2_DFS_CNTL;
uint32_t CLK1_CLK3_DFS_CNTL;
uint32_t CLK1_CLK4_DFS_CNTL;
+ uint32_t CLK1_CLK5_DFS_CNTL;
+ uint32_t CLK2_CLK2_DFS_CNTL;
+
+ uint32_t CLK1_CLK0_CURRENT_CNT;
+ uint32_t CLK1_CLK1_CURRENT_CNT;
+ uint32_t CLK1_CLK2_CURRENT_CNT;
+ uint32_t CLK1_CLK3_CURRENT_CNT;
+ uint32_t CLK1_CLK4_CURRENT_CNT;
+ uint32_t CLK1_CLK5_CURRENT_CNT;
uint32_t CLK0_CLK0_DFS_CNTL;
uint32_t CLK0_CLK1_DFS_CNTL;
uint32_t CLK0_CLK3_DFS_CNTL;
uint32_t CLK0_CLK4_DFS_CNTL;
+ uint32_t CLK1_CLK0_BYPASS_CNTL;
+ uint32_t CLK1_CLK1_BYPASS_CNTL;
+ uint32_t CLK1_CLK2_BYPASS_CNTL;
+ uint32_t CLK1_CLK3_BYPASS_CNTL;
+ uint32_t CLK1_CLK4_BYPASS_CNTL;
+ uint32_t CLK1_CLK5_BYPASS_CNTL;
+
+ uint32_t CLK1_CLK0_DS_CNTL;
+ uint32_t CLK1_CLK1_DS_CNTL;
+ uint32_t CLK1_CLK2_DS_CNTL;
+ uint32_t CLK1_CLK3_DS_CNTL;
+ uint32_t CLK1_CLK4_DS_CNTL;
+ uint32_t CLK1_CLK5_DS_CNTL;
+
+ uint32_t CLK1_CLK0_ALLOW_DS;
+ uint32_t CLK1_CLK1_ALLOW_DS;
+ uint32_t CLK1_CLK2_ALLOW_DS;
+ uint32_t CLK1_CLK3_ALLOW_DS;
+ uint32_t CLK1_CLK4_ALLOW_DS;
+ uint32_t CLK1_CLK5_ALLOW_DS;
+ uint32_t CLK5_spll_field_8;
+ uint32_t CLK6_spll_field_8;
+};
+
+struct clk_mgr_shift {
+ CLK_REG_FIELD_LIST(uint8_t)
+ CLK20_REG_FIELD_LIST(uint8_t)
+};
- uint32_t MP1_SMN_C2PMSG_67;
- uint32_t MP1_SMN_C2PMSG_83;
- uint32_t MP1_SMN_C2PMSG_91;
+struct clk_mgr_mask {
+ CLK_REG_FIELD_LIST(uint32_t)
+ CLK20_REG_FIELD_LIST(uint32_t)
};
enum clock_type {
@@ -335,13 +411,14 @@ struct clk_mgr_internal {
enum dm_pp_clocks_state cur_min_clks_state;
bool periodic_retraining_disabled;
- unsigned int cur_phyclk_req_table[MAX_PIPES * 2];
+ unsigned int cur_phyclk_req_table[MAX_LINKS];
bool smu_present;
void *wm_range_table;
long long wm_range_table_addr;
bool dpm_present;
+ bool pme_trigger_pending;
};
struct clk_mgr_internal_funcs {
@@ -379,6 +456,11 @@ static inline int khz_to_mhz_ceil(int khz)
return (khz + 999) / 1000;
}
+/*
+ * Convert a frequency in kHz to whole MHz, discarding the fractional part
+ * (counterpart of khz_to_mhz_ceil() above, which rounds up).
+ * C integer division truncates toward zero, so the result is a true floor
+ * only for non-negative @khz.
+ */
+static inline int khz_to_mhz_floor(int khz)
+{
+ return khz / 1000;
+}
+
int clk_mgr_helper_get_active_display_cnt(
struct dc *dc,
struct dc_state *context);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
index 3e2f0f64c98c..61c4d2a7db1c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
@@ -56,6 +56,14 @@ enum dentist_dispclk_change_mode {
DISPCLK_CHANGE_MODE_RAMPING,
};
+/*
+ * Parameter bundle passed (as a const pointer) to dccg_funcs.set_dp_dto().
+ * NOTE(review): pixclk_hz and refclk_hz are presumably frequencies in Hz,
+ * going by their names - confirm at the implementations.
+ */
+struct dp_dto_params {
+ int otg_inst;
+ enum signal_type signal;
+ enum streamclk_source clk_src;
+ uint64_t pixclk_hz;
+ uint64_t refclk_hz;
+};
+
enum pixel_rate_div {
PIXEL_RATE_DIV_BY_1 = 0,
PIXEL_RATE_DIV_BY_2 = 1,
@@ -98,6 +106,10 @@ struct dccg_funcs {
void (*otg_drop_pixel)(struct dccg *dccg,
uint32_t otg_inst);
void (*dccg_init)(struct dccg *dccg);
+ void (*set_dpstreamclk_root_clock_gating)(
+ struct dccg *dccg,
+ int dp_hpo_inst,
+ bool enable);
void (*set_dpstreamclk)(
struct dccg *dccg,
@@ -134,6 +146,11 @@ struct dccg_funcs {
enum physymclk_clock_source clk_src,
bool force_enable);
+ void (*set_physymclk_root_clock_gating)(
+ struct dccg *dccg,
+ int phy_inst,
+ bool enable);
+
void (*set_dtbclk_dto)(
struct dccg *dccg,
const struct dtbclk_dto_params *params);
@@ -159,6 +176,11 @@ struct dccg_funcs {
enum pixel_rate_div k1,
enum pixel_rate_div k2);
+ void (*get_pixel_rate_div)(struct dccg *dccg,
+ uint32_t otg_inst,
+ uint32_t *div_factor1,
+ uint32_t *div_factor2);
+
void (*set_valid_pixel_rate)(
struct dccg *dccg,
int ref_dtbclk_khz,
@@ -182,6 +204,16 @@ struct dccg_funcs {
struct dccg *dccg,
uint32_t stream_enc_inst,
uint32_t link_enc_inst);
+ void (*set_dp_dto)(
+ struct dccg *dccg,
+ const struct dp_dto_params *params);
+ void (*set_dtbclk_p_src)(
+ struct dccg *dccg,
+ enum streamclk_source src,
+ uint32_t otg_inst);
+ void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst, uint32_t num_slices_h);
+ void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst);
+ void (*dccg_root_gate_disable_control)(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating);
};
#endif //__DAL_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
index f5677dbb4e7d..843a18287c83 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -26,12 +26,24 @@
#ifndef __DAL_DCHUBBUB_H__
#define __DAL_DCHUBBUB_H__
+/**
+ * DOC: overview
+ *
+ * There is only one common DCHUBBUB. It contains the common request and return
+ * blocks for the Data Fabric Interface that are not clock/power gated.
+ */
+
+#include "dc/dc_hw_types.h"
enum dcc_control {
dcc_control__256_256_xxx,
dcc_control__128_128_xxx,
dcc_control__256_64_64,
dcc_control__256_128_128,
+ dcc_control__256_256,
+ dcc_control__256_128,
+ dcc_control__256_64,
+
};
enum segment_order {
@@ -66,8 +78,15 @@ enum dcn_hubbub_page_table_depth {
enum dcn_hubbub_page_table_block_size {
DCN_PAGE_TABLE_BLOCK_SIZE_4KB = 0,
+ DCN_PAGE_TABLE_BLOCK_SIZE_8KB = 1,
+ DCN_PAGE_TABLE_BLOCK_SIZE_16KB = 2,
+ DCN_PAGE_TABLE_BLOCK_SIZE_32KB = 3,
DCN_PAGE_TABLE_BLOCK_SIZE_64KB = 4,
- DCN_PAGE_TABLE_BLOCK_SIZE_32KB = 3
+ DCN_PAGE_TABLE_BLOCK_SIZE_128KB = 5,
+ DCN_PAGE_TABLE_BLOCK_SIZE_256KB = 6,
+ DCN_PAGE_TABLE_BLOCK_SIZE_512KB = 7,
+ DCN_PAGE_TABLE_BLOCK_SIZE_1024KB = 8,
+ DCN_PAGE_TABLE_BLOCK_SIZE_2048KB = 9
};
struct dcn_hubbub_phys_addr_config {
@@ -118,6 +137,19 @@ struct dcn_hubbub_state {
uint32_t dram_state_cntl;
};
+/*
+ * Max/average/min latency triple; passed by pointer to
+ * hubbub_funcs.perfmon.get_system_latency_result().
+ * NOTE(review): values are presumably in nanoseconds, per the _ns suffix.
+ */
+struct hubbub_system_latencies {
+ uint32_t max_latency_ns;
+ uint32_t avg_latency_ns;
+ uint32_t min_latency_ns;
+};
+
+/*
+ * Input parameters passed (as a const pointer) to
+ * hubbub_funcs.perfmon.start_urgent_ramp_latency_measurement().
+ * NOTE(review): units are inferred from the field suffixes only;
+ * bw_factor_x1000 is presumably a factor scaled by 1000 - confirm.
+ */
+struct hubbub_urgent_latency_params {
+ uint32_t refclk_mhz;
+ uint32_t t_win_ns;
+ uint32_t bandwidth_mbps;
+ uint32_t bw_factor_x1000;
+};
+
struct hubbub_funcs {
void (*update_dchub)(
struct hubbub *hubbub,
@@ -141,6 +173,17 @@ struct hubbub_funcs {
enum segment_order *segment_order_horz,
enum segment_order *segment_order_vert);
+ bool (*dcc_support_swizzle_addr3)(
+ enum swizzle_mode_addr3_values swizzle,
+ unsigned int plane_pitch,
+ unsigned int bytes_per_element,
+ enum segment_order *segment_order_horz,
+ enum segment_order *segment_order_vert);
+
+ bool (*dcc_support_pixel_format_plane0_plane1)(
+ enum surface_pixel_format format,
+ unsigned int *plane0_bpe,
+ unsigned int *plane1_bpe);
bool (*dcc_support_pixel_format)(
enum surface_pixel_format format,
unsigned int *bytes_per_element);
@@ -154,7 +197,7 @@ struct hubbub_funcs {
bool (*program_watermarks)(
struct hubbub *hubbub,
- struct dcn_watermark_set *watermarks,
+ union dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower);
@@ -188,11 +231,28 @@ struct hubbub_funcs {
* compressed or detiled buffers.
*/
void (*program_det_size)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_in_kbyte);
+ void (*wait_for_det_apply)(struct hubbub *hubbub, int hubp_inst);
void (*program_compbuf_size)(struct hubbub *hubbub, unsigned compbuf_size_kb, bool safe_to_increase);
void (*init_crb)(struct hubbub *hubbub);
void (*force_usr_retraining_allow)(struct hubbub *hubbub, bool allow);
void (*set_request_limit)(struct hubbub *hubbub, int memory_channel_count, int words_per_channel);
void (*dchubbub_init)(struct hubbub *hubbub);
+ void (*get_mall_en)(struct hubbub *hubbub, unsigned int *mall_in_use);
+ void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg);
+ void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase);
+ void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst);
+ bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower);
+ void (*get_det_sizes)(struct hubbub *hubbub, uint32_t *curr_det_sizes, uint32_t *target_det_sizes);
+ uint32_t (*compbuf_config_error)(struct hubbub *hubbub);
+ struct hubbub_perfmon_funcs{
+ void (*start_system_latency_measurement)(struct hubbub *hubbub);
+ void (*get_system_latency_result)(struct hubbub *hubbub, uint32_t refclk_mhz, struct hubbub_system_latencies *latencies);
+ void (*start_in_order_bandwidth_measurement)(struct hubbub *hubbub);
+ void (*get_in_order_bandwidth_result)(struct hubbub *hubbub, uint32_t refclk_mhz, uint32_t *bandwidth_mbps);
+ void (*start_urgent_ramp_latency_measurement)(struct hubbub *hubbub, const struct hubbub_urgent_latency_params *params);
+ void (*get_urgent_ramp_latency_result)(struct hubbub *hubbub, uint32_t refclk_mhz, uint32_t *latency_ns);
+ void (*reset)(struct hubbub *hubbub);
+ } perfmon;
};
struct hubbub {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
index f4aa76e02518..1b7c085dc2cc 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
@@ -27,6 +27,31 @@
#ifndef __DAL_DPP_H__
#define __DAL_DPP_H__
+/**
+ * DOC: overview
+ *
+ * The DPP (Display Pipe and Plane) block is the unified display data
+ * processing engine in DCN for processing graphic or video data on per DPP
+ * rectangle base. This rectangle can be a part of SLS (Single Large Surface),
+ * or a layer to be blended with other DPP, or a rectangle associated with a
+ * display tile.
+ *
+ * It provides various functions including:
+ * - graphic color keyer
+ * - graphic cursor compositing
+ * - graphic or video image source to destination scaling
+ * - image sharpening
+ * - video format conversion from 4:2:0 or 4:2:2 to 4:4:4
+ * - Color Space Conversion
+ * - Host LUT gamma adjustment
+ * - Color Gamut Remap
+ * - brightness and contrast adjustment.
+ *
+ * DPP pipe consists of Converter and Cursor (CNVC), Scaler (DSCL), Color
+ * Management (CM), Output Buffer (OBUF) and Digital Bypass (DPB) module
+ * connected in a video/graphics pipeline.
+ */
+
#include "transform.h"
#include "cursor_reg_cache.h"
@@ -94,10 +119,14 @@ static const struct dpp_input_csc_matrix __maybe_unused dpp_input_csc_matrix[] =
{ 0x39a6, 0x2568, 0, 0xe0d6,
0xeedd, 0x2568, 0xf925, 0x9a8,
0, 0x2568, 0x43ee, 0xdbb2 } },
- { COLOR_SPACE_2020_YCBCR,
+ { COLOR_SPACE_2020_YCBCR_FULL,
{ 0x2F30, 0x2000, 0, 0xE869,
0xEDB7, 0x2000, 0xFABC, 0xBC6,
0, 0x2000, 0x3C34, 0xE1E6 } },
+ { COLOR_SPACE_2020_YCBCR_LIMITED,
+ { 0x35B9, 0x2543, 0, 0xE2B2,
+ 0xEB2F, 0x2543, 0xFA01, 0x0B1F,
+ 0, 0x2543, 0x4489, 0xDB42 } },
{ COLOR_SPACE_2020_RGB_LIMITEDRANGE,
{ 0x35E0, 0x255F, 0, 0xE2B3,
0xEB20, 0x255F, 0xF9FD, 0xB1E,
@@ -122,16 +151,28 @@ struct cnv_color_keyer_params {
int color_keyer_blue_high;
};
-/* new for dcn2: set the 8bit alpha values based on the 2 bit alpha
- *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT0 default: 0b00000000
- *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT1 default: 0b01010101
- *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT2 default: 0b10101010
- *ALPHA_2BIT_LUT. ALPHA_2BIT_LUT3 default: 0b11111111
+/**
+ * struct cnv_alpha_2bit_lut - Set the 8bit alpha values based on the 2 bit alpha
*/
struct cnv_alpha_2bit_lut {
+ /**
+ * @lut0: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT0. Default: 0b00000000
+ */
int lut0;
+
+ /**
+ * @lut1: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT1. Default: 0b01010101
+ */
int lut1;
+
+ /**
+ * @lut2: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT2. Default: 0b10101010
+ */
int lut2;
+
+ /**
+ * @lut3: ALPHA_2BIT_LUT. ALPHA_2BIT_LUT3. Default: 0b11111111
+ */
int lut3;
};
@@ -141,6 +182,7 @@ struct dcn_dpp_state {
uint32_t igam_input_format;
uint32_t dgam_lut_mode;
uint32_t rgam_lut_mode;
+ // gamut_remap data for dcn10_get_cm_states()
uint32_t gamut_remap_mode;
uint32_t gamut_remap_c11_c12;
uint32_t gamut_remap_c13_c14;
@@ -148,6 +190,16 @@ struct dcn_dpp_state {
uint32_t gamut_remap_c23_c24;
uint32_t gamut_remap_c31_c32;
uint32_t gamut_remap_c33_c34;
+ // gamut_remap data for dcn*_log_color_state()
+ struct dpp_grph_csc_adjustment gamut_remap;
+ uint32_t shaper_lut_mode;
+ uint32_t lut3d_mode;
+ uint32_t lut3d_bit_depth;
+ uint32_t lut3d_size;
+ uint32_t blnd_lut_mode;
+ uint32_t pre_dgam_mode;
+ uint32_t pre_dgam_select;
+ uint32_t gamcor_mode;
};
struct CM_bias_params {
@@ -286,10 +338,20 @@ struct dpp_funcs {
const struct pwl_params *params);
bool (*dpp_program_3dlut)(
struct dpp *dpp,
- struct tetrahedral_params *params);
+ const struct tetrahedral_params *params);
void (*dpp_cnv_set_alpha_keyer)(
struct dpp *dpp_base,
struct cnv_color_keyer_params *color_keyer);
+
+ void (*dpp_get_gamut_remap)(struct dpp *dpp_base,
+ struct dpp_grph_csc_adjustment *adjust);
+ void (*set_cursor_matrix)(
+ struct dpp *dpp_base,
+ enum dc_color_space color_space,
+ struct dc_csc_transform cursor_csc_color_matrix);
+
+ void (*dpp_force_disable_cursor)(struct dpp *dpp_base);
+
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
index 86b711dcc785..063efc8128a7 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h
@@ -147,9 +147,10 @@ struct dwb_caps {
unsigned int support_ogam :1;
unsigned int support_wbscl :1;
unsigned int support_ocsc :1;
- unsigned int support_stereo :1;
+ unsigned int support_stereo :1;
+ unsigned int support_4k_120p :1;
} caps;
- unsigned int reserved2[9]; /* Reserved for future use, MUST BE 0. */
+ unsigned int reserved2[10]; /* Reserved for future use, MUST BE 0. */
};
struct dwbc {
@@ -166,8 +167,9 @@ struct dwbc {
bool dwb_is_drc;
int wb_src_plane_inst;/*hubp, mpcc, inst*/
uint32_t mask_id;
- int otg_inst;
- bool mvc_cfg;
+ int otg_inst;
+ bool mvc_cfg;
+ struct dc_dwb_params params;
};
struct dwbc_funcs {
@@ -188,6 +190,14 @@ struct dwbc_funcs {
bool (*is_enabled)(
struct dwbc *dwbc);
+ void (*set_fc_enable)(
+ struct dwbc *dwbc,
+ enum dwb_frame_capture_enable enable);
+
+ void (*dwb_set_scaler)(
+ struct dwbc *dwbc,
+ struct dc_dwb_params *params);
+
void (*set_stereo)(
struct dwbc *dwbc,
struct dwb_stereo_params *stereo_params);
@@ -201,9 +211,11 @@ struct dwbc_funcs {
struct dwbc *dwbc,
struct dwb_warmup_params *warmup_params);
-
+ bool (*dwb_get_mcifbuf_line)(
+ struct dwbc *dwbc, unsigned int *buf_idx,
+ unsigned int *cur_line,
+ unsigned int *over_run);
#if defined(CONFIG_DRM_AMD_DC_FP)
-
void (*dwb_program_output_csc)(
struct dwbc *dwbc,
enum dc_color_space color_space,
@@ -212,17 +224,17 @@ struct dwbc_funcs {
bool (*dwb_ogam_set_output_transfer_func)(
struct dwbc *dwbc,
const struct dc_transfer_func *in_transfer_func_dwb_ogam);
-
+#endif
//TODO: merge with output_transfer_func?
bool (*dwb_ogam_set_input_transfer_func)(
struct dwbc *dwbc,
const struct dc_transfer_func *in_transfer_func_dwb_ogam);
-#endif
+
+ void (*get_drr_time_stamp)(
+ struct dwbc *dwbc, uint32_t *time_stamp);
+
bool (*get_dwb_status)(
struct dwbc *dwbc);
- void (*dwb_set_scaler)(
- struct dwbc *dwbc,
- struct dc_dwb_params *params);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index 7f3f9b69e903..2b874d2cc61c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -26,13 +26,27 @@
#ifndef __DAL_HUBP_H__
#define __DAL_HUBP_H__
+/**
+ * DOC: overview
+ *
+ * Display Controller Hub (DCHUB) is the gateway between the Scalable Data Port
+ * (SDP) and DCN. This component has multiple features, such as memory
+ * arbitration, rotation, and cursor manipulation.
+ *
+ * There is one HUBP allocated per pipe, which fetches data and converts
+ * different pixel formats (i.e. ARGB8888, NV12, etc) into linear, interleaved
+ * and fixed-depth streams of pixel data.
+ */
+
#include "mem_input.h"
#include "cursor_reg_cache.h"
+#include "dml2/dml21/inc/dml_top_dchub_registers.h"
+#include "dml2/dml21/inc/dml_top_types.h"
+
#define OPP_ID_INVALID 0xf
#define MAX_TTU 0xffffff
-
enum cursor_pitch {
CURSOR_PITCH_64_PIXELS = 0,
CURSOR_PITCH_128_PIXELS,
@@ -54,6 +68,53 @@ enum hubp_ind_block_size {
hubp_ind_block_64b_no_128bcl,
};
+enum hubp_3dlut_fl_mode {
+ hubp_3dlut_fl_mode_disable = 0,
+ hubp_3dlut_fl_mode_native_1 = 1,
+ hubp_3dlut_fl_mode_native_2 = 2,
+ hubp_3dlut_fl_mode_transform = 3
+};
+
+enum hubp_3dlut_fl_format {
+ hubp_3dlut_fl_format_unorm_12msb_bitslice = 0,
+ hubp_3dlut_fl_format_unorm_12lsb_bitslice = 1,
+ hubp_3dlut_fl_format_float_fp1_5_10 = 2
+};
+
+enum hubp_3dlut_fl_addressing_mode {
+ hubp_3dlut_fl_addressing_mode_sw_linear = 0,
+ hubp_3dlut_fl_addressing_mode_simple_linear = 1
+};
+
+enum hubp_3dlut_fl_width {
+ hubp_3dlut_fl_width_17 = 17,
+ hubp_3dlut_fl_width_33 = 33,
+ hubp_3dlut_fl_width_transformed = 4916, //mpc default
+};
+
+enum hubp_3dlut_fl_crossbar_bit_slice {
+ hubp_3dlut_fl_crossbar_bit_slice_0_15 = 0,
+ hubp_3dlut_fl_crossbar_bit_slice_16_31 = 1,
+ hubp_3dlut_fl_crossbar_bit_slice_32_47 = 2,
+ hubp_3dlut_fl_crossbar_bit_slice_48_63 = 3
+};
+
+struct hubp_fl_3dlut_config {
+ bool enabled;
+ enum hubp_3dlut_fl_width width;
+ enum hubp_3dlut_fl_mode mode;
+ enum hubp_3dlut_fl_format format;
+ uint16_t bias;
+ uint16_t scale;
+ struct dc_plane_address address;
+ enum hubp_3dlut_fl_addressing_mode addr_mode;
+ enum dc_cm2_gpu_mem_layout layout;
+ uint8_t protection_bits;
+ enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g;
+ enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b;
+ enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r;
+};
+
struct hubp {
const struct hubp_funcs *funcs;
struct dc_context *ctx;
@@ -100,14 +161,26 @@ struct hubp_funcs {
struct _vcs_dpi_display_rq_regs_st *rq_regs,
struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest);
+ void (*hubp_setup2)(
+ struct hubp *hubp,
+ struct dml2_dchub_per_pipe_register_set *pipe_regs,
+ union dml2_global_sync_programming *pipe_global_sync,
+ struct dc_crtc_timing *timing);
+
void (*hubp_setup_interdependent)(
struct hubp *hubp,
struct _vcs_dpi_display_dlg_regs_st *dlg_regs,
struct _vcs_dpi_display_ttu_regs_st *ttu_regs);
+ void (*hubp_setup_interdependent2)(
+ struct hubp *hubp,
+ struct dml2_dchub_per_pipe_register_set *pipe_regs);
+
void (*dcc_control)(struct hubp *hubp, bool enable,
enum hubp_ind_block_size blk_size);
+ void (*hubp_reset)(struct hubp *hubp);
+
void (*mem_program_viewport)(
struct hubp *hubp,
const struct rect *viewport,
@@ -121,7 +194,7 @@ struct hubp_funcs {
void (*hubp_program_pte_vm)(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum dc_rotation_angle rotation);
void (*hubp_set_vm_system_aperture_settings)(
@@ -135,7 +208,7 @@ struct hubp_funcs {
void (*hubp_program_surface_config)(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -146,9 +219,7 @@ struct hubp_funcs {
void (*set_blank)(struct hubp *hubp, bool blank);
void (*set_blank_regs)(struct hubp *hubp, bool blank);
-#ifdef CONFIG_DRM_AMD_DC_FP
void (*phantom_hubp_post_enable)(struct hubp *hubp);
-#endif
void (*set_hubp_blank_en)(struct hubp *hubp, bool blank);
void (*set_cursor_attributes)(
@@ -202,17 +273,41 @@ struct hubp_funcs {
bool (*hubp_in_blank)(struct hubp *hubp);
void (*hubp_soft_reset)(struct hubp *hubp, bool reset);
+ void (*hubp_set_flip_int)(struct hubp *hubp);
+
void (*hubp_update_force_pstate_disallow)(struct hubp *hubp, bool allow);
void (*hubp_update_force_cursor_pstate_disallow)(struct hubp *hubp, bool allow);
void (*hubp_update_mall_sel)(struct hubp *hubp, uint32_t mall_sel, bool c_cursor);
void (*hubp_prepare_subvp_buffering)(struct hubp *hubp, bool enable);
-
- void (*hubp_set_flip_int)(struct hubp *hubp);
+ void (*hubp_surface_update_lock)(struct hubp *hubp,
+ bool lock);
void (*program_extended_blank)(struct hubp *hubp,
unsigned int min_dst_y_next_start_optimized);
void (*hubp_wait_pipe_read_start)(struct hubp *hubp);
+ void (*hubp_program_mcache_id_and_split_coordinate)(struct hubp *hubp, struct dml2_hubp_pipe_mcache_regs *mcache_regs);
+ void (*hubp_update_3dlut_fl_bias_scale)(struct hubp *hubp, uint16_t bias, uint16_t scale);
+ void (*hubp_program_3dlut_fl_mode)(struct hubp *hubp,
+ enum hubp_3dlut_fl_mode mode);
+ void (*hubp_program_3dlut_fl_format)(struct hubp *hubp,
+ enum hubp_3dlut_fl_format format);
+ void (*hubp_program_3dlut_fl_addr)(struct hubp *hubp,
+ const struct dc_plane_address address);
+ void (*hubp_program_3dlut_fl_dlg_param)(struct hubp *hubp, int refcyc_per_3dlut_group);
+ void (*hubp_enable_3dlut_fl)(struct hubp *hubp, bool enable);
+ void (*hubp_program_3dlut_fl_addressing_mode)(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode);
+ void (*hubp_program_3dlut_fl_width)(struct hubp *hubp, enum hubp_3dlut_fl_width width);
+ void (*hubp_program_3dlut_fl_tmz_protected)(struct hubp *hubp, uint8_t protection_bits);
+ void (*hubp_program_3dlut_fl_crossbar)(struct hubp *hubp,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r);
+ int (*hubp_get_3dlut_fl_done)(struct hubp *hubp);
+ void (*hubp_program_3dlut_fl_config)(struct hubp *hubp, struct hubp_fl_3dlut_config *cfg);
+ void (*hubp_clear_tiling)(struct hubp *hubp);
+ uint32_t (*hubp_get_current_read_line)(struct hubp *hubp);
+ uint32_t (*hubp_get_det_config_error)(struct hubp *hubp);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index b95ae9596c3b..41c76ba9ba56 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -43,10 +43,12 @@
* to be used inside loops and for determining array sizes.
*/
#define MAX_PIPES 6
+#define MAX_PHANTOM_PIPES (MAX_PIPES / 2)
+#define MAX_LINKS (MAX_PIPES * 2 +2)
#define MAX_DIG_LINK_ENCODERS 7
#define MAX_DWB_PIPES 1
#define MAX_HPO_DP2_ENCODERS 4
-#define MAX_HPO_DP2_LINK_ENCODERS 2
+#define MAX_HPO_DP2_LINK_ENCODERS 4
struct gamma_curve {
uint32_t offset;
@@ -215,12 +217,13 @@ enum optc_dsc_mode {
};
struct dc_bias_and_scale {
- uint16_t scale_red;
- uint16_t bias_red;
- uint16_t scale_green;
- uint16_t bias_green;
- uint16_t scale_blue;
- uint16_t bias_blue;
+ uint32_t scale_red;
+ uint32_t bias_red;
+ uint32_t scale_green;
+ uint32_t bias_green;
+ uint32_t scale_blue;
+ uint32_t bias_blue;
+ bool bias_and_scale_valid;
};
enum test_pattern_dyn_range {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index dbe7afa9d3a2..08c16ba52a51 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -163,12 +163,19 @@ struct link_encoder_funcs {
enum signal_type (*get_dig_mode)(
struct link_encoder *enc);
+
void (*set_dio_phy_mux)(
struct link_encoder *enc,
enum encoder_type_select sel,
uint32_t hpo_inst);
- void (*set_dig_output_mode)(
- struct link_encoder *enc, uint8_t pix_per_container);
+ void (*enable_dpia_output)(struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ uint8_t dpia_id,
+ uint8_t digmode,
+ uint8_t fec_rdy);
+ void (*disable_dpia_output)(struct link_encoder *link_enc,
+ uint8_t dpia_id,
+ uint8_t digmode);
};
/*
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
index b72fb314d804..42fbc70f7056 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
@@ -29,6 +29,7 @@
#include "include/grph_object_id.h"
#include "dml/display_mode_structs.h"
+#include "dml2/dml21/inc/dml_top_dchub_registers.h"
struct dchub_init_data;
struct cstate_pstate_watermarks_st {
@@ -45,16 +46,24 @@ struct dcn_watermarks {
uint32_t urgent_ns;
uint32_t frac_urg_bw_nom;
uint32_t frac_urg_bw_flip;
- int32_t urgent_latency_ns;
+ uint32_t urgent_latency_ns;
struct cstate_pstate_watermarks_st cstate_pstate;
uint32_t usr_retraining_ns;
};
-struct dcn_watermark_set {
- struct dcn_watermarks a;
- struct dcn_watermarks b;
- struct dcn_watermarks c;
- struct dcn_watermarks d;
+union dcn_watermark_set {
+ struct {
+ struct dcn_watermarks a;
+ struct dcn_watermarks b;
+ struct dcn_watermarks c;
+ struct dcn_watermarks d;
+ }; // legacy
+ struct {
+ struct dml2_dchub_watermark_regs a;
+ struct dml2_dchub_watermark_regs b;
+ struct dml2_dchub_watermark_regs c;
+ struct dml2_dchub_watermark_regs d;
+ } dcn4x; //dcn4+
};
struct dce_watermarks {
@@ -141,7 +150,7 @@ struct mem_input_funcs {
void (*mem_input_program_pte_vm)(
struct mem_input *mem_input,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum dc_rotation_angle rotation);
void (*mem_input_set_vm_system_aperture_settings)(
@@ -155,7 +164,7 @@ struct mem_input_funcs {
void (*mem_input_program_surface_config)(
struct mem_input *mem_input,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -178,6 +187,8 @@ struct mem_input_funcs {
const struct dc_cursor_position *pos,
const struct dc_cursor_mi_param *param);
+ void (*mem_input_clear_tiling)(
+ struct mem_input *mem_input);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
index 8d86159d9de0..22960ee03dee 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
@@ -1,4 +1,5 @@
-/* Copyright 2012-15 Advanced Micro Devices, Inc.
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,13 +24,28 @@
*/
/**
- * DOC: mpc-overview
+ * DOC: overview
*
- * Multiple Pipe/Plane Combined (MPC) is a component in the hardware pipeline
+ * Multiple Pipe/Plane Combiner (MPC) is a component in the hardware pipeline
* that performs blending of multiple planes, using global and per-pixel alpha.
* It also performs post-blending color correction operations according to the
* hardware capabilities, such as color transformation matrix and gamma 1D and
* 3D LUT.
+ *
+ * MPC receives output from all DPP pipes and combines them to multiple outputs
+ * supporting "M MPC inputs -> N MPC outputs" flexible composition
+ * architecture. It features:
+ *
+ * - Programmable blending structure to allow software controlled blending and
+ * cascading;
+ * - Programmable window location of each DPP in active region of display;
+ * - Combining multiple DPP pipes in one active region when a single DPP pipe
+ * cannot process a very large surface;
+ * - Combining multiple DPP from different SLS with blending;
+ * - Stereo formats from single DPP in top-bottom or side-by-side modes;
+ * - Stereo formats from 2 DPPs;
+ * - Alpha blending of multiple layers from different DPP pipes;
+ * - Programmable background color;
*/
#ifndef __DC_MPCC_H__
@@ -81,36 +97,143 @@ enum mpcc_alpha_blend_mode {
MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA
};
+enum mpcc_movable_cm_location {
+ MPCC_MOVABLE_CM_LOCATION_BEFORE,
+ MPCC_MOVABLE_CM_LOCATION_AFTER,
+};
+
+enum MCM_LUT_XABLE {
+ MCM_LUT_DISABLE,
+ MCM_LUT_DISABLED = MCM_LUT_DISABLE,
+ MCM_LUT_ENABLE,
+ MCM_LUT_ENABLED = MCM_LUT_ENABLE,
+};
+
+enum MCM_LUT_ID {
+ MCM_LUT_3DLUT,
+ MCM_LUT_1DLUT,
+ MCM_LUT_SHAPER
+};
+
+struct mpc_fl_3dlut_config {
+ bool enabled;
+ uint16_t width;
+ bool select_lut_bank_a;
+ uint16_t bit_depth;
+ int hubp_index;
+ uint16_t bias;
+ uint16_t scale;
+};
+
+union mcm_lut_params {
+ const struct pwl_params *pwl;
+ const struct tetrahedral_params *lut3d;
+};
+
/**
* struct mpcc_blnd_cfg - MPCC blending configuration
- *
- * @black_color: background color
- * @alpha_mode: alpha blend mode (MPCC_ALPHA_BLND_MODE)
- * @pre_multiplied_alpha: whether pixel color values were pre-multiplied by the
- * alpha channel (MPCC_ALPHA_MULTIPLIED_MODE)
- * @global_gain: used when blend mode considers both pixel alpha and plane
- * alpha value and assumes the global alpha value.
- * @global_alpha: plane alpha value
*/
struct mpcc_blnd_cfg {
- struct tg_color black_color; /* background color */
- enum mpcc_alpha_blend_mode alpha_mode; /* alpha blend mode */
- bool pre_multiplied_alpha; /* alpha pre-multiplied mode flag */
+ /**
+ * @black_color: background color.
+ */
+ struct tg_color black_color;
+
+ /**
+ * @alpha_mode: alpha blend mode (MPCC_ALPHA_BLND_MODE).
+ */
+ enum mpcc_alpha_blend_mode alpha_mode;
+
+ /**
+ * @pre_multiplied_alpha:
+ * Whether pixel color values were pre-multiplied by the alpha channel
+ * (MPCC_ALPHA_MULTIPLIED_MODE).
+ */
+ bool pre_multiplied_alpha;
+
+ /**
+ * @global_gain: Used when blend mode considers both pixel alpha and plane alpha value and assumes the global alpha value.
+ */
int global_gain;
+
+ /**
+ * @global_alpha: Plane alpha value.
+ */
int global_alpha;
+
+ /**
+ * @overlap_only: Whether overlapping of different planes is allowed.
+ */
bool overlap_only;
/* MPCC top/bottom gain settings */
+
+ /**
+ * @bottom_gain_mode: Blend mode for bottom gain setting.
+ */
int bottom_gain_mode;
+
+ /**
+ * @background_color_bpc: Background color for bpc.
+ */
int background_color_bpc;
+
+ /**
+ * @top_gain: Top gain setting.
+ */
int top_gain;
+
+ /**
+ * @bottom_inside_gain: Blend mode for bottom inside.
+ */
int bottom_inside_gain;
+
+ /**
+ * @bottom_outside_gain: Blend mode for bottom outside.
+ */
int bottom_outside_gain;
};
struct mpc_grph_gamut_adjustment {
struct fixed31_32 temperature_matrix[CSC_TEMPERATURE_MATRIX_SIZE];
enum graphics_gamut_adjust_type gamut_adjust_type;
+ enum mpcc_gamut_remap_id mpcc_gamut_remap_block_id;
+};
+
+struct mpc_rmcm_regs {
+ uint32_t rmcm_3dlut_mem_pwr_state;
+ uint32_t rmcm_3dlut_mem_pwr_force;
+ uint32_t rmcm_3dlut_mem_pwr_dis;
+ uint32_t rmcm_3dlut_mem_pwr_mode;
+ uint32_t rmcm_3dlut_size;
+ uint32_t rmcm_3dlut_mode;
+ uint32_t rmcm_3dlut_mode_cur;
+ uint32_t rmcm_3dlut_read_sel;
+ uint32_t rmcm_3dlut_30bit_en;
+ uint32_t rmcm_3dlut_wr_en_mask;
+ uint32_t rmcm_3dlut_ram_sel;
+ uint32_t rmcm_3dlut_out_norm_factor;
+ uint32_t rmcm_3dlut_fl_sel;
+ uint32_t rmcm_3dlut_out_offset_r;
+ uint32_t rmcm_3dlut_out_scale_r;
+ uint32_t rmcm_3dlut_fl_done;
+ uint32_t rmcm_3dlut_fl_soft_underflow;
+ uint32_t rmcm_3dlut_fl_hard_underflow;
+ uint32_t rmcm_cntl;
+ uint32_t rmcm_shaper_mem_pwr_state;
+ uint32_t rmcm_shaper_mem_pwr_force;
+ uint32_t rmcm_shaper_mem_pwr_dis;
+ uint32_t rmcm_shaper_mem_pwr_mode;
+ uint32_t rmcm_shaper_lut_mode;
+ uint32_t rmcm_shaper_mode_cur;
+ uint32_t rmcm_shaper_lut_write_en_mask;
+ uint32_t rmcm_shaper_lut_write_sel;
+ uint32_t rmcm_shaper_offset_b;
+ uint32_t rmcm_shaper_scale_b;
+ uint32_t rmcm_shaper_rama_exp_region_start_b;
+ uint32_t rmcm_shaper_rama_exp_region_start_seg_b;
+ uint32_t rmcm_shaper_rama_exp_region_end_b;
+ uint32_t rmcm_shaper_rama_exp_region_end_base_b;
};
struct mpcc_sm_cfg {
@@ -144,34 +267,58 @@ struct mpc_dwb_flow_control {
/**
* struct mpcc - MPCC connection and blending configuration for a single MPCC instance.
- * @mpcc_id: MPCC physical instance
- * @dpp_id: DPP input to this MPCC
- * @mpcc_bot: pointer to bottom layer MPCC. NULL when not connected.
- * @blnd_cfg: the blending configuration for this MPCC
- * @sm_cfg: stereo mix setting for this MPCC
- * @shared_bottom: if MPCC output to both OPP and DWB endpoints, true. Otherwise, false.
*
* This struct is used as a node in an MPC tree.
*/
struct mpcc {
- int mpcc_id; /* MPCC physical instance */
- int dpp_id; /* DPP input to this MPCC */
- struct mpcc *mpcc_bot; /* pointer to bottom layer MPCC. NULL when not connected */
- struct mpcc_blnd_cfg blnd_cfg; /* The blending configuration for this MPCC */
- struct mpcc_sm_cfg sm_cfg; /* stereo mix setting for this MPCC */
- bool shared_bottom; /* TRUE if MPCC output to both OPP and DWB endpoints, else FALSE */
+ /**
+ * @mpcc_id: MPCC physical instance.
+ */
+ int mpcc_id;
+
+ /**
+ * @dpp_id: DPP input to this MPCC
+ */
+ int dpp_id;
+
+ /**
+ * @mpcc_bot: Pointer to bottom layer MPCC. NULL when not connected.
+ */
+ struct mpcc *mpcc_bot;
+
+ /**
+ * @blnd_cfg: The blending configuration for this MPCC.
+ */
+ struct mpcc_blnd_cfg blnd_cfg;
+
+ /**
+ * @sm_cfg: stereo mix setting for this MPCC
+ */
+ struct mpcc_sm_cfg sm_cfg;
+
+ /**
+ * @shared_bottom:
+ *
+ * If MPCC output to both OPP and DWB endpoints, true. Otherwise, false.
+ */
+ bool shared_bottom;
};
/**
* struct mpc_tree - MPC tree represents all MPCC connections for a pipe.
*
- * @opp_id: the OPP instance that owns this MPC tree
- * @opp_list: the top MPCC layer of the MPC tree that outputs to OPP endpoint
*
*/
struct mpc_tree {
- int opp_id; /* The OPP instance that owns this MPC tree */
- struct mpcc *opp_list; /* The top MPCC layer of the MPC tree that outputs to OPP endpoint */
+ /**
+ * @opp_id: The OPP instance that owns this MPC tree.
+ */
+ int opp_id;
+
+ /**
+ * @opp_list: the top MPCC layer of the MPC tree that outputs to OPP endpoint
+ */
+ struct mpcc *opp_list;
};
struct mpc {
@@ -193,35 +340,62 @@ struct mpcc_state {
uint32_t overlap_only;
uint32_t idle;
uint32_t busy;
+ uint32_t shaper_lut_mode;
+ uint32_t lut3d_mode;
+ uint32_t lut3d_bit_depth;
+ uint32_t lut3d_size;
+ uint32_t rgam_mode;
+ uint32_t rgam_lut;
+ struct mpc_grph_gamut_adjustment gamut_remap;
+ struct mpc_rmcm_regs rmcm_regs;
};
/**
* struct mpc_funcs - funcs
*/
struct mpc_funcs {
+ /**
+ * @read_mpcc_state:
+ *
+ * Read register content from given MPCC physical instance.
+ *
+ * Parameters:
+ *
+ * - [in/out] mpc - MPC context
+ * - [in] mpcc_inst - MPCC physical instance to read
+ * - [out] s - MPCC state read from the registers
+ *
+ * Return:
+ *
+ * void
+ */
void (*read_mpcc_state)(
struct mpc *mpc,
int mpcc_inst,
struct mpcc_state *s);
/**
- * @insert_plane:
- *
- * Insert DPP into MPC tree based on specified blending position.
- * Only used for planes that are part of blending chain for OPP output
- *
- * Parameters:
- * [in/out] mpc - MPC context.
- * [in/out] tree - MPC tree structure that plane will be added to.
- * [in] blnd_cfg - MPCC blending configuration for the new blending layer.
- * [in] sm_cfg - MPCC stereo mix configuration for the new blending layer.
- * stereo mix must disable for the very bottom layer of the tree config.
- * [in] insert_above_mpcc - Insert new plane above this MPCC. If NULL, insert as bottom plane.
- * [in] dpp_id - DPP instance for the plane to be added.
- * [in] mpcc_id - The MPCC physical instance to use for blending.
- *
- * Return: struct mpcc* - MPCC that was added.
- */
+ * @insert_plane:
+ *
+ * Insert DPP into MPC tree based on specified blending position.
+ * Only used for planes that are part of blending chain for OPP output
+ *
+ * Parameters:
+ *
+ * - [in/out] mpc - MPC context.
+ * - [in/out] tree - MPC tree structure that plane will be added to.
+ * - [in] blnd_cfg - MPCC blending configuration for the new blending layer.
+ * - [in] sm_cfg - MPCC stereo mix configuration for the new blending layer.
+ * stereo mix must disable for the very bottom layer of the tree config.
+ * - [in] insert_above_mpcc - Insert new plane above this MPCC.
+ * If NULL, insert as bottom plane.
+ * - [in] dpp_id - DPP instance for the plane to be added.
+ * - [in] mpcc_id - The MPCC physical instance to use for blending.
+ *
+ * Return:
+ *
+ * struct mpcc* - MPCC that was added.
+ */
struct mpcc* (*insert_plane)(
struct mpc *mpc,
struct mpc_tree *tree,
@@ -232,90 +406,117 @@ struct mpc_funcs {
int mpcc_id);
/**
- * @remove_mpcc:
- *
- * Remove a specified MPCC from the MPC tree.
- *
- * Parameters:
- * [in/out] mpc - MPC context.
- * [in/out] tree - MPC tree structure that plane will be removed from.
- * [in/out] mpcc - MPCC to be removed from tree.
- *
- * Return: void
- */
+ * @remove_mpcc:
+ *
+ * Remove a specified MPCC from the MPC tree.
+ *
+ * Parameters:
+ *
+ * - [in/out] mpc - MPC context.
+ * - [in/out] tree - MPC tree structure that plane will be removed from.
+ * - [in/out] mpcc - MPCC to be removed from tree.
+ *
+ * Return:
+ *
+ * void
+ */
void (*remove_mpcc)(
struct mpc *mpc,
struct mpc_tree *tree,
struct mpcc *mpcc);
/**
- * @mpc_init:
- *
- * Reset the MPCC HW status by disconnecting all muxes.
- *
- * Parameters:
- * [in/out] mpc - MPC context.
- *
- * Return: void
- */
+ * @mpc_init:
+ *
+ * Reset the MPCC HW status by disconnecting all muxes.
+ *
+ * Parameters:
+ *
+ * - [in/out] mpc - MPC context.
+ *
+ * Return:
+ *
+ * void
+ */
void (*mpc_init)(struct mpc *mpc);
+
+ /**
+ * @mpc_init_single_inst:
+ *
+ * Initialize given MPCC physical instance.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] mpcc_id - The MPCC physical instance to be initialized.
+ */
void (*mpc_init_single_inst)(
struct mpc *mpc,
unsigned int mpcc_id);
/**
- * @update_blending:
- *
- * Update the blending configuration for a specified MPCC.
- *
- * Parameters:
- * [in/out] mpc - MPC context.
- * [in] blnd_cfg - MPCC blending configuration.
- * [in] mpcc_id - The MPCC physical instance.
- *
- * Return: void
- */
+ * @update_blending:
+ *
+ * Update the blending configuration for a specified MPCC.
+ *
+ * Parameters:
+ *
+ * - [in/out] mpc - MPC context.
+ * - [in] blnd_cfg - MPCC blending configuration.
+ * - [in] mpcc_id - The MPCC physical instance.
+ *
+ * Return:
+ *
+ * void
+ */
void (*update_blending)(
struct mpc *mpc,
struct mpcc_blnd_cfg *blnd_cfg,
int mpcc_id);
/**
- * @cursor_lock:
- *
- * Lock cursor updates for the specified OPP.
- * OPP defines the set of MPCC that are locked together for cursor.
- *
- * Parameters:
- * [in] mpc - MPC context.
- * [in] opp_id - The OPP to lock cursor updates on
- * [in] lock - lock/unlock the OPP
- *
- * Return: void
- */
+ * @cursor_lock:
+ *
+ * Lock cursor updates for the specified OPP. OPP defines the set of
+ * MPCC that are locked together for cursor.
+ *
+ * Parameters:
+ *
+ * - [in] mpc - MPC context.
+ * - [in] opp_id - The OPP to lock cursor updates on
+ * - [in] lock - lock/unlock the OPP
+ *
+ * Return:
+ *
+ * void
+ */
void (*cursor_lock)(
struct mpc *mpc,
int opp_id,
bool lock);
/**
- * @insert_plane_to_secondary:
- *
- * Add DPP into secondary MPC tree based on specified blending position.
- * Only used for planes that are part of blending chain for DWB output
- *
- * Parameters:
- * [in/out] mpc - MPC context.
- * [in/out] tree - MPC tree structure that plane will be added to.
- * [in] blnd_cfg - MPCC blending configuration for the new blending layer.
- * [in] sm_cfg - MPCC stereo mix configuration for the new blending layer.
- * stereo mix must disable for the very bottom layer of the tree config.
- * [in] insert_above_mpcc - Insert new plane above this MPCC. If NULL, insert as bottom plane.
- * [in] dpp_id - DPP instance for the plane to be added.
- * [in] mpcc_id - The MPCC physical instance to use for blending.
- *
- * Return: struct mpcc* - MPCC that was added.
- */
+ * @insert_plane_to_secondary:
+ *
+ * Add DPP into secondary MPC tree based on specified blending
+ * position. Only used for planes that are part of blending chain for
+ * DWB output
+ *
+ * Parameters:
+ *
+ * - [in/out] mpc - MPC context.
+ * - [in/out] tree - MPC tree structure that plane will be added to.
+ * - [in] blnd_cfg - MPCC blending configuration for the new blending layer.
+ * - [in] sm_cfg - MPCC stereo mix configuration for the new blending layer.
+ * stereo mix must disable for the very bottom layer of the tree config.
+ * - [in] insert_above_mpcc - Insert new plane above this MPCC. If
+ * NULL, insert as bottom plane.
+ * - [in] dpp_id - DPP instance for the plane to be added.
+ * - [in] mpcc_id - The MPCC physical instance to use for blending.
+ *
+ * Return:
+ *
+ * struct mpcc* - MPCC that was added.
+ */
struct mpcc* (*insert_plane_to_secondary)(
struct mpc *mpc,
struct mpc_tree *tree,
@@ -326,77 +527,301 @@ struct mpc_funcs {
int mpcc_id);
/**
- * @remove_mpcc_from_secondary:
- *
- * Remove a specified DPP from the 'secondary' MPC tree.
- *
- * Parameters:
- * [in/out] mpc - MPC context.
- * [in/out] tree - MPC tree structure that plane will be removed from.
- * [in] mpcc - MPCC to be removed from tree.
- * Return: void
- */
+ * @remove_mpcc_from_secondary:
+ *
+ * Remove a specified DPP from the 'secondary' MPC tree.
+ *
+ * Parameters:
+ *
+ * - [in/out] mpc - MPC context.
+ * - [in/out] tree - MPC tree structure that plane will be removed from.
+ * - [in] mpcc - MPCC to be removed from tree.
+ *
+ * Return:
+ *
+ * void
+ */
void (*remove_mpcc_from_secondary)(
struct mpc *mpc,
struct mpc_tree *tree,
struct mpcc *mpcc);
+ /**
+ * @get_mpcc_for_dpp_from_secondary:
+ *
+ * Find, if it exists, a MPCC from a given 'secondary' MPC tree that
+ * is associated with specified plane.
+ *
+ * Parameters:
+ * - [in/out] tree - MPC tree structure to search for plane.
+ * - [in] dpp_id - DPP to be searched.
+ *
+ * Return:
+ *
+ * struct mpcc* - pointer to plane or NULL if no plane found.
+ */
struct mpcc* (*get_mpcc_for_dpp_from_secondary)(
struct mpc_tree *tree,
int dpp_id);
+ /**
+ * @get_mpcc_for_dpp:
+ *
+ * Find, if it exists, a MPCC from a given MPC tree that
+ * is associated with specified plane.
+ *
+ * Parameters:
+ * - [in/out] tree - MPC tree structure to search for plane.
+ * - [in] dpp_id - DPP to be searched.
+ *
+ * Return:
+ *
+ * struct mpcc* - pointer to plane or NULL if no plane found.
+ */
struct mpcc* (*get_mpcc_for_dpp)(
struct mpc_tree *tree,
int dpp_id);
+ /**
+ * @wait_for_idle:
+ *
+ * Wait for a MPCC in MPC context to enter idle state.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC Context.
+ * - [in] id - MPCC to wait for idle state.
+ *
+ * Return:
+ *
+ * void
+ */
void (*wait_for_idle)(struct mpc *mpc, int id);
+ /**
+ * @assert_mpcc_idle_before_connect:
+ *
+ * Assert if MPCC in MPC context is in idle state.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] mpcc_id - MPCC to assert idle state.
+ *
+ * Return:
+ *
+ * void
+ */
void (*assert_mpcc_idle_before_connect)(struct mpc *mpc, int mpcc_id);
+ /**
+ * @init_mpcc_list_from_hw:
+ *
+ * Iterate through the MPCC array from a given MPC context struct
+ * and configure each MPCC according to its registers' values.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context to initialize MPCC array.
+ * - [in/out] tree - MPC tree structure containing MPCC contexts to initialize.
+ *
+ * Return:
+ *
+ * void
+ */
void (*init_mpcc_list_from_hw)(
struct mpc *mpc,
struct mpc_tree *tree);
+ /**
+ * @set_denorm:
+ *
+ * Set corresponding OPP DENORM_CONTROL register value to specific denorm_mode
+ * based on given color depth.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] opp_id - Corresponding OPP to update register.
+ * - [in] output_depth - Arbitrary color depth to set denorm_mode.
+ *
+ * Return:
+ *
+ * void
+ */
void (*set_denorm)(struct mpc *mpc,
int opp_id,
enum dc_color_depth output_depth);
+ /**
+ * @set_denorm_clamp:
+ *
+ * Set denorm clamp values on corresponding OPP DENORM CONTROL register.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] opp_id - Corresponding OPP to update register.
+ * - [in] denorm_clamp - Arbitrary denorm clamp to be set.
+ *
+ * Return:
+ *
+ * void
+ */
void (*set_denorm_clamp)(
struct mpc *mpc,
int opp_id,
struct mpc_denorm_clamp denorm_clamp);
+ /**
+ * @set_output_csc:
+ *
+ * Set the Output Color Space Conversion matrix
+ * with given values and mode.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] opp_id - Corresponding OPP to update register.
+ * - [in] regval - Values to set in CSC matrix.
+ * - [in] ocsc_mode - Mode to set CSC.
+ *
+ * Return:
+ *
+ * void
+ */
void (*set_output_csc)(struct mpc *mpc,
int opp_id,
const uint16_t *regval,
enum mpc_output_csc_mode ocsc_mode);
+ /**
+ * @set_ocsc_default:
+ *
+ * Set the Output Color Space Conversion matrix
+ * to default values according to color space.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] opp_id - Corresponding OPP to update register.
+ * - [in] color_space - OCSC color space.
+ * - [in] ocsc_mode - Mode to set CSC.
+ *
+ * Return:
+ *
+ * void
+ *
+ */
void (*set_ocsc_default)(struct mpc *mpc,
int opp_id,
enum dc_color_space color_space,
enum mpc_output_csc_mode ocsc_mode);
+ /**
+ * @set_output_gamma:
+ *
+ * Set Output Gamma with given curve parameters.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] mpcc_id - Corresponding MPC to update registers.
+ * - [in] params - Parameters.
+ *
+ * Return:
+ *
+ * void
+ *
+ */
void (*set_output_gamma)(
struct mpc *mpc,
int mpcc_id,
const struct pwl_params *params);
+ /**
+ * @power_on_mpc_mem_pwr:
+ *
+ * Power on/off memory LUT for given MPCC.
+ * Powering on enables LUT to be updated.
+ * Powering off allows entering low power mode.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] mpcc_id - MPCC to power on.
+ * - [in] power_on
+ *
+ * Return:
+ *
+ * void
+ */
void (*power_on_mpc_mem_pwr)(
struct mpc *mpc,
int mpcc_id,
bool power_on);
+ /**
+ * @set_dwb_mux:
+ *
+ * Set corresponding Display Writeback mux
+ * MPC register field to given MPCC id.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] dwb_id - DWB to be set.
+ * - [in] mpcc_id - MPCC id to be stored in DWB mux register.
+ *
+ * Return:
+ *
+ * void
+ */
void (*set_dwb_mux)(
struct mpc *mpc,
int dwb_id,
int mpcc_id);
+ /**
+ * @disable_dwb_mux:
+ *
+ * Reset corresponding Display Writeback mux
+ * MPC register field.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] dwb_id - DWB to be set.
+ *
+ * Return:
+ *
+ * void
+ */
void (*disable_dwb_mux)(
struct mpc *mpc,
int dwb_id);
+ /**
+ * @is_dwb_idle:
+ *
+ * Check DWB status on MPC_DWB0_MUX_STATUS register field.
+ * Return if it is null.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] dwb_id - DWB to be checked.
+ *
+ * Return:
+ *
+ * bool - whether DWB is idle or not
+ */
bool (*is_dwb_idle)(
struct mpc *mpc,
int dwb_id);
+ /**
+ * @set_out_rate_control:
+ *
+ * Set display output rate control.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] opp_id - OPP to be set.
+ * - [in] enable
+ * - [in] rate_2x_mode
+ * - [in] flow_control
+ *
+ * Return:
+ *
+ * void
+ */
void (*set_out_rate_control)(
struct mpc *mpc,
int opp_id,
@@ -404,38 +829,286 @@ struct mpc_funcs {
bool rate_2x_mode,
struct mpc_dwb_flow_control *flow_control);
+ /**
+ * @set_gamut_remap:
+ *
+ * Set post-blending CTM for given MPCC.
+ *
+ * Parameters:
+ * - [in] mpc - MPC context.
+ * - [in] mpcc_id - MPCC to set gamut map.
+ * - [in] adjust
+ *
+ * Return:
+ *
+ * void
+ */
void (*set_gamut_remap)(
struct mpc *mpc,
int mpcc_id,
const struct mpc_grph_gamut_adjustment *adjust);
+ /**
+ * @program_1dlut:
+ *
+ * Set 1 dimensional Lookup Table.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context
+ * - [in] params - curve parameters for the LUT configuration
+ * - [in] rmu_idx
+ *
+ * bool - whether LUT was set (set with given parameters) or not (params is NULL and LUT is disabled).
+ */
bool (*program_1dlut)(
struct mpc *mpc,
const struct pwl_params *params,
uint32_t rmu_idx);
+ /**
+ * @program_shaper:
+ *
+ * Set shaper.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context
+ * - [in] params - curve parameters to be set
+ * - [in] rmu_idx
+ *
+ * Return:
+ *
+ * bool - whether shaper was set (set with given parameters) or not (params is NULL and LUT is disabled).
+ */
bool (*program_shaper)(
struct mpc *mpc,
const struct pwl_params *params,
uint32_t rmu_idx);
+ /**
+ * @acquire_rmu:
+ *
+ * Set given MPCC to be multiplexed to given RMU unit.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context
+ * - [in] mpcc_id - MPCC
+ * - [in] rmu_idx - Given RMU unit to set MPCC to be multiplexed to.
+ *
+ * Return:
+ *
+ * uint32_t - rmu_idx if operation was successful, -1 otherwise.
+ */
uint32_t (*acquire_rmu)(struct mpc *mpc, int mpcc_id, int rmu_idx);
+ /**
+ * @program_3dlut:
+ *
+ * Set 3 dimensional Lookup Table.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context
+ * - [in] params - tetrahedral parameters for the LUT configuration
+ * - [in] rmu_idx
+ *
+ * bool - whether LUT was set (set with given parameters) or not (params is NULL and LUT is disabled).
+ */
bool (*program_3dlut)(
struct mpc *mpc,
const struct tetrahedral_params *params,
int rmu_idx);
+ /**
+ * @release_rmu:
+ *
+ * For a given MPCC, release the RMU unit it multiplexes to.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context
+ * - [in] mpcc_id - MPCC
+ *
+ * Return:
+ *
+ * int - a valid rmu_idx representing released RMU unit or -1 if there was no RMU unit to release.
+ */
int (*release_rmu)(struct mpc *mpc, int mpcc_id);
+ /**
+ * @get_mpc_out_mux:
+ *
+ * Return MPC out mux.
+ *
+ * Parameters:
+ * - [in] mpc - MPC context.
+ * - [in] opp_id - OPP
+ *
+ * Return:
+ *
+ * unsigned int - Out Mux
+ */
unsigned int (*get_mpc_out_mux)(
- struct mpc *mpc,
- int opp_id);
+ struct mpc *mpc,
+ int opp_id);
+ /**
+ * @set_bg_color:
+ *
+ * Find corresponding bottommost MPCC and
+ * set its bg color.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] bg_color - background color to be set.
+ * - [in] mpcc_id
+ *
+ * Return:
+ *
+ * void
+ */
void (*set_bg_color)(struct mpc *mpc,
struct tg_color *bg_color,
int mpcc_id);
+
+ /**
+ * @set_mpc_mem_lp_mode:
+ *
+ * Set mpc_mem_lp_mode.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ *
+ * Return:
+ *
+ * void
+ */
+
void (*set_mpc_mem_lp_mode)(struct mpc *mpc);
+ /**
+ * @set_movable_cm_location:
+ *
+ * Set Movable CM Location.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] location
+ * - [in] mpcc_id
+ *
+ * Return:
+ *
+ * void
+ */
+
+ void (*set_movable_cm_location)(struct mpc *mpc, enum mpcc_movable_cm_location location, int mpcc_id);
+ /**
+ * @update_3dlut_fast_load_select:
+ *
+ * Update 3D LUT fast load select.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] mpcc_id
+ * - [in] hubp_idx
+ *
+ * Return:
+ *
+ * void
+ */
+
+ void (*update_3dlut_fast_load_select)(struct mpc *mpc, int mpcc_id, int hubp_idx);
+
+ /**
+ * @populate_lut:
+ *
+ * Populate LUT with given tetrahedral parameters.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] id
+ * - [in] params
+ * - [in] lut_bank_a
+ * - [in] mpcc_id
+ *
+ * Return:
+ *
+ * void
+ */
+ void (*populate_lut)(struct mpc *mpc, const enum MCM_LUT_ID id, const union mcm_lut_params params,
+ bool lut_bank_a, int mpcc_id);
+
+ /**
+ * @program_lut_read_write_control:
+ *
+ * Program LUT RW control.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] id
+ * - [in] lut_bank_a
+ * - [in] mpcc_id
+ *
+ * Return:
+ *
+ * void
+ */
+ void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id, bool lut_bank_a, int mpcc_id);
+
+ /**
+ * @program_lut_mode:
+ *
+ * Program LUT mode.
+ *
+ * Parameters:
+ * - [in/out] mpc - MPC context.
+ * - [in] id
+ * - [in] xable
+ * - [in] lut_bank_a
+ * - [in] mpcc_id
+ *
+ * Return:
+ *
+ * void
+ */
+ void (*program_lut_mode)(struct mpc *mpc, const enum MCM_LUT_ID id, const enum MCM_LUT_XABLE xable,
+ bool lut_bank_a, int mpcc_id);
+
+ /**
+ * @mcm:
+ *
+ * MPC MCM new HW sequential programming functions
+ */
+ struct {
+ void (*program_3dlut_size)(struct mpc *mpc, uint32_t width, int mpcc_id);
+ void (*program_bias_scale)(struct mpc *mpc, uint16_t bias, uint16_t scale, int mpcc_id);
+ void (*program_bit_depth)(struct mpc *mpc, uint16_t bit_depth, int mpcc_id);
+ bool (*is_config_supported)(uint32_t width);
+ void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id,
+ bool lut_bank_a, bool enabled, int mpcc_id);
+
+ void (*populate_lut)(struct mpc *mpc, const union mcm_lut_params params,
+ bool lut_bank_a, int mpcc_id);
+ } mcm;
+
+ /**
+ * @rmcm:
+ *
+ * MPC RMCM new HW sequential programming functions
+ */
+ struct {
+ void (*fl_3dlut_configure)(struct mpc *mpc, struct mpc_fl_3dlut_config *cfg, int mpcc_id);
+ void (*enable_3dlut_fl)(struct mpc *mpc, bool enable, int mpcc_id);
+ void (*update_3dlut_fast_load_select)(struct mpc *mpc, int mpcc_id, int hubp_idx);
+ void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id,
+ bool lut_bank_a, bool enabled, int mpcc_id);
+ void (*program_lut_mode)(struct mpc *mpc, const enum MCM_LUT_XABLE xable,
+ bool lut_bank_a, int mpcc_id);
+ void (*program_3dlut_size)(struct mpc *mpc, uint32_t width, int mpcc_id);
+ void (*program_bias_scale)(struct mpc *mpc, uint16_t bias, uint16_t scale, int mpcc_id);
+ void (*program_bit_depth)(struct mpc *mpc, uint16_t bit_depth, int mpcc_id);
+ bool (*is_config_supported)(uint32_t width);
+
+ void (*power_on_shaper_3dlut)(struct mpc *mpc, uint32_t mpcc_id, bool power_on);
+ void (*populate_lut)(struct mpc *mpc, const union mcm_lut_params params,
+ bool lut_bank_a, int mpcc_id);
+ } rmcm;
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
index 7617fabbd16e..747679cb4944 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
@@ -23,6 +23,22 @@
*
*/
+/**
+ * DOC: overview
+ *
+ * The Output Plane Processor (OPP) block groups have functions that format
+ * pixel streams such that they are suitable for display at the display device.
+ * The key functions contained in the OPP are:
+ *
+ * - Adaptive Backlight Modulation (ABM)
+ * - Formatter (FMT) which provides pixel-by-pixel operations to format the
+ * incoming pixel stream.
+ * - Output Buffer that provides pixel replication and overlapping.
+ * - Interface between MPC and OPTC.
+ * - Clock and reset generation.
+ * - CRC generation.
+ */
+
#ifndef __DAL_OPP_H__
#define __DAL_OPP_H__
@@ -189,9 +205,24 @@ struct gamma_coefficients {
struct fixed31_32 user_brightness;
};
+/**
+ * struct pwl_float_data - Fixed point RGB color
+ */
struct pwl_float_data {
+ /**
+ * @r: Component Red.
+ */
struct fixed31_32 r;
+
+ /**
+ * @g: Component Green.
+ */
+
struct fixed31_32 g;
+
+ /**
+ * @b: Component Blue.
+ */
struct fixed31_32 b;
};
@@ -321,14 +352,22 @@ struct opp_funcs {
bool (*dpg_is_blanked)(
struct output_pixel_processor *opp);
+ bool (*dpg_is_pending)(struct output_pixel_processor *opp);
+
+
void (*opp_dpg_set_blank_color)(
struct output_pixel_processor *opp,
const struct tg_color *color);
void (*opp_program_left_edge_extra_pixel)(
struct output_pixel_processor *opp,
- bool count);
+ enum dc_pixel_encoding pixel_encoding,
+ bool is_primary);
+ uint32_t (*opp_get_left_edge_extra_pixel_count)(
+ struct output_pixel_processor *opp,
+ enum dc_pixel_encoding pixel_encoding,
+ bool is_primary);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h
new file mode 100644
index 000000000000..0d5a8358a778
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+/**
+ * DOC: overview
+ *
+ * Output Pipe Timing Combiner (OPTC) includes two major functional blocks:
+ * Output Data Mapper (ODM) and Output Timing Generator (OTG).
+ *
+ * - ODM: It is Output Data Mapping block. It can combine input data from
+ * multiple OPP data pipes into one single data stream or split data from one
+ * OPP data pipe into multiple data streams or just bypass OPP data to DIO.
+ * - OTG: It is Output Timing Generator. It generates display timing signals to
+ * drive the display output.
+ */
+
+#ifndef __DC_OPTC_H__
+#define __DC_OPTC_H__
+
+#include "timing_generator.h"
+
+struct optc {
+ struct timing_generator base;
+
+ const struct dcn_optc_registers *tg_regs;
+ const struct dcn_optc_shift *tg_shift;
+ const struct dcn_optc_mask *tg_mask;
+
+ int opp_count;
+
+ uint32_t max_h_total;
+ uint32_t max_v_total;
+
+ uint32_t min_h_blank;
+
+ uint32_t min_h_sync_width;
+ uint32_t min_v_sync_width;
+ uint32_t min_v_blank;
+ uint32_t min_v_blank_interlace;
+
+ int vstartup_start;
+ int vupdate_offset;
+ int vupdate_width;
+ int vready_offset;
+ int pstate_keepout;
+ struct dc_crtc_timing orginal_patched_timing;
+ enum signal_type signal;
+ uint32_t max_frame_count;
+};
+
+void optc1_read_otg_state(struct timing_generator *optc, struct dcn_otg_state *s);
+
+bool optc1_get_hw_timing(struct timing_generator *tg, struct dc_crtc_timing *hw_crtc_timing);
+
+bool optc1_validate_timing(struct timing_generator *optc,
+ const struct dc_crtc_timing *timing);
+
+void optc1_program_timing(struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ int pstate_keepout,
+ const enum signal_type signal,
+ bool use_vbios);
+
+void optc1_setup_vertical_interrupt0(struct timing_generator *optc,
+ uint32_t start_line,
+ uint32_t end_line);
+
+void optc1_setup_vertical_interrupt1(struct timing_generator *optc,
+ uint32_t start_line);
+
+void optc1_setup_vertical_interrupt2(struct timing_generator *optc,
+ uint32_t start_line);
+
+void optc1_program_global_sync(struct timing_generator *optc,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ int pstate_keepout);
+
+bool optc1_disable_crtc(struct timing_generator *optc);
+
+bool optc1_is_counter_moving(struct timing_generator *optc);
+
+void optc1_get_position(struct timing_generator *optc,
+ struct crtc_position *position);
+
+uint32_t optc1_get_vblank_counter(struct timing_generator *optc);
+
+void optc1_get_crtc_scanoutpos(struct timing_generator *optc,
+ uint32_t *v_blank_start,
+ uint32_t *v_blank_end,
+ uint32_t *h_position,
+ uint32_t *v_position);
+
+void optc1_set_early_control(struct timing_generator *optc,
+ uint32_t early_cntl);
+
+void optc1_wait_for_state(struct timing_generator *optc,
+ enum crtc_state state);
+
+void optc1_set_blank(struct timing_generator *optc,
+ bool enable_blanking);
+
+bool optc1_is_blanked(struct timing_generator *optc);
+
+void optc1_program_blank_color(struct timing_generator *optc,
+ const struct tg_color *black_color);
+
+bool optc1_did_triggered_reset_occur(struct timing_generator *optc);
+
+void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst);
+
+void optc1_disable_reset_trigger(struct timing_generator *optc);
+
+void optc1_lock(struct timing_generator *optc);
+
+void optc1_unlock(struct timing_generator *optc);
+
+void optc1_enable_optc_clock(struct timing_generator *optc, bool enable);
+
+void optc1_set_drr(struct timing_generator *optc,
+ const struct drr_params *params);
+
+void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max);
+
+void optc1_set_static_screen_control(struct timing_generator *optc,
+ uint32_t event_triggers,
+ uint32_t num_frames);
+
+void optc1_program_stereo(struct timing_generator *optc,
+ const struct dc_crtc_timing *timing,
+ struct crtc_stereo_flags *flags);
+
+bool optc1_is_stereo_left_eye(struct timing_generator *optc);
+
+void optc1_clear_optc_underflow(struct timing_generator *optc);
+
+void optc1_tg_init(struct timing_generator *optc);
+
+bool optc1_is_tg_enabled(struct timing_generator *optc);
+
+bool optc1_is_optc_underflow_occurred(struct timing_generator *optc);
+
+void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable);
+
+void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable);
+
+bool optc1_get_otg_active_size(struct timing_generator *optc,
+ uint32_t *otg_active_width,
+ uint32_t *otg_active_height);
+
+void optc1_enable_crtc_reset(struct timing_generator *optc,
+ int source_tg_inst,
+ struct crtc_trigger_info *crtc_tp);
+
+bool optc1_configure_crc(struct timing_generator *optc, const struct crc_params *params);
+
+bool optc1_get_crc(struct timing_generator *optc, uint8_t idx,
+ uint32_t *r_cr,
+ uint32_t *g_y,
+ uint32_t *b_cb);
+
+void optc1_set_vtg_params(struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing,
+ bool program_fp2);
+
+bool optc1_is_two_pixels_per_container(const struct dc_crtc_timing *timing);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
index 24af9d80b937..e97d964a1791 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h
@@ -40,6 +40,7 @@ struct panel_cntl_backlight_registers {
unsigned int BL_PWM_PERIOD_CNTL;
unsigned int LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV;
unsigned int PANEL_PWRSEQ_REF_DIV2;
+ unsigned int USER_LEVEL;
};
struct panel_cntl_funcs {
@@ -56,12 +57,14 @@ struct panel_cntl_funcs {
struct panel_cntl_init_data {
struct dc_context *ctx;
uint32_t inst;
+ uint32_t eng_id;
};
struct panel_cntl {
const struct panel_cntl_funcs *funcs;
struct dc_context *ctx;
uint32_t inst;
+ uint32_t pwrseq_inst;
/* registers setting needs to be saved and restored at InitBacklight */
struct panel_cntl_backlight_registers stored_backlight_registers;
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
new file mode 100644
index 000000000000..227e3f8d7e5f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
@@ -0,0 +1,55 @@
+/* Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_PG_CNTL_H__
+#define __DC_PG_CNTL_H__
+
+#include "dc.h"
+#include "dc_types.h"
+#include "hw_shared.h"
+
+struct pg_cntl {
+ struct dc_context *ctx;
+ const struct pg_cntl_funcs *funcs;
+ bool pg_pipe_res_enable[PG_HW_PIPE_RESOURCES_NUM_ELEMENT][MAX_PIPES];
+ bool pg_res_enable[PG_HW_RESOURCES_NUM_ELEMENT];
+};
+
+struct pg_cntl_funcs {
+ void (*dsc_pg_control)(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bool power_on);
+ void (*hubp_dpp_pg_control)(struct pg_cntl *pg_cntl, unsigned int hubp_dpp_inst, bool power_on);
+ void (*hpo_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
+ void (*io_clk_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
+ void (*plane_otg_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
+ void (*mpcc_pg_control)(struct pg_cntl *pg_cntl, unsigned int mpcc_inst, bool power_on);
+ void (*opp_pg_control)(struct pg_cntl *pg_cntl, unsigned int opp_inst, bool power_on);
+ void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on);
+ void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
+ void (*mem_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
+ void (*dio_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
+ void (*init_pg_status)(struct pg_cntl *pg_cntl);
+ void (*print_pg_status)(struct pg_cntl *pg_cntl, const char *debug_func, const char *debug_log);
+};
+
+#endif //__DC_PG_CNTL_H__
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
index a6dedf3c7d74..27f950ae45ee 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
@@ -99,6 +99,7 @@ struct encoder_unblank_param {
struct dc_link_settings link_settings;
struct dc_crtc_timing timing;
int opp_cnt;
+ uint32_t pix_per_cycle;
};
struct encoder_set_dp_phy_pattern_param {
@@ -116,6 +117,7 @@ struct stream_encoder {
uint32_t stream_enc_inst;
struct vpg *vpg;
struct afmt *afmt;
+ struct apg *apg;
};
struct enc_state {
@@ -178,10 +180,6 @@ struct stream_encoder_funcs {
void (*stop_dp_info_packets)(
struct stream_encoder *enc);
- void (*reset_fifo)(
- struct stream_encoder *enc
- );
-
void (*dp_blank)(
struct dc_link *link,
struct stream_encoder *enc);
@@ -226,6 +224,11 @@ struct stream_encoder_funcs {
struct stream_encoder *enc,
int tg_inst);
+ void (*enable_stream)(
+ struct stream_encoder *enc,
+ enum signal_type signal,
+ bool enable);
+
void (*hdmi_reset_stream_attribute)(
struct stream_encoder *enc);
@@ -269,7 +272,9 @@ struct stream_encoder_funcs {
struct stream_encoder *enc, unsigned int pix_per_container);
void (*enable_fifo)(struct stream_encoder *enc);
void (*disable_fifo)(struct stream_encoder *enc);
+ bool (*is_fifo_enabled)(struct stream_encoder *enc);
void (*map_stream_to_link)(struct stream_encoder *enc, uint32_t stream_enc_inst, uint32_t link_enc_inst);
+ uint32_t (*get_pixels_per_cycle)(struct stream_encoder *enc);
};
struct hpo_dp_stream_encoder_state {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index c21e7ffd5bd0..f2de2cf23859 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -64,6 +64,12 @@ struct drr_params {
bool immediate_flip;
};
+struct long_vtotal_params {
+ uint32_t vertical_total_min;
+ uint32_t vertical_total_max;
+ uint32_t vertical_blank_start;
+};
+
#define LEFT_EYE_3D_PRIMARY_SURFACE 1
#define RIGHT_EYE_3D_PRIMARY_SURFACE 0
@@ -135,6 +141,38 @@ struct crc_params {
bool continuous_mode;
bool enable;
+
+ uint8_t crc_eng_inst;
+ bool reset;
+};
+
+struct dcn_otg_state {
+ uint32_t v_blank_start;
+ uint32_t v_blank_end;
+ uint32_t v_sync_a_pol;
+ uint32_t v_total;
+ uint32_t v_total_max;
+ uint32_t v_total_min;
+ uint32_t v_total_min_sel;
+ uint32_t v_total_max_sel;
+ uint32_t v_sync_a_start;
+ uint32_t v_sync_a_end;
+ uint32_t h_blank_start;
+ uint32_t h_blank_end;
+ uint32_t h_sync_a_start;
+ uint32_t h_sync_a_end;
+ uint32_t h_sync_a_pol;
+ uint32_t h_total;
+ uint32_t underflow_occurred_status;
+ uint32_t otg_enabled;
+ uint32_t blank_enabled;
+ uint32_t vertical_interrupt1_en;
+ uint32_t vertical_interrupt1_line;
+ uint32_t vertical_interrupt2_en;
+ uint32_t vertical_interrupt2_line;
+ uint32_t vertical_interrupt2_dest;
+ uint32_t otg_master_update_lock;
+ uint32_t otg_double_buffer_control;
};
/**
@@ -166,6 +204,7 @@ struct timing_generator_funcs {
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios
);
@@ -182,9 +221,7 @@ struct timing_generator_funcs {
bool (*enable_crtc)(struct timing_generator *tg);
bool (*disable_crtc)(struct timing_generator *tg);
-#ifdef CONFIG_DRM_AMD_DC_FP
void (*phantom_crtc_post_enable)(struct timing_generator *tg);
-#endif
void (*disable_phantom_crtc)(struct timing_generator *tg);
bool (*immediate_disable_crtc)(struct timing_generator *tg);
bool (*is_counter_moving)(struct timing_generator *tg);
@@ -252,7 +289,8 @@ struct timing_generator_funcs {
int vready_offset,
int vstartup_start,
int vupdate_offset,
- int vupdate_width);
+ int vupdate_width,
+ int pstate_keepout);
void (*enable_optc_clock)(struct timing_generator *tg, bool enable);
void (*program_stereo)(struct timing_generator *tg,
const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags);
@@ -272,6 +310,7 @@ struct timing_generator_funcs {
uint32_t *num_of_input_segments,
uint32_t *seg0_src_sel,
uint32_t *seg1_src_sel);
+ bool (*is_two_pixels_per_container)(const struct dc_crtc_timing *timing);
/**
* Configure CRCs for the given timing generator. Return false if TG is
@@ -284,7 +323,7 @@ struct timing_generator_funcs {
* @get_crc: Get CRCs for the given timing generator. Return false if
* CRCs are not enabled (via configure_crc).
*/
- bool (*get_crc)(struct timing_generator *tg,
+ bool (*get_crc)(struct timing_generator *tg, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb);
void (*program_manual_trigger)(struct timing_generator *optc);
@@ -308,7 +347,8 @@ struct timing_generator_funcs {
* OPP(s) and turn on/off ODM memory.
*/
void (*set_odm_combine)(struct timing_generator *optc, int *opp_id, int opp_cnt,
- struct dc_crtc_timing *timing);
+ int segment_width, int last_segment_width);
+ void (*get_odm_combine_segments)(struct timing_generator *tg, int *odm_segments);
void (*set_h_timing_div_manual_mode)(struct timing_generator *optc, bool manual_mode);
void (*set_gsl)(struct timing_generator *optc, const struct gsl_params *params);
void (*set_gsl_source_select)(struct timing_generator *optc,
@@ -332,6 +372,15 @@ struct timing_generator_funcs {
void (*init_odm)(struct timing_generator *tg);
void (*wait_drr_doublebuffer_pending_clear)(struct timing_generator *tg);
+ void (*set_long_vtotal)(struct timing_generator *optc, const struct long_vtotal_params *params);
+ void (*wait_odm_doublebuffer_pending_clear)(struct timing_generator *tg);
+ void (*wait_otg_disable)(struct timing_generator *optc);
+ bool (*get_optc_double_buffer_pending)(struct timing_generator *tg);
+ bool (*get_otg_double_buffer_pending)(struct timing_generator *tg);
+ bool (*get_pipe_update_pending)(struct timing_generator *tg);
+ void (*set_vupdate_keepout)(struct timing_generator *tg, bool enable);
+ bool (*wait_update_lock_status)(struct timing_generator *tg, bool locked);
+ void (*read_otg_state)(struct timing_generator *tg, struct dcn_otg_state *s);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
index 9ac9d5e8df8b..5a1d9b708a9d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
@@ -29,6 +29,7 @@
#include "hw_shared.h"
#include "dc_hw_types.h"
#include "fixed31_32.h"
+#include "sspl/dc_spl_types.h"
#define CSC_TEMPERATURE_MATRIX_SIZE 12
@@ -110,22 +111,6 @@ enum graphics_gamut_adjust_type {
GRAPHICS_GAMUT_ADJUST_TYPE_SW /* use adjustments */
};
-enum lb_memory_config {
- /* Enable all 3 pieces of memory */
- LB_MEMORY_CONFIG_0 = 0,
-
- /* Enable only the first piece of memory */
- LB_MEMORY_CONFIG_1 = 1,
-
- /* Enable only the second piece of memory */
- LB_MEMORY_CONFIG_2 = 2,
-
- /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the
- * last piece of chroma memory used for the luma storage
- */
- LB_MEMORY_CONFIG_3 = 3
-};
-
struct xfm_grph_csc_adjustment {
struct fixed31_32 temperature_matrix[CSC_TEMPERATURE_MATRIX_SIZE];
enum graphics_gamut_adjust_type gamut_adjust_type;
@@ -177,6 +162,8 @@ struct scaler_data {
struct sharpness_adj sharpness;
enum pixel_format format;
struct line_buffer_params lb_params;
+ // Below struct holds the scaler values to program hw registers
+ struct dscl_prog_data dscl_prog_data;
};
struct transform_funcs {
@@ -258,7 +245,6 @@ struct transform_funcs {
void (*set_cursor_attributes)(
struct transform *xfm_base,
const struct dc_cursor_attributes *attr);
-
};
const uint16_t *get_filter_2tap_16p(void);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/vpg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/vpg.h
new file mode 100644
index 000000000000..51da368f5c3e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/vpg.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ */
+
+#ifndef __DC_VPG_H__
+#define __DC_VPG_H__
+
+struct dc_context;
+struct dc_info_packet;
+
+struct vpg;
+
+struct vpg_funcs {
+ void (*update_generic_info_packet)(
+ struct vpg *vpg,
+ uint32_t packet_index,
+ const struct dc_info_packet *info_packet,
+ bool immediate_update);
+
+ void (*vpg_poweron)(
+ struct vpg *vpg);
+
+ void (*vpg_powerdown)(
+ struct vpg *vpg);
+};
+
+struct vpg {
+ const struct vpg_funcs *funcs;
+ struct dc_context *ctx;
+ int inst;
+};
+
+#endif /* __DC_VPG_H__ */ \ No newline at end of file
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h
index dc650be3837e..f1afb31ac70b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h
@@ -96,11 +96,6 @@ struct link_encoder *link_enc_cfg_get_link_enc_used_by_link(
/* Return next available DIG link encoder. NULL if none available. */
struct link_encoder *link_enc_cfg_get_next_avail_link_enc(struct dc *dc);
-/* Return DIG link encoder used by stream. NULL if unused. */
-struct link_encoder *link_enc_cfg_get_link_enc_used_by_stream(
- struct dc *dc,
- const struct dc_stream_state *stream);
-
/* Return DIG link encoder. NULL if unused. */
struct link_encoder *link_enc_cfg_get_link_enc(const struct dc_link *link);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link_service.h
index e3e8c76c17cf..1e34e84160aa 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link_service.h
@@ -42,8 +42,8 @@
* dc_link_exports.c or other dc files implement dc.h
*
* DC to Link:
- * dc_link_exports.c or other dc files include link.h
- * link_factory.c implements link.h
+ * dc_link_exports.c or other dc files include link_service.h
+ * link_factory.c implements link_service.h
*
* Link sub-component to Link sub-component:
* link_factory.c includes --> link_xxx.h
@@ -73,7 +73,7 @@
* 2. Implement your function in the suitable link_xxx.c file.
* 3. Assign the function to link_service in link_factory.c
* 4. NEVER include link_xxx.h headers outside link component.
- * 5. NEVER include link.h on DM side.
+ * 5. NEVER include link_service.h on DM side.
*/
#include "core_types.h"
@@ -144,9 +144,13 @@ struct link_service {
uint32_t (*dp_link_bandwidth_kbps)(
const struct dc_link *link,
const struct dc_link_settings *link_settings);
- bool (*validate_dpia_bandwidth)(
- const struct dc_stream_state *stream,
- const unsigned int num_streams);
+ enum dc_status (*validate_dp_tunnel_bandwidth)(
+ const struct dc *dc,
+ const struct dc_state *new_ctx);
+
+ uint32_t (*dp_required_hblank_size_bytes)(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params);
/*************************** DPMS *************************************/
@@ -203,6 +207,9 @@ struct link_service {
bool (*dp_decide_link_settings)(
struct dc_stream_state *stream,
struct dc_link_settings *link_setting);
+ void (*dp_decide_tunnel_settings)(
+ struct dc_stream_state *stream,
+ struct dc_tunnel_settings *dp_tunnel_setting);
enum dp_link_encoding (*mst_decide_link_encoding_format)(
const struct dc_link *link);
bool (*edp_decide_link_settings)(struct dc_link *link,
@@ -211,13 +218,14 @@ struct link_service {
bool (*dp_overwrite_extended_receiver_cap)(struct dc_link *link);
enum lttpr_mode (*dp_decide_lttpr_mode)(struct dc_link *link,
struct dc_link_settings *link_setting);
-
+ uint8_t (*dp_get_lttpr_count)(struct dc_link *link);
+ void (*edp_get_alpm_support)(struct dc_link *link,
+ bool *auxless_support,
+ bool *auxwake_support);
/*************************** DP DPIA/PHY ******************************/
- int (*dpia_handle_usb4_bandwidth_allocation_for_link)(
+ void (*dpia_handle_usb4_bandwidth_allocation_for_link)(
struct dc_link *link, int peak_bw);
- void (*dpia_handle_bw_alloc_response)(
- struct dc_link *link, uint8_t bw, uint8_t result);
void (*dp_set_drive_settings)(
struct dc_link *link,
const struct link_resource *link_res,
@@ -248,8 +256,7 @@ struct link_service {
uint32_t *backlight_millinits_avg,
uint32_t *backlight_millinits_peak);
bool (*edp_set_backlight_level)(const struct dc_link *link,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp);
+ struct set_backlight_level_params *backlight_level_params);
bool (*edp_set_backlight_level_nits)(struct dc_link *link,
bool isHDR,
uint32_t backlight_millinits,
@@ -272,7 +279,7 @@ struct link_service {
uint16_t psr_vtotal_idle,
uint16_t psr_vtotal_su);
void (*edp_get_psr_residency)(
- const struct dc_link *link, uint32_t *residency);
+ const struct dc_link *link, uint32_t *residency, enum psr_residency_mode mode);
bool (*edp_get_replay_state)(
const struct dc_link *link, uint64_t *state);
@@ -281,11 +288,16 @@ struct link_service {
const unsigned int *power_opts);
bool (*edp_setup_replay)(struct dc_link *link,
const struct dc_stream_state *stream);
+ bool (*edp_send_replay_cmd)(struct dc_link *link,
+ enum replay_FW_Message_type msg,
+ union dmub_replay_cmd_set *cmd_data);
bool (*edp_set_coasting_vtotal)(
- struct dc_link *link, uint16_t coasting_vtotal);
+ struct dc_link *link, uint32_t coasting_vtotal);
bool (*edp_replay_residency)(const struct dc_link *link,
unsigned int *residency, const bool is_start,
- const bool is_alpm);
+ const enum pr_residency_mode mode);
+ bool (*edp_set_replay_power_opt_and_coasting_vtotal)(struct dc_link *link,
+ const unsigned int *power_opts, uint32_t coasting_vtotal);
bool (*edp_wait_for_t12)(struct dc_link *link);
bool (*edp_is_ilr_optimization_required)(struct dc_link *link,
@@ -295,6 +307,7 @@ struct link_service {
bool (*edp_receiver_ready_T9)(struct dc_link *link);
bool (*edp_receiver_ready_T7)(struct dc_link *link);
bool (*edp_power_alpm_dpcd_enable)(struct dc_link *link, bool enable);
+ void (*edp_set_panel_power)(struct dc_link *link, bool powerOn);
/*************************** DP CTS ************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
index a402df225a76..26cb1459b743 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
@@ -508,6 +508,10 @@ uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx,
initial_val, \
n, __VA_ARGS__)
+#define IX_REG_SET_SYNC(index, init_value, f1, v1) \
+ IX_REG_SET_N_SYNC(index, 1, init_value, \
+ FN(reg, f1), v1)
+
#define IX_REG_SET_2_SYNC(index, init_value, f1, v1, f2, v2) \
IX_REG_SET_N_SYNC(index, 2, init_value, \
FN(reg, f1), v1,\
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index e546b9c506c1..4e26a16a8743 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -32,6 +32,7 @@
#define MEMORY_TYPE_MULTIPLIER_CZ 4
#define MEMORY_TYPE_HBM 2
+#define MAX_MCACHES 8
#define IS_PIPE_SYNCD_VALID(pipe) ((((pipe)->pipe_idx_syncd) & 0x80)?1:0)
@@ -44,6 +45,7 @@ enum dce_version resource_parse_asic_id(
struct resource_caps {
int num_timing_generator;
int num_opp;
+ int num_dpp;
int num_video_plane;
int num_audio;
int num_stream_encoder;
@@ -65,6 +67,13 @@ struct resource_straps {
uint32_t audio_stream_number;
};
+struct dc_mcache_allocations {
+ int global_mcache_ids_plane0[MAX_MCACHES + 1];
+ int global_mcache_ids_plane1[MAX_MCACHES + 1];
+ int global_mcache_ids_mall_plane0[MAX_MCACHES + 1];
+ int global_mcache_ids_mall_plane1[MAX_MCACHES + 1];
+};
+
struct resource_create_funcs {
void (*read_dce_straps)(
struct dc_context *ctx, struct resource_straps *straps);
@@ -77,11 +86,9 @@ struct resource_create_funcs {
struct hpo_dp_stream_encoder *(*create_hpo_dp_stream_encoder)(
enum engine_id eng_id, struct dc_context *ctx);
-
struct hpo_dp_link_encoder *(*create_hpo_dp_link_encoder)(
uint8_t inst,
struct dc_context *ctx);
-
struct dce_hwseq *(*create_hwseq)(
struct dc_context *ctx);
};
@@ -103,6 +110,10 @@ enum dc_status resource_map_pool_resources(
struct dc_state *context,
struct dc_stream_state *stream);
+void resource_build_test_pattern_params(
+ struct resource_context *res_ctx,
+ struct pipe_ctx *pipe_ctx);
+
bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx);
enum dc_status resource_build_scaling_params_for_context(
@@ -149,6 +160,8 @@ bool resource_attach_surfaces_to_context(
struct dc_state *context,
const struct resource_pool *pool);
+bool resource_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx);
+
#define FREE_PIPE_INDEX_NOT_FOUND -1
/*
@@ -213,6 +226,21 @@ bool resource_attach_surfaces_to_context(
* | | | | |
* | 5 | (FREE) | | |
* |________|_______________|___________|_____________|
+ *
+ * The following is a quick reference of the class relation:
+ *
+ * DC state ---1--------0..N--- streams
+ *
+ * stream ---1-----------1--- OTG Master pipe
+ *
+ * OTG Master pipe ---1--------1..N--- OPP Head pipes
+ *
+ * OPP Head pipe ---1--------0..N--- DPP pipes
+ *
+ * stream ---1--------0..N--- Planes
+ *
+ * Plane ---1--------1..N--- DPP pipes
+ *
*/
enum pipe_type {
/* free pipe - free pipe is an uninitialized pipe without a stream
@@ -223,8 +251,8 @@ enum pipe_type {
/* OTG master pipe - the master pipe of its OPP head pipes with a
* functional OTG. It merges all its OPP head pipes pixel data in ODM
- * block and output to backend DIG. OTG master pipe is responsible for
- * generating entire crtc timing to backend DIG. An OTG master pipe may
+ * block and output to back end DIG. OTG master pipe is responsible for
+ * generating entire CRTC timing to back end DIG. An OTG master pipe may
* or may not have a plane. If it has a plane it blends it as the left
* most MPC slice of the top most layer. If it doesn't have a plane it
* can output pixel data from its OPP head pipes' test pattern
@@ -252,33 +280,216 @@ enum pipe_type {
};
/*
- * Determine if the input pipe ctx is of a pipe type.
- * return - true if pipe ctx is of the input type.
+ * Determine if the input pipe_ctx is of a pipe type.
+ * return - true if pipe_ctx is of the input type.
*/
bool resource_is_pipe_type(const struct pipe_ctx *pipe_ctx, enum pipe_type type);
/*
- * Determine if the input pipe ctx is used for rendering a plane with MPCC
- * combine. MPCC combine is a hardware feature to combine multiple DPP pipes
- * into a single plane. It is typically used for bypassing pipe bandwidth
- * limitation for rendering a very large plane or saving power by reducing UCLK
- * and DPPCLK speeds.
+ * Acquire a pipe as OTG master pipe and allocate pipe resources required to
+ * enable stream output.
+ */
+enum dc_status resource_add_otg_master_for_stream_output(struct dc_state *new_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream);
+
+/*
+ * Release pipe resources and the OTG master pipe associated with the stream
+ * The stream must have all planes removed and ODM/MPC slice counts are reset
+ * to 1 before invoking this interface.
+ */
+void resource_remove_otg_master_for_stream_output(struct dc_state *new_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream);
+
+/*
+ * Add plane to the bottom most layer in plane composition and allocate DPP pipe
+ * resources as needed.
+ * return - true if plane is added in plane composition, false otherwise.
+ */
+bool resource_append_dpp_pipes_for_plane_composition(
+ struct dc_state *new_ctx,
+ struct dc_state *cur_ctx,
+ struct resource_pool *pool,
+ struct pipe_ctx *otg_master_pipe,
+ struct dc_plane_state *plane_state);
+
+/*
+ * Remove the DPP pipes associated with the plane from plane composition in
+ * context.
+ * No value is returned.
+ */
+void resource_remove_dpp_pipes_for_plane_composition(
+ struct dc_state *context,
+ const struct resource_pool *pool,
+ const struct dc_plane_state *plane_state);
+
+/*
+ * Update ODM slice count by acquiring or releasing pipes. If new slices need
+ * to be added, it is going to add them to the last ODM index. If existing
+ * slices need to be removed, it is going to remove them from the last ODM
+ * index.
*
- * For instance in the Inter-pipe Relation diagram shown below, both PIPE 0 and
- * 1 are for MPCC combine for plane 0
+ * return - true if ODM slices are updated and required pipes are acquired. All
+ * affected pipe parameters are updated.
*
- * Inter-pipe Relation
- * __________________________________________________
- * |PIPE IDX| DPP PIPES | OPP HEADS | OTG MASTER |
- * | | plane 0 | | |
- * | 0 | -------------MPC----------------------- |
- * | | plane 0 | | | |
- * | 1 | ------------- | | |
- * |________|_______________|___________|_____________|
+ * false if resource fails to complete this update. The function is not designed
+ * to recover the creation of invalid topologies. Returning false is typically
+ * an indication of insufficient validation in caller's stack. new_ctx will be
+ * invalid. Caller may attempt to restore new_ctx by calling this function
+ * again with original slice count.
+ */
+bool resource_update_pipes_for_stream_with_slice_count(
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ const struct resource_pool *pool,
+ const struct dc_stream_state *stream,
+ int new_slice_count);
+
+/*
+ * Update MPC slice count by acquiring or releasing DPP pipes. If new slices
+ * need to be added it is going to add to the last MPC index. If existing
+ * slices need to be removed, it is going to remove them from the last MPC
+ * index.
+ *
+ * @plane - plane whose MPC slice count is updated.
*
- * return - true if pipe ctx is used for mpcc combine.
+ * return - true if MPC slices are updated and required pipes are acquired. All
+ * affected pipe parameters are updated.
+ *
+ * false if resource fails to complete this update. The function is not designed
+ * to recover the creation of invalid topologies. Returning false is typically
+ * an indication of insufficient validation in caller's stack. new_ctx will be
+ * invalid. Caller may attempt to restore new_ctx by calling this function
+ * again with original slice count.
+ */
+bool resource_update_pipes_for_plane_with_slice_count(
+ struct dc_state *new_ctx,
+ const struct dc_state *cur_ctx,
+ const struct resource_pool *pool,
+ const struct dc_plane_state *plane,
+ int slice_count);
+
+/*
+ * Get the OTG master pipe in resource context associated with the stream.
+ * return - NULL if not found. Otherwise the OTG master pipe associated with the
+ * stream.
+ */
+struct pipe_ctx *resource_get_otg_master_for_stream(
+ struct resource_context *res_ctx,
+ const struct dc_stream_state *stream);
+
+/*
+ * Get an array of OPP heads in opp_heads ordered with index low to high for OTG
+ * master pipe in res_ctx.
+ * return - number of OPP heads in the array. If otg_master passed in is not
+ * an OTG master, the function returns 0.
+ */
+int resource_get_opp_heads_for_otg_master(const struct pipe_ctx *otg_master,
+ struct resource_context *res_ctx,
+ struct pipe_ctx *opp_heads[MAX_PIPES]);
+
+/*
+ * Get an array of DPP pipes in dpp_pipes ordered with index low to high for OPP
+ * head pipe in res_ctx.
+ * return - number of DPP pipes in the array. If opp_head passed in is not
+ * an OPP pipe, the function returns 0.
*/
-bool resource_is_for_mpcc_combine(const struct pipe_ctx *pipe_ctx);
+int resource_get_dpp_pipes_for_opp_head(const struct pipe_ctx *opp_head,
+ struct resource_context *res_ctx,
+ struct pipe_ctx *dpp_pipes[MAX_PIPES]);
+
+/*
+ * Get an array of DPP pipes in dpp_pipes ordered with index low to high for
+ * plane in res_ctx.
+ * return - number of DPP pipes in the array.
+ */
+int resource_get_dpp_pipes_for_plane(const struct dc_plane_state *plane,
+ struct resource_context *res_ctx,
+ struct pipe_ctx *dpp_pipes[MAX_PIPES]);
+
+/*
+ * Get the OTG master pipe for the input pipe context.
+ * return - the OTG master pipe for the input pipe
+ * context.
+ */
+struct pipe_ctx *resource_get_otg_master(const struct pipe_ctx *pipe_ctx);
+
+/*
+ * Get the OPP head pipe for the input pipe context.
+ * return - the OPP head pipe for the input pipe
+ * context.
+ */
+struct pipe_ctx *resource_get_opp_head(const struct pipe_ctx *pipe_ctx);
+
+/*
+ * Get the DPP pipe allocated for MPC slice 0 and ODM slice 0 of the plane
+ * associated with dpp_pipe.
+ */
+struct pipe_ctx *resource_get_primary_dpp_pipe(const struct pipe_ctx *dpp_pipe);
+
+/*
+ * Get the MPC slice index counting from 0 from left most slice
+ * For example, if a DPP pipe is used as a secondary pipe in MPCC combine, MPC
+ * split index is greater than 0.
+ */
+int resource_get_mpc_slice_index(const struct pipe_ctx *dpp_pipe);
+
+/*
+ * Get the number of MPC slices associated with the pipe.
+ * The function returns 0 if the pipe is not associated with an MPC combine
+ * pipe topology.
+ */
+int resource_get_mpc_slice_count(const struct pipe_ctx *pipe);
+
+/*
+ * Get the number of ODM slices associated with the pipe.
+ * The function returns 0 if the pipe is not associated with an ODM combine
+ * pipe topology.
+ */
+int resource_get_odm_slice_count(const struct pipe_ctx *pipe);
+
+/* Get the ODM slice index counting from 0 from left most slice */
+int resource_get_odm_slice_index(const struct pipe_ctx *opp_head);
+
+/* Get ODM slice source rect in timing active as input to OPP block */
+struct rect resource_get_odm_slice_src_rect(struct pipe_ctx *pipe_ctx);
+
+/* Get ODM slice destination rect in timing active as output from OPP block */
+struct rect resource_get_odm_slice_dst_rect(struct pipe_ctx *pipe_ctx);
+
+/* Get ODM slice destination width in timing active as output from OPP block */
+int resource_get_odm_slice_dst_width(struct pipe_ctx *otg_master,
+ bool is_last_segment);
+
+/* determine if pipe topology is changed between state a and state b */
+bool resource_is_pipe_topology_changed(const struct dc_state *state_a,
+ const struct dc_state *state_b);
+
+/*
+ * determine if the two OTG master pipes have the same ODM topology
+ * return
+ * false - if pipes passed in are not OTG masters or ODM topology is
+ * changed.
+ * true - otherwise
+ */
+bool resource_is_odm_topology_changed(const struct pipe_ctx *otg_master_a,
+ const struct pipe_ctx *otg_master_b);
+
+/* log the pipe topology update in state */
+void resource_log_pipe_topology_update(struct dc *dc, struct dc_state *state);
+
+/*
+ * Look for a free pipe in new resource context that is used as a secondary OPP
+ * head by cur_otg_master.
+ *
+ * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise
+ * pipe idx of the free pipe
+ */
+int resource_find_free_pipe_used_as_sec_opp_head_by_cur_otg_master(
+ const struct resource_context *cur_res_ctx,
+ struct resource_context *new_res_ctx,
+ const struct pipe_ctx *cur_otg_master);
/*
* Look for a free pipe in new resource context that is used as a secondary DPP
@@ -305,6 +516,29 @@ int recource_find_free_pipe_not_used_in_cur_res_ctx(
const struct resource_pool *pool);
/*
+ * Look for a free pipe in new resource context that is used in current resource
+ * context as an OTG master pipe.
+ *
+ * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise
+ * pipe idx of the free pipe
+ */
+int recource_find_free_pipe_used_as_otg_master_in_cur_res_ctx(
+ const struct resource_context *cur_res_ctx,
+ struct resource_context *new_res_ctx,
+ const struct resource_pool *pool);
+
+/*
+ * Look for a free pipe in new resource context that is used as a secondary DPP
+ * pipe in current resource context.
+ * return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise
+ * pipe idx of the free pipe
+ */
+int resource_find_free_pipe_used_as_cur_sec_dpp(
+ const struct resource_context *cur_res_ctx,
+ struct resource_context *new_res_ctx,
+ const struct resource_pool *pool);
+
+/*
* Look for a free pipe in new resource context that is used as a secondary DPP
* pipe in any MPCC combine in current resource context.
* return - FREE_PIPE_INDEX_NOT_FOUND if free pipe is not found, otherwise
@@ -333,48 +567,6 @@ struct pipe_ctx *resource_find_free_secondary_pipe_legacy(
const struct resource_pool *pool,
const struct pipe_ctx *primary_pipe);
-/*
- * Get number of MPC "cuts" of the plane associated with the pipe. MPC slice
- * count is equal to MPC splits + 1. For example if a plane is cut 3 times, it
- * will have 4 pieces of slice.
- * return - 0 if pipe is not used for a plane with MPCC combine. otherwise
- * the number of MPC "cuts" for the plane.
- */
-int resource_get_num_mpc_splits(const struct pipe_ctx *pipe);
-
-/*
- * Get number of ODM "cuts" of the timing associated with the pipe. ODM slice
- * count is equal to ODM splits + 1. For example if a timing is cut 3 times, it
- * will have 4 pieces of slice.
- * return - 0 if pipe is not used for ODM combine. otherwise
- * the number of ODM "cuts" for the timing.
- */
-int resource_get_num_odm_splits(const struct pipe_ctx *pipe);
-
-/*
- * Get the OTG master pipe in resource context associated with the stream.
- * return - NULL if not found. Otherwise the OTG master pipe associated with the
- * stream.
- */
-struct pipe_ctx *resource_get_otg_master_for_stream(
- struct resource_context *res_ctx,
- struct dc_stream_state *stream);
-
-/*
- * Get the OTG master pipe for the input pipe context.
- * return - the OTG master pipe for the input pipe
- * context.
- */
-struct pipe_ctx *resource_get_otg_master(const struct pipe_ctx *pipe_ctx);
-
-/*
- * Get the OPP head pipe for the input pipe context.
- * return - the OPP head pipe for the input pipe
- * context.
- */
-struct pipe_ctx *resource_get_opp_head(const struct pipe_ctx *pipe_ctx);
-
-
bool resource_validate_attach_surfaces(
const struct dc_validation_set set[],
int set_count,
@@ -407,20 +599,10 @@ void update_audio_usage(
unsigned int resource_pixel_format_to_bpp(enum surface_pixel_format format);
-void get_audio_check(struct audio_info *aud_modes,
- struct audio_check *aud_chk);
-
bool get_temp_dp_link_res(struct dc_link *link,
struct link_resource *link_res,
struct dc_link_settings *link_settings);
-#if defined(CONFIG_DRM_AMD_DC_FP)
-struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt(
- const struct resource_context *res_ctx,
- const struct resource_pool *pool,
- const struct dc_link *link);
-#endif
-
void reset_syncd_pipes_from_disabled_pipes(struct dc *dc,
struct dc_state *context);
@@ -439,7 +621,7 @@ const struct link_hwss *get_link_hwss(const struct dc_link *link,
bool is_h_timing_divisible_by_2(struct dc_stream_state *stream);
-bool dc_resource_acquire_secondary_pipe_for_mpc_odm(
+bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
const struct dc *dc,
struct dc_state *state,
struct pipe_ctx *pri_pipe,
@@ -454,4 +636,27 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm(
enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
struct dc_state *context,
struct pipe_ctx *pipe_ctx);
+
+/* Get hw programming parameters container from pipe context
+ * @pipe_ctx: pipe context
+ * return - struct holding the programmable hw reg values
+ */
+struct dscl_prog_data *resource_get_dscl_prog_data(struct pipe_ctx *pipe_ctx);
+/* Setup dc callbacks for dml2
+ * @dc: the display core structure
+ * @dml2_options: struct to hold callbacks
+ */
+void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuration_options *dml2_options);
+
+/*
+ * Calculate total DET allocated for all pipes for a given OTG_MASTER pipe
+ */
+int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master);
+
+bool resource_is_hpo_acquired(struct dc_state *context);
+
+struct link_encoder *get_temp_dio_link_enc(
+ const struct resource_context *res_ctx,
+ const struct resource_pool *const pool,
+ const struct dc_link *link);
#endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h b/drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h
new file mode 100644
index 000000000000..23daf98b8aa8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2025 Advanced Micro Devices, Inc.
+
+#ifndef __SOC_AND_IP_TRANSLATOR_H__
+#define __SOC_AND_IP_TRANSLATOR_H__
+
+#include "dc.h"
+#include "dml_top_soc_parameter_types.h"
+
+struct soc_and_ip_translator_funcs {
+ void (*get_soc_bb)(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config);
+ void (*get_ip_caps)(struct dml2_ip_capabilities *dml_ip_caps);
+};
+
+struct soc_and_ip_translator {
+ const struct soc_and_ip_translator_funcs *translator_funcs;
+};
+
+struct soc_and_ip_translator *dc_create_soc_and_ip_translator(enum dce_version dc_version);
+void dc_destroy_soc_and_ip_translator(struct soc_and_ip_translator **soc_and_ip_translator);
+
+
+#endif // __SOC_AND_IP_TRANSLATOR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/irq/Makefile b/drivers/gpu/drm/amd/display/dc/irq/Makefile
index a0d86a154a98..b5e14d792378 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/irq/Makefile
@@ -162,3 +162,39 @@ IRQ_DCN32 = irq_service_dcn32.o
AMD_DAL_IRQ_DCN32= $(addprefix $(AMDDALPATH)/dc/irq/dcn32/,$(IRQ_DCN32))
AMD_DISPLAY_FILES += $(AMD_DAL_IRQ_DCN32)
+
+###############################################################################
+# DCN 35
+###############################################################################
+IRQ_DCN35 = irq_service_dcn35.o
+
+AMD_DAL_IRQ_DCN35= $(addprefix $(AMDDALPATH)/dc/irq/dcn35/,$(IRQ_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_IRQ_DCN35)
+
+###############################################################################
+# DCN 351
+###############################################################################
+IRQ_DCN351 = irq_service_dcn351.o
+
+AMD_DAL_IRQ_DCN351= $(addprefix $(AMDDALPATH)/dc/irq/dcn351/,$(IRQ_DCN351))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_IRQ_DCN351)
+
+###############################################################################
+# DCN 36
+###############################################################################
+IRQ_DCN36 = irq_service_dcn36.o
+
+AMD_DAL_IRQ_DCN36= $(addprefix $(AMDDALPATH)/dc/irq/dcn36/,$(IRQ_DCN36))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_IRQ_DCN36)
+
+###############################################################################
+# DCN 401
+###############################################################################
+IRQ_DCN401 = irq_service_dcn401.o
+
+AMD_DAL_IRQ_DCN401= $(addprefix $(AMDDALPATH)/dc/irq/dcn401/,$(IRQ_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_IRQ_DCN401)
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
index 44649db5f3e3..bb576a9c5fdb 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
@@ -61,27 +61,27 @@ static bool hpd_ack(struct irq_service *irq_service,
return true;
}
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
.ack = hpd_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = dce110_vblank_set,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -211,8 +211,12 @@ bool dce110_vblank_set(struct irq_service *irq_service,
info->ext_id);
uint8_t pipe_offset = dal_irq_src - IRQ_TYPE_VBLANK;
- struct timing_generator *tg =
- dc->current_state->res_ctx.pipe_ctx[pipe_offset].stream_res.tg;
+ struct timing_generator *tg;
+
+ if (pipe_offset >= MAX_PIPES)
+ return false;
+
+ tg = dc->current_state->res_ctx.pipe_ctx[pipe_offset].stream_res.tg;
if (enable) {
if (!tg || !tg->funcs->arm_vert_intr(tg, 2)) {
@@ -225,7 +229,7 @@ bool dce110_vblank_set(struct irq_service *irq_service,
return true;
}
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c
index 0a5e1a2a3c61..33ce470e4c88 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c
@@ -37,54 +37,27 @@
#include "ivsrcid/ivsrcid_vislands30.h"
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- HPD0_DC_HPD_INT_CONTROL,
- DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = dce110_vblank_set,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -174,7 +147,7 @@ static const struct irq_source_info_funcs vupdate_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c b/drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c
index 524481885fd0..d777b85e70da 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c
@@ -46,54 +46,27 @@
#include "dc_types.h"
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- DC_HPD1_INT_STATUS,
- DC_HPD1_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- DC_HPD1_INT_CONTROL,
- DC_HPD1_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd1_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = dce110_vblank_set,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs_dce60 = {
+static struct irq_source_info_funcs vblank_irq_info_funcs_dce60 = {
.set = NULL,
.ack = NULL
};
@@ -192,7 +165,7 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs_dce60 = {
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
@@ -391,5 +364,3 @@ struct irq_service *dal_irq_service_dce60_create(
dce60_irq_construct(irq_service, init_data);
return irq_service;
}
-
-
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c b/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c
index 85f63b4a8b90..3a9163acb49b 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c
@@ -37,54 +37,27 @@
#include "dc_types.h"
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- DC_HPD1_INT_STATUS,
- DC_HPD1_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- DC_HPD1_INT_CONTROL,
- DC_HPD1_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd1_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = dce110_vblank_set,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -184,7 +157,7 @@ static const struct irq_source_info_funcs vupdate_irq_info_funcs = {
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
@@ -303,5 +276,3 @@ struct irq_service *dal_irq_service_dce80_create(
dce80_irq_construct(irq_service, init_data);
return irq_service;
}
-
-
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c
index 532e506d027b..4ce9edd16344 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c
@@ -129,59 +129,32 @@ static enum dc_irq_source to_dal_irq_source_dcn10(struct irq_service *irq_servic
}
}
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- HPD0_DC_HPD_INT_CONTROL,
- DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -280,7 +253,7 @@ static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
index 93c31111500b..5847af0e66cb 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "include/logger_interface.h"
@@ -132,59 +130,32 @@ static enum dc_irq_source to_dal_irq_source_dcn20(
}
}
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- HPD0_DC_HPD_INT_CONTROL,
- DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -287,7 +258,7 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
index 3c7cb3dc046b..6417011d2246 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,8 +30,8 @@
#include "../dce110/irq_service_dce110.h"
#include "irq_service_dcn201.h"
-#include "dcn/dcn_2_0_3_offset.h"
-#include "dcn/dcn_2_0_3_sh_mask.h"
+#include "dcn/dcn_2_0_1_offset.h"
+#include "dcn/dcn_2_0_1_sh_mask.h"
#include "cyan_skillfish_ip_offset.h"
#include "soc15_hw_ip.h"
@@ -80,58 +80,31 @@ static enum dc_irq_source to_dal_irq_source_dcn201(
}
}
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- HPD0_DC_HPD_INT_CONTROL,
- DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -239,7 +212,7 @@ static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
index 717977aec6d0..71d2f065140b 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
@@ -23,8 +23,6 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "include/logger_interface.h"
@@ -134,64 +132,37 @@ static enum dc_irq_source to_dal_irq_source_dcn21(struct irq_service *irq_servic
return DC_IRQ_SOURCE_INVALID;
}
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- HPD0_DC_HPD_INT_CONTROL,
- DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs dmub_outbox_irq_info_funcs = {
+static struct irq_source_info_funcs dmub_outbox_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -316,7 +287,7 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
index 2aa74ee1502a..2a4080bdcf6b 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
@@ -139,64 +139,37 @@ static enum dc_irq_source to_dal_irq_source_dcn30(
}
}
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- HPD0_DC_HPD_INT_CONTROL,
- DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs dmub_trace_irq_info_funcs = {
+static struct irq_source_info_funcs dmub_trace_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -321,7 +294,7 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
@@ -447,4 +420,3 @@ struct irq_service *dal_irq_service_dcn30_create(
dcn30_irq_construct(irq_service, init_data);
return irq_service;
}
-
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c b/drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c
index 40fd34fb1d5e..624f1ac309f8 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c
@@ -126,54 +126,37 @@ static enum dc_irq_source to_dal_irq_source_dcn302(struct irq_service *irq_servi
}
}
-static bool hpd_ack(struct irq_service *irq_service, const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status = get_reg_field_value(value, HPD0_DC_HPD_INT_STATUS, DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(value, current_status ? 0 : 1, HPD0_DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs dmub_trace_irq_info_funcs = {
+static struct irq_source_info_funcs dmub_trace_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -290,7 +273,7 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c
index 1d149d290147..137caffae916 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c
@@ -2,7 +2,26 @@
/*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
* Authors: AMD
+ *
*/
#include "dm_services.h"
@@ -58,49 +77,32 @@ static enum dc_irq_source to_dal_irq_source_dcn303(struct irq_service *irq_servi
}
}
-static bool hpd_ack(struct irq_service *irq_service, const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status = get_reg_field_value(value, HPD0_DC_HPD_INT_STATUS, DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(value, current_status ? 0 : 1, HPD0_DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -195,7 +197,7 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.h b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.h
index fd64e3848ff3..be8fe836b3f1 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.h
@@ -2,7 +2,26 @@
/*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
* Authors: AMD
+ *
*/
#ifndef __DAL_IRQ_SERVICE_DCN303_H__
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c b/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c
index 1b88e4e627fd..921cb167d920 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c
@@ -128,64 +128,37 @@ static enum dc_irq_source to_dal_irq_source_dcn31(struct irq_service *irq_servic
}
}
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- HPD0_DC_HPD_INT_CONTROL,
- DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs outbox_irq_info_funcs = {
+static struct irq_source_info_funcs outbox_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -309,7 +282,7 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c b/drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c
index 37bc98faa7a0..0118fd6e5db0 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c
@@ -130,64 +130,37 @@ static enum dc_irq_source to_dal_irq_source_dcn314(struct irq_service *irq_servi
}
}
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- HPD0_DC_HPD_INT_CONTROL,
- DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs outbox_irq_info_funcs = {
+static struct irq_source_info_funcs outbox_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -311,7 +284,7 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c b/drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c
index e722171f0d2d..adebfc888618 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c
@@ -135,64 +135,37 @@ static enum dc_irq_source to_dal_irq_source_dcn315(
}
}
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- HPD0_DC_HPD_INT_CONTROL,
- DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
-
- return true;
-}
-
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = hpd0_ack
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs outbox_irq_info_funcs = {
+static struct irq_source_info_funcs outbox_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -316,7 +289,7 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c b/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c
index b1012fa1977b..e9e315c75d76 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c
@@ -129,64 +129,47 @@ static enum dc_irq_source to_dal_irq_source_dcn32(
}
}
-static bool hpd_ack(
- struct irq_service *irq_service,
- const struct irq_source_info *info)
-{
- uint32_t addr = info->status_reg;
- uint32_t value = dm_read_reg(irq_service->ctx, addr);
- uint32_t current_status =
- get_reg_field_value(
- value,
- HPD0_DC_HPD_INT_STATUS,
- DC_HPD_SENSE_DELAYED);
-
- dal_irq_service_ack_generic(irq_service, info);
-
- value = dm_read_reg(irq_service->ctx, info->enable_reg);
-
- set_reg_field_value(
- value,
- current_status ? 0 : 1,
- HPD0_DC_HPD_INT_CONTROL,
- DC_HPD_INT_POLARITY);
-
- dm_write_reg(irq_service->ctx, info->enable_reg, value);
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
+ .set = NULL,
+ .ack = hpd0_ack
+};
- return true;
-}
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
-static const struct irq_source_info_funcs hpd_irq_info_funcs = {
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
.set = NULL,
- .ack = hpd_ack
+ .ack = NULL
};
-static const struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs pflip_irq_info_funcs = {
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+static struct irq_source_info_funcs outbox_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vblank_irq_info_funcs = {
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs outbox_irq_info_funcs = {
+static struct irq_source_info_funcs vline1_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
-static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+static struct irq_source_info_funcs vline2_irq_info_funcs = {
.set = NULL,
.ack = NULL
};
@@ -259,6 +242,13 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
.funcs = &pflip_irq_info_funcs\
}
+#define vblank_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\
+ IRQ_REG_ENTRY(OTG, reg_num,\
+ OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\
+ OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\
+ .funcs = &vblank_irq_info_funcs\
+ }
/* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic
* of DCE's DC_IRQ_SOURCE_VUPDATEx.
*/
@@ -270,14 +260,6 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
.funcs = &vupdate_no_lock_irq_info_funcs\
}
-#define vblank_int_entry(reg_num)\
- [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\
- IRQ_REG_ENTRY(OTG, reg_num,\
- OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\
- OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\
- .funcs = &vblank_irq_info_funcs\
-}
-
#define vline0_int_entry(reg_num)\
[DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\
IRQ_REG_ENTRY(OTG, reg_num,\
@@ -285,6 +267,20 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\
.funcs = &vline0_irq_info_funcs\
}
+#define vline1_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_DC1_VLINE1 + reg_num] = {\
+ IRQ_REG_ENTRY(OTG, reg_num,\
+ OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE,\
+ OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_CLEAR),\
+ .funcs = &vline1_irq_info_funcs\
+ }
+#define vline2_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_DC1_VLINE2 + reg_num] = {\
+ IRQ_REG_ENTRY(OTG, reg_num,\
+ OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE,\
+ OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_CLEAR),\
+ .funcs = &vline2_irq_info_funcs\
+ }
#define dmub_outbox_int_entry()\
[DC_IRQ_SOURCE_DMCUB_OUTBOX] = {\
IRQ_REG_ENTRY_DMUB(\
@@ -310,7 +306,7 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
#define dc_underflow_int_entry(reg_num) \
[DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
-static const struct irq_source_info_funcs dummy_irq_info_funcs = {
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
.set = dal_irq_service_dummy_set,
.ack = dal_irq_service_dummy_ack
};
@@ -387,21 +383,29 @@ irq_source_info_dcn32[DAL_IRQ_SOURCES_NUMBER] = {
dc_underflow_int_entry(6),
[DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(),
[DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(),
- vupdate_no_lock_int_entry(0),
- vupdate_no_lock_int_entry(1),
- vupdate_no_lock_int_entry(2),
- vupdate_no_lock_int_entry(3),
vblank_int_entry(0),
vblank_int_entry(1),
vblank_int_entry(2),
vblank_int_entry(3),
+ [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(),
+ [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(),
+ dmub_outbox_int_entry(),
+ vupdate_no_lock_int_entry(0),
+ vupdate_no_lock_int_entry(1),
+ vupdate_no_lock_int_entry(2),
+ vupdate_no_lock_int_entry(3),
vline0_int_entry(0),
vline0_int_entry(1),
vline0_int_entry(2),
vline0_int_entry(3),
- [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(),
- [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(),
- dmub_outbox_int_entry(),
+ vline1_int_entry(0),
+ vline1_int_entry(1),
+ vline1_int_entry(2),
+ vline1_int_entry(3),
+ vline2_int_entry(0),
+ vline2_int_entry(1),
+ vline2_int_entry(2),
+ vline2_int_entry(3)
};
static const struct irq_service_funcs irq_service_funcs_dcn32 = {
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c b/drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c
new file mode 100644
index 000000000000..79e5e8c137ca
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services.h"
+#include "include/logger_interface.h"
+#include "../dce110/irq_service_dce110.h"
+
+#include "dcn/dcn_3_5_0_offset.h"
+#include "dcn/dcn_3_5_0_sh_mask.h"
+
+#include "irq_service_dcn35.h"
+
+#include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
+
+static enum dc_irq_source to_dal_irq_source_dcn35(
+ struct irq_service *irq_service,
+ uint32_t src_id,
+ uint32_t ext_id)
+{
+ switch (src_id) {
+ case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK1;
+ case DCN_1_0__SRCID__DC_D2_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK2;
+ case DCN_1_0__SRCID__DC_D3_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK3;
+ case DCN_1_0__SRCID__DC_D4_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK4;
+ case DCN_1_0__SRCID__DC_D5_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK5;
+ case DCN_1_0__SRCID__DC_D6_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK6;
+ case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC1_VLINE0;
+ case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC2_VLINE0;
+ case DCN_1_0__SRCID__OTG3_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC3_VLINE0;
+ case DCN_1_0__SRCID__OTG4_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC4_VLINE0;
+ case DCN_1_0__SRCID__OTG5_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC5_VLINE0;
+ case DCN_1_0__SRCID__OTG6_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC6_VLINE0;
+ case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP1;
+ case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP2;
+ case DCN_1_0__SRCID__HUBP2_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP3;
+ case DCN_1_0__SRCID__HUBP3_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP4;
+ case DCN_1_0__SRCID__HUBP4_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP5;
+ case DCN_1_0__SRCID__HUBP5_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP6;
+ case DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE1;
+ case DCN_1_0__SRCID__OTG1_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE2;
+ case DCN_1_0__SRCID__OTG2_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE3;
+ case DCN_1_0__SRCID__OTG3_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE4;
+ case DCN_1_0__SRCID__OTG4_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE5;
+ case DCN_1_0__SRCID__OTG5_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE6;
+ case DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT:
+ return DC_IRQ_SOURCE_DMCUB_OUTBOX;
+ case DCN_1_0__SRCID__DC_HPD1_INT:
+ /* generic src_id for all HPD and HPDRX interrupts */
+ switch (ext_id) {
+ case DCN_1_0__CTXID__DC_HPD1_INT:
+ return DC_IRQ_SOURCE_HPD1;
+ case DCN_1_0__CTXID__DC_HPD2_INT:
+ return DC_IRQ_SOURCE_HPD2;
+ case DCN_1_0__CTXID__DC_HPD3_INT:
+ return DC_IRQ_SOURCE_HPD3;
+ case DCN_1_0__CTXID__DC_HPD4_INT:
+ return DC_IRQ_SOURCE_HPD4;
+ case DCN_1_0__CTXID__DC_HPD5_INT:
+ return DC_IRQ_SOURCE_HPD5;
+ case DCN_1_0__CTXID__DC_HPD6_INT:
+ return DC_IRQ_SOURCE_HPD6;
+ case DCN_1_0__CTXID__DC_HPD1_RX_INT:
+ return DC_IRQ_SOURCE_HPD1RX;
+ case DCN_1_0__CTXID__DC_HPD2_RX_INT:
+ return DC_IRQ_SOURCE_HPD2RX;
+ case DCN_1_0__CTXID__DC_HPD3_RX_INT:
+ return DC_IRQ_SOURCE_HPD3RX;
+ case DCN_1_0__CTXID__DC_HPD4_RX_INT:
+ return DC_IRQ_SOURCE_HPD4RX;
+ case DCN_1_0__CTXID__DC_HPD5_RX_INT:
+ return DC_IRQ_SOURCE_HPD5RX;
+ case DCN_1_0__CTXID__DC_HPD6_RX_INT:
+ return DC_IRQ_SOURCE_HPD6RX;
+ default:
+ return DC_IRQ_SOURCE_INVALID;
+ }
+ break;
+
+ default:
+ return DC_IRQ_SOURCE_INVALID;
+ }
+}
+
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
+ .set = NULL,
+ .ack = hpd0_ack
+};
+
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs outbox_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+#undef BASE_INNER
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg]
+
+/* Expand to the segment base address looked up in ctx->dcn_reg_offsets. */
+#define BASE(seg) \
+ BASE_INNER(seg)
+
+#define SRI(reg_name, block, id)\
+ BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_DMUB(reg_name)\
+ BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define IRQ_REG_ENTRY(base, block, reg_num, reg1, mask1, reg2, mask2)\
+ REG_STRUCT[base + reg_num].enable_reg = SRI(reg1, block, reg_num),\
+ REG_STRUCT[base + reg_num].enable_mask = \
+ block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base + reg_num].enable_value[0] = \
+ block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base + reg_num].enable_value[1] = \
+ ~block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK, \
+ REG_STRUCT[base + reg_num].ack_reg = SRI(reg2, block, reg_num),\
+ REG_STRUCT[base + reg_num].ack_mask = \
+ block ## reg_num ## _ ## reg2 ## __ ## mask2 ## _MASK,\
+ REG_STRUCT[base + reg_num].ack_value = \
+ block ## reg_num ## _ ## reg2 ## __ ## mask2 ## _MASK \
+
+#define IRQ_REG_ENTRY_DMUB(base, reg1, mask1, reg2, mask2)\
+ REG_STRUCT[base].enable_reg = SRI_DMUB(reg1),\
+ REG_STRUCT[base].enable_mask = \
+ reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base].enable_value[0] = \
+ reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base].enable_value[1] = \
+ ~reg1 ## __ ## mask1 ## _MASK, \
+ REG_STRUCT[base].ack_reg = SRI_DMUB(reg2),\
+ REG_STRUCT[base].ack_mask = \
+ reg2 ## __ ## mask2 ## _MASK,\
+ REG_STRUCT[base].ack_value = \
+ reg2 ## __ ## mask2 ## _MASK \
+
+#define hpd_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_HPD1, HPD, reg_num,\
+ DC_HPD_INT_CONTROL, DC_HPD_INT_EN,\
+ DC_HPD_INT_CONTROL, DC_HPD_INT_ACK),\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1 + reg_num].funcs = &hpd_irq_info_funcs;\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1 + reg_num].status_reg = SRI(DC_HPD_INT_STATUS, HPD, reg_num);\
+
+#define hpd_rx_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_HPD1RX, HPD, reg_num,\
+ DC_HPD_INT_CONTROL, DC_HPD_RX_INT_EN,\
+ DC_HPD_INT_CONTROL, DC_HPD_RX_INT_ACK),\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1RX + reg_num].status_reg = SRI(DC_HPD_INT_STATUS, HPD, reg_num);\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1RX + reg_num].funcs = &hpd_rx_irq_info_funcs;\
+
+#define pflip_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_PFLIP1, HUBPREQ, reg_num,\
+ DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK,\
+ DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_PFLIP1 + reg_num].funcs = &pflip_irq_info_funcs\
+
+/* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match the
+ * semantics of DCE's DC_IRQ_SOURCE_VUPDATEx.
+ */
+#define vupdate_no_lock_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_VUPDATE1, OTG, reg_num,\
+ OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_INT_EN,\
+ OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_EVENT_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_VUPDATE1 + reg_num].funcs = &vupdate_no_lock_irq_info_funcs\
+
+#define vblank_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_VBLANK1, OTG, reg_num,\
+ OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\
+ OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_VBLANK1 + reg_num].funcs = &vblank_irq_info_funcs\
+
+#define vline0_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_DC1_VLINE0, OTG, reg_num,\
+ OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE,\
+ OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_DC1_VLINE0 + reg_num].funcs = &vline0_irq_info_funcs\
+
+#define dmub_outbox_int_entry()\
+ IRQ_REG_ENTRY_DMUB(DC_IRQ_SOURCE_DMCUB_OUTBOX, \
+ DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX1_READY_INT_EN,\
+ DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX1_READY_INT_ACK),\
+ REG_STRUCT[DC_IRQ_SOURCE_DMCUB_OUTBOX].funcs = &outbox_irq_info_funcs
+
+#define dummy_irq_entry(irqno) \
+ REG_STRUCT[irqno].funcs = &dummy_irq_info_funcs\
+
+#define i2c_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_I2C_DDC ## reg_num)
+
+#define dp_sink_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_DPSINK ## reg_num)
+
+#define gpio_pad_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_GPIOPAD ## reg_num)
+
+#define dc_underflow_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW)
+
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
+ .set = dal_irq_service_dummy_set,
+ .ack = dal_irq_service_dummy_ack
+};
+
+#define dcn35_irq_init_part_1() \
+ dummy_irq_entry(DC_IRQ_SOURCE_INVALID); \
+ hpd_int_entry(0); \
+ hpd_int_entry(1); \
+ hpd_int_entry(2); \
+ hpd_int_entry(3); \
+ hpd_int_entry(4); \
+ hpd_rx_int_entry(0); \
+ hpd_rx_int_entry(1); \
+ hpd_rx_int_entry(2); \
+ hpd_rx_int_entry(3); \
+ hpd_rx_int_entry(4); \
+ i2c_int_entry(1); \
+ i2c_int_entry(2); \
+ i2c_int_entry(3); \
+ i2c_int_entry(4); \
+ i2c_int_entry(5); \
+ i2c_int_entry(6); \
+ dp_sink_int_entry(1); \
+ dp_sink_int_entry(2); \
+ dp_sink_int_entry(3); \
+ dp_sink_int_entry(4); \
+ dp_sink_int_entry(5); \
+ dp_sink_int_entry(6); \
+ dummy_irq_entry(DC_IRQ_SOURCE_TIMER); \
+ pflip_int_entry(0); \
+ pflip_int_entry(1); \
+ pflip_int_entry(2); \
+ pflip_int_entry(3); \
+ dummy_irq_entry(DC_IRQ_SOURCE_PFLIP5); \
+ dummy_irq_entry(DC_IRQ_SOURCE_PFLIP6); \
+ dummy_irq_entry(DC_IRQ_SOURCE_PFLIP_UNDERLAY0); \
+ gpio_pad_int_entry(0); \
+ gpio_pad_int_entry(1); \
+ gpio_pad_int_entry(2); \
+ gpio_pad_int_entry(3); \
+ gpio_pad_int_entry(4); \
+ gpio_pad_int_entry(5); \
+ gpio_pad_int_entry(6); \
+ gpio_pad_int_entry(7); \
+ gpio_pad_int_entry(8); \
+ gpio_pad_int_entry(9); \
+ gpio_pad_int_entry(10); \
+ gpio_pad_int_entry(11); \
+ gpio_pad_int_entry(12); \
+ gpio_pad_int_entry(13); \
+ gpio_pad_int_entry(14); \
+ gpio_pad_int_entry(15); \
+ gpio_pad_int_entry(16); \
+ gpio_pad_int_entry(17); \
+ gpio_pad_int_entry(18); \
+ gpio_pad_int_entry(19); \
+ gpio_pad_int_entry(20); \
+ gpio_pad_int_entry(21); \
+ gpio_pad_int_entry(22); \
+ gpio_pad_int_entry(23); \
+ gpio_pad_int_entry(24); \
+ gpio_pad_int_entry(25); \
+ gpio_pad_int_entry(26); \
+ gpio_pad_int_entry(27); \
+ gpio_pad_int_entry(28); \
+ gpio_pad_int_entry(29); \
+ gpio_pad_int_entry(30); \
+ dc_underflow_int_entry(1); \
+ dc_underflow_int_entry(2); \
+ dc_underflow_int_entry(3); \
+ dc_underflow_int_entry(4); \
+ dc_underflow_int_entry(5); \
+ dc_underflow_int_entry(6); \
+ dummy_irq_entry(DC_IRQ_SOURCE_DMCU_SCP); \
+ dummy_irq_entry(DC_IRQ_SOURCE_VBIOS_SW); \
+
+#define dcn35_irq_init_part_2() \
+ vupdate_no_lock_int_entry(0); \
+ vupdate_no_lock_int_entry(1); \
+ vupdate_no_lock_int_entry(2); \
+ vupdate_no_lock_int_entry(3); \
+ vblank_int_entry(0); \
+ vblank_int_entry(1); \
+ vblank_int_entry(2); \
+ vblank_int_entry(3); \
+ vline0_int_entry(0); \
+ vline0_int_entry(1); \
+ vline0_int_entry(2); \
+ vline0_int_entry(3); \
+ dummy_irq_entry(DC_IRQ_SOURCE_DC5_VLINE1); \
+ dummy_irq_entry(DC_IRQ_SOURCE_DC6_VLINE1); \
+ dmub_outbox_int_entry()
+
+#define dcn35_irq_init() \
+ dcn35_irq_init_part_1(); \
+ dcn35_irq_init_part_2(); \
+
+static struct irq_source_info irq_source_info_dcn35[DAL_IRQ_SOURCES_NUMBER] = {0};
+
+static struct irq_service_funcs irq_service_funcs_dcn35 = {
+ .to_dal_irq_source = to_dal_irq_source_dcn35
+};
+
+static void dcn35_irq_construct(
+ struct irq_service *irq_service,
+ struct irq_service_init_data *init_data)
+{
+ struct dc_context *ctx = init_data->ctx;
+
+#define REG_STRUCT irq_source_info_dcn35
+ dcn35_irq_init();
+
+ dal_irq_service_construct(irq_service, init_data);
+
+ irq_service->info = irq_source_info_dcn35;
+ irq_service->funcs = &irq_service_funcs_dcn35;
+}
+
+struct irq_service *dal_irq_service_dcn35_create(
+ struct irq_service_init_data *init_data)
+{
+ struct irq_service *irq_service = kzalloc(sizeof(*irq_service),
+ GFP_KERNEL);
+
+ if (!irq_service)
+ return NULL;
+
+ dcn35_irq_construct(irq_service, init_data);
+ return irq_service;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.h b/drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.h
new file mode 100644
index 000000000000..bf71b1887d67
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DAL_IRQ_SERVICE_DCN35_H__
+#define __DAL_IRQ_SERVICE_DCN35_H__
+
+#include "../irq_service.h"
+
+struct irq_service *dal_irq_service_dcn35_create(
+ struct irq_service_init_data *init_data);
+
+#endif /* __DAL_IRQ_SERVICE_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c b/drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c
new file mode 100644
index 000000000000..163b8ee9ebf7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c
@@ -0,0 +1,382 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#include "dm_services.h"
+#include "include/logger_interface.h"
+#include "../dce110/irq_service_dce110.h"
+
+
+#include "dcn/dcn_3_5_1_offset.h"
+#include "dcn/dcn_3_5_1_sh_mask.h"
+
+#include "irq_service_dcn351.h"
+
+#include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
+
+static enum dc_irq_source to_dal_irq_source_dcn351(
+ struct irq_service *irq_service,
+ uint32_t src_id,
+ uint32_t ext_id)
+{
+ switch (src_id) {
+ case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK1;
+ case DCN_1_0__SRCID__DC_D2_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK2;
+ case DCN_1_0__SRCID__DC_D3_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK3;
+ case DCN_1_0__SRCID__DC_D4_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK4;
+ case DCN_1_0__SRCID__DC_D5_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK5;
+ case DCN_1_0__SRCID__DC_D6_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK6;
+ case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC1_VLINE0;
+ case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC2_VLINE0;
+ case DCN_1_0__SRCID__OTG3_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC3_VLINE0;
+ case DCN_1_0__SRCID__OTG4_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC4_VLINE0;
+ case DCN_1_0__SRCID__OTG5_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC5_VLINE0;
+ case DCN_1_0__SRCID__OTG6_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC6_VLINE0;
+ case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP1;
+ case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP2;
+ case DCN_1_0__SRCID__HUBP2_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP3;
+ case DCN_1_0__SRCID__HUBP3_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP4;
+ case DCN_1_0__SRCID__HUBP4_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP5;
+ case DCN_1_0__SRCID__HUBP5_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP6;
+ case DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE1;
+ case DCN_1_0__SRCID__OTG1_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE2;
+ case DCN_1_0__SRCID__OTG2_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE3;
+ case DCN_1_0__SRCID__OTG3_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE4;
+ case DCN_1_0__SRCID__OTG4_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE5;
+ case DCN_1_0__SRCID__OTG5_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE6;
+ case DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT:
+ return DC_IRQ_SOURCE_DMCUB_OUTBOX;
+ case DCN_1_0__SRCID__DC_HPD1_INT:
+ /* generic src_id for all HPD and HPDRX interrupts */
+ switch (ext_id) {
+ case DCN_1_0__CTXID__DC_HPD1_INT:
+ return DC_IRQ_SOURCE_HPD1;
+ case DCN_1_0__CTXID__DC_HPD2_INT:
+ return DC_IRQ_SOURCE_HPD2;
+ case DCN_1_0__CTXID__DC_HPD3_INT:
+ return DC_IRQ_SOURCE_HPD3;
+ case DCN_1_0__CTXID__DC_HPD4_INT:
+ return DC_IRQ_SOURCE_HPD4;
+ case DCN_1_0__CTXID__DC_HPD5_INT:
+ return DC_IRQ_SOURCE_HPD5;
+ case DCN_1_0__CTXID__DC_HPD6_INT:
+ return DC_IRQ_SOURCE_HPD6;
+ case DCN_1_0__CTXID__DC_HPD1_RX_INT:
+ return DC_IRQ_SOURCE_HPD1RX;
+ case DCN_1_0__CTXID__DC_HPD2_RX_INT:
+ return DC_IRQ_SOURCE_HPD2RX;
+ case DCN_1_0__CTXID__DC_HPD3_RX_INT:
+ return DC_IRQ_SOURCE_HPD3RX;
+ case DCN_1_0__CTXID__DC_HPD4_RX_INT:
+ return DC_IRQ_SOURCE_HPD4RX;
+ case DCN_1_0__CTXID__DC_HPD5_RX_INT:
+ return DC_IRQ_SOURCE_HPD5RX;
+ case DCN_1_0__CTXID__DC_HPD6_RX_INT:
+ return DC_IRQ_SOURCE_HPD6RX;
+ default:
+ return DC_IRQ_SOURCE_INVALID;
+ }
+ break;
+
+ default:
+ return DC_IRQ_SOURCE_INVALID;
+ }
+}
+
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
+ .set = NULL,
+ .ack = hpd0_ack
+};
+
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs outbox_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+#undef BASE_INNER
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg]
+
+/* Expand to the segment base address looked up in ctx->dcn_reg_offsets. */
+#define BASE(seg) \
+ BASE_INNER(seg)
+
+#define SRI(reg_name, block, id)\
+ BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_DMUB(reg_name)\
+ BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define IRQ_REG_ENTRY(base, block, reg_num, reg1, mask1, reg2, mask2)\
+ REG_STRUCT[base + reg_num].enable_reg = SRI(reg1, block, reg_num),\
+ REG_STRUCT[base + reg_num].enable_mask = \
+ block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base + reg_num].enable_value[0] = \
+ block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base + reg_num].enable_value[1] = \
+ ~block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK, \
+ REG_STRUCT[base + reg_num].ack_reg = SRI(reg2, block, reg_num),\
+ REG_STRUCT[base + reg_num].ack_mask = \
+ block ## reg_num ## _ ## reg2 ## __ ## mask2 ## _MASK,\
+ REG_STRUCT[base + reg_num].ack_value = \
+ block ## reg_num ## _ ## reg2 ## __ ## mask2 ## _MASK \
+
+#define IRQ_REG_ENTRY_DMUB(base, reg1, mask1, reg2, mask2)\
+ REG_STRUCT[base].enable_reg = SRI_DMUB(reg1),\
+ REG_STRUCT[base].enable_mask = \
+ reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base].enable_value[0] = \
+ reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base].enable_value[1] = \
+ ~reg1 ## __ ## mask1 ## _MASK, \
+ REG_STRUCT[base].ack_reg = SRI_DMUB(reg2),\
+ REG_STRUCT[base].ack_mask = \
+ reg2 ## __ ## mask2 ## _MASK,\
+ REG_STRUCT[base].ack_value = \
+ reg2 ## __ ## mask2 ## _MASK \
+
+#define hpd_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_HPD1, HPD, reg_num,\
+ DC_HPD_INT_CONTROL, DC_HPD_INT_EN,\
+ DC_HPD_INT_CONTROL, DC_HPD_INT_ACK),\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1 + reg_num].funcs = &hpd_irq_info_funcs;\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1 + reg_num].status_reg = SRI(DC_HPD_INT_STATUS, HPD, reg_num);\
+
+#define hpd_rx_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_HPD1RX, HPD, reg_num,\
+ DC_HPD_INT_CONTROL, DC_HPD_RX_INT_EN,\
+ DC_HPD_INT_CONTROL, DC_HPD_RX_INT_ACK),\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1RX + reg_num].status_reg = SRI(DC_HPD_INT_STATUS, HPD, reg_num);\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1RX + reg_num].funcs = &hpd_rx_irq_info_funcs;\
+
+#define pflip_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_PFLIP1, HUBPREQ, reg_num,\
+ DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK,\
+ DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_PFLIP1 + reg_num].funcs = &pflip_irq_info_funcs\
+
+/* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match the
+ * semantics of DCE's DC_IRQ_SOURCE_VUPDATEx.
+ */
+#define vupdate_no_lock_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_VUPDATE1, OTG, reg_num,\
+ OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_INT_EN,\
+ OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_EVENT_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_VUPDATE1 + reg_num].funcs = &vupdate_no_lock_irq_info_funcs\
+
+#define vblank_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_VBLANK1, OTG, reg_num,\
+ OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\
+ OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_VBLANK1 + reg_num].funcs = &vblank_irq_info_funcs\
+
+#define vline0_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_DC1_VLINE0, OTG, reg_num,\
+ OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE,\
+ OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_DC1_VLINE0 + reg_num].funcs = &vline0_irq_info_funcs\
+
+#define dmub_outbox_int_entry()\
+ IRQ_REG_ENTRY_DMUB(DC_IRQ_SOURCE_DMCUB_OUTBOX, \
+ DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX1_READY_INT_EN,\
+ DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX1_READY_INT_ACK),\
+ REG_STRUCT[DC_IRQ_SOURCE_DMCUB_OUTBOX].funcs = &outbox_irq_info_funcs
+
+#define dummy_irq_entry(irqno) \
+ REG_STRUCT[irqno].funcs = &dummy_irq_info_funcs\
+
+#define i2c_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_I2C_DDC ## reg_num)
+
+#define dp_sink_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_DPSINK ## reg_num)
+
+#define gpio_pad_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_GPIOPAD ## reg_num)
+
+#define dc_underflow_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW)
+
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
+ .set = dal_irq_service_dummy_set,
+ .ack = dal_irq_service_dummy_ack
+};
+
+#define dcn351_irq_init_part_1() {\
+ dummy_irq_entry(DC_IRQ_SOURCE_INVALID); \
+ hpd_int_entry(0); \
+ hpd_int_entry(1); \
+ hpd_int_entry(2); \
+ hpd_int_entry(3); \
+ hpd_int_entry(4); \
+ hpd_rx_int_entry(0); \
+ hpd_rx_int_entry(1); \
+ hpd_rx_int_entry(2); \
+ hpd_rx_int_entry(3); \
+ hpd_rx_int_entry(4); \
+ i2c_int_entry(1); \
+ i2c_int_entry(2); \
+ i2c_int_entry(3); \
+ i2c_int_entry(4); \
+ i2c_int_entry(5); \
+ i2c_int_entry(6); \
+ dp_sink_int_entry(1); \
+ dp_sink_int_entry(2); \
+ dp_sink_int_entry(3); \
+ dp_sink_int_entry(4); \
+ dp_sink_int_entry(5); \
+ dp_sink_int_entry(6); \
+ dummy_irq_entry(DC_IRQ_SOURCE_TIMER); \
+ pflip_int_entry(0); \
+ pflip_int_entry(1); \
+ pflip_int_entry(2); \
+ pflip_int_entry(3); \
+ dummy_irq_entry(DC_IRQ_SOURCE_PFLIP5); \
+ dummy_irq_entry(DC_IRQ_SOURCE_PFLIP6); \
+ dummy_irq_entry(DC_IRQ_SOURCE_PFLIP_UNDERLAY0); \
+ gpio_pad_int_entry(0); \
+ gpio_pad_int_entry(1); \
+ gpio_pad_int_entry(2); \
+ gpio_pad_int_entry(3); \
+ gpio_pad_int_entry(4); \
+ gpio_pad_int_entry(5); \
+ gpio_pad_int_entry(6); \
+ gpio_pad_int_entry(7); \
+ gpio_pad_int_entry(8); \
+ gpio_pad_int_entry(9); \
+ gpio_pad_int_entry(10); \
+ gpio_pad_int_entry(11); \
+ gpio_pad_int_entry(12); \
+ gpio_pad_int_entry(13); \
+ gpio_pad_int_entry(14); \
+ gpio_pad_int_entry(15); \
+ gpio_pad_int_entry(16); \
+ gpio_pad_int_entry(17); \
+ gpio_pad_int_entry(18); \
+ gpio_pad_int_entry(19); \
+ gpio_pad_int_entry(20); \
+ gpio_pad_int_entry(21); \
+ gpio_pad_int_entry(22); \
+ gpio_pad_int_entry(23); \
+ gpio_pad_int_entry(24); \
+ gpio_pad_int_entry(25); \
+ gpio_pad_int_entry(26); \
+ gpio_pad_int_entry(27); \
+ gpio_pad_int_entry(28); \
+ gpio_pad_int_entry(29); \
+ gpio_pad_int_entry(30); \
+ dc_underflow_int_entry(1); \
+ dc_underflow_int_entry(2); \
+ dc_underflow_int_entry(3); \
+ dc_underflow_int_entry(4); \
+ dc_underflow_int_entry(5); \
+ dc_underflow_int_entry(6); \
+ dummy_irq_entry(DC_IRQ_SOURCE_DMCU_SCP); \
+ dummy_irq_entry(DC_IRQ_SOURCE_VBIOS_SW); \
+}
+
+#define dcn351_irq_init_part_2() {\
+ vupdate_no_lock_int_entry(0); \
+ vupdate_no_lock_int_entry(1); \
+ vupdate_no_lock_int_entry(2); \
+ vupdate_no_lock_int_entry(3); \
+ vblank_int_entry(0); \
+ vblank_int_entry(1); \
+ vblank_int_entry(2); \
+ vblank_int_entry(3); \
+ vline0_int_entry(0); \
+ vline0_int_entry(1); \
+ vline0_int_entry(2); \
+ vline0_int_entry(3); \
+ dummy_irq_entry(DC_IRQ_SOURCE_DC5_VLINE1); \
+ dummy_irq_entry(DC_IRQ_SOURCE_DC6_VLINE1); \
+ dmub_outbox_int_entry(); \
+}
+
+#define dcn351_irq_init() {\
+ dcn351_irq_init_part_1(); \
+ dcn351_irq_init_part_2(); \
+}
+
+static struct irq_source_info irq_source_info_dcn351[DAL_IRQ_SOURCES_NUMBER] = {0};
+
+static struct irq_service_funcs irq_service_funcs_dcn351 = {
+ .to_dal_irq_source = to_dal_irq_source_dcn351
+};
+
+static void dcn351_irq_construct(
+ struct irq_service *irq_service,
+ struct irq_service_init_data *init_data)
+{
+ struct dc_context *ctx = init_data->ctx;
+
+#define REG_STRUCT irq_source_info_dcn351
+ dcn351_irq_init();
+
+ dal_irq_service_construct(irq_service, init_data);
+
+ irq_service->info = irq_source_info_dcn351;
+ irq_service->funcs = &irq_service_funcs_dcn351;
+}
+
+struct irq_service *dal_irq_service_dcn351_create(
+ struct irq_service_init_data *init_data)
+{
+ struct irq_service *irq_service = kzalloc(sizeof(*irq_service),
+ GFP_KERNEL);
+
+ if (!irq_service)
+ return NULL;
+
+ dcn351_irq_construct(irq_service, init_data);
+ return irq_service;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.h b/drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.h
new file mode 100644
index 000000000000..4094631ffec6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2021 Advanced Micro Devices, Inc. */
+
+#ifndef __DAL_IRQ_SERVICE_DCN351_H__
+#define __DAL_IRQ_SERVICE_DCN351_H__
+
+#include "../irq_service.h"
+
+struct irq_service *dal_irq_service_dcn351_create(
+ struct irq_service_init_data *init_data);
+
+#endif /* __DAL_IRQ_SERVICE_DCN351_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c b/drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c
new file mode 100644
index 000000000000..f716ab0fd30e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c
@@ -0,0 +1,381 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2025 Advanced Micro Devices, Inc. */
+
+#include "dm_services.h"
+#include "include/logger_interface.h"
+#include "../dce110/irq_service_dce110.h"
+
+#include "dcn/dcn_3_6_0_offset.h"
+#include "dcn/dcn_3_6_0_sh_mask.h"
+
+#include "irq_service_dcn36.h"
+
+#include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
+
+static enum dc_irq_source to_dal_irq_source_dcn36(
+ struct irq_service *irq_service,
+ uint32_t src_id,
+ uint32_t ext_id)
+{
+ switch (src_id) {
+ case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK1;
+ case DCN_1_0__SRCID__DC_D2_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK2;
+ case DCN_1_0__SRCID__DC_D3_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK3;
+ case DCN_1_0__SRCID__DC_D4_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK4;
+ case DCN_1_0__SRCID__DC_D5_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK5;
+ case DCN_1_0__SRCID__DC_D6_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK6;
+ case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC1_VLINE0;
+ case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC2_VLINE0;
+ case DCN_1_0__SRCID__OTG3_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC3_VLINE0;
+ case DCN_1_0__SRCID__OTG4_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC4_VLINE0;
+ case DCN_1_0__SRCID__OTG5_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC5_VLINE0;
+ case DCN_1_0__SRCID__OTG6_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC6_VLINE0;
+ case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP1;
+ case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP2;
+ case DCN_1_0__SRCID__HUBP2_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP3;
+ case DCN_1_0__SRCID__HUBP3_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP4;
+ case DCN_1_0__SRCID__HUBP4_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP5;
+ case DCN_1_0__SRCID__HUBP5_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP6;
+ case DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE1;
+ case DCN_1_0__SRCID__OTG1_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE2;
+ case DCN_1_0__SRCID__OTG2_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE3;
+ case DCN_1_0__SRCID__OTG3_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE4;
+ case DCN_1_0__SRCID__OTG4_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE5;
+ case DCN_1_0__SRCID__OTG5_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE6;
+ case DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT:
+ return DC_IRQ_SOURCE_DMCUB_OUTBOX;
+ case DCN_1_0__SRCID__DC_HPD1_INT:
+ /* generic src_id for all HPD and HPDRX interrupts */
+ switch (ext_id) {
+ case DCN_1_0__CTXID__DC_HPD1_INT:
+ return DC_IRQ_SOURCE_HPD1;
+ case DCN_1_0__CTXID__DC_HPD2_INT:
+ return DC_IRQ_SOURCE_HPD2;
+ case DCN_1_0__CTXID__DC_HPD3_INT:
+ return DC_IRQ_SOURCE_HPD3;
+ case DCN_1_0__CTXID__DC_HPD4_INT:
+ return DC_IRQ_SOURCE_HPD4;
+ case DCN_1_0__CTXID__DC_HPD5_INT:
+ return DC_IRQ_SOURCE_HPD5;
+ case DCN_1_0__CTXID__DC_HPD6_INT:
+ return DC_IRQ_SOURCE_HPD6;
+ case DCN_1_0__CTXID__DC_HPD1_RX_INT:
+ return DC_IRQ_SOURCE_HPD1RX;
+ case DCN_1_0__CTXID__DC_HPD2_RX_INT:
+ return DC_IRQ_SOURCE_HPD2RX;
+ case DCN_1_0__CTXID__DC_HPD3_RX_INT:
+ return DC_IRQ_SOURCE_HPD3RX;
+ case DCN_1_0__CTXID__DC_HPD4_RX_INT:
+ return DC_IRQ_SOURCE_HPD4RX;
+ case DCN_1_0__CTXID__DC_HPD5_RX_INT:
+ return DC_IRQ_SOURCE_HPD5RX;
+ case DCN_1_0__CTXID__DC_HPD6_RX_INT:
+ return DC_IRQ_SOURCE_HPD6RX;
+ default:
+ return DC_IRQ_SOURCE_INVALID;
+ }
+ break;
+
+ default:
+ return DC_IRQ_SOURCE_INVALID;
+ }
+}
+
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
+ .set = NULL,
+ .ack = hpd0_ack
+};
+
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs outbox_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+#undef BASE_INNER
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg]
+
+/* Expand a segment index to its base offset, looked up in ctx at run time. */
+#define BASE(seg) \
+ BASE_INNER(seg)
+
+#define SRI(reg_name, block, id)\
+ BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_DMUB(reg_name)\
+ BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define IRQ_REG_ENTRY(base, block, reg_num, reg1, mask1, reg2, mask2)\
+ REG_STRUCT[base + reg_num].enable_reg = SRI(reg1, block, reg_num),\
+ REG_STRUCT[base + reg_num].enable_mask = \
+ block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base + reg_num].enable_value[0] = \
+ block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base + reg_num].enable_value[1] = \
+ ~block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK, \
+ REG_STRUCT[base + reg_num].ack_reg = SRI(reg2, block, reg_num),\
+ REG_STRUCT[base + reg_num].ack_mask = \
+ block ## reg_num ## _ ## reg2 ## __ ## mask2 ## _MASK,\
+ REG_STRUCT[base + reg_num].ack_value = \
+ block ## reg_num ## _ ## reg2 ## __ ## mask2 ## _MASK \
+
+#define IRQ_REG_ENTRY_DMUB(base, reg1, mask1, reg2, mask2)\
+ REG_STRUCT[base].enable_reg = SRI_DMUB(reg1),\
+ REG_STRUCT[base].enable_mask = \
+ reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base].enable_value[0] = \
+ reg1 ## __ ## mask1 ## _MASK,\
+ REG_STRUCT[base].enable_value[1] = \
+ ~reg1 ## __ ## mask1 ## _MASK, \
+ REG_STRUCT[base].ack_reg = SRI_DMUB(reg2),\
+ REG_STRUCT[base].ack_mask = \
+ reg2 ## __ ## mask2 ## _MASK,\
+ REG_STRUCT[base].ack_value = \
+ reg2 ## __ ## mask2 ## _MASK \
+
+#define hpd_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_HPD1, HPD, reg_num,\
+ DC_HPD_INT_CONTROL, DC_HPD_INT_EN,\
+ DC_HPD_INT_CONTROL, DC_HPD_INT_ACK),\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1 + reg_num].funcs = &hpd_irq_info_funcs;\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1 + reg_num].status_reg = SRI(DC_HPD_INT_STATUS, HPD, reg_num);\
+
+#define hpd_rx_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_HPD1RX, HPD, reg_num,\
+ DC_HPD_INT_CONTROL, DC_HPD_RX_INT_EN,\
+ DC_HPD_INT_CONTROL, DC_HPD_RX_INT_ACK),\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1RX + reg_num].status_reg = SRI(DC_HPD_INT_STATUS, HPD, reg_num);\
+ REG_STRUCT[DC_IRQ_SOURCE_HPD1RX + reg_num].funcs = &hpd_rx_irq_info_funcs;\
+
+#define pflip_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_PFLIP1, HUBPREQ, reg_num,\
+ DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK,\
+ DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_PFLIP1 + reg_num].funcs = &pflip_irq_info_funcs\
+
+/* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match the
+ * semantics of DCE's DC_IRQ_SOURCE_VUPDATEx.
+ */
+#define vupdate_no_lock_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_VUPDATE1, OTG, reg_num,\
+ OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_INT_EN,\
+ OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_EVENT_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_VUPDATE1 + reg_num].funcs = &vupdate_no_lock_irq_info_funcs\
+
+#define vblank_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_VBLANK1, OTG, reg_num,\
+ OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\
+ OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_VBLANK1 + reg_num].funcs = &vblank_irq_info_funcs\
+
+#define vline0_int_entry(reg_num)\
+ IRQ_REG_ENTRY(DC_IRQ_SOURCE_DC1_VLINE0, OTG, reg_num,\
+ OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE,\
+ OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\
+ REG_STRUCT[DC_IRQ_SOURCE_DC1_VLINE0 + reg_num].funcs = &vline0_irq_info_funcs\
+
+#define dmub_outbox_int_entry()\
+ IRQ_REG_ENTRY_DMUB(DC_IRQ_SOURCE_DMCUB_OUTBOX, \
+ DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX1_READY_INT_EN,\
+ DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX1_READY_INT_ACK),\
+ REG_STRUCT[DC_IRQ_SOURCE_DMCUB_OUTBOX].funcs = &outbox_irq_info_funcs
+
+#define dummy_irq_entry(irqno) \
+ REG_STRUCT[irqno].funcs = &dummy_irq_info_funcs\
+
+#define i2c_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_I2C_DDC ## reg_num)
+
+#define dp_sink_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_DPSINK ## reg_num)
+
+#define gpio_pad_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_GPIOPAD ## reg_num)
+
+#define dc_underflow_int_entry(reg_num) \
+ dummy_irq_entry(DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW)
+
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
+ .set = dal_irq_service_dummy_set,
+ .ack = dal_irq_service_dummy_ack
+};
+
+#define dcn36_irq_init_part_1() {\
+ dummy_irq_entry(DC_IRQ_SOURCE_INVALID); \
+ hpd_int_entry(0); \
+ hpd_int_entry(1); \
+ hpd_int_entry(2); \
+ hpd_int_entry(3); \
+ hpd_int_entry(4); \
+ hpd_rx_int_entry(0); \
+ hpd_rx_int_entry(1); \
+ hpd_rx_int_entry(2); \
+ hpd_rx_int_entry(3); \
+ hpd_rx_int_entry(4); \
+ i2c_int_entry(1); \
+ i2c_int_entry(2); \
+ i2c_int_entry(3); \
+ i2c_int_entry(4); \
+ i2c_int_entry(5); \
+ i2c_int_entry(6); \
+ dp_sink_int_entry(1); \
+ dp_sink_int_entry(2); \
+ dp_sink_int_entry(3); \
+ dp_sink_int_entry(4); \
+ dp_sink_int_entry(5); \
+ dp_sink_int_entry(6); \
+ dummy_irq_entry(DC_IRQ_SOURCE_TIMER); \
+ pflip_int_entry(0); \
+ pflip_int_entry(1); \
+ pflip_int_entry(2); \
+ pflip_int_entry(3); \
+ dummy_irq_entry(DC_IRQ_SOURCE_PFLIP5); \
+ dummy_irq_entry(DC_IRQ_SOURCE_PFLIP6); \
+ dummy_irq_entry(DC_IRQ_SOURCE_PFLIP_UNDERLAY0); \
+ gpio_pad_int_entry(0); \
+ gpio_pad_int_entry(1); \
+ gpio_pad_int_entry(2); \
+ gpio_pad_int_entry(3); \
+ gpio_pad_int_entry(4); \
+ gpio_pad_int_entry(5); \
+ gpio_pad_int_entry(6); \
+ gpio_pad_int_entry(7); \
+ gpio_pad_int_entry(8); \
+ gpio_pad_int_entry(9); \
+ gpio_pad_int_entry(10); \
+ gpio_pad_int_entry(11); \
+ gpio_pad_int_entry(12); \
+ gpio_pad_int_entry(13); \
+ gpio_pad_int_entry(14); \
+ gpio_pad_int_entry(15); \
+ gpio_pad_int_entry(16); \
+ gpio_pad_int_entry(17); \
+ gpio_pad_int_entry(18); \
+ gpio_pad_int_entry(19); \
+ gpio_pad_int_entry(20); \
+ gpio_pad_int_entry(21); \
+ gpio_pad_int_entry(22); \
+ gpio_pad_int_entry(23); \
+ gpio_pad_int_entry(24); \
+ gpio_pad_int_entry(25); \
+ gpio_pad_int_entry(26); \
+ gpio_pad_int_entry(27); \
+ gpio_pad_int_entry(28); \
+ gpio_pad_int_entry(29); \
+ gpio_pad_int_entry(30); \
+ dc_underflow_int_entry(1); \
+ dc_underflow_int_entry(2); \
+ dc_underflow_int_entry(3); \
+ dc_underflow_int_entry(4); \
+ dc_underflow_int_entry(5); \
+ dc_underflow_int_entry(6); \
+ dummy_irq_entry(DC_IRQ_SOURCE_DMCU_SCP); \
+ dummy_irq_entry(DC_IRQ_SOURCE_VBIOS_SW); \
+}
+
+#define dcn36_irq_init_part_2() {\
+ vupdate_no_lock_int_entry(0); \
+ vupdate_no_lock_int_entry(1); \
+ vupdate_no_lock_int_entry(2); \
+ vupdate_no_lock_int_entry(3); \
+ vblank_int_entry(0); \
+ vblank_int_entry(1); \
+ vblank_int_entry(2); \
+ vblank_int_entry(3); \
+ vline0_int_entry(0); \
+ vline0_int_entry(1); \
+ vline0_int_entry(2); \
+ vline0_int_entry(3); \
+ dummy_irq_entry(DC_IRQ_SOURCE_DC5_VLINE1); \
+ dummy_irq_entry(DC_IRQ_SOURCE_DC6_VLINE1); \
+ dmub_outbox_int_entry(); \
+}
+
+#define dcn36_irq_init() {\
+ dcn36_irq_init_part_1(); \
+ dcn36_irq_init_part_2(); \
+}
+
+static struct irq_source_info irq_source_info_dcn36[DAL_IRQ_SOURCES_NUMBER] = {0};
+
+static struct irq_service_funcs irq_service_funcs_dcn36 = {
+ .to_dal_irq_source = to_dal_irq_source_dcn36
+};
+
+/* Populate irq_source_info_dcn36, then attach it and the funcs table. */
+static void dcn36_irq_construct(struct irq_service *irq_service,
+				struct irq_service_init_data *init_data)
+{
+	/* read by BASE_INNER() when the entry macros below are expanded */
+	struct dc_context *ctx = init_data->ctx;
+
+#define REG_STRUCT irq_source_info_dcn36
+	dcn36_irq_init();
+
+	dal_irq_service_construct(irq_service, init_data);
+	irq_service->funcs = &irq_service_funcs_dcn36;
+	irq_service->info = irq_source_info_dcn36;
+}
+
+/* Allocate and construct the dcn36 irq_service; NULL if kzalloc fails. */
+struct irq_service *dal_irq_service_dcn36_create(
+		struct irq_service_init_data *init_data)
+{
+	struct irq_service *svc;
+
+	svc = kzalloc(sizeof(*svc), GFP_KERNEL);
+	if (svc)
+		dcn36_irq_construct(svc, init_data);
+
+	return svc;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.h b/drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.h
new file mode 100644
index 000000000000..21ff95f6562d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2025 Advanced Micro Devices, Inc. */
+
+#ifndef __DAL_IRQ_SERVICE_DCN36_H__
+#define __DAL_IRQ_SERVICE_DCN36_H__
+
+#include "../irq_service.h"
+
+struct irq_service *dal_irq_service_dcn36_create(
+ struct irq_service_init_data *init_data);
+
+#endif /* __DAL_IRQ_SERVICE_DCN36_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c
new file mode 100644
index 000000000000..fd9bb1950c20
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dm_services.h"
+#include "include/logger_interface.h"
+#include "../dce110/irq_service_dce110.h"
+
+#include "dcn/dcn_4_1_0_offset.h"
+#include "dcn/dcn_4_1_0_sh_mask.h"
+
+#include "irq_service_dcn401.h"
+
+#include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
+
+#define DCN_BASE__INST0_SEG2 0x000034C0
+
+static enum dc_irq_source to_dal_irq_source_dcn401(
+ struct irq_service *irq_service,
+ uint32_t src_id,
+ uint32_t ext_id)
+{
+ switch (src_id) {
+ case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK1;
+ case DCN_1_0__SRCID__DC_D2_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK2;
+ case DCN_1_0__SRCID__DC_D3_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK3;
+ case DCN_1_0__SRCID__DC_D4_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK4;
+ case DCN_1_0__SRCID__DC_D5_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK5;
+ case DCN_1_0__SRCID__DC_D6_OTG_VSTARTUP:
+ return DC_IRQ_SOURCE_VBLANK6;
+ case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC1_VLINE0;
+ case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC2_VLINE0;
+ case DCN_1_0__SRCID__OTG3_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC3_VLINE0;
+ case DCN_1_0__SRCID__OTG4_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC4_VLINE0;
+ case DCN_1_0__SRCID__OTG5_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC5_VLINE0;
+ case DCN_1_0__SRCID__OTG6_VERTICAL_INTERRUPT0_CONTROL:
+ return DC_IRQ_SOURCE_DC6_VLINE0;
+ case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP1;
+ case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP2;
+ case DCN_1_0__SRCID__HUBP2_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP3;
+ case DCN_1_0__SRCID__HUBP3_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP4;
+ case DCN_1_0__SRCID__HUBP4_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP5;
+ case DCN_1_0__SRCID__HUBP5_FLIP_INTERRUPT:
+ return DC_IRQ_SOURCE_PFLIP6;
+ case DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE1;
+ case DCN_1_0__SRCID__OTG1_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE2;
+ case DCN_1_0__SRCID__OTG2_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE3;
+ case DCN_1_0__SRCID__OTG3_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE4;
+ case DCN_1_0__SRCID__OTG4_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE5;
+ case DCN_1_0__SRCID__OTG5_IHC_V_UPDATE_NO_LOCK_INTERRUPT:
+ return DC_IRQ_SOURCE_VUPDATE6;
+ case DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT:
+ return DC_IRQ_SOURCE_DMCUB_OUTBOX;
+
+ case DCN_1_0__SRCID__DC_HPD1_INT:
+ /* generic src_id for all HPD and HPDRX interrupts */
+ switch (ext_id) {
+ case DCN_1_0__CTXID__DC_HPD1_INT:
+ return DC_IRQ_SOURCE_HPD1;
+ case DCN_1_0__CTXID__DC_HPD2_INT:
+ return DC_IRQ_SOURCE_HPD2;
+ case DCN_1_0__CTXID__DC_HPD3_INT:
+ return DC_IRQ_SOURCE_HPD3;
+ case DCN_1_0__CTXID__DC_HPD4_INT:
+ return DC_IRQ_SOURCE_HPD4;
+ case DCN_1_0__CTXID__DC_HPD5_INT:
+ return DC_IRQ_SOURCE_HPD5;
+ case DCN_1_0__CTXID__DC_HPD6_INT:
+ return DC_IRQ_SOURCE_HPD6;
+ case DCN_1_0__CTXID__DC_HPD1_RX_INT:
+ return DC_IRQ_SOURCE_HPD1RX;
+ case DCN_1_0__CTXID__DC_HPD2_RX_INT:
+ return DC_IRQ_SOURCE_HPD2RX;
+ case DCN_1_0__CTXID__DC_HPD3_RX_INT:
+ return DC_IRQ_SOURCE_HPD3RX;
+ case DCN_1_0__CTXID__DC_HPD4_RX_INT:
+ return DC_IRQ_SOURCE_HPD4RX;
+ case DCN_1_0__CTXID__DC_HPD5_RX_INT:
+ return DC_IRQ_SOURCE_HPD5RX;
+ case DCN_1_0__CTXID__DC_HPD6_RX_INT:
+ return DC_IRQ_SOURCE_HPD6RX;
+ default:
+ return DC_IRQ_SOURCE_INVALID;
+ }
+ break;
+
+ default:
+ return DC_IRQ_SOURCE_INVALID;
+ }
+}
+
+static struct irq_source_info_funcs hpd_irq_info_funcs = {
+ .set = NULL,
+ .ack = hpd0_ack
+};
+
+static struct irq_source_info_funcs hpd_rx_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs pflip_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vblank_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs outbox_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vline0_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vline1_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+static struct irq_source_info_funcs vline2_irq_info_funcs = {
+ .set = NULL,
+ .ack = NULL
+};
+
+#undef BASE_INNER
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+/* Expand the segment base address at compile time. */
+#define BASE(seg) \
+ BASE_INNER(seg)
+
+#define SRI(reg_name, block, id)\
+ BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_DMUB(reg_name)\
+ BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define IRQ_REG_ENTRY(block, reg_num, reg1, mask1, reg2, mask2)\
+ .enable_reg = SRI(reg1, block, reg_num),\
+ .enable_mask = \
+ block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK,\
+ .enable_value = {\
+ block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK,\
+ ~block ## reg_num ## _ ## reg1 ## __ ## mask1 ## _MASK \
+ },\
+ .ack_reg = SRI(reg2, block, reg_num),\
+ .ack_mask = \
+ block ## reg_num ## _ ## reg2 ## __ ## mask2 ## _MASK,\
+ .ack_value = \
+ block ## reg_num ## _ ## reg2 ## __ ## mask2 ## _MASK \
+
+#define IRQ_REG_ENTRY_DMUB(reg1, mask1, reg2, mask2)\
+ .enable_reg = SRI_DMUB(reg1),\
+ .enable_mask = \
+ reg1 ## __ ## mask1 ## _MASK,\
+ .enable_value = {\
+ reg1 ## __ ## mask1 ## _MASK,\
+ ~reg1 ## __ ## mask1 ## _MASK \
+ },\
+ .ack_reg = SRI_DMUB(reg2),\
+ .ack_mask = \
+ reg2 ## __ ## mask2 ## _MASK,\
+ .ack_value = \
+ reg2 ## __ ## mask2 ## _MASK \
+
+#define hpd_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_HPD1 + reg_num] = {\
+ IRQ_REG_ENTRY(HPD, reg_num,\
+ DC_HPD_INT_CONTROL, DC_HPD_INT_EN,\
+ DC_HPD_INT_CONTROL, DC_HPD_INT_ACK),\
+ .status_reg = SRI(DC_HPD_INT_STATUS, HPD, reg_num),\
+ .funcs = &hpd_irq_info_funcs\
+ }
+
+#define hpd_rx_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_HPD1RX + reg_num] = {\
+ IRQ_REG_ENTRY(HPD, reg_num,\
+ DC_HPD_INT_CONTROL, DC_HPD_RX_INT_EN,\
+ DC_HPD_INT_CONTROL, DC_HPD_RX_INT_ACK),\
+ .status_reg = SRI(DC_HPD_INT_STATUS, HPD, reg_num),\
+ .funcs = &hpd_rx_irq_info_funcs\
+ }
+#define pflip_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_PFLIP1 + reg_num] = {\
+ IRQ_REG_ENTRY(HUBPREQ, reg_num,\
+ DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK,\
+ DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_CLEAR),\
+ .funcs = &pflip_irq_info_funcs\
+ }
+
+#define vblank_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\
+ IRQ_REG_ENTRY(OTG, reg_num,\
+ OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\
+ OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\
+ .funcs = &vblank_irq_info_funcs\
+ }
+/* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match the
+ * semantics of DCE's DC_IRQ_SOURCE_VUPDATEx.
+ */
+#define vupdate_no_lock_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
+ IRQ_REG_ENTRY(OTG, reg_num,\
+ OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_INT_EN,\
+ OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_EVENT_CLEAR),\
+ .funcs = &vupdate_no_lock_irq_info_funcs\
+ }
+
+#define vline0_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\
+ IRQ_REG_ENTRY(OTG, reg_num,\
+ OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE,\
+ OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\
+ .funcs = &vline0_irq_info_funcs\
+ }
+#define vline1_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_DC1_VLINE1 + reg_num] = {\
+ IRQ_REG_ENTRY(OTG, reg_num,\
+ OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE,\
+ OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_CLEAR),\
+ .funcs = &vline1_irq_info_funcs\
+ }
+#define vline2_int_entry(reg_num)\
+ [DC_IRQ_SOURCE_DC1_VLINE2 + reg_num] = {\
+ IRQ_REG_ENTRY(OTG, reg_num,\
+ OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE,\
+ OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_CLEAR),\
+ .funcs = &vline2_irq_info_funcs\
+ }
+#define dmub_outbox_int_entry()\
+ [DC_IRQ_SOURCE_DMCUB_OUTBOX] = {\
+ IRQ_REG_ENTRY_DMUB(\
+ DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX1_READY_INT_EN,\
+ DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX1_READY_INT_ACK),\
+ .funcs = &outbox_irq_info_funcs\
+ }
+
+#define dummy_irq_entry() \
+ {\
+ .funcs = &dummy_irq_info_funcs\
+ }
+
+#define i2c_int_entry(reg_num) \
+ [DC_IRQ_SOURCE_I2C_DDC ## reg_num] = dummy_irq_entry()
+
+#define dp_sink_int_entry(reg_num) \
+ [DC_IRQ_SOURCE_DPSINK ## reg_num] = dummy_irq_entry()
+
+#define gpio_pad_int_entry(reg_num) \
+ [DC_IRQ_SOURCE_GPIOPAD ## reg_num] = dummy_irq_entry()
+
+#define dc_underflow_int_entry(reg_num) \
+ [DC_IRQ_SOURCE_DC ## reg_num ## UNDERFLOW] = dummy_irq_entry()
+
+static struct irq_source_info_funcs dummy_irq_info_funcs = {
+ .set = dal_irq_service_dummy_set,
+ .ack = dal_irq_service_dummy_ack
+};
+
+static const struct irq_source_info
+irq_source_info_dcn401[DAL_IRQ_SOURCES_NUMBER] = {
+ [DC_IRQ_SOURCE_INVALID] = dummy_irq_entry(),
+ hpd_int_entry(0),
+ hpd_int_entry(1),
+ hpd_int_entry(2),
+ hpd_int_entry(3),
+ hpd_rx_int_entry(0),
+ hpd_rx_int_entry(1),
+ hpd_rx_int_entry(2),
+ hpd_rx_int_entry(3),
+ i2c_int_entry(1),
+ i2c_int_entry(2),
+ i2c_int_entry(3),
+ i2c_int_entry(4),
+ i2c_int_entry(5),
+ i2c_int_entry(6),
+ dp_sink_int_entry(1),
+ dp_sink_int_entry(2),
+ dp_sink_int_entry(3),
+ dp_sink_int_entry(4),
+ dp_sink_int_entry(5),
+ dp_sink_int_entry(6),
+ [DC_IRQ_SOURCE_TIMER] = dummy_irq_entry(),
+ pflip_int_entry(0),
+ pflip_int_entry(1),
+ pflip_int_entry(2),
+ pflip_int_entry(3),
+ [DC_IRQ_SOURCE_PFLIP5] = dummy_irq_entry(),
+ [DC_IRQ_SOURCE_PFLIP6] = dummy_irq_entry(),
+ [DC_IRQ_SOURCE_PFLIP_UNDERLAY0] = dummy_irq_entry(),
+ gpio_pad_int_entry(0),
+ gpio_pad_int_entry(1),
+ gpio_pad_int_entry(2),
+ gpio_pad_int_entry(3),
+ gpio_pad_int_entry(4),
+ gpio_pad_int_entry(5),
+ gpio_pad_int_entry(6),
+ gpio_pad_int_entry(7),
+ gpio_pad_int_entry(8),
+ gpio_pad_int_entry(9),
+ gpio_pad_int_entry(10),
+ gpio_pad_int_entry(11),
+ gpio_pad_int_entry(12),
+ gpio_pad_int_entry(13),
+ gpio_pad_int_entry(14),
+ gpio_pad_int_entry(15),
+ gpio_pad_int_entry(16),
+ gpio_pad_int_entry(17),
+ gpio_pad_int_entry(18),
+ gpio_pad_int_entry(19),
+ gpio_pad_int_entry(20),
+ gpio_pad_int_entry(21),
+ gpio_pad_int_entry(22),
+ gpio_pad_int_entry(23),
+ gpio_pad_int_entry(24),
+ gpio_pad_int_entry(25),
+ gpio_pad_int_entry(26),
+ gpio_pad_int_entry(27),
+ gpio_pad_int_entry(28),
+ gpio_pad_int_entry(29),
+ gpio_pad_int_entry(30),
+ dc_underflow_int_entry(1),
+ dc_underflow_int_entry(2),
+ dc_underflow_int_entry(3),
+ dc_underflow_int_entry(4),
+ dc_underflow_int_entry(5),
+ dc_underflow_int_entry(6),
+ [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(),
+ [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(),
+ vblank_int_entry(0),
+ vblank_int_entry(1),
+ vblank_int_entry(2),
+ vblank_int_entry(3),
+ [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(),
+ [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(),
+ dmub_outbox_int_entry(),
+ vupdate_no_lock_int_entry(0),
+ vupdate_no_lock_int_entry(1),
+ vupdate_no_lock_int_entry(2),
+ vupdate_no_lock_int_entry(3),
+ vline0_int_entry(0),
+ vline0_int_entry(1),
+ vline0_int_entry(2),
+ vline0_int_entry(3),
+ vline1_int_entry(0),
+ vline1_int_entry(1),
+ vline1_int_entry(2),
+ vline1_int_entry(3),
+ vline2_int_entry(0),
+ vline2_int_entry(1),
+ vline2_int_entry(2),
+ vline2_int_entry(3),
+};
+
+static const struct irq_service_funcs irq_service_funcs_dcn401 = {
+ .to_dal_irq_source = to_dal_irq_source_dcn401
+};
+
+/* Run the common constructor, then attach the static dcn401 tables. */
+static void dcn401_irq_construct(struct irq_service *irq_service,
+				 struct irq_service_init_data *init_data)
+{
+	dal_irq_service_construct(irq_service, init_data);
+
+	irq_service->funcs = &irq_service_funcs_dcn401;
+	irq_service->info = irq_source_info_dcn401;
+}
+
+/* Allocate and construct the dcn401 irq_service; NULL if kzalloc fails. */
+struct irq_service *dal_irq_service_dcn401_create(
+		struct irq_service_init_data *init_data)
+{
+	struct irq_service *svc;
+
+	svc = kzalloc(sizeof(*svc), GFP_KERNEL);
+	if (svc)
+		dcn401_irq_construct(svc, init_data);
+
+	return svc;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.h b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.h
new file mode 100644
index 000000000000..221959aa6fc7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __DAL_IRQ_SERVICE_DCN401_H__
+#define __DAL_IRQ_SERVICE_DCN401_H__
+
+#include "../irq_service.h"
+
+struct irq_service *dal_irq_service_dcn401_create(
+ struct irq_service_init_data *init_data);
+
+#endif /* __DAL_IRQ_SERVICE_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
index d100edaedbbb..b595a11c5eaf 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
@@ -41,6 +41,16 @@
#include "reg_helper.h"
#include "irq_service.h"
+/* HPD0_DC_HPD_INT_STATUS / HPD0_DC_HPD_INT_CONTROL fields used by hpd0_ack() */
+#define HPD0_DC_HPD_INT_STATUS__DC_HPD_SENSE_DELAYED_MASK 0x00000010L
+#define HPD0_DC_HPD_INT_CONTROL__DC_HPD_INT_POLARITY_MASK 0x00000100L
+#define HPD0_DC_HPD_INT_STATUS__DC_HPD_SENSE_DELAYED__SHIFT 0x4
+#define HPD0_DC_HPD_INT_CONTROL__DC_HPD_INT_POLARITY__SHIFT 0x8
+/* DC_HPD1_INT_STATUS / DC_HPD1_INT_CONTROL fields used by hpd1_ack() */
+#define DC_HPD1_INT_STATUS__DC_HPD1_SENSE_DELAYED_MASK 0x10
+#define DC_HPD1_INT_STATUS__DC_HPD1_SENSE_DELAYED__SHIFT 0x4
+#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK 0x100
+#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY__SHIFT 0x8
#define CTX \
@@ -76,7 +86,7 @@ static const struct irq_source_info *find_irq_source_info(
struct irq_service *irq_service,
enum dc_irq_source source)
{
- if (source >= DAL_IRQ_SOURCES_NUMBER || source < DC_IRQ_SOURCE_INVALID)
+ if (source >= DAL_IRQ_SOURCES_NUMBER)
return NULL;
return &irq_service->info[source];
@@ -177,3 +187,57 @@ enum dc_irq_source dal_irq_service_to_irq_source(
src_id,
ext_id);
}
+
+/*
+ * hpd0_ack - acknowledge an HPD interrupt and reprogram its polarity
+ *
+ * The DC_HPD_SENSE_DELAYED field is sampled from info->status_reg before the
+ * generic ack; DC_HPD_INT_POLARITY in info->enable_reg is then written with
+ * the inverse (sense set -> 0, sense clear -> 1). Always returns true.
+ */
+bool hpd0_ack(
+	struct irq_service *irq_service,
+	const struct irq_source_info *info)
+{
+	uint32_t status = dm_read_reg(irq_service->ctx, info->status_reg);
+	uint32_t sense = get_reg_field_value(status,
+			HPD0_DC_HPD_INT_STATUS, DC_HPD_SENSE_DELAYED);
+	uint32_t control;
+
+	dal_irq_service_ack_generic(irq_service, info);
+
+	/* read-modify-write of the control register, after the ack */
+	control = dm_read_reg(irq_service->ctx, info->enable_reg);
+	set_reg_field_value(control, sense ? 0 : 1,
+			HPD0_DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY);
+	dm_write_reg(irq_service->ctx, info->enable_reg, control);
+
+	return true;
+}
+
+/*
+ * hpd1_ack - acknowledge an HPD interrupt and reprogram its polarity
+ *
+ * The DC_HPD1_SENSE_DELAYED field is sampled from info->status_reg before the
+ * generic ack; DC_HPD1_INT_POLARITY in info->enable_reg is then written with
+ * the inverse (sense set -> 0, sense clear -> 1). Always returns true.
+ */
+bool hpd1_ack(
+	struct irq_service *irq_service,
+	const struct irq_source_info *info)
+{
+	uint32_t status = dm_read_reg(irq_service->ctx, info->status_reg);
+	uint32_t sense = get_reg_field_value(status,
+			DC_HPD1_INT_STATUS, DC_HPD1_SENSE_DELAYED);
+	uint32_t control;
+
+	dal_irq_service_ack_generic(irq_service, info);
+
+	/* read-modify-write of the control register, after the ack */
+	control = dm_read_reg(irq_service->ctx, info->enable_reg);
+	set_reg_field_value(control, sense ? 0 : 1,
+			DC_HPD1_INT_CONTROL, DC_HPD1_INT_POLARITY);
+	dm_write_reg(irq_service->ctx, info->enable_reg, control);
+
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
index dbfcb096eedd..bbcef3d2fe33 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
@@ -53,7 +53,7 @@ struct irq_source_info {
uint32_t ack_mask;
uint32_t ack_value;
uint32_t status_reg;
- const struct irq_source_info_funcs *funcs;
+ struct irq_source_info_funcs *funcs;
};
struct irq_service_funcs {
@@ -82,4 +82,12 @@ void dal_irq_service_set_generic(
const struct irq_source_info *info,
bool enable);
+bool hpd0_ack(
+ struct irq_service *irq_service,
+ const struct irq_source_info *info);
+
+bool hpd1_ack(
+ struct irq_service *irq_service,
+ const struct irq_source_info *info);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h
index 530c2578db40..a2f7b933bebf 100644
--- a/drivers/gpu/drm/amd/display/dc/irq_types.h
+++ b/drivers/gpu/drm/amd/display/dc/irq_types.h
@@ -153,6 +153,28 @@ enum dc_irq_source {
DC_IRQ_SOURCE_DMCUB_OUTBOX,
DC_IRQ_SOURCE_DMCUB_OUTBOX0,
DC_IRQ_SOURCE_DMCUB_GENERAL_DATAOUT,
+
+ DC_IRQ_SOURCE_DPCX_TX_PHYA,
+ DC_IRQ_SOURCE_DPCX_TX_PHYB,
+ DC_IRQ_SOURCE_DPCX_TX_PHYC,
+ DC_IRQ_SOURCE_DPCX_TX_PHYD,
+ DC_IRQ_SOURCE_DPCX_TX_PHYE,
+ DC_IRQ_SOURCE_DPCX_TX_PHYF,
+
+ DC_IRQ_SOURCE_DC1_VLINE2,
+ DC_IRQ_SOURCE_DC2_VLINE2,
+ DC_IRQ_SOURCE_DC3_VLINE2,
+ DC_IRQ_SOURCE_DC4_VLINE2,
+ DC_IRQ_SOURCE_DC5_VLINE2,
+ DC_IRQ_SOURCE_DC6_VLINE2,
+
+ DC_IRQ_SOURCE_DCI2C_RR_DDC1,
+ DC_IRQ_SOURCE_DCI2C_RR_DDC2,
+ DC_IRQ_SOURCE_DCI2C_RR_DDC3,
+ DC_IRQ_SOURCE_DCI2C_RR_DDC4,
+ DC_IRQ_SOURCE_DCI2C_RR_DDC5,
+ DC_IRQ_SOURCE_DCI2C_RR_DDC6,
+
DAL_IRQ_SOURCES_NUMBER
};
@@ -162,6 +184,9 @@ enum irq_type
IRQ_TYPE_VUPDATE = DC_IRQ_SOURCE_VUPDATE1,
IRQ_TYPE_VBLANK = DC_IRQ_SOURCE_VBLANK1,
IRQ_TYPE_VLINE0 = DC_IRQ_SOURCE_DC1_VLINE0,
+ IRQ_TYPE_VLINE1 = DC_IRQ_SOURCE_DC1_VLINE1,
+ IRQ_TYPE_VLINE2 = DC_IRQ_SOURCE_DC1_VLINE2,
+ IRQ_TYPE_DCUNDERFLOW = DC_IRQ_SOURCE_DC1UNDERFLOW,
};
#define DAL_VALID_IRQ_SRC_NUM(src) \
@@ -178,7 +203,7 @@ enum dc_interrupt_context {
INTERRUPT_CONTEXT_NUMBER
};
-enum dc_interrupt_porlarity {
+enum dc_interrupt_polarity {
INTERRUPT_POLARITY_DEFAULT = 0,
INTERRUPT_POLARITY_LOW = INTERRUPT_POLARITY_DEFAULT,
INTERRUPT_POLARITY_HIGH,
@@ -199,12 +224,12 @@ struct dc_interrupt_params {
/* The polarity *change* which will trigger an interrupt.
* If 'requested_polarity == INTERRUPT_POLARITY_BOTH', then
* 'current_polarity' must be initialised. */
- enum dc_interrupt_porlarity requested_polarity;
+ enum dc_interrupt_polarity requested_polarity;
/* If 'requested_polarity == INTERRUPT_POLARITY_BOTH',
* 'current_polarity' should contain the current state, which means
* the interrupt will be triggered when state changes from what is,
* in 'current_polarity'. */
- enum dc_interrupt_porlarity current_polarity;
+ enum dc_interrupt_polarity current_polarity;
enum dc_irq_source irq_source;
enum dc_interrupt_context int_context;
};
diff --git a/drivers/gpu/drm/amd/display/dc/link/Makefile b/drivers/gpu/drm/amd/display/dc/link/Makefile
index 6af8a97d4a77..84c7af5fa589 100644
--- a/drivers/gpu/drm/amd/display/dc/link/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/link/Makefile
@@ -33,7 +33,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_LINK)
###############################################################################
# accessories
###############################################################################
-LINK_ACCESSORIES = link_dp_trace.o link_dp_cts.o link_fpga.o
+LINK_ACCESSORIES = link_dp_trace.o link_dp_cts.o
AMD_DAL_LINK_ACCESSORIES = $(addprefix $(AMDDALPATH)/dc/link/accessories/, \
$(LINK_ACCESSORIES))
@@ -61,4 +61,4 @@ link_edp_panel_control.o link_dp_irq_handler.o link_dp_dpia_bw.o
AMD_DAL_LINK_PROTOCOLS = $(addprefix $(AMDDALPATH)/dc/link/protocols/, \
$(LINK_PROTOCOLS))
-AMD_DISPLAY_FILES += $(AMD_DAL_LINK_PROTOCOLS) \ No newline at end of file
+AMD_DISPLAY_FILES += $(AMD_DAL_LINK_PROTOCOLS)
diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c
index fe4282771cd0..9e33bf937a69 100644
--- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c
+++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c
@@ -34,6 +34,7 @@
#include "dm_helpers.h"
#include "dc_dmub_srv.h"
#include "dce/dmub_hw_lock_mgr.h"
+#include "clk_mgr.h"
#define DC_LOGGER \
link->ctx->logger
@@ -53,6 +54,7 @@ static enum dc_link_rate get_link_rate_from_test_link_rate(uint8_t test_rate)
return LINK_RATE_UHBR10;
case DP_TEST_LINK_RATE_UHBR20:
return LINK_RATE_UHBR20;
+ case DP_TEST_LINK_RATE_UHBR13_5_LEGACY:
case DP_TEST_LINK_RATE_UHBR13_5:
return LINK_RATE_UHBR13_5;
default:
@@ -60,30 +62,25 @@ static enum dc_link_rate get_link_rate_from_test_link_rate(uint8_t test_rate)
}
}
-static bool is_dp_phy_sqaure_pattern(enum dp_test_pattern test_pattern)
-{
- return (DP_TEST_PATTERN_SQUARE_BEGIN <= test_pattern &&
- test_pattern <= DP_TEST_PATTERN_SQUARE_END);
-}
-
-static bool is_dp_phy_pattern(enum dp_test_pattern test_pattern)
-{
- if ((DP_TEST_PATTERN_PHY_PATTERN_BEGIN <= test_pattern &&
- test_pattern <= DP_TEST_PATTERN_PHY_PATTERN_END) ||
- test_pattern == DP_TEST_PATTERN_VIDEO_MODE)
- return true;
- else
- return false;
-}
-
static void dp_retrain_link_dp_test(struct dc_link *link,
struct dc_link_settings *link_setting,
bool skip_video_pattern)
{
struct pipe_ctx *pipes[MAX_PIPES];
struct dc_state *state = link->dc->current_state;
+ struct dc_stream_update stream_update = { 0 };
+ bool dpms_off = false;
+ bool needs_divider_update = false;
+ bool was_hpo_acquired = resource_is_hpo_acquired(link->dc->current_state);
+ bool is_hpo_acquired;
uint8_t count;
int i;
+ struct audio_output audio_output[MAX_PIPES];
+ struct dc_stream_state *streams_on_link[MAX_PIPES];
+ int num_streams_on_link = 0;
+
+ needs_divider_update = (link->dc->link_srv->dp_get_encoding_format(link_setting) !=
+ link->dc->link_srv->dp_get_encoding_format((const struct dc_link_settings *) &link->cur_link_settings));
udelay(100);
@@ -96,10 +93,66 @@ static void dp_retrain_link_dp_test(struct dc_link *link,
link->dc,
state,
pipes[i]);
+
+ // Disable OTG and re-enable after updating clocks
+ pipes[i]->stream_res.tg->funcs->disable_crtc(pipes[i]->stream_res.tg);
+ }
+
+ if (needs_divider_update && link->dc->res_pool->funcs->update_dc_state_for_encoder_switch) {
+ link->dc->res_pool->funcs->update_dc_state_for_encoder_switch(link,
+ link_setting, count,
+ *pipes, &audio_output[0]);
+ for (i = 0; i < count; i++) {
+ pipes[i]->clock_source->funcs->program_pix_clk(
+ pipes[i]->clock_source,
+ &pipes[i]->stream_res.pix_clk_params,
+ link->dc->link_srv->dp_get_encoding_format(&pipes[i]->link_config.dp_link_settings),
+ &pipes[i]->pll_settings);
+
+ if (pipes[i]->stream_res.audio != NULL) {
+ const struct link_hwss *link_hwss = get_link_hwss(
+ link, &pipes[i]->link_res);
+
+ link_hwss->setup_audio_output(pipes[i], &audio_output[i],
+ pipes[i]->stream_res.audio->inst);
+
+ pipes[i]->stream_res.audio->funcs->az_configure(
+ pipes[i]->stream_res.audio,
+ pipes[i]->stream->signal,
+ &audio_output[i].crtc_info,
+ &pipes[i]->stream->audio_info,
+ &audio_output[i].dp_link_info);
+
+ if (link->dc->config.disable_hbr_audio_dp2 &&
+ pipes[i]->stream_res.audio->funcs->az_disable_hbr_audio &&
+ link->dc->link_srv->dp_is_128b_132b_signal(pipes[i]))
+ pipes[i]->stream_res.audio->funcs->az_disable_hbr_audio(pipes[i]->stream_res.audio);
+ }
+ }
+ }
+
+ // Toggle on HPO I/O if necessary
+ is_hpo_acquired = resource_is_hpo_acquired(state);
+ if (was_hpo_acquired != is_hpo_acquired && link->dc->hwss.setup_hpo_hw_control)
+ link->dc->hwss.setup_hpo_hw_control(link->dc->hwseq, is_hpo_acquired);
+
+ for (i = 0; i < count; i++)
+ pipes[i]->stream_res.tg->funcs->enable_crtc(pipes[i]->stream_res.tg);
+
+ // Set DPMS on with stream update
+ // Cache all streams on current link since dc_update_planes_and_stream might kill current_state
+ for (i = 0; i < MAX_PIPES; i++) {
+ if (state->streams[i] && state->streams[i]->link && state->streams[i]->link == link)
+ streams_on_link[num_streams_on_link++] = state->streams[i];
}
- for (i = count-1; i >= 0; i--)
- link_set_dpms_on(state, pipes[i]);
+ for (i = 0; i < num_streams_on_link; i++) {
+ if (streams_on_link[i] && streams_on_link[i]->link && streams_on_link[i]->link == link) {
+ stream_update.stream = streams_on_link[i];
+ stream_update.dpms_off = &dpms_off;
+ dc_update_planes_and_stream(state->clk_mgr->ctx->dc, NULL, 0, streams_on_link[i], &stream_update);
+ }
+ }
}
static void dp_test_send_link_training(struct dc_link *link)
@@ -119,6 +172,11 @@ static void dp_test_send_link_training(struct dc_link *link)
1);
link_settings.link_rate = get_link_rate_from_test_link_rate(test_rate);
+ if (link_settings.link_rate == LINK_RATE_UNKNOWN) {
+ DC_LOG_ERROR("%s: Invalid test link rate.", __func__);
+ ASSERT(0);
+ }
+
/* Set preferred link settings */
link->verified_link_cap.lane_count = link_settings.lane_count;
link->verified_link_cap.link_rate = link_settings.link_rate;
@@ -253,7 +311,7 @@ static void dp_test_send_phy_test_pattern(struct dc_link *link)
link_training_settings.lttpr_mode = dp_decide_lttpr_mode(link, &link->cur_link_settings);
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
link_training_settings.lttpr_mode == LTTPR_MODE_TRANSPARENT)
dp_fixed_vs_pe_read_lane_adjust(
link,
@@ -355,7 +413,7 @@ static void dp_test_send_phy_test_pattern(struct dc_link *link)
test_pattern_size);
}
- if (is_dp_phy_sqaure_pattern(test_pattern)) {
+ if (IS_DP_PHY_SQUARE_PATTERN(test_pattern)) {
test_pattern_size = 1; // Square pattern data is 1 byte (DP spec)
core_link_read_dpcd(
link,
@@ -429,49 +487,13 @@ static void set_crtc_test_pattern(struct dc_link *link,
struct bit_depth_reduction_params params;
struct output_pixel_processor *opp = pipe_ctx->stream_res.opp;
struct pipe_ctx *odm_pipe;
- int odm_cnt = 1;
- int h_active = pipe_ctx->stream->timing.h_addressable +
- pipe_ctx->stream->timing.h_border_left +
- pipe_ctx->stream->timing.h_border_right;
- int v_active = pipe_ctx->stream->timing.v_addressable +
- pipe_ctx->stream->timing.v_border_bottom +
- pipe_ctx->stream->timing.v_border_top;
- int odm_slice_width, last_odm_slice_width, offset = 0;
+ struct test_pattern_params *tp_params;
memset(&params, 0, sizeof(params));
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
- odm_cnt++;
-
- odm_slice_width = h_active / odm_cnt;
- last_odm_slice_width = h_active - odm_slice_width * (odm_cnt - 1);
-
- switch (test_pattern) {
- case DP_TEST_PATTERN_COLOR_SQUARES:
- controller_test_pattern =
- CONTROLLER_DP_TEST_PATTERN_COLORSQUARES;
- break;
- case DP_TEST_PATTERN_COLOR_SQUARES_CEA:
- controller_test_pattern =
- CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA;
- break;
- case DP_TEST_PATTERN_VERTICAL_BARS:
- controller_test_pattern =
- CONTROLLER_DP_TEST_PATTERN_VERTICALBARS;
- break;
- case DP_TEST_PATTERN_HORIZONTAL_BARS:
- controller_test_pattern =
- CONTROLLER_DP_TEST_PATTERN_HORIZONTALBARS;
- break;
- case DP_TEST_PATTERN_COLOR_RAMP:
- controller_test_pattern =
- CONTROLLER_DP_TEST_PATTERN_COLORRAMP;
- break;
- default:
- controller_test_pattern =
- CONTROLLER_DP_TEST_PATTERN_VIDEOMODE;
- break;
- }
+ resource_build_test_pattern_params(&link->dc->current_state->res_ctx,
+ pipe_ctx);
+ controller_test_pattern = pipe_ctx->stream_res.test_pattern_params.test_pattern;
switch (test_pattern) {
case DP_TEST_PATTERN_COLOR_SQUARES:
@@ -490,51 +512,29 @@ static void set_crtc_test_pattern(struct dc_link *link,
enum controller_dp_color_space controller_color_space;
struct output_pixel_processor *odm_opp;
- switch (test_pattern_color_space) {
- case DP_TEST_PATTERN_COLOR_SPACE_RGB:
- controller_color_space = CONTROLLER_DP_COLOR_SPACE_RGB;
- break;
- case DP_TEST_PATTERN_COLOR_SPACE_YCBCR601:
- controller_color_space = CONTROLLER_DP_COLOR_SPACE_YCBCR601;
- break;
- case DP_TEST_PATTERN_COLOR_SPACE_YCBCR709:
- controller_color_space = CONTROLLER_DP_COLOR_SPACE_YCBCR709;
- break;
- case DP_TEST_PATTERN_COLOR_SPACE_UNDEFINED:
- default:
- controller_color_space = CONTROLLER_DP_COLOR_SPACE_UDEFINED;
+ controller_color_space = pipe_ctx->stream_res.test_pattern_params.color_space;
+
+ if (controller_color_space == CONTROLLER_DP_COLOR_SPACE_UDEFINED) {
DC_LOG_ERROR("%s: Color space must be defined for test pattern", __func__);
ASSERT(0);
- break;
}
odm_pipe = pipe_ctx;
- while (odm_pipe->next_odm_pipe) {
+ while (odm_pipe) {
+ tp_params = &odm_pipe->stream_res.test_pattern_params;
odm_opp = odm_pipe->stream_res.opp;
odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
link->dc->hwss.set_disp_pattern_generator(link->dc,
odm_pipe,
- controller_test_pattern,
- controller_color_space,
- color_depth,
+ tp_params->test_pattern,
+ tp_params->color_space,
+ tp_params->color_depth,
NULL,
- odm_slice_width,
- v_active,
- offset);
- offset += odm_slice_width;
+ tp_params->width,
+ tp_params->height,
+ tp_params->offset);
odm_pipe = odm_pipe->next_odm_pipe;
}
- odm_opp = odm_pipe->stream_res.opp;
- odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
- link->dc->hwss.set_disp_pattern_generator(link->dc,
- odm_pipe,
- controller_test_pattern,
- controller_color_space,
- color_depth,
- NULL,
- last_odm_slice_width,
- v_active,
- offset);
}
}
break;
@@ -552,32 +552,21 @@ static void set_crtc_test_pattern(struct dc_link *link,
struct output_pixel_processor *odm_opp;
odm_pipe = pipe_ctx;
- while (odm_pipe->next_odm_pipe) {
+ while (odm_pipe) {
+ tp_params = &odm_pipe->stream_res.test_pattern_params;
odm_opp = odm_pipe->stream_res.opp;
odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
link->dc->hwss.set_disp_pattern_generator(link->dc,
odm_pipe,
- CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
- CONTROLLER_DP_COLOR_SPACE_UDEFINED,
- color_depth,
+ tp_params->test_pattern,
+ tp_params->color_space,
+ tp_params->color_depth,
NULL,
- odm_slice_width,
- v_active,
- offset);
- offset += odm_slice_width;
+ tp_params->width,
+ tp_params->height,
+ tp_params->offset);
odm_pipe = odm_pipe->next_odm_pipe;
}
- odm_opp = odm_pipe->stream_res.opp;
- odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
- link->dc->hwss.set_disp_pattern_generator(link->dc,
- odm_pipe,
- CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
- CONTROLLER_DP_COLOR_SPACE_UDEFINED,
- color_depth,
- NULL,
- last_odm_slice_width,
- v_active,
- offset);
}
}
break;
@@ -661,6 +650,7 @@ bool dp_set_test_pattern(
const unsigned char *p_custom_pattern,
unsigned int cust_pattern_size)
{
+ const struct link_hwss *link_hwss;
struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx;
struct pipe_ctx *pipe_ctx = NULL;
unsigned int lane;
@@ -685,6 +675,8 @@ bool dp_set_test_pattern(
if (pipe_ctx == NULL)
return false;
+ link->pending_test_pattern = test_pattern;
+
/* Reset CRTC Test Pattern if it is currently running and request is VideoMode */
if (link->test_pattern_enabled && test_pattern ==
DP_TEST_PATTERN_VIDEO_MODE) {
@@ -705,15 +697,16 @@ bool dp_set_test_pattern(
/* Reset Test Pattern state */
link->test_pattern_enabled = false;
link->current_test_pattern = test_pattern;
+ link->pending_test_pattern = DP_TEST_PATTERN_UNSUPPORTED;
return true;
}
/* Check for PHY Test Patterns */
- if (is_dp_phy_pattern(test_pattern)) {
+ if (IS_DP_PHY_PATTERN(test_pattern)) {
/* Set DPCD Lane Settings before running test pattern */
if (p_link_settings != NULL) {
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
p_link_settings->lttpr_mode == LTTPR_MODE_TRANSPARENT) {
dp_fixed_vs_pe_set_retimer_lane_settings(
link,
@@ -743,6 +736,7 @@ bool dp_set_test_pattern(
/* Set Test Pattern state */
link->test_pattern_enabled = true;
link->current_test_pattern = test_pattern;
+ link->pending_test_pattern = DP_TEST_PATTERN_UNSUPPORTED;
if (p_link_settings != NULL)
dpcd_set_link_settings(link,
p_link_settings);
@@ -818,7 +812,7 @@ bool dp_set_test_pattern(
return false;
if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) {
- if (is_dp_phy_sqaure_pattern(test_pattern))
+ if (IS_DP_PHY_SQUARE_PATTERN(test_pattern))
core_link_write_dpcd(link,
DP_LINK_SQUARE_PATTERN,
p_custom_pattern,
@@ -878,8 +872,11 @@ bool dp_set_test_pattern(
break;
}
+ if (!pipe_ctx->stream)
+ return false;
+
if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_enable) {
- if (pipe_ctx->stream && should_use_dmub_lock(pipe_ctx->stream->link)) {
+ if (should_use_dmub_lock(pipe_ctx->stream->link)) {
union dmub_hw_lock_flags hw_locks = { 0 };
struct dmub_hw_lock_inst_flags inst_flags = { 0 };
@@ -897,17 +894,21 @@ bool dp_set_test_pattern(
pipe_ctx->stream_res.tg->funcs->lock(pipe_ctx->stream_res.tg);
/* update MSA to requested color space */
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute(pipe_ctx->stream_res.stream_enc,
- &pipe_ctx->stream->timing,
- color_space,
- pipe_ctx->stream->use_vsc_sdp_for_colorimetry,
- link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP);
+ link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
+ pipe_ctx->stream->output_color_space = color_space;
+ link_hwss->setup_stream_attribute(pipe_ctx);
if (pipe_ctx->stream->use_vsc_sdp_for_colorimetry) {
if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA)
pipe_ctx->stream->vsc_infopacket.sb[17] |= (1 << 7); // sb17 bit 7 Dynamic Range: 0 = VESA range, 1 = CTA range
else
pipe_ctx->stream->vsc_infopacket.sb[17] &= ~(1 << 7);
+
+ if (color_space == COLOR_SPACE_YCBCR601_LIMITED)
+ pipe_ctx->stream->vsc_infopacket.sb[16] &= 0xf0;
+ else if (color_space == COLOR_SPACE_YCBCR709_LIMITED)
+ pipe_ctx->stream->vsc_infopacket.sb[16] |= 1;
+
resource_build_info_frame(pipe_ctx);
link->dc->hwss.update_info_frame(pipe_ctx);
}
@@ -923,7 +924,7 @@ bool dp_set_test_pattern(
CRTC_STATE_VACTIVE);
if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_disable) {
- if (pipe_ctx->stream && should_use_dmub_lock(pipe_ctx->stream->link)) {
+ if (should_use_dmub_lock(pipe_ctx->stream->link)) {
union dmub_hw_lock_flags hw_locks = { 0 };
struct dmub_hw_lock_inst_flags inst_flags = { 0 };
@@ -942,6 +943,7 @@ bool dp_set_test_pattern(
/* Set Test Pattern state */
link->test_pattern_enabled = true;
link->current_test_pattern = test_pattern;
+ link->pending_test_pattern = DP_TEST_PATTERN_UNSUPPORTED;
}
return true;
@@ -953,7 +955,7 @@ void dp_set_preferred_link_settings(struct dc *dc,
{
int i;
struct pipe_ctx *pipe;
- struct dc_stream_state *link_stream;
+ struct dc_stream_state *link_stream = 0;
struct dc_link_settings store_settings = *link_setting;
link->preferred_link_setting = store_settings;
diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h
index eae23ea7f6ec..033650cdb811 100644
--- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h
+++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h
@@ -24,7 +24,7 @@
*/
#ifndef __LINK_DP_CTS_H__
#define __LINK_DP_CTS_H__
-#include "link.h"
+#include "link_service.h"
void dp_handle_automated_test(struct dc_link *link);
bool dp_set_test_pattern(
struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c
index fbcd8fb58ea8..c8c55f196f8d 100644
--- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c
+++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c
@@ -24,7 +24,6 @@
*/
#include "link_dp_trace.h"
#include "link/protocols/link_dpcd.h"
-#include "link.h"
void dp_trace_init(struct dc_link *link)
{
diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h
index ab437a0c9101..9ff4a6c46a2b 100644
--- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h
+++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h
@@ -24,7 +24,7 @@
*/
#ifndef __LINK_DP_TRACE_H__
#define __LINK_DP_TRACE_H__
-#include "link.h"
+#include "link_service.h"
void dp_trace_init(struct dc_link *link);
void dp_trace_reset(struct dc_link *link);
diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.c
deleted file mode 100644
index d3cc604eed67..000000000000
--- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_fpga.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright 2023 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-#include "link_fpga.h"
-#include "link/link_dpms.h"
-#include "dm_helpers.h"
-#include "link_hwss.h"
-#include "dccg.h"
-#include "resource.h"
-
-#define DC_LOGGER_INIT(logger)
-
-void dp_fpga_hpo_enable_link_and_stream(struct dc_state *state, struct pipe_ctx *pipe_ctx)
-{
- struct dc *dc = pipe_ctx->stream->ctx->dc;
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct link_mst_stream_allocation_table proposed_table = {0};
- struct fixed31_32 avg_time_slots_per_mtp;
- uint8_t req_slot_count = 0;
- uint8_t vc_id = 1; /// VC ID always 1 for SST
- struct dc_link_settings link_settings = pipe_ctx->link_config.dp_link_settings;
- const struct link_hwss *link_hwss = get_link_hwss(stream->link, &pipe_ctx->link_res);
- DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
-
- stream->link->cur_link_settings = link_settings;
-
- if (link_hwss->ext.enable_dp_link_output)
- link_hwss->ext.enable_dp_link_output(stream->link, &pipe_ctx->link_res,
- stream->signal, pipe_ctx->clock_source->id,
- &link_settings);
-
- /* Enable DP_STREAM_ENC */
- dc->hwss.enable_stream(pipe_ctx);
-
- /* Set DPS PPS SDP (AKA "info frames") */
- if (pipe_ctx->stream->timing.flags.DSC) {
- link_set_dsc_pps_packet(pipe_ctx, true, true);
- }
-
- /* Allocate Payload */
- if ((stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) && (state->stream_count > 1)) {
- // MST case
- uint8_t i;
-
- proposed_table.stream_count = state->stream_count;
- for (i = 0; i < state->stream_count; i++) {
- avg_time_slots_per_mtp = link_calculate_sst_avg_time_slots_per_mtp(state->streams[i], state->streams[i]->link);
- req_slot_count = dc_fixpt_ceil(avg_time_slots_per_mtp);
- proposed_table.stream_allocations[i].slot_count = req_slot_count;
- proposed_table.stream_allocations[i].vcp_id = i+1;
- /* NOTE: This makes assumption that pipe_ctx index is same as stream index */
- proposed_table.stream_allocations[i].hpo_dp_stream_enc = state->res_ctx.pipe_ctx[i].stream_res.hpo_dp_stream_enc;
- }
- } else {
- // SST case
- avg_time_slots_per_mtp = link_calculate_sst_avg_time_slots_per_mtp(stream, stream->link);
- req_slot_count = dc_fixpt_ceil(avg_time_slots_per_mtp);
- proposed_table.stream_count = 1; /// Always 1 stream for SST
- proposed_table.stream_allocations[0].slot_count = req_slot_count;
- proposed_table.stream_allocations[0].vcp_id = vc_id;
- proposed_table.stream_allocations[0].hpo_dp_stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc;
- }
-
- link_hwss->ext.update_stream_allocation_table(stream->link,
- &pipe_ctx->link_res,
- &proposed_table);
-
- if (link_hwss->ext.set_throttled_vcp_size)
- link_hwss->ext.set_throttled_vcp_size(pipe_ctx, avg_time_slots_per_mtp);
-
- dc->hwss.unblank_stream(pipe_ctx, &stream->link->cur_link_settings);
- dc->hwss.enable_audio_stream(pipe_ctx);
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c
index 1328a0ade342..892907991f91 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c
@@ -26,6 +26,16 @@
#include "core_types.h"
#include "link_enc_cfg.h"
+/**
+ * DOC: overview
+ *
+ * Display Input Output (DIO) is the display input and output unit in DCN. It
+ * includes output encoders to support different display outputs, such as
+ * DisplayPort, HDMI, DVI, and others. It also includes the control
+ * and status channels for these interfaces.
+ */
+
+
void set_dio_throttled_vcp_size(struct pipe_ctx *pipe_ctx,
struct fixed31_32 throttled_vcp_size)
{
@@ -38,29 +48,56 @@ void set_dio_throttled_vcp_size(struct pipe_ctx *pipe_ctx,
void setup_dio_stream_encoder(struct pipe_ctx *pipe_ctx)
{
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link);
+ struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc;
struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc;
+ if (!pipe_ctx->stream->ctx->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link);
+ if (!link_enc) {
+ ASSERT(link_enc);
+ return;
+ }
+
link_enc->funcs->connect_dig_be_to_fe(link_enc,
pipe_ctx->stream_res.stream_enc->id, true);
if (dc_is_dp_signal(pipe_ctx->stream->signal))
pipe_ctx->stream->ctx->dc->link_srv->dp_trace_source_sequence(pipe_ctx->stream->link,
DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_BE);
+ if (stream_enc->funcs->enable_stream)
+ stream_enc->funcs->enable_stream(stream_enc,
+ pipe_ctx->stream->signal, true);
if (stream_enc->funcs->map_stream_to_link)
stream_enc->funcs->map_stream_to_link(stream_enc,
stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A);
+ if (stream_enc->funcs->set_input_mode)
+ stream_enc->funcs->set_input_mode(stream_enc,
+ pipe_ctx->stream_res.pix_clk_params.dio_se_pix_per_cycle);
if (stream_enc->funcs->enable_fifo)
stream_enc->funcs->enable_fifo(stream_enc);
}
void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx)
{
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link);
+ struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc;
struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc;
- if (stream_enc && stream_enc->funcs->disable_fifo)
- stream_enc->funcs->disable_fifo(stream_enc);
+ if (!pipe_ctx->stream->ctx->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link);
+ if (!link_enc) {
+ ASSERT(link_enc);
+ return;
+ }
+
+ if (!stream_enc)
+ return;
+ if (stream_enc->funcs->disable_fifo)
+ stream_enc->funcs->disable_fifo(stream_enc);
+ if (stream_enc->funcs->set_input_mode)
+ stream_enc->funcs->set_input_mode(stream_enc, 0);
+ if (stream_enc->funcs->enable_stream)
+ stream_enc->funcs->enable_stream(stream_enc,
+ pipe_ctx->stream->signal, false);
link_enc->funcs->connect_dig_be_to_fe(
link_enc,
pipe_ctx->stream_res.stream_enc->id,
@@ -101,8 +138,7 @@ void setup_dio_stream_attribute(struct pipe_ctx *pipe_ctx)
stream_encoder->funcs->dvi_set_stream_attribute(
stream_encoder,
&stream->timing,
- (stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) ?
- true : false);
+ stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK);
else if (dc_is_lvds_signal(stream->signal))
stream_encoder->funcs->lvds_set_stream_attribute(
stream_encoder,
@@ -119,7 +155,14 @@ void enable_dio_dp_link_output(struct dc_link *link,
enum clock_source_id clock_source,
const struct dc_link_settings *link_settings)
{
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
+ struct link_encoder *link_enc = link_res->dio_link_enc;
+
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+ if (!link_enc) {
+ ASSERT(link_enc);
+ return;
+ }
if (dc_is_dp_sst_signal(signal))
link_enc->funcs->enable_dp_output(
@@ -139,7 +182,14 @@ void disable_dio_link_output(struct dc_link *link,
const struct link_resource *link_res,
enum signal_type signal)
{
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
+ struct link_encoder *link_enc = link_res->dio_link_enc;
+
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+ if (!link_enc) {
+ ASSERT(link_enc);
+ return;
+ }
link_enc->funcs->disable_output(link_enc, signal);
link->dc->link_srv->dp_trace_source_sequence(link,
@@ -150,7 +200,14 @@ void set_dio_dp_link_test_pattern(struct dc_link *link,
const struct link_resource *link_res,
struct encoder_set_dp_phy_pattern_param *tp_params)
{
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
+ struct link_encoder *link_enc = link_res->dio_link_enc;
+
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+ if (!link_enc) {
+ ASSERT(link_enc);
+ return;
+ }
link_enc->funcs->dp_set_phy_pattern(link_enc, tp_params);
link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_SET_SOURCE_PATTERN);
@@ -161,7 +218,14 @@ void set_dio_dp_lane_settings(struct dc_link *link,
const struct dc_link_settings *link_settings,
const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX])
{
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
+ struct link_encoder *link_enc = link_res->dio_link_enc;
+
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+ if (!link_enc) {
+ ASSERT(link_enc);
+ return;
+ }
link_enc->funcs->dp_set_lane_settings(link_enc, link_settings, lane_settings);
}
@@ -170,9 +234,15 @@ void update_dio_stream_allocation_table(struct dc_link *link,
const struct link_resource *link_res,
const struct link_mst_stream_allocation_table *table)
{
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
+ struct link_encoder *link_enc = link_res->dio_link_enc;
+
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+ if (!link_enc) {
+ ASSERT(link_enc);
+ return;
+ }
- ASSERT(link_enc);
link_enc->funcs->update_mst_stream_allocation_table(link_enc, table);
}
@@ -244,12 +314,34 @@ static const struct link_hwss dio_link_hwss = {
},
};
+/**
+ * can_use_dio_link_hwss - Check if the link_hwss is accessible
+ *
+ * @link: Reference to a link struct containing one or more sinks and the
+ * connection status.
+ * @link_res: Mappable hardware resource used to enable a link.
+ *
+ * Returns:
+ * true if a link encoder is available from @link or @link_res, else false.
+ */
bool can_use_dio_link_hwss(const struct dc_link *link,
const struct link_resource *link_res)
{
- return link->link_enc != NULL;
+ if (!link->dc->config.unify_link_enc_assignment)
+ return link->link_enc != NULL;
+ else
+ return link_res->dio_link_enc != NULL;
}
+/**
+ * get_dio_link_hwss - Return link_hwss reference
+ *
+ * This function is a getter that returns the link_hwss instance populated in
+ * the link_hwss_dio.c file.
+ *
+ * Returns:
+ * A pointer to the populated struct link_hwss.
+ */
const struct link_hwss *get_dio_link_hwss(void)
{
return &dio_link_hwss;
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
index f4633d3cf9b9..4a25210a344f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h
@@ -22,11 +22,12 @@
* Authors: AMD
*
*/
+
#ifndef __LINK_HWSS_DIO_H__
#define __LINK_HWSS_DIO_H__
#include "link_hwss.h"
-#include "link.h"
+#include "link_service.h"
const struct link_hwss *get_dio_link_hwss(void);
bool can_use_dio_link_hwss(const struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c
index b659baa23147..e1dff4e3f446 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c
@@ -80,21 +80,23 @@ static bool set_dio_fixed_vs_pe_retimer_dp_link_test_pattern_override(struct dc_
const uint8_t vendor_lttpr_write_data_pg0[4] = {0x1, 0x11, 0x0, 0x0};
const uint8_t vendor_lttpr_exit_manual_automation_0[4] = {0x1, 0x11, 0x0, 0x06};
+ if (!link->dpcd_caps.lttpr_caps.main_link_channel_coding.bits.DP_128b_132b_SUPPORTED)
+ return false;
if (tp_params == NULL)
return false;
- if (link->current_test_pattern >= DP_TEST_PATTERN_SQUARE_BEGIN &&
- link->current_test_pattern <= DP_TEST_PATTERN_SQUARE_END) {
+ if (IS_DP_PHY_SQUARE_PATTERN(link->current_test_pattern))
// Deprogram overrides from previous test pattern
dp_dio_fixed_vs_pe_retimer_exit_manual_automation(link);
- }
switch (tp_params->dp_phy_pattern) {
case DP_TEST_PATTERN_80BIT_CUSTOM:
if (tp_params->custom_pattern_size == 0 || memcmp(tp_params->custom_pattern,
pltpat_custom, tp_params->custom_pattern_size) != 0)
return false;
+ hw_tp_params.custom_pattern = tp_params->custom_pattern;
+ hw_tp_params.custom_pattern_size = tp_params->custom_pattern_size;
break;
case DP_TEST_PATTERN_D102:
break;
@@ -125,7 +127,10 @@ static void set_dio_fixed_vs_pe_retimer_dp_link_test_pattern(struct dc_link *lin
const struct link_resource *link_res,
struct encoder_set_dp_phy_pattern_param *tp_params)
{
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
+ struct link_encoder *link_enc = link_res->dio_link_enc;
+
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
if (!set_dio_fixed_vs_pe_retimer_dp_link_test_pattern_override(
link, link_res, tp_params, get_dio_link_hwss())) {
@@ -185,13 +190,7 @@ static const struct link_hwss dio_fixed_vs_pe_retimer_link_hwss = {
bool requires_fixed_vs_pe_retimer_dio_link_hwss(const struct dc_link *link)
{
- if (!(link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN))
- return false;
-
- if (!link->dpcd_caps.lttpr_caps.main_link_channel_coding.bits.DP_128b_132b_SUPPORTED)
- return false;
-
- return true;
+ return ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN);
}
const struct link_hwss *get_dio_fixed_vs_pe_retimer_link_hwss(void)
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h
index 9ac08a332540..cf578a8662a4 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h
@@ -25,7 +25,7 @@
#ifndef __LINK_HWSS_DIO_FIXED_VS_PE_RETIMER_H__
#define __LINK_HWSS_DIO_FIXED_VS_PE_RETIMER_H__
-#include "link.h"
+#include "link_service.h"
uint32_t dp_dio_fixed_vs_pe_retimer_get_lttpr_write_address(struct dc_link *link);
uint8_t dp_dio_fixed_vs_pe_retimer_lane_cfg_to_hw_cfg(struct dc_link *link);
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c
index 861f3cd5b356..81bf3c5e1fdf 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c
@@ -27,18 +27,23 @@
#include "link_hwss_dio.h"
#include "link_enc_cfg.h"
+#define DC_LOGGER \
+ link->ctx->logger
#define DC_LOGGER_INIT(logger)
static void update_dpia_stream_allocation_table(struct dc_link *link,
const struct link_resource *link_res,
const struct link_mst_stream_allocation_table *table)
{
- struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
+ struct link_encoder *link_enc = link_res->dio_link_enc;
static enum dc_status status;
uint8_t mst_alloc_slots = 0, prev_mst_slots_in_use = 0xFF;
int i;
DC_LOGGER_INIT(link->ctx->logger);
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+
for (i = 0; i < table->stream_count; i++)
mst_alloc_slots += table->stream_allocations[i].slot_count;
@@ -48,23 +53,110 @@ static void update_dpia_stream_allocation_table(struct dc_link *link,
DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n",
status, mst_alloc_slots, prev_mst_slots_in_use);
- ASSERT(link_enc);
- link_enc->funcs->update_mst_stream_allocation_table(link_enc, table);
+ if (link_enc)
+ link_enc->funcs->update_mst_stream_allocation_table(link_enc, table);
+}
+
+static void set_dio_dpia_link_test_pattern(struct dc_link *link,
+ const struct link_resource *link_res,
+ struct encoder_set_dp_phy_pattern_param *tp_params)
+{
+ if (tp_params->dp_phy_pattern != DP_TEST_PATTERN_VIDEO_MODE)
+ return;
+
+ struct link_encoder *link_enc = link_res->dio_link_enc;
+
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+
+ if (!link_enc)
+ return;
+
+ link_enc->funcs->dp_set_phy_pattern(link_enc, tp_params);
+ link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_SET_SOURCE_PATTERN);
+}
+
+static void set_dio_dpia_lane_settings(struct dc_link *link,
+ const struct link_resource *link_res,
+ const struct dc_link_settings *link_settings,
+ const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX])
+{
+}
+
+static void enable_dpia_link_output(struct dc_link *link,
+ const struct link_resource *link_res,
+ enum signal_type signal,
+ enum clock_source_id clock_source,
+ const struct dc_link_settings *link_settings)
+{
+ struct link_encoder *link_enc = link_res->dio_link_enc;
+ DC_LOGGER_INIT(link->ctx->logger);
+
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+
+ if (link_enc != NULL) {
+ if (link->dc->config.enable_dpia_pre_training || link->dc->config.unify_link_enc_assignment) {
+ uint8_t fec_rdy = link->dc->link_srv->dp_should_enable_fec(link);
+ uint8_t digmode = dc_is_dp_sst_signal(signal) ? DIG_SST_MODE : DIG_MST_MODE;
+
+ if (link_enc->funcs->enable_dpia_output)
+ link_enc->funcs->enable_dpia_output(
+ link_enc,
+ link_settings,
+ link->ddc_hw_inst,
+ digmode,
+ fec_rdy);
+ else
+ DC_LOG_ERROR("%s: link encoder does not support enable_dpia_output\n", __func__);
+ } else
+ enable_dio_dp_link_output(link, link_res, signal, clock_source, link_settings);
+
+ }
+
+ link->dc->link_srv->dp_trace_source_sequence(link,
+ DPCD_SOURCE_SEQ_AFTER_ENABLE_LINK_PHY);
+}
+
+static void disable_dpia_link_output(struct dc_link *link,
+ const struct link_resource *link_res,
+ enum signal_type signal)
+{
+ struct link_encoder *link_enc = link_res->dio_link_enc;
+ DC_LOGGER_INIT(link->ctx->logger);
+
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+
+ if (link_enc != NULL) {
+ if (link->dc->config.enable_dpia_pre_training || link->dc->config.unify_link_enc_assignment) {
+ uint8_t digmode = dc_is_dp_sst_signal(signal) ? DIG_SST_MODE : DIG_MST_MODE;
+
+ if (link_enc->funcs->disable_dpia_output)
+ link_enc->funcs->disable_dpia_output(link_enc, link->ddc_hw_inst, digmode);
+ else
+ DC_LOG_ERROR("%s: link encoder does not support disable_dpia_output\n", __func__);
+ } else
+ link_enc->funcs->disable_output(link_enc, signal);
+ }
+
+ link->dc->link_srv->dp_trace_source_sequence(link,
+ DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
}
static const struct link_hwss dpia_link_hwss = {
.setup_stream_encoder = setup_dio_stream_encoder,
.reset_stream_encoder = reset_dio_stream_encoder,
.setup_stream_attribute = setup_dio_stream_attribute,
- .disable_link_output = disable_dio_link_output,
+ .disable_link_output = disable_dpia_link_output,
.setup_audio_output = setup_dio_audio_output,
.enable_audio_packet = enable_dio_audio_packet,
.disable_audio_packet = disable_dio_audio_packet,
.ext = {
.set_throttled_vcp_size = set_dio_throttled_vcp_size,
- .enable_dp_link_output = enable_dio_dp_link_output,
- .set_dp_link_test_pattern = set_dio_dp_link_test_pattern,
- .set_dp_lane_settings = set_dio_dp_lane_settings,
+ .enable_dp_link_output = enable_dpia_link_output,
+ .set_dp_link_test_pattern = set_dio_dpia_link_test_pattern,
+ .set_dp_lane_settings = set_dio_dpia_lane_settings,
.update_stream_allocation_table = update_dpia_stream_allocation_table,
},
};
@@ -72,8 +164,10 @@ static const struct link_hwss dpia_link_hwss = {
bool can_use_dpia_link_hwss(const struct dc_link *link,
const struct link_resource *link_res)
{
- return link->is_dig_mapping_flexible &&
- link->dc->res_pool->funcs->link_encs_assign;
+ if (!link->dc->config.unify_link_enc_assignment)
+ return link->is_dig_mapping_flexible && link->dc->res_pool->funcs->link_encs_assign;
+ else
+ return link->is_dig_mapping_flexible && link_res->dio_link_enc != NULL;
}
const struct link_hwss *get_dpia_link_hwss(void)
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h
index ad16ec5d9bb7..259e0f4775e1 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h
@@ -27,6 +27,9 @@
#include "link_hwss.h"
+#define DIG_SST_MODE 0
+#define DIG_MST_MODE 5
+
const struct link_hwss *get_dpia_link_hwss(void);
bool can_use_dpia_link_hwss(const struct dc_link *link,
const struct link_resource *link_res);
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c
index e1257404357b..cec68c5dba13 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c
@@ -28,6 +28,8 @@
#include "dccg.h"
#include "clk_mgr.h"
+#define DC_LOGGER link->ctx->logger
+
void set_hpo_dp_throttled_vcp_size(struct pipe_ctx *pipe_ctx,
struct fixed31_32 throttled_vcp_size)
{
@@ -108,6 +110,11 @@ void enable_hpo_dp_link_output(struct dc_link *link,
enum clock_source_id clock_source,
const struct dc_link_settings *link_settings)
{
+ if (!link_res->hpo_dp_link_enc) {
+ DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__);
+ return;
+ }
+
if (link->dc->res_pool->dccg->funcs->set_symclk32_le_root_clock_gating)
link->dc->res_pool->dccg->funcs->set_symclk32_le_root_clock_gating(
link->dc->res_pool->dccg,
@@ -124,6 +131,11 @@ void disable_hpo_dp_link_output(struct dc_link *link,
const struct link_resource *link_res,
enum signal_type signal)
{
+ if (!link_res->hpo_dp_link_enc) {
+ DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__);
+ return;
+ }
+
link_res->hpo_dp_link_enc->funcs->link_disable(link_res->hpo_dp_link_enc);
link_res->hpo_dp_link_enc->funcs->disable_link_phy(
link_res->hpo_dp_link_enc, signal);
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h
index 1d3ed8ca83b5..7c9005bc2587 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h
@@ -26,7 +26,7 @@
#define __LINK_HWSS_HPO_DP_H__
#include "link_hwss.h"
-#include "link.h"
+#include "link_service.h"
void set_hpo_dp_throttled_vcp_size(struct pipe_ctx *pipe_ctx,
struct fixed31_32 throttled_vcp_size);
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c
index b621b97711b6..55c5148de800 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c
@@ -74,13 +74,16 @@ static void dp_hpo_fixed_vs_pe_retimer_set_tx_ffe(struct dc_link *link,
static void dp_hpo_fixed_vs_pe_retimer_program_override_test_pattern(struct dc_link *link,
struct encoder_set_dp_phy_pattern_param *tp_params)
{
+ uint8_t clk_src = 0xC4;
+ uint8_t pattern = 0x4F; /* SQ128 */
+
const uint8_t vendor_lttpr_write_data_pg0[4] = {0x1, 0x11, 0x0, 0x0};
- const uint8_t vendor_lttpr_write_data_pg1[4] = {0x1, 0x50, 0x50, 0x0};
- const uint8_t vendor_lttpr_write_data_pg2[4] = {0x1, 0x51, 0x50, 0x0};
+ const uint8_t vendor_lttpr_write_data_pg1[4] = {0x1, 0x50, 0x50, clk_src};
+ const uint8_t vendor_lttpr_write_data_pg2[4] = {0x1, 0x51, 0x50, clk_src};
const uint8_t vendor_lttpr_write_data_pg3[4] = {0x1, 0x10, 0x58, 0x21};
const uint8_t vendor_lttpr_write_data_pg4[4] = {0x1, 0x10, 0x59, 0x21};
- const uint8_t vendor_lttpr_write_data_pg5[4] = {0x1, 0x1C, 0x58, 0x4F};
- const uint8_t vendor_lttpr_write_data_pg6[4] = {0x1, 0x1C, 0x59, 0x4F};
+ const uint8_t vendor_lttpr_write_data_pg5[4] = {0x1, 0x1C, 0x58, pattern};
+ const uint8_t vendor_lttpr_write_data_pg6[4] = {0x1, 0x1C, 0x59, pattern};
const uint8_t vendor_lttpr_write_data_pg7[4] = {0x1, 0x30, 0x51, 0x20};
const uint8_t vendor_lttpr_write_data_pg8[4] = {0x1, 0x30, 0x52, 0x20};
const uint8_t vendor_lttpr_write_data_pg9[4] = {0x1, 0x30, 0x54, 0x20};
@@ -123,18 +126,20 @@ static bool dp_hpo_fixed_vs_pe_retimer_set_override_test_pattern(struct dc_link
struct encoder_set_dp_phy_pattern_param hw_tp_params = { 0 };
const uint8_t vendor_lttpr_exit_manual_automation_0[4] = {0x1, 0x11, 0x0, 0x06};
+ if (!link->dpcd_caps.lttpr_caps.main_link_channel_coding.bits.DP_128b_132b_SUPPORTED)
+ return false;
+
if (tp_params == NULL)
return false;
- if (tp_params->dp_phy_pattern < DP_TEST_PATTERN_SQUARE_BEGIN ||
- tp_params->dp_phy_pattern > DP_TEST_PATTERN_SQUARE_END) {
+ if (!IS_DP_PHY_SQUARE_PATTERN(tp_params->dp_phy_pattern)) {
// Deprogram overrides from previously set square wave override
if (link->current_test_pattern == DP_TEST_PATTERN_80BIT_CUSTOM ||
link->current_test_pattern == DP_TEST_PATTERN_D102)
link->dc->link_srv->configure_fixed_vs_pe_retimer(link->ddc,
&vendor_lttpr_exit_manual_automation_0[0],
sizeof(vendor_lttpr_exit_manual_automation_0));
- else
+ else if (IS_DP_PHY_SQUARE_PATTERN(link->current_test_pattern))
dp_dio_fixed_vs_pe_retimer_exit_manual_automation(link);
return false;
@@ -148,8 +153,6 @@ static bool dp_hpo_fixed_vs_pe_retimer_set_override_test_pattern(struct dc_link
dp_hpo_fixed_vs_pe_retimer_program_override_test_pattern(link, tp_params);
- dp_hpo_fixed_vs_pe_retimer_set_tx_ffe(link, &link->cur_lane_setting[0]);
-
return true;
}
@@ -162,7 +165,12 @@ static void set_hpo_fixed_vs_pe_retimer_dp_link_test_pattern(struct dc_link *lin
link_res->hpo_dp_link_enc->funcs->set_link_test_pattern(
link_res->hpo_dp_link_enc, tp_params);
}
+
link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_SET_SOURCE_PATTERN);
+
+ // Give retimer extra time to lock before updating DP_TRAINING_PATTERN_SET to TPS1 or phy pattern
+ if (tp_params->dp_phy_pattern != DP_TEST_PATTERN_128b_132b_TPS2_TRAINING_MODE)
+ msleep(50);
}
static void set_hpo_fixed_vs_pe_retimer_dp_lane_settings(struct dc_link *link,
@@ -170,16 +178,18 @@ static void set_hpo_fixed_vs_pe_retimer_dp_lane_settings(struct dc_link *link,
const struct dc_link_settings *link_settings,
const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX])
{
- link_res->hpo_dp_link_enc->funcs->set_ffe(
- link_res->hpo_dp_link_enc,
- link_settings,
- lane_settings[0].FFE_PRESET.raw);
-
- // FFE is programmed when retimer is programmed for SQ128, but explicit
- // programming needed here as well in case FFE-only update is requested
- if (link->current_test_pattern >= DP_TEST_PATTERN_SQUARE_BEGIN &&
- link->current_test_pattern <= DP_TEST_PATTERN_SQUARE_END)
- dp_hpo_fixed_vs_pe_retimer_set_tx_ffe(link, &lane_settings[0]);
+ // Don't update our HW FFE when outputting phy test patterns
+ if (IS_DP_PHY_PATTERN(link->pending_test_pattern)) {
+ // Directly program FIXED_VS retimer FFE for SQ128 override
+ if (IS_DP_PHY_SQUARE_PATTERN(link->pending_test_pattern)) {
+ dp_hpo_fixed_vs_pe_retimer_set_tx_ffe(link, &lane_settings[0]);
+ }
+ } else {
+ link_res->hpo_dp_link_enc->funcs->set_ffe(
+ link_res->hpo_dp_link_enc,
+ link_settings,
+ lane_settings[0].FFE_PRESET.raw);
+ }
}
static void enable_hpo_fixed_vs_pe_retimer_dp_link_output(struct dc_link *link,
@@ -214,13 +224,7 @@ static const struct link_hwss hpo_fixed_vs_pe_retimer_dp_link_hwss = {
bool requires_fixed_vs_pe_retimer_hpo_link_hwss(const struct dc_link *link)
{
- if (!(link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN))
- return false;
-
- if (!link->dpcd_caps.lttpr_caps.main_link_channel_coding.bits.DP_128b_132b_SUPPORTED)
- return false;
-
- return true;
+ return requires_fixed_vs_pe_retimer_dio_link_hwss(link);
}
const struct link_hwss *get_hpo_fixed_vs_pe_retimer_dp_link_hwss(void)
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h
index 82301187bc7c..8bf36827ecfb 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h
@@ -25,7 +25,7 @@
#ifndef __LINK_HWSS_HPO_FIXED_VS_PE_RETIMER_DP_H__
#define __LINK_HWSS_HPO_FIXED_VS_PE_RETIMER_DP_H__
-#include "link.h"
+#include "link_service.h"
bool requires_fixed_vs_pe_retimer_hpo_link_hwss(const struct dc_link *link);
const struct link_hwss *get_hpo_fixed_vs_pe_retimer_dp_link_hwss(void);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index c9b6676eaf53..85303167a553 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -41,12 +41,18 @@
#include "protocols/link_dp_dpia.h"
#include "protocols/link_dp_phy.h"
#include "protocols/link_dp_training.h"
+#include "protocols/link_dp_dpia_bw.h"
#include "accessories/link_dp_trace.h"
#include "link_enc_cfg.h"
#include "dm_helpers.h"
#include "clk_mgr.h"
+ // Offset DPCD 050Eh == 0x5A
+#define MST_HUB_ID_0x5A 0x5A
+
+#define DC_LOGGER \
+ link->ctx->logger
#define DC_LOGGER_INIT(logger)
#define LINK_INFO(...) \
@@ -62,7 +68,7 @@
static const u8 DP_SINK_BRANCH_DEV_NAME_7580[] = "7580\x80u";
-static const uint8_t dp_hdmi_dongle_signature_str[] = "DP-HDMI ADAPTOR";
+static const u8 dp_hdmi_dongle_signature_str[] = "DP-HDMI ADAPTOR";
static enum ddc_transaction_type get_ddc_transaction_type(enum signal_type sink_signal)
{
@@ -322,6 +328,7 @@ static void query_dp_dual_mode_adaptor(
bool is_type2_dongle = false;
int retry_count = 2;
struct dp_hdmi_dongle_signature_data *dongle_signature;
+ struct dc_link *link = ddc->link;
/* Assume we have no valid DP passive dongle connected */
*dongle = DISPLAY_DONGLE_NONE;
@@ -512,8 +519,8 @@ static void query_hdcp_capability(enum signal_type signal, struct dc_link *link)
static void read_current_link_settings_on_detect(struct dc_link *link)
{
union lane_count_set lane_count_set = {0};
- uint8_t link_bw_set;
- uint8_t link_rate_set;
+ uint8_t link_bw_set = 0;
+ uint8_t link_rate_set = 0;
uint32_t read_dpcd_retry_cnt = 10;
enum dc_status status = DC_ERROR_UNEXPECTED;
int i;
@@ -586,8 +593,9 @@ static bool detect_dp(struct dc_link *link,
if (sink_caps->transaction_type == DDC_TRANSACTION_TYPE_I2C_OVER_AUX) {
sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT;
- if (!detect_dp_sink_caps(link))
+ if (!detect_dp_sink_caps(link)) {
return false;
+ }
if (is_dp_branch_device(link))
/* DP SST branch */
@@ -604,6 +612,7 @@ static bool detect_dp(struct dc_link *link,
link->dpcd_caps.dongle_type = sink_caps->dongle_type;
link->dpcd_caps.is_dongle_type_one = sink_caps->is_dongle_type_one;
link->dpcd_caps.dpcd_rev.raw = 0;
+ link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.raw = 0;
}
return true;
@@ -647,7 +656,7 @@ static bool wait_for_entering_dp_alt_mode(struct dc_link *link)
return true;
is_in_alt_mode = link->link_enc->funcs->is_in_alt_mode(link->link_enc);
- DC_LOG_DC("DP Alt mode state on HPD: %d\n", is_in_alt_mode);
+ DC_LOG_DC("DP Alt mode state on HPD: %d Link=%d\n", is_in_alt_mode, link->link_index);
if (is_in_alt_mode)
return true;
@@ -688,6 +697,15 @@ static void apply_dpia_mst_dsc_always_on_wa(struct dc_link *link)
link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT &&
!link->dc->debug.dpia_debug.bits.disable_mst_dsc_work_around)
link->wa_flags.dpia_mst_dsc_always_on = true;
+
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ link->type == dc_connection_mst_branch &&
+ link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 &&
+ link->dpcd_caps.branch_vendor_specific_data[2] == MST_HUB_ID_0x5A &&
+ link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT &&
+ !link->dc->debug.dpia_debug.bits.disable_mst_dsc_work_around) {
+ link->wa_flags.dpia_mst_dsc_always_on = true;
+ }
}
static void revert_dpia_mst_dsc_always_on_wa(struct dc_link *link)
@@ -800,7 +818,10 @@ static bool should_verify_link_capability_destructively(struct dc_link *link,
{
bool destrictive = false;
struct dc_link_settings max_link_cap;
- bool is_link_enc_unavailable = link->link_enc &&
+ bool is_link_enc_unavailable = false;
+
+ if (!link->dc->config.unify_link_enc_assignment)
+ is_link_enc_unavailable = link->link_enc &&
link->dc->res_pool->funcs->link_encs_assign &&
!link_enc_cfg_is_link_enc_avail(
link->ctx->dc,
@@ -813,7 +834,8 @@ static bool should_verify_link_capability_destructively(struct dc_link *link,
if (link->dc->debug.skip_detection_link_training ||
dc_is_embedded_signal(link->local_sink->sink_signal) ||
- link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+ (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ !link->dc->config.enable_dpia_pre_training)) {
destrictive = false;
} else if (link_dp_get_encoding_format(&max_link_cap) ==
DP_8b_10b_ENCODING) {
@@ -859,7 +881,6 @@ static bool detect_link_and_local_sink(struct dc_link *link,
struct dc_sink *prev_sink = NULL;
struct dpcd_caps prev_dpcd_caps;
enum dc_connection_type new_connection_type = dc_connection_none;
- enum dc_connection_type pre_connection_type = link->type;
const uint32_t post_oui_delay = 30; // 30ms
DC_LOGGER_INIT(link->ctx->logger);
@@ -876,7 +897,7 @@ static bool detect_link_and_local_sink(struct dc_link *link,
(link->dpcd_sink_ext_caps.bits.oled == 1)) {
dpcd_set_source_specific_data(link);
msleep(post_oui_delay);
- set_cached_brightness_aux(link);
+ set_default_brightness_aux(link);
}
return true;
@@ -961,7 +982,6 @@ static bool detect_link_and_local_sink(struct dc_link *link,
}
if (!detect_dp(link, &sink_caps, reason)) {
- link->type = pre_connection_type;
if (prev_sink)
dc_sink_release(prev_sink);
@@ -988,6 +1008,13 @@ static bool detect_link_and_local_sink(struct dc_link *link,
if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
link->reported_link_cap.link_rate > LINK_RATE_HIGH3)
link->reported_link_cap.link_rate = LINK_RATE_HIGH3;
+
+ if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling
+ && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc
+ && link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support) {
+ if (link_dpia_enable_usb4_dp_bw_alloc_mode(link) == false)
+ link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc = false;
+ }
break;
}
@@ -1085,6 +1112,9 @@ static bool detect_link_and_local_sink(struct dc_link *link,
if (sink->edid_caps.panel_patch.skip_scdc_overwrite)
link->ctx->dc->debug.hdmi20_disable = true;
+ if (sink->edid_caps.panel_patch.remove_sink_ext_caps)
+ link->dpcd_sink_ext_caps.raw = 0;
+
if (dc_is_hdmi_signal(link->connector_signal))
read_scdc_caps(link->ddc, link->local_sink);
@@ -1110,6 +1140,10 @@ static bool detect_link_and_local_sink(struct dc_link *link,
if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A &&
!sink->edid_caps.edid_hdmi)
sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
+ else if (dc_is_dvi_signal(sink->sink_signal) &&
+ aud_support->hdmi_audio_native &&
+ sink->edid_caps.edid_hdmi)
+ sink->sink_signal = SIGNAL_TYPE_HDMI_TYPE_A;
if (link->local_sink && dc_is_dp_signal(sink_caps.signal))
dp_trace_init(link);
@@ -1163,6 +1197,11 @@ static bool detect_link_and_local_sink(struct dc_link *link,
dm_helpers_init_panel_settings(dc_ctx, &link->panel_config, sink);
// Override dc_panel_config if system has specific settings
dm_helpers_override_panel_settings(dc_ctx, &link->panel_config);
+
+ // sink can only use the supported link rate table, so we are forced to enable it
+ if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN)
+ link->panel_config.ilr.optimize_edp_link_rate = true;
+ link->reported_link_cap.link_rate = get_max_edp_link_rate(link);
}
} else {
@@ -1269,8 +1308,7 @@ bool link_detect(struct dc_link *link, enum dc_detect_reason reason)
link->dpcd_caps.is_mst_capable)
is_delegated_to_mst_top_mgr = discover_dp_mst_topology(link, reason);
- if (is_local_sink_detect_success &&
- pre_link_type == dc_connection_mst_branch &&
+ if (pre_link_type == dc_connection_mst_branch &&
link->type != dc_connection_mst_branch)
is_delegated_to_mst_top_mgr = link_reset_cur_dp_mst_topology(link);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.h b/drivers/gpu/drm/amd/display/dc/link/link_detection.h
index 7da05078721e..1ab29476060b 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.h
@@ -25,7 +25,7 @@
#ifndef __DC_LINK_DETECTION_H__
#define __DC_LINK_DETECTION_H__
-#include "link.h"
+#include "link_service.h"
bool link_detect(struct dc_link *link, enum dc_detect_reason reason);
bool link_detect_connection_type(struct dc_link *link,
enum dc_connection_type *type);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 79aef205598b..83419e1a9036 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -38,7 +38,6 @@
#include "link_dpms.h"
#include "link_hwss.h"
#include "link_validation.h"
-#include "accessories/link_fpga.h"
#include "accessories/link_dp_trace.h"
#include "protocols/link_dpcd.h"
#include "protocols/link_ddc.h"
@@ -56,7 +55,12 @@
#include "dccg.h"
#include "clk_mgr.h"
#include "atomfirmware.h"
-#define DC_LOGGER_INIT(logger)
+#include "vpg.h"
+
+#define DC_LOGGER \
+ dc_logger
+#define DC_LOGGER_INIT(logger) \
+ struct dal_logger *dc_logger = logger
#define LINK_INFO(...) \
DC_LOG_HW_HOTPLUG( \
@@ -65,7 +69,6 @@
#define RETIMER_REDRIVER_INFO(...) \
DC_LOG_RETIMER_REDRIVER( \
__VA_ARGS__)
-#include "dc/dcn30/dcn30_vpg.h"
#define MAX_MTP_SLOT_COUNT 64
#define LINK_TRAINING_ATTEMPTS 4
@@ -125,7 +128,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init)
if (link->ep_type == DISPLAY_ENDPOINT_PHY &&
link->link_enc->funcs->get_dig_frontend &&
link->link_enc->funcs->is_dig_enabled(link->link_enc)) {
- unsigned int fe = link->link_enc->funcs->get_dig_frontend(link->link_enc);
+ int fe = link->link_enc->funcs->get_dig_frontend(link->link_enc);
if (fe != ENGINE_ID_UNKNOWN)
for (j = 0; j < dc->res_pool->stream_enc_count; j++) {
@@ -137,7 +140,8 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init)
}
}
- if ((!link->wa_flags.dp_keep_receiver_powered) || hw_init)
+ if (((!dc->is_switch_in_progress_dest) && ((!link->wa_flags.dp_keep_receiver_powered) || hw_init)) &&
+ (link->type != dc_connection_none))
dpcd_write_rx_power_ctrl(link, false);
}
}
@@ -145,6 +149,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init)
void link_set_all_streams_dpms_off_for_link(struct dc_link *link)
{
struct pipe_ctx *pipes[MAX_PIPES];
+ struct dc_stream_state *streams[MAX_PIPES];
struct dc_state *state = link->dc->current_state;
uint8_t count;
int i;
@@ -157,10 +162,18 @@ void link_set_all_streams_dpms_off_for_link(struct dc_link *link)
link_get_master_pipes_with_dpms_on(link, state, &count, pipes);
+ /* The subsequent call to dc_commit_updates_for_stream for a full update
+ * will release the current state and swap to a new state. Releasing the
+ * current state causes the stream pointers in the pipe_ctx structs to be
+ * zeroed. Hence, cache all streams prior to dc_commit_updates_for_stream.
+ */
+ for (i = 0; i < count; i++)
+ streams[i] = pipes[i]->stream;
+
for (i = 0; i < count; i++) {
- stream_update.stream = pipes[i]->stream;
+ stream_update.stream = streams[i];
dc_commit_updates_for_stream(link->ctx->dc, NULL, 0,
- pipes[i]->stream, &stream_update,
+ streams[i], &stream_update,
state);
}
@@ -649,15 +662,15 @@ static void write_i2c_redriver_setting(
static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off)
{
struct cp_psp *cp_psp = &pipe_ctx->stream->ctx->cp_psp;
- struct link_encoder *link_enc = NULL;
+ struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc;
struct cp_psp_stream_config config = {0};
enum dp_panel_mode panel_mode =
dp_get_panel_mode(pipe_ctx->stream->link);
if (cp_psp == NULL || cp_psp->funcs.update_stream_config == NULL)
return;
-
- link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link);
+ if (!pipe_ctx->stream->ctx->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link);
ASSERT(link_enc);
if (link_enc == NULL)
return;
@@ -723,7 +736,7 @@ static void set_avmute(struct pipe_ctx *pipe_ctx, bool enable)
static void enable_mst_on_sink(struct dc_link *link, bool enable)
{
- unsigned char mstmCntl;
+ unsigned char mstmCntl = 0;
core_link_read_dpcd(link, DP_MSTM_CTRL, &mstmCntl, 1);
if (enable)
@@ -769,15 +782,45 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable)
return result;
}
+static bool dp_set_hblank_reduction_on_rx(struct pipe_ctx *pipe_ctx)
+{
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+ struct dc_stream_state *stream = pipe_ctx->stream;
+ bool result = false;
+
+ if (dc_is_virtual_signal(stream->signal))
+ result = true;
+ else
+ result = dm_helpers_dp_write_hblank_reduction(dc->ctx, stream);
+ return result;
+}
+
+
/* The stream with these settings can be sent (unblanked) only after DSC was enabled on RX first,
* i.e. after dp_enable_dsc_on_rx() had been called
*/
void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
+ /* TODO: Move this to HWSS as this is hardware programming sequence not a
+ * link layer sequence
+ */
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
struct dc_stream_state *stream = pipe_ctx->stream;
struct pipe_ctx *odm_pipe;
int opp_cnt = 1;
+ struct dccg *dccg = dc->res_pool->dccg;
+ /* It has been found that when DSCCLK is lower than 16MHz, DCN register
+ * access hangs. When DSCCLK is based on refclk, DSCCLK is always a
+ * fixed value higher than 16MHz so the issue doesn't occur. When DSCCLK is
+ * generated by DTO, DSCCLK would be based on 1/3 dispclk. For small timings
+ * with DSC such as 480p60Hz, the dispclk could be low enough to trigger
+ * this problem. We are implementing a workaround here to keep using DSCCLK
+ * based on fixed value refclk when timing is smaller than 3x16MHz (i.e.
+ * 48MHz) pixel clock to avoid hitting this problem.
+ */
+ bool should_use_dto_dscclk = (dccg->funcs->set_dto_dscclk != NULL) &&
+ stream->timing.pix_clk_100hz > 480000;
DC_LOGGER_INIT(dsc->ctx->logger);
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
@@ -785,11 +828,12 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
if (enable) {
struct dsc_config dsc_cfg;
- struct dsc_optc_config dsc_optc_cfg;
+ struct dsc_optc_config dsc_optc_cfg = {0};
enum optc_dsc_mode optc_dsc_mode;
/* Enable DSC hw block */
- dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
+ dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding +
+ stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
dsc_cfg.color_depth = stream->timing.display_color_depth;
@@ -798,11 +842,15 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0);
dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt;
+ if (should_use_dto_dscclk)
+ dccg->funcs->set_dto_dscclk(dccg, dsc->inst, dsc_cfg.dc_dsc_cfg.num_slices_h);
dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg);
dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst);
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc;
+ if (should_use_dto_dscclk)
+ dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, dsc_cfg.dc_dsc_cfg.num_slices_h);
odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg);
odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst);
}
@@ -815,10 +863,11 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
if (dc_is_dp_signal(stream->signal) && !dp_is_128b_132b_signal(pipe_ctx)) {
DC_LOG_DSC("Setting stream encoder DSC config for engine %d:", (int)pipe_ctx->stream_res.stream_enc->id);
dsc_optc_config_log(dsc, &dsc_optc_cfg);
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config(pipe_ctx->stream_res.stream_enc,
- optc_dsc_mode,
- dsc_optc_cfg.bytes_per_pixel,
- dsc_optc_cfg.slice_width);
+ if (pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config)
+ pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config(pipe_ctx->stream_res.stream_enc,
+ optc_dsc_mode,
+ dsc_optc_cfg.bytes_per_pixel,
+ dsc_optc_cfg.slice_width);
/* PPS SDP is set elsewhere because it has to be done after DIG FE is connected to DIG BE */
}
@@ -845,18 +894,43 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
NULL,
true);
else {
- pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config(
- pipe_ctx->stream_res.stream_enc,
- OPTC_DSC_DISABLED, 0, 0);
+ if (pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config)
+ pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config(
+ pipe_ctx->stream_res.stream_enc,
+ OPTC_DSC_DISABLED, 0, 0);
pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_pps_info_packet(
pipe_ctx->stream_res.stream_enc, false, NULL, true);
}
}
/* disable DSC block */
- pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc);
- for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe)
+ for (odm_pipe = pipe_ctx; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
+ odm_pipe->stream_res.dsc->funcs->dsc_disconnect(odm_pipe->stream_res.dsc);
+ /*
+ * TODO - dsc_disconnect is a double buffered register.
+ * by the time we call dsc_disable, dsc may still remain
+ * connected to OPP. In this case OPTC will no longer
+ * get correct pixel data because DSCC is off. However
+ * we also can't wait for the disconnect pending
+ * complete, because this function can be called
+ * with/without OTG master lock acquired. When the lock
+ * is acquired we will never get pending complete until
+ * we release the lock later. So there is no easy way to
+ * solve this problem especially when the lock is
+ * acquired. DSC is a front end hw block it should be
+ * programmed as part of front end sequence, where the
+ * commit sequence without lock and update sequence
+ * with lock are completely separated. However because
+ * we are programming dsc as part of back end link
+ * programming sequence, we don't know if front end OPTC
+ * master lock is acquired. The back end should be
+ * agnostic to front end lock. DSC programming shouldn't
+ * belong to this sequence.
+ */
odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc);
+ if (dccg->funcs->set_ref_dscclk)
+ dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst);
+ }
}
}
@@ -873,11 +947,15 @@ bool link_set_dsc_pps_packet(struct pipe_ctx *pipe_ctx, bool enable, bool immedi
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
struct dc_stream_state *stream = pipe_ctx->stream;
- DC_LOGGER_INIT(dsc->ctx->logger);
- if (!pipe_ctx->stream->timing.flags.DSC || !dsc)
+ if (!pipe_ctx->stream->timing.flags.DSC)
return false;
+ if (!dsc)
+ return false;
+
+ DC_LOGGER_INIT(dsc->ctx->logger);
+
if (enable) {
struct dsc_config dsc_cfg;
uint8_t dsc_packed_pps[128];
@@ -1055,18 +1133,21 @@ static struct fixed31_32 get_pbn_from_bw_in_kbps(uint64_t kbps)
uint32_t denominator = 1;
/*
- * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006
+ * The 1.006 factor (margin 5300ppm + 300ppm ~ 0.6% as per spec) is not
+ * required when determining PBN/time slot utilization on the link between
+ * us and the branch, since that overhead is already accounted for in
+ * the get_pbn_per_slot function.
+ *
* The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on
* common multiplier to render an integer PBN for all link rate/lane
* counts combinations
* calculate
- * peak_kbps *= (1006/1000)
* peak_kbps *= (64/54)
- * peak_kbps *= 8 convert to bytes
+ * peak_kbps /= (8 * 1000) convert to bytes
*/
- numerator = 64 * PEAK_FACTOR_X1000;
- denominator = 54 * 8 * 1000 * 1000;
+ numerator = 64;
+ denominator = 54 * 8 * 1000;
kbps *= numerator;
peak_kbps = dc_fixpt_from_fraction(kbps, denominator);
@@ -1123,12 +1204,13 @@ static bool poll_for_allocation_change_trigger(struct dc_link *link)
int i;
const int act_retries = 30;
enum act_return_status result = ACT_FAILED;
+ enum dc_connection_type display_connected = (link->type != dc_connection_none);
union payload_table_update_status update_status = {0};
union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX];
union lane_align_status_updated lane_status_updated;
DC_LOGGER_INIT(link->ctx->logger);
- if (link->aux_access_disabled)
+ if (!display_connected || link->aux_access_disabled)
return true;
for (i = 0; i < act_retries; i++) {
get_lane_status(link, link->cur_link_settings.lane_count, dpcd_lane_status, &lane_status_updated);
@@ -1245,86 +1327,6 @@ static void remove_stream_from_alloc_table(
}
}
-static enum dc_status deallocate_mst_payload_with_temp_drm_wa(
- struct pipe_ctx *pipe_ctx)
-{
- struct dc_stream_state *stream = pipe_ctx->stream;
- struct dc_link *link = stream->link;
- struct dc_dp_mst_stream_allocation_table proposed_table = {0};
- struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0);
- int i;
- bool mst_mode = (link->type == dc_connection_mst_branch);
- /* adjust for drm changes*/
- const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
- const struct dc_link_settings empty_link_settings = {0};
- DC_LOGGER_INIT(link->ctx->logger);
-
- if (link_hwss->ext.set_throttled_vcp_size)
- link_hwss->ext.set_throttled_vcp_size(pipe_ctx, avg_time_slots_per_mtp);
- if (link_hwss->ext.set_hblank_min_symbol_width)
- link_hwss->ext.set_hblank_min_symbol_width(pipe_ctx,
- &empty_link_settings,
- avg_time_slots_per_mtp);
-
- if (dm_helpers_dp_mst_write_payload_allocation_table(
- stream->ctx,
- stream,
- &proposed_table,
- false))
- update_mst_stream_alloc_table(
- link,
- pipe_ctx->stream_res.stream_enc,
- pipe_ctx->stream_res.hpo_dp_stream_enc,
- &proposed_table);
- else
- DC_LOG_WARNING("Failed to update"
- "MST allocation table for"
- "pipe idx:%d\n",
- pipe_ctx->pipe_idx);
-
- DC_LOG_MST("%s"
- "stream_count: %d: ",
- __func__,
- link->mst_stream_alloc_table.stream_count);
-
- for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
- DC_LOG_MST("stream_enc[%d]: %p "
- "stream[%d].hpo_dp_stream_enc: %p "
- "stream[%d].vcp_id: %d "
- "stream[%d].slot_count: %d\n",
- i,
- (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
- i,
- (void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
- i,
- link->mst_stream_alloc_table.stream_allocations[i].slot_count);
- }
-
- if (link_hwss->ext.update_stream_allocation_table == NULL ||
- link_dp_get_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) {
- DC_LOG_DEBUG("Unknown encoding format\n");
- return DC_ERROR_UNEXPECTED;
- }
-
- link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res,
- &link->mst_stream_alloc_table);
-
- if (mst_mode) {
- dm_helpers_dp_mst_poll_for_allocation_change_trigger(
- stream->ctx,
- stream);
- }
-
- dm_helpers_dp_mst_send_payload_allocation(
- stream->ctx,
- stream,
- false);
-
- return DC_OK;
-}
-
static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
{
struct dc_stream_state *stream = pipe_ctx->stream;
@@ -1337,9 +1339,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
const struct dc_link_settings empty_link_settings = {0};
DC_LOGGER_INIT(link->ctx->logger);
- if (link->dc->debug.temp_mst_deallocation_sequence)
- return deallocate_mst_payload_with_temp_drm_wa(pipe_ctx);
-
/* deallocate_mst_payload is called before disable link. When mode or
* disable/enable monitor, new stream is created which is not in link
* stream[] yet. For this, payload is not allocated yet, so de-alloc
@@ -1412,16 +1411,14 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx)
link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res,
&link->mst_stream_alloc_table);
- if (mst_mode) {
+ if (mst_mode)
dm_helpers_dp_mst_poll_for_allocation_change_trigger(
stream->ctx,
stream);
- dm_helpers_dp_mst_send_payload_allocation(
- stream->ctx,
- stream,
- false);
- }
+ dm_helpers_dp_mst_update_mst_mgr_for_deallocation(
+ stream->ctx,
+ stream);
return DC_OK;
}
@@ -1502,12 +1499,10 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx)
stream->ctx,
stream);
- if (ret != ACT_LINK_LOST) {
+ if (ret != ACT_LINK_LOST)
dm_helpers_dp_mst_send_payload_allocation(
stream->ctx,
- stream,
- true);
- }
+ stream);
/* slot X.Y for only current stream */
pbn_per_slot = get_pbn_per_slot(stream);
@@ -1564,6 +1559,7 @@ static bool write_128b_132b_sst_payload_allocation_table(
union payload_table_update_status update_status = { 0 };
const uint32_t max_retries = 30;
uint32_t retries = 0;
+ enum dc_connection_type display_connected = (link->type != dc_connection_none);
DC_LOGGER_INIT(link->ctx->logger);
if (allocate) {
@@ -1581,7 +1577,7 @@ static bool write_128b_132b_sst_payload_allocation_table(
proposed_table->stream_allocations[0].slot_count = req_slot_count;
proposed_table->stream_allocations[0].vcp_id = vc_id;
- if (link->aux_access_disabled)
+ if (!display_connected || link->aux_access_disabled)
return true;
/// Write DPCD 2C0 = 1 to start updating
@@ -1628,7 +1624,7 @@ static bool write_128b_132b_sst_payload_allocation_table(
break;
}
} else {
- union dpcd_rev dpcdRev;
+ union dpcd_rev dpcdRev = {0};
if (core_link_read_dpcd(
link,
@@ -1767,8 +1763,7 @@ enum dc_status link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_in
/* send ALLOCATE_PAYLOAD sideband message with updated pbn */
dm_helpers_dp_mst_send_payload_allocation(
stream->ctx,
- stream,
- true);
+ stream);
/* notify immediate branch device table update */
if (dm_helpers_dp_mst_write_payload_allocation_table(
@@ -1897,8 +1892,7 @@ enum dc_status link_increase_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw_
/* send ALLOCATE_PAYLOAD sideband message with updated pbn */
dm_helpers_dp_mst_send_payload_allocation(
stream->ctx,
- stream,
- true);
+ stream);
}
/* increase throttled vcp size */
@@ -1930,7 +1924,7 @@ static void disable_link_dp(struct dc_link *link,
dp_disable_link_phy(link, link_res, signal);
if (link->connector_signal == SIGNAL_TYPE_EDP) {
- if (!link->dc->config.edp_no_power_sequencing)
+ if (!link->skip_implict_edp_power_control)
link->dc->hwss.edp_power_control(link, false);
}
@@ -1940,7 +1934,7 @@ static void disable_link_dp(struct dc_link *link,
if (link_dp_get_encoding_format(&link_settings) ==
DP_8b_10b_ENCODING) {
- dp_set_fec_enable(link, false);
+ dp_set_fec_enable(link, link_res, false);
dp_set_fec_ready(link, link_res, false);
}
}
@@ -1951,7 +1945,9 @@ static void disable_link(struct dc_link *link,
{
if (dc_is_dp_signal(signal)) {
disable_link_dp(link, link_res, signal);
- } else if (signal != SIGNAL_TYPE_VIRTUAL) {
+ } else if (signal == SIGNAL_TYPE_VIRTUAL) {
+ link->dc->hwss.disable_link_output(link, link_res, SIGNAL_TYPE_DISPLAY_PORT);
+ } else {
link->dc->hwss.disable_link_output(link, link_res, signal);
}
@@ -1981,11 +1977,15 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx)
stream->phy_pix_clk = stream->timing.pix_clk_100hz / 10;
if (stream->phy_pix_clk > 340000)
is_over_340mhz = true;
+ if (dc_is_tmds_signal(stream->signal) && stream->phy_pix_clk > 6000000UL) {
+ ASSERT(false);
+ return;
+ }
if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) {
unsigned short masked_chip_caps = pipe_ctx->stream->link->chip_caps &
- EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
- if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
+ AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
+ if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
/* DP159, Retimer settings */
eng_id = pipe_ctx->stream_res.stream_enc->id;
@@ -1996,7 +1996,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx)
write_i2c_default_retimer_setting(pipe_ctx,
is_vga_mode, is_over_340mhz);
}
- } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
+ } else if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
/* PI3EQX1204, Redriver settings */
write_i2c_redriver_setting(pipe_ctx, is_over_340mhz);
}
@@ -2018,7 +2018,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx)
/* We need to enable stream encoder for TMDS first to apply 1/4 TMDS
* character clock in case that beyond 340MHz.
*/
- if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal))
+ if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal))
link_hwss->setup_stream_encoder(pipe_ctx);
dc->hwss.enable_tmds_link_output(
@@ -2052,7 +2052,7 @@ static enum dc_status enable_link_dp(struct dc_state *state,
int lt_attempts = LINK_TRAINING_ATTEMPTS;
// Increase retry count if attempting DP1.x on FIXED_VS link
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING)
lt_attempts = 10;
@@ -2064,17 +2064,12 @@ static enum dc_status enable_link_dp(struct dc_state *state,
}
}
- /*
- * If the link is DP-over-USB4 do the following:
- * - Train with fallback when enabling DPIA link. Conventional links are
+ /* Train with fallback when enabling DPIA link. Conventional links are
* trained with fallback during sink detection.
- * - Allocate only what the stream needs for bw in Gbps. Inform the CM
- * in case stream needs more or less bw from what has been allocated
- * earlier at plug time.
*/
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ !link->dc->config.enable_dpia_pre_training)
do_fallback = true;
- }
/*
* Temporary w/a to get DP2.0 link rates to work with SST.
@@ -2117,6 +2112,9 @@ static enum dc_status enable_link_dp(struct dc_state *state,
if (link_settings->link_rate == LINK_RATE_LOW)
skip_video_pattern = false;
+ if (stream->sink_patches.oled_optimize_display_on)
+ set_default_brightness_aux(link);
+
if (perform_link_training_with_retries(link_settings,
skip_video_pattern,
lt_attempts,
@@ -2134,17 +2132,20 @@ static enum dc_status enable_link_dp(struct dc_state *state,
fec_enable = true;
if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING)
- dp_set_fec_enable(link, fec_enable);
+ dp_set_fec_enable(link, &pipe_ctx->link_res, fec_enable);
// during mode set we do DP_SET_POWER off then on, aux writes are lost
if (link->dpcd_sink_ext_caps.bits.oled == 1 ||
link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 ||
link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) {
- set_cached_brightness_aux(link);
-
- if (link->dpcd_sink_ext_caps.bits.oled == 1)
- msleep(bl_oled_enable_delay);
- edp_backlight_enable_aux(link, true);
+ if (!stream->sink_patches.oled_optimize_display_on) {
+ set_default_brightness_aux(link);
+ if (link->dpcd_sink_ext_caps.bits.oled == 1)
+ msleep(bl_oled_enable_delay);
+ edp_backlight_enable_aux(link, true);
+ } else {
+ edp_backlight_enable_aux(link, true);
+ }
}
return status;
@@ -2181,7 +2182,7 @@ static enum dc_status enable_link_dp_mst(
struct pipe_ctx *pipe_ctx)
{
struct dc_link *link = pipe_ctx->stream->link;
- unsigned char mstm_cntl;
+ unsigned char mstm_cntl = 0;
/* sink signal type after MST branch is MST. Multiple MST sinks
* share one link. Link DP PHY is enable or training only once.
@@ -2205,6 +2206,18 @@ static enum dc_status enable_link_dp_mst(
return enable_link_dp(state, pipe_ctx);
}
+/* Bring up a virtual link by enabling DP link output; always returns DC_OK. */
+static enum dc_status enable_link_virtual(struct pipe_ctx *pipe_ctx)
+{
+	struct dc_link *virt_link = pipe_ctx->stream->link;
+
+	virt_link->dc->hwss.enable_dp_link_output(virt_link, &pipe_ctx->link_res,
+			SIGNAL_TYPE_DISPLAY_PORT, pipe_ctx->clock_source->id,
+			&pipe_ctx->link_config.dp_link_settings);
+
+	return DC_OK;
+}
+
static enum dc_status enable_link(
struct dc_state *state,
struct pipe_ctx *pipe_ctx)
@@ -2219,7 +2232,7 @@ static enum dc_status enable_link(
* link settings. Need to call disable first before enabling at
* new link settings.
*/
- if (link->link_status.link_active && !stream->skip_edp_power_down)
+ if (link->link_status.link_active)
disable_link(link, &pipe_ctx->link_res, pipe_ctx->stream->signal);
switch (pipe_ctx->stream->signal) {
@@ -2244,7 +2257,7 @@ static enum dc_status enable_link(
status = DC_OK;
break;
case SIGNAL_TYPE_VIRTUAL:
- status = DC_OK;
+ status = enable_link_virtual(pipe_ctx);
break;
default:
break;
@@ -2257,32 +2270,100 @@ static enum dc_status enable_link(
return status;
}
+/* Request USB4 bandwidth for @stream: @bw + other MST sinks + DP overhead. */
+static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, int bw)
+{
+	struct dc_link *link = stream->sink->link;
+	int total_bw = bw;
+	int idx;
+
+	DC_LOGGER_INIT(link->ctx->logger);
+
+	/* Return false when bandwidth allocation is not enabled on the link. */
+	if (!link->dpia_bw_alloc_config.bw_alloc_enabled)
+		return false;
+
+	if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+		int own_slot = 0; /* stays 0 when no remote sink matches */
+
+		/* Add the cached requests of every other remote sink. */
+		for (idx = 0; idx < link->sink_count; idx++) {
+			if (!link->remote_sinks[idx])
+				continue;
+			if (link->remote_sinks[idx]->sink_id == stream->sink->sink_id)
+				own_slot = idx;
+			else
+				total_bw += link->dpia_bw_alloc_config.remote_sink_req_bw[idx];
+		}
+		link->dpia_bw_alloc_config.remote_sink_req_bw[own_slot] = bw;
+	}
+
+	link->dpia_bw_alloc_config.dp_overhead = link_dpia_get_dp_overhead(link);
+	total_bw += link->dpia_bw_alloc_config.dp_overhead;
+	link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, total_bw);
+
+	if (stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST)
+		return true;
+
+	/* MST only: log the request now cached for each remote sink. */
+	for (idx = 0; idx < link->sink_count; idx++) {
+		if (link->remote_sinks[idx]) {
+			DC_LOG_DEBUG("%s, remote_sink=%s, request_bw=%d\n", __func__,
+					(const char *)(&link->remote_sinks[idx]->edid_caps.display_name[0]),
+					link->dpia_bw_alloc_config.remote_sink_req_bw[idx]);
+		}
+	}
+
+	return true;
+}
+
+/* Size the USB4 request from @stream's timing at the highest link encoding. */
+static bool allocate_usb4_bandwidth(struct dc_stream_state *stream)
+{
+	struct dc_link *link = stream->sink->link;
+	int timing_bw;
+
+	timing_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing,
+			dc_link_get_highest_encoding_format(link));
+
+	return allocate_usb4_bandwidth_for_stream(stream, timing_bw);
+}
+
+/*
+ * Release @stream's USB4 bandwidth by re-running the allocation with a
+ * request of 0 for this stream.
+ */
+static bool deallocate_usb4_bandwidth(struct dc_stream_state *stream)
+{
+	return allocate_usb4_bandwidth_for_stream(stream, 0);
+}
+
void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
{
struct dc *dc = pipe_ctx->stream->ctx->dc;
struct dc_stream_state *stream = pipe_ctx->stream;
struct dc_link *link = stream->sink->link;
struct vpg *vpg = pipe_ctx->stream_res.stream_enc->vpg;
+ enum dp_panel_mode panel_mode_dp = dp_get_panel_mode(link);
+
+ DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
ASSERT(is_master_pipe_for_link(link, pipe_ctx));
if (dp_is_128b_132b_signal(pipe_ctx))
vpg = pipe_ctx->stream_res.hpo_dp_stream_enc->vpg;
-
- DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
+ if (dc_is_virtual_signal(pipe_ctx->stream->signal))
+ return;
if (pipe_ctx->stream->sink) {
if (pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_VIRTUAL &&
pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_NONE) {
- DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x\n", __func__,
+ DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x link=%d\n", __func__,
pipe_ctx->stream->sink->edid_caps.display_name,
- pipe_ctx->stream->signal);
+ pipe_ctx->stream->signal, link->link_index);
}
}
- if (dc_is_virtual_signal(pipe_ctx->stream->signal))
- return;
-
if (!pipe_ctx->stream->sink->edid_caps.panel_patch.skip_avmute) {
if (dc_is_hdmi_signal(pipe_ctx->stream->signal))
set_avmute(pipe_ctx, true);
@@ -2293,9 +2374,12 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
update_psp_stream_config(pipe_ctx, true);
dc->hwss.blank_stream(pipe_ctx);
+ if (pipe_ctx->link_config.dp_tunnel_settings.should_use_dp_bw_allocation)
+ deallocate_usb4_bandwidth(pipe_ctx->stream);
+
if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
deallocate_mst_payload(pipe_ctx);
- else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
+ else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) &&
dp_is_128b_132b_signal(pipe_ctx))
update_sst_payload(pipe_ctx, false);
@@ -2304,13 +2388,13 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id;
unsigned short masked_chip_caps = link->chip_caps &
- EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
+ AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
//Need to inform that sink is going to use legacy HDMI mode.
write_scdc_data(
link->ddc,
165000,//vbios only handles 165Mhz.
false);
- if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
+ if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
/* DP159, Retimer settings */
if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings))
write_i2c_retimer_setting(pipe_ctx,
@@ -2318,7 +2402,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
else
write_i2c_default_retimer_setting(pipe_ctx,
false, false);
- } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
+ } else if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
/* PI3EQX1204, Redriver settings */
write_i2c_redriver_setting(pipe_ctx, false);
}
@@ -2338,10 +2422,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
dc->hwss.disable_stream(pipe_ctx);
} else {
dc->hwss.disable_stream(pipe_ctx);
- if (!pipe_ctx->stream->skip_edp_power_down) {
- disable_link(pipe_ctx->stream->link, &pipe_ctx->link_res, pipe_ctx->stream->signal);
- }
+ disable_link(pipe_ctx->stream->link, &pipe_ctx->link_res, pipe_ctx->stream->signal);
}
+ edp_set_panel_assr(link, pipe_ctx, &panel_mode_dp, false);
if (pipe_ctx->stream->timing.flags.DSC) {
if (dc_is_dp_signal(pipe_ctx->stream->signal))
@@ -2354,6 +2437,14 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
if (vpg && vpg->funcs->vpg_powerdown)
vpg->funcs->vpg_powerdown(vpg);
+
+	/* for the case where PSP does not exist */
+ if (link->connector_signal == SIGNAL_TYPE_EDP && dc->debug.psp_disabled_wa) {
+ /* reset internal save state to default since eDP is off */
+ enum dp_panel_mode panel_mode = dp_get_panel_mode(pipe_ctx->stream->link);
+		/* since PSP is currently not loaded, we need to reset it to default */
+ link->panel_mode = panel_mode;
+ }
}
void link_set_dpms_on(
@@ -2364,33 +2455,33 @@ void link_set_dpms_on(
struct dc_stream_state *stream = pipe_ctx->stream;
struct dc_link *link = stream->sink->link;
enum dc_status status;
- struct link_encoder *link_enc;
+ struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc;
enum otg_out_mux_dest otg_out_dest = OUT_MUX_DIO;
struct vpg *vpg = pipe_ctx->stream_res.stream_enc->vpg;
- const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
bool apply_edp_fast_boot_optimization =
pipe_ctx->stream->apply_edp_fast_boot_optimization;
+ DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
+
ASSERT(is_master_pipe_for_link(link, pipe_ctx));
if (dp_is_128b_132b_signal(pipe_ctx))
vpg = pipe_ctx->stream_res.hpo_dp_stream_enc->vpg;
-
- DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
+ if (dc_is_virtual_signal(pipe_ctx->stream->signal))
+ return;
if (pipe_ctx->stream->sink) {
if (pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_VIRTUAL &&
pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_NONE) {
- DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x\n", __func__,
+ DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x link=%d\n", __func__,
pipe_ctx->stream->sink->edid_caps.display_name,
- pipe_ctx->stream->signal);
+ pipe_ctx->stream->signal,
+ link->link_index);
}
}
- if (dc_is_virtual_signal(pipe_ctx->stream->signal))
- return;
-
- link_enc = link_enc_cfg_get_link_enc(link);
+ if (!dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
ASSERT(link_enc);
if (!dc_is_virtual_signal(pipe_ctx->stream->signal)
@@ -2411,8 +2502,6 @@ void link_set_dpms_on(
pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, otg_out_dest);
}
- link_hwss->setup_stream_attribute(pipe_ctx);
-
pipe_ctx->stream->apply_edp_fast_boot_optimization = false;
// Enable VPG before building infoframe
@@ -2446,12 +2535,29 @@ void link_set_dpms_on(
!pipe_ctx->next_odm_pipe) {
pipe_ctx->stream->dpms_off = false;
update_psp_stream_config(pipe_ctx, false);
+
+ if (link->is_dds) {
+ uint32_t post_oui_delay = 30; // 30ms
+
+ dpcd_set_source_specific_data(link);
+ msleep(post_oui_delay);
+ }
+
return;
}
if (pipe_ctx->stream->dpms_off)
return;
+	/* For a DP tunneling link, a pending HPD means that we have a race condition between processing
+ * current link and processing the pending HPD. If we enable the link now, we may end up with a
+ * link that is not actually connected to a sink. So we skip enabling the link in this case.
+ */
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->is_hpd_pending) {
+ DC_LOG_DEBUG("%s, Link%d HPD is pending, not enable it.\n", __func__, link->link_index);
+ return;
+ }
+
/* Have to setup DSC before DIG FE and BE are connected (which happens before the
* link training). This is to make sure the bandwidth sent to DIG BE won't be
* bigger than what the link and/or DIG BE can handle. VBID[6]/CompressedStream_flag
@@ -2460,9 +2566,8 @@ void link_set_dpms_on(
*/
if (pipe_ctx->stream->timing.flags.DSC) {
if (dc_is_dp_signal(pipe_ctx->stream->signal) ||
- dc_is_virtual_signal(pipe_ctx->stream->signal))
- link_set_dsc_enable(pipe_ctx, true);
-
+ dc_is_virtual_signal(pipe_ctx->stream->signal))
+ link_set_dsc_enable(pipe_ctx, true);
}
status = enable_link(state, pipe_ctx);
@@ -2499,10 +2604,12 @@ void link_set_dpms_on(
*/
if (!(dc_is_virtual_signal(pipe_ctx->stream->signal) ||
dp_is_128b_132b_signal(pipe_ctx))) {
+
if (link_enc)
link_enc->funcs->setup(
link_enc,
pipe_ctx->stream->signal);
+
}
dc->hwss.enable_stream(pipe_ctx);
@@ -2516,12 +2623,27 @@ void link_set_dpms_on(
}
}
+ if (dc_is_dp_signal(pipe_ctx->stream->signal))
+ dp_set_hblank_reduction_on_rx(pipe_ctx);
+
+ if (pipe_ctx->link_config.dp_tunnel_settings.should_use_dp_bw_allocation)
+ allocate_usb4_bandwidth(pipe_ctx->stream);
+
if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
allocate_mst_payload(pipe_ctx);
- else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
+ else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) &&
dp_is_128b_132b_signal(pipe_ctx))
update_sst_payload(pipe_ctx, true);
+ /* Corruption was observed on systems with display mux when stream gets
+ * enabled after the mux switch. Having a small delay between link
+ * training and stream unblank resolves the corruption issue.
+	 * This is a workaround.
+ */
+ if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP &&
+ link->is_display_mux_present)
+ msleep(20);
+
dc->hwss.unblank_stream(pipe_ctx,
&pipe_ctx->stream->link->cur_link_settings);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.h b/drivers/gpu/drm/amd/display/dc/link/link_dpms.h
index 9398f9c1666a..bd6fc63064a3 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_DPMS_H__
#define __DC_LINK_DPMS_H__
-#include "link.h"
+#include "link_service.h"
void link_set_dpms_on(
struct dc_state *state,
struct pipe_ctx *pipe_ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 0895742a3102..31a73867cd4c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -33,7 +33,6 @@
#include "link_dpms.h"
#include "accessories/link_dp_cts.h"
#include "accessories/link_dp_trace.h"
-#include "accessories/link_fpga.h"
#include "protocols/link_ddc.h"
#include "protocols/link_dp_capability.h"
#include "protocols/link_dp_dpia_bw.h"
@@ -46,6 +45,8 @@
#include "gpio_service_interface.h"
#include "atomfirmware.h"
+#define DC_LOGGER \
+ dc_ctx->logger
#define DC_LOGGER_INIT(logger)
#define LINK_INFO(...) \
@@ -99,7 +100,8 @@ static void construct_link_service_validation(struct link_service *link_srv)
{
link_srv->validate_mode_timing = link_validate_mode_timing;
link_srv->dp_link_bandwidth_kbps = dp_link_bandwidth_kbps;
- link_srv->validate_dpia_bandwidth = link_validate_dpia_bandwidth;
+ link_srv->validate_dp_tunnel_bandwidth = link_validate_dp_tunnel_bandwidth;
+ link_srv->dp_required_hblank_size_bytes = dp_required_hblank_size_bytes;
}
/* link dpms owns the programming sequence of stream's dpms state associated
@@ -154,6 +156,7 @@ static void construct_link_service_dp_capability(struct link_service *link_srv)
link_srv->dp_get_encoding_format = link_dp_get_encoding_format;
link_srv->dp_should_enable_fec = dp_should_enable_fec;
link_srv->dp_decide_link_settings = link_decide_link_settings;
+ link_srv->dp_decide_tunnel_settings = link_decide_dp_tunnel_settings;
link_srv->mst_decide_link_encoding_format =
mst_decide_link_encoding_format;
link_srv->edp_decide_link_settings = edp_decide_link_settings;
@@ -162,6 +165,8 @@ static void construct_link_service_dp_capability(struct link_service *link_srv)
link_srv->dp_overwrite_extended_receiver_cap =
dp_overwrite_extended_receiver_cap;
link_srv->dp_decide_lttpr_mode = dp_decide_lttpr_mode;
+ link_srv->dp_get_lttpr_count = dp_get_lttpr_count;
+ link_srv->edp_get_alpm_support = edp_get_alpm_support;
}
/* link dp phy/dpia implements basic dp phy/dpia functionality such as
@@ -173,7 +178,6 @@ static void construct_link_service_dp_phy_or_dpia(struct link_service *link_srv)
{
link_srv->dpia_handle_usb4_bandwidth_allocation_for_link =
dpia_handle_usb4_bandwidth_allocation_for_link;
- link_srv->dpia_handle_bw_alloc_response = dpia_handle_bw_alloc_response;
link_srv->dp_set_drive_settings = dp_set_drive_settings;
link_srv->dpcd_write_rx_power_ctrl = dpcd_write_rx_power_ctrl;
}
@@ -212,8 +216,10 @@ static void construct_link_service_edp_panel_control(struct link_service *link_s
link_srv->edp_get_replay_state = edp_get_replay_state;
link_srv->edp_set_replay_allow_active = edp_set_replay_allow_active;
link_srv->edp_setup_replay = edp_setup_replay;
+ link_srv->edp_send_replay_cmd = edp_send_replay_cmd;
link_srv->edp_set_coasting_vtotal = edp_set_coasting_vtotal;
link_srv->edp_replay_residency = edp_replay_residency;
+ link_srv->edp_set_replay_power_opt_and_coasting_vtotal = edp_set_replay_power_opt_and_coasting_vtotal;
link_srv->edp_wait_for_t12 = edp_wait_for_t12;
link_srv->edp_is_ilr_optimization_required =
@@ -223,6 +229,7 @@ static void construct_link_service_edp_panel_control(struct link_service *link_s
link_srv->edp_receiver_ready_T9 = edp_receiver_ready_T9;
link_srv->edp_receiver_ready_T7 = edp_receiver_ready_T7;
link_srv->edp_power_alpm_dpcd_enable = edp_power_alpm_dpcd_enable;
+ link_srv->edp_set_panel_power = edp_set_panel_power;
}
/* link dp cts implements dp compliance test automation protocols and manual
@@ -381,12 +388,12 @@ static void link_destruct(struct dc_link *link)
if (link->panel_cntl)
link->panel_cntl->funcs->destroy(&link->panel_cntl);
- if (link->link_enc) {
+ if (link->link_enc && !link->is_dig_mapping_flexible) {
/* Update link encoder resource tracking variables. These are used for
* the dynamic assignment of link encoders to streams. Virtual links
* are not assigned encoder resources on creation.
*/
- if (link->link_id.id != CONNECTOR_ID_VIRTUAL) {
+ if (link->link_id.id != CONNECTOR_ID_VIRTUAL && link->eng_id != ENGINE_ID_UNKNOWN) {
link->dc->res_pool->link_encoders[link->eng_id - ENGINE_ID_DIGA] = NULL;
link->dc->res_pool->dig_link_enc_count--;
}
@@ -452,7 +459,6 @@ static bool construct_phy(struct dc_link *link,
struct dc_context *dc_ctx = init_params->ctx;
struct encoder_init_data enc_init_data = { 0 };
struct panel_cntl_init_data panel_cntl_init_data = { 0 };
- struct integrated_info info = { 0 };
struct dc_bios *bios = init_params->dc->ctx->dc_bios;
const struct dc_vbios_funcs *bp_funcs = bios->funcs;
struct bp_disp_connector_caps_info disp_connect_caps_info = { 0 };
@@ -461,6 +467,7 @@ static bool construct_phy(struct dc_link *link,
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
link->irq_source_hpd_rx = DC_IRQ_SOURCE_INVALID;
+ link->irq_source_read_request = DC_IRQ_SOURCE_INVALID;
link->link_status.dpcd_caps = &link->dpcd_caps;
link->dc = init_params->dc;
@@ -511,6 +518,9 @@ static bool construct_phy(struct dc_link *link,
case CONNECTOR_ID_HDMI_TYPE_A:
link->connector_signal = SIGNAL_TYPE_HDMI_TYPE_A;
+ if (link->hpd_gpio)
+ link->irq_source_read_request =
+ dal_irq_get_read_request(link->hpd_gpio);
break;
case CONNECTOR_ID_SINGLE_LINK_DVID:
case CONNECTOR_ID_SINGLE_LINK_DVII:
@@ -521,6 +531,7 @@ static bool construct_phy(struct dc_link *link,
link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK;
break;
case CONNECTOR_ID_DISPLAY_PORT:
+ case CONNECTOR_ID_MXM:
case CONNECTOR_ID_USBC:
link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT;
@@ -530,10 +541,16 @@ static bool construct_phy(struct dc_link *link,
break;
case CONNECTOR_ID_EDP:
+ // If smartmux is supported, only create the link on the primary eDP.
+ // Dual eDP is not supported with smartmux.
+ if (!(!link->dc->config.smart_mux_version || dc_ctx->dc_edp_id_count == 0))
+ goto create_fail;
+
link->connector_signal = SIGNAL_TYPE_EDP;
if (link->hpd_gpio) {
- if (!link->dc->config.allow_edp_hotplug_detection)
+ if (!link->dc->config.allow_edp_hotplug_detection
+ && !is_smartmux_suported(link))
link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
switch (link->dc->config.allow_edp_hotplug_detection) {
@@ -593,24 +610,6 @@ static bool construct_phy(struct dc_link *link,
link->ddc_hw_inst =
dal_ddc_get_line(get_ddc_pin(link->ddc));
-
- if (link->dc->res_pool->funcs->panel_cntl_create &&
- (link->link_id.id == CONNECTOR_ID_EDP ||
- link->link_id.id == CONNECTOR_ID_LVDS)) {
- panel_cntl_init_data.ctx = dc_ctx;
- panel_cntl_init_data.inst =
- panel_cntl_init_data.ctx->dc_edp_id_count;
- link->panel_cntl =
- link->dc->res_pool->funcs->panel_cntl_create(
- &panel_cntl_init_data);
- panel_cntl_init_data.ctx->dc_edp_id_count++;
-
- if (link->panel_cntl == NULL) {
- DC_ERROR("Failed to create link panel_cntl!\n");
- goto panel_cntl_create_fail;
- }
- }
-
enc_init_data.ctx = dc_ctx;
bp_funcs->get_src_obj(dc_ctx->dc_bios, link->link_id, 0,
&enc_init_data.encoder);
@@ -625,14 +624,14 @@ static bool construct_phy(struct dc_link *link,
link->link_enc =
link->dc->res_pool->funcs->link_enc_create(dc_ctx, &enc_init_data);
- DC_LOG_DC("BIOS object table - DP_IS_USB_C: %d", link->link_enc->features.flags.bits.DP_IS_USB_C);
- DC_LOG_DC("BIOS object table - IS_DP2_CAPABLE: %d", link->link_enc->features.flags.bits.IS_DP2_CAPABLE);
-
if (!link->link_enc) {
DC_ERROR("Failed to create link encoder!\n");
goto link_enc_create_fail;
}
+ DC_LOG_DC("BIOS object table - DP_IS_USB_C: %d", link->link_enc->features.flags.bits.DP_IS_USB_C);
+ DC_LOG_DC("BIOS object table - IS_DP2_CAPABLE: %d", link->link_enc->features.flags.bits.IS_DP2_CAPABLE);
+
/* Update link encoder tracking variables. These are used for the dynamic
* assignment of link encoders to streams.
*/
@@ -641,6 +640,23 @@ static bool construct_phy(struct dc_link *link,
link->dc->res_pool->dig_link_enc_count++;
link->link_enc_hw_inst = link->link_enc->transmitter;
+
+ if (link->dc->res_pool->funcs->panel_cntl_create &&
+ (link->link_id.id == CONNECTOR_ID_EDP ||
+ link->link_id.id == CONNECTOR_ID_LVDS)) {
+ panel_cntl_init_data.ctx = dc_ctx;
+ panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count;
+ panel_cntl_init_data.eng_id = link->eng_id;
+ link->panel_cntl =
+ link->dc->res_pool->funcs->panel_cntl_create(
+ &panel_cntl_init_data);
+ panel_cntl_init_data.ctx->dc_edp_id_count++;
+
+ if (link->panel_cntl == NULL) {
+ DC_ERROR("Failed to create link panel_cntl!\n");
+ goto panel_cntl_create_fail;
+ }
+ }
for (i = 0; i < 4; i++) {
if (bp_funcs->get_device_tag(dc_ctx->dc_bios,
link->link_id, i,
@@ -650,7 +666,7 @@ static bool construct_phy(struct dc_link *link,
}
/* Look for device tag that matches connector signal,
- * CRT for rgb, LCD for other supported signal tyes
+ * CRT for rgb, LCD for other supported signal types
*/
if (!bp_funcs->is_device_id_supported(dc_ctx->dc_bios,
link->device_tag.dev_id))
@@ -668,42 +684,44 @@ static bool construct_phy(struct dc_link *link,
break;
}
- if (bios->integrated_info)
- info = *bios->integrated_info;
-
- /* Look for channel mapping corresponding to connector and device tag */
- for (i = 0; i < MAX_NUMBER_OF_EXT_DISPLAY_PATH; i++) {
- struct external_display_path *path =
- &info.ext_disp_conn_info.path[i];
-
- if (path->device_connector_id.enum_id == link->link_id.enum_id &&
- path->device_connector_id.id == link->link_id.id &&
- path->device_connector_id.type == link->link_id.type) {
- if (link->device_tag.acpi_device != 0 &&
- path->device_acpi_enum == link->device_tag.acpi_device) {
- link->ddi_channel_mapping = path->channel_mapping;
- link->chip_caps = path->caps;
- DC_LOG_DC("BIOS object table - ddi_channel_mapping: 0x%04X", link->ddi_channel_mapping.raw);
- DC_LOG_DC("BIOS object table - chip_caps: %d", link->chip_caps);
- } else if (path->device_tag ==
- link->device_tag.dev_id.raw_device_tag) {
- link->ddi_channel_mapping = path->channel_mapping;
- link->chip_caps = path->caps;
- DC_LOG_DC("BIOS object table - ddi_channel_mapping: 0x%04X", link->ddi_channel_mapping.raw);
- DC_LOG_DC("BIOS object table - chip_caps: %d", link->chip_caps);
- }
+ if (bios->integrated_info) {
+ /* Look for channel mapping corresponding to connector and device tag */
+ for (i = 0; i < MAX_NUMBER_OF_EXT_DISPLAY_PATH; i++) {
+ struct external_display_path *path =
+ &bios->integrated_info->ext_disp_conn_info.path[i];
+
+ if (path->device_connector_id.enum_id == link->link_id.enum_id &&
+ path->device_connector_id.id == link->link_id.id &&
+ path->device_connector_id.type == link->link_id.type) {
+ if (link->device_tag.acpi_device != 0 &&
+ path->device_acpi_enum == link->device_tag.acpi_device) {
+ link->ddi_channel_mapping = path->channel_mapping;
+ link->chip_caps = path->caps;
+ DC_LOG_DC("BIOS object table - ddi_channel_mapping: 0x%04X",
+ link->ddi_channel_mapping.raw);
+ DC_LOG_DC("BIOS object table - chip_caps: %d",
+ link->chip_caps);
+ } else if (path->device_tag ==
+ link->device_tag.dev_id.raw_device_tag) {
+ link->ddi_channel_mapping = path->channel_mapping;
+ link->chip_caps = path->caps;
+ DC_LOG_DC("BIOS object table - ddi_channel_mapping: 0x%04X",
+ link->ddi_channel_mapping.raw);
+ DC_LOG_DC("BIOS object table - chip_caps: %d",
+ link->chip_caps);
+ }
+
+ if ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) {
+ link->bios_forced_drive_settings.VOLTAGE_SWING =
+ (bios->integrated_info->ext_disp_conn_info.fixdpvoltageswing & 0x3);
+ link->bios_forced_drive_settings.PRE_EMPHASIS =
+ ((bios->integrated_info->ext_disp_conn_info.fixdpvoltageswing >> 2) & 0x3);
+ }
- if (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) {
- link->bios_forced_drive_settings.VOLTAGE_SWING =
- (info.ext_disp_conn_info.fixdpvoltageswing & 0x3);
- link->bios_forced_drive_settings.PRE_EMPHASIS =
- ((info.ext_disp_conn_info.fixdpvoltageswing >> 2) & 0x3);
+ break;
}
-
- break;
}
}
-
if (bios->funcs->get_atom_dc_golden_table)
bios->funcs->get_atom_dc_golden_table(bios);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.h b/drivers/gpu/drm/amd/display/dc/link/link_factory.h
index e96220d48d03..aad36ca1a31c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.h
@@ -24,7 +24,7 @@
*/
#ifndef __LINK_FACTORY_H__
#define __LINK_FACTORY_H__
-#include "link.h"
+#include "link_service.h"
struct dc_link *link_create(const struct link_init_data *init_params);
void link_destroy(struct dc_link **link);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_resource.h b/drivers/gpu/drm/amd/display/dc/link/link_resource.h
index 1907bda3cb6e..f7aa3bc3a93a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_resource.h
@@ -24,7 +24,7 @@
*/
#ifndef __LINK_RESOURCE_H__
#define __LINK_RESOURCE_H__
-#include "link.h"
+#include "link_service.h"
void link_get_cur_res_map(const struct dc *dc, uint32_t *map);
void link_restore_res_map(const struct dc *dc, uint32_t *map);
void link_get_cur_link_res(const struct dc_link *link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
index b45fda96eaf6..acdc162de535 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
@@ -86,6 +86,10 @@ static bool dp_active_dongle_validate_timing(
if (!dongle_caps->is_dp_hdmi_ycbcr420_pass_through)
return false;
break;
+ case PIXEL_ENCODING_UNDEFINED:
+ /* An undefined pixel encoding is currently not supported */
+ ASSERT(false);
+ break;
default:
/* Invalid Pixel Encoding*/
return false;
@@ -104,6 +108,10 @@ static bool dp_active_dongle_validate_timing(
if (dongle_caps->dp_hdmi_max_bpc < 12)
return false;
break;
+ case COLOR_DEPTH_UNDEFINED:
+ /* These color depths are currently not supported */
+ ASSERT(false);
+ break;
case COLOR_DEPTH_141414:
case COLOR_DEPTH_161616:
default:
@@ -125,11 +133,9 @@ static bool dp_active_dongle_validate_timing(
if (dongle_caps->dp_hdmi_frl_max_link_bw_in_kbps > 0) { // DP to HDMI FRL converter
struct dc_crtc_timing outputTiming = *timing;
-#if defined(CONFIG_DRM_AMD_DC_FP)
if (timing->flags.DSC && !timing->dsc_cfg.is_frl)
/* DP input has DSC, HDMI FRL output doesn't have DSC, remove DSC from output timing */
outputTiming.flags.DSC = 0;
-#endif
if (dc_bandwidth_in_kbps_from_timing(&outputTiming, DC_LINK_ENCODING_HDMI_FRL) >
dongle_caps->dp_hdmi_frl_max_link_bw_in_kbps)
return false;
@@ -257,6 +263,14 @@ uint32_t dp_link_bandwidth_kbps(
return link_rate_per_lane_kbps * link_settings->lane_count / 10000 * total_data_bw_efficiency_x10000;
}
+static uint32_t dp_get_timing_bandwidth_kbps(
+ const struct dc_crtc_timing *timing,
+ const struct dc_link *link)
+{
+ return dc_bandwidth_in_kbps_from_timing(timing,
+ dc_link_get_highest_encoding_format(link));
+}
+
static bool dp_validate_mode_timing(
struct dc_link *link,
const struct dc_crtc_timing *timing)
@@ -289,6 +303,13 @@ static bool dp_validate_mode_timing(
req_bw = dc_bandwidth_in_kbps_from_timing(timing, dc_link_get_highest_encoding_format(link));
max_bw = dp_link_bandwidth_kbps(link, link_setting);
+ bool is_max_uncompressed_pixel_rate_exceeded = link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.valid &&
+ timing->pix_clk_100hz > link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.max_uncompressed_pixel_rate_cap * 10000;
+
+ if (is_max_uncompressed_pixel_rate_exceeded && !timing->flags.DSC) {
+ return false;
+ }
+
if (req_bw <= max_bw) {
/* remember the biggest mode here, during
* initial link training (to get
@@ -346,23 +367,260 @@ enum dc_status link_validate_mode_timing(
return DC_OK;
}
-bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams)
+static const struct dc_tunnel_settings *get_dp_tunnel_settings(const struct dc_state *context,
+ const struct dc_stream_state *stream)
{
- bool ret = true;
- int bw_needed[MAX_DPIA_NUM];
- struct dc_link *link[MAX_DPIA_NUM];
+ int i;
+ const struct dc_tunnel_settings *dp_tunnel_settings = NULL;
- if (!num_streams || num_streams > MAX_DPIA_NUM)
- return ret;
+ for (i = 0; i < MAX_PIPES; i++) {
+ if (context->res_ctx.pipe_ctx[i].stream && (context->res_ctx.pipe_ctx[i].stream == stream)) {
+ dp_tunnel_settings = &context->res_ctx.pipe_ctx[i].link_config.dp_tunnel_settings;
+ break;
+ }
+ }
- for (uint8_t i = 0; i < num_streams; ++i) {
+ return dp_tunnel_settings;
+}
- link[i] = stream[i].link;
- bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
- dc_link_get_highest_encoding_format(link[i]));
+/*
+ * Calculates the DP tunneling bandwidth required for the stream timing
+ * and aggregates the stream bandwidth for the respective DP tunneling link
+ *
+ * return: dc_status
+ */
+enum dc_status link_validate_dp_tunnel_bandwidth(const struct dc *dc, const struct dc_state *new_ctx)
+{
+ struct dc_validation_dpia_set dpia_link_sets[MAX_DPIA_NUM] = { 0 };
+ uint8_t link_count = 0;
+ enum dc_status result = DC_OK;
+
+ // Iterate through streams in the new context
+ for (uint8_t i = 0; (i < MAX_PIPES && i < new_ctx->stream_count); i++) {
+ const struct dc_stream_state *stream = new_ctx->streams[i];
+ const struct dc_link *link;
+ const struct dc_tunnel_settings *dp_tunnel_settings;
+ uint32_t timing_bw;
+
+ if (stream == NULL)
+ continue;
+
+ link = stream->link;
+
+ if (!(link && (stream->signal == SIGNAL_TYPE_DISPLAY_PORT
+ || stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)))
+ continue;
+
+ if ((link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) && (link->hpd_status == false))
+ continue;
+
+ dp_tunnel_settings = get_dp_tunnel_settings(new_ctx, stream);
+
+ if ((dp_tunnel_settings == NULL) || (dp_tunnel_settings->should_use_dp_bw_allocation == false))
+ continue;
+
+ timing_bw = dp_get_timing_bandwidth_kbps(&stream->timing, link);
+
+ // Find an existing entry for this 'link' in 'dpia_link_sets'
+ for (uint8_t j = 0; j < MAX_DPIA_NUM; j++) {
+ bool is_new_slot = false;
+
+ if (dpia_link_sets[j].link == NULL) {
+ is_new_slot = true;
+ link_count++;
+ dpia_link_sets[j].required_bw = 0;
+ dpia_link_sets[j].link = link;
+ }
+
+ if (is_new_slot || (dpia_link_sets[j].link == link)) {
+ dpia_link_sets[j].tunnel_settings = dp_tunnel_settings;
+ dpia_link_sets[j].required_bw += timing_bw;
+ break;
+ }
+ }
}
- ret = dpia_validate_usb4_bw(link, bw_needed, num_streams);
+ if (link_count && link_dpia_validate_dp_tunnel_bandwidth(dpia_link_sets, link_count) == false)
+ result = DC_FAIL_DP_TUNNEL_BW_VALIDATE;
- return ret;
+ return result;
}
+
+struct dp_audio_layout_config {
+ uint8_t layouts_per_sample_denom;
+ uint8_t symbols_per_layout;
+ uint8_t max_layouts_per_audio_sdp;
+};
+
+static void get_audio_layout_config(
+ uint32_t channel_count,
+ enum dp_link_encoding encoding,
+ struct dp_audio_layout_config *output)
+{
+ memset(output, 0, sizeof(struct dp_audio_layout_config));
+
+ /* Assuming L-PCM audio. Current implementation uses max 1 layout per SDP,
+ * with each layout being the same size (8ch layout).
+ */
+ if (encoding == DP_8b_10b_ENCODING) {
+ if (channel_count == 2) {
+ output->layouts_per_sample_denom = 4;
+ output->symbols_per_layout = 40;
+ output->max_layouts_per_audio_sdp = 1;
+ } else if (channel_count == 8 || channel_count == 6) {
+ output->layouts_per_sample_denom = 1;
+ output->symbols_per_layout = 40;
+ output->max_layouts_per_audio_sdp = 1;
+ }
+ } else if (encoding == DP_128b_132b_ENCODING) {
+ if (channel_count == 2) {
+ output->layouts_per_sample_denom = 4;
+ output->symbols_per_layout = 10;
+ output->max_layouts_per_audio_sdp = 1;
+ } else if (channel_count == 8 || channel_count == 6) {
+ output->layouts_per_sample_denom = 1;
+ output->symbols_per_layout = 10;
+ output->max_layouts_per_audio_sdp = 1;
+ }
+ }
+}
+
+static uint32_t get_av_stream_map_lane_count(
+ enum dp_link_encoding encoding,
+ enum dc_lane_count lane_count,
+ bool is_mst)
+{
+ uint32_t av_stream_map_lane_count = 0;
+
+ if (encoding == DP_8b_10b_ENCODING) {
+ if (!is_mst)
+ av_stream_map_lane_count = lane_count;
+ else
+ av_stream_map_lane_count = 4;
+ } else if (encoding == DP_128b_132b_ENCODING) {
+ av_stream_map_lane_count = 4;
+ }
+
+ ASSERT(av_stream_map_lane_count != 0);
+
+ return av_stream_map_lane_count;
+}
+
+static uint32_t get_audio_sdp_overhead(
+ enum dp_link_encoding encoding,
+ enum dc_lane_count lane_count,
+ bool is_mst)
+{
+ uint32_t audio_sdp_overhead = 0;
+
+ if (encoding == DP_8b_10b_ENCODING) {
+ if (is_mst)
+ audio_sdp_overhead = 16; /* 4 * 2 + 8 */
+ else
+ audio_sdp_overhead = lane_count * 2 + 8;
+ } else if (encoding == DP_128b_132b_ENCODING) {
+ audio_sdp_overhead = 10; /* 4 x 2.5 */
+ }
+
+ ASSERT(audio_sdp_overhead != 0);
+
+ return audio_sdp_overhead;
+}
+
+/* Current calculation only applicable for 8b/10b MST and 128b/132b SST/MST.
+ */
+static uint32_t calculate_overhead_hblank_bw_in_symbols(
+ uint32_t max_slice_h)
+{
+ uint32_t overhead_hblank_bw = 0; /* in stream symbols */
+
+ overhead_hblank_bw += max_slice_h * 4; /* EOC overhead */
+ overhead_hblank_bw += 12; /* Main link overhead (VBID, BS/BE) */
+
+ return overhead_hblank_bw;
+}
+
+uint32_t dp_required_hblank_size_bytes(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params)
+{
+ /* Main logic from dce_audio is duplicated here, with the main
+ * difference being:
+ * - Pre-determined lane count of 4
+ * - Assumed 16 dsc slices for worst case
+ * - Assumed SDP split disabled for worst case
+ * TODO: Unify logic from dce_audio to prevent duplicated logic.
+ */
+
+ const struct dc_crtc_timing *timing = audio_params->crtc_timing;
+ const uint32_t channel_count = audio_params->channel_count;
+ const uint32_t sample_rate_hz = audio_params->sample_rate_hz;
+ const enum dp_link_encoding link_encoding = audio_params->link_encoding;
+
+ // 8b/10b MST and 128b/132b are always 4 logical lanes.
+ const uint32_t lane_count = 4;
+ const bool is_mst = (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT);
+ // Maximum slice count is with ODM 4:1, 4 slices per DSC
+ const uint32_t max_slices_h = 16;
+
+ const uint32_t av_stream_map_lane_count = get_av_stream_map_lane_count(
+ link_encoding, lane_count, is_mst);
+ const uint32_t audio_sdp_overhead = get_audio_sdp_overhead(
+ link_encoding, lane_count, is_mst);
+ struct dp_audio_layout_config layout_config;
+
+ if (link_encoding == DP_8b_10b_ENCODING && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT)
+ return 0;
+
+ get_audio_layout_config(
+ channel_count, link_encoding, &layout_config);
+
+ /* DP spec recommends a safety margin between 1.05 and 1.1 to prevent sample under-run */
+ struct fixed31_32 audio_sdp_margin = dc_fixpt_from_fraction(110, 100);
+ struct fixed31_32 horizontal_line_freq_khz = dc_fixpt_from_fraction(
+ timing->pix_clk_100hz, (long long)timing->h_total * 10);
+ struct fixed31_32 samples_per_line;
+ struct fixed31_32 layouts_per_line;
+ struct fixed31_32 symbols_per_sdp_max_layout;
+ struct fixed31_32 remainder;
+ uint32_t num_sdp_with_max_layouts;
+ uint32_t required_symbols_per_hblank;
+ uint32_t required_bytes_per_hblank = 0;
+
+ samples_per_line = dc_fixpt_from_fraction(sample_rate_hz, 1000);
+ samples_per_line = dc_fixpt_div(samples_per_line, horizontal_line_freq_khz);
+ layouts_per_line = dc_fixpt_div_int(samples_per_line, layout_config.layouts_per_sample_denom);
+ // HBlank expansion usage assumes SDP split disabled to allow for worst case.
+ layouts_per_line = dc_fixpt_from_int(dc_fixpt_ceil(layouts_per_line));
+
+ num_sdp_with_max_layouts = dc_fixpt_floor(
+ dc_fixpt_div_int(layouts_per_line, layout_config.max_layouts_per_audio_sdp));
+ symbols_per_sdp_max_layout = dc_fixpt_from_int(
+ layout_config.max_layouts_per_audio_sdp * layout_config.symbols_per_layout);
+ symbols_per_sdp_max_layout = dc_fixpt_add_int(symbols_per_sdp_max_layout, audio_sdp_overhead);
+ symbols_per_sdp_max_layout = dc_fixpt_mul(symbols_per_sdp_max_layout, audio_sdp_margin);
+ required_symbols_per_hblank = num_sdp_with_max_layouts;
+ required_symbols_per_hblank *= ((dc_fixpt_ceil(symbols_per_sdp_max_layout) + av_stream_map_lane_count) /
+ av_stream_map_lane_count) * av_stream_map_lane_count;
+
+ if (num_sdp_with_max_layouts != dc_fixpt_ceil(
+ dc_fixpt_div_int(layouts_per_line, layout_config.max_layouts_per_audio_sdp))) {
+ remainder = dc_fixpt_sub_int(layouts_per_line,
+ num_sdp_with_max_layouts * layout_config.max_layouts_per_audio_sdp);
+ remainder = dc_fixpt_mul_int(remainder, layout_config.symbols_per_layout);
+ remainder = dc_fixpt_add_int(remainder, audio_sdp_overhead);
+ remainder = dc_fixpt_mul(remainder, audio_sdp_margin);
+ required_symbols_per_hblank += ((dc_fixpt_ceil(remainder) + av_stream_map_lane_count) /
+ av_stream_map_lane_count) * av_stream_map_lane_count;
+ }
+
+ required_symbols_per_hblank += calculate_overhead_hblank_bw_in_symbols(max_slices_h);
+
+ if (link_encoding == DP_8b_10b_ENCODING)
+ required_bytes_per_hblank = required_symbols_per_hblank; // 8 bits per 8b/10b symbol
+ else if (link_encoding == DP_128b_132b_ENCODING)
+ required_bytes_per_hblank = required_symbols_per_hblank * 4; // 32 bits per 128b/132b symbol
+
+ return required_bytes_per_hblank;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
index 4a954317d0da..595774e76453 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
@@ -24,16 +24,22 @@
*/
#ifndef __LINK_VALIDATION_H__
#define __LINK_VALIDATION_H__
-#include "link.h"
+#include "link_service.h"
+
enum dc_status link_validate_mode_timing(
const struct dc_stream_state *stream,
struct dc_link *link,
const struct dc_crtc_timing *timing);
-bool link_validate_dpia_bandwidth(
- const struct dc_stream_state *stream,
- const unsigned int num_streams);
+enum dc_status link_validate_dp_tunnel_bandwidth(
+ const struct dc *dc,
+ const struct dc_state *new_ctx);
uint32_t dp_link_bandwidth_kbps(
const struct dc_link *link,
const struct dc_link_settings *link_settings);
+
+uint32_t dp_required_hblank_size_bytes(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params);
+
#endif /* __LINK_VALIDATION_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c
index ecfd83299e75..267180e7bc48 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c
@@ -38,6 +38,8 @@
#include "dm_helpers.h"
#include "atomfirmware.h"
+#define DC_LOGGER \
+ ddc_service->ctx->logger
#define DC_LOGGER_INIT(logger)
static const uint8_t DP_VGA_DONGLE_BRANCH_DEV_NAME[] = "DpVga";
@@ -49,10 +51,6 @@ struct i2c_payloads {
struct vector payloads;
};
-struct aux_payloads {
- struct vector payloads;
-};
-
static bool i2c_payloads_create(
struct dc_context *ctx,
struct i2c_payloads *payloads,
@@ -507,7 +505,7 @@ bool try_to_configure_aux_timeout(struct ddc_service *ddc,
bool result = false;
struct ddc *ddc_pin = ddc->ddc_pin;
- if ((ddc->link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if (((ddc->link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
!ddc->link->dc->debug.disable_fixed_vs_aux_timeout_wa &&
ddc->ctx->dce_version == DCN_VERSION_3_1) {
/* Fixed VS workaround for AUX timeout */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h
index a3e25e55bed6..d3e6f01a6a90 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h
@@ -26,7 +26,7 @@
#ifndef __DAL_DDC_SERVICE_H__
#define __DAL_DDC_SERVICE_H__
-#include "link.h"
+#include "link_service.h"
#define AUX_POWER_UP_WA_DELAY 500
#define I2C_OVER_AUX_DEFER_WA_DELAY 70
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
index 237e0ff955f3..b12c11bd6a14 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
@@ -51,9 +51,10 @@
#include "dc_dmub_srv.h"
#include "gpio_service_interface.h"
+#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */
+
#define DC_LOGGER \
link->ctx->logger
-#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */
#ifndef MAX
#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
@@ -157,6 +158,14 @@ uint8_t dp_parse_lttpr_repeater_count(uint8_t lttpr_repeater_count)
return 0; // invalid value
}
+uint32_t dp_get_closest_lttpr_offset(uint8_t lttpr_count)
+{
+ /* Calculate offset for LTTPR closest to DPTX which is highest in the chain
+ * Offset is 0 for single LTTPR cases as base LTTPR DPCD addresses target LTTPR 1
+ */
+ return DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE * (lttpr_count - 1);
+}
+
uint32_t link_bw_kbps_from_raw_frl_link_rate_data(uint8_t bw)
{
switch (bw) {
@@ -209,6 +218,16 @@ static enum dc_link_rate linkRateInKHzToLinkRateMultiplier(uint32_t link_rate_in
case 8100000:
link_rate = LINK_RATE_HIGH3; // Rate_9 (HBR3)- 8.10 Gbps/Lane
break;
+ case 10000000:
+ link_rate = LINK_RATE_UHBR10; // UHBR10 - 10.0 Gbps/Lane
+ break;
+ case 13500000:
+ link_rate = LINK_RATE_UHBR13_5; // UHBR13.5 - 13.5 Gbps/Lane
+ break;
+ case 20000000:
+ link_rate = LINK_RATE_UHBR20; // UHBR20 - 20.0 Gbps/Lane
+ break;
+
default:
link_rate = LINK_RATE_UNKNOWN;
break;
@@ -239,21 +258,23 @@ static uint32_t intersect_frl_link_bw_support(
{
uint32_t supported_bw_in_kbps = max_supported_frl_bw_in_kbps;
- // HDMI_ENCODED_LINK_BW bits are only valid if HDMI Link Configuration bit is 1 (FRL mode)
- if (hdmi_encoded_link_bw.bits.FRL_MODE) {
- if (hdmi_encoded_link_bw.bits.BW_48Gbps)
- supported_bw_in_kbps = 48000000;
- else if (hdmi_encoded_link_bw.bits.BW_40Gbps)
- supported_bw_in_kbps = 40000000;
- else if (hdmi_encoded_link_bw.bits.BW_32Gbps)
- supported_bw_in_kbps = 32000000;
- else if (hdmi_encoded_link_bw.bits.BW_24Gbps)
- supported_bw_in_kbps = 24000000;
- else if (hdmi_encoded_link_bw.bits.BW_18Gbps)
- supported_bw_in_kbps = 18000000;
- else if (hdmi_encoded_link_bw.bits.BW_9Gbps)
- supported_bw_in_kbps = 9000000;
- }
+ /* Skip checking the FRL_MODE bit, as certain PCONs clear
+ * it despite supporting the link BW indicated in the other bits.
+ */
+ if (hdmi_encoded_link_bw.bits.BW_48Gbps)
+ supported_bw_in_kbps = 48000000;
+ else if (hdmi_encoded_link_bw.bits.BW_40Gbps)
+ supported_bw_in_kbps = 40000000;
+ else if (hdmi_encoded_link_bw.bits.BW_32Gbps)
+ supported_bw_in_kbps = 32000000;
+ else if (hdmi_encoded_link_bw.bits.BW_24Gbps)
+ supported_bw_in_kbps = 24000000;
+ else if (hdmi_encoded_link_bw.bits.BW_18Gbps)
+ supported_bw_in_kbps = 18000000;
+ else if (hdmi_encoded_link_bw.bits.BW_9Gbps)
+ supported_bw_in_kbps = 9000000;
+ else if (hdmi_encoded_link_bw.bits.FRL_LINK_TRAINING_FINISHED)
+ supported_bw_in_kbps = 0; /* This case should only get hit in regulated autonomous mode. */
return supported_bw_in_kbps;
}
@@ -319,9 +340,12 @@ bool dp_is_fec_supported(const struct dc_link *link)
/* TODO - use asic cap instead of link_enc->features
* we no longer know which link enc to use for this link before commit
*/
- struct link_encoder *link_enc = NULL;
+ struct resource_context *res_ctx = &link->dc->current_state->res_ctx;
+ struct resource_pool *res_pool = link->dc->res_pool;
+ struct link_encoder *link_enc = get_temp_dio_link_enc(res_ctx, res_pool, link);
- link_enc = link_enc_cfg_get_link_enc(link);
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
ASSERT(link_enc);
return (dc_is_dp_signal(link->connector_signal) && link_enc &&
@@ -360,10 +384,16 @@ bool dp_is_128b_132b_signal(struct pipe_ctx *pipe_ctx)
bool dp_is_lttpr_present(struct dc_link *link)
{
- return (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) != 0 &&
+ /* Some sink devices report invalid LTTPR revision, so don't validate against that cap */
+ uint32_t lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+ bool is_lttpr_present = (lttpr_count > 0 &&
link->dpcd_caps.lttpr_caps.max_lane_count > 0 &&
- link->dpcd_caps.lttpr_caps.max_lane_count <= 4 &&
- link->dpcd_caps.lttpr_caps.revision.raw >= 0x14);
+ link->dpcd_caps.lttpr_caps.max_lane_count <= 4);
+
+ if (lttpr_count > 0 && !is_lttpr_present)
+ DC_LOG_ERROR("LTTPR count is nonzero but invalid lane count reported. Assuming no LTTPR present.\n");
+
+ return is_lttpr_present;
}
/* in DP compliance test, DPR-120 may have
@@ -396,7 +426,17 @@ static enum dc_link_rate get_link_rate_from_max_link_bw(
static enum dc_link_rate get_lttpr_max_link_rate(struct dc_link *link)
{
- enum dc_link_rate lttpr_max_link_rate = link->dpcd_caps.lttpr_caps.max_link_rate;
+
+ enum dc_link_rate lttpr_max_link_rate = LINK_RATE_UNKNOWN;
+
+ switch (link->dpcd_caps.lttpr_caps.max_link_rate) {
+ case LINK_RATE_LOW:
+ case LINK_RATE_HIGH:
+ case LINK_RATE_HIGH2:
+ case LINK_RATE_HIGH3:
+ lttpr_max_link_rate = link->dpcd_caps.lttpr_caps.max_link_rate;
+ break;
+ }
if (link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.bits.UHBR20)
lttpr_max_link_rate = LINK_RATE_UHBR20;
@@ -412,12 +452,18 @@ static enum dc_link_rate get_cable_max_link_rate(struct dc_link *link)
{
enum dc_link_rate cable_max_link_rate = LINK_RATE_UNKNOWN;
- if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20)
+ if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20) {
cable_max_link_rate = LINK_RATE_UHBR20;
- else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY)
+ } else if (link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY) {
cable_max_link_rate = LINK_RATE_UHBR13_5;
- else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10)
- cable_max_link_rate = LINK_RATE_UHBR10;
+ } else if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR10) {
+ // allow DP40 cables to do UHBR13.5 for passive or unknown cable type
+ if (link->dpcd_caps.cable_id.bits.CABLE_TYPE < 2) {
+ cable_max_link_rate = LINK_RATE_UHBR13_5;
+ } else {
+ cable_max_link_rate = LINK_RATE_UHBR10;
+ }
+ }
return cable_max_link_rate;
}
@@ -522,13 +568,30 @@ static enum dc_link_rate increase_link_rate(struct dc_link *link,
}
}
+static void increase_edp_link_rate(struct dc_link *link,
+ struct dc_link_settings *current_link_setting)
+{
+ if (current_link_setting->use_link_rate_set) {
+ if (current_link_setting->link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) {
+ current_link_setting->link_rate_set++;
+ current_link_setting->link_rate =
+ link->dpcd_caps.edp_supported_link_rates[current_link_setting->link_rate_set];
+ } else {
+ current_link_setting->use_link_rate_set = false;
+ current_link_setting->link_rate = LINK_RATE_UHBR10;
+ }
+ } else {
+ current_link_setting->link_rate = increase_link_rate(link, current_link_setting->link_rate);
+ }
+}
+
static bool decide_fallback_link_setting_max_bw_policy(
struct dc_link *link,
const struct dc_link_settings *max,
struct dc_link_settings *cur,
enum link_training_result training_result)
{
- uint8_t cur_idx = 0, next_idx;
+ uint32_t cur_idx = 0, next_idx;
bool found = false;
if (training_result == LINK_TRAINING_ABORT)
@@ -707,8 +770,7 @@ bool edp_decide_link_settings(struct dc_link *link,
* edp_supported_link_rates_count is only valid for eDP v1.4 or higher.
* Per VESA eDP spec, "The DPCD revision for eDP v1.4 is 13h"
*/
- if (link->dpcd_caps.dpcd_rev.raw < DPCD_REV_13 ||
- link->dpcd_caps.edp_supported_link_rates_count == 0) {
+ if (!edp_is_ilr_optimization_enabled(link)) {
*link_setting = link->verified_link_cap;
return true;
}
@@ -741,14 +803,7 @@ bool edp_decide_link_settings(struct dc_link *link,
increase_lane_count(
current_link_setting.lane_count);
} else {
- if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) {
- current_link_setting.link_rate_set++;
- current_link_setting.link_rate =
- link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set];
- current_link_setting.lane_count =
- initial_link_setting.lane_count;
- } else
- break;
+ increase_edp_link_rate(link, &current_link_setting);
}
}
return false;
@@ -772,8 +827,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link,
* edp_supported_link_rates_count is only valid for eDP v1.4 or higher.
* Per VESA eDP spec, "The DPCD revision for eDP v1.4 is 13h"
*/
- if ((link->dpcd_caps.dpcd_rev.raw < DPCD_REV_13 ||
- link->dpcd_caps.edp_supported_link_rates_count == 0)) {
+ if (!edp_is_ilr_optimization_enabled(link)) {
/* for DSC enabled case, we search for minimum lane count */
memset(&initial_link_setting, 0, sizeof(initial_link_setting));
initial_link_setting.lane_count = LANE_COUNT_ONE;
@@ -801,9 +855,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link,
if (policy) {
/* minimize lane */
if (current_link_setting.link_rate < max_link_rate) {
- current_link_setting.link_rate =
- increase_link_rate(link,
- current_link_setting.link_rate);
+ increase_edp_link_rate(link, &current_link_setting);
} else {
if (current_link_setting.lane_count <
link->verified_link_cap.lane_count) {
@@ -822,9 +874,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link,
increase_lane_count(
current_link_setting.lane_count);
} else {
- current_link_setting.link_rate =
- increase_link_rate(link,
- current_link_setting.link_rate);
+ increase_edp_link_rate(link, &current_link_setting);
current_link_setting.lane_count =
initial_link_setting.lane_count;
}
@@ -857,18 +907,15 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link,
}
if (policy) {
/* minimize lane */
- if (current_link_setting.link_rate_set <
- link->dpcd_caps.edp_supported_link_rates_count
- && current_link_setting.link_rate < max_link_rate) {
- current_link_setting.link_rate_set++;
- current_link_setting.link_rate =
- link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set];
+ if (current_link_setting.link_rate < max_link_rate) {
+ increase_edp_link_rate(link, &current_link_setting);
} else {
if (current_link_setting.lane_count < link->verified_link_cap.lane_count) {
current_link_setting.lane_count =
increase_lane_count(
current_link_setting.lane_count);
current_link_setting.link_rate_set = initial_link_setting.link_rate_set;
+ current_link_setting.use_link_rate_set = initial_link_setting.use_link_rate_set;
current_link_setting.link_rate =
link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set];
} else
@@ -882,13 +929,8 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link,
increase_lane_count(
current_link_setting.lane_count);
} else {
- if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) {
- current_link_setting.link_rate_set++;
- current_link_setting.link_rate =
- link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set];
- current_link_setting.lane_count =
- initial_link_setting.lane_count;
- } else
+ increase_edp_link_rate(link, &current_link_setting);
+ if (current_link_setting.link_rate == LINK_RATE_UNKNOWN)
break;
}
}
@@ -910,22 +952,21 @@ bool link_decide_link_settings(struct dc_stream_state *stream,
memset(link_setting, 0, sizeof(*link_setting));
- /* if preferred is specified through AMDDP, use it, if it's enough
- * to drive the mode
- */
- if (link->preferred_link_setting.lane_count !=
- LANE_COUNT_UNKNOWN &&
- link->preferred_link_setting.link_rate !=
- LINK_RATE_UNKNOWN) {
+ if (dc_is_dp_signal(stream->signal) &&
+ link->preferred_link_setting.lane_count != LANE_COUNT_UNKNOWN &&
+ link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN) {
+ /* if preferred is specified through AMDDP, use it, if it's enough
+ * to drive the mode
+ */
*link_setting = link->preferred_link_setting;
- return true;
- }
-
- /* MST doesn't perform link training for now
- * TODO: add MST specific link training routine
- */
- if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+ } else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+ /* MST doesn't perform link training for now
+ * TODO: add MST specific link training routine
+ */
decide_mst_link_settings(link, link_setting);
+ } else if (stream->signal == SIGNAL_TYPE_VIRTUAL) {
+ link_setting->lane_count = LANE_COUNT_FOUR;
+ link_setting->link_rate = LINK_RATE_HIGH3;
} else if (link->connector_signal == SIGNAL_TYPE_EDP) {
/* enable edp link optimization for DSC eDP case */
if (stream->timing.flags.DSC) {
@@ -988,7 +1029,7 @@ enum dp_link_encoding mst_decide_link_encoding_format(const struct dc_link *link
static void read_dp_device_vendor_id(struct dc_link *link)
{
- struct dp_device_vendor_id dp_id;
+ struct dp_device_vendor_id dp_id = {0};
/* read IEEE branch device id */
core_link_read_dpcd(
@@ -1050,6 +1091,48 @@ static enum dc_status wake_up_aux_channel(struct dc_link *link)
return DC_OK;
}
+static void read_and_intersect_post_frl_lt_status(
+ struct dc_link *link)
+{
+ union autonomous_mode_and_frl_link_status autonomous_mode_caps = {0};
+ union hdmi_tx_link_status hdmi_tx_link_status = {0};
+ union hdmi_encoded_link_bw hdmi_encoded_link_bw = {0};
+
+ /* Check if dongle supports regulated autonomous mode. */
+ core_link_read_dpcd(link, DP_REGULATED_AUTONOMOUS_MODE_SUPPORTED_AND_HDMI_LINK_TRAINING_STATUS,
+ &autonomous_mode_caps.raw, sizeof(autonomous_mode_caps));
+
+ link->dpcd_caps.dongle_caps.dp_hdmi_regulated_autonomous_mode_support =
+ autonomous_mode_caps.bits.REGULATED_AUTONOMOUS_MODE_SUPPORTED;
+
+ if (link->dpcd_caps.dongle_caps.dp_hdmi_regulated_autonomous_mode_support) {
+ DC_LOG_DC("%s: PCON supports regulated autonomous mode.\n", __func__);
+
+ core_link_read_dpcd(link, DP_PCON_HDMI_TX_LINK_STATUS,
+ &hdmi_tx_link_status.raw, sizeof(hdmi_tx_link_status));
+ }
+
+ // Intersect reported max link bw support with the supported link rate post FRL link training
+ if (core_link_read_dpcd(link, DP_PCON_HDMI_POST_FRL_STATUS,
+ &hdmi_encoded_link_bw.raw, sizeof(hdmi_encoded_link_bw)) == DC_OK) {
+
+ if (link->dpcd_caps.dongle_caps.dp_hdmi_regulated_autonomous_mode_support &&
+ (!hdmi_tx_link_status.bits.HDMI_TX_READY_STATUS ||
+ !hdmi_encoded_link_bw.bits.FRL_LINK_TRAINING_FINISHED)) {
+ DC_LOG_WARNING("%s: PCON TX link training has not finished.\n", __func__);
+
+ /* Link training not finished, ignore values from this DPCD reg. */
+ return;
+ }
+
+ link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = intersect_frl_link_bw_support(
+ link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps,
+ hdmi_encoded_link_bw);
+ DC_LOG_DC("%s: pcon frl link bw = %u\n", __func__,
+ link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps);
+ }
+}
+
static void get_active_converter_info(
uint8_t data, struct dc_link *link)
{
@@ -1083,7 +1166,7 @@ static void get_active_converter_info(
}
if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_11) {
- uint8_t det_caps[16]; /* CTS 4.2.2.7 expects source to read Detailed Capabilities Info : 00080h-0008F.*/
+ uint8_t det_caps[16] = {0}; /* CTS 4.2.2.7 expects source to read Detailed Capabilities Info : 00080h-0008F.*/
union dwnstream_port_caps_byte0 *port_caps =
(union dwnstream_port_caps_byte0 *)det_caps;
if (core_link_read_dpcd(link, DP_DOWNSTREAM_PORT_0,
@@ -1138,19 +1221,12 @@ static void get_active_converter_info(
hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT);
if (link->dc->caps.dp_hdmi21_pcon_support) {
- union hdmi_encoded_link_bw hdmi_encoded_link_bw;
link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps =
link_bw_kbps_from_raw_frl_link_rate_data(
hdmi_color_caps.bits.MAX_ENCODED_LINK_BW_SUPPORT);
- // Intersect reported max link bw support with the supported link rate post FRL link training
- if (core_link_read_dpcd(link, DP_PCON_HDMI_POST_FRL_STATUS,
- &hdmi_encoded_link_bw.raw, sizeof(hdmi_encoded_link_bw)) == DC_OK) {
- link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = intersect_frl_link_bw_support(
- link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps,
- hdmi_encoded_link_bw);
- }
+ read_and_intersect_post_frl_lt_status(link);
if (link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps > 0)
link->dpcd_caps.dongle_caps.extendedCapValid = true;
@@ -1168,7 +1244,7 @@ static void get_active_converter_info(
set_dongle_type(link->ddc, link->dpcd_caps.dongle_type);
{
- struct dp_sink_hw_fw_revision dp_hw_fw_revision;
+ struct dp_sink_hw_fw_revision dp_hw_fw_revision = {0};
core_link_read_dpcd(
link,
@@ -1184,6 +1260,13 @@ static void get_active_converter_info(
dp_hw_fw_revision.ieee_fw_rev,
sizeof(dp_hw_fw_revision.ieee_fw_rev));
}
+
+ core_link_read_dpcd(
+ link,
+ DP_BRANCH_VENDOR_SPECIFIC_START,
+ (uint8_t *)link->dpcd_caps.branch_vendor_specific_data,
+ sizeof(link->dpcd_caps.branch_vendor_specific_data));
+
if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14 &&
link->dpcd_caps.dongle_type != DISPLAY_DONGLE_NONE) {
union dp_dfp_cap_ext dfp_cap_ext;
@@ -1238,7 +1321,7 @@ static void apply_usbc_combo_phy_reset_wa(struct dc_link *link,
bool dp_overwrite_extended_receiver_cap(struct dc_link *link)
{
- uint8_t dpcd_data[16];
+ uint8_t dpcd_data[16] = {0};
uint32_t read_dpcd_retry_cnt = 3;
enum dc_status status = DC_ERROR_UNEXPECTED;
union dp_downstream_port_present ds_port = { 0 };
@@ -1305,6 +1388,21 @@ void dpcd_set_source_specific_data(struct dc_link *link)
struct dpcd_amd_signature amd_signature = {0};
struct dpcd_amd_device_id amd_device_id = {0};
+ if (link->is_dds) {
+ uint8_t dpcd_dp_edp_backlight_mode = 0;
+
+ /*
+ * Write 0 to bits 0:1 for dp_edp_backlight_mode_set register
+ * if platform is DDS
+ */
+ core_link_read_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
+ &dpcd_dp_edp_backlight_mode, sizeof(uint8_t));
+ dpcd_dp_edp_backlight_mode &= ~0x3;
+
+ core_link_write_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
+ &dpcd_dp_edp_backlight_mode, sizeof(uint8_t));
+ }
+
amd_device_id.device_id_byte1 =
(uint8_t)(link->ctx->asic_id.chip_id);
amd_device_id.device_id_byte2 =
@@ -1394,7 +1492,7 @@ static bool get_usbc_cable_id(struct dc_link *link, union dp_cable_id *cable_id)
cmd.cable_id.header.payload_bytes = sizeof(cmd.cable_id.data);
cmd.cable_id.data.input.phy_inst = resource_transmitter_to_phy_idx(
link->dc, link->link_enc->transmitter);
- if (dm_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+ if (dc_wake_and_execute_dmub_cmd(link->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
cmd.cable_id.header.ret_status == 1) {
cable_id->raw = cmd.cable_id.data.output_raw;
DC_LOG_DC("usbc_cable_id = %d.\n", cable_id->raw);
@@ -1404,7 +1502,7 @@ static bool get_usbc_cable_id(struct dc_link *link, union dp_cable_id *cable_id)
static void retrieve_cable_id(struct dc_link *link)
{
- union dp_cable_id usbc_cable_id;
+ union dp_cable_id usbc_cable_id = {0};
link->dpcd_caps.cable_id.raw = 0;
core_link_read_dpcd(link, DP_CABLE_ATTRIBUTES_UPDATED_BY_DPRX,
@@ -1427,8 +1525,8 @@ bool read_is_mst_supported(struct dc_link *link)
return false;
}
- rev.raw = 0;
- cap.raw = 0;
+ rev.raw = 0;
+ cap.raw = 0;
st = core_link_read_dpcd(link, DP_DPCD_REV, &rev.raw,
sizeof(rev));
@@ -1460,6 +1558,10 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link)
return false;
link->dpcd_sink_ext_caps.raw = dpcd_data;
+ if (link->is_dds && !link->dpcd_sink_ext_caps.bits.oled) {
+ link->dpcd_sink_ext_caps.raw = 0;
+ return false;
+ }
if (core_link_read_dpcd(link, DP_EDP_GENERAL_CAP_2, &edp_general_cap2, 1) != DC_OK)
return false;
@@ -1471,9 +1573,11 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link)
enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link)
{
- uint8_t lttpr_dpcd_data[8];
+ uint8_t lttpr_dpcd_data[10] = {0};
enum dc_status status;
bool is_lttpr_present;
+ uint32_t lttpr_count;
+ uint32_t closest_lttpr_offset;
/* Logic to determine LTTPR support*/
bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware;
@@ -1521,22 +1625,54 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link)
lttpr_dpcd_data[DP_PHY_REPEATER_128B132B_RATES -
DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+ link->dpcd_caps.lttpr_caps.alpm.raw =
+ lttpr_dpcd_data[DP_LTTPR_ALPM_CAPABILITIES -
+ DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV];
+
+ lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+
/* If this chip cap is set, at least one retimer must exist in the chain
* Override count to 1 if we receive a known bad count (0 or an invalid value) */
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
- (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) {
- ASSERT(0);
+ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ lttpr_count == 0) {
+ /* If you see this message consistently, either the host platform has FIXED_VS flag
+ * incorrectly configured or the sink device is returning an invalid count.
+ */
+ DC_LOG_ERROR("lttpr_caps phy_repeater_cnt is 0x%x, forcing it to 0x80.",
+ link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80;
+ lttpr_count = 1;
DC_LOG_DC("lttpr_caps forced phy_repeater_cnt = %d\n", link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
}
- /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */
is_lttpr_present = dp_is_lttpr_present(link);
- if (is_lttpr_present)
+ DC_LOG_DC("is_lttpr_present = %d\n", is_lttpr_present);
+
+ if (is_lttpr_present) {
CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: ");
- DC_LOG_DC("is_lttpr_present = %d\n", is_lttpr_present);
+ // Identify closest LTTPR to determine if workarounds required for known embedded LTTPR
+ closest_lttpr_offset = dp_get_closest_lttpr_offset(lttpr_count);
+
+ core_link_read_dpcd(link, (DP_LTTPR_IEEE_OUI + closest_lttpr_offset),
+ link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui));
+ core_link_read_dpcd(link, (DP_LTTPR_DEVICE_ID + closest_lttpr_offset),
+ link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id));
+
+ if (lttpr_count > 1) {
+ CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui),
+ "Closest LTTPR To Host's IEEE OUI: ");
+ CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id),
+ "Closest LTTPR To Host's LTTPR Device ID: ");
+ } else {
+ CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui),
+ "LTTPR IEEE OUI: ");
+ CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id),
+ "LTTPR Device ID: ");
+ }
+ }
+
return status;
}
@@ -1586,11 +1722,16 @@ static bool retrieve_link_cap(struct dc_link *link)
return false;
}
- if (dp_is_lttpr_present(link))
+ if (dp_is_lttpr_present(link)) {
configure_lttpr_mode_transparent(link);
- /* Read DP tunneling information. */
- status = dpcd_get_tunneling_device_data(link);
+ // Echo TOTAL_LTTPR_CNT back downstream
+ core_link_write_dpcd(
+ link,
+ DP_TOTAL_LTTPR_CNT,
+ &link->dpcd_caps.lttpr_caps.phy_repeater_cnt,
+ sizeof(link->dpcd_caps.lttpr_caps.phy_repeater_cnt));
+ }
dpcd_set_source_specific_data(link);
/* Sink may need to configure internals based on vendor, so allow some
@@ -1664,7 +1805,7 @@ static bool retrieve_link_cap(struct dc_link *link)
link->dpcd_caps.dprx_feature.raw = dpcd_dprx_data;
if (status != DC_OK)
- dm_error("%s: Read DPRX caps data failed.\n", __func__);
+ dm_error("%s: Read DPRX feature list failed.\n", __func__);
/* AdaptiveSyncCapability */
dpcd_dprx_data = 0;
@@ -1679,15 +1820,13 @@ static bool retrieve_link_cap(struct dc_link *link)
link->dpcd_caps.adaptive_sync_caps.dp_adap_sync_caps.raw = dpcd_dprx_data;
if (status != DC_OK)
- dm_error("%s: Read DPRX caps data failed. Addr:%#x\n",
+ dm_error("%s: Read DPRX feature list_1 failed. Addr:%#x\n",
__func__, DP_DPRX_FEATURE_ENUMERATION_LIST_CONT_1);
}
-
else {
link->dpcd_caps.dprx_feature.raw = 0;
}
-
/* Error condition checking...
* It is impossible for Sink to report Max Lane Count = 0.
* It is possible for Sink to report Max Link Rate = 0, if it is
@@ -1741,6 +1880,11 @@ static bool retrieve_link_cap(struct dc_link *link)
link->test_pattern_enabled = false;
link->compliance_test_state.raw = 0;
+ link->dpcd_caps.receive_port0_cap.raw[0] =
+ dpcd_data[DP_RECEIVE_PORT_0_CAP_0 - DP_DPCD_REV];
+ link->dpcd_caps.receive_port0_cap.raw[1] =
+ dpcd_data[DP_RECEIVE_PORT_0_BUFFER_SIZE - DP_DPCD_REV];
+
/* read sink count */
core_link_read_dpcd(link,
DP_SINK_COUNT,
@@ -1807,6 +1951,9 @@ static bool retrieve_link_cap(struct dc_link *link)
DP_FEC_CAPABILITY,
&link->dpcd_caps.fec_cap.raw,
sizeof(link->dpcd_caps.fec_cap.raw));
+ if (status != DC_OK)
+ DC_LOG_ERROR("%s:%d: core_link_read_dpcd (DP_FEC_CAPABILITY) failed\n", __func__, __LINE__);
+
status = core_link_read_dpcd(
link,
DP_DSC_SUPPORT,
@@ -1829,6 +1976,9 @@ static bool retrieve_link_cap(struct dc_link *link)
DP_DSC_BRANCH_OVERALL_THROUGHPUT_0,
link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw,
sizeof(link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw));
+ if (status != DC_OK)
+ DC_LOG_ERROR("%s:%d: core_link_read_dpcd (DP_DSC_BRANCH_OVERALL_THROUGHPUT_0) failed\n", __func__, __LINE__);
+
DC_LOG_DSC("DSC branch decoder capability is read at link %d", link->link_index);
DC_LOG_DSC("\tBRANCH_OVERALL_THROUGHPUT_0 = 0x%02x",
link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.fields.BRANCH_OVERALL_THROUGHPUT_0);
@@ -1865,6 +2015,7 @@ static bool retrieve_link_cap(struct dc_link *link)
if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
DC_LOG_DP2("128b/132b encoding is supported at link %d", link->link_index);
+ /* Read 128b/132b supported link rates */
core_link_read_dpcd(link,
DP_128B132B_SUPPORTED_LINK_RATES,
&link->dpcd_caps.dp_128b_132b_supported_link_rates.raw,
@@ -1907,6 +2058,16 @@ static bool retrieve_link_cap(struct dc_link *link)
DC_LOG_DP2("\tFEC aggregated error counters are supported");
}
+ core_link_read_dpcd(link,
+ DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP,
+ link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw,
+ sizeof(link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw));
+
+ /* Read DP tunneling information. */
+ status = dpcd_get_tunneling_device_data(link);
+ if (status != DC_OK)
+ DC_LOG_DP2("%s: Read DP tunneling device data failed.\n", __func__);
+
retrieve_cable_id(link);
dpcd_write_cable_id_to_dprx(link);
@@ -1927,8 +2088,8 @@ void detect_edp_sink_caps(struct dc_link *link)
uint32_t entry;
uint32_t link_rate_in_khz;
enum dc_link_rate link_rate = LINK_RATE_UNKNOWN;
- uint8_t backlight_adj_cap;
- uint8_t general_edp_cap;
+ uint8_t backlight_adj_cap = 0;
+ uint8_t general_edp_cap = 0;
retrieve_link_cap(link);
link->dpcd_caps.edp_supported_link_rates_count = 0;
@@ -1938,9 +2099,7 @@ void detect_edp_sink_caps(struct dc_link *link)
* edp_supported_link_rates_count is only valid for eDP v1.4 or higher.
* Per VESA eDP spec, "The DPCD revision for eDP v1.4 is 13h"
*/
- if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_13 &&
- (link->panel_config.ilr.optimize_edp_link_rate ||
- link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN)) {
+ if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_13) {
// Read DPCD 00010h - 0001Fh 16 bytes at one shot
core_link_read_dpcd(link, DP_SUPPORTED_LINK_RATES,
supported_link_rates, sizeof(supported_link_rates));
@@ -1958,23 +2117,21 @@ void detect_edp_sink_caps(struct dc_link *link)
link_rate = linkRateInKHzToLinkRateMultiplier(link_rate_in_khz);
link->dpcd_caps.edp_supported_link_rates[link->dpcd_caps.edp_supported_link_rates_count] = link_rate;
link->dpcd_caps.edp_supported_link_rates_count++;
-
- if (link->reported_link_cap.link_rate < link_rate)
- link->reported_link_cap.link_rate = link_rate;
}
}
}
+
core_link_read_dpcd(link, DP_EDP_BACKLIGHT_ADJUSTMENT_CAP,
&backlight_adj_cap, sizeof(backlight_adj_cap));
link->dpcd_caps.dynamic_backlight_capable_edp =
- (backlight_adj_cap & DP_EDP_DYNAMIC_BACKLIGHT_CAP) ? true:false;
+ (backlight_adj_cap & DP_EDP_DYNAMIC_BACKLIGHT_CAP) ? true : false;
core_link_read_dpcd(link, DP_EDP_GENERAL_CAP_1,
&general_edp_cap, sizeof(general_edp_cap));
link->dpcd_caps.set_power_state_capable_edp =
- (general_edp_cap & DP_EDP_SET_POWER_CAP) ? true:false;
+ (general_edp_cap & DP_EDP_SET_POWER_CAP) ? true : false;
set_default_brightness_aux(link);
@@ -2019,18 +2176,46 @@ void detect_edp_sink_caps(struct dc_link *link)
core_link_read_dpcd(link, DP_SINK_PR_MAX_NUMBER_OF_DEVIATION_LINE,
&link->dpcd_caps.pr_info.max_deviation_line,
sizeof(link->dpcd_caps.pr_info.max_deviation_line));
+
+ /*
+ * OLED Emission Rate info
+ */
+ if (link->dpcd_sink_ext_caps.bits.emission_output)
+ core_link_read_dpcd(link, DP_SINK_EMISSION_RATE,
+ (uint8_t *)&link->dpcd_caps.edp_oled_emission_rate,
+ sizeof(link->dpcd_caps.edp_oled_emission_rate));
+
+ /*
+ * Read Multi-SST (Single Stream Transport) capability
+ * for eDP version 1.4 or higher.
+ */
+ if (link->dpcd_caps.dpcd_rev.raw >= DP_EDP_14)
+ core_link_read_dpcd(
+ link,
+ DP_EDP_MSO_LINK_CAPABILITIES,
+ (uint8_t *)&link->dpcd_caps.mso_cap_sst_links_supported,
+ sizeof(link->dpcd_caps.mso_cap_sst_links_supported));
+ /*
+ * Read eDP general capability 2
+ */
+ core_link_read_dpcd(link, DP_EDP_GENERAL_CAP_2,
+ (uint8_t *)&link->dpcd_caps.dp_edp_general_cap_2,
+ sizeof(link->dpcd_caps.dp_edp_general_cap_2));
}
bool dp_get_max_link_enc_cap(const struct dc_link *link, struct dc_link_settings *max_link_enc_cap)
{
- struct link_encoder *link_enc = NULL;
+ struct resource_context *res_ctx = &link->dc->current_state->res_ctx;
+ struct resource_pool *res_pool = link->dc->res_pool;
+ struct link_encoder *link_enc = get_temp_dio_link_enc(res_ctx, res_pool, link);
if (!max_link_enc_cap) {
DC_LOG_ERROR("%s: Could not return max link encoder caps", __func__);
return false;
}
- link_enc = link_enc_cfg_get_link_enc(link);
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
ASSERT(link_enc);
if (link_enc && link_enc->funcs->get_max_link_cap) {
@@ -2058,10 +2243,13 @@ struct dc_link_settings dp_get_max_link_cap(struct dc_link *link)
struct dc_link_settings max_link_cap = {0};
enum dc_link_rate lttpr_max_link_rate;
enum dc_link_rate cable_max_link_rate;
- struct link_encoder *link_enc = NULL;
-
+ struct resource_context *res_ctx = &link->dc->current_state->res_ctx;
+ struct resource_pool *res_pool = link->dc->res_pool;
+ struct link_encoder *link_enc = get_temp_dio_link_enc(res_ctx, res_pool, link);
+ bool is_uhbr13_5_supported = true;
- link_enc = link_enc_cfg_get_link_enc(link);
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
ASSERT(link_enc);
/* get max link encoder capability */
@@ -2080,6 +2268,9 @@ struct dc_link_settings dp_get_max_link_cap(struct dc_link *link)
max_link_cap.link_spread =
link->reported_link_cap.link_spread;
+ if (!link->dpcd_caps.dp_128b_132b_supported_link_rates.bits.UHBR13_5)
+ is_uhbr13_5_supported = false;
+
/* Lower link settings based on cable attributes
* Cable ID is a DP2 feature to identify max certified link rate that
* a cable can carry. The cable identification method requires both
@@ -2098,20 +2289,32 @@ struct dc_link_settings dp_get_max_link_cap(struct dc_link *link)
cable_max_link_rate = get_cable_max_link_rate(link);
if (!link->dc->debug.ignore_cable_id &&
- cable_max_link_rate != LINK_RATE_UNKNOWN &&
- cable_max_link_rate < max_link_cap.link_rate)
- max_link_cap.link_rate = cable_max_link_rate;
+ cable_max_link_rate != LINK_RATE_UNKNOWN) {
+ if (cable_max_link_rate < max_link_cap.link_rate)
+ max_link_cap.link_rate = cable_max_link_rate;
+
+ if (!link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY &&
+ link->dpcd_caps.cable_id.bits.CABLE_TYPE >= 2)
+ is_uhbr13_5_supported = false;
+ }
/* account for lttpr repeaters cap
* notes: repeaters do not snoop in the DPRX Capabilities addresses (3.6.3).
*/
if (dp_is_lttpr_present(link)) {
- if (link->dpcd_caps.lttpr_caps.max_lane_count < max_link_cap.lane_count)
- max_link_cap.lane_count = link->dpcd_caps.lttpr_caps.max_lane_count;
- lttpr_max_link_rate = get_lttpr_max_link_rate(link);
- if (lttpr_max_link_rate < max_link_cap.link_rate)
- max_link_cap.link_rate = lttpr_max_link_rate;
+ /* Some LTTPR devices do not report valid DPCD revisions; if so, do not take its link cap into consideration. */
+ if (link->dpcd_caps.lttpr_caps.revision.raw >= DPCD_REV_14) {
+ if (link->dpcd_caps.lttpr_caps.max_lane_count < max_link_cap.lane_count)
+ max_link_cap.lane_count = link->dpcd_caps.lttpr_caps.max_lane_count;
+ lttpr_max_link_rate = get_lttpr_max_link_rate(link);
+
+ if (lttpr_max_link_rate < max_link_cap.link_rate)
+ max_link_cap.link_rate = lttpr_max_link_rate;
+
+ if (!link->dpcd_caps.lttpr_caps.supported_128b_132b_rates.bits.UHBR13_5)
+ is_uhbr13_5_supported = false;
+ }
DC_LOG_HW_LINK_TRAINING("%s\n Training with LTTPR, max_lane count %d max_link rate %d \n",
__func__,
@@ -2119,6 +2322,10 @@ struct dc_link_settings dp_get_max_link_cap(struct dc_link *link)
max_link_cap.link_rate);
}
+ if (max_link_cap.link_rate == LINK_RATE_UHBR13_5 &&
+ !is_uhbr13_5_supported)
+ max_link_cap.link_rate = LINK_RATE_UHBR10;
+
if (link_dp_get_encoding_format(&max_link_cap) == DP_128b_132b_ENCODING &&
link->dc->debug.disable_uhbr)
max_link_cap.link_rate = LINK_RATE_HIGH3;
@@ -2215,7 +2422,7 @@ bool dp_verify_link_cap_with_retries(
memset(&link->verified_link_cap, 0,
sizeof(struct dc_link_settings));
- if (!link_detect_connection_type(link, &type) || type == dc_connection_none) {
+ if (link->link_enc && (!link_detect_connection_type(link, &type) || type == dc_connection_none)) {
link->verified_link_cap = fail_safe_link_settings;
break;
} else if (dp_verify_link_cap(link, known_limit_link_setting, &fail_count)) {
@@ -2227,6 +2434,14 @@ bool dp_verify_link_cap_with_retries(
} else {
link->verified_link_cap = last_verified_link_cap;
}
+
+ /* For a DP tunneling link, a pending HPD means that we have a race condition between processing
+ * the current link and processing the pending HPD. Since the training has failed, we should just break
+ * the loop so that we have a chance to process the pending HPD.
+ */
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->is_hpd_pending)
+ break;
+
fsleep(10 * 1000);
}
@@ -2297,3 +2512,40 @@ bool dp_is_sink_present(struct dc_link *link)
return present;
}
+
+uint8_t dp_get_lttpr_count(struct dc_link *link)
+{
+ if (dp_is_lttpr_present(link))
+ return dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+
+ return 0;
+}
+
+void edp_get_alpm_support(struct dc_link *link,
+ bool *auxless_support,
+ bool *auxwake_support)
+{
+ bool lttpr_present = dp_is_lttpr_present(link);
+
+ if (auxless_support == NULL || auxwake_support == NULL)
+ return;
+
+ *auxless_support = false;
+ *auxwake_support = false;
+
+ if (!dc_is_embedded_signal(link->connector_signal))
+ return;
+
+ if (link->dpcd_caps.alpm_caps.bits.AUX_LESS_ALPM_CAP) {
+ if (lttpr_present) {
+ if (link->dpcd_caps.lttpr_caps.alpm.bits.AUX_LESS_ALPM_SUPPORTED)
+ *auxless_support = true;
+ } else
+ *auxless_support = true;
+ }
+
+ if (link->dpcd_caps.alpm_caps.bits.AUX_WAKE_ALPM_CAP) {
+ if (!lttpr_present)
+ *auxwake_support = true;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h
index 8f0ce97f2362..6e17f72a752f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_DP_CAPABILITY_H__
#define __DC_LINK_DP_CAPABILITY_H__
-#include "link.h"
+#include "link_service.h"
bool detect_dp_sink_caps(struct dc_link *link);
@@ -48,6 +48,9 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link);
/* Convert PHY repeater count read from DPCD uint8_t. */
uint8_t dp_parse_lttpr_repeater_count(uint8_t lttpr_repeater_count);
+/* Calculate embedded LTTPR address offset for vendor-specific behaviour */
+uint32_t dp_get_closest_lttpr_offset(uint8_t lttpr_count);
+
bool dp_is_sink_present(struct dc_link *link);
bool dp_is_lttpr_present(struct dc_link *link);
@@ -67,6 +70,7 @@ bool dp_is_128b_132b_signal(struct pipe_ctx *pipe_ctx);
/* Initialize output parameter lt_settings. */
void dp_decide_training_settings(
struct dc_link *link,
+ const struct link_resource *link_res,
const struct dc_link_settings *link_setting,
struct link_training_settings *lt_settings);
@@ -104,4 +108,10 @@ uint32_t link_bw_kbps_from_raw_frl_link_rate_data(uint8_t bw);
bool dp_overwrite_extended_receiver_cap(struct dc_link *link);
+uint8_t dp_get_lttpr_count(struct dc_link *link);
+
+void edp_get_alpm_support(struct dc_link *link,
+ bool *auxless_support,
+ bool *auxwake_support);
+
#endif /* __DC_LINK_DP_CAPABILITY_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
index 0bb749133909..9b2f1a7da1d1 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
@@ -59,22 +59,55 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link)
dpcd_dp_tun_data,
sizeof(dpcd_dp_tun_data));
- status = core_link_read_dpcd(
- link,
- DP_USB4_ROUTER_TOPOLOGY_ID,
- dpcd_topology_data,
- sizeof(dpcd_topology_data));
+ if (status != DC_OK)
+ goto err;
link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.raw =
dpcd_dp_tun_data[DP_TUNNELING_CAPABILITIES_SUPPORT - DP_TUNNELING_CAPABILITIES_SUPPORT];
+
+ if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling == false)
+ goto err;
+
link->dpcd_caps.usb4_dp_tun_info.dpia_info.raw =
dpcd_dp_tun_data[DP_IN_ADAPTER_INFO - DP_TUNNELING_CAPABILITIES_SUPPORT];
link->dpcd_caps.usb4_dp_tun_info.usb4_driver_id =
dpcd_dp_tun_data[DP_USB4_DRIVER_ID - DP_TUNNELING_CAPABILITIES_SUPPORT];
+ if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc) {
+ status = core_link_read_dpcd(link, USB4_DRIVER_BW_CAPABILITY,
+ dpcd_dp_tun_data, 2);
+
+ if (status != DC_OK)
+ goto err;
+
+ link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.raw =
+ dpcd_dp_tun_data[USB4_DRIVER_BW_CAPABILITY - USB4_DRIVER_BW_CAPABILITY];
+ link->dpcd_caps.usb4_dp_tun_info.dpia_tunnel_info.raw =
+ dpcd_dp_tun_data[DP_IN_ADAPTER_TUNNEL_INFO - USB4_DRIVER_BW_CAPABILITY];
+ }
+
+ DC_LOG_DEBUG("%s: Link[%d] DP tunneling support (RouterId=%d AdapterId=%d) "
+ "DPIA_BW_Alloc_support=%d "
+ "CM_BW_Alloc_support=%d ",
+ __func__, link->link_index,
+ link->dpcd_caps.usb4_dp_tun_info.usb4_driver_id,
+ link->dpcd_caps.usb4_dp_tun_info.dpia_info.bits.dpia_num,
+ link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc,
+ link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support);
+
+ status = core_link_read_dpcd(
+ link,
+ DP_USB4_ROUTER_TOPOLOGY_ID,
+ dpcd_topology_data,
+ sizeof(dpcd_topology_data));
+
+ if (status != DC_OK)
+ goto err;
+
for (i = 0; i < DPCD_USB4_TOPOLOGY_ID_LEN; i++)
link->dpcd_caps.usb4_dp_tun_info.usb4_topology_id[i] = dpcd_topology_data[i];
+err:
return status;
}
@@ -82,24 +115,57 @@ bool dpia_query_hpd_status(struct dc_link *link)
{
union dmub_rb_cmd cmd = {0};
struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv;
- bool is_hpd_high = false;
/* prepare QUERY_HPD command */
cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE;
+ cmd.query_hpd.header.payload_bytes = sizeof(cmd.query_hpd.data);
cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1;
cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA;
- /* Return HPD status reported by DMUB if query successfully executed. */
- if (dm_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) && cmd.query_hpd.data.status == AUX_RET_SUCCESS)
- is_hpd_high = cmd.query_hpd.data.result;
-
- DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n",
- __func__,
- link->link_index,
- link->link_id.enum_id - ENUM_ID_1,
- cmd.query_hpd.data.status,
- cmd.query_hpd.data.result);
+ /* Query dpia hpd status from dmub */
+ if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd,
+ DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+ cmd.query_hpd.data.status == AUX_RET_SUCCESS) {
+ DC_LOG_DEBUG("%s: for link(%d) dpia(%d) success, current_hpd_status(%d) new_hpd_status(%d)\n",
+ __func__,
+ link->link_index,
+ link->link_id.enum_id - ENUM_ID_1,
+ link->hpd_status,
+ cmd.query_hpd.data.result);
+ link->hpd_status = cmd.query_hpd.data.result;
+ } else {
+ DC_LOG_ERROR("%s: for link(%d) dpia(%d) failed with status(%d), current_hpd_status(%d) new_hpd_status(0)\n",
+ __func__,
+ link->link_index,
+ link->link_id.enum_id - ENUM_ID_1,
+ cmd.query_hpd.data.status,
+ link->hpd_status);
+ link->hpd_status = false;
+ }
+
+ return link->hpd_status;
+}
- return is_hpd_high;
+void link_decide_dp_tunnel_settings(struct dc_stream_state *stream,
+ struct dc_tunnel_settings *dp_tunnel_setting)
+{
+ struct dc_link *link = stream->link;
+
+ memset(dp_tunnel_setting, 0, sizeof(*dp_tunnel_setting));
+
+ if ((stream->signal == SIGNAL_TYPE_DISPLAY_PORT) || (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) {
+ dp_tunnel_setting->should_enable_dp_tunneling =
+ link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling;
+
+ if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc
+ && link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support) {
+ dp_tunnel_setting->should_use_dp_bw_allocation = true;
+ dp_tunnel_setting->cm_id = link->dpcd_caps.usb4_dp_tun_info.usb4_driver_id & 0x0F;
+ dp_tunnel_setting->group_id = link->dpcd_caps.usb4_dp_tun_info.dpia_tunnel_info.bits.group_id;
+ dp_tunnel_setting->estimated_bw = link->dpia_bw_alloc_config.estimated_bw;
+ dp_tunnel_setting->allocated_bw = link->dpia_bw_alloc_config.allocated_bw;
+ dp_tunnel_setting->bw_granularity = link->dpia_bw_alloc_config.bw_granularity;
+ }
+ }
}
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h
index 363f45a1a964..7cd03fa4892b 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h
@@ -27,7 +27,7 @@
#ifndef __DC_LINK_DPIA_H__
#define __DC_LINK_DPIA_H__
-#include "link.h"
+#include "link_service.h"
/* Read tunneling device capability from DPCD and update link capability
* accordingly.
@@ -38,4 +38,10 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link);
* Returns true if HPD high.
*/
bool dpia_query_hpd_status(struct dc_link *link);
+
+/* Decide the DP tunneling settings based on the DPCD capabilities
+ */
+void link_decide_dp_tunnel_settings(struct dc_stream_state *stream,
+ struct dc_tunnel_settings *dp_tunnel_setting);
+
#endif /* __DC_LINK_DPIA_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
index 7581023daa47..8a3c18ae97a7 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
@@ -24,7 +24,7 @@
*
*/
/*********************************************************************/
-// USB4 DPIA BANDWIDTH ALLOCATION LOGIC
+// USB4 DPIA BANDWIDTH ALLOCATION LOGIC
/*********************************************************************/
#include "link_dp_dpia_bw.h"
#include "link_dpcd.h"
@@ -35,8 +35,10 @@
#define Kbps_TO_Gbps (1000 * 1000)
+#define MST_TIME_SLOT_COUNT 64
+
// ------------------------------------------------------------------
-// PRIVATE FUNCTIONS
+// PRIVATE FUNCTIONS
// ------------------------------------------------------------------
/*
* Always Check the following:
@@ -44,21 +46,33 @@
* - Is HPD HIGH?
* - Is BW Allocation Support Mode enabled on DP-Tx?
*/
-static bool get_bw_alloc_proceed_flag(struct dc_link *tmp)
+static bool link_dp_is_bw_alloc_available(struct dc_link *link)
{
- return (tmp && DISPLAY_ENDPOINT_USB4_DPIA == tmp->ep_type
- && tmp->hpd_status
- && tmp->dpia_bw_alloc_config.bw_alloc_enabled);
+ return (link && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling
+ && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc
+ && link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support);
}
+
static void reset_bw_alloc_struct(struct dc_link *link)
{
link->dpia_bw_alloc_config.bw_alloc_enabled = false;
- link->dpia_bw_alloc_config.sink_verified_bw = 0;
- link->dpia_bw_alloc_config.sink_max_bw = 0;
+ link->dpia_bw_alloc_config.link_verified_bw = 0;
+ link->dpia_bw_alloc_config.link_max_bw = 0;
+ link->dpia_bw_alloc_config.allocated_bw = 0;
link->dpia_bw_alloc_config.estimated_bw = 0;
link->dpia_bw_alloc_config.bw_granularity = 0;
- link->dpia_bw_alloc_config.response_ready = false;
+ link->dpia_bw_alloc_config.dp_overhead = 0;
+ link->dpia_bw_alloc_config.nrd_max_lane_count = 0;
+ link->dpia_bw_alloc_config.nrd_max_link_rate = 0;
+ for (int i = 0; i < MAX_SINKS_PER_LINK; i++)
+ link->dpia_bw_alloc_config.remote_sink_req_bw[i] = 0;
+ DC_LOG_DEBUG("reset usb4 bw alloc of link(%d)\n", link->link_index);
}
+
+#define BW_GRANULARITY_0 4 // 0.25 Gbps
+#define BW_GRANULARITY_1 2 // 0.5 Gbps
+#define BW_GRANULARITY_2 1 // 1 Gbps
+
static uint8_t get_bw_granularity(struct dc_link *link)
{
uint8_t bw_granularity = 0;
@@ -71,16 +85,20 @@ static uint8_t get_bw_granularity(struct dc_link *link)
switch (bw_granularity & 0x3) {
case 0:
- bw_granularity = 4;
+ bw_granularity = BW_GRANULARITY_0;
break;
case 1:
+ bw_granularity = BW_GRANULARITY_1;
+ break;
+ case 2:
default:
- bw_granularity = 2;
+ bw_granularity = BW_GRANULARITY_2;
break;
}
return bw_granularity;
}
+
static int get_estimated_bw(struct dc_link *link)
{
uint8_t bw_estimated_bw = 0;
@@ -93,99 +111,56 @@ static int get_estimated_bw(struct dc_link *link)
return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
}
-static bool allocate_usb4_bw(int *stream_allocated_bw, int bw_needed, struct dc_link *link)
+
+static int get_non_reduced_max_link_rate(struct dc_link *link)
{
- if (bw_needed > 0)
- *stream_allocated_bw += bw_needed;
+ uint8_t nrd_max_link_rate = 0;
+
+ core_link_read_dpcd(
+ link,
+ DP_TUNNELING_MAX_LINK_RATE,
+ &nrd_max_link_rate,
+ sizeof(uint8_t));
- return true;
+ return nrd_max_link_rate;
}
-static bool deallocate_usb4_bw(int *stream_allocated_bw, int bw_to_dealloc, struct dc_link *link)
-{
- bool ret = false;
- if (*stream_allocated_bw > 0) {
- *stream_allocated_bw -= bw_to_dealloc;
- ret = true;
- } else {
- //Do nothing for now
- ret = true;
- }
+static int get_non_reduced_max_lane_count(struct dc_link *link)
+{
+ uint8_t nrd_max_lane_count = 0;
- // Unplug so reset values
- if (!link->hpd_status)
- reset_bw_alloc_struct(link);
+ core_link_read_dpcd(
+ link,
+ DP_TUNNELING_MAX_LANE_COUNT,
+ &nrd_max_lane_count,
+ sizeof(uint8_t));
- return ret;
+ return nrd_max_lane_count;
}
+
/*
* Read all New BW alloc configuration ex: estimated_bw, allocated_bw,
* granuality, Driver_ID, CM_Group, & populate the BW allocation structs
* for host router and dpia
*/
-static void init_usb4_bw_struct(struct dc_link *link)
+static void retrieve_usb4_dp_bw_allocation_info(struct dc_link *link)
{
- // Init the known values
+ reset_bw_alloc_struct(link);
+
+ /* init the known values */
link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link);
link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link);
+ link->dpia_bw_alloc_config.nrd_max_link_rate = get_non_reduced_max_link_rate(link);
+ link->dpia_bw_alloc_config.nrd_max_lane_count = get_non_reduced_max_lane_count(link);
+
+ DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n",
+ __func__, link->dpia_bw_alloc_config.bw_granularity,
+ link->dpia_bw_alloc_config.estimated_bw);
+ DC_LOG_DEBUG("%s: nrd_max_link_rate(%d), nrd_max_lane_count(%d)\n",
+ __func__, link->dpia_bw_alloc_config.nrd_max_link_rate,
+ link->dpia_bw_alloc_config.nrd_max_lane_count);
}
-static uint8_t get_lowest_dpia_index(struct dc_link *link)
-{
- const struct dc *dc_struct = link->dc;
- uint8_t idx = 0xFF;
- int i;
-
- for (i = 0; i < MAX_PIPES * 2; ++i) {
-
- if (!dc_struct->links[i] ||
- dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
- continue;
-
- if (idx > dc_struct->links[i]->link_index)
- idx = dc_struct->links[i]->link_index;
- }
-
- return idx;
-}
-/*
- * Get the Max Available BW or Max Estimated BW for each Host Router
- *
- * @link: pointer to the dc_link struct instance
- * @type: ESTIMATD BW or MAX AVAILABLE BW
- *
- * return: response_ready flag from dc_link struct
- */
-static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
-{
- const struct dc *dc_struct = link->dc;
- uint8_t lowest_dpia_index = get_lowest_dpia_index(link);
- uint8_t idx = (link->link_index - lowest_dpia_index) / 2, idx_temp = 0;
- struct dc_link *link_temp;
- int total_bw = 0;
- int i;
-
- for (i = 0; i < MAX_PIPES * 2; ++i) {
-
- if (!dc_struct->links[i] || dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA)
- continue;
-
- link_temp = dc_struct->links[i];
- if (!link_temp || !link_temp->hpd_status)
- continue;
-
- idx_temp = (link_temp->link_index - lowest_dpia_index) / 2;
-
- if (idx_temp == idx) {
-
- if (type == HOST_ROUTER_BW_ESTIMATED)
- total_bw += link_temp->dpia_bw_alloc_config.estimated_bw;
- else if (type == HOST_ROUTER_BW_ALLOCATED)
- total_bw += link_temp->dpia_bw_alloc_config.sink_allocated_bw;
- }
- }
- return total_bw;
-}
/*
* Cleanup function for when the dpia is unplugged to reset struct
* and perform any required clean up
@@ -194,299 +169,274 @@ static int get_host_router_total_bw(struct dc_link *link, uint8_t type)
*
* return: none
*/
-static bool dpia_bw_alloc_unplug(struct dc_link *link)
+static void dpia_bw_alloc_unplug(struct dc_link *link)
{
- if (!link)
- return true;
-
- return deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
- link->dpia_bw_alloc_config.sink_allocated_bw, link);
+ if (link) {
+ DC_LOG_DEBUG("%s: resetting BW alloc config for link(%d)\n",
+ __func__, link->link_index);
+ reset_bw_alloc_struct(link);
+ }
}
-static void set_usb4_req_bw_req(struct dc_link *link, int req_bw)
+
+/*
+ * Round a bandwidth request up to the link's BW granularity and write the
+ * resulting value to the REQUESTED_BW DPCD register.
+ *
+ * @link: pointer to the dc_link struct instance
+ * @req_bw: requested bandwidth, in the same unit as estimated_bw
+ *
+ * A request that rounds up past estimated_bw is clamped to estimated_bw.
+ * allocated_bw records the bandwidth that corresponds to the register value
+ * actually written. Nothing is sent when bw_granularity is zero.
+ *
+ * return: none
+ */
+static void link_dpia_send_bw_alloc_request(struct dc_link *link, int req_bw)
{
- uint8_t requested_bw;
- uint32_t temp;
+ uint8_t request_reg_val;
+ uint32_t temp, request_bw;
- // 1. Add check for this corner case #1
- if (req_bw > link->dpia_bw_alloc_config.estimated_bw)
- req_bw = link->dpia_bw_alloc_config.estimated_bw;
+ if (link->dpia_bw_alloc_config.bw_granularity == 0) {
+ DC_LOG_ERROR("%s: Link[%d]: bw_granularity is zero!", __func__, link->link_index);
+ return;
+ }
temp = req_bw * link->dpia_bw_alloc_config.bw_granularity;
- requested_bw = temp / Kbps_TO_Gbps;
+ request_reg_val = temp / Kbps_TO_Gbps;
- // Always make sure to add more to account for floating points
+ /* Always make sure to add more to account for floating points */
if (temp % Kbps_TO_Gbps)
- ++requested_bw;
+ ++request_reg_val;
- // 2. Add check for this corner case #2
- req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
- if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw)
- return;
+ request_bw = request_reg_val * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
- if (core_link_write_dpcd(
- link,
- REQUESTED_BW,
- &requested_bw,
- sizeof(uint8_t)) == DC_OK)
- link->dpia_bw_alloc_config.response_ready = false; // Reset flag
-}
-/*
- * Return the response_ready flag from dc_link struct
- *
- * @link: pointer to the dc_link struct instance
- *
- * return: response_ready flag from dc_link struct
- */
-static bool get_cm_response_ready_flag(struct dc_link *link)
-{
- return link->dpia_bw_alloc_config.response_ready;
+ if (request_bw > link->dpia_bw_alloc_config.estimated_bw) {
+ DC_LOG_ERROR("%s: Link[%d]: Request BW (%d --> %d) > Estimated BW (%d)... Set to Estimated BW!",
+ __func__, link->link_index,
+ req_bw, request_bw, link->dpia_bw_alloc_config.estimated_bw);
+ req_bw = link->dpia_bw_alloc_config.estimated_bw;
+
+ temp = req_bw * link->dpia_bw_alloc_config.bw_granularity;
+ request_reg_val = temp / Kbps_TO_Gbps;
+ if (temp % Kbps_TO_Gbps)
+ ++request_reg_val;
+
+ /* Keep request_bw in sync with the clamped register value */
+ request_bw = request_reg_val * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+ }
+
+ link->dpia_bw_alloc_config.allocated_bw = request_bw;
+ DC_LOG_DC("%s: Link[%d]: Request BW: %d", __func__, link->link_index, request_bw);
+
+ core_link_write_dpcd(link, REQUESTED_BW,
+ &request_reg_val,
+ sizeof(uint8_t));
}
+
// ------------------------------------------------------------------
-// PUBLIC FUNCTIONS
+// PUBLIC FUNCTIONS
// ------------------------------------------------------------------
-bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link)
+bool link_dpia_enable_usb4_dp_bw_alloc_mode(struct dc_link *link)
{
bool ret = false;
- uint8_t response = 0,
- bw_support_dpia = 0,
- bw_support_cm = 0;
-
- if (!(link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->hpd_status))
- goto out;
+ uint8_t val;
- if (core_link_read_dpcd(
- link,
- DP_TUNNELING_CAPABILITIES,
- &response,
- sizeof(uint8_t)) == DC_OK)
- bw_support_dpia = (response >> 7) & 1;
-
- if (core_link_read_dpcd(
- link,
- USB4_DRIVER_BW_CAPABILITY,
- &response,
- sizeof(uint8_t)) == DC_OK)
- bw_support_cm = (response >> 7) & 1;
-
- /* Send request acknowledgment to Turn ON DPTX support */
- if (bw_support_cm && bw_support_dpia) {
-
- response = 0x80;
- if (core_link_write_dpcd(
- link,
- DPTX_BW_ALLOCATION_MODE_CONTROL,
- &response,
- sizeof(uint8_t)) != DC_OK) {
- DC_LOG_DEBUG("%s: **** FAILURE Enabling DPtx BW Allocation Mode Support ***\n",
- __func__);
- } else {
- // SUCCESS Enabled DPtx BW Allocation Mode Support
- link->dpia_bw_alloc_config.bw_alloc_enabled = true;
- DC_LOG_DEBUG("%s: **** SUCCESS Enabling DPtx BW Allocation Mode Support ***\n",
- __func__);
-
- ret = true;
- init_usb4_bw_struct(link);
- }
+ if (link->dc->debug.dpia_debug.bits.enable_bw_allocation_mode == false) {
+ DC_LOG_DEBUG("%s: link[%d] DPTX BW allocation mode disabled", __func__, link->link_index);
+ return false;
}
-out:
- return ret;
-}
-void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result)
-{
- int bw_needed = 0;
- int estimated = 0;
- int host_router_total_estimated_bw = 0;
+ val = DPTX_BW_ALLOC_MODE_ENABLE | DPTX_BW_ALLOC_UNMASK_IRQ;
- if (!get_bw_alloc_proceed_flag((link)))
- return;
+ if (core_link_write_dpcd(link, DPTX_BW_ALLOCATION_MODE_CONTROL, &val, sizeof(uint8_t)) == DC_OK) {
+ DC_LOG_DEBUG("%s: link[%d] DPTX BW allocation mode enabled", __func__, link->link_index);
- switch (result) {
+ retrieve_usb4_dp_bw_allocation_info(link);
- case DPIA_BW_REQ_FAILED:
+ if (
+ link->dpia_bw_alloc_config.nrd_max_link_rate
+ && link->dpia_bw_alloc_config.nrd_max_lane_count) {
+ link->reported_link_cap.link_rate = link->dpia_bw_alloc_config.nrd_max_link_rate;
+ link->reported_link_cap.lane_count = link->dpia_bw_alloc_config.nrd_max_lane_count;
+ }
- DC_LOG_DEBUG("%s: *** *** BW REQ FAILURE for DP-TX Request *** ***\n", __func__);
+ link->dpia_bw_alloc_config.bw_alloc_enabled = true;
+ ret = true;
- // Update the new Estimated BW value updated by CM
- link->dpia_bw_alloc_config.estimated_bw =
- bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+ if (link->dc->debug.dpia_debug.bits.enable_usb4_bw_zero_alloc_patch) {
+ /*
+ * During DP tunnel creation, the CM preallocates BW
+ * and reduces the estimated BW of other DPIAs.
+ * The CM releases the preallocation only when the allocation is complete.
+ * Perform a zero allocation to make the CM release the preallocation
+ * and correctly update the estimated BW for all DPIAs per host router.
+ */
+ link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, 0);
+ }
+ } else
+ DC_LOG_DEBUG("%s: link[%d] failed to enable DPTX BW allocation mode", __func__, link->link_index);
- set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.estimated_bw);
- link->dpia_bw_alloc_config.response_ready = false;
+ return ret;
+}
- /*
- * If FAIL then it is either:
- * 1. Due to DP-Tx trying to allocate more than available i.e. it failed locally
- * => get estimated and allocate that
- * 2. Due to the fact that DP-Tx tried to allocated ESTIMATED BW and failed then
- * CM will have to update 0xE0023 with new ESTIMATED BW value.
- */
- break;
+/*
+ * Handle DP BW allocation status register
+ *
+ * @link: pointer to the dc_link struct instance
+ * @status: content of DP tunneling status DPCD register
+ *
+ * return: none
+ */
+void link_dp_dpia_handle_bw_alloc_status(struct dc_link *link, uint8_t status)
+{
+ link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link);
- case DPIA_BW_REQ_SUCCESS:
+ if (status & DP_TUNNELING_BW_REQUEST_SUCCEEDED) {
+ DC_LOG_DEBUG("%s: BW Allocation request succeeded on link(%d)",
+ __func__, link->link_index);
+ } else if (status & DP_TUNNELING_BW_REQUEST_FAILED) {
+ DC_LOG_DEBUG("%s: BW Allocation request failed on link(%d) allocated/estimated BW=%d",
+ __func__, link->link_index, link->dpia_bw_alloc_config.estimated_bw);
+
+ link_dpia_send_bw_alloc_request(link, link->dpia_bw_alloc_config.estimated_bw);
+ } else if (status & DP_TUNNELING_ESTIMATED_BW_CHANGED) {
+ DC_LOG_DEBUG("%s: Estimated BW changed on link(%d) new estimated BW=%d",
+ __func__, link->link_index, link->dpia_bw_alloc_config.estimated_bw);
+ }
- DC_LOG_DEBUG("%s: *** BW REQ SUCCESS for DP-TX Request ***\n", __func__);
+ core_link_write_dpcd(
+ link, DP_TUNNELING_STATUS,
+ &status, sizeof(status));
+}
- // 1. SUCCESS 1st time before any Pruning is done
- // 2. SUCCESS after prev. FAIL before any Pruning is done
- // 3. SUCCESS after Pruning is done but before enabling link
+/*
+ * Handle the DP Bandwidth allocation for DPIA
+ *
+ */
+void dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw)
+{
+ if (link && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling
+ && link->dpia_bw_alloc_config.bw_alloc_enabled) {
+ if (peak_bw > 0) {
+ // If DP over USB4 then we need to check BW allocation
+ link->dpia_bw_alloc_config.link_max_bw = peak_bw;
+
+ link_dpia_send_bw_alloc_request(link, peak_bw);
+ } else
+ dpia_bw_alloc_unplug(link);
+ }
+}
- bw_needed = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
+void link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
+{
+ link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link);
- // 1.
- if (!link->dpia_bw_alloc_config.sink_allocated_bw) {
+ DC_LOG_DEBUG("%s: ENTER: link[%d] hpd(%d) Allocated_BW: %d Estimated_BW: %d Req_BW: %d",
+ __func__, link->link_index, link->hpd_status,
+ link->dpia_bw_alloc_config.allocated_bw,
+ link->dpia_bw_alloc_config.estimated_bw,
+ req_bw);
- allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed, link);
- link->dpia_bw_alloc_config.sink_verified_bw =
- link->dpia_bw_alloc_config.sink_allocated_bw;
+ if (link_dp_is_bw_alloc_available(link))
+ link_dpia_send_bw_alloc_request(link, req_bw);
+ else
+ DC_LOG_DEBUG("%s: BW Allocation mode not available", __func__);
+}
- // SUCCESS from first attempt
- if (link->dpia_bw_alloc_config.sink_allocated_bw >
- link->dpia_bw_alloc_config.sink_max_bw)
- link->dpia_bw_alloc_config.sink_verified_bw =
- link->dpia_bw_alloc_config.sink_max_bw;
- }
- // 3.
- else if (link->dpia_bw_alloc_config.sink_allocated_bw) {
-
- // Find out how much do we need to de-alloc
- if (link->dpia_bw_alloc_config.sink_allocated_bw > bw_needed)
- deallocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
- link->dpia_bw_alloc_config.sink_allocated_bw - bw_needed, link);
- else
- allocate_usb4_bw(&link->dpia_bw_alloc_config.sink_allocated_bw,
- bw_needed - link->dpia_bw_alloc_config.sink_allocated_bw, link);
- }
+uint32_t link_dpia_get_dp_overhead(const struct dc_link *link)
+{
+ uint32_t link_dp_overhead = 0;
- // 4. If this is the 2nd sink then any unused bw will be reallocated to master DPIA
- // => check if estimated_bw changed
+ if ((link->type == dc_connection_mst_branch) &&
+ !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
+ /* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH
+ * MST overhead is 1/64 of link bandwidth (excluding any overhead)
+ */
+ const struct dc_link_settings *link_cap = dc_link_get_link_cap(link);
+
+ if (link_cap) {
+ uint32_t link_bw_in_kbps = (uint32_t)link_cap->link_rate *
+ (uint32_t)link_cap->lane_count *
+ LINK_RATE_REF_FREQ_IN_KHZ * 8;
+ link_dp_overhead = (link_bw_in_kbps / MST_TIME_SLOT_COUNT)
+ + ((link_bw_in_kbps % MST_TIME_SLOT_COUNT) ? 1 : 0);
+ }
+ }
- link->dpia_bw_alloc_config.response_ready = true;
- break;
+ return link_dp_overhead;
+}
- case DPIA_EST_BW_CHANGED:
+/*
+ * Aggregates the DPIA bandwidth usage for the respective USB4 Router.
+ * And then validate if the required bandwidth is within the router's capacity.
+ *
+ * @dpia_link_sets: array of per-DPIA validation sets (struct dc_validation_dpia_set)
+ * @count: number of entries in @dpia_link_sets
+ *
+ * return: true if every router's required bandwidth fits, or if there is nothing to validate
+ */
+bool link_dpia_validate_dp_tunnel_bandwidth(const struct dc_validation_dpia_set *dpia_link_sets, uint8_t count)
+{
+ uint32_t granularity_Gbps;
+ const struct dc_link *link;
+ uint32_t link_bw_granularity;
+ uint32_t link_required_bw;
+ struct usb4_router_validation_set router_sets[MAX_HOST_ROUTERS_NUM] = { 0 };
+ uint8_t i;
+ bool is_success = true;
+ uint8_t router_count = 0;
- DC_LOG_DEBUG("%s: *** ESTIMATED BW CHANGED for DP-TX Request ***\n", __func__);
+ if ((dpia_link_sets == NULL) || (count == 0))
+ return is_success;
- estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
- host_router_total_estimated_bw = get_host_router_total_bw(link, HOST_ROUTER_BW_ESTIMATED);
+ // Iterate through each DP tunneling link (DPIA).
+ // Aggregate its bandwidth requirements onto the respective USB4 router.
+ for (i = 0; i < count; i++) {
+ link = dpia_link_sets[i].link;
+ link_required_bw = dpia_link_sets[i].required_bw;
+ const struct dc_tunnel_settings *dp_tunnel_settings = dpia_link_sets[i].tunnel_settings;
- // 1. If due to unplug of other sink
- if (estimated == host_router_total_estimated_bw) {
- // First update the estimated & max_bw fields
- if (link->dpia_bw_alloc_config.estimated_bw < estimated)
- link->dpia_bw_alloc_config.estimated_bw = estimated;
- }
- // 2. If due to realloc bw btw 2 dpia due to plug OR realloc unused Bw
- else {
- // We lost estimated bw usually due to plug event of other dpia
- link->dpia_bw_alloc_config.estimated_bw = estimated;
- }
- break;
+ if ((link == NULL) || (dp_tunnel_settings == NULL) || dp_tunnel_settings->bw_granularity == 0)
+ break;
- case DPIA_BW_ALLOC_CAPS_CHANGED:
+ if (link->type == dc_connection_mst_branch)
+ link_required_bw += link_dpia_get_dp_overhead(link);
- DC_LOG_DEBUG("%s: *** BW ALLOC CAPABILITY CHANGED for DP-TX Request ***\n", __func__);
- link->dpia_bw_alloc_config.bw_alloc_enabled = false;
- break;
- }
-}
-int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw)
-{
- int ret = 0;
- uint8_t timeout = 10;
+ granularity_Gbps = (Kbps_TO_Gbps / dp_tunnel_settings->bw_granularity);
+ link_bw_granularity = (link_required_bw / granularity_Gbps) * granularity_Gbps +
+ ((link_required_bw % granularity_Gbps) ? granularity_Gbps : 0);
- if (!(link && DISPLAY_ENDPOINT_USB4_DPIA == link->ep_type
- && link->dpia_bw_alloc_config.bw_alloc_enabled))
- goto out;
+ // Find or add the USB4 router associated with the current DPIA link
+ for (uint8_t j = 0; j < MAX_HOST_ROUTERS_NUM; j++) {
+ if (router_sets[j].is_valid == false) {
+ router_sets[j].is_valid = true;
+ router_sets[j].cm_id = dp_tunnel_settings->cm_id;
+ router_count++;
+ }
- //1. Hot Plug
- if (link->hpd_status && peak_bw > 0) {
+ if (router_sets[j].cm_id == dp_tunnel_settings->cm_id) {
+ uint32_t remaining_bw =
+ dp_tunnel_settings->estimated_bw - dp_tunnel_settings->allocated_bw;
- // If DP over USB4 then we need to check BW allocation
- link->dpia_bw_alloc_config.sink_max_bw = peak_bw;
- set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw);
+ router_sets[j].allocated_bw += dp_tunnel_settings->allocated_bw;
- do {
- if (!(timeout > 0))
- timeout--;
- else
- break;
- fsleep(10 * 1000);
- } while (!get_cm_response_ready_flag(link));
+ if (remaining_bw > router_sets[j].remaining_bw)
+ router_sets[j].remaining_bw = remaining_bw;
- if (!timeout)
- ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
- else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
- ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED);
- }
- //2. Cold Unplug
- else if (!link->hpd_status)
- dpia_bw_alloc_unplug(link);
+ // Get the max estimated BW within the same CM_ID
+ if (dp_tunnel_settings->estimated_bw > router_sets[j].estimated_bw)
+ router_sets[j].estimated_bw = dp_tunnel_settings->estimated_bw;
-out:
- return ret;
-}
-int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
-{
- int ret = 0;
- uint8_t timeout = 10;
-
- if (!get_bw_alloc_proceed_flag(link))
- goto out;
-
- /*
- * Sometimes stream uses same timing parameters as the already
- * allocated max sink bw so no need to re-alloc
- */
- if (req_bw != link->dpia_bw_alloc_config.sink_allocated_bw) {
- set_usb4_req_bw_req(link, req_bw);
- do {
- if (!(timeout > 0))
- timeout--;
- else
+ router_sets[j].required_bw += link_bw_granularity;
+ router_sets[j].dpia_count++;
break;
- udelay(10 * 1000);
- } while (!get_cm_response_ready_flag(link));
-
- if (!timeout)
- ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
- else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
- ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED);
+ }
+ }
}
-out:
- return ret;
-}
-bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias)
-{
- bool ret = true;
- int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 };
- uint8_t lowest_dpia_index = 0, dpia_index = 0;
- uint8_t i;
-
- if (!num_dpias || num_dpias > MAX_DPIA_NUM)
- return ret;
-
- //Get total Host Router BW & Validate against each Host Router max BW
- for (i = 0; i < num_dpias; ++i) {
+ // Validate bandwidth for each unique router found.
+ for (i = 0; i < router_count; i++) {
+ uint32_t total_bw = 0;
- if (!link[i]->dpia_bw_alloc_config.bw_alloc_enabled)
- continue;
-
- lowest_dpia_index = get_lowest_dpia_index(link[i]);
- if (link[i]->link_index < lowest_dpia_index)
- continue;
+ if (router_sets[i].is_valid == false)
+ break;
- dpia_index = (link[i]->link_index - lowest_dpia_index) / 2;
- bw_needed_per_hr[dpia_index] += bw_needed_per_dpia[i];
- if (bw_needed_per_hr[dpia_index] > get_host_router_total_bw(link[i], HOST_ROUTER_BW_ALLOCATED)) {
+ // Determine the total available bandwidth for the current router based on aggregated data
+ if ((router_sets[i].dpia_count == 1) || (router_sets[i].allocated_bw == 0))
+ total_bw = router_sets[i].estimated_bw;
+ else
+ total_bw = router_sets[i].allocated_bw + router_sets[i].remaining_bw;
- ret = false;
+ if (router_sets[i].required_bw > total_bw) {
+ is_success = false;
break;
}
}
- return ret;
+ return is_success;
}
+
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
index 7292690383ae..30cd8e2b9d35 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
@@ -26,12 +26,8 @@
#ifndef DC_INC_LINK_DP_DPIA_BW_H_
#define DC_INC_LINK_DP_DPIA_BW_H_
-#include "link.h"
+#include "link_service.h"
-/* Number of Host Routers per motherboard is 2 */
-#define MAX_HR_NUM 2
-/* Number of DPIA per host router is 2 */
-#define MAX_DPIA_NUM (MAX_HR_NUM * 2)
/*
* Host Router BW type
@@ -42,14 +38,24 @@ enum bw_type {
HOST_ROUTER_BW_INVALID,
};
+/*
+ * Per-router accumulator used by link_dpia_validate_dp_tunnel_bandwidth().
+ * Bandwidth fields use the unit of the dc_tunnel_settings values they are
+ * built from (presumably kbps - TODO confirm).
+ *
+ * @is_valid: slot has been claimed for a router
+ * @cm_id: cm_id of the tunnel settings that claimed the slot
+ * @dpia_count: number of DPIA links aggregated into the slot
+ * @required_bw: sum of the links' required BW, each rounded up to its granularity
+ * @allocated_bw: sum of the links' allocated BW
+ * @estimated_bw: largest estimated BW among the links
+ * @remaining_bw: largest (estimated BW - allocated BW) among the links
+ */
+struct usb4_router_validation_set {
+ bool is_valid;
+ uint8_t cm_id;
+ uint8_t dpia_count;
+ uint32_t required_bw;
+ uint32_t allocated_bw;
+ uint32_t estimated_bw;
+ uint32_t remaining_bw;
+};
+
/*
- * Enable BW Allocation Mode Support from the DP-Tx side
+ * Enable USB4 DP BW allocation mode
*
* @link: pointer to the dc_link struct instance
*
* return: SUCCESS or FAILURE
*/
-bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link);
+bool link_dpia_enable_usb4_dp_bw_alloc_mode(struct dc_link *link);
/*
* Allocates only what the stream needs for bw, so if:
@@ -59,9 +65,8 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link);
* @link: pointer to the dc_link struct instance
* @req_bw: Bw requested by the stream
*
- * return: allocated bw else return 0
*/
-int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw);
+void link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw);
/*
* Handle the USB4 BW Allocation related functionality here:
@@ -71,32 +76,37 @@ int link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int re
* @link: pointer to the dc_link struct instance
* @peak_bw: Peak bw used by the link/sink
*
- * return: allocated bw else return 0
*/
-int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw);
+void dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw);
/*
- * Handle function for when the status of the Request above is complete.
- * We will find out the result of allocating on CM and update structs.
+ * Obtain all the DP overheads in dp tunneling for the dpia link
*
* @link: pointer to the dc_link struct instance
- * @bw: Allocated or Estimated BW depending on the result
- * @result: Response type
+ *
+ * return: DP overheads in DP tunneling
+ */
+uint32_t link_dpia_get_dp_overhead(const struct dc_link *link);
+
+/*
+ * Handle DP BW allocation status register
+ *
+ * @link: pointer to the dc_link struct instance
+ * @status: content of DP tunneling status register
*
* return: none
*/
-void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result);
+void link_dp_dpia_handle_bw_alloc_status(struct dc_link *link, uint8_t status);
/*
- * Handle the validation of total BW here and confirm that the bw used by each
- * DPIA doesn't exceed available BW for each host router (HR)
+ * Aggregates the DPIA bandwidth usage for the respective USB4 Router.
*
- * @link[]: array of link pointer to all possible DPIA links
- * @bw_needed[]: bw needed for each DPIA link based on timing
- * @num_dpias: Number of DPIAs for the above 2 arrays. Should always be <= MAX_DPIA_NUM
+ * @dpia_link_sets: array of per-DPIA validation sets (struct dc_validation_dpia_set)
+ * @count: number of DPIA validation sets
*
- * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE
+ * return: true if validation succeeded
*/
-bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias);
+bool link_dpia_validate_dp_tunnel_bandwidth(const struct dc_validation_dpia_set *dpia_link_sets, uint8_t count);
#endif /* DC_INC_LINK_DP_DPIA_BW_H_ */
+
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
index e047bbeaa49a..693477413347 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
@@ -37,7 +37,10 @@
#include "link/accessories/link_dp_trace.h"
#include "link/link_dpms.h"
#include "dm_helpers.h"
+#include "link_dp_dpia_bw.h"
+#define DC_LOGGER \
+ link->ctx->logger
#define DC_LOGGER_INIT(logger)
bool dp_parse_link_loss_status(
@@ -118,7 +121,7 @@ bool dp_parse_link_loss_status(
static bool handle_hpd_irq_psr_sink(struct dc_link *link)
{
- union dpcd_psr_configuration psr_configuration;
+ union dpcd_psr_configuration psr_configuration = {0};
if (!link->psr_settings.psr_feature_enabled)
return false;
@@ -182,21 +185,35 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link)
return false;
}
-static bool handle_hpd_irq_replay_sink(struct dc_link *link)
+static void handle_hpd_irq_replay_sink(struct dc_link *link)
{
- union dpcd_replay_configuration replay_configuration;
+ union dpcd_replay_configuration replay_configuration = {0};
/*AMD Replay version reuse DP_PSR_ERROR_STATUS for REPLAY_ERROR status.*/
- union psr_error_status replay_error_status;
+ union psr_error_status replay_error_status = {0};
+ bool ret = false;
+ int retries = 0;
if (!link->replay_settings.replay_feature_enabled)
- return false;
+ return;
- dm_helpers_dp_read_dpcd(
- link->ctx,
- link,
- DP_SINK_PR_REPLAY_STATUS,
- &replay_configuration.raw,
- sizeof(replay_configuration.raw));
+ while (retries < 10) {
+ ret = dm_helpers_dp_read_dpcd(
+ link->ctx,
+ link,
+ DP_SINK_PR_REPLAY_STATUS,
+ &replay_configuration.raw,
+ sizeof(replay_configuration.raw));
+
+ if (ret)
+ break;
+
+ retries++;
+ }
+
+ if (!ret)
+ DC_LOG_WARNING("[%s][%d] DPCD read addr.0x%x failed with %d retries\n",
+ __func__, __LINE__,
+ DP_SINK_PR_REPLAY_STATUS, retries);
dm_helpers_dp_read_dpcd(
link->ctx,
@@ -205,18 +222,20 @@ static bool handle_hpd_irq_replay_sink(struct dc_link *link)
&replay_error_status.raw,
sizeof(replay_error_status.raw));
- link->replay_settings.config.replay_error_status.bits.LINK_CRC_ERROR =
- replay_error_status.bits.LINK_CRC_ERROR;
- link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR =
- replay_configuration.bits.DESYNC_ERROR_STATUS;
- link->replay_settings.config.replay_error_status.bits.STATE_TRANSITION_ERROR =
- replay_configuration.bits.STATE_TRANSITION_ERROR_STATUS;
-
- if (link->replay_settings.config.replay_error_status.bits.LINK_CRC_ERROR ||
- link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR ||
- link->replay_settings.config.replay_error_status.bits.STATE_TRANSITION_ERROR) {
+ if (replay_error_status.bits.LINK_CRC_ERROR ||
+ replay_configuration.bits.DESYNC_ERROR_STATUS ||
+ replay_configuration.bits.STATE_TRANSITION_ERROR_STATUS) {
bool allow_active;
+ link->replay_settings.config.replay_error_status.raw |= replay_error_status.raw;
+
+ /* Increment desync error counter if a desync error is detected */
+ if (replay_configuration.bits.DESYNC_ERROR_STATUS)
+ link->replay_settings.replay_desync_error_fail_count++;
+
+ if (link->replay_settings.config.force_disable_desync_error_check)
+ return;
+
/* Acknowledge and clear configuration bits */
dm_helpers_dp_write_dpcd(
link->ctx,
@@ -241,7 +260,6 @@ static bool handle_hpd_irq_replay_sink(struct dc_link *link)
edp_set_replay_allow_active(link, &allow_active, true, false, NULL);
}
}
- return true;
}
void dp_handle_link_loss(struct dc_link *link)
@@ -258,7 +276,7 @@ void dp_handle_link_loss(struct dc_link *link)
for (i = count - 1; i >= 0; i--) {
// Always use max settings here for DP 1.4a LL Compliance CTS
- if (link->is_automated) {
+ if (link->skip_fallback_on_link_loss) {
pipes[i]->link_config.dp_link_settings.lane_count =
link->verified_link_cap.lane_count;
pipes[i]->link_config.dp_link_settings.link_rate =
@@ -270,10 +288,34 @@ void dp_handle_link_loss(struct dc_link *link)
}
}
+static void dp_handle_tunneling_irq(struct dc_link *link)
+{
+ enum dc_status retval;
+ uint8_t tunneling_status = 0;
+
+ retval = core_link_read_dpcd(
+ link, DP_TUNNELING_STATUS,
+ &tunneling_status,
+ sizeof(tunneling_status));
+
+ if (retval == DC_OK) {
+ DC_LOG_HW_HPD_IRQ("%s: Got DP tunneling status on link %d status=0x%x",
+ __func__, link->link_index, tunneling_status);
+
+ if (tunneling_status & DP_TUNNELING_BW_ALLOC_BITS_MASK)
+ link_dp_dpia_handle_bw_alloc_status(link, tunneling_status);
+ }
+
+ tunneling_status = DP_TUNNELING_IRQ;
+ core_link_write_dpcd(
+ link, DP_LINK_SERVICE_IRQ_VECTOR_ESI0,
+ &tunneling_status, 1);
+}
+
static void read_dpcd204h_on_irq_hpd(struct dc_link *link, union hpd_irq_data *irq_data)
{
enum dc_status retval;
- union lane_align_status_updated dpcd_lane_status_updated;
+ union lane_align_status_updated dpcd_lane_status_updated = {0};
retval = core_link_read_dpcd(
link,
@@ -303,17 +345,23 @@ enum dc_status dp_read_hpd_rx_irq_data(
*
* For DP 1.4 we need to read those from 2002h range.
*/
- if (link->dpcd_caps.dpcd_rev.raw < DPCD_REV_14)
+ if (link->dpcd_caps.dpcd_rev.raw < DPCD_REV_14) {
retval = core_link_read_dpcd(
link,
DP_SINK_COUNT,
irq_data->raw,
- sizeof(union hpd_irq_data));
- else {
+ DP_SINK_STATUS - DP_SINK_COUNT + 1);
+
+ if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling) {
+ retval = core_link_read_dpcd(
+ link, DP_LINK_SERVICE_IRQ_VECTOR_ESI0,
+ &irq_data->bytes.link_service_irq_esi0.raw, 1);
+ }
+ } else {
/* Read 14 bytes in a single read and then copy only the required fields.
* This is more efficient than doing it in two separate AUX reads. */
- uint8_t tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI + 1];
+ uint8_t tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI + 1] = {0};
retval = core_link_read_dpcd(
link,
@@ -330,6 +378,7 @@ enum dc_status dp_read_hpd_rx_irq_data(
irq_data->bytes.lane23_status.raw = tmp[DP_LANE2_3_STATUS_ESI - DP_SINK_COUNT_ESI];
irq_data->bytes.lane_status_updated.raw = tmp[DP_LANE_ALIGN_STATUS_UPDATED_ESI - DP_SINK_COUNT_ESI];
irq_data->bytes.sink_status.raw = tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI];
+ irq_data->bytes.link_service_irq_esi0.raw = tmp[DP_LINK_SERVICE_IRQ_VECTOR_ESI0 - DP_SINK_COUNT_ESI];
/*
* This display doesn't have correct values in DPCD200Eh.
@@ -397,7 +446,10 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link,
if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
// Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC
- link->is_automated = true;
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ !link->dc->config.enable_dpia_pre_training)
+ link->skip_fallback_on_link_loss = true;
+
device_service_clear.bits.AUTOMATED_TEST = 1;
core_link_write_dpcd(
link,
@@ -422,9 +474,7 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link,
/* PSR-related error was detected and handled */
return true;
- if (handle_hpd_irq_replay_sink(link))
- /* Replay-related error was detected and handled */
- return true;
+ handle_hpd_irq_replay_sink(link);
/* If PSR-related error handled, Main link may be off,
* so do not handle as a normal sink status change interrupt.
@@ -447,7 +497,8 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link,
* If we got sink count changed it means
* Downstream port status changed,
* then DM should call DC to do the detection.
- * NOTE: Do not handle link loss on eDP since it is internal link*/
+ * NOTE: Do not handle link loss on eDP since it is internal link
+ */
if ((link->connector_signal != SIGNAL_TYPE_EDP) &&
dp_parse_link_loss_status(
link,
@@ -470,6 +521,11 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link,
dp_trace_link_loss_increment(link);
}
+ if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling) {
+ if (hpd_irq_dpcd_data.bytes.link_service_irq_esi0.bits.DP_LINK_TUNNELING_IRQ)
+ dp_handle_tunneling_irq(link);
+ }
+
if (link->type == dc_connection_sst_branch &&
hpd_irq_dpcd_data.bytes.sink_cnt.bits.SINK_COUNT
!= link->dpcd_sink_count)
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h
index ac33730fedd4..87516fb3b45a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_DP_IRQ_HANDLER_H__
#define __DC_LINK_DP_IRQ_HANDLER_H__
-#include "link.h"
+#include "link_service.h"
bool dp_parse_link_loss_status(
struct dc_link *link,
union hpd_irq_data *hpd_irq_dpcd_data);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c
index b7abba55bc2f..49521ac4b0e8 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c
@@ -37,6 +37,7 @@
#include "clk_mgr.h"
#include "resource.h"
#include "link_enc_cfg.h"
+#include "atomfirmware.h"
#define DC_LOGGER \
link->ctx->logger
@@ -73,7 +74,9 @@ void dp_disable_link_phy(struct dc_link *link,
{
struct dc *dc = link->ctx->dc;
- if (!link->wa_flags.dp_keep_receiver_powered)
+ if (!link->wa_flags.dp_keep_receiver_powered &&
+ !link->skip_implict_edp_power_control &&
+ link->type != dc_connection_none)
dpcd_write_rx_power_ctrl(link, false);
dc->hwss.disable_link_output(link, link_res, signal);
@@ -99,8 +102,11 @@ void dp_set_hw_lane_settings(
{
const struct link_hwss *link_hwss = get_link_hwss(link, link_res);
+ // Don't return here if using FIXED_VS link HWSS and encoding is 128b/132b
if ((link_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) &&
- !is_immediate_downstream(link, offset))
+ !is_immediate_downstream(link, offset) &&
+ (!((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) ||
+ link_dp_get_encoding_format(&link_settings->link_settings) == DP_8b_10b_ENCODING))
return;
if (link_hwss->ext.set_dp_lane_settings)
@@ -136,38 +142,33 @@ enum dc_status dp_set_fec_ready(struct dc_link *link, const struct link_resource
* if the sink supports it and leave it enabled on link.
* If FEC is not supported, disable it.
*/
- struct link_encoder *link_enc = NULL;
+ struct link_encoder *link_enc = link_res->dio_link_enc;
enum dc_status status = DC_OK;
uint8_t fec_config = 0;
- link_enc = link_enc_cfg_get_link_enc(link);
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
ASSERT(link_enc);
+ if (link_enc->funcs->fec_set_ready == NULL)
+ return DC_NOT_SUPPORTED;
- if (!dp_should_enable_fec(link))
- return status;
-
- if (link_enc->funcs->fec_set_ready &&
- link->dpcd_caps.fec_cap.bits.FEC_CAPABLE) {
- if (ready) {
- fec_config = 1;
- status = core_link_write_dpcd(link,
- DP_FEC_CONFIGURATION,
- &fec_config,
- sizeof(fec_config));
- if (status == DC_OK) {
- link_enc->funcs->fec_set_ready(link_enc, true);
- link->fec_state = dc_link_fec_ready;
- } else {
- link_enc->funcs->fec_set_ready(link_enc, false);
- link->fec_state = dc_link_fec_not_ready;
- dm_error("dpcd write failed to set fec_ready");
- }
- } else if (link->fec_state == dc_link_fec_ready) {
+ if (ready && dp_should_enable_fec(link)) {
+ fec_config = 1;
+
+ status = core_link_write_dpcd(link, DP_FEC_CONFIGURATION,
+ &fec_config, sizeof(fec_config));
+
+ if (status == DC_OK) {
+ link_enc->funcs->fec_set_ready(link_enc, true);
+ link->fec_state = dc_link_fec_ready;
+ }
+ } else {
+ if (link->fec_state == dc_link_fec_ready) {
fec_config = 0;
- status = core_link_write_dpcd(link,
- DP_FEC_CONFIGURATION,
- &fec_config,
- sizeof(fec_config));
+ if (link->type != dc_connection_none)
+ core_link_write_dpcd(link, DP_FEC_CONFIGURATION,
+ &fec_config, sizeof(fec_config));
+
link_enc->funcs->fec_set_ready(link_enc, false);
link->fec_state = dc_link_fec_not_ready;
}
@@ -176,20 +177,19 @@ enum dc_status dp_set_fec_ready(struct dc_link *link, const struct link_resource
return status;
}
-void dp_set_fec_enable(struct dc_link *link, bool enable)
+void dp_set_fec_enable(struct dc_link *link, const struct link_resource *link_res, bool enable)
{
- struct link_encoder *link_enc = NULL;
+ struct link_encoder *link_enc = link_res->dio_link_enc;
- link_enc = link_enc_cfg_get_link_enc(link);
- ASSERT(link_enc);
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
- if (!dp_should_enable_fec(link))
+ if (link_enc == NULL || link_enc->funcs == NULL || link_enc->funcs->fec_set_enable == NULL)
return;
- if (link_enc->funcs->fec_set_enable &&
- link->dpcd_caps.fec_cap.bits.FEC_CAPABLE) {
- if (link->fec_state == dc_link_fec_ready && enable) {
- /* Accord to DP spec, FEC enable sequence can first
+ if (enable && dp_should_enable_fec(link)) {
+ if (link->fec_state == dc_link_fec_ready) {
+ /* According to DP spec, FEC enable sequence can first
* be transmitted anytime after 1000 LL codes have
* been transmitted on the link after link training
* completion. Using 1 lane RBR should have the maximum
@@ -199,7 +199,9 @@ void dp_set_fec_enable(struct dc_link *link, bool enable)
udelay(7);
link_enc->funcs->fec_set_enable(link_enc, true);
link->fec_state = dc_link_fec_enabled;
- } else if (link->fec_state == dc_link_fec_enabled && !enable) {
+ }
+ } else {
+ if (link->fec_state == dc_link_fec_enabled) {
link_enc->funcs->fec_set_enable(link_enc, false);
link->fec_state = dc_link_fec_ready;
}
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h
index 1eb0619d6710..58e154494582 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_DP_PHY_H__
#define __DC_LINK_DP_PHY_H__
-#include "link.h"
+#include "link_service.h"
void dp_enable_link_phy(
struct dc_link *link,
const struct link_resource *link_res,
@@ -52,7 +52,8 @@ void dp_set_drive_settings(
enum dc_status dp_set_fec_ready(struct dc_link *link,
const struct link_resource *link_res, bool ready);
-void dp_set_fec_enable(struct dc_link *link, bool enable);
+void dp_set_fec_enable(struct dc_link *link,
+ const struct link_resource *link_res, bool enable);
void dpcd_write_rx_power_ctrl(struct dc_link *link, bool on);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index 90339c2dfd84..08e2b572e0ff 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -272,7 +272,7 @@ void dp_wait_for_training_aux_rd_interval(
struct dc_link *link,
uint32_t wait_in_micro_secs)
{
- fsleep(wait_in_micro_secs);
+ usleep_range_state(wait_in_micro_secs, wait_in_micro_secs, TASK_UNINTERRUPTIBLE);
DC_LOG_HW_LINK_TRAINING("%s:\n wait = %d\n",
__func__,
@@ -329,8 +329,12 @@ static void maximize_lane_settings(const struct link_training_settings *lt_setti
if (max_requested.PRE_EMPHASIS > PRE_EMPHASIS_MAX_LEVEL)
max_requested.PRE_EMPHASIS = PRE_EMPHASIS_MAX_LEVEL;
- if (max_requested.FFE_PRESET.settings.level > DP_FFE_PRESET_MAX_LEVEL)
- max_requested.FFE_PRESET.settings.level = DP_FFE_PRESET_MAX_LEVEL;
+
+ /* Note, we are not checking
+ * if max_requested.FFE_PRESET.settings.level > DP_FFE_PRESET_MAX_LEVEL,
+ * since FFE_PRESET.settings.level is 4 bits and DP_FFE_PRESET_MAX_LEVEL equals 15,
+ * so FFE_PRESET.settings.level will never be greater than 15.
+ */
/* make sure the pre-emphasis matches the voltage swing*/
if (max_requested.PRE_EMPHASIS >
@@ -511,12 +515,48 @@ bool dp_is_interlane_aligned(union lane_align_status_updated align_status)
return align_status.bits.INTERLANE_ALIGN_DONE == 1;
}
+bool dp_check_interlane_aligned(union lane_align_status_updated align_status,
+ struct dc_link *link,
+ uint8_t retries)
+{
+ /* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4
+ * has to share encoders unlike DP and USBC
+ */
+ return (dp_is_interlane_aligned(align_status) ||
+ (link->skip_fallback_on_link_loss && retries));
+}
+
+uint32_t dp_get_eq_aux_rd_interval(
+ const struct dc_link *link,
+ const struct link_training_settings *lt_settings,
+ uint32_t offset,
+ uint8_t retries)
+{
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+ if (offset == 0 && retries == 1 && lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT)
+ return max(lt_settings->eq_pattern_time, (uint32_t) DPIA_CLK_SYNC_DELAY);
+ else
+ return dpia_get_eq_aux_rd_interval(link, lt_settings, offset);
+ } else if (is_repeater(lt_settings, offset))
+ return dp_translate_training_aux_read_interval(
+ link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]);
+ else
+ return lt_settings->eq_pattern_time;
+}
+
+bool dp_check_dpcd_reqeust_status(const struct dc_link *link,
+ enum dc_status status)
+{
+ return (status != DC_OK && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA);
+}
+
enum link_training_result dp_check_link_loss_status(
struct dc_link *link,
const struct link_training_settings *link_training_setting)
{
enum link_training_result status = LINK_TRAINING_SUCCESS;
union lane_status lane_status;
+ union lane_align_status_updated dpcd_lane_status_updated;
uint8_t dpcd_buf[6] = {0};
uint32_t lane;
@@ -532,10 +572,12 @@ enum link_training_result dp_check_link_loss_status(
* check lanes status
*/
lane_status.raw = dp_get_nibble_at_index(&dpcd_buf[2], lane);
+ dpcd_lane_status_updated.raw = dpcd_buf[4];
if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
!lane_status.bits.CR_DONE_0 ||
- !lane_status.bits.SYMBOL_LOCKED_0) {
+ !lane_status.bits.SYMBOL_LOCKED_0 ||
+ !dp_is_interlane_aligned(dpcd_lane_status_updated)) {
/* if one of the channel equalization, clock
* recovery or symbol lock is dropped
* consider it as (link has been
@@ -694,10 +736,12 @@ void override_training_settings(
lt_settings->pre_emphasis = overrides->pre_emphasis;
if (overrides->post_cursor2 != NULL)
lt_settings->post_cursor2 = overrides->post_cursor2;
+ if (link->wa_flags.force_dp_ffe_preset && !dp_is_lttpr_present(link))
+ lt_settings->ffe_preset = &link->forced_dp_ffe_preset;
if (overrides->ffe_preset != NULL)
lt_settings->ffe_preset = overrides->ffe_preset;
/* Override HW lane settings with BIOS forced values if present */
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
lt_settings->lttpr_mode == LTTPR_MODE_TRANSPARENT) {
lt_settings->voltage_swing = &link->bios_forced_drive_settings.VOLTAGE_SWING;
lt_settings->pre_emphasis = &link->bios_forced_drive_settings.PRE_EMPHASIS;
@@ -741,7 +785,6 @@ void override_training_settings(
lt_settings->lttpr_mode = LTTPR_MODE_NON_LTTPR;
dp_get_lttpr_mode_override(link, &lt_settings->lttpr_mode);
-
}
enum dc_dp_training_pattern decide_cr_training_pattern(
@@ -757,19 +800,23 @@ enum dc_dp_training_pattern decide_cr_training_pattern(
}
enum dc_dp_training_pattern decide_eq_training_pattern(struct dc_link *link,
+ const struct link_resource *link_res,
const struct dc_link_settings *link_settings)
{
- struct link_encoder *link_enc;
+ struct link_encoder *link_enc = link_res->dio_link_enc;
struct encoder_feature_support *enc_caps;
struct dpcd_caps *rx_caps = &link->dpcd_caps;
enum dc_dp_training_pattern pattern = DP_TRAINING_PATTERN_SEQUENCE_2;
- link_enc = link_enc_cfg_get_link_enc(link);
- ASSERT(link_enc);
- enc_caps = &link_enc->features;
-
switch (link_dp_get_encoding_format(link_settings)) {
case DP_8b_10b_ENCODING:
+ if (!link->dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
+
+ if (!link_enc)
+ break;
+
+ enc_caps = &link_enc->features;
if (enc_caps->flags.bits.IS_TPS4_CAPABLE &&
rx_caps->max_down_spread.bits.TPS4_SUPPORTED)
pattern = DP_TRAINING_PATTERN_SEQUENCE_4;
@@ -807,7 +854,7 @@ void dp_decide_lane_settings(
const struct link_training_settings *lt_settings,
const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
- union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX])
+ union dpcd_training_lane *dpcd_lane_settings)
{
uint32_t lane;
@@ -842,13 +889,14 @@ void dp_decide_lane_settings(
void dp_decide_training_settings(
struct dc_link *link,
+ const struct link_resource *link_res,
const struct dc_link_settings *link_settings,
struct link_training_settings *lt_settings)
{
if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING)
- decide_8b_10b_training_settings(link, link_settings, lt_settings);
+ decide_8b_10b_training_settings(link, link_res, link_settings, lt_settings);
else if (link_dp_get_encoding_format(link_settings) == DP_128b_132b_ENCODING)
- decide_128b_132b_training_settings(link, link_settings, lt_settings);
+ decide_128b_132b_training_settings(link, link_res, link_settings, lt_settings);
}
@@ -911,10 +959,10 @@ static enum dc_status configure_lttpr_mode_non_transparent(
/* Driver does not need to train the first hop. Skip DPCD read and clear
* AUX_RD_INTERVAL for DPTX-to-DPIA hop.
*/
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && repeater_cnt > 0 && repeater_cnt < MAX_REPEATER_CNT)
link->dpcd_caps.lttpr_caps.aux_rd_interval[--repeater_cnt] = 0;
- for (repeater_id = repeater_cnt; repeater_id > 0; repeater_id--) {
+ for (repeater_id = repeater_cnt; repeater_id > 0 && repeater_id < MAX_REPEATER_CNT; repeater_id--) {
aux_interval_address = DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 +
((DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE) * (repeater_id - 1));
core_link_read_dpcd(
@@ -966,23 +1014,34 @@ void repeater_training_done(struct dc_link *link, uint32_t offset)
dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
}
-static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding)
+static enum link_training_result dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding)
{
+ enum dc_status status;
uint8_t sink_status = 0;
- uint8_t i;
+ uint32_t i;
+ uint8_t lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+ uint32_t intra_hop_disable_time_ms = (lttpr_count > 0 ? lttpr_count * 300 : 10);
+
+ // Each hop could theoretically take over 256ms (max 128b/132b AUX RD INTERVAL)
+ // To be safe, allow 300ms per LTTPR and 10ms for no LTTPR case
/* clear training pattern set */
- dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
+ status = dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
+
+ if (dp_check_dpcd_reqeust_status(link, status))
+ return LINK_TRAINING_ABORT;
if (encoding == DP_128b_132b_ENCODING) {
/* poll for intra-hop disable */
- for (i = 0; i < 10; i++) {
+ for (i = 0; i < intra_hop_disable_time_ms; i++) {
if ((core_link_read_dpcd(link, DP_SINK_STATUS, &sink_status, 1) == DC_OK) &&
(sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION) == 0)
break;
fsleep(1000);
}
}
+
+ return LINK_TRAINING_SUCCESS;
}
enum dc_status dpcd_configure_channel_coding(struct dc_link *link,
@@ -1006,17 +1065,18 @@ enum dc_status dpcd_configure_channel_coding(struct dc_link *link,
return status;
}
-void dpcd_set_training_pattern(
+enum dc_status dpcd_set_training_pattern(
struct dc_link *link,
enum dc_dp_training_pattern training_pattern)
{
+ enum dc_status status;
union dpcd_training_pattern dpcd_pattern = {0};
dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
dp_training_pattern_to_dpcd_training_pattern(
link, training_pattern);
- core_link_write_dpcd(
+ status = core_link_write_dpcd(
link,
DP_TRAINING_PATTERN_SET,
&dpcd_pattern.raw,
@@ -1026,6 +1086,8 @@ void dpcd_set_training_pattern(
__func__,
DP_TRAINING_PATTERN_SET,
dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
+
+ return status;
}
enum dc_status dpcd_set_link_settings(
@@ -1056,9 +1118,13 @@ enum dc_status dpcd_set_link_settings(
status = core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL,
&downspread.raw, sizeof(downspread));
+ if (status != DC_OK)
+ DC_LOG_ERROR("%s:%d: core_link_write_dpcd (DP_DOWNSPREAD_CTRL) failed\n", __func__, __LINE__);
status = core_link_write_dpcd(link, DP_LANE_COUNT_SET,
&lane_count_set.raw, 1);
+ if (status != DC_OK)
+ DC_LOG_ERROR("%s:%d: core_link_write_dpcd (DP_LANE_COUNT_SET) failed\n", __func__, __LINE__);
if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_13 &&
lt_settings->link_settings.use_link_rate_set == true) {
@@ -1068,18 +1134,25 @@ enum dc_status dpcd_set_link_settings(
* MUX chip gets link rate set back before link training.
*/
if (link->connector_signal == SIGNAL_TYPE_EDP) {
- uint8_t supported_link_rates[16];
+ uint8_t supported_link_rates[16] = {0};
core_link_read_dpcd(link, DP_SUPPORTED_LINK_RATES,
supported_link_rates, sizeof(supported_link_rates));
}
status = core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1);
+ if (status != DC_OK)
+ DC_LOG_ERROR("%s:%d: core_link_write_dpcd (DP_LINK_BW_SET) failed\n", __func__, __LINE__);
+
status = core_link_write_dpcd(link, DP_LINK_RATE_SET,
&lt_settings->link_settings.link_rate_set, 1);
+ if (status != DC_OK)
+ DC_LOG_ERROR("%s:%d: core_link_write_dpcd (DP_LINK_RATE_SET) failed\n", __func__, __LINE__);
} else {
rate = get_dpcd_link_rate(&lt_settings->link_settings);
status = core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1);
+ if (status != DC_OK)
+ DC_LOG_ERROR("%s:%d: core_link_write_dpcd (DP_LINK_BW_SET) failed\n", __func__, __LINE__);
}
if (rate) {
@@ -1178,6 +1251,13 @@ void dpcd_set_lt_pattern_and_lane_settings(
dpcd_lt_buffer[DP_TRAINING_PATTERN_SET - DP_TRAINING_PATTERN_SET]
= dpcd_pattern.raw;
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+ dpia_set_tps_notification(
+ link,
+ lt_settings,
+ dpcd_pattern.v1_4.TRAINING_PATTERN_SET,
+ offset);
+
if (is_repeater(lt_settings, offset)) {
DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n 0x%X pattern = %x\n",
__func__,
@@ -1448,7 +1528,8 @@ static enum link_training_result dp_transition_to_video_idle(
*/
if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) {
msleep(5);
- status = dp_check_link_loss_status(link, lt_settings);
+ if (!link->skip_fallback_on_link_loss)
+ status = dp_check_link_loss_status(link, lt_settings);
}
return status;
}
@@ -1484,6 +1565,7 @@ enum link_training_result dp_perform_link_training(
/* decide training settings */
dp_decide_training_settings(
link,
+ link_res,
link_settings,
&lt_settings);
@@ -1497,18 +1579,16 @@ enum link_training_result dp_perform_link_training(
/* configure link prior to entering training mode */
dpcd_configure_lttpr_mode(link, &lt_settings);
- dp_set_fec_ready(link, link_res, lt_settings.should_set_fec_ready);
+ if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING)
+ dp_set_fec_ready(link, link_res, lt_settings.should_set_fec_ready);
dpcd_configure_channel_coding(link, &lt_settings);
/* enter training mode:
* Per DP specs starting from here, DPTX device shall not issue
* Non-LT AUX transactions inside training mode.
*/
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && encoding == DP_8b_10b_ENCODING)
- if (link->dc->config.use_old_fixed_vs_sequence)
- status = dp_perform_fixed_vs_pe_training_sequence_legacy(link, link_res, &lt_settings);
- else
- status = dp_perform_fixed_vs_pe_training_sequence(link, link_res, &lt_settings);
+ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && encoding == DP_8b_10b_ENCODING)
+ status = dp_perform_fixed_vs_pe_training_sequence(link, link_res, &lt_settings);
else if (encoding == DP_8b_10b_ENCODING)
status = dp_perform_8b_10b_link_training(link, link_res, &lt_settings);
else if (encoding == DP_128b_132b_ENCODING)
@@ -1517,7 +1597,9 @@ enum link_training_result dp_perform_link_training(
ASSERT(0);
/* exit training mode */
- dpcd_exit_training_mode(link, encoding);
+ if ((dpcd_exit_training_mode(link, encoding) != LINK_TRAINING_SUCCESS || status == LINK_TRAINING_ABORT) &&
+ link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+ dpia_training_abort(link, &lt_settings, 0);
/* switch to video idle */
if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern)
@@ -1587,21 +1669,7 @@ bool perform_link_training_with_retries(
msleep(delay_dp_power_up_in_ms);
}
- if (panel_mode == DP_PANEL_MODE_EDP) {
- struct cp_psp *cp_psp = &stream->ctx->cp_psp;
-
- if (cp_psp && cp_psp->funcs.enable_assr) {
- /* ASSR is bound to fail with unsigned PSP
- * verstage used during devlopment phase.
- * Report and continue with eDP panel mode to
- * perform eDP link training with right settings
- */
- bool result;
- result = cp_psp->funcs.enable_assr(cp_psp->handle, link);
- if (!result && link->panel_mode != DP_PANEL_MODE_EDP)
- panel_mode = DP_PANEL_MODE_DEFAULT;
- }
- }
+ edp_set_panel_assr(link, pipe_ctx, &panel_mode, true);
dp_set_panel_mode(link, panel_mode);
@@ -1609,8 +1677,7 @@ bool perform_link_training_with_retries(
dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings);
return true;
} else {
- /** @todo Consolidate USB4 DP and DPx.x training. */
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+ if (!link->dc->config.consolidated_dpia_dp_lt && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
status = dpia_perform_link_training(
link,
&pipe_ctx->link_res,
@@ -1639,8 +1706,17 @@ bool perform_link_training_with_retries(
dp_trace_lt_total_count_increment(link, false);
dp_trace_lt_result_update(link, status, false);
dp_trace_set_lt_end_timestamp(link, false);
- if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low)
+ if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) {
+ // Update verified link settings to current one
+ // Because DPIA LT might fall back to a lower link setting.
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+ link->verified_link_cap.link_rate = link->cur_link_settings.link_rate;
+ link->verified_link_cap.lane_count = link->cur_link_settings.lane_count;
+ dm_helpers_dp_mst_update_branch_bandwidth(link->ctx, link);
+ }
return true;
+ }
}
fail_count++;
@@ -1653,6 +1729,15 @@ bool perform_link_training_with_retries(
break;
}
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST &&
+ !link->dc->config.enable_dpia_pre_training) {
+ if (j == (attempts - 1))
+ do_fallback = true;
+ else
+ do_fallback = false;
+ }
+
if (j == (attempts - 1)) {
DC_LOG_WARNING(
"%s: Link(%d) training attempt %u of %d failed @ rate(%d) x lane(%d) @ spread = %x : fail reason:(%d)\n",
@@ -1673,8 +1758,7 @@ bool perform_link_training_with_retries(
if (status == LINK_TRAINING_ABORT) {
enum dc_connection_type type = dc_connection_none;
- link_detect_connection_type(link, &type);
- if (type == dc_connection_none) {
+ if (link_detect_connection_type(link, &type) && type == dc_connection_none) {
DC_LOG_HW_LINK_TRAINING("%s: Aborting training because sink unplugged\n", __func__);
break;
}
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
index 7d027bac8255..ce52de22ab7a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_DP_TRAINING_H__
#define __DC_LINK_DP_TRAINING_H__
-#include "link.h"
+#include "link_service.h"
bool perform_link_training_with_retries(
const struct dc_link_settings *link_setting,
@@ -55,7 +55,7 @@ void dp_set_hw_test_pattern(
uint8_t *custom_pattern,
uint32_t custom_pattern_size);
-void dpcd_set_training_pattern(
+enum dc_status dpcd_set_training_pattern(
struct dc_link *link,
enum dc_dp_training_pattern training_pattern);
@@ -104,6 +104,7 @@ void start_clock_recovery_pattern_early(struct dc_link *link,
void dp_decide_training_settings(
struct dc_link *link,
+ const struct link_resource *link_res,
const struct dc_link_settings *link_settings,
struct link_training_settings *lt_settings);
@@ -111,12 +112,13 @@ void dp_decide_lane_settings(
const struct link_training_settings *lt_settings,
const union lane_adjust ln_adjust[LANE_COUNT_DP_MAX],
struct dc_lane_settings hw_lane_settings[LANE_COUNT_DP_MAX],
- union dpcd_training_lane dpcd_lane_settings[LANE_COUNT_DP_MAX]);
+ union dpcd_training_lane *dpcd_lane_settings);
enum dc_dp_training_pattern decide_cr_training_pattern(
const struct dc_link_settings *link_settings);
enum dc_dp_training_pattern decide_eq_training_pattern(struct dc_link *link,
+ const struct link_resource *link_res,
const struct dc_link_settings *link_settings);
enum lttpr_mode dp_decide_lttpr_mode(struct dc_link *link,
@@ -182,4 +184,18 @@ uint32_t dp_translate_training_aux_read_interval(
uint8_t dp_get_nibble_at_index(const uint8_t *buf,
uint32_t index);
+
+bool dp_check_interlane_aligned(union lane_align_status_updated align_status,
+ struct dc_link *link,
+ uint8_t retries);
+
+uint32_t dp_get_eq_aux_rd_interval(
+ const struct dc_link *link,
+ const struct link_training_settings *lt_settings,
+ uint32_t offset,
+ uint8_t retries);
+
+bool dp_check_dpcd_reqeust_status(const struct dc_link *link,
+ enum dc_status status);
+
#endif /* __DC_LINK_DP_TRAINING_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.c
index db87cfe37b5c..11565f187ac7 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.c
@@ -204,6 +204,7 @@ enum link_training_result dp_perform_128b_132b_link_training(
struct link_training_settings legacy_settings;
decide_8b_10b_training_settings(link,
+ link_res,
&lt_settings->link_settings,
&legacy_settings);
return dp_perform_8b_10b_link_training(link, link_res, &legacy_settings);
@@ -227,6 +228,7 @@ enum link_training_result dp_perform_128b_132b_link_training(
}
void decide_128b_132b_training_settings(struct dc_link *link,
+ const struct link_resource *link_res,
const struct dc_link_settings *link_settings,
struct link_training_settings *lt_settings)
{
@@ -238,7 +240,7 @@ void decide_128b_132b_training_settings(struct dc_link *link,
LINK_SPREAD_05_DOWNSPREAD_30KHZ;
lt_settings->pattern_for_cr = decide_cr_training_pattern(link_settings);
- lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_settings);
+ lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_res, link_settings);
lt_settings->eq_pattern_time = 2500;
lt_settings->eq_wait_time_limit = 400000;
lt_settings->eq_loop_count_limit = 20;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.h
index 2147f24efc8b..901a42edafa1 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.h
@@ -34,6 +34,7 @@ enum link_training_result dp_perform_128b_132b_link_training(
struct link_training_settings *lt_settings);
void decide_128b_132b_training_settings(struct dc_link *link,
+ const struct link_resource *link_res,
const struct dc_link_settings *link_settings,
struct link_training_settings *lt_settings);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c
index 2b4c15b0b407..66d0fb1b9b9d 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c
@@ -35,22 +35,41 @@
#define DC_LOGGER \
link->ctx->logger
+static void get_default_8b_10b_lttpr_aux_rd_interval(
+ union training_aux_rd_interval *training_rd_interval)
+{
+ /* Store the default 8b/10b AUX read interval to assume when an LTTPR
+ * is present: the raw value of @training_rd_interval is overwritten
+ * with 0x4. Nothing is read from the link here.
+ *
+ * LTTPR are required to program DPCD 0000Eh to 0x4 (16ms) upon AUX
+ * read reply to this register. Since old sinks with DPCD rev 1.1
+ * and earlier may not support this register, assume the mandatory
+ * value is programmed by the LTTPR to avoid AUX timeout issues.
+ */
+ training_rd_interval->raw = 0x4;
+}
+
static int32_t get_cr_training_aux_rd_interval(struct dc_link *link,
- const struct dc_link_settings *link_settings)
+ const struct dc_link_settings *link_settings,
+ enum lttpr_mode lttpr_mode)
{
union training_aux_rd_interval training_rd_interval;
uint32_t wait_in_micro_secs = 100;
memset(&training_rd_interval, 0, sizeof(training_rd_interval));
- if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING &&
- link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) {
- core_link_read_dpcd(
- link,
- DP_TRAINING_AUX_RD_INTERVAL,
- (uint8_t *)&training_rd_interval,
- sizeof(training_rd_interval));
- if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL)
- wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000;
+ if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) {
+ if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12)
+ core_link_read_dpcd(
+ link,
+ DP_TRAINING_AUX_RD_INTERVAL,
+ (uint8_t *)&training_rd_interval,
+ sizeof(training_rd_interval));
+ else if (dp_is_lttpr_present(link))
+ get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval);
+
+ if (training_rd_interval.raw != 0) {
+ if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT)
+ wait_in_micro_secs = 400;
+ if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL)
+ wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000;
+ }
}
return wait_in_micro_secs;
}
@@ -68,13 +87,15 @@ static uint32_t get_eq_training_aux_rd_interval(
DP_128B132B_TRAINING_AUX_RD_INTERVAL,
(uint8_t *)&training_rd_interval,
sizeof(training_rd_interval));
- } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING &&
- link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) {
- core_link_read_dpcd(
- link,
- DP_TRAINING_AUX_RD_INTERVAL,
- (uint8_t *)&training_rd_interval,
- sizeof(training_rd_interval));
+ } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) {
+ if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12)
+ core_link_read_dpcd(
+ link,
+ DP_TRAINING_AUX_RD_INTERVAL,
+ (uint8_t *)&training_rd_interval,
+ sizeof(training_rd_interval));
+ else if (dp_is_lttpr_present(link))
+ get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval);
}
switch (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) {
@@ -90,7 +111,8 @@ static uint32_t get_eq_training_aux_rd_interval(
}
void decide_8b_10b_training_settings(
- struct dc_link *link,
+ struct dc_link *link,
+ const struct link_resource *link_res,
const struct dc_link_settings *link_setting,
struct link_training_settings *lt_settings)
{
@@ -110,16 +132,24 @@ void decide_8b_10b_training_settings(
*/
lt_settings->link_settings.link_spread = link->dp_ss_off ?
LINK_SPREAD_DISABLED : LINK_SPREAD_05_DOWNSPREAD_30KHZ;
- lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting);
lt_settings->eq_pattern_time = get_eq_training_aux_rd_interval(link, link_setting);
lt_settings->pattern_for_cr = decide_cr_training_pattern(link_setting);
- lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_setting);
+ lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_res, link_setting);
lt_settings->enhanced_framing = 1;
lt_settings->should_set_fec_ready = true;
lt_settings->disallow_per_lane_settings = true;
lt_settings->always_match_dpcd_with_hw_lane_settings = true;
lt_settings->lttpr_mode = dp_decide_8b_10b_lttpr_mode(link);
+ lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting, lt_settings->lttpr_mode);
dp_hw_to_dpcd_lane_settings(lt_settings, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
+
+ /* Some embedded LTTPRs rely on receiving TPS2 before LT to interop reliably with sensitive VGA dongles
+ * This allows these LTTPRs to minimize freq/phase and skew variation during lock and deskew sequences
+ */
+ if ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) ==
+ AMD_EXT_DISPLAY_PATH_CAPS__DP_EARLY_8B10B_TPS2) {
+ lt_settings->lttpr_early_tps2 = true;
+ }
}
enum lttpr_mode dp_decide_8b_10b_lttpr_mode(struct dc_link *link)
@@ -151,12 +181,49 @@ enum lttpr_mode dp_decide_8b_10b_lttpr_mode(struct dc_link *link)
return LTTPR_MODE_NON_LTTPR;
}
+static void set_link_settings_and_perform_early_tps2_retimer_pre_lt_sequence(struct dc_link *link,
+ const struct link_resource *link_res,
+ struct link_training_settings *lt_settings,
+ uint32_t lttpr_count)
+{
+ /* Vendor-specific LTTPR early TPS2 sequence:
+ * 1. Output TPS2
+ * 2. Wait 400us
+ * 3. Set link settings as usual
+ * 4. Write TPS1 to DP_TRAINING_PATTERN_SET_PHY_REPEATERx targeting LTTPR closest to host
+ * 5. Wait 1ms
+ * 6. Begin link training as usual
+ *
+ * How the steps map onto the statements below:
+ * - Steps 1-2: dp_set_hw_training_pattern() is called with
+ *   DP_TRAINING_PATTERN_SEQUENCE_2 and dp_set_hw_lane_settings() with
+ *   @lt_settings, both for DPRX, followed by udelay(400).
+ * - Step 3: dpcd_set_link_settings() is called with @lt_settings.
+ * - Step 4: a single byte with TRAINING_PATTERN_SET = 1 and
+ *   SCRAMBLING_DISABLE = 1 is written to
+ *   DP_TRAINING_PATTERN_SET_PHY_REPEATER1 plus the offset returned by
+ *   dp_get_closest_lttpr_offset() for @lttpr_count.
+ * - Step 5: udelay(1000).
+ * - Step 6 is not performed here; the function returns after the wait.
+ *
+ * The results of the DPCD helper calls are not checked in this function.
+ */
+
+ uint32_t closest_lttpr_address_offset = dp_get_closest_lttpr_offset(lttpr_count);
+
+ union dpcd_training_pattern dpcd_pattern = {0};
+
+ dpcd_pattern.v1_4.TRAINING_PATTERN_SET = 1;
+ dpcd_pattern.v1_4.SCRAMBLING_DISABLE = 1;
+
+ DC_LOG_HW_LINK_TRAINING("%s\n GPU sends TPS2. Wait 400us.\n", __func__);
+
+ dp_set_hw_training_pattern(link, link_res, DP_TRAINING_PATTERN_SEQUENCE_2, DPRX);
+
+ dp_set_hw_lane_settings(link, link_res, lt_settings, DPRX);
+
+ udelay(400);
+
+ dpcd_set_link_settings(link, lt_settings);
+
+ core_link_write_dpcd(link, DP_TRAINING_PATTERN_SET_PHY_REPEATER1 + closest_lttpr_address_offset, &dpcd_pattern.raw, 1);
+
+ udelay(1000);
+ }
+
enum link_training_result perform_8b_10b_clock_recovery_sequence(
struct dc_link *link,
const struct link_resource *link_res,
struct link_training_settings *lt_settings,
uint32_t offset)
{
+ enum dc_status status;
uint32_t retries_cr;
uint32_t retry_count;
uint32_t wait_time_microsec;
@@ -216,7 +283,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence(
/* 4. Read lane status and requested drive
* settings as set by the sink
*/
- dp_get_lane_status_and_lane_adjust(
+ status = dp_get_lane_status_and_lane_adjust(
link,
lt_settings,
dpcd_lane_status,
@@ -224,6 +291,9 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence(
dpcd_lane_adjust,
offset);
+ if (dp_check_dpcd_reqeust_status(link, status))
+ return LINK_TRAINING_ABORT;
+
/* 5. check CR done*/
if (dp_is_cr_done(lane_count, dpcd_lane_status)) {
DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__);
@@ -273,6 +343,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
struct link_training_settings *lt_settings,
uint32_t offset)
{
+ enum dc_status status;
enum dc_dp_training_pattern tr_pattern;
uint32_t retries_ch_eq;
uint32_t wait_time_microsec;
@@ -308,12 +379,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
dpcd_set_lane_settings(link, lt_settings, offset);
/* 3. wait for receiver to lock-on*/
- wait_time_microsec = lt_settings->eq_pattern_time;
-
- if (is_repeater(lt_settings, offset))
- wait_time_microsec =
- dp_translate_training_aux_read_interval(
- link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]);
+ wait_time_microsec = dp_get_eq_aux_rd_interval(link, lt_settings, offset, retries_ch_eq);
dp_wait_for_training_aux_rd_interval(
link,
@@ -322,7 +388,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
/* 4. Read lane status and requested
* drive settings as set by the sink*/
- dp_get_lane_status_and_lane_adjust(
+ status = dp_get_lane_status_and_lane_adjust(
link,
lt_settings,
dpcd_lane_status,
@@ -330,6 +396,9 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
dpcd_lane_adjust,
offset);
+ if (dp_check_dpcd_reqeust_status(link, status))
+ return LINK_TRAINING_ABORT;
+
/* 5. check CR done*/
if (!dp_is_cr_done(lane_count, dpcd_lane_status))
return dpcd_lane_status[0].bits.CR_DONE_0 ?
@@ -339,7 +408,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
/* 6. check CHEQ done*/
if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) &&
dp_is_symbol_locked(lane_count, dpcd_lane_status) &&
- dp_is_interlane_aligned(dpcd_lane_status_updated))
+ dp_check_interlane_aligned(dpcd_lane_status_updated, link, retries_ch_eq))
return LINK_TRAINING_SUCCESS;
/* 7. update VS/PE/PC2 in lt_settings*/
@@ -358,7 +427,7 @@ enum link_training_result dp_perform_8b_10b_link_training(
{
enum link_training_result status = LINK_TRAINING_SUCCESS;
- uint8_t repeater_cnt;
+ uint8_t repeater_cnt = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
uint8_t repeater_id;
uint8_t lane = 0;
@@ -366,14 +435,16 @@ enum link_training_result dp_perform_8b_10b_link_training(
start_clock_recovery_pattern_early(link, link_res, lt_settings, DPRX);
/* 1. set link rate, lane count and spread. */
- dpcd_set_link_settings(link, lt_settings);
+ if (lt_settings->lttpr_early_tps2)
+ set_link_settings_and_perform_early_tps2_retimer_pre_lt_sequence(link, link_res, lt_settings, repeater_cnt);
+ else
+ dpcd_set_link_settings(link, lt_settings);
if (lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) {
/* 2. perform link training (set link training done
* to false is done as well)
*/
- repeater_cnt = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
for (repeater_id = repeater_cnt; (repeater_id > 0 && status == LINK_TRAINING_SUCCESS);
repeater_id--) {
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.h
index d26de15ce954..ea0de701d83f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.h
@@ -54,7 +54,8 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
enum lttpr_mode dp_decide_8b_10b_lttpr_mode(struct dc_link *link);
void decide_8b_10b_training_settings(
- struct dc_link *link,
+ struct dc_link *link,
+ const struct link_resource *link_res,
const struct dc_link_settings *link_setting,
struct link_training_settings *lt_settings);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.c
index 4c6b886a9da8..f99d26290bc0 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.c
@@ -39,6 +39,7 @@ bool dp_perform_link_training_skip_aux(
dp_decide_training_settings(
link,
+ link_res,
link_setting,
&lt_settings);
override_training_settings(
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
index 4f4e899e5c46..603537ffd128 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c
@@ -43,9 +43,6 @@
#define DC_LOGGER \
link->ctx->logger
-/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */
-#define DPIA_CLK_SYNC_DELAY 16000
-
/* Extend interval between training status checks for manual testing. */
#define DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US 60000000
@@ -113,6 +110,7 @@ static enum link_training_result dpia_configure_link(
dp_decide_training_settings(
link,
+ link_res,
link_setting,
lt_settings);
@@ -132,11 +130,14 @@ static enum link_training_result dpia_configure_link(
if (status != DC_OK && link->is_hpd_pending)
return LINK_TRAINING_ABORT;
- if (link->preferred_training_settings.fec_enable != NULL)
- fec_enable = *link->preferred_training_settings.fec_enable;
- else
- fec_enable = true;
- status = dp_set_fec_ready(link, link_res, fec_enable);
+ if (link_dp_get_encoding_format(link_setting) == DP_8b_10b_ENCODING) {
+ if (link->preferred_training_settings.fec_enable != NULL)
+ fec_enable = *link->preferred_training_settings.fec_enable;
+ else
+ fec_enable = true;
+ status = dp_set_fec_ready(link, link_res, fec_enable);
+ }
+
if (status != DC_OK && link->is_hpd_pending)
return LINK_TRAINING_ABORT;
@@ -291,7 +292,7 @@ static enum link_training_result dpia_training_cr_non_transparent(
{
enum link_training_result result = LINK_TRAINING_CR_FAIL_LANE0;
uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */
- enum dc_status status;
+ enum dc_status status = DC_ERROR_UNEXPECTED;
uint32_t retries_cr = 0; /* Number of consecutive attempts with same VS or PE. */
uint32_t retry_count = 0;
uint32_t wait_time_microsec = TRAINING_AUX_RD_INTERVAL; /* From DP spec, CR read interval is always 100us. */
@@ -430,10 +431,6 @@ static enum link_training_result dpia_training_cr_non_transparent(
retry_count++;
}
- /* Abort link training if clock recovery failed due to HPD unplug. */
- if (link->is_hpd_pending)
- result = LINK_TRAINING_ABORT;
-
DC_LOG_HW_LINK_TRAINING(
"%s\n DPIA(%d) clock recovery\n -hop(%d)\n - result(%d)\n - retries(%d)\n - status(%d)\n",
__func__,
@@ -537,10 +534,6 @@ static enum link_training_result dpia_training_cr_transparent(
retry_count++;
}
- /* Abort link training if clock recovery failed due to HPD unplug. */
- if (link->is_hpd_pending)
- result = LINK_TRAINING_ABORT;
-
DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) clock recovery\n -hop(%d)\n - result(%d)\n - retries(%d)\n",
__func__,
link->link_id.enum_id - ENUM_ID_1,
@@ -574,28 +567,6 @@ static enum link_training_result dpia_training_cr_phase(
return result;
}
-/* Return status read interval during equalization phase. */
-static uint32_t dpia_get_eq_aux_rd_interval(
- const struct dc_link *link,
- const struct link_training_settings *lt_settings,
- uint32_t hop)
-{
- uint32_t wait_time_microsec;
-
- if (hop == DPRX)
- wait_time_microsec = lt_settings->eq_pattern_time;
- else
- wait_time_microsec =
- dp_translate_training_aux_read_interval(
- link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]);
-
- /* Check debug option for extending aux read interval. */
- if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval)
- wait_time_microsec = DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US;
-
- return wait_time_microsec;
-}
-
/* Execute equalization phase of link training for specified hop in display
* path in non-transparent mode:
* - driver issues both DPCD and SET_CONFIG transactions.
@@ -617,9 +588,9 @@ static enum link_training_result dpia_training_eq_non_transparent(
enum link_training_result result = LINK_TRAINING_EQ_FAIL_EQ;
uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */
uint32_t retries_eq = 0;
- enum dc_status status;
+ enum dc_status status = DC_ERROR_UNEXPECTED;
enum dc_dp_training_pattern tr_pattern;
- uint32_t wait_time_microsec;
+ uint32_t wait_time_microsec = 0;
enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
union lane_align_status_updated dpcd_lane_status_updated = {0};
union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0};
@@ -731,10 +702,6 @@ static enum link_training_result dpia_training_eq_non_transparent(
lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
}
- /* Abort link training if equalization failed due to HPD unplug. */
- if (link->is_hpd_pending)
- result = LINK_TRAINING_ABORT;
-
DC_LOG_HW_LINK_TRAINING(
"%s\n DPIA(%d) equalization\n - hop(%d)\n - result(%d)\n - retries(%d)\n - status(%d)\n",
__func__,
@@ -811,7 +778,7 @@ static enum link_training_result dpia_training_eq_transparent(
/* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4
* has to share encoders unlike DP and USBC
*/
- if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->is_automated && retries_eq)) {
+ if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->skip_fallback_on_link_loss && retries_eq)) {
result = LINK_TRAINING_SUCCESS;
break;
}
@@ -822,10 +789,6 @@ static enum link_training_result dpia_training_eq_transparent(
lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
}
- /* Abort link training if equalization failed due to HPD unplug. */
- if (link->is_hpd_pending)
- result = LINK_TRAINING_ABORT;
-
DC_LOG_HW_LINK_TRAINING("%s\n DPIA(%d) equalization\n - hop(%d)\n - result(%d)\n - retries(%d)\n",
__func__,
link->link_id.enum_id - ENUM_ID_1,
@@ -952,6 +915,22 @@ static enum link_training_result dpia_training_end(
return result;
}
+/* Pick the interval to wait between status reads during the equalization
+ * phase for the given hop of the display path.
+ */
+uint32_t dpia_get_eq_aux_rd_interval(
+		const struct dc_link *link,
+		const struct link_training_settings *lt_settings,
+		uint32_t hop)
+{
+	/* The debug option for extending the aux read interval takes priority. */
+	if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval)
+		return DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US;
+
+	/* DPRX uses the EQ pattern time already stored in the training settings. */
+	if (hop == DPRX)
+		return lt_settings->eq_pattern_time;
+
+	/* Any other hop translates the LTTPR capability stored at index hop - 1. */
+	return dp_translate_training_aux_read_interval(
+			link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]);
+}
+
/* When aborting training of specified hop in display path, clean up by:
* - Attempting to clear DPCD TRAINING_PATTERN_SET, LINK_BW_SET and LANE_COUNT_SET.
* - Sending SET_CONFIG(SET_LINK) with lane count and link rate set to 0.
@@ -959,7 +938,7 @@ static enum link_training_result dpia_training_end(
* @param link DPIA link being trained.
* @param hop Hop in display path. DPRX = 0.
*/
-static void dpia_training_abort(
+void dpia_training_abort(
struct dc_link *link,
struct link_training_settings *lt_settings,
uint32_t hop)
@@ -984,7 +963,26 @@ static void dpia_training_abort(
core_link_write_dpcd(link, dpcd_tps_offset, &data, 1);
core_link_write_dpcd(link, DP_LINK_BW_SET, &data, 1);
core_link_write_dpcd(link, DP_LANE_COUNT_SET, &data, 1);
- core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data);
+
+ if (!link->dc->config.consolidated_dpia_dp_lt)
+ core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data);
+}
+
+/* Forward a training pattern to dc_process_dmub_dpia_set_tps_notification()
+ * for this link, unless one of the early-out conditions below applies.
+ */
+void dpia_set_tps_notification(
+		struct dc_link *link,
+		const struct link_training_settings *lt_settings,
+		uint8_t pattern,
+		uint32_t hop)
+{
+	uint8_t lttpr_cnt = 0; /* Number of hops/repeaters in display path. */
+
+	/* Only non-transparent LTTPR mode is handled. */
+	if (lt_settings->lttpr_mode != LTTPR_MODE_NON_TRANSPARENT)
+		return;
+
+	/* The video-idle pattern is never forwarded. */
+	if (pattern == DPCD_TRAINING_PATTERN_VIDEOIDLE)
+		return;
+
+	lttpr_cnt = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
+
+	/* The hop equal to the repeater count is skipped. */
+	if (hop == lttpr_cnt)
+		return;
+
+	dc_process_dmub_dpia_set_tps_notification(link->ctx->dc, link->link_index, pattern);
}
enum link_training_result dpia_perform_link_training(
@@ -1037,7 +1035,7 @@ enum link_training_result dpia_perform_link_training(
*/
if (result == LINK_TRAINING_SUCCESS) {
fsleep(5000);
- if (!link->is_automated)
+ if (!link->skip_fallback_on_link_loss)
result = dp_check_link_loss_status(link, &lt_settings);
} else if (result == LINK_TRAINING_ABORT)
dpia_training_abort(link, &lt_settings, repeater_id);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h
index b39fb9faf1c2..9f4eceb494c2 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h
@@ -28,6 +28,9 @@
#define __DC_LINK_DP_TRAINING_DPIA_H__
#include "link_dp_training.h"
+/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */
+#define DPIA_CLK_SYNC_DELAY 16000
+
/* Train DP tunneling link for USB4 DPIA display endpoint.
* DPIA equivalent of dc_link_dp_perfrorm_link_training.
* Aborts link training upon detection of sink unplug.
@@ -38,4 +41,20 @@ enum link_training_result dpia_perform_link_training(
const struct dc_link_settings *link_setting,
bool skip_video_pattern);
+void dpia_training_abort(
+ struct dc_link *link,
+ struct link_training_settings *lt_settings,
+ uint32_t hop);
+
+uint32_t dpia_get_eq_aux_rd_interval(
+ const struct dc_link *link,
+ const struct link_training_settings *lt_settings,
+ uint32_t hop);
+
+/* Forward the training pattern being set on a DPIA link through
+ * dc_process_dmub_dpia_set_tps_notification().
+ *
+ * The definition does nothing unless lt_settings->lttpr_mode is
+ * LTTPR_MODE_NON_TRANSPARENT and pattern is not
+ * DPCD_TRAINING_PATTERN_VIDEOIDLE, and it also does nothing when hop
+ * equals the parsed LTTPR repeater count.
+ *
+ * @param link DPIA link being trained.
+ * @param lt_settings link training settings; only lttpr_mode is read.
+ * @param pattern training pattern value to forward.
+ * @param hop Hop in display path.
+ */
+void dpia_set_tps_notification(
+ struct dc_link *link,
+ const struct link_training_settings *lt_settings,
+ uint8_t pattern,
+ uint32_t hop);
+
#endif /* __DC_LINK_DP_TRAINING_DPIA_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
index fd8f6f198146..ce174ce5579c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c
@@ -115,7 +115,7 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq
lt_settings->cr_pattern_time = 16000;
/* Fixed VS/PE specific: Toggle link rate */
- apply_toggle_rate_wa = (link->vendor_specific_lttpr_link_rate_wa == target_rate);
+ apply_toggle_rate_wa = ((link->vendor_specific_lttpr_link_rate_wa == target_rate) || (link->vendor_specific_lttpr_link_rate_wa == 0));
target_rate = get_dpcd_link_rate(&lt_settings->link_settings);
toggle_rate = (target_rate == 0x6) ? 0xA : 0x6;
@@ -186,351 +186,6 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq
return status;
}
-
-enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
- struct dc_link *link,
- const struct link_resource *link_res,
- struct link_training_settings *lt_settings)
-{
- const uint8_t vendor_lttpr_write_data_reset[4] = {0x1, 0x50, 0x63, 0xFF};
- const uint8_t offset = dp_parse_lttpr_repeater_count(
- link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
- const uint8_t vendor_lttpr_write_data_intercept_en[4] = {0x1, 0x55, 0x63, 0x0};
- const uint8_t vendor_lttpr_write_data_intercept_dis[4] = {0x1, 0x55, 0x63, 0x68};
- uint32_t pre_disable_intercept_delay_ms = 0;
- uint8_t vendor_lttpr_write_data_vs[4] = {0x1, 0x51, 0x63, 0x0};
- uint8_t vendor_lttpr_write_data_pe[4] = {0x1, 0x52, 0x63, 0x0};
- const uint8_t vendor_lttpr_write_data_4lane_1[4] = {0x1, 0x6E, 0xF2, 0x19};
- const uint8_t vendor_lttpr_write_data_4lane_2[4] = {0x1, 0x6B, 0xF2, 0x01};
- const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18};
- const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03};
- const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06};
- enum link_training_result status = LINK_TRAINING_SUCCESS;
- uint8_t lane = 0;
- union down_spread_ctrl downspread = {0};
- union lane_count_set lane_count_set = {0};
- uint8_t toggle_rate;
- uint8_t rate;
-
- /* Only 8b/10b is supported */
- ASSERT(link_dp_get_encoding_format(&lt_settings->link_settings) ==
- DP_8b_10b_ENCODING);
-
- if (lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) {
- status = perform_fixed_vs_pe_nontransparent_training_sequence(link, link_res, lt_settings);
- return status;
- }
-
- if (offset != 0xFF) {
- if (offset == 2) {
- pre_disable_intercept_delay_ms = link->dc->debug.fixed_vs_aux_delay_config_wa;
-
- /* Certain display and cable configuration require extra delay */
- } else if (offset > 2) {
- pre_disable_intercept_delay_ms = link->dc->debug.fixed_vs_aux_delay_config_wa * 2;
- }
- }
-
- /* Vendor specific: Reset lane settings */
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_reset[0], sizeof(vendor_lttpr_write_data_reset));
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_vs[0], sizeof(vendor_lttpr_write_data_vs));
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_pe[0], sizeof(vendor_lttpr_write_data_pe));
-
- /* Vendor specific: Enable intercept */
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_intercept_en[0], sizeof(vendor_lttpr_write_data_intercept_en));
-
-
- /* 1. set link rate, lane count and spread. */
-
- downspread.raw = (uint8_t)(lt_settings->link_settings.link_spread);
-
- lane_count_set.bits.LANE_COUNT_SET =
- lt_settings->link_settings.lane_count;
-
- lane_count_set.bits.ENHANCED_FRAMING = lt_settings->enhanced_framing;
- lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED = 0;
-
-
- if (lt_settings->pattern_for_eq < DP_TRAINING_PATTERN_SEQUENCE_4) {
- lane_count_set.bits.POST_LT_ADJ_REQ_GRANTED =
- link->dpcd_caps.max_ln_count.bits.POST_LT_ADJ_REQ_SUPPORTED;
- }
-
- core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL,
- &downspread.raw, sizeof(downspread));
-
- core_link_write_dpcd(link, DP_LANE_COUNT_SET,
- &lane_count_set.raw, 1);
-
- rate = get_dpcd_link_rate(&lt_settings->link_settings);
-
- /* Vendor specific: Toggle link rate */
- toggle_rate = (rate == 0x6) ? 0xA : 0x6;
-
- if (link->vendor_specific_lttpr_link_rate_wa == rate) {
- core_link_write_dpcd(
- link,
- DP_LINK_BW_SET,
- &toggle_rate,
- 1);
- }
-
- link->vendor_specific_lttpr_link_rate_wa = rate;
-
- core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1);
-
- DC_LOG_HW_LINK_TRAINING("%s\n %x rate = %x\n %x lane = %x framing = %x\n %x spread = %x\n",
- __func__,
- DP_LINK_BW_SET,
- lt_settings->link_settings.link_rate,
- DP_LANE_COUNT_SET,
- lt_settings->link_settings.lane_count,
- lt_settings->enhanced_framing,
- DP_DOWNSPREAD_CTRL,
- lt_settings->link_settings.link_spread);
-
- if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) {
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1));
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_4lane_2[0], sizeof(vendor_lttpr_write_data_4lane_2));
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_4lane_3[0], sizeof(vendor_lttpr_write_data_4lane_3));
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_4lane_4[0], sizeof(vendor_lttpr_write_data_4lane_4));
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_4lane_5[0], sizeof(vendor_lttpr_write_data_4lane_5));
- }
-
- /* 2. Perform link training */
-
- /* Perform Clock Recovery Sequence */
- if (status == LINK_TRAINING_SUCCESS) {
- const uint8_t max_vendor_dpcd_retries = 10;
- uint32_t retries_cr;
- uint32_t retry_count;
- uint32_t wait_time_microsec;
- enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX];
- union lane_align_status_updated dpcd_lane_status_updated;
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0};
- uint8_t i = 0;
-
- retries_cr = 0;
- retry_count = 0;
-
- memset(&dpcd_lane_status, '\0', sizeof(dpcd_lane_status));
- memset(&dpcd_lane_status_updated, '\0',
- sizeof(dpcd_lane_status_updated));
-
- while ((retries_cr < LINK_TRAINING_MAX_RETRY_COUNT) &&
- (retry_count < LINK_TRAINING_MAX_CR_RETRY)) {
-
-
- /* 1. call HWSS to set lane settings */
- dp_set_hw_lane_settings(
- link,
- link_res,
- lt_settings,
- 0);
-
- /* 2. update DPCD of the receiver */
- if (!retry_count) {
- /* EPR #361076 - write as a 5-byte burst,
- * but only for the 1-st iteration.
- */
- dpcd_set_lt_pattern_and_lane_settings(
- link,
- lt_settings,
- lt_settings->pattern_for_cr,
- 0);
- /* Vendor specific: Disable intercept */
- for (i = 0; i < max_vendor_dpcd_retries; i++) {
- if (pre_disable_intercept_delay_ms != 0)
- msleep(pre_disable_intercept_delay_ms);
- if (link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_intercept_dis[0],
- sizeof(vendor_lttpr_write_data_intercept_dis)))
- break;
-
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_intercept_en[0],
- sizeof(vendor_lttpr_write_data_intercept_en));
- }
- } else {
- vendor_lttpr_write_data_vs[3] = 0;
- vendor_lttpr_write_data_pe[3] = 0;
-
- for (lane = 0; lane < lane_count; lane++) {
- vendor_lttpr_write_data_vs[3] |=
- lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET << (2 * lane);
- vendor_lttpr_write_data_pe[3] |=
- lt_settings->dpcd_lane_settings[lane].bits.PRE_EMPHASIS_SET << (2 * lane);
- }
-
- /* Vendor specific: Update VS and PE to DPRX requested value */
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_vs[0], sizeof(vendor_lttpr_write_data_vs));
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_pe[0], sizeof(vendor_lttpr_write_data_pe));
-
- dpcd_set_lane_settings(
- link,
- lt_settings,
- 0);
- }
-
- /* 3. wait receiver to lock-on*/
- wait_time_microsec = lt_settings->cr_pattern_time;
-
- dp_wait_for_training_aux_rd_interval(
- link,
- wait_time_microsec);
-
- /* 4. Read lane status and requested drive
- * settings as set by the sink
- */
- dp_get_lane_status_and_lane_adjust(
- link,
- lt_settings,
- dpcd_lane_status,
- &dpcd_lane_status_updated,
- dpcd_lane_adjust,
- 0);
-
- /* 5. check CR done*/
- if (dp_is_cr_done(lane_count, dpcd_lane_status)) {
- status = LINK_TRAINING_SUCCESS;
- break;
- }
-
- /* 6. max VS reached*/
- if (dp_is_max_vs_reached(lt_settings))
- break;
-
- /* 7. same lane settings */
- /* Note: settings are the same for all lanes,
- * so comparing first lane is sufficient
- */
- if (lt_settings->dpcd_lane_settings[0].bits.VOLTAGE_SWING_SET ==
- dpcd_lane_adjust[0].bits.VOLTAGE_SWING_LANE)
- retries_cr++;
- else
- retries_cr = 0;
-
- /* 8. update VS/PE/PC2 in lt_settings*/
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
- retry_count++;
- }
-
- if (retry_count >= LINK_TRAINING_MAX_CR_RETRY) {
- ASSERT(0);
- DC_LOG_ERROR("%s: Link Training Error, could not get CR after %d tries. Possibly voltage swing issue",
- __func__,
- LINK_TRAINING_MAX_CR_RETRY);
-
- }
-
- status = dp_get_cr_failure(lane_count, dpcd_lane_status);
- }
-
- /* Perform Channel EQ Sequence */
- if (status == LINK_TRAINING_SUCCESS) {
- enum dc_dp_training_pattern tr_pattern;
- uint32_t retries_ch_eq;
- uint32_t wait_time_microsec;
- enum dc_lane_count lane_count = lt_settings->link_settings.lane_count;
- union lane_align_status_updated dpcd_lane_status_updated = {0};
- union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0};
- union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0};
-
- /* Note: also check that TPS4 is a supported feature*/
- tr_pattern = lt_settings->pattern_for_eq;
-
- dp_set_hw_training_pattern(link, link_res, tr_pattern, 0);
-
- status = LINK_TRAINING_EQ_FAIL_EQ;
-
- for (retries_ch_eq = 0; retries_ch_eq <= LINK_TRAINING_MAX_RETRY_COUNT;
- retries_ch_eq++) {
-
- dp_set_hw_lane_settings(link, link_res, lt_settings, 0);
-
- vendor_lttpr_write_data_vs[3] = 0;
- vendor_lttpr_write_data_pe[3] = 0;
-
- for (lane = 0; lane < lane_count; lane++) {
- vendor_lttpr_write_data_vs[3] |=
- lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET << (2 * lane);
- vendor_lttpr_write_data_pe[3] |=
- lt_settings->dpcd_lane_settings[lane].bits.PRE_EMPHASIS_SET << (2 * lane);
- }
-
- /* Vendor specific: Update VS and PE to DPRX requested value */
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_vs[0], sizeof(vendor_lttpr_write_data_vs));
- link_configure_fixed_vs_pe_retimer(link->ddc,
- &vendor_lttpr_write_data_pe[0], sizeof(vendor_lttpr_write_data_pe));
-
- /* 2. update DPCD*/
- if (!retries_ch_eq)
- /* EPR #361076 - write as a 5-byte burst,
- * but only for the 1-st iteration
- */
-
- dpcd_set_lt_pattern_and_lane_settings(
- link,
- lt_settings,
- tr_pattern, 0);
- else
- dpcd_set_lane_settings(link, lt_settings, 0);
-
- /* 3. wait for receiver to lock-on*/
- wait_time_microsec = lt_settings->eq_pattern_time;
-
- dp_wait_for_training_aux_rd_interval(
- link,
- wait_time_microsec);
-
- /* 4. Read lane status and requested
- * drive settings as set by the sink
- */
- dp_get_lane_status_and_lane_adjust(
- link,
- lt_settings,
- dpcd_lane_status,
- &dpcd_lane_status_updated,
- dpcd_lane_adjust,
- 0);
-
- /* 5. check CR done*/
- if (!dp_is_cr_done(lane_count, dpcd_lane_status)) {
- status = LINK_TRAINING_EQ_FAIL_CR;
- break;
- }
-
- /* 6. check CHEQ done*/
- if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) &&
- dp_is_symbol_locked(lane_count, dpcd_lane_status) &&
- dp_is_interlane_aligned(dpcd_lane_status_updated)) {
- status = LINK_TRAINING_SUCCESS;
- break;
- }
-
- /* 7. update VS/PE/PC2 in lt_settings*/
- dp_decide_lane_settings(lt_settings, dpcd_lane_adjust,
- lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
- }
- }
-
- return status;
-}
-
enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
struct dc_link *link,
const struct link_resource *link_res,
@@ -552,6 +207,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
const uint8_t vendor_lttpr_write_data_4lane_3[4] = {0x1, 0x6D, 0xF2, 0x18};
const uint8_t vendor_lttpr_write_data_4lane_4[4] = {0x1, 0x6C, 0xF2, 0x03};
const uint8_t vendor_lttpr_write_data_4lane_5[4] = {0x1, 0x03, 0xF3, 0x06};
+ const uint8_t vendor_lttpr_write_data_dpmf[4] = {0x1, 0x6, 0x70, 0x87};
enum link_training_result status = LINK_TRAINING_SUCCESS;
uint8_t lane = 0;
union down_spread_ctrl downspread = {0};
@@ -614,18 +270,21 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
rate = get_dpcd_link_rate(&lt_settings->link_settings);
- /* Vendor specific: Toggle link rate */
- toggle_rate = (rate == 0x6) ? 0xA : 0x6;
+ // Only perform toggle if FIXED_VS LTTPR reports no IEEE OUI
+ if (memcmp("\x0,\x0,\x0", &link->dpcd_caps.lttpr_caps.lttpr_ieee_oui[0], 3) == 0) {
+ /* Vendor specific: Toggle link rate */
+ toggle_rate = (rate == 0x6) ? 0xA : 0x6;
- if (link->vendor_specific_lttpr_link_rate_wa == rate) {
- core_link_write_dpcd(
- link,
- DP_LINK_BW_SET,
- &toggle_rate,
- 1);
- }
+ if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) {
+ core_link_write_dpcd(
+ link,
+ DP_LINK_BW_SET,
+ &toggle_rate,
+ 1);
+ }
- link->vendor_specific_lttpr_link_rate_wa = rate;
+ link->vendor_specific_lttpr_link_rate_wa = rate;
+ }
core_link_write_dpcd(link, DP_LINK_BW_SET, &rate, 1);
@@ -639,6 +298,10 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
DP_DOWNSPREAD_CTRL,
lt_settings->link_settings.link_spread);
+ link_configure_fixed_vs_pe_retimer(link->ddc,
+ &vendor_lttpr_write_data_dpmf[0],
+ sizeof(vendor_lttpr_write_data_dpmf));
+
if (lt_settings->link_settings.lane_count == LANE_COUNT_FOUR) {
link_configure_fixed_vs_pe_retimer(link->ddc,
&vendor_lttpr_write_data_4lane_1[0], sizeof(vendor_lttpr_write_data_4lane_1));
@@ -750,7 +413,6 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
/* 5. check CR done*/
if (dp_is_cr_done(lane_count, dpcd_lane_status)) {
- status = LINK_TRAINING_SUCCESS;
break;
}
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h
index c0d6ea329504..e61970e27661 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h
@@ -28,11 +28,6 @@
#define __DC_LINK_DP_FIXED_VS_PE_RETIMER_H__
#include "link_dp_training.h"
-enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy(
- struct dc_link *link,
- const struct link_resource *link_res,
- struct link_training_settings *lt_settings);
-
enum link_training_result dp_perform_fixed_vs_pe_training_sequence(
struct dc_link *link,
const struct link_resource *link_res,
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c
index 5c9a30211c10..584b9295a12a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c
@@ -130,7 +130,7 @@ static uint32_t dpcd_get_next_partition_size(const uint32_t address, const uint3
* XXX: Do not allow any two address ranges in this array to overlap
*/
static const struct dpcd_address_range mandatory_dpcd_blocks[] = {
- { DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV, DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT }};
+ { DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV, DP_PHY_REPEATER_128B132B_RATES }};
/*
* extend addresses to read all mandatory blocks together
@@ -164,7 +164,8 @@ static void dpcd_extend_address_range(
if (new_addr_range.start != in_address || new_addr_range.end != end_address) {
*out_address = new_addr_range.start;
*out_size = ADDRESS_RANGE_SIZE(new_addr_range.start, new_addr_range.end);
- *out_data = kzalloc(*out_size * sizeof(**out_data), GFP_KERNEL);
+ *out_data = kcalloc(*out_size, sizeof(**out_data), GFP_KERNEL);
+ ASSERT(*out_data);
}
}
@@ -205,7 +206,7 @@ enum dc_status core_link_read_dpcd(
uint32_t extended_size;
/* size of the remaining partitioned address space */
uint32_t size_left_to_read;
- enum dc_status status;
+ enum dc_status status = DC_ERROR_UNEXPECTED;
/* size of the next partition to be read from */
uint32_t partition_size;
uint32_t data_index = 0;
@@ -234,7 +235,7 @@ enum dc_status core_link_write_dpcd(
{
uint32_t partition_size;
uint32_t data_index = 0;
- enum dc_status status;
+ enum dc_status status = DC_ERROR_UNEXPECTED;
while (size) {
partition_size = dpcd_get_next_partition_size(address, size);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h
index 08d787a1e451..c2717c678c72 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h
@@ -25,7 +25,7 @@
#ifndef __LINK_DPCD_H__
#define __LINK_DPCD_H__
-#include "link.h"
+#include "link_service.h"
#include "dpcd_defs.h"
enum dc_status core_link_read_dpcd(
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index 98e715aa6d8e..5e806edbb9f6 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -33,10 +33,14 @@
#include "link_dp_capability.h"
#include "dm_helpers.h"
#include "dal_asic_id.h"
+#include "link_dp_phy.h"
#include "dce/dmub_psr.h"
#include "dc/dc_dmub_srv.h"
#include "dce/dmub_replay.h"
#include "abm.h"
+#include "resource.h"
+#define DC_LOGGER \
+ link->ctx->logger
#define DC_LOGGER_INIT(logger)
#define DP_SINK_PR_ENABLE_AND_CONFIGURATION 0x37B
@@ -153,33 +157,17 @@ bool edp_set_backlight_level_nits(struct dc_link *link,
uint32_t backlight_millinits,
uint32_t transition_time_in_ms)
{
- struct dpcd_source_backlight_set dpcd_backlight_set;
- uint8_t backlight_control = isHDR ? 1 : 0;
-
if (!link || (link->connector_signal != SIGNAL_TYPE_EDP &&
link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT))
return false;
- // OLEDs have no PWM, they can only use AUX
- if (link->dpcd_sink_ext_caps.bits.oled == 1)
- backlight_control = 1;
-
- *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits;
- *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms;
-
- link->backlight_settings.backlight_millinits = backlight_millinits;
-
- if (!link->dpcd_caps.panel_luminance_control) {
- if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL,
- (uint8_t *)(&dpcd_backlight_set),
- sizeof(dpcd_backlight_set)) != DC_OK)
- return false;
+ if (link->is_dds && !link->dpcd_caps.panel_luminance_control)
+ return true;
- if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_CONTROL,
- &backlight_control, 1) != DC_OK)
- return false;
- } else {
- const uint8_t backlight_enable = DP_EDP_PANEL_LUMINANCE_CONTROL_ENABLE;
+ // use internal backlight control if dmub capabilities are not present
+ if (link->backlight_control_type == BACKLIGHT_CONTROL_VESA_AUX &&
+ !link->dc->caps.dmub_caps.aux_backlight_support) {
+ uint8_t backlight_enable = 0;
struct target_luminance_value *target_luminance = NULL;
//if target luminance value is greater than 24 bits, clip the value to 24 bits
@@ -188,6 +176,20 @@ bool edp_set_backlight_level_nits(struct dc_link *link,
target_luminance = (struct target_luminance_value *)&backlight_millinits;
+ //make sure we disable AMD ABC first.
+ core_link_read_dpcd(link, DP_SOURCE_BACKLIGHT_CONTROL,
+ &backlight_enable, sizeof(uint8_t));
+ if (backlight_enable) {
+ backlight_enable = 0;
+ core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_CONTROL,
+ &backlight_enable, 1);
+ }
+
+ core_link_read_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
+ &backlight_enable, sizeof(uint8_t));
+
+ backlight_enable |= DP_EDP_PANEL_LUMINANCE_CONTROL_ENABLE;
+
if (core_link_write_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
&backlight_enable,
sizeof(backlight_enable)) != DC_OK)
@@ -197,6 +199,36 @@ bool edp_set_backlight_level_nits(struct dc_link *link,
(uint8_t *)(target_luminance),
sizeof(struct target_luminance_value)) != DC_OK)
return false;
+ } else if (link->backlight_control_type == BACKLIGHT_CONTROL_AMD_AUX) {
+ struct dpcd_source_backlight_set dpcd_backlight_set;
+ *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits;
+ *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms;
+
+ uint8_t backlight_control = isHDR ? 1 : 0;
+ uint8_t backlight_enable = 0;
+
+ // OLEDs have no PWM, they can only use AUX
+ if (link->dpcd_sink_ext_caps.bits.oled == 1)
+ backlight_control = 1;
+
+ //make sure we disable VESA ABC first.
+ core_link_read_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
+ &backlight_enable, sizeof(uint8_t));
+
+ if (backlight_enable & DP_EDP_PANEL_LUMINANCE_CONTROL_ENABLE) {
+ backlight_enable &= ~DP_EDP_PANEL_LUMINANCE_CONTROL_ENABLE;
+ core_link_write_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
+ &backlight_enable, sizeof(backlight_enable));
+ }
+
+ if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL,
+ (uint8_t *)(&dpcd_backlight_set),
+ sizeof(dpcd_backlight_set)) != DC_OK)
+ return false;
+
+ if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_CONTROL,
+ &backlight_control, 1) != DC_OK)
+ return false;
}
return true;
@@ -214,6 +246,8 @@ bool edp_get_backlight_level_nits(struct dc_link *link,
link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT))
return false;
+ if (link->is_dds)
+ return false;
if (!core_link_read_dpcd(link, DP_SOURCE_BACKLIGHT_CURRENT_PEAK,
dpcd_backlight_get.raw,
sizeof(union dpcd_source_backlight_get)))
@@ -240,6 +274,8 @@ bool edp_backlight_enable_aux(struct dc_link *link, bool enable)
link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT))
return false;
+ if (link->is_dds)
+ return true;
if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_ENABLE,
&backlight_enable, 1) != DC_OK)
return false;
@@ -280,9 +316,9 @@ bool set_default_brightness_aux(struct dc_link *link)
if (link && link->dpcd_sink_ext_caps.bits.oled == 1) {
if (!read_default_bl_aux(link, &default_backlight))
default_backlight = 150000;
- // if < 5 nits or > 5000, it might be wrong readback
- if (default_backlight < 5000 || default_backlight > 5000000)
- default_backlight = 150000; //
+ // if < 1 nit or > 5000 nits (value is in millinits), it might be a wrong readback; note that a 0 nits readback is therefore also replaced by the default.
+ if (default_backlight < 1000 || default_backlight > 5000000)
+ default_backlight = 150000;
return edp_set_backlight_level_nits(link, true,
default_backlight, 0);
@@ -290,29 +326,38 @@ bool set_default_brightness_aux(struct dc_link *link)
return false;
}
-bool set_cached_brightness_aux(struct dc_link *link)
+bool edp_is_ilr_optimization_enabled(struct dc_link *link)
{
- if (link->backlight_settings.backlight_millinits)
- return edp_set_backlight_level_nits(link, true,
- link->backlight_settings.backlight_millinits, 0);
- else
- return set_default_brightness_aux(link);
- return false;
+ if (link->dpcd_caps.edp_supported_link_rates_count == 0 || !link->panel_config.ilr.optimize_edp_link_rate)
+ return false;
+ return true;
+}
+
+enum dc_link_rate get_max_edp_link_rate(struct dc_link *link)
+{
+ enum dc_link_rate max_ilr_rate = LINK_RATE_UNKNOWN;
+ enum dc_link_rate max_non_ilr_rate = dp_get_max_link_cap(link).link_rate;
+
+ for (int i = 0; i < link->dpcd_caps.edp_supported_link_rates_count; i++) {
+ if (max_ilr_rate < link->dpcd_caps.edp_supported_link_rates[i])
+ max_ilr_rate = link->dpcd_caps.edp_supported_link_rates[i];
+ }
+
+ return (max_ilr_rate > max_non_ilr_rate ? max_ilr_rate : max_non_ilr_rate);
}
bool edp_is_ilr_optimization_required(struct dc_link *link,
struct dc_crtc_timing *crtc_timing)
{
struct dc_link_settings link_setting;
- uint8_t link_bw_set;
- uint8_t link_rate_set;
+ uint8_t link_bw_set = 0;
+ uint8_t link_rate_set = 0;
uint32_t req_bw;
union lane_count_set lane_count_set = {0};
ASSERT(link || crtc_timing); // invalid input
- if (link->dpcd_caps.edp_supported_link_rates_count == 0 ||
- !link->panel_config.ilr.optimize_edp_link_rate)
+ if (!edp_is_ilr_optimization_enabled(link))
return false;
@@ -362,6 +407,34 @@ void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd)
link->dc->hwss.edp_backlight_control(link, true);
}
+void edp_set_panel_power(struct dc_link *link, bool powerOn)
+{
+ if (powerOn) {
+ // 1. panel VDD on
+ if (!link->dc->config.edp_no_power_sequencing)
+ link->dc->hwss.edp_power_control(link, true);
+ link->dc->hwss.edp_wait_for_hpd_ready(link, true);
+
+ // 2. panel BL on
+ if (link->dc->hwss.edp_backlight_control)
+ link->dc->hwss.edp_backlight_control(link, true);
+
+ // 3. Rx power on
+ dpcd_write_rx_power_ctrl(link, true);
+ } else {
+ // 3. Rx power off
+ dpcd_write_rx_power_ctrl(link, false);
+
+ // 2. panel BL off
+ if (link->dc->hwss.edp_backlight_control)
+ link->dc->hwss.edp_backlight_control(link, false);
+
+ // 1. panel VDD off
+ if (!link->dc->config.edp_no_power_sequencing)
+ link->dc->hwss.edp_power_control(link, false);
+ }
+}
+
bool edp_wait_for_t12(struct dc_link *link)
{
if (link->connector_signal == SIGNAL_TYPE_EDP && link->dc->hwss.edp_wait_for_T12) {
@@ -474,18 +547,21 @@ static struct pipe_ctx *get_pipe_from_link(const struct dc_link *link)
}
bool edp_set_backlight_level(const struct dc_link *link,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp)
+ struct set_backlight_level_params *backlight_level_params)
{
struct dc *dc = link->ctx->dc;
+ uint32_t backlight_pwm_u16_16 = backlight_level_params->backlight_pwm_u16_16;
+ uint32_t frame_ramp = backlight_level_params->frame_ramp;
- DC_LOGGER_INIT(link->ctx->logger);
DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n",
backlight_pwm_u16_16, backlight_pwm_u16_16);
if (dc_is_embedded_signal(link->connector_signal)) {
struct pipe_ctx *pipe_ctx = get_pipe_from_link(link);
+ if (link->panel_cntl)
+ link->panel_cntl->stored_backlight_registers.USER_LEVEL = backlight_pwm_u16_16;
+
if (pipe_ctx) {
/* Disable brightness ramping when the display is blanked
* as it can hang the DMCU
@@ -496,10 +572,11 @@ bool edp_set_backlight_level(const struct dc_link *link,
return false;
}
+ backlight_level_params->frame_ramp = frame_ramp;
+
dc->hwss.set_backlight_level(
pipe_ctx,
- backlight_pwm_u16_16,
- frame_ramp);
+ backlight_level_params);
}
return true;
}
@@ -626,6 +703,32 @@ bool edp_setup_psr(struct dc_link *link,
if (!link)
return false;
+ /* This is a workaround: some vendors require the source to
+ * read the PSR cap; otherwise, the vendor's PSR feature will
+ * fall back to its default behavior, causing a misconfiguration
+ * of this feature.
+ */
+ if (link->panel_config.psr.read_psrcap_again) {
+ dm_helpers_dp_read_dpcd(
+ link->ctx,
+ link,
+ DP_PSR_SUPPORT,
+ &link->dpcd_caps.psr_info.psr_version,
+ sizeof(link->dpcd_caps.psr_info.psr_version));
+ }
+
+ //Clear PSR cfg
+ memset(&psr_configuration, 0, sizeof(psr_configuration));
+ dm_helpers_dp_write_dpcd(
+ link->ctx,
+ link,
+ DP_PSR_EN_CFG,
+ &psr_configuration.raw,
+ sizeof(psr_configuration.raw));
+
+ if (link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED)
+ return false;
+
dc = link->ctx->dc;
dmcu = dc->res_pool->dmcu;
psr = dc->res_pool->psr;
@@ -636,9 +739,6 @@ bool edp_setup_psr(struct dc_link *link,
if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
return false;
-
- memset(&psr_configuration, 0, sizeof(psr_configuration));
-
psr_configuration.bits.ENABLE = 1;
psr_configuration.bits.CRC_VERIFICATION = 1;
psr_configuration.bits.FRAME_CAPTURE_INDICATION =
@@ -716,7 +816,7 @@ bool edp_setup_psr(struct dc_link *link,
psr_context->crtcTimingVerticalTotal = stream->timing.v_total;
psr_context->vsync_rate_hz = div64_u64(div64_u64((stream->
- timing.pix_clk_100hz * 100),
+ timing.pix_clk_100hz * (u64)100),
stream->timing.v_total),
stream->timing.h_total);
@@ -784,6 +884,8 @@ bool edp_setup_psr(struct dc_link *link,
psr_context->dsc_slice_height = psr_config->dsc_slice_height;
+ psr_context->os_request_force_ffu = psr_config->os_request_force_ffu;
+
if (psr) {
link->psr_settings.psr_feature_enabled = psr->funcs->psr_copy_settings(psr,
link, psr_context, panel_inst);
@@ -803,7 +905,7 @@ bool edp_setup_psr(struct dc_link *link,
}
-void edp_get_psr_residency(const struct dc_link *link, uint32_t *residency)
+void edp_get_psr_residency(const struct dc_link *link, uint32_t *residency, enum psr_residency_mode mode)
{
struct dc *dc = link->ctx->dc;
struct dmub_psr *psr = dc->res_pool->psr;
@@ -814,7 +916,7 @@ void edp_get_psr_residency(const struct dc_link *link, uint32_t *residency)
// PSR residency measurements only supported on DMCUB
if (psr != NULL && link->psr_settings.psr_feature_enabled)
- psr->funcs->psr_get_residency(psr, residency, panel_inst);
+ psr->funcs->psr_get_residency(psr, residency, panel_inst, mode);
else
*residency = 0;
}
@@ -846,7 +948,8 @@ bool edp_set_replay_allow_active(struct dc_link *link, const bool *allow_active,
/* Set power optimization flag */
if (power_opts && link->replay_settings.replay_power_opt_active != *power_opts) {
- if (link->replay_settings.replay_feature_enabled && replay->funcs->replay_set_power_opt) {
+ if (replay != NULL && link->replay_settings.replay_feature_enabled &&
+ replay->funcs->replay_set_power_opt) {
replay->funcs->replay_set_power_opt(replay, *power_opts, panel_inst);
link->replay_settings.replay_power_opt_active = *power_opts;
}
@@ -884,15 +987,14 @@ bool edp_get_replay_state(const struct dc_link *link, uint64_t *state)
bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream)
{
/* To-do: Setup Replay */
- struct dc *dc = link->ctx->dc;
- struct dmub_replay *replay = dc->res_pool->replay;
+ struct dc *dc;
+ struct dmub_replay *replay;
int i;
unsigned int panel_inst;
struct replay_context replay_context = { 0 };
unsigned int lineTimeInNs = 0;
-
- union replay_enable_and_configuration replay_config;
+ union replay_enable_and_configuration replay_config = { 0 };
union dpcd_alpm_configuration alpm_config;
@@ -901,6 +1003,20 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
if (!link)
return false;
+ //Clear Replay config
+ dm_helpers_dp_write_dpcd(link->ctx, link,
+ DP_SINK_PR_ENABLE_AND_CONFIGURATION,
+ (uint8_t *)&(replay_config.raw), sizeof(uint8_t));
+
+ if (!(link->replay_settings.config.replay_supported))
+ return false;
+
+ link->replay_settings.config.replay_error_status.raw = 0;
+
+ dc = link->ctx->dc;
+
+ replay = dc->res_pool->replay;
+
if (!replay)
return false;
@@ -929,8 +1045,9 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
replay_context.line_time_in_ns = lineTimeInNs;
- if (replay)
- link->replay_settings.replay_feature_enabled =
+ replay_context.os_request_force_ffu = link->replay_settings.config.os_request_force_ffu;
+
+ link->replay_settings.replay_feature_enabled =
replay->funcs->replay_copy_settings(replay, link, &replay_context, panel_inst);
if (link->replay_settings.replay_feature_enabled) {
@@ -943,7 +1060,13 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
(uint8_t *)&(replay_config.raw), sizeof(uint8_t));
memset(&alpm_config, 0, sizeof(alpm_config));
- alpm_config.bits.ENABLE = 1;
+ alpm_config.bits.ENABLE = link->replay_settings.config.alpm_mode != DC_ALPM_UNSUPPORTED ? 1 : 0;
+
+ if (link->replay_settings.config.alpm_mode == DC_ALPM_AUXLESS) {
+ alpm_config.bits.ALPM_MODE_SEL = 1;
+ alpm_config.bits.ACDS_PERIOD_DURATION = 0;
+ }
+
dm_helpers_dp_write_dpcd(
link->ctx,
link,
@@ -951,10 +1074,43 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
&alpm_config.raw,
sizeof(alpm_config.raw));
}
+
+ link->replay_settings.config.replay_video_conferencing_optimization_enabled = false;
+
return true;
}
-bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal)
+/*
+ * This is a general interface for Replay to send a 32-bit variable to DMUB.
+ * replay_FW_Message_type: Indicates which instruction or variable to pass to DMUB.
+ * cmd_data: Value of the config.
+ */
+bool edp_send_replay_cmd(struct dc_link *link,
+ enum replay_FW_Message_type msg,
+ union dmub_replay_cmd_set *cmd_data)
+{
+ struct dc *dc = link->ctx->dc;
+ struct dmub_replay *replay = dc->res_pool->replay;
+ unsigned int panel_inst;
+
+ if (!replay)
+ return false;
+
+ DC_LOGGER_INIT(link->ctx->logger);
+
+ if (dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+ cmd_data->panel_inst = panel_inst;
+ else {
+ DC_LOG_DC("%s(): get edp panel inst fail ", __func__);
+ return false;
+ }
+
+ replay->funcs->replay_send_cmd(replay, msg, cmd_data);
+
+ return true;
+}
+
+bool edp_set_coasting_vtotal(struct dc_link *link, uint32_t coasting_vtotal)
{
struct dc *dc = link->ctx->dc;
struct dmub_replay *replay = dc->res_pool->replay;
@@ -975,7 +1131,7 @@ bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal)
}
bool edp_replay_residency(const struct dc_link *link,
- unsigned int *residency, const bool is_start, const bool is_alpm)
+ unsigned int *residency, const bool is_start, const enum pr_residency_mode mode)
{
struct dc *dc = link->ctx->dc;
struct dmub_replay *replay = dc->res_pool->replay;
@@ -984,14 +1140,44 @@ bool edp_replay_residency(const struct dc_link *link,
if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
return false;
+ if (!residency)
+ return false;
+
if (replay != NULL && link->replay_settings.replay_feature_enabled)
- replay->funcs->replay_residency(replay, panel_inst, residency, is_start, is_alpm);
+ replay->funcs->replay_residency(replay, panel_inst, residency, is_start, mode);
else
*residency = 0;
return true;
}
+bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link,
+ const unsigned int *power_opts, uint32_t coasting_vtotal)
+{
+ struct dc *dc = link->ctx->dc;
+ struct dmub_replay *replay = dc->res_pool->replay;
+ unsigned int panel_inst;
+
+ if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst))
+ return false;
+
+ /* This function returns true only when both the power option and the coasting vtotal have changed */
+ if (power_opts && link->replay_settings.replay_power_opt_active != *power_opts &&
+ coasting_vtotal && link->replay_settings.coasting_vtotal != coasting_vtotal) {
+ if (link->replay_settings.replay_feature_enabled &&
+ replay->funcs->replay_set_power_opt_and_coasting_vtotal) {
+ replay->funcs->replay_set_power_opt_and_coasting_vtotal(replay,
+ *power_opts, panel_inst, coasting_vtotal);
+ link->replay_settings.replay_power_opt_active = *power_opts;
+ link->replay_settings.coasting_vtotal = coasting_vtotal;
+ } else
+ return false;
+ } else
+ return false;
+
+ return true;
+}
+
static struct abm *get_abm_from_stream_res(const struct dc_link *link)
{
int i;
@@ -999,11 +1185,11 @@ static struct abm *get_abm_from_stream_res(const struct dc_link *link)
struct abm *abm = NULL;
for (i = 0; i < MAX_PIPES; i++) {
- struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i];
- struct dc_stream_state *stream = pipe_ctx.stream;
+ struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct dc_stream_state *stream = pipe_ctx->stream;
if (stream && stream->link == link) {
- abm = pipe_ctx.stream_res.abm;
+ abm = pipe_ctx->stream_res.abm;
break;
}
}
@@ -1038,3 +1224,76 @@ int edp_get_target_backlight_pwm(const struct dc_link *link)
return (int) abm->funcs->get_target_backlight(abm);
}
+
+bool is_smartmux_suported(struct dc_link *link)
+{
+ if (link->dc->caps.is_apu)
+ return false;
+ if (!link->dc->config.smart_mux_version)
+ return false;
+
+ return true;
+}
+
+static void edp_set_assr_enable(const struct dc *pDC, struct dc_link *link,
+ struct link_resource *link_res, bool enable)
+{
+ union dmub_rb_cmd cmd;
+ bool use_hpo_dp_link_enc = false;
+ uint8_t link_enc_index = 0;
+ uint8_t phy_type = 0;
+ uint8_t phy_id = 0;
+
+ if (!pDC->config.use_assr_psp_message)
+ return;
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ link_enc_index = link->link_enc->transmitter - TRANSMITTER_UNIPHY_A;
+
+ if (link_res->hpo_dp_link_enc) {
+ link_enc_index = link_res->hpo_dp_link_enc->inst;
+ use_hpo_dp_link_enc = true;
+ }
+
+ if (enable)
+ phy_type = ((dp_get_panel_mode(link) == DP_PANEL_MODE_EDP) ? 1 : 0);
+
+ phy_id = resource_transmitter_to_phy_idx(pDC, link->link_enc->transmitter);
+
+ cmd.assr_enable.header.type = DMUB_CMD__PSP;
+ cmd.assr_enable.header.sub_type = DMUB_CMD__PSP_ASSR_ENABLE;
+ cmd.assr_enable.assr_data.enable = enable;
+ cmd.assr_enable.assr_data.phy_port_type = phy_type;
+ cmd.assr_enable.assr_data.phy_port_id = phy_id;
+ cmd.assr_enable.assr_data.link_enc_index = link_enc_index;
+ cmd.assr_enable.assr_data.hpo_mode = use_hpo_dp_link_enc;
+
+ dc_wake_and_execute_dmub_cmd(pDC->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+}
+
+void edp_set_panel_assr(struct dc_link *link, struct pipe_ctx *pipe_ctx,
+ enum dp_panel_mode *panel_mode, bool enable)
+{
+ struct link_resource *link_res = &pipe_ctx->link_res;
+ struct cp_psp *cp_psp = &pipe_ctx->stream->ctx->cp_psp;
+
+ if (*panel_mode != DP_PANEL_MODE_EDP)
+ return;
+
+ if (link->dc->config.use_assr_psp_message) {
+ edp_set_assr_enable(link->dc, link, link_res, enable);
+ } else if (cp_psp && cp_psp->funcs.enable_assr && enable) {
+ /* ASSR is bound to fail with unsigned PSP
+ * verstage used during development phase.
+ * Report and continue with eDP panel mode to
+ * perform eDP link training with right settings
+ */
+ bool result;
+
+ result = cp_psp->funcs.enable_assr(cp_psp->handle, link);
+
+ if (!result && link->panel_mode != DP_PANEL_MODE_EDP)
+ *panel_mode = DP_PANEL_MODE_DEFAULT;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
index 0a5bbda8c739..62a6344e613e 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h
@@ -25,20 +25,19 @@
#ifndef __DC_LINK_EDP_PANEL_CONTROL_H__
#define __DC_LINK_EDP_PANEL_CONTROL_H__
-#include "link.h"
+#include "link_service.h"
enum dp_panel_mode dp_get_panel_mode(struct dc_link *link);
void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode);
bool set_default_brightness_aux(struct dc_link *link);
-bool set_cached_brightness_aux(struct dc_link *link);
+bool is_smartmux_suported(struct dc_link *link);
void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd);
int edp_get_backlight_level(const struct dc_link *link);
bool edp_get_backlight_level_nits(struct dc_link *link,
uint32_t *backlight_millinits_avg,
uint32_t *backlight_millinits_peak);
bool edp_set_backlight_level(const struct dc_link *link,
- uint32_t backlight_pwm_u16_16,
- uint32_t frame_ramp);
+ struct set_backlight_level_params *backlight_level_params);
bool edp_set_backlight_level_nits(struct dc_link *link,
bool isHDR,
uint32_t backlight_millinits,
@@ -52,21 +51,31 @@ bool edp_setup_psr(struct dc_link *link,
struct psr_context *psr_context);
bool edp_set_sink_vtotal_in_psr_active(const struct dc_link *link,
uint16_t psr_vtotal_idle, uint16_t psr_vtotal_su);
-void edp_get_psr_residency(const struct dc_link *link, uint32_t *residency);
+void edp_get_psr_residency(const struct dc_link *link, uint32_t *residency, enum psr_residency_mode mode);
bool edp_set_replay_allow_active(struct dc_link *dc_link, const bool *enable,
bool wait, bool force_static, const unsigned int *power_opts);
bool edp_setup_replay(struct dc_link *link,
const struct dc_stream_state *stream);
-bool edp_set_coasting_vtotal(struct dc_link *link, uint16_t coasting_vtotal);
+bool edp_send_replay_cmd(struct dc_link *link,
+ enum replay_FW_Message_type msg,
+ union dmub_replay_cmd_set *cmd_data);
+bool edp_set_coasting_vtotal(struct dc_link *link, uint32_t coasting_vtotal);
bool edp_replay_residency(const struct dc_link *link,
- unsigned int *residency, const bool is_start, const bool is_alpm);
+ unsigned int *residency, const bool is_start, const enum pr_residency_mode mode);
bool edp_get_replay_state(const struct dc_link *link, uint64_t *state);
+bool edp_set_replay_power_opt_and_coasting_vtotal(struct dc_link *link,
+ const unsigned int *power_opts, uint32_t coasting_vtotal);
bool edp_wait_for_t12(struct dc_link *link);
bool edp_is_ilr_optimization_required(struct dc_link *link,
struct dc_crtc_timing *crtc_timing);
+bool edp_is_ilr_optimization_enabled(struct dc_link *link);
+enum dc_link_rate get_max_edp_link_rate(struct dc_link *link);
bool edp_backlight_enable_aux(struct dc_link *link, bool enable);
void edp_add_delay_for_T9(struct dc_link *link);
bool edp_receiver_ready_T9(struct dc_link *link);
bool edp_receiver_ready_T7(struct dc_link *link);
bool edp_power_alpm_dpcd_enable(struct dc_link *link, bool enable);
+void edp_set_panel_power(struct dc_link *link, bool powerOn);
+void edp_set_panel_assr(struct dc_link *link, struct pipe_ctx *pipe_ctx,
+ enum dp_panel_mode *panel_mode, bool enable);
#endif /* __DC_LINK_EDP_POWER_CONTROL_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.c
index e3d729ab5b9f..caa617883f62 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.c
@@ -35,7 +35,7 @@
bool link_get_hpd_state(struct dc_link *link)
{
- uint32_t state;
+ uint32_t state = 0;
dal_gpio_lock_pin(link->hpd_gpio);
dal_gpio_get_value(link->hpd_gpio, &state);
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h
index 4fb526b264f9..af529328ba17 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h
@@ -26,7 +26,7 @@
#ifndef __DC_LINK_HPD_H__
#define __DC_LINK_HPD_H__
-#include "link.h"
+#include "link_service.h"
enum hpd_source_id get_hpd_line(struct dc_link *link);
/*
diff --git a/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile b/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile
new file mode 100644
index 000000000000..2d4b7a85847d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile
@@ -0,0 +1,54 @@
+#
+# Copyright 2020 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+#
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN20
+###############################################################################
+MMHUBBUB_DCN20 = dcn20_mmhubbub.o
+
+AMD_DAL_MMHUBBUB_DCN20 = $(addprefix $(AMDDALPATH)/dc/mmhubbub/dcn20/,$(MMHUBBUB_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_MMHUBBUB_DCN20)
+
+###############################################################################
+# DCN32
+###############################################################################
+MMHUBBUB_DCN32 = dcn32_mmhubbub.o
+
+AMD_DAL_MMHUBBUB_DCN32 = $(addprefix $(AMDDALPATH)/dc/mmhubbub/dcn32/,$(MMHUBBUB_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_MMHUBBUB_DCN32)
+
+###############################################################################
+# DCN35
+###############################################################################
+MMHUBBUB_DCN35 = dcn35_mmhubbub.o
+
+AMD_DAL_MMHUBBUB_DCN35 = $(addprefix $(AMDDALPATH)/dc/mmhubbub/dcn35/,$(MMHUBBUB_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_MMHUBBUB_DCN35)
+endif
+
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c
index 259a98e4ee2c..2a422e223bf2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c
@@ -284,7 +284,8 @@ void mcifwb2_dump_frame(struct mcif_wb *mcif_wb,
REG_UPDATE(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB_BUFMGR_SW_LOCK, 0xf);
- memcpy(dest_luma_buffer, luma_buffer, mcif_params->luma_pitch * dest_height);
- memcpy(dest_chroma_buffer, chroma_buffer, mcif_params->chroma_pitch * dest_height / 2);
+ /* Widen to size_t before multiplying so both copy lengths are computed at size_t width. */
+ memcpy(dest_luma_buffer, luma_buffer, (size_t)mcif_params->luma_pitch * dest_height);
+ memcpy(dest_chroma_buffer, chroma_buffer, (size_t)mcif_params->chroma_pitch * dest_height / 2);
REG_UPDATE(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB_BUFMGR_SW_LOCK, 0x0);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h
index 5ab32aa51e13..5ab32aa51e13 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.c
index c3b089ba511a..c3b089ba511a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.c
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.h
index e460cf8d9041..ef15b4f1f6b9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.h
@@ -43,8 +43,6 @@
SRI2(MCIF_WB_BUF_4_STATUS2, MCIF_WB, inst),\
SRI2(MCIF_WB_ARBITRATION_CONTROL, MCIF_WB, inst),\
SRI2(MCIF_WB_SCLK_CHANGE, MCIF_WB, inst),\
- SRI2(MCIF_WB_TEST_DEBUG_INDEX, MCIF_WB, inst),\
- SRI2(MCIF_WB_TEST_DEBUG_DATA, MCIF_WB, inst),\
SRI2(MCIF_WB_BUF_1_ADDR_Y, MCIF_WB, inst),\
SRI2(MCIF_WB_BUF_1_ADDR_C, MCIF_WB, inst),\
SRI2(MCIF_WB_BUF_2_ADDR_Y, MCIF_WB, inst),\
@@ -157,8 +155,6 @@
SF(MCIF_WB_ARBITRATION_CONTROL, MCIF_WB_CLIENT_ARBITRATION_SLICE, mask_sh),\
SF(MCIF_WB_ARBITRATION_CONTROL, MCIF_WB_TIME_PER_PIXEL, mask_sh),\
SF(MCIF_WB_SCLK_CHANGE, WM_CHANGE_ACK_FORCE_ON, mask_sh),\
- SF(MCIF_WB_TEST_DEBUG_INDEX, MCIF_WB_TEST_DEBUG_INDEX, mask_sh),\
- SF(MCIF_WB_TEST_DEBUG_DATA, MCIF_WB_TEST_DEBUG_DATA, mask_sh),\
SF(MCIF_WB_BUF_1_ADDR_Y, MCIF_WB_BUF_1_ADDR_Y, mask_sh),\
SF(MCIF_WB_BUF_1_ADDR_C, MCIF_WB_BUF_1_ADDR_C, mask_sh),\
SF(MCIF_WB_BUF_2_ADDR_Y, MCIF_WB_BUF_2_ADDR_Y, mask_sh),\
diff --git a/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.c
new file mode 100644
index 000000000000..4317100564a4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dcn35_mmhubbub.h"
+#include "reg_helper.h"
+
+/*
+ * REG, CTX and FN resolve through a local named mcif_wb30; presumably they are
+ * what the REG_* helpers from reg_helper.h expand to - TODO confirm.
+ */
+#define REG(reg) \
+ ((const struct dcn35_mmhubbub_registers *)(mcif_wb30->mcif_wb_regs)) \
+ ->reg
+
+#define CTX mcif_wb30->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+ ((const struct dcn35_mmhubbub_shift *)(mcif_wb30->mcif_wb_shift)) \
+ ->field_name, \
+ ((const struct dcn35_mmhubbub_mask *)(mcif_wb30->mcif_wb_mask)) \
+ ->field_name
+
+/*
+ * dcn35_mmhubbub_construct - construct a DCN3.5 MCIF_WB block
+ *
+ * Forwards all arguments to dcn32_mmhubbub_construct(), casting the DCN3.5
+ * register, shift and mask tables to the dcn30 types that function takes.
+ * NOTE(review): this presumably relies on the DCN3.5 tables beginning with the
+ * DCN3.0 members (see the *_DCN3_5 list macros in dcn35_mmhubbub.h) - confirm
+ * the layouts stay prefix-compatible.
+ */
+void dcn35_mmhubbub_construct(
+ struct dcn30_mmhubbub *mcif_wb30, struct dc_context *ctx,
+ const struct dcn35_mmhubbub_registers *mcif_wb_regs,
+ const struct dcn35_mmhubbub_shift *mcif_wb_shift,
+ const struct dcn35_mmhubbub_mask *mcif_wb_mask, int inst)
+{
+ dcn32_mmhubbub_construct(
+ mcif_wb30, ctx,
+ (const struct dcn30_mmhubbub_registers *)(mcif_wb_regs),
+ (const struct dcn30_mmhubbub_shift *)(mcif_wb_shift),
+ (const struct dcn30_mmhubbub_mask *)(mcif_wb_mask), inst);
+}
+
+/*
+ * dcn35_mmhubbub_set_fgcg - update MMHUBBUB_CLOCK_CNTL.MMHUBBUB_FGCG_REP_DIS
+ *
+ * The field is written with !enabled, i.e. enabling FGCG clears it.
+ */
+void dcn35_mmhubbub_set_fgcg(struct dcn30_mmhubbub *mcif_wb30, bool enabled)
+{
+ REG_UPDATE(MMHUBBUB_CLOCK_CNTL, MMHUBBUB_FGCG_REP_DIS, !enabled);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.h
new file mode 100644
index 000000000000..098e13e07272
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN35_MMHUBBUB_H
+#define __DCN35_MMHUBBUB_H
+
+#include "mcif_wb.h"
+#include "dcn32/dcn32_mmhubbub.h"
+
+/* The DCN3.0 register member list followed by a uint32_t MMHUBBUB_CLOCK_CNTL member. */
+#define MCIF_WB_REG_VARIABLE_LIST_DCN3_5 \
+ MCIF_WB_REG_VARIABLE_LIST_DCN3_0; \
+ uint32_t MMHUBBUB_CLOCK_CNTL
+
+/* The DCN32 mask/shift initializer list followed by five MMHUBBUB_CLOCK_CNTL fields. */
+#define MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(mask_sh) \
+ MCIF_WB_COMMON_MASK_SH_LIST_DCN32(mask_sh), \
+ SF(MMHUBBUB_CLOCK_CNTL, MMHUBBUB_TEST_CLK_SEL, mask_sh), \
+ SF(MMHUBBUB_CLOCK_CNTL, DISPCLK_R_MMHUBBUB_GATE_DIS, mask_sh), \
+ SF(MMHUBBUB_CLOCK_CNTL, DISPCLK_G_WBIF0_GATE_DIS, mask_sh), \
+ SF(MMHUBBUB_CLOCK_CNTL, SOCCLK_G_WBIF0_GATE_DIS, mask_sh), \
+ SF(MMHUBBUB_CLOCK_CNTL, MMHUBBUB_FGCG_REP_DIS, mask_sh)
+
+/* Anonymous struct: the DCN3.0 field list plus one member of @type per MMHUBBUB_CLOCK_CNTL field. */
+#define MCIF_WB_REG_FIELD_LIST_DCN3_5(type) \
+ struct { \
+ MCIF_WB_REG_FIELD_LIST_DCN3_0(type); \
+ type MMHUBBUB_TEST_CLK_SEL; \
+ type DISPCLK_R_MMHUBBUB_GATE_DIS; \
+ type DISPCLK_G_WBIF0_GATE_DIS; \
+ type SOCCLK_G_WBIF0_GATE_DIS; \
+ type MMHUBBUB_FGCG_REP_DIS; \
+ }
+
+/* Register table; members come from MCIF_WB_REG_VARIABLE_LIST_DCN3_5. */
+struct dcn35_mmhubbub_registers {
+ MCIF_WB_REG_VARIABLE_LIST_DCN3_5;
+};
+
+/* Field table instantiated with uint32_t members (used as the mask table). */
+struct dcn35_mmhubbub_mask {
+ MCIF_WB_REG_FIELD_LIST_DCN3_5(uint32_t);
+};
+
+/* Field table instantiated with uint8_t members (used as the shift table). */
+struct dcn35_mmhubbub_shift {
+ MCIF_WB_REG_FIELD_LIST_DCN3_5(uint8_t);
+};
+
+/* Construct @mcif_wb30 for instance @inst from the DCN3.5 register, shift and mask tables. */
+void dcn35_mmhubbub_construct(
+ struct dcn30_mmhubbub *mcif_wb30, struct dc_context *ctx,
+ const struct dcn35_mmhubbub_registers *mcif_wb_regs,
+ const struct dcn35_mmhubbub_shift *mcif_wb_shift,
+ const struct dcn35_mmhubbub_mask *mcif_wb_mask, int inst);
+
+/* Update MMHUBBUB_CLOCK_CNTL.MMHUBBUB_FGCG_REP_DIS with !@enabled. */
+void dcn35_mmhubbub_set_fgcg(struct dcn30_mmhubbub *mcif_wb30, bool enabled);
+
+#endif // __DCN35_MMHUBBUB_H
diff --git a/drivers/gpu/drm/amd/display/dc/mpc/Makefile b/drivers/gpu/drm/amd/display/dc/mpc/Makefile
new file mode 100644
index 000000000000..5402c3529f5e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/mpc/Makefile
@@ -0,0 +1,72 @@
+#
+# Copyright 2020 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+#
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN10
+###############################################################################
+MPC_DCN10 = dcn10_mpc.o
+
+AMD_DAL_MPC_DCN10 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn10/,$(MPC_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN10)
+
+###############################################################################
+# DCN20
+###############################################################################
+MPC_DCN20 = dcn20_mpc.o
+
+AMD_DAL_MPC_DCN20 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn20/,$(MPC_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN20)
+
+###############################################################################
+# DCN30
+###############################################################################
+MPC_DCN30 = dcn30_mpc.o
+
+AMD_DAL_MPC_DCN30 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn30/,$(MPC_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN30)
+
+###############################################################################
+# DCN32
+###############################################################################
+MPC_DCN32 = dcn32_mpc.o
+
+AMD_DAL_MPC_DCN32 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn32/,$(MPC_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN32)
+
+###############################################################################
+# DCN401
+###############################################################################
+MPC_DCN401 = dcn401_mpc.o
+
+AMD_DAL_MPC_DCN401 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn401/,$(MPC_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN401)
+
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c
index f2f55565e98a..b23c64004dd5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c
@@ -142,22 +142,6 @@ struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
return NULL;
}
-bool mpc1_is_mpcc_idle(struct mpc *mpc, int mpcc_id)
-{
- struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
- unsigned int top_sel;
- unsigned int opp_id;
- unsigned int idle;
-
- REG_GET(MPCC_TOP_SEL[mpcc_id], MPCC_TOP_SEL, &top_sel);
- REG_GET(MPCC_OPP_ID[mpcc_id], MPCC_OPP_ID, &opp_id);
- REG_GET(MPCC_STATUS[mpcc_id], MPCC_IDLE, &idle);
- if (top_sel == 0xf && opp_id == 0xf && idle)
- return true;
- else
- return false;
-}
-
void mpc1_assert_mpcc_idle_before_connect(struct mpc *mpc, int mpcc_id)
{
struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h
index dbfffc6383dc..874e36e39e1b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h
@@ -173,10 +173,6 @@ void mpc1_update_stereo_mix(
struct mpcc_sm_cfg *sm_cfg,
int mpcc_id);
-bool mpc1_is_mpcc_idle(
- struct mpc *mpc,
- int mpcc_id);
-
void mpc1_assert_mpcc_idle_before_connect(
struct mpc *mpc,
int mpcc_id);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c
index 5da6e44f284a..ea73473b970a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c
@@ -395,9 +395,12 @@ static void mpc20_program_ogam_pwl(
MPCC_OGAM_LUT_DATA, rgb[i].delta_green_reg);
REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0,
MPCC_OGAM_LUT_DATA, rgb[i].delta_blue_reg);
-
}
+ REG_SEQ_SUBMIT();
+ PERF_TRACE();
+ REG_SEQ_WAIT_DONE();
+ PERF_TRACE();
}
static void apply_DEDCN20_305_wa(struct mpc *mpc, int mpcc_id,
@@ -501,11 +504,6 @@ void mpc2_assert_mpcc_idle_before_connect(struct mpc *mpc, int mpcc_id)
ASSERT(!mpc_disabled);
ASSERT(!mpc_idle);
}
-
- REG_SEQ_SUBMIT();
- PERF_TRACE();
- REG_SEQ_WAIT_DONE();
- PERF_TRACE();
}
static void mpc2_init_mpcc(struct mpcc *mpcc, int mpcc_inst)
@@ -542,8 +540,35 @@ static struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id)
return NULL;
}
+/*
+ * Snapshot the state of MPCC @mpcc_inst into @s: OPP id, top (stored as
+ * dpp_id) and bottom selects, the MPCC_CONTROL blend fields, idle/busy status
+ * and the OGAM config status (stored in s->rgam_mode).
+ */
+static void mpc2_read_mpcc_state(
+ struct mpc *mpc,
+ int mpcc_inst,
+ struct mpcc_state *s)
+{
+ struct dcn20_mpc *mpc20 = TO_DCN20_MPC(mpc);
+
+ REG_GET(MPCC_OPP_ID[mpcc_inst], MPCC_OPP_ID, &s->opp_id);
+ REG_GET(MPCC_TOP_SEL[mpcc_inst], MPCC_TOP_SEL, &s->dpp_id);
+ REG_GET(MPCC_BOT_SEL[mpcc_inst], MPCC_BOT_SEL, &s->bot_mpcc_id);
+ REG_GET_4(MPCC_CONTROL[mpcc_inst], MPCC_MODE, &s->mode,
+ MPCC_ALPHA_BLND_MODE, &s->alpha_mode,
+ MPCC_ALPHA_MULTIPLIED_MODE, &s->pre_multiplied_alpha,
+ MPCC_BLND_ACTIVE_OVERLAP_ONLY, &s->overlap_only);
+ REG_GET_2(MPCC_STATUS[mpcc_inst], MPCC_IDLE, &s->idle,
+ MPCC_BUSY, &s->busy);
+
+ /* Gamma block state */
+ REG_GET(MPCC_OGAM_LUT_RAM_CONTROL[mpcc_inst],
+ MPCC_OGAM_CONFIG_STATUS, &s->rgam_mode);
+}
+
static const struct mpc_funcs dcn20_mpc_funcs = {
- .read_mpcc_state = mpc1_read_mpcc_state,
+ .read_mpcc_state = mpc2_read_mpcc_state,
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
.mpc_init = mpc1_mpc_init,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h
index 496658f420db..496658f420db 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c
index 6cf40c1332bc..85298b8a1b5e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c
@@ -25,7 +25,7 @@
#include "reg_helper.h"
#include "dcn30_mpc.h"
-#include "dcn30_cm_common.h"
+#include "dcn30/dcn30_cm_common.h"
#include "basics/conversion.h"
#include "dcn10/dcn10_cm_common.h"
#include "dc.h"
@@ -44,6 +44,45 @@
#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
+/*
+ * Run mpc1_mpc_init(), then for every OPP index below MAX_OPP for which
+ * REG(MUX[opp_id]) is non-zero set MPC_OUT_RATE_CONTROL_DISABLE to 1 and
+ * MPC_OUT_FLOW_CONTROL_COUNT to 0.
+ */
+void mpc3_mpc_init(struct mpc *mpc)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+ int opp_id;
+
+ mpc1_mpc_init(mpc);
+
+ for (opp_id = 0; opp_id < MAX_OPP; opp_id++) {
+ if (REG(MUX[opp_id]))
+ /* disable mpc out rate and flow control */
+ REG_UPDATE_2(MUX[opp_id], MPC_OUT_RATE_CONTROL_DISABLE,
+ 1, MPC_OUT_FLOW_CONTROL_COUNT, 0);
+ }
+}
+
+/*
+ * Single-instance variant: run mpc1_mpc_init_single_inst() for @mpcc_id, then
+ * apply the same rate/flow-control disable to the MUX entry of that index.
+ */
+void mpc3_mpc_init_single_inst(struct mpc *mpc, unsigned int mpcc_id)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+
+ mpc1_mpc_init_single_inst(mpc, mpcc_id);
+
+ /* assuming mpc out mux is connected to opp with the same index at this
+ * point in time (e.g. transitioning from vbios to driver)
+ */
+ if (mpcc_id < MAX_OPP && REG(MUX[mpcc_id]))
+ /* disable mpc out rate and flow control */
+ REG_UPDATE_2(MUX[mpcc_id], MPC_OUT_RATE_CONTROL_DISABLE,
+ 1, MPC_OUT_FLOW_CONTROL_COUNT, 0);
+}
+
bool mpc3_is_dwb_idle(
struct mpc *mpc,
int dwb_id)
@@ -89,14 +119,12 @@ void mpc3_set_out_rate_control(
{
struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+ /* Always disable mpc out rate and flow control.
+ * MPC flow rate control is not needed for DCN30 and above.
+ */
REG_UPDATE_2(MUX[opp_id],
- MPC_OUT_RATE_CONTROL_DISABLE, !enable,
- MPC_OUT_RATE_CONTROL, rate_2x_mode);
-
- if (flow_control)
- REG_UPDATE_2(MUX[opp_id],
- MPC_OUT_FLOW_CONTROL_MODE, flow_control->flow_ctrl_mode,
- MPC_OUT_FLOW_CONTROL_COUNT, flow_control->flow_ctrl_cnt1);
+ MPC_OUT_RATE_CONTROL_DISABLE, 1,
+ MPC_OUT_RATE_CONTROL, 0);
}
enum dc_lut_mode mpc3_get_ogam_current(struct mpc *mpc, int mpcc_id)
@@ -278,22 +306,10 @@ static void mpc3_program_ogam_pwl(
{
uint32_t i;
struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
- uint32_t last_base_value_red = rgb[num-1].red_reg + rgb[num-1].delta_red_reg;
- uint32_t last_base_value_green = rgb[num-1].green_reg + rgb[num-1].delta_green_reg;
- uint32_t last_base_value_blue = rgb[num-1].blue_reg + rgb[num-1].delta_blue_reg;
-
- /*the entries of DCN3AG gamma LUTs take 18bit base values as opposed to
- *38 base+delta values per entry in earlier DCN architectures
- *last base value for our lut is compute by adding the last base value
- *in our data + last delta
- */
if (is_rgb_equal(rgb, num)) {
for (i = 0 ; i < num; i++)
REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, rgb[i].red_reg);
-
- REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, last_base_value_red);
-
} else {
REG_UPDATE(MPCC_OGAM_LUT_CONTROL[mpcc_id],
@@ -302,8 +318,6 @@ static void mpc3_program_ogam_pwl(
for (i = 0 ; i < num; i++)
REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, rgb[i].red_reg);
- REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, last_base_value_red);
-
REG_SET(MPCC_OGAM_LUT_INDEX[mpcc_id], 0, MPCC_OGAM_LUT_INDEX, 0);
REG_UPDATE(MPCC_OGAM_LUT_CONTROL[mpcc_id],
@@ -312,8 +326,6 @@ static void mpc3_program_ogam_pwl(
for (i = 0 ; i < num; i++)
REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, rgb[i].green_reg);
- REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, last_base_value_green);
-
REG_SET(MPCC_OGAM_LUT_INDEX[mpcc_id], 0, MPCC_OGAM_LUT_INDEX, 0);
REG_UPDATE(MPCC_OGAM_LUT_CONTROL[mpcc_id],
@@ -322,7 +334,6 @@ static void mpc3_program_ogam_pwl(
for (i = 0 ; i < num; i++)
REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, rgb[i].blue_reg);
- REG_SET(MPCC_OGAM_LUT_DATA[mpcc_id], 0, MPCC_OGAM_LUT_DATA, last_base_value_blue);
}
}
@@ -1146,6 +1157,64 @@ void mpc3_set_gamut_remap(
}
}
+/* Report the active gamut-remap mode in *select; for coefficient set A or B
+ * also read that set's matrix registers into regval (untouched otherwise). */
+static void read_gamut_remap(struct dcn30_mpc *mpc30,
+ int mpcc_id,
+ uint16_t *regval,
+ uint32_t *select)
+{
+ struct color_matrices_reg gam_regs;
+
+ /* current coefficient set in use */
+ REG_GET(MPCC_GAMUT_REMAP_MODE[mpcc_id], MPCC_GAMUT_REMAP_MODE_CURRENT, select);
+
+ gam_regs.shifts.csc_c11 = mpc30->mpc_shift->MPCC_GAMUT_REMAP_C11_A;
+ gam_regs.masks.csc_c11 = mpc30->mpc_mask->MPCC_GAMUT_REMAP_C11_A;
+ gam_regs.shifts.csc_c12 = mpc30->mpc_shift->MPCC_GAMUT_REMAP_C12_A;
+ gam_regs.masks.csc_c12 = mpc30->mpc_mask->MPCC_GAMUT_REMAP_C12_A;
+
+ if (*select == GAMUT_REMAP_COEFF) {
+ gam_regs.csc_c11_c12 = REG(MPC_GAMUT_REMAP_C11_C12_A[mpcc_id]);
+ gam_regs.csc_c33_c34 = REG(MPC_GAMUT_REMAP_C33_C34_A[mpcc_id]);
+
+ cm_helper_read_color_matrices(
+ mpc30->base.ctx,
+ regval,
+ &gam_regs);
+ } else if (*select == GAMUT_REMAP_COMA_COEFF) {
+ gam_regs.csc_c11_c12 = REG(MPC_GAMUT_REMAP_C11_C12_B[mpcc_id]);
+ gam_regs.csc_c33_c34 = REG(MPC_GAMUT_REMAP_C33_C34_B[mpcc_id]);
+
+ cm_helper_read_color_matrices(
+ mpc30->base.ctx,
+ regval,
+ &gam_regs);
+ }
+}
+
+/* Fill @adjust from hardware: type BYPASS when the remap is bypassed, otherwise
+ * type SW with temperature_matrix converted from the register values read. */
+void mpc3_get_gamut_remap(struct mpc *mpc,
+ int mpcc_id,
+ struct mpc_grph_gamut_adjustment *adjust)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+ uint16_t arr_reg_val[12] = {0};
+ uint32_t select;
+
+ read_gamut_remap(mpc30, mpcc_id, arr_reg_val, &select);
+
+ if (select == GAMUT_REMAP_BYPASS) {
+ adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+ return;
+ }
+
+ adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+ convert_hw_matrix(adjust->temperature_matrix,
+ arr_reg_val, ARRAY_SIZE(arr_reg_val));
+}
+
bool mpc3_program_3dlut(
struct mpc *mpc,
const struct tetrahedral_params *params,
@@ -1399,12 +1468,65 @@ static void mpc3_set_mpc_mem_lp_mode(struct mpc *mpc)
}
}
+/*
+ * Snapshot the state of MPCC @mpcc_inst into @s: mux selects, MPCC_CONTROL
+ * blend fields, idle/busy status, shaper and 3D LUT state, and the current
+ * OGAM mode and LUT select.
+ * NOTE(review): only MPC_RMU0_MUX_STATUS is checked; any MPCC that does not
+ * match it is reported from the index-1 shaper/3D LUT registers - confirm.
+ */
+static void mpc3_read_mpcc_state(
+ struct mpc *mpc,
+ int mpcc_inst,
+ struct mpcc_state *s)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+ uint32_t rmu_status = 0xf;
+
+ REG_GET(MPCC_OPP_ID[mpcc_inst], MPCC_OPP_ID, &s->opp_id);
+ REG_GET(MPCC_TOP_SEL[mpcc_inst], MPCC_TOP_SEL, &s->dpp_id);
+ REG_GET(MPCC_BOT_SEL[mpcc_inst], MPCC_BOT_SEL, &s->bot_mpcc_id);
+ REG_GET_4(MPCC_CONTROL[mpcc_inst], MPCC_MODE, &s->mode,
+ MPCC_ALPHA_BLND_MODE, &s->alpha_mode,
+ MPCC_ALPHA_MULTIPLIED_MODE, &s->pre_multiplied_alpha,
+ MPCC_BLND_ACTIVE_OVERLAP_ONLY, &s->overlap_only);
+ REG_GET_2(MPCC_STATUS[mpcc_inst], MPCC_IDLE, &s->idle,
+ MPCC_BUSY, &s->busy);
+
+ /* Color blocks state */
+ REG_GET(MPC_RMU_CONTROL, MPC_RMU0_MUX_STATUS, &rmu_status);
+
+ if (rmu_status == mpcc_inst) {
+ REG_GET(SHAPER_CONTROL[0],
+ MPC_RMU_SHAPER_LUT_MODE_CURRENT, &s->shaper_lut_mode);
+ REG_GET(RMU_3DLUT_MODE[0],
+ MPC_RMU_3DLUT_MODE_CURRENT, &s->lut3d_mode);
+ REG_GET(RMU_3DLUT_READ_WRITE_CONTROL[0],
+ MPC_RMU_3DLUT_30BIT_EN, &s->lut3d_bit_depth);
+ REG_GET(RMU_3DLUT_MODE[0],
+ MPC_RMU_3DLUT_SIZE, &s->lut3d_size);
+ } else {
+ REG_GET(SHAPER_CONTROL[1],
+ MPC_RMU_SHAPER_LUT_MODE_CURRENT, &s->shaper_lut_mode);
+ REG_GET(RMU_3DLUT_MODE[1],
+ MPC_RMU_3DLUT_MODE_CURRENT, &s->lut3d_mode);
+ REG_GET(RMU_3DLUT_READ_WRITE_CONTROL[1],
+ MPC_RMU_3DLUT_30BIT_EN, &s->lut3d_bit_depth);
+ REG_GET(RMU_3DLUT_MODE[1],
+ MPC_RMU_3DLUT_SIZE, &s->lut3d_size);
+ }
+
+ REG_GET_2(MPCC_OGAM_CONTROL[mpcc_inst],
+ MPCC_OGAM_MODE_CURRENT, &s->rgam_mode,
+ MPCC_OGAM_SELECT_CURRENT, &s->rgam_lut);
+}
+
static const struct mpc_funcs dcn30_mpc_funcs = {
- .read_mpcc_state = mpc1_read_mpcc_state,
+ .read_mpcc_state = mpc3_read_mpcc_state,
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
- .mpc_init = mpc1_mpc_init,
- .mpc_init_single_inst = mpc1_mpc_init_single_inst,
+ .mpc_init = mpc3_mpc_init,
+ .mpc_init_single_inst = mpc3_mpc_init_single_inst,
.update_blending = mpc2_update_blending,
.cursor_lock = mpc1_cursor_lock,
.get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h
index c8a3a6a96ff7..103f29900a2c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h
@@ -779,7 +779,6 @@
type MPCC_MCM_1DLUT_LUT_DATA;\
type MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK;\
type MPCC_MCM_1DLUT_LUT_READ_COLOR_SEL;\
- type MPCC_MCM_1DLUT_LUT_READ_DBG;\
type MPCC_MCM_1DLUT_LUT_HOST_SEL;\
type MPCC_MCM_1DLUT_LUT_CONFIG_MODE;\
type MPCC_MCM_1DLUT_RAMA_EXP_REGION_START_B;\
@@ -1008,6 +1007,13 @@ void dcn30_mpc_construct(struct dcn30_mpc *mpc30,
int num_mpcc,
int num_rmu);
+void mpc3_mpc_init(
+ struct mpc *mpc);
+
+void mpc3_mpc_init_single_inst(
+ struct mpc *mpc,
+ unsigned int mpcc_id);
+
bool mpc3_program_shaper(
struct mpc *mpc,
const struct pwl_params *params,
@@ -1057,6 +1063,10 @@ void mpc3_set_gamut_remap(
int mpcc_id,
const struct mpc_grph_gamut_adjustment *adjust);
+void mpc3_get_gamut_remap(struct mpc *mpc,
+ int mpcc_id,
+ struct mpc_grph_gamut_adjustment *adjust);
+
void mpc3_set_rmu_mux(
struct mpc *mpc,
int rmu_idx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c
index 3082da04a63d..6f0e017a8ae2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c
@@ -47,7 +47,7 @@ void mpc32_mpc_init(struct mpc *mpc)
struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
int mpcc_id;
- mpc1_mpc_init(mpc);
+ mpc3_mpc_init(mpc);
if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc) {
if (mpc30->mpc_mask->MPCC_MCM_SHAPER_MEM_LOW_PWR_MODE && mpc30->mpc_mask->MPCC_MCM_3DLUT_MEM_LOW_PWR_MODE) {
@@ -71,12 +71,13 @@ void mpc32_power_on_blnd_lut(
{
struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+ REG_SET(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], 0, MPCC_MCM_1DLUT_MEM_PWR_DIS, power_on);
+
if (mpc->ctx->dc->debug.enable_mem_low_power.bits.cm) {
if (power_on) {
REG_UPDATE(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_FORCE, 0);
REG_WAIT(MPCC_MCM_MEM_PWR_CTRL[mpcc_id], MPCC_MCM_1DLUT_MEM_PWR_STATE, 0, 1, 5);
- } else {
- ASSERT(false);
+ } else if (!mpc->ctx->dc->debug.disable_mem_low_power) {
/* TODO: change to mpc
* dpp_base->ctx->dc->optimized_required = true;
* dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true;
@@ -237,16 +238,19 @@ void mpc32_program_post1dlut_pwl(
REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].red_reg);
REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, last_base_value_red);
} else {
+ REG_SET(MPCC_MCM_1DLUT_LUT_INDEX[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_INDEX, 0);
REG_UPDATE(MPCC_MCM_1DLUT_LUT_CONTROL[mpcc_id], MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK, 4);
for (i = 0 ; i < num; i++)
REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].red_reg);
REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, last_base_value_red);
+ REG_SET(MPCC_MCM_1DLUT_LUT_INDEX[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_INDEX, 0);
REG_UPDATE(MPCC_MCM_1DLUT_LUT_CONTROL[mpcc_id], MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK, 2);
for (i = 0 ; i < num; i++)
REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].green_reg);
REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, last_base_value_green);
+ REG_SET(MPCC_MCM_1DLUT_LUT_INDEX[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_INDEX, 0);
REG_UPDATE(MPCC_MCM_1DLUT_LUT_CONTROL[mpcc_id], MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK, 1);
for (i = 0 ; i < num; i++)
REG_SET(MPCC_MCM_1DLUT_LUT_DATA[mpcc_id], 0, MPCC_MCM_1DLUT_LUT_DATA, rgb[i].blue_reg);
@@ -365,275 +369,279 @@ void mpc32_program_shaper_luta_settings(
MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y);
curve = params->arr_curve_points;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_0_1[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_2_3[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_4_5[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_6_7[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_8_9[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_10_11[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_12_13[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_14_15[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_16_17[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_18_19[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_20_21[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_22_23[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_24_25[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_26_27[mpcc_id], 0,
+ if (curve) {
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_0_1[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_28_29[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_30_31[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_32_33[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-}
-
-
-void mpc32_program_shaper_lutb_settings(
- struct mpc *mpc,
- const struct pwl_params *params,
- uint32_t mpcc_id)
-{
- const struct gamma_curve *curve;
- struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
-
- REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_B[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].blue.custom_float_x,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0);
- REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_G[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].green.custom_float_x,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0);
- REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_R[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].red.custom_float_x,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0);
-
- REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_B[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].blue.custom_float_x,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].blue.custom_float_y);
- REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_G[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].green.custom_float_x,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].green.custom_float_y);
- REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_R[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].red.custom_float_x,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y);
-
- curve = params->arr_curve_points;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_0_1[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_2_3[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_2_3[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
-
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_4_5[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_4_5[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_6_7[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_6_7[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_8_9[mpcc_id], 0,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
- MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_8_9[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_10_11[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_10_11[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_12_13[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_12_13[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_14_15[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_14_15[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_16_17[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_16_17[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_18_19[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_18_19[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_20_21[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_20_21[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_22_23[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_22_23[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_24_25[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_24_25[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_26_27[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_28_29[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_26_27[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_30_31[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_28_29[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_32_33[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+ }
+}
+
+
+void mpc32_program_shaper_lutb_settings(
+ struct mpc *mpc,
+ const struct pwl_params *params,
+ uint32_t mpcc_id)
+{
+ const struct gamma_curve *curve;
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_30_31[mpcc_id], 0,
+ REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_B[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].blue.custom_float_x,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0);
+ REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_G[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].green.custom_float_x,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0);
+ REG_SET_2(MPCC_MCM_SHAPER_RAMB_START_CNTL_R[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_B, params->corner_points[0].red.custom_float_x,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_START_SEGMENT_B, 0);
+
+ REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_B[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].blue.custom_float_x,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].blue.custom_float_y);
+ REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_G[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].green.custom_float_x,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].green.custom_float_y);
+ REG_SET_2(MPCC_MCM_SHAPER_RAMB_END_CNTL_R[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_B, params->corner_points[1].red.custom_float_x,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y);
+
+ curve = params->arr_curve_points;
+ if (curve) {
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_0_1[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
- curve += 2;
- REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_32_33[mpcc_id], 0,
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_2_3[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_4_5[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_6_7[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_8_9[mpcc_id], 0,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_10_11[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_12_13[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_14_15[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_16_17[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_18_19[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_20_21[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_22_23[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_24_25[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_26_27[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_28_29[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_30_31[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+
+ curve += 2;
+ REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_32_33[mpcc_id], 0,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset,
+ MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num);
+ }
}
@@ -870,7 +878,7 @@ void mpc32_set3dlut_ram10(
}
-static void mpc32_set_3dlut_mode(
+void mpc32_set_3dlut_mode(
struct mpc *mpc,
enum dc_lut_mode mode,
bool is_color_channel_12bits,
@@ -987,7 +995,7 @@ static const struct mpc_funcs dcn32_mpc_funcs = {
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
.mpc_init = mpc32_mpc_init,
- .mpc_init_single_inst = mpc1_mpc_init_single_inst,
+ .mpc_init_single_inst = mpc3_mpc_init_single_inst,
.update_blending = mpc2_update_blending,
.cursor_lock = mpc1_cursor_lock,
.get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp,
@@ -1004,7 +1012,6 @@ static const struct mpc_funcs dcn32_mpc_funcs = {
.set_dwb_mux = mpc3_set_dwb_mux,
.disable_dwb_mux = mpc3_disable_dwb_mux,
.is_dwb_idle = mpc3_is_dwb_idle,
- .set_out_rate_control = mpc3_set_out_rate_control,
.set_gamut_remap = mpc3_set_gamut_remap,
.program_shaper = mpc32_program_shaper,
.program_3dlut = mpc32_program_3dlut,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.h
index 9ac584fa89ce..8c9b20bcca85 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.h
@@ -232,7 +232,6 @@
SF(MPCC_OGAM0_MPCC_OGAM_CONTROL, MPCC_OGAM_SELECT_CURRENT, mask_sh),\
SF(MPCC_OGAM0_MPCC_OGAM_LUT_CONTROL, MPCC_OGAM_LUT_WRITE_COLOR_MASK, mask_sh),\
SF(MPCC_OGAM0_MPCC_OGAM_LUT_CONTROL, MPCC_OGAM_LUT_READ_COLOR_SEL, mask_sh),\
- SF(MPCC_OGAM0_MPCC_OGAM_LUT_CONTROL, MPCC_OGAM_LUT_READ_DBG, mask_sh),\
SF(MPCC_OGAM0_MPCC_OGAM_LUT_CONTROL, MPCC_OGAM_LUT_HOST_SEL, mask_sh),\
SF(MPCC_OGAM0_MPCC_OGAM_LUT_CONTROL, MPCC_OGAM_LUT_CONFIG_MODE, mask_sh),\
SF(MPCC_OGAM0_MPCC_OGAM_LUT_DATA, MPCC_OGAM_LUT_DATA, mask_sh),\
@@ -276,7 +275,6 @@
SF(MPCC_MCM0_MPCC_MCM_1DLUT_LUT_DATA, MPCC_MCM_1DLUT_LUT_DATA, mask_sh),\
SF(MPCC_MCM0_MPCC_MCM_1DLUT_LUT_CONTROL, MPCC_MCM_1DLUT_LUT_WRITE_COLOR_MASK, mask_sh),\
SF(MPCC_MCM0_MPCC_MCM_1DLUT_LUT_CONTROL, MPCC_MCM_1DLUT_LUT_READ_COLOR_SEL, mask_sh),\
- SF(MPCC_MCM0_MPCC_MCM_1DLUT_LUT_CONTROL, MPCC_MCM_1DLUT_LUT_READ_DBG, mask_sh),\
SF(MPCC_MCM0_MPCC_MCM_1DLUT_LUT_CONTROL, MPCC_MCM_1DLUT_LUT_HOST_SEL, mask_sh),\
SF(MPCC_MCM0_MPCC_MCM_1DLUT_LUT_CONTROL, MPCC_MCM_1DLUT_LUT_CONFIG_MODE, mask_sh),\
SF(MPCC_MCM0_MPCC_MCM_1DLUT_RAMA_START_CNTL_B, MPCC_MCM_1DLUT_RAMA_EXP_REGION_START_B, mask_sh),\
@@ -393,4 +391,12 @@ void mpc32_select_3dlut_ram(
enum dc_lut_mode mode,
bool is_color_channel_12bits,
uint32_t mpcc_id);
+
+void mpc32_set_3dlut_mode(
+ struct mpc *mpc,
+ enum dc_lut_mode mode,
+ bool is_color_channel_12bits,
+ bool is_lut_size17x17x17,
+ uint32_t mpcc_id);
+
#endif //__DC_MPCC_DCN32_H__
diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c
new file mode 100644
index 000000000000..e1a0308dee57
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c
@@ -0,0 +1,634 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "dc.h"
+#include "dcn401_mpc.h"
+#include "dcn10/dcn10_cm_common.h"
+#include "basics/conversion.h"
+#include "mpc.h"
+
+#define REG(reg)\
+ mpc401->mpc_regs->reg
+/* REG, CTX and FN all expand to accesses through a local named mpc401, so each user must declare one. */
+#define CTX \
+ mpc401->base.ctx
+/* FN ignores reg_name and expands to the "shift, mask" pair looked up by field_name. */
+#undef FN
+#define FN(reg_name, field_name) \
+ mpc401->mpc_shift->field_name, mpc401->mpc_mask->field_name
+
+void mpc401_update_3dlut_fast_load_select(struct mpc *mpc, int mpcc_id, int hubp_idx)
+{ /* Write hubp_idx into the 3DLUT fast-load select register of MPCC instance mpcc_id. */
+ struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); /* consumed implicitly by the REG()/FN() macros */
+ /* REG_SET is given 0 as its second argument - presumably the value for the register's other bits; confirm in reg_helper.h. */
+ REG_SET(MPCC_MCM_3DLUT_FAST_LOAD_SELECT[mpcc_id], 0, MPCC_MCM_3DLUT_FL_SEL, hubp_idx);
+}
+
+void mpc401_set_movable_cm_location(struct mpc *mpc, enum mpcc_movable_cm_location location, int mpcc_id)
+{ /* Program MPCC_MOVABLE_CM_LOCATION_CNTL of MPCC mpcc_id: 0 for ..._BEFORE, 1 for ..._AFTER. */
+ struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); /* consumed implicitly by the REG()/FN() macros */
+ /* There is no default case: any other location value leaves the register untouched. */
+ switch (location) {
+ case MPCC_MOVABLE_CM_LOCATION_BEFORE:
+ REG_UPDATE(MPCC_MOVABLE_CM_LOCATION_CONTROL[mpcc_id],
+ MPCC_MOVABLE_CM_LOCATION_CNTL, 0);
+ break;
+ case MPCC_MOVABLE_CM_LOCATION_AFTER:
+ REG_UPDATE(MPCC_MOVABLE_CM_LOCATION_CONTROL[mpcc_id],
+ MPCC_MOVABLE_CM_LOCATION_CNTL, 1);
+ break;
+ }
+}
+
+static enum dc_lut_mode get3dlut_config(
+ struct mpc *mpc,
+ bool *is_17x17x17,
+ bool *is_12bits_color_channel,
+ int mpcc_id)
+{ /* Read back the 3DLUT state of MPCC mpcc_id: returns the current mode and fills both out-params. */
+ uint32_t i_mode, i_enable_10bits, lut_size;
+ enum dc_lut_mode mode;
+ struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); /* consumed implicitly by the REG()/FN() macros */
+ /* Raw field values are fetched from the registers first, then decoded below. */
+ REG_GET(MPCC_MCM_3DLUT_MODE[mpcc_id],
+ MPCC_MCM_3DLUT_MODE_CURRENT, &i_mode);
+
+ REG_GET(MPCC_MCM_3DLUT_READ_WRITE_CONTROL[mpcc_id],
+ MPCC_MCM_3DLUT_30BIT_EN, &i_enable_10bits);
+ /* MODE_CURRENT decoding: 0 -> bypass, 1 -> RAM A, 2 -> RAM B; anything else is treated as bypass. */
+ switch (i_mode) {
+ case 0:
+ mode = LUT_BYPASS;
+ break;
+ case 1:
+ mode = LUT_RAM_A;
+ break;
+ case 2:
+ mode = LUT_RAM_B;
+ break;
+ default:
+ mode = LUT_BYPASS;
+ break;
+ }
+ if (i_enable_10bits > 0) /* 30BIT_EN set: reported as not 12-bit */
+ *is_12bits_color_channel = false;
+ else
+ *is_12bits_color_channel = true;
+
+ REG_GET(MPCC_MCM_3DLUT_MODE[mpcc_id], MPCC_MCM_3DLUT_SIZE, &lut_size);
+ /* A SIZE field of 0 is reported as the 17x17x17 layout; any other value as not 17x17x17. */
+ if (lut_size == 0)
+ *is_17x17x17 = true;
+ else
+ *is_17x17x17 = false;
+
+ return mode;
+}
+
+void mpc401_populate_lut(struct mpc *mpc, const enum MCM_LUT_ID id, const union mcm_lut_params params, bool lut_bank_a, int mpcc_id)
+{ /* Load the LUT selected by id from params into RAM bank A (lut_bank_a) or B of MPCC mpcc_id. */
+ const enum dc_lut_mode next_mode = lut_bank_a ? LUT_RAM_A : LUT_RAM_B;
+ const struct pwl_params *lut1d = params.pwl; /* same union member as lut_shaper; which name applies depends on id */
+ const struct pwl_params *lut_shaper = params.pwl;
+ bool is_17x17x17;
+ bool is_12bits_color_channel;
+ const struct dc_rgb *lut0;
+ const struct dc_rgb *lut1;
+ const struct dc_rgb *lut2;
+ const struct dc_rgb *lut3;
+ int lut_size0; /* element count of lut0 */
+ int lut_size; /* element count of lut1, reused below for lut2 and lut3 */
+ const struct tetrahedral_params *lut3d = params.lut3d;
+ /* Each case returns before calling any helper when its params pointer is NULL. */
+ switch (id) {
+ case MCM_LUT_1DLUT:
+ if (lut1d == NULL)
+ return;
+
+ mpc32_power_on_blnd_lut(mpc, mpcc_id, true);
+ mpc32_configure_post1dlut(mpc, mpcc_id, next_mode == LUT_RAM_A);
+ /* Bank-specific settings first, then the PWL points themselves. */
+ if (next_mode == LUT_RAM_A)
+ mpc32_program_post1dluta_settings(mpc, mpcc_id, lut1d);
+ else
+ mpc32_program_post1dlutb_settings(mpc, mpcc_id, lut1d);
+
+ mpc32_program_post1dlut_pwl(
+ mpc, mpcc_id, lut1d->rgb_resulted, lut1d->hw_points_num);
+
+ break;
+ case MCM_LUT_SHAPER:
+ if (lut_shaper == NULL)
+ return;
+ if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc)
+ mpc32_power_on_shaper_3dlut(mpc, mpcc_id, true);
+
+ mpc32_configure_shaper_lut(mpc, next_mode == LUT_RAM_A, mpcc_id);
+
+ if (next_mode == LUT_RAM_A)
+ mpc32_program_shaper_luta_settings(mpc, lut_shaper, mpcc_id);
+ else
+ mpc32_program_shaper_lutb_settings(mpc, lut_shaper, mpcc_id);
+
+ mpc32_program_shaper_lut(
+ mpc, lut_shaper->rgb_resulted, lut_shaper->hw_points_num, mpcc_id);
+ /* NOTE(review): the call with true above is gated on enable_mem_low_power.bits.mpc, the call with false below is not; the 3DLUT case does the reverse - confirm intended. */
+ mpc32_power_on_shaper_3dlut(mpc, mpcc_id, false);
+ break;
+ case MCM_LUT_3DLUT:
+ if (lut3d == NULL)
+ return;
+
+ mpc32_power_on_shaper_3dlut(mpc, mpcc_id, true);
+
+ get3dlut_config(mpc, &is_17x17x17, &is_12bits_color_channel, mpcc_id); /* NOTE(review): both out-params are overwritten just below and the return value is unused - confirm the call is still needed */
+ /* Layout and bit depth come from the caller's parameters, not from the hardware state read above. */
+ is_17x17x17 = !lut3d->use_tetrahedral_9;
+ is_12bits_color_channel = lut3d->use_12bits;
+ if (is_17x17x17) {
+ lut0 = lut3d->tetrahedral_17.lut0;
+ lut1 = lut3d->tetrahedral_17.lut1;
+ lut2 = lut3d->tetrahedral_17.lut2;
+ lut3 = lut3d->tetrahedral_17.lut3;
+ lut_size0 = sizeof(lut3d->tetrahedral_17.lut0)/
+ sizeof(lut3d->tetrahedral_17.lut0[0]);
+ lut_size = sizeof(lut3d->tetrahedral_17.lut1)/
+ sizeof(lut3d->tetrahedral_17.lut1[0]);
+ } else {
+ lut0 = lut3d->tetrahedral_9.lut0;
+ lut1 = lut3d->tetrahedral_9.lut1;
+ lut2 = lut3d->tetrahedral_9.lut2;
+ lut3 = lut3d->tetrahedral_9.lut3;
+ lut_size0 = sizeof(lut3d->tetrahedral_9.lut0)/
+ sizeof(lut3d->tetrahedral_9.lut0[0]);
+ lut_size = sizeof(lut3d->tetrahedral_9.lut1)/
+ sizeof(lut3d->tetrahedral_9.lut1[0]);
+ }
+ /* The four sub-tables are written one at a time, each after selecting its own RAM mask bit (0x1, 0x2, 0x4, 0x8). */
+ mpc32_select_3dlut_ram(mpc, next_mode,
+ is_12bits_color_channel, mpcc_id);
+ mpc32_select_3dlut_ram_mask(mpc, 0x1, mpcc_id);
+ if (is_12bits_color_channel)
+ mpc32_set3dlut_ram12(mpc, lut0, lut_size0, mpcc_id);
+ else
+ mpc32_set3dlut_ram10(mpc, lut0, lut_size0, mpcc_id);
+
+ mpc32_select_3dlut_ram_mask(mpc, 0x2, mpcc_id);
+ if (is_12bits_color_channel)
+ mpc32_set3dlut_ram12(mpc, lut1, lut_size, mpcc_id);
+ else
+ mpc32_set3dlut_ram10(mpc, lut1, lut_size, mpcc_id);
+
+ mpc32_select_3dlut_ram_mask(mpc, 0x4, mpcc_id);
+ if (is_12bits_color_channel)
+ mpc32_set3dlut_ram12(mpc, lut2, lut_size, mpcc_id);
+ else
+ mpc32_set3dlut_ram10(mpc, lut2, lut_size, mpcc_id);
+
+ mpc32_select_3dlut_ram_mask(mpc, 0x8, mpcc_id);
+ if (is_12bits_color_channel)
+ mpc32_set3dlut_ram12(mpc, lut3, lut_size, mpcc_id);
+ else
+ mpc32_set3dlut_ram10(mpc, lut3, lut_size, mpcc_id);
+
+ if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc)
+ mpc32_power_on_shaper_3dlut(mpc, mpcc_id, false);
+
+ break;
+ }
+
+}
+
+void mpc401_program_lut_mode(
+ struct mpc *mpc,
+ const enum MCM_LUT_ID id,
+ const enum MCM_LUT_XABLE xable,
+ bool lut_bank_a,
+ int mpcc_id)
+{ /* Enable or disable LUT id on MPCC mpcc_id; when enabling, lut_bank_a picks bank A over bank B. */
+ struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); /* consumed implicitly by the REG()/FN() macros */
+ /* 3DLUT and shaper: the mode field is 0 when disabled, 1 for bank A and 2 for bank B when enabled. */
+ switch (id) {
+ case MCM_LUT_3DLUT:
+ switch (xable) {
+ case MCM_LUT_DISABLE:
+ REG_UPDATE(MPCC_MCM_3DLUT_MODE[mpcc_id], MPCC_MCM_3DLUT_MODE, 0);
+ break;
+ case MCM_LUT_ENABLE:
+ REG_UPDATE(MPCC_MCM_3DLUT_MODE[mpcc_id], MPCC_MCM_3DLUT_MODE, lut_bank_a ? 1 : 2);
+ break;
+ }
+ break;
+ case MCM_LUT_SHAPER:
+ switch (xable) {
+ case MCM_LUT_DISABLE:
+ REG_UPDATE(MPCC_MCM_SHAPER_CONTROL[mpcc_id], MPCC_MCM_SHAPER_LUT_MODE, 0);
+ break;
+ case MCM_LUT_ENABLE:
+ REG_UPDATE(MPCC_MCM_SHAPER_CONTROL[mpcc_id], MPCC_MCM_SHAPER_LUT_MODE, lut_bank_a ? 1 : 2);
+ break;
+ }
+ break;
+ case MCM_LUT_1DLUT: /* unlike the two cases above, the mode value does not encode the bank */
+ switch (xable) {
+ case MCM_LUT_DISABLE:
+ REG_UPDATE(MPCC_MCM_1DLUT_CONTROL[mpcc_id],
+ MPCC_MCM_1DLUT_MODE, 0);
+ break;
+ case MCM_LUT_ENABLE:
+ REG_UPDATE(MPCC_MCM_1DLUT_CONTROL[mpcc_id],
+ MPCC_MCM_1DLUT_MODE, 2);
+ break;
+ }
+ REG_UPDATE(MPCC_MCM_1DLUT_CONTROL[mpcc_id], /* bank select is written on both enable and disable: 0 = A, 1 = B */
+ MPCC_MCM_1DLUT_SELECT, lut_bank_a ? 0 : 1);
+ break;
+ }
+}
+
+void mpc401_program_lut_read_write_control(struct mpc *mpc, const enum MCM_LUT_ID id, bool lut_bank_a, int mpcc_id)
+{
+ struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); /* not named again below; presumably consumed by REG_UPDATE - confirm */
+
+ switch (id) { /* route to the read/write-control programming of the requested LUT on MPCC mpcc_id */
+ case MCM_LUT_3DLUT:
+ mpc32_select_3dlut_ram_mask(mpc, 0xf, mpcc_id); /* 0xf is the OR of the 0x1/0x2/0x4/0x8 masks mpc401_populate_lut passes one at a time */
+ REG_UPDATE(MPCC_MCM_3DLUT_READ_WRITE_CONTROL[mpcc_id], MPCC_MCM_3DLUT_RAM_SEL, lut_bank_a ? 0 : 1); /* 0 when lut_bank_a is set, 1 otherwise */
+ break;
+ case MCM_LUT_SHAPER:
+ mpc32_configure_shaper_lut(mpc, lut_bank_a, mpcc_id); /* shaper handling is delegated to the mpc32 helper */
+ break;
+ case MCM_LUT_1DLUT:
+ mpc32_configure_post1dlut(mpc, lut_bank_a, mpcc_id); /* 1D LUT handling is delegated to the mpc32 helper */
+ break;
+ }
+}
+
+void mpc_program_gamut_remap( /* Load a coefficient set into one MPCC gamut-remap block and select it */
+ struct mpc *mpc,
+ unsigned int mpcc_id, /* index into the per-MPCC register arrays */
+ const uint16_t *regval, /* coefficient words for cm_helper_program_color_matrices(); NULL means only the mode field is written */
+ enum mpcc_gamut_remap_id gamut_remap_block_id, /* OGAM, MCM first or MCM second remap block; other values do nothing */
+ enum mpcc_gamut_remap_mode_select mode_select) /* SELECT_0 writes the mode only; SELECT_1 programs set A, SELECT_2 set B */
+{
+ struct color_matrices_reg gamut_regs = {0}; /* zeroed, as in mpc_read_gamut_remap, so no field is ever read uninitialised */
+ struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc);
+
+ switch (gamut_remap_block_id) {
+ case MPCC_OGAM_GAMUT_REMAP:
+
+ if (regval == NULL || mode_select == MPCC_GAMUT_REMAP_MODE_SELECT_0) { /* no coefficients to load: write the mode field and stop */
+ REG_SET(MPCC_GAMUT_REMAP_MODE[mpcc_id], 0,
+ MPCC_GAMUT_REMAP_MODE, mode_select);
+ return;
+ }
+
+ gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_GAMUT_REMAP_C11_A; /* only the C11/C12 field layout is handed to the helper */
+ gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_GAMUT_REMAP_C11_A;
+ gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_GAMUT_REMAP_C12_A;
+ gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_GAMUT_REMAP_C12_A;
+
+ switch (mode_select) { /* first (C11_C12) and last (C33_C34) register of the chosen set */
+ case MPCC_GAMUT_REMAP_MODE_SELECT_1:
+ gamut_regs.csc_c11_c12 = REG(MPC_GAMUT_REMAP_C11_C12_A[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_GAMUT_REMAP_C33_C34_A[mpcc_id]);
+ break;
+ case MPCC_GAMUT_REMAP_MODE_SELECT_2:
+ gamut_regs.csc_c11_c12 = REG(MPC_GAMUT_REMAP_C11_C12_B[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_GAMUT_REMAP_C33_C34_B[mpcc_id]);
+ break;
+ default:
+ return; /* unknown coefficient set: there is no register range to program, so touch nothing */
+ }
+
+ cm_helper_program_color_matrices(
+ mpc->ctx,
+ regval,
+ &gamut_regs);
+
+ //select coefficient set to use, set A (MODE_1) or set B (MODE_2)
+ REG_SET(MPCC_GAMUT_REMAP_MODE[mpcc_id], 0, MPCC_GAMUT_REMAP_MODE, mode_select);
+ break;
+
+ case MPCC_MCM_FIRST_GAMUT_REMAP:
+ if (regval == NULL || mode_select == MPCC_GAMUT_REMAP_MODE_SELECT_0) { /* no coefficients to load: write the mode field and stop */
+ REG_SET(MPCC_MCM_FIRST_GAMUT_REMAP_MODE[mpcc_id], 0,
+ MPCC_MCM_FIRST_GAMUT_REMAP_MODE, mode_select);
+ return;
+ }
+
+ gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_MCM_FIRST_GAMUT_REMAP_C11_A;
+ gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_MCM_FIRST_GAMUT_REMAP_C11_A;
+ gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_MCM_FIRST_GAMUT_REMAP_C12_A;
+ gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_MCM_FIRST_GAMUT_REMAP_C12_A;
+
+ switch (mode_select) {
+ case MPCC_GAMUT_REMAP_MODE_SELECT_1:
+ gamut_regs.csc_c11_c12 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A[mpcc_id]);
+ break;
+ case MPCC_GAMUT_REMAP_MODE_SELECT_2:
+ gamut_regs.csc_c11_c12 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_B[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_B[mpcc_id]);
+ break;
+ default:
+ return; /* unknown coefficient set: program nothing */
+ }
+
+ cm_helper_program_color_matrices(
+ mpc->ctx,
+ regval,
+ &gamut_regs);
+
+ //select coefficient set to use, set A (MODE_1) or set B (MODE_2)
+ REG_SET(MPCC_MCM_FIRST_GAMUT_REMAP_MODE[mpcc_id], 0,
+ MPCC_MCM_FIRST_GAMUT_REMAP_MODE, mode_select);
+ break;
+
+ case MPCC_MCM_SECOND_GAMUT_REMAP:
+ if (regval == NULL || mode_select == MPCC_GAMUT_REMAP_MODE_SELECT_0) { /* no coefficients to load: write the mode field and stop */
+ REG_SET(MPCC_MCM_SECOND_GAMUT_REMAP_MODE[mpcc_id], 0,
+ MPCC_MCM_SECOND_GAMUT_REMAP_MODE, mode_select);
+ return;
+ }
+
+ gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_MCM_SECOND_GAMUT_REMAP_C11_A;
+ gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_MCM_SECOND_GAMUT_REMAP_C11_A;
+ gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_MCM_SECOND_GAMUT_REMAP_C12_A;
+ gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_MCM_SECOND_GAMUT_REMAP_C12_A;
+
+ switch (mode_select) {
+ case MPCC_GAMUT_REMAP_MODE_SELECT_1:
+ gamut_regs.csc_c11_c12 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A[mpcc_id]);
+ break;
+ case MPCC_GAMUT_REMAP_MODE_SELECT_2:
+ gamut_regs.csc_c11_c12 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_B[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_B[mpcc_id]);
+ break;
+ default:
+ return; /* unknown coefficient set: program nothing */
+ }
+
+ cm_helper_program_color_matrices(
+ mpc->ctx,
+ regval,
+ &gamut_regs);
+
+ //select coefficient set to use, set A (MODE_1) or set B (MODE_2)
+ REG_SET(MPCC_MCM_SECOND_GAMUT_REMAP_MODE[mpcc_id], 0,
+ MPCC_MCM_SECOND_GAMUT_REMAP_MODE, mode_select);
+ break;
+
+ default:
+ break;
+ }
+}
+
+void mpc401_set_gamut_remap( /* Apply a gamut adjustment to the remap block named in adjust, alternating between coefficient sets A and B */
+ struct mpc *mpc,
+ int mpcc_id, /* index into the per-MPCC register arrays */
+ const struct mpc_grph_gamut_adjustment *adjust) /* supplies the adjust type, target block id and a 12-entry temperature_matrix */
+{
+ struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc);
+ unsigned int i = 0;
+ uint32_t mode_select = 0; /* stays 0 if the block id matches none of the cases below */
+
+ if (adjust->gamut_adjust_type != GRAPHICS_GAMUT_ADJUST_TYPE_SW) {
+ /* Bypass / Disable if type is bypass or hw */
+ mpc_program_gamut_remap(mpc, mpcc_id, NULL,
+ adjust->mpcc_gamut_remap_block_id, MPCC_GAMUT_REMAP_MODE_SELECT_0);
+ } else {
+ struct fixed31_32 arr_matrix[12];
+ uint16_t arr_reg_val[12];
+
+ for (i = 0; i < 12; i++) /* local copy of the 12 matrix entries */
+ arr_matrix[i] = adjust->temperature_matrix[i];
+
+ convert_float_matrix(arr_reg_val, arr_matrix, 12); /* fills arr_reg_val from arr_matrix; the result is what gets programmed below */
+
+ switch (adjust->mpcc_gamut_remap_block_id) { /* read which set the targeted block reports as current */
+ case MPCC_OGAM_GAMUT_REMAP:
+ REG_GET(MPCC_GAMUT_REMAP_MODE[mpcc_id],
+ MPCC_GAMUT_REMAP_MODE_CURRENT, &mode_select);
+ break;
+ case MPCC_MCM_FIRST_GAMUT_REMAP:
+ REG_GET(MPCC_MCM_FIRST_GAMUT_REMAP_MODE[mpcc_id],
+ MPCC_MCM_FIRST_GAMUT_REMAP_MODE_CURRENT, &mode_select);
+ break;
+ case MPCC_MCM_SECOND_GAMUT_REMAP:
+ REG_GET(MPCC_MCM_SECOND_GAMUT_REMAP_MODE[mpcc_id],
+ MPCC_MCM_SECOND_GAMUT_REMAP_MODE_CURRENT, &mode_select);
+ break;
+ default:
+ break;
+ }
+
+ //If the set currently in use is not set A (MODE_1), program set A; otherwise program set B
+ if (mode_select != MPCC_GAMUT_REMAP_MODE_SELECT_1)
+ mode_select = MPCC_GAMUT_REMAP_MODE_SELECT_1;
+ else
+ mode_select = MPCC_GAMUT_REMAP_MODE_SELECT_2;
+
+ mpc_program_gamut_remap(mpc, mpcc_id, arr_reg_val,
+ adjust->mpcc_gamut_remap_block_id, mode_select); /* loads the coefficients and then selects the chosen set */
+ }
+}
+
+void mpc_read_gamut_remap(struct mpc *mpc, /* Report the current coefficient-set selection of a remap block and, unless it is SELECT_0, read that set back */
+ int mpcc_id, /* index into the per-MPCC register arrays */
+ uint16_t *regval, /* output buffer handed to cm_helper_read_color_matrices() */
+ enum mpcc_gamut_remap_id gamut_remap_block_id, /* OGAM, MCM first or MCM second remap block */
+ uint32_t *mode_select) /* in/out: overwritten with the MODE_CURRENT field for a recognised block id */
+{
+ struct color_matrices_reg gamut_regs = {0};
+ struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc);
+
+ switch (gamut_remap_block_id) {
+ case MPCC_OGAM_GAMUT_REMAP:
+ //current coefficient set in use
+ REG_GET(MPCC_GAMUT_REMAP_MODE[mpcc_id], MPCC_GAMUT_REMAP_MODE_CURRENT, mode_select);
+
+ gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_GAMUT_REMAP_C11_A; /* only the C11/C12 field layout is handed to the helper */
+ gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_GAMUT_REMAP_C11_A;
+ gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_GAMUT_REMAP_C12_A;
+ gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_GAMUT_REMAP_C12_A;
+
+ switch (*mode_select) { /* first (C11_C12) and last (C33_C34) register of the set reported as current */
+ case MPCC_GAMUT_REMAP_MODE_SELECT_1:
+ gamut_regs.csc_c11_c12 = REG(MPC_GAMUT_REMAP_C11_C12_A[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_GAMUT_REMAP_C33_C34_A[mpcc_id]);
+ break;
+ case MPCC_GAMUT_REMAP_MODE_SELECT_2:
+ gamut_regs.csc_c11_c12 = REG(MPC_GAMUT_REMAP_C11_C12_B[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_GAMUT_REMAP_C33_C34_B[mpcc_id]);
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case MPCC_MCM_FIRST_GAMUT_REMAP:
+ REG_GET(MPCC_MCM_FIRST_GAMUT_REMAP_MODE[mpcc_id],
+ MPCC_MCM_FIRST_GAMUT_REMAP_MODE_CURRENT, mode_select);
+
+ gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_MCM_FIRST_GAMUT_REMAP_C11_A;
+ gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_MCM_FIRST_GAMUT_REMAP_C11_A;
+ gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_MCM_FIRST_GAMUT_REMAP_C12_A;
+ gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_MCM_FIRST_GAMUT_REMAP_C12_A;
+
+ switch (*mode_select) {
+ case MPCC_GAMUT_REMAP_MODE_SELECT_1:
+ gamut_regs.csc_c11_c12 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A[mpcc_id]);
+ break;
+ case MPCC_GAMUT_REMAP_MODE_SELECT_2:
+ gamut_regs.csc_c11_c12 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_B[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_B[mpcc_id]);
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case MPCC_MCM_SECOND_GAMUT_REMAP:
+ REG_GET(MPCC_MCM_SECOND_GAMUT_REMAP_MODE[mpcc_id],
+ MPCC_MCM_SECOND_GAMUT_REMAP_MODE_CURRENT, mode_select);
+
+ gamut_regs.shifts.csc_c11 = mpc401->mpc_shift->MPCC_MCM_SECOND_GAMUT_REMAP_C11_A;
+ gamut_regs.masks.csc_c11 = mpc401->mpc_mask->MPCC_MCM_SECOND_GAMUT_REMAP_C11_A;
+ gamut_regs.shifts.csc_c12 = mpc401->mpc_shift->MPCC_MCM_SECOND_GAMUT_REMAP_C12_A;
+ gamut_regs.masks.csc_c12 = mpc401->mpc_mask->MPCC_MCM_SECOND_GAMUT_REMAP_C12_A;
+
+ switch (*mode_select) {
+ case MPCC_GAMUT_REMAP_MODE_SELECT_1:
+ gamut_regs.csc_c11_c12 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A[mpcc_id]);
+ break;
+ case MPCC_GAMUT_REMAP_MODE_SELECT_2:
+ gamut_regs.csc_c11_c12 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_B[mpcc_id]);
+ gamut_regs.csc_c33_c34 = REG(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_B[mpcc_id]);
+ break;
+ default:
+ break;
+ }
+ break;
+
+ default:
+ break; /* NOTE(review): *mode_select is not written on this path, so the check below sees the caller's value - callers must initialise it */
+ }
+
+ if (*mode_select != MPCC_GAMUT_REMAP_MODE_SELECT_0) { /* SELECT_0 is reported as bypass by mpc401_get_gamut_remap, so regval is left untouched */
+ cm_helper_read_color_matrices(
+ mpc401->base.ctx,
+ regval,
+ &gamut_regs);
+ }
+}
+
+void mpc401_get_gamut_remap(struct mpc *mpc, /* Read the gamut-remap state of the block named in adjust back into adjust */
+ int mpcc_id, /* index into the per-MPCC register arrays */
+ struct mpc_grph_gamut_adjustment *adjust) /* in: mpcc_gamut_remap_block_id; out: gamut_adjust_type and, for SW, temperature_matrix */
+{
+ uint16_t arr_reg_val[12] = {0};
+ uint32_t mode_select = MPCC_GAMUT_REMAP_MODE_SELECT_0; /* pre-set because mpc_read_gamut_remap leaves it alone for an unknown block id */
+
+ mpc_read_gamut_remap(mpc, mpcc_id, arr_reg_val, adjust->mpcc_gamut_remap_block_id, &mode_select);
+
+ if (mode_select == MPCC_GAMUT_REMAP_MODE_SELECT_0) { /* no coefficient set selected: report bypass, temperature_matrix is not written */
+ adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
+ return;
+ }
+
+ adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
+ convert_hw_matrix(adjust->temperature_matrix,
+ arr_reg_val, ARRAY_SIZE(arr_reg_val)); /* fills temperature_matrix from the 12 register words read above */
+}
+
+static const struct mpc_funcs dcn401_mpc_funcs = { /* function table installed as base.funcs by dcn401_mpc_construct(); mixes mpc1/mpc2/mpc3/mpc32 handlers with mpc401 ones */
+ .read_mpcc_state = mpc1_read_mpcc_state,
+ .insert_plane = mpc1_insert_plane,
+ .remove_mpcc = mpc1_remove_mpcc,
+ .mpc_init = mpc32_mpc_init,
+ .mpc_init_single_inst = mpc3_mpc_init_single_inst,
+ .update_blending = mpc2_update_blending,
+ .cursor_lock = mpc1_cursor_lock,
+ .get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp,
+ .wait_for_idle = mpc2_assert_idle_mpcc,
+ .assert_mpcc_idle_before_connect = mpc2_assert_mpcc_idle_before_connect,
+ .init_mpcc_list_from_hw = mpc1_init_mpcc_list_from_hw,
+ .set_denorm = mpc3_set_denorm,
+ .set_denorm_clamp = mpc3_set_denorm_clamp,
+ .set_output_csc = mpc3_set_output_csc,
+ .set_ocsc_default = mpc3_set_ocsc_default,
+ .set_output_gamma = mpc3_set_output_gamma,
+ .insert_plane_to_secondary = NULL, /* explicitly left without a handler */
+ .remove_mpcc_from_secondary = NULL, /* explicitly left without a handler */
+ .set_dwb_mux = mpc3_set_dwb_mux,
+ .disable_dwb_mux = mpc3_disable_dwb_mux,
+ .is_dwb_idle = mpc3_is_dwb_idle,
+ .set_gamut_remap = mpc401_set_gamut_remap, /* NOTE(review): mpc401_get_gamut_remap is defined in this file but not referenced by this table - confirm whether a matching hook exists */
+ .program_shaper = mpc32_program_shaper,
+ .program_3dlut = mpc32_program_3dlut,
+ .program_1dlut = mpc32_program_post1dlut,
+ .acquire_rmu = NULL, /* explicitly left without a handler */
+ .release_rmu = NULL, /* explicitly left without a handler */
+ .power_on_mpc_mem_pwr = mpc3_power_on_ogam_lut,
+ .get_mpc_out_mux = mpc1_get_mpc_out_mux,
+ .set_bg_color = mpc1_set_bg_color,
+ .set_movable_cm_location = mpc401_set_movable_cm_location,
+ .update_3dlut_fast_load_select = mpc401_update_3dlut_fast_load_select,
+ .populate_lut = mpc401_populate_lut,
+ .program_lut_read_write_control = mpc401_program_lut_read_write_control,
+ .program_lut_mode = mpc401_program_lut_mode,
+};
+
+
+void dcn401_mpc_construct(struct dcn401_mpc *mpc401, /* Initialise a caller-provided dcn401_mpc: context, function table, register tables and MPCC array */
+ struct dc_context *ctx,
+ const struct dcn401_mpc_registers *mpc_regs, /* the three table pointers are stored as given, not copied */
+ const struct dcn401_mpc_shift *mpc_shift,
+ const struct dcn401_mpc_mask *mpc_mask,
+ int num_mpcc, /* stored in mpc401->num_mpcc; does not bound the init loop below */
+ int num_rmu) /* stored in mpc401->num_rmu */
+{
+ int i;
+
+ mpc401->base.ctx = ctx;
+
+ mpc401->base.funcs = &dcn401_mpc_funcs;
+
+ mpc401->mpc_regs = mpc_regs;
+ mpc401->mpc_shift = mpc_shift;
+ mpc401->mpc_mask = mpc_mask;
+
+ mpc401->mpcc_in_use_mask = 0; /* starts with no bits set */
+ mpc401->num_mpcc = num_mpcc;
+ mpc401->num_rmu = num_rmu;
+
+ for (i = 0; i < MAX_MPCC; i++) /* every slot up to MAX_MPCC is initialised, regardless of num_mpcc */
+ mpc3_init_mpcc(&mpc401->base.mpcc_array[i], i);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h
new file mode 100644
index 000000000000..fdc42f8ab3ff
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h
@@ -0,0 +1,257 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_MPCC_DCN401_H__
+#define __DC_MPCC_DCN401_H__
+#include "dcn30/dcn30_mpc.h"
+#include "dcn32/dcn32_mpc.h"
+
+#define TO_DCN401_MPC(mpc_base) \
+ container_of(mpc_base, struct dcn401_mpc, base)
+
+#define MPC_REG_VARIABLE_LIST_DCN4_01 \
+ MPC_REG_VARIABLE_LIST_DCN3_0; \
+ MPC_REG_VARIABLE_LIST_DCN32; \
+ uint32_t MPCC_MCM_FIRST_GAMUT_REMAP_COEF_FORMAT[MAX_MPCC]; \
+ uint32_t MPCC_MCM_FIRST_GAMUT_REMAP_MODE[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_B[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_B[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_B[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_B[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_B[MAX_MPCC]; \
+ uint32_t MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_B[MAX_MPCC]; \
+ uint32_t MPCC_MCM_SECOND_GAMUT_REMAP_COEF_FORMAT[MAX_MPCC]; \
+ uint32_t MPCC_MCM_SECOND_GAMUT_REMAP_MODE[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_B[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_B[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_B[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_B[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_B[MAX_MPCC]; \
+ uint32_t MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_B[MAX_MPCC]; \
+ uint32_t MPCC_MCM_3DLUT_FAST_LOAD_SELECT[MAX_MPCC]; \
+ uint32_t MPCC_MCM_3DLUT_FAST_LOAD_STATUS[MAX_MPCC];
+
+#define MPC_COMMON_MASK_SH_LIST_DCN4_01(mask_sh) \
+ MPC_COMMON_MASK_SH_LIST_DCN32(mask_sh), \
+ SF(MPCC_MCM0_MPCC_MCM_FIRST_GAMUT_REMAP_COEF_FORMAT, MPCC_MCM_FIRST_GAMUT_REMAP_COEF_FORMAT, mask_sh), \
+ SF(MPCC_MCM0_MPCC_MCM_FIRST_GAMUT_REMAP_MODE, MPCC_MCM_FIRST_GAMUT_REMAP_MODE, mask_sh), \
+ SF(MPCC_MCM0_MPCC_MCM_FIRST_GAMUT_REMAP_MODE, MPCC_MCM_FIRST_GAMUT_REMAP_MODE_CURRENT, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A, MPCC_MCM_FIRST_GAMUT_REMAP_C11_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A, MPCC_MCM_FIRST_GAMUT_REMAP_C12_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_A, MPCC_MCM_FIRST_GAMUT_REMAP_C13_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_A, MPCC_MCM_FIRST_GAMUT_REMAP_C14_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_A, MPCC_MCM_FIRST_GAMUT_REMAP_C21_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_A, MPCC_MCM_FIRST_GAMUT_REMAP_C22_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_A, MPCC_MCM_FIRST_GAMUT_REMAP_C23_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_A, MPCC_MCM_FIRST_GAMUT_REMAP_C24_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_A, MPCC_MCM_FIRST_GAMUT_REMAP_C31_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_A, MPCC_MCM_FIRST_GAMUT_REMAP_C32_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A, MPCC_MCM_FIRST_GAMUT_REMAP_C33_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A, MPCC_MCM_FIRST_GAMUT_REMAP_C34_A, mask_sh), \
+ SF(MPCC_MCM0_MPCC_MCM_SECOND_GAMUT_REMAP_COEF_FORMAT, MPCC_MCM_SECOND_GAMUT_REMAP_COEF_FORMAT, mask_sh), \
+ SF(MPCC_MCM0_MPCC_MCM_SECOND_GAMUT_REMAP_MODE, MPCC_MCM_SECOND_GAMUT_REMAP_MODE, mask_sh), \
+ SF(MPCC_MCM0_MPCC_MCM_SECOND_GAMUT_REMAP_MODE, MPCC_MCM_SECOND_GAMUT_REMAP_MODE_CURRENT, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A, MPCC_MCM_SECOND_GAMUT_REMAP_C11_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A, MPCC_MCM_SECOND_GAMUT_REMAP_C12_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_A, MPCC_MCM_SECOND_GAMUT_REMAP_C13_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_A, MPCC_MCM_SECOND_GAMUT_REMAP_C14_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_A, MPCC_MCM_SECOND_GAMUT_REMAP_C21_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_A, MPCC_MCM_SECOND_GAMUT_REMAP_C22_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_A, MPCC_MCM_SECOND_GAMUT_REMAP_C23_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_A, MPCC_MCM_SECOND_GAMUT_REMAP_C24_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_A, MPCC_MCM_SECOND_GAMUT_REMAP_C31_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_A, MPCC_MCM_SECOND_GAMUT_REMAP_C32_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A, MPCC_MCM_SECOND_GAMUT_REMAP_C33_A, mask_sh), \
+ SF(MPCC_MCM0_MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A, MPCC_MCM_SECOND_GAMUT_REMAP_C34_A, mask_sh), \
+ SF(MPCC_MCM0_MPCC_MCM_3DLUT_FAST_LOAD_SELECT, MPCC_MCM_3DLUT_FL_SEL, mask_sh), \
+ SF(MPCC_MCM0_MPCC_MCM_3DLUT_FAST_LOAD_STATUS, MPCC_MCM_3DLUT_FL_DONE, mask_sh), \
+ SF(MPCC_MCM0_MPCC_MCM_3DLUT_FAST_LOAD_STATUS, MPCC_MCM_3DLUT_FL_SOFT_UNDERFLOW, mask_sh), \
+ SF(MPCC_MCM0_MPCC_MCM_3DLUT_FAST_LOAD_STATUS, MPCC_MCM_3DLUT_FL_HARD_UNDERFLOW, mask_sh)
+
+
+#define MPC_REG_LIST_DCN4_01_RI(inst) \
+ MPC_REG_LIST_DCN3_2_RI(inst),\
+ SRII(MPCC_MCM_FIRST_GAMUT_REMAP_COEF_FORMAT, MPCC_MCM, inst),\
+ SRII(MPCC_MCM_FIRST_GAMUT_REMAP_MODE, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_A, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_A, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_A, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_A, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_A, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_A, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C11_C12_B, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C13_C14_B, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C21_C22_B, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C23_C24_B, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C31_C32_B, MPCC_MCM, inst),\
+ SRII(MPC_MCM_FIRST_GAMUT_REMAP_C33_C34_B, MPCC_MCM, inst),\
+ SRII(MPCC_MCM_SECOND_GAMUT_REMAP_COEF_FORMAT, MPCC_MCM, inst), \
+ SRII(MPCC_MCM_SECOND_GAMUT_REMAP_MODE, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_A, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_A, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_A, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_A, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_A, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_A, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C11_C12_B, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C13_C14_B, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C21_C22_B, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C23_C24_B, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C31_C32_B, MPCC_MCM, inst), \
+ SRII(MPC_MCM_SECOND_GAMUT_REMAP_C33_C34_B, MPCC_MCM, inst), \
+ SRII(MPCC_MCM_3DLUT_FAST_LOAD_STATUS, MPCC_MCM, inst),\
+ SRII(MPCC_MCM_3DLUT_FAST_LOAD_SELECT, MPCC_MCM, inst)
+
+#define MPC_REG_FIELD_LIST_DCN4_01(type)\
+ MPC_REG_FIELD_LIST_DCN3_0(type);\
+ MPC_REG_FIELD_LIST_DCN32(type);\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_COEF_FORMAT;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_MODE;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_MODE_CURRENT;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C11_A;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C12_A;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C13_A;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C14_A;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C21_A;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C22_A;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C23_A;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C24_A;\
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C31_A; \
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C32_A; \
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C33_A; \
+ type MPCC_MCM_FIRST_GAMUT_REMAP_C34_A; \
+ type MPCC_MCM_SECOND_GAMUT_REMAP_COEF_FORMAT;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_MODE;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_MODE_CURRENT;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C11_A;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C12_A;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C13_A;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C14_A;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C21_A;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C22_A;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C23_A;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C24_A;\
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C31_A; \
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C32_A; \
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C33_A; \
+ type MPCC_MCM_SECOND_GAMUT_REMAP_C34_A; \
+ type MPCC_MCM_3DLUT_FL_SEL;\
+ type MPCC_MCM_3DLUT_FL_DONE;\
+ type MPCC_MCM_3DLUT_FL_SOFT_UNDERFLOW;\
+ type MPCC_MCM_3DLUT_FL_HARD_UNDERFLOW
+
+struct dcn401_mpc_shift {
+ MPC_REG_FIELD_LIST_DCN4_01(uint8_t);
+};
+
+struct dcn401_mpc_mask {
+ MPC_REG_FIELD_LIST_DCN4_01(uint32_t);
+};
+
+struct dcn401_mpc_registers {
+ MPC_REG_VARIABLE_LIST_DCN4_01
+};
+
+struct dcn401_mpc { /* DCN4.01 MPC instance; TO_DCN401_MPC() recovers it from a pointer to the embedded base */
+ struct mpc base;
+
+ int mpcc_in_use_mask; /* set to 0 by dcn401_mpc_construct() */
+ int num_mpcc; /* copied from the constructor's num_mpcc argument */
+ const struct dcn401_mpc_registers *mpc_regs; /* register table supplied to the constructor */
+ const struct dcn401_mpc_shift *mpc_shift; /* per-field shift table (uint8_t entries) */
+ const struct dcn401_mpc_mask *mpc_mask; /* per-field mask table (uint32_t entries) */
+ int num_rmu; /* copied from the constructor's num_rmu argument */
+};
+void dcn401_mpc_construct(struct dcn401_mpc *mpc401,
+ struct dc_context *ctx,
+ const struct dcn401_mpc_registers *mpc_regs,
+ const struct dcn401_mpc_shift *mpc_shift,
+ const struct dcn401_mpc_mask *mpc_mask,
+ int num_mpcc,
+ int num_rmu);
+
+void mpc401_set_movable_cm_location(struct mpc *mpc, enum mpcc_movable_cm_location location, int mpcc_id);
+void mpc401_populate_lut(struct mpc *mpc, const enum MCM_LUT_ID id, const union mcm_lut_params params,
+ bool lut_bank_a, int mpcc_id);
+
+void mpc401_program_lut_mode(
+ struct mpc *mpc,
+ const enum MCM_LUT_ID id,
+ const enum MCM_LUT_XABLE xable,
+ bool lut_bank_a,
+ int mpcc_id);
+
+void mpc401_program_lut_read_write_control(
+ struct mpc *mpc,
+ const enum MCM_LUT_ID id,
+ bool lut_bank_a,
+ int mpcc_id);
+
+void mpc401_set_gamut_remap(
+ struct mpc *mpc,
+ int mpcc_id,
+ const struct mpc_grph_gamut_adjustment *adjust);
+
+void mpc401_get_gamut_remap(
+ struct mpc *mpc,
+ int mpcc_id,
+ struct mpc_grph_gamut_adjustment *adjust);
+
+void mpc401_update_3dlut_fast_load_select(
+ struct mpc *mpc,
+ int mpcc_id,
+ int hubp_idx);
+
+void mpc_program_gamut_remap(
+ struct mpc *mpc,
+ unsigned int mpcc_id,
+ const uint16_t *regval,
+ enum mpcc_gamut_remap_id gamut_remap_block_id,
+ enum mpcc_gamut_remap_mode_select mode_select);
+
+void mpc_read_gamut_remap(struct mpc *mpc,
+ int mpcc_id,
+ uint16_t *regval,
+ enum mpcc_gamut_remap_id gamut_remap_block_id,
+ uint32_t *mode_select);
+
+void mpc401_update_3dlut_fast_load_select( /* NOTE(review): this prototype is already declared earlier in this header; the repeat is redundant */
+ struct mpc *mpc,
+ int mpcc_id,
+ int hubp_idx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/opp/Makefile b/drivers/gpu/drm/amd/display/dc/opp/Makefile
new file mode 100644
index 000000000000..1be76754db30
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/opp/Makefile
@@ -0,0 +1,51 @@
+#
+# Copyright 2020 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+#
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN10
+###############################################################################
+OPP_DCN10 = dcn10_opp.o
+
+AMD_DAL_OPP_DCN10 = $(addprefix $(AMDDALPATH)/dc/opp/dcn10/,$(OPP_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN10)
+###############################################################################
+# DCN20
+###############################################################################
+OPP_DCN20 = dcn20_opp.o
+
+AMD_DAL_OPP_DCN20 = $(addprefix $(AMDDALPATH)/dc/opp/dcn20/,$(OPP_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN20)
+###############################################################################
+# DCN35
+###############################################################################
+OPP_DCN35 = dcn35_opp.o
+
+AMD_DAL_OPP_DCN35 = $(addprefix $(AMDDALPATH)/dc/opp/dcn35/,$(OPP_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN35)
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c
index 0dec57679269..71e9288d60ed 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c
@@ -23,6 +23,7 @@
*
*/
+#include "core_types.h"
#include "dm_services.h"
#include "dcn10_opp.h"
#include "reg_helper.h"
@@ -160,10 +161,17 @@ static void opp1_set_pixel_encoding(
struct dcn10_opp *oppn10,
const struct clamping_and_pixel_encoding_params *params)
{
+ bool force_chroma_subsampling_1tap =
+ oppn10->base.ctx->dc->debug.force_chroma_subsampling_1tap;
+
switch (params->pixel_encoding) {
case PIXEL_ENCODING_RGB:
case PIXEL_ENCODING_YCBCR444:
+ REG_UPDATE_3(FMT_CONTROL,
+ FMT_PIXEL_ENCODING, 0,
+ FMT_SUBSAMPLING_MODE, 0,
+ FMT_CBCR_BIT_REDUCTION_BYPASS, 0);
REG_UPDATE(FMT_CONTROL, FMT_PIXEL_ENCODING, 0);
break;
case PIXEL_ENCODING_YCBCR422:
@@ -173,11 +181,17 @@ static void opp1_set_pixel_encoding(
FMT_CBCR_BIT_REDUCTION_BYPASS, 0);
break;
case PIXEL_ENCODING_YCBCR420:
- REG_UPDATE(FMT_CONTROL, FMT_PIXEL_ENCODING, 2);
+ REG_UPDATE_3(FMT_CONTROL,
+ FMT_PIXEL_ENCODING, 2,
+ FMT_SUBSAMPLING_MODE, 2,
+ FMT_CBCR_BIT_REDUCTION_BYPASS, 1);
break;
default:
break;
}
+
+ if (force_chroma_subsampling_1tap)
+ REG_UPDATE(FMT_CONTROL, FMT_SUBSAMPLING_MODE, 0);
}
/**
@@ -377,6 +391,7 @@ static const struct opp_funcs dcn10_opp_funcs = {
.opp_set_disp_pattern_generator = NULL,
.opp_program_dpg_dimensions = NULL,
.dpg_is_blanked = NULL,
+ .dpg_is_pending = NULL,
.opp_destroy = opp1_destroy
};
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h
index 2c0ecfa5a643..c87de68a509e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h
@@ -79,6 +79,8 @@
OPP_SF(FMT0_FMT_CONTROL, FMT_SPATIAL_DITHER_FRAME_COUNTER_MAX, mask_sh), \
OPP_SF(FMT0_FMT_CONTROL, FMT_SPATIAL_DITHER_FRAME_COUNTER_BIT_SWAP, mask_sh), \
OPP_SF(FMT0_FMT_CONTROL, FMT_PIXEL_ENCODING, mask_sh), \
+ OPP_SF(FMT0_FMT_CONTROL, FMT_SUBSAMPLING_MODE, mask_sh), \
+ OPP_SF(FMT0_FMT_CONTROL, FMT_CBCR_BIT_REDUCTION_BYPASS, mask_sh), \
OPP_SF(FMT0_FMT_CONTROL, FMT_STEREOSYNC_OVERRIDE, mask_sh), \
OPP_SF(FMT0_FMT_DITHER_RAND_R_SEED, FMT_RAND_R_SEED, mask_sh), \
OPP_SF(FMT0_FMT_DITHER_RAND_G_SEED, FMT_RAND_G_SEED, mask_sh), \
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c
index 0784d0198661..f5fe0cac7cb0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c
@@ -23,6 +23,7 @@
*
*/
+#include "core_types.h"
#include "dm_services.h"
#include "dcn20_opp.h"
#include "reg_helper.h"
@@ -337,19 +338,45 @@ bool opp2_dpg_is_blanked(struct output_pixel_processor *opp)
(double_buffer_pending == 0);
}
-void opp2_program_left_edge_extra_pixel (
+bool opp2_dpg_is_pending(struct output_pixel_processor *opp)
+{
+ struct dcn20_opp *oppn20 = TO_DCN20_OPP(opp);
+ uint32_t double_buffer_pending;
+ uint32_t dpg_en;
+
+ REG_GET(DPG_CONTROL, DPG_EN, &dpg_en);
+
+ REG_GET(DPG_STATUS, DPG_DOUBLE_BUFFER_PENDING, &double_buffer_pending);
+
+ return (dpg_en == 1 && double_buffer_pending == 1);
+}
+
+void opp2_program_left_edge_extra_pixel(
struct output_pixel_processor *opp,
- bool count)
+ enum dc_pixel_encoding pixel_encoding,
+ bool is_primary)
{
struct dcn20_opp *oppn20 = TO_DCN20_OPP(opp);
+ uint32_t count = opp2_get_left_edge_extra_pixel_count(opp, pixel_encoding, is_primary);
- /* Specifies the number of extra left edge pixels that are supplied to
+ /*
+ * Specifies the number of extra left edge pixels that are supplied to
* the 422 horizontal chroma sub-sample filter.
- * Note that when left edge pixel is not "0", fmt pixel encoding can be in either 420 or 422 mode
- * */
+ */
REG_UPDATE(FMT_422_CONTROL, FMT_LEFT_EDGE_EXTRA_PIXEL_COUNT, count);
}
+uint32_t opp2_get_left_edge_extra_pixel_count(struct output_pixel_processor *opp,
+ enum dc_pixel_encoding pixel_encoding, bool is_primary)
+{
+ if ((pixel_encoding == PIXEL_ENCODING_YCBCR422 || pixel_encoding == PIXEL_ENCODING_YCBCR420) &&
+ !opp->ctx->dc->debug.force_chroma_subsampling_1tap &&
+ !is_primary)
+ return 1;
+ else
+ return 0;
+}
+
/*****************************************/
/* Constructor, Destructor */
/*****************************************/
@@ -363,9 +390,11 @@ static struct opp_funcs dcn20_opp_funcs = {
.opp_set_disp_pattern_generator = opp2_set_disp_pattern_generator,
.opp_program_dpg_dimensions = opp2_program_dpg_dimensions,
.dpg_is_blanked = opp2_dpg_is_blanked,
+ .dpg_is_pending = opp2_dpg_is_pending,
.opp_dpg_set_blank_color = opp2_dpg_set_blank_color,
.opp_destroy = opp1_destroy,
.opp_program_left_edge_extra_pixel = opp2_program_left_edge_extra_pixel,
+ .opp_get_left_edge_extra_pixel_count = opp2_get_left_edge_extra_pixel_count,
};
void dcn20_opp_construct(struct dcn20_opp *oppn20,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h
index 3ab221bdd27d..34936e6c49f3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h
@@ -159,12 +159,16 @@ void opp2_program_dpg_dimensions(
bool opp2_dpg_is_blanked(struct output_pixel_processor *opp);
+bool opp2_dpg_is_pending(struct output_pixel_processor *opp);
+
void opp2_dpg_set_blank_color(
struct output_pixel_processor *opp,
const struct tg_color *color);
void opp2_program_left_edge_extra_pixel (
struct output_pixel_processor *opp,
- bool count);
+ enum dc_pixel_encoding pixel_encoding, bool is_primary);
+uint32_t opp2_get_left_edge_extra_pixel_count(struct output_pixel_processor *opp,
+ enum dc_pixel_encoding pixel_encoding, bool is_primary);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c
new file mode 100644
index 000000000000..3542b51c9aac
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dcn35_opp.h"
+#include "reg_helper.h"
+
+#define REG(reg) ((const struct dcn35_opp_registers *)(oppn20->regs))->reg
+
+#undef FN
+#define FN(reg_name, field_name) \
+ ((const struct dcn35_opp_shift *)(oppn20->opp_shift))->field_name, \
+ ((const struct dcn35_opp_mask *)(oppn20->opp_mask))->field_name
+
+#define CTX oppn20->base.ctx
+
+void dcn35_opp_construct(struct dcn20_opp *oppn20, struct dc_context *ctx,
+ uint32_t inst, const struct dcn35_opp_registers *regs,
+ const struct dcn35_opp_shift *opp_shift,
+ const struct dcn35_opp_mask *opp_mask)
+{
+ dcn20_opp_construct(oppn20, ctx, inst,
+ (const struct dcn20_opp_registers *)regs,
+ (const struct dcn20_opp_shift *)opp_shift,
+ (const struct dcn20_opp_mask *)opp_mask);
+}
+
+void dcn35_opp_set_fgcg(struct dcn20_opp *oppn20, bool enable)
+{
+ REG_UPDATE(OPP_TOP_CLK_CONTROL, OPP_FGCG_REP_DIS, !enable);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h
new file mode 100644
index 000000000000..a9a413527801
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DCN35_OPP_H
+#define __DCN35_OPP_H
+
+#include "dcn20/dcn20_opp.h"
+
+#define OPP_REG_VARIABLE_LIST_DCN3_5 \
+ OPP_REG_VARIABLE_LIST_DCN2_0; \
+ uint32_t OPP_TOP_CLK_CONTROL
+
+#define OPP_MASK_SH_LIST_DCN35(mask_sh) \
+ OPP_MASK_SH_LIST_DCN20(mask_sh), \
+ OPP_SF(OPP_TOP_CLK_CONTROL, OPP_FGCG_REP_DIS, mask_sh)
+
+#define OPP_DCN35_REG_FIELD_LIST(type) \
+ struct { \
+ OPP_DCN20_REG_FIELD_LIST(type); \
+ type OPP_FGCG_REP_DIS; \
+ }
+
+struct dcn35_opp_registers {
+ OPP_REG_VARIABLE_LIST_DCN3_5;
+};
+
+struct dcn35_opp_shift {
+ OPP_DCN35_REG_FIELD_LIST(uint8_t);
+};
+
+struct dcn35_opp_mask {
+ OPP_DCN35_REG_FIELD_LIST(uint32_t);
+};
+
+void dcn35_opp_construct(struct dcn20_opp *oppn20,
+ struct dc_context *ctx,
+ uint32_t inst,
+ const struct dcn35_opp_registers *regs,
+ const struct dcn35_opp_shift *opp_shift,
+ const struct dcn35_opp_mask *opp_mask);
+
+void dcn35_opp_set_fgcg(struct dcn20_opp *oppn20, bool enable);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/optc/Makefile b/drivers/gpu/drm/amd/display/dc/optc/Makefile
new file mode 100644
index 000000000000..29fb610c8660
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/optc/Makefile
@@ -0,0 +1,114 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'optc' sub-component of DAL.
+#
+
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+OPTC_DCN10 = dcn10_optc.o
+
+AMD_DAL_OPTC_DCN10 = $(addprefix $(AMDDALPATH)/dc/optc/dcn10/,$(OPTC_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN10)
+
+###############################################################################
+
+OPTC_DCN20 = dcn20_optc.o
+
+AMD_DAL_OPTC_DCN20 = $(addprefix $(AMDDALPATH)/dc/optc/dcn20/,$(OPTC_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN20)
+
+###############################################################################
+
+OPTC_DCN201 = dcn201_optc.o
+
+AMD_DAL_OPTC_DCN201 = $(addprefix $(AMDDALPATH)/dc/optc/dcn201/,$(OPTC_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN201)
+
+###############################################################################
+
+###############################################################################
+
+###############################################################################
+
+OPTC_DCN30 = dcn30_optc.o
+
+AMD_DAL_OPTC_DCN30 = $(addprefix $(AMDDALPATH)/dc/optc/dcn30/,$(OPTC_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN30)
+
+###############################################################################
+
+OPTC_DCN301 = dcn301_optc.o
+
+AMD_DAL_OPTC_DCN301 = $(addprefix $(AMDDALPATH)/dc/optc/dcn301/,$(OPTC_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN301)
+
+###############################################################################
+
+OPTC_DCN31 = dcn31_optc.o
+
+AMD_DAL_OPTC_DCN31 = $(addprefix $(AMDDALPATH)/dc/optc/dcn31/,$(OPTC_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN31)
+
+###############################################################################
+
+OPTC_DCN314 = dcn314_optc.o
+
+AMD_DAL_OPTC_DCN314 = $(addprefix $(AMDDALPATH)/dc/optc/dcn314/,$(OPTC_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN314)
+
+###############################################################################
+
+OPTC_DCN32 = dcn32_optc.o
+
+AMD_DAL_OPTC_DCN32 = $(addprefix $(AMDDALPATH)/dc/optc/dcn32/,$(OPTC_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN32)
+
+###############################################################################
+
+OPTC_DCN35 = dcn35_optc.o
+
+AMD_DAL_OPTC_DCN35 = $(addprefix $(AMDDALPATH)/dc/optc/dcn35/,$(OPTC_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN35)
+
+###############################################################################
+
+###############################################################################
+OPTC_DCN401 = dcn401_optc.o
+
+AMD_DAL_OPTC_DCN401 = $(addprefix $(AMDDALPATH)/dc/optc/dcn401/,$(OPTC_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_OPTC_DCN401)
+endif
+
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
index 0e8f4f36c87c..6f7b0f816f2a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
@@ -65,7 +65,8 @@ void optc1_program_global_sync(
int vready_offset,
int vstartup_start,
int vupdate_offset,
- int vupdate_width)
+ int vupdate_width,
+ int pstate_keepout)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -73,6 +74,7 @@ void optc1_program_global_sync(
optc1->vstartup_start = vstartup_start;
optc1->vupdate_offset = vupdate_offset;
optc1->vupdate_width = vupdate_width;
+ optc1->pstate_keepout = pstate_keepout;
if (optc1->vstartup_start == 0) {
BREAK_TO_DEBUGGER();
@@ -146,6 +148,7 @@ void optc1_setup_vertical_interrupt2(
* @vstartup_start: Vstartup period.
* @vupdate_offset: Vupdate starting position.
* @vupdate_width: Vupdate duration.
+ * @pstate_keepout: determines low power mode timing during refresh
* @signal: DC signal types.
* @use_vbios: to program timings from BIOS command table.
*
@@ -157,6 +160,7 @@ void optc1_program_timing(
int vstartup_start,
int vupdate_offset,
int vupdate_width,
+ int pstate_keepout,
const enum signal_type signal,
bool use_vbios)
{
@@ -177,6 +181,7 @@ void optc1_program_timing(
optc1->vstartup_start = vstartup_start;
optc1->vupdate_offset = vupdate_offset;
optc1->vupdate_width = vupdate_width;
+ optc1->pstate_keepout = pstate_keepout;
patched_crtc_timing = *dc_crtc_timing;
apply_front_porch_workaround(&patched_crtc_timing);
optc1->orginal_patched_timing = patched_crtc_timing;
@@ -282,7 +287,8 @@ void optc1_program_timing(
vready_offset,
vstartup_start,
vupdate_offset,
- vupdate_width);
+ vupdate_width,
+ pstate_keepout);
optc->funcs->set_vtg_params(optc, dc_crtc_timing, true);
@@ -296,8 +302,7 @@ void optc1_program_timing(
/* Enable stereo - only when we need to pack 3D frame. Other types
* of stereo handled in explicit call
*/
-
- if (optc1_is_two_pixels_per_containter(&patched_crtc_timing) || optc1->opp_count == 2)
+ if (optc->funcs->is_two_pixels_per_container(&patched_crtc_timing) || optc1->opp_count == 2)
h_div = H_TIMING_DIV_BY2;
if (REG(OPTC_DATA_FORMAT_CONTROL) && optc1->tg_mask->OPTC_DATA_FORMAT != 0) {
@@ -1307,7 +1312,7 @@ bool optc1_get_hw_timing(struct timing_generator *tg,
if (tg == NULL || hw_crtc_timing == NULL)
return false;
- optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s);
+ optc1_read_otg_state(tg, &s);
hw_crtc_timing->h_total = s.h_total + 1;
hw_crtc_timing->h_addressable = s.h_total - ((s.h_total - s.h_blank_start) + s.h_blank_end);
@@ -1323,9 +1328,11 @@ bool optc1_get_hw_timing(struct timing_generator *tg,
}
-void optc1_read_otg_state(struct optc *optc1,
+void optc1_read_otg_state(struct timing_generator *optc,
struct dcn_otg_state *s)
{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
REG_GET(OTG_CONTROL,
OTG_MASTER_EN, &s->otg_enabled);
@@ -1383,6 +1390,9 @@ void optc1_read_otg_state(struct optc *optc1,
REG_GET(OTG_VERTICAL_INTERRUPT2_POSITION,
OTG_VERTICAL_INTERRUPT2_LINE_START, &s->vertical_interrupt2_line);
+
+ s->otg_master_update_lock = REG_READ(OTG_MASTER_UPDATE_LOCK);
+ s->otg_double_buffer_control = REG_READ(OTG_DOUBLE_BUFFER_CONTROL);
}
bool optc1_get_otg_active_size(struct timing_generator *optc,
@@ -1411,8 +1421,8 @@ bool optc1_get_otg_active_size(struct timing_generator *optc,
OTG_H_BLANK_START, &h_blank_start,
OTG_H_BLANK_END, &h_blank_end);
- *otg_active_width = v_blank_start - v_blank_end;
- *otg_active_height = h_blank_start - h_blank_end;
+ *otg_active_width = h_blank_start - h_blank_end;
+ *otg_active_height = v_blank_start - v_blank_end;
return true;
}
@@ -1462,37 +1472,71 @@ bool optc1_configure_crc(struct timing_generator *optc,
if (!optc1_is_tg_enabled(optc))
return false;
- REG_WRITE(OTG_CRC_CNTL, 0);
+ if (!params->enable || params->reset)
+ REG_WRITE(OTG_CRC_CNTL, 0);
if (!params->enable)
return true;
/* Program frame boundaries */
- /* Window A x axis start and end. */
- REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL,
- OTG_CRC0_WINDOWA_X_START, params->windowa_x_start,
- OTG_CRC0_WINDOWA_X_END, params->windowa_x_end);
-
- /* Window A y axis start and end. */
- REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL,
- OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start,
- OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end);
-
- /* Window B x axis start and end. */
- REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL,
- OTG_CRC0_WINDOWB_X_START, params->windowb_x_start,
- OTG_CRC0_WINDOWB_X_END, params->windowb_x_end);
-
- /* Window B y axis start and end. */
- REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL,
- OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start,
- OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end);
-
- /* Set crc mode and selection, and enable. Only using CRC0*/
- REG_UPDATE_3(OTG_CRC_CNTL,
- OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
- OTG_CRC0_SELECT, params->selection,
- OTG_CRC_EN, 1);
+ switch (params->crc_eng_inst) {
+ case 0:
+ /* Window A x axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL,
+ OTG_CRC0_WINDOWA_X_START, params->windowa_x_start,
+ OTG_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL,
+ OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+ OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL,
+ OTG_CRC0_WINDOWB_X_START, params->windowb_x_start,
+ OTG_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL,
+ OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+ OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+ /* Set crc mode and selection, and enable.*/
+ REG_UPDATE_3(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC0_SELECT, params->selection,
+ OTG_CRC_EN, 1);
+ break;
+ case 1:
+ /* Window A x axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWA_X_CONTROL,
+ OTG_CRC1_WINDOWA_X_START, params->windowa_x_start,
+ OTG_CRC1_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWA_Y_CONTROL,
+ OTG_CRC1_WINDOWA_Y_START, params->windowa_y_start,
+ OTG_CRC1_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWB_X_CONTROL,
+ OTG_CRC1_WINDOWB_X_START, params->windowb_x_start,
+ OTG_CRC1_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWB_Y_CONTROL,
+ OTG_CRC1_WINDOWB_Y_START, params->windowb_y_start,
+ OTG_CRC1_WINDOWB_Y_END, params->windowb_y_end);
+
+ /* Set crc mode and selection, and enable.*/
+ REG_UPDATE_3(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC1_SELECT, params->selection,
+ OTG_CRC_EN, 1);
+ break;
+ default:
+ return false;
+ }
return true;
}
@@ -1501,6 +1545,7 @@ bool optc1_configure_crc(struct timing_generator *optc,
* optc1_get_crc - Capture CRC result per component
*
* @optc: timing_generator instance.
+ * @idx: index of crc engine to get CRC from
* @r_cr: 16-bit primary CRC signature for red data.
* @g_y: 16-bit primary CRC signature for green data.
* @b_cb: 16-bit primary CRC signature for blue data.
@@ -1512,7 +1557,7 @@ bool optc1_configure_crc(struct timing_generator *optc,
* If CRC is disabled, return false; otherwise, return true, and the CRC
* results in the parameters.
*/
-bool optc1_get_crc(struct timing_generator *optc,
+bool optc1_get_crc(struct timing_generator *optc, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
uint32_t field = 0;
@@ -1524,18 +1569,55 @@ bool optc1_get_crc(struct timing_generator *optc,
if (!field)
return false;
- /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */
- REG_GET_2(OTG_CRC0_DATA_RG,
- CRC0_R_CR, r_cr,
- CRC0_G_Y, g_y);
+ switch (idx) {
+ case 0:
+ /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */
+ REG_GET_2(OTG_CRC0_DATA_RG,
+ CRC0_R_CR, r_cr,
+ CRC0_G_Y, g_y);
- /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */
- REG_GET(OTG_CRC0_DATA_B,
- CRC0_B_CB, b_cb);
+ /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */
+ REG_GET(OTG_CRC0_DATA_B,
+ CRC0_B_CB, b_cb);
+ break;
+ case 1:
+ /* OTG_CRC1_DATA_RG has the CRC16 results for the red and green component */
+ REG_GET_2(OTG_CRC1_DATA_RG,
+ CRC1_R_CR, r_cr,
+ CRC1_G_Y, g_y);
+
+ /* OTG_CRC1_DATA_B has the CRC16 results for the blue component */
+ REG_GET(OTG_CRC1_DATA_B,
+ CRC1_B_CB, b_cb);
+ break;
+ default:
+ return false;
+ }
return true;
}
+/* "Container" vs. "pixel" is a concept within HW blocks, mostly those closer to the back-end. It works like this:
+ *
+ * - In most of the formats (RGB or YCbCr 4:4:4, 4:2:2 uncompressed and DSC 4:2:2 Simple) pixel rate is the same as
+ * container rate.
+ *
+ * - In 4:2:0 (DSC or uncompressed) there are two pixels per container, hence the target container rate has to be
+ * halved to maintain the correct pixel rate.
+ *
+ * - Unlike 4:2:2 uncompressed, DSC 4:2:2 Native also has two pixels per container (this happens when DSC is applied
+ * to it) and has to be treated the same as 4:2:0, i.e. target container rate has to be halved in this case as well.
+ *
+ */
+bool optc1_is_two_pixels_per_container(const struct dc_crtc_timing *timing)
+{
+ bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420;
+
+ two_pix = two_pix || (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
+ && !timing->dsc_cfg.ycbcr422_simple);
+ return two_pix;
+}
+
static const struct timing_generator_funcs dcn10_tg_funcs = {
.validate_timing = optc1_validate_timing,
.program_timing = optc1_program_timing,
@@ -1582,6 +1664,8 @@ static const struct timing_generator_funcs dcn10_tg_funcs = {
.program_manual_trigger = optc1_program_manual_trigger,
.setup_manual_trigger = optc1_setup_manual_trigger,
.get_hw_timing = optc1_get_hw_timing,
+ .is_two_pixels_per_container = optc1_is_two_pixels_per_container,
+ .read_otg_state = optc1_read_otg_state,
};
void dcn10_timing_generator_init(struct optc *optc1)
@@ -1597,25 +1681,3 @@ void dcn10_timing_generator_init(struct optc *optc1)
optc1->min_h_sync_width = 4;
optc1->min_v_sync_width = 1;
}
-
-/* "Containter" vs. "pixel" is a concept within HW blocks, mostly those closer to the back-end. It works like this:
- *
- * - In most of the formats (RGB or YCbCr 4:4:4, 4:2:2 uncompressed and DSC 4:2:2 Simple) pixel rate is the same as
- * containter rate.
- *
- * - In 4:2:0 (DSC or uncompressed) there are two pixels per container, hence the target container rate has to be
- * halved to maintain the correct pixel rate.
- *
- * - Unlike 4:2:2 uncompressed, DSC 4:2:2 Native also has two pixels per container (this happens when DSC is applied
- * to it) and has to be treated the same as 4:2:0, i.e. target containter rate has to be halved in this case as well.
- *
- */
-bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
-{
- bool two_pix = timing->pixel_encoding == PIXEL_ENCODING_YCBCR420;
-
- two_pix = two_pix || (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
- && !timing->dsc_cfg.ycbcr422_simple);
- return two_pix;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
index db766689af58..8b2a8455eb56 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
@@ -26,7 +26,7 @@
#ifndef __DC_TIMING_GENERATOR_DCN10_H__
#define __DC_TIMING_GENERATOR_DCN10_H__
-#include "timing_generator.h"
+#include "optc.h"
#define DCN10TG_FROM_TG(tg)\
container_of(tg, struct optc, base)
@@ -86,6 +86,12 @@
SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\
SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\
SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\
+ SRI(OTG_CRC1_DATA_RG, OTG, inst),\
+ SRI(OTG_CRC1_DATA_B, OTG, inst),\
+ SRI(OTG_CRC1_WINDOWA_X_CONTROL, OTG, inst),\
+ SRI(OTG_CRC1_WINDOWA_Y_CONTROL, OTG, inst),\
+ SRI(OTG_CRC1_WINDOWB_X_CONTROL, OTG, inst),\
+ SRI(OTG_CRC1_WINDOWB_Y_CONTROL, OTG, inst),\
SR(GSL_SOURCE_SELECT),\
SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\
SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst)
@@ -98,97 +104,115 @@
SRI(OTG_MANUAL_FLOW_CONTROL, OTG, inst)
+#define OPTC_REG_VARIABLE_LIST_DCN \
+ uint32_t OTG_GLOBAL_CONTROL1; \
+ uint32_t OTG_GLOBAL_CONTROL2; \
+ uint32_t OTG_VERT_SYNC_CONTROL; \
+ uint32_t OTG_MASTER_UPDATE_MODE; \
+ uint32_t OTG_GSL_CONTROL; \
+ uint32_t OTG_VSTARTUP_PARAM; \
+ uint32_t OTG_VUPDATE_PARAM; \
+ uint32_t OTG_VREADY_PARAM; \
+ uint32_t OTG_BLANK_CONTROL; \
+ uint32_t OTG_MASTER_UPDATE_LOCK; \
+ uint32_t OTG_GLOBAL_CONTROL0; \
+ uint32_t OTG_DOUBLE_BUFFER_CONTROL; \
+ uint32_t OTG_H_TOTAL; \
+ uint32_t OTG_H_BLANK_START_END; \
+ uint32_t OTG_H_SYNC_A; \
+ uint32_t OTG_H_SYNC_A_CNTL; \
+ uint32_t OTG_H_TIMING_CNTL; \
+ uint32_t OTG_V_TOTAL; \
+ uint32_t OTG_V_BLANK_START_END; \
+ uint32_t OTG_V_SYNC_A; \
+ uint32_t OTG_V_SYNC_A_CNTL; \
+ uint32_t OTG_INTERLACE_CONTROL; \
+ uint32_t OTG_CONTROL; \
+ uint32_t OTG_STEREO_CONTROL; \
+ uint32_t OTG_3D_STRUCTURE_CONTROL; \
+ uint32_t OTG_STEREO_STATUS; \
+ uint32_t OTG_V_TOTAL_MAX; \
+ uint32_t OTG_V_TOTAL_MID; \
+ uint32_t OTG_V_TOTAL_MIN; \
+ uint32_t OTG_V_TOTAL_CONTROL; \
+ uint32_t OTG_V_COUNT_STOP_CONTROL; \
+ uint32_t OTG_V_COUNT_STOP_CONTROL2; \
+ uint32_t OTG_TRIGA_CNTL; \
+ uint32_t OTG_TRIGA_MANUAL_TRIG; \
+ uint32_t OTG_MANUAL_FLOW_CONTROL; \
+ uint32_t OTG_FORCE_COUNT_NOW_CNTL; \
+ uint32_t OTG_STATIC_SCREEN_CONTROL; \
+ uint32_t OTG_STATUS_FRAME_COUNT; \
+ uint32_t OTG_STATUS; \
+ uint32_t OTG_STATUS_POSITION; \
+ uint32_t OTG_NOM_VERT_POSITION; \
+ uint32_t OTG_BLACK_COLOR; \
+ uint32_t OTG_TEST_PATTERN_PARAMETERS; \
+ uint32_t OTG_TEST_PATTERN_CONTROL; \
+ uint32_t OTG_TEST_PATTERN_COLOR; \
+ uint32_t OTG_CLOCK_CONTROL; \
+ uint32_t OTG_VERTICAL_INTERRUPT0_CONTROL; \
+ uint32_t OTG_VERTICAL_INTERRUPT0_POSITION; \
+ uint32_t OTG_VERTICAL_INTERRUPT1_CONTROL; \
+ uint32_t OTG_VERTICAL_INTERRUPT1_POSITION; \
+ uint32_t OTG_VERTICAL_INTERRUPT2_CONTROL; \
+ uint32_t OTG_VERTICAL_INTERRUPT2_POSITION; \
+ uint32_t OPTC_INPUT_CLOCK_CONTROL; \
+ uint32_t OPTC_DATA_SOURCE_SELECT; \
+ uint32_t OPTC_MEMORY_CONFIG; \
+ uint32_t OPTC_INPUT_GLOBAL_CONTROL; \
+ uint32_t CONTROL; \
+ uint32_t OTG_GSL_WINDOW_X; \
+ uint32_t OTG_GSL_WINDOW_Y; \
+ uint32_t OTG_VUPDATE_KEEPOUT; \
+ uint32_t OTG_CRC_CNTL; \
+ uint32_t OTG_CRC_CNTL2; \
+ uint32_t OTG_CRC0_DATA_RG; \
+ uint32_t OTG_CRC0_DATA_B; \
+ uint32_t OTG_CRC1_DATA_B; \
+ uint32_t OTG_CRC2_DATA_B; \
+ uint32_t OTG_CRC3_DATA_B; \
+ uint32_t OTG_CRC1_DATA_RG; \
+ uint32_t OTG_CRC2_DATA_RG; \
+ uint32_t OTG_CRC3_DATA_RG; \
+ uint32_t OTG_CRC0_WINDOWA_X_CONTROL; \
+ uint32_t OTG_CRC0_WINDOWA_Y_CONTROL; \
+ uint32_t OTG_CRC0_WINDOWB_X_CONTROL; \
+ uint32_t OTG_CRC0_WINDOWB_Y_CONTROL; \
+ uint32_t OTG_CRC1_WINDOWA_X_CONTROL; \
+ uint32_t OTG_CRC1_WINDOWA_Y_CONTROL; \
+ uint32_t OTG_CRC1_WINDOWB_X_CONTROL; \
+ uint32_t OTG_CRC1_WINDOWB_Y_CONTROL; \
+ uint32_t GSL_SOURCE_SELECT; \
+ uint32_t DWB_SOURCE_SELECT; \
+ uint32_t OTG_DSC_START_POSITION; \
+ uint32_t OPTC_DATA_FORMAT_CONTROL; \
+ uint32_t OPTC_BYTES_PER_PIXEL; \
+ uint32_t OPTC_WIDTH_CONTROL; \
+ uint32_t OTG_DRR_CONTROL; \
+ uint32_t OTG_BLANK_DATA_COLOR; \
+ uint32_t OTG_BLANK_DATA_COLOR_EXT; \
+ uint32_t OTG_DRR_TRIGGER_WINDOW; \
+ uint32_t OTG_M_CONST_DTO0; \
+ uint32_t OTG_M_CONST_DTO1; \
+ uint32_t OTG_DRR_V_TOTAL_CHANGE; \
+ uint32_t OTG_GLOBAL_CONTROL4; \
+ uint32_t OTG_CRC0_WINDOWA_X_CONTROL_READBACK; \
+ uint32_t OTG_CRC0_WINDOWA_Y_CONTROL_READBACK; \
+ uint32_t OTG_CRC0_WINDOWB_X_CONTROL_READBACK; \
+ uint32_t OTG_CRC0_WINDOWB_Y_CONTROL_READBACK; \
+ uint32_t OTG_CRC1_WINDOWA_X_CONTROL_READBACK; \
+ uint32_t OTG_CRC1_WINDOWA_Y_CONTROL_READBACK; \
+ uint32_t OTG_CRC1_WINDOWB_X_CONTROL_READBACK; \
+ uint32_t OTG_CRC1_WINDOWB_Y_CONTROL_READBACK; \
+ uint32_t OPTC_CLOCK_CONTROL; \
+ uint32_t OPTC_WIDTH_CONTROL2; \
+ uint32_t OTG_PSTATE_REGISTER; \
+ uint32_t OTG_PIPE_UPDATE_STATUS; \
+ uint32_t INTERRUPT_DEST
+
struct dcn_optc_registers {
- uint32_t OTG_GLOBAL_CONTROL1;
- uint32_t OTG_GLOBAL_CONTROL2;
- uint32_t OTG_VERT_SYNC_CONTROL;
- uint32_t OTG_MASTER_UPDATE_MODE;
- uint32_t OTG_GSL_CONTROL;
- uint32_t OTG_VSTARTUP_PARAM;
- uint32_t OTG_VUPDATE_PARAM;
- uint32_t OTG_VREADY_PARAM;
- uint32_t OTG_BLANK_CONTROL;
- uint32_t OTG_MASTER_UPDATE_LOCK;
- uint32_t OTG_GLOBAL_CONTROL0;
- uint32_t OTG_DOUBLE_BUFFER_CONTROL;
- uint32_t OTG_H_TOTAL;
- uint32_t OTG_H_BLANK_START_END;
- uint32_t OTG_H_SYNC_A;
- uint32_t OTG_H_SYNC_A_CNTL;
- uint32_t OTG_H_TIMING_CNTL;
- uint32_t OTG_V_TOTAL;
- uint32_t OTG_V_BLANK_START_END;
- uint32_t OTG_V_SYNC_A;
- uint32_t OTG_V_SYNC_A_CNTL;
- uint32_t OTG_INTERLACE_CONTROL;
- uint32_t OTG_CONTROL;
- uint32_t OTG_STEREO_CONTROL;
- uint32_t OTG_3D_STRUCTURE_CONTROL;
- uint32_t OTG_STEREO_STATUS;
- uint32_t OTG_V_TOTAL_MAX;
- uint32_t OTG_V_TOTAL_MID;
- uint32_t OTG_V_TOTAL_MIN;
- uint32_t OTG_V_TOTAL_CONTROL;
- uint32_t OTG_TRIGA_CNTL;
- uint32_t OTG_TRIGA_MANUAL_TRIG;
- uint32_t OTG_MANUAL_FLOW_CONTROL;
- uint32_t OTG_FORCE_COUNT_NOW_CNTL;
- uint32_t OTG_STATIC_SCREEN_CONTROL;
- uint32_t OTG_STATUS_FRAME_COUNT;
- uint32_t OTG_STATUS;
- uint32_t OTG_STATUS_POSITION;
- uint32_t OTG_NOM_VERT_POSITION;
- uint32_t OTG_BLACK_COLOR;
- uint32_t OTG_TEST_PATTERN_PARAMETERS;
- uint32_t OTG_TEST_PATTERN_CONTROL;
- uint32_t OTG_TEST_PATTERN_COLOR;
- uint32_t OTG_CLOCK_CONTROL;
- uint32_t OTG_VERTICAL_INTERRUPT0_CONTROL;
- uint32_t OTG_VERTICAL_INTERRUPT0_POSITION;
- uint32_t OTG_VERTICAL_INTERRUPT1_CONTROL;
- uint32_t OTG_VERTICAL_INTERRUPT1_POSITION;
- uint32_t OTG_VERTICAL_INTERRUPT2_CONTROL;
- uint32_t OTG_VERTICAL_INTERRUPT2_POSITION;
- uint32_t OPTC_INPUT_CLOCK_CONTROL;
- uint32_t OPTC_DATA_SOURCE_SELECT;
- uint32_t OPTC_MEMORY_CONFIG;
- uint32_t OPTC_INPUT_GLOBAL_CONTROL;
- uint32_t CONTROL;
- uint32_t OTG_GSL_WINDOW_X;
- uint32_t OTG_GSL_WINDOW_Y;
- uint32_t OTG_VUPDATE_KEEPOUT;
- uint32_t OTG_CRC_CNTL;
- uint32_t OTG_CRC_CNTL2;
- uint32_t OTG_CRC0_DATA_RG;
- uint32_t OTG_CRC0_DATA_B;
- uint32_t OTG_CRC1_DATA_B;
- uint32_t OTG_CRC2_DATA_B;
- uint32_t OTG_CRC3_DATA_B;
- uint32_t OTG_CRC1_DATA_RG;
- uint32_t OTG_CRC2_DATA_RG;
- uint32_t OTG_CRC3_DATA_RG;
- uint32_t OTG_CRC0_WINDOWA_X_CONTROL;
- uint32_t OTG_CRC0_WINDOWA_Y_CONTROL;
- uint32_t OTG_CRC0_WINDOWB_X_CONTROL;
- uint32_t OTG_CRC0_WINDOWB_Y_CONTROL;
- uint32_t OTG_CRC1_WINDOWA_X_CONTROL;
- uint32_t OTG_CRC1_WINDOWA_Y_CONTROL;
- uint32_t OTG_CRC1_WINDOWB_X_CONTROL;
- uint32_t OTG_CRC1_WINDOWB_Y_CONTROL;
- uint32_t GSL_SOURCE_SELECT;
- uint32_t DWB_SOURCE_SELECT;
- uint32_t OTG_DSC_START_POSITION;
- uint32_t OPTC_DATA_FORMAT_CONTROL;
- uint32_t OPTC_BYTES_PER_PIXEL;
- uint32_t OPTC_WIDTH_CONTROL;
- uint32_t OTG_DRR_CONTROL;
- uint32_t OTG_BLANK_DATA_COLOR;
- uint32_t OTG_BLANK_DATA_COLOR_EXT;
- uint32_t OTG_DRR_TRIGGER_WINDOW;
- uint32_t OTG_M_CONST_DTO0;
- uint32_t OTG_M_CONST_DTO1;
- uint32_t OTG_DRR_V_TOTAL_CHANGE;
- uint32_t OTG_GLOBAL_CONTROL4;
+ OPTC_REG_VARIABLE_LIST_DCN;
};
#define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\
@@ -301,6 +325,7 @@ struct dcn_optc_registers {
SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\
SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\
SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\
+ SF(OTG0_OTG_CRC_CNTL, OTG_CRC1_SELECT, mask_sh),\
SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\
SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\
SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\
@@ -313,13 +338,22 @@ struct dcn_optc_registers {
SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\
SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\
SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_DATA_RG, CRC1_R_CR, mask_sh),\
+ SF(OTG0_OTG_CRC1_DATA_RG, CRC1_G_Y, mask_sh),\
+ SF(OTG0_OTG_CRC1_DATA_B, CRC1_B_CB, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_END, mask_sh),\
SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\
SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\
SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\
SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh)
-
-
#define TG_COMMON_MASK_SH_LIST_DCN1_0(mask_sh)\
TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\
SF(OTG0_OTG_TEST_PATTERN_PARAMETERS, OTG_TEST_PATTERN_INC0, mask_sh),\
@@ -470,6 +504,7 @@ struct dcn_optc_registers {
type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN;\
type OTG_CRC_CONT_EN;\
type OTG_CRC0_SELECT;\
+ type OTG_CRC1_SELECT;\
type OTG_CRC_EN;\
type CRC0_R_CR;\
type CRC0_G_Y;\
@@ -506,12 +541,15 @@ struct dcn_optc_registers {
type MANUAL_FLOW_CONTROL;\
type MANUAL_FLOW_CONTROL_SEL;
+#define V_TOTAL_REGS(type)
+
#define TG_REG_FIELD_LIST(type) \
TG_REG_FIELD_LIST_DCN1_0(type)\
type OTG_V_SYNC_MODE;\
type OTG_DRR_TRIGGER_WINDOW_START_X;\
type OTG_DRR_TRIGGER_WINDOW_END_X;\
type OTG_DRR_V_TOTAL_CHANGE_LIMIT;\
+ V_TOTAL_REGS(type)\
type OTG_OUT_MUX;\
type OTG_M_CONST_DTO_PHASE;\
type OTG_M_CONST_DTO_MODULO;\
@@ -548,206 +586,65 @@ struct dcn_optc_registers {
type OTG_CRC_DATA_STREAM_SPLIT_MODE;\
type OTG_CRC_DATA_FORMAT;\
type OTG_V_TOTAL_LAST_USED_BY_DRR;\
- type OTG_DRR_TIMING_DBUF_UPDATE_PENDING;
+ type OTG_DRR_TIMING_DBUF_UPDATE_PENDING;\
+ type OTG_H_TIMING_DIV_MODE_DB_UPDATE_PENDING;\
+ type OPTC_DOUBLE_BUFFER_PENDING;\
+
+#define TG_REG_FIELD_LIST_DCN2_0(type) \
+ type OTG_FLIP_PENDING;\
+ type OTG_DC_REG_UPDATE_PENDING;\
+ type OTG_CURSOR_UPDATE_PENDING;\
+ type OTG_VUPDATE_KEEPOUT_STATUS;\
+ type OTG0_IHC_OTG_VERTICAL_INTERRUPT2_DEST;
#define TG_REG_FIELD_LIST_DCN3_2(type) \
type OTG_H_TIMING_DIV_MODE_MANUAL;
+#define TG_REG_FIELD_LIST_DCN3_5(type) \
+ type OTG_CRC0_WINDOWA_X_START_READBACK;\
+ type OTG_CRC0_WINDOWA_X_END_READBACK;\
+ type OTG_CRC0_WINDOWA_Y_START_READBACK;\
+ type OTG_CRC0_WINDOWA_Y_END_READBACK;\
+ type OTG_CRC0_WINDOWB_X_START_READBACK;\
+ type OTG_CRC0_WINDOWB_X_END_READBACK;\
+ type OTG_CRC0_WINDOWB_Y_START_READBACK;\
+ type OTG_CRC0_WINDOWB_Y_END_READBACK; \
+ type OTG_CRC1_WINDOWA_X_START_READBACK;\
+ type OTG_CRC1_WINDOWA_X_END_READBACK;\
+ type OTG_CRC1_WINDOWA_Y_START_READBACK;\
+ type OTG_CRC1_WINDOWA_Y_END_READBACK;\
+ type OTG_CRC1_WINDOWB_X_START_READBACK;\
+ type OTG_CRC1_WINDOWB_X_END_READBACK;\
+ type OTG_CRC1_WINDOWB_Y_START_READBACK;\
+ type OTG_CRC1_WINDOWB_Y_END_READBACK;\
+ type OPTC_FGCG_REP_DIS;\
+ type OTG_V_COUNT_STOP;\
+ type OTG_V_COUNT_STOP_TIMER;
+
+#define TG_REG_FIELD_LIST_DCN401(type) \
+ type OPTC_SEGMENT_WIDTH_LAST;\
+ type OTG_PSTATE_KEEPOUT_START;\
+ type OTG_PSTATE_EXTEND;\
+ type OTG_UNBLANK;\
+ type OTG_PSTATE_ALLOW_WIDTH_MIN;
+
struct dcn_optc_shift {
TG_REG_FIELD_LIST(uint8_t)
+ TG_REG_FIELD_LIST_DCN2_0(uint8_t)
TG_REG_FIELD_LIST_DCN3_2(uint8_t)
+ TG_REG_FIELD_LIST_DCN3_5(uint8_t)
+ TG_REG_FIELD_LIST_DCN401(uint8_t)
};
struct dcn_optc_mask {
TG_REG_FIELD_LIST(uint32_t)
+ TG_REG_FIELD_LIST_DCN2_0(uint32_t)
TG_REG_FIELD_LIST_DCN3_2(uint32_t)
-};
-
-struct optc {
- struct timing_generator base;
-
- const struct dcn_optc_registers *tg_regs;
- const struct dcn_optc_shift *tg_shift;
- const struct dcn_optc_mask *tg_mask;
-
- int opp_count;
-
- uint32_t max_h_total;
- uint32_t max_v_total;
-
- uint32_t min_h_blank;
-
- uint32_t min_h_sync_width;
- uint32_t min_v_sync_width;
- uint32_t min_v_blank;
- uint32_t min_v_blank_interlace;
-
- int vstartup_start;
- int vupdate_offset;
- int vupdate_width;
- int vready_offset;
- struct dc_crtc_timing orginal_patched_timing;
- enum signal_type signal;
+ TG_REG_FIELD_LIST_DCN3_5(uint32_t)
+ TG_REG_FIELD_LIST_DCN401(uint32_t)
};
void dcn10_timing_generator_init(struct optc *optc);
-struct dcn_otg_state {
- uint32_t v_blank_start;
- uint32_t v_blank_end;
- uint32_t v_sync_a_pol;
- uint32_t v_total;
- uint32_t v_total_max;
- uint32_t v_total_min;
- uint32_t v_total_min_sel;
- uint32_t v_total_max_sel;
- uint32_t v_sync_a_start;
- uint32_t v_sync_a_end;
- uint32_t h_blank_start;
- uint32_t h_blank_end;
- uint32_t h_sync_a_start;
- uint32_t h_sync_a_end;
- uint32_t h_sync_a_pol;
- uint32_t h_total;
- uint32_t underflow_occurred_status;
- uint32_t otg_enabled;
- uint32_t blank_enabled;
- uint32_t vertical_interrupt1_en;
- uint32_t vertical_interrupt1_line;
- uint32_t vertical_interrupt2_en;
- uint32_t vertical_interrupt2_line;
-};
-
-void optc1_read_otg_state(struct optc *optc1,
- struct dcn_otg_state *s);
-
-bool optc1_get_hw_timing(struct timing_generator *tg,
- struct dc_crtc_timing *hw_crtc_timing);
-
-bool optc1_validate_timing(
- struct timing_generator *optc,
- const struct dc_crtc_timing *timing);
-
-void optc1_program_timing(
- struct timing_generator *optc,
- const struct dc_crtc_timing *dc_crtc_timing,
- int vready_offset,
- int vstartup_start,
- int vupdate_offset,
- int vupdate_width,
- const enum signal_type signal,
- bool use_vbios);
-
-void optc1_setup_vertical_interrupt0(
- struct timing_generator *optc,
- uint32_t start_line,
- uint32_t end_line);
-void optc1_setup_vertical_interrupt1(
- struct timing_generator *optc,
- uint32_t start_line);
-void optc1_setup_vertical_interrupt2(
- struct timing_generator *optc,
- uint32_t start_line);
-
-void optc1_program_global_sync(
- struct timing_generator *optc,
- int vready_offset,
- int vstartup_start,
- int vupdate_offset,
- int vupdate_width);
-
-bool optc1_disable_crtc(struct timing_generator *optc);
-
-bool optc1_is_counter_moving(struct timing_generator *optc);
-
-void optc1_get_position(struct timing_generator *optc,
- struct crtc_position *position);
-
-uint32_t optc1_get_vblank_counter(struct timing_generator *optc);
-
-void optc1_get_crtc_scanoutpos(
- struct timing_generator *optc,
- uint32_t *v_blank_start,
- uint32_t *v_blank_end,
- uint32_t *h_position,
- uint32_t *v_position);
-
-void optc1_set_early_control(
- struct timing_generator *optc,
- uint32_t early_cntl);
-
-void optc1_wait_for_state(struct timing_generator *optc,
- enum crtc_state state);
-
-void optc1_set_blank(struct timing_generator *optc,
- bool enable_blanking);
-
-bool optc1_is_blanked(struct timing_generator *optc);
-
-void optc1_program_blank_color(
- struct timing_generator *optc,
- const struct tg_color *black_color);
-
-bool optc1_did_triggered_reset_occur(
- struct timing_generator *optc);
-
-void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst);
-
-void optc1_disable_reset_trigger(struct timing_generator *optc);
-
-void optc1_lock(struct timing_generator *optc);
-
-void optc1_unlock(struct timing_generator *optc);
-
-void optc1_enable_optc_clock(struct timing_generator *optc, bool enable);
-
-void optc1_set_drr(
- struct timing_generator *optc,
- const struct drr_params *params);
-
-void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max);
-
-void optc1_set_static_screen_control(
- struct timing_generator *optc,
- uint32_t event_triggers,
- uint32_t num_frames);
-
-void optc1_program_stereo(struct timing_generator *optc,
- const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags);
-
-bool optc1_is_stereo_left_eye(struct timing_generator *optc);
-
-void optc1_clear_optc_underflow(struct timing_generator *optc);
-
-void optc1_tg_init(struct timing_generator *optc);
-
-bool optc1_is_tg_enabled(struct timing_generator *optc);
-
-bool optc1_is_optc_underflow_occurred(struct timing_generator *optc);
-
-void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable);
-
-void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable);
-
-bool optc1_get_otg_active_size(struct timing_generator *optc,
- uint32_t *otg_active_width,
- uint32_t *otg_active_height);
-
-void optc1_enable_crtc_reset(
- struct timing_generator *optc,
- int source_tg_inst,
- struct crtc_trigger_info *crtc_tp);
-
-bool optc1_configure_crc(struct timing_generator *optc,
- const struct crc_params *params);
-
-bool optc1_get_crc(struct timing_generator *optc,
- uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb);
-
-bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing);
-
-void optc1_set_vtg_params(struct timing_generator *optc,
- const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2);
-
#endif /* __DC_TIMING_GENERATOR_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
index 58bdbd859bf9..e7a90a437fff 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c
@@ -158,13 +158,6 @@ void optc2_get_dsc_status(struct timing_generator *optc,
OPTC_DSC_MODE, dsc_mode);
}
-
-/*TEMP: Need to figure out inheritance model here.*/
-bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
-{
- return optc1_is_two_pixels_per_containter(timing);
-}
-
void optc2_set_odm_bypass(struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing)
{
@@ -177,7 +170,7 @@ void optc2_set_odm_bypass(struct timing_generator *optc,
OPTC_SEG1_SRC_SEL, 0xf);
REG_WRITE(OTG_H_TIMING_CNTL, 0);
- h_div_2 = optc2_is_two_pixels_per_containter(dc_crtc_timing);
+ h_div_2 = optc->funcs->is_two_pixels_per_container(dc_crtc_timing);
REG_UPDATE(OTG_H_TIMING_CNTL,
OTG_H_TIMING_DIV_BY2, h_div_2);
REG_SET(OPTC_MEMORY_CONFIG, 0,
@@ -186,11 +179,9 @@ void optc2_set_odm_bypass(struct timing_generator *optc,
}
void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
- struct dc_crtc_timing *timing)
+ int segment_width, int last_segment_width)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
- int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right)
- / opp_cnt;
uint32_t memory_mask;
ASSERT(opp_cnt == 2);
@@ -220,7 +211,7 @@ void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c
OPTC_SEG1_SRC_SEL, opp_id[1]);
REG_UPDATE(OPTC_WIDTH_CONTROL,
- OPTC_SEGMENT_WIDTH, mpcc_hactive);
+ OPTC_SEGMENT_WIDTH, segment_width);
REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_BY2, 1);
optc1->opp_count = opp_cnt;
@@ -511,7 +502,7 @@ void optc2_get_last_used_drr_vtotal(struct timing_generator *optc, uint32_t *ref
REG_GET(OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, refresh_rate);
}
-static struct timing_generator_funcs dcn20_tg_funcs = {
+static const struct timing_generator_funcs dcn20_tg_funcs = {
.validate_timing = optc1_validate_timing,
.program_timing = optc1_program_timing,
.setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
@@ -570,6 +561,8 @@ static struct timing_generator_funcs dcn20_tg_funcs = {
.setup_manual_trigger = optc2_setup_manual_trigger,
.get_hw_timing = optc1_get_hw_timing,
.align_vblanks = optc2_align_vblanks,
+ .is_two_pixels_per_container = optc1_is_two_pixels_per_container,
+ .read_otg_state = optc1_read_otg_state,
};
void dcn20_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h
index f7968b9ca16e..928e110b95fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h
@@ -26,7 +26,7 @@
#ifndef __DC_OPTC_DCN20_H__
#define __DC_OPTC_DCN20_H__
-#include "../dcn10/dcn10_optc.h"
+#include "dcn10/dcn10_optc.h"
#define TG_COMMON_REG_LIST_DCN2_0(inst) \
TG_COMMON_REG_LIST_DCN(inst),\
@@ -43,7 +43,8 @@
SRI(OPTC_MEMORY_CONFIG, ODM, inst),\
SR(DWB_SOURCE_SELECT),\
SRI(OTG_MANUAL_FLOW_CONTROL, OTG, inst), \
- SRI(OTG_DRR_CONTROL, OTG, inst)
+ SRI(OTG_DRR_CONTROL, OTG, inst),\
+ SRI(OTG_PIPE_UPDATE_STATUS, OTG, inst)
#define TG_COMMON_MASK_SH_LIST_DCN2_0(mask_sh)\
TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\
@@ -53,6 +54,10 @@
SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\
SF(OTG0_OTG_GLOBAL_CONTROL2, DIG_UPDATE_LOCATION, mask_sh),\
SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\
SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\
SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \
SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\
@@ -105,7 +110,7 @@ void optc2_set_odm_bypass(struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing);
void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
- struct dc_crtc_timing *timing);
+ int segment_width, int last_segment_width);
void optc2_get_optc_source(struct timing_generator *optc,
uint32_t *num_of_src_opp,
@@ -118,7 +123,6 @@ void optc2_lock_doublebuffer_disable(struct timing_generator *optc);
void optc2_lock_doublebuffer_enable(struct timing_generator *optc);
void optc2_setup_manual_trigger(struct timing_generator *optc);
void optc2_program_manual_trigger(struct timing_generator *optc);
-bool optc2_is_two_pixels_per_containter(const struct dc_crtc_timing *timing);
bool optc2_configure_crc(struct timing_generator *optc,
const struct crc_params *params);
#endif /* __DC_OPTC_DCN20_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c
index 70fcbec03fb6..772a8bfb949c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c
@@ -38,12 +38,6 @@
#define FN(reg_name, field_name) \
optc1->tg_shift->field_name, optc1->tg_mask->field_name
-/*TEMP: Need to figure out inheritance model here.*/
-bool optc201_is_two_pixels_per_containter(const struct dc_crtc_timing *timing)
-{
- return optc1_is_two_pixels_per_containter(timing);
-}
-
static void optc201_triplebuffer_lock(struct timing_generator *optc)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -135,7 +129,7 @@ static void optc201_get_optc_source(struct timing_generator *optc,
*num_of_src_opp = 1;
}
-static struct timing_generator_funcs dcn201_tg_funcs = {
+static const struct timing_generator_funcs dcn201_tg_funcs = {
.validate_timing = optc201_validate_timing,
.program_timing = optc1_program_timing,
.setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
@@ -185,6 +179,8 @@ static struct timing_generator_funcs dcn201_tg_funcs = {
.program_manual_trigger = optc2_program_manual_trigger,
.setup_manual_trigger = optc2_setup_manual_trigger,
.get_hw_timing = optc1_get_hw_timing,
+ .is_two_pixels_per_container = optc1_is_two_pixels_per_container,
+ .read_otg_state = optc1_read_otg_state,
};
void dcn201_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h
index e9545b73513a..a9b281abfd44 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h
@@ -68,7 +68,4 @@
SF(DWB_SOURCE_SELECT, OPTC_DWB1_SOURCE_SELECT, mask_sh)
void dcn201_timing_generator_init(struct optc *optc);
-
-bool optc201_is_two_pixels_per_containter(const struct dc_crtc_timing *timing);
-
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c
index 5bf4d0aa6230..ee4665aa49e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c
@@ -206,8 +206,8 @@ void optc3_set_odm_bypass(struct timing_generator *optc,
OPTC_SEG3_SRC_SEL, 0xf
);
- h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing);
- REG_SET(OTG_H_TIMING_CNTL, 0,
+ h_div = optc->funcs->is_two_pixels_per_container(dc_crtc_timing);
+ REG_UPDATE(OTG_H_TIMING_CNTL,
OTG_H_TIMING_DIV_MODE, h_div);
REG_SET(OPTC_MEMORY_CONFIG, 0,
@@ -216,11 +216,9 @@ void optc3_set_odm_bypass(struct timing_generator *optc,
}
void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
- struct dc_crtc_timing *timing)
+ int segment_width, int last_segment_width)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
- int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right)
- / opp_cnt;
uint32_t memory_mask = 0;
/* TODO: In pseudocode but does not affect maximus, delete comment if we dont need on asic
@@ -267,12 +265,54 @@ void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c
}
REG_UPDATE(OPTC_WIDTH_CONTROL,
- OPTC_SEGMENT_WIDTH, mpcc_hactive);
+ OPTC_SEGMENT_WIDTH, segment_width);
REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_MODE, opp_cnt - 1);
optc1->opp_count = opp_cnt;
}
+/* OTG status register that indicates OPTC update is pending */
+bool optc3_get_optc_double_buffer_pending(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ uint32_t update_pending = 0;
+
+ REG_GET(OPTC_INPUT_GLOBAL_CONTROL,
+ OPTC_DOUBLE_BUFFER_PENDING,
+ &update_pending);
+
+ return (update_pending == 1);
+}
+
+/* OTG status register that indicates OTG update is pending */
+bool optc3_get_otg_update_pending(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ uint32_t update_pending = 0;
+
+ REG_GET(OTG_DOUBLE_BUFFER_CONTROL,
+ OTG_UPDATE_PENDING,
+ &update_pending);
+
+ return (update_pending == 1);
+}
+
+/* OTG status register that indicates surface update is pending */
+bool optc3_get_pipe_update_pending(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ uint32_t flip_pending = 0;
+ uint32_t dc_update_pending = 0;
+
+ REG_GET_2(OTG_PIPE_UPDATE_STATUS,
+ OTG_FLIP_PENDING,
+ &flip_pending,
+ OTG_DC_REG_UPDATE_PENDING,
+ &dc_update_pending);
+
+ return (flip_pending == 1 || dc_update_pending == 1);
+}
+
/**
* optc3_set_timing_double_buffer() - DRR double buffering control
*
@@ -317,7 +357,7 @@ void optc3_tg_init(struct timing_generator *optc)
optc1_clear_optc_underflow(optc);
}
-static struct timing_generator_funcs dcn30_tg_funcs = {
+static const struct timing_generator_funcs dcn30_tg_funcs = {
.validate_timing = optc1_validate_timing,
.program_timing = optc1_program_timing,
.setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
@@ -376,6 +416,11 @@ static struct timing_generator_funcs dcn30_tg_funcs = {
.setup_manual_trigger = optc2_setup_manual_trigger,
.get_hw_timing = optc1_get_hw_timing,
.wait_drr_doublebuffer_pending_clear = optc3_wait_drr_doublebuffer_pending_clear,
+ .is_two_pixels_per_container = optc1_is_two_pixels_per_container,
+ .get_optc_double_buffer_pending = optc3_get_optc_double_buffer_pending,
+ .get_otg_double_buffer_pending = optc3_get_otg_update_pending,
+ .get_pipe_update_pending = optc3_get_pipe_update_pending,
+ .read_otg_state = optc1_read_otg_state,
};
void dcn30_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h
index d3a056c12b0d..e2303f9eaf13 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h
@@ -109,7 +109,8 @@
SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\
SRI(OPTC_WIDTH_CONTROL, ODM, inst),\
SRI(OPTC_MEMORY_CONFIG, ODM, inst),\
- SR(DWB_SOURCE_SELECT)
+ SR(DWB_SOURCE_SELECT),\
+ SRI(OTG_PIPE_UPDATE_STATUS, OTG, inst)
#define DCN30_VTOTAL_REGS_SF(mask_sh)
@@ -209,6 +210,7 @@
SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\
SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\
SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\
+ SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_DOUBLE_BUFFER_PENDING, mask_sh),\
SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\
SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\
SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\
@@ -319,7 +321,11 @@
SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\
SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\
SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_PENDING, mask_sh),\
- SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh)
+ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\
void dcn30_timing_generator_init(struct optc *optc1);
@@ -352,8 +358,11 @@ void optc3_set_timing_db_mode(struct timing_generator *optc, bool enable);
void optc3_set_odm_bypass(struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing);
void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
- struct dc_crtc_timing *timing);
+ int segment_width, int last_segment_width);
void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc);
void optc3_tg_init(struct timing_generator *optc);
void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max);
+bool optc3_get_optc_double_buffer_pending(struct timing_generator *optc);
+bool optc3_get_otg_update_pending(struct timing_generator *optc);
+bool optc3_get_pipe_update_pending(struct timing_generator *optc);
#endif /* __DC_OPTC_DCN30_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c
index b3cfcb887905..38f85bc2681a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c
@@ -109,7 +109,7 @@ void optc301_setup_manual_trigger(struct timing_generator *optc)
OTG_TRIGA_CLEAR, 1);
}
-static struct timing_generator_funcs dcn30_tg_funcs = {
+static const struct timing_generator_funcs dcn30_tg_funcs = {
.validate_timing = optc1_validate_timing,
.program_timing = optc1_program_timing,
.setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
@@ -168,6 +168,11 @@ static struct timing_generator_funcs dcn30_tg_funcs = {
.setup_manual_trigger = optc301_setup_manual_trigger,
.get_hw_timing = optc1_get_hw_timing,
.wait_drr_doublebuffer_pending_clear = optc3_wait_drr_doublebuffer_pending_clear,
+ .is_two_pixels_per_container = optc1_is_two_pixels_per_container,
+ .get_optc_double_buffer_pending = optc3_get_optc_double_buffer_pending,
+ .get_otg_double_buffer_pending = optc3_get_otg_update_pending,
+ .get_pipe_update_pending = optc3_get_pipe_update_pending,
+ .read_otg_state = optc1_read_otg_state,
};
void dcn301_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h
index b49585682a15..b49585682a15 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
index 63a677c8ee27..4f1830ba619f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c
@@ -41,13 +41,11 @@
optc1->tg_shift->field_name, optc1->tg_mask->field_name
static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
- struct dc_crtc_timing *timing)
+ int segment_width, int last_segment_width)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
- int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right)
- / opp_cnt;
uint32_t memory_mask = 0;
- int mem_count_per_opp = (mpcc_hactive + 2559) / 2560;
+ int mem_count_per_opp = (segment_width + 2559) / 2560;
/* Assume less than 6 pipes */
if (opp_cnt == 4) {
@@ -85,7 +83,7 @@ static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, i
}
REG_UPDATE(OPTC_WIDTH_CONTROL,
- OPTC_SEGMENT_WIDTH, mpcc_hactive);
+ OPTC_SEGMENT_WIDTH, segment_width);
REG_SET(OTG_H_TIMING_CNTL, 0, OTG_H_TIMING_DIV_MODE, opp_cnt - 1);
optc1->opp_count = opp_cnt;
@@ -123,6 +121,17 @@ static bool optc31_enable_crtc(struct timing_generator *optc)
static bool optc31_disable_crtc(struct timing_generator *optc)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+ OPTC_SEG0_SRC_SEL, 0xf,
+ OPTC_SEG1_SRC_SEL, 0xf,
+ OPTC_SEG2_SRC_SEL, 0xf,
+ OPTC_SEG3_SRC_SEL, 0xf,
+ OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
+ REG_UPDATE(OPTC_MEMORY_CONFIG,
+ OPTC_MEM_SEL, 0);
+
/* disable otg request until end of the first line
* in the vertical blank region
*/
@@ -140,7 +149,9 @@ static bool optc31_disable_crtc(struct timing_generator *optc)
return true;
}
-
+/*
+ * Immediate_Disable_Crtc - this is to temp disable Timing generator without reset ODM.
+ */
bool optc31_immediate_disable_crtc(struct timing_generator *optc)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -153,10 +164,12 @@ bool optc31_immediate_disable_crtc(struct timing_generator *optc)
VTG0_ENABLE, 0);
/* CRTC disabled, so disable clock. */
- REG_WAIT(OTG_CLOCK_CONTROL,
+ if (optc->ctx->dce_environment != DCE_ENV_DIAG)
+ REG_WAIT(OTG_CLOCK_CONTROL,
OTG_BUSY, 0,
1, 100000);
+
/* clear the false state */
optc1_clear_optc_underflow(optc);
@@ -232,7 +245,77 @@ void optc3_init_odm(struct timing_generator *optc)
optc1->opp_count = 1;
}
-static struct timing_generator_funcs dcn31_tg_funcs = {
+void optc31_read_otg_state(struct timing_generator *optc,
+ struct dcn_otg_state *s)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ REG_GET(OTG_CONTROL,
+ OTG_MASTER_EN, &s->otg_enabled);
+
+ REG_GET_2(OTG_V_BLANK_START_END,
+ OTG_V_BLANK_START, &s->v_blank_start,
+ OTG_V_BLANK_END, &s->v_blank_end);
+
+ REG_GET(OTG_V_SYNC_A_CNTL,
+ OTG_V_SYNC_A_POL, &s->v_sync_a_pol);
+
+ REG_GET(OTG_V_TOTAL,
+ OTG_V_TOTAL, &s->v_total);
+
+ REG_GET(OTG_V_TOTAL_MAX,
+ OTG_V_TOTAL_MAX, &s->v_total_max);
+
+ REG_GET(OTG_V_TOTAL_MIN,
+ OTG_V_TOTAL_MIN, &s->v_total_min);
+
+ REG_GET(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MAX_SEL, &s->v_total_max_sel);
+
+ REG_GET(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MIN_SEL, &s->v_total_min_sel);
+
+ REG_GET_2(OTG_V_SYNC_A,
+ OTG_V_SYNC_A_START, &s->v_sync_a_start,
+ OTG_V_SYNC_A_END, &s->v_sync_a_end);
+
+ REG_GET_2(OTG_H_BLANK_START_END,
+ OTG_H_BLANK_START, &s->h_blank_start,
+ OTG_H_BLANK_END, &s->h_blank_end);
+
+ REG_GET_2(OTG_H_SYNC_A,
+ OTG_H_SYNC_A_START, &s->h_sync_a_start,
+ OTG_H_SYNC_A_END, &s->h_sync_a_end);
+
+ REG_GET(OTG_H_SYNC_A_CNTL,
+ OTG_H_SYNC_A_POL, &s->h_sync_a_pol);
+
+ REG_GET(OTG_H_TOTAL,
+ OTG_H_TOTAL, &s->h_total);
+
+ REG_GET(OPTC_INPUT_GLOBAL_CONTROL,
+ OPTC_UNDERFLOW_OCCURRED_STATUS, &s->underflow_occurred_status);
+
+ REG_GET(OTG_VERTICAL_INTERRUPT1_CONTROL,
+ OTG_VERTICAL_INTERRUPT1_INT_ENABLE, &s->vertical_interrupt1_en);
+
+ REG_GET(OTG_VERTICAL_INTERRUPT1_POSITION,
+ OTG_VERTICAL_INTERRUPT1_LINE_START, &s->vertical_interrupt1_line);
+
+ REG_GET(OTG_VERTICAL_INTERRUPT2_CONTROL,
+ OTG_VERTICAL_INTERRUPT2_INT_ENABLE, &s->vertical_interrupt2_en);
+
+ REG_GET(OTG_VERTICAL_INTERRUPT2_POSITION,
+ OTG_VERTICAL_INTERRUPT2_LINE_START, &s->vertical_interrupt2_line);
+
+ REG_GET(INTERRUPT_DEST,
+ OTG0_IHC_OTG_VERTICAL_INTERRUPT2_DEST, &s->vertical_interrupt2_dest);
+
+ s->otg_master_update_lock = REG_READ(OTG_MASTER_UPDATE_LOCK);
+ s->otg_double_buffer_control = REG_READ(OTG_DOUBLE_BUFFER_CONTROL);
+}
+
+static const struct timing_generator_funcs dcn31_tg_funcs = {
.validate_timing = optc1_validate_timing,
.program_timing = optc1_program_timing,
.setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
@@ -292,6 +375,8 @@ static struct timing_generator_funcs dcn31_tg_funcs = {
.setup_manual_trigger = optc2_setup_manual_trigger,
.get_hw_timing = optc1_get_hw_timing,
.init_odm = optc3_init_odm,
+ .is_two_pixels_per_container = optc1_is_two_pixels_per_container,
+ .read_otg_state = optc31_read_otg_state,
};
void dcn31_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h
index 30b81a448ce2..0f72c274f40b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h
@@ -99,7 +99,9 @@
SRI(OPTC_MEMORY_CONFIG, ODM, inst),\
SRI(OTG_CRC_CNTL2, OTG, inst),\
SR(DWB_SOURCE_SELECT),\
- SRI(OTG_DRR_CONTROL, OTG, inst)
+ SRI(OTG_DRR_CONTROL, OTG, inst),\
+ SRI(OTG_PIPE_UPDATE_STATUS, OTG, inst),\
+ SRI(INTERRUPT_DEST, OTG, inst)
#define OPTC_COMMON_MASK_SH_LIST_DCN3_1(mask_sh)\
SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\
@@ -254,7 +256,12 @@
SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\
SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\
SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\
- SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh)
+ SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\
+ SF(OTG0_INTERRUPT_DEST, OTG0_IHC_OTG_VERTICAL_INTERRUPT2_DEST, mask_sh)
void dcn31_timing_generator_init(struct optc *optc1);
@@ -264,4 +271,7 @@ void optc31_set_drr(struct timing_generator *optc, const struct drr_params *para
void optc3_init_odm(struct timing_generator *optc);
+void optc31_read_otg_state(struct timing_generator *optc,
+ struct dcn_otg_state *s);
+
#endif /* __DC_OPTC_DCN31_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c
index 0086cafb0f7a..4a2caca37255 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c
@@ -48,12 +48,11 @@
*/
static void optc314_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
- struct dc_crtc_timing *timing)
+ int segment_width, int last_segment_width)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
uint32_t memory_mask = 0;
- int h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right;
- int mpcc_hactive = h_active / opp_cnt;
+ int h_active = segment_width * opp_cnt;
/* Each memory instance is 2048x(314x2) bits to support half line of 4096 */
int odm_mem_count = (h_active + 2047) / 2048;
@@ -96,7 +95,7 @@ static void optc314_set_odm_combine(struct timing_generator *optc, int *opp_id,
}
REG_UPDATE(OPTC_WIDTH_CONTROL,
- OPTC_SEGMENT_WIDTH, mpcc_hactive);
+ OPTC_SEGMENT_WIDTH, segment_width);
REG_UPDATE(OTG_H_TIMING_CNTL,
OTG_H_TIMING_DIV_MODE, opp_cnt - 1);
@@ -175,7 +174,7 @@ static void optc314_set_odm_bypass(struct timing_generator *optc,
OPTC_SEG3_SRC_SEL, 0xf
);
- h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing);
+ h_div = optc->funcs->is_two_pixels_per_container(dc_crtc_timing);
REG_UPDATE(OTG_H_TIMING_CNTL,
OTG_H_TIMING_DIV_MODE, h_div);
@@ -193,7 +192,7 @@ static void optc314_set_h_timing_div_manual_mode(struct timing_generator *optc,
}
-static struct timing_generator_funcs dcn314_tg_funcs = {
+static const struct timing_generator_funcs dcn314_tg_funcs = {
.validate_timing = optc1_validate_timing,
.program_timing = optc1_program_timing,
.setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
@@ -255,6 +254,8 @@ static struct timing_generator_funcs dcn314_tg_funcs = {
.set_odm_bypass = optc314_set_odm_bypass,
.set_odm_combine = optc314_set_odm_combine,
.set_h_timing_div_manual_mode = optc314_set_h_timing_div_manual_mode,
+ .is_two_pixels_per_container = optc1_is_two_pixels_per_container,
+ .read_otg_state = optc31_read_otg_state,
};
void dcn314_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h
index 99c098e76116..6bfdee3fcf5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h
@@ -98,7 +98,9 @@
SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\
SRI(OPTC_WIDTH_CONTROL, ODM, inst),\
SRI(OPTC_MEMORY_CONFIG, ODM, inst),\
- SRI(OTG_DRR_CONTROL, OTG, inst)
+ SRI(OTG_DRR_CONTROL, OTG, inst),\
+ SRI(OTG_PIPE_UPDATE_STATUS, OTG, inst),\
+ SRI(INTERRUPT_DEST, OTG, inst)
#define OPTC_COMMON_MASK_SH_LIST_DCN3_14(mask_sh)\
SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\
@@ -248,7 +250,12 @@
SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\
SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\
SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\
- SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh)
+ SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\
+ SF(OTG0_INTERRUPT_DEST, OTG0_IHC_OTG_VERTICAL_INTERRUPT2_DEST, mask_sh)
void dcn314_timing_generator_init(struct optc *optc1);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
index 8abb94f60078..b2b226bcd871 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
@@ -43,12 +43,11 @@
optc1->tg_shift->field_name, optc1->tg_mask->field_name
static void optc32_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
- struct dc_crtc_timing *timing)
+ int segment_width, int last_segment_width)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
uint32_t memory_mask = 0;
- int h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right;
- int mpcc_hactive = h_active / opp_cnt;
+ int h_active = segment_width * opp_cnt;
/* Each memory instance is 2048x(32x2) bits to support half line of 4096 */
int odm_mem_count = (h_active + 2047) / 2048;
@@ -91,13 +90,44 @@ static void optc32_set_odm_combine(struct timing_generator *optc, int *opp_id, i
}
REG_UPDATE(OPTC_WIDTH_CONTROL,
- OPTC_SEGMENT_WIDTH, mpcc_hactive);
+ OPTC_SEGMENT_WIDTH, segment_width);
REG_UPDATE(OTG_H_TIMING_CNTL,
OTG_H_TIMING_DIV_MODE, opp_cnt - 1);
optc1->opp_count = opp_cnt;
}
+void optc32_get_odm_combine_segments(struct timing_generator *tg, int *odm_combine_segments)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(tg);
+ int segments;
+
+ REG_GET(OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, &segments);
+
+ switch (segments) {
+ case 0:
+ *odm_combine_segments = 1;
+ break;
+ case 1:
+ *odm_combine_segments = 2;
+ break;
+ case 3:
+ *odm_combine_segments = 4;
+ break;
+ /* 2 is reserved */
+ case 2:
+ default:
+ *odm_combine_segments = -1;
+ }
+}
+
+void optc32_wait_odm_doublebuffer_pending_clear(struct timing_generator *tg)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(tg);
+
+ REG_WAIT(OTG_DOUBLE_BUFFER_CONTROL, OTG_H_TIMING_DIV_MODE_DB_UPDATE_PENDING, 0, 2, 50000);
+}
+
void optc32_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -142,6 +172,16 @@ static bool optc32_disable_crtc(struct timing_generator *optc)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+ OPTC_SEG0_SRC_SEL, 0xf,
+ OPTC_SEG1_SRC_SEL, 0xf,
+ OPTC_SEG2_SRC_SEL, 0xf,
+ OPTC_SEG3_SRC_SEL, 0xf,
+ OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
+ REG_UPDATE(OPTC_MEMORY_CONFIG,
+ OPTC_MEM_SEL, 0);
+
/* disable otg request until end of the first line
* in the vertical blank region
*/
@@ -174,10 +214,17 @@ static void optc32_disable_phantom_otg(struct timing_generator *optc)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+ OPTC_SEG0_SRC_SEL, 0xf,
+ OPTC_SEG1_SRC_SEL, 0xf,
+ OPTC_SEG2_SRC_SEL, 0xf,
+ OPTC_SEG3_SRC_SEL, 0xf,
+ OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0);
}
-static void optc32_set_odm_bypass(struct timing_generator *optc,
+void optc32_set_odm_bypass(struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -191,7 +238,7 @@ static void optc32_set_odm_bypass(struct timing_generator *optc,
OPTC_SEG3_SRC_SEL, 0xf
);
- h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing);
+ h_div = optc->funcs->is_two_pixels_per_container(dc_crtc_timing);
REG_UPDATE(OTG_H_TIMING_CNTL,
OTG_H_TIMING_DIV_MODE, h_div);
@@ -219,9 +266,6 @@ static void optc32_setup_manual_trigger(struct timing_generator *optc)
OTG_V_TOTAL_MAX_SEL, 1,
OTG_FORCE_LOCK_ON_EVENT, 0,
OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
-
- // Setup manual flow control for EOF via TRIG_A
- optc->funcs->setup_manual_trigger(optc);
}
}
@@ -253,7 +297,7 @@ static void optc32_set_drr(
optc32_setup_manual_trigger(optc);
}
-static struct timing_generator_funcs dcn32_tg_funcs = {
+static const struct timing_generator_funcs dcn32_tg_funcs = {
.validate_timing = optc1_validate_timing,
.program_timing = optc1_program_timing,
.setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
@@ -303,6 +347,8 @@ static struct timing_generator_funcs dcn32_tg_funcs = {
.set_dwb_source = NULL,
.set_odm_bypass = optc32_set_odm_bypass,
.set_odm_combine = optc32_set_odm_combine,
+ .get_odm_combine_segments = optc32_get_odm_combine_segments,
+ .wait_odm_doublebuffer_pending_clear = optc32_wait_odm_doublebuffer_pending_clear,
.set_h_timing_div_manual_mode = optc32_set_h_timing_div_manual_mode,
.get_optc_source = optc2_get_optc_source,
.set_out_mux = optc3_set_out_mux,
@@ -314,6 +360,11 @@ static struct timing_generator_funcs dcn32_tg_funcs = {
.program_manual_trigger = optc2_program_manual_trigger,
.setup_manual_trigger = optc2_setup_manual_trigger,
.get_hw_timing = optc1_get_hw_timing,
+ .is_two_pixels_per_container = optc1_is_two_pixels_per_container,
+ .get_optc_double_buffer_pending = optc3_get_optc_double_buffer_pending,
+ .get_otg_double_buffer_pending = optc3_get_otg_update_pending,
+ .get_pipe_update_pending = optc3_get_pipe_update_pending,
+ .read_otg_state = optc31_read_otg_state,
};
void dcn32_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h
index abf0121a1006..ead92ad78a23 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h
@@ -62,6 +62,7 @@
SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\
SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\
SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\
+ SF(OTG0_OTG_CONTROL, OTG_CURRENT_MASTER_EN_STATE, mask_sh),\
SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\
SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\
SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\
@@ -116,6 +117,7 @@
SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\
SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\
SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\
+ SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_DOUBLE_BUFFER_PENDING, mask_sh),\
SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\
SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\
SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\
@@ -176,9 +178,18 @@
SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\
SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\
SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\
- SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh)
+ SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\
+ SF(OTG0_INTERRUPT_DEST, OTG0_IHC_OTG_VERTICAL_INTERRUPT2_DEST, mask_sh)
void dcn32_timing_generator_init(struct optc *optc1);
void optc32_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode);
+void optc32_get_odm_combine_segments(struct timing_generator *tg, int *odm_combine_segments);
+void optc32_set_odm_bypass(struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing);
+void optc32_wait_odm_doublebuffer_pending_clear(struct timing_generator *tg);
#endif /* __DC_OPTC_DCN32_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
new file mode 100644
index 000000000000..52d5ea98c86b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
@@ -0,0 +1,537 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dcn35_optc.h"
+
+#include "dcn30/dcn30_optc.h"
+#include "dcn31/dcn31_optc.h"
+#include "dcn32/dcn32_optc.h"
+#include "reg_helper.h"
+#include "dc.h"
+#include "dcn_calc_math.h"
+#include "dc_dmub_srv.h"
+
+#define REG(reg)\
+ optc1->tg_regs->reg
+
+#define CTX \
+ optc1->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+ optc1->tg_shift->field_name, optc1->tg_mask->field_name
+
+/**
+ * optc35_set_odm_combine() - Program ODM combine: memory selection, segment sources and segment width.
+ *
+ * @optc: Output Pipe Timing Combine instance reference.
+ * @opp_id: Output Plane Processor instance ID.
+ * @opp_cnt: Output Plane Processor count.
+ * @segment_width: Width of the segment.
+ * @last_segment_width: Width of the last segment (not used by this function).
+ *
+ * Return: void.
+ */
+static void optc35_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
+ int segment_width, int last_segment_width)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ uint32_t memory_mask = 0;
+ int h_active = segment_width * opp_cnt;
+ /* Each memory instance is 2048x(314x2) bits to support half line of 4096 */
+ int odm_mem_count = (h_active + 2047) / 2048;
+
+ /*
+ * display <= 4k : 2 memories + 2 pipes
+ * 4k < display <= 8k : 4 memories + 2 pipes
+ * 8k < display <= 12k : 6 memories + 4 pipes
+ */
+ if (opp_cnt == 4) {
+ if (odm_mem_count <= 2)
+ memory_mask = 0x3;
+ else if (odm_mem_count <= 4)
+ memory_mask = 0xf;
+ else
+ memory_mask = 0x3f;
+ } else {
+ if (odm_mem_count <= 2)
+ memory_mask = 0x1 << (opp_id[0] * 2) | 0x1 << (opp_id[1] * 2);
+ else if (odm_mem_count <= 4)
+ memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2);
+ else
+ memory_mask = 0x77;
+ }
+
+ REG_SET(OPTC_MEMORY_CONFIG, 0,
+ OPTC_MEM_SEL, memory_mask);
+
+ if (opp_cnt == 2) {
+ REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0,
+ OPTC_NUM_OF_INPUT_SEGMENT, 1,
+ OPTC_SEG0_SRC_SEL, opp_id[0],
+ OPTC_SEG1_SRC_SEL, opp_id[1]);
+ } else if (opp_cnt == 4) {
+ REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0,
+ OPTC_NUM_OF_INPUT_SEGMENT, 3,
+ OPTC_SEG0_SRC_SEL, opp_id[0],
+ OPTC_SEG1_SRC_SEL, opp_id[1],
+ OPTC_SEG2_SRC_SEL, opp_id[2],
+ OPTC_SEG3_SRC_SEL, opp_id[3]);
+ }
+
+ REG_UPDATE(OPTC_WIDTH_CONTROL,
+ OPTC_SEGMENT_WIDTH, segment_width);
+
+ REG_UPDATE(OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, opp_cnt - 1);
+ optc1->opp_count = opp_cnt;
+}
+
+static bool optc35_enable_crtc(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ /* opp instance for OTG, 1 to 1 mapping and odm will adjust */
+ REG_UPDATE(OPTC_DATA_SOURCE_SELECT,
+ OPTC_SEG0_SRC_SEL, optc->inst);
+
+ /* VTG enable first is for HW workaround */
+ REG_UPDATE(CONTROL,
+ VTG0_ENABLE, 1);
+
+ REG_SEQ_START();
+
+ /* Enable CRTC */
+ REG_UPDATE_2(OTG_CONTROL,
+ OTG_DISABLE_POINT_CNTL, 2,
+ OTG_MASTER_EN, 1);
+
+ REG_SEQ_SUBMIT();
+ REG_SEQ_WAIT_DONE();
+
+ return true;
+}
+
+/* disable_crtc */
+static bool optc35_disable_crtc(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+ OPTC_SEG0_SRC_SEL, 0xf,
+ OPTC_SEG1_SRC_SEL, 0xf,
+ OPTC_SEG2_SRC_SEL, 0xf,
+ OPTC_SEG3_SRC_SEL, 0xf,
+ OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
+ REG_UPDATE(OPTC_MEMORY_CONFIG,
+ OPTC_MEM_SEL, 0);
+
+ /* disable otg request until end of the first line
+ * in the vertical blank region
+ */
+ REG_UPDATE(OTG_CONTROL,
+ OTG_MASTER_EN, 0);
+
+ REG_UPDATE(CONTROL,
+ VTG0_ENABLE, 0);
+
+ /* CRTC disabled, so disable clock. */
+ REG_WAIT(OTG_CLOCK_CONTROL,
+ OTG_BUSY, 0,
+ 1, 100000);
+ REG_WAIT(OTG_CONTROL, OTG_CURRENT_MASTER_EN_STATE, 0, 1, 100000);
+
+ optc1_clear_optc_underflow(optc);
+
+ return true;
+}
+
+static void optc35_phantom_crtc_post_enable(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ /* Disable immediately. */
+ REG_UPDATE_2(OTG_CONTROL, OTG_DISABLE_POINT_CNTL, 0, OTG_MASTER_EN, 0);
+
+ /* CRTC disabled, so disable clock. */
+ REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000);
+}
+
+static bool optc35_configure_crc(struct timing_generator *optc,
+ const struct crc_params *params)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ /* Cannot configure crc on a CRTC that is disabled */
+ if (!optc1_is_tg_enabled(optc))
+ return false;
+
+ if (!params->enable || params->reset)
+ REG_WRITE(OTG_CRC_CNTL, 0);
+
+ if (!params->enable)
+ return true;
+
+ /* Program frame boundaries */
+ switch (params->crc_eng_inst) {
+ case 0:
+ /* Window A x axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL,
+ OTG_CRC0_WINDOWA_X_START, params->windowa_x_start,
+ OTG_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL,
+ OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+ OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL,
+ OTG_CRC0_WINDOWB_X_START, params->windowb_x_start,
+ OTG_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL,
+ OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+ OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+ if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0)
+ REG_UPDATE_4(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC0_SELECT, params->selection,
+ OTG_CRC_EN, 1,
+ OTG_CRC_WINDOW_DB_EN, 1);
+ else
+ REG_UPDATE_3(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC0_SELECT, params->selection,
+ OTG_CRC_EN, 1);
+ break;
+ case 1:
+ /* Window A x axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWA_X_CONTROL,
+ OTG_CRC1_WINDOWA_X_START, params->windowa_x_start,
+ OTG_CRC1_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWA_Y_CONTROL,
+ OTG_CRC1_WINDOWA_Y_START, params->windowa_y_start,
+ OTG_CRC1_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWB_X_CONTROL,
+ OTG_CRC1_WINDOWB_X_START, params->windowb_x_start,
+ OTG_CRC1_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWB_Y_CONTROL,
+ OTG_CRC1_WINDOWB_Y_START, params->windowb_y_start,
+ OTG_CRC1_WINDOWB_Y_END, params->windowb_y_end);
+
+ if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0)
+ REG_UPDATE_4(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC1_SELECT, params->selection,
+ OTG_CRC_EN, 1,
+ OTG_CRC_WINDOW_DB_EN, 1);
+ else
+ REG_UPDATE_3(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC1_SELECT, params->selection,
+ OTG_CRC_EN, 1);
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+static void optc35_setup_manual_trigger(struct timing_generator *optc)
+{
+ if (!optc || !optc->ctx)
+ return;
+
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ struct dc *dc = optc->ctx->dc;
+
+ if (dc->caps.dmub_caps.mclk_sw && !dc->debug.disable_fams)
+ dc_dmub_srv_set_drr_manual_trigger_cmd(dc, optc->inst);
+ else {
+ /*
+ * MIN_MASK_EN is gone and MASK is now always enabled.
+ *
+ * To get it to work with manual trigger we need to make sure
+ * we program the correct bit.
+ */
+ REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MIN_SEL, 1,
+ OTG_V_TOTAL_MAX_SEL, 1,
+ OTG_FORCE_LOCK_ON_EVENT, 0,
+ OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
+
+ // Setup manual flow control for EOF via TRIG_A
+ if (optc->funcs && optc->funcs->setup_manual_trigger)
+ optc->funcs->setup_manual_trigger(optc);
+ }
+}
+
+void optc35_set_drr(
+ struct timing_generator *optc,
+ const struct drr_params *params)
+{
+ if (!optc || !params)
+ return;
+
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ uint32_t max_otg_v_total = optc1->max_v_total - 1;
+
+ if (params != NULL &&
+ params->vertical_total_max > 0 &&
+ params->vertical_total_min > 0) {
+
+ if (params->vertical_total_mid != 0) {
+
+ REG_SET(OTG_V_TOTAL_MID, 0,
+ OTG_V_TOTAL_MID, params->vertical_total_mid - 1);
+
+ REG_UPDATE_2(OTG_V_TOTAL_CONTROL,
+ OTG_VTOTAL_MID_REPLACING_MAX_EN, 1,
+ OTG_VTOTAL_MID_FRAME_NUM,
+ (uint8_t)params->vertical_total_mid_frame_num);
+
+ }
+
+ if (optc->funcs && optc->funcs->set_vtotal_min_max)
+ optc->funcs->set_vtotal_min_max(optc,
+ params->vertical_total_min - 1, params->vertical_total_max - 1);
+ optc35_setup_manual_trigger(optc);
+ } else {
+ REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
+ OTG_SET_V_TOTAL_MIN_MASK, 0,
+ OTG_V_TOTAL_MIN_SEL, 0,
+ OTG_V_TOTAL_MAX_SEL, 0,
+ OTG_FORCE_LOCK_ON_EVENT, 0);
+
+ if (optc->funcs && optc->funcs->set_vtotal_min_max)
+ optc->funcs->set_vtotal_min_max(optc, 0, 0);
+ }
+
+ REG_WRITE(OTG_V_COUNT_STOP_CONTROL, max_otg_v_total);
+ REG_WRITE(OTG_V_COUNT_STOP_CONTROL2, 0);
+}
+
+static void optc35_set_long_vtotal(
+ struct timing_generator *optc,
+ const struct long_vtotal_params *params)
+{
+ if (!optc || !params)
+ return;
+
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ uint32_t vcount_stop_timer = 0, vcount_stop = 0;
+ uint32_t max_otg_v_total = optc1->max_v_total - 1;
+
+ if (params->vertical_total_min <= max_otg_v_total && params->vertical_total_max <= max_otg_v_total)
+ return;
+
+ if (params->vertical_total_max == 0 || params->vertical_total_min == 0) {
+ REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
+ OTG_SET_V_TOTAL_MIN_MASK, 0,
+ OTG_V_TOTAL_MIN_SEL, 0,
+ OTG_V_TOTAL_MAX_SEL, 0,
+ OTG_FORCE_LOCK_ON_EVENT, 0);
+
+ if (optc->funcs && optc->funcs->set_vtotal_min_max)
+ optc->funcs->set_vtotal_min_max(optc, 0, 0);
+ } else if (params->vertical_total_max == params->vertical_total_min) {
+ vcount_stop = params->vertical_blank_start;
+ vcount_stop_timer = params->vertical_total_max - max_otg_v_total;
+
+ REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MIN_SEL, 1,
+ OTG_V_TOTAL_MAX_SEL, 1,
+ OTG_FORCE_LOCK_ON_EVENT, 0,
+ OTG_SET_V_TOTAL_MIN_MASK, 0);
+
+ if (optc->funcs && optc->funcs->set_vtotal_min_max)
+ optc->funcs->set_vtotal_min_max(optc, max_otg_v_total, max_otg_v_total);
+
+ REG_WRITE(OTG_V_COUNT_STOP_CONTROL, vcount_stop);
+ REG_WRITE(OTG_V_COUNT_STOP_CONTROL2, vcount_stop_timer);
+ } else {
+ // Variable rate, keep DRR trigger mask
+ if (params->vertical_total_min > max_otg_v_total) {
+ // cannot be supported
+ // If MAX_OTG_V_COUNT < DRR trigger < v_total_min < v_total_max,
+ // DRR trigger will drop the vtotal counting directly to a new frame.
+ // But it should trigger between v_total_min and v_total_max.
+ ASSERT(0);
+
+ REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
+ OTG_SET_V_TOTAL_MIN_MASK, 0,
+ OTG_V_TOTAL_MIN_SEL, 0,
+ OTG_V_TOTAL_MAX_SEL, 0,
+ OTG_FORCE_LOCK_ON_EVENT, 0);
+
+ if (optc->funcs && optc->funcs->set_vtotal_min_max)
+ optc->funcs->set_vtotal_min_max(optc, 0, 0);
+
+ REG_WRITE(OTG_V_COUNT_STOP_CONTROL, max_otg_v_total);
+ REG_WRITE(OTG_V_COUNT_STOP_CONTROL2, 0);
+ } else {
+ // For total_min <= MAX_OTG_V_COUNT and total_max > MAX_OTG_V_COUNT
+ vcount_stop = params->vertical_total_min;
+ vcount_stop_timer = params->vertical_total_max - max_otg_v_total;
+
+ // Example:
+ // params->vertical_total_min 1000
+ // params->vertical_total_max 2000
+ // MAX_OTG_V_COUNT_STOP = 1500
+ //
+ // If DRR event not happened,
+ // time 0,1,2,3,4,...1000,1001,........,1500,1501,1502, ...1999
+ // vcount 0,1,2,3,4....1000...................,1001,1002,1003,...1399
+ // vcount2 0,1,2,3,4,..499,
+ // else (DRR event happened, ex : at line 1004)
+ // time 0,1,2,3,4,...1000,1001.....1004, 0
+ // vcount 0,1,2,3,4....1000,.............. 0 (new frame)
+ // vcount2 0,1,2, 3, -
+ if (optc->funcs && optc->funcs->set_vtotal_min_max)
+ optc->funcs->set_vtotal_min_max(optc,
+ params->vertical_total_min - 1, max_otg_v_total);
+ optc35_setup_manual_trigger(optc);
+
+ REG_WRITE(OTG_V_COUNT_STOP_CONTROL, vcount_stop);
+ REG_WRITE(OTG_V_COUNT_STOP_CONTROL2, vcount_stop_timer);
+ }
+ }
+}
+
+/*
+ * Wait for the OTG to report itself disabled once OTG_MASTER_EN is cleared.
+ *
+ * Does nothing when @optc or its context is NULL, or while OTG_MASTER_EN is
+ * still set.
+ */
+static void optc35_wait_otg_disable(struct timing_generator *optc)
+{
+	struct optc *optc1;
+	uint32_t is_master_en;
+
+	if (!optc || !optc->ctx)
+		return;
+
+	optc1 = DCN10TG_FROM_TG(optc);
+
+	REG_GET(OTG_CONTROL, OTG_MASTER_EN, &is_master_en);
+	/*
+	 * OTG_CURRENT_MASTER_EN_STATE is a field of OTG_CONTROL; poll that
+	 * register, as optc35_disable_crtc() does, rather than
+	 * OTG_CLOCK_CONTROL (the FN() macro ignores the register name, so a
+	 * mismatched pair silently applies the mask to the wrong register).
+	 */
+	if (!is_master_en)
+		REG_WAIT(OTG_CONTROL, OTG_CURRENT_MASTER_EN_STATE, 0, 1, 100000);
+}
+
+static const struct timing_generator_funcs dcn35_tg_funcs = {
+ .validate_timing = optc1_validate_timing,
+ .program_timing = optc1_program_timing,
+ .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
+ .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1,
+ .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2,
+ .program_global_sync = optc1_program_global_sync,
+ .enable_crtc = optc35_enable_crtc,
+ .disable_crtc = optc35_disable_crtc,
+ .immediate_disable_crtc = optc31_immediate_disable_crtc,
+ .phantom_crtc_post_enable = optc35_phantom_crtc_post_enable,
+ /* used by enable_timing_synchronization. Not needed for FPGA */
+ .is_counter_moving = optc1_is_counter_moving,
+ .get_position = optc1_get_position,
+ .get_frame_count = optc1_get_vblank_counter,
+ .get_scanoutpos = optc1_get_crtc_scanoutpos,
+ .get_otg_active_size = optc1_get_otg_active_size,
+ .set_early_control = optc1_set_early_control,
+ /* used by enable_timing_synchronization. Not needed for FPGA */
+ .wait_for_state = optc1_wait_for_state,
+ .set_blank_color = optc3_program_blank_color,
+ .did_triggered_reset_occur = optc1_did_triggered_reset_occur,
+ .triplebuffer_lock = optc3_triplebuffer_lock,
+ .triplebuffer_unlock = optc2_triplebuffer_unlock,
+ .enable_reset_trigger = optc1_enable_reset_trigger,
+ .enable_crtc_reset = optc1_enable_crtc_reset,
+ .disable_reset_trigger = optc1_disable_reset_trigger,
+ .lock = optc3_lock,
+ .unlock = optc1_unlock,
+ .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable,
+ .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable,
+ .enable_optc_clock = optc1_enable_optc_clock,
+ .set_drr = optc35_set_drr,
+ .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
+ .set_vtotal_min_max = optc1_set_vtotal_min_max,
+ .set_static_screen_control = optc1_set_static_screen_control,
+ .program_stereo = optc1_program_stereo,
+ .is_stereo_left_eye = optc1_is_stereo_left_eye,
+ .tg_init = optc3_tg_init,
+ .is_tg_enabled = optc1_is_tg_enabled,
+ .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred,
+ .clear_optc_underflow = optc1_clear_optc_underflow,
+ .setup_global_swap_lock = NULL,
+ .get_crc = optc1_get_crc,
+ .configure_crc = optc35_configure_crc,
+ .set_dsc_config = optc3_set_dsc_config,
+ .get_dsc_status = optc2_get_dsc_status,
+ .set_dwb_source = NULL,
+ .set_odm_bypass = optc32_set_odm_bypass,
+ .set_odm_combine = optc35_set_odm_combine,
+ .get_optc_source = optc2_get_optc_source,
+ .wait_otg_disable = optc35_wait_otg_disable,
+ .set_h_timing_div_manual_mode = optc32_set_h_timing_div_manual_mode,
+ .set_out_mux = optc3_set_out_mux,
+ .set_drr_trigger_window = optc3_set_drr_trigger_window,
+ .set_vtotal_change_limit = optc3_set_vtotal_change_limit,
+ .set_gsl = optc2_set_gsl,
+ .set_gsl_source_select = optc2_set_gsl_source_select,
+ .set_vtg_params = optc1_set_vtg_params,
+ .program_manual_trigger = optc2_program_manual_trigger,
+ .setup_manual_trigger = optc2_setup_manual_trigger,
+ .get_hw_timing = optc1_get_hw_timing,
+ .init_odm = optc3_init_odm,
+ .set_long_vtotal = optc35_set_long_vtotal,
+ .is_two_pixels_per_container = optc1_is_two_pixels_per_container,
+ .read_otg_state = optc31_read_otg_state,
+};
+
+void dcn35_timing_generator_init(struct optc *optc1)
+{
+ optc1->base.funcs = &dcn35_tg_funcs;
+
+ optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1;
+ optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1;
+
+ optc1->min_h_blank = 32;
+ optc1->min_v_blank = 3;
+ optc1->min_v_blank_interlace = 5;
+ optc1->min_h_sync_width = 4;
+ optc1->min_v_sync_width = 1;
+ optc1->max_frame_count = 0xFFFFFF;
+
+ dcn35_timing_generator_set_fgcg(
+ optc1, CTX->dc->debug.enable_fine_grain_clock_gating.bits.optc);
+}
+
+void dcn35_timing_generator_set_fgcg(struct optc *optc1, bool enable)
+{
+ REG_UPDATE(OPTC_CLOCK_CONTROL, OPTC_FGCG_REP_DIS, !enable);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h
new file mode 100644
index 000000000000..733a2f149d9a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_OPTC_DCN35_H__
+#define __DC_OPTC_DCN35_H__
+
+#include "dcn10/dcn10_optc.h"
+#include "dcn32/dcn32_optc.h"
+#define OPTC_COMMON_MASK_SH_LIST_DCN3_5(mask_sh)\
+ OPTC_COMMON_MASK_SH_LIST_DCN3_2(mask_sh),\
+ SF(OTG0_OTG_CRC_CNTL, OTG_CRC_WINDOW_DB_EN, mask_sh),\
+ SF(OTG0_OTG_CRC1_DATA_RG, CRC1_R_CR, mask_sh),\
+ SF(OTG0_OTG_CRC1_DATA_RG, CRC1_G_Y, mask_sh),\
+ SF(OTG0_OTG_CRC1_DATA_B, CRC1_B_CB, mask_sh),\
+ SF(OTG0_OTG_CRC2_DATA_RG, CRC2_R_CR, mask_sh),\
+ SF(OTG0_OTG_CRC2_DATA_RG, CRC2_G_Y, mask_sh),\
+ SF(OTG0_OTG_CRC2_DATA_B, CRC2_B_CB, mask_sh),\
+ SF(OTG0_OTG_CRC3_DATA_RG, CRC3_R_CR, mask_sh),\
+ SF(OTG0_OTG_CRC3_DATA_RG, CRC3_G_Y, mask_sh),\
+ SF(OTG0_OTG_CRC3_DATA_B, CRC3_B_CB, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_END, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL_READBACK, OTG_CRC0_WINDOWA_X_START_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL_READBACK, OTG_CRC0_WINDOWA_X_END_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL_READBACK, OTG_CRC0_WINDOWA_Y_START_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL_READBACK, OTG_CRC0_WINDOWA_Y_END_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL_READBACK, OTG_CRC0_WINDOWB_X_START_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL_READBACK, OTG_CRC0_WINDOWB_X_END_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL_READBACK, OTG_CRC0_WINDOWB_Y_START_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL_READBACK, OTG_CRC0_WINDOWB_Y_END_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL_READBACK, OTG_CRC1_WINDOWA_X_START_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL_READBACK, OTG_CRC1_WINDOWA_X_END_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL_READBACK, OTG_CRC1_WINDOWA_Y_START_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL_READBACK, OTG_CRC1_WINDOWA_Y_END_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL_READBACK, OTG_CRC1_WINDOWB_X_START_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL_READBACK, OTG_CRC1_WINDOWB_X_END_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL_READBACK, OTG_CRC1_WINDOWB_Y_START_READBACK, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL_READBACK, OTG_CRC1_WINDOWB_Y_END_READBACK, mask_sh),\
+ SF(OPTC_CLOCK_CONTROL, OPTC_FGCG_REP_DIS, mask_sh),\
+ SF(OTG0_OTG_V_COUNT_STOP_CONTROL, OTG_V_COUNT_STOP, mask_sh),\
+ SF(OTG0_OTG_V_COUNT_STOP_CONTROL2, OTG_V_COUNT_STOP_TIMER, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\
+ SF(OTG0_INTERRUPT_DEST, OTG0_IHC_OTG_VERTICAL_INTERRUPT2_DEST, mask_sh)
+
+void dcn35_timing_generator_init(struct optc *optc1);
+
+void dcn35_timing_generator_set_fgcg(struct optc *optc1, bool enable);
+
+void optc35_set_drr(struct timing_generator *optc, const struct drr_params *params);
+
+#endif /* __DC_OPTC_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c
new file mode 100644
index 000000000000..5af13706e601
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dcn401_optc.h"
+#include "dcn30/dcn30_optc.h"
+#include "dcn31/dcn31_optc.h"
+#include "dcn32/dcn32_optc.h"
+#include "reg_helper.h"
+#include "dc.h"
+#include "dcn_calc_math.h"
+#include "dc_dmub_srv.h"
+
+#define REG(reg)\
+ optc1->tg_regs->reg
+
+#define CTX \
+ optc1->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+ optc1->tg_shift->field_name, optc1->tg_mask->field_name
+
+/*
+ * OPTC uses ODM_MEM sub block to merge pixel data coming from different OPPs
+ * into unified memory location per horizontal line. ODM_MEM contains shared
+ * memory resources global to the ASIC. Each memory resource is capable of
+ * storing 2048 pixels independent from actual pixel data size. Total number of
+ * memory allocated must be even. The memory resource allocation is described in
+ * a memory bit map per OPTC instance. Driver has to make sure that there is no
+ * double allocation across different OPTC instances. Bit offset in the map
+ * represents memory instance id. Driver allocates a memory instance to the
+ * current OPTC by setting the bit with offset associated with the desired
+ * memory instance to 1 in the current OPTC memory map register.
+ *
+ * It is up to software to decide how to allocate the shared memory resources
+ * across different OPTC instances. Driver understands that the total number
+ * of memory available is always 2 times the max number of OPP pipes. So each
+ * OPP pipe can be mapped 2 pieces of memory. However there exists cases such as
+ * 11520x2160 which could use 6 pieces of memory for 2 OPP pipes i.e. 3 pieces
+ * for each OPP pipe.
+ *
+ * Driver will reserve the first and second preferred memory instances for each
+ * OPP pipe. For example, OPP0's first and second preferred memory is ODM_MEM0
+ * and ODM_MEM1. OPP1's first and second preferred memory is ODM_MEM2 and
+ * ODM_MEM3, and so on.
+ *
+ * Driver will first allocate from first preferred memory instances associated
+ * with current OPP pipes in use. If needed driver will then allocate from
+ * second preferred memory instances associated with current OPP pipes in use.
+ * Finally if still needed, driver will allocate from second preferred memory
+ * instances not associated with current OPP pipes. So if memory instances are
+ * enough other OPTCs can still allocate from their OPPs' first preferred memory
+ * instances without worrying about double allocation.
+ */
+
+static uint32_t decide_odm_mem_bit_map(int *opp_id, int opp_cnt, int h_active)
+{
+	/*
+	 * Per-OPP flags: whether the OPP's first / second preferred ODM_MEM
+	 * instance ends up in the returned map.
+	 */
+	bool take_first[MAX_PIPES] = {0};
+	bool take_second[MAX_PIPES] = {0};
+	/* Two instances are requested for every started 4096 pixels of h_active. */
+	int needed = ((h_active + 4095) / 4096) * 2;
+	uint32_t bit_map = 0;
+	int granted = 0;
+	int i;
+
+	/* Pass 1: first preferred instance of each OPP in use. */
+	for (i = 0; i < opp_cnt; i++) {
+		take_first[opp_id[i]] = true;
+		if (++granted == needed)
+			break;
+	}
+
+	/* Pass 2: second preferred instance of each OPP in use. */
+	for (i = 0; i < opp_cnt && needed > granted; i++) {
+		take_second[opp_id[i]] = true;
+		granted++;
+	}
+
+	/* Pass 3: second preferred instances not claimed by pass 2. */
+	for (i = 0; i < MAX_PIPES && needed > granted; i++) {
+		if (take_second[i])
+			continue;
+		take_second[i] = true;
+		granted++;
+	}
+	ASSERT(needed == granted);
+
+	/* OPP i owns bit 2*i (first) and bit 2*i+1 (second) of the map. */
+	for (i = 0; i < MAX_PIPES; i++) {
+		if (take_first[i])
+			bit_map |= 0x1 << (i * 2);
+		if (take_second[i])
+			bit_map |= 0x2 << (i * 2);
+	}
+
+	return bit_map;
+}
+
+void optc401_set_odm_combine(struct timing_generator *optc, int *opp_id,
+ int opp_cnt, int segment_width, int last_segment_width)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ uint32_t h_active = segment_width * (opp_cnt - 1) + last_segment_width; /* sum of all segment widths */
+ uint32_t odm_mem_bit_map = decide_odm_mem_bit_map(
+ opp_id, opp_cnt, h_active);
+
+ REG_SET(OPTC_MEMORY_CONFIG, 0,
+ OPTC_MEM_SEL, odm_mem_bit_map);
+
+ switch (opp_cnt) {
+ case 2: /* ODM Combine 2:1 */
+ REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0,
+ OPTC_NUM_OF_INPUT_SEGMENT, 1,
+ OPTC_SEG0_SRC_SEL, opp_id[0],
+ OPTC_SEG1_SRC_SEL, opp_id[1]);
+ REG_UPDATE(OPTC_WIDTH_CONTROL,
+ OPTC_SEGMENT_WIDTH, segment_width);
+
+ REG_UPDATE(OTG_H_TIMING_CNTL,
+ OTG_H_TIMING_DIV_MODE, H_TIMING_DIV_BY2);
+ break;
+ case 3: /* ODM Combine 3:1 */
+ REG_SET_4(OPTC_DATA_SOURCE_SELECT, 0,
+ OPTC_NUM_OF_INPUT_SEGMENT, 2,
+ OPTC_SEG0_SRC_SEL, opp_id[0],
+ OPTC_SEG1_SRC_SEL, opp_id[1],
+ OPTC_SEG2_SRC_SEL, opp_id[2]);
+ REG_UPDATE(OPTC_WIDTH_CONTROL,
+ OPTC_SEGMENT_WIDTH, segment_width);
+ REG_UPDATE(OPTC_WIDTH_CONTROL2,
+ OPTC_SEGMENT_WIDTH_LAST,
+ last_segment_width);
+ /* In ODM combine 3:1 mode ODM packs 4 pixels per data transfer
+ * so OTG_H_TIMING_DIV_MODE should be configured to
+ * H_TIMING_DIV_BY4 even though ODM combines 3 OPP inputs, it
+ * outputs 4 pixels from single OPP at a time.
+ */
+ REG_UPDATE(OTG_H_TIMING_CNTL,
+ OTG_H_TIMING_DIV_MODE, H_TIMING_DIV_BY4);
+ break;
+ case 4: /* ODM Combine 4:1 */
+ REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0,
+ OPTC_NUM_OF_INPUT_SEGMENT, 3,
+ OPTC_SEG0_SRC_SEL, opp_id[0],
+ OPTC_SEG1_SRC_SEL, opp_id[1],
+ OPTC_SEG2_SRC_SEL, opp_id[2],
+ OPTC_SEG3_SRC_SEL, opp_id[3]);
+ REG_UPDATE(OPTC_WIDTH_CONTROL,
+ OPTC_SEGMENT_WIDTH, segment_width);
+ REG_UPDATE(OTG_H_TIMING_CNTL,
+ OTG_H_TIMING_DIV_MODE, H_TIMING_DIV_BY4);
+ break;
+ default: /* only 2, 3 or 4 OPPs are handled; the memory map above is still written */
+ ASSERT(false);
+ }
+ /* Remember how many OPPs feed this OPTC. */
+ optc1->opp_count = opp_cnt;
+}
+
+void optc401_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+	uint32_t manual = manual_mode ? 1 : 0;	/* field is written as exactly 0 or 1 */
+
+	REG_UPDATE(OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, manual);
+}
+/**
+ * optc401_enable_crtc() - Enable CRTC
+ * @optc: Pointer to the timing generator structure
+ *
+ * Writes @optc->inst as the segment 0 source, enables VTG, then sets OTG_MASTER_EN.
+ *
+ * Return: Always returns true
+ */
+bool optc401_enable_crtc(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ /* opp instance for OTG, 1 to 1 mapping and odm will adjust */
+ REG_UPDATE(OPTC_DATA_SOURCE_SELECT,
+ OPTC_SEG0_SRC_SEL, optc->inst);
+
+ /* VTG enable first is for HW workaround */
+ REG_UPDATE(CONTROL,
+ VTG0_ENABLE, 1);
+
+ REG_SEQ_START();
+
+ /* Enable CRTC */
+ REG_UPDATE_2(OTG_CONTROL,
+ OTG_DISABLE_POINT_CNTL, 2,
+ OTG_MASTER_EN, 1);
+
+ REG_SEQ_SUBMIT();
+ REG_SEQ_WAIT_DONE();
+
+ return true;
+}
+
+/* disable_crtc: reset ODM source/memory selection, turn off OTG and VTG, then wait on two status fields */
+bool optc401_disable_crtc(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+ OPTC_SEG0_SRC_SEL, 0xf,
+ OPTC_SEG1_SRC_SEL, 0xf,
+ OPTC_SEG2_SRC_SEL, 0xf,
+ OPTC_SEG3_SRC_SEL, 0xf,
+ OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
+ REG_UPDATE(OPTC_MEMORY_CONFIG,
+ OPTC_MEM_SEL, 0);
+
+ /* disable otg request until end of the first line
+ * in the vertical blank region
+ */
+ REG_UPDATE(OTG_CONTROL,
+ OTG_MASTER_EN, 0);
+
+ REG_UPDATE(CONTROL,
+ VTG0_ENABLE, 0);
+
+ /* wait until OTG_CURRENT_MASTER_EN_STATE == 0 */
+ REG_WAIT(OTG_CONTROL,
+ OTG_CURRENT_MASTER_EN_STATE,
+ 0, 10, 15000);
+
+ /* CRTC disabled; wait for OTG_BUSY to read 0. */
+ REG_WAIT(OTG_CLOCK_CONTROL,
+ OTG_BUSY, 0,
+ 1, 150000);
+
+ return true;
+}
+
+void optc401_phantom_crtc_post_enable(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ /* Disable immediately: disable point 0 and master enable cleared in one update. */
+ REG_UPDATE_2(OTG_CONTROL, OTG_DISABLE_POINT_CNTL, 0, OTG_MASTER_EN, 0);
+
+ /* CRTC disabled; wait for OTG_BUSY to read 0. */
+ REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000);
+}
+
+void optc401_disable_phantom_otg(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+ OPTC_SEG0_SRC_SEL, 0xf,
+ OPTC_SEG1_SRC_SEL, 0xf,
+ OPTC_SEG2_SRC_SEL, 0xf,
+ OPTC_SEG3_SRC_SEL, 0xf,
+ OPTC_NUM_OF_INPUT_SEGMENT, 0);
+ /* Segment sources are reset above; now clear OTG_MASTER_EN. No wait for completion here. */
+ REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0);
+}
+
+void optc401_set_odm_bypass(struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ enum h_timing_div_mode h_div = H_TIMING_NO_DIV;
+
+ REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0,
+ OPTC_NUM_OF_INPUT_SEGMENT, 0,
+ OPTC_SEG0_SRC_SEL, optc->inst,
+ OPTC_SEG1_SRC_SEL, 0xf,
+ OPTC_SEG2_SRC_SEL, 0xf,
+ OPTC_SEG3_SRC_SEL, 0xf
+ );
+ /* NOTE(review): hook result is used directly as the DIV_MODE value; presumably bool, true == DIV_BY2 - confirm */
+ h_div = optc->funcs->is_two_pixels_per_container(dc_crtc_timing);
+ REG_UPDATE(OTG_H_TIMING_CNTL,
+ OTG_H_TIMING_DIV_MODE, h_div);
+
+ REG_SET(OPTC_MEMORY_CONFIG, 0,
+ OPTC_MEM_SEL, 0);
+ optc1->opp_count = 1; /* bypass: a single source, @optc->inst, stays selected */
+}
+
+/* only to be used when FAMS2 is disabled or unsupported */
+void optc401_setup_manual_trigger(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ struct dc *dc = optc->ctx->dc;
+
+ if (dc->caps.dmub_caps.fams_ver == 1 && !dc->debug.disable_fams)
+ /* FAMS */
+ dc_dmub_srv_set_drr_manual_trigger_cmd(dc, optc->inst);
+ else {
+ /*
+ * MIN_MASK_EN is gone and MASK is now always enabled.
+ *
+ * To get it to work with manual trigger we need to make sure
+ * we program the correct bit.
+ */
+ REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MIN_SEL, 1,
+ OTG_V_TOTAL_MAX_SEL, 1,
+ OTG_FORCE_LOCK_ON_EVENT, 0,
+ OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
+ }
+}
+
+void optc401_set_drr(
+ struct timing_generator *optc,
+ const struct drr_params *params)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ struct dc *dc = optc->ctx->dc;
+ struct drr_params amended_params = { 0 };
+ bool program_manual_trigger = false;
+
+ if (dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver && dc->debug.fams2_config.bits.enable) {
+ if (params != NULL &&
+ params->vertical_total_max > 0 &&
+ params->vertical_total_min > 0) {
+ amended_params.vertical_total_max = params->vertical_total_max - 1;
+ amended_params.vertical_total_min = params->vertical_total_min - 1;
+ if (params->vertical_total_mid != 0) {
+ amended_params.vertical_total_mid = params->vertical_total_mid - 1;
+ amended_params.vertical_total_mid_frame_num = params->vertical_total_mid_frame_num;
+ }
+ program_manual_trigger = true;
+ }
+ /* Without a valid range this sends all-zero values and no manual trigger. */
+ dc_dmub_srv_fams2_drr_update(dc, optc->inst,
+ amended_params.vertical_total_min,
+ amended_params.vertical_total_max,
+ amended_params.vertical_total_mid,
+ amended_params.vertical_total_mid_frame_num,
+ program_manual_trigger);
+ } else {
+ if (params != NULL &&
+ params->vertical_total_max > 0 &&
+ params->vertical_total_min > 0) {
+ /* Non-FAMS2 path; every V_TOTAL value is passed on as (value - 1). */
+ if (params->vertical_total_mid != 0) {
+ /* Optional mid point, enabled via OTG_VTOTAL_MID_REPLACING_MAX_EN. */
+ REG_SET(OTG_V_TOTAL_MID, 0,
+ OTG_V_TOTAL_MID, params->vertical_total_mid - 1);
+
+ REG_UPDATE_2(OTG_V_TOTAL_CONTROL,
+ OTG_VTOTAL_MID_REPLACING_MAX_EN, 1,
+ OTG_VTOTAL_MID_FRAME_NUM,
+ (uint8_t)params->vertical_total_mid_frame_num);
+
+ }
+
+ optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1);
+ optc401_setup_manual_trigger(optc);
+ } else {
+ REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
+ OTG_SET_V_TOTAL_MIN_MASK, 0,
+ OTG_V_TOTAL_MIN_SEL, 0,
+ OTG_V_TOTAL_MAX_SEL, 0,
+ OTG_FORCE_LOCK_ON_EVENT, 0);
+ /* No valid range was given, so min and max are both passed as 0. */
+ optc->funcs->set_vtotal_min_max(optc, 0, 0);
+ }
+ }
+}
+
+void optc401_set_out_mux(struct timing_generator *optc, enum otg_out_mux_dest dest)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ /* 00 - OTG_CONTROL_OTG_OUT_MUX_0 : Connects to DIO.
+ * 01 - OTG_CONTROL_OTG_OUT_MUX_1 : Reserved.
+ * 02 - OTG_CONTROL_OTG_OUT_MUX_2 : Connects to HPO.
+ */
+ REG_UPDATE(OTG_CONTROL, OTG_OUT_MUX, dest); /* @dest is written unchanged */
+}
+
+void optc401_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max)
+{
+	struct dc *dc = optc->ctx->dc;
+	bool use_fams2 = dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver &&
+			 dc->debug.fams2_config.bits.enable;
+	bool use_fams = dc->caps.dmub_caps.fams_ver == 1 && !dc->debug.disable_fams;
+
+	if (use_fams2) {
+		/* FAMS2: min/max only; mid values are 0 and no manual trigger is requested. */
+		dc_dmub_srv_fams2_drr_update(dc, optc->inst, vtotal_min, vtotal_max, 0, 0, false);
+		return;
+	}
+	if (use_fams) {
+		/* FAMS */
+		dc_dmub_srv_drr_update_cmd(dc, optc->inst, vtotal_min, vtotal_max);
+		return;
+	}
+	optc1_set_vtotal_min_max(optc, vtotal_min, vtotal_max);
+}
+
+void optc401_program_global_sync(
+ struct timing_generator *optc,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ int pstate_keepout)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ optc1->vready_offset = vready_offset;
+ optc1->vstartup_start = vstartup_start;
+ optc1->vupdate_offset = vupdate_offset;
+ optc1->vupdate_width = vupdate_width;
+ optc1->pstate_keepout = pstate_keepout;
+ /* The values stay cached in optc1 even when the zero-vstartup bail-out below is taken. */
+ if (optc1->vstartup_start == 0) {
+ BREAK_TO_DEBUGGER();
+ return;
+ }
+
+ REG_SET(OTG_VSTARTUP_PARAM, 0,
+ VSTARTUP_START, optc1->vstartup_start);
+
+ REG_SET_2(OTG_VUPDATE_PARAM, 0,
+ VUPDATE_OFFSET, optc1->vupdate_offset,
+ VUPDATE_WIDTH, optc1->vupdate_width);
+
+ REG_SET(OTG_VREADY_PARAM, 0,
+ VREADY_OFFSET, optc1->vready_offset);
+
+ REG_UPDATE(OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, pstate_keepout);
+}
+
+void optc401_set_vupdate_keepout(struct timing_generator *tg, bool enable)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(tg);
+	/* Keepout window: start offset 0, end offset 10 past the cached vready_offset. */
+	uint32_t keepout_end = optc1->vready_offset + 10;
+
+	REG_SET_3(OTG_VUPDATE_KEEPOUT, 0,
+		MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, 0,
+		MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, keepout_end,
+		OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, enable);
+}
+
+bool optc401_wait_update_lock_status(struct timing_generator *tg, bool locked)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(tg);
+	uint32_t status = 0;
+
+	/* Wait for UPDATE_LOCK_STATUS to reach the requested state... */
+	REG_WAIT(OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, locked,
+			1, 150000);
+
+	/*
+	 * ...then read the field back: the outcome of the wait is not captured
+	 * here, so this re-read alone decides the return value.
+	 */
+	REG_GET(OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, &status);
+	return status == locked;
+}
+
+static const struct timing_generator_funcs dcn401_tg_funcs = {
+ .validate_timing = optc1_validate_timing,
+ .program_timing = optc1_program_timing,
+ .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
+ .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1,
+ .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2,
+ .program_global_sync = optc401_program_global_sync,
+ .enable_crtc = optc401_enable_crtc,
+ .disable_crtc = optc401_disable_crtc,
+ .phantom_crtc_post_enable = optc401_phantom_crtc_post_enable,
+ .disable_phantom_crtc = optc401_disable_phantom_otg,
+ /* used by enable_timing_synchronization. Not needed for FPGA */
+ .is_counter_moving = optc1_is_counter_moving,
+ .get_position = optc1_get_position,
+ .get_frame_count = optc1_get_vblank_counter,
+ .get_scanoutpos = optc1_get_crtc_scanoutpos,
+ .get_otg_active_size = optc1_get_otg_active_size,
+ .set_early_control = optc1_set_early_control,
+ /* used by enable_timing_synchronization. Not needed for FPGA */
+ .wait_for_state = optc1_wait_for_state,
+ .set_blank_color = optc3_program_blank_color,
+ .did_triggered_reset_occur = optc1_did_triggered_reset_occur,
+ .triplebuffer_lock = optc3_triplebuffer_lock,
+ .triplebuffer_unlock = optc2_triplebuffer_unlock,
+ .enable_reset_trigger = optc1_enable_reset_trigger,
+ .enable_crtc_reset = optc1_enable_crtc_reset,
+ .disable_reset_trigger = optc1_disable_reset_trigger,
+ .lock = optc3_lock,
+ .unlock = optc1_unlock,
+ .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable,
+ .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable,
+ .enable_optc_clock = optc1_enable_optc_clock,
+ .set_drr = optc401_set_drr,
+ .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
+ .set_vtotal_min_max = optc401_set_vtotal_min_max,
+ .set_static_screen_control = optc1_set_static_screen_control,
+ .program_stereo = optc1_program_stereo,
+ .is_stereo_left_eye = optc1_is_stereo_left_eye,
+ .tg_init = optc3_tg_init,
+ .is_tg_enabled = optc1_is_tg_enabled,
+ .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred,
+ .clear_optc_underflow = optc1_clear_optc_underflow,
+ .setup_global_swap_lock = NULL,
+ .get_crc = optc1_get_crc,
+ .configure_crc = optc1_configure_crc,
+ .set_dsc_config = optc3_set_dsc_config,
+ .get_dsc_status = optc2_get_dsc_status,
+ .set_dwb_source = NULL,
+ .set_odm_bypass = optc401_set_odm_bypass,
+ .set_odm_combine = optc401_set_odm_combine,
+ .wait_odm_doublebuffer_pending_clear = optc32_wait_odm_doublebuffer_pending_clear,
+ .set_h_timing_div_manual_mode = optc401_set_h_timing_div_manual_mode,
+ .get_optc_source = optc2_get_optc_source,
+ .set_out_mux = optc401_set_out_mux,
+ .set_drr_trigger_window = optc3_set_drr_trigger_window,
+ .set_vtotal_change_limit = optc3_set_vtotal_change_limit,
+ .set_gsl = optc2_set_gsl,
+ .set_gsl_source_select = optc2_set_gsl_source_select,
+ .set_vtg_params = optc1_set_vtg_params,
+ .program_manual_trigger = optc2_program_manual_trigger,
+ .setup_manual_trigger = optc2_setup_manual_trigger, /* NOTE(review): not optc401_setup_manual_trigger() - confirm intended */
+ .get_hw_timing = optc1_get_hw_timing,
+ .is_two_pixels_per_container = optc1_is_two_pixels_per_container,
+ .get_optc_double_buffer_pending = optc3_get_optc_double_buffer_pending,
+ .get_otg_double_buffer_pending = optc3_get_otg_update_pending,
+ .get_pipe_update_pending = optc3_get_pipe_update_pending,
+ .set_vupdate_keepout = optc401_set_vupdate_keepout,
+ .wait_update_lock_status = optc401_wait_update_lock_status,
+ .read_otg_state = optc31_read_otg_state,
+};
+
+void dcn401_timing_generator_init(struct optc *optc1)
+{
+	optc1->base.funcs = &dcn401_tg_funcs;
+
+	optc1->min_h_sync_width = 4;
+	optc1->min_v_sync_width = 1;
+	optc1->min_h_blank = 32;
+	optc1->min_v_blank = 3;
+	optc1->min_v_blank_interlace = 5;
+	/* Maximum totals are taken from the register field masks: mask value + 1. */
+	optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1;
+	optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h
new file mode 100644
index 000000000000..fa62737b5b1b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DC_OPTC_DCN401_H__
+#define __DC_OPTC_DCN401_H__
+
+#include "dcn10/dcn10_optc.h"
+
+#define OPTC_COMMON_MASK_SH_LIST_DCN401(mask_sh)\
+ SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\
+ SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_OFFSET, mask_sh),\
+ SF(OTG0_OTG_VUPDATE_PARAM, VUPDATE_WIDTH, mask_sh),\
+ SF(OTG0_OTG_VREADY_PARAM, VREADY_OFFSET, mask_sh),\
+ SF(OTG0_OTG_MASTER_UPDATE_LOCK, OTG_MASTER_UPDATE_LOCK, mask_sh),\
+ SF(OTG0_OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, mask_sh),\
+ SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_START_X, mask_sh),\
+ SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_END_X, mask_sh),\
+ SF(OTG0_OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, mask_sh),\
+ SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_START_Y, mask_sh),\
+ SF(OTG0_OTG_GLOBAL_CONTROL1, MASTER_UPDATE_LOCK_DB_END_Y, mask_sh),\
+ SF(OTG0_OTG_GLOBAL_CONTROL2, OTG_MASTER_UPDATE_LOCK_SEL, mask_sh),\
+ SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_X, mask_sh),\
+ SF(OTG0_OTG_GLOBAL_CONTROL4, DIG_UPDATE_POSITION_Y, mask_sh),\
+ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_H_TOTAL, OTG_H_TOTAL, mask_sh),\
+ SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_START, mask_sh),\
+ SF(OTG0_OTG_H_BLANK_START_END, OTG_H_BLANK_END, mask_sh),\
+ SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_START, mask_sh),\
+ SF(OTG0_OTG_H_SYNC_A, OTG_H_SYNC_A_END, mask_sh),\
+ SF(OTG0_OTG_H_SYNC_A_CNTL, OTG_H_SYNC_A_POL, mask_sh),\
+ SF(OTG0_OTG_V_TOTAL, OTG_V_TOTAL, mask_sh),\
+ SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_START, mask_sh),\
+ SF(OTG0_OTG_V_BLANK_START_END, OTG_V_BLANK_END, mask_sh),\
+ SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_START, mask_sh),\
+ SF(OTG0_OTG_V_SYNC_A, OTG_V_SYNC_A_END, mask_sh),\
+ SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, mask_sh),\
+ SF(OTG0_OTG_V_SYNC_A_CNTL, OTG_V_SYNC_MODE, mask_sh),\
+ SF(OTG0_OTG_CONTROL, OTG_MASTER_EN, mask_sh),\
+ SF(OTG0_OTG_CONTROL, OTG_START_POINT_CNTL, mask_sh),\
+ SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\
+ SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\
+ SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\
+ SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\
+ SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\
+ SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\
+ SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EYE_FLAG_POLARITY, mask_sh),\
+ SF(OTG0_OTG_STEREO_CONTROL, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, mask_sh),\
+ SF(OTG0_OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, mask_sh),\
+ SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, mask_sh),\
+ SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_V_UPDATE_MODE, mask_sh),\
+ SF(OTG0_OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_STEREO_SEL_OVR, mask_sh),\
+ SF(OTG0_OTG_V_TOTAL_MAX, OTG_V_TOTAL_MAX, mask_sh),\
+ SF(OTG0_OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, mask_sh),\
+ SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MIN_SEL, mask_sh),\
+ SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_V_TOTAL_MAX_SEL, mask_sh),\
+ SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_FORCE_LOCK_ON_EVENT, mask_sh),\
+ SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, mask_sh),\
+ SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MIN_EN, mask_sh),\
+ SF(OTG0_OTG_V_TOTAL_CONTROL, OTG_VTOTAL_MID_REPLACING_MAX_EN, mask_sh),\
+ SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_CLEAR, mask_sh),\
+ SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_MODE, mask_sh),\
+ SF(OTG0_OTG_FORCE_COUNT_NOW_CNTL, OTG_FORCE_COUNT_NOW_OCCURRED, mask_sh),\
+ SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_SELECT, mask_sh),\
+ SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\
+ SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\
+ SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\
+ SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\
+ SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\
+ SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\
+ SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\
+ SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\
+ SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\
+ SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\
+ SF(OTG0_OTG_STATUS, OTG_V_BLANK, mask_sh),\
+ SF(OTG0_OTG_STATUS, OTG_V_ACTIVE_DISP, mask_sh),\
+ SF(OTG0_OTG_STATUS_POSITION, OTG_HORZ_COUNT, mask_sh),\
+ SF(OTG0_OTG_STATUS_POSITION, OTG_VERT_COUNT, mask_sh),\
+ SF(OTG0_OTG_NOM_VERT_POSITION, OTG_VERT_COUNT_NOM, mask_sh),\
+ SF(OTG0_OTG_M_CONST_DTO0, OTG_M_CONST_DTO_PHASE, mask_sh),\
+ SF(OTG0_OTG_M_CONST_DTO1, OTG_M_CONST_DTO_MODULO, mask_sh),\
+ SF(OTG0_OTG_CLOCK_CONTROL, OTG_BUSY, mask_sh),\
+ SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\
+ SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\
+ SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\
+ SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\
+ SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\
+ SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\
+ SF(OTG0_OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE, mask_sh),\
+ SF(OTG0_OTG_VERTICAL_INTERRUPT1_POSITION, OTG_VERTICAL_INTERRUPT1_LINE_START, mask_sh),\
+ SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\
+ SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\
+ SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\
+ SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_ON, mask_sh),\
+ SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\
+ SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\
+ SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_DOUBLE_BUFFER_PENDING, mask_sh),\
+ SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\
+ SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\
+ SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\
+ SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\
+ SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_OCCURRED, mask_sh),\
+ SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, mask_sh),\
+ SF(OTG0_OTG_VERT_SYNC_CONTROL, OTG_AUTO_FORCE_VSYNC_MODE, mask_sh),\
+ SF(OTG0_OTG_GSL_CONTROL, OTG_GSL0_EN, mask_sh),\
+ SF(OTG0_OTG_GSL_CONTROL, OTG_GSL1_EN, mask_sh),\
+ SF(OTG0_OTG_GSL_CONTROL, OTG_GSL2_EN, mask_sh),\
+ SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_EN, mask_sh),\
+ SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_FORCE_DELAY, mask_sh),\
+ SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\
+ SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\
+ SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\
+ SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\
+ SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\
+ SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\
+ SF(OTG0_OTG_CRC0_DATA_B, CRC0_B_CB, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_START, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWA_X_CONTROL, OTG_CRC0_WINDOWA_X_END, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_START, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWA_Y_CONTROL, OTG_CRC0_WINDOWA_Y_END, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\
+ SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\
+ SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\
+ SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\
+ SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\
+ SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\
+ SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh),\
+ SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\
+ SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\
+ SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \
+ SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\
+ SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_END_Y, mask_sh),\
+ SF(OTG0_OTG_VUPDATE_KEEPOUT, OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, mask_sh), \
+ SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, mask_sh), \
+ SF(OTG0_OTG_VUPDATE_KEEPOUT, MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, mask_sh), \
+ SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_MASTER_MODE, mask_sh), \
+ SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \
+ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\
+ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\
+ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\
+ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG3_SRC_SEL, mask_sh),\
+ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\
+ SF(ODM0_OPTC_MEMORY_CONFIG, OPTC_MEM_SEL, mask_sh),\
+ SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, mask_sh),\
+ SF(ODM0_OPTC_DATA_FORMAT_CONTROL, OPTC_DSC_MODE, mask_sh),\
+ SF(ODM0_OPTC_BYTES_PER_PIXEL, OPTC_DSC_BYTES_PER_PIXEL, mask_sh),\
+ SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_DSC_SLICE_WIDTH, mask_sh),\
+ SF(ODM0_OPTC_WIDTH_CONTROL, OPTC_SEGMENT_WIDTH, mask_sh),\
+ SF(ODM0_OPTC_WIDTH_CONTROL2, OPTC_SEGMENT_WIDTH_LAST, mask_sh),\
+ SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_START_X, mask_sh),\
+ SF(OTG0_OTG_DRR_TRIGGER_WINDOW, OTG_DRR_TRIGGER_WINDOW_END_X, mask_sh),\
+ SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\
+ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\
+ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\
+ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\
+ SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\
+ SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, mask_sh),\
+ SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_EXTEND, mask_sh),\
+ SF(OTG0_OTG_PSTATE_REGISTER, OTG_UNBLANK, mask_sh),\
+ SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_ALLOW_WIDTH_MIN, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\
+ SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\
+ SF(OTG0_INTERRUPT_DEST, OTG0_IHC_OTG_VERTICAL_INTERRUPT2_DEST, mask_sh)
+
+void dcn401_timing_generator_init(struct optc *optc1);
+
+void optc401_set_drr(
+ struct timing_generator *optc,
+ const struct drr_params *params);
+void optc401_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max);
+void optc401_setup_manual_trigger(struct timing_generator *optc);
+void optc401_program_global_sync(
+ struct timing_generator *optc,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ int pstate_keepout);
+bool optc401_enable_crtc(struct timing_generator *optc);
+bool optc401_disable_crtc(struct timing_generator *optc);
+void optc401_phantom_crtc_post_enable(struct timing_generator *optc);
+void optc401_disable_phantom_otg(struct timing_generator *optc);
+void optc401_set_odm_bypass(struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing);
+void optc401_set_odm_combine(struct timing_generator *optc, int *opp_id,
+ int opp_cnt, int segment_width, int last_segment_width);
+void optc401_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode);
+void optc401_set_out_mux(struct timing_generator *optc, enum otg_out_mux_dest dest);
+bool optc401_wait_update_lock_status(struct timing_generator *tg, bool locked);
+void optc401_set_vupdate_keepout(struct timing_generator *tg, bool enable);
+
+#endif /* __DC_OPTC_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h
index aad8095660c9..782316348941 100644
--- a/drivers/gpu/drm/amd/display/dc/os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/os_types.h
@@ -29,14 +29,14 @@
#include <linux/slab.h>
#include <linux/kgdb.h>
-#include <linux/kref.h>
-#include <linux/types.h>
#include <linux/delay.h>
#include <linux/mm.h>
+#include <linux/vmalloc.h>
#include <asm/byteorder.h>
#include <drm/display/drm_dp_helper.h>
+#include <drm/drm_device.h>
#include <drm/drm_print.h>
#include "cgs_common.h"
diff --git a/drivers/gpu/drm/amd/display/dc/pg/Makefile b/drivers/gpu/drm/amd/display/dc/pg/Makefile
new file mode 100644
index 000000000000..ec11d3157a57
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/pg/Makefile
@@ -0,0 +1,35 @@
+#
+# Copyright 2020 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Authors: AMD
+#
+#
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN35
+###############################################################################
+PG_DCN35 = dcn35_pg_cntl.o
+
+AMD_DAL_PG_DCN35 = $(addprefix $(AMDDALPATH)/dc/pg/dcn35/,$(PG_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_PG_DCN35)
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c
new file mode 100644
index 000000000000..72bd43f9bbe2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c
@@ -0,0 +1,573 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "core_types.h"
+#include "dcn35_pg_cntl.h"
+#include "dccg.h"
+
+#define TO_DCN_PG_CNTL(pg_cntl)\
+ container_of(pg_cntl, struct dcn_pg_cntl, base)
+
+#define REG(reg) \
+ (pg_cntl_dcn->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+ pg_cntl_dcn->pg_cntl_shift->field_name, pg_cntl_dcn->pg_cntl_mask->field_name
+
+#define CTX \
+ pg_cntl_dcn->base.ctx
+#define DC_LOGGER \
+ pg_cntl->ctx->logger
+
+static bool pg_cntl35_dsc_pg_status(struct pg_cntl *pg_cntl, unsigned int dsc_inst)
+{ /* Return true when DSC instance dsc_inst reports PGFSM power status 0 (the powered-on value). */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t pwr_status = 0; /* stays 0 if no status register is read below */
+
+ if (pg_cntl->ctx->dc->debug.ignore_pg)
+ return true; /* debug override: report the block as enabled without touching HW */
+
+ switch (dsc_inst) {
+ case 0: /* DSC0 -> power domain 16 */
+ REG_GET(DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+ break;
+ case 1: /* DSC1 -> power domain 17 */
+ REG_GET(DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+ break;
+ case 2: /* DSC2 -> power domain 18 */
+ REG_GET(DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+ break;
+ case 3: /* DSC3 -> power domain 19 */
+ REG_GET(DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+ break;
+ default:
+ BREAK_TO_DEBUGGER(); /* unknown instance: pwr_status is still 0, so true is returned */
+ break;
+ }
+
+ return pwr_status == 0; /* 0 is the status pg_cntl35_dsc_pg_control() waits for on power-up */
+}
+
+void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bool power_on)
+{ /* Power DSC instance dsc_inst on or off and record the requested state in pg_pipe_res_enable. */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t power_gate = power_on ? 0 : 1; /* value programmed into DOMAIN_POWER_GATE */
+ uint32_t pwr_status = power_on ? 0 : 2; /* DOMAIN_PGFSM_PWR_STATUS value to poll for */
+ uint32_t org_ip_request_cntl = 0;
+ bool block_enabled = false;
+ bool skip_pg = pg_cntl->ctx->dc->debug.ignore_pg ||
+ pg_cntl->ctx->dc->debug.disable_dsc_power_gate ||
+ pg_cntl->ctx->dc->idle_optimizations_allowed;
+
+ if (skip_pg && !power_on) /* the skip conditions only veto gating; power-up still proceeds */
+ return;
+
+ block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, dsc_inst);
+ if (power_on) { /* nothing to do when HW is already in the requested state */
+ if (block_enabled)
+ return;
+ } else {
+ if (!block_enabled)
+ return;
+ }
+
+ REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
+ if (org_ip_request_cntl == 0) /* set IP_REQUEST_EN if clear; it is not restored by this function */
+ REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
+
+ switch (dsc_inst) {
+ case 0: /* DSC0 -> power domain 16 */
+ REG_UPDATE(DOMAIN16_PG_CONFIG,
+ DOMAIN_POWER_GATE, power_gate);
+
+ REG_WAIT(DOMAIN16_PG_STATUS,
+ DOMAIN_PGFSM_PWR_STATUS, pwr_status,
+ 1, 10000); /* NOTE(review): presumably delay and retry count - confirm in reg_helper.h */
+ break;
+ case 1: /* DSC1 -> power domain 17 */
+ REG_UPDATE(DOMAIN17_PG_CONFIG,
+ DOMAIN_POWER_GATE, power_gate);
+
+ REG_WAIT(DOMAIN17_PG_STATUS,
+ DOMAIN_PGFSM_PWR_STATUS, pwr_status,
+ 1, 10000);
+ break;
+ case 2: /* DSC2 -> power domain 18 */
+ REG_UPDATE(DOMAIN18_PG_CONFIG,
+ DOMAIN_POWER_GATE, power_gate);
+
+ REG_WAIT(DOMAIN18_PG_STATUS,
+ DOMAIN_PGFSM_PWR_STATUS, pwr_status,
+ 1, 10000);
+ break;
+ case 3: /* DSC3 -> power domain 19 */
+ REG_UPDATE(DOMAIN19_PG_CONFIG,
+ DOMAIN_POWER_GATE, power_gate);
+
+ REG_WAIT(DOMAIN19_PG_STATUS,
+ DOMAIN_PGFSM_PWR_STATUS, pwr_status,
+ 1, 10000);
+ break;
+ default:
+ BREAK_TO_DEBUGGER(); /* unknown instance: no register is programmed */
+ break;
+ }
+
+ if (dsc_inst < MAX_PIPES) /* bounds guard for the software bookkeeping array */
+ pg_cntl->pg_pipe_res_enable[PG_DSC][dsc_inst] = power_on;
+}
+
+static bool pg_cntl35_hubp_dpp_pg_status(struct pg_cntl *pg_cntl, unsigned int hubp_dpp_inst)
+{ /* Return true when the HUBP/DPP pair hubp_dpp_inst reports PGFSM power status 0 (powered on). */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t pwr_status = 0; /* stays 0 if no status register is read below */
+
+ switch (hubp_dpp_inst) { /* unlike the DSC status helper, debug.ignore_pg is not consulted here */
+ case 0:
+ /* DPP0 & HUBP0 share power domain 0 */
+ REG_GET(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+ break;
+ case 1:
+ /* DPP1 & HUBP1 share power domain 1 */
+ REG_GET(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+ break;
+ case 2:
+ /* DPP2 & HUBP2 share power domain 2 */
+ REG_GET(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+ break;
+ case 3:
+ /* DPP3 & HUBP3 share power domain 3 */
+ REG_GET(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+ break;
+ default:
+ BREAK_TO_DEBUGGER(); /* unknown instance: pwr_status is still 0, so true is returned */
+ break;
+ }
+
+ return pwr_status == 0; /* 0 is the status the control path waits for on power-up */
+}
+
+void pg_cntl35_hubp_dpp_pg_control(struct pg_cntl *pg_cntl, unsigned int hubp_dpp_inst, bool power_on)
+{ /* Power the HUBP/DPP pair hubp_dpp_inst on or off and record the requested state for both. */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t power_gate = power_on ? 0 : 1; /* value programmed into DOMAIN_POWER_GATE */
+ uint32_t pwr_status = power_on ? 0 : 2; /* DOMAIN_PGFSM_PWR_STATUS value to poll for */
+ uint32_t org_ip_request_cntl;
+ bool block_enabled;
+ bool skip_pg = pg_cntl->ctx->dc->debug.ignore_pg ||
+ pg_cntl->ctx->dc->debug.disable_hubp_power_gate ||
+ pg_cntl->ctx->dc->debug.disable_dpp_power_gate ||
+ pg_cntl->ctx->dc->idle_optimizations_allowed;
+
+ if (skip_pg && !power_on) /* the skip conditions only veto gating; power-up still proceeds */
+ return;
+
+ block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, hubp_dpp_inst);
+ if (power_on) { /* nothing to do when HW is already in the requested state */
+ if (block_enabled)
+ return;
+ } else {
+ if (!block_enabled)
+ return;
+ }
+
+ REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
+ if (org_ip_request_cntl == 0) /* set IP_REQUEST_EN if clear; it is not restored by this function */
+ REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
+
+ switch (hubp_dpp_inst) { /* program the gate, then poll the PGFSM for the target status */
+ case 0:
+ /* DPP0 & HUBP0 share power domain 0 */
+ REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
+ REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000);
+ break;
+ case 1:
+ /* DPP1 & HUBP1 share power domain 1 */
+ REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
+ REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000);
+ break;
+ case 2:
+ /* DPP2 & HUBP2 share power domain 2 */
+ REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
+ REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000);
+ break;
+ case 3:
+ /* DPP3 & HUBP3 share power domain 3 */
+ REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
+ REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000);
+ break;
+ default:
+ BREAK_TO_DEBUGGER(); /* unknown instance: no register is programmed */
+ break;
+ }
+
+ DC_LOG_DEBUG("HUBP DPP instance %d, power %s", hubp_dpp_inst,
+ power_on ? "ON" : "OFF");
+
+ if (hubp_dpp_inst < MAX_PIPES) { /* bounds guard for the software bookkeeping arrays */
+ pg_cntl->pg_pipe_res_enable[PG_HUBP][hubp_dpp_inst] = power_on;
+ pg_cntl->pg_pipe_res_enable[PG_DPP][hubp_dpp_inst] = power_on;
+ }
+}
+
+static bool pg_cntl35_hpo_pg_status(struct pg_cntl *pg_cntl)
+{ /* Return true when power domain 25 (used for HPO) reports PGFSM power status 0. */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t pwr_status = 0;
+
+ REG_GET(DOMAIN25_PG_STATUS,
+ DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+
+ return pwr_status == 0; /* 0 is the status pg_cntl35_hpo_pg_control() waits for on power-up */
+}
+
+void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on)
+{ /* Power the HPO domain (domain 25) on or off and record the requested state in pg_res_enable. */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t power_gate = power_on ? 0 : 1; /* value programmed into DOMAIN_POWER_GATE */
+ uint32_t pwr_status = power_on ? 0 : 2; /* DOMAIN_PGFSM_PWR_STATUS value to poll for */
+ uint32_t org_ip_request_cntl;
+ uint32_t power_forceon;
+ bool block_enabled;
+
+ if (pg_cntl->ctx->dc->debug.ignore_pg || /* here the flags block power-up as well as gating */
+ pg_cntl->ctx->dc->debug.disable_hpo_power_gate ||
+ pg_cntl->ctx->dc->idle_optimizations_allowed)
+ return;
+
+ block_enabled = pg_cntl35_hpo_pg_status(pg_cntl);
+ if (power_on) { /* nothing to do when HW is already in the requested state */
+ if (block_enabled)
+ return;
+ } else {
+ if (!block_enabled)
+ return;
+ }
+
+ REG_GET(DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon);
+ if (power_forceon) /* domain is forced on in its config register; leave it untouched */
+ return;
+
+ REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
+ if (org_ip_request_cntl == 0) /* set IP_REQUEST_EN if clear; it is not restored by this function */
+ REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
+
+ REG_UPDATE(DOMAIN25_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
+ REG_WAIT(DOMAIN25_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
+
+ pg_cntl->pg_res_enable[PG_HPO] = power_on; /* bookkeeping records the request, not the polled result */
+}
+
+static bool pg_cntl35_io_clk_status(struct pg_cntl *pg_cntl)
+{ /* Return true when power domain 22 (DCCG, DIO, DCIO) reports PGFSM power status 0. */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t pwr_status = 0;
+
+ REG_GET(DOMAIN22_PG_STATUS,
+ DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+
+ return pwr_status == 0; /* 0 is the status pg_cntl35_io_clk_pg_control() waits for on power-up */
+}
+
+void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on)
+{ /* Power domain 22 (DCCG, DIO, DCIO) on or off and record the requested state for all three. */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t power_gate = power_on ? 0 : 1; /* value programmed into DOMAIN_POWER_GATE */
+ uint32_t pwr_status = power_on ? 0 : 2; /* DOMAIN_PGFSM_PWR_STATUS value to poll for */
+ uint32_t org_ip_request_cntl;
+ uint32_t power_forceon;
+ bool block_enabled;
+
+ if (pg_cntl->ctx->dc->debug.ignore_pg || /* here the flags block power-up as well as gating */
+ pg_cntl->ctx->dc->idle_optimizations_allowed)
+ return;
+
+ block_enabled = pg_cntl35_io_clk_status(pg_cntl);
+ if (power_on) { /* nothing to do when HW is already in the requested state */
+ if (block_enabled)
+ return;
+ } else {
+ if (!block_enabled)
+ return;
+ }
+
+ REG_GET(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon);
+ if (power_forceon) /* domain is forced on in its config register; leave it untouched */
+ return;
+
+ REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
+ if (org_ip_request_cntl == 0) /* set IP_REQUEST_EN if clear; it is not restored by this function */
+ REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
+
+ /* DCCG, DIO, DCIO all live in power domain 22 */
+ REG_UPDATE(DOMAIN22_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
+ REG_WAIT(DOMAIN22_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
+
+ pg_cntl->pg_res_enable[PG_DCCG] = power_on; /* bookkeeping records the request, not the polled result */
+ pg_cntl->pg_res_enable[PG_DIO] = power_on;
+ pg_cntl->pg_res_enable[PG_DCIO] = power_on;
+}
+
+static bool pg_cntl35_plane_otg_status(struct pg_cntl *pg_cntl)
+{ /* Return true when power domain 24 (MPC, OPP, OPTC, DWB) reports PGFSM power status 0. */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t pwr_status = 0;
+
+ REG_GET(DOMAIN24_PG_STATUS,
+ DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+
+ return pwr_status == 0; /* 0 is the status pg_cntl35_plane_otg_pg_control() waits for on power-up */
+}
+
+void pg_cntl35_mpcc_pg_control(struct pg_cntl *pg_cntl,
+ unsigned int mpcc_inst, bool power_on)
+{ /* Bookkeeping only: records the MPCC state; no register is accessed in this function. */
+ if (pg_cntl->ctx->dc->idle_optimizations_allowed)
+ return; /* state is left unchanged while idle optimizations are allowed */
+
+ if (mpcc_inst < MAX_PIPES) /* out-of-range instances are silently ignored */
+ pg_cntl->pg_pipe_res_enable[PG_MPCC][mpcc_inst] = power_on;
+}
+
+void pg_cntl35_opp_pg_control(struct pg_cntl *pg_cntl,
+ unsigned int opp_inst, bool power_on)
+{ /* Bookkeeping only: records the OPP state; no register is accessed in this function. */
+ if (pg_cntl->ctx->dc->idle_optimizations_allowed)
+ return; /* state is left unchanged while idle optimizations are allowed */
+
+ if (opp_inst < MAX_PIPES) /* out-of-range instances are silently ignored */
+ pg_cntl->pg_pipe_res_enable[PG_OPP][opp_inst] = power_on;
+}
+
+void pg_cntl35_optc_pg_control(struct pg_cntl *pg_cntl,
+ unsigned int optc_inst, bool power_on)
+{ /* Bookkeeping only: records the OPTC state; no register is accessed in this function. */
+ if (pg_cntl->ctx->dc->idle_optimizations_allowed)
+ return; /* state is left unchanged while idle optimizations are allowed */
+
+ if (optc_inst < MAX_PIPES) /* out-of-range instances are silently ignored */
+ pg_cntl->pg_pipe_res_enable[PG_OPTC][optc_inst] = power_on;
+}
+
+void pg_cntl35_plane_otg_pg_control(struct pg_cntl *pg_cntl, bool power_on)
+{ /* Power domain 24 (MPC, OPP, OPTC, DWB) on or off; gating is refused while any user is active. */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t power_gate = power_on ? 0 : 1; /* value programmed into DOMAIN_POWER_GATE */
+ uint32_t pwr_status = power_on ? 0 : 2; /* DOMAIN_PGFSM_PWR_STATUS value to poll for */
+ uint32_t org_ip_request_cntl;
+ int i;
+ bool block_enabled;
+ bool all_mpcc_disabled = true, all_opp_disabled = true;
+ bool all_optc_disabled = true, all_stream_disabled = true;
+
+ if (pg_cntl->ctx->dc->debug.ignore_pg || /* here the flags block power-up as well as gating */
+ pg_cntl->ctx->dc->debug.disable_optc_power_gate ||
+ pg_cntl->ctx->dc->idle_optimizations_allowed)
+ return;
+
+ block_enabled = pg_cntl35_plane_otg_status(pg_cntl);
+ if (power_on) { /* nothing to do when HW is already in the requested state */
+ if (block_enabled)
+ return;
+ } else {
+ if (!block_enabled)
+ return;
+ }
+
+ for (i = 0; i < pg_cntl->ctx->dc->res_pool->pipe_count; i++) { /* look for any remaining user */
+ struct pipe_ctx *pipe_ctx = &pg_cntl->ctx->dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx) { /* address of an indexed element, so this check is effectively always true */
+ if (pipe_ctx->stream)
+ all_stream_disabled = false;
+ }
+
+ if (pg_cntl->pg_pipe_res_enable[PG_MPCC][i])
+ all_mpcc_disabled = false;
+
+ if (pg_cntl->pg_pipe_res_enable[PG_OPP][i])
+ all_opp_disabled = false;
+
+ if (pg_cntl->pg_pipe_res_enable[PG_OPTC][i])
+ all_optc_disabled = false;
+ }
+
+ if (!power_on) { /* only gate when no MPCC/OPP/OPTC/stream/DWB is marked in use */
+ if (!all_mpcc_disabled || !all_opp_disabled || !all_optc_disabled
+ || !all_stream_disabled || pg_cntl->pg_res_enable[PG_DWB])
+ return;
+ }
+
+ REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
+ if (org_ip_request_cntl == 0) /* set IP_REQUEST_EN if clear; it is not restored by this function */
+ REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
+
+ /* MPC, OPP, OPTC, DWB all live in power domain 24 */
+ REG_UPDATE(DOMAIN24_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
+ REG_WAIT(DOMAIN24_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
+
+ for (i = 0; i < pg_cntl->ctx->dc->res_pool->pipe_count; i++) { /* whole domain switched: sync every pipe entry */
+ pg_cntl->pg_pipe_res_enable[PG_MPCC][i] = power_on;
+ pg_cntl->pg_pipe_res_enable[PG_OPP][i] = power_on;
+ pg_cntl->pg_pipe_res_enable[PG_OPTC][i] = power_on;
+ }
+ pg_cntl->pg_res_enable[PG_DWB] = power_on;
+}
+
+void pg_cntl35_dwb_pg_control(struct pg_cntl *pg_cntl, bool power_on)
+{ /* Bookkeeping only: records the DWB state; no register is accessed in this function. */
+ if (pg_cntl->ctx->dc->idle_optimizations_allowed)
+ return; /* state is left unchanged while idle optimizations are allowed */
+
+ pg_cntl->pg_res_enable[PG_DWB] = power_on; /* read by pg_cntl35_plane_otg_pg_control() before gating */
+}
+
+static bool pg_cntl35_mem_status(struct pg_cntl *pg_cntl)
+{ /* Return true when power domain 23 reports PGFSM power status 0 (used for DCHUBBUB/DCHVM state). */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl);
+ uint32_t pwr_status = 0;
+
+ REG_GET(DOMAIN23_PG_STATUS,
+ DOMAIN_PGFSM_PWR_STATUS, &pwr_status);
+
+ return pwr_status == 0; /* same convention as the other status helpers: 0 means powered on */
+}
+
+void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl)
+{ /* Seed pg_res_enable / pg_pipe_res_enable from the current hardware PGFSM status registers. */
+ int i = 0;
+ bool block_enabled;
+
+ pg_cntl->pg_res_enable[PG_HPO] = pg_cntl35_hpo_pg_status(pg_cntl);
+
+ block_enabled = pg_cntl35_io_clk_status(pg_cntl); /* one domain covers DCCG, DIO and DCIO */
+ pg_cntl->pg_res_enable[PG_DCCG] = block_enabled;
+ pg_cntl->pg_res_enable[PG_DIO] = block_enabled;
+ pg_cntl->pg_res_enable[PG_DCIO] = block_enabled;
+
+ block_enabled = pg_cntl35_mem_status(pg_cntl); /* one domain covers DCHUBBUB and DCHVM */
+ pg_cntl->pg_res_enable[PG_DCHUBBUB] = block_enabled;
+ pg_cntl->pg_res_enable[PG_DCHVM] = block_enabled;
+
+ for (i = 0; i < pg_cntl->ctx->dc->res_pool->pipe_count; i++) { /* status helpers only decode instances 0..3 */
+ block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, i);
+ pg_cntl->pg_pipe_res_enable[PG_HUBP][i] = block_enabled;
+ pg_cntl->pg_pipe_res_enable[PG_DPP][i] = block_enabled;
+
+ block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, i);
+ pg_cntl->pg_pipe_res_enable[PG_DSC][i] = block_enabled;
+ }
+
+ block_enabled = pg_cntl35_plane_otg_status(pg_cntl); /* one domain covers MPCC, OPP, OPTC and DWB */
+ for (i = 0; i < pg_cntl->ctx->dc->res_pool->pipe_count; i++) {
+ pg_cntl->pg_pipe_res_enable[PG_MPCC][i] = block_enabled;
+ pg_cntl->pg_pipe_res_enable[PG_OPP][i] = block_enabled;
+ pg_cntl->pg_pipe_res_enable[PG_OPTC][i] = block_enabled;
+ }
+ pg_cntl->pg_res_enable[PG_DWB] = block_enabled;
+}
+
+static void pg_cntl35_print_pg_status(struct pg_cntl *pg_cntl, const char *debug_func, const char *debug_log)
+{ /* Log the live power status of every domain via DC_LOG_DEBUG, tagged with debug_func/debug_log. */
+ int i = 0;
+ bool block_enabled = false;
+
+ DC_LOG_DEBUG("%s: %s", debug_func, debug_log);
+
+ DC_LOG_DEBUG("PG_CNTL status:\n");
+
+ block_enabled = pg_cntl35_io_clk_status(pg_cntl); /* status is read from HW, not from the cached arrays */
+ DC_LOG_DEBUG("ONO0=%d (DCCG, DIO, DCIO)\n", block_enabled ? 1 : 0);
+
+ block_enabled = pg_cntl35_mem_status(pg_cntl);
+ DC_LOG_DEBUG("ONO1=%d (DCHUBBUB, DCHVM, DCHUBBUBMEM)\n", block_enabled ? 1 : 0);
+
+ block_enabled = pg_cntl35_plane_otg_status(pg_cntl);
+ DC_LOG_DEBUG("ONO2=%d (MPC, OPP, OPTC, DWB)\n", block_enabled ? 1 : 0);
+
+ block_enabled = pg_cntl35_hpo_pg_status(pg_cntl);
+ DC_LOG_DEBUG("ONO3=%d (HPO)\n", block_enabled ? 1 : 0);
+
+ for (i = 0; i < pg_cntl->ctx->dc->res_pool->pipe_count; i++) { /* pipe i logs as ONO(4+2i) and ONO(5+2i) */
+ block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, i);
+ DC_LOG_DEBUG("ONO%d=%d (DCHUBP%d, DPP%d)\n", 4 + i * 2, block_enabled ? 1 : 0, i, i);
+
+ block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, i); /* returns true unconditionally when debug.ignore_pg is set */
+ DC_LOG_DEBUG("ONO%d=%d (DSC%d)\n", 5 + i * 2, block_enabled ? 1 : 0, i);
+ }
+}
+
+static const struct pg_cntl_funcs pg_cntl35_funcs = { /* DCN35 ops table installed by pg_cntl35_create() */
+ .init_pg_status = pg_cntl35_init_pg_status,
+ .dsc_pg_control = pg_cntl35_dsc_pg_control,
+ .hubp_dpp_pg_control = pg_cntl35_hubp_dpp_pg_control,
+ .hpo_pg_control = pg_cntl35_hpo_pg_control,
+ .io_clk_pg_control = pg_cntl35_io_clk_pg_control,
+ .plane_otg_pg_control = pg_cntl35_plane_otg_pg_control,
+ .mpcc_pg_control = pg_cntl35_mpcc_pg_control, /* bookkeeping only, no register access */
+ .opp_pg_control = pg_cntl35_opp_pg_control, /* bookkeeping only, no register access */
+ .optc_pg_control = pg_cntl35_optc_pg_control, /* bookkeeping only, no register access */
+ .dwb_pg_control = pg_cntl35_dwb_pg_control, /* bookkeeping only, no register access */
+ .print_pg_status = pg_cntl35_print_pg_status /* file-local (static); reachable only through this table */
+};
+
+struct pg_cntl *pg_cntl35_create( /* Allocate and initialise a DCN35 pg_cntl; returns NULL on allocation failure. */
+ struct dc_context *ctx,
+ const struct pg_cntl_registers *regs,
+ const struct pg_cntl_shift *pg_cntl_shift,
+ const struct pg_cntl_mask *pg_cntl_mask)
+{
+ struct dcn_pg_cntl *pg_cntl_dcn = kzalloc(sizeof(*pg_cntl_dcn), GFP_KERNEL);
+ struct pg_cntl *base;
+
+ if (pg_cntl_dcn == NULL) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+ base = &pg_cntl_dcn->base;
+ base->ctx = ctx;
+ base->funcs = &pg_cntl35_funcs;
+
+ pg_cntl_dcn->regs = regs; /* the three tables are stored by pointer, not copied */
+ pg_cntl_dcn->pg_cntl_shift = pg_cntl_shift;
+ pg_cntl_dcn->pg_cntl_mask = pg_cntl_mask;
+
+ memset(base->pg_pipe_res_enable, 0, PG_HW_PIPE_RESOURCES_NUM_ELEMENT * MAX_PIPES * sizeof(bool)); /* redundant after kzalloc() */
+ memset(base->pg_res_enable, 0, PG_HW_RESOURCES_NUM_ELEMENT * sizeof(bool)); /* redundant after kzalloc() */
+
+ return &pg_cntl_dcn->base; /* release with dcn_pg_cntl_destroy() */
+}
+
+void dcn_pg_cntl_destroy(struct pg_cntl **pg_cntl)
+{ /* Free the dcn_pg_cntl that embeds *pg_cntl and clear the caller's pointer. */
+ struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(*pg_cntl); /* pg_cntl itself is dereferenced unchecked */
+
+ kfree(pg_cntl_dcn);
+ *pg_cntl = NULL; /* prevents reuse of the freed object through the caller's pointer */
+}
diff --git a/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.h b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.h
new file mode 100644
index 000000000000..3de240884d22
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DCN35_PG_CNTL_H_
+#define _DCN35_PG_CNTL_H_
+
+#include "pg_cntl.h"
+
+#define PG_CNTL_REG_LIST_DCN35()\
+ SR(DOMAIN0_PG_CONFIG), \
+ SR(DOMAIN1_PG_CONFIG), \
+ SR(DOMAIN2_PG_CONFIG), \
+ SR(DOMAIN3_PG_CONFIG), \
+ SR(DOMAIN16_PG_CONFIG), \
+ SR(DOMAIN17_PG_CONFIG), \
+ SR(DOMAIN18_PG_CONFIG), \
+ SR(DOMAIN19_PG_CONFIG), \
+ SR(DOMAIN22_PG_CONFIG), \
+ SR(DOMAIN23_PG_CONFIG), \
+ SR(DOMAIN24_PG_CONFIG), \
+ SR(DOMAIN25_PG_CONFIG), \
+ SR(DOMAIN0_PG_STATUS), \
+ SR(DOMAIN1_PG_STATUS), \
+ SR(DOMAIN2_PG_STATUS), \
+ SR(DOMAIN3_PG_STATUS), \
+ SR(DOMAIN16_PG_STATUS), \
+ SR(DOMAIN17_PG_STATUS), \
+ SR(DOMAIN18_PG_STATUS), \
+ SR(DOMAIN19_PG_STATUS), \
+ SR(DOMAIN22_PG_STATUS), \
+ SR(DOMAIN23_PG_STATUS), \
+ SR(DOMAIN24_PG_STATUS), \
+ SR(DOMAIN25_PG_STATUS), \
+ SR(DC_IP_REQUEST_CNTL)
+
+#define PG_CNTL_SF(reg_name, field_name, post_fix)\
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define PG_CNTL_MASK_SH_LIST_DCN35(mask_sh) \
+ PG_CNTL_SF(DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN22_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN23_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN23_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN24_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN24_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ PG_CNTL_SF(DOMAIN25_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN0_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN1_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN2_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN3_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN16_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN17_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN18_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN19_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN22_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN22_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN23_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN23_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN24_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN24_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DOMAIN25_PG_STATUS, DOMAIN_DESIRED_PWR_STATE, mask_sh), \
+ PG_CNTL_SF(DOMAIN25_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ PG_CNTL_SF(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh)
+
+#define PG_CNTL_REG_FIELD_LIST(type) \
+ type IPS2;\
+ type IPS1;\
+ type IPS0;\
+ type IPS0_All
+
+#define PG_CNTL_DCN35_REG_FIELD_LIST(type) \
+ type IP_REQUEST_EN; \
+ type DOMAIN_POWER_FORCEON; \
+ type DOMAIN_POWER_GATE; \
+ type DOMAIN_DESIRED_PWR_STATE; \
+ type DOMAIN_PGFSM_PWR_STATUS
+
+struct pg_cntl_shift { /* per-field shift values; supplied as the first half of FN() in dcn35_pg_cntl.c */
+ PG_CNTL_REG_FIELD_LIST(uint8_t);
+ PG_CNTL_DCN35_REG_FIELD_LIST(uint8_t);
+};
+
+struct pg_cntl_mask { /* per-field mask values; supplied as the second half of FN() in dcn35_pg_cntl.c */
+ PG_CNTL_REG_FIELD_LIST(uint32_t);
+ PG_CNTL_DCN35_REG_FIELD_LIST(uint32_t);
+};
+
+struct pg_cntl_registers { /* one entry per register accessed through REG() in dcn35_pg_cntl.c */
+ uint32_t LONO_STATE; /* not listed in PG_CNTL_REG_LIST_DCN35() */
+ uint32_t DC_IP_REQUEST_CNTL;
+ uint32_t DOMAIN0_PG_CONFIG; /* domains 0-3: HUBP/DPP pairs 0-3 */
+ uint32_t DOMAIN1_PG_CONFIG;
+ uint32_t DOMAIN2_PG_CONFIG;
+ uint32_t DOMAIN3_PG_CONFIG;
+ uint32_t DOMAIN16_PG_CONFIG; /* domains 16-19: DSC 0-3 */
+ uint32_t DOMAIN17_PG_CONFIG;
+ uint32_t DOMAIN18_PG_CONFIG;
+ uint32_t DOMAIN19_PG_CONFIG;
+ uint32_t DOMAIN22_PG_CONFIG; /* DCCG, DIO, DCIO */
+ uint32_t DOMAIN23_PG_CONFIG; /* DCHUBBUB, DCHVM */
+ uint32_t DOMAIN24_PG_CONFIG; /* MPC, OPP, OPTC, DWB */
+ uint32_t DOMAIN25_PG_CONFIG; /* HPO */
+ uint32_t DOMAIN0_PG_STATUS; /* status registers follow the same domain mapping */
+ uint32_t DOMAIN1_PG_STATUS;
+ uint32_t DOMAIN2_PG_STATUS;
+ uint32_t DOMAIN3_PG_STATUS;
+ uint32_t DOMAIN16_PG_STATUS;
+ uint32_t DOMAIN17_PG_STATUS;
+ uint32_t DOMAIN18_PG_STATUS;
+ uint32_t DOMAIN19_PG_STATUS;
+ uint32_t DOMAIN22_PG_STATUS;
+ uint32_t DOMAIN23_PG_STATUS;
+ uint32_t DOMAIN24_PG_STATUS;
+ uint32_t DOMAIN25_PG_STATUS;
+};
+
+struct dcn_pg_cntl { /* DCN-specific wrapper around the generic pg_cntl object */
+ struct pg_cntl base; /* embedded base; TO_DCN_PG_CNTL() recovers the wrapper via container_of() */
+ const struct pg_cntl_registers *regs; /* borrowed from the caller of pg_cntl35_create() */
+ const struct pg_cntl_shift *pg_cntl_shift; /* borrowed, not copied */
+ const struct pg_cntl_mask *pg_cntl_mask; /* borrowed, not copied */
+};
+
+void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bool power_on);
+void pg_cntl35_hubp_dpp_pg_control(struct pg_cntl *pg_cntl,
+ unsigned int hubp_dpp_inst, bool power_on);
+void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on);
+void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on);
+void pg_cntl35_plane_otg_pg_control(struct pg_cntl *pg_cntl, bool power_on);
+void pg_cntl35_mpcc_pg_control(struct pg_cntl *pg_cntl,
+ unsigned int mpcc_inst, bool power_on);
+void pg_cntl35_opp_pg_control(struct pg_cntl *pg_cntl,
+ unsigned int opp_inst, bool power_on);
+void pg_cntl35_optc_pg_control(struct pg_cntl *pg_cntl,
+ unsigned int optc_inst, bool power_on);
+void pg_cntl35_dwb_pg_control(struct pg_cntl *pg_cntl, bool power_on);
+void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl);
+
+struct pg_cntl *pg_cntl35_create(
+ struct dc_context *ctx,
+ const struct pg_cntl_registers *regs,
+ const struct pg_cntl_shift *pg_cntl_shift,
+ const struct pg_cntl_mask *pg_cntl_mask);
+
+void dcn_pg_cntl_destroy(struct pg_cntl **pg_cntl);
+
+#endif /* _DCN35_PG_CNTL_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile
new file mode 100644
index 000000000000..5b42da8b79c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile
@@ -0,0 +1,225 @@
+
+# Copyright 2022 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# Makefile for the 'resource' sub-component of DAL.
+#
+
+
+###############################################################################
+# DCE
+###############################################################################
+
+ifdef CONFIG_DRM_AMD_DC_SI
+RESOURCE_DCE60 = dce60_resource.o
+
+AMD_DAL_RESOURCE_DCE60 = $(addprefix $(AMDDALPATH)/dc/resource/dce60/,$(RESOURCE_DCE60))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE60)
+endif
+
+###############################################################################
+
+RESOURCE_DCE80 = dce80_resource.o
+
+AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80)
+
+###############################################################################
+
+RESOURCE_DCE100 = dce100_resource.o
+
+AMD_DAL_RESOURCE_DCE100 = $(addprefix $(AMDDALPATH)/dc/resource/dce100/,$(RESOURCE_DCE100))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE100)
+
+###############################################################################
+
+RESOURCE_DCE110 = dce110_resource.o
+
+AMD_DAL_RESOURCE_DCE110 = $(addprefix $(AMDDALPATH)/dc/resource/dce110/,$(RESOURCE_DCE110))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE110)
+
+###############################################################################
+
+RESOURCE_DCE112 = dce112_resource.o
+
+AMD_DAL_RESOURCE_DCE112 = $(addprefix $(AMDDALPATH)/dc/resource/dce112/,$(RESOURCE_DCE112))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE112)
+
+###############################################################################
+
+RESOURCE_DCE120 = dce120_resource.o
+
+AMD_DAL_RESOURCE_DCE120 = $(addprefix $(AMDDALPATH)/dc/resource/dce120/,$(RESOURCE_DCE120))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE120)
+
+ifdef CONFIG_DRM_AMD_DC_FP
+###############################################################################
+# DCN
+###############################################################################
+
+RESOURCE_DCN10 = dcn10_resource.o
+
+AMD_DAL_RESOURCE_DCN10 = $(addprefix $(AMDDALPATH)/dc/resource/dcn10/,$(RESOURCE_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN10)
+
+###############################################################################
+
+RESOURCE_DCN20 = dcn20_resource.o
+
+AMD_DAL_RESOURCE_DCN20 = $(addprefix $(AMDDALPATH)/dc/resource/dcn20/,$(RESOURCE_DCN20))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN20)
+
+###############################################################################
+
+RESOURCE_DCN201 = dcn201_resource.o
+
+AMD_DAL_RESOURCE_DCN201 = $(addprefix $(AMDDALPATH)/dc/resource/dcn201/,$(RESOURCE_DCN201))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN201)
+
+###############################################################################
+
+RESOURCE_DCN21 = dcn21_resource.o
+
+AMD_DAL_RESOURCE_DCN21 = $(addprefix $(AMDDALPATH)/dc/resource/dcn21/,$(RESOURCE_DCN21))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN21)
+
+###############################################################################
+
+RESOURCE_DCN30 = dcn30_resource.o
+
+AMD_DAL_RESOURCE_DCN30 = $(addprefix $(AMDDALPATH)/dc/resource/dcn30/,$(RESOURCE_DCN30))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN30)
+
+###############################################################################
+
+RESOURCE_DCN301 = dcn301_resource.o
+
+AMD_DAL_RESOURCE_DCN301 = $(addprefix $(AMDDALPATH)/dc/resource/dcn301/,$(RESOURCE_DCN301))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN301)
+
+###############################################################################
+
+RESOURCE_DCN302 = dcn302_resource.o
+
+AMD_DAL_RESOURCE_DCN302 = $(addprefix $(AMDDALPATH)/dc/resource/dcn302/,$(RESOURCE_DCN302))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN302)
+
+###############################################################################
+
+RESOURCE_DCN303 = dcn303_resource.o
+
+AMD_DAL_RESOURCE_DCN303 = $(addprefix $(AMDDALPATH)/dc/resource/dcn303/,$(RESOURCE_DCN303))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN303)
+
+###############################################################################
+
+RESOURCE_DCN31 = dcn31_resource.o
+
+AMD_DAL_RESOURCE_DCN31 = $(addprefix $(AMDDALPATH)/dc/resource/dcn31/,$(RESOURCE_DCN31))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN31)
+
+###############################################################################
+
+RESOURCE_DCN314 = dcn314_resource.o
+
+AMD_DAL_RESOURCE_DCN314 = $(addprefix $(AMDDALPATH)/dc/resource/dcn314/,$(RESOURCE_DCN314))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN314)
+
+###############################################################################
+
+RESOURCE_DCN315 = dcn315_resource.o
+
+AMD_DAL_RESOURCE_DCN315 = $(addprefix $(AMDDALPATH)/dc/resource/dcn315/,$(RESOURCE_DCN315))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN315)
+
+###############################################################################
+
+RESOURCE_DCN316 = dcn316_resource.o
+
+AMD_DAL_RESOURCE_DCN316 = $(addprefix $(AMDDALPATH)/dc/resource/dcn316/,$(RESOURCE_DCN316))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN316)
+
+###############################################################################
+
+RESOURCE_DCN32 = dcn32_resource.o dcn32_resource_helpers.o
+
+AMD_DAL_RESOURCE_DCN32 = $(addprefix $(AMDDALPATH)/dc/resource/dcn32/,$(RESOURCE_DCN32))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN32)
+
+###############################################################################
+
+RESOURCE_DCN321 = dcn321_resource.o
+
+AMD_DAL_RESOURCE_DCN321 = $(addprefix $(AMDDALPATH)/dc/resource/dcn321/,$(RESOURCE_DCN321))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN321)
+
+###############################################################################
+
+RESOURCE_DCN35 = dcn35_resource.o
+
+AMD_DAL_RESOURCE_DCN35 = $(addprefix $(AMDDALPATH)/dc/resource/dcn35/,$(RESOURCE_DCN35))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN35)
+
+###############################################################################
+
+RESOURCE_DCN351 = dcn351_resource.o
+
+AMD_DAL_RESOURCE_DCN351 = $(addprefix $(AMDDALPATH)/dc/resource/dcn351/,$(RESOURCE_DCN351))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN351)
+
+###############################################################################
+
+RESOURCE_DCN36 = dcn36_resource.o
+
+AMD_DAL_RESOURCE_DCN36 = $(addprefix $(AMDDALPATH)/dc/resource/dcn36/,$(RESOURCE_DCN36))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN36)
+
+###############################################################################
+
+RESOURCE_DCN401 = dcn401_resource.o
+
+AMD_DAL_RESOURCE_DCN401 = $(addprefix $(AMDDALPATH)/dc/resource/dcn401/,$(RESOURCE_DCN401))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN401)
+
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
index 899b25b0bad8..c4b4dc3ad8c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c
@@ -29,8 +29,9 @@
#include "stream_encoder.h"
#include "resource.h"
+#include "clk_mgr.h"
#include "include/irq_service_interface.h"
-#include "../virtual/virtual_stream_encoder.h"
+#include "virtual/virtual_stream_encoder.h"
#include "dce110/dce110_resource.h"
#include "dce110/dce110_timing_generator.h"
#include "irq/dce110/irq_service_dce110.h"
@@ -43,7 +44,7 @@
#include "dce/dce_clock_source.h"
#include "dce/dce_audio.h"
#include "dce/dce_hwseq.h"
-#include "dce100/dce100_hw_sequencer.h"
+#include "dce100/dce100_hwseq.h"
#include "dce/dce_panel_cntl.h"
#include "reg_helper.h"
@@ -623,7 +624,7 @@ static struct link_encoder *dce100_link_encoder_create(
kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
int link_regs_id;
- if (!enc110)
+ if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
link_regs_id =
@@ -836,17 +837,24 @@ static enum dc_status build_mapped_resource(
return DC_OK;
}
-static bool dce100_validate_bandwidth(
+enum dc_status dce100_validate_bandwidth(
struct dc *dc,
struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int i;
bool at_least_one_pipe = false;
+ struct dc_stream_state *stream = NULL;
+ const uint32_t max_pix_clk_khz = max(dc->clk_mgr->clks.max_supported_dispclk_khz, 400000);
for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (context->res_ctx.pipe_ctx[i].stream)
+ stream = context->res_ctx.pipe_ctx[i].stream;
+ if (stream) {
at_least_one_pipe = true;
+
+ if (stream->timing.pix_clk_100hz >= max_pix_clk_khz * 10)
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+ }
}
if (at_least_one_pipe) {
@@ -854,11 +862,20 @@ static bool dce100_validate_bandwidth(
context->bw_ctx.bw.dce.dispclk_khz = 681000;
context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ;
} else {
- context->bw_ctx.bw.dce.dispclk_khz = 0;
+ /* On DCE 6.0 and 6.4 the PLL0 is both the display engine clock and
+ * the DP clock, and shouldn't be turned off. Just select the display
+ * clock value from its low power mode.
+ */
+ if (dc->ctx->dce_version == DCE_VERSION_6_0 ||
+ dc->ctx->dce_version == DCE_VERSION_6_4)
+ context->bw_ctx.bw.dce.dispclk_khz = 352000;
+ else
+ context->bw_ctx.bw.dce.dispclk_khz = 0;
+
context->bw_ctx.bw.dce.yclk_khz = 0;
}
- return true;
+ return DC_OK;
}
static bool dce100_validate_surface_sets(
@@ -881,7 +898,7 @@ static bool dce100_validate_surface_sets(
return true;
}
-static enum dc_status dce100_validate_global(
+enum dc_status dce100_validate_global(
struct dc *dc,
struct dc_state *context)
{
@@ -1069,7 +1086,7 @@ static bool dce100_resource_construct(
pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator;
dc->caps.max_downscale_ratio = 200;
dc->caps.i2c_speed_in_khz = 40;
- dc->caps.i2c_speed_in_khz = 40;
+ dc->caps.i2c_speed_in_khz_hdcp = 40;
dc->caps.max_cursor_size = 128;
dc->caps.min_horizontal_blanking_period = 80;
dc->caps.dual_link_dvi = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
index fecab7c560f5..dd150a4b4610 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h
@@ -41,6 +41,15 @@ struct resource_pool *dce100_create_resource_pool(
enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps);
+enum dc_status dce100_validate_global(
+ struct dc *dc,
+ struct dc_state *context);
+
+enum dc_status dce100_validate_bandwidth(
+ struct dc *dc,
+ struct dc_state *context,
+ enum dc_validate_mode validate_mode);
+
enum dc_status dce100_add_stream_to_ctx(
struct dc *dc,
struct dc_state *new_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
index 1289b9418877..cccde5a6f3cd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c
@@ -46,7 +46,7 @@
#include "dce110/dce110_opp_v.h"
#include "dce/dce_clock_source.h"
#include "dce/dce_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dce/dce_aux.h"
#include "dce/dce_abm.h"
#include "dce/dce_dmcu.h"
@@ -668,7 +668,7 @@ static struct link_encoder *dce110_link_encoder_create(
kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
int link_regs_id;
- if (!enc110)
+ if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
link_regs_id =
@@ -960,10 +960,10 @@ static enum dc_status build_mapped_resource(
return DC_OK;
}
-static bool dce110_validate_bandwidth(
+static enum dc_status dce110_validate_bandwidth(
struct dc *dc,
struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
bool result = false;
@@ -1031,7 +1031,7 @@ static bool dce110_validate_bandwidth(
context->bw_ctx.bw.dce.yclk_khz,
context->bw_ctx.bw.dce.blackout_recovery_time_us);
}
- return result;
+ return result ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
}
static enum dc_status dce110_validate_plane(const struct dc_plane_state *plane_state,
@@ -1163,6 +1163,7 @@ static struct pipe_ctx *dce110_acquire_underlay(
0,
0,
0,
+ 0,
pipe_ctx->stream->signal,
false);
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h
index aa4531e0800e..aa4531e0800e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
index 2b20180f1a32..869a8e515fc0 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c
@@ -44,7 +44,7 @@
#include "dce/dce_clock_source.h"
#include "dce/dce_hwseq.h"
-#include "dce112/dce112_hw_sequencer.h"
+#include "dce112/dce112_hwseq.h"
#include "dce/dce_abm.h"
#include "dce/dce_dmcu.h"
#include "dce/dce_aux.h"
@@ -629,7 +629,7 @@ static struct link_encoder *dce112_link_encoder_create(
kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
int link_regs_id;
- if (!enc110)
+ if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
link_regs_id =
@@ -864,8 +864,6 @@ static struct clock_source *find_matching_pll(
default:
return NULL;
}
-
- return NULL;
}
static enum dc_status build_mapped_resource(
@@ -885,10 +883,10 @@ static enum dc_status build_mapped_resource(
return DC_OK;
}
-bool dce112_validate_bandwidth(
+enum dc_status dce112_validate_bandwidth(
struct dc *dc,
struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
bool result = false;
@@ -954,7 +952,7 @@ bool dce112_validate_bandwidth(
context->bw_ctx.bw.dce.yclk_khz,
context->bw_ctx.bw.dce.blackout_recovery_time_us);
}
- return result;
+ return result ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
}
enum dc_status resource_map_phy_clock_resources(
@@ -1069,7 +1067,10 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
struct dm_pp_clock_levels clks = {0};
int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ;
- if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm)
+ if (!dc->bw_vbios)
+ return;
+
+ if (dc->bw_vbios->memory_type == bw_def_hbm)
memory_type_multiplier = MEMORY_TYPE_HBM;
/*do system clock TODO PPLIB: after PPLIB implement,
@@ -1110,12 +1111,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
&clks);
dc->bw_vbios->low_yclk = bw_frc_to_fixed(
- clks.clocks_in_khz[0] * memory_type_multiplier, 1000);
+ (int64_t)clks.clocks_in_khz[0] * memory_type_multiplier, 1000);
dc->bw_vbios->mid_yclk = bw_frc_to_fixed(
- clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier,
+ (int64_t)clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier,
1000);
dc->bw_vbios->high_yclk = bw_frc_to_fixed(
- clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier,
+ (int64_t)clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier,
1000);
return;
@@ -1151,12 +1152,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
* YCLK = UMACLK*m_memoryTypeMultiplier
*/
dc->bw_vbios->low_yclk = bw_frc_to_fixed(
- mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000);
+ (int64_t)mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000);
dc->bw_vbios->mid_yclk = bw_frc_to_fixed(
- mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier,
+ (int64_t)mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier,
1000);
dc->bw_vbios->high_yclk = bw_frc_to_fixed(
- mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier,
+ (int64_t)mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier,
1000);
/* Now notify PPLib/SMU about which Watermarks sets they should select
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h
index 1f57ebc6f9b4..3efc4c55d2d2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h
@@ -42,10 +42,10 @@ enum dc_status dce112_validate_with_context(
struct dc_state *context,
struct dc_state *old_context);
-bool dce112_validate_bandwidth(
+enum dc_status dce112_validate_bandwidth(
struct dc *dc,
struct dc_state *context,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
enum dc_status dce112_add_stream_to_ctx(
struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
index 18c5a86d2d61..540e04ec1e2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c
@@ -35,8 +35,8 @@
#include "dce112/dce112_resource.h"
#include "dce110/dce110_resource.h"
-#include "../virtual/virtual_stream_encoder.h"
-#include "dce120_timing_generator.h"
+#include "virtual/virtual_stream_encoder.h"
+#include "dce120/dce120_timing_generator.h"
#include "irq/dce120/irq_service_dce120.h"
#include "dce/dce_opp.h"
#include "dce/dce_clock_source.h"
@@ -44,8 +44,8 @@
#include "dce/dce_mem_input.h"
#include "dce/dce_panel_cntl.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dce120/dce120_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
+#include "dce120/dce120_hwseq.h"
#include "dce/dce_transform.h"
#include "clk_mgr.h"
#include "dce/dce_audio.h"
@@ -67,6 +67,7 @@
#include "reg_helper.h"
#include "dce100/dce100_resource.h"
+#include "link_service.h"
#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL
#define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f
@@ -659,6 +660,12 @@ static void dce120_resource_destruct(struct dce110_resource_pool *pool)
if (pool->base.dmcu != NULL)
dce_dmcu_destroy(&pool->base.dmcu);
+
+ if (pool->base.oem_device != NULL) {
+ struct dc *dc = pool->base.oem_device->ctx->dc;
+
+ dc->link_srv->destroy_ddc_service(&pool->base.oem_device);
+ }
}
static void read_dce_straps(
@@ -706,7 +713,7 @@ static struct link_encoder *dce120_link_encoder_create(
kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
int link_regs_id;
- if (!enc110)
+ if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
link_regs_id =
@@ -983,12 +990,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc)
memory_type_multiplier = MEMORY_TYPE_HBM;
dc->bw_vbios->low_yclk = bw_frc_to_fixed(
- mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000);
+ (int64_t)mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000);
dc->bw_vbios->mid_yclk = bw_frc_to_fixed(
- mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier,
+ (int64_t)mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier,
1000);
dc->bw_vbios->high_yclk = bw_frc_to_fixed(
- mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier,
+ (int64_t)mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier,
1000);
/* Now notify PPLib/SMU about which Watermarks sets they should select
@@ -1054,13 +1061,14 @@ static bool dce120_resource_construct(
struct dc *dc,
struct dce110_resource_pool *pool)
{
+ struct ddc_service_init_data ddc_init_data = {0};
unsigned int i;
int j;
struct dc_context *ctx = dc->ctx;
struct irq_service_init_data irq_init_data;
static const struct resource_create_funcs *res_funcs;
bool is_vg20 = ASICREV_IS_VEGA20_P(ctx->asic_id.hw_internal_rev);
- uint32_t pipe_fuses;
+ uint32_t pipe_fuses = 0;
ctx->dc_bios->regs = &bios_regs;
@@ -1257,6 +1265,15 @@ static bool dce120_resource_construct(
bw_calcs_data_update_from_pplib(dc);
+ if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
+ ddc_init_data.ctx = dc->ctx;
+ ddc_init_data.link = NULL;
+ ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id;
+ ddc_init_data.id.enum_id = 0;
+ ddc_init_data.id.type = OBJECT_TYPE_GENERIC;
+ pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data);
+ }
+
return true;
irqs_create_fail:
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h
index 3d1f3cf012f4..3d1f3cf012f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
index 8db9f7514466..b75be6ad64f6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
@@ -34,6 +34,7 @@
#include "stream_encoder.h"
#include "resource.h"
+#include "clk_mgr.h"
#include "include/irq_service_interface.h"
#include "irq/dce60/irq_service_dce60.h"
#include "dce110/dce110_timing_generator.h"
@@ -48,7 +49,7 @@
#include "dce/dce_clock_source.h"
#include "dce/dce_audio.h"
#include "dce/dce_hwseq.h"
-#include "dce60/dce60_hw_sequencer.h"
+#include "dce60/dce60_hwseq.h"
#include "dce100/dce100_resource.h"
#include "dce/dce_panel_cntl.h"
@@ -373,7 +374,7 @@ static const struct resource_caps res_cap = {
.num_timing_generator = 6,
.num_audio = 6,
.num_stream_encoder = 6,
- .num_pll = 2,
+ .num_pll = 3,
.num_ddc = 6,
};
@@ -389,7 +390,7 @@ static const struct resource_caps res_cap_64 = {
.num_timing_generator = 2,
.num_audio = 2,
.num_stream_encoder = 2,
- .num_pll = 2,
+ .num_pll = 3,
.num_ddc = 2,
};
@@ -403,13 +404,13 @@ static const struct dc_plane_cap plane_cap = {
},
.max_upscale_factor = {
- .argb8888 = 16000,
+ .argb8888 = 1,
.nv12 = 1,
.fp16 = 1
},
.max_downscale_factor = {
- .argb8888 = 250,
+ .argb8888 = 1,
.nv12 = 1,
.fp16 = 1
}
@@ -717,7 +718,7 @@ static struct link_encoder *dce60_link_encoder_create(
kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
int link_regs_id;
- if (!enc110)
+ if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
link_regs_id =
@@ -863,61 +864,6 @@ static void dce60_resource_destruct(struct dce110_resource_pool *pool)
}
}
-static bool dce60_validate_bandwidth(
- struct dc *dc,
- struct dc_state *context,
- bool fast_validate)
-{
- int i;
- bool at_least_one_pipe = false;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (context->res_ctx.pipe_ctx[i].stream)
- at_least_one_pipe = true;
- }
-
- if (at_least_one_pipe) {
- /* TODO implement when needed but for now hardcode max value*/
- context->bw_ctx.bw.dce.dispclk_khz = 681000;
- context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ;
- } else {
- context->bw_ctx.bw.dce.dispclk_khz = 0;
- context->bw_ctx.bw.dce.yclk_khz = 0;
- }
-
- return true;
-}
-
-static bool dce60_validate_surface_sets(
- struct dc_state *context)
-{
- int i;
-
- for (i = 0; i < context->stream_count; i++) {
- if (context->stream_status[i].plane_count == 0)
- continue;
-
- if (context->stream_status[i].plane_count > 1)
- return false;
-
- if (context->stream_status[i].plane_states[0]->format
- >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
- return false;
- }
-
- return true;
-}
-
-static enum dc_status dce60_validate_global(
- struct dc *dc,
- struct dc_state *context)
-{
- if (!dce60_validate_surface_sets(context))
- return DC_FAIL_SURFACE_VALIDATE;
-
- return DC_OK;
-}
-
static void dce60_destroy_resource_pool(struct resource_pool **pool)
{
struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
@@ -931,10 +877,10 @@ static const struct resource_funcs dce60_res_pool_funcs = {
.destroy = dce60_destroy_resource_pool,
.link_enc_create = dce60_link_encoder_create,
.panel_cntl_create = dce60_panel_cntl_create,
- .validate_bandwidth = dce60_validate_bandwidth,
+ .validate_bandwidth = dce100_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce100_add_stream_to_ctx,
- .validate_global = dce60_validate_global,
+ .validate_global = dce100_validate_global,
.find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
};
@@ -973,21 +919,24 @@ static bool dce60_construct(
if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) {
pool->base.dp_clock_source =
- dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true);
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true);
+ /* DCE 6.0 and 6.4: PLL0 can only be used with DP. Don't initialize it here. */
pool->base.clock_sources[0] =
- dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], false);
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false);
pool->base.clock_sources[1] =
- dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false);
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false);
pool->base.clk_src_count = 2;
} else {
pool->base.dp_clock_source =
- dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true);
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true);
pool->base.clock_sources[0] =
- dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false);
- pool->base.clk_src_count = 1;
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false);
+ pool->base.clock_sources[1] =
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false);
+ pool->base.clk_src_count = 2;
}
if (pool->base.dp_clock_source == NULL) {
@@ -1365,21 +1314,24 @@ static bool dce64_construct(
if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) {
pool->base.dp_clock_source =
- dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true);
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true);
+ /* DCE 6.0 and 6.4: PLL0 can only be used with DP. Don't initialize it here. */
pool->base.clock_sources[0] =
- dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[0], false);
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false);
pool->base.clock_sources[1] =
- dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[1], false);
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false);
pool->base.clk_src_count = 2;
} else {
pool->base.dp_clock_source =
- dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[0], true);
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true);
pool->base.clock_sources[0] =
- dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[1], false);
- pool->base.clk_src_count = 1;
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false);
+ pool->base.clock_sources[1] =
+ dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false);
+ pool->base.clk_src_count = 2;
}
if (pool->base.dp_clock_source == NULL) {
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h
index 5d653a76b0b0..5d653a76b0b0 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
index 061221394ce0..5b7769745202 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
@@ -32,6 +32,7 @@
#include "stream_encoder.h"
#include "resource.h"
+#include "clk_mgr.h"
#include "include/irq_service_interface.h"
#include "irq/dce80/irq_service_dce80.h"
#include "dce110/dce110_timing_generator.h"
@@ -46,7 +47,7 @@
#include "dce/dce_clock_source.h"
#include "dce/dce_audio.h"
#include "dce/dce_hwseq.h"
-#include "dce80/dce80_hw_sequencer.h"
+#include "dce80/dce80_hwseq.h"
#include "dce100/dce100_resource.h"
#include "dce/dce_panel_cntl.h"
@@ -56,7 +57,6 @@
#include "dce/dce_aux.h"
#include "dce/dce_abm.h"
#include "dce/dce_i2c.h"
-/* TODO remove this include */
#ifndef mmMC_HUB_RDREQ_DMIF_LIMIT
#include "gmc/gmc_7_1_d.h"
@@ -724,7 +724,7 @@ static struct link_encoder *dce80_link_encoder_create(
kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
int link_regs_id;
- if (!enc110)
+ if (!enc110 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
link_regs_id =
@@ -870,61 +870,6 @@ static void dce80_resource_destruct(struct dce110_resource_pool *pool)
}
}
-static bool dce80_validate_bandwidth(
- struct dc *dc,
- struct dc_state *context,
- bool fast_validate)
-{
- int i;
- bool at_least_one_pipe = false;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (context->res_ctx.pipe_ctx[i].stream)
- at_least_one_pipe = true;
- }
-
- if (at_least_one_pipe) {
- /* TODO implement when needed but for now hardcode max value*/
- context->bw_ctx.bw.dce.dispclk_khz = 681000;
- context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ;
- } else {
- context->bw_ctx.bw.dce.dispclk_khz = 0;
- context->bw_ctx.bw.dce.yclk_khz = 0;
- }
-
- return true;
-}
-
-static bool dce80_validate_surface_sets(
- struct dc_state *context)
-{
- int i;
-
- for (i = 0; i < context->stream_count; i++) {
- if (context->stream_status[i].plane_count == 0)
- continue;
-
- if (context->stream_status[i].plane_count > 1)
- return false;
-
- if (context->stream_status[i].plane_states[0]->format
- >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN)
- return false;
- }
-
- return true;
-}
-
-static enum dc_status dce80_validate_global(
- struct dc *dc,
- struct dc_state *context)
-{
- if (!dce80_validate_surface_sets(context))
- return DC_FAIL_SURFACE_VALIDATE;
-
- return DC_OK;
-}
-
static void dce80_destroy_resource_pool(struct resource_pool **pool)
{
struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
@@ -938,10 +883,10 @@ static const struct resource_funcs dce80_res_pool_funcs = {
.destroy = dce80_destroy_resource_pool,
.link_enc_create = dce80_link_encoder_create,
.panel_cntl_create = dce80_panel_cntl_create,
- .validate_bandwidth = dce80_validate_bandwidth,
+ .validate_bandwidth = dce100_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce100_add_stream_to_ctx,
- .validate_global = dce80_validate_global,
+ .validate_global = dce100_validate_global,
.find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
};
@@ -1539,6 +1484,7 @@ struct resource_pool *dce83_create_resource_pool(
if (dce83_construct(num_virtual_links, dc, pool))
return &pool->base;
+ kfree(pool);
BREAK_TO_DEBUGGER();
return NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h
index eff31ab83a39..eff31ab83a39 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
index 9f9145742f14..652c05c35494 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
@@ -23,32 +23,36 @@
*
*/
+#include "core_status.h"
#include "dm_services.h"
#include "dc.h"
-#include "dcn10_init.h"
+#include "dcn10/dcn10_init.h"
#include "resource.h"
#include "include/irq_service_interface.h"
-#include "dcn10_resource.h"
-#include "dcn10_ipp.h"
-#include "dcn10_mpc.h"
+#include "dcn10/dcn10_resource.h"
+#include "dcn10/dcn10_ipp.h"
+#include "dcn10/dcn10_mpc.h"
+
+#include "dcn10/dcn10_dwb.h"
+
#include "irq/dcn10/irq_service_dcn10.h"
-#include "dcn10_dpp.h"
-#include "dcn10_optc.h"
-#include "dcn10_hw_sequencer.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn10_opp.h"
-#include "dcn10_link_encoder.h"
-#include "dcn10_stream_encoder.h"
+#include "dcn10/dcn10_dpp.h"
+#include "dcn10/dcn10_optc.h"
+#include "dcn10/dcn10_hwseq.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn10/dcn10_opp.h"
+#include "dcn10/dcn10_link_encoder.h"
+#include "dcn10/dcn10_stream_encoder.h"
#include "dce/dce_clock_source.h"
#include "dce/dce_audio.h"
#include "dce/dce_hwseq.h"
#include "virtual/virtual_stream_encoder.h"
#include "dce110/dce110_resource.h"
#include "dce112/dce112_resource.h"
-#include "dcn10_hubp.h"
-#include "dcn10_hubbub.h"
+#include "dcn10/dcn10_hubp.h"
+#include "dcn10/dcn10_hubbub.h"
#include "dce/dce_panel_cntl.h"
#include "soc15_hw_ip.h"
@@ -510,7 +514,7 @@ static const struct dc_plane_cap plane_cap = {
.argb8888 = true,
.nv12 = true,
.fp16 = true,
- .p010 = true
+ .p010 = false
},
.max_upscale_factor = {
@@ -530,7 +534,6 @@ static const struct dc_debug_options debug_defaults_drv = {
.sanity_checks = true,
.disable_dmcu = false,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
/* raven smu dones't allow 0 disp clk,
@@ -554,17 +557,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.max_downscale_src_width = 3840,
.underflow_assert_delay_us = 0xFFFFFFFF,
.enable_legacy_fast_update = true,
-};
-
-static const struct dc_debug_options debug_defaults_diags = {
- .disable_dmcu = false,
- .force_abm_enable = false,
- .timing_trace = true,
- .clock_trace = true,
- .disable_stutter = true,
- .disable_pplib_clock_request = true,
- .disable_pplib_wm_range = true,
- .underflow_assert_delay_us = 0xFFFFFFFF,
+ .using_dml2 = false,
};
static void dcn10_dpp_destroy(struct dpp **dpp)
@@ -746,7 +739,7 @@ static struct link_encoder *dcn10_link_encoder_create(
kzalloc(sizeof(struct dcn10_link_encoder), GFP_KERNEL);
int link_regs_id;
- if (!enc10)
+ if (!enc10 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
link_regs_id =
@@ -1133,18 +1126,18 @@ static void dcn10_destroy_resource_pool(struct resource_pool **pool)
*pool = NULL;
}
-static bool dcn10_validate_bandwidth(
+static enum dc_status dcn10_validate_bandwidth(
struct dc *dc,
struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
bool voltage_supported;
DC_FP_START();
- voltage_supported = dcn_validate_bandwidth(dc, context, fast_validate);
+ voltage_supported = dcn_validate_bandwidth(dc, context, validate_mode);
DC_FP_END();
- return voltage_supported;
+ return voltage_supported ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
}
static enum dc_status dcn10_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps)
@@ -1246,10 +1239,17 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
/* Store first available for MST second display
* in daisy chain use case
*/
- j = i;
+
+ if (pool->stream_enc[i]->id != ENGINE_ID_VIRTUAL)
+ j = i;
+
if (link->ep_type == DISPLAY_ENDPOINT_PHY && pool->stream_enc[i]->id ==
link->link_enc->preferred_engine)
return pool->stream_enc[i];
+
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && pool->stream_enc[i]->id ==
+ link->dpia_preferred_eng_id)
+ return pool->stream_enc[i];
}
}
@@ -1263,6 +1263,11 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
return NULL;
}
+unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx)
+{
+ return pipe_ctx->pipe_dlg_param.vstartup_start;
+}
+
static const struct dc_cap_funcs cap_funcs = {
.get_dcc_compression_cap = dcn10_get_dcc_compression_cap
};
@@ -1277,7 +1282,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = {
.validate_global = dcn10_validate_global,
.add_stream_to_ctx = dcn10_add_stream_to_ctx,
.patch_unknown_plane_state = dcn10_patch_unknown_plane_state,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
+ .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -1392,8 +1398,6 @@ static bool dcn10_resource_construct(
if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
dc->debug = debug_defaults_drv;
- else
- dc->debug = debug_defaults_diags;
/*************************************************
* Create resources *
@@ -1624,6 +1628,7 @@ static bool dcn10_resource_construct(
/* valid pipe num */
pool->base.pipe_count = j;
pool->base.timing_generator_count = j;
+ pool->base.mpcc_count = j;
/* within dml lib, it is hard code to 4. If ASIC pipe is fused,
* the value may be changed
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
index bf8e33cd8147..7bc1be53e800 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
@@ -51,6 +51,7 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
const struct resource_pool *pool,
struct dc_stream_state *stream);
+unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx);
#endif /* __DC_RESOURCE_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
index d587f807dfd7..84b38d2d6967 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
@@ -24,12 +24,10 @@
*
*/
-#include <linux/slab.h>
-
#include "dm_services.h"
#include "dc.h"
-#include "dcn20_init.h"
+#include "dcn20/dcn20_init.h"
#include "resource.h"
#include "include/irq_service_interface.h"
@@ -39,31 +37,34 @@
#include "dcn10/dcn10_hubp.h"
#include "dcn10/dcn10_ipp.h"
-#include "dcn20_hubbub.h"
-#include "dcn20_mpc.h"
-#include "dcn20_hubp.h"
+#include "dcn20/dcn20_hubbub.h"
+#include "dcn20/dcn20_mpc.h"
+#include "dcn20/dcn20_hubp.h"
#include "irq/dcn20/irq_service_dcn20.h"
-#include "dcn20_dpp.h"
-#include "dcn20_optc.h"
-#include "dcn20_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dcn20/dcn20_dpp.h"
+#include "dcn20/dcn20_optc.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn10/dcn10_resource.h"
-#include "dcn20_opp.h"
+#include "dcn20/dcn20_opp.h"
-#include "dcn20_dsc.h"
+#include "dcn20/dcn20_dsc.h"
-#include "dcn20_link_encoder.h"
-#include "dcn20_stream_encoder.h"
+#include "dcn20/dcn20_link_encoder.h"
+#include "dcn20/dcn20_stream_encoder.h"
#include "dce/dce_clock_source.h"
#include "dce/dce_audio.h"
#include "dce/dce_hwseq.h"
#include "virtual/virtual_stream_encoder.h"
#include "dce110/dce110_resource.h"
#include "dml/display_mode_vba.h"
-#include "dcn20_dccg.h"
-#include "dcn20_vmid.h"
+#include "dcn20/dcn20_dccg.h"
+#include "dcn20/dcn20_vmid.h"
#include "dce/dce_panel_cntl.h"
+#include "dcn20/dcn20_dwb.h"
+#include "dcn20/dcn20_mmhubbub.h"
+
#include "navi10_ip_offset.h"
#include "dcn/dcn_2_0_0_offset.h"
@@ -73,9 +74,6 @@
#include "nbio/nbio_2_3_offset.h"
-#include "dcn20/dcn20_dwb.h"
-#include "dcn20/dcn20_mmhubbub.h"
-
#include "mmhub/mmhub_2_0_0_offset.h"
#include "mmhub/mmhub_2_0_0_sh_mask.h"
@@ -85,11 +83,10 @@
#include "dce/dce_aux.h"
#include "dce/dce_i2c.h"
#include "vm_helper.h"
-#include "link_enc_cfg.h"
-#include "amdgpu_socbb.h"
+#include "link_enc_cfg.h"
+#include "link_service.h"
-#include "link.h"
#define DC_LOGGER_INIT(logger)
#ifndef mmDP0_DP_DPHY_INTERNAL_CTRL
@@ -709,7 +706,6 @@ static const struct resource_caps res_cap_nv14 = {
static const struct dc_debug_options debug_defaults_drv = {
.disable_dmcu = false,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
.pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
@@ -723,6 +719,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.sanity_checks = false,
.underflow_assert_delay_us = 0xFFFFFFFF,
.enable_legacy_fast_update = true,
+ .using_dml2 = false,
};
void dcn20_dpp_destroy(struct dpp **dpp)
@@ -922,7 +919,7 @@ struct link_encoder *dcn20_link_encoder_create(
kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
int link_regs_id;
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
link_regs_id =
@@ -1223,7 +1220,7 @@ static void get_pixel_clock_parameters(
struct pipe_ctx *odm_pipe;
int opp_cnt = 1;
struct dc_link *link = stream->link;
- struct link_encoder *link_enc = NULL;
+ struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc;
struct dc *dc = pipe_ctx->stream->ctx->dc;
struct dce_hwseq *hws = dc->hwseq;
@@ -1232,7 +1229,8 @@ static void get_pixel_clock_parameters(
pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz;
- link_enc = link_enc_cfg_get_link_enc(link);
+ if (!dc->config.unify_link_enc_assignment)
+ link_enc = link_enc_cfg_get_link_enc(link);
if (link_enc)
pixel_clk_params->encoder_object_id = link_enc->id;
@@ -1253,7 +1251,7 @@ static void get_pixel_clock_parameters(
if (opp_cnt == 4)
pixel_clk_params->requested_pix_clk_100hz /= 4;
- else if (optc2_is_two_pixels_per_containter(&stream->timing) || opp_cnt == 2)
+ else if (pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing) || opp_cnt == 2)
pixel_clk_params->requested_pix_clk_100hz /= 2;
else if (hws->funcs.is_dp_dig_pixel_rate_div_policy) {
if (hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx))
@@ -1263,6 +1261,15 @@ static void get_pixel_clock_parameters(
if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
pixel_clk_params->requested_pix_clk_100hz *= 2;
+ if ((pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container &&
+ pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&pipe_ctx->stream->timing)) ||
+ (hws->funcs.is_dp_dig_pixel_rate_div_policy &&
+ hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx)) ||
+ opp_cnt > 1) {
+ pixel_clk_params->dio_se_pix_per_cycle = 2;
+ } else {
+ pixel_clk_params->dio_se_pix_per_cycle = 1;
+ }
}
static void build_clamping_params(struct dc_stream_state *stream)
@@ -1272,15 +1279,24 @@ static void build_clamping_params(struct dc_stream_state *stream)
stream->clamping.pixel_encoding = stream->timing.pixel_encoding;
}
-static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
+void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx)
{
-
get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params);
-
pipe_ctx->clock_source->funcs->get_pix_clk_dividers(
- pipe_ctx->clock_source,
- &pipe_ctx->stream_res.pix_clk_params,
- &pipe_ctx->pll_settings);
+ pipe_ctx->clock_source,
+ &pipe_ctx->stream_res.pix_clk_params,
+ &pipe_ctx->pll_settings);
+}
+
+static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx)
+{
+ struct resource_pool *pool = pipe_ctx->stream->ctx->dc->res_pool;
+
+ if (pool->funcs->build_pipe_pix_clk_params) {
+ pool->funcs->build_pipe_pix_clk_params(pipe_ctx);
+ } else {
+ dcn20_build_pipe_pix_clk_params(pipe_ctx);
+ }
pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding;
@@ -1494,41 +1510,12 @@ bool dcn20_split_stream_for_odm(
next_odm_pipe->prev_odm_pipe = prev_odm_pipe;
if (prev_odm_pipe->plane_state) {
- struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data;
- int new_width;
-
- /* HACTIVE halved for odm combine */
- sd->h_active /= 2;
- /* Calculate new vp and recout for left pipe */
- /* Need at least 16 pixels width per side */
- if (sd->recout.x + 16 >= sd->h_active)
- return false;
- new_width = sd->h_active - sd->recout.x;
- sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz, sd->recout.width - new_width));
- sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz_c, sd->recout.width - new_width));
- sd->recout.width = new_width;
-
- /* Calculate new vp and recout for right pipe */
- sd = &next_odm_pipe->plane_res.scl_data;
- /* HACTIVE halved for odm combine */
- sd->h_active /= 2;
- /* Need at least 16 pixels width per side */
- if (new_width <= 16)
- return false;
- new_width = sd->recout.width + sd->recout.x - sd->h_active;
- sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz, sd->recout.width - new_width));
- sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz_c, sd->recout.width - new_width));
- sd->recout.width = new_width;
- sd->viewport.x += dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz, sd->h_active - sd->recout.x));
- sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz_c, sd->h_active - sd->recout.x));
- sd->recout.x = 0;
+ if (!resource_build_scaling_params(prev_odm_pipe) ||
+ !resource_build_scaling_params(next_odm_pipe)) {
+ return false;
+ }
}
+
if (!next_odm_pipe->top_pipe)
next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx];
else
@@ -1596,7 +1583,7 @@ unsigned int dcn20_calc_max_scaled_time(
if (time_per_byte == 0)
time_per_byte = 1;
- small_free_entry = (total_y_free_entry > total_c_free_entry) ? total_c_free_entry : total_y_free_entry;
+ small_free_entry = total_c_free_entry;
max_free_entry = (mode == PACKED_444) ? total_y_free_entry + total_c_free_entry : small_free_entry;
buf_lh_capability = max_free_entry*time_per_byte*32/16; /* there is 4bit fraction */
max_scaled_time = buf_lh_capability - urgent_watermark;
@@ -1650,8 +1637,6 @@ void dcn20_set_mcif_arb_params(
if (dwb_pipe >= MAX_DWB_PIPES)
return;
}
- if (dwb_pipe >= MAX_DWB_PIPES)
- return;
}
}
@@ -1948,7 +1933,7 @@ int dcn20_validate_apply_pipe_split_flags(
v->ODMCombineEnablePerState[vlevel][pipe_plane];
if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) {
- if (resource_get_num_mpc_splits(pipe) == 1) {
+ if (resource_get_mpc_slice_count(pipe) == 2) {
/*If need split for mpc but 2 way split already*/
if (split[i] == 4)
split[i] = 2; /* 2 -> 4 MPC */
@@ -1956,7 +1941,7 @@ int dcn20_validate_apply_pipe_split_flags(
split[i] = 0; /* 2 -> 2 MPC */
else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state)
merge[i] = true; /* 2 -> 1 MPC */
- } else if (resource_get_num_mpc_splits(pipe) == 3) {
+ } else if (resource_get_mpc_slice_count(pipe) == 4) {
/*If need split for mpc but 4 way split already*/
if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe)
|| !pipe->bottom_pipe)) {
@@ -1965,7 +1950,7 @@ int dcn20_validate_apply_pipe_split_flags(
pipe->top_pipe->plane_state == pipe->plane_state)
merge[i] = true; /* 4 -> 1 MPC */
split[i] = 0;
- } else if (resource_get_num_odm_splits(pipe)) {
+ } else if (resource_get_odm_slice_count(pipe) > 1) {
/* ODM -> MPC transition */
if (pipe->prev_odm_pipe) {
split[i] = 0;
@@ -1973,7 +1958,7 @@ int dcn20_validate_apply_pipe_split_flags(
}
}
} else {
- if (resource_get_num_odm_splits(pipe) == 1) {
+ if (resource_get_odm_slice_count(pipe) == 2) {
/*If need split for odm but 2 way split already*/
if (split[i] == 4)
split[i] = 2; /* 2 -> 4 ODM */
@@ -1983,7 +1968,7 @@ int dcn20_validate_apply_pipe_split_flags(
ASSERT(0); /* NOT expected yet */
merge[i] = true; /* exit ODM */
}
- } else if (resource_get_num_odm_splits(pipe) == 3) {
+ } else if (resource_get_odm_slice_count(pipe) == 4) {
/*If need split for odm but 4 way split already*/
if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe)
|| !pipe->next_odm_pipe)) {
@@ -1993,7 +1978,7 @@ int dcn20_validate_apply_pipe_split_flags(
merge[i] = true; /* exit ODM */
}
split[i] = 0;
- } else if (resource_get_num_mpc_splits(pipe)) {
+ } else if (resource_get_mpc_slice_count(pipe) > 1) {
/* MPC -> ODM transition */
ASSERT(0); /* NOT expected yet */
if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) {
@@ -2022,10 +2007,11 @@ bool dcn20_fast_validate_bw(
int *pipe_cnt_out,
int *pipe_split_from,
int *vlevel_out,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
bool out = false;
int split[MAX_PIPES] = { 0 };
+ bool merge[MAX_PIPES] = { false };
int pipe_cnt, i, pipe_idx, vlevel;
ASSERT(pipes);
@@ -2035,7 +2021,7 @@ bool dcn20_fast_validate_bw(
dcn20_merge_pipes_for_validate(dc, context);
DC_FP_START();
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode);
DC_FP_END();
*pipe_cnt_out = pipe_cnt;
@@ -2050,7 +2036,7 @@ bool dcn20_fast_validate_bw(
if (vlevel > context->bw_ctx.dml.soc.num_states)
goto validate_fail;
- vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL);
+ vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
/*initialize pipe_just_split_from to invalid idx*/
for (i = 0; i < MAX_PIPES; i++)
@@ -2118,6 +2104,7 @@ bool dcn20_fast_validate_bw(
ASSERT(0);
}
}
+
/* Actual dsc count per stream dsc validation*/
if (!dcn20_validate_dsc(dc, context)) {
context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] =
@@ -2137,14 +2124,22 @@ validate_out:
return out;
}
-bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
- bool fast_validate)
+enum dc_status dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode)
{
bool voltage_supported;
+ display_e2e_pipe_params_st *pipes;
+
+ pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+ if (!pipes)
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+
DC_FP_START();
- voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate);
+ voltage_supported = dcn20_validate_bandwidth_fp(dc, context, validate_mode, pipes);
DC_FP_END();
- return voltage_supported;
+
+ kfree(pipes);
+ return voltage_supported ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
}
struct pipe_ctx *dcn20_acquire_free_pipe_for_layer(
@@ -2178,10 +2173,11 @@ bool dcn20_get_dcc_compression_cap(const struct dc *dc,
const struct dc_dcc_surface_param *input,
struct dc_surface_dcc_cap *output)
{
- return dc->res_pool->hubbub->funcs->get_dcc_compression_cap(
- dc->res_pool->hubbub,
- input,
- output);
+ if (dc->res_pool->hubbub->funcs->get_dcc_compression_cap)
+ return dc->res_pool->hubbub->funcs->get_dcc_compression_cap(
+ dc->res_pool->hubbub, input, output);
+
+ return false;
}
static void dcn20_destroy_resource_pool(struct resource_pool **pool)
@@ -2211,12 +2207,22 @@ enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_stat
return DC_OK;
}
+void dcn20_release_pipe(struct dc_state *context,
+ struct pipe_ctx *pipe,
+ const struct resource_pool *pool)
+{
+ if (resource_is_pipe_type(pipe, OPP_HEAD) && pipe->stream_res.dsc)
+ dcn20_release_dsc(&context->res_ctx, pool, &pipe->stream_res.dsc);
+ memset(pipe, 0, sizeof(*pipe));
+}
+
static const struct resource_funcs dcn20_res_pool_funcs = {
.destroy = dcn20_destroy_resource_pool,
.link_enc_create = dcn20_link_encoder_create,
.panel_cntl_create = dcn20_panel_cntl_create,
.validate_bandwidth = dcn20_validate_bandwidth,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn20_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -2224,7 +2230,8 @@ static const struct resource_funcs dcn20_res_pool_funcs = {
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.set_mcif_arb_params = dcn20_set_mcif_arb_params,
.populate_dml_pipes = dcn20_populate_dml_pipes_from_context,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
+ .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
@@ -2315,7 +2322,6 @@ static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb(
static struct _vcs_dpi_ip_params_st *get_asic_rev_ip_params(
uint32_t hw_internal_rev)
{
- /* NV14 */
if (ASICREV_IS_NAVI14_M(hw_internal_rev))
return &dcn2_0_nv14_ip;
@@ -2428,6 +2434,7 @@ static bool dcn20_resource_construct(
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.extended_aux_timeout_support = true;
+ dc->caps.dmcub_support = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
@@ -2596,7 +2603,7 @@ static bool dcn20_resource_construct(
ranges.writer_wm_sets[0].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX;
/* Notify PP Lib/SMU which Watermarks to use for which clock ranges */
- if (pool->base.pp_smu->nv_funcs.set_wm_ranges)
+ if (pool->base.pp_smu && pool->base.pp_smu->nv_funcs.set_wm_ranges)
pool->base.pp_smu->nv_funcs.set_wm_ranges(&pool->base.pp_smu->nv_funcs.pp_smu, &ranges);
}
@@ -2729,6 +2736,8 @@ static bool dcn20_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 2;
+
dc->cap_funcs = cap_funcs;
if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
index 6d1a8924e57b..e997d35a8b86 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h
@@ -63,7 +63,9 @@ struct pipe_ctx *dcn20_acquire_free_pipe_for_layer(
struct dc_state *new_ctx,
const struct resource_pool *pool,
const struct pipe_ctx *opp_head_pipe);
-
+void dcn20_release_pipe(struct dc_state *context,
+ struct pipe_ctx *pipe,
+ const struct resource_pool *pool);
struct stream_encoder *dcn20_stream_encoder_create(
enum engine_id eng_id,
struct dc_context *ctx);
@@ -117,7 +119,7 @@ void dcn20_set_mcif_arb_params(
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int pipe_cnt);
-bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate);
+enum dc_status dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, enum dc_validate_mode validate_mode);
void dcn20_merge_pipes_for_validate(
struct dc *dc,
struct dc_state *context);
@@ -156,13 +158,14 @@ bool dcn20_fast_validate_bw(
int *pipe_cnt_out,
int *pipe_split_from,
int *vlevel_out,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream);
enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream);
enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, struct dc_state *dc_ctx, struct dc_stream_state *dc_stream);
enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream);
enum dc_status dcn20_patch_unknown_plane_state(struct dc_plane_state *plane_state);
+void dcn20_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx);
#endif /* __DC_RESOURCE_DCN20_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
index 2dc4d2c1410b..e4a1338d21e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
@@ -26,7 +26,7 @@
#include "dm_services.h"
#include "dc.h"
-#include "dcn201_init.h"
+#include "dcn201/dcn201_init.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "resource.h"
#include "include/irq_service_interface.h"
@@ -36,16 +36,16 @@
#include "dcn10/dcn10_hubp.h"
#include "dcn10/dcn10_ipp.h"
-#include "dcn201_mpc.h"
-#include "dcn201_hubp.h"
+#include "dcn201/dcn201_mpc.h"
+#include "dcn201/dcn201_hubp.h"
#include "irq/dcn201/irq_service_dcn201.h"
#include "dcn201/dcn201_dpp.h"
#include "dcn201/dcn201_hubbub.h"
-#include "dcn201_dccg.h"
-#include "dcn201_optc.h"
-#include "dcn201_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
-#include "dcn201_opp.h"
+#include "dcn201/dcn201_dccg.h"
+#include "dcn201/dcn201_optc.h"
+#include "dcn201/dcn201_hwseq.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn201/dcn201_opp.h"
#include "dcn201/dcn201_link_encoder.h"
#include "dcn20/dcn20_stream_encoder.h"
#include "dce/dce_clock_source.h"
@@ -55,13 +55,12 @@
#include "dce110/dce110_resource.h"
#include "dce/dce_aux.h"
#include "dce/dce_i2c.h"
-#include "dcn201_hubbub.h"
#include "dcn10/dcn10_resource.h"
#include "cyan_skillfish_ip_offset.h"
-#include "dcn/dcn_2_0_3_offset.h"
-#include "dcn/dcn_2_0_3_sh_mask.h"
+#include "dcn/dcn_2_0_1_offset.h"
+#include "dcn/dcn_2_0_1_sh_mask.h"
#include "dpcs/dpcs_2_0_3_offset.h"
#include "dpcs/dpcs_2_0_3_sh_mask.h"
@@ -182,6 +181,7 @@ static struct _vcs_dpi_soc_bounding_box_st dcn201_soc = {
.socclk_mhz = 1254.0,
.dram_speed_mts = 14000.0,
},
+ /* state4 is not an actual state, just defines unsupported for dml*/
{
.state = 4,
.dscclk_mhz = 400.0,
@@ -566,6 +566,8 @@ static const struct resource_caps res_cap_dnc201 = {
.num_audio = 2,
.num_stream_encoder = 2,
.num_pll = 2,
+ .num_dwb = 0,
+ .num_dsc = 0,
.num_ddc = 2,
};
@@ -598,7 +600,6 @@ static const struct dc_plane_cap plane_cap = {
static const struct dc_debug_options debug_defaults_drv = {
.disable_dmcu = true,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
.pipe_split_policy = MPC_SPLIT_DYNAMIC,
@@ -612,8 +613,9 @@ static const struct dc_debug_options debug_defaults_drv = {
.scl_reset_length10 = true,
.sanity_checks = false,
.underflow_assert_delay_us = 0xFFFFFFFF,
- .enable_tri_buf = false,
+ .enable_tri_buf = true,
.enable_legacy_fast_update = true,
+ .using_dml2 = false,
};
static void dcn201_dpp_destroy(struct dpp **dpp)
@@ -792,11 +794,13 @@ static struct link_encoder *dcn201_link_encoder_create(
{
struct dcn20_link_encoder *enc20 =
kzalloc(sizeof(struct dcn20_link_encoder), GFP_ATOMIC);
- struct dcn10_link_encoder *enc10 = &enc20->enc10;
+ struct dcn10_link_encoder *enc10;
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
+ enc10 = &enc20->enc10;
+
dcn201_link_encoder_construct(enc20,
enc_init_data,
&link_enc_feature,
@@ -1002,8 +1006,10 @@ static struct pipe_ctx *dcn201_acquire_free_pipe_for_layer(
struct pipe_ctx *head_pipe = resource_get_otg_master_for_stream(res_ctx, opp_head_pipe->stream);
struct pipe_ctx *idle_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, head_pipe);
- if (!head_pipe)
+ if (!head_pipe) {
ASSERT(0);
+ return NULL;
+ }
if (!idle_pipe)
return NULL;
@@ -1069,10 +1075,12 @@ static struct resource_funcs dcn201_res_pool_funcs = {
.add_dsc_to_stream_resource = NULL,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn201_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.populate_dml_writeback_from_context = dcn201_populate_dml_writeback_from_context,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.set_mcif_arb_params = dcn20_set_mcif_arb_params,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
+ .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static bool dcn201_resource_construct(
@@ -1277,6 +1285,8 @@ static bool dcn201_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 2;
+
dc->cap_funcs = cap_funcs;
return true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h
index e0467d17d4ae..e0467d17d4ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
index d1a25fe6c44f..918742a42ded 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
@@ -29,7 +29,7 @@
#include "dm_services.h"
#include "dc.h"
-#include "dcn21_init.h"
+#include "dcn21/dcn21_init.h"
#include "resource.h"
#include "include/irq_service_interface.h"
@@ -44,12 +44,12 @@
#include "dcn20/dcn20_hubbub.h"
#include "dcn20/dcn20_mpc.h"
#include "dcn20/dcn20_hubp.h"
-#include "dcn21_hubp.h"
+#include "dcn21/dcn21_hubp.h"
#include "irq/dcn21/irq_service_dcn21.h"
#include "dcn20/dcn20_dpp.h"
#include "dcn20/dcn20_optc.h"
#include "dcn21/dcn21_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn20/dcn20_opp.h"
#include "dcn20/dcn20_dsc.h"
#include "dcn21/dcn21_link_encoder.h"
@@ -61,7 +61,7 @@
#include "dml/display_mode_vba.h"
#include "dcn20/dcn20_dccg.h"
#include "dcn21/dcn21_dccg.h"
-#include "dcn21_hubbub.h"
+#include "dcn21/dcn21_hubbub.h"
#include "dcn10/dcn10_resource.h"
#include "dce/dce_panel_cntl.h"
@@ -581,32 +581,6 @@ static const struct resource_caps res_cap_rn = {
.num_dsc = 3,
};
-#ifdef DIAGS_BUILD
-static const struct resource_caps res_cap_rn_FPGA_4pipe = {
- .num_timing_generator = 4,
- .num_opp = 4,
- .num_video_plane = 4,
- .num_audio = 7,
- .num_stream_encoder = 4,
- .num_pll = 4,
- .num_dwb = 1,
- .num_ddc = 4,
- .num_dsc = 0,
-};
-
-static const struct resource_caps res_cap_rn_FPGA_2pipe_dsc = {
- .num_timing_generator = 2,
- .num_opp = 2,
- .num_video_plane = 2,
- .num_audio = 7,
- .num_stream_encoder = 2,
- .num_pll = 4,
- .num_dwb = 1,
- .num_ddc = 4,
- .num_dsc = 2,
-};
-#endif
-
static const struct dc_plane_cap plane_cap = {
.type = DC_PLANE_TYPE_DCN_UNIVERSAL,
.per_pixel_alpha = true,
@@ -636,7 +610,6 @@ static const struct dc_plane_cap plane_cap = {
static const struct dc_debug_options debug_defaults_drv = {
.disable_dmcu = false,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
.min_disp_clk_khz = 100000,
@@ -654,6 +627,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.dmub_command_table = true,
.use_max_lb = true,
.enable_legacy_fast_update = true,
+ .using_dml2 = false,
};
static const struct dc_panel_config panel_config_defaults = {
@@ -712,9 +686,8 @@ static void dcn21_resource_destruct(struct dcn21_resource_pool *pool)
pool->base.hubps[i] = NULL;
}
- if (pool->base.irqs != NULL) {
+ if (pool->base.irqs != NULL)
dal_irq_service_destroy(&pool->base.irqs);
- }
}
for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
@@ -796,10 +769,11 @@ bool dcn21_fast_validate_bw(struct dc *dc,
int *pipe_cnt_out,
int *pipe_split_from,
int *vlevel_out,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
bool out = false;
int split[MAX_PIPES] = { 0 };
+ bool merge[MAX_PIPES] = { false };
int pipe_cnt, i, pipe_idx, vlevel;
ASSERT(pipes);
@@ -809,7 +783,7 @@ bool dcn21_fast_validate_bw(struct dc *dc,
dcn20_merge_pipes_for_validate(dc, context);
DC_FP_START();
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode);
DC_FP_END();
*pipe_cnt_out = pipe_cnt;
@@ -842,7 +816,7 @@ bool dcn21_fast_validate_bw(struct dc *dc,
goto validate_fail;
}
- vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL);
+ vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
@@ -949,14 +923,22 @@ validate_out:
* with DC_FP_START()/DC_FP_END(). Use the same approach as for
* dcn20_validate_bandwidth in dcn20_resource.c.
*/
-static bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context,
- bool fast_validate)
+static enum dc_status dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode)
{
bool voltage_supported;
+ display_e2e_pipe_params_st *pipes;
+
+ pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+ if (!pipes)
+ return DC_FAIL_BANDWIDTH_VALIDATE;
+
DC_FP_START();
- voltage_supported = dcn21_validate_bandwidth_fp(dc, context, fast_validate);
+ voltage_supported = dcn21_validate_bandwidth_fp(dc, context, validate_mode, pipes);
DC_FP_END();
- return voltage_supported;
+
+ kfree(pipes);
+ return voltage_supported ? DC_OK : DC_NOT_SUPPORTED;
}
static void dcn21_destroy_resource_pool(struct resource_pool **pool)
@@ -1315,7 +1297,7 @@ static struct link_encoder *dcn21_link_encoder_create(
kzalloc(sizeof(struct dcn21_link_encoder), GFP_KERNEL);
int link_regs_id;
- if (!enc21)
+ if (!enc21 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
link_regs_id =
@@ -1389,12 +1371,14 @@ static const struct resource_funcs dcn21_res_pool_funcs = {
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.populate_dml_writeback_from_context = dcn20_populate_dml_writeback_from_context,
.patch_unknown_plane_state = dcn21_patch_unknown_plane_state,
.set_mcif_arb_params = dcn20_set_mcif_arb_params,
.find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
.update_bw_bounding_box = dcn21_update_bw_bounding_box,
.get_panel_config_defaults = dcn21_get_panel_config_defaults,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static bool dcn21_resource_construct(
@@ -1406,16 +1390,11 @@ static bool dcn21_resource_construct(
struct dc_context *ctx = dc->ctx;
struct irq_service_init_data init_data;
uint32_t pipe_fuses = read_pipe_fuses(ctx);
- uint32_t num_pipes;
+ uint32_t num_pipes = 0;
ctx->dc_bios->regs = &bios_regs;
pool->base.res_cap = &res_cap_rn;
-#ifdef DIAGS_BUILD
- if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
- //pool->base.res_cap = &res_cap_nv10_FPGA_2pipe_dsc;
- pool->base.res_cap = &res_cap_rn_FPGA_4pipe;
-#endif
pool->base.funcs = &dcn21_res_pool_funcs;
@@ -1434,9 +1413,9 @@ static bool dcn21_resource_construct(
dc->caps.min_horizontal_blanking_period = 80;
dc->caps.dmdata_alloc_size = 2048;
- dc->caps.max_slave_planes = 1;
- dc->caps.max_slave_yuv_planes = 1;
- dc->caps.max_slave_rgb_planes = 1;
+ dc->caps.max_slave_planes = 3;
+ dc->caps.max_slave_yuv_planes = 3;
+ dc->caps.max_slave_rgb_planes = 3;
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.extended_aux_timeout_support = true;
@@ -1705,6 +1684,8 @@ static bool dcn21_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 2;
+
dc->cap_funcs = cap_funcs;
return true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h
index f7ecc002c2f7..a017fd9854d1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h
@@ -51,6 +51,6 @@ bool dcn21_fast_validate_bw(
int *pipe_cnt_out,
int *pipe_split_from,
int *vlevel_out,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
#endif /* _DCN21_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
index 88c0b24a3249..ff63f59ff928 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
@@ -27,7 +27,7 @@
#include "dm_services.h"
#include "dc.h"
-#include "dcn30_init.h"
+#include "dcn30/dcn30_init.h"
#include "resource.h"
#include "include/irq_service_interface.h"
@@ -44,7 +44,7 @@
#include "dcn30/dcn30_optc.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn30/dcn30_opp.h"
#include "dcn20/dcn20_dsc.h"
#include "dcn30/dcn30_vpg.h"
@@ -60,7 +60,7 @@
#include "dml/display_mode_vba.h"
#include "dcn30/dcn30_dccg.h"
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "dce/dce_panel_cntl.h"
#include "dcn30/dcn30_dwb.h"
@@ -91,6 +91,8 @@
#include "amdgpu_socbb.h"
#include "dc_dmub_srv.h"
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
enum dcn30_clk_src_array_id {
@@ -709,7 +711,6 @@ static const struct dc_plane_cap plane_cap = {
static const struct dc_debug_options debug_defaults_drv = {
.disable_dmcu = true, //No DMCU on DCN30
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
.pipe_split_policy = MPC_SPLIT_DYNAMIC,
@@ -727,6 +728,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.use_max_lb = true,
.exit_idle_opt_for_cursor_updates = true,
.enable_legacy_fast_update = false,
+ .using_dml2 = false,
};
static const struct dc_panel_config panel_config_defaults = {
@@ -924,7 +926,7 @@ static struct link_encoder *dcn30_link_encoder_create(
struct dcn20_link_encoder *enc20 =
kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
dcn30_link_encoder_construct(enc20,
@@ -1317,13 +1319,13 @@ static struct clock_source *dcn30_clock_source_create(
int dcn30_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int i, pipe_cnt;
struct resource_context *res_ctx = &context->res_ctx;
DC_FP_START();
- dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+ dcn20_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
DC_FP_END();
for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1416,10 +1418,7 @@ void dcn30_set_mcif_arb_params(
if (dwb_pipe >= MAX_DWB_PIPES)
return;
}
- if (dwb_pipe >= MAX_DWB_PIPES)
- return;
}
-
}
static struct dc_cap_funcs cap_funcs = {
@@ -1628,7 +1627,7 @@ noinline bool dcn30_internal_validate_bw(
display_e2e_pipe_params_st *pipes,
int *pipe_cnt_out,
int *vlevel_out,
- bool fast_validate,
+ enum dc_validate_mode validate_mode,
bool allow_self_refresh_only)
{
bool out = false;
@@ -1636,7 +1635,7 @@ noinline bool dcn30_internal_validate_bw(
int split[MAX_PIPES] = { 0 };
bool merge[MAX_PIPES] = { false };
bool newly_split[MAX_PIPES] = { false };
- int pipe_cnt, i, pipe_idx, vlevel;
+ int pipe_cnt, i, pipe_idx, vlevel = 0;
struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
ASSERT(pipes);
@@ -1647,7 +1646,7 @@ noinline bool dcn30_internal_validate_bw(
context->bw_ctx.dml.vba.VoltageLevel = 0;
context->bw_ctx.dml.vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive;
dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode);
if (!pipe_cnt) {
out = true;
@@ -1656,7 +1655,7 @@ noinline bool dcn30_internal_validate_bw(
dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt);
- if (!fast_validate || !allow_self_refresh_only) {
+ if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING || !allow_self_refresh_only) {
/*
* DML favors voltage over p-state, but we're more interested in
* supporting p-state over voltage. We can't support p-state in
@@ -1670,7 +1669,7 @@ noinline bool dcn30_internal_validate_bw(
vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
}
if (allow_self_refresh_only &&
- (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states ||
+ (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING || vlevel == context->bw_ctx.dml.soc.num_states ||
vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported)) {
/*
* If mode is unsupported or there's still no p-state support
@@ -1679,6 +1678,7 @@ noinline bool dcn30_internal_validate_bw(
* We don't actually support prefetch mode 2, so require that we
* at least support prefetch mode 1.
*/
+ context->bw_ctx.dml.validate_max_state = (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING);
context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank =
dm_allow_self_refresh;
@@ -1688,6 +1688,7 @@ noinline bool dcn30_internal_validate_bw(
memset(merge, 0, sizeof(merge));
vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge);
}
+ context->bw_ctx.dml.validate_max_state = false;
}
dml_log_mode_support_params(&context->bw_ctx.dml);
@@ -1864,7 +1865,7 @@ noinline bool dcn30_internal_validate_bw(
}
if (repopulate_pipes)
- pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
+ pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode);
context->bw_ctx.dml.vba.VoltageLevel = vlevel;
*vlevel_out = vlevel;
*pipe_cnt_out = pipe_cnt;
@@ -1890,8 +1891,6 @@ static int get_refresh_rate(struct dc_state *context)
/* check if refresh rate at least 120hz */
timing = &context->streams[0]->timing;
- if (timing == NULL)
- return 0;
h_v_total = timing->h_total * timing->v_total;
if (h_v_total == 0)
@@ -1961,6 +1960,7 @@ bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc,
{
int refresh_rate = 0;
const int minimum_refreshrate_supported = 120;
+ struct dc_stream_status *stream_status = NULL;
if (context == NULL || context->streams[0] == NULL)
return false;
@@ -1991,10 +1991,15 @@ bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc,
if (!context->streams[0]->allow_freesync)
return false;
- if (context->streams[0]->vrr_active_variable && dc->debug.disable_fams_gaming)
+ if (context->streams[0]->vrr_active_variable && (dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE))
+ return false;
+
+ stream_status = dc_state_get_stream_status(context, context->streams[0]);
+
+ if (!stream_status)
return false;
- context->streams[0]->fpo_in_use = true;
+ stream_status->fpo_in_use = true;
return true;
}
@@ -2030,9 +2035,9 @@ void dcn30_calculate_wm_and_dlg(
DC_FP_END();
}
-bool dcn30_validate_bandwidth(struct dc *dc,
+enum dc_status dcn30_validate_bandwidth(struct dc *dc,
struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
bool out = false;
@@ -2040,13 +2045,17 @@ bool dcn30_validate_bandwidth(struct dc *dc,
int vlevel = 0;
int pipe_cnt = 0;
- display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+ display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count,
+ sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
DC_LOGGER_INIT(dc->ctx->logger);
BW_VAL_TRACE_COUNT();
+ if (!pipes)
+ goto validate_fail;
+
DC_FP_START();
- out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, true);
+ out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, validate_mode, true);
DC_FP_END();
if (pipe_cnt == 0)
@@ -2057,7 +2066,7 @@ bool dcn30_validate_bandwidth(struct dc *dc,
BW_VAL_TRACE_END_VOLTAGE_LEVEL();
- if (fast_validate) {
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) {
BW_VAL_TRACE_SKIP(fast);
goto validate_out;
}
@@ -2083,7 +2092,7 @@ validate_out:
BW_VAL_TRACE_FINISH();
- return out;
+ return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
}
void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
@@ -2164,6 +2173,17 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
optimal_uclk_for_dcfclk_sta_targets[i] =
bw_params->clk_table.entries[j].memclk_mhz * 16;
break;
+ } else {
+ /* condition where (dcfclk_sta_targets[i] >= optimal_dcfclk_for_uclk[j]):
+ * If it just so happens that the memory bandwidth is low enough such that
+ * all the optimal DCFCLK for each UCLK is lower than the smallest DCFCLK STA
+ * target, we need to populate the optimal UCLK for each DCFCLK STA target to
+ * be the max UCLK.
+ */
+ if (j == num_uclk_states - 1) {
+ optimal_uclk_for_dcfclk_sta_targets[i] =
+ bw_params->clk_table.entries[j].memclk_mhz * 16;
+ }
}
}
}
@@ -2172,7 +2192,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
j = 0;
// create the final dcfclk and uclk table
while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) {
- if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) {
+ if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) {
dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
} else {
@@ -2217,6 +2237,7 @@ static const struct resource_funcs dcn30_res_pool_funcs = {
.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
.populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -2228,6 +2249,7 @@ static const struct resource_funcs dcn30_res_pool_funcs = {
.update_bw_bounding_box = dcn30_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn30_get_panel_config_defaults,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
#define CTX ctx
@@ -2331,6 +2353,7 @@ static bool dcn30_resource_construct(
dc->caps.dp_hdmi21_pcon_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* read VBIOS LTTPR caps */
{
@@ -2550,7 +2573,7 @@ static bool dcn30_resource_construct(
pool->base.sw_i2cs[i] = NULL;
}
- /* Audio, Stream Encoders including DIG and virtual, MPC 3D LUTs */
+ /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
if (!resource_construct(num_virtual_links, dc, &pool->base,
&res_create_funcs))
goto create_fail;
@@ -2563,6 +2586,8 @@ static bool dcn30_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 4;
+
dc->cap_funcs = cap_funcs;
if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h
index 8e6b8b7368fd..2c967fe55712 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h
@@ -56,15 +56,15 @@ unsigned int dcn30_calc_max_scaled_time(
enum mmhubbub_wbif_mode mode,
unsigned int urgent_watermark);
-bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context,
- bool fast_validate);
+enum dc_status dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context,
+ enum dc_validate_mode validate_mode);
bool dcn30_internal_validate_bw(
struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int *pipe_cnt_out,
int *vlevel_out,
- bool fast_validate,
+ enum dc_validate_mode validate_mode,
bool allow_self_refresh_only);
void dcn30_calculate_wm_and_dlg(
struct dc *dc, struct dc_state *context,
@@ -78,7 +78,7 @@ void dcn30_populate_dml_writeback_from_context(
int dcn30_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
bool dcn30_acquire_post_bldn_3dlut(
struct resource_context *res_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
index 79d6697d13b6..82a205a7c25c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
@@ -27,7 +27,7 @@
#include "dm_services.h"
#include "dc.h"
-#include "dcn301_init.h"
+#include "dcn301/dcn301_init.h"
#include "resource.h"
#include "include/irq_service_interface.h"
@@ -45,7 +45,7 @@
#include "dcn301/dcn301_optc.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn30/dcn30_opp.h"
#include "dcn20/dcn20_dsc.h"
#include "dcn30/dcn30_vpg.h"
@@ -61,7 +61,7 @@
#include "dcn10/dcn10_resource.h"
#include "dcn30/dcn30_dio_stream_encoder.h"
#include "dcn301/dcn301_dio_link_encoder.h"
-#include "dcn301_panel_cntl.h"
+#include "dcn301/dcn301_panel_cntl.h"
#include "vangogh_ip_offset.h"
@@ -92,6 +92,8 @@
#define TO_DCN301_RES_POOL(pool)\
container_of(pool, struct dcn301_resource_pool, base)
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
enum dcn301_clk_src_array_id {
@@ -669,9 +671,9 @@ static const struct dc_plane_cap plane_cap = {
/* 6:1 downscaling ratio: 1000/6 = 166.666 */
.max_downscale_factor = {
- .argb8888 = 167,
- .nv12 = 167,
- .fp16 = 167
+ .argb8888 = 358,
+ .nv12 = 358,
+ .fp16 = 358
},
64,
64
@@ -680,7 +682,6 @@ static const struct dc_plane_cap plane_cap = {
static const struct dc_debug_options debug_defaults_drv = {
.disable_dmcu = true,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_dpp_power_gate = false,
.disable_hubp_power_gate = false,
@@ -692,14 +693,16 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
.performance_trace = false,
- .max_downscale_src_width = 7680,/*upto 8K*/
+ .max_downscale_src_width = 4096,/*upto true 4k*/
.scl_reset_length10 = true,
.sanity_checks = false,
.underflow_assert_delay_us = 0xFFFFFFFF,
.dwb_fi_phase = -1, // -1 = disable
.dmub_command_table = true,
.use_max_lb = false,
- .exit_idle_opt_for_cursor_updates = true
+ .exit_idle_opt_for_cursor_updates = true,
+ .enable_legacy_fast_update = true,
+ .using_dml2 = false,
};
static void dcn301_dpp_destroy(struct dpp **dpp)
@@ -879,7 +882,7 @@ static struct link_encoder *dcn301_link_encoder_create(
struct dcn20_link_encoder *enc20 =
kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
dcn301_link_encoder_construct(enc20,
@@ -996,7 +999,7 @@ static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id
vpg = dcn301_vpg_create(ctx, vpg_inst);
afmt = dcn301_afmt_create(ctx, afmt_inst);
- if (!enc1 || !vpg || !afmt) {
+ if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) {
kfree(enc1);
kfree(vpg);
kfree(afmt);
@@ -1360,14 +1363,21 @@ static void set_wm_ranges(
pp_smu->nv_funcs.set_wm_ranges(&pp_smu->nv_funcs.pp_smu, &ranges);
}
-static void dcn301_calculate_wm_and_dlg(
- struct dc *dc, struct dc_state *context,
- display_e2e_pipe_params_st *pipes,
- int pipe_cnt,
- int vlevel)
+static void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
DC_FP_START();
- dcn301_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
+ dcn301_fpu_update_bw_bounding_box(dc, bw_params);
+ DC_FP_END();
+}
+
+static void dcn301_calculate_wm_and_dlg(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel_req)
+{
+ DC_FP_START();
+ dcn301_fpu_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel_req);
DC_FP_END();
}
@@ -1380,6 +1390,7 @@ static struct resource_funcs dcn301_res_pool_funcs = {
.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
.populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -1389,7 +1400,8 @@ static struct resource_funcs dcn301_res_pool_funcs = {
.acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
.release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
.update_bw_bounding_box = dcn301_update_bw_bounding_box,
- .patch_unknown_plane_state = dcn20_patch_unknown_plane_state
+ .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static bool dcn301_resource_construct(
@@ -1694,6 +1706,8 @@ static bool dcn301_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 4;
+
dc->cap_funcs = cap_funcs;
return true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h
index ae8672680cdd..ae8672680cdd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
index 447abcd593be..61623cb518d9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
@@ -23,9 +23,9 @@
*
*/
-#include "dcn302_init.h"
+#include "dcn302/dcn302_init.h"
#include "dcn302_resource.h"
-#include "dcn302_dccg.h"
+#include "dcn302/dcn302_dccg.h"
#include "irq/dcn302/irq_service_dcn302.h"
#include "dcn30/dcn30_dio_link_encoder.h"
@@ -47,7 +47,8 @@
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
+
#include "dce/dce_abm.h"
#include "dce/dce_audio.h"
#include "dce/dce_aux.h"
@@ -74,12 +75,13 @@
#include "nbio/nbio_7_4_offset.h"
#include "amdgpu_socbb.h"
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
static const struct dc_debug_options debug_defaults_drv = {
.disable_dmcu = true,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
.pipe_split_policy = MPC_SPLIT_DYNAMIC,
@@ -97,6 +99,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.use_max_lb = true,
.exit_idle_opt_for_cursor_updates = true,
.enable_legacy_fast_update = false,
+ .using_dml2 = false,
};
static const struct dc_panel_config panel_config_defaults = {
@@ -890,7 +893,7 @@ static struct link_encoder *dcn302_link_encoder_create(
{
struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
dcn30_link_encoder_construct(enc20, enc_init_data, &link_enc_feature,
@@ -1137,6 +1140,7 @@ static struct resource_funcs dcn302_res_pool_funcs = {
.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
.populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -1148,6 +1152,7 @@ static struct resource_funcs dcn302_res_pool_funcs = {
.update_bw_bounding_box = dcn302_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn302_get_panel_config_defaults,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static struct dc_cap_funcs cap_funcs = {
@@ -1230,6 +1235,7 @@ static bool dcn302_resource_construct(
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
@@ -1476,6 +1482,8 @@ static bool dcn302_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 4;
+
dc->cap_funcs = cap_funcs;
if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h
index 9f24e73b92b3..9f24e73b92b3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
index adf4989177f7..02b9a84f2db3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
@@ -2,12 +2,30 @@
/*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
* Authors: AMD
*/
-#include "dcn303_init.h"
+#include "dcn303/dcn303_init.h"
#include "dcn303_resource.h"
-#include "dcn303_dccg.h"
+#include "dcn303/dcn303_dccg.h"
#include "irq/dcn303/irq_service_dcn303.h"
#include "dcn30/dcn30_dio_link_encoder.h"
@@ -29,7 +47,7 @@
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "dce/dce_abm.h"
#include "dce/dce_audio.h"
@@ -56,13 +74,14 @@
#include "dml/dcn303/dcn303_fpu.h"
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
static const struct dc_debug_options debug_defaults_drv = {
.disable_dmcu = true,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
.pipe_split_policy = MPC_SPLIT_AVOID,
@@ -77,8 +96,10 @@ static const struct dc_debug_options debug_defaults_drv = {
.underflow_assert_delay_us = 0xFFFFFFFF,
.dwb_fi_phase = -1, // -1 = disable,
.dmub_command_table = true,
+ .use_max_lb = true,
.exit_idle_opt_for_cursor_updates = true,
- .disable_idle_power_optimizations = false,
+ .enable_legacy_fast_update = false,
+ .using_dml2 = false,
};
static const struct dc_panel_config panel_config_defaults = {
@@ -124,9 +145,9 @@ static const struct dc_plane_cap plane_cap = {
.fp16 = 16000
},
.max_downscale_factor = {
- .argb8888 = 600,
- .nv12 = 600,
- .fp16 = 600
+ .argb8888 = 167,
+ .nv12 = 167,
+ .fp16 = 167
},
16,
16
@@ -817,7 +838,7 @@ static struct link_encoder *dcn303_link_encoder_create(
{
struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
dcn30_link_encoder_construct(enc20, enc_init_data, &link_enc_feature,
@@ -1063,6 +1084,7 @@ static struct resource_funcs dcn303_res_pool_funcs = {
.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
.populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -1074,6 +1096,7 @@ static struct resource_funcs dcn303_res_pool_funcs = {
.update_bw_bounding_box = dcn303_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn303_get_panel_config_defaults,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static struct dc_cap_funcs cap_funcs = {
@@ -1121,7 +1144,7 @@ static bool dcn303_resource_construct(
int i;
struct dc_context *ctx = dc->ctx;
struct irq_service_init_data init_data;
- struct ddc_service_init_data ddc_init_data;
+ struct ddc_service_init_data ddc_init_data = {0};
ctx->dc_bios->regs = &bios_regs;
@@ -1149,11 +1172,14 @@ static bool dcn303_resource_construct(
dc->caps.cursor_cache_size =
dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8;
dc->caps.max_slave_planes = 1;
+ dc->caps.max_slave_yuv_planes = 1;
+ dc->caps.max_slave_rgb_planes = 1;
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
@@ -1388,6 +1414,8 @@ static bool dcn303_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 4;
+
dc->cap_funcs = cap_funcs;
if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h
new file mode 100644
index 000000000000..37cf1525820b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ */
+
+#ifndef _DCN303_RESOURCE_H_
+#define _DCN303_RESOURCE_H_
+
+#include "core_types.h"
+
+extern struct _vcs_dpi_ip_params_st dcn3_03_ip;
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_03_soc;
+
+struct resource_pool *dcn303_create_resource_pool(const struct dc_init_data *init_data, struct dc *dc);
+
+void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+
+#endif /* _DCN303_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
index 82de4fe2637f..3ed7f50554e2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
@@ -48,7 +48,7 @@
#include "dcn31/dcn31_optc.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn30/dcn30_opp.h"
#include "dcn20/dcn20_dsc.h"
#include "dcn30/dcn30_vpg.h"
@@ -70,12 +70,11 @@
#include "dml/dcn31/dcn31_fpu.h"
#include "dcn31/dcn31_dccg.h"
#include "dcn10/dcn10_resource.h"
-#include "dcn31_panel_cntl.h"
+#include "dcn31/dcn31_panel_cntl.h"
#include "dcn30/dcn30_dwb.h"
#include "dcn30/dcn30_mmhubbub.h"
-// TODO: change include headers /amd/include/asic_reg after upstream
#include "yellow_carp_offset.h"
#include "dcn/dcn_3_1_2_offset.h"
#include "dcn/dcn_3_1_2_sh_mask.h"
@@ -104,6 +103,8 @@
#include "link_enc_cfg.h"
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
enum dcn31_clk_src_array_id {
@@ -857,7 +858,6 @@ static const struct dc_plane_cap plane_cap = {
static const struct dc_debug_options debug_defaults_drv = {
.disable_dmcu = true,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = false,
.pipe_split_policy = MPC_SPLIT_DYNAMIC,
@@ -868,7 +868,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.max_downscale_src_width = 4096,/*upto true 4K*/
.disable_pplib_wm_range = false,
.scl_reset_length10 = true,
- .sanity_checks = true,
+ .sanity_checks = false,
.underflow_assert_delay_us = 0xFFFFFFFF,
.dwb_fi_phase = -1, // -1 = disable,
.dmub_command_table = true,
@@ -891,6 +891,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.enable_legacy_fast_update = true,
.enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/
.dml_hostvm_override = DML_HOSTVM_OVERRIDE_FALSE,
+ .using_dml2 = false,
};
static const struct dc_panel_config panel_config_defaults = {
@@ -1091,7 +1092,7 @@ static struct link_encoder *dcn31_link_encoder_create(
struct dcn20_link_encoder *enc20 =
kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
dcn31_link_encoder_construct(enc20,
@@ -1308,6 +1309,8 @@ static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create(
/* allocate HPO link encoder */
hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL);
+ if (!hpo_dp_enc31)
+ return NULL; /* out of memory */
hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst,
&hpo_dp_link_enc_regs[inst],
@@ -1613,14 +1616,14 @@ static bool is_dual_plane(enum surface_pixel_format format)
int dcn31x_populate_dml_pipes_from_context(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
uint32_t pipe_cnt;
int i;
dc_assert_fp_enabled();
- pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+ pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
for (i = 0; i < pipe_cnt; i++) {
pipes[i].pipe.src.gpuvm = 1;
@@ -1638,15 +1641,15 @@ int dcn31x_populate_dml_pipes_from_context(struct dc *dc,
int dcn31_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int i, pipe_cnt;
struct resource_context *res_ctx = &context->res_ctx;
- struct pipe_ctx *pipe;
+ struct pipe_ctx *pipe = 0;
bool upscaled = false;
DC_FP_START();
- dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+ dcn31x_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
DC_FP_END();
for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1717,6 +1720,12 @@ int dcn31_populate_dml_pipes_from_context(
return pipe_cnt;
}
+unsigned int dcn31_get_det_buffer_size(
+ const struct dc_state *context)
+{
+ return context->bw_ctx.dml.ip.det_buffer_size_kbytes;
+}
+
void dcn31_calculate_wm_and_dlg(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
@@ -1749,9 +1758,9 @@ dcn31_set_mcif_arb_params(struct dc *dc,
DC_FP_END();
}
-bool dcn31_validate_bandwidth(struct dc *dc,
+enum dc_status dcn31_validate_bandwidth(struct dc *dc,
struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
bool out = false;
@@ -1759,25 +1768,29 @@ bool dcn31_validate_bandwidth(struct dc *dc,
int vlevel = 0;
int pipe_cnt = 0;
- display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+ display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count,
+ sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
DC_LOGGER_INIT(dc->ctx->logger);
BW_VAL_TRACE_COUNT();
+ if (!pipes)
+ goto validate_fail;
+
DC_FP_START();
- out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, true);
+ out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, validate_mode, true);
DC_FP_END();
- // Disable fast_validate to set min dcfclk in alculate_wm_and_dlg
+ // Disable DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX to set min dcfclk in calculate_wm_and_dlg
if (pipe_cnt == 0)
- fast_validate = false;
+ validate_mode = DC_VALIDATE_MODE_AND_PROGRAMMING;
if (!out)
goto validate_fail;
BW_VAL_TRACE_END_VOLTAGE_LEVEL();
- if (fast_validate) {
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) {
BW_VAL_TRACE_SKIP(fast);
goto validate_out;
}
@@ -1800,7 +1813,7 @@ validate_out:
BW_VAL_TRACE_FINISH();
- return out;
+ return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
}
static void dcn31_get_panel_config_defaults(struct dc_panel_config *panel_config)
@@ -1824,6 +1837,7 @@ static struct resource_funcs dcn31_res_pool_funcs = {
.update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
.populate_dml_pipes = dcn31_populate_dml_pipes_from_context,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -1835,6 +1849,10 @@ static struct resource_funcs dcn31_res_pool_funcs = {
.update_bw_bounding_box = dcn31_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn31_get_panel_config_defaults,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe,
+ .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch,
+ .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params
};
static struct clock_source *dcn30_clock_source_create(
@@ -1856,6 +1874,7 @@ static struct clock_source *dcn30_clock_source_create(
return &clk_src->base;
}
+ kfree(clk_src);
BREAK_TO_DEBUGGER();
return NULL;
}
@@ -1937,10 +1956,12 @@ static bool dcn31_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
- dc->config.use_old_fixed_vs_sequence = true;
+ dc->caps.num_of_host_routers = 2;
+ dc->caps.num_of_dpias_per_host_router = 2;
/* Use pipe context based otg sync logic */
dc->config.use_pipe_ctx_sync_logic = true;
+ dc->config.disable_hbr_audio_dp2 = true;
/* read VBIOS LTTPR caps */
{
@@ -2183,6 +2204,8 @@ static bool dcn31_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 4;
+
dc->cap_funcs = cap_funcs;
dc->dcn_ip->max_num_dpp = dcn3_1_ip.max_num_dpp;
@@ -2212,3 +2235,35 @@ struct resource_pool *dcn31_create_resource_pool(
kfree(pool);
return NULL;
}
+
+enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link,
+ struct dc_link_settings *link_setting,
+ uint8_t pipe_count,
+ struct pipe_ctx *pipes,
+ struct audio_output *audio_output)
+{
+ struct dc_state *state = link->dc->current_state;
+ int i;
+
+#if defined(CONFIG_DRM_AMD_DC_FP)
+ for (i = 0; i < state->stream_count; i++)
+ if (state->streams[i] && state->streams[i]->link && state->streams[i]->link == link)
+ link->dc->hwss.calculate_pix_rate_divider((struct dc *)link->dc, state, state->streams[i]);
+
+ for (i = 0; i < pipe_count; i++) {
+ link->dc->res_pool->funcs->build_pipe_pix_clk_params(&pipes[i]);
+
+ // Setup audio
+ if (pipes[i].stream_res.audio != NULL)
+ build_audio_output(state, &pipes[i], &audio_output[i]);
+ }
+#else
+ /* This DCN requires rate divider updates and audio reprogramming to allow DP1<-->DP2 link rate switching,
+ * but the above will not compile on architectures without an FPU.
+ */
+ DC_LOG_WARNING("%s: DP1<-->DP2 link retraining will not work on this DCN on non-FPU platforms", __func__);
+ ASSERT(0);
+#endif
+
+ return DC_OK;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
index 901436591ed4..c32c85ef0ba4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
@@ -37,9 +37,9 @@ struct dcn31_resource_pool {
struct resource_pool base;
};
-bool dcn31_validate_bandwidth(struct dc *dc,
+enum dc_status dcn31_validate_bandwidth(struct dc *dc,
struct dc_state *context,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
void dcn31_calculate_wm_and_dlg(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
@@ -48,7 +48,7 @@ void dcn31_calculate_wm_and_dlg(
int dcn31_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
void
dcn31_populate_dml_writeback_from_context(struct dc *dc,
struct resource_context *res_ctx,
@@ -63,6 +63,15 @@ struct resource_pool *dcn31_create_resource_pool(
const struct dc_init_data *init_data,
struct dc *dc);
+unsigned int dcn31_get_det_buffer_size(
+ const struct dc_state *context);
+
+enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link,
+ struct dc_link_settings *link_setting,
+ uint8_t pipe_count,
+ struct pipe_ctx *pipes,
+ struct audio_output *audio_output);
+
/*temp: B0 specific before switch to dcn313 headers*/
#ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL
#define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
index 004beed9bd44..d4917a35b991 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
@@ -50,7 +50,7 @@
#include "dcn314/dcn314_optc.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn30/dcn30_opp.h"
#include "dcn20/dcn20_dsc.h"
#include "dcn30/dcn30_vpg.h"
@@ -118,6 +118,8 @@
#define regBIF_BX2_BIOS_SCRATCH_6 0x003e
#define regBIF_BX2_BIOS_SCRATCH_6_BASE_IDX 1
+#define DC_LOGGER \
+ dc->ctx->logger
#define DC_LOGGER_INIT(logger)
enum dcn31_clk_src_array_id {
@@ -869,12 +871,11 @@ static const struct dc_plane_cap plane_cap = {
static const struct dc_debug_options debug_defaults_drv = {
.disable_z10 = false,
.enable_z9_disable_interface = true,
- .minimum_z8_residency_time = 2000,
+ .minimum_z8_residency_time = 2100,
.psr_skip_crtc_disable = true,
.replay_skip_crtc_disabled = true,
.disable_dmcu = true,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_dpp_power_gate = false,
.disable_hubp_power_gate = false,
@@ -887,7 +888,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.max_downscale_src_width = 4096,/*upto true 4k*/
.disable_pplib_wm_range = false,
.scl_reset_length10 = true,
- .sanity_checks = true,
+ .sanity_checks = false,
.underflow_assert_delay_us = 0xFFFFFFFF,
.dwb_fi_phase = -1, // -1 = disable,
.dmub_command_table = true,
@@ -914,7 +915,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.hdmistream = true,
.hdmichar = true,
.dpstream = true,
- .symclk32_se = true,
+ .symclk32_se = false,
.symclk32_le = true,
.symclk_fe = true,
.physymclk = true,
@@ -922,25 +923,11 @@ static const struct dc_debug_options debug_defaults_drv = {
}
},
- .seamless_boot_odm_combine = true
-};
-
-static const struct dc_debug_options debug_defaults_diags = {
- .disable_dmcu = true,
- .force_abm_enable = false,
- .timing_trace = true,
- .clock_trace = true,
- .disable_dpp_power_gate = true,
- .disable_hubp_power_gate = true,
- .disable_clock_gate = true,
- .disable_pplib_clock_request = true,
- .disable_pplib_wm_range = true,
- .disable_stutter = false,
- .scl_reset_length10 = true,
- .dwb_fi_phase = -1, // -1 = disable
- .dmub_command_table = true,
- .enable_tri_buf = true,
- .use_max_lb = true
+ .seamless_boot_odm_combine = true,
+ .enable_legacy_fast_update = true,
+ .using_dml2 = false,
+ .disable_dsc_power_gate = true,
+ .min_disp_clk_khz = 100000,
};
static const struct dc_panel_config panel_config_defaults = {
@@ -1163,7 +1150,7 @@ static struct link_encoder *dcn31_link_encoder_create(
struct dcn20_link_encoder *enc20 =
kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
dcn31_link_encoder_construct(enc20,
@@ -1381,6 +1368,8 @@ static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create(
/* allocate HPO link encoder */
hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL);
+ if (!hpo_dp_enc31)
+ return NULL; /* out of memory */
hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst,
&hpo_dp_link_enc_regs[inst],
@@ -1672,20 +1661,20 @@ static struct clock_source *dcn31_clock_source_create(
return &clk_src->base;
}
- BREAK_TO_DEBUGGER();
kfree(clk_src);
+ BREAK_TO_DEBUGGER();
return NULL;
}
static int dcn314_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int pipe_cnt;
DC_FP_START();
- pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate);
+ pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode);
DC_FP_END();
return pipe_cnt;
@@ -1707,28 +1696,9 @@ static void dcn314_get_panel_config_defaults(struct dc_panel_config *panel_confi
*panel_config = panel_config_defaults;
}
-static bool filter_modes_for_single_channel_workaround(struct dc *dc,
- struct dc_state *context)
-{
- // Filter 2K@240Hz+8K@24fps above combination timing if memory only has single dimm LPDDR
- if (dc->clk_mgr->bw_params->vram_type == 34 &&
- dc->clk_mgr->bw_params->num_channels < 2 &&
- context->stream_count > 1) {
- int total_phy_pix_clk = 0;
-
- for (int i = 0; i < context->stream_count; i++)
- if (context->res_ctx.pipe_ctx[i].stream)
- total_phy_pix_clk += context->res_ctx.pipe_ctx[i].stream->phy_pix_clk;
-
- if (total_phy_pix_clk >= (1148928+826260)) //2K@240Hz+8K@24fps
- return true;
- }
- return false;
-}
-
-bool dcn314_validate_bandwidth(struct dc *dc,
+enum dc_status dcn314_validate_bandwidth(struct dc *dc,
struct dc_state *context,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
bool out = false;
@@ -1736,29 +1706,30 @@ bool dcn314_validate_bandwidth(struct dc *dc,
int vlevel = 0;
int pipe_cnt = 0;
- display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+ display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count,
+ sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
DC_LOGGER_INIT(dc->ctx->logger);
BW_VAL_TRACE_COUNT();
- if (filter_modes_for_single_channel_workaround(dc, context))
+ if (!pipes)
goto validate_fail;
DC_FP_START();
// do not support self refresh only
- out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, false);
+ out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, validate_mode, false);
DC_FP_END();
- // Disable fast_validate to set min dcfclk in calculate_wm_and_dlg
+ // Disable DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX to set min dcfclk in calculate_wm_and_dlg
if (pipe_cnt == 0)
- fast_validate = false;
+ validate_mode = DC_VALIDATE_MODE_AND_PROGRAMMING;
if (!out)
goto validate_fail;
BW_VAL_TRACE_END_VOLTAGE_LEVEL();
- if (fast_validate) {
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) {
BW_VAL_TRACE_SKIP(fast);
goto validate_out;
}
@@ -1781,7 +1752,7 @@ validate_out:
BW_VAL_TRACE_FINISH();
- return out;
+ return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
}
static struct resource_funcs dcn314_res_pool_funcs = {
@@ -1796,6 +1767,7 @@ static struct resource_funcs dcn314_res_pool_funcs = {
.update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
.populate_dml_pipes = dcn314_populate_dml_pipes_from_context,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -1808,6 +1780,10 @@ static struct resource_funcs dcn314_res_pool_funcs = {
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn314_get_panel_config_defaults,
.get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe,
+ .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch,
+ .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params
};
static struct clock_source *dcn30_clock_source_create(
@@ -1829,8 +1805,8 @@ static struct clock_source *dcn30_clock_source_create(
return &clk_src->base;
}
- BREAK_TO_DEBUGGER();
kfree(clk_src);
+ BREAK_TO_DEBUGGER();
return NULL;
}
@@ -1911,6 +1887,11 @@ static bool dcn314_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ dc->caps.max_disp_clock_khz_at_vmin = 650000;
+
+ dc->caps.num_of_host_routers = 2;
+ dc->caps.num_of_dpias_per_host_router = 2;
+
/* Use pipe context based otg sync logic */
dc->config.use_pipe_ctx_sync_logic = true;
@@ -1932,8 +1913,6 @@ static bool dcn314_resource_construct(
if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
dc->debug = debug_defaults_drv;
- else
- dc->debug = debug_defaults_diags;
/* Disable pipe power gating */
dc->debug.disable_dpp_power_gate = true;
@@ -2142,6 +2121,8 @@ static bool dcn314_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 4;
+
dc->cap_funcs = cap_funcs;
dc->dcn_ip->max_num_dpp = dcn3_14_ip.max_num_dpp;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h
index 49ffe71018df..ac9bb7f097d5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h
@@ -39,9 +39,9 @@ struct dcn314_resource_pool {
struct resource_pool base;
};
-bool dcn314_validate_bandwidth(struct dc *dc,
+enum dc_status dcn314_validate_bandwidth(struct dc *dc,
struct dc_state *context,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
struct resource_pool *dcn314_create_resource_pool(
const struct dc_init_data *init_data,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
index 127487ea3d7d..82cc78c291d8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
@@ -47,7 +47,7 @@
#include "dcn31/dcn31_optc.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn30/dcn30_opp.h"
#include "dcn20/dcn20_dsc.h"
#include "dcn30/dcn30_vpg.h"
@@ -125,6 +125,7 @@
#include "reg_helper.h"
#include "dce/dmub_abm.h"
#include "dce/dmub_psr.h"
+#include "dce/dmub_replay.h"
#include "dce/dce_aux.h"
#include "dce/dce_i2c.h"
@@ -137,8 +138,8 @@
#define DCN3_15_MAX_DET_SIZE 384
#define DCN3_15_CRB_SEGMENT_SIZE_KB 64
#define DCN3_15_MAX_DET_SEGS (DCN3_15_MAX_DET_SIZE / DCN3_15_CRB_SEGMENT_SIZE_KB)
-/* Minimum 2 extra segments need to be in compbuf and claimable to guarantee seamless mpo transitions */
-#define MIN_RESERVED_DET_SEGS 2
+/* Minimum 3 extra segments need to be in compbuf and claimable to guarantee seamless mpo transitions */
+#define MIN_RESERVED_DET_SEGS 3
enum dcn31_clk_src_array_id {
DCN31_CLK_SRC_PLL0,
@@ -857,7 +858,6 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_z10 = true, /*hw not support it*/
.disable_dmcu = true,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = false,
.pipe_split_policy = MPC_SPLIT_DYNAMIC,
@@ -889,6 +889,7 @@ static const struct dc_debug_options debug_defaults_drv = {
},
.enable_legacy_fast_update = true,
.psr_power_use_phy_fsm = 0,
+ .using_dml2 = false,
};
static const struct dc_panel_config panel_config_defaults = {
@@ -1089,7 +1090,7 @@ static struct link_encoder *dcn31_link_encoder_create(
struct dcn20_link_encoder *enc20 =
kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
dcn31_link_encoder_construct(enc20,
@@ -1308,6 +1309,8 @@ static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create(
/* allocate HPO link encoder */
hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL);
+ if (!hpo_dp_enc31)
+ return NULL; /* out of memory */
hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst,
&hpo_dp_link_enc_regs[inst],
@@ -1481,6 +1484,9 @@ static void dcn315_resource_destruct(struct dcn315_resource_pool *pool)
if (pool->base.psr != NULL)
dmub_psr_destroy(&pool->base.psr);
+ if (pool->base.replay != NULL)
+ dmub_replay_destroy(&pool->base.replay);
+
if (pool->base.dccg != NULL)
dcn_dccg_destroy(&pool->base.dccg);
}
@@ -1630,8 +1636,10 @@ static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context)
int i;
struct resource_context *res_ctx = &context->res_ctx;
- /*Don't apply for single stream*/
- if (context->stream_count < 2)
+ /* Only apply for dual stream scenarios with edp*/
+ if (context->stream_count != 2)
+ return false;
+ if (context->streams[0]->signal != SIGNAL_TYPE_EDP && context->streams[1]->signal != SIGNAL_TYPE_EDP)
return false;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1656,7 +1664,7 @@ static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context)
static int dcn315_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int i, pipe_cnt, crb_idx, crb_pipes;
struct resource_context *res_ctx = &context->res_ctx;
@@ -1666,7 +1674,7 @@ static int dcn315_populate_dml_pipes_from_context(
bool pixel_rate_crb = allow_pixel_rate_crb(dc, context);
DC_FP_START();
- dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+ dcn31x_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
DC_FP_END();
for (i = 0, pipe_cnt = 0, crb_pipes = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1690,7 +1698,7 @@ static int dcn315_populate_dml_pipes_from_context(
pipes[pipe_cnt].dout.dsc_input_bpc = 0;
DC_FP_START();
dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
- if (pixel_rate_crb && !pipe->top_pipe && !pipe->prev_odm_pipe) {
+ if (pixel_rate_crb) {
int bpp = source_format_to_bpp(pipes[pipe_cnt].pipe.src.source_format);
/* Ceil to crb segment size */
int approx_det_segs_required_for_pstate = dcn_get_approx_det_segs_required_for_pstate(
@@ -1747,28 +1755,26 @@ static int dcn315_populate_dml_pipes_from_context(
continue;
}
- if (!pipe->top_pipe && !pipe->prev_odm_pipe) {
- bool split_required = pipe->stream->timing.pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc)
- || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120);
-
- if (remaining_det_segs > MIN_RESERVED_DET_SEGS)
- pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes +
- (crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0);
- if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) {
- /* Clamp to 2 pipe split max det segments */
- remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override - 2 * (DCN3_15_MAX_DET_SEGS);
- pipes[pipe_cnt].pipe.src.det_size_override = 2 * DCN3_15_MAX_DET_SEGS;
- }
- if (pipes[pipe_cnt].pipe.src.det_size_override > DCN3_15_MAX_DET_SEGS || split_required) {
- /* If we are splitting we must have an even number of segments */
- remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override % 2;
- pipes[pipe_cnt].pipe.src.det_size_override -= pipes[pipe_cnt].pipe.src.det_size_override % 2;
- }
- /* Convert segments into size for DML use */
- pipes[pipe_cnt].pipe.src.det_size_override *= DCN3_15_CRB_SEGMENT_SIZE_KB;
-
- crb_idx++;
+ bool split_required = pipe->stream->timing.pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc)
+ || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120);
+
+ if (remaining_det_segs > MIN_RESERVED_DET_SEGS && crb_pipes != 0)
+ pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes +
+ (crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0);
+ if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) {
+ /* Clamp to 2 pipe split max det segments */
+ remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override - 2 * (DCN3_15_MAX_DET_SEGS);
+ pipes[pipe_cnt].pipe.src.det_size_override = 2 * DCN3_15_MAX_DET_SEGS;
}
+ if (pipes[pipe_cnt].pipe.src.det_size_override > DCN3_15_MAX_DET_SEGS || split_required) {
+ /* If we are splitting we must have an even number of segments */
+ remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override % 2;
+ pipes[pipe_cnt].pipe.src.det_size_override -= pipes[pipe_cnt].pipe.src.det_size_override % 2;
+ }
+ /* Convert segments into size for DML use */
+ pipes[pipe_cnt].pipe.src.det_size_override *= DCN3_15_CRB_SEGMENT_SIZE_KB;
+
+ crb_idx++;
pipe_cnt++;
}
}
@@ -1803,6 +1809,11 @@ static void dcn315_get_panel_config_defaults(struct dc_panel_config *panel_confi
*panel_config = panel_config_defaults;
}
+static int dcn315_get_power_profile(const struct dc_state *context)
+{
+ return !context->bw_ctx.bw.dcn.clk.p_state_change_support;
+}
+
static struct dc_cap_funcs cap_funcs = {
.get_dcc_compression_cap = dcn20_get_dcc_compression_cap
};
@@ -1819,6 +1830,7 @@ static struct resource_funcs dcn315_res_pool_funcs = {
.update_soc_for_wm_a = dcn315_update_soc_for_wm_a,
.populate_dml_pipes = dcn315_populate_dml_pipes_from_context,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -1830,6 +1842,11 @@ static struct resource_funcs dcn315_res_pool_funcs = {
.update_bw_bounding_box = dcn315_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn315_get_panel_config_defaults,
+ .get_power_profile = dcn315_get_power_profile,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe,
+ .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch,
+ .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params
};
static bool dcn315_resource_construct(
@@ -2042,6 +2059,14 @@ static bool dcn315_resource_construct(
goto create_fail;
}
+ /* Replay */
+ pool->base.replay = dmub_replay_create(ctx);
+ if (pool->base.replay == NULL) {
+ dm_error("DC: failed to create replay obj!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
/* ABM */
for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
pool->base.multiple_abms[i] = dmub_abm_create(ctx,
@@ -2117,6 +2142,8 @@ static bool dcn315_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 4;
+
dc->cap_funcs = cap_funcs;
dc->dcn_ip->max_num_dpp = dcn3_15_ip.max_num_dpp;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h
index 22849eaa6f24..22849eaa6f24 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
index 5fe2c61527df..636110e48d01 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
@@ -47,7 +47,7 @@
#include "dcn31/dcn31_optc.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn30/dcn30_opp.h"
#include "dcn20/dcn20_dsc.h"
#include "dcn30/dcn30_vpg.h"
@@ -125,7 +125,6 @@
#include "link_enc_cfg.h"
#define DCN3_16_MAX_DET_SIZE 384
-#define DCN3_16_MIN_COMPBUF_SIZE_KB 128
#define DCN3_16_CRB_SEGMENT_SIZE_KB 64
enum dcn31_clk_src_array_id {
@@ -854,7 +853,6 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_z10 = true, /*hw not support it*/
.disable_dmcu = true,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = false,
.pipe_split_policy = MPC_SPLIT_DYNAMIC,
@@ -885,6 +883,7 @@ static const struct dc_debug_options debug_defaults_drv = {
}
},
.enable_legacy_fast_update = true,
+ .using_dml2 = false,
};
static const struct dc_panel_config panel_config_defaults = {
@@ -1085,7 +1084,7 @@ static struct link_encoder *dcn31_link_encoder_create(
struct dcn20_link_encoder *enc20 =
kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
dcn31_link_encoder_construct(enc20,
@@ -1305,6 +1304,8 @@ static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create(
/* allocate HPO link encoder */
hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL);
+ if (!hpo_dp_enc31)
+ return NULL; /* out of memory */
hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst,
&hpo_dp_link_enc_regs[inst],
@@ -1609,15 +1610,15 @@ static bool is_dual_plane(enum surface_pixel_format format)
static int dcn316_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int i, pipe_cnt;
struct resource_context *res_ctx = &context->res_ctx;
- struct pipe_ctx *pipe;
+ struct pipe_ctx *pipe = 0;
const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_16_MIN_COMPBUF_SIZE_KB;
DC_FP_START();
- dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+ dcn31x_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
DC_FP_END();
for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1706,6 +1707,7 @@ static struct resource_funcs dcn316_res_pool_funcs = {
.update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
.populate_dml_pipes = dcn316_populate_dml_pipes_from_context,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -1717,6 +1719,10 @@ static struct resource_funcs dcn316_res_pool_funcs = {
.update_bw_bounding_box = dcn316_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn316_get_panel_config_defaults,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe,
+ .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch,
+ .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params
};
static bool dcn316_resource_construct(
@@ -2004,6 +2010,8 @@ static bool dcn316_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 4;
+
dc->cap_funcs = cap_funcs;
dc->dcn_ip->max_num_dpp = dcn3_16_ip.max_num_dpp;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h
index aba6d634131b..aba6d634131b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index f9d601c8c721..3965a7f1b64b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -24,10 +24,11 @@
*
*/
+#include "dc_types.h"
#include "dm_services.h"
#include "dc.h"
-#include "dcn32_init.h"
+#include "dcn32/dcn32_init.h"
#include "resource.h"
#include "include/irq_service_interface.h"
@@ -41,13 +42,13 @@
#include "dcn31/dcn31_hubbub.h"
#include "dcn32/dcn32_hubbub.h"
#include "dcn32/dcn32_mpc.h"
-#include "dcn32_hubp.h"
+#include "dcn32/dcn32_hubp.h"
#include "irq/dcn32/irq_service_dcn32.h"
#include "dcn32/dcn32_dpp.h"
#include "dcn32/dcn32_optc.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn30/dcn30_opp.h"
#include "dcn20/dcn20_dsc.h"
#include "dcn30/dcn30_vpg.h"
@@ -68,7 +69,7 @@
#include "dml/display_mode_vba.h"
#include "dcn32/dcn32_dccg.h"
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn31/dcn31_panel_cntl.h"
#include "dcn30/dcn30_dwb.h"
@@ -89,6 +90,10 @@
#include "dcn20/dcn20_vmid.h"
#include "dml/dcn32/dcn32_fpu.h"
+#include "dc_state_priv.h"
+
+#include "dml2/dml2_wrapper.h"
+
#define DC_LOGGER_INIT(logger)
enum dcn32_clk_src_array_id {
@@ -685,7 +690,6 @@ static const struct dc_plane_cap plane_cap = {
static const struct dc_debug_options debug_defaults_drv = {
.disable_dmcu = true,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = false,
.pipe_split_policy = MPC_SPLIT_AVOID, // Due to CRB, no need to MPC split anymore
@@ -714,6 +718,8 @@ static const struct dc_debug_options debug_defaults_drv = {
.use_max_lb = true,
.force_disable_subvp = false,
.exit_idle_opt_for_cursor_updates = true,
+ .using_dml2 = false,
+ .using_dml21 = false, // TODO : Temporary for N-1 validation. Remove after N-1 is done.
.enable_single_display_2to1_odm_policy = true,
/* Must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/
@@ -733,6 +739,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.fpo_vactive_min_active_margin_us = 200,
.fpo_vactive_max_blank_us = 1000,
.enable_legacy_fast_update = false,
+ .disable_stutter_for_wm_program = true
};
static struct dce_aux *dcn32_aux_engine_create(
@@ -1033,7 +1040,7 @@ static struct link_encoder *dcn32_link_encoder_create(
struct dcn20_link_encoder *enc20 =
kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
#undef REG_STRUCT
@@ -1299,6 +1306,8 @@ static struct hpo_dp_link_encoder *dcn32_hpo_dp_link_encoder_create(
/* allocate HPO link encoder */
hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL);
+ if (!hpo_dp_enc31)
+ return NULL; /* out of memory */
#undef REG_STRUCT
#define REG_STRUCT hpo_dp_link_enc_regs
@@ -1641,7 +1650,10 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state)
phantom_plane = prev_phantom_plane;
else
- phantom_plane = dc_create_plane_state(dc);
+ phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state);
+
+ if (!phantom_plane)
+ continue;
memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address));
memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality,
@@ -1662,9 +1674,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
phantom_plane->clip_rect.y = 0;
phantom_plane->clip_rect.height = phantom_stream->src.height;
- phantom_plane->is_phantom = true;
-
- dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context);
+ dc_state_add_phantom_plane(dc, phantom_stream, phantom_plane, context);
curr_pipe = curr_pipe->bottom_pipe;
prev_phantom_plane = phantom_plane;
@@ -1680,13 +1690,9 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
struct dc_stream_state *phantom_stream = NULL;
struct pipe_ctx *ref_pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx];
- phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink);
- phantom_stream->signal = SIGNAL_TYPE_VIRTUAL;
- phantom_stream->dpms_off = true;
- phantom_stream->mall_stream_config.type = SUBVP_PHANTOM;
- phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream;
- ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN;
- ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream;
+ phantom_stream = dc_state_create_phantom_stream(dc, context, ref_pipe->stream);
+ if (!phantom_stream)
+ return phantom_stream;
/* stream has limited viewport and small timing */
memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing));
@@ -1696,81 +1702,10 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx);
DC_FP_END();
- dc_add_stream_to_ctx(dc, context, phantom_stream);
+ dc_state_add_phantom_stream(dc, context, phantom_stream, ref_pipe->stream);
return phantom_stream;
}
-void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context)
-{
- int i;
- struct dc_plane_state *phantom_plane = NULL;
- struct dc_stream_state *phantom_stream = NULL;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (resource_is_pipe_type(pipe, OTG_MASTER) &&
- resource_is_pipe_type(pipe, DPP_PIPE) &&
- pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
- phantom_plane = pipe->plane_state;
- phantom_stream = pipe->stream;
-
- dc_plane_state_retain(phantom_plane);
- dc_stream_retain(phantom_stream);
- }
- }
-}
-
-// return true if removed piped from ctx, false otherwise
-bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update)
-{
- int i;
- bool removed_pipe = false;
- struct dc_plane_state *phantom_plane = NULL;
- struct dc_stream_state *phantom_stream = NULL;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- // build scaling params for phantom pipes
- if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
- phantom_plane = pipe->plane_state;
- phantom_stream = pipe->stream;
-
- dc_rem_all_planes_for_stream(dc, pipe->stream, context);
- dc_remove_stream_from_ctx(dc, context, pipe->stream);
-
- /* Ref count is incremented on allocation and also when added to the context.
- * Therefore we must call release for the the phantom plane and stream once
- * they are removed from the ctx to finally decrement the refcount to 0 to free.
- */
- dc_plane_state_release(phantom_plane);
- dc_stream_release(phantom_stream);
-
- removed_pipe = true;
- }
-
- /* For non-full updates, a shallow copy of the current state
- * is created. In this case we don't want to erase the current
- * state (there can be 2 HIRQL threads, one in flip, and one in
- * checkMPO) that can cause a race condition.
- *
- * This is just a workaround, needs a proper fix.
- */
- if (!fast_update) {
- // Clear all phantom stream info
- if (pipe->stream) {
- pipe->stream->mall_stream_config.type = SUBVP_NONE;
- pipe->stream->mall_stream_config.paired_stream = NULL;
- }
-
- if (pipe->plane_state) {
- pipe->plane_state->is_phantom = false;
- }
- }
- }
- return removed_pipe;
-}
-
/* TODO: Input to this function should indicate which pipe indexes (or streams)
* require a phantom pipe / stream
*/
@@ -1786,6 +1721,9 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context,
// be a valid candidate for SubVP (i.e. has a plane, stream, doesn't
// already have phantom pipe assigned, etc.) by previous checks.
phantom_stream = dcn32_enable_phantom_stream(dc, context, pipes, pipe_cnt, index);
+ if (!phantom_stream)
+ return;
+
dcn32_enable_phantom_plane(dc, context, phantom_stream, index);
for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1795,7 +1733,7 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context,
// We determine which phantom pipes were added by comparing with
// the phantom stream.
if (pipe->plane_state && pipe->stream && pipe->stream == phantom_stream &&
- pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+ dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
pipe->stream->use_dynamic_meta = false;
pipe->plane_state->flip_immediate = false;
if (!resource_build_scaling_params(pipe)) {
@@ -1805,9 +1743,7 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context,
}
}
-bool dcn32_validate_bandwidth(struct dc *dc,
- struct dc_state *context,
- bool fast_validate)
+static bool dml1_validate(struct dc *dc, struct dc_state *context, enum dc_validate_mode validate_mode)
{
bool out = false;
@@ -1815,8 +1751,8 @@ bool dcn32_validate_bandwidth(struct dc *dc,
int vlevel = 0;
int pipe_cnt = 0;
- display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
- struct mall_temp_config mall_temp_config;
+ display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count,
+ sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
/* To handle Freesync properly, setting FreeSync DML parameters
* to its default state for the first stage of validation
@@ -1826,29 +1762,15 @@ bool dcn32_validate_bandwidth(struct dc *dc,
DC_LOGGER_INIT(dc->ctx->logger);
- /* For fast validation, there are situations where a shallow copy of
- * of the dc->current_state is created for the validation. In this case
- * we want to save and restore the mall config because we always
- * teardown subvp at the beginning of validation (and don't attempt
- * to add it back if it's fast validation). If we don't restore the
- * subvp config in cases of fast validation + shallow copy of the
- * dc->current_state, the dc->current_state will have a partially
- * removed subvp state when we did not intend to remove it.
- */
- if (fast_validate) {
- memset(&mall_temp_config, 0, sizeof(mall_temp_config));
- dcn32_save_mall_state(dc, context, &mall_temp_config);
- }
-
BW_VAL_TRACE_COUNT();
+ if (!pipes)
+ goto validate_fail;
+
DC_FP_START();
- out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate);
+ out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, validate_mode);
DC_FP_END();
- if (fast_validate)
- dcn32_restore_mall_state(dc, context, &mall_temp_config);
-
if (pipe_cnt == 0)
goto validate_out;
@@ -1857,7 +1779,7 @@ bool dcn32_validate_bandwidth(struct dc *dc,
BW_VAL_TRACE_END_VOLTAGE_LEVEL();
- if (fast_validate) {
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) {
BW_VAL_TRACE_SKIP(fast);
goto validate_out;
}
@@ -1865,6 +1787,7 @@ bool dcn32_validate_bandwidth(struct dc *dc,
dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel);
dcn32_override_min_req_memclk(dc, context);
+ dcn32_override_min_req_dcfclk(dc, context);
BW_VAL_TRACE_END_WATERMARKS();
@@ -1885,46 +1808,108 @@ validate_out:
return out;
}
+enum dc_status dcn32_validate_bandwidth(struct dc *dc,
+ struct dc_state *context,
+ enum dc_validate_mode validate_mode)
+{
+ unsigned int i;
+ enum dc_status status;
+ const struct dc_stream_state *stream;
+
+ /* reset cursor limitations on subvp */
+ for (i = 0; i < context->stream_count; i++) {
+ stream = context->streams[i];
+
+ if (dc_state_can_clear_stream_cursor_subvp_limit(stream, context)) {
+ dc_state_set_stream_cursor_subvp_limit(stream, context, false);
+ }
+ }
+
+ if (dc->debug.using_dml2)
+ status = dml2_validate(dc, context,
+ context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2,
+ validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
+ else
+ status = dml1_validate(dc, context, validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
+
+ if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING && status == DC_OK && dc_state_is_subvp_in_use(context)) {
+ /* check new stream configuration still supports cursor if subvp used */
+ for (i = 0; i < context->stream_count; i++) {
+ stream = context->streams[i];
+
+ if (dc_state_get_stream_subvp_type(context, stream) != SUBVP_PHANTOM &&
+ stream->cursor_position.enable &&
+ !dc_stream_check_cursor_attributes(stream, context, &stream->cursor_attributes)) {
+ /* hw cursor cannot be supported with subvp active, so disable subvp for now */
+ dc_state_set_stream_cursor_subvp_limit(stream, context, true);
+ status = DC_FAIL_HW_CURSOR_SUPPORT;
+ }
+ };
+ }
+
+ if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING && status == DC_FAIL_HW_CURSOR_SUPPORT) {
+ /* attempt to validate again with subvp disabled due to cursor */
+ if (dc->debug.using_dml2)
+ status = dml2_validate(dc, context,
+ context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2,
+ validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
+ else
+ status = dml1_validate(dc, context, validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
+ }
+
+ return status;
+}
+
int dcn32_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate)
+ enum dc_validate_mode validate_mode)
{
int i, pipe_cnt;
struct resource_context *res_ctx = &context->res_ctx;
struct pipe_ctx *pipe = NULL;
bool subvp_in_use = false;
struct dc_crtc_timing *timing;
- bool vsr_odm_support = false;
-
- dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
-
- /* Determine whether we will apply ODM 2to1 policy:
- * Applies to single display and where the number of planes is less than 3.
- * For 3 plane case ( 2 MPO planes ), we will not set the policy for the MPO pipes.
- *
- * Apply pipe split policy first so we can predict the pipe split correctly
- * (dcn32_predict_pipe_split).
+ int subvp_main_pipe_index = -1;
+ enum mall_stream_type mall_type;
+ bool single_display_subvp = false;
+ struct dc_stream_state *stream = NULL;
+ int num_subvp_main = 0;
+ int num_subvp_phantom = 0;
+ int num_subvp_none = 0;
+ int odm_slice_count;
+
+ dcn20_populate_dml_pipes_from_context(dc, context, pipes, validate_mode);
+
+ /* For single display subvp, look for subvp main so if we have phantom
+ * pipe, we can set odm policy to match main pipe
*/
- for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
- if (!res_ctx->pipe_ctx[i].stream)
- continue;
- pipe = &res_ctx->pipe_ctx[i];
- timing = &pipe->stream->timing;
-
- pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
- vsr_odm_support = (res_ctx->pipe_ctx[i].stream->src.width >= 5120 &&
- res_ctx->pipe_ctx[i].stream->src.width > res_ctx->pipe_ctx[i].stream->dst.width);
- if (context->stream_count == 1 &&
- context->stream_status[0].plane_count == 1 &&
- !dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal) &&
- is_h_timing_divisible_by_2(res_ctx->pipe_ctx[i].stream) &&
- pipe->stream->timing.pix_clk_100hz * 100 > DCN3_2_VMIN_DISPCLK_HZ &&
- dc->debug.enable_single_display_2to1_odm_policy &&
- !vsr_odm_support) { //excluding 2to1 ODM combine on >= 5k vsr
- pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+ for (i = 0; i < context->stream_count; i++) {
+ stream = context->streams[i];
+ mall_type = dc_state_get_stream_subvp_type(context, stream);
+ if (mall_type == SUBVP_MAIN)
+ num_subvp_main++;
+ else if (mall_type == SUBVP_PHANTOM)
+ num_subvp_phantom++;
+ else
+ num_subvp_none++;
+ }
+ if (num_subvp_main == 1 && num_subvp_phantom == 1 && num_subvp_none == 0)
+ single_display_subvp = true;
+
+ if (single_display_subvp) {
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &res_ctx->pipe_ctx[i];
+ if (!res_ctx->pipe_ctx[i].stream)
+ continue;
+
+ mall_type = dc_state_get_pipe_subvp_type(context, pipe);
+ if (mall_type == SUBVP_MAIN) {
+ if (resource_is_pipe_type(pipe, OTG_MASTER))
+ subvp_main_pipe_index = i;
+ }
+ pipe_cnt++;
}
- pipe_cnt++;
}
for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1939,6 +1924,36 @@ int dcn32_populate_dml_pipes_from_context(
dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
+ if (dc->config.enable_windowed_mpo_odm &&
+ dc->debug.enable_single_display_2to1_odm_policy) {
+ /* For single display subvp, if pipe is phantom pipe,
+ * then copy odm policy from subvp main pipe
+ */
+ mall_type = dc_state_get_pipe_subvp_type(context, pipe);
+ if (single_display_subvp && (mall_type == SUBVP_PHANTOM)) {
+ if (subvp_main_pipe_index < 0) {
+ odm_slice_count = -1;
+ ASSERT(0);
+ } else {
+ odm_slice_count = resource_get_odm_slice_count(&res_ctx->pipe_ctx[subvp_main_pipe_index]);
+ }
+ } else {
+ odm_slice_count = resource_get_odm_slice_count(pipe);
+ }
+ switch (odm_slice_count) {
+ case 2:
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+ break;
+ case 4:
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1;
+ break;
+ default:
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+ }
+ } else {
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+ }
+
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19;
@@ -1946,8 +1961,8 @@ int dcn32_populate_dml_pipes_from_context(
/* Only populate DML input with subvp info for full updates.
* This is just a workaround -- needs a proper fix.
*/
- if (!fast_validate) {
- switch (pipe->stream->mall_stream_config.type) {
+ if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING) {
+ switch (dc_state_get_pipe_subvp_type(context, pipe)) {
case SUBVP_MAIN:
pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
subvp_in_use = true;
@@ -1985,9 +2000,6 @@ int dcn32_populate_dml_pipes_from_context(
}
}
- DC_FP_START();
- dcn32_predict_pipe_split(context, &pipes[pipe_cnt]);
- DC_FP_END();
pipe_cnt++;
}
@@ -2009,8 +2021,33 @@ int dcn32_populate_dml_pipes_from_context(
return pipe_cnt;
}
+unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned int total_size_in_mall_bytes)
+{
+ uint32_t cache_lines_used, lines_per_way, total_cache_lines, num_ways;
+
+ if (total_size_in_mall_bytes == 0) {
+ return 0;
+ }
+
+ if (dc->caps.max_cab_allocation_bytes == 0) {
+ return 0xffffffff;
+ }
+
+ /* add 2 lines for worst case alignment */
+ cache_lines_used = total_size_in_mall_bytes / dc->caps.cache_line_size + 2;
+
+ total_cache_lines = dc->caps.max_cab_allocation_bytes / dc->caps.cache_line_size;
+ lines_per_way = total_cache_lines / dc->caps.cache_num_ways;
+ num_ways = cache_lines_used / lines_per_way;
+ if (cache_lines_used % lines_per_way > 0)
+ num_ways++;
+
+ return num_ways;
+}
+
static struct dc_cap_funcs cap_funcs = {
- .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
+ .get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
+ .get_subvp_en = dcn32_subvp_in_use,
};
void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context,
@@ -2026,10 +2063,30 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context,
static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
DC_FP_START();
+
dcn32_update_bw_bounding_box_fpu(dc, bw_params);
+
+ if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2)
+ dml2_reinit(dc, &dc->dml2_options, &dc->current_state->bw_ctx.dml2);
+
+ if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2_dc_power_source)
+ dml2_reinit(dc, &dc->dml2_dc_power_options, &dc->current_state->bw_ctx.dml2_dc_power_source);
+
DC_FP_END();
}
+unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc,
+ struct dc_state *state,
+ const struct dc_stream_state *stream)
+{
+ bool limit_cur_to_buf;
+
+ limit_cur_to_buf = dc_state_get_stream_subvp_cursor_limit(stream, state) &&
+ !stream->hw_cursor_req;
+
+ return limit_cur_to_buf ? dc->caps.max_buffered_cursor_size : dc->caps.max_cursor_size;
+}
+
static struct resource_funcs dcn32_res_pool_funcs = {
.destroy = dcn32_destroy_resource_pool,
.link_enc_create = dcn32_link_encoder_create,
@@ -2039,6 +2096,8 @@ static struct resource_funcs dcn32_res_pool_funcs = {
.calculate_wm_and_dlg = dcn32_calculate_wm_and_dlg,
.populate_dml_pipes = dcn32_populate_dml_pipes_from_context,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn32_acquire_free_pipe_as_secondary_dpp_pipe,
+ .acquire_free_pipe_as_secondary_opp_head = dcn32_acquire_free_pipe_as_secondary_opp_head,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -2051,10 +2110,10 @@ static struct resource_funcs dcn32_res_pool_funcs = {
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
.add_phantom_pipes = dcn32_add_phantom_pipes,
- .remove_phantom_pipes = dcn32_remove_phantom_pipes,
- .retain_phantom_pipes = dcn32_retain_phantom_pipes,
- .save_mall_state = dcn32_save_mall_state,
- .restore_mall_state = dcn32_restore_mall_state,
+ .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
+ .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe,
+ .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size,
};
static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -2101,8 +2160,6 @@ static bool dcn32_resource_construct(
#define REG_STRUCT dccg_regs
dccg_regs_init();
- DC_FP_START();
-
ctx->dc_bios->regs = &bios_regs;
pool->base.res_cap = &res_cap_dcn32;
@@ -2140,10 +2197,12 @@ static bool dcn32_resource_construct(
dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/
/* TODO: Bring max_cursor_size back to 256 after subvp cursor corruption is fixed*/
dc->caps.max_cursor_size = 64;
+ dc->caps.max_buffered_cursor_size = 64; // sqrt(16 * 1024 / 4)
dc->caps.min_horizontal_blanking_period = 80;
dc->caps.dmdata_alloc_size = 2048;
dc->caps.mall_size_per_mem_channel = 4;
- dc->caps.mall_size_total = 0;
+ /* total size = mall per channel * num channels * 1024 * 1024 */
+ dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576;
dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8;
dc->caps.cache_line_size = 64;
@@ -2177,6 +2236,7 @@ static bool dcn32_resource_construct(
dc->caps.dmcub_support = true;
dc->caps.seamless_odm = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
@@ -2192,7 +2252,7 @@ static bool dcn32_resource_construct(
dc->caps.color.dpp.gamma_corr = 1;
dc->caps.color.dpp.dgam_rom_for_yuv = 0;
- dc->caps.color.dpp.hw_3d_lut = 1;
+ dc->caps.color.dpp.hw_3d_lut = 0;
dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1
// no OGAM ROM on DCN2 and later ASICs
dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
@@ -2211,11 +2271,14 @@ static bool dcn32_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.pq = 0;
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ dc->caps.color.mpc.preblend = true;
/* Use pipe context based otg sync logic */
dc->config.use_pipe_ctx_sync_logic = true;
dc->config.dc_mode_clk_limit_support = true;
+ dc->config.enable_windowed_mpo_odm = true;
+ dc->config.disable_hbr_audio_dp2 = true;
/* read VBIOS LTTPR caps */
{
if (ctx->dc_bios->funcs->get_lttpr_caps) {
@@ -2438,6 +2501,8 @@ static bool dcn32_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 4;
+
dc->cap_funcs = cap_funcs;
if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
@@ -2451,17 +2516,46 @@ static bool dcn32_resource_construct(
pool->base.oem_device = NULL;
}
+ dc->dml2_options.dcn_pipe_count = pool->base.pipe_count;
+ dc->dml2_options.use_native_soc_bb_construction = true;
+ dc->dml2_options.minimize_dispclk_using_odm = true;
+
+ resource_init_common_dml2_callbacks(dc, &dc->dml2_options);
+ dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch;
+ dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc;
+ dc->dml2_options.svp_pstate.callbacks.calculate_mall_ways_from_bytes = pool->base.funcs->calculate_mall_ways_from_bytes;
+
+ dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us;
+ dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us;
+ dc->dml2_options.svp_pstate.subvp_pstate_allow_width_us = dc->caps.subvp_pstate_allow_width_us;
+ dc->dml2_options.svp_pstate.subvp_swath_height_margin_lines = dc->caps.subvp_swath_height_margin_lines;
+
+ dc->dml2_options.svp_pstate.force_disable_subvp = dc->debug.force_disable_subvp;
+ dc->dml2_options.svp_pstate.force_enable_subvp = dc->debug.force_subvp_mclk_switch;
+
+ dc->dml2_options.mall_cfg.cache_line_size_bytes = dc->caps.cache_line_size;
+ dc->dml2_options.mall_cfg.cache_num_ways = dc->caps.cache_num_ways;
+ dc->dml2_options.mall_cfg.max_cab_allocation_bytes = dc->caps.max_cab_allocation_bytes;
+ dc->dml2_options.mall_cfg.mblk_height_4bpe_pixels = DCN3_2_MBLK_HEIGHT_4BPE;
+ dc->dml2_options.mall_cfg.mblk_height_8bpe_pixels = DCN3_2_MBLK_HEIGHT_8BPE;
+ dc->dml2_options.mall_cfg.mblk_size_bytes = DCN3_2_MALL_MBLK_SIZE_BYTES;
+ dc->dml2_options.mall_cfg.mblk_width_pixels = DCN3_2_MBLK_WIDTH;
+
+ dc->dml2_options.max_segments_per_hubp = 18;
+ dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;
+ dc->dml2_options.map_dc_pipes_with_callbacks = true;
+
if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0))
dc->config.sdpif_request_limit_words_per_umc = 16;
- DC_FP_END();
+ /* init DC limited DML2 options */
+ memcpy(&dc->dml2_dc_power_options, &dc->dml2_options, sizeof(struct dml2_configuration_options));
+ dc->dml2_dc_power_options.use_clock_dc_limits = true;
return true;
create_fail:
- DC_FP_END();
-
dcn32_resource_destruct(pool);
return false;
@@ -2527,7 +2621,7 @@ struct resource_pool *dcn32_create_resource_pool(
* full update which delays the flip for 1 frame. If we use the original pipe
* we don't have to toggle its power. So we can flip faster.
*/
-static int find_optimal_free_pipe_as_secondary_dpp_pipe(
+int dcn32_find_optimal_free_pipe_as_secondary_dpp_pipe(
const struct resource_context *cur_res_ctx,
struct resource_context *new_res_ctx,
const struct resource_pool *pool,
@@ -2637,8 +2731,10 @@ static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer(
struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx;
int head_index;
- if (!head_pipe)
+ if (!head_pipe) {
ASSERT(0);
+ return NULL;
+ }
/*
* Modified from dcn20_acquire_idle_pipe_for_layer
@@ -2669,6 +2765,33 @@ static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer(
return idle_pipe;
}
+static int find_optimal_free_pipe_as_secondary_opp_head(
+ const struct resource_context *cur_res_ctx,
+ struct resource_context *new_res_ctx,
+ const struct resource_pool *pool,
+ const struct pipe_ctx *new_otg_master)
+{
+ const struct pipe_ctx *cur_otg_master;
+ int free_pipe_idx;
+
+ cur_otg_master = &cur_res_ctx->pipe_ctx[new_otg_master->pipe_idx];
+ free_pipe_idx = resource_find_free_pipe_used_as_sec_opp_head_by_cur_otg_master(
+ cur_res_ctx, new_res_ctx, cur_otg_master);
+
+ /* Up until here if we have not found a free secondary pipe, we will
+ * need to wait for at least one frame to complete the transition
+ * sequence.
+ */
+ if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+ free_pipe_idx = recource_find_free_pipe_not_used_in_cur_res_ctx(
+ cur_res_ctx, new_res_ctx, pool);
+
+ if (free_pipe_idx == FREE_PIPE_INDEX_NOT_FOUND)
+ free_pipe_idx = resource_find_any_free_pipe(new_res_ctx, pool);
+
+ return free_pipe_idx;
+}
+
struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe(
const struct dc_state *cur_ctx,
struct dc_state *new_ctx,
@@ -2683,7 +2806,7 @@ struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe(
return dcn32_acquire_idle_pipe_for_head_pipe_in_layer(
new_ctx, pool, opp_head_pipe->stream, opp_head_pipe);
- free_pipe_idx = find_optimal_free_pipe_as_secondary_dpp_pipe(
+ free_pipe_idx = dcn32_find_optimal_free_pipe_as_secondary_dpp_pipe(
&cur_ctx->res_ctx, &new_ctx->res_ctx,
pool, opp_head_pipe);
if (free_pipe_idx >= 0) {
@@ -2706,6 +2829,50 @@ struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe(
return free_pipe;
}
+struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head(
+ const struct dc_state *cur_ctx,
+ struct dc_state *new_ctx,
+ const struct resource_pool *pool,
+ const struct pipe_ctx *otg_master)
+{
+ int free_pipe_idx = find_optimal_free_pipe_as_secondary_opp_head(
+ &cur_ctx->res_ctx, &new_ctx->res_ctx,
+ pool, otg_master);
+ struct pipe_ctx *free_pipe;
+
+ if (free_pipe_idx >= 0) {
+ free_pipe = &new_ctx->res_ctx.pipe_ctx[free_pipe_idx];
+ free_pipe->pipe_idx = free_pipe_idx;
+ free_pipe->stream = otg_master->stream;
+ free_pipe->stream_res.tg = otg_master->stream_res.tg;
+ free_pipe->stream_res.dsc = NULL;
+ free_pipe->stream_res.opp = pool->opps[free_pipe_idx];
+ free_pipe->plane_res.mi = pool->mis[free_pipe_idx];
+ free_pipe->plane_res.hubp = pool->hubps[free_pipe_idx];
+ free_pipe->plane_res.ipp = pool->ipps[free_pipe_idx];
+ free_pipe->plane_res.xfm = pool->transforms[free_pipe_idx];
+ free_pipe->plane_res.dpp = pool->dpps[free_pipe_idx];
+ free_pipe->plane_res.mpcc_inst = pool->dpps[free_pipe_idx]->inst;
+ free_pipe->dsc_padding_params = otg_master->dsc_padding_params;
+ if (free_pipe->stream->timing.flags.DSC == 1) {
+ dcn20_acquire_dsc(free_pipe->stream->ctx->dc,
+ &new_ctx->res_ctx,
+ &free_pipe->stream_res.dsc,
+ free_pipe_idx);
+ ASSERT(free_pipe->stream_res.dsc);
+ if (free_pipe->stream_res.dsc == NULL) {
+ memset(free_pipe, 0, sizeof(*free_pipe));
+ free_pipe = NULL;
+ }
+ }
+ } else {
+ ASSERT(otg_master);
+ free_pipe = NULL;
+ }
+
+ return free_pipe;
+}
+
unsigned int dcn32_calc_num_avail_chans_for_mall(struct dc *dc, int num_chans)
{
/*
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index 103a2b54d025..99f0432288b4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -38,9 +38,11 @@
#define DCN3_2_MBLK_HEIGHT_4BPE 128
#define DCN3_2_MBLK_HEIGHT_8BPE 64
#define DCN3_2_DCFCLK_DS_INIT_KHZ 10000 // Choose 10Mhz for init DCFCLK DS freq
-#define SUBVP_HIGH_REFRESH_LIST_LEN 3
+#define SUBVP_HIGH_REFRESH_LIST_LEN 4
+#define SUBVP_ACTIVE_MARGIN_LIST_LEN 2
#define DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ 1800
#define DCN3_2_VMIN_DISPCLK_HZ 717000000
+#define MIN_SUBVP_DCFCLK_KHZ 400000
#define TO_DCN32_RES_POOL(pool)\
container_of(pool, struct dcn32_resource_pool, base)
@@ -57,6 +59,15 @@ struct subvp_high_refresh_list {
} res[SUBVP_HIGH_REFRESH_LIST_LEN];
};
+struct subvp_active_margin_list {
+ int min_refresh;
+ int max_refresh;
+ struct {
+ int width;
+ int height;
+ } res[SUBVP_ACTIVE_MARGIN_LIST_LEN];
+};
+
struct dcn32_resource_pool {
struct resource_pool base;
};
@@ -81,26 +92,20 @@ bool dcn32_release_post_bldn_3dlut(
struct dc_3dlut **lut,
struct dc_transfer_func **shaper);
-bool dcn32_remove_phantom_pipes(struct dc *dc,
- struct dc_state *context, bool fast_update);
-
-void dcn32_retain_phantom_pipes(struct dc *dc,
- struct dc_state *context);
-
void dcn32_add_phantom_pipes(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes,
unsigned int pipe_cnt,
unsigned int index);
-bool dcn32_validate_bandwidth(struct dc *dc,
+enum dc_status dcn32_validate_bandwidth(struct dc *dc,
struct dc_state *context,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
int dcn32_populate_dml_pipes_from_context(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
- bool fast_validate);
+ enum dc_validate_mode validate_mode);
void dcn32_calculate_wm_and_dlg(
struct dc *dc, struct dc_state *context,
@@ -108,10 +113,6 @@ void dcn32_calculate_wm_and_dlg(
int pipe_cnt,
int vlevel);
-uint32_t dcn32_helper_mall_bytes_to_ways(
- struct dc *dc,
- uint32_t total_size_in_mall_bytes);
-
uint32_t dcn32_helper_calculate_mall_bytes_for_cursor(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
@@ -136,12 +137,28 @@ bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context);
bool dcn32_is_center_timing(struct pipe_ctx *pipe);
bool dcn32_is_psr_capable(struct pipe_ctx *pipe);
+int dcn32_find_optimal_free_pipe_as_secondary_dpp_pipe(
+ const struct resource_context *cur_res_ctx,
+ struct resource_context *new_res_ctx,
+ const struct resource_pool *pool,
+ const struct pipe_ctx *new_opp_head);
+
struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe(
const struct dc_state *cur_ctx,
struct dc_state *new_ctx,
const struct resource_pool *pool,
const struct pipe_ctx *opp_head_pipe);
+struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head(
+ const struct dc_state *cur_ctx,
+ struct dc_state *new_ctx,
+ const struct resource_pool *pool,
+ const struct pipe_ctx *otg_master);
+
+void dcn32_release_pipe(struct dc_state *context,
+ struct pipe_ctx *pipe,
+ const struct resource_pool *pool);
+
void dcn32_determine_det_override(struct dc *dc,
struct dc_state *context,
display_e2e_pipe_params_st *pipes);
@@ -149,15 +166,7 @@ void dcn32_determine_det_override(struct dc *dc,
void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes);
-void dcn32_save_mall_state(struct dc *dc,
- struct dc_state *context,
- struct mall_temp_config *temp_config);
-
-void dcn32_restore_mall_state(struct dc *dc,
- struct dc_state *context,
- struct mall_temp_config *temp_config);
-
-struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context);
+struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe);
@@ -173,11 +182,20 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context);
bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int vlevel);
+void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes);
+
+void dcn32_override_min_req_dcfclk(struct dc *dc, struct dc_state *context);
+
+unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned int total_size_in_mall_bytes);
+
+unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc,
+ struct dc_state *state,
+ const struct dc_stream_state *stream);
+
/* definitions for run time init of reg offsets */
/* CLK SRC */
#define CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid) \
- ( \
SRI_ARR_ALPHABET(PIXCLK_RESYNC_CNTL, PHYPLL, index, pllid), \
SRII_ARR_2(PHASE, DP_DTO, 0, index), \
SRII_ARR_2(PHASE, DP_DTO, 1, index), \
@@ -190,12 +208,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 0, index), \
SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 1, index), \
SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 2, index), \
- SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 3, index) \
- )
+ SRII_ARR_2(PIXEL_RATE_CNTL, OTG, 3, index)
/* ABM */
#define ABM_DCN32_REG_LIST_RI(id) \
- ( \
SRI_ARR(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \
SRI_ARR(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \
SRI_ARR(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \
@@ -207,12 +223,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \
SRI_ARR(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \
SRI_ARR(DC_ABM1_ACE_OFFSET_SLOPE_0, ABM, id), \
- SRI_ARR(DC_ABM1_ACE_THRES_12, ABM, id), NBIO_SR_ARR(BIOS_SCRATCH_2, id) \
- )
+ SRI_ARR(DC_ABM1_ACE_THRES_12, ABM, id), NBIO_SR_ARR(BIOS_SCRATCH_2, id)
/* Audio */
#define AUD_COMMON_REG_LIST_RI(id) \
- ( \
SRI_ARR(AZALIA_F0_CODEC_ENDPOINT_INDEX, AZF0ENDPOINT, id), \
SRI_ARR(AZALIA_F0_CODEC_ENDPOINT_DATA, AZF0ENDPOINT, id), \
SR_ARR(AZALIA_F0_CODEC_FUNCTION_PARAMETER_STREAM_FORMATS, id), \
@@ -221,41 +235,33 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SR_ARR(DCCG_AUDIO_DTO_SOURCE, id), SR_ARR(DCCG_AUDIO_DTO0_MODULE, id), \
SR_ARR(DCCG_AUDIO_DTO0_PHASE, id), SR_ARR(DCCG_AUDIO_DTO1_MODULE, id), \
SR_ARR(DCCG_AUDIO_DTO1_PHASE, id) \
- )
/* VPG */
#define VPG_DCN3_REG_LIST_RI(id) \
- ( \
SRI_ARR(VPG_GENERIC_STATUS, VPG, id), \
SRI_ARR(VPG_GENERIC_PACKET_ACCESS_CTRL, VPG, id), \
SRI_ARR(VPG_GENERIC_PACKET_DATA, VPG, id), \
SRI_ARR(VPG_GSP_FRAME_UPDATE_CTRL, VPG, id), \
- SRI_ARR(VPG_GSP_IMMEDIATE_UPDATE_CTRL, VPG, id) \
- )
+ SRI_ARR(VPG_GSP_IMMEDIATE_UPDATE_CTRL, VPG, id)
/* AFMT */
#define AFMT_DCN3_REG_LIST_RI(id) \
- ( \
SRI_ARR(AFMT_INFOFRAME_CONTROL0, AFMT, id), \
SRI_ARR(AFMT_VBI_PACKET_CONTROL, AFMT, id), \
SRI_ARR(AFMT_AUDIO_PACKET_CONTROL, AFMT, id), \
SRI_ARR(AFMT_AUDIO_PACKET_CONTROL2, AFMT, id), \
SRI_ARR(AFMT_AUDIO_SRC_CONTROL, AFMT, id), \
SRI_ARR(AFMT_60958_0, AFMT, id), SRI_ARR(AFMT_60958_1, AFMT, id), \
- SRI_ARR(AFMT_60958_2, AFMT, id), SRI_ARR(AFMT_MEM_PWR, AFMT, id) \
- )
+ SRI_ARR(AFMT_60958_2, AFMT, id), SRI_ARR(AFMT_MEM_PWR, AFMT, id)
/* APG */
#define APG_DCN31_REG_LIST_RI(id) \
- (\
SRI_ARR(APG_CONTROL, APG, id), SRI_ARR(APG_CONTROL2, APG, id), \
- SRI_ARR(APG_MEM_PWR, APG, id), SRI_ARR(APG_DBG_GEN_CONTROL, APG, id) \
- )
+ SRI_ARR(APG_MEM_PWR, APG, id), SRI_ARR(APG_DBG_GEN_CONTROL, APG, id)
/* Stream encoder */
#define SE_DCN32_REG_LIST_RI(id) \
- ( \
SRI_ARR(AFMT_CNTL, DIG, id), SRI_ARR(DIG_FE_CNTL, DIG, id), \
SRI_ARR(HDMI_CONTROL, DIG, id), SRI_ARR(HDMI_DB_CONTROL, DIG, id), \
SRI_ARR(HDMI_GC, DIG, id), \
@@ -299,28 +305,22 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \
SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \
SRI_ARR(DIG_FE_CNTL, DIG, id), SRI_ARR(DIG_CLOCK_PATTERN, DIG, id), \
- SRI_ARR(DIG_FIFO_CTRL0, DIG, id) \
- )
+ SRI_ARR(DIG_FIFO_CTRL0, DIG, id)
/* Aux regs */
#define AUX_REG_LIST_RI(id) \
- ( \
SRI_ARR(AUX_CONTROL, DP_AUX, id), SRI_ARR(AUX_DPHY_RX_CONTROL0, DP_AUX, id), \
- SRI_ARR(AUX_DPHY_RX_CONTROL1, DP_AUX, id) \
- )
+ SRI_ARR(AUX_DPHY_RX_CONTROL1, DP_AUX, id)
#define DCN2_AUX_REG_LIST_RI(id) \
- ( \
- AUX_REG_LIST_RI(id), SRI_ARR(AUX_DPHY_TX_CONTROL, DP_AUX, id) \
- )
+ AUX_REG_LIST_RI(id), SRI_ARR(AUX_DPHY_TX_CONTROL, DP_AUX, id)
/* HDP */
#define HPD_REG_LIST_RI(id) SRI_ARR(DC_HPD_CONTROL, HPD, id)
/* Link encoder */
#define LE_DCN3_REG_LIST_RI(id) \
- ( \
SRI_ARR(DIG_BE_CNTL, DIG, id), SRI_ARR(DIG_BE_EN_CNTL, DIG, id), \
SRI_ARR(TMDS_CTL_BITS, DIG, id), \
SRI_ARR(TMDS_DCBALANCER_CONTROL, DIG, id), SRI_ARR(DP_CONFIG, DP, id), \
@@ -334,26 +334,20 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(DP_SEC_CNTL, DP, id), SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \
SRI_ARR(DP_DPHY_FAST_TRAINING, DP, id), SRI_ARR(DP_SEC_CNTL1, DP, id), \
SRI_ARR(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \
- SRI_ARR(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id) \
- )
+ SRI_ARR(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id)
#define LE_DCN31_REG_LIST_RI(id) \
- ( \
LE_DCN3_REG_LIST_RI(id), SRI_ARR(DP_DPHY_INTERNAL_CTRL, DP, id), \
SR_ARR(DIO_LINKA_CNTL, id), SR_ARR(DIO_LINKB_CNTL, id), \
SR_ARR(DIO_LINKC_CNTL, id), SR_ARR(DIO_LINKD_CNTL, id), \
- SR_ARR(DIO_LINKE_CNTL, id), SR_ARR(DIO_LINKF_CNTL, id) \
- )
+ SR_ARR(DIO_LINKE_CNTL, id), SR_ARR(DIO_LINKF_CNTL, id)
#define UNIPHY_DCN2_REG_LIST_RI(id, phyid) \
- ( \
SRI_ARR_ALPHABET(CLOCK_ENABLE, SYMCLK, id, phyid), \
- SRI_ARR_ALPHABET(CHANNEL_XBAR_CNTL, UNIPHY, id, phyid) \
- )
+ SRI_ARR_ALPHABET(CHANNEL_XBAR_CNTL, UNIPHY, id, phyid)
/* HPO DP stream encoder */
#define DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id) \
- ( \
SR_ARR(DP_STREAM_MAPPER_CONTROL0, id), \
SR_ARR(DP_STREAM_MAPPER_CONTROL1, id), \
SR_ARR(DP_STREAM_MAPPER_CONTROL2, id), \
@@ -388,12 +382,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(DP_SYM32_ENC_SDP_METADATA_PACKET_CONTROL, DP_SYM32_ENC, id), \
SRI_ARR(DP_SYM32_ENC_SDP_AUDIO_CONTROL0, DP_SYM32_ENC, id), \
SRI_ARR(DP_SYM32_ENC_VID_CRC_CONTROL, DP_SYM32_ENC, id), \
- SRI_ARR(DP_SYM32_ENC_HBLANK_CONTROL, DP_SYM32_ENC, id) \
- )
+ SRI_ARR(DP_SYM32_ENC_HBLANK_CONTROL, DP_SYM32_ENC, id)
/* HPO DP link encoder regs */
#define DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id) \
- ( \
SRI_ARR(DP_LINK_ENC_CLOCK_CONTROL, DP_LINK_ENC, id), \
SRI_ARR(DP_DPHY_SYM32_CONTROL, DP_DPHY_SYM32, id), \
SRI_ARR(DP_DPHY_SYM32_STATUS, DP_DPHY_SYM32, id), \
@@ -422,12 +414,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL1, DP_DPHY_SYM32, id), \
SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL2, DP_DPHY_SYM32, id), \
SRI_ARR(DP_DPHY_SYM32_VC_RATE_CNTL3, DP_DPHY_SYM32, id), \
- SRI_ARR(DP_DPHY_SYM32_SAT_UPDATE, DP_DPHY_SYM32, id) \
- )
+ SRI_ARR(DP_DPHY_SYM32_SAT_UPDATE, DP_DPHY_SYM32, id)
/* DPP */
#define DPP_REG_LIST_DCN30_COMMON_RI(id) \
- ( \
SRI_ARR(CM_DEALPHA, CM, id), SRI_ARR(CM_MEM_PWR_STATUS, CM, id), \
SRI_ARR(CM_BIAS_CR_R, CM, id), SRI_ARR(CM_BIAS_Y_G_CB_B, CM, id), \
SRI_ARR(PRE_DEGAM, CNVC_CFG, id), SRI_ARR(CM_GAMCOR_CONTROL, CM, id), \
@@ -519,6 +509,8 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(CM_POST_CSC_B_C11_C12, CM, id), \
SRI_ARR(CM_POST_CSC_B_C33_C34, CM, id), \
SRI_ARR(CM_MEM_PWR_CTRL, CM, id), SRI_ARR(CM_CONTROL, CM, id), \
+ SRI_ARR(CM_TEST_DEBUG_INDEX, CM, id), \
+ SRI_ARR(CM_TEST_DEBUG_DATA, CM, id), \
SRI_ARR(FORMAT_CONTROL, CNVC_CFG, id), \
SRI_ARR(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \
SRI_ARR(CURSOR0_CONTROL, CNVC_CUR, id), \
@@ -542,12 +534,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \
SRI_ARR(OBUF_MEM_PWR_CTRL, DSCL, id), \
SRI_ARR(DSCL_MEM_PWR_STATUS, DSCL, id), \
- SRI_ARR(DSCL_MEM_PWR_CTRL, DSCL, id) \
- )
+ SRI_ARR(DSCL_MEM_PWR_CTRL, DSCL, id)
/* OPP */
#define OPP_REG_LIST_DCN_RI(id) \
- ( \
SRI_ARR(FMT_BIT_DEPTH_CONTROL, FMT, id), SRI_ARR(FMT_CONTROL, FMT, id), \
SRI_ARR(FMT_DITHER_RAND_R_SEED, FMT, id), \
SRI_ARR(FMT_DITHER_RAND_G_SEED, FMT, id), \
@@ -559,37 +549,29 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(OPPBUF_3D_PARAMETERS_0, OPPBUF, id), \
SRI_ARR(OPPBUF_3D_PARAMETERS_1, OPPBUF, id), \
SRI_ARR(OPP_PIPE_CONTROL, OPP_PIPE, id) \
- )
#define OPP_REG_LIST_DCN10_RI(id) OPP_REG_LIST_DCN_RI(id)
#define OPP_DPG_REG_LIST_RI(id) \
- ( \
SRI_ARR(DPG_CONTROL, DPG, id), SRI_ARR(DPG_DIMENSIONS, DPG, id), \
SRI_ARR(DPG_OFFSET_SEGMENT, DPG, id), SRI_ARR(DPG_COLOUR_B_CB, DPG, id), \
SRI_ARR(DPG_COLOUR_G_Y, DPG, id), SRI_ARR(DPG_COLOUR_R_CR, DPG, id), \
- SRI_ARR(DPG_RAMP_CONTROL, DPG, id), SRI_ARR(DPG_STATUS, DPG, id) \
- )
+ SRI_ARR(DPG_RAMP_CONTROL, DPG, id), SRI_ARR(DPG_STATUS, DPG, id)
#define OPP_REG_LIST_DCN30_RI(id) \
- ( \
OPP_REG_LIST_DCN10_RI(id), OPP_DPG_REG_LIST_RI(id), \
- SRI_ARR(FMT_422_CONTROL, FMT, id) \
- )
+ SRI_ARR(FMT_422_CONTROL, FMT, id)
/* Aux engine regs */
#define AUX_COMMON_REG_LIST0_RI(id) \
- ( \
SRI_ARR(AUX_CONTROL, DP_AUX, id), SRI_ARR(AUX_ARB_CONTROL, DP_AUX, id), \
SRI_ARR(AUX_SW_DATA, DP_AUX, id), SRI_ARR(AUX_SW_CONTROL, DP_AUX, id), \
SRI_ARR(AUX_INTERRUPT_CONTROL, DP_AUX, id), \
SRI_ARR(AUX_DPHY_RX_CONTROL1, DP_AUX, id), \
- SRI_ARR(AUX_SW_STATUS, DP_AUX, id) \
- )
+ SRI_ARR(AUX_SW_STATUS, DP_AUX, id)
/* DWBC */
#define DWBC_COMMON_REG_LIST_DCN30_RI(id) \
- ( \
SR_ARR(DWB_ENABLE_CLK_CTRL, id), SR_ARR(DWB_MEM_PWR_CTRL, id), \
SR_ARR(FC_MODE_CTRL, id), SR_ARR(FC_FLOW_CTRL, id), \
SR_ARR(FC_WINDOW_START, id), SR_ARR(FC_WINDOW_SIZE, id), \
@@ -683,13 +665,11 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SR_ARR(DWB_OGAM_RAMB_REGION_26_27, id), \
SR_ARR(DWB_OGAM_RAMB_REGION_28_29, id), \
SR_ARR(DWB_OGAM_RAMB_REGION_30_31, id), \
- SR_ARR(DWB_OGAM_RAMB_REGION_32_33, id) \
- )
+ SR_ARR(DWB_OGAM_RAMB_REGION_32_33, id)
/* MCIF */
#define MCIF_WB_COMMON_REG_LIST_DCN32_RI(inst) \
- ( \
SRI2_ARR(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB, inst), \
SRI2_ARR(MCIF_WB_BUFMGR_STATUS, MCIF_WB, inst), \
SRI2_ARR(MCIF_WB_BUF_PITCH, MCIF_WB, inst), \
@@ -703,8 +683,6 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI2_ARR(MCIF_WB_BUF_4_STATUS2, MCIF_WB, inst), \
SRI2_ARR(MCIF_WB_ARBITRATION_CONTROL, MCIF_WB, inst), \
SRI2_ARR(MCIF_WB_SCLK_CHANGE, MCIF_WB, inst), \
- SRI2_ARR(MCIF_WB_TEST_DEBUG_INDEX, MCIF_WB, inst), \
- SRI2_ARR(MCIF_WB_TEST_DEBUG_DATA, MCIF_WB, inst), \
SRI2_ARR(MCIF_WB_BUF_1_ADDR_Y, MCIF_WB, inst), \
SRI2_ARR(MCIF_WB_BUF_1_ADDR_C, MCIF_WB, inst), \
SRI2_ARR(MCIF_WB_BUF_2_ADDR_Y, MCIF_WB, inst), \
@@ -739,13 +717,11 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI2_ARR(MMHUBBUB_WARMUP_ADDR_REGION, MMHUBBUB, inst), \
SRI2_ARR(MMHUBBUB_WARMUP_BASE_ADDR_HIGH, MMHUBBUB, inst), \
SRI2_ARR(MMHUBBUB_WARMUP_BASE_ADDR_LOW, MMHUBBUB, inst), \
- SRI2_ARR(MMHUBBUB_WARMUP_CONTROL_STATUS, MMHUBBUB, inst) \
- )
+ SRI2_ARR(MMHUBBUB_WARMUP_CONTROL_STATUS, MMHUBBUB, inst)
/* DSC */
#define DSC_REG_LIST_DCN20_RI(id) \
- ( \
SRI_ARR(DSC_TOP_CONTROL, DSC_TOP, id), \
SRI_ARR(DSC_DEBUG_CONTROL, DSC_TOP, id), \
SRI_ARR(DSCC_CONFIG0, DSCC, id), SRI_ARR(DSCC_CONFIG1, DSCC, id), \
@@ -791,10 +767,10 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id), \
SRI_ARR(DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id), \
SRI_ARR(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id), \
+ SRI_ARR(DSCC_TEST_DEBUG_BUS_ROTATE, DSCC, id), \
SRI_ARR(DSCCIF_CONFIG0, DSCCIF, id), \
SRI_ARR(DSCCIF_CONFIG1, DSCCIF, id), \
- SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) \
- )
+ SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id)
/* MPC */
@@ -802,32 +778,25 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRII_DWB(DWB_MUX, MUX, MPC_DWB, inst)
#define MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst) \
- ( \
- SRII(MUX, MPC_OUT, inst), VUPDATE_SRII(CUR, VUPDATE_LOCK_SET, inst) \
- )
+ SRII(MUX, MPC_OUT, inst), VUPDATE_SRII(CUR, VUPDATE_LOCK_SET, inst)
#define MPC_OUT_MUX_REG_LIST_DCN3_0_RI(inst) \
- ( \
MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst), SRII(CSC_MODE, MPC_OUT, inst), \
SRII(CSC_C11_C12_A, MPC_OUT, inst), SRII(CSC_C33_C34_A, MPC_OUT, inst), \
SRII(CSC_C11_C12_B, MPC_OUT, inst), SRII(CSC_C33_C34_B, MPC_OUT, inst), \
SRII(DENORM_CONTROL, MPC_OUT, inst), \
SRII(DENORM_CLAMP_G_Y, MPC_OUT, inst), \
- SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst), SR(MPC_OUT_CSC_COEF_FORMAT) \
- )
+ SRII(DENORM_CLAMP_B_CB, MPC_OUT, inst), SR(MPC_OUT_CSC_COEF_FORMAT)
#define MPC_COMMON_REG_LIST_DCN1_0_RI(inst) \
- ( \
SRII(MPCC_TOP_SEL, MPCC, inst), SRII(MPCC_BOT_SEL, MPCC, inst), \
SRII(MPCC_CONTROL, MPCC, inst), SRII(MPCC_STATUS, MPCC, inst), \
SRII(MPCC_OPP_ID, MPCC, inst), SRII(MPCC_BG_G_Y, MPCC, inst), \
SRII(MPCC_BG_R_CR, MPCC, inst), SRII(MPCC_BG_B_CB, MPCC, inst), \
SRII(MPCC_SM_CONTROL, MPCC, inst), \
- SRII(MPCC_UPDATE_LOCK_SEL, MPCC, inst) \
- )
+ SRII(MPCC_UPDATE_LOCK_SEL, MPCC, inst)
#define MPC_REG_LIST_DCN3_0_RI(inst) \
- ( \
MPC_COMMON_REG_LIST_DCN1_0_RI(inst), SRII(MPCC_TOP_GAIN, MPCC, inst), \
SRII(MPCC_BOT_GAIN_INSIDE, MPCC, inst), \
SRII(MPCC_BOT_GAIN_OUTSIDE, MPCC, inst), \
@@ -881,8 +850,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRII(MPCC_OGAM_RAMB_START_BASE_CNTL_G, MPCC_OGAM, inst), \
SRII(MPCC_OGAM_RAMB_START_BASE_CNTL_R, MPCC_OGAM, inst), \
SRII(MPCC_OGAM_CONTROL, MPCC_OGAM, inst), \
- SRII(MPCC_OGAM_LUT_CONTROL, MPCC_OGAM, inst) \
- )
+ SRII(MPCC_OGAM_LUT_CONTROL, MPCC_OGAM, inst)
#define MPC_REG_LIST_DCN3_2_RI(inst) \
MPC_REG_LIST_DCN3_0_RI(inst),\
@@ -1026,11 +994,9 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRII(MPCC_MCM_1DLUT_RAMB_REGION_30_31, MPCC_MCM, inst),\
SRII(MPCC_MCM_1DLUT_RAMB_REGION_32_33, MPCC_MCM, inst),\
SRII(MPCC_MCM_MEM_PWR_CTRL, MPCC_MCM, inst)
-
/* OPTC */
#define OPTC_COMMON_REG_LIST_DCN3_2_RI(inst) \
- ( \
SRI_ARR(OTG_VSTARTUP_PARAM, OTG, inst), \
SRI_ARR(OTG_VUPDATE_PARAM, OTG, inst), \
SRI_ARR(OTG_VREADY_PARAM, OTG, inst), \
@@ -1092,22 +1058,19 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(OPTC_BYTES_PER_PIXEL, ODM, inst), \
SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \
SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \
- SRI_ARR(OTG_DRR_CONTROL, OTG, inst) \
- )
+ SRI_ARR(OTG_DRR_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_PIPE_UPDATE_STATUS, OTG, inst), \
+ SRI_ARR(INTERRUPT_DEST, OTG, inst)
/* HUBP */
#define HUBP_REG_LIST_DCN_VM_RI(id) \
- ( \
SRI_ARR(NOM_PARAMETERS_0, HUBPREQ, id), \
SRI_ARR(NOM_PARAMETERS_1, HUBPREQ, id), \
SRI_ARR(NOM_PARAMETERS_2, HUBPREQ, id), \
SRI_ARR(NOM_PARAMETERS_3, HUBPREQ, id), \
- SRI_ARR(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id) \
- )
-
+ SRI_ARR(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id)
#define HUBP_REG_LIST_DCN_RI(id) \
- ( \
SRI_ARR(DCHUBP_CNTL, HUBP, id), SRI_ARR(HUBPREQ_DEBUG_DB, HUBP, id), \
SRI_ARR(HUBPREQ_DEBUG, HUBP, id), SRI_ARR(DCSURF_ADDR_CONFIG, HUBP, id), \
SRI_ARR(DCSURF_TILING_CONFIG, HUBP, id), \
@@ -1178,11 +1141,9 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(DCN_SURF1_TTU_CNTL1, HUBPREQ, id), \
SRI_ARR(DCN_CUR0_TTU_CNTL0, HUBPREQ, id), \
SRI_ARR(DCN_CUR0_TTU_CNTL1, HUBPREQ, id), \
- SRI_ARR(HUBP_CLK_CNTL, HUBP, id) \
- )
-
+ SRI_ARR(HUBP_CLK_CNTL, HUBP, id), \
+ SRI_ARR(HUBPRET_READ_LINE_VALUE, HUBPRET, id)
#define HUBP_REG_LIST_DCN2_COMMON_RI(id) \
- ( \
HUBP_REG_LIST_DCN_RI(id), HUBP_REG_LIST_DCN_VM_RI(id), \
SRI_ARR(PREFETCH_SETTINGS, HUBPREQ, id), \
SRI_ARR(PREFETCH_SETTINGS_C, HUBPREQ, id), \
@@ -1209,35 +1170,24 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SRI_ARR(DCN_CUR1_TTU_CNTL0, HUBPREQ, id), \
SRI_ARR(DCN_CUR1_TTU_CNTL1, HUBPREQ, id), \
SRI_ARR(DCSURF_FLIP_CONTROL2, HUBPREQ, id), \
- SRI_ARR(VMID_SETTINGS_0, HUBPREQ, id) \
- )
-
+ SRI_ARR(VMID_SETTINGS_0, HUBPREQ, id)
#define HUBP_REG_LIST_DCN21_RI(id) \
- ( \
HUBP_REG_LIST_DCN2_COMMON_RI(id), SRI_ARR(FLIP_PARAMETERS_3, HUBPREQ, id), \
SRI_ARR(FLIP_PARAMETERS_4, HUBPREQ, id), \
SRI_ARR(FLIP_PARAMETERS_5, HUBPREQ, id), \
SRI_ARR(FLIP_PARAMETERS_6, HUBPREQ, id), \
SRI_ARR(VBLANK_PARAMETERS_5, HUBPREQ, id), \
- SRI_ARR(VBLANK_PARAMETERS_6, HUBPREQ, id) \
- )
-
+ SRI_ARR(VBLANK_PARAMETERS_6, HUBPREQ, id)
#define HUBP_REG_LIST_DCN30_RI(id) \
- ( \
- HUBP_REG_LIST_DCN21_RI(id), SRI_ARR(DCN_DMDATA_VM_CNTL, HUBPREQ, id) \
- )
-
+ HUBP_REG_LIST_DCN21_RI(id), SRI_ARR(DCN_DMDATA_VM_CNTL, HUBPREQ, id)
#define HUBP_REG_LIST_DCN32_RI(id) \
- ( \
HUBP_REG_LIST_DCN30_RI(id), SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \
SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \
- SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id) \
- )
+ SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id)
/* HUBBUB */
#define HUBBUB_REG_LIST_DCN32_RI(id) \
- ( \
SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A), \
SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B), \
SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C), \
@@ -1245,6 +1195,8 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL), \
SR(DCHUBBUB_ARB_DRAM_STATE_CNTL), SR(DCHUBBUB_ARB_SAT_LEVEL), \
SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND), SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
+ SR(DCHUBBUB_TEST_DEBUG_INDEX), \
+ SR(DCHUBBUB_TEST_DEBUG_DATA), \
SR(DCHUBBUB_SOFT_RESET), SR(DCHUBBUB_CRC_CTRL), \
SR(DCN_VM_FB_LOCATION_BASE), SR(DCN_VM_FB_LOCATION_TOP), \
SR(DCN_VM_FB_OFFSET), SR(DCN_VM_AGP_BOT), SR(DCN_VM_AGP_TOP), \
@@ -1275,15 +1227,15 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B), \
SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C), \
SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D), \
+ SR(DCHUBBUB_ARB_MALL_CNTL), \
SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB), \
SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS), \
- SR(SDPIF_REQUEST_RATE_LIMIT) \
- )
+ SR(SDPIF_REQUEST_RATE_LIMIT), \
+ SR(DCHUBBUB_SDPIF_CFG0)
/* DCCG */
#define DCCG_REG_LIST_DCN32_RI() \
- ( \
SR(DPPCLK_DTO_CTRL), DCCG_SRII(DTO_PARAM, DPPCLK, 0), \
DCCG_SRII(DTO_PARAM, DPPCLK, 1), DCCG_SRII(DTO_PARAM, DPPCLK, 2), \
DCCG_SRII(DTO_PARAM, DPPCLK, 3), DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0), \
@@ -1299,38 +1251,31 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
DCCG_SRII(PHASE, DTBCLK_DTO, 2), DCCG_SRII(PHASE, DTBCLK_DTO, 3), \
SR(DCCG_AUDIO_DTBCLK_DTO_MODULO), SR(DCCG_AUDIO_DTBCLK_DTO_PHASE), \
SR(OTG_PIXEL_RATE_DIV), SR(DTBCLK_P_CNTL), \
- SR(DCCG_AUDIO_DTO_SOURCE), SR(DENTIST_DISPCLK_CNTL) \
- )
+ SR(DCCG_AUDIO_DTO_SOURCE), SR(DENTIST_DISPCLK_CNTL)
/* VMID */
#define DCN20_VMID_REG_LIST_RI(id) \
- ( \
SRI_ARR(CNTL, DCN_VM_CONTEXT, id), \
SRI_ARR(PAGE_TABLE_BASE_ADDR_HI32, DCN_VM_CONTEXT, id), \
SRI_ARR(PAGE_TABLE_BASE_ADDR_LO32, DCN_VM_CONTEXT, id), \
SRI_ARR(PAGE_TABLE_START_ADDR_HI32, DCN_VM_CONTEXT, id), \
SRI_ARR(PAGE_TABLE_START_ADDR_LO32, DCN_VM_CONTEXT, id), \
SRI_ARR(PAGE_TABLE_END_ADDR_HI32, DCN_VM_CONTEXT, id), \
- SRI_ARR(PAGE_TABLE_END_ADDR_LO32, DCN_VM_CONTEXT, id) \
- )
+ SRI_ARR(PAGE_TABLE_END_ADDR_LO32, DCN_VM_CONTEXT, id)
/* I2C HW */
#define I2C_HW_ENGINE_COMMON_REG_LIST_RI(id) \
- ( \
SRI_ARR_I2C(SETUP, DC_I2C_DDC, id), SRI_ARR_I2C(SPEED, DC_I2C_DDC, id), \
SRI_ARR_I2C(HW_STATUS, DC_I2C_DDC, id), \
SR_ARR_I2C(DC_I2C_ARBITRATION, id), \
SR_ARR_I2C(DC_I2C_CONTROL, id), SR_ARR_I2C(DC_I2C_SW_STATUS, id), \
SR_ARR_I2C(DC_I2C_TRANSACTION0, id), SR_ARR_I2C(DC_I2C_TRANSACTION1, id),\
SR_ARR_I2C(DC_I2C_TRANSACTION2, id), SR_ARR_I2C(DC_I2C_TRANSACTION3, id),\
- SR_ARR_I2C(DC_I2C_DATA, id), SR_ARR_I2C(MICROSECOND_TIME_BASE_DIV, id) \
- )
+ SR_ARR_I2C(DC_I2C_DATA, id), SR_ARR_I2C(MICROSECOND_TIME_BASE_DIV, id)
#define I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id) \
- ( \
I2C_HW_ENGINE_COMMON_REG_LIST_RI(id), SR_ARR_I2C(DIO_MEM_PWR_CTRL, id), \
- SR_ARR_I2C(DIO_MEM_PWR_STATUS, id) \
- )
+ SR_ARR_I2C(DIO_MEM_PWR_STATUS, id)
#endif /* _DCN32_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c
index 3ad2b48954e0..f5a4e97c40ce 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c
@@ -24,35 +24,18 @@
*/
// header file of functions being implemented
-#include "dcn32_resource.h"
+#include "dcn32/dcn32_resource.h"
#include "dcn20/dcn20_resource.h"
#include "dml/dcn32/display_mode_vba_util_32.h"
#include "dml/dcn32/dcn32_fpu.h"
+#include "dc_state_priv.h"
+#include "dc_stream_priv.h"
static bool is_dual_plane(enum surface_pixel_format format)
{
return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
}
-
-uint32_t dcn32_helper_mall_bytes_to_ways(
- struct dc *dc,
- uint32_t total_size_in_mall_bytes)
-{
- uint32_t cache_lines_used, lines_per_way, total_cache_lines, num_ways;
-
- /* add 2 lines for worst case alignment */
- cache_lines_used = total_size_in_mall_bytes / dc->caps.cache_line_size + 2;
-
- total_cache_lines = dc->caps.max_cab_allocation_bytes / dc->caps.cache_line_size;
- lines_per_way = total_cache_lines / dc->caps.cache_num_ways;
- num_ways = cache_lines_used / lines_per_way;
- if (cache_lines_used % lines_per_way > 0)
- num_ways++;
-
- return num_ways;
-}
-
uint32_t dcn32_helper_calculate_mall_bytes_for_cursor(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
@@ -111,8 +94,10 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(
if (context->bw_ctx.bw.dcn.mall_subvp_size_bytes > 0) {
if (dc->debug.force_subvp_num_ways) {
return dc->debug.force_subvp_num_ways;
+ } else if (dc->res_pool->funcs->calculate_mall_ways_from_bytes) {
+ return dc->res_pool->funcs->calculate_mall_ways_from_bytes(dc, context->bw_ctx.bw.dcn.mall_subvp_size_bytes);
} else {
- return dcn32_helper_mall_bytes_to_ways(dc, context->bw_ctx.bw.dcn.mall_subvp_size_bytes);
+ return 0;
}
} else {
return 0;
@@ -190,7 +175,7 @@ bool dcn32_subvp_in_use(struct dc *dc,
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
- if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE)
+ if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE)
return true;
}
return false;
@@ -233,12 +218,12 @@ bool dcn32_is_center_timing(struct pipe_ctx *pipe)
pipe->stream->timing.v_addressable != pipe->stream->src.height) {
is_center_timing = true;
}
- }
- if (pipe->plane_state) {
- if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height &&
- pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) {
- is_center_timing = true;
+ if (pipe->plane_state) {
+ if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height &&
+ pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) {
+ is_center_timing = true;
+ }
}
}
@@ -255,6 +240,50 @@ bool dcn32_is_psr_capable(struct pipe_ctx *pipe)
return psr_capable;
}
+static void override_det_for_subvp(struct dc *dc, struct dc_state *context, uint8_t pipe_segments[])
+{
+ uint32_t i;
+ uint8_t fhd_count = 0;
+ uint8_t subvp_high_refresh_count = 0;
+ uint8_t stream_count = 0;
+
+ // Do not override if a stream has multiple planes
+ for (i = 0; i < context->stream_count; i++) {
+ if (context->stream_status[i].plane_count > 1)
+ return;
+
+ if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM)
+ stream_count++;
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+ if (dcn32_allow_subvp_high_refresh_rate(dc, context, pipe_ctx)) {
+
+ if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) {
+ fhd_count++;
+ }
+ subvp_high_refresh_count++;
+ }
+ }
+ }
+
+ if (stream_count == 2 && subvp_high_refresh_count == 2 && fhd_count == 1) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->stream && pipe_ctx->plane_state && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) {
+ if (pipe_ctx->stream->timing.v_addressable == 1080 && pipe_ctx->stream->timing.h_addressable == 1920) {
+ if (pipe_segments[i] > 4)
+ pipe_segments[i] = 4;
+ }
+ }
+ }
+ }
+}
+
/**
* dcn32_determine_det_override(): Determine DET allocation for each pipe
*
@@ -292,14 +321,14 @@ void dcn32_determine_det_override(struct dc *dc,
for (i = 0; i < context->stream_count; i++) {
/* Don't count SubVP streams for DET allocation */
- if (context->streams[i]->mall_stream_config.type != SUBVP_PHANTOM)
+ if (dc_state_get_stream_subvp_type(context, context->streams[i]) != SUBVP_PHANTOM)
stream_count++;
}
if (stream_count > 0) {
stream_segments = 18 / stream_count;
for (i = 0; i < context->stream_count; i++) {
- if (context->streams[i]->mall_stream_config.type == SUBVP_PHANTOM)
+ if (dc_state_get_stream_subvp_type(context, context->streams[i]) == SUBVP_PHANTOM)
continue;
if (context->stream_status[i].plane_count > 0)
@@ -336,6 +365,7 @@ void dcn32_determine_det_override(struct dc *dc,
}
}
+ override_det_for_subvp(dc, context, pipe_segments);
for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
if (!context->res_ctx.pipe_ctx[i].stream)
continue;
@@ -353,7 +383,7 @@ void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
{
int i, pipe_cnt;
struct resource_context *res_ctx = &context->res_ctx;
- struct pipe_ctx *pipe;
+ struct pipe_ctx *pipe = 0;
bool disable_unbounded_requesting = dc->debug.disable_z9_mpc || dc->debug.disable_unbounded_requesting;
for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
@@ -384,71 +414,6 @@ void dcn32_set_det_allocations(struct dc *dc, struct dc_state *context,
dcn32_determine_det_override(dc, context, pipes);
}
-/**
- * dcn32_save_mall_state(): Save MALL (SubVP) state for fast validation cases
- *
- * This function saves the MALL (SubVP) case for fast validation cases. For fast validation,
- * there are situations where a shallow copy of the dc->current_state is created for the
- * validation. In this case we want to save and restore the mall config because we always
- * teardown subvp at the beginning of validation (and don't attempt to add it back if it's
- * fast validation). If we don't restore the subvp config in cases of fast validation +
- * shallow copy of the dc->current_state, the dc->current_state will have a partially
- * removed subvp state when we did not intend to remove it.
- *
- * NOTE: This function ONLY works if the streams are not moved to a different pipe in the
- * validation. We don't expect this to happen in fast_validation=1 cases.
- *
- * @dc: Current DC state
- * @context: New DC state to be programmed
- * @temp_config: struct used to cache the existing MALL state
- *
- * Return: void
- */
-void dcn32_save_mall_state(struct dc *dc,
- struct dc_state *context,
- struct mall_temp_config *temp_config)
-{
- uint32_t i;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->stream)
- temp_config->mall_stream_config[i] = pipe->stream->mall_stream_config;
-
- if (pipe->plane_state)
- temp_config->is_phantom_plane[i] = pipe->plane_state->is_phantom;
- }
-}
-
-/**
- * dcn32_restore_mall_state(): Restore MALL (SubVP) state for fast validation cases
- *
- * Restore the MALL state based on the previously saved state from dcn32_save_mall_state
- *
- * @dc: Current DC state
- * @context: New DC state to be programmed, restore MALL state into here
- * @temp_config: struct that has the cached MALL state
- *
- * Return: void
- */
-void dcn32_restore_mall_state(struct dc *dc,
- struct dc_state *context,
- struct mall_temp_config *temp_config)
-{
- uint32_t i;
-
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
- if (pipe->stream)
- pipe->stream->mall_stream_config = temp_config->mall_stream_config[i];
-
- if (pipe->plane_state)
- pipe->plane_state->is_phantom = temp_config->is_phantom_plane[i];
- }
-}
-
#define MAX_STRETCHED_V_BLANK 1000 // in micro-seconds (must ensure to match value in FW)
/*
* Scaling factor for v_blank stretch calculations considering timing in
@@ -495,7 +460,7 @@ static int get_frame_rate_at_max_stretch_100hz(
}
static bool is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(
- struct dc_stream_state *fpo_candidate_stream, uint32_t fpo_vactive_margin_us)
+ struct dc_stream_state *fpo_candidate_stream, uint32_t fpo_vactive_margin_us, int current_refresh_rate)
{
int refresh_rate_max_stretch_100hz;
int min_refresh_100hz;
@@ -509,6 +474,10 @@ static bool is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(
if (refresh_rate_max_stretch_100hz < min_refresh_100hz)
return false;
+ if (fpo_candidate_stream->ctx->dc->config.enable_fpo_flicker_detection == 1 &&
+ !dc_stream_is_refresh_rate_range_flickerless(fpo_candidate_stream, (refresh_rate_max_stretch_100hz / 100), current_refresh_rate, false))
+ return false;
+
return true;
}
@@ -543,13 +512,14 @@ static int get_refresh_rate(struct dc_stream_state *fpo_candidate_stream)
*
* Return: Pointer to FPO stream candidate if config can support FPO, otherwise NULL
*/
-struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, const struct dc_state *context)
+struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context)
{
int refresh_rate = 0;
const int minimum_refreshrate_supported = 120;
struct dc_stream_state *fpo_candidate_stream = NULL;
bool is_fpo_vactive = false;
uint32_t fpo_vactive_margin_us = 0;
+ struct dc_stream_status *fpo_stream_status = NULL;
if (context == NULL)
return NULL;
@@ -572,16 +542,28 @@ struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stre
DC_FP_START();
dcn32_assign_fpo_vactive_candidate(dc, context, &fpo_candidate_stream);
DC_FP_END();
-
+ if (fpo_candidate_stream)
+ fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream);
DC_FP_START();
- is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, dc->debug.fpo_vactive_min_active_margin_us);
+ is_fpo_vactive = dcn32_find_vactive_pipe(dc, context, fpo_candidate_stream, dc->debug.fpo_vactive_min_active_margin_us);
DC_FP_END();
if (!is_fpo_vactive || dc->debug.disable_fpo_vactive)
return NULL;
- } else
+ } else {
fpo_candidate_stream = context->streams[0];
+ if (fpo_candidate_stream)
+ fpo_stream_status = dc_state_get_stream_status(context, fpo_candidate_stream);
+ }
- if (!fpo_candidate_stream)
+ /* In DCN32/321, FPO uses per-pipe P-State force.
+ * If there are no planes, HUBP is power gated and
+ * therefore programming UCLK_PSTATE_FORCE does
+ * nothing (P-State will always be asserted naturally
+ * on a pipe that has HUBP power gated). Therefore we
+ * only want to enable FPO if the FPO pipe has both
+ * a stream and a plane.
+ */
+ if (!fpo_candidate_stream || !fpo_stream_status || fpo_stream_status->plane_count == 0)
return NULL;
if (fpo_candidate_stream->sink->edid_caps.panel_patch.disable_fams)
@@ -592,13 +574,15 @@ struct dc_stream_state *dcn32_can_support_mclk_switch_using_fw_based_vblank_stre
return NULL;
fpo_vactive_margin_us = is_fpo_vactive ? dc->debug.fpo_vactive_margin_us : 0; // For now hardcode the FPO + Vactive stretch margin to be 2000us
- if (!is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(fpo_candidate_stream, fpo_vactive_margin_us))
+ if (!is_refresh_rate_support_mclk_switch_using_fw_based_vblank_stretch(fpo_candidate_stream, fpo_vactive_margin_us, refresh_rate))
return NULL;
if (!fpo_candidate_stream->allow_freesync)
return NULL;
- if (fpo_candidate_stream->vrr_active_variable && dc->debug.disable_fams_gaming)
+ if (fpo_candidate_stream->vrr_active_variable &&
+ ((dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE) ||
+ (context->stream_count > 1 && !(dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_ENABLE))))
return NULL;
return fpo_candidate_stream;
@@ -620,6 +604,30 @@ bool dcn32_check_native_scaling_for_res(struct pipe_ctx *pipe, unsigned int widt
}
/**
+ * disallow_subvp_in_active_plus_blank() - Function to determine disallowed subvp + drr/vblank configs
+ *
+ * @pipe: subvp pipe to be used for the subvp + drr/vblank config
+ *
+ * Since subvp is being enabled on more configs (such as 1080p60), we want
+ * to explicitly block any configs that we don't want to enable. We do not
+ * want to enable any 1080p60 (SubVP) + drr / vblank configs since these
+ * are already covered by FPO.
+ *
+ * Return: True if disallowed, false otherwise
+ */
+static bool disallow_subvp_in_active_plus_blank(struct pipe_ctx *pipe)
+{
+ bool disallow = false;
+
+ if (resource_is_pipe_type(pipe, OPP_HEAD) &&
+ resource_is_pipe_type(pipe, DPP_PIPE)) {
+ if (pipe->stream->timing.v_addressable == 1080 && pipe->stream->timing.h_addressable == 1920)
+ disallow = true;
+ }
+ return disallow;
+}
+
+/**
* dcn32_subvp_drr_admissable() - Determine if SubVP + DRR config is admissible
*
* @dc: Current DC state
@@ -642,32 +650,35 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context)
bool drr_pipe_found = false;
bool drr_psr_capable = false;
uint64_t refresh_rate = 0;
+ bool subvp_disallow = false;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
if (resource_is_pipe_type(pipe, OPP_HEAD) &&
resource_is_pipe_type(pipe, DPP_PIPE)) {
- if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+ if (pipe_mall_type == SUBVP_MAIN) {
subvp_count++;
+ subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe);
refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
- pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
+ pipe->stream->timing.v_total * (unsigned long long)pipe->stream->timing.h_total - (uint64_t)1);
refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
}
- if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ if (pipe_mall_type == SUBVP_NONE) {
non_subvp_pipes++;
drr_psr_capable = (drr_psr_capable || dcn32_is_psr_capable(pipe));
if (pipe->stream->ignore_msa_timing_param &&
- (pipe->stream->allow_freesync || pipe->stream->vrr_active_variable)) {
+ (pipe->stream->allow_freesync || pipe->stream->vrr_active_variable || pipe->stream->vrr_active_fixed)) {
drr_pipe_found = true;
}
}
}
}
- if (subvp_count == 1 && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable &&
+ if (subvp_count == 1 && !subvp_disallow && non_subvp_pipes == 1 && drr_pipe_found && !drr_psr_capable &&
((uint32_t)refresh_rate < 120))
result = true;
@@ -700,25 +711,28 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
bool vblank_psr_capable = false;
uint64_t refresh_rate = 0;
+ bool subvp_disallow = false;
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ enum mall_stream_type pipe_mall_type = dc_state_get_pipe_subvp_type(context, pipe);
if (resource_is_pipe_type(pipe, OPP_HEAD) &&
resource_is_pipe_type(pipe, DPP_PIPE)) {
- if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
+ if (pipe_mall_type == SUBVP_MAIN) {
subvp_count++;
+ subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe);
refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 +
- pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1);
+ pipe->stream->timing.v_total * (unsigned long long)pipe->stream->timing.h_total - (uint64_t)1);
refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total);
refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total);
}
- if (pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+ if (pipe_mall_type == SUBVP_NONE) {
non_subvp_pipes++;
vblank_psr_capable = (vblank_psr_capable || dcn32_is_psr_capable(pipe));
if (pipe->stream->ignore_msa_timing_param &&
- (pipe->stream->allow_freesync || pipe->stream->vrr_active_variable)) {
+ (pipe->stream->allow_freesync || pipe->stream->vrr_active_variable || pipe->stream->vrr_active_fixed)) {
drr_pipe_found = true;
}
}
@@ -726,9 +740,41 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int
}
if (subvp_count == 1 && non_subvp_pipes == 1 && !drr_pipe_found && !vblank_psr_capable &&
- ((uint32_t)refresh_rate < 120) &&
+ ((uint32_t)refresh_rate < 120) && !subvp_disallow &&
vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp)
result = true;
return result;
}
+
+void dcn32_update_dml_pipes_odm_policy_based_on_context(struct dc *dc, struct dc_state *context,
+ display_e2e_pipe_params_st *pipes)
+{
+ int i, pipe_cnt;
+ struct resource_context *res_ctx = &context->res_ctx;
+ struct pipe_ctx *pipe = NULL;
+
+ for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+ int odm_slice_count = 0;
+
+ if (!res_ctx->pipe_ctx[i].stream)
+ continue;
+ pipe = &res_ctx->pipe_ctx[i];
+ odm_slice_count = resource_get_odm_slice_count(pipe);
+
+ if (odm_slice_count == 1)
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+ else if (odm_slice_count == 2)
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
+ else if (odm_slice_count == 4)
+ pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1;
+
+ pipe_cnt++;
+ }
+}
+
+void dcn32_override_min_req_dcfclk(struct dc *dc, struct dc_state *context)
+{
+ if (dcn32_subvp_in_use(dc, context) && context->bw_ctx.bw.dcn.clk.dcfclk_khz <= MIN_SUBVP_DCFCLK_KHZ)
+ context->bw_ctx.bw.dcn.clk.dcfclk_khz = MIN_SUBVP_DCFCLK_KHZ;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index 8d73cceb485b..ad214986f7ac 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -50,7 +50,7 @@
#include "dcn32/dcn32_optc.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
-#include "dce110/dce110_hw_sequencer.h"
+#include "dce110/dce110_hwseq.h"
#include "dcn30/dcn30_opp.h"
#include "dcn20/dcn20_dsc.h"
#include "dcn30/dcn30_vpg.h"
@@ -63,7 +63,7 @@
#include "dcn31/dcn31_apg.h"
#include "dcn31/dcn31_dio_link_encoder.h"
#include "dcn32/dcn32_dio_link_encoder.h"
-#include "dcn321_dio_link_encoder.h"
+#include "dcn321/dcn321_dio_link_encoder.h"
#include "dce/dce_clock_source.h"
#include "dce/dce_audio.h"
#include "dce/dce_hwseq.h"
@@ -72,7 +72,7 @@
#include "dml/display_mode_vba.h"
#include "dcn32/dcn32_dccg.h"
#include "dcn10/dcn10_resource.h"
-#include "link.h"
+#include "link_service.h"
#include "dcn31/dcn31_panel_cntl.h"
#include "dcn30/dcn30_dwb.h"
@@ -92,6 +92,8 @@
#include "vm_helper.h"
#include "dcn20/dcn20_vmid.h"
+#include "dc_state_priv.h"
+
#define DC_LOGGER_INIT(logger)
enum dcn321_clk_src_array_id {
@@ -684,7 +686,6 @@ static const struct dc_plane_cap plane_cap = {
static const struct dc_debug_options debug_defaults_drv = {
.disable_dmcu = true,
.force_abm_enable = false,
- .timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = false,
.pipe_split_policy = MPC_SPLIT_AVOID,
@@ -732,6 +733,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.fpo_vactive_max_blank_us = 1000,
.enable_legacy_fast_update = false,
.disable_dc_mode_overwrite = true,
+ .using_dml2 = false,
};
static struct dce_aux *dcn321_aux_engine_create(
@@ -1032,7 +1034,7 @@ static struct link_encoder *dcn321_link_encoder_create(
struct dcn20_link_encoder *enc20 =
kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
- if (!enc20)
+ if (!enc20 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
return NULL;
#undef REG_STRUCT
@@ -1285,6 +1287,8 @@ static struct hpo_dp_link_encoder *dcn321_hpo_dp_link_encoder_create(
/* allocate HPO link encoder */
hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL);
+ if (!hpo_dp_enc31)
+ return NULL; /* out of memory */
#undef REG_STRUCT
#define REG_STRUCT hpo_dp_link_enc_regs
@@ -1570,13 +1574,22 @@ static void dcn321_destroy_resource_pool(struct resource_pool **pool)
}
static struct dc_cap_funcs cap_funcs = {
- .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
+ .get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
+ .get_subvp_en = dcn32_subvp_in_use,
};
static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
{
DC_FP_START();
+
dcn321_update_bw_bounding_box_fpu(dc, bw_params);
+
+ if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2)
+ dml2_reinit(dc, &dc->dml2_options, &dc->current_state->bw_ctx.dml2);
+
+ if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2_dc_power_source)
+ dml2_reinit(dc, &dc->dml2_dc_power_options, &dc->current_state->bw_ctx.dml2_dc_power_source);
+
DC_FP_END();
}
@@ -1589,6 +1602,8 @@ static struct resource_funcs dcn321_res_pool_funcs = {
.calculate_wm_and_dlg = dcn32_calculate_wm_and_dlg,
.populate_dml_pipes = dcn32_populate_dml_pipes_from_context,
.acquire_free_pipe_as_secondary_dpp_pipe = dcn32_acquire_free_pipe_as_secondary_dpp_pipe,
+ .acquire_free_pipe_as_secondary_opp_head = dcn32_acquire_free_pipe_as_secondary_opp_head,
+ .release_pipe = dcn20_release_pipe,
.add_stream_to_ctx = dcn30_add_stream_to_ctx,
.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
@@ -1601,10 +1616,10 @@ static struct resource_funcs dcn321_res_pool_funcs = {
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
.add_phantom_pipes = dcn32_add_phantom_pipes,
- .remove_phantom_pipes = dcn32_remove_phantom_pipes,
- .retain_phantom_pipes = dcn32_retain_phantom_pipes,
- .save_mall_state = dcn32_save_mall_state,
- .restore_mall_state = dcn32_restore_mall_state,
+ .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
+ .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe,
+ .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size,
};
static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -1689,10 +1704,13 @@ static bool dcn321_resource_construct(
dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/
/* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/
dc->caps.max_cursor_size = 64;
+ dc->caps.max_buffered_cursor_size = 64; // sqrt(16 * 1024 / 4)
dc->caps.min_horizontal_blanking_period = 80;
dc->caps.dmdata_alloc_size = 2048;
dc->caps.mall_size_per_mem_channel = 4;
- dc->caps.mall_size_total = 0;
+ /* total size = mall per channel * num channels * 1024 * 1024 */
+ dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576;
+
dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8;
dc->caps.cache_line_size = 64;
dc->caps.cache_num_ways = 16;
@@ -1721,6 +1739,7 @@ static bool dcn321_resource_construct(
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
@@ -1736,8 +1755,8 @@ static bool dcn321_resource_construct(
dc->caps.color.dpp.gamma_corr = 1;
dc->caps.color.dpp.dgam_rom_for_yuv = 0;
- dc->caps.color.dpp.hw_3d_lut = 1;
- dc->caps.color.dpp.ogam_ram = 1;
+ dc->caps.color.dpp.hw_3d_lut = 0;
+ dc->caps.color.dpp.ogam_ram = 0;
// no OGAM ROM on DCN2 and later ASICs
dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
@@ -1755,8 +1774,14 @@ static bool dcn321_resource_construct(
dc->caps.color.mpc.ogam_rom_caps.pq = 0;
dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
dc->caps.color.mpc.ocsc = 1;
+ dc->caps.color.mpc.preblend = true;
+
+ /* Use pipe context based otg sync logic */
+ dc->config.use_pipe_ctx_sync_logic = true;
dc->config.dc_mode_clk_limit_support = true;
+ dc->config.enable_windowed_mpo_odm = true;
+ dc->config.disable_hbr_audio_dp2 = true;
/* read VBIOS LTTPR caps */
{
if (ctx->dc_bios->funcs->get_lttpr_caps) {
@@ -1974,6 +1999,8 @@ static bool dcn321_resource_construct(
for (i = 0; i < dc->caps.max_planes; ++i)
dc->caps.planes[i] = plane_cap;
+ dc->caps.max_odm_combine_factor = 4;
+
dc->cap_funcs = cap_funcs;
if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
@@ -1987,6 +2014,38 @@ static bool dcn321_resource_construct(
pool->base.oem_device = NULL;
}
+ dc->dml2_options.dcn_pipe_count = pool->base.pipe_count;
+ dc->dml2_options.use_native_soc_bb_construction = true;
+ dc->dml2_options.minimize_dispclk_using_odm = true;
+
+ resource_init_common_dml2_callbacks(dc, &dc->dml2_options);
+ dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch;
+ dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc;
+ dc->dml2_options.svp_pstate.callbacks.calculate_mall_ways_from_bytes = pool->base.funcs->calculate_mall_ways_from_bytes;
+
+ dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us;
+ dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us;
+ dc->dml2_options.svp_pstate.subvp_pstate_allow_width_us = dc->caps.subvp_pstate_allow_width_us;
+ dc->dml2_options.svp_pstate.subvp_swath_height_margin_lines = dc->caps.subvp_swath_height_margin_lines;
+
+ dc->dml2_options.svp_pstate.force_disable_subvp = dc->debug.force_disable_subvp;
+ dc->dml2_options.svp_pstate.force_enable_subvp = dc->debug.force_subvp_mclk_switch;
+
+ dc->dml2_options.mall_cfg.cache_line_size_bytes = dc->caps.cache_line_size;
+ dc->dml2_options.mall_cfg.cache_num_ways = dc->caps.cache_num_ways;
+ dc->dml2_options.mall_cfg.max_cab_allocation_bytes = dc->caps.max_cab_allocation_bytes;
+ dc->dml2_options.mall_cfg.mblk_height_4bpe_pixels = DCN3_2_MBLK_HEIGHT_4BPE;
+ dc->dml2_options.mall_cfg.mblk_height_8bpe_pixels = DCN3_2_MBLK_HEIGHT_8BPE;
+ dc->dml2_options.mall_cfg.mblk_size_bytes = DCN3_2_MALL_MBLK_SIZE_BYTES;
+ dc->dml2_options.mall_cfg.mblk_width_pixels = DCN3_2_MBLK_WIDTH;
+
+ dc->dml2_options.max_segments_per_hubp = 18;
+ dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;
+
+ /* init DC limited DML2 options */
+ memcpy(&dc->dml2_dc_power_options, &dc->dml2_options, sizeof(struct dml2_configuration_options));
+ dc->dml2_dc_power_options.use_clock_dc_limits = true;
+
return true;
create_fail:
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h
index 82cbf009f2d3..82cbf009f2d3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
new file mode 100644
index 000000000000..fff57f23f4f7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -0,0 +1,2219 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dm_services.h"
+#include "dc.h"
+
+#include "dcn31/dcn31_init.h"
+#include "dcn35/dcn35_init.h"
+
+#include "resource.h"
+#include "include/irq_service_interface.h"
+#include "dcn35_resource.h"
+#include "dml2/dml2_wrapper.h"
+
+#include "dcn20/dcn20_resource.h"
+#include "dcn30/dcn30_resource.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn32/dcn32_resource.h"
+
+#include "dcn10/dcn10_ipp.h"
+#include "dcn30/dcn30_hubbub.h"
+#include "dcn31/dcn31_hubbub.h"
+#include "dcn35/dcn35_hubbub.h"
+#include "dcn32/dcn32_mpc.h"
+#include "dcn35/dcn35_hubp.h"
+#include "irq/dcn35/irq_service_dcn35.h"
+#include "dcn35/dcn35_dpp.h"
+#include "dcn35/dcn35_optc.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn35/dcn35_opp.h"
+#include "dcn35/dcn35_dsc.h"
+#include "dcn30/dcn30_vpg.h"
+#include "dcn30/dcn30_afmt.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+#include "dcn35/dcn35_dio_stream_encoder.h"
+#include "dcn31/dcn31_hpo_dp_stream_encoder.h"
+#include "dcn31/dcn31_hpo_dp_link_encoder.h"
+#include "dcn32/dcn32_hpo_dp_link_encoder.h"
+#include "link_service.h"
+#include "dcn31/dcn31_apg.h"
+#include "dcn32/dcn32_dio_link_encoder.h"
+#include "dcn31/dcn31_vpg.h"
+#include "dcn31/dcn31_afmt.h"
+#include "dce/dce_clock_source.h"
+#include "dce/dce_audio.h"
+#include "dce/dce_hwseq.h"
+#include "clk_mgr.h"
+#include "virtual/virtual_stream_encoder.h"
+#include "dce110/dce110_resource.h"
+#include "dml/display_mode_vba.h"
+#include "dcn35/dcn35_dccg.h"
+#include "dcn35/dcn35_pg_cntl.h"
+#include "dcn10/dcn10_resource.h"
+#include "dcn31/dcn31_panel_cntl.h"
+#include "dcn35/dcn35_hwseq.h"
+#include "dcn35/dcn35_dio_link_encoder.h"
+#include "dml/dcn31/dcn31_fpu.h" /*todo*/
+#include "dml/dcn35/dcn35_fpu.h"
+#include "dcn35/dcn35_dwb.h"
+#include "dcn35/dcn35_mmhubbub.h"
+
+#include "dcn/dcn_3_5_0_offset.h"
+#include "dcn/dcn_3_5_0_sh_mask.h"
+#include "nbio/nbio_7_11_0_offset.h"
+#include "mmhub/mmhub_3_3_0_offset.h"
+#include "mmhub/mmhub_3_3_0_sh_mask.h"
+
+#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE__SHIFT 0x0
+#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE_MASK 0x0000000FL
+
+#include "reg_helper.h"
+#include "dce/dmub_abm.h"
+#include "dce/dmub_psr.h"
+#include "dce/dmub_replay.h"
+#include "dce/dce_aux.h"
+#include "dce/dce_i2c.h"
+#include "dml/dcn31/display_mode_vba_31.h" /*temp*/
+#include "vm_helper.h"
+#include "dcn20/dcn20_vmid.h"
+
+#include "dc_state_priv.h"
+
+#include "link_enc_cfg.h"
+#define DC_LOGGER_INIT(logger)
+
+enum dcn35_clk_src_array_id {
+ DCN35_CLK_SRC_PLL0,
+ DCN35_CLK_SRC_PLL1,
+ DCN35_CLK_SRC_PLL2,
+ DCN35_CLK_SRC_PLL3,
+ DCN35_CLK_SRC_PLL4,
+ DCN35_CLK_SRC_TOTAL
+};
+
+/* begin *********************
+ * macros to expand the register list macros defined in HW object header files
+ */
+
+/* DCN */
+/* TODO awful hack. fixup dcn20_dwb.h */
+#undef BASE_INNER
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg]
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define SR(reg_name)\
+ REG_STRUCT.reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define SR_ARR(reg_name, id) \
+ REG_STRUCT[id].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name
+
+#define SR_ARR_INIT(reg_name, id, value) \
+ REG_STRUCT[id].reg_name = value
+
+#define SRI(reg_name, block, id)\
+ REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_ARR(reg_name, block, id)\
+ REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SR_ARR_I2C(reg_name, id) \
+ REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name
+
+#define SRI_ARR_I2C(reg_name, block, id)\
+ REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_ARR_ALPHABET(reg_name, block, index, id)\
+ REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI2(reg_name, block, id)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define SRI2_ARR(reg_name, block, id)\
+ REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define SRIR(var_name, reg_name, block, id)\
+ .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII(reg_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII_ARR_2(reg_name, block, id, inst)\
+ REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII_MPC_RMU(reg_name, block, id)\
+ .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII_DWB(reg_name, temp_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## temp_name
+
+#define SF_DWB2(reg_name, block, id, field_name, post_fix) \
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define DCCG_SRII(reg_name, block, id)\
+ REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define VUPDATE_SRII(reg_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
+ reg ## reg_name ## _ ## block ## id
+
+/* NBIO */
+#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg]
+
+#define NBIO_BASE(seg) \
+ NBIO_BASE_INNER(seg)
+
+#define NBIO_SR(reg_name)\
+ REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \
+ regBIF_BX2_ ## reg_name
+
+#define NBIO_SR_ARR(reg_name, id)\
+ REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \
+ regBIF_BX2_ ## reg_name
+
+#define bios_regs_init() \
+ ( \
+ NBIO_SR(BIOS_SCRATCH_3),\
+ NBIO_SR(BIOS_SCRATCH_6)\
+ )
+
+static struct bios_registers bios_regs;
+
+#define clk_src_regs_init(index, pllid)\
+ CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid)
+
+static struct dce110_clk_src_regs clk_src_regs[5];
+
+static const struct dce110_clk_src_shift cs_shift = {
+ CS_COMMON_MASK_SH_LIST_DCN3_1_4(__SHIFT)
+};
+
+static const struct dce110_clk_src_mask cs_mask = {
+ CS_COMMON_MASK_SH_LIST_DCN3_1_4(_MASK)
+};
+
+#define abm_regs_init(id)\
+ ABM_DCN32_REG_LIST_RI(id)
+
+static struct dce_abm_registers abm_regs[4];
+
+static const struct dce_abm_shift abm_shift = {
+ ABM_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dce_abm_mask abm_mask = {
+ ABM_MASK_SH_LIST_DCN35(_MASK)
+};
+
+#define audio_regs_init(id)\
+ AUD_COMMON_REG_LIST_RI(id)
+
+static struct dce_audio_registers audio_regs[7];
+
+
+#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
+ SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\
+ SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\
+ AUD_COMMON_MASK_SH_LIST_BASE(mask_sh)
+
+static const struct dce_audio_shift audio_shift = {
+ DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_audio_mask audio_mask = {
+ DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
+};
+
+#define vpg_regs_init(id)\
+ VPG_DCN31_REG_LIST_RI(id)
+
+static struct dcn31_vpg_registers vpg_regs[10];
+
+static const struct dcn31_vpg_shift vpg_shift = {
+ DCN31_VPG_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_vpg_mask vpg_mask = {
+ DCN31_VPG_MASK_SH_LIST(_MASK)
+};
+
+#define afmt_regs_init(id)\
+ AFMT_DCN31_REG_LIST_RI(id)
+
+static struct dcn31_afmt_registers afmt_regs[6];
+
+static const struct dcn31_afmt_shift afmt_shift = {
+ DCN31_AFMT_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_afmt_mask afmt_mask = {
+ DCN31_AFMT_MASK_SH_LIST(_MASK)
+};
+
+#define apg_regs_init(id)\
+ APG_DCN31_REG_LIST_RI(id)
+
+static struct dcn31_apg_registers apg_regs[4];
+
+static const struct dcn31_apg_shift apg_shift = {
+ DCN31_APG_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_apg_mask apg_mask = {
+ DCN31_APG_MASK_SH_LIST(_MASK)
+};
+
+#define stream_enc_regs_init(id)\
+ SE_DCN35_REG_LIST_RI(id)
+
+static struct dcn10_stream_enc_registers stream_enc_regs[5];
+
+static const struct dcn10_stream_encoder_shift se_shift = {
+ SE_COMMON_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn10_stream_encoder_mask se_mask = {
+ SE_COMMON_MASK_SH_LIST_DCN35(_MASK)
+};
+
+#define aux_regs_init(id)\
+ DCN2_AUX_REG_LIST_RI(id)
+
+static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5];
+
+#define hpd_regs_init(id)\
+ HPD_REG_LIST_RI(id)
+
+static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5];
+
+
+static const struct dce110_aux_registers_shift aux_shift = {
+ DCN_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+ DCN_AUX_MASK_SH_LIST(_MASK)
+};
+
+#define link_regs_init(id, phyid)\
+ ( \
+ LE_DCN35_REG_LIST_RI(id), \
+ UNIPHY_DCN2_REG_LIST_RI(id, phyid)\
+ )
+
+static struct dcn10_link_enc_registers link_enc_regs[5];
+
+static const struct dcn10_link_enc_shift le_shift = {
+ LINK_ENCODER_MASK_SH_LIST_DCN35(__SHIFT), \
+ //DPCS_DCN31_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn10_link_enc_mask le_mask = {
+ LINK_ENCODER_MASK_SH_LIST_DCN35(_MASK), \
+ //DPCS_DCN31_MASK_SH_LIST(_MASK)
+};
+
+#define hpo_dp_stream_encoder_reg_init(id)\
+ DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id)
+
+static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4];
+
+static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = {
+ DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = {
+ DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK)
+};
+
+#define hpo_dp_link_encoder_reg_init(id)\
+ DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id)
+ /*DCN3_1_RDPCSTX_REG_LIST(0),*/
+ /*DCN3_1_RDPCSTX_REG_LIST(1),*/
+ /*DCN3_1_RDPCSTX_REG_LIST(2),*/
+ /*DCN3_1_RDPCSTX_REG_LIST(3),*/
+
+static struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[2];
+
+static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = {
+ DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = {
+ DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(_MASK)
+};
+
+#define dpp_regs_init(id)\
+ DPP_REG_LIST_DCN35_RI(id)
+
+static struct dcn3_dpp_registers dpp_regs[4];
+
+static const struct dcn35_dpp_shift tf_shift = {
+ DPP_REG_LIST_SH_MASK_DCN35(__SHIFT)
+};
+
+static const struct dcn35_dpp_mask tf_mask = {
+ DPP_REG_LIST_SH_MASK_DCN35(_MASK)
+};
+
+#define opp_regs_init(id)\
+ OPP_REG_LIST_DCN35_RI(id)
+
+static struct dcn35_opp_registers opp_regs[4];
+
+static const struct dcn35_opp_shift opp_shift = {
+ OPP_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn35_opp_mask opp_mask = {
+ OPP_MASK_SH_LIST_DCN35(_MASK)
+};
+
+#define aux_engine_regs_init(id)\
+ ( \
+ AUX_COMMON_REG_LIST0_RI(id), \
+ SR_ARR_INIT(AUXN_IMPCAL, id, 0), \
+ SR_ARR_INIT(AUXP_IMPCAL, id, 0), \
+ SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK) \
+ )
+
+static struct dce110_aux_registers aux_engine_regs[5];
+
+#define dwbc_regs_dcn3_init(id)\
+ DWBC_COMMON_REG_LIST_DCN30_RI(id)
+
+static struct dcn30_dwbc_registers dwbc35_regs[1];
+
+static const struct dcn35_dwbc_shift dwbc35_shift = {
+ DWBC_COMMON_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn35_dwbc_mask dwbc35_mask = {
+ DWBC_COMMON_MASK_SH_LIST_DCN35(_MASK)
+};
+
+#define mcif_wb_regs_dcn3_init(id)\
+ MCIF_WB_COMMON_REG_LIST_DCN3_5_RI(id)
+
+static struct dcn35_mmhubbub_registers mcif_wb35_regs[1];
+
+static const struct dcn35_mmhubbub_shift mcif_wb35_shift = {
+ MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(__SHIFT)
+};
+
+static const struct dcn35_mmhubbub_mask mcif_wb35_mask = {
+ MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(_MASK)
+};
+
+#define dsc_regsDCN35_init(id)\
+ DSC_REG_LIST_DCN20_RI(id)
+
+static struct dcn20_dsc_registers dsc_regs[4];
+
+static const struct dcn35_dsc_shift dsc_shift = {
+ DSC_REG_LIST_SH_MASK_DCN35(__SHIFT)
+};
+
+static const struct dcn35_dsc_mask dsc_mask = {
+ DSC_REG_LIST_SH_MASK_DCN35(_MASK)
+};
+
+static struct dcn30_mpc_registers mpc_regs;
+
+#define dcn_mpc_regs_init() \
+ MPC_REG_LIST_DCN3_2_RI(0),\
+ MPC_REG_LIST_DCN3_2_RI(1),\
+ MPC_REG_LIST_DCN3_2_RI(2),\
+ MPC_REG_LIST_DCN3_2_RI(3),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\
+ MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0)
+
+static const struct dcn30_mpc_shift mpc_shift = {
+ MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
+
+static const struct dcn30_mpc_mask mpc_mask = {
+ MPC_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
+
+#define optc_regs_init(id)\
+ OPTC_COMMON_REG_LIST_DCN3_5_RI(id)
+
+static struct dcn_optc_registers optc_regs[4];
+
+static const struct dcn_optc_shift optc_shift = {
+ OPTC_COMMON_MASK_SH_LIST_DCN3_5(__SHIFT)
+};
+
+static const struct dcn_optc_mask optc_mask = {
+ OPTC_COMMON_MASK_SH_LIST_DCN3_5(_MASK)
+};
+
+#define hubp_regs_init(id)\
+ HUBP_REG_LIST_DCN30_RI(id)
+
+static struct dcn_hubp2_registers hubp_regs[4];
+
+
+static const struct dcn35_hubp2_shift hubp_shift = {
+ HUBP_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn35_hubp2_mask hubp_mask = {
+ HUBP_MASK_SH_LIST_DCN35(_MASK)
+};
+
+static struct dcn_hubbub_registers hubbub_reg;
+
+#define hubbub_reg_init()\
+ HUBBUB_REG_LIST_DCN35(0)
+
+static const struct dcn_hubbub_shift hubbub_shift = {
+ HUBBUB_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn_hubbub_mask hubbub_mask = {
+ HUBBUB_MASK_SH_LIST_DCN35(_MASK)
+};
+
+static struct dccg_registers dccg_regs;
+
+#define dccg_regs_init()\
+ DCCG_REG_LIST_DCN35()
+
+static const struct dccg_shift dccg_shift = {
+ DCCG_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dccg_mask dccg_mask = {
+ DCCG_MASK_SH_LIST_DCN35(_MASK)
+};
+
+static struct pg_cntl_registers pg_cntl_regs;
+
+#define pg_cntl_dcn35_regs_init() \
+ PG_CNTL_REG_LIST_DCN35()
+
+static const struct pg_cntl_shift pg_cntl_shift = {
+ PG_CNTL_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct pg_cntl_mask pg_cntl_mask = {
+ PG_CNTL_MASK_SH_LIST_DCN35(_MASK)
+};
+
+#define SRII2(reg_name_pre, reg_name_post, id)\
+ .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \
+ ## id ## _ ## reg_name_post ## _BASE_IDX) + \
+ reg ## reg_name_pre ## id ## _ ## reg_name_post
+
+static struct dce_hwseq_registers hwseq_reg;
+
+#define hwseq_reg_init()\
+ HWSEQ_DCN35_REG_LIST()
+
+/*
+ * Field list shared by the DCN3.5 hardware sequencer shift and mask tables.
+ * Expands HWSEQ_DCN_MASK_SH_LIST and then one HWS_SF() entry per register
+ * field listed below. mask_sh is the suffix handed to every entry; the
+ * users of this macro pass __SHIFT and _MASK.
+ */
+#define HWSEQ_DCN35_MASK_SH_LIST(mask_sh)\
+ HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
+ HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
+ HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
+ HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \
+ HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \
+ HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \
+ HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \
+ HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \
+ HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DISPCLK_R_DMU_GATE_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DISPCLK_G_RBBMIF_GATE_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, RBBMIF_FGCG_REP_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DPREFCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DISPCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DPPCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DTBCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DCFCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DPIACLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_FGCG_REP_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_DISPCLK_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_SOCCLK_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK0_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK1_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK2_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK3_GATE_DISABLE, mask_sh)
+
+/* Hardware sequencer field shifts: the DCN3.5 field list with the __SHIFT suffix. */
+static const struct dce_hwseq_shift hwseq_shift = {
+ HWSEQ_DCN35_MASK_SH_LIST(__SHIFT)
+};
+
+/* Hardware sequencer field masks: the same field list with the _MASK suffix. */
+static const struct dce_hwseq_mask hwseq_mask = {
+ HWSEQ_DCN35_MASK_SH_LIST(_MASK)
+};
+
+/* Per-instance VMID register init; a thin alias for DCN20_VMID_REG_LIST_RI(). */
+#define vmid_regs_init(id)\
+ DCN20_VMID_REG_LIST_RI(id)
+
+/* One register set per VMID; filled at runtime in dcn35_hubbub_create(). */
+static struct dcn_vmid_registers vmid_regs[16];
+
+/* VMID field shifts (DCN2.0 field list, __SHIFT suffix). */
+static const struct dcn20_vmid_shift vmid_shifts = {
+ DCN20_VMID_MASK_SH_LIST(__SHIFT)
+};
+
+/* VMID field masks (DCN2.0 field list, _MASK suffix). */
+static const struct dcn20_vmid_mask vmid_masks = {
+ DCN20_VMID_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * Hardware block counts for DCN3.5. Installed as pool->base.res_cap in
+ * dcn35_resource_construct() and used as loop bounds by the create and
+ * destruct code in this file.
+ */
+static const struct resource_caps res_cap_dcn35 = {
+ .num_timing_generator = 4,
+ .num_opp = 4,
+ .num_video_plane = 4,
+ .num_audio = 5,
+ .num_stream_encoder = 5,
+ .num_dig_link_enc = 5,
+ .num_hpo_dp_stream_encoder = 4,
+ .num_hpo_dp_link_encoder = 2,
+ .num_pll = 4,/*1 c10 edp, 3xc20 combo PHY*/
+ .num_dwb = 1,
+ .num_ddc = 5,
+ .num_vmid = 16,
+ .num_mpc_3dlut = 2,
+ .num_dsc = 4,
+};
+
+/* Plane capabilities: supported pixel formats and per-format scaling limits. */
+static const struct dc_plane_cap plane_cap = {
+ .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
+ .per_pixel_alpha = true,
+
+ .pixel_format_support = {
+ .argb8888 = true,
+ .nv12 = true,
+ .fp16 = true,
+ .p010 = true,
+ .ayuv = false,
+ },
+
+ .max_upscale_factor = {
+ .argb8888 = 16000,
+ .nv12 = 16000,
+ .fp16 = 16000
+ },
+
+ // 6:1 downscaling ratio: 1000/6 = 166.666
+ // NOTE(review): argb8888 uses 250, which is 1000/4 by the same formula,
+ // so the 6:1 figure only describes the nv12 and fp16 entries.
+ .max_downscale_factor = {
+ .argb8888 = 250,
+ .nv12 = 167,
+ .fp16 = 167
+ },
+ /*
+ * Positional initializers for the two members that follow
+ * max_downscale_factor in struct dc_plane_cap -- presumably the minimum
+ * plane width and height; TODO confirm against the struct definition.
+ */
+ 64,
+ 64
+};
+
+/*
+ * Default debug/tuning options for the DCN3.5 driver. Only the assignments
+ * below are visible here; where and how this table is consumed is outside
+ * this part of the file.
+ */
+static const struct dc_debug_options debug_defaults_drv = {
+ .disable_dmcu = true,
+ .force_abm_enable = false,
+ .clock_trace = true,
+ .disable_pplib_clock_request = false,
+ .pipe_split_policy = MPC_SPLIT_AVOID,
+ .force_single_disp_pipe_split = false,
+ .disable_dcc = DCC_ENABLE,
+ .disable_dpp_power_gate = true,
+ .disable_hubp_power_gate = true,
+ .disable_optc_power_gate = true, /* should be the same as the two above */
+ .disable_hpo_power_gate = true, /* dmubfw forces domain25 on */
+ .disable_clock_gate = false,
+ .disable_dsc_power_gate = true,
+ .vsr_support = true,
+ .performance_trace = false,
+ .max_downscale_src_width = 4096,/* up to true 4k */
+ .disable_pplib_wm_range = false,
+ .scl_reset_length10 = true,
+ .sanity_checks = false,
+ .underflow_assert_delay_us = 0xFFFFFFFF,
+ .dwb_fi_phase = -1, // -1 = disable,
+ .dmub_command_table = true,
+ .pstate_enabled = true,
+ .use_max_lb = true,
+ .enable_mem_low_power = {
+ .bits = {
+ .vga = false,
+ .i2c = true,
+ .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled
+ .dscl = true,
+ .cm = true,
+ .mpc = true,
+ .optc = true,
+ .vpg = true,
+ .afmt = true,
+ }
+ },
+ .root_clock_optimization = {
+ .bits = {
+ .dpp = true,
+ .dsc = true,/*dscclk and dsc pg*/
+ .hdmistream = true,
+ .hdmichar = true,
+ .dpstream = true,
+ .symclk32_se = true,
+ .symclk32_le = true,
+ .symclk_fe = true,
+ .physymclk = false,
+ .dpiasymclk = true,
+ }
+ },
+ /*
+ * NOTE(review): the value is a DML failure-reason style constant assigned
+ * to an ODM-combine setting -- confirm this is the intended enumerator.
+ */
+ .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT,
+ .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/
+ .minimum_z8_residency_time = 1, /* Always allow when other conditions are met */
+ .using_dml2 = true,
+ .support_eDP1_5 = true,
+ .enable_hpo_pg_support = false,
+ .enable_legacy_fast_update = true,
+ .enable_single_display_2to1_odm_policy = true,
+ .disable_idle_power_optimizations = false,
+ .dmcub_emulation = false,
+ .disable_boot_optimizations = false,
+ .disable_unbounded_requesting = false,
+ .disable_mem_low_power = false,
+ //must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions
+ .enable_double_buffered_dsc_pg_support = true,
+ .enable_dp_dig_pixel_rate_div_policy = 1,
+ .disable_z10 = false,
+ .ignore_pg = true,
+ .psp_disabled_wa = true,
+ .ips2_eval_delay_us = 2000,
+ .ips2_entry_delay_us = 800,
+ .disable_dmub_reallow_idle = false,
+ .static_screen_wait_frames = 2,
+ .disable_timeout = true,
+ .min_disp_clk_khz = 50000,
+};
+
+/*
+ * Default panel configuration; copied out to callers by
+ * dcn35_get_panel_config_defaults().
+ */
+static const struct dc_panel_config panel_config_defaults = {
+ .psr = {
+ .disable_psr = false,
+ .disallow_psrsu = false,
+ .disallow_replay = false,
+ },
+ .ilr = {
+ .optimize_edp_link_rate = true,
+ },
+};
+
+/*
+ * Release the DPP object referenced by *dpp and clear the caller's pointer.
+ *
+ * @dpp: address of the base dpp pointer; set to NULL on return
+ */
+static void dcn35_dpp_destroy(struct dpp **dpp)
+{
+	struct dpp *base = *dpp;
+
+	/* The containing object is what was allocated, so free that. */
+	*dpp = NULL;
+	kfree(TO_DCN20_DPP(base));
+}
+
+/*
+ * Allocate and construct the DPP object for instance inst.
+ *
+ * Runs the dpp_regs_init() macros for instances 0..3, constructs the object
+ * with dpp35_construct() using dpp_regs[inst], and on success applies the
+ * fine-grain clock gating debug setting.
+ *
+ * Returns the embedded base dpp, or NULL when the allocation or the
+ * construction fails (the allocation is freed in the latter case).
+ */
+static struct dpp *dcn35_dpp_create(struct dc_context *ctx, uint32_t inst)
+{
+ struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL);
+ bool success = (dpp != NULL);
+
+ if (!success)
+ return NULL;
+
+ /*
+ * NOTE(review): REG_STRUCT presumably names the array the *_regs_init()
+ * macros fill, and those macros may reference locals such as ctx --
+ * confirm against the macro definitions before renaming anything here.
+ */
+#undef REG_STRUCT
+#define REG_STRUCT dpp_regs
+ dpp_regs_init(0),
+ dpp_regs_init(1),
+ dpp_regs_init(2),
+ dpp_regs_init(3);
+
+ success = dpp35_construct(dpp, ctx, inst, &dpp_regs[inst], &tf_shift,
+ &tf_mask);
+ if (success) {
+ dpp35_set_fgcg(
+ dpp,
+ ctx->dc->debug.enable_fine_grain_clock_gating.bits.dpp);
+ return &dpp->base;
+ }
+
+ /* Construction failed: trap for debugging, then release the allocation. */
+ BREAK_TO_DEBUGGER();
+ kfree(dpp);
+ return NULL;
+}
+
+/*
+ * Allocate and construct the output pixel processor for instance inst.
+ *
+ * Runs the opp_regs_init() macros for instances 0..3, constructs the object
+ * with opp_regs[inst] and applies the fine-grain clock gating debug setting.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct output_pixel_processor *dcn35_opp_create(
+ struct dc_context *ctx, uint32_t inst)
+{
+ struct dcn20_opp *opp =
+ kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL);
+
+ if (!opp) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+#undef REG_STRUCT
+#define REG_STRUCT opp_regs
+ opp_regs_init(0),
+ opp_regs_init(1),
+ opp_regs_init(2),
+ opp_regs_init(3);
+
+ dcn35_opp_construct(opp, ctx, inst,
+ &opp_regs[inst], &opp_shift, &opp_mask);
+
+ dcn35_opp_set_fgcg(opp, ctx->dc->debug.enable_fine_grain_clock_gating.bits.opp);
+
+ return &opp->base;
+}
+
+/*
+ * Allocate and construct the AUX engine for instance inst.
+ *
+ * Runs the aux_engine_regs_init() macros for instances 0..4 and constructs
+ * the engine with aux_engine_regs[inst], a timeout of
+ * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD and the
+ * extended_aux_timeout_support capability from ctx->dc->caps.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct dce_aux *dcn31_aux_engine_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct aux_engine_dce110 *aux_engine =
+ kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL);
+
+ if (!aux_engine)
+ return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT aux_engine_regs
+ aux_engine_regs_init(0),
+ aux_engine_regs_init(1),
+ aux_engine_regs_init(2),
+ aux_engine_regs_init(3),
+ aux_engine_regs_init(4);
+
+ /* Note the argument order: the mask table is passed before the shift table. */
+ dce110_aux_engine_construct(aux_engine, ctx, inst,
+ SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
+ &aux_engine_regs[inst],
+ &aux_mask,
+ &aux_shift,
+ ctx->dc->caps.extended_aux_timeout_support);
+
+ return &aux_engine->base;
+}
+
+/* Per-instance I2C register init; a thin alias for the DCN3.0 register list. */
+#define i2c_inst_regs_init(id)\
+ I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id)
+
+/* One register set per hardware I2C engine; filled in dcn31_i2c_hw_create(). */
+static struct dce_i2c_registers i2c_hw_regs[5];
+
+/* I2C field shifts (DCN3.5 field list, __SHIFT suffix). */
+static const struct dce_i2c_shift i2c_shifts = {
+ I2C_COMMON_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+/* I2C field masks (DCN3.5 field list, _MASK suffix). */
+static const struct dce_i2c_mask i2c_masks = {
+ I2C_COMMON_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/* ========================================================== */
+
+/*
+ * DPIA index | Preferred Encoder | Host Router
+ * 0 | C | 0
+ * 1 | First Available | 0
+ * 2 | D | 1
+ * 3 | First Available | 1
+ *
+ * NOTE(review): the table below stores DIGC for index 1 and DIGD for
+ * index 3 rather than a "first available" marker -- confirm whether the
+ * comment or the table reflects the intended policy.
+ */
+/* ========================================================== */
+/* Preferred DIG engine per DPIA index; indexed directly by the DPIA index. */
+static const enum engine_id dpia_to_preferred_enc_id_table[] = {
+ ENGINE_ID_DIGC,
+ ENGINE_ID_DIGC,
+ ENGINE_ID_DIGD,
+ ENGINE_ID_DIGD
+};
+
+/*
+ * Look up the preferred DIG engine for a DPIA index.
+ *
+ * NOTE(review): dpia_index is used unchecked; the table has four entries, so
+ * callers presumably guarantee dpia_index < 4 -- confirm.
+ */
+static enum engine_id dcn35_get_preferred_eng_id_dpia(unsigned int dpia_index)
+{
+ return dpia_to_preferred_enc_id_table[dpia_index];
+}
+
+/*
+ * Allocate and construct the hardware I2C engine for instance inst.
+ *
+ * Returns the new object, or NULL if the allocation fails.
+ */
+static struct dce_i2c_hw *dcn31_i2c_hw_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct dce_i2c_hw *dce_i2c_hw =
+ kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL);
+
+ if (!dce_i2c_hw)
+ return NULL;
+
+ /*
+ * NOTE(review): the init ids run 1..5 while i2c_hw_regs has 5 entries
+ * and is read with the zero-based inst below; presumably the macro maps
+ * id to index id - 1 -- confirm against
+ * I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI().
+ */
+#undef REG_STRUCT
+#define REG_STRUCT i2c_hw_regs
+ i2c_inst_regs_init(1),
+ i2c_inst_regs_init(2),
+ i2c_inst_regs_init(3),
+ i2c_inst_regs_init(4),
+ i2c_inst_regs_init(5);
+
+ dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst,
+ &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks);
+
+ return dce_i2c_hw;
+}
+/*
+ * Allocate and construct the MPC block.
+ *
+ * @ctx: DC context
+ * @num_mpcc: forwarded unchanged to dcn32_mpc_construct()
+ * @num_rmu: forwarded unchanged to dcn32_mpc_construct()
+ *
+ * Returns the embedded base mpc, or NULL if the allocation fails.
+ */
+static struct mpc *dcn35_mpc_create(
+ struct dc_context *ctx,
+ int num_mpcc,
+ int num_rmu)
+{
+ struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), GFP_KERNEL);
+
+ if (!mpc30)
+ return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT mpc_regs
+ dcn_mpc_regs_init();
+
+ dcn32_mpc_construct(mpc30, ctx,
+ &mpc_regs,
+ &mpc_shift,
+ &mpc_mask,
+ num_mpcc,
+ num_rmu);
+
+ return &mpc30->base;
+}
+
+/*
+ * Allocate and construct the HUBBUB block and wire up its VMID entries.
+ *
+ * Runs the hubbub and VMID register-init macros, constructs the object with
+ * fixed buffer sizes, then points each of the res_cap_dcn35.num_vmid VMID
+ * entries at its register set and the shared shift/mask tables.
+ *
+ * Returns the embedded base hubbub, or NULL if the allocation fails.
+ */
+static struct hubbub *dcn35_hubbub_create(struct dc_context *ctx)
+{
+ int i;
+
+ struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub),
+ GFP_KERNEL);
+
+ if (!hubbub3)
+ return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT hubbub_reg
+ hubbub_reg_init();
+
+#undef REG_STRUCT
+#define REG_STRUCT vmid_regs
+ vmid_regs_init(0),
+ vmid_regs_init(1),
+ vmid_regs_init(2),
+ vmid_regs_init(3),
+ vmid_regs_init(4),
+ vmid_regs_init(5),
+ vmid_regs_init(6),
+ vmid_regs_init(7),
+ vmid_regs_init(8),
+ vmid_regs_init(9),
+ vmid_regs_init(10),
+ vmid_regs_init(11),
+ vmid_regs_init(12),
+ vmid_regs_init(13),
+ vmid_regs_init(14),
+ vmid_regs_init(15);
+
+ /*
+ * The three sizes are hard-coded; the trailing comments name the DML IP
+ * fields they stand in for.
+ */
+ hubbub35_construct(hubbub3, ctx,
+ &hubbub_reg,
+ &hubbub_shift,
+ &hubbub_mask,
+ 384,/*ctx->dc->dml.ip.det_buffer_size_kbytes,*/
+ 8, /*ctx->dc->dml.ip.pixel_chunk_size_kbytes,*/
+ 1792 /*ctx->dc->dml.ip.config_return_buffer_size_in_kbytes*/);
+
+
+ /* num_vmid (16) matches the size of the vmid_regs array. */
+ for (i = 0; i < res_cap_dcn35.num_vmid; i++) {
+ struct dcn20_vmid *vmid = &hubbub3->vmid[i];
+
+ vmid->ctx = ctx;
+
+ vmid->regs = &vmid_regs[i];
+ vmid->shifts = &vmid_shifts;
+ vmid->masks = &vmid_masks;
+ }
+
+ return &hubbub3->base;
+}
+
+/*
+ * Allocate and initialise the timing generator (OPTC) for the given instance.
+ *
+ * Runs the optc_regs_init() macros for instances 0..3, fills in the instance
+ * number, context and register/shift/mask pointers, then calls
+ * dcn35_timing_generator_init().
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct timing_generator *dcn35_timing_generator_create(
+ struct dc_context *ctx,
+ uint32_t instance)
+{
+ struct optc *tgn10 =
+ kzalloc(sizeof(struct optc), GFP_KERNEL);
+
+ if (!tgn10)
+ return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT optc_regs
+ optc_regs_init(0),
+ optc_regs_init(1),
+ optc_regs_init(2),
+ optc_regs_init(3);
+
+ tgn10->base.inst = instance;
+ tgn10->base.ctx = ctx;
+
+ tgn10->tg_regs = &optc_regs[instance];
+ tgn10->tg_shift = &optc_shift;
+ tgn10->tg_mask = &optc_mask;
+
+ dcn35_timing_generator_init(tgn10);
+
+ return &tgn10->base;
+}
+
+/*
+ * Feature set handed to every link encoder constructed in this file, both
+ * the full and the minimal variant.
+ */
+static const struct encoder_feature_support link_enc_feature = {
+ .max_hdmi_deep_color = COLOR_DEPTH_121212,
+ .max_hdmi_pixel_clock = 600000,
+ .hdmi_ycbcr420_supported = true,
+ .dp_ycbcr420_supported = true,
+ .fec_supported = true,
+ .flags.bits.IS_HBR2_CAPABLE = true,
+ .flags.bits.IS_HBR3_CAPABLE = true,
+ .flags.bits.IS_TPS3_CAPABLE = true,
+ .flags.bits.IS_TPS4_CAPABLE = true
+};
+
+/*
+ * Allocate and construct a DCN3.5 link encoder for one physical connector.
+ *
+ * @ctx: DC context
+ * @enc_init_data: connector description; its transmitter, channel and
+ *                 hpd_source fields select the register sets that are used
+ *
+ * Returns the embedded base link_encoder, or NULL when hpd_source is out of
+ * range for link_enc_hpd_regs or when the allocation fails.
+ */
+static struct link_encoder *dcn35_link_encoder_create(
+	struct dc_context *ctx,
+	const struct encoder_init_data *enc_init_data)
+{
+	struct dcn20_link_encoder *enc20;
+
+	/*
+	 * Validate before allocating so that an out-of-range hpd_source cannot
+	 * leak the encoder allocation.
+	 */
+	if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
+		return NULL;
+
+	enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
+	if (!enc20)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_aux_regs
+	aux_regs_init(0),
+	aux_regs_init(1),
+	aux_regs_init(2),
+	aux_regs_init(3),
+	aux_regs_init(4);
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_hpd_regs
+	hpd_regs_init(0),
+	hpd_regs_init(1),
+	hpd_regs_init(2),
+	hpd_regs_init(3),
+	hpd_regs_init(4);
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_regs
+	link_regs_init(0, A),
+	link_regs_init(1, B),
+	link_regs_init(2, C),
+	link_regs_init(3, D),
+	link_regs_init(4, E);
+
+	/*
+	 * NOTE(review): transmitter and (channel - 1) are used as array
+	 * indices without a range check; presumably the caller guarantees
+	 * they are valid (and that channel is at least 1) -- confirm.
+	 */
+	dcn35_link_encoder_construct(enc20,
+			enc_init_data,
+			&link_enc_feature,
+			&link_enc_regs[enc_init_data->transmitter],
+			&link_enc_aux_regs[enc_init_data->channel - 1],
+			&link_enc_hpd_regs[enc_init_data->hpd_source],
+			&le_shift,
+			&le_mask);
+
+	return &enc20->enc10.base;
+}
+
+/* Create a minimal link encoder object not associated with a particular
+ * physical connector.
+ * resource_funcs.link_enc_create_minimal
+ *
+ * @ctx: DC context; ctx->dc->res_pool->res_cap supplies the encoder count
+ * @eng_id: DIG engine id; its offset from ENGINE_ID_DIGA selects the
+ *          link_enc_regs entry
+ *
+ * Returns the embedded base link_encoder, or NULL when eng_id does not map to
+ * one of the num_dig_link_enc encoders or when the allocation fails.
+ */
+static struct link_encoder *dcn31_link_enc_create_minimal(
+	struct dc_context *ctx, enum engine_id eng_id)
+{
+	struct dcn20_link_encoder *enc20;
+
+	/*
+	 * Valid indices are 0 .. num_dig_link_enc - 1, so an index equal to the
+	 * count must be rejected as well.
+	 */
+	if ((eng_id - ENGINE_ID_DIGA) >= ctx->dc->res_pool->res_cap->num_dig_link_enc)
+		return NULL;
+
+	enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
+	if (!enc20)
+		return NULL;
+
+	dcn31_link_encoder_construct_minimal(
+			enc20,
+			ctx,
+			&link_enc_feature,
+			&link_enc_regs[eng_id - ENGINE_ID_DIGA],
+			eng_id);
+
+	return &enc20->enc10.base;
+}
+
+/*
+ * Allocate a panel control object and construct it from init_data.
+ *
+ * Returns the embedded base panel_cntl, or NULL if the allocation fails.
+ */
+static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+	struct dcn31_panel_cntl *cntl;
+
+	cntl = kzalloc(sizeof(*cntl), GFP_KERNEL);
+	if (cntl == NULL)
+		return NULL;
+
+	dcn31_panel_cntl_construct(cntl, init_data);
+	return &cntl->base;
+}
+
+/*
+ * Read the DC_PINSTRAPS_AUDIO field of the DC_PINSTRAPS register into
+ * straps->dc_pinstraps_audio.
+ */
+static void read_dce_straps(
+ struct dc_context *ctx,
+ struct resource_straps *straps)
+{
+ generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX),
+ FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio);
+
+}
+
+/*
+ * Create the audio object for instance inst.
+ *
+ * Runs the audio_regs_init() macros for instances 0..6 and returns the
+ * result of dce_audio_create() for audio_regs[inst].
+ */
+static struct audio *dcn31_create_audio(
+ struct dc_context *ctx, unsigned int inst)
+{
+
+ /*
+ * NOTE(review): entries 0..3 are chained with the comma operator while
+ * 4..6 are separate statements. Presumably both forms behave the same
+ * because each macro expands to plain assignments -- confirm.
+ */
+#undef REG_STRUCT
+#define REG_STRUCT audio_regs
+ audio_regs_init(0),
+ audio_regs_init(1),
+ audio_regs_init(2),
+ audio_regs_init(3),
+ audio_regs_init(4);
+ audio_regs_init(5);
+ audio_regs_init(6);
+
+ return dce_audio_create(ctx, inst,
+ &audio_regs[inst], &audio_shift, &audio_mask);
+}
+
+/*
+ * Allocate and construct the VPG object for instance inst.
+ *
+ * Runs the vpg_regs_init() macros for instances 0..9. Within this file,
+ * instances 6..9 are requested by the HPO DP stream encoder path.
+ *
+ * Returns the embedded base vpg, or NULL if the allocation fails.
+ */
+static struct vpg *dcn31_vpg_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL);
+
+ if (!vpg31)
+ return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT vpg_regs
+ vpg_regs_init(0),
+ vpg_regs_init(1),
+ vpg_regs_init(2),
+ vpg_regs_init(3),
+ vpg_regs_init(4),
+ vpg_regs_init(5),
+ vpg_regs_init(6),
+ vpg_regs_init(7),
+ vpg_regs_init(8),
+ vpg_regs_init(9);
+
+ vpg31_construct(vpg31, ctx, inst,
+ &vpg_regs[inst],
+ &vpg_shift,
+ &vpg_mask);
+
+ return &vpg31->base;
+}
+
+/*
+ * Allocate and construct the AFMT object for instance inst.
+ *
+ * Runs the afmt_regs_init() macros for instances 0..5.
+ *
+ * Returns the embedded base afmt, or NULL if the allocation fails.
+ */
+static struct afmt *dcn31_afmt_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL);
+
+ if (!afmt31)
+ return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT afmt_regs
+ afmt_regs_init(0),
+ afmt_regs_init(1),
+ afmt_regs_init(2),
+ afmt_regs_init(3),
+ afmt_regs_init(4),
+ afmt_regs_init(5);
+
+ afmt31_construct(afmt31, ctx, inst,
+ &afmt_regs[inst],
+ &afmt_shift,
+ &afmt_mask);
+
+ // Light sleep by default, no need to power down here
+
+ return &afmt31->base;
+}
+
+/*
+ * Allocate and construct the APG object for instance inst.
+ *
+ * Runs the apg_regs_init() macros for instances 0..3.
+ *
+ * Returns the embedded base apg, or NULL if the allocation fails.
+ */
+static struct apg *dcn31_apg_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL);
+
+ if (!apg31)
+ return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT apg_regs
+ apg_regs_init(0),
+ apg_regs_init(1),
+ apg_regs_init(2),
+ apg_regs_init(3);
+
+ apg31_construct(apg31, ctx, inst,
+ &apg_regs[inst],
+ &apg_shift,
+ &apg_mask);
+
+ return &apg31->base;
+}
+
+/*
+ * Create the DIO stream encoder for eng_id together with its VPG and AFMT
+ * sub-blocks.
+ *
+ * The VPG and AFMT instance numbers are taken directly from eng_id.
+ *
+ * Returns the embedded base stream_encoder, or NULL when eng_id is above
+ * ENGINE_ID_DIGF or when any of the three allocations fails.
+ */
+static struct stream_encoder *dcn35_stream_encoder_create(
+ enum engine_id eng_id,
+ struct dc_context *ctx)
+{
+ struct dcn10_stream_encoder *enc1;
+ struct vpg *vpg;
+ struct afmt *afmt;
+ int vpg_inst;
+ int afmt_inst;
+
+ /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
+ /*
+ * NOTE(review): only stream_enc_regs 0..4 are initialised below, yet
+ * the check admits ids up to ENGINE_ID_DIGF -- confirm that callers
+ * never pass an id beyond the initialised range.
+ */
+ if (eng_id <= ENGINE_ID_DIGF) {
+ vpg_inst = eng_id;
+ afmt_inst = eng_id;
+ } else
+ return NULL;
+
+ enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
+ vpg = dcn31_vpg_create(ctx, vpg_inst);
+ afmt = dcn31_afmt_create(ctx, afmt_inst);
+
+ /*
+ * All three are attempted before checking, so on failure each one is
+ * released; kfree() accepts NULL.
+ * NOTE(review): vpg and afmt are pointers to the embedded base members;
+ * freeing them directly presumably relies on base being the first
+ * member of the containing structs -- confirm.
+ */
+ if (!enc1 || !vpg || !afmt) {
+ kfree(enc1);
+ kfree(vpg);
+ kfree(afmt);
+ return NULL;
+ }
+
+#undef REG_STRUCT
+#define REG_STRUCT stream_enc_regs
+ stream_enc_regs_init(0),
+ stream_enc_regs_init(1),
+ stream_enc_regs_init(2),
+ stream_enc_regs_init(3),
+ stream_enc_regs_init(4);
+
+ dcn35_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios,
+ eng_id, vpg, afmt,
+ &stream_enc_regs[eng_id],
+ &se_shift, &se_mask);
+
+ return &enc1->base;
+}
+
+/*
+ * Create the HPO DP stream encoder for eng_id together with its VPG and APG
+ * sub-blocks.
+ *
+ * The HPO instance is eng_id - ENGINE_ID_HPO_DP_0; the VPG instance is that
+ * value plus 6 and the APG instance equals it.
+ *
+ * Returns the embedded base object, or NULL if any allocation fails.
+ */
+static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create(
+ enum engine_id eng_id,
+ struct dc_context *ctx)
+{
+ struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31;
+ struct vpg *vpg;
+ struct apg *apg;
+ uint32_t hpo_dp_inst;
+ uint32_t vpg_inst;
+ uint32_t apg_inst;
+
+ /* The range is only asserted; execution continues past a failed check. */
+ ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3));
+ hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0;
+
+ /* Mapping of VPG register blocks to HPO DP block instance:
+ * VPG[6] -> HPO_DP[0]
+ * VPG[7] -> HPO_DP[1]
+ * VPG[8] -> HPO_DP[2]
+ * VPG[9] -> HPO_DP[3]
+ */
+ vpg_inst = hpo_dp_inst + 6;
+
+ /* Mapping of APG register blocks to HPO DP block instance:
+ * APG[0] -> HPO_DP[0]
+ * APG[1] -> HPO_DP[1]
+ * APG[2] -> HPO_DP[2]
+ * APG[3] -> HPO_DP[3]
+ */
+ apg_inst = hpo_dp_inst;
+
+ /* allocate HPO stream encoder and create VPG sub-block */
+ hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL);
+ vpg = dcn31_vpg_create(ctx, vpg_inst);
+ apg = dcn31_apg_create(ctx, apg_inst);
+
+ /* Release whatever was obtained if any of the three is missing. */
+ if (!hpo_dp_enc31 || !vpg || !apg) {
+ kfree(hpo_dp_enc31);
+ kfree(vpg);
+ kfree(apg);
+ return NULL;
+ }
+
+#undef REG_STRUCT
+#define REG_STRUCT hpo_dp_stream_enc_regs
+ hpo_dp_stream_encoder_reg_init(0),
+ hpo_dp_stream_encoder_reg_init(1),
+ hpo_dp_stream_encoder_reg_init(2),
+ hpo_dp_stream_encoder_reg_init(3);
+
+ dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios,
+ hpo_dp_inst, eng_id, vpg, apg,
+ &hpo_dp_stream_enc_regs[hpo_dp_inst],
+ &hpo_dp_se_shift, &hpo_dp_se_mask);
+
+ return &hpo_dp_enc31->base;
+}
+
+/*
+ * Allocate and construct the HPO DP link encoder for instance inst.
+ *
+ * Runs the hpo_dp_link_encoder_reg_init() macros for instances 0 and 1.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create(
+ uint8_t inst,
+ struct dc_context *ctx)
+{
+ struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31;
+
+ /* allocate HPO link encoder */
+ hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL);
+ if (!hpo_dp_enc31)
+ return NULL; /* out of memory */
+
+#undef REG_STRUCT
+#define REG_STRUCT hpo_dp_link_enc_regs
+ hpo_dp_link_encoder_reg_init(0),
+ hpo_dp_link_encoder_reg_init(1);
+
+ hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst,
+ &hpo_dp_link_enc_regs[inst],
+ &hpo_dp_le_shift, &hpo_dp_le_mask);
+
+ return &hpo_dp_enc31->base;
+}
+
+/*
+ * Allocate the hardware sequencer object and attach the DCN3.5 register,
+ * shift and mask tables.
+ *
+ * Returns the new object, or NULL if the allocation fails.
+ */
+static struct dce_hwseq *dcn35_hwseq_create(
+ struct dc_context *ctx)
+{
+ struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL);
+
+ /* The register table is initialised whether or not the allocation succeeded. */
+#undef REG_STRUCT
+#define REG_STRUCT hwseq_reg
+ hwseq_reg_init();
+
+ if (hws) {
+ hws->ctx = ctx;
+ hws->regs = &hwseq_reg;
+ hws->shifts = &hwseq_shift;
+ hws->masks = &hwseq_mask;
+ }
+ return hws;
+}
+/* Creation callbacks for straps, audio, stream/HPO encoders and the hwseq. */
+static const struct resource_create_funcs res_create_funcs = {
+ .read_dce_straps = read_dce_straps,
+ .create_audio = dcn31_create_audio,
+ .create_stream_encoder = dcn35_stream_encoder_create,
+ .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create,
+ .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create,
+ .create_hwseq = dcn35_hwseq_create,
+};
+
+/*
+ * Release every object owned by the resource pool.
+ *
+ * Each entry is checked for NULL before it is freed or destroyed, and the
+ * directly freed pointers are reset to NULL. The pool structure itself is
+ * not freed here.
+ */
+static void dcn35_resource_destruct(struct dcn35_resource_pool *pool)
+{
+ unsigned int i;
+
+ /* DIO stream encoders, each with its VPG and AFMT sub-blocks. */
+ for (i = 0; i < pool->base.stream_enc_count; i++) {
+ if (pool->base.stream_enc[i] != NULL) {
+ if (pool->base.stream_enc[i]->vpg != NULL) {
+ kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg));
+ pool->base.stream_enc[i]->vpg = NULL;
+ }
+ if (pool->base.stream_enc[i]->afmt != NULL) {
+ kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt));
+ pool->base.stream_enc[i]->afmt = NULL;
+ }
+ kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i]));
+ pool->base.stream_enc[i] = NULL;
+ }
+ }
+
+ /* HPO DP stream encoders, each with its VPG and APG sub-blocks. */
+ for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) {
+ if (pool->base.hpo_dp_stream_enc[i] != NULL) {
+ if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) {
+ kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg));
+ pool->base.hpo_dp_stream_enc[i]->vpg = NULL;
+ }
+ if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) {
+ kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg));
+ pool->base.hpo_dp_stream_enc[i]->apg = NULL;
+ }
+ kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i]));
+ pool->base.hpo_dp_stream_enc[i] = NULL;
+ }
+ }
+
+ /* HPO DP link encoders. */
+ for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) {
+ if (pool->base.hpo_dp_link_enc[i] != NULL) {
+ kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i]));
+ pool->base.hpo_dp_link_enc[i] = NULL;
+ }
+ }
+
+ /* Display stream compressors. */
+ for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
+ if (pool->base.dscs[i] != NULL)
+ dcn20_dsc_destroy(&pool->base.dscs[i]);
+ }
+
+ if (pool->base.mpc != NULL) {
+ kfree(TO_DCN20_MPC(pool->base.mpc));
+ pool->base.mpc = NULL;
+ }
+ /*
+ * NOTE(review): the hubbub base pointer is freed directly, unlike the
+ * container conversions used elsewhere in this function; presumably the
+ * base is the first member of the containing struct -- confirm.
+ */
+ if (pool->base.hubbub != NULL) {
+ kfree(pool->base.hubbub);
+ pool->base.hubbub = NULL;
+ }
+ /* Per-pipe blocks: DPP, IPP and HUBP. */
+ for (i = 0; i < pool->base.pipe_count; i++) {
+ if (pool->base.dpps[i] != NULL)
+ dcn35_dpp_destroy(&pool->base.dpps[i]);
+
+ if (pool->base.ipps[i] != NULL)
+ pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]);
+
+ if (pool->base.hubps[i] != NULL) {
+ kfree(TO_DCN20_HUBP(pool->base.hubps[i]));
+ pool->base.hubps[i] = NULL;
+ }
+
+ /*
+ * NOTE(review): the IRQ service is not per-pipe but is torn down
+ * inside this loop, so it is skipped when pipe_count is 0 and
+ * re-checked on every later iteration -- confirm whether it
+ * should move outside the loop.
+ */
+ if (pool->base.irqs != NULL) {
+ dal_irq_service_destroy(&pool->base.irqs);
+ }
+ }
+
+ /* AUX engines and hardware/software I2C engines, one set per DDC line. */
+ for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
+ if (pool->base.engines[i] != NULL)
+ dce110_engine_destroy(&pool->base.engines[i]);
+ if (pool->base.hw_i2cs[i] != NULL) {
+ kfree(pool->base.hw_i2cs[i]);
+ pool->base.hw_i2cs[i] = NULL;
+ }
+ if (pool->base.sw_i2cs[i] != NULL) {
+ kfree(pool->base.sw_i2cs[i]);
+ pool->base.sw_i2cs[i] = NULL;
+ }
+ }
+
+ /* Output pixel processors, released through their own destroy hook. */
+ for (i = 0; i < pool->base.res_cap->num_opp; i++) {
+ if (pool->base.opps[i] != NULL)
+ pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]);
+ }
+
+ /* Timing generators. */
+ for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+ if (pool->base.timing_generators[i] != NULL) {
+ kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i]));
+ pool->base.timing_generators[i] = NULL;
+ }
+ }
+
+ /* Writeback: DWBC and MCIF_WB objects. */
+ for (i = 0; i < pool->base.res_cap->num_dwb; i++) {
+ if (pool->base.dwbc[i] != NULL) {
+ kfree(TO_DCN30_DWBC(pool->base.dwbc[i]));
+ pool->base.dwbc[i] = NULL;
+ }
+ if (pool->base.mcif_wb[i] != NULL) {
+ kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i]));
+ pool->base.mcif_wb[i] = NULL;
+ }
+ }
+
+ /* Audio endpoints. */
+ for (i = 0; i < pool->base.audio_count; i++) {
+ if (pool->base.audios[i])
+ dce_aud_destroy(&pool->base.audios[i]);
+ }
+
+ /* Clock sources. */
+ for (i = 0; i < pool->base.clk_src_count; i++) {
+ if (pool->base.clock_sources[i] != NULL) {
+ dcn20_clock_source_destroy(&pool->base.clock_sources[i]);
+ pool->base.clock_sources[i] = NULL;
+ }
+ }
+
+ /* MPC 3D LUT and shaper objects are released, not freed directly. */
+ for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) {
+ if (pool->base.mpc_lut[i] != NULL) {
+ dc_3dlut_func_release(pool->base.mpc_lut[i]);
+ pool->base.mpc_lut[i] = NULL;
+ }
+ if (pool->base.mpc_shaper[i] != NULL) {
+ dc_transfer_func_release(pool->base.mpc_shaper[i]);
+ pool->base.mpc_shaper[i] = NULL;
+ }
+ }
+
+ if (pool->base.dp_clock_source != NULL) {
+ dcn20_clock_source_destroy(&pool->base.dp_clock_source);
+ pool->base.dp_clock_source = NULL;
+ }
+
+ /* ABM objects; the loop bound is the timing generator count. */
+ for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+ if (pool->base.multiple_abms[i] != NULL)
+ dce_abm_destroy(&pool->base.multiple_abms[i]);
+ }
+
+ if (pool->base.psr != NULL)
+ dmub_psr_destroy(&pool->base.psr);
+
+ if (pool->base.replay != NULL)
+ dmub_replay_destroy(&pool->base.replay);
+
+ if (pool->base.pg_cntl != NULL)
+ dcn_pg_cntl_destroy(&pool->base.pg_cntl);
+
+ if (pool->base.dccg != NULL)
+ dcn_dccg_destroy(&pool->base.dccg);
+}
+
+/*
+ * Allocate and construct the HUBP object for instance inst.
+ *
+ * Runs the hubp_regs_init() macros for instances 0..3.
+ *
+ * Returns the embedded base hubp, or NULL when the allocation or
+ * hubp35_construct() fails (the allocation is freed in the latter case).
+ */
+static struct hubp *dcn35_hubp_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct dcn20_hubp *hubp2 =
+ kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL);
+
+ if (!hubp2)
+ return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT hubp_regs
+ hubp_regs_init(0),
+ hubp_regs_init(1),
+ hubp_regs_init(2),
+ hubp_regs_init(3);
+
+ if (hubp35_construct(hubp2, ctx, inst,
+ &hubp_regs[inst], &hubp_shift, &hubp_mask))
+ return &hubp2->base;
+
+ /* Construction failed: trap for debugging, then release the allocation. */
+ BREAK_TO_DEBUGGER();
+ kfree(hubp2);
+ return NULL;
+}
+
+/* Apply the fine-grain clock gating debug setting for DWB to dwbc30. */
+static void dcn35_dwbc_init(struct dcn30_dwbc *dwbc30, struct dc_context *ctx)
+{
+ dcn35_dwbc_set_fgcg(
+ dwbc30, ctx->dc->debug.enable_fine_grain_clock_gating.bits.dwb);
+}
+
+/*
+ * Create one DWBC object per writeback pipe (pool->res_cap->num_dwb) and
+ * store them in pool->dwbc[].
+ *
+ * Returns true on success, or false as soon as an allocation fails. Objects
+ * created in earlier iterations stay in the pool in that case.
+ */
+static bool dcn35_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
+{
+ int i;
+ uint32_t pipe_count = pool->res_cap->num_dwb;
+
+ /* NOTE(review): i is signed while pipe_count is unsigned. */
+ for (i = 0; i < pipe_count; i++) {
+ struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc),
+ GFP_KERNEL);
+
+ if (!dwbc30) {
+ dm_error("DC: failed to create dwbc30!\n");
+ return false;
+ }
+
+ /*
+ * NOTE(review): only register set 0 is initialised while
+ * dwbc35_regs[i] is used below; that matches num_dwb == 1 in
+ * res_cap_dcn35 -- revisit if the count ever grows.
+ */
+#undef REG_STRUCT
+#define REG_STRUCT dwbc35_regs
+ dwbc_regs_dcn3_init(0);
+
+ dcn35_dwbc_construct(dwbc30, ctx,
+ &dwbc35_regs[i],
+ &dwbc35_shift,
+ &dwbc35_mask,
+ i);
+
+ pool->dwbc[i] = &dwbc30->base;
+
+ dcn35_dwbc_init(dwbc30, ctx);
+ }
+ return true;
+}
+
+/* Apply the fine-grain clock gating debug setting for MMHUBBUB to mcif_wb30. */
+static void dcn35_mmhubbub_init(struct dcn30_mmhubbub *mcif_wb30,
+ struct dc_context *ctx)
+{
+ dcn35_mmhubbub_set_fgcg(
+ mcif_wb30,
+ ctx->dc->debug.enable_fine_grain_clock_gating.bits.mmhubbub);
+}
+
+/*
+ * Create one MCIF_WB object per writeback pipe (pool->res_cap->num_dwb) and
+ * store them in pool->mcif_wb[].
+ *
+ * Returns true on success, or false as soon as an allocation fails. Objects
+ * created in earlier iterations stay in the pool in that case.
+ */
+static bool dcn35_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool)
+{
+ int i;
+ uint32_t pipe_count = pool->res_cap->num_dwb;
+
+ /* NOTE(review): i is signed while pipe_count is unsigned. */
+ for (i = 0; i < pipe_count; i++) {
+ struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub),
+ GFP_KERNEL);
+
+ if (!mcif_wb30) {
+ dm_error("DC: failed to create mcif_wb30!\n");
+ return false;
+ }
+
+ /*
+ * NOTE(review): only register set 0 is initialised while
+ * mcif_wb35_regs[i] is used below; that matches num_dwb == 1 in
+ * res_cap_dcn35 -- revisit if the count ever grows.
+ */
+#undef REG_STRUCT
+#define REG_STRUCT mcif_wb35_regs
+ mcif_wb_regs_dcn3_init(0);
+
+ dcn35_mmhubbub_construct(mcif_wb30, ctx,
+ &mcif_wb35_regs[i],
+ &mcif_wb35_shift,
+ &mcif_wb35_mask,
+ i);
+
+ dcn35_mmhubbub_init(mcif_wb30, ctx);
+
+ pool->mcif_wb[i] = &mcif_wb30->base;
+ }
+ return true;
+}
+
+/*
+ * Allocate and construct the display stream compressor for instance inst.
+ *
+ * Runs the dsc_regsDCN35_init() macros for instances 0..3, constructs the
+ * object with dsc_regs[inst] and applies the fine-grain clock gating debug
+ * setting.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct display_stream_compressor *dcn35_dsc_create(
+ struct dc_context *ctx, uint32_t inst)
+{
+ struct dcn20_dsc *dsc =
+ kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL);
+
+ if (!dsc) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+#undef REG_STRUCT
+#define REG_STRUCT dsc_regs
+ dsc_regsDCN35_init(0),
+ dsc_regsDCN35_init(1),
+ dsc_regsDCN35_init(2),
+ dsc_regsDCN35_init(3);
+
+ dsc35_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
+ dsc35_set_fgcg(dsc,
+ ctx->dc->debug.enable_fine_grain_clock_gating.bits.dsc);
+ return &dsc->base;
+}
+
+/*
+ * Tear down a DCN3.5 resource pool: release everything it owns, free the
+ * pool object itself and clear the caller's pointer.
+ *
+ * @pool: address of the base resource_pool pointer; set to NULL on return
+ */
+static void dcn35_destroy_resource_pool(struct resource_pool **pool)
+{
+	struct resource_pool *base = *pool;
+	struct dcn35_resource_pool *owner = TO_DCN35_RES_POOL(base);
+
+	dcn35_resource_destruct(owner);
+	kfree(owner);
+
+	*pool = NULL;
+}
+
+/*
+ * Allocate and construct a clock source.
+ *
+ * @ctx: DC context
+ * @bios: BIOS object passed through to the constructor
+ * @id: clock source id passed through to the constructor
+ * @regs: register set passed through to the constructor
+ * @dp_clk_src: stored in the new object's dp_clk_src flag
+ *
+ * Returns the embedded base clock_source, or NULL when the allocation or
+ * dcn31_clk_src_construct() fails (the allocation is freed in the latter
+ * case).
+ */
+static struct clock_source *dcn35_clock_source_create(
+	struct dc_context *ctx,
+	struct dc_bios *bios,
+	enum clock_source_id id,
+	const struct dce110_clk_src_regs *regs,
+	bool dp_clk_src)
+{
+	struct dce110_clk_src *src;
+
+	src = kzalloc(sizeof(*src), GFP_KERNEL);
+	if (src == NULL)
+		return NULL;
+
+	if (!dcn31_clk_src_construct(src, ctx, bios, id,
+			regs, &cs_shift, &cs_mask)) {
+		/* Construction failed: release first, then trap for debugging. */
+		kfree(src);
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	src->base.dp_clk_src = dp_clk_src;
+	return &src->base;
+}
+
+/* Capability callbacks; DCC compression capability uses the DCN2.0 helper. */
+static struct dc_cap_funcs cap_funcs = {
+ .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
+};
+
+/* Copy the static panel_config_defaults table into *panel_config. */
+static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config)
+{
+ *panel_config = panel_config_defaults;
+}
+
+
+/*
+ * Validate context with DML2 and, when validating for programming, also run
+ * the Z-state support decision.
+ *
+ * The DML2 instance is context->bw_ctx.dml2_dc_power_source when
+ * context->power_source is DC_POWER_SOURCE_DC, and context->bw_ctx.dml2
+ * otherwise.
+ *
+ * Returns DC_OK when dml2_validate() succeeds and
+ * DC_FAIL_BANDWIDTH_VALIDATE otherwise.
+ */
+static enum dc_status dcn35_validate_bandwidth(struct dc *dc,
+		struct dc_state *context,
+		enum dc_validate_mode validate_mode)
+{
+	const bool on_dc_power = (context->power_source == DC_POWER_SOURCE_DC);
+	const bool valid = dml2_validate(dc, context,
+			on_dc_power ? context->bw_ctx.dml2_dc_power_source
+				    : context->bw_ctx.dml2,
+			validate_mode);
+
+	/*
+	 * The Z-state decision runs only on the programming path, and it runs
+	 * there regardless of the validation result.
+	 */
+	if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING) {
+		DC_FP_START();
+		dcn35_decide_zstate_support(dc, context);
+		DC_FP_END();
+	}
+
+	return valid ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
+}
+
+/*
+ * Set the plane's tiling gfx version to DcGfxVersion9 and then apply the
+ * DCN2.0 unknown-plane-state patch.
+ *
+ * Always returns DC_OK; the result of dcn20_patch_unknown_plane_state() is
+ * not examined.
+ */
+enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state)
+{
+ plane_state->tiling_info.gfxversion = DcGfxVersion9;
+ dcn20_patch_unknown_plane_state(plane_state);
+ return DC_OK;
+}
+
+
+/*
+ * Wrapper that brackets dcn35_populate_dml_pipes_from_context_fpu() with
+ * DC_FP_START()/DC_FP_END() and hands back its return value unchanged.
+ */
+static int populate_dml_pipes_from_context_fpu(struct dc *dc,
+		struct dc_state *context,
+		display_e2e_pipe_params_st *pipes,
+		enum dc_validate_mode validate_mode)
+{
+	int result;
+
+	DC_FP_START();
+	result = dcn35_populate_dml_pipes_from_context_fpu(dc, context,
+			pipes, validate_mode);
+	DC_FP_END();
+
+	return result;
+}
+
+/*
+ * Resource pool callback table, installed as pool->base.funcs in
+ * dcn35_resource_construct(). It mixes DCN3.5-specific handlers with
+ * helpers named for earlier DCN generations (dcn10/dcn20/dcn30/dcn31).
+ */
+static struct resource_funcs dcn35_res_pool_funcs = {
+ .destroy = dcn35_destroy_resource_pool,
+ .link_enc_create = dcn35_link_encoder_create,
+ .link_enc_create_minimal = dcn31_link_enc_create_minimal,
+ .link_encs_assign = link_enc_cfg_link_encs_assign,
+ .link_enc_unassign = link_enc_cfg_link_enc_unassign,
+ .panel_cntl_create = dcn31_panel_cntl_create,
+ .validate_bandwidth = dcn35_validate_bandwidth,
+ .calculate_wm_and_dlg = NULL,
+ .update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
+ .populate_dml_pipes = populate_dml_pipes_from_context_fpu,
+ .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
+ .add_stream_to_ctx = dcn30_add_stream_to_ctx,
+ .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
+ .remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
+ .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context,
+ .set_mcif_arb_params = dcn30_set_mcif_arb_params,
+ .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
+ .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
+ .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
+ .update_bw_bounding_box = dcn35_update_bw_bounding_box_fpu,
+ .patch_unknown_plane_state = dcn35_patch_unknown_plane_state,
+ .get_panel_config_defaults = dcn35_get_panel_config_defaults,
+ .get_preferred_eng_id_dpia = dcn35_get_preferred_eng_id_dpia,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe,
+ .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch,
+ .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params
+};
+
+static bool dcn35_resource_construct(
+ uint8_t num_virtual_links,
+ struct dc *dc,
+ struct dcn35_resource_pool *pool)
+{
+ int i;
+ struct dc_context *ctx = dc->ctx;
+ struct irq_service_init_data init_data;
+
+#undef REG_STRUCT
+#define REG_STRUCT bios_regs
+ bios_regs_init();
+
+#undef REG_STRUCT
+#define REG_STRUCT clk_src_regs
+ clk_src_regs_init(0, A),
+ clk_src_regs_init(1, B),
+ clk_src_regs_init(2, C),
+ clk_src_regs_init(3, D),
+ clk_src_regs_init(4, E);
+
+#undef REG_STRUCT
+#define REG_STRUCT abm_regs
+ abm_regs_init(0),
+ abm_regs_init(1),
+ abm_regs_init(2),
+ abm_regs_init(3);
+
+#undef REG_STRUCT
+#define REG_STRUCT dccg_regs
+ dccg_regs_init();
+
+ ctx->dc_bios->regs = &bios_regs;
+
+ pool->base.res_cap = &res_cap_dcn35;
+
+ pool->base.funcs = &dcn35_res_pool_funcs;
+
+ /*************************************************
+ * Resource + asic cap hardcoding *
+ *************************************************/
+ pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE;
+ pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
+ pool->base.mpcc_count = pool->base.res_cap->num_timing_generator;
+ dc->caps.max_downscale_ratio = 600;
+ dc->caps.i2c_speed_in_khz = 100;
+ dc->caps.i2c_speed_in_khz_hdcp = 100;
+ dc->caps.max_cursor_size = 256;
+ dc->caps.min_horizontal_blanking_period = 80;
+ dc->caps.dmdata_alloc_size = 2048;
+ dc->caps.max_slave_planes = 3;
+ dc->caps.max_slave_yuv_planes = 3;
+ dc->caps.max_slave_rgb_planes = 3;
+ dc->caps.post_blend_color_processing = true;
+ dc->caps.force_dp_tps4_for_cp2520 = true;
+ if (dc->config.forceHBR2CP2520)
+ dc->caps.force_dp_tps4_for_cp2520 = false;
+ dc->caps.dp_hpo = true;
+ dc->caps.dp_hdmi21_pcon_support = true;
+
+ dc->caps.edp_dsc_support = true;
+ dc->caps.extended_aux_timeout_support = true;
+ dc->caps.dmcub_support = true;
+ dc->caps.is_apu = true;
+ dc->caps.seamless_odm = true;
+
+ dc->caps.zstate_support = true;
+ dc->caps.ips_support = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
+
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+ dc->caps.color.dpp.input_lut_shared = 0;
+ dc->caps.color.dpp.icsc = 1;
+ dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr
+ dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
+ dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
+ dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1;
+ dc->caps.color.dpp.dgam_rom_caps.pq = 1;
+ dc->caps.color.dpp.dgam_rom_caps.hlg = 1;
+ dc->caps.color.dpp.post_csc = 1;
+ dc->caps.color.dpp.gamma_corr = 1;
+ dc->caps.color.dpp.dgam_rom_for_yuv = 0;
+
+ dc->caps.color.dpp.hw_3d_lut = 0;
+ dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1
+ // no OGAM ROM on DCN301
+ dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
+ dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.pq = 0;
+ dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
+ dc->caps.color.dpp.ocsc = 0;
+
+ dc->caps.color.mpc.gamut_remap = 1;
+ dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2
+ dc->caps.color.mpc.ogam_ram = 1;
+ dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
+ dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.pq = 0;
+ dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
+ dc->caps.color.mpc.ocsc = 1;
+ dc->caps.color.mpc.preblend = true;
+
+ dc->caps.num_of_host_routers = 2;
+ dc->caps.num_of_dpias_per_host_router = 2;
+
+ /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order
+ * to provide some margin.
+ * It's expected for future ASICs to have an equal or higher value, in order to
+ * have deterministic power improvement from generation to generation.
+ * (i.e., we should not expect new ASIC generation with lower vmin rate)
+ */
+ dc->caps.max_disp_clock_khz_at_vmin = 650000;
+
+ /* Sequential ONO is based on ASIC. */
+ if (dc->ctx->asic_id.hw_internal_rev >= 0x40)
+ dc->caps.sequential_ono = true;
+
+ /* Use pipe context based otg sync logic */
+ dc->config.use_pipe_ctx_sync_logic = true;
+
+
+ dc->config.disable_hbr_audio_dp2 = true;
+ /* read VBIOS LTTPR caps */
+ {
+ if (ctx->dc_bios->funcs->get_lttpr_caps) {
+ enum bp_result bp_query_result;
+ uint8_t is_vbios_lttpr_enable = 0;
+
+ bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable);
+ dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable;
+ }
+
+ /* interop bit is implicit */
+ {
+ dc->caps.vbios_lttpr_aware = true;
+ }
+ }
+
+ if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
+ dc->debug = debug_defaults_drv;
+ /* HW default is to have all the FGCG enabled; SW does not need to program them */
+ dc->debug.enable_fine_grain_clock_gating.u32All = 0xFFFF;
+ // Init the vm_helper
+ if (dc->vm_helper)
+ vm_helper_init(dc->vm_helper, 16);
+
+ /*************************************************
+ * Create resources *
+ *************************************************/
+
+ /* Clock Sources for Pixel Clock*/
+ pool->base.clock_sources[DCN35_CLK_SRC_PLL0] =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL0,
+ &clk_src_regs[0], false);
+ pool->base.clock_sources[DCN35_CLK_SRC_PLL1] =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL1,
+ &clk_src_regs[1], false);
+ pool->base.clock_sources[DCN35_CLK_SRC_PLL2] =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL2,
+ &clk_src_regs[2], false);
+ pool->base.clock_sources[DCN35_CLK_SRC_PLL3] =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL3,
+ &clk_src_regs[3], false);
+ pool->base.clock_sources[DCN35_CLK_SRC_PLL4] =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL4,
+ &clk_src_regs[4], false);
+
+ pool->base.clk_src_count = DCN35_CLK_SRC_TOTAL;
+
+ /* todo: not reuse phy_pll registers */
+ pool->base.dp_clock_source =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_ID_DP_DTO,
+ &clk_src_regs[0], true);
+
+ for (i = 0; i < pool->base.clk_src_count; i++) {
+ if (pool->base.clock_sources[i] == NULL) {
+ dm_error("DC: failed to create clock sources!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+ }
+ /* temporary until dml2 fully works without dml1 */
+ dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31);
+
+ /* TODO: DCCG */
+ pool->base.dccg = dccg35_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask);
+ if (pool->base.dccg == NULL) {
+ dm_error("DC: failed to create dccg!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+#undef REG_STRUCT
+#define REG_STRUCT pg_cntl_regs
+ pg_cntl_dcn35_regs_init();
+
+ pool->base.pg_cntl = pg_cntl35_create(ctx, &pg_cntl_regs, &pg_cntl_shift, &pg_cntl_mask);
+ if (pool->base.pg_cntl == NULL) {
+ dm_error("DC: failed to create power gate control!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+ /* TODO: IRQ */
+ init_data.ctx = dc->ctx;
+ pool->base.irqs = dal_irq_service_dcn35_create(&init_data);
+ if (!pool->base.irqs)
+ goto create_fail;
+
+ /* HUBBUB */
+ pool->base.hubbub = dcn35_hubbub_create(ctx);
+ if (pool->base.hubbub == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create hubbub!\n");
+ goto create_fail;
+ }
+
+ /* HUBPs, DPPs, OPPs and TGs */
+ for (i = 0; i < pool->base.pipe_count; i++) {
+ pool->base.hubps[i] = dcn35_hubp_create(ctx, i);
+ if (pool->base.hubps[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC: failed to create hubps!\n");
+ goto create_fail;
+ }
+
+ pool->base.dpps[i] = dcn35_dpp_create(ctx, i);
+ if (pool->base.dpps[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC: failed to create dpps!\n");
+ goto create_fail;
+ }
+ }
+
+ for (i = 0; i < pool->base.res_cap->num_opp; i++) {
+ pool->base.opps[i] = dcn35_opp_create(ctx, i);
+ if (pool->base.opps[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC: failed to create output pixel processor!\n");
+ goto create_fail;
+ }
+ }
+
+ for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+ pool->base.timing_generators[i] = dcn35_timing_generator_create(
+ ctx, i);
+ if (pool->base.timing_generators[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create tg!\n");
+ goto create_fail;
+ }
+ }
+ pool->base.timing_generator_count = i;
+
+ /* PSR */
+ pool->base.psr = dmub_psr_create(ctx);
+ if (pool->base.psr == NULL) {
+ dm_error("DC: failed to create psr obj!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+ /* Replay */
+ pool->base.replay = dmub_replay_create(ctx);
+ if (pool->base.replay == NULL) {
+ dm_error("DC: failed to create replay obj!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+ /* ABM */
+ for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+ pool->base.multiple_abms[i] = dmub_abm_create(ctx,
+ &abm_regs[i],
+ &abm_shift,
+ &abm_mask);
+ if (pool->base.multiple_abms[i] == NULL) {
+ dm_error("DC: failed to create abm for pipe %d!\n", i);
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+ }
+
+ /* MPC and DSC */
+ pool->base.mpc = dcn35_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut);
+ if (pool->base.mpc == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create mpc!\n");
+ goto create_fail;
+ }
+
+ for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
+ pool->base.dscs[i] = dcn35_dsc_create(ctx, i);
+ if (pool->base.dscs[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create display stream compressor %d!\n", i);
+ goto create_fail;
+ }
+ }
+
+ /* DWB and MMHUBBUB */
+ if (!dcn35_dwbc_create(ctx, &pool->base)) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create dwbc!\n");
+ goto create_fail;
+ }
+
+ if (!dcn35_mmhubbub_create(ctx, &pool->base)) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create mcif_wb!\n");
+ goto create_fail;
+ }
+
+ /* AUX and I2C */
+ for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
+ pool->base.engines[i] = dcn31_aux_engine_create(ctx, i);
+ if (pool->base.engines[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC:failed to create aux engine!!\n");
+ goto create_fail;
+ }
+ pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i);
+ if (pool->base.hw_i2cs[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC:failed to create hw i2c!!\n");
+ goto create_fail;
+ }
+ pool->base.sw_i2cs[i] = NULL;
+ }
+
+ /* Expose 4 USB4 DPIAs: 2 host routers x 2 DPIAs per router (see dc->caps above) */
+ pool->base.usb4_dpia_count = 4;
+ if (dc->debug.dpia_debug.bits.disable_dpia)
+ pool->base.usb4_dpia_count = 0;
+
+ /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
+ if (!resource_construct(num_virtual_links, dc, &pool->base,
+ &res_create_funcs))
+ goto create_fail;
+
+ /* HW Sequencer and Plane caps */
+ dcn35_hw_sequencer_construct(dc);
+
+ dc->caps.max_planes = pool->base.pipe_count;
+
+ for (i = 0; i < dc->caps.max_planes; ++i)
+ dc->caps.planes[i] = plane_cap;
+
+ dc->caps.max_odm_combine_factor = 4;
+
+ dc->cap_funcs = cap_funcs;
+
+ dc->dcn_ip->max_num_dpp = pool->base.pipe_count;
+
+ dc->dml2_options.dcn_pipe_count = pool->base.pipe_count;
+ dc->dml2_options.use_native_soc_bb_construction = true;
+ dc->dml2_options.minimize_dispclk_using_odm = false;
+ if (dc->config.EnableMinDispClkODM)
+ dc->dml2_options.minimize_dispclk_using_odm = true;
+ dc->dml2_options.enable_windowed_mpo_odm = dc->config.enable_windowed_mpo_odm;
+
+ resource_init_common_dml2_callbacks(dc, &dc->dml2_options);
+ dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch;
+
+ dc->dml2_options.max_segments_per_hubp = 24;
+ dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/
+ dc->dml2_options.override_det_buffer_size_kbytes = true;
+
+ if (dc->config.sdpif_request_limit_words_per_umc == 0)
+ dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/
+
+ return true;
+
+create_fail:
+
+ dcn35_resource_destruct(pool);
+
+ return false;
+}
+
+struct resource_pool *dcn35_create_resource_pool(
+		const struct dc_init_data *init_data,
+		struct dc *dc)
+{
+	struct dcn35_resource_pool *pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+
+	if (!pool)
+		return NULL;
+
+	/* On failure the constructor already called dcn35_resource_destruct(); only free the allocation. */
+	if (!dcn35_resource_construct(init_data->num_virtual_links, dc, pool)) {
+		BREAK_TO_DEBUGGER();
+		kfree(pool);
+		return NULL;
+	}
+	return &pool->base;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
new file mode 100644
index 000000000000..9c56ae76e0c7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
@@ -0,0 +1,315 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DCN35_RESOURCE_H_
+#define _DCN35_RESOURCE_H_
+
+#include "core_types.h"
+
+#define DCN3_5_VMIN_DISPCLK_HZ 717000000
+#define TO_DCN35_RES_POOL(pool)\
+ container_of(pool, struct dcn35_resource_pool, base)
+
+extern struct _vcs_dpi_ip_params_st dcn3_5_ip;
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc;
+enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state);
+
+struct dcn35_resource_pool {
+ struct resource_pool base; /* embedded generic pool; TO_DCN35_RES_POOL() maps it back via container_of() */
+};
+
+struct resource_pool *dcn35_create_resource_pool(
+ const struct dc_init_data *init_data,
+ struct dc *dc);
+
+/* Defs for runtime init of registers */
+
+#define OPP_REG_LIST_DCN20_RI(id) \
+ OPP_REG_LIST_DCN10_RI(id), \
+ OPP_DPG_REG_LIST_RI(id), \
+ SRI_ARR(FMT_422_CONTROL, FMT, id), \
+ SRI_ARR(OPPBUF_CONTROL1, OPPBUF, id)
+
+#define OPP_REG_LIST_DCN35_RI(id) \
+ OPP_REG_LIST_DCN20_RI(id), \
+ SRI2_ARR(OPP_TOP_CLK_CONTROL, OPP, id)
+
+#define VPG_DCN31_REG_LIST_RI(id) \
+ SRI_ARR(VPG_GENERIC_STATUS, VPG, id), \
+ SRI_ARR(VPG_GENERIC_PACKET_ACCESS_CTRL, VPG, id), \
+ SRI_ARR(VPG_GENERIC_PACKET_DATA, VPG, id), \
+ SRI_ARR(VPG_GSP_FRAME_UPDATE_CTRL, VPG, id), \
+ SRI_ARR(VPG_GSP_IMMEDIATE_UPDATE_CTRL, VPG, id), \
+ SRI_ARR(VPG_MEM_PWR, VPG, id)
+
+#define AFMT_DCN31_REG_LIST_RI(id) \
+ SRI_ARR(AFMT_INFOFRAME_CONTROL0, AFMT, id), \
+ SRI_ARR(AFMT_VBI_PACKET_CONTROL, AFMT, id), \
+ SRI_ARR(AFMT_AUDIO_PACKET_CONTROL, AFMT, id), \
+ SRI_ARR(AFMT_AUDIO_PACKET_CONTROL2, AFMT, id), \
+ SRI_ARR(AFMT_AUDIO_SRC_CONTROL, AFMT, id), \
+ SRI_ARR(AFMT_60958_0, AFMT, id), \
+ SRI_ARR(AFMT_60958_1, AFMT, id), \
+ SRI_ARR(AFMT_60958_2, AFMT, id), \
+ SRI_ARR(AFMT_MEM_PWR, AFMT, id)
+
+/* Stream encoder */
+#define SE_DCN35_REG_LIST_RI(id) \
+ SRI_ARR(AFMT_CNTL, DIG, id), \
+ SRI_ARR(DIG_FE_CNTL, DIG, id), \
+ SRI_ARR(HDMI_CONTROL, DIG, id), \
+ SRI_ARR(HDMI_DB_CONTROL, DIG, id), \
+ SRI_ARR(HDMI_GC, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL4, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL5, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL6, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL7, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL8, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL9, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL10, DIG, id), \
+ SRI_ARR(HDMI_INFOFRAME_CONTROL0, DIG, id), \
+ SRI_ARR(HDMI_INFOFRAME_CONTROL1, DIG, id), \
+ SRI_ARR(HDMI_VBI_PACKET_CONTROL, DIG, id), \
+ SRI_ARR(HDMI_AUDIO_PACKET_CONTROL, DIG, id),\
+ SRI_ARR(HDMI_ACR_PACKET_CONTROL, DIG, id),\
+ SRI_ARR(HDMI_ACR_32_0, DIG, id),\
+ SRI_ARR(HDMI_ACR_32_1, DIG, id),\
+ SRI_ARR(HDMI_ACR_44_0, DIG, id),\
+ SRI_ARR(HDMI_ACR_44_1, DIG, id),\
+ SRI_ARR(HDMI_ACR_48_0, DIG, id),\
+ SRI_ARR(HDMI_ACR_48_1, DIG, id),\
+ SRI_ARR(DP_DB_CNTL, DP, id), \
+ SRI_ARR(DP_MSA_MISC, DP, id), \
+ SRI_ARR(DP_MSA_VBID_MISC, DP, id), \
+ SRI_ARR(DP_MSA_COLORIMETRY, DP, id), \
+ SRI_ARR(DP_MSA_TIMING_PARAM1, DP, id), \
+ SRI_ARR(DP_MSA_TIMING_PARAM2, DP, id), \
+ SRI_ARR(DP_MSA_TIMING_PARAM3, DP, id), \
+ SRI_ARR(DP_MSA_TIMING_PARAM4, DP, id), \
+ SRI_ARR(DP_MSE_RATE_CNTL, DP, id), \
+ SRI_ARR(DP_MSE_RATE_UPDATE, DP, id), \
+ SRI_ARR(DP_PIXEL_FORMAT, DP, id), \
+ SRI_ARR(DP_SEC_CNTL, DP, id), \
+ SRI_ARR(DP_SEC_CNTL1, DP, id), \
+ SRI_ARR(DP_SEC_CNTL2, DP, id), \
+ SRI_ARR(DP_SEC_CNTL5, DP, id), \
+ SRI_ARR(DP_SEC_CNTL6, DP, id), \
+ SRI_ARR(DP_STEER_FIFO, DP, id), \
+ SRI_ARR(DP_VID_M, DP, id), \
+ SRI_ARR(DP_VID_N, DP, id), \
+ SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \
+ SRI_ARR(DP_VID_TIMING, DP, id), \
+ SRI_ARR(DP_SEC_AUD_N, DP, id), \
+ SRI_ARR(DP_SEC_TIMESTAMP, DP, id), \
+ SRI_ARR(DP_DSC_CNTL, DP, id), \
+ SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \
+ SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \
+ SRI_ARR(DP_SEC_FRAMING4, DP, id), \
+ SRI_ARR(DP_GSP11_CNTL, DP, id), \
+ SRI_ARR(DME_CONTROL, DME, id),\
+ SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), /* NOTE(review): repeats an entry already listed earlier in this macro */ \
+ SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), /* NOTE(review): repeats an entry already listed earlier in this macro */ \
+ SRI_ARR(DIG_FE_CNTL, DIG, id), /* NOTE(review): repeats the second entry of this macro */ \
+ SRI_ARR(DIG_FE_EN_CNTL, DIG, id), \
+ SRI_ARR(DIG_FE_CLK_CNTL, DIG, id), \
+ SRI_ARR(DIG_CLOCK_PATTERN, DIG, id), \
+ SRI_ARR(DIG_FIFO_CTRL0, DIG, id), \
+ SRI_ARR(STREAM_MAPPER_CONTROL, DIG, id)
+
+#define LE_DCN35_REG_LIST_RI(id)\
+ LE_DCN3_REG_LIST_RI(id),\
+ SRI_ARR(DP_DPHY_INTERNAL_CTRL, DP, id), \
+ SR_ARR(DIO_LINKA_CNTL, id), \
+ SR_ARR(DIO_LINKB_CNTL, id), \
+ SR_ARR(DIO_LINKC_CNTL, id), \
+ SR_ARR(DIO_LINKD_CNTL, id), \
+ SR_ARR(DIO_LINKE_CNTL, id), \
+ SR_ARR(DIO_LINKF_CNTL, id),\
+ SRI_ARR(DIG_BE_CLK_CNTL, DIG, id),\
+ SR_ARR(DIO_CLK_CNTL, id)
+
+#define MCIF_WB_COMMON_REG_LIST_DCN3_5_RI(inst) \
+ MCIF_WB_COMMON_REG_LIST_DCN32_RI(inst), \
+ SRI2_ARR(MMHUBBUB_CLOCK_CNTL, MMHUBBUB, inst)
+
+#define HWSEQ_DCN35_REG_LIST()\
+ SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
+ SR(DCHUBBUB_ARB_HOSTVM_CNTL), \
+ SR(DIO_MEM_PWR_CTRL), \
+ SR(ODM_MEM_PWR_CTRL3), \
+ SR(MMHUBBUB_MEM_PWR_CNTL), \
+ SR(DCCG_GATE_DISABLE_CNTL), \
+ SR(DCCG_GATE_DISABLE_CNTL2), \
+ SR(DCCG_GATE_DISABLE_CNTL4), \
+ SR(DCCG_GATE_DISABLE_CNTL5), \
+ SR(DCFCLK_CNTL),\
+ SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \
+ SRII(PIXEL_RATE_CNTL, OTG, 0), \
+ SRII(PIXEL_RATE_CNTL, OTG, 1),\
+ SRII(PIXEL_RATE_CNTL, OTG, 2),\
+ SRII(PIXEL_RATE_CNTL, OTG, 3),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\
+ SR(MICROSECOND_TIME_BASE_DIV), \
+ SR(MILLISECOND_TIME_BASE_DIV), \
+ SR(DISPCLK_FREQ_CHANGE_CNTL), \
+ SR(RBBMIF_TIMEOUT_DIS), \
+ SR(RBBMIF_TIMEOUT_DIS_2), \
+ SR(DCHUBBUB_CRC_CTRL), \
+ SR(DPP_TOP0_DPP_CRC_CTRL), \
+ SR(DPP_TOP0_DPP_CRC_VAL_B_A), \
+ SR(DPP_TOP0_DPP_CRC_VAL_R_G), \
+ SR(MPC_CRC_CTRL), \
+ SR(MPC_CRC_RESULT_GB), \
+ SR(MPC_CRC_RESULT_C), \
+ SR(MPC_CRC_RESULT_AR), \
+ SR(DOMAIN0_PG_CONFIG), \
+ SR(DOMAIN1_PG_CONFIG), \
+ SR(DOMAIN2_PG_CONFIG), \
+ SR(DOMAIN3_PG_CONFIG), \
+ SR(DOMAIN16_PG_CONFIG), \
+ SR(DOMAIN17_PG_CONFIG), \
+ SR(DOMAIN18_PG_CONFIG), \
+ SR(DOMAIN19_PG_CONFIG), \
+ SR(DOMAIN0_PG_STATUS), \
+ SR(DOMAIN1_PG_STATUS), \
+ SR(DOMAIN2_PG_STATUS), \
+ SR(DOMAIN3_PG_STATUS), \
+ SR(DOMAIN16_PG_STATUS), \
+ SR(DOMAIN17_PG_STATUS), \
+ SR(DOMAIN18_PG_STATUS), \
+ SR(DOMAIN19_PG_STATUS), \
+ SR(DC_IP_REQUEST_CNTL), \
+ SR(AZALIA_AUDIO_DTO), \
+ SR(AZALIA_CONTROLLER_CLOCK_GATING), \
+ SR(HPO_TOP_HW_CONTROL),\
+ SR(DMU_CLK_CNTL)
+
+/* OPTC */
+#define OPTC_COMMON_REG_LIST_DCN3_5_RI(inst) \
+ SRI_ARR(OTG_VSTARTUP_PARAM, OTG, inst),\
+ SRI_ARR(OTG_VUPDATE_PARAM, OTG, inst),\
+ SRI_ARR(OTG_VREADY_PARAM, OTG, inst),\
+ SRI_ARR(OTG_MASTER_UPDATE_LOCK, OTG, inst),\
+ SRI_ARR(OTG_GLOBAL_CONTROL0, OTG, inst),\
+ SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst),\
+ SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst),\
+ SRI_ARR(OTG_GLOBAL_CONTROL4, OTG, inst),\
+ SRI_ARR(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_H_TOTAL, OTG, inst),\
+ SRI_ARR(OTG_H_BLANK_START_END, OTG, inst),\
+ SRI_ARR(OTG_H_SYNC_A, OTG, inst),\
+ SRI_ARR(OTG_H_SYNC_A_CNTL, OTG, inst),\
+ SRI_ARR(OTG_H_TIMING_CNTL, OTG, inst),\
+ SRI_ARR(OTG_V_TOTAL, OTG, inst),\
+ SRI_ARR(OTG_V_BLANK_START_END, OTG, inst),\
+ SRI_ARR(OTG_V_SYNC_A, OTG, inst),\
+ SRI_ARR(OTG_V_SYNC_A_CNTL, OTG, inst),\
+ SRI_ARR(OTG_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_STEREO_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_3D_STRUCTURE_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_STEREO_STATUS, OTG, inst),\
+ SRI_ARR(OTG_V_TOTAL_MAX, OTG, inst),\
+ SRI_ARR(OTG_V_TOTAL_MIN, OTG, inst),\
+ SRI_ARR(OTG_V_TOTAL_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_V_COUNT_STOP_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_V_COUNT_STOP_CONTROL2, OTG, inst),\
+ SRI_ARR(OTG_TRIGA_CNTL, OTG, inst),\
+ SRI_ARR(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst),\
+ SRI_ARR(OTG_STATIC_SCREEN_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_STATUS_FRAME_COUNT, OTG, inst),\
+ SRI_ARR(OTG_STATUS, OTG, inst),\
+ SRI_ARR(OTG_STATUS_POSITION, OTG, inst),\
+ SRI_ARR(OTG_NOM_VERT_POSITION, OTG, inst),\
+ SRI_ARR(OTG_M_CONST_DTO0, OTG, inst),\
+ SRI_ARR(OTG_M_CONST_DTO1, OTG, inst),\
+ SRI_ARR(OTG_CLOCK_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\
+ SRI_ARR(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst),\
+ SRI_ARR(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\
+ SRI_ARR(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\
+ SRI_ARR(OPTC_DATA_SOURCE_SELECT, ODM, inst),\
+ SRI_ARR(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\
+ SRI_ARR(CONTROL, VTG, inst),\
+ SRI_ARR(OTG_VERT_SYNC_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_GSL_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_CRC_CNTL, OTG, inst),\
+ SRI_ARR(OTG_CRC0_DATA_RG, OTG, inst),\
+ SRI_ARR(OTG_CRC0_DATA_B, OTG, inst),\
+ SRI_ARR(OTG_CRC1_DATA_RG, OTG, inst),\
+ SRI_ARR(OTG_CRC1_DATA_B, OTG, inst),\
+ SRI_ARR(OTG_CRC2_DATA_RG, OTG, inst),\
+ SRI_ARR(OTG_CRC2_DATA_B, OTG, inst),\
+ SRI_ARR(OTG_CRC3_DATA_RG, OTG, inst),\
+ SRI_ARR(OTG_CRC3_DATA_B, OTG, inst),\
+ SRI_ARR(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_CRC1_WINDOWA_X_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_CRC1_WINDOWA_Y_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_CRC1_WINDOWB_X_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_CRC1_WINDOWB_Y_CONTROL, OTG, inst),\
+ SRI_ARR(OTG_CRC0_WINDOWA_X_CONTROL_READBACK, OTG, inst),\
+ SRI_ARR(OTG_CRC0_WINDOWA_Y_CONTROL_READBACK, OTG, inst),\
+ SRI_ARR(OTG_CRC0_WINDOWB_X_CONTROL_READBACK, OTG, inst),\
+ SRI_ARR(OTG_CRC0_WINDOWB_Y_CONTROL_READBACK, OTG, inst),\
+ SRI_ARR(OTG_CRC1_WINDOWA_X_CONTROL_READBACK, OTG, inst),\
+ SRI_ARR(OTG_CRC1_WINDOWA_Y_CONTROL_READBACK, OTG, inst),\
+ SRI_ARR(OTG_CRC1_WINDOWB_X_CONTROL_READBACK, OTG, inst),\
+ SRI_ARR(OTG_CRC1_WINDOWB_Y_CONTROL_READBACK, OTG, inst),\
+ SR_ARR(GSL_SOURCE_SELECT, inst),\
+ SRI_ARR(OTG_TRIGA_MANUAL_TRIG, OTG, inst),\
+ SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst), /* NOTE(review): repeats an entry already listed near the top of this macro */ \
+ SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst), /* NOTE(review): repeats an entry already listed near the top of this macro */ \
+ SRI_ARR(OTG_GSL_WINDOW_X, OTG, inst),\
+ SRI_ARR(OTG_GSL_WINDOW_Y, OTG, inst),\
+ SRI_ARR(OTG_VUPDATE_KEEPOUT, OTG, inst),\
+ SRI_ARR(OTG_DSC_START_POSITION, OTG, inst),\
+ SRI_ARR(OTG_DRR_TRIGGER_WINDOW, OTG, inst),\
+ SRI_ARR(OTG_DRR_V_TOTAL_CHANGE, OTG, inst),\
+ SRI_ARR(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\
+ SRI_ARR(OPTC_BYTES_PER_PIXEL, ODM, inst),\
+ SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst),\
+ SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst),\
+ SRI_ARR(OTG_DRR_CONTROL, OTG, inst),\
+ SRI2_ARR(OPTC_CLOCK_CONTROL, OPTC, inst),\
+ SRI_ARR(INTERRUPT_DEST, OTG, inst)
+
+/* DPP */
+#define DPP_REG_LIST_DCN35_RI(id)\
+ DPP_REG_LIST_DCN30_COMMON_RI(id)
+
+#endif /* _DCN35_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
new file mode 100644
index 000000000000..0abd163b425e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
@@ -0,0 +1,2192 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+
+#include "dm_services.h"
+#include "dc.h"
+
+#include "dcn31/dcn31_init.h"
+#include "dcn351/dcn351_init.h"
+
+#include "resource.h"
+#include "include/irq_service_interface.h"
+#include "dcn351_resource.h"
+
+#include "dcn20/dcn20_resource.h"
+#include "dcn30/dcn30_resource.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn35/dcn35_resource.h"
+
+#include "dcn10/dcn10_ipp.h"
+#include "dcn30/dcn30_hubbub.h"
+#include "dcn31/dcn31_hubbub.h"
+#include "dcn35/dcn35_hubbub.h"
+#include "dcn32/dcn32_mpc.h"
+#include "dcn35/dcn35_hubp.h"
+#include "irq/dcn351/irq_service_dcn351.h"
+#include "dcn35/dcn35_dpp.h"
+#include "dcn35/dcn35_optc.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn35/dcn35_opp.h"
+#include "dcn35/dcn35_dsc.h"
+#include "dcn30/dcn30_vpg.h"
+#include "dcn30/dcn30_afmt.h"
+
+#include "dcn31/dcn31_dio_link_encoder.h"
+#include "dcn35/dcn35_dio_stream_encoder.h"
+#include "dcn31/dcn31_hpo_dp_stream_encoder.h"
+#include "dcn31/dcn31_hpo_dp_link_encoder.h"
+#include "dcn32/dcn32_hpo_dp_link_encoder.h"
+#include "link_service.h"
+#include "dcn31/dcn31_apg.h"
+#include "dcn32/dcn32_dio_link_encoder.h"
+#include "dcn31/dcn31_vpg.h"
+#include "dcn31/dcn31_afmt.h"
+#include "dce/dce_clock_source.h"
+#include "dce/dce_audio.h"
+#include "dce/dce_hwseq.h"
+#include "clk_mgr.h"
+#include "virtual/virtual_stream_encoder.h"
+#include "dce110/dce110_resource.h"
+#include "dml/display_mode_vba.h"
+#include "dcn35/dcn35_dccg.h"
+#include "dcn35/dcn35_pg_cntl.h"
+#include "dcn10/dcn10_resource.h"
+#include "dcn31/dcn31_panel_cntl.h"
+#include "dcn35/dcn35_hwseq.h"
+#include "dcn35/dcn35_dio_link_encoder.h"
+#include "dml/dcn31/dcn31_fpu.h" /*todo*/
+#include "dml/dcn35/dcn35_fpu.h"
+#include "dml/dcn351/dcn351_fpu.h"
+#include "dcn35/dcn35_dwb.h"
+#include "dcn35/dcn35_mmhubbub.h"
+
+#include "dcn/dcn_3_5_1_offset.h"
+#include "dcn/dcn_3_5_1_sh_mask.h"
+#include "nbio/nbio_7_11_0_offset.h"
+#include "mmhub/mmhub_3_3_0_offset.h"
+#include "mmhub/mmhub_3_3_0_sh_mask.h"
+
+#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE__SHIFT 0x0
+#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE_MASK 0x0000000FL
+
+#include "reg_helper.h"
+#include "dce/dmub_abm.h"
+#include "dce/dmub_psr.h"
+#include "dce/dmub_replay.h"
+#include "dce/dce_aux.h"
+#include "dce/dce_i2c.h"
+#include "dml/dcn31/display_mode_vba_31.h" /*temp*/
+#include "vm_helper.h"
+#include "dcn20/dcn20_vmid.h"
+
+#include "dml2/dml2_wrapper.h"
+
+#include "link_enc_cfg.h"
+#define DC_LOGGER_INIT(logger)
+
+enum dcn351_clk_src_array_id {
+ DCN351_CLK_SRC_PLL0,
+ DCN351_CLK_SRC_PLL1,
+ DCN351_CLK_SRC_PLL2,
+ DCN351_CLK_SRC_PLL3,
+ DCN351_CLK_SRC_PLL4,
+ DCN351_CLK_SRC_TOTAL /* sentinel: equals the number of PLL entries above (5) */
+};
+
+/* begin *********************
+ * macros to expand register list macro defined in HW object header file
+ */
+
+/* DCN */
+/* TODO awful hack. fixup dcn20_dwb.h */
+#undef BASE_INNER
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg]
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define SR(reg_name)\
+ REG_STRUCT.reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define SR_ARR(reg_name, id) \
+ REG_STRUCT[id].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name
+
+#define SR_ARR_INIT(reg_name, id, value) \
+ REG_STRUCT[id].reg_name = value
+
+#define SRI(reg_name, block, id)\
+ REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_ARR(reg_name, block, id)\
+ REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SR_ARR_I2C(reg_name, id) \
+ REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name
+
+#define SRI_ARR_I2C(reg_name, block, id)\
+ REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_ARR_ALPHABET(reg_name, block, index, id)\
+ REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI2(reg_name, block, id)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define SRI2_ARR(reg_name, block, id)\
+ REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define SRIR(var_name, reg_name, block, id)\
+ .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII(reg_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII_ARR_2(reg_name, block, id, inst)\
+ REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII_MPC_RMU(reg_name, block, id)\
+ .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII_DWB(reg_name, temp_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## temp_name
+
+#define SF_DWB2(reg_name, block, id, field_name, post_fix) \
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define DCCG_SRII(reg_name, block, id)\
+ REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define VUPDATE_SRII(reg_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
+ reg ## reg_name ## _ ## block ## id
+
+/* NBIO */
+/* Segment base looked up in the nbio_reg_offsets table of the in-scope ctx. */
+#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg]
+
+#define NBIO_BASE(seg) \
+ NBIO_BASE_INNER(seg)
+
+/* REG_STRUCT.<reg_name> = NBIO base + regBIF_BX2_<reg_name>. */
+#define NBIO_SR(reg_name)\
+ REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \
+ regBIF_BX2_ ## reg_name
+
+/* As NBIO_SR, but assigns into REG_STRUCT[id]. */
+#define NBIO_SR_ARR(reg_name, id)\
+ REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \
+ regBIF_BX2_ ## reg_name
+
+/*
+ * Comma expression that fills the BIOS_SCRATCH_3 and BIOS_SCRATCH_6 fields of
+ * REG_STRUCT; expects REG_STRUCT to name a bios_registers object.
+ */
+#define bios_regs_init() \
+ ( \
+ NBIO_SR(BIOS_SCRATCH_3),\
+ NBIO_SR(BIOS_SCRATCH_6)\
+ )
+
+/* Filled at run time through bios_regs_init(). */
+static struct bios_registers bios_regs;
+
+/*
+ * Clock source: register-offset initializer for entry @index / PLL @pllid,
+ * the 5-entry register table it fills, and the field shift/mask tables.
+ */
+#define clk_src_regs_init(index, pllid)\
+ CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid)
+
+static struct dce110_clk_src_regs clk_src_regs[5];
+
+static const struct dce110_clk_src_shift cs_shift = {
+ CS_COMMON_MASK_SH_LIST_DCN3_1_4(__SHIFT)
+};
+
+static const struct dce110_clk_src_mask cs_mask = {
+ CS_COMMON_MASK_SH_LIST_DCN3_1_4(_MASK)
+};
+
+/*
+ * ABM: per-instance register-offset initializer, the 4-entry register table
+ * it fills, and the field shift/mask tables.
+ */
+#define abm_regs_init(id)\
+ ABM_DCN32_REG_LIST_RI(id)
+
+static struct dce_abm_registers abm_regs[4];
+
+static const struct dce_abm_shift abm_shift = {
+ ABM_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dce_abm_mask abm_mask = {
+ ABM_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/*
+ * Audio: per-instance register-offset initializer and the 7-entry register
+ * table it fills.
+ */
+#define audio_regs_init(id)\
+ AUD_COMMON_REG_LIST_RI(id)
+
+static struct dce_audio_registers audio_regs[7];
+
+
+/*
+ * Audio field list: the two AZALIA endpoint index/data fields followed by
+ * AUD_COMMON_MASK_SH_LIST_BASE. Used for both audio_shift and audio_mask.
+ */
+#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
+ SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\
+ SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\
+ AUD_COMMON_MASK_SH_LIST_BASE(mask_sh)
+
+static const struct dce_audio_shift audio_shift = {
+ DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_audio_mask audio_mask = {
+ DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * VPG: per-instance register-offset initializer, the 10-entry register table
+ * it fills, and the field shift/mask tables.
+ */
+#define vpg_regs_init(id)\
+ VPG_DCN31_REG_LIST_RI(id)
+
+static struct dcn31_vpg_registers vpg_regs[10];
+
+static const struct dcn31_vpg_shift vpg_shift = {
+ DCN31_VPG_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_vpg_mask vpg_mask = {
+ DCN31_VPG_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * AFMT: per-instance register-offset initializer, the 6-entry register table
+ * it fills, and the field shift/mask tables.
+ */
+#define afmt_regs_init(id)\
+ AFMT_DCN31_REG_LIST_RI(id)
+
+static struct dcn31_afmt_registers afmt_regs[6];
+
+static const struct dcn31_afmt_shift afmt_shift = {
+ DCN31_AFMT_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_afmt_mask afmt_mask = {
+ DCN31_AFMT_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * APG: per-instance register-offset initializer, the 4-entry register table
+ * it fills, and the field shift/mask tables.
+ */
+#define apg_regs_init(id)\
+ APG_DCN31_REG_LIST_RI(id)
+
+static struct dcn31_apg_registers apg_regs[4];
+
+static const struct dcn31_apg_shift apg_shift = {
+ DCN31_APG_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_apg_mask apg_mask = {
+ DCN31_APG_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * DIO stream encoder: per-instance register-offset initializer, the 5-entry
+ * register table it fills, and the field shift/mask tables.
+ */
+#define stream_enc_regs_init(id)\
+ SE_DCN35_REG_LIST_RI(id)
+
+static struct dcn10_stream_enc_registers stream_enc_regs[5];
+
+static const struct dcn10_stream_encoder_shift se_shift = {
+ SE_COMMON_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn10_stream_encoder_mask se_mask = {
+ SE_COMMON_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/* Link encoder AUX registers: per-instance initializer and 5-entry table. */
+#define aux_regs_init(id)\
+ DCN2_AUX_REG_LIST_RI(id)
+
+static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5];
+
+/* Link encoder HPD registers: per-instance initializer and 5-entry table. */
+#define hpd_regs_init(id)\
+ HPD_REG_LIST_RI(id)
+
+static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5];
+
+
+/* AUX field shift/mask tables, both built from DCN_AUX_MASK_SH_LIST. */
+static const struct dce110_aux_registers_shift aux_shift = {
+ DCN_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+ DCN_AUX_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * Link encoder registers for instance @id: a parenthesized comma expression
+ * combining the LE register list with the UNIPHY list for PHY @phyid.
+ * Fills the 5-entry link_enc_regs table when REG_STRUCT names it.
+ */
+#define link_regs_init(id, phyid)\
+ ( \
+ LE_DCN35_REG_LIST_RI(id), \
+ UNIPHY_DCN2_REG_LIST_RI(id, phyid)\
+ )
+
+static struct dcn10_link_enc_registers link_enc_regs[5];
+
+/*
+ * Link encoder field shift table. Only the LINK_ENCODER list is used; the
+ * DPCS_DCN31 list is not part of this table.
+ */
+static const struct dcn10_link_enc_shift le_shift = {
+	LINK_ENCODER_MASK_SH_LIST_DCN35(__SHIFT),
+};
+
+/*
+ * Link encoder field mask table. Only the LINK_ENCODER list is used; the
+ * DPCS_DCN31 list is not part of this table.
+ */
+static const struct dcn10_link_enc_mask le_mask = {
+	LINK_ENCODER_MASK_SH_LIST_DCN35(_MASK),
+};
+
+/*
+ * HPO DP stream encoder: per-instance register-offset initializer, the
+ * 4-entry register table it fills, and the field shift/mask tables.
+ */
+#define hpo_dp_stream_encoder_reg_init(id)\
+ DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id)
+
+static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4];
+
+static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = {
+ DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = {
+ DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * HPO DP link encoder: per-instance register-offset initializer, the 2-entry
+ * register table it fills, and the field shift/mask tables.
+ * The commented-out RDPCSTX lines that follow the macro are not part of its
+ * expansion (the macro's last line has no continuation).
+ */
+#define hpo_dp_link_encoder_reg_init(id)\
+ DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id)
+ /*DCN3_1_RDPCSTX_REG_LIST(0),*/
+ /*DCN3_1_RDPCSTX_REG_LIST(1),*/
+ /*DCN3_1_RDPCSTX_REG_LIST(2),*/
+ /*DCN3_1_RDPCSTX_REG_LIST(3),*/
+
+static struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[2];
+
+static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = {
+ DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = {
+ DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * DPP: per-instance register-offset initializer, the 4-entry register table
+ * it fills, and the field shift/mask tables (named tf_shift / tf_mask).
+ */
+#define dpp_regs_init(id)\
+ DPP_REG_LIST_DCN35_RI(id)
+
+static struct dcn3_dpp_registers dpp_regs[4];
+
+static const struct dcn35_dpp_shift tf_shift = {
+ DPP_REG_LIST_SH_MASK_DCN35(__SHIFT)
+};
+
+static const struct dcn35_dpp_mask tf_mask = {
+ DPP_REG_LIST_SH_MASK_DCN35(_MASK)
+};
+
+/*
+ * OPP: per-instance register-offset initializer, the 4-entry register table
+ * it fills, and the field shift/mask tables.
+ */
+#define opp_regs_init(id)\
+ OPP_REG_LIST_DCN35_RI(id)
+
+static struct dcn35_opp_registers opp_regs[4];
+
+static const struct dcn35_opp_shift opp_shift = {
+ OPP_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn35_opp_mask opp_mask = {
+ OPP_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/*
+ * AUX engine registers for instance @id: a parenthesized comma expression
+ * made of the common AUX register list plus three SR_ARR_INIT entries
+ * (AUXN_IMPCAL and AUXP_IMPCAL with 0, AUX_RESET_MASK with
+ * DP_AUX0_AUX_CONTROL__AUX_RESET_MASK). Fills the 5-entry aux_engine_regs
+ * table when REG_STRUCT names it.
+ */
+#define aux_engine_regs_init(id)\
+ ( \
+ AUX_COMMON_REG_LIST0_RI(id), \
+ SR_ARR_INIT(AUXN_IMPCAL, id, 0), \
+ SR_ARR_INIT(AUXP_IMPCAL, id, 0), \
+ SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK) \
+ )
+
+static struct dce110_aux_registers aux_engine_regs[5];
+
+/*
+ * DWBC: per-instance register-offset initializer, the single-entry register
+ * table, and the field shift/mask tables.
+ */
+#define dwbc_regs_dcn3_init(id)\
+ DWBC_COMMON_REG_LIST_DCN30_RI(id)
+
+static struct dcn30_dwbc_registers dwbc35_regs[1];
+
+static const struct dcn35_dwbc_shift dwbc35_shift = {
+ DWBC_COMMON_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn35_dwbc_mask dwbc35_mask = {
+ DWBC_COMMON_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/*
+ * MCIF write-back (mmhubbub): per-instance register-offset initializer, the
+ * single-entry register table, and the field shift/mask tables.
+ */
+#define mcif_wb_regs_dcn3_init(id)\
+ MCIF_WB_COMMON_REG_LIST_DCN3_5_RI(id)
+
+static struct dcn35_mmhubbub_registers mcif_wb35_regs[1];
+
+static const struct dcn35_mmhubbub_shift mcif_wb35_shift = {
+ MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(__SHIFT)
+};
+
+static const struct dcn35_mmhubbub_mask mcif_wb35_mask = {
+ MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(_MASK)
+};
+
+/*
+ * DSC: per-instance register-offset initializer, the 4-entry register table
+ * it fills, and the field shift/mask tables.
+ */
+#define dsc_regsDCN35_init(id)\
+ DSC_REG_LIST_DCN20_RI(id)
+
+static struct dcn20_dsc_registers dsc_regs[4];
+
+static const struct dcn35_dsc_shift dsc_shift = {
+ DSC_REG_LIST_SH_MASK_DCN35(__SHIFT)
+};
+
+static const struct dcn35_dsc_mask dsc_mask = {
+ DSC_REG_LIST_SH_MASK_DCN35(_MASK)
+};
+
+/* Single MPC register block, filled at run time by dcn_mpc_regs_init(). */
+static struct dcn30_mpc_registers mpc_regs;
+
+/*
+ * Comma-separated register-offset assignments for MPC instances 0-3, the
+ * four OUT_MUX instances and DWB_MUX instance 0. Not parenthesized, so it
+ * must be used as a full expression statement.
+ */
+#define dcn_mpc_regs_init() \
+ MPC_REG_LIST_DCN3_2_RI(0),\
+ MPC_REG_LIST_DCN3_2_RI(1),\
+ MPC_REG_LIST_DCN3_2_RI(2),\
+ MPC_REG_LIST_DCN3_2_RI(3),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\
+ MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0)
+
+/* MPC field shift/mask tables, both built from MPC_COMMON_MASK_SH_LIST_DCN32. */
+static const struct dcn30_mpc_shift mpc_shift = {
+ MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
+
+static const struct dcn30_mpc_mask mpc_mask = {
+ MPC_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
+
+/*
+ * OPTC: per-instance register-offset initializer, the 4-entry register table
+ * it fills, and the field shift/mask tables.
+ */
+#define optc_regs_init(id)\
+ OPTC_COMMON_REG_LIST_DCN3_5_RI(id)
+
+static struct dcn_optc_registers optc_regs[4];
+
+static const struct dcn_optc_shift optc_shift = {
+ OPTC_COMMON_MASK_SH_LIST_DCN3_5(__SHIFT)
+};
+
+static const struct dcn_optc_mask optc_mask = {
+ OPTC_COMMON_MASK_SH_LIST_DCN3_5(_MASK)
+};
+
+/*
+ * HUBP: per-instance register-offset initializer, the 4-entry register table
+ * it fills, and the field shift/mask tables.
+ */
+#define hubp_regs_init(id)\
+ HUBP_REG_LIST_DCN30_RI(id)
+
+static struct dcn_hubp2_registers hubp_regs[4];
+
+
+static const struct dcn35_hubp2_shift hubp_shift = {
+ HUBP_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn35_hubp2_mask hubp_mask = {
+ HUBP_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/*
+ * HUBBUB: single register block, its run-time initializer (instance 0), and
+ * the field shift/mask tables.
+ */
+static struct dcn_hubbub_registers hubbub_reg;
+
+#define hubbub_reg_init()\
+ HUBBUB_REG_LIST_DCN35(0)
+
+static const struct dcn_hubbub_shift hubbub_shift = {
+ HUBBUB_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn_hubbub_mask hubbub_mask = {
+ HUBBUB_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/*
+ * DCCG: single register block, its run-time initializer, and the field
+ * shift/mask tables.
+ */
+static struct dccg_registers dccg_regs;
+
+#define dccg_regs_init()\
+ DCCG_REG_LIST_DCN35()
+
+static const struct dccg_shift dccg_shift = {
+ DCCG_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dccg_mask dccg_mask = {
+ DCCG_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/*
+ * PG_CNTL: single register block, its run-time initializer, and the field
+ * shift/mask tables.
+ */
+static struct pg_cntl_registers pg_cntl_regs;
+
+#define pg_cntl_dcn35_regs_init() \
+ PG_CNTL_REG_LIST_DCN35()
+
+static const struct pg_cntl_shift pg_cntl_shift = {
+ PG_CNTL_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct pg_cntl_mask pg_cntl_mask = {
+ PG_CNTL_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/*
+ * Designated initializer for array field <reg_name_pre>_<reg_name_post>[id],
+ * set to BASE(<reg>_BASE_IDX) + <reg> where <reg> is
+ * reg<reg_name_pre><id>_<reg_name_post>.
+ */
+#define SRII2(reg_name_pre, reg_name_post, id)\
+ .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \
+ ## id ## _ ## reg_name_post ## _BASE_IDX) + \
+ reg ## reg_name_pre ## id ## _ ## reg_name_post
+
+/* Hardware sequencer register block, filled at run time by hwseq_reg_init(). */
+static struct dce_hwseq_registers hwseq_reg;
+
+#define hwseq_reg_init()\
+ HWSEQ_DCN35_REG_LIST()
+
+/*
+ * Hardware sequencer field list: HWSEQ_DCN_MASK_SH_LIST followed by the
+ * fields listed below (HUBBUB timer/HostVM, DOMAINn_PG_CONFIG/STATUS for
+ * domains 0-3, 16-19 and 22-25, DC_IP_REQUEST_CNTL, AZALIA, HPO, ODM/DIO
+ * memory power, DMU_CLK_CNTL and DCCG_GATE_DISABLE_CNTL2/4/5).
+ * Instantiated once with __SHIFT and once with _MASK below.
+ */
+#define HWSEQ_DCN35_MASK_SH_LIST(mask_sh)\
+ HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
+ HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
+ HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
+ HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \
+ HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \
+ HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \
+ HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \
+ HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \
+ HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DISPCLK_R_DMU_GATE_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DISPCLK_G_RBBMIF_GATE_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, RBBMIF_FGCG_REP_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DPREFCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DISPCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DPPCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DTBCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DCFCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DPIACLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_FGCG_REP_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_DISPCLK_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_SOCCLK_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK0_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK1_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK2_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK3_GATE_DISABLE, mask_sh)
+
+/* Hardware sequencer field shift table. */
+static const struct dce_hwseq_shift hwseq_shift = {
+ HWSEQ_DCN35_MASK_SH_LIST(__SHIFT)
+};
+
+/* Hardware sequencer field mask table. */
+static const struct dce_hwseq_mask hwseq_mask = {
+ HWSEQ_DCN35_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * VMID: per-instance register-offset initializer, the 16-entry register
+ * table it fills, and the field shift/mask tables.
+ */
+#define vmid_regs_init(id)\
+ DCN20_VMID_REG_LIST_RI(id)
+
+static struct dcn_vmid_registers vmid_regs[16];
+
+static const struct dcn20_vmid_shift vmid_shifts = {
+ DCN20_VMID_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn20_vmid_mask vmid_masks = {
+ DCN20_VMID_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * Resource counts for DCN3.5.1. Several values match the sizes of the
+ * register tables declared in this file (e.g. num_vmid = 16 and vmid_regs[16],
+ * num_dig_link_enc = 5 and link_enc_regs[5]).
+ */
+static const struct resource_caps res_cap_dcn351 = {
+ .num_timing_generator = 4,
+ .num_opp = 4,
+ .num_video_plane = 4,
+ .num_audio = 5,
+ .num_stream_encoder = 5,
+ .num_dig_link_enc = 5,
+ .num_hpo_dp_stream_encoder = 4,
+ .num_hpo_dp_link_encoder = 2,
+ .num_pll = 4,/*1 c10 edp, 3xc20 combo PHY*/
+ .num_dwb = 1,
+ .num_ddc = 5,
+ .num_vmid = 16,
+ .num_mpc_3dlut = 2,
+ .num_dsc = 4,
+};
+
+/*
+ * Default plane capabilities: supported pixel formats and per-format scaling
+ * limits.
+ */
+static const struct dc_plane_cap plane_cap = {
+ .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
+ .per_pixel_alpha = true,
+
+ .pixel_format_support = {
+ .argb8888 = true,
+ .nv12 = true,
+ .fp16 = true,
+ .p010 = true,
+ .ayuv = false,
+ },
+
+ .max_upscale_factor = {
+ .argb8888 = 16000,
+ .nv12 = 16000,
+ .fp16 = 16000
+ },
+
+ // 6:1 downscaling ratio: 1000/6 = 166.666
+ // NOTE(review): only nv12/fp16 use 167; argb8888 is 250, which by the same
+ // arithmetic would be 4:1 - confirm this is intended.
+ .max_downscale_factor = {
+ .argb8888 = 250,
+ .nv12 = 167,
+ .fp16 = 167
+ },
+ /*
+ * Positional initializers: they fill the two members declared right after
+ * max_downscale_factor. NOTE(review): presumably the minimum plane
+ * width/height - confirm against struct dc_plane_cap.
+ */
+ 64,
+ 64
+};
+
+/* Default debug option values for this resource file. */
+static const struct dc_debug_options debug_defaults_drv = {
+ .disable_dmcu = true,
+ .force_abm_enable = false,
+ .clock_trace = true,
+ .disable_pplib_clock_request = false,
+ .pipe_split_policy = MPC_SPLIT_AVOID,
+ .force_single_disp_pipe_split = false,
+ .disable_dcc = DCC_ENABLE,
+ .disable_dpp_power_gate = true,
+ .disable_hubp_power_gate = true,
+ .disable_optc_power_gate = true, /*should the same as above two*/
+ .disable_hpo_power_gate = true, /*dmubfw force domain25 on*/
+ .disable_clock_gate = false,
+ .disable_dsc_power_gate = true,
+ .vsr_support = true,
+ .performance_trace = false,
+ .max_downscale_src_width = 4096,/*upto true 4k*/
+ .disable_pplib_wm_range = false,
+ .scl_reset_length10 = true,
+ .sanity_checks = false,
+ .underflow_assert_delay_us = 0xFFFFFFFF,
+ .dwb_fi_phase = -1, // -1 = disable,
+ .dmub_command_table = true,
+ .pstate_enabled = true,
+ .use_max_lb = true,
+ .enable_mem_low_power = {
+ .bits = {
+ .vga = false,
+ .i2c = true,
+ .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled
+ .dscl = true,
+ .cm = true,
+ .mpc = true,
+ .optc = true,
+ .vpg = true,
+ .afmt = true,
+ }
+ },
+ .root_clock_optimization = {
+ .bits = {
+ .dpp = true,
+ .dsc = true,/*dscclk and dsc pg*/
+ .hdmistream = true,
+ .hdmichar = true,
+ .dpstream = true,
+ .symclk32_se = true,
+ .symclk32_le = false,
+ .symclk_fe = true,
+ .physymclk = false,
+ .dpiasymclk = true,
+ }
+ },
+ /*
+ * NOTE(review): this field is assigned a DML_FAIL_* constant, which by its
+ * name belongs to a different enumeration - confirm the intended value.
+ */
+ .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT,
+ .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/
+ .minimum_z8_residency_time = 1,
+ .using_dml2 = true,
+ .support_eDP1_5 = true,
+ .enable_hpo_pg_support = false,
+ .enable_legacy_fast_update = true,
+ .enable_single_display_2to1_odm_policy = true,
+ .disable_idle_power_optimizations = false,
+ .dmcub_emulation = false,
+ .disable_boot_optimizations = false,
+ .disable_unbounded_requesting = false,
+ .disable_mem_low_power = false,
+ //must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions
+ .enable_double_buffered_dsc_pg_support = true,
+ .enable_dp_dig_pixel_rate_div_policy = 1,
+ .disable_z10 = false,
+ .ignore_pg = true,
+ .psp_disabled_wa = true,
+ .ips2_eval_delay_us = 2000,
+ .ips2_entry_delay_us = 800,
+ .disable_dmub_reallow_idle = false,
+ .static_screen_wait_frames = 2,
+ .notify_dpia_hr_bw = true,
+ .min_disp_clk_khz = 50000,
+};
+
+/*
+ * Default panel configuration: PSR, PSR-SU and replay are all left allowed,
+ * and eDP link-rate optimization is turned on.
+ */
+static const struct dc_panel_config panel_config_defaults = {
+ .psr = {
+ .disable_psr = false,
+ .disallow_psrsu = false,
+ .disallow_replay = false,
+ },
+ .ilr = {
+ .optimize_edp_link_rate = true,
+ },
+};
+
+/*
+ * Free the DPP object that contains *dpp and clear the caller's pointer.
+ */
+static void dcn35_dpp_destroy(struct dpp **dpp)
+{
+	struct dpp *victim = *dpp;
+
+	*dpp = NULL;
+	kfree(TO_DCN20_DPP(victim));
+}
+
+/*
+ * Allocate a DPP object for instance @inst, (re)fill the dpp_regs table and
+ * construct the DPP against dpp_regs[inst].
+ *
+ * Returns the embedded base object, or NULL when the allocation or
+ * dpp35_construct() fails; in the latter case the allocation is released.
+ */
+static struct dpp *dcn35_dpp_create(struct dc_context *ctx, uint32_t inst)
+{
+	struct dcn3_dpp *new_dpp = kzalloc(sizeof(*new_dpp), GFP_KERNEL);
+
+	if (new_dpp == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT dpp_regs
+	dpp_regs_init(0),
+	dpp_regs_init(1),
+	dpp_regs_init(2),
+	dpp_regs_init(3);
+
+	if (!dpp35_construct(new_dpp, ctx, inst, &dpp_regs[inst], &tf_shift,
+			     &tf_mask)) {
+		BREAK_TO_DEBUGGER();
+		kfree(new_dpp);
+		return NULL;
+	}
+
+	/* Apply the fine-grain clock gating debug setting for DPP. */
+	dpp35_set_fgcg(new_dpp,
+		       ctx->dc->debug.enable_fine_grain_clock_gating.bits.dpp);
+
+	return &new_dpp->base;
+}
+
+/*
+ * Allocate an OPP object for instance @inst, (re)fill the opp_regs table,
+ * construct the OPP against opp_regs[inst] and apply the OPP fine-grain clock
+ * gating debug setting.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct output_pixel_processor *dcn35_opp_create(
+		struct dc_context *ctx, uint32_t inst)
+{
+	struct dcn20_opp *new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL);
+
+	if (new_opp == NULL) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+#undef REG_STRUCT
+#define REG_STRUCT opp_regs
+	opp_regs_init(0),
+	opp_regs_init(1),
+	opp_regs_init(2),
+	opp_regs_init(3);
+
+	dcn35_opp_construct(new_opp, ctx, inst, &opp_regs[inst],
+			    &opp_shift, &opp_mask);
+	dcn35_opp_set_fgcg(new_opp,
+			   ctx->dc->debug.enable_fine_grain_clock_gating.bits.opp);
+
+	return &new_opp->base;
+}
+
+/*
+ * Allocate an AUX engine for instance @inst, (re)fill the aux_engine_regs
+ * table and construct the engine against aux_engine_regs[inst].
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct dce_aux *dcn31_aux_engine_create(
+		struct dc_context *ctx,
+		uint32_t inst)
+{
+	struct aux_engine_dce110 *engine = kzalloc(sizeof(*engine), GFP_KERNEL);
+
+	if (engine == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT aux_engine_regs
+	aux_engine_regs_init(0),
+	aux_engine_regs_init(1),
+	aux_engine_regs_init(2),
+	aux_engine_regs_init(3),
+	aux_engine_regs_init(4);
+
+	dce110_aux_engine_construct(engine, ctx, inst,
+				    SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
+				    &aux_engine_regs[inst],
+				    &aux_mask,
+				    &aux_shift,
+				    ctx->dc->caps.extended_aux_timeout_support);
+
+	return &engine->base;
+}
+
+/*
+ * I2C hardware engine: per-instance register-offset initializer, the 5-entry
+ * register table, and the field shift/mask tables.
+ * NOTE(review): the initializer is invoked with ids 1..5 in this file while
+ * the table has 5 entries; presumably the underlying list macro stores id N
+ * at index N - 1 - confirm against I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI.
+ */
+#define i2c_inst_regs_init(id)\
+ I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id)
+
+static struct dce_i2c_registers i2c_hw_regs[5];
+
+static const struct dce_i2c_shift i2c_shifts = {
+ I2C_COMMON_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dce_i2c_mask i2c_masks = {
+ I2C_COMMON_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/* ========================================================== */
+
+/*
+ * DPIA index | Preferred Encoder | Host Router
+ * 0 | C | 0
+ * 1 | First Available | 0
+ * 2 | D | 1
+ * 3 | First Available | 1
+ *
+ * NOTE(review): the table below maps index 1 to DIGC and index 3 to DIGD,
+ * not to a "first available" marker - confirm which of the two is intended.
+ */
+/* ========================================================== */
+static const enum engine_id dpia_to_preferred_enc_id_table[] = {
+ ENGINE_ID_DIGC,
+ ENGINE_ID_DIGC,
+ ENGINE_ID_DIGD,
+ ENGINE_ID_DIGD
+};
+
+/*
+ * Return the preferred encoder engine for DPIA @dpia_index.
+ * The index is used unchecked; the table has four entries, so callers must
+ * pass a value in the range 0..3.
+ */
+static enum engine_id dcn351_get_preferred_eng_id_dpia(unsigned int dpia_index)
+{
+ return dpia_to_preferred_enc_id_table[dpia_index];
+}
+
+/*
+ * Allocate an I2C hardware engine for instance @inst, (re)fill the
+ * i2c_hw_regs table (initializer ids 1..5) and construct the engine against
+ * i2c_hw_regs[inst].
+ *
+ * Returns the new object, or NULL if the allocation fails.
+ */
+static struct dce_i2c_hw *dcn31_i2c_hw_create(
+		struct dc_context *ctx,
+		uint32_t inst)
+{
+	struct dce_i2c_hw *i2c_hw = kzalloc(sizeof(*i2c_hw), GFP_KERNEL);
+
+	if (i2c_hw == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT i2c_hw_regs
+	i2c_inst_regs_init(1),
+	i2c_inst_regs_init(2),
+	i2c_inst_regs_init(3),
+	i2c_inst_regs_init(4),
+	i2c_inst_regs_init(5);
+
+	dcn2_i2c_hw_construct(i2c_hw, ctx, inst, &i2c_hw_regs[inst],
+			      &i2c_shifts, &i2c_masks);
+
+	return i2c_hw;
+}
+/*
+ * Allocate the MPC object, (re)fill mpc_regs and construct the MPC with
+ * @num_mpcc MPCCs and @num_rmu RMUs.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct mpc *dcn35_mpc_create(
+		struct dc_context *ctx,
+		int num_mpcc,
+		int num_rmu)
+{
+	struct dcn30_mpc *new_mpc = kzalloc(sizeof(*new_mpc), GFP_KERNEL);
+
+	if (new_mpc == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT mpc_regs
+	dcn_mpc_regs_init();
+
+	dcn32_mpc_construct(new_mpc, ctx, &mpc_regs, &mpc_shift, &mpc_mask,
+			    num_mpcc, num_rmu);
+
+	return &new_mpc->base;
+}
+
+/*
+ * Allocate the HUBBUB object, (re)fill hubbub_reg and the 16 vmid_regs
+ * entries, construct the HUBBUB and wire each of its VMIDs to the matching
+ * register entry and the shared shift/mask tables.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct hubbub *dcn35_hubbub_create(struct dc_context *ctx)
+{
+	struct dcn20_hubbub *new_hubbub;
+	int i;
+
+	new_hubbub = kzalloc(sizeof(*new_hubbub), GFP_KERNEL);
+	if (new_hubbub == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT hubbub_reg
+	hubbub_reg_init();
+
+#undef REG_STRUCT
+#define REG_STRUCT vmid_regs
+	vmid_regs_init(0),
+	vmid_regs_init(1),
+	vmid_regs_init(2),
+	vmid_regs_init(3),
+	vmid_regs_init(4),
+	vmid_regs_init(5),
+	vmid_regs_init(6),
+	vmid_regs_init(7),
+	vmid_regs_init(8),
+	vmid_regs_init(9),
+	vmid_regs_init(10),
+	vmid_regs_init(11),
+	vmid_regs_init(12),
+	vmid_regs_init(13),
+	vmid_regs_init(14),
+	vmid_regs_init(15);
+
+	/* Buffer sizes are hard-coded; the dml.ip fields they mirror are noted. */
+	hubbub35_construct(new_hubbub, ctx,
+			   &hubbub_reg,
+			   &hubbub_shift,
+			   &hubbub_mask,
+			   384,	/* ctx->dc->dml.ip.det_buffer_size_kbytes */
+			   8,	/* ctx->dc->dml.ip.pixel_chunk_size_kbytes */
+			   1792	/* ctx->dc->dml.ip.config_return_buffer_size_in_kbytes */);
+
+	for (i = 0; i < res_cap_dcn351.num_vmid; i++) {
+		struct dcn20_vmid *entry = &new_hubbub->vmid[i];
+
+		entry->ctx = ctx;
+		entry->regs = &vmid_regs[i];
+		entry->shifts = &vmid_shifts;
+		entry->masks = &vmid_masks;
+	}
+
+	return &new_hubbub->base;
+}
+
+/*
+ * Allocate an OPTC timing generator for @instance, (re)fill the optc_regs
+ * table, point the object at optc_regs[instance] and the shared shift/mask
+ * tables, then run dcn35_timing_generator_init() on it.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct timing_generator *dcn35_timing_generator_create(
+		struct dc_context *ctx,
+		uint32_t instance)
+{
+	struct optc *tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+
+	if (tg == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT optc_regs
+	optc_regs_init(0),
+	optc_regs_init(1),
+	optc_regs_init(2),
+	optc_regs_init(3);
+
+	tg->base.ctx = ctx;
+	tg->base.inst = instance;
+
+	tg->tg_mask = &optc_mask;
+	tg->tg_shift = &optc_shift;
+	tg->tg_regs = &optc_regs[instance];
+
+	dcn35_timing_generator_init(tg);
+
+	return &tg->base;
+}
+
+/*
+ * Feature set passed to every link encoder constructed in this file
+ * (both the regular and the "minimal" constructor).
+ */
+static const struct encoder_feature_support link_enc_feature = {
+ .max_hdmi_deep_color = COLOR_DEPTH_121212,
+ .max_hdmi_pixel_clock = 600000,
+ .hdmi_ycbcr420_supported = true,
+ .dp_ycbcr420_supported = true,
+ .fec_supported = true,
+ .flags.bits.IS_HBR2_CAPABLE = true,
+ .flags.bits.IS_HBR3_CAPABLE = true,
+ .flags.bits.IS_TPS3_CAPABLE = true,
+ .flags.bits.IS_TPS4_CAPABLE = true
+};
+
+/*
+ * Allocate and construct a link encoder described by @enc_init_data.
+ *
+ * The AUX, HPD and link register tables are (re)filled on every call, then
+ * the encoder is constructed against the entries selected by
+ * enc_init_data->transmitter, ->channel - 1 and ->hpd_source.
+ *
+ * Returns the embedded base object, or NULL when hpd_source is outside
+ * link_enc_hpd_regs or the allocation fails. The hpd_source check is done
+ * before allocating so the rejected case cannot leak the encoder object.
+ *
+ * NOTE(review): transmitter and channel are used as table indices without a
+ * range check - confirm callers guarantee they fit the 5-entry tables.
+ */
+static struct link_encoder *dcn35_link_encoder_create(
+		struct dc_context *ctx,
+		const struct encoder_init_data *enc_init_data)
+{
+	struct dcn20_link_encoder *enc20;
+
+	if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
+		return NULL;
+
+	enc20 = kzalloc(sizeof(*enc20), GFP_KERNEL);
+	if (!enc20)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_aux_regs
+	aux_regs_init(0),
+	aux_regs_init(1),
+	aux_regs_init(2),
+	aux_regs_init(3),
+	aux_regs_init(4);
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_hpd_regs
+	hpd_regs_init(0),
+	hpd_regs_init(1),
+	hpd_regs_init(2),
+	hpd_regs_init(3),
+	hpd_regs_init(4);
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_regs
+	link_regs_init(0, A),
+	link_regs_init(1, B),
+	link_regs_init(2, C),
+	link_regs_init(3, D),
+	link_regs_init(4, E);
+
+	dcn35_link_encoder_construct(enc20,
+			enc_init_data,
+			&link_enc_feature,
+			&link_enc_regs[enc_init_data->transmitter],
+			&link_enc_aux_regs[enc_init_data->channel - 1],
+			&link_enc_hpd_regs[enc_init_data->hpd_source],
+			&le_shift,
+			&le_mask);
+
+	return &enc20->enc10.base;
+}
+
+/* Create a minimal link encoder object not associated with a particular
+ * physical connector.
+ * resource_funcs.link_enc_create_minimal
+ *
+ * @eng_id selects the link_enc_regs entry via (eng_id - ENGINE_ID_DIGA).
+ * Returns NULL when that index is not below the pool's num_dig_link_enc or
+ * not inside link_enc_regs, or when the allocation fails.
+ */
+static struct link_encoder *dcn31_link_enc_create_minimal(
+		struct dc_context *ctx, enum engine_id eng_id)
+{
+	struct dcn20_link_encoder *enc20;
+	unsigned int enc_idx = eng_id - ENGINE_ID_DIGA;
+
+	/*
+	 * Valid indices are 0 .. count - 1, so an index equal to the count must
+	 * be rejected as well; otherwise link_enc_regs is read one past its end.
+	 */
+	if (enc_idx >= ctx->dc->res_pool->res_cap->num_dig_link_enc ||
+	    enc_idx >= ARRAY_SIZE(link_enc_regs))
+		return NULL;
+
+	enc20 = kzalloc(sizeof(*enc20), GFP_KERNEL);
+	if (!enc20)
+		return NULL;
+
+	dcn31_link_encoder_construct_minimal(
+			enc20,
+			ctx,
+			&link_enc_feature,
+			&link_enc_regs[enc_idx],
+			eng_id);
+
+	return &enc20->enc10.base;
+}
+
+/*
+ * Allocate a panel control object and construct it from @init_data.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+	struct dcn31_panel_cntl *cntl = kzalloc(sizeof(*cntl), GFP_KERNEL);
+
+	if (cntl == NULL)
+		return NULL;
+
+	dcn31_panel_cntl_construct(cntl, init_data);
+	return &cntl->base;
+}
+
+/*
+ * Read the DC_PINSTRAPS_AUDIO field of the DC_PINSTRAPS register into
+ * straps->dc_pinstraps_audio. No other member of @straps is written here.
+ */
+static void read_dce_straps(
+ struct dc_context *ctx,
+ struct resource_straps *straps)
+{
+ generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX),
+ FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio);
+
+}
+
+/*
+ * (Re)fill all seven audio_regs entries and create the audio object for
+ * instance @inst against audio_regs[inst].
+ *
+ * Returns whatever dce_audio_create() returns.
+ */
+static struct audio *dcn31_create_audio(
+		struct dc_context *ctx, unsigned int inst)
+{
+#undef REG_STRUCT
+#define REG_STRUCT audio_regs
+	audio_regs_init(0),
+	audio_regs_init(1),
+	audio_regs_init(2),
+	audio_regs_init(3),
+	audio_regs_init(4),
+	audio_regs_init(5),
+	audio_regs_init(6);
+
+	return dce_audio_create(ctx, inst, &audio_regs[inst],
+				&audio_shift, &audio_mask);
+}
+
+/*
+ * Allocate a VPG object for instance @inst, (re)fill the ten vpg_regs entries
+ * and construct the VPG against vpg_regs[inst].
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct vpg *dcn31_vpg_create(
+		struct dc_context *ctx,
+		uint32_t inst)
+{
+	struct dcn31_vpg *new_vpg = kzalloc(sizeof(*new_vpg), GFP_KERNEL);
+
+	if (new_vpg == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT vpg_regs
+	vpg_regs_init(0),
+	vpg_regs_init(1),
+	vpg_regs_init(2),
+	vpg_regs_init(3),
+	vpg_regs_init(4),
+	vpg_regs_init(5),
+	vpg_regs_init(6),
+	vpg_regs_init(7),
+	vpg_regs_init(8),
+	vpg_regs_init(9);
+
+	vpg31_construct(new_vpg, ctx, inst, &vpg_regs[inst],
+			&vpg_shift, &vpg_mask);
+
+	return &new_vpg->base;
+}
+
+/*
+ * Allocate an AFMT object for instance @inst, (re)fill the six afmt_regs
+ * entries and construct the AFMT against afmt_regs[inst].
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct afmt *dcn31_afmt_create(
+		struct dc_context *ctx,
+		uint32_t inst)
+{
+	struct dcn31_afmt *new_afmt = kzalloc(sizeof(*new_afmt), GFP_KERNEL);
+
+	if (new_afmt == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT afmt_regs
+	afmt_regs_init(0),
+	afmt_regs_init(1),
+	afmt_regs_init(2),
+	afmt_regs_init(3),
+	afmt_regs_init(4),
+	afmt_regs_init(5);
+
+	afmt31_construct(new_afmt, ctx, inst, &afmt_regs[inst],
+			 &afmt_shift, &afmt_mask);
+
+	/* Light sleep by default, no need to power down here. */
+	return &new_afmt->base;
+}
+
+/*
+ * Allocate an APG object for instance @inst, (re)fill the four apg_regs
+ * entries and construct the APG against apg_regs[inst].
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct apg *dcn31_apg_create(
+		struct dc_context *ctx,
+		uint32_t inst)
+{
+	struct dcn31_apg *new_apg = kzalloc(sizeof(*new_apg), GFP_KERNEL);
+
+	if (new_apg == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT apg_regs
+	apg_regs_init(0),
+	apg_regs_init(1),
+	apg_regs_init(2),
+	apg_regs_init(3);
+
+	apg31_construct(new_apg, ctx, inst, &apg_regs[inst],
+			&apg_shift, &apg_mask);
+
+	return &new_apg->base;
+}
+
+/*
+ * Create the DIO stream encoder for @eng_id together with its VPG and AFMT
+ * sub-blocks. VPG, AFMT and stream encoder register entries are all selected
+ * by @eng_id.
+ *
+ * Returns the embedded base object, or NULL when @eng_id is above
+ * ENGINE_ID_DIGF, does not index into stream_enc_regs, or any of the three
+ * allocations fails (everything allocated so far is then freed).
+ */
+static struct stream_encoder *dcn35_stream_encoder_create(
+		enum engine_id eng_id,
+		struct dc_context *ctx)
+{
+	struct dcn10_stream_encoder *enc1;
+	struct vpg *vpg;
+	struct afmt *afmt;
+	int vpg_inst;
+	int afmt_inst;
+
+	/*
+	 * stream_enc_regs has fewer entries than the DIGA..DIGF range admits,
+	 * so the ENGINE_ID_DIGF test alone does not keep the index inside the
+	 * table; check it against the table size as well.
+	 */
+	if (eng_id > ENGINE_ID_DIGF || eng_id >= ARRAY_SIZE(stream_enc_regs))
+		return NULL;
+
+	/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
+	vpg_inst = eng_id;
+	afmt_inst = eng_id;
+
+	enc1 = kzalloc(sizeof(*enc1), GFP_KERNEL);
+	vpg = dcn31_vpg_create(ctx, vpg_inst);
+	afmt = dcn31_afmt_create(ctx, afmt_inst);
+
+	if (!enc1 || !vpg || !afmt) {
+		kfree(enc1);
+		kfree(vpg);
+		kfree(afmt);
+		return NULL;
+	}
+
+#undef REG_STRUCT
+#define REG_STRUCT stream_enc_regs
+	stream_enc_regs_init(0),
+	stream_enc_regs_init(1),
+	stream_enc_regs_init(2),
+	stream_enc_regs_init(3),
+	stream_enc_regs_init(4);
+
+	dcn35_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios,
+			eng_id, vpg, afmt,
+			&stream_enc_regs[eng_id],
+			&se_shift, &se_mask);
+
+	return &enc1->base;
+}
+
+/*
+ * Create the HPO DP stream encoder for @eng_id together with its VPG and APG
+ * sub-blocks.
+ *
+ * The HPO DP instance is eng_id - ENGINE_ID_HPO_DP_0. An out-of-range @eng_id
+ * only trips the ASSERT; execution continues afterwards.
+ *
+ * Returns the embedded base object, or NULL if any of the three allocations
+ * fails (everything allocated so far is then freed).
+ */
+static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create(
+		enum engine_id eng_id,
+		struct dc_context *ctx)
+{
+	struct dcn31_hpo_dp_stream_encoder *hpo_enc;
+	struct vpg *vpg_blk;
+	struct apg *apg_blk;
+	uint32_t hpo_inst;
+	uint32_t vpg_idx;
+	uint32_t apg_idx;
+
+	ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3));
+	hpo_inst = eng_id - ENGINE_ID_HPO_DP_0;
+
+	/*
+	 * VPG register blocks 6..9 serve HPO_DP instances 0..3:
+	 *   VPG[6] -> HPO_DP[0], VPG[7] -> HPO_DP[1],
+	 *   VPG[8] -> HPO_DP[2], VPG[9] -> HPO_DP[3]
+	 */
+	vpg_idx = hpo_inst + 6;
+
+	/*
+	 * APG register blocks map one-to-one:
+	 *   APG[0] -> HPO_DP[0], APG[1] -> HPO_DP[1],
+	 *   APG[2] -> HPO_DP[2], APG[3] -> HPO_DP[3]
+	 */
+	apg_idx = hpo_inst;
+
+	/* Allocate the HPO stream encoder and create its VPG/APG sub-blocks. */
+	hpo_enc = kzalloc(sizeof(*hpo_enc), GFP_KERNEL);
+	vpg_blk = dcn31_vpg_create(ctx, vpg_idx);
+	apg_blk = dcn31_apg_create(ctx, apg_idx);
+
+	if (!hpo_enc || !vpg_blk || !apg_blk) {
+		kfree(hpo_enc);
+		kfree(vpg_blk);
+		kfree(apg_blk);
+		return NULL;
+	}
+
+#undef REG_STRUCT
+#define REG_STRUCT hpo_dp_stream_enc_regs
+	hpo_dp_stream_encoder_reg_init(0),
+	hpo_dp_stream_encoder_reg_init(1),
+	hpo_dp_stream_encoder_reg_init(2),
+	hpo_dp_stream_encoder_reg_init(3);
+
+	dcn31_hpo_dp_stream_encoder_construct(hpo_enc, ctx, ctx->dc_bios,
+					      hpo_inst, eng_id, vpg_blk, apg_blk,
+					      &hpo_dp_stream_enc_regs[hpo_inst],
+					      &hpo_dp_se_shift, &hpo_dp_se_mask);
+
+	return &hpo_enc->base;
+}
+
+/*
+ * Allocate an HPO DP link encoder for instance @inst, (re)fill the two
+ * hpo_dp_link_enc_regs entries and construct the encoder against
+ * hpo_dp_link_enc_regs[inst].
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create(
+		uint8_t inst,
+		struct dc_context *ctx)
+{
+	struct dcn31_hpo_dp_link_encoder *hpo_link_enc =
+		kzalloc(sizeof(*hpo_link_enc), GFP_KERNEL);
+
+	if (hpo_link_enc == NULL)
+		return NULL; /* out of memory */
+
+#undef REG_STRUCT
+#define REG_STRUCT hpo_dp_link_enc_regs
+	hpo_dp_link_encoder_reg_init(0),
+	hpo_dp_link_encoder_reg_init(1);
+
+	hpo_dp_link_encoder31_construct(hpo_link_enc, ctx, inst,
+					&hpo_dp_link_enc_regs[inst],
+					&hpo_dp_le_shift, &hpo_dp_le_mask);
+
+	return &hpo_link_enc->base;
+}
+
+/* Allocate a dce_hwseq for dcn351 and attach this file's hwseq register, shift and mask tables. */
+static struct dce_hwseq *dcn351_hwseq_create(struct dc_context *ctx)
+{
+	struct dce_hwseq *seq = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL);
+
+#undef REG_STRUCT
+#define REG_STRUCT hwseq_reg
+	hwseq_reg_init(); /* sits before the NULL check, so it runs even if kzalloc failed */
+
+	if (!seq)
+		return NULL;
+	seq->masks = &hwseq_mask;
+	seq->shifts = &hwseq_shift;
+	seq->regs = &hwseq_reg;
+	seq->ctx = ctx;
+	return seq;
+}
+static const struct resource_create_funcs res_create_funcs = { /* handed to resource_construct() by dcn351_resource_construct() */
+	.read_dce_straps = read_dce_straps,
+	.create_audio = dcn31_create_audio,
+	.create_stream_encoder = dcn35_stream_encoder_create,
+	.create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create,
+	.create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create,
+	.create_hwseq = dcn351_hwseq_create,
+};
+
+/* Free every object owned by @pool->base; @pool itself is left for the caller to free. */
+static void dcn351_resource_destruct(struct dcn351_resource_pool *pool)
+{
+	unsigned int i;
+
+	for (i = 0; i < pool->base.stream_enc_count; i++) {
+		if (pool->base.stream_enc[i] != NULL) {
+			if (pool->base.stream_enc[i]->vpg != NULL) {
+				kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg));
+				pool->base.stream_enc[i]->vpg = NULL;
+			}
+			if (pool->base.stream_enc[i]->afmt != NULL) {
+				kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt));
+				pool->base.stream_enc[i]->afmt = NULL;
+			}
+			kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i]));
+			pool->base.stream_enc[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) {
+		if (pool->base.hpo_dp_stream_enc[i] != NULL) {
+			if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) {
+				kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg));
+				pool->base.hpo_dp_stream_enc[i]->vpg = NULL;
+			}
+			if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) {
+				kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg));
+				pool->base.hpo_dp_stream_enc[i]->apg = NULL;
+			}
+			kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i]));
+			pool->base.hpo_dp_stream_enc[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) {
+		if (pool->base.hpo_dp_link_enc[i] != NULL) {
+			kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i]));
+			pool->base.hpo_dp_link_enc[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
+		if (pool->base.dscs[i] != NULL)
+			dcn20_dsc_destroy(&pool->base.dscs[i]);
+	}
+
+	if (pool->base.mpc != NULL) {
+		kfree(TO_DCN20_MPC(pool->base.mpc));
+		pool->base.mpc = NULL;
+	}
+	if (pool->base.hubbub != NULL) {
+		kfree(pool->base.hubbub);
+		pool->base.hubbub = NULL;
+	}
+	for (i = 0; i < pool->base.pipe_count; i++) {
+		if (pool->base.dpps[i] != NULL)
+			dcn35_dpp_destroy(&pool->base.dpps[i]);
+
+		if (pool->base.ipps[i] != NULL)
+			pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]);
+
+		if (pool->base.hubps[i] != NULL) {
+			kfree(TO_DCN20_HUBP(pool->base.hubps[i]));
+			pool->base.hubps[i] = NULL;
+		}
+	}
+
+	if (pool->base.irqs != NULL) /* pool-wide object: destroyed once, outside the per-pipe loop */
+		dal_irq_service_destroy(&pool->base.irqs);
+
+	for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
+		if (pool->base.engines[i] != NULL)
+			dce110_engine_destroy(&pool->base.engines[i]);
+		if (pool->base.hw_i2cs[i] != NULL) {
+			kfree(pool->base.hw_i2cs[i]);
+			pool->base.hw_i2cs[i] = NULL;
+		}
+		if (pool->base.sw_i2cs[i] != NULL) {
+			kfree(pool->base.sw_i2cs[i]);
+			pool->base.sw_i2cs[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_opp; i++) {
+		if (pool->base.opps[i] != NULL)
+			pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]);
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+		if (pool->base.timing_generators[i] != NULL) {
+			kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i]));
+			pool->base.timing_generators[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_dwb; i++) {
+		if (pool->base.dwbc[i] != NULL) {
+			kfree(TO_DCN30_DWBC(pool->base.dwbc[i]));
+			pool->base.dwbc[i] = NULL;
+		}
+		if (pool->base.mcif_wb[i] != NULL) {
+			kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i]));
+			pool->base.mcif_wb[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.audio_count; i++) {
+		if (pool->base.audios[i])
+			dce_aud_destroy(&pool->base.audios[i]);
+	}
+
+	for (i = 0; i < pool->base.clk_src_count; i++) {
+		if (pool->base.clock_sources[i] != NULL) {
+			dcn20_clock_source_destroy(&pool->base.clock_sources[i]);
+			pool->base.clock_sources[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) {
+		if (pool->base.mpc_lut[i] != NULL) {
+			dc_3dlut_func_release(pool->base.mpc_lut[i]);
+			pool->base.mpc_lut[i] = NULL;
+		}
+		if (pool->base.mpc_shaper[i] != NULL) {
+			dc_transfer_func_release(pool->base.mpc_shaper[i]);
+			pool->base.mpc_shaper[i] = NULL;
+		}
+	}
+
+	if (pool->base.dp_clock_source != NULL) {
+		dcn20_clock_source_destroy(&pool->base.dp_clock_source);
+		pool->base.dp_clock_source = NULL;
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+		if (pool->base.multiple_abms[i] != NULL)
+			dce_abm_destroy(&pool->base.multiple_abms[i]);
+	}
+
+	if (pool->base.psr != NULL)
+		dmub_psr_destroy(&pool->base.psr);
+
+	if (pool->base.replay != NULL)
+		dmub_replay_destroy(&pool->base.replay);
+
+	if (pool->base.pg_cntl != NULL)
+		dcn_pg_cntl_destroy(&pool->base.pg_cntl);
+
+	if (pool->base.dccg != NULL)
+		dcn_dccg_destroy(&pool->base.dccg);
+}
+
+/* Allocate and construct the HUBP for instance @inst; returns its base object, or NULL on failure. */
+static struct hubp *dcn35_hubp_create(struct dc_context *ctx, uint32_t inst)
+{
+	struct dcn20_hubp *hubp = kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL);
+
+	if (hubp == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT hubp_regs
+	hubp_regs_init(0),
+	hubp_regs_init(1),
+	hubp_regs_init(2),
+	hubp_regs_init(3);
+
+	/* a false result from hubp35_construct() is a failure: drop the allocation */
+	if (!hubp35_construct(hubp, ctx, inst,
+			&hubp_regs[inst], &hubp_shift, &hubp_mask)) {
+		BREAK_TO_DEBUGGER();
+		kfree(hubp);
+		return NULL;
+	}
+
+	return &hubp->base;
+}
+
+/* Hand the debug fine-grain clock gating 'dwb' bit to dcn35_dwbc_set_fgcg(). */
+static void dcn35_dwbc_init(struct dcn30_dwbc *dwbc30, struct dc_context *ctx)
+{
+	dcn35_dwbc_set_fgcg(dwbc30, ctx->dc->debug.enable_fine_grain_clock_gating.bits.dwb);
+}
+
+/* Create res_cap->num_dwb DWB objects into pool->dwbc[]; returns false if an allocation fails. */
+static bool dcn35_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
+{
+	uint32_t i; /* unsigned, so the loop comparison with pipe_count is not mixed-sign */
+	uint32_t pipe_count = pool->res_cap->num_dwb;
+
+	for (i = 0; i < pipe_count; i++) {
+		struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc), GFP_KERNEL);
+
+		if (!dwbc30) {
+			dm_error("DC: failed to create dwbc30!\n");
+			return false; /* instances created earlier stay in pool->dwbc[] */
+		}
+
+#undef REG_STRUCT
+#define REG_STRUCT dwbc35_regs
+		dwbc_regs_dcn3_init(0); /* NOTE(review): only instance 0 is initialised, yet [i] is used below */
+
+		dcn35_dwbc_construct(dwbc30, ctx,
+				&dwbc35_regs[i],
+				&dwbc35_shift,
+				&dwbc35_mask,
+				i);
+
+		pool->dwbc[i] = &dwbc30->base;
+
+		dcn35_dwbc_init(dwbc30, ctx);
+	}
+	return true;
+}
+
+/* Hand the debug fine-grain clock gating 'mmhubbub' bit to dcn35_mmhubbub_set_fgcg(). */
+static void dcn35_mmhubbub_init(struct dcn30_mmhubbub *mcif_wb30, struct dc_context *ctx)
+{
+	const struct dc *dc = ctx->dc;
+
+	dcn35_mmhubbub_set_fgcg(mcif_wb30, dc->debug.enable_fine_grain_clock_gating.bits.mmhubbub);
+}
+
+/* Create res_cap->num_dwb MCIF_WB objects into pool->mcif_wb[]; returns false if an allocation fails. */
+static bool dcn35_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool)
+{
+	uint32_t i; /* unsigned, so the loop comparison with pipe_count is not mixed-sign */
+	uint32_t pipe_count = pool->res_cap->num_dwb;
+
+	for (i = 0; i < pipe_count; i++) {
+		struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub), GFP_KERNEL);
+
+		if (!mcif_wb30) {
+			dm_error("DC: failed to create mcif_wb30!\n");
+			return false; /* instances created earlier stay in pool->mcif_wb[] */
+		}
+
+#undef REG_STRUCT
+#define REG_STRUCT mcif_wb35_regs
+		mcif_wb_regs_dcn3_init(0); /* NOTE(review): only instance 0 is initialised, yet [i] is used below */
+
+		dcn35_mmhubbub_construct(mcif_wb30, ctx,
+				&mcif_wb35_regs[i],
+				&mcif_wb35_shift,
+				&mcif_wb35_mask,
+				i);
+
+		dcn35_mmhubbub_init(mcif_wb30, ctx);
+
+		pool->mcif_wb[i] = &mcif_wb30->base;
+	}
+	return true;
+}
+
+/* Allocate a dcn20_dsc, construct it for instance @inst and apply the debug 'dsc' FGCG bit. */
+static struct display_stream_compressor *dcn35_dsc_create(
+	struct dc_context *ctx, uint32_t inst)
+{
+	struct dcn20_dsc *dsc20 = kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL);
+
+	if (dsc20 == NULL) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+#undef REG_STRUCT
+#define REG_STRUCT dsc_regs
+	dsc_regsDCN35_init(0),
+	dsc_regsDCN35_init(1),
+	dsc_regsDCN35_init(2),
+	dsc_regsDCN35_init(3);
+
+	dsc35_construct(dsc20, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
+	dsc35_set_fgcg(dsc20,
+		ctx->dc->debug.enable_fine_grain_clock_gating.bits.dsc);
+	return &dsc20->base;
+}
+
+/* .destroy hook: tear down and free the dcn351 pool behind *pool, then clear the caller's pointer. */
+static void dcn351_destroy_resource_pool(struct resource_pool **pool)
+{
+	dcn351_resource_destruct(TO_DCN351_RES_POOL(*pool));
+	kfree(TO_DCN351_RES_POOL(*pool));
+
+	*pool = NULL;
+}
+
+/* Allocate a dce110_clk_src, construct it for @id/@regs and tag it with @dp_clk_src; NULL on failure. */
+static struct clock_source *dcn35_clock_source_create(
+	struct dc_context *ctx,
+	struct dc_bios *bios,
+	enum clock_source_id id,
+	const struct dce110_clk_src_regs *regs,
+	bool dp_clk_src)
+{
+	struct dce110_clk_src *src = kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL);
+
+	if (src == NULL)
+		return NULL;
+
+	if (!dcn31_clk_src_construct(src, ctx, bios, id,
+			regs, &cs_shift, &cs_mask)) {
+		kfree(src);
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	src->base.dp_clk_src = dp_clk_src;
+	return &src->base;
+}
+
+static struct dc_cap_funcs cap_funcs = { /* copied by value into dc->cap_funcs in dcn351_resource_construct() */
+	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap
+};
+
+static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config)
+{
+	*panel_config = panel_config_defaults; /* whole-struct copy; @panel_config is dereferenced unchecked */
+}
+
+
+/* Validate @context with DML2; in DC_VALIDATE_MODE_AND_PROGRAMMING also decide z-state support. */
+static enum dc_status dcn351_validate_bandwidth(struct dc *dc, struct dc_state *context,
+		enum dc_validate_mode validate_mode)
+{
+	bool valid;
+
+	/* on DC power the DC-power-source DML2 instance is used, otherwise the default one */
+	valid = dml2_validate(dc, context,
+		context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2,
+		validate_mode);
+
+	/* the z-state decision runs whenever programming is requested, whatever 'valid' is */
+	if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING) {
+		DC_FP_START();
+		dcn35_decide_zstate_support(dc, context);
+		DC_FP_END();
+	}
+	return valid ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
+}
+
+/* .populate_dml_pipes hook: run the dcn351 FPU helper between DC_FP_START() and DC_FP_END(). */
+static int populate_dml_pipes_from_context_fpu(struct dc *dc,
+					      struct dc_state *context,
+					      display_e2e_pipe_params_st *pipes,
+					      enum dc_validate_mode validate_mode)
+{
+	int result;
+
+	DC_FP_START();
+	result = dcn351_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode);
+	DC_FP_END();
+
+	return result;
+}
+
+static struct resource_funcs dcn351_res_pool_funcs = { /* installed as pool->base.funcs by dcn351_resource_construct() */
+	.destroy = dcn351_destroy_resource_pool,
+	.link_enc_create = dcn35_link_encoder_create,
+	.link_enc_create_minimal = dcn31_link_enc_create_minimal,
+	.link_encs_assign = link_enc_cfg_link_encs_assign,
+	.link_enc_unassign = link_enc_cfg_link_enc_unassign,
+	.panel_cntl_create = dcn31_panel_cntl_create,
+	.validate_bandwidth = dcn351_validate_bandwidth,
+	.calculate_wm_and_dlg = NULL,
+	.update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
+	.populate_dml_pipes = populate_dml_pipes_from_context_fpu,
+	.acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+	.release_pipe = dcn20_release_pipe,
+	.add_stream_to_ctx = dcn30_add_stream_to_ctx,
+	.add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
+	.remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
+	.populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context,
+	.set_mcif_arb_params = dcn30_set_mcif_arb_params,
+	.find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
+	.acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
+	.release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
+	.update_bw_bounding_box = dcn351_update_bw_bounding_box_fpu,
+	.patch_unknown_plane_state = dcn35_patch_unknown_plane_state,
+	.get_panel_config_defaults = dcn35_get_panel_config_defaults,
+	.get_preferred_eng_id_dpia = dcn351_get_preferred_eng_id_dpia,
+	.get_det_buffer_size = dcn31_get_det_buffer_size,
+	.get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe,
+	.update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch,
+	.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params
+};
+
+/* Populate @pool and @dc caps for dcn351; returns false (after dcn351_resource_destruct()) on any failure. */
+static bool dcn351_resource_construct(
+	uint8_t num_virtual_links,
+	struct dc *dc,
+	struct dcn351_resource_pool *pool)
+{
+	int i;
+	struct dc_context *ctx = dc->ctx;
+	struct irq_service_init_data init_data;
+
+#undef REG_STRUCT
+#define REG_STRUCT bios_regs
+	bios_regs_init();
+
+#undef REG_STRUCT
+#define REG_STRUCT clk_src_regs
+	clk_src_regs_init(0, A),
+	clk_src_regs_init(1, B),
+	clk_src_regs_init(2, C),
+	clk_src_regs_init(3, D),
+	clk_src_regs_init(4, E);
+
+#undef REG_STRUCT
+#define REG_STRUCT abm_regs
+	abm_regs_init(0),
+	abm_regs_init(1),
+	abm_regs_init(2),
+	abm_regs_init(3);
+
+#undef REG_STRUCT
+#define REG_STRUCT dccg_regs
+	dccg_regs_init();
+
+	ctx->dc_bios->regs = &bios_regs;
+
+	pool->base.res_cap = &res_cap_dcn351;
+
+	pool->base.funcs = &dcn351_res_pool_funcs;
+
+	/*************************************************
+	 *  Resource + asic cap hardcoding               *
+	 *************************************************/
+	pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE;
+	pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
+	pool->base.mpcc_count = pool->base.res_cap->num_timing_generator;
+	dc->caps.max_downscale_ratio = 600;
+	dc->caps.i2c_speed_in_khz = 100;
+	dc->caps.i2c_speed_in_khz_hdcp = 100;
+	dc->caps.max_cursor_size = 256;
+	dc->caps.min_horizontal_blanking_period = 80;
+	dc->caps.dmdata_alloc_size = 2048;
+	dc->caps.max_slave_planes = 3;
+	dc->caps.max_slave_yuv_planes = 3;
+	dc->caps.max_slave_rgb_planes = 3;
+	dc->caps.post_blend_color_processing = true;
+	dc->caps.force_dp_tps4_for_cp2520 = true;
+	if (dc->config.forceHBR2CP2520)
+		dc->caps.force_dp_tps4_for_cp2520 = false;
+	dc->caps.dp_hpo = true;
+	dc->caps.dp_hdmi21_pcon_support = true;
+
+	dc->caps.edp_dsc_support = true;
+	dc->caps.extended_aux_timeout_support = true;
+	dc->caps.dmcub_support = true;
+	dc->caps.is_apu = true;
+	dc->caps.seamless_odm = true;
+
+	dc->caps.zstate_support = true;
+	dc->caps.ips_support = true;
+	dc->caps.max_v_total = (1 << 15) - 1;
+	dc->caps.vtotal_limited_by_fp2 = true;
+
+	/* Color pipeline capabilities */
+	dc->caps.color.dpp.dcn_arch = 1;
+	dc->caps.color.dpp.input_lut_shared = 0;
+	dc->caps.color.dpp.icsc = 1;
+	dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr
+	dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
+	dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
+	dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1;
+	dc->caps.color.dpp.dgam_rom_caps.pq = 1;
+	dc->caps.color.dpp.dgam_rom_caps.hlg = 1;
+	dc->caps.color.dpp.post_csc = 1;
+	dc->caps.color.dpp.gamma_corr = 1;
+	dc->caps.color.dpp.dgam_rom_for_yuv = 0;
+
+	dc->caps.color.dpp.hw_3d_lut = 0;
+	dc->caps.color.dpp.ogam_ram = 0;  // no OGAM in DPP since DCN1
+	// no OGAM ROM on DCN301
+	dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
+	dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
+	dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
+	dc->caps.color.dpp.ogam_rom_caps.pq = 0;
+	dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
+	dc->caps.color.dpp.ocsc = 0;
+
+	dc->caps.color.mpc.gamut_remap = 1;
+	dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2
+	dc->caps.color.mpc.ogam_ram = 1;
+	dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
+	dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
+	dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
+	dc->caps.color.mpc.ogam_rom_caps.pq = 0;
+	dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
+	dc->caps.color.mpc.ocsc = 1;
+	dc->caps.color.mpc.preblend = true;
+
+	dc->caps.num_of_host_routers = 2;
+	dc->caps.num_of_dpias_per_host_router = 2;
+
+	/* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order
+	 * to provide some margin.
+	 * It's expected for future ASIC to have equal or higher value, in order to
+	 * have deterministic power improvement from generation to generation.
+	 * (i.e., we should not expect new ASIC generation with lower vmin rate)
+	 */
+	dc->caps.max_disp_clock_khz_at_vmin = 650000;
+
+	/* Use pipe context based otg sync logic */
+	dc->config.use_pipe_ctx_sync_logic = true;
+
+	/* Use psp mailbox to enable assr */
+	dc->config.use_assr_psp_message = true;
+
+	/* read VBIOS LTTPR caps */
+	if (ctx->dc_bios->funcs->get_lttpr_caps) {
+		enum bp_result bp_query_result;
+		uint8_t is_vbios_lttpr_enable = 0;
+
+		bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable);
+		dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable;
+	}
+
+	/* interop bit is implicit */
+	dc->caps.vbios_lttpr_aware = true;
+
+	if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
+		dc->debug = debug_defaults_drv;
+
+	/*HW default is to have all the FGCG enabled, SW no need to program them*/
+	dc->debug.enable_fine_grain_clock_gating.u32All = 0xFFFF;
+	// Init the vm_helper
+	if (dc->vm_helper)
+		vm_helper_init(dc->vm_helper, 16);
+
+	/*************************************************
+	 *  Create resources                             *
+	 *************************************************/
+
+	/* Clock Sources for Pixel Clock*/
+	pool->base.clock_sources[DCN351_CLK_SRC_PLL0] =
+			dcn35_clock_source_create(ctx, ctx->dc_bios,
+				CLOCK_SOURCE_COMBO_PHY_PLL0,
+				&clk_src_regs[0], false);
+	pool->base.clock_sources[DCN351_CLK_SRC_PLL1] =
+			dcn35_clock_source_create(ctx, ctx->dc_bios,
+				CLOCK_SOURCE_COMBO_PHY_PLL1,
+				&clk_src_regs[1], false);
+	pool->base.clock_sources[DCN351_CLK_SRC_PLL2] =
+			dcn35_clock_source_create(ctx, ctx->dc_bios,
+				CLOCK_SOURCE_COMBO_PHY_PLL2,
+				&clk_src_regs[2], false);
+	pool->base.clock_sources[DCN351_CLK_SRC_PLL3] =
+			dcn35_clock_source_create(ctx, ctx->dc_bios,
+				CLOCK_SOURCE_COMBO_PHY_PLL3,
+				&clk_src_regs[3], false);
+	pool->base.clock_sources[DCN351_CLK_SRC_PLL4] =
+			dcn35_clock_source_create(ctx, ctx->dc_bios,
+				CLOCK_SOURCE_COMBO_PHY_PLL4,
+				&clk_src_regs[4], false);
+
+	pool->base.clk_src_count = DCN351_CLK_SRC_TOTAL;
+
+	/* todo: not reuse phy_pll registers */
+	pool->base.dp_clock_source =
+			dcn35_clock_source_create(ctx, ctx->dc_bios,
+				CLOCK_SOURCE_ID_DP_DTO,
+				&clk_src_regs[0], true);
+	/* like every other resource created here, a missing DP clock source aborts construction */
+	if (pool->base.dp_clock_source == NULL) {
+		dm_error("DC: failed to create dp clock source!\n");
+		goto create_fail;
+	}
+
+	for (i = 0; i < pool->base.clk_src_count; i++) {
+		if (pool->base.clock_sources[i] == NULL) {
+			dm_error("DC: failed to create clock sources!\n");
+			BREAK_TO_DEBUGGER();
+			goto create_fail;
+		}
+	}
+	/*temp till dml2 fully work without dml1*/
+	dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31);
+
+	/* TODO: DCCG */
+	pool->base.dccg = dccg35_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask);
+	if (pool->base.dccg == NULL) {
+		dm_error("DC: failed to create dccg!\n");
+		BREAK_TO_DEBUGGER();
+		goto create_fail;
+	}
+
+#undef REG_STRUCT
+#define REG_STRUCT pg_cntl_regs
+	pg_cntl_dcn35_regs_init();
+
+	pool->base.pg_cntl = pg_cntl35_create(ctx, &pg_cntl_regs, &pg_cntl_shift, &pg_cntl_mask);
+	if (pool->base.pg_cntl == NULL) {
+		dm_error("DC: failed to create power gate control!\n");
+		BREAK_TO_DEBUGGER();
+		goto create_fail;
+	}
+
+	/* TODO: IRQ */
+	init_data.ctx = dc->ctx;
+	pool->base.irqs = dal_irq_service_dcn351_create(&init_data);
+	if (!pool->base.irqs)
+		goto create_fail;
+
+	/* HUBBUB */
+	pool->base.hubbub = dcn35_hubbub_create(ctx);
+	if (pool->base.hubbub == NULL) {
+		BREAK_TO_DEBUGGER();
+		dm_error("DC: failed to create hubbub!\n");
+		goto create_fail;
+	}
+
+	/* HUBPs, DPPs, OPPs and TGs */
+	for (i = 0; i < pool->base.pipe_count; i++) {
+		pool->base.hubps[i] = dcn35_hubp_create(ctx, i);
+		if (pool->base.hubps[i] == NULL) {
+			BREAK_TO_DEBUGGER();
+			dm_error(
+				"DC: failed to create hubps!\n");
+			goto create_fail;
+		}
+
+		pool->base.dpps[i] = dcn35_dpp_create(ctx, i);
+		if (pool->base.dpps[i] == NULL) {
+			BREAK_TO_DEBUGGER();
+			dm_error(
+				"DC: failed to create dpps!\n");
+			goto create_fail;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_opp; i++) {
+		pool->base.opps[i] = dcn35_opp_create(ctx, i);
+		if (pool->base.opps[i] == NULL) {
+			BREAK_TO_DEBUGGER();
+			dm_error(
+				"DC: failed to create output pixel processor!\n");
+			goto create_fail;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+		pool->base.timing_generators[i] = dcn35_timing_generator_create(
+				ctx, i);
+		if (pool->base.timing_generators[i] == NULL) {
+			BREAK_TO_DEBUGGER();
+			dm_error("DC: failed to create tg!\n");
+			goto create_fail;
+		}
+	}
+	pool->base.timing_generator_count = i;
+
+	/* PSR */
+	pool->base.psr = dmub_psr_create(ctx);
+	if (pool->base.psr == NULL) {
+		dm_error("DC: failed to create psr obj!\n");
+		BREAK_TO_DEBUGGER();
+		goto create_fail;
+	}
+
+	/* Replay */
+	pool->base.replay = dmub_replay_create(ctx);
+	if (pool->base.replay == NULL) {
+		dm_error("DC: failed to create replay obj!\n");
+		BREAK_TO_DEBUGGER();
+		goto create_fail;
+	}
+
+	/* ABM */
+	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+		pool->base.multiple_abms[i] = dmub_abm_create(ctx,
+				&abm_regs[i],
+				&abm_shift,
+				&abm_mask);
+		if (pool->base.multiple_abms[i] == NULL) {
+			dm_error("DC: failed to create abm for pipe %d!\n", i);
+			BREAK_TO_DEBUGGER();
+			goto create_fail;
+		}
+	}
+
+	/* MPC and DSC */
+	pool->base.mpc = dcn35_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut);
+	if (pool->base.mpc == NULL) {
+		BREAK_TO_DEBUGGER();
+		dm_error("DC: failed to create mpc!\n");
+		goto create_fail;
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
+		pool->base.dscs[i] = dcn35_dsc_create(ctx, i);
+		if (pool->base.dscs[i] == NULL) {
+			BREAK_TO_DEBUGGER();
+			dm_error("DC: failed to create display stream compressor %d!\n", i);
+			goto create_fail;
+		}
+	}
+
+	/* DWB and MMHUBBUB */
+	if (!dcn35_dwbc_create(ctx, &pool->base)) {
+		BREAK_TO_DEBUGGER();
+		dm_error("DC: failed to create dwbc!\n");
+		goto create_fail;
+	}
+
+	if (!dcn35_mmhubbub_create(ctx, &pool->base)) {
+		BREAK_TO_DEBUGGER();
+		dm_error("DC: failed to create mcif_wb!\n");
+		goto create_fail;
+	}
+
+	/* AUX and I2C */
+	for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
+		pool->base.engines[i] = dcn31_aux_engine_create(ctx, i);
+		if (pool->base.engines[i] == NULL) {
+			BREAK_TO_DEBUGGER();
+			dm_error(
+				"DC:failed to create aux engine!!\n");
+			goto create_fail;
+		}
+		pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i);
+		if (pool->base.hw_i2cs[i] == NULL) {
+			BREAK_TO_DEBUGGER();
+			dm_error(
+				"DC:failed to create hw i2c!!\n");
+			goto create_fail;
+		}
+		pool->base.sw_i2cs[i] = NULL;
+	}
+
+	/* NOTE(review): this comment used to say "DCN3.5 has 6 DPIA", but the count set here is 4 */
+	pool->base.usb4_dpia_count = 4;
+	if (dc->debug.dpia_debug.bits.disable_dpia)
+		pool->base.usb4_dpia_count = 0;
+
+	/* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
+	if (!resource_construct(num_virtual_links, dc, &pool->base,
+			&res_create_funcs))
+		goto create_fail;
+
+	/* HW Sequencer and Plane caps */
+	dcn351_hw_sequencer_construct(dc);
+
+	dc->caps.max_planes =  pool->base.pipe_count;
+
+	for (i = 0; i < dc->caps.max_planes; ++i)
+		dc->caps.planes[i] = plane_cap;
+
+	dc->caps.max_odm_combine_factor = 4;
+
+	dc->cap_funcs = cap_funcs;
+
+	dc->dcn_ip->max_num_dpp = pool->base.pipe_count;
+
+	dc->dml2_options.dcn_pipe_count = pool->base.pipe_count;
+	dc->dml2_options.use_native_soc_bb_construction = true;
+	dc->dml2_options.minimize_dispclk_using_odm = false;
+	if (dc->config.EnableMinDispClkODM)
+		dc->dml2_options.minimize_dispclk_using_odm = true;
+	dc->dml2_options.enable_windowed_mpo_odm = dc->config.enable_windowed_mpo_odm;
+
+	resource_init_common_dml2_callbacks(dc, &dc->dml2_options);
+	dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch;
+
+	dc->dml2_options.max_segments_per_hubp = 24;
+	dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/
+	dc->dml2_options.override_det_buffer_size_kbytes = true;
+
+	if (dc->config.sdpif_request_limit_words_per_umc == 0)
+		dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/
+
+	return true;
+
+create_fail:
+
+	dcn351_resource_destruct(pool);
+
+	return false;
+}
+
+/* Allocate a dcn351 resource pool and construct it; returns the embedded base pool, or NULL on failure. */
+struct resource_pool *dcn351_create_resource_pool(
+		const struct dc_init_data *init_data,
+		struct dc *dc)
+{
+	struct dcn351_resource_pool *res_pool = kzalloc(sizeof(struct dcn351_resource_pool), GFP_KERNEL);
+
+	if (res_pool == NULL)
+		return NULL;
+
+	if (!dcn351_resource_construct(init_data->num_virtual_links, dc, res_pool)) {
+		BREAK_TO_DEBUGGER();
+		kfree(res_pool);
+		return NULL;
+	}
+	return &res_pool->base;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.h
new file mode 100644
index 000000000000..f3e045777a3d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef _DCN351_RESOURCE_H_
+#define _DCN351_RESOURCE_H_
+
+#include "core_types.h"
+
+extern struct _vcs_dpi_ip_params_st dcn3_51_ip;
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc;
+
+#define TO_DCN351_RES_POOL(pool)\
+	container_of(pool, struct dcn351_resource_pool, base) /* pool must point at the 'base' member */
+
+struct dcn351_resource_pool {
+	struct resource_pool base; /* only member; TO_DCN351_RES_POOL() maps a pointer to it back */
+};
+
+struct resource_pool *dcn351_create_resource_pool(
+		const struct dc_init_data *init_data, /* num_virtual_links is read from here */
+		struct dc *dc); /* returns NULL when allocation or construction fails */
+
+#endif /* _DCN351_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
new file mode 100644
index 000000000000..ca125ee6c2fb
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c
@@ -0,0 +1,2192 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2025 Advanced Micro Devices, Inc. */
+
+#include "dm_services.h"
+#include "dc.h"
+
+#include "dcn31/dcn31_init.h"
+#include "dcn35/dcn35_init.h"
+#include "dcn36/dcn36_resource.h"
+
+#include "resource.h"
+#include "include/irq_service_interface.h"
+#include "dcn36_resource.h"
+#include "dml2/dml2_wrapper.h"
+
+#include "dcn20/dcn20_resource.h"
+#include "dcn30/dcn30_resource.h"
+#include "dcn31/dcn31_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn35/dcn35_resource.h"
+
+#include "dcn10/dcn10_ipp.h"
+#include "dcn30/dcn30_hubbub.h"
+#include "dcn31/dcn31_hubbub.h"
+#include "dcn35/dcn35_hubbub.h"
+#include "dcn32/dcn32_mpc.h"
+#include "dcn35/dcn35_hubp.h"
+#include "irq/dcn36/irq_service_dcn36.h"
+#include "dcn35/dcn35_dpp.h"
+#include "dcn35/dcn35_optc.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn35/dcn35_opp.h"
+#include "dcn35/dcn35_dsc.h"
+#include "dcn30/dcn30_vpg.h"
+#include "dcn30/dcn30_afmt.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+#include "dcn35/dcn35_dio_stream_encoder.h"
+#include "dcn31/dcn31_hpo_dp_stream_encoder.h"
+#include "dcn31/dcn31_hpo_dp_link_encoder.h"
+#include "dcn32/dcn32_hpo_dp_link_encoder.h"
+#include "link_service.h"
+#include "dcn31/dcn31_apg.h"
+#include "dcn32/dcn32_dio_link_encoder.h"
+#include "dcn31/dcn31_vpg.h"
+#include "dcn31/dcn31_afmt.h"
+#include "dce/dce_clock_source.h"
+#include "dce/dce_audio.h"
+#include "dce/dce_hwseq.h"
+#include "clk_mgr.h"
+#include "virtual/virtual_stream_encoder.h"
+#include "dce110/dce110_resource.h"
+#include "dml/display_mode_vba.h"
+#include "dcn35/dcn35_dccg.h"
+#include "dcn35/dcn35_pg_cntl.h"
+#include "dcn10/dcn10_resource.h"
+#include "dcn31/dcn31_panel_cntl.h"
+#include "dcn35/dcn35_hwseq.h"
+#include "dcn35/dcn35_dio_link_encoder.h"
+#include "dml/dcn31/dcn31_fpu.h" /*todo*/
+#include "dml/dcn35/dcn35_fpu.h"
+#include "dcn35/dcn35_dwb.h"
+#include "dcn35/dcn35_mmhubbub.h"
+
+#include "dcn/dcn_3_6_0_offset.h"
+#include "dcn/dcn_3_6_0_sh_mask.h"
+
+#define regBIF_BX2_BIOS_SCRATCH_2 0x2ffc004e
+#define regBIF_BX2_BIOS_SCRATCH_2_BASE_IDX 5
+
+#define regBIF_BX2_BIOS_SCRATCH_3 0x2ffc004f
+#define regBIF_BX2_BIOS_SCRATCH_3_BASE_IDX 5
+
+#define regBIF_BX2_BIOS_SCRATCH_6 0x2ffc0052
+#define regBIF_BX2_BIOS_SCRATCH_6_BASE_IDX 5
+
+#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE__SHIFT 0x0
+#define DSCC0_DSCC_CONFIG0__ICH_RESET_AT_END_OF_LINE_MASK 0x0000000FL
+
+#include "reg_helper.h"
+#include "dce/dmub_abm.h"
+#include "dce/dmub_psr.h"
+#include "dce/dmub_replay.h"
+#include "dce/dce_aux.h"
+#include "dce/dce_i2c.h"
+#include "dml/dcn31/display_mode_vba_31.h" /*temp*/
+#include "vm_helper.h"
+#include "dcn20/dcn20_vmid.h"
+
+#include "dc_state_priv.h"
+
+#include "link_enc_cfg.h"
+#define DC_LOGGER_INIT(logger)
+
+enum dcn36_clk_src_array_id { /* presumably indexes the pool's clock_sources[] array - confirm against users */
+	DCN36_CLK_SRC_PLL0,
+	DCN36_CLK_SRC_PLL1,
+	DCN36_CLK_SRC_PLL2,
+	DCN36_CLK_SRC_PLL3,
+	DCN36_CLK_SRC_PLL4,
+	DCN36_CLK_SRC_TOTAL /* count of the PLL entries above (5), not an entry itself */
+};
+
+/* begin *********************
+ * macros to expand the register list macros defined in the HW object header files
+ */
+
+/* DCN */
+/* TODO awful hack. fixup dcn20_dwb.h */
+#undef BASE_INNER
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] /* requires a 'ctx' in scope where the macro expands */
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define SR(reg_name)\
+	REG_STRUCT.reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+			reg ## reg_name /* scalar REG_STRUCT: segment base + register offset */
+
+#define SR_ARR(reg_name, id) \
+	REG_STRUCT[id].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name /* same, into array element id */
+
+#define SR_ARR_INIT(reg_name, id, value) \
+	REG_STRUCT[id].reg_name = value /* stores value as given; no segment base is added */
+
+#define SRI(reg_name, block, id)\
+ REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_ARR(reg_name, block, id)\
+ REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SR_ARR_I2C(reg_name, id) \
+ REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name
+
+#define SRI_ARR_I2C(reg_name, block, id)\
+ REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_ARR_ALPHABET(reg_name, block, index, id)\
+ REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI2(reg_name, block, id)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define SRI2_ARR(reg_name, block, id)\
+ REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define SRIR(var_name, reg_name, block, id)\
+ .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII(reg_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII_ARR_2(reg_name, block, id, inst)\
+ REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII_MPC_RMU(reg_name, block, id)\
+ .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII_DWB(reg_name, temp_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## temp_name
+
+#define SF_DWB2(reg_name, block, id, field_name, post_fix) \
+ .field_name = reg_name ## __ ## field_name ## post_fix
+
+#define DCCG_SRII(reg_name, block, id)\
+ REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define VUPDATE_SRII(reg_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
+ reg ## reg_name ## _ ## block ## id
+
+/* NBIO */
+#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg]
+
+#define NBIO_BASE(seg) \
+ NBIO_BASE_INNER(seg)
+
+#define NBIO_SR(reg_name)\
+ REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \
+ regBIF_BX2_ ## reg_name
+
+#define NBIO_SR_ARR(reg_name, id)\
+ REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX2_ ## reg_name ## _BASE_IDX) + \
+ regBIF_BX2_ ## reg_name
+
+#define bios_regs_init() \
+ ( \
+ NBIO_SR(BIOS_SCRATCH_3),\
+ NBIO_SR(BIOS_SCRATCH_6)\
+ )
+
+static struct bios_registers bios_regs;
+
+#define clk_src_regs_init(index, pllid)\
+ CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid)
+
+static struct dce110_clk_src_regs clk_src_regs[5];
+
+static const struct dce110_clk_src_shift cs_shift = {
+ CS_COMMON_MASK_SH_LIST_DCN3_1_4(__SHIFT)
+};
+
+static const struct dce110_clk_src_mask cs_mask = {
+ CS_COMMON_MASK_SH_LIST_DCN3_1_4(_MASK)
+};
+
+#define abm_regs_init(id)\
+ ABM_DCN32_REG_LIST_RI(id)
+
+static struct dce_abm_registers abm_regs[4];
+
+static const struct dce_abm_shift abm_shift = {
+ ABM_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dce_abm_mask abm_mask = {
+ ABM_MASK_SH_LIST_DCN35(_MASK)
+};
+
+#define audio_regs_init(id)\
+ AUD_COMMON_REG_LIST_RI(id)
+
+static struct dce_audio_registers audio_regs[7];
+
+
+#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
+ SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\
+ SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\
+ AUD_COMMON_MASK_SH_LIST_BASE(mask_sh)
+
+static const struct dce_audio_shift audio_shift = {
+ DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_audio_mask audio_mask = {
+ DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
+};
+
+#define vpg_regs_init(id)\
+ VPG_DCN31_REG_LIST_RI(id)
+
+static struct dcn31_vpg_registers vpg_regs[10];
+
+static const struct dcn31_vpg_shift vpg_shift = {
+ DCN31_VPG_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_vpg_mask vpg_mask = {
+ DCN31_VPG_MASK_SH_LIST(_MASK)
+};
+
+#define afmt_regs_init(id)\
+ AFMT_DCN31_REG_LIST_RI(id)
+
+static struct dcn31_afmt_registers afmt_regs[6];
+
+static const struct dcn31_afmt_shift afmt_shift = {
+ DCN31_AFMT_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_afmt_mask afmt_mask = {
+ DCN31_AFMT_MASK_SH_LIST(_MASK)
+};
+
+#define apg_regs_init(id)\
+ APG_DCN31_REG_LIST_RI(id)
+
+static struct dcn31_apg_registers apg_regs[4];
+
+static const struct dcn31_apg_shift apg_shift = {
+ DCN31_APG_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_apg_mask apg_mask = {
+ DCN31_APG_MASK_SH_LIST(_MASK)
+};
+
+#define stream_enc_regs_init(id)\
+ SE_DCN35_REG_LIST_RI(id)
+
+static struct dcn10_stream_enc_registers stream_enc_regs[5];
+
+static const struct dcn10_stream_encoder_shift se_shift = {
+ SE_COMMON_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn10_stream_encoder_mask se_mask = {
+ SE_COMMON_MASK_SH_LIST_DCN35(_MASK)
+};
+
+#define aux_regs_init(id)\
+ DCN2_AUX_REG_LIST_RI(id)
+
+static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5];
+
+#define hpd_regs_init(id)\
+ HPD_REG_LIST_RI(id)
+
+static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5];
+
+
+static const struct dce110_aux_registers_shift aux_shift = {
+ DCN_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+ DCN_AUX_MASK_SH_LIST(_MASK)
+};
+
+#define link_regs_init(id, phyid)\
+ ( \
+ LE_DCN35_REG_LIST_RI(id), \
+ UNIPHY_DCN2_REG_LIST_RI(id, phyid)\
+ )
+
+static struct dcn10_link_enc_registers link_enc_regs[5];
+
+/*
+ * Field shift table for the link encoder registers.
+ * DPCS_DCN31_MASK_SH_LIST(__SHIFT) is not part of this table.
+ */
+static const struct dcn10_link_enc_shift le_shift = {
+	LINK_ENCODER_MASK_SH_LIST_DCN35(__SHIFT),
+};
+
+/*
+ * Field mask table for the link encoder registers.
+ * DPCS_DCN31_MASK_SH_LIST(_MASK) is not part of this table.
+ */
+static const struct dcn10_link_enc_mask le_mask = {
+	LINK_ENCODER_MASK_SH_LIST_DCN35(_MASK),
+};
+
+#define hpo_dp_stream_encoder_reg_init(id)\
+ DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id)
+
+static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4];
+
+static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = {
+ DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = {
+ DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK)
+};
+
+#define hpo_dp_link_encoder_reg_init(id)\
+ DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id)
+
+static struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[2];
+
+static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = {
+ DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = {
+ DCN3_1_HPO_DP_LINK_ENC_COMMON_MASK_SH_LIST(_MASK)
+};
+
+#define dpp_regs_init(id)\
+ DPP_REG_LIST_DCN35_RI(id)
+
+static struct dcn3_dpp_registers dpp_regs[4];
+
+static const struct dcn35_dpp_shift tf_shift = {
+ DPP_REG_LIST_SH_MASK_DCN35(__SHIFT)
+};
+
+static const struct dcn35_dpp_mask tf_mask = {
+ DPP_REG_LIST_SH_MASK_DCN35(_MASK)
+};
+
+#define opp_regs_init(id)\
+ OPP_REG_LIST_DCN35_RI(id)
+
+static struct dcn35_opp_registers opp_regs[4];
+
+static const struct dcn35_opp_shift opp_shift = {
+ OPP_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn35_opp_mask opp_mask = {
+ OPP_MASK_SH_LIST_DCN35(_MASK)
+};
+
+#define aux_engine_regs_init(id)\
+ ( \
+ AUX_COMMON_REG_LIST0_RI(id), \
+ SR_ARR_INIT(AUXN_IMPCAL, id, 0), \
+ SR_ARR_INIT(AUXP_IMPCAL, id, 0), \
+ SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK) \
+ )
+
+static struct dce110_aux_registers aux_engine_regs[5];
+
+#define dwbc_regs_dcn3_init(id)\
+ DWBC_COMMON_REG_LIST_DCN30_RI(id)
+
+static struct dcn30_dwbc_registers dwbc35_regs[1];
+
+static const struct dcn35_dwbc_shift dwbc35_shift = {
+ DWBC_COMMON_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn35_dwbc_mask dwbc35_mask = {
+ DWBC_COMMON_MASK_SH_LIST_DCN35(_MASK)
+};
+
+#define mcif_wb_regs_dcn3_init(id)\
+ MCIF_WB_COMMON_REG_LIST_DCN3_5_RI(id)
+
+static struct dcn35_mmhubbub_registers mcif_wb35_regs[1];
+
+static const struct dcn35_mmhubbub_shift mcif_wb35_shift = {
+ MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(__SHIFT)
+};
+
+static const struct dcn35_mmhubbub_mask mcif_wb35_mask = {
+ MCIF_WB_COMMON_MASK_SH_LIST_DCN3_5(_MASK)
+};
+
+#define dsc_regsDCN35_init(id)\
+ DSC_REG_LIST_DCN20_RI(id)
+
+static struct dcn20_dsc_registers dsc_regs[4];
+
+static const struct dcn35_dsc_shift dsc_shift = {
+ DSC_REG_LIST_SH_MASK_DCN35(__SHIFT)
+};
+
+static const struct dcn35_dsc_mask dsc_mask = {
+ DSC_REG_LIST_SH_MASK_DCN35(_MASK)
+};
+
+static struct dcn30_mpc_registers mpc_regs;
+
+#define dcn_mpc_regs_init() \
+ MPC_REG_LIST_DCN3_2_RI(0),\
+ MPC_REG_LIST_DCN3_2_RI(1),\
+ MPC_REG_LIST_DCN3_2_RI(2),\
+ MPC_REG_LIST_DCN3_2_RI(3),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\
+ MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0)
+
+static const struct dcn30_mpc_shift mpc_shift = {
+ MPC_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
+
+static const struct dcn30_mpc_mask mpc_mask = {
+ MPC_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
+
+#define optc_regs_init(id)\
+ OPTC_COMMON_REG_LIST_DCN3_5_RI(id)
+
+static struct dcn_optc_registers optc_regs[4];
+
+static const struct dcn_optc_shift optc_shift = {
+ OPTC_COMMON_MASK_SH_LIST_DCN3_5(__SHIFT)
+};
+
+static const struct dcn_optc_mask optc_mask = {
+ OPTC_COMMON_MASK_SH_LIST_DCN3_5(_MASK)
+};
+
+#define hubp_regs_init(id)\
+ HUBP_REG_LIST_DCN30_RI(id)
+
+static struct dcn_hubp2_registers hubp_regs[4];
+
+
+static const struct dcn35_hubp2_shift hubp_shift = {
+ HUBP_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn35_hubp2_mask hubp_mask = {
+ HUBP_MASK_SH_LIST_DCN35(_MASK)
+};
+
+static struct dcn_hubbub_registers hubbub_reg;
+
+#define hubbub_reg_init()\
+ HUBBUB_REG_LIST_DCN35(0)
+
+static const struct dcn_hubbub_shift hubbub_shift = {
+ HUBBUB_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dcn_hubbub_mask hubbub_mask = {
+ HUBBUB_MASK_SH_LIST_DCN35(_MASK)
+};
+
+static struct dccg_registers dccg_regs;
+
+#define dccg_regs_init()\
+ DCCG_REG_LIST_DCN35()
+
+static const struct dccg_shift dccg_shift = {
+ DCCG_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dccg_mask dccg_mask = {
+ DCCG_MASK_SH_LIST_DCN35(_MASK)
+};
+
+static struct pg_cntl_registers pg_cntl_regs;
+
+#define pg_cntl_dcn35_regs_init() \
+ PG_CNTL_REG_LIST_DCN35()
+
+static const struct pg_cntl_shift pg_cntl_shift = {
+ PG_CNTL_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct pg_cntl_mask pg_cntl_mask = {
+ PG_CNTL_MASK_SH_LIST_DCN35(_MASK)
+};
+
+#define SRII2(reg_name_pre, reg_name_post, id)\
+ .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \
+ ## id ## _ ## reg_name_post ## _BASE_IDX) + \
+ reg ## reg_name_pre ## id ## _ ## reg_name_post
+
+static struct dce_hwseq_registers hwseq_reg;
+
+#define hwseq_reg_init()\
+ HWSEQ_DCN36_REG_LIST()
+
+#define HWSEQ_DCN36_MASK_SH_LIST(mask_sh)\
+ HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
+ HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
+ HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
+ HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \
+ HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \
+ HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \
+ HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \
+ HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \
+ HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DISPCLK_R_DMU_GATE_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DISPCLK_G_RBBMIF_GATE_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, RBBMIF_FGCG_REP_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DPREFCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DISPCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DPPCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DTBCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DCFCLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, DPIACLK_ALLOW_DS_CLKSTOP, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_FGCG_REP_DIS, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_DISPCLK_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_SOCCLK_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_FE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK0_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK1_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK2_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL5, DPSTREAMCLK3_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK0_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK1_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK2_GATE_DISABLE, mask_sh),\
+ HWS_SF(, DCCG_GATE_DISABLE_CNTL4, DPIASYMCLK3_GATE_DISABLE, mask_sh)
+
+static const struct dce_hwseq_shift hwseq_shift = {
+ HWSEQ_DCN36_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_hwseq_mask hwseq_mask = {
+ HWSEQ_DCN36_MASK_SH_LIST(_MASK)
+};
+
+#define vmid_regs_init(id)\
+ DCN20_VMID_REG_LIST_RI(id)
+
+static struct dcn_vmid_registers vmid_regs[16];
+
+static const struct dcn20_vmid_shift vmid_shifts = {
+ DCN20_VMID_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn20_vmid_mask vmid_masks = {
+ DCN20_VMID_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * Per-block instance counts for this resource pool.
+ *
+ * Several counts line up with the sizes of the register tables in this file:
+ * num_vmid (16) with vmid_regs[16], num_dig_link_enc (5) with
+ * link_enc_regs[5], num_stream_encoder (5) with stream_enc_regs[5],
+ * num_hpo_dp_stream_encoder (4) with hpo_dp_stream_enc_regs[4],
+ * num_hpo_dp_link_encoder (2) with hpo_dp_link_enc_regs[2] and num_dsc (4)
+ * with dsc_regs[4]. Keep them in sync when changing either side.
+ */
+static const struct resource_caps res_cap_dcn36 = {
+ .num_timing_generator = 4,
+ .num_opp = 4,
+ .num_video_plane = 4,
+ .num_audio = 5,
+ .num_stream_encoder = 5,
+ .num_dig_link_enc = 5,
+ .num_hpo_dp_stream_encoder = 4,
+ .num_hpo_dp_link_encoder = 2,
+ .num_pll = 4,/*1 c10 edp, 3xc20 combo PHY*/
+ .num_dwb = 1,
+ .num_ddc = 5,
+ .num_vmid = 16,
+ .num_mpc_3dlut = 2,
+ .num_dsc = 4,
+};
+
+/* Plane capabilities: supported pixel formats and scaling limits. */
+static const struct dc_plane_cap plane_cap = {
+ .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
+ .per_pixel_alpha = true,
+
+ .pixel_format_support = {
+ .argb8888 = true,
+ .nv12 = true,
+ .fp16 = true,
+ .p010 = true,
+ .ayuv = false,
+ },
+
+ .max_upscale_factor = {
+ .argb8888 = 16000,
+ .nv12 = 16000,
+ .fp16 = 16000
+ },
+
+ // 6:1 downscaling ratio: 1000/6 = 166.666
+ // NOTE(review): by the same formula argb8888 = 250 would be a 4:1 limit,
+ // not 6:1 - confirm that the difference is intended.
+ .max_downscale_factor = {
+ .argb8888 = 250,
+ .nv12 = 167,
+ .fp16 = 167
+ },
+ /*
+ * Positional initializers: they set the two members declared right after
+ * max_downscale_factor in struct dc_plane_cap (presumably the minimum
+ * plane width and height - TODO confirm against the struct definition).
+ */
+ 64,
+ 64
+};
+
+/* Default values for the driver's dc_debug_options on this ASIC. */
+static const struct dc_debug_options debug_defaults_drv = {
+ .disable_dmcu = true,
+ .force_abm_enable = false,
+ .clock_trace = true,
+ .disable_pplib_clock_request = false,
+ .pipe_split_policy = MPC_SPLIT_AVOID,
+ .force_single_disp_pipe_split = false,
+ .disable_dcc = DCC_ENABLE,
+ .disable_dpp_power_gate = true,
+ .disable_hubp_power_gate = true,
+ .disable_optc_power_gate = true, /* should be the same as the two above */
+ .disable_hpo_power_gate = true, /* dmub fw forces domain25 on */
+ .disable_clock_gate = false,
+ .disable_dsc_power_gate = true,
+ .vsr_support = true,
+ .performance_trace = false,
+ .max_downscale_src_width = 4096,/* up to true 4k */
+ .disable_pplib_wm_range = false,
+ .scl_reset_length10 = true,
+ .sanity_checks = false,
+ .underflow_assert_delay_us = 0xFFFFFFFF,
+ .dwb_fi_phase = -1, // -1 = disable
+ .dmub_command_table = true,
+ .pstate_enabled = true,
+ .use_max_lb = true,
+ .enable_mem_low_power = {
+ .bits = {
+ .vga = false,
+ .i2c = true,
+ .dmcu = false, // Previously known to cause hangs on S3 cycles if enabled
+ .dscl = true,
+ .cm = true,
+ .mpc = true,
+ .optc = true,
+ .vpg = true,
+ .afmt = true,
+ }
+ },
+ .root_clock_optimization = {
+ .bits = {
+ .dpp = true,
+ .dsc = true,/*dscclk and dsc pg*/
+ .hdmistream = true,
+ .hdmichar = true,
+ .dpstream = true,
+ .symclk32_se = true,
+ .symclk32_le = true,
+ .symclk_fe = true,
+ .physymclk = false,
+ .dpiasymclk = true,
+ }
+ },
+ /*
+ * NOTE(review): a DML_FAIL_* constant is an unusual value for an ODM
+ * combine setting; the effective value depends on that enum's numbering -
+ * confirm this is intended.
+ */
+ .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT,
+ .enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disabling Z9 */
+ .minimum_z8_residency_time = 1, /* Always allow when other conditions are met */
+ .using_dml2 = true,
+ .support_eDP1_5 = true,
+ .enable_hpo_pg_support = false,
+ .enable_legacy_fast_update = true,
+ .enable_single_display_2to1_odm_policy = true,
+ .disable_idle_power_optimizations = false,
+ .dmcub_emulation = false,
+ .disable_boot_optimizations = false,
+ .disable_unbounded_requesting = false,
+ .disable_mem_low_power = false,
+ // Must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions
+ .enable_double_buffered_dsc_pg_support = true,
+ .enable_dp_dig_pixel_rate_div_policy = 1,
+ .disable_z10 = false,
+ .ignore_pg = true,
+ .psp_disabled_wa = true,
+ .ips2_eval_delay_us = 2000,
+ .ips2_entry_delay_us = 800,
+ .disable_dmub_reallow_idle = false,
+ .static_screen_wait_frames = 2,
+ .disable_timeout = true,
+ .min_disp_clk_khz = 50000,
+};
+
+/*
+ * Default panel configuration: no PSR, PSR-SU or replay restriction is set,
+ * and eDP link-rate optimization is turned on.
+ */
+static const struct dc_panel_config panel_config_defaults = {
+ .psr = {
+ .disable_psr = false,
+ .disallow_psrsu = false,
+ .disallow_replay = false,
+ },
+ .ilr = {
+ .optimize_edp_link_rate = true,
+ },
+};
+
+/*
+ * Free the object that contains *dpp (located with TO_DCN20_DPP()) and clear
+ * the caller's pointer.
+ */
+static void dcn35_dpp_destroy(struct dpp **dpp)
+{
+	struct dpp *base = *dpp;
+
+	*dpp = NULL;
+	kfree(TO_DCN20_DPP(base));
+}
+
+/*
+ * Allocate a zeroed dcn3_dpp, fill dpp_regs[0..3] and construct the DPP for
+ * instance @inst.
+ *
+ * On success, applies ctx->dc->debug.enable_fine_grain_clock_gating.bits.dpp
+ * through dpp35_set_fgcg() and returns the embedded base object. Returns NULL
+ * if the allocation fails, or if dpp35_construct() fails (the allocation is
+ * freed in that case).
+ */
+static struct dpp *dcn35_dpp_create(struct dc_context *ctx, uint32_t inst)
+{
+ struct dcn3_dpp *dpp = kzalloc(sizeof(struct dcn3_dpp), GFP_KERNEL);
+ bool success = (dpp != NULL);
+
+ if (!success)
+ return NULL;
+
+ /*
+ * Retarget the register-list macros at dpp_regs[]; all four entries are
+ * refilled on every call. The REG_STRUCT definition stays in effect for
+ * the code that follows until it is redefined.
+ */
+#undef REG_STRUCT
+#define REG_STRUCT dpp_regs
+ dpp_regs_init(0),
+ dpp_regs_init(1),
+ dpp_regs_init(2),
+ dpp_regs_init(3);
+
+ /* NOTE(review): inst is not range-checked against dpp_regs[4] here. */
+ success = dpp35_construct(dpp, ctx, inst, &dpp_regs[inst], &tf_shift,
+ &tf_mask);
+ if (success) {
+ dpp35_set_fgcg(
+ dpp,
+ ctx->dc->debug.enable_fine_grain_clock_gating.bits.dpp);
+ return &dpp->base;
+ }
+
+ /* Construction failed: release the allocation before reporting failure. */
+ BREAK_TO_DEBUGGER();
+ kfree(dpp);
+ return NULL;
+}
+
+/*
+ * Allocate a zeroed dcn20_opp, fill opp_regs[0..3], construct the OPP for
+ * instance @inst and apply the fine-grain clock gating setting from
+ * ctx->dc->debug.enable_fine_grain_clock_gating.bits.opp.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct output_pixel_processor *dcn35_opp_create(
+ struct dc_context *ctx, uint32_t inst)
+{
+ struct dcn20_opp *opp =
+ kzalloc(sizeof(struct dcn20_opp), GFP_KERNEL);
+
+ if (!opp) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+ /* Retarget the register-list macros at opp_regs[] and fill every entry. */
+#undef REG_STRUCT
+#define REG_STRUCT opp_regs
+ opp_regs_init(0),
+ opp_regs_init(1),
+ opp_regs_init(2),
+ opp_regs_init(3);
+
+ /* NOTE(review): inst is not range-checked against opp_regs[4] here. */
+ dcn35_opp_construct(opp, ctx, inst,
+ &opp_regs[inst], &opp_shift, &opp_mask);
+
+ dcn35_opp_set_fgcg(opp, ctx->dc->debug.enable_fine_grain_clock_gating.bits.opp);
+
+ return &opp->base;
+}
+
+/*
+ * Allocate a zeroed aux_engine_dce110, fill aux_engine_regs[0..4] and
+ * construct the AUX engine for instance @inst.
+ *
+ * The timeout passed to the constructor is
+ * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD, and extended-timeout
+ * support is taken from ctx->dc->caps.extended_aux_timeout_support.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct dce_aux *dcn31_aux_engine_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct aux_engine_dce110 *aux_engine =
+ kzalloc(sizeof(struct aux_engine_dce110), GFP_KERNEL);
+
+ if (!aux_engine)
+ return NULL;
+
+ /* Retarget the register-list macros at aux_engine_regs[] and fill every entry. */
+#undef REG_STRUCT
+#define REG_STRUCT aux_engine_regs
+ aux_engine_regs_init(0),
+ aux_engine_regs_init(1),
+ aux_engine_regs_init(2),
+ aux_engine_regs_init(3),
+ aux_engine_regs_init(4);
+
+ /* Note the argument order: mask table first, then shift table. */
+ dce110_aux_engine_construct(aux_engine, ctx, inst,
+ SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
+ &aux_engine_regs[inst],
+ &aux_mask,
+ &aux_shift,
+ ctx->dc->caps.extended_aux_timeout_support);
+
+ return &aux_engine->base;
+}
+
+#define i2c_inst_regs_init(id)\
+ I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id)
+
+static struct dce_i2c_registers i2c_hw_regs[5];
+
+static const struct dce_i2c_shift i2c_shifts = {
+ I2C_COMMON_MASK_SH_LIST_DCN35(__SHIFT)
+};
+
+static const struct dce_i2c_mask i2c_masks = {
+ I2C_COMMON_MASK_SH_LIST_DCN35(_MASK)
+};
+
+/* ========================================================== */
+
+/*
+ * DPIA index | Preferred Encoder | Host Router
+ * 0 | C | 0
+ * 1 | First Available | 0
+ * 2 | D | 1
+ * 3 | First Available | 1
+ *
+ * NOTE(review): the table below lists DIGC for index 1 and DIGD for index 3,
+ * whereas the rows above say "First Available" - confirm which is intended.
+ */
+/* ========================================================== */
+/* Preferred DIG engine per DPIA index; looked up without remapping by dcn36_get_preferred_eng_id_dpia(). */
+static const enum engine_id dpia_to_preferred_enc_id_table[] = {
+ ENGINE_ID_DIGC,
+ ENGINE_ID_DIGC,
+ ENGINE_ID_DIGD,
+ ENGINE_ID_DIGD
+};
+
+/*
+ * Look up the preferred DIG engine for DPIA @dpia_index in
+ * dpia_to_preferred_enc_id_table[].
+ *
+ * The index is not range-checked: the caller must pass a value smaller than
+ * the number of table entries.
+ */
+static enum engine_id dcn36_get_preferred_eng_id_dpia(unsigned int dpia_index)
+{
+	const enum engine_id *entry = &dpia_to_preferred_enc_id_table[dpia_index];
+
+	return *entry;
+}
+
+/*
+ * Allocate a zeroed dce_i2c_hw, fill the i2c_hw_regs[] table and construct
+ * the I2C hardware engine for instance @inst.
+ *
+ * Returns the new object, or NULL if the allocation fails.
+ */
+static struct dce_i2c_hw *dcn31_i2c_hw_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct dce_i2c_hw *dce_i2c_hw =
+ kzalloc(sizeof(struct dce_i2c_hw), GFP_KERNEL);
+
+ if (!dce_i2c_hw)
+ return NULL;
+
+ /*
+ * NOTE(review): the init ids run 1..5 while i2c_hw_regs[] has five
+ * entries and is indexed with inst below; presumably the register-list
+ * macro stores id N into slot N - 1 - confirm in its definition.
+ */
+#undef REG_STRUCT
+#define REG_STRUCT i2c_hw_regs
+ i2c_inst_regs_init(1),
+ i2c_inst_regs_init(2),
+ i2c_inst_regs_init(3),
+ i2c_inst_regs_init(4),
+ i2c_inst_regs_init(5);
+
+ dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst,
+ &i2c_hw_regs[inst], &i2c_shifts, &i2c_masks);
+
+ return dce_i2c_hw;
+}
+/*
+ * Allocate a zeroed dcn30_mpc, fill the single mpc_regs table and construct
+ * the MPC with @num_mpcc MPCC instances and @num_rmu RMU instances.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct mpc *dcn35_mpc_create(
+ struct dc_context *ctx,
+ int num_mpcc,
+ int num_rmu)
+{
+ struct dcn30_mpc *mpc30 = kzalloc(sizeof(struct dcn30_mpc), GFP_KERNEL);
+
+ if (!mpc30)
+ return NULL;
+
+ /* mpc_regs is one struct (not an array); the macro fills ids 0..3 inside it. */
+#undef REG_STRUCT
+#define REG_STRUCT mpc_regs
+ dcn_mpc_regs_init();
+
+ dcn32_mpc_construct(mpc30, ctx,
+ &mpc_regs,
+ &mpc_shift,
+ &mpc_mask,
+ num_mpcc,
+ num_rmu);
+
+ return &mpc30->base;
+}
+
+/*
+ * Allocate a zeroed dcn20_hubbub, fill hubbub_reg and vmid_regs[0..15],
+ * construct the HUBBUB and wire up each of its VMID slots.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct hubbub *dcn35_hubbub_create(struct dc_context *ctx)
+{
+ int i;
+
+ struct dcn20_hubbub *hubbub3 = kzalloc(sizeof(struct dcn20_hubbub),
+ GFP_KERNEL);
+
+ if (!hubbub3)
+ return NULL;
+
+ /* First table: the single HUBBUB register block. */
+#undef REG_STRUCT
+#define REG_STRUCT hubbub_reg
+ hubbub_reg_init();
+
+ /* Second table: one register set per VMID. */
+#undef REG_STRUCT
+#define REG_STRUCT vmid_regs
+ vmid_regs_init(0),
+ vmid_regs_init(1),
+ vmid_regs_init(2),
+ vmid_regs_init(3),
+ vmid_regs_init(4),
+ vmid_regs_init(5),
+ vmid_regs_init(6),
+ vmid_regs_init(7),
+ vmid_regs_init(8),
+ vmid_regs_init(9),
+ vmid_regs_init(10),
+ vmid_regs_init(11),
+ vmid_regs_init(12),
+ vmid_regs_init(13),
+ vmid_regs_init(14),
+ vmid_regs_init(15);
+
+ /*
+ * The three sizes are hard-coded; the trailing comments name the
+ * ctx->dc->dml.ip fields they stand in for.
+ */
+ hubbub35_construct(hubbub3, ctx,
+ &hubbub_reg,
+ &hubbub_shift,
+ &hubbub_mask,
+ 384,/*ctx->dc->dml.ip.det_buffer_size_kbytes,*/
+ 8, /*ctx->dc->dml.ip.pixel_chunk_size_kbytes,*/
+ 1792 /*ctx->dc->dml.ip.config_return_buffer_size_in_kbytes*/);
+
+
+ /* Bind every VMID slot to its register set and the shared shift/mask tables. */
+ for (i = 0; i < res_cap_dcn36.num_vmid; i++) {
+ struct dcn20_vmid *vmid = &hubbub3->vmid[i];
+
+ vmid->ctx = ctx;
+
+ vmid->regs = &vmid_regs[i];
+ vmid->shifts = &vmid_shifts;
+ vmid->masks = &vmid_masks;
+ }
+
+ return &hubbub3->base;
+}
+
+/*
+ * Allocate a zeroed optc, fill optc_regs[0..3], bind the object to timing
+ * generator @instance and run dcn35_timing_generator_init() on it.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct timing_generator *dcn35_timing_generator_create(
+ struct dc_context *ctx,
+ uint32_t instance)
+{
+ struct optc *tgn10 =
+ kzalloc(sizeof(struct optc), GFP_KERNEL);
+
+ if (!tgn10)
+ return NULL;
+
+ /* Retarget the register-list macros at optc_regs[] and fill every entry. */
+#undef REG_STRUCT
+#define REG_STRUCT optc_regs
+ optc_regs_init(0),
+ optc_regs_init(1),
+ optc_regs_init(2),
+ optc_regs_init(3);
+
+ tgn10->base.inst = instance;
+ tgn10->base.ctx = ctx;
+
+ /* NOTE(review): instance is not range-checked against optc_regs[4] here. */
+ tgn10->tg_regs = &optc_regs[instance];
+ tgn10->tg_shift = &optc_shift;
+ tgn10->tg_mask = &optc_mask;
+
+ /* Init runs last, after the register/shift/mask pointers are in place. */
+ dcn35_timing_generator_init(tgn10);
+
+ return &tgn10->base;
+}
+
+/*
+ * Feature set passed to both link encoder constructors in this file
+ * (dcn35_link_encoder_create() and dcn31_link_enc_create_minimal()).
+ */
+static const struct encoder_feature_support link_enc_feature = {
+ .max_hdmi_deep_color = COLOR_DEPTH_121212,
+ .max_hdmi_pixel_clock = 600000, /* presumably in kHz - TODO confirm */
+ .hdmi_ycbcr420_supported = true,
+ .dp_ycbcr420_supported = true,
+ .fec_supported = true,
+ .flags.bits.IS_HBR2_CAPABLE = true,
+ .flags.bits.IS_HBR3_CAPABLE = true,
+ .flags.bits.IS_TPS3_CAPABLE = true,
+ .flags.bits.IS_TPS4_CAPABLE = true
+};
+
+/*
+ * Allocate and construct a DIG link encoder.
+ *
+ * @ctx: display core context.
+ * @enc_init_data: encoder description; its transmitter, channel and
+ *                 hpd_source members select the register sets used.
+ *
+ * Fills link_enc_aux_regs[], link_enc_hpd_regs[] and link_enc_regs[] (five
+ * entries each) on every call before constructing the encoder.
+ *
+ * Returns the embedded link_encoder base object, or NULL if hpd_source is
+ * outside link_enc_hpd_regs[] or the allocation fails.
+ */
+static struct link_encoder *dcn35_link_encoder_create(
+	struct dc_context *ctx,
+	const struct encoder_init_data *enc_init_data)
+{
+	struct dcn20_link_encoder *enc20;
+
+	/*
+	 * Validate before allocating: rejecting a bad hpd_source after a
+	 * successful kzalloc() would leak the encoder object.
+	 */
+	if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
+		return NULL;
+
+	enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
+	if (!enc20)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_aux_regs
+	aux_regs_init(0),
+	aux_regs_init(1),
+	aux_regs_init(2),
+	aux_regs_init(3),
+	aux_regs_init(4);
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_hpd_regs
+	hpd_regs_init(0),
+	hpd_regs_init(1),
+	hpd_regs_init(2),
+	hpd_regs_init(3),
+	hpd_regs_init(4);
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_regs
+	link_regs_init(0, A),
+	link_regs_init(1, B),
+	link_regs_init(2, C),
+	link_regs_init(3, D),
+	link_regs_init(4, E);
+
+	/*
+	 * The AUX register set is selected with channel - 1.
+	 * NOTE(review): transmitter and channel are not range-checked here.
+	 */
+	dcn35_link_encoder_construct(enc20,
+			enc_init_data,
+			&link_enc_feature,
+			&link_enc_regs[enc_init_data->transmitter],
+			&link_enc_aux_regs[enc_init_data->channel - 1],
+			&link_enc_hpd_regs[enc_init_data->hpd_source],
+			&le_shift,
+			&le_mask);
+
+	return &enc20->enc10.base;
+}
+
+/*
+ * Create a minimal link encoder object not associated with a particular
+ * physical connector (resource_funcs.link_enc_create_minimal).
+ *
+ * @ctx: display core context; supplies the pool's num_dig_link_enc limit.
+ * @eng_id: DIG engine id; eng_id - ENGINE_ID_DIGA is the zero-based index
+ *          into link_enc_regs[].
+ *
+ * This function does not fill link_enc_regs[] itself; it uses whatever the
+ * table currently holds (presumably populated by an earlier
+ * dcn35_link_encoder_create() call - confirm).
+ *
+ * Returns the embedded link_encoder base object, or NULL if the engine index
+ * is out of range or the allocation fails.
+ */
+static struct link_encoder *dcn31_link_enc_create_minimal(
+	struct dc_context *ctx, enum engine_id eng_id)
+{
+	struct dcn20_link_encoder *enc20;
+
+	/*
+	 * Valid indices are 0 .. num_dig_link_enc - 1, so an index equal to
+	 * the count must be rejected as well ('>=' rather than '>').
+	 */
+	if ((eng_id - ENGINE_ID_DIGA) >= ctx->dc->res_pool->res_cap->num_dig_link_enc)
+		return NULL;
+
+	enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
+	if (!enc20)
+		return NULL;
+
+	dcn31_link_encoder_construct_minimal(
+			enc20,
+			ctx,
+			&link_enc_feature,
+			&link_enc_regs[eng_id - ENGINE_ID_DIGA],
+			eng_id);
+
+	return &enc20->enc10.base;
+}
+
+/*
+ * Allocate a zeroed dcn31_panel_cntl and construct it from @init_data.
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct panel_cntl *dcn31_panel_cntl_create(const struct panel_cntl_init_data *init_data)
+{
+	struct dcn31_panel_cntl *cntl;
+
+	cntl = kzalloc(sizeof(*cntl), GFP_KERNEL);
+	if (cntl == NULL)
+		return NULL;
+
+	dcn31_panel_cntl_construct(cntl, init_data);
+	return &cntl->base;
+}
+
+/*
+ * Read the DC_PINSTRAPS_AUDIO field of the DC_PINSTRAPS register into
+ * straps->dc_pinstraps_audio. No other member of @straps is written here.
+ */
+static void read_dce_straps(
+ struct dc_context *ctx,
+ struct resource_straps *straps)
+{
+ /* Register address = register offset + base of its segment. */
+ generic_reg_get(ctx, regDC_PINSTRAPS + BASE(regDC_PINSTRAPS_BASE_IDX),
+ FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio);
+
+}
+
+/*
+ * Fill audio_regs[0..6] and create the audio object for instance @inst via
+ * dce_audio_create().
+ *
+ * Returns whatever dce_audio_create() returns.
+ */
+static struct audio *dcn31_create_audio(
+	struct dc_context *ctx, unsigned int inst)
+{
+	/*
+	 * One comma expression terminated by a single ';', matching the other
+	 * *_create() helpers in this file.
+	 */
+#undef REG_STRUCT
+#define REG_STRUCT audio_regs
+	audio_regs_init(0),
+	audio_regs_init(1),
+	audio_regs_init(2),
+	audio_regs_init(3),
+	audio_regs_init(4),
+	audio_regs_init(5),
+	audio_regs_init(6);
+
+	return dce_audio_create(ctx, inst,
+			&audio_regs[inst], &audio_shift, &audio_mask);
+}
+
+/*
+ * Allocate a zeroed dcn31_vpg, fill vpg_regs[0..9] and construct the VPG for
+ * instance @inst.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct vpg *dcn31_vpg_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct dcn31_vpg *vpg31 = kzalloc(sizeof(struct dcn31_vpg), GFP_KERNEL);
+
+ if (!vpg31)
+ return NULL;
+
+ /* Retarget the register-list macros at vpg_regs[] and fill every entry. */
+#undef REG_STRUCT
+#define REG_STRUCT vpg_regs
+ vpg_regs_init(0),
+ vpg_regs_init(1),
+ vpg_regs_init(2),
+ vpg_regs_init(3),
+ vpg_regs_init(4),
+ vpg_regs_init(5),
+ vpg_regs_init(6),
+ vpg_regs_init(7),
+ vpg_regs_init(8),
+ vpg_regs_init(9);
+
+ /* NOTE(review): inst is not range-checked against vpg_regs[10] here. */
+ vpg31_construct(vpg31, ctx, inst,
+ &vpg_regs[inst],
+ &vpg_shift,
+ &vpg_mask);
+
+ return &vpg31->base;
+}
+
+/*
+ * Allocate a zeroed dcn31_afmt, fill afmt_regs[0..5] and construct the AFMT
+ * for instance @inst.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct afmt *dcn31_afmt_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct dcn31_afmt *afmt31 = kzalloc(sizeof(struct dcn31_afmt), GFP_KERNEL);
+
+ if (!afmt31)
+ return NULL;
+
+ /* Retarget the register-list macros at afmt_regs[] and fill every entry. */
+#undef REG_STRUCT
+#define REG_STRUCT afmt_regs
+ afmt_regs_init(0),
+ afmt_regs_init(1),
+ afmt_regs_init(2),
+ afmt_regs_init(3),
+ afmt_regs_init(4),
+ afmt_regs_init(5);
+
+ /* NOTE(review): inst is not range-checked against afmt_regs[6] here. */
+ afmt31_construct(afmt31, ctx, inst,
+ &afmt_regs[inst],
+ &afmt_shift,
+ &afmt_mask);
+
+ // Light sleep by default, no need to power down here
+
+ return &afmt31->base;
+}
+
+/*
+ * Allocate a zeroed dcn31_apg, fill apg_regs[0..3] and construct the APG for
+ * instance @inst.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct apg *dcn31_apg_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{
+ struct dcn31_apg *apg31 = kzalloc(sizeof(struct dcn31_apg), GFP_KERNEL);
+
+ if (!apg31)
+ return NULL;
+
+ /* Retarget the register-list macros at apg_regs[] and fill every entry. */
+#undef REG_STRUCT
+#define REG_STRUCT apg_regs
+ apg_regs_init(0),
+ apg_regs_init(1),
+ apg_regs_init(2),
+ apg_regs_init(3);
+
+ /* NOTE(review): inst is not range-checked against apg_regs[4] here. */
+ apg31_construct(apg31, ctx, inst,
+ &apg_regs[inst],
+ &apg_shift,
+ &apg_mask);
+
+ return &apg31->base;
+}
+
+/*
+ * Allocate and construct a DIO stream encoder together with its VPG and AFMT
+ * sub-blocks.
+ *
+ * @eng_id: DIG engine id; used directly as the VPG/AFMT instance and as the
+ *          index into stream_enc_regs[].
+ * @ctx: display core context.
+ *
+ * Returns the embedded stream_encoder base object, or NULL if @eng_id is out
+ * of range or any of the three allocations fails.
+ */
+static struct stream_encoder *dcn35_stream_encoder_create(
+	enum engine_id eng_id,
+	struct dc_context *ctx)
+{
+	struct dcn10_stream_encoder *enc1;
+	struct vpg *vpg;
+	struct afmt *afmt;
+	int vpg_inst;
+	int afmt_inst;
+
+	if (eng_id > ENGINE_ID_DIGF)
+		return NULL;
+
+	/*
+	 * stream_enc_regs[] has fewer entries than the DIGA..DIGF range
+	 * admitted above (and only those entries are initialized below), so
+	 * bound the index by the table itself.
+	 */
+	if (eng_id >= ARRAY_SIZE(stream_enc_regs))
+		return NULL;
+
+	/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
+	vpg_inst = eng_id;
+	afmt_inst = eng_id;
+
+	enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
+	vpg = dcn31_vpg_create(ctx, vpg_inst);
+	afmt = dcn31_afmt_create(ctx, afmt_inst);
+
+	if (!enc1 || !vpg || !afmt) {
+		/*
+		 * kfree(NULL) is a no-op, so whichever pieces were created are
+		 * released. NOTE(review): vpg/afmt are freed through their base
+		 * pointers, which assumes base is the first member of the
+		 * containing object - confirm.
+		 */
+		kfree(enc1);
+		kfree(vpg);
+		kfree(afmt);
+		return NULL;
+	}
+
+#undef REG_STRUCT
+#define REG_STRUCT stream_enc_regs
+	stream_enc_regs_init(0),
+	stream_enc_regs_init(1),
+	stream_enc_regs_init(2),
+	stream_enc_regs_init(3),
+	stream_enc_regs_init(4);
+
+	dcn35_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios,
+			eng_id, vpg, afmt,
+			&stream_enc_regs[eng_id],
+			&se_shift, &se_mask);
+
+	return &enc1->base;
+}
+
+/*
+ * Allocate and construct an HPO DP stream encoder together with its VPG and
+ * APG sub-blocks.
+ *
+ * @eng_id is expected to lie in ENGINE_ID_HPO_DP_0..ENGINE_ID_HPO_DP_3; the
+ * zero-based HPO instance is eng_id - ENGINE_ID_HPO_DP_0.
+ *
+ * Returns the embedded base object, or NULL if any of the three allocations
+ * fails.
+ */
+static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create(
+ enum engine_id eng_id,
+ struct dc_context *ctx)
+{
+ struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31;
+ struct vpg *vpg;
+ struct apg *apg;
+ uint32_t hpo_dp_inst;
+ uint32_t vpg_inst;
+ uint32_t apg_inst;
+
+ /*
+ * NOTE(review): this ASSERT is the only range check on eng_id; the derived
+ * instance numbers are used as array indices below without a hard bound.
+ */
+ ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3));
+ hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0;
+
+ /* Mapping of VPG register blocks to HPO DP block instance:
+ * VPG[6] -> HPO_DP[0]
+ * VPG[7] -> HPO_DP[1]
+ * VPG[8] -> HPO_DP[2]
+ * VPG[9] -> HPO_DP[3]
+ */
+ vpg_inst = hpo_dp_inst + 6;
+
+ /* Mapping of APG register blocks to HPO DP block instance:
+ * APG[0] -> HPO_DP[0]
+ * APG[1] -> HPO_DP[1]
+ * APG[2] -> HPO_DP[2]
+ * APG[3] -> HPO_DP[3]
+ */
+ apg_inst = hpo_dp_inst;
+
+ /* Allocate the HPO stream encoder and create its VPG and APG sub-blocks. */
+ hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_stream_encoder), GFP_KERNEL);
+ vpg = dcn31_vpg_create(ctx, vpg_inst);
+ apg = dcn31_apg_create(ctx, apg_inst);
+
+ /*
+ * On any failure release whatever was created. NOTE(review): vpg/apg are
+ * freed through their base pointers, which assumes base is the first member
+ * of the containing object - confirm.
+ */
+ if (!hpo_dp_enc31 || !vpg || !apg) {
+ kfree(hpo_dp_enc31);
+ kfree(vpg);
+ kfree(apg);
+ return NULL;
+ }
+
+ /* Retarget the register-list macros at hpo_dp_stream_enc_regs[] and fill every entry. */
+#undef REG_STRUCT
+#define REG_STRUCT hpo_dp_stream_enc_regs
+ hpo_dp_stream_encoder_reg_init(0),
+ hpo_dp_stream_encoder_reg_init(1),
+ hpo_dp_stream_encoder_reg_init(2),
+ hpo_dp_stream_encoder_reg_init(3);
+
+ dcn31_hpo_dp_stream_encoder_construct(hpo_dp_enc31, ctx, ctx->dc_bios,
+ hpo_dp_inst, eng_id, vpg, apg,
+ &hpo_dp_stream_enc_regs[hpo_dp_inst],
+ &hpo_dp_se_shift, &hpo_dp_se_mask);
+
+ return &hpo_dp_enc31->base;
+}
+
+/*
+ * Allocate a zeroed dcn31_hpo_dp_link_encoder, fill hpo_dp_link_enc_regs[0..1]
+ * and construct the HPO DP link encoder for instance @inst.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct hpo_dp_link_encoder *dcn31_hpo_dp_link_encoder_create(
+ uint8_t inst,
+ struct dc_context *ctx)
+{
+ struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31;
+
+ /* allocate HPO link encoder */
+ hpo_dp_enc31 = kzalloc(sizeof(struct dcn31_hpo_dp_link_encoder), GFP_KERNEL);
+ if (!hpo_dp_enc31)
+ return NULL; /* out of memory */
+
+ /* Retarget the register-list macros at hpo_dp_link_enc_regs[] and fill both entries. */
+#undef REG_STRUCT
+#define REG_STRUCT hpo_dp_link_enc_regs
+ hpo_dp_link_encoder_reg_init(0),
+ hpo_dp_link_encoder_reg_init(1);
+
+ /* NOTE(review): inst is not range-checked against hpo_dp_link_enc_regs[2] here. */
+ hpo_dp_link_encoder31_construct(hpo_dp_enc31, ctx, inst,
+ &hpo_dp_link_enc_regs[inst],
+ &hpo_dp_le_shift, &hpo_dp_le_mask);
+
+ return &hpo_dp_enc31->base;
+}
+
+/*
+ * Allocate a zeroed dce_hwseq and bind it to @ctx and to the hwseq register,
+ * shift and mask tables of this file.
+ *
+ * Returns the new object, or NULL if the allocation fails.
+ */
+static struct dce_hwseq *dcn36_hwseq_create(
+ struct dc_context *ctx)
+{
+ struct dce_hwseq *hws = kzalloc(sizeof(struct dce_hwseq), GFP_KERNEL);
+
+ /* hwseq_reg is filled regardless of whether the allocation succeeded. */
+#undef REG_STRUCT
+#define REG_STRUCT hwseq_reg
+ hwseq_reg_init();
+
+ if (hws) {
+ hws->ctx = ctx;
+ hws->regs = &hwseq_reg;
+ hws->shifts = &hwseq_shift;
+ hws->masks = &hwseq_mask;
+ }
+ return hws;
+}
+/* Creation callbacks for this resource pool; each entry points at a static helper defined above. */
+static const struct resource_create_funcs res_create_funcs = {
+ .read_dce_straps = read_dce_straps,
+ .create_audio = dcn31_create_audio,
+ .create_stream_encoder = dcn35_stream_encoder_create,
+ .create_hpo_dp_stream_encoder = dcn31_hpo_dp_stream_encoder_create,
+ .create_hpo_dp_link_encoder = dcn31_hpo_dp_link_encoder_create,
+ .create_hwseq = dcn36_hwseq_create,
+};
+
+static void dcn36_resource_destruct(struct dcn36_resource_pool *pool)
+{	/* Free every sub-object owned by pool->base; the pool container itself is left to the caller. */
+	unsigned int i;
+
+	for (i = 0; i < pool->base.stream_enc_count; i++) {
+		if (pool->base.stream_enc[i] != NULL) {
+			if (pool->base.stream_enc[i]->vpg != NULL) {	/* VPG and AFMT are freed separately from the encoder */
+				kfree(DCN30_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg));
+				pool->base.stream_enc[i]->vpg = NULL;
+			}
+			if (pool->base.stream_enc[i]->afmt != NULL) {
+				kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt));
+				pool->base.stream_enc[i]->afmt = NULL;
+			}
+			kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i]));
+			pool->base.stream_enc[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) {
+		if (pool->base.hpo_dp_stream_enc[i] != NULL) {
+			if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) {	/* same pattern for HPO encoders: VPG, APG, then the encoder */
+				kfree(DCN30_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg));
+				pool->base.hpo_dp_stream_enc[i]->vpg = NULL;
+			}
+			if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) {
+				kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg));
+				pool->base.hpo_dp_stream_enc[i]->apg = NULL;
+			}
+			kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i]));
+			pool->base.hpo_dp_stream_enc[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) {
+		if (pool->base.hpo_dp_link_enc[i] != NULL) {
+			kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i]));
+			pool->base.hpo_dp_link_enc[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_dsc; i++) {	/* res_cap is dereferenced unchecked; construct assigns it before any failure path */
+		if (pool->base.dscs[i] != NULL)
+			dcn20_dsc_destroy(&pool->base.dscs[i]);
+	}
+
+	if (pool->base.mpc != NULL) {
+		kfree(TO_DCN20_MPC(pool->base.mpc));
+		pool->base.mpc = NULL;
+	}
+	if (pool->base.hubbub != NULL) {
+		kfree(pool->base.hubbub);
+		pool->base.hubbub = NULL;
+	}
+	for (i = 0; i < pool->base.pipe_count; i++) {
+		if (pool->base.dpps[i] != NULL)
+			dcn35_dpp_destroy(&pool->base.dpps[i]);
+
+		if (pool->base.ipps[i] != NULL)
+			pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]);
+
+		if (pool->base.hubps[i] != NULL) {
+			kfree(TO_DCN20_HUBP(pool->base.hubps[i]));
+			pool->base.hubps[i] = NULL;
+		}
+	}
+
+	if (pool->base.irqs != NULL) {	/* one IRQ service per pool: destroyed once, outside the per-pipe loop */
+		dal_irq_service_destroy(&pool->base.irqs);
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
+		if (pool->base.engines[i] != NULL)
+			dce110_engine_destroy(&pool->base.engines[i]);
+		if (pool->base.hw_i2cs[i] != NULL) {
+			kfree(pool->base.hw_i2cs[i]);
+			pool->base.hw_i2cs[i] = NULL;
+		}
+		if (pool->base.sw_i2cs[i] != NULL) {
+			kfree(pool->base.sw_i2cs[i]);
+			pool->base.sw_i2cs[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_opp; i++) {
+		if (pool->base.opps[i] != NULL)
+			pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]);
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+		if (pool->base.timing_generators[i] != NULL) {
+			kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i]));
+			pool->base.timing_generators[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_dwb; i++) {
+		if (pool->base.dwbc[i] != NULL) {
+			kfree(TO_DCN30_DWBC(pool->base.dwbc[i]));
+			pool->base.dwbc[i] = NULL;
+		}
+		if (pool->base.mcif_wb[i] != NULL) {
+			kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i]));
+			pool->base.mcif_wb[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.audio_count; i++) {
+		if (pool->base.audios[i])
+			dce_aud_destroy(&pool->base.audios[i]);
+	}
+
+	for (i = 0; i < pool->base.clk_src_count; i++) {
+		if (pool->base.clock_sources[i] != NULL) {
+			dcn20_clock_source_destroy(&pool->base.clock_sources[i]);
+			pool->base.clock_sources[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) {
+		if (pool->base.mpc_lut[i] != NULL) {
+			dc_3dlut_func_release(pool->base.mpc_lut[i]);	/* release call, not kfree */
+			pool->base.mpc_lut[i] = NULL;
+		}
+		if (pool->base.mpc_shaper[i] != NULL) {
+			dc_transfer_func_release(pool->base.mpc_shaper[i]);
+			pool->base.mpc_shaper[i] = NULL;
+		}
+	}
+
+	if (pool->base.dp_clock_source != NULL) {
+		dcn20_clock_source_destroy(&pool->base.dp_clock_source);
+		pool->base.dp_clock_source = NULL;
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {	/* ABMs are created one per timing generator in construct */
+		if (pool->base.multiple_abms[i] != NULL)
+			dce_abm_destroy(&pool->base.multiple_abms[i]);
+	}
+
+	if (pool->base.psr != NULL)
+		dmub_psr_destroy(&pool->base.psr);
+
+	if (pool->base.replay != NULL)
+		dmub_replay_destroy(&pool->base.replay);
+
+	if (pool->base.pg_cntl != NULL)
+		dcn_pg_cntl_destroy(&pool->base.pg_cntl);
+
+	if (pool->base.dccg != NULL)
+		dcn_dccg_destroy(&pool->base.dccg);
+}
+
+static struct hubp *dcn35_hubp_create(
+ struct dc_context *ctx,
+ uint32_t inst)
+{ /* Allocate and construct the HUBP for instance 'inst'; returns its base, or NULL on OOM or construct failure. */
+ struct dcn20_hubp *hubp2 =
+ kzalloc(sizeof(struct dcn20_hubp), GFP_KERNEL);
+
+ if (!hubp2)
+ return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT hubp_regs
+ hubp_regs_init(0), /* all four entries are (re)initialised on every call, whatever 'inst' is */
+ hubp_regs_init(1),
+ hubp_regs_init(2),
+ hubp_regs_init(3);
+
+ if (hubp35_construct(hubp2, ctx, inst,
+ &hubp_regs[inst], &hubp_shift, &hubp_mask))
+ return &hubp2->base; /* success path */
+
+ BREAK_TO_DEBUGGER();
+ kfree(hubp2); /* construct reported failure: drop the allocation */
+ return NULL;
+}
+
+static void dcn35_dwbc_init(struct dcn30_dwbc *dwbc30, struct dc_context *ctx)
+{ /* Forward the 'dwb' bit of the fine-grain clock gating debug mask to dcn35_dwbc_set_fgcg(). */
+ dcn35_dwbc_set_fgcg(
+ dwbc30, ctx->dc->debug.enable_fine_grain_clock_gating.bits.dwb);
+}
+
+static bool dcn35_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
+{ /* Create one DWBC per res_cap->num_dwb and store it in pool->dwbc[]; returns false on the first allocation failure. */
+ int i;
+ uint32_t pipe_count = pool->res_cap->num_dwb;
+
+ for (i = 0; i < pipe_count; i++) { /* signed 'i' is compared against an unsigned count */
+ struct dcn30_dwbc *dwbc30 = kzalloc(sizeof(struct dcn30_dwbc),
+ GFP_KERNEL);
+
+ if (!dwbc30) {
+ dm_error("DC: failed to create dwbc30!\n");
+ return false; /* objects from earlier iterations stay in pool->dwbc[] for the caller's destruct path */
+ }
+
+#undef REG_STRUCT
+#define REG_STRUCT dwbc35_regs
+ dwbc_regs_dcn3_init(0); /* NOTE(review): only index 0 is initialised although dwbc35_regs[i] is used below - fine only if num_dwb == 1, confirm */
+
+ dcn35_dwbc_construct(dwbc30, ctx,
+ &dwbc35_regs[i],
+ &dwbc35_shift,
+ &dwbc35_mask,
+ i);
+
+ pool->dwbc[i] = &dwbc30->base;
+
+ dcn35_dwbc_init(dwbc30, ctx); /* apply the FGCG debug setting after construction */
+ }
+ return true;
+}
+
+static void dcn35_mmhubbub_init(struct dcn30_mmhubbub *mcif_wb30,
+ struct dc_context *ctx)
+{ /* Forward the 'mmhubbub' bit of the fine-grain clock gating debug mask to dcn35_mmhubbub_set_fgcg(). */
+ dcn35_mmhubbub_set_fgcg(
+ mcif_wb30,
+ ctx->dc->debug.enable_fine_grain_clock_gating.bits.mmhubbub);
+}
+
+static bool dcn35_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool)
+{ /* Create one MCIF_WB per res_cap->num_dwb and store it in pool->mcif_wb[]; returns false on the first allocation failure. */
+ int i;
+ uint32_t pipe_count = pool->res_cap->num_dwb; /* sized by num_dwb, same as dcn35_dwbc_create() */
+
+ for (i = 0; i < pipe_count; i++) {
+ struct dcn30_mmhubbub *mcif_wb30 = kzalloc(sizeof(struct dcn30_mmhubbub),
+ GFP_KERNEL);
+
+ if (!mcif_wb30) {
+ dm_error("DC: failed to create mcif_wb30!\n");
+ return false; /* objects from earlier iterations stay in pool->mcif_wb[] for the caller's destruct path */
+ }
+
+#undef REG_STRUCT
+#define REG_STRUCT mcif_wb35_regs
+ mcif_wb_regs_dcn3_init(0); /* NOTE(review): only index 0 is initialised although mcif_wb35_regs[i] is used below - confirm num_dwb == 1 */
+
+ dcn35_mmhubbub_construct(mcif_wb30, ctx,
+ &mcif_wb35_regs[i],
+ &mcif_wb35_shift,
+ &mcif_wb35_mask,
+ i);
+
+ dcn35_mmhubbub_init(mcif_wb30, ctx); /* here init runs before the pool slot is filled (dwbc does it the other way round) */
+
+ pool->mcif_wb[i] = &mcif_wb30->base;
+ }
+ return true;
+}
+
+static struct display_stream_compressor *dcn35_dsc_create(
+ struct dc_context *ctx, uint32_t inst)
+{ /* Allocate and construct the DSC for instance 'inst', apply the FGCG debug bit, and return its base (NULL on OOM). */
+ struct dcn20_dsc *dsc =
+ kzalloc(sizeof(struct dcn20_dsc), GFP_KERNEL);
+
+ if (!dsc) {
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
+
+#undef REG_STRUCT
+#define REG_STRUCT dsc_regs
+ dsc_regsDCN35_init(0), /* four entries initialised; 'inst' itself is not range-checked before indexing dsc_regs[] */
+ dsc_regsDCN35_init(1),
+ dsc_regsDCN35_init(2),
+ dsc_regsDCN35_init(3);
+
+ dsc35_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
+ dsc35_set_fgcg(dsc,
+ ctx->dc->debug.enable_fine_grain_clock_gating.bits.dsc);
+ return &dsc->base;
+}
+
+static void dcn36_destroy_resource_pool(struct resource_pool **pool)
+{ /* .destroy hook: tear down the pool contents, free the DCN3.6 wrapper, and NULL the caller's pointer. */
+ struct dcn36_resource_pool *dcn36_pool = TO_DCN36_RES_POOL(*pool); /* recover the wrapper from its embedded base */
+
+ dcn36_resource_destruct(dcn36_pool);
+ kfree(dcn36_pool);
+ *pool = NULL; /* caller's handle no longer refers to freed memory */
+}
+
+static struct clock_source *dcn35_clock_source_create(
+ struct dc_context *ctx,
+ struct dc_bios *bios,
+ enum clock_source_id id,
+ const struct dce110_clk_src_regs *regs,
+ bool dp_clk_src)
+{ /* Allocate and construct a clock source for 'id'; returns its base, or NULL on OOM or construct failure. */
+ struct dce110_clk_src *clk_src =
+ kzalloc(sizeof(struct dce110_clk_src), GFP_KERNEL);
+
+ if (!clk_src)
+ return NULL;
+
+ if (dcn31_clk_src_construct(clk_src, ctx, bios, id,
+ regs, &cs_shift, &cs_mask)) {
+ clk_src->base.dp_clk_src = dp_clk_src; /* flag is recorded only after a successful construct */
+ return &clk_src->base;
+ }
+
+ kfree(clk_src); /* construct failed: release before signalling the debugger */
+ BREAK_TO_DEBUGGER();
+ return NULL;
+}
+
+static struct dc_cap_funcs cap_funcs = { /* copied by value into dc->cap_funcs in dcn36_resource_construct() */
+ .get_dcc_compression_cap = dcn20_get_dcc_compression_cap
+};
+
+static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config)
+{ /* .get_panel_config_defaults hook: overwrite *panel_config with the file-level defaults. */
+ *panel_config = panel_config_defaults; /* whole-struct copy; panel_config_defaults is defined outside this view */
+}
+
+
+static enum dc_status dcn35_validate_bandwidth(struct dc *dc,
+ struct dc_state *context,
+ enum dc_validate_mode validate_mode)
+{ /* .validate_bandwidth hook: run dml2_validate() and map its bool result to DC_OK / DC_FAIL_BANDWIDTH_VALIDATE. */
+ bool out = false;
+
+ out = dml2_validate(dc, context,
+ context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, /* separate DML2 context when the power source is DC */
+ validate_mode);
+
+ if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING)
+ return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; /* other modes return here, skipping the z-state decision */
+
+ DC_FP_START(); /* presumably guards FPU use in the call below - TODO confirm */
+ dcn35_decide_zstate_support(dc, context); /* executed whether or not validation succeeded */
+ DC_FP_END();
+
+ return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
+}
+
+
+static int populate_dml_pipes_from_context_fpu(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ enum dc_validate_mode validate_mode)
+{ /* .populate_dml_pipes hook: call the DCN3.5 implementation between DC_FP_START()/DC_FP_END() and pass its result through. */
+ int ret;
+
+ DC_FP_START();
+ ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode);
+ DC_FP_END();
+
+ return ret; /* returned unchanged */
+}
+
+static struct resource_funcs dcn36_res_pool_funcs = { /* installed as pool->base.funcs by dcn36_resource_construct() */
+ .destroy = dcn36_destroy_resource_pool,
+ .link_enc_create = dcn35_link_encoder_create,
+ .link_enc_create_minimal = dcn31_link_enc_create_minimal,
+ .link_encs_assign = link_enc_cfg_link_encs_assign,
+ .link_enc_unassign = link_enc_cfg_link_enc_unassign,
+ .panel_cntl_create = dcn31_panel_cntl_create,
+ .validate_bandwidth = dcn35_validate_bandwidth,
+ .calculate_wm_and_dlg = NULL, /* explicitly left unset */
+ .update_soc_for_wm_a = dcn31_update_soc_for_wm_a,
+ .populate_dml_pipes = populate_dml_pipes_from_context_fpu, /* local wrapper defined in this file */
+ .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer,
+ .release_pipe = dcn20_release_pipe,
+ .add_stream_to_ctx = dcn30_add_stream_to_ctx,
+ .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
+ .remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
+ .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context,
+ .set_mcif_arb_params = dcn30_set_mcif_arb_params,
+ .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
+ .acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
+ .release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
+ .update_bw_bounding_box = dcn35_update_bw_bounding_box_fpu,
+ .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
+ .get_panel_config_defaults = dcn35_get_panel_config_defaults,
+ .get_preferred_eng_id_dpia = dcn36_get_preferred_eng_id_dpia, /* the only other dcn36-named hook besides .destroy */
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe,
+ .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch,
+ .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params
+};
+
+static bool dcn36_resource_construct(
+ uint8_t num_virtual_links,
+ struct dc *dc,
+ struct dcn36_resource_pool *pool)
+{ /* Fill dc->caps/config and create every HW block object of the pool; on any failure destruct the partial pool and return false. */
+ int i;
+ struct dc_context *ctx = dc->ctx;
+ struct irq_service_init_data init_data; /* not zero-initialised; only .ctx is assigned before use */
+
+#undef REG_STRUCT
+#define REG_STRUCT bios_regs
+ bios_regs_init();
+
+#undef REG_STRUCT
+#define REG_STRUCT clk_src_regs
+ clk_src_regs_init(0, A),
+ clk_src_regs_init(1, B),
+ clk_src_regs_init(2, C),
+ clk_src_regs_init(3, D),
+ clk_src_regs_init(4, E);
+
+#undef REG_STRUCT
+#define REG_STRUCT abm_regs
+ abm_regs_init(0),
+ abm_regs_init(1),
+ abm_regs_init(2),
+ abm_regs_init(3);
+
+#undef REG_STRUCT
+#define REG_STRUCT dccg_regs
+ dccg_regs_init();
+
+ ctx->dc_bios->regs = &bios_regs;
+
+ pool->base.res_cap = &res_cap_dcn36; /* set first: dcn36_resource_destruct() dereferences res_cap */
+
+ pool->base.funcs = &dcn36_res_pool_funcs;
+
+ /*************************************************
+ * Resource + asic cap hardcoding *
+ *************************************************/
+ pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE;
+ pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
+ pool->base.mpcc_count = pool->base.res_cap->num_timing_generator;
+ dc->caps.max_downscale_ratio = 600;
+ dc->caps.i2c_speed_in_khz = 100;
+ dc->caps.i2c_speed_in_khz_hdcp = 100;
+ dc->caps.max_cursor_size = 256;
+ dc->caps.min_horizontal_blanking_period = 80;
+ dc->caps.dmdata_alloc_size = 2048;
+ dc->caps.max_slave_planes = 3;
+ dc->caps.max_slave_yuv_planes = 3;
+ dc->caps.max_slave_rgb_planes = 3;
+ dc->caps.post_blend_color_processing = true;
+ dc->caps.force_dp_tps4_for_cp2520 = true;
+ if (dc->config.forceHBR2CP2520)
+ dc->caps.force_dp_tps4_for_cp2520 = false; /* config override of the default set just above */
+ dc->caps.dp_hpo = true;
+ dc->caps.dp_hdmi21_pcon_support = true;
+
+ dc->caps.edp_dsc_support = true;
+ dc->caps.extended_aux_timeout_support = true;
+ dc->caps.dmcub_support = true;
+ dc->caps.is_apu = true;
+ dc->caps.seamless_odm = true;
+
+ dc->caps.zstate_support = true;
+ dc->caps.ips_support = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
+
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+ dc->caps.color.dpp.input_lut_shared = 0;
+ dc->caps.color.dpp.icsc = 1;
+ dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr
+ dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
+ dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
+ dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1;
+ dc->caps.color.dpp.dgam_rom_caps.pq = 1;
+ dc->caps.color.dpp.dgam_rom_caps.hlg = 1;
+ dc->caps.color.dpp.post_csc = 1;
+ dc->caps.color.dpp.gamma_corr = 1;
+ dc->caps.color.dpp.dgam_rom_for_yuv = 0;
+
+ dc->caps.color.dpp.hw_3d_lut = 0;
+ dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1
+ // no OGAM ROM on DCN301
+ dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
+ dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.pq = 0;
+ dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
+ dc->caps.color.dpp.ocsc = 0;
+
+ dc->caps.color.mpc.gamut_remap = 1;
+ dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //2
+ dc->caps.color.mpc.ogam_ram = 1;
+ dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
+ dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.pq = 0;
+ dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
+ dc->caps.color.mpc.ocsc = 1;
+ dc->caps.color.mpc.preblend = true;
+
+ dc->caps.num_of_host_routers = 2;
+ dc->caps.num_of_dpias_per_host_router = 2;
+
+ /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order
+ * to provide some margin.
+ * It's expected for future ASIC to have equal or higher value, in order to
+ * have deterministic power improvement from generation to generation.
+ * (i.e., we should not expect new ASIC generation with lower vmin rate)
+ */
+ dc->caps.max_disp_clock_khz_at_vmin = 650000;
+
+ /* Sequential ONO is based on ASIC. */
+ if (dc->ctx->asic_id.hw_internal_rev >= 0x40)
+ dc->caps.sequential_ono = true;
+
+ /* Use pipe context based otg sync logic */
+ dc->config.use_pipe_ctx_sync_logic = true;
+
+ dc->config.disable_hbr_audio_dp2 = true;
+ /* read VBIOS LTTPR caps */
+ {
+ if (ctx->dc_bios->funcs->get_lttpr_caps) { /* optional hook: skipped when the BIOS parser does not provide it */
+ enum bp_result bp_query_result;
+ uint8_t is_vbios_lttpr_enable = 0;
+
+ bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable);
+ dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable;
+ }
+
+ /* interop bit is implicit */
+ {
+ dc->caps.vbios_lttpr_aware = true;
+ }
+ }
+
+ if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
+ dc->debug = debug_defaults_drv;
+
+ /*HW default is to have all the FGCG enabled, SW no need to program them*/
+ dc->debug.enable_fine_grain_clock_gating.u32All = 0xFFFF; /* written after the debug defaults above, so it overrides them */
+ // Init the vm_helper
+ if (dc->vm_helper)
+ vm_helper_init(dc->vm_helper, 16);
+
+ /*************************************************
+ * Create resources *
+ *************************************************/
+
+ /* Clock Sources for Pixel Clock*/
+ pool->base.clock_sources[DCN36_CLK_SRC_PLL0] =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL0,
+ &clk_src_regs[0], false);
+ pool->base.clock_sources[DCN36_CLK_SRC_PLL1] =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL1,
+ &clk_src_regs[1], false);
+ pool->base.clock_sources[DCN36_CLK_SRC_PLL2] =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL2,
+ &clk_src_regs[2], false);
+ pool->base.clock_sources[DCN36_CLK_SRC_PLL3] =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL3,
+ &clk_src_regs[3], false);
+ pool->base.clock_sources[DCN36_CLK_SRC_PLL4] =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL4,
+ &clk_src_regs[4], false);
+
+ pool->base.clk_src_count = DCN36_CLK_SRC_TOTAL; /* set before the NULL check so create_fail can release the ones that succeeded */
+
+ /* todo: not reuse phy_pll registers */
+ pool->base.dp_clock_source =
+ dcn35_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_ID_DP_DTO,
+ &clk_src_regs[0], true);
+
+ for (i = 0; i < pool->base.clk_src_count; i++) { /* NOTE(review): checks clock_sources[] only; dp_clock_source is never NULL-checked here */
+ if (pool->base.clock_sources[i] == NULL) {
+ dm_error("DC: failed to create clock sources!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+ }
+ /*temp till dml2 fully work without dml1*/
+ dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); /* NOTE(review): uses the dcn3_5_* tables with DML_PROJECT_DCN31 - confirm intended for DCN3.6 */
+
+ /* TODO: DCCG */
+ pool->base.dccg = dccg35_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask);
+ if (pool->base.dccg == NULL) {
+ dm_error("DC: failed to create dccg!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+#undef REG_STRUCT
+#define REG_STRUCT pg_cntl_regs
+ pg_cntl_dcn35_regs_init();
+
+ pool->base.pg_cntl = pg_cntl35_create(ctx, &pg_cntl_regs, &pg_cntl_shift, &pg_cntl_mask);
+ if (pool->base.pg_cntl == NULL) {
+ dm_error("DC: failed to create power gate control!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+ /* TODO: IRQ */
+ init_data.ctx = dc->ctx;
+ pool->base.irqs = dal_irq_service_dcn36_create(&init_data);
+ if (!pool->base.irqs)
+ goto create_fail; /* unlike the other failure paths, no dm_error() is logged here */
+
+ /* HUBBUB */
+ pool->base.hubbub = dcn35_hubbub_create(ctx);
+ if (pool->base.hubbub == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create hubbub!\n");
+ goto create_fail;
+ }
+
+ /* HUBPs, DPPs, OPPs and TGs */
+ for (i = 0; i < pool->base.pipe_count; i++) {
+ pool->base.hubps[i] = dcn35_hubp_create(ctx, i);
+ if (pool->base.hubps[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC: failed to create hubps!\n");
+ goto create_fail;
+ }
+
+ pool->base.dpps[i] = dcn35_dpp_create(ctx, i);
+ if (pool->base.dpps[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC: failed to create dpps!\n");
+ goto create_fail;
+ }
+ }
+
+ for (i = 0; i < pool->base.res_cap->num_opp; i++) {
+ pool->base.opps[i] = dcn35_opp_create(ctx, i);
+ if (pool->base.opps[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC: failed to create output pixel processor!\n");
+ goto create_fail;
+ }
+ }
+
+ for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+ pool->base.timing_generators[i] = dcn35_timing_generator_create(
+ ctx, i);
+ if (pool->base.timing_generators[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create tg!\n");
+ goto create_fail;
+ }
+ }
+ pool->base.timing_generator_count = i; /* i equals num_timing_generator once the loop completes */
+
+ /* PSR */
+ pool->base.psr = dmub_psr_create(ctx);
+ if (pool->base.psr == NULL) {
+ dm_error("DC: failed to create psr obj!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+ /* Replay */
+ pool->base.replay = dmub_replay_create(ctx);
+ if (pool->base.replay == NULL) {
+ dm_error("DC: failed to create replay obj!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+ /* ABM */
+ for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) { /* one ABM per timing generator; abm_regs[] has 4 initialised entries */
+ pool->base.multiple_abms[i] = dmub_abm_create(ctx,
+ &abm_regs[i],
+ &abm_shift,
+ &abm_mask);
+ if (pool->base.multiple_abms[i] == NULL) {
+ dm_error("DC: failed to create abm for pipe %d!\n", i);
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+ }
+
+ /* MPC and DSC */
+ pool->base.mpc = dcn35_mpc_create(ctx, pool->base.mpcc_count, pool->base.res_cap->num_mpc_3dlut);
+ if (pool->base.mpc == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create mpc!\n");
+ goto create_fail;
+ }
+
+ for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
+ pool->base.dscs[i] = dcn35_dsc_create(ctx, i);
+ if (pool->base.dscs[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create display stream compressor %d!\n", i);
+ goto create_fail;
+ }
+ }
+
+ /* DWB and MMHUBBUB */
+ if (!dcn35_dwbc_create(ctx, &pool->base)) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create dwbc!\n");
+ goto create_fail;
+ }
+
+ if (!dcn35_mmhubbub_create(ctx, &pool->base)) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create mcif_wb!\n");
+ goto create_fail;
+ }
+
+ /* AUX and I2C */
+ for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
+ pool->base.engines[i] = dcn31_aux_engine_create(ctx, i);
+ if (pool->base.engines[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC:failed to create aux engine!!\n");
+ goto create_fail;
+ }
+ pool->base.hw_i2cs[i] = dcn31_i2c_hw_create(ctx, i);
+ if (pool->base.hw_i2cs[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC:failed to create hw i2c!!\n");
+ goto create_fail;
+ }
+ pool->base.sw_i2cs[i] = NULL; /* no SW I2C engines are created here */
+ }
+
+ /* USB4 DPIA count; the previous comment claimed 6 DPIA but the value set below is 4 - TODO confirm which is right */
+ pool->base.usb4_dpia_count = 4;
+ if (dc->debug.dpia_debug.bits.disable_dpia)
+ pool->base.usb4_dpia_count = 0;
+
+ /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */
+ if (!resource_construct(num_virtual_links, dc, &pool->base,
+ &res_create_funcs))
+ goto create_fail;
+
+ /* HW Sequencer and Plane caps */
+ dcn35_hw_sequencer_construct(dc);
+
+ dc->caps.max_planes = pool->base.pipe_count;
+
+ for (i = 0; i < dc->caps.max_planes; ++i)
+ dc->caps.planes[i] = plane_cap;
+
+ dc->caps.max_odm_combine_factor = 4;
+
+ dc->cap_funcs = cap_funcs;
+
+ dc->dcn_ip->max_num_dpp = pool->base.pipe_count;
+
+ dc->dml2_options.dcn_pipe_count = pool->base.pipe_count;
+ dc->dml2_options.use_native_soc_bb_construction = true;
+ dc->dml2_options.minimize_dispclk_using_odm = false;
+ if (dc->config.EnableMinDispClkODM)
+ dc->dml2_options.minimize_dispclk_using_odm = true;
+ dc->dml2_options.enable_windowed_mpo_odm = dc->config.enable_windowed_mpo_odm;
+
+ resource_init_common_dml2_callbacks(dc, &dc->dml2_options);
+ dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; /* assigned after the common callbacks are installed */
+
+ dc->dml2_options.max_segments_per_hubp = 24;
+ dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/
+ dc->dml2_options.override_det_buffer_size_kbytes = true;
+
+ if (dc->config.sdpif_request_limit_words_per_umc == 0)
+ dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/
+
+ return true;
+
+create_fail:
+
+ dcn36_resource_destruct(pool); /* destruct NULL-checks each object before freeing, so a partially built pool is handled */
+
+ return false;
+}
+
+struct resource_pool *dcn36_create_resource_pool(
+ const struct dc_init_data *init_data,
+ struct dc *dc)
+{ /* Allocate a zeroed DCN3.6 pool and construct it; returns the embedded base, or NULL on OOM or construct failure. */
+ struct dcn36_resource_pool *pool =
+ kzalloc(sizeof(struct dcn36_resource_pool), GFP_KERNEL);
+
+ if (!pool)
+ return NULL;
+
+ if (dcn36_resource_construct(init_data->num_virtual_links, dc, pool))
+ return &pool->base;
+
+ BREAK_TO_DEBUGGER();
+ kfree(pool); /* construct already destructed the contents on failure; only the container remains */
+ return NULL;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.h
new file mode 100644
index 000000000000..5490c9975e23
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2025 Advanced Micro Devices, Inc. */
+
+#ifndef _DCN36_RESOURCE_H_
+#define _DCN36_RESOURCE_H_
+
+#include "core_types.h"
+
+extern struct _vcs_dpi_ip_params_st dcn3_6_ip; /* NOTE(review): dcn36_resource_construct() passes dcn3_5_ip/dcn3_5_soc to dml_init_instance(), not these - confirm */
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_6_soc;
+
+#define TO_DCN36_RES_POOL(pool)\
+ container_of(pool, struct dcn36_resource_pool, base) /* maps a pointer to the embedded 'base' back to its dcn36_resource_pool */
+
+struct dcn36_resource_pool { /* DCN3.6 wrapper around the generic resource_pool */
+ struct resource_pool base; /* must stay embedded: TO_DCN36_RES_POOL() recovers the wrapper via container_of() */
+};
+
+struct resource_pool *dcn36_create_resource_pool(
+ const struct dc_init_data *init_data,
+ struct dc *dc); /* returns the pool's embedded base on success, NULL on allocation or construction failure */
+
+#define HWSEQ_DCN36_REG_LIST()\
+ SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
+ SR(DCHUBBUB_ARB_HOSTVM_CNTL), \
+ SR(DIO_MEM_PWR_CTRL), \
+ SR(ODM_MEM_PWR_CTRL3), \
+ SR(MMHUBBUB_MEM_PWR_CNTL), \
+ SR(DCCG_GATE_DISABLE_CNTL), \
+ SR(DCCG_GATE_DISABLE_CNTL2), \
+ SR(DCCG_GATE_DISABLE_CNTL4), \
+ SR(DCCG_GATE_DISABLE_CNTL5), \
+ SR(DCFCLK_CNTL),\
+ SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \
+ SRII(PIXEL_RATE_CNTL, OTG, 0), \
+ SRII(PIXEL_RATE_CNTL, OTG, 1),\
+ SRII(PIXEL_RATE_CNTL, OTG, 2),\
+ SRII(PIXEL_RATE_CNTL, OTG, 3),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\
+ SR(MICROSECOND_TIME_BASE_DIV), \
+ SR(MILLISECOND_TIME_BASE_DIV), \
+ SR(DISPCLK_FREQ_CHANGE_CNTL), \
+ SR(RBBMIF_TIMEOUT_DIS), \
+ SR(RBBMIF_TIMEOUT_DIS_2), \
+ SR(DCHUBBUB_CRC_CTRL), \
+ SR(DPP_TOP0_DPP_CRC_CTRL), \
+ SR(MPC_CRC_CTRL), \
+ SR(DOMAIN0_PG_CONFIG), \
+ SR(DOMAIN1_PG_CONFIG), \
+ SR(DOMAIN2_PG_CONFIG), \
+ SR(DOMAIN3_PG_CONFIG), \
+ SR(DOMAIN16_PG_CONFIG), \
+ SR(DOMAIN17_PG_CONFIG), \
+ SR(DOMAIN18_PG_CONFIG), \
+ SR(DOMAIN19_PG_CONFIG), \
+ SR(DOMAIN0_PG_STATUS), \
+ SR(DOMAIN1_PG_STATUS), \
+ SR(DOMAIN2_PG_STATUS), \
+ SR(DOMAIN3_PG_STATUS), \
+ SR(DOMAIN16_PG_STATUS), \
+ SR(DOMAIN17_PG_STATUS), \
+ SR(DOMAIN18_PG_STATUS), \
+ SR(DOMAIN19_PG_STATUS), \
+ SR(DC_IP_REQUEST_CNTL), \
+ SR(AZALIA_AUDIO_DTO), \
+ SR(AZALIA_CONTROLLER_CLOCK_GATING), \
+ SR(HPO_TOP_HW_CONTROL),\
+ SR(DMU_CLK_CNTL) /* list relies on the includer's SR()/SRII() definitions; covers OTG 0-3 and PG domains 0-3 and 16-19 */
+
+#endif /* _DCN36_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
new file mode 100644
index 000000000000..1d18807e4749
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
@@ -0,0 +1,2278 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dm_services.h"
+#include "dc.h"
+
+#include "dcn32/dcn32_init.h"
+#include "dcn401/dcn401_init.h"
+
+#include "resource.h"
+#include "include/irq_service_interface.h"
+#include "dcn401_resource.h"
+
+#include "dcn20/dcn20_resource.h"
+#include "dcn30/dcn30_resource.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn321/dcn321_resource.h"
+
+#include "dcn10/dcn10_ipp.h"
+#include "dcn401/dcn401_hubbub.h"
+#include "dcn401/dcn401_mpc.h"
+#include "dcn401/dcn401_hubp.h"
+#include "irq/dcn401/irq_service_dcn401.h"
+#include "dcn401/dcn401_dpp.h"
+#include "dcn401/dcn401_optc.h"
+#include "dcn20/dcn20_hwseq.h"
+#include "dcn30/dcn30_hwseq.h"
+#include "dce110/dce110_hwseq.h"
+#include "dcn20/dcn20_opp.h"
+#include "dcn401/dcn401_dsc.h"
+#include "dcn30/dcn30_vpg.h"
+#include "dcn31/dcn31_vpg.h"
+#include "dcn30/dcn30_afmt.h"
+#include "dcn30/dcn30_dio_stream_encoder.h"
+#include "dcn401/dcn401_dio_stream_encoder.h"
+#include "dcn31/dcn31_hpo_dp_stream_encoder.h"
+#include "dcn31/dcn31_hpo_dp_link_encoder.h"
+#include "dcn32/dcn32_hpo_dp_link_encoder.h"
+#include "dcn31/dcn31_apg.h"
+#include "dcn31/dcn31_dio_link_encoder.h"
+#include "dcn401/dcn401_dio_link_encoder.h"
+#include "dcn10/dcn10_link_encoder.h"
+#include "dcn321/dcn321_dio_link_encoder.h"
+#include "dce/dce_clock_source.h"
+#include "dce/dce_audio.h"
+#include "dce/dce_hwseq.h"
+#include "clk_mgr.h"
+#include "virtual/virtual_stream_encoder.h"
+#include "dml/display_mode_vba.h"
+#include "dcn401/dcn401_dccg.h"
+#include "dcn10/dcn10_resource.h"
+#include "link_service.h"
+#include "link_enc_cfg.h"
+#include "dcn31/dcn31_panel_cntl.h"
+
+#include "dcn30/dcn30_dwb.h"
+#include "dcn32/dcn32_mmhubbub.h"
+
+#include "dcn/dcn_4_1_0_offset.h"
+#include "dcn/dcn_4_1_0_sh_mask.h"
+#include "nbif/nbif_6_3_1_offset.h"
+
+#include "reg_helper.h"
+#include "dce/dmub_abm.h"
+#include "dce/dmub_psr.h"
+#include "dce/dce_aux.h"
+#include "dce/dce_i2c.h"
+
+#include "dml/dcn30/display_mode_vba_30.h"
+#include "vm_helper.h"
+#include "dcn20/dcn20_vmid.h"
+
+#include "dc_state_priv.h"
+
+#include "dml2/dml2_wrapper.h"
+
+#define DC_LOGGER_INIT(logger) /* expands to nothing in this file */
+
+enum dcn401_clk_src_array_id { /* indices into the pool's clock source array */
+ DCN401_CLK_SRC_PLL0,
+ DCN401_CLK_SRC_PLL1,
+ DCN401_CLK_SRC_PLL2,
+ DCN401_CLK_SRC_PLL3,
+ //DCN401_CLK_SRC_PLL4,
+ DCN401_CLK_SRC_TOTAL /* evaluates to 4 while the PLL4 entry above stays commented out */
+};
+
+/* begin *********************
+ * macros to expand register list macro defined in HW object header file
+ */
+
+/* DCN */
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] /* needs a variable named 'ctx' in scope at the expansion site */
+
+#define BASE(seg) BASE_INNER(seg) /* extra level so 'seg' is macro-expanded before use */
+
+#define SR(reg_name)\
+ REG_STRUCT.reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name /* scalar REG_STRUCT: segment base + register offset */
+#define SR_ARR(reg_name, id)\
+ REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name /* same as SR() but REG_STRUCT is an array indexed by id */
+#define SR_ARR_INIT(reg_name, id, value)\
+ REG_STRUCT[id].reg_name = value /* stores 'value' verbatim, no base added */
+
+#define SRI(reg_name, block, id)\
+ REG_STRUCT.reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRI_ARR(reg_name, block, id)\
+ REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+/*
+ * Used when a reg_name would otherwise begin with an integer
+ */
+#define SRI_ARR_US(reg_name, block, id)\
+ REG_STRUCT[id].reg_name = BASE(reg ## block ## id ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## reg_name /* no '_' is pasted between id and reg_name here */
+#define SR_ARR_I2C(reg_name, id) \
+ REG_STRUCT[id-1].reg_name = BASE(reg##reg_name##_BASE_IDX) + reg##reg_name /* array slot is id-1 */
+
+#define SRI_ARR_I2C(reg_name, block, id)\
+ REG_STRUCT[id-1].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name /* array slot is id-1, register name still uses id */
+
+#define SRI_ARR_ALPHABET(reg_name, block, index, id)\
+ REG_STRUCT[index].reg_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name /* array slot (index) and name suffix (id) are passed separately */
+
+#define SRI2(reg_name, block, id)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name /* designated-initializer form (no REG_STRUCT); block and id are unused */
+#define SRI2_ARR(reg_name, block, id)\
+ REG_STRUCT[id].reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name /* block is unused; id only selects the array slot */
+
+#define SRIR(var_name, reg_name, block, id)\
+ .var_name = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name /* designated-initializer form; field name differs from register name */
+
+#define SRII(reg_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name /* the field itself is an array indexed by id */
+
+#define SRII_ARR_2(reg_name, block, id, inst)\
+ REG_STRUCT[inst].reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name
+
+#define SRII_MPC_RMU(reg_name, block, id)\
+ .RMU##_##reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name /* designated-initializer form targeting an RMU_-prefixed field */
+
+#define SRII_DWB(reg_name, temp_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## temp_name /* field is reg_name, register symbol is built from temp_name */
+
+#define DCCG_SRII(reg_name, block, id)\
+ REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
+ reg ## block ## id ## _ ## reg_name /* field name is block_reg_name */
+
+#define SF_DWB2(reg_name, block, id, field_name, post_fix) \
+ .field_name = reg_name ## __ ## field_name ## post_fix /* shift/mask initializer; block and id are unused */
+
+#define VUPDATE_SRII(reg_name, block, id)\
+ REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
+ reg ## reg_name ## _ ## block ## id /* register symbol is reg_name followed by block+id, the reverse of SRII() */
+
+/* NBIO */
+#define NBIO_BASE_INNER(seg) ctx->nbio_reg_offsets[seg] /* NBIO counterpart of BASE_INNER(); also needs 'ctx' in scope */
+
+#define NBIO_BASE(seg) \
+ NBIO_BASE_INNER(seg)
+
+#define NBIO_SR(reg_name)\
+ REG_STRUCT.reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \
+ regBIF_BX0_ ## reg_name /* register symbols carry the regBIF_BX0_ prefix */
+#define NBIO_SR_ARR(reg_name, id)\
+ REG_STRUCT[id].reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \
+ regBIF_BX0_ ## reg_name
+
+#define CTX ctx
+#define REG(reg_name) \
+ (ctx->dcn_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) /* expression form: yields the address instead of assigning it */
+
+static struct bios_registers bios_regs;
+
+#define bios_regs_init() \
+ NBIO_SR(BIOS_SCRATCH_3),\
+ NBIO_SR(BIOS_SCRATCH_6)
+
+/* Clock source: register init list, register table and field shift/mask tables. */
+#define clk_src_regs_init(index, pllid)\
+ CS_COMMON_REG_LIST_DCN3_0_RI(index, pllid)
+
+/* Not const: presumably filled through clk_src_regs_init() outside this view. */
+static struct dce110_clk_src_regs clk_src_regs[5];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dce110_clk_src_shift cs_shift = {
+ CS_COMMON_MASK_SH_LIST_DCN3_2(__SHIFT)
+};
+
+static const struct dce110_clk_src_mask cs_mask = {
+ CS_COMMON_MASK_SH_LIST_DCN3_2(_MASK)
+};
+
+/* ABM: register init list, register table and field shift/mask tables. */
+#define abm_regs_init(id)\
+ ABM_DCN401_REG_LIST_RI(id)
+
+/* Not const: presumably filled through abm_regs_init() outside this view. */
+static struct dce_abm_registers abm_regs[4];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dce_abm_shift abm_shift = {
+ ABM_MASK_SH_LIST_DCN401(__SHIFT)
+};
+
+static const struct dce_abm_mask abm_mask = {
+ ABM_MASK_SH_LIST_DCN401(_MASK)
+};
+
+/* Audio: register init list, register table and field shift/mask tables. */
+#define audio_regs_init(id)\
+ AUD_COMMON_REG_LIST_RI(id)
+
+/* Filled by dcn401_create_audio() through audio_regs_init(). */
+static struct dce_audio_registers audio_regs[5];
+
+/*
+ * Audio field list: the two AZALIA endpoint index/data fields followed by
+ * the common base list.
+ */
+#define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
+ SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_INDEX, AZALIA_ENDPOINT_REG_INDEX, mask_sh),\
+ SF(AZF0ENDPOINT0_AZALIA_F0_CODEC_ENDPOINT_DATA, AZALIA_ENDPOINT_REG_DATA, mask_sh),\
+ AUD_COMMON_MASK_SH_LIST_BASE(mask_sh)
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dce_audio_shift audio_shift = {
+ DCE120_AUD_COMMON_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_audio_mask audio_mask = {
+ DCE120_AUD_COMMON_MASK_SH_LIST(_MASK)
+};
+
+/* VPG: register init list, register table and field shift/mask tables. */
+#define vpg_regs_init(id)\
+ VPG_DCN401_REG_LIST_RI(id)
+
+/* Nine instances (ids 0-8); filled by dcn401_vpg_create(). */
+static struct dcn31_vpg_registers vpg_regs[9];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn31_vpg_shift vpg_shift = {
+ DCN31_VPG_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_vpg_mask vpg_mask = {
+ DCN31_VPG_MASK_SH_LIST(_MASK)
+};
+
+/* AFMT: register init list, register table and field shift/mask tables. */
+#define afmt_regs_init(id)\
+ AFMT_DCN3_REG_LIST_RI(id)
+
+/* Filled by dcn401_afmt_create() through afmt_regs_init(). */
+static struct dcn30_afmt_registers afmt_regs[5];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn30_afmt_shift afmt_shift = {
+ DCN3_AFMT_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn30_afmt_mask afmt_mask = {
+ DCN3_AFMT_MASK_SH_LIST(_MASK)
+};
+
+/* APG: register init list, register table and field shift/mask tables. */
+#define apg_regs_init(id)\
+ APG_DCN31_REG_LIST_RI(id)
+
+/* Filled by dcn401_apg_create() through apg_regs_init(). */
+static struct dcn31_apg_registers apg_regs[4];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn31_apg_shift apg_shift = {
+ DCN31_APG_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_apg_mask apg_mask = {
+ DCN31_APG_MASK_SH_LIST(_MASK)
+};
+
+/* DIO stream encoder: register init list, register table, shift/mask tables. */
+#define stream_enc_regs_init(id)\
+ SE_DCN4_01_REG_LIST_RI(id)
+
+/* Filled by dcn401_stream_encoder_create(); its size bounds the accepted eng_id. */
+static struct dcn10_stream_enc_registers stream_enc_regs[4];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn10_stream_encoder_shift se_shift = {
+ SE_COMMON_MASK_SH_LIST_DCN401(__SHIFT)
+};
+
+static const struct dcn10_stream_encoder_mask se_mask = {
+ SE_COMMON_MASK_SH_LIST_DCN401(_MASK)
+};
+
+/*
+ * Link encoder register tables (AUX, HPD and the encoder itself). All three
+ * are filled by dcn401_link_encoder_create().
+ */
+#define aux_regs_init(id)\
+ DCN2_AUX_REG_LIST_RI(id)
+
+static struct dcn10_link_enc_aux_registers link_enc_aux_regs[5];
+
+#define hpd_regs_init(id)\
+ HPD_REG_LIST_RI(id)
+
+/* The array size is used to range-check enc_init_data->hpd_source. */
+static struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[5];
+
+/* @phyid is accepted but not used in the expansion. */
+#define link_regs_init(id, phyid)\
+ LE_DCN401_REG_LIST_RI(id)
+
+static struct dcn10_link_enc_registers link_enc_regs[4];
+
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn10_link_enc_shift le_shift = {
+ LINK_ENCODER_MASK_SH_LIST_DCN401(__SHIFT)
+};
+
+
+static const struct dcn10_link_enc_mask le_mask = {
+ LINK_ENCODER_MASK_SH_LIST_DCN401(_MASK)
+};
+
+
+/* HPO DP stream encoder: register init list, register table, shift/mask tables. */
+#define hpo_dp_stream_encoder_reg_init(id)\
+ DCN3_1_HPO_DP_STREAM_ENC_REG_LIST_RI(id)
+
+/* Filled by dcn401_hpo_dp_stream_encoder_create(). */
+static struct dcn31_hpo_dp_stream_encoder_registers hpo_dp_stream_enc_regs[4];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn31_hpo_dp_stream_encoder_shift hpo_dp_se_shift = {
+ DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_hpo_dp_stream_encoder_mask hpo_dp_se_mask = {
+ DCN3_1_HPO_DP_STREAM_ENC_MASK_SH_LIST(_MASK)
+};
+
+
+/* HPO DP link encoder: register init list, register table, shift/mask tables. */
+#define hpo_dp_link_encoder_reg_init(id)\
+ DCN3_1_HPO_DP_LINK_ENC_REG_LIST_RI(id)
+ /*
+  * The RDPCSTX lists below are disabled and are not part of the macro: the
+  * macro's last line has no continuation backslash.
+  */
+ /*DCN3_1_RDPCSTX_REG_LIST(0),*/
+ /*DCN3_1_RDPCSTX_REG_LIST(1),*/
+ /*DCN3_1_RDPCSTX_REG_LIST(2),*/
+ /*DCN3_1_RDPCSTX_REG_LIST(3),*/
+
+/* Filled by dcn401_hpo_dp_link_encoder_create(). */
+static struct dcn31_hpo_dp_link_encoder_registers hpo_dp_link_enc_regs[4];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn31_hpo_dp_link_encoder_shift hpo_dp_le_shift = {
+ DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn31_hpo_dp_link_encoder_mask hpo_dp_le_mask = {
+ DCN3_2_HPO_DP_LINK_ENC_MASK_SH_LIST(_MASK)
+};
+
+/* DPP: register init list, register table and field shift/mask tables. */
+#define dpp_regs_init(id)\
+ DPP_REG_LIST_DCN401_COMMON_RI(id)
+
+/* Filled by dcn401_dpp_create() through dpp_regs_init(). */
+static struct dcn401_dpp_registers dpp_regs[4];
+
+/* Named tf_* rather than dpp_*; passed to dpp401_construct(). */
+static const struct dcn401_dpp_shift tf_shift = {
+ DPP_REG_LIST_SH_MASK_DCN401_COMMON(__SHIFT)
+};
+
+static const struct dcn401_dpp_mask tf_mask = {
+ DPP_REG_LIST_SH_MASK_DCN401_COMMON(_MASK)
+};
+
+/* OPP: register init list, register table and field shift/mask tables. */
+#define opp_regs_init(id)\
+ OPP_REG_LIST_DCN401_RI(id)
+
+/* Filled by dcn401_opp_create() through opp_regs_init(). */
+static struct dcn20_opp_registers opp_regs[4];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn20_opp_shift opp_shift = {
+ OPP_MASK_SH_LIST_DCN20(__SHIFT)
+};
+
+static const struct dcn20_opp_mask opp_mask = {
+ OPP_MASK_SH_LIST_DCN20(_MASK)
+};
+
+/*
+ * Per-instance AUX engine register setup: the common AUX register list
+ * followed by explicit values for AUXN_IMPCAL, AUXP_IMPCAL (both 0) and
+ * AUX_RESET_MASK. Each entry appears exactly once.
+ *
+ * NOTE(review): assumes SR_ARR_INIT is a plain per-field assignment, so
+ * listing AUX_RESET_MASK a second time would be redundant - confirm against
+ * its definition.
+ */
+#define aux_engine_regs_init(id) \
+	AUX_COMMON_REG_LIST0_RI(id), \
+	SR_ARR_INIT(AUXN_IMPCAL, id, 0), \
+	SR_ARR_INIT(AUXP_IMPCAL, id, 0), \
+	SR_ARR_INIT(AUX_RESET_MASK, id, DP_AUX0_AUX_CONTROL__AUX_RESET_MASK)
+
+/* Filled by dcn401_aux_engine_create(), which initializes ids 0-3. */
+static struct dce110_aux_registers aux_engine_regs[5];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dce110_aux_registers_shift aux_shift = {
+ DCN_AUX_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce110_aux_registers_mask aux_mask = {
+ DCN_AUX_MASK_SH_LIST(_MASK)
+};
+
+/* DWB: register init list, register table and field shift/mask tables. */
+#define dwbc_regs_dcn401_init(id)\
+ DWBC_COMMON_REG_LIST_DCN30_RI(id)
+
+/* Not const: presumably filled through dwbc_regs_dcn401_init() outside this view. */
+static struct dcn30_dwbc_registers dwbc401_regs[1];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn30_dwbc_shift dwbc401_shift = {
+ DWBC_COMMON_MASK_SH_LIST_DCN30(__SHIFT)
+};
+
+static const struct dcn30_dwbc_mask dwbc401_mask = {
+ DWBC_COMMON_MASK_SH_LIST_DCN30(_MASK)
+};
+
+
+/* MCIF_WB (mmhubbub): register init list, register table, shift/mask tables. */
+#define mcif_wb_regs_dcn3_init(id)\
+ MCIF_WB_COMMON_REG_LIST_DCN32_RI(id)
+
+/* Not const: presumably filled through mcif_wb_regs_dcn3_init() outside this view. */
+static struct dcn30_mmhubbub_registers mcif_wb30_regs[1];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn30_mmhubbub_shift mcif_wb30_shift = {
+ MCIF_WB_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
+
+static const struct dcn30_mmhubbub_mask mcif_wb30_mask = {
+ MCIF_WB_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
+
+/* DSC: register init list, register table and field shift/mask tables. */
+#define dsc_regs_init(id)\
+ DSC_REG_LIST_DCN401_RI(id)
+
+/* Not const: presumably filled through dsc_regs_init() outside this view. */
+static struct dcn401_dsc_registers dsc_regs[4];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn401_dsc_shift dsc_shift = {
+ DSC_REG_LIST_SH_MASK_DCN401(__SHIFT)
+};
+
+static const struct dcn401_dsc_mask dsc_mask = {
+ DSC_REG_LIST_SH_MASK_DCN401(_MASK)
+};
+
+/* Single MPC register table; filled by dcn401_mpc_create(). */
+static struct dcn401_mpc_registers mpc_regs;
+
+/*
+ * MPC register setup: the DCN4.01 list and the output-mux list for
+ * instances 0-3, plus the DWB mux list for instance 0.
+ */
+#define dcn_mpc_regs_init()\
+ MPC_REG_LIST_DCN4_01_RI(0),\
+ MPC_REG_LIST_DCN4_01_RI(1),\
+ MPC_REG_LIST_DCN4_01_RI(2),\
+ MPC_REG_LIST_DCN4_01_RI(3),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(0),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(1),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(2),\
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(3),\
+ MPC_DWB_MUX_REG_LIST_DCN3_0_RI(0)
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn401_mpc_shift mpc_shift = {
+ MPC_COMMON_MASK_SH_LIST_DCN4_01(__SHIFT)
+};
+
+static const struct dcn401_mpc_mask mpc_mask = {
+ MPC_COMMON_MASK_SH_LIST_DCN4_01(_MASK)
+};
+
+/* OPTC: register init list, register table and field shift/mask tables. */
+#define optc_regs_init(id)\
+ OPTC_COMMON_REG_LIST_DCN401_RI(id)
+
+/* Filled by dcn401_timing_generator_create() through optc_regs_init(). */
+static struct dcn_optc_registers optc_regs[4];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn_optc_shift optc_shift = {
+ OPTC_COMMON_MASK_SH_LIST_DCN401(__SHIFT)
+};
+
+static const struct dcn_optc_mask optc_mask = {
+ OPTC_COMMON_MASK_SH_LIST_DCN401(_MASK)
+};
+
+/* HUBP: register init list, register table and field shift/mask tables. */
+#define hubp_regs_init(id)\
+ HUBP_REG_LIST_DCN401_RI(id)
+
+/* Filled by dcn401_hubp_create() through hubp_regs_init(). */
+static struct dcn_hubp2_registers hubp_regs[4];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn_hubp2_shift hubp_shift = {
+ HUBP_MASK_SH_LIST_DCN401(__SHIFT)
+};
+
+static const struct dcn_hubp2_mask hubp_mask = {
+ HUBP_MASK_SH_LIST_DCN401(_MASK)
+};
+
+/* Single HUBBUB register table; filled by dcn401_hubbub_create(). */
+static struct dcn_hubbub_registers hubbub_reg;
+#define hubbub_reg_init()\
+ HUBBUB_REG_LIST_DCN4_01_RI(0)
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn_hubbub_shift hubbub_shift = {
+ HUBBUB_MASK_SH_LIST_DCN4_01(__SHIFT)
+};
+
+static const struct dcn_hubbub_mask hubbub_mask = {
+ HUBBUB_MASK_SH_LIST_DCN4_01(_MASK)
+};
+
+/* Not const: presumably filled through dccg_regs_init() outside this view. */
+static struct dccg_registers dccg_regs;
+
+#define dccg_regs_init()\
+ DCCG_REG_LIST_DCN401_RI()
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dccg_shift dccg_shift = {
+ DCCG_MASK_SH_LIST_DCN401(__SHIFT)
+};
+
+static const struct dccg_mask dccg_mask = {
+ DCCG_MASK_SH_LIST_DCN401(_MASK)
+};
+
+/*
+ * Designated-initializer form for registers whose instance id sits in the
+ * middle of the name: member <pre>_<post>[id] receives the offset of
+ * reg<pre><id>_<post>.
+ */
+#define SRII2(reg_name_pre, reg_name_post, id)\
+ .reg_name_pre ## _ ## reg_name_post[id] = BASE(reg ## reg_name_pre \
+ ## id ## _ ## reg_name_post ## _BASE_IDX) + \
+ reg ## reg_name_pre ## id ## _ ## reg_name_post
+
+
+/*
+ * Register list for the hardware sequencer. Single registers go through SR;
+ * the per-OTG PIXEL_RATE_CNTL and PHYPLL_PIXEL_RATE_CNTL registers go through
+ * SRII for instances 0-3. The power-gating DOMAINn_PG_CONFIG/STATUS entries
+ * cover domains 0-3, 16-19 and 22-25.
+ */
+#define HWSEQ_DCN401_REG_LIST()\
+ SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
+ SR(DIO_MEM_PWR_CTRL), \
+ SR(ODM_MEM_PWR_CTRL3), \
+ SR(MMHUBBUB_MEM_PWR_CNTL), \
+ SR(DCCG_GATE_DISABLE_CNTL), \
+ SR(DCCG_GATE_DISABLE_CNTL2), \
+ SR(DCFCLK_CNTL),\
+ SR(DC_MEM_GLOBAL_PWR_REQ_CNTL), \
+ SRII(PIXEL_RATE_CNTL, OTG, 0), \
+ SRII(PIXEL_RATE_CNTL, OTG, 1),\
+ SRII(PIXEL_RATE_CNTL, OTG, 2),\
+ SRII(PIXEL_RATE_CNTL, OTG, 3),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 0),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 1),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 2),\
+ SRII(PHYPLL_PIXEL_RATE_CNTL, OTG, 3),\
+ SR(MICROSECOND_TIME_BASE_DIV), \
+ SR(MILLISECOND_TIME_BASE_DIV), \
+ SR(DISPCLK_FREQ_CHANGE_CNTL), \
+ SR(RBBMIF_TIMEOUT_DIS), \
+ SR(RBBMIF_TIMEOUT_DIS_2), \
+ SR(DCHUBBUB_CRC_CTRL), \
+ SR(DPP_TOP0_DPP_CRC_CTRL), \
+ SR(DPP_TOP0_DPP_CRC_VAL_B_A), \
+ SR(DPP_TOP0_DPP_CRC_VAL_R_G), \
+ SR(MPC_CRC_CTRL), \
+ SR(MPC_CRC_RESULT_GB), \
+ SR(MPC_CRC_RESULT_C), \
+ SR(MPC_CRC_RESULT_AR), \
+ SR(DOMAIN0_PG_CONFIG), \
+ SR(DOMAIN1_PG_CONFIG), \
+ SR(DOMAIN2_PG_CONFIG), \
+ SR(DOMAIN3_PG_CONFIG), \
+ SR(DOMAIN16_PG_CONFIG), \
+ SR(DOMAIN17_PG_CONFIG), \
+ SR(DOMAIN18_PG_CONFIG), \
+ SR(DOMAIN19_PG_CONFIG), \
+ SR(DOMAIN22_PG_CONFIG), \
+ SR(DOMAIN23_PG_CONFIG), \
+ SR(DOMAIN24_PG_CONFIG), \
+ SR(DOMAIN25_PG_CONFIG), \
+ SR(DOMAIN0_PG_STATUS), \
+ SR(DOMAIN1_PG_STATUS), \
+ SR(DOMAIN2_PG_STATUS), \
+ SR(DOMAIN3_PG_STATUS), \
+ SR(DOMAIN16_PG_STATUS), \
+ SR(DOMAIN17_PG_STATUS), \
+ SR(DOMAIN18_PG_STATUS), \
+ SR(DOMAIN19_PG_STATUS), \
+ SR(DOMAIN22_PG_STATUS), \
+ SR(DOMAIN23_PG_STATUS), \
+ SR(DOMAIN24_PG_STATUS), \
+ SR(DOMAIN25_PG_STATUS), \
+ SR(DC_IP_REQUEST_CNTL), \
+ SR(AZALIA_AUDIO_DTO), \
+ SR(HPO_TOP_HW_CONTROL),\
+ SR(AZALIA_CONTROLLER_CLOCK_GATING)
+
+/* Hardware sequencer register table; filled by dcn401_hwseq_create(). */
+static struct dce_hwseq_registers hwseq_reg;
+
+#define hwseq_reg_init()\
+ HWSEQ_DCN401_REG_LIST()
+
+/*
+ * Field list for the hardware sequencer: the generic DCN list followed by
+ * the DCN4.01 additions. The power-gating entries (FORCEON / GATE in
+ * DOMAINn_PG_CONFIG, PWR_STATUS in DOMAINn_PG_STATUS) cover domains
+ * 0-3, 16-19 and 22-25. HWS_SF is invoked with an empty first argument.
+ */
+#define HWSEQ_DCN401_MASK_SH_LIST(mask_sh)\
+ HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
+ HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
+ HWS_SF(, DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN22_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN23_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN24_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DOMAIN25_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, mask_sh), \
+ HWS_SF(, DC_IP_REQUEST_CNTL, IP_REQUEST_EN, mask_sh), \
+ HWS_SF(, AZALIA_AUDIO_DTO, AZALIA_AUDIO_DTO_MODULE, mask_sh), \
+ HWS_SF(, HPO_TOP_CLOCK_CONTROL, HPO_HDMISTREAMCLK_G_GATE_DIS, mask_sh), \
+ HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh), \
+ HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \
+ HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh)
+
+/* Shift and mask tables come from the list above, with __SHIFT / _MASK suffixes. */
+static const struct dce_hwseq_shift hwseq_shift = {
+ HWSEQ_DCN401_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dce_hwseq_mask hwseq_mask = {
+ HWSEQ_DCN401_MASK_SH_LIST(_MASK)
+};
+
+/* VMID: register init list, register table and field shift/mask tables. */
+#define vmid_regs_init(id)\
+ DCN20_VMID_REG_LIST_RI(id)
+
+/* Sixteen entries, matching res_cap_dcn4_01.num_vmid; filled by dcn401_hubbub_create(). */
+static struct dcn_vmid_registers vmid_regs[16];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dcn20_vmid_shift vmid_shifts = {
+ DCN20_VMID_MASK_SH_LIST(__SHIFT)
+};
+
+static const struct dcn20_vmid_mask vmid_masks = {
+ DCN20_VMID_MASK_SH_LIST(_MASK)
+};
+
+/*
+ * Hardware block counts for DCN4.01. num_vmid bounds the VMID loop in
+ * dcn401_hubbub_create().
+ */
+static const struct resource_caps res_cap_dcn4_01 = {
+ .num_timing_generator = 4,
+ .num_opp = 4,
+ .num_video_plane = 4,
+ .num_audio = 4,
+ .num_stream_encoder = 4,
+ .num_hpo_dp_stream_encoder = 4,
+ .num_hpo_dp_link_encoder = 4,
+ .num_pll = 4,
+ .num_dwb = 1,
+ .num_ddc = 4,
+ .num_vmid = 16,
+ .num_mpc_3dlut = 4,
+ .num_dsc = 4,
+};
+
+/* Plane capabilities: supported pixel formats and per-format scaling limits. */
+static const struct dc_plane_cap plane_cap = {
+ .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
+ .per_pixel_alpha = true,
+
+ .pixel_format_support = {
+ .argb8888 = true,
+ .nv12 = true,
+ .fp16 = true,
+ .p010 = true,
+ .ayuv = false,
+ },
+
+ .max_upscale_factor = {
+ .argb8888 = 16000,
+ .nv12 = 16000,
+ .fp16 = 16000
+ },
+
+ // 6:1 downscaling ratio: 1000/6 = 166.666
+ .max_downscale_factor = {
+ .argb8888 = 167,
+ .nv12 = 167,
+ .fp16 = 167
+ },
+ /*
+  * Positional initializers: these fill the two members declared directly
+  * after max_downscale_factor, so they must stay last and in this order.
+  * NOTE(review): presumably the minimum plane width/height - confirm
+  * against struct dc_plane_cap.
+  */
+ 64,
+ 64
+};
+
+/* Default debug/feature options for the driver. */
+static const struct dc_debug_options debug_defaults_drv = {
+ .disable_dmcu = true,
+ .force_abm_enable = false,
+ .clock_trace = true,
+ .disable_pplib_clock_request = false,
+ .pipe_split_policy = MPC_SPLIT_AVOID,
+ .force_single_disp_pipe_split = false,
+ .disable_dcc = DCC_ENABLE,
+ .vsr_support = true,
+ .performance_trace = false,
+ .max_downscale_src_width = 7680,/*upto 8K*/
+ .disable_pplib_wm_range = false,
+ .scl_reset_length10 = true,
+ .sanity_checks = false,
+ .underflow_assert_delay_us = 0xFFFFFFFF,
+ .dwb_fi_phase = -1, // -1 = disable,
+ .dmub_command_table = true,
+ /* Memory low-power: only the OPTC bit is enabled. */
+ .enable_mem_low_power = {
+ .bits = {
+ .vga = false,
+ .i2c = false,
+ .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled
+ .dscl = false,
+ .cm = false,
+ .mpc = false,
+ .optc = true,
+ }
+ },
+ .use_max_lb = true,
+ .force_disable_subvp = false,
+ .disable_force_pstate_allow_on_hw_release = false,
+ .exit_idle_opt_for_cursor_updates = true,
+ .using_dml2 = true,
+ .using_dml21 = true,
+ .enable_single_display_2to1_odm_policy = true,
+
+ //must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions
+ .enable_double_buffered_dsc_pg_support = true,
+ .enable_dp_dig_pixel_rate_div_policy = 1,
+ .allow_sw_cursor_fallback = false,
+ .alloc_extra_way_for_cursor = true,
+ .min_prefetch_in_strobe_ns = 60000, // 60us
+ .disable_unbounded_requesting = false,
+ .enable_legacy_fast_update = false,
+ .dcc_meta_propagation_delay_us = 10,
+ .fams_version = {
+ .minor = 1,
+ .major = 2,
+ }, //v2.1
+ .fams2_config = {
+ .bits = {
+ .enable = true,
+ .enable_offload_flip = true,
+ .enable_stall_recovery = true,
+ }
+ },
+ /*
+  * NOTE(review): selects the enumerator that follows CHROMA_COSITING_NONE
+  * by arithmetic rather than by name - confirm which value is intended.
+  */
+ .force_cositing = CHROMA_COSITING_NONE + 1,
+};
+
+/*
+ * Allocate an AUX engine and construct it for instance @inst.
+ *
+ * Runs aux_engine_regs_init() for ids 0-3 with REG_STRUCT pointing at
+ * aux_engine_regs, then passes &aux_engine_regs[inst] to the constructor.
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct dce_aux *dcn401_aux_engine_create(
+	struct dc_context *ctx,
+	uint32_t inst)
+{
+	struct aux_engine_dce110 *aux_engine;
+
+	aux_engine = kzalloc(sizeof(*aux_engine), GFP_KERNEL);
+	if (aux_engine == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT aux_engine_regs
+	aux_engine_regs_init(0),
+	aux_engine_regs_init(1),
+	aux_engine_regs_init(2),
+	aux_engine_regs_init(3);
+
+	dce110_aux_engine_construct(
+		aux_engine, ctx, inst,
+		SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD,
+		&aux_engine_regs[inst],
+		&aux_mask,
+		&aux_shift,
+		ctx->dc->caps.extended_aux_timeout_support);
+
+	return &aux_engine->base;
+}
+/* I2C HW engine: register init list, register table and shift/mask tables. */
+#define i2c_inst_regs_init(id)\
+ I2C_HW_ENGINE_COMMON_REG_LIST_DCN30_RI(id)
+
+/* Filled by dcn401_i2c_hw_create(), which runs the init list for ids 1-4. */
+static struct dce_i2c_registers i2c_hw_regs[5];
+
+/* Shift and mask tables come from the same list, with __SHIFT / _MASK suffixes. */
+static const struct dce_i2c_shift i2c_shifts = {
+ I2C_COMMON_MASK_SH_LIST_DCN401(__SHIFT)
+};
+
+static const struct dce_i2c_mask i2c_masks = {
+ I2C_COMMON_MASK_SH_LIST_DCN401(_MASK)
+};
+
+/*
+ * Allocate a hardware I2C engine and construct it for instance @inst.
+ *
+ * Runs i2c_inst_regs_init() for ids 1-4 with REG_STRUCT pointing at
+ * i2c_hw_regs, then passes &i2c_hw_regs[inst] to dcn2_i2c_hw_construct().
+ *
+ * Returns the new engine, or NULL if the allocation fails.
+ */
+static struct dce_i2c_hw *dcn401_i2c_hw_create(
+	struct dc_context *ctx,
+	uint32_t inst)
+{
+	struct dce_i2c_hw *dce_i2c_hw;
+
+	dce_i2c_hw = kzalloc(sizeof(*dce_i2c_hw), GFP_KERNEL);
+	if (dce_i2c_hw == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT i2c_hw_regs
+	i2c_inst_regs_init(1),
+	i2c_inst_regs_init(2),
+	i2c_inst_regs_init(3),
+	i2c_inst_regs_init(4);
+
+	dcn2_i2c_hw_construct(dce_i2c_hw, ctx, inst,
+			      &i2c_hw_regs[inst],
+			      &i2c_shifts,
+			      &i2c_masks);
+
+	return dce_i2c_hw;
+}
+
+/*
+ * Allocate and construct a clock source using the caller-supplied register
+ * table @regs.
+ *
+ * On success the dp_clk_src flag is recorded on the base object and the base
+ * is returned. If the allocation or dcn401_clk_src_construct() fails, NULL is
+ * returned; on construct failure the object is freed first.
+ */
+static struct clock_source *dcn401_clock_source_create(
+	struct dc_context *ctx,
+	struct dc_bios *bios,
+	enum clock_source_id id,
+	const struct dce110_clk_src_regs *regs,
+	bool dp_clk_src)
+{
+	struct dce110_clk_src *clk_src;
+
+	clk_src = kzalloc(sizeof(*clk_src), GFP_KERNEL);
+	if (clk_src == NULL)
+		return NULL;
+
+	if (!dcn401_clk_src_construct(clk_src, ctx, bios, id,
+				      regs, &cs_shift, &cs_mask)) {
+		kfree(clk_src);
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	clk_src->base.dp_clk_src = dp_clk_src;
+	return &clk_src->base;
+}
+
+/*
+ * Allocate the HUBBUB object, run the hubbub and VMID register init lists,
+ * construct the hubbub, and attach context, register, shift and mask tables
+ * to each of its res_cap_dcn4_01.num_vmid VMID entries.
+ *
+ * Returns the embedded hubbub base, or NULL if the allocation fails.
+ */
+static struct hubbub *dcn401_hubbub_create(struct dc_context *ctx)
+{
+	struct dcn20_hubbub *hubbub2;
+	int i;
+
+	hubbub2 = kzalloc(sizeof(*hubbub2), GFP_KERNEL);
+	if (hubbub2 == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT hubbub_reg
+	hubbub_reg_init();
+
+#undef REG_STRUCT
+#define REG_STRUCT vmid_regs
+	vmid_regs_init(0),
+	vmid_regs_init(1),
+	vmid_regs_init(2),
+	vmid_regs_init(3),
+	vmid_regs_init(4),
+	vmid_regs_init(5),
+	vmid_regs_init(6),
+	vmid_regs_init(7),
+	vmid_regs_init(8),
+	vmid_regs_init(9),
+	vmid_regs_init(10),
+	vmid_regs_init(11),
+	vmid_regs_init(12),
+	vmid_regs_init(13),
+	vmid_regs_init(14),
+	vmid_regs_init(15);
+
+	hubbub401_construct(
+		hubbub2, ctx,
+		&hubbub_reg,
+		&hubbub_shift,
+		&hubbub_mask,
+		/* nominal (default) detile buffer size in kbytes */
+		DCN4_01_DEFAULT_DET_SIZE,
+		/* dml2 ip_params_st.pixel_chunk_size_kbytes */
+		8,
+		/* dml2 ip_params_st.config_return_buffer_size_in_kbytes */
+		DCN4_01_CRB_SIZE_KB);
+
+	/* Every VMID shares the shift/mask tables but has its own registers. */
+	for (i = 0; i < res_cap_dcn4_01.num_vmid; i++) {
+		hubbub2->vmid[i].ctx = ctx;
+		hubbub2->vmid[i].regs = &vmid_regs[i];
+		hubbub2->vmid[i].shifts = &vmid_shifts;
+		hubbub2->vmid[i].masks = &vmid_masks;
+	}
+
+	return &hubbub2->base;
+}
+
+/*
+ * Allocate a HUBP and construct it for instance @inst.
+ *
+ * Runs hubp_regs_init() for ids 0-3 with REG_STRUCT pointing at hubp_regs.
+ * Returns the embedded base object; returns NULL if the allocation fails or
+ * hubp401_construct() reports failure, in which case the object is freed.
+ */
+static struct hubp *dcn401_hubp_create(
+	struct dc_context *ctx,
+	uint32_t inst)
+{
+	struct dcn20_hubp *hubp2;
+
+	hubp2 = kzalloc(sizeof(*hubp2), GFP_KERNEL);
+	if (hubp2 == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT hubp_regs
+	hubp_regs_init(0),
+	hubp_regs_init(1),
+	hubp_regs_init(2),
+	hubp_regs_init(3);
+
+	if (!hubp401_construct(hubp2, ctx, inst,
+			       &hubp_regs[inst], &hubp_shift, &hubp_mask)) {
+		BREAK_TO_DEBUGGER();
+		kfree(hubp2);
+		return NULL;
+	}
+
+	return &hubp2->base;
+}
+
+/* Free the DCN4.01 DPP that contains *dpp and clear the caller's pointer. */
+static void dcn401_dpp_destroy(struct dpp **dpp)
+{
+	struct dcn401_dpp *dpp401 = TO_DCN401_DPP(*dpp);
+
+	kfree(dpp401);
+	*dpp = NULL;
+}
+
+/*
+ * Allocate a DPP and construct it for instance @inst.
+ *
+ * Runs dpp_regs_init() for ids 0-3 with REG_STRUCT pointing at dpp_regs.
+ * Returns the embedded base object; returns NULL if the allocation fails or
+ * dpp401_construct() reports failure, in which case the object is freed.
+ */
+static struct dpp *dcn401_dpp_create(
+	struct dc_context *ctx,
+	uint32_t inst)
+{
+	struct dcn401_dpp *dpp401;
+
+	dpp401 = kzalloc(sizeof(*dpp401), GFP_KERNEL);
+	if (dpp401 == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT dpp_regs
+	dpp_regs_init(0),
+	dpp_regs_init(1),
+	dpp_regs_init(2),
+	dpp_regs_init(3);
+
+	if (!dpp401_construct(dpp401, ctx, inst,
+			      &dpp_regs[inst], &tf_shift, &tf_mask)) {
+		BREAK_TO_DEBUGGER();
+		kfree(dpp401);
+		return NULL;
+	}
+
+	return &dpp401->base;
+}
+
+/*
+ * Allocate the MPC and construct it with @num_mpcc and @num_rmu.
+ *
+ * Runs dcn_mpc_regs_init() with REG_STRUCT pointing at mpc_regs.
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct mpc *dcn401_mpc_create(
+	struct dc_context *ctx,
+	int num_mpcc,
+	int num_rmu)
+{
+	struct dcn401_mpc *mpc401;
+
+	mpc401 = kzalloc(sizeof(*mpc401), GFP_KERNEL);
+	if (mpc401 == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT mpc_regs
+	dcn_mpc_regs_init();
+
+	dcn401_mpc_construct(mpc401, ctx,
+			     &mpc_regs, &mpc_shift, &mpc_mask,
+			     num_mpcc, num_rmu);
+
+	return &mpc401->base;
+}
+
+/*
+ * Allocate an OPP and construct it for instance @inst.
+ *
+ * Runs opp_regs_init() for ids 0-3 with REG_STRUCT pointing at opp_regs.
+ * Returns the embedded base object; on allocation failure breaks to the
+ * debugger and returns NULL.
+ */
+static struct output_pixel_processor *dcn401_opp_create(
+	struct dc_context *ctx, uint32_t inst)
+{
+	struct dcn20_opp *opp4;
+
+	opp4 = kzalloc(sizeof(*opp4), GFP_KERNEL);
+	if (opp4 == NULL) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+#undef REG_STRUCT
+#define REG_STRUCT opp_regs
+	opp_regs_init(0),
+	opp_regs_init(1),
+	opp_regs_init(2),
+	opp_regs_init(3);
+
+	dcn20_opp_construct(opp4, ctx, inst,
+			    &opp_regs[inst],
+			    &opp_shift,
+			    &opp_mask);
+
+	return &opp4->base;
+}
+
+
+/*
+ * Allocate a timing generator (OPTC) for @instance.
+ *
+ * Runs optc_regs_init() for ids 0-3 with REG_STRUCT pointing at optc_regs,
+ * fills in instance, context and register/shift/mask tables, then calls
+ * dcn401_timing_generator_init().
+ *
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct timing_generator *dcn401_timing_generator_create(
+	struct dc_context *ctx,
+	uint32_t instance)
+{
+	struct optc *tgn10;
+
+	tgn10 = kzalloc(sizeof(*tgn10), GFP_KERNEL);
+	if (tgn10 == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT optc_regs
+	optc_regs_init(0),
+	optc_regs_init(1),
+	optc_regs_init(2),
+	optc_regs_init(3);
+
+	tgn10->base.ctx = ctx;
+	tgn10->base.inst = instance;
+
+	tgn10->tg_mask = &optc_mask;
+	tgn10->tg_shift = &optc_shift;
+	tgn10->tg_regs = &optc_regs[instance];
+
+	dcn401_timing_generator_init(tgn10);
+
+	return &tgn10->base;
+}
+
+/* Link encoder feature set passed to dcn401_link_encoder_construct(). */
+static const struct encoder_feature_support link_enc_feature = {
+ .max_hdmi_deep_color = COLOR_DEPTH_121212,
+ .max_hdmi_pixel_clock = 600000,
+ .hdmi_ycbcr420_supported = true,
+ .dp_ycbcr420_supported = true,
+ .fec_supported = true,
+ .flags.bits.IS_HBR2_CAPABLE = true,
+ .flags.bits.IS_HBR3_CAPABLE = true,
+ .flags.bits.IS_TPS3_CAPABLE = true,
+ .flags.bits.IS_TPS4_CAPABLE = true
+};
+
+/*
+ * Allocate a link encoder and construct it from @enc_init_data.
+ *
+ * The AUX, HPD and link encoder register tables are (re)filled for ids 0-3
+ * on every call. The encoder then receives:
+ *   - link_enc_regs[transmitter]
+ *   - link_enc_aux_regs[channel - 1]
+ *   - link_enc_hpd_regs[hpd_source]
+ *
+ * Returns the embedded base object, or NULL if hpd_source is outside
+ * link_enc_hpd_regs or the allocation fails.
+ *
+ * NOTE(review): transmitter and channel are used as indices without a range
+ * check - confirm that callers guarantee they fit the tables.
+ */
+static struct link_encoder *dcn401_link_encoder_create(
+	struct dc_context *ctx,
+	const struct encoder_init_data *enc_init_data)
+{
+	struct dcn20_link_encoder *enc20;
+
+	/*
+	 * Reject a bad hpd_source before allocating, so the rejection path has
+	 * nothing to free.
+	 */
+	if (enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs))
+		return NULL;
+
+	enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL);
+	if (!enc20)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_aux_regs
+	aux_regs_init(0),
+	aux_regs_init(1),
+	aux_regs_init(2),
+	aux_regs_init(3);
+
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_hpd_regs
+	hpd_regs_init(0),
+	hpd_regs_init(1),
+	hpd_regs_init(2),
+	hpd_regs_init(3);
+#undef REG_STRUCT
+#define REG_STRUCT link_enc_regs
+	link_regs_init(0, A),
+	link_regs_init(1, B),
+	link_regs_init(2, C),
+	link_regs_init(3, D);
+
+	dcn401_link_encoder_construct(enc20,
+			enc_init_data,
+			&link_enc_feature,
+			&link_enc_regs[enc_init_data->transmitter],
+			&link_enc_aux_regs[enc_init_data->channel - 1],
+			&link_enc_hpd_regs[enc_init_data->hpd_source],
+			&le_shift,
+			&le_mask);
+	return &enc20->enc10.base;
+}
+
+/*
+ * Read the DC_PINSTRAPS_AUDIO field of the DC_PINSTRAPS register into
+ * straps->dc_pinstraps_audio. The register address is the DCN base offset
+ * for its segment plus regDC_PINSTRAPS.
+ */
+static void read_dce_straps(
+ struct dc_context *ctx,
+ struct resource_straps *straps)
+{
+ generic_reg_get(ctx, ctx->dcn_reg_offsets[regDC_PINSTRAPS_BASE_IDX] + regDC_PINSTRAPS,
+ FN(DC_PINSTRAPS, DC_PINSTRAPS_AUDIO), &straps->dc_pinstraps_audio);
+
+}
+
+/*
+ * Create the audio object for instance @inst.
+ *
+ * Runs audio_regs_init() for ids 0-4 with REG_STRUCT pointing at audio_regs,
+ * then returns whatever dce_audio_create() returns for &audio_regs[inst].
+ */
+static struct audio *dcn401_create_audio(
+	struct dc_context *ctx, unsigned int inst)
+{
+	struct audio *audio;
+
+#undef REG_STRUCT
+#define REG_STRUCT audio_regs
+	audio_regs_init(0),
+	audio_regs_init(1),
+	audio_regs_init(2),
+	audio_regs_init(3),
+	audio_regs_init(4);
+
+	audio = dce_audio_create(ctx, inst,
+				 &audio_regs[inst],
+				 &audio_shift,
+				 &audio_mask);
+	return audio;
+}
+
+/*
+ * Allocate a VPG and construct it for instance @inst.
+ *
+ * Runs vpg_regs_init() for ids 0-8 with REG_STRUCT pointing at vpg_regs.
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct vpg *dcn401_vpg_create(
+	struct dc_context *ctx,
+	uint32_t inst)
+{
+	struct dcn31_vpg *vpg4;
+
+	vpg4 = kzalloc(sizeof(*vpg4), GFP_KERNEL);
+	if (vpg4 == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT vpg_regs
+	vpg_regs_init(0),
+	vpg_regs_init(1),
+	vpg_regs_init(2),
+	vpg_regs_init(3),
+	vpg_regs_init(4),
+	vpg_regs_init(5),
+	vpg_regs_init(6),
+	vpg_regs_init(7),
+	vpg_regs_init(8);
+
+	vpg31_construct(vpg4, ctx, inst,
+			&vpg_regs[inst], &vpg_shift, &vpg_mask);
+
+	return &vpg4->base;
+}
+
+/*
+ * Allocate an AFMT and construct it for instance @inst.
+ *
+ * Runs afmt_regs_init() for ids 0-4 with REG_STRUCT pointing at afmt_regs.
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct afmt *dcn401_afmt_create(
+	struct dc_context *ctx,
+	uint32_t inst)
+{
+	struct dcn30_afmt *afmt401;
+
+	afmt401 = kzalloc(sizeof(*afmt401), GFP_KERNEL);
+	if (afmt401 == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT afmt_regs
+	afmt_regs_init(0),
+	afmt_regs_init(1),
+	afmt_regs_init(2),
+	afmt_regs_init(3),
+	afmt_regs_init(4);
+
+	afmt3_construct(afmt401, ctx, inst,
+			&afmt_regs[inst], &afmt_shift, &afmt_mask);
+
+	return &afmt401->base;
+}
+
+/*
+ * Allocate an APG and construct it for instance @inst.
+ *
+ * Runs apg_regs_init() for ids 0-3 with REG_STRUCT pointing at apg_regs.
+ * Returns the embedded base object, or NULL if the allocation fails.
+ */
+static struct apg *dcn401_apg_create(
+	struct dc_context *ctx,
+	uint32_t inst)
+{
+	struct dcn31_apg *apg31;
+
+	apg31 = kzalloc(sizeof(*apg31), GFP_KERNEL);
+	if (apg31 == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT apg_regs
+	apg_regs_init(0),
+	apg_regs_init(1),
+	apg_regs_init(2),
+	apg_regs_init(3);
+
+	apg31_construct(apg31, ctx, inst,
+			&apg_regs[inst], &apg_shift, &apg_mask);
+
+	return &apg31->base;
+}
+
+/*
+ * Create the DIO stream encoder for @eng_id together with its VPG and AFMT
+ * sub-blocks, which use the same instance number as the encoder.
+ *
+ * @eng_id is validated before anything is created: it must not exceed
+ * ENGINE_ID_DIGF and must index stream_enc_regs. This way an invalid id is
+ * never used to index the VPG/AFMT register tables and nothing is allocated
+ * only to be freed again.
+ *
+ * Returns the encoder's base object, or NULL if @eng_id is rejected or any
+ * of the three allocations fails (whatever was allocated is freed).
+ */
+static struct stream_encoder *dcn401_stream_encoder_create(
+	enum engine_id eng_id,
+	struct dc_context *ctx)
+{
+	struct dcn10_stream_encoder *enc1;
+	struct vpg *vpg;
+	struct afmt *afmt;
+	int vpg_inst;
+	int afmt_inst;
+
+	if (eng_id > ENGINE_ID_DIGF || eng_id >= ARRAY_SIZE(stream_enc_regs))
+		return NULL;
+
+	/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
+	vpg_inst = eng_id;
+	afmt_inst = eng_id;
+
+	enc1 = kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
+	vpg = dcn401_vpg_create(ctx, vpg_inst);
+	afmt = dcn401_afmt_create(ctx, afmt_inst);
+
+	if (!enc1 || !vpg || !afmt) {
+		kfree(enc1);
+		kfree(vpg);
+		kfree(afmt);
+		return NULL;
+	}
+#undef REG_STRUCT
+#define REG_STRUCT stream_enc_regs
+	stream_enc_regs_init(0),
+	stream_enc_regs_init(1),
+	stream_enc_regs_init(2),
+	stream_enc_regs_init(3);
+
+	dcn401_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios,
+			eng_id, vpg, afmt,
+			&stream_enc_regs[eng_id],
+			&se_shift, &se_mask);
+	return &enc1->base;
+}
+
+/*
+ * Create the HPO DP stream encoder for @eng_id together with its VPG and APG
+ * sub-blocks.
+ *
+ * The HPO instance is eng_id - ENGINE_ID_HPO_DP_0; the range
+ * ENGINE_ID_HPO_DP_0..ENGINE_ID_HPO_DP_3 is asserted, not enforced.
+ *
+ * Returns the encoder's base object, or NULL if any of the three allocations
+ * fails (whatever was allocated is freed).
+ */
+static struct hpo_dp_stream_encoder *dcn401_hpo_dp_stream_encoder_create(
+	enum engine_id eng_id,
+	struct dc_context *ctx)
+{
+	struct dcn31_hpo_dp_stream_encoder *hpo_dp_enc31;
+	struct vpg *vpg;
+	struct apg *apg;
+	uint32_t hpo_dp_inst;
+	uint32_t vpg_inst;
+	uint32_t apg_inst;
+
+	ASSERT((eng_id >= ENGINE_ID_HPO_DP_0) && (eng_id <= ENGINE_ID_HPO_DP_3));
+	hpo_dp_inst = eng_id - ENGINE_ID_HPO_DP_0;
+
+	/*
+	 * VPG register block used by each HPO DP instance:
+	 *   HPO_DP[0] -> VPG[5]
+	 *   HPO_DP[1] -> VPG[6]
+	 *   HPO_DP[2] -> VPG[7]
+	 *   HPO_DP[3] -> VPG[8]
+	 */
+	vpg_inst = hpo_dp_inst + 5;
+
+	/* APG register blocks map one to one: HPO_DP[n] -> APG[n], n = 0..3. */
+	apg_inst = hpo_dp_inst;
+
+	/* allocate the HPO stream encoder and create its VPG/APG sub-blocks */
+	hpo_dp_enc31 = kzalloc(sizeof(*hpo_dp_enc31), GFP_KERNEL);
+	vpg = dcn401_vpg_create(ctx, vpg_inst);
+	apg = dcn401_apg_create(ctx, apg_inst);
+
+	if (hpo_dp_enc31 == NULL || vpg == NULL || apg == NULL) {
+		kfree(hpo_dp_enc31);
+		kfree(vpg);
+		kfree(apg);
+		return NULL;
+	}
+
+#undef REG_STRUCT
+#define REG_STRUCT hpo_dp_stream_enc_regs
+	hpo_dp_stream_encoder_reg_init(0),
+	hpo_dp_stream_encoder_reg_init(1),
+	hpo_dp_stream_encoder_reg_init(2),
+	hpo_dp_stream_encoder_reg_init(3);
+
+	dcn31_hpo_dp_stream_encoder_construct(
+		hpo_dp_enc31, ctx, ctx->dc_bios,
+		hpo_dp_inst, eng_id, vpg, apg,
+		&hpo_dp_stream_enc_regs[hpo_dp_inst],
+		&hpo_dp_se_shift, &hpo_dp_se_mask);
+
+	return &hpo_dp_enc31->base;
+}
+
+/*
+ * Allocate an HPO DP link encoder and construct it for instance @inst.
+ *
+ * Runs hpo_dp_link_encoder_reg_init() for ids 0-3 with REG_STRUCT pointing
+ * at hpo_dp_link_enc_regs.
+ * Returns the embedded base object, or NULL when out of memory.
+ */
+static struct hpo_dp_link_encoder *dcn401_hpo_dp_link_encoder_create(
+	uint8_t inst,
+	struct dc_context *ctx)
+{
+	struct dcn31_hpo_dp_link_encoder *hpo_dp_enc31;
+
+	hpo_dp_enc31 = kzalloc(sizeof(*hpo_dp_enc31), GFP_KERNEL);
+	if (hpo_dp_enc31 == NULL)
+		return NULL;
+
+#undef REG_STRUCT
+#define REG_STRUCT hpo_dp_link_enc_regs
+	hpo_dp_link_encoder_reg_init(0),
+	hpo_dp_link_encoder_reg_init(1),
+	hpo_dp_link_encoder_reg_init(2),
+	hpo_dp_link_encoder_reg_init(3);
+
+	hpo_dp_link_encoder32_construct(hpo_dp_enc31, ctx, inst,
+					&hpo_dp_link_enc_regs[inst],
+					&hpo_dp_le_shift,
+					&hpo_dp_le_mask);
+
+	return &hpo_dp_enc31->base;
+}
+
+/*
+ * Number of memory channels usable for MALL.
+ *
+ * Rounds @num_chans down to a power of two, clears bit 0 (so an input of 0
+ * or 1 yields 0), then clamps to 16 on GC 12.0.0 A0 or to 8 on GC 12.0.1 A0,
+ * keyed on dc->ctx->asic_id.hw_internal_rev.
+ */
+static unsigned int dcn401_calc_num_avail_chans_for_mall(struct dc *dc, unsigned int num_chans)
+{
+	unsigned int pow2_chans;
+
+	/* channels for MALL must be a power of 2 */
+	for (pow2_chans = 1; num_chans > 1; num_chans >>= 1)
+		pow2_chans <<= 1;
+
+	/* cannot be odd */
+	pow2_chans &= ~1;
+
+	/* clamp to max available channels for MALL per ASIC */
+	if (ASICREV_IS_GC_12_0_0_A0(dc->ctx->asic_id.hw_internal_rev)) {
+		if (pow2_chans > 16)
+			pow2_chans = 16;
+	} else if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) {
+		if (pow2_chans > 8)
+			pow2_chans = 8;
+	}
+
+	return pow2_chans;
+}
+
+/*
+ * Allocate the hardware sequencer object and point it at the DCN4.01
+ * register, shift and mask tables.
+ *
+ * hwseq_reg_init() runs with REG_STRUCT pointing at hwseq_reg whether or not
+ * the allocation succeeded. Returns the new object, or NULL if the
+ * allocation fails.
+ */
+static struct dce_hwseq *dcn401_hwseq_create(
+	struct dc_context *ctx)
+{
+	struct dce_hwseq *hws;
+
+	hws = kzalloc(sizeof(*hws), GFP_KERNEL);
+
+#undef REG_STRUCT
+#define REG_STRUCT hwseq_reg
+	hwseq_reg_init();
+
+	if (hws == NULL)
+		return NULL;
+
+	hws->ctx = ctx;
+	hws->regs = &hwseq_reg;
+	hws->shifts = &hwseq_shift;
+	hws->masks = &hwseq_mask;
+
+	return hws;
+}
+/* Creation hooks; each entry points at a static function defined in this file. */
+static const struct resource_create_funcs res_create_funcs = {
+ .read_dce_straps = read_dce_straps,
+ .create_audio = dcn401_create_audio,
+ .create_stream_encoder = dcn401_stream_encoder_create,
+ .create_hpo_dp_stream_encoder = dcn401_hpo_dp_stream_encoder_create,
+ .create_hpo_dp_link_encoder = dcn401_hpo_dp_link_encoder_create,
+ .create_hwseq = dcn401_hwseq_create,
+};
+
+/* Free the dcn401_dsc that embeds *dsc as its base and clear the caller's pointer. */
+static void dcn401_dsc_destroy(struct display_stream_compressor **dsc)
+{
+	struct dcn401_dsc *dsc401 = container_of(*dsc, struct dcn401_dsc, base);
+
+	kfree(dsc401);
+	*dsc = NULL;
+}
+
+/* Free every sub-object owned by @pool; members are NULL-checked, @pool itself is freed by the caller. */
+static void dcn401_resource_destruct(struct dcn401_resource_pool *pool)
+{
+	unsigned int i;
+
+	for (i = 0; i < pool->base.stream_enc_count; i++) {
+		if (pool->base.stream_enc[i] != NULL) {
+			if (pool->base.stream_enc[i]->vpg != NULL) {
+				kfree(DCN31_VPG_FROM_VPG(pool->base.stream_enc[i]->vpg));
+				pool->base.stream_enc[i]->vpg = NULL;
+			}
+			if (pool->base.stream_enc[i]->afmt != NULL) {
+				kfree(DCN30_AFMT_FROM_AFMT(pool->base.stream_enc[i]->afmt));
+				pool->base.stream_enc[i]->afmt = NULL;
+			}
+			kfree(DCN10STRENC_FROM_STRENC(pool->base.stream_enc[i]));
+			pool->base.stream_enc[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.hpo_dp_stream_enc_count; i++) {
+		if (pool->base.hpo_dp_stream_enc[i] != NULL) {
+			if (pool->base.hpo_dp_stream_enc[i]->vpg != NULL) {
+				kfree(DCN31_VPG_FROM_VPG(pool->base.hpo_dp_stream_enc[i]->vpg));
+				pool->base.hpo_dp_stream_enc[i]->vpg = NULL;
+			}
+			if (pool->base.hpo_dp_stream_enc[i]->apg != NULL) {
+				kfree(DCN31_APG_FROM_APG(pool->base.hpo_dp_stream_enc[i]->apg));
+				pool->base.hpo_dp_stream_enc[i]->apg = NULL;
+			}
+			kfree(DCN3_1_HPO_DP_STREAM_ENC_FROM_HPO_STREAM_ENC(pool->base.hpo_dp_stream_enc[i]));
+			pool->base.hpo_dp_stream_enc[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.hpo_dp_link_enc_count; i++) {
+		if (pool->base.hpo_dp_link_enc[i] != NULL) {
+			kfree(DCN3_1_HPO_DP_LINK_ENC_FROM_HPO_LINK_ENC(pool->base.hpo_dp_link_enc[i]));
+			pool->base.hpo_dp_link_enc[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
+		if (pool->base.dscs[i] != NULL)
+			dcn401_dsc_destroy(&pool->base.dscs[i]);
+	}
+
+	if (pool->base.mpc != NULL) {
+		kfree(TO_DCN20_MPC(pool->base.mpc));
+		pool->base.mpc = NULL;
+	}
+	if (pool->base.hubbub != NULL) {
+		kfree(TO_DCN20_HUBBUB(pool->base.hubbub));
+		pool->base.hubbub = NULL;
+	}
+	for (i = 0; i < pool->base.pipe_count; i++) {
+		if (pool->base.dpps[i] != NULL)
+			dcn401_dpp_destroy(&pool->base.dpps[i]);
+
+		if (pool->base.ipps[i] != NULL)
+			pool->base.ipps[i]->funcs->ipp_destroy(&pool->base.ipps[i]);
+
+		if (pool->base.hubps[i] != NULL) {
+			kfree(TO_DCN20_HUBP(pool->base.hubps[i]));
+			pool->base.hubps[i] = NULL;
+		}
+	}
+
+	/* pool-wide, so destroyed outside the pipe loop: it must not leak when pipe_count is 0 */
+	if (pool->base.irqs != NULL)
+		dal_irq_service_destroy(&pool->base.irqs);
+
+	for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
+		if (pool->base.engines[i] != NULL)
+			dce110_engine_destroy(&pool->base.engines[i]);
+		if (pool->base.hw_i2cs[i] != NULL) {
+			kfree(pool->base.hw_i2cs[i]);
+			pool->base.hw_i2cs[i] = NULL;
+		}
+		if (pool->base.sw_i2cs[i] != NULL) {
+			kfree(pool->base.sw_i2cs[i]);
+			pool->base.sw_i2cs[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_opp; i++) {
+		if (pool->base.opps[i] != NULL)
+			pool->base.opps[i]->funcs->opp_destroy(&pool->base.opps[i]);
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+		if (pool->base.timing_generators[i] != NULL) {
+			kfree(DCN10TG_FROM_TG(pool->base.timing_generators[i]));
+			pool->base.timing_generators[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_dwb; i++) {
+		if (pool->base.dwbc[i] != NULL) {
+			kfree(TO_DCN30_DWBC(pool->base.dwbc[i]));
+			pool->base.dwbc[i] = NULL;
+		}
+		if (pool->base.mcif_wb[i] != NULL) {
+			kfree(TO_DCN30_MMHUBBUB(pool->base.mcif_wb[i]));
+			pool->base.mcif_wb[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.audio_count; i++) {
+		if (pool->base.audios[i])
+			dce_aud_destroy(&pool->base.audios[i]);
+	}
+
+	for (i = 0; i < pool->base.clk_src_count; i++) {
+		if (pool->base.clock_sources[i] != NULL) {
+			dcn20_clock_source_destroy(&pool->base.clock_sources[i]);
+			pool->base.clock_sources[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_mpc_3dlut; i++) {
+		if (pool->base.mpc_lut[i] != NULL) {
+			dc_3dlut_func_release(pool->base.mpc_lut[i]);
+			pool->base.mpc_lut[i] = NULL;
+		}
+		if (pool->base.mpc_shaper[i] != NULL) {
+			dc_transfer_func_release(pool->base.mpc_shaper[i]);
+			pool->base.mpc_shaper[i] = NULL;
+		}
+	}
+
+	if (pool->base.dp_clock_source != NULL) {
+		dcn20_clock_source_destroy(&pool->base.dp_clock_source);
+		pool->base.dp_clock_source = NULL;
+	}
+
+	for (i = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+		if (pool->base.multiple_abms[i] != NULL)
+			dce_abm_destroy(&pool->base.multiple_abms[i]);
+	}
+
+	if (pool->base.psr != NULL)
+		dmub_psr_destroy(&pool->base.psr);
+
+	if (pool->base.dccg != NULL)
+		dcn_dccg_destroy(&pool->base.dccg);
+
+	if (pool->base.oem_device != NULL) {
+		struct dc *dc = pool->base.oem_device->ctx->dc;
+
+		dc->link_srv->destroy_ddc_service(&pool->base.oem_device);
+	}
+}
+
+/*
+ * Allocate and construct one DWB controller per res_cap->num_dwb entry and
+ * publish it in pool->dwbc[]. Returns false as soon as an allocation fails.
+ */
+static bool dcn401_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
+{
+	const uint32_t num_dwb = pool->res_cap->num_dwb;
+	int i;
+
+	for (i = 0; i < num_dwb; i++) {
+		struct dcn30_dwbc *dwbc = kzalloc(sizeof(*dwbc), GFP_KERNEL);
+
+		if (dwbc == NULL) {
+			dm_error("DC: failed to create dwbc401!\n");
+			return false;
+		}
+
+		/* NOTE(review): the init macro is given index 0 only while regs[i] is used - confirm num_dwb == 1 */
+#undef REG_STRUCT
+#define REG_STRUCT dwbc401_regs
+		dwbc_regs_dcn401_init(0);
+
+		dcn30_dwbc_construct(dwbc, ctx, &dwbc401_regs[i],
+				     &dwbc401_shift, &dwbc401_mask, i);
+
+		pool->dwbc[i] = &dwbc->base;
+	}
+
+	return true;
+}
+
+/* Build one MCIF_WB object per res_cap->num_dwb into pool->mcif_wb[]; false if an allocation fails. */
+static bool dcn401_mmhubbub_create(struct dc_context *ctx, struct resource_pool *pool)
+{
+	const uint32_t num_dwb = pool->res_cap->num_dwb;
+	int i;
+
+	for (i = 0; i < num_dwb; i++) {
+		struct dcn30_mmhubbub *mmhubbub = kzalloc(sizeof(*mmhubbub), GFP_KERNEL);
+
+		if (mmhubbub == NULL) {
+			dm_error("DC: failed to create mcif_wb30!\n");
+			return false;
+		}
+
+		/* NOTE(review): the init macro is given index 0 only while regs[i] is used - confirm num_dwb == 1 */
+#undef REG_STRUCT
+#define REG_STRUCT mcif_wb30_regs
+		mcif_wb_regs_dcn3_init(0);
+
+		dcn32_mmhubbub_construct(mmhubbub, ctx, &mcif_wb30_regs[i],
+					 &mcif_wb30_shift, &mcif_wb30_mask, i);
+
+		pool->mcif_wb[i] = &mmhubbub->base;
+	}
+
+	return true;
+}
+
+
+/*
+ * Allocate and construct DSC instance @inst; returns its base object, or NULL on allocation failure.
+ */
+static struct display_stream_compressor *dcn401_dsc_create(struct dc_context *ctx, uint32_t inst)
+{
+	struct dcn401_dsc *dsc = kzalloc(sizeof(*dsc), GFP_KERNEL);
+
+	if (dsc == NULL) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+#undef REG_STRUCT
+#define REG_STRUCT dsc_regs
+	dsc_regs_init(0),
+	dsc_regs_init(1),
+	dsc_regs_init(2),
+	dsc_regs_init(3);
+
+	dsc401_construct(dsc, ctx, inst, &dsc_regs[inst], &dsc_shift, &dsc_mask);
+	dsc401_set_fgcg(dsc, ctx->dc->debug.enable_fine_grain_clock_gating.bits.dsc);
+
+	/* width limit is 5760 (a 6016 setting was left disabled in the original code) */
+	dsc->max_image_width = 5760;
+
+	return &dsc->base;
+}
+
+static void dcn401_destroy_resource_pool(struct resource_pool **pool)
+{
+	struct dcn401_resource_pool *owner = TO_DCN401_RES_POOL(*pool);
+
+	dcn401_resource_destruct(owner);	/* members first ... */
+	kfree(owner);				/* ... then the container itself */
+	*pool = NULL;				/* the caller's handle must not dangle */
+}
+
+static struct dc_cap_funcs cap_funcs = { /* copied by value into dc->cap_funcs by dcn401_resource_construct() */
+ .get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
+ .get_subvp_en = dcn32_subvp_in_use,
+};
+
+static void dcn401_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+	/* Recompute the usable MALL bytes only when the new params report a channel count. */
+	if (bw_params->num_channels > 0) {
+		dc->caps.max_cab_allocation_bytes =
+			dcn401_calc_num_avail_chans_for_mall(dc, bw_params->num_channels) *
+			dc->caps.mall_size_per_mem_channel * 1024 * 1024;
+		dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes;
+	}
+
+	/* Re-initialise whichever DML2 contexts the current state carries, inside the FP section. */
+	DC_FP_START();
+	if (dc->debug.using_dml2 && dc->current_state) {
+		if (dc->current_state->bw_ctx.dml2)
+			dml2_reinit(dc, &dc->dml2_options, &dc->current_state->bw_ctx.dml2);
+		if (dc->current_state->bw_ctx.dml2_dc_power_source)
+			dml2_reinit(dc, &dc->dml2_dc_power_options, &dc->current_state->bw_ctx.dml2_dc_power_source);
+	}
+	DC_FP_END();
+}
+
+enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_state)
+{
+ plane_state->tiling_info.gfxversion = DcGfxAddr3; /* give the plane a default Addr3 tiling description ... */
+ plane_state->tiling_info.gfx_addr3.swizzle = DC_ADDR3_SW_64KB_2D; /* ... using the 64KB 2D swizzle */
+ return DC_OK; /* unconditional: this function has no failure path */
+}
+
+enum dc_status dcn401_validate_bandwidth(struct dc *dc,
+		struct dc_state *context,
+		enum dc_validate_mode validate_mode)
+{
+	const bool programming = (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING);
+	const struct dc_stream_state *stream;
+	enum dc_status status = DC_OK;
+	unsigned int idx;
+
+	/* Start clean: lift cursor-imposed subvp limits wherever the state allows it. */
+	for (idx = 0; idx < context->stream_count; idx++) {
+		stream = context->streams[idx];
+		if (dc_state_can_clear_stream_cursor_subvp_limit(stream, context))
+			dc_state_set_stream_cursor_subvp_limit(stream, context, false);
+	}
+
+	/* First pass: DML2 validation against the context matching the power source. */
+	if (dc->debug.using_dml2)
+		status = dml2_validate(dc, context,
+				context->power_source == DC_POWER_SOURCE_DC ?
+				context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2,
+				validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
+
+	/* With subvp in use, each non-phantom stream with an enabled cursor must still pass the cursor check. */
+	if (programming && status == DC_OK && dc_state_is_subvp_in_use(context)) {
+		for (idx = 0; idx < context->stream_count; idx++) {
+			stream = context->streams[idx];
+			if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM ||
+					!stream->cursor_position.enable ||
+					dc_stream_check_cursor_attributes(stream, context, &stream->cursor_attributes))
+				continue;
+
+			/* hw cursor cannot be supported with subvp active, so disable subvp for now */
+			dc_state_set_stream_cursor_subvp_limit(stream, context, true);
+			status = DC_FAIL_HW_CURSOR_SUPPORT;
+		}
+	}
+
+	/* Second pass: validate again now that the offending streams carry the subvp limit. */
+	if (programming && status == DC_FAIL_HW_CURSOR_SUPPORT && dc->debug.using_dml2)
+		status = dml2_validate(dc, context,
+				context->power_source == DC_POWER_SOURCE_DC ?
+				context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2,
+				validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
+
+	return status;
+}
+
+void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context)
+{
+	if (!dc->debug.using_dml21)	/* nothing to prepare unless the using_dml21 debug flag is set */
+		return;
+	dml2_prepare_mcache_programming(dc, context, context->power_source == DC_POWER_SOURCE_DC ?
+			context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2);
+}
+
+static void dcn401_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx)
+{
+	const struct dc_stream_state *stream = pipe_ctx->stream;
+	struct pixel_clk_params *params = &pipe_ctx->stream_res.pix_clk_params;
+	struct link_encoder *enc = pipe_ctx->link_res.dio_link_enc;
+
+	/* Requested pixel clock: the stream timing, unless DSC padding supplies its own value. */
+	if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0)
+		params->requested_pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz;
+	else
+		params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz;
+
+	/* Without unified encoder assignment the encoder comes from the link_enc_cfg lookup. */
+	if (!stream->ctx->dc->config.unify_link_enc_assignment)
+		enc = link_enc_cfg_get_link_enc(stream->link);
+	if (enc != NULL)
+		params->encoder_object_id = enc->id;
+
+	params->signal_type = stream->signal;
+	params->controller_id = pipe_ctx->stream_res.tg->inst + 1;
+	params->pixel_encoding = stream->timing.pixel_encoding;
+	params->flags.ENABLE_SS = 0;
+	params->flags.DISPLAY_BLANKED = 1;
+
+	/* TODO: un-hardcode */
+	/* TODO - DP2.0 HW: calculate requested_sym_clk for UHBR rates */
+	params->requested_sym_clk = LINK_RATE_LOW * LINK_RATE_REF_FREQ_IN_KHZ;
+
+	/* YCbCr 4:2:2 forces COLOR_DEPTH_888; everything else keeps the stream's depth. */
+	if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422)
+		params->color_depth = COLOR_DEPTH_888;
+	else
+		params->color_depth = stream->timing.display_color_depth;
+
+	/* HW frame packing doubles the requested clock; TMDS 4:2:0 halves it. */
+	if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
+		params->requested_pix_clk_100hz *= 2;
+	if (dc_is_tmds_signal(stream->signal) &&
+			stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
+		params->requested_pix_clk_100hz /= 2;
+
+	/* The dividers are computed before dio_se_pix_per_cycle is filled in below. */
+	pipe_ctx->clock_source->funcs->get_pix_clk_dividers(
+			pipe_ctx->clock_source,
+			params,
+			&pipe_ctx->pll_settings);
+
+	/* Pixels per cycle: 2 for TMDS 4:2:0; for DP, stepped up by multiples of dprefclk. */
+	params->dio_se_pix_per_cycle = 1;
+	if (dc_is_tmds_signal(stream->signal) &&
+			stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) {
+		params->dio_se_pix_per_cycle = 2;
+	} else if (dc_is_dp_signal(stream->signal)) {
+		/* round up to nearest power of 2, or max at 8 pixels per cycle */
+		if (params->requested_pix_clk_100hz > 4 * stream->ctx->dc->clk_mgr->dprefclk_khz * 10)
+			params->dio_se_pix_per_cycle = 8;
+		else if (params->requested_pix_clk_100hz > 2 * stream->ctx->dc->clk_mgr->dprefclk_khz * 10)
+			params->dio_se_pix_per_cycle = 4;
+		else if (params->requested_pix_clk_100hz > stream->ctx->dc->clk_mgr->dprefclk_khz * 10)
+			params->dio_se_pix_per_cycle = 2;
+	}
+}
+
+static int dcn401_get_power_profile(const struct dc_state *context)
+{
+	const struct clk_bw_params *bw = context->clk_mgr->bw_params;
+	const int uclk_mhz = context->bw_ctx.bw.dcn.clk.dramclk_khz / 1000;
+	int level = 0;
+
+	for (int idx = 0; idx < bw->clk_table.num_entries_per_clk.num_memclk_levels; idx++) {
+		if (bw->clk_table.entries[idx].memclk_mhz == 0 ||
+				uclk_mhz < bw->clk_table.entries[idx].memclk_mhz)
+			break;	/* empty slot, or first memclk level above the current uclk */
+		level += (uclk_mhz > bw->clk_table.entries[idx].memclk_mhz) ? 1 : 0;
+	}
+
+	return level;	/* number of memclk levels strictly below uclk_mhz before the stop */
+}
+
+static unsigned int dcn401_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx)
+{
+ return pipe_ctx->global_sync.dcn4x.vstartup_lines; /* plain accessor: the dcn4x global-sync vstartup line count */
+}
+
+static struct resource_funcs dcn401_res_pool_funcs = { /* installed as pool->base.funcs by dcn401_resource_construct() */
+ .destroy = dcn401_destroy_resource_pool,
+ .link_enc_create = dcn401_link_encoder_create,
+ .link_enc_create_minimal = NULL, /* hook explicitly left unset */
+ .panel_cntl_create = dcn32_panel_cntl_create,
+ .validate_bandwidth = dcn401_validate_bandwidth,
+ .calculate_wm_and_dlg = NULL, /* hook explicitly left unset */
+ .populate_dml_pipes = NULL, /* hook explicitly left unset */
+ .acquire_free_pipe_as_secondary_dpp_pipe = dcn32_acquire_free_pipe_as_secondary_dpp_pipe,
+ .acquire_free_pipe_as_secondary_opp_head = dcn32_acquire_free_pipe_as_secondary_opp_head,
+ .release_pipe = dcn20_release_pipe,
+ .add_stream_to_ctx = dcn30_add_stream_to_ctx,
+ .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource,
+ .remove_stream_from_ctx = dcn20_remove_stream_from_ctx,
+ .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context,
+ .set_mcif_arb_params = dcn30_set_mcif_arb_params,
+ .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
+ .acquire_post_bldn_3dlut = dcn32_acquire_post_bldn_3dlut,
+ .release_post_bldn_3dlut = dcn32_release_post_bldn_3dlut,
+ .update_bw_bounding_box = dcn401_update_bw_bounding_box,
+ .patch_unknown_plane_state = dcn401_patch_unknown_plane_state,
+ .update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
+ .add_phantom_pipes = dcn32_add_phantom_pipes,
+ .prepare_mcache_programming = dcn401_prepare_mcache_programming,
+ .build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params,
+ .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, /* also handed to the DML2 svp_pstate callbacks in construct */
+ .get_power_profile = dcn401_get_power_profile,
+ .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe,
+ .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size
+};
+
+static uint32_t read_pipe_fuses(struct dc_context *ctx)
+{
+	/* DCN401 supports at most 4 pipes: only bits 3:0 of CC_DC_PIPE_DIS are reported. */
+	const uint32_t fuses = REG_READ(CC_DC_PIPE_DIS);
+
+	return fuses & 0xf;
+}
+
+/* Fill in @dc caps/config for DCN4.01 and create every HW block object of @pool; returns false after cleanup on failure. */
+static bool dcn401_resource_construct(
+ uint8_t num_virtual_links,
+ struct dc *dc,
+ struct dcn401_resource_pool *pool)
+{
+ int i, j;
+ struct dc_context *ctx = dc->ctx;
+ struct irq_service_init_data init_data;
+ struct ddc_service_init_data ddc_init_data = {0};
+ uint32_t pipe_fuses = 0;
+ uint32_t num_pipes = 4;
+
+#undef REG_STRUCT
+#define REG_STRUCT bios_regs
+ bios_regs_init();
+
+#undef REG_STRUCT
+#define REG_STRUCT clk_src_regs
+ clk_src_regs_init(0, A),
+ clk_src_regs_init(1, B),
+ clk_src_regs_init(2, C),
+ clk_src_regs_init(3, D);
+
+#undef REG_STRUCT
+#define REG_STRUCT abm_regs
+ abm_regs_init(0),
+ abm_regs_init(1),
+ abm_regs_init(2),
+ abm_regs_init(3);
+
+#undef REG_STRUCT
+#define REG_STRUCT dccg_regs
+ dccg_regs_init();
+
+ ctx->dc_bios->regs = &bios_regs;
+
+ pool->base.res_cap = &res_cap_dcn4_01;
+
+ /* max number of pipes for ASIC before checking for pipe fuses */
+ num_pipes = pool->base.res_cap->num_timing_generator;
+ pipe_fuses = read_pipe_fuses(ctx);
+
+ for (i = 0; i < pool->base.res_cap->num_timing_generator; i++)
+ if (pipe_fuses & 1 << i)
+ num_pipes--;
+
+ if (pipe_fuses & 1)
+ ASSERT(0); //Unexpected - Pipe 0 should always be fully functional!
+
+ if (pipe_fuses & CC_DC_PIPE_DIS__DC_FULL_DIS_MASK)
+ ASSERT(0); //Entire DCN is harvested!
+
+ pool->base.funcs = &dcn401_res_pool_funcs;
+
+ /*************************************************
+ * Resource + asic cap hardcoding *
+ *************************************************/
+ pool->base.underlay_pipe_index = NO_UNDERLAY_PIPE;
+ pool->base.timing_generator_count = num_pipes;
+ pool->base.pipe_count = num_pipes;
+ pool->base.mpcc_count = num_pipes;
+ dc->caps.max_downscale_ratio = 600;
+ dc->caps.i2c_speed_in_khz = 95;
+ dc->caps.i2c_speed_in_khz_hdcp = 95; /*1.4 w/a applied by default*/
+ /* used to set cursor pitch, so must be aligned to power of 2 (HW actually supported 78x78) */
+ dc->caps.max_cursor_size = 64;
+ dc->caps.max_buffered_cursor_size = 64;
+ dc->caps.cursor_not_scaled = true;
+ dc->caps.min_horizontal_blanking_period = 80;
+ dc->caps.dmdata_alloc_size = 2048;
+ dc->caps.mall_size_per_mem_channel = 4;
+ dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8;
+ dc->caps.cache_line_size = 64;
+ dc->caps.cache_num_ways = 16;
+
+ /* Calculate the available MALL space */
+ dc->caps.max_cab_allocation_bytes = dcn401_calc_num_avail_chans_for_mall(
+ dc, dc->ctx->dc_bios->vram_info.num_chans) *
+ dc->caps.mall_size_per_mem_channel * 1024 * 1024;
+ dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes;
+
+ dc->caps.subvp_fw_processing_delay_us = 15;
+ dc->caps.subvp_drr_max_vblank_margin_us = 40;
+ dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
+ dc->caps.subvp_swath_height_margin_lines = 16;
+ dc->caps.subvp_pstate_allow_width_us = 20;
+ dc->caps.subvp_vertical_int_margin_us = 30;
+ dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin
+
+ dc->caps.max_slave_planes = 3;
+ dc->caps.max_slave_yuv_planes = 3;
+ dc->caps.max_slave_rgb_planes = 3;
+ dc->caps.post_blend_color_processing = true;
+ dc->caps.force_dp_tps4_for_cp2520 = true;
+ dc->caps.dp_hpo = true;
+ dc->caps.dp_hdmi21_pcon_support = true;
+ dc->caps.edp_dsc_support = true;
+ dc->caps.extended_aux_timeout_support = true;
+ dc->caps.dmcub_support = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
+
+ if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev))
+ dc->caps.dcc_plane_width_limit = 7680;
+
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+ dc->caps.color.dpp.input_lut_shared = 0;
+ dc->caps.color.dpp.icsc = 1;
+ dc->caps.color.dpp.dgam_ram = 0; // must use gamma_corr
+ dc->caps.color.dpp.dgam_rom_caps.srgb = 1;
+ dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1;
+ dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 1;
+ dc->caps.color.dpp.dgam_rom_caps.pq = 1;
+ dc->caps.color.dpp.dgam_rom_caps.hlg = 1;
+ dc->caps.color.dpp.post_csc = 1;
+ dc->caps.color.dpp.gamma_corr = 1;
+ dc->caps.color.dpp.dgam_rom_for_yuv = 0;
+
+ dc->caps.color.dpp.hw_3d_lut = 0;
+ dc->caps.color.dpp.ogam_ram = 0;
+ // no OGAM ROM on DCN2 and later ASICs
+ dc->caps.color.dpp.ogam_rom_caps.srgb = 0;
+ dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.dpp.ogam_rom_caps.pq = 0;
+ dc->caps.color.dpp.ogam_rom_caps.hlg = 0;
+ dc->caps.color.dpp.ocsc = 0;
+
+ dc->caps.color.mpc.gamut_remap = 1;
+ dc->caps.color.mpc.num_3dluts = pool->base.res_cap->num_mpc_3dlut; //4, configurable to be before or after BLND in MPCC
+ dc->caps.color.mpc.ogam_ram = 1;
+ dc->caps.color.mpc.ogam_rom_caps.srgb = 0;
+ dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0;
+ dc->caps.color.mpc.ogam_rom_caps.pq = 0;
+ dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
+ dc->caps.color.mpc.ocsc = 1;
+ dc->caps.color.mpc.preblend = true;
+ dc->config.use_spl = true;
+ dc->config.prefer_easf = true;
+
+ dc->config.dcn_sharpness_range.sdr_rgb_min = 0;
+ dc->config.dcn_sharpness_range.sdr_rgb_max = 1750;
+ dc->config.dcn_sharpness_range.sdr_rgb_mid = 750;
+ dc->config.dcn_sharpness_range.sdr_yuv_min = 0;
+ dc->config.dcn_sharpness_range.sdr_yuv_max = 3500;
+ dc->config.dcn_sharpness_range.sdr_yuv_mid = 1500;
+ dc->config.dcn_sharpness_range.hdr_rgb_min = 0;
+ dc->config.dcn_sharpness_range.hdr_rgb_max = 2750;
+ dc->config.dcn_sharpness_range.hdr_rgb_mid = 1500;
+
+ dc->config.dcn_override_sharpness_range.sdr_rgb_min = 0;
+ dc->config.dcn_override_sharpness_range.sdr_rgb_max = 3250;
+ dc->config.dcn_override_sharpness_range.sdr_rgb_mid = 1250;
+ dc->config.dcn_override_sharpness_range.sdr_yuv_min = 0;
+ dc->config.dcn_override_sharpness_range.sdr_yuv_max = 3500;
+ dc->config.dcn_override_sharpness_range.sdr_yuv_mid = 1500;
+ dc->config.dcn_override_sharpness_range.hdr_rgb_min = 0;
+ dc->config.dcn_override_sharpness_range.hdr_rgb_max = 2750;
+ dc->config.dcn_override_sharpness_range.hdr_rgb_mid = 1500;
+
+ dc->config.dc_mode_clk_limit_support = true;
+ dc->config.enable_windowed_mpo_odm = true;
+ dc->config.set_pipe_unlock_order = true; /* Need to ensure DET gets freed before allocating */
+
+ /* read VBIOS LTTPR caps */
+ {
+ if (ctx->dc_bios->funcs->get_lttpr_caps) {
+ enum bp_result bp_query_result;
+ uint8_t is_vbios_lttpr_enable = 0;
+
+ bp_query_result = ctx->dc_bios->funcs->get_lttpr_caps(ctx->dc_bios, &is_vbios_lttpr_enable);
+ dc->caps.vbios_lttpr_enable = (bp_query_result == BP_RESULT_OK) && !!is_vbios_lttpr_enable;
+ }
+
+ /* interop bit is implicit */
+ {
+ dc->caps.vbios_lttpr_aware = true;
+ }
+ }
+
+ if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
+ dc->debug = debug_defaults_drv;
+
+ // Init the vm_helper
+ if (dc->vm_helper)
+ vm_helper_init(dc->vm_helper, 16);
+
+ /*************************************************
+ * Create resources *
+ *************************************************/
+
+ /* Clock Sources for Pixel Clock*/
+ pool->base.clock_sources[DCN401_CLK_SRC_PLL0] =
+ dcn401_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL0,
+ &clk_src_regs[0], false);
+ pool->base.clock_sources[DCN401_CLK_SRC_PLL1] =
+ dcn401_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL1,
+ &clk_src_regs[1], false);
+ pool->base.clock_sources[DCN401_CLK_SRC_PLL2] =
+ dcn401_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL2,
+ &clk_src_regs[2], false);
+ pool->base.clock_sources[DCN401_CLK_SRC_PLL3] =
+ dcn401_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_COMBO_PHY_PLL3,
+ &clk_src_regs[3], false);
+ // pool->base.clock_sources[DCN401_CLK_SRC_PLL4] =
+ // dcn401_clock_source_create(ctx, ctx->dc_bios,
+ // CLOCK_SOURCE_COMBO_PHY_PLL4,
+ // &clk_src_regs[4], false);
+
+ pool->base.clk_src_count = DCN401_CLK_SRC_TOTAL;
+
+ /* todo: not reuse phy_pll registers */
+ pool->base.dp_clock_source =
+ dcn401_clock_source_create(ctx, ctx->dc_bios,
+ CLOCK_SOURCE_ID_DP_DTO,
+ &clk_src_regs[0], true);
+
+ for (i = 0; i < pool->base.clk_src_count; i++) {
+ if (pool->base.clock_sources[i] == NULL) {
+ dm_error("DC: failed to create clock sources!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+ }
+
+ /* DCCG */
+ pool->base.dccg = dccg401_create(ctx, &dccg_regs, &dccg_shift, &dccg_mask);
+ if (pool->base.dccg == NULL) {
+ dm_error("DC: failed to create dccg!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+ /* IRQ Service */
+ init_data.ctx = dc->ctx;
+ pool->base.irqs = dal_irq_service_dcn401_create(&init_data);
+ if (!pool->base.irqs)
+ goto create_fail;
+
+ /* HUBBUB */
+ pool->base.hubbub = dcn401_hubbub_create(ctx);
+ if (pool->base.hubbub == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create hubbub!\n");
+ goto create_fail;
+ }
+
+ /* HUBPs, DPPs, OPPs, TGs, ABMs */
+ for (i = 0, j = 0; i < pool->base.res_cap->num_timing_generator; i++) {
+
+ /* if pipe is disabled, skip instance of HW pipe,
+ * i.e, skip ASIC register instance
+ */
+ if (pipe_fuses & 1 << i)
+ continue;
+
+ pool->base.hubps[j] = dcn401_hubp_create(ctx, i);
+ if (pool->base.hubps[j] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC: failed to create hubps!\n");
+ goto create_fail;
+ }
+
+ pool->base.dpps[j] = dcn401_dpp_create(ctx, i);
+ if (pool->base.dpps[j] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC: failed to create dpps!\n");
+ goto create_fail;
+ }
+
+ pool->base.opps[j] = dcn401_opp_create(ctx, i);
+ if (pool->base.opps[j] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC: failed to create output pixel processor!\n");
+ goto create_fail;
+ }
+
+ pool->base.timing_generators[j] = dcn401_timing_generator_create(
+ ctx, i);
+ if (pool->base.timing_generators[j] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create tg!\n");
+ goto create_fail;
+ }
+
+ pool->base.multiple_abms[j] = dmub_abm_create(ctx,
+ &abm_regs[i],
+ &abm_shift,
+ &abm_mask);
+ if (pool->base.multiple_abms[j] == NULL) {
+ dm_error("DC: failed to create abm for pipe %d!\n", i);
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+ /* index for resource pool arrays for next valid pipe */
+ j++;
+ }
+
+ /* PSR */
+ pool->base.psr = dmub_psr_create(ctx);
+ if (pool->base.psr == NULL) {
+ dm_error("DC: failed to create psr obj!\n");
+ BREAK_TO_DEBUGGER();
+ goto create_fail;
+ }
+
+ /* MPCCs */
+ pool->base.mpc = dcn401_mpc_create(ctx, pool->base.res_cap->num_timing_generator, pool->base.res_cap->num_mpc_3dlut);
+ if (pool->base.mpc == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create mpc!\n");
+ goto create_fail;
+ }
+
+ /* DSCs */
+ for (i = 0; i < pool->base.res_cap->num_dsc; i++) {
+ pool->base.dscs[i] = dcn401_dsc_create(ctx, i);
+ if (pool->base.dscs[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create display stream compressor %d!\n", i);
+ goto create_fail;
+ }
+ }
+
+ /* DWB */
+ if (!dcn401_dwbc_create(ctx, &pool->base)) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create dwbc!\n");
+ goto create_fail;
+ }
+
+ /* MMHUBBUB */
+ if (!dcn401_mmhubbub_create(ctx, &pool->base)) {
+ BREAK_TO_DEBUGGER();
+ dm_error("DC: failed to create mcif_wb!\n");
+ goto create_fail;
+ }
+
+ /* AUX and I2C */
+ for (i = 0; i < pool->base.res_cap->num_ddc; i++) {
+ pool->base.engines[i] = dcn401_aux_engine_create(ctx, i);
+ if (pool->base.engines[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC:failed to create aux engine!!\n");
+ goto create_fail;
+ }
+ pool->base.hw_i2cs[i] = dcn401_i2c_hw_create(ctx, i);
+ if (pool->base.hw_i2cs[i] == NULL) {
+ BREAK_TO_DEBUGGER();
+ dm_error(
+ "DC:failed to create hw i2c!!\n");
+ goto create_fail;
+ }
+ pool->base.sw_i2cs[i] = NULL;
+ }
+
+ /* Audio, HWSeq, Stream Encoders including HPO and virtual, MPC 3D LUTs */
+ if (!resource_construct(num_virtual_links, dc, &pool->base,
+ &res_create_funcs))
+ goto create_fail;
+
+ /* HW Sequencer init functions and Plane caps */
+ dcn401_hw_sequencer_init_functions(dc);
+
+ dc->caps.max_planes = pool->base.pipe_count;
+
+ for (i = 0; i < dc->caps.max_planes; ++i)
+ dc->caps.planes[i] = plane_cap;
+
+ dc->caps.max_odm_combine_factor = 4;
+
+ dc->cap_funcs = cap_funcs;
+
+ if (dc->ctx->dc_bios->fw_info.oem_i2c_present) {
+ ddc_init_data.ctx = dc->ctx;
+ ddc_init_data.link = NULL;
+ ddc_init_data.id.id = dc->ctx->dc_bios->fw_info.oem_i2c_obj_id;
+ ddc_init_data.id.enum_id = 0;
+ ddc_init_data.id.type = OBJECT_TYPE_GENERIC;
+ pool->base.oem_device = dc->link_srv->create_ddc_service(&ddc_init_data);
+ } else {
+ pool->base.oem_device = NULL;
+ }
+
+ //For now enable SDPIF_REQUEST_RATE_LIMIT on DCN4_01 when vram_info.num_chans provided
+ if (dc->config.sdpif_request_limit_words_per_umc == 0)
+ dc->config.sdpif_request_limit_words_per_umc = 16;
+
+ dc->dml2_options.dcn_pipe_count = pool->base.pipe_count;
+ dc->dml2_options.use_native_soc_bb_construction = true;
+ dc->dml2_options.minimize_dispclk_using_odm = true;
+ dc->dml2_options.map_dc_pipes_with_callbacks = true;
+ dc->dml2_options.force_tdlut_enable = true;
+
+ resource_init_common_dml2_callbacks(dc, &dc->dml2_options);
+ dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch;
+ dc->dml2_options.svp_pstate.callbacks.release_dsc = &dcn20_release_dsc;
+ dc->dml2_options.svp_pstate.callbacks.calculate_mall_ways_from_bytes = pool->base.funcs->calculate_mall_ways_from_bytes;
+
+ dc->dml2_options.svp_pstate.subvp_fw_processing_delay_us = dc->caps.subvp_fw_processing_delay_us;
+ dc->dml2_options.svp_pstate.subvp_prefetch_end_to_mall_start_us = dc->caps.subvp_prefetch_end_to_mall_start_us;
+ dc->dml2_options.svp_pstate.subvp_pstate_allow_width_us = dc->caps.subvp_pstate_allow_width_us;
+ dc->dml2_options.svp_pstate.subvp_swath_height_margin_lines = dc->caps.subvp_swath_height_margin_lines;
+
+ dc->dml2_options.svp_pstate.force_disable_subvp = dc->debug.force_disable_subvp;
+ dc->dml2_options.svp_pstate.force_enable_subvp = dc->debug.force_subvp_mclk_switch;
+
+ dc->dml2_options.mall_cfg.cache_line_size_bytes = dc->caps.cache_line_size;
+ dc->dml2_options.mall_cfg.cache_num_ways = dc->caps.cache_num_ways;
+ dc->dml2_options.mall_cfg.max_cab_allocation_bytes = dc->caps.max_cab_allocation_bytes;
+ dc->dml2_options.mall_cfg.mblk_height_4bpe_pixels = DCN3_2_MBLK_HEIGHT_4BPE;
+ dc->dml2_options.mall_cfg.mblk_height_8bpe_pixels = DCN3_2_MBLK_HEIGHT_8BPE;
+ dc->dml2_options.mall_cfg.mblk_size_bytes = DCN3_2_MALL_MBLK_SIZE_BYTES;
+ dc->dml2_options.mall_cfg.mblk_width_pixels = DCN3_2_MBLK_WIDTH;
+
+ dc->dml2_options.max_segments_per_hubp = 20;
+ dc->dml2_options.det_segment_size = DCN4_01_CRB_SEGMENT_SIZE_KB;
+
+ /* SPL */
+ dc->caps.scl_caps.sharpener_support = true;
+
+ /* init DC limited DML2 options */
+ memcpy(&dc->dml2_dc_power_options, &dc->dml2_options, sizeof(struct dml2_configuration_options));
+ dc->dml2_dc_power_options.use_clock_dc_limits = true;
+
+ return true;
+
+create_fail:
+ /* release whatever was created so far; the caller still owns and frees @pool itself */
+ dcn401_resource_destruct(pool);
+
+ return false;
+}
+
+struct resource_pool *dcn401_create_resource_pool(
+		const struct dc_init_data *init_data, struct dc *dc)
+{
+	struct dcn401_resource_pool *pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+
+	if (pool == NULL)
+		return NULL;
+
+	if (!dcn401_resource_construct(init_data->num_virtual_links, dc, pool)) {
+		/* construct already released the members on failure; only the container is left */
+		BREAK_TO_DEBUGGER();
+		kfree(pool);
+		return NULL;
+	}
+
+	return &pool->base;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
new file mode 100644
index 000000000000..0fc66487d800
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
@@ -0,0 +1,655 @@
+/* SPDX-License-Identifier: MIT */
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef _DCN401_RESOURCE_H_
+#define _DCN401_RESOURCE_H_
+
+#include "core_types.h"
+#include "dcn32/dcn32_resource.h"
+#include "dcn401/dcn401_hubp.h"
+
+#define TO_DCN401_RES_POOL(pool)\
+ container_of(pool, struct dcn401_resource_pool, base) /* pool: pointer to the embedded 'base' member */
+
+struct dcn401_resource_pool {
+ struct resource_pool base; /* TO_DCN401_RES_POOL() maps a pointer to this member back to the wrapper */
+};
+
+struct resource_pool *dcn401_create_resource_pool(
+ const struct dc_init_data *init_data,
+ struct dc *dc); /* returns NULL when allocation or construction fails */
+
+enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_state);
+
+enum dc_status dcn401_validate_bandwidth(struct dc *dc,
+ struct dc_state *context,
+ enum dc_validate_mode validate_mode);
+
+void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context);
+
+/* Following are definitions for run time init of reg offsets */
+
+/* HUBP: HUBP, HUBPREQ, HUBPRET and CURSOR0_ entries, plus HUBP_3DLUT_FL_REG_LIST_DCN401 */
+#define HUBP_REG_LIST_DCN401_RI(id) \
+ SRI_ARR(NOM_PARAMETERS_0, HUBPREQ, id), \
+ SRI_ARR(NOM_PARAMETERS_1, HUBPREQ, id), \
+ SRI_ARR(NOM_PARAMETERS_2, HUBPREQ, id), \
+ SRI_ARR(NOM_PARAMETERS_3, HUBPREQ, id), \
+ SRI_ARR(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id), \
+ SRI_ARR(DCHUBP_CNTL, HUBP, id), \
+ SRI_ARR(HUBPREQ_DEBUG_DB, HUBP, id), \
+ SRI_ARR(HUBPREQ_DEBUG, HUBP, id), \
+ SRI_ARR(DCSURF_ADDR_CONFIG, HUBP, id), \
+ SRI_ARR(DCSURF_TILING_CONFIG, HUBP, id), \
+ SRI_ARR(DCSURF_SURFACE_PITCH, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_PITCH_C, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_CONFIG, HUBP, id), \
+ SRI_ARR(DCSURF_FLIP_CONTROL, HUBPREQ, id), \
+ SRI_ARR(DCSURF_PRI_VIEWPORT_DIMENSION, HUBP, id), \
+ SRI_ARR(DCSURF_PRI_VIEWPORT_START, HUBP, id), \
+ SRI_ARR(DCSURF_SEC_VIEWPORT_DIMENSION, HUBP, id), \
+ SRI_ARR(DCSURF_SEC_VIEWPORT_START, HUBP, id), \
+ SRI_ARR(DCSURF_PRI_VIEWPORT_DIMENSION_C, HUBP, id), \
+ SRI_ARR(DCSURF_PRI_VIEWPORT_START_C, HUBP, id), \
+ SRI_ARR(DCSURF_SEC_VIEWPORT_DIMENSION_C, HUBP, id), \
+ SRI_ARR(DCSURF_SEC_VIEWPORT_START_C, HUBP, id), \
+ SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \
+ SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS, HUBPREQ, id), \
+ SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \
+ SRI_ARR(DCSURF_PRIMARY_SURFACE_ADDRESS_C, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH_C, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SECONDARY_SURFACE_ADDRESS_C, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_INUSE, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_INUSE_HIGH, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_INUSE_C, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_INUSE_HIGH_C, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_HIGH, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_C, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_CONTROL, HUBPREQ, id), \
+ SRI_ARR(DCSURF_SURFACE_FLIP_INTERRUPT, HUBPREQ, id), \
+ SRI_ARR(HUBPRET_CONTROL, HUBPRET, id), \
+ SRI_ARR(HUBPRET_READ_LINE_STATUS, HUBPRET, id), \
+ SRI_ARR(DCN_EXPANSION_MODE, HUBPREQ, id), \
+ SRI_ARR(DCHUBP_REQ_SIZE_CONFIG, HUBP, id), \
+ SRI_ARR(DCHUBP_REQ_SIZE_CONFIG_C, HUBP, id), \
+ SRI_ARR(BLANK_OFFSET_0, HUBPREQ, id), \
+ SRI_ARR(BLANK_OFFSET_1, HUBPREQ, id), \
+ SRI_ARR(DST_DIMENSIONS, HUBPREQ, id), \
+ SRI_ARR(DST_AFTER_SCALER, HUBPREQ, id), \
+ SRI_ARR(VBLANK_PARAMETERS_0, HUBPREQ, id), \
+ SRI_ARR(REF_FREQ_TO_PIX_FREQ, HUBPREQ, id), \
+ SRI_ARR(VBLANK_PARAMETERS_1, HUBPREQ, id), \
+ SRI_ARR(VBLANK_PARAMETERS_3, HUBPREQ, id), \
+ SRI_ARR(NOM_PARAMETERS_4, HUBPREQ, id), \
+ SRI_ARR(NOM_PARAMETERS_5, HUBPREQ, id), \
+ SRI_ARR(PER_LINE_DELIVERY_PRE, HUBPREQ, id), \
+ SRI_ARR(PER_LINE_DELIVERY, HUBPREQ, id), \
+ SRI_ARR(VBLANK_PARAMETERS_2, HUBPREQ, id), \
+ SRI_ARR(VBLANK_PARAMETERS_4, HUBPREQ, id), \
+ SRI_ARR(NOM_PARAMETERS_6, HUBPREQ, id), \
+ SRI_ARR(NOM_PARAMETERS_7, HUBPREQ, id), \
+ SRI_ARR(DCN_TTU_QOS_WM, HUBPREQ, id), \
+ SRI_ARR(DCN_GLOBAL_TTU_CNTL, HUBPREQ, id), \
+ SRI_ARR(DCN_SURF0_TTU_CNTL0, HUBPREQ, id), \
+ SRI_ARR(DCN_SURF0_TTU_CNTL1, HUBPREQ, id), \
+ SRI_ARR(DCN_SURF1_TTU_CNTL0, HUBPREQ, id), \
+ SRI_ARR(DCN_SURF1_TTU_CNTL1, HUBPREQ, id), \
+ SRI_ARR(DCN_CUR0_TTU_CNTL0, HUBPREQ, id), \
+ SRI_ARR(DCN_CUR0_TTU_CNTL1, HUBPREQ, id), \
+ SRI_ARR(HUBP_CLK_CNTL, HUBP, id), \
+ SRI_ARR(PREFETCH_SETTINGS, HUBPREQ, id), \
+ SRI_ARR(PREFETCH_SETTINGS_C, HUBPREQ, id), \
+ SRI_ARR(DCN_VM_SYSTEM_APERTURE_LOW_ADDR, HUBPREQ, id), \
+ SRI_ARR(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR, HUBPREQ, id), \
+ SRI_ARR(CURSOR_SETTINGS, HUBPREQ, id), \
+ SRI_ARR(CURSOR_SURFACE_ADDRESS_HIGH, CURSOR0_, id), \
+ SRI_ARR(CURSOR_SURFACE_ADDRESS, CURSOR0_, id), \
+ SRI_ARR(CURSOR_SIZE, CURSOR0_, id), \
+ SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \
+ SRI_ARR(CURSOR_POSITION, CURSOR0_, id), \
+ SRI_ARR(CURSOR_HOT_SPOT, CURSOR0_, id), \
+ SRI_ARR(CURSOR_DST_OFFSET, CURSOR0_, id), \
+ SRI_ARR(DMDATA_ADDRESS_HIGH, CURSOR0_, id), \
+ SRI_ARR(DMDATA_ADDRESS_LOW, CURSOR0_, id), \
+ SRI_ARR(DMDATA_CNTL, CURSOR0_, id), \
+ SRI_ARR(DMDATA_SW_CNTL, CURSOR0_, id), \
+ SRI_ARR(DMDATA_QOS_CNTL, CURSOR0_, id), \
+ SRI_ARR(DMDATA_SW_DATA, CURSOR0_, id), \
+ SRI_ARR(DMDATA_STATUS, CURSOR0_, id), \
+ SRI_ARR(FLIP_PARAMETERS_0, HUBPREQ, id), \
+ SRI_ARR(FLIP_PARAMETERS_1, HUBPREQ, id), \
+ SRI_ARR(FLIP_PARAMETERS_2, HUBPREQ, id), \
+ SRI_ARR(DCN_CUR1_TTU_CNTL0, HUBPREQ, id), \
+ SRI_ARR(DCN_CUR1_TTU_CNTL1, HUBPREQ, id), \
+ SRI_ARR(DCSURF_FLIP_CONTROL2, HUBPREQ, id), \
+ SRI_ARR(VMID_SETTINGS_0, HUBPREQ, id), \
+ SRI_ARR(FLIP_PARAMETERS_3, HUBPREQ, id), \
+ SRI_ARR(FLIP_PARAMETERS_4, HUBPREQ, id), \
+ SRI_ARR(FLIP_PARAMETERS_5, HUBPREQ, id), \
+ SRI_ARR(FLIP_PARAMETERS_6, HUBPREQ, id), \
+ SRI_ARR(VBLANK_PARAMETERS_5, HUBPREQ, id), \
+ SRI_ARR(VBLANK_PARAMETERS_6, HUBPREQ, id), \
+ SRI_ARR(DCN_DMDATA_VM_CNTL, HUBPREQ, id), \
+ SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \
+ SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \
+ SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \
+ HUBP_3DLUT_FL_REG_LIST_DCN401(id), \
+ SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \
+ SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id), \
+ SRI_ARR(HUBPRET_READ_LINE_VALUE, HUBPRET, id)
+
+/* ABM: SRI_ARR entries for block ABM at index 'id', plus NBIO_SR_ARR(BIOS_SCRATCH_2) */
+#define ABM_DCN401_REG_LIST_RI(id) \
+ SRI_ARR(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \
+ SRI_ARR(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \
+ SRI_ARR(DC_ABM1_HG_MISC_CTRL, ABM, id), \
+ SRI_ARR(DC_ABM1_IPCSC_COEFF_SEL, ABM, id), \
+ SRI_ARR(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \
+ SRI_ARR(BL1_PWM_CURRENT_ABM_LEVEL, ABM, id), \
+ SRI_ARR(BL1_PWM_TARGET_ABM_LEVEL, ABM, id), \
+ SRI_ARR(BL1_PWM_USER_LEVEL, ABM, id), \
+ SRI_ARR(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \
+ SRI_ARR(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \
+ SRI_ARR(DC_ABM1_HG_BIN_33_40_SHIFT_INDEX, ABM, id), \
+ SRI_ARR(DC_ABM1_HG_BIN_33_64_SHIFT_FLAG, ABM, id), \
+ SRI_ARR(DC_ABM1_HG_BIN_41_48_SHIFT_INDEX, ABM, id), \
+ SRI_ARR(DC_ABM1_HG_BIN_49_56_SHIFT_INDEX, ABM, id), \
+ SRI_ARR(DC_ABM1_HG_BIN_57_64_SHIFT_INDEX, ABM, id), \
+ SRI_ARR(DC_ABM1_HG_RESULT_DATA, ABM, id), \
+ SRI_ARR(DC_ABM1_HG_RESULT_INDEX, ABM, id), \
+ SRI_ARR(DC_ABM1_ACE_OFFSET_SLOPE_DATA, ABM, id), \
+ SRI_ARR(DC_ABM1_ACE_PWL_CNTL, ABM, id), \
+ SRI_ARR(DC_ABM1_ACE_THRES_DATA, ABM, id), \
+ NBIO_SR_ARR(BIOS_SCRATCH_2, id)
+
+/* VPG: the DCN3 list (VPG_DCN3_REG_LIST_RI) extended with VPG_MEM_PWR */
+#define VPG_DCN401_REG_LIST_RI(id) \
+ VPG_DCN3_REG_LIST_RI(id), \
+ SRI_ARR(VPG_MEM_PWR, VPG, id)
+
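+/* Stream encoder; NOTE(review): DIG_FE_CNTL, DP_SEC_METADATA_TRANSMISSION, HDMI_METADATA_PACKET_CONTROL appear twice */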
+#define SE_DCN4_01_REG_LIST_RI(id) \
+ SRI_ARR(AFMT_CNTL, DIG, id), SRI_ARR(DIG_FE_CNTL, DIG, id), \
+ SRI_ARR(HDMI_CONTROL, DIG, id), SRI_ARR(HDMI_DB_CONTROL, DIG, id), \
+ SRI_ARR(HDMI_GC, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL4, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL5, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL6, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL7, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL8, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL9, DIG, id), \
+ SRI_ARR(HDMI_GENERIC_PACKET_CONTROL10, DIG, id), \
+ SRI_ARR(HDMI_INFOFRAME_CONTROL0, DIG, id), \
+ SRI_ARR(HDMI_INFOFRAME_CONTROL1, DIG, id), \
+ SRI_ARR(HDMI_VBI_PACKET_CONTROL, DIG, id), \
+ SRI_ARR(HDMI_AUDIO_PACKET_CONTROL, DIG, id), \
+ SRI_ARR(HDMI_ACR_PACKET_CONTROL, DIG, id), \
+ SRI_ARR(HDMI_ACR_32_0, DIG, id), SRI_ARR(HDMI_ACR_32_1, DIG, id), \
+ SRI_ARR(HDMI_ACR_44_0, DIG, id), SRI_ARR(HDMI_ACR_44_1, DIG, id), \
+ SRI_ARR(HDMI_ACR_48_0, DIG, id), SRI_ARR(HDMI_ACR_48_1, DIG, id), \
+ SRI_ARR(DP_DB_CNTL, DP, id), SRI_ARR(DP_MSA_MISC, DP, id), \
+ SRI_ARR(DP_MSA_VBID_MISC, DP, id), SRI_ARR(DP_MSA_COLORIMETRY, DP, id), \
+ SRI_ARR(DP_MSA_TIMING_PARAM1, DP, id), \
+ SRI_ARR(DP_MSA_TIMING_PARAM2, DP, id), \
+ SRI_ARR(DP_MSA_TIMING_PARAM3, DP, id), \
+ SRI_ARR(DP_MSA_TIMING_PARAM4, DP, id), \
+ SRI_ARR(DP_MSE_RATE_CNTL, DP, id), SRI_ARR(DP_MSE_RATE_UPDATE, DP, id), \
+ SRI_ARR(DP_PIXEL_FORMAT, DP, id), SRI_ARR(DP_SEC_CNTL, DP, id), \
+ SRI_ARR(DP_SEC_CNTL1, DP, id), SRI_ARR(DP_SEC_CNTL2, DP, id), \
+ SRI_ARR(DP_SEC_CNTL5, DP, id), SRI_ARR(DP_SEC_CNTL6, DP, id), \
+ SRI_ARR(DP_STEER_FIFO, DP, id), SRI_ARR(DP_VID_M, DP, id), \
+ SRI_ARR(DP_VID_N, DP, id), SRI_ARR(DP_VID_STREAM_CNTL, DP, id), \
+ SRI_ARR(DP_VID_TIMING, DP, id), SRI_ARR(DP_SEC_AUD_N, DP, id), \
+ SRI_ARR(DP_SEC_TIMESTAMP, DP, id), \
+ SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \
+ SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \
+ SRI_ARR(DP_SEC_FRAMING4, DP, id), SRI_ARR(DP_GSP11_CNTL, DP, id), \
+ SRI_ARR(DME_CONTROL, DME, id), \
+ SRI_ARR(DP_SEC_METADATA_TRANSMISSION, DP, id), \
+ SRI_ARR(HDMI_METADATA_PACKET_CONTROL, DIG, id), \
+ SRI_ARR(DIG_FE_CNTL, DIG, id), \
+ SRI_ARR(DIG_FE_EN_CNTL, DIG, id), \
+ SRI_ARR(DIG_FE_CLK_CNTL, DIG, id), \
+ SRI_ARR(DIG_CLOCK_PATTERN, DIG, id), \
+ SRI_ARR(DIG_FIFO_CTRL0, DIG, id), \
+ SRI_ARR(STREAM_MAPPER_CONTROL, DIG, id)
+
+/* Link encoder: the DCN3 list (LE_DCN3_REG_LIST_RI) plus DP_DPHY_INTERNAL_CTRL and DIG_BE_CLK_CNTL */
+#define LE_DCN401_REG_LIST_RI(id) \
+ LE_DCN3_REG_LIST_RI(id), \
+ SRI_ARR(DP_DPHY_INTERNAL_CTRL, DP, id), \
+ SRI_ARR(DIG_BE_CLK_CNTL, DIG, id)
+
+/* DPP; NOTE(review): CM_GAMCOR_LUT_INDEX appears twice in this list */
+#define DPP_REG_LIST_DCN401_COMMON_RI(id) \
+ SRI_ARR(CM_DEALPHA, CM, id), SRI_ARR(CM_MEM_PWR_STATUS, CM, id), \
+ SRI_ARR(CM_BIAS_CR_R, CM, id), SRI_ARR(CM_BIAS_Y_G_CB_B, CM, id), \
+ SRI_ARR(PRE_DEGAM, CNVC_CFG, id), SRI_ARR(CM_GAMCOR_CONTROL, CM, id), \
+ SRI_ARR(CM_GAMCOR_LUT_CONTROL, CM, id), \
+ SRI_ARR(CM_GAMCOR_LUT_INDEX, CM, id), \
+ SRI_ARR(CM_GAMCOR_LUT_INDEX, CM, id), \
+ SRI_ARR(CM_GAMCOR_LUT_DATA, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_START_CNTL_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_START_SLOPE_CNTL_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_END_CNTL1_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_END_CNTL2_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_REGION_0_1, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_REGION_32_33, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_OFFSET_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_OFFSET_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_OFFSET_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMB_START_BASE_CNTL_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_START_CNTL_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_START_SLOPE_CNTL_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_END_CNTL1_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_END_CNTL2_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_REGION_0_1, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_REGION_32_33, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_OFFSET_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_OFFSET_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_OFFSET_R, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_B, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_G, CM, id), \
+ SRI_ARR(CM_GAMCOR_RAMA_START_BASE_CNTL_R, CM, id), \
+ SRI_ARR(DSCL_EXT_OVERSCAN_LEFT_RIGHT, DSCL, id), \
+ SRI_ARR(DSCL_EXT_OVERSCAN_TOP_BOTTOM, DSCL, id), \
+ SRI_ARR(OTG_H_BLANK, DSCL, id), SRI_ARR(OTG_V_BLANK, DSCL, id), \
+ SRI_ARR(SCL_MODE, DSCL, id), SRI_ARR(LB_DATA_FORMAT, DSCL, id), \
+ SRI_ARR(LB_MEMORY_CTRL, DSCL, id), SRI_ARR(DSCL_AUTOCAL, DSCL, id), \
+ SRI_ARR(SCL_TAP_CONTROL, DSCL, id), \
+ SRI_ARR(SCL_COEF_RAM_TAP_SELECT, DSCL, id), \
+ SRI_ARR(SCL_COEF_RAM_TAP_DATA, DSCL, id), \
+ SRI_ARR(DSCL_2TAP_CONTROL, DSCL, id), SRI_ARR(MPC_SIZE, DSCL, id), \
+ SRI_ARR(SCL_HORZ_FILTER_SCALE_RATIO, DSCL, id), \
+ SRI_ARR(SCL_VERT_FILTER_SCALE_RATIO, DSCL, id), \
+ SRI_ARR(SCL_HORZ_FILTER_SCALE_RATIO_C, DSCL, id), \
+ SRI_ARR(SCL_VERT_FILTER_SCALE_RATIO_C, DSCL, id), \
+ SRI_ARR(SCL_HORZ_FILTER_INIT, DSCL, id), \
+ SRI_ARR(SCL_HORZ_FILTER_INIT_C, DSCL, id), \
+ SRI_ARR(SCL_VERT_FILTER_INIT, DSCL, id), \
+ SRI_ARR(SCL_VERT_FILTER_INIT_C, DSCL, id), \
+ SRI_ARR(RECOUT_START, DSCL, id), SRI_ARR(RECOUT_SIZE, DSCL, id), \
+ SRI_ARR(PRE_DEALPHA, CNVC_CFG, id), SRI_ARR(PRE_REALPHA, CNVC_CFG, id), \
+ SRI_ARR(PRE_CSC_MODE, CNVC_CFG, id), \
+ SRI_ARR(PRE_CSC_C11_C12, CNVC_CFG, id), \
+ SRI_ARR(PRE_CSC_C33_C34, CNVC_CFG, id), \
+ SRI_ARR(PRE_CSC_B_C11_C12, CNVC_CFG, id), \
+ SRI_ARR(PRE_CSC_B_C33_C34, CNVC_CFG, id), \
+ SRI_ARR(CM_POST_CSC_CONTROL, CM, id), \
+ SRI_ARR(CM_POST_CSC_C11_C12, CM, id), \
+ SRI_ARR(CM_POST_CSC_C33_C34, CM, id), \
+ SRI_ARR(CM_POST_CSC_B_C11_C12, CM, id), \
+ SRI_ARR(CM_POST_CSC_B_C33_C34, CM, id), \
+ SRI_ARR(CM_MEM_PWR_CTRL, CM, id), SRI_ARR(CM_CONTROL, CM, id), \
+ SRI_ARR(CM_TEST_DEBUG_INDEX, CM, id), \
+ SRI_ARR(CM_TEST_DEBUG_DATA, CM, id), \
+ SRI_ARR(FORMAT_CONTROL, CNVC_CFG, id), \
+ SRI_ARR(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \
+ SRI_ARR(CURSOR0_CONTROL, CM_CUR, id), \
+ SRI_ARR(CURSOR0_COLOR0, CM_CUR, id), \
+ SRI_ARR(CURSOR0_COLOR1, CM_CUR, id), \
+ SRI_ARR(CURSOR0_FP_SCALE_BIAS_G_Y, CM_CUR, id), \
+ SRI_ARR(CURSOR0_FP_SCALE_BIAS_RB_CRCB, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_MODE, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C11_C12_A, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C13_C14_A, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C21_C22_A, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C23_C24_A, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C31_C32_A, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C33_C34_A, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C11_C12_B, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C13_C14_B, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C21_C22_B, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C23_C24_B, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C31_C32_B, CM_CUR, id), \
+ SRI_ARR(CUR0_MATRIX_C33_C34_B, CM_CUR, id), \
+ SRI_ARR(DPP_CONTROL, DPP_TOP, id), SRI_ARR(CM_HDR_MULT_COEF, CM, id), \
+ SRI_ARR(CURSOR_CONTROL, CURSOR0_, id), \
+ SRI_ARR(ALPHA_2BIT_LUT, CNVC_CFG, id), \
+ SRI_ARR(FCNV_FP_BIAS_R, CNVC_CFG, id), \
+ SRI_ARR(FCNV_FP_BIAS_G, CNVC_CFG, id), \
+ SRI_ARR(FCNV_FP_BIAS_B, CNVC_CFG, id), \
+ SRI_ARR(FCNV_FP_SCALE_R, CNVC_CFG, id), \
+ SRI_ARR(FCNV_FP_SCALE_G, CNVC_CFG, id), \
+ SRI_ARR(FCNV_FP_SCALE_B, CNVC_CFG, id), \
+ SRI_ARR(COLOR_KEYER_CONTROL, CNVC_CFG, id), \
+ SRI_ARR(COLOR_KEYER_ALPHA, CNVC_CFG, id), \
+ SRI_ARR(COLOR_KEYER_RED, CNVC_CFG, id), \
+ SRI_ARR(COLOR_KEYER_GREEN, CNVC_CFG, id), \
+ SRI_ARR(COLOR_KEYER_BLUE, CNVC_CFG, id), \
+ SRI_ARR(OBUF_MEM_PWR_CTRL, DSCL, id), \
+ SRI_ARR(DSCL_MEM_PWR_STATUS, DSCL, id), \
+ SRI_ARR(DSCL_MEM_PWR_CTRL, DSCL, id), \
+ SRI_ARR(DSCL_CONTROL, DSCL, id), \
+ SRI_ARR(DSCL_SC_MODE, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_MODE, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF_CNTL, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF_FINAL_MAX_MIN, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF1_PWL_SEG0, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF1_PWL_SEG1, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF1_PWL_SEG2, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF1_PWL_SEG3, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF1_PWL_SEG4, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF1_PWL_SEG5, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF1_PWL_SEG6, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF1_PWL_SEG7, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF3_PWL_SEG0, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF3_PWL_SEG1, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF3_PWL_SEG2, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF3_PWL_SEG3, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF3_PWL_SEG4, DSCL, id), \
+ SRI_ARR(DSCL_EASF_H_BF3_PWL_SEG5, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_MODE, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF_CNTL, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_RINGEST_3TAP_CNTL1, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_RINGEST_3TAP_CNTL2, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_RINGEST_3TAP_CNTL3, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF_FINAL_MAX_MIN, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF1_PWL_SEG0, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF1_PWL_SEG1, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF1_PWL_SEG2, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF1_PWL_SEG3, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF1_PWL_SEG4, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF1_PWL_SEG5, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF1_PWL_SEG6, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF1_PWL_SEG7, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF3_PWL_SEG0, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF3_PWL_SEG1, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF3_PWL_SEG2, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF3_PWL_SEG3, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF3_PWL_SEG4, DSCL, id), \
+ SRI_ARR(DSCL_EASF_V_BF3_PWL_SEG5, DSCL, id), \
+ SRI_ARR(DSCL_SC_MATRIX_C0C1, DSCL, id), \
+ SRI_ARR(DSCL_SC_MATRIX_C2C3, DSCL, id), \
+ SRI_ARR(ISHARP_MODE, DSCL, id), \
+ SRI_ARR(ISHARP_NOISEDET_THRESHOLD, DSCL, id), \
+ SRI_ARR(ISHARP_NOISE_GAIN_PWL, DSCL, id), \
+ SRI_ARR(ISHARP_LBA_PWL_SEG0, DSCL, id), \
+ SRI_ARR(ISHARP_LBA_PWL_SEG1, DSCL, id), \
+ SRI_ARR(ISHARP_LBA_PWL_SEG2, DSCL, id), \
+ SRI_ARR(ISHARP_LBA_PWL_SEG3, DSCL, id), \
+ SRI_ARR(ISHARP_LBA_PWL_SEG4, DSCL, id), \
+ SRI_ARR(ISHARP_LBA_PWL_SEG5, DSCL, id), \
+ SRI_ARR(ISHARP_DELTA_CTRL, DSCL, id), \
+ SRI_ARR(ISHARP_DELTA_DATA, DSCL, id), \
+ SRI_ARR(ISHARP_DELTA_INDEX, DSCL, id), \
+ SRI_ARR(ISHARP_NLDELTA_SOFT_CLIP, DSCL, id), \
+ SRI_ARR(SCL_VERT_FILTER_INIT_BOT, DSCL, id), \
+ SRI_ARR(SCL_VERT_FILTER_INIT_BOT_C, DSCL, id)
+
+/* OPP: OPP_REG_LIST_DCN10_RI and OPP_DPG_REG_LIST_RI plus FMT_422_CONTROL */
+#define OPP_REG_LIST_DCN401_RI(id) \
+ OPP_REG_LIST_DCN10_RI(id), OPP_DPG_REG_LIST_RI(id), \
+ SRI_ARR(FMT_422_CONTROL, FMT, id)
+
+/* DSC: DSC_TOP, DSCC, DSCCIF and DSCRM entries */
+#define DSC_REG_LIST_DCN401_RI(id) \
+ SRI_ARR(DSC_TOP_CONTROL, DSC_TOP, id), \
+ SRI_ARR(DSC_DEBUG_CONTROL, DSC_TOP, id), \
+ SRI_ARR(DSCC_CONFIG0, DSCC, id), SRI_ARR(DSCC_CONFIG1, DSCC, id), \
+ SRI_ARR(DSCC_STATUS, DSCC, id), \
+ SRI_ARR(DSCC_INTERRUPT_CONTROL0, DSCC, id), \
+ SRI_ARR(DSCC_INTERRUPT_CONTROL1, DSCC, id), \
+ SRI_ARR(DSCC_INTERRUPT_STATUS0, DSCC, id), \
+ SRI_ARR(DSCC_INTERRUPT_STATUS1, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG0, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG1, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG2, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG3, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG4, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG5, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG6, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG7, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG8, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG9, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG10, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG11, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG12, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG13, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG14, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG15, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG16, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG17, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG18, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG19, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG20, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG21, DSCC, id), \
+ SRI_ARR(DSCC_PPS_CONFIG22, DSCC, id), \
+ SRI_ARR(DSCC_MEM_POWER_CONTROL0, DSCC, id), \
+ SRI_ARR(DSCC_MEM_POWER_CONTROL1, DSCC, id), \
+ SRI_ARR(DSCC_R_Y_SQUARED_ERROR_LOWER, DSCC, id), \
+ SRI_ARR(DSCC_R_Y_SQUARED_ERROR_UPPER, DSCC, id), \
+ SRI_ARR(DSCC_G_CB_SQUARED_ERROR_LOWER, DSCC, id), \
+ SRI_ARR(DSCC_G_CB_SQUARED_ERROR_UPPER, DSCC, id), \
+ SRI_ARR(DSCC_B_CR_SQUARED_ERROR_LOWER, DSCC, id), \
+ SRI_ARR(DSCC_B_CR_SQUARED_ERROR_UPPER, DSCC, id), \
+ SRI_ARR(DSCC_MAX_ABS_ERROR0, DSCC, id), \
+ SRI_ARR(DSCC_MAX_ABS_ERROR1, DSCC, id), \
+ SRI_ARR(DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL0, DSCC, id), \
+ SRI_ARR(DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL1, DSCC, id), \
+ SRI_ARR(DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL2, DSCC, id), \
+ SRI_ARR(DSCC_RATE_BUFFER_MODEL_MAX_FULLNESS_LEVEL3, DSCC, id), \
+ SRI_ARR(DSCC_TEST_DEBUG_BUS_ROTATE, DSCC, id), \
+ SRI_ARR(DSCCIF_CONFIG0, DSCCIF, id), \
+ SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id)
+
+/* MPC: the DCN4_01 names below expand directly to the DCN3_0 / DCN1_0 lists */
+#define MPC_DWB_MUX_REG_LIST_DCN4_01_RI(inst) \
+ MPC_DWB_MUX_REG_LIST_DCN3_0_RI(inst)
+
+#define MPC_OUT_MUX_COMMON_REG_LIST_DCN4_01_RI(inst) \
+ MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0_RI(inst)
+
+#define MPC_OUT_MUX_REG_LIST_DCN4_01_RI(inst) \
+ MPC_OUT_MUX_REG_LIST_DCN3_0_RI(inst)
+
+/* OPTC; NOTE(review): OTG_GLOBAL_CONTROL1 and OTG_GLOBAL_CONTROL2 appear twice in this list */
+#define OPTC_COMMON_REG_LIST_DCN401_RI(inst) \
+ SRI_ARR(OTG_VSTARTUP_PARAM, OTG, inst), \
+ SRI_ARR(OTG_VUPDATE_PARAM, OTG, inst), \
+ SRI_ARR(OTG_VREADY_PARAM, OTG, inst), \
+ SRI_ARR(OTG_MASTER_UPDATE_LOCK, OTG, inst), \
+ SRI_ARR(OTG_GLOBAL_CONTROL0, OTG, inst), \
+ SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst), \
+ SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst), \
+ SRI_ARR(OTG_GLOBAL_CONTROL4, OTG, inst), \
+ SRI_ARR(OTG_DOUBLE_BUFFER_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_H_TOTAL, OTG, inst), \
+ SRI_ARR(OTG_H_BLANK_START_END, OTG, inst), \
+ SRI_ARR(OTG_H_SYNC_A, OTG, inst), SRI_ARR(OTG_H_SYNC_A_CNTL, OTG, inst), \
+ SRI_ARR(OTG_H_TIMING_CNTL, OTG, inst), SRI_ARR(OTG_V_TOTAL, OTG, inst), \
+ SRI_ARR(OTG_V_BLANK_START_END, OTG, inst), \
+ SRI_ARR(OTG_V_SYNC_A, OTG, inst), SRI_ARR(OTG_V_SYNC_A_CNTL, OTG, inst), \
+ SRI_ARR(OTG_CONTROL, OTG, inst), SRI_ARR(OTG_STEREO_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_3D_STRUCTURE_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_STEREO_STATUS, OTG, inst), \
+ SRI_ARR(OTG_V_TOTAL_MAX, OTG, inst), \
+ SRI_ARR(OTG_V_TOTAL_MIN, OTG, inst), \
+ SRI_ARR(OTG_V_TOTAL_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_TRIGA_CNTL, OTG, inst), \
+ SRI_ARR(OTG_FORCE_COUNT_NOW_CNTL, OTG, inst), \
+ SRI_ARR(OTG_STATIC_SCREEN_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_STATUS_FRAME_COUNT, OTG, inst), \
+ SRI_ARR(OTG_STATUS, OTG, inst), SRI_ARR(OTG_STATUS_POSITION, OTG, inst), \
+ SRI_ARR(OTG_NOM_VERT_POSITION, OTG, inst), \
+ SRI_ARR(OTG_M_CONST_DTO0, OTG, inst), \
+ SRI_ARR(OTG_M_CONST_DTO1, OTG, inst), \
+ SRI_ARR(OTG_CLOCK_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst), \
+ SRI_ARR(OTG_VERTICAL_INTERRUPT1_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_VERTICAL_INTERRUPT1_POSITION, OTG, inst), \
+ SRI_ARR(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst), \
+ SRI_ARR(OPTC_INPUT_CLOCK_CONTROL, ODM, inst), \
+ SRI_ARR(OPTC_DATA_SOURCE_SELECT, ODM, inst), \
+ SRI_ARR(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst), \
+ SRI_ARR(CONTROL, VTG, inst), SRI_ARR(OTG_VERT_SYNC_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_GSL_CONTROL, OTG, inst), SRI_ARR(OTG_CRC_CNTL, OTG, inst), \
+ SRI_ARR(OTG_CRC0_DATA_RG, OTG, inst), \
+ SRI_ARR(OTG_CRC0_DATA_B, OTG, inst), \
+ SRI_ARR(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst), \
+ SR_ARR(GSL_SOURCE_SELECT, inst), \
+ SRI_ARR(OTG_TRIGA_MANUAL_TRIG, OTG, inst), \
+ SRI_ARR(OTG_GLOBAL_CONTROL1, OTG, inst), \
+ SRI_ARR(OTG_GLOBAL_CONTROL2, OTG, inst), \
+ SRI_ARR(OTG_GSL_WINDOW_X, OTG, inst), \
+ SRI_ARR(OTG_GSL_WINDOW_Y, OTG, inst), \
+ SRI_ARR(OTG_VUPDATE_KEEPOUT, OTG, inst), \
+ SRI_ARR(OTG_DRR_TRIGGER_WINDOW, OTG, inst), \
+ SRI_ARR(OTG_DRR_V_TOTAL_CHANGE, OTG, inst), \
+ SRI_ARR(OPTC_DATA_FORMAT_CONTROL, ODM, inst), \
+ SRI_ARR(OPTC_BYTES_PER_PIXEL, ODM, inst), \
+ SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \
+ SRI_ARR(OPTC_WIDTH_CONTROL2, ODM, inst), \
+ SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \
+ SRI_ARR(OTG_DRR_CONTROL, OTG, inst), \
+ SRI_ARR(OTG_PSTATE_REGISTER, OTG, inst), \
+ SRI_ARR(OTG_PIPE_UPDATE_STATUS, OTG, inst), \
+ SRI_ARR(INTERRUPT_DEST, OTG, inst)
+
+/* HUBBUB; NOTE(review): every entry is SR(...) with no id argument, so the 'id' parameter is unused */
+#define HUBBUB_REG_LIST_DCN4_01_RI(id) \
+ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A), \
+ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B), \
+ SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL), \
+ SR(DCHUBBUB_ARB_DRAM_STATE_CNTL), \
+ SR(DCHUBBUB_ARB_SAT_LEVEL), \
+ SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND), \
+ SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
+ SR(DCHUBBUB_TEST_DEBUG_INDEX), \
+ SR(DCHUBBUB_TEST_DEBUG_DATA), \
+ SR(DCHUBBUB_SOFT_RESET), \
+ SR(DCHUBBUB_CRC_CTRL), \
+ SR(DCN_VM_FB_LOCATION_BASE), \
+ SR(DCN_VM_FB_LOCATION_TOP), \
+ SR(DCN_VM_FB_OFFSET), \
+ SR(DCN_VM_AGP_BOT), \
+ SR(DCN_VM_AGP_TOP), \
+ SR(DCN_VM_AGP_BASE), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B), \
+ SR(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B), \
+ SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A), \
+ SR(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B), \
+ SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A), \
+ SR(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B), \
+ SR(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A), \
+ SR(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B), \
+ SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A), \
+ SR(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B), \
+ SR(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A), \
+ SR(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B), \
+ SR(DCHUBBUB_DET0_CTRL), \
+ SR(DCHUBBUB_DET1_CTRL), \
+ SR(DCHUBBUB_DET2_CTRL), \
+ SR(DCHUBBUB_DET3_CTRL), \
+ SR(DCHUBBUB_COMPBUF_CTRL), \
+ SR(COMPBUF_RESERVED_SPACE), \
+ SR(DCHUBBUB_DEBUG_CTRL_0), \
+ SR(DCHUBBUB_ARB_USR_RETRAINING_CNTL), \
+ SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A), \
+ SR(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B), \
+ SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A), \
+ SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B), \
+ SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A), \
+ SR(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B), \
+ SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A), \
+ SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B), \
+ SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A), \
+ SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B), \
+ SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB), \
+ SR(DCN_VM_FAULT_CNTL), \
+ SR(DCN_VM_FAULT_STATUS), \
+ SR(SDPIF_REQUEST_RATE_LIMIT), \
+ SR(DCHUBBUB_CLOCK_CNTL), \
+ SR(DCHUBBUB_SDPIF_CFG0), \
+ SR(DCHUBBUB_SDPIF_CFG1), \
+ SR(DCHUBBUB_MEM_PWR_MODE_CTRL), \
+ SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL1), \
+ SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2), \
+ SR(DCHUBBUB_CTRL_STATUS)
+
+/* DCCG: takes no argument; the DCCG_SRII entries spell out their instance numbers explicitly */
+
+#define DCCG_REG_LIST_DCN401_RI() \
+ SR(DPPCLK_DTO_CTRL), DCCG_SRII(DTO_PARAM, DPPCLK, 0), \
+ DCCG_SRII(DTO_PARAM, DPPCLK, 1), DCCG_SRII(DTO_PARAM, DPPCLK, 2), \
+ DCCG_SRII(DTO_PARAM, DPPCLK, 3), DCCG_SRII(CLOCK_CNTL, HDMICHARCLK, 0), \
+ SR(PHYASYMCLK_CLOCK_CNTL), SR(PHYBSYMCLK_CLOCK_CNTL), \
+ SR(PHYCSYMCLK_CLOCK_CNTL), SR(PHYDSYMCLK_CLOCK_CNTL), \
+ SR(DPSTREAMCLK_CNTL), SR(HDMISTREAMCLK_CNTL), \
+ SR(SYMCLK32_SE_CNTL), SR(SYMCLK32_LE_CNTL), \
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 0), DCCG_SRII(PIXEL_RATE_CNTL, OTG, 1), \
+ DCCG_SRII(PIXEL_RATE_CNTL, OTG, 2), DCCG_SRII(PIXEL_RATE_CNTL, OTG, 3), \
+ SR(OTG_PIXEL_RATE_DIV), SR(DTBCLK_P_CNTL), \
+ SR(DCCG_AUDIO_DTO_SOURCE), SR(DENTIST_DISPCLK_CNTL), \
+ SR(DPPCLK_CTRL), \
+ DCCG_SRII(MODULO, DP_DTO, 0), DCCG_SRII(MODULO, DP_DTO, 1), \
+ DCCG_SRII(MODULO, DP_DTO, 2), DCCG_SRII(MODULO, DP_DTO, 3), \
+ DCCG_SRII(PHASE, DP_DTO, 0), DCCG_SRII(PHASE, DP_DTO, 1), \
+ DCCG_SRII(PHASE, DP_DTO, 2), DCCG_SRII(PHASE, DP_DTO, 3), \
+ SR(DSCCLK0_DTO_PARAM),\
+ SR(DSCCLK1_DTO_PARAM),\
+ SR(DSCCLK2_DTO_PARAM),\
+ SR(DSCCLK3_DTO_PARAM),\
+ SR(DSCCLK_DTO_CTRL),\
+ SR(DCCG_GATE_DISABLE_CNTL),\
+ SR(DCCG_GATE_DISABLE_CNTL2),\
+ SR(DCCG_GATE_DISABLE_CNTL3),\
+ SR(DCCG_GATE_DISABLE_CNTL4),\
+ SR(DCCG_GATE_DISABLE_CNTL5),\
+ SR(DCCG_GATE_DISABLE_CNTL6),\
+ SR(SYMCLKA_CLOCK_ENABLE),\
+ SR(SYMCLKB_CLOCK_ENABLE),\
+ SR(SYMCLKC_CLOCK_ENABLE),\
+ SR(SYMCLKD_CLOCK_ENABLE)
+
+#endif /* _DCN401_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile
new file mode 100644
index 000000000000..bc93356a0b5b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: MIT
+#
+# Copyright 2025 Advanced Micro Devices, Inc.
+# Makefile for the soc_and_ip_translator (bounding box) component.
+# Bounding box values are floating point, so the dcn401 translator object (and only it) gets CC_FLAGS_FPU.
+
+soc_and_ip_translator_ccflags := $(CC_FLAGS_FPU)
+soc_and_ip_translator_rcflags := $(CC_FLAGS_NO_FPU)
+
+CFLAGS_$(AMDDALPATH)/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.o := $(soc_and_ip_translator_ccflags)
+
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.o := $(soc_and_ip_translator_rcflags)
+
+soc_and_ip_translator := soc_and_ip_translator.o
+soc_and_ip_translator += dcn401/dcn401_soc_and_ip_translator.o
+
+AMD_DAL_soc_and_ip_translator := $(addprefix $(AMDDALPATH)/dc/soc_and_ip_translator/, $(soc_and_ip_translator))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_soc_and_ip_translator)
diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c
new file mode 100644
index 000000000000..3190c76eb482
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2025 Advanced Micro Devices, Inc.
+
+#include "dcn401_soc_and_ip_translator.h"
+#include "bounding_boxes/dcn4_soc_bb.h"
+
+/* soc_and_ip_translator component used to get up-to-date values for bounding box.
+ * Bounding box values are stored in several locations and locations can vary with DCN revision.
+ * This component provides an interface to get DCN-specific bounding box values.
+ */
+
+static void get_default_soc_bb(struct dml2_soc_bb *soc_bb)
+{
+ memcpy(soc_bb, &dml2_socbb_dcn401, sizeof(struct dml2_soc_bb));
+ memcpy(&soc_bb->qos_parameters, &dml_dcn4_variant_a_soc_qos_params, sizeof(struct dml2_soc_qos_parameters));
+}
+
+/*
+ * DC clock table is obtained from SMU during runtime.
+ * SMU stands for System Management Unit. It is a power management processor.
+ * It owns the initialization of dc's clock table and programming of clock values
+ * based on dc's requests.
+ * Our clock values in base soc bb are dummy placeholders. The real clock values
+ * are retrieved from SMU firmware to dc clock table at runtime.
+ * This function overrides our dummy placeholder values with real values in dc
+ * clock table.
+ */
+static void dcn401_convert_dc_clock_table_to_soc_bb_clock_table(
+ struct dml2_soc_state_table *dml_clk_table,
+ const struct clk_bw_params *dc_bw_params,
+ bool use_clock_dc_limits)
+{
+ int i;
+ const struct clk_limit_table *dc_clk_table;
+
+ if (dc_bw_params == NULL)
+ /* skip if bw params could not be obtained from smu */
+ return;
+
+ dc_clk_table = &dc_bw_params->clk_table;
+
+ /* dcfclk */
+ if (dc_clk_table->num_entries_per_clk.num_dcfclk_levels) {
+ dml_clk_table->dcfclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dcfclk_levels;
+ for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) {
+ if (i < dml_clk_table->dcfclk.num_clk_values) {
+ if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dcfclk_mhz &&
+ dc_clk_table->entries[i].dcfclk_mhz > dc_bw_params->dc_mode_limit.dcfclk_mhz) {
+ if (i == 0 || dc_clk_table->entries[i-1].dcfclk_mhz < dc_bw_params->dc_mode_limit.dcfclk_mhz) {
+ dml_clk_table->dcfclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dcfclk_mhz * 1000;
+ dml_clk_table->dcfclk.num_clk_values = i + 1;
+ } else {
+ dml_clk_table->dcfclk.clk_values_khz[i] = 0;
+ dml_clk_table->dcfclk.num_clk_values = i;
+ }
+ } else {
+ dml_clk_table->dcfclk.clk_values_khz[i] = dc_clk_table->entries[i].dcfclk_mhz * 1000;
+ }
+ } else {
+ dml_clk_table->dcfclk.clk_values_khz[i] = 0;
+ }
+ }
+ }
+
+ /* fclk */
+ if (dc_clk_table->num_entries_per_clk.num_fclk_levels) {
+ dml_clk_table->fclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_fclk_levels;
+ for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) {
+ if (i < dml_clk_table->fclk.num_clk_values) {
+ if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.fclk_mhz &&
+ dc_clk_table->entries[i].fclk_mhz > dc_bw_params->dc_mode_limit.fclk_mhz) {
+ if (i == 0 || dc_clk_table->entries[i-1].fclk_mhz < dc_bw_params->dc_mode_limit.fclk_mhz) {
+ dml_clk_table->fclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.fclk_mhz * 1000;
+ dml_clk_table->fclk.num_clk_values = i + 1;
+ } else {
+ dml_clk_table->fclk.clk_values_khz[i] = 0;
+ dml_clk_table->fclk.num_clk_values = i;
+ }
+ } else {
+ dml_clk_table->fclk.clk_values_khz[i] = dc_clk_table->entries[i].fclk_mhz * 1000;
+ }
+ } else {
+ dml_clk_table->fclk.clk_values_khz[i] = 0;
+ }
+ }
+ }
+
+ /* uclk */
+ if (dc_clk_table->num_entries_per_clk.num_memclk_levels) {
+ dml_clk_table->uclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_memclk_levels;
+ for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) {
+ if (i < dml_clk_table->uclk.num_clk_values) {
+ if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.memclk_mhz &&
+ dc_clk_table->entries[i].memclk_mhz > dc_bw_params->dc_mode_limit.memclk_mhz) {
+ if (i == 0 || dc_clk_table->entries[i-1].memclk_mhz < dc_bw_params->dc_mode_limit.memclk_mhz) {
+ dml_clk_table->uclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.memclk_mhz * 1000;
+ dml_clk_table->uclk.num_clk_values = i + 1;
+ } else {
+ dml_clk_table->uclk.clk_values_khz[i] = 0;
+ dml_clk_table->uclk.num_clk_values = i;
+ }
+ } else {
+ dml_clk_table->uclk.clk_values_khz[i] = dc_clk_table->entries[i].memclk_mhz * 1000;
+ }
+ } else {
+ dml_clk_table->uclk.clk_values_khz[i] = 0;
+ }
+ }
+ }
+
+ /* dispclk */
+ if (dc_clk_table->num_entries_per_clk.num_dispclk_levels) {
+ dml_clk_table->dispclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dispclk_levels;
+ for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) {
+ if (i < dml_clk_table->dispclk.num_clk_values) {
+ if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dispclk_mhz &&
+ dc_clk_table->entries[i].dispclk_mhz > dc_bw_params->dc_mode_limit.dispclk_mhz) {
+ if (i == 0 || dc_clk_table->entries[i-1].dispclk_mhz < dc_bw_params->dc_mode_limit.dispclk_mhz) {
+ dml_clk_table->dispclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dispclk_mhz * 1000;
+ dml_clk_table->dispclk.num_clk_values = i + 1;
+ } else {
+ dml_clk_table->dispclk.clk_values_khz[i] = 0;
+ dml_clk_table->dispclk.num_clk_values = i;
+ }
+ } else {
+ dml_clk_table->dispclk.clk_values_khz[i] = dc_clk_table->entries[i].dispclk_mhz * 1000;
+ }
+ } else {
+ dml_clk_table->dispclk.clk_values_khz[i] = 0;
+ }
+ }
+ }
+
+ /* dppclk */
+ if (dc_clk_table->num_entries_per_clk.num_dppclk_levels) {
+ dml_clk_table->dppclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dppclk_levels;
+ for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) {
+ if (i < dml_clk_table->dppclk.num_clk_values) {
+ if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dppclk_mhz &&
+ dc_clk_table->entries[i].dppclk_mhz > dc_bw_params->dc_mode_limit.dppclk_mhz) {
+ if (i == 0 || dc_clk_table->entries[i-1].dppclk_mhz < dc_bw_params->dc_mode_limit.dppclk_mhz) {
+ dml_clk_table->dppclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dppclk_mhz * 1000;
+ dml_clk_table->dppclk.num_clk_values = i + 1;
+ } else {
+ dml_clk_table->dppclk.clk_values_khz[i] = 0;
+ dml_clk_table->dppclk.num_clk_values = i;
+ }
+ } else {
+ dml_clk_table->dppclk.clk_values_khz[i] = dc_clk_table->entries[i].dppclk_mhz * 1000;
+ }
+ } else {
+ dml_clk_table->dppclk.clk_values_khz[i] = 0;
+ }
+ }
+ }
+
+ /* dtbclk */
+ if (dc_clk_table->num_entries_per_clk.num_dtbclk_levels) {
+ dml_clk_table->dtbclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dtbclk_levels;
+ for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) {
+ if (i < dml_clk_table->dtbclk.num_clk_values) {
+ if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dtbclk_mhz &&
+ dc_clk_table->entries[i].dtbclk_mhz > dc_bw_params->dc_mode_limit.dtbclk_mhz) {
+ if (i == 0 || dc_clk_table->entries[i-1].dtbclk_mhz < dc_bw_params->dc_mode_limit.dtbclk_mhz) {
+ dml_clk_table->dtbclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dtbclk_mhz * 1000;
+ dml_clk_table->dtbclk.num_clk_values = i + 1;
+ } else {
+ dml_clk_table->dtbclk.clk_values_khz[i] = 0;
+ dml_clk_table->dtbclk.num_clk_values = i;
+ }
+ } else {
+ dml_clk_table->dtbclk.clk_values_khz[i] = dc_clk_table->entries[i].dtbclk_mhz * 1000;
+ }
+ } else {
+ dml_clk_table->dtbclk.clk_values_khz[i] = 0;
+ }
+ }
+ }
+
+ /* socclk */
+ if (dc_clk_table->num_entries_per_clk.num_socclk_levels) {
+ dml_clk_table->socclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_socclk_levels;
+ for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) {
+ if (i < dml_clk_table->socclk.num_clk_values) {
+ if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.socclk_mhz &&
+ dc_clk_table->entries[i].socclk_mhz > dc_bw_params->dc_mode_limit.socclk_mhz) {
+ if (i == 0 || dc_clk_table->entries[i-1].socclk_mhz < dc_bw_params->dc_mode_limit.socclk_mhz) {
+ dml_clk_table->socclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.socclk_mhz * 1000;
+ dml_clk_table->socclk.num_clk_values = i + 1;
+ } else {
+ dml_clk_table->socclk.clk_values_khz[i] = 0;
+ dml_clk_table->socclk.num_clk_values = i;
+ }
+ } else {
+ dml_clk_table->socclk.clk_values_khz[i] = dc_clk_table->entries[i].socclk_mhz * 1000;
+ }
+ } else {
+ dml_clk_table->socclk.clk_values_khz[i] = 0;
+ }
+ }
+ }
+
+ /* dram config */
+ dml_clk_table->dram_config.channel_count = dc_bw_params->num_channels;
+ dml_clk_table->dram_config.channel_width_bytes = dc_bw_params->dram_channel_width_bytes;
+}
+
+void dcn401_update_soc_bb_with_values_from_clk_mgr(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config)
+{
+ soc_bb->dprefclk_mhz = dc->clk_mgr->dprefclk_khz / 1000;
+ soc_bb->dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+ soc_bb->mall_allocated_for_dcn_mbytes = dc->caps.mall_size_total / (1024 * 1024);
+
+ if (dc->clk_mgr->funcs->is_smu_present &&
+ dc->clk_mgr->funcs->is_smu_present(dc->clk_mgr)) {
+ dcn401_convert_dc_clock_table_to_soc_bb_clock_table(&soc_bb->clk_table,
+ dc->clk_mgr->bw_params,
+ config->use_clock_dc_limits);
+ }
+}
+
+void dcn401_update_soc_bb_with_values_from_vbios(struct dml2_soc_bb *soc_bb, const struct dc *dc)
+{
+ soc_bb->dchub_refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000;
+ soc_bb->xtalclk_mhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000;
+
+ /* latencies in vbios are platform specific and should be used if provided */
+ if (dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns)
+ soc_bb->power_management_parameters.dram_clk_change_blackout_us =
+ dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns / 10.0;
+
+ if (dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns)
+ soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us =
+ dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns / 10.0;
+
+ if (dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns)
+ soc_bb->power_management_parameters.stutter_exit_latency_us =
+ dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns / 10.0;
+}
+
+void dcn401_update_soc_bb_with_values_from_software_policy(struct dml2_soc_bb *soc_bb, const struct dc *dc)
+{
+ /* set if the value is provided */
+ if (dc->bb_overrides.sr_exit_time_ns)
+ soc_bb->power_management_parameters.stutter_exit_latency_us =
+ dc->bb_overrides.sr_exit_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_enter_plus_exit_time_ns)
+ soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+
+ if (dc->bb_overrides.dram_clock_change_latency_ns)
+ soc_bb->power_management_parameters.dram_clk_change_blackout_us =
+ dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+
+ if (dc->bb_overrides.fclk_clock_change_latency_ns)
+ soc_bb->power_management_parameters.fclk_change_blackout_us =
+ dc->bb_overrides.fclk_clock_change_latency_ns / 1000.0;
+
+ //Z8 values not expected nor used on DCN401 but still added for completeness
+ if (dc->bb_overrides.sr_exit_z8_time_ns)
+ soc_bb->power_management_parameters.z8_stutter_exit_latency_us =
+ dc->bb_overrides.sr_exit_z8_time_ns / 1000.0;
+
+ if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns)
+ soc_bb->power_management_parameters.z8_stutter_enter_plus_exit_latency_us =
+ dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0;
+}
+
+static void apply_soc_bb_updates(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config)
+{
+ /* Individual modification can be overwritten even if it was obtained by a previous function.
+ * Modifications are acquired in order of priority (lowest to highest).
+ */
+ dc_assert_fp_enabled();
+
+ dcn401_update_soc_bb_with_values_from_clk_mgr(soc_bb, dc, config);
+ dcn401_update_soc_bb_with_values_from_vbios(soc_bb, dc);
+ dcn401_update_soc_bb_with_values_from_software_policy(soc_bb, dc);
+}
+
+void dcn401_get_soc_bb(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config)
+{
+ //get default soc_bb with static values
+ get_default_soc_bb(soc_bb);
+ //update soc_bb values with more accurate values
+ apply_soc_bb_updates(soc_bb, dc, config);
+}
+
+static void dcn401_get_ip_caps(struct dml2_ip_capabilities *ip_caps)
+{
+ *ip_caps = dml2_dcn401_max_ip_caps;
+}
+
+static struct soc_and_ip_translator_funcs dcn401_translator_funcs = {
+ .get_soc_bb = dcn401_get_soc_bb,
+ .get_ip_caps = dcn401_get_ip_caps,
+};
+
+void dcn401_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator)
+{
+ soc_and_ip_translator->translator_funcs = &dcn401_translator_funcs;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h
new file mode 100644
index 000000000000..21d842857601
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2025 Advanced Micro Devices, Inc.
+
+#ifndef _DCN401_SOC_AND_IP_TRANSLATOR_H_
+#define _DCN401_SOC_AND_IP_TRANSLATOR_H_
+
+#include "core_types.h"
+#include "dc.h"
+#include "clk_mgr.h"
+#include "soc_and_ip_translator.h"
+#include "dml2/dml21/inc/dml_top_soc_parameter_types.h"
+
+void dcn401_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator);
+
+/* Functions that can be re-used by higher DCN revisions of this component */
+void dcn401_get_soc_bb(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config);
+void dcn401_update_soc_bb_with_values_from_clk_mgr(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config);
+void dcn401_update_soc_bb_with_values_from_vbios(struct dml2_soc_bb *soc_bb, const struct dc *dc);
+void dcn401_update_soc_bb_with_values_from_software_policy(struct dml2_soc_bb *soc_bb, const struct dc *dc);
+
+#endif /* _DCN401_SOC_AND_IP_TRANSLATOR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c
new file mode 100644
index 000000000000..c9e224d262c9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2025 Advanced Micro Devices, Inc.
+
+#include "dcn42_soc_and_ip_translator.h"
+#include "soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h"
+#include "bounding_boxes/dcn42_soc_bb.h"
+
+/* soc_and_ip_translator component used to get up-to-date values for bounding box.
+ * Bounding box values are stored in several locations and locations can vary with DCN revision.
+ * This component provides an interface to get DCN-specific bounding box values.
+ */
+
+static void dcn42_get_ip_caps(struct dml2_ip_capabilities *ip_caps)
+{
+ *ip_caps = dml2_dcn42_max_ip_caps;
+}
+
+static struct soc_and_ip_translator_funcs dcn42_translator_funcs = {
+ .get_soc_bb = dcn401_get_soc_bb,
+ .get_ip_caps = dcn42_get_ip_caps,
+};
+
+void dcn42_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator)
+{
+ soc_and_ip_translator->translator_funcs = &dcn42_translator_funcs;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h
new file mode 100644
index 000000000000..914dcbb369a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2025 Advanced Micro Devices, Inc.
+
+#ifndef _DCN42_SOC_AND_IP_TRANSLATOR_H_
+#define _DCN42_SOC_AND_IP_TRANSLATOR_H_
+
+#include "core_types.h"
+#include "dc.h"
+#include "clk_mgr.h"
+#include "dml_top_soc_parameter_types.h"
+#include "soc_and_ip_translator.h"
+
+void dcn42_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator);
+
+#endif /* _DCN42_SOC_AND_IP_TRANSLATOR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c
new file mode 100644
index 000000000000..0fc0e5a6c171
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2025 Advanced Micro Devices, Inc.
+
+#include "soc_and_ip_translator.h"
+#include "soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h"
+
+static void dc_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator,
+ enum dce_version dc_version)
+{
+ switch (dc_version) {
+ case DCN_VERSION_4_01:
+ dcn401_construct_soc_and_ip_translator(soc_and_ip_translator);
+ break;
+ default:
+ break;
+ }
+}
+
+struct soc_and_ip_translator *dc_create_soc_and_ip_translator(enum dce_version dc_version)
+{
+ struct soc_and_ip_translator *soc_and_ip_translator;
+
+ soc_and_ip_translator = kzalloc(sizeof(*soc_and_ip_translator), GFP_KERNEL);
+ if (!soc_and_ip_translator)
+ return NULL;
+
+ dc_construct_soc_and_ip_translator(soc_and_ip_translator, dc_version);
+
+ return soc_and_ip_translator;
+}
+
+void dc_destroy_soc_and_ip_translator(struct soc_and_ip_translator **soc_and_ip_translator)
+{
+ kfree(*soc_and_ip_translator);
+ *soc_and_ip_translator = NULL;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/Makefile b/drivers/gpu/drm/amd/display/dc/sspl/Makefile
new file mode 100644
index 000000000000..5e3e4aa13820
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/Makefile
@@ -0,0 +1,33 @@
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
+# Makefile for the 'spl' sub-component of DAL.
+# It provides the scaling library interface.
+
+SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o spl_fixpt31_32.o spl_custom_float.o
+
+AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/sspl/,$(SPL))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_SPL)
+
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c
new file mode 100644
index 000000000000..b1fb0f8a253a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c
@@ -0,0 +1,1910 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dc_spl.h"
+#include "dc_spl_scl_easf_filters.h"
+#include "dc_spl_isharp_filters.h"
+#include "spl_debug.h"
+
+#define IDENTITY_RATIO(ratio) (spl_fixpt_u3d19(ratio) == (1 << 19))
+#define MIN_VIEWPORT_SIZE 12
+
+static bool spl_is_yuv420(enum spl_pixel_format format)
+{
+ if ((format >= SPL_PIXEL_FORMAT_420BPP8) &&
+ (format <= SPL_PIXEL_FORMAT_420BPP10))
+ return true;
+
+ return false;
+}
+
+static bool spl_is_rgb8(enum spl_pixel_format format)
+{
+ if (format == SPL_PIXEL_FORMAT_ARGB8888)
+ return true;
+
+ return false;
+}
+
+static bool spl_is_video_format(enum spl_pixel_format format)
+{
+ if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN
+ && format <= SPL_PIXEL_FORMAT_VIDEO_END)
+ return true;
+ else
+ return false;
+}
+
+static bool spl_is_subsampled_format(enum spl_pixel_format format)
+{
+ if (format >= SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN
+ && format <= SPL_PIXEL_FORMAT_SUBSAMPLED_END)
+ return true;
+ else
+ return false;
+}
+
+static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1)
+{
+ struct spl_rect rec;
+ int r0_x_end = r0->x + r0->width;
+ int r1_x_end = r1->x + r1->width;
+ int r0_y_end = r0->y + r0->height;
+ int r1_y_end = r1->y + r1->height;
+
+ rec.x = r0->x > r1->x ? r0->x : r1->x;
+ rec.width = r0_x_end > r1_x_end ? r1_x_end - rec.x : r0_x_end - rec.x;
+ rec.y = r0->y > r1->y ? r0->y : r1->y;
+ rec.height = r0_y_end > r1_y_end ? r1_y_end - rec.y : r0_y_end - rec.y;
+
+ /* in case that there is no intersection */
+ if (rec.width < 0 || rec.height < 0)
+ memset(&rec, 0, sizeof(rec));
+
+ return rec;
+}
+
+static struct spl_rect shift_rec(const struct spl_rect *rec_in, int x, int y)
+{
+ struct spl_rect rec_out = *rec_in;
+
+ rec_out.x += x;
+ rec_out.y += y;
+
+ return rec_out;
+}
+
+static void spl_opp_adjust_rect(struct spl_rect *rec, const struct spl_opp_adjust *adjust)
+{
+ if ((rec->x + adjust->x) >= 0)
+ rec->x += adjust->x;
+
+ if ((rec->y + adjust->y) >= 0)
+ rec->y += adjust->y;
+
+ if ((rec->width + adjust->width) >= 1)
+ rec->width += adjust->width;
+
+ if ((rec->height + adjust->height) >= 1)
+ rec->height += adjust->height;
+}
+
+static struct spl_rect calculate_plane_rec_in_timing_active(
+ struct spl_in *spl_in,
+ const struct spl_rect *rec_in)
+{
+ /*
+ * The following diagram shows an example where we map a 1920x1200
+ * desktop to a 2560x1440 timing with a plane rect in the middle
+ * of the screen. To map a plane rect from Stream Source to Timing
+ * Active space, we first multiply stream scaling ratios (i.e 2304/1920
+ * horizontal and 1440/1200 vertical) to the plane's x and y, then
+ * we add stream destination offsets (i.e 128 horizontal, 0 vertical).
+ * This will give us a plane rect's position in Timing Active. However
+ * we have to remove the fractional part. The rule is that we find left/right
+ * and top/bottom positions and round the value to the adjacent integer.
+ *
+ * Stream Source Space
+ * ------------
+ * __________________________________________________
+ * |Stream Source (1920 x 1200) ^ |
+ * | y |
+ * | <------- w --------|> |
+ * | __________________V |
+ * |<-- x -->|Plane//////////////| ^ |
+ * | |(pre scale)////////| | |
+ * | |///////////////////| | |
+ * | |///////////////////| h |
+ * | |///////////////////| | |
+ * | |///////////////////| | |
+ * | |///////////////////| V |
+ * | |
+ * | |
+ * |__________________________________________________|
+ *
+ *
+ * Timing Active Space
+ * ---------------------------------
+ *
+ * Timing Active (2560 x 1440)
+ * __________________________________________________
+ * |*****| Stream Destination (2304 x 1440) |*****|
+ * |*****| |*****|
+ * |<128>| |*****|
+ * |*****| __________________ |*****|
+ * |*****| |Plane/////////////| |*****|
+ * |*****| |(post scale)//////| |*****|
+ * |*****| |//////////////////| |*****|
+ * |*****| |//////////////////| |*****|
+ * |*****| |//////////////////| |*****|
+ * |*****| |//////////////////| |*****|
+ * |*****| |*****|
+ * |*****| |*****|
+ * |*****| |*****|
+ * |*****|______________________________________|*****|
+ *
+ * So the resulting formulas are shown below:
+ *
+ * recout_x = 128 + round(plane_x * 2304 / 1920)
+ * recout_w = 128 + round((plane_x + plane_w) * 2304 / 1920) - recout_x
+ * recout_y = 0 + round(plane_y * 1440 / 1200)
+ * recout_h = 0 + round((plane_y + plane_h) * 1440 / 1200) - recout_y
+ *
+ * NOTE: fixed point division is not error free. To reduce errors
+ * introduced by fixed point division, we divide only after
+ * multiplication is complete.
+ */
+ const struct spl_rect *stream_src = &spl_in->basic_out.src_rect;
+ const struct spl_rect *stream_dst = &spl_in->basic_out.dst_rect;
+ struct spl_rect rec_out = {0};
+ struct spl_fixed31_32 temp;
+
+
+ temp = spl_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width,
+ stream_src->width);
+ rec_out.x = stream_dst->x + spl_fixpt_round(temp);
+
+ temp = spl_fixpt_from_fraction(
+ (rec_in->x + rec_in->width) * (long long)stream_dst->width,
+ stream_src->width);
+ rec_out.width = stream_dst->x + spl_fixpt_round(temp) - rec_out.x;
+
+ temp = spl_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height,
+ stream_src->height);
+ rec_out.y = stream_dst->y + spl_fixpt_round(temp);
+
+ temp = spl_fixpt_from_fraction(
+ (rec_in->y + rec_in->height) * (long long)stream_dst->height,
+ stream_src->height);
+ rec_out.height = stream_dst->y + spl_fixpt_round(temp) - rec_out.y;
+
+ return rec_out;
+}
+
+static struct spl_rect calculate_mpc_slice_in_timing_active(
+ struct spl_in *spl_in,
+ struct spl_rect *plane_clip_rec)
+{
+ bool use_recout_width_aligned =
+ spl_in->basic_in.num_h_slices_recout_width_align.use_recout_width_aligned;
+ int mpc_slice_count =
+ spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_num_h_slices;
+ int recout_width_align =
+ spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_recout_width_align;
+ int mpc_slice_idx = spl_in->basic_in.mpc_h_slice_index;
+ int epimo = mpc_slice_count - plane_clip_rec->width % mpc_slice_count - 1;
+ struct spl_rect mpc_rec;
+
+ if (spl_in->basic_in.custom_width != 0) {
+ mpc_rec.width = spl_in->basic_in.custom_width;
+ mpc_rec.x = spl_in->basic_in.custom_x;
+ mpc_rec.height = plane_clip_rec->height;
+ mpc_rec.y = plane_clip_rec->y;
+ } else if (use_recout_width_aligned) {
+ mpc_rec.width = recout_width_align;
+ if ((mpc_rec.width * (mpc_slice_idx + 1)) > plane_clip_rec->width) {
+ mpc_rec.width = plane_clip_rec->width % recout_width_align;
+ mpc_rec.x = plane_clip_rec->x + recout_width_align * mpc_slice_idx;
+ } else
+ mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx;
+ mpc_rec.height = plane_clip_rec->height;
+ mpc_rec.y = plane_clip_rec->y;
+
+ } else {
+ mpc_rec.width = plane_clip_rec->width / mpc_slice_count;
+ mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx;
+ mpc_rec.height = plane_clip_rec->height;
+ mpc_rec.y = plane_clip_rec->y;
+ }
+ SPL_ASSERT(mpc_slice_count == 1 ||
+ spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE ||
+ mpc_rec.width % 2 == 0);
+
+ /* extra pixels in the division remainder need to go to pipes after
+ * the extra pixel index minus one(epimo) defined here as:
+ */
+ if (mpc_slice_idx > epimo && spl_in->basic_in.custom_width == 0) {
+ mpc_rec.x += mpc_slice_idx - epimo - 1;
+ mpc_rec.width += 1;
+ }
+
+ if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) {
+ SPL_ASSERT(mpc_rec.height % 2 == 0);
+ mpc_rec.height /= 2;
+ }
+ return mpc_rec;
+}
+
+static struct spl_rect calculate_odm_slice_in_timing_active(struct spl_in *spl_in)
+{
+ int odm_slice_count = spl_in->basic_out.odm_combine_factor;
+ int odm_slice_idx = spl_in->odm_slice_index;
+ bool is_last_odm_slice = (odm_slice_idx + 1) == odm_slice_count;
+ int h_active = spl_in->basic_out.output_size.width;
+ int v_active = spl_in->basic_out.output_size.height;
+ int odm_slice_width;
+ struct spl_rect odm_rec;
+
+ if (spl_in->basic_out.odm_combine_factor > 0) {
+ odm_slice_width = h_active / odm_slice_count;
+ /*
+ * deprecated, caller must pass in odm slice rect i.e OPP input
+ * rect in timing active for the new interface.
+ */
+ if (spl_in->basic_out.use_two_pixels_per_container && (odm_slice_width % 2))
+ odm_slice_width++;
+
+ odm_rec.x = odm_slice_width * odm_slice_idx;
+ odm_rec.width = is_last_odm_slice ?
+ /* last slice width is the remainder of h_active */
+ h_active - odm_slice_width * (odm_slice_count - 1) :
+ /* odm slice width is the floor of h_active / count */
+ odm_slice_width;
+ odm_rec.y = 0;
+ odm_rec.height = v_active;
+
+ return odm_rec;
+ }
+
+ return spl_in->basic_out.odm_slice_rect;
+}
+
+static void spl_calculate_recout(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out)
+{
+ /*
+ * A plane clip represents the desired plane size and position in Stream
+ * Source Space. Stream Source is the destination where all planes are
+ * blended (i.e. positioned, scaled and overlaid). It is a canvas where
+ * all planes associated with the current stream are drawn together.
+ * After Stream Source is completed, we will further scale and
+ * reposition the entire canvas of the stream source to Stream
+ * Destination in Timing Active Space. This could be due to display
+ * overscan adjustment where we will need to rescale and reposition all
+ * the planes so they can fit into a TV with overscan, or due to
+ * downscale/upscale features such as GPU scaling or VSR.
+ *
+ * This two step blending is a virtual procedure in software. In
+ * hardware there is no such thing as Stream Source. All planes are
+ * blended once in Timing Active Space. Software virtualizes a Stream
+ * Source space to decouple the math complexity so scaling param
+ * calculation focuses on one step at a time.
+ *
+ * In the following two diagrams, user applied 10% overscan adjustment
+ * so the Stream Source needs to be scaled down a little before mapping
+ * to Timing Active Space. As a result the Plane Clip is also scaled
+ * down by the same ratio, Plane Clip position (i.e. x and y) with
+ * respect to Stream Source is also scaled down. To map it in Timing
+ * Active Space additional x and y offsets from Stream Destination are
+ * added to Plane Clip as well.
+ *
+ * Stream Source Space
+ * ------------
+ * __________________________________________________
+ * |Stream Source (3840 x 2160) ^ |
+ * | y |
+ * | | |
+ * | __________________V |
+ * |<-- x -->|Plane Clip/////////| |
+ * | |(pre scale)////////| |
+ * | |///////////////////| |
+ * | |///////////////////| |
+ * | |///////////////////| |
+ * | |///////////////////| |
+ * | |///////////////////| |
+ * | |
+ * | |
+ * |__________________________________________________|
+ *
+ *
+ * Timing Active Space (3840 x 2160)
+ * ---------------------------------
+ *
+ * Timing Active
+ * __________________________________________________
+ * | y_____________________________________________ |
+ * |x |Stream Destination (3456 x 1944) | |
+ * | | | |
+ * | | __________________ | |
+ * | | |Plane Clip////////| | |
+ * | | |(post scale)//////| | |
+ * | | |//////////////////| | |
+ * | | |//////////////////| | |
+ * | | |//////////////////| | |
+ * | | |//////////////////| | |
+ * | | | |
+ * | | | |
+ * | |____________________________________________| |
+ * |__________________________________________________|
+ *
+ *
+ * In Timing Active Space a plane clip could be further sliced into
+ * pieces called MPC slices. Each Pipe Context is responsible for
+ * processing only one MPC slice so the plane processing workload can be
+ * distributed to multiple DPP Pipes. MPC slices could be blended
+ * together to a single ODM slice. Each ODM slice is responsible for
+ * processing a portion of Timing Active divided horizontally so the
+ * output pixel processing workload can be distributed to multiple OPP
+ * pipes. All ODM slices are mapped together in ODM block so all MPC
+ * slices belonging to different ODM slices could be pieced together to
+ * form a single image in Timing Active. MPC slices must belong to
+ * single ODM slice. If an MPC slice goes across ODM slice boundary, it
+ * needs to be divided into two MPC slices one for each ODM slice.
+ *
+ * In the following diagram the output pixel processing workload is
+ * divided horizontally into two ODM slices one for each OPP blend tree.
+ * OPP0 blend tree is responsible for processing left half of Timing
+ * Active, while OPP2 blend tree is responsible for processing right
+ * half.
+ *
+ * The plane has two MPC slices. However since the right MPC slice goes
+ * across ODM boundary, two DPP pipes are needed one for each OPP blend
+ * tree. (i.e. DPP1 for OPP0 blend tree and DPP2 for OPP2 blend tree).
+ *
+ * Assuming that we have a Pipe Context associated with OPP0 and DPP1
+ * working on processing the plane in the diagram. We want to know the
+ * width and height of the shaded rectangle and its relative position
+ * with respect to the ODM slice0. This is called the recout of the pipe
+ * context.
+ *
+ * Planes can be at arbitrary size and position and there could be an
+ * arbitrary number of MPC and ODM slices. The algorithm needs to take
+ * all scenarios into account.
+ *
+ * Timing Active Space (3840 x 2160)
+ * ---------------------------------
+ *
+ * Timing Active
+ * __________________________________________________
+ * |OPP0(ODM slice0)^ |OPP2(ODM slice1) |
+ * | y | |
+ * | | <- w -> |
+ * | _____V________|____ |
+ * | |DPP0 ^ |DPP1 |DPP2| |
+ * |<------ x |-----|->|/////| | |
+ * | | | |/////| | |
+ * | | h |/////| | |
+ * | | | |/////| | |
+ * | |_____V__|/////|____| |
+ * | | |
+ * | | |
+ * | | |
+ * |_________________________|________________________|
+ *
+ *
+ */
+ struct spl_rect plane_clip;
+ struct spl_rect mpc_slice_of_plane_clip;
+ struct spl_rect odm_slice;
+ struct spl_rect overlapping_area;
+
+ plane_clip = calculate_plane_rec_in_timing_active(spl_in,
+ &spl_in->basic_in.clip_rect);
+ /* guard plane clip from drawing beyond stream dst here */
+ plane_clip = intersect_rec(&plane_clip,
+ &spl_in->basic_out.dst_rect);
+ mpc_slice_of_plane_clip = calculate_mpc_slice_in_timing_active(
+ spl_in, &plane_clip);
+ odm_slice = calculate_odm_slice_in_timing_active(spl_in);
+ overlapping_area = intersect_rec(&mpc_slice_of_plane_clip, &odm_slice);
+
+ if (overlapping_area.height > 0 &&
+ overlapping_area.width > 0) {
+ /* shift the overlapping area so it is with respect to current
+ * ODM slice's position
+ */
+ spl_scratch->scl_data.recout = shift_rec(
+ &overlapping_area,
+ -odm_slice.x, -odm_slice.y);
+ spl_scratch->scl_data.recout.height -=
+ spl_in->debug.visual_confirm_base_offset;
+ spl_scratch->scl_data.recout.height -=
+ spl_in->debug.visual_confirm_dpp_offset;
+ } else
+ /* if there is no overlap, zero recout */
+ memset(&spl_scratch->scl_data.recout, 0,
+ sizeof(struct spl_rect));
+}
+
+/*
+ * Calculate the luma and chroma scaling ratios (input / output) together
+ * with their reciprocals and store them in spl_scratch->scl_data.
+ */
+static void spl_calculate_scaling_ratios(struct spl_in *spl_in,
+		struct spl_scratch *spl_scratch,
+		struct spl_out *spl_out)
+{
+	struct spl_scaler_data *scl = &spl_scratch->scl_data;
+	const int stream_src_w = spl_in->basic_out.src_rect.width;
+	const int stream_src_h = spl_in->basic_out.src_rect.height;
+	const int stream_dst_w = spl_in->basic_out.dst_rect.width;
+	const int stream_dst_h = spl_in->basic_out.dst_rect.height;
+	const bool orthogonal = spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 ||
+			spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270;
+	struct spl_rect plane_src = spl_in->basic_in.src_rect;
+
+	/* Scaling ratios are in recout rotation, so swap the surface axes */
+	if (orthogonal)
+		spl_swap(plane_src.height, plane_src.width);
+
+	/* Plane level ratio: surface source size over plane destination size */
+	scl->ratios.horz = spl_fixpt_from_fraction(plane_src.width,
+			spl_in->basic_in.dst_rect.width);
+	scl->ratios.vert = spl_fixpt_from_fraction(plane_src.height,
+			spl_in->basic_in.dst_rect.height);
+
+	/* Side by side doubles horz, top and bottom doubles vert */
+	if (spl_in->basic_out.view_format == SPL_VIEW_3D_SIDE_BY_SIDE)
+		scl->ratios.horz.value *= 2;
+	else if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM)
+		scl->ratios.vert.value *= 2;
+
+	/* Fold in the stream level scaling (basic_out src over basic_out dst) */
+	scl->ratios.horz.value = spl_div64_s64(
+			scl->ratios.horz.value * stream_src_w, stream_dst_w);
+	scl->ratios.vert.value = spl_div64_s64(
+			scl->ratios.vert.value * stream_src_h, stream_dst_h);
+
+	/* Chroma starts out equal to luma and is halved for YUV 4:2:0 */
+	scl->ratios.horz_c = scl->ratios.horz;
+	scl->ratios.vert_c = scl->ratios.vert;
+	if (spl_is_yuv420(spl_in->basic_in.format)) {
+		scl->ratios.horz_c.value /= 2;
+		scl->ratios.vert_c.value /= 2;
+	}
+
+	/* Truncate all four ratios through spl_fixpt_truncate(..., 19) */
+	scl->ratios.horz = spl_fixpt_truncate(scl->ratios.horz, 19);
+	scl->ratios.vert = spl_fixpt_truncate(scl->ratios.vert, 19);
+	scl->ratios.horz_c = spl_fixpt_truncate(scl->ratios.horz_c, 19);
+	scl->ratios.vert_c = spl_fixpt_truncate(scl->ratios.vert_c, 19);
+
+	/*
+	 * Coefficient table and some registers are based on the ratio that is
+	 * output/input, whereas input/output is calculated above, so 1/ratio
+	 * is stored in recip_ratios for those lookups.
+	 */
+	scl->recip_ratios.horz = spl_fixpt_recip(scl->ratios.horz);
+	scl->recip_ratios.vert = spl_fixpt_recip(scl->ratios.vert);
+	scl->recip_ratios.horz_c = spl_fixpt_recip(scl->ratios.horz_c);
+	scl->recip_ratios.vert_c = spl_fixpt_recip(scl->ratios.vert_c);
+}
+
+/* Derive luma and chroma viewport sizes from the recout and scaling ratios */
+static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_scratch *spl_scratch)
+{
+	struct spl_scaler_data *scl = &spl_scratch->scl_data;
+	const bool orthogonal = spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 ||
+			spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270;
+
+	/* size = ceil(ratio * recout size), per axis, for luma then chroma */
+	scl->viewport.width = spl_fixpt_ceil(spl_fixpt_mul_int(scl->ratios.horz, scl->recout.width));
+	scl->viewport.height = spl_fixpt_ceil(spl_fixpt_mul_int(scl->ratios.vert, scl->recout.height));
+	scl->viewport_c.width = spl_fixpt_ceil(spl_fixpt_mul_int(scl->ratios.horz_c, scl->recout.width));
+	scl->viewport_c.height = spl_fixpt_ceil(spl_fixpt_mul_int(scl->ratios.vert_c, scl->recout.height));
+	if (!orthogonal)
+		return;
+	spl_swap(scl->viewport.width, scl->viewport.height);
+	spl_swap(scl->viewport_c.width, scl->viewport_c.height);
+}
+
+/* Map rotation and mirroring to the viewport scan direction flags */
+static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation, bool horizontal_mirror,
+		bool *orthogonal_rotation, bool *flip_vert_scan_dir, bool *flip_horz_scan_dir)
+{
+	bool orthogonal = false, flip_v = false, flip_h = false;
+
+	switch (rotation) {
+	case SPL_ROTATION_ANGLE_180:
+		flip_v = flip_h = true;
+		break;
+	case SPL_ROTATION_ANGLE_90:
+		orthogonal = flip_h = true;
+		break;
+	case SPL_ROTATION_ANGLE_270:
+		orthogonal = flip_v = true;
+		break;
+	default:
+		break;
+	}
+	*orthogonal_rotation = orthogonal;
+	*flip_vert_scan_dir = flip_v;
+	*flip_horz_scan_dir = horizontal_mirror ? !flip_h : flip_h;
+}
+
+/*
+ * Compute the filter init value (*init), viewport offset (*vp_offset) and viewport size (*vp_size)
+ * for one axis, based entirely on the scaling ratio and recout, for pixel perfect pipe combine.
+ */
+static void spl_calculate_init_and_vp(bool flip_scan_dir, /* true: measure the offset from the far side of src_size */
+ int recout_offset_within_recout_full,
+ int recout_size,
+ int src_size,
+ int taps,
+ struct spl_fixed31_32 ratio,
+ struct spl_fixed31_32 init_adj, /* added to the computed init value */
+ struct spl_fixed31_32 *init, /* out: filter init value */
+ int *vp_offset, /* out: viewport offset */
+ int *vp_size) /* out: viewport size */
+{
+ struct spl_fixed31_32 temp;
+ int int_part;
+
+ /*
+ * First of the taps starts sampling pixel number <init_int_part> corresponding to recout
+ * pixel 1. Next recout pixel samples int part of <init + scaling ratio> and so on.
+ * All following calculations are based on this logic.
+ *
+ * Init calculated according to formula:
+ * init = (scaling_ratio + number_of_taps + 1) / 2
+ * init_bot = init + scaling_ratio
+ * to get pixel perfect combine add the fraction from calculating vp offset
+ */
+ temp = spl_fixpt_mul_int(ratio, recout_offset_within_recout_full);
+ *vp_offset = spl_fixpt_floor(temp);
+ temp.value &= 0xffffffff; /* keep only the low 32 bits, i.e. the fraction mentioned above */
+ *init = spl_fixpt_add(spl_fixpt_div_int(spl_fixpt_add_int(ratio, taps + 1), 2), temp);
+ *init = spl_fixpt_add(*init, init_adj); /* caller supplied adjustment */
+ *init = spl_fixpt_truncate(*init, 19);
+
+ /*
+ * If viewport has non 0 offset and there are more taps than covered by init then
+ * we should decrease the offset and increase init so we are never sampling
+ * outside of viewport.
+ */
+ int_part = spl_fixpt_floor(*init);
+ if (int_part < taps) {
+ int_part = taps - int_part; /* number of taps not covered by init */
+ if (int_part > *vp_offset)
+ int_part = *vp_offset; /* never move the offset below zero */
+ *vp_offset -= int_part;
+ *init = spl_fixpt_add_int(*init, int_part);
+ }
+ /*
+ * If taps are sampling outside of viewport at end of recout and there are more pixels
+ * available in the surface we should increase the viewport size, regardless set vp to
+ * only what is used.
+ */
+ temp = spl_fixpt_add(*init, spl_fixpt_mul_int(ratio, recout_size - 1));
+ *vp_size = spl_fixpt_floor(temp);
+ if (*vp_size + *vp_offset > src_size)
+ *vp_size = src_size - *vp_offset; /* clamp so offset + size stays within src_size */
+
+ /* We did all the math assuming we are scanning same direction as display does,
+ * however mirror/rotation changes how vp scans vs how it is offset. If scan direction
+ * is flipped we simply need to calculate offset from the other side of plane.
+ * Note that outside of viewport all scaling hardware works in recout space.
+ */
+ if (flip_scan_dir)
+ *vp_offset = src_size - *vp_offset - *vp_size;
+}
+
+/* Calculate filter inits plus luma/chroma viewport offset and size for both axes */
+static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
+ struct spl_scratch *spl_scratch)
+{
+ struct spl_rect src = spl_in->basic_in.src_rect;
+ struct spl_rect recout_dst_in_active_timing;
+ struct spl_rect recout_clip_in_active_timing;
+ struct spl_rect recout_clip_in_recout_dst;
+ struct spl_rect overlap_in_active_timing;
+ struct spl_rect odm_slice = calculate_odm_slice_in_timing_active(spl_in);
+ int vpc_div = spl_is_subsampled_format(spl_in->basic_in.format) ? 2 : 1; /* chroma size divisor */
+ bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir;
+ struct spl_fixed31_32 init_adj_h = spl_fixpt_zero;
+ struct spl_fixed31_32 init_adj_v = spl_fixpt_zero;
+
+ recout_clip_in_active_timing = shift_rec(
+ &spl_scratch->scl_data.recout, odm_slice.x, odm_slice.y);
+ recout_dst_in_active_timing = calculate_plane_rec_in_timing_active(
+ spl_in, &spl_in->basic_in.dst_rect);
+ overlap_in_active_timing = intersect_rec(&recout_clip_in_active_timing,
+ &recout_dst_in_active_timing);
+ if (overlap_in_active_timing.width > 0 &&
+ overlap_in_active_timing.height > 0)
+ recout_clip_in_recout_dst = shift_rec(&overlap_in_active_timing,
+ -recout_dst_in_active_timing.x,
+ -recout_dst_in_active_timing.y);
+ else
+ memset(&recout_clip_in_recout_dst, 0, sizeof(struct spl_rect)); /* no overlap: zero rect */
+ /*
+ * Work in recout rotation since that requires less transformations
+ */
+ spl_get_vp_scan_direction(
+ spl_in->basic_in.rotation,
+ spl_in->basic_in.horizontal_mirror,
+ &orthogonal_rotation,
+ &flip_vert_scan_dir,
+ &flip_horz_scan_dir);
+
+ if (spl_is_subsampled_format(spl_in->basic_in.format)) {
+ /* this gives the direction of the cositing (negative will move
+ * left, right otherwise)
+ */
+ int h_sign = flip_horz_scan_dir ? -1 : 1;
+ int v_sign = flip_vert_scan_dir ? -1 : 1;
+
+ switch (spl_in->basic_in.cositing) {
+ case CHROMA_COSITING_TOPLEFT:
+ init_adj_h = spl_fixpt_from_fraction(h_sign, 4);
+ init_adj_v = spl_fixpt_from_fraction(v_sign, 4);
+ break;
+ case CHROMA_COSITING_LEFT:
+ init_adj_h = spl_fixpt_from_fraction(h_sign, 4);
+ init_adj_v = spl_fixpt_zero;
+ break;
+ case CHROMA_COSITING_NONE:
+ default:
+ init_adj_h = spl_fixpt_zero;
+ init_adj_v = spl_fixpt_zero;
+ break;
+ }
+ }
+
+ if (orthogonal_rotation) { /* bring source size, flip flags and adjustments into recout rotation */
+ spl_swap(src.width, src.height);
+ spl_swap(flip_vert_scan_dir, flip_horz_scan_dir);
+ spl_swap(init_adj_h, init_adj_v);
+ }
+
+ spl_calculate_init_and_vp( /* luma, horizontal */
+ flip_horz_scan_dir,
+ recout_clip_in_recout_dst.x,
+ spl_scratch->scl_data.recout.width,
+ src.width,
+ spl_scratch->scl_data.taps.h_taps,
+ spl_scratch->scl_data.ratios.horz,
+ spl_fixpt_zero, /* no init adjustment for luma */
+ &spl_scratch->scl_data.inits.h,
+ &spl_scratch->scl_data.viewport.x,
+ &spl_scratch->scl_data.viewport.width);
+ spl_calculate_init_and_vp( /* chroma, horizontal */
+ flip_horz_scan_dir,
+ recout_clip_in_recout_dst.x,
+ spl_scratch->scl_data.recout.width,
+ src.width / vpc_div,
+ spl_scratch->scl_data.taps.h_taps_c,
+ spl_scratch->scl_data.ratios.horz_c,
+ init_adj_h,
+ &spl_scratch->scl_data.inits.h_c,
+ &spl_scratch->scl_data.viewport_c.x,
+ &spl_scratch->scl_data.viewport_c.width);
+ spl_calculate_init_and_vp( /* luma, vertical */
+ flip_vert_scan_dir,
+ recout_clip_in_recout_dst.y,
+ spl_scratch->scl_data.recout.height,
+ src.height,
+ spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.ratios.vert,
+ spl_fixpt_zero, /* no init adjustment for luma */
+ &spl_scratch->scl_data.inits.v,
+ &spl_scratch->scl_data.viewport.y,
+ &spl_scratch->scl_data.viewport.height);
+ spl_calculate_init_and_vp( /* chroma, vertical */
+ flip_vert_scan_dir,
+ recout_clip_in_recout_dst.y,
+ spl_scratch->scl_data.recout.height,
+ src.height / vpc_div,
+ spl_scratch->scl_data.taps.v_taps_c,
+ spl_scratch->scl_data.ratios.vert_c,
+ init_adj_v,
+ &spl_scratch->scl_data.inits.v_c,
+ &spl_scratch->scl_data.viewport_c.y,
+ &spl_scratch->scl_data.viewport_c.height);
+ if (orthogonal_rotation) { /* swap the viewport axes back after working in recout rotation */
+ spl_swap(spl_scratch->scl_data.viewport.x, spl_scratch->scl_data.viewport.y);
+ spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height);
+ spl_swap(spl_scratch->scl_data.viewport_c.x, spl_scratch->scl_data.viewport_c.y);
+ spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height);
+ }
+ spl_scratch->scl_data.viewport.x += src.x; /* offset by the source rect origin */
+ spl_scratch->scl_data.viewport.y += src.y;
+ SPL_ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); /* origin must divide evenly for chroma */
+ spl_scratch->scl_data.viewport_c.x += src.x / vpc_div;
+ spl_scratch->scl_data.viewport_c.y += src.y / vpc_div;
+}
+
+static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout)
+{
+	const bool is_tab = spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM;
+	const bool is_sbs = spl_in->basic_out.view_format == SPL_VIEW_3D_SIDE_BY_SIDE;
+
+	/*
+	 * 3d needs vp calculated as if there is no recout offset, so side by side and
+	 * top/bottom offsets are applied here; rotation with 3d is asserted against.
+	 */
+	if (!spl_in->basic_in.mpc_h_slice_index)
+		return;
+	SPL_ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || !(is_tab || is_sbs));
+	if (is_tab)
+		recout->y += recout->height;
+	else if (is_sbs)
+		recout->x += recout->width;
+}
+
+static void spl_clamp_viewport(struct spl_rect *viewport, int min_viewport_size)
+{
+	/* A zero minimum selects MIN_VIEWPORT_SIZE; both dimensions are raised to it */
+	const int min_size = min_viewport_size ? min_viewport_size : MIN_VIEWPORT_SIZE;
+
+	if (viewport->width < min_size)
+		viewport->width = min_size;
+	if (viewport->height < min_size)
+		viewport->height = min_size;
+}
+
+/* Select the DSCL mode from the ratios, pixel format and ISHARP / always_scale state */
+static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in,
+		const struct spl_scaler_data *data,
+		bool enable_isharp, bool enable_easf)
+{
+	const long long unity = spl_fixpt_one.value;
+	const enum spl_pixel_format fmt = spl_in->basic_in.format;
+	const bool luma_is_1to1 = data->ratios.horz.value == unity &&
+			data->ratios.vert.value == unity;
+	const bool chroma_is_1to1 = data->ratios.horz_c.value == unity &&
+			data->ratios.vert_c.value == unity;
+	/* Bypass is only an option without ISHARP and without forced scaling */
+	const bool may_bypass = !enable_isharp && !spl_in->basic_out.always_scale;
+
+	/* Every ratio is 1:1: full bypass */
+	if (may_bypass && luma_is_1to1 && chroma_is_1to1)
+		return SCL_MODE_SCALING_444_BYPASS;
+
+	/* Non subsampled formats: pick the YCbCr or RGB 444 mode */
+	if (!spl_is_subsampled_format(fmt))
+		return spl_is_video_format(fmt) ?
+			SCL_MODE_SCALING_444_YCBCR_ENABLE :
+			SCL_MODE_SCALING_444_RGB_ENABLE;
+
+	/*
+	 * Subsampled: bypass luma when Y is 1:1, but do not bypass UV at 1:1
+	 * so that cositing is still applied.
+	 */
+	if (may_bypass && luma_is_1to1)
+		return SCL_MODE_SCALING_420_LUMA_BYPASS;
+
+	/* Otherwise use the 420 YCbCr scaling mode */
+	return SCL_MODE_SCALING_420_YCBCR_ENABLE;
+}
+
+/* Default LLS preference: no for subsampled formats, yes for RGB / YUV444 */
+static void spl_choose_lls_policy(enum spl_pixel_format format,
+		enum linear_light_scaling *lls_pref)
+{
+	const bool subsampled = spl_is_subsampled_format(format);
+
+	*lls_pref = subsampled ? LLS_PREF_NO : LLS_PREF_YES;
+}
+
+/*
+ * Decide whether EASF has to be skipped.
+ * Note: despite the name, the return value is true when EASF must NOT be used.
+ */
+static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch)
+{
+	const int v_ratio_ceil = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert);
+	const int h_ratio_ceil = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz);
+
+	/*
+	 * With LLS_PREF_DONT_CARE the pixel format decides between LINEAR and
+	 * NONLINEAR scaling; the resolved preference is written back to spl_in.
+	 */
+	if (spl_in->lls_pref == LLS_PREF_DONT_CARE)
+		spl_choose_lls_policy(spl_in->basic_in.format,
+				&spl_in->lls_pref);
+
+	/* Explicitly disabled through spl_in */
+	if (spl_in->disable_easf)
+		return true;
+
+	/*
+	 * No EASF support for downscaling > 2:1
+	 * EASF support for upscaling or downscaling up to 2:1
+	 */
+	if (v_ratio_ceil > 2 || h_ratio_ceil > 2)
+		return true;
+
+	/* Needs either linear scaling or an explicit EASF preference */
+	if (spl_in->lls_pref != LLS_PREF_YES && !spl_in->prefer_easf)
+		return true;
+
+	return false;
+}
+
+/* True when the surface has a video format and spl_in->is_fullscreen is set */
+static bool spl_is_video_fullscreen(struct spl_in *spl_in)
+{
+	const bool is_video = spl_is_video_format(spl_in->basic_in.format);
+
+	return is_video && spl_in->is_fullscreen;
+}
+
+/* Decide whether the adaptive sharpener (ISHARP) can be enabled */
+static bool spl_get_isharp_en(struct spl_in *spl_in,
+		struct spl_scratch *spl_scratch)
+{
+	const struct spl_taps *taps = &spl_scratch->scl_data.taps;
+	const bool is_video = spl_is_video_format(spl_in->basic_in.format);
+	const bool video_fullscreen = spl_is_video_fullscreen(spl_in);
+	bool h_taps_ok, v_taps_ok;
+
+	/* Nothing to do when adaptive sharpness is disabled */
+	if (!spl_in->adaptive_sharpness.enable)
+		return false;
+
+	/* No iSHARP support for downscaling */
+	if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1 ||
+			spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz) > 1)
+		return false;
+
+	/*
+	 * From here on scaling is up to 1:1 (no scaling) or upscaling.
+	 * Apply sharpness to RGB and YUV (NV12/P010) surfaces based on the
+	 * policy setting: reject the surfaces the policy excludes.
+	 */
+	switch (spl_in->sharpen_policy) {
+	case SHARPEN_YUV:
+		if (!is_video)
+			return false;
+		break;
+	case SHARPEN_RGB_FULLSCREEN_YUV:
+		if (is_video && !video_fullscreen)
+			return false;
+		break;
+	case SHARPEN_FULLSCREEN_ALL:
+		if (!spl_in->is_fullscreen)
+			return false;
+		break;
+	default:
+		break;
+	}
+
+	/* Sharpness needs horizontal taps 4 or 6 AND vertical taps 3, 4 or 6 */
+	h_taps_ok = taps->h_taps == 4 || taps->h_taps == 6;
+	v_taps_ok = taps->v_taps == 3 || taps->v_taps == 4 || taps->v_taps == 6;
+
+	return h_taps_ok && v_taps_ok;
+}
+
+/* Calculate the number of taps to use when adaptive scaling is off */
+static void spl_get_taps_non_adaptive_scaler(
+		struct spl_scratch *spl_scratch,
+		const struct spl_taps *in_taps,
+		bool is_subsampled)
+{
+	struct spl_scaler_data *scl = &spl_scratch->scl_data;
+	struct spl_taps *taps = &scl->taps;
+	const struct spl_fixed31_32 max_ratio = spl_fixpt_from_fraction(6, 1);
+	const int h_ceil = spl_fixpt_ceil(scl->ratios.horz);
+	const int v_ceil = spl_fixpt_ceil(scl->ratios.vert);
+	const int hc_ceil = spl_fixpt_ceil(scl->ratios.horz_c);
+	const int vc_ceil = spl_fixpt_ceil(scl->ratios.vert_c);
+	bool within_max_downscale;
+
+	/*
+	 * A requested tap count of 0 means "choose a default":
+	 * min(2 * ceil(ratio), 8) when ceil(ratio) > 1, otherwise 4.
+	 */
+	if (in_taps->h_taps != 0)
+		taps->h_taps = in_taps->h_taps;
+	else if (h_ceil > 1)
+		taps->h_taps = spl_min(2 * h_ceil, 8);
+	else
+		taps->h_taps = 4;
+
+	if (in_taps->v_taps != 0)
+		taps->v_taps = in_taps->v_taps;
+	else if (v_ceil > 1)
+		taps->v_taps = spl_min(2 * v_ceil, 8);
+	else
+		taps->v_taps = 4;
+
+	if (in_taps->v_taps_c != 0)
+		taps->v_taps_c = in_taps->v_taps_c;
+	else if (vc_ceil > 1)
+		taps->v_taps_c = spl_min(2 * vc_ceil, 8);
+	else
+		taps->v_taps_c = 4;
+
+	/* Only 1 and even h_taps_c are supported by hw: other odd requests drop by one */
+	if (in_taps->h_taps_c == 0) {
+		if (hc_ceil > 1)
+			taps->h_taps_c = spl_min(2 * hc_ceil, 8);
+		else
+			taps->h_taps_c = 4;
+	} else if (in_taps->h_taps_c != 1 && (in_taps->h_taps_c % 2) != 0) {
+		taps->h_taps_c = in_taps->h_taps_c - 1;
+	} else {
+		taps->h_taps_c = in_taps->h_taps_c;
+	}
+
+	/* Max downscale supported is 6.0x: assert if any ratio goes beyond that */
+	within_max_downscale = spl_fixpt_le(scl->ratios.horz, max_ratio);
+	SPL_ASSERT(within_max_downscale);
+	within_max_downscale = spl_fixpt_le(scl->ratios.vert, max_ratio);
+	SPL_ASSERT(within_max_downscale);
+	within_max_downscale = spl_fixpt_le(scl->ratios.horz_c, max_ratio);
+	SPL_ASSERT(within_max_downscale);
+	within_max_downscale = spl_fixpt_le(scl->ratios.vert_c, max_ratio);
+	SPL_ASSERT(within_max_downscale);
+
+	/*
+	 * Identity ratios use a single tap. Chroma taps of subsampled formats
+	 * are left untouched even at identity.
+	 */
+	if (IDENTITY_RATIO(scl->ratios.horz))
+		taps->h_taps = 1;
+	if (IDENTITY_RATIO(scl->ratios.vert))
+		taps->v_taps = 1;
+	if (!is_subsampled && IDENTITY_RATIO(scl->ratios.horz_c))
+		taps->h_taps_c = 1;
+	if (!is_subsampled && IDENTITY_RATIO(scl->ratios.vert_c))
+		taps->v_taps_c = 1;
+}
+
+/* Calculate optimal number of taps and the EASF/ISHARP enables; returns false when the taps cannot be supported */
+static bool spl_get_optimal_number_of_taps(
+ int max_downscale_src_width, struct spl_in *spl_in, struct spl_scratch *spl_scratch,
+ const struct spl_taps *in_taps, bool *enable_easf_v, bool *enable_easf_h,
+ bool *enable_isharp)
+{
+ int num_part_y, num_part_c;
+ unsigned int max_taps_y, max_taps_c;
+ unsigned int min_taps_y, min_taps_c;
+ enum lb_memory_config lb_config;
+ bool skip_easf = false;
+ bool is_subsampled = spl_is_subsampled_format(spl_in->basic_in.format);
+
+ if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active &&
+ max_downscale_src_width != 0 &&
+ spl_scratch->scl_data.viewport.width > max_downscale_src_width) {
+ spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, is_subsampled);
+ *enable_easf_v = false;
+ *enable_easf_h = false;
+ *enable_isharp = false;
+ return false; /* viewport wider than h_active and than max_downscale_src_width */
+ }
+
+ /* Disable adaptive scaler and sharpener when integer scaling is enabled */
+ if (spl_in->scaling_quality.integer_scaling) {
+ spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, is_subsampled);
+ *enable_easf_v = false;
+ *enable_easf_h = false;
+ *enable_isharp = false;
+ return true;
+ }
+
+ /* Check if we are using EASF or not */
+ skip_easf = enable_easf(spl_in, spl_scratch); /* NOTE: enable_easf() returns true when EASF must be skipped */
+
+ /*
+ * Set default taps if none are provided
+ * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling
+ * taps = 4 for upscaling
+ */
+ if (skip_easf) {
+ spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, is_subsampled);
+ }
+ else {
+ if (spl_is_video_format(spl_in->basic_in.format)) {
+ spl_scratch->scl_data.taps.h_taps = 6;
+ spl_scratch->scl_data.taps.v_taps = 6;
+ spl_scratch->scl_data.taps.h_taps_c = 4;
+ spl_scratch->scl_data.taps.v_taps_c = 4;
+ } else { /* RGB */
+ spl_scratch->scl_data.taps.h_taps = 6;
+ spl_scratch->scl_data.taps.v_taps = 6;
+ spl_scratch->scl_data.taps.h_taps_c = 6;
+ spl_scratch->scl_data.taps.v_taps_c = 6;
+ }
+ }
+
+ /*Ensure we can support the requested number of vtaps*/
+ min_taps_y = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert);
+ min_taps_c = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c);
+
+ /* Use LB_MEMORY_CONFIG_3 for 4:2:0 */
+ if (spl_is_yuv420(spl_in->basic_in.format))
+ lb_config = LB_MEMORY_CONFIG_3;
+ else
+ lb_config = LB_MEMORY_CONFIG_0;
+ // Determine max vtap support by calculating how much line buffer can fit
+ spl_in->callbacks.spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_scratch->scl_data,
+ lb_config, &num_part_y, &num_part_c);
+ /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */
+ if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 2)
+ if ((spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) - 2) > num_part_y)
+ max_taps_y = 0; /* avoids a negative result in the unsigned max_taps_y */
+ else
+ max_taps_y = num_part_y - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) - 2);
+ else
+ max_taps_y = num_part_y;
+
+ if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 2)
+ if ((spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) - 2) > num_part_c)
+ max_taps_c = 0; /* avoids a negative result in the unsigned max_taps_c */
+ else
+ max_taps_c = num_part_c - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) - 2);
+ else
+ max_taps_c = num_part_c;
+
+ if (max_taps_y < min_taps_y)
+ return false; /* line buffer cannot fit the minimum luma vtaps */
+ else if (max_taps_c < min_taps_c)
+ return false; /* line buffer cannot fit the minimum chroma vtaps */
+
+ if (spl_scratch->scl_data.taps.v_taps > max_taps_y)
+ spl_scratch->scl_data.taps.v_taps = max_taps_y;
+
+ if (spl_scratch->scl_data.taps.v_taps_c > max_taps_c)
+ spl_scratch->scl_data.taps.v_taps_c = max_taps_c;
+
+ if (!skip_easf) {
+ /*
+ * RGB ( L + NL ) and Linear HDR support 6x6, 6x4, 6x3, 4x4, 4x3
+ * NL YUV420 only supports 6x6, 6x4 for Y and 4x4 for UV
+ *
+ * If LB does not support 3, 4, or 6 taps, then disable EASF_V
+ * and only enable EASF_H. So for RGB, support 6x2, 4x2
+ * and for NL YUV420, support 6x2 for Y and 4x2 for UV
+ *
+ * All other cases, have to disable EASF_V and EASF_H
+ *
+ * If optimal no of taps is 5, then set it to 4
+ * If optimal no of taps is 7 or 8, then fine since max tap is 6
+ *
+ */
+ if (spl_scratch->scl_data.taps.v_taps == 5)
+ spl_scratch->scl_data.taps.v_taps = 4;
+
+ if (spl_scratch->scl_data.taps.v_taps_c == 5)
+ spl_scratch->scl_data.taps.v_taps_c = 4;
+
+ if (spl_scratch->scl_data.taps.h_taps == 5)
+ spl_scratch->scl_data.taps.h_taps = 4;
+
+ if (spl_scratch->scl_data.taps.h_taps_c == 5)
+ spl_scratch->scl_data.taps.h_taps_c = 4;
+
+ if (spl_is_video_format(spl_in->basic_in.format)) {
+ if (spl_scratch->scl_data.taps.h_taps <= 4) {
+ *enable_easf_v = false;
+ *enable_easf_h = false;
+ } else if (spl_scratch->scl_data.taps.v_taps <= 3) {
+ *enable_easf_v = false;
+ *enable_easf_h = true;
+ } else {
+ *enable_easf_v = true;
+ *enable_easf_h = true;
+ }
+ SPL_ASSERT((spl_scratch->scl_data.taps.v_taps > 1) &&
+ (spl_scratch->scl_data.taps.v_taps_c > 1));
+ } else { /* RGB */
+ if (spl_scratch->scl_data.taps.h_taps <= 3) {
+ *enable_easf_v = false;
+ *enable_easf_h = false;
+ } else if (spl_scratch->scl_data.taps.v_taps < 3) {
+ *enable_easf_v = false;
+ *enable_easf_h = true;
+ } else {
+ *enable_easf_v = true;
+ *enable_easf_h = true;
+ }
+ SPL_ASSERT(spl_scratch->scl_data.taps.v_taps > 1);
+ }
+ } else {
+ *enable_easf_v = false;
+ *enable_easf_h = false;
+ } // end of if (!skip_easf)
+
+ /* Sharpener requires scaler to be enabled, including for 1:1
+ * Check if ISHARP can be enabled
+ * If ISHARP is not enabled, set taps to 1 if ratio is 1:1
+ * except for chroma taps. Keep previous taps so it can
+ * handle cositing
+ */
+
+ *enable_isharp = spl_get_isharp_en(spl_in, spl_scratch);
+ if (!*enable_isharp && !spl_in->basic_out.always_scale) {
+ if ((IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) &&
+ (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) {
+ spl_scratch->scl_data.taps.h_taps = 1;
+ spl_scratch->scl_data.taps.v_taps = 1;
+ if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_subsampled)
+ spl_scratch->scl_data.taps.h_taps_c = 1;
+
+ if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_subsampled)
+ spl_scratch->scl_data.taps.v_taps_c = 1;
+
+ *enable_easf_v = false; /* luma is 1:1 in both directions: EASF is turned off */
+ *enable_easf_h = false;
+ } else {
+ if ((!*enable_easf_h) &&
+ (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)))
+ spl_scratch->scl_data.taps.h_taps = 1;
+
+ if ((!*enable_easf_v) &&
+ (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert)))
+ spl_scratch->scl_data.taps.v_taps = 1;
+
+ if ((!*enable_easf_h) && !is_subsampled &&
+ (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)))
+ spl_scratch->scl_data.taps.h_taps_c = 1;
+
+ if ((!*enable_easf_v) && !is_subsampled &&
+ (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)))
+ spl_scratch->scl_data.taps.v_taps_c = 1;
+
+ }
+ }
+ return true;
+}
+
+/* Black colour offsets: BLACK_OFFSET_* constants for video formats, zero otherwise */
+static void spl_set_black_color_data(enum spl_pixel_format format,
+		struct scl_black_color *scl_black_color)
+{
+	if (!spl_is_video_format(format)) {
+		scl_black_color->offset_rgb_y = 0x0;
+		scl_black_color->offset_rgb_cbcr = 0x0;
+		return;
+	}
+	scl_black_color->offset_rgb_y = BLACK_OFFSET_RGB_Y;
+	scl_black_color->offset_rgb_cbcr = BLACK_OFFSET_CBCR;
+}
+
+/* Fill the dscl_prog_data ratio and init fields from the calculated scaler data */
+static void spl_set_manual_ratio_init_data(struct dscl_prog_data *dscl_prog_data,
+		const struct spl_scaler_data *scl_data)
+{
+	const struct spl_fixed31_32 init_h = scl_data->inits.h;
+	const struct spl_fixed31_32 init_h_c = scl_data->inits.h_c;
+	const struct spl_fixed31_32 init_v = scl_data->inits.v;
+	const struct spl_fixed31_32 init_v_c = scl_data->inits.v_c;
+	/* Bottom init is the vertical init plus the vertical ratio */
+	const struct spl_fixed31_32 bot_v = spl_fixpt_add(init_v, scl_data->ratios.vert);
+	const struct spl_fixed31_32 bot_v_c = spl_fixpt_add(init_v_c, scl_data->ratios.vert_c);
+
+	/* Scale ratios: u3d19 value shifted left by 5 */
+	dscl_prog_data->ratios.h_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.horz) << 5;
+	dscl_prog_data->ratios.v_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.vert) << 5;
+	dscl_prog_data->ratios.h_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.horz_c) << 5;
+	dscl_prog_data->ratios.v_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.vert_c) << 5;
+
+	/*
+	 * Inits: 0.24 format for fraction, first five bits zeroed, plus integer part
+	 */
+	dscl_prog_data->init.h_filter_init_frac = spl_fixpt_u0d19(init_h) << 5;
+	dscl_prog_data->init.h_filter_init_int = spl_fixpt_floor(init_h);
+	dscl_prog_data->init.h_filter_init_frac_c = spl_fixpt_u0d19(init_h_c) << 5;
+	dscl_prog_data->init.h_filter_init_int_c = spl_fixpt_floor(init_h_c);
+	dscl_prog_data->init.v_filter_init_frac = spl_fixpt_u0d19(init_v) << 5;
+	dscl_prog_data->init.v_filter_init_int = spl_fixpt_floor(init_v);
+	dscl_prog_data->init.v_filter_init_frac_c = spl_fixpt_u0d19(init_v_c) << 5;
+	dscl_prog_data->init.v_filter_init_int_c = spl_fixpt_floor(init_v_c);
+
+	/* Bottom (init + ratio) values, luma then chroma */
+	dscl_prog_data->init.v_filter_init_bot_frac = spl_fixpt_u0d19(bot_v) << 5;
+	dscl_prog_data->init.v_filter_init_bot_int = spl_fixpt_floor(bot_v);
+	dscl_prog_data->init.v_filter_init_bot_frac_c = spl_fixpt_u0d19(bot_v_c) << 5;
+	dscl_prog_data->init.v_filter_init_bot_int_c = spl_fixpt_floor(bot_v_c);
+}
+
+/* Each dscl_prog_data tap field is stored as (number of taps - 1) */
+static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *scl_data)
+{
+	dscl_prog_data->taps.h_taps = scl_data->taps.h_taps - 1;
+	dscl_prog_data->taps.v_taps = scl_data->taps.v_taps - 1;
+	dscl_prog_data->taps.h_taps_c = scl_data->taps.h_taps_c - 1;
+	dscl_prog_data->taps.v_taps_c = scl_data->taps.v_taps_c - 1;
+}
+
+/*
+ * Populate the dscl prog data structure (spl_out->dscl_prog_data) from the
+ * scaler data calculated by SPL in spl_scratch.
+ */
+static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *spl_scratch,
+		struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, bool enable_isharp)
+{
+	const struct spl_scaler_data *scl = &spl_scratch->scl_data;
+	struct dscl_prog_data *prog = spl_out->dscl_prog_data;
+	/* EASF counts as enabled when either direction uses it */
+	const bool any_easf = enable_easf_v || enable_easf_h;
+
+	/* Recout and MPC size */
+	prog->recout = scl->recout;
+	prog->mpc_size.width = scl->h_active;
+	prog->mpc_size.height = scl->v_active;
+
+	/* SCL_MODE */
+	prog->dscl_mode = spl_get_dscl_mode(spl_in, scl, enable_isharp,
+			any_easf);
+
+	/* SCL_BLACK_COLOR */
+	spl_set_black_color_data(spl_in->basic_in.format, &prog->scl_black_color);
+
+	/* Manually calculated scale ratio and init values */
+	spl_set_manual_ratio_init_data(prog, scl);
+
+	/* HTaps / VTaps */
+	spl_set_taps_data(prog, scl);
+
+	/* Luma and chroma viewports */
+	prog->viewport = scl->viewport;
+	prog->viewport_c = scl->viewport_c;
+
+	/* Filters data */
+	spl_set_filters_data(prog, scl, enable_easf_v, enable_easf_h);
+}
+
+/*
+ * Calculate the EASF matrix C0-C3 coefficients based on HDR_mult.
+ *
+ * HDR_MULT is sdr_white_level_nits / 80, truncated to hundredths, when the
+ * level lies within [80, 480]; any other level uses 1.0 (the 80 nits default).
+ * C0..C2 are HDR_MULT * {0.2126, 0.7152, 0.0722} * 2^14 / 125 converted with a
+ * signed, 5-bit exponent, 10-bit mantissa custom float format (fp1.5.10).
+ * C3 is always 0.
+ */
+static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t sdr_white_level_nits)
+{
+	struct spl_custom_float_format fmt;
+	struct spl_fixed31_32 hdr_mult, scale;
+	struct spl_fixed31_32 c0_calc, c1_calc, c2_calc;
+	uint32_t hdr_multx100_int = 100; /* default for 80 nits */
+
+	if (sdr_white_level_nits >= 80 && sdr_white_level_nits <= 480)
+		hdr_multx100_int = sdr_white_level_nits * 100 / 80;
+
+	hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100_int, 100LL);
+	/* 2^14 / 125, the factor shared by all three coefficients */
+	scale = spl_fixpt_from_fraction(16384LL, 125LL);
+
+	/* Keep the nesting hdr_mult * (coef * scale) so rounding is unchanged */
+	c0_calc = spl_fixpt_mul(hdr_mult,
+			spl_fixpt_mul(spl_fixpt_from_fraction(2126LL, 10000LL), scale));
+	c1_calc = spl_fixpt_mul(hdr_mult,
+			spl_fixpt_mul(spl_fixpt_from_fraction(7152LL, 10000LL), scale));
+	c2_calc = spl_fixpt_mul(hdr_mult,
+			spl_fixpt_mul(spl_fixpt_from_fraction(722LL, 10000LL), scale));
+
+	/* fp1.5.10 */
+	fmt.sign = true;
+	fmt.exponenta_bits = 5;
+	fmt.mantissa_bits = 10;
+
+	/* C0 coefficient (LN_rec709: HDR_MULT * 0.212600 * 2^14/125) */
+	spl_convert_to_custom_float_format(c0_calc, &fmt, &dscl_prog_data->easf_matrix_c0);
+	/* C1 coefficient (LN_rec709: HDR_MULT * 0.715200 * 2^14/125) */
+	spl_convert_to_custom_float_format(c1_calc, &fmt, &dscl_prog_data->easf_matrix_c1);
+	/* C2 coefficient (LN_rec709: HDR_MULT * 0.072200 * 2^14/125) */
+	spl_convert_to_custom_float_format(c2_calc, &fmt, &dscl_prog_data->easf_matrix_c2);
+	/* C3 coefficient */
+	dscl_prog_data->easf_matrix_c3 = 0x0;
+}
+
+/*
+ * Set EASF data in spl_out->dscl_prog_data.
+ *
+ * @spl_scratch: only scl_data.taps and scl_data.recip_ratios are read, as
+ *               inputs to the lookup helpers
+ * @spl_out: output whose dscl_prog_data receives every value written here
+ * @enable_easf_v: true fills in all easf_v_* fields; false only clears
+ *                 easf_v_en
+ * @enable_easf_h: true fills in all easf_h_* fields; false only clears
+ *                 easf_h_en
+ * @lls_pref: LLS_PREF_YES selects one set of BF1/BF2/BF3 constants and sets
+ *            easf_ltonl_en to 1; any other value selects the other set and
+ *            clears easf_ltonl_en
+ * @format: tested with spl_is_rgb8() and spl_is_subsampled_format()
+ * @setup: with LLS_PREF_YES, HDR_L combined with an rgb8 format makes the
+ *         matrix coefficients depend on sdr_white_level_nits
+ * @sdr_white_level_nits: forwarded to spl_calculate_c0_c3_hdr() in that case
+ *
+ * The vertical section additionally programs the 3-tap ring estimator
+ * (easf_v_ringest_3tap_*); the horizontal section has no such fields.
+ *
+ * All negative PWL values are written as two's complement in the field width
+ * named in the trailing comment (e.g. S0.10 -512 is 0x600).
+ */
+static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v,
+ bool enable_easf_h, enum linear_light_scaling lls_pref,
+ enum spl_pixel_format format, enum system_setup setup,
+ uint32_t sdr_white_level_nits)
+{
+ struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data;
+ /* Vertical EASF */
+ if (enable_easf_v) {
+ dscl_prog_data->easf_v_en = true;
+ dscl_prog_data->easf_v_ring = 0;
+ dscl_prog_data->easf_v_sharp_factor = 1;
+ dscl_prog_data->easf_v_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable
+ dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode
+ /* 2-bit, BF3 chroma mode correction calculation mode */
+ dscl_prog_data->easf_v_bf3_mode = spl_get_v_bf3_mode(
+ spl_scratch->scl_data.recip_ratios.vert);
+ /* FP1.5.10 [ minCoef ]*/
+ dscl_prog_data->easf_v_ringest_3tap_dntilt_uptilt =
+ spl_get_3tap_dntilt_uptilt_offset(spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.recip_ratios.vert);
+ /* FP1.5.10 [ upTiltMaxVal ]*/
+ dscl_prog_data->easf_v_ringest_3tap_uptilt_max =
+ spl_get_3tap_uptilt_maxval(spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.recip_ratios.vert);
+ /* FP1.5.10 [ dnTiltSlope ]*/
+ dscl_prog_data->easf_v_ringest_3tap_dntilt_slope =
+ spl_get_3tap_dntilt_slope(spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.recip_ratios.vert);
+ /* FP1.5.10 [ upTilt1Slope ]*/
+ dscl_prog_data->easf_v_ringest_3tap_uptilt1_slope =
+ spl_get_3tap_uptilt1_slope(spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.recip_ratios.vert);
+ /* FP1.5.10 [ upTilt2Slope ]*/
+ dscl_prog_data->easf_v_ringest_3tap_uptilt2_slope =
+ spl_get_3tap_uptilt2_slope(spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.recip_ratios.vert);
+ /* FP1.5.10 [ upTilt2Offset ]*/
+ dscl_prog_data->easf_v_ringest_3tap_uptilt2_offset =
+ spl_get_3tap_uptilt2_offset(spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.recip_ratios.vert);
+ /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [V_REDUCER_GAIN4] */
+ dscl_prog_data->easf_v_ringest_eventap_reduceg1 =
+ spl_get_reducer_gain4(spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.recip_ratios.vert);
+ /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */
+ dscl_prog_data->easf_v_ringest_eventap_reduceg2 =
+ spl_get_reducer_gain6(spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.recip_ratios.vert);
+ /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */
+ dscl_prog_data->easf_v_ringest_eventap_gain1 =
+ spl_get_gainRing4(spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.recip_ratios.vert);
+ /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */
+ dscl_prog_data->easf_v_ringest_eventap_gain2 =
+ spl_get_gainRing6(spl_scratch->scl_data.taps.v_taps,
+ spl_scratch->scl_data.recip_ratios.vert);
+ dscl_prog_data->easf_v_bf_maxa = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 0
+ dscl_prog_data->easf_v_bf_maxb = 63; //Vertical Max BF value B in U0.6 format.Selected if V_FCNTL == 1
+ dscl_prog_data->easf_v_bf_mina = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 0
+ dscl_prog_data->easf_v_bf_minb = 0; //Vertical Min BF value B in U0.6 format.Selected if V_FCNTL == 1
+ if (lls_pref == LLS_PREF_YES) {
+ dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control
+ dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control
+ dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control
+
+ dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512
+ dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0
+ dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20
+ dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1
+ dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2
+ dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2
+ dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3
+ dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56
+ dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4
+ dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48
+ dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5
+ dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240
+ dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6
+ dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160
+ dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7
+ dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7
+
+ dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0
+ dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0
+ dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0
+ dscl_prog_data->easf_v_bf3_pwl_in_set1 =
+ 0x0B37; // FP0.6.6, BF3 Input value PWL Segment 1 (0.0078125 * 125^3)
+ dscl_prog_data->easf_v_bf3_pwl_base_set1 = 62; // S0.6, BF3 Base PWL Segment 1
+ dscl_prog_data->easf_v_bf3_pwl_slope_set1 =
+ 0x13B8; // FP1.6.6, BF3 Slope PWL Segment 1
+ dscl_prog_data->easf_v_bf3_pwl_in_set2 =
+ 0x0BB7; // FP0.6.6, BF3 Input value PWL Segment 2 (0.03125 * 125^3)
+ dscl_prog_data->easf_v_bf3_pwl_base_set2 = 20; // S0.6, BF3 Base PWL Segment 2
+ dscl_prog_data->easf_v_bf3_pwl_slope_set2 =
+ 0x1356; // FP1.6.6, BF3 Slope PWL Segment 2
+ dscl_prog_data->easf_v_bf3_pwl_in_set3 =
+ 0x0BF7; // FP0.6.6, BF3 Input value PWL Segment 3 (0.0625 * 125^3)
+ dscl_prog_data->easf_v_bf3_pwl_base_set3 = 0; // S0.6, BF3 Base PWL Segment 3
+ dscl_prog_data->easf_v_bf3_pwl_slope_set3 =
+ 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3
+ dscl_prog_data->easf_v_bf3_pwl_in_set4 =
+ 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3)
+ dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50
+ dscl_prog_data->easf_v_bf3_pwl_slope_set4 =
+ 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4
+ dscl_prog_data->easf_v_bf3_pwl_in_set5 =
+ 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3)
+ dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63
+ } else {
+ dscl_prog_data->easf_v_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control
+ dscl_prog_data->easf_v_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control
+ dscl_prog_data->easf_v_bf2_roc_gain = 14; // U2.2, Rate Of Change control
+
+ dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960
+ dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0
+ dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60
+ dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1
+ dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2
+ dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2
+ dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3
+ dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19
+ dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4
+ dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16
+ dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5
+ dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80
+ dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6
+ dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6
+ dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53
+ dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7
+ dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7
+
+ dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0
+ dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0
+ dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0
+ dscl_prog_data->easf_v_bf3_pwl_in_set1 =
+ 0x06C0; // FP0.6.6, BF3 Input value PWL Segment 1 (0.0625)
+ dscl_prog_data->easf_v_bf3_pwl_base_set1 = 63; // S0.6, BF3 Base PWL Segment 1
+ dscl_prog_data->easf_v_bf3_pwl_slope_set1 = 0x1896; // FP1.6.6, BF3 Slope PWL Segment 1
+ dscl_prog_data->easf_v_bf3_pwl_in_set2 =
+ 0x0700; // FP0.6.6, BF3 Input value PWL Segment 2 (0.125)
+ dscl_prog_data->easf_v_bf3_pwl_base_set2 = 20; // S0.6, BF3 Base PWL Segment 2
+ dscl_prog_data->easf_v_bf3_pwl_slope_set2 = 0x1810; // FP1.6.6, BF3 Slope PWL Segment 2
+ dscl_prog_data->easf_v_bf3_pwl_in_set3 =
+ 0x0740; // FP0.6.6, BF3 Input value PWL Segment 3 (0.25)
+ dscl_prog_data->easf_v_bf3_pwl_base_set3 = 0; // S0.6, BF3 Base PWL Segment 3
+ dscl_prog_data->easf_v_bf3_pwl_slope_set3 =
+ 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3
+ dscl_prog_data->easf_v_bf3_pwl_in_set4 =
+ 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375)
+ dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60
+ dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4
+ dscl_prog_data->easf_v_bf3_pwl_in_set5 =
+ 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5)
+ dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63
+ }
+ } else
+ dscl_prog_data->easf_v_en = false;
+
+ /* Horizontal EASF */
+ if (enable_easf_h) {
+ dscl_prog_data->easf_h_en = true;
+ dscl_prog_data->easf_h_ring = 0;
+ dscl_prog_data->easf_h_sharp_factor = 1;
+ dscl_prog_data->easf_h_bf1_en =
+ 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable
+ dscl_prog_data->easf_h_bf2_mode =
+ 0xF; // 4-bit, BF2 calculation mode
+ /* 2-bit, BF3 chroma mode correction calculation mode */
+ dscl_prog_data->easf_h_bf3_mode = spl_get_h_bf3_mode(
+ spl_scratch->scl_data.recip_ratios.horz);
+ /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */
+ dscl_prog_data->easf_h_ringest_eventap_reduceg1 =
+ spl_get_reducer_gain4(spl_scratch->scl_data.taps.h_taps,
+ spl_scratch->scl_data.recip_ratios.horz);
+ /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [H_REDUCER_GAIN6] */
+ dscl_prog_data->easf_h_ringest_eventap_reduceg2 =
+ spl_get_reducer_gain6(spl_scratch->scl_data.taps.h_taps,
+ spl_scratch->scl_data.recip_ratios.horz);
+ /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */
+ dscl_prog_data->easf_h_ringest_eventap_gain1 =
+ spl_get_gainRing4(spl_scratch->scl_data.taps.h_taps,
+ spl_scratch->scl_data.recip_ratios.horz);
+ /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */
+ dscl_prog_data->easf_h_ringest_eventap_gain2 =
+ spl_get_gainRing6(spl_scratch->scl_data.taps.h_taps,
+ spl_scratch->scl_data.recip_ratios.horz);
+ dscl_prog_data->easf_h_bf_maxa = 63; //Horz Max BF value A in U0.6 format.Selected if H_FCNTL==0
+ dscl_prog_data->easf_h_bf_maxb = 63; //Horz Max BF value B in U0.6 format.Selected if H_FCNTL==1
+ dscl_prog_data->easf_h_bf_mina = 0; //Horz Min BF value A in U0.6 format.Selected if H_FCNTL==0
+ dscl_prog_data->easf_h_bf_minb = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==1
+ if (lls_pref == LLS_PREF_YES) {
+ dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control
+ dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control
+ dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control
+
+ dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512
+ dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0
+ dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20
+ dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1
+ dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2
+ dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2
+ dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3
+ dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56
+ dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4
+ dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48
+ dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5
+ dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240
+ dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6
+ dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160
+ dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7
+ dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7
+
+ dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0
+ dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0
+ dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0
+ dscl_prog_data->easf_h_bf3_pwl_in_set1 =
+ 0x0B37; // FP0.6.6, BF3 Input value PWL Segment 1 (0.0078125 * 125^3)
+ dscl_prog_data->easf_h_bf3_pwl_base_set1 = 62; // S0.6, BF3 Base PWL Segment 1
+ dscl_prog_data->easf_h_bf3_pwl_slope_set1 = 0x13B8; // FP1.6.6, BF3 Slope PWL Segment 1
+ dscl_prog_data->easf_h_bf3_pwl_in_set2 =
+ 0x0BB7; // FP0.6.6, BF3 Input value PWL Segment 2 (0.03125 * 125^3)
+ dscl_prog_data->easf_h_bf3_pwl_base_set2 = 20; // S0.6, BF3 Base PWL Segment 2
+ dscl_prog_data->easf_h_bf3_pwl_slope_set2 = 0x1356; // FP1.6.6, BF3 Slope PWL Segment 2
+ dscl_prog_data->easf_h_bf3_pwl_in_set3 =
+ 0x0BF7; // FP0.6.6, BF3 Input value PWL Segment 3 (0.0625 * 125^3)
+ dscl_prog_data->easf_h_bf3_pwl_base_set3 = 0; // S0.6, BF3 Base PWL Segment 3
+ dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3
+ dscl_prog_data->easf_h_bf3_pwl_in_set4 =
+ 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3)
+ dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50
+ dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4
+ dscl_prog_data->easf_h_bf3_pwl_in_set5 =
+ 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3)
+ dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63
+ } else {
+ dscl_prog_data->easf_h_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control
+ dscl_prog_data->easf_h_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control
+ dscl_prog_data->easf_h_bf2_roc_gain = 14; // U2.2, Rate Of Change control
+
+ dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960
+ dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0
+ dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60
+ dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1
+ dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2
+ dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2
+ dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3
+ dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19
+ dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4
+ dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16
+ dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5
+ dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80
+ dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6
+ dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6
+ dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53
+ dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7
+ dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7
+
+ dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0
+ dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0
+ dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0
+ dscl_prog_data->easf_h_bf3_pwl_in_set1 =
+ 0x06C0; // FP0.6.6, BF3 Input value PWL Segment 1 (0.0625)
+ dscl_prog_data->easf_h_bf3_pwl_base_set1 = 63; // S0.6, BF3 Base PWL Segment 1
+ dscl_prog_data->easf_h_bf3_pwl_slope_set1 = 0x1896; // FP1.6.6, BF3 Slope PWL Segment 1
+ dscl_prog_data->easf_h_bf3_pwl_in_set2 =
+ 0x0700; // FP0.6.6, BF3 Input value PWL Segment 2 (0.125)
+ dscl_prog_data->easf_h_bf3_pwl_base_set2 = 20; // S0.6, BF3 Base PWL Segment 2
+ dscl_prog_data->easf_h_bf3_pwl_slope_set2 = 0x1810; // FP1.6.6, BF3 Slope PWL Segment 2
+ dscl_prog_data->easf_h_bf3_pwl_in_set3 =
+ 0x0740; // FP0.6.6, BF3 Input value PWL Segment 3 (0.25)
+ dscl_prog_data->easf_h_bf3_pwl_base_set3 = 0; // S0.6, BF3 Base PWL Segment 3
+ dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3
+ dscl_prog_data->easf_h_bf3_pwl_in_set4 =
+ 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375)
+ dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60
+ dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4
+ dscl_prog_data->easf_h_bf3_pwl_in_set5 =
+ 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5)
+ dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63
+ } // if (lls_pref == LLS_PREF_YES)
+ } else
+ dscl_prog_data->easf_h_en = false;
+
+ /* Linear/non-linear flag and matrix coefficients; set regardless of the enable flags */
+ if (lls_pref == LLS_PREF_YES) {
+ dscl_prog_data->easf_ltonl_en = 1; // Linear input
+ if ((setup == HDR_L) && (spl_is_rgb8(format))) {
+ /* Calculate C0-C3 coefficients based on HDR multiplier */
+ spl_calculate_c0_c3_hdr(dscl_prog_data, sdr_white_level_nits);
+ } else { // HDR_L ( DWM ) and SDR_L
+ dscl_prog_data->easf_matrix_c0 =
+ 0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720)
+ dscl_prog_data->easf_matrix_c1 =
+ 0x55DC; // fp1.5.10, C1 coefficient (LN_rec709: 0.7152 * (2^14)/125 = 93.74269440)
+ dscl_prog_data->easf_matrix_c2 =
+ 0x48BB; // fp1.5.10, C2 coefficient (LN_rec709: 0.0722 * (2^14)/125 = 9.46339840)
+ dscl_prog_data->easf_matrix_c3 =
+ 0x0; // fp1.5.10, C3 coefficient
+ }
+ } else {
+ dscl_prog_data->easf_ltonl_en = 0; // Non-Linear input
+ dscl_prog_data->easf_matrix_c0 =
+ 0x3434; // fp1.5.10, C0 coefficient (LN_BT2020: 0.262695312500000)
+ dscl_prog_data->easf_matrix_c1 =
+ 0x396D; // fp1.5.10, C1 coefficient (LN_BT2020: 0.678222656250000)
+ dscl_prog_data->easf_matrix_c2 =
+ 0x2B97; // fp1.5.10, C2 coefficient (LN_BT2020: 0.059295654296875)
+ dscl_prog_data->easf_matrix_c3 =
+ 0x0; // fp1.5.10, C3 coefficient
+ }
+
+ if (spl_is_subsampled_format(format)) { /* TODO: 0 = RGB, 1 = YUV */
+ dscl_prog_data->easf_matrix_mode = 1;
+ /*
+ * 2-bit, BF3 chroma mode correction calculation mode
+ * Needs to be disabled for YUV420 mode
+ * Override lookup value
+ * (both directions are written here even when the matching
+ * enable flag is false)
+ */
+ dscl_prog_data->easf_v_bf3_mode = 0;
+ dscl_prog_data->easf_h_bf3_mode = 0;
+ } else
+ dscl_prog_data->easf_matrix_mode = 0;
+
+}
+
+/*
+ * Set the isharp noise detection mode from the vertical tap count.
+ *
+ * ISHARP_NOISEDET_MODE encoding (window given as VxH):
+ *   0: 3x5
+ *   1: 4x5
+ *   2: (nothing listed for this value)
+ *   3: 5x5
+ *
+ * @dscl_prog_data: programming data whose isharp_noise_det.mode is written
+ * @data: scaler data; only taps.v_taps is read
+ *
+ * Only 3, 4 and 6 vertical taps are mapped. Any other count leaves
+ * isharp_noise_det.mode untouched.
+ */
+static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data,
+		const struct spl_scaler_data *data)
+{
+	switch (data->taps.v_taps) {
+	case 6:
+		dscl_prog_data->isharp_noise_det.mode = 3;
+		break;
+	case 4:
+		dscl_prog_data->isharp_noise_det.mode = 1;
+		break;
+	case 3:
+		dscl_prog_data->isharp_noise_det.mode = 0;
+		break;
+	default:
+		/* NOTE(review): mode keeps whatever value it had - confirm this is intended */
+		break;
+	}
+}
+
+/*
+ * Set Sharpener (ISHARP) data.
+ *
+ * With enable_isharp false only isharp_en is cleared and nothing else is
+ * written. Otherwise the 1D LUT is rebuilt and copied into isharp_delta, and
+ * the noise detection, format, local brightness adjustment (LBA), nldelta
+ * soft clip and blur scale settings are filled in.
+ *
+ * @dscl_prog_data: programming data to fill in
+ * @adp_sharpness: sharpness_level is copied to the output; the whole struct
+ *                 is forwarded to the 1D LUT builder
+ * @enable_isharp: false turns the sharpener off
+ * @lls_pref: selects ISHARP_FMT_MODE and the nldelta soft clip values
+ * @format: not referenced by this function
+ * @data: scaler data; taps.h_taps == 6 enables noise detection
+ * @ratio: forwarded to the 1D LUT builder
+ * @setup: selects the pregenerated LUT and the LBA PWL curve
+ * @scale_to_sharpness_policy: forwarded to the 1D LUT builder
+ */
+static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data,
+		struct adaptive_sharpness adp_sharpness, bool enable_isharp,
+		enum linear_light_scaling lls_pref, enum spl_pixel_format format,
+		const struct spl_scaler_data *data, struct spl_fixed31_32 ratio,
+		enum system_setup setup, enum scale_to_sharpness_policy scale_to_sharpness_policy)
+{
+	enum { LBA_NUM_SEG = 6 };
+	/*
+	 * One ISHARP Local Brightness Adjustment PWL curve
+	 * (ISHARP_LBA_PWL_SEG0..5): INPUT in U0.10, BASE in U0.6, SLOPE in S5.3.
+	 * Segment 5 has no slope entry.
+	 */
+	struct lba_pwl {
+		uint32_t in_seg[LBA_NUM_SEG];
+		uint32_t base_seg[LBA_NUM_SEG];
+		uint32_t slope_seg[LBA_NUM_SEG - 1];
+	};
+	static const struct lba_pwl lba_sdr_l = {
+		.in_seg = { 0, 130, 450, 520, 520, 520 },
+		.base_seg = { 0, 63, 63, 0, 0, 0 },
+		.slope_seg = { 62, 0, 0x18D /* = -115 */, 0, 0 },
+	};
+	static const struct lba_pwl lba_hdr_l = {
+		.in_seg = { 0, 254, 559, 592, 1023, 1023 },
+		.base_seg = { 0, 63, 63, 0, 0, 0 },
+		.slope_seg = { 32, 0, 0x10C /* = -244 */, 0, 0 },
+	};
+	static const struct lba_pwl lba_other = {
+		.in_seg = { 0, 204, 818, 1023, 1023, 1023 },
+		.base_seg = { 0, 63, 63, 0, 0, 0 },
+		.slope_seg = { 40, 0, 0x1D9 /* = -39 */, 0, 0 },
+	};
+	const struct lba_pwl *lba;
+	int seg;
+
+	/* Turn off sharpener if not required */
+	if (!enable_isharp) {
+		dscl_prog_data->isharp_en = 0;
+		return;
+	}
+
+	/* Rebuild the 1D LUT first, then copy the table for this setup */
+	spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness,
+			scale_to_sharpness_policy);
+	memcpy(dscl_prog_data->isharp_delta, spl_get_pregen_filter_isharp_1D_lut(setup),
+			sizeof(uint32_t) * ISHARP_LUT_TABLE_SIZE);
+	dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level;
+
+	/* ISHARP_EN */
+	dscl_prog_data->isharp_en = 1;
+
+	/* ISHARP_NOISEDET_EN / ISHARP_NOISEDET_MODE: only with 6 horizontal taps */
+	if (data->taps.h_taps != 6) {
+		dscl_prog_data->isharp_noise_det.enable = 0;
+	} else {
+		dscl_prog_data->isharp_noise_det.enable = 1;
+		spl_set_isharp_noise_det_mode(dscl_prog_data, data);
+	}
+	/* Noise detection threshold */
+	dscl_prog_data->isharp_noise_det.uthreshold = 24;	/* ISHARP_NOISEDET_UTHRE */
+	dscl_prog_data->isharp_noise_det.dthreshold = 4;	/* ISHARP_NOISEDET_DTHRE */
+	/* Noise detection gain */
+	dscl_prog_data->isharp_noise_det.pwl_start_in = 3;	/* ISHARP_NOISEDET_PWL_START_IN */
+	dscl_prog_data->isharp_noise_det.pwl_end_in = 13;	/* ISHARP_NOISEDET_PWL_END_IN */
+	dscl_prog_data->isharp_noise_det.pwl_slope = 1623;	/* ISHARP_NOISEDET_PWL_SLOPE */
+
+	/* ISHARP_FMT_MODE / ISHARP_FMT_NORM */
+	dscl_prog_data->isharp_fmt.mode = (lls_pref == LLS_PREF_NO) ? 1 : 0;
+	dscl_prog_data->isharp_fmt.norm = 0x3C00;
+
+	/* ISHARP_LBA_MODE */
+	dscl_prog_data->isharp_lba.mode = 0;
+
+	/* Pick the LBA PWL curve for this setup and copy it segment by segment */
+	if (setup == SDR_L)
+		lba = &lba_sdr_l;
+	else if (setup == HDR_L)
+		lba = &lba_hdr_l;
+	else
+		lba = &lba_other;
+
+	for (seg = 0; seg < LBA_NUM_SEG; seg++) {
+		dscl_prog_data->isharp_lba.in_seg[seg] = lba->in_seg[seg];
+		dscl_prog_data->isharp_lba.base_seg[seg] = lba->base_seg[seg];
+	}
+	for (seg = 0; seg < LBA_NUM_SEG - 1; seg++)
+		dscl_prog_data->isharp_lba.slope_seg[seg] = lba->slope_seg[seg];
+
+	/* nldelta soft clip values; the negative side is enabled in both cases */
+	dscl_prog_data->isharp_nldelta_sclip.enable_n = 1;	/* ISHARP_NLDELTA_SCLIP_EN_N */
+	if (lls_pref == LLS_PREF_YES) {
+		dscl_prog_data->isharp_nldelta_sclip.enable_p = 0;	/* ISHARP_NLDELTA_SCLIP_EN_P */
+		dscl_prog_data->isharp_nldelta_sclip.pivot_p = 0;	/* ISHARP_NLDELTA_SCLIP_PIVOT_P */
+		dscl_prog_data->isharp_nldelta_sclip.slope_p = 0;	/* ISHARP_NLDELTA_SCLIP_SLOPE_P */
+		dscl_prog_data->isharp_nldelta_sclip.pivot_n = 71;	/* ISHARP_NLDELTA_SCLIP_PIVOT_N */
+		dscl_prog_data->isharp_nldelta_sclip.slope_n = 16;	/* ISHARP_NLDELTA_SCLIP_SLOPE_N */
+	} else {
+		dscl_prog_data->isharp_nldelta_sclip.enable_p = 1;	/* ISHARP_NLDELTA_SCLIP_EN_P */
+		dscl_prog_data->isharp_nldelta_sclip.pivot_p = 70;	/* ISHARP_NLDELTA_SCLIP_PIVOT_P */
+		dscl_prog_data->isharp_nldelta_sclip.slope_p = 24;	/* ISHARP_NLDELTA_SCLIP_SLOPE_P */
+		dscl_prog_data->isharp_nldelta_sclip.pivot_n = 70;	/* ISHARP_NLDELTA_SCLIP_PIVOT_N */
+		dscl_prog_data->isharp_nldelta_sclip.slope_n = 24;	/* ISHARP_NLDELTA_SCLIP_SLOPE_N */
+	}
+
+	/* Blur scale values as per lookup table */
+	spl_set_blur_scale_data(dscl_prog_data, data);
+}
+
+/*
+ * Reset scratch, compute recout, scaling ratios and viewport size, then
+ * return the result of the optimal-taps query.
+ */
+static bool spl_calculate_number_of_taps(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out,
+	bool *enable_easf_v, bool *enable_easf_h, bool *enable_isharp)
+{
+	struct spl_scaler_data *scl_data = &spl_scratch->scl_data;
+
+	/* Start from a zeroed scratch state seeded with the active size */
+	memset(spl_scratch, 0, sizeof(*spl_scratch));
+	scl_data->h_active = spl_in->h_active;
+	scl_data->v_active = spl_in->v_active;
+
+	/* Recout calculation first; it depends on h_active */
+	spl_calculate_recout(spl_in, spl_scratch, spl_out);
+	/* Scaling ratios depend on pixel format */
+	spl_calculate_scaling_ratios(spl_in, spl_scratch, spl_out);
+	/* Adjust recout for opp if needed */
+	spl_opp_adjust_rect(&scl_data->recout, &spl_in->basic_in.opp_recout_adjust);
+	/* Needs scaling ratios and recout; does not calculate offset yet */
+	spl_calculate_viewport_size(spl_in, spl_scratch);
+
+	return spl_get_optimal_number_of_taps(spl_in->basic_out.max_downscale_src_width,
+		spl_in, spl_scratch, &spl_in->scaling_quality,
+		enable_easf_v, enable_easf_h, enable_isharp);
+}
+
+/*
+ * Calculate scaler parameters: taps, inits/viewports, DSCL programming data,
+ * and (only when tap calculation succeeded) the EASF and iSHARP settings.
+ */
+bool SPL_NAMESPACE(spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out))
+{
+	struct spl_scratch spl_scratch;
+	const struct spl_scaler_data *data = &spl_scratch.scl_data;
+	struct spl_fixed31_32 isharp_scale_ratio;
+	enum system_setup setup;
+	bool enable_easf_v = false;
+	bool enable_easf_h = false;
+	bool enable_isharp = false;
+	int vratio, hratio;
+	bool res;
+
+	res = spl_calculate_number_of_taps(spl_in, &spl_scratch, spl_out,
+		&enable_easf_v, &enable_easf_h, &enable_isharp);
+
+	/*
+	 * Inits and viewports depend on recout, scaling ratios, h_active and
+	 * taps, so they are only computed when tap calculation succeeded.
+	 * May need to re-check lb size after this in some obscure scenario
+	 */
+	if (res)
+		spl_calculate_inits_and_viewports(spl_in, &spl_scratch);
+
+	/* Handle 3d recout and clamp the viewport; both run regardless of res */
+	spl_handle_3d_recout(spl_in, &spl_scratch.scl_data.recout);
+	spl_clamp_viewport(&spl_scratch.scl_data.viewport, spl_in->min_viewport_size);
+
+	/* Save all calculated parameters in dscl_prog_data structure to program hw registers */
+	spl_set_dscl_prog_data(spl_in, &spl_scratch, spl_out, enable_easf_v, enable_easf_h, enable_isharp);
+
+	/* EASF and iSHARP are skipped when tap calculation failed */
+	if (!res)
+		return false;
+
+	/* Pick the setup from lls_pref and is_hdr_on */
+	if (spl_in->lls_pref == LLS_PREF_YES)
+		setup = spl_in->is_hdr_on ? HDR_L : SDR_L;
+	else
+		setup = spl_in->is_hdr_on ? HDR_NL : SDR_NL;
+
+	/* Set EASF */
+	spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref,
+		spl_in->basic_in.format, setup, spl_in->sdr_white_level_nits);
+
+	/*
+	 * Set iSHARP: use the reciprocal ratio of the direction whose
+	 * spl_fixpt_ceil() scaling ratio is smaller (vertical on a tie).
+	 */
+	vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert);
+	hratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.horz);
+	isharp_scale_ratio = (vratio <= hratio) ?
+		spl_scratch.scl_data.recip_ratios.vert :
+		spl_scratch.scl_data.recip_ratios.horz;
+
+	spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp,
+		spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup,
+		spl_in->debug.scale_to_sharpness_policy);
+
+	return res;
+}
+
+/*
+ * External interface to get number of taps only. spl_set_taps_data() is
+ * called on spl_out->dscl_prog_data whether or not the calculation succeeds.
+ */
+bool SPL_NAMESPACE(spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out *spl_out))
+{
+	struct dscl_prog_data *prog_data = spl_out->dscl_prog_data;
+	struct spl_scratch spl_scratch;
+	bool easf_v = false, easf_h = false, isharp = false;
+	bool res;
+
+	res = spl_calculate_number_of_taps(spl_in, &spl_scratch, spl_out,
+		&easf_v, &easf_h, &isharp);
+	spl_set_taps_data(prog_data, &spl_scratch.scl_data);
+	return res;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h
new file mode 100644
index 000000000000..d621c42a237e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __DC_SPL_H__
+#define __DC_SPL_H__
+
+#include "dc_spl_types.h"
+#define BLACK_OFFSET_RGB_Y 0x0
+#define BLACK_OFFSET_CBCR 0x8000
+
+/* SPL_NAMESPACE(sym) prepends SPL_PFX_ (empty unless defined beforehand) to sym */
+#ifndef SPL_PFX_
+#define SPL_PFX_
+#endif
+
+#define SPL_EXPAND2(a, b) a##b
+#define SPL_EXPAND(a, b) SPL_EXPAND2(a, b)
+#define SPL_NAMESPACE(symbol) SPL_EXPAND(SPL_PFX_, symbol)
+
+/* SPL interfaces */
+
+bool SPL_NAMESPACE(spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out));
+
+bool SPL_NAMESPACE(spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out *spl_out));
+
+#endif /* __DC_SPL_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.c
new file mode 100644
index 000000000000..99238644e0a1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dc_spl_filters.h"
+
+void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter,
+	uint16_t *s1_12_filter, int num_taps)
+{
+	const int total = NUM_PHASES_COEFF * num_taps;
+	int idx;
+
+	for (idx = 0; idx < total; idx++)
+		s1_12_filter[idx] = s1_10_filter[idx] * 4; /* S1.10 -> S1.12 */
+}
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.h b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.h
new file mode 100644
index 000000000000..20439cdbdb10
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __DC_SPL_FILTERS_H__
+#define __DC_SPL_FILTERS_H__
+
+#include "dc_spl_types.h"
+
+#define NUM_PHASES_COEFF 33 /* coefficient rows per table; entries = rows * taps */
+
+void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter,
+ uint16_t *s1_12_filter, int num_taps);
+
+#endif /* __DC_SPL_FILTERS_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.c
new file mode 100644
index 000000000000..12acdd34e6a6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.c
@@ -0,0 +1,553 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "spl_debug.h"
+#include "dc_spl_filters.h"
+#include "dc_spl_isharp_filters.h"
+
+//========================================
+// Delta Gain 1DLUT
+// LUT content is packed as 4-bytes into one DWORD/entry
+// A_start = 0.000000
+// A_end = 10.000000
+// A_gain = 3.000000
+// B_start = 11.000000
+// B_end = 127.000000
+// C_start = 40.000000
+// C_end = 127.000000
+//========================================
+static const uint32_t filter_isharp_1D_lut_3p0x[ISHARP_LUT_TABLE_SIZE] = {
+0x03010000,
+0x0F0B0805,
+0x211E1813,
+0x2B292624,
+0x3533302E,
+0x3E3C3A37,
+0x46444240,
+0x4D4B4A48,
+0x5352504F,
+0x59575655,
+0x5D5C5B5A,
+0x61605F5E,
+0x64646362,
+0x66666565,
+0x68686767,
+0x68686868,
+0x68686868,
+0x67676868,
+0x65656666,
+0x62636464,
+0x5E5F6061,
+0x5A5B5C5D,
+0x55565759,
+0x4F505253,
+0x484A4B4D,
+0x40424446,
+0x373A3C3E,
+0x2E303335,
+0x2426292B,
+0x191B1E21,
+0x0D101316,
+0x0003060A,
+};
+
+// Blur and scale coefficients
+//========================================================
+// <using> gen_BlurScale_coeffs.m
+// <date> 25-Apr-2022
+// <num_taps> 4
+// <num_phases> 64
+// <CoefType> Blur & Scale LPF
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t filter_isharp_bs_4tap_in_6_64p[198] = {
+0x0000, 0x00E5, 0x0237, 0x00E4, 0x0000, 0x0000,
+0x0000, 0x00DE, 0x0237, 0x00EB, 0x0000, 0x0000,
+0x0000, 0x00D7, 0x0236, 0x00F2, 0x0001, 0x0000,
+0x0000, 0x00D0, 0x0235, 0x00FA, 0x0001, 0x0000,
+0x0000, 0x00C9, 0x0234, 0x0101, 0x0002, 0x0000,
+0x0000, 0x00C2, 0x0233, 0x0108, 0x0003, 0x0000,
+0x0000, 0x00BB, 0x0232, 0x0110, 0x0003, 0x0000,
+0x0000, 0x00B5, 0x0230, 0x0117, 0x0004, 0x0000,
+0x0000, 0x00AE, 0x022E, 0x011F, 0x0005, 0x0000,
+0x0000, 0x00A8, 0x022C, 0x0126, 0x0006, 0x0000,
+0x0000, 0x00A2, 0x022A, 0x012D, 0x0007, 0x0000,
+0x0000, 0x009C, 0x0228, 0x0134, 0x0008, 0x0000,
+0x0000, 0x0096, 0x0225, 0x013C, 0x0009, 0x0000,
+0x0000, 0x0090, 0x0222, 0x0143, 0x000B, 0x0000,
+0x0000, 0x008A, 0x021F, 0x014B, 0x000C, 0x0000,
+0x0000, 0x0085, 0x021C, 0x0151, 0x000E, 0x0000,
+0x0000, 0x007F, 0x0218, 0x015A, 0x000F, 0x0000,
+0x0000, 0x007A, 0x0215, 0x0160, 0x0011, 0x0000,
+0x0000, 0x0074, 0x0211, 0x0168, 0x0013, 0x0000,
+0x0000, 0x006F, 0x020D, 0x016F, 0x0015, 0x0000,
+0x0000, 0x006A, 0x0209, 0x0176, 0x0017, 0x0000,
+0x0000, 0x0065, 0x0204, 0x017E, 0x0019, 0x0000,
+0x0000, 0x0060, 0x0200, 0x0185, 0x001B, 0x0000,
+0x0000, 0x005C, 0x01FB, 0x018C, 0x001D, 0x0000,
+0x0000, 0x0057, 0x01F6, 0x0193, 0x0020, 0x0000,
+0x0000, 0x0053, 0x01F1, 0x019A, 0x0022, 0x0000,
+0x0000, 0x004E, 0x01EC, 0x01A1, 0x0025, 0x0000,
+0x0000, 0x004A, 0x01E6, 0x01A8, 0x0028, 0x0000,
+0x0000, 0x0046, 0x01E1, 0x01AF, 0x002A, 0x0000,
+0x0000, 0x0042, 0x01DB, 0x01B6, 0x002D, 0x0000,
+0x0000, 0x003F, 0x01D5, 0x01BB, 0x0031, 0x0000,
+0x0000, 0x003B, 0x01CF, 0x01C2, 0x0034, 0x0000,
+0x0000, 0x0037, 0x01C9, 0x01C9, 0x0037, 0x0000
+};
+//========================================================
+// <using> gen_BlurScale_coeffs.m
+// <date> 25-Apr-2022
+// <num_taps> 4
+// <num_phases> 64
+// <CoefType> Blur & Scale LPF
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t filter_isharp_bs_4tap_64p[132] = {
+0x00E5, 0x0237, 0x00E4, 0x0000,
+0x00DE, 0x0237, 0x00EB, 0x0000,
+0x00D7, 0x0236, 0x00F2, 0x0001,
+0x00D0, 0x0235, 0x00FA, 0x0001,
+0x00C9, 0x0234, 0x0101, 0x0002,
+0x00C2, 0x0233, 0x0108, 0x0003,
+0x00BB, 0x0232, 0x0110, 0x0003,
+0x00B5, 0x0230, 0x0117, 0x0004,
+0x00AE, 0x022E, 0x011F, 0x0005,
+0x00A8, 0x022C, 0x0126, 0x0006,
+0x00A2, 0x022A, 0x012D, 0x0007,
+0x009C, 0x0228, 0x0134, 0x0008,
+0x0096, 0x0225, 0x013C, 0x0009,
+0x0090, 0x0222, 0x0143, 0x000B,
+0x008A, 0x021F, 0x014B, 0x000C,
+0x0085, 0x021C, 0x0151, 0x000E,
+0x007F, 0x0218, 0x015A, 0x000F,
+0x007A, 0x0215, 0x0160, 0x0011,
+0x0074, 0x0211, 0x0168, 0x0013,
+0x006F, 0x020D, 0x016F, 0x0015,
+0x006A, 0x0209, 0x0176, 0x0017,
+0x0065, 0x0204, 0x017E, 0x0019,
+0x0060, 0x0200, 0x0185, 0x001B,
+0x005C, 0x01FB, 0x018C, 0x001D,
+0x0057, 0x01F6, 0x0193, 0x0020,
+0x0053, 0x01F1, 0x019A, 0x0022,
+0x004E, 0x01EC, 0x01A1, 0x0025,
+0x004A, 0x01E6, 0x01A8, 0x0028,
+0x0046, 0x01E1, 0x01AF, 0x002A,
+0x0042, 0x01DB, 0x01B6, 0x002D,
+0x003F, 0x01D5, 0x01BB, 0x0031,
+0x003B, 0x01CF, 0x01C2, 0x0034,
+0x0037, 0x01C9, 0x01C9, 0x0037,
+};
+//========================================================
+// <using> gen_BlurScale_coeffs.m
+// <date> 09-Jun-2022
+// <num_taps> 3
+// <num_phases> 64
+// <CoefType> Blur & Scale LPF
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t filter_isharp_bs_3tap_64p[99] = {
+0x0200, 0x0200, 0x0000,
+0x01F6, 0x0206, 0x0004,
+0x01EC, 0x020B, 0x0009,
+0x01E2, 0x0211, 0x000D,
+0x01D8, 0x0216, 0x0012,
+0x01CE, 0x021C, 0x0016,
+0x01C4, 0x0221, 0x001B,
+0x01BA, 0x0226, 0x0020,
+0x01B0, 0x022A, 0x0026,
+0x01A6, 0x022F, 0x002B,
+0x019C, 0x0233, 0x0031,
+0x0192, 0x0238, 0x0036,
+0x0188, 0x023C, 0x003C,
+0x017E, 0x0240, 0x0042,
+0x0174, 0x0244, 0x0048,
+0x016A, 0x0248, 0x004E,
+0x0161, 0x024A, 0x0055,
+0x0157, 0x024E, 0x005B,
+0x014D, 0x0251, 0x0062,
+0x0144, 0x0253, 0x0069,
+0x013A, 0x0256, 0x0070,
+0x0131, 0x0258, 0x0077,
+0x0127, 0x025B, 0x007E,
+0x011E, 0x025C, 0x0086,
+0x0115, 0x025E, 0x008D,
+0x010B, 0x0260, 0x0095,
+0x0102, 0x0262, 0x009C,
+0x00F9, 0x0263, 0x00A4,
+0x00F0, 0x0264, 0x00AC,
+0x00E7, 0x0265, 0x00B4,
+0x00DF, 0x0264, 0x00BD,
+0x00D6, 0x0265, 0x00C5,
+0x00CD, 0x0266, 0x00CD,
+};
+
+/* Converted Blur & Scale coeff tables from S1.10 to S1.12 */
+static const uint16_t filter_isharp_bs_4tap_in_6_64p_s1_12[198] = {
+0x0000, 0x0394, 0x08dc, 0x0390, 0x0000, 0x0000,
+0x0000, 0x0378, 0x08dc, 0x03ac, 0x0000, 0x0000,
+0x0000, 0x035c, 0x08d8, 0x03c8, 0x0004, 0x0000,
+0x0000, 0x0340, 0x08d4, 0x03e8, 0x0004, 0x0000,
+0x0000, 0x0324, 0x08d0, 0x0404, 0x0008, 0x0000,
+0x0000, 0x0308, 0x08cc, 0x0420, 0x000c, 0x0000,
+0x0000, 0x02ec, 0x08c8, 0x0440, 0x000c, 0x0000,
+0x0000, 0x02d4, 0x08c0, 0x045c, 0x0010, 0x0000,
+0x0000, 0x02b8, 0x08b8, 0x047c, 0x0014, 0x0000,
+0x0000, 0x02a0, 0x08b0, 0x0498, 0x0018, 0x0000,
+0x0000, 0x0288, 0x08a8, 0x04b4, 0x001c, 0x0000,
+0x0000, 0x0270, 0x08a0, 0x04d0, 0x0020, 0x0000,
+0x0000, 0x0258, 0x0894, 0x04f0, 0x0024, 0x0000,
+0x0000, 0x0240, 0x0888, 0x050c, 0x002c, 0x0000,
+0x0000, 0x0228, 0x087c, 0x052c, 0x0030, 0x0000,
+0x0000, 0x0214, 0x0870, 0x0544, 0x0038, 0x0000,
+0x0000, 0x01fc, 0x0860, 0x0568, 0x003c, 0x0000,
+0x0000, 0x01e8, 0x0854, 0x0580, 0x0044, 0x0000,
+0x0000, 0x01d0, 0x0844, 0x05a0, 0x004c, 0x0000,
+0x0000, 0x01bc, 0x0834, 0x05bc, 0x0054, 0x0000,
+0x0000, 0x01a8, 0x0824, 0x05d8, 0x005c, 0x0000,
+0x0000, 0x0194, 0x0810, 0x05f8, 0x0064, 0x0000,
+0x0000, 0x0180, 0x0800, 0x0614, 0x006c, 0x0000,
+0x0000, 0x0170, 0x07ec, 0x0630, 0x0074, 0x0000,
+0x0000, 0x015c, 0x07d8, 0x064c, 0x0080, 0x0000,
+0x0000, 0x014c, 0x07c4, 0x0668, 0x0088, 0x0000,
+0x0000, 0x0138, 0x07b0, 0x0684, 0x0094, 0x0000,
+0x0000, 0x0128, 0x0798, 0x06a0, 0x00a0, 0x0000,
+0x0000, 0x0118, 0x0784, 0x06bc, 0x00a8, 0x0000,
+0x0000, 0x0108, 0x076c, 0x06d8, 0x00b4, 0x0000,
+0x0000, 0x00fc, 0x0754, 0x06ec, 0x00c4, 0x0000,
+0x0000, 0x00ec, 0x073c, 0x0708, 0x00d0, 0x0000,
+0x0000, 0x00dc, 0x0724, 0x0724, 0x00dc, 0x0000,
+};
+
+static const uint16_t filter_isharp_bs_4tap_64p_s1_12[132] = {
+0x0394, 0x08dc, 0x0390, 0x0000,
+0x0378, 0x08dc, 0x03ac, 0x0000,
+0x035c, 0x08d8, 0x03c8, 0x0004,
+0x0340, 0x08d4, 0x03e8, 0x0004,
+0x0324, 0x08d0, 0x0404, 0x0008,
+0x0308, 0x08cc, 0x0420, 0x000c,
+0x02ec, 0x08c8, 0x0440, 0x000c,
+0x02d4, 0x08c0, 0x045c, 0x0010,
+0x02b8, 0x08b8, 0x047c, 0x0014,
+0x02a0, 0x08b0, 0x0498, 0x0018,
+0x0288, 0x08a8, 0x04b4, 0x001c,
+0x0270, 0x08a0, 0x04d0, 0x0020,
+0x0258, 0x0894, 0x04f0, 0x0024,
+0x0240, 0x0888, 0x050c, 0x002c,
+0x0228, 0x087c, 0x052c, 0x0030,
+0x0214, 0x0870, 0x0544, 0x0038,
+0x01fc, 0x0860, 0x0568, 0x003c,
+0x01e8, 0x0854, 0x0580, 0x0044,
+0x01d0, 0x0844, 0x05a0, 0x004c,
+0x01bc, 0x0834, 0x05bc, 0x0054,
+0x01a8, 0x0824, 0x05d8, 0x005c,
+0x0194, 0x0810, 0x05f8, 0x0064,
+0x0180, 0x0800, 0x0614, 0x006c,
+0x0170, 0x07ec, 0x0630, 0x0074,
+0x015c, 0x07d8, 0x064c, 0x0080,
+0x014c, 0x07c4, 0x0668, 0x0088,
+0x0138, 0x07b0, 0x0684, 0x0094,
+0x0128, 0x0798, 0x06a0, 0x00a0,
+0x0118, 0x0784, 0x06bc, 0x00a8,
+0x0108, 0x076c, 0x06d8, 0x00b4,
+0x00fc, 0x0754, 0x06ec, 0x00c4,
+0x00ec, 0x073c, 0x0708, 0x00d0,
+0x00dc, 0x0724, 0x0724, 0x00dc,
+};
+
+static const uint16_t filter_isharp_bs_3tap_64p_s1_12[99] = {
+0x0800, 0x0800, 0x0000,
+0x07d8, 0x0818, 0x0010,
+0x07b0, 0x082c, 0x0024,
+0x0788, 0x0844, 0x0034,
+0x0760, 0x0858, 0x0048,
+0x0738, 0x0870, 0x0058,
+0x0710, 0x0884, 0x006c,
+0x06e8, 0x0898, 0x0080,
+0x06c0, 0x08a8, 0x0098,
+0x0698, 0x08bc, 0x00ac,
+0x0670, 0x08cc, 0x00c4,
+0x0648, 0x08e0, 0x00d8,
+0x0620, 0x08f0, 0x00f0,
+0x05f8, 0x0900, 0x0108,
+0x05d0, 0x0910, 0x0120,
+0x05a8, 0x0920, 0x0138,
+0x0584, 0x0928, 0x0154,
+0x055c, 0x0938, 0x016c,
+0x0534, 0x0944, 0x0188,
+0x0510, 0x094c, 0x01a4,
+0x04e8, 0x0958, 0x01c0,
+0x04c4, 0x0960, 0x01dc,
+0x049c, 0x096c, 0x01f8,
+0x0478, 0x0970, 0x0218,
+0x0454, 0x0978, 0x0234,
+0x042c, 0x0980, 0x0254,
+0x0408, 0x0988, 0x0270,
+0x03e4, 0x098c, 0x0290,
+0x03c0, 0x0990, 0x02b0,
+0x039c, 0x0994, 0x02d0,
+0x037c, 0x0990, 0x02f4,
+0x0358, 0x0994, 0x0314,
+0x0334, 0x0998, 0x0334,
+};
+
+/* Pre-generated 1DLUT for given setup and sharpness level */
+struct isharp_1D_lut_pregen filter_isharp_1D_lut_pregen[NUM_SHARPNESS_SETUPS] = {
+ {
+ 0, 0,
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ }
+ },
+ {
+ 0, 0,
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ }
+ },
+ {
+ 0, 0,
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ }
+ },
+ {
+ 0, 0,
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ }
+ },
+};
+
+struct scale_ratio_to_sharpness_level_adj sharpness_level_adj[NUM_SHARPNESS_ADJ_LEVELS] = {
+ {1125, 1000, 0}, /* ratio >= 1.125: no adjustment */
+ {11, 10, 1}, /* 1.100 up to 1.125: level down 1 */
+ {1075, 1000, 2}, /* 1.075 up to 1.100: level down 2 */
+ {105, 100, 3}, /* 1.050 up to 1.075: level down 3 */
+ {1025, 1000, 4}, /* 1.025 up to 1.050: level down 4 */
+ {1, 1, 5}, /* 1.000 up to 1.025: level down 5 */
+};
+
+/*
+ * Look up how many steps the sharpness level is lowered for the given
+ * scaling ratio, using the sharpness_level_adj table. Returns 0 when the
+ * ratio is below every table entry.
+ */
+static unsigned int spl_calculate_sharpness_level_adj(struct spl_fixed31_32 ratio)
+{
+	const struct scale_ratio_to_sharpness_level_adj *entry;
+	struct spl_fixed31_32 threshold;
+	int idx;
+
+	/*
+	 * Adjust sharpness level based on current scaling ratio
+	 *
+	 * The table holds 6 discrete scaling ratios, highest first. The
+	 * first one the current ratio reaches decides how far the sharpness
+	 * level is adjusted down:
+	 *
+	 * 1.125 upscale and higher - no adj
+	 * 1.100 - under 1.125 - adj level down 1
+	 * 1.075 - under 1.100 - adj level down 2
+	 * 1.050 - under 1.075 - adj level down 3
+	 * 1.025 - under 1.050 - adj level down 4
+	 * 1.000 - under 1.025 - adj level down 5
+	 *
+	 * A ratio below every table entry gets no adjustment.
+	 */
+	for (idx = 0; idx < NUM_SHARPNESS_ADJ_LEVELS; idx++) {
+		entry = &sharpness_level_adj[idx];
+		threshold = spl_fixpt_from_fraction(entry->ratio_numer,
+			entry->ratio_denom);
+		if (ratio.value >= threshold.value)
+			return entry->level_down_adj;
+	}
+
+	return 0;
+}
+
+/*
+ * Map a discrete sharpness level onto the min/mid/max sharpness range of the
+ * given setup. Depending on the policy, the level is first lowered by the
+ * scale-ratio based adjustment.
+ */
+static unsigned int spl_calculate_sharpness_level(struct spl_fixed31_32 ratio,
+	unsigned int discrete_sharpness_level, enum system_setup setup,
+	struct spl_sharpness_range sharpness_range,
+	enum scale_to_sharpness_policy scale_to_sharpness_policy)
+{
+	unsigned int level_down_adj = 0;
+	int min_sharpness, max_sharpness, mid_sharpness;
+	int lower_step, upper_step;
+	bool adj_by_ratio;
+
+	/*
+	 * Adjust sharpness level if policy requires we adjust it based on
+	 * scale ratio. Based on scale ratio, we may adjust the sharpness
+	 * level down by a certain number of steps. We will not select
+	 * a sharpness value of 0 so the lowest sharpness level will be
+	 * 0 or 1 depending on what the min_sharpness is
+	 *
+	 * If the policy is not required, this code may be removed at a
+	 * later date
+	 */
+	switch (setup) {
+	case HDR_L:
+		min_sharpness = sharpness_range.hdr_rgb_min;
+		max_sharpness = sharpness_range.hdr_rgb_max;
+		mid_sharpness = sharpness_range.hdr_rgb_mid;
+		adj_by_ratio = (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL);
+		break;
+	case HDR_NL:
+		/* currently no use case, use Non-linear SDR values for now */
+	case SDR_NL:
+		min_sharpness = sharpness_range.sdr_yuv_min;
+		max_sharpness = sharpness_range.sdr_yuv_max;
+		mid_sharpness = sharpness_range.sdr_yuv_mid;
+		adj_by_ratio = (scale_to_sharpness_policy >= SCALE_TO_SHARPNESS_ADJ_YUV);
+		break;
+	case SDR_L:
+	default:
+		min_sharpness = sharpness_range.sdr_rgb_min;
+		max_sharpness = sharpness_range.sdr_rgb_max;
+		mid_sharpness = sharpness_range.sdr_rgb_mid;
+		adj_by_ratio = (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL);
+		break;
+	}
+
+	if (adj_by_ratio)
+		level_down_adj = spl_calculate_sharpness_level_adj(ratio);
+
+	/* Floor the level at 0, or at 1 when min_sharpness is 0 */
+	if (level_down_adj >= discrete_sharpness_level)
+		discrete_sharpness_level = (min_sharpness == 0) ? 1 : 0;
+	else
+		discrete_sharpness_level -= level_down_adj;
+
+	lower_step = (mid_sharpness - min_sharpness) / 5;
+	upper_step = (max_sharpness - mid_sharpness) / 5;
+
+	/* Levels below 5 step up from min, the others step up from mid */
+	if (discrete_sharpness_level < 5)
+		return min_sharpness + (lower_step * discrete_sharpness_level);
+
+	return mid_sharpness + (upper_step * (discrete_sharpness_level - 5));
+}
+
+/*
+ * Rebuild the cached 1D LUT for the given setup by rescaling the 3.0x
+ * reference curve to the sharpness level chosen for this ratio and policy.
+ * Returns early, leaving the cache untouched, when the cached entry was
+ * already built for the same sharpness level.
+ */
+void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup,
+	struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy)
+{
+	const uint8_t *src_bytes = (const uint8_t *)filter_isharp_1D_lut_3p0x;
+	const int lut_bytes = sizeof(filter_isharp_1D_lut_3p0x);
+	struct isharp_1D_lut_pregen *cached = &filter_isharp_1D_lut_pregen[setup];
+	uint32_t filter_pregen_store[ISHARP_LUT_TABLE_SIZE];
+	uint8_t *dst_bytes = (uint8_t *)filter_pregen_store;
+	struct spl_fixed31_32 sharp_level, scaled;
+	unsigned int sharpnessX1000;
+	int idx, rounded;
+
+	/* Custom sharpnessX1000 value */
+	sharpnessX1000 = spl_calculate_sharpness_level(ratio,
+		sharpness.sharpness_level, setup,
+		sharpness.sharpness_range, scale_to_sharpness_policy);
+	sharp_level = spl_fixpt_from_fraction(sharpnessX1000, 1000);
+
+	/* Pregen 1dlut already calculated for this numer/denom: keep it */
+	if (cached->sharpness_numer == sharpnessX1000 &&
+	    cached->sharpness_denom == 1000)
+		return;
+
+	/*
+	 * Calculate LUT_128_gained with this equation:
+	 *
+	 * LUT_128_gained[i] = (uint8)(0.5 + min(255,(double)(LUT_128[i])*sharpLevel/iGain))
+	 * where LUT_128[i] is contents of 3p0x isharp 1dlut
+	 * where sharpLevel is desired sharpness level
+	 * where iGain is base sharpness level 3.0
+	 * where LUT_128_gained[i] is adjusted 1dlut value based on desired sharpness level
+	 *
+	 * Each uint32_t entry packs four byte-wide values, so work per byte.
+	 */
+	memset(dst_bytes, 0, lut_bytes);
+	for (idx = 0; idx < lut_bytes; idx++) {
+		scaled = spl_fixpt_mul(spl_fixpt_from_int((int)src_bytes[idx]), sharp_level);
+		scaled = spl_fixpt_div(scaled, spl_fixpt_from_int(3));
+		scaled = spl_fixpt_min(spl_fixpt_from_int(255), scaled);
+		scaled = spl_fixpt_add(scaled, spl_fixpt_from_fraction(1, 2));
+		rounded = spl_fixpt_floor(scaled);
+		/* Clamp it at 0x7F so it doesn't wrap */
+		if (rounded > 127)
+			rounded = 127;
+		dst_bytes[idx] = (uint8_t)rounded;
+	}
+
+	/* Update 1dlut table and sharpness level */
+	memcpy(cached->value, filter_pregen_store, lut_bytes);
+	cached->sharpness_numer = sharpnessX1000;
+	cached->sharpness_denom = 1000;
+}
+
+uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup)
+{
+	return &filter_isharp_1D_lut_pregen[setup].value[0]; /* cached LUT for this setup */
+}
+
+/* Return the S1.12 blur & scale table for 3, 4 or 6 taps, NULL otherwise */
+const uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps)
+{
+	switch (taps) {
+	case 3:
+		return filter_isharp_bs_3tap_64p_s1_12;
+	case 4:
+		return filter_isharp_bs_4tap_64p_s1_12;
+	case 6:
+		return filter_isharp_bs_4tap_in_6_64p_s1_12;
+	}
+	SPL_BREAK_TO_DEBUGGER(); /* should never happen, bug */
+	return NULL;
+}
+
+/* Return the S1.10 blur & scale table for 3, 4 or 6 taps, NULL otherwise */
+const uint16_t *spl_dscl_get_blur_scale_coeffs_64p_s1_10(int taps)
+{
+	switch (taps) {
+	case 3:
+		return filter_isharp_bs_3tap_64p;
+	case 4:
+		return filter_isharp_bs_4tap_64p;
+	case 6:
+		return filter_isharp_bs_4tap_in_6_64p;
+	}
+	SPL_BREAK_TO_DEBUGGER(); /* should never happen, bug */
+	return NULL;
+}
+
+void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *data)
+{
+	/* Blur & scale coefficients are looked up by tap count per direction */
+	const uint16_t *h_coeffs = spl_dscl_get_blur_scale_coeffs_64p(data->taps.h_taps);
+	const uint16_t *v_coeffs = spl_dscl_get_blur_scale_coeffs_64p(data->taps.v_taps);
+
+	dscl_prog_data->filter_blur_scale_h = h_coeffs;
+	dscl_prog_data->filter_blur_scale_v = v_coeffs;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.h
new file mode 100644
index 000000000000..f5e3d3ecc913
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __DC_SPL_ISHARP_FILTERS_H__
+#define __DC_SPL_ISHARP_FILTERS_H__
+
+#include "dc_spl_types.h"
+
+#define NUM_SHARPNESS_ADJ_LEVELS 6 /* number of entries in sharpness_level_adj[] */
+struct scale_ratio_to_sharpness_level_adj {
+ unsigned int ratio_numer; /* scale ratio threshold, numerator */
+ unsigned int ratio_denom; /* scale ratio threshold, denominator */
+ unsigned int level_down_adj; /* adjust sharpness level down */
+};
+
+struct isharp_1D_lut_pregen {
+ unsigned int sharpness_numer; /* sharpness the cached LUT was built for (numerator) */
+ unsigned int sharpness_denom; /* matching denominator; set to 1000 when the LUT is built */
+ uint32_t value[ISHARP_LUT_TABLE_SIZE]; /* generated 1D LUT entries */
+};
+
+enum system_setup {
+ SDR_NL = 0, /* HDR off, lls_pref other than LLS_PREF_YES */
+ SDR_L, /* HDR off, lls_pref == LLS_PREF_YES */
+ HDR_NL, /* HDR on, lls_pref other than LLS_PREF_YES */
+ HDR_L, /* HDR on, lls_pref == LLS_PREF_YES */
+ NUM_SHARPNESS_SETUPS /* count; sizes filter_isharp_1D_lut_pregen[] */
+};
+
+void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data,
+ const struct spl_scaler_data *data);
+
+void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup,
+ struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy);
+uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup);
+
+// public API
+const uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps);
+const uint16_t *spl_dscl_get_blur_scale_coeffs_64p_s1_10(int taps);
+
+#endif /* __DC_SPL_ISHARP_FILTERS_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.c
new file mode 100644
index 000000000000..0d1bd81ff04a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.c
@@ -0,0 +1,2586 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "spl_debug.h"
+#include "dc_spl_filters.h"
+#include "dc_spl_scl_filters.h"
+#include "dc_spl_scl_easf_filters.h"
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 3t_64p_LanczosEd_p_0.3_p_10qb_
+// <num_taps> 3
+// <num_phases> 64
+// <scale_ratio> input/output = 0.300000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_3tap_64p_ratio_0_30[99] = {
+ 0x0200, 0x0200, 0x0000,
+ 0x01F6, 0x0206, 0x0004,
+ 0x01EC, 0x020B, 0x0009,
+ 0x01E2, 0x0211, 0x000D,
+ 0x01D8, 0x0216, 0x0012,
+ 0x01CE, 0x021C, 0x0016,
+ 0x01C4, 0x0221, 0x001B,
+ 0x01BA, 0x0226, 0x0020,
+ 0x01B0, 0x022A, 0x0026,
+ 0x01A6, 0x022F, 0x002B,
+ 0x019C, 0x0233, 0x0031,
+ 0x0192, 0x0238, 0x0036,
+ 0x0188, 0x023C, 0x003C,
+ 0x017E, 0x0240, 0x0042,
+ 0x0174, 0x0244, 0x0048,
+ 0x016A, 0x0248, 0x004E,
+ 0x0161, 0x024A, 0x0055,
+ 0x0157, 0x024E, 0x005B,
+ 0x014D, 0x0251, 0x0062,
+ 0x0144, 0x0253, 0x0069,
+ 0x013A, 0x0256, 0x0070,
+ 0x0131, 0x0258, 0x0077,
+ 0x0127, 0x025B, 0x007E,
+ 0x011E, 0x025C, 0x0086,
+ 0x0115, 0x025E, 0x008D,
+ 0x010B, 0x0260, 0x0095,
+ 0x0102, 0x0262, 0x009C,
+ 0x00F9, 0x0263, 0x00A4,
+ 0x00F0, 0x0264, 0x00AC,
+ 0x00E7, 0x0265, 0x00B4,
+ 0x00DF, 0x0264, 0x00BD,
+ 0x00D6, 0x0265, 0x00C5,
+ 0x00CD, 0x0266, 0x00CD,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 3t_64p_LanczosEd_p_0.4_p_10qb_
+// <num_taps> 3
+// <num_phases> 64
+// <scale_ratio> input/output = 0.400000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_3tap_64p_ratio_0_40[99] = {
+ 0x0200, 0x0200, 0x0000,
+ 0x01F6, 0x0206, 0x0004,
+ 0x01EB, 0x020E, 0x0007,
+ 0x01E1, 0x0214, 0x000B,
+ 0x01D7, 0x021A, 0x000F,
+ 0x01CD, 0x0220, 0x0013,
+ 0x01C2, 0x0226, 0x0018,
+ 0x01B8, 0x022C, 0x001C,
+ 0x01AE, 0x0231, 0x0021,
+ 0x01A3, 0x0237, 0x0026,
+ 0x0199, 0x023C, 0x002B,
+ 0x018F, 0x0240, 0x0031,
+ 0x0185, 0x0245, 0x0036,
+ 0x017A, 0x024A, 0x003C,
+ 0x0170, 0x024F, 0x0041,
+ 0x0166, 0x0253, 0x0047,
+ 0x015C, 0x0257, 0x004D,
+ 0x0152, 0x025A, 0x0054,
+ 0x0148, 0x025E, 0x005A,
+ 0x013E, 0x0261, 0x0061,
+ 0x0134, 0x0264, 0x0068,
+ 0x012B, 0x0266, 0x006F,
+ 0x0121, 0x0269, 0x0076,
+ 0x0117, 0x026C, 0x007D,
+ 0x010E, 0x026E, 0x0084,
+ 0x0104, 0x0270, 0x008C,
+ 0x00FB, 0x0271, 0x0094,
+ 0x00F2, 0x0272, 0x009C,
+ 0x00E9, 0x0273, 0x00A4,
+ 0x00E0, 0x0274, 0x00AC,
+ 0x00D7, 0x0275, 0x00B4,
+ 0x00CE, 0x0275, 0x00BD,
+ 0x00C5, 0x0276, 0x00C5,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 3t_64p_LanczosEd_p_0.5_p_10qb_
+// <num_taps> 3
+// <num_phases> 64
+// <scale_ratio> input/output = 0.500000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_3tap_64p_ratio_0_50[99] = {
+ 0x0200, 0x0200, 0x0000,
+ 0x01F5, 0x0209, 0x0002,
+ 0x01EA, 0x0211, 0x0005,
+ 0x01DF, 0x021A, 0x0007,
+ 0x01D4, 0x0222, 0x000A,
+ 0x01C9, 0x022A, 0x000D,
+ 0x01BE, 0x0232, 0x0010,
+ 0x01B3, 0x0239, 0x0014,
+ 0x01A8, 0x0241, 0x0017,
+ 0x019D, 0x0248, 0x001B,
+ 0x0192, 0x024F, 0x001F,
+ 0x0187, 0x0255, 0x0024,
+ 0x017C, 0x025C, 0x0028,
+ 0x0171, 0x0262, 0x002D,
+ 0x0166, 0x0268, 0x0032,
+ 0x015B, 0x026E, 0x0037,
+ 0x0150, 0x0273, 0x003D,
+ 0x0146, 0x0278, 0x0042,
+ 0x013B, 0x027D, 0x0048,
+ 0x0130, 0x0282, 0x004E,
+ 0x0126, 0x0286, 0x0054,
+ 0x011B, 0x028A, 0x005B,
+ 0x0111, 0x028D, 0x0062,
+ 0x0107, 0x0290, 0x0069,
+ 0x00FD, 0x0293, 0x0070,
+ 0x00F3, 0x0296, 0x0077,
+ 0x00E9, 0x0298, 0x007F,
+ 0x00DF, 0x029A, 0x0087,
+ 0x00D5, 0x029C, 0x008F,
+ 0x00CC, 0x029D, 0x0097,
+ 0x00C3, 0x029E, 0x009F,
+ 0x00BA, 0x029E, 0x00A8,
+ 0x00B1, 0x029E, 0x00B1,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 3t_64p_LanczosEd_p_0.6_p_10qb_
+// <num_taps> 3
+// <num_phases> 64
+// <scale_ratio> input/output = 0.600000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_3tap_64p_ratio_0_60[99] = {
+ 0x0200, 0x0200, 0x0000,
+ 0x01F4, 0x020B, 0x0001,
+ 0x01E8, 0x0216, 0x0002,
+ 0x01DC, 0x0221, 0x0003,
+ 0x01D0, 0x022B, 0x0005,
+ 0x01C4, 0x0235, 0x0007,
+ 0x01B8, 0x0240, 0x0008,
+ 0x01AC, 0x0249, 0x000B,
+ 0x01A0, 0x0253, 0x000D,
+ 0x0194, 0x025C, 0x0010,
+ 0x0188, 0x0265, 0x0013,
+ 0x017C, 0x026E, 0x0016,
+ 0x0170, 0x0277, 0x0019,
+ 0x0164, 0x027F, 0x001D,
+ 0x0158, 0x0287, 0x0021,
+ 0x014C, 0x028F, 0x0025,
+ 0x0140, 0x0297, 0x0029,
+ 0x0135, 0x029D, 0x002E,
+ 0x0129, 0x02A4, 0x0033,
+ 0x011D, 0x02AB, 0x0038,
+ 0x0112, 0x02B0, 0x003E,
+ 0x0107, 0x02B5, 0x0044,
+ 0x00FC, 0x02BA, 0x004A,
+ 0x00F1, 0x02BF, 0x0050,
+ 0x00E6, 0x02C3, 0x0057,
+ 0x00DB, 0x02C7, 0x005E,
+ 0x00D1, 0x02CA, 0x0065,
+ 0x00C7, 0x02CC, 0x006D,
+ 0x00BD, 0x02CE, 0x0075,
+ 0x00B3, 0x02D0, 0x007D,
+ 0x00A9, 0x02D2, 0x0085,
+ 0x00A0, 0x02D2, 0x008E,
+ 0x0097, 0x02D2, 0x0097,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 3t_64p_LanczosEd_p_0.7_p_10qb_
+// <num_taps> 3
+// <num_phases> 64
+// <scale_ratio> input/output = 0.700000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_3tap_64p_ratio_0_70[99] = {
+ 0x0200, 0x0200, 0x0000,
+ 0x01F3, 0x020D, 0x0000,
+ 0x01E5, 0x021B, 0x0000,
+ 0x01D8, 0x0228, 0x0000,
+ 0x01CB, 0x0235, 0x0000,
+ 0x01BD, 0x0243, 0x0000,
+ 0x01B0, 0x024F, 0x0001,
+ 0x01A2, 0x025C, 0x0002,
+ 0x0195, 0x0268, 0x0003,
+ 0x0187, 0x0275, 0x0004,
+ 0x017A, 0x0280, 0x0006,
+ 0x016D, 0x028C, 0x0007,
+ 0x015F, 0x0298, 0x0009,
+ 0x0152, 0x02A2, 0x000C,
+ 0x0145, 0x02AD, 0x000E,
+ 0x0138, 0x02B7, 0x0011,
+ 0x012B, 0x02C0, 0x0015,
+ 0x011E, 0x02CA, 0x0018,
+ 0x0111, 0x02D3, 0x001C,
+ 0x0105, 0x02DB, 0x0020,
+ 0x00F8, 0x02E3, 0x0025,
+ 0x00EC, 0x02EA, 0x002A,
+ 0x00E0, 0x02F1, 0x002F,
+ 0x00D5, 0x02F6, 0x0035,
+ 0x00C9, 0x02FC, 0x003B,
+ 0x00BE, 0x0301, 0x0041,
+ 0x00B3, 0x0305, 0x0048,
+ 0x00A8, 0x0309, 0x004F,
+ 0x009E, 0x030C, 0x0056,
+ 0x0094, 0x030E, 0x005E,
+ 0x008A, 0x0310, 0x0066,
+ 0x0081, 0x0310, 0x006F,
+ 0x0077, 0x0312, 0x0077,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 3t_64p_LanczosEd_p_0.8_p_10qb_
+// <num_taps> 3
+// <num_phases> 64
+// <scale_ratio> input/output = 0.800000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_3tap_64p_ratio_0_80[99] = {
+ 0x0200, 0x0200, 0x0000,
+ 0x01F1, 0x0210, 0x0FFF,
+ 0x01E2, 0x0220, 0x0FFE,
+ 0x01D2, 0x0232, 0x0FFC,
+ 0x01C3, 0x0241, 0x0FFC,
+ 0x01B4, 0x0251, 0x0FFB,
+ 0x01A4, 0x0262, 0x0FFA,
+ 0x0195, 0x0271, 0x0FFA,
+ 0x0186, 0x0281, 0x0FF9,
+ 0x0176, 0x0291, 0x0FF9,
+ 0x0167, 0x02A0, 0x0FF9,
+ 0x0158, 0x02AE, 0x0FFA,
+ 0x0149, 0x02BD, 0x0FFA,
+ 0x013A, 0x02CB, 0x0FFB,
+ 0x012C, 0x02D7, 0x0FFD,
+ 0x011D, 0x02E5, 0x0FFE,
+ 0x010F, 0x02F1, 0x0000,
+ 0x0101, 0x02FD, 0x0002,
+ 0x00F3, 0x0308, 0x0005,
+ 0x00E5, 0x0313, 0x0008,
+ 0x00D8, 0x031D, 0x000B,
+ 0x00CB, 0x0326, 0x000F,
+ 0x00BE, 0x032F, 0x0013,
+ 0x00B2, 0x0337, 0x0017,
+ 0x00A6, 0x033E, 0x001C,
+ 0x009A, 0x0345, 0x0021,
+ 0x008F, 0x034A, 0x0027,
+ 0x0084, 0x034F, 0x002D,
+ 0x0079, 0x0353, 0x0034,
+ 0x006F, 0x0356, 0x003B,
+ 0x0065, 0x0358, 0x0043,
+ 0x005C, 0x0359, 0x004B,
+ 0x0053, 0x035A, 0x0053,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 3t_64p_LanczosEd_p_0.9_p_10qb_
+// <num_taps> 3
+// <num_phases> 64
+// <scale_ratio> input/output = 0.900000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_3tap_64p_ratio_0_90[99] = {
+ 0x0200, 0x0200, 0x0000,
+ 0x01EE, 0x0214, 0x0FFE,
+ 0x01DC, 0x0228, 0x0FFC,
+ 0x01CA, 0x023C, 0x0FFA,
+ 0x01B9, 0x024F, 0x0FF8,
+ 0x01A7, 0x0262, 0x0FF7,
+ 0x0195, 0x0276, 0x0FF5,
+ 0x0183, 0x028A, 0x0FF3,
+ 0x0172, 0x029C, 0x0FF2,
+ 0x0160, 0x02AF, 0x0FF1,
+ 0x014F, 0x02C2, 0x0FEF,
+ 0x013E, 0x02D4, 0x0FEE,
+ 0x012D, 0x02E5, 0x0FEE,
+ 0x011C, 0x02F7, 0x0FED,
+ 0x010C, 0x0307, 0x0FED,
+ 0x00FB, 0x0318, 0x0FED,
+ 0x00EC, 0x0327, 0x0FED,
+ 0x00DC, 0x0336, 0x0FEE,
+ 0x00CD, 0x0344, 0x0FEF,
+ 0x00BE, 0x0352, 0x0FF0,
+ 0x00B0, 0x035E, 0x0FF2,
+ 0x00A2, 0x036A, 0x0FF4,
+ 0x0095, 0x0375, 0x0FF6,
+ 0x0088, 0x037F, 0x0FF9,
+ 0x007B, 0x0388, 0x0FFD,
+ 0x006F, 0x0391, 0x0000,
+ 0x0064, 0x0397, 0x0005,
+ 0x0059, 0x039D, 0x000A,
+ 0x004E, 0x03A3, 0x000F,
+ 0x0045, 0x03A6, 0x0015,
+ 0x003B, 0x03A9, 0x001C,
+ 0x0033, 0x03AA, 0x0023,
+ 0x002A, 0x03AC, 0x002A,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 3t_64p_LanczosEd_p_1_p_10qb_
+// <num_taps> 3
+// <num_phases> 64
+// <scale_ratio> input/output = 1.000000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_3tap_64p_ratio_1_00[99] = {
+ 0x0200, 0x0200, 0x0000,
+ 0x01EB, 0x0217, 0x0FFE,
+ 0x01D5, 0x022F, 0x0FFC,
+ 0x01C0, 0x0247, 0x0FF9,
+ 0x01AB, 0x025E, 0x0FF7,
+ 0x0196, 0x0276, 0x0FF4,
+ 0x0181, 0x028D, 0x0FF2,
+ 0x016C, 0x02A5, 0x0FEF,
+ 0x0158, 0x02BB, 0x0FED,
+ 0x0144, 0x02D1, 0x0FEB,
+ 0x0130, 0x02E8, 0x0FE8,
+ 0x011C, 0x02FE, 0x0FE6,
+ 0x0109, 0x0313, 0x0FE4,
+ 0x00F6, 0x0328, 0x0FE2,
+ 0x00E4, 0x033C, 0x0FE0,
+ 0x00D2, 0x034F, 0x0FDF,
+ 0x00C0, 0x0363, 0x0FDD,
+ 0x00B0, 0x0374, 0x0FDC,
+ 0x009F, 0x0385, 0x0FDC,
+ 0x0090, 0x0395, 0x0FDB,
+ 0x0081, 0x03A4, 0x0FDB,
+ 0x0072, 0x03B3, 0x0FDB,
+ 0x0064, 0x03C0, 0x0FDC,
+ 0x0057, 0x03CC, 0x0FDD,
+ 0x004B, 0x03D6, 0x0FDF,
+ 0x003F, 0x03E0, 0x0FE1,
+ 0x0034, 0x03E8, 0x0FE4,
+ 0x002A, 0x03EF, 0x0FE7,
+ 0x0020, 0x03F5, 0x0FEB,
+ 0x0017, 0x03FA, 0x0FEF,
+ 0x000F, 0x03FD, 0x0FF4,
+ 0x0007, 0x03FF, 0x0FFA,
+ 0x0000, 0x0400, 0x0000,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 4t_64p_LanczosEd_p_0.3_p_10qb_
+// <num_taps> 4
+// <num_phases> 64
+// <scale_ratio> input/output = 0.300000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_4tap_64p_ratio_0_30[132] = {
+ 0x0104, 0x01F8, 0x0104, 0x0000,
+ 0x00FE, 0x01F7, 0x010A, 0x0001,
+ 0x00F8, 0x01F6, 0x010F, 0x0003,
+ 0x00F2, 0x01F5, 0x0114, 0x0005,
+ 0x00EB, 0x01F4, 0x011B, 0x0006,
+ 0x00E5, 0x01F3, 0x0120, 0x0008,
+ 0x00DF, 0x01F2, 0x0125, 0x000A,
+ 0x00DA, 0x01F0, 0x012A, 0x000C,
+ 0x00D4, 0x01EE, 0x0130, 0x000E,
+ 0x00CE, 0x01ED, 0x0135, 0x0010,
+ 0x00C8, 0x01EB, 0x013A, 0x0013,
+ 0x00C2, 0x01E9, 0x0140, 0x0015,
+ 0x00BD, 0x01E7, 0x0145, 0x0017,
+ 0x00B7, 0x01E5, 0x014A, 0x001A,
+ 0x00B1, 0x01E2, 0x0151, 0x001C,
+ 0x00AC, 0x01E0, 0x0155, 0x001F,
+ 0x00A7, 0x01DD, 0x015A, 0x0022,
+ 0x00A1, 0x01DB, 0x015F, 0x0025,
+ 0x009C, 0x01D8, 0x0165, 0x0027,
+ 0x0097, 0x01D5, 0x016A, 0x002A,
+ 0x0092, 0x01D2, 0x016E, 0x002E,
+ 0x008C, 0x01CF, 0x0174, 0x0031,
+ 0x0087, 0x01CC, 0x0179, 0x0034,
+ 0x0083, 0x01C9, 0x017D, 0x0037,
+ 0x007E, 0x01C5, 0x0182, 0x003B,
+ 0x0079, 0x01C2, 0x0187, 0x003E,
+ 0x0074, 0x01BE, 0x018C, 0x0042,
+ 0x0070, 0x01BA, 0x0190, 0x0046,
+ 0x006B, 0x01B7, 0x0195, 0x0049,
+ 0x0066, 0x01B3, 0x019A, 0x004D,
+ 0x0062, 0x01AF, 0x019E, 0x0051,
+ 0x005E, 0x01AB, 0x01A2, 0x0055,
+ 0x005A, 0x01A6, 0x01A6, 0x005A,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 4t_64p_LanczosEd_p_0.4_p_10qb_
+// <num_taps> 4
+// <num_phases> 64
+// <scale_ratio> input/output = 0.400000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_4tap_64p_ratio_0_40[132] = {
+ 0x00FB, 0x0209, 0x00FC, 0x0000,
+ 0x00F5, 0x0209, 0x0101, 0x0001,
+ 0x00EE, 0x0208, 0x0108, 0x0002,
+ 0x00E8, 0x0207, 0x010E, 0x0003,
+ 0x00E2, 0x0206, 0x0114, 0x0004,
+ 0x00DB, 0x0205, 0x011A, 0x0006,
+ 0x00D5, 0x0204, 0x0120, 0x0007,
+ 0x00CF, 0x0203, 0x0125, 0x0009,
+ 0x00C9, 0x0201, 0x012C, 0x000A,
+ 0x00C3, 0x01FF, 0x0132, 0x000C,
+ 0x00BD, 0x01FD, 0x0138, 0x000E,
+ 0x00B7, 0x01FB, 0x013E, 0x0010,
+ 0x00B1, 0x01F9, 0x0144, 0x0012,
+ 0x00AC, 0x01F7, 0x0149, 0x0014,
+ 0x00A6, 0x01F4, 0x0150, 0x0016,
+ 0x00A0, 0x01F2, 0x0156, 0x0018,
+ 0x009B, 0x01EF, 0x015C, 0x001A,
+ 0x0095, 0x01EC, 0x0162, 0x001D,
+ 0x0090, 0x01E9, 0x0168, 0x001F,
+ 0x008B, 0x01E6, 0x016D, 0x0022,
+ 0x0085, 0x01E3, 0x0173, 0x0025,
+ 0x0080, 0x01DF, 0x0179, 0x0028,
+ 0x007B, 0x01DC, 0x017E, 0x002B,
+ 0x0076, 0x01D8, 0x0184, 0x002E,
+ 0x0071, 0x01D4, 0x018A, 0x0031,
+ 0x006D, 0x01D1, 0x018E, 0x0034,
+ 0x0068, 0x01CD, 0x0193, 0x0038,
+ 0x0063, 0x01C8, 0x019A, 0x003B,
+ 0x005F, 0x01C4, 0x019E, 0x003F,
+ 0x005B, 0x01C0, 0x01A3, 0x0042,
+ 0x0056, 0x01BB, 0x01A9, 0x0046,
+ 0x0052, 0x01B7, 0x01AD, 0x004A,
+ 0x004E, 0x01B2, 0x01B2, 0x004E,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 4t_64p_LanczosEd_p_0.5_p_10qb_
+// <num_taps> 4
+// <num_phases> 64
+// <scale_ratio> input/output = 0.500000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_4tap_64p_ratio_0_50[132] = {
+ 0x00E5, 0x0236, 0x00E5, 0x0000,
+ 0x00DE, 0x0235, 0x00ED, 0x0000,
+ 0x00D7, 0x0235, 0x00F4, 0x0000,
+ 0x00D0, 0x0235, 0x00FB, 0x0000,
+ 0x00C9, 0x0234, 0x0102, 0x0001,
+ 0x00C2, 0x0233, 0x010A, 0x0001,
+ 0x00BC, 0x0232, 0x0111, 0x0001,
+ 0x00B5, 0x0230, 0x0119, 0x0002,
+ 0x00AE, 0x022F, 0x0121, 0x0002,
+ 0x00A8, 0x022D, 0x0128, 0x0003,
+ 0x00A2, 0x022B, 0x012F, 0x0004,
+ 0x009B, 0x0229, 0x0137, 0x0005,
+ 0x0095, 0x0226, 0x013F, 0x0006,
+ 0x008F, 0x0224, 0x0146, 0x0007,
+ 0x0089, 0x0221, 0x014E, 0x0008,
+ 0x0083, 0x021E, 0x0155, 0x000A,
+ 0x007E, 0x021B, 0x015C, 0x000B,
+ 0x0078, 0x0217, 0x0164, 0x000D,
+ 0x0072, 0x0213, 0x016D, 0x000E,
+ 0x006D, 0x0210, 0x0173, 0x0010,
+ 0x0068, 0x020C, 0x017A, 0x0012,
+ 0x0063, 0x0207, 0x0182, 0x0014,
+ 0x005E, 0x0203, 0x0189, 0x0016,
+ 0x0059, 0x01FE, 0x0191, 0x0018,
+ 0x0054, 0x01F9, 0x0198, 0x001B,
+ 0x0050, 0x01F4, 0x019F, 0x001D,
+ 0x004B, 0x01EF, 0x01A6, 0x0020,
+ 0x0047, 0x01EA, 0x01AC, 0x0023,
+ 0x0043, 0x01E4, 0x01B3, 0x0026,
+ 0x003F, 0x01DF, 0x01B9, 0x0029,
+ 0x003B, 0x01D9, 0x01C0, 0x002C,
+ 0x0037, 0x01D3, 0x01C6, 0x0030,
+ 0x0033, 0x01CD, 0x01CD, 0x0033,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 4t_64p_LanczosEd_p_0.6_p_10qb_
+// <num_taps> 4
+// <num_phases> 64
+// <scale_ratio> input/output = 0.600000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_4tap_64p_ratio_0_60[132] = {
+ 0x00C8, 0x026F, 0x00C9, 0x0000,
+ 0x00C0, 0x0270, 0x00D1, 0x0FFF,
+ 0x00B8, 0x0270, 0x00D9, 0x0FFF,
+ 0x00B1, 0x0270, 0x00E1, 0x0FFE,
+ 0x00A9, 0x026F, 0x00EB, 0x0FFD,
+ 0x00A2, 0x026E, 0x00F3, 0x0FFD,
+ 0x009A, 0x026D, 0x00FD, 0x0FFC,
+ 0x0093, 0x026C, 0x0105, 0x0FFC,
+ 0x008C, 0x026A, 0x010F, 0x0FFB,
+ 0x0085, 0x0268, 0x0118, 0x0FFB,
+ 0x007E, 0x0265, 0x0122, 0x0FFB,
+ 0x0078, 0x0263, 0x012A, 0x0FFB,
+ 0x0071, 0x0260, 0x0134, 0x0FFB,
+ 0x006B, 0x025C, 0x013E, 0x0FFB,
+ 0x0065, 0x0259, 0x0147, 0x0FFB,
+ 0x005F, 0x0255, 0x0151, 0x0FFB,
+ 0x0059, 0x0251, 0x015A, 0x0FFC,
+ 0x0054, 0x024D, 0x0163, 0x0FFC,
+ 0x004E, 0x0248, 0x016D, 0x0FFD,
+ 0x0049, 0x0243, 0x0176, 0x0FFE,
+ 0x0044, 0x023E, 0x017F, 0x0FFF,
+ 0x003F, 0x0238, 0x0189, 0x0000,
+ 0x003A, 0x0232, 0x0193, 0x0001,
+ 0x0036, 0x022C, 0x019C, 0x0002,
+ 0x0031, 0x0226, 0x01A5, 0x0004,
+ 0x002D, 0x021F, 0x01AF, 0x0005,
+ 0x0029, 0x0218, 0x01B8, 0x0007,
+ 0x0025, 0x0211, 0x01C1, 0x0009,
+ 0x0022, 0x020A, 0x01C9, 0x000B,
+ 0x001E, 0x0203, 0x01D2, 0x000D,
+ 0x001B, 0x01FB, 0x01DA, 0x0010,
+ 0x0018, 0x01F3, 0x01E3, 0x0012,
+ 0x0015, 0x01EB, 0x01EB, 0x0015,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 4t_64p_LanczosEd_p_0.7_p_10qb_
+// <num_taps> 4
+// <num_phases> 64
+// <scale_ratio> input/output = 0.700000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_4tap_64p_ratio_0_70[132] = {
+ 0x00A3, 0x02B9, 0x00A4, 0x0000,
+ 0x009A, 0x02BA, 0x00AD, 0x0FFF,
+ 0x0092, 0x02BA, 0x00B6, 0x0FFE,
+ 0x0089, 0x02BA, 0x00C1, 0x0FFC,
+ 0x0081, 0x02B9, 0x00CB, 0x0FFB,
+ 0x0079, 0x02B8, 0x00D5, 0x0FFA,
+ 0x0071, 0x02B7, 0x00DF, 0x0FF9,
+ 0x0069, 0x02B5, 0x00EA, 0x0FF8,
+ 0x0062, 0x02B3, 0x00F4, 0x0FF7,
+ 0x005B, 0x02B0, 0x00FF, 0x0FF6,
+ 0x0054, 0x02AD, 0x010B, 0x0FF4,
+ 0x004D, 0x02A9, 0x0117, 0x0FF3,
+ 0x0046, 0x02A5, 0x0123, 0x0FF2,
+ 0x0040, 0x02A1, 0x012D, 0x0FF2,
+ 0x003A, 0x029C, 0x0139, 0x0FF1,
+ 0x0034, 0x0297, 0x0145, 0x0FF0,
+ 0x002F, 0x0292, 0x0150, 0x0FEF,
+ 0x0029, 0x028C, 0x015C, 0x0FEF,
+ 0x0024, 0x0285, 0x0169, 0x0FEE,
+ 0x001F, 0x027F, 0x0174, 0x0FEE,
+ 0x001B, 0x0278, 0x017F, 0x0FEE,
+ 0x0016, 0x0270, 0x018D, 0x0FED,
+ 0x0012, 0x0268, 0x0199, 0x0FED,
+ 0x000E, 0x0260, 0x01A4, 0x0FEE,
+ 0x000B, 0x0258, 0x01AF, 0x0FEE,
+ 0x0007, 0x024F, 0x01BC, 0x0FEE,
+ 0x0004, 0x0246, 0x01C7, 0x0FEF,
+ 0x0001, 0x023D, 0x01D3, 0x0FEF,
+ 0x0FFE, 0x0233, 0x01DF, 0x0FF0,
+ 0x0FFC, 0x0229, 0x01EA, 0x0FF1,
+ 0x0FFA, 0x021F, 0x01F4, 0x0FF3,
+ 0x0FF8, 0x0215, 0x01FF, 0x0FF4,
+ 0x0FF6, 0x020A, 0x020A, 0x0FF6,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 4t_64p_LanczosEd_p_0.8_p_10qb_
+// <num_taps> 4
+// <num_phases> 64
+// <scale_ratio> input/output = 0.800000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_4tap_64p_ratio_0_80[132] = {
+ 0x0075, 0x0315, 0x0076, 0x0000,
+ 0x006C, 0x0316, 0x007F, 0x0FFF,
+ 0x0062, 0x0316, 0x008A, 0x0FFE,
+ 0x0059, 0x0315, 0x0096, 0x0FFC,
+ 0x0050, 0x0314, 0x00A1, 0x0FFB,
+ 0x0048, 0x0312, 0x00AD, 0x0FF9,
+ 0x0040, 0x0310, 0x00B8, 0x0FF8,
+ 0x0038, 0x030D, 0x00C5, 0x0FF6,
+ 0x0030, 0x030A, 0x00D1, 0x0FF5,
+ 0x0029, 0x0306, 0x00DE, 0x0FF3,
+ 0x0022, 0x0301, 0x00EB, 0x0FF2,
+ 0x001C, 0x02FC, 0x00F8, 0x0FF0,
+ 0x0015, 0x02F7, 0x0106, 0x0FEE,
+ 0x0010, 0x02F1, 0x0112, 0x0FED,
+ 0x000A, 0x02EA, 0x0121, 0x0FEB,
+ 0x0005, 0x02E3, 0x012F, 0x0FE9,
+ 0x0000, 0x02DB, 0x013D, 0x0FE8,
+ 0x0FFB, 0x02D3, 0x014C, 0x0FE6,
+ 0x0FF7, 0x02CA, 0x015A, 0x0FE5,
+ 0x0FF3, 0x02C1, 0x0169, 0x0FE3,
+ 0x0FF0, 0x02B7, 0x0177, 0x0FE2,
+ 0x0FEC, 0x02AD, 0x0186, 0x0FE1,
+ 0x0FE9, 0x02A2, 0x0196, 0x0FDF,
+ 0x0FE7, 0x0297, 0x01A4, 0x0FDE,
+ 0x0FE4, 0x028C, 0x01B3, 0x0FDD,
+ 0x0FE2, 0x0280, 0x01C2, 0x0FDC,
+ 0x0FE0, 0x0274, 0x01D0, 0x0FDC,
+ 0x0FDF, 0x0268, 0x01DE, 0x0FDB,
+ 0x0FDD, 0x025B, 0x01EE, 0x0FDA,
+ 0x0FDC, 0x024E, 0x01FC, 0x0FDA,
+ 0x0FDB, 0x0241, 0x020A, 0x0FDA,
+ 0x0FDB, 0x0233, 0x0218, 0x0FDA,
+ 0x0FDA, 0x0226, 0x0226, 0x0FDA,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 4t_64p_LanczosEd_p_0.9_p_10qb_
+// <num_taps> 4
+// <num_phases> 64
+// <scale_ratio> input/output = 0.900000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_4tap_64p_ratio_0_90[132] = {
+ 0x003F, 0x0383, 0x003E, 0x0000,
+ 0x0034, 0x0383, 0x004A, 0x0FFF,
+ 0x002B, 0x0383, 0x0054, 0x0FFE,
+ 0x0021, 0x0381, 0x0061, 0x0FFD,
+ 0x0019, 0x037F, 0x006C, 0x0FFC,
+ 0x0010, 0x037C, 0x0079, 0x0FFB,
+ 0x0008, 0x0378, 0x0086, 0x0FFA,
+ 0x0001, 0x0374, 0x0093, 0x0FF8,
+ 0x0FFA, 0x036E, 0x00A1, 0x0FF7,
+ 0x0FF3, 0x0368, 0x00B0, 0x0FF5,
+ 0x0FED, 0x0361, 0x00BF, 0x0FF3,
+ 0x0FE8, 0x035A, 0x00CD, 0x0FF1,
+ 0x0FE2, 0x0352, 0x00DC, 0x0FF0,
+ 0x0FDE, 0x0349, 0x00EB, 0x0FEE,
+ 0x0FD9, 0x033F, 0x00FC, 0x0FEC,
+ 0x0FD5, 0x0335, 0x010D, 0x0FE9,
+ 0x0FD2, 0x032A, 0x011D, 0x0FE7,
+ 0x0FCF, 0x031E, 0x012E, 0x0FE5,
+ 0x0FCC, 0x0312, 0x013F, 0x0FE3,
+ 0x0FCA, 0x0305, 0x0150, 0x0FE1,
+ 0x0FC8, 0x02F8, 0x0162, 0x0FDE,
+ 0x0FC6, 0x02EA, 0x0174, 0x0FDC,
+ 0x0FC5, 0x02DC, 0x0185, 0x0FDA,
+ 0x0FC4, 0x02CD, 0x0197, 0x0FD8,
+ 0x0FC3, 0x02BE, 0x01AA, 0x0FD5,
+ 0x0FC3, 0x02AF, 0x01BB, 0x0FD3,
+ 0x0FC3, 0x029F, 0x01CD, 0x0FD1,
+ 0x0FC3, 0x028E, 0x01E0, 0x0FCF,
+ 0x0FC3, 0x027E, 0x01F2, 0x0FCD,
+ 0x0FC4, 0x026D, 0x0203, 0x0FCC,
+ 0x0FC5, 0x025C, 0x0215, 0x0FCA,
+ 0x0FC6, 0x024B, 0x0227, 0x0FC8,
+ 0x0FC7, 0x0239, 0x0239, 0x0FC7,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 03-Apr-2024
+// <coeffDescrip> 4t_64p_LanczosEd_p_1_p_10qb_
+// <num_taps> 4
+// <num_phases> 64
+// <scale_ratio> input/output = 1.000000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_4tap_64p_ratio_1_00[132] = {
+ 0x0000, 0x0400, 0x0000, 0x0000,
+ 0x0FF6, 0x03FF, 0x000B, 0x0000,
+ 0x0FED, 0x03FE, 0x0015, 0x0000,
+ 0x0FE4, 0x03FB, 0x0022, 0x0FFF,
+ 0x0FDC, 0x03F7, 0x002E, 0x0FFF,
+ 0x0FD5, 0x03F2, 0x003B, 0x0FFE,
+ 0x0FCE, 0x03EC, 0x0048, 0x0FFE,
+ 0x0FC8, 0x03E5, 0x0056, 0x0FFD,
+ 0x0FC3, 0x03DC, 0x0065, 0x0FFC,
+ 0x0FBE, 0x03D3, 0x0075, 0x0FFA,
+ 0x0FB9, 0x03C9, 0x0085, 0x0FF9,
+ 0x0FB6, 0x03BE, 0x0094, 0x0FF8,
+ 0x0FB2, 0x03B2, 0x00A6, 0x0FF6,
+ 0x0FB0, 0x03A5, 0x00B7, 0x0FF4,
+ 0x0FAD, 0x0397, 0x00CA, 0x0FF2,
+ 0x0FAB, 0x0389, 0x00DC, 0x0FF0,
+ 0x0FAA, 0x0379, 0x00EF, 0x0FEE,
+ 0x0FA9, 0x0369, 0x0102, 0x0FEC,
+ 0x0FA9, 0x0359, 0x0115, 0x0FE9,
+ 0x0FA9, 0x0348, 0x0129, 0x0FE6,
+ 0x0FA9, 0x0336, 0x013D, 0x0FE4,
+ 0x0FA9, 0x0323, 0x0153, 0x0FE1,
+ 0x0FAA, 0x0310, 0x0168, 0x0FDE,
+ 0x0FAC, 0x02FD, 0x017C, 0x0FDB,
+ 0x0FAD, 0x02E9, 0x0192, 0x0FD8,
+ 0x0FAF, 0x02D5, 0x01A7, 0x0FD5,
+ 0x0FB1, 0x02C0, 0x01BD, 0x0FD2,
+ 0x0FB3, 0x02AC, 0x01D2, 0x0FCF,
+ 0x0FB5, 0x0296, 0x01E9, 0x0FCC,
+ 0x0FB8, 0x0281, 0x01FE, 0x0FC9,
+ 0x0FBA, 0x026C, 0x0214, 0x0FC6,
+ 0x0FBD, 0x0256, 0x022A, 0x0FC3,
+ 0x0FC0, 0x0240, 0x0240, 0x0FC0,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 02-Apr-2024
+// <coeffDescrip> 6t_64p_LanczosEd_p_0.3_p_10qb_
+// <num_taps> 6
+// <num_phases> 64
+// <scale_ratio> input/output = 0.300000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_6tap_64p_ratio_0_30[198] = {
+ 0x004B, 0x0100, 0x0169, 0x0101, 0x004B, 0x0000,
+ 0x0049, 0x00FD, 0x0169, 0x0103, 0x004E, 0x0000,
+ 0x0047, 0x00FA, 0x0169, 0x0106, 0x0050, 0x0000,
+ 0x0045, 0x00F7, 0x0168, 0x0109, 0x0052, 0x0001,
+ 0x0043, 0x00F5, 0x0168, 0x010B, 0x0054, 0x0001,
+ 0x0040, 0x00F2, 0x0168, 0x010E, 0x0057, 0x0001,
+ 0x003E, 0x00EF, 0x0168, 0x0110, 0x0059, 0x0002,
+ 0x003C, 0x00EC, 0x0167, 0x0113, 0x005C, 0x0002,
+ 0x003A, 0x00E9, 0x0167, 0x0116, 0x005E, 0x0002,
+ 0x0038, 0x00E6, 0x0166, 0x0118, 0x0061, 0x0003,
+ 0x0036, 0x00E3, 0x0165, 0x011C, 0x0063, 0x0003,
+ 0x0034, 0x00E0, 0x0165, 0x011D, 0x0066, 0x0004,
+ 0x0033, 0x00DD, 0x0164, 0x0120, 0x0068, 0x0004,
+ 0x0031, 0x00DA, 0x0163, 0x0122, 0x006B, 0x0005,
+ 0x002F, 0x00D7, 0x0163, 0x0125, 0x006D, 0x0005,
+ 0x002D, 0x00D3, 0x0162, 0x0128, 0x0070, 0x0006,
+ 0x002B, 0x00D0, 0x0161, 0x012A, 0x0073, 0x0007,
+ 0x002A, 0x00CD, 0x0160, 0x012D, 0x0075, 0x0007,
+ 0x0028, 0x00CA, 0x015F, 0x012F, 0x0078, 0x0008,
+ 0x0026, 0x00C7, 0x015E, 0x0131, 0x007B, 0x0009,
+ 0x0025, 0x00C4, 0x015D, 0x0133, 0x007E, 0x0009,
+ 0x0023, 0x00C1, 0x015C, 0x0136, 0x0080, 0x000A,
+ 0x0022, 0x00BE, 0x015A, 0x0138, 0x0083, 0x000B,
+ 0x0020, 0x00BB, 0x0159, 0x013A, 0x0086, 0x000C,
+ 0x001F, 0x00B8, 0x0158, 0x013B, 0x0089, 0x000D,
+ 0x001E, 0x00B5, 0x0156, 0x013E, 0x008C, 0x000D,
+ 0x001C, 0x00B2, 0x0155, 0x0140, 0x008F, 0x000E,
+ 0x001B, 0x00AF, 0x0153, 0x0143, 0x0091, 0x000F,
+ 0x0019, 0x00AC, 0x0152, 0x0145, 0x0094, 0x0010,
+ 0x0018, 0x00A9, 0x0150, 0x0147, 0x0097, 0x0011,
+ 0x0017, 0x00A6, 0x014F, 0x0148, 0x009A, 0x0012,
+ 0x0016, 0x00A3, 0x014D, 0x0149, 0x009D, 0x0014,
+ 0x0015, 0x00A0, 0x014B, 0x014B, 0x00A0, 0x0015,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 02-Apr-2024
+// <coeffDescrip> 6t_64p_LanczosEd_p_0.4_p_10qb_
+// <num_taps> 6
+// <num_phases> 64
+// <scale_ratio> input/output = 0.400000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_6tap_64p_ratio_0_40[198] = {
+ 0x0028, 0x0106, 0x01A3, 0x0107, 0x0028, 0x0000,
+ 0x0026, 0x0102, 0x01A3, 0x010A, 0x002B, 0x0000,
+ 0x0024, 0x00FE, 0x01A3, 0x010F, 0x002D, 0x0FFF,
+ 0x0022, 0x00FA, 0x01A3, 0x0113, 0x002F, 0x0FFF,
+ 0x0021, 0x00F6, 0x01A3, 0x0116, 0x0031, 0x0FFF,
+ 0x001F, 0x00F2, 0x01A2, 0x011B, 0x0034, 0x0FFE,
+ 0x001D, 0x00EE, 0x01A2, 0x011F, 0x0036, 0x0FFE,
+ 0x001B, 0x00EA, 0x01A1, 0x0123, 0x0039, 0x0FFE,
+ 0x0019, 0x00E6, 0x01A1, 0x0127, 0x003B, 0x0FFE,
+ 0x0018, 0x00E2, 0x01A0, 0x012A, 0x003E, 0x0FFE,
+ 0x0016, 0x00DE, 0x01A0, 0x012E, 0x0041, 0x0FFD,
+ 0x0015, 0x00DA, 0x019F, 0x0132, 0x0043, 0x0FFD,
+ 0x0013, 0x00D6, 0x019E, 0x0136, 0x0046, 0x0FFD,
+ 0x0012, 0x00D2, 0x019D, 0x0139, 0x0049, 0x0FFD,
+ 0x0010, 0x00CE, 0x019C, 0x013D, 0x004C, 0x0FFD,
+ 0x000F, 0x00CA, 0x019A, 0x0141, 0x004F, 0x0FFD,
+ 0x000E, 0x00C6, 0x0199, 0x0144, 0x0052, 0x0FFD,
+ 0x000D, 0x00C2, 0x0197, 0x0148, 0x0055, 0x0FFD,
+ 0x000B, 0x00BE, 0x0196, 0x014C, 0x0058, 0x0FFD,
+ 0x000A, 0x00BA, 0x0195, 0x014F, 0x005B, 0x0FFD,
+ 0x0009, 0x00B6, 0x0193, 0x0153, 0x005E, 0x0FFD,
+ 0x0008, 0x00B2, 0x0191, 0x0157, 0x0061, 0x0FFD,
+ 0x0007, 0x00AE, 0x0190, 0x015A, 0x0064, 0x0FFD,
+ 0x0006, 0x00AA, 0x018E, 0x015D, 0x0068, 0x0FFD,
+ 0x0005, 0x00A6, 0x018C, 0x0161, 0x006B, 0x0FFD,
+ 0x0005, 0x00A2, 0x0189, 0x0164, 0x006F, 0x0FFD,
+ 0x0004, 0x009E, 0x0187, 0x0167, 0x0072, 0x0FFE,
+ 0x0003, 0x009A, 0x0185, 0x016B, 0x0075, 0x0FFE,
+ 0x0002, 0x0096, 0x0183, 0x016E, 0x0079, 0x0FFE,
+ 0x0002, 0x0093, 0x0180, 0x016F, 0x007D, 0x0FFF,
+ 0x0001, 0x008F, 0x017E, 0x0173, 0x0080, 0x0FFF,
+ 0x0001, 0x008B, 0x017B, 0x0175, 0x0084, 0x0000,
+ 0x0000, 0x0087, 0x0179, 0x0179, 0x0087, 0x0000,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 02-Apr-2024
+// <coeffDescrip> 6t_64p_LanczosEd_p_0.5_p_10qb_
+// <num_taps> 6
+// <num_phases> 64
+// <scale_ratio> input/output = 0.500000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_6tap_64p_ratio_0_50[198] = {
+ 0x0000, 0x0107, 0x01F3, 0x0106, 0x0000, 0x0000,
+ 0x0FFE, 0x0101, 0x01F3, 0x010D, 0x0002, 0x0FFF,
+ 0x0FFD, 0x00FB, 0x01F3, 0x0113, 0x0003, 0x0FFF,
+ 0x0FFC, 0x00F6, 0x01F3, 0x0118, 0x0005, 0x0FFE,
+ 0x0FFA, 0x00F0, 0x01F3, 0x011E, 0x0007, 0x0FFE,
+ 0x0FF9, 0x00EB, 0x01F2, 0x0124, 0x0009, 0x0FFD,
+ 0x0FF8, 0x00E5, 0x01F2, 0x0129, 0x000B, 0x0FFD,
+ 0x0FF7, 0x00E0, 0x01F1, 0x012F, 0x000D, 0x0FFC,
+ 0x0FF6, 0x00DA, 0x01F0, 0x0135, 0x0010, 0x0FFB,
+ 0x0FF5, 0x00D4, 0x01EF, 0x013B, 0x0012, 0x0FFB,
+ 0x0FF4, 0x00CF, 0x01EE, 0x0141, 0x0014, 0x0FFA,
+ 0x0FF3, 0x00C9, 0x01ED, 0x0147, 0x0017, 0x0FF9,
+ 0x0FF2, 0x00C4, 0x01EB, 0x014C, 0x001A, 0x0FF9,
+ 0x0FF1, 0x00BF, 0x01EA, 0x0152, 0x001C, 0x0FF8,
+ 0x0FF1, 0x00B9, 0x01E8, 0x0157, 0x001F, 0x0FF8,
+ 0x0FF0, 0x00B4, 0x01E6, 0x015D, 0x0022, 0x0FF7,
+ 0x0FF0, 0x00AE, 0x01E4, 0x0163, 0x0025, 0x0FF6,
+ 0x0FEF, 0x00A9, 0x01E2, 0x0168, 0x0028, 0x0FF6,
+ 0x0FEF, 0x00A4, 0x01DF, 0x016E, 0x002B, 0x0FF5,
+ 0x0FEF, 0x009F, 0x01DD, 0x0172, 0x002E, 0x0FF5,
+ 0x0FEE, 0x009A, 0x01DA, 0x0178, 0x0032, 0x0FF4,
+ 0x0FEE, 0x0094, 0x01D8, 0x017E, 0x0035, 0x0FF3,
+ 0x0FEE, 0x008F, 0x01D5, 0x0182, 0x0039, 0x0FF3,
+ 0x0FEE, 0x008A, 0x01D2, 0x0188, 0x003C, 0x0FF2,
+ 0x0FEE, 0x0085, 0x01CF, 0x018C, 0x0040, 0x0FF2,
+ 0x0FEE, 0x0081, 0x01CB, 0x0191, 0x0044, 0x0FF1,
+ 0x0FEE, 0x007C, 0x01C8, 0x0196, 0x0047, 0x0FF1,
+ 0x0FEE, 0x0077, 0x01C4, 0x019C, 0x004B, 0x0FF0,
+ 0x0FEE, 0x0072, 0x01C1, 0x01A0, 0x004F, 0x0FF0,
+ 0x0FEE, 0x006E, 0x01BD, 0x01A4, 0x0053, 0x0FF0,
+ 0x0FEE, 0x0069, 0x01B9, 0x01A9, 0x0058, 0x0FEF,
+ 0x0FEE, 0x0065, 0x01B5, 0x01AD, 0x005C, 0x0FEF,
+ 0x0FEF, 0x0060, 0x01B1, 0x01B1, 0x0060, 0x0FEF,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 02-Apr-2024
+// <coeffDescrip> 6t_64p_LanczosEd_p_0.6_p_10qb_
+// <num_taps> 6
+// <num_phases> 64
+// <scale_ratio> input/output = 0.600000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_6tap_64p_ratio_0_60[198] = {
+ 0x0FD9, 0x00FB, 0x0258, 0x00FB, 0x0FD9, 0x0000,
+ 0x0FD9, 0x00F3, 0x0258, 0x0102, 0x0FDA, 0x0000,
+ 0x0FD8, 0x00EB, 0x0258, 0x010B, 0x0FDB, 0x0FFF,
+ 0x0FD8, 0x00E3, 0x0258, 0x0112, 0x0FDC, 0x0FFF,
+ 0x0FD8, 0x00DC, 0x0257, 0x011B, 0x0FDC, 0x0FFE,
+ 0x0FD7, 0x00D4, 0x0256, 0x0123, 0x0FDE, 0x0FFE,
+ 0x0FD7, 0x00CD, 0x0255, 0x012B, 0x0FDF, 0x0FFD,
+ 0x0FD7, 0x00C5, 0x0254, 0x0133, 0x0FE0, 0x0FFD,
+ 0x0FD7, 0x00BE, 0x0252, 0x013C, 0x0FE1, 0x0FFC,
+ 0x0FD7, 0x00B6, 0x0251, 0x0143, 0x0FE3, 0x0FFC,
+ 0x0FD8, 0x00AF, 0x024F, 0x014B, 0x0FE4, 0x0FFB,
+ 0x0FD8, 0x00A8, 0x024C, 0x0154, 0x0FE6, 0x0FFA,
+ 0x0FD8, 0x00A1, 0x024A, 0x015B, 0x0FE8, 0x0FFA,
+ 0x0FD9, 0x009A, 0x0247, 0x0163, 0x0FEA, 0x0FF9,
+ 0x0FD9, 0x0093, 0x0244, 0x016C, 0x0FEC, 0x0FF8,
+ 0x0FD9, 0x008C, 0x0241, 0x0174, 0x0FEF, 0x0FF7,
+ 0x0FDA, 0x0085, 0x023E, 0x017B, 0x0FF1, 0x0FF7,
+ 0x0FDB, 0x007F, 0x023A, 0x0183, 0x0FF3, 0x0FF6,
+ 0x0FDB, 0x0078, 0x0237, 0x018B, 0x0FF6, 0x0FF5,
+ 0x0FDC, 0x0072, 0x0233, 0x0192, 0x0FF9, 0x0FF4,
+ 0x0FDD, 0x006C, 0x022F, 0x0199, 0x0FFC, 0x0FF3,
+ 0x0FDD, 0x0065, 0x022A, 0x01A3, 0x0FFF, 0x0FF2,
+ 0x0FDE, 0x005F, 0x0226, 0x01AA, 0x0002, 0x0FF1,
+ 0x0FDF, 0x005A, 0x0221, 0x01B0, 0x0006, 0x0FF0,
+ 0x0FE0, 0x0054, 0x021C, 0x01B7, 0x0009, 0x0FF0,
+ 0x0FE1, 0x004E, 0x0217, 0x01BE, 0x000D, 0x0FEF,
+ 0x0FE2, 0x0048, 0x0212, 0x01C6, 0x0010, 0x0FEE,
+ 0x0FE3, 0x0043, 0x020C, 0x01CD, 0x0014, 0x0FED,
+ 0x0FE4, 0x003E, 0x0207, 0x01D3, 0x0018, 0x0FEC,
+ 0x0FE5, 0x0039, 0x0200, 0x01DA, 0x001D, 0x0FEB,
+ 0x0FE6, 0x0034, 0x01FA, 0x01E1, 0x0021, 0x0FEA,
+ 0x0FE7, 0x002F, 0x01F5, 0x01E7, 0x0025, 0x0FE9,
+ 0x0FE8, 0x002A, 0x01EE, 0x01EE, 0x002A, 0x0FE8,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 02-Apr-2024
+// <coeffDescrip> 6t_64p_LanczosEd_p_0.7_p_10qb_
+// <num_taps> 6
+// <num_phases> 64
+// <scale_ratio> input/output = 0.700000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_6tap_64p_ratio_0_70[198] = {
+ 0x0FC0, 0x00DA, 0x02CC, 0x00DA, 0x0FC0, 0x0000,
+ 0x0FC1, 0x00D0, 0x02CC, 0x00E4, 0x0FBF, 0x0000,
+ 0x0FC2, 0x00C6, 0x02CB, 0x00EF, 0x0FBE, 0x0000,
+ 0x0FC3, 0x00BC, 0x02CA, 0x00F9, 0x0FBE, 0x0000,
+ 0x0FC4, 0x00B2, 0x02C9, 0x0104, 0x0FBD, 0x0000,
+ 0x0FC5, 0x00A8, 0x02C7, 0x010F, 0x0FBD, 0x0000,
+ 0x0FC7, 0x009F, 0x02C5, 0x0119, 0x0FBC, 0x0000,
+ 0x0FC8, 0x0095, 0x02C3, 0x0124, 0x0FBC, 0x0000,
+ 0x0FC9, 0x008C, 0x02C0, 0x012F, 0x0FBC, 0x0000,
+ 0x0FCB, 0x0083, 0x02BD, 0x0139, 0x0FBC, 0x0000,
+ 0x0FCC, 0x007A, 0x02BA, 0x0144, 0x0FBC, 0x0000,
+ 0x0FCE, 0x0072, 0x02B6, 0x014D, 0x0FBD, 0x0000,
+ 0x0FD0, 0x0069, 0x02B2, 0x0159, 0x0FBD, 0x0FFF,
+ 0x0FD1, 0x0061, 0x02AD, 0x0164, 0x0FBE, 0x0FFF,
+ 0x0FD3, 0x0059, 0x02A9, 0x016E, 0x0FBF, 0x0FFE,
+ 0x0FD4, 0x0051, 0x02A4, 0x017A, 0x0FBF, 0x0FFE,
+ 0x0FD6, 0x0049, 0x029E, 0x0184, 0x0FC1, 0x0FFE,
+ 0x0FD8, 0x0042, 0x0299, 0x018E, 0x0FC2, 0x0FFD,
+ 0x0FD9, 0x003A, 0x0293, 0x019B, 0x0FC3, 0x0FFC,
+ 0x0FDB, 0x0033, 0x028D, 0x01A4, 0x0FC5, 0x0FFC,
+ 0x0FDC, 0x002D, 0x0286, 0x01AF, 0x0FC7, 0x0FFB,
+ 0x0FDE, 0x0026, 0x0280, 0x01BA, 0x0FC8, 0x0FFA,
+ 0x0FE0, 0x001F, 0x0279, 0x01C4, 0x0FCB, 0x0FF9,
+ 0x0FE1, 0x0019, 0x0272, 0x01CE, 0x0FCD, 0x0FF9,
+ 0x0FE3, 0x0013, 0x026A, 0x01D9, 0x0FCF, 0x0FF8,
+ 0x0FE4, 0x000D, 0x0263, 0x01E3, 0x0FD2, 0x0FF7,
+ 0x0FE6, 0x0008, 0x025B, 0x01EC, 0x0FD5, 0x0FF6,
+ 0x0FE7, 0x0002, 0x0253, 0x01F7, 0x0FD8, 0x0FF5,
+ 0x0FE9, 0x0FFD, 0x024A, 0x0202, 0x0FDB, 0x0FF3,
+ 0x0FEA, 0x0FF8, 0x0242, 0x020B, 0x0FDF, 0x0FF2,
+ 0x0FEC, 0x0FF3, 0x0239, 0x0215, 0x0FE2, 0x0FF1,
+ 0x0FED, 0x0FEF, 0x0230, 0x021E, 0x0FE6, 0x0FF0,
+ 0x0FEF, 0x0FEB, 0x0226, 0x0226, 0x0FEB, 0x0FEF,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 02-Apr-2024
+// <coeffDescrip> 6t_64p_LanczosEd_p_0.8_p_10qb_
+// <num_taps> 6
+// <num_phases> 64
+// <scale_ratio> input/output = 0.800000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_6tap_64p_ratio_0_80[198] = {
+ 0x0FBF, 0x00A1, 0x0340, 0x00A1, 0x0FBF, 0x0000,
+ 0x0FC1, 0x0095, 0x0340, 0x00AD, 0x0FBC, 0x0001,
+ 0x0FC4, 0x0089, 0x033E, 0x00BA, 0x0FBA, 0x0001,
+ 0x0FC6, 0x007D, 0x033D, 0x00C6, 0x0FB8, 0x0002,
+ 0x0FC9, 0x0072, 0x033A, 0x00D3, 0x0FB6, 0x0002,
+ 0x0FCC, 0x0067, 0x0338, 0x00DF, 0x0FB3, 0x0003,
+ 0x0FCE, 0x005C, 0x0334, 0x00EE, 0x0FB1, 0x0003,
+ 0x0FD1, 0x0051, 0x0331, 0x00FA, 0x0FAF, 0x0004,
+ 0x0FD3, 0x0047, 0x032D, 0x0108, 0x0FAD, 0x0004,
+ 0x0FD6, 0x003D, 0x0328, 0x0116, 0x0FAB, 0x0004,
+ 0x0FD8, 0x0033, 0x0323, 0x0123, 0x0FAA, 0x0005,
+ 0x0FDB, 0x002A, 0x031D, 0x0131, 0x0FA8, 0x0005,
+ 0x0FDD, 0x0021, 0x0317, 0x013F, 0x0FA7, 0x0005,
+ 0x0FDF, 0x0018, 0x0311, 0x014D, 0x0FA5, 0x0006,
+ 0x0FE2, 0x0010, 0x030A, 0x015A, 0x0FA4, 0x0006,
+ 0x0FE4, 0x0008, 0x0302, 0x0169, 0x0FA3, 0x0006,
+ 0x0FE6, 0x0000, 0x02FB, 0x0177, 0x0FA2, 0x0006,
+ 0x0FE8, 0x0FF9, 0x02F3, 0x0185, 0x0FA1, 0x0006,
+ 0x0FEB, 0x0FF1, 0x02EA, 0x0193, 0x0FA1, 0x0006,
+ 0x0FED, 0x0FEB, 0x02E1, 0x01A1, 0x0FA0, 0x0006,
+ 0x0FEE, 0x0FE4, 0x02D8, 0x01B0, 0x0FA0, 0x0006,
+ 0x0FF0, 0x0FDE, 0x02CE, 0x01BE, 0x0FA0, 0x0006,
+ 0x0FF2, 0x0FD8, 0x02C5, 0x01CB, 0x0FA0, 0x0006,
+ 0x0FF4, 0x0FD3, 0x02BA, 0x01D8, 0x0FA1, 0x0006,
+ 0x0FF6, 0x0FCD, 0x02B0, 0x01E7, 0x0FA1, 0x0005,
+ 0x0FF7, 0x0FC8, 0x02A5, 0x01F5, 0x0FA2, 0x0005,
+ 0x0FF9, 0x0FC4, 0x029A, 0x0202, 0x0FA3, 0x0004,
+ 0x0FFA, 0x0FC0, 0x028E, 0x0210, 0x0FA4, 0x0004,
+ 0x0FFB, 0x0FBC, 0x0283, 0x021D, 0x0FA6, 0x0003,
+ 0x0FFD, 0x0FB8, 0x0276, 0x022A, 0x0FA8, 0x0003,
+ 0x0FFE, 0x0FB4, 0x026B, 0x0237, 0x0FAA, 0x0002,
+ 0x0FFF, 0x0FB1, 0x025E, 0x0245, 0x0FAC, 0x0001,
+ 0x0000, 0x0FAE, 0x0252, 0x0252, 0x0FAE, 0x0000,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 02-Apr-2024
+// <coeffDescrip> 6t_64p_LanczosEd_p_0.9_p_10qb_
+// <num_taps> 6
+// <num_phases> 64
+// <scale_ratio> input/output = 0.900000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_6tap_64p_ratio_0_90[198] = {
+ 0x0FD8, 0x0055, 0x03A7, 0x0054, 0x0FD8, 0x0000,
+ 0x0FDB, 0x0047, 0x03A7, 0x0063, 0x0FD4, 0x0000,
+ 0x0FDF, 0x003B, 0x03A5, 0x006F, 0x0FD1, 0x0001,
+ 0x0FE2, 0x002E, 0x03A3, 0x007E, 0x0FCD, 0x0002,
+ 0x0FE5, 0x0022, 0x03A0, 0x008D, 0x0FCA, 0x0002,
+ 0x0FE8, 0x0017, 0x039D, 0x009B, 0x0FC6, 0x0003,
+ 0x0FEB, 0x000C, 0x0398, 0x00AC, 0x0FC2, 0x0003,
+ 0x0FEE, 0x0001, 0x0394, 0x00BA, 0x0FBF, 0x0004,
+ 0x0FF1, 0x0FF7, 0x038E, 0x00CA, 0x0FBB, 0x0005,
+ 0x0FF4, 0x0FED, 0x0388, 0x00DA, 0x0FB8, 0x0005,
+ 0x0FF6, 0x0FE4, 0x0381, 0x00EB, 0x0FB4, 0x0006,
+ 0x0FF9, 0x0FDB, 0x037A, 0x00FA, 0x0FB1, 0x0007,
+ 0x0FFB, 0x0FD3, 0x0372, 0x010B, 0x0FAD, 0x0008,
+ 0x0FFD, 0x0FCB, 0x0369, 0x011D, 0x0FAA, 0x0008,
+ 0x0000, 0x0FC3, 0x0360, 0x012E, 0x0FA6, 0x0009,
+ 0x0002, 0x0FBC, 0x0356, 0x013F, 0x0FA3, 0x000A,
+ 0x0003, 0x0FB6, 0x034C, 0x0150, 0x0FA0, 0x000B,
+ 0x0005, 0x0FB0, 0x0341, 0x0162, 0x0F9D, 0x000B,
+ 0x0007, 0x0FAA, 0x0336, 0x0173, 0x0F9A, 0x000C,
+ 0x0008, 0x0FA5, 0x032A, 0x0185, 0x0F97, 0x000D,
+ 0x000A, 0x0FA0, 0x031E, 0x0197, 0x0F94, 0x000D,
+ 0x000B, 0x0F9B, 0x0311, 0x01A9, 0x0F92, 0x000E,
+ 0x000C, 0x0F97, 0x0303, 0x01BC, 0x0F8F, 0x000F,
+ 0x000D, 0x0F94, 0x02F6, 0x01CD, 0x0F8D, 0x000F,
+ 0x000E, 0x0F91, 0x02E8, 0x01DE, 0x0F8B, 0x0010,
+ 0x000F, 0x0F8E, 0x02D9, 0x01F1, 0x0F89, 0x0010,
+ 0x0010, 0x0F8B, 0x02CA, 0x0202, 0x0F88, 0x0011,
+ 0x0010, 0x0F89, 0x02BB, 0x0214, 0x0F87, 0x0011,
+ 0x0011, 0x0F87, 0x02AB, 0x0226, 0x0F86, 0x0011,
+ 0x0011, 0x0F86, 0x029C, 0x0236, 0x0F85, 0x0012,
+ 0x0011, 0x0F85, 0x028B, 0x0249, 0x0F84, 0x0012,
+ 0x0012, 0x0F84, 0x027B, 0x0259, 0x0F84, 0x0012,
+ 0x0012, 0x0F84, 0x026A, 0x026A, 0x0F84, 0x0012,
+};
+
+//========================================================
+// <using> gen_scaler_coeffs_cnf_file.m
+// <using> make_test_script.m
+// <date> 02-Apr-2024
+// <coeffDescrip> 6t_64p_LanczosEd_p_1_p_10qb_
+// <num_taps> 6
+// <num_phases> 64
+// <scale_ratio> input/output = 1.000000000000
+// <CoefType> LanczosEd
+// <CoefQuant> S1.10
+//========================================================
+static const uint16_t easf_filter_6tap_64p_ratio_1_00[198] = {
+ 0x0000, 0x0000, 0x0400, 0x0000, 0x0000, 0x0000,
+ 0x0003, 0x0FF3, 0x0400, 0x000D, 0x0FFD, 0x0000,
+ 0x0006, 0x0FE7, 0x03FE, 0x001C, 0x0FF9, 0x0000,
+ 0x0009, 0x0FDB, 0x03FC, 0x002B, 0x0FF5, 0x0000,
+ 0x000C, 0x0FD0, 0x03F9, 0x003A, 0x0FF1, 0x0000,
+ 0x000E, 0x0FC5, 0x03F5, 0x004A, 0x0FED, 0x0001,
+ 0x0011, 0x0FBB, 0x03F0, 0x005A, 0x0FE9, 0x0001,
+ 0x0013, 0x0FB2, 0x03EB, 0x006A, 0x0FE5, 0x0001,
+ 0x0015, 0x0FA9, 0x03E4, 0x007B, 0x0FE1, 0x0002,
+ 0x0017, 0x0FA1, 0x03DD, 0x008D, 0x0FDC, 0x0002,
+ 0x0018, 0x0F99, 0x03D4, 0x00A0, 0x0FD8, 0x0003,
+ 0x001A, 0x0F92, 0x03CB, 0x00B2, 0x0FD3, 0x0004,
+ 0x001B, 0x0F8C, 0x03C1, 0x00C6, 0x0FCE, 0x0004,
+ 0x001C, 0x0F86, 0x03B7, 0x00D9, 0x0FC9, 0x0005,
+ 0x001D, 0x0F80, 0x03AB, 0x00EE, 0x0FC4, 0x0006,
+ 0x001E, 0x0F7C, 0x039F, 0x0101, 0x0FBF, 0x0007,
+ 0x001F, 0x0F78, 0x0392, 0x0115, 0x0FBA, 0x0008,
+ 0x001F, 0x0F74, 0x0385, 0x012B, 0x0FB5, 0x0008,
+ 0x0020, 0x0F71, 0x0376, 0x0140, 0x0FB0, 0x0009,
+ 0x0020, 0x0F6E, 0x0367, 0x0155, 0x0FAB, 0x000B,
+ 0x0020, 0x0F6C, 0x0357, 0x016B, 0x0FA6, 0x000C,
+ 0x0020, 0x0F6A, 0x0347, 0x0180, 0x0FA2, 0x000D,
+ 0x0020, 0x0F69, 0x0336, 0x0196, 0x0F9D, 0x000E,
+ 0x0020, 0x0F69, 0x0325, 0x01AB, 0x0F98, 0x000F,
+ 0x001F, 0x0F68, 0x0313, 0x01C3, 0x0F93, 0x0010,
+ 0x001F, 0x0F69, 0x0300, 0x01D8, 0x0F8F, 0x0011,
+ 0x001E, 0x0F69, 0x02ED, 0x01EF, 0x0F8B, 0x0012,
+ 0x001D, 0x0F6A, 0x02D9, 0x0205, 0x0F87, 0x0014,
+ 0x001D, 0x0F6C, 0x02C5, 0x021A, 0x0F83, 0x0015,
+ 0x001C, 0x0F6E, 0x02B1, 0x0230, 0x0F7F, 0x0016,
+ 0x001B, 0x0F70, 0x029C, 0x0247, 0x0F7B, 0x0017,
+ 0x001A, 0x0F72, 0x0287, 0x025D, 0x0F78, 0x0018,
+ 0x0019, 0x0F75, 0x0272, 0x0272, 0x0F75, 0x0019,
+};
+
+/* Converted scaler coeff tables from S1.10 to S1.12 */
+static const uint16_t easf_filter_3tap_64p_ratio_0_30_s1_12[99] = {
+0x0800, 0x0800, 0x0000,
+0x07d8, 0x0818, 0x0010,
+0x07b0, 0x082c, 0x0024,
+0x0788, 0x0844, 0x0034,
+0x0760, 0x0858, 0x0048,
+0x0738, 0x0870, 0x0058,
+0x0710, 0x0884, 0x006c,
+0x06e8, 0x0898, 0x0080,
+0x06c0, 0x08a8, 0x0098,
+0x0698, 0x08bc, 0x00ac,
+0x0670, 0x08cc, 0x00c4,
+0x0648, 0x08e0, 0x00d8,
+0x0620, 0x08f0, 0x00f0,
+0x05f8, 0x0900, 0x0108,
+0x05d0, 0x0910, 0x0120,
+0x05a8, 0x0920, 0x0138,
+0x0584, 0x0928, 0x0154,
+0x055c, 0x0938, 0x016c,
+0x0534, 0x0944, 0x0188,
+0x0510, 0x094c, 0x01a4,
+0x04e8, 0x0958, 0x01c0,
+0x04c4, 0x0960, 0x01dc,
+0x049c, 0x096c, 0x01f8,
+0x0478, 0x0970, 0x0218,
+0x0454, 0x0978, 0x0234,
+0x042c, 0x0980, 0x0254,
+0x0408, 0x0988, 0x0270,
+0x03e4, 0x098c, 0x0290,
+0x03c0, 0x0990, 0x02b0,
+0x039c, 0x0994, 0x02d0,
+0x037c, 0x0990, 0x02f4,
+0x0358, 0x0994, 0x0314,
+0x0334, 0x0998, 0x0334,
+};
+
+static const uint16_t easf_filter_3tap_64p_ratio_0_40_s1_12[99] = {
+0x0800, 0x0800, 0x0000,
+0x07d8, 0x0818, 0x0010,
+0x07ac, 0x0838, 0x001c,
+0x0784, 0x0850, 0x002c,
+0x075c, 0x0868, 0x003c,
+0x0734, 0x0880, 0x004c,
+0x0708, 0x0898, 0x0060,
+0x06e0, 0x08b0, 0x0070,
+0x06b8, 0x08c4, 0x0084,
+0x068c, 0x08dc, 0x0098,
+0x0664, 0x08f0, 0x00ac,
+0x063c, 0x0900, 0x00c4,
+0x0614, 0x0914, 0x00d8,
+0x05e8, 0x0928, 0x00f0,
+0x05c0, 0x093c, 0x0104,
+0x0598, 0x094c, 0x011c,
+0x0570, 0x095c, 0x0134,
+0x0548, 0x0968, 0x0150,
+0x0520, 0x0978, 0x0168,
+0x04f8, 0x0984, 0x0184,
+0x04d0, 0x0990, 0x01a0,
+0x04ac, 0x0998, 0x01bc,
+0x0484, 0x09a4, 0x01d8,
+0x045c, 0x09b0, 0x01f4,
+0x0438, 0x09b8, 0x0210,
+0x0410, 0x09c0, 0x0230,
+0x03ec, 0x09c4, 0x0250,
+0x03c8, 0x09c8, 0x0270,
+0x03a4, 0x09cc, 0x0290,
+0x0380, 0x09d0, 0x02b0,
+0x035c, 0x09d4, 0x02d0,
+0x0338, 0x09d4, 0x02f4,
+0x0314, 0x09d8, 0x0314,
+};
+
+static const uint16_t easf_filter_3tap_64p_ratio_0_50_s1_12[99] = {
+0x0800, 0x0800, 0x0000,
+0x07d4, 0x0824, 0x0008,
+0x07a8, 0x0844, 0x0014,
+0x077c, 0x0868, 0x001c,
+0x0750, 0x0888, 0x0028,
+0x0724, 0x08a8, 0x0034,
+0x06f8, 0x08c8, 0x0040,
+0x06cc, 0x08e4, 0x0050,
+0x06a0, 0x0904, 0x005c,
+0x0674, 0x0920, 0x006c,
+0x0648, 0x093c, 0x007c,
+0x061c, 0x0954, 0x0090,
+0x05f0, 0x0970, 0x00a0,
+0x05c4, 0x0988, 0x00b4,
+0x0598, 0x09a0, 0x00c8,
+0x056c, 0x09b8, 0x00dc,
+0x0540, 0x09cc, 0x00f4,
+0x0518, 0x09e0, 0x0108,
+0x04ec, 0x09f4, 0x0120,
+0x04c0, 0x0a08, 0x0138,
+0x0498, 0x0a18, 0x0150,
+0x046c, 0x0a28, 0x016c,
+0x0444, 0x0a34, 0x0188,
+0x041c, 0x0a40, 0x01a4,
+0x03f4, 0x0a4c, 0x01c0,
+0x03cc, 0x0a58, 0x01dc,
+0x03a4, 0x0a60, 0x01fc,
+0x037c, 0x0a68, 0x021c,
+0x0354, 0x0a70, 0x023c,
+0x0330, 0x0a74, 0x025c,
+0x030c, 0x0a78, 0x027c,
+0x02e8, 0x0a78, 0x02a0,
+0x02c4, 0x0a78, 0x02c4,
+};
+
+static const uint16_t easf_filter_3tap_64p_ratio_0_60_s1_12[99] = {
+0x0800, 0x0800, 0x0000,
+0x07d0, 0x082c, 0x0004,
+0x07a0, 0x0858, 0x0008,
+0x0770, 0x0884, 0x000c,
+0x0740, 0x08ac, 0x0014,
+0x0710, 0x08d4, 0x001c,
+0x06e0, 0x0900, 0x0020,
+0x06b0, 0x0924, 0x002c,
+0x0680, 0x094c, 0x0034,
+0x0650, 0x0970, 0x0040,
+0x0620, 0x0994, 0x004c,
+0x05f0, 0x09b8, 0x0058,
+0x05c0, 0x09dc, 0x0064,
+0x0590, 0x09fc, 0x0074,
+0x0560, 0x0a1c, 0x0084,
+0x0530, 0x0a3c, 0x0094,
+0x0500, 0x0a5c, 0x00a4,
+0x04d4, 0x0a74, 0x00b8,
+0x04a4, 0x0a90, 0x00cc,
+0x0474, 0x0aac, 0x00e0,
+0x0448, 0x0ac0, 0x00f8,
+0x041c, 0x0ad4, 0x0110,
+0x03f0, 0x0ae8, 0x0128,
+0x03c4, 0x0afc, 0x0140,
+0x0398, 0x0b0c, 0x015c,
+0x036c, 0x0b1c, 0x0178,
+0x0344, 0x0b28, 0x0194,
+0x031c, 0x0b30, 0x01b4,
+0x02f4, 0x0b38, 0x01d4,
+0x02cc, 0x0b40, 0x01f4,
+0x02a4, 0x0b48, 0x0214,
+0x0280, 0x0b48, 0x0238,
+0x025c, 0x0b48, 0x025c,
+};
+
+static const uint16_t easf_filter_3tap_64p_ratio_0_70_s1_12[99] = {
+0x0800, 0x0800, 0x0000,
+0x07cc, 0x0834, 0x0000,
+0x0794, 0x086c, 0x0000,
+0x0760, 0x08a0, 0x0000,
+0x072c, 0x08d4, 0x0000,
+0x06f4, 0x090c, 0x0000,
+0x06c0, 0x093c, 0x0004,
+0x0688, 0x0970, 0x0008,
+0x0654, 0x09a0, 0x000c,
+0x061c, 0x09d4, 0x0010,
+0x05e8, 0x0a00, 0x0018,
+0x05b4, 0x0a30, 0x001c,
+0x057c, 0x0a60, 0x0024,
+0x0548, 0x0a88, 0x0030,
+0x0514, 0x0ab4, 0x0038,
+0x04e0, 0x0adc, 0x0044,
+0x04ac, 0x0b00, 0x0054,
+0x0478, 0x0b28, 0x0060,
+0x0444, 0x0b4c, 0x0070,
+0x0414, 0x0b6c, 0x0080,
+0x03e0, 0x0b8c, 0x0094,
+0x03b0, 0x0ba8, 0x00a8,
+0x0380, 0x0bc4, 0x00bc,
+0x0354, 0x0bd8, 0x00d4,
+0x0324, 0x0bf0, 0x00ec,
+0x02f8, 0x0c04, 0x0104,
+0x02cc, 0x0c14, 0x0120,
+0x02a0, 0x0c24, 0x013c,
+0x0278, 0x0c30, 0x0158,
+0x0250, 0x0c38, 0x0178,
+0x0228, 0x0c40, 0x0198,
+0x0204, 0x0c40, 0x01bc,
+0x01dc, 0x0c48, 0x01dc,
+};
+
+static const uint16_t easf_filter_3tap_64p_ratio_0_80_s1_12[99] = {
+0x0800, 0x0800, 0x0000,
+0x07c4, 0x0840, 0x3ffc,
+0x0788, 0x0880, 0x3ff8,
+0x0748, 0x08c8, 0x3ff0,
+0x070c, 0x0904, 0x3ff0,
+0x06d0, 0x0944, 0x3fec,
+0x0690, 0x0988, 0x3fe8,
+0x0654, 0x09c4, 0x3fe8,
+0x0618, 0x0a04, 0x3fe4,
+0x05d8, 0x0a44, 0x3fe4,
+0x059c, 0x0a80, 0x3fe4,
+0x0560, 0x0ab8, 0x3fe8,
+0x0524, 0x0af4, 0x3fe8,
+0x04e8, 0x0b2c, 0x3fec,
+0x04b0, 0x0b5c, 0x3ff4,
+0x0474, 0x0b94, 0x3ff8,
+0x043c, 0x0bc4, 0x0000,
+0x0404, 0x0bf4, 0x0008,
+0x03cc, 0x0c20, 0x0014,
+0x0394, 0x0c4c, 0x0020,
+0x0360, 0x0c74, 0x002c,
+0x032c, 0x0c98, 0x003c,
+0x02f8, 0x0cbc, 0x004c,
+0x02c8, 0x0cdc, 0x005c,
+0x0298, 0x0cf8, 0x0070,
+0x0268, 0x0d14, 0x0084,
+0x023c, 0x0d28, 0x009c,
+0x0210, 0x0d3c, 0x00b4,
+0x01e4, 0x0d4c, 0x00d0,
+0x01bc, 0x0d58, 0x00ec,
+0x0194, 0x0d60, 0x010c,
+0x0170, 0x0d64, 0x012c,
+0x014c, 0x0d68, 0x014c,
+};
+
+static const uint16_t easf_filter_3tap_64p_ratio_0_90_s1_12[99] = {
+0x0800, 0x0800, 0x0000,
+0x07b8, 0x0850, 0x3ff8,
+0x0770, 0x08a0, 0x3ff0,
+0x0728, 0x08f0, 0x3fe8,
+0x06e4, 0x093c, 0x3fe0,
+0x069c, 0x0988, 0x3fdc,
+0x0654, 0x09d8, 0x3fd4,
+0x060c, 0x0a28, 0x3fcc,
+0x05c8, 0x0a70, 0x3fc8,
+0x0580, 0x0abc, 0x3fc4,
+0x053c, 0x0b08, 0x3fbc,
+0x04f8, 0x0b50, 0x3fb8,
+0x04b4, 0x0b94, 0x3fb8,
+0x0470, 0x0bdc, 0x3fb4,
+0x0430, 0x0c1c, 0x3fb4,
+0x03ec, 0x0c60, 0x3fb4,
+0x03b0, 0x0c9c, 0x3fb4,
+0x0370, 0x0cd8, 0x3fb8,
+0x0334, 0x0d10, 0x3fbc,
+0x02f8, 0x0d48, 0x3fc0,
+0x02c0, 0x0d78, 0x3fc8,
+0x0288, 0x0da8, 0x3fd0,
+0x0254, 0x0dd4, 0x3fd8,
+0x0220, 0x0dfc, 0x3fe4,
+0x01ec, 0x0e20, 0x3ff4,
+0x01bc, 0x0e44, 0x0000,
+0x0190, 0x0e5c, 0x0014,
+0x0164, 0x0e74, 0x0028,
+0x0138, 0x0e8c, 0x003c,
+0x0114, 0x0e98, 0x0054,
+0x00ec, 0x0ea4, 0x0070,
+0x00cc, 0x0ea8, 0x008c,
+0x00a8, 0x0eb0, 0x00a8,
+};
+
+static const uint16_t easf_filter_3tap_64p_ratio_1_00_s1_12[99] = {
+0x0800, 0x0800, 0x0000,
+0x07ac, 0x085c, 0x3ff8,
+0x0754, 0x08bc, 0x3ff0,
+0x0700, 0x091c, 0x3fe4,
+0x06ac, 0x0978, 0x3fdc,
+0x0658, 0x09d8, 0x3fd0,
+0x0604, 0x0a34, 0x3fc8,
+0x05b0, 0x0a94, 0x3fbc,
+0x0560, 0x0aec, 0x3fb4,
+0x0510, 0x0b44, 0x3fac,
+0x04c0, 0x0ba0, 0x3fa0,
+0x0470, 0x0bf8, 0x3f98,
+0x0424, 0x0c4c, 0x3f90,
+0x03d8, 0x0ca0, 0x3f88,
+0x0390, 0x0cf0, 0x3f80,
+0x0348, 0x0d3c, 0x3f7c,
+0x0300, 0x0d8c, 0x3f74,
+0x02c0, 0x0dd0, 0x3f70,
+0x027c, 0x0e14, 0x3f70,
+0x0240, 0x0e54, 0x3f6c,
+0x0204, 0x0e90, 0x3f6c,
+0x01c8, 0x0ecc, 0x3f6c,
+0x0190, 0x0f00, 0x3f70,
+0x015c, 0x0f30, 0x3f74,
+0x012c, 0x0f58, 0x3f7c,
+0x00fc, 0x0f80, 0x3f84,
+0x00d0, 0x0fa0, 0x3f90,
+0x00a8, 0x0fbc, 0x3f9c,
+0x0080, 0x0fd4, 0x3fac,
+0x005c, 0x0fe8, 0x3fbc,
+0x003c, 0x0ff4, 0x3fd0,
+0x001c, 0x0ffc, 0x3fe8,
+0x0000, 0x1000, 0x0000,
+};
+
+static const uint16_t easf_filter_4tap_64p_ratio_0_30_s1_12[132] = {
+0x0410, 0x07e0, 0x0410, 0x0000,
+0x03f8, 0x07dc, 0x0428, 0x0004,
+0x03e0, 0x07d8, 0x043c, 0x000c,
+0x03c8, 0x07d4, 0x0450, 0x0014,
+0x03ac, 0x07d0, 0x046c, 0x0018,
+0x0394, 0x07cc, 0x0480, 0x0020,
+0x037c, 0x07c8, 0x0494, 0x0028,
+0x0368, 0x07c0, 0x04a8, 0x0030,
+0x0350, 0x07b8, 0x04c0, 0x0038,
+0x0338, 0x07b4, 0x04d4, 0x0040,
+0x0320, 0x07ac, 0x04e8, 0x004c,
+0x0308, 0x07a4, 0x0500, 0x0054,
+0x02f4, 0x079c, 0x0514, 0x005c,
+0x02dc, 0x0794, 0x0528, 0x0068,
+0x02c4, 0x0788, 0x0544, 0x0070,
+0x02b0, 0x0780, 0x0554, 0x007c,
+0x029c, 0x0774, 0x0568, 0x0088,
+0x0284, 0x076c, 0x057c, 0x0094,
+0x0270, 0x0760, 0x0594, 0x009c,
+0x025c, 0x0754, 0x05a8, 0x00a8,
+0x0248, 0x0748, 0x05b8, 0x00b8,
+0x0230, 0x073c, 0x05d0, 0x00c4,
+0x021c, 0x0730, 0x05e4, 0x00d0,
+0x020c, 0x0724, 0x05f4, 0x00dc,
+0x01f8, 0x0714, 0x0608, 0x00ec,
+0x01e4, 0x0708, 0x061c, 0x00f8,
+0x01d0, 0x06f8, 0x0630, 0x0108,
+0x01c0, 0x06e8, 0x0640, 0x0118,
+0x01ac, 0x06dc, 0x0654, 0x0124,
+0x0198, 0x06cc, 0x0668, 0x0134,
+0x0188, 0x06bc, 0x0678, 0x0144,
+0x0178, 0x06ac, 0x0688, 0x0154,
+0x0168, 0x0698, 0x0698, 0x0168,
+};
+
+static const uint16_t easf_filter_4tap_64p_ratio_0_40_s1_12[132] = {
+0x03ec, 0x0824, 0x03f0, 0x0000,
+0x03d4, 0x0824, 0x0404, 0x0004,
+0x03b8, 0x0820, 0x0420, 0x0008,
+0x03a0, 0x081c, 0x0438, 0x000c,
+0x0388, 0x0818, 0x0450, 0x0010,
+0x036c, 0x0814, 0x0468, 0x0018,
+0x0354, 0x0810, 0x0480, 0x001c,
+0x033c, 0x080c, 0x0494, 0x0024,
+0x0324, 0x0804, 0x04b0, 0x0028,
+0x030c, 0x07fc, 0x04c8, 0x0030,
+0x02f4, 0x07f4, 0x04e0, 0x0038,
+0x02dc, 0x07ec, 0x04f8, 0x0040,
+0x02c4, 0x07e4, 0x0510, 0x0048,
+0x02b0, 0x07dc, 0x0524, 0x0050,
+0x0298, 0x07d0, 0x0540, 0x0058,
+0x0280, 0x07c8, 0x0558, 0x0060,
+0x026c, 0x07bc, 0x0570, 0x0068,
+0x0254, 0x07b0, 0x0588, 0x0074,
+0x0240, 0x07a4, 0x05a0, 0x007c,
+0x022c, 0x0798, 0x05b4, 0x0088,
+0x0214, 0x078c, 0x05cc, 0x0094,
+0x0200, 0x077c, 0x05e4, 0x00a0,
+0x01ec, 0x0770, 0x05f8, 0x00ac,
+0x01d8, 0x0760, 0x0610, 0x00b8,
+0x01c4, 0x0750, 0x0628, 0x00c4,
+0x01b4, 0x0744, 0x0638, 0x00d0,
+0x01a0, 0x0734, 0x064c, 0x00e0,
+0x018c, 0x0720, 0x0668, 0x00ec,
+0x017c, 0x0710, 0x0678, 0x00fc,
+0x016c, 0x0700, 0x068c, 0x0108,
+0x0158, 0x06ec, 0x06a4, 0x0118,
+0x0148, 0x06dc, 0x06b4, 0x0128,
+0x0138, 0x06c8, 0x06c8, 0x0138,
+};
+
+static const uint16_t easf_filter_4tap_64p_ratio_0_50_s1_12[132] = {
+0x0394, 0x08d8, 0x0394, 0x0000,
+0x0378, 0x08d4, 0x03b4, 0x0000,
+0x035c, 0x08d4, 0x03d0, 0x0000,
+0x0340, 0x08d4, 0x03ec, 0x0000,
+0x0324, 0x08d0, 0x0408, 0x0004,
+0x0308, 0x08cc, 0x0428, 0x0004,
+0x02f0, 0x08c8, 0x0444, 0x0004,
+0x02d4, 0x08c0, 0x0464, 0x0008,
+0x02b8, 0x08bc, 0x0484, 0x0008,
+0x02a0, 0x08b4, 0x04a0, 0x000c,
+0x0288, 0x08ac, 0x04bc, 0x0010,
+0x026c, 0x08a4, 0x04dc, 0x0014,
+0x0254, 0x0898, 0x04fc, 0x0018,
+0x023c, 0x0890, 0x0518, 0x001c,
+0x0224, 0x0884, 0x0538, 0x0020,
+0x020c, 0x0878, 0x0554, 0x0028,
+0x01f8, 0x086c, 0x0570, 0x002c,
+0x01e0, 0x085c, 0x0590, 0x0034,
+0x01c8, 0x084c, 0x05b4, 0x0038,
+0x01b4, 0x0840, 0x05cc, 0x0040,
+0x01a0, 0x0830, 0x05e8, 0x0048,
+0x018c, 0x081c, 0x0608, 0x0050,
+0x0178, 0x080c, 0x0624, 0x0058,
+0x0164, 0x07f8, 0x0644, 0x0060,
+0x0150, 0x07e4, 0x0660, 0x006c,
+0x0140, 0x07d0, 0x067c, 0x0074,
+0x012c, 0x07bc, 0x0698, 0x0080,
+0x011c, 0x07a8, 0x06b0, 0x008c,
+0x010c, 0x0790, 0x06cc, 0x0098,
+0x00fc, 0x077c, 0x06e4, 0x00a4,
+0x00ec, 0x0764, 0x0700, 0x00b0,
+0x00dc, 0x074c, 0x0718, 0x00c0,
+0x00cc, 0x0734, 0x0734, 0x00cc,
+};
+
+static const uint16_t easf_filter_4tap_64p_ratio_0_60_s1_12[132] = {
+0x0320, 0x09bc, 0x0324, 0x0000,
+0x0300, 0x09c0, 0x0344, 0x3ffc,
+0x02e0, 0x09c0, 0x0364, 0x3ffc,
+0x02c4, 0x09c0, 0x0384, 0x3ff8,
+0x02a4, 0x09bc, 0x03ac, 0x3ff4,
+0x0288, 0x09b8, 0x03cc, 0x3ff4,
+0x0268, 0x09b4, 0x03f4, 0x3ff0,
+0x024c, 0x09b0, 0x0414, 0x3ff0,
+0x0230, 0x09a8, 0x043c, 0x3fec,
+0x0214, 0x09a0, 0x0460, 0x3fec,
+0x01f8, 0x0994, 0x0488, 0x3fec,
+0x01e0, 0x098c, 0x04a8, 0x3fec,
+0x01c4, 0x0980, 0x04d0, 0x3fec,
+0x01ac, 0x0970, 0x04f8, 0x3fec,
+0x0194, 0x0964, 0x051c, 0x3fec,
+0x017c, 0x0954, 0x0544, 0x3fec,
+0x0164, 0x0944, 0x0568, 0x3ff0,
+0x0150, 0x0934, 0x058c, 0x3ff0,
+0x0138, 0x0920, 0x05b4, 0x3ff4,
+0x0124, 0x090c, 0x05d8, 0x3ff8,
+0x0110, 0x08f8, 0x05fc, 0x3ffc,
+0x00fc, 0x08e0, 0x0624, 0x0000,
+0x00e8, 0x08c8, 0x064c, 0x0004,
+0x00d8, 0x08b0, 0x0670, 0x0008,
+0x00c4, 0x0898, 0x0694, 0x0010,
+0x00b4, 0x087c, 0x06bc, 0x0014,
+0x00a4, 0x0860, 0x06e0, 0x001c,
+0x0094, 0x0844, 0x0704, 0x0024,
+0x0088, 0x0828, 0x0724, 0x002c,
+0x0078, 0x080c, 0x0748, 0x0034,
+0x006c, 0x07ec, 0x0768, 0x0040,
+0x0060, 0x07cc, 0x078c, 0x0048,
+0x0054, 0x07ac, 0x07ac, 0x0054,
+};
+
+static const uint16_t easf_filter_4tap_64p_ratio_0_70_s1_12[132] = {
+0x028c, 0x0ae4, 0x0290, 0x0000,
+0x0268, 0x0ae8, 0x02b4, 0x3ffc,
+0x0248, 0x0ae8, 0x02d8, 0x3ff8,
+0x0224, 0x0ae8, 0x0304, 0x3ff0,
+0x0204, 0x0ae4, 0x032c, 0x3fec,
+0x01e4, 0x0ae0, 0x0354, 0x3fe8,
+0x01c4, 0x0adc, 0x037c, 0x3fe4,
+0x01a4, 0x0ad4, 0x03a8, 0x3fe0,
+0x0188, 0x0acc, 0x03d0, 0x3fdc,
+0x016c, 0x0ac0, 0x03fc, 0x3fd8,
+0x0150, 0x0ab4, 0x042c, 0x3fd0,
+0x0134, 0x0aa4, 0x045c, 0x3fcc,
+0x0118, 0x0a94, 0x048c, 0x3fc8,
+0x0100, 0x0a84, 0x04b4, 0x3fc8,
+0x00e8, 0x0a70, 0x04e4, 0x3fc4,
+0x00d0, 0x0a5c, 0x0514, 0x3fc0,
+0x00bc, 0x0a48, 0x0540, 0x3fbc,
+0x00a4, 0x0a30, 0x0570, 0x3fbc,
+0x0090, 0x0a14, 0x05a4, 0x3fb8,
+0x007c, 0x09fc, 0x05d0, 0x3fb8,
+0x006c, 0x09e0, 0x05fc, 0x3fb8,
+0x0058, 0x09c0, 0x0634, 0x3fb4,
+0x0048, 0x09a0, 0x0664, 0x3fb4,
+0x0038, 0x0980, 0x0690, 0x3fb8,
+0x002c, 0x0960, 0x06bc, 0x3fb8,
+0x001c, 0x093c, 0x06f0, 0x3fb8,
+0x0010, 0x0918, 0x071c, 0x3fbc,
+0x0004, 0x08f4, 0x074c, 0x3fbc,
+0x3ff8, 0x08cc, 0x077c, 0x3fc0,
+0x3ff0, 0x08a4, 0x07a8, 0x3fc4,
+0x3fe8, 0x087c, 0x07d0, 0x3fcc,
+0x3fe0, 0x0854, 0x07fc, 0x3fd0,
+0x3fd8, 0x0828, 0x0828, 0x3fd8,
+};
+
+static const uint16_t easf_filter_4tap_64p_ratio_0_80_s1_12[132] = {
+0x01d4, 0x0c54, 0x01d8, 0x0000,
+0x01b0, 0x0c58, 0x01fc, 0x3ffc,
+0x0188, 0x0c58, 0x0228, 0x3ff8,
+0x0164, 0x0c54, 0x0258, 0x3ff0,
+0x0140, 0x0c50, 0x0284, 0x3fec,
+0x0120, 0x0c48, 0x02b4, 0x3fe4,
+0x0100, 0x0c40, 0x02e0, 0x3fe0,
+0x00e0, 0x0c34, 0x0314, 0x3fd8,
+0x00c0, 0x0c28, 0x0344, 0x3fd4,
+0x00a4, 0x0c18, 0x0378, 0x3fcc,
+0x0088, 0x0c04, 0x03ac, 0x3fc8,
+0x0070, 0x0bf0, 0x03e0, 0x3fc0,
+0x0054, 0x0bdc, 0x0418, 0x3fb8,
+0x0040, 0x0bc4, 0x0448, 0x3fb4,
+0x0028, 0x0ba8, 0x0484, 0x3fac,
+0x0014, 0x0b8c, 0x04bc, 0x3fa4,
+0x0000, 0x0b6c, 0x04f4, 0x3fa0,
+0x3fec, 0x0b4c, 0x0530, 0x3f98,
+0x3fdc, 0x0b28, 0x0568, 0x3f94,
+0x3fcc, 0x0b04, 0x05a4, 0x3f8c,
+0x3fc0, 0x0adc, 0x05dc, 0x3f88,
+0x3fb0, 0x0ab4, 0x0618, 0x3f84,
+0x3fa4, 0x0a88, 0x0658, 0x3f7c,
+0x3f9c, 0x0a5c, 0x0690, 0x3f78,
+0x3f90, 0x0a30, 0x06cc, 0x3f74,
+0x3f88, 0x0a00, 0x0708, 0x3f70,
+0x3f80, 0x09d0, 0x0740, 0x3f70,
+0x3f7c, 0x09a0, 0x0778, 0x3f6c,
+0x3f74, 0x096c, 0x07b8, 0x3f68,
+0x3f70, 0x0938, 0x07f0, 0x3f68,
+0x3f6c, 0x0904, 0x0828, 0x3f68,
+0x3f6c, 0x08cc, 0x0860, 0x3f68,
+0x3f68, 0x0898, 0x0898, 0x3f68,
+};
+
+static const uint16_t easf_filter_4tap_64p_ratio_0_90_s1_12[132] = {
+0x00fc, 0x0e0c, 0x00f8, 0x0000,
+0x00d0, 0x0e0c, 0x0128, 0x3ffc,
+0x00ac, 0x0e0c, 0x0150, 0x3ff8,
+0x0084, 0x0e04, 0x0184, 0x3ff4,
+0x0064, 0x0dfc, 0x01b0, 0x3ff0,
+0x0040, 0x0df0, 0x01e4, 0x3fec,
+0x0020, 0x0de0, 0x0218, 0x3fe8,
+0x0004, 0x0dd0, 0x024c, 0x3fe0,
+0x3fe8, 0x0db8, 0x0284, 0x3fdc,
+0x3fcc, 0x0da0, 0x02c0, 0x3fd4,
+0x3fb4, 0x0d84, 0x02fc, 0x3fcc,
+0x3fa0, 0x0d68, 0x0334, 0x3fc4,
+0x3f88, 0x0d48, 0x0370, 0x3fc0,
+0x3f78, 0x0d24, 0x03ac, 0x3fb8,
+0x3f64, 0x0cfc, 0x03f0, 0x3fb0,
+0x3f54, 0x0cd4, 0x0434, 0x3fa4,
+0x3f48, 0x0ca8, 0x0474, 0x3f9c,
+0x3f3c, 0x0c78, 0x04b8, 0x3f94,
+0x3f30, 0x0c48, 0x04fc, 0x3f8c,
+0x3f28, 0x0c14, 0x0540, 0x3f84,
+0x3f20, 0x0be0, 0x0588, 0x3f78,
+0x3f18, 0x0ba8, 0x05d0, 0x3f70,
+0x3f14, 0x0b70, 0x0614, 0x3f68,
+0x3f10, 0x0b34, 0x065c, 0x3f60,
+0x3f0c, 0x0af8, 0x06a8, 0x3f54,
+0x3f0c, 0x0abc, 0x06ec, 0x3f4c,
+0x3f0c, 0x0a7c, 0x0734, 0x3f44,
+0x3f0c, 0x0a38, 0x0780, 0x3f3c,
+0x3f0c, 0x09f8, 0x07c8, 0x3f34,
+0x3f10, 0x09b4, 0x080c, 0x3f30,
+0x3f14, 0x0970, 0x0854, 0x3f28,
+0x3f18, 0x092c, 0x089c, 0x3f20,
+0x3f1c, 0x08e4, 0x08e4, 0x3f1c,
+};
+
+static const uint16_t easf_filter_4tap_64p_ratio_1_00_s1_12[132] = {
+0x0000, 0x1000, 0x0000, 0x0000,
+0x3fd8, 0x0ffc, 0x002c, 0x0000,
+0x3fb4, 0x0ff8, 0x0054, 0x0000,
+0x3f90, 0x0fec, 0x0088, 0x3ffc,
+0x3f70, 0x0fdc, 0x00b8, 0x3ffc,
+0x3f54, 0x0fc8, 0x00ec, 0x3ff8,
+0x3f38, 0x0fb0, 0x0120, 0x3ff8,
+0x3f20, 0x0f94, 0x0158, 0x3ff4,
+0x3f0c, 0x0f70, 0x0194, 0x3ff0,
+0x3ef8, 0x0f4c, 0x01d4, 0x3fe8,
+0x3ee4, 0x0f24, 0x0214, 0x3fe4,
+0x3ed8, 0x0ef8, 0x0250, 0x3fe0,
+0x3ec8, 0x0ec8, 0x0298, 0x3fd8,
+0x3ec0, 0x0e94, 0x02dc, 0x3fd0,
+0x3eb4, 0x0e5c, 0x0328, 0x3fc8,
+0x3eac, 0x0e24, 0x0370, 0x3fc0,
+0x3ea8, 0x0de4, 0x03bc, 0x3fb8,
+0x3ea4, 0x0da4, 0x0408, 0x3fb0,
+0x3ea4, 0x0d64, 0x0454, 0x3fa4,
+0x3ea4, 0x0d20, 0x04a4, 0x3f98,
+0x3ea4, 0x0cd8, 0x04f4, 0x3f90,
+0x3ea4, 0x0c8c, 0x054c, 0x3f84,
+0x3ea8, 0x0c40, 0x05a0, 0x3f78,
+0x3eb0, 0x0bf4, 0x05f0, 0x3f6c,
+0x3eb4, 0x0ba4, 0x0648, 0x3f60,
+0x3ebc, 0x0b54, 0x069c, 0x3f54,
+0x3ec4, 0x0b00, 0x06f4, 0x3f48,
+0x3ecc, 0x0ab0, 0x0748, 0x3f3c,
+0x3ed4, 0x0a58, 0x07a4, 0x3f30,
+0x3ee0, 0x0a04, 0x07f8, 0x3f24,
+0x3ee8, 0x09b0, 0x0850, 0x3f18,
+0x3ef4, 0x0958, 0x08a8, 0x3f0c,
+0x3f00, 0x0900, 0x0900, 0x3f00,
+};
+
+static const uint16_t easf_filter_6tap_64p_ratio_0_30_s1_12[198] = {
+0x012c, 0x0400, 0x05a4, 0x0404, 0x012c, 0x0000,
+0x0124, 0x03f4, 0x05a4, 0x040c, 0x0138, 0x0000,
+0x011c, 0x03e8, 0x05a4, 0x0418, 0x0140, 0x0000,
+0x0114, 0x03dc, 0x05a0, 0x0424, 0x0148, 0x0004,
+0x010c, 0x03d4, 0x05a0, 0x042c, 0x0150, 0x0004,
+0x0100, 0x03c8, 0x05a0, 0x0438, 0x015c, 0x0004,
+0x00f8, 0x03bc, 0x05a0, 0x0440, 0x0164, 0x0008,
+0x00f0, 0x03b0, 0x059c, 0x044c, 0x0170, 0x0008,
+0x00e8, 0x03a4, 0x059c, 0x0458, 0x0178, 0x0008,
+0x00e0, 0x0398, 0x0598, 0x0460, 0x0184, 0x000c,
+0x00d8, 0x038c, 0x0594, 0x0470, 0x018c, 0x000c,
+0x00d0, 0x0380, 0x0594, 0x0474, 0x0198, 0x0010,
+0x00cc, 0x0374, 0x0590, 0x0480, 0x01a0, 0x0010,
+0x00c4, 0x0368, 0x058c, 0x0488, 0x01ac, 0x0014,
+0x00bc, 0x035c, 0x058c, 0x0494, 0x01b4, 0x0014,
+0x00b4, 0x034c, 0x0588, 0x04a0, 0x01c0, 0x0018,
+0x00ac, 0x0340, 0x0584, 0x04a8, 0x01cc, 0x001c,
+0x00a8, 0x0334, 0x0580, 0x04b4, 0x01d4, 0x001c,
+0x00a0, 0x0328, 0x057c, 0x04bc, 0x01e0, 0x0020,
+0x0098, 0x031c, 0x0578, 0x04c4, 0x01ec, 0x0024,
+0x0094, 0x0310, 0x0574, 0x04cc, 0x01f8, 0x0024,
+0x008c, 0x0304, 0x0570, 0x04d8, 0x0200, 0x0028,
+0x0088, 0x02f8, 0x0568, 0x04e0, 0x020c, 0x002c,
+0x0080, 0x02ec, 0x0564, 0x04e8, 0x0218, 0x0030,
+0x007c, 0x02e0, 0x0560, 0x04ec, 0x0224, 0x0034,
+0x0078, 0x02d4, 0x0558, 0x04f8, 0x0230, 0x0034,
+0x0070, 0x02c8, 0x0554, 0x0500, 0x023c, 0x0038,
+0x006c, 0x02bc, 0x054c, 0x050c, 0x0244, 0x003c,
+0x0064, 0x02b0, 0x0548, 0x0514, 0x0250, 0x0040,
+0x0060, 0x02a4, 0x0540, 0x051c, 0x025c, 0x0044,
+0x005c, 0x0298, 0x053c, 0x0520, 0x0268, 0x0048,
+0x0058, 0x028c, 0x0534, 0x0524, 0x0274, 0x0050,
+0x0054, 0x0280, 0x052c, 0x052c, 0x0280, 0x0054,
+};
+
+static const uint16_t easf_filter_6tap_64p_ratio_0_40_s1_12[198] = {
+0x00a0, 0x0418, 0x068c, 0x041c, 0x00a0, 0x0000,
+0x0098, 0x0408, 0x068c, 0x0428, 0x00ac, 0x0000,
+0x0090, 0x03f8, 0x068c, 0x043c, 0x00b4, 0x3ffc,
+0x0088, 0x03e8, 0x068c, 0x044c, 0x00bc, 0x3ffc,
+0x0084, 0x03d8, 0x068c, 0x0458, 0x00c4, 0x3ffc,
+0x007c, 0x03c8, 0x0688, 0x046c, 0x00d0, 0x3ff8,
+0x0074, 0x03b8, 0x0688, 0x047c, 0x00d8, 0x3ff8,
+0x006c, 0x03a8, 0x0684, 0x048c, 0x00e4, 0x3ff8,
+0x0064, 0x0398, 0x0684, 0x049c, 0x00ec, 0x3ff8,
+0x0060, 0x0388, 0x0680, 0x04a8, 0x00f8, 0x3ff8,
+0x0058, 0x0378, 0x0680, 0x04b8, 0x0104, 0x3ff4,
+0x0054, 0x0368, 0x067c, 0x04c8, 0x010c, 0x3ff4,
+0x004c, 0x0358, 0x0678, 0x04d8, 0x0118, 0x3ff4,
+0x0048, 0x0348, 0x0674, 0x04e4, 0x0124, 0x3ff4,
+0x0040, 0x0338, 0x0670, 0x04f4, 0x0130, 0x3ff4,
+0x003c, 0x0328, 0x0668, 0x0504, 0x013c, 0x3ff4,
+0x0038, 0x0318, 0x0664, 0x0510, 0x0148, 0x3ff4,
+0x0034, 0x0308, 0x065c, 0x0520, 0x0154, 0x3ff4,
+0x002c, 0x02f8, 0x0658, 0x0530, 0x0160, 0x3ff4,
+0x0028, 0x02e8, 0x0654, 0x053c, 0x016c, 0x3ff4,
+0x0024, 0x02d8, 0x064c, 0x054c, 0x0178, 0x3ff4,
+0x0020, 0x02c8, 0x0644, 0x055c, 0x0184, 0x3ff4,
+0x001c, 0x02b8, 0x0640, 0x0568, 0x0190, 0x3ff4,
+0x0018, 0x02a8, 0x0638, 0x0574, 0x01a0, 0x3ff4,
+0x0014, 0x0298, 0x0630, 0x0584, 0x01ac, 0x3ff4,
+0x0014, 0x0288, 0x0624, 0x0590, 0x01bc, 0x3ff4,
+0x0010, 0x0278, 0x061c, 0x059c, 0x01c8, 0x3ff8,
+0x000c, 0x0268, 0x0614, 0x05ac, 0x01d4, 0x3ff8,
+0x0008, 0x0258, 0x060c, 0x05b8, 0x01e4, 0x3ff8,
+0x0008, 0x024c, 0x0600, 0x05bc, 0x01f4, 0x3ffc,
+0x0004, 0x023c, 0x05f8, 0x05cc, 0x0200, 0x3ffc,
+0x0004, 0x022c, 0x05ec, 0x05d4, 0x0210, 0x0000,
+0x0000, 0x021c, 0x05e4, 0x05e4, 0x021c, 0x0000,
+};
+
+static const uint16_t easf_filter_6tap_64p_ratio_0_50_s1_12[198] = {
+0x0000, 0x041c, 0x07cc, 0x0418, 0x0000, 0x0000,
+0x3ff8, 0x0404, 0x07cc, 0x0434, 0x0008, 0x3ffc,
+0x3ff4, 0x03ec, 0x07cc, 0x044c, 0x000c, 0x3ffc,
+0x3ff0, 0x03d8, 0x07cc, 0x0460, 0x0014, 0x3ff8,
+0x3fe8, 0x03c0, 0x07cc, 0x0478, 0x001c, 0x3ff8,
+0x3fe4, 0x03ac, 0x07c8, 0x0490, 0x0024, 0x3ff4,
+0x3fe0, 0x0394, 0x07c8, 0x04a4, 0x002c, 0x3ff4,
+0x3fdc, 0x0380, 0x07c4, 0x04bc, 0x0034, 0x3ff0,
+0x3fd8, 0x0368, 0x07c0, 0x04d4, 0x0040, 0x3fec,
+0x3fd4, 0x0350, 0x07bc, 0x04ec, 0x0048, 0x3fec,
+0x3fd0, 0x033c, 0x07b8, 0x0504, 0x0050, 0x3fe8,
+0x3fcc, 0x0324, 0x07b4, 0x051c, 0x005c, 0x3fe4,
+0x3fc8, 0x0310, 0x07ac, 0x0530, 0x0068, 0x3fe4,
+0x3fc4, 0x02fc, 0x07a8, 0x0548, 0x0070, 0x3fe0,
+0x3fc4, 0x02e4, 0x07a0, 0x055c, 0x007c, 0x3fe0,
+0x3fc0, 0x02d0, 0x0798, 0x0574, 0x0088, 0x3fdc,
+0x3fc0, 0x02b8, 0x0790, 0x058c, 0x0094, 0x3fd8,
+0x3fbc, 0x02a4, 0x0788, 0x05a0, 0x00a0, 0x3fd8,
+0x3fbc, 0x0290, 0x077c, 0x05b8, 0x00ac, 0x3fd4,
+0x3fbc, 0x027c, 0x0774, 0x05c8, 0x00b8, 0x3fd4,
+0x3fb8, 0x0268, 0x0768, 0x05e0, 0x00c8, 0x3fd0,
+0x3fb8, 0x0250, 0x0760, 0x05f8, 0x00d4, 0x3fcc,
+0x3fb8, 0x023c, 0x0754, 0x0608, 0x00e4, 0x3fcc,
+0x3fb8, 0x0228, 0x0748, 0x0620, 0x00f0, 0x3fc8,
+0x3fb8, 0x0214, 0x073c, 0x0630, 0x0100, 0x3fc8,
+0x3fb8, 0x0204, 0x072c, 0x0644, 0x0110, 0x3fc4,
+0x3fb8, 0x01f0, 0x0720, 0x0658, 0x011c, 0x3fc4,
+0x3fb8, 0x01dc, 0x0710, 0x0670, 0x012c, 0x3fc0,
+0x3fb8, 0x01c8, 0x0704, 0x0680, 0x013c, 0x3fc0,
+0x3fb8, 0x01b8, 0x06f4, 0x0690, 0x014c, 0x3fc0,
+0x3fb8, 0x01a4, 0x06e4, 0x06a4, 0x0160, 0x3fbc,
+0x3fb8, 0x0194, 0x06d4, 0x06b4, 0x0170, 0x3fbc,
+0x3fbc, 0x0180, 0x06c4, 0x06c4, 0x0180, 0x3fbc,
+};
+
+static const uint16_t easf_filter_6tap_64p_ratio_0_60_s1_12[198] = {
+0x3f64, 0x03ec, 0x0960, 0x03ec, 0x3f64, 0x0000,
+0x3f64, 0x03cc, 0x0960, 0x0408, 0x3f68, 0x0000,
+0x3f60, 0x03ac, 0x0960, 0x042c, 0x3f6c, 0x3ffc,
+0x3f60, 0x038c, 0x0960, 0x0448, 0x3f70, 0x3ffc,
+0x3f60, 0x0370, 0x095c, 0x046c, 0x3f70, 0x3ff8,
+0x3f5c, 0x0350, 0x0958, 0x048c, 0x3f78, 0x3ff8,
+0x3f5c, 0x0334, 0x0954, 0x04ac, 0x3f7c, 0x3ff4,
+0x3f5c, 0x0314, 0x0950, 0x04cc, 0x3f80, 0x3ff4,
+0x3f5c, 0x02f8, 0x0948, 0x04f0, 0x3f84, 0x3ff0,
+0x3f5c, 0x02d8, 0x0944, 0x050c, 0x3f8c, 0x3ff0,
+0x3f60, 0x02bc, 0x093c, 0x052c, 0x3f90, 0x3fec,
+0x3f60, 0x02a0, 0x0930, 0x0550, 0x3f98, 0x3fe8,
+0x3f60, 0x0284, 0x0928, 0x056c, 0x3fa0, 0x3fe8,
+0x3f64, 0x0268, 0x091c, 0x058c, 0x3fa8, 0x3fe4,
+0x3f64, 0x024c, 0x0910, 0x05b0, 0x3fb0, 0x3fe0,
+0x3f64, 0x0230, 0x0904, 0x05d0, 0x3fbc, 0x3fdc,
+0x3f68, 0x0214, 0x08f8, 0x05ec, 0x3fc4, 0x3fdc,
+0x3f6c, 0x01fc, 0x08e8, 0x060c, 0x3fcc, 0x3fd8,
+0x3f6c, 0x01e0, 0x08dc, 0x062c, 0x3fd8, 0x3fd4,
+0x3f70, 0x01c8, 0x08cc, 0x0648, 0x3fe4, 0x3fd0,
+0x3f74, 0x01b0, 0x08bc, 0x0664, 0x3ff0, 0x3fcc,
+0x3f74, 0x0194, 0x08a8, 0x068c, 0x3ffc, 0x3fc8,
+0x3f78, 0x017c, 0x0898, 0x06a8, 0x0008, 0x3fc4,
+0x3f7c, 0x0168, 0x0884, 0x06c0, 0x0018, 0x3fc0,
+0x3f80, 0x0150, 0x0870, 0x06dc, 0x0024, 0x3fc0,
+0x3f84, 0x0138, 0x085c, 0x06f8, 0x0034, 0x3fbc,
+0x3f88, 0x0120, 0x0848, 0x0718, 0x0040, 0x3fb8,
+0x3f8c, 0x010c, 0x0830, 0x0734, 0x0050, 0x3fb4,
+0x3f90, 0x00f8, 0x081c, 0x074c, 0x0060, 0x3fb0,
+0x3f94, 0x00e4, 0x0800, 0x0768, 0x0074, 0x3fac,
+0x3f98, 0x00d0, 0x07e8, 0x0784, 0x0084, 0x3fa8,
+0x3f9c, 0x00bc, 0x07d4, 0x079c, 0x0094, 0x3fa4,
+0x3fa0, 0x00a8, 0x07b8, 0x07b8, 0x00a8, 0x3fa0,
+};
+
+static const uint16_t easf_filter_6tap_64p_ratio_0_70_s1_12[198] = {
+0x3f00, 0x0368, 0x0b30, 0x0368, 0x3f00, 0x0000,
+0x3f04, 0x0340, 0x0b30, 0x0390, 0x3efc, 0x0000,
+0x3f08, 0x0318, 0x0b2c, 0x03bc, 0x3ef8, 0x0000,
+0x3f0c, 0x02f0, 0x0b28, 0x03e4, 0x3ef8, 0x0000,
+0x3f10, 0x02c8, 0x0b24, 0x0410, 0x3ef4, 0x0000,
+0x3f14, 0x02a0, 0x0b1c, 0x043c, 0x3ef4, 0x0000,
+0x3f1c, 0x027c, 0x0b14, 0x0464, 0x3ef0, 0x0000,
+0x3f20, 0x0254, 0x0b0c, 0x0490, 0x3ef0, 0x0000,
+0x3f24, 0x0230, 0x0b00, 0x04bc, 0x3ef0, 0x0000,
+0x3f2c, 0x020c, 0x0af4, 0x04e4, 0x3ef0, 0x0000,
+0x3f30, 0x01e8, 0x0ae8, 0x0510, 0x3ef0, 0x0000,
+0x3f38, 0x01c8, 0x0ad8, 0x0534, 0x3ef4, 0x0000,
+0x3f40, 0x01a4, 0x0ac8, 0x0564, 0x3ef4, 0x3ffc,
+0x3f44, 0x0184, 0x0ab4, 0x0590, 0x3ef8, 0x3ffc,
+0x3f4c, 0x0164, 0x0aa4, 0x05b8, 0x3efc, 0x3ff8,
+0x3f50, 0x0144, 0x0a90, 0x05e8, 0x3efc, 0x3ff8,
+0x3f58, 0x0124, 0x0a78, 0x0610, 0x3f04, 0x3ff8,
+0x3f60, 0x0108, 0x0a64, 0x0638, 0x3f08, 0x3ff4,
+0x3f64, 0x00e8, 0x0a4c, 0x066c, 0x3f0c, 0x3ff0,
+0x3f6c, 0x00cc, 0x0a34, 0x0690, 0x3f14, 0x3ff0,
+0x3f70, 0x00b4, 0x0a18, 0x06bc, 0x3f1c, 0x3fec,
+0x3f78, 0x0098, 0x0a00, 0x06e8, 0x3f20, 0x3fe8,
+0x3f80, 0x007c, 0x09e4, 0x0710, 0x3f2c, 0x3fe4,
+0x3f84, 0x0064, 0x09c8, 0x0738, 0x3f34, 0x3fe4,
+0x3f8c, 0x004c, 0x09a8, 0x0764, 0x3f3c, 0x3fe0,
+0x3f90, 0x0034, 0x098c, 0x078c, 0x3f48, 0x3fdc,
+0x3f98, 0x0020, 0x096c, 0x07b0, 0x3f54, 0x3fd8,
+0x3f9c, 0x0008, 0x094c, 0x07dc, 0x3f60, 0x3fd4,
+0x3fa4, 0x3ff4, 0x0928, 0x0808, 0x3f6c, 0x3fcc,
+0x3fa8, 0x3fe0, 0x0908, 0x082c, 0x3f7c, 0x3fc8,
+0x3fb0, 0x3fcc, 0x08e4, 0x0854, 0x3f88, 0x3fc4,
+0x3fb4, 0x3fbc, 0x08c0, 0x0878, 0x3f98, 0x3fc0,
+0x3fbc, 0x3fac, 0x0898, 0x0898, 0x3fac, 0x3fbc,
+};
+
+static const uint16_t easf_filter_6tap_64p_ratio_0_80_s1_12[198] = {
+0x3efc, 0x0284, 0x0d00, 0x0284, 0x3efc, 0x0000,
+0x3f04, 0x0254, 0x0d00, 0x02b4, 0x3ef0, 0x0004,
+0x3f10, 0x0224, 0x0cf8, 0x02e8, 0x3ee8, 0x0004,
+0x3f18, 0x01f4, 0x0cf4, 0x0318, 0x3ee0, 0x0008,
+0x3f24, 0x01c8, 0x0ce8, 0x034c, 0x3ed8, 0x0008,
+0x3f30, 0x019c, 0x0ce0, 0x037c, 0x3ecc, 0x000c,
+0x3f38, 0x0170, 0x0cd0, 0x03b8, 0x3ec4, 0x000c,
+0x3f44, 0x0144, 0x0cc4, 0x03e8, 0x3ebc, 0x0010,
+0x3f4c, 0x011c, 0x0cb4, 0x0420, 0x3eb4, 0x0010,
+0x3f58, 0x00f4, 0x0ca0, 0x0458, 0x3eac, 0x0010,
+0x3f60, 0x00cc, 0x0c8c, 0x048c, 0x3ea8, 0x0014,
+0x3f6c, 0x00a8, 0x0c74, 0x04c4, 0x3ea0, 0x0014,
+0x3f74, 0x0084, 0x0c5c, 0x04fc, 0x3e9c, 0x0014,
+0x3f7c, 0x0060, 0x0c44, 0x0534, 0x3e94, 0x0018,
+0x3f88, 0x0040, 0x0c28, 0x0568, 0x3e90, 0x0018,
+0x3f90, 0x0020, 0x0c08, 0x05a4, 0x3e8c, 0x0018,
+0x3f98, 0x0000, 0x0bec, 0x05dc, 0x3e88, 0x0018,
+0x3fa0, 0x3fe4, 0x0bcc, 0x0614, 0x3e84, 0x0018,
+0x3fac, 0x3fc4, 0x0ba8, 0x064c, 0x3e84, 0x0018,
+0x3fb4, 0x3fac, 0x0b84, 0x0684, 0x3e80, 0x0018,
+0x3fb8, 0x3f90, 0x0b60, 0x06c0, 0x3e80, 0x0018,
+0x3fc0, 0x3f78, 0x0b38, 0x06f8, 0x3e80, 0x0018,
+0x3fc8, 0x3f60, 0x0b14, 0x072c, 0x3e80, 0x0018,
+0x3fd0, 0x3f4c, 0x0ae8, 0x0760, 0x3e84, 0x0018,
+0x3fd8, 0x3f34, 0x0ac0, 0x079c, 0x3e84, 0x0014,
+0x3fdc, 0x3f20, 0x0a94, 0x07d4, 0x3e88, 0x0014,
+0x3fe4, 0x3f10, 0x0a68, 0x0808, 0x3e8c, 0x0010,
+0x3fe8, 0x3f00, 0x0a38, 0x0840, 0x3e90, 0x0010,
+0x3fec, 0x3ef0, 0x0a0c, 0x0874, 0x3e98, 0x000c,
+0x3ff4, 0x3ee0, 0x09d8, 0x08a8, 0x3ea0, 0x000c,
+0x3ff8, 0x3ed0, 0x09ac, 0x08dc, 0x3ea8, 0x0008,
+0x3ffc, 0x3ec4, 0x0978, 0x0914, 0x3eb0, 0x0004,
+0x0000, 0x3eb8, 0x0948, 0x0948, 0x3eb8, 0x0000,
+};
+
+static const uint16_t easf_filter_6tap_64p_ratio_0_90_s1_12[198] = {
+0x3f60, 0x0154, 0x0e9c, 0x0150, 0x3f60, 0x0000,
+0x3f6c, 0x011c, 0x0e9c, 0x018c, 0x3f50, 0x0000,
+0x3f7c, 0x00ec, 0x0e94, 0x01bc, 0x3f44, 0x0004,
+0x3f88, 0x00b8, 0x0e8c, 0x01f8, 0x3f34, 0x0008,
+0x3f94, 0x0088, 0x0e80, 0x0234, 0x3f28, 0x0008,
+0x3fa0, 0x005c, 0x0e74, 0x026c, 0x3f18, 0x000c,
+0x3fac, 0x0030, 0x0e60, 0x02b0, 0x3f08, 0x000c,
+0x3fb8, 0x0004, 0x0e50, 0x02e8, 0x3efc, 0x0010,
+0x3fc4, 0x3fdc, 0x0e38, 0x0328, 0x3eec, 0x0014,
+0x3fd0, 0x3fb4, 0x0e20, 0x0368, 0x3ee0, 0x0014,
+0x3fd8, 0x3f90, 0x0e04, 0x03ac, 0x3ed0, 0x0018,
+0x3fe4, 0x3f6c, 0x0de8, 0x03e8, 0x3ec4, 0x001c,
+0x3fec, 0x3f4c, 0x0dc8, 0x042c, 0x3eb4, 0x0020,
+0x3ff4, 0x3f2c, 0x0da4, 0x0474, 0x3ea8, 0x0020,
+0x0000, 0x3f0c, 0x0d80, 0x04b8, 0x3e98, 0x0024,
+0x0008, 0x3ef0, 0x0d58, 0x04fc, 0x3e8c, 0x0028,
+0x000c, 0x3ed8, 0x0d30, 0x0540, 0x3e80, 0x002c,
+0x0014, 0x3ec0, 0x0d04, 0x0588, 0x3e74, 0x002c,
+0x001c, 0x3ea8, 0x0cd8, 0x05cc, 0x3e68, 0x0030,
+0x0020, 0x3e94, 0x0ca8, 0x0614, 0x3e5c, 0x0034,
+0x0028, 0x3e80, 0x0c78, 0x065c, 0x3e50, 0x0034,
+0x002c, 0x3e6c, 0x0c44, 0x06a4, 0x3e48, 0x0038,
+0x0030, 0x3e5c, 0x0c0c, 0x06f0, 0x3e3c, 0x003c,
+0x0034, 0x3e50, 0x0bd8, 0x0734, 0x3e34, 0x003c,
+0x0038, 0x3e44, 0x0ba0, 0x0778, 0x3e2c, 0x0040,
+0x003c, 0x3e38, 0x0b64, 0x07c4, 0x3e24, 0x0040,
+0x0040, 0x3e2c, 0x0b28, 0x0808, 0x3e20, 0x0044,
+0x0040, 0x3e24, 0x0aec, 0x0850, 0x3e1c, 0x0044,
+0x0044, 0x3e1c, 0x0aac, 0x0898, 0x3e18, 0x0044,
+0x0044, 0x3e18, 0x0a70, 0x08d8, 0x3e14, 0x0048,
+0x0044, 0x3e14, 0x0a2c, 0x0924, 0x3e10, 0x0048,
+0x0048, 0x3e10, 0x09ec, 0x0964, 0x3e10, 0x0048,
+0x0048, 0x3e10, 0x09a8, 0x09a8, 0x3e10, 0x0048,
+};
+
+static const uint16_t easf_filter_6tap_64p_ratio_1_00_s1_12[198] = { /* 33 rows x 6 coefficients */
+0x0000, 0x0000, 0x1000, 0x0000, 0x0000, 0x0000,
+0x000c, 0x3fcc, 0x1000, 0x0034, 0x3ff4, 0x0000,
+0x0018, 0x3f9c, 0x0ff8, 0x0070, 0x3fe4, 0x0000,
+0x0024, 0x3f6c, 0x0ff0, 0x00ac, 0x3fd4, 0x0000,
+0x0030, 0x3f40, 0x0fe4, 0x00e8, 0x3fc4, 0x0000,
+0x0038, 0x3f14, 0x0fd4, 0x0128, 0x3fb4, 0x0004,
+0x0044, 0x3eec, 0x0fc0, 0x0168, 0x3fa4, 0x0004,
+0x004c, 0x3ec8, 0x0fac, 0x01a8, 0x3f94, 0x0004,
+0x0054, 0x3ea4, 0x0f90, 0x01ec, 0x3f84, 0x0008,
+0x005c, 0x3e84, 0x0f74, 0x0234, 0x3f70, 0x0008,
+0x0060, 0x3e64, 0x0f50, 0x0280, 0x3f60, 0x000c,
+0x0068, 0x3e48, 0x0f2c, 0x02c8, 0x3f4c, 0x0010,
+0x006c, 0x3e30, 0x0f04, 0x0318, 0x3f38, 0x0010,
+0x0070, 0x3e18, 0x0edc, 0x0364, 0x3f24, 0x0014,
+0x0074, 0x3e00, 0x0eac, 0x03b8, 0x3f10, 0x0018,
+0x0078, 0x3df0, 0x0e7c, 0x0404, 0x3efc, 0x001c,
+0x007c, 0x3de0, 0x0e48, 0x0454, 0x3ee8, 0x0020,
+0x007c, 0x3dd0, 0x0e14, 0x04ac, 0x3ed4, 0x0020,
+0x0080, 0x3dc4, 0x0dd8, 0x0500, 0x3ec0, 0x0024,
+0x0080, 0x3db8, 0x0d9c, 0x0554, 0x3eac, 0x002c,
+0x0080, 0x3db0, 0x0d5c, 0x05ac, 0x3e98, 0x0030,
+0x0080, 0x3da8, 0x0d1c, 0x0600, 0x3e88, 0x0034,
+0x0080, 0x3da4, 0x0cd8, 0x0658, 0x3e74, 0x0038,
+0x0080, 0x3da4, 0x0c94, 0x06ac, 0x3e60, 0x003c,
+0x007c, 0x3da0, 0x0c4c, 0x070c, 0x3e4c, 0x0040,
+0x007c, 0x3da4, 0x0c00, 0x0760, 0x3e3c, 0x0044,
+0x0078, 0x3da4, 0x0bb4, 0x07bc, 0x3e2c, 0x0048,
+0x0074, 0x3da8, 0x0b64, 0x0814, 0x3e1c, 0x0050,
+0x0074, 0x3db0, 0x0b14, 0x0868, 0x3e0c, 0x0054,
+0x0070, 0x3db8, 0x0ac4, 0x08c0, 0x3dfc, 0x0058,
+0x006c, 0x3dc0, 0x0a70, 0x091c, 0x3dec, 0x005c,
+0x0068, 0x3dc8, 0x0a1c, 0x0974, 0x3de0, 0x0060,
+0x0064, 0x3dd4, 0x09c8, 0x09c8, 0x3dd4, 0x0064,
+}; /* returned by spl_get_easf_filter_6tap_64p() for ratio >= 9/10 */
+
+static struct scale_ratio_to_reg_value_lookup easf_v_bf3_mode_lookup[] = {
+ {3, 10, 0x0000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x0000},
+ {5, 10, 0x0000},
+ {6, 10, 0x0000},
+ {7, 10, 0x0000},
+ {8, 10, 0x0000},
+ {9, 10, 0x0000},
+ {1, 1, 0x0000},
+ {-1, -1, 0x0002}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_h_bf3_mode_lookup[] = {
+ {3, 10, 0x0000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x0000},
+ {5, 10, 0x0000},
+ {6, 10, 0x0000},
+ {7, 10, 0x0000},
+ {8, 10, 0x0000},
+ {9, 10, 0x0000},
+ {1, 1, 0x0000},
+ {-1, -1, 0x0002}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_6tap_lookup[] = {
+ {3, 10, 0x4100}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x4100},
+ {5, 10, 0x4100},
+ {6, 10, 0x4100},
+ {7, 10, 0x4100},
+ {8, 10, 0x4100},
+ {9, 10, 0x4100},
+ {1, 1, 0x4100},
+ {-1, -1, 0x4100}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_6tap_lookup[] = {
+ {3, 10, 0x4000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x4000},
+ {5, 10, 0x4000},
+ {6, 10, 0x4000},
+ {7, 10, 0x4000},
+ {8, 10, 0x4000},
+ {9, 10, 0x4000},
+ {1, 1, 0x4000},
+ {-1, -1, 0x4000}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_gain_ring6_6tap_lookup[] = {
+ {3, 10, 0x0000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x251F},
+ {5, 10, 0x291F},
+ {6, 10, 0xA51F},
+ {7, 10, 0xA51F},
+ {8, 10, 0xAA66}, /* NOTE(review): neighbouring bands use 0xA51F - confirm this value is intended */
+ {9, 10, 0xA51F},
+ {1, 1, 0xA640},
+ {-1, -1, 0xA640}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_gain_ring4_6tap_lookup[] = {
+ {3, 10, 0x0000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x9600},
+ {5, 10, 0xA460},
+ {6, 10, 0xA8E0},
+ {7, 10, 0xAC00},
+ {8, 10, 0xAD20},
+ {9, 10, 0xAFC0},
+ {1, 1, 0xB058},
+ {-1, -1, 0xB058}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_4tap_lookup[] = {
+ {3, 10, 0x4100}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x4100},
+ {5, 10, 0x4100},
+ {6, 10, 0x4100},
+ {7, 10, 0x4100},
+ {8, 10, 0x4100},
+ {9, 10, 0x4100},
+ {1, 1, 0x4100},
+ {-1, -1, 0x4100}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_4tap_lookup[] = {
+ {3, 10, 0x4000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x4000},
+ {5, 10, 0x4000},
+ {6, 10, 0x4000},
+ {7, 10, 0x4000},
+ {8, 10, 0x4000},
+ {9, 10, 0x4000},
+ {1, 1, 0x4000},
+ {-1, -1, 0x4000}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_gain_ring6_4tap_lookup[] = {
+ {3, 10, 0x0000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x0000},
+ {5, 10, 0x0000},
+ {6, 10, 0x0000},
+ {7, 10, 0x0000},
+ {8, 10, 0x0000},
+ {9, 10, 0x0000},
+ {1, 1, 0x0000},
+ {-1, -1, 0x0000}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_gain_ring4_4tap_lookup[] = {
+ {3, 10, 0x0000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x0000},
+ {5, 10, 0x0000},
+ {6, 10, 0x9900},
+ {7, 10, 0xA100},
+ {8, 10, 0xA8C0},
+ {9, 10, 0xAB20},
+ {1, 1, 0xAC00},
+ {-1, -1, 0xAC00}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_uptilt_offset_lookup[] = {
+ {3, 10, 0x0000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x0000},
+ {5, 10, 0x0000},
+ {6, 10, 0x0000},
+ {7, 10, 0x0000},
+ {8, 10, 0x4100},
+ {9, 10, 0x9F00},
+ {1, 1, 0xA4C0},
+ {-1, -1, 0xA8D8}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt_maxval_lookup[] = {
+ {3, 10, 0x0000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x0000},
+ {5, 10, 0x0000},
+ {6, 10, 0x0000},
+ {7, 10, 0x0000},
+ {8, 10, 0x4000},
+ {9, 10, 0x24FE},
+ {1, 1, 0x2D64},
+ {-1, -1, 0x3ADB}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_slope_lookup[] = {
+ {3, 10, 0x3800}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x3800},
+ {5, 10, 0x3800},
+ {6, 10, 0x3800},
+ {7, 10, 0x3800},
+ {8, 10, 0x3886},
+ {9, 10, 0x3940},
+ {1, 1, 0x3A4E},
+ {-1, -1, 0x3B66}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt1_slope_lookup[] = {
+ {3, 10, 0x3800}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x3800},
+ {5, 10, 0x3800},
+ {6, 10, 0x3800},
+ {7, 10, 0x3800},
+ {8, 10, 0x36F4},
+ {9, 10, 0x359C},
+ {1, 1, 0x3360},
+ {-1, -1, 0x2F20}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_slope_lookup[] = {
+ {3, 10, 0x0000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x0000},
+ {5, 10, 0x0000},
+ {6, 10, 0x0000},
+ {7, 10, 0x0000},
+ {8, 10, 0x0000},
+ {9, 10, 0x359C},
+ {1, 1, 0x31F0},
+ {-1, -1, 0x1F00}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+static struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_offset_lookup[] = {
+ {3, 10, 0x0000}, /* {numer, denom, reg_value}: chosen when ratio < numer/denom */
+ {4, 10, 0x0000},
+ {5, 10, 0x0000},
+ {6, 10, 0x0000},
+ {7, 10, 0x0000},
+ {8, 10, 0x0000},
+ {9, 10, 0x9F00},
+ {1, 1, 0xA400},
+ {-1, -1, 0x9E00}, /* numer < 0 stops the scan; this reg_value is the fallback (ratio >= 1/1) */
+};
+
+/* Pick the 3-tap table of the first band bound (3/10 .. 9/10) above ratio, else the 1_00 one. */
+static const uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio)
+{
+	static const uint16_t *const band_tables[] = {
+		easf_filter_3tap_64p_ratio_0_30_s1_12,
+		easf_filter_3tap_64p_ratio_0_40_s1_12,
+		easf_filter_3tap_64p_ratio_0_50_s1_12,
+		easf_filter_3tap_64p_ratio_0_60_s1_12,
+		easf_filter_3tap_64p_ratio_0_70_s1_12,
+		easf_filter_3tap_64p_ratio_0_80_s1_12,
+		easf_filter_3tap_64p_ratio_0_90_s1_12,
+	};
+	int tenths;
+
+	for (tenths = 3; tenths <= 9; tenths++)
+		if (ratio.value < spl_fixpt_from_fraction(tenths, 10).value)
+			return band_tables[tenths - 3];
+	return easf_filter_3tap_64p_ratio_1_00_s1_12;
+}
+
+/* Pick the 4-tap table of the first band bound (3/10 .. 9/10) above ratio, else the 1_00 one. */
+static const uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio)
+{
+	static const uint16_t *const band_tables[] = {
+		easf_filter_4tap_64p_ratio_0_30_s1_12,
+		easf_filter_4tap_64p_ratio_0_40_s1_12,
+		easf_filter_4tap_64p_ratio_0_50_s1_12,
+		easf_filter_4tap_64p_ratio_0_60_s1_12,
+		easf_filter_4tap_64p_ratio_0_70_s1_12,
+		easf_filter_4tap_64p_ratio_0_80_s1_12,
+		easf_filter_4tap_64p_ratio_0_90_s1_12,
+	};
+	int tenths;
+
+	for (tenths = 3; tenths <= 9; tenths++)
+		if (ratio.value < spl_fixpt_from_fraction(tenths, 10).value)
+			return band_tables[tenths - 3];
+	return easf_filter_4tap_64p_ratio_1_00_s1_12;
+}
+
+/* Pick the 6-tap table of the first band bound (3/10 .. 9/10) above ratio, else the 1_00 one. */
+static const uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio)
+{
+	static const uint16_t *const band_tables[] = {
+		easf_filter_6tap_64p_ratio_0_30_s1_12,
+		easf_filter_6tap_64p_ratio_0_40_s1_12,
+		easf_filter_6tap_64p_ratio_0_50_s1_12,
+		easf_filter_6tap_64p_ratio_0_60_s1_12,
+		easf_filter_6tap_64p_ratio_0_70_s1_12,
+		easf_filter_6tap_64p_ratio_0_80_s1_12,
+		easf_filter_6tap_64p_ratio_0_90_s1_12,
+	};
+	int tenths;
+
+	for (tenths = 3; tenths <= 9; tenths++)
+		if (ratio.value < spl_fixpt_from_fraction(tenths, 10).value)
+			return band_tables[tenths - 3];
+	return easf_filter_6tap_64p_ratio_1_00_s1_12;
+}
+
+const uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio)
+{
+	switch (taps) { /* EASF tables are dispatched for 6, 4 and 3 taps only */
+	case 6:
+		return spl_get_easf_filter_6tap_64p(ratio);
+	case 4:
+		return spl_get_easf_filter_4tap_64p(ratio);
+	case 3:
+		return spl_get_easf_filter_3tap_64p(ratio);
+	default: /* should never happen, bug: break into the debugger and hand back NULL */
+		SPL_BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+}
+
+/* Pick the 3-tap table of the first band bound (3/10 .. 9/10) above ratio, else the 1_00 one. */
+static const uint16_t *spl_get_easf_filter_3tap_64p_s1_10(struct spl_fixed31_32 ratio)
+{
+	static const uint16_t *const band_tables[] = {
+		easf_filter_3tap_64p_ratio_0_30,
+		easf_filter_3tap_64p_ratio_0_40,
+		easf_filter_3tap_64p_ratio_0_50,
+		easf_filter_3tap_64p_ratio_0_60,
+		easf_filter_3tap_64p_ratio_0_70,
+		easf_filter_3tap_64p_ratio_0_80,
+		easf_filter_3tap_64p_ratio_0_90,
+	};
+	int tenths;
+
+	for (tenths = 3; tenths <= 9; tenths++)
+		if (ratio.value < spl_fixpt_from_fraction(tenths, 10).value)
+			return band_tables[tenths - 3];
+	return easf_filter_3tap_64p_ratio_1_00;
+}
+
+/* Pick the 4-tap table of the first band bound (3/10 .. 9/10) above ratio, else the 1_00 one. */
+static const uint16_t *spl_get_easf_filter_4tap_64p_s1_10(struct spl_fixed31_32 ratio)
+{
+	static const uint16_t *const band_tables[] = {
+		easf_filter_4tap_64p_ratio_0_30,
+		easf_filter_4tap_64p_ratio_0_40,
+		easf_filter_4tap_64p_ratio_0_50,
+		easf_filter_4tap_64p_ratio_0_60,
+		easf_filter_4tap_64p_ratio_0_70,
+		easf_filter_4tap_64p_ratio_0_80,
+		easf_filter_4tap_64p_ratio_0_90,
+	};
+	int tenths;
+
+	for (tenths = 3; tenths <= 9; tenths++)
+		if (ratio.value < spl_fixpt_from_fraction(tenths, 10).value)
+			return band_tables[tenths - 3];
+	return easf_filter_4tap_64p_ratio_1_00;
+}
+
+/* Pick the 6-tap table of the first band bound (3/10 .. 9/10) above ratio, else the 1_00 one. */
+static const uint16_t *spl_get_easf_filter_6tap_64p_s1_10(struct spl_fixed31_32 ratio)
+{
+	static const uint16_t *const band_tables[] = {
+		easf_filter_6tap_64p_ratio_0_30,
+		easf_filter_6tap_64p_ratio_0_40,
+		easf_filter_6tap_64p_ratio_0_50,
+		easf_filter_6tap_64p_ratio_0_60,
+		easf_filter_6tap_64p_ratio_0_70,
+		easf_filter_6tap_64p_ratio_0_80,
+		easf_filter_6tap_64p_ratio_0_90,
+	};
+	int tenths;
+
+	for (tenths = 3; tenths <= 9; tenths++)
+		if (ratio.value < spl_fixpt_from_fraction(tenths, 10).value)
+			return band_tables[tenths - 3];
+	return easf_filter_6tap_64p_ratio_1_00;
+}
+
+const uint16_t *spl_dscl_get_easf_filter_coeffs_64p_s1_10(int taps, struct spl_fixed31_32 ratio)
+{
+	switch (taps) { /* EASF tables are dispatched for 6, 4 and 3 taps only */
+	case 6:
+		return spl_get_easf_filter_6tap_64p_s1_10(ratio);
+	case 4:
+		return spl_get_easf_filter_4tap_64p_s1_10(ratio);
+	case 3:
+		return spl_get_easf_filter_3tap_64p_s1_10(ratio);
+	default: /* should never happen, bug: break into the debugger and hand back NULL */
+		SPL_BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+}
+
+/*
+ * Fill the filter_h/filter_h_c and filter_v/filter_v_c table pointers.
+ * Original note: old (non-EASF) coefficients are keyed on input / output
+ * (data->ratios); new EASF ones on output / input (data->recip_ratios).
+ */
+void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data,
+	const struct spl_scaler_data *data, bool enable_easf_v,
+	bool enable_easf_h)
+{
+	/* Horizontal tables */
+	if (!enable_easf_h) {
+		dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p(
+			data->taps.h_taps, data->ratios.horz);
+		dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p(
+			data->taps.h_taps_c, data->ratios.horz_c);
+	} else {
+		dscl_prog_data->filter_h = spl_dscl_get_easf_filter_coeffs_64p(
+			data->taps.h_taps, data->recip_ratios.horz);
+		dscl_prog_data->filter_h_c = spl_dscl_get_easf_filter_coeffs_64p(
+			data->taps.h_taps_c, data->recip_ratios.horz_c);
+	}
+
+	/* Vertical tables */
+	if (!enable_easf_v) {
+		dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p(
+			data->taps.v_taps, data->ratios.vert);
+		dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p(
+			data->taps.v_taps_c, data->ratios.vert_c);
+	} else {
+		dscl_prog_data->filter_v = spl_dscl_get_easf_filter_coeffs_64p(
+			data->taps.v_taps, data->recip_ratios.vert);
+		dscl_prog_data->filter_v_c = spl_dscl_get_easf_filter_coeffs_64p(
+			data->taps.v_taps_c, data->recip_ratios.vert_c);
+	}
+}
+
+/*
+ * Map @ratio to a register value through a threshold table: the reg_value of
+ * the first row with ratio < numer/denom is returned. When a row with a
+ * negative numer is reached first, or no row matches, the reg_value of the
+ * LAST row is returned. An empty table (num_entries == 0) now yields 0.
+ */
+static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct spl_fixed31_32 ratio,
+	const struct scale_ratio_to_reg_value_lookup *lookup_table_base_ptr,
+	unsigned int num_entries)
+{
+	unsigned int i;
+
+	/* Guard: the fallback below indexes num_entries - 1, which would wrap on 0. */
+	if (num_entries == 0)
+		return 0;
+
+	for (i = 0; i < num_entries; i++) {
+		const struct scale_ratio_to_reg_value_lookup *entry = &lookup_table_base_ptr[i];
+
+		if (entry->numer < 0)
+			break; /* terminating row: stop scanning, use the fallback */
+
+		if (ratio.value < spl_fixpt_from_fraction(entry->numer, entry->denom).value)
+			return entry->reg_value;
+	}
+
+	return lookup_table_base_ptr[num_entries - 1].reg_value;
+}
+/* Return the easf_v_bf3_mode_lookup register value selected by ratio. */
+uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio)
+{
+	const unsigned int entry_count = sizeof(easf_v_bf3_mode_lookup) /
+		sizeof(easf_v_bf3_mode_lookup[0]);
+
+	return spl_easf_get_scale_ratio_to_reg_value(ratio, easf_v_bf3_mode_lookup,
+		entry_count);
+}
+/* Return the easf_h_bf3_mode_lookup register value selected by ratio. */
+uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio)
+{
+	const unsigned int entry_count = sizeof(easf_h_bf3_mode_lookup) /
+		sizeof(easf_h_bf3_mode_lookup[0]);
+
+	return spl_easf_get_scale_ratio_to_reg_value(ratio, easf_h_bf3_mode_lookup,
+		entry_count);
+}
+/*
+ * easf_reducer_gain6_{4,6}tap_lookup value for ratio; 0 unless taps is 4 or 6.
+ */
+uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio)
+{
+	if (taps == 4) {
+		return spl_easf_get_scale_ratio_to_reg_value(ratio,
+			easf_reducer_gain6_4tap_lookup,
+			sizeof(easf_reducer_gain6_4tap_lookup) /
+			sizeof(easf_reducer_gain6_4tap_lookup[0]));
+	}
+	if (taps == 6) {
+		return spl_easf_get_scale_ratio_to_reg_value(ratio,
+			easf_reducer_gain6_6tap_lookup,
+			sizeof(easf_reducer_gain6_6tap_lookup) /
+			sizeof(easf_reducer_gain6_6tap_lookup[0]));
+	}
+	return 0;
+}
+/*
+ * easf_reducer_gain4_{4,6}tap_lookup value for ratio; 0 unless taps is 4 or 6.
+ */
+uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio)
+{
+	if (taps == 4) {
+		return spl_easf_get_scale_ratio_to_reg_value(ratio,
+			easf_reducer_gain4_4tap_lookup,
+			sizeof(easf_reducer_gain4_4tap_lookup) /
+			sizeof(easf_reducer_gain4_4tap_lookup[0]));
+	}
+	if (taps == 6) {
+		return spl_easf_get_scale_ratio_to_reg_value(ratio,
+			easf_reducer_gain4_6tap_lookup,
+			sizeof(easf_reducer_gain4_6tap_lookup) /
+			sizeof(easf_reducer_gain4_6tap_lookup[0]));
+	}
+	return 0;
+}
+/*
+ * easf_gain_ring6_{4,6}tap_lookup value for ratio; 0 unless taps is 4 or 6.
+ */
+uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio)
+{
+	if (taps == 4) {
+		return spl_easf_get_scale_ratio_to_reg_value(ratio,
+			easf_gain_ring6_4tap_lookup,
+			sizeof(easf_gain_ring6_4tap_lookup) /
+			sizeof(easf_gain_ring6_4tap_lookup[0]));
+	}
+	if (taps == 6) {
+		return spl_easf_get_scale_ratio_to_reg_value(ratio,
+			easf_gain_ring6_6tap_lookup,
+			sizeof(easf_gain_ring6_6tap_lookup) /
+			sizeof(easf_gain_ring6_6tap_lookup[0]));
+	}
+	return 0;
+}
+/*
+ * easf_gain_ring4_{4,6}tap_lookup value for ratio; 0 unless taps is 4 or 6.
+ */
+uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio)
+{
+	if (taps == 4) {
+		return spl_easf_get_scale_ratio_to_reg_value(ratio,
+			easf_gain_ring4_4tap_lookup,
+			sizeof(easf_gain_ring4_4tap_lookup) /
+			sizeof(easf_gain_ring4_4tap_lookup[0]));
+	}
+	if (taps == 6) {
+		return spl_easf_get_scale_ratio_to_reg_value(ratio,
+			easf_gain_ring4_6tap_lookup,
+			sizeof(easf_gain_ring4_6tap_lookup) /
+			sizeof(easf_gain_ring4_6tap_lookup[0]));
+	}
+	return 0;
+}
+/*
+ * easf_3tap_dntilt_uptilt_offset_lookup value for ratio.
+ * Only taps == 3 consults the table; any other tap count yields 0.
+ */
+uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio)
+{
+	if (taps != 3)
+		return 0;
+
+	return spl_easf_get_scale_ratio_to_reg_value(ratio,
+		easf_3tap_dntilt_uptilt_offset_lookup,
+		sizeof(easf_3tap_dntilt_uptilt_offset_lookup) /
+		sizeof(easf_3tap_dntilt_uptilt_offset_lookup[0]));
+}
+/*
+ * easf_3tap_uptilt_maxval_lookup value for ratio.
+ * Only taps == 3 consults the table; any other tap count yields 0.
+ */
+uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio)
+{
+	if (taps != 3)
+		return 0;
+
+	return spl_easf_get_scale_ratio_to_reg_value(ratio,
+		easf_3tap_uptilt_maxval_lookup,
+		sizeof(easf_3tap_uptilt_maxval_lookup) /
+		sizeof(easf_3tap_uptilt_maxval_lookup[0]));
+}
+/*
+ * easf_3tap_dntilt_slope_lookup value for ratio.
+ * Only taps == 3 consults the table; any other tap count yields 0.
+ */
+uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio)
+{
+	if (taps != 3)
+		return 0;
+
+	return spl_easf_get_scale_ratio_to_reg_value(ratio,
+		easf_3tap_dntilt_slope_lookup,
+		sizeof(easf_3tap_dntilt_slope_lookup) /
+		sizeof(easf_3tap_dntilt_slope_lookup[0]));
+}
+/*
+ * easf_3tap_uptilt1_slope_lookup value for ratio.
+ * Only taps == 3 consults the table; any other tap count yields 0.
+ */
+uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio)
+{
+	if (taps != 3)
+		return 0;
+
+	return spl_easf_get_scale_ratio_to_reg_value(ratio,
+		easf_3tap_uptilt1_slope_lookup,
+		sizeof(easf_3tap_uptilt1_slope_lookup) /
+		sizeof(easf_3tap_uptilt1_slope_lookup[0]));
+}
+/*
+ * easf_3tap_uptilt2_slope_lookup value for ratio.
+ * Only taps == 3 consults the table; any other tap count yields 0.
+ */
+uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio)
+{
+	if (taps != 3)
+		return 0;
+
+	return spl_easf_get_scale_ratio_to_reg_value(ratio,
+		easf_3tap_uptilt2_slope_lookup,
+		sizeof(easf_3tap_uptilt2_slope_lookup) /
+		sizeof(easf_3tap_uptilt2_slope_lookup[0]));
+}
+/*
+ * easf_3tap_uptilt2_offset_lookup value for ratio.
+ * Only taps == 3 consults the table; any other tap count yields 0.
+ */
+uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio)
+{
+	if (taps != 3)
+		return 0;
+
+	return spl_easf_get_scale_ratio_to_reg_value(ratio,
+		easf_3tap_uptilt2_offset_lookup,
+		sizeof(easf_3tap_uptilt2_offset_lookup) /
+		sizeof(easf_3tap_uptilt2_offset_lookup[0]));
+}
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.h b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.h
new file mode 100644
index 000000000000..321ae22a04d4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __DC_SPL_SCL_EASF_FILTERS_H__
+#define __DC_SPL_SCL_EASF_FILTERS_H__
+
+#include "dc_spl_types.h"
+
+struct scale_ratio_to_reg_value_lookup { /* one row of a ratio-threshold -> register-value table */
+ int numer; /* threshold numerator; a negative value marks the terminating row */
+ int denom; /* threshold denominator; the ratio is compared against numer/denom */
+ const uint32_t reg_value; /* value returned when ratio < numer/denom */
+};
+
+void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data,
+ const struct spl_scaler_data *data, bool enable_easf_v,
+ bool enable_easf_h); /* sets filter_h, filter_h_c, filter_v and filter_v_c */
+/* Ratio -> register-value lookups; the "taps" variants return 0 when taps has no table. */
+uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio);
+uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio);
+uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio);
+uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio);
+uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio);
+uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio);
+uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio);
+uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio);
+uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio);
+uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio);
+uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio);
+uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio);
+
+/* public API: both return NULL (after SPL_BREAK_TO_DEBUGGER) unless taps is 3, 4 or 6 */
+const uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio);
+const uint16_t *spl_dscl_get_easf_filter_coeffs_64p_s1_10(int taps, struct spl_fixed31_32 ratio);
+
+#endif /* __DC_SPL_SCL_EASF_FILTERS_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.c
new file mode 100644
index 000000000000..5e52bdf1ad44
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.c
@@ -0,0 +1,1233 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "spl_debug.h"
+#include "dc_spl_scl_filters.h"
+
+//=========================================
+// <num_taps> = 2
+// <num_phases> = 64
+// <scale_ratio> = 0.833333 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = s1.10
+// <CoefOut> = s1.12
+//=========================================
+static const uint16_t filter_2tap_64p[66] = { /* 33 rows x 2 coefficients */
+ 0x1000, 0x0000,
+ 0x1000, 0x0000,
+ 0x0FFC, 0x0004,
+ 0x0FF8, 0x0008,
+ 0x0FF0, 0x0010,
+ 0x0FE4, 0x001C,
+ 0x0FD8, 0x0028,
+ 0x0FC4, 0x003C,
+ 0x0FB0, 0x0050,
+ 0x0F98, 0x0068,
+ 0x0F7C, 0x0084,
+ 0x0F58, 0x00A8,
+ 0x0F34, 0x00CC,
+ 0x0F08, 0x00F8,
+ 0x0ED8, 0x0128,
+ 0x0EA4, 0x015C,
+ 0x0E68, 0x0198,
+ 0x0E28, 0x01D8,
+ 0x0DE4, 0x021C,
+ 0x0D98, 0x0268,
+ 0x0D44, 0x02BC,
+ 0x0CEC, 0x0314,
+ 0x0C90, 0x0370,
+ 0x0C2C, 0x03D4,
+ 0x0BC4, 0x043C,
+ 0x0B58, 0x04A8,
+ 0x0AE8, 0x0518,
+ 0x0A74, 0x058C,
+ 0x09FC, 0x0604,
+ 0x0980, 0x0680,
+ 0x0900, 0x0700,
+ 0x0880, 0x0780,
+ 0x0800, 0x0800
+};
+
+//=========================================
+// <num_taps> = 3
+// <num_phases> = 64
+// <scale_ratio> = 0.83333 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12
+//=========================================
+static const uint16_t filter_3tap_64p_upscale[99] = { /* 33 rows x 3 coefficients */
+ 0x0804, 0x07FC, 0x0000,
+ 0x07A8, 0x0860, 0x3FF8,
+ 0x0754, 0x08BC, 0x3FF0,
+ 0x0700, 0x0918, 0x3FE8,
+ 0x06AC, 0x0978, 0x3FDC,
+ 0x0654, 0x09D8, 0x3FD4,
+ 0x0604, 0x0A34, 0x3FC8,
+ 0x05B0, 0x0A90, 0x3FC0,
+ 0x055C, 0x0AF0, 0x3FB4,
+ 0x050C, 0x0B48, 0x3FAC,
+ 0x04BC, 0x0BA0, 0x3FA4,
+ 0x0470, 0x0BF4, 0x3F9C,
+ 0x0420, 0x0C50, 0x3F90,
+ 0x03D8, 0x0C9C, 0x3F8C,
+ 0x038C, 0x0CF0, 0x3F84,
+ 0x0344, 0x0D40, 0x3F7C,
+ 0x0300, 0x0D88, 0x3F78,
+ 0x02BC, 0x0DD0, 0x3F74,
+ 0x027C, 0x0E14, 0x3F70,
+ 0x023C, 0x0E54, 0x3F70,
+ 0x0200, 0x0E90, 0x3F70,
+ 0x01C8, 0x0EC8, 0x3F70,
+ 0x0190, 0x0EFC, 0x3F74,
+ 0x015C, 0x0F2C, 0x3F78,
+ 0x0128, 0x0F5C, 0x3F7C,
+ 0x00FC, 0x0F7C, 0x3F88,
+ 0x00CC, 0x0FA4, 0x3F90,
+ 0x00A4, 0x0FC0, 0x3F9C,
+ 0x007C, 0x0FD8, 0x3FAC,
+ 0x0058, 0x0FE8, 0x3FC0,
+ 0x0038, 0x0FF4, 0x3FD4,
+ 0x0018, 0x1000, 0x3FE8,
+ 0x0000, 0x1000, 0x0000
+};
+
+//=========================================
+// <num_taps> = 3
+// <num_phases> = 64
+// <scale_ratio> = 1.16666 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12
+//=========================================
+static const uint16_t filter_3tap_64p_116[99] = { /* 33 rows x 3 coefficients */
+ 0x0804, 0x07FC, 0x0000,
+ 0x07C0, 0x0844, 0x3FFC,
+ 0x0780, 0x0888, 0x3FF8,
+ 0x0740, 0x08D0, 0x3FF0,
+ 0x0700, 0x0914, 0x3FEC,
+ 0x06C0, 0x0958, 0x3FE8,
+ 0x0684, 0x0998, 0x3FE4,
+ 0x0644, 0x09DC, 0x3FE0,
+ 0x0604, 0x0A1C, 0x3FE0,
+ 0x05C4, 0x0A5C, 0x3FE0,
+ 0x0588, 0x0A9C, 0x3FDC,
+ 0x0548, 0x0ADC, 0x3FDC,
+ 0x050C, 0x0B14, 0x3FE0,
+ 0x04CC, 0x0B54, 0x3FE0,
+ 0x0490, 0x0B8C, 0x3FE4,
+ 0x0458, 0x0BC0, 0x3FE8,
+ 0x041C, 0x0BF4, 0x3FF0,
+ 0x03E0, 0x0C28, 0x3FF8,
+ 0x03A8, 0x0C58, 0x0000,
+ 0x0374, 0x0C88, 0x0004,
+ 0x0340, 0x0CB0, 0x0010,
+ 0x0308, 0x0CD8, 0x0020,
+ 0x02D8, 0x0CFC, 0x002C,
+ 0x02A0, 0x0D20, 0x0040,
+ 0x0274, 0x0D3C, 0x0050,
+ 0x0244, 0x0D58, 0x0064,
+ 0x0214, 0x0D70, 0x007C,
+ 0x01E8, 0x0D84, 0x0094,
+ 0x01C0, 0x0D94, 0x00AC,
+ 0x0198, 0x0DA0, 0x00C8,
+ 0x0170, 0x0DAC, 0x00E4,
+ 0x014C, 0x0DB0, 0x0104,
+ 0x0128, 0x0DB4, 0x0124
+};
+
+//=========================================
+// <num_taps> = 3
+// <num_phases> = 64
+// <scale_ratio> = 1.49999 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12
+//=========================================
+static const uint16_t filter_3tap_64p_149[99] = { /* 33 rows x 3 coefficients */
+ 0x0804, 0x07FC, 0x0000,
+ 0x07CC, 0x0834, 0x0000,
+ 0x0798, 0x0868, 0x0000,
+ 0x0764, 0x089C, 0x0000,
+ 0x0730, 0x08CC, 0x0004,
+ 0x0700, 0x08FC, 0x0004,
+ 0x06CC, 0x092C, 0x0008,
+ 0x0698, 0x095C, 0x000C,
+ 0x0660, 0x098C, 0x0014,
+ 0x062C, 0x09B8, 0x001C,
+ 0x05FC, 0x09E4, 0x0020,
+ 0x05C4, 0x0A10, 0x002C,
+ 0x0590, 0x0A3C, 0x0034,
+ 0x055C, 0x0A64, 0x0040,
+ 0x0528, 0x0A8C, 0x004C,
+ 0x04F8, 0x0AB0, 0x0058,
+ 0x04C4, 0x0AD4, 0x0068,
+ 0x0490, 0x0AF8, 0x0078,
+ 0x0460, 0x0B18, 0x0088,
+ 0x0430, 0x0B38, 0x0098,
+ 0x0400, 0x0B54, 0x00AC,
+ 0x03D0, 0x0B6C, 0x00C4,
+ 0x03A0, 0x0B88, 0x00D8,
+ 0x0374, 0x0B9C, 0x00F0,
+ 0x0348, 0x0BB0, 0x0108,
+ 0x0318, 0x0BC4, 0x0124,
+ 0x02EC, 0x0BD4, 0x0140,
+ 0x02C4, 0x0BE0, 0x015C,
+ 0x029C, 0x0BEC, 0x0178,
+ 0x0274, 0x0BF4, 0x0198,
+ 0x024C, 0x0BFC, 0x01B8,
+ 0x0228, 0x0BFC, 0x01DC,
+ 0x0200, 0x0C00, 0x0200
+};
+
+//=========================================
+// <num_taps> = 3
+// <num_phases> = 64
+// <scale_ratio> = 1.83332 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12
+//=========================================
+static const uint16_t filter_3tap_64p_183[99] = { /* 33 rows x 3 coefficients */
+ 0x0804, 0x07FC, 0x0000,
+ 0x07D4, 0x0824, 0x0008,
+ 0x07AC, 0x0840, 0x0014,
+ 0x0780, 0x0860, 0x0020,
+ 0x0754, 0x0880, 0x002C,
+ 0x0728, 0x089C, 0x003C,
+ 0x0700, 0x08B8, 0x0048,
+ 0x06D4, 0x08D4, 0x0058,
+ 0x06A8, 0x08F0, 0x0068,
+ 0x067C, 0x090C, 0x0078,
+ 0x0650, 0x0924, 0x008C,
+ 0x0628, 0x093C, 0x009C,
+ 0x05FC, 0x0954, 0x00B0,
+ 0x05D0, 0x096C, 0x00C4,
+ 0x05A8, 0x0980, 0x00D8,
+ 0x0578, 0x0998, 0x00F0,
+ 0x0550, 0x09AC, 0x0104,
+ 0x0528, 0x09BC, 0x011C,
+ 0x04FC, 0x09D0, 0x0134,
+ 0x04D4, 0x09E0, 0x014C,
+ 0x04A8, 0x09F0, 0x0168,
+ 0x0480, 0x09FC, 0x0184,
+ 0x045C, 0x0A08, 0x019C,
+ 0x0434, 0x0A14, 0x01B8,
+ 0x0408, 0x0A20, 0x01D8,
+ 0x03E0, 0x0A2C, 0x01F4,
+ 0x03B8, 0x0A34, 0x0214,
+ 0x0394, 0x0A38, 0x0234,
+ 0x036C, 0x0A40, 0x0254,
+ 0x0348, 0x0A44, 0x0274,
+ 0x0324, 0x0A48, 0x0294,
+ 0x0300, 0x0A48, 0x02B8,
+ 0x02DC, 0x0A48, 0x02DC
+};
+
+//=========================================
+// <num_taps> = 4
+// <num_phases> = 64
+// <scale_ratio> = 0.83333 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12
+//=========================================
+static const uint16_t filter_4tap_64p_upscale[132] = { /* 33 rows x 4 coefficients */
+ 0x0000, 0x1000, 0x0000, 0x0000,
+ 0x3FDC, 0x0FFC, 0x0028, 0x0000,
+ 0x3FB4, 0x0FF8, 0x0054, 0x0000,
+ 0x3F94, 0x0FE8, 0x0084, 0x0000,
+ 0x3F74, 0x0FDC, 0x00B4, 0x3FFC,
+ 0x3F58, 0x0FC4, 0x00E8, 0x3FFC,
+ 0x3F3C, 0x0FAC, 0x0120, 0x3FF8,
+ 0x3F24, 0x0F90, 0x0158, 0x3FF4,
+ 0x3F0C, 0x0F70, 0x0194, 0x3FF0,
+ 0x3EF8, 0x0F4C, 0x01D0, 0x3FEC,
+ 0x3EE8, 0x0F20, 0x0210, 0x3FE8,
+ 0x3ED8, 0x0EF4, 0x0254, 0x3FE0,
+ 0x3ECC, 0x0EC4, 0x0298, 0x3FD8,
+ 0x3EC0, 0x0E90, 0x02DC, 0x3FD4,
+ 0x3EB8, 0x0E58, 0x0324, 0x3FCC,
+ 0x3EB0, 0x0E20, 0x036C, 0x3FC4,
+ 0x3EAC, 0x0DE4, 0x03B8, 0x3FB8,
+ 0x3EA8, 0x0DA4, 0x0404, 0x3FB0,
+ 0x3EA4, 0x0D60, 0x0454, 0x3FA8,
+ 0x3EA4, 0x0D1C, 0x04A4, 0x3F9C,
+ 0x3EA4, 0x0CD8, 0x04F4, 0x3F90,
+ 0x3EA8, 0x0C88, 0x0548, 0x3F88,
+ 0x3EAC, 0x0C3C, 0x059C, 0x3F7C,
+ 0x3EB0, 0x0BF0, 0x05F0, 0x3F70,
+ 0x3EB8, 0x0BA0, 0x0644, 0x3F64,
+ 0x3EBC, 0x0B54, 0x0698, 0x3F58,
+ 0x3EC4, 0x0B00, 0x06F0, 0x3F4C,
+ 0x3ECC, 0x0AAC, 0x0748, 0x3F40,
+ 0x3ED8, 0x0A54, 0x07A0, 0x3F34,
+ 0x3EE0, 0x0A04, 0x07F8, 0x3F24,
+ 0x3EEC, 0x09AC, 0x0850, 0x3F18,
+ 0x3EF8, 0x0954, 0x08A8, 0x3F0C,
+ 0x3F00, 0x08FC, 0x0900, 0x3F04
+};
+
+//=========================================
+// <num_taps> = 4
+// <num_phases> = 64
+// <scale_ratio> = 1.16666 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12
+//=========================================
+static const uint16_t filter_4tap_64p_116[132] = { /* 33 rows x 4 coefficients */
+ 0x01A8, 0x0CB4, 0x01A4, 0x0000,
+ 0x017C, 0x0CB8, 0x01D0, 0x3FFC,
+ 0x0158, 0x0CB8, 0x01F8, 0x3FF8,
+ 0x0130, 0x0CB4, 0x0228, 0x3FF4,
+ 0x0110, 0x0CB0, 0x0254, 0x3FEC,
+ 0x00EC, 0x0CA8, 0x0284, 0x3FE8,
+ 0x00CC, 0x0C9C, 0x02B4, 0x3FE4,
+ 0x00AC, 0x0C90, 0x02E8, 0x3FDC,
+ 0x0090, 0x0C80, 0x031C, 0x3FD4,
+ 0x0070, 0x0C70, 0x0350, 0x3FD0,
+ 0x0058, 0x0C5C, 0x0384, 0x3FC8,
+ 0x003C, 0x0C48, 0x03BC, 0x3FC0,
+ 0x0024, 0x0C2C, 0x03F4, 0x3FBC,
+ 0x0010, 0x0C10, 0x042C, 0x3FB4,
+ 0x3FFC, 0x0BF4, 0x0464, 0x3FAC,
+ 0x3FE8, 0x0BD4, 0x04A0, 0x3FA4,
+ 0x3FD8, 0x0BAC, 0x04DC, 0x3FA0,
+ 0x3FC4, 0x0B8C, 0x0518, 0x3F98,
+ 0x3FB4, 0x0B68, 0x0554, 0x3F90,
+ 0x3FA8, 0x0B40, 0x0590, 0x3F88,
+ 0x3F9C, 0x0B14, 0x05CC, 0x3F84,
+ 0x3F90, 0x0AEC, 0x0608, 0x3F7C,
+ 0x3F84, 0x0ABC, 0x0648, 0x3F78,
+ 0x3F7C, 0x0A90, 0x0684, 0x3F70,
+ 0x3F70, 0x0A60, 0x06C4, 0x3F6C,
+ 0x3F6C, 0x0A2C, 0x0700, 0x3F68,
+ 0x3F64, 0x09F8, 0x0740, 0x3F64,
+ 0x3F60, 0x09C4, 0x077C, 0x3F60,
+ 0x3F5C, 0x098C, 0x07BC, 0x3F5C,
+ 0x3F58, 0x0958, 0x07F8, 0x3F58,
+ 0x3F58, 0x091C, 0x0834, 0x3F58,
+ 0x3F54, 0x08E4, 0x0870, 0x3F58,
+ 0x3F54, 0x08AC, 0x08AC, 0x3F54
+};
+
+//=========================================
+// <num_taps> = 4
+// <num_phases> = 64
+// <scale_ratio> = 1.49999 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12
+//=========================================
+static const uint16_t filter_4tap_64p_149[132] = { /* 33 rows x 4 coefficients */
+ 0x02B8, 0x0A90, 0x02B8, 0x0000,
+ 0x0294, 0x0A94, 0x02DC, 0x3FFC,
+ 0x0274, 0x0A94, 0x0300, 0x3FF8,
+ 0x0250, 0x0A94, 0x0328, 0x3FF4,
+ 0x0230, 0x0A90, 0x0350, 0x3FF0,
+ 0x0214, 0x0A8C, 0x0374, 0x3FEC,
+ 0x01F0, 0x0A88, 0x03A0, 0x3FE8,
+ 0x01D4, 0x0A80, 0x03C8, 0x3FE4,
+ 0x01B8, 0x0A78, 0x03F0, 0x3FE0,
+ 0x0198, 0x0A70, 0x041C, 0x3FDC,
+ 0x0180, 0x0A64, 0x0444, 0x3FD8,
+ 0x0164, 0x0A54, 0x0470, 0x3FD8,
+ 0x0148, 0x0A48, 0x049C, 0x3FD4,
+ 0x0130, 0x0A38, 0x04C8, 0x3FD0,
+ 0x0118, 0x0A24, 0x04F4, 0x3FD0,
+ 0x0100, 0x0A14, 0x0520, 0x3FCC,
+ 0x00E8, 0x0A00, 0x054C, 0x3FCC,
+ 0x00D4, 0x09E8, 0x057C, 0x3FC8,
+ 0x00C0, 0x09D0, 0x05A8, 0x3FC8,
+ 0x00AC, 0x09B8, 0x05D4, 0x3FC8,
+ 0x0098, 0x09A0, 0x0600, 0x3FC8,
+ 0x0084, 0x0984, 0x0630, 0x3FC8,
+ 0x0074, 0x0964, 0x065C, 0x3FCC,
+ 0x0064, 0x0948, 0x0688, 0x3FCC,
+ 0x0054, 0x0928, 0x06B4, 0x3FD0,
+ 0x0044, 0x0908, 0x06E0, 0x3FD4,
+ 0x0038, 0x08E8, 0x070C, 0x3FD4,
+ 0x002C, 0x08C4, 0x0738, 0x3FD8,
+ 0x001C, 0x08A4, 0x0760, 0x3FE0,
+ 0x0014, 0x087C, 0x078C, 0x3FE4,
+ 0x0008, 0x0858, 0x07B4, 0x3FEC,
+ 0x0000, 0x0830, 0x07DC, 0x3FF4,
+ 0x3FFC, 0x0804, 0x0804, 0x3FFC
+};
+
+//=========================================
+// <num_taps> = 4
+// <num_phases> = 64
+// <scale_ratio> = 1.83332 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12
+//=========================================
+static const uint16_t filter_4tap_64p_183[132] = { /* 33 rows x 4 coefficients */
+ 0x03B0, 0x08A0, 0x03B0, 0x0000,
+ 0x0394, 0x08A0, 0x03CC, 0x0000,
+ 0x037C, 0x089C, 0x03E8, 0x0000,
+ 0x0360, 0x089C, 0x0400, 0x0004,
+ 0x0348, 0x0898, 0x041C, 0x0004,
+ 0x032C, 0x0894, 0x0438, 0x0008,
+ 0x0310, 0x0890, 0x0454, 0x000C,
+ 0x02F8, 0x0888, 0x0474, 0x000C,
+ 0x02DC, 0x0884, 0x0490, 0x0010,
+ 0x02C4, 0x087C, 0x04AC, 0x0014,
+ 0x02AC, 0x0874, 0x04C8, 0x0018,
+ 0x0290, 0x086C, 0x04E4, 0x0020,
+ 0x0278, 0x0864, 0x0500, 0x0024,
+ 0x0264, 0x0858, 0x051C, 0x0028,
+ 0x024C, 0x084C, 0x0538, 0x0030,
+ 0x0234, 0x0844, 0x0554, 0x0034,
+ 0x021C, 0x0838, 0x0570, 0x003C,
+ 0x0208, 0x0828, 0x058C, 0x0044,
+ 0x01F0, 0x081C, 0x05A8, 0x004C,
+ 0x01DC, 0x080C, 0x05C4, 0x0054,
+ 0x01C8, 0x07FC, 0x05E0, 0x005C,
+ 0x01B4, 0x07EC, 0x05FC, 0x0064,
+ 0x019C, 0x07DC, 0x0618, 0x0070,
+ 0x018C, 0x07CC, 0x0630, 0x0078,
+ 0x0178, 0x07B8, 0x064C, 0x0084,
+ 0x0164, 0x07A8, 0x0664, 0x0090,
+ 0x0150, 0x0794, 0x0680, 0x009C,
+ 0x0140, 0x0780, 0x0698, 0x00A8,
+ 0x0130, 0x076C, 0x06B0, 0x00B4,
+ 0x0120, 0x0758, 0x06C8, 0x00C0,
+ 0x0110, 0x0740, 0x06E0, 0x00D0,
+ 0x0100, 0x072C, 0x06F8, 0x00DC,
+ 0x00F0, 0x0714, 0x0710, 0x00EC
+};
+
+//=========================================
+// <num_taps> = 5 (each row below holds 5 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 0.83333 (input/output; below 1.0, so output is larger than input)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_5tap_64p_upscale[165] = {
+ 0x3E40, 0x09C0, 0x09C0, 0x3E40, 0x0000,
+ 0x3E50, 0x0964, 0x0A18, 0x3E34, 0x0000,
+ 0x3E5C, 0x0908, 0x0A6C, 0x3E2C, 0x0004,
+ 0x3E6C, 0x08AC, 0x0AC0, 0x3E20, 0x0008,
+ 0x3E78, 0x0850, 0x0B14, 0x3E18, 0x000C,
+ 0x3E88, 0x07F4, 0x0B60, 0x3E14, 0x0010,
+ 0x3E98, 0x0798, 0x0BB0, 0x3E0C, 0x0014,
+ 0x3EA8, 0x073C, 0x0C00, 0x3E08, 0x0014,
+ 0x3EB8, 0x06E4, 0x0C48, 0x3E04, 0x0018,
+ 0x3ECC, 0x0684, 0x0C90, 0x3E04, 0x001C,
+ 0x3EDC, 0x062C, 0x0CD4, 0x3E04, 0x0020,
+ 0x3EEC, 0x05D4, 0x0D1C, 0x3E04, 0x0020,
+ 0x3EFC, 0x057C, 0x0D5C, 0x3E08, 0x0024,
+ 0x3F0C, 0x0524, 0x0D98, 0x3E10, 0x0028,
+ 0x3F20, 0x04CC, 0x0DD8, 0x3E14, 0x0028,
+ 0x3F30, 0x0478, 0x0E14, 0x3E1C, 0x0028,
+ 0x3F40, 0x0424, 0x0E48, 0x3E28, 0x002C,
+ 0x3F50, 0x03D4, 0x0E7C, 0x3E34, 0x002C,
+ 0x3F60, 0x0384, 0x0EAC, 0x3E44, 0x002C,
+ 0x3F6C, 0x0338, 0x0EDC, 0x3E54, 0x002C,
+ 0x3F7C, 0x02E8, 0x0F08, 0x3E68, 0x002C,
+ 0x3F8C, 0x02A0, 0x0F2C, 0x3E7C, 0x002C,
+ 0x3F98, 0x0258, 0x0F50, 0x3E94, 0x002C,
+ 0x3FA4, 0x0210, 0x0F74, 0x3EB0, 0x0028,
+ 0x3FB0, 0x01CC, 0x0F90, 0x3ECC, 0x0028,
+ 0x3FC0, 0x018C, 0x0FA8, 0x3EE8, 0x0024,
+ 0x3FC8, 0x014C, 0x0FC0, 0x3F0C, 0x0020,
+ 0x3FD4, 0x0110, 0x0FD4, 0x3F2C, 0x001C,
+ 0x3FE0, 0x00D4, 0x0FE0, 0x3F54, 0x0018,
+ 0x3FE8, 0x009C, 0x0FF0, 0x3F7C, 0x0010,
+ 0x3FF0, 0x0064, 0x0FFC, 0x3FA4, 0x000C,
+ 0x3FFC, 0x0030, 0x0FFC, 0x3FD4, 0x0004,
+ 0x0000, 0x0000, 0x1000, 0x0000, 0x0000
+};
+
+//=========================================
+// <num_taps> = 5 (each row below holds 5 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.16666 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_5tap_64p_116[165] = {
+ 0x3EDC, 0x0924, 0x0924, 0x3EDC, 0x0000,
+ 0x3ED8, 0x08EC, 0x095C, 0x3EE0, 0x0000,
+ 0x3ED4, 0x08B0, 0x0994, 0x3EE8, 0x0000,
+ 0x3ED0, 0x0878, 0x09C8, 0x3EF0, 0x0000,
+ 0x3ED0, 0x083C, 0x09FC, 0x3EF8, 0x0000,
+ 0x3ED0, 0x0800, 0x0A2C, 0x3F04, 0x0000,
+ 0x3ED0, 0x07C4, 0x0A5C, 0x3F10, 0x0000,
+ 0x3ED0, 0x0788, 0x0A8C, 0x3F1C, 0x0000,
+ 0x3ED0, 0x074C, 0x0AC0, 0x3F28, 0x3FFC,
+ 0x3ED4, 0x0710, 0x0AE8, 0x3F38, 0x3FFC,
+ 0x3ED8, 0x06D0, 0x0B18, 0x3F48, 0x3FF8,
+ 0x3EDC, 0x0694, 0x0B3C, 0x3F5C, 0x3FF8,
+ 0x3EE0, 0x0658, 0x0B68, 0x3F6C, 0x3FF4,
+ 0x3EE4, 0x061C, 0x0B90, 0x3F80, 0x3FF0,
+ 0x3EEC, 0x05DC, 0x0BB4, 0x3F98, 0x3FEC,
+ 0x3EF0, 0x05A0, 0x0BD8, 0x3FB0, 0x3FE8,
+ 0x3EF8, 0x0564, 0x0BF8, 0x3FC8, 0x3FE4,
+ 0x3EFC, 0x0528, 0x0C1C, 0x3FE0, 0x3FE0,
+ 0x3F04, 0x04EC, 0x0C38, 0x3FFC, 0x3FDC,
+ 0x3F0C, 0x04B4, 0x0C54, 0x0014, 0x3FD8,
+ 0x3F14, 0x047C, 0x0C70, 0x0030, 0x3FD0,
+ 0x3F1C, 0x0440, 0x0C88, 0x0050, 0x3FCC,
+ 0x3F24, 0x0408, 0x0CA0, 0x0070, 0x3FC4,
+ 0x3F2C, 0x03D0, 0x0CB0, 0x0094, 0x3FC0,
+ 0x3F34, 0x0398, 0x0CC4, 0x00B8, 0x3FB8,
+ 0x3F3C, 0x0364, 0x0CD4, 0x00DC, 0x3FB0,
+ 0x3F48, 0x032C, 0x0CE0, 0x0100, 0x3FAC,
+ 0x3F50, 0x02F8, 0x0CEC, 0x0128, 0x3FA4,
+ 0x3F58, 0x02C4, 0x0CF8, 0x0150, 0x3F9C,
+ 0x3F60, 0x0290, 0x0D00, 0x017C, 0x3F94,
+ 0x3F68, 0x0260, 0x0D04, 0x01A8, 0x3F8C,
+ 0x3F74, 0x0230, 0x0D04, 0x01D4, 0x3F84,
+ 0x3F7C, 0x0200, 0x0D08, 0x0200, 0x3F7C
+};
+
+//=========================================
+// <num_taps> = 5 (each row below holds 5 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.49999 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_5tap_64p_149[165] = {
+ 0x3FF4, 0x080C, 0x080C, 0x3FF4, 0x0000,
+ 0x3FE8, 0x07E8, 0x0830, 0x0000, 0x0000,
+ 0x3FDC, 0x07C8, 0x0850, 0x0010, 0x3FFC,
+ 0x3FD0, 0x07A4, 0x0878, 0x001C, 0x3FF8,
+ 0x3FC4, 0x0780, 0x0898, 0x0030, 0x3FF4,
+ 0x3FB8, 0x075C, 0x08B8, 0x0040, 0x3FF4,
+ 0x3FB0, 0x0738, 0x08D8, 0x0050, 0x3FF0,
+ 0x3FA8, 0x0710, 0x08F8, 0x0064, 0x3FEC,
+ 0x3FA0, 0x06EC, 0x0914, 0x0078, 0x3FE8,
+ 0x3F98, 0x06C4, 0x0934, 0x008C, 0x3FE4,
+ 0x3F90, 0x06A0, 0x094C, 0x00A4, 0x3FE0,
+ 0x3F8C, 0x0678, 0x0968, 0x00B8, 0x3FDC,
+ 0x3F84, 0x0650, 0x0984, 0x00D0, 0x3FD8,
+ 0x3F80, 0x0628, 0x099C, 0x00E8, 0x3FD4,
+ 0x3F7C, 0x0600, 0x09B8, 0x0100, 0x3FCC,
+ 0x3F78, 0x05D8, 0x09D0, 0x0118, 0x3FC8,
+ 0x3F74, 0x05B0, 0x09E4, 0x0134, 0x3FC4,
+ 0x3F70, 0x0588, 0x09F8, 0x0150, 0x3FC0,
+ 0x3F70, 0x0560, 0x0A08, 0x016C, 0x3FBC,
+ 0x3F6C, 0x0538, 0x0A20, 0x0188, 0x3FB4,
+ 0x3F6C, 0x0510, 0x0A30, 0x01A4, 0x3FB0,
+ 0x3F6C, 0x04E8, 0x0A3C, 0x01C4, 0x3FAC,
+ 0x3F6C, 0x04C0, 0x0A48, 0x01E4, 0x3FA8,
+ 0x3F6C, 0x0498, 0x0A58, 0x0200, 0x3FA4,
+ 0x3F6C, 0x0470, 0x0A60, 0x0224, 0x3FA0,
+ 0x3F6C, 0x0448, 0x0A70, 0x0244, 0x3F98,
+ 0x3F70, 0x0420, 0x0A78, 0x0264, 0x3F94,
+ 0x3F70, 0x03F8, 0x0A80, 0x0288, 0x3F90,
+ 0x3F74, 0x03D4, 0x0A84, 0x02A8, 0x3F8C,
+ 0x3F74, 0x03AC, 0x0A8C, 0x02CC, 0x3F88,
+ 0x3F78, 0x0384, 0x0A90, 0x02F0, 0x3F84,
+ 0x3F7C, 0x0360, 0x0A90, 0x0314, 0x3F80,
+ 0x3F7C, 0x033C, 0x0A90, 0x033C, 0x3F7C
+};
+
+//=========================================
+// <num_taps> = 5 (each row below holds 5 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.83332 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_5tap_64p_183[165] = {
+ 0x0168, 0x069C, 0x0698, 0x0164, 0x0000,
+ 0x0154, 0x068C, 0x06AC, 0x0174, 0x0000,
+ 0x0144, 0x0674, 0x06C0, 0x0188, 0x0000,
+ 0x0138, 0x0664, 0x06D0, 0x0198, 0x3FFC,
+ 0x0128, 0x0654, 0x06E0, 0x01A8, 0x3FFC,
+ 0x0118, 0x0640, 0x06F0, 0x01BC, 0x3FFC,
+ 0x010C, 0x0630, 0x0700, 0x01CC, 0x3FF8,
+ 0x00FC, 0x061C, 0x0710, 0x01E0, 0x3FF8,
+ 0x00F0, 0x060C, 0x071C, 0x01F0, 0x3FF8,
+ 0x00E4, 0x05F4, 0x072C, 0x0204, 0x3FF8,
+ 0x00D8, 0x05E4, 0x0738, 0x0218, 0x3FF4,
+ 0x00CC, 0x05D0, 0x0744, 0x022C, 0x3FF4,
+ 0x00C0, 0x05B8, 0x0754, 0x0240, 0x3FF4,
+ 0x00B4, 0x05A4, 0x0760, 0x0254, 0x3FF4,
+ 0x00A8, 0x0590, 0x076C, 0x0268, 0x3FF4,
+ 0x009C, 0x057C, 0x0778, 0x027C, 0x3FF4,
+ 0x0094, 0x0564, 0x0780, 0x0294, 0x3FF4,
+ 0x0088, 0x0550, 0x0788, 0x02A8, 0x3FF8,
+ 0x0080, 0x0538, 0x0794, 0x02BC, 0x3FF8,
+ 0x0074, 0x0524, 0x079C, 0x02D4, 0x3FF8,
+ 0x006C, 0x0510, 0x07A4, 0x02E8, 0x3FF8,
+ 0x0064, 0x04F4, 0x07AC, 0x0300, 0x3FFC,
+ 0x005C, 0x04E4, 0x07B0, 0x0314, 0x3FFC,
+ 0x0054, 0x04C8, 0x07B8, 0x032C, 0x0000,
+ 0x004C, 0x04B4, 0x07C0, 0x0340, 0x0000,
+ 0x0044, 0x04A0, 0x07C4, 0x0358, 0x0000,
+ 0x003C, 0x0488, 0x07C8, 0x0370, 0x0004,
+ 0x0038, 0x0470, 0x07CC, 0x0384, 0x0008,
+ 0x0030, 0x045C, 0x07D0, 0x039C, 0x0008,
+ 0x002C, 0x0444, 0x07D0, 0x03B4, 0x000C,
+ 0x0024, 0x042C, 0x07D4, 0x03CC, 0x0010,
+ 0x0020, 0x0414, 0x07D4, 0x03E0, 0x0018,
+ 0x001C, 0x03FC, 0x07D4, 0x03F8, 0x001C
+};
+
+//=========================================
+// <num_taps> = 6 (each row below holds 6 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 0.83333 (input/output; below 1.0, so output is larger than input)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_6tap_64p_upscale[198] = {
+ 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, 0x0000,
+ 0x000C, 0x3FD0, 0x0FFC, 0x0034, 0x3FF4, 0x0000,
+ 0x0018, 0x3F9C, 0x0FF8, 0x006C, 0x3FE8, 0x0000,
+ 0x0024, 0x3F6C, 0x0FF0, 0x00A8, 0x3FD8, 0x0000,
+ 0x002C, 0x3F44, 0x0FE4, 0x00E4, 0x3FC8, 0x0000,
+ 0x0038, 0x3F18, 0x0FD4, 0x0124, 0x3FB8, 0x0000,
+ 0x0040, 0x3EF0, 0x0FC0, 0x0164, 0x3FA8, 0x0004,
+ 0x0048, 0x3EC8, 0x0FAC, 0x01A8, 0x3F98, 0x0004,
+ 0x0050, 0x3EA8, 0x0F94, 0x01EC, 0x3F84, 0x0004,
+ 0x0058, 0x3E84, 0x0F74, 0x0234, 0x3F74, 0x0008,
+ 0x0060, 0x3E68, 0x0F54, 0x027C, 0x3F60, 0x0008,
+ 0x0064, 0x3E4C, 0x0F30, 0x02C8, 0x3F4C, 0x000C,
+ 0x006C, 0x3E30, 0x0F04, 0x0314, 0x3F3C, 0x0010,
+ 0x0070, 0x3E18, 0x0EDC, 0x0360, 0x3F28, 0x0014,
+ 0x0074, 0x3E04, 0x0EB0, 0x03B0, 0x3F14, 0x0014,
+ 0x0078, 0x3DF0, 0x0E80, 0x0400, 0x3F00, 0x0018,
+ 0x0078, 0x3DE0, 0x0E4C, 0x0454, 0x3EEC, 0x001C,
+ 0x007C, 0x3DD0, 0x0E14, 0x04A8, 0x3ED8, 0x0020,
+ 0x007C, 0x3DC4, 0x0DDC, 0x04FC, 0x3EC4, 0x0024,
+ 0x007C, 0x3DBC, 0x0DA0, 0x0550, 0x3EB0, 0x0028,
+ 0x0080, 0x3DB4, 0x0D5C, 0x05A8, 0x3E9C, 0x002C,
+ 0x0080, 0x3DAC, 0x0D1C, 0x0600, 0x3E88, 0x0030,
+ 0x007C, 0x3DA8, 0x0CDC, 0x0658, 0x3E74, 0x0034,
+ 0x007C, 0x3DA4, 0x0C94, 0x06B0, 0x3E64, 0x0038,
+ 0x007C, 0x3DA4, 0x0C48, 0x0708, 0x3E50, 0x0040,
+ 0x0078, 0x3DA4, 0x0C00, 0x0760, 0x3E40, 0x0044,
+ 0x0078, 0x3DA8, 0x0BB4, 0x07B8, 0x3E2C, 0x0048,
+ 0x0074, 0x3DAC, 0x0B68, 0x0810, 0x3E1C, 0x004C,
+ 0x0070, 0x3DB4, 0x0B18, 0x0868, 0x3E0C, 0x0050,
+ 0x006C, 0x3DBC, 0x0AC4, 0x08C4, 0x3DFC, 0x0054,
+ 0x0068, 0x3DC4, 0x0A74, 0x0918, 0x3DF0, 0x0058,
+ 0x0068, 0x3DCC, 0x0A20, 0x0970, 0x3DE0, 0x005C,
+ 0x0064, 0x3DD4, 0x09C8, 0x09C8, 0x3DD4, 0x0064
+};
+
+//=========================================
+// <num_taps> = 6 (each row below holds 6 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.16666 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_6tap_64p_116[198] = {
+ 0x3F0C, 0x0240, 0x0D68, 0x0240, 0x3F0C, 0x0000,
+ 0x3F18, 0x0210, 0x0D64, 0x0274, 0x3F00, 0x0000,
+ 0x3F24, 0x01E0, 0x0D58, 0x02A8, 0x3EF8, 0x0004,
+ 0x3F2C, 0x01B0, 0x0D58, 0x02DC, 0x3EEC, 0x0004,
+ 0x3F38, 0x0180, 0x0D50, 0x0310, 0x3EE0, 0x0008,
+ 0x3F44, 0x0154, 0x0D40, 0x0348, 0x3ED8, 0x0008,
+ 0x3F50, 0x0128, 0x0D34, 0x037C, 0x3ECC, 0x000C,
+ 0x3F5C, 0x00FC, 0x0D20, 0x03B4, 0x3EC4, 0x0010,
+ 0x3F64, 0x00D4, 0x0D14, 0x03EC, 0x3EB8, 0x0010,
+ 0x3F70, 0x00AC, 0x0CFC, 0x0424, 0x3EB0, 0x0014,
+ 0x3F78, 0x0084, 0x0CE8, 0x0460, 0x3EA8, 0x0014,
+ 0x3F84, 0x0060, 0x0CCC, 0x0498, 0x3EA0, 0x0018,
+ 0x3F90, 0x003C, 0x0CB4, 0x04D0, 0x3E98, 0x0018,
+ 0x3F98, 0x0018, 0x0C9C, 0x050C, 0x3E90, 0x0018,
+ 0x3FA0, 0x3FFC, 0x0C78, 0x0548, 0x3E88, 0x001C,
+ 0x3FAC, 0x3FDC, 0x0C54, 0x0584, 0x3E84, 0x001C,
+ 0x3FB4, 0x3FBC, 0x0C3C, 0x05BC, 0x3E7C, 0x001C,
+ 0x3FBC, 0x3FA0, 0x0C14, 0x05F8, 0x3E78, 0x0020,
+ 0x3FC4, 0x3F84, 0x0BF0, 0x0634, 0x3E74, 0x0020,
+ 0x3FCC, 0x3F68, 0x0BCC, 0x0670, 0x3E70, 0x0020,
+ 0x3FD4, 0x3F50, 0x0BA4, 0x06AC, 0x3E6C, 0x0020,
+ 0x3FDC, 0x3F38, 0x0B78, 0x06E8, 0x3E6C, 0x0020,
+ 0x3FE0, 0x3F24, 0x0B50, 0x0724, 0x3E68, 0x0020,
+ 0x3FE8, 0x3F0C, 0x0B24, 0x0760, 0x3E68, 0x0020,
+ 0x3FF0, 0x3EFC, 0x0AF4, 0x0798, 0x3E68, 0x0020,
+ 0x3FF4, 0x3EE8, 0x0AC8, 0x07D4, 0x3E68, 0x0020,
+ 0x3FFC, 0x3ED8, 0x0A94, 0x0810, 0x3E6C, 0x001C,
+ 0x0000, 0x3EC8, 0x0A64, 0x0848, 0x3E70, 0x001C,
+ 0x0000, 0x3EB8, 0x0A38, 0x0880, 0x3E74, 0x001C,
+ 0x0004, 0x3EAC, 0x0A04, 0x08BC, 0x3E78, 0x0018,
+ 0x0008, 0x3EA4, 0x09D0, 0x08F4, 0x3E7C, 0x0014,
+ 0x000C, 0x3E98, 0x0998, 0x092C, 0x3E84, 0x0014,
+ 0x0010, 0x3E90, 0x0964, 0x0960, 0x3E8C, 0x0010
+};
+
+//=========================================
+// <num_taps> = 6 (each row below holds 6 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.49999 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_6tap_64p_149[198] = {
+ 0x3F14, 0x0394, 0x0AB0, 0x0394, 0x3F14, 0x0000,
+ 0x3F18, 0x036C, 0x0AB0, 0x03B8, 0x3F14, 0x0000,
+ 0x3F18, 0x0348, 0x0AAC, 0x03E0, 0x3F14, 0x0000,
+ 0x3F1C, 0x0320, 0x0AAC, 0x0408, 0x3F10, 0x0000,
+ 0x3F20, 0x02FC, 0x0AA8, 0x042C, 0x3F10, 0x0000,
+ 0x3F24, 0x02D8, 0x0AA0, 0x0454, 0x3F10, 0x0000,
+ 0x3F28, 0x02B4, 0x0A98, 0x047C, 0x3F10, 0x0000,
+ 0x3F28, 0x0290, 0x0A90, 0x04A4, 0x3F14, 0x0000,
+ 0x3F30, 0x026C, 0x0A84, 0x04CC, 0x3F14, 0x0000,
+ 0x3F34, 0x024C, 0x0A7C, 0x04F4, 0x3F14, 0x3FFC,
+ 0x3F38, 0x0228, 0x0A70, 0x051C, 0x3F18, 0x3FFC,
+ 0x3F3C, 0x0208, 0x0A64, 0x0544, 0x3F1C, 0x3FF8,
+ 0x3F40, 0x01E8, 0x0A54, 0x056C, 0x3F20, 0x3FF8,
+ 0x3F44, 0x01C8, 0x0A48, 0x0594, 0x3F24, 0x3FF4,
+ 0x3F4C, 0x01A8, 0x0A34, 0x05BC, 0x3F28, 0x3FF4,
+ 0x3F50, 0x0188, 0x0A28, 0x05E4, 0x3F2C, 0x3FF0,
+ 0x3F54, 0x016C, 0x0A10, 0x060C, 0x3F34, 0x3FF0,
+ 0x3F5C, 0x014C, 0x09FC, 0x0634, 0x3F3C, 0x3FEC,
+ 0x3F60, 0x0130, 0x09EC, 0x065C, 0x3F40, 0x3FE8,
+ 0x3F68, 0x0114, 0x09D0, 0x0684, 0x3F48, 0x3FE8,
+ 0x3F6C, 0x00F8, 0x09B8, 0x06AC, 0x3F54, 0x3FE4,
+ 0x3F74, 0x00E0, 0x09A0, 0x06D0, 0x3F5C, 0x3FE0,
+ 0x3F78, 0x00C4, 0x098C, 0x06F8, 0x3F64, 0x3FDC,
+ 0x3F7C, 0x00AC, 0x0970, 0x0720, 0x3F70, 0x3FD8,
+ 0x3F84, 0x0094, 0x0954, 0x0744, 0x3F7C, 0x3FD4,
+ 0x3F88, 0x007C, 0x093C, 0x0768, 0x3F88, 0x3FD0,
+ 0x3F90, 0x0064, 0x091C, 0x0790, 0x3F94, 0x3FCC,
+ 0x3F94, 0x0050, 0x08FC, 0x07B4, 0x3FA4, 0x3FC8,
+ 0x3F98, 0x003C, 0x08E0, 0x07D8, 0x3FB0, 0x3FC4,
+ 0x3FA0, 0x0024, 0x08C0, 0x07FC, 0x3FC0, 0x3FC0,
+ 0x3FA4, 0x0014, 0x08A4, 0x081C, 0x3FD0, 0x3FB8,
+ 0x3FAC, 0x0000, 0x0880, 0x0840, 0x3FE0, 0x3FB4,
+ 0x3FB0, 0x3FF0, 0x0860, 0x0860, 0x3FF0, 0x3FB0
+};
+
+//=========================================
+// <num_taps> = 6 (each row below holds 6 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.83332 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_6tap_64p_183[198] = {
+ 0x002C, 0x0420, 0x076C, 0x041C, 0x002C, 0x0000,
+ 0x0028, 0x040C, 0x0768, 0x0430, 0x0034, 0x0000,
+ 0x0020, 0x03F8, 0x0768, 0x0448, 0x003C, 0x3FFC,
+ 0x0018, 0x03E4, 0x0768, 0x045C, 0x0044, 0x3FFC,
+ 0x0014, 0x03D0, 0x0768, 0x0470, 0x004C, 0x3FF8,
+ 0x000C, 0x03BC, 0x0764, 0x0484, 0x0058, 0x3FF8,
+ 0x0008, 0x03A4, 0x0764, 0x049C, 0x0060, 0x3FF4,
+ 0x0004, 0x0390, 0x0760, 0x04B0, 0x0068, 0x3FF4,
+ 0x0000, 0x037C, 0x0760, 0x04C4, 0x0070, 0x3FF0,
+ 0x3FFC, 0x0364, 0x075C, 0x04D8, 0x007C, 0x3FF0,
+ 0x3FF8, 0x0350, 0x0758, 0x04F0, 0x0084, 0x3FEC,
+ 0x3FF4, 0x033C, 0x0750, 0x0504, 0x0090, 0x3FEC,
+ 0x3FF0, 0x0328, 0x074C, 0x0518, 0x009C, 0x3FE8,
+ 0x3FEC, 0x0314, 0x0744, 0x052C, 0x00A8, 0x3FE8,
+ 0x3FE8, 0x0304, 0x0740, 0x0540, 0x00B0, 0x3FE4,
+ 0x3FE4, 0x02EC, 0x073C, 0x0554, 0x00BC, 0x3FE4,
+ 0x3FE0, 0x02DC, 0x0734, 0x0568, 0x00C8, 0x3FE0,
+ 0x3FE0, 0x02C4, 0x072C, 0x057C, 0x00D4, 0x3FE0,
+ 0x3FDC, 0x02B4, 0x0724, 0x058C, 0x00E4, 0x3FDC,
+ 0x3FDC, 0x02A0, 0x0718, 0x05A0, 0x00F0, 0x3FDC,
+ 0x3FD8, 0x028C, 0x0714, 0x05B4, 0x00FC, 0x3FD8,
+ 0x3FD8, 0x0278, 0x0704, 0x05C8, 0x010C, 0x3FD8,
+ 0x3FD4, 0x0264, 0x0700, 0x05D8, 0x0118, 0x3FD8,
+ 0x3FD4, 0x0254, 0x06F0, 0x05EC, 0x0128, 0x3FD4,
+ 0x3FD0, 0x0244, 0x06E8, 0x05FC, 0x0134, 0x3FD4,
+ 0x3FD0, 0x0230, 0x06DC, 0x060C, 0x0144, 0x3FD4,
+ 0x3FD0, 0x021C, 0x06D0, 0x0620, 0x0154, 0x3FD0,
+ 0x3FD0, 0x0208, 0x06C4, 0x0630, 0x0164, 0x3FD0,
+ 0x3FD0, 0x01F8, 0x06B8, 0x0640, 0x0170, 0x3FD0,
+ 0x3FCC, 0x01E8, 0x06AC, 0x0650, 0x0180, 0x3FD0,
+ 0x3FCC, 0x01D8, 0x069C, 0x0660, 0x0190, 0x3FD0,
+ 0x3FCC, 0x01C4, 0x068C, 0x0670, 0x01A4, 0x3FD0,
+ 0x3FCC, 0x01B8, 0x0680, 0x067C, 0x01B4, 0x3FCC
+};
+
+//=========================================
+// <num_taps> = 7 (each row below holds 7 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 0.83333 (input/output; below 1.0, so output is larger than input)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_7tap_64p_upscale[231] = {
+ 0x00B0, 0x3D98, 0x09BC, 0x09B8, 0x3D94, 0x00B0, 0x0000,
+ 0x00AC, 0x3DA0, 0x0968, 0x0A10, 0x3D88, 0x00B4, 0x0000,
+ 0x00A8, 0x3DAC, 0x0914, 0x0A60, 0x3D80, 0x00B8, 0x0000,
+ 0x00A4, 0x3DB8, 0x08C0, 0x0AB4, 0x3D78, 0x00BC, 0x3FFC,
+ 0x00A0, 0x3DC8, 0x0868, 0x0B00, 0x3D74, 0x00C0, 0x3FFC,
+ 0x0098, 0x3DD8, 0x0818, 0x0B54, 0x3D6C, 0x00C0, 0x3FF8,
+ 0x0094, 0x3DE8, 0x07C0, 0x0B9C, 0x3D6C, 0x00C4, 0x3FF8,
+ 0x008C, 0x3DFC, 0x0768, 0x0BEC, 0x3D68, 0x00C4, 0x3FF8,
+ 0x0088, 0x3E0C, 0x0714, 0x0C38, 0x3D68, 0x00C4, 0x3FF4,
+ 0x0080, 0x3E20, 0x06BC, 0x0C80, 0x3D6C, 0x00C4, 0x3FF4,
+ 0x0078, 0x3E34, 0x0668, 0x0CC4, 0x3D70, 0x00C4, 0x3FF4,
+ 0x0074, 0x3E48, 0x0610, 0x0D08, 0x3D78, 0x00C4, 0x3FF0,
+ 0x006C, 0x3E5C, 0x05BC, 0x0D48, 0x3D80, 0x00C4, 0x3FF0,
+ 0x0068, 0x3E74, 0x0568, 0x0D84, 0x3D88, 0x00C0, 0x3FF0,
+ 0x0060, 0x3E88, 0x0514, 0x0DC8, 0x3D94, 0x00BC, 0x3FEC,
+ 0x0058, 0x3E9C, 0x04C0, 0x0E04, 0x3DA4, 0x00B8, 0x3FEC,
+ 0x0054, 0x3EB4, 0x046C, 0x0E38, 0x3DB4, 0x00B4, 0x3FEC,
+ 0x004C, 0x3ECC, 0x0418, 0x0E6C, 0x3DC8, 0x00B0, 0x3FEC,
+ 0x0044, 0x3EE0, 0x03C8, 0x0EA4, 0x3DDC, 0x00A8, 0x3FEC,
+ 0x0040, 0x3EF8, 0x0378, 0x0ED0, 0x3DF4, 0x00A0, 0x3FEC,
+ 0x0038, 0x3F0C, 0x032C, 0x0EFC, 0x3E10, 0x0098, 0x3FEC,
+ 0x0034, 0x3F24, 0x02DC, 0x0F24, 0x3E2C, 0x0090, 0x3FEC,
+ 0x002C, 0x3F38, 0x0294, 0x0F4C, 0x3E48, 0x0088, 0x3FEC,
+ 0x0028, 0x3F50, 0x0248, 0x0F68, 0x3E6C, 0x007C, 0x3FF0,
+ 0x0020, 0x3F64, 0x0200, 0x0F88, 0x3E90, 0x0074, 0x3FF0,
+ 0x001C, 0x3F7C, 0x01B8, 0x0FA4, 0x3EB4, 0x0068, 0x3FF0,
+ 0x0018, 0x3F90, 0x0174, 0x0FBC, 0x3EDC, 0x0058, 0x3FF4,
+ 0x0014, 0x3FA4, 0x0130, 0x0FD0, 0x3F08, 0x004C, 0x3FF4,
+ 0x000C, 0x3FB8, 0x00F0, 0x0FE4, 0x3F34, 0x003C, 0x3FF8,
+ 0x0008, 0x3FCC, 0x00B0, 0x0FF0, 0x3F64, 0x0030, 0x3FF8,
+ 0x0004, 0x3FDC, 0x0070, 0x0FFC, 0x3F98, 0x0020, 0x3FFC,
+ 0x0000, 0x3FF0, 0x0038, 0x0FFC, 0x3FCC, 0x0010, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, 0x0000
+};
+
+//=========================================
+// <num_taps> = 7 (each row below holds 7 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.16666 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_7tap_64p_116[231] = {
+ 0x0020, 0x3E58, 0x0988, 0x0988, 0x3E58, 0x0020, 0x0000,
+ 0x0024, 0x3E4C, 0x0954, 0x09C0, 0x3E64, 0x0018, 0x0000,
+ 0x002C, 0x3E44, 0x091C, 0x09F4, 0x3E70, 0x0010, 0x0000,
+ 0x0030, 0x3E3C, 0x08E8, 0x0A24, 0x3E80, 0x0008, 0x0000,
+ 0x0034, 0x3E34, 0x08AC, 0x0A5C, 0x3E90, 0x0000, 0x0000,
+ 0x003C, 0x3E30, 0x0870, 0x0A84, 0x3EA0, 0x3FFC, 0x0004,
+ 0x0040, 0x3E28, 0x0838, 0x0AB4, 0x3EB4, 0x3FF4, 0x0004,
+ 0x0044, 0x3E24, 0x07FC, 0x0AE4, 0x3EC8, 0x3FEC, 0x0004,
+ 0x0048, 0x3E24, 0x07C4, 0x0B08, 0x3EDC, 0x3FE4, 0x0008,
+ 0x0048, 0x3E20, 0x0788, 0x0B3C, 0x3EF4, 0x3FD8, 0x0008,
+ 0x004C, 0x3E20, 0x074C, 0x0B60, 0x3F0C, 0x3FD0, 0x000C,
+ 0x0050, 0x3E20, 0x0710, 0x0B8C, 0x3F24, 0x3FC4, 0x000C,
+ 0x0050, 0x3E20, 0x06D4, 0x0BB0, 0x3F40, 0x3FBC, 0x0010,
+ 0x0054, 0x3E24, 0x0698, 0x0BD4, 0x3F5C, 0x3FB0, 0x0010,
+ 0x0054, 0x3E24, 0x065C, 0x0BFC, 0x3F78, 0x3FA4, 0x0014,
+ 0x0054, 0x3E28, 0x0624, 0x0C1C, 0x3F98, 0x3F98, 0x0014,
+ 0x0058, 0x3E2C, 0x05E4, 0x0C3C, 0x3FB8, 0x3F8C, 0x0018,
+ 0x0058, 0x3E34, 0x05A8, 0x0C58, 0x3FD8, 0x3F80, 0x001C,
+ 0x0058, 0x3E38, 0x0570, 0x0C78, 0x3FF8, 0x3F74, 0x001C,
+ 0x0058, 0x3E40, 0x0534, 0x0C94, 0x0018, 0x3F68, 0x0020,
+ 0x0058, 0x3E48, 0x04F4, 0x0CAC, 0x0040, 0x3F5C, 0x0024,
+ 0x0058, 0x3E50, 0x04BC, 0x0CC4, 0x0064, 0x3F50, 0x0024,
+ 0x0054, 0x3E58, 0x0484, 0x0CD8, 0x008C, 0x3F44, 0x0028,
+ 0x0054, 0x3E60, 0x0448, 0x0CEC, 0x00B4, 0x3F38, 0x002C,
+ 0x0054, 0x3E68, 0x0410, 0x0CFC, 0x00E0, 0x3F28, 0x0030,
+ 0x0054, 0x3E74, 0x03D4, 0x0D0C, 0x010C, 0x3F1C, 0x0030,
+ 0x0050, 0x3E7C, 0x03A0, 0x0D18, 0x0138, 0x3F10, 0x0034,
+ 0x0050, 0x3E88, 0x0364, 0x0D24, 0x0164, 0x3F04, 0x0038,
+ 0x004C, 0x3E94, 0x0330, 0x0D30, 0x0194, 0x3EF4, 0x0038,
+ 0x004C, 0x3EA0, 0x02F8, 0x0D34, 0x01C4, 0x3EE8, 0x003C,
+ 0x0048, 0x3EAC, 0x02C0, 0x0D3C, 0x01F4, 0x3EDC, 0x0040,
+ 0x0048, 0x3EB8, 0x0290, 0x0D3C, 0x0224, 0x3ED0, 0x0040,
+ 0x0044, 0x3EC4, 0x0258, 0x0D40, 0x0258, 0x3EC4, 0x0044
+};
+
+//=========================================
+// <num_taps> = 7 (each row below holds 7 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.49999 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_7tap_64p_149[231] = {
+ 0x3F68, 0x3FEC, 0x08A8, 0x08AC, 0x3FF0, 0x3F68, 0x0000,
+ 0x3F70, 0x3FDC, 0x0888, 0x08CC, 0x0000, 0x3F60, 0x0000,
+ 0x3F74, 0x3FC8, 0x0868, 0x08F0, 0x0014, 0x3F58, 0x0000,
+ 0x3F7C, 0x3FB4, 0x0844, 0x0908, 0x002C, 0x3F54, 0x0004,
+ 0x3F84, 0x3FA4, 0x0820, 0x0924, 0x0044, 0x3F4C, 0x0004,
+ 0x3F88, 0x3F90, 0x0800, 0x0944, 0x005C, 0x3F44, 0x0004,
+ 0x3F90, 0x3F80, 0x07D8, 0x095C, 0x0074, 0x3F40, 0x0008,
+ 0x3F98, 0x3F70, 0x07B0, 0x097C, 0x008C, 0x3F38, 0x0008,
+ 0x3F9C, 0x3F60, 0x0790, 0x0994, 0x00A8, 0x3F30, 0x0008,
+ 0x3FA4, 0x3F54, 0x0764, 0x09B0, 0x00C4, 0x3F28, 0x0008,
+ 0x3FA8, 0x3F48, 0x0740, 0x09C4, 0x00DC, 0x3F24, 0x000C,
+ 0x3FB0, 0x3F38, 0x0718, 0x09DC, 0x00FC, 0x3F1C, 0x000C,
+ 0x3FB4, 0x3F2C, 0x06F0, 0x09F4, 0x0118, 0x3F18, 0x000C,
+ 0x3FBC, 0x3F24, 0x06C8, 0x0A08, 0x0134, 0x3F10, 0x000C,
+ 0x3FC0, 0x3F18, 0x06A0, 0x0A1C, 0x0154, 0x3F08, 0x0010,
+ 0x3FC8, 0x3F10, 0x0678, 0x0A2C, 0x0170, 0x3F04, 0x0010,
+ 0x3FCC, 0x3F04, 0x0650, 0x0A40, 0x0190, 0x3F00, 0x0010,
+ 0x3FD0, 0x3EFC, 0x0628, 0x0A54, 0x01B0, 0x3EF8, 0x0010,
+ 0x3FD4, 0x3EF4, 0x0600, 0x0A64, 0x01D0, 0x3EF4, 0x0010,
+ 0x3FDC, 0x3EEC, 0x05D8, 0x0A6C, 0x01F4, 0x3EF0, 0x0010,
+ 0x3FE0, 0x3EE8, 0x05B0, 0x0A7C, 0x0214, 0x3EE8, 0x0010,
+ 0x3FE4, 0x3EE0, 0x0588, 0x0A88, 0x0238, 0x3EE4, 0x0010,
+ 0x3FE8, 0x3EDC, 0x055C, 0x0A98, 0x0258, 0x3EE0, 0x0010,
+ 0x3FEC, 0x3ED8, 0x0534, 0x0AA0, 0x027C, 0x3EDC, 0x0010,
+ 0x3FF0, 0x3ED4, 0x050C, 0x0AAC, 0x02A0, 0x3ED8, 0x000C,
+ 0x3FF4, 0x3ED0, 0x04E4, 0x0AB4, 0x02C4, 0x3ED4, 0x000C,
+ 0x3FF4, 0x3ECC, 0x04C0, 0x0ABC, 0x02E8, 0x3ED0, 0x000C,
+ 0x3FF8, 0x3ECC, 0x0494, 0x0AC0, 0x030C, 0x3ED0, 0x000C,
+ 0x3FFC, 0x3EC8, 0x046C, 0x0AC8, 0x0334, 0x3ECC, 0x0008,
+ 0x0000, 0x3EC8, 0x0444, 0x0AC8, 0x0358, 0x3ECC, 0x0008,
+ 0x0000, 0x3EC8, 0x041C, 0x0ACC, 0x0380, 0x3EC8, 0x0008,
+ 0x0000, 0x3EC8, 0x03F4, 0x0AD0, 0x03A8, 0x3EC8, 0x0004,
+ 0x0004, 0x3EC8, 0x03CC, 0x0AD0, 0x03CC, 0x3EC8, 0x0004
+};
+
+//=========================================
+// <num_taps> = 7 (each row below holds 7 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.83332 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_7tap_64p_183[231] = {
+ 0x3FA4, 0x01E8, 0x0674, 0x0674, 0x01E8, 0x3FA4, 0x0000,
+ 0x3FA4, 0x01D4, 0x0668, 0x0684, 0x01F8, 0x3FA4, 0x0000,
+ 0x3FA4, 0x01C4, 0x0658, 0x0690, 0x0208, 0x3FA8, 0x0000,
+ 0x3FA0, 0x01B4, 0x064C, 0x06A0, 0x021C, 0x3FA8, 0x3FFC,
+ 0x3FA0, 0x01A4, 0x063C, 0x06AC, 0x022C, 0x3FAC, 0x3FFC,
+ 0x3FA0, 0x0194, 0x0630, 0x06B4, 0x0240, 0x3FAC, 0x3FFC,
+ 0x3FA0, 0x0184, 0x0620, 0x06C4, 0x0250, 0x3FB0, 0x3FF8,
+ 0x3FA0, 0x0174, 0x0614, 0x06CC, 0x0264, 0x3FB0, 0x3FF8,
+ 0x3FA0, 0x0164, 0x0604, 0x06D8, 0x0278, 0x3FB4, 0x3FF4,
+ 0x3FA0, 0x0154, 0x05F4, 0x06E4, 0x0288, 0x3FB8, 0x3FF4,
+ 0x3FA0, 0x0148, 0x05E4, 0x06EC, 0x029C, 0x3FBC, 0x3FF0,
+ 0x3FA0, 0x0138, 0x05D4, 0x06F4, 0x02B0, 0x3FC0, 0x3FF0,
+ 0x3FA0, 0x0128, 0x05C4, 0x0704, 0x02C4, 0x3FC0, 0x3FEC,
+ 0x3FA0, 0x011C, 0x05B4, 0x0708, 0x02D8, 0x3FC4, 0x3FEC,
+ 0x3FA4, 0x010C, 0x05A4, 0x0714, 0x02E8, 0x3FC8, 0x3FE8,
+ 0x3FA4, 0x0100, 0x0590, 0x0718, 0x02FC, 0x3FD0, 0x3FE8,
+ 0x3FA4, 0x00F0, 0x0580, 0x0724, 0x0310, 0x3FD4, 0x3FE4,
+ 0x3FA4, 0x00E4, 0x056C, 0x072C, 0x0324, 0x3FD8, 0x3FE4,
+ 0x3FA8, 0x00D8, 0x055C, 0x0730, 0x0338, 0x3FDC, 0x3FE0,
+ 0x3FA8, 0x00CC, 0x0548, 0x0738, 0x034C, 0x3FE4, 0x3FDC,
+ 0x3FA8, 0x00BC, 0x0538, 0x0740, 0x0360, 0x3FE8, 0x3FDC,
+ 0x3FAC, 0x00B0, 0x0528, 0x0744, 0x0374, 0x3FEC, 0x3FD8,
+ 0x3FAC, 0x00A4, 0x0514, 0x0748, 0x0388, 0x3FF4, 0x3FD8,
+ 0x3FB0, 0x0098, 0x0500, 0x074C, 0x039C, 0x3FFC, 0x3FD4,
+ 0x3FB0, 0x0090, 0x04EC, 0x0750, 0x03B0, 0x0000, 0x3FD4,
+ 0x3FB0, 0x0084, 0x04DC, 0x0758, 0x03C4, 0x0004, 0x3FD0,
+ 0x3FB4, 0x0078, 0x04CC, 0x0758, 0x03D8, 0x000C, 0x3FCC,
+ 0x3FB4, 0x006C, 0x04B8, 0x075C, 0x03EC, 0x0014, 0x3FCC,
+ 0x3FB8, 0x0064, 0x04A0, 0x0760, 0x0400, 0x001C, 0x3FC8,
+ 0x3FB8, 0x0058, 0x0490, 0x0760, 0x0414, 0x0024, 0x3FC8,
+ 0x3FBC, 0x0050, 0x047C, 0x0760, 0x0428, 0x002C, 0x3FC4,
+ 0x3FBC, 0x0048, 0x0464, 0x0764, 0x043C, 0x0034, 0x3FC4,
+ 0x3FC0, 0x003C, 0x0454, 0x0764, 0x0450, 0x003C, 0x3FC0
+};
+
+//=========================================
+// <num_taps> = 8 (each row below holds 8 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 0.83333 (input/output; below 1.0, so output is larger than input)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_8tap_64p_upscale[264] = {
+ 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x3FFC, 0x0014, 0x3FC8, 0x1000, 0x0038, 0x3FEC, 0x0004, 0x0000,
+ 0x3FF4, 0x0024, 0x3F94, 0x0FFC, 0x0074, 0x3FD8, 0x000C, 0x0000,
+ 0x3FF0, 0x0038, 0x3F60, 0x0FEC, 0x00B4, 0x3FC4, 0x0014, 0x0000,
+ 0x3FEC, 0x004C, 0x3F2C, 0x0FE4, 0x00F4, 0x3FAC, 0x0018, 0x0000,
+ 0x3FE4, 0x005C, 0x3F00, 0x0FD4, 0x0138, 0x3F94, 0x0020, 0x0000,
+ 0x3FE0, 0x006C, 0x3ED0, 0x0FC4, 0x017C, 0x3F7C, 0x0028, 0x0000,
+ 0x3FDC, 0x007C, 0x3EA8, 0x0FA4, 0x01C4, 0x3F68, 0x0030, 0x0000,
+ 0x3FD8, 0x0088, 0x3E80, 0x0F90, 0x020C, 0x3F50, 0x0038, 0x3FFC,
+ 0x3FD4, 0x0098, 0x3E58, 0x0F70, 0x0258, 0x3F38, 0x0040, 0x3FFC,
+ 0x3FD0, 0x00A4, 0x3E34, 0x0F54, 0x02A0, 0x3F1C, 0x004C, 0x3FFC,
+ 0x3FD0, 0x00B0, 0x3E14, 0x0F28, 0x02F0, 0x3F04, 0x0054, 0x3FFC,
+ 0x3FCC, 0x00BC, 0x3DF4, 0x0F08, 0x033C, 0x3EEC, 0x005C, 0x3FF8,
+ 0x3FC8, 0x00C8, 0x3DD8, 0x0EDC, 0x038C, 0x3ED4, 0x0064, 0x3FF8,
+ 0x3FC8, 0x00D0, 0x3DC0, 0x0EAC, 0x03E0, 0x3EBC, 0x006C, 0x3FF4,
+ 0x3FC4, 0x00D8, 0x3DA8, 0x0E7C, 0x0430, 0x3EA4, 0x0078, 0x3FF4,
+ 0x3FC4, 0x00E0, 0x3D94, 0x0E48, 0x0484, 0x3E8C, 0x0080, 0x3FF0,
+ 0x3FC4, 0x00E8, 0x3D80, 0x0E10, 0x04D8, 0x3E74, 0x0088, 0x3FF0,
+ 0x3FC4, 0x00F0, 0x3D70, 0x0DD8, 0x052C, 0x3E5C, 0x0090, 0x3FEC,
+ 0x3FC0, 0x00F4, 0x3D60, 0x0DA0, 0x0584, 0x3E44, 0x0098, 0x3FEC,
+ 0x3FC0, 0x00F8, 0x3D54, 0x0D68, 0x05D8, 0x3E2C, 0x00A0, 0x3FE8,
+ 0x3FC0, 0x00FC, 0x3D48, 0x0D20, 0x0630, 0x3E18, 0x00AC, 0x3FE8,
+ 0x3FC0, 0x0100, 0x3D40, 0x0CE0, 0x0688, 0x3E00, 0x00B4, 0x3FE4,
+ 0x3FC4, 0x0100, 0x3D3C, 0x0C98, 0x06DC, 0x3DEC, 0x00BC, 0x3FE4,
+ 0x3FC4, 0x0100, 0x3D38, 0x0C58, 0x0734, 0x3DD8, 0x00C0, 0x3FE0,
+ 0x3FC4, 0x0104, 0x3D38, 0x0C0C, 0x078C, 0x3DC4, 0x00C8, 0x3FDC,
+ 0x3FC4, 0x0100, 0x3D38, 0x0BC4, 0x07E4, 0x3DB0, 0x00D0, 0x3FDC,
+ 0x3FC4, 0x0100, 0x3D38, 0x0B78, 0x083C, 0x3DA0, 0x00D8, 0x3FD8,
+ 0x3FC8, 0x0100, 0x3D3C, 0x0B28, 0x0890, 0x3D90, 0x00DC, 0x3FD8,
+ 0x3FC8, 0x00FC, 0x3D40, 0x0ADC, 0x08E8, 0x3D80, 0x00E4, 0x3FD4,
+ 0x3FCC, 0x00FC, 0x3D48, 0x0A84, 0x093C, 0x3D74, 0x00E8, 0x3FD4,
+ 0x3FCC, 0x00F8, 0x3D50, 0x0A38, 0x0990, 0x3D64, 0x00F0, 0x3FD0,
+ 0x3FD0, 0x00F4, 0x3D58, 0x09E0, 0x09E4, 0x3D5C, 0x00F4, 0x3FD0
+};
+
+//=========================================
+// <num_taps> = 8 (each row below holds 8 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.16666 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_8tap_64p_116[264] = {
+ 0x0080, 0x3E90, 0x0268, 0x0D14, 0x0264, 0x3E90, 0x0080, 0x0000,
+ 0x007C, 0x3E9C, 0x0238, 0x0D14, 0x0298, 0x3E84, 0x0080, 0x0000,
+ 0x0078, 0x3EAC, 0x0200, 0x0D10, 0x02D0, 0x3E78, 0x0084, 0x0000,
+ 0x0078, 0x3EB8, 0x01D0, 0x0D0C, 0x0304, 0x3E6C, 0x0084, 0x0000,
+ 0x0074, 0x3EC8, 0x01A0, 0x0D00, 0x033C, 0x3E60, 0x0088, 0x0000,
+ 0x0070, 0x3ED4, 0x0170, 0x0D00, 0x0374, 0x3E54, 0x0088, 0x3FFC,
+ 0x006C, 0x3EE4, 0x0140, 0x0CF8, 0x03AC, 0x3E48, 0x0088, 0x3FFC,
+ 0x006C, 0x3EF0, 0x0114, 0x0CE8, 0x03E4, 0x3E3C, 0x008C, 0x3FFC,
+ 0x0068, 0x3F00, 0x00E8, 0x0CD8, 0x041C, 0x3E34, 0x008C, 0x3FFC,
+ 0x0064, 0x3F10, 0x00BC, 0x0CCC, 0x0454, 0x3E28, 0x008C, 0x3FFC,
+ 0x0060, 0x3F1C, 0x0090, 0x0CBC, 0x0490, 0x3E20, 0x008C, 0x3FFC,
+ 0x005C, 0x3F2C, 0x0068, 0x0CA4, 0x04CC, 0x3E18, 0x008C, 0x3FFC,
+ 0x0058, 0x3F38, 0x0040, 0x0C94, 0x0504, 0x3E10, 0x008C, 0x3FFC,
+ 0x0054, 0x3F48, 0x001C, 0x0C7C, 0x0540, 0x3E08, 0x0088, 0x3FFC,
+ 0x0050, 0x3F54, 0x3FF8, 0x0C60, 0x057C, 0x3E04, 0x0088, 0x3FFC,
+ 0x004C, 0x3F64, 0x3FD4, 0x0C44, 0x05B8, 0x3DFC, 0x0088, 0x3FFC,
+ 0x0048, 0x3F70, 0x3FB4, 0x0C28, 0x05F4, 0x3DF8, 0x0084, 0x3FFC,
+ 0x0044, 0x3F80, 0x3F90, 0x0C0C, 0x0630, 0x3DF4, 0x0080, 0x3FFC,
+ 0x0040, 0x3F8C, 0x3F70, 0x0BE8, 0x066C, 0x3DF4, 0x0080, 0x3FFC,
+ 0x003C, 0x3F9C, 0x3F50, 0x0BC8, 0x06A8, 0x3DF0, 0x007C, 0x3FFC,
+ 0x0038, 0x3FA8, 0x3F34, 0x0BA0, 0x06E4, 0x3DF0, 0x0078, 0x0000,
+ 0x0034, 0x3FB4, 0x3F18, 0x0B80, 0x071C, 0x3DF0, 0x0074, 0x0000,
+ 0x0030, 0x3FC0, 0x3EFC, 0x0B5C, 0x0758, 0x3DF0, 0x0070, 0x0000,
+ 0x002C, 0x3FCC, 0x3EE4, 0x0B34, 0x0794, 0x3DF4, 0x0068, 0x0000,
+ 0x002C, 0x3FDC, 0x3ECC, 0x0B08, 0x07CC, 0x3DF4, 0x0064, 0x0000,
+ 0x0028, 0x3FE4, 0x3EB4, 0x0AE0, 0x0808, 0x3DF8, 0x0060, 0x0000,
+ 0x0024, 0x3FF0, 0x3EA0, 0x0AB0, 0x0840, 0x3E00, 0x0058, 0x0004,
+ 0x0020, 0x3FFC, 0x3E90, 0x0A84, 0x0878, 0x3E04, 0x0050, 0x0004,
+ 0x001C, 0x0004, 0x3E7C, 0x0A54, 0x08B0, 0x3E0C, 0x004C, 0x0008,
+ 0x0018, 0x000C, 0x3E68, 0x0A28, 0x08E8, 0x3E18, 0x0044, 0x0008,
+ 0x0018, 0x0018, 0x3E54, 0x09F4, 0x0920, 0x3E20, 0x003C, 0x000C,
+ 0x0014, 0x0020, 0x3E48, 0x09C0, 0x0954, 0x3E2C, 0x0034, 0x0010,
+ 0x0010, 0x002C, 0x3E3C, 0x098C, 0x0988, 0x3E38, 0x002C, 0x0010
+};
+
+//=========================================
+// <num_taps> = 8 (each row below holds 8 coefficients)
+// <num_phases> = 64 (33 rows stored; NOTE(review): remaining phases presumably mirrored - confirm)
+// <scale_ratio> = 1.49999 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12 (NOTE(review): looks like fixed point with 0x1000 == 1.0 - confirm)
+//=========================================
+static const uint16_t filter_8tap_64p_149[264] = {
+ 0x0008, 0x3E8C, 0x03F8, 0x0AE8, 0x03F8, 0x3E8C, 0x0008, 0x0000,
+ 0x000C, 0x3E8C, 0x03D0, 0x0AE8, 0x0420, 0x3E90, 0x0000, 0x0000,
+ 0x000C, 0x3E8C, 0x03AC, 0x0AE8, 0x0444, 0x3E90, 0x0000, 0x0000,
+ 0x0010, 0x3E90, 0x0384, 0x0AE0, 0x046C, 0x3E94, 0x3FFC, 0x0000,
+ 0x0014, 0x3E90, 0x035C, 0x0ADC, 0x0494, 0x3E94, 0x3FF8, 0x0004,
+ 0x0018, 0x3E90, 0x0334, 0x0AD8, 0x04BC, 0x3E98, 0x3FF4, 0x0004,
+ 0x001C, 0x3E94, 0x0310, 0x0AD0, 0x04E4, 0x3E9C, 0x3FEC, 0x0004,
+ 0x0020, 0x3E98, 0x02E8, 0x0AC4, 0x050C, 0x3EA0, 0x3FE8, 0x0008,
+ 0x0020, 0x3E98, 0x02C4, 0x0AC0, 0x0534, 0x3EA4, 0x3FE4, 0x0008,
+ 0x0024, 0x3E9C, 0x02A0, 0x0AB4, 0x055C, 0x3EAC, 0x3FDC, 0x0008,
+ 0x0024, 0x3EA0, 0x027C, 0x0AA8, 0x0584, 0x3EB0, 0x3FD8, 0x000C,
+ 0x0028, 0x3EA4, 0x0258, 0x0A9C, 0x05AC, 0x3EB8, 0x3FD0, 0x000C,
+ 0x0028, 0x3EA8, 0x0234, 0x0A90, 0x05D4, 0x3EC0, 0x3FC8, 0x0010,
+ 0x002C, 0x3EAC, 0x0210, 0x0A80, 0x05FC, 0x3EC8, 0x3FC4, 0x0010,
+ 0x002C, 0x3EB4, 0x01F0, 0x0A70, 0x0624, 0x3ED0, 0x3FBC, 0x0010,
+ 0x002C, 0x3EB8, 0x01CC, 0x0A60, 0x064C, 0x3EDC, 0x3FB4, 0x0014,
+ 0x0030, 0x3EBC, 0x01A8, 0x0A50, 0x0674, 0x3EE4, 0x3FB0, 0x0014,
+ 0x0030, 0x3EC4, 0x0188, 0x0A38, 0x069C, 0x3EF0, 0x3FA8, 0x0018,
+ 0x0030, 0x3ECC, 0x0168, 0x0A28, 0x06C0, 0x3EFC, 0x3FA0, 0x0018,
+ 0x0030, 0x3ED0, 0x0148, 0x0A14, 0x06E8, 0x3F08, 0x3F98, 0x001C,
+ 0x0030, 0x3ED8, 0x012C, 0x0A00, 0x070C, 0x3F14, 0x3F90, 0x001C,
+ 0x0034, 0x3EE0, 0x0108, 0x09E4, 0x0734, 0x3F24, 0x3F8C, 0x001C,
+ 0x0034, 0x3EE4, 0x00EC, 0x09CC, 0x0758, 0x3F34, 0x3F84, 0x0020,
+ 0x0034, 0x3EEC, 0x00D0, 0x09B8, 0x077C, 0x3F40, 0x3F7C, 0x0020,
+ 0x0034, 0x3EF4, 0x00B4, 0x0998, 0x07A4, 0x3F50, 0x3F74, 0x0024,
+ 0x0030, 0x3EFC, 0x0098, 0x0980, 0x07C8, 0x3F64, 0x3F6C, 0x0024,
+ 0x0030, 0x3F04, 0x0080, 0x0968, 0x07E8, 0x3F74, 0x3F64, 0x0024,
+ 0x0030, 0x3F0C, 0x0060, 0x094C, 0x080C, 0x3F88, 0x3F5C, 0x0028,
+ 0x0030, 0x3F14, 0x0048, 0x0930, 0x0830, 0x3F98, 0x3F54, 0x0028,
+ 0x0030, 0x3F1C, 0x0030, 0x0914, 0x0850, 0x3FAC, 0x3F4C, 0x0028,
+ 0x0030, 0x3F24, 0x0018, 0x08F0, 0x0874, 0x3FC0, 0x3F44, 0x002C,
+ 0x002C, 0x3F2C, 0x0000, 0x08D4, 0x0894, 0x3FD8, 0x3F3C, 0x002C,
+ 0x002C, 0x3F34, 0x3FEC, 0x08B4, 0x08B4, 0x3FEC, 0x3F34, 0x002C
+};
+
+//=========================================
+// <num_taps> = 8
+// <num_phases> = 64
+// <scale_ratio> = 1.83332 (input/output)
+// <sharpness> = 0
+// <CoefType> = ModifiedLanczos
+// <CoefQuant> = 1.10
+// <CoefOut> = 1.12
+//=========================================
+static const uint16_t filter_8tap_64p_183[264] = {
+ 0x3F88, 0x0048, 0x047C, 0x0768, 0x047C, 0x0048, 0x3F88, 0x0000,
+ 0x3F88, 0x003C, 0x0468, 0x076C, 0x0490, 0x0054, 0x3F84, 0x0000,
+ 0x3F8C, 0x0034, 0x0454, 0x0768, 0x04A4, 0x005C, 0x3F84, 0x0000,
+ 0x3F8C, 0x0028, 0x0444, 0x076C, 0x04B4, 0x0068, 0x3F80, 0x0000,
+ 0x3F90, 0x0020, 0x042C, 0x0768, 0x04C8, 0x0074, 0x3F80, 0x0000,
+ 0x3F90, 0x0018, 0x041C, 0x0764, 0x04DC, 0x0080, 0x3F7C, 0x0000,
+ 0x3F94, 0x0010, 0x0408, 0x075C, 0x04F0, 0x008C, 0x3F7C, 0x0000,
+ 0x3F94, 0x0004, 0x03F8, 0x0760, 0x0500, 0x0098, 0x3F7C, 0x3FFC,
+ 0x3F98, 0x0000, 0x03E0, 0x075C, 0x0514, 0x00A4, 0x3F78, 0x3FFC,
+ 0x3F9C, 0x3FF8, 0x03CC, 0x0754, 0x0528, 0x00B0, 0x3F78, 0x3FFC,
+ 0x3F9C, 0x3FF0, 0x03B8, 0x0754, 0x0538, 0x00BC, 0x3F78, 0x3FFC,
+ 0x3FA0, 0x3FE8, 0x03A4, 0x0750, 0x054C, 0x00CC, 0x3F74, 0x3FF8,
+ 0x3FA4, 0x3FE0, 0x0390, 0x074C, 0x055C, 0x00D8, 0x3F74, 0x3FF8,
+ 0x3FA4, 0x3FDC, 0x037C, 0x0744, 0x0570, 0x00E4, 0x3F74, 0x3FF8,
+ 0x3FA8, 0x3FD4, 0x0368, 0x0740, 0x0580, 0x00F4, 0x3F74, 0x3FF4,
+ 0x3FA8, 0x3FCC, 0x0354, 0x073C, 0x0590, 0x0104, 0x3F74, 0x3FF4,
+ 0x3FAC, 0x3FC8, 0x0340, 0x0730, 0x05A4, 0x0110, 0x3F74, 0x3FF4,
+ 0x3FB0, 0x3FC0, 0x0330, 0x0728, 0x05B4, 0x0120, 0x3F74, 0x3FF0,
+ 0x3FB0, 0x3FBC, 0x031C, 0x0724, 0x05C4, 0x0130, 0x3F70, 0x3FF0,
+ 0x3FB4, 0x3FB4, 0x0308, 0x0720, 0x05D4, 0x013C, 0x3F70, 0x3FF0,
+ 0x3FB8, 0x3FB0, 0x02F4, 0x0714, 0x05E4, 0x014C, 0x3F74, 0x3FEC,
+ 0x3FB8, 0x3FAC, 0x02E0, 0x0708, 0x05F8, 0x015C, 0x3F74, 0x3FEC,
+ 0x3FBC, 0x3FA8, 0x02CC, 0x0704, 0x0604, 0x016C, 0x3F74, 0x3FE8,
+ 0x3FC0, 0x3FA0, 0x02BC, 0x06F8, 0x0614, 0x017C, 0x3F74, 0x3FE8,
+ 0x3FC0, 0x3F9C, 0x02A8, 0x06F4, 0x0624, 0x018C, 0x3F74, 0x3FE4,
+ 0x3FC4, 0x3F98, 0x0294, 0x06E8, 0x0634, 0x019C, 0x3F74, 0x3FE4,
+ 0x3FC8, 0x3F94, 0x0284, 0x06D8, 0x0644, 0x01AC, 0x3F78, 0x3FE0,
+ 0x3FC8, 0x3F90, 0x0270, 0x06D4, 0x0650, 0x01BC, 0x3F78, 0x3FE0,
+ 0x3FCC, 0x3F8C, 0x025C, 0x06C8, 0x0660, 0x01D0, 0x3F78, 0x3FDC,
+ 0x3FCC, 0x3F8C, 0x024C, 0x06B8, 0x066C, 0x01E0, 0x3F7C, 0x3FDC,
+ 0x3FD0, 0x3F88, 0x0238, 0x06B0, 0x067C, 0x01F0, 0x3F7C, 0x3FD8,
+ 0x3FD4, 0x3F84, 0x0228, 0x069C, 0x0688, 0x0204, 0x3F80, 0x3FD8,
+ 0x3FD4, 0x3F84, 0x0214, 0x0694, 0x0694, 0x0214, 0x3F84, 0x3FD4
+};
+
+static const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio)
+{
+	/* Choose the 3-tap table by comparing @ratio against 1, 4/3 and 5/3. */
+	if (ratio.value < spl_fixpt_one.value)
+		return filter_3tap_64p_upscale;
+	if (ratio.value < spl_fixpt_from_fraction(4, 3).value)
+		return filter_3tap_64p_116;
+	if (ratio.value < spl_fixpt_from_fraction(5, 3).value)
+		return filter_3tap_64p_149;
+	return filter_3tap_64p_183;
+}
+
+static const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio)
+{
+	/* Choose the 4-tap table by comparing @ratio against 1, 4/3 and 5/3. */
+	if (ratio.value < spl_fixpt_one.value)
+		return filter_4tap_64p_upscale;
+	if (ratio.value < spl_fixpt_from_fraction(4, 3).value)
+		return filter_4tap_64p_116;
+	if (ratio.value < spl_fixpt_from_fraction(5, 3).value)
+		return filter_4tap_64p_149;
+	return filter_4tap_64p_183;
+}
+
+static const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio)
+{
+	/* Choose the 5-tap table by comparing @ratio against 1, 4/3 and 5/3. */
+	if (ratio.value < spl_fixpt_one.value)
+		return filter_5tap_64p_upscale;
+	if (ratio.value < spl_fixpt_from_fraction(4, 3).value)
+		return filter_5tap_64p_116;
+	if (ratio.value < spl_fixpt_from_fraction(5, 3).value)
+		return filter_5tap_64p_149;
+	return filter_5tap_64p_183;
+}
+
+static const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio)
+{
+	/* Choose the 6-tap table by comparing @ratio against 1, 4/3 and 5/3. */
+	if (ratio.value < spl_fixpt_one.value)
+		return filter_6tap_64p_upscale;
+	if (ratio.value < spl_fixpt_from_fraction(4, 3).value)
+		return filter_6tap_64p_116;
+	if (ratio.value < spl_fixpt_from_fraction(5, 3).value)
+		return filter_6tap_64p_149;
+	return filter_6tap_64p_183;
+}
+
+static const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio)
+{
+	/* Choose the 7-tap table by comparing @ratio against 1, 4/3 and 5/3. */
+	if (ratio.value < spl_fixpt_one.value)
+		return filter_7tap_64p_upscale;
+	if (ratio.value < spl_fixpt_from_fraction(4, 3).value)
+		return filter_7tap_64p_116;
+	if (ratio.value < spl_fixpt_from_fraction(5, 3).value)
+		return filter_7tap_64p_149;
+	return filter_7tap_64p_183;
+}
+
+static const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio)
+{
+	/* Choose the 8-tap table by comparing @ratio against 1, 4/3 and 5/3. */
+	if (ratio.value < spl_fixpt_one.value)
+		return filter_8tap_64p_upscale;
+	if (ratio.value < spl_fixpt_from_fraction(4, 3).value)
+		return filter_8tap_64p_116;
+	if (ratio.value < spl_fixpt_from_fraction(5, 3).value)
+		return filter_8tap_64p_149;
+	return filter_8tap_64p_183;
+}
+
+static const uint16_t *spl_get_filter_2tap_64p(void)
+{
+ return filter_2tap_64p; /* takes no ratio: the same 2-tap table is always returned */
+}
+
+const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio)
+{
+	switch (taps) {
+	case 8:
+		return spl_get_filter_8tap_64p(ratio);
+	case 7:
+		return spl_get_filter_7tap_64p(ratio);
+	case 6:
+		return spl_get_filter_6tap_64p(ratio);
+	case 5:
+		return spl_get_filter_5tap_64p(ratio);
+	case 4:
+		return spl_get_filter_4tap_64p(ratio);
+	case 3:
+		return spl_get_filter_3tap_64p(ratio);
+	case 2:
+		return spl_get_filter_2tap_64p();
+	case 1: /* no table for a single tap; NULL here is not treated as an error */
+		return NULL;
+	default: /* should never happen, bug */
+		SPL_BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.h b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.h
new file mode 100644
index 000000000000..c315a438d064
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __DC_SPL_SCL_FILTERS_H__
+#define __DC_SPL_SCL_FILTERS_H__
+
+#include "dc_spl_types.h"
+
+/* public API */
+const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio);
+
+#endif /* __DC_SPL_SCL_FILTERS_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h
new file mode 100644
index 000000000000..23d254dea18f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h
@@ -0,0 +1,559 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __DC_SPL_TYPES_H__
+#define __DC_SPL_TYPES_H__
+
+#include "spl_debug.h"
+#include "spl_os_types.h" // swap
+#include "spl_fixpt31_32.h" // fixed31_32 and related functions
+#include "spl_custom_float.h" // custom float and related functions
+
+struct spl_size {
+ uint32_t width;
+ uint32_t height;
+};
+struct spl_rect {
+ int x;
+ int y;
+ int width;
+ int height;
+};
+
+struct spl_ratios {
+ struct spl_fixed31_32 horz;
+ struct spl_fixed31_32 vert;
+ struct spl_fixed31_32 horz_c;
+ struct spl_fixed31_32 vert_c;
+};
+struct spl_inits {
+ struct spl_fixed31_32 h;
+ struct spl_fixed31_32 h_c;
+ struct spl_fixed31_32 v;
+ struct spl_fixed31_32 v_c;
+};
+
+struct spl_taps {
+ uint32_t v_taps;
+ uint32_t h_taps;
+ uint32_t v_taps_c;
+ uint32_t h_taps_c;
+ bool integer_scaling;
+};
+enum spl_view_3d {
+ SPL_VIEW_3D_NONE = 0,
+ SPL_VIEW_3D_FRAME_SEQUENTIAL,
+ SPL_VIEW_3D_SIDE_BY_SIDE,
+ SPL_VIEW_3D_TOP_AND_BOTTOM,
+ SPL_VIEW_3D_COUNT, /* 4: one past the last view value listed above */
+ SPL_VIEW_3D_FIRST = SPL_VIEW_3D_FRAME_SEQUENTIAL /* alias (value 1), not a new view */
+};
+/* Pixel format */
+enum spl_pixel_format {
+ /*graph*/
+ SPL_PIXEL_FORMAT_UNINITIALIZED, /* 0; lies below SPL_PIXEL_FORMAT_GRPH_BEGIN */
+ SPL_PIXEL_FORMAT_INDEX8,
+ SPL_PIXEL_FORMAT_RGB565,
+ SPL_PIXEL_FORMAT_ARGB8888,
+ SPL_PIXEL_FORMAT_ARGB2101010,
+ SPL_PIXEL_FORMAT_ARGB2101010_XRBIAS,
+ SPL_PIXEL_FORMAT_FP16,
+ /*video*/
+ SPL_PIXEL_FORMAT_420BPP8,
+ SPL_PIXEL_FORMAT_420BPP10,
+ /*end of pixel format definition*/
+ SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8, /* range aliases: these reuse values defined above */
+ SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16,
+ SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN = SPL_PIXEL_FORMAT_420BPP8,
+ SPL_PIXEL_FORMAT_SUBSAMPLED_END = SPL_PIXEL_FORMAT_420BPP10,
+ SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8, /* same bounds as the SUBSAMPLED range */
+ SPL_PIXEL_FORMAT_VIDEO_END = SPL_PIXEL_FORMAT_420BPP10,
+ SPL_PIXEL_FORMAT_INVALID, /* numbering resumes at SPL_PIXEL_FORMAT_420BPP10 + 1 */
+ SPL_PIXEL_FORMAT_UNKNOWN
+};
+
+enum lb_memory_config {
+ /* Enable all 3 pieces of memory */
+ LB_MEMORY_CONFIG_0 = 0,
+
+ /* Enable only the first piece of memory */
+ LB_MEMORY_CONFIG_1 = 1,
+
+ /* Enable only the second piece of memory */
+ LB_MEMORY_CONFIG_2 = 2,
+
+ /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the
+ * last piece of chroma memory used for the luma storage
+ */
+ LB_MEMORY_CONFIG_3 = 3
+};
+
+/* Rotation angle */
+enum spl_rotation_angle {
+ SPL_ROTATION_ANGLE_0 = 0,
+ SPL_ROTATION_ANGLE_90,
+ SPL_ROTATION_ANGLE_180,
+ SPL_ROTATION_ANGLE_270,
+ SPL_ROTATION_ANGLE_COUNT
+};
+enum spl_color_space {
+ SPL_COLOR_SPACE_UNKNOWN,
+ SPL_COLOR_SPACE_SRGB,
+ SPL_COLOR_SPACE_XR_RGB,
+ SPL_COLOR_SPACE_SRGB_LIMITED,
+ SPL_COLOR_SPACE_MSREF_SCRGB,
+ SPL_COLOR_SPACE_YCBCR601,
+ SPL_COLOR_SPACE_YCBCR709,
+ SPL_COLOR_SPACE_XV_YCC_709,
+ SPL_COLOR_SPACE_XV_YCC_601,
+ SPL_COLOR_SPACE_YCBCR601_LIMITED,
+ SPL_COLOR_SPACE_YCBCR709_LIMITED,
+ SPL_COLOR_SPACE_2020_RGB_FULLRANGE,
+ SPL_COLOR_SPACE_2020_RGB_LIMITEDRANGE,
+ SPL_COLOR_SPACE_2020_YCBCR,
+ SPL_COLOR_SPACE_ADOBERGB,
+ SPL_COLOR_SPACE_DCIP3,
+ SPL_COLOR_SPACE_DISPLAYNATIVE,
+ SPL_COLOR_SPACE_DOLBYVISION,
+ SPL_COLOR_SPACE_APPCTRL,
+ SPL_COLOR_SPACE_CUSTOMPOINTS,
+ SPL_COLOR_SPACE_YCBCR709_BLACK,
+};
+
+enum chroma_cositing {
+ CHROMA_COSITING_NONE,
+ CHROMA_COSITING_LEFT,
+ CHROMA_COSITING_TOPLEFT,
+ CHROMA_COSITING_COUNT
+};
+
+// Scratch space for calculating scaler params
+struct spl_scaler_data {
+ int h_active;
+ int v_active;
+ struct spl_taps taps;
+ struct spl_rect viewport;
+ struct spl_rect viewport_c;
+ struct spl_rect recout;
+ struct spl_ratios ratios;
+ struct spl_ratios recip_ratios;
+ struct spl_inits inits;
+};
+
+enum spl_transfer_func_type {
+ SPL_TF_TYPE_PREDEFINED,
+ SPL_TF_TYPE_DISTRIBUTED_POINTS,
+ SPL_TF_TYPE_BYPASS,
+ SPL_TF_TYPE_HWPWL
+};
+
+enum spl_transfer_func_predefined {
+ SPL_TRANSFER_FUNCTION_SRGB,
+ SPL_TRANSFER_FUNCTION_BT709,
+ SPL_TRANSFER_FUNCTION_PQ,
+ SPL_TRANSFER_FUNCTION_LINEAR,
+ SPL_TRANSFER_FUNCTION_UNITY,
+ SPL_TRANSFER_FUNCTION_HLG,
+ SPL_TRANSFER_FUNCTION_HLG12,
+ SPL_TRANSFER_FUNCTION_GAMMA22,
+ SPL_TRANSFER_FUNCTION_GAMMA24,
+ SPL_TRANSFER_FUNCTION_GAMMA26
+};
+
+/*==============================================================*/
+/* Below structs are defined to hold hw register data */
+
+// SPL output is used to set below registers
+
+// MPC_SIZE - set based on scl_data h_active and v_active
+struct mpc_size {
+ uint32_t width;
+ uint32_t height;
+};
+// SCL_MODE - set based on scl_data.ratios and always_scale
+enum scl_mode {
+ SCL_MODE_SCALING_444_BYPASS = 0,
+ SCL_MODE_SCALING_444_RGB_ENABLE = 1,
+ SCL_MODE_SCALING_444_YCBCR_ENABLE = 2,
+ SCL_MODE_SCALING_420_YCBCR_ENABLE = 3,
+ SCL_MODE_SCALING_420_LUMA_BYPASS = 4,
+ SCL_MODE_SCALING_420_CHROMA_BYPASS = 5,
+ SCL_MODE_DSCL_BYPASS = 6
+};
+// SCL_BLACK_COLOR - set based on scl_data.format
+struct scl_black_color {
+ uint32_t offset_rgb_y;
+ uint32_t offset_rgb_cbcr;
+};
+// RATIO - set based on scl_data.ratios
+struct ratio {
+ uint32_t h_scale_ratio;
+ uint32_t v_scale_ratio;
+ uint32_t h_scale_ratio_c;
+ uint32_t v_scale_ratio_c;
+};
+
+// INIT - set based on scl_data.inits
+struct init {
+ // SCL_HORZ_FILTER_INIT
+ uint32_t h_filter_init_frac; // SCL_H_INIT_FRAC
+ uint32_t h_filter_init_int; // SCL_H_INIT_INT
+ // SCL_HORZ_FILTER_INIT_C
+ uint32_t h_filter_init_frac_c; // SCL_H_INIT_FRAC_C
+ uint32_t h_filter_init_int_c; // SCL_H_INIT_INT_C
+ // SCL_VERT_FILTER_INIT
+ uint32_t v_filter_init_frac; // SCL_V_INIT_FRAC
+ uint32_t v_filter_init_int; // SCL_V_INIT_INT
+ // SCL_VERT_FILTER_INIT_C
+ uint32_t v_filter_init_frac_c; // SCL_V_INIT_FRAC_C
+ uint32_t v_filter_init_int_c; // SCL_V_INIT_INT_C
+ // SCL_VERT_FILTER_INIT_BOT
+ uint32_t v_filter_init_bot_frac; // SCL_V_INIT_FRAC_BOT
+ uint32_t v_filter_init_bot_int; // SCL_V_INIT_INT_BOT
+ // SCL_VERT_FILTER_INIT_BOT_C
+ uint32_t v_filter_init_bot_frac_c; // SCL_V_INIT_FRAC_BOT_C
+ uint32_t v_filter_init_bot_int_c; // SCL_V_INIT_INT_BOT_C
+};
+
+// FILTER - calculated based on scl_data ratios and taps
+
+// iSHARP
+struct isharp_noise_det {
+ uint32_t enable; // ISHARP_NOISEDET_EN
+ uint32_t mode; // ISHARP_NOISEDET_MODE
+ uint32_t uthreshold; // ISHARP_NOISEDET_UTHRE
+ uint32_t dthreshold; // ISHARP_NOISEDET_DTHRE
+ uint32_t pwl_start_in; // ISHARP_NOISEDET_PWL_START_IN
+ uint32_t pwl_end_in; // ISHARP_NOISEDET_PWL_END_IN
+ uint32_t pwl_slope; // ISHARP_NOISEDET_PWL_SLOPE
+};
+struct isharp_lba {
+ uint32_t mode; // ISHARP_LBA_MODE
+ uint32_t in_seg[6];
+ uint32_t base_seg[6];
+ uint32_t slope_seg[6];
+};
+struct isharp_fmt {
+ uint32_t mode; // ISHARP_FMT_MODE
+ uint32_t norm; // ISHARP_FMT_NORM
+};
+struct isharp_nldelta_sclip {
+ uint32_t enable_p; // ISHARP_NLDELTA_SCLIP_EN_P
+ uint32_t pivot_p; // ISHARP_NLDELTA_SCLIP_PIVOT_P
+ uint32_t slope_p; // ISHARP_NLDELTA_SCLIP_SLOPE_P
+ uint32_t enable_n; // ISHARP_NLDELTA_SCLIP_EN_N
+ uint32_t pivot_n; // ISHARP_NLDELTA_SCLIP_PIVOT_N
+ uint32_t slope_n; // ISHARP_NLDELTA_SCLIP_SLOPE_N
+};
+enum isharp_en {
+ ISHARP_DISABLE,
+ ISHARP_ENABLE
+};
+#define ISHARP_LUT_TABLE_SIZE 32
+// Below struct holds values that can be directly used to program
+// hardware registers. No conversion/clamping is required
+struct dscl_prog_data {
+ struct spl_rect recout; // RECOUT - set based on scl_data.recout
+ struct mpc_size mpc_size;
+ uint32_t dscl_mode;
+ struct scl_black_color scl_black_color;
+ struct ratio ratios;
+ struct init init;
+ struct spl_taps taps; // TAPS - set based on scl_data.taps
+ struct spl_rect viewport;
+ struct spl_rect viewport_c;
+ // raw filter
+ const uint16_t *filter_h;
+ const uint16_t *filter_v;
+ const uint16_t *filter_h_c;
+ const uint16_t *filter_v_c;
+ // EASF registers
+ uint32_t easf_matrix_mode;
+ uint32_t easf_ltonl_en;
+ uint32_t easf_v_en;
+ uint32_t easf_v_sharp_factor;
+ uint32_t easf_v_ring;
+ uint32_t easf_v_bf1_en;
+ uint32_t easf_v_bf2_mode;
+ uint32_t easf_v_bf3_mode;
+ uint32_t easf_v_bf2_flat1_gain;
+ uint32_t easf_v_bf2_flat2_gain;
+ uint32_t easf_v_bf2_roc_gain;
+ uint32_t easf_v_ringest_3tap_dntilt_uptilt;
+ uint32_t easf_v_ringest_3tap_uptilt_max;
+ uint32_t easf_v_ringest_3tap_dntilt_slope;
+ uint32_t easf_v_ringest_3tap_uptilt1_slope;
+ uint32_t easf_v_ringest_3tap_uptilt2_slope;
+ uint32_t easf_v_ringest_3tap_uptilt2_offset;
+ uint32_t easf_v_ringest_eventap_reduceg1;
+ uint32_t easf_v_ringest_eventap_reduceg2;
+ uint32_t easf_v_ringest_eventap_gain1;
+ uint32_t easf_v_ringest_eventap_gain2;
+ uint32_t easf_v_bf_maxa;
+ uint32_t easf_v_bf_maxb;
+ uint32_t easf_v_bf_mina;
+ uint32_t easf_v_bf_minb;
+ uint32_t easf_v_bf1_pwl_in_seg0;
+ uint32_t easf_v_bf1_pwl_base_seg0;
+ uint32_t easf_v_bf1_pwl_slope_seg0;
+ uint32_t easf_v_bf1_pwl_in_seg1;
+ uint32_t easf_v_bf1_pwl_base_seg1;
+ uint32_t easf_v_bf1_pwl_slope_seg1;
+ uint32_t easf_v_bf1_pwl_in_seg2;
+ uint32_t easf_v_bf1_pwl_base_seg2;
+ uint32_t easf_v_bf1_pwl_slope_seg2;
+ uint32_t easf_v_bf1_pwl_in_seg3;
+ uint32_t easf_v_bf1_pwl_base_seg3;
+ uint32_t easf_v_bf1_pwl_slope_seg3;
+ uint32_t easf_v_bf1_pwl_in_seg4;
+ uint32_t easf_v_bf1_pwl_base_seg4;
+ uint32_t easf_v_bf1_pwl_slope_seg4;
+ uint32_t easf_v_bf1_pwl_in_seg5;
+ uint32_t easf_v_bf1_pwl_base_seg5;
+ uint32_t easf_v_bf1_pwl_slope_seg5;
+ uint32_t easf_v_bf1_pwl_in_seg6;
+ uint32_t easf_v_bf1_pwl_base_seg6;
+ uint32_t easf_v_bf1_pwl_slope_seg6;
+ uint32_t easf_v_bf1_pwl_in_seg7;
+ uint32_t easf_v_bf1_pwl_base_seg7;
+ uint32_t easf_v_bf3_pwl_in_set0;
+ uint32_t easf_v_bf3_pwl_base_set0;
+ uint32_t easf_v_bf3_pwl_slope_set0;
+ uint32_t easf_v_bf3_pwl_in_set1;
+ uint32_t easf_v_bf3_pwl_base_set1;
+ uint32_t easf_v_bf3_pwl_slope_set1;
+ uint32_t easf_v_bf3_pwl_in_set2;
+ uint32_t easf_v_bf3_pwl_base_set2;
+ uint32_t easf_v_bf3_pwl_slope_set2;
+ uint32_t easf_v_bf3_pwl_in_set3;
+ uint32_t easf_v_bf3_pwl_base_set3;
+ uint32_t easf_v_bf3_pwl_slope_set3;
+ uint32_t easf_v_bf3_pwl_in_set4;
+ uint32_t easf_v_bf3_pwl_base_set4;
+ uint32_t easf_v_bf3_pwl_slope_set4;
+ uint32_t easf_v_bf3_pwl_in_set5;
+ uint32_t easf_v_bf3_pwl_base_set5;
+ uint32_t easf_h_en;
+ uint32_t easf_h_sharp_factor;
+ uint32_t easf_h_ring;
+ uint32_t easf_h_bf1_en;
+ uint32_t easf_h_bf2_mode;
+ uint32_t easf_h_bf3_mode;
+ uint32_t easf_h_bf2_flat1_gain;
+ uint32_t easf_h_bf2_flat2_gain;
+ uint32_t easf_h_bf2_roc_gain;
+ uint32_t easf_h_ringest_eventap_reduceg1;
+ uint32_t easf_h_ringest_eventap_reduceg2;
+ uint32_t easf_h_ringest_eventap_gain1;
+ uint32_t easf_h_ringest_eventap_gain2;
+ uint32_t easf_h_bf_maxa;
+ uint32_t easf_h_bf_maxb;
+ uint32_t easf_h_bf_mina;
+ uint32_t easf_h_bf_minb;
+ uint32_t easf_h_bf1_pwl_in_seg0;
+ uint32_t easf_h_bf1_pwl_base_seg0;
+ uint32_t easf_h_bf1_pwl_slope_seg0;
+ uint32_t easf_h_bf1_pwl_in_seg1;
+ uint32_t easf_h_bf1_pwl_base_seg1;
+ uint32_t easf_h_bf1_pwl_slope_seg1;
+ uint32_t easf_h_bf1_pwl_in_seg2;
+ uint32_t easf_h_bf1_pwl_base_seg2;
+ uint32_t easf_h_bf1_pwl_slope_seg2;
+ uint32_t easf_h_bf1_pwl_in_seg3;
+ uint32_t easf_h_bf1_pwl_base_seg3;
+ uint32_t easf_h_bf1_pwl_slope_seg3;
+ uint32_t easf_h_bf1_pwl_in_seg4;
+ uint32_t easf_h_bf1_pwl_base_seg4;
+ uint32_t easf_h_bf1_pwl_slope_seg4;
+ uint32_t easf_h_bf1_pwl_in_seg5;
+ uint32_t easf_h_bf1_pwl_base_seg5;
+ uint32_t easf_h_bf1_pwl_slope_seg5;
+ uint32_t easf_h_bf1_pwl_in_seg6;
+ uint32_t easf_h_bf1_pwl_base_seg6;
+ uint32_t easf_h_bf1_pwl_slope_seg6;
+ uint32_t easf_h_bf1_pwl_in_seg7;
+ uint32_t easf_h_bf1_pwl_base_seg7;
+ uint32_t easf_h_bf3_pwl_in_set0;
+ uint32_t easf_h_bf3_pwl_base_set0;
+ uint32_t easf_h_bf3_pwl_slope_set0;
+ uint32_t easf_h_bf3_pwl_in_set1;
+ uint32_t easf_h_bf3_pwl_base_set1;
+ uint32_t easf_h_bf3_pwl_slope_set1;
+ uint32_t easf_h_bf3_pwl_in_set2;
+ uint32_t easf_h_bf3_pwl_base_set2;
+ uint32_t easf_h_bf3_pwl_slope_set2;
+ uint32_t easf_h_bf3_pwl_in_set3;
+ uint32_t easf_h_bf3_pwl_base_set3;
+ uint32_t easf_h_bf3_pwl_slope_set3;
+ uint32_t easf_h_bf3_pwl_in_set4;
+ uint32_t easf_h_bf3_pwl_base_set4;
+ uint32_t easf_h_bf3_pwl_slope_set4;
+ uint32_t easf_h_bf3_pwl_in_set5;
+ uint32_t easf_h_bf3_pwl_base_set5;
+ uint32_t easf_matrix_c0;
+ uint32_t easf_matrix_c1;
+ uint32_t easf_matrix_c2;
+ uint32_t easf_matrix_c3;
+ // iSharp
+ uint32_t isharp_en; // ISHARP_EN
+ struct isharp_noise_det isharp_noise_det; // ISHARP_NOISEDET
+ uint32_t isharp_nl_en; // ISHARP_NL_EN ? TODO:check this
+ struct isharp_lba isharp_lba; // ISHARP_LBA
+ struct isharp_fmt isharp_fmt; // ISHARP_FMT
+ uint32_t isharp_delta[ISHARP_LUT_TABLE_SIZE];
+ struct isharp_nldelta_sclip isharp_nldelta_sclip; // ISHARP_NLDELTA_SCLIP
+ /* blur and scale filter */
+ const uint16_t *filter_blur_scale_v;
+ const uint16_t *filter_blur_scale_h;
+ int sharpness_level; /* Track sharpness level */
+};
+
+/* SPL input and output definitions */
+// SPL scratch struct
+struct spl_scratch {
+ // Pack all SPL outputs in scl_data
+ struct spl_scaler_data scl_data;
+};
+
+/* SPL input and output definitions */
+// SPL outputs struct
+struct spl_out {
+ // Pointer to all the outputs needed to program hw registers
+ struct dscl_prog_data *dscl_prog_data;
+};
+
+// end of SPL outputs
+
+// SPL inputs
+
+// opp extra adjustment for rect
+struct spl_opp_adjust {
+ int x;
+ int y;
+ int width;
+ int height;
+};
+
+// Basic input information
+struct basic_in {
+ enum spl_pixel_format format; // Pixel Format
+ enum chroma_cositing cositing; /* Chroma Subsampling Offset */
+ struct spl_rect src_rect; // Source rect
+ struct spl_rect dst_rect; // Destination Rect
+ struct spl_rect clip_rect; // Clip rect
+ enum spl_rotation_angle rotation; // Rotation
+ bool horizontal_mirror; // Horizontal mirror
+ struct { // previous mpc_combine_h - split count
+ bool use_recout_width_aligned;
+ union {
+ int mpc_num_h_slices;
+ int mpc_recout_width_align;
+ } num_slices_recout_width;
+ } num_h_slices_recout_width_align;
+ int mpc_h_slice_index; // previous mpc_combine_v - split_idx
+ struct spl_opp_adjust opp_recout_adjust;
+ // Inputs for adaptive scaler - TODO
+ enum spl_transfer_func_type tf_type; /* Transfer function type */
+ enum spl_transfer_func_predefined tf_predefined_type; /* Transfer function predefined type */
+ // enum dc_transfer_func_predefined tf;
+ enum spl_color_space color_space; // Color Space
+ unsigned int max_luminance; // Max Luminance TODO: Is determined in dc_hw_sequencer.c is_sdr
+ bool film_grain_applied; // Film Grain Applied // TODO: To check from where to get this?
+ int custom_width; // Width for non-standard segmentation - used when != 0
+ int custom_x; // Start x for non-standard segmentation - used when custom_width != 0
+};
+
+// Basic output information
+struct basic_out {
+ struct spl_size output_size; // Output Size
+ struct spl_rect dst_rect; // Destination Rect
+ struct spl_rect src_rect; // Source rect
+ int odm_combine_factor; // deprecated
+ struct spl_rect odm_slice_rect; // OPP input rect in timing active
+ enum spl_view_3d view_format; // TODO: View format Check if it is chroma subsampling
+ bool always_scale; // Is always scale enabled? Required for getting SCL_MODE
+ int max_downscale_src_width; // Required to get optimal no of taps
+ bool alpha_en;
+ bool use_two_pixels_per_container;
+};
+enum sharpness_setting {
+ SHARPNESS_HW_OFF = 0,
+ SHARPNESS_ZERO,
+ SHARPNESS_CUSTOM
+};
+enum sharpness_range_source {
+ SHARPNESS_RANGE_DCN = 0,
+ SHARPNESS_RANGE_DCN_OVERRIDE
+};
+struct spl_sharpness_range {
+ int sdr_rgb_min;
+ int sdr_rgb_max;
+ int sdr_rgb_mid;
+ int sdr_yuv_min;
+ int sdr_yuv_max;
+ int sdr_yuv_mid;
+ int hdr_rgb_min;
+ int hdr_rgb_max;
+ int hdr_rgb_mid;
+};
+struct adaptive_sharpness {
+ bool enable;
+ unsigned int sharpness_level;
+ struct spl_sharpness_range sharpness_range;
+};
+enum linear_light_scaling { // convert it in translation logic
+ LLS_PREF_DONT_CARE = 0,
+ LLS_PREF_YES,
+ LLS_PREF_NO
+};
+enum sharpen_policy {
+ SHARPEN_ALWAYS = 0,
+ SHARPEN_YUV = 1,
+ SHARPEN_RGB_FULLSCREEN_YUV = 2,
+ SHARPEN_FULLSCREEN_ALL = 3
+};
+enum scale_to_sharpness_policy {
+ NO_SCALE_TO_SHARPNESS_ADJ = 0,
+ SCALE_TO_SHARPNESS_ADJ_YUV = 1,
+ SCALE_TO_SHARPNESS_ADJ_ALL = 2
+};
+struct spl_callbacks {
+ void (*spl_calc_lb_num_partitions)
+ (bool alpha_en,
+ const struct spl_scaler_data *scl_data,
+ enum lb_memory_config lb_config,
+ int *num_part_y,
+ int *num_part_c);
+};
+
+struct spl_debug {
+ int visual_confirm_base_offset;
+ int visual_confirm_dpp_offset;
+ enum scale_to_sharpness_policy scale_to_sharpness_policy;
+};
+
+struct spl_in {
+ struct basic_out basic_out;
+ struct basic_in basic_in;
+ // Basic slice information
+ int odm_slice_index; // ODM Slice Index using get_odm_split_index
+ struct spl_taps scaling_quality; // Explicit Scaling Quality
+ struct spl_callbacks callbacks;
+ // Inputs for isharp and EASF
+ struct adaptive_sharpness adaptive_sharpness; // Adaptive Sharpness
+ enum linear_light_scaling lls_pref; // Linear Light Scaling
+ bool prefer_easf;
+ bool disable_easf;
+ struct spl_debug debug;
+ bool is_fullscreen;
+ bool is_hdr_on;
+ int h_active;
+ int v_active;
+ int min_viewport_size;
+ int sdr_white_level_nits;
+ enum sharpen_policy sharpen_policy;
+};
+// end of SPL inputs
+
+#endif /* __DC_SPL_TYPES_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.c b/drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.c
new file mode 100644
index 000000000000..be2f34d034c5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "spl_debug.h"
+#include "spl_custom_float.h"
+
+static bool spl_build_custom_float(struct spl_fixed31_32 value, /* in: fixed-point value to decompose */
+ const struct spl_custom_float_format *format, /* in: field widths and sign flag */
+ bool *negative, /* out: sign, only ever true when format->sign is set */
+ uint32_t *mantissa, /* out: mantissa field */
+ uint32_t *exponenta) /* out: biased exponent field */
+{
+ uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1; /* exponent bias: 2^(exponenta_bits - 1) - 1 */
+
+ const struct spl_fixed31_32 mantissa_constant_plus_max_fraction =
+ spl_fixpt_from_fraction((1LL << (format->mantissa_bits + 1)) - 1,
+ 1LL << format->mantissa_bits); /* (2^(m+1) - 1) / 2^m with m = mantissa_bits */
+
+ struct spl_fixed31_32 mantiss;
+
+ if (spl_fixpt_eq(value, spl_fixpt_zero)) { /* zero input: all output fields cleared */
+ *negative = false;
+ *mantissa = 0;
+ *exponenta = 0;
+ return true;
+ }
+
+ if (spl_fixpt_lt(value, spl_fixpt_zero)) {
+ *negative = format->sign; /* the sign is dropped when the format has no sign bit */
+ value = spl_fixpt_neg(value); /* continue with the magnitude */
+ } else {
+ *negative = false;
+ }
+
+ if (spl_fixpt_lt(value, spl_fixpt_one)) {
+ uint32_t i = 1;
+
+ do { /* shift left until value is no longer below one */
+ value = spl_fixpt_shl(value, 1);
+ ++i;
+ } while (spl_fixpt_lt(value, spl_fixpt_one));
+
+ --i; /* i now equals the number of left shifts performed */
+
+ if (exp_offset <= i) { /* exponent would not stay positive: emit zero fields (*negative is kept) */
+ *mantissa = 0;
+ *exponenta = 0;
+ return true;
+ }
+
+ *exponenta = exp_offset - i;
+ } else if (spl_fixpt_le(mantissa_constant_plus_max_fraction, value)) {
+ uint32_t i = 1;
+
+ do { /* shift right until value no longer exceeds the limit constant */
+ value = spl_fixpt_shr(value, 1);
+ ++i;
+ } while (spl_fixpt_lt(mantissa_constant_plus_max_fraction, value));
+
+ *exponenta = exp_offset + i - 1; /* i - 1 is the number of right shifts performed */
+ } else {
+ *exponenta = exp_offset; /* no shifting needed: exponent is just the bias */
+ }
+
+ mantiss = spl_fixpt_sub(value, spl_fixpt_one); /* remove the leading one */
+
+ if (spl_fixpt_lt(mantiss, spl_fixpt_zero) ||
+ spl_fixpt_lt(spl_fixpt_one, mantiss))
+ mantiss = spl_fixpt_zero; /* remainder outside [0, 1]: fall back to a zero mantissa */
+ else
+ mantiss = spl_fixpt_shl(mantiss, format->mantissa_bits); /* scale the fraction up by 2^mantissa_bits */
+
+ *mantissa = spl_fixpt_floor(mantiss);
+
+ return true; /* every path reports success */
+}
+
+static bool spl_setup_custom_float(const struct spl_custom_float_format *format,
+	bool negative,
+	uint32_t mantissa,
+	uint32_t exponenta,
+	uint32_t *result)
+{
+	uint32_t i = 0;
+	uint32_t j = 0;
+	uint32_t value = 0;
+	/* Packs *result as [sign][exponent][mantissa], mantissa in the low bits; always returns true. */
+	/* verification code:
+	 * once calculation is ok we can remove it
+	 */
+	/* 1U keeps every shift unsigned: shifting a signed 1 into bit 31 is undefined. */
+	const uint32_t mantissa_mask =
+		(1U << (format->mantissa_bits + 1)) - 1;
+
+	const uint32_t exponenta_mask =
+		(1U << (format->exponenta_bits + 1)) - 1;
+
+	if (mantissa & ~mantissa_mask) {
+		SPL_BREAK_TO_DEBUGGER();
+		mantissa = mantissa_mask;
+	}
+
+	if (exponenta & ~exponenta_mask) {
+		SPL_BREAK_TO_DEBUGGER();
+		exponenta = exponenta_mask;
+	}
+
+	/* end of verification code */
+	/* mantissa: copy bits [0, mantissa_bits) into the low bits of value */
+	while (i < format->mantissa_bits) {
+		uint32_t mask = 1U << i;
+
+		if (mantissa & mask)
+			value |= mask;
+
+		++i;
+	}
+	/* exponent: copy bits [0, exponenta_bits) just above the mantissa (i == mantissa_bits here) */
+	while (j < format->exponenta_bits) {
+		uint32_t mask = 1U << j;
+
+		if (exponenta & mask)
+			value |= mask << i;
+
+		++j;
+	}
+	/* sign bit sits directly above the exponent field, and only if the format has one */
+	if (negative && format->sign)
+		value |= 1U << (i + j);
+
+	*result = value;
+
+	return true;
+}
+
+bool spl_convert_to_custom_float_format(struct spl_fixed31_32 value,
+	const struct spl_custom_float_format *format,
+	uint32_t *result)
+{
+	bool negative;
+	uint32_t exponenta;
+	uint32_t mantissa;
+
+	/* Stage 1: split @value into sign, mantissa and exponent fields. */
+	if (!spl_build_custom_float(value, format, &negative, &mantissa, &exponenta))
+		return false;
+
+	/* Stage 2: pack those fields into *result according to @format. */
+	return spl_setup_custom_float(format, negative, mantissa, exponenta, result);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.h b/drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.h
new file mode 100644
index 000000000000..cdc4e107b9de
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef SPL_CUSTOM_FLOAT_H_
+#define SPL_CUSTOM_FLOAT_H_
+
+#include "spl_os_types.h"
+#include "spl_fixpt31_32.h"
+
+struct spl_custom_float_format {
+ uint32_t mantissa_bits; /* number of mantissa bits packed into the low bits of the result */
+ uint32_t exponenta_bits; /* number of exponent bits packed directly above the mantissa */
+ bool sign; /* true if the format carries a sign bit above the exponent */
+};
+
+struct spl_custom_float_value { /* NOTE(review): not referenced by spl_custom_float.c in this patch - confirm its users */
+ uint32_t mantissa;
+ uint32_t exponenta;
+ uint32_t value;
+ bool negative;
+};
+
+bool spl_convert_to_custom_float_format(
+ struct spl_fixed31_32 value, /* in: fixed-point value to convert */
+ const struct spl_custom_float_format *format, /* in: mantissa/exponent widths and sign flag */
+ uint32_t *result); /* out: packed custom-float bits */
+
+#endif //SPL_CUSTOM_FLOAT_H_
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/spl_debug.h b/drivers/gpu/drm/amd/display/dc/sspl/spl_debug.h
new file mode 100644
index 000000000000..a6f6132df241
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/spl_debug.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef SPL_DEBUG_H
+#define SPL_DEBUG_H
+
+#if defined(CONFIG_HAVE_KGDB) || defined(CONFIG_KGDB) /* KGDB available: warn, then call kgdb_breakpoint() */
+#define SPL_ASSERT_CRITICAL(expr) do { \
+ if (WARN_ON(!(expr))) { \
+ kgdb_breakpoint(); \
+ } \
+} while (0)
+#else /* no KGDB: warn only */
+#define SPL_ASSERT_CRITICAL(expr) do { \
+ if (WARN_ON(!(expr))) { \
+ ; \
+ } \
+} while (0)
+#endif /* CONFIG_HAVE_KGDB || CONFIG_KGDB */
+
+#if defined(CONFIG_DEBUG_KERNEL_DC) /* NOTE(review): SPL_ASSERT is a statement here but an expression in the #else branch */
+#define SPL_ASSERT(expr) SPL_ASSERT_CRITICAL(expr)
+#else
+#define SPL_ASSERT(expr) WARN_ON(!(expr))
+#endif /* CONFIG_DEBUG_KERNEL_DC */
+
+#define SPL_BREAK_TO_DEBUGGER() SPL_ASSERT(0) /* assertion that always fails */
+
+#endif // SPL_DEBUG_H
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c
new file mode 100644
index 000000000000..ebf0287417e0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "spl_fixpt31_32.h"
+
+static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL }; /* 2 * pi scaled by 2^32 */
+static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL }; /* ln(2) scaled by 2^32 */
+static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL }; /* ln(2) / 2 scaled by 2^32 */
+
+/* Magnitude of arg as an unsigned value; well defined even for LLONG_MIN. */
+static inline unsigned long long abs_i64(long long arg)
+{
+	unsigned long long magnitude = (unsigned long long)arg;
+
+	/* Negate in unsigned arithmetic: -LLONG_MIN would overflow as signed. */
+	return (arg < 0) ? (0ULL - magnitude) : magnitude;
+}
+
+/*
+ * @brief
+ * result = dividend / divisor
+ * *remainder = dividend % divisor
+ */
+static inline unsigned long long spl_complete_integer_division_u64(
+	unsigned long long dividend,
+	unsigned long long divisor,
+	unsigned long long *remainder)
+{
+	/*
+	 * Thin wrapper: the quotient is returned and the remainder is stored
+	 * through @remainder by spl_div64_u64_rem().
+	 */
+	return spl_div64_u64_rem(dividend, divisor, remainder);
+}
+
+
+#define FRACTIONAL_PART_MASK \
+ ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)
+
+#define GET_INTEGER_PART(x) \
+ ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)
+
+#define GET_FRACTIONAL_PART(x) \
+ (FRACTIONAL_PART_MASK & (x))
+
+struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator)
+{
+	struct spl_fixed31_32 res;
+	bool arg1_negative = numerator < 0;
+	bool arg2_negative = denominator < 0;
+	/*
+	 * Sign is applied last; magnitudes are formed in unsigned arithmetic so
+	 * LLONG_MIN negates without signed overflow. A zero denominator is not
+	 * checked here.
+	 */
+	unsigned long long arg1_value = (unsigned long long)numerator;
+	unsigned long long arg2_value = (unsigned long long)denominator;
+	unsigned long long remainder;
+	unsigned long long res_value;
+	unsigned long long summand;
+	unsigned int i;
+
+	if (arg1_negative)
+		arg1_value = 0ULL - arg1_value;
+	if (arg2_negative)
+		arg2_value = 0ULL - arg2_value;
+
+	/* determine integer part */
+	res_value = spl_complete_integer_division_u64(
+		arg1_value, arg2_value, &remainder);
+	/*
+	 * NOTE(review): where long is 64 bits wide LONG_MAX equals LLONG_MAX,
+	 * so this cannot flag an integer part that needs more than 31 bits.
+	 */
+	SPL_ASSERT(res_value <= (unsigned long long)LONG_MAX);
+
+	/* determine fractional part: one quotient bit per iteration */
+	for (i = 0; i < FIXED31_32_BITS_PER_FRACTIONAL_PART; i++) {
+		remainder <<= 1;
+		res_value <<= 1;
+		if (remainder >= arg2_value) {
+			res_value |= 1;
+			remainder -= arg2_value;
+		}
+	}
+
+	/* round up LSB when the leftover is at least half of the divisor */
+	summand = (remainder << 1) >= arg2_value;
+	SPL_ASSERT(res_value <= (unsigned long long)LLONG_MAX - summand);
+	res_value += summand;
+	res.value = (long long)res_value;
+
+	if (arg1_negative ^ arg2_negative)
+		res.value = -res.value;
+	return res;
+}
+
+struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
+{
+	/*
+	 * Schoolbook multiplication on magnitudes: each operand is split into an
+	 * integer half (I) and a fractional half (F), the partial products I1*I2,
+	 * I1*F2, I2*F1 and F1*F2 are accumulated, and the sign is applied last.
+	 */
+	const bool negate = (arg1.value < 0) != (arg2.value < 0);
+	unsigned long long lhs = abs_i64(arg1.value);
+	unsigned long long rhs = abs_i64(arg2.value);
+	unsigned long long lhs_int = GET_INTEGER_PART(lhs);
+	unsigned long long rhs_int = GET_INTEGER_PART(rhs);
+	unsigned long long lhs_fra = GET_FRACTIONAL_PART(lhs);
+	unsigned long long rhs_fra = GET_FRACTIONAL_PART(rhs);
+	unsigned long long partial;
+	struct spl_fixed31_32 res;
+
+	/* I1 * I2 becomes the integer half of the result */
+	res.value = lhs_int * rhs_int;
+
+	SPL_ASSERT(res.value <= (long long)LONG_MAX);
+
+	res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
+
+	/* I1 * F2 */
+	partial = lhs_int * rhs_fra;
+	SPL_ASSERT(partial <= (unsigned long long)(LLONG_MAX - res.value));
+	res.value += partial;
+
+	/* I2 * F1 */
+	partial = rhs_int * lhs_fra;
+	SPL_ASSERT(partial <= (unsigned long long)(LLONG_MAX - res.value));
+	res.value += partial;
+
+	/*
+	 * F1 * F2: keep the upper 32 bits of the product, plus one whenever the
+	 * full product is at least spl_fixpt_half.value.
+	 */
+	partial = lhs_fra * rhs_fra;
+	partial = (partial >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
+		(partial >= (unsigned long long)spl_fixpt_half.value);
+
+	SPL_ASSERT(partial <= (unsigned long long)(LLONG_MAX - res.value));
+	res.value += partial;
+
+	if (negate)
+		res.value = -res.value;
+
+	return res;
+}
+
+struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg)
+{
+	/*
+	 * (I + F)^2 = I*I + 2*I*F + F*F, evaluated on the magnitude of arg, where
+	 * I and F are its integer and fractional halves. No sign fix-up is needed.
+	 */
+	unsigned long long magnitude = abs_i64(arg.value);
+	unsigned long long int_part = GET_INTEGER_PART(magnitude);
+	unsigned long long fra_part = GET_FRACTIONAL_PART(magnitude);
+	unsigned long long term;
+	struct spl_fixed31_32 res;
+
+	/* I * I becomes the integer half of the result */
+	res.value = int_part * int_part;
+
+	SPL_ASSERT(res.value <= (long long)LONG_MAX);
+
+	res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;
+
+	/* the cross term I * F is added twice, re-checked before each addition */
+	term = int_part * fra_part;
+
+	SPL_ASSERT(term <= (unsigned long long)(LLONG_MAX - res.value));
+	res.value += term;
+
+	SPL_ASSERT(term <= (unsigned long long)(LLONG_MAX - res.value));
+	res.value += term;
+
+	/* F * F: upper 32 bits, plus one when the product reaches spl_fixpt_half */
+	term = fra_part * fra_part;
+
+	term = (term >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
+		(term >= (unsigned long long)spl_fixpt_half.value);
+
+	SPL_ASSERT(term <= (unsigned long long)(LLONG_MAX - res.value));
+	res.value += term;
+
+	return res;
+}
+
+struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg)
+{
+	/*
+	 * 1 / arg, computed by dividing the raw value of one by that of arg.
+	 *
+	 * @note
+	 * Good idea to use Newton's method
+	 */
+
+	return spl_fixpt_div(spl_fixpt_one, arg);
+}
+
+struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg)
+{
+	struct spl_fixed31_32 reduced = arg;
+	struct spl_fixed31_32 reduced_sq;
+	struct spl_fixed31_32 res = spl_fixpt_one;
+	int n;
+
+	/* arguments of magnitude >= 2*pi are reduced by whole multiples of 2*pi */
+	if (spl_fixpt_le(spl_fixpt_two_pi, spl_fixpt_abs(arg))) {
+		int turns = (int)spl_div64_s64(arg.value, spl_fixpt_two_pi.value);
+
+		reduced = spl_fixpt_sub(
+			arg,
+			spl_fixpt_mul_int(spl_fixpt_two_pi, turns));
+	}
+
+	reduced_sq = spl_fixpt_sqr(reduced);
+
+	/*
+	 * Horner evaluation of the series
+	 *   sin(x) / x = 1 - x^2/3! + x^4/5! - ...
+	 * from the innermost term outwards: n takes the odd values 27 ... 3.
+	 */
+	for (n = 27; n > 2; n -= 2) {
+		struct spl_fixed31_32 scaled = spl_fixpt_mul(reduced_sq, res);
+
+		res = spl_fixpt_sub(
+			spl_fixpt_one,
+			spl_fixpt_div_int(scaled, n * (n - 1)));
+	}
+
+	/*
+	 * The loop produced sin(reduced) / reduced. When the argument was
+	 * reduced, multiply by reduced and divide by arg so that the quotient
+	 * is taken over the original argument.
+	 */
+	if (arg.value != reduced.value)
+		res = spl_fixpt_div(
+			spl_fixpt_mul(res, reduced),
+			arg);
+
+	return res;
+}
+
+struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg)
+{
+	const struct spl_fixed31_32 ratio = spl_fixpt_sinc(arg); /* sin(x) / x */
+
+	return spl_fixpt_mul(arg, ratio);
+}
+
+struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg)
+{
+	/* TODO implement argument normalization */
+
+	const struct spl_fixed31_32 arg_sq = spl_fixpt_sqr(arg);
+	struct spl_fixed31_32 res = spl_fixpt_one;
+	int n;
+
+	/*
+	 * Horner evaluation of the series
+	 *   cos(x) = 1 - x^2/2! + x^4/4! - ...
+	 * from the innermost term outwards: n takes the even values 26 ... 2.
+	 * The argument is used exactly as passed in (see the TODO above).
+	 */
+	for (n = 26; n != 0; n -= 2) {
+		struct spl_fixed31_32 scaled = spl_fixpt_mul(arg_sq, res);
+
+		res = spl_fixpt_sub(
+			spl_fixpt_one,
+			spl_fixpt_div_int(scaled, n * (n - 1)));
+	}
+
+	return res;
+}
+
+/*
+ * @brief
+ * result = exp(arg),
+ * where abs(arg) < 1
+ *
+ * Calculated as Taylor series.
+ */
+static struct spl_fixed31_32 spl_fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg)
+{
+	unsigned int n = 9;
+	/* TODO find correct res */
+	struct spl_fixed31_32 res = spl_fixpt_from_fraction(n + 2, n + 1);
+
+	SPL_ASSERT(spl_fixpt_lt(arg, spl_fixpt_one));
+
+	/*
+	 * Horner evaluation of
+	 *   exp(x) = 1 + x * (1 + x/2 * (1 + x/3 * ( ... )))
+	 * from the innermost factor outwards: n counts down from 9 to 2, and
+	 * the initial res (11/10) seeds the innermost factor.
+	 */
+	for (; n != 1; n--) {
+		struct spl_fixed31_32 scaled = spl_fixpt_mul(arg, res);
+
+		res = spl_fixpt_add(
+			spl_fixpt_one,
+			spl_fixpt_div_int(scaled, n));
+	}
+
+	/* outermost factor: 1 + x * res */
+	return spl_fixpt_add(
+		spl_fixpt_one,
+		spl_fixpt_mul(arg, res));
+}
+
+struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg)
+{
+	/*
+	 * @brief
+	 * Main equation is:
+	 * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
+	 * where m = round(x / ln(2)), r = x - m * ln(2)
+	 */
+	struct spl_fixed31_32 r;
+	struct spl_fixed31_32 exp_r;
+	int m;
+
+	if (arg.value == 0)
+		return spl_fixpt_one;
+
+	/* |x| < ln(2) / 2: the series is evaluated directly, no reduction. */
+	if (!spl_fixpt_le(spl_fixpt_ln2_div_2, spl_fixpt_abs(arg)))
+		return spl_fixed31_32_exp_from_taylor_series(arg);
+
+	m = spl_fixpt_round(
+		spl_fixpt_div(arg, spl_fixpt_ln2));
+	r = spl_fixpt_sub(
+		arg,
+		spl_fixpt_mul_int(spl_fixpt_ln2, m));
+
+	SPL_ASSERT(m != 0);
+
+	SPL_ASSERT(spl_fixpt_lt(
+		spl_fixpt_abs(r),
+		spl_fixpt_one));
+
+	exp_r = spl_fixed31_32_exp_from_taylor_series(r);
+
+	/*
+	 * Apply the 2^m factor: a left shift for positive m, otherwise a
+	 * division by 2^(-m).
+	 */
+	if (m > 0)
+		return spl_fixpt_shl(exp_r, (unsigned int)m);
+
+	return spl_fixpt_div_int(exp_r, 1LL << -m);
+}
+
+struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg)
+{
+	/* TODO improve 1st estimation */
+	struct spl_fixed31_32 estimate = spl_fixpt_neg(spl_fixpt_one);
+	struct spl_fixed31_32 step;
+
+	SPL_ASSERT(arg.value > 0);
+	/* TODO if arg is negative, return NaN */
+	/* TODO if arg is zero, return -INF */
+
+	/*
+	 * Newton iteration on f(y) = exp(y) - arg:
+	 *   y_next = y - 1 + arg / exp(y)
+	 * starting from y = -1 and repeated until two successive estimates
+	 * differ by at most 100 raw units (100 * 2^-32).
+	 */
+	do {
+		struct spl_fixed31_32 ratio = spl_fixpt_div(arg, spl_fixpt_exp(estimate));
+		struct spl_fixed31_32 next = spl_fixpt_add(
+			spl_fixpt_sub(estimate, spl_fixpt_one),
+			ratio);
+
+		step = spl_fixpt_sub(estimate, next);
+		estimate = next;
+		/* TODO determine max_allowed_error based on quality of exp() */
+	} while (abs_i64(step.value) > 100ULL);
+
+	return estimate;
+}
+
+
+
+/* this function is a generic helper to translate fixed point value to
+ * specified integer format that will consist of integer_bits integer part and
+ * fractional_bits fractional part. For example it is used in
+ * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
+ * part in 32 bits. It is used in hw programming (scaler)
+ */
+
+static inline unsigned int spl_ux_dy(
+	long long value,
+	unsigned int integer_bits,
+	unsigned int fractional_bits)
+{
+	/* mask selecting the low integer_bits bits of the integer part */
+	unsigned int int_field = (1 << integer_bits) - 1;
+	/* the 32 fractional bits of the 31.32 value */
+	unsigned int frac_field = FRACTIONAL_PART_MASK & value;
+
+	int_field &= GET_INTEGER_PART(value);
+
+	/* keep only the fractional_bits most significant fractional bits */
+	frac_field >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
+
+	/* the integer field sits directly above the fractional field */
+	return (int_field << fractional_bits) | frac_field;
+}
+
+static inline unsigned int spl_clamp_ux_dy(
+	long long value,
+	unsigned int integer_bits,
+	unsigned int fractional_bits,
+	unsigned int min_clamp)
+{
+	unsigned int truncated_val = spl_ux_dy(value, integer_bits, fractional_bits);
+
+	/* value needs more than integer_bits integer bits: saturate to all ones */
+	if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
+		return (1 << (integer_bits + fractional_bits)) - 1;
+
+	/* otherwise never report less than min_clamp */
+	return (truncated_val > min_clamp) ? truncated_val : min_clamp;
+}
+
+unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg)
+{
+ return spl_ux_dy(arg.value, 4, 19);
+}
+
+unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg)
+{
+ return spl_ux_dy(arg.value, 3, 19);
+}
+
+unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg)
+{
+ return spl_ux_dy(arg.value, 2, 19);
+}
+
+unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg)
+{
+ return spl_ux_dy(arg.value, 0, 19);
+}
+
+unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg)
+{
+ return spl_clamp_ux_dy(arg.value, 0, 14, 1);
+}
+
+unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg)
+{
+ return spl_clamp_ux_dy(arg.value, 0, 10, 1);
+}
+
+int spl_fixpt_s4d19(struct spl_fixed31_32 arg)
+{
+	/* negative inputs: convert the magnitude, then negate the packed result */
+	if (arg.value >= 0)
+		return spl_ux_dy(arg.value, 4, 19);
+	return -(int)spl_ux_dy(spl_fixpt_abs(arg).value, 4, 19);
+}
+
+struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value,
+	unsigned int integer_bits,
+	unsigned int fractional_bits)
+{
+	struct spl_fixed31_32 fixpt_value;
+
+	/*
+	 * Moving the packed value's binary point up to bit 32 is the whole
+	 * conversion. Masking the integer field and OR-ing it back in would
+	 * only repeat bits that are already present, so integer_bits does not
+	 * influence the result and bits above the integer field are kept.
+	 */
+	fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
+	return fixpt_value;
+}
+
+struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value,
+ unsigned int frac_value,
+ unsigned int integer_bits,
+ unsigned int fractional_bits)
+{
+ struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value);
+ /* frac_value is shifted up by (32 - fractional_bits) and OR-ed in; integer_bits is unused */
+ fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits);
+ return fixpt_value;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.h b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.h
new file mode 100644
index 000000000000..9f349ffe9148
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.h
@@ -0,0 +1,522 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef __SPL_FIXED31_32_H__
+#define __SPL_FIXED31_32_H__
+
+#include "spl_debug.h"
+#include "spl_os_types.h" // swap
+
+/* Fallbacks for environments whose headers do not provide these limits. */
+#ifndef LLONG_MAX
+#define LLONG_MAX 9223372036854775807ll
+#endif
+#ifndef LLONG_MIN
+#define LLONG_MIN (-LLONG_MAX - 1ll)
+#endif
+
+/*
+ * Width of the fractional part of a spl_fixed31_32 value: the low 32 bits
+ * are the fraction, the upper 32 bits hold the sign and the 31-bit integer
+ * part.
+ */
+#define FIXED31_32_BITS_PER_FRACTIONAL_PART 32
+
+/*
+ * @brief
+ * Arithmetic operations on real numbers
+ * represented as fixed-point numbers.
+ * There are: 1 bit for sign,
+ * 31 bit for integer part,
+ * 32 bits for fractional part.
+ *
+ * @note
+ * Currently, overflows and underflows are asserted;
+ * no special result returned.
+ */
+
+struct spl_fixed31_32 {
+ long long value;
+};
+
+
+/*
+ * @brief
+ * Useful constants
+ */
+
+static const struct spl_fixed31_32 spl_fixpt_zero = { 0 }; /* 0.0 */
+static const struct spl_fixed31_32 spl_fixpt_epsilon = { 1LL }; /* smallest step, 2^-32 */
+static const struct spl_fixed31_32 spl_fixpt_half = { 0x80000000LL }; /* 0.5 */
+static const struct spl_fixed31_32 spl_fixpt_one = { 0x100000000LL }; /* 1.0 */
+
+/*
+ * @brief
+ * Initialization routines
+ */
+
+/*
+ * @brief
+ * result = numerator / denominator
+ */
+struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator);
+
+/*
+ * @brief
+ * result = arg
+ */
+static inline struct spl_fixed31_32 spl_fixpt_from_int(int arg)
+{
+	/* widen first so the shift into the integer half happens in 64 bits */
+	const long long widened = arg;
+	struct spl_fixed31_32 res = { widened << FIXED31_32_BITS_PER_FRACTIONAL_PART };
+
+	return res;
+}
+
+/*
+ * @brief
+ * Unary operators
+ */
+
+/*
+ * @brief
+ * result = -arg
+ */
+static inline struct spl_fixed31_32 spl_fixpt_neg(struct spl_fixed31_32 arg)
+{
+	/*
+	 * arg is a by-value copy, so it can be negated in place and returned.
+	 */
+	arg.value = -arg.value;
+	return arg;
+}
+
+/*
+ * @brief
+ * result = abs(arg) := (arg >= 0) ? arg : -arg
+ */
+static inline struct spl_fixed31_32 spl_fixpt_abs(struct spl_fixed31_32 arg)
+{
+	/* only negative inputs need to be flipped */
+	if (arg.value >= 0)
+		return arg;
+	return spl_fixpt_neg(arg);
+}
+
+/*
+ * @brief
+ * Binary relational operators
+ */
+
+/*
+ * @brief
+ * result = arg1 < arg2
+ */
+static inline bool spl_fixpt_lt(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
+{
+ return arg1.value < arg2.value;
+}
+
+/*
+ * @brief
+ * result = arg1 <= arg2
+ */
+static inline bool spl_fixpt_le(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
+{
+ return arg1.value <= arg2.value;
+}
+
+/*
+ * @brief
+ * result = arg1 == arg2
+ */
+static inline bool spl_fixpt_eq(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
+{
+ return arg1.value == arg2.value;
+}
+
+/*
+ * @brief
+ * result = min(arg1, arg2) := (arg1 <= arg2) ? arg1 : arg2
+ */
+static inline struct spl_fixed31_32 spl_fixpt_min(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
+{
+	/* on a tie arg1 is returned, as in (arg1 <= arg2) ? arg1 : arg2 */
+	if (arg1.value > arg2.value)
+		return arg2;
+	return arg1;
+}
+
+/*
+ * @brief
+ * result = max(arg1, arg2) := (arg1 <= arg2) ? arg2 : arg1
+ */
+static inline struct spl_fixed31_32 spl_fixpt_max(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
+{
+	/* on a tie arg2 is returned, as in (arg1 <= arg2) ? arg2 : arg1 */
+	if (arg1.value > arg2.value)
+		return arg1;
+	return arg2;
+}
+
+/*
+ * @brief
+ * | min_value, when arg <= min_value
+ * result = | arg, when min_value < arg < max_value
+ * | max_value, when arg >= max_value
+ */
+static inline struct spl_fixed31_32 spl_fixpt_clamp(
+	struct spl_fixed31_32 arg,
+	struct spl_fixed31_32 min_value,
+	struct spl_fixed31_32 max_value)
+{
+	/* the lower bound is tested first, so it wins if the bounds cross */
+	if (arg.value <= min_value.value)
+		return min_value;
+	if (arg.value >= max_value.value)
+		return max_value;
+	return arg;
+}
+
+/*
+ * @brief
+ * Binary shift operators
+ */
+
+/*
+ * @brief
+ * result = arg << shift
+ */
+static inline struct spl_fixed31_32 spl_fixpt_shl(struct spl_fixed31_32 arg, unsigned int shift)
+{
+	/* largest positive value that survives the shift; ~limit bounds negatives */
+	const long long limit = LLONG_MAX >> shift;
+
+	SPL_ASSERT((arg.value >= 0) ? (arg.value <= limit) : (arg.value >= ~limit));
+	arg.value <<= shift;
+	return arg;
+}
+
+/*
+ * @brief
+ * result = arg >> shift
+ */
+static inline struct spl_fixed31_32 spl_fixpt_shr(struct spl_fixed31_32 arg, unsigned int shift)
+{
+	/* the magnitude is shifted, then the sign restored: negatives round toward zero */
+	if (arg.value >= 0)
+		arg.value >>= shift;
+	else
+		arg.value = -((-arg.value) >> shift);
+
+	return arg;
+}
+
+
+/*
+ * @brief
+ * Binary additive operators
+ */
+
+/*
+ * @brief
+ * result = arg1 + arg2
+ */
+static inline struct spl_fixed31_32 spl_fixpt_add(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
+{
+	const long long lhs = arg1.value;
+	const long long rhs = arg2.value;
+	struct spl_fixed31_32 res;
+
+	/* range check phrased so that the check itself cannot overflow */
+	SPL_ASSERT((lhs >= 0) ? (LLONG_MAX - lhs >= rhs) : (LLONG_MIN - lhs <= rhs));
+	res.value = lhs + rhs;
+	return res;
+}
+
+/*
+ * @brief
+ * result = arg1 + arg2
+ */
+static inline struct spl_fixed31_32 spl_fixpt_add_int(struct spl_fixed31_32 arg1, int arg2)
+{
+ return spl_fixpt_add(arg1, spl_fixpt_from_int(arg2));
+}
+
+/*
+ * @brief
+ * result = arg1 - arg2
+ */
+static inline struct spl_fixed31_32 spl_fixpt_sub(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
+{
+	const long long lhs = arg1.value;
+	const long long rhs = arg2.value;
+	struct spl_fixed31_32 res;
+
+	/* range check phrased so that the check itself cannot overflow */
+	SPL_ASSERT((rhs >= 0) ? (LLONG_MIN + rhs <= lhs) : (LLONG_MAX + rhs >= lhs));
+	res.value = lhs - rhs;
+	return res;
+}
+
+/*
+ * @brief
+ * result = arg1 - arg2
+ */
+static inline struct spl_fixed31_32 spl_fixpt_sub_int(struct spl_fixed31_32 arg1, int arg2)
+{
+ return spl_fixpt_sub(arg1, spl_fixpt_from_int(arg2));
+}
+
+
+/*
+ * @brief
+ * Binary multiplicative operators
+ */
+
+/*
+ * @brief
+ * result = arg1 * arg2
+ */
+struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2);
+
+
+/*
+ * @brief
+ * result = arg1 * arg2
+ */
+static inline struct spl_fixed31_32 spl_fixpt_mul_int(struct spl_fixed31_32 arg1, int arg2)
+{
+ return spl_fixpt_mul(arg1, spl_fixpt_from_int(arg2));
+}
+
+/*
+ * @brief
+ * result = square(arg) := arg * arg
+ */
+struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg);
+
+/*
+ * @brief
+ * result = arg1 / arg2 (arg2 is narrowed to int before the division)
+ */
+static inline struct spl_fixed31_32 spl_fixpt_div_int(struct spl_fixed31_32 arg1, long long arg2)
+{
+ return spl_fixpt_from_fraction(arg1.value, spl_fixpt_from_int((int)arg2).value);
+}
+
+/*
+ * @brief
+ * result = arg1 / arg2
+ */
+static inline struct spl_fixed31_32 spl_fixpt_div(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
+{
+ return spl_fixpt_from_fraction(arg1.value, arg2.value);
+}
+
+/*
+ * @brief
+ * Reciprocal function
+ */
+
+/*
+ * @brief
+ * result = reciprocal(arg) := 1 / arg
+ *
+ * @note
+ * No special actions taken in case argument is zero.
+ */
+struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg);
+
+/*
+ * @brief
+ * Trigonometric functions
+ */
+
+/*
+ * @brief
+ * result = sinc(arg) := sin(arg) / arg
+ *
+ * @note
+ * Argument specified in radians,
+ * internally it's normalized to [-2pi...2pi] range.
+ */
+struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg);
+
+/*
+ * @brief
+ * result = sin(arg)
+ *
+ * @note
+ * Argument specified in radians,
+ * internally it's normalized to [-2pi...2pi] range.
+ */
+struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg);
+
+/*
+ * @brief
+ * result = cos(arg)
+ *
+ * @note
+ * Argument specified in radians
+ * and should be in [-2pi...2pi] range -
+ * passing arguments outside that range
+ * will cause incorrect result!
+ */
+struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg);
+
+/*
+ * @brief
+ * Transcendent functions
+ */
+
+/*
+ * @brief
+ * result = exp(arg)
+ *
+ * @note
+ * Currently, function is verified for abs(arg) <= 1.
+ */
+struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg);
+
+/*
+ * @brief
+ * result = log(arg)
+ *
+ * @note
+ * Currently, abs(arg) should be less than 1.
+ * No normalization is done.
+ * Currently, no special actions taken
+ * in case of invalid argument(s). Take care!
+ */
+struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg);
+
+/*
+ * @brief
+ * Power function
+ */
+
+/*
+ * @brief
+ * result = pow(arg1, arg2)
+ *
+ * @note
+ * Currently, abs(arg1) should be less than 1. Take care!
+ */
+static inline struct spl_fixed31_32 spl_fixpt_pow(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2)
+{
+	/* arg1^arg2 = exp(log(arg1) * arg2); a zero base is special-cased first */
+	const struct spl_fixed31_32 zero_base = (arg2.value == 0) ? spl_fixpt_one : spl_fixpt_zero;
+
+	if (arg1.value == 0)
+		return zero_base;
+
+	return spl_fixpt_exp(spl_fixpt_mul(spl_fixpt_log(arg1), arg2));
+}
+
+/*
+ * @brief
+ * Rounding functions
+ */
+
+/*
+ * @brief
+ * result = integer part of arg: floor(arg) for arg >= 0, truncated toward zero for arg < 0
+ */
+static inline int spl_fixpt_floor(struct spl_fixed31_32 arg)
+{
+	const bool negative = arg.value < 0;
+	unsigned long long magnitude = negative ? -arg.value : arg.value;
+	int whole = (int)(magnitude >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+
+	/* the fraction is dropped from the magnitude, then the sign is restored */
+	return negative ? -whole : whole;
+}
+
+/*
+ * @brief
+ * result = round(arg) := integer nearest to arg, halves rounded away from zero
+ */
+static inline int spl_fixpt_round(struct spl_fixed31_32 arg)
+{
+	const bool negative = arg.value < 0;
+	const long long summand = spl_fixpt_half.value;
+	unsigned long long magnitude = negative ? -arg.value : arg.value;
+	int nearest;
+
+	SPL_ASSERT(LLONG_MAX - (long long)magnitude >= summand);
+
+	/* adding one half before dropping the fraction rounds the magnitude */
+	magnitude += summand;
+	nearest = (int)(magnitude >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+
+	return negative ? -nearest : nearest;
+}
+
+/*
+ * @brief
+ * result = ceil(arg) for arg >= 0; negative arguments are rounded away from zero
+ */
+static inline int spl_fixpt_ceil(struct spl_fixed31_32 arg)
+{
+	const bool negative = arg.value < 0;
+	/* one minus epsilon: any non-zero fraction carries into the integer part */
+	const long long summand = spl_fixpt_one.value - spl_fixpt_epsilon.value;
+	unsigned long long magnitude = negative ? -arg.value : arg.value;
+	int whole;
+
+	SPL_ASSERT(LLONG_MAX - (long long)magnitude >= summand);
+
+	magnitude += summand;
+	whole = (int)(magnitude >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+
+	/* the bump was applied to the magnitude, so negatives move away from zero */
+	return negative ? -whole : whole;
+}
+
+/* The following functions are used in scaler hw programming to convert a fixed
+ * point value to a packed format, e.g. u2d19 keeps 2 bits of the integer part
+ * and 19 bits of the fractional part; u0d19 keeps 0 integer bits and 19
+ * fractional bits.
+ */
+
+unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg);
+
+unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg);
+
+unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg);
+
+unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg);
+
+unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg);
+
+unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg);
+
+int spl_fixpt_s4d19(struct spl_fixed31_32 arg);
+
+static inline struct spl_fixed31_32 spl_fixpt_truncate(struct spl_fixed31_32 arg, unsigned int frac_bits)
+{
+	const bool negative = arg.value < 0;
+	long long magnitude;
+
+	if (frac_bits >= FIXED31_32_BITS_PER_FRACTIONAL_PART) {
+		SPL_ASSERT(frac_bits == FIXED31_32_BITS_PER_FRACTIONAL_PART);
+		return arg;
+	}
+
+	/* clear the low (32 - frac_bits) bits of the magnitude, then restore the sign */
+	magnitude = negative ? -arg.value : arg.value;
+	magnitude &= (~0ULL) << (FIXED31_32_BITS_PER_FRACTIONAL_PART - frac_bits);
+	arg.value = negative ? -magnitude : magnitude;
+	return arg;
+}
+
+struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits);
+struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value,
+ unsigned int frac_value,
+ unsigned int integer_bits,
+ unsigned int fractional_bits);
+
+#endif
diff --git a/drivers/gpu/drm/amd/display/dc/sspl/spl_os_types.h b/drivers/gpu/drm/amd/display/dc/sspl/spl_os_types.h
new file mode 100644
index 000000000000..2e6ba71960ac
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/sspl/spl_os_types.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+/* Copyright 2019 Raptor Engineering, LLC */
+
+#ifndef _SPL_OS_TYPES_H_
+#define _SPL_OS_TYPES_H_
+
+#include "spl_debug.h"
+
+#include <linux/slab.h>
+#include <linux/kgdb.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+
+/*
+ *
+ * 64-bit division helpers, thin wrappers around the kernel's div routines
+ *
+ */
+
+static inline uint64_t spl_div_u64_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder)
+{
+ return div_u64_rem(dividend, divisor, remainder);
+}
+
+static inline uint64_t spl_div_u64(uint64_t dividend, uint32_t divisor)
+{
+ return div_u64(dividend, divisor);
+}
+
+static inline uint64_t spl_div64_u64(uint64_t dividend, uint64_t divisor)
+{
+ return div64_u64(dividend, divisor);
+}
+
+static inline uint64_t spl_div64_u64_rem(uint64_t dividend, uint64_t divisor, uint64_t *remainder)
+{
+ return div64_u64_rem(dividend, divisor, remainder);
+}
+
+static inline int64_t spl_div64_s64(int64_t dividend, int64_t divisor)
+{
+ return div64_s64(dividend, divisor);
+}
+
+#define spl_swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+#ifndef spl_min
+#define spl_min(a, b) (((a) < (b)) ? (a):(b))
+#endif
+
+#endif /* _SPL_OS_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c
index ad088d70e189..6ffc74fc9dcd 100644
--- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c
@@ -44,6 +44,11 @@ static void virtual_stream_encoder_dvi_set_stream_attribute(
struct dc_crtc_timing *crtc_timing,
bool is_dual_link) {}
+static void virtual_stream_encoder_lvds_set_stream_attribute(
+ struct stream_encoder *enc,
+ struct dc_crtc_timing *crtc_timing)
+{}
+
static void virtual_stream_encoder_set_throttled_vcp_size(
struct stream_encoder *enc,
struct fixed31_32 avg_time_slots_per_mtp)
@@ -115,6 +120,8 @@ static const struct stream_encoder_funcs virtual_str_enc_funcs = {
virtual_stream_encoder_hdmi_set_stream_attribute,
.dvi_set_stream_attribute =
virtual_stream_encoder_dvi_set_stream_attribute,
+ .lvds_set_stream_attribute =
+ virtual_stream_encoder_lvds_set_stream_attribute,
.set_throttled_vcp_size =
virtual_stream_encoder_set_throttled_vcp_size,
.update_hdmi_info_packets =
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index 2d995c87fbb9..338fdc651f2c 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -51,8 +51,8 @@
* for the cache windows.
*
* The call to dmub_srv_hw_init() programs the DMCUB registers to prepare
- * for command submission. Commands can be queued via dmub_srv_cmd_queue()
- * and executed via dmub_srv_cmd_execute().
+ * for command submission. Commands can be queued via dmub_srv_fb_cmd_queue()
+ * and executed via dmub_srv_fb_cmd_execute().
*
* If the queue is full the dmub_srv_wait_for_idle() call can be used to
* wait until the queue has been cleared.
@@ -65,10 +65,12 @@
*/
#include "inc/dmub_cmd.h"
+#include "dc/dc_types.h"
-#if defined(__cplusplus)
-extern "C" {
-#endif
+#define DMUB_PC_SNAPSHOT_COUNT 10
+
+/* Default tracebuffer size if meta is absent. */
+#define DMUB_TRACE_BUFFER_SIZE (64 * 1024)
/* Forward declarations */
struct dmub_srv;
@@ -77,6 +79,12 @@ struct dmub_srv_dcn31_regs;
struct dmcub_trace_buf_entry;
+/* enum dmub_window_memory_type - memory location type specification for windows */
+enum dmub_window_memory_type {
+ DMUB_WINDOW_MEMORY_TYPE_FB = 0,
+ DMUB_WINDOW_MEMORY_TYPE_GART
+};
+
/* enum dmub_status - return code for dmcub functions */
enum dmub_status {
DMUB_STATUS_OK = 0,
@@ -85,6 +93,7 @@ enum dmub_status {
DMUB_STATUS_TIMEOUT,
DMUB_STATUS_INVALID,
DMUB_STATUS_HW_FAILURE,
+ DMUB_STATUS_POWER_STATE_D3
};
/* enum dmub_asic - dmub asic identifier */
@@ -103,6 +112,10 @@ enum dmub_asic {
DMUB_ASIC_DCN316,
DMUB_ASIC_DCN32,
DMUB_ASIC_DCN321,
+ DMUB_ASIC_DCN35,
+ DMUB_ASIC_DCN351,
+ DMUB_ASIC_DCN36,
+ DMUB_ASIC_DCN401,
DMUB_ASIC_MAX,
};
@@ -116,6 +129,9 @@ enum dmub_window_id {
DMUB_WINDOW_5_TRACEBUFF,
DMUB_WINDOW_6_FW_STATE,
DMUB_WINDOW_7_SCRATCH_MEM,
+ DMUB_WINDOW_IB_MEM,
+ DMUB_WINDOW_SHARED_STATE,
+ DMUB_WINDOW_LSDMA_BUFFER,
DMUB_WINDOW_TOTAL,
};
@@ -127,6 +143,8 @@ enum dmub_notification_type {
DMUB_NOTIFICATION_HPD_IRQ,
DMUB_NOTIFICATION_SET_CONFIG_REPLY,
DMUB_NOTIFICATION_DPIA_NOTIFICATION,
+ DMUB_NOTIFICATION_HPD_SENSE_NOTIFY,
+ DMUB_NOTIFICATION_FUSED_IO,
DMUB_NOTIFICATION_MAX
};
@@ -141,6 +159,27 @@ enum dpia_notify_bw_alloc_status {
DPIA_BW_ALLOC_CAPS_CHANGED
};
+/* enum dmub_memory_access_type - memory access method */
+enum dmub_memory_access_type {
+ DMUB_MEMORY_ACCESS_DEFAULT,
+ DMUB_MEMORY_ACCESS_CPU = DMUB_MEMORY_ACCESS_DEFAULT,
+ DMUB_MEMORY_ACCESS_DMA
+};
+
+/* enum dmub_srv_power_state_type - to track DC power state in dmub_srv */
+enum dmub_srv_power_state_type {
+ DMUB_POWER_STATE_UNDEFINED = 0,
+ DMUB_POWER_STATE_D0 = 1,
+ DMUB_POWER_STATE_D3 = 8
+};
+
+/* enum dmub_inbox_cmd_interface_type - defines default interface for host->dmub commands */
+enum dmub_inbox_cmd_interface_type {
+ DMUB_CMD_INTERFACE_DEFAULT = 0,
+ DMUB_CMD_INTERFACE_FB = 1,
+ DMUB_CMD_INTERFACE_REG = 2,
+};
+
/**
* struct dmub_region - dmub hw memory region
* @base: base address for region, must be 256 byte aligned
@@ -186,6 +225,7 @@ struct dmub_srv_region_params {
uint32_t vbios_size;
const uint8_t *fw_inst_const;
const uint8_t *fw_bss_data;
+ const enum dmub_window_memory_type *window_memory_type;
};
/**
@@ -205,20 +245,26 @@ struct dmub_srv_region_params {
*/
struct dmub_srv_region_info {
uint32_t fb_size;
+ uint32_t gart_size;
uint8_t num_regions;
struct dmub_region regions[DMUB_WINDOW_TOTAL];
};
/**
- * struct dmub_srv_fb_params - parameters used for driver fb setup
+ * struct dmub_srv_memory_params - parameters used for driver fb setup
* @region_info: region info calculated by dmub service
- * @cpu_addr: base cpu address for the framebuffer
- * @gpu_addr: base gpu virtual address for the framebuffer
+ * @cpu_fb_addr: base cpu address for the framebuffer
+ * @cpu_gart_addr: base cpu address for the gart
+ * @gpu_fb_addr: base gpu virtual address for the framebuffer
+ * @gpu_gart_addr: base gpu virtual address for the gart
*/
-struct dmub_srv_fb_params {
+struct dmub_srv_memory_params {
const struct dmub_srv_region_info *region_info;
- void *cpu_addr;
- uint64_t gpu_addr;
+ void *cpu_fb_addr;
+ void *cpu_gart_addr;
+ uint64_t gpu_fb_addr;
+ uint64_t gpu_gart_addr;
+ const enum dmub_window_memory_type *window_memory_type;
};
/**
@@ -263,6 +309,25 @@ struct dmub_srv_hw_params {
bool dpia_hpd_int_enable_supported;
bool disable_clock_gate;
bool disallow_dispclk_dppclk_ds;
+ bool ips_sequential_ono;
+ enum dmub_memory_access_type mem_access_type;
+ enum dmub_ips_disable_type disable_ips;
+ bool disallow_phy_access;
+ bool disable_sldo_opt;
+ bool enable_non_transparent_setconfig;
+ bool lower_hbr3_phy_ssc;
+ bool override_hbr3_pll_vco;
+};
+
+/**
+ * struct dmub_timeout_info - Timeout debug info for dmub_srv
+ * @timeout_occured: Indicates a timeout occurred on any message from driver to dmub
+ * @timeout_cmd: first cmd sent from driver that timed out - subsequent timeouts are not stored
+ */
+struct dmub_timeout_info {
+ bool timeout_occured;
+ union dmub_rb_cmd timeout_cmd;
+ unsigned long long timestamp;
};
/**
@@ -272,7 +337,7 @@ struct dmub_srv_hw_params {
struct dmub_diagnostic_data {
uint32_t dmcub_version;
uint32_t scratch[17];
- uint32_t pc;
+ uint32_t pc[DMUB_PC_SNAPSHOT_COUNT];
uint32_t undefined_address_fault_addr;
uint32_t inst_fetch_fault_addr;
uint32_t data_write_fault_addr;
@@ -282,13 +347,33 @@ struct dmub_diagnostic_data {
uint32_t inbox0_rptr;
uint32_t inbox0_wptr;
uint32_t inbox0_size;
+ uint32_t outbox1_rptr;
+ uint32_t outbox1_wptr;
+ uint32_t outbox1_size;
uint32_t gpint_datain0;
+ struct dmub_timeout_info timeout_info;
uint8_t is_dmcub_enabled : 1;
uint8_t is_dmcub_soft_reset : 1;
uint8_t is_dmcub_secure_reset : 1;
uint8_t is_traceport_en : 1;
uint8_t is_cw0_enabled : 1;
uint8_t is_cw6_enabled : 1;
+ uint8_t is_pwait : 1;
+};
+
+struct dmub_srv_inbox {
+ /* generic status */
+ uint64_t num_submitted;
+ uint64_t num_reported;
+ union {
+ /* frame buffer mailbox status */
+ struct dmub_rb rb;
+ /* register mailbox status */
+ struct {
+ bool is_pending;
+ bool is_multi_pending;
+ };
+ };
};
/**
@@ -336,7 +421,8 @@ struct dmub_srv_hw_funcs {
const struct dmub_window *cw3,
const struct dmub_window *cw4,
const struct dmub_window *cw5,
- const struct dmub_window *cw6);
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6);
void (*setup_mailbox)(struct dmub_srv *dmub,
const struct dmub_region *inbox1);
@@ -363,6 +449,8 @@ struct dmub_srv_hw_funcs {
uint32_t (*emul_get_inbox1_rptr)(struct dmub_srv *dmub);
+ uint32_t (*emul_get_inbox1_wptr)(struct dmub_srv *dmub);
+
void (*emul_set_inbox1_wptr)(struct dmub_srv *dmub, uint32_t wptr_offset);
bool (*is_supported)(struct dmub_srv *dmub);
@@ -370,6 +458,7 @@ struct dmub_srv_hw_funcs {
bool (*is_psrsu_supported)(struct dmub_srv *dmub);
bool (*is_hw_init)(struct dmub_srv *dmub);
+ bool (*is_hw_powered_up)(struct dmub_srv *dmub);
void (*enable_dmub_boot_options)(struct dmub_srv *dmub,
const struct dmub_srv_hw_params *params);
@@ -396,9 +485,28 @@ struct dmub_srv_hw_funcs {
void (*send_inbox0_cmd)(struct dmub_srv *dmub, union dmub_inbox0_data_register data);
uint32_t (*get_current_time)(struct dmub_srv *dmub);
- void (*get_diagnostic_data)(struct dmub_srv *dmub, struct dmub_diagnostic_data *dmub_oca);
+ void (*get_diagnostic_data)(struct dmub_srv *dmub);
bool (*should_detect)(struct dmub_srv *dmub);
+ void (*init_reg_offsets)(struct dmub_srv *dmub, struct dc_context *ctx);
+
+ void (*subvp_save_surf_addr)(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index);
+
+ void (*send_reg_inbox0_cmd_msg)(struct dmub_srv *dmub,
+ union dmub_rb_cmd *cmd);
+ uint32_t (*read_reg_inbox0_rsp_int_status)(struct dmub_srv *dmub);
+ void (*read_reg_inbox0_cmd_rsp)(struct dmub_srv *dmub,
+ union dmub_rb_cmd *cmd);
+ void (*write_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub);
+ void (*clear_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub);
+ void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable);
+
+ uint32_t (*read_reg_outbox0_rdy_int_status)(struct dmub_srv *dmub);
+ void (*write_reg_outbox0_rdy_int_ack)(struct dmub_srv *dmub);
+ void (*read_reg_outbox0_msg)(struct dmub_srv *dmub, uint32_t *msg);
+ void (*write_reg_outbox0_rsp)(struct dmub_srv *dmub, uint32_t *rsp);
+ uint32_t (*read_reg_outbox0_rsp_int_status)(struct dmub_srv *dmub);
+ void (*enable_reg_outbox0_rdy_int)(struct dmub_srv *dmub, bool enable);
};
/**
@@ -417,6 +525,7 @@ struct dmub_srv_create_params {
enum dmub_asic asic;
uint32_t fw_version;
bool is_virtual;
+ enum dmub_inbox_cmd_interface_type inbox_type;
};
/**
@@ -425,6 +534,7 @@ struct dmub_srv_create_params {
* @user_ctx: user provided context for the dmub_srv
* @fw_version: the current firmware version, if any
* @is_virtual: false if hardware support only
+ * @shared_state: dmub shared state between firmware and driver
* @fw_state: dmub firmware state pointer
*/
struct dmub_srv {
@@ -433,17 +543,21 @@ struct dmub_srv {
uint32_t fw_version;
bool is_virtual;
struct dmub_fb scratch_mem_fb;
+ struct dmub_fb ib_mem_gart;
+ volatile struct dmub_shared_state_feature_block *shared_state;
volatile const struct dmub_fw_state *fw_state;
/* private: internal use only */
const struct dmub_srv_common_regs *regs;
const struct dmub_srv_dcn31_regs *regs_dcn31;
- const struct dmub_srv_dcn32_regs *regs_dcn32;
-
+ struct dmub_srv_dcn32_regs *regs_dcn32;
+ struct dmub_srv_dcn35_regs *regs_dcn35;
+ const struct dmub_srv_dcn401_regs *regs_dcn401;
struct dmub_srv_base_funcs funcs;
struct dmub_srv_hw_funcs hw_funcs;
- struct dmub_rb inbox1_rb;
+ struct dmub_srv_inbox inbox1;
uint32_t inbox1_last_wptr;
+ struct dmub_srv_inbox reg_inbox0;
/**
* outbox1_rb is accessed without locks (dal & dc)
* and to be used only in dmub_srv_stat_get_notification()
@@ -454,14 +568,21 @@ struct dmub_srv {
bool sw_init;
bool hw_init;
+ bool dpia_supported;
uint64_t fb_base;
uint64_t fb_offset;
uint32_t psp_version;
/* Feature capabilities reported by fw */
+ struct dmub_fw_meta_info meta_info;
struct dmub_feature_caps feature_caps;
struct dmub_visual_confirm_color visual_confirm_color;
+ enum dmub_inbox_cmd_interface_type inbox_type;
+
+ enum dmub_srv_power_state_type power_state;
+ struct dmub_diagnostic_data debug;
+ struct dmub_fb lsdma_rb_fb;
};
/**
@@ -478,15 +599,15 @@ struct dmub_notification {
enum dmub_notification_type type;
uint8_t link_index;
uint8_t result;
+ /* notify instance from DMUB */
+ uint8_t instance;
bool pending_notification;
union {
struct aux_reply_data aux_reply;
enum dp_hpd_status hpd_status;
enum set_config_status sc_status;
- /**
- * DPIA notification command.
- */
- struct dmub_rb_cmd_dpia_notification dpia_notification;
+ struct dmub_rb_cmd_hpd_sense_notify_data hpd_sense_notify;
+ struct dmub_cmd_fused_request fused_request;
};
};
@@ -546,8 +667,8 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
* DMUB_STATUS_OK - success
* DMUB_STATUS_INVALID - unspecified error
*/
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
- const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+ const struct dmub_srv_memory_params *params,
struct dmub_srv_fb_info *out);
/**
@@ -607,44 +728,50 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub);
/**
- * dmub_srv_sync_inbox1() - sync sw state with hw state
+ * dmub_srv_fb_cmd_queue() - queues a command to the DMUB
* @dmub: the dmub service
+ * @cmd: the command to queue
*
- * Sync sw state with hw state when resume from S0i3
+ * Queues a command to the DMUB service but does not begin execution
+ * immediately.
*
* Return:
* DMUB_STATUS_OK - success
+ * DMUB_STATUS_QUEUE_FULL - no remaining room in queue
* DMUB_STATUS_INVALID - unspecified error
*/
-enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub);
+enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub,
+ const union dmub_rb_cmd *cmd);
/**
- * dmub_srv_cmd_queue() - queues a command to the DMUB
+ * dmub_srv_fb_cmd_execute() - Executes a queued sequence to the dmub
* @dmub: the dmub service
- * @cmd: the command to queue
*
- * Queues a command to the DMUB service but does not begin execution
- * immediately.
+ * Begins execution of queued commands on the dmub.
*
* Return:
* DMUB_STATUS_OK - success
- * DMUB_STATUS_QUEUE_FULL - no remaining room in queue
* DMUB_STATUS_INVALID - unspecified error
*/
-enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
- const union dmub_rb_cmd *cmd);
+enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub);
/**
- * dmub_srv_cmd_execute() - Executes a queued sequence to the dmub
+ * dmub_srv_wait_for_hw_pwr_up() - Waits for firmware hardware power up to complete
* @dmub: the dmub service
+ * @timeout_us: the maximum number of microseconds to wait
*
- * Begins execution of queued commands on the dmub.
+ * Waits until firmware hardware is powered up. The maximum
+ * wait time is given in microseconds to prevent spinning forever.
*
* Return:
* DMUB_STATUS_OK - success
+ * DMUB_STATUS_TIMEOUT - timed out
* DMUB_STATUS_INVALID - unspecified error
*/
-enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub);
+enum dmub_status dmub_srv_wait_for_hw_pwr_up(struct dmub_srv *dmub,
+ uint32_t timeout_us);
+
+bool dmub_srv_is_hw_pwr_up(struct dmub_srv *dmub);
/**
* dmub_srv_wait_for_auto_load() - Waits for firmware auto load to complete
@@ -685,6 +812,23 @@ enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub,
uint32_t timeout_us);
/**
+ * dmub_srv_wait_for_pending() - Re-entrant wait for messages currently pending
+ * @dmub: the dmub service
+ * @timeout_us: the maximum number of microseconds to wait
+ *
+ * Waits until the commands queued prior to this call are complete.
+ * If interfaces remain busy due to additional work being submitted
+ * concurrently, this function will not continue to wait.
+ *
+ * Return:
+ * DMUB_STATUS_OK - success
+ * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out
+ * DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub,
+ uint32_t timeout_us);
+
+/**
* dmub_srv_wait_for_idle() - Waits for the DMUB to be idle
* @dmub: the dmub service
* @timeout_us: the maximum number of microseconds to wait
@@ -782,15 +926,12 @@ enum dmub_status dmub_srv_get_fw_boot_status(struct dmub_srv *dmub,
enum dmub_status dmub_srv_get_fw_boot_option(struct dmub_srv *dmub,
union dmub_fw_boot_options *option);
-enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub,
- union dmub_rb_cmd *cmd);
-
enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub,
bool skip);
bool dmub_srv_get_outbox0_msg(struct dmub_srv *dmub, struct dmcub_trace_buf_entry *entry);
-bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data);
+bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub);
bool dmub_srv_should_detect(struct dmub_srv *dmub);
@@ -833,8 +974,98 @@ enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t ti
*/
enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub);
-#if defined(__cplusplus)
-}
-#endif
+/**
+ * dmub_srv_subvp_save_surf_addr() - Save primary and meta address for subvp on each flip
+ * @dmub: The dmub service
+ * @addr: The surface address to be programmed on the current flip
+ * @subvp_index: Index of subvp pipe, indicates which subvp pipe the address should be saved for
+ *
+ * Function to save the surface flip addr into scratch registers. This is to fix a race condition
+ * between FW and driver reading / writing to the surface address at the same time. This is
+ * required because there is no EARLIEST_IN_USE_META.
+ *
+ * Return:
+ * void
+ */
+void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index);
+
+/**
+ * dmub_srv_set_power_state() - Track DC power state in dmub_srv
+ * @dmub: The dmub service
+ * @dmub_srv_power_state: DC power state setting
+ *
+ * Store DC power state in dmub_srv. If dmub_srv is in D3, then don't send messages to DMUB
+ *
+ * Return:
+ * void
+ */
+void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state);
+
+/**
+ * dmub_srv_reg_cmd_execute() - Executes provided command to the dmub
+ * @dmub: the dmub service
+ * @cmd: the command packet to be executed
+ *
+ * Executes a single command for the dmub.
+ *
+ * Return:
+ * DMUB_STATUS_OK - success
+ * DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd);
+
+
+/**
+ * dmub_srv_cmd_get_response() - Copies return data for command into buffer
+ * @dmub: the dmub service
+ * @cmd_rsp: response buffer
+ *
+ * Copies return data for command into buffer
+ */
+void dmub_srv_cmd_get_response(struct dmub_srv *dmub,
+ union dmub_rb_cmd *cmd_rsp);
+
+/**
+ * dmub_srv_sync_inboxes() - Sync inbox state
+ * @dmub: the dmub service
+ *
+ * Sync inbox state
+ *
+ * Return:
+ * DMUB_STATUS_OK - success
+ * DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub);
+
+/**
+ * dmub_srv_wait_for_inbox_free() - Waits for space in the DMUB inbox to free up
+ * @dmub: the dmub service
+ * @timeout_us: the maximum number of microseconds to wait
+ * @num_free_required: number of free entries required
+ *
+ * Waits until the DMUB buffer is freed to the specified number.
+ * The maximum wait time is given in microseconds to prevent spinning
+ * forever.
+ *
+ * Return:
+ * DMUB_STATUS_OK - success
+ * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out
+ * DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_wait_for_inbox_free(struct dmub_srv *dmub,
+ uint32_t timeout_us,
+ uint32_t num_free_required);
+
+/**
+ * dmub_srv_update_inbox_status() - Updates pending status for inbox & reg inbox0
+ * @dmub: the dmub service
+ *
+ * Return:
+ * DMUB_STATUS_OK - success
+ * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out
+ * DMUB_STATUS_HW_FAILURE - issue with HW programming
+ * DMUB_STATUS_INVALID - unspecified error
+ */
+enum dmub_status dmub_srv_update_inbox_status(struct dmub_srv *dmub);
#endif /* _DMUB_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 7afa78b918b5..92248224b713 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -26,15 +26,6 @@
#ifndef DMUB_CMD_H
#define DMUB_CMD_H
-#if defined(_TEST_HARNESS) || defined(FPGA_USB4)
-#include "dmub_fw_types.h"
-#include "include_legacy/atomfirmware.h"
-
-#if defined(_TEST_HARNESS)
-#include <string.h>
-#endif
-#else
-
#include <asm/byteorder.h>
#include <linux/types.h>
#include <linux/string.h>
@@ -42,11 +33,12 @@
#include "atomfirmware.h"
-#endif // defined(_TEST_HARNESS) || defined(FPGA_USB4)
-
//<DMUB_TYPES>==================================================================
/* Basic type definitions. */
+#ifdef __forceinline
+#undef __forceinline
+#endif
#define __forceinline inline
/**
@@ -92,6 +84,16 @@
*/
#define NUM_BL_CURVE_SEGS 16
+/**
+ * Maximum number of segments in ABM ACE curve.
+ */
+#define ABM_MAX_NUM_OF_ACE_SEGMENTS 64
+
+/**
+ * Maximum number of bins in ABM histogram.
+ */
+#define ABM_MAX_NUM_OF_HG_BINS 64
+
/* Maximum number of SubVP streams */
#define DMUB_MAX_SUBVP_STREAMS 2
@@ -102,14 +104,25 @@
*/
#define DMUB_MAX_FPO_STREAMS 4
+/* Define to ensure that the "common" members always appear in the same
+ * order in different structs for back compat purposes
+ */
+#define COMMON_STREAM_STATIC_SUB_STATE \
+ struct dmub_fams2_cmd_legacy_stream_static_state legacy; \
+ struct dmub_fams2_cmd_subvp_stream_static_state subvp; \
+ struct dmub_fams2_cmd_drr_stream_static_state drr;
+
/* Maximum number of streams on any ASIC. */
#define DMUB_MAX_STREAMS 6
/* Maximum number of planes on any ASIC. */
#define DMUB_MAX_PLANES 6
+/* Maximum number of phantom planes on any ASIC */
+#define DMUB_MAX_PHANTOM_PLANES ((DMUB_MAX_PLANES) / 2)
+
/* Trace buffer offset for entry */
-#define TRACE_BUFFER_ENTRY_OFFSET 16
+#define TRACE_BUFFER_ENTRY_OFFSET 16
/**
* Maximum number of dirty rects supported by FW.
@@ -158,8 +171,11 @@
#define dmub_memset(dest, val, bytes) memset((dest), (val), (bytes))
#endif
-#if defined(__cplusplus)
-extern "C" {
+/**
+ * OS/FW agnostic memcmp
+ */
+#ifndef dmub_memcmp
+#define dmub_memcmp(lhs, rhs, bytes) memcmp((lhs), (rhs), (bytes))
#endif
/**
@@ -172,6 +188,11 @@ extern "C" {
#pragma pack(push, 1)
#define ABM_NUM_OF_ACE_SEGMENTS 5
+/**
+ * Debug FW state offset
+ */
+#define DMUB_DEBUG_FW_STATE_OFFSET 0x300
+
union abm_flags {
struct {
/**
@@ -185,8 +206,7 @@ union abm_flags {
unsigned int disable_abm_requested : 1;
/**
- * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled
- * immediately.
+ * @disable_abm_immediately: Indicates if driver has requested ABM to be disabled immediately.
*/
unsigned int disable_abm_immediately : 1;
@@ -206,6 +226,16 @@ union abm_flags {
* of user backlight level.
*/
unsigned int abm_gradual_bl_change : 1;
+
+ /**
+ * @abm_new_frame: Indicates if a new frame update needed for ABM to ramp up into steady
+ */
+ unsigned int abm_new_frame : 1;
+
+ /**
+ * @vb_scaling_enabled: Indicates variBright Scaling Enable
+ */
+ unsigned int vb_scaling_enabled : 1;
} bitfields;
unsigned int u32All;
@@ -269,6 +299,31 @@ union dmub_addr {
} u; /*<< Low/high bit access */
uint64_t quad_part; /*<< 64 bit address */
};
+
+/* Flattened structure containing SOC BB parameters stored in the VBIOS
+ * It is not practical to store the entire bounding box in VBIOS since the bounding box struct can gain new parameters.
+ * This also prevents alignment issues when new parameters are added to the SoC BB.
+ * The following parameters should be added since these values can't be obtained elsewhere:
+ * -dml2_soc_power_management_parameters
+ * -dml2_soc_vmin_clock_limits
+ */
+struct dmub_soc_bb_params {
+ uint32_t dram_clk_change_blackout_ns;
+ uint32_t dram_clk_change_read_only_ns;
+ uint32_t dram_clk_change_write_only_ns;
+ uint32_t fclk_change_blackout_ns;
+ uint32_t g7_ppt_blackout_ns;
+ uint32_t stutter_enter_plus_exit_latency_ns;
+ uint32_t stutter_exit_latency_ns;
+ uint32_t z8_stutter_enter_plus_exit_latency_ns;
+ uint32_t z8_stutter_exit_latency_ns;
+ uint32_t z8_min_idle_time_ns;
+ uint32_t type_b_dram_clk_change_blackout_ns;
+ uint32_t type_b_ppt_blackout_ns;
+ uint32_t vmin_limit_dispclk_khz;
+ uint32_t vmin_limit_dcfclk_khz;
+ uint32_t g7_temperature_read_blackout_ns;
+};
#pragma pack(pop)
/**
@@ -329,6 +384,10 @@ union dmub_psr_debug_flags {
*/
uint32_t back_to_back_flip : 1;
+ /**
+ * Enable visual confirm for IPS
+ */
+ uint32_t enable_ips_visual_confirm : 1;
} bitfields;
/**
@@ -343,53 +402,140 @@ union dmub_psr_debug_flags {
union replay_debug_flags {
struct {
/**
+ * 0x1 (bit 0)
* Enable visual confirm in FW.
*/
uint32_t visual_confirm : 1;
/**
+ * 0x2 (bit 1)
* @skip_crc: Set if need to skip CRC.
*/
uint32_t skip_crc : 1;
/**
+ * 0x4 (bit 2)
* @force_link_power_on: Force disable ALPM control
*/
uint32_t force_link_power_on : 1;
/**
+ * 0x8 (bit 3)
* @force_phy_power_on: Force phy power on
*/
uint32_t force_phy_power_on : 1;
/**
+ * 0x10 (bit 4)
* @timing_resync_disabled: Disabled Replay normal sleep mode timing resync
*/
uint32_t timing_resync_disabled : 1;
/**
+ * 0x20 (bit 5)
* @skip_crtc_disabled: CRTC disable skipped
*/
uint32_t skip_crtc_disabled : 1;
/**
+ * 0x40 (bit 6)
* @force_defer_one_frame_update: Force defer one frame update in ultra sleep mode
*/
uint32_t force_defer_one_frame_update : 1;
+
/**
+ * 0x80 (bit 7)
* @disable_delay_alpm_on: Force disable delay alpm on
*/
uint32_t disable_delay_alpm_on : 1;
+
/**
+ * 0x100 (bit 8)
* @disable_desync_error_check: Force disable desync error check
*/
uint32_t disable_desync_error_check : 1;
+
/**
- * @disable_desync_error_check: Force disable desync error check
+ * 0x200 (bit 9)
+ * @force_self_update_when_abm_non_steady: Force self update if abm is not steady
+ */
+ uint32_t force_self_update_when_abm_non_steady : 1;
+
+ /**
+ * 0x400 (bit 10)
+ * @enable_ips_visual_confirm: Enable IPS visual confirm when entering IPS
+ * If we enter IPS2, the Visual confirm bar will change to yellow
+ */
+ uint32_t enable_ips_visual_confirm : 1;
+
+ /**
+ * 0x800 (bit 11)
+ * @enable_ips_residency_profiling: Enable IPS residency profiling
*/
- uint32_t disable_dmub_save_restore : 1;
+ uint32_t enable_ips_residency_profiling : 1;
- uint32_t reserved : 22;
+ /**
+ * 0x1000 (bit 12)
+ * @enable_coasting_vtotal_check: Enable Coasting_vtotal_check
+ */
+ uint32_t enable_coasting_vtotal_check : 1;
+ /**
+ * 0x2000 (bit 13)
+ * @enable_visual_confirm_debug: Enable Visual Confirm Debug
+ */
+ uint32_t enable_visual_confirm_debug : 1;
+
+ uint32_t reserved : 18;
+ } bitfields;
+
+ uint32_t u32All;
+};
+
+/**
+ * Flags record error state.
+ */
+union replay_visual_confirm_error_state_flags {
+ struct {
+ /**
+ * 0x1 (bit 0) - Desync Error flag.
+ */
+ uint32_t desync_error : 1;
+
+ /**
+ * 0x2 (bit 1) - State Transition Error flag.
+ */
+ uint32_t state_transition_error : 1;
+
+ /**
+ * 0x4 (bit 2) - Crc Error flag
+ */
+ uint32_t crc_error : 1;
+
+ /**
+ * 0x8 (bit 3) - Reserved
+ */
+ uint32_t reserved_3 : 1;
+
+ /**
+ * 0x10 (bit 4) - Incorrect Coasting vtotal checking --> use debug flag to control DPCD write.
+ * Added new debug flag to control DPCD.
+ */
+ uint32_t incorrect_vtotal_in_static_screen : 1;
+
+ /**
+ * 0x20 (bit 5) - No doubled Refresh Rate.
+ */
+ uint32_t no_double_rr : 1;
+
+ /**
+ * Reserved bit 6-7
+ */
+ uint32_t reserved_6_7 : 2;
+
+ /**
+ * Reserved bits 8-31 (24 bits)
+ */
+ uint32_t reserved_9_31 : 24;
} bitfields;
uint32_t u32All;
@@ -421,11 +567,6 @@ union replay_hw_flags {
uint32_t smu_optimizations_en : 1;
/**
- * @otg_powered_down: Flag to keep track of OTG power state.
- */
- uint32_t otg_powered_down : 1;
-
- /**
* @phy_power_state: Indicates current phy power state
*/
uint32_t phy_power_state : 1;
@@ -438,23 +579,44 @@ union replay_hw_flags {
* Use TPS3 signal when restore main link.
*/
uint32_t force_wakeup_by_tps3 : 1;
+ /**
+ * @is_alpm_initialized: Indicates whether ALPM is initialized
+ */
+ uint32_t is_alpm_initialized : 1;
+
+ /**
+ * @alpm_mode: Indicates ALPM mode selected
+ */
+ uint32_t alpm_mode : 2;
} bitfields;
uint32_t u32All;
};
+union fw_assisted_mclk_switch_version {
+ struct {
+ uint8_t minor : 5;
+ uint8_t major : 3;
+ };
+ uint8_t ver;
+};
+
/**
- * DMUB visual confirm color
+ * DMUB feature capabilities.
+ * After DMUB init, driver will query FW capabilities prior to enabling certain features.
*/
struct dmub_feature_caps {
/**
* Max PSR version supported by FW.
*/
uint8_t psr;
- uint8_t fw_assisted_mclk_switch;
+ uint8_t fw_assisted_mclk_switch_ver;
uint8_t reserved[4];
uint8_t subvp_psr_support;
uint8_t gecc_enable;
+ uint8_t replay_supported;
+ uint8_t replay_reserved[3];
+ uint8_t abm_aux_backlight_support;
};
struct dmub_visual_confirm_color {
@@ -467,10 +629,6 @@ struct dmub_visual_confirm_color {
uint16_t panel_inst;
};
-#if defined(__cplusplus)
-}
-#endif
-
//==============================================================================
//</DMUB_TYPES>=================================================================
//==============================================================================
@@ -485,6 +643,17 @@ struct dmub_visual_confirm_color {
#define DMUB_FW_META_OFFSET 0x24
/**
+ * union dmub_fw_meta_feature_bits - Static feature bits for pre-initialization
+ */
+union dmub_fw_meta_feature_bits {
+ struct {
+ uint32_t shared_state_link_detection : 1; /**< 1 supports link detection via shared state */
+ uint32_t reserved : 31;
+ } bits; /**< status bits */
+ uint32_t all; /**< 32-bit access to status bits */
+};
+
+/**
* struct dmub_fw_meta_info - metadata associated with fw binary
*
* NOTE: This should be considered a stable API. Fields should
@@ -496,6 +665,8 @@ struct dmub_visual_confirm_color {
* @trace_buffer_size: size of the tracebuffer region
* @fw_version: the firmware version information
* @dal_fw: 1 if the firmware is DAL
+ * @shared_state_size: size of the shared state region in bytes
+ * @shared_state_features: number of shared state features
*/
struct dmub_fw_meta_info {
uint32_t magic_value; /**< magic value identifying DMUB firmware meta info */
@@ -504,6 +675,10 @@ struct dmub_fw_meta_info {
uint32_t fw_version; /**< the firmware version information */
uint8_t dal_fw; /**< 1 if the firmware is DAL */
uint8_t reserved[3]; /**< padding bits */
+ uint32_t shared_state_size; /**< size of the shared state region in bytes */
+ uint16_t shared_state_features; /**< number of shared state features */
+ uint16_t reserved2; /**< padding bytes */
+ union dmub_fw_meta_feature_bits feature_bits; /**< static feature bits */
};
/**
@@ -519,6 +694,7 @@ union dmub_fw_meta {
//==============================================================================
//< DMUB Trace Buffer>================================================================
//==============================================================================
+#if !defined(TENSILICA) && !defined(DMUB_TRACE_ENTRY_DEFINED)
/**
* dmub_trace_code_t - firmware trace code, 32-bits
*/
@@ -533,6 +709,7 @@ struct dmcub_trace_buf_entry {
uint32_t param0; /**< trace defined parameter 0 */
uint32_t param1; /**< trace defined parameter 1 */
};
+#endif
//==============================================================================
//< DMUB_STATUS>================================================================
@@ -560,6 +737,7 @@ union dmub_fw_boot_status {
uint32_t fams_enabled : 1; /**< 1 if VBIOS data is deferred programmed */
uint32_t detection_required: 1; /**< if detection need to be triggered by driver */
uint32_t hw_power_init_done: 1; /**< 1 if hw power init is completed */
+ uint32_t ono_regions_enabled: 1; /**< 1 if ONO regions are enabled */
} bits; /**< status bits */
uint32_t all; /**< 32-bit access to status bits */
};
@@ -576,6 +754,7 @@ enum dmub_fw_boot_status_bit {
DMUB_FW_BOOT_STATUS_BIT_FAMS_ENABLED = (1 << 5), /**< 1 if FAMS is enabled*/
DMUB_FW_BOOT_STATUS_BIT_DETECTION_REQUIRED = (1 << 6), /**< 1 if detection need to be triggered by driver*/
DMUB_FW_BOOT_STATUS_BIT_HW_POWER_INIT_DONE = (1 << 7), /**< 1 if hw power init is completed */
+ DMUB_FW_BOOT_STATUS_BIT_ONO_REGIONS_ENABLED = (1 << 8), /**< 1 if ONO regions are enabled */
};
/* Register bit definition for SCRATCH5 */
@@ -594,9 +773,44 @@ enum dmub_lvtma_status_bit {
};
enum dmub_ips_disable_type {
- DMUB_IPS_DISABLE_IPS1 = 1,
- DMUB_IPS_DISABLE_IPS2 = 2,
- DMUB_IPS_DISABLE_IPS2_Z10 = 3,
+ DMUB_IPS_ENABLE = 0,
+ DMUB_IPS_DISABLE_ALL = 1,
+ DMUB_IPS_DISABLE_IPS1 = 2,
+ DMUB_IPS_DISABLE_IPS2 = 3,
+ DMUB_IPS_DISABLE_IPS2_Z10 = 4,
+ DMUB_IPS_DISABLE_DYNAMIC = 5,
+ DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF = 6,
+ DMUB_IPS_DISABLE_Z8_RETENTION = 7,
+};
+
+enum dmub_ips_rcg_disable_type {
+ DMUB_IPS_RCG_ENABLE = 0,
+ DMUB_IPS0_RCG_DISABLE = 1,
+ DMUB_IPS1_RCG_DISABLE = 2,
+ DMUB_IPS_RCG_DISABLE = 3
+};
+
+enum dmub_ips_in_vpb_disable_type {
+ DMUB_IPS_VPB_RCG_ONLY = 0, // Legacy behaviour
+ DMUB_IPS_VPB_DISABLE_ALL = 1,
+ DMUB_IPS_VPB_ENABLE_IPS1_AND_RCG = 2,
+ DMUB_IPS_VPB_ENABLE_ALL = 3 // Enable IPS1 Z8, IPS1 and RCG
+};
+
+#define DMUB_IPS1_ALLOW_MASK 0x00000001
+#define DMUB_IPS2_ALLOW_MASK 0x00000002
+#define DMUB_IPS1_COMMIT_MASK 0x00000004
+#define DMUB_IPS2_COMMIT_MASK 0x00000008
+
+enum dmub_ips_comand_type {
+ /**
+ * Start/stop IPS residency measurements for a given IPS mode
+ */
+ DMUB_CMD__IPS_RESIDENCY_CNTL = 0,
+ /**
+ * Query IPS residency information for a given IPS mode
+ */
+ DMUB_CMD__IPS_QUERY_RESIDENCY_INFO = 1,
};
/**
@@ -614,19 +828,23 @@ union dmub_fw_boot_options {
uint32_t enable_dpia: 1; /**< 1 if DPIA should be enabled */
uint32_t invalid_vbios_data: 1; /**< 1 if VBIOS data table is invalid */
uint32_t dpia_supported: 1; /**< 1 if DPIA is supported on this platform */
- uint32_t sel_mux_phy_c_d_phy_f_g: 1; /**< 1 if PHYF/PHYG should be enabled */
+ uint32_t sel_mux_phy_c_d_phy_f_g: 1; /**< 1 if PHYF/PHYG should be enabled on DCN31 */
/**< 1 if all root clock gating is enabled and low power memory is enabled*/
uint32_t power_optimization: 1;
uint32_t diag_env: 1; /* 1 if diagnostic environment */
uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/
uint32_t usb4_cm_version: 1; /**< 1 CM support */
uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */
- uint32_t usb4_dpia_bw_alloc_supported: 1; /* 1 if USB4 dpia BW allocation supported */
+ uint32_t enable_non_transparent_setconfig: 1; /* 1 if dpia use conventional dp lt flow*/
uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/
uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */
uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/
- uint32_t ips_disable: 2; /* options to disable ips support*/
- uint32_t reserved : 10; /**< reserved */
+ uint32_t ips_disable: 3; /* options to disable ips support*/
+ uint32_t ips_sequential_ono: 1; /**< 1 to enable sequential ONO IPS sequence */
+ uint32_t disable_sldo_opt: 1; /**< 1 to disable SLDO optimizations */
+ uint32_t lower_hbr3_phy_ssc: 1; /**< 1 to lower hbr3 phy ssc to 0.125 percent */
+ uint32_t override_hbr3_pll_vco: 1; /**< 1 to override the hbr3 pll vco to 0 */
+ uint32_t reserved : 5; /**< reserved */
} bits; /**< boot bits */
uint32_t all; /**< 32-bit access to bits */
};
@@ -638,6 +856,143 @@ enum dmub_fw_boot_options_bit {
};
//==============================================================================
+//< DMUB_SHARED_STATE>==========================================================
+//==============================================================================
+
+/**
+ * Shared firmware state between driver and firmware for lockless communication
+ * in situations where the inbox/outbox may be unavailable.
+ *
+ * Each structure *must* be at most 256-bytes in size. The layout allocation is
+ * described below:
+ *
+ * [Header (256 Bytes)][Feature 1 (256 Bytes)][Feature 2 (256 Bytes)]...
+ */
+
+/**
+ * enum dmub_shared_state_feature_id - List of shared state features.
+ */
+enum dmub_shared_state_feature_id {
+ DMUB_SHARED_SHARE_FEATURE__INVALID = 0,
+ DMUB_SHARED_SHARE_FEATURE__IPS_FW = 1,
+ DMUB_SHARED_SHARE_FEATURE__IPS_DRIVER = 2,
+ DMUB_SHARED_SHARE_FEATURE__DEBUG_SETUP = 3,
+ DMUB_SHARED_STATE_FEATURE__LAST, /* Total number of features. */
+};
+
+/**
+ * union dmub_shared_state_ips_fw_signals - Firmware signals for IPS.
+ */
+union dmub_shared_state_ips_fw_signals {
+ struct {
+ uint32_t ips1_commit : 1; /**< 1 if in IPS1 or IPS0 RCG */
+ uint32_t ips2_commit : 1; /**< 1 if in IPS2 */
+ uint32_t in_idle : 1; /**< 1 if DMCUB is in idle */
+ uint32_t detection_required : 1; /**< 1 if detection is required */
+ uint32_t ips1z8_commit: 1; /**< 1 if in IPS1 Z8 Retention */
+ uint32_t reserved_bits : 27; /**< Reserved */
+ } bits;
+ uint32_t all;
+};
+
+/**
+ * union dmub_shared_state_ips_driver_signals - Driver signals for IPS.
+ */
+union dmub_shared_state_ips_driver_signals {
+ struct {
+ uint32_t allow_pg : 1; /**< 1 if PG is allowed */
+ uint32_t allow_ips1 : 1; /**< 1 if IPS1 is allowed */
+ uint32_t allow_ips2 : 1; /**< 1 if IPS2 is allowed */
+ uint32_t allow_z10 : 1; /**< 1 if Z10 is allowed */
+ uint32_t allow_idle: 1; /**< 1 if driver is allowing idle */
+ uint32_t allow_ips0_rcg : 1; /**< 1 if IPS0 RCG is allowed */
+ uint32_t allow_ips1_rcg : 1; /**< 1 if IPS1 RCG is allowed */
+ uint32_t allow_ips1z8 : 1; /**< 1 if IPS1 Z8 Retention is allowed */
+ uint32_t allow_dynamic_ips1 : 1; /**< 1 if IPS1 is allowed in dynamic use cases such as VPB */
+ uint32_t allow_dynamic_ips1_z8: 1; /**< 1 if IPS1 z8 ret is allowed in dynamic use cases such as VPB */
+ uint32_t reserved_bits : 22; /**< Reserved bits */
+ } bits;
+ uint32_t all;
+};
+
+/**
+ * IPS FW Version
+ */
+#define DMUB_SHARED_STATE__IPS_FW_VERSION 1
+
+struct dmub_shared_state_debug_setup {
+ union {
+ struct {
+ uint32_t exclude_points[62];
+ } profile_mode;
+ };
+};
+
+/**
+ * struct dmub_shared_state_ips_fw - Firmware state for IPS.
+ */
+struct dmub_shared_state_ips_fw {
+ union dmub_shared_state_ips_fw_signals signals; /**< 4 bytes, IPS signal bits */
+ uint32_t rcg_entry_count; /**< Entry counter for RCG */
+ uint32_t rcg_exit_count; /**< Exit counter for RCG */
+ uint32_t ips1_entry_count; /**< Entry counter for IPS1 */
+ uint32_t ips1_exit_count; /**< Exit counter for IPS1 */
+ uint32_t ips2_entry_count; /**< Entry counter for IPS2 */
+ uint32_t ips2_exit_count; /**< Exit counter for IPS2 */
+ uint32_t ips1_z8ret_entry_count; /**< Entry counter for IPS1 Z8 Retention */
+ uint32_t ips1_z8ret_exit_count; /**< Exit counter for IPS1 Z8 Retention */
+ uint32_t reserved[53]; /**< Reserved, to be updated when adding new fields. */
+}; /* 248-bytes, fixed */
+
+/**
+ * IPS Driver Version
+ */
+#define DMUB_SHARED_STATE__IPS_DRIVER_VERSION 1
+
+/**
+ * struct dmub_shared_state_ips_driver - Driver state for IPS.
+ */
+struct dmub_shared_state_ips_driver {
+ union dmub_shared_state_ips_driver_signals signals; /**< 4 bytes, IPS signal bits */
+ uint32_t reserved[61]; /**< Reserved, to be updated when adding new fields. */
+}; /* 248-bytes, fixed */
+
+/**
+ * struct dmub_shared_state_feature_common - Generic payload.
+ */
+struct dmub_shared_state_feature_common {
+ uint32_t padding[62];
+}; /* 248-bytes, fixed */
+
+/**
+ * struct dmub_shared_state_feature_header - Feature description.
+ */
+struct dmub_shared_state_feature_header {
+ uint16_t id; /**< Feature ID */
+ uint16_t version; /**< Feature version */
+ uint32_t reserved; /**< Reserved bytes. */
+}; /* 8 bytes, fixed */
+
+/**
+ * struct dmub_shared_state_feature_block - Feature block.
+ */
+struct dmub_shared_state_feature_block {
+ struct dmub_shared_state_feature_header header; /**< Shared state header. */
+ union dmub_shared_feature_state_union {
+ struct dmub_shared_state_feature_common common; /**< Generic data */
+ struct dmub_shared_state_ips_fw ips_fw; /**< IPS firmware state */
+ struct dmub_shared_state_ips_driver ips_driver; /**< IPS driver state */
+ struct dmub_shared_state_debug_setup debug_setup; /**< Debug setup */
+ } data; /**< Shared state data. */
+}; /* 256-bytes, fixed */
+
+/**
+ * Shared state size in bytes.
+ */
+#define DMUB_FW_HEADER_SHARED_STATE_SIZE \
+ ((DMUB_SHARED_STATE_FEATURE__LAST + 1) * sizeof(struct dmub_shared_state_feature_block))
+
+//==============================================================================
//</DMUB_STATUS>================================================================
//==============================================================================
//< DMUB_VBIOS>=================================================================
@@ -675,6 +1030,10 @@ enum dmub_cmd_vbios_type {
*/
DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT = 26,
/**
+ * Control PHY FSM
+ */
+ DMUB_CMD__VBIOS_TRANSMITTER_SET_PHY_FSM = 29,
+ /**
* Controls domain power gating
*/
DMUB_CMD__VBIOS_DOMAIN_CONTROL = 28,
@@ -761,6 +1120,11 @@ enum dmub_gpint_command {
DMUB_GPINT__PSR_RESIDENCY = 9,
/**
+ * DESC: Notifies DMCUB detection is done so detection required can be cleared.
+ */
+ DMUB_GPINT__NOTIFY_DETECTION_DONE = 12,
+
+ /**
* DESC: Get REPLAY state from FW.
* RETURN: REPLAY state enum. This enum may need to be converted to the legacy REPLAY state value.
*/
@@ -774,29 +1138,201 @@ enum dmub_gpint_command {
*/
DMUB_GPINT__REPLAY_RESIDENCY = 14,
+ /**
+ * DESC: Copy bounding box to the host.
+ * ARGS: Version of bounding box to copy
+ * RETURN: Result of copying bounding box
+ */
+ DMUB_GPINT__BB_COPY = 96,
/**
- * DESC: Notifies DMCUB detection is done so detection required can be cleared.
+ * DESC: Updates the host addresses bit48~bit63 for bounding box.
+ * ARGS: The word3 for the 64 bit address
*/
- DMUB_GPINT__NOTIFY_DETECTION_DONE = 12,
+ DMUB_GPINT__SET_BB_ADDR_WORD3 = 97,
+
+ /**
+ * DESC: Updates the host addresses bit32~bit47 for bounding box.
+ * ARGS: The word2 for the 64 bit address
+ */
+ DMUB_GPINT__SET_BB_ADDR_WORD2 = 98,
+
+ /**
+ * DESC: Updates the host addresses bit16~bit31 for bounding box.
+ * ARGS: The word1 for the 64 bit address
+ */
+ DMUB_GPINT__SET_BB_ADDR_WORD1 = 99,
+
+ /**
+ * DESC: Updates the host addresses bit0~bit15 for bounding box.
+ * ARGS: The word0 for the 64 bit address
+ */
+ DMUB_GPINT__SET_BB_ADDR_WORD0 = 100,
+
/**
* DESC: Updates the trace buffer lower 32-bit mask.
* ARGS: The new mask
* RETURN: Lower 32-bit mask.
*/
DMUB_GPINT__UPDATE_TRACE_BUFFER_MASK = 101,
+
/**
- * DESC: Updates the trace buffer lower 32-bit mask.
+ * DESC: Updates the trace buffer mask bit0~bit15.
* ARGS: The new mask
* RETURN: Lower 32-bit mask.
*/
DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD0 = 102,
+
/**
- * DESC: Updates the trace buffer mask bi0~bit15.
+ * DESC: Updates the trace buffer mask bit16~bit31.
* ARGS: The new mask
* RETURN: Lower 32-bit mask.
*/
DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD1 = 103,
+
+ /**
+ * DESC: Updates the trace buffer mask bit32~bit47.
+ * ARGS: The new mask
+ * RETURN: Lower 32-bit mask.
+ */
+ DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD2 = 114,
+
+ /**
+ * DESC: Updates the trace buffer mask bit48~bit63.
+ * ARGS: The new mask
+ * RETURN: Lower 32-bit mask.
+ */
+ DMUB_GPINT__SET_TRACE_BUFFER_MASK_WORD3 = 115,
+
+ /**
+ * DESC: Read the trace buffer mask bit0~bit15.
+ */
+ DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD0 = 116,
+
+ /**
+ * DESC: Read the trace buffer mask bit16~bit31.
+ */
+ DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD1 = 117,
+
+ /**
+ * DESC: Read the trace buffer mask bit32~bit47.
+ */
+ DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD2 = 118,
+
+ /**
+ * DESC: Read the trace buffer mask bit48~bit63.
+ */
+ DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD3 = 119,
+
+ /**
+ * DESC: Set IPS residency measurement
+ * ARGS: 0 - Disable ips measurement
+ * 1 - Enable ips measurement
+ */
+ DMUB_GPINT__IPS_RESIDENCY = 121,
+
+ /**
+ * DESC: Enable measurements for various task duration
+ * ARGS: 0 - Disable measurement
+ * 1 - Enable measurement
+ */
+ DMUB_GPINT__TRACE_DMUB_WAKE_ACTIVITY = 123,
+
+ /**
+ * DESC: Gets IPS residency in microseconds
+ * ARGS: 0 - Return IPS1 residency
+ * 1 - Return IPS2 residency
+ * 2 - Return IPS1_RCG residency
+ * 3 - Return IPS1_ONO2_ON residency
+ * RETURN: Total residency in microseconds - lower 32 bits
+ */
+ DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_LO = 124,
+
+ /**
+ * DESC: Gets IPS1 histogram counts
+ * ARGS: Bucket index
+ * RETURN: Total count for the bucket
+ */
+ DMUB_GPINT__GET_IPS1_HISTOGRAM_COUNTER = 125,
+
+ /**
+ * DESC: Gets IPS2 histogram counts
+ * ARGS: Bucket index
+ * RETURN: Total count for the bucket
+ */
+ DMUB_GPINT__GET_IPS2_HISTOGRAM_COUNTER = 126,
+
+ /**
+ * DESC: Gets IPS residency
+ * ARGS: 0 - Return IPS1 residency
+ * 1 - Return IPS2 residency
+ * 2 - Return IPS1_RCG residency
+ * 3 - Return IPS1_ONO2_ON residency
+ * RETURN: Total residency in milli-percent.
+ */
+ DMUB_GPINT__GET_IPS_RESIDENCY_PERCENT = 127,
+
+ /**
+ * DESC: Gets IPS1_RCG histogram counts
+ * ARGS: Bucket index
+ * RETURN: Total count for the bucket
+ */
+ DMUB_GPINT__GET_IPS1_RCG_HISTOGRAM_COUNTER = 128,
+
+ /**
+ * DESC: Gets IPS1_ONO2_ON histogram counts
+ * ARGS: Bucket index
+ * RETURN: Total count for the bucket
+ */
+ DMUB_GPINT__GET_IPS1_ONO2_ON_HISTOGRAM_COUNTER = 129,
+
+ /**
+ * DESC: Gets IPS entry counter during residency measurement
+ * ARGS: 0 - Return IPS1 entry counts
+ * 1 - Return IPS2 entry counts
+ * 2 - Return IPS1_RCG entry counts
+ * 3 - Return IPS1_ONO2_ON entry counts
+ * RETURN: Entry counter for selected IPS mode
+ */
+ DMUB_GPINT__GET_IPS_RESIDENCY_ENTRY_COUNTER = 130,
+
+ /**
+ * DESC: Gets IPS inactive residency in microseconds
+ * ARGS: 0 - Return IPS1_MAX residency
+ * 1 - Return IPS2 residency
+ * 2 - Return IPS1_RCG residency
+ * 3 - Return IPS1_ONO2_ON residency
+ * RETURN: Total inactive residency in microseconds - lower 32 bits
+ */
+ DMUB_GPINT__GET_IPS_INACTIVE_RESIDENCY_DURATION_US_LO = 131,
+
+ /**
+ * DESC: Gets IPS inactive residency in microseconds
+ * ARGS: 0 - Return IPS1_MAX residency
+ * 1 - Return IPS2 residency
+ * 2 - Return IPS1_RCG residency
+ * 3 - Return IPS1_ONO2_ON residency
+ * RETURN: Total inactive residency in microseconds - upper 32 bits
+ */
+ DMUB_GPINT__GET_IPS_INACTIVE_RESIDENCY_DURATION_US_HI = 132,
+
+ /**
+ * DESC: Gets IPS residency in microseconds
+ * ARGS: 0 - Return IPS1 residency
+ * 1 - Return IPS2 residency
+ * 2 - Return IPS1_RCG residency
+ * 3 - Return IPS1_ONO2_ON residency
+ * RETURN: Total residency in microseconds - upper 32 bits
+ */
+ DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_HI = 133,
+ /**
+ * DESC: Setup debug configs.
+ */
+ DMUB_GPINT__SETUP_DEBUG_MODE = 136,
+ /**
+ * DESC: Initiates IPS wake sequence.
+ */
+ DMUB_GPINT__IPS_DEBUG_WAKE = 137,
};
/**
@@ -876,6 +1412,16 @@ enum dmub_inbox0_command {
#define DMUB_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_RB_MAX_ENTRY)
/**
+ * Maximum number of items in the DMUB REG INBOX0 internal ringbuffer.
+ */
+#define DMUB_REG_INBOX0_RB_MAX_ENTRY 16
+
+/**
+ * Ringbuffer size in bytes.
+ */
+#define DMUB_REG_INBOX0_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_REG_INBOX0_RB_MAX_ENTRY)
+
+/**
* REG_SET mask for reg offload.
*/
#define REG_SET_MASK 0xFFFF
@@ -964,8 +1510,9 @@ enum dmub_cmd_type {
* Command type used for all panel control commands.
*/
DMUB_CMD__PANEL_CNTL = 74,
+
/**
- * Command type used for <TODO:description>
+ * Command type used for all CAB commands.
*/
DMUB_CMD__CAB_FOR_SS = 75,
@@ -990,7 +1537,6 @@ enum dmub_cmd_type {
/**
* Command type used for all VBIOS interface commands.
*/
-
/**
* Command type used for all REPLAY commands.
*/
@@ -1006,6 +1552,26 @@ enum dmub_cmd_type {
*/
DMUB_CMD__DPIA_HPD_INT_ENABLE = 86,
+ /**
+ * Command type used for all PSP commands.
+ */
+ DMUB_CMD__PSP = 88,
+
+ /**
+ * Command type used for all Fused IO commands.
+ */
+ DMUB_CMD__FUSED_IO = 89,
+
+ /**
+ * Command type used for all LSDMA commands.
+ */
+ DMUB_CMD__LSDMA = 90,
+
+ /**
+ * Command type used for all IPS commands.
+ */
+ DMUB_CMD__IPS = 91,
+
DMUB_CMD__VBIOS = 128,
};
@@ -1033,13 +1599,23 @@ enum dmub_out_cmd_type {
* Command type used for USB4 DPIA notification
*/
DMUB_OUT_CMD__DPIA_NOTIFICATION = 5,
+ /**
+ * Command type used for HPD redetect notification
+ */
+ DMUB_OUT_CMD__HPD_SENSE_NOTIFY = 6,
+ /**
+ * Command type used for Fused IO notification
+ */
+ DMUB_OUT_CMD__FUSED_IO = 7,
};
/* DMUB_CMD__DPIA command sub-types. */
enum dmub_cmd_dpia_type {
DMUB_CMD__DPIA_DIG1_DPIA_CONTROL = 0,
- DMUB_CMD__DPIA_SET_CONFIG_ACCESS = 1,
+ DMUB_CMD__DPIA_SET_CONFIG_ACCESS = 1, // will be replaced by DPIA_SET_CONFIG_REQUEST
DMUB_CMD__DPIA_MST_ALLOC_SLOTS = 2,
+ DMUB_CMD__DPIA_SET_TPS_NOTIFICATION = 3,
+ DMUB_CMD__DPIA_SET_CONFIG_REQUEST = 4,
};
/* DMUB_OUT_CMD__DPIA_NOTIFICATION command types. */
@@ -1057,7 +1633,8 @@ struct dmub_cmd_header {
unsigned int sub_type : 8; /**< command sub type */
unsigned int ret_status : 1; /**< 1 if returned data, 0 otherwise */
unsigned int multi_cmd_pending : 1; /**< 1 if multiple commands chained together */
- unsigned int reserved0 : 6; /**< reserved bits */
+ unsigned int is_reg_based : 1; /**< 1 if register based mailbox cmd, 0 if FB based cmd */
+ unsigned int reserved0 : 5; /**< reserved bits */
unsigned int payload_bytes : 6; /* payload excluding header - up to 60 bytes */
unsigned int reserved1 : 2; /**< reserved bits */
};
@@ -1195,11 +1772,11 @@ struct dmub_cmd_PLAT_54186_wa {
uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; /**< reg value */
uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; /**< reg value */
struct {
- uint8_t hubp_inst : 4; /**< HUBP instance */
- uint8_t tmz_surface : 1; /**< TMZ enable or disable */
- uint8_t immediate :1; /**< Immediate flip */
- uint8_t vmid : 4; /**< VMID */
- uint8_t grph_stereo : 1; /**< 1 if stereo */
+ uint32_t hubp_inst : 4; /**< HUBP instance */
+ uint32_t tmz_surface : 1; /**< TMZ enable or disable */
+ uint32_t immediate :1; /**< Immediate flip */
+ uint32_t vmid : 4; /**< VMID */
+ uint32_t grph_stereo : 1; /**< 1 if stereo */
uint32_t reserved : 21; /**< Reserved */
} flip_params; /**< Pageflip parameters */
uint32_t reserved[9]; /**< Reserved bits */
@@ -1214,6 +1791,28 @@ struct dmub_rb_cmd_PLAT_54186_wa {
};
/**
+ * enum dmub_cmd_mall_type - MALL commands
+ */
+enum dmub_cmd_mall_type {
+ /**
+ * Allows display refresh from MALL.
+ */
+ DMUB_CMD__MALL_ACTION_ALLOW = 0,
+ /**
+ * Disallows display refresh from MALL.
+ */
+ DMUB_CMD__MALL_ACTION_DISALLOW = 1,
+ /**
+ * Cursor copy for MALL.
+ */
+ DMUB_CMD__MALL_ACTION_COPY_CURSOR = 2,
+ /**
+ * Controls DF requests.
+ */
+ DMUB_CMD__MALL_ACTION_NO_DF_REQ = 3,
+};
+
+/**
* struct dmub_rb_cmd_mall - MALL command data.
*/
struct dmub_rb_cmd_mall {
@@ -1248,6 +1847,10 @@ enum dmub_cmd_cab_type {
* Fit surfaces in CAB (i.e. CAB enable)
*/
DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB = 2,
+ /**
+ * Do not fit surfaces in CAB (i.e. no CAB)
+ */
+ DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB = 3,
};
/**
@@ -1350,6 +1953,458 @@ struct dmub_rb_cmd_fw_assisted_mclk_switch_v2 {
struct dmub_cmd_fw_assisted_mclk_switch_config_v2 config_data;
};
+struct dmub_flip_addr_info {
+ uint32_t surf_addr_lo;
+ uint32_t surf_addr_c_lo;
+ uint32_t meta_addr_lo;
+ uint32_t meta_addr_c_lo;
+ uint16_t surf_addr_hi;
+ uint16_t surf_addr_c_hi;
+ uint16_t meta_addr_hi;
+ uint16_t meta_addr_c_hi;
+};
+
+struct dmub_fams2_flip_info {
+ union {
+ struct {
+ uint8_t is_immediate: 1;
+ } bits;
+ uint8_t all;
+ } config;
+ uint8_t otg_inst;
+ uint8_t pipe_mask;
+ uint8_t pad;
+ struct dmub_flip_addr_info addr_info;
+};
+
+struct dmub_rb_cmd_fams2_flip {
+ struct dmub_cmd_header header;
+ struct dmub_fams2_flip_info flip_info;
+};
+
+struct dmub_cmd_lsdma_data {
+ union {
+ struct lsdma_init_data {
+ union dmub_addr gpu_addr_base;
+ uint32_t ring_size;
+ } init_data;
+ struct lsdma_tiled_copy_data {
+ uint32_t src_addr_lo;
+ uint32_t src_addr_hi;
+
+ uint32_t dst_addr_lo;
+ uint32_t dst_addr_hi;
+
+ uint32_t src_x : 16;
+ uint32_t src_y : 16;
+
+ uint32_t dst_x : 16;
+ uint32_t dst_y : 16;
+
+ uint32_t src_width : 16;
+ uint32_t src_height : 16;
+
+ uint32_t dst_width : 16;
+ uint32_t dst_height : 16;
+
+ uint32_t rect_x : 16;
+ uint32_t rect_y : 16;
+
+ uint32_t src_swizzle_mode : 5;
+ uint32_t src_mip_max : 5;
+ uint32_t src_mip_id : 5;
+ uint32_t dst_mip_max : 5;
+ uint32_t dst_swizzle_mode : 5;
+ uint32_t dst_mip_id : 5;
+ uint32_t tmz : 1;
+ uint32_t dcc : 1;
+
+ uint32_t data_format : 6;
+ uint32_t padding1 : 4;
+ uint32_t dst_element_size : 3;
+ uint32_t num_type : 3;
+ uint32_t src_element_size : 3;
+ uint32_t write_compress : 2;
+ uint32_t cache_policy_dst : 2;
+ uint32_t cache_policy_src : 2;
+ uint32_t read_compress : 2;
+ uint32_t src_dim : 2;
+ uint32_t dst_dim : 2;
+ uint32_t max_uncom : 1;
+
+ uint32_t max_com : 2;
+ uint32_t padding : 30;
+ } tiled_copy_data;
+ struct lsdma_linear_copy_data {
+ uint32_t src_lo;
+ uint32_t src_hi;
+
+ uint32_t dst_lo;
+ uint32_t dst_hi;
+
+ uint32_t count : 30;
+ uint32_t cache_policy_dst : 2;
+
+ uint32_t tmz : 1;
+ uint32_t cache_policy_src : 2;
+ uint32_t padding : 29;
+ } linear_copy_data;
+ struct lsdma_linear_sub_window_copy_data {
+ uint32_t src_lo;
+ uint32_t src_hi;
+
+ uint32_t dst_lo;
+ uint32_t dst_hi;
+
+ uint32_t src_x : 16;
+ uint32_t src_y : 16;
+
+ uint32_t dst_x : 16;
+ uint32_t dst_y : 16;
+
+ uint32_t rect_x : 16;
+ uint32_t rect_y : 16;
+
+ uint32_t src_pitch : 16;
+ uint32_t dst_pitch : 16;
+
+ uint32_t src_slice_pitch;
+ uint32_t dst_slice_pitch;
+
+ uint32_t tmz : 1;
+ uint32_t element_size : 3;
+ uint32_t src_cache_policy : 3;
+ uint32_t dst_cache_policy : 3;
+ uint32_t reserved0 : 22;
+ } linear_sub_window_copy_data;
+ struct lsdma_reg_write_data {
+ uint32_t reg_addr;
+ uint32_t reg_data;
+ } reg_write_data;
+ struct lsdma_pio_copy_data {
+ uint32_t src_lo;
+ uint32_t src_hi;
+
+ uint32_t dst_lo;
+ uint32_t dst_hi;
+
+ union {
+ struct {
+ uint32_t byte_count : 26;
+ uint32_t src_loc : 1;
+ uint32_t dst_loc : 1;
+ uint32_t src_addr_inc : 1;
+ uint32_t dst_addr_inc : 1;
+ uint32_t overlap_disable : 1;
+ uint32_t constant_fill : 1;
+ } fields;
+ uint32_t raw;
+ } packet;
+ } pio_copy_data;
+ struct lsdma_pio_constfill_data {
+ uint32_t dst_lo;
+ uint32_t dst_hi;
+
+ union {
+ struct {
+ uint32_t byte_count : 26;
+ uint32_t src_loc : 1;
+ uint32_t dst_loc : 1;
+ uint32_t src_addr_inc : 1;
+ uint32_t dst_addr_inc : 1;
+ uint32_t overlap_disable : 1;
+ uint32_t constant_fill : 1;
+ } fields;
+ uint32_t raw;
+ } packet;
+
+ uint32_t data;
+ } pio_constfill_data;
+
+ uint32_t all[14];
+ } u;
+};
+
+struct dmub_rb_cmd_lsdma {
+ struct dmub_cmd_header header;
+ struct dmub_cmd_lsdma_data lsdma_data;
+};
+
+struct dmub_optc_state_v2 {
+ uint32_t v_total_min;
+ uint32_t v_total_max;
+ uint32_t v_total_mid;
+ uint32_t v_total_mid_frame_num;
+ uint8_t program_manual_trigger;
+ uint8_t tg_inst;
+ uint8_t pad[2];
+};
+
+struct dmub_optc_position {
+ uint32_t vpos;
+ uint32_t hpos;
+ uint32_t frame;
+};
+
+struct dmub_rb_cmd_fams2_drr_update {
+ struct dmub_cmd_header header;
+ struct dmub_optc_state_v2 dmub_optc_state_req;
+};
+
+/* HW and FW global configuration data for FAMS2 */
+/* FAMS2 types and structs */
+enum fams2_stream_type {
+ FAMS2_STREAM_TYPE_NONE = 0,
+ FAMS2_STREAM_TYPE_VBLANK = 1,
+ FAMS2_STREAM_TYPE_VACTIVE = 2,
+ FAMS2_STREAM_TYPE_DRR = 3,
+ FAMS2_STREAM_TYPE_SUBVP = 4,
+};
+
+struct dmub_rect16 {
+ /**
+ * Dirty rect x offset.
+ */
+ uint16_t x;
+
+ /**
+ * Dirty rect y offset.
+ */
+ uint16_t y;
+
+ /**
+ * Dirty rect width.
+ */
+ uint16_t width;
+
+ /**
+ * Dirty rect height.
+ */
+ uint16_t height;
+};
+
+/* static stream state */
+struct dmub_fams2_legacy_stream_static_state {
+ uint8_t vactive_det_fill_delay_otg_vlines;
+ uint8_t programming_delay_otg_vlines;
+}; //v0
+
+struct dmub_fams2_subvp_stream_static_state {
+ uint16_t vratio_numerator;
+ uint16_t vratio_denominator;
+ uint16_t phantom_vtotal;
+ uint16_t phantom_vactive;
+ union {
+ struct {
+ uint8_t is_multi_planar : 1;
+ uint8_t is_yuv420 : 1;
+ } bits;
+ uint8_t all;
+ } config;
+ uint8_t programming_delay_otg_vlines;
+ uint8_t prefetch_to_mall_otg_vlines;
+ uint8_t phantom_otg_inst;
+ uint8_t phantom_pipe_mask;
+ uint8_t phantom_plane_pipe_masks[DMUB_MAX_PHANTOM_PLANES]; // phantom pipe mask per plane (for flip passthrough)
+}; //v0
+
+struct dmub_fams2_drr_stream_static_state {
+ uint16_t nom_stretched_vtotal;
+ uint8_t programming_delay_otg_vlines;
+ uint8_t only_stretch_if_required;
+ uint8_t pad[2];
+}; //v0
+
+struct dmub_fams2_cmd_legacy_stream_static_state {
+ uint16_t vactive_det_fill_delay_otg_vlines;
+ uint16_t programming_delay_otg_vlines;
+}; //v1
+
+struct dmub_fams2_cmd_subvp_stream_static_state {
+ uint16_t vratio_numerator;
+ uint16_t vratio_denominator;
+ uint16_t phantom_vtotal;
+ uint16_t phantom_vactive;
+ uint16_t programming_delay_otg_vlines;
+ uint16_t prefetch_to_mall_otg_vlines;
+ union {
+ struct {
+ uint8_t is_multi_planar : 1;
+ uint8_t is_yuv420 : 1;
+ } bits;
+ uint8_t all;
+ } config;
+ uint8_t phantom_otg_inst;
+ uint8_t phantom_pipe_mask;
+ uint8_t pad0;
+ uint8_t phantom_plane_pipe_masks[DMUB_MAX_PHANTOM_PLANES]; // phantom pipe mask per plane (for flip passthrough)
+ uint8_t pad1[4 - (DMUB_MAX_PHANTOM_PLANES % 4)];
+}; //v1
+
+struct dmub_fams2_cmd_drr_stream_static_state {
+ uint16_t nom_stretched_vtotal;
+ uint16_t programming_delay_otg_vlines;
+ uint8_t only_stretch_if_required;
+ uint8_t pad[3];
+}; //v1
+
+union dmub_fams2_stream_static_sub_state {
+ struct dmub_fams2_legacy_stream_static_state legacy;
+ struct dmub_fams2_subvp_stream_static_state subvp;
+ struct dmub_fams2_drr_stream_static_state drr;
+}; //v0
+
+union dmub_fams2_cmd_stream_static_sub_state {
+ COMMON_STREAM_STATIC_SUB_STATE
+}; //v1
+
+union dmub_fams2_stream_static_sub_state_v2 {
+ COMMON_STREAM_STATIC_SUB_STATE
+}; //v2
+
+struct dmub_fams2_stream_static_state {
+ enum fams2_stream_type type;
+ uint32_t otg_vline_time_ns;
+ uint32_t otg_vline_time_ticks;
+ uint16_t htotal;
+ uint16_t vtotal; // nominal vtotal
+ uint16_t vblank_start;
+ uint16_t vblank_end;
+ uint16_t max_vtotal;
+ uint16_t allow_start_otg_vline;
+ uint16_t allow_end_otg_vline;
+ uint16_t drr_keepout_otg_vline; // after this vline, vtotal cannot be changed
+ uint8_t scheduling_delay_otg_vlines; // min time to budget for ready to microschedule start
+ uint8_t contention_delay_otg_vlines; // time to budget for contention on execution
+ uint8_t vline_int_ack_delay_otg_vlines; // min time to budget for vertical interrupt firing
+ uint8_t allow_to_target_delay_otg_vlines; // time from allow vline to target vline
+ union {
+ struct {
+ uint8_t is_drr: 1; // stream is DRR enabled
+ uint8_t clamp_vtotal_min: 1; // clamp vtotal to min instead of nominal
+ uint8_t min_ttu_vblank_usable: 1; // if min ttu vblank is above wm, no force pstate is needed in blank
+ } bits;
+ uint8_t all;
+ } config;
+ uint8_t otg_inst;
+ uint8_t pipe_mask; // pipe mask for the whole config
+ uint8_t num_planes;
+ uint8_t plane_pipe_masks[DMUB_MAX_PLANES]; // pipe mask per plane (for flip passthrough)
+ uint8_t pad[4 - (DMUB_MAX_PLANES % 4)];
+ union dmub_fams2_stream_static_sub_state sub_state;
+}; //v0
+
+struct dmub_fams2_cmd_stream_static_base_state {
+ enum fams2_stream_type type;
+ uint32_t otg_vline_time_ns;
+ uint32_t otg_vline_time_ticks;
+ uint16_t htotal;
+ uint16_t vtotal; // nominal vtotal
+ uint16_t vblank_start;
+ uint16_t vblank_end;
+ uint16_t max_vtotal;
+ uint16_t allow_start_otg_vline;
+ uint16_t allow_end_otg_vline;
+ uint16_t drr_keepout_otg_vline; // after this vline, vtotal cannot be changed
+ uint16_t scheduling_delay_otg_vlines; // min time to budget for ready to microschedule start
+ uint16_t contention_delay_otg_vlines; // time to budget for contention on execution
+ uint16_t vline_int_ack_delay_otg_vlines; // min time to budget for vertical interrupt firing
+ uint16_t allow_to_target_delay_otg_vlines; // time from allow vline to target vline
+ union {
+ struct {
+ uint8_t is_drr : 1; // stream is DRR enabled
+ uint8_t clamp_vtotal_min : 1; // clamp vtotal to min instead of nominal
+ uint8_t min_ttu_vblank_usable : 1; // if min ttu vblank is above wm, no force pstate is needed in blank
+ } bits;
+ uint8_t all;
+ } config;
+ uint8_t otg_inst;
+ uint8_t pipe_mask; // pipe mask for the whole config
+ uint8_t num_planes;
+ uint8_t plane_pipe_masks[DMUB_MAX_PLANES]; // pipe mask per plane (for flip passthrough)
+ uint8_t pad[4 - (DMUB_MAX_PLANES % 4)];
+}; //v1
+
+struct dmub_fams2_stream_static_state_v1 {
+ struct dmub_fams2_cmd_stream_static_base_state base;
+ union dmub_fams2_stream_static_sub_state_v2 sub_state;
+}; //v1
+
+/**
+ * enum dmub_fams2_allow_delay_check_mode - macroscheduler mode for breaking on excessive
+ * p-state request to allow latency
+ */
+enum dmub_fams2_allow_delay_check_mode {
+ /* No check for request to allow delay */
+ FAMS2_ALLOW_DELAY_CHECK_NONE = 0,
+ /* Check for request to allow delay */
+ FAMS2_ALLOW_DELAY_CHECK_FROM_START = 1,
+ /* Check for prepare to allow delay */
+ FAMS2_ALLOW_DELAY_CHECK_FROM_PREPARE = 2,
+};
+
+union dmub_fams2_global_feature_config {
+ struct {
+ uint32_t enable: 1;
+ uint32_t enable_ppt_check: 1;
+ uint32_t enable_stall_recovery: 1;
+ uint32_t enable_debug: 1;
+ uint32_t enable_offload_flip: 1;
+ uint32_t enable_visual_confirm: 1;
+ uint32_t allow_delay_check_mode: 2;
+ uint32_t reserved: 24;
+ } bits;
+ uint32_t all;
+};
+
+struct dmub_cmd_fams2_global_config {
+ uint32_t max_allow_delay_us; // max delay to assert allow from uclk change begin
+ uint32_t lock_wait_time_us; // time to forecast acquisition of lock
+ uint32_t num_streams;
+ union dmub_fams2_global_feature_config features;
+ uint32_t recovery_timeout_us;
+ uint32_t hwfq_flip_programming_delay_us;
+ uint32_t max_allow_to_target_delta_us; // how early DCN could assert P-State allow compared to the P-State target
+};
+
+union dmub_cmd_fams2_config {
+ struct dmub_cmd_fams2_global_config global;
+ struct dmub_fams2_stream_static_state stream; //v0
+ union {
+ struct dmub_fams2_cmd_stream_static_base_state base;
+ union dmub_fams2_cmd_stream_static_sub_state sub_state;
+ } stream_v1; //v1
+};
+
+struct dmub_fams2_config_v2 {
+ struct dmub_cmd_fams2_global_config global;
+ struct dmub_fams2_stream_static_state_v1 stream_v1[DMUB_MAX_STREAMS]; //v1
+};
+
+/**
+ * DMUB rb command definition for FAMS2 (merged SubVP, FPO, Legacy)
+ */
+struct dmub_rb_cmd_fams2 {
+ struct dmub_cmd_header header;
+ union dmub_cmd_fams2_config config;
+};
+
+/**
+ * Indirect buffer descriptor
+ */
+struct dmub_ib_data {
+ union dmub_addr src; // location of indirect buffer in memory
+ uint16_t size; // indirect buffer size in bytes
+};
+
+/**
+ * DMUB rb command definition for commands passed over indirect buffer
+ */
+struct dmub_rb_cmd_ib {
+ struct dmub_cmd_header header;
+ struct dmub_ib_data ib_data;
+};
+
/**
* enum dmub_cmd_idle_opt_type - Idle optimization command type.
*/
@@ -1367,7 +2422,17 @@ enum dmub_cmd_idle_opt_type {
/**
* DCN hardware notify idle.
*/
- DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE = 2
+ DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE = 2,
+
+ /**
+ * DCN hardware notify power state.
+ */
+ DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE = 3,
+
+ /**
+ * DCN notify to release HW.
+ */
+ DMUB_CMD__IDLE_OPT_RELEASE_HW = 4,
};
/**
@@ -1382,7 +2447,8 @@ struct dmub_rb_cmd_idle_opt_dcn_restore {
*/
struct dmub_dcn_notify_idle_cntl_data {
uint8_t driver_idle;
- uint8_t pad[1];
+ uint8_t skip_otg_disable;
+ uint8_t reserved[58];
};
/**
@@ -1394,6 +2460,33 @@ struct dmub_rb_cmd_idle_opt_dcn_notify_idle {
};
/**
+ * enum dmub_idle_opt_dc_power_state - DC power states.
+ */
+enum dmub_idle_opt_dc_power_state {
+ DMUB_IDLE_OPT_DC_POWER_STATE_UNKNOWN = 0,
+ DMUB_IDLE_OPT_DC_POWER_STATE_D0 = 1,
+ DMUB_IDLE_OPT_DC_POWER_STATE_D1 = 2,
+ DMUB_IDLE_OPT_DC_POWER_STATE_D2 = 4,
+ DMUB_IDLE_OPT_DC_POWER_STATE_D3 = 8,
+};
+
+/**
+ * struct dmub_idle_opt_set_dc_power_state_data - Data passed to FW in a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command.
+ */
+struct dmub_idle_opt_set_dc_power_state_data {
+ uint8_t power_state; /**< power state */
+ uint8_t pad[3]; /**< padding */
+};
+
+/**
+ * struct dmub_rb_cmd_idle_opt_set_dc_power_state - Data passed to FW in a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command.
+ */
+struct dmub_rb_cmd_idle_opt_set_dc_power_state {
+ struct dmub_cmd_header header; /**< header */
+ struct dmub_idle_opt_set_dc_power_state_data data;
+};
+
+/**
* struct dmub_clocks - Clock update notification.
*/
struct dmub_clocks {
@@ -1485,7 +2578,8 @@ struct dmub_dig_transmitter_control_data_v1_7 {
uint8_t connobj_id; /**< Connector Object Id defined in ObjectId.h */
uint8_t HPO_instance; /**< HPO instance (0: inst0, 1: inst1) */
uint8_t reserved1; /**< For future use */
- uint8_t reserved2[3]; /**< For future use */
+ uint8_t skip_phy_ssc_reduction;
+ uint8_t reserved2[2]; /**< For future use */
uint32_t reserved3[11]; /**< For future use */
};
@@ -1551,7 +2645,7 @@ struct dmub_rb_cmd_dig1_dpia_control {
};
/**
- * SET_CONFIG Command Payload
+ * SET_CONFIG Command Payload (deprecated)
*/
struct set_config_cmd_payload {
uint8_t msg_type; /* set config message type */
@@ -1559,7 +2653,7 @@ struct set_config_cmd_payload {
};
/**
- * Data passed from driver to FW in a DMUB_CMD__DPIA_SET_CONFIG_ACCESS command.
+ * Data passed from driver to FW in a DMUB_CMD__DPIA_SET_CONFIG_ACCESS command. (deprecated)
*/
struct dmub_cmd_set_config_control_data {
struct set_config_cmd_payload cmd_pkt;
@@ -1568,6 +2662,17 @@ struct dmub_cmd_set_config_control_data {
};
/**
+ * SET_CONFIG Request Command Payload
+ */
+struct set_config_request_cmd_payload {
+ uint8_t instance; /* DPIA instance */
+ uint8_t immed_status; /* Immediate status returned in case of error */
+ uint8_t msg_type; /* set config message type */
+ uint8_t reserved;
+ uint32_t msg_data; /* set config message data */
+};
+
+/**
* DMUB command structure for SET_CONFIG command.
*/
struct dmub_rb_cmd_set_config_access {
@@ -1576,6 +2681,14 @@ struct dmub_rb_cmd_set_config_access {
};
/**
+ * DMUB command structure for SET_CONFIG request command.
+ */
+struct dmub_rb_cmd_set_config_request {
+ struct dmub_cmd_header header; /* header */
+ struct set_config_request_cmd_payload payload; /* set config request payload */
+};
+
+/**
* Data passed from driver to FW in a DMUB_CMD__DPIA_MST_ALLOC_SLOTS command.
*/
struct dmub_cmd_mst_alloc_slots_control_data {
@@ -1594,6 +2707,24 @@ struct dmub_rb_cmd_set_mst_alloc_slots {
};
/**
+ * Data passed from driver to FW in a DMUB_CMD__DPIA_SET_TPS_NOTIFICATION command.
+ */
+struct dmub_cmd_tps_notification_data {
+ uint8_t instance; /* DPIA instance */
+ uint8_t tps; /* requested training pattern */
+ uint8_t reserved1;
+ uint8_t reserved2;
+};
+
+/**
+ * DMUB command structure for SET_TPS_NOTIFICATION command.
+ */
+struct dmub_rb_cmd_set_tps_notification {
+ struct dmub_cmd_header header; /* header */
+ struct dmub_cmd_tps_notification_data tps_notification; /* set tps_notification data */
+};
+
+/**
* DMUB command structure for DPIA HPD int enable control.
*/
struct dmub_rb_cmd_dpia_hpd_int_enable {
@@ -1795,7 +2926,11 @@ enum dp_hpd_type {
/**
* DP HPD short pulse
*/
- DP_IRQ
+ DP_IRQ = 1,
+ /**
+ * Failure to acquire DP HPD state
+ */
+ DP_NONE_HPD = 2
};
/**
@@ -1973,6 +3108,22 @@ struct dmub_rb_cmd_query_hpd_state {
struct dmub_cmd_hpd_state_query_data data;
};
+/**
+ * struct dmub_rb_cmd_hpd_sense_notify_data - HPD sense notification data.
+ */
+struct dmub_rb_cmd_hpd_sense_notify_data {
+ uint32_t old_hpd_sense_mask; /**< Old HPD sense mask */
+ uint32_t new_hpd_sense_mask; /**< New HPD sense mask */
+};
+
+/**
+ * struct dmub_rb_cmd_hpd_sense_notify - DMUB_OUT_CMD__HPD_SENSE_NOTIFY command.
+ */
+struct dmub_rb_cmd_hpd_sense_notify {
+ struct dmub_cmd_header header; /**< header */
+ struct dmub_rb_cmd_hpd_sense_notify_data data; /**< payload */
+};
+
/*
* Command IDs should be treated as stable ABI.
* Do not reuse or modify IDs.
@@ -2021,6 +3172,18 @@ enum dmub_cmd_psr_type {
DMUB_CMD__SET_PSR_POWER_OPT = 7,
};
+/**
+ * Different PSR residency modes.
+ * Different modes change the definition of PSR residency.
+ */
+enum psr_residency_mode {
+ PSR_RESIDENCY_MODE_PHY = 0,
+ PSR_RESIDENCY_MODE_ALPM,
+ PSR_RESIDENCY_MODE_ENABLEMENT_PERIOD,
+ /* Do not add below. */
+ PSR_RESIDENCY_MODE_LAST_ELEMENT,
+};
+
enum dmub_cmd_fams_type {
DMUB_CMD__FAMS_SETUP_FW_CTRL = 0,
DMUB_CMD__FAMS_DRR_UPDATE = 1,
@@ -2031,6 +3194,10 @@ enum dmub_cmd_fams_type {
* on (for any SubVP cases that use a DRR display)
*/
DMUB_CMD__FAMS_SET_MANUAL_TRIGGER = 3,
+ DMUB_CMD__FAMS2_CONFIG = 4,
+ DMUB_CMD__FAMS2_DRR_UPDATE = 5,
+ DMUB_CMD__FAMS2_FLIP = 6,
+ DMUB_CMD__FAMS2_IB_CONFIG = 7,
};
/**
@@ -2048,29 +3215,7 @@ enum psr_version {
/**
* PSR not supported.
*/
- PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF,
-};
-
-/**
- * enum dmub_cmd_mall_type - MALL commands
- */
-enum dmub_cmd_mall_type {
- /**
- * Allows display refresh from MALL.
- */
- DMUB_CMD__MALL_ACTION_ALLOW = 0,
- /**
- * Disallows display refresh from MALL.
- */
- DMUB_CMD__MALL_ACTION_DISALLOW = 1,
- /**
- * Cursor copy for MALL.
- */
- DMUB_CMD__MALL_ACTION_COPY_CURSOR = 2,
- /**
- * Controls DF requests.
- */
- DMUB_CMD__MALL_ACTION_NO_DF_REQ = 3,
+ PSR_VERSION_UNSUPPORTED = 0xFF, // psr_version field is only 8 bits wide
};
/**
@@ -2125,6 +3270,11 @@ enum phy_link_rate {
* UHBR10 - 20.0 Gbps/Lane
*/
PHY_RATE_2000 = 11,
+
+ PHY_RATE_675 = 12,
+ /**
+ * Rate 12 - 6.75 Gbps/Lane
+ */
};
/**
@@ -2143,6 +3293,7 @@ enum dmub_phy_fsm_state {
DMUB_PHY_FSM_POWER_DOWN,
DMUB_PHY_FSM_PLL_EN,
DMUB_PHY_FSM_TX_EN,
+ DMUB_PHY_FSM_TX_EN_TEST_MODE,
DMUB_PHY_FSM_FAST_LP,
DMUB_PHY_FSM_P2_PLL_OFF_CPM,
DMUB_PHY_FSM_P2_PLL_OFF_PG,
@@ -2278,17 +3429,23 @@ struct dmub_cmd_psr_copy_settings_data {
*/
uint8_t relock_delay_frame_cnt;
/**
- * Explicit padding to 2 byte boundary.
+ * esd recovery indicate.
*/
- uint8_t pad3;
+ uint8_t esd_recovery;
/**
* DSC Slice height.
*/
uint16_t dsc_slice_height;
/**
- * Explicit padding to 4 byte boundary.
+ * Some panels request main link off before xth vertical line
*/
- uint16_t pad;
+ uint16_t poweroff_before_vertical_line;
+ /**
+ * Some panels cannot handle idle pattern during PSR entry.
+ * To power down phy before disable stream to avoid sending
+ * idle pattern.
+ */
+ uint8_t power_down_phy_before_disable_stream;
};
/**
@@ -2733,22 +3890,75 @@ struct dmub_cmd_psr_set_power_opt_data {
uint32_t power_opt;
};
+/**
+ * Definition of a DMUB_CMD__SET_PSR_POWER_OPT command.
+ */
+struct dmub_rb_cmd_psr_set_power_opt {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+ /**
+ * Definition of a DMUB_CMD__SET_PSR_POWER_OPT command.
+ */
+ struct dmub_cmd_psr_set_power_opt_data psr_set_power_opt_data;
+};
+
+enum dmub_alpm_mode {
+ ALPM_AUXWAKE = 0,
+ ALPM_AUXLESS = 1,
+ ALPM_UNSUPPORTED = 2,
+};
+
+/**
+ * Definition of Replay Residency GPINT command.
+ * Bit[0] - Residency mode for Revision 0
+ * Bit[1] - Enable/Disable state
+ * Bit[2-3] - Revision number
+ * Bit[4-7] - Residency mode for Revision 1
+ * Bit[8] - Panel instance
+ * Bit[9-15] - Reserved
+ */
+
+enum pr_residency_mode {
+ PR_RESIDENCY_MODE_PHY = 0x0,
+ PR_RESIDENCY_MODE_ALPM,
+ PR_RESIDENCY_MODE_IPS2,
+ PR_RESIDENCY_MODE_FRAME_CNT,
+ PR_RESIDENCY_MODE_ENABLEMENT_PERIOD,
+};
+
#define REPLAY_RESIDENCY_MODE_SHIFT (0)
#define REPLAY_RESIDENCY_ENABLE_SHIFT (1)
+#define REPLAY_RESIDENCY_REVISION_SHIFT (2)
+#define REPLAY_RESIDENCY_MODE2_SHIFT (4)
#define REPLAY_RESIDENCY_MODE_MASK (0x1 << REPLAY_RESIDENCY_MODE_SHIFT)
-# define REPLAY_RESIDENCY_MODE_PHY (0x0 << REPLAY_RESIDENCY_MODE_SHIFT)
-# define REPLAY_RESIDENCY_MODE_ALPM (0x1 << REPLAY_RESIDENCY_MODE_SHIFT)
+# define REPLAY_RESIDENCY_FIELD_MODE_PHY (0x0 << REPLAY_RESIDENCY_MODE_SHIFT)
+# define REPLAY_RESIDENCY_FIELD_MODE_ALPM (0x1 << REPLAY_RESIDENCY_MODE_SHIFT)
+
+#define REPLAY_RESIDENCY_MODE2_MASK (0xF << REPLAY_RESIDENCY_MODE2_SHIFT)
+# define REPLAY_RESIDENCY_FIELD_MODE2_IPS (0x1 << REPLAY_RESIDENCY_MODE2_SHIFT)
+# define REPLAY_RESIDENCY_FIELD_MODE2_FRAME_CNT (0x2 << REPLAY_RESIDENCY_MODE2_SHIFT)
+# define REPLAY_RESIDENCY_FIELD_MODE2_EN_PERIOD (0x3 << REPLAY_RESIDENCY_MODE2_SHIFT)
#define REPLAY_RESIDENCY_ENABLE_MASK (0x1 << REPLAY_RESIDENCY_ENABLE_SHIFT)
# define REPLAY_RESIDENCY_DISABLE (0x0 << REPLAY_RESIDENCY_ENABLE_SHIFT)
# define REPLAY_RESIDENCY_ENABLE (0x1 << REPLAY_RESIDENCY_ENABLE_SHIFT)
+#define REPLAY_RESIDENCY_REVISION_MASK (0x3 << REPLAY_RESIDENCY_REVISION_SHIFT)
+# define REPLAY_RESIDENCY_REVISION_0 (0x0 << REPLAY_RESIDENCY_REVISION_SHIFT)
+# define REPLAY_RESIDENCY_REVISION_1 (0x1 << REPLAY_RESIDENCY_REVISION_SHIFT)
+
+/**
+ * Definition of a replay_state.
+ */
enum replay_state {
REPLAY_STATE_0 = 0x0,
REPLAY_STATE_1 = 0x10,
REPLAY_STATE_1A = 0x11,
REPLAY_STATE_2 = 0x20,
+ REPLAY_STATE_2A = 0x21,
REPLAY_STATE_3 = 0x30,
REPLAY_STATE_3INIT = 0x31,
REPLAY_STATE_4 = 0x40,
@@ -2756,6 +3966,7 @@ enum replay_state {
REPLAY_STATE_4B = 0x42,
REPLAY_STATE_4C = 0x43,
REPLAY_STATE_4D = 0x44,
+ REPLAY_STATE_4E = 0x45,
REPLAY_STATE_4B_LOCKED = 0x4A,
REPLAY_STATE_4C_UNLOCKED = 0x4B,
REPLAY_STATE_5 = 0x50,
@@ -2789,6 +4000,64 @@ enum dmub_cmd_replay_type {
* Set coasting vtotal.
*/
DMUB_CMD__REPLAY_SET_COASTING_VTOTAL = 3,
+ /**
+ * Set power opt and coasting vtotal.
+ */
+ DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL = 4,
+ /**
+ * Set disabled timing sync.
+ */
+ DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED = 5,
+ /**
+ * Set Residency Frameupdate Timer.
+ */
+ DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER = 6,
+ /**
+ * Set pseudo vtotal
+ */
+ DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL = 7,
+ /**
+ * Set adaptive sync sdp disabled
+ */
+ DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP = 8,
+ /**
+ * Set version
+ */
+ DMUB_CMD__REPLAY_SET_VERSION = 9,
+ /**
+ * Set Replay General command.
+ */
+ DMUB_CMD__REPLAY_SET_GENERAL_CMD = 16,
+};
+
+/**
+ * Replay general command sub-types.
+ */
+enum dmub_cmd_replay_general_subtype {
+ REPLAY_GENERAL_CMD_NOT_SUPPORTED = -1,
+ /**
+ * TODO: For backward compatibility, allow new commands only.
+ * REPLAY_GENERAL_CMD_SET_TIMING_SYNC_SUPPORTED,
+ * REPLAY_GENERAL_CMD_SET_RESIDENCY_FRAMEUPDATE_TIMER,
+ * REPLAY_GENERAL_CMD_SET_PSEUDO_VTOTAL,
+ */
+ REPLAY_GENERAL_CMD_DISABLED_ADAPTIVE_SYNC_SDP,
+ REPLAY_GENERAL_CMD_DISABLED_DESYNC_ERROR_DETECTION,
+ REPLAY_GENERAL_CMD_UPDATE_ERROR_STATUS,
+ REPLAY_GENERAL_CMD_SET_LOW_RR_ACTIVATE,
+};
+
+struct dmub_alpm_auxless_data {
+ uint16_t lfps_setup_ns;
+ uint16_t lfps_period_ns;
+ uint16_t lfps_silence_ns;
+ uint16_t lfps_t1_t2_override_us;
+ short lfps_t1_t2_offset_us;
+ uint8_t lttpr_count;
+ /*
+ * Padding to align structure to 4 byte boundary.
+ */
+ uint8_t pad[1];
};
/**
@@ -2861,6 +4130,83 @@ struct dmub_cmd_replay_copy_settings_data {
* Use FSM state for Replay power up/down
*/
uint8_t use_phy_fsm;
+ /**
+ * Use for AUX-less ALPM LFPS wake operation
+ */
+ struct dmub_alpm_auxless_data auxless_alpm_data;
+ /**
+ * @hpo_stream_enc_inst: HPO stream encoder instance
+ */
+ uint8_t hpo_stream_enc_inst;
+ /**
+ * @hpo_link_enc_inst: HPO link encoder instance
+ */
+ uint8_t hpo_link_enc_inst;
+ /**
+ * Determines if fast resync in ultra sleep mode is enabled/disabled.
+ */
+ uint8_t replay_support_fast_resync_in_ultra_sleep_mode;
+ /**
+ * @pad: Align structure to 4 byte boundary.
+ */
+ uint8_t pad[1];
+};
+
+
+/**
+ * Replay versions.
+ */
+enum replay_version {
+ /**
+ * FreeSync Replay
+ */
+ REPLAY_VERSION_FREESYNC_REPLAY = 0,
+ /**
+ * Panel Replay
+ */
+ REPLAY_VERSION_PANEL_REPLAY = 1,
+ /**
+ * Replay not supported.
+ */
+ REPLAY_VERSION_UNSUPPORTED = 0xFF,
+};
+
+/**
+ * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_VERSION command.
+ */
+struct dmub_cmd_replay_set_version_data {
+ /**
+ * Panel Instance.
+ * Panel instance to identify which replay_state to use
+ * Currently the support is only for 0 or 1
+ */
+ uint8_t panel_inst;
+ /**
+ * Replay version that FW should implement.
+ */
+ enum replay_version version;
+ /**
+ * Replay control version.
+ */
+ uint8_t cmd_version;
+ /**
+ * Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad[2];
+};
+
+/**
+ * Definition of a DMUB_CMD__REPLAY_SET_VERSION command.
+ */
+struct dmub_rb_cmd_replay_set_version {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+ /**
+ * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_VERSION command.
+ */
+ struct dmub_cmd_replay_set_version_data replay_set_version_data;
};
/**
@@ -2952,6 +4298,80 @@ struct dmub_cmd_replay_set_power_opt_data {
};
/**
+ * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+ */
+struct dmub_cmd_replay_set_timing_sync_data {
+ /**
+ * Panel Instance.
+ * Panel instance to identify which replay_state to use
+ * Currently the support is only for 0 or 1
+ */
+ uint8_t panel_inst;
+ /**
+ * REPLAY set_timing_sync
+ */
+ uint8_t timing_sync_supported;
+ /**
+ * Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad[2];
+};
+
+/**
+ * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+ */
+struct dmub_cmd_replay_set_pseudo_vtotal {
+ /**
+ * Panel Instance.
+ * Panel instance to identify which replay_state to use
+ * Currently the support is only for 0 or 1
+ */
+ uint8_t panel_inst;
+ /**
+ * Source Vtotal that Replay + IPS + ABM full screen video src vtotal
+ */
+ uint16_t vtotal;
+ /**
+ * Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad;
+};
+struct dmub_cmd_replay_disabled_adaptive_sync_sdp_data {
+ /**
+ * Panel Instance.
+ * Panel instance to identify which replay_state to use
+ * Currently the support is only for 0 or 1
+ */
+ uint8_t panel_inst;
+ /**
+ * force_disabled: force adaptive sync sdp to be disabled
+ */
+ uint8_t force_disabled;
+
+ uint8_t pad[2];
+};
+struct dmub_cmd_replay_set_general_cmd_data {
+ /**
+ * Panel Instance.
+ * Panel instance to identify which replay_state to use
+ * Currently the support is only for 0 or 1
+ */
+ uint8_t panel_inst;
+ /**
+ * subtype: replay general cmd sub type
+ */
+ uint8_t subtype;
+
+ uint8_t pad[2];
+ /**
+ * config data with param1 and param2
+ */
+ uint32_t param1;
+
+ uint32_t param2;
+};
+
+/**
* Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command.
*/
struct dmub_rb_cmd_replay_set_power_opt {
@@ -2983,6 +4403,14 @@ struct dmub_cmd_replay_set_coasting_vtotal_data {
* Currently the support is only for 0 or 1
*/
uint8_t panel_inst;
+ /**
+ * 16-bit value dictated by driver that indicates the coasting vtotal high byte part.
+ */
+ uint16_t coasting_vtotal_high;
+ /**
+ * Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad[2];
};
/**
@@ -3000,17 +4428,146 @@ struct dmub_rb_cmd_replay_set_coasting_vtotal {
};
/**
- * Definition of a DMUB_CMD__SET_PSR_POWER_OPT command.
+ * Definition of a DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL command.
*/
-struct dmub_rb_cmd_psr_set_power_opt {
+struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal {
/**
* Command header.
*/
struct dmub_cmd_header header;
/**
- * Definition of a DMUB_CMD__SET_PSR_POWER_OPT command.
+ * Definition of a DMUB_CMD__SET_REPLAY_POWER_OPT command.
*/
- struct dmub_cmd_psr_set_power_opt_data psr_set_power_opt_data;
+ struct dmub_cmd_replay_set_power_opt_data replay_set_power_opt_data;
+ /**
+ * Definition of a DMUB_CMD__REPLAY_SET_COASTING_VTOTAL command.
+ */
+ struct dmub_cmd_replay_set_coasting_vtotal_data replay_set_coasting_vtotal_data;
+};
+
+/**
+ * Definition of a DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+ */
+struct dmub_rb_cmd_replay_set_timing_sync {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+ /**
+ * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command.
+ */
+ struct dmub_cmd_replay_set_timing_sync_data replay_set_timing_sync_data;
+};
+
+/**
+ * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+ */
+struct dmub_rb_cmd_replay_set_pseudo_vtotal {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+ /**
+ * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+ */
+ struct dmub_cmd_replay_set_pseudo_vtotal data;
+};
+
+/**
+ * Definition of a DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP command.
+ */
+struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+ /**
+ * Definition of DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP command.
+ */
+ struct dmub_cmd_replay_disabled_adaptive_sync_sdp_data data;
+};
+
+/**
+ * Definition of a DMUB_CMD__REPLAY_SET_GENERAL_CMD command.
+ */
+struct dmub_rb_cmd_replay_set_general_cmd {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+ /**
+ * Definition of DMUB_CMD__REPLAY_SET_GENERAL_CMD command.
+ */
+ struct dmub_cmd_replay_set_general_cmd_data data;
+};
+
+/**
+ * Data passed from driver to FW in DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
+ */
+struct dmub_cmd_replay_frameupdate_timer_data {
+ /**
+ * Panel Instance.
+ * Panel instance to identify which replay_state to use
+ * Currently the support is only for 0 or 1
+ */
+ uint8_t panel_inst;
+ /**
+ * Replay Frameupdate Timer Enable or not
+ */
+ uint8_t enable;
+ /**
+ * REPLAY force refresh frame update number
+ */
+ uint16_t frameupdate_count;
+};
+/**
+ * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER
+ */
+struct dmub_rb_cmd_replay_set_frameupdate_timer {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+ /**
+ * Data passed in a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
+ */
+ struct dmub_cmd_replay_frameupdate_timer_data data;
+};
+
+/**
+ * Definition union of replay command set
+ */
+union dmub_replay_cmd_set {
+ /**
+ * Panel Instance.
+ * Panel instance to identify which replay_state to use
+ * Currently the support is only for 0 or 1
+ */
+ uint8_t panel_inst;
+ /**
+ * Definition of DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED command data.
+ */
+ struct dmub_cmd_replay_set_timing_sync_data sync_data;
+ /**
+ * Definition of DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command data.
+ */
+ struct dmub_cmd_replay_frameupdate_timer_data timer_data;
+ /**
+ * Definition of DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command data.
+ */
+ struct dmub_cmd_replay_set_pseudo_vtotal pseudo_vtotal_data;
+ /**
+ * Definition of DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP command data.
+ */
+ struct dmub_cmd_replay_disabled_adaptive_sync_sdp_data disabled_adaptive_sync_sdp_data;
+ /**
+ * Definition of DMUB_CMD__REPLAY_SET_VERSION command data.
+ */
+ struct dmub_cmd_replay_set_version_data version_data;
+ /**
+ * Definition of DMUB_CMD__REPLAY_SET_GENERAL_CMD command data.
+ */
+ struct dmub_cmd_replay_set_general_cmd_data set_general_cmd_data;
};
/**
@@ -3089,10 +4646,12 @@ enum hw_lock_client {
* PSR SU is the client of HW Lock Manager.
*/
HW_LOCK_CLIENT_PSR_SU = 1,
+ HW_LOCK_CLIENT_SUBVP = 3,
/**
* Replay is the client of HW Lock Manager.
*/
- HW_LOCK_CLIENT_REPLAY = 4,
+ HW_LOCK_CLIENT_REPLAY = 4,
+ HW_LOCK_CLIENT_FAMS2 = 5,
/**
* Invalid client.
*/
@@ -3185,6 +4744,132 @@ enum dmub_cmd_abm_type {
* on restore we update state with passed in data.
*/
DMUB_CMD__ABM_SAVE_RESTORE = 7,
+
+ /**
+ * Query ABM caps.
+ */
+ DMUB_CMD__ABM_QUERY_CAPS = 8,
+
+ /**
+ * Set ABM Events
+ */
+ DMUB_CMD__ABM_SET_EVENT = 9,
+
+ /**
+ * Get the current ACE curve.
+ */
+ DMUB_CMD__ABM_GET_ACE_CURVE = 10,
+
+ /**
+ * Get current histogram data
+ */
+ DMUB_CMD__ABM_GET_HISTOGRAM_DATA = 11,
+};
+
+/**
+ * LSDMA command sub-types.
+ */
+enum dmub_cmd_lsdma_type {
+ /**
+ * Initialize parameters for LSDMA.
+ * Ring buffer is mapped to the ring buffer
+ */
+ DMUB_CMD__LSDMA_INIT_CONFIG = 0,
+ /**
+ * LSDMA copies data from source to destination linearly
+ */
+ DMUB_CMD__LSDMA_LINEAR_COPY = 1,
+ /**
+ * LSDMA copies data from source to destination linearly in sub window
+ */
+ DMUB_CMD__LSDMA_LINEAR_SUB_WINDOW_COPY = 2,
+ /**
+ * Send the tiled-to-tiled copy command
+ */
+ DMUB_CMD__LSDMA_TILED_TO_TILED_COPY = 3,
+ /**
+ * Send the poll reg write command
+ */
+ DMUB_CMD__LSDMA_POLL_REG_WRITE = 4,
+ /**
+ * Send the pio copy command
+ */
+ DMUB_CMD__LSDMA_PIO_COPY = 5,
+ /**
+ * Send the pio constfill command
+ */
+ DMUB_CMD__LSDMA_PIO_CONSTFILL = 6,
+};
+
+struct abm_ace_curve {
+ /**
+ * @offsets: ACE curve offsets.
+ */
+ uint32_t offsets[ABM_MAX_NUM_OF_ACE_SEGMENTS];
+
+ /**
+ * @thresholds: ACE curve thresholds.
+ */
+ uint32_t thresholds[ABM_MAX_NUM_OF_ACE_SEGMENTS];
+
+ /**
+ * @slopes: ACE curve slopes.
+ */
+ uint32_t slopes[ABM_MAX_NUM_OF_ACE_SEGMENTS];
+};
+
+struct fixed_pt_format {
+ /**
+ * @sign_bit: Indicates whether one bit is reserved for the sign.
+ */
+ bool sign_bit;
+
+ /**
+ * @num_int_bits: Number of bits used for integer part.
+ */
+ uint8_t num_int_bits;
+
+ /**
+ * @num_frac_bits: Number of bits used for fractional part.
+ */
+ uint8_t num_frac_bits;
+
+ /**
+ * @pad: Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad;
+};
+
+struct abm_caps {
+ /**
+ * @num_hg_bins: Number of histogram bins.
+ */
+ uint8_t num_hg_bins;
+
+ /**
+ * @num_ace_segments: Number of ACE curve segments.
+ */
+ uint8_t num_ace_segments;
+
+ /**
+ * @pad: Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad[2];
+
+ /**
+ * @ace_thresholds_format: Format of the ACE thresholds. If not programmable, it is set to 0.
+ */
+ struct fixed_pt_format ace_thresholds_format;
+
+ /**
+ * @ace_offsets_format: Format of the ACE offsets. If not programmable, it is set to 0.
+ */
+ struct fixed_pt_format ace_offsets_format;
+
+ /**
+ * @ace_slopes_format: Format of the ACE slopes.
+ */
+ struct fixed_pt_format ace_slopes_format;
};
/**
@@ -3301,6 +4986,16 @@ struct dmub_cmd_abm_set_pipe_data {
* TODO: Remove.
*/
uint8_t ramping_boundary;
+
+ /**
+ * PwrSeq HW Instance.
+ */
+ uint8_t pwrseq_inst;
+
+ /**
+ * Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad[3];
};
/**
@@ -3319,6 +5014,24 @@ struct dmub_rb_cmd_abm_set_pipe {
};
/**
+ * Type of backlight control method to be used by ABM module
+ */
+enum dmub_backlight_control_type {
+ /**
+ * PWM Backlight control
+ */
+ DMU_BACKLIGHT_CONTROL_PWM = 0,
+ /**
+ * VESA Aux-based backlight control
+ */
+ DMU_BACKLIGHT_CONTROL_VESA_AUX = 1,
+ /**
+ * AMD DPCD Aux-based backlight control
+ */
+ DMU_BACKLIGHT_CONTROL_AMD_AUX = 2,
+};
+
+/**
* Data passed from driver to FW in a DMUB_CMD__ABM_SET_BACKLIGHT command.
*/
struct dmub_cmd_abm_set_backlight_data {
@@ -3345,9 +5058,42 @@ struct dmub_cmd_abm_set_backlight_data {
uint8_t panel_mask;
/**
+ * AUX HW Instance.
+ */
+ uint8_t aux_inst;
+
+ /**
* Explicit padding to 4 byte boundary.
*/
- uint8_t pad[2];
+ uint8_t pad[1];
+
+ /**
+ * Backlight control type.
+ * Value 0 is PWM backlight control.
+ * Value 1 is VESA AUX backlight control.
+ * Value 2 is AMD DPCD AUX backlight control.
+ */
+ enum dmub_backlight_control_type backlight_control_type;
+
+ /**
+ * Minimum luminance in nits.
+ */
+ uint32_t min_luminance;
+
+ /**
+ * Maximum luminance in nits.
+ */
+ uint32_t max_luminance;
+
+ /**
+ * Minimum backlight in pwm.
+ */
+ uint32_t min_backlight_pwm;
+
+ /**
+ * Maximum backlight in pwm.
+ */
+ uint32_t max_backlight_pwm;
};
/**
@@ -3569,7 +5315,6 @@ struct dmub_cmd_abm_pause_data {
uint8_t pad[1];
};
-
/**
* Definition of a DMUB_CMD__ABM_PAUSE command.
*/
@@ -3586,6 +5331,152 @@ struct dmub_rb_cmd_abm_pause {
};
/**
+ * Data passed from driver to FW in a DMUB_CMD__ABM_QUERY_CAPS command.
+ */
+struct dmub_cmd_abm_query_caps_in {
+ /**
+ * Panel instance.
+ */
+ uint8_t panel_inst;
+
+ /**
+ * Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad[3];
+};
+
+/**
+ * Data passed from FW to driver in a DMUB_CMD__ABM_QUERY_CAPS command.
+ */
+struct dmub_cmd_abm_query_caps_out {
+ /**
+ * SW Algorithm caps.
+ */
+ struct abm_caps sw_caps;
+
+ /**
+ * ABM HW caps.
+ */
+ struct abm_caps hw_caps;
+};
+
+/**
+ * Definition of a DMUB_CMD__ABM_QUERY_CAPS command.
+ */
+struct dmub_rb_cmd_abm_query_caps {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+
+ /**
+ * Data passed between FW and driver in a DMUB_CMD__ABM_QUERY_CAPS command.
+ */
+ union {
+ struct dmub_cmd_abm_query_caps_in abm_query_caps_in;
+ struct dmub_cmd_abm_query_caps_out abm_query_caps_out;
+ } data;
+};
+
+/**
+ * enum dmub_abm_ace_curve_type - ACE curve type.
+ */
+enum dmub_abm_ace_curve_type {
+ /**
+ * ACE curve as defined by the SW layer.
+ */
+ ABM_ACE_CURVE_TYPE__SW = 0,
+ /**
+ * ACE curve as defined by the SW to HW translation interface layer.
+ */
+ ABM_ACE_CURVE_TYPE__SW_IF = 1,
+};
+
+/**
+ * enum dmub_abm_histogram_type - Histogram type.
+ */
+enum dmub_abm_histogram_type {
+ /**
+ * Histogram as defined by the SW layer.
+ */
+ ABM_HISTOGRAM_TYPE__SW = 0,
+ /**
+ * Histogram as defined by the SW to HW translation interface layer.
+ */
+ ABM_HISTOGRAM_TYPE__SW_IF = 1,
+};
+
+/**
+ * Definition of a DMUB_CMD__ABM_GET_ACE_CURVE command.
+ */
+struct dmub_rb_cmd_abm_get_ace_curve {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+
+ /**
+ * Address where ACE curve should be copied.
+ */
+ union dmub_addr dest;
+
+ /**
+ * Type of ACE curve being queried.
+ */
+ enum dmub_abm_ace_curve_type ace_type;
+
+ /**
+ * Indirect buffer length.
+ */
+ uint16_t bytes;
+
+ /**
+ * eDP panel instance.
+ */
+ uint8_t panel_inst;
+
+ /**
+ * Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad;
+};
+
+/**
+ * Definition of a DMUB_CMD__ABM_GET_HISTOGRAM_DATA command.
+ */
+struct dmub_rb_cmd_abm_get_histogram {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+
+ /**
+ * Address where Histogram should be copied.
+ */
+ union dmub_addr dest;
+
+ /**
+ * Type of Histogram being queried.
+ */
+ enum dmub_abm_histogram_type histogram_type;
+
+ /**
+ * Indirect buffer length.
+ */
+ uint16_t bytes;
+
+ /**
+ * eDP panel instance.
+ */
+ uint8_t panel_inst;
+
+ /**
+ * Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad;
+};
+
+/**
* Definition of a DMUB_CMD__ABM_SAVE_RESTORE command.
*/
struct dmub_rb_cmd_abm_save_restore {
@@ -3616,6 +5507,52 @@ struct dmub_rb_cmd_abm_save_restore {
};
/**
+ * Data passed from driver to FW in a DMUB_CMD__ABM_SET_EVENT command.
+ */
+
+struct dmub_cmd_abm_set_event_data {
+
+ /**
+ * VB Scaling Init. Strength Mapping
+ * Byte 0: 0~255 for VB level 0
+ * Byte 1: 0~255 for VB level 1
+ * Byte 2: 0~255 for VB level 2
+ * Byte 3: 0~255 for VB level 3
+ */
+ uint32_t vb_scaling_strength_mapping;
+ /**
+ * VariBright Scaling Enable
+ */
+ uint8_t vb_scaling_enable;
+ /**
+ * Panel Control HW instance mask.
+ * Bit 0 is Panel Control HW instance 0.
+ * Bit 1 is Panel Control HW instance 1.
+ */
+ uint8_t panel_mask;
+
+ /**
+ * Explicit padding to 4 byte boundary.
+ */
+ uint8_t pad[2];
+};
+
+/**
+ * Definition of a DMUB_CMD__ABM_SET_EVENT command.
+ */
+struct dmub_rb_cmd_abm_set_event {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+
+ /**
+ * Data passed from driver to FW in a DMUB_CMD__ABM_SET_EVENT command.
+ */
+ struct dmub_cmd_abm_set_event_data abm_set_event_data;
+};
+
+/**
* Data passed from driver to FW in a DMUB_CMD__QUERY_FEATURE_CAPS command.
*/
struct dmub_cmd_query_feature_caps_data {
@@ -3645,17 +5582,16 @@ struct dmub_rb_cmd_query_feature_caps {
*/
struct dmub_cmd_visual_confirm_color_data {
/**
- * DMUB feature capabilities.
- * After DMUB init, driver will query FW capabilities prior to enabling certain features.
+ * DMUB visual confirm color
*/
-struct dmub_visual_confirm_color visual_confirm_color;
+ struct dmub_visual_confirm_color visual_confirm_color;
};
/**
* Definition of a DMUB_CMD__GET_VISUAL_CONFIRM_COLOR command.
*/
struct dmub_rb_cmd_get_visual_confirm_color {
- /**
+ /**
* Command header.
*/
struct dmub_cmd_header header;
@@ -3665,38 +5601,6 @@ struct dmub_rb_cmd_get_visual_confirm_color {
struct dmub_cmd_visual_confirm_color_data visual_confirm_color_data;
};
-struct dmub_optc_state {
- uint32_t v_total_max;
- uint32_t v_total_min;
- uint32_t tg_inst;
-};
-
-struct dmub_rb_cmd_drr_update {
- struct dmub_cmd_header header;
- struct dmub_optc_state dmub_optc_state_req;
-};
-
-struct dmub_cmd_fw_assisted_mclk_switch_pipe_data {
- uint32_t pix_clk_100hz;
- uint8_t max_ramp_step;
- uint8_t pipes;
- uint8_t min_refresh_in_hz;
- uint8_t pipe_count;
- uint8_t pipe_index[4];
-};
-
-struct dmub_cmd_fw_assisted_mclk_switch_config {
- uint8_t fams_enabled;
- uint8_t visual_confirm_enabled;
- uint16_t vactive_stretch_margin_us; // Extra vblank stretch required when doing FPO + Vactive
- struct dmub_cmd_fw_assisted_mclk_switch_pipe_data pipe_data[DMUB_MAX_FPO_STREAMS];
-};
-
-struct dmub_rb_cmd_fw_assisted_mclk_switch {
- struct dmub_cmd_header header;
- struct dmub_cmd_fw_assisted_mclk_switch_config config_data;
-};
-
/**
* enum dmub_cmd_panel_cntl_type - Panel control command.
*/
@@ -3709,13 +5613,17 @@ enum dmub_cmd_panel_cntl_type {
* Queries backlight info for the embedded panel.
*/
DMUB_CMD__PANEL_CNTL_QUERY_BACKLIGHT_INFO = 1,
+ /**
+ * Sets the PWM Freq as per user's requirement.
+ */
+ DMUB_CMD__PANEL_DEBUG_PWM_FREQ = 2,
};
/**
* struct dmub_cmd_panel_cntl_data - Panel control data.
*/
struct dmub_cmd_panel_cntl_data {
- uint32_t inst; /**< panel instance */
+ uint32_t pwrseq_inst; /**< pwrseq instance */
uint32_t current_backlight; /* in/out */
uint32_t bl_pwm_cntl; /* in/out */
uint32_t bl_pwm_period_cntl; /* in/out */
@@ -3735,6 +5643,38 @@ struct dmub_rb_cmd_panel_cntl {
struct dmub_cmd_panel_cntl_data data; /**< payload */
};
+struct dmub_optc_state {
+ uint32_t v_total_max;
+ uint32_t v_total_min;
+ uint32_t tg_inst;
+};
+
+struct dmub_rb_cmd_drr_update {
+ struct dmub_cmd_header header;
+ struct dmub_optc_state dmub_optc_state_req;
+};
+
+struct dmub_cmd_fw_assisted_mclk_switch_pipe_data {
+ uint32_t pix_clk_100hz;
+ uint8_t max_ramp_step;
+ uint8_t pipes;
+ uint8_t min_refresh_in_hz;
+ uint8_t pipe_count;
+ uint8_t pipe_index[4];
+};
+
+struct dmub_cmd_fw_assisted_mclk_switch_config {
+ uint8_t fams_enabled;
+ uint8_t visual_confirm_enabled;
+ uint16_t vactive_stretch_margin_us; // Extra vblank stretch required when doing FPO + Vactive
+ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data pipe_data[DMUB_MAX_FPO_STREAMS];
+};
+
+struct dmub_rb_cmd_fw_assisted_mclk_switch {
+ struct dmub_cmd_header header;
+ struct dmub_cmd_fw_assisted_mclk_switch_config config_data;
+};
+
/**
* Data passed from driver to FW in a DMUB_CMD__VBIOS_LVTMA_CONTROL command.
*/
@@ -3742,7 +5682,7 @@ struct dmub_cmd_lvtma_control_data {
uint8_t uc_pwr_action; /**< LVTMA_ACTION */
uint8_t bypass_panel_control_wait;
uint8_t reserved_0[2]; /**< For future use */
- uint8_t panel_inst; /**< LVTMA control instance */
+ uint8_t pwrseq_inst; /**< LVTMA control instance */
uint8_t reserved_1[3]; /**< For future use */
};
@@ -3778,6 +5718,34 @@ struct dmub_rb_cmd_transmitter_query_dp_alt {
struct dmub_rb_cmd_transmitter_query_dp_alt_data data; /**< payload */
};
+struct phy_test_mode {
+ uint8_t mode;
+ uint8_t pat0;
+ uint8_t pad[2];
+};
+
+/**
+ * Data passed in/out in a DMUB_CMD__VBIOS_TRANSMITTER_SET_PHY_FSM command.
+ */
+struct dmub_rb_cmd_transmitter_set_phy_fsm_data {
+ uint8_t phy_id; /**< 0=UNIPHYA, 1=UNIPHYB, 2=UNIPHYC, 3=UNIPHYD, 4=UNIPHYE, 5=UNIPHYF */
+ uint8_t mode; /**< HDMI/DP/DP2 etc */
+ uint8_t lane_num; /**< Number of lanes */
+ uint32_t symclk_100Hz; /**< PLL symclock in 100hz */
+ struct phy_test_mode test_mode;
+ enum dmub_phy_fsm_state state;
+ uint32_t status;
+ uint8_t pad;
+};
+
+/**
+ * Definition of a DMUB_CMD__VBIOS_TRANSMITTER_SET_PHY_FSM command.
+ */
+struct dmub_rb_cmd_transmitter_set_phy_fsm {
+ struct dmub_cmd_header header; /**< header */
+ struct dmub_rb_cmd_transmitter_set_phy_fsm_data data; /**< payload */
+};
+
/**
* Maximum number of bytes a chunk sent to DMUB for parsing
*/
@@ -3872,13 +5840,98 @@ struct dmub_rb_cmd_get_usbc_cable_id {
} data;
};
+enum dmub_cmd_fused_io_sub_type {
+ DMUB_CMD__FUSED_IO_EXECUTE = 0,
+ DMUB_CMD__FUSED_IO_ABORT = 1,
+};
+
+enum dmub_cmd_fused_request_type {
+ FUSED_REQUEST_READ,
+ FUSED_REQUEST_WRITE,
+ FUSED_REQUEST_POLL,
+};
+
+enum dmub_cmd_fused_request_status {
+ FUSED_REQUEST_STATUS_SUCCESS,
+ FUSED_REQUEST_STATUS_BEGIN,
+ FUSED_REQUEST_STATUS_SUBMIT,
+ FUSED_REQUEST_STATUS_REPLY,
+ FUSED_REQUEST_STATUS_POLL,
+ FUSED_REQUEST_STATUS_ABORTED,
+ FUSED_REQUEST_STATUS_FAILED = 0x80,
+ FUSED_REQUEST_STATUS_INVALID,
+ FUSED_REQUEST_STATUS_BUSY,
+ FUSED_REQUEST_STATUS_TIMEOUT,
+ FUSED_REQUEST_STATUS_POLL_TIMEOUT,
+};
+
+struct dmub_cmd_fused_request {
+ uint8_t status;
+ uint8_t type : 2;
+ uint8_t _reserved0 : 3;
+ uint8_t poll_mask_msb : 3; // Number of MSB to zero out from last byte before comparing
+ uint8_t identifier;
+ uint8_t _reserved1;
+ uint32_t timeout_us;
+ union dmub_cmd_fused_request_location {
+ struct dmub_cmd_fused_request_location_i2c {
+ uint8_t is_aux : 1; // False
+ uint8_t ddc_line : 3;
+ uint8_t over_aux : 1;
+ uint8_t _reserved0 : 3;
+ uint8_t address;
+ uint8_t offset;
+ uint8_t length;
+ } i2c;
+ struct dmub_cmd_fused_request_location_aux {
+ uint32_t is_aux : 1; // True
+ uint32_t ddc_line : 3;
+ uint32_t address : 20;
+ uint32_t length : 8; // Automatically split into 16B transactions
+ } aux;
+ } u;
+ uint8_t buffer[0x30]; // Read: out, write: in, poll: expected
+};
+
+struct dmub_rb_cmd_fused_io {
+ struct dmub_cmd_header header;
+ struct dmub_cmd_fused_request request;
+};
+
/**
* Command type of a DMUB_CMD__SECURE_DISPLAY command
*/
enum dmub_cmd_secure_display_type {
DMUB_CMD__SECURE_DISPLAY_TEST_CMD = 0, /* test command to only check if inbox message works */
DMUB_CMD__SECURE_DISPLAY_CRC_STOP_UPDATE,
- DMUB_CMD__SECURE_DISPLAY_CRC_WIN_NOTIFY
+ DMUB_CMD__SECURE_DISPLAY_CRC_WIN_NOTIFY,
+ DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_STOP_UPDATE,
+ DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_WIN_NOTIFY
+};
+
+#define MAX_ROI_NUM 2
+
+struct dmub_cmd_roi_info {
+ uint16_t x_start;
+ uint16_t x_end;
+ uint16_t y_start;
+ uint16_t y_end;
+ uint8_t otg_id;
+ uint8_t phy_id;
+};
+
+struct dmub_cmd_roi_window_ctl {
+ uint16_t x_start;
+ uint16_t x_end;
+ uint16_t y_start;
+ uint16_t y_end;
+ bool enable;
+};
+
+struct dmub_cmd_roi_ctl_info {
+ uint8_t otg_id;
+ uint8_t phy_id;
+ struct dmub_cmd_roi_window_ctl roi_ctl[MAX_ROI_NUM];
};
/**
@@ -3889,14 +5942,125 @@ struct dmub_rb_cmd_secure_display {
/**
* Data passed from driver to dmub firmware.
*/
- struct dmub_cmd_roi_info {
- uint16_t x_start;
- uint16_t x_end;
- uint16_t y_start;
- uint16_t y_end;
- uint8_t otg_id;
- uint8_t phy_id;
- } roi_info;
+ struct dmub_cmd_roi_info roi_info;
+ struct dmub_cmd_roi_ctl_info mul_roi_ctl;
+};
+
+/**
+ * Command type of a DMUB_CMD__PSP command
+ */
+enum dmub_cmd_psp_type {
+ DMUB_CMD__PSP_ASSR_ENABLE = 0
+};
+
+/**
+ * Data passed from driver to FW in a DMUB_CMD__PSP_ASSR_ENABLE command.
+ */
+struct dmub_cmd_assr_enable_data {
+ /**
+ * ASSR enable or disable.
+ */
+ uint8_t enable;
+ /**
+ * PHY port type.
+ * Indicates eDP / non-eDP port type
+ */
+ uint8_t phy_port_type;
+ /**
+ * PHY port ID.
+ */
+ uint8_t phy_port_id;
+ /**
+ * Link encoder index.
+ */
+ uint8_t link_enc_index;
+ /**
+ * HPO mode.
+ */
+ uint8_t hpo_mode;
+
+ /**
+ * Reserved field.
+ */
+ uint8_t reserved[7];
+};
+
+/**
+ * Definition of a DMUB_CMD__PSP_ASSR_ENABLE command.
+ */
+struct dmub_rb_cmd_assr_enable {
+ /**
+ * Command header.
+ */
+ struct dmub_cmd_header header;
+
+ /**
+ * Assr data.
+ */
+ struct dmub_cmd_assr_enable_data assr_data;
+
+ /**
+ * Reserved field.
+ */
+ uint32_t reserved[3];
+};
+
+/**
+ * Current definition of "ips_mode" from driver
+ */
+enum ips_residency_mode {
+ IPS_RESIDENCY__IPS1_MAX,
+ IPS_RESIDENCY__IPS2,
+ IPS_RESIDENCY__IPS1_RCG,
+ IPS_RESIDENCY__IPS1_ONO2_ON,
+ IPS_RESIDENCY__IPS1_Z8_RETENTION,
+ IPS_RESIDENCY__PG_ONO_LAST_SEEN_IN_IPS,
+ IPS_RESIDENCY__PG_ONO_CURRENT_STATE
+};
+
+#define NUM_IPS_HISTOGRAM_BUCKETS 16
+
+/**
+ * IPS residency statistics to be sent to driver - subset of struct dmub_ips_residency_stats
+ */
+struct dmub_ips_residency_info {
+ uint32_t residency_millipercent;
+ uint32_t entry_counter;
+ uint32_t histogram[NUM_IPS_HISTOGRAM_BUCKETS];
+ uint64_t total_time_us;
+ uint64_t total_inactive_time_us;
+ uint32_t ono_pg_state_at_collection;
+ uint32_t ono_pg_state_last_seen_in_ips;
+};
+
+/**
+ * Data passed from driver to FW in a DMUB_CMD__IPS_RESIDENCY_CNTL command.
+ */
+struct dmub_cmd_ips_residency_cntl_data {
+ uint8_t panel_inst;
+ uint8_t start_measurement;
+ uint8_t padding[2]; // align to 4-byte boundary
+};
+
+struct dmub_rb_cmd_ips_residency_cntl {
+ struct dmub_cmd_header header;
+ struct dmub_cmd_ips_residency_cntl_data cntl_data;
+};
+
+/**
+ * Data passed from FW to driver in a DMUB_CMD__IPS_QUERY_RESIDENCY_INFO command.
+ */
+struct dmub_cmd_ips_query_residency_info_data {
+ union dmub_addr dest;
+ uint32_t size;
+ uint32_t ips_mode;
+ uint8_t panel_inst;
+ uint8_t padding[3]; // align to 4-byte boundary
+};
+
+struct dmub_rb_cmd_ips_query_residency_info {
+ struct dmub_cmd_header header;
+ struct dmub_cmd_ips_query_residency_info_data info_data;
};
/**
@@ -3996,6 +6160,7 @@ union dmub_rb_cmd {
* Definition of a DMUB_CMD__MALL command.
*/
struct dmub_rb_cmd_mall mall;
+
/**
* Definition of a DMUB_CMD__CAB command.
*/
@@ -4017,6 +6182,7 @@ union dmub_rb_cmd {
* Definition of DMUB_CMD__PANEL_CNTL commands.
*/
struct dmub_rb_cmd_panel_cntl panel_cntl;
+
/**
* Definition of a DMUB_CMD__ABM_SET_PIPE command.
*/
@@ -4058,6 +6224,26 @@ union dmub_rb_cmd {
struct dmub_rb_cmd_abm_save_restore abm_save_restore;
/**
+ * Definition of a DMUB_CMD__ABM_QUERY_CAPS command.
+ */
+ struct dmub_rb_cmd_abm_query_caps abm_query_caps;
+
+ /**
+ * Definition of a DMUB_CMD__ABM_GET_ACE_CURVE command.
+ */
+ struct dmub_rb_cmd_abm_get_ace_curve abm_get_ace_curve;
+
+ /**
+ * Definition of a DMUB_CMD__ABM_GET_HISTOGRAM_DATA command.
+ */
+ struct dmub_rb_cmd_abm_get_histogram abm_get_histogram;
+
+ /**
+ * Definition of a DMUB_CMD__ABM_SET_EVENT command.
+ */
+ struct dmub_rb_cmd_abm_set_event abm_set_event;
+
+ /**
* Definition of a DMUB_CMD__DP_AUX_ACCESS command.
*/
struct dmub_rb_cmd_dp_aux_access dp_aux_access;
@@ -4088,18 +6274,30 @@ union dmub_rb_cmd {
*/
struct dmub_rb_cmd_transmitter_query_dp_alt query_dp_alt;
/**
+ * Definition of a DMUB_CMD__VBIOS_TRANSMITTER_SET_PHY_FSM command.
+ */
+ struct dmub_rb_cmd_transmitter_set_phy_fsm set_phy_fsm;
+ /**
* Definition of a DMUB_CMD__DPIA_DIG1_CONTROL command.
*/
struct dmub_rb_cmd_dig1_dpia_control dig1_dpia_control;
/**
* Definition of a DMUB_CMD__DPIA_SET_CONFIG_ACCESS command.
*/
- struct dmub_rb_cmd_set_config_access set_config_access;
+ struct dmub_rb_cmd_set_config_access set_config_access; // (deprecated)
+ /**
+ * Definition of a DMUB_CMD__DPIA_SET_CONFIG_ACCESS command.
+ */
+ struct dmub_rb_cmd_set_config_request set_config_request;
/**
* Definition of a DMUB_CMD__DPIA_MST_ALLOC_SLOTS command.
*/
struct dmub_rb_cmd_set_mst_alloc_slots set_mst_alloc_slots;
/**
+ * Definition of a DMUB_CMD__DPIA_SET_TPS_NOTIFICATION command.
+ */
+ struct dmub_rb_cmd_set_tps_notification set_tps_notification;
+ /**
* Definition of a DMUB_CMD__EDID_CEA command.
*/
struct dmub_rb_cmd_edid_cea edid_cea;
@@ -4125,6 +6323,14 @@ union dmub_rb_cmd {
* Definition of a DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE command.
*/
struct dmub_rb_cmd_idle_opt_dcn_notify_idle idle_opt_notify_idle;
+ /**
+ * Definition of a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command.
+ */
+ struct dmub_rb_cmd_idle_opt_set_dc_power_state idle_opt_set_dc_power_state;
+ /**
+ * Definition of a DMUB_CMD__REPLAY_SET_VERSION command.
+ */
+ struct dmub_rb_cmd_replay_set_version replay_set_version;
/*
* Definition of a DMUB_CMD__REPLAY_COPY_SETTINGS command.
*/
@@ -4141,6 +6347,51 @@ union dmub_rb_cmd {
* Definition of a DMUB_CMD__REPLAY_SET_COASTING_VTOTAL command.
*/
struct dmub_rb_cmd_replay_set_coasting_vtotal replay_set_coasting_vtotal;
+ /**
+ * Definition of a DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL command.
+ */
+ struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal replay_set_power_opt_and_coasting_vtotal;
+
+ struct dmub_rb_cmd_replay_set_timing_sync replay_set_timing_sync;
+ /**
+ * Definition of a DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER command.
+ */
+ struct dmub_rb_cmd_replay_set_frameupdate_timer replay_set_frameupdate_timer;
+ /**
+ * Definition of a DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL command.
+ */
+ struct dmub_rb_cmd_replay_set_pseudo_vtotal replay_set_pseudo_vtotal;
+ /**
+ * Definition of a DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP command.
+ */
+ struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp replay_disabled_adaptive_sync_sdp;
+ /**
+ * Definition of a DMUB_CMD__REPLAY_SET_GENERAL_CMD command.
+ */
+ struct dmub_rb_cmd_replay_set_general_cmd replay_set_general_cmd;
+ /**
+ * Definition of a DMUB_CMD__PSP_ASSR_ENABLE command.
+ */
+ struct dmub_rb_cmd_assr_enable assr_enable;
+
+ struct dmub_rb_cmd_fams2 fams2_config;
+
+ struct dmub_rb_cmd_ib ib_fams2_config;
+
+ struct dmub_rb_cmd_fams2_drr_update fams2_drr_update;
+
+ struct dmub_rb_cmd_fams2_flip fams2_flip;
+
+ struct dmub_rb_cmd_fused_io fused_io;
+
+ /**
+ * Definition of a DMUB_CMD__LSDMA command.
+ */
+ struct dmub_rb_cmd_lsdma lsdma;
+
+ struct dmub_rb_cmd_ips_residency_cntl ips_residency_cntl;
+
+ struct dmub_rb_cmd_ips_query_residency_info ips_query_residency_info;
};
/**
@@ -4167,6 +6418,11 @@ union dmub_rb_out_cmd {
* DPIA notification command.
*/
struct dmub_rb_cmd_dpia_notification dpia_notification;
+ /**
+ * HPD sense notification command.
+ */
+ struct dmub_rb_cmd_hpd_sense_notify hpd_sense_notify;
+ struct dmub_rb_cmd_fused_io fused_io;
};
#pragma pack(pop)
@@ -4177,10 +6433,6 @@ union dmub_rb_out_cmd {
//< DMUB_RB>====================================================================
//==============================================================================
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
/**
* struct dmub_rb_init_params - Initialization params for DMUB ringbuffer
*/
@@ -4218,6 +6470,45 @@ static inline bool dmub_rb_empty(struct dmub_rb *rb)
}
/**
+ * @brief gets number of outstanding requests in the RB
+ *
+ * @param rb DMUB Ringbuffer
+ * @return number of commands between rptr and wrpt (written but not yet consumed)
+ */
+static inline uint32_t dmub_rb_num_outstanding(struct dmub_rb *rb)
+{
+ uint32_t data_count;
+
+ if (rb->wrpt >= rb->rptr)
+ data_count = rb->wrpt - rb->rptr;
+ else
+ data_count = rb->capacity - (rb->rptr - rb->wrpt);
+
+ return data_count / DMUB_RB_CMD_SIZE;
+}
+
+/**
+ * @brief gets number of free buffers in the RB
+ *
+ * @param rb DMUB Ringbuffer
+ * @return number of command slots still available, 0 if capacity is unset
+ */
+static inline uint32_t dmub_rb_num_free(struct dmub_rb *rb)
+{
+	uint32_t data_count;
+
+	if (rb->wrpt >= rb->rptr)
+		data_count = rb->wrpt - rb->rptr;
+	else
+		data_count = rb->capacity - (rb->rptr - rb->wrpt);
+
+	/* +1 because 1 entry is always unusable */
+	data_count += DMUB_RB_CMD_SIZE;
+	/* capacity may be 0 (RB not set up): clamp instead of wrapping around */
+	return data_count > rb->capacity ? 0 : (rb->capacity - data_count) / DMUB_RB_CMD_SIZE;
+}
+
+/**
* @brief Checks if the ringbuffer is full
*
* @param rb DMUB Ringbuffer
@@ -4233,6 +6524,7 @@ static inline bool dmub_rb_full(struct dmub_rb *rb)
else
data_count = rb->capacity - (rb->rptr - rb->wrpt);
+ /* -1 because 1 entry is always unusable */
return (data_count == (rb->capacity - DMUB_RB_CMD_SIZE));
}
@@ -4247,15 +6539,18 @@ static inline bool dmub_rb_full(struct dmub_rb *rb)
static inline bool dmub_rb_push_front(struct dmub_rb *rb,
const union dmub_rb_cmd *cmd)
{
- uint64_t volatile *dst = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rb->wrpt);
- const uint64_t *src = (const uint64_t *)cmd;
+ uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt;
+ const uint8_t *src = (const uint8_t *)cmd;
uint8_t i;
+ if (rb->capacity == 0)
+ return false;
+
if (dmub_rb_full(rb))
return false;
// copying data
- for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
+ for (i = 0; i < DMUB_RB_CMD_SIZE; i++)
*dst++ = *src++;
rb->wrpt += DMUB_RB_CMD_SIZE;
@@ -4280,6 +6575,9 @@ static inline bool dmub_rb_out_push_front(struct dmub_rb *rb,
uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt;
const uint8_t *src = (const uint8_t *)cmd;
+ if (rb->capacity == 0)
+ return false;
+
if (dmub_rb_full(rb))
return false;
@@ -4325,6 +6623,9 @@ static inline void dmub_rb_get_rptr_with_offset(struct dmub_rb *rb,
uint32_t num_cmds,
uint32_t *next_rptr)
{
+ if (rb->capacity == 0)
+ return;
+
*next_rptr = rb->rptr + DMUB_RB_CMD_SIZE * num_cmds;
if (*next_rptr >= rb->capacity)
@@ -4388,6 +6689,9 @@ static inline bool dmub_rb_out_front(struct dmub_rb *rb,
*/
static inline bool dmub_rb_pop_front(struct dmub_rb *rb)
{
+ if (rb->capacity == 0)
+ return false;
+
if (dmub_rb_empty(rb))
return false;
@@ -4412,14 +6716,13 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb)
uint32_t rptr = rb->rptr;
uint32_t wptr = rb->wrpt;
+ if (rb->capacity == 0)
+ return;
+
while (rptr != wptr) {
uint64_t *data = (uint64_t *)((uint8_t *)(rb->base_address) + rptr);
uint8_t i;
- /* Don't remove this.
- * The contents need to actually be read from the ring buffer
- * for this function to be effective.
- */
for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++)
(void)READ_ONCE(*data++);
@@ -4461,12 +6764,7 @@ static inline void dmub_rb_get_return_data(struct dmub_rb *rb,
dmub_memcpy(cmd, rd_ptr, DMUB_RB_CMD_SIZE);
}
-#if defined(__cplusplus)
-}
-#endif
-
//==============================================================================
//</DMUB_RB>====================================================================
//==============================================================================
-
#endif /* _DMUB_CMD_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/Makefile b/drivers/gpu/drm/amd/display/dmub/src/Makefile
index caf095aca8f3..468b768c11ae 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/Makefile
+++ b/drivers/gpu/drm/amd/display/dmub/src/Makefile
@@ -24,6 +24,10 @@ DMUB = dmub_srv.o dmub_srv_stat.o dmub_reg.o dmub_dcn20.o dmub_dcn21.o
DMUB += dmub_dcn30.o dmub_dcn301.o dmub_dcn302.o dmub_dcn303.o
DMUB += dmub_dcn31.o dmub_dcn314.o dmub_dcn315.o dmub_dcn316.o
DMUB += dmub_dcn32.o
+DMUB += dmub_dcn35.o
+DMUB += dmub_dcn351.o
+DMUB += dmub_dcn36.o
+DMUB += dmub_dcn401.o
AMD_DAL_DMUB = $(addprefix $(AMDDALPATH)/dmub/src/,$(DMUB))
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
index 98dad0d47e72..73888c1bea93 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
@@ -191,7 +191,8 @@ void dmub_dcn20_setup_windows(struct dmub_srv *dmub,
const struct dmub_window *cw3,
const struct dmub_window *cw4,
const struct dmub_window *cw5,
- const struct dmub_window *cw6)
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6)
{
union dmub_addr offset;
uint64_t fb_base, fb_offset;
@@ -413,62 +414,66 @@ uint32_t dmub_dcn20_get_current_time(struct dmub_srv *dmub)
return REG_READ(DMCUB_TIMER_CURRENT);
}
-void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data)
+void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub)
{
uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset;
uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled;
+ struct dmub_timeout_info timeout = {0};
- if (!dmub || !diag_data)
+ if (!dmub)
return;
- memset(diag_data, 0, sizeof(*diag_data));
-
- diag_data->dmcub_version = dmub->fw_version;
-
- diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0);
- diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1);
- diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2);
- diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3);
- diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4);
- diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5);
- diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6);
- diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7);
- diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8);
- diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9);
- diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10);
- diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11);
- diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12);
- diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13);
- diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14);
- diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15);
-
- diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR);
- diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR);
- diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR);
-
- diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR);
- diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR);
- diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE);
-
- diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR);
- diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR);
- diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE);
+ /* timeout data filled externally, cache before resetting memory */
+ timeout = dmub->debug.timeout_info;
+ memset(&dmub->debug, 0, sizeof(dmub->debug));
+ dmub->debug.timeout_info = timeout;
+
+ dmub->debug.dmcub_version = dmub->fw_version;
+
+ dmub->debug.scratch[0] = REG_READ(DMCUB_SCRATCH0);
+ dmub->debug.scratch[1] = REG_READ(DMCUB_SCRATCH1);
+ dmub->debug.scratch[2] = REG_READ(DMCUB_SCRATCH2);
+ dmub->debug.scratch[3] = REG_READ(DMCUB_SCRATCH3);
+ dmub->debug.scratch[4] = REG_READ(DMCUB_SCRATCH4);
+ dmub->debug.scratch[5] = REG_READ(DMCUB_SCRATCH5);
+ dmub->debug.scratch[6] = REG_READ(DMCUB_SCRATCH6);
+ dmub->debug.scratch[7] = REG_READ(DMCUB_SCRATCH7);
+ dmub->debug.scratch[8] = REG_READ(DMCUB_SCRATCH8);
+ dmub->debug.scratch[9] = REG_READ(DMCUB_SCRATCH9);
+ dmub->debug.scratch[10] = REG_READ(DMCUB_SCRATCH10);
+ dmub->debug.scratch[11] = REG_READ(DMCUB_SCRATCH11);
+ dmub->debug.scratch[12] = REG_READ(DMCUB_SCRATCH12);
+ dmub->debug.scratch[13] = REG_READ(DMCUB_SCRATCH13);
+ dmub->debug.scratch[14] = REG_READ(DMCUB_SCRATCH14);
+ dmub->debug.scratch[15] = REG_READ(DMCUB_SCRATCH15);
+
+ dmub->debug.undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR);
+ dmub->debug.inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR);
+ dmub->debug.data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR);
+
+ dmub->debug.inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR);
+ dmub->debug.inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR);
+ dmub->debug.inbox1_size = REG_READ(DMCUB_INBOX1_SIZE);
+
+ dmub->debug.inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR);
+ dmub->debug.inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR);
+ dmub->debug.inbox0_size = REG_READ(DMCUB_INBOX0_SIZE);
REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled);
- diag_data->is_dmcub_enabled = is_dmub_enabled;
+ dmub->debug.is_dmcub_enabled = is_dmub_enabled;
REG_GET(DMCUB_CNTL, DMCUB_SOFT_RESET, &is_soft_reset);
- diag_data->is_dmcub_soft_reset = is_soft_reset;
+ dmub->debug.is_dmcub_soft_reset = is_soft_reset;
REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset);
- diag_data->is_dmcub_secure_reset = is_sec_reset;
+ dmub->debug.is_dmcub_secure_reset = is_sec_reset;
REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled);
- diag_data->is_traceport_en = is_traceport_enabled;
+ dmub->debug.is_traceport_en = is_traceport_enabled;
REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled);
- diag_data->is_cw0_enabled = is_cw0_enabled;
+ dmub->debug.is_cw0_enabled = is_cw0_enabled;
REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled);
- diag_data->is_cw6_enabled = is_cw6_enabled;
+ dmub->debug.is_cw6_enabled = is_cw6_enabled;
}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
index 1df128e57ed3..42c1fb4bc73f 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
@@ -197,7 +197,8 @@ void dmub_dcn20_setup_windows(struct dmub_srv *dmub,
const struct dmub_window *cw3,
const struct dmub_window *cw4,
const struct dmub_window *cw5,
- const struct dmub_window *cw6);
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6);
void dmub_dcn20_setup_mailbox(struct dmub_srv *dmub,
const struct dmub_region *inbox1);
@@ -246,6 +247,6 @@ bool dmub_dcn20_use_cached_trace_buffer(struct dmub_srv *dmub);
uint32_t dmub_dcn20_get_current_time(struct dmub_srv *dmub);
-void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *dmub_oca);
+void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub);
#endif /* _DMUB_DCN20_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c
index 81dae75e9ff8..a4abe951c838 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c
@@ -124,7 +124,8 @@ void dmub_dcn30_setup_windows(struct dmub_srv *dmub,
const struct dmub_window *cw3,
const struct dmub_window *cw4,
const struct dmub_window *cw5,
- const struct dmub_window *cw6)
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6)
{
union dmub_addr offset;
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.h
index 9a3afffd9b0f..066f35a50094 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.h
@@ -43,7 +43,8 @@ void dmub_dcn30_setup_windows(struct dmub_srv *dmub,
const struct dmub_window *cw3,
const struct dmub_window *cw4,
const struct dmub_window *cw5,
- const struct dmub_window *cw6);
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6);
#endif /* _DMUB_DCN30_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.c
index b42369984473..878700160fa9 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.c
@@ -2,7 +2,26 @@
/*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
* Authors: AMD
+ *
*/
#include "../dmub_srv.h"
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.h
index 84141d450256..abe087251cc1 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.h
@@ -2,7 +2,26 @@
/*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
* Authors: AMD
+ *
*/
#ifndef _DMUB_DCN303_H_
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
index 094e9f864557..4777c7203b2c 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
@@ -83,8 +83,8 @@ static inline void dmub_dcn31_translate_addr(const union dmub_addr *addr_in,
void dmub_dcn31_reset(struct dmub_srv *dmub)
{
union dmub_gpint_data_register cmd;
- const uint32_t timeout = 100;
- uint32_t in_reset, scratch, i, pwait_mode;
+ const uint32_t timeout = 100000;
+ uint32_t in_reset, is_enabled, scratch, i, pwait_mode;
REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset);
@@ -108,7 +108,7 @@ void dmub_dcn31_reset(struct dmub_srv *dmub)
}
for (i = 0; i < timeout; ++i) {
- scratch = dmub->hw_funcs.get_gpint_response(dmub);
+ scratch = REG_READ(DMCUB_SCRATCH7);
if (scratch == DMUB_GPINT__STOP_FW_RESPONSE)
break;
@@ -125,9 +125,14 @@ void dmub_dcn31_reset(struct dmub_srv *dmub)
/* Force reset in case we timed out, DMCUB is likely hung. */
}
- REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1);
- REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
- REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1);
+ REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled);
+
+ if (is_enabled) {
+ REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1);
+ REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1);
+ REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
+ }
+
REG_WRITE(DMCUB_INBOX1_RPTR, 0);
REG_WRITE(DMCUB_INBOX1_WPTR, 0);
REG_WRITE(DMCUB_OUTBOX1_RPTR, 0);
@@ -187,7 +192,8 @@ void dmub_dcn31_setup_windows(struct dmub_srv *dmub,
const struct dmub_window *cw3,
const struct dmub_window *cw4,
const struct dmub_window *cw5,
- const struct dmub_window *cw6)
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6)
{
union dmub_addr offset;
@@ -370,6 +376,8 @@ void dmub_dcn31_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu
boot_options.bits.usb4_cm_version = params->usb4_cm_version;
boot_options.bits.dpia_hpd_int_enable_supported = params->dpia_hpd_int_enable_supported;
boot_options.bits.power_optimization = params->power_optimization;
+ boot_options.bits.lower_hbr3_phy_ssc = params->lower_hbr3_phy_ssc;
+ boot_options.bits.override_hbr3_pll_vco = params->override_hbr3_pll_vco;
boot_options.bits.sel_mux_phy_c_d_phy_f_g = (dmub->asic == DMUB_ASIC_DCN31B) ? 1 : 0;
@@ -407,64 +415,75 @@ uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub)
return REG_READ(DMCUB_TIMER_CURRENT);
}
-void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data)
+void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub)
{
- uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset;
+ uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset, is_pwait;
uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled;
+ struct dmub_timeout_info timeout = {0};
- if (!dmub || !diag_data)
+ if (!dmub)
return;
- memset(diag_data, 0, sizeof(*diag_data));
-
- diag_data->dmcub_version = dmub->fw_version;
-
- diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0);
- diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1);
- diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2);
- diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3);
- diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4);
- diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5);
- diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6);
- diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7);
- diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8);
- diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9);
- diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10);
- diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11);
- diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12);
- diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13);
- diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14);
- diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15);
-
- diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR);
- diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR);
- diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR);
-
- diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR);
- diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR);
- diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE);
-
- diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR);
- diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR);
- diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE);
+ /* timeout data filled externally, cache before resetting memory */
+ timeout = dmub->debug.timeout_info;
+ memset(&dmub->debug, 0, sizeof(dmub->debug));
+ dmub->debug.timeout_info = timeout;
+
+ dmub->debug.dmcub_version = dmub->fw_version;
+
+ dmub->debug.scratch[0] = REG_READ(DMCUB_SCRATCH0);
+ dmub->debug.scratch[1] = REG_READ(DMCUB_SCRATCH1);
+ dmub->debug.scratch[2] = REG_READ(DMCUB_SCRATCH2);
+ dmub->debug.scratch[3] = REG_READ(DMCUB_SCRATCH3);
+ dmub->debug.scratch[4] = REG_READ(DMCUB_SCRATCH4);
+ dmub->debug.scratch[5] = REG_READ(DMCUB_SCRATCH5);
+ dmub->debug.scratch[6] = REG_READ(DMCUB_SCRATCH6);
+ dmub->debug.scratch[7] = REG_READ(DMCUB_SCRATCH7);
+ dmub->debug.scratch[8] = REG_READ(DMCUB_SCRATCH8);
+ dmub->debug.scratch[9] = REG_READ(DMCUB_SCRATCH9);
+ dmub->debug.scratch[10] = REG_READ(DMCUB_SCRATCH10);
+ dmub->debug.scratch[11] = REG_READ(DMCUB_SCRATCH11);
+ dmub->debug.scratch[12] = REG_READ(DMCUB_SCRATCH12);
+ dmub->debug.scratch[13] = REG_READ(DMCUB_SCRATCH13);
+ dmub->debug.scratch[14] = REG_READ(DMCUB_SCRATCH14);
+ dmub->debug.scratch[15] = REG_READ(DMCUB_SCRATCH15);
+
+ dmub->debug.undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR);
+ dmub->debug.inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR);
+ dmub->debug.data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR);
+
+ dmub->debug.inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR);
+ dmub->debug.inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR);
+ dmub->debug.inbox1_size = REG_READ(DMCUB_INBOX1_SIZE);
+
+ dmub->debug.inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR);
+ dmub->debug.inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR);
+ dmub->debug.inbox0_size = REG_READ(DMCUB_INBOX0_SIZE);
+
+ dmub->debug.outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR);
+ dmub->debug.outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR);
+ dmub->debug.outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE);
REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled);
- diag_data->is_dmcub_enabled = is_dmub_enabled;
+ dmub->debug.is_dmcub_enabled = is_dmub_enabled;
+
+ REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &is_pwait);
+ dmub->debug.is_pwait = is_pwait;
REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset);
- diag_data->is_dmcub_soft_reset = is_soft_reset;
+ dmub->debug.is_dmcub_soft_reset = is_soft_reset;
REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset);
- diag_data->is_dmcub_secure_reset = is_sec_reset;
+ dmub->debug.is_dmcub_secure_reset = is_sec_reset;
REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled);
- diag_data->is_traceport_en = is_traceport_enabled;
+ dmub->debug.is_traceport_en = is_traceport_enabled;
REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled);
- diag_data->is_cw0_enabled = is_cw0_enabled;
+ dmub->debug.is_cw0_enabled = is_cw0_enabled;
REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled);
- diag_data->is_cw6_enabled = is_cw6_enabled;
+ dmub->debug.is_cw6_enabled = is_cw6_enabled;
}
bool dmub_dcn31_should_detect(struct dmub_srv *dmub)
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h
index 4d520a893c7b..1c43ef2bca66 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h
@@ -199,7 +199,8 @@ void dmub_dcn31_setup_windows(struct dmub_srv *dmub,
const struct dmub_window *cw3,
const struct dmub_window *cw4,
const struct dmub_window *cw5,
- const struct dmub_window *cw6);
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6);
void dmub_dcn31_setup_mailbox(struct dmub_srv *dmub,
const struct dmub_region *inbox1);
@@ -250,7 +251,7 @@ void dmub_dcn31_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset);
uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub);
-void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data);
+void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub);
bool dmub_dcn31_should_detect(struct dmub_srv *dmub);
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c
index bf5994e292d9..ce041f6239dc 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c
@@ -26,33 +26,38 @@
#include "../dmub_srv.h"
#include "dmub_reg.h"
#include "dmub_dcn32.h"
+#include "dc/dc_types.h"
+#include "dc_hw_types.h"
#include "dcn/dcn_3_2_0_offset.h"
#include "dcn/dcn_3_2_0_sh_mask.h"
-#define DCN_BASE__INST0_SEG2 0x000034C0
-
-#define BASE_INNER(seg) DCN_BASE__INST0_SEG##seg
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg]
#define CTX dmub
#define REGS dmub->regs_dcn32
-#define REG_OFFSET_EXP(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name)
-
-const struct dmub_srv_dcn32_regs dmub_srv_dcn32_regs = {
-#define DMUB_SR(reg) REG_OFFSET_EXP(reg),
- {
- DMUB_DCN32_REGS()
- DMCUB_INTERNAL_REGS()
- },
+#define REG_OFFSET_EXP(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name)
+
+void dmub_srv_dcn32_regs_init(struct dmub_srv *dmub, struct dc_context *ctx)
+{
+ struct dmub_srv_dcn32_regs *regs = dmub->regs_dcn32;
+
+#define REG_STRUCT regs
+
+#define DMUB_SR(reg) REG_STRUCT->offset.reg = REG_OFFSET_EXP(reg);
+ DMUB_DCN32_REGS()
+ DMCUB_INTERNAL_REGS()
#undef DMUB_SR
-#define DMUB_SF(reg, field) FD_MASK(reg, field),
- { DMUB_DCN32_FIELDS() },
+#define DMUB_SF(reg, field) REG_STRUCT->mask.reg##__##field = FD_MASK(reg, field);
+ DMUB_DCN32_FIELDS()
#undef DMUB_SF
-#define DMUB_SF(reg, field) FD_SHIFT(reg, field),
- { DMUB_DCN32_FIELDS() },
+#define DMUB_SF(reg, field) REG_STRUCT->shift.reg##__##field = FD_SHIFT(reg, field);
+ DMUB_DCN32_FIELDS()
#undef DMUB_SF
-};
+
+#undef REG_STRUCT
+}
static void dmub_dcn32_get_fb_base_offset(struct dmub_srv *dmub,
uint64_t *fb_base,
@@ -84,44 +89,50 @@ static inline void dmub_dcn32_translate_addr(const union dmub_addr *addr_in,
void dmub_dcn32_reset(struct dmub_srv *dmub)
{
union dmub_gpint_data_register cmd;
- const uint32_t timeout = 30;
- uint32_t in_reset, scratch, i;
+ const uint32_t timeout = 100000;
+ uint32_t in_reset, is_enabled, scratch, i, pwait_mode;
REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset);
+ REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled);
- if (in_reset == 0) {
+ if (in_reset == 0 && is_enabled != 0) {
cmd.bits.status = 1;
cmd.bits.command_code = DMUB_GPINT__STOP_FW;
cmd.bits.param = 0;
dmub->hw_funcs.set_gpint(dmub, cmd);
- /**
- * Timeout covers both the ACK and the wait
- * for remaining work to finish.
- *
- * This is mostly bound by the PHY disable sequence.
- * Each register check will be greater than 1us, so
- * don't bother using udelay.
- */
-
for (i = 0; i < timeout; ++i) {
if (dmub->hw_funcs.is_gpint_acked(dmub, cmd))
break;
+
+ udelay(1);
}
for (i = 0; i < timeout; ++i) {
- scratch = dmub->hw_funcs.get_gpint_response(dmub);
+ scratch = REG_READ(DMCUB_SCRATCH7);
if (scratch == DMUB_GPINT__STOP_FW_RESPONSE)
break;
+
+ udelay(1);
}
+ for (i = 0; i < timeout; ++i) {
+ REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &pwait_mode);
+ if (pwait_mode & (1 << 0))
+ break;
+
+ udelay(1);
+ }
/* Force reset in case we timed out, DMCUB is likely hung. */
}
- REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1);
- REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
- REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1);
+ if (is_enabled) {
+ REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1);
+ udelay(1);
+ REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
+ }
+
REG_WRITE(DMCUB_INBOX1_RPTR, 0);
REG_WRITE(DMCUB_INBOX1_WPTR, 0);
REG_WRITE(DMCUB_OUTBOX1_RPTR, 0);
@@ -130,7 +141,7 @@ void dmub_dcn32_reset(struct dmub_srv *dmub)
REG_WRITE(DMCUB_OUTBOX0_WPTR, 0);
REG_WRITE(DMCUB_SCRATCH0, 0);
- /* Clear the GPINT command manually so we don't reset again. */
+ /* Clear the GPINT command manually so we don't send anything during boot. */
cmd.all = 0;
dmub->hw_funcs.set_gpint(dmub, cmd);
}
@@ -211,7 +222,8 @@ void dmub_dcn32_setup_windows(struct dmub_srv *dmub,
const struct dmub_window *cw3,
const struct dmub_window *cw4,
const struct dmub_window *cw5,
- const struct dmub_window *cw6)
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6)
{
union dmub_addr offset;
@@ -411,67 +423,72 @@ uint32_t dmub_dcn32_get_current_time(struct dmub_srv *dmub)
return REG_READ(DMCUB_TIMER_CURRENT);
}
-void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data)
+void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub)
{
- uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset;
- uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled;
+ uint32_t is_dmub_enabled, is_soft_reset, is_pwait;
+ uint32_t is_traceport_enabled, is_cw6_enabled;
+ struct dmub_timeout_info timeout = {0};
- if (!dmub || !diag_data)
+ if (!dmub)
return;
- memset(diag_data, 0, sizeof(*diag_data));
-
- diag_data->dmcub_version = dmub->fw_version;
-
- diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0);
- diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1);
- diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2);
- diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3);
- diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4);
- diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5);
- diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6);
- diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7);
- diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8);
- diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9);
- diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10);
- diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11);
- diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12);
- diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13);
- diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14);
- diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15);
- diag_data->scratch[16] = REG_READ(DMCUB_SCRATCH16);
-
- diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR);
- diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR);
- diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR);
-
- diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR);
- diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR);
- diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE);
-
- diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR);
- diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR);
- diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE);
+ /* timeout data filled externally, cache before resetting memory */
+ timeout = dmub->debug.timeout_info;
+ memset(&dmub->debug, 0, sizeof(dmub->debug));
+ dmub->debug.timeout_info = timeout;
+
+ dmub->debug.dmcub_version = dmub->fw_version;
+
+ dmub->debug.scratch[0] = REG_READ(DMCUB_SCRATCH0);
+ dmub->debug.scratch[1] = REG_READ(DMCUB_SCRATCH1);
+ dmub->debug.scratch[2] = REG_READ(DMCUB_SCRATCH2);
+ dmub->debug.scratch[3] = REG_READ(DMCUB_SCRATCH3);
+ dmub->debug.scratch[4] = REG_READ(DMCUB_SCRATCH4);
+ dmub->debug.scratch[5] = REG_READ(DMCUB_SCRATCH5);
+ dmub->debug.scratch[6] = REG_READ(DMCUB_SCRATCH6);
+ dmub->debug.scratch[7] = REG_READ(DMCUB_SCRATCH7);
+ dmub->debug.scratch[8] = REG_READ(DMCUB_SCRATCH8);
+ dmub->debug.scratch[9] = REG_READ(DMCUB_SCRATCH9);
+ dmub->debug.scratch[10] = REG_READ(DMCUB_SCRATCH10);
+ dmub->debug.scratch[11] = REG_READ(DMCUB_SCRATCH11);
+ dmub->debug.scratch[12] = REG_READ(DMCUB_SCRATCH12);
+ dmub->debug.scratch[13] = REG_READ(DMCUB_SCRATCH13);
+ dmub->debug.scratch[14] = REG_READ(DMCUB_SCRATCH14);
+ dmub->debug.scratch[15] = REG_READ(DMCUB_SCRATCH15);
+ dmub->debug.scratch[16] = REG_READ(DMCUB_SCRATCH16);
+
+ dmub->debug.undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR);
+ dmub->debug.inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR);
+ dmub->debug.data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR);
+
+ dmub->debug.inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR);
+ dmub->debug.inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR);
+ dmub->debug.inbox1_size = REG_READ(DMCUB_INBOX1_SIZE);
+
+ dmub->debug.inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR);
+ dmub->debug.inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR);
+ dmub->debug.inbox0_size = REG_READ(DMCUB_INBOX0_SIZE);
+
+ dmub->debug.outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR);
+ dmub->debug.outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR);
+ dmub->debug.outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE);
REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled);
- diag_data->is_dmcub_enabled = is_dmub_enabled;
+ dmub->debug.is_dmcub_enabled = is_dmub_enabled;
- REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset);
- diag_data->is_dmcub_soft_reset = is_soft_reset;
+ REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &is_pwait);
+ dmub->debug.is_pwait = is_pwait;
- REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset);
- diag_data->is_dmcub_secure_reset = is_sec_reset;
+ REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset);
+ dmub->debug.is_dmcub_soft_reset = is_soft_reset;
REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled);
- diag_data->is_traceport_en = is_traceport_enabled;
-
- REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled);
- diag_data->is_cw0_enabled = is_cw0_enabled;
+ dmub->debug.is_traceport_en = is_traceport_enabled;
REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled);
- diag_data->is_cw6_enabled = is_cw6_enabled;
+ dmub->debug.is_cw6_enabled = is_cw6_enabled;
- diag_data->gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0);
+ dmub->debug.gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0);
}
void dmub_dcn32_configure_dmub_in_system_memory(struct dmub_srv *dmub)
{
@@ -500,3 +517,32 @@ uint32_t dmub_dcn32_read_inbox0_ack_register(struct dmub_srv *dmub)
{
return REG_READ(DMCUB_SCRATCH17);
}
+
+void dmub_dcn32_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index)
+{
+	uint32_t toggle;
+
+	/* SubVP 1: SCRATCH23 alternates the target between SCRATCH20/22 and SCRATCH18/19. */
+	if (subvp_index == 1) {
+		toggle = REG_READ(DMCUB_SCRATCH23);
+		if (!toggle) {
+			REG_WRITE(DMCUB_SCRATCH20, addr->grph.addr.low_part);
+			REG_WRITE(DMCUB_SCRATCH22, addr->grph.meta_addr.low_part);
+		} else {
+			REG_WRITE(DMCUB_SCRATCH18, addr->grph.addr.low_part);
+			REG_WRITE(DMCUB_SCRATCH19, addr->grph.meta_addr.low_part);
+		}
+		REG_WRITE(DMCUB_SCRATCH23, !toggle);
+	} else if (subvp_index == 0) {
+		/* SubVP 0: SCRATCH15 alternates the target between SCRATCH12/13 and SCRATCH9/11. */
+		toggle = REG_READ(DMCUB_SCRATCH15);
+		if (!toggle) {
+			REG_WRITE(DMCUB_SCRATCH12, addr->grph.addr.low_part);
+			REG_WRITE(DMCUB_SCRATCH13, addr->grph.meta_addr.low_part);
+		} else {
+			REG_WRITE(DMCUB_SCRATCH9, addr->grph.addr.low_part);
+			REG_WRITE(DMCUB_SCRATCH11, addr->grph.meta_addr.low_part);
+		}
+		REG_WRITE(DMCUB_SCRATCH15, !toggle);
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h
index d58a1e4b9f1c..daf81027d663 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h
@@ -89,6 +89,9 @@ struct dmub_srv;
DMUB_SR(DMCUB_REGION5_OFFSET) \
DMUB_SR(DMCUB_REGION5_OFFSET_HIGH) \
DMUB_SR(DMCUB_REGION5_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION6_OFFSET) \
+ DMUB_SR(DMCUB_REGION6_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION6_TOP_ADDRESS) \
DMUB_SR(DMCUB_SCRATCH0) \
DMUB_SR(DMCUB_SCRATCH1) \
DMUB_SR(DMCUB_SCRATCH2) \
@@ -107,6 +110,12 @@ struct dmub_srv;
DMUB_SR(DMCUB_SCRATCH15) \
DMUB_SR(DMCUB_SCRATCH16) \
DMUB_SR(DMCUB_SCRATCH17) \
+ DMUB_SR(DMCUB_SCRATCH18) \
+ DMUB_SR(DMCUB_SCRATCH19) \
+ DMUB_SR(DMCUB_SCRATCH20) \
+ DMUB_SR(DMCUB_SCRATCH21) \
+ DMUB_SR(DMCUB_SCRATCH22) \
+ DMUB_SR(DMCUB_SCRATCH23) \
DMUB_SR(DMCUB_GPINT_DATAIN0) \
DMUB_SR(DMCUB_GPINT_DATAIN1) \
DMUB_SR(DMCUB_GPINT_DATAOUT) \
@@ -149,6 +158,8 @@ struct dmub_srv;
DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \
DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_TOP_ADDRESS) \
DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_ENABLE) \
+ DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_ENABLE) \
DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \
DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \
DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \
@@ -156,7 +167,8 @@ struct dmub_srv;
DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) \
DMUB_SF(DMCUB_REGION3_TMR_AXI_SPACE, DMCUB_REGION3_TMR_AXI_SPACE) \
DMUB_SF(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN) \
- DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK)
+ DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) \
+ DMUB_SF(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS)
struct dmub_srv_dcn32_reg_offset {
#define DMUB_SR(reg) uint32_t reg;
@@ -178,13 +190,11 @@ struct dmub_srv_dcn32_reg_mask {
};
struct dmub_srv_dcn32_regs {
- const struct dmub_srv_dcn32_reg_offset offset;
- const struct dmub_srv_dcn32_reg_mask mask;
- const struct dmub_srv_dcn32_reg_shift shift;
+ struct dmub_srv_dcn32_reg_offset offset;
+ struct dmub_srv_dcn32_reg_mask mask;
+ struct dmub_srv_dcn32_reg_shift shift;
};
-extern const struct dmub_srv_dcn32_regs dmub_srv_dcn32_regs;
-
void dmub_dcn32_reset(struct dmub_srv *dmub);
void dmub_dcn32_reset_release(struct dmub_srv *dmub);
@@ -202,7 +212,8 @@ void dmub_dcn32_setup_windows(struct dmub_srv *dmub,
const struct dmub_window *cw3,
const struct dmub_window *cw4,
const struct dmub_window *cw5,
- const struct dmub_window *cw6);
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6);
void dmub_dcn32_setup_mailbox(struct dmub_srv *dmub,
const struct dmub_region *inbox1);
@@ -249,11 +260,14 @@ void dmub_dcn32_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset);
uint32_t dmub_dcn32_get_current_time(struct dmub_srv *dmub);
-void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data);
+void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub);
void dmub_dcn32_configure_dmub_in_system_memory(struct dmub_srv *dmub);
void dmub_dcn32_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data);
void dmub_dcn32_clear_inbox0_ack_register(struct dmub_srv *dmub);
uint32_t dmub_dcn32_read_inbox0_ack_register(struct dmub_srv *dmub);
+void dmub_dcn32_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index);
+
+void dmub_srv_dcn32_regs_init(struct dmub_srv *dmub, struct dc_context *ctx);
#endif /* _DMUB_DCN32_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
new file mode 100644
index 000000000000..834e5434ccb8
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "../dmub_srv.h"
+#include "dc_types.h"
+#include "dmub_reg.h"
+#include "dmub_dcn35.h"
+#include "dc/dc_types.h"
+
+#include "dcn/dcn_3_5_0_offset.h"
+#include "dcn/dcn_3_5_0_sh_mask.h"
+
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg] /* needs a 'ctx' in scope at the use site */
+#define CTX dmub
+#define REGS dmub->regs_dcn35
+#define REG_OFFSET_EXP(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name)
+
+void dmub_srv_dcn35_regs_init(struct dmub_srv *dmub, struct dc_context *ctx)
+{
+	struct dmub_srv_dcn35_regs *regs = dmub->regs_dcn35;
+#define REG_STRUCT regs
+
+	/* Fill the offset, mask and shift tables in dmub->regs_dcn35 from the register lists. */
+#define DMUB_SR(reg) REG_STRUCT->offset.reg = REG_OFFSET_EXP(reg);
+	DMUB_DCN35_REGS()
+	DMCUB_INTERNAL_REGS()
+#undef DMUB_SR
+#define DMUB_SF(reg, field) REG_STRUCT->mask.reg##__##field = FD_MASK(reg, field);
+	DMUB_DCN35_FIELDS()
+#undef DMUB_SF
+#define DMUB_SF(reg, field) REG_STRUCT->shift.reg##__##field = FD_SHIFT(reg, field);
+	DMUB_DCN35_FIELDS()
+#undef DMUB_SF
+#undef REG_STRUCT
+}
+
+static void dmub_dcn35_get_fb_base_offset(struct dmub_srv *dmub,
+					  uint64_t *fb_base,
+					  uint64_t *fb_offset)
+{
+	uint32_t base_field;
+	uint32_t offset_field;
+
+	/*
+	 * Report the FB_BASE and FB_OFFSET register fields through the two
+	 * output pointers. The values are always read back from hardware;
+	 * the shortcut that returned dmub->fb_base and dmub->fb_offset when
+	 * either was non-zero is not active in this implementation.
+	 */
+	/* Each field is widened to 64 bits, then shifted left by 24. */
+	REG_GET(DCN_VM_FB_LOCATION_BASE, FB_BASE, &base_field);
+	*fb_base = (uint64_t)base_field << 24;
+
+	REG_GET(DCN_VM_FB_OFFSET, FB_OFFSET, &offset_field);
+	*fb_offset = (uint64_t)offset_field << 24;
+}
+
+static inline void dmub_dcn35_translate_addr(const union dmub_addr *addr_in,
+					     uint64_t fb_base, uint64_t fb_offset,
+					     union dmub_addr *addr_out)
+{
+	/* Rebase the address: take fb_base out of the input, then add fb_offset. */
+	addr_out->quad_part = (addr_in->quad_part - fb_base) + fb_offset;
+}
+
+void dmub_dcn35_reset(struct dmub_srv *dmub)
+{
+ union dmub_gpint_data_register cmd;
+ const uint32_t timeout = 100000;
+ uint32_t in_reset, is_enabled, scratch, i, pwait_mode;
+
+ REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset);
+ REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled);
+ /* Ask firmware to stop only when DMCUB is enabled and not already in soft reset. */
+ if (in_reset == 0 && is_enabled != 0) {
+ cmd.bits.status = 1;
+ cmd.bits.command_code = DMUB_GPINT__STOP_FW;
+ cmd.bits.param = 0;
+
+ dmub->hw_funcs.set_gpint(dmub, cmd);
+ /* Poll (up to 'timeout' iterations, 1 us apart) for the GPINT to be acked. */
+ for (i = 0; i < timeout; ++i) {
+ if (dmub->hw_funcs.is_gpint_acked(dmub, cmd))
+ break;
+
+ udelay(1);
+ }
+ /* Poll SCRATCH7 for the stop-firmware response code. */
+ for (i = 0; i < timeout; ++i) {
+ scratch = REG_READ(DMCUB_SCRATCH7);
+ if (scratch == DMUB_GPINT__STOP_FW_RESPONSE)
+ break;
+
+ udelay(1);
+ }
+ /* Poll until bit 0 of DMCUB_PWAIT_MODE_STATUS is set. */
+ for (i = 0; i < timeout; ++i) {
+ REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &pwait_mode);
+ if (pwait_mode & (1 << 0))
+ break;
+
+ udelay(1);
+ }
+ /* Force reset in case we timed out, DMCUB is likely hung. */
+ }
+
+ if (is_enabled) {
+ REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1);
+ udelay(1);
+ REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
+ }
+ /* Zero the inbox1/outbox1/outbox0 pointers and SCRATCH0 regardless of the path taken. */
+ REG_WRITE(DMCUB_INBOX1_RPTR, 0);
+ REG_WRITE(DMCUB_INBOX1_WPTR, 0);
+ REG_WRITE(DMCUB_OUTBOX1_RPTR, 0);
+ REG_WRITE(DMCUB_OUTBOX1_WPTR, 0);
+ REG_WRITE(DMCUB_OUTBOX0_RPTR, 0);
+ REG_WRITE(DMCUB_OUTBOX0_WPTR, 0);
+ REG_WRITE(DMCUB_SCRATCH0, 0);
+
+ /* Clear the GPINT command manually so we don't send anything during boot. */
+ cmd.all = 0;
+ dmub->hw_funcs.set_gpint(dmub, cmd);
+}
+
+void dmub_dcn35_reset_release(struct dmub_srv *dmub)
+{
+ REG_WRITE(DMCUB_SCRATCH15, dmub->psp_version & 0x001100FF);
+ /* Set all three LONO_*_GATE_DISABLE fields of DMU_CLK_CNTL. */
+ REG_UPDATE_3(DMU_CLK_CNTL,
+ LONO_DISPCLK_GATE_DISABLE, 1,
+ LONO_SOCCLK_GATE_DISABLE, 1,
+ LONO_DMCUBCLK_GATE_DISABLE, 1);
+ /* Enable DMCUB with the traceport, then clear the DMUIF and DMCUB soft resets. */
+ REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1);
+ REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0);
+ REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 0);
+}
+
+void dmub_dcn35_backdoor_load(struct dmub_srv *dmub,
+ const struct dmub_window *cw0,
+ const struct dmub_window *cw1)
+{
+ union dmub_addr offset;
+ uint64_t fb_base, fb_offset;
+
+ dmub_dcn35_get_fb_base_offset(dmub, &fb_base, &fb_offset);
+ /* CW0: translated offset, base address, then top address together with the enable bit. */
+ dmub_dcn35_translate_addr(&cw0->offset, fb_base, fb_offset, &offset);
+
+ REG_WRITE(DMCUB_REGION3_CW0_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW0_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW0_BASE_ADDRESS, cw0->region.base);
+ REG_SET_2(DMCUB_REGION3_CW0_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW0_TOP_ADDRESS, cw0->region.top,
+ DMCUB_REGION3_CW0_ENABLE, 1);
+ /* CW1: same programming sequence as CW0. */
+ dmub_dcn35_translate_addr(&cw1->offset, fb_base, fb_offset, &offset);
+
+ REG_WRITE(DMCUB_REGION3_CW1_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW1_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW1_BASE_ADDRESS, cw1->region.base);
+ REG_SET_2(DMCUB_REGION3_CW1_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW1_TOP_ADDRESS, cw1->region.top,
+ DMCUB_REGION3_CW1_ENABLE, 1);
+
+ /* TODO: Do we need to set DMCUB_MEM_UNIT_ID? */
+ REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 0);
+}
+
+void dmub_dcn35_backdoor_load_zfb_mode(struct dmub_srv *dmub,
+ const struct dmub_window *cw0,
+ const struct dmub_window *cw1)
+{
+ union dmub_addr offset;
+ /* Window offsets are used as given (no FB translation); SEC_RESET is held at 1 meanwhile. */
+ REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1);
+ offset = cw0->offset;
+ REG_WRITE(DMCUB_REGION3_CW0_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW0_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW0_BASE_ADDRESS, cw0->region.base);
+ REG_SET_2(DMCUB_REGION3_CW0_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW0_TOP_ADDRESS, cw0->region.top,
+ DMCUB_REGION3_CW0_ENABLE, 1);
+ offset = cw1->offset;
+ REG_WRITE(DMCUB_REGION3_CW1_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW1_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW1_BASE_ADDRESS, cw1->region.base);
+ REG_SET_2(DMCUB_REGION3_CW1_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW1_TOP_ADDRESS, cw1->region.top,
+ DMCUB_REGION3_CW1_ENABLE, 1);
+ REG_UPDATE_2(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 0, DMCUB_MEM_UNIT_ID,
+ 0x20);
+}
+void dmub_dcn35_setup_windows(struct dmub_srv *dmub,
+ const struct dmub_window *cw2,
+ const struct dmub_window *cw3,
+ const struct dmub_window *cw4,
+ const struct dmub_window *cw5,
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6)
+{
+ union dmub_addr offset;
+
+ offset = cw3->offset;
+ /* cw2 is accepted but never used in this function; programming starts at CW3. */
+ REG_WRITE(DMCUB_REGION3_CW3_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW3_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW3_BASE_ADDRESS, cw3->region.base);
+ REG_SET_2(DMCUB_REGION3_CW3_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW3_TOP_ADDRESS, cw3->region.top,
+ DMCUB_REGION3_CW3_ENABLE, 1);
+
+ offset = cw4->offset;
+
+ REG_WRITE(DMCUB_REGION3_CW4_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW4_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW4_BASE_ADDRESS, cw4->region.base);
+ REG_SET_2(DMCUB_REGION3_CW4_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW4_TOP_ADDRESS, cw4->region.top,
+ DMCUB_REGION3_CW4_ENABLE, 1);
+
+ offset = cw5->offset;
+
+ REG_WRITE(DMCUB_REGION3_CW5_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW5_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW5_BASE_ADDRESS, cw5->region.base);
+ REG_SET_2(DMCUB_REGION3_CW5_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW5_TOP_ADDRESS, cw5->region.top,
+ DMCUB_REGION3_CW5_ENABLE, 1);
+ /* REGION5 reuses the cw5 offset; its top is the cw5 span (top - base) minus one. */
+ REG_WRITE(DMCUB_REGION5_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION5_OFFSET_HIGH, offset.u.high_part);
+ REG_SET_2(DMCUB_REGION5_TOP_ADDRESS, 0,
+ DMCUB_REGION5_TOP_ADDRESS,
+ cw5->region.top - cw5->region.base - 1,
+ DMCUB_REGION5_ENABLE, 1);
+
+ offset = cw6->offset;
+
+ REG_WRITE(DMCUB_REGION3_CW6_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW6_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW6_BASE_ADDRESS, cw6->region.base);
+ REG_SET_2(DMCUB_REGION3_CW6_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW6_TOP_ADDRESS, cw6->region.top,
+ DMCUB_REGION3_CW6_ENABLE, 1);
+
+ offset = region6->offset;
+ /* REGION6 takes no base address write; its top is the region6 span minus one. */
+ REG_WRITE(DMCUB_REGION6_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION6_OFFSET_HIGH, offset.u.high_part);
+ REG_SET_2(DMCUB_REGION6_TOP_ADDRESS, 0,
+ DMCUB_REGION6_TOP_ADDRESS,
+ region6->region.top - region6->region.base - 1,
+ DMCUB_REGION6_ENABLE, 1);
+}
+
+void dmub_dcn35_setup_mailbox(struct dmub_srv *dmub, const struct dmub_region *inbox1)
+{
+	/* INBOX1 is programmed as a base address plus a size of (top - base). */
+	REG_WRITE(DMCUB_INBOX1_BASE_ADDRESS, inbox1->base);
+	REG_WRITE(DMCUB_INBOX1_SIZE, inbox1->top - inbox1->base);
+}
+
+uint32_t dmub_dcn35_get_inbox1_wptr(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_INBOX1_WPTR); /* current DMCUB_INBOX1_WPTR register value */
+}
+
+uint32_t dmub_dcn35_get_inbox1_rptr(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_INBOX1_RPTR); /* current DMCUB_INBOX1_RPTR register value */
+}
+
+void dmub_dcn35_set_inbox1_wptr(struct dmub_srv *dmub, uint32_t wptr_offset)
+{
+ REG_WRITE(DMCUB_INBOX1_WPTR, wptr_offset); /* written as given; no range check here */
+}
+
+void dmub_dcn35_setup_out_mailbox(struct dmub_srv *dmub, const struct dmub_region *outbox1)
+{
+	/* OUTBOX1 is programmed as a base address plus a size of (top - base). */
+	REG_WRITE(DMCUB_OUTBOX1_BASE_ADDRESS, outbox1->base);
+	REG_WRITE(DMCUB_OUTBOX1_SIZE, outbox1->top - outbox1->base);
+}
+
+uint32_t dmub_dcn35_get_outbox1_wptr(struct dmub_srv *dmub)
+{
+ /*
+ * The outbox1 wptr register is accessed without locks (dal & dc),
+ * so this is to be called only by dmub_srv_stat_get_notification().
+ */
+ return REG_READ(DMCUB_OUTBOX1_WPTR);
+}
+
+void dmub_dcn35_set_outbox1_rptr(struct dmub_srv *dmub, uint32_t rptr_offset)
+{
+ /*
+ * The outbox1 rptr register is accessed without locks (dal & dc),
+ * so this is to be called only by dmub_srv_stat_get_notification().
+ */
+ REG_WRITE(DMCUB_OUTBOX1_RPTR, rptr_offset);
+}
+
+bool dmub_dcn35_is_hw_init(struct dmub_srv *dmub)
+{
+	union dmub_fw_boot_status boot_status;
+	uint32_t enabled;
+
+	boot_status.all = REG_READ(DMCUB_SCRATCH0);
+	REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &enabled);
+	/* True only when DMCUB_ENABLE is set and the SCRATCH0 status has dal_fw set. */
+	return enabled && boot_status.bits.dal_fw;
+}
+
+bool dmub_dcn35_is_supported(struct dmub_srv *dmub)
+{
+	uint32_t dmcub_enable_field = 0;
+
+	/* Support is reported by the DC_DMCUB_ENABLE field of CC_DC_PIPE_DIS. */
+	REG_GET(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE, &dmcub_enable_field);
+	return dmcub_enable_field != 0;
+}
+
+void dmub_dcn35_set_gpint(struct dmub_srv *dmub, union dmub_gpint_data_register reg)
+{
+	/* The raw GPINT word (reg.all) is written to DMCUB_GPINT_DATAIN1. */
+	REG_WRITE(DMCUB_GPINT_DATAIN1, reg.all);
+}
+
+bool dmub_dcn35_is_gpint_acked(struct dmub_srv *dmub,
+			       union dmub_gpint_data_register reg)
+{
+	union dmub_gpint_data_register readback;
+
+	/* Acked once DATAIN1 reads back as the command with its status field cleared. */
+	reg.bits.status = 0;
+	readback.all = REG_READ(DMCUB_GPINT_DATAIN1);
+	return readback.all == reg.all;
+}
+
+uint32_t dmub_dcn35_get_gpint_response(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_SCRATCH7); /* the GPINT response is read from SCRATCH7 */
+}
+
+uint32_t dmub_dcn35_get_gpint_dataout(struct dmub_srv *dmub)
+{
+ uint32_t dataout = REG_READ(DMCUB_GPINT_DATAOUT);
+
+ REG_UPDATE(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN, 0);
+ /* With the GPINT IH interrupt disabled: zero DATAOUT, then pulse the ack bit (1, then 0). */
+ REG_WRITE(DMCUB_GPINT_DATAOUT, 0);
+ REG_UPDATE(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK, 1);
+ REG_UPDATE(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK, 0);
+ /* Re-enable the interrupt; the value returned is the one read before the clear. */
+ REG_UPDATE(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN, 1);
+
+ return dataout;
+}
+
+union dmub_fw_boot_status dmub_dcn35_get_fw_boot_status(struct dmub_srv *dmub)
+{
+	/* The boot status word is read from DMCUB_SCRATCH0. */
+	union dmub_fw_boot_status boot_status = { .all = REG_READ(DMCUB_SCRATCH0) };
+
+	return boot_status;
+}
+
+union dmub_fw_boot_options dmub_dcn35_get_fw_boot_option(struct dmub_srv *dmub)
+{
+	/* The boot options word is read from DMCUB_SCRATCH14. */
+	union dmub_fw_boot_options boot_opts = { .all = REG_READ(DMCUB_SCRATCH14) };
+
+	return boot_opts;
+}
+
+void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params)
+{
+	union dmub_fw_boot_options opts = {0};
+
+	/* If not already set, dpia_supported is taken from enable_dpia currently in SCRATCH14. */
+	if (!dmub->dpia_supported)
+		dmub->dpia_supported = dmub_dcn35_get_fw_boot_option(dmub).bits.enable_dpia;
+
+	opts.bits.z10_disable = params->disable_z10;
+	opts.bits.dpia_supported = params->dpia_supported;
+	opts.bits.enable_dpia = dmub->dpia_supported && !params->disable_dpia;
+	opts.bits.usb4_cm_version = params->usb4_cm_version;
+	opts.bits.dpia_hpd_int_enable_supported = params->dpia_hpd_int_enable_supported;
+	opts.bits.power_optimization = params->power_optimization;
+	opts.bits.disable_clk_ds = params->disallow_dispclk_dppclk_ds;
+	opts.bits.disable_clk_gate = params->disable_clock_gate;
+	opts.bits.ips_disable = params->disable_ips;
+	opts.bits.ips_sequential_ono = params->ips_sequential_ono;
+	opts.bits.disable_sldo_opt = params->disable_sldo_opt;
+	opts.bits.enable_non_transparent_setconfig = params->enable_non_transparent_setconfig;
+	opts.bits.lower_hbr3_phy_ssc = params->lower_hbr3_phy_ssc;
+
+	REG_WRITE(DMCUB_SCRATCH14, opts.all);
+}
+
+void dmub_dcn35_skip_dmub_panel_power_sequence(struct dmub_srv *dmub, bool skip)
+{
+	union dmub_fw_boot_options opts = { .all = REG_READ(DMCUB_SCRATCH14) };
+
+	opts.bits.skip_phy_init_panel_sequence = skip; /* the only field changed in SCRATCH14 */
+	REG_WRITE(DMCUB_SCRATCH14, opts.all);
+}
+
+void dmub_dcn35_setup_outbox0(struct dmub_srv *dmub,
+			      const struct dmub_region *outbox0)
+{
+	/* OUTBOX0 is programmed as a base address plus a size of (top - base). */
+	REG_WRITE(DMCUB_OUTBOX0_BASE_ADDRESS, outbox0->base);
+	REG_WRITE(DMCUB_OUTBOX0_SIZE, outbox0->top - outbox0->base);
+}
+
+uint32_t dmub_dcn35_get_outbox0_wptr(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_OUTBOX0_WPTR); /* current DMCUB_OUTBOX0_WPTR register value */
+}
+
+void dmub_dcn35_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset)
+{
+ REG_WRITE(DMCUB_OUTBOX0_RPTR, rptr_offset); /* written as given; no range check here */
+}
+
+uint32_t dmub_dcn35_get_current_time(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_TIMER_CURRENT); /* raw timer register value; units are not visible here */
+}
+
+void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub)
+{
+ uint32_t is_dmub_enabled, is_soft_reset, is_pwait;
+ uint32_t is_traceport_enabled, is_cw6_enabled;
+ struct dmub_timeout_info timeout = {0};
+
+ if (!dmub)
+ return;
+
+ /* timeout data filled externally, cache before resetting memory */
+ timeout = dmub->debug.timeout_info;
+ memset(&dmub->debug, 0, sizeof(dmub->debug));
+ dmub->debug.timeout_info = timeout;
+
+ dmub->debug.dmcub_version = dmub->fw_version;
+ /* Snapshot scratch registers 0 through 16 into dmub->debug.scratch[]. */
+ dmub->debug.scratch[0] = REG_READ(DMCUB_SCRATCH0);
+ dmub->debug.scratch[1] = REG_READ(DMCUB_SCRATCH1);
+ dmub->debug.scratch[2] = REG_READ(DMCUB_SCRATCH2);
+ dmub->debug.scratch[3] = REG_READ(DMCUB_SCRATCH3);
+ dmub->debug.scratch[4] = REG_READ(DMCUB_SCRATCH4);
+ dmub->debug.scratch[5] = REG_READ(DMCUB_SCRATCH5);
+ dmub->debug.scratch[6] = REG_READ(DMCUB_SCRATCH6);
+ dmub->debug.scratch[7] = REG_READ(DMCUB_SCRATCH7);
+ dmub->debug.scratch[8] = REG_READ(DMCUB_SCRATCH8);
+ dmub->debug.scratch[9] = REG_READ(DMCUB_SCRATCH9);
+ dmub->debug.scratch[10] = REG_READ(DMCUB_SCRATCH10);
+ dmub->debug.scratch[11] = REG_READ(DMCUB_SCRATCH11);
+ dmub->debug.scratch[12] = REG_READ(DMCUB_SCRATCH12);
+ dmub->debug.scratch[13] = REG_READ(DMCUB_SCRATCH13);
+ dmub->debug.scratch[14] = REG_READ(DMCUB_SCRATCH14);
+ dmub->debug.scratch[15] = REG_READ(DMCUB_SCRATCH15);
+ dmub->debug.scratch[16] = REG_READ(DMCUB_SCRATCH16);
+ /* Fault address registers. */
+ dmub->debug.undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR);
+ dmub->debug.inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR);
+ dmub->debug.data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR);
+ /* Inbox1, inbox0 and outbox1 pointer and size registers. */
+ dmub->debug.inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR);
+ dmub->debug.inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR);
+ dmub->debug.inbox1_size = REG_READ(DMCUB_INBOX1_SIZE);
+
+ dmub->debug.inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR);
+ dmub->debug.inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR);
+ dmub->debug.inbox0_size = REG_READ(DMCUB_INBOX0_SIZE);
+
+ dmub->debug.outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR);
+ dmub->debug.outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR);
+ dmub->debug.outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE);
+ /* Individual register fields, each read into a local before being stored. */
+ REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled);
+ dmub->debug.is_dmcub_enabled = is_dmub_enabled;
+
+ REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &is_pwait);
+ dmub->debug.is_pwait = is_pwait;
+
+ REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset);
+ dmub->debug.is_dmcub_soft_reset = is_soft_reset;
+
+ REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled);
+ dmub->debug.is_traceport_en = is_traceport_enabled;
+
+ REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled);
+ dmub->debug.is_cw6_enabled = is_cw6_enabled;
+
+ dmub->debug.gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0);
+}
+void dmub_dcn35_configure_dmub_in_system_memory(struct dmub_srv *dmub)
+{
+ /*
+ * DMCUB_REGION3_TMR_AXI_SPACE values:
+ * 0b011 (0x3) - FB physical address (default, used for TMR)
+ * 0b100 (0x4) - GPU virtual address
+ *
+ * When DMUB is placed in system memory, which both GPU and CPU can
+ * access, the GPU virtual address space (0x4) is selected instead.
+ */
+ REG_WRITE(DMCUB_REGION3_TMR_AXI_SPACE, 0x4);
+}
+
+bool dmub_dcn35_should_detect(struct dmub_srv *dmub)
+{
+	/* Detection is requested through a flag bit in the SCRATCH0 boot status word. */
+	const uint32_t boot_status = REG_READ(DMCUB_SCRATCH0);
+	return (boot_status & DMUB_FW_BOOT_STATUS_BIT_DETECTION_REQUIRED) != 0;
+}
+
+void dmub_dcn35_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data)
+{
+ REG_WRITE(DMCUB_INBOX0_WPTR, data.inbox0_cmd_common.all); /* command word goes into the INBOX0_WPTR register */
+}
+
+void dmub_dcn35_clear_inbox0_ack_register(struct dmub_srv *dmub)
+{
+ REG_WRITE(DMCUB_SCRATCH17, 0); /* SCRATCH17 serves as the INBOX0 ack register */
+}
+
+uint32_t dmub_dcn35_read_inbox0_ack_register(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_SCRATCH17); /* SCRATCH17 serves as the INBOX0 ack register */
+}
+
+bool dmub_dcn35_is_hw_powered_up(struct dmub_srv *dmub)
+{
+	union dmub_fw_boot_status boot;
+	uint32_t enabled;
+
+	REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &enabled);
+	if (!enabled)
+		return false;
+
+	boot.all = REG_READ(DMCUB_SCRATCH0);
+	/* mailbox_rdy is always required; with dal_fw set, hw_power_init_done is too. */
+	return boot.bits.mailbox_rdy &&
+	       (!boot.bits.dal_fw || boot.bits.hw_power_init_done);
+}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h
new file mode 100644
index 000000000000..39fcb7275da5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h
@@ -0,0 +1,288 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _DMUB_DCN35_H_
+#define _DMUB_DCN35_H_
+
+#include "dmub_dcn31.h"
+
+struct dmub_srv;
+
+/* DCN35 register (DMUB_SR) and register-field (DMUB_SF) lists; users define those two macros before expanding a list. */
+
+#define DMUB_DCN35_REGS() \
+ DMUB_SR(DMCUB_CNTL) \
+ DMUB_SR(DMCUB_CNTL2) \
+ DMUB_SR(DMCUB_SEC_CNTL) \
+ DMUB_SR(DMCUB_INBOX0_SIZE) \
+ DMUB_SR(DMCUB_INBOX0_RPTR) \
+ DMUB_SR(DMCUB_INBOX0_WPTR) \
+ DMUB_SR(DMCUB_INBOX1_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_INBOX1_SIZE) \
+ DMUB_SR(DMCUB_INBOX1_RPTR) \
+ DMUB_SR(DMCUB_INBOX1_WPTR) \
+ DMUB_SR(DMCUB_OUTBOX0_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_OUTBOX0_SIZE) \
+ DMUB_SR(DMCUB_OUTBOX0_RPTR) \
+ DMUB_SR(DMCUB_OUTBOX0_WPTR) \
+ DMUB_SR(DMCUB_OUTBOX1_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_OUTBOX1_SIZE) \
+ DMUB_SR(DMCUB_OUTBOX1_RPTR) \
+ DMUB_SR(DMCUB_OUTBOX1_WPTR) \
+ DMUB_SR(DMCUB_REGION3_CW0_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW1_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW2_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW3_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW4_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW5_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW6_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW7_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW0_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW1_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW2_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW3_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW4_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW5_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW6_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW7_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW0_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW1_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW2_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW3_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW4_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW5_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW6_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW7_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW0_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW1_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW2_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW3_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW4_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW5_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW6_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW7_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION4_OFFSET) \
+ DMUB_SR(DMCUB_REGION4_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION4_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION5_OFFSET) \
+ DMUB_SR(DMCUB_REGION5_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION5_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION6_OFFSET) \
+ DMUB_SR(DMCUB_REGION6_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION6_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_SCRATCH0) \
+ DMUB_SR(DMCUB_SCRATCH1) \
+ DMUB_SR(DMCUB_SCRATCH2) \
+ DMUB_SR(DMCUB_SCRATCH3) \
+ DMUB_SR(DMCUB_SCRATCH4) \
+ DMUB_SR(DMCUB_SCRATCH5) \
+ DMUB_SR(DMCUB_SCRATCH6) \
+ DMUB_SR(DMCUB_SCRATCH7) \
+ DMUB_SR(DMCUB_SCRATCH8) \
+ DMUB_SR(DMCUB_SCRATCH9) \
+ DMUB_SR(DMCUB_SCRATCH10) \
+ DMUB_SR(DMCUB_SCRATCH11) \
+ DMUB_SR(DMCUB_SCRATCH12) \
+ DMUB_SR(DMCUB_SCRATCH13) \
+ DMUB_SR(DMCUB_SCRATCH14) \
+ DMUB_SR(DMCUB_SCRATCH15) \
+ DMUB_SR(DMCUB_SCRATCH16) \
+ DMUB_SR(DMCUB_SCRATCH17) \
+ DMUB_SR(DMCUB_SCRATCH18) \
+ DMUB_SR(DMCUB_SCRATCH19) \
+ DMUB_SR(DMCUB_SCRATCH20) \
+ DMUB_SR(DMCUB_SCRATCH21) \
+ DMUB_SR(DMCUB_GPINT_DATAIN0) \
+ DMUB_SR(DMCUB_GPINT_DATAIN1) \
+ DMUB_SR(DMCUB_GPINT_DATAOUT) \
+ DMUB_SR(CC_DC_PIPE_DIS) \
+ DMUB_SR(MMHUBBUB_SOFT_RESET) \
+ DMUB_SR(DCN_VM_FB_LOCATION_BASE) \
+ DMUB_SR(DCN_VM_FB_OFFSET) \
+ DMUB_SR(DMCUB_TIMER_CURRENT) \
+ DMUB_SR(DMCUB_INST_FETCH_FAULT_ADDR) \
+ DMUB_SR(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR) \
+ DMUB_SR(DMCUB_DATA_WRITE_FAULT_ADDR) \
+ DMUB_SR(DMCUB_REGION3_TMR_AXI_SPACE) \
+ DMUB_SR(DMCUB_INTERRUPT_ENABLE) \
+ DMUB_SR(DMCUB_INTERRUPT_ACK) \
+ DMUB_SR(DMU_CLK_CNTL)
+
+#define DMUB_DCN35_FIELDS() \
+ DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \
+ DMUB_SF(DMCUB_CNTL, DMCUB_TRACEPORT_EN) \
+ DMUB_SF(DMCUB_CNTL2, DMCUB_SOFT_RESET) \
+ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET) \
+ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_MEM_UNIT_ID) \
+ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS) \
+ DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW2_TOP_ADDRESS, DMCUB_REGION3_CW2_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW2_TOP_ADDRESS, DMCUB_REGION3_CW2_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW3_TOP_ADDRESS, DMCUB_REGION3_CW3_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW3_TOP_ADDRESS, DMCUB_REGION3_CW3_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW4_TOP_ADDRESS, DMCUB_REGION3_CW4_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW4_TOP_ADDRESS, DMCUB_REGION3_CW4_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW5_TOP_ADDRESS, DMCUB_REGION3_CW5_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW5_TOP_ADDRESS, DMCUB_REGION3_CW5_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_ENABLE) \
+ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \
+ DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_ENABLE) \
+ DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_ENABLE) \
+ DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \
+ DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \
+ DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \
+ DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) \
+ DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) \
+ DMUB_SF(DMCUB_REGION3_TMR_AXI_SPACE, DMCUB_REGION3_TMR_AXI_SPACE) \
+ DMUB_SF(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN) \
+ DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) \
+ DMUB_SF(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS) \
+ DMUB_SF(DMU_CLK_CNTL, LONO_DISPCLK_GATE_DISABLE) \
+ DMUB_SF(DMU_CLK_CNTL, LONO_SOCCLK_GATE_DISABLE) \
+ DMUB_SF(DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE)
+
+struct dmub_srv_dcn35_reg_offset { /* one uint32_t member per register named in the two lists below */
+#define DMUB_SR(reg) uint32_t reg;
+ DMUB_DCN35_REGS()
+ DMCUB_INTERNAL_REGS()
+#undef DMUB_SR
+};
+
+struct dmub_srv_dcn35_reg_shift { /* one uint8_t member per DMUB_DCN35_FIELDS() entry, named reg__field */
+#define DMUB_SF(reg, field) uint8_t reg##__##field;
+ DMUB_DCN35_FIELDS()
+#undef DMUB_SF
+};
+
+struct dmub_srv_dcn35_reg_mask { /* one uint32_t member per DMUB_DCN35_FIELDS() entry, named reg__field */
+#define DMUB_SF(reg, field) uint32_t reg##__##field;
+ DMUB_DCN35_FIELDS()
+#undef DMUB_SF
+};
+
+struct dmub_srv_dcn35_regs { /* non-const: the members are assigned at runtime by the *_regs_init() functions */
+ struct dmub_srv_dcn35_reg_offset offset;
+ struct dmub_srv_dcn35_reg_mask mask;
+ struct dmub_srv_dcn35_reg_shift shift;
+};
+
+/* Hardware functions. */
+
+/* NOTE(review): dmub_dcn35.c as added by this change defines no dmub_dcn35_init(); confirm a definition exists elsewhere or drop this prototype. */
+void dmub_dcn35_init(struct dmub_srv *dmub);
+
+void dmub_dcn35_reset(struct dmub_srv *dmub);
+
+void dmub_dcn35_reset_release(struct dmub_srv *dmub);
+
+void dmub_dcn35_backdoor_load(struct dmub_srv *dmub,
+ const struct dmub_window *cw0,
+ const struct dmub_window *cw1);
+
+void dmub_dcn35_backdoor_load_zfb_mode(struct dmub_srv *dmub,
+ const struct dmub_window *cw0,
+ const struct dmub_window *cw1);
+
+void dmub_dcn35_setup_windows(struct dmub_srv *dmub,
+ const struct dmub_window *cw2,
+ const struct dmub_window *cw3,
+ const struct dmub_window *cw4,
+ const struct dmub_window *cw5,
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6);
+
+void dmub_dcn35_setup_mailbox(struct dmub_srv *dmub,
+ const struct dmub_region *inbox1);
+
+uint32_t dmub_dcn35_get_inbox1_wptr(struct dmub_srv *dmub);
+
+uint32_t dmub_dcn35_get_inbox1_rptr(struct dmub_srv *dmub);
+
+void dmub_dcn35_set_inbox1_wptr(struct dmub_srv *dmub, uint32_t wptr_offset);
+
+void dmub_dcn35_setup_out_mailbox(struct dmub_srv *dmub,
+ const struct dmub_region *outbox1);
+
+uint32_t dmub_dcn35_get_outbox1_wptr(struct dmub_srv *dmub);
+
+void dmub_dcn35_set_outbox1_rptr(struct dmub_srv *dmub, uint32_t rptr_offset);
+
+bool dmub_dcn35_is_hw_init(struct dmub_srv *dmub);
+
+bool dmub_dcn35_is_supported(struct dmub_srv *dmub);
+
+void dmub_dcn35_set_gpint(struct dmub_srv *dmub,
+ union dmub_gpint_data_register reg);
+
+bool dmub_dcn35_is_gpint_acked(struct dmub_srv *dmub,
+ union dmub_gpint_data_register reg);
+
+uint32_t dmub_dcn35_get_gpint_response(struct dmub_srv *dmub);
+
+uint32_t dmub_dcn35_get_gpint_dataout(struct dmub_srv *dmub);
+
+void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params);
+
+void dmub_dcn35_skip_dmub_panel_power_sequence(struct dmub_srv *dmub, bool skip);
+
+union dmub_fw_boot_status dmub_dcn35_get_fw_boot_status(struct dmub_srv *dmub);
+
+union dmub_fw_boot_options dmub_dcn35_get_fw_boot_option(struct dmub_srv *dmub);
+
+void dmub_dcn35_setup_outbox0(struct dmub_srv *dmub,
+ const struct dmub_region *outbox0);
+
+uint32_t dmub_dcn35_get_outbox0_wptr(struct dmub_srv *dmub);
+
+void dmub_dcn35_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset);
+
+uint32_t dmub_dcn35_get_current_time(struct dmub_srv *dmub);
+
+void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub);
+
+void dmub_dcn35_configure_dmub_in_system_memory(struct dmub_srv *dmub);
+
+void dmub_dcn35_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data);
+
+void dmub_dcn35_clear_inbox0_ack_register(struct dmub_srv *dmub);
+
+uint32_t dmub_dcn35_read_inbox0_ack_register(struct dmub_srv *dmub);
+
+bool dmub_dcn35_should_detect(struct dmub_srv *dmub);
+
+bool dmub_dcn35_is_hw_powered_up(struct dmub_srv *dmub);
+
+void dmub_srv_dcn35_regs_init(struct dmub_srv *dmub, struct dc_context *ctx);
+
+#endif /* _DMUB_DCN35_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.c
new file mode 100644
index 000000000000..8f40b9f6706c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#include "../dmub_srv.h"
+#include "dmub_reg.h"
+#include "dmub_dcn351.h"
+
+#include "dcn/dcn_3_5_1_offset.h"
+#include "dcn/dcn_3_5_1_sh_mask.h"
+
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg]
+#define CTX dmub
+#define REGS dmub->regs_dcn35
+#define REG_OFFSET_EXP(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name) /* parenthesized so the sum expands as a single operand */
+
+void dmub_srv_dcn351_regs_init(struct dmub_srv *dmub, struct dc_context *ctx)
+{
+ struct dmub_srv_dcn35_regs *regs = dmub->regs_dcn35;
+#define REG_STRUCT regs
+
+#define DMUB_SR(reg) REG_STRUCT->offset.reg = REG_OFFSET_EXP(reg);
+ DMUB_DCN35_REGS()
+ DMCUB_INTERNAL_REGS()
+#undef DMUB_SR
+
+#define DMUB_SF(reg, field) REG_STRUCT->mask.reg##__##field = FD_MASK(reg, field);
+ DMUB_DCN35_FIELDS()
+#undef DMUB_SF
+
+#define DMUB_SF(reg, field) REG_STRUCT->shift.reg##__##field = FD_SHIFT(reg, field);
+ DMUB_DCN35_FIELDS()
+#undef DMUB_SF
+#undef REG_STRUCT
+}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.h
new file mode 100644
index 000000000000..4121fa1b301d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+
+#ifndef _DMUB_DCN351_H_
+#define _DMUB_DCN351_H_
+
+#include "dmub_dcn35.h"
+
+struct dmub_srv;
+
+void dmub_srv_dcn351_regs_init(struct dmub_srv *dmub, struct dc_context *ctx);
+
+#endif /* _DMUB_DCN351_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.c
new file mode 100644
index 000000000000..b1ce09d48920
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2025 Advanced Micro Devices, Inc. */
+
+#include "../dmub_srv.h"
+#include "dmub_reg.h"
+#include "dmub_dcn36.h"
+
+#include "dcn/dcn_3_6_0_offset.h"
+#include "dcn/dcn_3_6_0_sh_mask.h"
+
+#define BASE_INNER(seg) ctx->dcn_reg_offsets[seg]
+#define CTX dmub
+#define REGS dmub->regs_dcn35
+#define REG_OFFSET_EXP(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name) /* parenthesized so the sum expands as a single operand */
+
+void dmub_srv_dcn36_regs_init(struct dmub_srv *dmub, struct dc_context *ctx)
+{
+ struct dmub_srv_dcn35_regs *regs = dmub->regs_dcn35;
+#define REG_STRUCT regs
+
+#define DMUB_SR(reg) REG_STRUCT->offset.reg = REG_OFFSET_EXP(reg);
+ DMUB_DCN35_REGS()
+ DMCUB_INTERNAL_REGS()
+#undef DMUB_SR
+
+#define DMUB_SF(reg, field) REG_STRUCT->mask.reg##__##field = FD_MASK(reg, field);
+ DMUB_DCN35_FIELDS()
+#undef DMUB_SF
+
+#define DMUB_SF(reg, field) REG_STRUCT->shift.reg##__##field = FD_SHIFT(reg, field);
+ DMUB_DCN35_FIELDS()
+#undef DMUB_SF
+#undef REG_STRUCT
+}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.h
new file mode 100644
index 000000000000..57850550f682
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright 2025 Advanced Micro Devices, Inc. */
+
+#ifndef _DMUB_DCN36_H_
+#define _DMUB_DCN36_H_
+
+#include "dmub_dcn35.h"
+
+struct dmub_srv;
+
+void dmub_srv_dcn36_regs_init(struct dmub_srv *dmub, struct dc_context *ctx);
+
+#endif /* _DMUB_DCN36_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c
new file mode 100644
index 000000000000..b31adbd0d685
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c
@@ -0,0 +1,667 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "../dmub_srv.h"
+#include "dmub_reg.h"
+#include "dmub_dcn401.h"
+
+#include "dcn/dcn_4_1_0_offset.h"
+#include "dcn/dcn_4_1_0_sh_mask.h"
+
+#define DCN_BASE__INST0_SEG2 0x000034C0
+
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG##seg
+#define CTX dmub
+#define REGS dmub->regs_dcn401
+#define REG_OFFSET_EXP(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name)
+
+const struct dmub_srv_dcn401_regs dmub_srv_dcn401_regs = {
+#define DMUB_SR(reg) REG_OFFSET_EXP(reg),
+ {
+ DMUB_DCN401_REGS()
+ DMCUB_INTERNAL_REGS()
+ },
+#undef DMUB_SR
+
+#define DMUB_SF(reg, field) FD_MASK(reg, field),
+ { DMUB_DCN401_FIELDS() },
+#undef DMUB_SF
+
+#define DMUB_SF(reg, field) FD_SHIFT(reg, field),
+ { DMUB_DCN401_FIELDS() },
+#undef DMUB_SF
+};
+
+static void dmub_dcn401_get_fb_base_offset(struct dmub_srv *dmub,
+ uint64_t *fb_base,
+ uint64_t *fb_offset)
+{
+ uint32_t tmp;
+
+ if (dmub->fb_base || dmub->fb_offset) { /* values stored on dmub take priority over the registers */
+ *fb_base = dmub->fb_base;
+ *fb_offset = dmub->fb_offset;
+ return;
+ }
+
+ REG_GET(DCN_VM_FB_LOCATION_BASE, FB_BASE, &tmp);
+ *fb_base = (uint64_t)tmp << 24; /* widen to 64 bits before shifting so no high bits are lost */
+
+ REG_GET(DCN_VM_FB_OFFSET, FB_OFFSET, &tmp);
+ *fb_offset = (uint64_t)tmp << 24; /* same 24-bit scaling as fb_base */
+}
+
+static inline void dmub_dcn401_translate_addr(const union dmub_addr *addr_in,
+ uint64_t fb_base,
+ uint64_t fb_offset,
+ union dmub_addr *addr_out)
+{
+ addr_out->quad_part = addr_in->quad_part - fb_base + fb_offset; /* rebase: subtract fb_base, then add fb_offset */
+}
+
+void dmub_dcn401_reset(struct dmub_srv *dmub)
+{
+ union dmub_gpint_data_register cmd;
+ const uint32_t timeout_us = 1 * 1000 * 1000; /* poll budget: 1,000,000 iterations (~1 s of udelay at 1 us each) */
+ const uint32_t poll_delay_us = 1; /* delay between polls, in microseconds */
+ uint32_t i = 0; /* poll counter; not reset between the two loops, so they share one budget */
+ uint32_t enabled, in_reset, scratch, pwait_mode;
+
+ REG_GET(DMCUB_CNTL,
+ DMCUB_ENABLE, &enabled);
+ REG_GET(DMCUB_CNTL2,
+ DMCUB_SOFT_RESET, &in_reset);
+
+ if (enabled && in_reset == 0) { /* stop handshake only when DMCUB is enabled and not in soft reset */
+ cmd.bits.status = 1;
+ cmd.bits.command_code = DMUB_GPINT__STOP_FW;
+ cmd.bits.param = 0;
+
+ dmub->hw_funcs.set_gpint(dmub, cmd);
+
+ for (; i < timeout_us; i++) { /* phase 1: wait for the STOP_FW response value */
+ scratch = dmub->hw_funcs.get_gpint_response(dmub);
+ if (scratch == DMUB_GPINT__STOP_FW_RESPONSE)
+ break;
+
+ udelay(poll_delay_us);
+ }
+
+ for (; i < timeout_us; i++) { /* phase 2: continues the same count, waits for PWAIT status bit 0 */
+ REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &pwait_mode);
+ if (pwait_mode & (1 << 0))
+ break;
+
+ udelay(poll_delay_us);
+ }
+ }
+
+ if (i >= timeout_us) {
+ /* timeout should never occur */
+ BREAK_TO_DEBUGGER();
+ }
+
+ REG_WRITE(DMCUB_INBOX1_RPTR, 0); /* zero the inbox1/outbox1/outbox0 read and write pointers */
+ REG_WRITE(DMCUB_INBOX1_WPTR, 0);
+ REG_WRITE(DMCUB_OUTBOX1_RPTR, 0);
+ REG_WRITE(DMCUB_OUTBOX1_WPTR, 0);
+ REG_WRITE(DMCUB_OUTBOX0_RPTR, 0);
+ REG_WRITE(DMCUB_OUTBOX0_WPTR, 0);
+ REG_WRITE(DMCUB_SCRATCH0, 0); /* SCRATCH0 is the register dmub_dcn401_get_fw_boot_status() reads */
+
+ /* Clear the GPINT command manually so we don't reset again. */
+ cmd.all = 0;
+ dmub->hw_funcs.set_gpint(dmub, cmd);
+}
+
+void dmub_dcn401_reset_release(struct dmub_srv *dmub)
+{
+ REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0); /* clear the DMUIF soft reset first */
+ REG_WRITE(DMCUB_SCRATCH15, dmub->psp_version & 0x001100FF); /* NOTE(review): meaning of mask 0x001100FF is not visible here - confirm */
+ REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1); /* set the enable and traceport-enable fields together */
+ REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 0); /* last step: clear the DMCUB soft reset */
+}
+
+void dmub_dcn401_backdoor_load(struct dmub_srv *dmub,
+ const struct dmub_window *cw0,
+ const struct dmub_window *cw1)
+{
+ union dmub_addr offset;
+ uint64_t fb_base, fb_offset;
+
+ dmub_dcn401_get_fb_base_offset(dmub, &fb_base, &fb_offset);
+
+ /* reset and disable DMCUB and MMHUBBUB DMUIF */
+ REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1);
+ REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1);
+ REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
+
+ dmub_dcn401_translate_addr(&cw0->offset, fb_base, fb_offset, &offset); /* offset = cw0->offset - fb_base + fb_offset */
+
+ REG_WRITE(DMCUB_REGION3_CW0_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW0_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW0_BASE_ADDRESS, cw0->region.base);
+ REG_SET_2(DMCUB_REGION3_CW0_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW0_TOP_ADDRESS, cw0->region.top,
+ DMCUB_REGION3_CW0_ENABLE, 1);
+
+ dmub_dcn401_translate_addr(&cw1->offset, fb_base, fb_offset, &offset); /* same rebasing for cw1 */
+
+ REG_WRITE(DMCUB_REGION3_CW1_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW1_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW1_BASE_ADDRESS, cw1->region.base);
+ REG_SET_2(DMCUB_REGION3_CW1_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW1_TOP_ADDRESS, cw1->region.top,
+ DMCUB_REGION3_CW1_ENABLE, 1);
+
+ /* release only DMCUB_SEC_RESET here; DMUIF_SOFT_RESET and DMCUB_ENABLE stay as set above, to prevent premature execution */
+ REG_UPDATE_2(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 0, DMCUB_MEM_UNIT_ID,
+ 0x20); /* NOTE(review): unit ID 0x20 is not explained in this file - confirm */
+}
+
+void dmub_dcn401_backdoor_load_zfb_mode(struct dmub_srv *dmub,
+ const struct dmub_window *cw0,
+ const struct dmub_window *cw1)
+{
+ union dmub_addr offset;
+
+ /* reset and disable DMCUB and MMHUBBUB DMUIF */
+ REG_UPDATE(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 1);
+ REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1);
+ REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0);
+
+ offset = cw0->offset; /* used as-is: unlike dmub_dcn401_backdoor_load(), no fb_base/fb_offset translation */
+
+ REG_WRITE(DMCUB_REGION3_CW0_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW0_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW0_BASE_ADDRESS, cw0->region.base);
+ REG_SET_2(DMCUB_REGION3_CW0_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW0_TOP_ADDRESS, cw0->region.top,
+ DMCUB_REGION3_CW0_ENABLE, 1);
+
+ offset = cw1->offset; /* likewise untranslated */
+
+ REG_WRITE(DMCUB_REGION3_CW1_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW1_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW1_BASE_ADDRESS, cw1->region.base);
+ REG_SET_2(DMCUB_REGION3_CW1_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW1_TOP_ADDRESS, cw1->region.top,
+ DMCUB_REGION3_CW1_ENABLE, 1);
+
+ /* release only DMCUB_SEC_RESET here; DMUIF_SOFT_RESET and DMCUB_ENABLE stay as set above, to prevent premature execution */
+ REG_UPDATE_2(DMCUB_SEC_CNTL, DMCUB_SEC_RESET, 0, DMCUB_MEM_UNIT_ID,
+ 0x20); /* NOTE(review): unit ID 0x20 is not explained in this file - confirm */
+}
+
+void dmub_dcn401_setup_windows(struct dmub_srv *dmub,
+ const struct dmub_window *cw2, /* not referenced anywhere in this function body */
+ const struct dmub_window *cw3,
+ const struct dmub_window *cw4,
+ const struct dmub_window *cw5,
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6)
+{
+ union dmub_addr offset;
+
+ offset = cw3->offset;
+
+ REG_WRITE(DMCUB_REGION3_CW3_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW3_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW3_BASE_ADDRESS, cw3->region.base);
+ REG_SET_2(DMCUB_REGION3_CW3_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW3_TOP_ADDRESS, cw3->region.top,
+ DMCUB_REGION3_CW3_ENABLE, 1);
+
+ offset = cw4->offset;
+
+ REG_WRITE(DMCUB_REGION3_CW4_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW4_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW4_BASE_ADDRESS, cw4->region.base);
+ REG_SET_2(DMCUB_REGION3_CW4_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW4_TOP_ADDRESS, cw4->region.top,
+ DMCUB_REGION3_CW4_ENABLE, 1);
+
+ offset = cw5->offset;
+
+ REG_WRITE(DMCUB_REGION3_CW5_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW5_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW5_BASE_ADDRESS, cw5->region.base);
+ REG_SET_2(DMCUB_REGION3_CW5_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW5_TOP_ADDRESS, cw5->region.top,
+ DMCUB_REGION3_CW5_ENABLE, 1);
+
+ REG_WRITE(DMCUB_REGION5_OFFSET, offset.u.low_part); /* offset still holds cw5->offset: region 5 gets the same offset as CW5 */
+ REG_WRITE(DMCUB_REGION5_OFFSET_HIGH, offset.u.high_part);
+ REG_SET_2(DMCUB_REGION5_TOP_ADDRESS, 0,
+ DMCUB_REGION5_TOP_ADDRESS,
+ cw5->region.top - cw5->region.base - 1, /* top - base - 1, i.e. the cw5 window size minus one */
+ DMCUB_REGION5_ENABLE, 1);
+
+ offset = cw6->offset;
+
+ REG_WRITE(DMCUB_REGION3_CW6_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION3_CW6_OFFSET_HIGH, offset.u.high_part);
+ REG_WRITE(DMCUB_REGION3_CW6_BASE_ADDRESS, cw6->region.base);
+ REG_SET_2(DMCUB_REGION3_CW6_TOP_ADDRESS, 0,
+ DMCUB_REGION3_CW6_TOP_ADDRESS, cw6->region.top,
+ DMCUB_REGION3_CW6_ENABLE, 1);
+
+ offset = region6->offset;
+
+ REG_WRITE(DMCUB_REGION6_OFFSET, offset.u.low_part);
+ REG_WRITE(DMCUB_REGION6_OFFSET_HIGH, offset.u.high_part);
+ REG_SET_2(DMCUB_REGION6_TOP_ADDRESS, 0,
+ DMCUB_REGION6_TOP_ADDRESS,
+ region6->region.top - region6->region.base - 1, /* same size-minus-one form as region 5 */
+ DMCUB_REGION6_ENABLE, 1);
+}
+
+void dmub_dcn401_setup_mailbox(struct dmub_srv *dmub,
+ const struct dmub_region *inbox1)
+{
+ REG_WRITE(DMCUB_INBOX1_BASE_ADDRESS, inbox1->base);
+ REG_WRITE(DMCUB_INBOX1_SIZE, inbox1->top - inbox1->base);
+}
+
+uint32_t dmub_dcn401_get_inbox1_wptr(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_INBOX1_WPTR);
+}
+
+uint32_t dmub_dcn401_get_inbox1_rptr(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_INBOX1_RPTR);
+}
+
+void dmub_dcn401_set_inbox1_wptr(struct dmub_srv *dmub, uint32_t wptr_offset)
+{
+ REG_WRITE(DMCUB_INBOX1_WPTR, wptr_offset);
+}
+
+void dmub_dcn401_setup_out_mailbox(struct dmub_srv *dmub,
+ const struct dmub_region *outbox1)
+{
+ REG_WRITE(DMCUB_OUTBOX1_BASE_ADDRESS, outbox1->base);
+ REG_WRITE(DMCUB_OUTBOX1_SIZE, outbox1->top - outbox1->base);
+}
+
+uint32_t dmub_dcn401_get_outbox1_wptr(struct dmub_srv *dmub)
+{
+ /*
+ * The outbox1 wptr register is accessed without locks (dal & dc),
+ * so this is to be called only by dmub_srv_stat_get_notification().
+ */
+ return REG_READ(DMCUB_OUTBOX1_WPTR);
+}
+
+void dmub_dcn401_set_outbox1_rptr(struct dmub_srv *dmub, uint32_t rptr_offset)
+{
+ /*
+ * The outbox1 rptr register is accessed without locks (dal & dc),
+ * so this is to be called only by dmub_srv_stat_get_notification().
+ */
+ REG_WRITE(DMCUB_OUTBOX1_RPTR, rptr_offset);
+}
+
+bool dmub_dcn401_is_hw_init(struct dmub_srv *dmub)
+{
+ union dmub_fw_boot_status status;
+ uint32_t is_hw_init;
+
+ status.all = REG_READ(DMCUB_SCRATCH0); /* same register dmub_dcn401_get_fw_boot_status() returns */
+ REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_hw_init);
+
+ return is_hw_init != 0 && status.bits.dal_fw; /* true only if the enable field and the dal_fw status bit are both set */
+}
+
+bool dmub_dcn401_is_supported(struct dmub_srv *dmub)
+{
+ uint32_t supported = 0;
+
+ REG_GET(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE, &supported);
+
+ return supported;
+}
+
+void dmub_dcn401_set_gpint(struct dmub_srv *dmub,
+ union dmub_gpint_data_register reg)
+{
+ REG_WRITE(DMCUB_GPINT_DATAIN1, reg.all);
+}
+
+bool dmub_dcn401_is_gpint_acked(struct dmub_srv *dmub,
+ union dmub_gpint_data_register reg)
+{
+ union dmub_gpint_data_register test;
+
+ reg.bits.status = 0; /* reg is a by-value copy; build the expected value with status cleared */
+ test.all = REG_READ(DMCUB_GPINT_DATAIN1);
+
+ return test.all == reg.all; /* acked when DATAIN1 equals the command with status == 0 */
+}
+
+uint32_t dmub_dcn401_get_gpint_response(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_SCRATCH7);
+}
+
+uint32_t dmub_dcn401_get_gpint_dataout(struct dmub_srv *dmub)
+{
+ uint32_t dataout = REG_READ(DMCUB_GPINT_DATAOUT); /* capture the value before the register is zeroed below */
+
+ REG_UPDATE(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN, 0); /* GPINT interrupt enable off while clearing */
+
+ REG_WRITE(DMCUB_GPINT_DATAOUT, 0);
+ REG_UPDATE(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK, 1); /* pulse the ack field: set to 1 ... */
+ REG_UPDATE(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK, 0); /* ... then back to 0 */
+
+ REG_UPDATE(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN, 1); /* GPINT interrupt enable back on */
+
+ return dataout;
+}
+
+union dmub_fw_boot_status dmub_dcn401_get_fw_boot_status(struct dmub_srv *dmub)
+{
+ union dmub_fw_boot_status status;
+
+ status.all = REG_READ(DMCUB_SCRATCH0);
+ return status;
+}
+
+void dmub_dcn401_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params)
+{
+ union dmub_fw_boot_options boot_options = {0};
+
+ boot_options.bits.z10_disable = params->disable_z10;
+
+ boot_options.bits.skip_phy_access = params->disallow_phy_access;
+
+ REG_WRITE(DMCUB_SCRATCH14, boot_options.all);
+}
+
+void dmub_dcn401_skip_dmub_panel_power_sequence(struct dmub_srv *dmub, bool skip)
+{
+ union dmub_fw_boot_options boot_options;
+ boot_options.all = REG_READ(DMCUB_SCRATCH14); /* read-modify-write: SCRATCH14 is also written by enable_dmub_boot_options() */
+ boot_options.bits.skip_phy_init_panel_sequence = skip; /* only this bit changes; the rest is written back as read */
+ REG_WRITE(DMCUB_SCRATCH14, boot_options.all);
+}
+
+void dmub_dcn401_setup_outbox0(struct dmub_srv *dmub,
+ const struct dmub_region *outbox0)
+{
+ REG_WRITE(DMCUB_OUTBOX0_BASE_ADDRESS, outbox0->base);
+
+ REG_WRITE(DMCUB_OUTBOX0_SIZE, outbox0->top - outbox0->base);
+}
+
+uint32_t dmub_dcn401_get_outbox0_wptr(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_OUTBOX0_WPTR);
+}
+
+void dmub_dcn401_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset)
+{
+ REG_WRITE(DMCUB_OUTBOX0_RPTR, rptr_offset);
+}
+
+uint32_t dmub_dcn401_get_current_time(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_TIMER_CURRENT);
+}
+
+void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub)
+{
+ uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset, is_pwait;
+ uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled;
+ struct dmub_timeout_info timeout = {0};
+
+ if (!dmub)
+ return;
+
+ /* timeout data filled externally, cache before resetting memory */
+ timeout = dmub->debug.timeout_info;
+ memset(&dmub->debug, 0, sizeof(dmub->debug));
+ dmub->debug.timeout_info = timeout;
+
+ dmub->debug.dmcub_version = dmub->fw_version;
+
+ dmub->debug.scratch[0] = REG_READ(DMCUB_SCRATCH0);
+ dmub->debug.scratch[1] = REG_READ(DMCUB_SCRATCH1);
+ dmub->debug.scratch[2] = REG_READ(DMCUB_SCRATCH2);
+ dmub->debug.scratch[3] = REG_READ(DMCUB_SCRATCH3);
+ dmub->debug.scratch[4] = REG_READ(DMCUB_SCRATCH4);
+ dmub->debug.scratch[5] = REG_READ(DMCUB_SCRATCH5);
+ dmub->debug.scratch[6] = REG_READ(DMCUB_SCRATCH6);
+ dmub->debug.scratch[7] = REG_READ(DMCUB_SCRATCH7);
+ dmub->debug.scratch[8] = REG_READ(DMCUB_SCRATCH8);
+ dmub->debug.scratch[9] = REG_READ(DMCUB_SCRATCH9);
+ dmub->debug.scratch[10] = REG_READ(DMCUB_SCRATCH10);
+ dmub->debug.scratch[11] = REG_READ(DMCUB_SCRATCH11);
+ dmub->debug.scratch[12] = REG_READ(DMCUB_SCRATCH12);
+ dmub->debug.scratch[13] = REG_READ(DMCUB_SCRATCH13);
+ dmub->debug.scratch[14] = REG_READ(DMCUB_SCRATCH14);
+ dmub->debug.scratch[15] = REG_READ(DMCUB_SCRATCH15);
+ dmub->debug.scratch[16] = REG_READ(DMCUB_SCRATCH16);
+
+ dmub->debug.undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR);
+ dmub->debug.inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR);
+ dmub->debug.data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR);
+
+ dmub->debug.inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR);
+ dmub->debug.inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR);
+ dmub->debug.inbox1_size = REG_READ(DMCUB_INBOX1_SIZE);
+
+ dmub->debug.inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR);
+ dmub->debug.inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR);
+ dmub->debug.inbox0_size = REG_READ(DMCUB_INBOX0_SIZE);
+
+ dmub->debug.outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR);
+ dmub->debug.outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR);
+ dmub->debug.outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE);
+
+ REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled);
+ dmub->debug.is_dmcub_enabled = is_dmub_enabled;
+
+ REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &is_pwait);
+ dmub->debug.is_pwait = is_pwait;
+
+ REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset);
+ dmub->debug.is_dmcub_soft_reset = is_soft_reset;
+
+ REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset);
+ dmub->debug.is_dmcub_secure_reset = is_sec_reset;
+
+ REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled);
+ dmub->debug.is_traceport_en = is_traceport_enabled;
+
+ REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled);
+ dmub->debug.is_cw0_enabled = is_cw0_enabled;
+
+ REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled);
+ dmub->debug.is_cw6_enabled = is_cw6_enabled;
+
+ dmub->debug.gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0);
+}
+void dmub_dcn401_configure_dmub_in_system_memory(struct dmub_srv *dmub)
+{
+ /* DMCUB_REGION3_TMR_AXI_SPACE values:
+ * 0b011 (0x3) - FB physical address
+ * 0b100 (0x4) - GPU virtual address
+ *
+ * Default value is 0x3 (FB Physical address for TMR). When programming
+ * DMUB to be in system memory, change to 0x4. The system memory allocated
+ * is accessible by both GPU and CPU, so we use GPU virtual address.
+ */
+ REG_WRITE(DMCUB_REGION3_TMR_AXI_SPACE, 0x4);
+}
+
+void dmub_dcn401_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data)
+{
+ REG_WRITE(DMCUB_INBOX0_WPTR, data.inbox0_cmd_common.all);
+}
+
+void dmub_dcn401_clear_inbox0_ack_register(struct dmub_srv *dmub)
+{
+ REG_WRITE(DMCUB_SCRATCH17, 0);
+}
+
+uint32_t dmub_dcn401_read_inbox0_ack_register(struct dmub_srv *dmub)
+{
+ return REG_READ(DMCUB_SCRATCH17);
+}
+
+void dmub_dcn401_send_reg_inbox0_cmd_msg(struct dmub_srv *dmub,
+ union dmub_rb_cmd *cmd)
+{
+ uint32_t *dwords = (uint32_t *)cmd; /* dwords[0] goes to RDY (written last); dwords[1..15] go to MSG0..MSG14 */
+ uint32_t payload_size_bytes = cmd->cmd_common.header.payload_bytes; /* unsigned, matching msg_index in the comparison below */
+ uint32_t msg_index;
+ static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch");
+
+ /* write only as many payload dwords as payload_bytes covers */
+ for (msg_index = 0; msg_index < 15; msg_index++) {
+ if (payload_size_bytes <= msg_index * 4) {
+ break;
+ }
+
+ switch (msg_index) {
+ case 0:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[msg_index + 1]);
+ break;
+ case 1:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[msg_index + 1]);
+ break;
+ case 2:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[msg_index + 1]);
+ break;
+ case 3:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[msg_index + 1]);
+ break;
+ case 4:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[msg_index + 1]);
+ break;
+ case 5:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[msg_index + 1]);
+ break;
+ case 6:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[msg_index + 1]);
+ break;
+ case 7:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[msg_index + 1]);
+ break;
+ case 8:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[msg_index + 1]);
+ break;
+ case 9:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[msg_index + 1]);
+ break;
+ case 10:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[msg_index + 1]);
+ break;
+ case 11:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[msg_index + 1]);
+ break;
+ case 12:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[msg_index + 1]);
+ break;
+ case 13:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[msg_index + 1]);
+ break;
+ case 14:
+ REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[msg_index + 1]);
+ break;
+ }
+ }
+
+ /* writing to INBOX RDY register will trigger DMUB REG INBOX0 RDY
+ * interrupt.
+ */
+ REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[0]);
+}
+
+uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub)
+{
+ uint32_t status;
+
+ REG_GET(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_STAT, &status);
+ return status;
+}
+
+void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub,
+ union dmub_rb_cmd *cmd)
+{
+ uint32_t *dwords = (uint32_t *)cmd;
+
+ static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch");
+
+ dwords[0] = REG_READ(DMCUB_REG_INBOX0_RSP);
+ dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG0);
+ dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG1);
+ dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG2);
+ dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG3);
+ dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG4);
+ dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG5);
+ dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG6);
+ dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG7);
+ dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG8);
+ dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG9);
+ dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG10);
+ dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG11);
+ dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG12);
+ dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG13);
+ dwords[15] = REG_READ(DMCUB_REG_INBOX0_MSG14);
+}
+
+void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub)
+{
+ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 1);
+}
+
+void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub)
+{
+ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 0);
+}
+
+void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable)
+{
+ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0);
+}
+
+void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub)
+{
+ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK, 1); /* pulse the ack field: set to 1 ... */
+ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK, 0); /* ... then clear it again in the same call */
+}
+
+void dmub_dcn401_read_reg_outbox0_msg(struct dmub_srv *dmub, uint32_t *msg)
+{
+ *msg = REG_READ(DMCUB_REG_OUTBOX0_MSG0);
+}
+
+void dmub_dcn401_write_reg_outbox0_rsp(struct dmub_srv *dmub, uint32_t *rsp)
+{
+ REG_WRITE(DMCUB_REG_OUTBOX0_RSP, *rsp);
+}
+
+uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub)
+{
+ uint32_t status;
+
+ REG_GET(DMCUB_INTERRUPT_STATUS, DMCUB_REG_OUTBOX0_RSP_INT_STAT, &status);
+ return status;
+}
+
+void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable)
+{
+ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN, enable ? 1:0);
+}
+
+uint32_t dmub_dcn401_read_reg_outbox0_rdy_int_status(struct dmub_srv *dmub)
+{
+ uint32_t status;
+
+ REG_GET(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_STAT, &status);
+ return status;
+}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h
new file mode 100644
index 000000000000..88c3a44d67d9
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef _DMUB_DCN401_H_
+#define _DMUB_DCN401_H_
+
+#include "dmub_dcn31.h"
+
+struct dmub_srv;
+
+/* DCN401 register definitions. */
+
+#define DMUB_DCN401_REGS() \
+ DMUB_SR(DMCUB_CNTL) \
+ DMUB_SR(DMCUB_CNTL2) \
+ DMUB_SR(DMCUB_SEC_CNTL) \
+ DMUB_SR(DMCUB_INBOX0_SIZE) \
+ DMUB_SR(DMCUB_INBOX0_RPTR) \
+ DMUB_SR(DMCUB_INBOX0_WPTR) \
+ DMUB_SR(DMCUB_INBOX1_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_INBOX1_SIZE) \
+ DMUB_SR(DMCUB_INBOX1_RPTR) \
+ DMUB_SR(DMCUB_INBOX1_WPTR) \
+ DMUB_SR(DMCUB_OUTBOX0_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_OUTBOX0_SIZE) \
+ DMUB_SR(DMCUB_OUTBOX0_RPTR) \
+ DMUB_SR(DMCUB_OUTBOX0_WPTR) \
+ DMUB_SR(DMCUB_OUTBOX1_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_OUTBOX1_SIZE) \
+ DMUB_SR(DMCUB_OUTBOX1_RPTR) \
+ DMUB_SR(DMCUB_OUTBOX1_WPTR) \
+ DMUB_SR(DMCUB_REGION3_CW0_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW1_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW2_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW3_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW4_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW5_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW6_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW7_OFFSET) \
+ DMUB_SR(DMCUB_REGION3_CW0_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW1_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW2_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW3_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW4_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW5_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW6_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW7_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION3_CW0_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW1_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW2_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW3_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW4_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW5_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW6_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW7_BASE_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW0_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW1_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW2_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW3_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW4_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW5_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW6_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION3_CW7_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION4_OFFSET) \
+ DMUB_SR(DMCUB_REGION4_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION4_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION5_OFFSET) \
+ DMUB_SR(DMCUB_REGION5_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION5_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_REGION6_OFFSET) \
+ DMUB_SR(DMCUB_REGION6_OFFSET_HIGH) \
+ DMUB_SR(DMCUB_REGION6_TOP_ADDRESS) \
+ DMUB_SR(DMCUB_SCRATCH0) \
+ DMUB_SR(DMCUB_SCRATCH1) \
+ DMUB_SR(DMCUB_SCRATCH2) \
+ DMUB_SR(DMCUB_SCRATCH3) \
+ DMUB_SR(DMCUB_SCRATCH4) \
+ DMUB_SR(DMCUB_SCRATCH5) \
+ DMUB_SR(DMCUB_SCRATCH6) \
+ DMUB_SR(DMCUB_SCRATCH7) \
+ DMUB_SR(DMCUB_SCRATCH8) \
+ DMUB_SR(DMCUB_SCRATCH9) \
+ DMUB_SR(DMCUB_SCRATCH10) \
+ DMUB_SR(DMCUB_SCRATCH11) \
+ DMUB_SR(DMCUB_SCRATCH12) \
+ DMUB_SR(DMCUB_SCRATCH13) \
+ DMUB_SR(DMCUB_SCRATCH14) \
+ DMUB_SR(DMCUB_SCRATCH15) \
+ DMUB_SR(DMCUB_SCRATCH16) \
+ DMUB_SR(DMCUB_SCRATCH17) \
+ DMUB_SR(DMCUB_GPINT_DATAIN0) \
+ DMUB_SR(DMCUB_GPINT_DATAIN1) \
+ DMUB_SR(DMCUB_GPINT_DATAOUT) \
+ DMUB_SR(CC_DC_PIPE_DIS) \
+ DMUB_SR(MMHUBBUB_SOFT_RESET) \
+ DMUB_SR(DCN_VM_FB_LOCATION_BASE) \
+ DMUB_SR(DCN_VM_FB_OFFSET) \
+ DMUB_SR(DMCUB_TIMER_CURRENT) \
+ DMUB_SR(DMCUB_INST_FETCH_FAULT_ADDR) \
+ DMUB_SR(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR) \
+ DMUB_SR(DMCUB_DATA_WRITE_FAULT_ADDR) \
+ DMUB_SR(DMCUB_REGION3_TMR_AXI_SPACE) \
+ DMUB_SR(DMCUB_INTERRUPT_ENABLE) \
+ DMUB_SR(DMCUB_INTERRUPT_ACK) \
+ DMUB_SR(DMCUB_INTERRUPT_STATUS) \
+ DMUB_SR(DMCUB_REG_INBOX0_RDY) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG0) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG1) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG2) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG3) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG4) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG5) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG6) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG7) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG8) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG9) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG10) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG11) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG12) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG13) \
+ DMUB_SR(DMCUB_REG_INBOX0_MSG14) \
+ DMUB_SR(DMCUB_REG_INBOX0_RSP) \
+ DMUB_SR(DMCUB_REG_OUTBOX0_RDY) \
+ DMUB_SR(DMCUB_REG_OUTBOX0_MSG0) \
+ DMUB_SR(DMCUB_REG_OUTBOX0_RSP) \
+ DMUB_SR(HOST_INTERRUPT_CSR)
+
+#define DMUB_DCN401_FIELDS() \
+ DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \
+ DMUB_SF(DMCUB_CNTL, DMCUB_TRACEPORT_EN) \
+ DMUB_SF(DMCUB_CNTL2, DMCUB_SOFT_RESET) \
+ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET) \
+ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_MEM_UNIT_ID) \
+ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS) \
+ DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW2_TOP_ADDRESS, DMCUB_REGION3_CW2_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW2_TOP_ADDRESS, DMCUB_REGION3_CW2_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW3_TOP_ADDRESS, DMCUB_REGION3_CW3_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW3_TOP_ADDRESS, DMCUB_REGION3_CW3_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW4_TOP_ADDRESS, DMCUB_REGION3_CW4_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW4_TOP_ADDRESS, DMCUB_REGION3_CW4_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW5_TOP_ADDRESS, DMCUB_REGION3_CW5_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW5_TOP_ADDRESS, DMCUB_REGION3_CW5_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE) \
+ DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION3_CW7_TOP_ADDRESS, DMCUB_REGION3_CW7_ENABLE) \
+ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \
+ DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_ENABLE) \
+ DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_TOP_ADDRESS) \
+ DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_ENABLE) \
+ DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \
+ DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \
+ DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \
+ DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) \
+ DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) \
+ DMUB_SF(DMCUB_REGION3_TMR_AXI_SPACE, DMCUB_REGION3_TMR_AXI_SPACE) \
+ DMUB_SF(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN) \
+ DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) \
+ DMUB_SF(DMCUB_INTERRUPT_STATUS, DMCUB_REG_OUTBOX0_RSP_INT_STAT) \
+ DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK) \
+ DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_STAT) \
+ DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN) \
+ DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK) \
+ DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_STAT) \
+ DMUB_SF(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN) \
+ DMUB_SF(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS)
+
+struct dmub_srv_dcn401_reg_offset {
+#define DMUB_SR(reg) uint32_t reg;
+ DMUB_DCN401_REGS()
+ DMCUB_INTERNAL_REGS()
+#undef DMUB_SR
+};
+
+struct dmub_srv_dcn401_reg_shift {
+#define DMUB_SF(reg, field) uint8_t reg##__##field;
+ DMUB_DCN401_FIELDS()
+#undef DMUB_SF
+};
+
+struct dmub_srv_dcn401_reg_mask {
+#define DMUB_SF(reg, field) uint32_t reg##__##field;
+ DMUB_DCN401_FIELDS()
+#undef DMUB_SF
+};
+
+struct dmub_srv_dcn401_regs {
+ const struct dmub_srv_dcn401_reg_offset offset;
+ const struct dmub_srv_dcn401_reg_mask mask;
+ const struct dmub_srv_dcn401_reg_shift shift;
+};
+
+extern const struct dmub_srv_dcn401_regs dmub_srv_dcn401_regs;
+
+void dmub_dcn401_reset(struct dmub_srv *dmub);
+
+void dmub_dcn401_reset_release(struct dmub_srv *dmub);
+
+void dmub_dcn401_backdoor_load(struct dmub_srv *dmub,
+ const struct dmub_window *cw0,
+ const struct dmub_window *cw1);
+
+void dmub_dcn401_backdoor_load_zfb_mode(struct dmub_srv *dmub,
+ const struct dmub_window *cw0,
+ const struct dmub_window *cw1);
+
+void dmub_dcn401_setup_windows(struct dmub_srv *dmub,
+ const struct dmub_window *cw2,
+ const struct dmub_window *cw3,
+ const struct dmub_window *cw4,
+ const struct dmub_window *cw5,
+ const struct dmub_window *cw6,
+ const struct dmub_window *region6);
+
+void dmub_dcn401_setup_mailbox(struct dmub_srv *dmub,
+ const struct dmub_region *inbox1);
+
+uint32_t dmub_dcn401_get_inbox1_wptr(struct dmub_srv *dmub);
+
+uint32_t dmub_dcn401_get_inbox1_rptr(struct dmub_srv *dmub);
+
+void dmub_dcn401_set_inbox1_wptr(struct dmub_srv *dmub, uint32_t wptr_offset);
+
+void dmub_dcn401_setup_out_mailbox(struct dmub_srv *dmub,
+ const struct dmub_region *outbox1);
+
+uint32_t dmub_dcn401_get_outbox1_wptr(struct dmub_srv *dmub);
+
+void dmub_dcn401_set_outbox1_rptr(struct dmub_srv *dmub, uint32_t rptr_offset);
+
+bool dmub_dcn401_is_hw_init(struct dmub_srv *dmub);
+
+bool dmub_dcn401_is_supported(struct dmub_srv *dmub);
+
+void dmub_dcn401_set_gpint(struct dmub_srv *dmub,
+ union dmub_gpint_data_register reg);
+
+bool dmub_dcn401_is_gpint_acked(struct dmub_srv *dmub,
+ union dmub_gpint_data_register reg);
+
+uint32_t dmub_dcn401_get_gpint_response(struct dmub_srv *dmub);
+
+uint32_t dmub_dcn401_get_gpint_dataout(struct dmub_srv *dmub);
+
+void dmub_dcn401_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params);
+
+void dmub_dcn401_skip_dmub_panel_power_sequence(struct dmub_srv *dmub, bool skip);
+
+union dmub_fw_boot_status dmub_dcn401_get_fw_boot_status(struct dmub_srv *dmub);
+
+void dmub_dcn401_setup_outbox0(struct dmub_srv *dmub,
+ const struct dmub_region *outbox0);
+
+uint32_t dmub_dcn401_get_outbox0_wptr(struct dmub_srv *dmub);
+
+void dmub_dcn401_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset);
+
+uint32_t dmub_dcn401_get_current_time(struct dmub_srv *dmub);
+
+void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub);
+
+void dmub_dcn401_configure_dmub_in_system_memory(struct dmub_srv *dmub);
+void dmub_dcn401_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data);
+void dmub_dcn401_clear_inbox0_ack_register(struct dmub_srv *dmub);
+uint32_t dmub_dcn401_read_inbox0_ack_register(struct dmub_srv *dmub);
+
+void dmub_dcn401_send_reg_inbox0_cmd_msg(struct dmub_srv *dmub,
+ union dmub_rb_cmd *cmd);
+uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub);
+void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub,
+ union dmub_rb_cmd *cmd);
+void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub);
+void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub);
+void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable);
+
+void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub);
+void dmub_dcn401_read_reg_outbox0_msg(struct dmub_srv *dmub, uint32_t *msg);
+void dmub_dcn401_write_reg_outbox0_rsp(struct dmub_srv *dmub, uint32_t *rsp); /* *rsp is the value written to DMCUB_REG_OUTBOX0_RSP */
+uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub);
+void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable);
+uint32_t dmub_dcn401_read_reg_outbox0_rdy_int_status(struct dmub_srv *dmub);
+
+#endif /* _DMUB_DCN401_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h
index 96603d07c23d..123d1704670e 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h
@@ -108,7 +108,6 @@ struct dmub_srv;
FN(reg, f4), v4)
/* Register field getting. */
-
#define REG_GET(reg_name, field, val) \
dmub_reg_get(CTX, REG(reg_name), FN(reg_name, field), val)
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 93624ffe4eb8..b17a19400c06 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -36,6 +36,10 @@
#include "dmub_dcn315.h"
#include "dmub_dcn316.h"
#include "dmub_dcn32.h"
+#include "dmub_dcn35.h"
+#include "dmub_dcn351.h"
+#include "dmub_dcn36.h"
+#include "dmub_dcn401.h"
#include "os_types.h"
/*
* Note: the DMUB service is standalone. No additional headers should be
@@ -58,12 +62,14 @@
/* Default state size if meta is absent. */
#define DMUB_FW_STATE_SIZE (64 * 1024)
-/* Default tracebuffer size if meta is absent. */
-#define DMUB_TRACE_BUFFER_SIZE (64 * 1024)
+/* Default scratch mem size. */
+#define DMUB_SCRATCH_MEM_SIZE (1024)
+/* Default indirect buffer size. */
+#define DMUB_IB_MEM_SIZE (1280)
-/* Default scratch mem size. */
-#define DMUB_SCRATCH_MEM_SIZE (256)
+/* Default LSDMA ring buffer size. */
+#define DMUB_LSDMA_RB_SIZE (64 * 1024)
/* Number of windows in use. */
#define DMUB_NUM_WINDOWS (DMUB_WINDOW_TOTAL)
@@ -77,6 +83,10 @@
#define DMUB_CW6_BASE (0x66000000)
#define DMUB_REGION5_BASE (0xA0000000)
+#define DMUB_REGION6_BASE (0xC0000000)
+
+static struct dmub_srv_dcn32_regs dmub_srv_dcn32_regs;
+static struct dmub_srv_dcn35_regs dmub_srv_dcn35_regs;
static inline uint32_t dmub_align(uint32_t val, uint32_t factor)
{
@@ -153,6 +163,9 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic)
{
struct dmub_srv_hw_funcs *funcs = &dmub->hw_funcs;
+ /* default to specifying no inbox type */
+ enum dmub_inbox_cmd_interface_type default_inbox_type = DMUB_CMD_INTERFACE_DEFAULT;
+
switch (asic) {
case DMUB_ASIC_DCN20:
case DMUB_ASIC_DCN21:
@@ -276,6 +289,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic)
funcs->send_inbox0_cmd = dmub_dcn32_send_inbox0_cmd;
funcs->clear_inbox0_ack_register = dmub_dcn32_clear_inbox0_ack_register;
funcs->read_inbox0_ack_register = dmub_dcn32_read_inbox0_ack_register;
+ funcs->subvp_save_surf_addr = dmub_dcn32_save_surf_addr;
funcs->reset = dmub_dcn32_reset;
funcs->reset_release = dmub_dcn32_reset_release;
funcs->backdoor_load = dmub_dcn32_backdoor_load;
@@ -304,13 +318,127 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic)
funcs->set_outbox0_rptr = dmub_dcn32_set_outbox0_rptr;
funcs->get_current_time = dmub_dcn32_get_current_time;
funcs->get_diagnostic_data = dmub_dcn32_get_diagnostic_data;
+ funcs->init_reg_offsets = dmub_srv_dcn32_regs_init;
break;
+ case DMUB_ASIC_DCN35:
+ case DMUB_ASIC_DCN351:
+ case DMUB_ASIC_DCN36:
+ dmub->regs_dcn35 = &dmub_srv_dcn35_regs;
+ funcs->configure_dmub_in_system_memory = dmub_dcn35_configure_dmub_in_system_memory;
+ funcs->send_inbox0_cmd = dmub_dcn35_send_inbox0_cmd;
+ funcs->clear_inbox0_ack_register = dmub_dcn35_clear_inbox0_ack_register;
+ funcs->read_inbox0_ack_register = dmub_dcn35_read_inbox0_ack_register;
+ funcs->reset = dmub_dcn35_reset;
+ funcs->reset_release = dmub_dcn35_reset_release;
+ funcs->backdoor_load = dmub_dcn35_backdoor_load;
+ funcs->backdoor_load_zfb_mode = dmub_dcn35_backdoor_load_zfb_mode;
+ funcs->setup_windows = dmub_dcn35_setup_windows;
+ funcs->setup_mailbox = dmub_dcn35_setup_mailbox;
+ funcs->get_inbox1_wptr = dmub_dcn35_get_inbox1_wptr;
+ funcs->get_inbox1_rptr = dmub_dcn35_get_inbox1_rptr;
+ funcs->set_inbox1_wptr = dmub_dcn35_set_inbox1_wptr;
+ funcs->setup_out_mailbox = dmub_dcn35_setup_out_mailbox;
+ funcs->get_outbox1_wptr = dmub_dcn35_get_outbox1_wptr;
+ funcs->set_outbox1_rptr = dmub_dcn35_set_outbox1_rptr;
+ funcs->is_supported = dmub_dcn35_is_supported;
+ funcs->is_hw_init = dmub_dcn35_is_hw_init;
+ funcs->set_gpint = dmub_dcn35_set_gpint;
+ funcs->is_gpint_acked = dmub_dcn35_is_gpint_acked;
+ funcs->get_gpint_response = dmub_dcn35_get_gpint_response;
+ funcs->get_gpint_dataout = dmub_dcn35_get_gpint_dataout;
+ funcs->get_fw_status = dmub_dcn35_get_fw_boot_status;
+ funcs->get_fw_boot_option = dmub_dcn35_get_fw_boot_option;
+ funcs->enable_dmub_boot_options = dmub_dcn35_enable_dmub_boot_options;
+ funcs->skip_dmub_panel_power_sequence = dmub_dcn35_skip_dmub_panel_power_sequence;
+ //outbox0 call stacks
+ funcs->setup_outbox0 = dmub_dcn35_setup_outbox0;
+ funcs->get_outbox0_wptr = dmub_dcn35_get_outbox0_wptr;
+ funcs->set_outbox0_rptr = dmub_dcn35_set_outbox0_rptr;
+
+ funcs->get_current_time = dmub_dcn35_get_current_time;
+ funcs->get_diagnostic_data = dmub_dcn35_get_diagnostic_data;
+
+ funcs->init_reg_offsets = dmub_srv_dcn35_regs_init;
+ if (asic == DMUB_ASIC_DCN351)
+ funcs->init_reg_offsets = dmub_srv_dcn351_regs_init;
+ if (asic == DMUB_ASIC_DCN36)
+ funcs->init_reg_offsets = dmub_srv_dcn36_regs_init;
+
+ funcs->is_hw_powered_up = dmub_dcn35_is_hw_powered_up;
+ funcs->should_detect = dmub_dcn35_should_detect;
+ break;
+
+ case DMUB_ASIC_DCN401:
+ dmub->regs_dcn401 = &dmub_srv_dcn401_regs;
+ funcs->configure_dmub_in_system_memory = dmub_dcn401_configure_dmub_in_system_memory;
+ funcs->send_inbox0_cmd = dmub_dcn401_send_inbox0_cmd;
+ funcs->clear_inbox0_ack_register = dmub_dcn401_clear_inbox0_ack_register;
+ funcs->read_inbox0_ack_register = dmub_dcn401_read_inbox0_ack_register;
+ funcs->reset = dmub_dcn401_reset;
+ funcs->reset_release = dmub_dcn401_reset_release;
+ funcs->backdoor_load = dmub_dcn401_backdoor_load;
+ funcs->backdoor_load_zfb_mode = dmub_dcn401_backdoor_load_zfb_mode;
+ funcs->setup_windows = dmub_dcn401_setup_windows;
+ funcs->setup_mailbox = dmub_dcn401_setup_mailbox;
+ funcs->get_inbox1_wptr = dmub_dcn401_get_inbox1_wptr;
+ funcs->get_inbox1_rptr = dmub_dcn401_get_inbox1_rptr;
+ funcs->set_inbox1_wptr = dmub_dcn401_set_inbox1_wptr;
+ funcs->setup_out_mailbox = dmub_dcn401_setup_out_mailbox;
+ funcs->get_outbox1_wptr = dmub_dcn401_get_outbox1_wptr;
+ funcs->set_outbox1_rptr = dmub_dcn401_set_outbox1_rptr;
+ funcs->is_supported = dmub_dcn401_is_supported;
+ funcs->is_hw_init = dmub_dcn401_is_hw_init;
+ funcs->set_gpint = dmub_dcn401_set_gpint;
+ funcs->is_gpint_acked = dmub_dcn401_is_gpint_acked;
+ funcs->get_gpint_response = dmub_dcn401_get_gpint_response;
+ funcs->get_gpint_dataout = dmub_dcn401_get_gpint_dataout;
+ funcs->get_fw_status = dmub_dcn401_get_fw_boot_status;
+ funcs->enable_dmub_boot_options = dmub_dcn401_enable_dmub_boot_options;
+ funcs->skip_dmub_panel_power_sequence = dmub_dcn401_skip_dmub_panel_power_sequence;
+ //outbox0 call stacks
+ funcs->setup_outbox0 = dmub_dcn401_setup_outbox0;
+ funcs->get_outbox0_wptr = dmub_dcn401_get_outbox0_wptr;
+ funcs->set_outbox0_rptr = dmub_dcn401_set_outbox0_rptr;
+
+ funcs->get_current_time = dmub_dcn401_get_current_time;
+ funcs->get_diagnostic_data = dmub_dcn401_get_diagnostic_data;
+
+ funcs->send_reg_inbox0_cmd_msg = dmub_dcn401_send_reg_inbox0_cmd_msg;
+ funcs->read_reg_inbox0_rsp_int_status = dmub_dcn401_read_reg_inbox0_rsp_int_status;
+ funcs->read_reg_inbox0_cmd_rsp = dmub_dcn401_read_reg_inbox0_cmd_rsp;
+ funcs->write_reg_inbox0_rsp_int_ack = dmub_dcn401_write_reg_inbox0_rsp_int_ack;
+ funcs->clear_reg_inbox0_rsp_int_ack = dmub_dcn401_clear_reg_inbox0_rsp_int_ack;
+ funcs->enable_reg_inbox0_rsp_int = dmub_dcn401_enable_reg_inbox0_rsp_int;
+ default_inbox_type = DMUB_CMD_INTERFACE_FB; // still default to FB for now
+
+ funcs->write_reg_outbox0_rdy_int_ack = dmub_dcn401_write_reg_outbox0_rdy_int_ack;
+ funcs->read_reg_outbox0_msg = dmub_dcn401_read_reg_outbox0_msg;
+ funcs->write_reg_outbox0_rsp = dmub_dcn401_write_reg_outbox0_rsp;
+ funcs->read_reg_outbox0_rdy_int_status = dmub_dcn401_read_reg_outbox0_rdy_int_status;
+ funcs->read_reg_outbox0_rsp_int_status = dmub_dcn401_read_reg_outbox0_rsp_int_status;
+ funcs->enable_reg_inbox0_rsp_int = dmub_dcn401_enable_reg_inbox0_rsp_int;
+ funcs->enable_reg_outbox0_rdy_int = dmub_dcn401_enable_reg_outbox0_rdy_int;
+ break;
default:
return false;
}
+ /* set default inbox type if not overridden */
+ if (dmub->inbox_type == DMUB_CMD_INTERFACE_DEFAULT) {
+ if (default_inbox_type != DMUB_CMD_INTERFACE_DEFAULT) {
+ /* use default inbox type as specified by DCN rev */
+ dmub->inbox_type = default_inbox_type;
+ } else if (funcs->send_reg_inbox0_cmd_msg) {
+ /* prefer reg as default inbox type if present */
+ dmub->inbox_type = DMUB_CMD_INTERFACE_REG;
+ } else {
+ /* use fb as fallback */
+ dmub->inbox_type = DMUB_CMD_INTERFACE_FB;
+ }
+ }
+
return true;
}
@@ -326,6 +454,7 @@ enum dmub_status dmub_srv_create(struct dmub_srv *dmub,
dmub->asic = params->asic;
dmub->fw_version = params->fw_version;
dmub->is_virtual = params->is_virtual;
+ dmub->inbox_type = params->inbox_type;
/* Setup asic dependent hardware funcs. */
if (!dmub_srv_hw_setup(dmub, params->asic)) {
@@ -369,56 +498,53 @@ void dmub_srv_destroy(struct dmub_srv *dmub)
dmub_memset(dmub, 0, sizeof(*dmub));
}
+/*
+ * Pack every window whose memory type equals @memory_type back to back.
+ *
+ * Each matching region starts at dmub_align(previous end, 256) and spans
+ * dmub_align(window_sizes[i], 64); regions of other memory types are not
+ * written. Returns the end of the last placed region passed through
+ * dmub_align(..., 4096).
+ */
+static uint32_t dmub_srv_calc_regions_for_memory_type(const struct dmub_srv_region_params *params,
+	struct dmub_srv_region_info *out,
+	const uint32_t *window_sizes,
+	enum dmub_window_memory_type memory_type)
+{
+	uint32_t end = 0;
+	uint32_t idx;
+
+	for (idx = 0; idx < DMUB_WINDOW_TOTAL; ++idx) {
+		struct dmub_region *win;
+
+		/* windows backed by a different memory type are laid out elsewhere */
+		if (params->window_memory_type[idx] != memory_type)
+			continue;
+
+		win = &out->regions[idx];
+		win->base = dmub_align(end, 256);
+		win->top = win->base + dmub_align(window_sizes[idx], 64);
+		end = win->top;
+	}
+
+	return dmub_align(end, 4096);
+}
+
enum dmub_status
-dmub_srv_calc_region_info(struct dmub_srv *dmub,
- const struct dmub_srv_region_params *params,
- struct dmub_srv_region_info *out)
-{
- struct dmub_region *inst = &out->regions[DMUB_WINDOW_0_INST_CONST];
- struct dmub_region *stack = &out->regions[DMUB_WINDOW_1_STACK];
- struct dmub_region *data = &out->regions[DMUB_WINDOW_2_BSS_DATA];
- struct dmub_region *bios = &out->regions[DMUB_WINDOW_3_VBIOS];
- struct dmub_region *mail = &out->regions[DMUB_WINDOW_4_MAILBOX];
- struct dmub_region *trace_buff = &out->regions[DMUB_WINDOW_5_TRACEBUFF];
- struct dmub_region *fw_state = &out->regions[DMUB_WINDOW_6_FW_STATE];
- struct dmub_region *scratch_mem = &out->regions[DMUB_WINDOW_7_SCRATCH_MEM];
+ dmub_srv_calc_region_info(struct dmub_srv *dmub,
+ const struct dmub_srv_region_params *params,
+ struct dmub_srv_region_info *out)
+{
const struct dmub_fw_meta_info *fw_info;
uint32_t fw_state_size = DMUB_FW_STATE_SIZE;
uint32_t trace_buffer_size = DMUB_TRACE_BUFFER_SIZE;
- uint32_t scratch_mem_size = DMUB_SCRATCH_MEM_SIZE;
+ uint32_t shared_state_size = DMUB_FW_HEADER_SHARED_STATE_SIZE;
+ uint32_t window_sizes[DMUB_WINDOW_TOTAL] = { 0 };
if (!dmub->sw_init)
return DMUB_STATUS_INVALID;
memset(out, 0, sizeof(*out));
+ memset(window_sizes, 0, sizeof(window_sizes));
out->num_regions = DMUB_NUM_WINDOWS;
- inst->base = 0x0;
- inst->top = inst->base + params->inst_const_size;
-
- data->base = dmub_align(inst->top, 256);
- data->top = data->base + params->bss_data_size;
-
- /*
- * All cache windows below should be aligned to the size
- * of the DMCUB cache line, 64 bytes.
- */
-
- stack->base = dmub_align(data->top, 256);
- stack->top = stack->base + DMUB_STACK_SIZE + DMUB_CONTEXT_SIZE;
-
- bios->base = dmub_align(stack->top, 256);
- bios->top = bios->base + params->vbios_size;
-
- mail->base = dmub_align(bios->top, 256);
- mail->top = mail->base + DMUB_MAILBOX_SIZE;
-
fw_info = dmub_get_fw_meta_info(params);
if (fw_info) {
+ memcpy(&dmub->meta_info, fw_info, sizeof(*fw_info));
+
fw_state_size = fw_info->fw_region_size;
trace_buffer_size = fw_info->trace_buffer_size;
+ shared_state_size = fw_info->shared_state_size;
/**
* If DM didn't fill in a version, then fill it in based on
@@ -431,26 +557,31 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
dmub->fw_version = fw_info->fw_version;
}
- trace_buff->base = dmub_align(mail->top, 256);
- trace_buff->top = trace_buff->base + dmub_align(trace_buffer_size, 64);
+ window_sizes[DMUB_WINDOW_0_INST_CONST] = params->inst_const_size;
+ window_sizes[DMUB_WINDOW_1_STACK] = DMUB_STACK_SIZE + DMUB_CONTEXT_SIZE;
+ window_sizes[DMUB_WINDOW_2_BSS_DATA] = params->bss_data_size;
+ window_sizes[DMUB_WINDOW_3_VBIOS] = params->vbios_size;
+ window_sizes[DMUB_WINDOW_4_MAILBOX] = DMUB_MAILBOX_SIZE;
+ window_sizes[DMUB_WINDOW_5_TRACEBUFF] = trace_buffer_size;
+ window_sizes[DMUB_WINDOW_6_FW_STATE] = fw_state_size;
+ window_sizes[DMUB_WINDOW_7_SCRATCH_MEM] = DMUB_SCRATCH_MEM_SIZE;
+ window_sizes[DMUB_WINDOW_IB_MEM] = DMUB_IB_MEM_SIZE;
+ window_sizes[DMUB_WINDOW_SHARED_STATE] = max(DMUB_FW_HEADER_SHARED_STATE_SIZE, shared_state_size);
+ window_sizes[DMUB_WINDOW_LSDMA_BUFFER] = DMUB_LSDMA_RB_SIZE;
- fw_state->base = dmub_align(trace_buff->top, 256);
- fw_state->top = fw_state->base + dmub_align(fw_state_size, 64);
+ out->fb_size =
+ dmub_srv_calc_regions_for_memory_type(params, out, window_sizes, DMUB_WINDOW_MEMORY_TYPE_FB);
- scratch_mem->base = dmub_align(fw_state->top, 256);
- scratch_mem->top = scratch_mem->base + dmub_align(scratch_mem_size, 64);
-
- out->fb_size = dmub_align(scratch_mem->top, 4096);
+ out->gart_size =
+ dmub_srv_calc_regions_for_memory_type(params, out, window_sizes, DMUB_WINDOW_MEMORY_TYPE_GART);
return DMUB_STATUS_OK;
}
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
- const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+ const struct dmub_srv_memory_params *params,
struct dmub_srv_fb_info *out)
{
- uint8_t *cpu_base;
- uint64_t gpu_base;
uint32_t i;
if (!dmub->sw_init)
@@ -461,15 +592,18 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
if (params->region_info->num_regions != DMUB_NUM_WINDOWS)
return DMUB_STATUS_INVALID;
- cpu_base = (uint8_t *)params->cpu_addr;
- gpu_base = params->gpu_addr;
-
for (i = 0; i < DMUB_NUM_WINDOWS; ++i) {
const struct dmub_region *reg =
&params->region_info->regions[i];
- out->fb[i].cpu_addr = cpu_base + reg->base;
- out->fb[i].gpu_addr = gpu_base + reg->base;
+ if (params->window_memory_type[i] == DMUB_WINDOW_MEMORY_TYPE_GART) {
+ out->fb[i].cpu_addr = (uint8_t *)params->cpu_gart_addr + reg->base;
+ out->fb[i].gpu_addr = params->gpu_gart_addr + reg->base;
+ } else {
+ out->fb[i].cpu_addr = (uint8_t *)params->cpu_fb_addr + reg->base;
+ out->fb[i].gpu_addr = params->gpu_fb_addr + reg->base;
+ }
+
out->fb[i].size = reg->top - reg->base;
}
@@ -519,16 +653,18 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
struct dmub_fb *tracebuff_fb = params->fb[DMUB_WINDOW_5_TRACEBUFF];
struct dmub_fb *fw_state_fb = params->fb[DMUB_WINDOW_6_FW_STATE];
struct dmub_fb *scratch_mem_fb = params->fb[DMUB_WINDOW_7_SCRATCH_MEM];
+ struct dmub_fb *ib_mem_gart = params->fb[DMUB_WINDOW_IB_MEM];
+ struct dmub_fb *shared_state_fb = params->fb[DMUB_WINDOW_SHARED_STATE];
struct dmub_rb_init_params rb_params, outbox0_rb_params;
- struct dmub_window cw0, cw1, cw2, cw3, cw4, cw5, cw6;
+ struct dmub_window cw0, cw1, cw2, cw3, cw4, cw5, cw6, region6;
struct dmub_region inbox1, outbox1, outbox0;
if (!dmub->sw_init)
return DMUB_STATUS_INVALID;
if (!inst_fb || !stack_fb || !data_fb || !bios_fb || !mail_fb ||
- !tracebuff_fb || !fw_state_fb || !scratch_mem_fb) {
+ !tracebuff_fb || !fw_state_fb || !scratch_mem_fb || !ib_mem_gart || !shared_state_fb) {
ASSERT(0);
return DMUB_STATUS_INVALID;
}
@@ -560,7 +696,8 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
* DMCUB when backdoor loading if the write from x86 hasn't been
* flushed yet. This only occurs in backdoor loading.
*/
- dmub_flush_buffer_mem(inst_fb);
+ if (params->mem_access_type == DMUB_MEMORY_ACCESS_CPU)
+ dmub_flush_buffer_mem(inst_fb);
if (params->fw_in_system_memory && dmub->hw_funcs.backdoor_load_zfb_mode)
dmub->hw_funcs.backdoor_load_zfb_mode(dmub, &cw0, &cw1);
@@ -590,7 +727,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
inbox1.base = cw4.region.base;
inbox1.top = cw4.region.base + DMUB_RB_SIZE;
outbox1.base = inbox1.top;
- outbox1.top = cw4.region.top;
+ outbox1.top = inbox1.top + DMUB_RB_SIZE;
cw5.offset.quad_part = tracebuff_fb->gpu_addr;
cw5.region.base = DMUB_CW5_BASE;
@@ -603,12 +740,20 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
cw6.region.base = DMUB_CW6_BASE;
cw6.region.top = cw6.region.base + fw_state_fb->size;
- dmub->fw_state = fw_state_fb->cpu_addr;
+ dmub->fw_state = (void *)((uintptr_t)(fw_state_fb->cpu_addr) + DMUB_DEBUG_FW_STATE_OFFSET);
+
+ region6.offset.quad_part = shared_state_fb->gpu_addr;
+ region6.region.base = DMUB_CW6_BASE;
+ region6.region.top = region6.region.base + shared_state_fb->size;
+
+ dmub->shared_state = shared_state_fb->cpu_addr;
dmub->scratch_mem_fb = *scratch_mem_fb;
+ dmub->ib_mem_gart = *ib_mem_gart;
+
if (dmub->hw_funcs.setup_windows)
- dmub->hw_funcs.setup_windows(dmub, &cw2, &cw3, &cw4, &cw5, &cw6);
+ dmub->hw_funcs.setup_windows(dmub, &cw2, &cw3, &cw4, &cw5, &cw6, &region6);
if (dmub->hw_funcs.setup_outbox0)
dmub->hw_funcs.setup_outbox0(dmub, &outbox0);
@@ -617,12 +762,16 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
dmub->hw_funcs.setup_mailbox(dmub, &inbox1);
if (dmub->hw_funcs.setup_out_mailbox)
dmub->hw_funcs.setup_out_mailbox(dmub, &outbox1);
+ if (dmub->hw_funcs.enable_reg_inbox0_rsp_int)
+ dmub->hw_funcs.enable_reg_inbox0_rsp_int(dmub, true);
+ if (dmub->hw_funcs.enable_reg_outbox0_rdy_int)
+ dmub->hw_funcs.enable_reg_outbox0_rdy_int(dmub, true);
dmub_memset(&rb_params, 0, sizeof(rb_params));
rb_params.ctx = dmub;
rb_params.base_address = mail_fb->cpu_addr;
rb_params.capacity = DMUB_RB_SIZE;
- dmub_rb_init(&dmub->inbox1_rb, &rb_params);
+ dmub_rb_init(&dmub->inbox1.rb, &rb_params);
// Initialize outbox1 ring buffer
rb_params.ctx = dmub;
@@ -648,20 +797,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub,
dmub->hw_funcs.reset_release(dmub);
dmub->hw_init = true;
-
- return DMUB_STATUS_OK;
-}
-
-enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub)
-{
- if (!dmub->sw_init)
- return DMUB_STATUS_INVALID;
-
- if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) {
- dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
- dmub->inbox1_rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub);
- dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
- }
+ dmub->power_state = DMUB_POWER_STATE_D0;
return DMUB_STATUS_OK;
}
@@ -676,8 +812,13 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub)
/* mailboxes have been reset in hw, so reset the sw state as well */
dmub->inbox1_last_wptr = 0;
- dmub->inbox1_rb.wrpt = 0;
- dmub->inbox1_rb.rptr = 0;
+ dmub->inbox1.rb.wrpt = 0;
+ dmub->inbox1.rb.rptr = 0;
+ dmub->inbox1.num_reported = 0;
+ dmub->inbox1.num_submitted = 0;
+ dmub->reg_inbox0.num_reported = 0;
+ dmub->reg_inbox0.num_submitted = 0;
+ dmub->reg_inbox0.is_pending = 0;
dmub->outbox0_rb.wrpt = 0;
dmub->outbox0_rb.rptr = 0;
dmub->outbox1_rb.wrpt = 0;
@@ -688,45 +829,88 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub)
return DMUB_STATUS_OK;
}
-enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
+enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub,
const union dmub_rb_cmd *cmd)
{
if (!dmub->hw_init)
return DMUB_STATUS_INVALID;
- if (dmub_rb_push_front(&dmub->inbox1_rb, cmd))
+ if (dmub->power_state != DMUB_POWER_STATE_D0)
+ return DMUB_STATUS_POWER_STATE_D3;
+
+ if (dmub->inbox1.rb.rptr > dmub->inbox1.rb.capacity ||
+ dmub->inbox1.rb.wrpt > dmub->inbox1.rb.capacity) {
+ return DMUB_STATUS_HW_FAILURE;
+ }
+
+ if (dmub_rb_push_front(&dmub->inbox1.rb, cmd)) {
+ dmub->inbox1.num_submitted++;
return DMUB_STATUS_OK;
+ }
return DMUB_STATUS_QUEUE_FULL;
}
-enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub)
+enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub)
{
struct dmub_rb flush_rb;
if (!dmub->hw_init)
return DMUB_STATUS_INVALID;
+ if (dmub->power_state != DMUB_POWER_STATE_D0)
+ return DMUB_STATUS_POWER_STATE_D3;
+
/**
* Read back all the queued commands to ensure that they've
* been flushed to framebuffer memory. Otherwise DMCUB might
* read back stale, fully invalid or partially invalid data.
*/
- flush_rb = dmub->inbox1_rb;
+ flush_rb = dmub->inbox1.rb;
flush_rb.rptr = dmub->inbox1_last_wptr;
dmub_rb_flush_pending(&flush_rb);
- dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt);
+ dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1.rb.wrpt);
- dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+ dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt;
return DMUB_STATUS_OK;
}
+/*
+ * Report whether the DMUB hardware is powered up. When no is_hw_powered_up
+ * hook is installed the function returns true.
+ */
+bool dmub_srv_is_hw_pwr_up(struct dmub_srv *dmub)
+{
+	if (dmub->hw_funcs.is_hw_powered_up)
+		return dmub->hw_funcs.is_hw_powered_up(dmub);
+
+	return true;
+}
+
+/*
+ * Poll dmub_srv_is_hw_pwr_up() in steps of 100 (one udelay(100) per miss)
+ * until it reports power-up.
+ *
+ * Returns DMUB_STATUS_INVALID before hw_init, DMUB_STATUS_OK once powered
+ * up, and DMUB_STATUS_TIMEOUT once the waited count exceeds @timeout_us.
+ */
+enum dmub_status dmub_srv_wait_for_hw_pwr_up(struct dmub_srv *dmub,
+					    uint32_t timeout_us)
+{
+	uint32_t waited_us = 0;
+
+	if (!dmub->hw_init)
+		return DMUB_STATUS_INVALID;
+
+	while (waited_us <= timeout_us) {
+		if (dmub_srv_is_hw_pwr_up(dmub))
+			return DMUB_STATUS_OK;
+
+		udelay(100);
+		waited_us += 100;
+	}
+
+	return DMUB_STATUS_TIMEOUT;
+}
+
enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub,
uint32_t timeout_us)
{
uint32_t i;
+ bool hw_on = true;
if (!dmub->hw_init)
return DMUB_STATUS_INVALID;
@@ -734,7 +918,10 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub,
for (i = 0; i <= timeout_us; i += 100) {
union dmub_fw_boot_status status = dmub->hw_funcs.get_fw_status(dmub);
- if (status.bits.dal_fw && status.bits.mailbox_rdy)
+ if (dmub->hw_funcs.is_hw_powered_up)
+ hw_on = dmub->hw_funcs.is_hw_powered_up(dmub);
+
+ if (status.bits.dal_fw && status.bits.mailbox_rdy && hw_on)
return DMUB_STATUS_OK;
udelay(100);
@@ -743,26 +930,84 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub,
return DMUB_STATUS_TIMEOUT;
}
+/*
+ * Refresh the software view of a pending reg_inbox0 command.
+ *
+ * Does nothing unless a command is marked pending. The command stays
+ * pending only while a read_reg_inbox0_rsp_int_status hook exists and
+ * returns zero. Once it is no longer pending the response interrupt is
+ * acked (if the ack hook exists) and, unless is_multi_pending is set,
+ * num_reported is caught up to num_submitted.
+ */
+static void dmub_srv_update_reg_inbox0_status(struct dmub_srv *dmub)
+{
+	if (!dmub->reg_inbox0.is_pending)
+		return;
+
+	dmub->reg_inbox0.is_pending = dmub->hw_funcs.read_reg_inbox0_rsp_int_status &&
+		!dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub);
+
+	if (dmub->reg_inbox0.is_pending)
+		return;
+
+	/* ack the rsp interrupt */
+	if (dmub->hw_funcs.write_reg_inbox0_rsp_int_ack)
+		dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub);
+
+	/* only update the reported count if commands aren't being batched */
+	if (!dmub->reg_inbox0.is_pending && !dmub->reg_inbox0.is_multi_pending)
+		dmub->reg_inbox0.num_reported = dmub->reg_inbox0.num_submitted;
+}
+
+/*
+ * Wait until neither inbox1 nor reg_inbox0 has commands outstanding.
+ *
+ * Each iteration re-reads the inbox1 pointers and the reg_inbox0 response
+ * status from HW into scratch copies; only those copies are written here.
+ *
+ * Returns DMUB_STATUS_INVALID when HW is not initialised or an inbox1
+ * pointer hook is missing, DMUB_STATUS_HW_FAILURE when HW reports a read
+ * pointer beyond the ring capacity, DMUB_STATUS_OK once both inboxes are
+ * drained, and DMUB_STATUS_TIMEOUT otherwise.
+ */
+enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub,
+					   uint32_t timeout_us)
+{
+	uint32_t i;
+	const uint32_t polling_interval_us = 1;
+	struct dmub_srv_inbox scratch_reg_inbox0 = dmub->reg_inbox0;
+	struct dmub_srv_inbox scratch_inbox1 = dmub->inbox1;
+	const volatile struct dmub_srv_inbox *reg_inbox0 = &dmub->reg_inbox0;
+	const volatile struct dmub_srv_inbox *inbox1 = &dmub->inbox1;
+
+	/* both pointer hooks are called unconditionally in the loop below */
+	if (!dmub->hw_init ||
+	    !dmub->hw_funcs.get_inbox1_wptr ||
+	    !dmub->hw_funcs.get_inbox1_rptr)
+		return DMUB_STATUS_INVALID;
+
+	for (i = 0; i <= timeout_us; i += polling_interval_us) {
+		scratch_inbox1.rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub);
+		scratch_inbox1.rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
+
+		scratch_reg_inbox0.is_pending = scratch_reg_inbox0.is_pending &&
+			dmub->hw_funcs.read_reg_inbox0_rsp_int_status &&
+			!dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub);
+
+		if (scratch_inbox1.rb.rptr > dmub->inbox1.rb.capacity)
+			return DMUB_STATUS_HW_FAILURE;
+
+		/* check current HW state first, but use command submission vs reported as a fallback */
+		if ((dmub_rb_empty(&scratch_inbox1.rb) ||
+		     inbox1->num_reported >= scratch_inbox1.num_submitted) &&
+		    (!scratch_reg_inbox0.is_pending ||
+		     reg_inbox0->num_reported >= scratch_reg_inbox0.num_submitted))
+			return DMUB_STATUS_OK;
+
+		udelay(polling_interval_us);
+	}
+
+	return DMUB_STATUS_TIMEOUT;
+}
+
enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub,
uint32_t timeout_us)
{
- uint32_t i, rptr;
+ enum dmub_status status;
+ uint32_t i;
+ const uint32_t polling_interval_us = 1;
if (!dmub->hw_init)
return DMUB_STATUS_INVALID;
- for (i = 0; i <= timeout_us; ++i) {
- rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
+ for (i = 0; i < timeout_us; i += polling_interval_us) {
+ status = dmub_srv_update_inbox_status(dmub);
- if (rptr > dmub->inbox1_rb.capacity)
- return DMUB_STATUS_HW_FAILURE;
-
- dmub->inbox1_rb.rptr = rptr;
+ if (status != DMUB_STATUS_OK)
+ return status;
- if (dmub_rb_empty(&dmub->inbox1_rb))
+ /* check for idle */
+ if (dmub_rb_empty(&dmub->inbox1.rb) && !dmub->reg_inbox0.is_pending)
return DMUB_STATUS_OK;
- udelay(1);
+ udelay(polling_interval_us);
}
return DMUB_STATUS_TIMEOUT;
@@ -873,35 +1118,6 @@ enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub,
return DMUB_STATUS_OK;
}
-enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub,
- union dmub_rb_cmd *cmd)
-{
- enum dmub_status status = DMUB_STATUS_OK;
-
- // Queue command
- status = dmub_srv_cmd_queue(dmub, cmd);
-
- if (status != DMUB_STATUS_OK)
- return status;
-
- // Execute command
- status = dmub_srv_cmd_execute(dmub);
-
- if (status != DMUB_STATUS_OK)
- return status;
-
- // Wait for DMUB to process command
- status = dmub_srv_wait_for_idle(dmub, 100000);
-
- if (status != DMUB_STATUS_OK)
- return status;
-
- // Copy data back from ring buffer into command
- dmub_rb_get_return_data(&dmub->inbox1_rb, cmd);
-
- return status;
-}
-
static inline bool dmub_rb_out_trace_buffer_front(struct dmub_rb *rb,
void *entry)
{
@@ -932,11 +1148,11 @@ bool dmub_srv_get_outbox0_msg(struct dmub_srv *dmub, struct dmcub_trace_buf_entr
return dmub_rb_out_trace_buffer_front(&dmub->outbox0_rb, (void *)entry);
}
-bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data)
+bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub)
{
- if (!dmub || !dmub->hw_funcs.get_diagnostic_data || !diag_data)
+ if (!dmub || !dmub->hw_funcs.get_diagnostic_data)
return false;
- dmub->hw_funcs.get_diagnostic_data(dmub, diag_data);
+ dmub->hw_funcs.get_diagnostic_data(dmub);
return true;
}
@@ -969,6 +1185,7 @@ enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t ti
ack = dmub->hw_funcs.read_inbox0_ack_register(dmub);
if (ack)
return DMUB_STATUS_OK;
+ udelay(1);
}
return DMUB_STATUS_TIMEOUT;
}
@@ -982,3 +1199,172 @@ enum dmub_status dmub_srv_send_inbox0_cmd(struct dmub_srv *dmub,
dmub->hw_funcs.send_inbox0_cmd(dmub, data);
return DMUB_STATUS_OK;
}
+
+/*
+ * Forward @addr and @subvp_index to the subvp_save_surf_addr hook.
+ * Does nothing when the hook is not installed.
+ */
+void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index)
+{
+	if (!dmub->hw_funcs.subvp_save_surf_addr)
+		return;
+
+	dmub->hw_funcs.subvp_save_surf_addr(dmub, addr, subvp_index);
+}
+
+/*
+ * Record the requested power state in the service.
+ * Ignored when @dmub is NULL or hardware init has not completed.
+ */
+void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state)
+{
+	if (dmub && dmub->hw_init)
+		dmub->power_state = dmub_srv_power_state;
+}
+
+/*
+ * Submit one command through the register-based inbox0 interface.
+ *
+ * Returns DMUB_STATUS_INVALID before hw_init or when the send/clear-ack
+ * hooks are missing, DMUB_STATUS_POWER_STATE_D3 when not in D0,
+ * DMUB_STATUS_QUEUE_FULL when the pending count has reached
+ * DMUB_REG_INBOX0_RB_MAX_ENTRY, and DMUB_STATUS_OK after the command
+ * was handed to the send hook.
+ */
+enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd)
+{
+	struct dmub_srv_inbox *inbox = &dmub->reg_inbox0;
+	uint32_t num_pending = 0;
+
+	if (!dmub->hw_init)
+		return DMUB_STATUS_INVALID;
+
+	if (dmub->power_state != DMUB_POWER_STATE_D0)
+		return DMUB_STATUS_POWER_STATE_D3;
+
+	if (!dmub->hw_funcs.send_reg_inbox0_cmd_msg ||
+	    !dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack)
+		return DMUB_STATUS_INVALID;
+
+	if (inbox->num_submitted >= inbox->num_reported) {
+		num_pending = inbox->num_submitted - inbox->num_reported;
+	} else {
+		/* num_submitted wrapped */
+		num_pending = DMUB_REG_INBOX0_RB_MAX_ENTRY -
+			(inbox->num_reported - inbox->num_submitted);
+	}
+
+	if (num_pending >= DMUB_REG_INBOX0_RB_MAX_ENTRY)
+		return DMUB_STATUS_QUEUE_FULL;
+
+	/* clear last rsp ack and send message */
+	dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack(dmub);
+	dmub->hw_funcs.send_reg_inbox0_cmd_msg(dmub, cmd);
+
+	inbox->num_submitted++;
+	inbox->is_pending = true;
+	inbox->is_multi_pending = cmd->cmd_common.header.multi_cmd_pending;
+
+	return DMUB_STATUS_OK;
+}
+
+/*
+ * Fetch a command response into @cmd_rsp.
+ *
+ * Uses the read_reg_inbox0_cmd_rsp hook when the register inbox type is
+ * selected and that hook exists; otherwise calls dmub_rb_get_return_data()
+ * on the inbox1 ring buffer. A NULL @dmub is ignored.
+ */
+void dmub_srv_cmd_get_response(struct dmub_srv *dmub,
+			       union dmub_rb_cmd *cmd_rsp)
+{
+	if (!dmub)
+		return;
+
+	if (dmub->inbox_type != DMUB_CMD_INTERFACE_REG ||
+	    !dmub->hw_funcs.read_reg_inbox0_cmd_rsp) {
+		dmub_rb_get_return_data(&dmub->inbox1.rb, cmd_rsp);
+		return;
+	}
+
+	dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd_rsp);
+}
+
+/*
+ * Clear the reg_inbox0 pending flags (is_pending and is_multi_pending).
+ * Returns DMUB_STATUS_INVALID for a NULL or non-sw-initialised service.
+ */
+static enum dmub_status dmub_srv_sync_reg_inbox0(struct dmub_srv *dmub)
+{
+	if (dmub && dmub->sw_init) {
+		dmub->reg_inbox0.is_multi_pending = 0;
+		dmub->reg_inbox0.is_pending = 0;
+		return DMUB_STATUS_OK;
+	}
+
+	return DMUB_STATUS_INVALID;
+}
+
+/*
+ * Re-read the inbox1 read/write pointers from hardware into the software
+ * ring buffer and set inbox1_last_wptr to the write pointer.
+ *
+ * Nothing is updated when either pointer hook is missing (the function
+ * still returns DMUB_STATUS_OK) or when a pointer exceeds the ring
+ * capacity (DMUB_STATUS_HW_FAILURE).
+ */
+static enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub)
+{
+	uint32_t hw_rptr, hw_wptr;
+
+	if (!dmub->sw_init)
+		return DMUB_STATUS_INVALID;
+
+	if (!dmub->hw_funcs.get_inbox1_rptr || !dmub->hw_funcs.get_inbox1_wptr)
+		return DMUB_STATUS_OK;
+
+	hw_rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
+	hw_wptr = dmub->hw_funcs.get_inbox1_wptr(dmub);
+
+	if (hw_rptr > dmub->inbox1.rb.capacity || hw_wptr > dmub->inbox1.rb.capacity)
+		return DMUB_STATUS_HW_FAILURE;
+
+	dmub->inbox1.rb.rptr = hw_rptr;
+	dmub->inbox1.rb.wrpt = hw_wptr;
+	dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt;
+
+	return DMUB_STATUS_OK;
+}
+
+/*
+ * Resynchronise both inboxes: reg_inbox0 first, then inbox1.
+ * The first non-OK status is returned and stops the sequence.
+ */
+enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub)
+{
+	enum dmub_status status = dmub_srv_sync_reg_inbox0(dmub);
+
+	if (status == DMUB_STATUS_OK)
+		status = dmub_srv_sync_inbox1(dmub);
+
+	return status;
+}
+
+/*
+ * Wait until dmub_rb_num_free() on inbox1 reports at least
+ * @num_free_required.
+ *
+ * Each iteration refreshes the inbox state via
+ * dmub_srv_update_inbox_status() and returns its status if that is not
+ * DMUB_STATUS_OK. The loop runs while the elapsed count is below
+ * @timeout_us, so a timeout of 0 returns DMUB_STATUS_TIMEOUT without polling.
+ */
+enum dmub_status dmub_srv_wait_for_inbox_free(struct dmub_srv *dmub,
+					      uint32_t timeout_us,
+					      uint32_t num_free_required)
+{
+	const uint32_t polling_interval_us = 1;
+	uint32_t elapsed_us;
+
+	if (!dmub->hw_init)
+		return DMUB_STATUS_INVALID;
+
+	for (elapsed_us = 0; elapsed_us < timeout_us; elapsed_us += polling_interval_us) {
+		enum dmub_status status = dmub_srv_update_inbox_status(dmub);
+
+		if (status != DMUB_STATUS_OK)
+			return status;
+
+		/* check for space in inbox1 */
+		if (dmub_rb_num_free(&dmub->inbox1.rb) >= num_free_required)
+			return DMUB_STATUS_OK;
+
+		udelay(polling_interval_us);
+	}
+
+	return DMUB_STATUS_TIMEOUT;
+}
+
+/*
+ * Pull the current inbox1 read pointer from hardware, credit the commands
+ * consumed since the last update to inbox1.num_reported, then refresh the
+ * reg_inbox0 pending state.
+ *
+ * Returns DMUB_STATUS_INVALID before hw_init, DMUB_STATUS_POWER_STATE_D3
+ * when not in D0, and DMUB_STATUS_HW_FAILURE when the hardware read
+ * pointer exceeds the ring capacity.
+ */
+enum dmub_status dmub_srv_update_inbox_status(struct dmub_srv *dmub)
+{
+	struct dmub_rb *rb = &dmub->inbox1.rb;
+	uint32_t hw_rptr;
+
+	if (!dmub->hw_init)
+		return DMUB_STATUS_INVALID;
+
+	if (dmub->power_state != DMUB_POWER_STATE_D0)
+		return DMUB_STATUS_POWER_STATE_D3;
+
+	/* update inbox1 state */
+	hw_rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
+
+	if (hw_rptr > rb->capacity)
+		return DMUB_STATUS_HW_FAILURE;
+
+	if (rb->rptr > hw_rptr)
+		/* rb wrapped */
+		dmub->inbox1.num_reported += (hw_rptr + rb->capacity - rb->rptr) / DMUB_RB_CMD_SIZE;
+	else
+		dmub->inbox1.num_reported += (hw_rptr - rb->rptr) / DMUB_RB_CMD_SIZE;
+
+	rb->rptr = hw_rptr;
+
+	/* update reg_inbox0 */
+	dmub_srv_update_reg_inbox0_status(dmub);
+
+	return DMUB_STATUS_OK;
+}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c
index 74189102eaec..e7a58b140388 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c
@@ -71,7 +71,7 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub,
switch (cmd.cmd_common.header.type) {
case DMUB_OUT_CMD__DP_AUX_REPLY:
notify->type = DMUB_NOTIFICATION_AUX_REPLY;
- notify->link_index = cmd.dp_aux_reply.control.instance;
+ notify->instance = cmd.dp_aux_reply.control.instance;
notify->result = cmd.dp_aux_reply.control.result;
dmub_memcpy((void *)&notify->aux_reply,
(void *)&cmd.dp_aux_reply.reply_data, sizeof(struct aux_reply_data));
@@ -84,34 +84,27 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub,
notify->type = DMUB_NOTIFICATION_HPD_IRQ;
}
- notify->link_index = cmd.dp_hpd_notify.hpd_data.instance;
+ notify->instance = cmd.dp_hpd_notify.hpd_data.instance;
notify->result = AUX_RET_SUCCESS;
break;
case DMUB_OUT_CMD__SET_CONFIG_REPLY:
notify->type = DMUB_NOTIFICATION_SET_CONFIG_REPLY;
- notify->link_index = cmd.set_config_reply.set_config_reply_control.instance;
+ notify->instance = cmd.set_config_reply.set_config_reply_control.instance;
notify->sc_status = cmd.set_config_reply.set_config_reply_control.status;
break;
case DMUB_OUT_CMD__DPIA_NOTIFICATION:
notify->type = DMUB_NOTIFICATION_DPIA_NOTIFICATION;
- notify->link_index = cmd.dpia_notification.payload.header.instance;
-
- if (cmd.dpia_notification.payload.header.type == DPIA_NOTIFY__BW_ALLOCATION) {
-
- notify->dpia_notification.payload.data.dpia_bw_alloc.estimated_bw =
- cmd.dpia_notification.payload.data.dpia_bw_alloc.estimated_bw;
- notify->dpia_notification.payload.data.dpia_bw_alloc.allocated_bw =
- cmd.dpia_notification.payload.data.dpia_bw_alloc.allocated_bw;
-
- if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_request_failed)
- notify->result = DPIA_BW_REQ_FAILED;
- else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_request_succeeded)
- notify->result = DPIA_BW_REQ_SUCCESS;
- else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.est_bw_changed)
- notify->result = DPIA_EST_BW_CHANGED;
- else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_alloc_cap_changed)
- notify->result = DPIA_BW_ALLOC_CAPS_CHANGED;
- }
+ notify->instance = cmd.dpia_notification.payload.header.instance;
+ break;
+ case DMUB_OUT_CMD__HPD_SENSE_NOTIFY:
+ notify->type = DMUB_NOTIFICATION_HPD_SENSE_NOTIFY;
+ dmub_memcpy(&notify->hpd_sense_notify,
+ &cmd.hpd_sense_notify.data,
+ sizeof(cmd.hpd_sense_notify.data));
+ break;
+ case DMUB_OUT_CMD__FUSED_IO:
+ notify->type = DMUB_NOTIFICATION_FUSED_IO;
+ dmub_memcpy(&notify->fused_request, &cmd.fused_io.request, sizeof(cmd.fused_io.request));
break;
default:
notify->type = DMUB_NOTIFICATION_NO_DATA;
diff --git a/drivers/gpu/drm/amd/display/include/audio_types.h b/drivers/gpu/drm/amd/display/include/audio_types.h
index 66a54da0641c..e4a26143f14c 100644
--- a/drivers/gpu/drm/amd/display/include/audio_types.h
+++ b/drivers/gpu/drm/amd/display/include/audio_types.h
@@ -27,11 +27,21 @@
#define __AUDIO_TYPES_H__
#include "signal_types.h"
+#include "fixed31_32.h"
+#include "dc_dp_types.h"
#define AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS 20
#define MAX_HW_AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS 18
#define MULTI_CHANNEL_SPLIT_NO_ASSO_INFO 0xFFFFFFFF
+struct audio_dp_link_info {
+ uint32_t link_bandwidth_kbps;
+ uint32_t hblank_min_symbol_width;
+ enum dp_link_encoding encoding;
+ enum dc_link_rate link_rate;
+ enum dc_lane_count lane_count;
+ bool is_mst;
+};
struct audio_crtc_info {
uint32_t h_total;
@@ -42,7 +52,10 @@ struct audio_crtc_info {
uint32_t calculated_pixel_clock_100Hz; /* in 100Hz */
uint32_t refresh_rate;
enum dc_color_depth color_depth;
+ enum dc_pixel_encoding pixel_encoding;
bool interlaced;
+ uint32_t dsc_bits_per_pixel;
+ uint32_t dsc_num_slices;
};
struct azalia_clock_info {
uint32_t pixel_clock_in_10khz;
@@ -64,7 +77,7 @@ enum audio_dto_source {
/* PLL information required for AZALIA DTO calculation */
struct audio_pll_info {
- uint32_t dp_dto_source_clock_in_khz;
+ uint32_t audio_dto_source_clock_in_khz;
uint32_t feed_back_divider;
enum audio_dto_source dto_source;
bool ss_enabled;
@@ -95,6 +108,8 @@ struct audio_output {
enum signal_type signal;
/* video timing */
struct audio_crtc_info crtc_info;
+ /* DP link info */
+ struct audio_dp_link_info dp_link_info;
/* PLL for audio */
struct audio_pll_info pll_info;
};
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index e317089cf6ee..8aea50aa9533 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -213,6 +213,11 @@ enum {
#endif
#define DEVICE_ID_NV_13FE 0x13FE // CYAN_SKILLFISH
#define DEVICE_ID_NV_143F 0x143F
+#define DEVICE_ID_NV_13F9 0x13F9
+#define DEVICE_ID_NV_13FA 0x13FA
+#define DEVICE_ID_NV_13FB 0x13FB
+#define DEVICE_ID_NV_13FC 0x13FC
+#define DEVICE_ID_NV_13DB 0x13DB
#define FAMILY_VGH 144
#define DEVICE_ID_VGH_163F 0x163F
#define DEVICE_ID_VGH_1435 0x1435
@@ -250,11 +255,28 @@ enum {
#define GC_11_0_0_A0 0x1
#define GC_11_0_2_A0 0x10
#define GC_11_0_3_A0 0x20
+#define GC_11_0_4_A0 0xC0
#define GC_11_UNKNOWN 0xFF
#define ASICREV_IS_GC_11_0_0(eChipRev) (eChipRev < GC_11_0_2_A0)
#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_0_3_A0)
#define ASICREV_IS_GC_11_0_3(eChipRev) (eChipRev >= GC_11_0_3_A0 && eChipRev < GC_11_UNKNOWN)
+#define ASICREV_IS_GC_11_0_4(eChipRev) ((eChipRev) >= GC_11_0_4_A0 && (eChipRev) < GC_11_UNKNOWN) /* arg parenthesized for expression safety */
+#define ASICREV_IS_DCN36(eChipRev) ((eChipRev) >= 0x50 && (eChipRev) < 0xC0)
+
+#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
+/* GC 12 revision IDs; note that 12.0.1 (0x40) sorts below 12.0.0 (0x50). */
+enum {
+	GC_12_0_0_A0 = 0x50,
+	GC_12_0_1_A0 = 0x40,
+	GC_12_UNKNOWN = 0xFF,
+};
+/* Range checks: 12.0.1 is [0x40, 0x50), 12.0.0 is [0x50, 0xFF). Arguments are parenthesized. */
+#define ASICREV_IS_GC_12_0_1_A0(eChipRev) ((eChipRev) >= GC_12_0_1_A0 && (eChipRev) < GC_12_0_0_A0)
+#define ASICREV_IS_GC_12_0_0_A0(eChipRev) ((eChipRev) >= GC_12_0_0_A0 && (eChipRev) < GC_12_UNKNOWN)
+/* DCN4 / DCN401 test the same two revision ranges as the GC 12.0.1 / 12.0.0 checks above. */
+#define ASICREV_IS_DCN4(eChipRev) ((eChipRev) >= GC_12_0_1_A0 && (eChipRev) < GC_12_0_0_A0)
+#define ASICREV_IS_DCN401(eChipRev) ((eChipRev) >= GC_12_0_0_A0 && (eChipRev) < GC_12_UNKNOWN)
/*
* ASIC chip ID
diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h
index d2427cf1155f..a021d12acd74 100644
--- a/drivers/gpu/drm/amd/display/include/dal_types.h
+++ b/drivers/gpu/drm/amd/display/include/dal_types.h
@@ -27,7 +27,6 @@
#define __DAL_TYPES_H__
#include "signal_types.h"
-#include "dc_types.h"
struct dal_logger;
struct dc_bios;
@@ -62,6 +61,10 @@ enum dce_version {
DCN_VERSION_3_16,
DCN_VERSION_3_2,
DCN_VERSION_3_21,
+ DCN_VERSION_3_5,
+ DCN_VERSION_3_51,
+ DCN_VERSION_3_6,
+ DCN_VERSION_4_01,
DCN_VERSION_MAX
};
diff --git a/drivers/gpu/drm/amd/display/include/ddc_service_types.h b/drivers/gpu/drm/amd/display/include/ddc_service_types.h
index 68dfc7968017..1c603b12957f 100644
--- a/drivers/gpu/drm/amd/display/include/ddc_service_types.h
+++ b/drivers/gpu/drm/amd/display/include/ddc_service_types.h
@@ -39,6 +39,7 @@
#define DP_BRANCH_HW_REV_10 0x10
#define DP_BRANCH_HW_REV_20 0x20
+#define DP_DEVICE_ID_0022B9 0x0022B9
#define DP_DEVICE_ID_38EC11 0x38EC11
#define DP_DEVICE_ID_BA4159 0xBA4159
#define DP_FORCE_PSRSU_CAPABILITY 0x40F
diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h
index 914f28e9f224..de8f3cfed6c8 100644
--- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h
+++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h
@@ -164,17 +164,23 @@ enum dpcd_psr_sink_states {
PSR_SINK_STATE_SINK_INTERNAL_ERROR = 7,
};
-#define DP_SOURCE_SEQUENCE 0x30c
-#define DP_SOURCE_TABLE_REVISION 0x310
-#define DP_SOURCE_PAYLOAD_SIZE 0x311
-#define DP_SOURCE_SINK_CAP 0x317
-#define DP_SOURCE_BACKLIGHT_LEVEL 0x320
-#define DP_SOURCE_BACKLIGHT_CURRENT_PEAK 0x326
-#define DP_SOURCE_BACKLIGHT_CONTROL 0x32E
-#define DP_SOURCE_BACKLIGHT_ENABLE 0x32F
-#define DP_SOURCE_MINIMUM_HBLANK_SUPPORTED 0x340
+#define DP_SOURCE_SEQUENCE 0x30C
+#define DP_SOURCE_TABLE_REVISION 0x310
+#define DP_SOURCE_PAYLOAD_SIZE 0x311
+#define DP_SOURCE_SINK_CAP 0x317
+#define DP_SOURCE_BACKLIGHT_LEVEL 0x320
+#define DP_SOURCE_BACKLIGHT_CURRENT_PEAK 0x326
+#define DP_SOURCE_BACKLIGHT_CONTROL 0x32E
+#define DP_SOURCE_BACKLIGHT_ENABLE 0x32F
+#define DP_SOURCE_MINIMUM_HBLANK_SUPPORTED 0x340
#define DP_SINK_PR_REPLAY_STATUS 0x378
#define DP_SINK_PR_PIXEL_DEVIATION_PER_LINE 0x379
#define DP_SINK_PR_MAX_NUMBER_OF_DEVIATION_LINE 0x37A
+#define DP_SINK_EMISSION_RATE 0x37E
+
+/* Remove once drm_dp_helper.h is updated upstream */
+#ifndef DP_TOTAL_LTTPR_CNT
+#define DP_TOTAL_LTTPR_CNT 0xF000A /* 2.1 */
+#endif
#endif /* __DAL_DPCD_DEFS_H__ */
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index d4cf7ead1d87..990fa1f19c22 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -531,4 +531,10 @@ static inline struct fixed31_32 dc_fixpt_truncate(struct fixed31_32 arg, unsigne
return arg;
}
+struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits);
+struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value,
+ unsigned int frac_value,
+ unsigned int integer_bits,
+ unsigned int fractional_bits);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/include/gpio_service_interface.h b/drivers/gpu/drm/amd/display/include/gpio_service_interface.h
index 7e3240e73c1f..63813009a3a6 100644
--- a/drivers/gpu/drm/amd/display/include/gpio_service_interface.h
+++ b/drivers/gpu/drm/amd/display/include/gpio_service_interface.h
@@ -86,6 +86,9 @@ enum dc_irq_source dal_irq_get_source(
enum dc_irq_source dal_irq_get_rx_source(
const struct gpio *irq);
+enum dc_irq_source dal_irq_get_read_request(
+ const struct gpio *irq);
+
enum gpio_result dal_irq_setup_hpd_filter(
struct gpio *irq,
struct gpio_hpd_config *config);
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
index bc96d0211360..cc467031651d 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
@@ -417,12 +417,14 @@ struct integrated_info {
/* V2.1 */
struct edp_info edp1_info;
struct edp_info edp2_info;
+ uint32_t gpuclk_ss_percentage;
+ uint32_t gpuclk_ss_type;
};
/*
* DFS-bypass flag
*/
-/* Copy of SYS_INFO_GPUCAPS__ENABEL_DFS_BYPASS from atombios.h */
+/* Copy of SYS_INFO_GPUCAPS__ENABLE_DFS_BYPASS from atombios.h */
enum {
DFS_BYPASS_ENABLE = 0x10
};
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_id.h b/drivers/gpu/drm/amd/display/include/grph_object_id.h
index c6bbd262f1ac..54e33062b3c0 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_id.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_id.h
@@ -226,8 +226,8 @@ enum dp_alt_mode {
struct graphics_object_id {
uint32_t id:8;
- uint32_t enum_id:4;
- uint32_t type:4;
+ enum object_enum_id enum_id :4;
+ enum object_type type :4;
uint32_t reserved:16; /* for padding. total size should be u32 */
};
diff --git a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
index 42229b4effdc..eced9ad91f1d 100644
--- a/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
+++ b/drivers/gpu/drm/amd/display/include/hdcp_msg_types.h
@@ -69,6 +69,11 @@ enum hdcp_message_id {
HDCP_MESSAGE_ID_READ_RXSTATUS,
HDCP_MESSAGE_ID_WRITE_CONTENT_STREAM_TYPE,
+ /* PS175 chip */
+
+ HDCP_MESSAGE_ID_WRITE_PS175_CMD,
+ HDCP_MESSAGE_ID_READ_PS175_RSP,
+
HDCP_MESSAGE_ID_MAX
};
diff --git a/drivers/gpu/drm/amd/display/include/link_service_types.h b/drivers/gpu/drm/amd/display/include/link_service_types.h
index 1b8ab20f1715..da74ed66c8f9 100644
--- a/drivers/gpu/drm/amd/display/include/link_service_types.h
+++ b/drivers/gpu/drm/amd/display/include/link_service_types.h
@@ -73,7 +73,6 @@ struct link_training_settings {
enum dc_pre_emphasis *pre_emphasis;
enum dc_post_cursor2 *post_cursor2;
bool should_set_fec_ready;
- /* TODO - factor lane_settings out because it changes during LT */
union dc_dp_ffe_preset *ffe_preset;
uint16_t cr_pattern_time;
@@ -90,6 +89,8 @@ struct link_training_settings {
bool enhanced_framing;
enum lttpr_mode lttpr_mode;
+ bool lttpr_early_tps2;
+
/* disallow different lanes to have different lane settings */
bool disallow_per_lane_settings;
/* dpcd lane settings will always use the same hw lane settings
@@ -169,6 +170,15 @@ enum dp_test_pattern {
DP_TEST_PATTERN_UNSUPPORTED
};
+/* True when test_pattern lies in [DP_TEST_PATTERN_SQUARE_BEGIN, DP_TEST_PATTERN_SQUARE_END]. */
+#define IS_DP_PHY_SQUARE_PATTERN(test_pattern)\
+	(DP_TEST_PATTERN_SQUARE_BEGIN <= (test_pattern) && (test_pattern) <= DP_TEST_PATTERN_SQUARE_END)
+/* True for the PHY pattern range, and also for DP_TEST_PATTERN_VIDEO_MODE. */
+#define IS_DP_PHY_PATTERN(test_pattern)\
+	((DP_TEST_PATTERN_PHY_PATTERN_BEGIN <= (test_pattern) &&\
+	(test_pattern) <= DP_TEST_PATTERN_PHY_PATTERN_END) ||\
+	(test_pattern) == DP_TEST_PATTERN_VIDEO_MODE)
+
enum dp_test_pattern_color_space {
DP_TEST_PATTERN_COLOR_SPACE_RGB,
DP_TEST_PATTERN_COLOR_SPACE_YCBCR601,
diff --git a/drivers/gpu/drm/amd/display/include/logger_interface.h b/drivers/gpu/drm/amd/display/include/logger_interface.h
index 02c23b04d34b..4c01514b926c 100644
--- a/drivers/gpu/drm/amd/display/include/logger_interface.h
+++ b/drivers/gpu/drm/amd/display/include/logger_interface.h
@@ -40,11 +40,6 @@ struct dc_state;
*
*/
-void pre_surface_trace(
- struct dc *dc,
- const struct dc_plane_state *const *plane_states,
- int surface_count);
-
void update_surface_trace(
struct dc *dc,
const struct dc_surface_update *updates,
@@ -52,10 +47,6 @@ void update_surface_trace(
void post_surface_trace(struct dc *dc);
-void context_timing_trace(
- struct dc *dc,
- struct resource_context *res_ctx);
-
void context_clock_trace(
struct dc *dc,
struct dc_state *context);
diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h
index 3bf08a60c45c..177acb0574f1 100644
--- a/drivers/gpu/drm/amd/display/include/logger_types.h
+++ b/drivers/gpu/drm/amd/display/include/logger_types.h
@@ -28,53 +28,47 @@
#include "os_types.h"
-#define MAX_NAME_LEN 32
-
-#define DC_LOG_ERROR(...) DRM_ERROR(__VA_ARGS__)
-#define DC_LOG_WARNING(...) DRM_WARN(__VA_ARGS__)
-#define DC_LOG_DEBUG(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_DC(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_DTN(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_ERROR(...) drm_err((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_WARNING(...) drm_warn((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_DEBUG(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_DC(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_INFO(...) drm_info((DC_LOGGER)->dev, __VA_ARGS__)
#define DC_LOG_SURFACE(...) pr_debug("[SURFACE]:"__VA_ARGS__)
-#define DC_LOG_CURSOR(...) pr_debug("[CURSOR]:"__VA_ARGS__)
-#define DC_LOG_PFLIP(...) pr_debug("[PFLIP]:"__VA_ARGS__)
-#define DC_LOG_VBLANK(...) pr_debug("[VBLANK]:"__VA_ARGS__)
-#define DC_LOG_HW_HOTPLUG(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_HW_HOTPLUG(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
#define DC_LOG_HW_LINK_TRAINING(...) pr_debug("[HW_LINK_TRAINING]:"__VA_ARGS__)
-#define DC_LOG_HW_SET_MODE(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_HW_RESUME_S3(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_HW_RESUME_S3(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
#define DC_LOG_HW_AUDIO(...) pr_debug("[HW_AUDIO]:"__VA_ARGS__)
-#define DC_LOG_HW_HPD_IRQ(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_MST(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_HW_HPD_IRQ(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_MST(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__)
#define DC_LOG_SCALER(...) pr_debug("[SCALER]:"__VA_ARGS__)
#define DC_LOG_BIOS(...) pr_debug("[BIOS]:"__VA_ARGS__)
#define DC_LOG_BANDWIDTH_CALCS(...) pr_debug("[BANDWIDTH_CALCS]:"__VA_ARGS__)
-#define DC_LOG_BANDWIDTH_VALIDATION(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_I2C_AUX(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_SYNC(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_BACKLIGHT(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_FEATURE_OVERRIDE(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_DETECTION_EDID_PARSER(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_DETECTION_DP_CAPS(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_RESOURCE(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_BANDWIDTH_VALIDATION(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_SYNC(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_BACKLIGHT(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_DETECTION_EDID_PARSER(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_DETECTION_DP_CAPS(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__)
#define DC_LOG_DML(...) pr_debug("[DML]:"__VA_ARGS__)
-#define DC_LOG_EVENT_MODE_SET(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_EVENT_DETECTION(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_EVENT_LINK_TRAINING(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_EVENT_LINK_LOSS(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_EVENT_UNDERFLOW(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_EVENT_MODE_SET(...) drm_dbg_kms((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_EVENT_DETECTION(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_EVENT_LINK_TRAINING(...) \
+ drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_EVENT_LINK_LOSS(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__)
#define DC_LOG_IF_TRACE(...) pr_debug("[IF_TRACE]:"__VA_ARGS__)
-#define DC_LOG_PERF_TRACE(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_RETIMER_REDRIVER(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_PERF_TRACE(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_RETIMER_REDRIVER(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
#define DC_LOG_GAMMA(...) pr_debug("[GAMMA]:"__VA_ARGS__)
#define DC_LOG_ALL_GAMMA(...) pr_debug("[GAMMA]:"__VA_ARGS__)
#define DC_LOG_ALL_TF_CHANNELS(...) pr_debug("[GAMMA]:"__VA_ARGS__)
-#define DC_LOG_DSC(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DSC(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__)
#define DC_LOG_SMU(...) pr_debug("[SMU_MSG]:"__VA_ARGS__)
-#define DC_LOG_DWB(...) DRM_DEBUG_KMS(__VA_ARGS__)
-#define DC_LOG_DP2(...) DRM_DEBUG_KMS(__VA_ARGS__)
-
-struct dal_logger;
+#define DC_LOG_DWB(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_DP2(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__)
+#define DC_LOG_AUTO_DPM_TEST(...) pr_debug("[AutoDPMTest]: "__VA_ARGS__)
+#define DC_LOG_IPS(...) pr_debug("[IPS]: "__VA_ARGS__)
+#define DC_LOG_MALL(...) pr_debug("[MALL]:"__VA_ARGS__)
+#define DC_LOG_REGISTER_READ(...) pr_debug("[REGISTER_READ]: "__VA_ARGS__)
+#define DC_LOG_REGISTER_WRITE(...) pr_debug("[REGISTER_WRITE]: "__VA_ARGS__)
struct dc_log_buffer_ctx {
char *buf;
@@ -82,88 +76,8 @@ struct dc_log_buffer_ctx {
size_t size;
};
-enum dc_log_type {
- LOG_ERROR = 0,
- LOG_WARNING,
- LOG_DEBUG,
- LOG_DC,
- LOG_DTN,
- LOG_SURFACE,
- LOG_HW_HOTPLUG,
- LOG_HW_LINK_TRAINING,
- LOG_HW_SET_MODE,
- LOG_HW_RESUME_S3,
- LOG_HW_AUDIO,
- LOG_HW_HPD_IRQ,
- LOG_MST,
- LOG_SCALER,
- LOG_BIOS,
- LOG_BANDWIDTH_CALCS,
- LOG_BANDWIDTH_VALIDATION,
- LOG_I2C_AUX,
- LOG_SYNC,
- LOG_BACKLIGHT,
- LOG_FEATURE_OVERRIDE,
- LOG_DETECTION_EDID_PARSER,
- LOG_DETECTION_DP_CAPS,
- LOG_RESOURCE,
- LOG_DML,
- LOG_EVENT_MODE_SET,
- LOG_EVENT_DETECTION,
- LOG_EVENT_LINK_TRAINING,
- LOG_EVENT_LINK_LOSS,
- LOG_EVENT_UNDERFLOW,
- LOG_IF_TRACE,
- LOG_PERF_TRACE,
- LOG_DISPLAYSTATS,
- LOG_HDMI_RETIMER_REDRIVER,
- LOG_DSC,
- LOG_SMU_MSG,
- LOG_DC2RESERVED4,
- LOG_DC2RESERVED5,
- LOG_DWB,
- LOG_GAMMA_DEBUG,
- LOG_MAX_HW_POINTS,
- LOG_ALL_TF_CHANNELS,
- LOG_SAMPLE_1DLUT,
- LOG_DP2,
- LOG_DC2RESERVED12,
+struct dal_logger {
+ struct drm_device *dev;
};
-#define DC_MIN_LOG_MASK ((1 << LOG_ERROR) | \
- (1 << LOG_DETECTION_EDID_PARSER))
-
-#define DC_DEFAULT_LOG_MASK ((1ULL << LOG_ERROR) | \
- (1ULL << LOG_WARNING) | \
- (1ULL << LOG_EVENT_MODE_SET) | \
- (1ULL << LOG_EVENT_DETECTION) | \
- (1ULL << LOG_EVENT_LINK_TRAINING) | \
- (1ULL << LOG_EVENT_LINK_LOSS) | \
- (1ULL << LOG_EVENT_UNDERFLOW) | \
- (1ULL << LOG_RESOURCE) | \
- (1ULL << LOG_FEATURE_OVERRIDE) | \
- (1ULL << LOG_DETECTION_EDID_PARSER) | \
- (1ULL << LOG_DC) | \
- (1ULL << LOG_HW_HOTPLUG) | \
- (1ULL << LOG_HW_SET_MODE) | \
- (1ULL << LOG_HW_RESUME_S3) | \
- (1ULL << LOG_HW_HPD_IRQ) | \
- (1ULL << LOG_SYNC) | \
- (1ULL << LOG_BANDWIDTH_VALIDATION) | \
- (1ULL << LOG_MST) | \
- (1ULL << LOG_DETECTION_DP_CAPS) | \
- (1ULL << LOG_BACKLIGHT)) | \
- (1ULL << LOG_I2C_AUX) | \
- (1ULL << LOG_IF_TRACE) | \
- (1ULL << LOG_HDMI_FRL) | \
- (1ULL << LOG_SCALER) | \
- (1ULL << LOG_DTN) /* | \
- (1ULL << LOG_DEBUG) | \
- (1ULL << LOG_BIOS) | \
- (1ULL << LOG_SURFACE) | \
- (1ULL << LOG_DML) | \
- (1ULL << LOG_HW_LINK_TRAINING) | \
- (1ULL << LOG_HW_AUDIO)| \
- (1ULL << LOG_BANDWIDTH_CALCS)*/
-
#endif /* __DAL_LOGGER_TYPES_H__ */
diff --git a/drivers/gpu/drm/amd/display/include/signal_types.h b/drivers/gpu/drm/amd/display/include/signal_types.h
index 325c5ba4c82a..a10d6b988aab 100644
--- a/drivers/gpu/drm/amd/display/include/signal_types.h
+++ b/drivers/gpu/drm/amd/display/include/signal_types.h
@@ -118,6 +118,19 @@ static inline bool dc_is_dvi_signal(enum signal_type signal)
}
}
+/*
+ * dc_is_tmds_signal - report whether @signal is one of the TMDS signal types
+ * checked here: single-link DVI, dual-link DVI or HDMI type A.
+ *
+ * Returns true for those three values and false for any other signal type.
+ */
+static inline bool dc_is_tmds_signal(enum signal_type signal)
+{
+	return signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
+	       signal == SIGNAL_TYPE_DVI_DUAL_LINK ||
+	       signal == SIGNAL_TYPE_HDMI_TYPE_A;
+}
+
static inline bool dc_is_dvi_single_link_signal(enum signal_type signal)
{
return (signal == SIGNAL_TYPE_DVI_SINGLE_LINK);
@@ -132,7 +145,6 @@ static inline bool dc_is_audio_capable_signal(enum signal_type signal)
{
return (signal == SIGNAL_TYPE_DISPLAY_PORT ||
signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
- signal == SIGNAL_TYPE_VIRTUAL ||
dc_is_hdmi_signal(signal));
}
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index ff8e5708735d..a71df052cf25 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -37,6 +37,104 @@
static struct hw_x_point coordinates_x[MAX_HW_POINTS + 2];
+// Hardcoded table that depends on setup_x_points_distribution and sdr_level=80
+// If x points are changed, then PQ Y points will be misaligned and a new
+// table would need to be generated. Or use old method that calls compute_pq.
+// The last point is above PQ formula range (0-125 in normalized FP16)
+// The value for the last point (128) is such that interpolation from
+// 120 to 128 will give 1.0 for X = 125.0
+// first couple points are 0 - HW LUT is mirrored around zero, so making first
+// segment 0 to 0 will effectively clip it, and these are very low PQ codes
+// min nonzero value below (216825) is a little under 12-bit PQ code 1.
+static const unsigned long long pq_divider = 1000000000;
+static const unsigned long long pq_numerator[MAX_HW_POINTS + 1] = {
+ 0, 0, 0, 0, 216825, 222815,
+ 228691, 234460, 240128, 245702, 251187, 256587,
+ 261908, 267152, 272324, 277427, 282465, 292353,
+ 302011, 311456, 320704, 329768, 338661, 347394,
+ 355975, 364415, 372721, 380900, 388959, 396903,
+ 404739, 412471, 420104, 435089, 449727, 464042,
+ 478060, 491800, 505281, 518520, 531529, 544324,
+ 556916, 569316, 581533, 593576, 605454, 617175,
+ 628745, 651459, 673643, 695337, 716578, 737395,
+ 757817, 777869, 797572, 816947, 836012, 854782,
+ 873274, 891500, 909474, 927207, 944709, 979061,
+ 1012601, 1045391, 1077485, 1108931, 1139770, 1170042,
+ 1199778, 1229011, 1257767, 1286071, 1313948, 1341416,
+ 1368497, 1395207, 1421563, 1473272, 1523733, 1573041,
+ 1621279, 1668520, 1714828, 1760262, 1804874, 1848710,
+ 1891814, 1934223, 1975973, 2017096, 2057622, 2097578,
+ 2136989, 2214269, 2289629, 2363216, 2435157, 2505564,
+ 2574539, 2642169, 2708536, 2773711, 2837760, 2900742,
+ 2962712, 3023719, 3083810, 3143025, 3201405, 3315797,
+ 3427246, 3535974, 3642181, 3746038, 3847700, 3947305,
+ 4044975, 4140823, 4234949, 4327445, 4418394, 4507872,
+ 4595951, 4682694, 4768161, 4935487, 5098326, 5257022,
+ 5411878, 5563161, 5711107, 5855928, 5997812, 6136929,
+ 6273436, 6407471, 6539163, 6668629, 6795976, 6921304,
+ 7044703, 7286050, 7520623, 7748950, 7971492, 8188655,
+ 8400800, 8608247, 8811286, 9010175, 9205149, 9396421,
+ 9584186, 9768620, 9949889, 10128140, 10303513, 10646126,
+ 10978648, 11301874, 11616501, 11923142, 12222340, 12514578,
+ 12800290, 13079866, 13353659, 13621988, 13885144, 14143394,
+ 14396982, 14646132, 14891052, 15368951, 15832050, 16281537,
+ 16718448, 17143696, 17558086, 17962337, 18357092, 18742927,
+ 19120364, 19489877, 19851894, 20206810, 20554983, 20896745,
+ 21232399, 21886492, 22519276, 23132491, 23727656, 24306104,
+ 24869013, 25417430, 25952292, 26474438, 26984626, 27483542,
+ 27971811, 28450000, 28918632, 29378184, 29829095, 30706591,
+ 31554022, 32373894, 33168387, 33939412, 34688657, 35417620,
+ 36127636, 36819903, 37495502, 38155408, 38800507, 39431607,
+ 40049446, 40654702, 41247996, 42400951, 43512407, 44585892,
+ 45624474, 46630834, 47607339, 48556082, 49478931, 50377558,
+ 51253467, 52108015, 52942436, 53757848, 54555277, 55335659,
+ 56099856, 57582802, 59009766, 60385607, 61714540, 63000246,
+ 64245964, 65454559, 66628579, 67770304, 68881781, 69964856,
+ 71021203, 72052340, 73059655, 74044414, 75007782, 76874537,
+ 78667536, 80393312, 82057522, 83665098, 85220372, 86727167,
+ 88188883, 89608552, 90988895, 92332363, 93641173, 94917336,
+ 96162685, 97378894, 98567496, 100867409, 103072439, 105191162,
+ 107230989, 109198368, 111098951, 112937723, 114719105, 116447036,
+ 118125045, 119756307, 121343688, 122889787, 124396968, 125867388,
+ 127303021, 130077030, 132731849, 135278464, 137726346, 140083726,
+ 142357803, 144554913, 146680670, 148740067, 150737572, 152677197,
+ 154562560, 156396938, 158183306, 159924378, 161622632, 164899602,
+ 168030318, 171028513, 173906008, 176673051, 179338593, 181910502,
+ 184395731, 186800463, 189130216, 191389941, 193584098, 195716719,
+ 197791463, 199811660, 201780351, 205574133, 209192504, 212652233,
+ 215967720, 219151432, 222214238, 225165676, 228014163, 230767172,
+ 233431363, 236012706, 238516569, 240947800, 243310793, 245609544,
+ 247847696, 252155270, 256257056, 260173059, 263920427, 267513978,
+ 270966613, 274289634, 277493001, 280585542, 283575118, 286468763,
+ 289272796, 291992916, 294634284, 297201585, 299699091, 304500003,
+ 309064541, 313416043, 317574484, 321557096, 325378855, 329052864,
+ 332590655, 336002433, 339297275, 342483294, 345567766, 348557252,
+ 351457680, 354274432, 357012407, 362269536, 367260561, 372012143,
+ 376547060, 380884936, 385042798, 389035522, 392876185, 396576344,
+ 400146265, 403595112, 406931099, 410161619, 413293351, 416332348,
+ 419284117, 424945627, 430313203, 435416697, 440281572, 444929733,
+ 449380160, 453649415, 457752035, 461700854, 465507260, 469181407,
+ 472732388, 476168376, 479496748, 482724188, 485856764, 491858986,
+ 497542280, 502939446, 508078420, 512983199, 517674549, 522170569,
+ 526487126, 530638214, 534636233, 538492233, 542216094, 545816693,
+ 549302035, 552679362, 555955249, 562226134, 568156709, 573782374,
+ 579133244, 584235153, 589110430, 593778512, 598256421, 602559154,
+ 606699989, 610690741, 614541971, 618263157, 621862836, 625348729,
+ 628727839, 635190643, 641295921, 647081261, 652578597, 657815287,
+ 662814957, 667598146, 672182825, 676584810, 680818092, 684895111,
+ 688826974, 692623643, 696294085, 699846401, 703287935, 709864782,
+ 716071394, 721947076, 727525176, 732834238, 737898880, 742740485,
+ 747377745, 751827095, 756103063, 760218552, 764185078, 768012958,
+ 771711474, 775289005, 778753144, 785368225, 791604988, 797503949,
+ 803099452, 808420859, 813493471, 818339244, 822977353, 827424644,
+ 831695997, 835804619, 839762285, 843579541, 847265867, 850829815,
+ 854279128, 860861356, 867061719, 872921445, 878475444, 883753534,
+ 888781386, 893581259, 898172578, 902572393, 906795754, 910856010,
+ 914765057, 918533538, 922171018, 925686119, 929086644, 935571664,
+ 941675560, 947439782, 952899395, 958084324, 963020312, 967729662,
+ 972231821, 976543852, 980680801, 984656009, 988481353, 992167459,
+ 995723865, 999159168, 1002565681};
+
// these are helpers for calculations to reduce stack usage
// do not depend on these being preserved across calls
@@ -239,14 +337,19 @@ static void compute_hlg_oetf(struct fixed31_32 in_x, struct fixed31_32 *out_y,
void precompute_pq(void)
{
int i;
+ struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
+
+ for (i = 0; i <= MAX_HW_POINTS; i++)
+ pq_table[i] = dc_fixpt_from_fraction(pq_numerator[i], pq_divider);
+
+ /* below is old method that uses run-time calculation in fixed pt space */
+ /* pow function has problems with arguments too small */
+ /*
struct fixed31_32 x;
const struct hw_x_point *coord_x = coordinates_x + 32;
struct fixed31_32 scaling_factor =
dc_fixpt_from_fraction(80, 10000);
- struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
-
- /* pow function has problems with arguments too small */
for (i = 0; i < 32; i++)
pq_table[i] = dc_fixpt_zero;
@@ -255,6 +358,7 @@ void precompute_pq(void)
compute_pq(x, &pq_table[i]);
++coord_x;
}
+ */
}
/* one-time pre-compute dePQ values - only for max pixel value 125 FP16 */
@@ -778,8 +882,6 @@ static void build_pq(struct pwl_float_data_ex *rgb_regamma,
/* should really not happen? */
if (dc_fixpt_lt(output, dc_fixpt_zero))
output = dc_fixpt_zero;
- else if (dc_fixpt_lt(dc_fixpt_one, output))
- output = dc_fixpt_one;
rgb->r = output;
rgb->g = output;
@@ -957,7 +1059,7 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
struct fixed31_32 min_display;
struct fixed31_32 max_content;
struct fixed31_32 clip = dc_fixpt_one;
- struct fixed31_32 output;
+ struct fixed31_32 output = dc_fixpt_zero;
bool use_eetf = false;
bool is_clipped = false;
struct fixed31_32 sdr_white_level;
@@ -1297,71 +1399,6 @@ static void scale_gamma_dx(struct pwl_float_data *pwl_rgb,
pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
}
-/* todo: all these scale_gamma functions are inherently the same but
- * take different structures as params or different format for ramp
- * values. We could probably implement it in a more generic fashion
- */
-static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb,
- const struct regamma_ramp *ramp,
- struct dividers dividers)
-{
- unsigned short max_driver = 0xFFFF;
- unsigned short max_os = 0xFF00;
- unsigned short scaler = max_os;
- uint32_t i;
- struct pwl_float_data *rgb = pwl_rgb;
- struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1;
-
- i = 0;
- do {
- if (ramp->gamma[i] > max_os ||
- ramp->gamma[i + 256] > max_os ||
- ramp->gamma[i + 512] > max_os) {
- scaler = max_driver;
- break;
- }
- i++;
- } while (i != GAMMA_RGB_256_ENTRIES);
-
- i = 0;
- do {
- rgb->r = dc_fixpt_from_fraction(
- ramp->gamma[i], scaler);
- rgb->g = dc_fixpt_from_fraction(
- ramp->gamma[i + 256], scaler);
- rgb->b = dc_fixpt_from_fraction(
- ramp->gamma[i + 512], scaler);
-
- ++rgb;
- ++i;
- } while (i != GAMMA_RGB_256_ENTRIES);
-
- rgb->r = dc_fixpt_mul(rgb_last->r,
- dividers.divider1);
- rgb->g = dc_fixpt_mul(rgb_last->g,
- dividers.divider1);
- rgb->b = dc_fixpt_mul(rgb_last->b,
- dividers.divider1);
-
- ++rgb;
-
- rgb->r = dc_fixpt_mul(rgb_last->r,
- dividers.divider2);
- rgb->g = dc_fixpt_mul(rgb_last->g,
- dividers.divider2);
- rgb->b = dc_fixpt_mul(rgb_last->b,
- dividers.divider2);
-
- ++rgb;
-
- rgb->r = dc_fixpt_mul(rgb_last->r,
- dividers.divider3);
- rgb->g = dc_fixpt_mul(rgb_last->g,
- dividers.divider3);
- rgb->b = dc_fixpt_mul(rgb_last->b,
- dividers.divider3);
-}
-
/*
* RS3+ color transform DDI - 1D LUT adjustment is composed with regamma here
* Input is evenly distributed in the output color space as specified in
@@ -1561,106 +1598,6 @@ static bool calculate_interpolated_hardware_curve(
return true;
}
-/* The "old" interpolation uses a complicated scheme to build an array of
- * coefficients while also using an array of 0-255 normalized to 0-1
- * Then there's another loop using both of the above + new scaled user ramp
- * and we concatenate them. It also searches for points of interpolation and
- * uses enums for positions.
- *
- * This function uses a different approach:
- * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255
- * To find index for hwX , we notice the following:
- * i/255 <= hwX < (i+1)/255 <=> i <= 255*hwX < i+1
- * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT
- *
- * Once the index is known, combined Y is simply:
- * user_ramp(index) + (hwX-index/255)*(user_ramp(index+1) - user_ramp(index)
- *
- * We should switch to this method in all cases, it's simpler and faster
- * ToDo one day - for now this only applies to ADL regamma to avoid regression
- * for regular use cases (sRGB and PQ)
- */
-static void interpolate_user_regamma(uint32_t hw_points_num,
- struct pwl_float_data *rgb_user,
- bool apply_degamma,
- struct dc_transfer_func_distributed_points *tf_pts)
-{
- uint32_t i;
- uint32_t color = 0;
- int32_t index;
- int32_t index_next;
- struct fixed31_32 *tf_point;
- struct fixed31_32 hw_x;
- struct fixed31_32 norm_factor =
- dc_fixpt_from_int(255);
- struct fixed31_32 norm_x;
- struct fixed31_32 index_f;
- struct fixed31_32 lut1;
- struct fixed31_32 lut2;
- struct fixed31_32 delta_lut;
- struct fixed31_32 delta_index;
- const struct fixed31_32 one = dc_fixpt_from_int(1);
-
- i = 0;
- /* fixed_pt library has problems handling too small values */
- while (i != 32) {
- tf_pts->red[i] = dc_fixpt_zero;
- tf_pts->green[i] = dc_fixpt_zero;
- tf_pts->blue[i] = dc_fixpt_zero;
- ++i;
- }
- while (i <= hw_points_num + 1) {
- for (color = 0; color < 3; color++) {
- if (color == 0)
- tf_point = &tf_pts->red[i];
- else if (color == 1)
- tf_point = &tf_pts->green[i];
- else
- tf_point = &tf_pts->blue[i];
-
- if (apply_degamma) {
- if (color == 0)
- hw_x = coordinates_x[i].regamma_y_red;
- else if (color == 1)
- hw_x = coordinates_x[i].regamma_y_green;
- else
- hw_x = coordinates_x[i].regamma_y_blue;
- } else
- hw_x = coordinates_x[i].x;
-
- if (dc_fixpt_le(one, hw_x))
- hw_x = one;
-
- norm_x = dc_fixpt_mul(norm_factor, hw_x);
- index = dc_fixpt_floor(norm_x);
- if (index < 0 || index > 255)
- continue;
-
- index_f = dc_fixpt_from_int(index);
- index_next = (index == 255) ? index : index + 1;
-
- if (color == 0) {
- lut1 = rgb_user[index].r;
- lut2 = rgb_user[index_next].r;
- } else if (color == 1) {
- lut1 = rgb_user[index].g;
- lut2 = rgb_user[index_next].g;
- } else {
- lut1 = rgb_user[index].b;
- lut2 = rgb_user[index_next].b;
- }
-
- // we have everything now, so interpolate
- delta_lut = dc_fixpt_sub(lut2, lut1);
- delta_index = dc_fixpt_sub(norm_x, index_f);
-
- *tf_point = dc_fixpt_add(lut1,
- dc_fixpt_mul(delta_index, delta_lut));
- }
- ++i;
- }
-}
-
static void build_new_custom_resulted_curve(
uint32_t hw_points_num,
struct dc_transfer_func_distributed_points *tf_pts)
@@ -1682,29 +1619,6 @@ static void build_new_custom_resulted_curve(
}
}
-static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma,
- uint32_t hw_points_num, struct calculate_buffer *cal_buffer)
-{
- uint32_t i;
-
- struct gamma_coefficients coeff;
- struct pwl_float_data_ex *rgb = rgb_regamma;
- const struct hw_x_point *coord_x = coordinates_x;
-
- build_coefficients(&coeff, TRANSFER_FUNCTION_SRGB);
-
- i = 0;
- while (i != hw_points_num + 1) {
- rgb->r = translate_from_linear_space_ex(
- coord_x->x, &coeff, 0, cal_buffer);
- rgb->g = rgb->r;
- rgb->b = rgb->r;
- ++coord_x;
- ++rgb;
- ++i;
- }
-}
-
static bool map_regamma_hw_to_x_user(
const struct dc_gamma *ramp,
struct pixel_gamma_point *coeff128,
@@ -1753,125 +1667,6 @@ static bool map_regamma_hw_to_x_user(
#define _EXTRA_POINTS 3
-bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
- const struct regamma_lut *regamma,
- struct calculate_buffer *cal_buffer,
- const struct dc_gamma *ramp)
-{
- struct gamma_coefficients coeff;
- const struct hw_x_point *coord_x = coordinates_x;
- uint32_t i = 0;
-
- do {
- coeff.a0[i] = dc_fixpt_from_fraction(
- regamma->coeff.A0[i], 10000000);
- coeff.a1[i] = dc_fixpt_from_fraction(
- regamma->coeff.A1[i], 1000);
- coeff.a2[i] = dc_fixpt_from_fraction(
- regamma->coeff.A2[i], 1000);
- coeff.a3[i] = dc_fixpt_from_fraction(
- regamma->coeff.A3[i], 1000);
- coeff.user_gamma[i] = dc_fixpt_from_fraction(
- regamma->coeff.gamma[i], 1000);
-
- ++i;
- } while (i != 3);
-
- i = 0;
- /* fixed_pt library has problems handling too small values */
- while (i != 32) {
- output_tf->tf_pts.red[i] = dc_fixpt_zero;
- output_tf->tf_pts.green[i] = dc_fixpt_zero;
- output_tf->tf_pts.blue[i] = dc_fixpt_zero;
- ++coord_x;
- ++i;
- }
- while (i != MAX_HW_POINTS + 1) {
- output_tf->tf_pts.red[i] = translate_from_linear_space_ex(
- coord_x->x, &coeff, 0, cal_buffer);
- output_tf->tf_pts.green[i] = translate_from_linear_space_ex(
- coord_x->x, &coeff, 1, cal_buffer);
- output_tf->tf_pts.blue[i] = translate_from_linear_space_ex(
- coord_x->x, &coeff, 2, cal_buffer);
- ++coord_x;
- ++i;
- }
-
- if (ramp && ramp->type == GAMMA_CS_TFM_1D)
- apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts);
-
- // this function just clamps output to 0-1
- build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts);
- output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
-
- return true;
-}
-
-bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
- const struct regamma_lut *regamma,
- struct calculate_buffer *cal_buffer,
- const struct dc_gamma *ramp)
-{
- struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
- struct dividers dividers;
-
- struct pwl_float_data *rgb_user = NULL;
- struct pwl_float_data_ex *rgb_regamma = NULL;
- bool ret = false;
-
- if (regamma == NULL)
- return false;
-
- output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
-
- rgb_user = kcalloc(GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS,
- sizeof(*rgb_user),
- GFP_KERNEL);
- if (!rgb_user)
- goto rgb_user_alloc_fail;
-
- rgb_regamma = kcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
- sizeof(*rgb_regamma),
- GFP_KERNEL);
- if (!rgb_regamma)
- goto rgb_regamma_alloc_fail;
-
- dividers.divider1 = dc_fixpt_from_fraction(3, 2);
- dividers.divider2 = dc_fixpt_from_int(2);
- dividers.divider3 = dc_fixpt_from_fraction(5, 2);
-
- scale_user_regamma_ramp(rgb_user, &regamma->ramp, dividers);
-
- if (regamma->flags.bits.applyDegamma == 1) {
- apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS, cal_buffer);
- copy_rgb_regamma_to_coordinates_x(coordinates_x,
- MAX_HW_POINTS, rgb_regamma);
- }
-
- interpolate_user_regamma(MAX_HW_POINTS, rgb_user,
- regamma->flags.bits.applyDegamma, tf_pts);
-
- // no custom HDR curves!
- tf_pts->end_exponent = 0;
- tf_pts->x_point_at_y1_red = 1;
- tf_pts->x_point_at_y1_green = 1;
- tf_pts->x_point_at_y1_blue = 1;
-
- if (ramp && ramp->type == GAMMA_CS_TFM_1D)
- apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts);
-
- // this function just clamps output to 0-1
- build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts);
-
- ret = true;
-
- kfree(rgb_regamma);
-rgb_regamma_alloc_fail:
- kfree(rgb_user);
-rgb_user_alloc_fail:
- return ret;
-}
-
bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
struct dc_transfer_func *input_tf,
const struct dc_gamma *ramp, bool map_user_ramp)
@@ -2192,7 +1987,8 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
cal_buffer);
if (ret) {
- do_clamping = !(output_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
+ do_clamping = !(output_tf->tf == TRANSFER_FUNCTION_PQ) &&
+ !(output_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
fs_params != NULL && fs_params->skip_tm == 0);
map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
index ee5c466613de..97e55278940e 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
@@ -115,15 +115,4 @@ bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
struct dc_transfer_func *output_tf,
const struct dc_gamma *ramp, bool mapUserRamp);
-bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
- const struct regamma_lut *regamma,
- struct calculate_buffer *cal_buffer,
- const struct dc_gamma *ramp);
-
-bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
- const struct regamma_lut *regamma,
- struct calculate_buffer *cal_buffer,
- const struct dc_gamma *ramp);
-
-
#endif /* COLOR_MOD_COLOR_GAMMA_H_ */
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index ef3a67409021..ce421bcddcb0 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -48,6 +48,7 @@
#define VSYNCS_BETWEEN_FLIP_THRESHOLD 2
#define FREESYNC_CONSEC_FLIP_AFTER_VSYNC 5
#define FREESYNC_VSYNC_TO_FLIP_DELTA_IN_US 500
+#define MICRO_HZ_TO_HZ(x) ((x) / 1000000) /* integer division, truncates */
struct core_freesync {
struct mod_freesync public;
@@ -81,6 +82,7 @@ fail_alloc_context:
void mod_freesync_destroy(struct mod_freesync *mod_freesync)
{
struct core_freesync *core_freesync = NULL;
+
if (mod_freesync == NULL)
return;
core_freesync = MOD_FREESYNC_TO_CORE(mod_freesync);
@@ -120,6 +122,17 @@ static unsigned int calc_duration_in_us_from_v_total(
return duration_in_us;
}
+static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
+{
+	unsigned int limit = stream->ctx->dc->caps.max_v_total;
+
+	/* When caps.vtotal_limited_by_fp2 is set, stay v_front_porch + 1 lines below the cap */
+	if (stream->ctx->dc->caps.vtotal_limited_by_fp2)
+		limit -= stream->timing.v_front_porch + 1;
+
+	return limit;
+}
+
unsigned int mod_freesync_calc_v_total_from_refresh(
const struct dc_stream_state *stream,
unsigned int refresh_in_uhz)
@@ -127,13 +140,34 @@ unsigned int mod_freesync_calc_v_total_from_refresh(
unsigned int v_total;
unsigned int frame_duration_in_ns;
+ if (refresh_in_uhz == 0)
+ return stream->timing.v_total;
+
frame_duration_in_ns =
((unsigned int)(div64_u64((1000000000ULL * 1000000),
refresh_in_uhz)));
- v_total = div64_u64(div64_u64(((unsigned long long)(
- frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
- stream->timing.h_total), 1000000);
+ if (refresh_in_uhz <= stream->timing.min_refresh_in_uhz) {
+ /* When the target refresh rate is the minimum panel refresh rate,
+ * round down the vtotal value to avoid stretching vblank over
+ * panel's vtotal boundary.
+ */
+ v_total = div64_u64(div64_u64(((unsigned long long)(
+ frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
+ stream->timing.h_total), 1000000);
+ } else if (refresh_in_uhz >= stream->timing.max_refresh_in_uhz) {
+ /* When the target refresh rate is the maximum panel refresh rate,
+ * round up the vtotal value to prevent an off-by-one error from
+ * pushing v_total_min below the panel's lower bound.
+ */
+ v_total = div64_u64(div64_u64(((unsigned long long)(
+ frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
+ stream->timing.h_total) + (1000000 - 1), 1000000);
+ } else {
+ v_total = div64_u64(div64_u64(((unsigned long long)(
+ frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
+ stream->timing.h_total) + 500000, 1000000);
+ }
/* v_total cannot be less than nominal */
if (v_total < stream->timing.v_total) {
@@ -157,13 +191,13 @@ static unsigned int calc_v_total_from_duration(
if (duration_in_us > vrr->max_duration_in_us)
duration_in_us = vrr->max_duration_in_us;
- if (dc_is_hdmi_signal(stream->signal)) {
+ if (dc_is_hdmi_signal(stream->signal)) { // change for HDMI to comply with spec
uint32_t h_total_up_scaled;
h_total_up_scaled = stream->timing.h_total * 10000;
v_total = div_u64((unsigned long long)duration_in_us
* stream->timing.pix_clk_100hz + (h_total_up_scaled - 1),
- h_total_up_scaled);
+ h_total_up_scaled); //ceiling for MMax and MMin for MVRR
} else {
v_total = div64_u64(div64_u64(((unsigned long long)(
duration_in_us) * (stream->timing.pix_clk_100hz / 10)),
@@ -192,8 +226,8 @@ static void update_v_total_for_static_ramp(
unsigned int target_duration_in_us =
calc_duration_in_us_from_refresh_in_uhz(
in_out_vrr->fixed.target_refresh_in_uhz);
- bool ramp_direction_is_up = (current_duration_in_us >
- target_duration_in_us) ? true : false;
+ bool ramp_direction_is_up = current_duration_in_us >
+ target_duration_in_us;
/* Calculate ratio between new and current frame duration with 3 digit */
unsigned int frame_duration_ratio = div64_u64(1000000,
@@ -278,9 +312,8 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
}
} else if (last_render_time_in_us > (max_render_time_in_us + in_out_vrr->btr.margin_in_us / 2)) {
/* Enter Below the Range */
- if (!in_out_vrr->btr.btr_active) {
+ if (!in_out_vrr->btr.btr_active)
in_out_vrr->btr.btr_active = true;
- }
}
/* BTR set to "not active" so disengage */
@@ -527,43 +560,6 @@ static bool vrr_settings_require_update(struct core_freesync *core_freesync,
return false;
}
-bool mod_freesync_get_vmin_vmax(struct mod_freesync *mod_freesync,
- const struct dc_stream_state *stream,
- unsigned int *vmin,
- unsigned int *vmax)
-{
- *vmin = stream->adjust.v_total_min;
- *vmax = stream->adjust.v_total_max;
-
- return true;
-}
-
-bool mod_freesync_get_v_position(struct mod_freesync *mod_freesync,
- struct dc_stream_state *stream,
- unsigned int *nom_v_pos,
- unsigned int *v_pos)
-{
- struct core_freesync *core_freesync = NULL;
- struct crtc_position position;
-
- if (mod_freesync == NULL)
- return false;
-
- core_freesync = MOD_FREESYNC_TO_CORE(mod_freesync);
-
- if (dc_stream_get_crtc_position(core_freesync->dc, &stream, 1,
- &position.vertical_count,
- &position.nominal_vcount)) {
-
- *nom_v_pos = position.nominal_vcount;
- *v_pos = position.vertical_count;
-
- return true;
- }
-
- return false;
-}
-
static void build_vrr_infopacket_data_v1(const struct mod_vrr_params *vrr,
struct dc_info_packet *infopacket,
bool freesync_on_desktop)
@@ -626,7 +622,6 @@ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
unsigned int max_refresh;
unsigned int fixed_refresh;
unsigned int min_programmed;
- unsigned int max_programmed;
/* PB1 = 0x1A (24bit AMD IEEE OUI (0x00001A) - Byte 0) */
infopacket->sb[1] = 0x1A;
@@ -672,21 +667,17 @@ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
(vrr->state == VRR_STATE_INACTIVE) ? min_refresh :
max_refresh; // Non-fs case, program nominal range
- max_programmed = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? fixed_refresh :
- (vrr->state == VRR_STATE_ACTIVE_VARIABLE) ? max_refresh :
- max_refresh;// Non-fs case, program nominal range
-
/* PB7 = FreeSync Minimum refresh rate (Hz) */
infopacket->sb[7] = min_programmed & 0xFF;
/* PB8 = FreeSync Maximum refresh rate (Hz) */
- infopacket->sb[8] = max_programmed & 0xFF;
+ infopacket->sb[8] = max_refresh & 0xFF;
/* PB11 : MSB FreeSync Minimum refresh rate [Hz] - bits 9:8 */
infopacket->sb[11] = (min_programmed >> 8) & 0x03;
/* PB12 : MSB FreeSync Maximum refresh rate [Hz] - bits 9:8 */
- infopacket->sb[12] = (max_programmed >> 8) & 0x03;
+ infopacket->sb[12] = (max_refresh >> 8) & 0x03;
/* PB16 : Reserved bits 7:1, FixedRate bit 0 */
infopacket->sb[16] = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? 1 : 0;
@@ -698,10 +689,12 @@ static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf,
if (app_tf != TRANSFER_FUNC_UNKNOWN) {
infopacket->valid = true;
- if (app_tf != TRANSFER_FUNC_PQ2084) {
+ if (app_tf == TRANSFER_FUNC_PQ2084)
+ infopacket->sb[9] |= 0x20; // PB9 = [Bit 5 = PQ EOTF Active]
+ else {
infopacket->sb[6] |= 0x08; // PB6 = [Bit 3 = Native Color Active]
if (app_tf == TRANSFER_FUNC_GAMMA_22)
- infopacket->sb[9] |= 0x04; // PB6 = [Bit 2 = Gamma 2.2 EOTF Active]
+ infopacket->sb[9] |= 0x04; // PB9 = [Bit 2 = Gamma 2.2 EOTF Active]
}
}
}
@@ -1005,7 +998,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
if (stream->ctx->dc->caps.max_v_total != 0 && stream->timing.h_total != 0) {
min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL),
- (stream->timing.h_total * stream->ctx->dc->caps.max_v_total));
+ (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream)));
}
/* Limit minimum refresh rate to what can be supported by hardware */
min_refresh_in_uhz = min_hardware_refresh_in_uhz > in_config->min_refresh_in_uhz ?
@@ -1060,7 +1053,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
in_out_vrr->fixed_refresh_in_uhz = 0;
refresh_range = div_u64(in_out_vrr->max_refresh_in_uhz + 500000, 1000000) -
-+ div_u64(in_out_vrr->min_refresh_in_uhz + 500000, 1000000);
+ div_u64(in_out_vrr->min_refresh_in_uhz + 500000, 1000000);
in_out_vrr->supported = true;
}
@@ -1129,6 +1122,8 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
in_out_vrr->adjust.v_total_min = stream->timing.v_total;
in_out_vrr->adjust.v_total_max = stream->timing.v_total;
}
+
+ in_out_vrr->adjust.allow_otg_v_count_halt = (in_config->state == VRR_STATE_ACTIVE_FIXED) ? true : false;
}
void mod_freesync_handle_preflip(struct mod_freesync *mod_freesync,
@@ -1267,28 +1262,6 @@ void mod_freesync_handle_v_update(struct mod_freesync *mod_freesync,
}
}
-void mod_freesync_get_settings(struct mod_freesync *mod_freesync,
- const struct mod_vrr_params *vrr,
- unsigned int *v_total_min, unsigned int *v_total_max,
- unsigned int *event_triggers,
- unsigned int *window_min, unsigned int *window_max,
- unsigned int *lfc_mid_point_in_us,
- unsigned int *inserted_frames,
- unsigned int *inserted_duration_in_us)
-{
- if (mod_freesync == NULL)
- return;
-
- if (vrr->supported) {
- *v_total_min = vrr->adjust.v_total_min;
- *v_total_max = vrr->adjust.v_total_max;
- *event_triggers = 0;
- *lfc_mid_point_in_us = vrr->btr.mid_point_in_us;
- *inserted_frames = vrr->btr.frames_to_insert;
- *inserted_duration_in_us = vrr->btr.inserted_duration_in_us;
- }
-}
-
unsigned long long mod_freesync_calc_nominal_field_rate(
const struct dc_stream_state *stream)
{
@@ -1304,85 +1277,7 @@ unsigned long long mod_freesync_calc_nominal_field_rate(
return nominal_field_rate_in_uhz;
}
-unsigned long long mod_freesync_calc_field_rate_from_timing(
- unsigned int vtotal, unsigned int htotal, unsigned int pix_clk)
-{
- unsigned long long field_rate_in_uhz = 0;
- unsigned int total = htotal * vtotal;
-
- /* Calculate nominal field rate for stream, rounded up to nearest integer */
- field_rate_in_uhz = pix_clk;
- field_rate_in_uhz *= 1000000ULL;
-
- field_rate_in_uhz = div_u64(field_rate_in_uhz, total);
-
- return field_rate_in_uhz;
-}
-
bool mod_freesync_get_freesync_enabled(struct mod_vrr_params *pVrr)
{
return (pVrr->state != VRR_STATE_UNSUPPORTED) && (pVrr->state != VRR_STATE_DISABLED);
}
-
-bool mod_freesync_is_valid_range(uint32_t min_refresh_cap_in_uhz,
- uint32_t max_refresh_cap_in_uhz,
- uint32_t nominal_field_rate_in_uhz)
-{
-
- /* Typically nominal refresh calculated can have some fractional part.
- * Allow for some rounding error of actual video timing by taking floor
- * of caps and request. Round the nominal refresh rate.
- *
- * Dividing will convert everything to units in Hz although input
- * variable name is in uHz!
- *
- * Also note, this takes care of rounding error on the nominal refresh
- * so by rounding error we only expect it to be off by a small amount,
- * such as < 0.1 Hz. i.e. 143.9xxx or 144.1xxx.
- *
- * Example 1. Caps Min = 40 Hz, Max = 144 Hz
- * Request Min = 40 Hz, Max = 144 Hz
- * Nominal = 143.5x Hz rounded to 144 Hz
- * This function should allow this as valid request
- *
- * Example 2. Caps Min = 40 Hz, Max = 144 Hz
- * Request Min = 40 Hz, Max = 144 Hz
- * Nominal = 144.4x Hz rounded to 144 Hz
- * This function should allow this as valid request
- *
- * Example 3. Caps Min = 40 Hz, Max = 144 Hz
- * Request Min = 40 Hz, Max = 144 Hz
- * Nominal = 120.xx Hz rounded to 120 Hz
- * This function should return NOT valid since the requested
- * max is greater than current timing's nominal
- *
- * Example 4. Caps Min = 40 Hz, Max = 120 Hz
- * Request Min = 40 Hz, Max = 120 Hz
- * Nominal = 144.xx Hz rounded to 144 Hz
- * This function should return NOT valid since the nominal
- * is greater than the capability's max refresh
- */
- nominal_field_rate_in_uhz =
- div_u64(nominal_field_rate_in_uhz + 500000, 1000000);
- min_refresh_cap_in_uhz /= 1000000;
- max_refresh_cap_in_uhz /= 1000000;
-
- /* Check nominal is within range */
- if (nominal_field_rate_in_uhz > max_refresh_cap_in_uhz ||
- nominal_field_rate_in_uhz < min_refresh_cap_in_uhz)
- return false;
-
- /* If nominal is less than max, limit the max allowed refresh rate */
- if (nominal_field_rate_in_uhz < max_refresh_cap_in_uhz)
- max_refresh_cap_in_uhz = nominal_field_rate_in_uhz;
-
- /* Check min is within range */
- if (min_refresh_cap_in_uhz > max_refresh_cap_in_uhz)
- return false;
-
- /* For variable range, check for at least 10 Hz range */
- if (nominal_field_rate_in_uhz - min_refresh_cap_in_uhz < 10)
- return false;
-
- return true;
-}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
index 5e01c6e24cbc..c760216a6240 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c
@@ -29,6 +29,7 @@ static void push_error_status(struct mod_hdcp *hdcp,
enum mod_hdcp_status status)
{
struct mod_hdcp_trace *trace = &hdcp->connection.trace;
+ const uint8_t retry_limit = hdcp->connection.link.adjust.retry_limit;
if (trace->error_count < MAX_NUM_OF_ERROR_TRACE) {
trace->errors[trace->error_count].status = status;
@@ -39,11 +40,11 @@ static void push_error_status(struct mod_hdcp *hdcp,
if (is_hdcp1(hdcp)) {
hdcp->connection.hdcp1_retry_count++;
- if (hdcp->connection.hdcp1_retry_count == MAX_NUM_OF_ATTEMPTS)
+ if (hdcp->connection.hdcp1_retry_count == retry_limit)
hdcp->connection.link.adjust.hdcp1.disable = 1;
} else if (is_hdcp2(hdcp)) {
hdcp->connection.hdcp2_retry_count++;
- if (hdcp->connection.hdcp2_retry_count == MAX_NUM_OF_ATTEMPTS)
+ if (hdcp->connection.hdcp2_retry_count == retry_limit)
hdcp->connection.link.adjust.hdcp2.disable = 1;
}
}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
index 55c7d873175f..a37634942b07 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h
@@ -386,6 +386,7 @@ enum mod_hdcp_status mod_hdcp_write_repeater_auth_ack(struct mod_hdcp *hdcp);
enum mod_hdcp_status mod_hdcp_write_stream_manage(struct mod_hdcp *hdcp);
enum mod_hdcp_status mod_hdcp_write_content_type(struct mod_hdcp *hdcp);
enum mod_hdcp_status mod_hdcp_clear_cp_irq_status(struct mod_hdcp *hdcp);
+enum mod_hdcp_status mod_hdcp_write_poll_read_lc_fw(struct mod_hdcp *hdcp);
/* hdcp version helpers */
static inline uint8_t is_dp_hdcp(struct mod_hdcp *hdcp)
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
index 1ddb4f5eac8e..8bc377560787 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c
@@ -63,6 +63,7 @@ static inline enum mod_hdcp_status check_hdcp_capable_dp(struct mod_hdcp *hdcp)
static inline enum mod_hdcp_status check_r0p_available_dp(struct mod_hdcp *hdcp)
{
enum mod_hdcp_status status;
+
if (is_dp_hdcp(hdcp)) {
status = (hdcp->auth.msg.hdcp1.bstatus &
DP_BSTATUS_R0_PRIME_READY) ?
@@ -131,9 +132,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
{
/* Avoid device count == 0 to do authentication */
- if (0 == get_device_count(hdcp)) {
+ if (get_device_count(hdcp) == 0)
return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE;
- }
/* Some MST display may choose to report the internal panel as an HDCP RX.
* To update this condition with 1(because the immediate repeater's internal
@@ -432,18 +432,18 @@ static enum mod_hdcp_status authenticated_dp(struct mod_hdcp *hdcp,
goto out;
}
- if (status == MOD_HDCP_STATUS_SUCCESS)
- mod_hdcp_execute_and_set(mod_hdcp_read_bstatus,
- &input->bstatus_read, &status,
- hdcp, "bstatus_read");
- if (status == MOD_HDCP_STATUS_SUCCESS)
- mod_hdcp_execute_and_set(check_link_integrity_dp,
- &input->link_integrity_check, &status,
- hdcp, "link_integrity_check");
- if (status == MOD_HDCP_STATUS_SUCCESS)
- mod_hdcp_execute_and_set(check_no_reauthentication_request_dp,
- &input->reauth_request_check, &status,
- hdcp, "reauth_request_check");
+ mod_hdcp_execute_and_set(mod_hdcp_read_bstatus,
+ &input->bstatus_read, &status,
+ hdcp, "bstatus_read");
+
+ mod_hdcp_execute_and_set(check_link_integrity_dp,
+ &input->link_integrity_check, &status,
+ hdcp, "link_integrity_check");
+
+ mod_hdcp_execute_and_set(check_no_reauthentication_request_dp,
+ &input->reauth_request_check, &status,
+ hdcp, "reauth_request_check");
+
out:
return status;
}
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
index 91c22b96ebde..bb8ae80b37f8 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c
@@ -27,6 +27,11 @@
#include "hdcp.h"
+static inline uint16_t get_hdmi_rxstatus_msg_size(const uint8_t rxstatus[2])
+{
+	return (HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(rxstatus[1]) << 8) | rxstatus[0];
+}
+
static inline enum mod_hdcp_status check_receiver_id_list_ready(struct mod_hdcp *hdcp)
{
uint8_t is_ready = 0;
@@ -35,8 +40,7 @@ static inline enum mod_hdcp_status check_receiver_id_list_ready(struct mod_hdcp
is_ready = HDCP_2_2_DP_RXSTATUS_READY(hdcp->auth.msg.hdcp2.rxstatus_dp) ? 1 : 0;
else
is_ready = (HDCP_2_2_HDMI_RXSTATUS_READY(hdcp->auth.msg.hdcp2.rxstatus[1]) &&
- (HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
- hdcp->auth.msg.hdcp2.rxstatus[0])) ? 1 : 0;
+ get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus) != 0) ? 1 : 0;
return is_ready ? MOD_HDCP_STATUS_SUCCESS :
MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY;
}
@@ -84,15 +88,13 @@ static inline enum mod_hdcp_status check_link_integrity_failure_dp(
static enum mod_hdcp_status check_ake_cert_available(struct mod_hdcp *hdcp)
{
enum mod_hdcp_status status;
- uint16_t size;
if (is_dp_hdcp(hdcp)) {
status = MOD_HDCP_STATUS_SUCCESS;
} else {
status = mod_hdcp_read_rxstatus(hdcp);
if (status == MOD_HDCP_STATUS_SUCCESS) {
- size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
- hdcp->auth.msg.hdcp2.rxstatus[0];
+ const uint16_t size = get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus);
status = (size == sizeof(hdcp->auth.msg.hdcp2.ake_cert)) ?
MOD_HDCP_STATUS_SUCCESS :
MOD_HDCP_STATUS_HDCP2_AKE_CERT_PENDING;
@@ -104,7 +106,6 @@ static enum mod_hdcp_status check_ake_cert_available(struct mod_hdcp *hdcp)
static enum mod_hdcp_status check_h_prime_available(struct mod_hdcp *hdcp)
{
enum mod_hdcp_status status;
- uint8_t size;
status = mod_hdcp_read_rxstatus(hdcp);
if (status != MOD_HDCP_STATUS_SUCCESS)
@@ -115,8 +116,7 @@ static enum mod_hdcp_status check_h_prime_available(struct mod_hdcp *hdcp)
MOD_HDCP_STATUS_SUCCESS :
MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING;
} else {
- size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
- hdcp->auth.msg.hdcp2.rxstatus[0];
+ const uint16_t size = get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus);
status = (size == sizeof(hdcp->auth.msg.hdcp2.ake_h_prime)) ?
MOD_HDCP_STATUS_SUCCESS :
MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING;
@@ -128,7 +128,6 @@ out:
static enum mod_hdcp_status check_pairing_info_available(struct mod_hdcp *hdcp)
{
enum mod_hdcp_status status;
- uint8_t size;
status = mod_hdcp_read_rxstatus(hdcp);
if (status != MOD_HDCP_STATUS_SUCCESS)
@@ -139,8 +138,7 @@ static enum mod_hdcp_status check_pairing_info_available(struct mod_hdcp *hdcp)
MOD_HDCP_STATUS_SUCCESS :
MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING;
} else {
- size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
- hdcp->auth.msg.hdcp2.rxstatus[0];
+ const uint16_t size = get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus);
status = (size == sizeof(hdcp->auth.msg.hdcp2.ake_pairing_info)) ?
MOD_HDCP_STATUS_SUCCESS :
MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING;
@@ -151,8 +149,7 @@ out:
static enum mod_hdcp_status poll_l_prime_available(struct mod_hdcp *hdcp)
{
- enum mod_hdcp_status status;
- uint8_t size;
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_FAILURE;
uint16_t max_wait = 20; // units of ms
uint16_t num_polls = 5;
uint16_t wait_time = max_wait / num_polls;
@@ -167,8 +164,7 @@ static enum mod_hdcp_status poll_l_prime_available(struct mod_hdcp *hdcp)
if (status != MOD_HDCP_STATUS_SUCCESS)
break;
- size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
- hdcp->auth.msg.hdcp2.rxstatus[0];
+ const uint16_t size = get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus);
status = (size == sizeof(hdcp->auth.msg.hdcp2.lc_l_prime)) ?
MOD_HDCP_STATUS_SUCCESS :
MOD_HDCP_STATUS_HDCP2_L_PRIME_PENDING;
@@ -181,7 +177,6 @@ static enum mod_hdcp_status poll_l_prime_available(struct mod_hdcp *hdcp)
static enum mod_hdcp_status check_stream_ready_available(struct mod_hdcp *hdcp)
{
enum mod_hdcp_status status;
- uint8_t size;
if (is_dp_hdcp(hdcp)) {
status = MOD_HDCP_STATUS_INVALID_OPERATION;
@@ -189,8 +184,7 @@ static enum mod_hdcp_status check_stream_ready_available(struct mod_hdcp *hdcp)
status = mod_hdcp_read_rxstatus(hdcp);
if (status != MOD_HDCP_STATUS_SUCCESS)
goto out;
- size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
- hdcp->auth.msg.hdcp2.rxstatus[0];
+ const uint16_t size = get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus);
status = (size == sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_ready)) ?
MOD_HDCP_STATUS_SUCCESS :
MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING;
@@ -208,9 +202,8 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp)
static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp)
{
/* Avoid device count == 0 to do authentication */
- if (0 == get_device_count(hdcp)) {
+ if (get_device_count(hdcp) == 0)
return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE;
- }
/* Some MST display may choose to report the internal panel as an HDCP RX. */
/* To update this condition with 1(because the immediate repeater's internal */
@@ -250,8 +243,7 @@ static uint8_t process_rxstatus(struct mod_hdcp *hdcp,
sizeof(hdcp->auth.msg.hdcp2.rx_id_list);
else
hdcp->auth.msg.hdcp2.rx_id_list_size =
- HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 |
- hdcp->auth.msg.hdcp2.rxstatus[0];
+ get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus);
}
out:
return (*status == MOD_HDCP_STATUS_SUCCESS);
@@ -460,21 +452,12 @@ out:
return status;
}
-static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp,
+static enum mod_hdcp_status locality_check_sw(struct mod_hdcp *hdcp,
struct mod_hdcp_event_context *event_ctx,
struct mod_hdcp_transition_input_hdcp2 *input)
{
enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
- if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
- event_ctx->unexpected_event = 1;
- goto out;
- }
-
- if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_lc_init,
- &input->lc_init_prepare, &status,
- hdcp, "lc_init_prepare"))
- goto out;
if (!mod_hdcp_execute_and_set(mod_hdcp_write_lc_init,
&input->lc_init_write, &status,
hdcp, "lc_init_write"))
@@ -490,6 +473,48 @@ static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp,
&input->l_prime_read, &status,
hdcp, "l_prime_read"))
goto out;
+out:
+ return status;
+}
+
+static enum mod_hdcp_status locality_check_fw(struct mod_hdcp *hdcp,
+ struct mod_hdcp_event_context *event_ctx, /* not referenced in this body */
+ struct mod_hdcp_transition_input_hdcp2 *input)
+{ /* Locality-check variant that runs one step, mod_hdcp_write_poll_read_lc_fw, and returns the resulting status. */
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ /* mod_hdcp_execute_and_set is handed &input->l_prime_read and &status for this step. */
+ if (!mod_hdcp_execute_and_set(mod_hdcp_write_poll_read_lc_fw,
+ &input->l_prime_read, &status,
+ hdcp, "l_prime_read"))
+ goto out;
+
+out:
+ return status;
+}
+
+static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp,
+ struct mod_hdcp_event_context *event_ctx,
+ struct mod_hdcp_transition_input_hdcp2 *input)
+{
+ enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_i2c
+ && hdcp->config.ddc.funcs.atomic_write_poll_read_aux
+ && !hdcp->connection.link.adjust.hdcp2.force_sw_locality_check;
+
+ if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) {
+ event_ctx->unexpected_event = 1;
+ goto out;
+ }
+
+ if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_lc_init,
+ &input->lc_init_prepare, &status,
+ hdcp, "lc_init_prepare"))
+ goto out;
+
+ status = (use_fw ? locality_check_fw : locality_check_sw)(hdcp, event_ctx, input);
+ if (status != MOD_HDCP_STATUS_SUCCESS)
+ goto out;
+
if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_validate_l_prime,
&input->l_prime_validation, &status,
hdcp, "l_prime_validation"))
@@ -689,9 +714,8 @@ static enum mod_hdcp_status validate_stream_ready(struct mod_hdcp *hdcp,
if (is_hdmi_dvi_sl_hdcp(hdcp)) {
if (!process_rxstatus(hdcp, event_ctx, input, &status))
goto out;
- if (event_ctx->rx_id_list_ready) {
+ if (event_ctx->rx_id_list_ready)
goto out;
- }
}
if (is_hdmi_dvi_sl_hdcp(hdcp))
if (!mod_hdcp_execute_and_set(check_stream_ready_available,
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
index c5f6c11de7e5..89ffb89e1932 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c
@@ -184,17 +184,28 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp,
callback_in_ms(0, output);
set_state_id(hdcp, output, H2_A2_LOCALITY_CHECK);
break;
- case H2_A2_LOCALITY_CHECK:
+ case H2_A2_LOCALITY_CHECK: {
+ const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_i2c
+ && !adjust->hdcp2.force_sw_locality_check;
+
+ /*
+ * 1A-05: consider disconnection after LC init a failure
+ * 1A-13-1: consider invalid l' a failure
+ * 1A-13-2: consider l' timeout a failure
+ */
if (hdcp->state.stay_count > 10 ||
input->lc_init_prepare != PASS ||
- input->lc_init_write != PASS ||
- input->l_prime_available_poll != PASS ||
- input->l_prime_read != PASS) {
- /*
- * 1A-05: consider disconnection after LC init a failure
- * 1A-13-1: consider invalid l' a failure
- * 1A-13-2: consider l' timeout a failure
- */
+ (!use_fw && input->lc_init_write != PASS) ||
+ (!use_fw && input->l_prime_available_poll != PASS)) {
+ fail_and_restart_in_ms(0, &status, output);
+ break;
+ } else if (input->l_prime_read != PASS) {
+ if (use_fw && hdcp->config.debug.lc_enable_sw_fallback) {
+ adjust->hdcp2.force_sw_locality_check = true;
+ callback_in_ms(0, output);
+ break;
+ }
+
fail_and_restart_in_ms(0, &status, output);
break;
} else if (input->l_prime_validation != PASS) {
@@ -205,6 +216,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp,
callback_in_ms(0, output);
set_state_id(hdcp, output, H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER);
break;
+ }
case H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER:
if (input->eks_prepare != PASS ||
input->eks_write != PASS) {
@@ -498,14 +510,25 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp,
callback_in_ms(0, output);
set_state_id(hdcp, output, D2_A2_LOCALITY_CHECK);
break;
- case D2_A2_LOCALITY_CHECK:
+ case D2_A2_LOCALITY_CHECK: {
+ const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_aux
+ && !adjust->hdcp2.force_sw_locality_check;
+
if (hdcp->state.stay_count > 10 ||
input->lc_init_prepare != PASS ||
- input->lc_init_write != PASS ||
- input->l_prime_read != PASS) {
+ (!use_fw && input->lc_init_write != PASS)) {
/* 1A-12: consider invalid l' a failure */
fail_and_restart_in_ms(0, &status, output);
break;
+ } else if (input->l_prime_read != PASS) {
+ if (use_fw && hdcp->config.debug.lc_enable_sw_fallback) {
+ adjust->hdcp2.force_sw_locality_check = true;
+ callback_in_ms(0, output);
+ break;
+ }
+
+ fail_and_restart_in_ms(0, &status, output);
+ break;
} else if (input->l_prime_validation != PASS) {
callback_in_ms(0, output);
increment_stay_counter(hdcp);
@@ -514,6 +537,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp,
callback_in_ms(0, output);
set_state_id(hdcp, output, D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER);
break;
+ }
case D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER:
if (input->eks_prepare != PASS ||
input->eks_write != PASS) {
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
index f7b5583ee609..2e6408579194 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
@@ -25,7 +25,9 @@
#include "hdcp.h"
+#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
#define HDCP_I2C_ADDR 0x3a /* 0x74 >> 1*/
#define KSV_READ_SIZE 0xf /* 0x6803b - 0x6802c */
#define HDCP_MAX_AUX_TRANSACTION_SIZE 16
@@ -156,7 +158,15 @@ static enum mod_hdcp_status read(struct mod_hdcp *hdcp,
uint32_t cur_size = 0;
uint32_t data_offset = 0;
+ if (msg_id == MOD_HDCP_MESSAGE_ID_INVALID ||
+ msg_id >= MOD_HDCP_MESSAGE_ID_MAX)
+ return MOD_HDCP_STATUS_DDC_FAILURE;
+
if (is_dp_hdcp(hdcp)) {
+ int num_dpcd_addrs = ARRAY_SIZE(hdcp_dpcd_addrs);
+ if (msg_id >= num_dpcd_addrs)
+ return MOD_HDCP_STATUS_DDC_FAILURE;
+
while (buf_len > 0) {
cur_size = MIN(buf_len, HDCP_MAX_AUX_TRANSACTION_SIZE);
success = hdcp->config.ddc.funcs.read_dpcd(hdcp->config.ddc.handle,
@@ -171,6 +181,10 @@ static enum mod_hdcp_status read(struct mod_hdcp *hdcp,
data_offset += cur_size;
}
} else {
+ int num_i2c_offsets = ARRAY_SIZE(hdcp_i2c_offsets);
+ if (msg_id >= num_i2c_offsets)
+ return MOD_HDCP_STATUS_DDC_FAILURE;
+
success = hdcp->config.ddc.funcs.read_i2c(
hdcp->config.ddc.handle,
HDCP_I2C_ADDR,
@@ -215,7 +229,15 @@ static enum mod_hdcp_status write(struct mod_hdcp *hdcp,
uint32_t cur_size = 0;
uint32_t data_offset = 0;
+ if (msg_id == MOD_HDCP_MESSAGE_ID_INVALID ||
+ msg_id >= MOD_HDCP_MESSAGE_ID_MAX)
+ return MOD_HDCP_STATUS_DDC_FAILURE;
+
if (is_dp_hdcp(hdcp)) {
+ int num_dpcd_addrs = ARRAY_SIZE(hdcp_dpcd_addrs);
+ if (msg_id >= num_dpcd_addrs)
+ return MOD_HDCP_STATUS_DDC_FAILURE;
+
while (buf_len > 0) {
cur_size = MIN(buf_len, HDCP_MAX_AUX_TRANSACTION_SIZE);
success = hdcp->config.ddc.funcs.write_dpcd(
@@ -231,6 +253,10 @@ static enum mod_hdcp_status write(struct mod_hdcp *hdcp,
data_offset += cur_size;
}
} else {
+ int num_i2c_offsets = ARRAY_SIZE(hdcp_i2c_offsets);
+ if (msg_id >= num_i2c_offsets)
+ return MOD_HDCP_STATUS_DDC_FAILURE;
+
hdcp->buf[0] = hdcp_i2c_offsets[msg_id];
memmove(&hdcp->buf[1], buf, buf_len);
success = hdcp->config.ddc.funcs.write_i2c(
@@ -662,3 +688,76 @@ enum mod_hdcp_status mod_hdcp_clear_cp_irq_status(struct mod_hdcp *hdcp)
return MOD_HDCP_STATUS_INVALID_OPERATION;
}
+
+static bool write_stall_read_lc_fw_aux(struct mod_hdcp *hdcp)
+{ /* Hands an LC_INIT write op and an L_PRIME read op to the atomic_write_poll_read_aux callback; returns its result. */
+ struct mod_hdcp_message_hdcp2 *hdcp2 = &hdcp->auth.msg.hdcp2;
+ /* Both the write and the read op skip byte 0 of their buffers (data + 1, size - 1). */
+ struct mod_hdcp_atomic_op_aux write = {
+ hdcp_dpcd_addrs[MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT],
+ hdcp2->lc_init + 1,
+ sizeof(hdcp2->lc_init) - 1,
+ };
+ struct mod_hdcp_atomic_op_aux stall = { 0, NULL, 0, }; /* all-zero op passed in the callback's poll slot */
+ struct mod_hdcp_atomic_op_aux read = {
+ hdcp_dpcd_addrs[MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME],
+ hdcp2->lc_l_prime + 1,
+ sizeof(hdcp2->lc_l_prime) - 1,
+ };
+ /* Byte 0 of lc_l_prime is not covered by the read op; it is filled in here (presumably the message id - TODO confirm). */
+ hdcp2->lc_l_prime[0] = HDCP_2_2_LC_SEND_LPRIME;
+ /* The callback pointer is used without a NULL check in this function. */
+ return hdcp->config.ddc.funcs.atomic_write_poll_read_aux(
+ hdcp->config.ddc.handle,
+ &write,
+ &stall,
+ &read,
+ 16 * 1000, /* poll_timeout_us argument */
+ 0 /* poll_mask_msb argument */
+ );
+}
+
+static bool write_poll_read_lc_fw_i2c(struct mod_hdcp *hdcp)
+{ /* Hands write, poll and read ops for the LC_INIT / L_PRIME exchange to the atomic_write_poll_read_i2c callback. */
+ struct mod_hdcp_message_hdcp2 *hdcp2 = &hdcp->auth.msg.hdcp2;
+ uint8_t expected_rxstatus[2] = { sizeof(hdcp2->lc_l_prime) }; /* byte 0 = sizeof(lc_l_prime), byte 1 = 0 */
+ /* Stage the write in hdcp->buf: the LC_INIT register offset first, then the lc_init bytes. */
+ hdcp->buf[0] = hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT];
+ memmove(&hdcp->buf[1], hdcp2->lc_init, sizeof(hdcp2->lc_init));
+ /* The write op's offset field is 0; the offset travels as hdcp->buf[0] instead. */
+ struct mod_hdcp_atomic_op_i2c write = {
+ HDCP_I2C_ADDR,
+ 0,
+ hdcp->buf,
+ sizeof(hdcp2->lc_init) + 1,
+ };
+ struct mod_hdcp_atomic_op_i2c poll = { /* RXSTATUS register paired with the expected_rxstatus bytes */
+ HDCP_I2C_ADDR,
+ hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_READ_RXSTATUS],
+ expected_rxstatus,
+ sizeof(expected_rxstatus),
+ };
+ struct mod_hdcp_atomic_op_i2c read = { /* whole lc_l_prime buffer, including byte 0 */
+ HDCP_I2C_ADDR,
+ hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME],
+ hdcp2->lc_l_prime,
+ sizeof(hdcp2->lc_l_prime),
+ };
+ /* The callback pointer is used without a NULL check in this function. */
+ return hdcp->config.ddc.funcs.atomic_write_poll_read_i2c(
+ hdcp->config.ddc.handle,
+ &write,
+ &poll,
+ &read,
+ 20 * 1000, /* poll_timeout_us argument */
+ 6 /* poll_mask_msb argument */
+ );
+}
+
+enum mod_hdcp_status mod_hdcp_write_poll_read_lc_fw(struct mod_hdcp *hdcp)
+{
+ const bool success = (is_dp_hdcp(hdcp) ? write_stall_read_lc_fw_aux : write_poll_read_lc_fw_i2c)(hdcp);
+ /* AUX helper when is_dp_hdcp() is true, I2C helper otherwise; a false result is reported as DDC_FAILURE. */
+ return success ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_DDC_FAILURE;
+}
+
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
index c62df3bcc7cb..1d83c1b9da10 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h
@@ -86,10 +86,12 @@
#define HDCP_CPIRQ_TRACE(hdcp) \
HDCP_LOG_FSM(hdcp, "[Link %d] --> CPIRQ", hdcp->config.index)
#define HDCP_EVENT_TRACE(hdcp, event) \
- if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \
- HDCP_TIMEOUT_TRACE(hdcp); \
- else if (event == MOD_HDCP_EVENT_CPIRQ) \
- HDCP_CPIRQ_TRACE(hdcp)
+ do { \
+ if (event == MOD_HDCP_EVENT_WATCHDOG_TIMEOUT) \
+ HDCP_TIMEOUT_TRACE(hdcp); \
+ else if (event == MOD_HDCP_EVENT_CPIRQ) \
+ HDCP_CPIRQ_TRACE(hdcp); \
+ } while (0)
/* TODO: find some way to tell if logging is off to save time */
#define HDCP_DDC_READ_TRACE(hdcp, msg_name, msg, msg_size) do { \
mod_hdcp_dump_binary_message(msg, msg_size, hdcp->buf, \
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
index ee67a35c2a8e..6b7db8ec9a53 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -260,6 +260,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp)
return MOD_HDCP_STATUS_FAILURE;
}
+ if (!display)
+ return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND;
+
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf;
mutex_lock(&psp->hdcp_context.mutex);
@@ -368,6 +371,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_encryption(struct mod_hdcp *hdcp)
struct mod_hdcp_display *display = get_first_active_display(hdcp);
enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
+ if (!display)
+ return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND;
+
mutex_lock(&psp->hdcp_context.mutex);
hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -443,7 +449,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(struct mod_hdcp
for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE)
- continue;
+ continue;
memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
@@ -926,7 +932,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp
for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) {
if (hdcp->displays[i].adjust.disable || hdcp->displays[i].state != MOD_HDCP_DISPLAY_ACTIVE)
- continue;
+ continue;
hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.display_handle = hdcp->displays[i].index;
hdcp_cmd->in_msg.hdcp2_enable_dp_stream_encryption.session_handle = hdcp->auth.id;
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
index 5b71bc96b98c..7844ea91650b 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h
@@ -98,9 +98,9 @@ enum ta_dtm_encoder_type {
* This enum defines software value for dio_output_type
*/
typedef enum {
- TA_DTM_DIO_OUTPUT_TYPE__INVALID,
- TA_DTM_DIO_OUTPUT_TYPE__DIRECT,
- TA_DTM_DIO_OUTPUT_TYPE__DPIA
+ TA_DTM_DIO_OUTPUT_TYPE__INVALID,
+ TA_DTM_DIO_OUTPUT_TYPE__DIRECT,
+ TA_DTM_DIO_OUTPUT_TYPE__DPIA
} ta_dtm_dio_output_type;
struct ta_dtm_topology_update_input_v3 {
@@ -237,11 +237,11 @@ enum ta_hdcp2_hdcp2_msg_id_max_size {
#define TA_HDCP__HDCP1_KSV_LIST_MAX_ENTRIES 127
#define TA_HDCP__HDCP1_V_PRIME_SIZE 20
#define TA_HDCP__HDCP2_TX_BUF_MAX_SIZE \
- TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6
+ (TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_NO_STORED_KM + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_STORED_KM + 6)
// 64 bits boundaries
#define TA_HDCP__HDCP2_RX_BUF_MAX_SIZE \
- TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4
+ (TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_SEND_CERT + TA_HDCP_HDCP2_MSG_ID_MAX_SIZE__AKE_RECEIVER_INFO + 4)
enum ta_hdcp_status {
TA_HDCP_STATUS__SUCCESS = 0x00,
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
index afe1f6cce528..57916ed98c86 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
@@ -23,34 +23,6 @@
*
*/
-
-
-
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
#ifndef MOD_FREESYNC_H_
#define MOD_FREESYNC_H_
@@ -138,25 +110,6 @@ struct mod_vrr_params {
struct mod_freesync *mod_freesync_create(struct dc *dc);
void mod_freesync_destroy(struct mod_freesync *mod_freesync);
-bool mod_freesync_get_vmin_vmax(struct mod_freesync *mod_freesync,
- const struct dc_stream_state *stream,
- unsigned int *vmin,
- unsigned int *vmax);
-
-bool mod_freesync_get_v_position(struct mod_freesync *mod_freesync,
- struct dc_stream_state *stream,
- unsigned int *nom_v_pos,
- unsigned int *v_pos);
-
-void mod_freesync_get_settings(struct mod_freesync *mod_freesync,
- const struct mod_vrr_params *vrr,
- unsigned int *v_total_min, unsigned int *v_total_max,
- unsigned int *event_triggers,
- unsigned int *window_min, unsigned int *window_max,
- unsigned int *lfc_mid_point_in_us,
- unsigned int *inserted_frames,
- unsigned int *inserted_duration_in_us);
-
void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync,
const struct dc_stream_state *stream,
const struct mod_vrr_params *vrr,
@@ -183,13 +136,6 @@ void mod_freesync_handle_v_update(struct mod_freesync *mod_freesync,
unsigned long long mod_freesync_calc_nominal_field_rate(
const struct dc_stream_state *stream);
-unsigned long long mod_freesync_calc_field_rate_from_timing(
- unsigned int vtotal, unsigned int htotal, unsigned int pix_clk);
-
-bool mod_freesync_is_valid_range(uint32_t min_refresh_cap_in_uhz,
- uint32_t max_refresh_cap_in_uhz,
- uint32_t nominal_field_rate_in_uhz);
-
unsigned int mod_freesync_calc_v_total_from_refresh(
const struct dc_stream_state *stream,
unsigned int refresh_in_uhz);
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
index a4d344a4db9e..b51ddf2846df 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h
@@ -133,9 +133,22 @@ enum mod_hdcp_display_disable_option {
MOD_HDCP_DISPLAY_DISABLE_ENCRYPTION,
};
+struct mod_hdcp_atomic_op_i2c { /* one op (write, poll or read) handed to the atomic_write_poll_read_i2c callback */
+ uint8_t address; /* I2C device address */
+ uint8_t offset; /* register offset within the device */
+ uint8_t *data; /* buffer for this op */
+ uint32_t size; /* size of data in bytes */
+};
+
+struct mod_hdcp_atomic_op_aux { /* one op (write, poll or read) handed to the atomic_write_poll_read_aux callback */
+ uint32_t address; /* AUX address of the op (presumably a DPCD address - TODO confirm) */
+ uint8_t *data; /* buffer for this op */
+ uint32_t size; /* size of data in bytes */
+};
+
struct mod_hdcp_ddc {
void *handle;
- struct {
+ struct mod_hdcp_ddc_funcs {
bool (*read_i2c)(void *handle,
uint32_t address,
uint8_t offset,
@@ -153,6 +166,22 @@ struct mod_hdcp_ddc {
uint32_t address,
const uint8_t *data,
uint32_t size);
+ bool (*atomic_write_poll_read_i2c)(
+ void *handle,
+ const struct mod_hdcp_atomic_op_i2c *write,
+ const struct mod_hdcp_atomic_op_i2c *poll,
+ struct mod_hdcp_atomic_op_i2c *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+ );
+ bool (*atomic_write_poll_read_aux)(
+ void *handle,
+ const struct mod_hdcp_atomic_op_aux *write,
+ const struct mod_hdcp_atomic_op_aux *poll,
+ struct mod_hdcp_atomic_op_aux *read,
+ uint32_t poll_timeout_us,
+ uint8_t poll_mask_msb
+ );
} funcs;
};
@@ -185,11 +214,13 @@ struct mod_hdcp_link_adjustment_hdcp2 {
uint8_t force_type : 2;
uint8_t force_no_stored_km : 1;
uint8_t increase_h_prime_timeout: 1;
- uint8_t reserved : 3;
+ uint8_t force_sw_locality_check : 1;
+ uint8_t reserved : 2;
};
struct mod_hdcp_link_adjustment {
uint8_t auth_delay;
+ uint8_t retry_limit;
struct mod_hdcp_link_adjustment_hdcp1 hdcp1;
struct mod_hdcp_link_adjustment_hdcp2 hdcp2;
};
@@ -272,6 +303,10 @@ struct mod_hdcp_display_query {
struct mod_hdcp_config {
struct mod_hdcp_psp psp;
struct mod_hdcp_ddc ddc;
+ struct {
+ uint8_t lc_enable_sw_fallback : 1;
+ uint8_t reserved : 7;
+ } debug;
uint8_t index;
};
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h
index 4220fd8fdd60..8ce6c22e5d04 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h
@@ -53,14 +53,14 @@ void mod_stats_dump(struct mod_stats *mod_stats);
void mod_stats_reset_data(struct mod_stats *mod_stats);
void mod_stats_update_event(struct mod_stats *mod_stats,
- char *event_string,
+ const char *event_string,
unsigned int length);
void mod_stats_update_flip(struct mod_stats *mod_stats,
- unsigned long timestamp_in_ns);
+ unsigned long long timestamp_in_ns);
void mod_stats_update_vupdate(struct mod_stats *mod_stats,
- unsigned long timestamp_in_ns);
+ unsigned long long timestamp_in_ns);
void mod_stats_update_freesync(struct mod_stats *mod_stats,
unsigned int v_total_min,
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index 84f9b412a4f1..b3d55cac3569 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -383,10 +383,10 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
colorimetryFormat = ColorimetryYCC_DP_ITU709;
else if (cs == COLOR_SPACE_ADOBERGB)
colorimetryFormat = ColorimetryYCC_DP_AdobeYCC;
- else if (cs == COLOR_SPACE_2020_YCBCR)
+ else if (cs == COLOR_SPACE_2020_YCBCR_LIMITED)
colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr;
- if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22)
+ if (cs == COLOR_SPACE_2020_YCBCR_LIMITED && tf == TRANSFER_FUNC_GAMMA_22)
colorimetryFormat = ColorimetryYCC_DP_ITU709;
break;
@@ -536,8 +536,6 @@ void mod_build_adaptive_sync_infopacket(const struct dc_stream_state *stream,
mod_build_adaptive_sync_infopacket_v2(stream, param, info_packet);
break;
case FREESYNC_TYPE_PCON_IN_WHITELIST:
- mod_build_adaptive_sync_infopacket_v1(info_packet);
- break;
case ADAPTIVE_SYNC_TYPE_EDP:
mod_build_adaptive_sync_infopacket_v1(info_packet);
break;
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index 73a2b37fbbd7..29ccd3532d13 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -31,7 +31,7 @@
#define DIV_ROUNDUP(a, b) (((a)+((b)/2))/(b))
#define bswap16_based_on_endian(big_endian, value) \
- (big_endian) ? cpu_to_be16(value) : cpu_to_le16(value)
+ ((big_endian) ? cpu_to_be16(value) : cpu_to_le16(value))
/* Possible Min Reduction config from least aggressive to most aggressive
* 0 1 2 3 4 5 6 7 8 9 10 11 12
@@ -743,13 +743,13 @@ bool dmub_init_abm_config(struct resource_pool *res_pool,
for (i = 0; i < NUM_AGGR_LEVEL; i++) {
config.blRampReduction[i] = params.backlight_ramping_reduction;
config.blRampStart[i] = params.backlight_ramping_start;
- }
- } else {
- for (i = 0; i < NUM_AGGR_LEVEL; i++) {
- config.blRampReduction[i] = abm_settings[set][i].blRampReduction;
- config.blRampStart[i] = abm_settings[set][i].blRampStart;
- }
- }
+ }
+ } else {
+ for (i = 0; i < NUM_AGGR_LEVEL; i++) {
+ config.blRampReduction[i] = abm_settings[set][i].blRampReduction;
+ config.blRampStart[i] = abm_settings[set][i].blRampStart;
+ }
+ }
config.min_abm_backlight = ram_table.min_abm_backlight;
@@ -839,6 +839,10 @@ bool is_psr_su_specific_panel(struct dc_link *link)
((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) ||
(dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07)))
isPSRSUSupported = false;
+ else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03)
+ isPSRSUSupported = false;
+ else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x01)
+ isPSRSUSupported = false;
else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1)
isPSRSUSupported = true;
}
@@ -944,11 +948,11 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link,
uint16_t slice_height;
config->dsc_slice_height = 0;
- if ((link->connector_signal & SIGNAL_TYPE_EDP) &&
- (!dc->caps.edp_dsc_support ||
+ if (!(link->connector_signal & SIGNAL_TYPE_EDP) ||
+ !dc->caps.edp_dsc_support ||
link->panel_config.dsc.disable_dsc_edp ||
!link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT ||
- !stream->timing.dsc_cfg.num_slices_v))
+ !stream->timing.dsc_cfg.num_slices_v)
return true;
pic_height = stream->timing.v_addressable +
@@ -971,6 +975,49 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link,
return true;
}
+void set_replay_defer_update_coasting_vtotal(struct dc_link *link,
+ enum replay_coasting_vtotal_type type,
+ uint32_t vtotal)
+{ /* Stores vtotal in the deferred table slot selected by type; type is used as the index without a range check. */
+ link->replay_settings.defer_update_coasting_vtotal_table[type] = vtotal;
+}
+
+void update_replay_coasting_vtotal_from_defer(struct dc_link *link,
+ enum replay_coasting_vtotal_type type)
+{ /* Copies the deferred value for type into coasting_vtotal_table; the deferred slot is left unchanged. */
+ link->replay_settings.coasting_vtotal_table[type] =
+ link->replay_settings.defer_update_coasting_vtotal_table[type];
+}
+
+void set_replay_coasting_vtotal(struct dc_link *link,
+ enum replay_coasting_vtotal_type type,
+ uint32_t vtotal)
+{ /* Stores vtotal directly in coasting_vtotal_table[type]; type is used as the index without a range check. */
+ link->replay_settings.coasting_vtotal_table[type] = vtotal;
+}
+
+void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal)
+{ /* Stores vtotal in replay_settings.low_rr_full_screen_video_pseudo_vtotal. */
+ link->replay_settings.low_rr_full_screen_video_pseudo_vtotal = vtotal;
+}
+
+void calculate_replay_link_off_frame_count(struct dc_link *link,
+ uint16_t vtotal, uint16_t htotal)
+{
+ uint8_t max_link_off_frame_count = 0;
+ uint16_t max_deviation_line = 0, pixel_deviation_per_line = 0;
+ /* Stores htotal * max_deviation_line / (pixel_deviation_per_line * vtotal); 0 (after ASSERT) if a divisor term or htotal is 0. */
+ max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line;
+ pixel_deviation_per_line = link->dpcd_caps.pr_info.pixel_deviation_per_line;
+ /* Multiply as uint32_t: uint16_t operands would promote to int, whose product can overflow (undefined behaviour). */
+ if (htotal != 0 && vtotal != 0 && pixel_deviation_per_line != 0)
+ max_link_off_frame_count = (uint32_t)htotal * max_deviation_line / ((uint32_t)pixel_deviation_per_line * vtotal);
+ else
+ ASSERT(0);
+ /* NOTE(review): the quotient is narrowed to uint8_t, so values above 255 wrap - confirm whether clamping is wanted. */
+ link->replay_settings.link_off_frame_count = max_link_off_frame_count;
+}
+
bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps)
{
unsigned int data_points_size;
@@ -992,3 +1039,8 @@ bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_back
memcpy(caps->data_points, custom_backlight_profiles[config_no].data_points, data_points_size);
return true;
}
+
+void reset_replay_dsync_error_count(struct dc_link *link)
+{ /* Zeroes replay_settings.replay_desync_error_fail_count. */
+ link->replay_settings.replay_desync_error_fail_count = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
index d9e0d67d67f7..391209a3bf29 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
@@ -54,6 +54,17 @@ bool dmub_init_abm_config(struct resource_pool *res_pool,
unsigned int inst);
void init_replay_config(struct dc_link *link, struct replay_config *pr_config);
+void set_replay_coasting_vtotal(struct dc_link *link,
+ enum replay_coasting_vtotal_type type,
+ uint32_t vtotal);
+void set_replay_defer_update_coasting_vtotal(struct dc_link *link,
+ enum replay_coasting_vtotal_type type,
+ uint32_t vtotal);
+void update_replay_coasting_vtotal_from_defer(struct dc_link *link,
+ enum replay_coasting_vtotal_type type);
+void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal);
+void calculate_replay_link_off_frame_count(struct dc_link *link,
+ uint16_t vtotal, uint16_t htotal);
bool is_psr_su_specific_panel(struct dc_link *link);
void mod_power_calc_psr_configs(struct psr_config *psr_config,
@@ -67,4 +78,7 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link,
bool fill_custom_backlight_caps(unsigned int config_no,
struct dm_acpi_atif_backlight_caps *caps);
+void reset_replay_dsync_error_count(struct dc_link *link);
+void change_replay_to_psr(struct dc_link *link);
+void change_psr_to_replay(struct dc_link *link);
#endif /* MODULES_POWER_POWER_HELPERS_H_ */
diff --git a/drivers/gpu/drm/amd/include/amd_acpi.h b/drivers/gpu/drm/amd/include/amd_acpi.h
index 2d089d30518f..06badbf0c5b9 100644
--- a/drivers/gpu/drm/amd/include/amd_acpi.h
+++ b/drivers/gpu/drm/amd/include/amd_acpi.h
@@ -61,7 +61,7 @@ struct atif_qbtc_arguments {
struct atif_qbtc_data_point {
u8 luminance; /* luminance in percent */
- u8 ipnut_signal; /* input signal in range 0-255 */
+ u8 input_signal; /* input signal in range 0-255 */
} __packed;
struct atif_qbtc_output {
@@ -75,6 +75,8 @@ struct atif_qbtc_output {
u8 number_of_points; /* number of data points */
struct atif_qbtc_data_point data_points[ATIF_QBTC_MAX_DATA_POINTS];
} __packed;
+static_assert(ATIF_QBTC_MAX_DATA_POINTS == MAX_LUMINANCE_DATA_POINTS);
+static_assert(sizeof(struct atif_qbtc_data_point) == sizeof(struct amdgpu_dm_luminance_data));
#define ATIF_NOTIFY_MASK 0x3
#define ATIF_NOTIFY_NONE 0
diff --git a/drivers/gpu/drm/amd/include/amd_cper.h b/drivers/gpu/drm/amd/include/amd_cper.h
new file mode 100644
index 000000000000..086869264425
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/amd_cper.h
@@ -0,0 +1,269 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMD_CPER_H__
+#define __AMD_CPER_H__
+
+#include <linux/uuid.h>
+
+#define CPER_HDR_REV_1 (0x100)
+#define CPER_SEC_MINOR_REV_1 (0x01)
+#define CPER_SEC_MAJOR_REV_22 (0x22)
+#define CPER_MAX_OAM_COUNT (8)
+
+#define CPER_CTX_TYPE_CRASH (1)
+#define CPER_CTX_TYPE_BOOT (9)
+
+#define CPER_CREATOR_ID_AMDGPU "amdgpu"
+
+#define CPER_NOTIFY_MCE \
+ GUID_INIT(0xE8F56FFE, 0x919C, 0x4cc5, 0xBA, 0x88, 0x65, 0xAB, \
+ 0xE1, 0x49, 0x13, 0xBB)
+#define CPER_NOTIFY_CMC \
+ GUID_INIT(0x2DCE8BB1, 0xBDD7, 0x450e, 0xB9, 0xAD, 0x9C, 0xF4, \
+ 0xEB, 0xD4, 0xF8, 0x90)
+#define BOOT_TYPE \
+ GUID_INIT(0x3D61A466, 0xAB40, 0x409a, 0xA6, 0x98, 0xF3, 0x62, \
+ 0xD4, 0x64, 0xB3, 0x8F)
+
+#define AMD_CRASHDUMP \
+ GUID_INIT(0x32AC0C78, 0x2623, 0x48F6, 0xB0, 0xD0, 0x73, 0x65, \
+ 0x72, 0x5F, 0xD6, 0xAE)
+#define AMD_GPU_NONSTANDARD_ERROR \
+ GUID_INIT(0x32AC0C78, 0x2623, 0x48F6, 0x81, 0xA2, 0xAC, 0x69, \
+ 0x17, 0x80, 0x55, 0x1D)
+#define PROC_ERR_SECTION_TYPE \
+ GUID_INIT(0xDC3EA0B0, 0xA144, 0x4797, 0xB9, 0x5B, 0x53, 0xFA, \
+ 0x24, 0x2B, 0x6E, 0x1D)
+
+enum cper_error_severity { /* also used as a field type in struct cper_hdr and struct cper_sec_desc */
+ CPER_SEV_NON_FATAL_UNCORRECTED = 0,
+ CPER_SEV_FATAL = 1,
+ CPER_SEV_NON_FATAL_CORRECTED = 2,
+ CPER_SEV_NUM = 3, /* equals the number of severities listed above */
+ /* CPER_SEV_UNUSED lies outside 0..CPER_SEV_NUM (presumably a "no severity" marker - TODO confirm). */
+ CPER_SEV_UNUSED = 10,
+};
+
+enum cper_aca_reg { /* LO/HI pairs for seven registers, numbered 0..13 */
+ CPER_ACA_REG_CTL_LO = 0,
+ CPER_ACA_REG_CTL_HI = 1,
+ CPER_ACA_REG_STATUS_LO = 2,
+ CPER_ACA_REG_STATUS_HI = 3,
+ CPER_ACA_REG_ADDR_LO = 4,
+ CPER_ACA_REG_ADDR_HI = 5,
+ CPER_ACA_REG_MISC0_LO = 6,
+ CPER_ACA_REG_MISC0_HI = 7,
+ CPER_ACA_REG_CONFIG_LO = 8,
+ CPER_ACA_REG_CONFIG_HI = 9,
+ CPER_ACA_REG_IPID_LO = 10,
+ CPER_ACA_REG_IPID_HI = 11,
+ CPER_ACA_REG_SYND_LO = 12,
+ CPER_ACA_REG_SYND_HI = 13,
+ /* COUNT is 32, larger than the 14 named entries; it sizes reg_dump[] in struct cper_sec_nonstd_err_ctx. */
+ CPER_ACA_REG_COUNT = 32,
+};
+
+#pragma pack(push, 1)
+
+struct cper_timestamp { /* eight uint8_t fields, declared under #pragma pack(1); embedded in struct cper_hdr */
+ uint8_t seconds;
+ uint8_t minutes;
+ uint8_t hours;
+ uint8_t flag;
+ uint8_t day;
+ uint8_t month;
+ uint8_t year; /* kept separately from century below */
+ uint8_t century;
+};
+
+struct cper_hdr { /* record header; declared under #pragma pack(1), so fields are laid out without padding */
+ char signature[4]; /* "CPER" */
+ uint16_t revision;
+ uint32_t signature_end; /* 0xFFFFFFFF */
+ uint16_t sec_cnt;
+ enum cper_error_severity error_severity; /* NOTE(review): enum width is compiler-chosen; confirm it matches the record layout */
+ union { /* valid_bits and valid_mask alias the same 32 bits */
+ struct {
+ uint32_t platform_id : 1;
+ uint32_t timestamp : 1;
+ uint32_t partition_id : 1;
+ uint32_t reserved : 29;
+ } valid_bits;
+ uint32_t valid_mask;
+ };
+ uint32_t record_length; /* Total size of CPER Entry */
+ struct cper_timestamp timestamp;
+ char platform_id[16];
+ guid_t partition_id; /* Reserved */
+ char creator_id[16];
+ guid_t notify_type; /* CMC, MCE */
+ char record_id[8]; /* Unique CPER Entry ID */
+ uint32_t flags; /* Reserved */
+ uint64_t persistence_info; /* Reserved */
+ uint8_t reserved[12]; /* Reserved */
+};
+
+struct cper_sec_desc { /* section descriptor; declared under #pragma pack(1) */
+ uint32_t sec_offset; /* Offset from the start of CPER entry */
+ uint32_t sec_length;
+ uint8_t revision_minor; /* CPER_SEC_MINOR_REV_1 */
+ uint8_t revision_major; /* CPER_SEC_MAJOR_REV_22 */
+ union { /* valid_bits and valid_mask alias the same 8 bits */
+ struct {
+ uint8_t fru_id : 1;
+ uint8_t fru_text : 1;
+ uint8_t reserved : 6;
+ } valid_bits;
+ uint8_t valid_mask;
+ };
+ uint8_t reserved;
+ union { /* flag_bits and flag_mask alias the same 32 bits (1 + 2 + 1 + 1 + 27) */
+ struct {
+ uint32_t primary : 1;
+ uint32_t reserved1 : 2;
+ uint32_t exceed_err_threshold : 1;
+ uint32_t latent_err : 1;
+ uint32_t reserved2 : 27;
+ } flag_bits;
+ uint32_t flag_mask;
+ };
+ guid_t sec_type;
+ char fru_id[16];
+ enum cper_error_severity severity; /* NOTE(review): enum width is compiler-chosen; confirm it matches the record layout */
+ char fru_text[20];
+};
+
+struct cper_sec_nonstd_err_hdr { /* first member of struct cper_sec_nonstd_err */
+ union { /* valid_bits names 14 bits (1 + 1 + 6 + 6) of the 64-bit valid_mask */
+ struct {
+ uint64_t apic_id : 1;
+ uint64_t fw_id : 1;
+ uint64_t err_info_cnt : 6; /* 6-bit field */
+ uint64_t err_context_cnt : 6; /* 6-bit field */
+ } valid_bits;
+ uint64_t valid_mask;
+ };
+ uint64_t apic_id;
+ char fw_id[48];
+};
+
+struct cper_sec_nonstd_err_info { /* second member of struct cper_sec_nonstd_err */
+ guid_t error_type;
+ union { /* one bit per field that follows the unions, plus 59 reserved bits (64 total) */
+ struct {
+ uint64_t ms_chk : 1;
+ uint64_t target_addr_id : 1;
+ uint64_t req_id : 1;
+ uint64_t resp_id : 1;
+ uint64_t instr_ptr : 1;
+ uint64_t reserved : 59;
+ } valid_bits;
+ uint64_t valid_mask;
+ };
+ union { /* six *_valid bits and 10 reserved bits, then the values (2 + 5 bits) and 41 reserved bits */
+ struct {
+ uint64_t err_type_valid : 1;
+ uint64_t pcc_valid : 1;
+ uint64_t uncorr_valid : 1;
+ uint64_t precise_ip_valid : 1;
+ uint64_t restartable_ip_valid : 1;
+ uint64_t overflow_valid : 1;
+ uint64_t reserved1 : 10;
+ uint64_t err_type : 2;
+ uint64_t pcc : 1;
+ uint64_t uncorr : 1;
+ uint64_t precised_ip : 1; /* spelled "precised" here, "precise_ip_valid" above */
+ uint64_t restartable_ip : 1;
+ uint64_t overflow : 1;
+ uint64_t reserved2 : 41;
+ } ms_chk_bits;
+ uint64_t ms_chk_mask;
+ };
+ uint64_t target_addr_id;
+ uint64_t req_id;
+ uint64_t resp_id;
+ uint64_t instr_ptr;
+};
+
+struct cper_sec_nonstd_err_ctx { /* third member of struct cper_sec_nonstd_err */
+ uint16_t reg_ctx_type; /* presumably one of the CPER_CTX_TYPE_* values - TODO confirm */
+ uint16_t reg_arr_size;
+ uint32_t msr_addr;
+ uint64_t mm_reg_addr;
+ uint32_t reg_dump[CPER_ACA_REG_COUNT]; /* 32 entries; presumably indexed by enum cper_aca_reg - TODO confirm */
+};
+
+struct cper_sec_nonstd_err { /* header, one info entry and one context entry, packed back to back */
+ struct cper_sec_nonstd_err_hdr hdr;
+ struct cper_sec_nonstd_err_info info;
+ struct cper_sec_nonstd_err_ctx ctx;
+};
+
+struct cper_sec_crashdump_hdr { /* shared first member of the fatal and boot crashdump sections */
+ uint64_t reserved1;
+ uint64_t reserved2;
+ char fw_id[48]; /* same length as fw_id in struct cper_sec_nonstd_err_hdr */
+ uint64_t reserved3[8];
+};
+
+struct cper_sec_crashdump_reg_data { /* four registers, each stored as a lo/hi pair of 32-bit words */
+ uint32_t status_lo;
+ uint32_t status_hi;
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t ipid_lo;
+ uint32_t ipid_hi;
+ uint32_t synd_lo;
+ uint32_t synd_hi;
+};
+
+struct cper_sec_crashdump_body_fatal { /* same leading four fields as the boot body; payload is register data */
+ uint16_t reg_ctx_type; /* presumably CPER_CTX_TYPE_CRASH for this body - TODO confirm */
+ uint16_t reg_arr_size;
+ uint32_t reserved1;
+ uint64_t reserved2;
+ struct cper_sec_crashdump_reg_data data;
+};
+
+struct cper_sec_crashdump_body_boot { /* same leading four fields as the fatal body; payload is a message array */
+ uint16_t reg_ctx_type; /* presumably CPER_CTX_TYPE_BOOT for this body - TODO confirm */
+ uint16_t reg_arr_size;
+ uint32_t reserved1;
+ uint64_t reserved2;
+ uint64_t msg[CPER_MAX_OAM_COUNT]; /* CPER_MAX_OAM_COUNT is 8 */
+};
+
+struct cper_sec_crashdump_fatal { /* crashdump header followed by the fatal body */
+ struct cper_sec_crashdump_hdr hdr;
+ struct cper_sec_crashdump_body_fatal body;
+};
+
+struct cper_sec_crashdump_boot { /* crashdump header followed by the boot body */
+ struct cper_sec_crashdump_hdr hdr;
+ struct cper_sec_crashdump_body_boot body;
+};
+
+#pragma pack(pop)
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/amd_pcie.h b/drivers/gpu/drm/amd/include/amd_pcie.h
index a1ece3eecdf5..a08611cb8041 100644
--- a/drivers/gpu/drm/amd/include/amd_pcie.h
+++ b/drivers/gpu/drm/amd/include/amd_pcie.h
@@ -49,6 +49,17 @@
| CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3)
/* Following flags shows PCIe lane width switch supported in driver which are decided by chipset and ASIC */
+
+#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1 0x00000001
+#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 0x00000002
+#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 0x00000004
+#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 0x00000008
+#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 0x00000010
+#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 0x00000020
+#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 0x00000040
+#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_MASK 0x0000FFFF
+#define CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_SHIFT 0
+
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_X1 0x00010000
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 0x00020000
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 0x00040000
@@ -56,6 +67,7 @@
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 0x00100000
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 0x00200000
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 0x00400000
+#define CAIL_PCIE_LINK_WIDTH_SUPPORT_MASK 0xFFFF0000
#define CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT 16
/* 1/2/4/8/16 lanes */
@@ -65,4 +77,10 @@
| CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \
| CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
+#define AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1 \
+ | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \
+ | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \
+ | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \
+ | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16)
+
#endif
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 67d7b7ee8a2a..75efda2969cf 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -24,9 +24,12 @@
#define __AMD_SHARED_H__
#include <drm/amd_asic_type.h>
+#include <drm/drm_print.h>
#define AMD_MAX_USEC_TIMEOUT 1000000 /* 1000 ms */
+struct amdgpu_ip_block;
+
/*
* Chip flags
@@ -60,7 +63,7 @@ enum amd_apu_flags {
* acquires the list of IP blocks for the GPU in use on initialization.
* It can then operate on this list to perform standard driver operations
* such as: init, fini, suspend, resume, etc.
-*
+*
*
* IP block implementations are named using the following convention:
* <functionality>_v<version> (E.g.: gfx_v6_0).
@@ -83,6 +86,9 @@ enum amd_apu_flags {
* @AMD_IP_BLOCK_TYPE_VCN: Video Core/Codec Next
* @AMD_IP_BLOCK_TYPE_MES: Micro-Engine Scheduler
* @AMD_IP_BLOCK_TYPE_JPEG: JPEG Engine
+* @AMD_IP_BLOCK_TYPE_VPE: Video Processing Engine
+* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Scheduler for Multimedia
+* @AMD_IP_BLOCK_TYPE_ISP: Image Signal Processor
* @AMD_IP_BLOCK_TYPE_NUM: Total number of IP block types
*/
enum amd_ip_block_type {
@@ -100,6 +106,9 @@ enum amd_ip_block_type {
AMD_IP_BLOCK_TYPE_VCN,
AMD_IP_BLOCK_TYPE_MES,
AMD_IP_BLOCK_TYPE_JPEG,
+ AMD_IP_BLOCK_TYPE_VPE,
+ AMD_IP_BLOCK_TYPE_UMSCH_MM,
+ AMD_IP_BLOCK_TYPE_ISP,
AMD_IP_BLOCK_TYPE_NUM,
};
@@ -170,6 +179,7 @@ enum amd_powergating_state {
#define AMD_PG_SUPPORT_ATHUB (1 << 16)
#define AMD_PG_SUPPORT_JPEG (1 << 17)
#define AMD_PG_SUPPORT_IH_SRAM_PG (1 << 18)
+#define AMD_PG_SUPPORT_JPEG_DPG (1 << 19)
/**
* enum PP_FEATURE_MASK - Used to mask power play features.
@@ -229,30 +239,177 @@ enum amd_harvest_ip_mask {
AMD_HARVEST_IP_DMU_MASK = 0x4,
};
+/**
+ * enum DC_FEATURE_MASK - Bits that control DC feature defaults
+ */
enum DC_FEATURE_MASK {
//Default value can be found at "uint amdgpu_dc_feature_mask"
- DC_FBC_MASK = (1 << 0), //0x1, disabled by default
- DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default
- DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default
- DC_PSR_MASK = (1 << 3), //0x8, disabled by default for dcn < 3.1
- DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default
- DC_DISABLE_LTTPR_DP1_4A = (1 << 5), //0x20, disabled by default
- DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default
- DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default
- DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default
- DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4
+ /**
+ * @DC_FBC_MASK: (0x1) disabled by default
+ */
+ DC_FBC_MASK = (1 << 0),
+ /**
+ * @DC_MULTI_MON_PP_MCLK_SWITCH_MASK: (0x2) enabled by default
+ */
+ DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1),
+ /**
+ * @DC_DISABLE_FRACTIONAL_PWM_MASK: (0x4) disabled by default
+ */
+ DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2),
+ /**
+ * @DC_PSR_MASK: (0x8) disabled by default for DCN < 3.1
+ */
+ DC_PSR_MASK = (1 << 3),
+ /**
+ * @DC_EDP_NO_POWER_SEQUENCING: (0x10) disabled by default
+ */
+ DC_EDP_NO_POWER_SEQUENCING = (1 << 4),
+ /**
+ * @DC_DISABLE_LTTPR_DP1_4A: (0x20) disabled by default
+ */
+ DC_DISABLE_LTTPR_DP1_4A = (1 << 5),
+ /**
+ * @DC_DISABLE_LTTPR_DP2_0: (0x40) disabled by default
+ */
+ DC_DISABLE_LTTPR_DP2_0 = (1 << 6),
+ /**
+ * @DC_PSR_ALLOW_SMU_OPT: (0x80) disabled by default
+ */
+ DC_PSR_ALLOW_SMU_OPT = (1 << 7),
+ /**
+ * @DC_PSR_ALLOW_MULTI_DISP_OPT: (0x100) disabled by default
+ */
+ DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8),
+ /**
+ * @DC_REPLAY_MASK: (0x200) disabled by default for DCN < 3.1.4
+ */
+ DC_REPLAY_MASK = (1 << 9),
};
+/**
+ * enum DC_DEBUG_MASK - Bits that are useful for debugging the Display Core IP
+ */
enum DC_DEBUG_MASK {
+ /**
+ * @DC_DISABLE_PIPE_SPLIT: (0x1) If set, disable pipe-splitting
+ */
DC_DISABLE_PIPE_SPLIT = 0x1,
+
+ /**
+ * @DC_DISABLE_STUTTER: (0x2) If set, disable memory stutter mode
+ */
DC_DISABLE_STUTTER = 0x2,
+
+ /**
+ * @DC_DISABLE_DSC: (0x4) If set, disable display stream compression
+ */
DC_DISABLE_DSC = 0x4,
+
+ /**
+ * @DC_DISABLE_CLOCK_GATING: (0x8) If set, disable clock gating optimizations
+ */
DC_DISABLE_CLOCK_GATING = 0x8,
+
+ /**
+ * @DC_DISABLE_PSR: (0x10) If set, disable Panel Self Refresh (PSR) v1 and PSR-SU
+ */
DC_DISABLE_PSR = 0x10,
+
+ /**
+ * @DC_FORCE_SUBVP_MCLK_SWITCH: (0x20) If set, force mclk switch in subvp, even
+ * if mclk switch in vblank is possible
+ */
DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
+
+ /**
+ * @DC_DISABLE_MPO: (0x40) If set, disable multi-plane offloading
+ */
DC_DISABLE_MPO = 0x40,
- DC_DISABLE_REPLAY = 0x50,
+
+ /**
+ * @DC_ENABLE_DPIA_TRACE: (0x80) If set, enable trace logging for DPIA
+ */
DC_ENABLE_DPIA_TRACE = 0x80,
+
+ /**
+ * @DC_ENABLE_DML2: (0x100) If set, force usage of DML2, even if the DCN version
+ * does not default to it.
+ */
+ DC_ENABLE_DML2 = 0x100,
+
+ /**
+ * @DC_DISABLE_PSR_SU: (0x200) If set, disable PSR SU
+ */
+ DC_DISABLE_PSR_SU = 0x200,
+
+ /**
+ * @DC_DISABLE_REPLAY: (0x400) If set, disable Panel Replay
+ */
+ DC_DISABLE_REPLAY = 0x400,
+
+ /**
+ * @DC_DISABLE_IPS: (0x800) If set, disable all Idle Power States, all the time.
+ * If more than one IPS debug bit is set, the lowest bit takes
+ * precedence. For example, if DC_FORCE_IPS_ENABLE and
+ * DC_DISABLE_IPS_DYNAMIC are set, then DC_DISABLE_IPS_DYNAMIC takes
+ * precedence.
+ */
+ DC_DISABLE_IPS = 0x800,
+
+ /**
+ * @DC_DISABLE_IPS_DYNAMIC: (0x1000) If set, disable all IPS, all the time,
+ * *except* when the driver goes into suspend.
+ */
+ DC_DISABLE_IPS_DYNAMIC = 0x1000,
+
+ /**
+ * @DC_DISABLE_IPS2_DYNAMIC: (0x2000) If set, disable IPS2 (IPS1 allowed) if
+ * there is an enabled display. Otherwise, enable all IPS.
+ */
+ DC_DISABLE_IPS2_DYNAMIC = 0x2000,
+
+ /**
+ * @DC_FORCE_IPS_ENABLE: (0x4000) If set, force enable all IPS, all the time.
+ */
+ DC_FORCE_IPS_ENABLE = 0x4000,
+ /**
+ * @DC_DISABLE_ACPI_EDID: (0x8000) If set, don't attempt to fetch EDID for
+ * eDP display from ACPI _DDC method.
+ */
+ DC_DISABLE_ACPI_EDID = 0x8000,
+
+ /**
+ * @DC_DISABLE_HDMI_CEC: (0x10000) If set, disable HDMI-CEC feature in amdgpu driver.
+ */
+ DC_DISABLE_HDMI_CEC = 0x10000,
+
+ /**
+ * @DC_DISABLE_SUBVP_FAMS: (0x20000) If set, disable DCN Sub-Viewport & Firmware Assisted
+ * Memory Clock Switching (FAMS) feature in amdgpu driver.
+ */
+ DC_DISABLE_SUBVP_FAMS = 0x20000,
+ /**
+ * @DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE: (0x40000) If set, disable support for custom
+ * brightness curves
+ */
+ DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE = 0x40000,
+
+ /**
+ * @DC_HDCP_LC_FORCE_FW_ENABLE: (0x80000) If set, use HDCP Locality Check FW
+ * path regardless of reported HW capabilities.
+ */
+ DC_HDCP_LC_FORCE_FW_ENABLE = 0x80000,
+
+ /**
+ * @DC_HDCP_LC_ENABLE_SW_FALLBACK: (0x100000) If set, upon HDCP Locality Check FW
+ * path failure, retry using legacy SW path.
+ */
+ DC_HDCP_LC_ENABLE_SW_FALLBACK = 0x100000,
+
+ /**
+ * @DC_SKIP_DETECTION_LT: (0x200000) If set, skip detection link training
+ */
+ DC_SKIP_DETECTION_LT = 0x200000,
};
enum amd_dpm_forced_level;
@@ -269,8 +426,11 @@ enum amd_dpm_forced_level;
* @hw_init: sets up the hw state
* @hw_fini: tears down the hw state
* @late_fini: final cleanup
+ * @prepare_suspend: handle IP specific changes to prepare for suspend
+ * (such as allocating any required memory)
* @suspend: handles IP specific hw/sw changes for suspend
* @resume: handles IP specific hw/sw changes for resume
+ * @complete: handles IP specific changes after resume
* @is_idle: returns current IP block idle status
* @wait_for_idle: poll for idle
* @check_soft_reset: check soft reset the IP block
@@ -280,6 +440,8 @@ enum amd_dpm_forced_level;
* @set_clockgating_state: enable/disable cg for the IP block
* @set_powergating_state: enable/disable pg for the IP block
* @get_clockgating_state: get current clockgating status
+ * @dump_ip_state: dump the IP state of the ASIC during a GPU hang
+ * @print_ip_state: print the IP state in devcoredump for each IP of the ASIC
*
* These hooks provide an interface for controlling the operational state
* of IP blocks. After acquiring a list of IP blocks for the GPU in use,
@@ -289,27 +451,31 @@ enum amd_dpm_forced_level;
*/
struct amd_ip_funcs {
char *name;
- int (*early_init)(void *handle);
- int (*late_init)(void *handle);
- int (*sw_init)(void *handle);
- int (*sw_fini)(void *handle);
- int (*early_fini)(void *handle);
- int (*hw_init)(void *handle);
- int (*hw_fini)(void *handle);
- void (*late_fini)(void *handle);
- int (*suspend)(void *handle);
- int (*resume)(void *handle);
- bool (*is_idle)(void *handle);
- int (*wait_for_idle)(void *handle);
- bool (*check_soft_reset)(void *handle);
- int (*pre_soft_reset)(void *handle);
- int (*soft_reset)(void *handle);
- int (*post_soft_reset)(void *handle);
- int (*set_clockgating_state)(void *handle,
+ int (*early_init)(struct amdgpu_ip_block *ip_block);
+ int (*late_init)(struct amdgpu_ip_block *ip_block);
+ int (*sw_init)(struct amdgpu_ip_block *ip_block);
+ int (*sw_fini)(struct amdgpu_ip_block *ip_block);
+ int (*early_fini)(struct amdgpu_ip_block *ip_block);
+ int (*hw_init)(struct amdgpu_ip_block *ip_block);
+ int (*hw_fini)(struct amdgpu_ip_block *ip_block);
+ void (*late_fini)(struct amdgpu_ip_block *ip_block);
+ int (*prepare_suspend)(struct amdgpu_ip_block *ip_block);
+ int (*suspend)(struct amdgpu_ip_block *ip_block);
+ int (*resume)(struct amdgpu_ip_block *ip_block);
+ void (*complete)(struct amdgpu_ip_block *ip_block);
+ bool (*is_idle)(struct amdgpu_ip_block *ip_block);
+ int (*wait_for_idle)(struct amdgpu_ip_block *ip_block);
+ bool (*check_soft_reset)(struct amdgpu_ip_block *ip_block);
+ int (*pre_soft_reset)(struct amdgpu_ip_block *ip_block);
+ int (*soft_reset)(struct amdgpu_ip_block *ip_block);
+ int (*post_soft_reset)(struct amdgpu_ip_block *ip_block);
+ int (*set_clockgating_state)(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
- int (*set_powergating_state)(void *handle,
+ int (*set_powergating_state)(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
- void (*get_clockgating_state)(void *handle, u64 *flags);
+ void (*get_clockgating_state)(struct amdgpu_ip_block *ip_block, u64 *flags);
+ void (*dump_ip_state)(struct amdgpu_ip_block *ip_block);
+ void (*print_ip_state)(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
};
diff --git a/drivers/gpu/drm/amd/include/amdgpu_reg_state.h b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
new file mode 100644
index 000000000000..335980e2afbf
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/amdgpu_reg_state.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_REG_STATE_H__
+#define __AMDGPU_REG_STATE_H__
+
+enum amdgpu_reg_state { /* type tags; the *_v1_0 layouts in this header require one in common_header.state_type */
+ AMDGPU_REG_STATE_TYPE_INVALID = 0,
+ AMDGPU_REG_STATE_TYPE_XGMI = 1,
+ AMDGPU_REG_STATE_TYPE_WAFL = 2,
+ AMDGPU_REG_STATE_TYPE_PCIE = 3,
+ AMDGPU_REG_STATE_TYPE_USR = 4,
+ AMDGPU_REG_STATE_TYPE_USR_1 = 5 /* no USR_1-specific layout is declared in this header */
+};
+
+enum amdgpu_sysfs_reg_offset { /* values are spaced 0x1000 apart, in the same order as enum amdgpu_reg_state */
+ AMDGPU_SYS_REG_STATE_XGMI = 0x0000,
+ AMDGPU_SYS_REG_STATE_WAFL = 0x1000,
+ AMDGPU_SYS_REG_STATE_PCIE = 0x2000,
+ AMDGPU_SYS_REG_STATE_USR = 0x3000,
+ AMDGPU_SYS_REG_STATE_USR_1 = 0x4000,
+ AMDGPU_SYS_REG_STATE_END = 0x5000, /* one 0x1000 step past USR_1; NOTE(review): presumably an exclusive upper bound - confirm */
+};
+
+struct amdgpu_reg_state_header { /* "common_header" member of every amdgpu_reg_state_*_v1_0 in this header */
+ uint16_t structure_size; /* NOTE(review): presumably the total size of the state blob - confirm with the producer */
+ uint8_t format_revision;
+ uint8_t content_revision;
+ uint8_t state_type; /* an AMDGPU_REG_STATE_TYPE_* value */
+ uint8_t num_instances; /* NOTE(review): presumably the number of trailing per-instance records - confirm */
+ uint16_t pad; /* 2 + 1 + 1 + 1 + 1 + 2: rounds the header up to 8 bytes */
+};
+
+enum amdgpu_reg_inst_state { /* no explicit initializers: the values are 0, 1, 2 in declaration order */
+ AMDGPU_INST_S_OK,
+ AMDGPU_INST_S_EDISABLED,
+ AMDGPU_INST_S_EACCESS,
+};
+
+struct amdgpu_smn_reg_data { /* element type of every smn_reg_values[] array in this header */
+ uint64_t addr;
+ uint32_t value;
+ uint32_t pad; /* 8 + 4 + 4: brings one entry to 16 bytes */
+};
+
+struct amdgpu_reg_inst_header { /* "inst_header" member that starts each amdgpu_regs_*_v1_0 record */
+ uint16_t instance;
+ uint16_t state; /* NOTE(review): presumably an enum amdgpu_reg_inst_state value - confirm */
+ uint16_t num_smn_regs; /* NOTE(review): presumably the length of the record's smn_reg_values[] - confirm */
+ uint16_t pad; /* fourth 16-bit field: the header is 8 bytes */
+};
+
+
+struct amdgpu_regs_xgmi_v1_0 {
+ struct amdgpu_reg_inst_header inst_header;
+
+ struct amdgpu_smn_reg_data smn_reg_values[];
+};
+
+struct amdgpu_reg_state_xgmi_v1_0 {
+ /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_XGMI */
+ struct amdgpu_reg_state_header common_header;
+
+ struct amdgpu_regs_xgmi_v1_0 xgmi_state_regs[];
+};
+
+struct amdgpu_regs_wafl_v1_0 {
+ struct amdgpu_reg_inst_header inst_header;
+
+ struct amdgpu_smn_reg_data smn_reg_values[];
+};
+
+struct amdgpu_reg_state_wafl_v1_0 {
+ /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_WAFL */
+ struct amdgpu_reg_state_header common_header;
+
+ struct amdgpu_regs_wafl_v1_0 wafl_state_regs[];
+};
+
+struct amdgpu_regs_pcie_v1_0 {
+ struct amdgpu_reg_inst_header inst_header;
+
+ uint16_t device_status;
+ uint16_t link_status;
+ uint32_t sub_bus_number_latency;
+ uint32_t pcie_corr_err_status;
+ uint32_t pcie_uncorr_err_status;
+
+ struct amdgpu_smn_reg_data smn_reg_values[];
+};
+
+struct amdgpu_reg_state_pcie_v1_0 {
+ /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_PCIE */
+ struct amdgpu_reg_state_header common_header;
+
+ struct amdgpu_regs_pcie_v1_0 pci_state_regs[];
+};
+
+struct amdgpu_regs_usr_v1_0 {
+ struct amdgpu_reg_inst_header inst_header;
+
+ struct amdgpu_smn_reg_data smn_reg_values[];
+};
+
+struct amdgpu_reg_state_usr_v1_0 {
+ /* common_header.state_type must be AMDGPU_REG_STATE_TYPE_USR */
+ struct amdgpu_reg_state_header common_header;
+
+ struct amdgpu_regs_usr_v1_0 usr_state_regs[];
+};
+
+static inline size_t amdgpu_reginst_size(uint16_t num_inst, size_t inst_size,
+ uint16_t num_regs)
+{
+ /* total = num_inst * (inst_size + num_regs entries of struct amdgpu_smn_reg_data) */
+ return num_inst * (inst_size + num_regs * sizeof(struct amdgpu_smn_reg_data));
+}
+
+#define amdgpu_asic_get_reg_state_supported(adev) \
+ (((adev)->asic_funcs && (adev)->asic_funcs->get_reg_state) ? 1 : 0)
+
+#define amdgpu_asic_get_reg_state(adev, state, buf, size) \
+ (((adev)->asic_funcs && (adev)->asic_funcs->get_reg_state) ? \
+ (adev)->asic_funcs->get_reg_state((adev), (state), (buf), \
+ (size)) : \
+ 0) /* 0 when asic_funcs or its get_reg_state hook is NULL; same guard as amdgpu_asic_get_reg_state_supported() */
+
+struct amdgpu_device; /* forward declaration: the prototypes below only take pointers to it */
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/arct_ip_offset.h b/drivers/gpu/drm/amd/include/arct_ip_offset.h
index af1c46991429..7dd876f7df74 100644
--- a/drivers/gpu/drm/amd/include/arct_ip_offset.h
+++ b/drivers/gpu/drm/amd/include/arct_ip_offset.h
@@ -25,13 +25,11 @@
#define MAX_SEGMENT 6
-struct IP_BASE_INSTANCE
-{
+struct IP_BASE_INSTANCE {
unsigned int segment[MAX_SEGMENT];
} __maybe_unused;
-struct IP_BASE
-{
+struct IP_BASE {
struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
} __maybe_unused;
diff --git a/drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_offset.h
new file mode 100644
index 000000000000..84483366ab6a
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_offset.h
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef _athub_4_1_0_OFFSET_HEADER
+#define _athub_4_1_0_OFFSET_HEADER
+
+
+
+// addressBlock: athub_xpbdec
+// base address: 0x3000
+#define regXPB_RTR_SRC_APRTR0 0x0000
+#define regXPB_RTR_SRC_APRTR0_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR1 0x0001
+#define regXPB_RTR_SRC_APRTR1_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR2 0x0002
+#define regXPB_RTR_SRC_APRTR2_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR3 0x0003
+#define regXPB_RTR_SRC_APRTR3_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR4 0x0004
+#define regXPB_RTR_SRC_APRTR4_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR5 0x0005
+#define regXPB_RTR_SRC_APRTR5_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR6 0x0006
+#define regXPB_RTR_SRC_APRTR6_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR7 0x0007
+#define regXPB_RTR_SRC_APRTR7_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR8 0x0008
+#define regXPB_RTR_SRC_APRTR8_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR9 0x0009
+#define regXPB_RTR_SRC_APRTR9_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR10 0x000a
+#define regXPB_RTR_SRC_APRTR10_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR11 0x000b
+#define regXPB_RTR_SRC_APRTR11_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR12 0x000c
+#define regXPB_RTR_SRC_APRTR12_BASE_IDX 0
+#define regXPB_RTR_SRC_APRTR13 0x000d
+#define regXPB_RTR_SRC_APRTR13_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP0 0x000e
+#define regXPB_RTR_DEST_MAP0_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP1 0x000f
+#define regXPB_RTR_DEST_MAP1_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP2 0x0010
+#define regXPB_RTR_DEST_MAP2_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP3 0x0011
+#define regXPB_RTR_DEST_MAP3_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP4 0x0012
+#define regXPB_RTR_DEST_MAP4_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP5 0x0013
+#define regXPB_RTR_DEST_MAP5_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP6 0x0014
+#define regXPB_RTR_DEST_MAP6_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP7 0x0015
+#define regXPB_RTR_DEST_MAP7_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP8 0x0016
+#define regXPB_RTR_DEST_MAP8_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP9 0x0017
+#define regXPB_RTR_DEST_MAP9_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP10 0x0018
+#define regXPB_RTR_DEST_MAP10_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP11 0x0019
+#define regXPB_RTR_DEST_MAP11_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP12 0x001a
+#define regXPB_RTR_DEST_MAP12_BASE_IDX 0
+#define regXPB_RTR_DEST_MAP13 0x001b
+#define regXPB_RTR_DEST_MAP13_BASE_IDX 0
+#define regXPB_CLG_CFG0 0x001c
+#define regXPB_CLG_CFG0_BASE_IDX 0
+#define regXPB_CLG_CFG1 0x001d
+#define regXPB_CLG_CFG1_BASE_IDX 0
+#define regXPB_CLG_CFG2 0x001e
+#define regXPB_CLG_CFG2_BASE_IDX 0
+#define regXPB_CLG_CFG3 0x001f
+#define regXPB_CLG_CFG3_BASE_IDX 0
+#define regXPB_CLG_CFG4 0x0020
+#define regXPB_CLG_CFG4_BASE_IDX 0
+#define regXPB_CLG_CFG5 0x0021
+#define regXPB_CLG_CFG5_BASE_IDX 0
+#define regXPB_CLG_CFG6 0x0022
+#define regXPB_CLG_CFG6_BASE_IDX 0
+#define regXPB_CLG_CFG7 0x0023
+#define regXPB_CLG_CFG7_BASE_IDX 0
+#define regXPB_CLG_EXTRA0 0x0024
+#define regXPB_CLG_EXTRA0_BASE_IDX 0
+#define regXPB_CLG_EXTRA1 0x0025
+#define regXPB_CLG_EXTRA1_BASE_IDX 0
+#define regXPB_CLG_EXTRA_MSK 0x0026
+#define regXPB_CLG_EXTRA_MSK_BASE_IDX 0
+#define regXPB_LB_ADDR 0x0027
+#define regXPB_LB_ADDR_BASE_IDX 0
+#define regXPB_HST_CFG 0x0028
+#define regXPB_HST_CFG_BASE_IDX 0
+#define regXPB_P2P_BAR_CFG 0x0029
+#define regXPB_P2P_BAR_CFG_BASE_IDX 0
+#define regXPB_P2P_BAR0 0x002a
+#define regXPB_P2P_BAR0_BASE_IDX 0
+#define regXPB_P2P_BAR1 0x002b
+#define regXPB_P2P_BAR1_BASE_IDX 0
+#define regXPB_P2P_BAR2 0x002c
+#define regXPB_P2P_BAR2_BASE_IDX 0
+#define regXPB_P2P_BAR3 0x002d
+#define regXPB_P2P_BAR3_BASE_IDX 0
+#define regXPB_P2P_BAR4 0x002e
+#define regXPB_P2P_BAR4_BASE_IDX 0
+#define regXPB_P2P_BAR5 0x002f
+#define regXPB_P2P_BAR5_BASE_IDX 0
+#define regXPB_P2P_BAR6 0x0030
+#define regXPB_P2P_BAR6_BASE_IDX 0
+#define regXPB_P2P_BAR7 0x0031
+#define regXPB_P2P_BAR7_BASE_IDX 0
+#define regXPB_P2P_BAR_SETUP 0x0032
+#define regXPB_P2P_BAR_SETUP_BASE_IDX 0
+#define regXPB_P2P_BAR_DELTA_ABOVE 0x0034
+#define regXPB_P2P_BAR_DELTA_ABOVE_BASE_IDX 0
+#define regXPB_P2P_BAR_DELTA_BELOW 0x0035
+#define regXPB_P2P_BAR_DELTA_BELOW_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR0 0x0036
+#define regXPB_PEER_SYS_BAR0_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR1 0x0037
+#define regXPB_PEER_SYS_BAR1_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR2 0x0038
+#define regXPB_PEER_SYS_BAR2_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR3 0x0039
+#define regXPB_PEER_SYS_BAR3_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR4 0x003a
+#define regXPB_PEER_SYS_BAR4_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR5 0x003b
+#define regXPB_PEER_SYS_BAR5_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR6 0x003c
+#define regXPB_PEER_SYS_BAR6_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR7 0x003d
+#define regXPB_PEER_SYS_BAR7_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR8 0x003e
+#define regXPB_PEER_SYS_BAR8_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR9 0x003f
+#define regXPB_PEER_SYS_BAR9_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR10 0x0040
+#define regXPB_PEER_SYS_BAR10_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR11 0x0041
+#define regXPB_PEER_SYS_BAR11_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR12 0x0042
+#define regXPB_PEER_SYS_BAR12_BASE_IDX 0
+#define regXPB_PEER_SYS_BAR13 0x0043
+#define regXPB_PEER_SYS_BAR13_BASE_IDX 0
+#define regXPB_CLK_GAT 0x0044
+#define regXPB_CLK_GAT_BASE_IDX 0
+#define regXPB_INTF_CFG 0x0045
+#define regXPB_INTF_CFG_BASE_IDX 0
+#define regXPB_INTF_STS 0x0046
+#define regXPB_INTF_STS_BASE_IDX 0
+#define regXPB_PIPE_STS 0x0047
+#define regXPB_PIPE_STS_BASE_IDX 0
+#define regXPB_WCB_STS 0x0048
+#define regXPB_WCB_STS_BASE_IDX 0
+#define regXPB_MAP_INVERT_FLUSH_NUM_LSB 0x0049
+#define regXPB_MAP_INVERT_FLUSH_NUM_LSB_BASE_IDX 0
+#define regXPB_STICKY 0x004a
+#define regXPB_STICKY_BASE_IDX 0
+#define regXPB_STICKY_W1C 0x004b
+#define regXPB_STICKY_W1C_BASE_IDX 0
+#define regXPB_SUB_CTRL 0x004c
+#define regXPB_SUB_CTRL_BASE_IDX 0
+#define regXPB_PERF_KNOBS 0x004d
+#define regXPB_PERF_KNOBS_BASE_IDX 0
+#define regXPB_MISC_CFG 0x004e
+#define regXPB_MISC_CFG_BASE_IDX 0
+#define regXPB_INTF_CFG2 0x004f
+#define regXPB_INTF_CFG2_BASE_IDX 0
+#define regXPB_CLG_EXTRA_RD 0x0050
+#define regXPB_CLG_EXTRA_RD_BASE_IDX 0
+#define regXPB_CLG_EXTRA_MSK_RD 0x0051
+#define regXPB_CLG_EXTRA_MSK_RD_BASE_IDX 0
+#define regXPB_CLG_GFX_MATCH 0x0052
+#define regXPB_CLG_GFX_MATCH_BASE_IDX 0
+#define regXPB_CLG_GFX_MATCH_VLD 0x0053
+#define regXPB_CLG_GFX_MATCH_VLD_BASE_IDX 0
+#define regXPB_CLG_GFX_MATCH_MSK 0x0054
+#define regXPB_CLG_GFX_MATCH_MSK_BASE_IDX 0
+#define regXPB_CLG_MM_MATCH 0x0055
+#define regXPB_CLG_MM_MATCH_BASE_IDX 0
+#define regXPB_CLG_MM_MATCH_VLD 0x0056
+#define regXPB_CLG_MM_MATCH_VLD_BASE_IDX 0
+#define regXPB_CLG_MM_MATCH_MSK 0x0057
+#define regXPB_CLG_MM_MATCH_MSK_BASE_IDX 0
+#define regXPB_CLG_GFX_UNITID_MAPPING0 0x005a
+#define regXPB_CLG_GFX_UNITID_MAPPING0_BASE_IDX 0
+#define regXPB_CLG_GFX_UNITID_MAPPING1 0x005b
+#define regXPB_CLG_GFX_UNITID_MAPPING1_BASE_IDX 0
+#define regXPB_CLG_GFX_UNITID_MAPPING2 0x005c
+#define regXPB_CLG_GFX_UNITID_MAPPING2_BASE_IDX 0
+#define regXPB_CLG_GFX_UNITID_MAPPING3 0x005d
+#define regXPB_CLG_GFX_UNITID_MAPPING3_BASE_IDX 0
+#define regXPB_CLG_GFX_UNITID_MAPPING4 0x005e
+#define regXPB_CLG_GFX_UNITID_MAPPING4_BASE_IDX 0
+#define regXPB_CLG_GFX_UNITID_MAPPING5 0x005f
+#define regXPB_CLG_GFX_UNITID_MAPPING5_BASE_IDX 0
+#define regXPB_CLG_GFX_UNITID_MAPPING6 0x0060
+#define regXPB_CLG_GFX_UNITID_MAPPING6_BASE_IDX 0
+#define regXPB_CLG_GFX_UNITID_MAPPING7 0x0061
+#define regXPB_CLG_GFX_UNITID_MAPPING7_BASE_IDX 0
+#define regXPB_CLG_MM_UNITID_MAPPING0 0x0062
+#define regXPB_CLG_MM_UNITID_MAPPING0_BASE_IDX 0
+#define regXPB_CLG_MM_UNITID_MAPPING1 0x0063
+#define regXPB_CLG_MM_UNITID_MAPPING1_BASE_IDX 0
+#define regXPB_CLG_MM_UNITID_MAPPING2 0x0064
+#define regXPB_CLG_MM_UNITID_MAPPING2_BASE_IDX 0
+#define regXPB_CLG_MM_UNITID_MAPPING3 0x0065
+#define regXPB_CLG_MM_UNITID_MAPPING3_BASE_IDX 0
+
+
+// addressBlock: athub_rpbdec
+// base address: 0x31d0
+#define regATHUB_SHARED_VIRT_RESET_REQ 0x0074
+#define regATHUB_SHARED_VIRT_RESET_REQ_BASE_IDX 0
+#define regATHUB_MEM_POWER_LS 0x007f
+#define regATHUB_MEM_POWER_LS_BASE_IDX 0
+#define regATHUB_MISC_CNTL 0x0080
+#define regATHUB_MISC_CNTL_BASE_IDX 0
+#define regRPB_PASSPW_CONF 0x0081
+#define regRPB_PASSPW_CONF_BASE_IDX 0
+#define regRPB_BLOCKLEVEL_CONF 0x0082
+#define regRPB_BLOCKLEVEL_CONF_BASE_IDX 0
+#define regRPB_TAG_CONF 0x0083
+#define regRPB_TAG_CONF_BASE_IDX 0
+#define regRPB_ARB_CNTL 0x0085
+#define regRPB_ARB_CNTL_BASE_IDX 0
+#define regRPB_ARB_CNTL2 0x0086
+#define regRPB_ARB_CNTL2_BASE_IDX 0
+#define regRPB_BIF_CNTL 0x0087
+#define regRPB_BIF_CNTL_BASE_IDX 0
+#define regRPB_BIF_CNTL2 0x0088
+#define regRPB_BIF_CNTL2_BASE_IDX 0
+#define regRPB_SDPPORT_CNTL 0x0089
+#define regRPB_SDPPORT_CNTL_BASE_IDX 0
+#define regRPB_NBIF_SDPPORT_CNTL 0x008a
+#define regRPB_NBIF_SDPPORT_CNTL_BASE_IDX 0
+#define regRPB_DEINTRLV_COMBINE_CNTL 0x008c
+#define regRPB_DEINTRLV_COMBINE_CNTL_BASE_IDX 0
+#define regRPB_VC_SWITCH_RDWR 0x008d
+#define regRPB_VC_SWITCH_RDWR_BASE_IDX 0
+#define regRPB_ATS_CNTL3 0x008e
+#define regRPB_ATS_CNTL3_BASE_IDX 0
+#define regRPB_DF_SDPPORT_CNTL 0x008f
+#define regRPB_DF_SDPPORT_CNTL_BASE_IDX 0
+#define regRPB_ATS_CNTL 0x0090
+#define regRPB_ATS_CNTL_BASE_IDX 0
+#define regRPB_ATS_CNTL2 0x0091
+#define regRPB_ATS_CNTL2_BASE_IDX 0
+#define regRPB_PERFCOUNTER0_CFG 0x0092
+#define regRPB_PERFCOUNTER0_CFG_BASE_IDX 0
+#define regRPB_PERFCOUNTER1_CFG 0x0093
+#define regRPB_PERFCOUNTER1_CFG_BASE_IDX 0
+#define regRPB_PERFCOUNTER2_CFG 0x0094
+#define regRPB_PERFCOUNTER2_CFG_BASE_IDX 0
+#define regRPB_PERFCOUNTER3_CFG 0x0095
+#define regRPB_PERFCOUNTER3_CFG_BASE_IDX 0
+#define regRPB_PERFCOUNTER_RSLT_CNTL 0x0096
+#define regRPB_PERFCOUNTER_RSLT_CNTL_BASE_IDX 0
+#define regRPB_PERF_COUNTER_CNTL 0x0097
+#define regRPB_PERF_COUNTER_CNTL_BASE_IDX 0
+#define regRPB_PERFCOUNTER_HI 0x0098
+#define regRPB_PERFCOUNTER_HI_BASE_IDX 0
+#define regRPB_PERFCOUNTER_LO 0x0099
+#define regRPB_PERFCOUNTER_LO_BASE_IDX 0
+#define regRPB_PERF_COUNTER_STATUS 0x009a
+#define regRPB_PERF_COUNTER_STATUS_BASE_IDX 0
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_sh_mask.h
new file mode 100644
index 000000000000..56499fd62239
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_sh_mask.h
@@ -0,0 +1,1348 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef _athub_4_1_0_SH_MASK_HEADER
+#define _athub_4_1_0_SH_MASK_HEADER
+
+
+// addressBlock: athub_xpbdec
+//XPB_RTR_SRC_APRTR0
+#define XPB_RTR_SRC_APRTR0__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR0__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR1
+#define XPB_RTR_SRC_APRTR1__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR1__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR2
+#define XPB_RTR_SRC_APRTR2__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR2__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR3
+#define XPB_RTR_SRC_APRTR3__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR3__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR4
+#define XPB_RTR_SRC_APRTR4__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR4__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR5
+#define XPB_RTR_SRC_APRTR5__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR5__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR6
+#define XPB_RTR_SRC_APRTR6__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR6__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR7
+#define XPB_RTR_SRC_APRTR7__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR7__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR8
+#define XPB_RTR_SRC_APRTR8__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR8__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR9
+#define XPB_RTR_SRC_APRTR9__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR9__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR10
+#define XPB_RTR_SRC_APRTR10__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR10__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR11
+#define XPB_RTR_SRC_APRTR11__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR11__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR12
+#define XPB_RTR_SRC_APRTR12__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR12__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_SRC_APRTR13
+#define XPB_RTR_SRC_APRTR13__BASE_ADDR__SHIFT 0x0
+#define XPB_RTR_SRC_APRTR13__BASE_ADDR_MASK 0x7FFFFFFFL
+//XPB_RTR_DEST_MAP0
+#define XPB_RTR_DEST_MAP0__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP0__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP0__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP0__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP0__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP0__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP0__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP0__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP0__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP0__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP0__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP0__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP1
+#define XPB_RTR_DEST_MAP1__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP1__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP1__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP1__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP1__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP1__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP1__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP1__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP1__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP1__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP1__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP1__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP2
+#define XPB_RTR_DEST_MAP2__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP2__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP2__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP2__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP2__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP2__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP2__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP2__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP2__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP2__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP2__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP2__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP3
+#define XPB_RTR_DEST_MAP3__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP3__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP3__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP3__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP3__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP3__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP3__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP3__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP3__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP3__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP3__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP3__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP4
+#define XPB_RTR_DEST_MAP4__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP4__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP4__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP4__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP4__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP4__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP4__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP4__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP4__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP4__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP4__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP4__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP5
+#define XPB_RTR_DEST_MAP5__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP5__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP5__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP5__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP5__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP5__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP5__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP5__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP5__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP5__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP5__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP5__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP6
+#define XPB_RTR_DEST_MAP6__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP6__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP6__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP6__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP6__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP6__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP6__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP6__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP6__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP6__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP6__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP6__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP7
+#define XPB_RTR_DEST_MAP7__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP7__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP7__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP7__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP7__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP7__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP7__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP7__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP7__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP7__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP7__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP7__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP8
+#define XPB_RTR_DEST_MAP8__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP8__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP8__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP8__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP8__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP8__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP8__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP8__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP8__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP8__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP8__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP8__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP9
+#define XPB_RTR_DEST_MAP9__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP9__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP9__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP9__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP9__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP9__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP9__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP9__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP9__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP9__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP9__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP9__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP10
+#define XPB_RTR_DEST_MAP10__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP10__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP10__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP10__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP10__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP10__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP10__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP10__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP10__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP10__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP10__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP10__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP11
+#define XPB_RTR_DEST_MAP11__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP11__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP11__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP11__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP11__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP11__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP11__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP11__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP11__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP11__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP11__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP11__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP12
+#define XPB_RTR_DEST_MAP12__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP12__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP12__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP12__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP12__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP12__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP12__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP12__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP12__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP12__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP12__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP12__APRTR_SIZE_MASK 0x7C000000L
+//XPB_RTR_DEST_MAP13
+#define XPB_RTR_DEST_MAP13__NMR__SHIFT 0x0
+#define XPB_RTR_DEST_MAP13__DEST_OFFSET__SHIFT 0x1
+#define XPB_RTR_DEST_MAP13__DEST_SEL__SHIFT 0x14
+#define XPB_RTR_DEST_MAP13__DEST_SEL_RPB__SHIFT 0x18
+#define XPB_RTR_DEST_MAP13__SIDE_OK__SHIFT 0x19
+#define XPB_RTR_DEST_MAP13__APRTR_SIZE__SHIFT 0x1a
+#define XPB_RTR_DEST_MAP13__NMR_MASK 0x00000001L
+#define XPB_RTR_DEST_MAP13__DEST_OFFSET_MASK 0x000FFFFEL
+#define XPB_RTR_DEST_MAP13__DEST_SEL_MASK 0x00F00000L
+#define XPB_RTR_DEST_MAP13__DEST_SEL_RPB_MASK 0x01000000L
+#define XPB_RTR_DEST_MAP13__SIDE_OK_MASK 0x02000000L
+#define XPB_RTR_DEST_MAP13__APRTR_SIZE_MASK 0x7C000000L
+//XPB_CLG_CFG0
+#define XPB_CLG_CFG0__WCB_NUM__SHIFT 0x0
+#define XPB_CLG_CFG0__LB_TYPE__SHIFT 0x4
+#define XPB_CLG_CFG0__P2P_BAR__SHIFT 0x7
+#define XPB_CLG_CFG0__HOST_FLUSH__SHIFT 0xa
+#define XPB_CLG_CFG0__SIDE_FLUSH__SHIFT 0xe
+#define XPB_CLG_CFG0__WCB_NUM_MASK 0x0000000FL
+#define XPB_CLG_CFG0__LB_TYPE_MASK 0x00000070L
+#define XPB_CLG_CFG0__P2P_BAR_MASK 0x00000380L
+#define XPB_CLG_CFG0__HOST_FLUSH_MASK 0x00003C00L
+#define XPB_CLG_CFG0__SIDE_FLUSH_MASK 0x0003C000L
+//XPB_CLG_CFG1
+#define XPB_CLG_CFG1__WCB_NUM__SHIFT 0x0
+#define XPB_CLG_CFG1__LB_TYPE__SHIFT 0x4
+#define XPB_CLG_CFG1__P2P_BAR__SHIFT 0x7
+#define XPB_CLG_CFG1__HOST_FLUSH__SHIFT 0xa
+#define XPB_CLG_CFG1__SIDE_FLUSH__SHIFT 0xe
+#define XPB_CLG_CFG1__WCB_NUM_MASK 0x0000000FL
+#define XPB_CLG_CFG1__LB_TYPE_MASK 0x00000070L
+#define XPB_CLG_CFG1__P2P_BAR_MASK 0x00000380L
+#define XPB_CLG_CFG1__HOST_FLUSH_MASK 0x00003C00L
+#define XPB_CLG_CFG1__SIDE_FLUSH_MASK 0x0003C000L
+//XPB_CLG_CFG2
+#define XPB_CLG_CFG2__WCB_NUM__SHIFT 0x0
+#define XPB_CLG_CFG2__LB_TYPE__SHIFT 0x4
+#define XPB_CLG_CFG2__P2P_BAR__SHIFT 0x7
+#define XPB_CLG_CFG2__HOST_FLUSH__SHIFT 0xa
+#define XPB_CLG_CFG2__SIDE_FLUSH__SHIFT 0xe
+#define XPB_CLG_CFG2__WCB_NUM_MASK 0x0000000FL
+#define XPB_CLG_CFG2__LB_TYPE_MASK 0x00000070L
+#define XPB_CLG_CFG2__P2P_BAR_MASK 0x00000380L
+#define XPB_CLG_CFG2__HOST_FLUSH_MASK 0x00003C00L
+#define XPB_CLG_CFG2__SIDE_FLUSH_MASK 0x0003C000L
+//XPB_CLG_CFG3
+#define XPB_CLG_CFG3__WCB_NUM__SHIFT 0x0
+#define XPB_CLG_CFG3__LB_TYPE__SHIFT 0x4
+#define XPB_CLG_CFG3__P2P_BAR__SHIFT 0x7
+#define XPB_CLG_CFG3__HOST_FLUSH__SHIFT 0xa
+#define XPB_CLG_CFG3__SIDE_FLUSH__SHIFT 0xe
+#define XPB_CLG_CFG3__WCB_NUM_MASK 0x0000000FL
+#define XPB_CLG_CFG3__LB_TYPE_MASK 0x00000070L
+#define XPB_CLG_CFG3__P2P_BAR_MASK 0x00000380L
+#define XPB_CLG_CFG3__HOST_FLUSH_MASK 0x00003C00L
+#define XPB_CLG_CFG3__SIDE_FLUSH_MASK 0x0003C000L
+//XPB_CLG_CFG4
+#define XPB_CLG_CFG4__WCB_NUM__SHIFT 0x0
+#define XPB_CLG_CFG4__LB_TYPE__SHIFT 0x4
+#define XPB_CLG_CFG4__P2P_BAR__SHIFT 0x7
+#define XPB_CLG_CFG4__HOST_FLUSH__SHIFT 0xa
+#define XPB_CLG_CFG4__SIDE_FLUSH__SHIFT 0xe
+#define XPB_CLG_CFG4__WCB_NUM_MASK 0x0000000FL
+#define XPB_CLG_CFG4__LB_TYPE_MASK 0x00000070L
+#define XPB_CLG_CFG4__P2P_BAR_MASK 0x00000380L
+#define XPB_CLG_CFG4__HOST_FLUSH_MASK 0x00003C00L
+#define XPB_CLG_CFG4__SIDE_FLUSH_MASK 0x0003C000L
+//XPB_CLG_CFG5
+#define XPB_CLG_CFG5__WCB_NUM__SHIFT 0x0
+#define XPB_CLG_CFG5__LB_TYPE__SHIFT 0x4
+#define XPB_CLG_CFG5__P2P_BAR__SHIFT 0x7
+#define XPB_CLG_CFG5__HOST_FLUSH__SHIFT 0xa
+#define XPB_CLG_CFG5__SIDE_FLUSH__SHIFT 0xe
+#define XPB_CLG_CFG5__WCB_NUM_MASK 0x0000000FL
+#define XPB_CLG_CFG5__LB_TYPE_MASK 0x00000070L
+#define XPB_CLG_CFG5__P2P_BAR_MASK 0x00000380L
+#define XPB_CLG_CFG5__HOST_FLUSH_MASK 0x00003C00L
+#define XPB_CLG_CFG5__SIDE_FLUSH_MASK 0x0003C000L
+//XPB_CLG_CFG6
+#define XPB_CLG_CFG6__WCB_NUM__SHIFT 0x0
+#define XPB_CLG_CFG6__LB_TYPE__SHIFT 0x4
+#define XPB_CLG_CFG6__P2P_BAR__SHIFT 0x7
+#define XPB_CLG_CFG6__HOST_FLUSH__SHIFT 0xa
+#define XPB_CLG_CFG6__SIDE_FLUSH__SHIFT 0xe
+#define XPB_CLG_CFG6__WCB_NUM_MASK 0x0000000FL
+#define XPB_CLG_CFG6__LB_TYPE_MASK 0x00000070L
+#define XPB_CLG_CFG6__P2P_BAR_MASK 0x00000380L
+#define XPB_CLG_CFG6__HOST_FLUSH_MASK 0x00003C00L
+#define XPB_CLG_CFG6__SIDE_FLUSH_MASK 0x0003C000L
+//XPB_CLG_CFG7
+#define XPB_CLG_CFG7__WCB_NUM__SHIFT 0x0
+#define XPB_CLG_CFG7__LB_TYPE__SHIFT 0x4
+#define XPB_CLG_CFG7__P2P_BAR__SHIFT 0x7
+#define XPB_CLG_CFG7__HOST_FLUSH__SHIFT 0xa
+#define XPB_CLG_CFG7__SIDE_FLUSH__SHIFT 0xe
+#define XPB_CLG_CFG7__WCB_NUM_MASK 0x0000000FL
+#define XPB_CLG_CFG7__LB_TYPE_MASK 0x00000070L
+#define XPB_CLG_CFG7__P2P_BAR_MASK 0x00000380L
+#define XPB_CLG_CFG7__HOST_FLUSH_MASK 0x00003C00L
+#define XPB_CLG_CFG7__SIDE_FLUSH_MASK 0x0003C000L
+//XPB_CLG_EXTRA0
+#define XPB_CLG_EXTRA0__CMP0_HIGH__SHIFT 0x0
+#define XPB_CLG_EXTRA0__CMP0_LOW__SHIFT 0x8
+#define XPB_CLG_EXTRA0__VLD0__SHIFT 0xd
+#define XPB_CLG_EXTRA0__CLG0_NUM__SHIFT 0xe
+#define XPB_CLG_EXTRA0__CMP0_HIGH_MASK 0x000000FFL
+#define XPB_CLG_EXTRA0__CMP0_LOW_MASK 0x00001F00L
+#define XPB_CLG_EXTRA0__VLD0_MASK 0x00002000L
+#define XPB_CLG_EXTRA0__CLG0_NUM_MASK 0x0001C000L
+//XPB_CLG_EXTRA1
+#define XPB_CLG_EXTRA1__CMP1_HIGH__SHIFT 0x0
+#define XPB_CLG_EXTRA1__CMP1_LOW__SHIFT 0x8
+#define XPB_CLG_EXTRA1__VLD1__SHIFT 0xd
+#define XPB_CLG_EXTRA1__CLG1_NUM__SHIFT 0xe
+#define XPB_CLG_EXTRA1__CMP1_HIGH_MASK 0x000000FFL
+#define XPB_CLG_EXTRA1__CMP1_LOW_MASK 0x00001F00L
+#define XPB_CLG_EXTRA1__VLD1_MASK 0x00002000L
+#define XPB_CLG_EXTRA1__CLG1_NUM_MASK 0x0001C000L
+//XPB_CLG_EXTRA_MSK
+#define XPB_CLG_EXTRA_MSK__MSK0_HIGH__SHIFT 0x0
+#define XPB_CLG_EXTRA_MSK__MSK0_LOW__SHIFT 0x8
+#define XPB_CLG_EXTRA_MSK__MSK1_HIGH__SHIFT 0xd
+#define XPB_CLG_EXTRA_MSK__MSK1_LOW__SHIFT 0x15
+#define XPB_CLG_EXTRA_MSK__MSK0_HIGH_MASK 0x000000FFL
+#define XPB_CLG_EXTRA_MSK__MSK0_LOW_MASK 0x00001F00L
+#define XPB_CLG_EXTRA_MSK__MSK1_HIGH_MASK 0x001FE000L
+#define XPB_CLG_EXTRA_MSK__MSK1_LOW_MASK 0x03E00000L
+//XPB_LB_ADDR
+#define XPB_LB_ADDR__CMP0__SHIFT 0x0
+#define XPB_LB_ADDR__MASK0__SHIFT 0xa
+#define XPB_LB_ADDR__CMP1__SHIFT 0x14
+#define XPB_LB_ADDR__MASK1__SHIFT 0x1a
+#define XPB_LB_ADDR__CMP0_MASK 0x000003FFL
+#define XPB_LB_ADDR__MASK0_MASK 0x000FFC00L
+#define XPB_LB_ADDR__CMP1_MASK 0x03F00000L
+#define XPB_LB_ADDR__MASK1_MASK 0xFC000000L
+//XPB_HST_CFG
+#define XPB_HST_CFG__BAR_UP_WR_CMD__SHIFT 0x0
+#define XPB_HST_CFG__BAR_UP_WR_CMD_MASK 0x00000001L
+//XPB_P2P_BAR_CFG
+#define XPB_P2P_BAR_CFG__ADDR_SIZE__SHIFT 0x0
+#define XPB_P2P_BAR_CFG__SEND_BAR__SHIFT 0x4
+#define XPB_P2P_BAR_CFG__SNOOP__SHIFT 0x6
+#define XPB_P2P_BAR_CFG__SEND_DIS__SHIFT 0x7
+#define XPB_P2P_BAR_CFG__COMPRESS_DIS__SHIFT 0x8
+#define XPB_P2P_BAR_CFG__UPDATE_DIS__SHIFT 0x9
+#define XPB_P2P_BAR_CFG__REGBAR_FROM_SYSBAR__SHIFT 0xa
+#define XPB_P2P_BAR_CFG__RD_EN__SHIFT 0xb
+#define XPB_P2P_BAR_CFG__ATC_TRANSLATED__SHIFT 0xc
+#define XPB_P2P_BAR_CFG__ADDR_SIZE_MASK 0x0000000FL
+#define XPB_P2P_BAR_CFG__SEND_BAR_MASK 0x00000030L
+#define XPB_P2P_BAR_CFG__SNOOP_MASK 0x00000040L
+#define XPB_P2P_BAR_CFG__SEND_DIS_MASK 0x00000080L
+#define XPB_P2P_BAR_CFG__COMPRESS_DIS_MASK 0x00000100L
+#define XPB_P2P_BAR_CFG__UPDATE_DIS_MASK 0x00000200L
+#define XPB_P2P_BAR_CFG__REGBAR_FROM_SYSBAR_MASK 0x00000400L
+#define XPB_P2P_BAR_CFG__RD_EN_MASK 0x00000800L
+#define XPB_P2P_BAR_CFG__ATC_TRANSLATED_MASK 0x00001000L
+//XPB_P2P_BAR0
+#define XPB_P2P_BAR0__HOST_FLUSH__SHIFT 0x0
+#define XPB_P2P_BAR0__REG_SYS_BAR__SHIFT 0x4
+#define XPB_P2P_BAR0__MEM_SYS_BAR__SHIFT 0x8
+#define XPB_P2P_BAR0__VALID__SHIFT 0xc
+#define XPB_P2P_BAR0__SEND_DIS__SHIFT 0xd
+#define XPB_P2P_BAR0__COMPRESS_DIS__SHIFT 0xe
+#define XPB_P2P_BAR0__RESERVE__SHIFT 0xf
+#define XPB_P2P_BAR0__ADDRESS__SHIFT 0x10
+#define XPB_P2P_BAR0__HOST_FLUSH_MASK 0x0000000FL
+#define XPB_P2P_BAR0__REG_SYS_BAR_MASK 0x000000F0L
+#define XPB_P2P_BAR0__MEM_SYS_BAR_MASK 0x00000F00L
+#define XPB_P2P_BAR0__VALID_MASK 0x00001000L
+#define XPB_P2P_BAR0__SEND_DIS_MASK 0x00002000L
+#define XPB_P2P_BAR0__COMPRESS_DIS_MASK 0x00004000L
+#define XPB_P2P_BAR0__RESERVE_MASK 0x00008000L
+#define XPB_P2P_BAR0__ADDRESS_MASK 0xFFFF0000L
+//XPB_P2P_BAR1
+#define XPB_P2P_BAR1__HOST_FLUSH__SHIFT 0x0
+#define XPB_P2P_BAR1__REG_SYS_BAR__SHIFT 0x4
+#define XPB_P2P_BAR1__MEM_SYS_BAR__SHIFT 0x8
+#define XPB_P2P_BAR1__VALID__SHIFT 0xc
+#define XPB_P2P_BAR1__SEND_DIS__SHIFT 0xd
+#define XPB_P2P_BAR1__COMPRESS_DIS__SHIFT 0xe
+#define XPB_P2P_BAR1__RESERVE__SHIFT 0xf
+#define XPB_P2P_BAR1__ADDRESS__SHIFT 0x10
+#define XPB_P2P_BAR1__HOST_FLUSH_MASK 0x0000000FL
+#define XPB_P2P_BAR1__REG_SYS_BAR_MASK 0x000000F0L
+#define XPB_P2P_BAR1__MEM_SYS_BAR_MASK 0x00000F00L
+#define XPB_P2P_BAR1__VALID_MASK 0x00001000L
+#define XPB_P2P_BAR1__SEND_DIS_MASK 0x00002000L
+#define XPB_P2P_BAR1__COMPRESS_DIS_MASK 0x00004000L
+#define XPB_P2P_BAR1__RESERVE_MASK 0x00008000L
+#define XPB_P2P_BAR1__ADDRESS_MASK 0xFFFF0000L
+//XPB_P2P_BAR2
+#define XPB_P2P_BAR2__HOST_FLUSH__SHIFT 0x0
+#define XPB_P2P_BAR2__REG_SYS_BAR__SHIFT 0x4
+#define XPB_P2P_BAR2__MEM_SYS_BAR__SHIFT 0x8
+#define XPB_P2P_BAR2__VALID__SHIFT 0xc
+#define XPB_P2P_BAR2__SEND_DIS__SHIFT 0xd
+#define XPB_P2P_BAR2__COMPRESS_DIS__SHIFT 0xe
+#define XPB_P2P_BAR2__RESERVE__SHIFT 0xf
+#define XPB_P2P_BAR2__ADDRESS__SHIFT 0x10
+#define XPB_P2P_BAR2__HOST_FLUSH_MASK 0x0000000FL
+#define XPB_P2P_BAR2__REG_SYS_BAR_MASK 0x000000F0L
+#define XPB_P2P_BAR2__MEM_SYS_BAR_MASK 0x00000F00L
+#define XPB_P2P_BAR2__VALID_MASK 0x00001000L
+#define XPB_P2P_BAR2__SEND_DIS_MASK 0x00002000L
+#define XPB_P2P_BAR2__COMPRESS_DIS_MASK 0x00004000L
+#define XPB_P2P_BAR2__RESERVE_MASK 0x00008000L
+#define XPB_P2P_BAR2__ADDRESS_MASK 0xFFFF0000L
+//XPB_P2P_BAR3
+#define XPB_P2P_BAR3__HOST_FLUSH__SHIFT 0x0
+#define XPB_P2P_BAR3__REG_SYS_BAR__SHIFT 0x4
+#define XPB_P2P_BAR3__MEM_SYS_BAR__SHIFT 0x8
+#define XPB_P2P_BAR3__VALID__SHIFT 0xc
+#define XPB_P2P_BAR3__SEND_DIS__SHIFT 0xd
+#define XPB_P2P_BAR3__COMPRESS_DIS__SHIFT 0xe
+#define XPB_P2P_BAR3__RESERVE__SHIFT 0xf
+#define XPB_P2P_BAR3__ADDRESS__SHIFT 0x10
+#define XPB_P2P_BAR3__HOST_FLUSH_MASK 0x0000000FL
+#define XPB_P2P_BAR3__REG_SYS_BAR_MASK 0x000000F0L
+#define XPB_P2P_BAR3__MEM_SYS_BAR_MASK 0x00000F00L
+#define XPB_P2P_BAR3__VALID_MASK 0x00001000L
+#define XPB_P2P_BAR3__SEND_DIS_MASK 0x00002000L
+#define XPB_P2P_BAR3__COMPRESS_DIS_MASK 0x00004000L
+#define XPB_P2P_BAR3__RESERVE_MASK 0x00008000L
+#define XPB_P2P_BAR3__ADDRESS_MASK 0xFFFF0000L
+//XPB_P2P_BAR4
+#define XPB_P2P_BAR4__HOST_FLUSH__SHIFT 0x0
+#define XPB_P2P_BAR4__REG_SYS_BAR__SHIFT 0x4
+#define XPB_P2P_BAR4__MEM_SYS_BAR__SHIFT 0x8
+#define XPB_P2P_BAR4__VALID__SHIFT 0xc
+#define XPB_P2P_BAR4__SEND_DIS__SHIFT 0xd
+#define XPB_P2P_BAR4__COMPRESS_DIS__SHIFT 0xe
+#define XPB_P2P_BAR4__RESERVE__SHIFT 0xf
+#define XPB_P2P_BAR4__ADDRESS__SHIFT 0x10
+#define XPB_P2P_BAR4__HOST_FLUSH_MASK 0x0000000FL
+#define XPB_P2P_BAR4__REG_SYS_BAR_MASK 0x000000F0L
+#define XPB_P2P_BAR4__MEM_SYS_BAR_MASK 0x00000F00L
+#define XPB_P2P_BAR4__VALID_MASK 0x00001000L
+#define XPB_P2P_BAR4__SEND_DIS_MASK 0x00002000L
+#define XPB_P2P_BAR4__COMPRESS_DIS_MASK 0x00004000L
+#define XPB_P2P_BAR4__RESERVE_MASK 0x00008000L
+#define XPB_P2P_BAR4__ADDRESS_MASK 0xFFFF0000L
+//XPB_P2P_BAR5
+#define XPB_P2P_BAR5__HOST_FLUSH__SHIFT 0x0
+#define XPB_P2P_BAR5__REG_SYS_BAR__SHIFT 0x4
+#define XPB_P2P_BAR5__MEM_SYS_BAR__SHIFT 0x8
+#define XPB_P2P_BAR5__VALID__SHIFT 0xc
+#define XPB_P2P_BAR5__SEND_DIS__SHIFT 0xd
+#define XPB_P2P_BAR5__COMPRESS_DIS__SHIFT 0xe
+#define XPB_P2P_BAR5__RESERVE__SHIFT 0xf
+#define XPB_P2P_BAR5__ADDRESS__SHIFT 0x10
+#define XPB_P2P_BAR5__HOST_FLUSH_MASK 0x0000000FL
+#define XPB_P2P_BAR5__REG_SYS_BAR_MASK 0x000000F0L
+#define XPB_P2P_BAR5__MEM_SYS_BAR_MASK 0x00000F00L
+#define XPB_P2P_BAR5__VALID_MASK 0x00001000L
+#define XPB_P2P_BAR5__SEND_DIS_MASK 0x00002000L
+#define XPB_P2P_BAR5__COMPRESS_DIS_MASK 0x00004000L
+#define XPB_P2P_BAR5__RESERVE_MASK 0x00008000L
+#define XPB_P2P_BAR5__ADDRESS_MASK 0xFFFF0000L
+//XPB_P2P_BAR6
+#define XPB_P2P_BAR6__HOST_FLUSH__SHIFT 0x0
+#define XPB_P2P_BAR6__REG_SYS_BAR__SHIFT 0x4
+#define XPB_P2P_BAR6__MEM_SYS_BAR__SHIFT 0x8
+#define XPB_P2P_BAR6__VALID__SHIFT 0xc
+#define XPB_P2P_BAR6__SEND_DIS__SHIFT 0xd
+#define XPB_P2P_BAR6__COMPRESS_DIS__SHIFT 0xe
+#define XPB_P2P_BAR6__RESERVE__SHIFT 0xf
+#define XPB_P2P_BAR6__ADDRESS__SHIFT 0x10
+#define XPB_P2P_BAR6__HOST_FLUSH_MASK 0x0000000FL
+#define XPB_P2P_BAR6__REG_SYS_BAR_MASK 0x000000F0L
+#define XPB_P2P_BAR6__MEM_SYS_BAR_MASK 0x00000F00L
+#define XPB_P2P_BAR6__VALID_MASK 0x00001000L
+#define XPB_P2P_BAR6__SEND_DIS_MASK 0x00002000L
+#define XPB_P2P_BAR6__COMPRESS_DIS_MASK 0x00004000L
+#define XPB_P2P_BAR6__RESERVE_MASK 0x00008000L
+#define XPB_P2P_BAR6__ADDRESS_MASK 0xFFFF0000L
+//XPB_P2P_BAR7
+#define XPB_P2P_BAR7__HOST_FLUSH__SHIFT 0x0
+#define XPB_P2P_BAR7__REG_SYS_BAR__SHIFT 0x4
+#define XPB_P2P_BAR7__MEM_SYS_BAR__SHIFT 0x8
+#define XPB_P2P_BAR7__VALID__SHIFT 0xc
+#define XPB_P2P_BAR7__SEND_DIS__SHIFT 0xd
+#define XPB_P2P_BAR7__COMPRESS_DIS__SHIFT 0xe
+#define XPB_P2P_BAR7__RESERVE__SHIFT 0xf
+#define XPB_P2P_BAR7__ADDRESS__SHIFT 0x10
+#define XPB_P2P_BAR7__HOST_FLUSH_MASK 0x0000000FL
+#define XPB_P2P_BAR7__REG_SYS_BAR_MASK 0x000000F0L
+#define XPB_P2P_BAR7__MEM_SYS_BAR_MASK 0x00000F00L
+#define XPB_P2P_BAR7__VALID_MASK 0x00001000L
+#define XPB_P2P_BAR7__SEND_DIS_MASK 0x00002000L
+#define XPB_P2P_BAR7__COMPRESS_DIS_MASK 0x00004000L
+#define XPB_P2P_BAR7__RESERVE_MASK 0x00008000L
+#define XPB_P2P_BAR7__ADDRESS_MASK 0xFFFF0000L
+//XPB_P2P_BAR_SETUP
+#define XPB_P2P_BAR_SETUP__SEL__SHIFT 0x0
+#define XPB_P2P_BAR_SETUP__REG_SYS_BAR__SHIFT 0x8
+#define XPB_P2P_BAR_SETUP__VALID__SHIFT 0xc
+#define XPB_P2P_BAR_SETUP__SEND_DIS__SHIFT 0xd
+#define XPB_P2P_BAR_SETUP__COMPRESS_DIS__SHIFT 0xe
+#define XPB_P2P_BAR_SETUP__RESERVE__SHIFT 0xf
+#define XPB_P2P_BAR_SETUP__ADDRESS__SHIFT 0x10
+#define XPB_P2P_BAR_SETUP__SEL_MASK 0x000000FFL
+#define XPB_P2P_BAR_SETUP__REG_SYS_BAR_MASK 0x00000F00L
+#define XPB_P2P_BAR_SETUP__VALID_MASK 0x00001000L
+#define XPB_P2P_BAR_SETUP__SEND_DIS_MASK 0x00002000L
+#define XPB_P2P_BAR_SETUP__COMPRESS_DIS_MASK 0x00004000L
+#define XPB_P2P_BAR_SETUP__RESERVE_MASK 0x00008000L
+#define XPB_P2P_BAR_SETUP__ADDRESS_MASK 0xFFFF0000L
+//XPB_P2P_BAR_DELTA_ABOVE
+#define XPB_P2P_BAR_DELTA_ABOVE__EN__SHIFT 0x0
+#define XPB_P2P_BAR_DELTA_ABOVE__DELTA__SHIFT 0x8
+#define XPB_P2P_BAR_DELTA_ABOVE__EN_MASK 0x000000FFL
+#define XPB_P2P_BAR_DELTA_ABOVE__DELTA_MASK 0x0FFFFF00L
+//XPB_P2P_BAR_DELTA_BELOW
+#define XPB_P2P_BAR_DELTA_BELOW__EN__SHIFT 0x0
+#define XPB_P2P_BAR_DELTA_BELOW__DELTA__SHIFT 0x8
+#define XPB_P2P_BAR_DELTA_BELOW__EN_MASK 0x000000FFL
+#define XPB_P2P_BAR_DELTA_BELOW__DELTA_MASK 0x0FFFFF00L
+//XPB_PEER_SYS_BAR0
+#define XPB_PEER_SYS_BAR0__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR0__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR0__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR0__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR1
+#define XPB_PEER_SYS_BAR1__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR1__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR1__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR1__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR2
+#define XPB_PEER_SYS_BAR2__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR2__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR2__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR2__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR3
+#define XPB_PEER_SYS_BAR3__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR3__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR3__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR3__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR4
+#define XPB_PEER_SYS_BAR4__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR4__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR4__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR4__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR5
+#define XPB_PEER_SYS_BAR5__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR5__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR5__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR5__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR6
+#define XPB_PEER_SYS_BAR6__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR6__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR6__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR6__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR7
+#define XPB_PEER_SYS_BAR7__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR7__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR7__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR7__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR8
+#define XPB_PEER_SYS_BAR8__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR8__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR8__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR8__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR9
+#define XPB_PEER_SYS_BAR9__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR9__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR9__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR9__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR10
+#define XPB_PEER_SYS_BAR10__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR10__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR10__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR10__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR11
+#define XPB_PEER_SYS_BAR11__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR11__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR11__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR11__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR12
+#define XPB_PEER_SYS_BAR12__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR12__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR12__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR12__ADDR_MASK 0xFFFFFFFEL
+//XPB_PEER_SYS_BAR13
+#define XPB_PEER_SYS_BAR13__VALID__SHIFT 0x0
+#define XPB_PEER_SYS_BAR13__ADDR__SHIFT 0x1
+#define XPB_PEER_SYS_BAR13__VALID_MASK 0x00000001L
+#define XPB_PEER_SYS_BAR13__ADDR_MASK 0xFFFFFFFEL
+//XPB_CLK_GAT
+#define XPB_CLK_GAT__ONDLY__SHIFT 0x0
+#define XPB_CLK_GAT__OFFDLY__SHIFT 0x6
+#define XPB_CLK_GAT__RDYDLY__SHIFT 0xc
+#define XPB_CLK_GAT__ENABLE__SHIFT 0x12
+#define XPB_CLK_GAT__MEM_LS_ENABLE__SHIFT 0x13
+#define XPB_CLK_GAT__ONDLY_MASK 0x0000003FL
+#define XPB_CLK_GAT__OFFDLY_MASK 0x00000FC0L
+#define XPB_CLK_GAT__RDYDLY_MASK 0x0003F000L
+#define XPB_CLK_GAT__ENABLE_MASK 0x00040000L
+#define XPB_CLK_GAT__MEM_LS_ENABLE_MASK 0x00080000L
+//XPB_INTF_CFG
+#define XPB_INTF_CFG__RPB_WRREQ_CRD__SHIFT 0x0
+#define XPB_INTF_CFG__MC_WRRET_ASK__SHIFT 0x8
+#define XPB_INTF_CFG__XSP_REQ_CRD__SHIFT 0x10
+#define XPB_INTF_CFG__P2P_WR_CHAIN_BREAK__SHIFT 0x17
+#define XPB_INTF_CFG__XSP_SNOOP_SEL__SHIFT 0x1b
+#define XPB_INTF_CFG__XSP_SNOOP_VAL__SHIFT 0x1d
+#define XPB_INTF_CFG__XSP_ORDERING_SEL__SHIFT 0x1e
+#define XPB_INTF_CFG__QUALIFY_P2P_FOR_GPA__SHIFT 0x1f
+#define XPB_INTF_CFG__RPB_WRREQ_CRD_MASK 0x000000FFL
+#define XPB_INTF_CFG__MC_WRRET_ASK_MASK 0x0000FF00L
+#define XPB_INTF_CFG__XSP_REQ_CRD_MASK 0x007F0000L
+#define XPB_INTF_CFG__P2P_WR_CHAIN_BREAK_MASK 0x00800000L
+#define XPB_INTF_CFG__XSP_SNOOP_SEL_MASK 0x18000000L
+#define XPB_INTF_CFG__XSP_SNOOP_VAL_MASK 0x20000000L
+#define XPB_INTF_CFG__XSP_ORDERING_SEL_MASK 0x40000000L
+#define XPB_INTF_CFG__QUALIFY_P2P_FOR_GPA_MASK 0x80000000L
+//XPB_INTF_STS
+#define XPB_INTF_STS__RPB_WRREQ_CRD__SHIFT 0x0
+#define XPB_INTF_STS__XSP_REQ_CRD__SHIFT 0x8
+#define XPB_INTF_STS__HOP_DATA_BUF_FULL__SHIFT 0xf
+#define XPB_INTF_STS__HOP_ATTR_BUF_FULL__SHIFT 0x10
+#define XPB_INTF_STS__CNS_BUF_FULL__SHIFT 0x11
+#define XPB_INTF_STS__CNS_BUF_BUSY__SHIFT 0x12
+#define XPB_INTF_STS__RPB_RDREQ_CRD__SHIFT 0x13
+#define XPB_INTF_STS__RPB_WRREQ_CRD_MASK 0x000000FFL
+#define XPB_INTF_STS__XSP_REQ_CRD_MASK 0x00007F00L
+#define XPB_INTF_STS__HOP_DATA_BUF_FULL_MASK 0x00008000L
+#define XPB_INTF_STS__HOP_ATTR_BUF_FULL_MASK 0x00010000L
+#define XPB_INTF_STS__CNS_BUF_FULL_MASK 0x00020000L
+#define XPB_INTF_STS__CNS_BUF_BUSY_MASK 0x00040000L
+#define XPB_INTF_STS__RPB_RDREQ_CRD_MASK 0x07F80000L
+//XPB_PIPE_STS
+#define XPB_PIPE_STS__WCB_ANY_PBUF__SHIFT 0x0
+#define XPB_PIPE_STS__WCB_HST_DATA_BUF_CNT__SHIFT 0x1
+#define XPB_PIPE_STS__WCB_SID_DATA_BUF_CNT__SHIFT 0x8
+#define XPB_PIPE_STS__WCB_HST_RD_PTR_BUF_FULL__SHIFT 0xf
+#define XPB_PIPE_STS__WCB_SID_RD_PTR_BUF_FULL__SHIFT 0x10
+#define XPB_PIPE_STS__WCB_HST_REQ_FIFO_FULL__SHIFT 0x11
+#define XPB_PIPE_STS__WCB_SID_REQ_FIFO_FULL__SHIFT 0x12
+#define XPB_PIPE_STS__WCB_HST_REQ_OBUF_FULL__SHIFT 0x13
+#define XPB_PIPE_STS__WCB_SID_REQ_OBUF_FULL__SHIFT 0x14
+#define XPB_PIPE_STS__WCB_HST_DATA_OBUF_FULL__SHIFT 0x15
+#define XPB_PIPE_STS__WCB_SID_DATA_OBUF_FULL__SHIFT 0x16
+#define XPB_PIPE_STS__RET_BUF_FULL__SHIFT 0x17
+#define XPB_PIPE_STS__XPB_CLK_BUSY_BITS__SHIFT 0x18
+#define XPB_PIPE_STS__WCB_ANY_PBUF_MASK 0x00000001L
+#define XPB_PIPE_STS__WCB_HST_DATA_BUF_CNT_MASK 0x000000FEL
+#define XPB_PIPE_STS__WCB_SID_DATA_BUF_CNT_MASK 0x00007F00L
+#define XPB_PIPE_STS__WCB_HST_RD_PTR_BUF_FULL_MASK 0x00008000L
+#define XPB_PIPE_STS__WCB_SID_RD_PTR_BUF_FULL_MASK 0x00010000L
+#define XPB_PIPE_STS__WCB_HST_REQ_FIFO_FULL_MASK 0x00020000L
+#define XPB_PIPE_STS__WCB_SID_REQ_FIFO_FULL_MASK 0x00040000L
+#define XPB_PIPE_STS__WCB_HST_REQ_OBUF_FULL_MASK 0x00080000L
+#define XPB_PIPE_STS__WCB_SID_REQ_OBUF_FULL_MASK 0x00100000L
+#define XPB_PIPE_STS__WCB_HST_DATA_OBUF_FULL_MASK 0x00200000L
+#define XPB_PIPE_STS__WCB_SID_DATA_OBUF_FULL_MASK 0x00400000L
+#define XPB_PIPE_STS__RET_BUF_FULL_MASK 0x00800000L
+#define XPB_PIPE_STS__XPB_CLK_BUSY_BITS_MASK 0xFF000000L
+//XPB_WCB_STS
+#define XPB_WCB_STS__PBUF_VLD__SHIFT 0x0
+#define XPB_WCB_STS__WCB_HST_DATA_BUF_CNT__SHIFT 0x10
+#define XPB_WCB_STS__WCB_SID_DATA_BUF_CNT__SHIFT 0x17
+#define XPB_WCB_STS__PBUF_VLD_MASK 0x0000FFFFL
+#define XPB_WCB_STS__WCB_HST_DATA_BUF_CNT_MASK 0x007F0000L
+#define XPB_WCB_STS__WCB_SID_DATA_BUF_CNT_MASK 0x3F800000L
+//XPB_MAP_INVERT_FLUSH_NUM_LSB
+#define XPB_MAP_INVERT_FLUSH_NUM_LSB__ALTER_FLUSH_NUM__SHIFT 0x0
+#define XPB_MAP_INVERT_FLUSH_NUM_LSB__ALTER_FLUSH_NUM_MASK 0x0000FFFFL
+//XPB_STICKY
+#define XPB_STICKY__BITS__SHIFT 0x0
+#define XPB_STICKY__BITS_MASK 0xFFFFFFFFL
+//XPB_STICKY_W1C
+#define XPB_STICKY_W1C__BITS__SHIFT 0x0
+#define XPB_STICKY_W1C__BITS_MASK 0xFFFFFFFFL
+//XPB_SUB_CTRL
+#define XPB_SUB_CTRL__WRREQ_BYPASS_XPB__SHIFT 0x0
+#define XPB_SUB_CTRL__STALL_CNS_RTR_REQ__SHIFT 0x1
+#define XPB_SUB_CTRL__STALL_RTR_RPB_WRREQ__SHIFT 0x2
+#define XPB_SUB_CTRL__STALL_RTR_MAP_REQ__SHIFT 0x3
+#define XPB_SUB_CTRL__STALL_MAP_WCB_REQ__SHIFT 0x4
+#define XPB_SUB_CTRL__STALL_WCB_SID_REQ__SHIFT 0x5
+#define XPB_SUB_CTRL__STALL_MC_XSP_REQ_SEND__SHIFT 0x6
+#define XPB_SUB_CTRL__STALL_WCB_HST_REQ__SHIFT 0x7
+#define XPB_SUB_CTRL__STALL_HST_HOP_REQ__SHIFT 0x8
+#define XPB_SUB_CTRL__STALL_XPB_RPB_REQ_ATTR__SHIFT 0x9
+#define XPB_SUB_CTRL__RESET_CNS__SHIFT 0xa
+#define XPB_SUB_CTRL__RESET_RTR__SHIFT 0xb
+#define XPB_SUB_CTRL__RESET_RET__SHIFT 0xc
+#define XPB_SUB_CTRL__RESET_MAP__SHIFT 0xd
+#define XPB_SUB_CTRL__RESET_WCB__SHIFT 0xe
+#define XPB_SUB_CTRL__RESET_HST__SHIFT 0xf
+#define XPB_SUB_CTRL__RESET_HOP__SHIFT 0x10
+#define XPB_SUB_CTRL__RESET_SID__SHIFT 0x11
+#define XPB_SUB_CTRL__RESET_SRB__SHIFT 0x12
+#define XPB_SUB_CTRL__RESET_CGR__SHIFT 0x13
+#define XPB_SUB_CTRL__WRREQ_BYPASS_XPB_MASK 0x00000001L
+#define XPB_SUB_CTRL__STALL_CNS_RTR_REQ_MASK 0x00000002L
+#define XPB_SUB_CTRL__STALL_RTR_RPB_WRREQ_MASK 0x00000004L
+#define XPB_SUB_CTRL__STALL_RTR_MAP_REQ_MASK 0x00000008L
+#define XPB_SUB_CTRL__STALL_MAP_WCB_REQ_MASK 0x00000010L
+#define XPB_SUB_CTRL__STALL_WCB_SID_REQ_MASK 0x00000020L
+#define XPB_SUB_CTRL__STALL_MC_XSP_REQ_SEND_MASK 0x00000040L
+#define XPB_SUB_CTRL__STALL_WCB_HST_REQ_MASK 0x00000080L
+#define XPB_SUB_CTRL__STALL_HST_HOP_REQ_MASK 0x00000100L
+#define XPB_SUB_CTRL__STALL_XPB_RPB_REQ_ATTR_MASK 0x00000200L
+#define XPB_SUB_CTRL__RESET_CNS_MASK 0x00000400L
+#define XPB_SUB_CTRL__RESET_RTR_MASK 0x00000800L
+#define XPB_SUB_CTRL__RESET_RET_MASK 0x00001000L
+#define XPB_SUB_CTRL__RESET_MAP_MASK 0x00002000L
+#define XPB_SUB_CTRL__RESET_WCB_MASK 0x00004000L
+#define XPB_SUB_CTRL__RESET_HST_MASK 0x00008000L
+#define XPB_SUB_CTRL__RESET_HOP_MASK 0x00010000L
+#define XPB_SUB_CTRL__RESET_SID_MASK 0x00020000L
+#define XPB_SUB_CTRL__RESET_SRB_MASK 0x00040000L
+#define XPB_SUB_CTRL__RESET_CGR_MASK 0x00080000L
+//XPB_PERF_KNOBS
+#define XPB_PERF_KNOBS__CNS_FIFO_DEPTH__SHIFT 0x0
+#define XPB_PERF_KNOBS__WCB_HST_FIFO_DEPTH__SHIFT 0x6
+#define XPB_PERF_KNOBS__WCB_SID_FIFO_DEPTH__SHIFT 0xc
+#define XPB_PERF_KNOBS__CNS_FIFO_DEPTH_MASK 0x0000003FL
+#define XPB_PERF_KNOBS__WCB_HST_FIFO_DEPTH_MASK 0x00000FC0L
+#define XPB_PERF_KNOBS__WCB_SID_FIFO_DEPTH_MASK 0x0003F000L
+//XPB_MISC_CFG
+#define XPB_MISC_CFG__FIELDNAME0__SHIFT 0x0
+#define XPB_MISC_CFG__FIELDNAME1__SHIFT 0x8
+#define XPB_MISC_CFG__FIELDNAME2__SHIFT 0x10
+#define XPB_MISC_CFG__FIELDNAME3__SHIFT 0x18
+#define XPB_MISC_CFG__TRIGGERNAME__SHIFT 0x1f
+#define XPB_MISC_CFG__FIELDNAME0_MASK 0x000000FFL
+#define XPB_MISC_CFG__FIELDNAME1_MASK 0x0000FF00L
+#define XPB_MISC_CFG__FIELDNAME2_MASK 0x00FF0000L
+#define XPB_MISC_CFG__FIELDNAME3_MASK 0x7F000000L
+#define XPB_MISC_CFG__TRIGGERNAME_MASK 0x80000000L
+//XPB_INTF_CFG2
+#define XPB_INTF_CFG2__RPB_RDREQ_CRD__SHIFT 0x0
+#define XPB_INTF_CFG2__RPB_RDREQ_CRD_MASK 0x000000FFL
+//XPB_CLG_EXTRA_RD
+#define XPB_CLG_EXTRA_RD__CMP0_HIGH__SHIFT 0x0
+#define XPB_CLG_EXTRA_RD__CMP0_LOW__SHIFT 0x6
+#define XPB_CLG_EXTRA_RD__VLD0__SHIFT 0xb
+#define XPB_CLG_EXTRA_RD__CLG0_NUM__SHIFT 0xc
+#define XPB_CLG_EXTRA_RD__CMP1_HIGH__SHIFT 0xf
+#define XPB_CLG_EXTRA_RD__CMP1_LOW__SHIFT 0x15
+#define XPB_CLG_EXTRA_RD__VLD1__SHIFT 0x1a
+#define XPB_CLG_EXTRA_RD__CLG1_NUM__SHIFT 0x1b
+#define XPB_CLG_EXTRA_RD__CMP0_HIGH_MASK 0x0000003FL
+#define XPB_CLG_EXTRA_RD__CMP0_LOW_MASK 0x000007C0L
+#define XPB_CLG_EXTRA_RD__VLD0_MASK 0x00000800L
+#define XPB_CLG_EXTRA_RD__CLG0_NUM_MASK 0x00007000L
+#define XPB_CLG_EXTRA_RD__CMP1_HIGH_MASK 0x001F8000L
+#define XPB_CLG_EXTRA_RD__CMP1_LOW_MASK 0x03E00000L
+#define XPB_CLG_EXTRA_RD__VLD1_MASK 0x04000000L
+#define XPB_CLG_EXTRA_RD__CLG1_NUM_MASK 0x38000000L
+//XPB_CLG_EXTRA_MSK_RD
+#define XPB_CLG_EXTRA_MSK_RD__MSK0_HIGH__SHIFT 0x0
+#define XPB_CLG_EXTRA_MSK_RD__MSK0_LOW__SHIFT 0x6
+#define XPB_CLG_EXTRA_MSK_RD__MSK1_HIGH__SHIFT 0xb
+#define XPB_CLG_EXTRA_MSK_RD__MSK1_LOW__SHIFT 0x11
+#define XPB_CLG_EXTRA_MSK_RD__MSK0_HIGH_MASK 0x0000003FL
+#define XPB_CLG_EXTRA_MSK_RD__MSK0_LOW_MASK 0x000007C0L
+#define XPB_CLG_EXTRA_MSK_RD__MSK1_HIGH_MASK 0x0001F800L
+#define XPB_CLG_EXTRA_MSK_RD__MSK1_LOW_MASK 0x003E0000L
+//XPB_CLG_GFX_MATCH
+#define XPB_CLG_GFX_MATCH__FARBIRC0_ID__SHIFT 0x0
+#define XPB_CLG_GFX_MATCH__FARBIRC1_ID__SHIFT 0x8
+#define XPB_CLG_GFX_MATCH__FARBIRC2_ID__SHIFT 0x10
+#define XPB_CLG_GFX_MATCH__FARBIRC3_ID__SHIFT 0x18
+#define XPB_CLG_GFX_MATCH__FARBIRC0_ID_MASK 0x000000FFL
+#define XPB_CLG_GFX_MATCH__FARBIRC1_ID_MASK 0x0000FF00L
+#define XPB_CLG_GFX_MATCH__FARBIRC2_ID_MASK 0x00FF0000L
+#define XPB_CLG_GFX_MATCH__FARBIRC3_ID_MASK 0xFF000000L
+//XPB_CLG_GFX_MATCH_VLD
+#define XPB_CLG_GFX_MATCH_VLD__FARBIRC0_VLD__SHIFT 0x0
+#define XPB_CLG_GFX_MATCH_VLD__FARBIRC1_VLD__SHIFT 0x1
+#define XPB_CLG_GFX_MATCH_VLD__FARBIRC2_VLD__SHIFT 0x2
+#define XPB_CLG_GFX_MATCH_VLD__FARBIRC3_VLD__SHIFT 0x3
+#define XPB_CLG_GFX_MATCH_VLD__FARBIRC0_VLD_MASK 0x00000001L
+#define XPB_CLG_GFX_MATCH_VLD__FARBIRC1_VLD_MASK 0x00000002L
+#define XPB_CLG_GFX_MATCH_VLD__FARBIRC2_VLD_MASK 0x00000004L
+#define XPB_CLG_GFX_MATCH_VLD__FARBIRC3_VLD_MASK 0x00000008L
+//XPB_CLG_GFX_MATCH_MSK
+#define XPB_CLG_GFX_MATCH_MSK__FARBIRC0_ID_MSK__SHIFT 0x0
+#define XPB_CLG_GFX_MATCH_MSK__FARBIRC1_ID_MSK__SHIFT 0x8
+#define XPB_CLG_GFX_MATCH_MSK__FARBIRC2_ID_MSK__SHIFT 0x10
+#define XPB_CLG_GFX_MATCH_MSK__FARBIRC3_ID_MSK__SHIFT 0x18
+#define XPB_CLG_GFX_MATCH_MSK__FARBIRC0_ID_MSK_MASK 0x000000FFL
+#define XPB_CLG_GFX_MATCH_MSK__FARBIRC1_ID_MSK_MASK 0x0000FF00L
+#define XPB_CLG_GFX_MATCH_MSK__FARBIRC2_ID_MSK_MASK 0x00FF0000L
+#define XPB_CLG_GFX_MATCH_MSK__FARBIRC3_ID_MSK_MASK 0xFF000000L
+//XPB_CLG_MM_MATCH
+#define XPB_CLG_MM_MATCH__FARBIRC0_ID__SHIFT 0x0
+#define XPB_CLG_MM_MATCH__FARBIRC1_ID__SHIFT 0x8
+#define XPB_CLG_MM_MATCH__FARBIRC2_ID__SHIFT 0x10
+#define XPB_CLG_MM_MATCH__FARBIRC3_ID__SHIFT 0x18
+#define XPB_CLG_MM_MATCH__FARBIRC0_ID_MASK 0x000000FFL
+#define XPB_CLG_MM_MATCH__FARBIRC1_ID_MASK 0x0000FF00L
+#define XPB_CLG_MM_MATCH__FARBIRC2_ID_MASK 0x00FF0000L
+#define XPB_CLG_MM_MATCH__FARBIRC3_ID_MASK 0xFF000000L
+//XPB_CLG_MM_MATCH_VLD
+#define XPB_CLG_MM_MATCH_VLD__FARBIRC0_VLD__SHIFT 0x0
+#define XPB_CLG_MM_MATCH_VLD__FARBIRC1_VLD__SHIFT 0x1
+#define XPB_CLG_MM_MATCH_VLD__FARBIRC2_VLD__SHIFT 0x2
+#define XPB_CLG_MM_MATCH_VLD__FARBIRC3_VLD__SHIFT 0x3
+#define XPB_CLG_MM_MATCH_VLD__FARBIRC0_VLD_MASK 0x00000001L
+#define XPB_CLG_MM_MATCH_VLD__FARBIRC1_VLD_MASK 0x00000002L
+#define XPB_CLG_MM_MATCH_VLD__FARBIRC2_VLD_MASK 0x00000004L
+#define XPB_CLG_MM_MATCH_VLD__FARBIRC3_VLD_MASK 0x00000008L
+//XPB_CLG_MM_MATCH_MSK
+#define XPB_CLG_MM_MATCH_MSK__FARBIRC0_ID_MSK__SHIFT 0x0
+#define XPB_CLG_MM_MATCH_MSK__FARBIRC1_ID_MSK__SHIFT 0x8
+#define XPB_CLG_MM_MATCH_MSK__FARBIRC2_ID_MSK__SHIFT 0x10
+#define XPB_CLG_MM_MATCH_MSK__FARBIRC3_ID_MSK__SHIFT 0x18
+#define XPB_CLG_MM_MATCH_MSK__FARBIRC0_ID_MSK_MASK 0x000000FFL
+#define XPB_CLG_MM_MATCH_MSK__FARBIRC1_ID_MSK_MASK 0x0000FF00L
+#define XPB_CLG_MM_MATCH_MSK__FARBIRC2_ID_MSK_MASK 0x00FF0000L
+#define XPB_CLG_MM_MATCH_MSK__FARBIRC3_ID_MSK_MASK 0xFF000000L
+//XPB_CLG_GFX_UNITID_MAPPING0
+#define XPB_CLG_GFX_UNITID_MAPPING0__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_GFX_UNITID_MAPPING0__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_GFX_UNITID_MAPPING0__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_GFX_UNITID_MAPPING0__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_GFX_UNITID_MAPPING0__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_GFX_UNITID_MAPPING0__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_GFX_UNITID_MAPPING1
+#define XPB_CLG_GFX_UNITID_MAPPING1__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_GFX_UNITID_MAPPING1__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_GFX_UNITID_MAPPING1__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_GFX_UNITID_MAPPING1__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_GFX_UNITID_MAPPING1__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_GFX_UNITID_MAPPING1__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_GFX_UNITID_MAPPING2
+#define XPB_CLG_GFX_UNITID_MAPPING2__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_GFX_UNITID_MAPPING2__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_GFX_UNITID_MAPPING2__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_GFX_UNITID_MAPPING2__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_GFX_UNITID_MAPPING2__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_GFX_UNITID_MAPPING2__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_GFX_UNITID_MAPPING3
+#define XPB_CLG_GFX_UNITID_MAPPING3__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_GFX_UNITID_MAPPING3__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_GFX_UNITID_MAPPING3__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_GFX_UNITID_MAPPING3__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_GFX_UNITID_MAPPING3__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_GFX_UNITID_MAPPING3__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_GFX_UNITID_MAPPING4
+#define XPB_CLG_GFX_UNITID_MAPPING4__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_GFX_UNITID_MAPPING4__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_GFX_UNITID_MAPPING4__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_GFX_UNITID_MAPPING4__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_GFX_UNITID_MAPPING4__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_GFX_UNITID_MAPPING4__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_GFX_UNITID_MAPPING5
+#define XPB_CLG_GFX_UNITID_MAPPING5__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_GFX_UNITID_MAPPING5__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_GFX_UNITID_MAPPING5__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_GFX_UNITID_MAPPING5__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_GFX_UNITID_MAPPING5__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_GFX_UNITID_MAPPING5__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_GFX_UNITID_MAPPING6
+#define XPB_CLG_GFX_UNITID_MAPPING6__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_GFX_UNITID_MAPPING6__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_GFX_UNITID_MAPPING6__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_GFX_UNITID_MAPPING6__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_GFX_UNITID_MAPPING6__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_GFX_UNITID_MAPPING6__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_GFX_UNITID_MAPPING7
+#define XPB_CLG_GFX_UNITID_MAPPING7__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_GFX_UNITID_MAPPING7__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_GFX_UNITID_MAPPING7__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_GFX_UNITID_MAPPING7__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_GFX_UNITID_MAPPING7__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_GFX_UNITID_MAPPING7__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_MM_UNITID_MAPPING0
+#define XPB_CLG_MM_UNITID_MAPPING0__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_MM_UNITID_MAPPING0__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_MM_UNITID_MAPPING0__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_MM_UNITID_MAPPING0__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_MM_UNITID_MAPPING0__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_MM_UNITID_MAPPING0__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_MM_UNITID_MAPPING1
+#define XPB_CLG_MM_UNITID_MAPPING1__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_MM_UNITID_MAPPING1__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_MM_UNITID_MAPPING1__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_MM_UNITID_MAPPING1__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_MM_UNITID_MAPPING1__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_MM_UNITID_MAPPING1__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_MM_UNITID_MAPPING2
+#define XPB_CLG_MM_UNITID_MAPPING2__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_MM_UNITID_MAPPING2__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_MM_UNITID_MAPPING2__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_MM_UNITID_MAPPING2__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_MM_UNITID_MAPPING2__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_MM_UNITID_MAPPING2__DEST_CLG_NUM_MASK 0x000001C0L
+//XPB_CLG_MM_UNITID_MAPPING3
+#define XPB_CLG_MM_UNITID_MAPPING3__UNITID_LOW__SHIFT 0x0
+#define XPB_CLG_MM_UNITID_MAPPING3__UNITID_VLD__SHIFT 0x5
+#define XPB_CLG_MM_UNITID_MAPPING3__DEST_CLG_NUM__SHIFT 0x6
+#define XPB_CLG_MM_UNITID_MAPPING3__UNITID_LOW_MASK 0x0000001FL
+#define XPB_CLG_MM_UNITID_MAPPING3__UNITID_VLD_MASK 0x00000020L
+#define XPB_CLG_MM_UNITID_MAPPING3__DEST_CLG_NUM_MASK 0x000001C0L
+
+
+// addressBlock: athub_rpbdec
+//ATHUB_SHARED_VIRT_RESET_REQ
+#define ATHUB_SHARED_VIRT_RESET_REQ__VF__SHIFT 0x0
+#define ATHUB_SHARED_VIRT_RESET_REQ__PF__SHIFT 0x1f
+#define ATHUB_SHARED_VIRT_RESET_REQ__VF_MASK 0x7FFFFFFFL
+#define ATHUB_SHARED_VIRT_RESET_REQ__PF_MASK 0x80000000L
+//ATHUB_MEM_POWER_LS
+#define ATHUB_MEM_POWER_LS__LS_SETUP__SHIFT 0x0
+#define ATHUB_MEM_POWER_LS__LS_HOLD__SHIFT 0x6
+#define ATHUB_MEM_POWER_LS__LS_SETUP_MASK 0x0000003FL
+#define ATHUB_MEM_POWER_LS__LS_HOLD_MASK 0x0007FFC0L
+//ATHUB_MISC_CNTL
+#define ATHUB_MISC_CNTL__CG_OFFDLY__SHIFT 0x0
+#define ATHUB_MISC_CNTL__CG_ENABLE__SHIFT 0x6
+#define ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE__SHIFT 0x7
+#define ATHUB_MISC_CNTL__PG_ENABLE__SHIFT 0x8
+#define ATHUB_MISC_CNTL__PG_OFFDLY__SHIFT 0x9
+#define ATHUB_MISC_CNTL__ALWAYS_BUSY__SHIFT 0xf
+#define ATHUB_MISC_CNTL__CG_STATUS__SHIFT 0x10
+#define ATHUB_MISC_CNTL__PG_STATUS__SHIFT 0x11
+#define ATHUB_MISC_CNTL__RPB_BUSY__SHIFT 0x12
+#define ATHUB_MISC_CNTL__XPB_BUSY__SHIFT 0x13
+#define ATHUB_MISC_CNTL__ATS_BUSY__SHIFT 0x14
+#define ATHUB_MISC_CNTL__SDPNCS_BUSY__SHIFT 0x15
+#define ATHUB_MISC_CNTL__DFPORT_BUSY__SHIFT 0x16
+#define ATHUB_MISC_CNTL__SWITCH_CNTL__SHIFT 0x17
+#define ATHUB_MISC_CNTL__LS_DELAY_ENABLE__SHIFT 0x18
+#define ATHUB_MISC_CNTL__LS_DELAY_TIME__SHIFT 0x19
+#define ATHUB_MISC_CNTL__RESETB_PG_CLK_GATING_ENABLE__SHIFT 0x1e
+#define ATHUB_MISC_CNTL__CG_OFFDLY_MASK 0x0000003FL
+#define ATHUB_MISC_CNTL__CG_ENABLE_MASK 0x00000040L
+#define ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK 0x00000080L
+#define ATHUB_MISC_CNTL__PG_ENABLE_MASK 0x00000100L
+#define ATHUB_MISC_CNTL__PG_OFFDLY_MASK 0x00007E00L
+#define ATHUB_MISC_CNTL__ALWAYS_BUSY_MASK 0x00008000L
+#define ATHUB_MISC_CNTL__CG_STATUS_MASK 0x00010000L
+#define ATHUB_MISC_CNTL__PG_STATUS_MASK 0x00020000L
+#define ATHUB_MISC_CNTL__RPB_BUSY_MASK 0x00040000L
+#define ATHUB_MISC_CNTL__XPB_BUSY_MASK 0x00080000L
+#define ATHUB_MISC_CNTL__ATS_BUSY_MASK 0x00100000L
+#define ATHUB_MISC_CNTL__SDPNCS_BUSY_MASK 0x00200000L
+#define ATHUB_MISC_CNTL__DFPORT_BUSY_MASK 0x00400000L
+#define ATHUB_MISC_CNTL__SWITCH_CNTL_MASK 0x00800000L
+#define ATHUB_MISC_CNTL__LS_DELAY_ENABLE_MASK 0x01000000L
+#define ATHUB_MISC_CNTL__LS_DELAY_TIME_MASK 0x3E000000L
+#define ATHUB_MISC_CNTL__RESETB_PG_CLK_GATING_ENABLE_MASK 0x40000000L
+//RPB_PASSPW_CONF
+#define RPB_PASSPW_CONF__XPB_PASSPW_OVERRIDE__SHIFT 0x0
+#define RPB_PASSPW_CONF__XPB_RSPPASSPW_OVERRIDE__SHIFT 0x1
+#define RPB_PASSPW_CONF__ATC_VC5_TR_PASSPW_OVERRIDE__SHIFT 0x2
+#define RPB_PASSPW_CONF__ATC_VC5_TR_PASSPW_OVERRIDE_EN__SHIFT 0x3
+#define RPB_PASSPW_CONF__ATC_VC5_RSPPASSPW_OVERRIDE__SHIFT 0x4
+#define RPB_PASSPW_CONF__ATC_VC5_RSPPASSPW_OVERRIDE_EN__SHIFT 0x5
+#define RPB_PASSPW_CONF__ATC_VC0_TR_PASSPW_OVERRIDE__SHIFT 0x6
+#define RPB_PASSPW_CONF__ATC_VC0_TR_PASSPW_OVERRIDE_EN__SHIFT 0x7
+#define RPB_PASSPW_CONF__ATC_VC0_RSPPASSPW_OVERRIDE__SHIFT 0x8
+#define RPB_PASSPW_CONF__ATC_VC0_RSPPASSPW_OVERRIDE_EN__SHIFT 0x9
+#define RPB_PASSPW_CONF__ATC_PAGE_PASSPW_OVERRIDE__SHIFT 0xa
+#define RPB_PASSPW_CONF__ATC_PAGE_PASSPW_OVERRIDE_EN__SHIFT 0xb
+#define RPB_PASSPW_CONF__ATC_PAGE_RSPPASSPW_OVERRIDE__SHIFT 0xc
+#define RPB_PASSPW_CONF__ATC_PAGE_RSPPASSPW_OVERRIDE_EN__SHIFT 0xd
+#define RPB_PASSPW_CONF__WR_PASSPW_OVERRIDE__SHIFT 0xe
+#define RPB_PASSPW_CONF__WR_RSPPASSPW_OVERRIDE__SHIFT 0xf
+#define RPB_PASSPW_CONF__RD_PASSPW_OVERRIDE__SHIFT 0x10
+#define RPB_PASSPW_CONF__RD_RSPPASSPW_OVERRIDE__SHIFT 0x11
+#define RPB_PASSPW_CONF__ATOMIC_PASSPW_OVERRIDE__SHIFT 0x12
+#define RPB_PASSPW_CONF__ATOMIC_RSPPASSPW_OVERRIDE__SHIFT 0x13
+#define RPB_PASSPW_CONF__WRRSP_PASSPW_OVERRIDE__SHIFT 0x14
+#define RPB_PASSPW_CONF__WRRSP_PASSPW_OVERRIDE_EN__SHIFT 0x15
+#define RPB_PASSPW_CONF__RDRSP_PASSPW_OVERRIDE__SHIFT 0x16
+#define RPB_PASSPW_CONF__RDRSP_PASSPW_OVERRIDE_EN__SHIFT 0x17
+#define RPB_PASSPW_CONF__XPB_PASSPW_OVERRIDE_MASK 0x00000001L
+#define RPB_PASSPW_CONF__XPB_RSPPASSPW_OVERRIDE_MASK 0x00000002L
+#define RPB_PASSPW_CONF__ATC_VC5_TR_PASSPW_OVERRIDE_MASK 0x00000004L
+#define RPB_PASSPW_CONF__ATC_VC5_TR_PASSPW_OVERRIDE_EN_MASK 0x00000008L
+#define RPB_PASSPW_CONF__ATC_VC5_RSPPASSPW_OVERRIDE_MASK 0x00000010L
+#define RPB_PASSPW_CONF__ATC_VC5_RSPPASSPW_OVERRIDE_EN_MASK 0x00000020L
+#define RPB_PASSPW_CONF__ATC_VC0_TR_PASSPW_OVERRIDE_MASK 0x00000040L
+#define RPB_PASSPW_CONF__ATC_VC0_TR_PASSPW_OVERRIDE_EN_MASK 0x00000080L
+#define RPB_PASSPW_CONF__ATC_VC0_RSPPASSPW_OVERRIDE_MASK 0x00000100L
+#define RPB_PASSPW_CONF__ATC_VC0_RSPPASSPW_OVERRIDE_EN_MASK 0x00000200L
+#define RPB_PASSPW_CONF__ATC_PAGE_PASSPW_OVERRIDE_MASK 0x00000400L
+#define RPB_PASSPW_CONF__ATC_PAGE_PASSPW_OVERRIDE_EN_MASK 0x00000800L
+#define RPB_PASSPW_CONF__ATC_PAGE_RSPPASSPW_OVERRIDE_MASK 0x00001000L
+#define RPB_PASSPW_CONF__ATC_PAGE_RSPPASSPW_OVERRIDE_EN_MASK 0x00002000L
+#define RPB_PASSPW_CONF__WR_PASSPW_OVERRIDE_MASK 0x00004000L
+#define RPB_PASSPW_CONF__WR_RSPPASSPW_OVERRIDE_MASK 0x00008000L
+#define RPB_PASSPW_CONF__RD_PASSPW_OVERRIDE_MASK 0x00010000L
+#define RPB_PASSPW_CONF__RD_RSPPASSPW_OVERRIDE_MASK 0x00020000L
+#define RPB_PASSPW_CONF__ATOMIC_PASSPW_OVERRIDE_MASK 0x00040000L
+#define RPB_PASSPW_CONF__ATOMIC_RSPPASSPW_OVERRIDE_MASK 0x00080000L
+#define RPB_PASSPW_CONF__WRRSP_PASSPW_OVERRIDE_MASK 0x00100000L
+#define RPB_PASSPW_CONF__WRRSP_PASSPW_OVERRIDE_EN_MASK 0x00200000L
+#define RPB_PASSPW_CONF__RDRSP_PASSPW_OVERRIDE_MASK 0x00400000L
+#define RPB_PASSPW_CONF__RDRSP_PASSPW_OVERRIDE_EN_MASK 0x00800000L
+//RPB_BLOCKLEVEL_CONF
+#define RPB_BLOCKLEVEL_CONF__XPB_BLOCKLEVEL_OVERRIDE__SHIFT 0x0
+#define RPB_BLOCKLEVEL_CONF__XPB_BLOCKLEVEL_OVERRIDE_EN__SHIFT 0x2
+#define RPB_BLOCKLEVEL_CONF__ATC_VC5_TR_BLOCKLEVEL__SHIFT 0x3
+#define RPB_BLOCKLEVEL_CONF__ATC_VC0_TR_BLOCKLEVEL__SHIFT 0x5
+#define RPB_BLOCKLEVEL_CONF__ATC_PAGE_BLOCKLEVEL__SHIFT 0x7
+#define RPB_BLOCKLEVEL_CONF__ATC_INV_BLOCKLEVEL__SHIFT 0x9
+#define RPB_BLOCKLEVEL_CONF__IO_WR_BLOCKLEVEL_OVERRIDE__SHIFT 0xb
+#define RPB_BLOCKLEVEL_CONF__IO_WR_BLOCKLEVEL_OVERRIDE_EN__SHIFT 0xd
+#define RPB_BLOCKLEVEL_CONF__IO_RD_BLOCKLEVEL_OVERRIDE__SHIFT 0xe
+#define RPB_BLOCKLEVEL_CONF__IO_RD_BLOCKLEVEL_OVERRIDE_EN__SHIFT 0x10
+#define RPB_BLOCKLEVEL_CONF__ATOMIC_BLOCKLEVEL_OVERRIDE__SHIFT 0x11
+#define RPB_BLOCKLEVEL_CONF__ATOMIC_BLOCKLEVEL_OVERRIDE_EN__SHIFT 0x13
+#define RPB_BLOCKLEVEL_CONF__XPB_BLOCKLEVEL_OVERRIDE_MASK 0x00000003L
+#define RPB_BLOCKLEVEL_CONF__XPB_BLOCKLEVEL_OVERRIDE_EN_MASK 0x00000004L
+#define RPB_BLOCKLEVEL_CONF__ATC_VC5_TR_BLOCKLEVEL_MASK 0x00000018L
+#define RPB_BLOCKLEVEL_CONF__ATC_VC0_TR_BLOCKLEVEL_MASK 0x00000060L
+#define RPB_BLOCKLEVEL_CONF__ATC_PAGE_BLOCKLEVEL_MASK 0x00000180L
+#define RPB_BLOCKLEVEL_CONF__ATC_INV_BLOCKLEVEL_MASK 0x00000600L
+#define RPB_BLOCKLEVEL_CONF__IO_WR_BLOCKLEVEL_OVERRIDE_MASK 0x00001800L
+#define RPB_BLOCKLEVEL_CONF__IO_WR_BLOCKLEVEL_OVERRIDE_EN_MASK 0x00002000L
+#define RPB_BLOCKLEVEL_CONF__IO_RD_BLOCKLEVEL_OVERRIDE_MASK 0x0000C000L
+#define RPB_BLOCKLEVEL_CONF__IO_RD_BLOCKLEVEL_OVERRIDE_EN_MASK 0x00010000L
+#define RPB_BLOCKLEVEL_CONF__ATOMIC_BLOCKLEVEL_OVERRIDE_MASK 0x00060000L
+#define RPB_BLOCKLEVEL_CONF__ATOMIC_BLOCKLEVEL_OVERRIDE_EN_MASK 0x00080000L
+//RPB_TAG_CONF
+#define RPB_TAG_CONF__RPB_IO_RD__SHIFT 0x0
+#define RPB_TAG_CONF__RPB_IO_WR__SHIFT 0xa
+#define RPB_TAG_CONF__RPB_IO_MAX_LIMIT__SHIFT 0x14
+#define RPB_TAG_CONF__RPB_IO_RD_MASK 0x000003FFL
+#define RPB_TAG_CONF__RPB_IO_WR_MASK 0x000FFC00L
+#define RPB_TAG_CONF__RPB_IO_MAX_LIMIT_MASK 0x7FF00000L
+//RPB_ARB_CNTL
+#define RPB_ARB_CNTL__RD_SWITCH_NUM__SHIFT 0x0
+#define RPB_ARB_CNTL__WR_SWITCH_NUM__SHIFT 0x8
+#define RPB_ARB_CNTL__ATC_TR_SWITCH_NUM__SHIFT 0x10
+#define RPB_ARB_CNTL__ARB_MODE__SHIFT 0x18
+#define RPB_ARB_CNTL__SWITCH_NUM_MODE__SHIFT 0x19
+#define RPB_ARB_CNTL__RPB_VC0_CRD__SHIFT 0x1a
+#define RPB_ARB_CNTL__DISABLE_FED__SHIFT 0x1f
+#define RPB_ARB_CNTL__RD_SWITCH_NUM_MASK 0x000000FFL
+#define RPB_ARB_CNTL__WR_SWITCH_NUM_MASK 0x0000FF00L
+#define RPB_ARB_CNTL__ATC_TR_SWITCH_NUM_MASK 0x00FF0000L
+#define RPB_ARB_CNTL__ARB_MODE_MASK 0x01000000L
+#define RPB_ARB_CNTL__SWITCH_NUM_MODE_MASK 0x02000000L
+#define RPB_ARB_CNTL__RPB_VC0_CRD_MASK 0x7C000000L
+#define RPB_ARB_CNTL__DISABLE_FED_MASK 0x80000000L
+//RPB_ARB_CNTL2
+#define RPB_ARB_CNTL2__P2P_SWITCH_NUM__SHIFT 0x0
+#define RPB_ARB_CNTL2__ATOMIC_SWITCH_NUM__SHIFT 0x8
+#define RPB_ARB_CNTL2__ATC_PAGE_SWITCH_NUM__SHIFT 0x10
+#define RPB_ARB_CNTL2__RPB_VC1_CRD__SHIFT 0x18
+#define RPB_ARB_CNTL2__P2P_SWITCH_NUM_MASK 0x000000FFL
+#define RPB_ARB_CNTL2__ATOMIC_SWITCH_NUM_MASK 0x0000FF00L
+#define RPB_ARB_CNTL2__ATC_PAGE_SWITCH_NUM_MASK 0x00FF0000L
+#define RPB_ARB_CNTL2__RPB_VC1_CRD_MASK 0x1F000000L
+//RPB_BIF_CNTL
+#define RPB_BIF_CNTL__VC0_SWITCH_NUM__SHIFT 0x0
+#define RPB_BIF_CNTL__VC1_SWITCH_NUM__SHIFT 0x8
+#define RPB_BIF_CNTL__VC2_SWITCH_NUM__SHIFT 0x10
+#define RPB_BIF_CNTL__NBIF_DMA_ORIGCLKCTL_EN__SHIFT 0x18
+#define RPB_BIF_CNTL__TR_QOS_VC__SHIFT 0x19
+#define RPB_BIF_CNTL__RESERVE__SHIFT 0x1c
+#define RPB_BIF_CNTL__VC0_SWITCH_NUM_MASK 0x000000FFL
+#define RPB_BIF_CNTL__VC1_SWITCH_NUM_MASK 0x0000FF00L
+#define RPB_BIF_CNTL__VC2_SWITCH_NUM_MASK 0x00FF0000L
+#define RPB_BIF_CNTL__NBIF_DMA_ORIGCLKCTL_EN_MASK 0x01000000L
+#define RPB_BIF_CNTL__TR_QOS_VC_MASK 0x0E000000L
+#define RPB_BIF_CNTL__RESERVE_MASK 0xF0000000L
+//RPB_BIF_CNTL2
+#define RPB_BIF_CNTL2__ARB_MODE__SHIFT 0x0
+#define RPB_BIF_CNTL2__DRAIN_VC_NUM__SHIFT 0x1
+#define RPB_BIF_CNTL2__SWITCH_ENABLE__SHIFT 0x3
+#define RPB_BIF_CNTL2__SWITCH_THRESHOLD__SHIFT 0x4
+#define RPB_BIF_CNTL2__PAGE_PRI_EN__SHIFT 0xc
+#define RPB_BIF_CNTL2__VC5_TR_PRI_EN__SHIFT 0xd
+#define RPB_BIF_CNTL2__VC0_TR_PRI_EN__SHIFT 0xe
+#define RPB_BIF_CNTL2__VC0_CHAINED_OVERRIDE__SHIFT 0xf
+#define RPB_BIF_CNTL2__VC1_CHAINED_OVERRIDE__SHIFT 0x10
+#define RPB_BIF_CNTL2__VC1_CHAINED_OVERRIDE_EN__SHIFT 0x11
+#define RPB_BIF_CNTL2__NBIF_HST_COMPCLKCTL_EN__SHIFT 0x12
+#define RPB_BIF_CNTL2__ATHUB_NBIF_UNITID__SHIFT 0x13
+#define RPB_BIF_CNTL2__RESERVE__SHIFT 0x1e
+#define RPB_BIF_CNTL2__ARB_MODE_MASK 0x00000001L
+#define RPB_BIF_CNTL2__DRAIN_VC_NUM_MASK 0x00000006L
+#define RPB_BIF_CNTL2__SWITCH_ENABLE_MASK 0x00000008L
+#define RPB_BIF_CNTL2__SWITCH_THRESHOLD_MASK 0x00000FF0L
+#define RPB_BIF_CNTL2__PAGE_PRI_EN_MASK 0x00001000L
+#define RPB_BIF_CNTL2__VC5_TR_PRI_EN_MASK 0x00002000L
+#define RPB_BIF_CNTL2__VC0_TR_PRI_EN_MASK 0x00004000L
+#define RPB_BIF_CNTL2__VC0_CHAINED_OVERRIDE_MASK 0x00008000L
+#define RPB_BIF_CNTL2__VC1_CHAINED_OVERRIDE_MASK 0x00010000L
+#define RPB_BIF_CNTL2__VC1_CHAINED_OVERRIDE_EN_MASK 0x00020000L
+#define RPB_BIF_CNTL2__NBIF_HST_COMPCLKCTL_EN_MASK 0x00040000L
+#define RPB_BIF_CNTL2__ATHUB_NBIF_UNITID_MASK 0x3FF80000L
+#define RPB_BIF_CNTL2__RESERVE_MASK 0xC0000000L
+//RPB_SDPPORT_CNTL
+#define RPB_SDPPORT_CNTL__NBIF_DMA_SELF_ACTIVATE__SHIFT 0x0
+#define RPB_SDPPORT_CNTL__NBIF_DMA_CFG_MODE__SHIFT 0x1
+#define RPB_SDPPORT_CNTL__NBIF_DMA_ENABLE_REISSUE_CREDIT__SHIFT 0x3
+#define RPB_SDPPORT_CNTL__NBIF_DMA_ENABLE_SATURATE_COUNTER__SHIFT 0x4
+#define RPB_SDPPORT_CNTL__NBIF_DMA_ENABLE_DISRUPT_FULLDIS__SHIFT 0x5
+#define RPB_SDPPORT_CNTL__NBIF_DMA_HALT_THRESHOLD__SHIFT 0x6
+#define RPB_SDPPORT_CNTL__RESERVE1__SHIFT 0xa
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_RDRSPCKEN__SHIFT 0x16
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_RDRSPCKENRCV__SHIFT 0x17
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_RDRSPDATACKEN__SHIFT 0x18
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_RDRSPDATACKENRCV__SHIFT 0x19
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_WRRSPCKEN__SHIFT 0x1a
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_WRRSPCKENRCV__SHIFT 0x1b
+#define RPB_SDPPORT_CNTL__CG_BUSY_PORT__SHIFT 0x1c
+#define RPB_SDPPORT_CNTL__RESERVE__SHIFT 0x1d
+#define RPB_SDPPORT_CNTL__NBIF_DMA_SELF_ACTIVATE_MASK 0x00000001L
+#define RPB_SDPPORT_CNTL__NBIF_DMA_CFG_MODE_MASK 0x00000006L
+#define RPB_SDPPORT_CNTL__NBIF_DMA_ENABLE_REISSUE_CREDIT_MASK 0x00000008L
+#define RPB_SDPPORT_CNTL__NBIF_DMA_ENABLE_SATURATE_COUNTER_MASK 0x00000010L
+#define RPB_SDPPORT_CNTL__NBIF_DMA_ENABLE_DISRUPT_FULLDIS_MASK 0x00000020L
+#define RPB_SDPPORT_CNTL__NBIF_DMA_HALT_THRESHOLD_MASK 0x000003C0L
+#define RPB_SDPPORT_CNTL__RESERVE1_MASK 0x003FFC00L
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_RDRSPCKEN_MASK 0x00400000L
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_RDRSPCKENRCV_MASK 0x00800000L
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_RDRSPDATACKEN_MASK 0x01000000L
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_RDRSPDATACKENRCV_MASK 0x02000000L
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_WRRSPCKEN_MASK 0x04000000L
+#define RPB_SDPPORT_CNTL__DF_SDPVDCI_WRRSPCKENRCV_MASK 0x08000000L
+#define RPB_SDPPORT_CNTL__CG_BUSY_PORT_MASK 0x10000000L
+#define RPB_SDPPORT_CNTL__RESERVE_MASK 0xE0000000L
+//RPB_NBIF_SDPPORT_CNTL
+#define RPB_NBIF_SDPPORT_CNTL__NBIF_DMA_WRRSP_CRD__SHIFT 0x0
+#define RPB_NBIF_SDPPORT_CNTL__NBIF_DMA_RDRSP_CRD__SHIFT 0x8
+#define RPB_NBIF_SDPPORT_CNTL__NBIF_HST_REQ_CRD__SHIFT 0x10
+#define RPB_NBIF_SDPPORT_CNTL__NBIF_HST_DATA_CRD__SHIFT 0x18
+#define RPB_NBIF_SDPPORT_CNTL__NBIF_DMA_WRRSP_CRD_MASK 0x000000FFL
+#define RPB_NBIF_SDPPORT_CNTL__NBIF_DMA_RDRSP_CRD_MASK 0x0000FF00L
+#define RPB_NBIF_SDPPORT_CNTL__NBIF_HST_REQ_CRD_MASK 0x00FF0000L
+#define RPB_NBIF_SDPPORT_CNTL__NBIF_HST_DATA_CRD_MASK 0xFF000000L
+//RPB_DEINTRLV_COMBINE_CNTL
+#define RPB_DEINTRLV_COMBINE_CNTL__WC_CHAINED_FLUSH_TIMER__SHIFT 0x0
+#define RPB_DEINTRLV_COMBINE_CNTL__WC_CHAINED_BREAK_EN__SHIFT 0x4
+#define RPB_DEINTRLV_COMBINE_CNTL__WC_HANDLE_CHECK_DISABLE__SHIFT 0x5
+#define RPB_DEINTRLV_COMBINE_CNTL__XPB_WRREQ_CRD__SHIFT 0x6
+#define RPB_DEINTRLV_COMBINE_CNTL__WC_CLI_INTLV_EN__SHIFT 0xe
+#define RPB_DEINTRLV_COMBINE_CNTL__RESERVE__SHIFT 0xf
+#define RPB_DEINTRLV_COMBINE_CNTL__WC_CHAINED_FLUSH_TIMER_MASK 0x0000000FL
+#define RPB_DEINTRLV_COMBINE_CNTL__WC_CHAINED_BREAK_EN_MASK 0x00000010L
+#define RPB_DEINTRLV_COMBINE_CNTL__WC_HANDLE_CHECK_DISABLE_MASK 0x00000020L
+#define RPB_DEINTRLV_COMBINE_CNTL__XPB_WRREQ_CRD_MASK 0x00003FC0L
+#define RPB_DEINTRLV_COMBINE_CNTL__WC_CLI_INTLV_EN_MASK 0x00004000L
+#define RPB_DEINTRLV_COMBINE_CNTL__RESERVE_MASK 0xFFFF8000L
+//RPB_VC_SWITCH_RDWR
+#define RPB_VC_SWITCH_RDWR__MODE__SHIFT 0x0
+#define RPB_VC_SWITCH_RDWR__NUM_RD__SHIFT 0x2
+#define RPB_VC_SWITCH_RDWR__NUM_WR__SHIFT 0xa
+#define RPB_VC_SWITCH_RDWR__XPB_RDREQ_CRD__SHIFT 0x12
+#define RPB_VC_SWITCH_RDWR__CENTER_MARGIN__SHIFT 0x1a
+#define RPB_VC_SWITCH_RDWR__MODE_MASK 0x00000003L
+#define RPB_VC_SWITCH_RDWR__NUM_RD_MASK 0x000003FCL
+#define RPB_VC_SWITCH_RDWR__NUM_WR_MASK 0x0003FC00L
+#define RPB_VC_SWITCH_RDWR__XPB_RDREQ_CRD_MASK 0x03FC0000L
+#define RPB_VC_SWITCH_RDWR__CENTER_MARGIN_MASK 0xFC000000L
+//RPB_ATS_CNTL3
+#define RPB_ATS_CNTL3__RPB_ATS_VC5_TR__SHIFT 0x0
+#define RPB_ATS_CNTL3__RPB_ATS_VC0_TR__SHIFT 0x9
+#define RPB_ATS_CNTL3__RPB_ATS_PR__SHIFT 0x12
+#define RPB_ATS_CNTL3__RPB_ATS_VC5_TR_MASK 0x000001FFL
+#define RPB_ATS_CNTL3__RPB_ATS_VC0_TR_MASK 0x0003FE00L
+#define RPB_ATS_CNTL3__RPB_ATS_PR_MASK 0x07FC0000L
+//RPB_DF_SDPPORT_CNTL
+#define RPB_DF_SDPPORT_CNTL__DF_REQ_CRD__SHIFT 0x0
+#define RPB_DF_SDPPORT_CNTL__DF_DATA_CRD__SHIFT 0x6
+#define RPB_DF_SDPPORT_CNTL__DF_HALT_THRESHOLD__SHIFT 0xe
+#define RPB_DF_SDPPORT_CNTL__DF_RELEASE_CREDIT_MODE__SHIFT 0x12
+#define RPB_DF_SDPPORT_CNTL__DF_ORIG_ACK_TIMER__SHIFT 0x13
+#define RPB_DF_SDPPORT_CNTL__DF_RAW_EA_CHECK_ENABLE__SHIFT 0x1b
+#define RPB_DF_SDPPORT_CNTL__DF_RAW_CHECK_ENABLE__SHIFT 0x1c
+#define RPB_DF_SDPPORT_CNTL__DF_RAAT_CHECK_ENABLE__SHIFT 0x1d
+#define RPB_DF_SDPPORT_CNTL__DF_ATAR_CHECK_ENABLE__SHIFT 0x1e
+#define RPB_DF_SDPPORT_CNTL__DF_VC3_READ_CHECK__SHIFT 0x1f
+#define RPB_DF_SDPPORT_CNTL__DF_REQ_CRD_MASK 0x0000003FL
+#define RPB_DF_SDPPORT_CNTL__DF_DATA_CRD_MASK 0x00003FC0L
+#define RPB_DF_SDPPORT_CNTL__DF_HALT_THRESHOLD_MASK 0x0003C000L
+#define RPB_DF_SDPPORT_CNTL__DF_RELEASE_CREDIT_MODE_MASK 0x00040000L
+#define RPB_DF_SDPPORT_CNTL__DF_ORIG_ACK_TIMER_MASK 0x07F80000L
+#define RPB_DF_SDPPORT_CNTL__DF_RAW_EA_CHECK_ENABLE_MASK 0x08000000L
+#define RPB_DF_SDPPORT_CNTL__DF_RAW_CHECK_ENABLE_MASK 0x10000000L
+#define RPB_DF_SDPPORT_CNTL__DF_RAAT_CHECK_ENABLE_MASK 0x20000000L
+#define RPB_DF_SDPPORT_CNTL__DF_ATAR_CHECK_ENABLE_MASK 0x40000000L
+#define RPB_DF_SDPPORT_CNTL__DF_VC3_READ_CHECK_MASK 0x80000000L
+//RPB_ATS_CNTL
+#define RPB_ATS_CNTL__PAGE_MIN_LATENCY_ENABLE__SHIFT 0x0
+#define RPB_ATS_CNTL__TR_MIN_LATENCY_ENABLE__SHIFT 0x1
+#define RPB_ATS_CNTL__SWITCH_THRESHOLD__SHIFT 0x2
+#define RPB_ATS_CNTL__TIME_SLICE__SHIFT 0x7
+#define RPB_ATS_CNTL__ATCTR_VC0_SWITCH_NUM__SHIFT 0xf
+#define RPB_ATS_CNTL__ATCPAGE_SWITCH_NUM__SHIFT 0x13
+#define RPB_ATS_CNTL__WR_AT__SHIFT 0x17
+#define RPB_ATS_CNTL__MM_TRANS_VC5_ENABLE__SHIFT 0x19
+#define RPB_ATS_CNTL__GC_TRANS_VC5_ENABLE__SHIFT 0x1a
+#define RPB_ATS_CNTL__PAGE_MIN_LATENCY_ENABLE_MASK 0x00000001L
+#define RPB_ATS_CNTL__TR_MIN_LATENCY_ENABLE_MASK 0x00000002L
+#define RPB_ATS_CNTL__SWITCH_THRESHOLD_MASK 0x0000007CL
+#define RPB_ATS_CNTL__TIME_SLICE_MASK 0x00007F80L
+#define RPB_ATS_CNTL__ATCTR_VC0_SWITCH_NUM_MASK 0x00078000L
+#define RPB_ATS_CNTL__ATCPAGE_SWITCH_NUM_MASK 0x00780000L
+#define RPB_ATS_CNTL__WR_AT_MASK 0x01800000L
+#define RPB_ATS_CNTL__MM_TRANS_VC5_ENABLE_MASK 0x02000000L
+#define RPB_ATS_CNTL__GC_TRANS_VC5_ENABLE_MASK 0x04000000L
+//RPB_ATS_CNTL2
+#define RPB_ATS_CNTL2__INVAL_COM_CMD__SHIFT 0x0
+#define RPB_ATS_CNTL2__TRANS_CMD__SHIFT 0x6
+#define RPB_ATS_CNTL2__PAGE_REQ_CMD__SHIFT 0xc
+#define RPB_ATS_CNTL2__PAGE_ROUTING_CODE__SHIFT 0x12
+#define RPB_ATS_CNTL2__INVAL_COM_ROUTING_CODE__SHIFT 0x15
+#define RPB_ATS_CNTL2__VENDOR_ID__SHIFT 0x18
+#define RPB_ATS_CNTL2__RPB_VC5_CRD__SHIFT 0x1a
+#define RPB_ATS_CNTL2__INVAL_COM_CMD_MASK 0x0000003FL
+#define RPB_ATS_CNTL2__TRANS_CMD_MASK 0x00000FC0L
+#define RPB_ATS_CNTL2__PAGE_REQ_CMD_MASK 0x0003F000L
+#define RPB_ATS_CNTL2__PAGE_ROUTING_CODE_MASK 0x001C0000L
+#define RPB_ATS_CNTL2__INVAL_COM_ROUTING_CODE_MASK 0x00E00000L
+#define RPB_ATS_CNTL2__VENDOR_ID_MASK 0x03000000L
+#define RPB_ATS_CNTL2__RPB_VC5_CRD_MASK 0x7C000000L
+//RPB_PERFCOUNTER0_CFG
+#define RPB_PERFCOUNTER0_CFG__PERF_SEL__SHIFT 0x0
+#define RPB_PERFCOUNTER0_CFG__PERF_SEL_END__SHIFT 0x8
+#define RPB_PERFCOUNTER0_CFG__PERF_MODE__SHIFT 0x18
+#define RPB_PERFCOUNTER0_CFG__ENABLE__SHIFT 0x1c
+#define RPB_PERFCOUNTER0_CFG__CLEAR__SHIFT 0x1d
+#define RPB_PERFCOUNTER0_CFG__PERF_SEL_MASK 0x000000FFL
+#define RPB_PERFCOUNTER0_CFG__PERF_SEL_END_MASK 0x0000FF00L
+#define RPB_PERFCOUNTER0_CFG__PERF_MODE_MASK 0x0F000000L
+#define RPB_PERFCOUNTER0_CFG__ENABLE_MASK 0x10000000L
+#define RPB_PERFCOUNTER0_CFG__CLEAR_MASK 0x20000000L
+//RPB_PERFCOUNTER1_CFG
+#define RPB_PERFCOUNTER1_CFG__PERF_SEL__SHIFT 0x0
+#define RPB_PERFCOUNTER1_CFG__PERF_SEL_END__SHIFT 0x8
+#define RPB_PERFCOUNTER1_CFG__PERF_MODE__SHIFT 0x18
+#define RPB_PERFCOUNTER1_CFG__ENABLE__SHIFT 0x1c
+#define RPB_PERFCOUNTER1_CFG__CLEAR__SHIFT 0x1d
+#define RPB_PERFCOUNTER1_CFG__PERF_SEL_MASK 0x000000FFL
+#define RPB_PERFCOUNTER1_CFG__PERF_SEL_END_MASK 0x0000FF00L
+#define RPB_PERFCOUNTER1_CFG__PERF_MODE_MASK 0x0F000000L
+#define RPB_PERFCOUNTER1_CFG__ENABLE_MASK 0x10000000L
+#define RPB_PERFCOUNTER1_CFG__CLEAR_MASK 0x20000000L
+//RPB_PERFCOUNTER2_CFG
+#define RPB_PERFCOUNTER2_CFG__PERF_SEL__SHIFT 0x0
+#define RPB_PERFCOUNTER2_CFG__PERF_SEL_END__SHIFT 0x8
+#define RPB_PERFCOUNTER2_CFG__PERF_MODE__SHIFT 0x18
+#define RPB_PERFCOUNTER2_CFG__ENABLE__SHIFT 0x1c
+#define RPB_PERFCOUNTER2_CFG__CLEAR__SHIFT 0x1d
+#define RPB_PERFCOUNTER2_CFG__PERF_SEL_MASK 0x000000FFL
+#define RPB_PERFCOUNTER2_CFG__PERF_SEL_END_MASK 0x0000FF00L
+#define RPB_PERFCOUNTER2_CFG__PERF_MODE_MASK 0x0F000000L
+#define RPB_PERFCOUNTER2_CFG__ENABLE_MASK 0x10000000L
+#define RPB_PERFCOUNTER2_CFG__CLEAR_MASK 0x20000000L
+//RPB_PERFCOUNTER3_CFG
+#define RPB_PERFCOUNTER3_CFG__PERF_SEL__SHIFT 0x0
+#define RPB_PERFCOUNTER3_CFG__PERF_SEL_END__SHIFT 0x8
+#define RPB_PERFCOUNTER3_CFG__PERF_MODE__SHIFT 0x18
+#define RPB_PERFCOUNTER3_CFG__ENABLE__SHIFT 0x1c
+#define RPB_PERFCOUNTER3_CFG__CLEAR__SHIFT 0x1d
+#define RPB_PERFCOUNTER3_CFG__PERF_SEL_MASK 0x000000FFL
+#define RPB_PERFCOUNTER3_CFG__PERF_SEL_END_MASK 0x0000FF00L
+#define RPB_PERFCOUNTER3_CFG__PERF_MODE_MASK 0x0F000000L
+#define RPB_PERFCOUNTER3_CFG__ENABLE_MASK 0x10000000L
+#define RPB_PERFCOUNTER3_CFG__CLEAR_MASK 0x20000000L
+//RPB_PERFCOUNTER_RSLT_CNTL
+#define RPB_PERFCOUNTER_RSLT_CNTL__PERF_COUNTER_SELECT__SHIFT 0x0
+#define RPB_PERFCOUNTER_RSLT_CNTL__START_TRIGGER__SHIFT 0x8
+#define RPB_PERFCOUNTER_RSLT_CNTL__STOP_TRIGGER__SHIFT 0x10
+#define RPB_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY__SHIFT 0x18
+#define RPB_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL__SHIFT 0x19
+#define RPB_PERFCOUNTER_RSLT_CNTL__STOP_ALL_ON_SATURATE__SHIFT 0x1a
+#define RPB_PERFCOUNTER_RSLT_CNTL__PERF_COUNTER_SELECT_MASK 0x0000000FL
+#define RPB_PERFCOUNTER_RSLT_CNTL__START_TRIGGER_MASK 0x0000FF00L
+#define RPB_PERFCOUNTER_RSLT_CNTL__STOP_TRIGGER_MASK 0x00FF0000L
+#define RPB_PERFCOUNTER_RSLT_CNTL__ENABLE_ANY_MASK 0x01000000L
+#define RPB_PERFCOUNTER_RSLT_CNTL__CLEAR_ALL_MASK 0x02000000L
+#define RPB_PERFCOUNTER_RSLT_CNTL__STOP_ALL_ON_SATURATE_MASK 0x04000000L
+//RPB_PERF_COUNTER_CNTL
+#define RPB_PERF_COUNTER_CNTL__PERF_COUNTER_SELECT__SHIFT 0x0
+#define RPB_PERF_COUNTER_CNTL__CLEAR_SELECTED_PERF_COUNTER__SHIFT 0x2
+#define RPB_PERF_COUNTER_CNTL__CLEAR_ALL_PERF_COUNTERS__SHIFT 0x3
+#define RPB_PERF_COUNTER_CNTL__STOP_ON_COUNTER_SATURATION__SHIFT 0x4
+#define RPB_PERF_COUNTER_CNTL__ENABLE_PERF_COUNTERS__SHIFT 0x5
+#define RPB_PERF_COUNTER_CNTL__PERF_COUNTER_ASSIGN_0__SHIFT 0x9
+#define RPB_PERF_COUNTER_CNTL__PERF_COUNTER_ASSIGN_1__SHIFT 0xe
+#define RPB_PERF_COUNTER_CNTL__PERF_COUNTER_ASSIGN_2__SHIFT 0x13
+#define RPB_PERF_COUNTER_CNTL__PERF_COUNTER_ASSIGN_3__SHIFT 0x18
+#define RPB_PERF_COUNTER_CNTL__PERF_COUNTER_SELECT_MASK 0x00000003L
+#define RPB_PERF_COUNTER_CNTL__CLEAR_SELECTED_PERF_COUNTER_MASK 0x00000004L
+#define RPB_PERF_COUNTER_CNTL__CLEAR_ALL_PERF_COUNTERS_MASK 0x00000008L
+#define RPB_PERF_COUNTER_CNTL__STOP_ON_COUNTER_SATURATION_MASK 0x00000010L
+#define RPB_PERF_COUNTER_CNTL__ENABLE_PERF_COUNTERS_MASK 0x000001E0L
+#define RPB_PERF_COUNTER_CNTL__PERF_COUNTER_ASSIGN_0_MASK 0x00003E00L
+#define RPB_PERF_COUNTER_CNTL__PERF_COUNTER_ASSIGN_1_MASK 0x0007C000L
+#define RPB_PERF_COUNTER_CNTL__PERF_COUNTER_ASSIGN_2_MASK 0x00F80000L
+#define RPB_PERF_COUNTER_CNTL__PERF_COUNTER_ASSIGN_3_MASK 0x1F000000L
+//RPB_PERFCOUNTER_HI
+#define RPB_PERFCOUNTER_HI__COUNTER_HI__SHIFT 0x0
+#define RPB_PERFCOUNTER_HI__COMPARE_VALUE__SHIFT 0x10
+#define RPB_PERFCOUNTER_HI__COUNTER_HI_MASK 0x0000FFFFL
+#define RPB_PERFCOUNTER_HI__COMPARE_VALUE_MASK 0xFFFF0000L
+//RPB_PERFCOUNTER_LO
+#define RPB_PERFCOUNTER_LO__COUNTER_LO__SHIFT 0x0
+#define RPB_PERFCOUNTER_LO__COUNTER_LO_MASK 0xFFFFFFFFL
+//RPB_PERF_COUNTER_STATUS
+#define RPB_PERF_COUNTER_STATUS__PERFORMANCE_COUNTER_VALUE__SHIFT 0x0
+#define RPB_PERF_COUNTER_STATUS__PERFORMANCE_COUNTER_VALUE_MASK 0xFFFFFFFFL
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h
index 9de01ae574c0..067eddd9c62d 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h
@@ -4115,6 +4115,7 @@
#define mmSCL0_SCL_COEF_RAM_CONFLICT_STATUS 0x1B55
#define mmSCL0_SCL_COEF_RAM_SELECT 0x1B40
#define mmSCL0_SCL_COEF_RAM_TAP_DATA 0x1B41
+#define mmSCL0_SCL_SCALER_ENABLE 0x1B42
#define mmSCL0_SCL_CONTROL 0x1B44
#define mmSCL0_SCL_DEBUG 0x1B6A
#define mmSCL0_SCL_DEBUG2 0x1B69
@@ -4144,6 +4145,7 @@
#define mmSCL1_SCL_COEF_RAM_CONFLICT_STATUS 0x1E55
#define mmSCL1_SCL_COEF_RAM_SELECT 0x1E40
#define mmSCL1_SCL_COEF_RAM_TAP_DATA 0x1E41
+#define mmSCL1_SCL_SCALER_ENABLE 0x1E42
#define mmSCL1_SCL_CONTROL 0x1E44
#define mmSCL1_SCL_DEBUG 0x1E6A
#define mmSCL1_SCL_DEBUG2 0x1E69
@@ -4173,6 +4175,7 @@
#define mmSCL2_SCL_COEF_RAM_CONFLICT_STATUS 0x4155
#define mmSCL2_SCL_COEF_RAM_SELECT 0x4140
#define mmSCL2_SCL_COEF_RAM_TAP_DATA 0x4141
+#define mmSCL2_SCL_SCALER_ENABLE 0x4142
#define mmSCL2_SCL_CONTROL 0x4144
#define mmSCL2_SCL_DEBUG 0x416A
#define mmSCL2_SCL_DEBUG2 0x4169
@@ -4202,6 +4205,7 @@
#define mmSCL3_SCL_COEF_RAM_CONFLICT_STATUS 0x4455
#define mmSCL3_SCL_COEF_RAM_SELECT 0x4440
#define mmSCL3_SCL_COEF_RAM_TAP_DATA 0x4441
+#define mmSCL3_SCL_SCALER_ENABLE 0x4442
#define mmSCL3_SCL_CONTROL 0x4444
#define mmSCL3_SCL_DEBUG 0x446A
#define mmSCL3_SCL_DEBUG2 0x4469
@@ -4231,6 +4235,7 @@
#define mmSCL4_SCL_COEF_RAM_CONFLICT_STATUS 0x4755
#define mmSCL4_SCL_COEF_RAM_SELECT 0x4740
#define mmSCL4_SCL_COEF_RAM_TAP_DATA 0x4741
+#define mmSCL4_SCL_SCALER_ENABLE 0x4742
#define mmSCL4_SCL_CONTROL 0x4744
#define mmSCL4_SCL_DEBUG 0x476A
#define mmSCL4_SCL_DEBUG2 0x4769
@@ -4260,6 +4265,7 @@
#define mmSCL5_SCL_COEF_RAM_CONFLICT_STATUS 0x4A55
#define mmSCL5_SCL_COEF_RAM_SELECT 0x4A40
#define mmSCL5_SCL_COEF_RAM_TAP_DATA 0x4A41
+#define mmSCL5_SCL_SCALER_ENABLE 0x4A42
#define mmSCL5_SCL_CONTROL 0x4A44
#define mmSCL5_SCL_DEBUG 0x4A6A
#define mmSCL5_SCL_DEBUG2 0x4A69
@@ -4287,6 +4293,7 @@
#define mmSCL_COEF_RAM_CONFLICT_STATUS 0x1B55
#define mmSCL_COEF_RAM_SELECT 0x1B40
#define mmSCL_COEF_RAM_TAP_DATA 0x1B41
+#define mmSCL_SCALER_ENABLE 0x1B42
#define mmSCL_CONTROL 0x1B44
#define mmSCL_DEBUG 0x1B6A
#define mmSCL_DEBUG2 0x1B69
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h
index bd8085ec54ed..9317a7afa621 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h
@@ -5242,6 +5242,8 @@
#define DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT 0x0000000c
#define DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE_MASK 0x00000003L
#define DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT 0x00000000
+#define DEGAMMA_CONTROL__ICON_DEGAMMA_MODE_MASK 0x00000300L
+#define DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT 0x00000008
#define DEGAMMA_CONTROL__OVL_DEGAMMA_MODE_MASK 0x00000030L
#define DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT 0x00000004
#define DENORM_CONTROL__DENORM_MODE_MASK 0x00000007L
@@ -8648,6 +8650,8 @@
#define REGAMMA_LUT_INDEX__REGAMMA_LUT_INDEX__SHIFT 0x00000000
#define REGAMMA_LUT_WRITE_EN_MASK__REGAMMA_LUT_WRITE_EN_MASK_MASK 0x00000007L
#define REGAMMA_LUT_WRITE_EN_MASK__REGAMMA_LUT_WRITE_EN_MASK__SHIFT 0x00000000
+#define SCL_SCALER_ENABLE__SCL_SCALE_EN_MASK 0x00000001L
+#define SCL_SCALER_ENABLE__SCL_SCALE_EN__SHIFT 0x00000000
#define SCL_ALU_CONTROL__SCL_ALU_DISABLE_MASK 0x00000001L
#define SCL_ALU_CONTROL__SCL_ALU_DISABLE__SHIFT 0x00000000
#define SCL_BYPASS_CONTROL__SCL_BYPASS_MODE_MASK 0x00000003L
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h
index cae1a7e74323..73c5dd5e83d4 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h
@@ -19,8 +19,8 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#ifndef _dcn_2_0_3_OFFSET_HEADER
-#define _dcn_2_0_3_OFFSET_HEADER
+#ifndef _dcn_2_0_1_OFFSET_HEADER
+#define _dcn_2_0_1_OFFSET_HEADER
// addressBlock: dce_dc_dccg_dccg_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h
index ca1e1eb39256..290d807800a6 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h
@@ -18,8 +18,8 @@
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#ifndef _dcn_2_0_3_SH_MASK_HEADER
-#define _dcn_2_0_3_SH_MASK_HEADER
+#ifndef _dcn_2_0_1_SH_MASK_HEADER
+#define _dcn_2_0_1_SH_MASK_HEADER
// addressBlock: dce_dc_dccg_dccg_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h
index f2f8f9b39c6b..f32649047374 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h
@@ -311,6 +311,10 @@
#define mmPHYESYMCLK_CLOCK_CNTL_BASE_IDX 2
#define mmPHYFSYMCLK_CLOCK_CNTL 0x0057
#define mmPHYFSYMCLK_CLOCK_CNTL_BASE_IDX 2
+#define regHDMICHARCLK0_CLOCK_CNTL 0x004a
+#define regHDMICHARCLK0_CLOCK_CNTL_BASE_IDX 2
+#define mmHDMICHARCLK0_CLOCK_CNTL 0x004a
+#define mmHDMICHARCLK0_CLOCK_CNTL_BASE_IDX 2
// addressBlock: dce_dc_dccg_dccg_dfs_dispdec
@@ -4513,6 +4517,10 @@
#define mmCM0_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM0_CM_3DLUT_OUT_OFFSET_B 0x0e18
#define mmCM0_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM0_CM_TEST_DEBUG_INDEX 0x0e19
+#define mmCM0_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM0_CM_TEST_DEBUG_DATA 0x0e1a
+#define mmCM0_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -5201,6 +5209,10 @@
#define mmCM1_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM1_CM_3DLUT_OUT_OFFSET_B 0x0f83
#define mmCM1_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM1_CM_TEST_DEBUG_INDEX 0x0f84
+#define mmCM1_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM1_CM_TEST_DEBUG_DATA 0x0f85
+#define mmCM1_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp1_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -5888,6 +5900,10 @@
#define mmCM2_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM2_CM_3DLUT_OUT_OFFSET_B 0x10ee
#define mmCM2_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM2_CM_TEST_DEBUG_INDEX 0x10ef
+#define mmCM2_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM2_CM_TEST_DEBUG_DATA 0x10f0
+#define mmCM2_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp2_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -6576,6 +6592,10 @@
#define mmCM3_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM3_CM_3DLUT_OUT_OFFSET_B 0x1259
#define mmCM3_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM3_CM_TEST_DEBUG_INDEX 0x125a
+#define mmCM3_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM3_CM_TEST_DEBUG_DATA 0x125b
+#define mmCM3_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp3_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -7264,6 +7284,10 @@
#define mmCM4_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM4_CM_3DLUT_OUT_OFFSET_B 0x13c4
#define mmCM4_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM4_CM_TEST_DEBUG_INDEX 0x13c5
+#define mmCM4_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM4_CM_TEST_DEBUG_DATA 0x13c6
+#define mmCM4_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp4_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -7952,6 +7976,10 @@
#define mmCM5_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM5_CM_3DLUT_OUT_OFFSET_B 0x152f
#define mmCM5_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM5_CM_TEST_DEBUG_INDEX 0x1530
+#define mmCM5_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM5_CM_TEST_DEBUG_DATA 0x1531
+#define mmCM5_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp5_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -12827,6 +12855,24 @@
#define mmDSCC0_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x3035
#define mmDSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX0 0x3036
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX1 0x3037
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX2 0x3038
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX3 0x3039
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_BUS_ROTATE 0x303a
+#define mmDSCC0_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA0 0x303b
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA1 0x303c
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA2 0x303d
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA3 0x303e
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc0_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -12957,6 +13003,24 @@
#define mmDSCC1_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC1_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x3091
#define mmDSCC1_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX0 0x3092
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX1 0x3093
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX2 0x3094
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX3 0x3095
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_BUS_ROTATE 0x3096
+#define mmDSCC1_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA0 0x3097
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA1 0x3098
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA2 0x3099
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA3 0x309a
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc1_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -13087,6 +13151,24 @@
#define mmDSCC2_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC2_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x30ed
#define mmDSCC2_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX0 0x30ee
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX1 0x30ef
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX2 0x30f0
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX3 0x30f1
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_BUS_ROTATE 0x30f2
+#define mmDSCC2_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA0 0x30f3
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA1 0x30f4
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA2 0x30f5
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA3 0x30f6
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc2_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -13217,6 +13299,24 @@
#define mmDSCC3_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC3_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x3149
#define mmDSCC3_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX0 0x314a
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX1 0x314b
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX2 0x314c
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX3 0x314d
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_BUS_ROTATE 0x314e
+#define mmDSCC3_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA0 0x314f
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA1 0x3150
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA2 0x3151
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA3 0x3152
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc3_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -13347,6 +13447,24 @@
#define mmDSCC4_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC4_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x31a5
#define mmDSCC4_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX0 0x31a6
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX1 0x31a7
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX2 0x31a8
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX3 0x31a9
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_BUS_ROTATE 0x31aa
+#define mmDSCC4_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA0 0x31ab
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA1 0x31ac
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA2 0x31ad
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA3 0x31ae
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc4_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -13476,6 +13594,24 @@
#define mmDSCC5_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC5_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x3201
#define mmDSCC5_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC5_DSCC_TEST_DEBUG_INDEX0 0x3202
+#define mmDSCC5_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC5_DSCC_TEST_DEBUG_INDEX1 0x3203
+#define mmDSCC5_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC5_DSCC_TEST_DEBUG_INDEX2 0x3204
+#define mmDSCC5_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC5_DSCC_TEST_DEBUG_INDEX3 0x3205
+#define mmDSCC5_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC5_DSCC_TEST_DEBUG_BUS_ROTATE 0x3206
+#define mmDSCC5_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC5_DSCC_TEST_DEBUG_DATA0 0x3207
+#define mmDSCC5_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC5_DSCC_TEST_DEBUG_DATA1 0x3208
+#define mmDSCC5_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC5_DSCC_TEST_DEBUG_DATA2 0x3209
+#define mmDSCC5_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC5_DSCC_TEST_DEBUG_DATA3 0x320a
+#define mmDSCC5_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc5_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
index e0a447351623..4005c73c2c9f 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
@@ -1189,6 +1189,11 @@
#define PHYFSYMCLK_CLOCK_CNTL__PHYFSYMCLK_FORCE_SRC_SEL__SHIFT 0x4
#define PHYFSYMCLK_CLOCK_CNTL__PHYFSYMCLK_FORCE_EN_MASK 0x00000001L
#define PHYFSYMCLK_CLOCK_CNTL__PHYFSYMCLK_FORCE_SRC_SEL_MASK 0x00000010L
+//HDMICHARCLK0_CLOCK_CNTL
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_EN__SHIFT 0x0
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_SRC_SEL__SHIFT 0x4
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_EN_MASK 0x00000001L
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_SRC_SEL_MASK 0x00000070L
// addressBlock: dce_dc_dccg_dccg_dfs_dispdec
@@ -16739,6 +16744,15 @@
#define CM0_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM0_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM0_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM0_CM_TEST_DEBUG_INDEX
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM0_CM_TEST_DEBUG_DATA
+#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
+
// addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
//DC_PERFMON12_PERFCOUNTER_CNTL
@@ -18934,6 +18948,15 @@
#define CM1_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM1_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM1_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM1_CM_TEST_DEBUG_INDEX
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM1_CM_TEST_DEBUG_DATA
+#define CM1_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM1_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
+
// addressBlock: dce_dc_dpp1_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
//DC_PERFMON13_PERFCOUNTER_CNTL
@@ -21128,6 +21151,15 @@
#define CM2_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM2_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM2_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM2_CM_TEST_DEBUG_INDEX
+#define CM2_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM2_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM2_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM2_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM2_CM_TEST_DEBUG_DATA
+#define CM2_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM2_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
+
// addressBlock: dce_dc_dpp2_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
//DC_PERFMON14_PERFCOUNTER_CNTL
@@ -23323,6 +23355,15 @@
#define CM3_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM3_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM3_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM3_CM_TEST_DEBUG_INDEX
+#define CM3_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM3_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM3_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM3_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM3_CM_TEST_DEBUG_DATA
+#define CM3_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM3_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
+
// addressBlock: dce_dc_dpp3_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
//DC_PERFMON15_PERFCOUNTER_CNTL
@@ -25517,6 +25558,15 @@
#define CM4_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM4_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM4_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM4_CM_TEST_DEBUG_INDEX
+#define CM4_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM4_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM4_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM4_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM4_CM_TEST_DEBUG_DATA
+#define CM4_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM4_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
+
// addressBlock: dce_dc_dpp4_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
//DC_PERFMON16_PERFCOUNTER_CNTL
@@ -27712,6 +27762,15 @@
#define CM5_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM5_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM5_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM5_CM_TEST_DEBUG_INDEX
+#define CM5_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM5_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM5_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM5_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM5_CM_TEST_DEBUG_DATA
+#define CM5_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM5_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
+
// addressBlock: dce_dc_dpp5_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
//DC_PERFMON17_PERFCOUNTER_CNTL
@@ -50276,7 +50335,9 @@
#define DSC_TOP0_DSC_TOP_CONTROL__DSC_DSCCLK_R_GATE_DIS_MASK 0x00000100L
//DSC_TOP0_DSC_DEBUG_CONTROL
#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_DBG_EN__SHIFT 0x0
+#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_TEST_CLOCK_MUX_SEL__SHIFT 0x4
#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_DBG_EN_MASK 0x00000001L
+#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_TEST_CLOCK_MUX_SEL_MASK 0x00000070L
// addressBlock: dce_dc_dsc0_dispdec_dsccif_dispdec
@@ -50648,6 +50709,15 @@
//DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL
#define DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL__DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL__SHIFT 0x0
#define DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL__DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_MASK 0x0003FFFFL
+//DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS0_ROTATE__SHIFT 0x0
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS1_ROTATE__SHIFT 0x8
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS2_ROTATE__SHIFT 0x10
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS3_ROTATE__SHIFT 0x18
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS0_ROTATE_MASK 0x0000001FL
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS1_ROTATE_MASK 0x00001F00L
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS2_ROTATE_MASK 0x001F0000L
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS3_ROTATE_MASK 0x1F000000L
// addressBlock: dce_dc_dsc0_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_offset.h
index b45a35aae241..b2962b5ce31e 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_offset.h
@@ -4466,6 +4466,10 @@
#define mmCM0_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM0_CM_3DLUT_OUT_OFFSET_B 0x0e18
#define mmCM0_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM0_CM_TEST_DEBUG_INDEX 0x0e19
+#define mmCM0_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM0_CM_TEST_DEBUG_DATA 0x0e1a
+#define mmCM0_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -5154,6 +5158,10 @@
#define mmCM1_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM1_CM_3DLUT_OUT_OFFSET_B 0x0f83
#define mmCM1_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM1_CM_TEST_DEBUG_INDEX 0x0f84
+#define mmCM1_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM1_CM_TEST_DEBUG_DATA 0x0f85
+#define mmCM1_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp1_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -5841,6 +5849,10 @@
#define mmCM2_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM2_CM_3DLUT_OUT_OFFSET_B 0x10ee
#define mmCM2_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM2_CM_TEST_DEBUG_INDEX 0x10ef
+#define mmCM2_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM2_CM_TEST_DEBUG_DATA 0x10f0
+#define mmCM2_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp2_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -6529,6 +6541,10 @@
#define mmCM3_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM3_CM_3DLUT_OUT_OFFSET_B 0x1259
#define mmCM3_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM3_CM_TEST_DEBUG_INDEX 0x125a
+#define mmCM3_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM3_CM_TEST_DEBUG_DATA 0x125b
+#define mmCM3_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp3_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -7217,6 +7233,10 @@
#define mmCM4_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM4_CM_3DLUT_OUT_OFFSET_B 0x13c4
#define mmCM4_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM4_CM_TEST_DEBUG_INDEX 0x13c5
+#define mmCM4_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM4_CM_TEST_DEBUG_DATA 0x13c6
+#define mmCM4_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp4_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -11671,6 +11691,24 @@
#define mmDSCC0_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x3035
#define mmDSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX0 0x3036
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX1 0x3037
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX2 0x3038
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX3 0x3039
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_BUS_ROTATE 0x303a
+#define mmDSCC0_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA0 0x303b
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA1 0x303c
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA2 0x303d
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA3 0x303e
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc0_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -11801,6 +11839,24 @@
#define mmDSCC1_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC1_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x3091
#define mmDSCC1_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX0 0x3092
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX1 0x3093
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX2 0x3094
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX3 0x3095
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_BUS_ROTATE 0x3096
+#define mmDSCC1_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA0 0x3097
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA1 0x3098
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA2 0x3099
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA3 0x309a
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc1_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -11931,6 +11987,24 @@
#define mmDSCC2_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC2_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x30ed
#define mmDSCC2_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX0 0x30ee
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX1 0x30ef
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX2 0x30f0
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX3 0x30f1
+#define mmDSCC2_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_BUS_ROTATE 0x30f2
+#define mmDSCC2_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA0 0x30f3
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA1 0x30f4
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA2 0x30f5
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA3 0x30f6
+#define mmDSCC2_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc2_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -12061,6 +12135,24 @@
#define mmDSCC3_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC3_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x3149
#define mmDSCC3_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX0 0x314a
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX1 0x314b
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX2 0x314c
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX3 0x314d
+#define mmDSCC3_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_BUS_ROTATE 0x314e
+#define mmDSCC3_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA0 0x314f
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA1 0x3150
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA2 0x3151
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA3 0x3152
+#define mmDSCC3_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc3_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -12191,6 +12283,24 @@
#define mmDSCC4_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC4_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x31a5
#define mmDSCC4_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX0 0x31a6
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX1 0x31a7
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX2 0x31a8
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX3 0x31a9
+#define mmDSCC4_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_BUS_ROTATE 0x31aa
+#define mmDSCC4_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA0 0x31ab
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA1 0x31ac
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA2 0x31ad
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA3 0x31ae
+#define mmDSCC4_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc4_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h
index 3dae29f9581e..7f8f0a646422 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h
@@ -15676,6 +15676,14 @@
#define CM0_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM0_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM0_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM0_CM_TEST_DEBUG_INDEX
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM0_CM_TEST_DEBUG_DATA
+#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
// addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -17876,6 +17884,14 @@
#define CM1_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM1_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM1_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM1_CM_TEST_DEBUG_INDEX
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM1_CM_TEST_DEBUG_DATA
+#define CM1_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM1_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
// addressBlock: dce_dc_dpp1_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -20076,6 +20092,14 @@
#define CM2_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM2_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM2_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM2_CM_TEST_DEBUG_INDEX
+#define CM2_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM2_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM2_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM2_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM2_CM_TEST_DEBUG_DATA
+#define CM2_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM2_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
// addressBlock: dce_dc_dpp2_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -22276,6 +22300,14 @@
#define CM3_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM3_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM3_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM3_CM_TEST_DEBUG_INDEX
+#define CM3_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM3_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM3_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM3_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM3_CM_TEST_DEBUG_DATA
+#define CM3_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM3_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
// addressBlock: dce_dc_dpp3_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -24476,6 +24508,14 @@
#define CM4_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM4_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM4_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM4_CM_TEST_DEBUG_INDEX
+#define CM4_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM4_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM4_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM4_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM4_CM_TEST_DEBUG_DATA
+#define CM4_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM4_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
// addressBlock: dce_dc_dpp4_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -43642,7 +43682,9 @@
#define DSC_TOP0_DSC_TOP_CONTROL__DSC_DSCCLK_R_GATE_DIS_MASK 0x00000100L
//DSC_TOP0_DSC_DEBUG_CONTROL
#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_DBG_EN__SHIFT 0x0
+#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_TEST_CLOCK_MUX_SEL__SHIFT 0x4
#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_DBG_EN_MASK 0x00000001L
+#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_TEST_CLOCK_MUX_SEL_MASK 0x00000070L
// addressBlock: dce_dc_dsc0_dispdec_dsccif_dispdec
@@ -44015,6 +44057,15 @@
//DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL
#define DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL__DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL__SHIFT 0x0
#define DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL__DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_MASK 0x0003FFFFL
+//DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS0_ROTATE__SHIFT 0x0
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS1_ROTATE__SHIFT 0x8
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS2_ROTATE__SHIFT 0x10
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS3_ROTATE__SHIFT 0x18
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS0_ROTATE_MASK 0x0000001FL
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS1_ROTATE_MASK 0x00001F00L
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS2_ROTATE_MASK 0x001F0000L
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS3_ROTATE_MASK 0x1F000000L
// addressBlock: dce_dc_dsc0_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -44165,6 +44216,7 @@
#define DSC_TOP1_DSC_TOP_CONTROL__DSC_DSCCLK_R_GATE_DIS_MASK 0x00000100L
//DSC_TOP1_DSC_DEBUG_CONTROL
#define DSC_TOP1_DSC_DEBUG_CONTROL__DSC_DBG_EN__SHIFT 0x0
+#define DSC_TOP1_DSC_DEBUG_CONTROL__DSC_TEST_CLOCK_MUX_SEL__SHIFT 0x4
#define DSC_TOP1_DSC_DEBUG_CONTROL__DSC_DBG_EN_MASK 0x00000001L
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_offset.h
index daa8130636f0..a3373d1e1736 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_offset.h
@@ -3110,6 +3110,10 @@
#define mmCM0_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM0_CM_3DLUT_OUT_OFFSET_B 0x0e18
#define mmCM0_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM0_CM_TEST_DEBUG_INDEX 0x0e19
+#define mmCM0_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM0_CM_TEST_DEBUG_DATA 0x0e1a
+#define mmCM0_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -3798,6 +3802,10 @@
#define mmCM1_CM_3DLUT_OUT_OFFSET_G_BASE_IDX 2
#define mmCM1_CM_3DLUT_OUT_OFFSET_B 0x0f83
#define mmCM1_CM_3DLUT_OUT_OFFSET_B_BASE_IDX 2
+#define mmCM1_CM_TEST_DEBUG_INDEX 0x0f84
+#define mmCM1_CM_TEST_DEBUG_INDEX_BASE_IDX 2
+#define mmCM1_CM_TEST_DEBUG_DATA 0x0f85
+#define mmCM1_CM_TEST_DEBUG_DATA_BASE_IDX 2
// addressBlock: dce_dc_dpp1_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -5687,6 +5695,24 @@
#define mmDSCC0_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x3035
#define mmDSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX0 0x3036
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX1 0x3037
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX2 0x3038
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX3 0x3039
+#define mmDSCC0_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_BUS_ROTATE 0x303a
+#define mmDSCC0_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA0 0x303b
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA1 0x303c
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA2 0x303d
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA3 0x303e
+#define mmDSCC0_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc0_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
@@ -5817,6 +5843,24 @@
#define mmDSCC1_DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL_BASE_IDX 2
#define mmDSCC1_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL 0x3091
#define mmDSCC1_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX0 0x3092
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX0_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX1 0x3093
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX1_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX2 0x3094
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX2_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX3 0x3095
+#define mmDSCC1_DSCC_TEST_DEBUG_INDEX3_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_BUS_ROTATE 0x3096
+#define mmDSCC1_DSCC_TEST_DEBUG_BUS_ROTATE_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA0 0x3097
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA0_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA1 0x3098
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA1_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA2 0x3099
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA2_BASE_IDX 2
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA3 0x309a
+#define mmDSCC1_DSCC_TEST_DEBUG_DATA3_BASE_IDX 2
// addressBlock: dce_dc_dsc1_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_sh_mask.h
index 5c469cf635e5..9549494b65b5 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_sh_mask.h
@@ -10701,6 +10701,13 @@
#define CM0_CM_COEF_FORMAT__CM_BIAS_FORMAT_MASK 0x00000001L
#define CM0_CM_COEF_FORMAT__CM_POST_CSC_COEF_FORMAT_MASK 0x00000010L
#define CM0_CM_COEF_FORMAT__CM_GAMUT_REMAP_COEF_FORMAT_MASK 0x00000100L
+
+//CM0_CM_TEST_DEBUG_INDEX
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+
//CM0_CM_SHAPER_CONTROL
#define CM0_CM_SHAPER_CONTROL__CM_SHAPER_LUT_MODE__SHIFT 0x0
#define CM0_CM_SHAPER_CONTROL__CM_SHAPER_MODE_CURRENT__SHIFT 0x2
@@ -11159,6 +11166,14 @@
#define CM0_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM0_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM0_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM0_CM_TEST_DEBUG_INDEX
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM0_CM_TEST_DEBUG_DATA
+#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
// addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -13359,6 +13374,14 @@
#define CM1_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B__SHIFT 0x10
#define CM1_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_OFFSET_B_MASK 0x0000FFFFL
#define CM1_CM_3DLUT_OUT_OFFSET_B__CM_3DLUT_OUT_SCALE_B_MASK 0xFFFF0000L
+//CM1_CM_TEST_DEBUG_INDEX
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL
+#define CM1_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L
+//CM1_CM_TEST_DEBUG_DATA
+#define CM1_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0
+#define CM1_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL
// addressBlock: dce_dc_dpp1_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -22258,7 +22281,9 @@
#define DSC_TOP0_DSC_TOP_CONTROL__DSC_DSCCLK_R_GATE_DIS_MASK 0x00000100L
//DSC_TOP0_DSC_DEBUG_CONTROL
#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_DBG_EN__SHIFT 0x0
+#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_TEST_CLOCK_MUX_SEL__SHIFT 0x4
#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_DBG_EN_MASK 0x00000001L
+#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_TEST_CLOCK_MUX_SEL_MASK 0x00000070L
// addressBlock: dce_dc_dsc0_dispdec_dsccif_dispdec
@@ -22631,6 +22656,15 @@
//DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL
#define DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL__DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL__SHIFT 0x0
#define DSCC0_DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL__DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL_MASK 0x0003FFFFL
+//DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS0_ROTATE__SHIFT 0x0
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS1_ROTATE__SHIFT 0x8
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS2_ROTATE__SHIFT 0x10
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS3_ROTATE__SHIFT 0x18
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS0_ROTATE_MASK 0x0000001FL
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS1_ROTATE_MASK 0x00001F00L
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS2_ROTATE_MASK 0x001F0000L
+#define DSCC0_DSCC_TEST_DEBUG_BUS_ROTATE__DSCC_TEST_DEBUG_BUS3_ROTATE_MASK 0x1F000000L
// addressBlock: dce_dc_dsc0_dispdec_dsc_dcperfmon_dc_perfmon_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h
index f268d33c4744..7fd906f10803 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h
@@ -424,6 +424,8 @@
#define regDTBCLK_DTO2_MODULO_BASE_IDX 2
#define regDTBCLK_DTO3_MODULO 0x0022
#define regDTBCLK_DTO3_MODULO_BASE_IDX 2
+#define regHDMICHARCLK0_CLOCK_CNTL 0x004a
+#define regHDMICHARCLK0_CLOCK_CNTL_BASE_IDX 2
#define regPHYASYMCLK_CLOCK_CNTL 0x0052
#define regPHYASYMCLK_CLOCK_CNTL_BASE_IDX 2
#define regPHYBSYMCLK_CLOCK_CNTL 0x0053
@@ -434,6 +436,8 @@
#define regPHYDSYMCLK_CLOCK_CNTL_BASE_IDX 2
#define regPHYESYMCLK_CLOCK_CNTL 0x0056
#define regPHYESYMCLK_CLOCK_CNTL_BASE_IDX 2
+#define regHDMISTREAMCLK_CNTL 0x0059
+#define regHDMISTREAMCLK_CNTL_BASE_IDX 2
#define regDCCG_GATE_DISABLE_CNTL3 0x005a
#define regDCCG_GATE_DISABLE_CNTL3_BASE_IDX 2
#define regHDMISTREAMCLK0_DTO_PARAM 0x005b
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h
index cf3398f15666..07fbfafe6056 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h
@@ -1372,6 +1372,11 @@
//DTBCLK_DTO3_MODULO
#define DTBCLK_DTO3_MODULO__DTBCLK_DTO3_MODULO__SHIFT 0x0
#define DTBCLK_DTO3_MODULO__DTBCLK_DTO3_MODULO_MASK 0xFFFFFFFFL
+//HDMICHARCLK0_CLOCK_CNTL
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_EN__SHIFT 0x0
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_SRC_SEL__SHIFT 0x4
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_EN_MASK 0x00000001L
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_SRC_SEL_MASK 0x00000070L
//PHYASYMCLK_CLOCK_CNTL
#define PHYASYMCLK_CLOCK_CNTL__PHYASYMCLK_FORCE_EN__SHIFT 0x0
#define PHYASYMCLK_CLOCK_CNTL__PHYASYMCLK_FORCE_SRC_SEL__SHIFT 0x4
@@ -1397,6 +1402,13 @@
#define PHYESYMCLK_CLOCK_CNTL__PHYESYMCLK_FORCE_SRC_SEL__SHIFT 0x4
#define PHYESYMCLK_CLOCK_CNTL__PHYESYMCLK_FORCE_EN_MASK 0x00000001L
#define PHYESYMCLK_CLOCK_CNTL__PHYESYMCLK_FORCE_SRC_SEL_MASK 0x00000030L
+//HDMISTREAMCLK_CNTL
+#define HDMISTREAMCLK_CNTL__HDMISTREAMCLK0_SRC_SEL__SHIFT 0x0
+#define HDMISTREAMCLK_CNTL__HDMISTREAMCLK0_EN__SHIFT 0x3
+#define HDMISTREAMCLK_CNTL__HDMISTREAMCLK0_DTO_FORCE_DIS__SHIFT 0x4
+#define HDMISTREAMCLK_CNTL__HDMISTREAMCLK0_SRC_SEL_MASK 0x00000007L
+#define HDMISTREAMCLK_CNTL__HDMISTREAMCLK0_EN_MASK 0x00000008L
+#define HDMISTREAMCLK_CNTL__HDMISTREAMCLK0_DTO_FORCE_DIS_MASK 0x00000010L
//DCCG_GATE_DISABLE_CNTL3
#define DCCG_GATE_DISABLE_CNTL3__HDMISTREAMCLK0_GATE_DISABLE__SHIFT 0x0
#define DCCG_GATE_DISABLE_CNTL3__HDMISTREAMCLK1_GATE_DISABLE__SHIFT 0x1
@@ -46978,6 +46990,13 @@
#define DSC_TOP0_DSC_TOP_CONTROL__DSC_CLOCK_EN_MASK 0x00000001L
#define DSC_TOP0_DSC_TOP_CONTROL__DSC_DISPCLK_R_GATE_DIS_MASK 0x00000010L
#define DSC_TOP0_DSC_TOP_CONTROL__DSC_DSCCLK_R_GATE_DIS_MASK 0x00000100L
+
+
+//DSC_TOP0_DSC_DEBUG_CONTROL
+#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_DBG_EN__SHIFT 0x0
+#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_TEST_CLOCK_MUX_SEL__SHIFT 0x4
+#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_DBG_EN_MASK 0x00000001L
+#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_TEST_CLOCK_MUX_SEL_MASK 0x00000070L
//DSC_TOP0_DSC_DEBUG_CONTROL
#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_DBG_EN__SHIFT 0x0
#define DSC_TOP0_DSC_DEBUG_CONTROL__DSC_TEST_CLOCK_MUX_SEL__SHIFT 0x4
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_offset.h
index 50c34d88c17c..16a69d17bb1e 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_offset.h
@@ -213,6 +213,8 @@
#define regDTBCLK_DTO2_MODULO_BASE_IDX 2
#define regDTBCLK_DTO3_MODULO 0x0022
#define regDTBCLK_DTO3_MODULO_BASE_IDX 2
+#define regHDMICHARCLK0_CLOCK_CNTL 0x004a
+#define regHDMICHARCLK0_CLOCK_CNTL_BASE_IDX 2
#define regPHYASYMCLK_CLOCK_CNTL 0x0052
#define regPHYASYMCLK_CLOCK_CNTL_BASE_IDX 2
#define regPHYBSYMCLK_CLOCK_CNTL 0x0053
@@ -233,6 +235,8 @@
#define regDCCG_AUDIO_DTBCLK_DTO_MODULO_BASE_IDX 2
#define regDTBCLK_DTO_DBUF_EN 0x0063
#define regDTBCLK_DTO_DBUF_EN_BASE_IDX 2
+#define regHDMISTREAMCLK_CNTL 0x0059
+#define regHDMISTREAMCLK_CNTL_BASE_IDX 2
// addressBlock: dce_dc_dccg_dccg_dcperfmon0_dc_perfmon_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_sh_mask.h
index 295e0dac9ffa..6473362e39a8 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_sh_mask.h
@@ -886,6 +886,11 @@
//DTBCLK_DTO3_MODULO
#define DTBCLK_DTO3_MODULO__DTBCLK_DTO3_MODULO__SHIFT 0x0
#define DTBCLK_DTO3_MODULO__DTBCLK_DTO3_MODULO_MASK 0xFFFFFFFFL
+//HDMICHARCLK0_CLOCK_CNTL
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_EN__SHIFT 0x0
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_SRC_SEL__SHIFT 0x4
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_EN_MASK 0x00000001L
+#define HDMICHARCLK0_CLOCK_CNTL__HDMICHARCLK0_SRC_SEL_MASK 0x00000070L
//PHYASYMCLK_CLOCK_CNTL
#define PHYASYMCLK_CLOCK_CNTL__PHYASYMCLK_FORCE_EN__SHIFT 0x0
#define PHYASYMCLK_CLOCK_CNTL__PHYASYMCLK_FORCE_SRC_SEL__SHIFT 0x4
@@ -911,6 +916,11 @@
#define PHYESYMCLK_CLOCK_CNTL__PHYESYMCLK_FORCE_SRC_SEL__SHIFT 0x4
#define PHYESYMCLK_CLOCK_CNTL__PHYESYMCLK_FORCE_EN_MASK 0x00000001L
#define PHYESYMCLK_CLOCK_CNTL__PHYESYMCLK_FORCE_SRC_SEL_MASK 0x00000030L
+//HDMISTREAMCLK_CNTL
+#define HDMISTREAMCLK_CNTL__HDMISTREAMCLK0_SRC_SEL__SHIFT 0x0
+#define HDMISTREAMCLK_CNTL__HDMISTREAMCLK0_DTO_FORCE_DIS__SHIFT 0x10
+#define HDMISTREAMCLK_CNTL__HDMISTREAMCLK0_SRC_SEL_MASK 0x00000003L
+#define HDMISTREAMCLK_CNTL__HDMISTREAMCLK0_DTO_FORCE_DIS_MASK 0x00010000L
//DCCG_GATE_DISABLE_CNTL3
#define DCCG_GATE_DISABLE_CNTL3__HDMISTREAMCLK0_GATE_DISABLE__SHIFT 0x0
#define DCCG_GATE_DISABLE_CNTL3__HDMISTREAMCLK1_GATE_DISABLE__SHIFT 0x1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_offset.h
index 222fa8d13269..a05bf8e4f58d 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_offset.h